diff --git a/.gitignore b/.gitignore
index d11a504bdc56ee98b3d5a0c33f9f75d996e45567..be75938ec401b1d72fa54773c85191aaac7d7f35 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,7 @@ node_modules
 /bazel-*
 /bazel_pip
 /tools/python_bin_path.sh
-/tools/git/gen
+/tensorflow/tools/git/gen
 /pip_test
 /_python_build
 *.pyc
@@ -26,4 +26,11 @@ Podfile.lock
 /tensorflow/contrib/lite/gen/**
 /tensorflow/contrib/lite/examples/ios/simple/data/*.txt
 /tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
-xcuserdata/**
\ No newline at end of file
+xcuserdata/**
+
+# Android
+.gradle
+.idea
+*.iml
+local.properties
+gradleBuild
diff --git a/BUILD b/BUILD
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..4bf647e47aa56cff0b3fd5af7d5df99d8b70549b 100644
--- a/BUILD
+++ b/BUILD
@@ -0,0 +1,6 @@
+exports_files(
+    [
+        "LICENSE",
+        "ACKNOWLEDGEMENTS",
+    ],
+)
diff --git a/CODEOWNERS b/CODEOWNERS
index 57a4df40e651f45dc03493af631d73332e46c182..007a304c3e706ce968576ec8979c08f1a3bcc552 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -1,53 +1,53 @@
 # NOTE: Disabled temporarily because it's too noisy on pushes.
 # Where component owners are known, add them here.
 
-#tensorflow/core/platform/windows/* @mrry
-#tensorflow/java/* @asimshankar
-#tensorflow/tensorboard/* @jart @dandelionmane
-#tensorflow/tools/docs/* @markdaoust
+# /tensorflow/core/platform/windows/ @mrry
+# /tensorflow/java/ @asimshankar
+# /tensorflow/tensorboard/ @jart @dandelionmane
+# /tensorflow/tools/docs/ @markdaoust
 
 # contrib
 
-# NEED OWNER: tensorflow/contrib/avro/*
-#tensorflow/contrib/batching/* @alextp @chrisolston
-#tensorflow/contrib/bayesflow/* @ebrevdo @rsepassi @jvdillon
-#tensorflow/contrib/boosted_trees/* @sshrdp @yk5 @nataliaponomareva
-#tensorflow/contrib/cmake/* @mrry @benoitsteiner
-#tensorflow/contrib/copy_graph/* @tucker @poxvoculi
-#tensorflow/contrib/crf/* @kentonl
-#tensorflow/contrib/data/* @mrry
-#tensorflow/contrib/distributions/* @jvdillon @langmore @rsepassi
-#tensorflow/contrib/factorization/* @agarwal-ashish @xavigonzalvo
-#tensorflow/contrib/ffmpeg/* @fredbertsch
-# NEED OWNER: tensorflow/contrib/framework/*
-#tensorflow/contrib/graph_editor/* @purpledog
-# NEED OWNER: tensorflow/contrib/grid_rnn/*
-#tensorflow/contrib/hvx/* @satok16
-#tensorflow/contrib/integrate/* @shoyer
-#tensorflow/contrib/kernel_methods/* @petrosmol
-#tensorflow/contrib/ios_examples/* @petewarden
-#tensorflow/contrib/labeled_tensor/* @shoyer
-#tensorflow/contrib/layers/* @fchollet @martinwicke
-#tensorflow/contrib/learn/* @martinwicke @ispirmustafa @alextp
-#tensorflow/contrib/linalg/* @langmore
-#tensorflow/contrib/linear_optimizer/* @petrosmol @andreasst @katsiapis
-#tensorflow/contrib/lookup/* @ysuematsu @andreasst
-#tensorflow/contrib/losses/* @alextp @ispirmustafa
-#tensorflow/contrib/makefile/* @petewarden @satok16 @wolffg
-#tensorflow/contrib/metrics/* @alextp @honkentuber @ispirmustafa
-#tensorflow/contrib/nccl/* @cwhipkey @zheng-xq
-#tensorflow/contrib/opt/* @strategist333
-#tensorflow/contrib/pi_examples/* @maciekcc
-#tensorflow/contrib/quantization/* @petewarden @cwhipkey @keveman
-#tensorflow/contrib/rnn/* @ebrevdo
-#tensorflow/contrib/saved_model/* @nfiedel @sukritiramesh
-#tensorflow/contrib/seq2seq/* @lukaszkaiser
-#tensorflow/contrib/session_bundle/* @nfiedel @sukritiramesh
-#tensorflow/contrib/slim/* @sguada @thenbasilmanran
-#tensorflow/contrib/stateless/* @girving
-#tensorflow/contrib/tensor_forest/* @gilberthendry @thomascolthurst
-#tensorflow/contrib/testing/* @dandelionmane
-#tensorflow/contrib/timeseries/* @allenlavoie
-#tensorflow/contrib/tpu/* @frankchn @saeta @jhseu
-#tensorflow/contrib/training/* @joel-shor @ebrevdo
-#tensorflow/contrib/util/* @sherrym
+# NEED OWNER: /tensorflow/contrib/avro/
+# /tensorflow/contrib/batching/ @alextp @chrisolston
+# /tensorflow/contrib/bayesflow/ @ebrevdo @rsepassi @jvdillon
+# /tensorflow/contrib/boosted_trees/ @sshrdp @yk5 @nataliaponomareva
+# /tensorflow/contrib/cmake/ @mrry @benoitsteiner
+# /tensorflow/contrib/copy_graph/ @tucker @poxvoculi
+# /tensorflow/contrib/crf/ @kentonl
+# /tensorflow/contrib/data/ @mrry
+# /tensorflow/contrib/distributions/ @jvdillon @langmore @rsepassi
+# /tensorflow/contrib/factorization/ @agarwal-ashish @xavigonzalvo
+# /tensorflow/contrib/ffmpeg/ @fredbertsch
+# NEED OWNER: /tensorflow/contrib/framework/
+# /tensorflow/contrib/graph_editor/ @purpledog
+# NEED OWNER: /tensorflow/contrib/grid_rnn/
+# /tensorflow/contrib/hvx/ @satok16
+# /tensorflow/contrib/integrate/ @shoyer
+# /tensorflow/contrib/kernel_methods/ @petrosmol
+# /tensorflow/contrib/ios_examples/ @petewarden
+# /tensorflow/contrib/labeled_tensor/ @shoyer
+# /tensorflow/contrib/layers/ @fchollet @martinwicke
+# /tensorflow/contrib/learn/ @martinwicke @ispirmustafa @alextp
+# /tensorflow/contrib/linalg/ @langmore
+# /tensorflow/contrib/linear_optimizer/ @petrosmol @andreasst @katsiapis
+# /tensorflow/contrib/lookup/ @ysuematsu @andreasst
+# /tensorflow/contrib/losses/ @alextp @ispirmustafa
+# /tensorflow/contrib/makefile/ @petewarden @satok16 @wolffg
+# /tensorflow/contrib/metrics/ @alextp @honkentuber @ispirmustafa
+# /tensorflow/contrib/nccl/ @cwhipkey @zheng-xq
+# /tensorflow/contrib/opt/ @strategist333
+# /tensorflow/contrib/pi_examples/ @maciekcc
+# /tensorflow/contrib/quantization/ @petewarden @cwhipkey @keveman
+# /tensorflow/contrib/rnn/ @ebrevdo
+# /tensorflow/contrib/saved_model/ @nfiedel @sukritiramesh
+# /tensorflow/contrib/seq2seq/ @lukaszkaiser
+# /tensorflow/contrib/session_bundle/ @nfiedel @sukritiramesh
+# /tensorflow/contrib/slim/ @sguada @thenbasilmanran
+# /tensorflow/contrib/stateless/ @girving
+# /tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst
+# /tensorflow/contrib/testing/ @dandelionmane
+# /tensorflow/contrib/timeseries/ @allenlavoie
+# /tensorflow/contrib/tpu/ @frankchn @saeta @jhseu
+# /tensorflow/contrib/training/ @joel-shor @ebrevdo
+# /tensorflow/contrib/util/ @sherrym
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index ff11d131409b65880f16b80f9fe38dc39ac0e5fa..5fff9d05a1c589636bc9c711e6eb7cc4aba86b2f 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -67,4 +67,4 @@ If the Project Stewards receive a report alleging a violation of the Code of Con
 
 ## Attribution
 
-This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at http://contributor-covenant.org/version/1/4, and includes some aspects of the Geek Feminism Code of Conduct and the Drupal Code of Conduct.
+This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at https://contributor-covenant.org/version/1/4, and includes some aspects of the Geek Feminism Code of Conduct and the Drupal Code of Conduct.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 1b537ca73cc94e992e7537fe69c8d0cc8fd13102..de4fded6ae6e66995aa9f1687a9d598017416f7a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -8,8 +8,8 @@ We'd love to accept your patches! Before we can take them, we have to jump a cou
 
 Please fill out either the individual or corporate Contributor License Agreement (CLA).
 
-  * If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html).
-  * If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).
+  * If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](https://code.google.com/legal/individual-cla-v1.0.html).
+  * If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](https://code.google.com/legal/corporate-cla-v1.0.html).
 
 Follow either of the two links above to access the appropriate CLA and instructions for how to sign and return it. Once we receive it, we'll be able to accept your pull requests.
 
@@ -20,6 +20,9 @@ Follow either of the two links above to access the appropriate CLA and instructi
 If you have improvements to TensorFlow, send us your pull requests! For those
 just getting started, Github has a [howto](https://help.github.com/articles/using-pull-requests/).
 
+TensorFlow team members will be assigned to review your pull requests. Once the pull requests are approved and pass continuous integration checks, we will merge the pull requests.
+For some pull requests, we will apply the patch for each pull request to our internal version control system first, and export the change out as a new commit later, at which point the original pull request will be closed. The commits in the pull request will be squashed into a single commit with the pull request creator as the author. These pull requests will be labeled as pending merge internally.
+
 If you want to contribute but you're not sure where to start, take a look at the
 [issues with the "contributions welcome" label](https://github.com/tensorflow/tensorflow/labels/stat%3Acontributions%20welcome).
 These are issues that we believe are particularly well suited for outside
@@ -114,7 +117,7 @@ pylint --rcfile=/tmp/pylintrc myfile.py
 * [Google Java Style Guide](https://google.github.io/styleguide/javaguide.html)
 * [Google JavaScript Style Guide](https://google.github.io/styleguide/jsguide.html)
 * [Google Shell Style Guide](https://google.github.io/styleguide/shell.xml)
-* [Google Objective-C Style Guide](http://google.github.io/styleguide/objcguide.html)
+* [Google Objective-C Style Guide](https://google.github.io/styleguide/objcguide.html)
 
 #### Running sanity check
 
diff --git a/LICENSE b/LICENSE
index 15ae42140452d32ccf929f59f7eca01a3c7b555f..4862420c0234f7542d4fe8f3520516b484a64aed 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright 2017 The TensorFlow Authors.  All rights reserved.
+Copyright 2018 The TensorFlow Authors.  All rights reserved.
 
                                  Apache License
                            Version 2.0, January 2004
diff --git a/README.md b/README.md
index aff3427bddb307aea6d6c2466eac14c9edffcc32..0c93813e584d4e41fe80d50e047069b2dad8311a 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,10 @@ tracking requests and bugs. So please see
 [TensorFlow Discuss](https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss) for general questions
 and discussion, and please direct specific questions to [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow).**
 
+The TensorFlow project strives to abide by generally accepted best practices in open-source software development:
+
+[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/1486/badge)](https://bestpractices.coreinfrastructure.org/projects/1486)
+
 ## Installation
 *See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.*
 
@@ -46,11 +50,11 @@ packages on Linux, Mac, and Windows.
 
 
 **Individual whl files**
-* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/))
-* Linux GPU: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/42/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
+* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/)) / [Python 3.6](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp36-cp36m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=cpu-slave/))
+* Linux GPU: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/42/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/)) / [Python 3.6](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp36-cp36m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=gpu-linux/))
 * Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
-* Windows CPU-only: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp36-cp36m-win_amd64.whl) ([build history](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/))
-* Windows GPU: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp36-cp36m-win_amd64.whl) ([build history](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/))
+* Windows CPU-only: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp36-cp36m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/))
+* Windows GPU: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp36-cp36m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/))
 * Android: [demo APK](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk), [native libs](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/native/)
 ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-android/))
 
diff --git a/RELEASE.md b/RELEASE.md
index e04bd3fc505d51ade9e9fa12c822cb695e90b4f3..39fc46ac6357300ea2b3365fa4c6d432d2a206db 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,175 @@
+# Release 1.5.0
+
+## Breaking Changes
+* Prebuilt binaries are now built against CUDA 9 and cuDNN 7.
+* Our Linux binaries are built using ubuntu 16 containers, potentially
+  introducing glibc incompatibility issues with ubuntu 14.
+* Starting from 1.6 release, our prebuilt binaries will use AVX instructions.
+  This may break TF on older CPUs.
+
+## Major Features And Improvements
+* [Eager execution](https://github.com/tensorflow/tensorflow/tree/r1.5/tensorflow/contrib/eager)
+  preview version is now available.
+* [TensorFlow Lite](https://github.com/tensorflow/tensorflow/tree/r1.5/tensorflow/contrib/lite)
+  dev preview is now available.
+* CUDA 9 and cuDNN 7 support.
+* Accelerated Linear Algebra (XLA):
+  * Add `complex64` support to XLA compiler.
+  * `bfloat` support is now added to XLA infrastructure.
+  * Make `ClusterSpec` propagation work with XLA devices.
+  * Use a determinisitic executor to generate XLA graph.
+* `tf.contrib`:
+  * `tf.contrib.distributions`:
+    * Add `tf.contrib.distributions.Autoregressive`.
+    * Make `tf.contrib.distributions` QuadratureCompound classes support batch
+    * Infer `tf.contrib.distributions.RelaxedOneHotCategorical` `dtype` from arguments.
+    * Make `tf.contrib.distributions` quadrature family parameterized by
+      `quadrature_grid_and_prob` vs `quadrature_degree`.
+    * `auto_correlation` added to `tf.contrib.distributions`
+  * Add `tf.contrib.bayesflow.layers`, a collection of probabilistic (neural) layers.
+  * Add `tf.contrib.bayesflow.halton_sequence`.
+  * Add `tf.contrib.data.make_saveable_from_iterator.`
+  * Add `tf.contrib.data.shuffle_and_repeat`.
+  * Add new custom transformation: `tf.contrib.data.scan()`.
+  * `tf.contrib.distributions.bijectors`:
+    * Add `tf.contrib.distributions.bijectors.MaskedAutoregressiveFlow`.
+    * Add `tf.contrib.distributions.bijectors.Permute`.
+    * Add `tf.contrib.distributions.bijectors.Gumbel`.
+    * Add `tf.contrib.distributions.bijectors.Reshape`.
+    * Support shape inference (i.e., shapes containing -1) in the Reshape bijector.
+* Add `streaming_precision_recall_at_equal_thresholds,` a method for computing
+  streaming precision and recall with `O(num_thresholds + size of predictions)`
+  time and space complexity.
+* Change `RunConfig` default behavior to not set a random seed, making random
+  behavior independently random on distributed workers. We expect this to
+  generally improve training performance. Models that do rely on determinism
+  should set a random seed explicitly.
+* Replaced the implementation of `tf.flags` with `absl.flags`.
+* Add support for `CUBLAS_TENSOR_OP_MATH` in fp16 GEMM
+* Add support for CUDA on NVIDIA Tegra devices
+
+## Bug Fixes and Other Changes
+* Documentation updates:
+  * Clarified that you can only install TensorFlow on 64-bit machines.
+  * Added a short doc explaining how `Estimator`s save checkpoints.
+  * Add documentation for ops supported by the `tf2xla` bridge.
+  * Fix minor typos in the doc of `SpaceToDepth` and `DepthToSpace`.
+  * Updated documentation comments in `mfcc_mel_filterbank.h` and `mfcc.h` to
+    clarify that the input domain is squared magnitude spectra and the weighting
+    is done on linear magnitude spectra (sqrt of inputs).
+  * Change `tf.contrib.distributions` docstring examples to use `tfd` alias
+    rather than `ds`, `bs`.
+  * Fix docstring typos in `tf.distributions.bijectors.Bijector`.
+  * `tf.assert_equal` no longer raises `ValueError.` It now raises
+    `InvalidArgumentError,` as documented.
+  * Update Getting Started docs and API intro.
+* Google Cloud Storage (GCS):
+  * Add userspace DNS caching for the GCS client.
+  * Customize request timeouts for the GCS filesystem.
+  * Improve GCS filesystem caching.
+* Bug Fixes:
+  * Fix bug where partitioned integer variables got their wrong shapes. Before
+  * Fix correctness bug in CPU and GPU implementations of Adadelta.
+  * Fix a bug in `import_meta_graph`'s handling of partitioned variables when
+    importing into a scope. WARNING: This may break loading checkpoints of
+    graphs with partitioned variables saved after using `import_meta_graph` with
+    a non-empty `import_scope` argument.
+  * Fix bug in offline debugger which prevented viewing events.
+  * Added the `WorkerService.DeleteWorkerSession` method to the gRPC interface,
+    to fix a memory leak. Ensure that your master and worker servers are running
+    the same version of TensorFlow to avoid compatibility issues.
+  * Fix bug in peephole implementation of BlockLSTM cell.
+  * Fix bug by casting dtype of `log_det_jacobian` to match `log_prob` in
+    `TransformedDistribution`.
+  * Fix a bug in `import_meta_graph`'s handling of partitioned variables when
+  * Ensure `tf.distributions.Multinomial` doesn't underflow in `log_prob`.
+    Before this change, all partitions of an integer variable were initialized
+    with the shape of the unpartitioned variable; after this change they are
+    initialized correctly.
+* Other:
+  * Add necessary shape util support for bfloat16.
+  * Add a way to run ops using a step function to MonitoredSession.
+  * Add `DenseFlipout` probabilistic layer.
+  * A new flag `ignore_live_threads` is available on train. If set to `True`, it
+    will ignore threads that remain running when tearing down infrastructure
+    after successfully completing training, instead of throwing a RuntimeError.
+  * Restandardize `DenseVariational` as simpler template for other probabilistic
+    layers.
+  * `tf.data` now supports `tf.SparseTensor` components in dataset elements.
+  * It is now possible to iterate over `Tensor`s.
+  * Allow `SparseSegmentReduction` ops to have missing segment IDs.
+  * Modify custom export strategy to account for multidimensional sparse float
+    splits.
+  * `Conv2D`, `Conv2DBackpropInput`, `Conv2DBackpropFilter` now supports arbitrary
+    dilations with GPU and cuDNNv6 support.
+  * `Estimator` now supports `Dataset`: `input_fn` can return a `Dataset`
+    instead of `Tensor`s.
+  * Add `RevBlock`, a memory-efficient implementation of reversible residual layers.
+  * Reduce BFCAllocator internal fragmentation.
+  * Add `cross_entropy` and `kl_divergence` to `tf.distributions.Distribution`.
+  * Add `tf.nn.softmax_cross_entropy_with_logits_v2` which enables backprop
+    w.r.t. the labels.
+  * GPU back-end now uses `ptxas` to compile generated PTX.
+  * `BufferAssignment`'s protocol buffer dump is now deterministic.
+  * Change embedding op to use parallel version of `DynamicStitch`.
+  * Add support for sparse multidimensional feature columns.
+  * Speed up the case for sparse float columns that have only 1 value.
+  * Allow sparse float splits to support multivalent feature columns.
+  * Add `quantile` to `tf.distributions.TransformedDistribution`.
+  * Add `NCHW_VECT_C` support for `tf.depth_to_space` on GPU.
+  * Add `NCHW_VECT_C` support for `tf.space_to_depth` on GPU.
+
+## API Changes
+* Rename `SqueezeDims` attribute to `Axis` in C++ API for Squeeze op.
+* `Stream::BlockHostUntilDone` now returns Status rather than bool.
+* Minor refactor: move stats files from `stochastic` to `common` and remove
+  `stochastic`.
+
+## Thanks to our Contributors
+
+This release contains contributions from many people at Google, as well as:
+
+Adam Zahran, Ag Ramesh, Alan Lee, Alan Yee, Alex Sergeev, Alexander, Amir H. Jadidinejad,
+Amy, Anastasios Doumoulakis, Andrei Costinescu, Andrei Nigmatulin, Anthony Platanios,
+Anush Elangovan, arixlin, Armen Donigian, ArtëM Sobolev, Atlas7, Ben Barsdell, Bill Prin,
+Bo Wang, Brett Koonce, Cameron Thomas, Carl Thomé, Cem Eteke, cglewis, Changming Sun,
+Charles Shenton, Chi-Hung, Chris Donahue, Chris Filo Gorgolewski, Chris Hoyean Song,
+Chris Tava, Christian Grail, Christoph Boeddeker, cinqS, Clayne Robison, codrut3, concerttttt,
+CQY, Dan Becker, Dan Jarvis, Daniel Zhang, David Norman, dmaclach, Dmitry Trifonov,
+Donggeon Lim, dongpilYu, Dr. Kashif Rasul, Edd Wilder-James, Eric Lv, fcharras, Felix Abecassis,
+FirefoxMetzger, formath, FredZhang, Gaojin Cao, Gary Deer, Guenther Schmuelling, Hanchen Li,
+Hanmin Qin, hannesa2, hyunyoung2, Ilya Edrenkin, Jackson Kontny, Jan, Javier Luraschi,
+Jay Young, Jayaram Bobba, Jeff, Jeff Carpenter, Jeremy Sharpe, Jeroen BéDorf, Jimmy Jia,
+Jinze Bai, Jiongyan Zhang, Joe Castagneri, Johan Ju, Josh Varty, Julian Niedermeier,
+JxKing, Karl Lessard, Kb Sriram, Keven Wang, Koan-Sin Tan, Kyle Mills, lanhin, LevineHuang,
+Loki Der Quaeler, Loo Rong Jie, Luke Iwanski, LáSzló Csomor, Mahdi Abavisani, Mahmoud Abuzaina,
+ManHyuk, Marek ŠUppa, MathSquared, Mats Linander, Matt Wytock, Matthew Daley, Maximilian Bachl,
+mdymczyk, melvyniandrag, Michael Case, Mike Traynor, miqlas, Namrata-Ibm, Nathan Luehr,
+Nathan Van Doorn, Noa Ezra, Nolan Liu, Oleg Zabluda, opensourcemattress, Ouwen Huang,
+Paul Van Eck, peisong, Peng Yu, PinkySan, pks, powderluv, Qiao Hai-Jun, Qiao Longfei,
+Rajendra Arora, Ralph Tang, resec, Robin Richtsfeld, Rohan Varma, Ryohei Kuroki, SaintNazaire,
+Samuel He, Sandeep Dcunha, sandipmgiri, Sang Han, scott, Scott Mudge, Se-Won Kim, Simon Perkins,
+Simone Cirillo, Steffen Schmitz, Suvojit Manna, Sylvus, Taehoon Lee, Ted Chang, Thomas Deegan,
+Till Hoffmann, Tim, Toni Kunic, Toon Verstraelen, Tristan Rice, Urs KöSter, Utkarsh Upadhyay,
+Vish (Ishaya) Abrams, Winnie Tsang, Yan Chen, Yan Facai (颜发才), Yi Yang, Yong Tang,
+Youssef Hesham, Yuan (Terry) Tang, Zhengsheng Wei, zxcqwe4906, 张志豪, 田传武 
+
+We are also grateful to all who filed issues or helped resolve them, asked and
+answered questions, and were part of inspiring discussions.
+
+# Release 1.4.1
+
+## Bug Fixes and Other Changes
+* `LinearClassifier` fix for CloudML Engine.
+
+# Release 1.4.0
+
+## Major Features And Improvements
+* `tf.keras` is now part of the core TensorFlow API.
+* [`tf.data`](http://tensorflow.org/programmers_guide/datasets) is now part of
+  the core TensorFlow API.
+  * The API is now subject to backwards compatibility guarantees.
+
 # Release 1.4.0
 
 ## Major Features And Improvements
diff --git a/WORKSPACE b/WORKSPACE
index b40913801ba8e3c8ee73f7ba69540b520ad698a6..7ae39374f18efd3bddb9aae9bb8dba5c13a61dcc 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -2,11 +2,11 @@ workspace(name = "org_tensorflow")
 
 http_archive(
     name = "io_bazel_rules_closure",
-    sha256 = "110fe68753413777944b473c25eed6368c4a0487cee23a7bac1b13cc49d3e257",
-    strip_prefix = "rules_closure-4af89ef1db659eb41f110df189b67d4cf14073e1",
+    sha256 = "6691c58a2cd30a86776dd9bb34898b041e37136f2dc7e24cadaeaf599c95c657",
+    strip_prefix = "rules_closure-08039ba8ca59f64248bb3b6ae016460fe9c9914f",
     urls = [
-        "https://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/4af89ef1db659eb41f110df189b67d4cf14073e1.tar.gz",
-        "https://github.com/bazelbuild/rules_closure/archive/4af89ef1db659eb41f110df189b67d4cf14073e1.tar.gz",  # 2017-08-28
+        "https://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/08039ba8ca59f64248bb3b6ae016460fe9c9914f.tar.gz",
+        "https://github.com/bazelbuild/rules_closure/archive/08039ba8ca59f64248bb3b6ae016460fe9c9914f.tar.gz",  # 2018-01-16
     ],
 )
 
diff --git a/configure.py b/configure.py
index cf562bdee8ef288e4c2938f50e5c6366ce05ccff..cf16ef483763733cc12c838ea92b144c6493f0b1 100644
--- a/configure.py
+++ b/configure.py
@@ -34,8 +34,10 @@ except ImportError:
 
 _TF_BAZELRC = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            '.tf_configure.bazelrc')
-_DEFAULT_CUDA_VERSION = '8.0'
-_DEFAULT_CUDNN_VERSION = '6'
+_TF_WORKSPACE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                             'WORKSPACE')
+_DEFAULT_CUDA_VERSION = '9.0'
+_DEFAULT_CUDNN_VERSION = '7'
 _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,5.2'
 _DEFAULT_CUDA_PATH = '/usr/local/cuda'
 _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
@@ -44,6 +46,13 @@ _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing '
 _TF_OPENCL_VERSION = '1.2'
 _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp'
 _DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include'
+_SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15]
+
+_DEFAULT_PROMPT_ASK_ATTEMPTS = 10
+
+
+class UserInputError(Exception):
+  pass
 
 
 def is_windows():
@@ -158,7 +167,7 @@ def get_python_path(environ_cp, python_bin_path):
   try:
     library_paths = run_shell(
         [python_bin_path, '-c',
-         'import site; print("\\n".join(site.getsitepackages()))']).split("\n")
+         'import site; print("\\n".join(site.getsitepackages()))']).split('\n')
   except subprocess.CalledProcessError:
     library_paths = [run_shell(
         [python_bin_path, '-c',
@@ -256,19 +265,6 @@ def reset_tf_configure_bazelrc():
     f.write('import %workspace%/.tf_configure.bazelrc\n')
 
 
-def run_gen_git_source(environ_cp):
-  """Run the gen_git_source to create links.
-
-  The links are for bazel to track dependencies for git hash propagation.
-
-  Args:
-    environ_cp: copy of the os.environ.
-  """
-  cmd = '"%s" tensorflow/tools/git/gen_git_source.py --configure %s' % (
-      environ_cp.get('PYTHON_BIN_PATH'), os.getcwd())
-  os.system(cmd)
-
-
 def cleanup_makefile():
   """Delete any leftover BUILD files from the Makefile build.
 
@@ -306,6 +302,12 @@ def get_var(environ_cp,
 
   Returns:
     boolean value of the variable.
+
+  Raises:
+    UserInputError: if an environment variable is set, but it cannot be
+      interpreted as a boolean indicator, assume that the user has made a
+      scripting error, and will continue to provide invalid input.
+      Raise the error to avoid infinitely looping.
   """
   if not question:
     question = 'Do you wish to build TensorFlow with %s support?' % query_item
@@ -323,6 +325,23 @@ def get_var(environ_cp,
     question += ' [y/N]: '
 
   var = environ_cp.get(var_name)
+  if var is not None:
+    var_content = var.strip().lower()
+    true_strings = ('1', 't', 'true', 'y', 'yes')
+    false_strings = ('0', 'f', 'false', 'n', 'no')
+    if var_content in true_strings:
+      var = True
+    elif var_content in false_strings:
+      var = False
+    else:
+      raise UserInputError(
+          'Environment variable %s must be set as a boolean indicator.\n'
+          'The following are accepted as TRUE : %s.\n'
+          'The following are accepted as FALSE: %s.\n'
+          'Current value is %s.' % (
+              var_name, ', '.join(true_strings), ', '.join(false_strings),
+              var))
+
   while var is None:
     user_input_origin = get_input(question)
     user_input = user_input_origin.strip().lower()
@@ -509,6 +528,21 @@ def set_tf_cuda_clang(environ_cp):
       no_reply=no_reply)
 
 
+def set_tf_download_clang(environ_cp):
+  """Set TF_DOWNLOAD_CLANG action_env."""
+  question = 'Do you want to download a fresh release of clang? (Experimental)'
+  yes_reply = 'Clang will be downloaded and used to compile tensorflow.'
+  no_reply = 'Clang will not be downloaded.'
+  set_action_env_var(
+      environ_cp,
+      'TF_DOWNLOAD_CLANG',
+      None,
+      False,
+      question=question,
+      yes_reply=yes_reply,
+      no_reply=no_reply)
+
+
 def get_from_env_or_user_or_default(environ_cp, var_name, ask_for_var,
                                     var_default):
   """Get var_name either from env, or user or default.
@@ -557,6 +591,219 @@ def set_clang_cuda_compiler_path(environ_cp):
                               clang_cuda_compiler_path)
 
 
+def prompt_loop_or_load_from_env(
+    environ_cp,
+    var_name,
+    var_default,
+    ask_for_var,
+    check_success,
+    error_msg,
+    suppress_default_error=False,
+    n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS
+):
+  """Loop over user prompts for an ENV param until receiving a valid response.
+
+  For the env param var_name, read from the environment or verify user input
+  until receiving valid input. When done, set var_name in the environ_cp to its
+  new value.
+
+  Args:
+    environ_cp: (Dict) copy of the os.environ.
+    var_name: (String) string for name of environment variable, e.g. "TF_MYVAR".
+    var_default: (String) default value string.
+    ask_for_var: (String) string for how to ask for user input.
+    check_success: (Function) function that takes one argument and returns a
+      boolean. Should return True if the value provided is considered valid. May
+      contain a complex error message if error_msg does not provide enough
+      information. In that case, set suppress_default_error to True.
+    error_msg: (String) String with one and only one '%s'. Formatted with each
+      invalid response upon check_success(input) failure.
+    suppress_default_error: (Bool) Suppress the above error message in favor of
+      one from the check_success function.
+    n_ask_attempts: (Integer) Number of times to query for valid input before
+      raising an error and quitting.
+
+  Returns:
+    [String] The value of var_name after querying for input.
+
+  Raises:
+    UserInputError: if a query has been attempted n_ask_attempts times without
+      success, assume that the user has made a scripting error, and will
+      continue to provide invalid input. Raise the error to avoid infinitely
+      looping.
+  """
+  default = environ_cp.get(var_name) or var_default
+  full_query = '%s [Default is %s]: ' % (
+      ask_for_var,
+      default,
+  )
+
+  for _ in range(n_ask_attempts):
+    val = get_from_env_or_user_or_default(environ_cp,
+                                          var_name,
+                                          full_query,
+                                          default)
+    if check_success(val):
+      break
+    if not suppress_default_error:
+      print(error_msg % val)
+    environ_cp[var_name] = ''
+  else:
+    raise UserInputError('Invalid %s setting was provided %d times in a row. '
+                         'Assuming to be a scripting mistake.' %
+                         (var_name, n_ask_attempts))
+
+  environ_cp[var_name] = val
+  return val
+
+
+def create_android_ndk_rule(environ_cp):
+  """Set ANDROID_NDK_HOME and write Android NDK WORKSPACE rule."""
+  if is_windows() or is_cygwin():
+    default_ndk_path = cygpath('%s/Android/Sdk/ndk-bundle' %
+                               environ_cp['APPDATA'])
+  elif is_macos():
+    default_ndk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME']
+  else:
+    default_ndk_path = '%s/Android/Sdk/ndk-bundle' % environ_cp['HOME']
+
+  def valid_ndk_path(path):
+    return (os.path.exists(path) and
+            os.path.exists(os.path.join(path, 'source.properties')))
+
+  android_ndk_home_path = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='ANDROID_NDK_HOME',
+      var_default=default_ndk_path,
+      ask_for_var='Please specify the home path of the Android NDK to use.',
+      check_success=valid_ndk_path,
+      error_msg=('The path %s or its child file "source.properties" '
+                 'does not exist.')
+  )
+
+  write_android_ndk_workspace_rule(android_ndk_home_path)
+
+
+def create_android_sdk_rule(environ_cp):
+  """Set Android variables and write Android SDK WORKSPACE rule."""
+  if is_windows() or is_cygwin():
+    default_sdk_path = cygpath('%s/Android/Sdk' % environ_cp['APPDATA'])
+  elif is_macos():
+    default_sdk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME']
+  else:
+    default_sdk_path = '%s/Android/Sdk' % environ_cp['HOME']
+
+  def valid_sdk_path(path):
+    return (os.path.exists(path) and
+            os.path.exists(os.path.join(path, 'platforms')) and
+            os.path.exists(os.path.join(path, 'build-tools')))
+
+  android_sdk_home_path = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='ANDROID_SDK_HOME',
+      var_default=default_sdk_path,
+      ask_for_var='Please specify the home path of the Android SDK to use.',
+      check_success=valid_sdk_path,
+      error_msg=('Either %s does not exist, or it does not contain the '
+                 'subdirectories "platforms" and "build-tools".'))
+
+  platforms = os.path.join(android_sdk_home_path, 'platforms')
+  api_levels = sorted(os.listdir(platforms))
+  api_levels = [x.replace('android-', '') for x in api_levels]
+
+  def valid_api_level(api_level):
+    return os.path.exists(os.path.join(android_sdk_home_path,
+                                       'platforms',
+                                       'android-' + api_level))
+
+  android_api_level = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='ANDROID_API_LEVEL',
+      var_default=api_levels[-1],
+      ask_for_var=('Please specify the Android SDK API level to use. '
+                   '[Available levels: %s]') % api_levels,
+      check_success=valid_api_level,
+      error_msg='Android-%s is not present in the SDK path.')
+
+  build_tools = os.path.join(android_sdk_home_path, 'build-tools')
+  versions = sorted(os.listdir(build_tools))
+
+  def valid_build_tools(version):
+    return os.path.exists(os.path.join(android_sdk_home_path,
+                                       'build-tools',
+                                       version))
+
+  android_build_tools_version = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='ANDROID_BUILD_TOOLS_VERSION',
+      var_default=versions[-1],
+      ask_for_var=('Please specify an Android build tools version to use. '
+                   '[Available versions: %s]') % versions,
+      check_success=valid_build_tools,
+      error_msg=('The selected SDK does not have build-tools version %s '
+                 'available.'))
+
+  write_android_sdk_workspace_rule(android_sdk_home_path,
+                                   android_build_tools_version,
+                                   android_api_level)
+
+
+def write_android_sdk_workspace_rule(android_sdk_home_path,
+                                     android_build_tools_version,
+                                     android_api_level):
+  print('Writing android_sdk_workspace rule.\n')
+  with open(_TF_WORKSPACE, 'a') as f:
+    f.write("""
+android_sdk_repository(
+  name="androidsdk",
+  api_level=%s,
+  path="%s",
+  build_tools_version="%s")\n
+""" % (android_api_level, android_sdk_home_path, android_build_tools_version))
+
+
+def write_android_ndk_workspace_rule(android_ndk_home_path):
+  print('Writing android_ndk_workspace rule.')
+  ndk_api_level = check_ndk_level(android_ndk_home_path)
+  if int(ndk_api_level) not in _SUPPORTED_ANDROID_NDK_VERSIONS:
+    print('WARNING: The API level of the NDK in %s is %s, which is not '
+          'supported by Bazel (officially supported versions: %s). Please use '
+          'another version. Compiling Android targets may result in confusing '
+          'errors.\n' % (android_ndk_home_path, ndk_api_level,
+                         _SUPPORTED_ANDROID_NDK_VERSIONS))
+  with open(_TF_WORKSPACE, 'a') as f:
+    f.write("""
+android_ndk_repository(
+  name="androidndk",
+  path="%s",
+  api_level=%s)\n
+""" % (android_ndk_home_path, ndk_api_level))
+
+
+def check_ndk_level(android_ndk_home_path):
+  """Check the revision number of an Android NDK path."""
+  properties_path = '%s/source.properties' % android_ndk_home_path
+  if is_windows() or is_cygwin():
+    properties_path = cygpath(properties_path)
+  with open(properties_path, 'r') as f:
+    filedata = f.read()
+
+  revision = re.search(r'Pkg.Revision = (\d+)', filedata)
+  if revision:
+    return revision.group(1)
+  return None
+
+
+def workspace_has_any_android_rule():
+  """Check the WORKSPACE for existing android_*_repository rules."""
+  with open(_TF_WORKSPACE, 'r') as f:
+    workspace = f.read()
+  has_any_rule = re.search(r'^android_[ns]dk_repository',
+                           workspace,
+                           re.MULTILINE)
+  return has_any_rule
+
+
 def set_gcc_host_compiler_path(environ_cp):
   """Set GCC_HOST_COMPILER_PATH."""
   default_gcc_host_compiler_path = which('gcc') or ''
@@ -566,23 +813,16 @@ def set_gcc_host_compiler_path(environ_cp):
     # os.readlink is only available in linux
     default_gcc_host_compiler_path = os.path.realpath(cuda_bin_symlink)
 
-  ask_gcc_path = (
-      'Please specify which gcc should be used by nvcc as the '
-      'host compiler. [Default is %s]: ') % default_gcc_host_compiler_path
-  while True:
-    gcc_host_compiler_path = get_from_env_or_user_or_default(
-        environ_cp, 'GCC_HOST_COMPILER_PATH', ask_gcc_path,
-        default_gcc_host_compiler_path)
-
-    if os.path.exists(gcc_host_compiler_path):
-      break
-
-    # Reset and retry
-    print('Invalid gcc path. %s cannot be found' % gcc_host_compiler_path)
-    environ_cp['GCC_HOST_COMPILER_PATH'] = ''
+  gcc_host_compiler_path = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='GCC_HOST_COMPILER_PATH',
+      var_default=default_gcc_host_compiler_path,
+      ask_for_var=
+      'Please specify which gcc should be used by nvcc as the host compiler.',
+      check_success=os.path.exists,
+      error_msg='Invalid gcc path. %s cannot be found.',
+  )
 
-  # Set GCC_HOST_COMPILER_PATH
-  environ_cp['GCC_HOST_COMPILER_PATH'] = gcc_host_compiler_path
   write_action_env_to_bazelrc('GCC_HOST_COMPILER_PATH', gcc_host_compiler_path)
 
 
@@ -592,7 +832,7 @@ def set_tf_cuda_version(environ_cp):
       'Please specify the CUDA SDK version you want to use, '
       'e.g. 7.0. [Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION
 
-  while True:
+  for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
     # Configure the Cuda SDK version to use.
     tf_cuda_version = get_from_env_or_user_or_default(
         environ_cp, 'TF_CUDA_VERSION', ask_cuda_version, _DEFAULT_CUDA_VERSION)
@@ -630,6 +870,11 @@ def set_tf_cuda_version(environ_cp):
     environ_cp['TF_CUDA_VERSION'] = ''
     environ_cp['CUDA_TOOLKIT_PATH'] = ''
 
+  else:
+    raise UserInputError('Invalid TF_CUDA_SETTING setting was provided %d '
+                         'times in a row. Assuming to be a scripting mistake.' %
+                         _DEFAULT_PROMPT_ASK_ATTEMPTS)
+
   # Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION
   environ_cp['CUDA_TOOLKIT_PATH'] = cuda_toolkit_path
   write_action_env_to_bazelrc('CUDA_TOOLKIT_PATH', cuda_toolkit_path)
@@ -643,7 +888,7 @@ def set_tf_cudnn_version(environ_cp):
       'Please specify the cuDNN version you want to use. '
       '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION
 
-  while True:
+  for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
     tf_cudnn_version = get_from_env_or_user_or_default(
         environ_cp, 'TF_CUDNN_VERSION', ask_cudnn_version,
         _DEFAULT_CUDNN_VERSION)
@@ -702,6 +947,10 @@ def set_tf_cudnn_version(environ_cp):
       print('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version))
 
     environ_cp['TF_CUDNN_VERSION'] = ''
+  else:
+    raise UserInputError('Invalid TF_CUDNN setting was provided %d '
+                         'times in a row. Assuming to be a scripting mistake.' %
+                         _DEFAULT_PROMPT_ASK_ATTEMPTS)
 
   # Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION
   environ_cp['CUDNN_INSTALL_PATH'] = cudnn_install_path
@@ -810,90 +1059,83 @@ def set_other_cuda_vars(environ_cp):
 def set_host_cxx_compiler(environ_cp):
   """Set HOST_CXX_COMPILER."""
   default_cxx_host_compiler = which('g++') or ''
-  ask_cxx_host_compiler = (
-      'Please specify which C++ compiler should be used as'
-      ' the host C++ compiler. [Default is %s]: ') % default_cxx_host_compiler
 
-  while True:
-    host_cxx_compiler = get_from_env_or_user_or_default(
-        environ_cp, 'HOST_CXX_COMPILER', ask_cxx_host_compiler,
-        default_cxx_host_compiler)
-    if os.path.exists(host_cxx_compiler):
-      break
-
-    # Reset and retry
-    print('Invalid C++ compiler path. %s cannot be found' % host_cxx_compiler)
-    environ_cp['HOST_CXX_COMPILER'] = ''
+  host_cxx_compiler = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='HOST_CXX_COMPILER',
+      var_default=default_cxx_host_compiler,
+      ask_for_var=('Please specify which C++ compiler should be used as the '
+                   'host C++ compiler.'),
+      check_success=os.path.exists,
+      error_msg='Invalid C++ compiler path. %s cannot be found.',
+  )
 
-  # Set HOST_CXX_COMPILER
-  environ_cp['HOST_CXX_COMPILER'] = host_cxx_compiler
   write_action_env_to_bazelrc('HOST_CXX_COMPILER', host_cxx_compiler)
 
 
 def set_host_c_compiler(environ_cp):
   """Set HOST_C_COMPILER."""
   default_c_host_compiler = which('gcc') or ''
-  ask_c_host_compiler = (
-      'Please specify which C compiler should be used as the'
-      ' host C compiler. [Default is %s]: ') % default_c_host_compiler
-
-  while True:
-    host_c_compiler = get_from_env_or_user_or_default(
-        environ_cp, 'HOST_C_COMPILER', ask_c_host_compiler,
-        default_c_host_compiler)
-    if os.path.exists(host_c_compiler):
-      break
 
-    # Reset and retry
-    print('Invalid C compiler path. %s cannot be found' % host_c_compiler)
-    environ_cp['HOST_C_COMPILER'] = ''
+  host_c_compiler = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='HOST_C_COMPILER',
+      var_default=default_c_host_compiler,
+      ask_for_var=('Please specify which C compiler should be used as the host'
+                   'C compiler.'),
+      check_success=os.path.exists,
+      error_msg='Invalid C compiler path. %s cannot be found.',
+  )
 
-  # Set HOST_C_COMPILER
-  environ_cp['HOST_C_COMPILER'] = host_c_compiler
   write_action_env_to_bazelrc('HOST_C_COMPILER', host_c_compiler)
 
 
 def set_computecpp_toolkit_path(environ_cp):
   """Set COMPUTECPP_TOOLKIT_PATH."""
-  ask_computecpp_toolkit_path = ('Please specify the location where ComputeCpp '
-                                 'for SYCL %s is installed. [Default is %s]: '
-                                ) % (_TF_OPENCL_VERSION,
-                                     _DEFAULT_COMPUTECPP_TOOLKIT_PATH)
 
-  while True:
-    computecpp_toolkit_path = get_from_env_or_user_or_default(
-        environ_cp, 'COMPUTECPP_TOOLKIT_PATH', ask_computecpp_toolkit_path,
-        _DEFAULT_COMPUTECPP_TOOLKIT_PATH)
+  def toolkit_exists(toolkit_path):
+    """Check if a computecpp toolkit path is valid."""
     if is_linux():
       sycl_rt_lib_path = 'lib/libComputeCpp.so'
     else:
       sycl_rt_lib_path = ''
 
-    sycl_rt_lib_path_full = os.path.join(computecpp_toolkit_path,
+    sycl_rt_lib_path_full = os.path.join(toolkit_path,
                                          sycl_rt_lib_path)
-    if os.path.exists(sycl_rt_lib_path_full):
-      break
+    exists = os.path.exists(sycl_rt_lib_path_full)
+    if not exists:
+      print('Invalid SYCL %s library path. %s cannot be found' %
+            (_TF_OPENCL_VERSION, sycl_rt_lib_path_full))
+    return exists
 
-    print('Invalid SYCL %s library path. %s cannot be found' %
-          (_TF_OPENCL_VERSION, sycl_rt_lib_path_full))
-    environ_cp['COMPUTECPP_TOOLKIT_PATH'] = ''
+  computecpp_toolkit_path = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='COMPUTECPP_TOOLKIT_PATH',
+      var_default=_DEFAULT_COMPUTECPP_TOOLKIT_PATH,
+      ask_for_var=(
+          'Please specify the location where ComputeCpp for SYCL %s is '
+          'installed.' % _TF_OPENCL_VERSION),
+      check_success=toolkit_exists,
+      error_msg='Invalid SYCL compiler path. %s cannot be found.',
+      suppress_default_error=True)
 
-  # Set COMPUTECPP_TOOLKIT_PATH
-  environ_cp['COMPUTECPP_TOOLKIT_PATH'] = computecpp_toolkit_path
   write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
                               computecpp_toolkit_path)
 
+
 def set_trisycl_include_dir(environ_cp):
-  """Set TRISYCL_INCLUDE_DIR"""
+  """Set TRISYCL_INCLUDE_DIR."""
+
   ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
                              'include directory. (Use --config=sycl_trisycl '
                              'when building with Bazel) '
                              '[Default is %s]: '
-                             ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
+                            ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
+
   while True:
     trisycl_include_dir = get_from_env_or_user_or_default(
-      environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
-      _DEFAULT_TRISYCL_INCLUDE_DIR)
+        environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
+        _DEFAULT_TRISYCL_INCLUDE_DIR)
     if os.path.exists(trisycl_include_dir):
       break
 
@@ -905,50 +1147,30 @@ def set_trisycl_include_dir(environ_cp):
   write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
                               trisycl_include_dir)
 
-def set_trisycl_include_dir(environ_cp):
-  """Set TRISYCL_INCLUDE_DIR."""
-  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
-                             'include directory. (Use --config=sycl_trisycl '
-                             'when building with Bazel) '
-                             '[Default is %s]: ') % (
-                                 _DEFAULT_TRISYCL_INCLUDE_DIR)
-  while True:
-    trisycl_include_dir = get_from_env_or_user_or_default(
-        environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
-        _DEFAULT_TRISYCL_INCLUDE_DIR)
-    if os.path.exists(trisycl_include_dir):
-      break
-
-    print('Invalid triSYCL include directory, %s cannot be found' %
-          (trisycl_include_dir))
-
-  # Set TRISYCL_INCLUDE_DIR
-  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
-  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
-
 
 def set_mpi_home(environ_cp):
   """Set MPI_HOME."""
+
   default_mpi_home = which('mpirun') or which('mpiexec') or ''
   default_mpi_home = os.path.dirname(os.path.dirname(default_mpi_home))
 
-  ask_mpi_home = ('Please specify the MPI toolkit folder. [Default is %s]: '
-                 ) % default_mpi_home
-  while True:
-    mpi_home = get_from_env_or_user_or_default(environ_cp, 'MPI_HOME',
-                                               ask_mpi_home, default_mpi_home)
-
-    if os.path.exists(os.path.join(mpi_home, 'include')) and os.path.exists(
-        os.path.join(mpi_home, 'lib')):
-      break
-
-    print('Invalid path to the MPI Toolkit. %s or %s cannot be found' %
-          (os.path.join(mpi_home, 'include'),
-           os.path.exists(os.path.join(mpi_home, 'lib'))))
-    environ_cp['MPI_HOME'] = ''
+  def valid_mpi_path(mpi_home):
+    exists = (os.path.exists(os.path.join(mpi_home, 'include')) and
+              os.path.exists(os.path.join(mpi_home, 'lib')))
+    if not exists:
+      print('Invalid path to the MPI Toolkit. %s or %s cannot be found' %
+            (os.path.join(mpi_home, 'include'),
+             os.path.exists(os.path.join(mpi_home, 'lib'))))
+    return exists
 
-  # Set MPI_HOME
-  environ_cp['MPI_HOME'] = str(mpi_home)
+  _ = prompt_loop_or_load_from_env(
+      environ_cp,
+      var_name='MPI_HOME',
+      var_default=default_mpi_home,
+      ask_for_var='Please specify the MPI toolkit folder.',
+      check_success=valid_mpi_path,
+      error_msg='',
+      suppress_default_error=True)
 
 
 def set_other_mpi_vars(environ_cp):
@@ -983,47 +1205,25 @@ def set_other_mpi_vars(environ_cp):
     raise ValueError('Cannot find the MPI library file in %s/lib' % mpi_home)
 
 
-def set_mkl():
-  write_to_bazelrc('build:mkl --define using_mkl=true')
-  write_to_bazelrc('build:mkl -c opt')
-  print(
-      'Add "--config=mkl" to your bazel command to build with MKL '
-      'support.\nPlease note that MKL on MacOS or windows is still not '
-      'supported.\nIf you would like to use a local MKL instead of '
-      'downloading, please set the environment variable \"TF_MKL_ROOT\" every '
-      'time before build.')
-
-
-def set_monolithic():
-  # Add --config=monolithic to your bazel command to use a mostly-static
-  # build and disable modular op registration support (this will revert to
-  # loading TensorFlow with RTLD_GLOBAL in Python). By default (without
-  # --config=monolithic), TensorFlow will build with a dependence on
-  # //tensorflow:libtensorflow_framework.so.
-  write_to_bazelrc('build:monolithic --define framework_shared_object=false')
-  # For projects which use TensorFlow as part of a Bazel build process, putting
-  # nothing in a bazelrc will default to a monolithic build. The following line
-  # opts in to modular op registration support by default:
-  write_to_bazelrc('build --define framework_shared_object=true')
-
-
-def create_android_bazelrc_configs():
-  # Flags for --config=android
-  write_to_bazelrc('build:android --crosstool_top=//external:android/crosstool')
-  write_to_bazelrc(
-      'build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain')
-  # Flags for --config=android_arm
-  write_to_bazelrc('build:android_arm --config=android')
-  write_to_bazelrc('build:android_arm --cpu=armeabi-v7a')
-  # Flags for --config=android_arm64
-  write_to_bazelrc('build:android_arm64 --config=android')
-  write_to_bazelrc('build:android_arm64 --cpu=arm64-v8a')
-
-
 def set_grpc_build_flags():
   write_to_bazelrc('build --define grpc_no_ares=true')
 
 
+def set_windows_build_flags():
+  if is_windows():
+    # The non-monolithic build is not supported yet
+    write_to_bazelrc('build --config monolithic')
+    # Suppress warning messages
+    write_to_bazelrc('build --copt=-w --host_copt=-w')
+    # Output more verbose information when something goes wrong
+    write_to_bazelrc('build --verbose_failures')
+
+
+def config_info_line(name, help_text):
+  """Helper function to print formatted help text for Bazel config options."""
+  print('\t--config=%-12s\t# %s' % (name, help_text))
+
+
 def main():
   # Make a copy of os.environ to be clear when functions and getting and setting
   # environment variables.
@@ -1034,7 +1234,6 @@ def main():
   reset_tf_configure_bazelrc()
   cleanup_makefile()
   setup_python(environ_cp)
-  run_gen_git_source(environ_cp)
 
   if is_windows():
     environ_cp['TF_NEED_S3'] = '0'
@@ -1083,8 +1282,19 @@ def main():
 
     set_tf_cuda_clang(environ_cp)
     if environ_cp.get('TF_CUDA_CLANG') == '1':
-      # Set up which clang we should use as the cuda / host compiler.
-      set_clang_cuda_compiler_path(environ_cp)
+      if not is_windows():
+        # Ask if we want to download clang release while building.
+        set_tf_download_clang(environ_cp)
+      else:
+        # We use bazel's generated crosstool on Windows and there is no
+        # way to provide downloaded toolchain for that yet.
+        # TODO(ibiryukov): Investigate using clang as a cuda compiler on
+        # Windows.
+        environ_cp['TF_DOWNLOAD_CLANG'] = '0'
+
+      if environ_cp.get('TF_DOWNLOAD_CLANG') != '1':
+        # Set up which clang we should use as the cuda / host compiler.
+        set_clang_cuda_compiler_path(environ_cp)
     else:
       # Set up which gcc nvcc should use as the host compiler
       # No need to set this on Windows
@@ -1099,9 +1309,29 @@ def main():
 
   set_grpc_build_flags()
   set_cc_opt_flags(environ_cp)
-  set_mkl()
-  set_monolithic()
-  create_android_bazelrc_configs()
+  set_windows_build_flags()
+
+  if workspace_has_any_android_rule():
+    print('The WORKSPACE file has at least one of ["android_sdk_repository", '
+          '"android_ndk_repository"] already set. Will not ask to help '
+          'configure the WORKSPACE. Please delete the existing rules to '
+          'activate the helper.\n')
+  else:
+    if get_var(
+        environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace',
+        False,
+        ('Would you like to interactively configure ./WORKSPACE for '
+         'Android builds?'),
+        'Searching for NDK and SDK installations.',
+        'Not configuring the WORKSPACE for Android builds.'):
+      create_android_ndk_rule(environ_cp)
+      create_android_sdk_rule(environ_cp)
+
+  print('Preconfigured Bazel build configs. You can use any of the below by '
+        'adding "--config=<>" to your build command. See tools/bazel.rc for '
+        'more details.')
+  config_info_line('mkl', 'Build with MKL support.')
+  config_info_line('monolithic', 'Config for mostly static monolithic build.')
 
 if __name__ == '__main__':
   main()
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index bfebe8a5678a2c0508b31f5dd898eac22186a072..da37564697a7159518a6ba71271f911713e3e58e 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -364,11 +364,9 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
-# Make a dummy rule that we can change "default" in select statements to.
-# to disable dependencies in copybara.
 config_setting(
-    name = "dummy_disabled_internal",
-    values = {"define": "with_dummy_disabled_internal=true"},
+    name = "override_eigen_strong_inline",
+    values = {"define": "override_eigen_strong_inline=true"},
     visibility = ["//visibility:public"],
 )
 
@@ -409,6 +407,8 @@ filegroup(
         "//tensorflow/c:all_files",
         "//tensorflow/cc:all_files",
         "//tensorflow/cc/saved_model:all_files",
+        "//tensorflow/cc/saved_model/python:all_files",
+        "//tensorflow/cc/tools:all_files",
         "//tensorflow/compiler/aot:all_files",
         "//tensorflow/compiler/aot/tests:all_files",
         "//tensorflow/compiler/jit:all_files",
@@ -427,6 +427,7 @@ filegroup(
         "//tensorflow/compiler/xla/client:all_files",
         "//tensorflow/compiler/xla/client/lib:all_files",
         "//tensorflow/compiler/xla/legacy_flags:all_files",
+        "//tensorflow/compiler/xla/python:all_files",
         "//tensorflow/compiler/xla/service:all_files",
         "//tensorflow/compiler/xla/service/cpu:all_files",
         "//tensorflow/compiler/xla/service/gpu:all_files",
@@ -452,6 +453,7 @@ filegroup(
         "//tensorflow/contrib/cloud:all_files",
         "//tensorflow/contrib/cloud/kernels:all_files",
         "//tensorflow/contrib/cluster_resolver:all_files",
+        "//tensorflow/contrib/coder:all_files",
         "//tensorflow/contrib/compiler:all_files",
         "//tensorflow/contrib/copy_graph:all_files",
         "//tensorflow/contrib/crf:all_files",
@@ -461,10 +463,13 @@ filegroup(
         "//tensorflow/contrib/data/python/kernel_tests:all_files",
         "//tensorflow/contrib/data/python/ops:all_files",
         "//tensorflow/contrib/decision_trees/proto:all_files",
+        "//tensorflow/contrib/deprecated:all_files",
         "//tensorflow/contrib/distributions:all_files",
+        "//tensorflow/contrib/eager/proto:all_files",
         "//tensorflow/contrib/eager/python:all_files",
         "//tensorflow/contrib/estimator:all_files",
         "//tensorflow/contrib/factorization:all_files",
+        "//tensorflow/contrib/factorization/examples:all_files",
         "//tensorflow/contrib/factorization/kernels:all_files",
         "//tensorflow/contrib/ffmpeg:all_files",
         "//tensorflow/contrib/ffmpeg/default:all_files",
@@ -475,6 +480,7 @@ filegroup(
         "//tensorflow/contrib/graph_editor:all_files",
         "//tensorflow/contrib/grid_rnn:all_files",
         "//tensorflow/contrib/hooks:all_files",
+        "//tensorflow/contrib/hvx/clock_cycle_profiling:all_files",
         "//tensorflow/contrib/hvx/hvx_ops_support_checker:all_files",
         "//tensorflow/contrib/image:all_files",
         "//tensorflow/contrib/input_pipeline:all_files",
@@ -492,6 +498,8 @@ filegroup(
         "//tensorflow/contrib/layers/kernels:all_files",
         "//tensorflow/contrib/learn:all_files",
         "//tensorflow/contrib/learn/python/learn/datasets:all_files",
+        "//tensorflow/contrib/legacy_seq2seq:all_files",
+        "//tensorflow/contrib/libsvm:all_files",
         "//tensorflow/contrib/linalg:all_files",
         "//tensorflow/contrib/linear_optimizer:all_files",
         "//tensorflow/contrib/lite:all_files",
@@ -516,15 +524,22 @@ filegroup(
         "//tensorflow/contrib/lookup:all_files",
         "//tensorflow/contrib/losses:all_files",
         "//tensorflow/contrib/makefile:all_files",
+        "//tensorflow/contrib/memory_stats:all_files",
         "//tensorflow/contrib/meta_graph_transform:all_files",
         "//tensorflow/contrib/metrics:all_files",
         "//tensorflow/contrib/model_pruning:all_files",
-        "//tensorflow/contrib/mpi_collectives:all_files",
+        "//tensorflow/contrib/model_pruning/examples/cifar10:all_files",
+        "//tensorflow/contrib/nccl:all_files",
         "//tensorflow/contrib/ndlstm:all_files",
         "//tensorflow/contrib/nearest_neighbor:all_files",
         "//tensorflow/contrib/nn:all_files",
         "//tensorflow/contrib/opt:all_files",
+        "//tensorflow/contrib/periodic_resample:all_files",
         "//tensorflow/contrib/predictor:all_files",
+        "//tensorflow/contrib/py2tf:all_files",
+        "//tensorflow/contrib/py2tf/convert:all_files",
+        "//tensorflow/contrib/py2tf/pyct:all_files",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis:all_files",
         "//tensorflow/contrib/quantize:all_files",
         "//tensorflow/contrib/receptive_field:all_files",
         "//tensorflow/contrib/reduce_slice_ops:all_files",
@@ -567,6 +582,7 @@ filegroup(
         "//tensorflow/contrib/util:all_files",
         "//tensorflow/contrib/verbs:all_files",
         "//tensorflow/core:all_files",
+        "//tensorflow/core/api_def:all_files",
         "//tensorflow/core/debug:all_files",
         "//tensorflow/core/distributed_runtime:all_files",
         "//tensorflow/core/distributed_runtime/rpc:all_files",
@@ -577,6 +593,9 @@ filegroup(
         "//tensorflow/core/grappler/optimizers:all_files",
         "//tensorflow/core/grappler/utils:all_files",
         "//tensorflow/core/kernels:all_files",
+        "//tensorflow/core/kernels/batching_util:all_files",
+        "//tensorflow/core/kernels/data:all_files",
+        "//tensorflow/core/kernels/data/sql:all_files",
         "//tensorflow/core/kernels/fuzzing:all_files",
         "//tensorflow/core/kernels/hexagon:all_files",
         "//tensorflow/core/kernels/neon:all_files",
@@ -591,6 +610,7 @@ filegroup(
         "//tensorflow/core/profiler/internal/advisor:all_files",
         "//tensorflow/core/util/ctc:all_files",
         "//tensorflow/core/util/tensor_bundle:all_files",
+        "//tensorflow/examples/adding_an_op:all_files",
         "//tensorflow/examples/android:all_files",
         "//tensorflow/examples/benchmark:all_files",
         "//tensorflow/examples/get_started/regression:all_files",
@@ -598,10 +618,13 @@ filegroup(
         "//tensorflow/examples/image_retraining:all_files",
         "//tensorflow/examples/label_image:all_files",
         "//tensorflow/examples/learn:all_files",
+        "//tensorflow/examples/multibox_detector:all_files",
         "//tensorflow/examples/saved_model:all_files",
         "//tensorflow/examples/speech_commands:all_files",
         "//tensorflow/examples/tutorials/estimators:all_files",
+        "//tensorflow/examples/tutorials/layers:all_files",
         "//tensorflow/examples/tutorials/mnist:all_files",
+        "//tensorflow/examples/tutorials/monitors:all_files",
         "//tensorflow/examples/tutorials/word2vec:all_files",
         "//tensorflow/examples/wav_to_spectrogram:all_files",
         "//tensorflow/go:all_files",
@@ -610,6 +633,7 @@ filegroup(
         "//tensorflow/java/src/main/native:all_files",
         "//tensorflow/python:all_files",
         "//tensorflow/python/data:all_files",
+        "//tensorflow/python/data/kernel_tests:all_files",
         "//tensorflow/python/data/ops:all_files",
         "//tensorflow/python/data/util:all_files",
         "//tensorflow/python/debug:all_files",
@@ -623,6 +647,7 @@ filegroup(
         "//tensorflow/python/kernel_tests/random:all_files",
         "//tensorflow/python/ops/distributions:all_files",
         "//tensorflow/python/ops/linalg:all_files",
+        "//tensorflow/python/ops/losses:all_files",
         "//tensorflow/python/profiler:all_files",
         "//tensorflow/python/profiler/internal:all_files",
         "//tensorflow/python/saved_model:all_files",
@@ -633,6 +658,7 @@ filegroup(
         "//tensorflow/tools/api/tests:all_files",
         "//tensorflow/tools/benchmark:all_files",
         "//tensorflow/tools/build_info:all_files",
+        "//tensorflow/tools/ci_build/gpu_build:all_files",
         "//tensorflow/tools/common:all_files",
         "//tensorflow/tools/compatibility:all_files",
         "//tensorflow/tools/dist_test/server:all_files",
@@ -640,17 +666,20 @@ filegroup(
         "//tensorflow/tools/docker/notebooks:all_files",
         "//tensorflow/tools/docs:all_files",
         "//tensorflow/tools/git:all_files",
+        "//tensorflow/tools/graph_transforms:all_files",
         "//tensorflow/tools/mlpbtxt:all_files",
         "//tensorflow/tools/proto_text:all_files",
         "//tensorflow/tools/quantization:all_files",
         "//tensorflow/tools/test:all_files",
         "//tensorflow/user_ops:all_files",
+        "//third_party/eigen3:all_files",
+        "//third_party/fft2d:all_files",
+        "//third_party/flatbuffers:all_files",
         "//third_party/hadoop:all_files",
-        "//third_party/mpi:all_files",
         "//third_party/sycl:all_files",
         "//third_party/sycl/sycl:all_files",
     ],
-    visibility = [":__subpackages__"],
+    visibility = ["//visibility:public"],
 )
 
 load(
@@ -774,6 +803,7 @@ tf_cc_shared_object(
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:client_session",
         "//tensorflow/cc:scope",
+        "//tensorflow/cc/profiler",
         "//tensorflow/core:tensorflow",
     ],
 )
diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index ef7eb5a4d16b29aecc34f33cb41dd7cf9450c5f2..f258bcd95684cc58c2ead3886b3ce74e4af6c5aa 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -42,6 +42,7 @@ tf_cuda_library(
             "//tensorflow/core:core_cpu",
             "//tensorflow/core:framework",
             "//tensorflow/core:lib",
+            "//tensorflow/core:op_gen_lib",
         ],
     }),
 )
@@ -73,6 +74,7 @@ tf_cuda_library(
             "//tensorflow/core:core_cpu",
             "//tensorflow/core:core_cpu_internal",
             "//tensorflow/core:framework",
+            "//tensorflow/core:op_gen_lib",
             "//tensorflow/core:protos_all_cc",
             "//tensorflow/core:lib",
             "//tensorflow/core:lib_internal",
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index bb41f92306b413d610bf115d144b15faa568ee14..6fc75a98f1e05c3971cb4546bd16f015c25b6709 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/cc/framework/scope_internal.h"
 #include "tensorflow/cc/ops/while_loop.h"
 #include "tensorflow/cc/saved_model/loader.h"
+#include "tensorflow/core/framework/op_gen_lib.h"
 #endif
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
@@ -383,12 +384,11 @@ void TF_Reset_Helper(const TF_SessionOptions* opt, const char** containers,
 // be less than the total node count.
 Status ValidateNoCycles(const Graph& g) {
   // TODO(nolivia): check this on a subset of the graph instead of all of it.
-  int total_num_nodes = g.num_node_ids();
   // A node is ready when all of its inputs have been visited.
   std::vector<const Node*> ready;
-  std::vector<int> pending_count(total_num_nodes, 0);
+  std::vector<int> pending_count(g.num_node_ids(), 0);
 
-  for (int i = 0; i < total_num_nodes; ++i) {
+  for (int i = 0; i < g.num_node_ids(); ++i) {
     const Node* n = g.FindNodeId(i);
     if (n == nullptr) continue;
     pending_count[i] = n->in_edges().size();
@@ -421,7 +421,7 @@ Status ValidateNoCycles(const Graph& g) {
     }
   }
 
-  if (processed < total_num_nodes) {
+  if (processed < g.num_nodes()) {
     std::vector<string> nodes_in_cycle;
     for (int i = 0; i < pending_count.size() && nodes_in_cycle.size() < 3;
          ++i) {
@@ -430,7 +430,7 @@ Status ValidateNoCycles(const Graph& g) {
       }
     }
     return errors::InvalidArgument(
-        "Graph is invalid, contains a cycle with ", total_num_nodes - processed,
+        "Graph is invalid, contains a cycle with ", g.num_nodes() - processed,
         " nodes, including: ", str_util::Join(nodes_in_cycle, ", "));
   }
   return Status::OK();
@@ -580,6 +580,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
       status->status = InvalidArgument(
           "invalid string tensor encoding (string #", i, " of ",
           srcarray.size(), "): ", status->status.error_message());
+      delete[] base;
       return nullptr;
     }
     dst += consumed;
@@ -589,6 +590,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
     status->status = InvalidArgument(
         "invalid string tensor encoding (decoded ", (dst - base),
         " bytes, but the tensor is encoded in ", size, " bytes");
+    delete[] base;
     return nullptr;
   }
 
@@ -625,6 +627,73 @@ Status MessageToBuffer(const tensorflow::protobuf::Message& in,
   return Status::OK();
 }
 
+void RecordMutation(TF_Graph* graph, const TF_Operation& op,
+                    const char* mutation_type)
+    EXCLUSIVE_LOCKS_REQUIRED(graph->mu) {
+  // If any session has already run this node_id, mark this session as
+  // unrunnable.
+  for (auto it : graph->sessions) {
+    if (it.first->last_num_graph_nodes > op.node.id()) {
+      it.second = FailedPrecondition(
+          "Operation '", op.node.DebugString(), "' was changed by ",
+          mutation_type,
+          " after it was run by a session. Nodes can be mutated "
+          "only before they are executed by a session. Either don't modify "
+          "nodes after running them or create a new session.");
+    }
+  }
+}
+
+namespace {
+
+// Helper method that creates a shape handle for a shape described by dims.
+tensorflow::shape_inference::ShapeHandle ShapeHandleFromDims(
+    tensorflow::shape_inference::InferenceContext* ic, int num_dims,
+    const int64_t* dims) {
+  if (num_dims != -1) {
+    std::vector<tensorflow::shape_inference::DimensionHandle> dim_vec;
+    dim_vec.reserve(num_dims);
+    for (int i = 0; i < num_dims; ++i) {
+      dim_vec.push_back(ic->MakeDim(dims[i]));
+    }
+    return ic->MakeShape(dim_vec);
+  } else {
+    return ic->UnknownShape();
+  }
+}
+
+}  // namespace
+
+void TF_GraphSetOutputHandleShapesAndTypes(TF_Graph* graph, TF_Output output,
+                                           int num_shapes_and_types,
+                                           const int64_t** shapes,
+                                           const int* ranks,
+                                           const TF_DataType* types,
+                                           TF_Status* status) {
+  Node* node = &output.oper->node;
+
+  mutex_lock l(graph->mu);
+  tensorflow::shape_inference::InferenceContext* ic =
+      graph->refiner.GetContext(node);
+  if (ic == nullptr) {
+    status->status =
+        InvalidArgument("Node ", node->name(), " was not found in the graph");
+    return;
+  }
+
+  auto shape_and_type_vec =
+      std::vector<tensorflow::shape_inference::ShapeAndType>(
+          num_shapes_and_types);
+  for (int i = 0; i < num_shapes_and_types; ++i) {
+    tensorflow::shape_inference::ShapeHandle shape_handle =
+        ShapeHandleFromDims(ic, ranks[i], shapes[i]);
+    shape_and_type_vec[i] = tensorflow::shape_inference::ShapeAndType(
+        shape_handle, static_cast<DataType>(types[i]));
+  }
+
+  ic->set_output_handle_shapes_and_types(output.index, shape_and_type_vec);
+}
+
 // Helpers for loading a TensorFlow plugin (a .so file).
 Status LoadLibrary(const char* library_filename, void** result,
                    const void** buf, size_t* len);
@@ -930,7 +999,6 @@ void TF_GraphSetTensorShape(TF_Graph* graph, TF_Output output,
   Node* node = &output.oper->node;
 
   mutex_lock l(graph->mu);
-  // Set the shape.
   tensorflow::shape_inference::InferenceContext* ic =
       graph->refiner.GetContext(node);
   if (ic == nullptr) {
@@ -938,18 +1006,8 @@ void TF_GraphSetTensorShape(TF_Graph* graph, TF_Output output,
         InvalidArgument("Node ", node->name(), " was not found in the graph");
     return;
   }
-
-  tensorflow::shape_inference::ShapeHandle new_shape;
-  if (num_dims != -1) {
-    std::vector<tensorflow::shape_inference::DimensionHandle> dim_vec;
-    dim_vec.reserve(num_dims);
-    for (int i = 0; i < num_dims; ++i) {
-      dim_vec.push_back(ic->MakeDim(dims[i]));
-    }
-    new_shape = ic->MakeShape(dim_vec);
-  } else {
-    new_shape = ic->UnknownShape();
-  }
+  tensorflow::shape_inference::ShapeHandle new_shape =
+      tensorflow::ShapeHandleFromDims(ic, num_dims, dims);
   status->status = graph->refiner.SetShape(node, output.index, new_shape);
 }
 
@@ -1143,6 +1201,13 @@ void TF_SetAttrTypeList(TF_OperationDescription* desc, const char* attr_name,
                      reinterpret_cast<const DataType*>(values), num_values));
 }
 
+void TF_SetAttrFuncName(TF_OperationDescription* desc, const char* attr_name,
+                        const char* value, size_t length) {
+  tensorflow::NameAttrList func_name;
+  func_name.set_name(std::string(value, value + length));
+  desc->node_builder.Attr(attr_name, func_name);
+}
+
 void TF_SetAttrShape(TF_OperationDescription* desc, const char* attr_name,
                      const int64_t* dims, int num_dims) {
   PartialTensorShape shape;
@@ -1745,7 +1810,6 @@ void TF_OperationToNodeDef(TF_Operation* oper, TF_Buffer* output_node_def,
 TF_Graph::TF_Graph()
     : graph(tensorflow::OpRegistry::Global()),
       refiner(graph.versions().producer(), graph.op_registry()),
-      num_sessions(0),
       delete_requested(false),
       parent(nullptr),
       parent_inputs(nullptr) {}
@@ -1755,7 +1819,7 @@ TF_Graph* TF_NewGraph() { return new TF_Graph; }
 void TF_DeleteGraph(TF_Graph* g) {
   g->mu.lock();
   g->delete_requested = true;
-  const bool del = g->num_sessions == 0;
+  const bool del = g->sessions.empty();
   g->mu.unlock();
   if (del) delete g;
 }
@@ -1835,6 +1899,16 @@ void TF_ImportGraphDefOptionsSetPrefix(TF_ImportGraphDefOptions* opts,
   opts->opts.prefix = prefix;
 }
 
+void TF_ImportGraphDefOptionsSetUniquifyNames(TF_ImportGraphDefOptions* opts,
+                                              unsigned char uniquify_names) {
+  opts->opts.uniquify_names = uniquify_names;
+}
+
+void TF_ImportGraphDefOptionsSetUniquifyPrefix(TF_ImportGraphDefOptions* opts,
+                                               unsigned char uniquify_prefix) {
+  opts->opts.uniquify_prefix = uniquify_prefix;
+}
+
 void TF_ImportGraphDefOptionsAddInputMapping(TF_ImportGraphDefOptions* opts,
                                              const char* src_name,
                                              int src_index, TF_Output dst) {
@@ -1892,12 +1966,12 @@ void TF_ImportGraphDefResultsReturnOperations(TF_ImportGraphDefResults* results,
   *opers = results->return_nodes.data();
 }
 
-void TF_ImportGraphDefResultsUnusedInputMappings(
-    TF_ImportGraphDefResults* results, int* num_unused_input_mappings,
+void TF_ImportGraphDefResultsMissingUnusedInputMappings(
+    TF_ImportGraphDefResults* results, int* num_missing_unused_input_mappings,
     const char*** src_names, int** src_indexes) {
-  *num_unused_input_mappings = results->unused_key_names.size();
-  *src_names = results->unused_key_names.data();
-  *src_indexes = results->unused_key_indexes.data();
+  *num_missing_unused_input_mappings = results->missing_unused_key_names.size();
+  *src_names = results->missing_unused_key_names.data();
+  *src_indexes = results->missing_unused_key_indexes.data();
 }
 
 void TF_DeleteImportGraphDefResults(TF_ImportGraphDefResults* results) {
@@ -1937,18 +2011,21 @@ static void GraphImportGraphDefLocked(TF_Graph* graph, const GraphDef& def,
     tf_results->return_nodes[i] = ToOperation(results.return_nodes[i]);
   }
 
-  // Populate unused map keys
-  DCHECK(tf_results->unused_key_names.empty());
-  DCHECK(tf_results->unused_key_indexes.empty());
-  DCHECK(tf_results->unused_key_names_data.empty());
-  tf_results->unused_key_names.resize(results.unused_input_map_keys.size());
-  tf_results->unused_key_indexes.resize(results.unused_input_map_keys.size());
-  for (int i = 0; i < results.unused_input_map_keys.size(); ++i) {
-    TensorId id = results.unused_input_map_keys[i];
-    tf_results->unused_key_names_data.push_back(id.first.ToString());
-    tf_results->unused_key_names[i] =
-        tf_results->unused_key_names_data.back().c_str();
-    tf_results->unused_key_indexes[i] = id.second;
+  // Populate missing unused map keys
+  DCHECK(tf_results->missing_unused_key_names.empty());
+  DCHECK(tf_results->missing_unused_key_indexes.empty());
+  DCHECK(tf_results->missing_unused_key_names_data.empty());
+
+  size_t size = results.missing_unused_input_map_keys.size();
+  tf_results->missing_unused_key_names.resize(size);
+  tf_results->missing_unused_key_indexes.resize(size);
+
+  for (int i = 0; i < size; ++i) {
+    TensorId id = results.missing_unused_input_map_keys[i];
+    tf_results->missing_unused_key_names_data.push_back(id.first.ToString());
+    tf_results->missing_unused_key_names[i] =
+        tf_results->missing_unused_key_names_data.back().c_str();
+    tf_results->missing_unused_key_indexes[i] = id.second;
   }
 }
 
@@ -2325,11 +2402,12 @@ TF_Session* TF_NewSession(TF_Graph* graph, const TF_SessionOptions* opt,
   Session* session;
   status->status = NewSession(opt->options, &session);
   if (status->status.ok()) {
+    TF_Session* new_session = new TF_Session(session, graph);
     if (graph != nullptr) {
       mutex_lock l(graph->mu);
-      graph->num_sessions += 1;
+      graph->sessions[new_session] = Status::OK();
     }
-    return new TF_Session(session, graph);
+    return new_session;
   } else {
     DCHECK_EQ(nullptr, session);
     return nullptr;
@@ -2393,7 +2471,7 @@ TF_Session* TF_LoadSessionFromSavedModel(
 
   TF_Session* session = new TF_Session(bundle.session.release(), graph);
 
-  graph->num_sessions += 1;
+  graph->sessions[session] = Status::OK();
   session->last_num_graph_nodes = graph->graph.num_node_ids();
   return session;
 #endif  // __ANDROID__
@@ -2408,8 +2486,8 @@ void TF_DeleteSession(TF_Session* s, TF_Status* status) {
   TF_Graph* const graph = s->graph;
   if (graph != nullptr) {
     graph->mu.lock();
-    graph->num_sessions -= 1;
-    const bool del = graph->delete_requested && graph->num_sessions == 0;
+    graph->sessions.erase(s);
+    const bool del = graph->delete_requested && graph->sessions.empty();
     graph->mu.unlock();
     if (del) delete graph;
   }
@@ -2425,6 +2503,13 @@ static bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status) {
     mutex_lock session_lock(session->mu);
     session->graph->mu.lock();
     const Graph& graph = session->graph->graph;
+
+    status->status = session->graph->sessions[session];
+    if (!status->status.ok()) {
+      session->graph->mu.unlock();
+      return false;
+    }
+
     const auto num_nodes = graph.num_node_ids();
     if (session->last_num_graph_nodes < num_nodes) {
       status->status = tensorflow::ValidateNoCycles(session->graph->graph);
@@ -2580,4 +2665,54 @@ void TF_SessionPRun(TF_Session* session, const char* handle,
                 output_values, target_names, nullptr, status);
 }
 
+TF_ApiDefMap* TF_NewApiDefMap(TF_Buffer* op_list_buffer, TF_Status* status) {
+  tensorflow::OpList op_list;
+  if (!op_list.ParseFromArray(op_list_buffer->data, op_list_buffer->length)) {
+    status->status = InvalidArgument("Unparseable OpList");
+    return nullptr;
+  }
+  status->status = Status::OK();
+  return new TF_ApiDefMap(op_list);
+}
+
+void TF_DeleteApiDefMap(TF_ApiDefMap* apimap) { delete apimap; }
+
+void TF_ApiDefMapPut(TF_ApiDefMap* api_def_map, const char* text,
+                     size_t text_len, TF_Status* status) {
+#ifdef __ANDROID__
+  status->status = tensorflow::errors::Unimplemented(
+      "ApiDefMap is not supported in Android.");
+#else
+  mutex_lock l(api_def_map->lock);
+  if (api_def_map->update_docs_called) {
+    status->status = FailedPrecondition(
+        "TF_ApiDefMapPut cannot be called after TF_ApiDefMapGet has been "
+        "called.");
+    return;
+  }
+  string api_def_text(text, text_len);
+  status->status = api_def_map->api_def_map.LoadApiDef(api_def_text);
+#endif  // __ANDROID__
+}
+
+TF_Buffer* TF_ApiDefMapGet(TF_ApiDefMap* api_def_map, const char* name,
+                           size_t name_len, TF_Status* status) {
+#ifdef __ANDROID__
+  status->status = tensorflow::errors::Unimplemented(
+      "ApiDefMap is not supported in Android.");
+  return nullptr;
+#else
+  mutex_lock l(api_def_map->lock);
+  if (!api_def_map->update_docs_called) {
+    api_def_map->api_def_map.UpdateDocs();
+    api_def_map->update_docs_called = true;
+  }
+  string name_str(name, name_len);
+  const auto* api_def = api_def_map->api_def_map.GetApiDef(name_str);
+
+  TF_Buffer* ret = TF_NewBuffer();
+  status->status = MessageToBuffer(*api_def, ret);
+  return ret;
+#endif  // __ANDROID__
+}
 }  // end extern "C"
diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index bb569d67fcbcec29e9494236abd79b3e40db91cd..d2e45341bf1b9ee4579f84064550ce26041dd04a 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -511,6 +511,11 @@ TF_CAPI_EXPORT extern void TF_SetAttrTypeList(TF_OperationDescription* desc,
                                               const char* attr_name,
                                               const TF_DataType* values,
                                               int num_values);
+// Set a 'func' attribute to the specified name.
+// `value` must point to a string of length `length` bytes.
+TF_CAPI_EXPORT extern void TF_SetAttrFuncName(TF_OperationDescription* desc,
+                                              const char* attr_name,
+                                              const char* value, size_t length);
 
 // Set `num_dims` to -1 to represent "unknown rank".  Otherwise,
 // `dims` points to an array of length `num_dims`.  `dims[i]` must be
@@ -889,6 +894,20 @@ TF_CAPI_EXPORT extern void TF_DeleteImportGraphDefOptions(
 TF_CAPI_EXPORT extern void TF_ImportGraphDefOptionsSetPrefix(
     TF_ImportGraphDefOptions* opts, const char* prefix);
 
+// Set whether to uniquify imported operation names. If true, imported operation
+// names will be modified if their name already exists in the graph. If false,
+// conflicting names will be treated as an error. Note that this option has no
+// effect if a prefix is set, since the prefix will guarantee all names are
+// unique. Defaults to false.
+TF_CAPI_EXPORT extern void TF_ImportGraphDefOptionsSetUniquifyNames(
+    TF_ImportGraphDefOptions* opts, unsigned char uniquify_names);
+
+// If true, the specified prefix will be modified if it already exists as an
+// operation name or prefix in the graph. If false, a conflicting prefix will be
+// treated as an error. This option has no effect if no prefix is specified.
+TF_CAPI_EXPORT extern void TF_ImportGraphDefOptionsSetUniquifyPrefix(
+    TF_ImportGraphDefOptions* opts, unsigned char uniquify_prefix);
+
 // Set any imported nodes with input `src_name:src_index` to have that input
 // replaced with `dst`. `src_name` refers to a node in the graph to be imported,
 // `dst` references a node already existing in the graph being imported into.
@@ -948,16 +967,16 @@ TF_CAPI_EXPORT extern void TF_ImportGraphDefResultsReturnOperations(
     TF_ImportGraphDefResults* results, int* num_opers, TF_Operation*** opers);
 
 // Fetches any input mappings requested via
-// TF_ImportGraphDefOptionsAddInputMapping() that weren't used as input to any
-// node in the imported graph def. The number of fetched mappings is returned in
-// `num_unused_input_mappings`. The array of each mapping's source node name is
-// returned in `src_names`, and the array of each mapping's source index is
-// returned in `src_indexes`.
+// TF_ImportGraphDefOptionsAddInputMapping() that didn't appear in the GraphDef
+// and weren't used as input to any node in the imported graph def. The number
+// of fetched mappings is returned in `num_missing_unused_input_mappings`. The
+// array of each mapping's source node name is returned in `src_names`, and the
+// array of each mapping's source index is returned in `src_indexes`.
 //
 // `*src_names`, `*src_indexes`, and the memory backing each string in
 // `src_names` are owned by and have the lifetime of `results`.
-TF_CAPI_EXPORT extern void TF_ImportGraphDefResultsUnusedInputMappings(
-    TF_ImportGraphDefResults* results, int* num_unused_input_mappings,
+TF_CAPI_EXPORT extern void TF_ImportGraphDefResultsMissingUnusedInputMappings(
+    TF_ImportGraphDefResults* results, int* num_missing_unused_input_mappings,
     const char*** src_names, int** src_indexes);
 
 // Deletes a results object returned by TF_GraphImportGraphDefWithResults().
@@ -1015,6 +1034,23 @@ TF_CAPI_EXPORT extern void TF_GraphCopyFunction(TF_Graph* g,
                                                 const TF_Function* grad,
                                                 TF_Status* status);
 
+// Returns the number of TF_Functions registered in `g`.
+TF_CAPI_EXPORT extern int TF_GraphNumFunctions(TF_Graph* g);
+
+// Fills in `funcs` with the TF_Function* registered in `g`.
+// `funcs` must point to an array of TF_Function* of length at least
+// `max_func`. In usual usage, max_func should be set to the result of
+// TF_GraphNumFunctions(g). In this case, all the functions registered in
+// `g` will be returned. Else, an unspecified subset.
+//
+// If successful, returns the number of TF_Function* successfully set in
+// `funcs` and sets status to OK. The caller takes ownership of
+// all the returned TF_Functions. They must be deleted with TF_DeleteFunction.
+// On error, returns 0, sets status to the encountered error, and the contents
+// of funcs will be undefined.
+TF_CAPI_EXPORT extern int TF_GraphGetFunctions(TF_Graph* g, TF_Function** funcs,
+                                               int max_func, TF_Status* status);
+
 // Note: The following function may fail on very large protos in the future.
 
 TF_CAPI_EXPORT extern void TF_OperationToNodeDef(TF_Operation* oper,
@@ -1504,6 +1540,49 @@ TF_CAPI_EXPORT extern void TF_DeleteLibraryHandle(TF_Library* lib_handle);
 // in this address space.
 TF_CAPI_EXPORT extern TF_Buffer* TF_GetAllOpList();
 
+// TF_ApiDefMap encapsulates a collection of API definitions for an operation.
+//
+// This object maps the name of a TensorFlow operation to a description of the
+// API to generate for it, as defined by the ApiDef protocol buffer (
+// https://www.tensorflow.org/code/tensorflow/core/framework/api_def.proto)
+//
+// The ApiDef messages are typically used to generate convenience wrapper
+// functions for TensorFlow operations in various language bindings.
+typedef struct TF_ApiDefMap TF_ApiDefMap;
+
+// Creates a new TF_ApiDefMap instance.
+//
+// Params:
+//  op_list_buffer - TF_Buffer instance containing serialized OpList
+//    protocol buffer. (See
+//    https://www.tensorflow.org/code/tensorflow/core/framework/op_def.proto
+//    for the OpList proto definition).
+//  status - Set to OK on success and an appropriate error on failure.
+TF_CAPI_EXPORT extern TF_ApiDefMap* TF_NewApiDefMap(TF_Buffer* op_list_buffer,
+                                                    TF_Status* status);
+
+// Deallocates a TF_ApiDefMap.
+TF_CAPI_EXPORT extern void TF_DeleteApiDefMap(TF_ApiDefMap* apimap);
+
+// Add ApiDefs to the map.
+//
+// `text` corresponds to a text representation of an ApiDefs protocol message.
+// (https://www.tensorflow.org/code/tensorflow/core/framework/api_def.proto).
+//
+// The provided ApiDefs will be merged with existing ones in the map, with
+// precedence given to the newly added version in case of conflicts with
+// previous calls to TF_ApiDefMapPut.
+TF_CAPI_EXPORT extern void TF_ApiDefMapPut(TF_ApiDefMap* api_def_map,
+                                           const char* text, size_t text_len,
+                                           TF_Status* status);
+
+// Returns a serialized ApiDef protocol buffer for the TensorFlow operation
+// named `name`.
+TF_CAPI_EXPORT extern TF_Buffer* TF_ApiDefMapGet(TF_ApiDefMap* api_def_map,
+                                                 const char* name,
+                                                 size_t name_len,
+                                                 TF_Status* status);
+
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index dcb818b88b6fca460852beb6e948d2eb6964f663..46271e0514f473099848a8573cb7cb6fad33f7dc 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -68,7 +68,7 @@ class NodeNameMapping {
   // This is a superset of values in name_mapping_.
   std::unordered_set<string> used_names_;
   // Mapping from original node name from the graph to the normalized
-  // and uniqified version of it.
+  // and uniquified version of it.
   std::unordered_map<string, string> name_mapping_;
 };
 
@@ -226,12 +226,17 @@ Status FillFunctionBody(
       }
       node_def->add_input(strings::StrCat("^", normalized));
     }
+
+    // A function is stateful if any of its nodes are stateful.
+    if (node->op_def().is_stateful()) {
+      fdef->mutable_signature()->set_is_stateful(true);
+    }
   }
   return Status::OK();
 }
 
 // Graph to FunctionDef conversion. This code is closely modeled on the Python
-// code in third_party/tensorflow/python/framework/function.py.
+// code in tensorflow/python/framework/function.py.
 Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name,
                           bool append_hash_to_fn_name,
                           const std::vector<const Node*>& body_nodes,
@@ -307,7 +312,7 @@ Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name,
     TF_RETURN_IF_ERROR(
         NameRangesForNode(*node, node->op_def(), nullptr, &output_ranges));
     for (const auto& output : output_ranges) {
-      const string& output_name = output.first;
+      const StringPiece& output_name = output.first;
       int index_start = output.second.first;
       int index_end = output.second.second;
       for (int i = index_start; i < index_end; ++i) {
@@ -543,6 +548,28 @@ void TF_GraphCopyFunction(TF_Graph* g, const TF_Function* func,
   status->status = g->graph.AddFunctionLibrary(fdef_lib);
 }
 
+int TF_GraphNumFunctions(TF_Graph* g) {
+  tensorflow::mutex_lock l(g->mu);
+  return g->graph.flib_def().num_functions();
+}
+
+int TF_GraphGetFunctions(TF_Graph* g, TF_Function** funcs, int max_func,
+                         TF_Status* status) {
+  tensorflow::FunctionDefLibrary lib;
+  {
+    tensorflow::mutex_lock l(g->mu);
+    lib = g->graph.flib_def().ToProto();
+  }
+  const auto len = std::min(max_func, static_cast<int>(lib.function_size()));
+  for (int i = 0; i < len; ++i) {
+    TF_Function* func = new TF_Function();
+    func->fdef = lib.function(i);
+    funcs[i] = func;
+  }
+  status->status = tensorflow::Status::OK();
+  return len;
+}
+
 void TF_FunctionToFunctionDef(TF_Function* func, TF_Buffer* output_func_def,
                               TF_Status* status) {
   status->status = MessageToBuffer(func->fdef, output_func_def);
diff --git a/tensorflow/c/c_api_function_test.cc b/tensorflow/c/c_api_function_test.cc
index d5580b658992413ae6f9cb79ef88751ee28ce465..dbce66d2317a8e89288fab932cf69055f8b5a7f0 100644
--- a/tensorflow/c/c_api_function_test.cc
+++ b/tensorflow/c/c_api_function_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
@@ -1462,7 +1463,11 @@ TEST_F(CApiFunctionTest, AppendHash) {
                  /*append_hash=*/true);
   tensorflow::FunctionDef fdef;
   ASSERT_TRUE(GetFunctionDef(func_, &fdef));
+#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+  ASSERT_EQ(string("func_name_base_ZpgUD4x8oqk"), fdef.signature().name());
+#else
   ASSERT_EQ(string("func_name_base_qaJ8jA8UmGY"), fdef.signature().name());
+#endif
 }
 
 TEST_F(CApiFunctionTest, GetOpDef) {
@@ -1482,9 +1487,124 @@ TEST_F(CApiFunctionTest, GetOpDef) {
   EXPECT_EQ(op_def.name(), func_name_);
   EXPECT_EQ(op_def.input_arg_size(), 1);
   EXPECT_EQ(op_def.output_arg_size(), 1);
+  EXPECT_FALSE(op_def.is_stateful());
 
   TF_DeleteBuffer(buffer);
 }
 
+void DefineStatefulFunction(const char* name, TF_Function** func) {
+  std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)> func_graph(
+      TF_NewGraph(), TF_DeleteGraph);
+  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> s(TF_NewStatus(),
+                                                           TF_DeleteStatus);
+
+  TF_Tensor* tensor_shape = Int32Tensor({37, 1});
+  TF_Operation* shape = Const(tensor_shape, func_graph.get(), s.get(), "shape");
+  TF_Operation* random =
+      RandomUniform(shape, TF_FLOAT, func_graph.get(), s.get());
+
+  TF_Output inputs[] = {};
+  TF_Output outputs[] = {{random, 0}};
+  *func = TF_GraphToFunction(func_graph.get(), name, /*append_hash=*/false, -1,
+                             /*opers=*/nullptr, 0, inputs, 1, outputs,
+                             /*output_names=*/nullptr,
+                             /*opts=*/nullptr, "", s.get());
+  ASSERT_EQ(TF_OK, TF_GetCode(s.get())) << TF_Message(s.get());
+  ASSERT_NE(*func, nullptr);
+  TF_DeleteTensor(tensor_shape);
+}
+
+TEST_F(CApiFunctionTest, StatefulOpDef) {
+  DefineStatefulFunction(func_name_, &func_);
+  TF_GraphCopyFunction(host_graph_, func_, nullptr, s_);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+  // Test we can retrieve function OpDef from graph
+  TF_Buffer* buffer = TF_NewBuffer();
+  TF_GraphGetOpDef(host_graph_, func_name_, buffer, s_);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+  // Sanity check returned OpDef
+  string data(static_cast<const char*>(buffer->data), buffer->length);
+  OpDef op_def;
+  op_def.ParseFromString(data);
+  EXPECT_EQ(op_def.name(), func_name_);
+  EXPECT_EQ(op_def.input_arg_size(), 0);
+  EXPECT_EQ(op_def.output_arg_size(), 1);
+  EXPECT_TRUE(op_def.is_stateful());
+
+  TF_DeleteBuffer(buffer);
+}
+
+void AssertEqual(TF_Function* f1, TF_Function* f2) {
+  string s1, s2;
+  tensorflow::FunctionDef fdef1, fdef2;
+  ASSERT_TRUE(GetFunctionDef(f1, &fdef1));
+  ASSERT_TRUE(GetFunctionDef(f2, &fdef2));
+  SerializeToStringDeterministic(fdef1, &s1);
+  SerializeToStringDeterministic(fdef2, &s2);
+  ASSERT_EQ(s1, s2);
+}
+
+string GetName(TF_Function* func) {
+  tensorflow::FunctionDef fdef;
+  GetFunctionDef(func, &fdef);
+  return fdef.signature().name();
+}
+
+TEST_F(CApiFunctionTest, GetFunctionsFromGraph) {
+  TF_Function* funcs[2];
+
+  // Get functions from empty graph
+  EXPECT_EQ(TF_GraphNumFunctions(host_graph_), 0);
+  TF_GraphGetFunctions(host_graph_, nullptr, 0, s_);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+  // Define a function and add it to host_graph_
+  TF_Function* func0;
+  DefineFunction("FooFunc0", &func0);
+  TF_GraphCopyFunction(host_graph_, func0, nullptr, s_);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+  // Get this function from host_graph_
+  EXPECT_EQ(TF_GraphNumFunctions(host_graph_), 1);
+  EXPECT_EQ(TF_GraphGetFunctions(host_graph_, funcs, 0, s_), 0);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  EXPECT_EQ(TF_GraphGetFunctions(host_graph_, funcs, 1, s_), 1);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  AssertEqual(func0, funcs[0]);
+  TF_DeleteFunction(funcs[0]);
+  EXPECT_EQ(TF_GraphGetFunctions(host_graph_, funcs, 2, s_), 1);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  AssertEqual(func0, funcs[0]);
+  TF_DeleteFunction(funcs[0]);
+
+  // Define a second function
+  TF_Function* func1;
+  DefineFunction("FooFunc1", &func1);
+  TF_GraphCopyFunction(host_graph_, func1, nullptr, s_);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+  // Get both function from host_graph_
+  EXPECT_EQ(TF_GraphNumFunctions(host_graph_), 2);
+  EXPECT_EQ(TF_GraphGetFunctions(host_graph_, funcs, 0, s_), 0);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  EXPECT_EQ(TF_GraphGetFunctions(host_graph_, funcs, 2, s_), 2);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  if (GetName(funcs[0]) == GetName(func0)) {
+    AssertEqual(func0, funcs[0]);
+    AssertEqual(func1, funcs[1]);
+  } else {
+    AssertEqual(func0, funcs[1]);
+    AssertEqual(func1, funcs[0]);
+  }
+
+  TF_DeleteFunction(funcs[0]);
+  TF_DeleteFunction(funcs[1]);
+
+  TF_DeleteFunction(func0);
+  TF_DeleteFunction(func1);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h
index bb04e01beec931a8ea66d0855eec9625d3a6a5ab..91667056e0eeb224b4b8a034766f11a123cd1a03 100644
--- a/tensorflow/c/c_api_internal.h
+++ b/tensorflow/c/c_api_internal.h
@@ -24,6 +24,9 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#ifndef __ANDROID__
+#include "tensorflow/core/framework/op_gen_lib.h"
+#endif
 #include "tensorflow/core/common_runtime/shape_refiner.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
@@ -81,12 +84,20 @@ struct TF_Graph {
   std::unordered_map<tensorflow::string, tensorflow::Node*> name_map
       GUARDED_BY(mu);
 
-  // TF_Graph may only / must be deleted when
-  //   num_sessions == 0 && delete_requested == true
-
-  // num_sessions incremented by TF_NewSession, and decremented by
+  // The keys of this map are all the active sessions using this graph.
+  // Each value is the current "runnability" status of the corresponding
+  // session. Under normal conditions all statuses are Status::OK(), but
+  // if some operation is mutated after it was run by a session (this
+  // is detected in RecordMutation function), that session is no longer
+  // safe to run. Its status will contain the error that will be returned
+  // to the user, should she try running this session.
+  //
+  // Sessions are added to this map in TF_NewSession, and removed in
   // TF_DeleteSession.
-  int num_sessions GUARDED_BY(mu);
+  // TF_Graph may only / must be deleted when
+  //   sessions.size() == 0 && delete_requested == true
+  tensorflow::gtl::FlatMap<TF_Session*, tensorflow::Status> sessions
+      GUARDED_BY(mu);
   bool delete_requested GUARDED_BY(mu);  // set true by TF_DeleteGraph
 
   // Used to link graphs contained in TF_WhileParams to the parent graph that
@@ -135,11 +146,11 @@ struct TF_ImportGraphDefOptions {
 struct TF_ImportGraphDefResults {
   std::vector<TF_Output> return_tensors;
   std::vector<TF_Operation*> return_nodes;
-  std::vector<const char*> unused_key_names;
-  std::vector<int> unused_key_indexes;
+  std::vector<const char*> missing_unused_key_names;
+  std::vector<int> missing_unused_key_indexes;
 
-  // Backing memory for unused_key_names values.
-  std::list<tensorflow::string> unused_key_names_data;
+  // Backing memory for missing_unused_key_names values.
+  std::list<tensorflow::string> missing_unused_key_names_data;
 };
 
 struct TF_DeviceList {
@@ -150,6 +161,22 @@ struct TF_Function {
   tensorflow::FunctionDef fdef;
 };
 
+struct TF_ApiDefMap {
+  explicit TF_ApiDefMap(const tensorflow::OpList& op_list)
+      :
+#ifndef __ANDROID__
+        api_def_map(op_list),
+#endif
+        update_docs_called(false) {
+  }
+
+#ifndef __ANDROID__
+  tensorflow::ApiDefMap api_def_map GUARDED_BY(lock);
+#endif
+  bool update_docs_called GUARDED_BY(lock);
+  tensorflow::mutex lock;
+};
+
 namespace tensorflow {
 
 class TensorCApi {
@@ -167,6 +194,24 @@ TF_Tensor* TF_TensorFromTensor(const Tensor& src, TF_Status* status);
 
 Status MessageToBuffer(const tensorflow::protobuf::Message& in, TF_Buffer* out);
 
+// Set the shapes and types of the output's handle.
+//
+// The lengths of the arrays pointed to by `shapes`, `ranks`, and `types` must
+// all be equal to `num_shapes_and_types`. If `ranks[i] != -1`, (i.e., if the
+// rank is known), then it must be equal to the length of `shapes[i]`; if
+// `ranks[i] == 1`, then `shapes[i]` may be nullptr.
+//
+// TODO(akshayka): Implement a corresponding getter method.
+void TF_GraphSetOutputHandleShapesAndTypes(TF_Graph* graph, TF_Output output,
+                                           int num_shapes_and_types,
+                                           const int64_t** shapes,
+                                           const int* ranks,
+                                           const TF_DataType* types,
+                                           TF_Status* status);
+
+void RecordMutation(TF_Graph* graph, const TF_Operation& op,
+                    const char* mutation_type);
+
 }  // end namespace tensorflow
 
 #endif  // TENSORFLOW_C_C_API_INTERNAL_H_
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index 6ec1db8ccfdb713f330b708e604bd4b502ff7202..df697e16d3d3fcaac66f967c0d3938450f0b0be6 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/cc/saved_model/tag_constants.h"
 #include "tensorflow/core/example/example.pb.h"
 #include "tensorflow/core/example/feature.pb.h"
+#include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/graph.pb_text.h"
 #include "tensorflow/core/framework/node_def.pb_text.h"
@@ -773,7 +774,7 @@ TEST(CAPI, ImportGraphDef_WithReturnOutputs) {
   TF_DeleteStatus(s);
 }
 
-TEST(CAPI, ImportGraphDef_UnusedInputMappings) {
+TEST(CAPI, ImportGraphDef_MissingUnusedInputMappings) {
   TF_Status* s = TF_NewStatus();
   TF_Graph* graph = TF_NewGraph();
 
@@ -816,7 +817,7 @@ TEST(CAPI, ImportGraphDef_UnusedInputMappings) {
   int num_unused_input_mappings;
   const char** src_names;
   int* src_indexes;
-  TF_ImportGraphDefResultsUnusedInputMappings(
+  TF_ImportGraphDefResultsMissingUnusedInputMappings(
       results, &num_unused_input_mappings, &src_names, &src_indexes);
   ASSERT_EQ(1, num_unused_input_mappings);
   EXPECT_EQ(string("fake"), string(src_names[0]));
@@ -2027,6 +2028,77 @@ TEST_F(CApiAttributesTest, Errors) {
   EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_);
 }
 
+TEST(TestApiDef, TestCreateApiDef) {
+  TF_Status* status = TF_NewStatus();
+  TF_Library* lib =
+      TF_LoadLibrary("tensorflow/c/test_op.so", status);
+  EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  TF_DeleteStatus(status);
+
+  TF_Buffer op_list_buf = TF_GetOpList(lib);
+  status = TF_NewStatus();
+  auto* api_def_map = TF_NewApiDefMap(&op_list_buf, status);
+  EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  TF_DeleteStatus(status);
+
+  string op_name = "TestCApi";
+  status = TF_NewStatus();
+  auto* api_def_buf =
+      TF_ApiDefMapGet(api_def_map, op_name.c_str(), op_name.size(), status);
+  EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  TF_DeleteStatus(status);
+
+  tensorflow::ApiDef api_def;
+  EXPECT_TRUE(api_def.ParseFromArray(api_def_buf->data, api_def_buf->length));
+  EXPECT_EQ(op_name, api_def.graph_op_name());
+  EXPECT_EQ(R"doc(Used to test C API)doc", api_def.summary());
+
+  TF_DeleteBuffer(api_def_buf);
+  TF_DeleteApiDefMap(api_def_map);
+  TF_DeleteLibraryHandle(lib);
+}
+
+TEST(TestApiDef, TestCreateApiDefWithOverwrites) {
+  TF_Status* status = TF_NewStatus();
+  TF_Library* lib =
+      TF_LoadLibrary("tensorflow/c/test_op.so", status);
+  EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  TF_DeleteStatus(status);
+
+  TF_Buffer op_list_buf = TF_GetOpList(lib);
+  status = TF_NewStatus();
+  auto* api_def_map = TF_NewApiDefMap(&op_list_buf, status);
+  EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  TF_DeleteStatus(status);
+
+  string api_def_overwrites = R"(op: <
+  graph_op_name: "TestCApi"
+  summary: "New summary"
+>
+)";
+  status = TF_NewStatus();
+  TF_ApiDefMapPut(api_def_map, api_def_overwrites.c_str(),
+                  api_def_overwrites.size(), status);
+  EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  TF_DeleteStatus(status);
+
+  string op_name = "TestCApi";
+  status = TF_NewStatus();
+  auto* api_def_buf =
+      TF_ApiDefMapGet(api_def_map, op_name.c_str(), op_name.size(), status);
+  EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  TF_DeleteStatus(status);
+
+  tensorflow::ApiDef api_def;
+  EXPECT_TRUE(api_def.ParseFromArray(api_def_buf->data, api_def_buf->length));
+  EXPECT_EQ(op_name, api_def.graph_op_name());
+  EXPECT_EQ("New summary", api_def.summary());
+
+  TF_DeleteBuffer(api_def_buf);
+  TF_DeleteApiDefMap(api_def_map);
+  TF_DeleteLibraryHandle(lib);
+}
+
 #undef EXPECT_TF_META
 
 }  // namespace
diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc
index c291a2e440a8515e968b0ce0395b289080f04e8b..37439ff0beac5a5220460465e954b6c093ee1ba9 100644
--- a/tensorflow/c/c_test_util.cc
+++ b/tensorflow/c/c_test_util.cc
@@ -193,6 +193,15 @@ TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph,
   return TF_FinishOperation(desc, s);
 }
 
+TF_Operation* RandomUniform(TF_Operation* shape, TF_DataType dtype,
+                            TF_Graph* graph, TF_Status* s) {
+  TF_OperationDescription* desc =
+      TF_NewOperation(graph, "RandomUniform", "random_uniform");
+  TF_AddInput(desc, {shape, 0});
+  TF_SetAttrType(desc, "dtype", dtype);
+  return TF_FinishOperation(desc, s);
+}
+
 void Split3Helper(TF_Operation* input, TF_Graph* graph, TF_Status* s,
                   const char* name, TF_Operation** op) {
   TF_Operation* zero = ScalarConst(
diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h
index d54733749248fa32c39d88bb0281d329dd50c7bd..3429009a71a863ae6b69b5cd29ace3c7fd078f4c 100644
--- a/tensorflow/c/c_test_util.h
+++ b/tensorflow/c/c_test_util.h
@@ -74,7 +74,10 @@ TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s,
 
 TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s);
 
-// Split `input` along the first dimention into 3 tensors
+TF_Operation* RandomUniform(TF_Operation* shape, TF_DataType dtype,
+                            TF_Graph* graph, TF_Status* s);
+
+// Split `input` along the first dimension into 3 tensors
 TF_Operation* Split3(TF_Operation* input, TF_Graph* graph, TF_Status* s,
                      const char* name = "split3");
 
diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD
index d533758e360bc44a6f52f57eaae5b222e0482860..74190cb135ac6c17bfcc9d8bd2f7c75ac5e8c076 100644
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@@ -33,7 +33,7 @@ tf_cuda_library(
             "//tensorflow/core:lib_internal",
             "//tensorflow/core:protos_all_cc",
         ],
-    }),
+    }) + ["//tensorflow/core:gpu_runtime"],
 )
 
 tf_cuda_library(
@@ -55,6 +55,10 @@ tf_cuda_library(
 tf_cuda_cc_test(
     name = "c_api_test",
     srcs = ["c_api_test.cc"],
+    tags = [
+        "guitar",
+        "multi_gpu",
+    ],
     deps = [
         ":c_api",
         "//tensorflow/core:lib",
@@ -113,3 +117,9 @@ cc_library(
         "//tensorflow/core:lib",
     ],
 )
+
+filegroup(
+    name = "headers",
+    srcs = ["c_api.h"],
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index 706c89536db019c7f7389af576815746b2425520..04a415b909ba3e76dfc12a3522f85d290ba6d36f 100644
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/c/eager/c_api_internal.h"
 #include "tensorflow/c/eager/runtime.h"
+#include "tensorflow/core/common_runtime/copy_tensor.h"
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/function.h"
@@ -97,7 +98,10 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) {
 
 void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) {
   status->status = tensorflow::Status::OK();
-  tensorflow::gtl::STLDeleteValues(&ctx->kernel_cache);
+  {
+    tensorflow::mutex_lock ml(ctx->cache_mu);
+    tensorflow::gtl::STLDeleteValues(&ctx->kernel_cache);
+  }
   TF_Graph* graph = ctx->session->graph;
   TF_DeleteSession(ctx->session, status);
   TF_DeleteGraph(graph);
@@ -109,6 +113,11 @@ TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) {
   return TF_SessionListDevices(ctx->session, status);
 }
 
+void TFE_ContextClearCaches(TFE_Context* ctx) {
+  tensorflow::mutex_lock ml(ctx->cache_mu);
+  tensorflow::gtl::STLDeleteValues(&ctx->kernel_cache);
+}
+
 TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) {
   tensorflow::Tensor tensor;
   status->status = tensorflow::TF_TensorToTensor(t, &tensor);
@@ -164,23 +173,13 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h,
   bool is_same_device =
       (srcd == dstd) || (DeviceName(srcd) == DeviceName(dstd));
   const bool dst_cpu = IsCPU(dstd);
+  const bool src_cpu = IsCPU(srcd);
   if (is_same_device) {
     return new TFE_TensorHandle(h->t, dst_cpu ? nullptr : dstd);
   }
-  const bool src_cpu = IsCPU(srcd);
-  if (src_cpu == dst_cpu) {
-    TF_SetStatus(
-        status, TF_INVALID_ARGUMENT,
-        tensorflow::strings::StrCat(
-            "TFE_TensorHandleCopyToDevice requires either the source "
-            "TFE_TensorHandle be on or the destination device be on CPU "
-            "or be the same (they are ",
-            DeviceName(srcd), " and ", DeviceName(dstd), " in this call)")
-            .c_str());
-    return nullptr;
-  }
   tensorflow::Tensor* src = &(h->t);
-  if (!dst_cpu && !tensorflow::DataTypeCanUseMemcpy(src->dtype())) {
+  if (!dst_cpu && (src->dtype() != tensorflow::DT_VARIANT &&
+                   !tensorflow::DataTypeCanUseMemcpy(src->dtype()))) {
     TF_SetStatus(
         status, TF_INVALID_ARGUMENT,
         tensorflow::strings::StrCat("Can't copy Tensor with type ",
@@ -189,26 +188,22 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h,
             .c_str());
     return nullptr;
   }
-  if (src_cpu) {
-    tensorflow::Tensor dst(
-        dstd->GetAllocator(tensorflow::AllocatorAttributes()), src->dtype(),
-        src->shape());
-    if (src->shape().num_elements() == 0) {
-      return new TFE_TensorHandle(dst, dstd);
-    }
-    tensorflow::Notification n;
-    dstd->tensorflow_gpu_device_info()->default_context->CopyCPUTensorToDevice(
-        src, dstd, &dst, [status, &n](const tensorflow::Status& s) {
-          status->status = s;
-          n.Notify();
-        });
-    n.WaitForNotification();
-    return (TF_GetCode(status) == TF_OK) ? new TFE_TensorHandle(dst, dstd)
-                                         : nullptr;
-  }
-  CHECK(dst_cpu);
-  tensorflow::Tensor dst(src->dtype(), src->shape());
-  tensorflow::Notification n;
+  tensorflow::AllocatorAttributes attr;
+  if (src->dtype() == tensorflow::DT_VARIANT) {
+    attr.set_on_host(true);
+  }
+  tensorflow::Tensor dst(dstd->GetAllocator(attr), src->dtype(), src->shape());
+  if (src->shape().num_elements() == 0) {
+    return new TFE_TensorHandle(dst, dst_cpu ? nullptr : dstd);
+  }
+  tensorflow::DeviceContext* src_device_context = nullptr;
+  if (!src_cpu) {
+    src_device_context = srcd->tensorflow_gpu_device_info()->default_context;
+  }
+  tensorflow::DeviceContext* dst_device_context = nullptr;
+  if (!dst_cpu) {
+    dst_device_context = dstd->tensorflow_gpu_device_info()->default_context;
+  }
   // TODO(ashankar): The Sync() call below may be more aggressive than
   // necessary. It is based on knowledge of implementation details - that
   // GPU devices are implemented using 3 streams - one for host->device copies,
@@ -217,16 +212,18 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h,
   // but more than necessary (since it waits for operations that might have
   // nothing to do with this tensor to complete).
   status->status = srcd->Sync();
-  if (!status->status.ok()) return nullptr;
-  srcd->tensorflow_gpu_device_info()->default_context->CopyDeviceTensorToCPU(
-      src, "IGNORE_MY_TENSOR_NAME", srcd, &dst,
-      [status, &n](const tensorflow::Status& s) {
-        status->status = s;
-        n.Notify();
-      });
+  tensorflow::Notification n;
+  tensorflow::CopyTensor::ViaDMA("copy", src_device_context, dst_device_context,
+                                 srcd, dstd, tensorflow::AllocatorAttributes(),
+                                 tensorflow::AllocatorAttributes(), src, &dst,
+                                 [status, &n](const tensorflow::Status& s) {
+                                   status->status = s;
+                                   n.Notify();
+                                 });
   n.WaitForNotification();
-  return (TF_GetCode(status) == TF_OK) ? new TFE_TensorHandle(dst, nullptr)
-                                       : nullptr;
+  return (TF_GetCode(status) == TF_OK)
+             ? new TFE_TensorHandle(dst, dst_cpu ? nullptr : dstd)
+             : nullptr;
 }
 
 TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name,
@@ -505,8 +502,11 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals,
   std::vector<tensorflow::Tensor> outputs(1);
   const tensorflow::MemoryTypeVector* output_memory_types = nullptr;
   tensorflow::Fprint128 cache_key = op->attrs.CacheKey(device->name());
-  tensorflow::KernelAndDevice* kernel =
-      tensorflow::gtl::FindPtrOrNull(ctx->kernel_cache, cache_key);
+  tensorflow::KernelAndDevice* kernel;
+  {
+    tensorflow::tf_shared_lock l(ctx->cache_mu);
+    kernel = tensorflow::gtl::FindPtrOrNull(ctx->kernel_cache, cache_key);
+  }
   if (kernel == nullptr) {
     const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef();
     kernel = new tensorflow::KernelAndDevice(ctx->rendezvous);
@@ -522,6 +522,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals,
       delete kernel;
       return;
     }
+    tensorflow::mutex_lock ml(ctx->cache_mu);
     tensorflow::gtl::InsertOrUpdate(&(ctx->kernel_cache), cache_key, kernel);
   }
   std::vector<TFE_TensorHandle*> copied_tensors;
@@ -534,19 +535,54 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals,
     }
     return;
   }
+  std::unique_ptr<tensorflow::NodeExecStats> maybe_stats;
+  if (ctx->should_store_metadata.load()) {
+    maybe_stats.reset(new tensorflow::NodeExecStats);
+    maybe_stats->set_node_name(op->name);
+    maybe_stats->set_all_start_micros(tensorflow::Env::Default()->NowMicros());
+    maybe_stats->set_op_start_rel_micros(0);
+    maybe_stats->set_scheduled_micros(tensorflow::Env::Default()->NowMicros());
+    // TODO(apassos) track referenced tensors
+  }
   // WARNING: kernel->Run utilizes the FunctionLibraryRuntime
   // (ctx->func_lib(device)), which in turn holds a pointer to func_lib_def,
   // which is GUARDED_BY(ctx->functions_mu). But knowledge of the implementation
-  // of FunctionLibraryRuntime tells use that func_lib_def is not accessed by
+  // of FunctionLibraryRuntime tells us that func_lib_def is not accessed by
   // FunctionLibraryRuntime::Run(), so there is no thread-safety concern here.
   // This is quite subtle. Re-work things to make this better?  (Would it make
   // sense for FunctionLibraryRuntime to ensure thread-safe access to
-  // FunctionLibraryDefinition?).
-  status->status = kernel->Run(&op->inputs, &outputs);
+  // FunctionLibraryDefinition?).  TODO(apassos) figure out how to record stats
+  // for ops which are a part of functions.
+  status->status = kernel->Run(&op->inputs, &outputs, maybe_stats.get());
   for (auto* t : copied_tensors) {
     TFE_DeleteTensorHandle(t);
   }
   if (!status->status.ok()) return;
+  if (maybe_stats != nullptr) {
+    maybe_stats->set_op_end_rel_micros(tensorflow::Env::Default()->NowMicros() -
+                                       maybe_stats->all_start_micros());
+    tensorflow::mutex_lock ml(ctx->metadata_mu);
+    if (ctx->should_store_metadata.load()) {
+      auto* step_stats = ctx->run_metadata.mutable_step_stats();
+      // Lazily initialize the RunMetadata with information about all devices if
+      // this is the first call.
+      while (step_stats->dev_stats_size() < ctx->devices().size()) {
+        step_stats->add_dev_stats();
+      }
+      // Find the current device's index.
+      int device_idx = 0;
+      for (int i = 0; i < ctx->devices().size(); ++i) {
+        if (ctx->devices()[i] == device) {
+          device_idx = i;
+          break;
+        }
+      }
+      // Populate the device stats for this device.
+      auto* dev_stats = step_stats->mutable_dev_stats(device_idx);
+      dev_stats->set_device(device->name());
+      *dev_stats->add_node_stats() = *maybe_stats;
+    }
+  }
   *num_retvals = std::min<int>(*num_retvals, outputs.size());
   for (int i = 0; i < *num_retvals; ++i) {
     tensorflow::Device* d = IsCPU(device) ? nullptr : device;
@@ -593,3 +629,20 @@ const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory(
   }
   return &h->t;
 }
+
+void TFE_ContextEnableRunMetadata(TFE_Context* ctx) {
+  ctx->should_store_metadata.store(true);
+}
+
+void TFE_ContextDisableRunMetadata(TFE_Context* ctx) {
+  tensorflow::mutex_lock ml(ctx->metadata_mu);
+  ctx->should_store_metadata.store(false);
+  ctx->run_metadata.Clear();
+}
+
+void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf,
+                                  TF_Status* status) {
+  tensorflow::mutex_lock ml(ctx->metadata_mu);
+  status->status = MessageToBuffer(ctx->run_metadata, buf);
+  ctx->run_metadata.Clear();
+}
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
index ca105962df0d6655946304159937621022e7fcba..9b0fd037da35f31e9b97f29b1269bbca9e4c849d 100644
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -17,6 +17,8 @@ limitations under the License.
 #define TENSORFLOW_C_EAGER_C_API_H_
 
 // C API extensions to experiment with eager execution of kernels.
+// WARNING: Unlike tensorflow/c/c_api.h, the API here is not guaranteed to be
+// stable and can change without notice.
 
 #include "tensorflow/c/c_api.h"
 
@@ -87,6 +89,10 @@ TF_CAPI_EXPORT extern void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status
 TF_CAPI_EXPORT extern TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx,
                                                             TF_Status* status);
 
+// Clears the internal caches in the TFE context. Useful when reseeding random
+// ops.
+TF_CAPI_EXPORT extern void TFE_ContextClearCaches(TFE_Context* ctx);
+
 // A handle to a tensor on a device.
 //
 // Like a TF_Tensor, a TFE_TensorHandle refers to a tensor with a value, shape,
@@ -207,6 +213,19 @@ TF_CAPI_EXPORT extern void TFE_ContextAddFunction(TFE_Context* ctx,
                                                   TF_Function* function,
                                                   TF_Status* status);
 
+// Enables tracing of RunMetadata on the ops executed from this context.
+TF_CAPI_EXPORT extern void TFE_ContextEnableRunMetadata(TFE_Context* ctx);
+
+// Disables tracing of RunMetadata on the ops executed from this context.
+TF_CAPI_EXPORT extern void TFE_ContextDisableRunMetadata(TFE_Context* ctx);
+
+// Populates the passed-in buffer with a serialized RunMetadata protocol buffer
+// containing any run metadata information accumulated so far and clears this
+// information.
+TF_CAPI_EXPORT extern void TFE_ContextExportRunMetadata(TFE_Context* ctx,
+                                                        TF_Buffer* buf,
+                                                        TF_Status* status);
+
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif
diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h
index 0971e2ab2fe98cc8bf6f631f41d5adce90ee7051..55a04d48bad63a8c19ffdc39675b1e1b70ac80d7 100644
--- a/tensorflow/c/eager/c_api_internal.h
+++ b/tensorflow/c/eager/c_api_internal.h
@@ -58,15 +58,21 @@ struct TFE_Context {
   // session->devices[i].
   std::unique_ptr<tensorflow::ProcessFunctionLibraryRuntime> pflr;
 
+  tensorflow::mutex cache_mu;
   std::unordered_map<tensorflow::Fprint128, tensorflow::KernelAndDevice*,
                      tensorflow::Fprint128Hasher>
-      kernel_cache;
+      kernel_cache GUARDED_BY(cache_mu);
 
   tensorflow::FunctionLibraryRuntime* func_lib(tensorflow::Device* d) {
     return pflr->GetFLR(d->name());
   }
 
   const std::vector<tensorflow::Device*>& devices() { return session->devices; }
+
+  // Whether we should compute RunMetadata.
+  std::atomic<bool> should_store_metadata{false};
+  tensorflow::mutex metadata_mu;
+  tensorflow::RunMetadata run_metadata GUARDED_BY(metadata_mu);
 };
 
 struct TFE_TensorHandle {
diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc
index 3fe0b7efa11bc619ed98bf9a1634ade5b6ed0a7c..423a7e1ff71bfdc5f51e36ae63359869ea079ddc 100644
--- a/tensorflow/c/eager/c_api_test.cc
+++ b/tensorflow/c/eager/c_api_test.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
+#include "tensorflow/core/protobuf/config.pb.h"
 
 using tensorflow::string;
 
@@ -216,6 +217,64 @@ TEST(CAPI, TensorHandleCopyBetweenDevices) {
   EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
 }
 
+TEST(CAPI, TensorHandleCopyBetweenTwoGPUDevices) {
+  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
+      TF_NewStatus(), TF_DeleteStatus);
+  TFE_ContextOptions* opts = TFE_NewContextOptions();
+  TFE_Context* ctx = TFE_NewContext(opts, status.get());
+  TFE_DeleteContextOptions(opts);
+  ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+  TFE_TensorHandle* hcpu = TestMatrixTensorHandle();
+  TF_Tensor* t = TFE_TensorHandleResolve(hcpu, status.get());
+  ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+  TF_DeviceList* devices = TFE_ContextListDevices(ctx, status.get());
+  ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+  const int num_devices = TF_DeviceListCount(devices);
+
+  const char* kCPUDevice = "CPU:0";
+  if (num_devices < 3) {
+    TF_DeleteDeviceList(devices);
+    TF_DeleteTensor(t);
+    TFE_DeleteTensorHandle(hcpu);
+    TFE_DeleteContext(ctx, status.get());
+    return;
+  }
+  const string gpu_1_name(TF_DeviceListName(devices, 1, status.get()));
+  ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK);
+  const string gpu_2_name(TF_DeviceListName(devices, 2, status.get()));
+  ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK);
+  TFE_TensorHandle* hdevice =
+      TFE_TensorHandleCopyToDevice(hcpu, ctx, gpu_1_name.c_str(), status.get());
+  ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK);
+
+  TFE_TensorHandle* hdevice2 = TFE_TensorHandleCopyToDevice(
+      hdevice, ctx, gpu_2_name.c_str(), status.get());
+  ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK);
+  TFE_DeleteTensorHandle(hdevice);
+  // Copy back to CPU
+  TFE_TensorHandle* hcopy =
+      TFE_TensorHandleCopyToDevice(hdevice2, ctx, kCPUDevice, status.get());
+  ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK);
+  TFE_DeleteTensorHandle(hdevice2);
+
+  // Ensure that the contents are the same!
+  TF_Tensor* tcopy = TFE_TensorHandleResolve(hcopy, status.get());
+  TFE_DeleteTensorHandle(hcopy);
+  ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK);
+  EXPECT_EQ(TF_TensorByteSize(t), TF_TensorByteSize(tcopy));
+  EXPECT_EQ(
+      0, memcmp(TF_TensorData(t), TF_TensorData(tcopy), TF_TensorByteSize(t)));
+  TF_DeleteTensor(tcopy);
+
+  TF_DeleteDeviceList(devices);
+  TF_DeleteTensor(t);
+  TFE_DeleteTensorHandle(hcpu);
+  TFE_DeleteContext(ctx, status.get());
+  EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+}
+
 TEST(CAPI, TensorHandleSilentCopy) {
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
       TF_NewStatus(), TF_DeleteStatus);
@@ -295,6 +354,47 @@ TEST(CAPI, Execute) {
   TF_DeleteStatus(status);
 }
 
+TEST(CAPI, ExecuteWithTracing) {
+  TF_Status* status = TF_NewStatus();
+  TFE_ContextOptions* opts = TFE_NewContextOptions();
+  TFE_Context* ctx = TFE_NewContext(opts, status);
+  TFE_ContextEnableRunMetadata(ctx);
+  CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  TFE_DeleteContextOptions(opts);
+
+  TFE_TensorHandle* m = TestMatrixTensorHandle();
+  TFE_Op* matmul = MatMulOp(ctx, m, m);
+  TFE_TensorHandle* retvals[2] = {nullptr};
+  int num_retvals = 2;  // Should be reduced to 1 by the TFE_Execute call.
+  TFE_Execute(matmul, &retvals[0], &num_retvals, status);
+  EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  TFE_DeleteOp(matmul);
+  TFE_DeleteTensorHandle(m);
+  TF_Buffer* b = TF_NewBuffer();
+  TFE_ContextExportRunMetadata(ctx, b, status);
+  ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  tensorflow::RunMetadata rm;
+  EXPECT_TRUE(
+      rm.ParseFromString({reinterpret_cast<const char*>(b->data), b->length}));
+  TF_DeleteBuffer(b);
+  TFE_DeleteContext(ctx, status);
+  ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  ASSERT_EQ(1, num_retvals);
+
+  TF_Tensor* t = TFE_TensorHandleResolve(retvals[0], status);
+  TFE_DeleteTensorHandle(retvals[0]);
+  ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  float product[4] = {0};
+  EXPECT_EQ(sizeof(product), TF_TensorByteSize(t));
+  memcpy(&product[0], TF_TensorData(t), TF_TensorByteSize(t));
+  TF_DeleteTensor(t);
+  EXPECT_EQ(7, product[0]);
+  EXPECT_EQ(10, product[1]);
+  EXPECT_EQ(15, product[2]);
+  EXPECT_EQ(22, product[3]);
+  TF_DeleteStatus(status);
+}
+
 TEST(CAPI, Function) {
   // First create a simple identity function.
   TF_Graph* function_graph = TF_NewGraph();
diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc
index 38066682a9fc5038c34a4ac3b20a67ceb08ab951..3a9951e14de3a70e0b9e47fa62e6342e063c4bed 100644
--- a/tensorflow/c/eager/runtime.cc
+++ b/tensorflow/c/eager/runtime.cc
@@ -262,7 +262,8 @@ Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flib,
 }
 
 Status KernelAndDevice::Run(std::vector<Tensor>* input_tensors,
-                            std::vector<Tensor>* output_tensors) {
+                            std::vector<Tensor>* output_tensors,
+                            NodeExecStats* stats) {
   gtl::InlinedVector<TensorValue, 4> inputs;
   for (Tensor& t : *input_tensors) {
     inputs.push_back(TensorValue(&t));
@@ -284,6 +285,9 @@ Status KernelAndDevice::Run(std::vector<Tensor>* input_tensors,
   params.function_library = flib_;
   params.slice_reader_cache = &slice_reader_cache_;
   params.rendezvous = rendez_;
+  if (stats != nullptr) {
+    params.track_allocations = true;
+  }
   // TODO(apassos): use a thread pool.
   std::function<void(std::function<void()>)> runner =
       [](std::function<void()> f) { f(); };
@@ -297,6 +301,28 @@ Status KernelAndDevice::Run(std::vector<Tensor>* input_tensors,
   for (int i = 0; i < context.num_outputs(); ++i) {
     output_tensors->push_back(Tensor(*context.mutable_output(i)));
   }
+  if (stats != nullptr) {
+    for (const auto& allocator_pair : context.wrapped_allocators()) {
+      AllocatorMemoryUsed* memory = stats->add_memory();
+      memory->set_allocator_name(allocator_pair.first->Name());
+      auto sizes = allocator_pair.second->GetSizes();
+      memory->set_total_bytes(std::get<0>(sizes));
+      memory->set_peak_bytes(std::get<1>(sizes));
+      memory->set_live_bytes(std::get<2>(sizes));
+
+      AllocatorStats allocator_stats;
+      allocator_pair.first->GetStats(&allocator_stats);
+      memory->set_allocator_bytes_in_use(allocator_stats.bytes_in_use);
+      allocator_pair.second->GetRecordsAndUnRef();
+    }
+    auto* ms = stats->mutable_memory_stats();
+    ms->set_temp_memory_size(context.temp_memory_size());
+    for (const auto& alloc_id : context.persistent_alloc_ids()) {
+      ms->mutable_persistent_tensor_alloc_ids()->Add(alloc_id);
+    }
+
+    ms->set_persistent_memory_size(context.persistent_memory_allocated());
+  }
   return Status::OK();
 }
 
diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h
index fb97e94a94103d17164cb30f6c6e0ed3e07dc103..e28a416e67f8382dbd490648106a7eb6e5fcfd13 100644
--- a/tensorflow/c/eager/runtime.h
+++ b/tensorflow/c/eager/runtime.h
@@ -175,7 +175,8 @@ class KernelAndDevice {
       : device_(nullptr), flib_(nullptr), rendez_(rendez) {}
 
   // TODO(ashankar): Handle list-valued inputs.
-  Status Run(std::vector<Tensor>* inputs, std::vector<Tensor>* outputs);
+  Status Run(std::vector<Tensor>* inputs, std::vector<Tensor>* outputs,
+             NodeExecStats* stats);
 
   const OpKernel* kernel() const { return kernel_.get(); }
 
diff --git a/tensorflow/c/eager/runtime_test.cc b/tensorflow/c/eager/runtime_test.cc
index 3236c6be0ec5281e8099219968dd5f5c6c2048c3..2ccca66f672b96b3c782ddbfc828eeda270cebee 100644
--- a/tensorflow/c/eager/runtime_test.cc
+++ b/tensorflow/c/eager/runtime_test.cc
@@ -96,7 +96,7 @@ TEST(KernelAndDevice, Run) {
       KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel);
   ASSERT_TRUE(s.ok()) << s;
   std::vector<Tensor> outputs;
-  s = kernel.Run(&inputs, &outputs);
+  s = kernel.Run(&inputs, &outputs, nullptr);
   ASSERT_TRUE(s.ok()) << s;
   ASSERT_EQ(1, outputs.size());
   const Tensor& out = outputs[0];
@@ -183,7 +183,7 @@ void BM_KernelAndDeviceRun(int iters) {
       KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel));
   tensorflow::testing::StartTiming();
   for (int i = 0; i < iters; ++i) {
-    TF_CHECK_OK(kernel.Run(&inputs, &outputs));
+    TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr));
   }
 }
 BENCHMARK(BM_KernelAndDeviceRun);
diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index f52248e7d567b8edd911c6dba1786ceb5d5c721c..2b65e38f54090af6731685f78d5f7f914a875e3c 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -161,7 +161,7 @@ class GradientTape {
   // the tape refer to it); to aid in tape garbage collection.
   std::unordered_map<int64, int64> tensor_usage_;
 
-  // If true, all activations are deleted in the first call to ComputeGradient.
+  // If false, all activations are deleted in the first call to ComputeGradient.
   // Else, only when this is destructed.
   bool persistent_;
 };
@@ -350,7 +350,7 @@ BackpropInitialState<BackwardFunction> PrepareBackprop(
     // Call destructors for all unneeded gradient functions and
     // clear the op_tape. We can clear the tape because ownership of
     // backward functions that will be used for gradient computation
-    // has been transfered to `result`.
+    // has been transferred to `result`.
     for (const auto& op_pair : *op_tape) {
       op_pair.second.backward_function_deleter();
     }
@@ -491,6 +491,7 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
     state.op_tape.erase(op_it);
     std::vector<Gradient*> out_gradients;
     out_gradients.reserve(trace.output_tensor_info.size());
+    bool any_gradient_nonzero = false;
     for (int i = 0; i < trace.output_tensor_info.size(); ++i) {
       const int64 id = trace.output_tensor_info[i].id;
       auto grad_it = gradients.find(id);
@@ -506,6 +507,7 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
                            trace.output_tensor_info[i].dtype));
         }
       } else {
+        any_gradient_nonzero = true;
         out_gradients.push_back(vspace.AggregateGradients(grad_it->second));
         if (sources_set.find(grad_it->first) == sources_set.end()) {
           gradients.erase(grad_it);
@@ -513,14 +515,26 @@ Status GradientTape<Gradient, BackwardFunction>::ComputeGradient(
       }
     }
     std::vector<Gradient*> in_gradients;
-    Status s = vspace.CallBackwardFunction(trace.backward_function,
-                                           out_gradients, &in_gradients);
-    if (!persistent_) {
-      vspace.ReleaseBackwardFunction(trace.backward_function);
-    }
-    if (!s.ok()) {
-      cleanup();
-      return s;
+    if (any_gradient_nonzero) {
+      Status s = vspace.CallBackwardFunction(trace.backward_function,
+                                             out_gradients, &in_gradients);
+      if (!persistent_) {
+        vspace.ReleaseBackwardFunction(trace.backward_function);
+      }
+      if (!s.ok()) {
+        cleanup();
+        return s;
+      }
+    } else {
+      in_gradients.resize(trace.input_tensor_id.size());
+      if (!persistent_) {
+        vspace.ReleaseBackwardFunction(trace.backward_function);
+      }
+      for (Gradient* grad : out_gradients) {
+        if (grad != nullptr) {
+          vspace.DeleteGradient(grad);
+        }
+      }
     }
     VLOG(1) << "Got " << in_gradients.size() << " in_gradients for "
             << trace.input_tensor_id.size() << " sources";
diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc
index ba5a9268b4f671499590d66fb41060dd18e1ce47..6e37cdb5f4beea53d4a2ded0705ae482d0bc2d68 100644
--- a/tensorflow/c/python_api.cc
+++ b/tensorflow/c/python_api.cc
@@ -22,6 +22,7 @@ namespace tensorflow {
 void AddControlInput(TF_Graph* graph, TF_Operation* op, TF_Operation* input) {
   mutex_lock l(graph->mu);
   graph->graph.AddControlEdge(&input->node, &op->node);
+  RecordMutation(graph, *op, "adding control input");
 }
 
 void SetAttr(TF_Graph* graph, TF_Operation* op, const char* attr_name,
@@ -36,11 +37,13 @@ void SetAttr(TF_Graph* graph, TF_Operation* op, const char* attr_name,
 
   mutex_lock l(graph->mu);
   op->node.AddAttr(attr_name, attr_val);
+  RecordMutation(graph, *op, "setting attribute");
 }
 
 void SetRequestedDevice(TF_Graph* graph, TF_Operation* op, const char* device) {
   mutex_lock l(graph->mu);
   op->node.set_requested_device(device);
+  RecordMutation(graph, *op, "setting device");
 }
 
 void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst,
@@ -75,6 +78,25 @@ void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst,
   }
   status->status = graph->graph.UpdateEdge(&new_src.oper->node, new_src.index,
                                            &dst.oper->node, dst.index);
+
+  if (status->status.ok()) {
+    // This modification only updates the destination node for
+    // the purposes of running this graph in a session. Thus, we don't
+    // record the source node as being modified.
+    RecordMutation(graph, *dst.oper, "updating input tensor");
+  }
+}
+
+void RemoveAllControlInputs(TF_Graph* graph, TF_Operation* op) {
+  mutex_lock l(graph->mu);
+  std::vector<const Edge*> control_edges;
+  for (const Edge* edge : op->node.in_edges()) {
+    if (!edge->IsControlEdge()) continue;
+    control_edges.push_back(edge);
+  }
+  for (const Edge* edge : control_edges) {
+    graph->graph.RemoveControlEdge(edge);
+  }
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h
index f54585b0a1034ff108202272a11416e34985959e..b51ef2b53122802fef598a26bd6f1843976f11b0 100644
--- a/tensorflow/c/python_api.h
+++ b/tensorflow/c/python_api.h
@@ -35,6 +35,8 @@ void SetRequestedDevice(TF_Graph* graph, TF_Operation* op, const char* device);
 void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst,
                 TF_Status* status);
 
+void RemoveAllControlInputs(TF_Graph* graph, TF_Operation* op);
+
 }  // namespace tensorflow
 
 #endif  // THIRD_PARTY_TENSORFLOW_C_PYTHON_API_H_
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index e354831d7d25af83c068a68a4f844056263a598c..ddcee3deee444382f4bdb206de6f06ee62265a51 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -421,7 +421,7 @@ tf_cc_test(
 
 tf_gen_op_wrappers_cc(
     name = "cc_ops",
-    api_def_srcs = ["//tensorflow/core:base_api_def"],
+    api_def_srcs = ["//tensorflow/core/api_def:base_api_def"],
     op_lib_names = [
         "array_ops",
         "audio_ops",
@@ -448,7 +448,6 @@ tf_gen_op_wrappers_cc(
         "ops/const_op.h",
         "ops/standard_ops.h",
     ],
-    override_file = "ops/op_gen_overrides.pbtxt",
     pkg = "//tensorflow/core",
 )
 
@@ -527,14 +526,13 @@ cc_library_with_android_deps(
     ],
     copts = tf_copts(),
     data = [
-        "//tensorflow/core:base_api_def",
+        "//tensorflow/core/api_def:base_api_def",
     ],
     deps = [
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:op_gen_lib",
-        "//tensorflow/core:op_gen_overrides_proto_cc",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
     ],
@@ -547,15 +545,11 @@ tf_cc_test(
         "framework/cc_op_gen.h",
         "framework/cc_op_gen_test.cc",
     ],
-    data = [
-        "//tensorflow/cc:ops/op_gen_overrides.pbtxt",
-    ],
     deps = [
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:op_gen_lib",
-        "//tensorflow/core:op_gen_overrides_proto_cc",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
diff --git a/tensorflow/cc/client/client_session_test.cc b/tensorflow/cc/client/client_session_test.cc
index dfbac9788e16e9c7c65abcd1ea213b51d5d5d060..ea5cf5a1f12be316cc6e0d0a02cd3caf4d177400 100644
--- a/tensorflow/cc/client/client_session_test.cc
+++ b/tensorflow/cc/client/client_session_test.cc
@@ -23,7 +23,13 @@ limitations under the License.
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
-using namespace ops;  // NOLINT(build/namespaces)
+namespace {
+
+using ops::Add;
+using ops::Const;
+using ops::Mul;
+using ops::Placeholder;
+using ops::Sub;
 
 TEST(ClientSessionTest, Basic) {
   Scope root = Scope::NewRootScope();
@@ -89,4 +95,5 @@ TEST(ClientSessionTest, MultiThreaded) {
   test::ExpectTensorEqual<int>(outputs[0], test::AsTensor<int>({-1, 2}, {2}));
 }
 
-}  // end namespace tensorflow
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index d889c518f9c38a9f070970b37a2ad4b1fc26671b..a40ad1ffc3b262840e6ca0043139b1b61e04510d 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -1057,16 +1057,9 @@ string MakeInternal(const string& fname) {
 }  // namespace
 
 void WriteCCOps(const OpList& ops, const ApiDefMap& api_def_map,
-                const string& dot_h_fname, const string& dot_cc_fname,
-                const string& overrides_fnames) {
+                const string& dot_h_fname, const string& dot_cc_fname) {
   Env* env = Env::Default();
 
-  // Load the override map.
-  OpGenOverrideMap override_map;
-  if (!overrides_fnames.empty()) {
-    TF_CHECK_OK(override_map.LoadFileList(env, overrides_fnames));
-  }
-
   // Write the initial boilerplate to the .h and .cc files.
   std::unique_ptr<WritableFile> h = nullptr;
   std::unique_ptr<WritableFile> cc = nullptr;
diff --git a/tensorflow/cc/framework/cc_op_gen.h b/tensorflow/cc/framework/cc_op_gen.h
index cea28990144b9371e8009ce13f912b44044f9aac..1b5f7dd923731e56ab3d7e5288d17fef9eb3beb0 100644
--- a/tensorflow/cc/framework/cc_op_gen.h
+++ b/tensorflow/cc/framework/cc_op_gen.h
@@ -24,8 +24,7 @@ namespace tensorflow {
 
 /// Result is written to files dot_h and dot_cc.
 void WriteCCOps(const OpList& ops, const ApiDefMap& api_def_map,
-                const string& dot_h_fname, const string& dot_cc_fname,
-                const string& overrides_fnames);
+                const string& dot_h_fname, const string& dot_cc_fname);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/cc/framework/cc_op_gen_main.cc b/tensorflow/cc/framework/cc_op_gen_main.cc
index 326d5668b8803ee39ffe24900c92e1db87b93601..3157792e15a006555e4924eea3c72ea643e79c1c 100644
--- a/tensorflow/cc/framework/cc_op_gen_main.cc
+++ b/tensorflow/cc/framework/cc_op_gen_main.cc
@@ -28,7 +28,7 @@ namespace tensorflow {
 namespace {
 
 void PrintAllCCOps(const std::string& dot_h, const std::string& dot_cc,
-                   const std::string& overrides_fnames, bool include_internal,
+                   bool include_internal,
                    const std::vector<string>& api_def_dirs) {
   OpList ops;
   OpRegistry::Global()->Export(include_internal, &ops);
@@ -49,7 +49,7 @@ void PrintAllCCOps(const std::string& dot_h, const std::string& dot_cc,
 
   api_def_map.UpdateDocs();
 
-  WriteCCOps(ops, api_def_map, dot_h, dot_cc, overrides_fnames);
+  WriteCCOps(ops, api_def_map, dot_h, dot_cc);
 }
 
 }  // namespace
@@ -57,24 +57,21 @@ void PrintAllCCOps(const std::string& dot_h, const std::string& dot_cc,
 
 int main(int argc, char* argv[]) {
   tensorflow::port::InitMain(argv[0], &argc, &argv);
-  // TODO(annarev): Update this file to no longer take op_gen_overrides.pbtxt
-  // as an argument.
-  if (argc != 6) {
+  if (argc != 5) {
     for (int i = 1; i < argc; ++i) {
       fprintf(stderr, "Arg %d = %s\n", i, argv[i]);
     }
     fprintf(stderr,
-            "Usage: %s out.h out.cc overrides1.pbtxt,2.pbtxt include_internal "
+            "Usage: %s out.h out.cc include_internal "
             "api_def_dirs1,api_def_dir2 ...\n"
             "  include_internal: 1 means include internal ops\n",
             argv[0]);
     exit(1);
   }
 
-  bool include_internal = tensorflow::StringPiece("1") == argv[4];
+  bool include_internal = tensorflow::StringPiece("1") == argv[3];
   std::vector<tensorflow::string> api_def_dirs = tensorflow::str_util::Split(
-      argv[5], ",", tensorflow::str_util::SkipEmpty());
-  tensorflow::PrintAllCCOps(argv[1], argv[2], argv[3], include_internal,
-                            api_def_dirs);
+      argv[4], ",", tensorflow::str_util::SkipEmpty());
+  tensorflow::PrintAllCCOps(argv[1], argv[2], include_internal, api_def_dirs);
   return 0;
 }
diff --git a/tensorflow/cc/framework/cc_op_gen_test.cc b/tensorflow/cc/framework/cc_op_gen_test.cc
index 0b7e720a5c7b343415eee1aa157b8de755a1e1a5..1e0f2d241bb350897a840dda90d6d0c009b1daad 100644
--- a/tensorflow/cc/framework/cc_op_gen_test.cc
+++ b/tensorflow/cc/framework/cc_op_gen_test.cc
@@ -24,10 +24,6 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-// TODO(annarev): Remove this op_gen_overrides.pbtxt reference.
-// It is needed only because WriteCCOps takes it as an argument.
-constexpr char kOverridesFnames[] =
-    "tensorflow/cc/ops/op_gen_overrides.pbtxt";
 constexpr char kBaseOpDef[] = R"(
 op {
   name: "Foo"
@@ -96,7 +92,7 @@ void GenerateCcOpFiles(Env* env, const OpList& ops,
   const auto internal_h_file_path = io::JoinPath(tmpdir, "test_internal.h");
   const auto internal_cc_file_path = io::JoinPath(tmpdir, "test_internal.cc");
 
-  WriteCCOps(ops, api_def_map, h_file_path, cc_file_path, kOverridesFnames);
+  WriteCCOps(ops, api_def_map, h_file_path, cc_file_path);
 
   TF_ASSERT_OK(ReadFileToString(env, h_file_path, h_file_text));
   TF_ASSERT_OK(
diff --git a/tensorflow/cc/framework/cc_ops_test.cc b/tensorflow/cc/framework/cc_ops_test.cc
index 5da23036eaadbef270ba839357dc4613bf3bf490..ac05e3cf95b1ce4009ee1424713baf2d34902a94 100644
--- a/tensorflow/cc/framework/cc_ops_test.cc
+++ b/tensorflow/cc/framework/cc_ops_test.cc
@@ -22,8 +22,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status_test_util.h"
 
 namespace tensorflow {
-using namespace ops;  // NOLINT(build/namespaces)
-
+namespace ops {
 namespace {
 
 Output Linear(const Scope& scope, Input x, Input w, Input b) {
@@ -39,8 +38,6 @@ void GetColocationConstraints(const Output& tensor,
                            constraints));
 }
 
-}  // namespace
-
 TEST(CCOpTest, Basic) {
   Scope root = Scope::NewRootScope();
   auto c = Const(root, {{1, 1}});
@@ -249,4 +246,6 @@ TEST(CCOpTest, InvalidFinalize) {
             string::npos);
 }
 
+}  // namespace
+}  // namespace ops
 }  // namespace tensorflow
diff --git a/tensorflow/cc/framework/gradient_checker_test.cc b/tensorflow/cc/framework/gradient_checker_test.cc
index fdc457f40af875d7c0c243246755d0cb87c44a62..d4f0a7f5ab3716be41e22c02a21aca028f76fb88 100644
--- a/tensorflow/cc/framework/gradient_checker_test.cc
+++ b/tensorflow/cc/framework/gradient_checker_test.cc
@@ -24,10 +24,18 @@ limitations under the License.
 #include "tensorflow/core/util/equal_graph_def.h"
 
 namespace tensorflow {
-using namespace ops;  // NOLINT(build/namespaces)
-
 namespace {
 
+using ops::Complex;
+using ops::Const;
+using ops::MatMul;
+using ops::Placeholder;
+using ops::Real;
+using ops::Split;
+using ops::Square;
+using ops::Stack;
+using ops::Unstack;
+
 TEST(GradientCheckerTest, BasicFloat) {
   Scope scope = Scope::NewRootScope();
   TensorShape shape({2, 4, 3});
diff --git a/tensorflow/cc/framework/gradients_test.cc b/tensorflow/cc/framework/gradients_test.cc
index 07a062e704ed6ffc6389b5897309957a1bfcd1c2..26e3170ad8e4f4fba1c2dc014086acf24d949f72 100644
--- a/tensorflow/cc/framework/gradients_test.cc
+++ b/tensorflow/cc/framework/gradients_test.cc
@@ -26,10 +26,20 @@ limitations under the License.
 #include "tensorflow/core/util/equal_graph_def.h"
 
 namespace tensorflow {
-using namespace ops;  // NOLINT(build/namespaces)
-
 namespace {
 
+using ops::Assign;
+using ops::Const;
+using ops::Identity;
+using ops::MatMul;
+using ops::OnesLike;
+using ops::Placeholder;
+using ops::Square;
+using ops::Stack;
+using ops::StopGradient;
+using ops::Unstack;
+using ops::Variable;
+
 // TODO(andydavis) Add more unit tests once more gradient functions are ported.
 class GradientsTest : public ::testing::Test {
  protected:
diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc
index 455d7330c10cf230462869475f25a1f1b9bf9e9e..4a215fcc9299cf8b8da04cbf151640631ed0d449 100644
--- a/tensorflow/cc/gradients/array_grad_test.cc
+++ b/tensorflow/cc/gradients/array_grad_test.cc
@@ -23,11 +23,11 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status_test_util.h"
 
 namespace tensorflow {
+namespace {
+
 using namespace ops;  // NOLINT(build/namespaces)
 using ops::internal::MirrorPadGrad;
 
-namespace {
-
 class ArrayGradTest : public ::testing::Test {
  protected:
   ArrayGradTest() : scope_(Scope::NewRootScope()) {}
diff --git a/tensorflow/cc/gradients/data_flow_grad_test.cc b/tensorflow/cc/gradients/data_flow_grad_test.cc
index 734dfd3af97b856a7c8c4894c4a6d1a3ade10992..0ba3c0e27b1e545a30925ea3ef9e2c54dc9d0ae9 100644
--- a/tensorflow/cc/gradients/data_flow_grad_test.cc
+++ b/tensorflow/cc/gradients/data_flow_grad_test.cc
@@ -23,10 +23,13 @@ limitations under the License.
 #include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
-using namespace ops;  // NOLINT(build/namespaces)
-
 namespace {
 
+using ops::Const;
+using ops::DynamicPartition;
+using ops::DynamicStitch;
+using ops::Placeholder;
+
 class DataFlowGradTest : public ::testing::Test {
  protected:
   DataFlowGradTest() : scope_(Scope::NewRootScope()) {}
diff --git a/tensorflow/cc/gradients/grad_testutil.cc b/tensorflow/cc/gradients/grad_testutil.cc
index 04b29d4e8b21eeee200d9e7390868d701eda3c22..304117d3719346202d3a8a18637f7c915d4a47f9 100644
--- a/tensorflow/cc/gradients/grad_testutil.cc
+++ b/tensorflow/cc/gradients/grad_testutil.cc
@@ -18,16 +18,14 @@ limitations under the License.
 #include "tensorflow/cc/framework/grad_op_registry.h"
 
 namespace tensorflow {
-using namespace ops;  // NOLINT(build/namespaces)
-
 namespace test {
 
 Status CallGradFunction(const Scope& scope, const Operation& op,
                         const std::vector<Output>& grad_inputs,
                         std::vector<Output>* grad_outputs) {
-  GradFunc grad_fn;
-  TF_RETURN_IF_ERROR(
-      GradOpRegistry::Global()->Lookup(op.node()->type_string(), &grad_fn));
+  ops::GradFunc grad_fn;
+  TF_RETURN_IF_ERROR(ops::GradOpRegistry::Global()->Lookup(
+      op.node()->type_string(), &grad_fn));
   TF_RETURN_IF_ERROR(grad_fn(scope, op, grad_inputs, grad_outputs));
   TF_RETURN_IF_ERROR(scope.status());
   return Status::OK();
diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc
index d7446b9560fd7dc8377ea3710641906b274313a9..52c177212a8c88f1857defcc38de4a01ac47dab0 100644
--- a/tensorflow/cc/gradients/math_grad.cc
+++ b/tensorflow/cc/gradients/math_grad.cc
@@ -473,6 +473,41 @@ Status AddNGrad(const Scope& scope, const Operation& op,
 }
 REGISTER_GRADIENT_OP("AddN", AddNGrad);
 
+Status PowGrad(const Scope& scope, const Operation& op,
+               const std::vector<Output>& grad_inputs,
+               std::vector<Output>* grad_outputs) {
+  auto x = ConjugateHelper(scope, op.input(0));
+  auto y = ConjugateHelper(scope, op.input(1));
+  auto z = ConjugateHelper(scope, op.output(0));
+  auto grad = grad_inputs[0];
+  // grad * y * pow(x, y - 1)
+  auto one = Cast(scope, Const(scope, 1.0), y.type());
+  auto gx_1 = Mul(scope,
+                  Mul(scope, grad, y),
+                  Pow(scope, x, Sub(scope, y, one)));
+  // Avoid false singularity at x = 0
+  DataType x_dtype = x.type();
+  auto zero = Cast(scope, Const(scope, 0.0), x_dtype);
+  if (x_dtype == DT_COMPLEX64 || x_dtype == DT_COMPLEX128) {
+    // real(x) < 0 is fine for the complex case
+    auto log_x = Where3(scope,
+                        NotEqual(scope, x, zero),
+                        Log(scope, x),
+                        ZerosLike(scope, x));
+    auto gy_1 = Mul(scope, Mul(scope, grad, z), log_x);
+    return BinaryGradCommon(scope, op, grad_outputs, gx_1, gy_1);
+  } else {
+    // There's no sensible real value to return if x < 0, so return 0
+    auto log_x = Where3(scope,
+                        Greater(scope, x, zero),
+                        Log(scope, x),
+                        ZerosLike(scope, x));
+    auto gy_1 = Mul(scope, Mul(scope, grad, z), log_x);
+    return BinaryGradCommon(scope, op, grad_outputs, gx_1, gy_1);
+  }
+}
+REGISTER_GRADIENT_OP("Pow", PowGrad);
+
 // MaximumMinimumGradCommon adds shared ops to calculate gradients for
 // the binary Maximum and Minimum ops.
 Status MaximumMinimumGradCommon(const Scope& scope, const Operation& op,
@@ -794,6 +829,183 @@ Status MinOrMaxGrad(const Scope& scope, const Operation& op,
 REGISTER_GRADIENT_OP("Min", MinOrMaxGrad);
 REGISTER_GRADIENT_OP("Max", MinOrMaxGrad);
 
+Status ProdGrad(const Scope& scope, const Operation& op,
+                const std::vector<Output>& grad_inputs,
+                std::vector<Output>* grad_outputs) {
+  auto zero = Const(scope, 0);
+  auto one = Const(scope, 1);
+
+  // The gradient can be expressed by dividing the product by each entry of
+  // the input tensor. If our input is
+  // [
+  //  [3, 4],
+  //  [5, 6],
+  //  [7, 8]
+  // ]
+  // and we do a Prod operation on the axis 1, we will obtain [[105, 192]].
+  // The gradient will have the same shape as the input
+  //     [
+  //       [105/3, 192/4],
+  // dz *  [105/5, 192/6],
+  //       [105/7, 192/6]
+  //     ]
+  // If the input contains a zero, the division is impossible but
+  // if we take the calculation that gave the first gradient
+  // (3 * 5 * 6)/3 is equal to 5 * 6
+  // the trick will be to cumprod the elements on the axis without
+  // the element at the current position (3 in the example above).
+  // We will take as example:
+  // [
+  //   [
+  //     [3.0, 4.0],
+  //     [5.0, 6.0],
+  //     [7.0, 8.0]
+  //   ],
+  //   [
+  //     [3.0, 5.0],
+  //     [0.0, 6.0],
+  //     [5.0, 6.0]
+  //   ]
+  // ]
+
+  // [2, 3, 2]
+  auto input_shape = Shape(scope, op.input(0));
+
+  // The Reshape with -1 flattens the reduction indices.
+  // [1]
+  auto reduction_indices = Reshape(scope, op.input(1), {-1});
+
+  // [2, 1, 2]
+  auto output_shape_kept_dims =
+      ReducedShapeHelper(scope, input_shape, reduction_indices);
+
+  // [1, 3, 1]
+  auto tile_scaling = SafeDivHelper(scope, input_shape, output_shape_kept_dims);
+
+  // [[[105, 192]], [[0, 180]]]
+  auto grad = Reshape(scope, grad_inputs[0], output_shape_kept_dims);
+
+  // [[[105, 192], [105, 192], [105, 192]], [[0, 180], [0, 180], [0, 180]]]
+  auto grad_tiled = Tile(scope, grad, tile_scaling);
+
+  Scope cpu_scope = scope.WithDevice("/cpu:0");
+
+  // [3]
+  auto rank = Rank(cpu_scope, op.input(0));
+
+
+  // Normalize any negative indices in the reduction_axes to positive values.
+  auto reduction_indices_pos = Mod(cpu_scope, Add(cpu_scope, reduction_indices, rank), rank);
+
+  // [1]
+  auto reduced = Cast(cpu_scope, reduction_indices_pos, DataType::DT_INT32);
+
+  // [0, 1, 2]
+  auto idx = Range(cpu_scope, zero, rank, one);
+
+  // [0, 2]
+  auto other = SetDiff1D(cpu_scope, idx, reduced).out;
+
+  // [1, 0, 2]
+  auto perm =
+      Concat(cpu_scope, std::initializer_list<Input>{reduced, other}, 0);
+
+  // 3 => [3]
+  auto reduced_num = Prod(cpu_scope, Gather(scope, input_shape, reduced), 0);
+
+  // 2 * 2 => [2]
+  auto other_num = Prod(cpu_scope, Gather(scope, input_shape, other), 0);
+
+  // [
+  //    [
+  //       [ 3.,  4.],
+  //       [ 3.,  5.]
+  //   ],
+  //   [
+  //       [ 5.,  6.],
+  //       [ 0.,  6.]
+  //   ],
+  //   [
+  //       [ 7.,  8.],
+  //       [ 5.,  6.]
+  //   ]
+  // ]
+  auto permuted = Transpose(scope, op.input(0), perm);
+
+  // [3, 2, 2]
+  auto permuted_shape = Shape(scope, permuted);
+
+  // [
+  //   [ 3.,  4.,  3.,  5.],
+  //   [ 5.,  6.,  0.,  6.],
+  //   [ 7.,  8.,  5.,  6.]
+  // ]
+  auto reshaped = Reshape(
+      scope, permuted,
+      Stack(scope, std::initializer_list<Input>{reduced_num, other_num}));
+
+  // [
+  //   [ 1.,  1.,  1.,  1.],
+  //   [ 3.,  4.,  3.,  5.],
+  //   [ 15.,  24.,  0.,  30.]
+  // ]
+  auto left = Cumprod(scope, reshaped, zero, Cumprod::Exclusive(true));
+
+  // [
+  //   [ 35.,  48.,  0.,  36.],
+  //   [  7.,   8.,   5.,   6.],
+  //   [  1.,   1.,   1.,   1.]
+  // ]
+  auto right =
+      Cumprod(scope, reshaped, zero, Cumprod::Exclusive(true).Reverse(true));
+
+  // left * right =
+  // [
+  //   [ 35.,  48.,  0.,  36.],
+  //   [ 21.,  32.,  15.,  30.],
+  //   [ 15.,  24.,  0.,  30.]
+  // ]
+  // y =
+  // [
+  //   [
+  //     [ 35.,  48.],
+  //     [ 0.,  36.]
+  //   ],
+  //   [
+  //     [ 21.,  32.],
+  //     [ 15.,  30.]
+  //   ],
+  //   [
+  //     [ 15.,  24.],
+  //     [ 0.,  30.]
+  //   ]
+  // ]
+  auto y = Reshape(scope, Mul(scope, left, right), permuted_shape);
+
+  // out = 
+  // [
+  //   [
+  //     [ 35.,  48.],
+  //     [ 21.,  32.],
+  //     [ 15.,  24.]
+  //   ],
+  //   [
+  //     [ 0.,   36.],
+  //     [ 15.,  30.],
+  //     [ 0.,  30.]
+  //   ]
+  // ]
+  auto out =
+      Mul(scope, grad_tiled, Transpose(scope, y, InvertPermutation(scope, perm)));
+
+  grad_outputs->push_back(Reshape(scope, out, input_shape));
+
+  // stop propagation along reduction_indices
+  grad_outputs->push_back(NoGradient());
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("Prod", ProdGrad);
+
 // MatMulGrad helper function used to compute two MatMul operations
 // based on input matrix transposition combinations.
 Status MatMulGradHelper(const Scope& scope, const bool is_batch,
diff --git a/tensorflow/cc/gradients/math_grad_test.cc b/tensorflow/cc/gradients/math_grad_test.cc
index 6313f41da5e5f9cf88be4c8a84408a8df77f0e25..1b4c7c2688083e74433da3dce2849b8c37443684 100644
--- a/tensorflow/cc/gradients/math_grad_test.cc
+++ b/tensorflow/cc/gradients/math_grad_test.cc
@@ -23,10 +23,31 @@ limitations under the License.
 #include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
-using namespace ops;  // NOLINT(build/namespaces)
-
 namespace {
 
+using ops::Abs;
+using ops::Add;
+using ops::AddN;
+using ops::BatchMatMul;
+using ops::Const;
+using ops::Div;
+using ops::Greater;
+using ops::MatMul;
+using ops::Max;
+using ops::Maximum;
+using ops::Mean;
+using ops::Min;
+using ops::Minimum;
+using ops::Mul;
+using ops::Placeholder;
+using ops::Pow;
+using ops::Prod;
+using ops::RealDiv;
+using ops::SquaredDifference;
+using ops::Sub;
+using ops::Sum;
+using ops::Where3;
+
 // TODO(andydavis) Test gradient function against numeric gradients output.
 // TODO(andydavis) As more gradients are added move common test functions
 // to a testutil library.
@@ -83,6 +104,7 @@ class CWiseUnaryGradTest : public ::testing::Test {
 
     Output y;
     switch (op_type) {
+      using namespace ops;  // NOLINT(build/namespaces)
       case ABS:
         y = Abs(scope_, x);
         break;
@@ -843,6 +865,14 @@ TEST_F(NaryGradTest, SquaredDifference) {
   RunTest({x1, x2}, {x1_shape, x2_shape}, {y}, {x1_shape});
 }
 
+TEST_F(NaryGradTest, Pow) {
+  TensorShape shape({3});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
+  // fix exponent to avoid overflow
+  auto y = Pow(scope_, x, Const(scope_, {1.f, 2.f, 3.f}));
+  RunTest({x}, {shape}, {y}, {shape});
+}
+
 TEST_F(NaryGradTest, Maximum) {
   TensorShape shape({3, 2});
   auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
@@ -865,5 +895,14 @@ TEST_F(NaryGradTest, Minimum) {
   RunTest(x, x_init_value, y, shape);
 }
 
+TEST_F(NaryGradTest, Prod) {
+  TensorShape x_shape({2, 3, 2});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
+  auto y = Prod(scope_, x, {1});
+  // y's shape is the result of reducing x along axes 1
+  TensorShape y_shape({2, 1, 2});
+  RunTest({x}, {x_shape}, {y}, {y_shape});
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index f9063e836509669d81d03b1d2f0d32d1166b6eca..0cfe5f6e3c49f7c4a3cafbf48ff4e54a0ffd0d47 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -23,10 +23,22 @@ limitations under the License.
 #include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
-using namespace ops;  // NOLINT(build/namespaces)
-
 namespace {
 
+using ops::BiasAdd;
+using ops::Conv2D;
+using ops::Elu;
+using ops::L2Loss;
+using ops::LogSoftmax;
+using ops::LRN;
+using ops::MaxPool;
+using ops::MaxPoolV2;
+using ops::Placeholder;
+using ops::Relu;
+using ops::Relu6;
+using ops::Selu;
+using ops::Softmax;
+
 class NNGradTest : public ::testing::Test {
  protected:
   NNGradTest() : scope_(Scope::NewRootScope()) {}
diff --git a/tensorflow/cc/ops/op_gen_overrides.pbtxt b/tensorflow/cc/ops/op_gen_overrides.pbtxt
deleted file mode 100644
index 4aac990e748b0a79cbc3b353b4121a582b0883b0..0000000000000000000000000000000000000000
--- a/tensorflow/cc/ops/op_gen_overrides.pbtxt
+++ /dev/null
@@ -1,238 +0,0 @@
-# array_ops
-op { name: "BroadcastArgs" rename_to: "BroadcastDynamicShape" }
-op { name: "BroadcastGradientArgs" hide: true }
-op { name: "ConcatOffset" skip: true }  # Maybe should just be hidden?
-op { name: "Concat" skip: true }
-op { name: "ConcatV2" rename_to: "Concat" }
-op { name: "ExpandDims" input_rename: { from: "dim" to: "axis" } }
-op { name: "ListDiff" rename_to: "SetDiff1D" }
-op { name: "MirrorPadGrad" hide: true }
-op { name: "Reverse" skip: true }
-op { name: "ReverseV2" rename_to: "Reverse" }
-op { name: "Split" input_rename: { from: "split_dim" to: "axis" } }
-op { name: "SplitV" input_rename: { from: "split_dim" to: "axis" } }
-op { name: "Squeeze" attr_rename: { from: "squeeze_dims" to: "axis" } }
-op { name: "Pack" rename_to: "Stack" }
-op { name: "Unpack" rename_to: "Unstack" }
-op { name: "Select" rename_to: "Where3" input_rename: { from: "t" to: "x" } input_rename: { from: "e" to: "y" } }
-op { name: "Where" input_rename: { from: "input" to: "condition" } }
-
-
-# candidate_sampling_ops
-op { name: "ThreadUnsafeUnigramCandidateSampler", skip: true }
-
-# control_flow_ops
-# TODO(joshl): Hide Switch and Merge once we write and migrate users to
-# a Cond() API.
-#op { name: "Switch" hide: true }
-#op { name: "Merge" hide: true }
-op { name: "RefMerge" hide: true }
-op { name: "Exit" hide: true }
-op { name: "RefExit" hide: true }
-op { name: "Enter" hide: true }
-op { name: "RefEnter" hide: true }
-op { name: "RefIdentity" hide: true }
-
-# ctc_ops
-
-# data_flow_ops
-op { name: "FakeQueue" skip: true }
-op { name: "FIFOQueue" skip: true}
-op { name: "FIFOQueueV2" rename_to: "FIFOQueue" }
-op { name: "PaddingFIFOQueue" skip: true }
-op { name: "PaddingFIFOQueueV2" rename_to: "PaddingFIFOQueue" }
-op { name: "PriorityQueue" skip: true }
-op { name: "PriorityQueueV2" rename_to: "PriorityQueue" }
-op { name: "QueueClose" skip: true }
-op { name: "QueueCloseV2" rename_to: "QueueClose" }
-op { name: "QueueDequeue" skip: true }
-op { name: "QueueDequeueV2" rename_to: "QueueDequeue" }
-op { name: "QueueDequeueMany" skip: true }
-op { name: "QueueDequeueManyV2" rename_to: "QueueDequeueMany" }
-op { name: "QueueDequeueUpTo" skip: true }
-op { name: "QueueDequeueUpToV2" rename_to: "QueueDequeueUpTo" }
-op { name: "QueueEnqueue" skip: true }
-op { name: "QueueEnqueueV2" rename_to: "QueueEnqueue" }
-op { name: "QueueEnqueueMany" skip: true }
-op { name: "QueueEnqueueManyV2" rename_to: "QueueEnqueueMany" }
-op { name: "QueueSize" skip: true }
-op { name: "QueueSizeV2" rename_to: "QueueSize" }
-op { name: "RandomShuffleQueue" skip: true }
-op { name: "RandomShuffleQueueV2" rename_to: "RandomShuffleQueue" }
-op { name: "ReaderNumRecordsProduced" skip: true }
-op { name: "ReaderNumRecordsProducedV2" rename_to: "ReaderNumRecordsProduced" }
-op { name: "ReaderNumWorkUnitsCompleted" skip: true }
-op { name: "ReaderNumWorkUnitsCompletedV2" rename_to: "ReaderNumWorkUnitsCompleted" }
-op { name: "ReaderRead" skip: true }
-op { name: "ReaderReadUpTo" skip: true }
-op { name: "ReaderReadUpToV2" rename_to: "ReaderReadUpTo" }
-op { name: "ReaderReadV2" rename_to: "ReaderRead" }
-op { name: "ReaderReset" skip: true }
-op { name: "ReaderResetV2" rename_to: "ReaderReset" }
-op { name: "ReaderRestoreState" skip: true }
-op { name: "ReaderRestoreStateV2" rename_to: "ReaderRestoreState" }
-op { name: "ReaderSerializeState" skip: true }
-op { name: "ReaderSerializeStateV2" rename_to: "ReaderSerializeState" }
-op { name: "FixedLengthRecordReader" skip: true }
-op { name: "FixedLengthRecordReaderV2" rename_to: "FixedLengthRecordReader" }
-op { name: "IdentityReader" skip: true }
-op { name: "IdentityReaderV2" rename_to: "IdentityReader" }
-op { name: "TFRecordReader" skip: true }
-op { name: "TFRecordReaderV2" rename_to: "TFRecordReader" }
-op { name: "TextLineReader" skip: true }
-op { name: "TextLineReaderV2" rename_to: "TextLineReader" }
-
-# Skip hash table ops until we have better support in C++ (ops are currently
-# only used in contrib)
-op { name: "HashTable" skip: true }
-op { name: "InitializeTable" skip: true }
-op { name: "InitializeTableFromTextFile" skip: true }
-op { name: "LookupTableFind" skip: true }
-op { name: "LookupTableImport" skip: true }
-op { name: "LookupTableInsert" skip: true }
-op { name: "LookupTableSize" skip: true }
-op { name: "MutableDenseHashTable" skip: true }
-op { name: "MutableHashTable" skip: true }
-op { name: "MutableHashTableOfTensors" skip: true }
-
-# Stack ops are internal to control flow gradients (not yet implemented in C++)
-op { name: "Stack" skip: true }
-op { name: "StackClose" skip: true }
-op { name: "StackPop" skip: true }
-op { name: "StackPush" skip: true }
-op { name: "StackV2" skip: true }
-op { name: "StackCloseV2" skip: true }
-op { name: "StackPopV2" skip: true }
-op { name: "StackPushV2" skip: true }
-
-op { name: "TensorArrayCloseV2" skip: true }
-op { name: "TensorArrayCloseV3" rename_to: "TensorArrayClose" }
-op { name: "TensorArrayConcatV2" skip: true }
-op { name: "TensorArrayConcatV3" rename_to: "TensorArrayConcat" }
-op { name: "TensorArrayGatherV2" skip: true }
-op { name: "TensorArrayGatherV3" rename_to: "TensorArrayGather" }
-op { name: "TensorArrayGradV2" skip: true }
-op { name: "TensorArrayGradV3" rename_to: "TensorArrayGrad" }
-op { name: "TensorArrayReadV2" skip: true }
-op { name: "TensorArrayReadV3" rename_to: "TensorArrayRead" }
-op { name: "TensorArrayScatterV2" skip: true }
-op { name: "TensorArrayScatterV3" rename_to: "TensorArrayScatter" }
-op { name: "TensorArraySizeV2" skip: true }
-op { name: "TensorArraySizeV3" rename_to: "TensorArraySize" }
-op { name: "TensorArraySplitV2" skip: true }
-op { name: "TensorArraySplitV3" rename_to: "TensorArraySplit" }
-op { name: "TensorArrayV2" skip: true }
-op { name: "TensorArrayV3" rename_to: "TensorArray" }
-op { name: "TensorArrayWriteV2" skip: true }
-op { name: "TensorArrayWriteV3" rename_to: "TensorArrayWrite" }
-
-op { name: "WholeFileReader" skip: true }
-op { name: "WholeFileReaderV2" rename_to: "WholeFileReader" }
-
-# functional_ops
-
-# image_ops
-op { name: "AdjustContrastv2" rename_to: "AdjustContrast" }
-op { name: "ResizeBilinearGrad" hide: true }
-op { name: "ResizeBicubicGrad" hide: true }
-op { name: "ResizeNearestNeighborGrad" hide: true }
-
-# io_ops
-
-# linalg_ops
-op { name: "SelfAdjointEigV2" rename_to: "SelfAdjointEig" }
-
-# logging_ops
-op { name: "AudioSummaryV2" rename_to: "AudioSummary" }
-
-# lookup_ops
-op { name: "LookupTableFind" skip: true }
-op { name: "LookupTableFindV2" rename_to: "LookupTableFind" }
-op { name: "LookupTableInsert" skip: true }
-op { name: "LookupTableInsertV2" rename_to: "LookupTableInsert" }
-op { name: "LookupTableSize" skip: true }
-op { name: "LookupTableSizeV2" rename_to: "LookupTableSize" }
-op { name: "LookupTableExport" skip: true }
-op { name: "LookupTableExportV2" rename_to: "LookupTableExport" }
-op { name: "LookupTableImport" skip: true }
-op { name: "LookupTableImportV2" rename_to: "LookupTableImport" }
-op { name: "HashTable" skip: true }
-op { name: "HashTableV2" rename_to: "HashTable" }
-op { name: "MutableHashTable" skip: true }
-op { name: "MutableHashTableV2" rename_to: "MutableHashTable" }
-op { name: "MutableHashTableOfTensors" skip: true }
-op { name: "MutableHashTableOfTensorsV2" rename_to: "MutableHashTableOfTensors" }
-op { name: "MutableDenseHashTable" skip: true }
-op { name: "MutableDenseHashTableV2" rename_to: "MutableDenseHashTable" }
-op { name: "InitializeTable" skip: true }
-op { name: "InitializeTableV2" rename_to: "InitializeTable" }
-op { name: "InitializeTableFromTextFile" skip: true }
-op { name: "InitializeTableFromTextFileV2" rename_to: "InitializeTableFromTextFile" }
-
-# math_ops
-op { name: "All" alias: "ReduceAll" input_rename: { from: "reduction_indices" to: "axis" } }
-op { name: "Any" alias: "ReduceAny" input_rename: { from: "reduction_indices" to: "axis" } }
-op { name: "Max" alias: "ReduceMax" input_rename: { from: "reduction_indices" to: "axis" } }
-op { name: "Mean" alias: "ReduceMean" input_rename: { from: "reduction_indices" to: "axis" } }
-op { name: "Min" alias: "ReduceMin" input_rename: { from: "reduction_indices" to: "axis" } }
-op { name: "Mul" rename_to: "Multiply" alias: "Mul" }
-op { name: "Neg" rename_to: "Negate" alias: "Neg" }
-op { name: "Prod" alias: "ReduceProd" input_rename: { from: "reduction_indices" to: "axis" } }
-op { name: "Sub" rename_to: "Subtract" alias: "Sub" }
-op { name: "Sum" alias: "ReduceSum" input_rename: { from: "reduction_indices" to: "axis" } }
-op { name: "SigmoidGrad" hide: true }
-op { name: "TanhGrad" hide: true }
-op { name: "InvGrad" hide: true }
-op { name: "ReciprocalGrad" hide: true }
-op { name: "SqrtGrad" hide: true }
-op { name: "RsqrtGrad" hide: true }
-
-# *Grad ops get hidden, only for use by the gradient code.
-op { name: "SigmoidGrad" hide: true }
-op { name: "TanhGrad" hide: true }
-op { name: "InvGrad" hide: true }
-op { name: "ReciprocalGrad" hide: true }
-op { name: "SqrtGrad" hide: true }
-op { name: "RsqrtGrad" hide: true }
-
-# nn_ops
-op { name: "AvgPoolGrad" hide: true }
-op { name: "LRNGrad" hide: true }
-op { name: "MaxPoolGrad" hide: true }
-op { name: "MaxPoolGradWithArgmax" hide: true }
-op { name: "ReluGrad" hide: true }
-op { name: "Relu6Grad" hide: true }
-op { name: "EluGrad" hide: true }
-op { name: "SeluGrad" hide: true }
-op { name: "SoftplusGrad" hide: true }
-op { name: "SoftsignGrad" hide: true }
-op { name: "FractionalAvgPoolGrad" hide: true }
-op { name: "FractionalMaxPoolGrad" hide: true }
-op { name: "TopKV2" rename_to: "TopK" }
-op { name: "BiasAddV1" skip: true }  # Use BiasAdd instead
-
-# parsing_ops
-
-# random_ops
-
-op { name: "RandomStandardNormal" rename_to: "RandomNormal" }
-# script_ops
-# Calling Python functions from a C++ program isn't supported
-op { name: "PyFunc" skip: true }
-op { name: "PyFuncStateless" skip: true}
-
-# sdca_ops
-
-# state_ops
-
-op { name: "Variable" skip: true }
-op { name: "VariableV2" rename_to: "Variable" }
-
-# sparse_ops
-
-# string_ops
-
-# user_ops
-
-# training_ops
-
diff --git a/tensorflow/cc/ops/while_loop.cc b/tensorflow/cc/ops/while_loop.cc
index e0251efb2a424f86bd5a4885ef22d1928e04bd3e..d1c918d464bc9684b0db6dade2fb80cb2bd6691a 100644
--- a/tensorflow/cc/ops/while_loop.cc
+++ b/tensorflow/cc/ops/while_loop.cc
@@ -116,7 +116,7 @@ Status CreateCond(const Scope& scope, const CondGraphBuilderFn& cond,
   return Status::OK();
 }
 
-// Create the bdoy subgraph defined by `body`. `outputs` must be non-null and
+// Create the body subgraph defined by `body`. `outputs` must be non-null and
 // empty.
 Status CreateBody(const Scope& scope, const BodyGraphBuilderFn& body,
                   const std::vector<Output>& inputs,
diff --git a/tensorflow/cc/profiler/BUILD b/tensorflow/cc/profiler/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..00799526fce572e7bb80199ccb8ce1cc89874031
--- /dev/null
+++ b/tensorflow/cc/profiler/BUILD
@@ -0,0 +1,36 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test")
+
+tf_cuda_cc_test(
+    name = "profiler_test",
+    srcs = ["profiler_test.cc"],
+    deps = [
+        ":profiler",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:tensorflow",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
+cc_library(
+    name = "profiler",
+    srcs = ["profiler.cc"],
+    hdrs = ["profiler.h"],
+    deps = [
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
+        "//tensorflow/core/profiler/internal:tfprof_stats",
+    ],
+)
diff --git a/tensorflow/cc/profiler/profiler.cc b/tensorflow/cc/profiler/profiler.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3e55bac73e6d32a1fa5ddcc1937744e2cf56657d
--- /dev/null
+++ b/tensorflow/cc/profiler/profiler.cc
@@ -0,0 +1,57 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/cc/profiler/profiler.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+Profiler::Profiler(const GraphDef& graph) {
+  std::unique_ptr<GraphDef> graph_ptr(new GraphDef());
+  *graph_ptr = graph;
+  stats_.reset(new TFStats(std::move(graph_ptr), nullptr, nullptr, nullptr));
+}
+
+void Profiler::AddStep(int64 step, const RunMetadata& run_meta) {
+  std::unique_ptr<RunMetadata> run_meta_ptr(new RunMetadata());
+  *run_meta_ptr = run_meta;
+  stats_->AddRunMeta(step, std::move(run_meta_ptr));
+}
+
+GraphNodeProto Profiler::ProfileGraph(const Options& options) {
+  stats_->BuildView(kCmds[1]);
+  return stats_->ShowGraphNode(kCmds[1], options);
+}
+
+GraphNodeProto Profiler::ProfileNameScope(const Options& options) {
+  stats_->BuildView(kCmds[0]);
+  return stats_->ShowGraphNode(kCmds[0], options);
+}
+
+MultiGraphNodeProto Profiler::ProfileOperations(const Options& options) {
+  stats_->BuildView(kCmds[3]);
+  return stats_->ShowMultiGraphNode(kCmds[3], options);
+}
+
+Status Profiler::SerializeToString(string* content) {
+  if (!content) {
+    return Status(error::Code::INVALID_ARGUMENT,
+                  "Cannot use null string pointer for SerializeToString.");
+  }
+  stats_->SerializeToString(content);
+  return Status::OK();
+}
+
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/cc/profiler/profiler.h b/tensorflow/cc/profiler/profiler.h
new file mode 100644
index 0000000000000000000000000000000000000000..e1ce315d3c125ef9f0cb16209e199690211df440
--- /dev/null
+++ b/tensorflow/cc/profiler/profiler.h
@@ -0,0 +1,97 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CC_PROFILER_PROFILER_H_
+#define THIRD_PARTY_TENSORFLOW_CC_PROFILER_PROFILER_H_
+
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/profiler/internal/tfprof_stats.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
+#include "tensorflow/core/profiler/tfprof_output.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+/// @addtogroup core
+/// @{
+
+/// A `Profiler` object lets the caller profile the execution of a graph.
+///
+/// Example:
+///     // First build a graph and run tracing.
+///     Scope root = Scope::NewRootScope();
+///     auto a = Placeholder(root, DT_INT32);
+///     auto c = Add(root, a, {41});
+///
+///     ClientSession session(root);
+///     std::vector<Tensor> outputs;
+///     RunOptions run_options;
+///     run_options.set_trace_level(RunOptions::FULL_TRACE);
+///     RunMetadata run_meta;
+///     Status s = session.Run(run_options, { {a, {1}} }, {c}, &outputs,
+///                            &run_meta);
+///     if (!s.ok()) { ... }
+///
+///     // Then create profiler to do profiling.
+///     GraphDef graph;
+///     root.ToGraphDef(&graph);
+///     Profiler profiler(graph);
+///     profiler.AddStep(0, run_meta);
+///     Options opts = ...  // TODO(xpan): Support option building API.
+///     MultiGraphNodeProto r = profiler.ProfileOperations(opts);
+///
+class Profiler {
+ public:
+  /// `graph` is the model's GraphDef.
+  Profiler(const GraphDef& graph);
+
+  /// Adds tracing information `run_meta` to profiler. A `run_meta` is
+  /// generated by a TensorFlow session run call. `step` is the key
+  /// to the `run_meta`. When calling ProfileXXX methods, caller can specify
+  /// `step` in `options` to seletively profile the corresponding `run_meta`.
+  /// Multiple different `run_meta` can be keyed by the same `step` in order
+  /// to group them together.
+  void AddStep(int64 step, const RunMetadata& run_meta);
+
+  /// Profiles the model by organizing nodes in graph structure.
+  /// Each node is an op and the nodes are contected by the op inputs/outputs.
+  GraphNodeProto ProfileGraph(const Options& options);
+
+  /// Profiles the model by organizing nodes in name scope structure.
+  /// Each node is an op, and nodes are organized by the ops' name
+  /// scope, similar to a filesystem tree.
+  /// E.g. /foo is the root of operation /foo/matmul_1 and foo/conv_2.
+  GraphNodeProto ProfileNameScope(const Options& options);
+
+  /// Profiles the model by organizing nodes by operation types.
+  /// Each node is an operation type (e.g. Conv2D or MatMul), containing all
+  /// ops belonging to that type in the model.
+  MultiGraphNodeProto ProfileOperations(const Options& options);
+
+  /// Serialize the profile content (ProfileProto) into a binary string,
+  /// User can write the string to file for offline analysis by
+  /// tfprof command-line tools or graphical user interface.
+  Status SerializeToString(string* content);
+
+ private:
+  std::unique_ptr<TFStats> stats_;
+};
+/// @}
+
+}  // namespace tfprof
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CC_PROFILER_PROFILER_H_
diff --git a/tensorflow/cc/profiler/profiler_test.cc b/tensorflow/cc/profiler/profiler_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..280cd74827fc8ae80737eaf61286535fec959aa8
--- /dev/null
+++ b/tensorflow/cc/profiler/profiler_test.cc
@@ -0,0 +1,177 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/test.h"
+
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/cc/profiler/profiler.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/graph/default_device.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/public/session.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+class ProfilerTest : public ::testing::Test {
+ protected:
+  ProfilerTest() {}
+};
+
+GraphDef CreateGraphDef() {
+  Scope root = Scope::NewRootScope();
+
+  auto a = ops::Const<float>(root, {{3, 2}, {-1, 0}});
+
+  auto x = ops::Const(root.WithOpName("x"), {{1.f}, {1.f}});
+
+  auto y = ops::MatMul(root.WithOpName("y"), a, x);
+
+  auto y2 = ops::Square(root, y);
+
+  auto y2_sum = ops::Sum(root, y2, 0);
+
+  auto y_norm = ops::Sqrt(root, y2_sum);
+
+  auto y_div = ops::Div(root.WithOpName("y_normalized"), y, y_norm);
+
+  GraphDef def;
+  TF_CHECK_OK(root.ToGraphDef(&def));
+
+  return def;
+}
+
+Options Default() {
+  Options opts(1000,       /* max_depth */
+               0,          /* min_bytes */
+               0,          /* min_peak_bytes */
+               0,          /* min_residual_bytes */
+               0,          /* min_output_bytes */
+               0,          /* min_micros */
+               0,          /* min_accelerator_micros */
+               0,          /* min_cpu_micros */
+               0,          /* min_params */
+               0,          /* min_float_ops */
+               0,          /* min_occurrence */
+               0,          /* step */
+               "name",     /* order_by */
+               {".*"},     /* account_type_regexes */
+               {".*"},     /* start_name_regexes */
+               {},         /* trim_name_regexes */
+               {".*"}, {}, /* hide_name_regexes */
+               false,      /* account_displayed_op_only */
+               {"micros"}, /* select */
+               {"none"},   /* output_type */
+               {});
+  return opts;
+}
+
+template <typename T>
+const T* ExtractNode(const T& pb, const string& name) {
+  if (pb.name() == name) {
+    return &pb;
+  }
+  for (const T& c : pb.children()) {
+    const T* ret = ExtractNode(c, name);
+    if (ret) return ret;
+  }
+  return nullptr;
+}
+
+TEST_F(ProfilerTest, Basics) {
+  SessionOptions options;
+  options.config.set_allow_soft_placement(true);
+  std::unique_ptr<Session> session(NewSession(options));
+  GraphDef def = CreateGraphDef();
+  if (options.target.empty()) {
+    graph::SetDefaultDevice("/gpu:0", &def);
+  }
+
+  TF_CHECK_OK(session->Create(def));
+
+  Tensor x(DT_FLOAT, TensorShape({2, 1}));
+  auto x_flat = x.flat<float>();
+  x_flat.setRandom();
+  Eigen::Tensor<float, 0, Eigen::RowMajor> inv_norm =
+      x_flat.square().sum().sqrt().inverse();
+  x_flat = x_flat * inv_norm();
+
+  std::vector<Tensor> outputs;
+  RunOptions run_options;
+  run_options.set_trace_level(RunOptions::FULL_TRACE);
+  RunMetadata run_metadata;
+  outputs.clear();
+
+  Profiler profiler(def);
+  for (int i = 0; i < 2; ++i) {
+    TF_CHECK_OK(session->Run(run_options, {{"x", x}}, {"y:0", "y_normalized:0"},
+                             {}, &outputs, &run_metadata));
+    profiler.AddStep(i, run_metadata);
+    CHECK_EQ(size_t{2}, outputs.size());
+  }
+
+  std::vector<DeviceAttributes> resp;
+  TF_CHECK_OK(session->ListDevices(&resp));
+  bool has_gpu = false;
+  for (const auto& dev : resp) {
+    if (dev.device_type() == "GPU") {
+      has_gpu = true;
+    }
+  }
+
+  GraphNodeProto ret = profiler.ProfileNameScope(Default());
+  const GraphNodeProto* matmul = ExtractNode(ret, "y");
+  EXPECT_TRUE(matmul);
+  EXPECT_GT(matmul->exec_micros(), 0);
+  if (has_gpu) {
+    EXPECT_GT(matmul->accelerator_exec_micros(), 0);
+  } else {
+    EXPECT_EQ(matmul->accelerator_exec_micros(), 0);
+  }
+  const GraphNodeProto* square = ExtractNode(ret, "Square");
+  EXPECT_TRUE(square);
+  EXPECT_GT(square->exec_micros(), 0);
+  if (has_gpu) {
+    EXPECT_GT(square->accelerator_exec_micros(), 0);
+  } else {
+    EXPECT_EQ(square->accelerator_exec_micros(), 0);
+  }
+
+  Options opts2 = Default();
+  opts2.output_type = "timeline";
+  string timeline_file = io::JoinPath(testing::TmpDir(), "timeline");
+  opts2.output_options["outfile"] = timeline_file;
+  GraphNodeProto ret2 = profiler.ProfileGraph(opts2);
+  string s;
+  TF_CHECK_OK(ReadFileToString(Env::Default(), timeline_file + "_0", &s));
+  EXPECT_TRUE(s.find("Square") != s.npos);
+
+  MultiGraphNodeProto ret3 = profiler.ProfileOperations(Default());
+  const MultiGraphNodeProto* matmul2 = ExtractNode(ret3, "MatMul");
+  EXPECT_TRUE(matmul2);
+  EXPECT_GT(matmul2->exec_micros(), 0);
+  if (has_gpu) {
+    EXPECT_GT(matmul2->accelerator_exec_micros(), 0);
+  } else {
+    EXPECT_EQ(matmul2->accelerator_exec_micros(), 0);
+  }
+
+  TF_CHECK_OK(session->Close());
+}
+
+}  // namespace tfprof
+}  // namespace tensorflow
diff --git a/tensorflow/cc/saved_model/loader.cc b/tensorflow/cc/saved_model/loader.cc
index f98abc8a817eca7bc129bb03a2ad31b97d957065..acef098c7d07f45d171679bff7c41e13ef0424f1 100644
--- a/tensorflow/cc/saved_model/loader.cc
+++ b/tensorflow/cc/saved_model/loader.cc
@@ -62,6 +62,15 @@ Status ReadSavedModel(const string& export_dir, SavedModel* saved_model_proto) {
                     export_dir);
 }
 
+string GetTagsAsString(const std::unordered_set<string>& tags) {
+  string tags_as_string = "{ ";
+  for (const string& tag : tags) {
+    tags_as_string = strings::StrCat(tags_as_string, tag, " ");
+  }
+  tags_as_string = strings::StrCat(tags_as_string, "}");
+  return tags_as_string;
+}
+
 Status FindMetaGraphDefToLoad(const SavedModel& saved_model_proto,
                               const std::unordered_set<string>& tags,
                               MetaGraphDef* meta_graph_def_to_load) {
@@ -77,14 +86,9 @@ Status FindMetaGraphDefToLoad(const SavedModel& saved_model_proto,
       return Status::OK();
     }
   }
-  string tags_as_string = "{ ";
-  for (const string& tag : tags) {
-    tags_as_string = strings::StrCat(tags_as_string, tag, " ");
-  }
-  tags_as_string = strings::StrCat(tags_as_string, "}");
   return Status(error::Code::NOT_FOUND,
                 "Could not find meta graph def matching supplied tags: " +
-                    tags_as_string +
+                    GetTagsAsString(tags) +
                     ". To inspect available tag-sets in the SavedModel, please "
                     "use the SavedModel CLI: `saved_model_cli`");
 }
@@ -233,7 +237,8 @@ Status LoadSavedModelInternal(const SessionOptions& session_options,
     return Status(error::Code::NOT_FOUND,
                   "SavedModel not found in export directory: " + export_dir);
   }
-  LOG(INFO) << "Loading SavedModel from: " << export_dir;
+  LOG(INFO) << "Loading SavedModel with tags: " << GetTagsAsString(tags)
+            << "; from: " << export_dir;
 
   SavedModel saved_model_proto;
   TF_RETURN_IF_ERROR(ReadSavedModel(export_dir, &saved_model_proto));
@@ -281,7 +286,8 @@ Status LoadSavedModel(const SessionOptions& session_options,
     return end_microseconds - start_microseconds;
   }();
   auto log_and_count = [&](const string& status_str) {
-    LOG(INFO) << "Loading SavedModel: " << status_str << ". Took "
+    LOG(INFO) << "SavedModel load for tags " << GetTagsAsString(tags)
+              << "; Status: " << status_str << ". Took "
               << load_latency_microsecs << " microseconds.";
     load_attempt_count->GetCell(export_dir, status_str)->IncrementBy(1);
   };
diff --git a/tensorflow/cc/saved_model/python/BUILD b/tensorflow/cc/saved_model/python/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..f5fbc75edcba9d5ae9ef7432de224df766bcab9e
--- /dev/null
+++ b/tensorflow/cc/saved_model/python/BUILD
@@ -0,0 +1,30 @@
+# Description:
+# CLIF wrappers for TensorFlow SavedModels.
+
+licenses(["notice"])  # Apache 2.0
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+load("//tensorflow/core:platform/default/build_config.bzl", "tf_py_clif_cc")
+
+tf_py_clif_cc(
+    name = "loader",
+    srcs = ["loader.clif"],
+    deps = [
+        "//tensorflow/cc/saved_model:loader",
+    ],
+)
diff --git a/tensorflow/cc/saved_model/python/loader.clif b/tensorflow/cc/saved_model/python/loader.clif
new file mode 100644
index 0000000000000000000000000000000000000000..b102757d2eeb46ee713d8ed0d0c3d66b58740ee0
--- /dev/null
+++ b/tensorflow/cc/saved_model/python/loader.clif
@@ -0,0 +1,4 @@
+from "third_party/tensorflow/cc/saved_model/loader.h":
+  namespace `tensorflow`:
+    class SavedModelBundle:
+      def __init__(self)
diff --git a/tensorflow/cc/tools/BUILD b/tensorflow/cc/tools/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..0a7c37383f96ca65bf5ae05cf0827c01dc4d799b
--- /dev/null
+++ b/tensorflow/cc/tools/BUILD
@@ -0,0 +1,58 @@
+# Description:
+# TensorFlow cc tools.
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_cc_test",
+)
+
+cc_library(
+    name = "freeze_saved_model",
+    srcs = ["freeze_saved_model.cc"],
+    hdrs = ["freeze_saved_model.h"],
+    deps = [
+        "//tensorflow/cc/saved_model:loader",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:tensorflow",
+    ],
+)
+
+tf_cc_test(
+    name = "freeze_saved_model_test",
+    srcs = ["freeze_saved_model_test.cc"],
+    deps = [
+        ":freeze_saved_model",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
+# -----------------------------------------------------------------------------
+# Google-internal targets.
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/cc/tools/freeze_saved_model.cc b/tensorflow/cc/tools/freeze_saved_model.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ddf372cdef21e1b3892c9a03714478d5a5785517
--- /dev/null
+++ b/tensorflow/cc/tools/freeze_saved_model.cc
@@ -0,0 +1,194 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/tools/freeze_saved_model.h"
+
+#include <queue>
+
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/function.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/versions.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/protobuf/meta_graph.pb.h"
+
+namespace tensorflow {
+
+namespace {
+
+// Gets tensor names from tensor_info and inserts them into the set of tensor
+// names.
+void GetTensorNamesFromTensorInfo(const TensorInfo& tensor_info,
+                                  std::unordered_set<string>* tensor_names) {
+  if (tensor_info.has_coo_sparse()) {
+    // If the tensor is sparse we have to add all three tensors of the sparse
+    // representations.
+    const TensorInfo_CooSparse& coo_sparse = tensor_info.coo_sparse();
+    tensor_names->insert(coo_sparse.values_tensor_name());
+    tensor_names->insert(coo_sparse.indices_tensor_name());
+    tensor_names->insert(coo_sparse.dense_shape_tensor_name());
+  } else {
+    tensor_names->insert(tensor_info.name());
+  }
+}
+
+// Gets the union of all inputs and outputs of all SignatureDefs in the bundle
+void GetSignatureDefsInputsAndOutputs(
+    const SavedModelBundle& saved_model_bundle,
+    std::unordered_set<string>* inputs, std::unordered_set<string>* outputs) {
+  for (auto& sigdef_elem : saved_model_bundle.meta_graph_def.signature_def()) {
+    const SignatureDef& signature_def = sigdef_elem.second;
+    for (auto& input_elem : signature_def.inputs()) {
+      GetTensorNamesFromTensorInfo(input_elem.second, inputs);
+    }
+    for (auto& output_elem : signature_def.outputs()) {
+      GetTensorNamesFromTensorInfo(output_elem.second, outputs);
+    }
+  }
+}
+
+// Gets a map from string node name to NodeDef.
+void GetNodeNameToNodeDefMap(
+    GraphDef* graph_def,
+    std::unordered_map<string, NodeDef*>* name_to_node_map) {
+  for (size_t i = 0; i < graph_def->node_size(); i++) {
+    NodeDef* node = graph_def->mutable_node(i);
+    (*name_to_node_map)[node->name()] = node;
+  }
+}
+
+// Gets the set of node names needed by `outputs` and the corresponding set of
+// variable nodes to convert.
+void GetReachableNodesAndVariables(
+    GraphDef* graph_def, const std::unordered_set<string>& outputs,
+    std::unordered_set<string>* reachable_node_names,
+    std::unordered_set<string>* variable_node_names) {
+  // TODO(suharshs): Add support for ResourceVariables.
+  static const std::unordered_set<string>* kVariableTypes =
+      new std::unordered_set<string>({"Variable", "VariableV2"});
+  // name_to_node_map is needed to get the inputs from the NodeDef corresponding
+  // the a string node name. These inputs are used when doing our backwards
+  // traversal.
+  std::unordered_map<string, NodeDef*> name_to_node_map;
+  GetNodeNameToNodeDefMap(graph_def, &name_to_node_map);
+  std::queue<string> nodes_to_visit;
+  for (const string& tensor_name : outputs) {
+    // We need to strip off the tensor part to get the node name.
+    std::vector<string> tensor_name_parts = str_util::Split(tensor_name, ':');
+    nodes_to_visit.push(tensor_name_parts[0]);
+  }
+  // We do a traversal backwards from the outputs specified in the MetaGraphDef.
+  while (!nodes_to_visit.empty()) {
+    const string node_name = nodes_to_visit.front();
+    nodes_to_visit.pop();
+    if (reachable_node_names->find(node_name) != reachable_node_names->end()) {
+      continue;
+    }
+    reachable_node_names->insert(node_name);
+    NodeDef* node = name_to_node_map[node_name];
+    if (kVariableTypes->find(node->op()) != kVariableTypes->end()) {
+      variable_node_names->insert(node->name());
+    }
+    for (const string& input : node->input()) {
+      nodes_to_visit.push(input);
+    }
+  }
+}
+
+// Gets a map from variable name to variable value.
+Status GetVariableNameToTensorMap(
+    Session* session, std::unordered_set<string> variable_names_set,
+    std::unordered_map<string, Tensor>* variable_name_to_value_map) {
+  if (variable_names_set.empty()) {
+    return Status::OK();
+  }
+  std::vector<string> variable_names;
+  std::vector<string> tensor_names;
+  for (const string& node_name : variable_names_set) {
+    variable_names.push_back(node_name);
+    // We need to run tensors, so append ":0".
+    tensor_names.push_back(node_name + ":0");
+  }
+  std::vector<Tensor> outputs;
+  TF_RETURN_IF_ERROR(
+      session->Run(/* inputs */ {}, tensor_names, /* targets */ {}, &outputs));
+  for (size_t i = 0; i < variable_names.size(); i++) {
+    (*variable_name_to_value_map)[variable_names[i]] = outputs[i];
+  }
+  return Status::OK();
+}
+
+// Converts a Variable NodeDef into a Constant NodeDef.
+void ConvertVariableToConstant(const NodeDef& variable_node,
+                               const Tensor& variable_value,
+                               NodeDef* const_node) {
+  const_node->set_name(variable_node.name());
+  const_node->set_op("Const");
+  (*const_node->mutable_attr())["dtype"] = variable_node.attr().at("dtype");
+  variable_value.AsProtoTensorContent(
+      (*const_node->mutable_attr())["value"].mutable_tensor());
+}
+
+// Freezes the subgraph of all nodes needed by `outputs`.
+Status FreezeGraphDef(const SavedModelBundle& saved_model_bundle,
+                      const std::unordered_set<string>& outputs,
+                      GraphDef* frozen_graph_def) {
+  GraphDef graph_def = saved_model_bundle.meta_graph_def.graph_def();
+  // Copy versions and library as-is from original graph.
+  *frozen_graph_def->mutable_versions() = graph_def.versions();
+  *frozen_graph_def->mutable_library() = graph_def.library();
+  // If the graph is empty there is nothing left to do.
+  if (graph_def.node_size() == 0) {
+    return Status::OK();
+  }
+  std::unordered_set<string> reachable_node_names;
+  std::unordered_set<string> variable_node_names;
+  GetReachableNodesAndVariables(&graph_def, outputs, &reachable_node_names,
+                                &variable_node_names);
+  std::unordered_map<string, Tensor> variable_to_value_map;
+  TF_RETURN_IF_ERROR(
+      GetVariableNameToTensorMap(saved_model_bundle.session.get(),
+                                 variable_node_names, &variable_to_value_map));
+  // We copy the nodes in the same order they were in the original graph_def.
+  for (const NodeDef& node : graph_def.node()) {
+    if (reachable_node_names.find(node.name()) == reachable_node_names.end()) {
+      continue;
+    }
+    if (variable_node_names.find(node.name()) != variable_node_names.end()) {
+      ConvertVariableToConstant(node, variable_to_value_map[node.name()],
+                                frozen_graph_def->add_node());
+    } else {
+      // If the node isn't a variable, just copy the node as-is.
+      *frozen_graph_def->add_node() = node;
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace
+
+Status FreezeSavedModel(const SavedModelBundle& saved_model_bundle,
+                        GraphDef* frozen_graph_def,
+                        std::unordered_set<string>* inputs,
+                        std::unordered_set<string>* outputs) {
+  GetSignatureDefsInputsAndOutputs(saved_model_bundle, inputs, outputs);
+  TF_RETURN_IF_ERROR(
+      FreezeGraphDef(saved_model_bundle, *outputs, frozen_graph_def));
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/cc/tools/freeze_saved_model.h b/tensorflow/cc/tools/freeze_saved_model.h
new file mode 100644
index 0000000000000000000000000000000000000000..bd5e0516c8999dc235747ccec75a57542b0f9bf7
--- /dev/null
+++ b/tensorflow/cc/tools/freeze_saved_model.h
@@ -0,0 +1,43 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CC_TOOLS_FREEZE_SAVED_MODEL_H_
+#define THIRD_PARTY_TENSORFLOW_CC_TOOLS_FREEZE_SAVED_MODEL_H_
+
+#include <unordered_set>
+
+#include "tensorflow/cc/saved_model/loader.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+// Returns a frozen GraphDef, input tensors, and output tensors from the loaded
+// SavedModelBundle.
+// `inputs` and `outputs` consist of the union of all inputs and outputs in the
+// SignatureDefs in the SavedModelBundle.
+// FreezeSavedModel sets `frozen_graph_def` to a GraphDef of all nodes needed by
+// `outputs`. All variables in the supplied SavedModelBundle are converted to
+// constants, set to the value of the variables, by running the restored Session
+// in the SavedModelBundle.
+// WARNING: Only the variable checkpoints will be reflected in the frozen
+// graph_def. All saved_model assets will be ignored.
+Status FreezeSavedModel(const SavedModelBundle& saved_model_bundle,
+                        GraphDef* frozen_graph_def,
+                        std::unordered_set<string>* inputs,
+                        std::unordered_set<string>* outputs);
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CC_TOOLS_FREEZE_SAVED_MODEL_H_
diff --git a/tensorflow/cc/tools/freeze_saved_model_test.cc b/tensorflow/cc/tools/freeze_saved_model_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..57244a4f0adeb9775e35445f77205f3d221ee05b
--- /dev/null
+++ b/tensorflow/cc/tools/freeze_saved_model_test.cc
@@ -0,0 +1,307 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/tools/freeze_saved_model.h"
+
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/versions.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/public/session.h"
+#include "tensorflow/core/public/session_options.h"
+
+namespace tensorflow {
+namespace {
+
+class FreezeTest : public ::testing::Test {
+ protected:
+  void GraphDefEqual(const GraphDef& actual, const GraphDef& expected) {
+    EXPECT_EQ(actual.ShortDebugString(), expected.ShortDebugString());
+  }
+
+  // Builds a SignatureDef with the provided `inputs` and `outputs`.
+  SignatureDef BuildSignatureDef(const std::unordered_set<string>& inputs,
+                                 const std::unordered_set<string>& outputs) {
+    SignatureDef signature_def;
+    for (const string& input : inputs) {
+      (*signature_def.mutable_inputs())[input].set_name(input);
+    }
+    for (const string& output : outputs) {
+      (*signature_def.mutable_outputs())[output].set_name(output);
+    }
+    return signature_def;
+  }
+
+  // Adds `signature_def` to `saved_model_bundle` under `key`.
+  void AddSignatureDefToSavedModelBundle(const SignatureDef& signature_def,
+                                         const string& key,
+                                         SavedModelBundle* saved_model_bundle) {
+    MetaGraphDef* meta_graph_def = &saved_model_bundle->meta_graph_def;
+    (*meta_graph_def->mutable_signature_def())[key] = signature_def;
+  }
+
+  // Adds an initialized session to `saved_model_bundle` using `graph_def` and
+  // initializing with `init_node`.
+  Status InitializeSavedModelBundleSession(
+      const GraphDef& graph_def, const string& init_node,
+      SavedModelBundle* saved_model_bundle) {
+    SessionOptions session_options;
+    saved_model_bundle->session.reset(NewSession(session_options));
+    TF_RETURN_IF_ERROR(saved_model_bundle->session->Create(graph_def));
+    if (!init_node.empty()) {
+      std::vector<Tensor> outputs;
+      return saved_model_bundle->session->Run(
+          /* inputs */ {}, /* output_tensors */ {}, {init_node}, &outputs);
+    }
+    return Status::OK();
+  }
+
+  // Adds `graph_def` to `saved_model_bundle` and intializes a session with
+  // `init_node`.
+  Status AddGraphDefToSavedModelBundle(const GraphDef& graph_def,
+                                       const string& init_node,
+                                       SavedModelBundle* saved_model_bundle) {
+    MetaGraphDef* meta_graph_def = &saved_model_bundle->meta_graph_def;
+    *meta_graph_def->mutable_graph_def() = graph_def;
+    return InitializeSavedModelBundleSession(graph_def, init_node,
+                                             saved_model_bundle);
+  }
+
+  // Adds `graph_def` and `outputs` as the GraphDef and SignatureDef in
+  // `saved_model_bundle` and initializes a session with `init_node`.
+  Status AddGraphDefWithOutputsToSavedModelBundle(
+      const GraphDef& graph_def, const std::unordered_set<string>& outputs,
+      const string& init_node, SavedModelBundle* saved_model_bundle) {
+    SignatureDef signature_def =
+        BuildSignatureDef(std::unordered_set<string>(), outputs);
+    AddSignatureDefToSavedModelBundle(signature_def, "signature_def",
+                                      saved_model_bundle);
+    return AddGraphDefToSavedModelBundle(graph_def, init_node,
+                                         saved_model_bundle);
+  }
+
+  // Runs and compares the outputs of `tensor_name` on both the
+  // `unfrozen_session` and the `frozen_graph_def.
+  void RunAndCompareFrozenAndUnfrozenGraphs(Session* unfrozen_session,
+                                            const GraphDef& frozen_graph_def,
+                                            const string& tensor_name) {
+    std::vector<Tensor> unfrozen_outputs;
+    TF_ASSERT_OK(unfrozen_session->Run(/* inputs */ {}, {tensor_name},
+                                       /* targets */ {}, &unfrozen_outputs));
+
+    SessionOptions session_options;
+    std::unique_ptr<Session> frozen_session(NewSession(session_options));
+    TF_ASSERT_OK(frozen_session->Create(frozen_graph_def));
+    std::vector<Tensor> frozen_outputs;
+    TF_ASSERT_OK(frozen_session->Run(/* inputs */ {}, {tensor_name},
+                                     /* targets */ {}, &frozen_outputs));
+
+    test::ExpectTensorEqual<float>(unfrozen_outputs[0], frozen_outputs[0]);
+  }
+};
+
+TEST_F(FreezeTest, InputsAndOutputsSingleSignatureDef) {
+  // Test that inputs and outputs get correctly populated for a single
+  // SignatureDef.
+  SavedModelBundle saved_model_bundle;
+  std::unordered_set<string> expected_inputs = {"input0:0", "input1:0"};
+  std::unordered_set<string> expected_outputs = {"output0:0", "output1:0"};
+  SignatureDef signature_def =
+      BuildSignatureDef(expected_inputs, expected_outputs);
+  AddSignatureDefToSavedModelBundle(signature_def, "signature_def",
+                                    &saved_model_bundle);
+  GraphDef frozen_graph_def;
+  std::unordered_set<string> inputs;
+  std::unordered_set<string> outputs;
+  TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs,
+                                &outputs));
+  EXPECT_EQ(expected_inputs, inputs);
+  EXPECT_EQ(expected_outputs, outputs);
+}
+
+TEST_F(FreezeTest, InputsAndOutputsMultipleSignatureDefs) {
+  // Test that inputs and outputs get correctly merged and populated when
+  // multiple SignatureDefs are provided.
+  SavedModelBundle saved_model_bundle;
+  SignatureDef signature_def_0 = BuildSignatureDef({"input0:0"}, {"output0:0"});
+  SignatureDef signature_def_1 = BuildSignatureDef({"input1:0"}, {"output1:0"});
+  AddSignatureDefToSavedModelBundle(signature_def_0, "signature_def_0",
+                                    &saved_model_bundle);
+  AddSignatureDefToSavedModelBundle(signature_def_1, "signature_def_1",
+                                    &saved_model_bundle);
+  GraphDef frozen_graph_def;
+  std::unordered_set<string> inputs;
+  std::unordered_set<string> outputs;
+  TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs,
+                                &outputs));
+  std::unordered_set<string> expected_inputs = {"input0:0", "input1:0"};
+  std::unordered_set<string> expected_outputs = {"output0:0", "output1:0"};
+  EXPECT_EQ(expected_inputs, inputs);
+  EXPECT_EQ(expected_outputs, outputs);
+}
+
+TEST_F(FreezeTest, GraphDefVersionsAndLibrary) {
+  // Test that GraphDef versions and library are copied correctly into the
+  // frozen graph.
+  SavedModelBundle saved_model_bundle;
+  GraphDef graph_def;
+  graph_def.mutable_versions()->set_producer(1234);
+  graph_def.mutable_versions()->set_min_consumer(1234);
+  *graph_def.mutable_library()->add_function() = test::function::NonZero();
+  TF_ASSERT_OK(
+      AddGraphDefToSavedModelBundle(graph_def, "", &saved_model_bundle));
+
+  GraphDef frozen_graph_def;
+  std::unordered_set<string> inputs;
+  std::unordered_set<string> outputs;
+  TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs,
+                                &outputs));
+
+  GraphDefEqual(frozen_graph_def, graph_def);
+}
+
+TEST_F(FreezeTest, GraphDefWithNoVariables) {
+  // Test freezing a graph with no variables.
+  SavedModelBundle saved_model_bundle;
+  GraphDef graph_def;
+  Scope scope = Scope::NewRootScope();
+  Output a = ops::Const(scope.WithOpName("a"), 10.0f, {});
+  Output b = ops::Const(scope.WithOpName("b"), 10.0f, {});
+  Output c = ops::Mul(scope.WithOpName("c"), a, b);
+  TF_ASSERT_OK(scope.ToGraphDef(&graph_def));
+  TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle(graph_def, {"c:0"}, "",
+                                                        &saved_model_bundle));
+
+  GraphDef frozen_graph_def;
+  std::unordered_set<string> inputs;
+  std::unordered_set<string> outputs;
+  TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs,
+                                &outputs));
+
+  GraphDefEqual(frozen_graph_def, graph_def);
+}
+
+TEST_F(FreezeTest, GraphDefWithVariablesNotNeededByOutputs) {
+  // Test freezing a graph with variables that are not needed by the outputs in
+  // the SignatureDef. The resulting graph shouldn't be frozen, but
+  // non-dependent nodes should be pruned.
+  SavedModelBundle saved_model_bundle;
+  GraphDef graph_def;
+  Scope scope = Scope::NewRootScope();
+  Output a = ops::Const(scope.WithOpName("a"), 10.0f, {});
+  Output b = ops::Const(scope.WithOpName("b"), 10.0f, {});
+  Output c = ops::Mul(scope.WithOpName("c"), a, b);
+  Output var = ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT);
+  Output assign = ops::Assign(scope.WithOpName("assign"), var, a);
+  TF_ASSERT_OK(scope.ToGraphDef(&graph_def));
+  // "c" isnt dependent on the variable, so nothing should be frozen.
+  TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle(
+      graph_def, {"c:0"}, assign.name(), &saved_model_bundle));
+
+  GraphDef frozen_graph_def;
+  std::unordered_set<string> inputs;
+  std::unordered_set<string> outputs;
+  TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs,
+                                &outputs));
+
+  GraphDef expected_graph_def;
+  Scope expected_scope = Scope::NewRootScope();
+  Output expected_a = ops::Const(expected_scope.WithOpName("a"), 10.0f, {});
+  Output expected_b = ops::Const(expected_scope.WithOpName("b"), 10.0f, {});
+  Output expected_c =
+      ops::Mul(expected_scope.WithOpName("c"), expected_a, expected_b);
+  TF_ASSERT_OK(expected_scope.ToGraphDef(&expected_graph_def));
+
+  GraphDefEqual(frozen_graph_def, expected_graph_def);
+
+  RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(),
+                                       frozen_graph_def, "c:0");
+}
+
+TEST_F(FreezeTest, GraphDefWithVariablesNeededByOutputs) {
+  // Test freezing a graph with variables that are needed by outputs in the
+  // SignatureDef. The variables should be frozen.
+  SavedModelBundle saved_model_bundle;
+  GraphDef graph_def;
+  Scope scope = Scope::NewRootScope();
+  Output a = ops::Const(scope.WithOpName("a"), 10.0f, {});
+  Output var = ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT);
+  Output c = ops::Mul(scope.WithOpName("c"), a, var);
+  Output assign = ops::Assign(scope.WithOpName("assign"), var, a);
+  TF_ASSERT_OK(scope.ToGraphDef(&graph_def));
+  // "c" isnt dependent on the variable, so nothing should be frozen.
+  TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle(
+      graph_def, {"c:0"}, assign.name(), &saved_model_bundle));
+
+  GraphDef frozen_graph_def;
+  std::unordered_set<string> inputs;
+  std::unordered_set<string> outputs;
+  TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs,
+                                &outputs));
+
+  // There should be 3 nodes in the resulting graph_def, and none should be
+  // variables.
+  EXPECT_EQ(frozen_graph_def.node_size(), 3);
+  for (const NodeDef& node : frozen_graph_def.node()) {
+    EXPECT_NE(node.op(), "Variable") << node.name();
+    EXPECT_NE(node.op(), "VariableV2") << node.name();
+  }
+
+  RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(),
+                                       frozen_graph_def, "c:0");
+}
+
+TEST_F(FreezeTest, GraphDefWithVariablesNeededAndNotNeededByOutputs) {
+  // Test freezing a graph with some variables that are needed and not needed by
+  // the outputs in the SignatureDef. The resulting graph should only freeze
+  // dependent variables.
+  SavedModelBundle saved_model_bundle;
+  GraphDef graph_def;
+  Scope scope = Scope::NewRootScope();
+  Output a = ops::Const(scope.WithOpName("a"), 10.0f, {});
+  Output var = ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT);
+  Output c = ops::Mul(scope.WithOpName("c"), a, var);
+  Output assign = ops::Assign(scope.WithOpName("assign"), var, a);
+  Output var_1 =
+      ops::Variable(scope.WithOpName("var_1"), {}, DataType::DT_FLOAT);
+  Output assign_1 = ops::Assign(scope.WithOpName("assign_1"), var, a);
+  TF_ASSERT_OK(scope.ToGraphDef(&graph_def));
+  // "c" isnt dependent on the variable, so nothing should be frozen.
+  TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle(
+      graph_def, {"c:0"}, assign.name(), &saved_model_bundle));
+
+  GraphDef frozen_graph_def;
+  std::unordered_set<string> inputs;
+  std::unordered_set<string> outputs;
+  TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs,
+                                &outputs));
+
+  // There should be 3 nodes in the resulting graph_def, and none should be
+  // variables.
+  EXPECT_EQ(frozen_graph_def.node_size(), 3);
+  for (const NodeDef& node : frozen_graph_def.node()) {
+    EXPECT_NE(node.op(), "Variable") << node.name();
+    EXPECT_NE(node.op(), "VariableV2") << node.name();
+  }
+
+  RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(),
+                                       frozen_graph_def, "c:0");
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD
index a9a6ea84319a18a8fbce648391bf5918ff6d9a08..0540260efd83e18258ec6e93c514d14e328791b1 100644
--- a/tensorflow/compiler/aot/BUILD
+++ b/tensorflow/compiler/aot/BUILD
@@ -24,7 +24,6 @@ tf_cc_test(
     srcs = ["runtime_test.cc"],
     deps = [
         ":runtime",
-        "//tensorflow/compiler/tf2xla:xla_local_runtime_context",
         "//tensorflow/core:framework",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
@@ -53,6 +52,7 @@ cc_library(
         "flags.h",
     ],
     deps = [
+        ":embedded_protocol_buffers",
         ":runtime",  # needed by codegen to print aligned_buffer_bytes
         "//tensorflow/compiler/tf2xla",
         "//tensorflow/compiler/tf2xla:common",
@@ -69,9 +69,7 @@ cc_library(
         "//tensorflow/compiler/xla/client:compile_only_client",
         "//tensorflow/compiler/xla/service:compiler",
         "//tensorflow/compiler/xla/service/cpu:cpu_compiler",
-        "//tensorflow/core:core_cpu",
         "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
@@ -81,13 +79,18 @@ cc_library(
 tf_cc_test(
     name = "codegen_test",
     srcs = ["codegen_test.cc"],
-    data = ["codegen_test_h.golden"],
+    data = [
+        "codegen_test_h.golden",
+        "codegen_test_o.golden",
+    ],
     deps = [
         ":tfcompile_lib",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@llvm//:support",  # fixdeps: keep
+        "@llvm//:x86_code_gen",  # fixdeps: keep
     ],
 )
 
@@ -111,6 +114,7 @@ cc_library(
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
     ],
@@ -190,6 +194,23 @@ cc_library(
     visibility = ["//visibility:public"],
 )
 
+cc_library(
+    name = "embedded_protocol_buffers",
+    srcs = ["embedded_protocol_buffers.cc"],
+    hdrs = ["embedded_protocol_buffers.h"],
+    deps = [
+        "//tensorflow/compiler/tf2xla:common",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
+        "//tensorflow/core:lib",
+        "@llvm//:core",
+        "@llvm//:execution_engine",
+        "@llvm//:support",
+        "@llvm//:target",
+    ],
+)
+
 tf_cc_test(
     name = "benchmark_test",
     srcs = ["benchmark_test.cc"],
diff --git a/tensorflow/compiler/aot/codegen.cc b/tensorflow/compiler/aot/codegen.cc
index ae22f7edc423247b34895411d19d7a3c21f86d4f..2cae85e8965216eaaee4d3032015d0016258a5c1 100644
--- a/tensorflow/compiler/aot/codegen.cc
+++ b/tensorflow/compiler/aot/codegen.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "tensorflow/compiler/aot/embedded_protocol_buffers.h"
 #include "tensorflow/compiler/aot/runtime.h"
 #include "tensorflow/compiler/tf2xla/str_util.h"
 #include "tensorflow/compiler/tf2xla/tf2xla_util.h"
@@ -101,21 +102,8 @@ Status ComputeArgSizes(const CompileResult& compile_result,
                        std::vector<int64>* arg_sizes) {
   const xla::ProgramShape& ps = compile_result.program_shape;
   for (int i = 0; i < ps.parameters_size(); ++i) {
-    if (i == ps.parameters_size() - 1 && compile_result.has_context_arg) {
-      // If the compiled function needs a XlaLocalRuntimeContext* arg, it's
-      // always last, and must be represented as an opaque type.
-      const xla::PrimitiveType type = ps.parameters(i).element_type();
-      if (type != xla::OPAQUE) {
-        return errors::InvalidArgument(
-            "expected final context arg to be opaque, but got type: ",
-            xla::PrimitiveType_Name(type), ", from program shape: ",
-            xla::ShapeUtil::HumanString(ps));
-      }
-      arg_sizes->push_back(-1);
-    } else {
-      arg_sizes->push_back(xla::ShapeUtil::ByteSizeOf(
-          ps.parameters(i), compile_result.pointer_size));
-    }
+    arg_sizes->push_back(xla::ShapeUtil::ByteSizeOf(
+        ps.parameters(i), compile_result.pointer_size));
   }
   return Status::OK();
 }
@@ -165,11 +153,6 @@ string RewriteWithName(const string& name, string code,
 Status GenArgMethods(const tf2xla::Config& config, const xla::ProgramShape& ps,
                      const CompileResult& compile_result, string* methods) {
   size_t num_args = ps.parameters_size();
-  if (compile_result.has_context_arg) {
-    // If the compiled function needs a XlaLocalRuntimeContext* arg, it's
-    // always last, and is set in the class constructor.
-    num_args--;
-  }
   if (config.feed_size() != num_args) {
     return errors::InvalidArgument("mismatch between feed_size(",
                                    config.feed_size(), ") and num_args(",
@@ -281,49 +264,6 @@ string GenNameToIndexCode(const T& entries, bool generate) {
   return code;
 }
 
-// Converts the given `str` into a comma-separated list of per-character values.
-string StringToCharList(const string& str) {
-  string list;
-  for (const char c : str) {
-    if (!list.empty()) {
-      list += ",";
-    }
-    list += strings::StrCat(static_cast<int>(c));
-  }
-  return list;
-}
-
-string GenProgramShapeCode(xla::ProgramShape program_shape, bool generate) {
-  // No need for any static magic if we're not supposed to generate the data.
-  if (!generate) {
-    return "{\n    return nullptr;\n  }";
-  }
-  // The parameter names are currently meaningless, and redundant with the rest
-  // of our metadata, so clear them out to avoid confusion and save space.
-  program_shape.clear_parameter_names();
-  const string proto_str = program_shape.SerializeAsString();
-  // Embed the program shape as a serialized protobuf in the header file.
-  //
-  // TODO(toddw): This strategy will likely fail for larger protobufs, depending
-  // on the C++ compiler that is used. Figure out another solution if necessary.
-  string code = R"({
-    static const xla::ProgramShape* kShape = []() {
-      static const char kProto[] = {{{PROTO_LIST}}};
-      static constexpr int kProtoSize = {{PROTO_SIZE}};
-      xla::ProgramShape* shape = new xla::ProgramShape;
-      shape->ParseFromArray(kProto, kProtoSize);
-      return shape;
-    }();
-    return kShape;
-  })";
-  str_util::ReplaceAllPairs(
-      &code, {
-                 {"{{PROTO_LIST}}", StringToCharList(proto_str)},
-                 {"{{PROTO_SIZE}}", strings::StrCat(proto_str.size())},
-             });
-  return code;
-}
-
 Status ValidateFeedFetchCppNames(const tf2xla::Config& config) {
   for (const tf2xla::Feed& feed : config.feed()) {
     if (!feed.name().empty()) {
@@ -340,8 +280,9 @@ Status ValidateFeedFetchCppNames(const tf2xla::Config& config) {
 
 }  // namespace
 
-Status GenerateHeader(const HeaderOpts& opts, const tf2xla::Config& config,
-                      const CompileResult& compile_result, string* header) {
+Status GenerateHeader(const CodegenOpts& opts, const tf2xla::Config& config,
+                      const CompileResult& compile_result,
+                      const MetadataResult& metadata_result, string* header) {
   TF_RETURN_IF_ERROR(ValidateConfig(config));
   TF_RETURN_IF_ERROR(ValidateFeedFetchCppNames(config));
   const int64 result_index = compile_result.aot->result_buffer_index();
@@ -391,8 +332,6 @@ Status GenerateHeader(const HeaderOpts& opts, const tf2xla::Config& config,
           ?
           R"(#include "tensorflow/compiler/xla/xla_data.pb.h")"
           : "";
-  const string program_shape_code =
-      GenProgramShapeCode(ps, opts.gen_program_shape);
 
   // Use a poor-man's text templating mechanism; first populate the full header
   // with placeholder tokens, and then rewrite the tokens with real values.
@@ -418,7 +357,9 @@ namespace xla { class ExecutableRunOptions; }
 // (Implementation detail) Entry point to the function in the object file.
 extern "C" void {{ENTRY}}(
     void* result, const xla::ExecutableRunOptions* run_options,
-    const void** args, void** temps);
+    const void** args, void** temps, tensorflow::int64* profile_counters);
+
+{{DECLS_FROM_OBJ_FILE}}
 
 {{NS_START}}
 // {{CLASS}} represents a computation previously specified in a
@@ -474,7 +415,6 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction {
       data->temp_sizes = TempSizes();
       data->num_temps = kNumTemps;
       data->result_index = kResultIndex;
-      data->requires_runtime_context = {{HAS_CONTEXT_ARG}};
       data->arg_names = StaticArgNames();
       data->result_names = StaticResultNames();
       data->program_shape = StaticProgramShape();
@@ -483,7 +423,7 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction {
     return *kStaticData;
   }
 
-  {{CLASS}}(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_AND_TEMPS)
+  {{CLASS}}(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS)
       : XlaCompiledCpuFunction(StaticData(), alloc_mode) {}
 
   {{CLASS}}(const {{CLASS}}&) = delete;
@@ -496,8 +436,8 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction {
   // void set_argN_data(void* data)
   //   Sets the buffer of type T for positional argument N. May be called in
   //   any AllocMode. Must be called before Run to have an affect. Must be
-  //   called in AllocMode::RESULTS_AND_TEMPS_ONLY for each positional argument,
-  //   to set the argument buffers.
+  //   called in AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY for each positional
+  //   argument, to set the argument buffers.
   //
   // T* argN_data()
   //   Returns the buffer of type T for positional argument N.
@@ -543,7 +483,10 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction {
   static const char** StaticResultNames() {{RESULT_NAMES_CODE}}
 
   // Shape of the args and results.
-  static const xla::ProgramShape* StaticProgramShape() {{PROGRAM_SHAPE_CODE}}
+  static const xla::ProgramShape* StaticProgramShape() {
+    static const xla::ProgramShape* kShape = {{PROGRAM_SHAPE_SHIM_EXPRESSION}};
+    return kShape;
+  }
 };
 {{NS_END}}
 
@@ -560,26 +503,68 @@ class {{CLASS}} : public tensorflow::XlaCompiledCpuFunction {
       {"{{ARG_SIZES}}", str_util::Join(arg_sizes, ", ")},
       {"{{CLASS}}", opts.class_name},
       {"{{ENTRY}}", compile_result.entry_point},
-      {"{{HAS_CONTEXT_ARG}}",
-       compile_result.has_context_arg ? "true" : "false"},
       {"{{INCLUDE_XLA_DATA_PROTO}}", include_xla_data_proto},
       {"{{METHODS_ARG}}\n", methods_arg},
       {"{{METHODS_RESULT}}\n", methods_result},
       {"{{NS_END}}\n", ns_end},
       {"{{NS_START}}\n", ns_start},
       {"{{PROGRAM_SHAPE}}", xla::ShapeUtil::HumanString(ps)},
-      {"{{PROGRAM_SHAPE_CODE}}", program_shape_code},
       {"{{RESULT_INDEX}}", strings::StrCat(result_index)},
       {"{{RESULT_NAMES_CODE}}", result_names_code},
       {"{{TEMP_BYTES_ALIGNED}}", strings::StrCat(temp_bytes_aligned)},
       {"{{TEMP_BYTES_TOTAL}}", strings::StrCat(temp_bytes_total)},
       {"{{TEMP_NUM}}", strings::StrCat(temp_sizes.size())},
       {"{{TEMP_SIZES}}", str_util::Join(temp_sizes, ", ")},
-  };
+      {"{{DECLS_FROM_OBJ_FILE}}",
+       str_util::Join(metadata_result.header_variable_decls, "\n")},
+      {"{{PROGRAM_SHAPE_SHIM_EXPRESSION}}",
+       metadata_result.program_shape_access_shim}};
   str_util::ReplaceAllPairs(header, rewrites);
   return Status::OK();
 }
 
+static string CreateUniqueIdentifierForProgramShape(const CodegenOpts& opts) {
+  string result = "__tfcompile";
+  for (const string& n : opts.namespaces) {
+    strings::StrAppend(&result, "_", n);
+  }
+
+  strings::StrAppend(&result, "_", opts.class_name, "_ProgramShape");
+  return result;
+}
+
+Status GenerateMetadata(const CodegenOpts& opts,
+                        const CompileResult& compile_result,
+                        MetadataResult* metadata_result) {
+  std::unique_ptr<xla::ProgramShape> program_shape;
+
+  if (opts.gen_program_shape) {
+    program_shape =
+        tensorflow::MakeUnique<xla::ProgramShape>(compile_result.program_shape);
+    // The parameter names are currently meaningless, and redundant with the
+    // rest of our metadata, so clear them out to avoid confusion and save
+    // space.
+    program_shape->clear_parameter_names();
+  }
+
+  // When asked to serialize a null protobuf, CreateEmbeddedProtocolBuffer gives
+  // a shim that evaluates to nullptr, which is what we want.
+
+  TF_ASSIGN_OR_RETURN(
+      EmbeddedProtocolBuffer embedded_program_shape,
+      CreateEmbeddedProtocolBuffer(opts.target_triple,
+                                   CreateUniqueIdentifierForProgramShape(opts),
+                                   "xla::ProgramShape", program_shape.get()));
+
+  metadata_result->program_shape_access_shim =
+      std::move(embedded_program_shape.cpp_shim_expression);
+  metadata_result->header_variable_decls.emplace_back(
+      std::move(embedded_program_shape.cpp_variable_decl));
+  metadata_result->object_file_data =
+      std::move(embedded_program_shape.object_file_data);
+  return Status::OK();
+}
+
 Status ParseCppClass(const string& cpp_class, string* class_name,
                      std::vector<string>* namespaces) {
   class_name->clear();
diff --git a/tensorflow/compiler/aot/codegen.h b/tensorflow/compiler/aot/codegen.h
index 76dd0cc3cf9470a1beb2a4725724f640aecfec7f..3430b1f96cf4d3c035b76c77ccf124c5d164751e 100644
--- a/tensorflow/compiler/aot/codegen.h
+++ b/tensorflow/compiler/aot/codegen.h
@@ -26,11 +26,15 @@ limitations under the License.
 namespace tensorflow {
 namespace tfcompile {
 
-// HeaderOpts specifies options for header-file generation.
-struct HeaderOpts {
+// CodegenOpts specifies code generation options for the generated header file
+// and the generated metadata object file.
+struct CodegenOpts {
   // The name of the generated C++ class, wrapping the generated function.
   string class_name;
 
+  // Target triple for the architecture we're targeting.
+  string target_triple;
+
   // Namespaces specifies a list of C++ namespaces to add to the generated
   // header.  If empty, all symbols will be in the global namespace.
   std::vector<string> namespaces;
@@ -42,11 +46,36 @@ struct HeaderOpts {
   bool gen_program_shape = false;
 };
 
+// Describes a generated metadata object file.
+struct MetadataResult {
+  // These are top level "extern C" declarations that are expected to be visible
+  // wherever program_shape_access_shim is emitted.
+  std::vector<string> header_variable_decls;
+
+  // program_shape_access_shim is a C++ expression that constructs the
+  // xla::ProgramShape instance for the CompileResult passed to
+  // GenerateMetadata.
+  string program_shape_access_shim;
+
+  // The contents of the object (".o") file.
+  string object_file_data;
+};
+
+// Generates a metadata object file according to `opts` and `compile_result`.
+// The generated object file is returned via `metadata_result`.
+Status GenerateMetadata(const CodegenOpts& opts,
+                        const CompileResult& compile_result,
+                        MetadataResult* metadata_result);
+
 // GenerateHeader uses the meta-information from compile_result to generate a
 // C++ header giving access to the function in the generated object file.  The
 // header includes API usage documentation.
-Status GenerateHeader(const HeaderOpts& opts, const tf2xla::Config& config,
-                      const CompileResult& compile_result, string* header);
+//
+// metadata_result is an instance of MetadataResult obtained by a previous
+// invocation to GenerateMetadata.
+Status GenerateHeader(const CodegenOpts& opts, const tf2xla::Config& config,
+                      const CompileResult& compile_result,
+                      const MetadataResult& metadata_result, string* header);
 
 // ParseCppClass parses `cpp_class` into its `class_name` and `namespaces`
 // components.  The syntax is [[<optional_namespace>::],...]<class_name>.  This
diff --git a/tensorflow/compiler/aot/codegen_test.cc b/tensorflow/compiler/aot/codegen_test.cc
index 0f6114666fcc89c631434527d2ae8c92c039ffea..972b7d51ecb3798e61757ac55e973075a23b433a 100644
--- a/tensorflow/compiler/aot/codegen_test.cc
+++ b/tensorflow/compiler/aot/codegen_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "llvm/Support/TargetSelect.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -123,9 +124,39 @@ TEST_F(ParseCppClassTest, ParseFail) {
   ExpectFail("good::0bad");
 }
 
-TEST(GenerateHeader, Golden) {
-  HeaderOpts opts;
+static void CompareWithGoldenFile(
+    const string& tensorflow_relative_golden_file_name,
+    const string& expected_contents) {
+  // To update the golden file, flip update_golden to true and run the
+  // following:
+  // bazel test --test_strategy=local \
+  //   third_party/tensorflow/compiler/aot:codegen_test
+  const bool update_golden = false;
+  const string golden_file_name = io::JoinPath(
+      testing::TensorFlowSrcRoot(), tensorflow_relative_golden_file_name);
+
+  if (update_golden) {
+    TF_EXPECT_OK(
+        WriteStringToFile(Env::Default(), golden_file_name, expected_contents));
+  }
+
+  string golden_file_contents;
+  TF_ASSERT_OK(ReadFileToString(Env::Default(), golden_file_name,
+                                &golden_file_contents));
+  EXPECT_EQ(golden_file_contents, expected_contents);
+}
+
+TEST(CodegenTest, Golden) {
+  // Normally CpuCompiler::CpuCompiler does this, but in this test we've
+  // bypassed the Cpu compiler so we have to do this manually.
+  llvm::InitializeNativeTarget();
+  llvm::InitializeNativeTargetAsmPrinter();
+  LLVMInitializeX86Target();
+  LLVMInitializeX86TargetMC();
+
+  CodegenOpts opts;
   opts.class_name = "MyClass";
+  opts.target_triple = "x86_64-pc-linux";
   opts.namespaces = {"foo", "bar"};
   opts.gen_name_to_index = true;
   opts.gen_program_shape = true;
@@ -145,32 +176,27 @@ TEST(GenerateHeader, Golden) {
       {
           xla::ShapeUtil::MakeShape(xla::F32, {1, 2}),
           xla::ShapeUtil::MakeShape(xla::S64, {3, 4}),
-          xla::ShapeUtil::MakeOpaqueShape(),
       },
       xla::ShapeUtil::MakeTupleShape(
           {xla::ShapeUtil::MakeShape(xla::U32, {5, 6})}));
-  compile_result.has_context_arg = true;
   compile_result.entry_point = "entry_point";
   compile_result.pointer_size = 8;
+
+  MetadataResult metadata_result;
+  TF_ASSERT_OK(GenerateMetadata(opts, compile_result, &metadata_result));
+
+  // The other fields in metadata_result are tested as part of the generated
+  // header test.
+
+  CompareWithGoldenFile("compiler/aot/codegen_test_o.golden",
+                        metadata_result.object_file_data);
+
   string header;
-  TF_EXPECT_OK(GenerateHeader(opts, config, compile_result, &header));
+  TF_ASSERT_OK(
+      GenerateHeader(opts, config, compile_result, metadata_result, &header));
 
-  // Compare against the golden file.
-  const string golden_name = io::JoinPath(testing::TensorFlowSrcRoot(),
-                                          "compiler/aot/codegen_test_h.golden");
-  // To update the golden file, flip update_golden to true and run the
-  // following:
-  // bazel test --test_strategy=local \
-  //   third_party/tensorflow/compiler/aot:codegen_test
-  const bool update_golden = false;
-  if (update_golden) {
-    TF_EXPECT_OK(WriteStringToFile(Env::Default(), golden_name, header));
-  }
-  string golden_data;
-  TF_EXPECT_OK(ReadFileToString(Env::Default(), golden_name, &golden_data));
-  EXPECT_EQ(header, golden_data);
+  CompareWithGoldenFile("compiler/aot/codegen_test_h.golden", header);
 }
-
 }  // namespace
 }  // namespace tfcompile
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden
index 65f342ce27ef09092f252f791973f245a8cdd6f3..ac3b5873318873b5fdf41bd556a0b2abddc2b30b 100644
--- a/tensorflow/compiler/aot/codegen_test_h.golden
+++ b/tensorflow/compiler/aot/codegen_test_h.golden
@@ -19,7 +19,9 @@ namespace xla { class ExecutableRunOptions; }
 // (Implementation detail) Entry point to the function in the object file.
 extern "C" void entry_point(
     void* result, const xla::ExecutableRunOptions* run_options,
-    const void** args, void** temps);
+    const void** args, void** temps, tensorflow::int64* profile_counters);
+
+extern "C" char __tfcompile_foo_bar_MyClass_ProgramShape_protobuf_array_contents[];
 
 namespace foo {
 namespace bar {
@@ -48,7 +50,7 @@ namespace bar {
 //   is guaranteed that no thread may call a non-const method.
 //
 // The logical function signature is:
-//   ((unknown): f32[1,2], (unknown): s64[3,4], (unknown): opaque[]) -> (u32[5,6])
+//   ((unknown): f32[1,2], (unknown): s64[3,4]) -> (u32[5,6])
 //
 // Memory stats:
 //   arg bytes total:    104
@@ -58,11 +60,11 @@ namespace bar {
 class MyClass : public tensorflow::XlaCompiledCpuFunction {
  public:
   // Number of input arguments for the compiled computation.
-  static constexpr size_t kNumArgs = 3;
+  static constexpr size_t kNumArgs = 2;
 
   // Byte size of each argument buffer. There are kNumArgs entries.
   static const intptr_t* ArgSizes() {
-    static constexpr intptr_t kArgSizes[kNumArgs] = {8, 96, -1};
+    static constexpr intptr_t kArgSizes[kNumArgs] = {8, 96};
     return kArgSizes;
   }
 
@@ -77,7 +79,6 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction {
       data->temp_sizes = TempSizes();
       data->num_temps = kNumTemps;
       data->result_index = kResultIndex;
-      data->requires_runtime_context = true;
       data->arg_names = StaticArgNames();
       data->result_names = StaticResultNames();
       data->program_shape = StaticProgramShape();
@@ -86,7 +87,7 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction {
     return *kStaticData;
   }
 
-  MyClass(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_AND_TEMPS)
+  MyClass(AllocMode alloc_mode = AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS)
       : XlaCompiledCpuFunction(StaticData(), alloc_mode) {}
 
   MyClass(const MyClass&) = delete;
@@ -99,8 +100,8 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction {
   // void set_argN_data(void* data)
   //   Sets the buffer of type T for positional argument N. May be called in
   //   any AllocMode. Must be called before Run to have an affect. Must be
-  //   called in AllocMode::RESULTS_AND_TEMPS_ONLY for each positional argument,
-  //   to set the argument buffers.
+  //   called in AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY for each positional
+  //   argument, to set the argument buffers.
   //
   // T* argN_data()
   //   Returns the buffer of type T for positional argument N.
@@ -236,12 +237,10 @@ class MyClass : public tensorflow::XlaCompiledCpuFunction {
   // Shape of the args and results.
   static const xla::ProgramShape* StaticProgramShape() {
     static const xla::ProgramShape* kShape = []() {
-      static const char kProto[] = {10,12,16,11,26,2,1,2,42,4,10,2,1,0,10,12,16,5,26,2,3,4,42,4,10,2,1,0,10,2,16,14,18,16,16,13,34,12,16,8,26,2,5,6,42,4,10,2,1,0};
-      static constexpr int kProtoSize = 50;
-      xla::ProgramShape* shape = new xla::ProgramShape;
-      shape->ParseFromArray(kProto, kProtoSize);
-      return shape;
-    }();
+    xla::ProgramShape* proto = new xla::ProgramShape;
+    proto->ParseFromArray(&__tfcompile_foo_bar_MyClass_ProgramShape_protobuf_array_contents[0], 52);
+    return proto;
+  }();
     return kShape;
   }
 };
diff --git a/tensorflow/compiler/aot/codegen_test_o.golden b/tensorflow/compiler/aot/codegen_test_o.golden
new file mode 100644
index 0000000000000000000000000000000000000000..eb001c5d45bdfefc76629d7303d89f5480432235
Binary files /dev/null and b/tensorflow/compiler/aot/codegen_test_o.golden differ
diff --git a/tensorflow/compiler/aot/compile.cc b/tensorflow/compiler/aot/compile.cc
index 2b8cc6024cb85e4f6269313927ff66d1d9a1cf79..c87f2b75dfa18ad5c3eda4bd6fcbcb3083ef73fd 100644
--- a/tensorflow/compiler/aot/compile.cc
+++ b/tensorflow/compiler/aot/compile.cc
@@ -94,9 +94,8 @@ Status CompileGraph(const GraphDef& graph_def, const tf2xla::Config& config,
       xla::ClientLibrary::GetOrCreateCompileOnlyClient(cpu_platform)
           .ValueOrDie();
   xla::Computation computation;
-  TF_RETURN_IF_ERROR(ConvertGraphDefToXla(graph_def, config, client,
-                                          &computation,
-                                          &compile_result->has_context_arg));
+  TF_RETURN_IF_ERROR(
+      ConvertGraphDefToXla(graph_def, config, client, &computation));
   if (!flags.out_session_module.empty()) {
     TF_ASSIGN_OR_RETURN(std::unique_ptr<xla::SessionModule> module,
                         computation.Snapshot());
diff --git a/tensorflow/compiler/aot/compile.h b/tensorflow/compiler/aot/compile.h
index 965c2960816b3acc8d2209e6824d88647de0ce14..e03c5b1aa77c1262ed903aae3072ef65f34d80a2 100644
--- a/tensorflow/compiler/aot/compile.h
+++ b/tensorflow/compiler/aot/compile.h
@@ -34,7 +34,6 @@ struct CompileResult {
   // Contains object file and meta-info.
   std::unique_ptr<xla::cpu::CpuAotCompilationResult> aot;
   xla::ProgramShape program_shape;  // Static shape of args and results.
-  bool has_context_arg = false;     // Is last arg XlaLocalRuntimeContext?
   string entry_point;               // Name of generated function.
   int pointer_size = 0;             // Size of a pointer in bytes.
 };
diff --git a/tensorflow/compiler/aot/embedded_protocol_buffers.cc b/tensorflow/compiler/aot/embedded_protocol_buffers.cc
new file mode 100644
index 0000000000000000000000000000000000000000..6489929a576d6469c4ff1358ca5ee9d27fb578bb
--- /dev/null
+++ b/tensorflow/compiler/aot/embedded_protocol_buffers.cc
@@ -0,0 +1,158 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/aot/embedded_protocol_buffers.h"
+
+#include <memory>
+#include <string>
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "tensorflow/compiler/tf2xla/str_util.h"
+#include "tensorflow/compiler/xla/ptr_util.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
+#include "tensorflow/compiler/xla/util.h"
+
+namespace tensorflow {
+namespace tfcompile {
+
+using xla::llvm_ir::AsStringRef;
+
+static std::unique_ptr<llvm::Module> CreateModuleWithEmbeddedProtocolBuffer(
+    llvm::LLVMContext* llvm_context, llvm::TargetMachine* target_machine,
+    const ::tensorflow::protobuf::MessageLite& proto,
+    StringPiece unique_identifier, string* protobuf_array_symbol_name,
+    int64* protobuf_array_size) {
+  string protobuf_array_contents = proto.SerializeAsString();
+  *protobuf_array_symbol_name =
+      strings::StrCat(unique_identifier, "_protobuf_array_contents");
+  *protobuf_array_size = protobuf_array_contents.size();
+
+  std::unique_ptr<llvm::Module> module =
+      MakeUnique<llvm::Module>("embedded_data_module", *llvm_context);
+
+  llvm::Constant* protobuf_array_initializer =
+      llvm::ConstantDataArray::getString(*llvm_context,
+                                         AsStringRef(protobuf_array_contents),
+                                         /*AddNull=*/false);
+  new llvm::GlobalVariable(
+      *module, protobuf_array_initializer->getType(),
+      /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
+      protobuf_array_initializer, AsStringRef(*protobuf_array_symbol_name));
+
+  return module;
+}
+
+static string CreateCPPShimExpression(StringPiece qualified_cpp_protobuf_name,
+                                      StringPiece protobuf_array_symbol_name,
+                                      int64 protobuf_array_size) {
+  string code =
+      "[]() {\n"
+      "    {{PROTOBUF_NAME}}* proto = new {{PROTOBUF_NAME}};\n"
+      "    proto->ParseFromArray(&{{ARRAY_SYMBOL}}[0], {{ARRAY_SIZE}});\n"
+      "    return proto;\n"
+      "  }()";
+
+  str_util::ReplaceAllPairs(
+      &code,
+      {
+          {"{{ARRAY_SYMBOL}}", strings::StrCat(protobuf_array_symbol_name)},
+          {"{{ARRAY_SIZE}}", strings::StrCat(protobuf_array_size)},
+          {"{{PROTOBUF_NAME}}", strings::StrCat(qualified_cpp_protobuf_name)},
+      });
+  return code;
+}
+
+static StatusOr<string> CodegenModule(llvm::TargetMachine* target_machine,
+                                      std::unique_ptr<llvm::Module> module) {
+  llvm::SmallVector<char, 0> stream_buffer;
+  llvm::raw_svector_ostream ostream(stream_buffer);
+  llvm::legacy::PassManager codegen_passes;
+
+  if (target_machine->addPassesToEmitFile(
+          codegen_passes, ostream, llvm::TargetMachine::CGFT_ObjectFile)) {
+    return xla::InternalError(
+        "Could not create pass pipeline to generate object file");
+  }
+
+  codegen_passes.run(*module);
+
+  return string(stream_buffer.begin(), stream_buffer.end());
+}
+
+static StatusOr<std::unique_ptr<llvm::TargetMachine>>
+GetTargetMachineFromTriple(StringPiece target_triple) {
+  std::string error;
+  std::string normalized_triple =
+      llvm::Triple::normalize(AsStringRef(target_triple));
+  const llvm::Target* target =
+      llvm::TargetRegistry::lookupTarget(normalized_triple, error);
+  if (target == nullptr) {
+    return xla::InternalError("TargetRegistry::lookupTarget failed: %s",
+                              error.c_str());
+  }
+
+  return WrapUnique(target->createTargetMachine(
+      normalized_triple, /*CPU=*/"",
+      /*Features=*/"", llvm::TargetOptions(), llvm::None));
+}
+
+StatusOr<EmbeddedProtocolBuffer> CreateEmbeddedProtocolBuffer(
+    StringPiece target_triple, StringPiece symbol_prefix,
+    StringPiece qualified_cpp_protobuf_name,
+    const ::tensorflow::protobuf::MessageLite* proto) {
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<llvm::TargetMachine> target_machine,
+                      GetTargetMachineFromTriple(target_triple));
+
+  llvm::LLVMContext llvm_context;
+  string object_file, cpp_shim, cpp_variable_decl;
+
+  if (proto) {
+    string protobuf_array_symbol_name;
+    int64 protobuf_array_size;
+
+    std::unique_ptr<llvm::Module> module_with_serialized_proto =
+        CreateModuleWithEmbeddedProtocolBuffer(
+            &llvm_context, target_machine.get(), *proto, symbol_prefix,
+            &protobuf_array_symbol_name, &protobuf_array_size);
+    TF_ASSIGN_OR_RETURN(object_file,
+                        CodegenModule(target_machine.get(),
+                                      std::move(module_with_serialized_proto)));
+    cpp_shim = CreateCPPShimExpression(qualified_cpp_protobuf_name,
+                                       protobuf_array_symbol_name,
+                                       protobuf_array_size);
+
+    cpp_variable_decl = strings::StrCat("extern \"C\" char ",
+                                        protobuf_array_symbol_name, "[];");
+  } else {
+    TF_ASSIGN_OR_RETURN(
+        object_file,
+        CodegenModule(target_machine.get(),
+                      MakeUnique<llvm::Module>("empty_module", llvm_context)));
+    cpp_shim = "nullptr";
+  }
+
+  return {{cpp_shim, cpp_variable_decl, object_file}};
+}
+
+}  // namespace tfcompile
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/aot/embedded_protocol_buffers.h b/tensorflow/compiler/aot/embedded_protocol_buffers.h
new file mode 100644
index 0000000000000000000000000000000000000000..8436e0ff67f352a24e3d16b46f16c1ad2f3a5957
--- /dev/null
+++ b/tensorflow/compiler/aot/embedded_protocol_buffers.h
@@ -0,0 +1,73 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file defines utilities to help "embed" protocol buffers into object
+// (".o") files.  These C++ binaries and shared objects can link in these .o to
+// get access to said protocol buffers at runtime.
+
+#ifndef TENSORFLOW_COMPILER_AOT_EMBEDDED_PROTOCOL_BUFFERS_H_
+#define TENSORFLOW_COMPILER_AOT_EMBEDDED_PROTOCOL_BUFFERS_H_
+
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/core/platform/protobuf.h"
+
+namespace tensorflow {
+namespace tfcompile {
+using xla::StatusOr;
+
+// Represents a protocol buffer embedded into an object file and describes a way
+// to access it at runtime.
+struct EmbeddedProtocolBuffer {
+  // cpp_shim_expression is a C++ expression that creates an instance of said
+  // protocol buffer when executed.
+  string cpp_shim_expression;
+
+  // cpp_variable_decl is an "extern C" array declaration that is used in
+  // cpp_shim_expression.  It must be visible wherever cpp_shim_expression is
+  // emitted.
+  string cpp_variable_decl;
+
+  // The contents of the object (".o") file the protocol buffer is embbed in.
+  // This needs to be linked in to any program that wants to execute
+  // cpp_variable_decl .
+  string object_file_data;
+};
+
+// Creates an object file that contains `proto`.
+//
+// `proto` is allowed to be nullptr, in which case the generated C++ shim
+// expression is just `nullptr`, and the generated object file does not define
+// any symbols.
+//
+// `target_triple` is the target triple for the target architecture for the
+// generated object file.
+//
+// `symbol_prefix` is prefix that is guaranteed to be unique across the binary
+// or DSO the generated object file will be linked into.
+//
+// `qualified_cpp_protobuf_name` is a qualified ("qualified" as in C++
+// namespace qualified) protocol buffer name.  This needs is only used in
+// EmbeddedProtocolBuffer::cpp_shim_expression so relatively qualified
+// names are fine as long as they're valid wherever cpp_shim_expression
+// is emitted.
+StatusOr<EmbeddedProtocolBuffer> CreateEmbeddedProtocolBuffer(
+    StringPiece target_triple, StringPiece symbol_prefix,
+    StringPiece qualified_cpp_protobuf_name,
+    const ::tensorflow::protobuf::MessageLite* proto);
+
+}  // namespace tfcompile
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_AOT_EMBEDDED_PROTOCOL_BUFFERS_H_
diff --git a/tensorflow/compiler/aot/flags.cc b/tensorflow/compiler/aot/flags.cc
index 7c2f27e550d44c2487f91acf1029c962ac3f5d01..8c95cb8f90ee031fdbb97fabd9d86f848b42e4c5 100644
--- a/tensorflow/compiler/aot/flags.cc
+++ b/tensorflow/compiler/aot/flags.cc
@@ -59,8 +59,13 @@ void AppendMainFlags(std::vector<Flag>* flag_list, MainFlags* flags) {
        "namespaces may precede the class name, separated by double-colons.  "
        "The class will be generated in the given namespace(s), or if no "
        "namespaces are given, within the global namespace."},
-      {"out_object", &flags->out_object, "Output object file name."},
+      {"out_function_object", &flags->out_function_object,
+       "Output object file containing the generated function for the "
+       "TensorFlow model."},
       {"out_header", &flags->out_header, "Output header file name."},
+      {"out_metadata_object", &flags->out_metadata_object,
+       "Output object file name containing optional metadata for the generated "
+       "function."},
       {"out_session_module", &flags->out_session_module,
        "Output session module proto."},
       {"gen_name_to_index", &flags->gen_name_to_index,
diff --git a/tensorflow/compiler/aot/flags.h b/tensorflow/compiler/aot/flags.h
index 3519659e3af7cd345f30080a07ce91fb858623fb..d266fbead61f7eb43863d1c67c0f86926ae9452d 100644
--- a/tensorflow/compiler/aot/flags.h
+++ b/tensorflow/compiler/aot/flags.h
@@ -34,7 +34,8 @@ struct MainFlags {
   string target_features;
   string entry_point;
   string cpp_class;
-  string out_object;
+  string out_function_object;
+  string out_metadata_object;
   string out_header;
   string out_session_module;
 
diff --git a/tensorflow/compiler/aot/runtime_test.cc b/tensorflow/compiler/aot/runtime_test.cc
index ac79c278c1fdf8b6aedcb52121c767b8ba0ad358..6d603a02eb4ceade6832ba67b2981814ee25327a 100644
--- a/tensorflow/compiler/aot/runtime_test.cc
+++ b/tensorflow/compiler/aot/runtime_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/compiler/aot/runtime.h"
 
-#include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/platform/test.h"
 
diff --git a/tensorflow/compiler/aot/tests/make_test_graphs.py b/tensorflow/compiler/aot/tests/make_test_graphs.py
index a898eab1d1ab0eb5d55983bf366753c968887296..89c7cd4507cbd476104a039d6083d8f89de11278 100644
--- a/tensorflow/compiler/aot/tests/make_test_graphs.py
+++ b/tensorflow/compiler/aot/tests/make_test_graphs.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import os
 import sys
 
 from tensorflow.core.protobuf import saver_pb2
@@ -53,7 +54,7 @@ def tfadd_with_ckpt(out_dir):
     sess.run(init_op)
     sess.run(y.assign(y + 42))
     # Without the checkpoint, the variable won't be set to 42.
-    ckpt = '%s/test_graph_tfadd_with_ckpt.ckpt' % out_dir
+    ckpt = os.path.join(out_dir, 'test_graph_tfadd_with_ckpt.ckpt')
     saver.save(sess, ckpt)
 
 
@@ -68,10 +69,10 @@ def tfadd_with_ckpt_saver(out_dir):
     sess.run(init_op)
     sess.run(y.assign(y + 42))
     # Without the checkpoint, the variable won't be set to 42.
-    ckpt_file = '%s/test_graph_tfadd_with_ckpt_saver.ckpt' % out_dir
+    ckpt_file = os.path.join(out_dir, 'test_graph_tfadd_with_ckpt_saver.ckpt')
     saver.save(sess, ckpt_file)
     # Without the SaverDef, the restore op won't be named correctly.
-    saver_file = '%s/test_graph_tfadd_with_ckpt_saver.saver' % out_dir
+    saver_file = os.path.join(out_dir, 'test_graph_tfadd_with_ckpt_saver.saver')
     with open(saver_file, 'wb') as f:
       f.write(saver.as_saver_def().SerializeToString())
 
@@ -129,7 +130,7 @@ def write_graph(build_graph, out_dir):
   g = ops.Graph()
   with g.as_default():
     build_graph(out_dir)
-    filename = '%s/test_graph_%s.pb' % (out_dir, build_graph.__name__)
+    filename = os.path.join(out_dir, 'test_graph_%s.pb' % build_graph.__name__)
     with open(filename, 'wb') as f:
       f.write(g.as_graph_def().SerializeToString())
 
diff --git a/tensorflow/compiler/aot/tests/tfcompile_test.cc b/tensorflow/compiler/aot/tests/tfcompile_test.cc
index 6b037f276ad1d6771b904bb970f45f32ae9531b8..413efd9cea3b6f71574615ad9ca92471ff925781 100644
--- a/tensorflow/compiler/aot/tests/tfcompile_test.cc
+++ b/tensorflow/compiler/aot/tests/tfcompile_test.cc
@@ -70,7 +70,7 @@ TEST(TFCompileTest, Add) {
 // Run tests that use set_argN_data separately, to avoid accidentally re-using
 // non-existent buffers.
 TEST(TFCompileTest, Add_SetArg) {
-  AddComp add(AddComp::AllocMode::RESULTS_AND_TEMPS_ONLY);
+  AddComp add(AddComp::AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY);
 
   int32 arg_x = 10;
   int32 arg_y = 32;
@@ -258,7 +258,7 @@ TEST(TFCompileTest, MatMul2_SetArg) {
   Eigen::ThreadPoolDevice device(&tp, tp.NumThreads());
 
   foo::bar::MatMulComp matmul(
-      foo::bar::MatMulComp::AllocMode::RESULTS_AND_TEMPS_ONLY);
+      foo::bar::MatMulComp::AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY);
   matmul.set_thread_pool(&device);
 
   // Test using the set_argN_data() methods.
diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index 6c385af3b36df78b3f674b3464d68d904ca92907..2b9c83ba149adf9e089786b91039e256216579c8 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -128,7 +128,8 @@ def tf_library(name, graph, config,
 
   # Rule that runs tfcompile to produce the header and object file.
   header_file = name + ".h"
-  object_file = name + ".o"
+  metadata_object_file = name + "_tfcompile_metadata.o"
+  function_object_file = name + "_tfcompile_function.o"
   ep = ("__" + PACKAGE_NAME + "__" + name).replace("/", "_")
   if type(tfcompile_flags) == type(""):
     flags = tfcompile_flags
@@ -142,7 +143,8 @@ def tf_library(name, graph, config,
       ],
       outs=[
           header_file,
-          object_file,
+          metadata_object_file,
+          function_object_file,
       ],
       cmd=("$(location " + tfcompile_tool + ")" +
            " --graph=$(location " + tfcompile_graph + ")" +
@@ -151,7 +153,8 @@ def tf_library(name, graph, config,
            " --cpp_class=" + cpp_class +
            " --target_triple=" + target_llvm_triple() +
            " --out_header=$(@D)/" + header_file +
-           " --out_object=$(@D)/" + object_file +
+           " --out_metadata_object=$(@D)/" + metadata_object_file +
+           " --out_function_object=$(@D)/" + function_object_file +
            " " + flags),
       tools=[tfcompile_tool],
       visibility=visibility,
@@ -202,7 +205,7 @@ def tf_library(name, graph, config,
   need_xla_data_proto = (flags and flags.find("--gen_program_shape") != -1)
   native.cc_library(
       name=name,
-      srcs=[object_file],
+      srcs=[function_object_file, metadata_object_file],
       hdrs=[header_file],
       visibility=visibility,
       testonly=testonly,
@@ -267,7 +270,6 @@ def tf_library(name, graph, config,
         srcs=[test_file],
         deps=[
             ":" + name,
-            "@org_tensorflow//tensorflow/compiler/tf2xla:xla_local_runtime_context",
             "@org_tensorflow//tensorflow/compiler/aot:runtime",
             "@org_tensorflow//tensorflow/compiler/aot:tf_library_test_main",
             "@org_tensorflow//tensorflow/compiler/xla:executable_run_options",
@@ -313,7 +315,6 @@ def tf_library(name, graph, config,
         linkopts = if_android(["-pie", "-s"]),
         deps=[
             ":" + name,
-            "@org_tensorflow//tensorflow/compiler/tf2xla:xla_local_runtime_context",
             "@org_tensorflow//tensorflow/compiler/aot:benchmark",
             "@org_tensorflow//tensorflow/compiler/aot:runtime",
             "@org_tensorflow//tensorflow/compiler/xla:executable_run_options",
diff --git a/tensorflow/compiler/aot/tfcompile_main.cc b/tensorflow/compiler/aot/tfcompile_main.cc
index 6ab3d474187c7df2131f94c9f42f0d0f2f9d99d7..e2f01179d4e2e4f6ef72b2761d06e130ffa3a94f 100644
--- a/tensorflow/compiler/aot/tfcompile_main.cc
+++ b/tensorflow/compiler/aot/tfcompile_main.cc
@@ -91,19 +91,26 @@ Status Main(const MainFlags& flags) {
   // Write output files.
   Env* env = Env::Default();
   const std::vector<char>& obj = compile_result.aot->object_file_data();
-  TF_RETURN_IF_ERROR(WriteStringToFile(env, flags.out_object,
+  TF_RETURN_IF_ERROR(WriteStringToFile(env, flags.out_function_object,
                                        StringPiece(obj.data(), obj.size())));
-  HeaderOpts header_opts;
-  header_opts.gen_name_to_index = flags.gen_name_to_index;
-  header_opts.gen_program_shape = flags.gen_program_shape;
+  CodegenOpts codegen_opts;
+  codegen_opts.gen_name_to_index = flags.gen_name_to_index;
+  codegen_opts.gen_program_shape = flags.gen_program_shape;
+  codegen_opts.target_triple = flags.target_triple;
   if (flags.cpp_class.empty()) {
     return errors::InvalidArgument("Must specify --cpp_class");
   }
-  TF_RETURN_IF_ERROR(ParseCppClass(flags.cpp_class, &header_opts.class_name,
-                                   &header_opts.namespaces));
-  string header;
+  TF_RETURN_IF_ERROR(ParseCppClass(flags.cpp_class, &codegen_opts.class_name,
+                                   &codegen_opts.namespaces));
+
+  MetadataResult metadata_result;
   TF_RETURN_IF_ERROR(
-      GenerateHeader(header_opts, config, compile_result, &header));
+      GenerateMetadata(codegen_opts, compile_result, &metadata_result));
+  TF_RETURN_IF_ERROR(WriteStringToFile(env, flags.out_metadata_object,
+                                       metadata_result.object_file_data));
+  string header;
+  TF_RETURN_IF_ERROR(GenerateHeader(codegen_opts, config, compile_result,
+                                    metadata_result, &header));
   TF_RETURN_IF_ERROR(WriteStringToFile(env, flags.out_header, header));
   return Status::OK();
 }
@@ -114,7 +121,8 @@ Status Main(const MainFlags& flags) {
 int main(int argc, char** argv) {
   tensorflow::tfcompile::MainFlags flags;
   flags.target_triple = "x86_64-pc-linux";
-  flags.out_object = "out.o";
+  flags.out_function_object = "out_model.o";
+  flags.out_metadata_object = "out_helper.o";
   flags.out_header = "out.h";
   flags.entry_point = "entry";
 
diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index bf7d9cf14d10f41aa48ea594a8d63db97b9973e1..a711319607f4ff2b83aa0ebe50e215b3d0e2258e 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -110,19 +110,6 @@ cc_library(
     alwayslink = True,
 )
 
-# Internal targets below this point.
-
-cc_library(
-    name = "common",
-    srcs = [
-        "defs.cc",
-    ],
-    hdrs = [
-        "defs.h",
-    ],
-    visibility = [":friends"],
-)
-
 cc_library(
     name = "xla_device",
     srcs = [
@@ -135,6 +122,8 @@ cc_library(
         "xla_device_context.h",
         "xla_device_ops.h",
     ],
+    # Public visibility is needed for external TF/XLA backends.
+    visibility = ["//visibility:public"],
     deps = [
         ":common",
         ":jit_compilation_passes",
@@ -164,6 +153,19 @@ cc_library(
     ],
 )
 
+# Internal targets below this point.
+
+cc_library(
+    name = "common",
+    srcs = [
+        "defs.cc",
+    ],
+    hdrs = [
+        "defs.h",
+    ],
+    visibility = [":friends"],
+)
+
 cc_library(
     name = "xla_compilation_cache",
     srcs = ["xla_compilation_cache.cc"],
@@ -215,7 +217,6 @@ cc_library(
         ":common",
         ":compilation_passes",
         "//tensorflow/compiler/jit/kernels:xla_launch_op",
-        "//tensorflow/compiler/tf2xla:const_analysis",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
@@ -245,12 +246,13 @@ cc_library(
         "//tensorflow/compiler/jit/legacy_flags:mark_for_compilation_pass_flags",
         "//tensorflow/compiler/jit/ops:parallel_check_op",
         "//tensorflow/compiler/jit/ops:xla_ops",
-        "//tensorflow/compiler/tf2xla:const_analysis",
         "//tensorflow/compiler/tf2xla:dump_graph",
         "//tensorflow/compiler/tf2xla:xla_compiler",
+        "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index 22899ebeebc929055518893b358f7950d380d6f6..0de163d3a8f082eab4d8d802485da1bbc56e8180 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -16,13 +16,18 @@ limitations under the License.
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
 
 #include <functional>
+#include <memory>
 #include <numeric>
+#include <string>
+#include <unordered_map>
+#include <vector>
 
 #include "tensorflow/compiler/jit/graph_to_functiondef.h"
 #include "tensorflow/compiler/jit/legacy_flags/encapsulate_subgraphs_pass_flags.h"
 #include "tensorflow/compiler/jit/mark_for_compilation_pass.h"
 #include "tensorflow/compiler/tf2xla/const_analysis.h"
 #include "tensorflow/compiler/tf2xla/dump_graph.h"
+#include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/common_runtime/optimization_registry.h"
 #include "tensorflow/core/framework/function.h"
@@ -32,6 +37,7 @@ limitations under the License.
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/tensor_id.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -48,19 +54,75 @@ const char* const kXlaNumResourceArgsAttr = "_XlaNumResourceArgs";
 
 namespace {
 
+bool AreAllParentsConst(const Node& n,
+                        const gtl::FlatSet<const Node*>& runtime_const_nodes) {
+  if (n.type_string() == "GuaranteeConst" || n.type_string() == "Const") {
+    // If the current node is itself a cast-to-const, no need
+    // to look at the incoming edges.
+    return true;
+  }
+
+  bool all_parents_const = true;
+  bool atleast_one_non_control_edge = false;
+  for (const Edge* in : n.in_edges()) {
+    atleast_one_non_control_edge =
+        atleast_one_non_control_edge || !in->IsControlEdge();
+    if (!in->IsControlEdge() && runtime_const_nodes.count(in->src()) == 0) {
+      all_parents_const = false;
+      break;
+    }
+  }
+  return all_parents_const && atleast_one_non_control_edge;
+}
+
+void MarkGuaranteedConstants(
+    const Graph& graph,
+    const std::vector<std::pair<const Node*, Node*>>& src_arg_pairs) {
+  gtl::FlatSet<const Node*> guaranteed_const_nodes;
+  std::vector<const Node*> srcs;
+  srcs.reserve(src_arg_pairs.size());
+  for (const auto& src_arg : src_arg_pairs) {
+    srcs.push_back(src_arg.first);
+  }
+  ReverseDFSFrom(graph, srcs, /*enter=*/nullptr,
+                 /*leave=*/[&guaranteed_const_nodes](const Node* n) {
+                   // TODO(vinuraja): Doesn't work in the presence of loops.
+                   if (AreAllParentsConst(*n, guaranteed_const_nodes)) {
+                     guaranteed_const_nodes.insert(n);
+                   }
+                 });
+
+  for (auto& src_arg : src_arg_pairs) {
+    if (guaranteed_const_nodes.count(src_arg.first) != 0) {
+      VLOG(1) << "Guaranteed const found: " << src_arg.first->DebugString();
+      src_arg.second->AddAttr("_is_guaranteed_constant", true);
+    }
+  }
+}
+
 // A node/slot pair.
 // TODO(phawkins): is there a common definition of this?
 struct NodeSlot {
-  NodeSlot() : node(nullptr), slot(-1) {}
-  NodeSlot(const Node* node, int slot) : node(node), slot(slot) {}
+  NodeSlot() : node(nullptr), slot(-1), dtype(DT_INVALID) {}
+  NodeSlot(const Node* node, int slot)
+      : node(node), slot(slot), dtype(DT_INVALID) {}
+  NodeSlot(const Node* node, int slot, DataType dtype)
+      : node(node), slot(slot), dtype(dtype) {}
 
   const Node* node;
   int slot;
 
+  // Optional: used to record the destination type of a source NodeSlot in case
+  // the source output is a Ref type that is cast to a Tensor at the
+  // destination.
+  DataType dtype;
+
   bool operator==(const NodeSlot& other) const {
-    return node == other.node && slot == other.slot;
+    return node == other.node && slot == other.slot && dtype == other.dtype;
   }
 
+  // Leave dtype out of the hash since there are never two NodeSlots with the
+  // same node and slot and different dtypes.
   struct Hasher {
     uint64 operator()(NodeSlot const& s) const {
       return Hash64Combine(std::hash<const Node*>()(s.node),
@@ -75,10 +137,23 @@ struct NodeSlot {
   };
 };
 
+// TODO(phawkins) add a canonical copy of these operator names and refactor
+// everything to use it.
+static const char* const kArgOp = "_Arg";
+static const char* const kRetValOp = "_Retval";
+static const char* const kSendToHostOp = "_XlaSendToHost";
+static const char* const kRecvFromHostOp = "_XlaRecvFromHost";
+static const char* const kSendFromHostOp = "_XlaSendFromHost";
+static const char* const kRecvAtHostOp = "_XlaRecvAtHost";
+
 class Encapsulator {
  public:
-  Encapsulator(string group_attribute, Graph const* graph_in)
-      : group_attribute_(std::move(group_attribute)), graph_in_(graph_in) {}
+  Encapsulator(string group_attribute, string outside_compilation_attribute,
+               Graph const* graph_in)
+      : group_attribute_(std::move(group_attribute)),
+        outside_compilation_attribute_(
+            std::move(outside_compilation_attribute)),
+        graph_in_(graph_in) {}
 
   // Find subgraphs marked with 'group_attribute', and build a new
   // subgraph, one for each value of 'group_attribute'.
@@ -99,54 +174,350 @@ class Encapsulator {
   Status BuildOutputGraph(bool parallel_checking, Graph* graph_out);
 
  private:
-  // Returns the key attribute associated with a node. Returns the empty string
-  // if no key attribute is found.
-  string GetFunctionNameAttr(const Node* node) const;
-
   // A subgraph of the input, all marked with a common 'group_attribute'
-  // value.
-  struct Subgraph {
+  // value. A subgraph may contain multiple `outside_compilation' clusters.
+  //
+  // In the following simple example, A, B, ..., E are nodes in the original
+  // graph. The group attributes and outside_compilation attributes g and oc are
+  // each shown as either 0 or empty.
+  //
+  //  A  -->  B  -->  C  -->  D  -->  E
+  //  g:      g:0     g:0     g:0     g:
+  //  oc:     oc:     oc:0    oc:     oc:
+  //
+  // The example is rewritten to two graphs; one on the host and one to be
+  // compiled. The host graph is as follows. RAH is a RecvAtHost node receiving
+  // input from the compiled cluster, and SFH is a SendFromHost node sending
+  // input back to the compiled cluster. Dotted edges are control edges. A
+  // 'sequencing' node S is inserted, and both RAH and SFH are connected via S
+  // to E (and in general all nodes that depend on nodes in the compiled
+  // cluster) to ensure that they are not pruned.
+  //
+  //  A  -->  Call  -->  E
+  //                     ^
+  //                     .
+  //           ........> S
+  //       ....          ^
+  //     ..             .
+  //  RAH -->  C  --> SFH
+  //
+  // The compiled cluster is as follows. STH is a SendToHost node which is the
+  // source of a channel to the RAH node above. RFH is a RecvFromHost node which
+  // is the destination of a channel from the SFH node above. There is a control
+  // edge that ensures RFH follows STH, which is used in shape inference to
+  // ensure that the shapes on the STH host channel are known before the RFH
+  // channel is compiled.
+  //
+  //  Arg  --> B  --> STH  ..>  RFH  --> D --> Retval
+  //
+  // The channels STH/RAH and SFH/RFH each transmit a tuple, so there is at most
+  // one RAH and SFH in each compiled cluster. This design is preferred over
+  // adding separate Arg/Retval nodes for each transmitted value because it
+  // simplifies the host code that would like to limit communication between
+  // host and device and, e.g., raise only one interrupt per channel rather than
+  // one per transmitted value.
+  class Subgraph {
+   public:
+    // Creates a graph to build the subgraph in, if it doesn't already exist,
+    // using the same op registry and versions as graph_in.
+    Node* MakeNodeImage(const Graph* graph_in, Node* node);
+
+    // Returns the graph the subgraph is being built in.
+    Graph* GetGraph() const;
+
+    // Builds a FunctionDef, and adds it to 'library'. The value of the
+    // 'group_attribute' annotations becomes the function name.  If
+    // 'reuse_existing_functions' is set, use an existing function with the same
+    // name, if any.  If 'rewrite_subgraph_fn' is set, it is applied to the
+    // subgraph before function conversion.
+    Status BuildFunctionDef(const string& name_in,
+                            const RewriteSubgraphFn& rewrite_subgraph_fn,
+                            bool reuse_existing_functions,
+                            FunctionLibraryDefinition* library);
+
+    // Adds the function call node to graph_out.
+    Status AddFunctionCallNode(
+        const std::unordered_map<const Node*, Node*>& node_images,
+        bool parallel_checking, Graph* graph_out);
+
+    // Adds _RecvAtHost and _SendFromHost nodes, where needed, to graph_out.
+    Status AddOutsideCompilationHostIONodes(
+        const string& subgraph_name,
+        const std::unordered_map<const Node*, Node*>& node_images,
+        Graph* graph_out);
+
+    // Returns the Node that inputs to the function should be wired up to.
+    Node* GetCallNodeForInputs() const;
+
+    // Returns the Node that outputs to the function should be wired up to.
+    Node* GetCallNodeForOutputs() const;
+
+    // Returns the index of the arg that the dst of edge should connect to.
+    int GetArgIndexForEdge(const Edge* edge) const;
+
+    // Returns the index of the result that the src of edge should connect to.
+    int GetResultIndexForEdge(const Edge* edge) const;
+
+    // Returns the RecvAtHost node for an outside_compilation subgraph.
+    Node* GetRecvAtHostNode(
+        const string& outside_compilation_subgraph_name) const;
+
+    // Returns the output slot for the RecvAtHost node that corresponds to the
+    // source of edge in an outside_compilation subgraph.
+    int GetRecvAtHostSlot(const string& outside_compilation_subgraph_name,
+                          const Edge* edge) const;
+
+    // Returns the SendFromHost node for an outside_compilation subgraph.
+    Node* GetSendFromHostNode(
+        const string& outside_compilation_subgraph_name) const;
+
+    // Returns the input slot for the SendFromHost node that corresponds to the
+    // destination of edge in an outside_compilation subgraph.
+    int GetSendFromHostSlot(const string& outside_compilation_subgraph_name,
+                            const Edge* edge) const;
+
+    // Creates an _Arg node for the src node of edge, and add its index to
+    // args_by_src_, if none exists yet. Also adds its index to args_by_dst_,
+    // and adds the edge within the subgraph from the _Arg node to the image of
+    // the dst node.
+    Status RecordArg(const Edge* edge,
+                     const std::unordered_map<const Node*, Node*>& node_images,
+                     std::vector<std::pair<const Node*, Node*>>* src_arg_pairs);
+
+    // Creates a _Retval node for the src node of edge, and add it to results_,
+    // if none exists yet. If a new _Retval node is created, also adds the edge
+    // within the subgraph from the src to the _Retval node.
+    Status RecordResult(
+        const Edge* edge,
+        const std::unordered_map<const Node*, Node*>& node_images);
+
+    // Creates an outside_compilation subgraph for outside_compilation_id if
+    // none exists yet. Creates an entry for the src node of edge in the list of
+    // inputs for the outside_compilation subgraph, if none exists yet.
+    void RecordOutsideCompilationInputOrControl(
+        const string& outside_compilation_id, const Edge* edge);
+
+    // Creates an outside_compilation subgraph for outside_compilation_id if
+    // none exists yet. Creates an entry for the src node of edge in the list of
+    // outputs by src for the outside_compilation subgraph, if none exists
+    // yet. Creates an entry for the dst node of edge in the list of outputs by
+    // dst for the outside_compilation subgraph.
+    void RecordOutsideCompilationOutputOrControl(
+        const string& outside_compilation_id, const Edge* edge);
+
+    // Adds the SendToHost nodes for each outside_compilation subgraph once the
+    // edges have all been recorded via RecordOutsideCompilationInputOrControl.
+    Status AddSendsToOutsideCompilation(
+        const std::unordered_map<const Node*, Node*>& node_images);
+
+    // Adds the RecvFromHost nodes for each outside_compilation subgraph once
+    // the edges have all been recorded via
+    // RecordOutsideCompilationOutputOrControl.
+    Status AddRecvsFromOutsideCompilation(
+        const std::unordered_map<const Node*, Node*>& node_images);
+
+    // Creates the sequencer node if it doesn't exist, adding it to graph_out.
+    Status MakeSequencingNode(const string& subgraph_name, Graph* graph_out);
+
+    // If there is a sequencer node, adds a control edge from the sequencer to
+    // all the downstream nodes of call_node_outputs.
+    void ConnectSequencerToOutputs(Graph* graph_out);
+
+   private:
+    struct OutsideCompilationSubgraph {
+      // Map from source (producer node/slot) tensors in the original graph to
+      // input index (slot number in the SendToHost/RecvAtHost nodes that will
+      // be created) for the outside_compilation subgraph.
+      std::unordered_map<NodeSlot, int, NodeSlot::Hasher> inputs;
+
+      // Set of nodes in the original graph that are the source of control edges
+      // that cross from the containing compiled subgraph into the
+      // outside_compilation subgraph. These are recorded by
+      // RecordOutsideCompilationInputOrControl while walking all the subgraph
+      // edges, and lifted control edges within the subgraph are added by
+      // AddSendsToOutsideCompilation once the _SendToHost node has been
+      // created. The matching control edge from _RecvAtHost to the
+      // destination is added by CopyEdgeToOutputGraph.
+      std::unordered_set<const Node*> control_inputs;
+
+      // Maps from source (producer node/slot) and destination (consumer
+      // node/slot) tensors in the original graph to output index (slot number
+      // in the SendFromHost/RecvFromHost nodes that will be created) for the
+      // outside_compilation subgraph.
+      std::unordered_map<NodeSlot, int, NodeSlot::Hasher> outputs_by_src;
+      std::unordered_map<NodeSlot, int, NodeSlot::Hasher> outputs_by_dst;
+
+      // Set of nodes in the original graph that are the destination of control
+      // edges that cross from the outside_compilation subgraph into the
+      // containing compiled subgraph. These are recorded by
+      // RecordOutsideCompilationOutputOrControl while walking all the subgraph
+      // edges, and lifted control edges within the subgraph are added by
+      // AddRecvsFromToOutsideCompilation once the _RecvFromHost node has been
+      // created. The matching control edge from the source to _SendFromHost to
+      // the destination is added by CopyEdgeToOutputGraph.
+      std::unordered_set<const Node*> control_outputs;
+
+      // _SendToHost node in the subgraph. Not owned.
+      Node* send_to_host = nullptr;
+
+      // _RecvAtHost node in the output graph. Not owned.
+      Node* recv_at_host = nullptr;
+
+      // _SendFromHost node in the output graph. Not owned.
+      Node* send_from_host = nullptr;
+    };
+
+    // Builds a ParallelCheck op that compares the output of the original
+    // subgraph with the encapsulated subgraph.
+    Status BuildParallelCheckOp(
+        const std::unordered_map<const Node*, Node*>& node_images,
+        Graph* graph_out);
+
+    // Builds a _RecvAtHost node producing all the inputs of an
+    // outside_compilation subgraph and stores it in oc_subgraph.recv_at_host.
+    Status AddRecvAtHostNode(const string& subgraph_name,
+                             const string& oc_subgraph_name,
+                             OutsideCompilationSubgraph* oc_subgraph,
+                             Graph* graph_out);
+
+    // Builds a _SendFromHost node consuming all the outputs of an
+    // outside_compilation subgraph and stores it in oc_subgraph.send_from_host.
+    Status AddSendFromHostNode(
+        const std::unordered_map<const Node*, Node*>& node_images,
+        const string& subgraph_name, const string& oc_subgraph_name,
+        OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out);
+
     // The subgraph extracted from the input graph, suitable for being turned
     // into a FunctionDef. Inputs are fed by _Arg nodes, and outputs are
     // returned by _Retval nodes.
-    std::unique_ptr<Graph> graph;
+    std::unique_ptr<Graph> graph_;
 
     // Which device are these nodes on? Used to assign a device to the call
     // node.
-    string device;
+    string device_;
 
     // NodeDef for the function call node.
-    NodeDef call_node_def;
+    NodeDef call_node_def_;
 
     // Function call node(s) in the output graph. Not owned.
     // If parallel_checking is enabled, 'call_node_inputs' is the function call
     // node to which inputs should be fed, and 'call_node_outputs' is the
     // parallel check op from which outputs should be read. If parallel checking
     // is disabled, both point to the function call node.
-    Node* call_node_inputs;
-    Node* call_node_outputs;
+    Node* call_node_inputs_;
+    Node* call_node_outputs_;
 
     // Maps from source (producer node/slot) and destination
     // (consumer node/slot) tensors in the input graph to _Arg numbers in
     // the subgraph. The source map is one-to-one, whereas the dest map may be
     // many-to-one.
-    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> args_by_src;
-    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> args_by_dst;
+    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> args_by_src_;
+    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> args_by_dst_;
 
     // The _Arg nodes in the subgraph, in order by argument number.
-    std::vector<Node*> args;
+    std::vector<Node*> args_;
 
     // Map from source tensor in the input graph to result #.
-    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> results;
+    std::unordered_map<NodeSlot, int, NodeSlot::Hasher> results_;
+
+    // The outside_compilation clusters in this subgraph.
+    std::unordered_map<string, OutsideCompilationSubgraph>
+        outside_compilation_subgraphs_;
+
+    // NoOp node in the output graph that is sequenced after the call node and
+    // used to prevent host-side outside_compilation sends and recvs from being
+    // pruned.
+    Node* sequencer_ = nullptr;
   };
 
-  // Builds a ParallelCheck op that compares the output of the original subgraph
-  // with the encapsulated subgraph.
-  Status BuildParallelCheckOp(
+  // Returns the key attribute and outside_compilation attribute associated
+  // with a node in attr, and outside_compilation_attr, respectively. Sets
+  // either result to the empty string if the respective attribute is not
+  // found. Returns error status if there is an outside_compilation attribute
+  // and no key attribute,
+  Status GetFunctionNameAttr(Node const* node, string* attr,
+                             string* outside_compilation_attr) const;
+
+  // Copies edges local to a subgraph. Adds _Arg and _Retval nodes to
+  // subgraphs for data edges that cross subgraph boundaries.
+  Status CopySubgraphEdges(
       const std::unordered_map<const Node*, Node*>& node_images,
-      const Subgraph& subgraph, Graph* graph_out, Node** parallel_check_op);
+      std::vector<std::pair<const Node*, Node*>>* src_arg_pairs);
+
+  // Copies all marked nodes to a subgraph. Does nothing for unmarked nodes,
+  // or nodes marked outside_compilation.
+  Status CopySubgraphNodes(std::unordered_map<const Node*, Node*>* node_images);
+
+  // Copies all nodes that aren't in a compiled subgraph to the output graph.
+  Status CopyNodesToOutputGraph(
+      bool parallel_checking, Graph* graph_out,
+      std::unordered_map<const Node*, Node*>* node_images);
+
+  // Adds function call nodes for each compiled subgraph.
+  Status AddFunctionCallNodes(
+      const std::unordered_map<const Node*, Node*>& node_images,
+      bool parallel_checking, Graph* graph_out);
+
+  // Adds _RecvAtHost and _SendFromHost nodes, where needed, for all
+  // outside_compilation subgraphs.
+  Status AddOutsideCompilationHostIONodes(
+      const std::unordered_map<const Node*, Node*>& node_images,
+      Graph* graph_out);
+
+  // Finds the image of an edge source in the output graph. If the edge crosses
+  // a subgraph boundary it is the output of a call node, otherwise it is a node
+  // in the output graph.
+  Status FindOutputImageOfEdgeSrc(
+      const string& src_func_id, const string& src_outside_compilation_id,
+      const string& dst_func_id, const string& dst_outside_compilation_id,
+      const std::unordered_map<const Node*, Node*>& node_images,
+      const Node* original_src_node, Node** src_image);
+
+  // Finds an edge source slot in the output graph. If the edge crosses a
+  // subgraph boundary it is a slot on the output of a call node or a
+  // _RecvAtHost node, otherwise it is a slot on a node in the output graph.
+  int FindOutputSlotOfEdgeSrc(const string& src_func_id,
+                              const string& src_outside_compilation_id,
+                              const string& dst_func_id,
+                              const string& dst_outside_compilation_id,
+                              const Edge* edge);
+
+  // Finds the image of an edge destination in the output graph. If the edge
+  // crosses a subgraph boundary it is the input of a call node or a
+  // _SendFromHost node, otherwise it is a node in the output graph.
+  Status FindOutputImageOfEdgeDst(
+      const string& src_func_id, const string& src_outside_compilation_id,
+      const string& dst_func_id, const string& dst_outside_compilation_id,
+      const std::unordered_map<const Node*, Node*>& node_images,
+      const Node* original_dst_node, Node** dst_image);
+
+  // Finds an edge destination slot in the output graph. If the edge crosses a
+  // subgraph boundary it is a slot on the input of a call node or a
+  // _SendFromHost node, otherwise it is a slot on a node in the output graph.
+  int FindOutputSlotOfEdgeDst(const string& src_func_id,
+                              const string& src_outside_compilation_id,
+                              const string& dst_func_id,
+                              const string& dst_outside_compilation_id,
+                              const Edge* edge);
+
+  // Copies a single edge to the output graph. The edge is either entirely
+  // within the output graph, or crosses into or out of a compiled subgraph.
+  Status CopyEdgeToOutputGraph(
+      const Edge* edge, const string& src_func_id,
+      const string& src_outside_compilation_id, const string& dst_func_id,
+      const string& dst_outside_compilation_id,
+      const std::unordered_map<const Node*, Node*>& node_images,
+      bool parallel_checking, Graph* graph_out,
+      std::unordered_set<std::pair<NodeSlot, NodeSlot>, NodeSlot::PairHasher>*
+          edges_added);
+
+  // Adds all edges to the output graph.
+  Status AddEdgesToOutputGraph(
+      const std::unordered_map<const Node*, Node*>& node_images,
+      bool parallel_checking, Graph* graph_out);
 
   const string group_attribute_;
+  const string outside_compilation_attribute_;
   const Graph* graph_in_;
 
   std::unordered_map<string, Subgraph> subgraphs_;
@@ -154,224 +525,370 @@ class Encapsulator {
   TF_DISALLOW_COPY_AND_ASSIGN(Encapsulator);
 };
 
-// TODO(phawkins) add a canonical copy of these operator names and refactor
-// everything to use it.
-static const char* const kArgOp = "_Arg";
-static const char* const kRetValOp = "_Retval";
+Node* Encapsulator::Subgraph::GetCallNodeForInputs() const {
+  return call_node_inputs_;
+}
 
-// Returns the function name attached to 'node', or the empty string if there is
-// none.
-string Encapsulator::GetFunctionNameAttr(Node const* node) const {
-  string attr;
-  if (!GetNodeAttr(node->attrs(), group_attribute_, &attr).ok()) {
-    attr.clear();
-  }
-  return attr;
+Node* Encapsulator::Subgraph::GetCallNodeForOutputs() const {
+  return call_node_outputs_;
 }
 
-Status Encapsulator::SplitIntoSubgraphs() {
-  Status s;
+int Encapsulator::Subgraph::GetArgIndexForEdge(const Edge* edge) const {
+  return args_by_dst_.at(NodeSlot(edge->dst(), edge->dst_input()));
+}
 
-  // Map from input graph nodes to subgraph nodes.
-  std::unordered_map<Node*, Node*> node_images;
+int Encapsulator::Subgraph::GetResultIndexForEdge(const Edge* edge) const {
+  return results_.at(NodeSlot(edge->src(), edge->src_output()));
+}
 
-  // Copy all marked nodes to a subgraph. Do nothing for unmarked nodes.
-  for (Node* node : graph_in_->op_nodes()) {
-    string func_id = GetFunctionNameAttr(node);
-    if (func_id.empty()) continue;
+Node* Encapsulator::Subgraph::GetRecvAtHostNode(
+    const string& outside_compilation_subgraph_name) const {
+  return outside_compilation_subgraphs_.at(outside_compilation_subgraph_name)
+      .recv_at_host;
+}
 
-    Subgraph& subgraph = subgraphs_[func_id];
-    if (!subgraph.graph) {
-      subgraph.graph.reset(new Graph(graph_in_->op_registry()));
-      subgraph.graph->set_versions(graph_in_->versions());
-    }
+int Encapsulator::Subgraph::GetRecvAtHostSlot(
+    const string& outside_compilation_subgraph_name, const Edge* edge) const {
+  return outside_compilation_subgraphs_.at(outside_compilation_subgraph_name)
+      .inputs.at(NodeSlot(edge->src(), edge->src_output()));
+}
 
-    Node* image = subgraph.graph->CopyNode(node);
-    image->ClearAttr(group_attribute_);
-    node_images[node] = image;
+Node* Encapsulator::Subgraph::GetSendFromHostNode(
+    const string& outside_compilation_subgraph_name) const {
+  return outside_compilation_subgraphs_.at(outside_compilation_subgraph_name)
+      .send_from_host;
+}
 
-    if (subgraph.device.empty()) {
-      subgraph.device = node->assigned_device_name().empty()
-                            ? node->requested_device()
-                            : node->assigned_device_name();
-    }
+int Encapsulator::Subgraph::GetSendFromHostSlot(
+    const string& outside_compilation_subgraph_name, const Edge* edge) const {
+  return outside_compilation_subgraphs_.at(outside_compilation_subgraph_name)
+      .outputs_by_dst.at(NodeSlot(edge->dst(), edge->dst_input()));
+}
+
+Node* Encapsulator::Subgraph::MakeNodeImage(const Graph* graph_in, Node* node) {
+  if (!graph_) {
+    graph_.reset(new Graph(graph_in->op_registry()));
+    graph_->set_versions(graph_in->versions());
   }
 
-  // Copy edges local to a subgraph. Add _Arg and _Retval nodes to subgraphs for
-  // data edges that cross subgraph boundaries.
-  for (const Edge* edge : graph_in_->edges()) {
-    string src_func_id = GetFunctionNameAttr(edge->src());
-    string dst_func_id = GetFunctionNameAttr(edge->dst());
-    Node* src_image = gtl::FindWithDefault(node_images, edge->src(), nullptr);
-    Node* dst_image = gtl::FindWithDefault(node_images, edge->dst(), nullptr);
+  if (device_.empty()) {
+    device_ = node->assigned_device_name().empty()
+                  ? node->requested_device()
+                  : node->assigned_device_name();
+  }
 
-    // Copy edges that are local to a subgraph.
-    if (!src_func_id.empty() && src_func_id == dst_func_id) {
-      Graph* g = subgraphs_[src_func_id].graph.get();
-      if (edge->IsControlEdge()) {
-        g->AddControlEdge(src_image, dst_image);
-      } else {
-        g->AddEdge(src_image, edge->src_output(), dst_image, edge->dst_input());
-      }
-      continue;
-    }
+  return graph_->CopyNode(node);
+}
 
-    // Ignore cross-boundary control edges for right now. We will lift them
-    // onto the enclosing call operators in BuildOutputGraph().
-    if (edge->IsControlEdge()) continue;
+Graph* Encapsulator::Subgraph::GetGraph() const { return graph_.get(); }
+
+Status Encapsulator::Subgraph::RecordArg(
+    const Edge* edge, const std::unordered_map<const Node*, Node*>& node_images,
+    std::vector<std::pair<const Node*, Node*>>* src_arg_pairs) {
+  Node* src_node = edge->src();
+  int src_slot = edge->src_output();
+  std::unordered_map<NodeSlot, int, NodeSlot::Hasher>::iterator iter;
+  bool inserted;
+  std::tie(iter, inserted) =
+      args_by_src_.emplace(NodeSlot(src_node, src_slot), args_by_src_.size());
+  int arg_index = iter->second;
+  if (inserted) {
+    NodeDef arg_def;
+    NodeDefBuilder builder(
+        strings::StrCat(src_node->name(), "_", src_slot, "_arg"), kArgOp);
+    DataType dtype = edge->dst()->input_type(edge->dst_input());
+    builder.Attr("T", dtype);
+    builder.Attr("index", arg_index);
+    Status s = builder.Finalize(&arg_def);
+    if (!s.ok()) return s;
 
-    // Add 'src' as an output of its subgraph, if applicable.
-    if (!src_func_id.empty()) {
-      Subgraph& src_subgraph = subgraphs_[src_func_id];
-      int ret_index = src_subgraph.results.size();
-      if (src_subgraph.results
-              .emplace(NodeSlot(edge->src(), edge->src_output()), ret_index)
-              .second) {
-        // Create a new _Retval node
-        DataType dtype = edge->src()->output_type(edge->src_output());
+    Node* arg = graph_->AddNode(arg_def, &s);
+    if (!s.ok()) return s;
 
-        if (IsRefType(dtype)) {
-          return errors::InvalidArgument(
-              "Ref Tensors (e.g., Variables) are not supported: tensor ",
-              edge->src()->name(), ":", edge->src_output());
-        }
+    src_arg_pairs->push_back({src_node, arg});
+    args_.push_back(arg);
+  }
+  Node* dst_node = edge->dst();
+  Node* dst_image = node_images.at(dst_node);
+  int dst_slot = edge->dst_input();
+  args_by_dst_[NodeSlot(dst_node, dst_slot)] = arg_index;
+  graph_->AddEdge(args_[arg_index], 0, dst_image, dst_slot);
+  return Status::OK();
+}
 
-        NodeDef ret_def;
-        ret_def.set_op(kRetValOp);
-        ret_def.set_name(strings::StrCat(edge->src()->name(), "_",
-                                         edge->src_output(), "_retval"));
-        AddNodeAttr("T", dtype, &ret_def);
-        AddNodeAttr("index", ret_index, &ret_def);
-        Node* ret = src_subgraph.graph->AddNode(ret_def, &s);
-        if (!s.ok()) return s;
-
-        // Add an edge from 'src' to _Retval.
-        src_subgraph.graph->AddEdge(src_image, edge->src_output(), ret, 0);
-      }
-    }
+Status Encapsulator::Subgraph::RecordResult(
+    const Edge* edge,
+    const std::unordered_map<const Node*, Node*>& node_images) {
+  Node* src_node = edge->src();
+  Node* src_image = node_images.at(src_node);
+  int src_slot = edge->src_output();
+  std::unordered_map<NodeSlot, int, NodeSlot::Hasher>::iterator iter;
+  bool inserted;
+  std::tie(iter, inserted) =
+      results_.emplace(NodeSlot(src_node, src_slot), results_.size());
+  int ret_index = iter->second;
+  if (inserted) {
+    NodeDef ret_def;
+    NodeDefBuilder builder(
+        strings::StrCat(src_node->name(), "_", src_slot, "_retval"), kRetValOp);
+    DataType dtype = src_node->output_type(src_slot);
+    builder.Attr("T", dtype);
+    builder.Attr("index", ret_index);
+    builder.Input(src_image->name(), src_slot, dtype);
+    Status s = builder.Finalize(&ret_def);
+    if (!s.ok()) return s;
+    Node* ret = graph_->AddNode(ret_def, &s);
+    if (!s.ok()) return s;
 
-    // Add 'dst' as an input of its subgraph, if applicable.
-    if (!dst_func_id.empty()) {
-      Subgraph& dst_subgraph = subgraphs_[dst_func_id];
+    graph_->AddEdge(src_image, src_slot, ret, 0);
+  }
+  return Status::OK();
+}
 
-      // Create an _Arg node for this tensor, if none exists yet.
-      std::unordered_map<NodeSlot, int, NodeSlot::Hasher>::iterator iter;
-      bool inserted;
-      std::tie(iter, inserted) = dst_subgraph.args_by_src.emplace(
-          NodeSlot(edge->src(), edge->src_output()), dst_subgraph.args.size());
-      int arg_index = iter->second;
-      if (inserted) {
-        // This is the first time we have seen this tensor. Create an _Arg node.
-        DataType dtype = edge->dst()->input_type(edge->dst_input());
+void Encapsulator::Subgraph::RecordOutsideCompilationInputOrControl(
+    const string& outside_compilation_id, const Edge* edge) {
+  auto iter = outside_compilation_subgraphs_
+                  .emplace(outside_compilation_id, OutsideCompilationSubgraph())
+                  .first;
+  OutsideCompilationSubgraph& outside_subgraph = iter->second;
+  if (edge->IsControlEdge()) {
+    outside_subgraph.control_inputs.insert(edge->src());
+  } else {
+    int input_index = outside_subgraph.inputs.size();
+    outside_subgraph.inputs.emplace(NodeSlot(edge->src(), edge->src_output()),
+                                    input_index);
+  }
+}
 
-        if (IsRefType(dtype)) {
-          return errors::InvalidArgument(
-              "Ref Tensors (e.g., Variables) are not supported: tensor ",
-              edge->src()->name(), ":", edge->src_output());
-        }
+void Encapsulator::Subgraph::RecordOutsideCompilationOutputOrControl(
+    const string& outside_compilation_id, const Edge* edge) {
+  auto subgraph_iter =
+      outside_compilation_subgraphs_
+          .emplace(outside_compilation_id, OutsideCompilationSubgraph())
+          .first;
+  OutsideCompilationSubgraph& outside_subgraph = subgraph_iter->second;
+  if (edge->IsControlEdge()) {
+    outside_subgraph.control_outputs.insert(edge->dst());
+  } else {
+    DataType dtype = edge->dst()->input_type(edge->dst_input());
+    auto output_iter =
+        outside_subgraph.outputs_by_src
+            .emplace(NodeSlot(edge->src(), edge->src_output(), dtype),
+                     outside_subgraph.outputs_by_src.size())
+            .first;
+    int output_index = output_iter->second;
+    outside_subgraph.outputs_by_dst[NodeSlot(edge->dst(), edge->dst_input())] =
+        output_index;
+  }
+}
 
-        NodeDef arg_def;
-        NodeDefBuilder builder(strings::StrCat(edge->src()->name(), "_",
-                                               edge->src_output(), "_arg"),
-                               kArgOp);
-        builder.Attr("T", dtype);
-        builder.Attr("index", arg_index);
-        s = builder.Finalize(&arg_def);
-        if (!s.ok()) return s;
+Status Encapsulator::Subgraph::AddSendsToOutsideCompilation(
+    const std::unordered_map<const Node*, Node*>& node_images) {
+  for (auto& oc_subgraph_iter : outside_compilation_subgraphs_) {
+    const string& oc_subgraph_name = oc_subgraph_iter.first;
+    OutsideCompilationSubgraph& oc_subgraph = oc_subgraph_iter.second;
+    if (!oc_subgraph.inputs.empty() || !oc_subgraph.control_inputs.empty()) {
+      // Build a _SendToHost node sending all the args of the appropriate
+      // types.
+      std::vector<DataType> dtypes(oc_subgraph.inputs.size(), DT_INVALID);
+      std::vector<NodeDefBuilder::NodeOut> inputs(oc_subgraph.inputs.size());
+
+      for (const auto& input_src : oc_subgraph.inputs) {
+        const Node* src_node = input_src.first.node;
+        Node* src_image = node_images.at(src_node);
+        int src_slot = input_src.first.slot;
+        int input_index = input_src.second;
+
+        DataType dtype = src_node->output_type(src_slot);
+        dtypes[input_index] = dtype;
+        inputs[input_index].Reset(src_image->name(), src_slot, dtype);
+      }
 
-        Node* arg = dst_subgraph.graph->AddNode(arg_def, &s);
-        if (!s.ok()) return s;
+      NodeDef send_def;
+      NodeDefBuilder builder(
+          strings::StrCat("outside_compilation_", oc_subgraph_name, "_send"),
+          kSendToHostOp);
+      builder.Attr("dtypes", dtypes);
+      builder.Input(inputs);
+      Status s = builder.Finalize(&send_def);
+      if (!s.ok()) return s;
+
+      oc_subgraph.send_to_host = graph_->AddNode(send_def, &s);
+      if (!s.ok()) return s;
+
+      // Connect the _SendToHost node to its producers in the subgraph.
+      for (auto& input_src : oc_subgraph.inputs) {
+        const Node* src_node = input_src.first.node;
+        Node* src_image = node_images.at(src_node);
+        int src_slot = input_src.first.slot;
+        int input_index = input_src.second;
+        graph_->AddEdge(src_image, src_slot, oc_subgraph.send_to_host,
+                        input_index);
+      }
 
-        dst_subgraph.args.push_back(arg);
+      // Connect the _SendToHost node to its control edge producers in the
+      // subgraph.
+      for (const auto& src_node : oc_subgraph.control_inputs) {
+        Node* src_image = node_images.at(src_node);
+        graph_->AddControlEdge(src_image, oc_subgraph.send_to_host);
       }
-      // Add an edge from the _Arg node to 'dst' in the subgraph.
-      dst_subgraph.args_by_dst[NodeSlot(edge->dst(), edge->dst_input())] =
-          arg_index;
-      dst_subgraph.graph->AddEdge(dst_subgraph.args[arg_index], 0, dst_image,
-                                  edge->dst_input());
     }
   }
 
-  for (auto& entry : subgraphs_) {
-    FixupSourceAndSinkEdges(entry.second.graph.get());
-  }
-
-  return s;
+  return Status::OK();
 }
 
-Status Encapsulator::BuildFunctionDefs(
-    const RewriteSubgraphFn& rewrite_subgraph_fn, bool reuse_existing_functions,
-    FunctionLibraryDefinition* library) {
-  // For each subgraph, build a FunctionDef.
-  for (auto& subgraph_entry : subgraphs_) {
-    string name = subgraph_entry.first;
-    Subgraph& subgraph = subgraph_entry.second;
-
-    subgraph.call_node_def.set_op(name);
-    subgraph.call_node_def.set_name(name);
-    subgraph.call_node_def.set_device(subgraph.device);
-
-    if (rewrite_subgraph_fn) {
-      // Initialize the input and output permutations to the identity.
-      std::vector<int> input_permutation(subgraph.args_by_src.size());
-      std::iota(input_permutation.begin(), input_permutation.end(), 0);
-      std::vector<int> output_permutation(subgraph.results.size());
-      std::iota(output_permutation.begin(), output_permutation.end(), 0);
-
-      TF_RETURN_IF_ERROR(
-          rewrite_subgraph_fn(&subgraph.graph, &input_permutation,
-                              &output_permutation, &subgraph.call_node_def));
-
-      // Apply the input/output permutations to the 'args_by_...' and 'results'
-      // mappings in 'subgraph', so when we build edges in BuildOutputGraph() we
-      // connect them to the right input/output positions.
-      if (input_permutation.size() != subgraph.args_by_src.size()) {
-        return errors::InvalidArgument("Input permutation has incorrect size.");
+Status Encapsulator::Subgraph::AddRecvsFromOutsideCompilation(
+    const std::unordered_map<const Node*, Node*>& node_images) {
+  for (auto& oc_subgraph_iter : outside_compilation_subgraphs_) {
+    const string& oc_subgraph_name = oc_subgraph_iter.first;
+    OutsideCompilationSubgraph& oc_subgraph = oc_subgraph_iter.second;
+    if (!oc_subgraph.outputs_by_src.empty() ||
+        !oc_subgraph.control_outputs.empty()) {
+      // Build a _RecvFromHost node producing all the outputs of the appropriate
+      // types.
+      std::vector<DataType> dtypes(oc_subgraph.outputs_by_src.size(),
+                                   DT_INVALID);
+
+      for (const auto& output : oc_subgraph.outputs_by_src) {
+        DataType dtype = output.first.dtype;
+        int output_index = output.second;
+        dtypes[output_index] = dtype;
       }
-      if (output_permutation.size() != subgraph.results.size()) {
-        return errors::InvalidArgument(
-            "Output permutation has incorrect size.");
-      }
-      for (auto& arg : subgraph.args_by_src) {
-        arg.second = input_permutation[arg.second];
-      }
-      for (auto& arg : subgraph.args_by_dst) {
-        arg.second = input_permutation[arg.second];
+
+      NodeDef recv_def;
+      NodeDefBuilder builder(
+          strings::StrCat("outside_compilation_", oc_subgraph_name, "_recv"),
+          kRecvFromHostOp);
+      builder.Attr("dtypes", dtypes);
+      Status s = builder.Finalize(&recv_def);
+      if (!s.ok()) return s;
+
+      Node* recv = graph_->AddNode(recv_def, &s);
+      if (!s.ok()) return s;
+
+      // Connect the consumers in the subgraph to the _RecvFromHost node.
+      for (const auto& output : oc_subgraph.outputs_by_dst) {
+        const Node* dst_node = output.first.node;
+        Node* dst_image = node_images.at(dst_node);
+        int dst_slot = output.first.slot;
+        int output_index = output.second;
+
+        graph_->AddEdge(recv, output_index, dst_image, dst_slot);
       }
-      for (auto& result : subgraph.results) {
-        result.second = output_permutation[result.second];
+
+      // Connect the control edge consumers in the subgraph to the _RecvFromHost
+      // node.
+      for (const auto& dst_node : oc_subgraph.control_outputs) {
+        Node* dst_image = node_images.at(dst_node);
+        graph_->AddControlEdge(recv, dst_image);
       }
 
-      name = subgraph.call_node_def.op();
+      // Add a control edge in the subgraph so that the _SendToHost node, if
+      // any, is compiled before the _RecvFromHost node.
+      if (oc_subgraph.send_to_host != nullptr) {
+        graph_->AddControlEdge(oc_subgraph.send_to_host, recv);
+      }
     }
+  }
+
+  return Status::OK();
+}
+
+Status Encapsulator::Subgraph::MakeSequencingNode(const string& subgraph_name,
+                                                  Graph* graph_out) {
+  if (sequencer_ == nullptr) {
+    NodeDef seq_def;
+    NodeDefBuilder builder(strings::StrCat(subgraph_name, "_sequencer"),
+                           "NoOp");
+    Status s = builder.Finalize(&seq_def);
+    if (!s.ok()) return s;
 
-    FunctionDef fdef;
-    TF_RETURN_IF_ERROR(GraphToFunctionDef(*subgraph.graph, name, &fdef));
+    sequencer_ = graph_out->AddNode(seq_def, &s);
+    if (!s.ok()) return s;
+    sequencer_->set_assigned_device_name(device_);
+  }
+  return Status::OK();
+}
 
-    if (VLOG_IS_ON(1)) {
-      VLOG(2) << "Build function def " << name;
-      dump_graph::DumpGraphToFile(
-          strings::StrCat("encapsulate_fdef_graph_", name), *subgraph.graph,
-          library);
-      dump_graph::DumpFunctionDefToFile(
-          strings::StrCat("encapsulate_fdef_", name), fdef);
+void Encapsulator::Subgraph::ConnectSequencerToOutputs(Graph* graph_out) {
+  if (sequencer_ != nullptr) {
+    std::unordered_set<Node*> output_dependencies;
+    for (Node* node : call_node_outputs_->out_nodes()) {
+      output_dependencies.insert(node);
+    }
+    for (Node* node : output_dependencies) {
+      graph_out->AddControlEdge(sequencer_, node);
     }
+  }
+}
 
-    if (!reuse_existing_functions || library->Find(name) == nullptr) {
-      TF_RETURN_IF_ERROR(library->AddFunctionDef(fdef));
+Status Encapsulator::Subgraph::BuildFunctionDef(
+    const string& name_in, const RewriteSubgraphFn& rewrite_subgraph_fn,
+    bool reuse_existing_functions, FunctionLibraryDefinition* library) {
+  // name_in is copied here because name may be modified below if
+  // rewrite_subgraph_fn is true.
+  string name = name_in;
+  call_node_def_.set_op(name);
+  call_node_def_.set_name(name);
+  call_node_def_.set_device(device_);
+
+  if (rewrite_subgraph_fn) {
+    // Initialize the input and output permutations to the identity.
+    std::vector<int> input_permutation(args_by_src_.size());
+    std::iota(input_permutation.begin(), input_permutation.end(), 0);
+    std::vector<int> output_permutation(results_.size());
+    std::iota(output_permutation.begin(), output_permutation.end(), 0);
+
+    TF_RETURN_IF_ERROR(rewrite_subgraph_fn(
+        &graph_, &input_permutation, &output_permutation, &call_node_def_));
+
+    // Apply the input/output permutations to the 'args_by_...' and 'results_'
+    // mappings, so when we build edges in BuildOutputGraph() we
+    // connect them to the right input/output positions.
+    if (input_permutation.size() != args_by_src_.size()) {
+      return errors::InvalidArgument("Input permutation has incorrect size.");
+    }
+    if (output_permutation.size() != results_.size()) {
+      return errors::InvalidArgument("Output permutation has incorrect size.");
+    }
+    for (auto& arg : args_by_src_) {
+      arg.second = input_permutation[arg.second];
+    }
+    for (auto& arg : args_by_dst_) {
+      arg.second = input_permutation[arg.second];
     }
+    for (auto& result : results_) {
+      result.second = output_permutation[result.second];
+    }
+
+    name = call_node_def_.op();
+  }
+
+  FunctionDef fdef;
+  TF_RETURN_IF_ERROR(GraphToFunctionDef(*graph_, name, &fdef));
+
+  if (VLOG_IS_ON(1)) {
+    VLOG(2) << "Build function def " << name;
+    dump_graph::DumpGraphToFile(
+        strings::StrCat("encapsulate_fdef_graph_", name), *graph_, library);
+    dump_graph::DumpFunctionDefToFile(
+        strings::StrCat("encapsulate_fdef_", name), fdef);
+  }
+
+  if (!reuse_existing_functions || library->Find(name) == nullptr) {
+    TF_RETURN_IF_ERROR(library->AddFunctionDef(fdef));
   }
   return Status::OK();
 }
 
-Status Encapsulator::BuildParallelCheckOp(
+Status Encapsulator::Subgraph::BuildParallelCheckOp(
     const std::unordered_map<const Node*, Node*>& node_images,
-    const Encapsulator::Subgraph& subgraph, Graph* graph_out,
-    Node** parallel_check_op) {
+    Graph* graph_out) {
   // Build an index mapping output positions to node/slot pairs in the
   // original graph.
-  std::vector<NodeSlot> results_by_num(subgraph.results.size());
-  for (const auto& entry : subgraph.results) {
+  std::vector<NodeSlot> results_by_num(results_.size());
+  for (const auto& entry : results_) {
     results_by_num[entry.second] = entry.first;
   }
 
@@ -386,22 +903,22 @@ Status Encapsulator::BuildParallelCheckOp(
     expected_outputs[i] =
         NodeDefBuilder::NodeOut(node_images.at(node_slot.node)->name(),
                                 node_slot.slot, result_dtypes[i]);
-    actual_outputs[i] = NodeDefBuilder::NodeOut(subgraph.call_node_def.name(),
-                                                i, result_dtypes[i]);
+    actual_outputs[i] =
+        NodeDefBuilder::NodeOut(call_node_def_.name(), i, result_dtypes[i]);
   }
   // Assign the parallel check op to a CPU on the same task as the cluster it is
   // checking.
   string device, dummy;
   if (!DeviceNameUtils::SplitDeviceName(
-          subgraph.call_node_inputs->assigned_device_name(), &device, &dummy)) {
+          call_node_inputs_->assigned_device_name(), &device, &dummy)) {
     return errors::InvalidArgument("Could not parse device name");
   }
   strings::StrAppend(&device, "/cpu:0");
 
   NodeDef check_def;
   TF_RETURN_IF_ERROR(
-      NodeDefBuilder(graph_out->NewName(strings::StrCat(
-                         subgraph.call_node_def.name(), "_parallel_check")),
+      NodeDefBuilder(graph_out->NewName(strings::StrCat(call_node_def_.name(),
+                                                        "_parallel_check")),
                      "ParallelCheck")
           .Device(device)
           .Attr("T", result_dtypes)
@@ -421,65 +938,548 @@ Status Encapsulator::BuildParallelCheckOp(
     const NodeSlot& node_slot = results_by_num[i];
     graph_out->AddEdge(node_images.at(node_slot.node), node_slot.slot, check_op,
                        i);
-    graph_out->AddEdge(subgraph.call_node_inputs, i, check_op, num_results + i);
+    graph_out->AddEdge(call_node_inputs_, i, check_op, num_results + i);
   }
 
-  *parallel_check_op = check_op;
+  call_node_outputs_ = check_op;
   return Status::OK();
 }
 
-Status Encapsulator::BuildOutputGraph(bool parallel_checking,
-                                      Graph* graph_out) {
+Status Encapsulator::Subgraph::AddFunctionCallNode(
+    const std::unordered_map<const Node*, Node*>& node_images,
+    bool parallel_checking, Graph* graph_out) {
   Status s;
+  call_node_inputs_ = graph_out->AddNode(call_node_def_, &s);
+  if (!s.ok()) return s;
 
-  // Map from nodes in the input graph to nodes in the output graph.
+  // Copy the assigned device and the key_annotation over.
+  call_node_inputs_->set_assigned_device_name(device_);
+  call_node_outputs_ = call_node_inputs_;
+
+  if (parallel_checking) {
+    TF_RETURN_IF_ERROR(BuildParallelCheckOp(node_images, graph_out));
+  }
+  return Status::OK();
+}
+
+Status Encapsulator::Subgraph::AddRecvAtHostNode(
+    const string& subgraph_name, const string& oc_subgraph_name,
+    OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out) {
+  std::vector<DataType> dtypes(oc_subgraph->inputs.size(), DT_INVALID);
+
+  for (const auto& input : oc_subgraph->inputs) {
+    const Node* src_node = input.first.node;
+    int src_slot = input.first.slot;
+    int input_index = input.second;
+
+    DataType dtype = src_node->output_type(src_slot);
+    dtypes[input_index] = dtype;
+  }
+
+  NodeDef recv_def;
+  NodeDefBuilder builder(strings::StrCat("outside_compilation_", subgraph_name,
+                                         "_", oc_subgraph_name, "_recv"),
+                         kRecvAtHostOp);
+  builder.Attr("dtypes", dtypes);
+  Status s = builder.Finalize(&recv_def);
+  if (!s.ok()) return s;
+
+  oc_subgraph->recv_at_host = graph_out->AddNode(recv_def, &s);
+  if (!s.ok()) return s;
+  oc_subgraph->recv_at_host->set_assigned_device_name(device_);
+
+  // Add a control dependency forcing the RecvAtHost to run before the subgraph
+  // completes. This has no effect on execution order but prevents the
+  // RecvAtHost being pruned.
+  TF_RETURN_IF_ERROR(MakeSequencingNode(subgraph_name, graph_out));
+  graph_out->AddControlEdge(oc_subgraph->recv_at_host, sequencer_);
+
+  return Status::OK();
+}
+
+Status Encapsulator::Subgraph::AddSendFromHostNode(
+    const std::unordered_map<const Node*, Node*>& node_images,
+    const string& subgraph_name, const string& oc_subgraph_name,
+    OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out) {
+  std::vector<DataType> dtypes(oc_subgraph->outputs_by_src.size(), DT_INVALID);
+  std::vector<NodeDefBuilder::NodeOut> inputs(
+      oc_subgraph->outputs_by_src.size());
+
+  for (const auto& output : oc_subgraph->outputs_by_src) {
+    const Node* src_node = output.first.node;
+    Node* src_image = node_images.at(src_node);
+    int src_slot = output.first.slot;
+    int output_index = output.second;
+
+    DataType dtype = src_node->output_type(src_slot);
+    dtypes[output_index] = dtype;
+    inputs[output_index].Reset(src_image->name(), src_slot, dtype);
+  }
+
+  NodeDef send_def;
+  NodeDefBuilder builder(strings::StrCat("outside_compilation_", subgraph_name,
+                                         "_", oc_subgraph_name, "_send"),
+                         kSendFromHostOp);
+  builder.Attr("dtypes", dtypes);
+  builder.Input(inputs);
+  Status s = builder.Finalize(&send_def);
+  if (!s.ok()) return s;
+
+  oc_subgraph->send_from_host = graph_out->AddNode(send_def, &s);
+  if (!s.ok()) return s;
+  oc_subgraph->send_from_host->set_assigned_device_name(device_);
+
+  // Add a control dependency forcing the SendFromHost to run before the
+  // subgraph completes. This has no effect on execution order but prevents the
+  // RecvAtHost being pruned.
+  TF_RETURN_IF_ERROR(MakeSequencingNode(subgraph_name, graph_out));
+  graph_out->AddControlEdge(oc_subgraph->send_from_host, sequencer_);
+
+  return Status::OK();
+}
+
+Status Encapsulator::Subgraph::AddOutsideCompilationHostIONodes(
+    const string& subgraph_name,
+    const std::unordered_map<const Node*, Node*>& node_images,
+    Graph* graph_out) {
+  for (auto& outside_compilation_subgraph_entry :
+       outside_compilation_subgraphs_) {
+    const string& oc_name = outside_compilation_subgraph_entry.first;
+    OutsideCompilationSubgraph& oc_subgraph =
+        outside_compilation_subgraph_entry.second;
+
+    if (!oc_subgraph.inputs.empty() || !oc_subgraph.control_inputs.empty()) {
+      TF_RETURN_IF_ERROR(
+          AddRecvAtHostNode(subgraph_name, oc_name, &oc_subgraph, graph_out));
+    }
+
+    if (!oc_subgraph.outputs_by_src.empty() ||
+        !oc_subgraph.control_outputs.empty()) {
+      TF_RETURN_IF_ERROR(AddSendFromHostNode(node_images, subgraph_name,
+                                             oc_name, &oc_subgraph, graph_out));
+    }
+  }
+  return Status::OK();
+}
+
+Status Encapsulator::GetFunctionNameAttr(
+    Node const* node, string* attr, string* outside_compilation_attr) const {
+  Status s = GetNodeAttr(node->attrs(), group_attribute_, attr);
+  if (s.code() == error::Code::NOT_FOUND) {
+    // Return empty attr if there's no group_attribute.
+    attr->clear();
+  } else {
+    TF_RETURN_IF_ERROR(s);
+  }
+  bool has_group_attr = s.ok();
+  s = GetNodeAttr(node->attrs(), outside_compilation_attribute_,
+                  outside_compilation_attr);
+  if (s.code() == error::Code::NOT_FOUND) {
+    // Return empty attr if there's no outside_compilation attribute.
+    outside_compilation_attr->clear();
+  } else {
+    TF_RETURN_IF_ERROR(s);
+    if (!has_group_attr) {
+      return errors::InvalidArgument(
+          "Node ", node->name(), " has ", outside_compilation_attribute_,
+          " attribute but no ", group_attribute_, " attribute.");
+    }
+  }
+  return Status::OK();
+}
+
+bool IsInSubgraph(const string& func_id, const string& outside_compilation_id) {
+  return !func_id.empty() && outside_compilation_id.empty();
+}
+
+Status Encapsulator::CopySubgraphNodes(
+    std::unordered_map<const Node*, Node*>* node_images) {
+  for (Node* node : graph_in_->op_nodes()) {
+    string func_id;
+    string outside_compilation_id;
+    TF_RETURN_IF_ERROR(
+        GetFunctionNameAttr(node, &func_id, &outside_compilation_id));
+    if (!IsInSubgraph(func_id, outside_compilation_id)) continue;
+
+    Subgraph& subgraph = subgraphs_[func_id];
+    Node* image = subgraph.MakeNodeImage(graph_in_, node);
+    image->ClearAttr(group_attribute_);
+    (*node_images)[node] = image;
+  }
+  return Status::OK();
+}
+
+Status Encapsulator::CopySubgraphEdges(
+    const std::unordered_map<const Node*, Node*>& node_images,
+    std::vector<std::pair<const Node*, Node*>>* src_arg_pairs) {
+  for (const Edge* edge : graph_in_->edges()) {
+    string src_func_id;
+    string src_outside_compilation_id;
+    TF_RETURN_IF_ERROR(GetFunctionNameAttr(edge->src(), &src_func_id,
+                                           &src_outside_compilation_id));
+    string dst_func_id;
+    string dst_outside_compilation_id;
+    TF_RETURN_IF_ERROR(GetFunctionNameAttr(edge->dst(), &dst_func_id,
+                                           &dst_outside_compilation_id));
+    Node* src_image = gtl::FindWithDefault(node_images, edge->src(), nullptr);
+    Node* dst_image = gtl::FindWithDefault(node_images, edge->dst(), nullptr);
+
+    // Copy edges that are local to a subgraph.
+    if (IsInSubgraph(src_func_id, src_outside_compilation_id) &&
+        IsInSubgraph(dst_func_id, dst_outside_compilation_id) &&
+        src_func_id == dst_func_id) {
+      Graph* g = subgraphs_[src_func_id].GetGraph();
+      if (edge->IsControlEdge()) {
+        g->AddControlEdge(src_image, dst_image);
+      } else {
+        g->AddEdge(src_image, edge->src_output(), dst_image, edge->dst_input());
+      }
+      continue;
+    }
+
+    // Record 'src' as an output of its subgraph, if applicable.
+    if (IsInSubgraph(src_func_id, src_outside_compilation_id)) {
+      if (!edge->IsControlEdge()) {
+        DataType dtype = edge->src()->output_type(edge->src_output());
+        if (IsRefType(dtype)) {
+          return errors::InvalidArgument(
+              "Ref Tensors (e.g., Variables) are not supported as results: "
+              "tensor ",
+              edge->src()->name(), ":", edge->src_output());
+        }
+      }
+
+      Subgraph& src_subgraph = subgraphs_[src_func_id];
+      if (src_func_id == dst_func_id) {
+        // src is in the subgraph and dst is outside_compilation in the same
+        // subgraph.
+        src_subgraph.RecordOutsideCompilationInputOrControl(
+            dst_outside_compilation_id, edge);
+      } else {
+        // Ignore control edges leaving the subgraph. We will lift them onto the
+        // enclosing call operators in BuildOutputGraph().
+        if (!edge->IsControlEdge()) {
+          TF_RETURN_IF_ERROR(src_subgraph.RecordResult(edge, node_images));
+        }
+      }
+    }
+
+    // Record 'dst' as an input of its subgraph, if applicable.
+    if (IsInSubgraph(dst_func_id, dst_outside_compilation_id)) {
+      // Look at the type of the destination not the source, since Ref output
+      // Tensors can be automatically cast to non-Ref Tensors at the
+      // destination.
+      if (!edge->IsControlEdge()) {
+        DataType dtype = edge->dst()->input_type(edge->dst_input());
+        if (IsRefType(dtype)) {
+          return errors::InvalidArgument(
+              "Ref Tensors (e.g., Variables) are not supported as args: "
+              "tensor ",
+              edge->src()->name(), ":", edge->src_output());
+        }
+      }
+
+      Subgraph& dst_subgraph = subgraphs_[dst_func_id];
+      if (src_func_id == dst_func_id) {
+        // dst is in the subgraph and src is outside_compilation in the same
+        // subgraph.
+        dst_subgraph.RecordOutsideCompilationOutputOrControl(
+            src_outside_compilation_id, edge);
+      } else {
+        // Ignore control edges entering the subgraph. We will lift them onto
+        // the enclosing call operators in BuildOutputGraph().
+        if (!edge->IsControlEdge()) {
+          TF_RETURN_IF_ERROR(
+              dst_subgraph.RecordArg(edge, node_images, src_arg_pairs));
+        }
+      }
+    }
+  }
+  return Status::OK();
+}
+
+Status Encapsulator::SplitIntoSubgraphs() {
+  Status s;
+
+  // Map from input graph nodes to subgraph nodes.
   std::unordered_map<const Node*, Node*> node_images;
 
-  // Copy all unmarked nodes to the output graph.
+  // Each entry of src_arg_pairs is a pair whose first element is a node in the
+  // original graph that has an output edge in the subgraph, and whose second
+  // element is the arg node in the subgraph that it sends to. The vector will
+  // be filled in below in AddArgs.
+  std::vector<std::pair<const Node*, Node*>> src_arg_pairs;
+
+  TF_RETURN_IF_ERROR(CopySubgraphNodes(&node_images));
+  TF_RETURN_IF_ERROR(CopySubgraphEdges(node_images, &src_arg_pairs));
+
+  // For each subgraph, add the nodes that deal with inputs and outputs its
+  // nested outside_compilation subgraphs. These could not be added earlier
+  // during CopySubgraphEdges since we need to discover all the types of the
+  // inputs and outputs for an outside_compilation subgraph before creating a
+  // single input and output node for it.
+  for (auto& entry : subgraphs_) {
+    Subgraph& subgraph = entry.second;
+    TF_RETURN_IF_ERROR(subgraph.AddSendsToOutsideCompilation(node_images));
+    TF_RETURN_IF_ERROR(subgraph.AddRecvsFromOutsideCompilation(node_images));
+  }
+
+  MarkGuaranteedConstants(*graph_in_, src_arg_pairs);
+
+  for (auto& entry : subgraphs_) {
+    Subgraph& subgraph = entry.second;
+    FixupSourceAndSinkEdges(subgraph.GetGraph());
+  }
+
+  return s;
+}
+
+Status Encapsulator::BuildFunctionDefs(
+    const RewriteSubgraphFn& rewrite_subgraph_fn, bool reuse_existing_functions,
+    FunctionLibraryDefinition* library) {
+  for (auto& subgraph_entry : subgraphs_) {
+    string name = subgraph_entry.first;
+    Subgraph& subgraph = subgraph_entry.second;
+    TF_RETURN_IF_ERROR(subgraph.BuildFunctionDef(
+        name, rewrite_subgraph_fn, reuse_existing_functions, library));
+  }
+  return Status::OK();
+}
+
+Status Encapsulator::CopyNodesToOutputGraph(
+    bool parallel_checking, Graph* graph_out,
+    std::unordered_map<const Node*, Node*>* node_images) {
   for (Node* node : graph_in_->op_nodes()) {
-    string func_id = GetFunctionNameAttr(node);
+    string func_id;
+    string outside_compilation_id;
+    TF_RETURN_IF_ERROR(
+        GetFunctionNameAttr(node, &func_id, &outside_compilation_id));
 
     // Don't copy nodes that going to be encapsulated, unless parallel checking
     // is enabled.
-    if (!func_id.empty() && !parallel_checking) continue;
+    if (IsInSubgraph(func_id, outside_compilation_id) && !parallel_checking)
+      continue;
 
     Node* image = graph_out->CopyNode(node);
-    node_images[node] = image;
+    if (!outside_compilation_id.empty()) {
+      if (parallel_checking) {
+        return errors::InvalidArgument(
+            "Parallel checking is not supported when outside_compilation "
+            "clusters are present.");
+      }
+      image->ClearAttr(group_attribute_);
+      image->ClearAttr(outside_compilation_attribute_);
+    }
+    (*node_images)[node] = image;
+  }
+  (*node_images)[graph_in_->source_node()] = graph_out->source_node();
+  (*node_images)[graph_in_->sink_node()] = graph_out->sink_node();
+  return Status::OK();
+}
+
+Status Encapsulator::AddFunctionCallNodes(
+    const std::unordered_map<const Node*, Node*>& node_images,
+    bool parallel_checking, Graph* graph_out) {
+  for (auto& subgraph_entry : subgraphs_) {
+    TF_RETURN_IF_ERROR(subgraph_entry.second.AddFunctionCallNode(
+        node_images, parallel_checking, graph_out));
   }
-  node_images[graph_in_->source_node()] = graph_out->source_node();
-  node_images[graph_in_->sink_node()] = graph_out->sink_node();
+  return Status::OK();
+}
 
-  // Add function call nodes for each subgraph.
+Status Encapsulator::AddOutsideCompilationHostIONodes(
+    const std::unordered_map<const Node*, Node*>& node_images,
+    Graph* graph_out) {
   for (auto& subgraph_entry : subgraphs_) {
+    const string& subgraph_name = subgraph_entry.first;
     Subgraph& subgraph = subgraph_entry.second;
+    TF_RETURN_IF_ERROR(subgraph.AddOutsideCompilationHostIONodes(
+        subgraph_name, node_images, graph_out));
+  }
+  return Status::OK();
+}
 
-    subgraph.call_node_inputs = graph_out->AddNode(subgraph.call_node_def, &s);
-    if (!s.ok()) return s;
+Status Encapsulator::FindOutputImageOfEdgeSrc(
+    const string& src_func_id, const string& src_outside_compilation_id,
+    const string& dst_func_id, const string& dst_outside_compilation_id,
+    const std::unordered_map<const Node*, Node*>& node_images,
+    const Node* original_src_node, Node** src_image) {
+  if (IsInSubgraph(src_func_id, src_outside_compilation_id)) {
+    if (dst_func_id == src_func_id) {
+      // The edge is from a subgraph to an outside_compilation cluster in the
+      // same subgraph so use the appropriate _RecvAtHost node in the output
+      // graph.
+      TF_RET_CHECK(!dst_outside_compilation_id.empty());
+      *src_image = subgraphs_.at(src_func_id)
+                       .GetRecvAtHostNode(dst_outside_compilation_id);
+    } else {
+      // The edge is from a subgraph to a regular node in the output graph so
+      // use the subgraph's call node output.
+      *src_image = subgraphs_.at(src_func_id).GetCallNodeForOutputs();
+    }
+  } else {
+    // The source of the edge is in the output graph so use the node image in
+    // the output graph.
+    *src_image = node_images.at(original_src_node);
+  }
+  return Status::OK();
+}
+
+int Encapsulator::FindOutputSlotOfEdgeSrc(
+    const string& src_func_id, const string& src_outside_compilation_id,
+    const string& dst_func_id, const string& dst_outside_compilation_id,
+    const Edge* edge) {
+  if (IsInSubgraph(src_func_id, src_outside_compilation_id)) {
+    const Subgraph& src_subgraph = subgraphs_.at(src_func_id);
+    if (src_func_id == dst_func_id) {
+      // 'src' is in a subgraph and 'dst' is outside_compilation in the same
+      // subgraph. Use the corresponding _RecvAtHost output instead.
+      return src_subgraph.GetRecvAtHostSlot(dst_outside_compilation_id, edge);
+    } else {
+      // 'src' is in a subgraph and 'dst' is a regular node in the output
+      // graph. Use the corresponding call output instead.
+      return src_subgraph.GetResultIndexForEdge(edge);
+    }
+  } else {
+    // The source of the edge is in the output graph so use the regular edge
+    // slot.
+    return edge->src_output();
+  }
+}
+
+Status Encapsulator::FindOutputImageOfEdgeDst(
+    const string& src_func_id, const string& src_outside_compilation_id,
+    const string& dst_func_id, const string& dst_outside_compilation_id,
+    const std::unordered_map<const Node*, Node*>& node_images,
+    const Node* original_dst_node, Node** dst_image) {
+  if (IsInSubgraph(dst_func_id, dst_outside_compilation_id)) {
+    if (src_func_id == dst_func_id) {
+      // The edge is to a subgraph from an outside_compilation cluster in the
+      // same subgraph so use the appropriate _SendFromHost node in the output
+      // graph.
+      TF_RET_CHECK(!src_outside_compilation_id.empty());
+      *dst_image = subgraphs_.at(dst_func_id)
+                       .GetSendFromHostNode(src_outside_compilation_id);
+    } else {
+      // The edge is to a subgraph from a regular node in the output graph so
+      // use the subgraph's call node input.
+      *dst_image = subgraphs_.at(dst_func_id).GetCallNodeForInputs();
+    }
+  } else {
+    // The destination of the edge is in the output graph so use the node image
+    // in the output graph.
+    *dst_image = node_images.at(original_dst_node);
+  }
+  return Status::OK();
+}
 
-    // Copy the assigned device and the key_annotation over.
-    subgraph.call_node_inputs->set_assigned_device_name(subgraph.device);
-    subgraph.call_node_outputs = subgraph.call_node_inputs;
+int Encapsulator::FindOutputSlotOfEdgeDst(
+    const string& src_func_id, const string& src_outside_compilation_id,
+    const string& dst_func_id, const string& dst_outside_compilation_id,
+    const Edge* edge) {
+  if (IsInSubgraph(dst_func_id, dst_outside_compilation_id)) {
+    const Subgraph& dst_subgraph = subgraphs_.at(dst_func_id);
+    if (dst_func_id == src_func_id) {
+      // 'dst' is in a subgraph and 'src' is outside_compilation in the same
+      // subgraph. Use the corresponding _SendFromHost input instead.
+      return dst_subgraph.GetSendFromHostSlot(src_outside_compilation_id, edge);
+    } else {
+      // 'dst' is in a subgraph and 'src' is a regular node in the output
+      // graph. Use the corresponding call input instead.
+      return dst_subgraph.GetArgIndexForEdge(edge);
+    }
+  } else {
+    // The destination of the edge is in the output graph so use the regular
+    // edge slot.
+    return edge->dst_input();
+  }
+}
 
+Status Encapsulator::CopyEdgeToOutputGraph(
+    const Edge* edge, const string& src_func_id,
+    const string& src_outside_compilation_id, const string& dst_func_id,
+    const string& dst_outside_compilation_id,
+    const std::unordered_map<const Node*, Node*>& node_images,
+    bool parallel_checking, Graph* graph_out,
+    std::unordered_set<std::pair<NodeSlot, NodeSlot>, NodeSlot::PairHasher>*
+        edges_added) {
+  Node* src_image;
+  TF_RETURN_IF_ERROR(FindOutputImageOfEdgeSrc(
+      src_func_id, src_outside_compilation_id, dst_func_id,
+      dst_outside_compilation_id, node_images, edge->src(), &src_image));
+  Node* dst_image;
+  TF_RETURN_IF_ERROR(FindOutputImageOfEdgeDst(
+      src_func_id, src_outside_compilation_id, dst_func_id,
+      dst_outside_compilation_id, node_images, edge->dst(), &dst_image));
+
+  // If this is a control edge then copy it and return. Lift control edges onto
+  // the enclosing call operator.
+  if (edge->IsControlEdge()) {
+    // Add the control edge, if we have not already added it, using the images
+    // determined above (potentially call operators or RecvAtHost/SendFromHost).
+    if (edges_added->emplace(NodeSlot(src_image, -1), NodeSlot(dst_image, -1))
+            .second) {
+      graph_out->AddControlEdge(src_image, dst_image);
+    }
+
+    // If parallel checking is enabled, also add a control edge to the
+    // corresponding parallel check op.
     if (parallel_checking) {
-      TF_RETURN_IF_ERROR(BuildParallelCheckOp(node_images, subgraph, graph_out,
-                                              &subgraph.call_node_outputs));
+      graph_out->AddControlEdge(src_image, node_images.at(edge->dst()));
     }
+    return Status::OK();
   }
 
+  int src_output =
+      FindOutputSlotOfEdgeSrc(src_func_id, src_outside_compilation_id,
+                              dst_func_id, dst_outside_compilation_id, edge);
+
+  int dst_input =
+      FindOutputSlotOfEdgeDst(src_func_id, src_outside_compilation_id,
+                              dst_func_id, dst_outside_compilation_id, edge);
+
+  if (IsInSubgraph(dst_func_id, dst_outside_compilation_id) &&
+      parallel_checking) {
+    // If we are parallel checking, also feed the tensor as an input to the
+    // corresponding parallel check subgraph.
+    graph_out->AddEdge(src_image, src_output, node_images.at(edge->dst()),
+                       edge->dst_input());
+  }
+
+  // Add the edge, if we have not already added it.
+  if (edges_added
+          ->emplace(NodeSlot(src_image, src_output),
+                    NodeSlot(dst_image, dst_input))
+          .second) {
+    graph_out->AddEdge(src_image, src_output, dst_image, dst_input);
+  }
+  return Status::OK();
+}
+
+Status Encapsulator::AddEdgesToOutputGraph(
+    const std::unordered_map<const Node*, Node*>& node_images,
+    bool parallel_checking, Graph* graph_out) {
   // Set of edges already added to the output graph, represented as (src, dst)
   // pairs. We use the set to deduplicate edges; multiple edges in the input
   // graph may map to one edge in the output graph.
   std::unordered_set<std::pair<NodeSlot, NodeSlot>, NodeSlot::PairHasher>
       edges_added;
 
-  // Add edges to the graph_out graph.
   for (const Edge* edge : graph_in_->edges()) {
-    string src_func_id = GetFunctionNameAttr(edge->src());
-    string dst_func_id = GetFunctionNameAttr(edge->dst());
+    string src_func_id;
+    string src_outside_compilation_id;
+    TF_RETURN_IF_ERROR(GetFunctionNameAttr(edge->src(), &src_func_id,
+                                           &src_outside_compilation_id));
+    string dst_func_id;
+    string dst_outside_compilation_id;
+    TF_RETURN_IF_ERROR(GetFunctionNameAttr(edge->dst(), &dst_func_id,
+                                           &dst_outside_compilation_id));
 
     // Ignore edges that are strictly contained within one subgraph, unless
     // we are constructing parallel check graphs.
-    if (!src_func_id.empty() && src_func_id == dst_func_id) {
+    if (IsInSubgraph(src_func_id, src_outside_compilation_id) &&
+        IsInSubgraph(dst_func_id, dst_outside_compilation_id) &&
+        src_func_id == dst_func_id) {
       if (parallel_checking) {
         Node* src_image = node_images.at(edge->src());
         Node* dst_image = node_images.at(edge->dst());
@@ -493,89 +1493,62 @@ Status Encapsulator::BuildOutputGraph(bool parallel_checking,
       continue;
     }
 
-    // We have an edge that crosses a cluster boundary.
-    Node* src_image = src_func_id.empty()
-                          ? node_images.at(edge->src())
-                          : subgraphs_.at(src_func_id).call_node_outputs;
-    Node* dst_image = dst_func_id.empty()
-                          ? node_images.at(edge->dst())
-                          : subgraphs_.at(dst_func_id).call_node_inputs;
-
-    // Copy control edges. Lift control edges onto the enclosing call operator.
-    if (edge->IsControlEdge()) {
-      // Add the control edge, if we have not already added it.
-      if (edges_added.emplace(NodeSlot(src_image, -1), NodeSlot(dst_image, -1))
-              .second) {
-        graph_out->AddControlEdge(src_image, dst_image);
-      }
-
-      // If parallel checking is enabled, also add a control edge to the
-      // corresponding parallel check op.
-      if (parallel_checking) {
-        graph_out->AddControlEdge(src_image, node_images.at(edge->dst()));
-      }
-      continue;
-    }
+    // We have an edge that crosses a cluster boundary or is entirely within the
+    // unclustered graph.
+    TF_RETURN_IF_ERROR(CopyEdgeToOutputGraph(
+        edge, src_func_id, src_outside_compilation_id, dst_func_id,
+        dst_outside_compilation_id, node_images, parallel_checking, graph_out,
+        &edges_added));
+  }
 
-    int src_output = edge->src_output();
-    if (!src_func_id.empty()) {
-      // 'src' is in a subgraph. Use the corresponding call output instead.
-      const Subgraph& src_subgraph = subgraphs_.at(src_func_id);
-      src_output =
-          src_subgraph.results.at(NodeSlot(edge->src(), edge->src_output()));
-    }
+  for (auto& subgraph_entry : subgraphs_) {
+    Subgraph& subgraph = subgraph_entry.second;
+    subgraph.ConnectSequencerToOutputs(graph_out);
+  }
 
-    int dst_input = edge->dst_input();
+  return Status::OK();
+}
 
-    if (!dst_func_id.empty()) {
-      // 'dst' is in a subgraph. Use the corresponding call input instead.
-      const Subgraph& dst_subgraph = subgraphs_.at(dst_func_id);
-      dst_input =
-          dst_subgraph.args_by_dst.at(NodeSlot(edge->dst(), edge->dst_input()));
+Status Encapsulator::BuildOutputGraph(bool parallel_checking,
+                                      Graph* graph_out) {
+  // Map from nodes in the input graph to nodes in the output graph.
+  std::unordered_map<const Node*, Node*> node_images;
 
-      // If we are parallel checking, also feed the tensor as an input to the
-      // corresponding parallel check subgraph.
-      if (parallel_checking) {
-        graph_out->AddEdge(src_image, src_output, node_images.at(edge->dst()),
-                           edge->dst_input());
-      }
-    }
-    // Add the edge, if we have not already added it.
-    if (edges_added
-            .emplace(NodeSlot(src_image, src_output),
-                     NodeSlot(dst_image, dst_input))
-            .second) {
-      graph_out->AddEdge(src_image, src_output, dst_image, dst_input);
-    }
-  }
+  TF_RETURN_IF_ERROR(
+      CopyNodesToOutputGraph(parallel_checking, graph_out, &node_images));
+  TF_RETURN_IF_ERROR(
+      AddFunctionCallNodes(node_images, parallel_checking, graph_out));
+  TF_RETURN_IF_ERROR(AddOutsideCompilationHostIONodes(node_images, graph_out));
+  TF_RETURN_IF_ERROR(
+      AddEdgesToOutputGraph(node_images, parallel_checking, graph_out));
 
-  return s;
+  return Status::OK();
 }
 
 }  // anonymous namespace
 
 Status EncapsulateSubgraphsInFunctions(
-    string group_attribute, const Graph& graph_in,
-    const RewriteSubgraphFn& rewrite_subgraph_fn, bool parallel_checking,
-    bool reuse_existing_functions, std::unique_ptr<Graph>* graph_out,
-    FunctionLibraryDefinition* library) {
+    string group_attribute, string outside_compilation_attribute,
+    const Graph& graph_in, const RewriteSubgraphFn& rewrite_subgraph_fn,
+    bool parallel_checking, bool reuse_existing_functions,
+    std::unique_ptr<Graph>* graph_out, FunctionLibraryDefinition* library) {
   Status s;
 
-  Encapsulator encapsulator(std::move(group_attribute), &graph_in);
-  s = encapsulator.SplitIntoSubgraphs();
-  if (!s.ok()) return s;
+  Encapsulator encapsulator(std::move(group_attribute),
+                            std::move(outside_compilation_attribute),
+                            &graph_in);
+  TF_RETURN_IF_ERROR(encapsulator.SplitIntoSubgraphs());
 
-  s = encapsulator.BuildFunctionDefs(rewrite_subgraph_fn,
-                                     reuse_existing_functions, library);
-  if (!s.ok()) return s;
+  TF_RETURN_IF_ERROR(encapsulator.BuildFunctionDefs(
+      rewrite_subgraph_fn, reuse_existing_functions, library));
 
   std::unique_ptr<Graph> out(new Graph(library));
   out->set_versions(graph_in.versions());
-  s = encapsulator.BuildOutputGraph(parallel_checking, out.get());
-  if (!s.ok()) return s;
+  TF_RETURN_IF_ERROR(
+      encapsulator.BuildOutputGraph(parallel_checking, out.get()));
 
   *graph_out = std::move(out);
-  return s;
+  return Status::OK();
 }
 
 // Finds the types of the _Arg nodes, indexed by position.
@@ -690,9 +1663,9 @@ Status EncapsulateSubgraphsPass::Run(
   };
 
   TF_RETURN_IF_ERROR(EncapsulateSubgraphsInFunctions(
-      kXlaClusterAttr, **options.graph, rewrite_subgraph,
-      flags->tf_xla_parallel_checking, /*reuse_existing_functions=*/false,
-      &graph_out, library));
+      kXlaClusterAttr, kXlaOutsideCompilationAttr, **options.graph,
+      rewrite_subgraph, flags->tf_xla_parallel_checking,
+      /*reuse_existing_functions=*/false, &graph_out, library));
 
   if (VLOG_IS_ON(1)) {
     dump_graph::DumpGraphToFile("after_encapsulate_subgraphs", *graph_out,
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
index b0987f76c91ed48df52fab303ea6052ebd8fd336..34be4409a381197d2191e083727aa8d48ab8cd63 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
@@ -48,6 +48,16 @@ typedef std::function<Status(
 // 'group_attribute' must be a string valued-attribute that names the new
 // functions to introduce.
 //
+// 'outside_compilation_attribute' must be a string-valued attribute that is
+// used to tag nodes within a subgraph to be part of an 'outside_compilation'
+// cluster within the subgraph. A cluster is formed from the set of nodes with
+// the same value of outside_compilation_subgraph and group_attribute. The nodes
+// in an outside_compilation cluster are left in the original graph. Edges
+// crossing from the subgraph to an outside_compilation cluster nested in the
+// subgraph are lifted into a SendToHost/RecvAtHost pair of nodes, and edges
+// crossing from an outside_compilation cluster into its enclosing subgraph are
+// lifted into a SendFromHost/RecvFromHost pair of nodes.
+//
 // If 'rewrite_subgraph_fn' is set, it is applied to each subgraph before
 // function conversion.
 //
@@ -64,10 +74,10 @@ typedef std::function<Status(
 // dep from B. Originally D must run after C, post-transformation this
 // dependency is lost.
 Status EncapsulateSubgraphsInFunctions(
-    string group_attribute, const Graph& graph_in,
-    const RewriteSubgraphFn& rewrite_subgraph_fn, bool parallel_checking,
-    bool reuse_existing_functions, std::unique_ptr<Graph>* graph_out,
-    FunctionLibraryDefinition* library);
+    string group_attribute, string outside_compilation_attribute,
+    const Graph& graph_in, const RewriteSubgraphFn& rewrite_subgraph_fn,
+    bool parallel_checking, bool reuse_existing_functions,
+    std::unique_ptr<Graph>* graph_out, FunctionLibraryDefinition* library);
 
 // The attribute that marks function calls produced by the encapsulate
 // subgraphs pass and that should in turn be compiled via _XlaLaunch operators.
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
index 4a1dbaf05dc7824835f3567c6abcf48222720230..b100861d5e9c04a8f9d32d486e0ee7252b79c62b 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
@@ -36,7 +36,7 @@ bool EqualFunctionDef(const FunctionDef& a, const FunctionDef& b,
     if (diff) {
       *diff = strings::StrCat("Definition mismatch for function ",
                               a.signature().name(), ", expected:\n",
-                              a.DebugString());
+                              a.DebugString(), "\ngot:\n", b.DebugString());
     }
     return false;
   }
@@ -82,6 +82,24 @@ bool EqualFunctionDefLibrary(const FunctionDefLibrary& expected,
         << diff << "\nActual: " << actual.DebugString();          \
   } while (false)
 
+// TODO(misard): remove these fake registrations once there are real Ops to be
+// compiled.
+REGISTER_OP("_XlaSendToHost")
+    .Input("input: dtypes")
+    .Attr("dtypes: list(type) >= 0");
+
+REGISTER_OP("_XlaRecvFromHost")
+    .Output("output: dtypes")
+    .Attr("dtypes: list(type) >= 0");
+
+REGISTER_OP("_XlaSendFromHost")
+    .Input("input: dtypes")
+    .Attr("dtypes: list(type) >= 0");
+
+REGISTER_OP("_XlaRecvAtHost")
+    .Output("output: dtypes")
+    .Attr("dtypes: list(type) >= 0");
+
 REGISTER_OP("InputTest").Output("o: float");
 
 REGISTER_OP("UnaryTest").Input("a: float").Output("o: float");
@@ -98,10 +116,32 @@ REGISTER_OP("AddNLikeTest")
     .SetIsCommutative()
     .SetIsAggregate();
 
+Node* NoOp(const GraphDefBuilder::Options& opts) {
+  return ops::SourceOp("NoOp", opts);
+}
+
 Node* Input(const GraphDefBuilder::Options& opts) {
   return ops::SourceOp("InputTest", opts);
 }
 
+Node* RecvAtHost(const gtl::ArraySlice<DataType>& dtypes,
+                 const GraphDefBuilder::Options& opts) {
+  if (opts.HaveError()) return nullptr;
+  NodeBuilder node_builder(opts.GetNameForOp("_XlaRecvAtHost"),
+                           "_XlaRecvAtHost", opts.op_registry());
+  return opts.WithAttr("dtypes", dtypes).FinalizeBuilder(&node_builder);
+}
+
+Node* SendFromHost(const std::vector<ops::NodeOut>& inputs,
+                   const gtl::ArraySlice<DataType>& dtypes,
+                   const GraphDefBuilder::Options& opts) {
+  if (opts.HaveError()) return nullptr;
+  NodeBuilder node_builder(opts.GetNameForOp("_XlaSendFromHost"),
+                           "_XlaSendFromHost", opts.op_registry());
+  node_builder.Input(inputs);
+  return opts.WithAttr("dtypes", dtypes).FinalizeBuilder(&node_builder);
+}
+
 Node* Unary(ops::NodeOut a, const GraphDefBuilder::Options& opts) {
   return ops::UnaryOp("UnaryTest", std::move(a), opts);
 }
@@ -145,7 +185,7 @@ Status Encapsulate(GraphDef* graphdef, FunctionDefLibrary* library) {
   if (!s.ok()) return s;
 
   std::unique_ptr<Graph> graph_out;
-  s = EncapsulateSubgraphsInFunctions("_encapsulate", *graph,
+  s = EncapsulateSubgraphsInFunctions("_encapsulate", "_outside", *graph,
                                       /*rewrite_subgraph_fn=*/{},
                                       /*parallel_checking=*/false,
                                       /*reuse_existing_functions=*/false,
@@ -178,6 +218,7 @@ TEST(EncapsulateSubgraphsTest, NoFunctions) {
   FunctionDefLibrary library_out = library_in;
   TF_EXPECT_OK(Encapsulate(&graphdef_out, &library_out));
 
+  // If there are no marked nodes, funcification should be a no-op.
   TF_EXPECT_GRAPH_EQ(graphdef_in, graphdef_out);
   TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_in, library_out);
 }
@@ -230,7 +271,6 @@ TEST(EncapsulateSubgraphsTest, OneFunction) {
     TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
   }
 
-  // If there are no marked nodes, funcification should be a no-op.
   TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef);
   TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library);
 }
@@ -342,9 +382,9 @@ TEST(EncapsulateSubgraphsTest, InputDeduplication) {
   FunctionLibraryDefinition library(OpRegistry::Global(), {});
   std::unique_ptr<Graph> graph;
   TF_ASSERT_OK(EncapsulateSubgraphsInFunctions(
-      "_cluster", graph_before_encapsulation, /*rewrite_subgraph_fn=*/{},
-      /*parallel_checking=*/false, /*reuse_existing_functions=*/false, &graph,
-      &library));
+      "_cluster", "_outside", graph_before_encapsulation,
+      /*rewrite_subgraph_fn=*/{}, /*parallel_checking=*/false,
+      /*reuse_existing_functions=*/false, &graph, &library));
 
   std::vector<string> expected_nodes = {"cluster1", "cluster2", "mul", "x"};
   EXPECT_EQ(expected_nodes, GraphNodes(*graph));
@@ -374,9 +414,9 @@ TEST(EncapsulateSubgraphsTest, ParallelChecking) {
   FunctionLibraryDefinition library(OpRegistry::Global(), {});
   std::unique_ptr<Graph> graph;
   TF_ASSERT_OK(EncapsulateSubgraphsInFunctions(
-      "_cluster", graph_before_encapsulation, /*rewrite_subgraph_fn=*/{},
-      /*parallel_checking=*/true, /*reuse_existing_functions=*/false, &graph,
-      &library));
+      "_cluster", "_outside", graph_before_encapsulation,
+      /*rewrite_subgraph_fn=*/{}, /*parallel_checking=*/true,
+      /*reuse_existing_functions=*/false, &graph, &library));
 
   std::vector<string> expected_nodes = {
       "add1", "add2", "cluster1", "cluster1_parallel_check/_0",
@@ -398,5 +438,782 @@ TEST(EncapsulateSubgraphsTest, ParallelChecking) {
   EXPECT_EQ(expected_edges, GraphEdges(*graph));
 }
 
+const Node* FindNodeByName(const Graph& graph, const string& name) {
+  for (const Node* node : graph.nodes()) {
+    if (node->name() == name) return node;
+  }
+  return nullptr;
+}
+
+bool HasGuaranteeConstAttr(const Node& n) {
+  bool is_guaranteed_constant = false;
+  if (!GetNodeAttr(n.attrs(), "_is_guaranteed_constant",
+                   &is_guaranteed_constant)
+           .ok()) {
+    return false;
+  }
+  return is_guaranteed_constant;
+}
+
+TEST(EncapsulateSubgraphsWithGuaranteeConstOpTest, Simple) {
+  Scope root = Scope::NewRootScope().ExitOnError().WithDevice(
+      "/job:localhost/replica:0/task:0/cpu:0");
+  auto x1 = ops::Placeholder(root.WithOpName("x1"), DT_FLOAT);
+  auto const_x2 = ops::Const(root.WithOpName("const_x2"), 10.0f);
+  auto const_guarantee_x1 =
+      ops::GuaranteeConst(root.WithOpName("const_guarantee_x1"), x1);
+  auto add1 = ops::Add(root.WithOpName("add1"), const_guarantee_x1, const_x2);
+  add1.node()->AddAttr("_encapsulate", "encapsulate1");
+
+  Graph graph_before(OpRegistry::Global());
+  TF_ASSERT_OK(root.ToGraph(&graph_before));
+
+  std::unique_ptr<Graph> graph_after;
+  FunctionLibraryDefinition library(OpRegistry::Global(), {});
+  int guaranteed_consts = 0;
+  TF_ASSERT_OK(EncapsulateSubgraphsInFunctions(
+      "_encapsulate", "_outside", graph_before,
+      /*rewrite_subgraph_fn=*/
+      [&guaranteed_consts](std::unique_ptr<Graph>* graph_ptr,
+                           std::vector<int>* input_permutation,
+                           std::vector<int>* output_permutation,
+                           NodeDef* call_def) {
+        Graph* graph = graph_ptr->get();
+        for (const Node* n : graph->nodes()) {
+          if (n->type_string() == "_Arg" &&
+              StringPiece(n->name()).starts_with("const")) {
+            ++guaranteed_consts;
+            EXPECT_TRUE(HasGuaranteeConstAttr(*n));
+          } else {
+            EXPECT_FALSE(HasGuaranteeConstAttr(*n));
+          }
+        }
+        return Status::OK();
+      },
+      /*parallel_checking=*/false,
+      /*reuse_existing_functions=*/false, &graph_after, &library));
+  EXPECT_EQ(2, guaranteed_consts);
+}
+
+TEST(EncapsulateSubgraphsWithGuaranteeConstOpTest, Add) {
+  Scope root = Scope::NewRootScope().ExitOnError().WithDevice(
+      "/job:localhost/replica:0/task:0/cpu:0");
+  auto x1 = ops::Placeholder(root.WithOpName("x1"), DT_FLOAT);
+  auto x2 = ops::Placeholder(root.WithOpName("x2"), DT_FLOAT);
+  auto const_guarantee_x1 =
+      ops::GuaranteeConst(root.WithOpName("const_guarantee_x1"), x1);
+  auto const_guarantee_x2 =
+      ops::GuaranteeConst(root.WithOpName("const_guarantee_x2"), x2);
+  auto const_guarantee_add1 = ops::Add(root.WithOpName("const_guarantee_add1"),
+                                       const_guarantee_x1, const_guarantee_x2);
+  auto add2 = ops::Add(root.WithOpName("add2"), const_guarantee_x1, x2);
+  auto mul1 = ops::Mul(root.WithOpName("mul1"), const_guarantee_add1, add2);
+  mul1.node()->AddAttr("_encapsulate", "encapsulate1");
+
+  Graph graph_before(OpRegistry::Global());
+  TF_ASSERT_OK(root.ToGraph(&graph_before));
+
+  std::unique_ptr<Graph> graph_after;
+  FunctionLibraryDefinition library(OpRegistry::Global(), {});
+  int guaranteed_consts = 0;
+  TF_ASSERT_OK(EncapsulateSubgraphsInFunctions(
+      "_encapsulate", "_outside", graph_before,
+      /*rewrite_subgraph_fn=*/
+      [&guaranteed_consts](std::unique_ptr<Graph>* graph_ptr,
+                           std::vector<int>* input_permutation,
+                           std::vector<int>* output_permutation,
+                           NodeDef* call_def) {
+        Graph* graph = graph_ptr->get();
+        for (const Node* n : graph->nodes()) {
+          if (n->type_string() == "_Arg" &&
+              StringPiece(n->name()).starts_with("const")) {
+            ++guaranteed_consts;
+            EXPECT_TRUE(HasGuaranteeConstAttr(*n));
+          } else {
+            EXPECT_FALSE(HasGuaranteeConstAttr(*n));
+          }
+        }
+        return Status::OK();
+      },
+      /*parallel_checking=*/false,
+      /*reuse_existing_functions=*/false, &graph_after, &library));
+  // Only 1 runtime const, which is const_guarantee_add1. Add2 has one const
+  // and another non-const, so overall non-const.
+  EXPECT_EQ(1, guaranteed_consts);
+}
+
+// Test with one function to transform and one outside_compilation cluster.
+TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) {
+  FunctionDefLibrary library;
+  GraphDef graphdef;
+
+  {
+    *library.add_function() = test::function::XTimesTwo();
+
+    GraphDefBuilder b1(GraphDefBuilder::kFailImmediately);
+    Node* a = Input(b1.opts().WithName("A"));
+    Node* b = Input(b1.opts().WithName("B"));
+    // Give nodes 'c' and 'd' names that collide after lowercasing.
+    Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1"));
+    Node* d = Binary(b, c,
+                     b1.opts().WithName("c").WithControlInput(c).WithAttr(
+                         "_encapsulate", "F1"));
+    Node* e = Binary(c, d,
+                     b1.opts()
+                         .WithName("E")
+                         .WithControlInputs({b, d})
+                         .WithAttr("_encapsulate", "F1")
+                         .WithAttr("_outside", "O1"));
+    Node* f = Binary(c, e,
+                     b1.opts().WithName("F").WithControlInput(e).WithAttr(
+                         "_encapsulate", "F1"));
+    Binary(a, f, b1.opts().WithName("G").WithControlInput(e));
+    TF_EXPECT_OK(b1.ToGraphDef(&graphdef));
+  }
+
+  TF_EXPECT_OK(Encapsulate(&graphdef, &library));
+
+  FunctionDefLibrary library_expected;
+  GraphDef graphdef_expected;
+
+  *library_expected.add_function() = test::function::XTimesTwo();
+  *library_expected.add_function() = FunctionDefHelper::Create(
+      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval:float"}, {},
+      {
+          {{"C"}, "UnaryTest", {"a_0_arg"}},
+          {{"c"}, "BinaryTest", {"b_0_arg", "C:o:0"}, {}, {"C"}},
+          {{"F"},
+           "BinaryTest",
+           {"C:o:0", "outside_compilation_O1_recv:output:0"},
+           {},
+           {"outside_compilation_O1_recv"}},
+          {{"outside_compilation_O1_send"},
+           "_XlaSendToHost",
+           {"C:o:0", "c:o:0"},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT, DT_FLOAT})}},
+           {"c"}},
+          {{"outside_compilation_O1_recv"},
+           "_XlaRecvFromHost",
+           {},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT})}},
+           {"outside_compilation_O1_send"}},
+      },
+      {{"f_0_retval", "F:o:0"}});
+
+  {
+    std::unique_ptr<FunctionLibraryDefinition> lib_def(
+        new FunctionLibraryDefinition(OpRegistry::Global(), library_expected));
+    GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get());
+    Node* a = Input(b2.opts().WithName("A"));
+    Node* b = Input(b2.opts().WithName("B"));
+
+    NodeBuilder node_builder("F1", "F1", lib_def.get());
+    node_builder.Input(a).Input(b);
+    Node* call = b2.opts().FinalizeBuilder(&node_builder);
+
+    Node* recv =
+        RecvAtHost({DT_FLOAT, DT_FLOAT},
+                   b2.opts().WithName("outside_compilation_F1_O1_recv"));
+    Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1),
+                     b2.opts().WithName("E").WithControlInputs({recv, b}));
+    Node* send = SendFromHost({e}, {DT_FLOAT},
+                              b2.opts()
+                                  .WithName("outside_compilation_F1_O1_send")
+                                  .WithControlInput(e));
+
+    Node* s = NoOp(
+        b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}));
+
+    Binary(a, call, b2.opts().WithName("G").WithControlInputs({s, e}));
+    TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
+  }
+
+  TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef);
+  TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library);
+}
+
+// Test with one function to transform and two outside_compilation clusters.
+TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
+  FunctionDefLibrary library;
+  GraphDef graphdef;
+
+  {
+    GraphDefBuilder b1(GraphDefBuilder::kFailImmediately);
+    Node* a = Input(b1.opts().WithName("A"));
+    Node* b = Input(b1.opts().WithName("B"));
+    Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1"));
+    Node* d =
+        Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1"));
+    Node* e = Binary(c, d,
+                     b1.opts()
+                         .WithName("E")
+                         .WithControlInputs({b, d})
+                         .WithAttr("_encapsulate", "F1")
+                         .WithAttr("_outside", "O1"));
+    Node* f = Binary(c, e,
+                     b1.opts().WithName("F").WithControlInput(e).WithAttr(
+                         "_encapsulate", "F1"));
+    Node* g = Binary(e, f,
+                     b1.opts()
+                         .WithName("G")
+                         .WithControlInputs({e, f})
+                         .WithAttr("_encapsulate", "F1")
+                         .WithAttr("_outside", "O2"));
+    Node* h = Binary(d, e,
+                     b1.opts()
+                         .WithName("H")
+                         .WithAttr("_encapsulate", "F1")
+                         .WithAttr("_outside", "O2"));
+    Node* i = Unary(h, b1.opts().WithName("I").WithAttr("_encapsulate", "F1"));
+    Binary(g, i, b1.opts().WithName("J"));
+    TF_EXPECT_OK(b1.ToGraphDef(&graphdef));
+  }
+
+  TF_EXPECT_OK(Encapsulate(&graphdef, &library));
+
+  FunctionDefLibrary library_expected;
+  GraphDef graphdef_expected;
+
+  *library_expected.add_function() = FunctionDefHelper::Create(
+      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"i_0_retval:float"}, {},
+      {
+          {{"C"}, "UnaryTest", {"a_0_arg"}},
+          {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}, {}},
+          {{"I"}, "UnaryTest", {"outside_compilation_O2_recv:output:0"}},
+          {{"F"},
+           "BinaryTest",
+           {"C:o:0", "outside_compilation_O1_recv:output:0"},
+           {},
+           {"outside_compilation_O1_recv"}},
+          {{"outside_compilation_O2_send"},
+           "_XlaSendToHost",
+           {"D:o:0", "F:o:0"},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT, DT_FLOAT})}},
+           {"F"}},
+          {{"outside_compilation_O1_send"},
+           "_XlaSendToHost",
+           {"C:o:0", "D:o:0"},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT, DT_FLOAT})}},
+           {"D"}},
+          {{"outside_compilation_O2_recv"},
+           "_XlaRecvFromHost",
+           {},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT})}},
+           {"outside_compilation_O2_send"}},
+          {{"outside_compilation_O1_recv"},
+           "_XlaRecvFromHost",
+           {},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT})}},
+           {"outside_compilation_O1_send"}},
+      },
+      {{"i_0_retval", "I:o:0"}});
+
+  {
+    std::unique_ptr<FunctionLibraryDefinition> lib_def(
+        new FunctionLibraryDefinition(OpRegistry::Global(), library_expected));
+    GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get());
+    Node* a = Input(b2.opts().WithName("A"));
+    Node* b = Input(b2.opts().WithName("B"));
+
+    NodeBuilder node_builder("F1", "F1", lib_def.get());
+    node_builder.Input(a).Input(b);
+    Node* call = b2.opts().FinalizeBuilder(&node_builder);
+
+    Node* recv1 =
+        RecvAtHost({DT_FLOAT, DT_FLOAT},
+                   b2.opts().WithName("outside_compilation_F1_O1_recv"));
+    Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1),
+                     b2.opts().WithName("E").WithControlInputs({recv1, b}));
+    Node* send1 = SendFromHost({e}, {DT_FLOAT},
+                               b2.opts()
+                                   .WithName("outside_compilation_F1_O1_send")
+                                   .WithControlInput(e));
+
+    Node* recv2 =
+        RecvAtHost({DT_FLOAT, DT_FLOAT},
+                   b2.opts().WithName("outside_compilation_F1_O2_recv"));
+    Node* g = Binary(e, ops::NodeOut(recv2, 1),
+                     b2.opts().WithName("G").WithControlInputs({recv2, e}));
+    Node* h = Binary(ops::NodeOut(recv2, 0), e, b2.opts().WithName("H"));
+    Node* send2 = SendFromHost(
+        {h}, {DT_FLOAT}, b2.opts().WithName("outside_compilation_F1_O2_send"));
+
+    Node* s = NoOp(b2.opts()
+                       .WithName("F1_sequencer")
+                       .WithControlInputs({recv1, send1, recv2, send2}));
+
+    Binary(g, call, b2.opts().WithName("J").WithControlInput(s));
+    TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
+  }
+
+  TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef);
+  TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library);
+}
+
+// Test with two functions to transform, each with one outside_compilation
+// cluster.
+TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
+  FunctionDefLibrary library;
+  GraphDef graphdef;
+
+  {
+    GraphDefBuilder b1(GraphDefBuilder::kFailImmediately);
+    Node* a = Input(b1.opts().WithName("A"));
+    Node* b = Input(b1.opts().WithName("B"));
+    Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1"));
+    Node* d =
+        Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1"));
+    Node* e = Binary(c, d,
+                     b1.opts()
+                         .WithName("E")
+                         .WithControlInputs({b, d})
+                         .WithAttr("_encapsulate", "F1")
+                         .WithAttr("_outside", "O1"));
+    Node* f = Binary(c, e,
+                     b1.opts().WithName("F").WithControlInput(e).WithAttr(
+                         "_encapsulate", "F1"));
+    Node* g = Binary(e, f,
+                     b1.opts().WithName("G").WithControlInputs({e, f}).WithAttr(
+                         "_encapsulate", "F2"));
+    Node* h = Binary(d, g,
+                     b1.opts()
+                         .WithName("H")
+                         .WithAttr("_encapsulate", "F2")
+                         .WithAttr("_outside", "O1"));
+    Node* i =
+        Binary(f, h, b1.opts().WithName("I").WithAttr("_encapsulate", "F2"));
+    Binary(g, i, b1.opts().WithName("J"));
+    TF_EXPECT_OK(b1.ToGraphDef(&graphdef));
+  }
+
+  TF_EXPECT_OK(Encapsulate(&graphdef, &library));
+
+  FunctionDefLibrary library_expected;
+  GraphDef graphdef_expected;
+
+  *library_expected.add_function() = FunctionDefHelper::Create(
+      "F1", {"a_0_arg:float", "b_0_arg:float"},
+      {"f_0_retval:float", "d_0_retval:float"}, {},
+      {
+          {{"C"}, "UnaryTest", {"a_0_arg"}},
+          {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
+          {{"F"},
+           "BinaryTest",
+           {"C:o:0", "outside_compilation_O1_recv:output:0"},
+           {},
+           {"outside_compilation_O1_recv"}},
+          {{"outside_compilation_O1_send"},
+           "_XlaSendToHost",
+           {"C:o:0", "D:o:0"},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT, DT_FLOAT})}},
+           {"D"}},
+          {{"outside_compilation_O1_recv"},
+           "_XlaRecvFromHost",
+           {},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT})}},
+           {"outside_compilation_O1_send"}},
+      },
+      {{"d_0_retval", "D:o:0"}, {"f_0_retval", "F:o:0"}});
+
+  *library_expected.add_function() = FunctionDefHelper::Create(
+      "F2", {"e_0_arg:float", "f_0_arg:float"},
+      {"g_0_retval:float", "i_0_retval:float"}, {},
+      {
+          {{"G"}, "BinaryTest", {"e_0_arg", "f_0_arg"}},
+          {{"I"},
+           "BinaryTest",
+           {"f_0_arg", "outside_compilation_O1_recv:output:0"}},
+          {{"outside_compilation_O1_send"},
+           "_XlaSendToHost",
+           {"G:o:0"},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT})}}},
+          {{"outside_compilation_O1_recv"},
+           "_XlaRecvFromHost",
+           {},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT})}},
+           {"outside_compilation_O1_send"}},
+      },
+      {{"g_0_retval", "G:o:0"}, {"i_0_retval", "I:o:0"}});
+
+  {
+    std::unique_ptr<FunctionLibraryDefinition> lib_def(
+        new FunctionLibraryDefinition(OpRegistry::Global(), library_expected));
+    GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get());
+    Node* a = Input(b2.opts().WithName("A"));
+    Node* b = Input(b2.opts().WithName("B"));
+
+    Node* recv1 =
+        RecvAtHost({DT_FLOAT, DT_FLOAT},
+                   b2.opts().WithName("outside_compilation_F1_O1_recv"));
+    Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1),
+                     b2.opts().WithName("E").WithControlInputs({recv1, b}));
+    Node* send1 = SendFromHost({e}, {DT_FLOAT},
+                               b2.opts()
+                                   .WithName("outside_compilation_F1_O1_send")
+                                   .WithControlInput(e));
+    NodeBuilder node_builder1("F1", "F1", lib_def.get());
+    node_builder1.Input(a).Input(b);
+    Node* call1 = b2.opts().FinalizeBuilder(&node_builder1);
+    Node* s1 = NoOp(
+        b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}));
+
+    Node* recv2 = RecvAtHost(
+        {DT_FLOAT}, b2.opts().WithName("outside_compilation_F2_O1_recv"));
+    Node* h = Binary(ops::NodeOut(call1, 1), recv2,
+                     b2.opts().WithName("H").WithControlInput(s1));
+    Node* send2 = SendFromHost(
+        {h}, {DT_FLOAT}, b2.opts().WithName("outside_compilation_F2_O1_send"));
+
+    NodeBuilder node_builder2("F2", "F2", lib_def.get());
+    node_builder2.Input(e).Input(call1);
+    Node* call2 = b2.opts()
+                      .WithControlInputs({s1, e, call1})
+                      .FinalizeBuilder(&node_builder2);
+    Node* s2 = NoOp(
+        b2.opts().WithName("F2_sequencer").WithControlInputs({recv2, send2}));
+    Binary(call2, ops::NodeOut(call2, 1),
+           b2.opts().WithName("J").WithControlInput(s2));
+    TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
+  }
+
+  TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef);
+  TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library);
+}
+
+// Test with one outside_compilation cluster that has no inputs from the
+// compiled subgraph.
+TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) {
+  FunctionDefLibrary library;
+  GraphDef graphdef;
+
+  {
+    GraphDefBuilder b1(GraphDefBuilder::kFailImmediately);
+    Node* a = Input(b1.opts().WithName("A"));
+    Node* b = Input(b1.opts().WithName("B"));
+    Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1"));
+    Node* d =
+        Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1"));
+    Node* e = Unary(a, b1.opts()
+                           .WithName("E")
+                           .WithAttr("_encapsulate", "F1")
+                           .WithAttr("_outside", "O1"));
+    Node* f =
+        Binary(d, e, b1.opts().WithName("F").WithAttr("_encapsulate", "F1"));
+    Unary(f, b1.opts().WithName("G"));
+    TF_EXPECT_OK(b1.ToGraphDef(&graphdef));
+  }
+
+  TF_EXPECT_OK(Encapsulate(&graphdef, &library));
+
+  FunctionDefLibrary library_expected;
+  GraphDef graphdef_expected;
+
+  *library_expected.add_function() = FunctionDefHelper::Create(
+      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval:float"}, {},
+      {
+          {{"C"}, "UnaryTest", {"a_0_arg"}},
+          {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
+          {{"F"},
+           "BinaryTest",
+           {"D:o:0", "outside_compilation_O1_recv:output:0"}},
+          {{"outside_compilation_O1_recv"},
+           "_XlaRecvFromHost",
+           {},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT})}}},
+      },
+      {{"f_0_retval", "F:o:0"}});
+
+  {
+    std::unique_ptr<FunctionLibraryDefinition> lib_def(
+        new FunctionLibraryDefinition(OpRegistry::Global(), library_expected));
+    GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get());
+    Node* a = Input(b2.opts().WithName("A"));
+    Node* b = Input(b2.opts().WithName("B"));
+
+    Node* e = Unary(a, b2.opts().WithName("E"));
+    Node* send1 = SendFromHost(
+        {e}, {DT_FLOAT}, b2.opts().WithName("outside_compilation_F1_O1_send"));
+    NodeBuilder node_builder1("F1", "F1", lib_def.get());
+    node_builder1.Input(a).Input(b);
+    Node* call1 = b2.opts().FinalizeBuilder(&node_builder1);
+    Node* s1 = NoOp(b2.opts().WithName("F1_sequencer").WithControlInput(send1));
+
+    Unary(call1, b2.opts().WithName("G").WithControlInput(s1));
+    TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
+  }
+
+  TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef);
+  TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library);
+}
+
+// Test with one outside_compilation cluster that has no data inputs but has a
+// control input from the compiled subgraph.
+TEST(EncapsulateSubgraphsTest, OutsideCompilationControlInput) {
+  FunctionDefLibrary library;
+  GraphDef graphdef;
+
+  {
+    GraphDefBuilder b1(GraphDefBuilder::kFailImmediately);
+    Node* a = Input(b1.opts().WithName("A"));
+    Node* b = Input(b1.opts().WithName("B"));
+    Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1"));
+    Node* d =
+        Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1"));
+    Node* e = Unary(a, b1.opts()
+                           .WithName("E")
+                           .WithControlInput(d)
+                           .WithAttr("_encapsulate", "F1")
+                           .WithAttr("_outside", "O1"));
+    Node* f =
+        Binary(d, e, b1.opts().WithName("F").WithAttr("_encapsulate", "F1"));
+    Unary(f, b1.opts().WithName("G"));
+    TF_EXPECT_OK(b1.ToGraphDef(&graphdef));
+  }
+
+  TF_EXPECT_OK(Encapsulate(&graphdef, &library));
+
+  FunctionDefLibrary library_expected;
+  GraphDef graphdef_expected;
+
+  *library_expected.add_function() = FunctionDefHelper::Create(
+      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval:float"}, {},
+      {
+          {{"C"}, "UnaryTest", {"a_0_arg"}},
+          {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
+          {{"F"},
+           "BinaryTest",
+           {"D:o:0", "outside_compilation_O1_recv:output:0"}},
+          {{"outside_compilation_O1_send"},
+           "_XlaSendToHost",
+           {},
+           {{"dtypes", gtl::ArraySlice<DataType>({})}},
+           {"D"}},
+          {{"outside_compilation_O1_recv"},
+           "_XlaRecvFromHost",
+           {},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT})}},
+           {"outside_compilation_O1_send"}},
+      },
+      {{"f_0_retval", "F:o:0"}});
+
+  {
+    std::unique_ptr<FunctionLibraryDefinition> lib_def(
+        new FunctionLibraryDefinition(OpRegistry::Global(), library_expected));
+    GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get());
+    Node* a = Input(b2.opts().WithName("A"));
+    Node* b = Input(b2.opts().WithName("B"));
+
+    Node* recv1 =
+        RecvAtHost({}, b2.opts().WithName("outside_compilation_F1_O1_recv"));
+    Node* e = Unary(a, b2.opts().WithName("E").WithControlInput(recv1));
+    Node* send1 = SendFromHost(
+        {e}, {DT_FLOAT}, b2.opts().WithName("outside_compilation_F1_O1_send"));
+    NodeBuilder node_builder1("F1", "F1", lib_def.get());
+    node_builder1.Input(a).Input(b);
+    Node* call1 = b2.opts().FinalizeBuilder(&node_builder1);
+    Node* s1 = NoOp(
+        b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}));
+
+    Unary(call1, b2.opts().WithName("G").WithControlInput(s1));
+    TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
+  }
+
+  TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef);
+  TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library);
+}
+
+// Test with one outside_compilation cluster that has no outputs from the
+// compiled subgraph.
+TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) {
+  FunctionDefLibrary library;
+  GraphDef graphdef;
+
+  {
+    GraphDefBuilder b1(GraphDefBuilder::kFailImmediately);
+    Node* a = Input(b1.opts().WithName("A"));
+    Node* b = Input(b1.opts().WithName("B"));
+    Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1"));
+    Node* d =
+        Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1"));
+    Node* e = Unary(d, b1.opts()
+                           .WithName("E")
+                           .WithAttr("_encapsulate", "F1")
+                           .WithAttr("_outside", "O1"));
+    Node* f = Unary(d, b1.opts().WithName("F").WithAttr("_encapsulate", "F1"));
+    Binary(e, f, b1.opts().WithName("G"));
+    TF_EXPECT_OK(b1.ToGraphDef(&graphdef));
+  }
+
+  TF_EXPECT_OK(Encapsulate(&graphdef, &library));
+
+  FunctionDefLibrary library_expected;
+  GraphDef graphdef_expected;
+
+  *library_expected.add_function() = FunctionDefHelper::Create(
+      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval:float"}, {},
+      {
+          {{"C"}, "UnaryTest", {"a_0_arg"}},
+          {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
+          {{"F"}, "UnaryTest", {"D:o:0"}},
+          {{"outside_compilation_O1_send"},
+           "_XlaSendToHost",
+           {"D:o:0"},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT})}}},
+      },
+      {{"f_0_retval", "F:o:0"}});
+
+  {
+    std::unique_ptr<FunctionLibraryDefinition> lib_def(
+        new FunctionLibraryDefinition(OpRegistry::Global(), library_expected));
+    GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get());
+    Node* a = Input(b2.opts().WithName("A"));
+    Node* b = Input(b2.opts().WithName("B"));
+
+    Node* recv1 = RecvAtHost(
+        {DT_FLOAT}, b2.opts().WithName("outside_compilation_F1_O1_recv"));
+    Node* e = Unary(recv1, b2.opts().WithName("E"));
+    NodeBuilder node_builder1("F1", "F1", lib_def.get());
+    node_builder1.Input(a).Input(b);
+    Node* call1 = b2.opts().FinalizeBuilder(&node_builder1);
+    Node* s1 = NoOp(b2.opts().WithName("F1_sequencer").WithControlInput(recv1));
+
+    Binary(e, call1, b2.opts().WithName("G").WithControlInput(s1));
+    TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
+  }
+
+  TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef);
+  TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library);
+}
+
+// Test with one outside_compilation cluster that has no data outputs but has a
+// control output to the compiled subgraph.
+TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) {
+  FunctionDefLibrary library;
+  GraphDef graphdef;
+
+  {
+    GraphDefBuilder b1(GraphDefBuilder::kFailImmediately);
+    Node* a = Input(b1.opts().WithName("A"));
+    Node* b = Input(b1.opts().WithName("B"));
+    Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1"));
+    Node* d =
+        Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1"));
+    Node* e = Unary(d, b1.opts()
+                           .WithName("E")
+                           .WithAttr("_encapsulate", "F1")
+                           .WithAttr("_outside", "O1"));
+    Node* f = Unary(d, b1.opts().WithName("F").WithControlInput(e).WithAttr(
+                           "_encapsulate", "F1"));
+    Binary(e, f, b1.opts().WithName("G"));
+    TF_EXPECT_OK(b1.ToGraphDef(&graphdef));
+  }
+
+  TF_EXPECT_OK(Encapsulate(&graphdef, &library));
+
+  FunctionDefLibrary library_expected;
+  GraphDef graphdef_expected;
+
+  *library_expected.add_function() = FunctionDefHelper::Create(
+      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval:float"}, {},
+      {
+          {{"C"}, "UnaryTest", {"a_0_arg"}},
+          {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
+          {{"F"}, "UnaryTest", {"D:o:0"}, {}, {"outside_compilation_O1_recv"}},
+          {{"outside_compilation_O1_send"},
+           "_XlaSendToHost",
+           {"D:o:0"},
+           {{"dtypes", gtl::ArraySlice<DataType>({DT_FLOAT})}}},
+          {{"outside_compilation_O1_recv"},
+           "_XlaRecvFromHost",
+           {},
+           {{"dtypes", gtl::ArraySlice<DataType>({})}},
+           {"outside_compilation_O1_send"}},
+      },
+      {{"f_0_retval", "F:o:0"}});
+
+  {
+    std::unique_ptr<FunctionLibraryDefinition> lib_def(
+        new FunctionLibraryDefinition(OpRegistry::Global(), library_expected));
+    GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get());
+    Node* a = Input(b2.opts().WithName("A"));
+    Node* b = Input(b2.opts().WithName("B"));
+
+    Node* recv1 = RecvAtHost(
+        {DT_FLOAT}, b2.opts().WithName("outside_compilation_F1_O1_recv"));
+    Node* e = Unary(recv1, b2.opts().WithName("E"));
+    Node* send1 = SendFromHost({}, {},
+                               b2.opts()
+                                   .WithName("outside_compilation_F1_O1_send")
+                                   .WithControlInput(e));
+    NodeBuilder node_builder1("F1", "F1", lib_def.get());
+    node_builder1.Input(a).Input(b);
+    Node* call1 = b2.opts().FinalizeBuilder(&node_builder1);
+    Node* s1 = NoOp(
+        b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}));
+
+    Binary(e, call1, b2.opts().WithName("G").WithControlInput(s1));
+    TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
+  }
+
+  TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef);
+  TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library);
+}
+
+// Test with one outside_compilation cluster that has no outputs from the
+// compiled subgraph.
+TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputsOrOutputs) {
+  FunctionDefLibrary library;
+  GraphDef graphdef;
+
+  {
+    GraphDefBuilder b1(GraphDefBuilder::kFailImmediately);
+    Node* a = Input(b1.opts().WithName("A"));
+    Node* b = Input(b1.opts().WithName("B"));
+    Node* c = Unary(a, b1.opts().WithName("C").WithAttr("_encapsulate", "F1"));
+    Node* d =
+        Binary(b, c, b1.opts().WithName("D").WithAttr("_encapsulate", "F1"));
+    Node* e = Unary(a, b1.opts()
+                           .WithName("E")
+                           .WithAttr("_encapsulate", "F1")
+                           .WithAttr("_outside", "O1"));
+    Node* f = Unary(d, b1.opts().WithName("F").WithAttr("_encapsulate", "F1"));
+    Binary(e, f, b1.opts().WithName("G"));
+    TF_EXPECT_OK(b1.ToGraphDef(&graphdef));
+  }
+
+  TF_EXPECT_OK(Encapsulate(&graphdef, &library));
+
+  FunctionDefLibrary library_expected;
+  GraphDef graphdef_expected;
+
+  *library_expected.add_function() = FunctionDefHelper::Create(
+      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval:float"}, {},
+      {
+          {{"C"}, "UnaryTest", {"a_0_arg"}},
+          {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
+          {{"F"}, "UnaryTest", {"D:o:0"}},
+      },
+      {{"f_0_retval", "F:o:0"}});
+
+  {
+    std::unique_ptr<FunctionLibraryDefinition> lib_def(
+        new FunctionLibraryDefinition(OpRegistry::Global(), library_expected));
+    GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get());
+    Node* a = Input(b2.opts().WithName("A"));
+    Node* b = Input(b2.opts().WithName("B"));
+
+    Node* e = Unary(a, b2.opts().WithName("E"));
+    NodeBuilder node_builder1("F1", "F1", lib_def.get());
+    node_builder1.Input(a).Input(b);
+    Node* call1 = b2.opts().FinalizeBuilder(&node_builder1);
+
+    Binary(e, call1, b2.opts().WithName("G"));
+    TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
+  }
+
+  TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef);
+  TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/kernels/BUILD b/tensorflow/compiler/jit/kernels/BUILD
index 459a582e157f5ddc63997ca93e7c0294293517d3..9bea5663319c8a25249fdc265cee0191556a7c04 100644
--- a/tensorflow/compiler/jit/kernels/BUILD
+++ b/tensorflow/compiler/jit/kernels/BUILD
@@ -16,7 +16,6 @@ cc_library(
         "//tensorflow/compiler/jit:xla_device",
         "//tensorflow/compiler/tf2xla:common",
         "//tensorflow/compiler/tf2xla:xla_compiler",
-        "//tensorflow/compiler/tf2xla:xla_local_runtime_context",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla/client:client_library",
         "//tensorflow/compiler/xla/client:local_client",
diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc
index e481796d9e626fc8cdf36687ad110b0a8a788be0..4842877d9af332bdaa4a142867dde89ba66bd9a2 100644
--- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc
+++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include "tensorflow/compiler/jit/xla_device.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
 #include "tensorflow/compiler/tf2xla/xla_compiler.h"
-#include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/compiler/xla/client/client_library.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
@@ -103,7 +102,6 @@ xla::StatusOr<gpu::DeviceMemoryBase> XlaAllocator::Allocate(
   }
   void* data =
       reinterpret_cast<void*>(const_cast<char*>(t.tensor_data().data()));
-  TF_RET_CHECK(data != nullptr);
   tensors_[data] = t;
   return gpu::DeviceMemoryBase(data, size);
 }
@@ -111,7 +109,6 @@ xla::StatusOr<gpu::DeviceMemoryBase> XlaAllocator::Allocate(
 Status XlaAllocator::RegisterArgument(const Tensor* t) {
   void* data =
       reinterpret_cast<void*>(const_cast<char*>(t->tensor_data().data()));
-  TF_RET_CHECK(data != nullptr);
   tensors_[data] = *t;
   return Status::OK();
 }
@@ -260,14 +257,15 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) {
 
   const XlaCompiler::CompilationResult* kernel;
   xla::LocalExecutable* executable;
+
   OP_REQUIRES_OK(ctx, cache->Compile(options, function_, num_constant_args_,
-                                     variables, ctx, &kernel, &executable));
+                                     variables, ctx, &kernel, &executable,
+                                     /*compile_options=*/nullptr));
 
   VLOG(1) << "Executing XLA Computation...";
 
   // Builds an XLA allocator for the device.
   XlaAllocator xla_allocator(client->platform(), ctx);
-  XlaLocalRuntimeContext local_runtime_context;
 
   std::unique_ptr<xla::ShapedBuffer> output;
   // Build xla::ShapedBuffers that point directly to the Tensor buffers.
@@ -291,27 +289,22 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) {
     gpu::DeviceMemoryBase dmem = gpu::DeviceMemoryBase(
         const_cast<char*>(t->tensor_data().data()), t->tensor_data().size());
 
-    arg_buffers[i] =
-        xla::ShapedBuffer::MakeArrayShapedBuffer(
-            shape, client->platform(), client->default_device_ordinal(), dmem)
-            .ConsumeValueOrDie();
+    const xla::Shape on_device_shape =
+        client->backend().transfer_manager()->HostShapeToDeviceShape(shape);
+    CHECK(xla::ShapeUtil::Equal(shape, on_device_shape))
+        << "On-device shape "
+        << xla::ShapeUtil::HumanStringWithLayout(on_device_shape)
+        << " not the same as on-host shape "
+        << xla::ShapeUtil::HumanStringWithLayout(shape);
+    arg_buffers[i] = xla::MakeUnique<xla::ShapedBuffer>(
+        /*on_host_shape=*/shape, /*on_device_shape=*/shape, client->platform(),
+        client->default_device_ordinal());
+    arg_buffers[i]->set_buffer(dmem, /*index=*/{});
     arg_ptrs[i] = arg_buffers[i].get();
 
     OP_REQUIRES_OK(ctx, xla_allocator.RegisterArgument(t));
   }
 
-  // Make the final parameter point at local_runtime_context.
-  if (kernel->requires_runtime_context) {
-    gpu::DeviceMemoryBase local_runtime_context_dmem(
-        &local_runtime_context, sizeof(local_runtime_context));
-    arg_buffers.push_back(
-        xla::ShapedBuffer::MakeArrayShapedBuffer(
-            xla::ShapeUtil::MakeOpaqueShape(), client->platform(),
-            client->default_device_ordinal(), local_runtime_context_dmem)
-            .ConsumeValueOrDie());
-    arg_ptrs.push_back(arg_buffers.back().get());
-  }
-
   // Execute the computation.
   VLOG(2) << "Executing computation.";
   xla::ExecutableRunOptions run_options;
@@ -323,19 +316,13 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) {
   auto run_result = executable->Run(arg_ptrs, run_options);
   OP_REQUIRES(ctx, run_result.ok(), run_result.status());
 
-  if (local_runtime_context.error) {
-    ctx->CtxFailure(errors::InvalidArgument("Compiled kernel returned error: ",
-                                            local_runtime_context.error_msg));
-    return;
-  }
-
   output = run_result.ConsumeValueOrDie()->release();
   auto elapsed = env->NowMicros() - start_time;
   VLOG(2) << "Elapsed time: " << elapsed << "us";
 
   // Computation output should always be a tuple.
   if (VLOG_IS_ON(2)) {
-    VLOG(2) << "Result tuple shape: " << output->shape().DebugString();
+    VLOG(2) << "Result tuple shape: " << output->on_host_shape().DebugString();
   }
   CHECK_EQ(ctx->num_outputs(), kernel->outputs.size());
 
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
index 74c9791f5eaf1fbc43b152520df496a3b552af18..79b02baba83cb47f4f2f16544ad711a4b6937d90 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -41,6 +41,7 @@ limitations under the License.
 namespace tensorflow {
 
 const char* const kXlaClusterAttr = "_XlaCluster";
+const char* const kXlaOutsideCompilationAttr = "_XlaOutsideCompilation";
 
 namespace {
 
@@ -172,10 +173,15 @@ bool HasResourceInputOrOutput(const Node& node) {
                    DT_RESOURCE) != node.output_types().end();
 }
 
+struct NodeCompare {
+  bool operator()(const Node* a, const Node* b) { return a->id() < b->id(); }
+};
+using OrderedNodeSet = std::set<Node*, NodeCompare>;
+
 Status FindCompilationCandidates(
     const Graph& graph, FunctionLibraryDefinition* flib_def, Env* env,
     const std::function<bool(const Node*, const DeviceType&)>& is_compilable_fn,
-    std::unordered_set<Node*>* candidates) {
+    OrderedNodeSet* candidates) {
   OptimizerOptions opts;
   std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
       new ProcessFunctionLibraryRuntime(nullptr, env, TF_GRAPH_DEF_VERSION,
@@ -210,6 +216,13 @@ Status FindCompilationCandidates(
         !IsCompilableWhile(*node, jit_device_type, 0, lib_runtime)) {
       continue;
     }
+    // _Retval nodes in a top-level function represent fetches.
+    // Do not compile them.
+    if (node->type_string() == "_Retval") {
+      VLOG(2) << "Compilation rejected node: return value " << node->name()
+              << ": " << node->type_string();
+      continue;
+    }
     candidates->insert(node);
   }
   return Status::OK();
@@ -347,7 +360,7 @@ Status MarkForCompilationPass::RunImpl(
 
   Graph* graph = options.graph->get();
 
-  std::unordered_set<Node*> compilation_candidates;
+  OrderedNodeSet compilation_candidates;
   TF_RETURN_IF_ERROR(FindCompilationCandidates(
       *graph, options.flib_def,
       (options.session_options != nullptr) ? options.session_options->env
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.h b/tensorflow/compiler/jit/mark_for_compilation_pass.h
index f91695800f585f37b72173d5e582c38b1154b69b..e9acbfb19e42cb43cb0b986c438a569de29b2ebc 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.h
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.h
@@ -28,6 +28,10 @@ namespace tensorflow {
 // encapsulate subgraphs pass.
 extern const char* const kXlaClusterAttr;
 
+// The attribute that marks nodes in a cluster to be placed outside the xla
+// compilation by the encapsulate subgraphs pass.
+extern const char* const kXlaOutsideCompilationAttr;
+
 // Pass that marks a subset of operators in the graph with attribute
 // _XlaCluster so they are compiled by the EncapsulateSubgraphsPass.
 class MarkForCompilationPass : public GraphOptimizationPass {
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
index b3d258aea177fbefa4bae51d8156da2ff86c9032..454f0aeae98d7afd51f12b2cfb1810de275a57f7 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
@@ -525,5 +525,32 @@ TEST(XlaCompilationTest, IllegalCycle_UsefulErrorMessage) {
                             "+-- c\n"));
 }
 
+TEST(XlaCompilationTest, Retval) {
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  GraphDef graphdef;
+  {
+    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
+    Node* a = ops::SourceOp("Const", builder.opts()
+                                         .WithName("A")
+                                         .WithAttr("dtype", DT_FLOAT)
+                                         .WithAttr("value", Tensor()));
+    Node* b = ops::UnaryOp("Relu", a, builder.opts().WithName("B"));
+    ops::UnaryOp("_Retval", b,
+                 builder.opts()
+                     .WithName("R")
+                     .WithAttr("T", DT_FLOAT)
+                     .WithAttr("index", 0));
+
+    TF_EXPECT_OK(builder.ToGraph(graph.get()));
+  }
+
+  TF_ASSERT_OK(MarkForCompilation(&graph));
+  auto clusters = GetClusters(*graph);
+
+  EXPECT_EQ(2, clusters.size());
+  EXPECT_TRUE(clusters.find("R") == clusters.cend());
+  EXPECT_EQ(clusters["A"], clusters["B"]);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc
index bc2eccd2779b9ff68ae2121f7bc53d6f74aec3e3..bfff52c55a7d5a4490224347019db9b3333f7e2e 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.cc
+++ b/tensorflow/compiler/jit/xla_compilation_cache.cc
@@ -214,17 +214,12 @@ Status XlaCompilationCache::BuildExecutable(
     const XlaCompiler::CompilationResult& result,
     std::unique_ptr<xla::LocalExecutable>* executable) {
   VLOG(2) << "Compiling to local executable";
-  xla::Shape opaque_shape = xla::ShapeUtil::MakeOpaqueShape();
 
   std::vector<const xla::Shape*> argument_layouts(
       result.xla_input_shapes.size());
   for (int i = 0; i < result.xla_input_shapes.size(); ++i) {
     argument_layouts[i] = &result.xla_input_shapes[i];
   }
-  if (result.requires_runtime_context) {
-    // The final arg is the XlaLocalRuntimeContext*.
-    argument_layouts.push_back(&opaque_shape);
-  }
   xla::ExecutableBuildOptions build_options;
   build_options.set_device_ordinal(client_->default_device_ordinal());
   build_options.set_result_layout(result.xla_output_shape);
@@ -243,7 +238,8 @@ Status XlaCompilationCache::Compile(
     int num_constant_args, const std::vector<OptionalTensor>& variable_args,
     OpKernelContext* ctx,
     const XlaCompiler::CompilationResult** compilation_result,
-    xla::LocalExecutable** executable) {
+    xla::LocalExecutable** executable,
+    const XlaCompiler::CompileOptions* compile_options) {
   VLOG(1) << "XlaCompilationCache::Compile " << DebugString();
 
   if (VLOG_IS_ON(2)) {
@@ -302,9 +298,9 @@ Status XlaCompilationCache::Compile(
 
     XlaCompiler compiler(options);
     entry->compiled = true;
-    entry->compilation_status =
-        compiler.CompileFunction(XlaCompiler::CompileOptions(), function, args,
-                                 &entry->compilation_result);
+    entry->compilation_status = compiler.CompileFunction(
+        compile_options ? *compile_options : XlaCompiler::CompileOptions(),
+        function, args, &entry->compilation_result);
   }
   *compilation_result = &entry->compilation_result;
   if (entry->compilation_status.ok() && executable) {
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h
index c3a8f68a157a2d34d4a6716c9951b2b698aead79..0858020716fcf4763e42dc0699ad22cfda756942 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.h
+++ b/tensorflow/compiler/jit/xla_compilation_cache.h
@@ -66,7 +66,8 @@ class XlaCompilationCache : public ResourceBase {
                  const std::vector<OptionalTensor>& variable_args,
                  OpKernelContext* ctx,
                  const XlaCompiler::CompilationResult** compilation_result,
-                 xla::LocalExecutable** executable);
+                 xla::LocalExecutable** executable,
+                 const XlaCompiler::CompileOptions* compile_options);
 
   xla::LocalClient* client() const { return client_; }
   const DeviceType& device_type() const { return device_type_; }
diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc
index fed2c92d763c33aad3c5b3f07c1f33364c797793..c936222f32056e92efced82d5adb3a96c8041a17 100644
--- a/tensorflow/compiler/jit/xla_device_context.cc
+++ b/tensorflow/compiler/jit/xla_device_context.cc
@@ -71,12 +71,14 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
     void* dst_ptr = DMAHelper::base(device_tensor);
     se::DeviceMemoryBase dev_dst_ptr(dst_ptr, total_bytes);
 
-    Status status = Status::OK();
+    Status status;
     stream_->ThenMemcpy(&dev_dst_ptr, src_ptr, total_bytes);
     // TODO(hpucha): Make this asynchronous.
-    if (!stream_->BlockHostUntilDone()) {
+    Status block_status = stream_->BlockHostUntilDone();
+    if (!block_status.ok()) {
       status = xla::InternalError(
-          "Failed to complete data transfer on stream %p", stream_);
+          "Failed to complete data transfer on stream %p: %s", stream_,
+          block_status.error_message().c_str());
     }
 
     done(status);
@@ -105,12 +107,14 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor,
     se::DeviceMemoryBase dev_src_ptr(src_ptr, total_bytes);
     void* dst_ptr = DMAHelper::base(cpu_tensor);
 
-    Status status = Status::OK();
+    Status status;
     stream_->ThenMemcpy(dst_ptr, dev_src_ptr, total_bytes);
     // TODO(hpucha): Make this asynchronous.
-    if (!stream_->BlockHostUntilDone()) {
+    Status block_status = stream_->BlockHostUntilDone();
+    if (!block_status.ok()) {
       status = xla::InternalError(
-          "Failed to complete data transfer on stream %p", stream_);
+          "Failed to complete data transfer on stream %p: %s", stream_,
+          block_status.error_message().c_str());
     }
 
     done(status);
diff --git a/tensorflow/compiler/plugin/BUILD b/tensorflow/compiler/plugin/BUILD
index c1edf2448c54ffddd7b70dcdfb1609080ca81b65..da4bc44c7a75c9f8faf16c537a17a1f2d16d5d61 100644
--- a/tensorflow/compiler/plugin/BUILD
+++ b/tensorflow/compiler/plugin/BUILD
@@ -41,6 +41,15 @@ cc_library(
     ],
 )
 
+# This target is added purely for the purpose of ensuring that `:xla_device` is
+# always publicly visible to external XLA backend/plugin developers.
+cc_library(
+    name = "plugin_device",
+    deps = [
+        "//tensorflow/compiler/jit:xla_device",
+    ],
+)
+
 #-----------------------------------------------------------------------------
 
 filegroup(
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 6cad2b0824d86a9549cb77518448a7e4eb781bef..f7c6cd293a8a4788bd73cc42c5c61e60d4a2c110 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -240,6 +240,23 @@ tf_xla_py_test(
     ],
 )
 
+tf_xla_py_test(
+    name = "fft_test",
+    size = "medium",
+    srcs = ["fft_test.py"],
+    shard_count = 3,
+    tags = ["optonly"],
+    deps = [
+        ":xla_test",
+        "//tensorflow/contrib/signal:signal_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:extra_py_tests_deps",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:spectral_ops",
+    ],
+)
+
 tf_xla_py_test(
     name = "slice_ops_test",
     size = "small",
@@ -279,6 +296,22 @@ tf_xla_py_test(
     ],
 )
 
+tf_xla_py_test(
+    name = "image_ops_test",
+    size = "small",
+    srcs = ["image_ops_test.py"],
+    tags = [
+        "optonly",  # Times out frequently in fastbuild mode.
+    ],
+    deps = [
+        ":xla_test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:image_ops",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 tf_xla_py_test(
     name = "lrn_ops_test",
     size = "medium",
@@ -367,7 +400,14 @@ tf_xla_py_test(
     size = "small",
     srcs = ["random_ops_test.py"],
     # TODO(b/31361304): enable RNG ops on GPU when parallelized.
-    disabled_backends = ["gpu"],
+    disabled_backends = [
+        "gpu",
+    ],
+    tags = [
+        "manual",
+        "no_oss",
+        "notap",
+    ],
     deps = [
         ":xla_test",
         "//tensorflow/python:framework_for_generated_wrappers",
@@ -416,6 +456,20 @@ tf_xla_py_test(
     ],
 )
 
+tf_xla_py_test(
+    name = "scan_ops_test",
+    size = "small",
+    srcs = ["scan_ops_test.py"],
+    tags = ["optonly"],
+    deps = [
+        ":xla_test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 tf_xla_py_test(
     name = "segment_reduction_ops_test",
     size = "medium",
diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index 654dc15e86b21c7742d49281d53c1a75e6a45d3b..65706b35d616eb4dce94f0a7056a1604a97ff4c1 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -94,14 +94,12 @@ class BinaryOpsTest(XLATestCase):
           dtype(4),
           expected=np.array([[16], [81]], dtype=dtype))
 
-      atan2_supported = self.device == "XLA_GPU"
-      if atan2_supported:
-        self._testBinary(
-            math_ops.atan2,
-            np.array([0, np.sqrt(2), 1, np.sqrt(2), 0], dtype),
-            np.array([1, np.sqrt(2), 0, -np.sqrt(2), -1], dtype),
-            expected=np.array(
-                [0, np.pi / 4, np.pi / 2, np.pi * 3 / 4, np.pi], dtype=dtype))
+      self._testBinary(
+          math_ops.atan2,
+          np.array([0, np.sqrt(2), 1, np.sqrt(2), 0], dtype),
+          np.array([1, np.sqrt(2), 0, -np.sqrt(2), -1], dtype),
+          expected=np.array(
+              [0, np.pi / 4, np.pi / 2, np.pi * 3 / 4, np.pi], dtype=dtype))
 
       self._testBinary(
           gen_math_ops._reciprocal_grad,
@@ -388,30 +386,28 @@ class BinaryOpsTest(XLATestCase):
               ],
               dtype=dtype))
 
-      atan2_supported = self.device == "XLA_GPU"
-      if atan2_supported:
-        self._testBinary(
-            math_ops.pow,
-            dtype(3 + 2j),
-            dtype(4 - 5j),
-            expected=np.power(dtype(3 + 2j), dtype(4 - 5j)))
-        self._testBinary(  # empty rhs
-            math_ops.pow,
-            np.array([1 + 2j, 2 - 3j], dtype=dtype),
-            np.zeros(shape=[0, 2], dtype=dtype),
-            expected=np.zeros(shape=[0, 2], dtype=dtype))
-        self._testBinary(  # to zero power
-            math_ops.pow,
-            np.array([1 + 2j, 2 - 3j], dtype=dtype),
-            np.zeros(shape=[1, 2], dtype=dtype),
-            expected=np.ones(shape=[1, 2], dtype=dtype))
-        lhs = np.array([1 - 2j, 4 + 3j, 2 - 3j, 3, 2j, 1, 4], dtype=dtype)
-        rhs = np.array([2, 3j, 3 + 4j, 2 + 3j, 3 - 2j, 2, 3 + 3j], dtype=dtype)
-        scalar = dtype(2 + 2j)
-        self._testBinary(math_ops.pow, lhs, rhs, expected=np.power(lhs, rhs))
-        self._testBinary(
-            math_ops.pow, scalar, rhs, expected=np.power(scalar, rhs))
-        self._testBinary(math_ops.pow, lhs, scalar, np.power(lhs, scalar))
+      self._testBinary(
+          math_ops.pow,
+          dtype(3 + 2j),
+          dtype(4 - 5j),
+          expected=np.power(dtype(3 + 2j), dtype(4 - 5j)))
+      self._testBinary(  # empty rhs
+          math_ops.pow,
+          np.array([1 + 2j, 2 - 3j], dtype=dtype),
+          np.zeros(shape=[0, 2], dtype=dtype),
+          expected=np.zeros(shape=[0, 2], dtype=dtype))
+      self._testBinary(  # to zero power
+          math_ops.pow,
+          np.array([1 + 2j, 2 - 3j], dtype=dtype),
+          np.zeros(shape=[1, 2], dtype=dtype),
+          expected=np.ones(shape=[1, 2], dtype=dtype))
+      lhs = np.array([1 - 2j, 4 + 3j, 2 - 3j, 3, 2j, 1, 4], dtype=dtype)
+      rhs = np.array([2, 3j, 3 + 4j, 2 + 3j, 3 - 2j, 2, 3 + 3j], dtype=dtype)
+      scalar = dtype(2 + 2j)
+      self._testBinary(math_ops.pow, lhs, rhs, expected=np.power(lhs, rhs))
+      self._testBinary(
+          math_ops.pow, scalar, rhs, expected=np.power(scalar, rhs))
+      self._testBinary(math_ops.pow, lhs, scalar, np.power(lhs, scalar))
 
       lhs = np.array([4 + 2j, -3 - 1j, 2j, 1], dtype=dtype)
       rhs = np.array([5, -6j, 7 - 3j, -8j], dtype=dtype)
@@ -421,9 +417,8 @@ class BinaryOpsTest(XLATestCase):
       self._testBinary(
           gen_math_ops._sigmoid_grad, lhs, rhs, expected=rhs * lhs * (1 - lhs))
 
-      if atan2_supported:
-        self._testBinary(
-            gen_math_ops._rsqrt_grad, lhs, rhs, expected=lhs**3 * rhs / -2)
+      self._testBinary(
+          gen_math_ops._rsqrt_grad, lhs, rhs, expected=lhs**3 * rhs / -2)
 
       self._testBinary(
           gen_math_ops._sqrt_grad, lhs, rhs, expected=rhs / (2 * lhs))
@@ -547,7 +542,7 @@ class BinaryOpsTest(XLATestCase):
       self._testDivision(dtype)
 
   def testFloatDivision(self):
-    for dtype in self.float_types + self.complex_types:
+    for dtype in self.float_types | self.complex_types:
       self._testDivision(dtype)
 
   def _testRemainder(self, dtype):
diff --git a/tensorflow/compiler/tests/categorical_op_test.py b/tensorflow/compiler/tests/categorical_op_test.py
index 5e06f9a72401935b9681c35a164b51f50a8538ae..035cdea1786d39f3d21bb63be5c8ccffe1608bdf 100644
--- a/tensorflow/compiler/tests/categorical_op_test.py
+++ b/tensorflow/compiler/tests/categorical_op_test.py
@@ -35,6 +35,9 @@ from tensorflow.python.platform import googletest
 class CategoricalTest(XLATestCase):
   """Test cases for random-number generating operators."""
 
+  def output_dtypes(self):
+    return set(self.int_types).intersection([np.int32, np.int64])
+
   def _chi2(self, expected, actual):
     """Returns Chi2 GOF statistic."""
     actual = np.asarray(actual)
@@ -55,7 +58,8 @@ class CategoricalTest(XLATestCase):
     """
     with self.test_session() as sess, self.test_scope():
       random_seed.set_random_seed(1618)
-      op = random_ops.multinomial(logits, num_samples)
+      op = random_ops.multinomial(logits, num_samples,
+                                  output_dtype=dtypes.int32)
       d = sess.run(op)
 
     batch_size, num_classes = logits.shape
@@ -73,11 +77,11 @@ class CategoricalTest(XLATestCase):
 
     return freqs_mat
 
-  def _testRngIsNotConstant(self, rng, dtype):
+  def _testRngIsNotConstant(self, rng, dtype, output_dtype):
     # Tests that 'rng' does not always return the same value.
     with self.test_session() as sess:
       with self.test_scope():
-        x = rng(dtype)
+        x = rng(dtype, output_dtype)
 
       # The random-number generator, if working correctly, should produce the
       # same output multiple times with low probability.
@@ -92,21 +96,25 @@ class CategoricalTest(XLATestCase):
                       (not np.array_equal(y, w)))
 
   def testCategoricalIsNotConstant(self):
-    def rng(unused_dtype):
-      return random_ops.multinomial([[1., 1., 1.]], 10)
+    def rng(dtype, output_dtype):
+      return random_ops.multinomial(np.array([[1., 1., 1.]], dtype=dtype), 10,
+                                    output_dtype=output_dtype)
 
-    dtype = dtypes.float32
-    self._testRngIsNotConstant(rng, dtype)
+    dtype = np.float32
+    for output_dtype in self.output_dtypes():
+      self._testRngIsNotConstant(rng, dtype, output_dtype)
 
   def testCategoricalIsInRange(self):
-    for dtype in [dtypes.float32, dtypes.float64]:
-      with self.test_session() as sess:
-        with self.test_scope():
-          x = random_ops.multinomial(
-              array_ops.ones(shape=[1, 20], dtype=dtype), 1000)
-        y = sess.run(x)
-        self.assertTrue((y >= 0).sum() == 1000)
-        self.assertTrue((y < 20).sum() == 1000)
+    for dtype in self.float_types:
+      for output_dtype in self.output_dtypes():
+        with self.test_session() as sess:
+          with self.test_scope():
+            x = random_ops.multinomial(
+                array_ops.ones(shape=[1, 20], dtype=dtype), 1000,
+                output_dtype=output_dtype)
+          y = sess.run(x)
+          self.assertTrue((y >= 0).sum() == 1000)
+          self.assertTrue((y < 20).sum() == 1000)
 
   def testSamplingCorrectness(self):
     np.random.seed(1618)  # Make it reproducible.
diff --git a/tensorflow/compiler/tests/conv2d_test.py b/tensorflow/compiler/tests/conv2d_test.py
index 0d617eb37c5d92c87abb0f996b731112257a2b80..62577b70ce96e220d79978f01614b2d9a3647680 100644
--- a/tensorflow/compiler/tests/conv2d_test.py
+++ b/tensorflow/compiler/tests/conv2d_test.py
@@ -34,7 +34,13 @@ from tensorflow.python.platform import googletest
 
 class Conv2DTest(XLATestCase):
 
-  def _VerifyValues(self, input_sizes, filter_sizes, stride, padding, expected):
+  def _VerifyValues(self,
+                    input_sizes=None,
+                    filter_sizes=None,
+                    strides=None,
+                    dilations=None,
+                    padding=None,
+                    expected=None):
     """Tests that tf.nn.conv2d produces the expected value.
 
     Args:
@@ -42,7 +48,8 @@ class Conv2DTest(XLATestCase):
         [batch, input_rows, input_cols, input_depth].
       filter_sizes: Filter tensor dimensions in
         [kernel_rows, kernel_cols, input_depth, output_depth].
-      stride: Stride.
+      strides: Strides.
+      dilations: RHS dilations.
       padding: Padding type.
       expected: Expected output.
     """
@@ -50,73 +57,136 @@ class Conv2DTest(XLATestCase):
     total_size_2 = np.prod(filter_sizes)
     x1 = np.arange(1, total_size_1 + 1, dtype=np.float32).reshape(input_sizes)
     x2 = np.arange(1, total_size_2 + 1, dtype=np.float32).reshape(filter_sizes)
-    strides = [1, stride, stride, 1]
+    strides = [1] + strides + [1]
+    if dilations is None:
+      dilations = [1, 1]
+    dilations = [1] + dilations + [1]
 
     with self.test_session() as sess:
+      t1 = array_ops.placeholder(dtypes.float32, shape=input_sizes)
+      t2 = array_ops.placeholder(dtypes.float32, shape=filter_sizes)
       with self.test_scope():
-        t1 = array_ops.placeholder(dtypes.float32, shape=input_sizes)
-        t2 = array_ops.placeholder(dtypes.float32, shape=filter_sizes)
         out = nn_ops.conv2d(
-            t1, t2, strides=strides, padding=padding, data_format="NHWC")
+            t1,
+            t2,
+            strides=strides,
+            padding=padding,
+            data_format="NHWC",
+            dilations=dilations)
       value = sess.run(out, {t1: x1, t2: x2})
-      self.assertArrayNear(expected, np.ravel(value), 1e-3)
+      self.assertAllClose(expected, value, 1e-3)
 
   def testConv2D1x1Filter(self):
-    expected_output = [
+    expected_output = np.reshape([
         30.0, 36.0, 42.0, 66.0, 81.0, 96.0, 102.0, 126.0, 150.0, 138.0, 171.0,
         204.0, 174.0, 216.0, 258.0, 210.0, 261.0, 312.0
-    ]
+    ], [1, 2, 3, 3])
     self._VerifyValues(
         input_sizes=[1, 2, 3, 3],
         filter_sizes=[1, 1, 3, 3],
-        stride=1,
+        strides=[1, 1],
         padding="VALID",
         expected=expected_output)
 
   def testConv2D2x2Filter(self):
-    expected_output = [2271.0, 2367.0, 2463.0, 2901.0, 3033.0, 3165.0]
+    expected_output = np.reshape(
+        [2271.0, 2367.0, 2463.0, 2901.0, 3033.0, 3165.0], [1, 1, 2, 3])
     self._VerifyValues(
         input_sizes=[1, 2, 3, 3],
         filter_sizes=[2, 2, 3, 3],
-        stride=1,
+        strides=[1, 1],
+        padding="VALID",
+        expected=expected_output)
+
+  def testConv2D2x2Filter2x1Dilation(self):
+    expected_output = np.array([[[[72], [82], [92]], [[112], [122], [132]]]])
+    self._VerifyValues(
+        input_sizes=[1, 4, 4, 1],
+        filter_sizes=[2, 2, 1, 1],
+        strides=[1, 1],
+        dilations=[2, 1],
         padding="VALID",
         expected=expected_output)
 
   def testConv2D1x2Filter(self):
-    expected_output = [
+    expected_output = np.reshape([
         231.0, 252.0, 273.0, 384.0, 423.0, 462.0, 690.0, 765.0, 840.0, 843.0,
         936.0, 1029.0
-    ]
+    ], [1, 2, 2, 3])
     self._VerifyValues(
         input_sizes=[1, 2, 3, 3],
         filter_sizes=[1, 2, 3, 3],
-        stride=1,
+        strides=[1, 1],
         padding="VALID",
         expected=expected_output)
 
   def testConv2D2x2FilterStride2(self):
-    expected_output = [2271.0, 2367.0, 2463.0]
+    expected_output = np.reshape([2271.0, 2367.0, 2463.0], [1, 1, 1, 3])
     self._VerifyValues(
         input_sizes=[1, 2, 3, 3],
         filter_sizes=[2, 2, 3, 3],
-        stride=2,
+        strides=[2, 2],
         padding="VALID",
         expected=expected_output)
 
   def testConv2D2x2FilterStride2Same(self):
-    expected_output = [2271.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0]
+    expected_output = np.reshape(
+        [2271.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0], [1, 1, 2, 3])
     self._VerifyValues(
         input_sizes=[1, 2, 3, 3],
         filter_sizes=[2, 2, 3, 3],
-        stride=2,
+        strides=[2, 2],
         padding="SAME",
         expected=expected_output)
 
+  def testConv2DEmptyDilation(self):
+    self._VerifyValues(
+        input_sizes=[0, 2, 3, 3],
+        filter_sizes=[1, 1, 3, 3],
+        strides=[1, 1],
+        dilations=[2, 1],
+        padding="VALID",
+        expected=np.zeros([0, 2, 3, 3]))
+
+  def testConv2D2x2FilterDilation(self):
+    self._VerifyValues(
+        input_sizes=[1, 2, 3, 3],
+        filter_sizes=[2, 2, 3, 3],
+        strides=[1, 1],
+        dilations=[1, 2],
+        padding="VALID",
+        expected=np.reshape([2667, 2781, 2895], [1, 1, 1, 3]))
+
+  def testConv2D1x2FilterDilation(self):
+    self._VerifyValues(
+        input_sizes=[1, 2, 3, 3],
+        filter_sizes=[1, 2, 3, 3],
+        strides=[1, 1],
+        dilations=[2, 1],
+        padding="VALID",
+        expected=np.array([[[[231, 252, 273], [384, 423, 462]],
+                            [[690, 765, 840], [843, 936, 1029]]]]))
+
+  def testConv2DKernelSizeMatchesInputSizeDilation(self):
+    self._VerifyValues(
+        input_sizes=[1, 3, 3, 1],
+        filter_sizes=[2, 2, 1, 2],
+        strides=[1, 1],
+        dilations=[2, 2],
+        padding="VALID",
+        expected=np.reshape([108, 128], [1, 1, 1, 2]))
+
 
 class Conv2DBackpropInputTest(XLATestCase):
 
-  def _VerifyValues(self, input_sizes, filter_sizes, out_backprop_sizes, stride,
-                    padding, expected):
+  def _VerifyValues(self,
+                    input_sizes=None,
+                    filter_sizes=None,
+                    out_backprop_sizes=None,
+                    strides=None,
+                    dilations=None,
+                    padding=None,
+                    expected=None):
     """Tests that gen_nn_ops.conv2d_backprop_input produces the expected output.
 
     Args:
@@ -125,7 +195,8 @@ class Conv2DBackpropInputTest(XLATestCase):
       filter_sizes: Filter tensor dimensions in
         [kernel_rows, kernel_cols, input_depth, output_depth].
       out_backprop_sizes: Output gradients tensor dimensions.
-      stride: Stride.
+      strides: Strides.
+      dilations: Dilations.
       padding: Padding type.
       expected: Expected output.
     """
@@ -134,21 +205,25 @@ class Conv2DBackpropInputTest(XLATestCase):
     x1 = np.arange(1, total_size_1 + 1, dtype=np.float32).reshape(filter_sizes)
     x2 = np.arange(
         1, total_size_2 + 1, dtype=np.float32).reshape(out_backprop_sizes)
-    strides = [1, stride, stride, 1]
+    strides = [1] + strides + [1]
+    if dilations is not None:
+      dilations = [1] + dilations + [1]
 
     with self.test_session() as sess:
+      t1 = array_ops.placeholder(dtypes.float32, shape=filter_sizes)
+      t2 = array_ops.placeholder(dtypes.float32, shape=out_backprop_sizes)
       with self.test_scope():
-        t1 = array_ops.placeholder(dtypes.float32, shape=filter_sizes)
-        t2 = array_ops.placeholder(dtypes.float32, shape=out_backprop_sizes)
         out = gen_nn_ops.conv2d_backprop_input(
             input_sizes=input_sizes,
             filter=t1,
             out_backprop=t2,
             strides=strides,
+            dilations=dilations,
             padding=padding,
             data_format="NHWC")
       value = sess.run(out, {t1: x1, t2: x2})
-      self.assertArrayNear(expected, np.ravel(value), 1e-3)
+      self.assertAllEqual(input_sizes, value.shape)
+      self.assertAllClose(expected, np.ravel(value), 1e-3)
 
   def testConv2D1x1Filter(self):
     expected_output = [
@@ -160,7 +235,7 @@ class Conv2DBackpropInputTest(XLATestCase):
         input_sizes=[1, 4, 4, 3],
         filter_sizes=[1, 1, 3, 2],
         out_backprop_sizes=[1, 4, 4, 2],
-        stride=1,
+        strides=[1, 1],
         padding="VALID",
         expected=expected_output)
 
@@ -170,7 +245,7 @@ class Conv2DBackpropInputTest(XLATestCase):
         input_sizes=[1, 1, 5, 1],
         filter_sizes=[1, 2, 1, 1],
         out_backprop_sizes=[1, 1, 2, 1],
-        stride=3,
+        strides=[3, 3],
         padding="VALID",
         expected=expected_output)
 
@@ -180,7 +255,7 @@ class Conv2DBackpropInputTest(XLATestCase):
         input_sizes=[1, 1, 6, 1],
         filter_sizes=[1, 2, 1, 1],
         out_backprop_sizes=[1, 1, 2, 1],
-        stride=3,
+        strides=[3, 3],
         padding="VALID",
         expected=expected_output)
 
@@ -190,7 +265,7 @@ class Conv2DBackpropInputTest(XLATestCase):
         input_sizes=[1, 1, 7, 1],
         filter_sizes=[1, 2, 1, 1],
         out_backprop_sizes=[1, 1, 2, 1],
-        stride=3,
+        strides=[3, 3],
         padding="VALID",
         expected=expected_output)
 
@@ -200,7 +275,7 @@ class Conv2DBackpropInputTest(XLATestCase):
         input_sizes=[1, 2, 3, 1],
         filter_sizes=[2, 2, 1, 1],
         out_backprop_sizes=[1, 2, 3, 1],
-        stride=1,
+        strides=[1, 1],
         padding="SAME",
         expected=expected_output)
 
@@ -213,7 +288,7 @@ class Conv2DBackpropInputTest(XLATestCase):
         input_sizes=[1, 2, 3, 3],
         filter_sizes=[2, 2, 3, 3],
         out_backprop_sizes=[1, 1, 2, 3],
-        stride=1,
+        strides=[1, 1],
         padding="VALID",
         expected=expected_output)
 
@@ -226,7 +301,7 @@ class Conv2DBackpropInputTest(XLATestCase):
         input_sizes=[1, 2, 3, 3],
         filter_sizes=[2, 2, 3, 3],
         out_backprop_sizes=[1, 2, 3, 3],
-        stride=1,
+        strides=[1, 1],
         padding="SAME",
         expected=expected_output)
 
@@ -236,7 +311,7 @@ class Conv2DBackpropInputTest(XLATestCase):
         input_sizes=[1, 3, 3, 1],
         filter_sizes=[1, 2, 1, 1],
         out_backprop_sizes=[1, 3, 2, 1],
-        stride=1,
+        strides=[1, 1],
         padding="VALID",
         expected=expected_output)
 
@@ -246,7 +321,7 @@ class Conv2DBackpropInputTest(XLATestCase):
         input_sizes=[1, 3, 3, 1],
         filter_sizes=[1, 2, 1, 1],
         out_backprop_sizes=[1, 3, 3, 1],
-        stride=1,
+        strides=[1, 1],
         padding="SAME",
         expected=expected_output)
 
@@ -256,7 +331,7 @@ class Conv2DBackpropInputTest(XLATestCase):
         input_sizes=[1, 3, 5, 1],
         filter_sizes=[1, 3, 1, 1],
         out_backprop_sizes=[1, 2, 2, 1],
-        stride=2,
+        strides=[2, 2],
         padding="VALID",
         expected=expected_output)
 
@@ -266,15 +341,76 @@ class Conv2DBackpropInputTest(XLATestCase):
         input_sizes=[1, 2, 3, 1],
         filter_sizes=[2, 2, 1, 1],
         out_backprop_sizes=[1, 1, 2, 1],
-        stride=2,
+        strides=[2, 2],
         padding="SAME",
         expected=expected_output)
 
+  def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self):
+    self._VerifyValues(
+        input_sizes=[1, 3, 6, 1],
+        filter_sizes=[2, 2, 1, 1],
+        out_backprop_sizes=[1, 1, 5, 1],
+        strides=[1, 1],
+        dilations=[2, 1],
+        padding="VALID",
+        expected=[1, 4, 7, 10, 13, 10, 0, 0, 0, 0, 0, 0, 3, 10, 17, 24, 31, 20])
+
+  def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self):
+    self._VerifyValues(
+        input_sizes=[1, 2, 3, 1],
+        filter_sizes=[2, 2, 1, 1],
+        out_backprop_sizes=[1, 1, 1, 1],
+        strides=[1, 1],
+        dilations=[1, 2],
+        padding="VALID",
+        expected=[1, 0, 2, 3, 0, 4])
+
+  def testConv2DEmptyBackpropInputDilation1x2(self):
+    self._VerifyValues(
+        input_sizes=[0, 2, 3, 1],
+        filter_sizes=[2, 2, 1, 1],
+        out_backprop_sizes=[0, 1, 1, 1],
+        strides=[1, 1],
+        dilations=[1, 2],
+        padding="VALID",
+        expected=np.zeros([0]))
+
+  def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self):
+    # The GPU version of this test is not very stable. So adjusting the
+    # error threshold to 1e-4.
+    self._VerifyValues(
+        input_sizes=[1, 3, 2, 3],
+        filter_sizes=[2, 2, 3, 3],
+        out_backprop_sizes=[1, 1, 1, 3],
+        strides=[1, 1],
+        dilations=[2, 1],
+        padding="VALID",
+        expected=[
+            14, 32, 50, 68, 86, 104, 0, 0, 0, 0, 0, 0, 122, 140, 158, 176, 194,
+            212
+        ])
+
+  def testConv2DKernelSizeMatchesInputSizeBackpropInputDilation2x2(self):
+    self._VerifyValues(
+        input_sizes=[1, 3, 3, 1],
+        filter_sizes=[2, 2, 1, 2],
+        out_backprop_sizes=[1, 1, 1, 2],
+        strides=[1, 1],
+        dilations=[2, 2],
+        padding="VALID",
+        expected=[5, 0, 11, 0, 0, 0, 17, 0, 23])
+
 
 class Conv2DBackpropFilterTest(XLATestCase):
 
-  def _VerifyValues(self, input_sizes, filter_sizes, out_backprop_sizes, stride,
-                    padding, expected):
+  def _VerifyValues(self,
+                    input_sizes=None,
+                    filter_sizes=None,
+                    out_backprop_sizes=None,
+                    strides=None,
+                    dilations=None,
+                    padding=None,
+                    expected=None):
     """Tests that gen_nn_ops.conv2d_backprop_filter produces the right output.
 
     Args:
@@ -283,7 +419,8 @@ class Conv2DBackpropFilterTest(XLATestCase):
       filter_sizes: Filter tensor dimensions in
         [kernel_rows, kernel_cols, input_depth, output_depth].
       out_backprop_sizes: Output gradients tensor dimensions.
-      stride: Stride.
+      strides: Stride.
+      dilations: Dilations.
       padding: Padding type.
       expected: Expected output.
     """
@@ -293,22 +430,26 @@ class Conv2DBackpropFilterTest(XLATestCase):
     x1 = np.arange(1, total_size_1 + 1, dtype=np.float32).reshape(input_sizes)
     x2 = np.arange(
         1, total_size_2 + 1, dtype=np.float32).reshape(out_backprop_sizes)
-    strides = [1, stride, stride, 1]
+    strides = [1] + strides + [1]
+    if dilations is not None:
+      dilations = [1] + dilations + [1]
 
     with self.test_session() as sess:
+      t1 = array_ops.placeholder(dtypes.float32, shape=input_sizes)
+      t2 = array_ops.placeholder(dtypes.float32, shape=out_backprop_sizes)
       with self.test_scope():
-        t1 = array_ops.placeholder(dtypes.float32, shape=input_sizes)
-        t2 = array_ops.placeholder(dtypes.float32, shape=out_backprop_sizes)
         tensor = gen_nn_ops.conv2d_backprop_filter(
             input=t1,
             filter_sizes=filter_sizes,
             out_backprop=t2,
             strides=strides,
+            dilations=dilations,
             padding=padding,
             data_format="NHWC")
 
       value = sess.run(tensor, {t1: x1, t2: x2})
-      self.assertArrayNear(expected, np.ravel(value), 1e-3)
+      self.assertAllEqual(filter_sizes, value.shape)
+      self.assertAllClose(expected, np.ravel(value), 1e-3)
 
   def testConv2D1x1Filter(self):
     expected_output = [8056, 8432, 8312, 8704, 8568, 8976]
@@ -316,7 +457,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 4, 4, 3],
         filter_sizes=[1, 1, 3, 2],
         out_backprop_sizes=[1, 4, 4, 2],
-        stride=1,
+        strides=[1, 1],
         padding="VALID",
         expected=expected_output)
 
@@ -326,7 +467,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 3, 3, 1],
         filter_sizes=[1, 2, 1, 1],
         out_backprop_sizes=[1, 3, 2, 1],
-        stride=1,
+        strides=[1, 1],
         padding="VALID",
         expected=expected_output)
 
@@ -336,7 +477,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 2, 3, 1],
         filter_sizes=[2, 2, 1, 1],
         out_backprop_sizes=[1, 1, 2, 1],
-        stride=1,
+        strides=[1, 1],
         padding="VALID",
         expected=expected_output)
 
@@ -350,7 +491,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 2, 3, 3],
         filter_sizes=[2, 2, 3, 3],
         out_backprop_sizes=[1, 1, 2, 3],
-        stride=1,
+        strides=[1, 1],
         padding="VALID",
         expected=expected_output)
 
@@ -360,7 +501,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 1, 5, 1],
         filter_sizes=[1, 2, 1, 1],
         out_backprop_sizes=[1, 1, 2, 1],
-        stride=3,
+        strides=[3, 3],
         padding="VALID",
         expected=expected_output)
 
@@ -370,7 +511,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 1, 6, 1],
         filter_sizes=[1, 2, 1, 1],
         out_backprop_sizes=[1, 1, 2, 1],
-        stride=3,
+        strides=[3, 3],
         padding="VALID",
         expected=expected_output)
 
@@ -380,7 +521,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 1, 7, 1],
         filter_sizes=[1, 2, 1, 1],
         out_backprop_sizes=[1, 1, 2, 1],
-        stride=3,
+        strides=[3, 3],
         padding="VALID",
         expected=expected_output)
 
@@ -390,7 +531,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 1, 4, 1],
         filter_sizes=[1, 3, 1, 1],
         out_backprop_sizes=[1, 1, 2, 1],
-        stride=1,
+        strides=[1, 1],
         padding="VALID",
         expected=expected_output)
 
@@ -400,7 +541,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 1, 4, 1],
         filter_sizes=[1, 3, 1, 1],
         out_backprop_sizes=[1, 1, 4, 1],
-        stride=1,
+        strides=[1, 1],
         padding="SAME",
         expected=expected_output)
 
@@ -410,7 +551,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 1, 4, 1],
         filter_sizes=[1, 3, 1, 1],
         out_backprop_sizes=[1, 1, 2, 1],
-        stride=2,
+        strides=[2, 2],
         padding="SAME",
         expected=expected_output)
 
@@ -420,7 +561,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 2, 3, 1],
         filter_sizes=[2, 2, 1, 1],
         out_backprop_sizes=[1, 2, 3, 1],
-        stride=1,
+        strides=[1, 1],
         padding="SAME",
         expected=expected_output)
 
@@ -430,7 +571,7 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 3, 5, 1],
         filter_sizes=[1, 3, 1, 1],
         out_backprop_sizes=[1, 2, 2, 1],
-        stride=2,
+        strides=[2, 2],
         padding="VALID",
         expected=expected_output)
 
@@ -440,10 +581,64 @@ class Conv2DBackpropFilterTest(XLATestCase):
         input_sizes=[1, 2, 3, 1],
         filter_sizes=[2, 2, 1, 1],
         out_backprop_sizes=[1, 1, 2, 1],
-        stride=2,
+        strides=[2, 2],
         padding="SAME",
         expected=expected_output)
 
+  def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self):
+    self._VerifyValues(
+        input_sizes=[1, 3, 6, 1],
+        filter_sizes=[2, 2, 1, 1],
+        out_backprop_sizes=[1, 1, 5, 1],
+        strides=[1, 1],
+        dilations=[2, 1],
+        padding="VALID",
+        expected=[55, 70, 235, 250])
+
+  def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self):
+    self._VerifyValues(
+        input_sizes=[1, 2, 3, 1],
+        filter_sizes=[2, 2, 1, 1],
+        out_backprop_sizes=[1, 1, 1, 1],
+        strides=[1, 1],
+        dilations=[1, 2],
+        padding="VALID",
+        expected=[1, 3, 4, 6])
+
+  def testConv2DEmptyBackpropFilterDilation1x2(self):
+    self._VerifyValues(
+        input_sizes=[1, 2, 3, 1],
+        filter_sizes=[2, 2, 1, 0],
+        out_backprop_sizes=[1, 1, 1, 0],
+        strides=[1, 1],
+        dilations=[1, 2],
+        padding="VALID",
+        expected=np.zeros([0]))
+
+  def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self):
+    self._VerifyValues(
+        input_sizes=[1, 3, 4, 3],
+        filter_sizes=[2, 2, 3, 3],
+        out_backprop_sizes=[1, 1, 2, 3],
+        strides=[1, 1],
+        dilations=[2, 2],
+        padding="VALID",
+        expected=[
+            17, 22, 27, 22, 29, 36, 27, 36, 45, 47, 64, 81, 52, 71, 90, 57, 78,
+            99, 137, 190, 243, 142, 197, 252, 147, 204, 261, 167, 232, 297, 172,
+            239, 306, 177, 246, 315
+        ])
+
+  def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self):
+    self._VerifyValues(
+        input_sizes=[1, 3, 3, 1],
+        filter_sizes=[2, 2, 1, 2],
+        out_backprop_sizes=[1, 1, 1, 2],
+        strides=[1, 1],
+        dilations=[2, 2],
+        padding="VALID",
+        expected=[1, 2, 3, 6, 7, 14, 9, 18])
+
 
 if __name__ == "__main__":
   googletest.main()
diff --git a/tensorflow/compiler/tests/fft_test.py b/tensorflow/compiler/tests/fft_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..afb5fa4bb4fefe5bc2ecded826143ffc83c2b559
--- /dev/null
+++ b/tensorflow/compiler/tests/fft_test.py
@@ -0,0 +1,204 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for FFT via the XLA JIT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import itertools
+
+import numpy as np
+import scipy.signal as sps
+
+from tensorflow.compiler.tests.xla_test import XLATestCase
+from tensorflow.contrib.signal.python.ops import spectral_ops as signal
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import spectral_ops
+from tensorflow.python.platform import googletest
+
+BATCH_DIMS = (3, 5)
+RTOL = 0.02  # Eigen/cuFFT differ widely from np, especially for FFT3D
+ATOL = 1e-3
+
+
+def pick_10(x):
+  x = list(x)
+  np.random.seed(123)
+  np.random.shuffle(x)
+  return x[:10]
+
+
+def to_32bit(x):
+  if x.dtype == np.complex128:
+    return x.astype(np.complex64)
+  if x.dtype == np.float64:
+    return x.astype(np.float32)
+  return x
+
+
+POWS_OF_2 = 2**np.arange(3, 12)
+INNER_DIMS_1D = list((x,) for x in POWS_OF_2)
+POWS_OF_2 = 2**np.arange(3, 8)  # To avoid OOM on GPU.
+INNER_DIMS_2D = pick_10(itertools.product(POWS_OF_2, POWS_OF_2))
+INNER_DIMS_3D = pick_10(itertools.product(POWS_OF_2, POWS_OF_2, POWS_OF_2))
+
+
+class FFTTest(XLATestCase):
+
+  def _VerifyFftMethod(self, inner_dims, complex_to_input, input_to_expected,
+                       tf_method):
+    for indims in inner_dims:
+      print("nfft =", indims)
+      shape = BATCH_DIMS + indims
+      data = np.arange(np.prod(shape) * 2) / np.prod(indims)
+      np.random.seed(123)
+      np.random.shuffle(data)
+      data = np.reshape(data.astype(np.float32).view(np.complex64), shape)
+      data = to_32bit(complex_to_input(data))
+      expected = to_32bit(input_to_expected(data))
+      with self.test_session() as sess:
+        with self.test_scope():
+          ph = array_ops.placeholder(
+              dtypes.as_dtype(data.dtype), shape=data.shape)
+          out = tf_method(ph)
+        value = sess.run(out, {ph: data})
+        self.assertAllClose(expected, value, rtol=RTOL, atol=ATOL)
+
+  def testContribSignalSTFT(self):
+    ws = 512
+    hs = 128
+    dims = (ws * 20,)
+    shape = BATCH_DIMS + dims
+    data = np.arange(np.prod(shape)) / np.prod(dims)
+    np.random.seed(123)
+    np.random.shuffle(data)
+    data = np.reshape(data.astype(np.float32), shape)
+    window = sps.get_window("hann", ws)
+    expected = sps.stft(
+        data, nperseg=ws, noverlap=ws - hs, boundary=None, window=window)[2]
+    expected = np.swapaxes(expected, -1, -2)
+    expected *= window.sum()  # scipy divides by window sum
+    with self.test_session() as sess:
+      with self.test_scope():
+        ph = array_ops.placeholder(
+            dtypes.as_dtype(data.dtype), shape=data.shape)
+        out = signal.stft(ph, ws, hs)
+
+      value = sess.run(out, {ph: data})
+      self.assertAllClose(expected, value, rtol=RTOL, atol=ATOL)
+
+  def testFFT(self):
+    self._VerifyFftMethod(INNER_DIMS_1D, lambda x: x, np.fft.fft,
+                          spectral_ops.fft)
+
+  def testFFT2D(self):
+    self._VerifyFftMethod(INNER_DIMS_2D, lambda x: x, np.fft.fft2,
+                          spectral_ops.fft2d)
+
+  def testFFT3D(self):
+    self._VerifyFftMethod(INNER_DIMS_3D, lambda x: x,
+                          lambda x: np.fft.fftn(x, axes=(-3, -2, -1)),
+                          spectral_ops.fft3d)
+
+  def testIFFT(self):
+    self._VerifyFftMethod(INNER_DIMS_1D, lambda x: x, np.fft.ifft,
+                          spectral_ops.ifft)
+
+  def testIFFT2D(self):
+    self._VerifyFftMethod(INNER_DIMS_2D, lambda x: x, np.fft.ifft2,
+                          spectral_ops.ifft2d)
+
+  def testIFFT3D(self):
+    self._VerifyFftMethod(INNER_DIMS_3D, lambda x: x,
+                          lambda x: np.fft.ifftn(x, axes=(-3, -2, -1)),
+                          spectral_ops.ifft3d)
+
+  def testRFFT(self):
+    self._VerifyFftMethod(
+        INNER_DIMS_1D, np.real, lambda x: np.fft.rfft(x, n=x.shape[-1]),
+        lambda x: spectral_ops.rfft(x, fft_length=[x.shape[-1].value]))
+
+  def testRFFT2D(self):
+
+    def _tf_fn(x):
+      return spectral_ops.rfft2d(
+          x, fft_length=[x.shape[-2].value, x.shape[-1].value])
+
+    self._VerifyFftMethod(
+        INNER_DIMS_2D, np.real,
+        lambda x: np.fft.rfft2(x, s=[x.shape[-2], x.shape[-1]]), _tf_fn)
+
+  def testRFFT3D(self):
+
+    def _to_expected(x):
+      return np.fft.rfftn(
+          x, axes=(-3, -2, -1), s=[x.shape[-3], x.shape[-2], x.shape[-1]])
+
+    def _tf_fn(x):
+      return spectral_ops.rfft3d(
+          x,
+          fft_length=[x.shape[-3].value, x.shape[-2].value, x.shape[-1].value])
+
+    self._VerifyFftMethod(INNER_DIMS_3D, np.real, _to_expected, _tf_fn)
+
+  def testIRFFT(self):
+
+    def _tf_fn(x):
+      return spectral_ops.irfft(x, fft_length=[2 * (x.shape[-1].value - 1)])
+
+    self._VerifyFftMethod(
+        INNER_DIMS_1D, lambda x: np.fft.rfft(np.real(x), n=x.shape[-1]),
+        lambda x: np.fft.irfft(x, n=2 * (x.shape[-1] - 1)), _tf_fn)
+
+  def testIRFFT2D(self):
+
+    def _tf_fn(x):
+      return spectral_ops.irfft2d(
+          x, fft_length=[x.shape[-2].value, 2 * (x.shape[-1].value - 1)])
+
+    self._VerifyFftMethod(
+        INNER_DIMS_2D,
+        lambda x: np.fft.rfft2(np.real(x), s=[x.shape[-2], x.shape[-1]]),
+        lambda x: np.fft.irfft2(x, s=[x.shape[-2], 2 * (x.shape[-1] - 1)]),
+        _tf_fn)
+
+  def testIRFFT3D(self):
+
+    def _to_input(x):
+      return np.fft.rfftn(
+          np.real(x),
+          axes=(-3, -2, -1),
+          s=[x.shape[-3], x.shape[-2], x.shape[-1]])
+
+    def _to_expected(x):
+      return np.fft.irfftn(
+          x,
+          axes=(-3, -2, -1),
+          s=[x.shape[-3], x.shape[-2], 2 * (x.shape[-1] - 1)])
+
+    def _tf_fn(x):
+      return spectral_ops.irfft3d(
+          x,
+          fft_length=[
+              x.shape[-3].value, x.shape[-2].value, 2 * (x.shape[-1].value - 1)
+          ])
+
+    self._VerifyFftMethod(INNER_DIMS_3D, _to_input, _to_expected, _tf_fn)
+
+
+if __name__ == "__main__":
+  googletest.main()
diff --git a/tensorflow/compiler/tests/ftrl_test.py b/tensorflow/compiler/tests/ftrl_test.py
index 7e3871312c86530b6d3cb0bbacc16c25d3469832..f9db4cf2017c0b4b6dc0cfeeda6dca7bb9d14f19 100644
--- a/tensorflow/compiler/tests/ftrl_test.py
+++ b/tensorflow/compiler/tests/ftrl_test.py
@@ -161,9 +161,9 @@ class FtrlOptimizerTest(XLATestCase):
           ftrl_update.run()
 
         # Validate updated params
-        self.assertAllClose(
+        self.assertAllCloseAccordingToType(
             np.array([-2.55607247, -3.98729396]), var0.eval(), 1e-5, 1e-5)
-        self.assertAllClose(
+        self.assertAllCloseAccordingToType(
             np.array([-0.28232238, -0.56096673]), var1.eval(), 1e-5, 1e-5)
 
   def testFtrlWithL1(self):
@@ -189,10 +189,10 @@ class FtrlOptimizerTest(XLATestCase):
           ftrl_update.run()
 
         # Validate updated params
-        self.assertAllClose(np.array([-7.66718769, -10.91273689]), var0.eval(),
-                            rtol=1e-4)
-        self.assertAllClose(np.array([-0.93460727, -1.86147261]), var1.eval(),
-                            rtol=1e-4)
+        self.assertAllCloseAccordingToType(
+            np.array([-7.66718769, -10.91273689]), var0.eval(), rtol=1e-4)
+        self.assertAllCloseAccordingToType(
+            np.array([-0.93460727, -1.86147261]), var1.eval(), rtol=1e-4)
 
   def testFtrlWithL1_L2(self):
     for dtype in self.float_types:
@@ -217,10 +217,10 @@ class FtrlOptimizerTest(XLATestCase):
           ftrl_update.run()
 
         # Validate updated params
-        self.assertAllClose(np.array([-0.24059935, -0.46829352]), var0.eval(),
-                            rtol=1e-5)
-        self.assertAllClose(np.array([-0.02406147, -0.04830509]), var1.eval(),
-                            rtol=1e-5)
+        self.assertAllCloseAccordingToType(
+            np.array([-0.24059935, -0.46829352]), var0.eval(), rtol=1e-5)
+        self.assertAllCloseAccordingToType(
+            np.array([-0.02406147, -0.04830509]), var1.eval(), rtol=1e-5)
 
   def testFtrlWithL1_L2_L2Shrinkage(self):
     """Test the new FTRL op with support for l2 shrinkage.
@@ -244,18 +244,18 @@ class FtrlOptimizerTest(XLATestCase):
         ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
         # Fetch params to validate initial values
-        self.assertAllClose([1.0, 2.0], var0.eval())
-        self.assertAllClose([4.0, 3.0], var1.eval())
+        self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
+        self.assertAllCloseAccordingToType([4.0, 3.0], var1.eval())
 
         # Run 10 steps FTRL
         for _ in range(10):
           ftrl_update.run()
 
         # Validate updated params
-        self.assertAllClose(np.array([-0.21931979, -0.40642974]), var0.eval(),
-                            rtol=1e-4)
-        self.assertAllClose(np.array([-0.0282721, -0.07188385]), var1.eval(),
-                            rtol=1e-4)
+        self.assertAllCloseAccordingToType(
+            np.array([-0.21931979, -0.40642974]), var0.eval(), rtol=1e-4)
+        self.assertAllCloseAccordingToType(
+            np.array([-0.0282721, -0.07188385]), var1.eval(), rtol=1e-4)
 
   # When variables are initialized with Zero, FTRL-Proximal has two properties:
   # 1. Without L1&L2 but with fixed learning rate, FTRL-Proximal is identical
@@ -272,8 +272,8 @@ class FtrlOptimizerTest(XLATestCase):
       with self.test_session(), self.test_scope():
         val2, val3 = self.equivAdagradTest_AdagradPart(steps, dtype)
 
-    self.assertAllClose(val0, val2, rtol=1e-4)
-    self.assertAllClose(val1, val3, rtol=1e-4)
+    self.assertAllCloseAccordingToType(val0, val2, rtol=1e-4)
+    self.assertAllCloseAccordingToType(val1, val3, rtol=1e-4)
 
   def testEquivGradientDescentwithoutRegularization(self):
     steps = 5
@@ -284,8 +284,8 @@ class FtrlOptimizerTest(XLATestCase):
         val2, val3 = self.equivGradientDescentTest_GradientDescentPart(
             steps, dtype)
 
-    self.assertAllClose(val0, val2, rtol=1e-5)
-    self.assertAllClose(val1, val3, rtol=1e-5)
+    self.assertAllCloseAccordingToType(val0, val2, rtol=1e-5)
+    self.assertAllCloseAccordingToType(val1, val3, rtol=1e-5)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py
index 00a9c9a65ba03d099581a3ee0dbe32c33e111231..a80d69fa5f5099b8a8b67df0da9c92b957e9d194 100644
--- a/tensorflow/compiler/tests/fused_batchnorm_test.py
+++ b/tensorflow/compiler/tests/fused_batchnorm_test.py
@@ -155,7 +155,7 @@ class FusedBatchNormTest(XLATestCase):
   def testLearningWithGradientChecker(self):
     self._testLearning(True)
 
-  def testGradient(self):
+  def testGradientTraining(self):
     # TODO(b/64270657): Use gradient_checker here in addition to comparing with
     # this reference implementation.
     channel = 3
@@ -175,7 +175,7 @@ class FusedBatchNormTest(XLATestCase):
       var = array_ops.placeholder(np.float32, shape=scale_shape, name="var")
       scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
       grad_x, grad_scale, grad_offset, _, _ = gen_nn_ops.fused_batch_norm_grad(
-          grad, x, scale, mean, var, data_format="NHWC")
+          grad, x, scale, mean, var, data_format="NHWC", is_training=True)
 
       grad_x_val, grad_scale_val, grad_offset_val = sess.run(
           [grad_x, grad_scale, grad_offset], {
@@ -193,6 +193,53 @@ class FusedBatchNormTest(XLATestCase):
       self.assertAllClose(grad_scale_val, grad_scale_ref, atol=1e-2)
       self.assertAllClose(grad_offset_val, grad_offset_ref, atol=1e-3)
 
+  def testGradientInference(self):
+    # TODO(b/64270657): Use gradient_checker here in addition to comparing with
+    # this reference implementation.
+    channel = 3
+    x_shape = [2, 2, 6, channel]
+    scale_shape = [channel]
+    grad_val = np.random.random_sample(x_shape).astype(np.float32)
+    x_val = np.random.random_sample(x_shape).astype(np.float32)
+    scale_val = np.random.random_sample(scale_shape).astype(np.float32)
+    mean_val = np.random.random_sample(scale_shape).astype(np.float32)
+    var_val = np.random.random_sample(scale_shape).astype(np.float32)
+
+    with self.test_session() as sess, self.test_scope():
+      grad = array_ops.placeholder(np.float32, shape=x_shape, name="grad")
+      x = array_ops.placeholder(np.float32, shape=x_shape, name="x")
+      mean = array_ops.placeholder(np.float32, shape=scale_shape, name="mean")
+      var = array_ops.placeholder(np.float32, shape=scale_shape, name="var")
+      scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
+      with self.test_scope():
+        out = gen_nn_ops.fused_batch_norm_grad(
+            grad, x, scale, mean, var, data_format="NHWC", is_training=False)
+        grad_x, grad_scale, grad_offset, _, _ = out
+
+      ref_x, ref_scale, ref_offset, _, _ = gen_nn_ops.fused_batch_norm_grad(
+          grad, x, scale, mean, var, data_format="NHWC", is_training=False)
+
+      grad_x_val, grad_scale_val, grad_offset_val, = sess.run(
+          [grad_x, grad_scale, grad_offset], {
+              grad: grad_val,
+              x: x_val,
+              mean: mean_val,
+              var: var_val,
+              scale: scale_val
+          })
+      grad_x_ref, grad_scale_ref, grad_offset_ref, = sess.run(
+          [ref_x, ref_scale, ref_offset], {
+              grad: grad_val,
+              x: x_val,
+              mean: mean_val,
+              var: var_val,
+              scale: scale_val
+          })
+
+      self.assertAllClose(grad_x_val, grad_x_ref, atol=1e-2)
+      self.assertAllClose(grad_scale_val, grad_scale_ref, atol=1e-2)
+      self.assertAllClose(grad_offset_val, grad_offset_ref, atol=1e-3)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..e84b790037c3b341a01c0a4d295e36890ea1f28e
--- /dev/null
+++ b/tensorflow/compiler/tests/image_ops_test.py
@@ -0,0 +1,547 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for image ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import colorsys
+import math
+
+import numpy as np
+
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.compiler.tests.xla_test import XLATestCase
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_image_ops
+from tensorflow.python.ops import image_ops
+from tensorflow.python.platform import test
+
+
+class RGBToHSVTest(XLATestCase):
+
+  def testBatch(self):
+    # Build an arbitrary RGB image
+    np.random.seed(7)
+    batch_size = 5
+    shape = (batch_size, 2, 7, 3)
+
+    for nptype in self.float_types:
+      inp = np.random.rand(*shape).astype(nptype)
+
+      # Convert to HSV and back, as a batch and individually
+      with self.test_session() as sess:
+        batch0 = array_ops.placeholder(nptype, shape=shape)
+        with self.test_scope():
+          batch1 = image_ops.rgb_to_hsv(batch0)
+          batch2 = image_ops.hsv_to_rgb(batch1)
+        split0 = array_ops.unstack(batch0)
+        with self.test_scope():
+          split1 = list(map(image_ops.rgb_to_hsv, split0))
+          split2 = list(map(image_ops.hsv_to_rgb, split1))
+        join1 = array_ops.stack(split1)
+        join2 = array_ops.stack(split2)
+        batch1, batch2, join1, join2 = sess.run([batch1, batch2, join1, join2],
+                                                {
+                                                    batch0: inp
+                                                })
+
+      # Verify that processing batch elements together is the same as separate
+      self.assertAllClose(batch1, join1)
+      self.assertAllClose(batch2, join2)
+      self.assertAllClose(batch2, inp)
+
+  def testRGBToHSVRoundTrip(self):
+    data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    for nptype in self.float_types:
+      rgb_np = np.array(data, dtype=nptype).reshape([2, 2, 3]) / 255.
+      with self.test_session():
+        placeholder = array_ops.placeholder(nptype)
+        with self.test_scope():
+          hsv = image_ops.rgb_to_hsv(placeholder)
+          rgb = image_ops.hsv_to_rgb(hsv)
+        rgb_tf = rgb.eval(feed_dict={placeholder: rgb_np})
+      self.assertAllClose(rgb_tf, rgb_np)
+
+  def testRGBToHSVNumpy(self):
+    """Tests the RGB to HSV conversion matches a reference implementation."""
+    for nptype in self.float_types:
+      rgb_flat = np.random.random(64 * 3).reshape((64, 3)).astype(nptype)
+      rgb_np = rgb_flat.reshape(4, 4, 4, 3)
+      hsv_np = np.array([colorsys.rgb_to_hsv(r, g, b) for r, g, b in rgb_flat])
+      hsv_np = hsv_np.reshape(4, 4, 4, 3)
+      with self.test_session():
+        placeholder = array_ops.placeholder(nptype)
+        with self.test_scope():
+          hsv_op = image_ops.rgb_to_hsv(placeholder)
+        hsv_tf = hsv_op.eval(feed_dict={placeholder: rgb_np})
+      self.assertAllClose(hsv_tf, hsv_np)
+
+
+class AdjustContrastTest(XLATestCase):
+
+  def _testContrast(self, x_np, y_np, contrast_factor):
+    with self.test_session():
+      x = array_ops.placeholder(x_np.dtype, shape=x_np.shape)
+      flt_x = image_ops.convert_image_dtype(x, dtypes.float32)
+      with self.test_scope():
+        y = image_ops.adjust_contrast(flt_x, contrast_factor)
+      y = image_ops.convert_image_dtype(y, x.dtype, saturate=True)
+      y_tf = y.eval({x: x_np})
+      self.assertAllClose(y_tf, y_np, 1e-6)
+
+  def testFloatContrast(self):
+    x_shape = [1, 2, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.float32).reshape(x_shape) / 255.
+
+    y_data = [
+        -45.25, -90.75, -92.5, 62.75, 169.25, 333.5, 28.75, -84.75, 349.5,
+        134.75, 409.25, -116.5
+    ]
+    y_np = np.array(y_data, dtype=np.float32).reshape(x_shape) / 255.
+
+    self._testContrast(x_np, y_np, contrast_factor=2.0)
+
+  def testBatchContrast(self):
+    x_shape = [2, 1, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    y_data = [0, 0, 0, 81, 200, 255, 10, 0, 255, 116, 255, 0]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    self._testContrast(x_np, y_np, contrast_factor=2.0)
+
+  def _adjustContrastNp(self, x_np, contrast_factor):
+    mean = np.mean(x_np, (1, 2), keepdims=True)
+    y_np = mean + contrast_factor * (x_np - mean)
+    return y_np
+
+  def _adjustContrastTf(self, x_np, contrast_factor):
+    with self.test_session():
+      x = array_ops.placeholder(np.float32)
+      with self.test_scope():
+        y = image_ops.adjust_contrast(x, contrast_factor)
+      y_tf = y.eval({x: x_np})
+    return y_tf
+
+  def testRandomContrast(self):
+    x_shapes = [
+        [1, 2, 2, 3],
+        [2, 1, 2, 3],
+        [1, 2, 2, 3],
+        [2, 5, 5, 3],
+        [2, 1, 1, 3],
+    ]
+    for x_shape in x_shapes:
+      x_np = np.random.rand(*x_shape) * 255.
+      contrast_factor = np.random.rand() * 2.0 + 0.1
+      y_np = self._adjustContrastNp(x_np, contrast_factor)
+      y_tf = self._adjustContrastTf(x_np, contrast_factor)
+      self.assertAllClose(y_tf, y_np, rtol=1e-5, atol=1e-5)
+
+
+class AdjustHueTest(XLATestCase):
+
+  def testAdjustNegativeHue(self):
+    x_shape = [2, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    delta = -0.25
+    y_data = [0, 13, 1, 54, 226, 59, 8, 234, 150, 255, 39, 1]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session():
+      x = array_ops.placeholder(x_np.dtype, shape=x_shape)
+      flt_x = image_ops.convert_image_dtype(x, dtypes.float32)
+      with self.test_scope():
+        y = gen_image_ops.adjust_hue(flt_x, delta)
+      y = image_ops.convert_image_dtype(y, x.dtype, saturate=True)
+      y_tf = y.eval({x: x_np})
+      self.assertAllEqual(y_tf, y_np)
+
+  def testAdjustPositiveHue(self):
+    x_shape = [2, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    delta = 0.25
+    y_data = [13, 0, 11, 226, 54, 221, 234, 8, 92, 1, 217, 255]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session():
+      x = array_ops.placeholder(x_np.dtype, shape=x_shape)
+      flt_x = image_ops.convert_image_dtype(x, dtypes.float32)
+      with self.test_scope():
+        y = gen_image_ops.adjust_hue(flt_x, delta)
+      y = image_ops.convert_image_dtype(y, x.dtype, saturate=True)
+      y_tf = y.eval({x: x_np})
+      self.assertAllEqual(y_tf, y_np)
+
+  def testBatchAdjustHue(self):
+    x_shape = [2, 1, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    delta = 0.25
+    y_data = [13, 0, 11, 226, 54, 221, 234, 8, 92, 1, 217, 255]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session():
+      x = array_ops.placeholder(x_np.dtype, shape=x_shape)
+      flt_x = image_ops.convert_image_dtype(x, dtypes.float32)
+      with self.test_scope():
+        y = gen_image_ops.adjust_hue(flt_x, delta)
+      y = image_ops.convert_image_dtype(y, x.dtype, saturate=True)
+      y_tf = y.eval({x: x_np})
+      self.assertAllEqual(y_tf, y_np)
+
+  def _adjustHueNp(self, x_np, delta_h):
+    self.assertEqual(x_np.shape[-1], 3)
+    x_v = x_np.reshape([-1, 3])
+    y_v = np.ndarray(x_v.shape, dtype=x_v.dtype)
+    channel_count = x_v.shape[0]
+    for i in xrange(channel_count):
+      r = x_v[i][0]
+      g = x_v[i][1]
+      b = x_v[i][2]
+      h, s, v = colorsys.rgb_to_hsv(r, g, b)
+      h += delta_h
+      h = math.fmod(h + 10.0, 1.0)
+      r, g, b = colorsys.hsv_to_rgb(h, s, v)
+      y_v[i][0] = r
+      y_v[i][1] = g
+      y_v[i][2] = b
+    return y_v.reshape(x_np.shape)
+
+  def _adjustHueTf(self, x_np, delta_h):
+    with self.test_session():
+      x = array_ops.placeholder(dtypes.float32)
+      with self.test_scope():
+        y = gen_image_ops.adjust_hue(x, delta_h)
+      y_tf = y.eval({x: x_np})
+    return y_tf
+
+  def testAdjustRandomHue(self):
+    x_shapes = [
+        [2, 2, 3],
+        [4, 2, 3],
+        [2, 4, 3],
+        [2, 5, 3],
+        [1000, 1, 3],
+    ]
+    test_styles = [
+        "all_random",
+        "rg_same",
+        "rb_same",
+        "gb_same",
+        "rgb_same",
+    ]
+    for x_shape in x_shapes:
+      for test_style in test_styles:
+        x_np = np.random.rand(*x_shape) * 255.
+        delta_h = np.random.rand() * 2.0 - 1.0
+        if test_style == "all_random":
+          pass
+        elif test_style == "rg_same":
+          x_np[..., 1] = x_np[..., 0]
+        elif test_style == "rb_same":
+          x_np[..., 2] = x_np[..., 0]
+        elif test_style == "gb_same":
+          x_np[..., 2] = x_np[..., 1]
+        elif test_style == "rgb_same":
+          x_np[..., 1] = x_np[..., 0]
+          x_np[..., 2] = x_np[..., 0]
+        else:
+          raise AssertionError("Invalid test style: %s" % (test_style))
+        y_np = self._adjustHueNp(x_np, delta_h)
+        y_tf = self._adjustHueTf(x_np, delta_h)
+        self.assertAllClose(y_tf, y_np, rtol=2e-5, atol=1e-4)
+
+  def testInvalidShapes(self):
+    fused = False
+    if not fused:
+      # The tests are known to pass with the fused adjust_hue. We will enable
+      # them when the fused implementation is the default.
+      return
+    x_np = np.random.rand(2, 3) * 255.
+    delta_h = np.random.rand() * 2.0 - 1.0
+    fused = False
+    with self.assertRaisesRegexp(ValueError, "Shape must be at least rank 3"):
+      self._adjustHueTf(x_np, delta_h)
+    x_np = np.random.rand(4, 2, 4) * 255.
+    delta_h = np.random.rand() * 2.0 - 1.0
+    with self.assertRaisesOpError("input must have 3 channels"):
+      self._adjustHueTf(x_np, delta_h)
+
+
+class AdjustSaturationTest(XLATestCase):
+
+  def _adjust_saturation(self, image, saturation_factor):
+    image = ops.convert_to_tensor(image, name="image")
+    orig_dtype = image.dtype
+    flt_image = image_ops.convert_image_dtype(image, dtypes.float32)
+    with self.test_scope():
+      saturation_adjusted_image = gen_image_ops.adjust_saturation(
+          flt_image, saturation_factor)
+    return image_ops.convert_image_dtype(saturation_adjusted_image, orig_dtype)
+
+  def testHalfSaturation(self):
+    x_shape = [2, 2, 3]
+    x_rgb_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_rgb_data, dtype=np.uint8).reshape(x_shape)
+
+    saturation_factor = 0.5
+    y_rgb_data = [6, 9, 13, 140, 180, 226, 135, 121, 234, 172, 255, 128]
+    y_np = np.array(y_rgb_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session():
+      x = array_ops.placeholder(x_np.dtype, shape=x_shape)
+      y = self._adjust_saturation(x, saturation_factor)
+      y_tf = y.eval({x: x_np})
+      self.assertAllEqual(y_tf, y_np)
+
+  def testTwiceSaturation(self):
+    x_shape = [2, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    saturation_factor = 2.0
+    y_data = [0, 5, 13, 0, 106, 226, 30, 0, 234, 89, 255, 0]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session():
+      x = array_ops.placeholder(x_np.dtype, shape=x_shape)
+      y = self._adjust_saturation(x, saturation_factor)
+      y_tf = y.eval({x: x_np})
+      self.assertAllEqual(y_tf, y_np)
+
+  def _adjustSaturationNp(self, x_np, scale):
+    self.assertEqual(x_np.shape[-1], 3)
+    x_v = x_np.reshape([-1, 3])
+    y_v = np.ndarray(x_v.shape, dtype=x_v.dtype)
+    channel_count = x_v.shape[0]
+    for i in xrange(channel_count):
+      r = x_v[i][0]
+      g = x_v[i][1]
+      b = x_v[i][2]
+      h, s, v = colorsys.rgb_to_hsv(r, g, b)
+      s *= scale
+      s = min(1.0, max(0.0, s))
+      r, g, b = colorsys.hsv_to_rgb(h, s, v)
+      y_v[i][0] = r
+      y_v[i][1] = g
+      y_v[i][2] = b
+    return y_v.reshape(x_np.shape)
+
+  def testAdjustRandomSaturation(self):
+    x_shapes = [
+        [2, 2, 3],
+        [4, 2, 3],
+        [2, 4, 3],
+        [2, 5, 3],
+        [1000, 1, 3],
+    ]
+    test_styles = [
+        "all_random",
+        "rg_same",
+        "rb_same",
+        "gb_same",
+        "rgb_same",
+    ]
+    with self.test_session():
+      for x_shape in x_shapes:
+        for test_style in test_styles:
+          x_np = np.random.rand(*x_shape) * 255.
+          scale = np.random.rand()
+          if test_style == "all_random":
+            pass
+          elif test_style == "rg_same":
+            x_np[..., 1] = x_np[..., 0]
+          elif test_style == "rb_same":
+            x_np[..., 2] = x_np[..., 0]
+          elif test_style == "gb_same":
+            x_np[..., 2] = x_np[..., 1]
+          elif test_style == "rgb_same":
+            x_np[..., 1] = x_np[..., 0]
+            x_np[..., 2] = x_np[..., 0]
+          else:
+            raise AssertionError("Invalid test style: %s" % (test_style))
+          y_baseline = self._adjustSaturationNp(x_np, scale)
+          x = array_ops.placeholder(dtypes.float32, shape=x_shape)
+          with self.test_scope():
+            y_fused = self._adjust_saturation(x,
+                                              scale).eval(feed_dict={
+                                                  x: x_np
+                                              })
+          self.assertAllClose(y_fused, y_baseline, rtol=2e-5, atol=1e-5)
+
+
+class ResizeBilinearTest(XLATestCase):
+
+  def _assertForwardOpMatchesExpected(self,
+                                      image_np,
+                                      target_shape,
+                                      expected=None):
+    if expected is None:
+      self.fail("expected must be specified")
+    with self.test_session() as sess, self.test_scope():
+      image = array_ops.placeholder(image_np.dtype)
+      resized = gen_image_ops.resize_bilinear(
+          image, target_shape, align_corners=True)
+      out = sess.run(resized, {image: image_np[np.newaxis, :, :, np.newaxis]})
+      self.assertAllClose(expected[np.newaxis, :, :, np.newaxis], out)
+
+  def _assertBackwardOpMatchesExpected(self,
+                                       grads_np,
+                                       input_shape=None,
+                                       dtype=None,
+                                       expected=None):
+    if input_shape is None:
+      self.fail("input_shape must be specified")
+    if expected is None:
+      self.fail("expected must be specified")
+    with self.test_session() as sess, self.test_scope():
+      dtype = dtype or np.float32
+      grads = array_ops.placeholder(np.float32)
+      resized = gen_image_ops._resize_bilinear_grad(
+          grads,
+          np.zeros([1, input_shape[0], input_shape[1], 1], dtype=dtype),
+          align_corners=True)
+      out = sess.run(resized, {grads: grads_np[np.newaxis, :, :, np.newaxis]})
+      self.assertAllClose(expected[np.newaxis, :, :, np.newaxis], out)
+
+  def testAlignCorners1x2To3x2(self):
+    for dtype in self.float_types:
+      self._assertForwardOpMatchesExpected(
+          np.array([[1, 2]], dtype=dtype), [3, 3],
+          expected=np.array(
+              [[1, 1.5, 2], [1, 1.5, 2], [1, 1.5, 2]], dtype=np.float32))
+
+  def testAlignCorners1x2To3x2Grad(self):
+    for dtype in self.float_types:
+      self._assertBackwardOpMatchesExpected(
+          np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32),
+          input_shape=[1, 2],
+          dtype=dtype,
+          expected=np.array([[9, 12]], dtype=np.float32))
+
+  def testAlignCorners2x2To1x1(self):
+    for dtype in self.float_types:
+      self._assertForwardOpMatchesExpected(
+          np.array([[1, 2], [3, 4]], dtype=dtype), [1, 1],
+          expected=np.array([[1]], dtype=np.float32))
+
+  def testAlignCorners2x2To1x1Grad(self):
+    for dtype in self.float_types:
+      self._assertBackwardOpMatchesExpected(
+          np.array([[7]], dtype=np.float32),
+          input_shape=[2, 2],
+          dtype=dtype,
+          expected=np.array([[7, 0], [0, 0]], dtype=np.float32))
+
+  def testAlignCorners2x2To3x3(self):
+    for dtype in self.float_types:
+      self._assertForwardOpMatchesExpected(
+          np.array([[1, 2], [3, 4]], dtype=dtype), [3, 3],
+          expected=np.array(
+              [[1, 1.5, 2], [2, 2.5, 3], [3, 3.5, 4]], dtype=np.float32))
+
+  def testAlignCorners2x2To3x3Grad(self):
+    self._assertBackwardOpMatchesExpected(
+        np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float32),
+        input_shape=[2, 2],
+        expected=np.array([[5.25, 8.25], [14.25, 17.25]], dtype=np.float32))
+
+  def testAlignCorners3x3To2x2(self):
+    for dtype in self.float_types:
+      self._assertForwardOpMatchesExpected(
+          np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=dtype), [2, 2],
+          expected=np.array([[1, 3], [7, 9]], dtype=np.float32))
+
+  def testAlignCorners3x3To2x2Grad(self):
+    for dtype in self.float_types:
+      self._assertBackwardOpMatchesExpected(
+          np.array([[7, 13], [22, 4]], dtype=np.float32),
+          input_shape=[3, 3],
+          dtype=dtype,
+          expected=np.array(
+              [[7, 0, 13], [0, 0, 0], [22, 0, 4]], dtype=np.float32))
+
+  def testAlignCorners4x4To3x3(self):
+    for dtype in self.float_types:
+      self._assertForwardOpMatchesExpected(
+          np.array(
+              [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]],
+              dtype=dtype), [3, 3],
+          expected=np.array(
+              [[1, 2.5, 4], [7, 8.5, 10], [13, 14.5, 16]], dtype=np.float32))
+
+  def testAlignCorners4x4To3x3Grad(self):
+    for dtype in self.float_types:
+      self._assertBackwardOpMatchesExpected(
+          np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float32),
+          input_shape=[4, 4],
+          dtype=dtype,
+          expected=np.array(
+              [[1, 1, 1, 3], [2, 1.25, 1.25, 3], [2, 1.25, 1.25, 3],
+               [7, 4, 4, 9]],
+              dtype=np.float32))
+
+  def testAlignCorners3x3To9x9(self):
+    for dtype in self.float_types:
+      self._assertForwardOpMatchesExpected(
+          np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=dtype), [9, 9],
+          expected=np.array(
+              [[1.0, 1.25, 1.50, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00], [
+                  1.75, 2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 3.75
+              ], [2.50, 2.75, 3.00, 3.25, 3.50, 3.75, 4.00, 4.25, 4.50], [
+                  3.25, 3.50, 3.75, 4.00, 4.25, 4.50, 4.75, 5.00, 5.25
+              ], [4.00, 4.25, 4.50, 4.75, 5.00, 5.25, 5.50, 5.75, 6.00], [
+                  4.75, 5.00, 5.25, 5.50, 5.75, 6.00, 6.25, 6.50, 6.75
+              ], [5.50, 5.75, 6.00, 6.25, 6.50, 6.75, 7.00, 7.25, 7.50], [
+                  6.25, 6.50, 6.75, 7.00, 7.25, 7.50, 7.75, 8.00, 8.25
+              ], [7.00, 7.25, 7.50, 7.75, 8.00, 8.25, 8.50, 8.75, 9.00]],
+              dtype=np.float32))
+
+  def testAlignCorners3x3To9x9Grad(self):
+    for dtype in self.float_types:
+      self._assertBackwardOpMatchesExpected(
+          np.array(
+              [[1.00, 1.25, 1.50, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00], [
+                  1.75, 2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 3.75
+              ], [2.50, 2.75, 3.00, 3.25, 3.50, 3.75, 4.00, 4.25, 4.50], [
+                  3.25, 3.50, 3.75, 4.00, 4.25, 4.50, 4.75, 5.00, 5.25
+              ], [4.00, 4.25, 4.50, 4.75, 5.00, 5.25, 5.50, 5.75, 6.00], [
+                  4.75, 5.00, 5.25, 5.50, 5.75, 6.00, 6.25, 6.50, 6.75
+              ], [5.50, 5.75, 6.00, 6.25, 6.50, 6.75, 7.00, 7.25, 7.50], [
+                  6.25, 6.50, 6.75, 7.00, 7.25, 7.50, 7.75, 8.00, 8.25
+              ], [7.00, 7.25, 7.50, 7.75, 8.00, 8.25, 8.50, 8.75, 9.00]],
+              dtype=np.float32),
+          input_shape=[3, 3],
+          dtype=dtype,
+          expected=np.array(
+              [[12.5, 27.5, 21.875], [42.5, 80.0, 57.5], [40.625, 72.5, 50]],
+              dtype=np.float32))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/compiler/tests/momentum_test.py b/tensorflow/compiler/tests/momentum_test.py
index c00e3035a0982b2b2e59eb6f53499918515ae71d..af9394e7d7dc9cf7dd009420ff9c845aec8785bd 100644
--- a/tensorflow/compiler/tests/momentum_test.py
+++ b/tensorflow/compiler/tests/momentum_test.py
@@ -96,28 +96,27 @@ class MomentumOptimizerTest(XLATestCase):
   def testNesterovMomentum(self):
     for dtype in self.float_types:
       with self.test_session(), self.test_scope():
-        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
-        var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
-        var0_np = np.array([1.0, 2.0], dtype=dtype)
-        var1_np = np.array([3.0, 4.0], dtype=dtype)
+        var0 = resource_variable_ops.ResourceVariable([0.1, 0.2], dtype=dtype)
+        var1 = resource_variable_ops.ResourceVariable([0.3, 0.4], dtype=dtype)
+        var0_np = np.array([0.1, 0.2], dtype=dtype)
+        var1_np = np.array([0.3, 0.4], dtype=dtype)
         accum0_np = np.array([0.0, 0.0], dtype=dtype)
         accum1_np = np.array([0.0, 0.0], dtype=dtype)
-        cost = 5 * var0 * var0 + 3 * var1
+        cost = 0.4 * var0 * var0 + 0.9 * var1
         global_step = resource_variable_ops.ResourceVariable(
             array_ops.zeros([], dtypes.int32), name="global_step")
         mom_op = momentum_lib.MomentumOptimizer(
-            learning_rate=2.0, momentum=0.9, use_nesterov=True)
+            learning_rate=0.1, momentum=0.9, use_nesterov=True)
         opt_op = mom_op.minimize(cost, global_step, [var0, var1])
         variables.global_variables_initializer().run()
         for _ in range(1, 5):
           opt_op.run()
           var0_np, accum0_np = self._update_nesterov_momentum_numpy(
-              var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
-          var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
-                                                                    accum1_np,
-                                                                    3, 2.0, 0.9)
-          self.assertAllClose(var0_np, var0.eval())
-          self.assertAllClose(var1_np, var1.eval())
+              var0_np, accum0_np, var0_np * 0.8, 0.1, 0.9)
+          var1_np, accum1_np = self._update_nesterov_momentum_numpy(
+              var1_np, accum1_np, 0.9, 0.1, 0.9)
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
 
   def testTensorLearningRateAndMomentum(self):
     for dtype in self.float_types:
diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc
index 6a8c3bcd55a6e454a19b6249cf4eb48739c8657f..e72dd4eea9f127e1df96ab166103c4c16372adb6 100644
--- a/tensorflow/compiler/tests/randomized_tests.cc
+++ b/tensorflow/compiler/tests/randomized_tests.cc
@@ -93,11 +93,11 @@ class OpTestBuilder {
  public:
   explicit OpTestBuilder(const string& op_name);
 
-  // Adds an input 'tensor'.
+  // Adds an input 'tensor' as a Placeholder node.
   OpTestBuilder& Input(const Tensor& tensor);
 
-  // Adds a random input tensor with 'type'. If 'dims' is not provided,
-  // RandomDims() is used.
+  // Adds a random input tensor with 'type' as a Placeholder node.
+  // If 'dims' is not provided, RandomDims() is used.
   OpTestBuilder& RandomInput(DataType type);
   OpTestBuilder& RandomInput(DataType type, std::vector<int64> dims);
 
@@ -998,6 +998,13 @@ TEST_F(OpTest, Atanh) {
   });
 }
 
+TEST_F(OpTest, Atan) {
+  Repeatedly([this]() {
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("Atan").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
+  });
+}
+
 TEST_F(OpTest, Atan2) {
   Repeatedly([this]() {
     auto dims = BroadcastableDims();
@@ -1368,6 +1375,121 @@ TEST_F(OpTest, Conj) {
   });
 }
 
+TEST_F(OpTest, FFT) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(1, kDefaultMaxRank);
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("FFT").RandomInput(DT_COMPLEX64, dims));
+  });
+}
+
+TEST_F(OpTest, FFT2D) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(2, kDefaultMaxRank);
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("FFT2D").RandomInput(DT_COMPLEX64, dims));
+  });
+}
+
+TEST_F(OpTest, FFT3D) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(3, kDefaultMaxRank);
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("FFT3D").RandomInput(DT_COMPLEX64, dims));
+  });
+}
+
+TEST_F(OpTest, IFFT) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(1, kDefaultMaxRank);
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("IFFT").RandomInput(DT_COMPLEX64, dims));
+  });
+}
+
+TEST_F(OpTest, IFFT2D) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(2, kDefaultMaxRank);
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("IFFT2D").RandomInput(DT_COMPLEX64, dims));
+  });
+}
+
+TEST_F(OpTest, IFFT3D) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(3, kDefaultMaxRank);
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("IFFT3D").RandomInput(DT_COMPLEX64, dims));
+  });
+}
+
+TEST_F(OpTest, RFFT) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(1, kDefaultMaxRank, 3);
+    Tensor fft_shape = test::AsTensor<int32>(AsInt32s({dims[dims.size() - 1]}));
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("RFFT").RandomInput(DT_FLOAT, dims).Input(fft_shape));
+  });
+}
+
+TEST_F(OpTest, RFFT2D) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(2, kDefaultMaxRank, 3);
+    Tensor fft_shape = test::AsTensor<int32>(
+        AsInt32s({dims[dims.size() - 2], dims[dims.size() - 1]}));
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("RFFT2D").RandomInput(DT_FLOAT, dims).Input(fft_shape));
+  });
+}
+
+TEST_F(OpTest, RFFT3D) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(3, kDefaultMaxRank, 3);
+    Tensor fft_shape = test::AsTensor<int32>(AsInt32s(
+        {dims[dims.size() - 3], dims[dims.size() - 2], dims[dims.size() - 1]}));
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("RFFT3D").RandomInput(DT_FLOAT, dims).Input(fft_shape));
+  });
+}
+
+TEST_F(OpTest, IRFFT) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(1, kDefaultMaxRank, 3);
+    int64 orig_size = dims[dims.size() - 1];
+    dims[dims.size() - 1] = dims[dims.size() - 1] / 2 + 1;
+    Tensor fft_shape = test::AsTensor<int32>(AsInt32s({orig_size}));
+    return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("IRFFT")
+                                             .RandomInput(DT_COMPLEX64, dims)
+                                             .Input(fft_shape));
+  });
+}
+
+TEST_F(OpTest, IRFFT2D) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(2, kDefaultMaxRank, 3);
+    std::vector<int64> orig_size = {dims[dims.size() - 2],
+                                    dims[dims.size() - 1]};
+    dims[dims.size() - 1] = dims[dims.size() - 1] / 2 + 1;
+    Tensor fft_shape = test::AsTensor<int32>(AsInt32s({orig_size}));
+    return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("IRFFT2D")
+                                             .RandomInput(DT_COMPLEX64, dims)
+                                             .Input(fft_shape));
+  });
+}
+
+TEST_F(OpTest, IRFFT3D) {
+  Repeatedly([this]() {
+    std::vector<int64> dims = RandomDims(3, kDefaultMaxRank, 3);
+    std::vector<int64> orig_size = {
+        dims[dims.size() - 3], dims[dims.size() - 2], dims[dims.size() - 1]};
+    dims[dims.size() - 1] = dims[dims.size() - 1] / 2 + 1;
+    Tensor fft_shape = test::AsTensor<int32>(AsInt32s({orig_size}));
+    return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("IRFFT3D")
+                                             .RandomInput(DT_COMPLEX64, dims)
+                                             .Input(fft_shape));
+  });
+}
+
 TEST_F(OpTest, Conv2D) {
   Repeatedly([this]() {
     WindowedSpatialDims d = ChooseWindowedSpatialDims(2);
@@ -1382,7 +1504,7 @@ TEST_F(OpTest, Conv2D) {
 
     std::vector<int64> kernel_dims = {d.kernel_dims[0], d.kernel_dims[1],
                                       features_in, features_out};
-    DataType type = DT_FLOAT;  // TODO(b/65408531): COMPLEX_64 support
+    DataType type = DT_FLOAT;
     return ExpectTfAndXlaOutputsAreClose(
         OpTestBuilder("Conv2D")
             .RandomInput(type, data_dims)
@@ -1407,7 +1529,7 @@ TEST_F(OpTest, Conv2DBackpropFilter) {
         ImageDims(FORMAT_NHWC, batch, features_out, d.output_dims);
     Tensor kernel_shape = test::AsTensor<int32>(AsInt32s(
         {d.kernel_dims[0], d.kernel_dims[1], features_in, features_out}));
-    DataType type = DT_FLOAT;  // TODO(b/65408531): COMPLEX_64 support
+    DataType type = DT_FLOAT;
     return ExpectTfAndXlaOutputsAreClose(
         OpTestBuilder("Conv2DBackpropFilter")
             .RandomInput(type, activations)
@@ -1433,7 +1555,7 @@ TEST_F(OpTest, Conv2DBackpropInput) {
         ImageDims(FORMAT_NHWC, batch, features_out, d.output_dims);
     std::vector<int64> kernel = {d.kernel_dims[0], d.kernel_dims[1],
                                  features_in, features_out};
-    DataType type = DT_FLOAT;  // TODO(b/65408531): COMPLEX_64 support
+    DataType type = DT_FLOAT;
     return ExpectTfAndXlaOutputsAreClose(
         OpTestBuilder("Conv2DBackpropInput")
             .Input(in_shape)
@@ -1457,7 +1579,7 @@ TEST_F(OpTest, Conv3D) {
 
     std::vector<int64> kernel = {d.kernel_dims[0], d.kernel_dims[1],
                                  d.kernel_dims[2], features_in, features_out};
-    DataType type = DT_FLOAT;  // TODO(b/65408531): COMPLEX_64 support
+    DataType type = DT_FLOAT;
     return ExpectTfAndXlaOutputsAreClose(
         OpTestBuilder("Conv3D")
             .RandomInput(type, data)
@@ -1482,7 +1604,7 @@ TEST_F(OpTest, Conv3DBackpropFilter) {
     Tensor kernel_shape = test::AsTensor<int32>(
         AsInt32s({d.kernel_dims[0], d.kernel_dims[1], d.kernel_dims[2],
                   features_in, features_out}));
-    DataType type = DT_FLOAT;  // TODO(b/65408531): COMPLEX_64 support
+    DataType type = DT_FLOAT;
     return ExpectTfAndXlaOutputsAreClose(
         OpTestBuilder("Conv3DBackpropFilterV2")
             .RandomInput(type, activations)
@@ -2460,6 +2582,36 @@ TEST_F(OpTest, Reshape) {
   });
 }
 
+TEST_F(OpTest, ResizeBilinear) {
+  Repeatedly([this]() {
+    std::vector<int64> in_dims = RandomDims(4, 4);
+    std::vector<int64> out_dims = RandomDims(2, 2);
+
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("ResizeBilinear")
+            .RandomInput(DT_FLOAT, in_dims)
+            .Input(test::AsTensor<int32>(
+                std::vector<int32>(out_dims.begin(), out_dims.end())))
+            .Attr("T", DT_FLOAT)
+            .Attr("align_corners", true));
+  });
+}
+
+TEST_F(OpTest, ResizeBilinearGrad) {
+  Repeatedly([this]() {
+    std::vector<int64> in_dims = RandomDims(4, 4);
+    std::vector<int64> out_dims = RandomDims(2, 2);
+
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("ResizeBilinearGrad")
+            .RandomInput(DT_FLOAT, in_dims)
+            .RandomInput(DT_FLOAT,
+                         {in_dims[0], out_dims[0], out_dims[1], in_dims[3]})
+            .Attr("T", DT_FLOAT)
+            .Attr("align_corners", true));
+  });
+}
+
 TEST_F(OpTest, Reverse) {
   Repeatedly([this]() {
     std::vector<int64> dims = RandomDims(1);
diff --git a/tensorflow/compiler/tests/scan_ops_test.py b/tensorflow/compiler/tests/scan_ops_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..3260e63b23226d736a7ddc0f21a94a8c791e0442
--- /dev/null
+++ b/tensorflow/compiler/tests/scan_ops_test.py
@@ -0,0 +1,229 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for scan ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.compiler.tests.xla_test import XLATestCase
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+def numpy_reverse(x, axis):
+  length = len(x.shape)
+  if axis < 0:
+    axis = length + axis
+
+  ix = [
+      slice(None, None, -1) if i == axis else slice(None) for i in range(length)
+  ]
+  return x[ix]
+
+
+def handle_options(func, x, axis, exclusive, reverse):
+  """Adds tf options to numpy scan ops."""
+  length = len(x.shape)
+  if axis < 0:
+    axis = length + axis
+
+  if reverse:
+    x = numpy_reverse(x, axis)
+
+  if exclusive:
+    ix_head = [slice(0, 1) if i == axis else slice(None) for i in range(length)]
+    ix_init = [
+        slice(0, -1) if i == axis else slice(None) for i in range(length)
+    ]
+    if func == np.cumsum:
+      init = np.zeros_like(x[ix_head])
+    elif func == np.cumprod:
+      init = np.ones_like(x[ix_head])
+    else:
+      raise ValueError("Unknown scan function.")
+    x = np.concatenate([init, func(x[ix_init], axis)], axis=axis)
+  else:
+    x = func(x, axis=axis)
+
+  if reverse:
+    x = numpy_reverse(x, axis)
+  return x
+
+
+class CumsumTest(XLATestCase):
+
+  valid_dtypes = [np.float32]
+
+  def axis_dtypes(self):
+    return set(self.int_types).intersection([np.int32, np.int64])
+
+  def _compare(self, x, axis, exclusive, reverse):
+    np_out = handle_options(np.cumsum, x, axis, exclusive, reverse)
+    with self.test_session(), self.test_scope():
+      p = array_ops.placeholder(x.dtype)
+      tf_out = math_ops.cumsum(p, axis, exclusive, reverse).eval(
+          feed_dict={p: x})
+
+    self.assertAllClose(np_out, tf_out)
+
+  def _compareAll(self, x, axis):
+    for exclusive in [True, False]:
+      for reverse in [True, False]:
+        self._compare(x, axis, exclusive, reverse)
+
+  def testEmpty(self):
+    for dtype in self.valid_dtypes:
+      x = np.zeros([0]).astype(dtype)
+      for axis in (-1, 0):
+        self._compareAll(x, axis)
+
+  def testAxisType(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 6).reshape([5]).astype(dtype)
+      for axis_dtype in self.axis_dtypes():
+        with self.test_session(), self.test_scope():
+          p = array_ops.placeholder(x.dtype)
+          axis = constant_op.constant(0, axis_dtype)
+          math_ops.cumsum(p, axis).eval(feed_dict={p: x})
+
+  def test1D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 6).reshape([5]).astype(dtype)
+      for axis in (-1, 0):
+        self._compareAll(x, axis)
+
+  def test2D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(0, 10).reshape([2, 5]).astype(dtype)
+      for axis in (-2, -1, 0, 1):
+        self._compareAll(x, axis)
+
+  def test3D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(0, 20).reshape([2, 2, 5]).astype(dtype)
+      for axis in (-3, -2, -1, 0, 1, 2):
+        self._compareAll(x, axis)
+
+  def test6D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 145).reshape([2, 2, 3, 3, 2, 2]).astype(dtype)
+      for axis in range(-6, 6, 3):
+        self._compareAll(x, axis)
+
+  def testInvalidAxis(self):
+    x = np.arange(0, 10).reshape([2, 5]).astype(np.float32)
+    with self.test_session(), self.test_scope():
+      input_tensor = ops.convert_to_tensor(x)
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
+        math_ops.cumsum(input_tensor, -3).eval()
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
+        math_ops.cumsum(input_tensor, 2).eval()
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "axis must be a scalar" in str(e)):
+        math_ops.cumsum(input_tensor, [0]).eval()
+
+
+class CumprodTest(XLATestCase):
+
+  valid_dtypes = [np.float32]
+
+  def axis_dtypes(self):
+    return set(self.int_types).intersection([np.int32, np.int64])
+
+  def _compare(self, x, axis, exclusive, reverse):
+    np_out = handle_options(np.cumprod, x, axis, exclusive, reverse)
+    with self.test_session(), self.test_scope():
+      p = array_ops.placeholder(x.dtype)
+      prod = math_ops.cumprod(p, axis, exclusive, reverse)
+      tf_out = prod.eval(feed_dict={p: x})
+
+    self.assertAllClose(np_out, tf_out)
+
+  def _compareAll(self, x, axis):
+    for exclusive in [True, False]:
+      for reverse in [True, False]:
+        self._compare(x, axis, exclusive, reverse)
+
+  def testEmpty(self):
+    for dtype in self.valid_dtypes:
+      x = np.zeros([0]).astype(dtype)
+      for axis in (-1, 0):
+        self._compareAll(x, axis)
+
+  def testAxisType(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 6).reshape([5]).astype(dtype)
+      for axis_dtype in self.axis_dtypes():
+        with self.test_session(), self.test_scope():
+          p = array_ops.placeholder(x.dtype)
+          axis = constant_op.constant(0, axis_dtype)
+          math_ops.cumprod(x, axis).eval(feed_dict={p: x})
+
+  def test1D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 6).reshape([5]).astype(dtype)
+      for axis in (-1, 0):
+        self._compareAll(x, axis)
+
+  def test2D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 11).reshape([2, 5]).astype(dtype)
+      for axis in (-2, -1, 0, 1):
+        self._compareAll(x, axis)
+
+  def test3D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 21).reshape([2, 2, 5]).astype(dtype)
+      for axis in (-3, -2, -1, 0, 1, 2):
+        self._compareAll(x, axis)
+
+  def test6D(self):
+    for dtype in self.valid_dtypes:
+      x = np.arange(1, 145).reshape([2, 2, 3, 3, 2, 2]).astype(dtype)
+      for axis in range(-6, 6, 3):
+        self._compareAll(x, axis)
+
+  def testInvalidAxis(self):
+    x = np.arange(0, 10).reshape([2, 5]).astype(np.float32)
+    with self.test_session(), self.test_scope():
+      input_tensor = ops.convert_to_tensor(x)
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
+        math_ops.cumprod(input_tensor, -3).eval()
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
+        math_ops.cumprod(input_tensor, 2).eval()
+      with self.assertRaisesWithPredicateMatch(
+          errors_impl.InvalidArgumentError,
+          lambda e: "axis must be a scalar" in str(e)):
+        math_ops.cumprod(input_tensor, [0]).eval()
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/compiler/tests/tensor_array_ops_test.py b/tensorflow/compiler/tests/tensor_array_ops_test.py
index ac039e01623b954e291760fb9b50ef8eae3da7c1..a62925a1818da00cb0a9e82e1281db20fb38b208 100644
--- a/tensorflow/compiler/tests/tensor_array_ops_test.py
+++ b/tensorflow/compiler/tests/tensor_array_ops_test.py
@@ -330,8 +330,7 @@ class TensorArrayTest(xla_test.XLATestCase):
     # Find two different floating point types, create an array of
     # the first type, but try to read the other type.
     if len(self.float_types) > 1:
-      dtype1 = self.float_types[0]
-      dtype2 = self.float_types[1]
+      dtype1, dtype2 = list(self.float_types)[:2]
       with self.test_session(), self.test_scope():
         ta = tensor_array_ops.TensorArray(
             dtype=dtype1, tensor_array_name="foo", size=3)
diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index a9a3f4f97f649260e9863fff8ff05d046bd91947..0a6fe04d3cdd29f1d40d33be1f4319090e7ba3d1 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -33,6 +33,17 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.platform import googletest
 
 
+def nhwc_to_format(x, data_format):
+  """Converts a numpy array from NHWC format to `data_format`."""
+  rank = len(x.shape)
+  if data_format == "NCHW":
+    return np.transpose(x, [0, rank - 1] + list(range(1, rank - 1)))
+  elif data_format == "NHWC":
+    return x
+  else:
+    raise ValueError("Unknown format {}".format(data_format))
+
+
 class UnaryOpsTest(XLATestCase):
   """Test cases for unary operators."""
 
@@ -56,7 +67,7 @@ class UnaryOpsTest(XLATestCase):
         output = op(pinp)
       result = session.run(output, {pinp: inp})
       if equality_test is None:
-        equality_test = self.assertAllClose
+        equality_test = self.assertAllCloseAccordingToType
       equality_test(result, expected, rtol=rtol, atol=atol)
 
   def ListsAreClose(self, result, expected, rtol, atol):
@@ -76,6 +87,12 @@ class UnaryOpsTest(XLATestCase):
           array_ops.diag_part,
           np.arange(36).reshape([2, 3, 2, 3]).astype(dtype),
           np.array([[0, 7, 14], [21, 28, 35]], dtype=dtype))
+      self._assertOpOutputMatchesExpected(
+          array_ops.diag, np.array([[1, 2], [3, 4]], dtype=dtype),
+          np.array(
+              [[[[1, 0], [0, 0]], [[0, 2], [0, 0]]], [[[0, 0], [3, 0]],
+                                                      [[0, 0], [0, 4]]]],
+              dtype=dtype))
 
       self._assertOpOutputMatchesExpected(
           array_ops.identity,
@@ -86,6 +103,21 @@ class UnaryOpsTest(XLATestCase):
           array_ops.matrix_diag,
           np.array([[1, 2], [3, 4]], dtype=dtype),
           np.array([[[1, 0], [0, 2]], [[3, 0], [0, 4]]], dtype=dtype))
+      self._assertOpOutputMatchesExpected(
+          array_ops.matrix_diag, np.array([1, 2, 3, 4], dtype=dtype),
+          np.array(
+              [[1, 0, 0, 0], [0, 2, 0, 0], [0, 0, 3, 0], [0, 0, 0, 4]],
+              dtype=dtype))
+      self._assertOpOutputMatchesExpected(
+          array_ops.matrix_diag,
+          np.array(
+              [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], dtype=dtype),
+          np.array(
+              [[[[1, 0, 0], [0, 2, 0], [0, 0, 3]],
+                [[4, 0, 0], [0, 5, 0], [0, 0, 6]]],
+               [[[7, 0, 0], [0, 8, 0], [0, 0, 9]],
+                [[10, 0, 0], [0, 11, 0], [0, 0, 12]]]],
+              dtype=dtype))
       self._assertOpOutputMatchesExpected(
           array_ops.matrix_diag_part,
           np.arange(3 * 2 * 4).reshape([3, 2, 4]).astype(dtype),
@@ -331,26 +363,23 @@ class UnaryOpsTest(XLATestCase):
   def testComplexOps(self):
     for dtype in self.complex_types:
 
-      # TODO(b/65408531): Wider support for log (needs atan2).
-      atan2_supported = self.device == "XLA_GPU"
-      if atan2_supported:
-        self._assertOpOutputMatchesExpected(
-            math_ops.acosh,
-            np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
-            expected=np.arccosh(
-                np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.acosh,
+          np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
+          expected=np.arccosh(
+              np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
 
-        self._assertOpOutputMatchesExpected(
-            math_ops.asinh,
-            np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
-            expected=np.arcsinh(
-                np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.asinh,
+          np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
+          expected=np.arcsinh(
+              np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
 
-        self._assertOpOutputMatchesExpected(
-            math_ops.atanh,
-            np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
-            expected=np.arctanh(
-                np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.atanh,
+          np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype),
+          expected=np.arctanh(
+              np.array([0.1, 0.2j, 0.3 - 0.1j, 0.4 + 0.5j], dtype=dtype)))
 
       self._assertOpOutputMatchesExpected(
           math_ops.cosh,
@@ -377,11 +406,10 @@ class UnaryOpsTest(XLATestCase):
           np.array([[1, 2j, 2 + 3j]], dtype=dtype),
           expected=1.0 / np.array([[1, 2j, 2 + 3j]], dtype=dtype))
 
-      if atan2_supported:
-        self._assertOpOutputMatchesExpected(
-            math_ops.log,
-            np.array([[5j, 3 - 2j]], dtype=dtype),
-            expected=np.log(np.array([[5j, 3 - 2j]], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.log,
+          np.array([[5j, 3 - 2j]], dtype=dtype),
+          expected=np.log(np.array([[5j, 3 - 2j]], dtype=dtype)))
 
       self._assertOpOutputMatchesExpected(
           math_ops.sin,
@@ -395,27 +423,26 @@ class UnaryOpsTest(XLATestCase):
 
       # TODO(b/34703906): improve log1p implementation and make tolerance
       # tighter.
-      if atan2_supported:  # TODO(b/34703906): log support
-        self._assertOpOutputMatchesExpected(
-            math_ops.log1p,
-            np.array([[1e-14, 1e-15j, 0.6 - 0.3j]], dtype=dtype),
-            expected=np.log1p(
-                np.array([[1e-14, 1e-15j, 0.6 - 0.3j]], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.log1p,
+          np.array([[1e-14, 1e-15j, 0.6 - 0.3j]], dtype=dtype),
+          expected=np.log1p(
+              np.array([[1e-14, 1e-15j, 0.6 - 0.3j]], dtype=dtype)))
 
-        val = np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype)
-        self._assertOpOutputMatchesExpected(
-            math_ops.rsqrt, val, expected=1 / np.sqrt(val))
+      val = np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype)
+      self._assertOpOutputMatchesExpected(
+          math_ops.rsqrt, val, expected=1 / np.sqrt(val))
 
-        self._assertOpOutputMatchesExpected(
-            math_ops.sigmoid, val, expected=1 / (1 + np.exp(-val)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.sigmoid, val, expected=1 / (1 + np.exp(-val)))
 
-        self._assertOpOutputMatchesExpected(
-            math_ops.sqrt, val, expected=np.sqrt(val))
+      self._assertOpOutputMatchesExpected(
+          math_ops.sqrt, val, expected=np.sqrt(val))
 
-        self._assertOpOutputMatchesExpected(
-            math_ops.tanh,
-            np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype),
-            expected=np.tanh(np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.tanh,
+          np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype),
+          expected=np.tanh(np.array([1, 2j, 2 - 3j, 4 + 5j], dtype=dtype)))
 
       self._assertOpOutputMatchesExpected(
           math_ops.tan,
@@ -448,12 +475,10 @@ class UnaryOpsTest(XLATestCase):
           np.array([[-4j, 3 + 2j], [2, -1j]], dtype=dtype),
           expected=np.array([[1, 1], [1, 1]], dtype=dtype))
 
-      if atan2_supported:  # TODO(b/34703906): atan2 support
-        self._assertOpOutputMatchesExpected(
-            math_ops.angle,
-            np.array([1 + 3j, -4 + 7j, 2.7, -3j], dtype=dtype),
-            expected=np.angle(
-                np.array([1 + 3j, -4 + 7j, 2.7, -3j], dtype=dtype)))
+      self._assertOpOutputMatchesExpected(
+          math_ops.angle,
+          np.array([1 + 3j, -4 + 7j, 2.7, -3j], dtype=dtype),
+          expected=np.angle(np.array([1 + 3j, -4 + 7j, 2.7, -3j], dtype=dtype)))
 
       self._assertOpOutputMatchesExpected(
           math_ops.conj,
@@ -541,7 +566,8 @@ class UnaryOpsTest(XLATestCase):
 
   def testCast(self):
     shapes = [[], [4], [2, 3], [2, 0, 4]]
-    types = [dtypes.bool, dtypes.int32, dtypes.float32] + self.complex_tf_types
+    types = (set([dtypes.bool, dtypes.int32, dtypes.float32]) |
+             self.complex_tf_types)
     for shape in shapes:
       for src_type in types:
         for dst_type in types:
@@ -641,55 +667,88 @@ class UnaryOpsTest(XLATestCase):
         equality_test=self.ListsAreClose)
 
   def testDepthToSpace(self):
+    def make_op(data_format):
+      def op(x):
+        return array_ops.depth_to_space(x, block_size=2,
+                                        data_format=data_format)
+      return op
+
     for dtype in self.numeric_types:
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.depth_to_space(x, block_size=2),
-          np.array([[[[1, 2, 3, 4]]]], dtype=dtype),
-          expected=np.array([[[[1], [2]],
-                              [[3], [4]]]], dtype=dtype))
+      for data_format in ["NCHW", "NHWC"]:
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(np.array([[[[1, 2, 3, 4]]]], dtype=dtype),
+                           data_format),
+            expected=nhwc_to_format(np.array([[[[1], [2]],
+                                               [[3], [4]]]], dtype=dtype),
+                                    data_format))
 
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.depth_to_space(x, block_size=2),
-          np.array([[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]], dtype=dtype),
-          expected=np.array([[[[1, 2, 3], [4, 5, 6]],
-                              [[7, 8, 9], [10, 11, 12]]]], dtype=dtype))
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(
+                np.array([[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]],
+                         dtype=dtype),
+                data_format),
+            expected=nhwc_to_format(
+                np.array([[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]],
+                         dtype=dtype),
+                data_format))
 
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.depth_to_space(x, block_size=2),
-          np.array([[[[1, 2, 3, 4],
-                      [5, 6, 7, 8]],
-                     [[9, 10, 11, 12],
-                      [13, 14, 15, 16]]]], dtype=dtype),
-          expected=np.array([[[[1], [2], [5], [6]],
-                              [[3], [4], [7], [8]],
-                              [[9], [10], [13], [14]],
-                              [[11], [12], [15], [16]]]], dtype=dtype))
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(
+                np.array([[[[1, 2, 3, 4],
+                            [5, 6, 7, 8]],
+                           [[9, 10, 11, 12],
+                            [13, 14, 15, 16]]]], dtype=dtype),
+                data_format),
+            expected=nhwc_to_format(
+                np.array([[[[1], [2], [5], [6]],
+                           [[3], [4], [7], [8]],
+                           [[9], [10], [13], [14]],
+                           [[11], [12], [15], [16]]]], dtype=dtype),
+                data_format))
 
   def testSpaceToDepth(self):
+    def make_op(data_format):
+      def op(x):
+        return array_ops.space_to_depth(x, block_size=2,
+                                        data_format=data_format)
+      return op
+
     for dtype in self.numeric_types:
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.space_to_depth(x, block_size=2),
-          np.array([[[[1], [2]],
-                     [[3], [4]]]], dtype=dtype),
-          expected=np.array([[[[1, 2, 3, 4]]]], dtype=dtype))
+      for data_format in ["NCHW", "NHWC"]:
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(np.array([[[[1], [2]],
+                                      [[3], [4]]]], dtype=dtype),
+                           data_format),
+            expected=nhwc_to_format(np.array([[[[1, 2, 3, 4]]]], dtype=dtype),
+                                    data_format))
 
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.space_to_depth(x, block_size=2),
-          np.array([[[[1, 2, 3], [4, 5, 6]],
-                     [[7, 8, 9], [10, 11, 12]]]], dtype=dtype),
-          expected=np.array([[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]],
-                            dtype=dtype))
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(np.array([[[[1, 2, 3], [4, 5, 6]],
+                                      [[7, 8, 9], [10, 11, 12]]]], dtype=dtype),
+                           data_format),
+            expected=nhwc_to_format(
+                np.array([[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]],
+                         dtype=dtype),
+                data_format))
 
-      self._assertOpOutputMatchesExpected(
-          lambda x: array_ops.space_to_depth(x, block_size=2),
-          np.array([[[[1], [2], [5], [6]],
-                     [[3], [4], [7], [8]],
-                     [[9], [10], [13], [14]],
-                     [[11], [12], [15], [16]]]], dtype=dtype),
-          expected=np.array([[[[1, 2, 3, 4],
-                               [5, 6, 7, 8]],
-                              [[9, 10, 11, 12],
-                               [13, 14, 15, 16]]]], dtype=dtype))
+        self._assertOpOutputMatchesExpected(
+            make_op(data_format),
+            nhwc_to_format(np.array([[[[1], [2], [5], [6]],
+                                      [[3], [4], [7], [8]],
+                                      [[9], [10], [13], [14]],
+                                      [[11], [12], [15], [16]]]], dtype=dtype),
+                           data_format),
+            expected=nhwc_to_format(
+                np.array([[[[1, 2, 3, 4],
+                            [5, 6, 7, 8]],
+                           [[9, 10, 11, 12],
+                            [13, 14, 15, 16]]]], dtype=dtype),
+                data_format))
 
   def _assertSoftplusMatchesExpected(self, features, dtype):
     features = np.array(features, dtype=dtype)
diff --git a/tensorflow/compiler/tests/variable_ops_test.py b/tensorflow/compiler/tests/variable_ops_test.py
index c50342dee45eba6ae54f01653ecc81ef096b547b..b08d6ab21e0746558cb3d4818d4c822c45d2e9ee 100644
--- a/tensorflow/compiler/tests/variable_ops_test.py
+++ b/tensorflow/compiler/tests/variable_ops_test.py
@@ -107,11 +107,26 @@ class VariableOpsTest(XLATestCase):
                  [[[30, 31, 32], [33, 34, 35]], [[0, 1, 2], [3, 4, 5]]]],
             ).astype(dtype), sess.run(x))
 
+  def testShape(self):
+    for dtype in self.numeric_types:
+      init = np.ones([2, 3]).astype(dtype)
+      with self.test_session() as session, self.test_scope():
+        v = resource_variable_ops.ResourceVariable(init)
+        session.run(variables.variables_initializer([v]))
+        h = v.handle
+        s32, s64 = session.run([
+            resource_variable_ops.variable_shape(h),
+            resource_variable_ops.variable_shape(h, out_type=dtypes.int64)
+        ])
+        self.assertEqual(s32.dtype, np.int32)
+        self.assertEqual(s64.dtype, np.int64)
+        self.assertAllEqual(s32, [2, 3])
+        self.assertAllEqual(s64, [2, 3])
+
   def testReadWrite(self):
     """Tests initialization, reading, and writing a resource variable."""
     for dtype in self.numeric_types:
       with self.test_session() as session:
-        print(ops.get_default_graph())
         with self.test_scope():
           with variable_scope.variable_scope("ascope", use_resource=True):
             x = variable_scope.get_variable(
diff --git a/tensorflow/compiler/tests/xla_test.py b/tensorflow/compiler/tests/xla_test.py
index 0be127997e5211f810ca791187486760881fe172..7e1f5c76ed65946363cc3c113ab1a9862f87b289 100644
--- a/tensorflow/compiler/tests/xla_test.py
+++ b/tensorflow/compiler/tests/xla_test.py
@@ -53,41 +53,100 @@ class XLATestCase(test.TestCase):
     super(XLATestCase, self).__init__(method_name)
     self.device = FLAGS.test_device
     self.has_custom_call = (self.device == 'XLA_CPU')
-    self.all_tf_types = [
+    self._all_tf_types = set([
         dtypes.as_dtype(types_pb2.DataType.Value(name))
         for name in FLAGS.types.split(',')
-    ]
-    self.int_tf_types = [
-        dtype for dtype in self.all_tf_types if dtype.is_integer
-    ]
-    self.float_tf_types = [
-        dtype for dtype in self.all_tf_types if dtype.is_floating
-    ]
-    self.complex_tf_types = [
-        dtype for dtype in self.all_tf_types if dtype.is_complex
-    ]
-    self.numeric_tf_types = (
-        self.int_tf_types + self.float_tf_types + self.complex_tf_types)
-
-    self.all_types = [dtype.as_numpy_dtype for dtype in self.all_tf_types]
-    self.int_types = [dtype.as_numpy_dtype for dtype in self.int_tf_types]
-    self.float_types = [dtype.as_numpy_dtype for dtype in self.float_tf_types]
-    self.complex_types = [
+    ])
+    self.int_tf_types = set([
+        dtype for dtype in self._all_tf_types if dtype.is_integer
+    ])
+    self._float_tf_types = set([
+        dtype for dtype in self._all_tf_types if dtype.is_floating
+    ])
+    self.complex_tf_types = set([
+        dtype for dtype in self._all_tf_types if dtype.is_complex
+    ])
+    self._numeric_tf_types = set(
+        self.int_tf_types | self._float_tf_types | self.complex_tf_types)
+
+    self._all_types = set(
+        [dtype.as_numpy_dtype for dtype in self._all_tf_types])
+    self.int_types = set([dtype.as_numpy_dtype for dtype in self.int_tf_types])
+    self._float_types = set(
+        [dtype.as_numpy_dtype for dtype in self._float_tf_types])
+    self.complex_types = set([
         dtype.as_numpy_dtype for dtype in self.complex_tf_types
-    ]
-    self.numeric_types = self.int_types + self.float_types + self.complex_types
+    ])
+    self._numeric_types = set(
+        self.int_types | self._float_types | self.complex_types)
 
     # Parse the manifest file, if any, into a regex identifying tests to
     # disable
     self.disabled_regex = None
+    self._method_types_filter = dict()
+    # TODO(xpan): Make it text proto if it doesn't scale.
+    # Each line of the manifest file specifies an entry. The entry can be
+    # 1) TestNameRegex  // E.g. CumprodTest.* Or
+    # 2) TestName TypeName  // E.g. AdamOptimizerTest.testSharing DT_BFLOAT16
+    # The 1) disables the entire test. While 2) only filter some numeric types
+    # so that they are not used in those tests.
+
     if FLAGS.disabled_manifest is not None:
       comments_re = re.compile('#.*$')
       manifest_file = open(FLAGS.disabled_manifest, 'r')
-      lines = manifest_file.read().splitlines()
-      lines = [comments_re.sub('', l).strip() for l in lines]
-      self.disabled_regex = re.compile('|'.join(lines))
+      disabled_tests = []
+      disabled_method_types = []
+      for l in manifest_file.read().splitlines():
+        entry = comments_re.sub('', l).strip().split(' ')
+        if len(entry) == 1:
+          disabled_tests.append(entry[0])
+        elif len(entry) == 2:
+          disabled_method_types.append(
+              (entry[0], entry[1].strip().split(',')))
+        else:
+          raise ValueError('Bad entry in manifest file.')
+
+      self.disabled_regex = re.compile('|'.join(disabled_tests))
+      for method, types in disabled_method_types:
+        self._method_types_filter[method] = set([
+            dtypes.as_dtype(types_pb2.DataType.Value(name)).as_numpy_dtype
+            for name in types])
       manifest_file.close()
 
+  @property
+  def all_tf_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    tf_types = set([dtypes.as_dtype(t)
+                    for t in self._method_types_filter.get(name, set())])
+    return self._all_tf_types - tf_types
+
+  @property
+  def float_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    return self._float_types - self._method_types_filter.get(name, set())
+
+  @property
+  def float_tf_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    return self._float_tf_types - self._method_types_filter.get(name, set())
+
+  @property
+  def numeric_tf_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    tf_types = set([dtypes.as_dtype(t)
+                    for t in self._method_types_filter.get(name, set())])
+    return self._numeric_tf_types - tf_types
+
+  @property
+  def numeric_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    return self._numeric_types - self._method_types_filter.get(name, set())
+
+  @property
+  def all_types(self):
+    name = '{}.{}'.format(type(self).__name__, self._testMethodName)
+    return self._all_types - self._method_types_filter.get(name, set())
+
   def setUp(self):
     super(XLATestCase, self).setUp()
     name = '{}.{}'.format(type(self).__name__, self._testMethodName)
diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index 5a81438b1c48e7f0ef66dae072092974db24c621..3c7dfef03dfb5d86dd63fd4aa84ad56081833035 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -1,6 +1,6 @@
 licenses(["notice"])  # Apache 2.0
 
-load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_cc_test")
 
 package_group(
     name = "internal",
@@ -25,6 +25,30 @@ package(
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
 load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library")
 
+cc_library(
+    name = "tf2xla_supported_ops_lib",
+    srcs = ["tf2xla_supported_ops.cc"],
+    hdrs = ["tf2xla_supported_ops.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":xla_compiler",
+        "//tensorflow/compiler/tf2xla/kernels:xla_cpu_only_ops",
+        "//tensorflow/compiler/tf2xla/kernels:xla_ops",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:ops",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+tf_cc_binary(
+    name = "tf2xla_supported_ops",
+    srcs = ["tf2xla_supported_ops_main.cc"],
+    visibility = ["//visibility:public"],
+    deps = [":tf2xla_supported_ops_lib"],
+)
+
 xla_proto_library(
     name = "tf2xla_proto",
     srcs = ["tf2xla.proto"],
@@ -67,7 +91,6 @@ cc_library(
         # Keep dependencies to a minimum here; this library is used in every AOT
         # binary produced by tfcompile.
         "//tensorflow/compiler/aot:runtime",
-        "//tensorflow/compiler/tf2xla:xla_local_runtime_context",
         "//tensorflow/compiler/xla:executable_run_options",
         "//tensorflow/core:framework_lite",
     ],
@@ -97,18 +120,21 @@ cc_library(
 cc_library(
     name = "xla_compiler",
     srcs = [
+        "const_analysis.cc",
+        "graph_compiler.cc",
         "xla_compilation_device.cc",
         "xla_compiler.cc",
         "xla_context.cc",
         "xla_helpers.cc",
         "xla_op_kernel.cc",
         "xla_op_registry.cc",
-        "graph_compiler.cc",
+        "xla_resource.cc",
         "xla_cpu_backend.cc",
     ] + if_cuda_is_configured([
         "xla_gpu_backend.cc",
     ]),
     hdrs = [
+        "const_analysis.h",
         "graph_compiler.h",
         "xla_compilation_device.h",
         "xla_compiler.h",
@@ -116,11 +142,11 @@ cc_library(
         "xla_helpers.h",
         "xla_op_kernel.h",
         "xla_op_registry.h",
+        "xla_resource.h",
     ],
     visibility = [":friends"],
     deps = [
         ":common",
-        ":const_analysis",
         ":dump_graph",
         ":functionalize_control_flow",
         ":sharding_util",
@@ -180,6 +206,7 @@ cc_library(
     deps = [
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/client:sharding_builder",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
@@ -215,6 +242,7 @@ cc_library(
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
     ],
@@ -328,28 +356,16 @@ tf_cc_test(
     ],
 )
 
-cc_library(
-    name = "const_analysis",
-    srcs = ["const_analysis.cc"],
-    hdrs = ["const_analysis.h"],
-    deps = [
-        "//tensorflow/core:core_cpu",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
 tf_cc_test(
     name = "const_analysis_test",
     size = "small",
     srcs = ["const_analysis_test.cc"],
     deps = [
-        ":const_analysis",
+        ":xla_compiler",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:function_ops",
         "//tensorflow/cc:ops",
+        "//tensorflow/compiler/tf2xla/kernels:xla_ops",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:ops",
         "//tensorflow/core:test",
@@ -357,13 +373,6 @@ tf_cc_test(
     ],
 )
 
-cc_library(
-    name = "xla_local_runtime_context",
-    hdrs = ["xla_local_runtime_context.h"],
-    visibility = ["//visibility:public"],
-    deps = ["//tensorflow/core:framework_lite"],
-)
-
 cc_library(
     name = "dump_graph",
     srcs = [
@@ -400,6 +409,7 @@ cc_library(
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
     ],
 )
diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc
index d57273d84442c17565a6ace1c29170a0f3ba583b..0249500910c6ae441f038fe9ad6178794f1997ac 100644
--- a/tensorflow/compiler/tf2xla/const_analysis.cc
+++ b/tensorflow/compiler/tf2xla/const_analysis.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <unordered_map>
 #include <unordered_set>
 
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/graph/algorithm.h"
 
@@ -27,93 +28,18 @@ namespace tensorflow {
 // compile-time constants.
 Status BackwardsConstAnalysis(const Graph& g,
                               std::vector<bool>* compile_time_const_args) {
-  // TODO(phawkins): annotate these on the kernel registrations, rather than
-  // using a hard-coded list.
-  // (operator, argument) pairs that must be compile-time constants.
-  const std::unordered_multimap<string, string> compile_time_const_inputs = {
-      {"All", "reduction_indices"},
-      {"Any", "reduction_indices"},
-      {"ArgMin", "dimension"},
-      {"ArgMax", "dimension"},
-      {"AvgPoolGrad", "orig_input_shape"},
-      {"AvgPool3DGrad", "orig_input_shape"},
-      {"BatchToSpace", "crops"},
-      {"BatchToSpaceND", "block_shape"},
-      {"BatchToSpaceND", "crops"},
-      {"BroadcastArgs", "s0"},
-      {"BroadcastArgs", "s1"},
-      {"BroadcastGradientArgs", "s0"},
-      {"BroadcastGradientArgs", "s1"},
-      {"Concat", "concat_dim"},
-      {"ConcatV2", "axis"},
-      {"ConcatOffset", "concat_dim"},
-      {"ConcatOffset", "shape"},
-      {"Conv2DBackpropFilter", "filter_sizes"},
-      {"Conv2DBackpropInput", "input_sizes"},
-      {"Conv3DBackpropFilterV2", "filter_sizes"},
-      {"Conv3DBackpropInputV2", "input_sizes"},
-      {"DepthwiseConv2dNativeBackpropFilter", "filter_sizes"},
-      {"DepthwiseConv2dNativeBackpropInput", "input_sizes"},
-      {"DynamicStitch", "indices"},
-      {"ExpandDims", "dim"},
-      {"Fill", "dims"},
-      {"GatherV2", "axis"},
-      {"InvertPermutation", "x"},
-      {"LinSpace", "start"},
-      {"LinSpace", "stop"},
-      {"LinSpace", "num"},
-      {"Max", "reduction_indices"},
-      {"Mean", "reduction_indices"},
-      {"Min", "reduction_indices"},
-      {"OneHot", "depth"},
-      {"Pad", "paddings"},
-      {"PadV2", "paddings"},
-      {"MirrorPad", "paddings"},
-      {"Multinomial", "num_samples"},
-      {"Prod", "reduction_indices"},
-      {"RandomStandardNormal", "shape"},
-      {"RandomUniform", "shape"},
-      {"RandomUniformInt", "shape"},
-      {"Range", "start"},
-      {"Range", "limit"},
-      {"Range", "delta"},
-      {"Reshape", "shape"},
-      {"ResourceStridedSliceAssign", "begin"},
-      {"ResourceStridedSliceAssign", "end"},
-      {"ResourceStridedSliceAssign", "strides"},
-      {"Reverse", "dims"},
-      {"ReverseV2", "axis"},
-      {"Slice", "begin"},
-      {"Slice", "size"},
-      {"SpaceToBatch", "paddings"},
-      {"SpaceToBatchND", "block_shape"},
-      {"SpaceToBatchND", "paddings"},
-      {"Split", "split_dim"},
-      {"SplitV", "split_dim"},
-      {"SplitV", "size_splits"},
-      {"StackV2", "max_size"},
-      {"StridedSlice", "begin"},
-      {"StridedSlice", "end"},
-      {"StridedSlice", "strides"},
-      {"StridedSliceGrad", "shape"},
-      {"StridedSliceGrad", "begin"},
-      {"StridedSliceGrad", "end"},
-      {"StridedSliceGrad", "strides"},
-      {"Sum", "reduction_indices"},
-      {"TensorArrayV3", "size"},
-      {"TensorArraySplitV3", "lengths"},
-      {"Tile", "multiples"},
-      {"Transpose", "perm"}};
-
   // Operators that don't look at the data of their inputs, just the shapes.
   const std::unordered_set<string> metadata_ops = {
-      "Rank", "Shape", "ShapeN", "Size",
+      "Rank",
+      "Shape",
+      "ShapeN",
+      "Size",
   };
 
   Status status;
   std::unordered_set<Node*> must_be_const;
-  auto visit = [&status, &metadata_ops, &compile_time_const_inputs,
-                &must_be_const, compile_time_const_args](Node* node) {
+  auto visit = [&status, &metadata_ops, &must_be_const,
+                compile_time_const_args](Node* node) {
     if (!status.ok()) return;
 
     // If this is a metadata-only op, don't propagate the const requirement.
@@ -136,16 +62,17 @@ Status BackwardsConstAnalysis(const Graph& g,
     }
 
     // Mark any compile-time constant operator arguments as const.
-    auto range = compile_time_const_inputs.equal_range(node->type_string());
-    if (range.first == range.second) return;
+    const std::unordered_set<string>* const_inputs =
+        XlaOpRegistry::CompileTimeConstantInputs(node->type_string());
+    if (!const_inputs) return;
 
     NameRangeMap input_name_ranges;
     status =
         NameRangesForNode(*node, node->op_def(), &input_name_ranges, nullptr);
     if (!status.ok()) return;
 
-    for (auto it = range.first; it != range.second; ++it) {
-      auto name_range = input_name_ranges.find(it->second);
+    for (const string& input : *const_inputs) {
+      auto name_range = input_name_ranges.find(input);
       if (name_range == input_name_ranges.end()) continue;
 
       for (Edge const* edge : node->in_edges()) {
diff --git a/tensorflow/compiler/tf2xla/dump_graph.cc b/tensorflow/compiler/tf2xla/dump_graph.cc
index ddd912b87315f7943915153b5bf73531107af54d..03603ee9baefd1d20d220faf63c9c1c427ebdf31 100644
--- a/tensorflow/compiler/tf2xla/dump_graph.cc
+++ b/tensorflow/compiler/tf2xla/dump_graph.cc
@@ -63,7 +63,12 @@ string MakeUniquePath(string name) {
 
 string DumpGraphDefToFile(const string& name, GraphDef const& graph_def) {
   string path = MakeUniquePath(name);
-  TF_CHECK_OK(WriteTextProto(Env::Default(), path, graph_def));
+  Status status = WriteTextProto(Env::Default(), path, graph_def);
+  if (!status.ok()) {
+    VLOG(1) << "Failed to dump GraphDef to file: " << path << " : " << status;
+    path.clear();
+    path = "(unavailable)";
+  }
   return path;
 }
 
@@ -79,7 +84,13 @@ string DumpGraphToFile(const string& name, Graph const& graph,
 
 string DumpFunctionDefToFile(const string& name, FunctionDef const& fdef) {
   string path = MakeUniquePath(name);
-  TF_CHECK_OK(WriteTextProto(Env::Default(), path, fdef));
+  Status status = WriteTextProto(Env::Default(), path, fdef);
+  if (!status.ok()) {
+    VLOG(1) << "Failed to dump FunctionDef to file: " << path << " : "
+            << status;
+    path.clear();
+    path = "(unavailable)";
+  }
   return path;
 }
 
diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 5726d8294a7c7fe81d7f6b803af89ca305aa2deb..1d9e0fb33ee4a4229c78d116831e95391a5ac3f8 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/control_flow.h"
 #include "tensorflow/core/lib/gtl/optional.h"
 
@@ -36,6 +37,8 @@ namespace tensorflow {
 
 namespace {
 
+using xla::StatusOr;
+
 const char* const kArgOp = "_Arg";
 const char* const kRetValOp = "_Retval";
 
@@ -75,6 +78,20 @@ struct Frame {
   std::unordered_set<Node*> nodes;
 };
 
+// Comparison function used for sorting nodes consistently.
+// a) resource variables are last, and
+// b) sort lexicographically by name (for deterministic output).
+struct NodeCmp {
+  bool operator()(const Node* lhs, const Node* rhs) const {
+    bool lhs_is_resource =
+        lhs->num_inputs() > 0 ? (lhs->input_type(0) == DT_RESOURCE) : false;
+    bool rhs_is_resource =
+        rhs->num_inputs() > 0 ? (rhs->input_type(0) == DT_RESOURCE) : false;
+    return std::tie(lhs_is_resource, lhs->name()) <
+           std::tie(rhs_is_resource, rhs->name());
+  }
+};
+
 // Returns a textual representation of the names of the nodes in the input.
 template <typename T>
 string NodesToString(const T& nodes) {
@@ -140,7 +157,7 @@ Status CopySubgraph(const Graph& graph, const Frame* frame,
   return Status::OK();
 }
 
-xla::StatusOr<Node*> AddNode(const NodeDef& node_def, Graph* graph) {
+StatusOr<Node*> AddNode(const NodeDef& node_def, Graph* graph) {
   Status status;
   Node* inserted_node = graph->AddNode(node_def, &status);
   if (!status.ok()) {
@@ -149,7 +166,7 @@ xla::StatusOr<Node*> AddNode(const NodeDef& node_def, Graph* graph) {
   return inserted_node;
 }
 
-xla::StatusOr<Node*> BuildArgNode(Graph* graph, DataType type, int index) {
+StatusOr<Node*> BuildArgNode(Graph* graph, DataType type, int index) {
   NodeDef arg_def;
   NodeDefBuilder builder(strings::StrCat(kArgOp, index), kArgOp);
   builder.Attr("T", type);
@@ -158,7 +175,7 @@ xla::StatusOr<Node*> BuildArgNode(Graph* graph, DataType type, int index) {
   return AddNode(arg_def, graph);
 }
 
-xla::StatusOr<Node*> BuildRetvalNode(Graph* graph, DataType type, int index) {
+StatusOr<Node*> BuildRetvalNode(Graph* graph, DataType type, int index) {
   NodeDef ret_def;
   ret_def.set_op(kRetValOp);
   ret_def.set_name(strings::StrCat(kRetValOp, index));
@@ -309,16 +326,9 @@ Status FunctionalizeLoop(Graph* graph, Frame* frame,
   }
   frame->args = std::move(args);
 
-  // Order the arguments so that:
-  // a) resource variables are last, and
-  // b) sort lexicographically by name (for deterministic output).
-  std::sort(frame->args.begin(), frame->args.end(),
-            [](const Arg& a, const Arg& b) {
-              bool a_is_resource = (a.enter->input_type(0) == DT_RESOURCE);
-              bool b_is_resource = (b.enter->input_type(0) == DT_RESOURCE);
-              return std::tie(a_is_resource, a.enter->name()) <
-                     std::tie(b_is_resource, b.enter->name());
-            });
+  std::sort(
+      frame->args.begin(), frame->args.end(),
+      [](const Arg& a, const Arg& b) { return NodeCmp()(a.enter, b.enter); });
 
   if (frame->loop_cond == nullptr) {
     return errors::InvalidArgument("Loop ", frame->name,
@@ -528,259 +538,127 @@ Status FunctionalizeLoop(Graph* graph, Frame* frame,
 
 class FunctionalizeCond {
  public:
-  // Identifies the connected parts of the tf.Cond.
-  struct ClusterHandle {
-    explicit ClusterHandle(int representative = -1)
-        : representative(representative) {}
+  // All nodes are assumed to be either in no branch, then branch, else branch,
+  // or both branches (such as merge nodes).
+  enum Branch {
+    kElseBranch = 0,
+    kThenBranch = 1,
+    kBoth = 2,
+    kNeither = 3,
+    kNumBranchTypes = 4
+  };
 
-    bool operator==(const ClusterHandle& other) const {
-      return representative == other.representative;
-    }
+  // Returns a textual representation of the Branch b.
+  static string Branch_Name(FunctionalizeCond::Branch b);
 
-    bool operator!=(const ClusterHandle& other) const {
-      return !(*this == other);
-    }
+  // Functionalize all the switch-merge nodes of a loop-free graph into XlaIf
+  // nodes. That is, attempt to transform every remaining switch and merge nodes
+  // in the graph into XlaIf nodes.
+  // Precondition: All while loops have been removed from graph.
+  static Status Functionalize(Graph* graph, FunctionLibraryDefinition* library);
 
-    bool operator<(const ClusterHandle& other) const {
-      return representative < other.representative;
+ private:
+  // CondArgNode represents a input to the conditional and its corresponding
+  // switch nodes.
+  struct CondArgNode {
+    explicit CondArgNode(Node* input) : input(input) {}
+    string ToString() const {
+      return strings::StrCat("input=", input->name(),
+                             " switches=", NodesToString(switch_nodes));
     }
 
-    bool operator>(const ClusterHandle& other) const {
-      return representative > other.representative;
-    }
+    Node* input;
+    std::vector<Node*> switch_nodes;
+  };
+  using CondArgNodes = std::vector<CondArgNode>;
 
+  struct ForwardFlowNode {
+    explicit ForwardFlowNode(Branch branch = Branch::kNeither)
+        : branch(branch), count(0) {}
     string ToString() const {
-      return strings::StrCat("Cluster_", representative);
+      return strings::StrCat("branch=", Branch_Name(branch), " count=", count);
     }
-
-    // Vector of UnionFind<ClusterHandle> indexable by ClusterHandle and Node*.
-    struct Vector {
-      explicit Vector(size_t size) : clusters(size) {}
-
-      UnionFind<ClusterHandle>& at(const ClusterHandle& cluster) {
-        return clusters.at(cluster.representative);
-      }
-
-      UnionFind<ClusterHandle>& at(const Node* node) {
-        return clusters.at(node->id());
-      }
-
-      UnionFind<ClusterHandle>& operator[](const Node* node) {
-        return clusters.at(node->id());
-      }
-
-      size_t size() const { return clusters.size(); }
-
-      void resize(size_t count) { return clusters.resize(count); }
-
-     private:
-      std::vector<UnionFind<ClusterHandle>> clusters;
-    };
-
-   private:
-    int representative;
+    Branch branch;
+    int count;
   };
 
-  // Represents a node in the clustered graph consisting of switch_nodes,
-  // merge_nodes as well as the edges into and out of this node to other
-  // Clusters. Each Cluster corresponds to a ClusterHandle and has a
-  // corresponding representative.
-  struct Cluster {
-    std::unordered_set<Node*> switch_nodes;
-    std::unordered_set<Node*> merge_nodes;
-    std::unordered_set<Cluster*> in_nodes;
-    std::unordered_set<Cluster*> out_nodes;
-
-    // A member of the ClusterHandle corresponding to this Cluster.
-    ClusterHandle representative;
-    bool visited = false;
-  };
+  struct PredicateSwitches {
+    explicit PredicateSwitches(Node* predicate) : predicate(predicate) {}
 
-  // Represent the clustered graph as map from cluster representative to
-  // Cluster.
-  using ClusteredGraph = std::map<ClusterHandle, Cluster>;
-
-  // The arguments and condition of a XlaIf. The arguments are ordered by node
-  // id in the original graph.
-  struct CondArgs {
-    struct CondCmp {
-      bool operator()(const Node* lhs, const Node* rhs) const {
-        bool lhs_is_resource =
-            lhs->num_inputs() > 0 ? (lhs->input_type(0) == DT_RESOURCE) : false;
-        bool rhs_is_resource =
-            rhs->num_inputs() > 0 ? (rhs->input_type(0) == DT_RESOURCE) : false;
-        return std::tie(lhs_is_resource, lhs->name()) <
-               std::tie(rhs_is_resource, rhs->name());
-      }
-    };
-    Node* conditional = nullptr;
-    std::set<Node*, CondCmp> args;
+    Node* predicate;
+    std::vector<Node*> switches;
   };
 
-  static Status Functionalize(Graph* graph, FunctionLibraryDefinition* library);
-
- private:
   FunctionalizeCond(Graph* graph, FunctionLibraryDefinition* library)
-      : clusters_(graph->num_node_ids()), library_(library), graph_(graph) {}
-
-  // Returns a vector of Switch nodes from the clustered graph where the nodes
-  // are sorted by the number of switch nodes minus number of merge nodes
-  // from a root of the clustered graph to the given Merge node, with ties
-  // broken by the representative of the Cluster. This corresponds to sorting by
-  // nesting depth, from deepest nested to outermost.
-  std::vector<std::pair<int, Cluster*>> SortedSwitchNodes();
-
-  // Returns whether the graph has no conditionals.
-  bool NoConditionals() const { return merge_nodes_.empty(); }
-
-  // Construct the clustered graph by creating nodes for each cluster and the
-  // connections between the clusters. Switch and Merge nodes partition
-  // clusters, so iterate over those. Note: a Cluster may have neither a
-  // Merge or Switch but will have an in/out edge from a Cluster that has.
-  void CreateClusters();
-
-  // Creates the clustered graph by identifying all the edges between different
-  // clusters and collecting all switch and merge nodes that correspond to a
-  // cluster.
-  void CreateClusteredGraph();
-
-  // If `from` and `to` correspond to different clusters, then merge the nodes
-  // in the clustered graph corresponding to `from` and `to`.
-  //
-  // If `remove_from_graph` is specified then the `from` node is also removed
-  // from the clustered graph post contracting the edge.
-  void ContractEdge(Cluster* from, Cluster* to, bool remove_from_graph = false);
-
-  // Converts a Merge node to a XlaIf. This encapsulates the process of
-  // extracting the bodies needed for the then and else branch, creates a XlaIf
-  // node, removing the nodes of the branches from the graph and replacing the
-  // merge node with a XlaIf.
-  Status ConvertCorrespondingMergeToXlaIf(Cluster* switch_cluster);
-
-  // Removes a Switch cluster feeding directly into a Merge cluster by removing
-  // the Switch and Merge nodes and collapsing into a single cluster.
-  Status RemoveTrivialSwitch(Cluster* switch_cluster);
-
-  // Returns the merge cluster corresponding to the switch node. This function
-  // only returns the merge cluster in the case where we have a switch node that
-  // is the single entry point for all paths to a common merge cluster, this
-  // merge cluster may be created by combining multiple merge clusters, that
-  // share the switch cluster as common ancestor, together.
-  //
-  //           Switch
-  //          /      \
-  //     Branch      Branch
-  //          \      /
-  //        merge_cluster
-  //
-  // Note: either of the branches may be empty. The case where both branches are
-  // empty is handled by RemoveTrivialSwitch.
-  gtl::optional<Cluster*> CreateCorrespondingMergeCluster(
-      Cluster* switch_cluster);
-
-  // Determines the arguments needed as input to the Merge cluster originating
-  // from the Switch cluster.
-  xla::StatusOr<CondArgs> DetermineCondArgs(const Cluster& merge_cluster,
-                                            const Cluster& switch_cluster);
-
-  // Builds a XlaIfOp to replace the Merge node with.
-  xla::StatusOr<Node*> BuildAndAddXlaIfOp(const CondArgs& cond_args,
-                                          const Cluster& merge_cluster,
-                                          const std::vector<Node*>& outputs);
+      : library_(library), graph_(graph) {}
+
+  // Perform the actual cond functionalization. Iterate over groups of switch
+  // nodes (linked by common predicate), from innermost to outermost, and
+  // extract into XlaIf nodes.
+  Status FunctionalizeInternal();
+
+  // Determines the branch_map (mapping from node to branch of cond) and
+  // frontier (the nodes where the cond ends).
+  StatusOr<std::pair<std::unordered_map<Node*, ForwardFlowNode>,
+                     std::unordered_set<Node*>>>
+  DetermineBranchMapAndFrontier(const std::vector<Node*>& switches);
+
+  // Returns XlaIf node created from subgraph of merge and switch nodes. This
+  // encapsulates the process of extracting the bodies needed for the then and
+  // else branch, creates a XlaIf node, removing the nodes of the branches from
+  // the graph and replacing the merge node with a XlaIf.
+  StatusOr<Node*> ConvertToXlaIf(const CondArgNodes& cond_arg_nodes,
+                                 const std::vector<Node*>& switch_nodes,
+                                 const std::vector<Node*>& merge_nodes,
+                                 Node* predicate);
+
+  // Builds a XlaIfOp to replace the Switch-Graph-Merge cluster with.
+  StatusOr<Node*> BuildAndAddXlaIfOp(const CondArgNodes& cond_arg_nodes,
+                                     const std::vector<Node*>& switch_nodes,
+                                     const std::vector<Node*>& merge_nodes,
+                                     Node* predicate);
 
   // Extracts a function body corresponding to the given input edge of the merge
   // node.
-  Status ExtractBody(const CondArgs& cond_args, const Cluster& merge_cluster,
-                     const std::vector<Node*>& outputs, int input_edge,
+  Status ExtractBody(const CondArgNodes& cond_arg_nodes,
+                     const std::vector<Node*>& switch_nodes,
+                     const std::vector<Node*>& merge_nodes, int input_edge,
                      Graph* body);
 
   // Adds all the input edges to `if_node` corresponding to the arguments.
-  Status AddInputEdges(const CondArgs& cond_args, Node* if_node);
+  Status AddInputEdges(const CondArgNodes& cond_arg_nodes, Node* predicate,
+                       Node* if_node);
 
   // Adds all output edges from the `if_node`.
   Status AddOutputEdges(const std::vector<Node*>& outputs, Node* if_node);
 
-  // Removes all nodes from the graph that are part of cluster.
-  void RemoveClusterNodes(Cluster* cluster);
-
-  // Removes all argument nodes that are unused.
-  template <class T>
-  void RemoveUnusedArgs(const T& args);
-
-  // Removes all Merge nodes in merge_cluster.
-  void RemoveMergeNodes(Cluster* merge_cluster);
-
-  // Returns the representative member of the corresponding cluster.
-  ClusterHandle Representative(const Node* node) {
-    return clusters_.at(node).Get();
-  }
+  // Returns the switches of graph_ (along with grouping predicates) in
+  // postorder. Dead switch nodes are skipped and removed from the graph.
+  std::vector<PredicateSwitches> DeterminePredicateSwitchOrder();
+
+  // Update the state for destination based on the state of source and the node
+  // being updated.
+  Status Join(const ForwardFlowNode& src_state, const Node* dst,
+              ForwardFlowNode* dst_state);
+
+  // Ensure that all nodes in the branch_map are dominated by the switch
+  // nodes. Returns nodes that are not dominated by the switches but are a
+  // control dependency of a node in the cond, and remove such control
+  // dependencies.
+  StatusOr<std::vector<Node*>> EnsureDominanceAndReturnNonDominatedControlNodes(
+      const std::unordered_map<Node*, ForwardFlowNode>& branch_map,
+      const std::vector<Node*>& switches);
+
+  // Validates that the frontier of nodes for the conditional
+  // section are as expected.
+  Status ValidateFrontier(
+      const std::unordered_map<Node*, ForwardFlowNode>& branch_map,
+      const std::unordered_set<Node*>& frontier);
 
-  ClusteredGraph clustered_graph_;
-  ClusterHandle::Vector clusters_;
-  std::unordered_set<Node*> merge_nodes_;
-  std::unordered_set<Node*> switch_nodes_;
   FunctionLibraryDefinition* library_;
   Graph* graph_;
 };
 
-std::ostream& operator<<(std::ostream& os,
-                         const FunctionalizeCond::ClusterHandle& c) {
-  os << c.ToString();
-  return os;
-}
-
-// Returns a dot representation of the clustered graph showing the connections
-// between the nodes and the nodes in each cluster.
-string DebugString(const Graph& graph,
-                   FunctionalizeCond::ClusterHandle::Vector* clusters) {
-  string ret = "digraph {\ncompound=true;labeljust=\"r\";ranksep=0.24\n";
-  std::map<FunctionalizeCond::ClusterHandle, string> subgraphs;
-  auto name = [](const Node* n) {
-    return strings::StrCat(n->type_string(), "_", n->id());
-  };
-  for (Node* n : graph.nodes()) {
-    strings::StrAppend(&subgraphs[clusters->at(n).Get()], n->id(), " [label=\"",
-                       name(n), "\"];\n");
-  }
-  for (auto kv : subgraphs) {
-    strings::StrAppend(&ret, "subgraph cluster_", kv.first.ToString(), " {\n",
-                       "style=filled; color=lightgrey;", "label = \"",
-                       kv.first.ToString(), "\";\n", kv.second, "}\n");
-  }
-  for (Node* n : graph.nodes()) {
-    for (Node* in : n->in_nodes()) {
-      strings::StrAppend(&ret, in->id(), " -> ", n->id(), ";\n");
-    }
-  }
-  return strings::StrCat(ret, "} // end");
-}
-
-string DebugString(const FunctionalizeCond::ClusteredGraph& clustered_graph) {
-  string ret = "digraph {\ncompound=true;labeljust=\"r\";\n";
-  auto name = [](const FunctionalizeCond::Cluster& cluster) {
-    return cluster.representative.ToString();
-  };
-  for (auto kv : clustered_graph) {
-    if (!kv.second.switch_nodes.empty() || !kv.second.merge_nodes.empty()) {
-      strings::StrAppend(
-          &ret, kv.first.ToString(), " [label=\"", name(kv.second),
-          kv.second.switch_nodes.empty()
-              ? ""
-              : strings::StrCat(" switches=", kv.second.switch_nodes.size()),
-          kv.second.merge_nodes.empty()
-              ? ""
-              : strings::StrCat(" merges=", kv.second.merge_nodes.size()),
-          "\"];\n");
-    }
-  }
-  for (auto kv : clustered_graph) {
-    for (auto in : kv.second.in_nodes) {
-      strings::StrAppend(&ret, name(*in), " -> ", name(kv.second), ";\n");
-    }
-  }
-  return strings::StrCat(ret, "} // end");
-}
-
 bool IsDeadSwitch(const Node* node) {
   for (const Edge* e : node->out_edges()) {
     const Node* dst = e->dst();
@@ -796,337 +674,285 @@ bool IsDeadSwitch(const Node* node) {
   return true;
 }
 
-void FunctionalizeCond::CreateClusters() {
-  ClusterHandle source_cluster = ClusterHandle(Graph::kSourceId);
-  auto& source = clusters_.at(source_cluster);
-  std::deque<std::pair<ClusterHandle, std::deque<Node*>>> workqueue;
-  workqueue.push_back({source_cluster, {}});
-  for (Node* node : graph_->nodes()) {
-    if (IsSwitch(node)) {
-      switch_nodes_.insert(node);
-    } else if (IsMerge(node)) {
-      merge_nodes_.insert(node);
-    }
-    ClusterHandle& cluster = clusters_.at(node).Get();
-    cluster = ClusterHandle(node->id());
-    // Group all source clusters together.
-    if (node->IsSource() || node->in_edges().empty()) {
-      clusters_.at(node).Merge(&source);
-      source.Merge(&clusters_.at(node));
-      workqueue.front().second.push_back(node);
+string FunctionalizeCond::Branch_Name(FunctionalizeCond::Branch b) {
+  const string branch_name[FunctionalizeCond::kNumBranchTypes + 1] = {
+      "else", "then", "both", "neither", "count"};
+  return branch_name[b];
+}
+
+Status FunctionalizeCond::ValidateFrontier(
+    const std::unordered_map<Node*, FunctionalizeCond::ForwardFlowNode>&
+        branch_map,
+    const std::unordered_set<Node*>& frontier) {
+  std::unordered_set<const Node*> pending[kNumBranchTypes];
+  for (Node* n : frontier) {
+    pending[branch_map.at(n).branch].insert(n);
+  }
+  TF_RET_CHECK(pending[kNeither].empty()) << NodesToString(pending[kNeither]);
+  for (const Node* n : pending[kBoth]) {
+    TF_RET_CHECK(IsMerge(n)) << n->DebugString();
+    // Merge nodes may be in then or else branch too
+  }
+  int index = (pending[kThenBranch].size() <= pending[kElseBranch].size())
+                  ? kThenBranch
+                  : kElseBranch;
+  int other = 1 - index;
+  for (const Node* n : pending[index]) {
+    if (pending[other].find(n) != pending[other].end()) {
+      return errors::Internal(
+          "Node (", n->DebugString().c_str(),
+          ") in both Else and Then branch should be in Both.");
     }
   }
-
-  // If there are no Merge nodes, then terminate.
-  if (merge_nodes_.empty()) {
-    return;
+  if (pending[kBoth].empty() && pending[kThenBranch].empty() &&
+      pending[kElseBranch].empty()) {
+    return errors::Internal("Unexpected empty frontier for switch nodes");
   }
+  return Status::OK();
+}
 
-  // Remove all dead Switch nodes.
-  RemoveUnusedArgs(switch_nodes_);
-
-  // All parent_'s are still nullptr so clusters_ may still be resized. Resize
-  // conservatively assuming all merge nodes become XlaIf nodes.
-  clusters_.resize(clusters_.size() + merge_nodes_.size());
-
-  std::unordered_set<Node*> marked;
-  while (!workqueue.empty()) {
-    auto cluster_queue = workqueue.front();
-    VLOG(4) << "Cluster: " << cluster_queue.first << " Queue: {"
-            << str_util::Join(cluster_queue.second, ",",
-                              [](string* output, const Node* node) {
-                                strings::StrAppend(output, node->id());
-                              })
-            << "}";
-
-    UnionFind<ClusterHandle>& repr = clusters_.at(cluster_queue.first);
-    workqueue.pop_front();
-    std::deque<Node*> switch_nodes;
-    std::deque<Node*> merge_nodes;
-    std::unordered_set<Node*> cluster_member;
-    while (!cluster_queue.second.empty()) {
-      // Iterate node workqueue and flow forward merging all nodes reachable
-      // that are neither a Switch or a Merge and whose inputs are all part of
-      // the same cluster.
-      Node* cur = cluster_queue.second.front();
-      cluster_queue.second.pop_front();
-      if (marked.find(cur) != marked.end()) {
-        continue;
-      }
-      if (IsMerge(cur)) {
-        merge_nodes.push_back(cur);
-        marked.insert(cur);
-        continue;
-      }
-      if (IsSwitch(cur)) {
-        switch_nodes.push_back(cur);
-        marked.insert(cur);
-        continue;
-      }
-      clusters_.at(cur).Merge(&repr);
-      cluster_member.insert(cur);
-      for (Node* out : cur->out_nodes()) {
-        bool all_ancestors_in_cluster = true;
-        for (Node* in : out->in_nodes()) {
-          if (IsMerge(out)) {
-            merge_nodes.push_back(out);
-          }
-          if (IsSwitch(out)) {
-            switch_nodes.push_back(out);
-          }
-          if (cluster_member.find(in) == cluster_member.end()) {
-            all_ancestors_in_cluster = false;
-            break;
-          }
-        }
-        if (all_ancestors_in_cluster && out->IsOp()) {
-          cluster_queue.second.push_back(out);
-          marked.insert(cur);
-        }
-      }
+Status FunctionalizeCond::Join(const ForwardFlowNode& src_state,
+                               const Node* dst, ForwardFlowNode* dst_state) {
+  TF_RET_CHECK(dst_state->branch != Branch::kBoth &&
+               dst_state->branch != Branch::kNumBranchTypes)
+      << "Unexpected/Invalid branch type: Merging "
+      << Branch_Name(src_state.branch) << " with "
+      << Branch_Name(dst_state->branch);
+  if (dst_state->branch == Branch::kNeither) {
+    dst_state->branch = src_state.branch;
+  } else if (src_state.branch != dst_state->branch &&
+             src_state.branch != Branch::kNeither) {
+    if (IsMerge(dst)) {
+      dst_state->branch = Branch::kBoth;
+    } else {
+      return errors::Internal("Illegal merge: ", src_state.ToString(), " with ",
+                              dst_state->ToString(), " for ",
+                              dst->DebugString());
     }
+  }
+  ++dst_state->count;
+  return Status::OK();
+}
 
-    VLOG(4) << "Switches: {"
-            << str_util::Join(switch_nodes, ",",
-                              [](string* output, const Node* node) {
-                                strings::StrAppend(output, node->id());
-                              })
-            << "}";
-
-    // Merge Switch nodes with common predicate.
-    std::unordered_map<Node*, std::vector<Node*>> predicate_to_switch;
-    for (Node* node : switch_nodes) {
-      Node* tmp;
-      TF_CHECK_OK(node->input_node(1, &tmp));
-      predicate_to_switch[tmp].push_back(node);
-    }
-    for (auto kv : predicate_to_switch) {
-      Node* first = kv.second.front();
-      for (Node* switch_node : kv.second) {
-        clusters_.at(first).Merge(&clusters_.at(switch_node));
+std::vector<FunctionalizeCond::PredicateSwitches>
+FunctionalizeCond::DeterminePredicateSwitchOrder() {
+  std::vector<Node*> dead_switches;
+  std::vector<Node*> switch_order;
+  DFS(*graph_, nullptr, [this, &dead_switches, &switch_order](Node* n) {
+    if (IsSwitch(n)) {
+      if (IsDeadSwitch(n)) {
+        dead_switches.push_back(n);
+      } else {
+        switch_order.push_back(n);
       }
     }
+  });
 
-    // Enqueue each edge of the switch node separately. That is, group all the
-    // nodes that are due to the true/false edge of the switch together and
-    // consider all nodes that only have a control dependency on the switch node
-    // separately. We want to group together all nodes that are part of the same
-    // branch, as these will be extracted into the `then` and `else` functions
-    // of the functional if. The ops due to control edges are different as they
-    // could be involved with either branch and merging them here could result
-    // in invalid graphs.
-    for (auto kv : predicate_to_switch) {
-      ClusterHandle none = ClusterHandle(-1);
-      ClusterHandle first[2] = {none, none};
-      std::deque<Node*>* queue[2];
-      for (auto switch_node : kv.second) {
-        for (const auto e : switch_node->out_edges()) {
-          if (IsSwitch(e->dst()) || IsMerge(e->dst())) {
-            continue;
-          }
-          // Control edges are enqueued on their own.
-          if (e->IsControlEdge()) {
-            workqueue.push_back({Representative(e->dst()), {e->dst()}});
-            continue;
-          }
-          // Combine all outputs of the same output port of a switch cluster
-          // into the same workqueue entry.
-          if (first[e->src_output()] == none) {
-            ClusterHandle repr = Representative(e->dst());
-            first[e->src_output()] = repr;
-            workqueue.push_back({repr, {}});
-            queue[e->src_output()] = &workqueue.back().second;
-          }
-          clusters_.at(first[e->src_output()]).Merge(&clusters_.at(e->dst()));
-          queue[e->src_output()]->push_back(e->dst());
-        }
-      }
-    }
+  // Remove all dead switch nodes.
+  for (Node* n : dead_switches) {
+    VLOG(2) << "Removing dead switch: " << n->DebugString();
+    graph_->RemoveNode(n);
   }
-}
 
-void FunctionalizeCond::ContractEdge(Cluster* from, Cluster* to,
-                                     bool remove_from_graph) {
-  VLOG(3) << "ContractEdge from = " << from->representative
-          << " to = " << to->representative;
-  if (from->representative == to->representative) {
-    return;
+  std::vector<PredicateSwitches> predicate_switch_order;
+  if (switch_order.empty()) {
+    return predicate_switch_order;
   }
-  to->merge_nodes.insert(from->merge_nodes.begin(), from->merge_nodes.end());
-  from->merge_nodes.clear();
-  to->switch_nodes.insert(from->switch_nodes.begin(), from->switch_nodes.end());
-  from->switch_nodes.clear();
-
-  for (Cluster* from_out : from->out_nodes) {
-    from_out->in_nodes.erase(from);
-    if (from_out->representative != to->representative) {
-      from_out->in_nodes.insert(to);
-      to->out_nodes.insert(from_out);
-    }
-  }
-  from->out_nodes.clear();
 
-  for (Cluster* from_in : from->in_nodes) {
-    from_in->out_nodes.erase(from);
-    if (from_in->representative != to->representative) {
-      from_in->out_nodes.insert(to);
-      to->in_nodes.insert(from_in);
+  // Merge Switch nodes with common predicate.
+  std::unordered_map<Node*, int> predicate_index;
+  // The nodes in switch_order are in reverse topological order, but the
+  // clustered switches need not be (i.e., when considered as a cluster one
+  // element of a cluster may be later in the topological order than another
+  // node whose cluster is later in the topological order of clustered
+  // switches).
+  for (auto it = switch_order.rbegin(); it != switch_order.rend(); ++it) {
+    Node* pred;
+    TF_CHECK_OK((*it)->input_node(1, &pred));
+    if (predicate_index.find(pred) == predicate_index.end()) {
+      predicate_index[pred] = predicate_switch_order.size();
+      predicate_switch_order.emplace_back(pred);
     }
+    predicate_switch_order[predicate_index[pred]].switches.push_back(*it);
   }
-  from->in_nodes.clear();
-
-  to->in_nodes.erase(from);
-  to->out_nodes.erase(from);
-  clusters_.at(to->representative).Merge(&clusters_.at(from->representative));
-  from->visited = true;
-
-  if (remove_from_graph) {
-    clustered_graph_.erase(from->representative);
-  }
+  return predicate_switch_order;
 }
 
-void FunctionalizeCond::CreateClusteredGraph() {
-  auto update_cluster_for_node = [this](Node* node) -> Cluster& {
-    ClusterHandle repr = Representative(node);
-    Cluster& cluster_node = clustered_graph_[repr];
-    cluster_node.representative = repr;
-    for (const Node* in : node->in_nodes()) {
-      ClusterHandle other_repr = Representative(in);
-      // Skip source, sink and internal edges.
-      if (other_repr == repr) {
-        continue;
+StatusOr<std::vector<Node*>>
+FunctionalizeCond::EnsureDominanceAndReturnNonDominatedControlNodes(
+    const std::unordered_map<Node*, ForwardFlowNode>& branch_map,
+    const std::vector<Node*>& switches) {
+  std::vector<Node*> old_control_nodes;
+  for (const auto& kv : branch_map) {
+    if (kv.second.count != kv.first->in_edges().size()) {
+      std::vector<const Edge*> delete_edges;
+      for (const Edge* in : kv.first->in_edges()) {
+        auto it = branch_map.find(in->src());
+        if (it == branch_map.end()) {
+          if (in->IsControlEdge()) {
+            old_control_nodes.push_back(in->src());
+            delete_edges.push_back(in);
+          } else {
+            if (IsSwitch(in->src())) {
+              if (std::find(switches.begin(), switches.end(), in->src()) ==
+                  switches.end()) {
+                return errors::Internal(
+                    "Unexpected switch node found during flow forward: ",
+                    in->src()->DebugString());
+              }
+              continue;
+            }
+            return errors::InvalidArgument(
+                "Value ", kv.first->name(), "'s input, ", in->src()->name(),
+                ", is not dominated by switch nodes ", NodesToString(switches));
+          }
+        }
       }
-      Cluster& cluster_node_in = clustered_graph_[other_repr];
-      cluster_node.in_nodes.insert(&cluster_node_in);
-      cluster_node_in.out_nodes.insert(&cluster_node);
-      cluster_node_in.representative = other_repr;
-    }
-    for (const Node* out : node->out_nodes()) {
-      ClusterHandle other_repr = Representative(out);
-      // Skip source, sink and internal edges.
-      if (other_repr == repr) {
-        continue;
+      // Remove control edges from nodes that are not dominated by the switch
+      // nodes. New control dependencies will be added between these nodes and
+      // the XlaIf node inserted.
+      for (const Edge* e : delete_edges) {
+        graph_->RemoveEdge(e);
       }
-      Cluster& cluster_node_out = clustered_graph_[other_repr];
-      cluster_node.out_nodes.insert(&cluster_node_out);
-      cluster_node_out.in_nodes.insert(&cluster_node);
-      cluster_node_out.representative = other_repr;
     }
-    return cluster_node;
-  };
-  update_cluster_for_node(graph_->source_node());
-  for (Node* node : switch_nodes_) {
-    update_cluster_for_node(node).switch_nodes.insert(node);
   }
-  for (Node* node : merge_nodes_) {
-    update_cluster_for_node(node).merge_nodes.insert(node);
-  }
-
-  VLOG(3) << "Graph with clusters: " << DebugString(*graph_, &clusters_);
-  VLOG(3) << "ClusteredGraph: " << DebugString(clustered_graph_);
+  return old_control_nodes;
 }
 
-gtl::optional<FunctionalizeCond::Cluster*>
-FunctionalizeCond::CreateCorrespondingMergeCluster(Cluster* switch_cluster) {
-  VLOG(3) << "CreateCorrespondingMergeCluster for "
-          << switch_cluster->representative;
-  std::unordered_set<Cluster*> merges;
-  std::unordered_set<Cluster*> dominated;
-  dominated.insert(switch_cluster);
-  std::deque<Cluster*> queue;
-  auto enqueue_or_update_merge = [this, &queue, &merges](Cluster* c) {
-    if (c->merge_nodes.empty()) {
-      queue.push_back(c);
-    } else {
-      merges.insert(c);
-    }
-  };
-  // Enqueue all the outputs of the switch cluster in the workqueue.
-  for (auto* out : switch_cluster->out_nodes) {
-    enqueue_or_update_merge(out);
-  }
-  std::unordered_set<Cluster*> visited;
-  while (!queue.empty()) {
-    Cluster* cur = queue.front();
-    queue.pop_front();
-    if (visited.find(cur) != visited.end()) {
+StatusOr<
+    std::pair<std::unordered_map<Node*, FunctionalizeCond::ForwardFlowNode>,
+              std::unordered_set<Node*>>>
+FunctionalizeCond::DetermineBranchMapAndFrontier(
+    const std::vector<Node*>& switches) {
+  std::unordered_map<Node*, ForwardFlowNode> branch_map;
+  std::unordered_set<Node*> frontier;
+  std::vector<Node*> stack = switches;
+  std::vector<bool> visited(graph_->num_node_ids(), false);
+  while (!stack.empty()) {
+    Node* n = stack.back();
+    stack.pop_back();
+
+    if (visited[n->id()]) {
       continue;
     }
-    visited.insert(cur);
-    // Ensure all inputs to the current node are in the dominated set.
-    for (Cluster* in : cur->in_nodes) {
-      if (dominated.find(in) == dominated.end()) {
-        return gtl::nullopt;
+    visited[n->id()] = true;
+
+    // Propagate branch state along each edge of a switch node.
+    bool sink_only = true;
+    for (const Edge* e : n->out_edges()) {
+      Node* out = e->dst();
+      if (!out->IsOp()) {
+        continue;
+      }
+      sink_only = false;
+      // Propagate branch information.
+      ForwardFlowNode& ffn = branch_map[out];
+      if (IsSwitch(n)) {
+        int index = e->IsControlEdge() ? Branch::kNeither : e->src_output();
+        TF_RETURN_IF_ERROR(Join(ForwardFlowNode(Branch(index)), out, &ffn));
+      } else {
+        TF_RETURN_IF_ERROR(Join(branch_map[n], out, &ffn));
+      }
+      if (IsMerge(out)) {
+        if (out->in_edges().size() == ffn.count) {
+          frontier.insert(out);
+        }
+      } else if (!visited[out->id()]) {
+        stack.push_back(out);
       }
     }
-    for (Cluster* out : cur->out_nodes) {
-      // No switch nodes beyond the entry one is expected.
-      if (!out->switch_nodes.empty()) {
-        return gtl::nullopt;
+    if (sink_only) {
+      if (!IsIdentity(n)) {
+        VLOG(1) << "Feeding into sink: " << n->DebugString();
       }
-      enqueue_or_update_merge(out);
     }
   }
-  auto it = merges.begin();
-  Cluster* merge_cluster = *it;
-  for (++it; it != merges.end(); ++it) {
-    ContractEdge(*it, merge_cluster);
-  }
-
-  // TODO(jpienaar): Clean up graph, merging nodes.
 
-  return merge_cluster;
+  if (VLOG_IS_ON(2)) {
+    for (const auto& kv : branch_map) {
+      // Append attribute to the graph if running with logging to make the
+      // changes clearer in the visualization.
+      kv.first->AddAttr("_XlaFunctionalizeBranch",
+                        Branch_Name(kv.second.branch));
+    }
+  }
+  return std::make_pair(std::move(branch_map), std::move(frontier));
 }
 
-xla::StatusOr<FunctionalizeCond::CondArgs> FunctionalizeCond::DetermineCondArgs(
-    const Cluster& merge_cluster, const Cluster& switch_cluster) {
-  VLOG(2) << "DetermineCondArgs for " << merge_cluster.representative
-          << " with switch cluster " << switch_cluster.representative;
-  CondArgs ret;
-  auto feeds_into_branch_cluster = [&](Node* switch_cluster) {
-    for (Node* out : switch_cluster->out_nodes()) {
-      ClusterHandle repr = Representative(out);
-      if (repr == merge_cluster.representative) {
-        return true;
-      }
-      for (Cluster* in : merge_cluster.in_nodes) {
-        if (repr == in->representative) {
-          return true;
-        }
+Status FunctionalizeCond::FunctionalizeInternal() {
+  std::vector<PredicateSwitches> predicate_switch_order =
+      DeterminePredicateSwitchOrder();
+
+  // Iterate from innermost set of clustered switches to outermost, replacing
+  // matching switch->merge subgraphs with single XlaIf nodes.
+  for (auto it = predicate_switch_order.rbegin();
+       it != predicate_switch_order.rend(); ++it) {
+    auto& ps = *it;
+    VLOG(3) << "Flow down from: " << NodesToString(ps.switches) << " ("
+            << ps.predicate->name() << ")";
+
+    std::unordered_map<Node*, ForwardFlowNode> branch_map;
+    std::unordered_set<Node*> frontier;
+    TF_ASSIGN_OR_RETURN(std::tie(branch_map, frontier),
+                        DetermineBranchMapAndFrontier(ps.switches));
+
+    VLOG(2) << "FunctionalizeControlFlow (before XlaIf conversion): "
+            << dump_graph::DumpGraphToFile("functionalize_bc", *graph_);
+    TF_RETURN_IF_ERROR(ValidateFrontier(branch_map, frontier));
+
+    // Sort the merge and switch nodes using NodeCmp. The switch-nodes are
+    // further grouped (post sorting) by input to the switch node as in the
+    // functionalized form each input will be passed in only once. This grouping
+    // should retain the sorted order.
+    CondArgNodes cond_arg_nodes;
+    std::unordered_map<Node*, int> input_index;
+    std::sort(ps.switches.begin(), ps.switches.end(), NodeCmp());
+    for (Node* switch_node : ps.switches) {
+      Node* in;
+      TF_RETURN_IF_ERROR(switch_node->input_node(0, &in));
+      if (input_index.find(in) == input_index.end()) {
+        input_index[in] = cond_arg_nodes.size();
+        cond_arg_nodes.emplace_back(in);
       }
+      cond_arg_nodes.at(input_index.at(in)).switch_nodes.push_back(switch_node);
     }
-    return false;
-  };
-  for (Node* switch_cluster_node : switch_cluster.switch_nodes) {
-    if (!feeds_into_branch_cluster(switch_cluster_node)) {
-      continue;
+    std::vector<Node*> merge_nodes(frontier.begin(), frontier.end());
+    std::sort(merge_nodes.begin(), merge_nodes.end(), NodeCmp());
+
+    TF_ASSIGN_OR_RETURN(std::vector<Node*> old_control_nodes,
+                        EnsureDominanceAndReturnNonDominatedControlNodes(
+                            branch_map, ps.switches));
+
+    TF_ASSIGN_OR_RETURN(
+        Node * if_node,
+        ConvertToXlaIf(cond_arg_nodes, ps.switches, merge_nodes, ps.predicate));
+    for (Node* old : old_control_nodes) {
+      graph_->AddControlEdge(old, if_node);
     }
 
-    Node* tmp;
-    TF_RETURN_IF_ERROR(switch_cluster_node->input_node(1, &tmp));
-    if (ret.conditional == nullptr) {
-      ret.conditional = tmp;
-    } else if (ret.conditional != tmp) {
-      return errors::Unimplemented(
-          "Switch statements with different conditionals cannot be "
-          "converted into functional conditional.");
+    for (auto& del_kv : branch_map) {
+      graph_->RemoveNode(del_kv.first);
     }
-    ret.args.insert(switch_cluster_node);
+    for (auto& kv : cond_arg_nodes) {
+      for (Node* node : kv.switch_nodes) {
+        graph_->RemoveNode(node);
+      }
+    }
+    VLOG(2) << "FunctionalizeControlFlow (after XlaIf conversion): "
+            << dump_graph::DumpGraphToFile("functionalize_ac", *graph_);
   }
-  return ret;
+  return Status::OK();
 }
 
-xla::StatusOr<Node*> FunctionalizeCond::BuildAndAddXlaIfOp(
-    const CondArgs& cond_args, const Cluster& merge_cluster,
-    const std::vector<Node*>& outputs) {
-  VLOG(2) << "Build if op for " << NodesToString(merge_cluster.merge_nodes)
-          << " with input " << NodesToString(cond_args.args);
+StatusOr<Node*> FunctionalizeCond::BuildAndAddXlaIfOp(
+    const CondArgNodes& cond_arg_nodes, const std::vector<Node*>& switch_nodes,
+    const std::vector<Node*>& merge_nodes, Node* predicate) {
+  VLOG(2) << "Build if op for " << NodesToString(merge_nodes) << " with input "
+          << NodesToString(switch_nodes);
 
   NodeDef if_def;
   // Create a new If node using the name of the merge node.
-  NodeDefBuilder builder(
-      strings::StrCat((*merge_cluster.merge_nodes.begin())->name(), "_If"),
-      "XlaIf");
+  NodeDefBuilder builder(strings::StrCat(predicate->name(), "_If"), "XlaIf");
   string branch[] = {"else_branch", "then_branch"};
   for (int i = 0; i < 2; ++i) {
     static std::atomic<int64> sequence_num(0LL);
@@ -1137,8 +963,11 @@ xla::StatusOr<Node*> FunctionalizeCond::BuildAndAddXlaIfOp(
         strings::StrCat("_functionalize_if_", branch[i], "_", id));
     auto body = xla::MakeUnique<Graph>(graph_->op_registry());
     TF_RETURN_IF_ERROR(
-        ExtractBody(cond_args, merge_cluster, outputs, i, body.get()));
+        ExtractBody(cond_arg_nodes, switch_nodes, merge_nodes, i, body.get()));
     VLOG(3) << "Body " << branch[i] << ": " << DebugString(body.get());
+    VLOG(4) << "FunctionalizeControlFlow (" << branch[i] << "): "
+            << dump_graph::DumpGraphToFile(
+                   strings::StrCat("functionalize_", branch[i]), *body);
     FunctionDef body_fdef;
     TF_RETURN_IF_ERROR(GraphToFunctionDef(*body, body_name.name(), &body_fdef));
     TF_RETURN_IF_ERROR(library_->AddFunctionDef(body_fdef));
@@ -1148,33 +977,39 @@ xla::StatusOr<Node*> FunctionalizeCond::BuildAndAddXlaIfOp(
   // Build input type.
   std::vector<NodeDefBuilder::NodeOut> inputs;
   DataTypeVector in_arg_types;
-  for (const Node* arg : cond_args.args) {
-    const Edge* in_edge;
-    TF_RETURN_IF_ERROR(arg->input_edge(0, &in_edge));
-    if (in_edge->IsControlEdge()) {
-      builder.ControlInput(in_edge->src()->name());
-    } else {
-      DataType dtype = arg->input_type(0);
-      inputs.emplace_back(NodeDefBuilder::NodeOut(
-          in_edge->src()->name(), in_edge->src_output(), dtype));
-      in_arg_types.push_back(dtype);
+  for (auto& kv : cond_arg_nodes) {
+    bool inserted = false;
+    for (const Node* arg : kv.switch_nodes) {
+      const Edge* in_edge;
+      TF_RETURN_IF_ERROR(arg->input_edge(0, &in_edge));
+      if (in_edge->IsControlEdge()) {
+        builder.ControlInput(in_edge->src()->name());
+      } else {
+        if (!inserted) {
+          DataType dtype = arg->input_type(0);
+          inputs.emplace_back(NodeDefBuilder::NodeOut(
+              in_edge->src()->name(), in_edge->src_output(), dtype));
+          in_arg_types.push_back(dtype);
+          inserted = true;
+        }
+      }
     }
   }
   builder.Attr("Tin", in_arg_types);
 
   // Build output type.
   DataTypeVector out_type;
-  for (const Node* merge : merge_cluster.merge_nodes) {
+  for (const Node* merge : merge_nodes) {
     DataType dtype = merge->output_type(0);
     out_type.push_back(dtype);
   }
   builder.Attr("Tout", out_type);
 
   builder.Attr("Tcond", DT_BOOL);
-  builder.Device(cond_args.conditional->assigned_device_name());
+  builder.Device(predicate->assigned_device_name());
   // Conditional should be the first input ...
-  builder.Input(NodeDefBuilder::NodeOut(cond_args.conditional->name(), 0,
-                                        cond_args.conditional->output_type(0)));
+  builder.Input(
+      NodeDefBuilder::NodeOut(predicate->name(), 0, predicate->output_type(0)));
   // ... followed by the other inputs.
   builder.Input(inputs);
 
@@ -1183,64 +1018,31 @@ xla::StatusOr<Node*> FunctionalizeCond::BuildAndAddXlaIfOp(
   return if_node;
 }
 
-void FunctionalizeCond::RemoveClusterNodes(Cluster* cluster) {
-  VLOG(3) << "RemoveClusterNodes for " << cluster->representative;
-  ClusterHandle repr = cluster->representative;
-  std::deque<Node*> to_delete;
-  for (Node* node : graph_->nodes()) {
-    if (Representative(node) == repr) {
-      to_delete.push_back(node);
-    }
-  }
-  for (Node* n : to_delete) {
-    graph_->RemoveNode(n);
-  }
-}
-
-template <class T>
-void FunctionalizeCond::RemoveUnusedArgs(const T& args) {
-  VLOG(2) << "RemoveUnusedArgs among: " << NodesToString(args);
-
-  std::deque<Node*> to_delete;
-  for (Node* arg : args) {
-    if (IsDeadSwitch(arg)) {
-      to_delete.push_back(arg);
-      for (Node* n : arg->out_nodes()) {
-        to_delete.push_back(n);
-      }
-    }
-  }
-  for (Node* n : to_delete) {
-    switch_nodes_.erase(n);
-    auto it = clustered_graph_.find(Representative(n));
-    if (it != clustered_graph_.end()) {
-      it->second.switch_nodes.erase(n);
-    }
-    graph_->RemoveNode(n);
-  }
-}
-
-Status FunctionalizeCond::ExtractBody(const CondArgs& cond_args,
-                                      const Cluster& merge_cluster,
-                                      const std::vector<Node*>& outputs,
+Status FunctionalizeCond::ExtractBody(const CondArgNodes& cond_arg_nodes,
+                                      const std::vector<Node*>& switch_nodes,
+                                      const std::vector<Node*>& merge_nodes,
                                       int input_edge, Graph* body) {
-  VLOG(2) << "ExtractBody for " << merge_cluster.representative
-          << " along edge " << input_edge;
+  VLOG(2) << "ExtractBody for " << NodesToString(merge_nodes) << " along edge "
+          << input_edge;
   std::vector<bool> squash_src_outputs(graph_->num_node_ids(), false);
   std::vector<Node*> node_map(graph_->num_node_ids(), nullptr);
   int arg_count = 0;
-  for (const auto* arg : cond_args.args) {
-    DataType dtype = arg->input_type(0);
-    TF_ASSIGN_OR_RETURN(Node * arg_node,
-                        BuildArgNode(body, dtype, arg_count++));
-    node_map.at(arg->id()) = arg_node;
-    squash_src_outputs.at(arg->id()) = true;
+  for (auto& kv : cond_arg_nodes) {
+    Node* arg_node = nullptr;
+    for (const auto* arg : kv.switch_nodes) {
+      DataType dtype = arg->input_type(0);
+      if (arg_node == nullptr) {
+        TF_ASSIGN_OR_RETURN(arg_node, BuildArgNode(body, dtype, arg_count++));
+      }
+      node_map.at(arg->id()) = arg_node;
+      squash_src_outputs.at(arg->id()) = true;
+    }
   }
 
   std::vector<Node*> stack;
-  stack.reserve(outputs.size());
-  for (int j = 0; j < outputs.size(); ++j) {
-    Node* node = outputs[j];
+  stack.reserve(merge_nodes.size());
+  for (int j = 0; j < merge_nodes.size(); ++j) {
+    Node* node = merge_nodes[j];
     TF_ASSIGN_OR_RETURN(node_map.at(node->id()),
                         BuildRetvalNode(body, node->output_type(0),
                                         /*index=*/j));
@@ -1251,7 +1053,8 @@ Status FunctionalizeCond::ExtractBody(const CondArgs& cond_args,
       node_map.at(in->id()) = body->CopyNode(in);
     }
 
-    if (cond_args.args.find(in) == cond_args.args.end()) {
+    if (std::find(switch_nodes.begin(), switch_nodes.end(), in) ==
+        switch_nodes.end()) {
       body->AddEdge(node_map.at(in->id()), in_edge->src_output(),
                     node_map.at(node->id()), 0);
     } else {
@@ -1266,18 +1069,25 @@ Status FunctionalizeCond::ExtractBody(const CondArgs& cond_args,
                       body);
 }
 
-Status FunctionalizeCond::AddInputEdges(const CondArgs& cond_args,
-                                        Node* if_node) {
+Status FunctionalizeCond::AddInputEdges(const CondArgNodes& cond_arg_nodes,
+                                        Node* predicate, Node* if_node) {
   VLOG(3) << "AddInputEdges for " << if_node->name();
-  int i = 0;
-  graph_->AddEdge(cond_args.conditional, 0, if_node, i++);
-  for (const Node* arg : cond_args.args) {
-    const Edge* in_edge;
-    TF_RETURN_IF_ERROR(arg->input_edge(0, &in_edge));
-    if (in_edge->IsControlEdge()) {
-      graph_->AddControlEdge(in_edge->src(), if_node);
-    } else {
-      graph_->AddEdge(in_edge->src(), in_edge->src_output(), if_node, i++);
+  int index = 0;
+  graph_->AddEdge(predicate, 0, if_node, index++);
+  for (auto& kv : cond_arg_nodes) {
+    bool inserted = false;
+    for (const Node* arg : kv.switch_nodes) {
+      const Edge* in_edge;
+      TF_RETURN_IF_ERROR(arg->input_edge(0, &in_edge));
+      if (in_edge->IsControlEdge()) {
+        graph_->AddControlEdge(in_edge->src(), if_node);
+      } else {
+        if (!inserted) {
+          graph_->AddEdge(in_edge->src(), in_edge->src_output(), if_node,
+                          index++);
+          inserted = true;
+        }
+      }
     }
   }
   return Status::OK();
@@ -1308,186 +1118,27 @@ Status FunctionalizeCond::AddOutputEdges(const std::vector<Node*>& outputs,
   return Status::OK();
 }
 
-void FunctionalizeCond::RemoveMergeNodes(Cluster* merge_cluster) {
-  VLOG(3) << "RemoveMergeNodes for " << merge_cluster->representative;
-  // Remove all merge nodes now dead post extraction of If.
-  for (auto it = merge_cluster->merge_nodes.begin();
-       it != merge_cluster->merge_nodes.end();) {
-    Node* node = *it;
-    graph_->RemoveNode(node);
-    merge_cluster->merge_nodes.erase(*it++);
-  }
-}
-
-Status FunctionalizeCond::RemoveTrivialSwitch(Cluster* switch_cluster) {
-  Cluster* merge_cluster = *switch_cluster->out_nodes.begin();
-  if (merge_cluster->merge_nodes.empty()) {
-    return errors::FailedPrecondition(
-        "Not a trivial switch: no Merge node feeding into Switch node");
-  }
-
-  for (auto it = merge_cluster->merge_nodes.begin();
-       it != merge_cluster->merge_nodes.end();) {
-    // We have the following structure:
-    //   Op -> Switch -> Merge -> Consumer
-    // and we want to transform it to:
-    //   Op -> Consumer
-    Node* merge_node = *it;
-    Node* switch_node;
-    const Edge* in = nullptr;
-    TF_RETURN_IF_ERROR(merge_node->input_node(0, &switch_node));
-    TF_RETURN_IF_ERROR(switch_node->input_edge(0, &in));
-    for (auto out : merge_node->out_edges()) {
-      int src_output = out->dst_input() == Graph::kControlSlot
-                           ? Graph::kControlSlot
-                           : in->src_output();
-      graph_->AddEdge(in->src(), src_output, out->dst(), out->dst_input());
-    }
-    graph_->RemoveNode(*it++);
-  }
-  RemoveUnusedArgs(switch_cluster->switch_nodes);
-
-  return Status::OK();
-}
-
-Status FunctionalizeCond::ConvertCorrespondingMergeToXlaIf(
-    Cluster* switch_cluster) {
-  VLOG(1) << "ConvertMergeToXlaIf for " << switch_cluster->representative;
-  gtl::optional<Cluster*> maybe_merge =
-      CreateCorrespondingMergeCluster(switch_cluster);
-  if (!maybe_merge.has_value()) {
-    return errors::FailedPrecondition(
-        "Switch cluster was not part of a simple conditional in the clustered "
-        "graph. Graph nodes in switch cluster ",
-        NodesToString(switch_cluster->switch_nodes));
-  }
-  Cluster* merge_cluster = *maybe_merge;
-  if (merge_cluster->merge_nodes.empty()) {
-    return errors::Internal(
-        "Merge node in clustered graph contains no merge nodes: ",
-        merge_cluster->representative.ToString());
-  }
-  TF_ASSIGN_OR_RETURN(auto cond_args,
-                      DetermineCondArgs(*merge_cluster, *switch_cluster));
-
-  // Sort the outputs by ID to produce more stable output.
-  std::vector<Node*> outputs(merge_cluster->merge_nodes.begin(),
-                             merge_cluster->merge_nodes.end());
-  std::sort(outputs.begin(), outputs.end(), CondArgs::CondCmp());
+StatusOr<Node*> FunctionalizeCond::ConvertToXlaIf(
+    const CondArgNodes& cond_arg_nodes, const std::vector<Node*>& switch_nodes,
+    const std::vector<Node*>& merge_nodes, Node* predicate) {
+  VLOG(1) << "ConvertToXlaIf for " << NodesToString(switch_nodes) << " -> "
+          << NodesToString(merge_nodes);
 
   // Extract bodies and builds a If operator.
-  TF_ASSIGN_OR_RETURN(Node * if_node,
-                      BuildAndAddXlaIfOp(cond_args, *merge_cluster, outputs));
-  TF_RETURN_IF_ERROR(AddInputEdges(cond_args, if_node));
-  TF_RETURN_IF_ERROR(AddOutputEdges(outputs, if_node));
-
-  // Remove the old nodes from the graph_ and contract the edges of the
-  // clustered graph.
-  for (auto in : merge_cluster->in_nodes) {
-    if (in != switch_cluster) {
-      RemoveClusterNodes(in);
-    }
-  }
-  RemoveMergeNodes(merge_cluster);
-  RemoveUnusedArgs(cond_args.args);
-  auto in_nodes = merge_cluster->in_nodes;
-  for (auto it = in_nodes.begin(); it != in_nodes.end();) {
-    ContractEdge(*it++, switch_cluster);
-  }
-  ContractEdge(merge_cluster, switch_cluster);
-  clusters_[if_node].Get() = ClusterHandle(switch_cluster->representative);
-
-  return Status::OK();
-}
+  TF_ASSIGN_OR_RETURN(
+      Node * if_node,
+      BuildAndAddXlaIfOp(cond_arg_nodes, switch_nodes, merge_nodes, predicate));
+  TF_RETURN_IF_ERROR(AddInputEdges(cond_arg_nodes, predicate, if_node));
+  TF_RETURN_IF_ERROR(AddOutputEdges(merge_nodes, if_node));
 
-std::vector<std::pair<int, FunctionalizeCond::Cluster*>>
-FunctionalizeCond::SortedSwitchNodes() {
-  VLOG(2) << "ProcessClusteredGraph";
-  std::stack<std::pair<int, Cluster*>> stack;
-  // Initialize with the source node.
-  stack.push({0, &clustered_graph_[Representative(graph_->source_node())]});
-
-  // Perform a depth-first traversal of the clustered graph computing the
-  // switch-merge depth.
-  std::vector<std::pair<int, Cluster*>> queue;
-  std::unordered_set<Cluster*> visited;
-  while (!stack.empty()) {
-    Cluster* n = stack.top().second;
-    size_t depth = stack.top().first;
-    stack.pop();
-
-    auto inserted = visited.insert(n);
-    if (!inserted.second) {
-      continue;
-    }
-
-    size_t new_depth = depth;
-    if (!n->merge_nodes.empty()) {
-      --new_depth;
-    }
-    if (!n->switch_nodes.empty()) {
-      queue.emplace_back(depth, n);
-      ++new_depth;
-    }
-    for (Cluster* e : n->out_nodes) {
-      stack.emplace(new_depth, e);
-    }
-  }
-
-  // Sort in reverse order of switch-merge depth with ties broken by the
-  // ClusterHandle.
-  std::sort(queue.begin(), queue.end(),
-            [](const std::pair<int, Cluster*>& lhs,
-               const std::pair<int, Cluster*>& rhs) {
-              return std::tie(lhs.first, lhs.second->representative) >
-                     std::tie(rhs.first, rhs.second->representative);
-            });
-
-  return queue;
+  return if_node;
 }
 
 Status FunctionalizeCond::Functionalize(Graph* graph,
                                         FunctionLibraryDefinition* library) {
   VLOG(1) << "FunctionalizeCond::Functionalize";
   FunctionalizeCond fc(graph, library);
-  fc.CreateClusters();
-  if (fc.NoConditionals()) {
-    return Status::OK();
-  }
-  fc.CreateClusteredGraph();
-
-  auto queue = fc.SortedSwitchNodes();
-  for (auto it = queue.begin(); it != queue.end();) {
-    Cluster* switch_cluster = (*it).second;
-    ++it;
-    if (switch_cluster->out_nodes.size() == 1) {
-      TF_RETURN_IF_ERROR(fc.RemoveTrivialSwitch(switch_cluster));
-    } else {
-      TF_RETURN_IF_ERROR(fc.ConvertCorrespondingMergeToXlaIf(switch_cluster));
-    }
-
-    // Contract newly Switch free switch_cluster with outgoing nodes without
-    // Switch or Merge nodes.
-    for (auto& nodes : {switch_cluster->out_nodes, switch_cluster->in_nodes}) {
-      std::vector<Cluster*> copy_nodes(nodes.begin(), nodes.end());
-      for (auto* node : copy_nodes) {
-        if (node->merge_nodes.empty() && node->switch_nodes.empty()) {
-          fc.ContractEdge(node, switch_cluster);
-        }
-      }
-    }
-
-    VLOG(3) << "Graph with clusters: "
-            << DebugString(*fc.graph_, &fc.clusters_);
-    VLOG(3) << "ClusteredGraph: " << DebugString(fc.clustered_graph_);
-  }
-
-  if (!fc.switch_nodes_.empty()) {
-    return errors::Internal(
-        "Failed to functionalize control flow with Switch nodes remaining: ",
-        NodesToString(fc.switch_nodes_));
-  }
-  return Status::OK();
+  return fc.FunctionalizeInternal();
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc
index 01d2b282751f387cfa9c8887cdeb48090c96bff4..71f12a13339b9b5495631b8f9350579f6a0785a3 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc
@@ -109,7 +109,7 @@ TEST(FunctionalizeControlFlow, Conditional) {
     auto y = ops::Placeholder(scope.WithOpName("y"), DT_INT32);
     auto x = ops::Placeholder(scope.WithOpName("x"), DT_INT32);
     auto less = ops::Less(scope.WithOpName("cond/Less"), y, x);
-    auto if_op = ops::XlaIf(scope.WithOpName("cond/Merge_If"), less,
+    auto if_op = ops::XlaIf(scope.WithOpName("cond/Less_If"), less,
                             std::initializer_list<Input>{less, y, x}, then_fn,
                             else_fn, {DT_INT32});
     GraphDef expected;
diff --git a/tensorflow/compiler/tf2xla/g3doc/cpu_supported_ops.md b/tensorflow/compiler/tf2xla/g3doc/cpu_supported_ops.md
new file mode 100644
index 0000000000000000000000000000000000000000..82b3b46a2f1e97001d1e0c6b993ec243170bc7d8
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/g3doc/cpu_supported_ops.md
@@ -0,0 +1,242 @@
+**Supported operators for device: XLA_CPU_JIT**
+
+Operator                              | Type Constraint
+------------------------------------- | ---------------
+`Abs`                                 | `T={double,float,int32,int64}`
+`Acosh`                               | `T={complex64,double,float}`
+`Add`                                 | `T={complex64,double,float,int32,int64}`
+`AddN`                                | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`All`                                 | `Tidx={int32,int64}`
+`Angle`                               | `Tout={double,float}`<br>`T={complex64}`
+`Any`                                 | `Tidx={int32,int64}`
+`ApproximateEqual`                    | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`ArgMax`                              | `Tidx={int32,int64}`<br>`output_type={int32,int64}`<br>`T={float}`
+`ArgMin`                              | `Tidx={int32,int64}`<br>`output_type={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Asinh`                               | `T={complex64,double,float}`
+`AssignAddVariableOp`                 | `dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`AssignSubVariableOp`                 | `dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`AssignVariableOp`                    | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Atan2`                               | `T={double,float}`
+`Atanh`                               | `T={complex64,double,float}`
+`AvgPool`                             | `T={double,float}`
+`AvgPool3D`                           | `T={double,float}`
+`AvgPool3DGrad`                       | `T={double,float}`
+`AvgPoolGrad`                         | `T={double,float}`
+`BatchMatMul`                         | `T={complex64,double,float,int32}`
+`BatchToSpace`                        | `Tidx={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`BatchToSpaceND`                      | `Tcrops={int32,int64}`<br>`Tblock_shape={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAdd`                             | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAddGrad`                         | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAddV1`                           | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BitwiseAnd`                          | `T={int32,int64,uint32,uint64}`
+`BitwiseOr`                           | `T={int32,int64,uint32,uint64}`
+`BroadcastArgs`                       | `T={int32,int64}`
+`BroadcastGradientArgs`               | `T={int32,int64}`
+`Cast`                                | `DstT={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`SrcT={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Ceil`                                | `T={double,float}`
+`Cholesky`                            | `T={complex64,double,float}`
+`Complex`                             | `Tout={complex64}`<br>`T={double,float}`
+`ComplexAbs`                          | `Tout={double,float}`<br>`T={complex64}`
+`Concat`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ConcatOffset`                        |
+`ConcatV2`                            | `Tidx={int32}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Conj`                                | `T={complex64}`
+`Const`                               | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ControlTrigger`                      |
+`Conv2D`                              | `T={float}`
+`Conv2DBackpropFilter`                | `T={float}`
+`Conv2DBackpropInput`                 | `T={float}`
+`Conv3D`                              | `T={double,float}`
+`Conv3DBackpropFilterV2`              | `T={double,float}`
+`Conv3DBackpropInputV2`               | `T={double,float}`
+`Cos`                                 | `T={complex64,double,float}`
+`Cosh`                                | `T={complex64,double,float}`
+`Cross`                               | `T={double,float,int32,int64,uint32,uint64}`
+`Cumprod`                             | `Tidx={int32,int64}`<br>`T={float}`
+`Cumsum`                              | `Tidx={int32,int64}`<br>`T={float}`
+`DepthToSpace`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`DepthwiseConv2dNative`               | `T={double,float}`
+`DepthwiseConv2dNativeBackpropFilter` | `T={double,float}`
+`DepthwiseConv2dNativeBackpropInput`  | `T={double,float}`
+`Diag`                                | `T={complex64,double,float,int32,int64}`
+`DiagPart`                            | `T={complex64,double,float,int32,int64}`
+`Div`                                 | `T={complex64,double,float,int32,int64}`
+`DynamicStitch`                       | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Elu`                                 | `T={double,float}`
+`EluGrad`                             | `T={double,float}`
+`Equal`                               | `T={bool,complex64,double,float,int32,int64}`
+`Exp`                                 | `T={complex64,double,float}`
+`ExpandDims`                          | `Tdim={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Expm1`                               | `T={complex64,double,float}`
+`Fill`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Floor`                               | `T={double,float}`
+`FloorDiv`                            | `T={complex64,double,float,int32,int64}`
+`FloorMod`                            | `T={double,float,int32,int64}`
+`FusedBatchNorm`                      | `T={float}`
+`FusedBatchNormGrad`                  | `T={float}`
+`FusedBatchNormGradV2`                | `U={float}`<br>`T={float}`
+`FusedBatchNormV2`                    | `U={float}`<br>`T={float}`
+`Gather`                              | `Tindices={int32,int64}`<br>`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`GatherV2`                            | `Taxis={int32,int64}`<br>`Tindices={int32,int64}`<br>`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Greater`                             | `T={double,float,int32,int64,uint32,uint64}`
+`GreaterEqual`                        | `T={double,float,int32,int64,uint32,uint64}`
+`Identity`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`IdentityN`                           | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Imag`                                | `Tout={double,float}`<br>`T={complex64}`
+`Inv`                                 | `T={complex64,double,float,int32,int64}`
+`Invert`                              | `T={int32,int64,uint32,uint64}`
+`InvertPermutation`                   | `T={int32}`
+`IsFinite`                            | `T={double,float}`
+`IsInf`                               | `T={double,float}`
+`IsNan`                               | `T={double,float}`
+`L2Loss`                              | `T={double,float}`
+`LRN`                                 | `T={float}`
+`LRNGrad`                             | `T={float}`
+`LeftShift`                           | `T={int32,int64,uint32,uint64}`
+`Less`                                | `T={double,float,int32,int64,uint32,uint64}`
+`LessEqual`                           | `T={double,float,int32,int64,uint32,uint64}`
+`LinSpace`                            | `Tidx={int32,int64}`<br>`T={double,float}`
+`Log`                                 | `T={complex64,double,float}`
+`Log1p`                               | `T={complex64,double,float}`
+`LogSoftmax`                          | `T={double,float}`
+`LogicalAnd`                          |
+`LogicalNot`                          |
+`LogicalOr`                           |
+`MatMul`                              | `T={complex64,double,float}`
+`MatrixDiag`                          | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`MatrixDiagPart`                      | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Max`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`MaxPool`                             | `T={double,float,int32,int64}`
+`MaxPool3D`                           | `T={float}`
+`MaxPool3DGrad`                       | `TInput={float}`<br>`T={float}`
+`MaxPoolGrad`                         | `T={double,float,int32,int64,uint32,uint64}`
+`Maximum`                             | `T={double,float,int32,int64}`
+`Mean`                                | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Min`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Minimum`                             | `T={double,float,int32,int64}`
+`MirrorPad`                           | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Mod`                                 | `T={double,float,int32,int64}`
+`Mul`                                 | `T={complex64,double,float,int32,int64}`
+`Multinomial`                         | `output_dtype={int32,int64}`<br>`T={double,float,int32,int64,uint32,uint64}`
+`Neg`                                 | `T={complex64,double,float,int32,int64}`
+`NoOp`                                |
+`NotEqual`                            | `T={bool,complex64,double,float,int32,int64}`
+`OneHot`                              | `TI={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`OnesLike`                            | `T={bool,complex64,double,float,int32,int64}`
+`Pack`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Pad`                                 | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`PadV2`                               | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ParallelDynamicStitch`               | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Pow`                                 | `T={complex64,double,float,int32,int64}`
+`PreventGradient`                     | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Prod`                                | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`QuantizeAndDequantizeV2`             | `T={double,float}`
+`RandomStandardNormal`                | `dtype={float}`
+`RandomUniform`                       | `T={int32,int64}`<br>`dtype={double,float}`
+`RandomUniformInt`                    | `T={int32,int64}`<br>`Tout={int32,int64}`
+`Range`                               | `Tidx={double,float,int32,int64}`
+`Rank`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ReadVariableOp`                      | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Real`                                | `Tout={double,float}`<br>`T={complex64}`
+`RealDiv`                             | `T={complex64,double,float,int32,int64}`
+`Reciprocal`                          | `T={complex64,double,float,int32,int64}`
+`ReciprocalGrad`                      | `T={complex64,double,float}`
+`Relu`                                | `T={double,float,int32,int64,uint32,uint64}`
+`Relu6`                               | `T={double,float,int32,int64,uint32,uint64}`
+`Relu6Grad`                           | `T={double,float,int32,int64,uint32,uint64}`
+`ReluGrad`                            | `T={double,float,int32,int64,uint32,uint64}`
+`Reshape`                             | `Tshape={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ResourceApplyAdagrad`                | `T={double,float}`
+`ResourceApplyAdam`                   | `T={double,float}`
+`ResourceApplyFtrl`                   | `T={double,float}`
+`ResourceApplyFtrlV2`                 | `T={double,float}`
+`ResourceApplyGradientDescent`        | `T={double,float}`
+`ResourceApplyMomentum`               | `T={double,float}`
+`ResourceApplyRMSProp`                | `T={double,float}`
+`ResourceGather`                      | `Tindices={int32,int64}`<br>`dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`ResourceStridedSliceAssign`          | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Reverse`                             | `T={bool,complex64,double,float,int32,int64}`
+`ReverseV2`                           | `T={bool,complex64,double,float,int32,int64}`<br>`Tidx={int32,int64}`
+`RightShift`                          | `T={int32,int64,uint32,uint64}`
+`Rint`                                | `T={double,float}`
+`Round`                               | `T={complex64,double,float,int32,int64}`
+`Rsqrt`                               | `T={complex64,double,float}`
+`RsqrtGrad`                           | `T={complex64,double,float}`
+`Select`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Selu`                                | `T={double,float}`
+`SeluGrad`                            | `T={double,float}`
+`Shape`                               | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ShapeN`                              | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sigmoid`                             | `T={complex64,double,float}`
+`SigmoidGrad`                         | `T={complex64,double,float}`
+`Sign`                                | `T={complex64,double,float,int32,int64}`
+`Sin`                                 | `T={complex64,double,float}`
+`Sinh`                                | `T={complex64,double,float}`
+`Size`                                | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Slice`                               | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Softmax`                             | `T={double,float}`
+`SoftmaxCrossEntropyWithLogits`       | `T={double,float}`
+`Softplus`                            | `T={double,float,int32,int64,uint32,uint64}`
+`SoftplusGrad`                        | `T={double,float,int32,int64,uint32,uint64}`
+`Softsign`                            | `T={double,float,int32,int64,uint32,uint64}`
+`SoftsignGrad`                        | `T={double,float,int32,int64,uint32,uint64}`
+`SpaceToBatch`                        | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SpaceToBatchND`                      | `Tblock_shape={int32,int64}`<br>`Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SpaceToDepth`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SparseMatMul`                        | `Tb={float}`<br>`Ta={float}`
+`SparseSoftmaxCrossEntropyWithLogits` | `Tlabels={int32,int64}`<br>`T={double,float}`
+`Split`                               | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SplitV`                              | `Tlen={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sqrt`                                | `T={complex64,double,float}`
+`SqrtGrad`                            | `T={complex64,double,float}`
+`Square`                              | `T={complex64,double,float,int32,int64}`
+`SquaredDifference`                   | `T={complex64,double,float,int32,int64}`
+`Squeeze`                             | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackCloseV2`                        |
+`StackPopV2`                          | `elem_type={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackPushV2`                         | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackV2`                             | `elem_type={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StatelessRandomNormal`               | `Tseed={int32}`<br>`T={int32,int64}`<br>`dtype={float}`
+`StatelessRandomUniform`              | `Tseed={int32}`<br>`T={int32,int64}`<br>`dtype={float}`
+`StopGradient`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StridedSlice`                        | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StridedSliceGrad`                    | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sub`                                 | `T={complex64,double,float,int32,int64}`
+`Sum`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`SymbolicGradient`                    | `Tout={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`Tin={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Tan`                                 | `T={complex64,double,float,int32,int64}`
+`Tanh`                                | `T={complex64,double,float}`
+`TanhGrad`                            | `T={complex64,double,float}`
+`TensorArrayCloseV3`                  |
+`TensorArrayConcatV3`                 | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayGatherV3`                 | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayGradV3`                   |
+`TensorArrayReadV3`                   | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayScatterV3`                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArraySizeV3`                   |
+`TensorArraySplitV3`                  | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayV3`                       | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayWriteV3`                  | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Tile`                                | `Tmultiples={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Transpose`                           | `Tperm={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TruncateDiv`                         | `T={complex64,double,float,int32,int64}`
+`TruncateMod`                         | `T={double,float,int32,int64}`
+`TruncatedNormal`                     | `T={int32,int64}`<br>`dtype={double,float}`
+`Unpack`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`UnsortedSegmentSum`                  | `Tnumsegments={int32,int64}`<br>`Tindices={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`VarIsInitializedOp`                  |
+`VariableShape`                       | `out_type={int32,int64}`
+`XlaWhile`                            | `T={bool,complex64,double,float,int32,int64,resource,uint32,uint64}`
+`ZerosLike`                           | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_Arg`                                | `T={bool,complex64,double,float,int32,int64,resource,uint32,uint64}`
+`_ArrayToList`                        | `out_types={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_ListToArray`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`Tin={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_Retval`                             | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_XLARecv`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_XLASend`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+
+To regenerate this table, run:
+
+```shell
+bazel run -c opt -- tensorflow/compiler/tf2xla:tf2xla_supported_ops --device=XLA_CPU_JIT
+```
diff --git a/tensorflow/compiler/tf2xla/g3doc/gpu_supported_ops.md b/tensorflow/compiler/tf2xla/g3doc/gpu_supported_ops.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4b7621ad2858fe17e93d292dd807e4f7c1c336b
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/g3doc/gpu_supported_ops.md
@@ -0,0 +1,238 @@
+**Supported operators for device: XLA_GPU_JIT**
+
+Operator                              | Type Constraint
+------------------------------------- | ---------------
+`Abs`                                 | `T={double,float,int32,int64}`
+`Acosh`                               | `T={complex64,double,float}`
+`Add`                                 | `T={complex64,double,float,int32,int64}`
+`AddN`                                | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`All`                                 | `Tidx={int32,int64}`
+`Angle`                               | `Tout={double,float}`<br>`T={complex64}`
+`Any`                                 | `Tidx={int32,int64}`
+`ApproximateEqual`                    | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`ArgMax`                              | `Tidx={int32,int64}`<br>`output_type={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`ArgMin`                              | `Tidx={int32,int64}`<br>`output_type={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Asinh`                               | `T={complex64,double,float}`
+`AssignAddVariableOp`                 | `dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`AssignSubVariableOp`                 | `dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`AssignVariableOp`                    | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Atan2`                               | `T={double,float}`
+`Atanh`                               | `T={complex64,double,float}`
+`AvgPool`                             | `T={double,float}`
+`AvgPool3D`                           | `T={double,float}`
+`AvgPool3DGrad`                       | `T={double,float}`
+`AvgPoolGrad`                         | `T={double,float}`
+`BatchMatMul`                         | `T={complex64,double,float,int32}`
+`BatchToSpace`                        | `Tidx={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`BatchToSpaceND`                      | `Tcrops={int32,int64}`<br>`Tblock_shape={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAdd`                             | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAddGrad`                         | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BiasAddV1`                           | `T={complex64,double,float,int32,int64,uint32,uint64}`
+`BitwiseAnd`                          | `T={int32,int64,uint32,uint64}`
+`BitwiseOr`                           | `T={int32,int64,uint32,uint64}`
+`BroadcastArgs`                       | `T={int32,int64}`
+`BroadcastGradientArgs`               | `T={int32,int64}`
+`Cast`                                | `DstT={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`SrcT={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Ceil`                                | `T={double,float}`
+`Cholesky`                            | `T={complex64,double,float}`
+`Complex`                             | `Tout={complex64}`<br>`T={double,float}`
+`ComplexAbs`                          | `Tout={double,float}`<br>`T={complex64}`
+`Concat`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ConcatOffset`                        |
+`ConcatV2`                            | `Tidx={int32}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Conj`                                | `T={complex64}`
+`Const`                               | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ControlTrigger`                      |
+`Conv2D`                              | `T={float}`
+`Conv2DBackpropFilter`                | `T={float}`
+`Conv2DBackpropInput`                 | `T={float}`
+`Conv3D`                              | `T={double,float}`
+`Conv3DBackpropFilterV2`              | `T={double,float}`
+`Conv3DBackpropInputV2`               | `T={double,float}`
+`Cos`                                 | `T={complex64,double,float}`
+`Cosh`                                | `T={complex64,double,float}`
+`Cross`                               | `T={double,float,int32,int64,uint32,uint64}`
+`Cumprod`                             | `Tidx={int32,int64}`<br>`T={float}`
+`Cumsum`                              | `Tidx={int32,int64}`<br>`T={float}`
+`DepthToSpace`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`DepthwiseConv2dNative`               | `T={double,float}`
+`DepthwiseConv2dNativeBackpropFilter` | `T={double,float}`
+`DepthwiseConv2dNativeBackpropInput`  | `T={double,float}`
+`Diag`                                | `T={complex64,double,float,int32,int64}`
+`DiagPart`                            | `T={complex64,double,float,int32,int64}`
+`Div`                                 | `T={complex64,double,float,int32,int64}`
+`DynamicStitch`                       | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Elu`                                 | `T={double,float}`
+`EluGrad`                             | `T={double,float}`
+`Equal`                               | `T={bool,complex64,double,float,int32,int64}`
+`Exp`                                 | `T={complex64,double,float}`
+`ExpandDims`                          | `Tdim={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Expm1`                               | `T={complex64,double,float}`
+`Fill`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Floor`                               | `T={double,float}`
+`FloorDiv`                            | `T={complex64,double,float,int32,int64}`
+`FloorMod`                            | `T={double,float,int32,int64}`
+`FusedBatchNorm`                      | `T={float}`
+`FusedBatchNormGrad`                  | `T={float}`
+`FusedBatchNormGradV2`                | `U={float}`<br>`T={float}`
+`FusedBatchNormV2`                    | `U={float}`<br>`T={float}`
+`Gather`                              | `Tindices={int32,int64}`<br>`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`GatherV2`                            | `Taxis={int32,int64}`<br>`Tindices={int32,int64}`<br>`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Greater`                             | `T={double,float,int32,int64,uint32,uint64}`
+`GreaterEqual`                        | `T={double,float,int32,int64,uint32,uint64}`
+`Identity`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`IdentityN`                           | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Imag`                                | `Tout={double,float}`<br>`T={complex64}`
+`Inv`                                 | `T={complex64,double,float,int32,int64}`
+`Invert`                              | `T={int32,int64,uint32,uint64}`
+`InvertPermutation`                   | `T={int32}`
+`IsFinite`                            | `T={double,float}`
+`IsInf`                               | `T={double,float}`
+`IsNan`                               | `T={double,float}`
+`L2Loss`                              | `T={double,float}`
+`LRN`                                 | `T={float}`
+`LRNGrad`                             | `T={float}`
+`LeftShift`                           | `T={int32,int64,uint32,uint64}`
+`Less`                                | `T={double,float,int32,int64,uint32,uint64}`
+`LessEqual`                           | `T={double,float,int32,int64,uint32,uint64}`
+`LinSpace`                            | `Tidx={int32,int64}`<br>`T={double,float}`
+`Log`                                 | `T={complex64,double,float}`
+`Log1p`                               | `T={complex64,double,float}`
+`LogSoftmax`                          | `T={double,float}`
+`LogicalAnd`                          |
+`LogicalNot`                          |
+`LogicalOr`                           |
+`MatMul`                              | `T={complex64,double,float}`
+`MatrixDiag`                          | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`MatrixDiagPart`                      | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Max`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`MaxPool`                             | `T={double,float,int32,int64}`
+`MaxPool3D`                           | `T={float}`
+`MaxPool3DGrad`                       | `TInput={float}`<br>`T={float}`
+`MaxPoolGrad`                         | `T={double,float,int32,int64,uint32,uint64}`
+`Maximum`                             | `T={double,float,int32,int64}`
+`Mean`                                | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Min`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`Minimum`                             | `T={double,float,int32,int64}`
+`MirrorPad`                           | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Mod`                                 | `T={double,float,int32,int64}`
+`Mul`                                 | `T={complex64,double,float,int32,int64}`
+`Multinomial`                         | `output_dtype={int32,int64}`<br>`T={double,float,int32,int64,uint32,uint64}`
+`Neg`                                 | `T={complex64,double,float,int32,int64}`
+`NoOp`                                |
+`NotEqual`                            | `T={bool,complex64,double,float,int32,int64}`
+`OneHot`                              | `TI={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`OnesLike`                            | `T={bool,complex64,double,float,int32,int64}`
+`Pack`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Pad`                                 | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`PadV2`                               | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ParallelDynamicStitch`               | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Pow`                                 | `T={complex64,double,float,int32,int64}`
+`PreventGradient`                     | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Prod`                                | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`QuantizeAndDequantizeV2`             | `T={double,float}`
+`Range`                               | `Tidx={double,float,int32,int64}`
+`Rank`                                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ReadVariableOp`                      | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Real`                                | `Tout={double,float}`<br>`T={complex64}`
+`RealDiv`                             | `T={complex64,double,float,int32,int64}`
+`Reciprocal`                          | `T={complex64,double,float,int32,int64}`
+`ReciprocalGrad`                      | `T={complex64,double,float}`
+`Relu`                                | `T={double,float,int32,int64,uint32,uint64}`
+`Relu6`                               | `T={double,float,int32,int64,uint32,uint64}`
+`Relu6Grad`                           | `T={double,float,int32,int64,uint32,uint64}`
+`ReluGrad`                            | `T={double,float,int32,int64,uint32,uint64}`
+`Reshape`                             | `Tshape={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ResourceApplyAdagrad`                | `T={double,float}`
+`ResourceApplyAdam`                   | `T={double,float}`
+`ResourceApplyFtrl`                   | `T={double,float}`
+`ResourceApplyFtrlV2`                 | `T={double,float}`
+`ResourceApplyGradientDescent`        | `T={double,float}`
+`ResourceApplyMomentum`               | `T={double,float}`
+`ResourceApplyRMSProp`                | `T={double,float}`
+`ResourceGather`                      | `Tindices={int32,int64}`<br>`dtype={complex64,double,float,int32,int64,uint32,uint64}`
+`ResourceStridedSliceAssign`          | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Reverse`                             | `T={bool,complex64,double,float,int32,int64}`
+`ReverseV2`                           | `T={bool,complex64,double,float,int32,int64}`<br>`Tidx={int32,int64}`
+`RightShift`                          | `T={int32,int64,uint32,uint64}`
+`Rint`                                | `T={double,float}`
+`Round`                               | `T={complex64,double,float,int32,int64}`
+`Rsqrt`                               | `T={complex64,double,float}`
+`RsqrtGrad`                           | `T={complex64,double,float}`
+`Select`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Selu`                                | `T={double,float}`
+`SeluGrad`                            | `T={double,float}`
+`Shape`                               | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`ShapeN`                              | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sigmoid`                             | `T={complex64,double,float}`
+`SigmoidGrad`                         | `T={complex64,double,float}`
+`Sign`                                | `T={complex64,double,float,int32,int64}`
+`Sin`                                 | `T={complex64,double,float}`
+`Sinh`                                | `T={complex64,double,float}`
+`Size`                                | `out_type={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Slice`                               | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Softmax`                             | `T={double,float}`
+`SoftmaxCrossEntropyWithLogits`       | `T={double,float}`
+`Softplus`                            | `T={double,float,int32,int64,uint32,uint64}`
+`SoftplusGrad`                        | `T={double,float,int32,int64,uint32,uint64}`
+`Softsign`                            | `T={double,float,int32,int64,uint32,uint64}`
+`SoftsignGrad`                        | `T={double,float,int32,int64,uint32,uint64}`
+`SpaceToBatch`                        | `Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SpaceToBatchND`                      | `Tblock_shape={int32,int64}`<br>`Tpaddings={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SpaceToDepth`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SparseMatMul`                        | `Tb={float}`<br>`Ta={float}`
+`SparseSoftmaxCrossEntropyWithLogits` | `Tlabels={int32,int64}`<br>`T={double,float}`
+`Split`                               | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`SplitV`                              | `Tlen={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sqrt`                                | `T={complex64,double,float}`
+`SqrtGrad`                            | `T={complex64,double,float}`
+`Square`                              | `T={complex64,double,float,int32,int64}`
+`SquaredDifference`                   | `T={complex64,double,float,int32,int64}`
+`Squeeze`                             | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackCloseV2`                        |
+`StackPopV2`                          | `elem_type={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackPushV2`                         | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StackV2`                             | `elem_type={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StatelessRandomNormal`               | `Tseed={int32}`<br>`T={int32,int64}`<br>`dtype={float}`
+`StatelessRandomUniform`              | `Tseed={int32}`<br>`T={int32,int64}`<br>`dtype={float}`
+`StopGradient`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StridedSlice`                        | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`StridedSliceGrad`                    | `Index={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Sub`                                 | `T={complex64,double,float,int32,int64}`
+`Sum`                                 | `Tidx={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`SymbolicGradient`                    | `Tout={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`Tin={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Tan`                                 | `T={complex64,double,float,int32,int64}`
+`Tanh`                                | `T={complex64,double,float}`
+`TanhGrad`                            | `T={complex64,double,float}`
+`TensorArrayCloseV3`                  |
+`TensorArrayConcatV3`                 | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayGatherV3`                 | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayGradV3`                   |
+`TensorArrayReadV3`                   | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayScatterV3`                | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArraySizeV3`                   |
+`TensorArraySplitV3`                  | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayV3`                       | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TensorArrayWriteV3`                  | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Tile`                                | `Tmultiples={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`Transpose`                           | `Tperm={int32,int64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`TruncateDiv`                         | `T={complex64,double,float,int32,int64}`
+`TruncateMod`                         | `T={double,float,int32,int64}`
+`Unpack`                              | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`UnsortedSegmentSum`                  | `Tnumsegments={int32,int64}`<br>`Tindices={int32,int64}`<br>`T={complex64,double,float,int32,int64,uint32,uint64}`
+`VarIsInitializedOp`                  |
+`VariableShape`                       | `out_type={int32,int64}`
+`XlaWhile`                            | `T={bool,complex64,double,float,int32,int64,resource,uint32,uint64}`
+`ZerosLike`                           | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_Arg`                                | `T={bool,complex64,double,float,int32,int64,resource,uint32,uint64}`
+`_ArrayToList`                        | `out_types={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_ListToArray`                        | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`<br>`Tin={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_Retval`                             | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_XLARecv`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+`_XLASend`                            | `T={bool,complex64,double,float,int32,int64,uint32,uint64}`
+
+To regenerate this table, run:
+
+```shell
+bazel run -c opt -- tensorflow/compiler/tf2xla:tf2xla_supported_ops --device=XLA_GPU_JIT
+```
diff --git a/tensorflow/compiler/tf2xla/graph_compiler.cc b/tensorflow/compiler/tf2xla/graph_compiler.cc
index 8062f0c03ca60e88bd5c021092dceb105232219f..02215b5112d37f726604da2c2caa4f804388d6e5 100644
--- a/tensorflow/compiler/tf2xla/graph_compiler.cc
+++ b/tensorflow/compiler/tf2xla/graph_compiler.cc
@@ -34,6 +34,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/common_runtime/graph_optimizer.h"
 #include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/graph_constructor.h"
@@ -144,7 +145,9 @@ Status GraphCompiler::Compile() {
     } else {
       device_->Compute(CHECK_NOTNULL(params.op_kernel), &op_context);
       Status s = op_context.status();
-      TF_RETURN_IF_ERROR(s);
+      if (!s.ok()) {
+        return AttachDef(s, n->def());
+      }
     }
 
     // Set up outputs. Also check if outputs from the previous computation is
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 6302fece1ffb27b6c7170fcfb90f5985f5b50659..5e1b01878b74f2fbc2e84f8c2db1fa37c2c1eb0e 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -4,6 +4,7 @@ package(
     default_visibility = ["//tensorflow/compiler/tf2xla:internal"],
 )
 
+load("//tensorflow:tensorflow.bzl", "tf_copts")
 load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
 
 tf_kernel_library(
@@ -30,11 +31,14 @@ tf_kernel_library(
         "diag_op.cc",
         "dynamic_stitch_op.cc",
         "elu_op.cc",
+        "fft_ops.cc",
         "fill_op.cc",
         "function_ops.cc",
         "gather_op.cc",
         "gather_op_helpers.h",
         "identity_op.cc",
+        "image_ops.cc",
+        "image_resize_ops.cc",
         "index_ops.cc",
         "l2loss_op.cc",
         "lrn_ops.cc",
@@ -54,11 +58,13 @@ tf_kernel_library(
         "reshape_op.cc",
         "retval_op.cc",
         "reverse_op.cc",
+        "scan_ops.cc",
         "segment_reduction_ops.cc",
         "select_op.cc",
         "sendrecv_ops.cc",
         "sequence_ops.cc",
         "shape_op.cc",
+        "shape_util.cc",
         "slice_op.cc",
         "softmax_op.cc",
         "spacetobatch_op.cc",
@@ -78,6 +84,7 @@ tf_kernel_library(
     hdrs = [
         "gather_op.h",
         "index_ops.h",
+        "shape_util.h",
     ],
     deps = [
         ":while_op",
@@ -85,7 +92,9 @@ tf_kernel_library(
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/lib:batch_dot",
         "//tensorflow/compiler/tf2xla/lib:cholesky",
+        "//tensorflow/compiler/tf2xla/lib:util",
         "//tensorflow/compiler/tf2xla/ops:sendrecv_ops",
+        "//tensorflow/compiler/xla:array4d",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:util",
@@ -94,9 +103,11 @@ tf_kernel_library(
         "//tensorflow/compiler/xla/client:computation_builder",
         "//tensorflow/compiler/xla/client/lib:arithmetic",
         "//tensorflow/core:framework",
+        "//tensorflow/core:image_ops_op_lib",
         "//tensorflow/core:lib",
         "//tensorflow/core:linalg_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:spectral_ops_op_lib",
         "//tensorflow/core:stateless_random_ops_op_lib",
         "//tensorflow/core/kernels:bounds_check",
         "//tensorflow/core/kernels:concat_lib",
@@ -157,6 +168,7 @@ tf_kernel_library(
 cc_library(
     name = "index_ops_kernel_argmax_float_1d",
     srcs = ["index_ops_kernel_argmax_float_1d.cc"],
+    copts = tf_copts(),
     visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry",
@@ -169,6 +181,7 @@ cc_library(
 cc_library(
     name = "index_ops_kernel_argmax_float_2d",
     srcs = ["index_ops_kernel_argmax_float_2d.cc"],
+    copts = tf_copts(),
     visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry",
diff --git a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
index 248e9d111e556dcdd75581aa6562a66fc8b57063..a249b1869f547f8e5aa725f9f5cf391b10429928 100644
--- a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc
@@ -14,7 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 // XLA implementation of BatchNorm operations.
-#include "tensorflow/compiler/tf2xla/literal_util.h"
+#include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -26,43 +26,63 @@ namespace {
 class FusedBatchNormOp : public XlaOpKernel {
  public:
   explicit FusedBatchNormOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
-    string data_format;
     OP_REQUIRES_OK(ctx, ctx->GetAttr("epsilon", &epsilon_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("is_training", &is_training_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format));
-    TensorFormat tensor_format;
-    if (ctx->GetAttr("data_format", &data_format).ok()) {
-      OP_REQUIRES(ctx, FormatFromString(data_format, &tensor_format),
-                  errors::InvalidArgument("Invalid data format"));
-      OP_REQUIRES(
-          ctx, (tensor_format == FORMAT_NHWC || tensor_format == FORMAT_NCHW),
-          errors::InvalidArgument("Not supported format"));
-      feature_index_ = GetTensorFeatureDimIndex(/*num_dims=*/4, tensor_format);
-    }
+    string data_format_str;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(
+        ctx, FormatFromString(data_format_str, &data_format_),
+        errors::InvalidArgument("Invalid data format: ", data_format_str));
+    OP_REQUIRES(ctx,
+                (data_format_ == FORMAT_NHWC || data_format_ == FORMAT_NCHW),
+                errors::InvalidArgument(
+                    "Unsupported data format ", ToString(data_format_),
+                    "; supported formats are NHWC and NCHW"));
   }
 
   void Compile(XlaOpKernelContext* ctx) override {
+    xla::PrimitiveType input_type;
+    OP_REQUIRES_OK(ctx,
+                   DataTypeToPrimitiveType(ctx->input_type(0), &input_type));
+    xla::PrimitiveType scale_type;
+    OP_REQUIRES_OK(ctx,
+                   DataTypeToPrimitiveType(ctx->input_type(1), &scale_type));
+
+    xla::ComputationBuilder* builder = ctx->builder();
+
+    xla::ComputationDataHandle input = ctx->Input(0);
+    TensorShape input_shape = ctx->InputShape(0);
+
+    int feature_index =
+        GetTensorFeatureDimIndex(input_shape.dims(), data_format_);
+
+    // TODO(b/69928690): support mixed precision in the XLA batch normalization
+    // operators. As a workaround, cast everything to the statistics type (which
+    // may be more precise than the input type).
+    input = builder->ConvertElementType(input, scale_type);
+
     if (is_training_) {
-      xla::ComputationDataHandle output = ctx->builder()->BatchNormTraining(
-          ctx->Input(0), ctx->Input(1), ctx->Input(2), epsilon_,
-          feature_index_);
+      xla::ComputationDataHandle output = builder->BatchNormTraining(
+          input, ctx->Input(1), ctx->Input(2), epsilon_, feature_index);
 
       // In training mode, outputs the normalized value as well as the
       // calculated mean and variance.
-      for (int i = 0; i < 3; i++) {
-        ctx->SetOutput(i, ctx->builder()->GetTupleElement(output, i));
-      }
+      ctx->SetOutput(0, builder->ConvertElementType(
+                            builder->GetTupleElement(output, 0), input_type));
+      ctx->SetOutput(1, builder->GetTupleElement(output, 1));
+      ctx->SetOutput(2, builder->GetTupleElement(output, 2));
+
       // Output 3 and 4 for "FusedBatchNorm" are currently marked as "reserved
       // space 1 & 2". They are used to pass the per-batch mean and
       // variance to the gradient. Here we maintain the same behavior by setting
       // them to the mean and variance calculated by BatchNormTraining.
-      ctx->SetOutput(3, ctx->builder()->GetTupleElement(output, 1));
-      ctx->SetOutput(4, ctx->builder()->GetTupleElement(output, 2));
+      ctx->SetOutput(3, builder->GetTupleElement(output, 1));
+      ctx->SetOutput(4, builder->GetTupleElement(output, 2));
     } else {
-      xla::ComputationDataHandle output = ctx->builder()->BatchNormInference(
-          ctx->Input(0), ctx->Input(1), ctx->Input(2), ctx->Input(3),
-          ctx->Input(4), epsilon_, feature_index_);
-      ctx->SetOutput(0, output);
+      xla::ComputationDataHandle output = builder->BatchNormInference(
+          input, ctx->Input(1), ctx->Input(2), ctx->Input(3), ctx->Input(4),
+          epsilon_, feature_index);
+      ctx->SetOutput(0, builder->ConvertElementType(output, input_type));
       // Directly send input to output as mean and variance in inference mode.
       ctx->SetOutput(1, ctx->Input(3));
       ctx->SetOutput(2, ctx->Input(4));
@@ -73,55 +93,113 @@ class FusedBatchNormOp : public XlaOpKernel {
 
  private:
   float epsilon_;
-  int64 feature_index_;
+  TensorFormat data_format_;
   bool is_training_;
 };
 
 REGISTER_XLA_OP(Name("FusedBatchNorm"), FusedBatchNormOp);
+REGISTER_XLA_OP(Name("FusedBatchNormV2"), FusedBatchNormOp);
 
 class FusedBatchNormGradOp : public XlaOpKernel {
  public:
   explicit FusedBatchNormGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
-    string data_format;
     OP_REQUIRES_OK(ctx, ctx->GetAttr("epsilon", &epsilon_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format));
-    bool is_training;
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("is_training", &is_training));
-    CHECK(is_training) << "FusedBatchNormGradOp with is_training=False cannot "
-                          "be used with XLA for now!";
-    TensorFormat tensor_format;
-    if (ctx->GetAttr("data_format", &data_format).ok()) {
-      OP_REQUIRES(ctx, FormatFromString(data_format, &tensor_format),
-                  errors::InvalidArgument("Invalid data format"));
-      OP_REQUIRES(
-          ctx, (tensor_format == FORMAT_NHWC || tensor_format == FORMAT_NCHW),
-          errors::InvalidArgument("Not supported format"));
-      feature_index_ = GetTensorFeatureDimIndex(4, tensor_format);
-    }
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("is_training", &is_training_));
+    string data_format_str;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(
+        ctx, FormatFromString(data_format_str, &data_format_),
+        errors::InvalidArgument("Invalid data format: ", data_format_str));
+    OP_REQUIRES(ctx,
+                (data_format_ == FORMAT_NHWC || data_format_ == FORMAT_NCHW),
+                errors::InvalidArgument(
+                    "Unsupported data format ", ToString(data_format_),
+                    "; supported formats are NHWC and NCHW"));
   }
 
   void Compile(XlaOpKernelContext* ctx) override {
-    auto grad_output = ctx->Input(0);
-    auto activation = ctx->Input(1);
+    xla::ComputationBuilder* b = ctx->builder();
+
+    auto grad_backprop = ctx->Input(0);
+    auto activations = ctx->Input(1);
     auto scale = ctx->Input(2);
     auto mean = ctx->Input(3);
     auto var = ctx->Input(4);
-    xla::ComputationDataHandle output = ctx->builder()->BatchNormGrad(
-        activation, scale, mean, var, grad_output, epsilon_, feature_index_);
 
-    for (int i = 0; i < 3; i++) {
-      ctx->SetOutput(i, ctx->builder()->GetTupleElement(output, i));
+    TensorShape input_shape = ctx->InputShape(0);
+    int feature_index =
+        GetTensorFeatureDimIndex(input_shape.dims(), data_format_);
+
+    DataType input_dtype = ctx->input_type(0);
+    DataType scale_dtype = ctx->input_type(2);
+    xla::PrimitiveType input_type;
+    OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(input_dtype, &input_type));
+    xla::PrimitiveType scale_type;
+    OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(scale_dtype, &scale_type));
+
+    // TODO(b/69928690): support mixed precision in the XLA batch normalization
+    // operators. For now, cast everything to the statistics type (which
+    // may be more precise than the input type).
+    grad_backprop = b->ConvertElementType(grad_backprop, scale_type);
+    activations = b->ConvertElementType(activations, scale_type);
+
+    xla::ComputationDataHandle x_backprop;
+    xla::ComputationDataHandle scale_backprop;
+    xla::ComputationDataHandle offset_backprop;
+    if (is_training_) {
+      xla::ComputationDataHandle output =
+          b->BatchNormGrad(activations, scale, mean, var, grad_backprop,
+                           epsilon_, feature_index);
+
+      x_backprop = b->GetTupleElement(output, 0);
+      scale_backprop = b->GetTupleElement(output, 1);
+      offset_backprop = b->GetTupleElement(output, 2);
+    } else {
+      // Reduce over all dimensions except the feature dim.
+      std::vector<int64> reduction_dims(input_shape.dims() - 1);
+      std::iota(reduction_dims.begin(), reduction_dims.begin() + feature_index,
+                0);
+      std::iota(reduction_dims.begin() + feature_index, reduction_dims.end(),
+                feature_index + 1);
+      // offset_backprop  = sum(y_backprop)
+      // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var +
+      // epsilon))
+      // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon))
+      offset_backprop =
+          b->Reduce(grad_backprop, XlaHelpers::Zero(b, scale_dtype),
+                    *ctx->GetOrCreateAdd(scale_dtype), reduction_dims);
+
+      // scratch1 = rsqrt(pop_var + epsilon)
+      auto neg_half = XlaHelpers::FloatLiteral(b, scale_dtype, -0.5);
+      auto scratch1 =
+          b->Pow(b->Add(var, b->ConstantR0<float>(epsilon_)), neg_half);
+
+      // scratch2 = sum(y_backprop * (x - mean))
+      auto scratch2 = b->Reduce(
+          b->Mul(grad_backprop, b->Sub(activations, mean, {feature_index})),
+          XlaHelpers::Zero(b, scale_dtype), *ctx->GetOrCreateAdd(scale_dtype),
+          reduction_dims);
+
+      x_backprop =
+          b->Mul(grad_backprop, b->Mul(scratch1, scale), {feature_index});
+      scale_backprop = b->Mul(scratch1, scratch2);
     }
-    ctx->SetOutput(3, ctx->builder()->GetTupleElement(output, 1));
-    ctx->SetOutput(4, ctx->builder()->GetTupleElement(output, 2));
+
+    ctx->SetOutput(0, b->ConvertElementType(x_backprop, input_type));
+    ctx->SetOutput(1, scale_backprop);
+    ctx->SetOutput(2, offset_backprop);
+    ctx->SetConstantOutput(3, Tensor(scale_dtype, {}));
+    ctx->SetConstantOutput(4, Tensor(scale_dtype, {}));
   }
 
  private:
+  TensorFormat data_format_;
   float epsilon_;
-  int64 feature_index_;
+  bool is_training_;
 };
 
 REGISTER_XLA_OP(Name("FusedBatchNormGrad"), FusedBatchNormGradOp);
+REGISTER_XLA_OP(Name("FusedBatchNormGradV2"), FusedBatchNormGradOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc b/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc
index 21d3e64872e19109852297838043975cea6d7921..344a2ab2b6835c518c41de6f7a30fb2a34d130d2 100644
--- a/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc
@@ -159,7 +159,8 @@ class BatchToSpaceNDOp : public XlaOpKernel {
                  block_shape, crops);
   }
 };
-REGISTER_XLA_OP(Name("BatchToSpaceND"), BatchToSpaceNDOp);
+REGISTER_XLA_OP(Name("BatchToSpaceND").CompileTimeConstInput("crops"),
+                BatchToSpaceNDOp);
 
 class BatchToSpaceOp : public XlaOpKernel {
  public:
@@ -181,7 +182,10 @@ class BatchToSpaceOp : public XlaOpKernel {
  private:
   int block_size_;
 };
-REGISTER_XLA_OP(Name("BatchToSpace"), BatchToSpaceOp);
+REGISTER_XLA_OP(Name("BatchToSpace")
+                    .CompileTimeConstInput("crops")
+                    .CompileTimeConstInput("block_shape"),
+                BatchToSpaceOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/bcast_ops.cc b/tensorflow/compiler/tf2xla/kernels/bcast_ops.cc
index bb031b8c471e08ba90c554e309b850a26c3edae0..ee2c920453c3bbaef2c145df743fddf999167c39 100644
--- a/tensorflow/compiler/tf2xla/kernels/bcast_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/bcast_ops.cc
@@ -65,7 +65,10 @@ class BCastArgsOp : public XlaOpKernel {
  private:
   TF_DISALLOW_COPY_AND_ASSIGN(BCastArgsOp);
 };
-REGISTER_XLA_OP(Name("BroadcastArgs"), BCastArgsOp);
+REGISTER_XLA_OP(Name("BroadcastArgs")
+                    .CompileTimeConstInput("s0")
+                    .CompileTimeConstInput("s1"),
+                BCastArgsOp);
 
 // Given shapes of two tensors, computes the reduction indices for the
 // gradient computation.
@@ -121,7 +124,10 @@ class BCastGradArgsOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(BCastGradArgsOp);
 };
 
-REGISTER_XLA_OP(Name("BroadcastGradientArgs"), BCastGradArgsOp);
+REGISTER_XLA_OP(Name("BroadcastGradientArgs")
+                    .CompileTimeConstInput("s0")
+                    .CompileTimeConstInput("s1"),
+                BCastGradArgsOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
index 1de91924326464338352b1ac9edf77141f25ad35..2436a6074a11ad66387b232dd1c5aa135875bfc3 100644
--- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/client/computation_builder.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/types.h"
 
 namespace tensorflow {
 namespace {
@@ -75,7 +76,7 @@ static xla::ComputationDataHandle FloorDivImpl(xla::ComputationBuilder* b,
   auto abs_y = b->Abs(y);
   auto t = b->Neg(b->Sub(b->Add(abs_x, abs_y), one));
   auto result = b->Select(different_sign, b->Div(t, abs_y), b->Div(x, y));
-  if (dtype == DT_FLOAT || dtype == DT_DOUBLE) {
+  if (DataTypeIsFloating(dtype)) {
     result = b->Floor(result);
   }
   return result;
diff --git a/tensorflow/compiler/tf2xla/kernels/categorical_op.cc b/tensorflow/compiler/tf2xla/kernels/categorical_op.cc
index 592f3ecc3ce2abf33ddffe8b0e59c4e12e73e956..545aa364f937b2dc972dbe7b8c18b5897aa8e5c3 100644
--- a/tensorflow/compiler/tf2xla/kernels/categorical_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/categorical_op.cc
@@ -92,7 +92,8 @@ class CategoricalOp : public XlaOpKernel {
 };
 
 // TODO(b/68769717): Rename this sampler to Categorical.
-REGISTER_XLA_OP(Name("Multinomial"), CategoricalOp);
+REGISTER_XLA_OP(Name("Multinomial").CompileTimeConstInput("num_samples"),
+                CategoricalOp);
 
 }  // anonymous namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/concat_op.cc b/tensorflow/compiler/tf2xla/kernels/concat_op.cc
index 73a4740e29af7fa57e71ef42a342f46b0e24231d..1a246e8df9b2cd83147b50d960744332f8582a51 100644
--- a/tensorflow/compiler/tf2xla/kernels/concat_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/concat_op.cc
@@ -84,8 +84,8 @@ class ConcatBaseOp : public XlaOpKernel {
           in_shape.dims() == input_dims || (input_is_scalar && in_is_scalar),
           errors::InvalidArgument(
               "ConcatOp : Ranks of all input tensors should match: shape[0] = ",
-              input_shape.DebugString(), " vs. shape[", i, "] = ",
-              in_shape.DebugString()));
+              input_shape.DebugString(), " vs. shape[", i,
+              "] = ", in_shape.DebugString()));
       if (in_shape.dims() == 0) {
         // Inputs that come in as scalars must be reshaped to 1-vectors.
         input_data.push_back(ctx->builder()->Reshape(handle, {1}));
@@ -117,8 +117,11 @@ class ConcatV2Op : public ConcatBaseOp {
       : ConcatBaseOp(c, /* axis_index */ c->num_inputs() - 1) {}
 };
 
-REGISTER_XLA_OP(Name("Concat"), ConcatOp);
-REGISTER_XLA_OP(Name("ConcatV2").TypeConstraint("Tidx", DT_INT32), ConcatV2Op);
+REGISTER_XLA_OP(Name("Concat").CompileTimeConstInput("concat_dim"), ConcatOp);
+REGISTER_XLA_OP(Name("ConcatV2")
+                    .TypeConstraint("Tidx", DT_INT32)
+                    .CompileTimeConstInput("axis"),
+                ConcatV2Op);
 
 class ConcatOffsetOp : public XlaOpKernel {
  public:
@@ -189,10 +192,10 @@ class ConcatOffsetOp : public XlaOpKernel {
         } else {
           const int32 inp0_element = inp0_literal.Get<int>({j});
           const int32 inp_element = inp_literal.Get<int>({j});
-          OP_REQUIRES(
-              ctx, (inp0_element == inp_element),
-              errors::InvalidArgument("input[", i, ",", j, "] mismatch: ",
-                                      inp0_element, " vs. ", inp_element));
+          OP_REQUIRES(ctx, (inp0_element == inp_element),
+                      errors::InvalidArgument("input[", i, ",", j,
+                                              "] mismatch: ", inp0_element,
+                                              " vs. ", inp_element));
           out_vec(j) = 0;
         }
       }
@@ -202,7 +205,10 @@ class ConcatOffsetOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("ConcatOffset"), ConcatOffsetOp);
+REGISTER_XLA_OP(Name("ConcatOffset")
+                    .CompileTimeConstInput("concat_dim")
+                    .CompileTimeConstInput("shape"),
+                ConcatOffsetOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
index c5017704e2a45b0bd740f7a8fdcf3a0be1d445a4..81cea6d376d02c956a5257c5475fe5c10b83deb9 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc
@@ -46,72 +46,130 @@ TensorShape ExpandedFilterShapeForDepthwiseConvolution(
   return expanded_shape;
 }
 
+// Broadcast zeros to ExpandedFilterShapeForDepthwiseConvolution.
+xla::ComputationDataHandle CreateExpandedZero(
+    const TensorShape& filter_shape, DataType dtype,
+    xla::ComputationBuilder* builder) {
+  TensorShape expanded_filter_shape =
+      ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
+  return builder->Broadcast(XlaHelpers::Zero(builder, dtype),
+                            expanded_filter_shape.dim_sizes());
+}
+
+// Create a mask for depthwise convolution that will make a normal convolution
+// produce the same results as a depthwise convolution. For a [2, 2, 3, 2]
+// depthwise filter this returns a [2, 2, 3, 6] tesnsor
+//   1 1 0 0 0 0   1 1 0 0 0 0
+//   0 0 1 1 0 0   0 0 1 1 0 0
+//   0 0 0 0 1 1   0 0 0 0 1 1
+//
+//   1 1 0 0 0 0   1 1 0 0 0 0
+//   0 0 1 1 0 0   0 0 1 1 0 0
+//   0 0 0 0 1 1   0 0 0 0 1 1
+//
+// The first step is to create a one tensor, A, that is [3]
+//   0 1 2
+//
+// and another tensor, B,  that is [3 * 2]
+//   0 1 2 3 4 5
+//
+// and divide B it by 2 to get
+//   0 0 1 1 2 2
+//
+// then we broadcast the B to [2, 2, 3, 3 * 2]
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//   0 0 1 1 2 2   0 0 1 1 2 2
+//
+// Finally compare A and broadcasted B in dimension 2 amd return the result at
+// the beginning of the comment.
+xla::ComputationDataHandle CreateExpandedFilterMask(
+    const TensorShape& filter_shape, xla::ComputationBuilder* builder) {
+  TensorShape expanded_filter_shape =
+      ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
+  int64 depthwise_multiplier = filter_shape.dim_size(filter_shape.dims() - 1);
+  int64 input_feature = filter_shape.dim_size(filter_shape.dims() - 2);
+
+  // Create a M sized linspace and an M*N sized linspace that will be
+  // broadcasted into perpendicular dimensions and compared.
+  xla::ComputationDataHandle input_feature_iota;
+  // DT_INT32 Iota will always return status::OK().
+  TF_CHECK_OK(XlaHelpers::Iota(builder, DataType::DT_INT32, input_feature,
+                               &input_feature_iota));
+  xla::ComputationDataHandle expanded_feature_iota;
+  TF_CHECK_OK(XlaHelpers::Iota(builder, DataType::DT_INT32,
+                               input_feature * depthwise_multiplier,
+                               &expanded_feature_iota));
+
+  // Divide the M*N sized linspace by the depthwise_multiplier to create
+  // [0 0 1 1 2 2] in the example in the function comment.
+  expanded_feature_iota =
+      builder->Div(expanded_feature_iota,
+                   XlaHelpers::IntegerLiteral(builder, DataType::DT_INT32,
+                                              depthwise_multiplier));
+
+  // Broadcast the N*M linspace to [H, W, ..., M, M*N].
+  auto expanded_feature_broadcast_dims = expanded_filter_shape.dim_sizes();
+  expanded_feature_broadcast_dims.pop_back();
+  auto broadcasted_expanded_feature_iota = builder->Broadcast(
+      expanded_feature_iota, expanded_feature_broadcast_dims);
+
+  // Compare the broadcasted linspace to the input feature linspace in the
+  // input feature dimension to create a diagonal predicate.
+  return builder->Eq(broadcasted_expanded_feature_iota, input_feature_iota,
+                     {expanded_filter_shape.dims() - 2});
+}
+
 // Expands a filter of shape [H, W, ..., M, N] to [H, W, ..., M, M*N] by adding
 // zeros for the cross-depth filters. Used to build a depthwise convolution.
 xla::ComputationDataHandle ExpandFilterForDepthwiseConvolution(
     const TensorShape& filter_shape, DataType dtype,
     const xla::ComputationDataHandle& filter,
     xla::ComputationBuilder* builder) {
-  // Filter has shape [H, W, ..., M, N]
-  // Dilate to [H, W, ..., M*M, N] using M inter-element padding, and then
-  // reshape to [H, W, ..., M, M*N].
-  int num_spatial_dims = filter_shape.dims() - 2;
-  const int64 in_depth = filter_shape.dim_size(num_spatial_dims);
-  xla::PaddingConfig padding = xla::MakeNoPaddingConfig(filter_shape.dims());
-  padding.mutable_dimensions(num_spatial_dims)->set_interior_padding(in_depth);
-  auto dilated_filter =
-      builder->Pad(filter, XlaHelpers::Zero(builder, dtype), padding);
-
+  int64 depthwise_multiplier = filter_shape.dim_size(filter_shape.dims() - 1);
+  int64 input_feature = filter_shape.dim_size(filter_shape.dims() - 2);
   TensorShape expanded_filter_shape =
       ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
-  return builder->Reshape(dilated_filter, expanded_filter_shape.dim_sizes());
+
+  // Create a [H, W, ..., 1, N*M] reshape of the filter.
+  TensorShape implicit_broadcast_filter_shape = expanded_filter_shape;
+  implicit_broadcast_filter_shape.set_dim(
+      implicit_broadcast_filter_shape.dims() - 2, 1);
+  implicit_broadcast_filter_shape.set_dim(
+      implicit_broadcast_filter_shape.dims() - 1,
+      depthwise_multiplier * input_feature);
+  auto implicit_broadcast_filter =
+      builder->Reshape(filter, implicit_broadcast_filter_shape.dim_sizes());
+
+  // Broadcast the filter to  [H, W, ..., M, M*N].
+  auto expanded_zero = CreateExpandedZero(filter_shape, dtype, builder);
+  auto expanded_filter = builder->Add(implicit_broadcast_filter, expanded_zero);
+
+  // If the filter mask is set, choose the broadcasted filter, othwerwise,
+  // choose zero.
+  return builder->Select(CreateExpandedFilterMask(filter_shape, builder),
+                         expanded_filter, expanded_zero);
 }
 
 // Inverse of ExpandFilterForDepthwiseConvolution.
 xla::ComputationDataHandle ContractFilterForDepthwiseBackprop(
-    const TensorShape& filter_shape, DataType dtype,
+    XlaOpKernelContext* ctx, const TensorShape& filter_shape, DataType dtype,
     const xla::ComputationDataHandle& filter_backprop,
     xla::ComputationBuilder* builder) {
-  int num_spatial_dims = filter_shape.dims() - 2;
-
-  // Reshape to [H, W, ..., M*M, N]
-  TensorShape shape = filter_shape;
-  int64 in_depth = filter_shape.dim_size(num_spatial_dims);
-  shape.set_dim(num_spatial_dims, in_depth * in_depth);
-  auto reshaped = builder->Reshape(filter_backprop, shape.dim_sizes());
-
-  std::vector<int64> zeros(filter_shape.dims());
-  std::vector<int64> strides(filter_shape.dims(), 1LL);
-  strides[num_spatial_dims] = in_depth + 1;
-  return builder->Slice(reshaped, zeros, shape.dim_sizes(), strides);
-
-  // Alternate implementation for backends without strided Slice() support.
-  // TODO(phawkins): Remove when all backends support strided slice.
-  //   // Pad [..., M * (M + 1), N]
-  //   xla::PaddingConfig config =
-  //   xla::MakeNoPaddingConfig(filter_shape.dims());
-  //   config.mutable_dimensions(num_spatial_dims)
-  //     ->set_edge_padding_high(in_depth);
-  //   auto zero = XlaHelpers::Zero(builder, dtype);
-  //   auto padded = builder->Pad(reshaped, zero, config);
-  //
-  //   // Reshape to [..., M, M + 1, N]
-  //   shape = filter_shape;
-  //   shape.set_dim(num_spatial_dims, in_depth);
-  //   shape.set_dim(num_spatial_dims + 1, in_depth + 1);
-  //   int64 out_depth = filter_shape.dim_size(num_spatial_dims + 1);
-  //   shape.AddDim(out_depth);
-  //   reshaped = builder->Reshape(padded, shape.dim_sizes());
-  //
-  //   // Slice to [..., M, 1, N]
-  //   std::vector<int64> zeros(shape.dims());
-  //   std::vector<int64> strides(shape.dims(), 1LL);
-  //   shape.set_dim(num_spatial_dims + 1, 1);
-  //   auto sliced = builder->Slice(reshaped, zeros, shape.dim_sizes(),
-  //   strides);
-  //
-  //   // Reshape to [..., M, N]
-  //   return builder->Reshape(sliced, filter_shape.dim_sizes());
+  TensorShape expanded_filter_shape =
+      ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
+  auto masked_expanded_filter = builder->Select(
+      CreateExpandedFilterMask(filter_shape, builder), filter_backprop,
+      CreateExpandedZero(filter_shape, dtype, builder));
+  return builder->Reshape(
+      builder->Reduce(masked_expanded_filter, XlaHelpers::Zero(builder, dtype),
+                      *ctx->GetOrCreateAdd(dtype),
+                      {expanded_filter_shape.dims() - 2}),
+      filter_shape.dim_sizes());
 }
 
 class ConvOp : public XlaOpKernel {
@@ -121,6 +179,7 @@ class ConvOp : public XlaOpKernel {
       : XlaOpKernel(ctx),
         num_spatial_dims_(num_spatial_dims),
         depthwise_(depthwise) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_));
 
@@ -144,6 +203,22 @@ class ConvOp : public XlaOpKernel {
         errors::Unimplemented("Current implementation does not yet support "
                               "strides in the batch and depth dimensions."));
 
+    OP_REQUIRES(ctx, dilations_.size() == num_dims(),
+                errors::InvalidArgument("Dilations field must "
+                                        "specify ",
+                                        num_dims(), " dimensions"));
+    OP_REQUIRES(
+        ctx, dilations_[batch_dim] == 1 && dilations_[feature_dim] == 1,
+        errors::Unimplemented("Current implementation does not support "
+                              "dilations in the batch and depth dimensions."));
+    for (int i = 0; i < num_spatial_dims_; ++i) {
+      int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
+      OP_REQUIRES(ctx, dilations_[input_dim] >= 1,
+                  errors::Unimplemented("Dilation values must be positive; ", i,
+                                        "th spatial dimension had dilation ",
+                                        dilations_[input_dim]));
+    }
+
     const TensorShape input_shape = ctx->InputShape(0);
     // Input filter is of the following dimensions:
     // [ filter_rows, filter_cols, ..., in_depth, out_depth]
@@ -172,38 +247,53 @@ class ConvOp : public XlaOpKernel {
     xla::ComputationBuilder* b = ctx->builder();
 
     xla::ComputationDataHandle filter = ctx->Input(1);
+    TensorShape expanded_filter_shape = filter_shape;
     if (depthwise_) {
       filter = ExpandFilterForDepthwiseConvolution(
           filter_shape, ctx->input_type(0), filter, b);
+      expanded_filter_shape =
+          ExpandedFilterShapeForDepthwiseConvolution(filter_shape);
     }
 
     xla::ConvolutionDimensionNumbers dims;
-    std::vector<int64> window_strides;
+    std::vector<int64> window_strides(num_spatial_dims_);
+    std::vector<int64> lhs_dilation(num_spatial_dims_, 1);
+    std::vector<int64> rhs_dilation(num_spatial_dims_);
+    std::vector<std::pair<int64, int64>> padding(num_spatial_dims_);
+
     dims.set_input_batch_dimension(batch_dim);
     dims.set_output_batch_dimension(batch_dim);
     dims.set_input_feature_dimension(feature_dim);
     dims.set_output_feature_dimension(feature_dim);
+    dims.set_kernel_input_feature_dimension(num_spatial_dims_);
+    dims.set_kernel_output_feature_dimension(num_spatial_dims_ + 1);
+
     for (int i = 0; i < num_spatial_dims_; ++i) {
-      int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
+      const int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
       dims.add_input_spatial_dimensions(dim);
       dims.add_kernel_spatial_dimensions(i);
       dims.add_output_spatial_dimensions(dim);
-      window_strides.push_back(strides_.at(dim));
+      window_strides[i] = strides_.at(dim);
+      rhs_dilation[i] = dilations_.at(dim);
+
+      int64 unused_output_size;
+      OP_REQUIRES_OK(
+          ctx, GetWindowedOutputSizeVerboseV2(
+                   input_shape.dim_size(dim), expanded_filter_shape.dim_size(i),
+                   rhs_dilation[i], window_strides[i], padding_,
+                   &unused_output_size, &padding[i].first, &padding[i].second));
     }
-    dims.set_kernel_input_feature_dimension(num_spatial_dims_);
-    dims.set_kernel_output_feature_dimension(num_spatial_dims_ + 1);
 
-    xla::Padding xla_padding =
-        (padding_ == VALID) ? xla::Padding::kValid : xla::Padding::kSame;
-
-    xla::ComputationDataHandle conv = b->ConvWithGeneralDimensions(
-        ctx->Input(0), filter, window_strides, xla_padding, dims);
+    xla::ComputationDataHandle conv =
+        b->ConvGeneralDilated(ctx->Input(0), filter, window_strides, padding,
+                              lhs_dilation, rhs_dilation, dims);
     ctx->SetOutput(0, conv);
   }
 
  protected:
   const int num_spatial_dims_;
   const bool depthwise_;
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_ = FORMAT_NHWC;
@@ -241,6 +331,7 @@ class ConvBackpropInputOp : public XlaOpKernel {
       : XlaOpKernel(ctx),
         num_spatial_dims_(num_spatial_dims),
         depthwise_(depthwise) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_));
     string data_format;
@@ -263,6 +354,22 @@ class ConvBackpropInputOp : public XlaOpKernel {
         errors::Unimplemented("Current implementation does not yet support "
                               "strides in the batch and depth dimensions."));
 
+    OP_REQUIRES(ctx, dilations_.size() == num_dims(),
+                errors::InvalidArgument("Dilations field must "
+                                        "specify ",
+                                        num_dims(), " dimensions"));
+    OP_REQUIRES(
+        ctx, dilations_[batch_dim] == 1 && dilations_[feature_dim] == 1,
+        errors::Unimplemented("Current implementation does not support "
+                              "dilations in the batch and depth dimensions."));
+    for (int i = 0; i < num_spatial_dims_; ++i) {
+      int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
+      OP_REQUIRES(ctx, dilations_[input_dim] >= 1,
+                  errors::Unimplemented("Dilation values must be positive; ", i,
+                                        "th spatial dimension had dilation ",
+                                        dilations_[input_dim]));
+    }
+
     TensorShape input_shape;
     OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &input_shape));
 
@@ -274,10 +381,11 @@ class ConvBackpropInputOp : public XlaOpKernel {
                    : filter_shape;
     // Reuse dimension computation logic from conv_grad_ops.cc.
     ConvBackpropDimensions dims;
-    OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensions(
-                            type_string(), num_spatial_dims_, input_shape,
-                            expanded_filter_shape, out_backprop_shape, strides_,
-                            padding_, data_format_, &dims));
+    OP_REQUIRES_OK(ctx,
+                   ConvBackpropComputeDimensionsV2(
+                       type_string(), num_spatial_dims_, input_shape,
+                       expanded_filter_shape, out_backprop_shape, dilations_,
+                       strides_, padding_, data_format_, &dims));
 
     xla::ComputationBuilder* b = ctx->builder();
     auto filter = ctx->Input(1);
@@ -301,6 +409,7 @@ class ConvBackpropInputOp : public XlaOpKernel {
     std::vector<int64> kernel_spatial_dims(num_spatial_dims_);
     std::vector<std::pair<int64, int64>> padding(num_spatial_dims_);
     std::vector<int64> lhs_dilation(num_spatial_dims_);
+    std::vector<int64> rhs_dilation(num_spatial_dims_);
     std::vector<int64> ones(num_spatial_dims_, 1);
     for (int i = 0; i < num_spatial_dims_; ++i) {
       int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
@@ -312,6 +421,7 @@ class ConvBackpropInputOp : public XlaOpKernel {
       padding[i] = {dims.spatial_dims[i].pad_before,
                     dims.spatial_dims[i].pad_after};
       lhs_dilation[i] = dims.spatial_dims[i].stride;
+      rhs_dilation[i] = dilations_[dim];
     }
 
     // If this is a depthwise convolution, expand the filter.
@@ -328,7 +438,7 @@ class ConvBackpropInputOp : public XlaOpKernel {
     //   = gradients (with padding and dilation) <conv> mirrored_weights
     xla::ComputationDataHandle in_backprop = b->ConvGeneralDilated(
         out_backprop, mirrored_weights, /*window_strides=*/ones, padding,
-        lhs_dilation, /*rhs_dilation=*/ones, dnums);
+        lhs_dilation, rhs_dilation, dnums);
 
     ctx->SetOutput(0, in_backprop);
   }
@@ -336,6 +446,7 @@ class ConvBackpropInputOp : public XlaOpKernel {
  protected:
   const int num_spatial_dims_;
   const bool depthwise_;
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_ = FORMAT_NHWC;
@@ -349,21 +460,26 @@ class Conv2DBackpropInputOp : public ConvBackpropInputOp {
   explicit Conv2DBackpropInputOp(OpKernelConstruction* ctx)
       : ConvBackpropInputOp(ctx, /*num_spatial_dims=*/2, /*depthwise=*/false) {}
 };
-REGISTER_XLA_OP(Name("Conv2DBackpropInput"), Conv2DBackpropInputOp);
+REGISTER_XLA_OP(
+    Name("Conv2DBackpropInput").CompileTimeConstInput("input_sizes"),
+    Conv2DBackpropInputOp);
 
 class Conv3DBackpropInputOp : public ConvBackpropInputOp {
  public:
   explicit Conv3DBackpropInputOp(OpKernelConstruction* ctx)
       : ConvBackpropInputOp(ctx, /*num_spatial_dims=*/3, /*depthwise=*/false) {}
 };
-REGISTER_XLA_OP(Name("Conv3DBackpropInputV2"), Conv3DBackpropInputOp);
+REGISTER_XLA_OP(
+    Name("Conv3DBackpropInputV2").CompileTimeConstInput("input_sizes"),
+    Conv3DBackpropInputOp);
 
 class DepthwiseConv2DBackpropInputOp : public ConvBackpropInputOp {
  public:
   explicit DepthwiseConv2DBackpropInputOp(OpKernelConstruction* ctx)
       : ConvBackpropInputOp(ctx, /*num_spatial_dims=*/2, /*depthwise=*/true) {}
 };
-REGISTER_XLA_OP(Name("DepthwiseConv2dNativeBackpropInput"),
+REGISTER_XLA_OP(Name("DepthwiseConv2dNativeBackpropInput")
+                    .CompileTimeConstInput("input_sizes"),
                 DepthwiseConv2DBackpropInputOp);
 
 class ConvBackpropFilterOp : public XlaOpKernel {
@@ -373,6 +489,7 @@ class ConvBackpropFilterOp : public XlaOpKernel {
       : XlaOpKernel(ctx),
         num_spatial_dims_(num_spatial_dims),
         depthwise_(depthwise) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_));
     string data_format;
@@ -392,6 +509,22 @@ class ConvBackpropFilterOp : public XlaOpKernel {
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
 
+    OP_REQUIRES(ctx, dilations_.size() == num_dims(),
+                errors::InvalidArgument("Dilations field must "
+                                        "specify ",
+                                        num_dims(), " dimensions"));
+    OP_REQUIRES(
+        ctx, dilations_[n_dim] == 1 && dilations_[c_dim] == 1,
+        errors::Unimplemented("Current implementation does not support "
+                              "dilations in the batch and depth dimensions."));
+    for (int i = 0; i < num_spatial_dims_; ++i) {
+      int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
+      OP_REQUIRES(ctx, dilations_[input_dim] >= 1,
+                  errors::Unimplemented("Dilation values must be positive; ", i,
+                                        "th spatial dimension had dilation ",
+                                        dilations_[input_dim]));
+    }
+
     const TensorShape activations_shape = ctx->InputShape(0);
     TensorShape filter_shape;
     OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(1, &filter_shape));
@@ -403,10 +536,11 @@ class ConvBackpropFilterOp : public XlaOpKernel {
 
     // Reuse dimension computation logic from conv_grad_ops.cc.
     ConvBackpropDimensions dims;
-    OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensions(
-                            type_string(), num_spatial_dims_, activations_shape,
-                            expanded_filter_shape, out_backprop_shape, strides_,
-                            padding_, data_format_, &dims));
+    OP_REQUIRES_OK(ctx,
+                   ConvBackpropComputeDimensionsV2(
+                       type_string(), num_spatial_dims_, activations_shape,
+                       expanded_filter_shape, out_backprop_shape, dilations_,
+                       strides_, padding_, data_format_, &dims));
 
     xla::ComputationBuilder* b = ctx->builder();
     xla::ComputationDataHandle activations = ctx->Input(0);
@@ -426,9 +560,7 @@ class ConvBackpropFilterOp : public XlaOpKernel {
 
     // Swap n_dim and c_dim in the activations.
     dnums.set_input_batch_dimension(c_dim);
-    dnums.set_output_batch_dimension(c_dim);
     dnums.set_input_feature_dimension(n_dim);
-    dnums.set_output_feature_dimension(n_dim);
 
     // The gradients become the RHS of the convolution.
     // The gradients have shape [batch, out_rows, out_cols, ..., out_depth]
@@ -438,21 +570,29 @@ class ConvBackpropFilterOp : public XlaOpKernel {
 
     std::vector<std::pair<int64, int64>> padding(num_spatial_dims_);
     std::vector<int64> rhs_dilation(num_spatial_dims_);
+    std::vector<int64> window_strides(num_spatial_dims_);
     std::vector<int64> ones(num_spatial_dims_, 1);
 
+    // Tensorflow filter shape is [ H, W, ..., inC, outC ].
+    for (int i = 0; i < num_spatial_dims_; ++i) {
+      dnums.add_output_spatial_dimensions(i);
+    }
+    dnums.set_output_batch_dimension(num_spatial_dims_);
+    dnums.set_output_feature_dimension(num_spatial_dims_ + 1);
+
     for (int i = 0; i < num_spatial_dims_; ++i) {
       int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i);
       dnums.add_input_spatial_dimensions(dim);
       dnums.add_kernel_spatial_dimensions(dim);
-      dnums.add_output_spatial_dimensions(dim);
 
       // We will also need to pad the input with zeros such that after the
       // convolution, we get the right size for the filter.
       // The padded_in_rows should be such that when we convolve this with the
       // expanded_out_rows as a filter, we should get filter_rows back.
       //
-      const int padded_in_size = dims.spatial_dims[i].expanded_output_size +
-                                 dims.spatial_dims[i].filter_size - 1;
+      const int64 padded_in_size =
+          dims.spatial_dims[i].expanded_output_size +
+          (dims.spatial_dims[i].filter_size - 1) * dilations_[dim];
 
       // However it can be smaller than input_rows: in this
       // case it means some of the inputs are not used.
@@ -468,8 +608,7 @@ class ConvBackpropFilterOp : public XlaOpKernel {
       // and input "C" is not used at all.
       //
       // We apply negative padding in this case.
-      const int total_pad_in_size =
-          padded_in_size - dims.spatial_dims[i].input_size;
+      const int64 pad_total = padded_in_size - dims.spatial_dims[i].input_size;
 
       // + For the VALID padding, we don't pad anything on the top/left side
       //   and pad the bottom/right side with the remaining space.
@@ -479,13 +618,12 @@ class ConvBackpropFilterOp : public XlaOpKernel {
       // In addition, if the padded input size is smaller than the input size,
       // we need to ignore some training elements of the input. We do this by
       // applying negative padding on the right/bottom.
-      const int before_pad_in_size =
-          (total_pad_in_size > 0 && padding_ == Padding::SAME)
-              ? total_pad_in_size / 2
-              : 0;
+      const int64 pad_before =
+          padding_ == Padding::SAME ? std::max<int64>(pad_total / 2, 0) : 0;
 
-      padding[i] = {before_pad_in_size, total_pad_in_size - before_pad_in_size};
+      padding[i] = {pad_before, pad_total - pad_before};
       rhs_dilation[i] = dims.spatial_dims[i].stride;
+      window_strides[i] = dilations_[dim];
     }
 
     // Besides padding the input, we will also expand output_rows to
@@ -497,35 +635,20 @@ class ConvBackpropFilterOp : public XlaOpKernel {
     // This is done by specifying the window dilation factors in the
     // convolution HLO below.
     auto filter_backprop =
-        b->ConvGeneralDilated(activations, gradients,
-                              /*window_strides=*/ones, padding,
+        b->ConvGeneralDilated(activations, gradients, window_strides, padding,
                               /*lhs_dilation=*/ones, rhs_dilation, dnums);
 
-    // The layout of filter_backprop will match the layout of
-    // padded_activations
-    // and so will have layout: [out_feature, h, w, ..., in_feature]
-    // Tensorflow filter shape is [ H, W, ..., inC, outC ], so we transpose the
-    // output.
-    std::vector<int64> transpose_dims;
-    transpose_dims.reserve(num_dims());
-    for (int i = 0; i < num_spatial_dims_; ++i) {
-      transpose_dims.push_back(dnums.output_spatial_dimensions(i));
-    }
-    transpose_dims.push_back(c_dim);
-    transpose_dims.push_back(n_dim);
-    xla::ComputationDataHandle filter_backprop_reshaped =
-        b->Transpose(filter_backprop, transpose_dims);
-
     if (depthwise_) {
-      filter_backprop_reshaped = ContractFilterForDepthwiseBackprop(
-          filter_shape, ctx->input_type(0), filter_backprop_reshaped, b);
+      filter_backprop = ContractFilterForDepthwiseBackprop(
+          ctx, filter_shape, ctx->input_type(0), filter_backprop, b);
     }
-    ctx->SetOutput(0, filter_backprop_reshaped);
+    ctx->SetOutput(0, filter_backprop);
   }
 
  protected:
   const int num_spatial_dims_;
   const bool depthwise_;
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_ = FORMAT_NHWC;
@@ -540,7 +663,9 @@ class Conv2DBackpropFilterOp : public ConvBackpropFilterOp {
       : ConvBackpropFilterOp(ctx, /*num_spatial_dims=*/2, /*depthwise=*/false) {
   }
 };
-REGISTER_XLA_OP(Name("Conv2DBackpropFilter"), Conv2DBackpropFilterOp);
+REGISTER_XLA_OP(
+    Name("Conv2DBackpropFilter").CompileTimeConstInput("filter_sizes"),
+    Conv2DBackpropFilterOp);
 
 class Conv3DBackpropFilterOp : public ConvBackpropFilterOp {
  public:
@@ -548,14 +673,17 @@ class Conv3DBackpropFilterOp : public ConvBackpropFilterOp {
       : ConvBackpropFilterOp(ctx, /*num_spatial_dims=*/3, /*depthwise=*/false) {
   }
 };
-REGISTER_XLA_OP(Name("Conv3DBackpropFilterV2"), Conv3DBackpropFilterOp);
+REGISTER_XLA_OP(
+    Name("Conv3DBackpropFilterV2").CompileTimeConstInput("filter_sizes"),
+    Conv3DBackpropFilterOp);
 
 class DepthwiseConv2DBackpropFilterOp : public ConvBackpropFilterOp {
  public:
   explicit DepthwiseConv2DBackpropFilterOp(OpKernelConstruction* ctx)
       : ConvBackpropFilterOp(ctx, /*num_spatial_dims=*/2, /*depthwise=*/true) {}
 };
-REGISTER_XLA_OP(Name("DepthwiseConv2dNativeBackpropFilter"),
+REGISTER_XLA_OP(Name("DepthwiseConv2dNativeBackpropFilter")
+                    .CompileTimeConstInput("filter_sizes"),
                 DepthwiseConv2DBackpropFilterOp);
 
 }  // namespace
diff --git a/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc b/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc
index a4ea65ea89e348cb77412efb0c5c0fcb1a9f33f3..96d7809f7995634b6bc31ab801b93526d9da7e6f 100644
--- a/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/core/util/tensor_format.h"
 
 namespace tensorflow {
 namespace {
@@ -23,6 +24,16 @@ namespace {
 class DepthToSpaceOp : public XlaOpKernel {
  public:
   explicit DepthToSpaceOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    string data_format_str;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(ctx, FormatFromString(data_format_str, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+
+    OP_REQUIRES(ctx, data_format_ == FORMAT_NCHW || data_format_ == FORMAT_NHWC,
+                errors::InvalidArgument("Unsupported data format ",
+                                        ToString(data_format_),
+                                        "; expected formats NHWC or NCHW"));
+
     OP_REQUIRES_OK(ctx, ctx->GetAttr("block_size", &block_size_));
     OP_REQUIRES(
         ctx, block_size_ > 1,
@@ -31,18 +42,79 @@ class DepthToSpaceOp : public XlaOpKernel {
 
   void Compile(XlaOpKernelContext* ctx) override {
     const TensorShape input_tensor_shape = ctx->InputShape(0);
-    // The input is presumed to be [batch, height, width, depth]
     int input_rank = input_tensor_shape.dims();
     static const int kRequiredDims = 4;
     OP_REQUIRES(ctx, kRequiredDims == input_rank,
-                errors::InvalidArgument("Input rank should be: ", kRequiredDims,
-                                        " instead of: ", input_rank));
+                errors::InvalidArgument("Input rank should be ", kRequiredDims,
+                                        "; got: ", input_rank));
     const gtl::InlinedVector<int64, 4> input_shape =
         input_tensor_shape.dim_sizes();
 
     xla::ComputationBuilder* b = ctx->builder();
     xla::ComputationDataHandle input = ctx->Input(0);
 
+    int feature_dim = GetTensorFeatureDimIndex(input_rank, data_format_);
+    int num_spatial_dims = GetTensorSpatialDims(input_rank, data_format_);
+
+    std::vector<int64> reshaped_shape;
+    std::vector<int64> transpose_order;
+    std::vector<int64> output_shape;
+    reshaped_shape.reserve(input_rank);
+    transpose_order.reserve(input_rank);
+    output_shape.reserve(input_rank);
+    if (data_format_ == FORMAT_NHWC) {
+      reshaped_shape.push_back(input_shape[0]);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(input_shape[1 + i]);
+      }
+      int64 block_elems = 1;
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(block_size_);
+        block_elems *= block_size_;
+      }
+      reshaped_shape.push_back(input_shape[feature_dim] / block_elems);
+
+      transpose_order.push_back(0);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(i + 1);
+        transpose_order.push_back(i + 1 + num_spatial_dims);
+      }
+      transpose_order.push_back(feature_dim + num_spatial_dims);
+
+      output_shape.push_back(input_shape[0]);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        output_shape.push_back(input_shape[1 + i] * block_size_);
+      }
+      output_shape.push_back(input_shape[feature_dim] / block_elems);
+    } else {
+      // NCHW format.
+      reshaped_shape.push_back(input_shape[0]);
+      int64 block_elems = 1;
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(block_size_);
+        block_elems *= block_size_;
+      }
+      reshaped_shape.push_back(input_shape[feature_dim] / block_elems);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(input_shape[2 + i]);
+      }
+
+      transpose_order.push_back(0);
+      transpose_order.push_back(1 + num_spatial_dims);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(2 + num_spatial_dims + i);
+        transpose_order.push_back(1 + i);
+      }
+
+      output_shape.push_back(input_shape[0]);
+      output_shape.push_back(input_shape[feature_dim] / block_elems);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        output_shape.push_back(input_shape[2 + i] * block_size_);
+      }
+    }
+
+    // Note: comments are given in NHWC format; NCHW is similar with a different
+    // dimension order.
     // 1. Reshape `input` to `reshaped` of shape:
     //
     //      [batch,
@@ -51,14 +123,14 @@ class DepthToSpaceOp : public XlaOpKernel {
     //       block_size_,
     //       block_size_,
     //       depth / (block_size_ * block_size_)]
-    OP_REQUIRES(ctx, input_shape[3] % (block_size_ * block_size_) == 0,
+    OP_REQUIRES(ctx,
+                input_shape[feature_dim] % (block_size_ * block_size_) == 0,
                 errors::InvalidArgument(
                     "Input depth dimension (", input_shape[3],
                     ") is not divisible by square of the block size (",
                     block_size_, ")"));
-    xla::ComputationDataHandle reshaped = b->Reshape(
-        input, {input_shape[0], input_shape[1], input_shape[2], block_size_,
-                block_size_, input_shape[3] / (block_size_ * block_size_)});
+
+    xla::ComputationDataHandle reshaped = b->Reshape(input, reshaped_shape);
 
     // 2. Permute dimensions of `reshaped` to produce
     //    `permuted_reshaped` of shape:
@@ -70,7 +142,7 @@ class DepthToSpaceOp : public XlaOpKernel {
     //       block_size_,
     //       depth / (block_size_ * block_size_)]
     xla::ComputationDataHandle permuted_reshaped =
-        b->Transpose(reshaped, {0, 1, 3, 2, 4, 5});
+        b->Transpose(reshaped, transpose_order);
 
     // 3. Reshape `permuted_reshaped` to flatten `block_shape` into the
     //    batch dimension, producing an output tensor of shape:
@@ -80,15 +152,14 @@ class DepthToSpaceOp : public XlaOpKernel {
     //       input_shape[2] * block_size_,
     //       depth / (block_size_ * block_size_)]
     //
-    xla::ComputationDataHandle output = b->Reshape(
-        permuted_reshaped, {input_shape[0], input_shape[1] * block_size_,
-                            input_shape[2] * block_size_,
-                            input_shape[3] / (block_size_ * block_size_)});
+    xla::ComputationDataHandle output =
+        b->Reshape(permuted_reshaped, output_shape);
 
     ctx->SetOutput(0, output);
   }
 
  private:
+  TensorFormat data_format_;
   int block_size_;
 };
 REGISTER_XLA_OP(Name("DepthToSpace"), DepthToSpaceOp);
diff --git a/tensorflow/compiler/tf2xla/kernels/diag_op.cc b/tensorflow/compiler/tf2xla/kernels/diag_op.cc
index ec5017f6ab96bd3fc273a746b77fbb7e74fd9f35..765ea922a532a085a552192348ab360c4c30ff0a 100644
--- a/tensorflow/compiler/tf2xla/kernels/diag_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/diag_op.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/compiler/tf2xla/lib/util.h"
+#include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -22,6 +24,62 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+// Create a diagonal / batch diagonal matrix with 'input' on the diagonal.
+xla::StatusOr<xla::ComputationDataHandle> CreateDiagonal(
+    const xla::ComputationDataHandle& input, int64 last_dim_size,
+    tensorflow::gtl::ArraySlice<int64> other_dims, XlaOpKernelContext* ctx,
+    xla::ComputationBuilder* builder) {
+  // Create two matrices that have the following forms, and compare them:
+  //
+  // [[0, 0, 0, 0]            [[0, 1, 2, 3]
+  //  [1, 1, 1, 1]             [0, 1, 2, 3]
+  //  [2, 2, 2, 2]             [0, 1, 2, 3]
+  //  [3, 3, 3, 3]]            [0, 1, 2, 3]]
+  //
+  // This produces a predicate matrix of the right size, with "true" on the
+  // diagonal.
+  xla::ComputationDataHandle iota;
+  TF_RETURN_IF_ERROR(
+      XlaHelpers::Iota(builder, DataType::DT_INT32, last_dim_size, &iota));
+  xla::ComputationDataHandle iota_broadcast =
+      builder->Broadcast(iota, {last_dim_size});
+  xla::ComputationDataHandle mask = builder->Eq(iota_broadcast, iota, {0});
+
+  // If this is a batched diagonal, broadcast the mask across the other
+  // dimensions.
+  if (!other_dims.empty()) {
+    mask = builder->Broadcast(mask, other_dims);
+  }
+
+  // Broadcast the input, and then use the mask computed above to select the
+  // diagonal:
+  // e.g, in 2D:
+  //         [[t, f, f]    [[1, 1, 1]    [[0, 0, 0]      [[1, 0, 0]
+  // select(  [f, t, f]  ,  [4, 4, 4]  ,  [0, 0, 0]  ) =  [0, 4, 0]
+  //          [f, f, t]]    [9, 9, 9]]    [0, 0, 0]]      [0, 0, 9]]
+  //
+  // Broadcasting the input is less-than-trivial, since we need to broadcast
+  // into a "middle" dimension. We can do this with a reshape + implicit
+  // broadcast.
+  // TODO(b/30112114): Replace with in-dim broadcast when those are supported.
+  std::vector<int64> broadcast_dims(other_dims.begin(), other_dims.end());
+  broadcast_dims.push_back(1LL);
+  broadcast_dims.push_back(last_dim_size);
+  xla::ComputationDataHandle input_broadcast =
+      builder->Reshape(input, broadcast_dims);
+
+  broadcast_dims[broadcast_dims.size() - 2] = last_dim_size;
+  xla::PrimitiveType element_type;
+  TF_RETURN_IF_ERROR(
+      DataTypeToPrimitiveType(ctx->input_type(0), &element_type));
+  auto broadcast_shape =
+      xla::ShapeUtil::MakeShape(element_type, broadcast_dims);
+  xla::ComputationDataHandle zeros = Zeros(builder, broadcast_shape);
+
+  input_broadcast = builder->Add(input_broadcast, zeros);
+  return builder->Select(mask, input_broadcast, zeros);
+}
+
 class DiagOp : public XlaOpKernel {
  public:
   explicit DiagOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
@@ -29,6 +87,8 @@ class DiagOp : public XlaOpKernel {
   void Compile(XlaOpKernelContext* ctx) override {
     xla::ComputationBuilder* builder = ctx->builder();
 
+    OP_REQUIRES(ctx, ctx->num_inputs() >= 1,
+                errors::InvalidArgument("Diag op must have at an input"));
     const TensorShape input_shape = ctx->InputShape(0);
 
     auto dims = input_shape.dim_sizes();
@@ -36,7 +96,7 @@ class DiagOp : public XlaOpKernel {
                 errors::InvalidArgument("Expected 1 <= dims, got shape ",
                                         input_shape.DebugString()));
 
-    xla::ComputationDataHandle diag = ctx->Input(0);
+    xla::ComputationDataHandle input = ctx->Input(0);
 
     // Picture:
     // tf.diag([1, 2, 3, 4]) ==> [[1, 0, 0, 0]
@@ -46,13 +106,13 @@ class DiagOp : public XlaOpKernel {
 
     // Flattens the input to 1D.
     int64 size = input_shape.num_elements();
-    diag = builder->Reshape(diag, {size});
+    input = builder->Reshape(input, {size});
 
-    // Adds inter-element padding of 'size'.
-    xla::PaddingConfig config;
-    auto* dim = config.add_dimensions();
-    dim->set_interior_padding(size);
-    diag = builder->Pad(diag, XlaHelpers::Zero(builder, input_type(0)), config);
+    // Create an R2 with the R1 diagonal.
+    auto diag_or_status =
+        CreateDiagonal(input, size, /*other_dims=*/{}, ctx, builder);
+    OP_REQUIRES_OK(ctx, diag_or_status.status());
+    xla::ComputationDataHandle diag = diag_or_status.ValueOrDie();
 
     // Reshapes to the final shape.
     std::vector<int64> new_dims(dims.size() * 2);
@@ -141,6 +201,8 @@ class MatrixDiagOp : public XlaOpKernel {
   void Compile(XlaOpKernelContext* ctx) override {
     xla::ComputationBuilder* builder = ctx->builder();
 
+    OP_REQUIRES(ctx, ctx->num_inputs() >= 1,
+                errors::InvalidArgument("MatrixDiag op must have at an input"));
     const TensorShape input_shape = ctx->InputShape(0);
 
     auto dims = input_shape.dim_sizes();
@@ -152,17 +214,13 @@ class MatrixDiagOp : public XlaOpKernel {
 
     int last_dim = dims.size() - 1;
     int64 last_dim_size = input_shape.dim_size(last_dim);
+    tensorflow::gtl::ArraySlice<int64> other_dims(dims);
+    other_dims.pop_back();
 
-    // Adds inter-element padding of 'last_dim_size' to the last dimension.
-    xla::PaddingConfig config = xla::MakeNoPaddingConfig(dims.size());
-    auto* dim = config.mutable_dimensions(last_dim);
-    dim->set_interior_padding(last_dim_size);
-    diag = builder->Pad(diag, XlaHelpers::Zero(builder, input_type(0)), config);
-
-    // Reshapes to the final shape.
-    dims.push_back(last_dim_size);
-    diag = builder->Reshape(diag, dims);
-
+    auto diag_or_status =
+        CreateDiagonal(diag, last_dim_size, other_dims, ctx, builder);
+    OP_REQUIRES_OK(ctx, diag_or_status.status());
+    diag = diag_or_status.ValueOrDie();
     ctx->SetOutput(0, diag);
   }
 };
diff --git a/tensorflow/compiler/tf2xla/kernels/dynamic_stitch_op.cc b/tensorflow/compiler/tf2xla/kernels/dynamic_stitch_op.cc
index 7349dcb987cd88c423570889c0502d1a0bd12c52..f2cd21ffb9ce88747c04f3c71e66dadeb1faf0f9 100644
--- a/tensorflow/compiler/tf2xla/kernels/dynamic_stitch_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/dynamic_stitch_op.cc
@@ -72,22 +72,24 @@ class DynamicStitchOp : public XlaOpKernel {
                      XLAShapeToTensorShape(indices_input[input_num].shape(),
                                            &indices_shape));
       const TensorShape& data_shape = data_shapes[input_num];
-      OP_REQUIRES(ctx, TensorShapeUtils::StartsWith(data_shape, indices_shape),
-                  errors::InvalidArgument(
-                      "data[", input_num, "].shape = ",
-                      data_shape.DebugString(), " does not start with indices[",
-                      input_num, "].shape = ", indices_shape.DebugString()));
-      OP_REQUIRES(ctx,
-                  input_num == 0 || SameExtraShape(data0_shape, indices0_shape,
-                                                   data_shape, indices_shape),
-                  errors::InvalidArgument(
-                      "Need data[0].shape[", indices0_shape.dims(),
-                      ":] = data[", input_num, "].shape[", indices_shape.dims(),
-                      ":], got data[0].shape = ", data0_shape.DebugString(),
-                      ", data[", input_num, "].shape = ",
-                      data_shape.DebugString(), ", indices[0].shape = ",
-                      indices0_shape.DebugString(), ", indices[", input_num,
-                      "].shape = ", indices_shape.DebugString()));
+      OP_REQUIRES(
+          ctx, TensorShapeUtils::StartsWith(data_shape, indices_shape),
+          errors::InvalidArgument("data[", input_num,
+                                  "].shape = ", data_shape.DebugString(),
+                                  " does not start with indices[", input_num,
+                                  "].shape = ", indices_shape.DebugString()));
+      OP_REQUIRES(
+          ctx,
+          input_num == 0 || SameExtraShape(data0_shape, indices0_shape,
+                                           data_shape, indices_shape),
+          errors::InvalidArgument(
+              "Need data[0].shape[", indices0_shape.dims(), ":] = data[",
+              input_num, "].shape[", indices_shape.dims(),
+              ":], got data[0].shape = ", data0_shape.DebugString(), ", data[",
+              input_num, "].shape = ", data_shape.DebugString(),
+              ", indices[0].shape = ", indices0_shape.DebugString(),
+              ", indices[", input_num,
+              "].shape = ", indices_shape.DebugString()));
 
       OP_REQUIRES_OK(ctx,
                      XlaHelpers::ReshapeLiteral(indices_input[input_num],
@@ -159,8 +161,8 @@ class DynamicStitchOp : public XlaOpKernel {
                                    indices0_shape.dims());
     std::vector<int64> slice_limit(1 + data0_shape.dims() -
                                    indices0_shape.dims());
-    std::vector<int64> stride(1 + data0_shape.dims() -
-                              indices0_shape.dims(), 1);
+    std::vector<int64> stride(1 + data0_shape.dims() - indices0_shape.dims(),
+                              1);
     for (int d = indices0_shape.dims(); d < data0_shape.dims(); d++) {
       slice_limit[1 + d - indices0_shape.dims()] = data0_shape.dim_size(d);
     }
@@ -198,8 +200,10 @@ class DynamicStitchOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("DynamicStitch"), DynamicStitchOp);
-REGISTER_XLA_OP(Name("ParallelDynamicStitch"), DynamicStitchOp);
+REGISTER_XLA_OP(Name("DynamicStitch").CompileTimeConstInput("indices"),
+                DynamicStitchOp);
+REGISTER_XLA_OP(Name("ParallelDynamicStitch").CompileTimeConstInput("indices"),
+                DynamicStitchOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/fft_ops.cc b/tensorflow/compiler/tf2xla/kernels/fft_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a4f3c1c3ad9a928e0552c388a25ed9fcb08edabb
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/fft_ops.cc
@@ -0,0 +1,122 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// XLA-specific Ops for FFT.
+
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_slice.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/conv_grad_ops.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/util/padding.h"
+#include "tensorflow/core/util/tensor_format.h"
+
+namespace tensorflow {
+
+namespace {
+
+using xla::FftType;
+
+class GenericFftOp : public XlaOpKernel {
+ public:
+  explicit GenericFftOp(OpKernelConstruction* ctx, FftType fft_type,
+                        int fft_rank)
+      : XlaOpKernel(ctx), fft_type_(fft_type), fft_rank_(fft_rank) {}
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    const TensorShape input_shape = ctx->InputShape(0);
+    OP_REQUIRES(
+        ctx, TensorShapeUtils::IsVectorOrHigher(input_shape),
+        errors::InvalidArgument("input must be at least 1 dimensional"));
+
+    std::vector<int64> fft_length;
+    if (fft_type_ == FftType::RFFT || fft_type_ == FftType::IRFFT) {
+      OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(1, &fft_length));
+      OP_REQUIRES(ctx, fft_length.size() == fft_rank_,
+                  errors::InvalidArgument("fft_length must be length ",
+                                          fft_rank_, " vector"));
+    } else {
+      // Innermost axis provides the FFT length.
+      for (int i = 0; i < fft_rank_; i++) {
+        fft_length.push_back(
+            input_shape.dim_size(input_shape.dims() - fft_rank_ + i));
+      }
+    }
+
+    xla::ComputationBuilder* b = ctx->builder();
+    xla::ComputationDataHandle fft =
+        b->Fft(ctx->Input(0), fft_type_, fft_length);
+    ctx->SetOutput(0, fft);
+  }
+
+ protected:
+  const FftType fft_type_;
+  const int fft_rank_;
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(GenericFftOp);
+};
+
+template <int FFTRank>
+class FFTOp : public GenericFftOp {
+ public:
+  explicit FFTOp(OpKernelConstruction* ctx)
+      : GenericFftOp(ctx, /*fft_type=*/FftType::FFT, /*fft_rank=*/FFTRank) {}
+};
+REGISTER_XLA_OP(Name("FFT"), FFTOp<1>);
+REGISTER_XLA_OP(Name("FFT2D"), FFTOp<2>);
+REGISTER_XLA_OP(Name("FFT3D"), FFTOp<3>);
+
+template <int FFTRank>
+class IFFTOp : public GenericFftOp {
+ public:
+  explicit IFFTOp(OpKernelConstruction* ctx)
+      : GenericFftOp(ctx, /*fft_type=*/FftType::IFFT, /*fft_rank=*/FFTRank) {}
+};
+REGISTER_XLA_OP(Name("IFFT"), IFFTOp<1>);
+REGISTER_XLA_OP(Name("IFFT2D"), IFFTOp<2>);
+REGISTER_XLA_OP(Name("IFFT3D"), IFFTOp<3>);
+
+template <int FFTRank>
+class RFFTOp : public GenericFftOp {
+ public:
+  explicit RFFTOp(OpKernelConstruction* ctx)
+      : GenericFftOp(ctx, /*fft_type=*/FftType::RFFT, /*fft_rank=*/FFTRank) {}
+};
+REGISTER_XLA_OP(Name("RFFT").CompileTimeConstInput("fft_length"), RFFTOp<1>);
+REGISTER_XLA_OP(Name("RFFT2D").CompileTimeConstInput("fft_length"), RFFTOp<2>);
+REGISTER_XLA_OP(Name("RFFT3D").CompileTimeConstInput("fft_length"), RFFTOp<3>);
+
+template <int FFTRank>
+class IRFFTOp : public GenericFftOp {
+ public:
+  explicit IRFFTOp(OpKernelConstruction* ctx)
+      : GenericFftOp(ctx, /*fft_type=*/FftType::IRFFT, /*fft_rank=*/FFTRank) {}
+};
+REGISTER_XLA_OP(Name("IRFFT").CompileTimeConstInput("fft_length"), IRFFTOp<1>);
+REGISTER_XLA_OP(Name("IRFFT2D").CompileTimeConstInput("fft_length"),
+                IRFFTOp<2>);
+REGISTER_XLA_OP(Name("IRFFT3D").CompileTimeConstInput("fft_length"),
+                IRFFTOp<3>);
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/fill_op.cc b/tensorflow/compiler/tf2xla/kernels/fill_op.cc
index 9e090fe01cbfd4dab81b0de21e3a44e42c2ef18e..eaa13b8dfacce9aaca42ce5fcdfa467ce7fa7b7f 100644
--- a/tensorflow/compiler/tf2xla/kernels/fill_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/fill_op.cc
@@ -69,7 +69,7 @@ class FillOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("Fill"), FillOp);
+REGISTER_XLA_OP(Name("Fill").CompileTimeConstInput("dims"), FillOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op.cc b/tensorflow/compiler/tf2xla/kernels/gather_op.cc
index e420f21ca33fe7de9b33f404ce04eae62d9c041e..ffed38249416766850ba10f1069e706570b995fe 100644
--- a/tensorflow/compiler/tf2xla/kernels/gather_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/gather_op.cc
@@ -46,26 +46,15 @@ xla::ComputationDataHandle XlaComputeGatherDynamicSlice(
   TensorShape slice_shape(input_shape);
   slice_shape.set_dim(axis, 1);
 
-  // TODO(b/37575001) The tensor in which we construct the output during
-  // the loop must have rank >= 3 as a workaround for lowering issues.
-  int64 extra_dims = 0;
-  if (input_shape.dims() < 3) extra_dims = 3 - input_shape.dims();
-
   TensorShape loop_out_shape;
-  for (int64 k = 0; k < extra_dims; ++k) loop_out_shape.AddDim(1);
   loop_out_shape.AppendShape(input_shape_pre_axis);
   loop_out_shape.AddDim(num_indices);
   loop_out_shape.AppendShape(input_shape_post_axis);
-
-  // Slices are reshaped into the rank >= 3 shape of the loop carried output.
   TensorShape loop_out_slice_shape;
-  for (int64 k = 0; k < extra_dims; ++k) loop_out_slice_shape.AddDim(1);
   loop_out_slice_shape.AppendShape(input_shape_pre_axis);
   loop_out_slice_shape.AddDim(1);
   loop_out_slice_shape.AppendShape(input_shape_post_axis);
 
-  // Finally, the loop-carried rank >= 3 output is reshaped to the op's
-  // specified result shape.
   TensorShape out_shape;
   out_shape.AppendShape(input_shape_pre_axis);
   out_shape.AppendShape(indices_shape);
@@ -89,7 +78,7 @@ xla::ComputationDataHandle XlaComputeGatherDynamicSlice(
        xla::ShapeUtil::MakeShape(ptype, input_shape.dim_sizes()),
        // The gather indices are reshaped to rank 1. Loop invariant.
        xla::ShapeUtil::MakeShape(idxtype, {num_indices}),
-       // The output array is rank >= 3, and is updated on each loop iteration.
+       // The output array, which is updated on each loop iteration.
        xla::ShapeUtil::MakeShape(ptype, loop_out_shape.dim_sizes())});
   xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(tuple_shapes);
 
@@ -135,12 +124,11 @@ xla::ComputationDataHandle XlaComputeGatherDynamicSlice(
         bodyb.DynamicSlice(input, start_indices, slice_shape.dim_sizes()),
         loop_out_slice_shape.dim_sizes());
 
-    // Construct the index into the R3+ output Tensor 0, ..., <index>, 0, ...
+    // Construct the index into the output Tensor 0, ..., <index>, 0, ...
     std::vector<xla::ComputationDataHandle> out_index_vals(
         loop_out_shape.dims(),
         bodyb.Reshape(XlaHelpers::Zero(&bodyb, index_type), {1}));
-    out_index_vals[input_shape_pre_axis.dims() + extra_dims] =
-        bodyb.Reshape(i, {1});
+    out_index_vals[input_shape_pre_axis.dims()] = bodyb.Reshape(i, {1});
     auto out_index = bodyb.ConcatInDim(out_index_vals, 0);
 
     // Update the output Tensor
@@ -198,6 +186,7 @@ void GatherOpDynamicSlice::Compile(XlaOpKernelContext* context) {
 }
 
 REGISTER_XLA_OP(Name("Gather"), GatherOpDynamicSlice);
-REGISTER_XLA_OP(Name("GatherV2"), GatherOpDynamicSlice);
+REGISTER_XLA_OP(Name("GatherV2").CompileTimeConstInput("axis"),
+                GatherOpDynamicSlice);
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/image_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f22f384256a8ddd8c05de4a1322aba741dc4d7fd
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/image_ops.cc
@@ -0,0 +1,305 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/lib/util.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+
+namespace tensorflow {
+namespace {
+
+// Converts 'input' from RGB format to HSV format.
+// 'shape' is the shape of the red/green/blue tensors.
+std::array<xla::ComputationDataHandle, 3> RGBToHSV(
+    XlaOpKernelContext* ctx, xla::ComputationBuilder* b,
+    const std::array<xla::ComputationDataHandle, 3>& rgb, DataType dtype,
+    const TensorShape& shape) {
+  auto zero = XlaHelpers::Zero(b, dtype);
+  auto one = XlaHelpers::One(b, dtype);
+
+  auto red = rgb[0];
+  auto green = rgb[1];
+  auto blue = rgb[2];
+  auto value = b->Max(b->Max(red, green), blue);
+  auto minimum = b->Min(b->Min(red, green), blue);
+  auto range = b->Sub(value, minimum);
+
+  auto zeros = b->Broadcast(zero, shape.dim_sizes());
+  auto saturation = b->Select(b->Gt(value, zero), b->Div(range, value), zeros);
+
+  auto norm = b->Div(XlaHelpers::FloatLiteral(b, dtype, 1.0 / 6.0), range);
+
+  auto hue = b->Select(b->Eq(green, value),
+                       b->Add(b->Mul(norm, b->Sub(blue, red)),
+                              XlaHelpers::FloatLiteral(b, dtype, 2.0 / 6.0)),
+                       b->Add(b->Mul(norm, b->Sub(red, green)),
+                              XlaHelpers::FloatLiteral(b, dtype, 4.0 / 6.0)));
+  hue = b->Select(b->Eq(red, value), b->Mul(norm, b->Sub(green, blue)), hue);
+  hue = b->Select(b->Gt(range, zero), hue, zeros);
+  hue = b->Select(b->Lt(hue, zero), b->Add(hue, one), hue);
+  return {hue, saturation, value};
+}
+
+// Converts 'input' from HSV format to RGB format.
+std::array<xla::ComputationDataHandle, 3> HSVToRGB(
+    xla::ComputationBuilder* b,
+    const std::array<xla::ComputationDataHandle, 3>& hsv, DataType dtype) {
+  xla::ComputationDataHandle hue = hsv[0];
+  xla::ComputationDataHandle saturation = hsv[1];
+  xla::ComputationDataHandle value = hsv[2];
+  auto zero = XlaHelpers::Zero(b, dtype);
+  auto one = XlaHelpers::FloatLiteral(b, dtype, 1.0);
+  auto two = XlaHelpers::FloatLiteral(b, dtype, 2.0);
+  auto three = XlaHelpers::FloatLiteral(b, dtype, 3.0);
+  auto four = XlaHelpers::FloatLiteral(b, dtype, 4.0);
+  auto six = XlaHelpers::FloatLiteral(b, dtype, 6.0);
+
+  auto dh = b->Mul(hue, six);
+  auto dr = b->Clamp(zero, b->Sub(b->Abs(b->Sub(dh, three)), one), one);
+  auto dg = b->Clamp(zero, b->Sub(two, b->Abs(b->Sub(dh, two))), one);
+  auto db = b->Clamp(zero, b->Sub(two, b->Abs(b->Sub(dh, four))), one);
+  auto one_minus_s = b->Sub(one, saturation);
+
+  auto red = b->Mul(b->Add(one_minus_s, b->Mul(saturation, dr)), value);
+  auto green = b->Mul(b->Add(one_minus_s, b->Mul(saturation, dg)), value);
+  auto blue = b->Mul(b->Add(one_minus_s, b->Mul(saturation, db)), value);
+  return {red, green, blue};
+}
+
+class RGBToHSVOp : public XlaOpKernel {
+ public:
+  explicit RGBToHSVOp(OpKernelConstruction* context) : XlaOpKernel(context) {}
+
+  void Compile(XlaOpKernelContext* context) override {
+    const TensorShape input_shape = context->InputShape(0);
+    OP_REQUIRES(context, input_shape.dims() >= 1,
+                errors::InvalidArgument("input must be at least 1D",
+                                        input_shape.DebugString()));
+    int channel_dim = input_shape.dims() - 1;
+    int64 channels = input_shape.dim_size(channel_dim);
+    OP_REQUIRES(
+        context, channels == 3,
+        errors::FailedPrecondition("input must have 3 channels but input has ",
+                                   channels, " channels."));
+
+    xla::ComputationBuilder* b = context->builder();
+    xla::ComputationDataHandle input = context->Input(0);
+
+    xla::ComputationDataHandle red =
+        b->SliceInDim(input, /*start_index=*/0, /*limit_index=*/1, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+    xla::ComputationDataHandle green =
+        b->SliceInDim(input, /*start_index=*/1, /*limit_index=*/2, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+    xla::ComputationDataHandle blue =
+        b->SliceInDim(input, /*start_index=*/2, /*limit_index=*/3, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+    TensorShape channel_shape = input_shape;
+    channel_shape.set_dim(channel_dim, 1);
+    auto hsv = RGBToHSV(context, b, {red, green, blue}, context->input_type(0),
+                        channel_shape);
+
+    context->SetOutput(0, b->ConcatInDim(hsv, channel_dim));
+  }
+};
+REGISTER_XLA_OP(Name("RGBToHSV"), RGBToHSVOp);
+
+class HSVToRGBOp : public XlaOpKernel {
+ public:
+  explicit HSVToRGBOp(OpKernelConstruction* context) : XlaOpKernel(context) {}
+
+  void Compile(XlaOpKernelContext* context) override {
+    const TensorShape input_shape = context->InputShape(0);
+    OP_REQUIRES(context, input_shape.dims() >= 1,
+                errors::InvalidArgument("input must be at least 1D",
+                                        input_shape.DebugString()));
+    int channel_dim = input_shape.dims() - 1;
+    int64 channels = input_shape.dim_size(channel_dim);
+    OP_REQUIRES(
+        context, channels == 3,
+        errors::FailedPrecondition("input must have 3 channels but input has ",
+                                   channels, " channels."));
+
+    xla::ComputationBuilder* b = context->builder();
+    xla::ComputationDataHandle input = context->Input(0);
+    xla::ComputationDataHandle hue =
+        b->SliceInDim(input, /*start_index=*/0, /*limit_index=*/1, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+    xla::ComputationDataHandle saturation =
+        b->SliceInDim(input, /*start_index=*/1, /*limit_index=*/2, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+    xla::ComputationDataHandle value =
+        b->SliceInDim(input, /*start_index=*/2, /*limit_index=*/3, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+
+    auto rgb = HSVToRGB(context->builder(), {hue, saturation, value},
+                        context->input_type(0));
+
+    context->SetOutput(0, b->ConcatInDim(rgb, channel_dim));
+  }
+};
+REGISTER_XLA_OP(Name("HSVToRGB"), HSVToRGBOp);
+
+class AdjustContrastOpV2 : public XlaOpKernel {
+ public:
+  explicit AdjustContrastOpV2(OpKernelConstruction* context)
+      : XlaOpKernel(context) {}
+
+  void Compile(XlaOpKernelContext* context) override {
+    const TensorShape& input_shape = context->InputShape(0);
+    const TensorShape& factor_shape = context->InputShape(1);
+    OP_REQUIRES(context, input_shape.dims() >= 3,
+                errors::InvalidArgument("input must be at least 3-D, got shape",
+                                        input_shape.DebugString()));
+    int height_dim = input_shape.dims() - 3;
+    int width_dim = input_shape.dims() - 2;
+    int channel_dim = input_shape.dims() - 1;
+    const int64 height = input_shape.dim_size(height_dim);
+    const int64 width = input_shape.dim_size(width_dim);
+
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(factor_shape),
+                errors::InvalidArgument("contrast_factor must be scalar: ",
+                                        factor_shape.DebugString()));
+
+    xla::ComputationBuilder* b = context->builder();
+    xla::ComputationDataHandle input = context->Input(0);
+    xla::ComputationDataHandle factor = context->Input(1);
+
+    DataType type = context->input_type(0);
+
+    auto output = b->Reduce(input, /*init_value=*/XlaHelpers::Zero(b, type),
+                            /*computation=*/*context->GetOrCreateAdd(type),
+                            {height_dim, width_dim});
+    output = b->Div(output, XlaHelpers::FloatLiteral(b, type, height * width));
+
+    std::vector<int64> broadcast_dims(input_shape.dims() - 2);
+    std::iota(broadcast_dims.begin(), broadcast_dims.end(), 0);
+    broadcast_dims.back() = channel_dim;
+    output = b->Add(b->Mul(input, factor),
+                    b->Mul(output, b->Sub(XlaHelpers::One(b, type), factor)),
+                    broadcast_dims);
+    context->SetOutput(0, output);
+  }
+};
+REGISTER_XLA_OP(Name("AdjustContrastv2"), AdjustContrastOpV2);
+
+class AdjustSaturationOp : public XlaOpKernel {
+ public:
+  explicit AdjustSaturationOp(OpKernelConstruction* context)
+      : XlaOpKernel(context) {}
+
+  void Compile(XlaOpKernelContext* context) override {
+    const TensorShape& input_shape = context->InputShape(0);
+    const TensorShape& scale_shape = context->InputShape(1);
+    OP_REQUIRES(context, input_shape.dims() >= 3,
+                errors::InvalidArgument("input must be at least 3-D, got shape",
+                                        input_shape.DebugString()));
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(scale_shape),
+                errors::InvalidArgument("scale must be scalar: ",
+                                        scale_shape.DebugString()));
+    const int channel_dim = input_shape.dims() - 1;
+    const int64 channels = input_shape.dim_size(channel_dim);
+    OP_REQUIRES(
+        context, channels == 3,
+        errors::InvalidArgument("input must have 3 channels but instead has ",
+                                channels, " channels."));
+
+    xla::ComputationBuilder* b = context->builder();
+    xla::ComputationDataHandle input = context->Input(0);
+    xla::ComputationDataHandle scale = context->Input(1);
+
+    DataType type = context->input_type(0);
+
+    xla::ComputationDataHandle red =
+        b->SliceInDim(input, /*start_index=*/0, /*limit_index=*/1, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+    xla::ComputationDataHandle green =
+        b->SliceInDim(input, /*start_index=*/1, /*limit_index=*/2, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+    xla::ComputationDataHandle blue =
+        b->SliceInDim(input, /*start_index=*/2, /*limit_index=*/3, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+    TensorShape channel_shape = input_shape;
+    channel_shape.set_dim(channel_dim, 1);
+    auto hsv = RGBToHSV(context, b, {red, green, blue}, context->input_type(0),
+                        channel_shape);
+
+    hsv[1] = b->Clamp(XlaHelpers::Zero(b, type), b->Mul(hsv[1], scale),
+                      XlaHelpers::One(b, type));
+
+    auto rgb = HSVToRGB(context->builder(), hsv, context->input_type(0));
+
+    context->SetOutput(0, b->ConcatInDim(rgb, channel_dim));
+  }
+};
+REGISTER_XLA_OP(Name("AdjustSaturation"), AdjustSaturationOp);
+
+class AdjustHueOp : public XlaOpKernel {
+ public:
+  explicit AdjustHueOp(OpKernelConstruction* context) : XlaOpKernel(context) {}
+
+  void Compile(XlaOpKernelContext* context) override {
+    const TensorShape& input_shape = context->InputShape(0);
+    const TensorShape& delta_shape = context->InputShape(1);
+    OP_REQUIRES(context, input_shape.dims() >= 3,
+                errors::InvalidArgument("input must be at least 3-D, got shape",
+                                        input_shape.DebugString()));
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(delta_shape),
+                errors::InvalidArgument("delta must be scalar: ",
+                                        delta_shape.DebugString()));
+    const int channel_dim = input_shape.dims() - 1;
+    const int64 channels = input_shape.dim_size(channel_dim);
+    OP_REQUIRES(
+        context, channels == 3,
+        errors::InvalidArgument("input must have 3 channels but instead has ",
+                                channels, " channels."));
+
+    xla::ComputationBuilder* b = context->builder();
+    xla::ComputationDataHandle input = context->Input(0);
+    xla::ComputationDataHandle delta = context->Input(1);
+
+    DataType type = context->input_type(0);
+
+    xla::ComputationDataHandle red =
+        b->SliceInDim(input, /*start_index=*/0, /*limit_index=*/1, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+    xla::ComputationDataHandle green =
+        b->SliceInDim(input, /*start_index=*/1, /*limit_index=*/2, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+    xla::ComputationDataHandle blue =
+        b->SliceInDim(input, /*start_index=*/2, /*limit_index=*/3, /*stride=*/1,
+                      /*dimno=*/channel_dim);
+    TensorShape channel_shape = input_shape;
+    channel_shape.set_dim(channel_dim, 1);
+    auto hsv = RGBToHSV(context, b, {red, green, blue}, context->input_type(0),
+                        channel_shape);
+
+    auto zero = XlaHelpers::Zero(b, type);
+    auto one = XlaHelpers::One(b, type);
+
+    auto& hue = hsv[0];
+    hue = b->Rem(b->Add(hsv[0], delta), one);
+    hue = b->Select(b->Lt(hue, zero), b->Rem(b->Add(one, hue), one), hue);
+
+    auto rgb = HSVToRGB(context->builder(), hsv, context->input_type(0));
+
+    context->SetOutput(0, b->ConcatInDim(rgb, channel_dim));
+  }
+};
+REGISTER_XLA_OP(Name("AdjustHue"), AdjustHueOp);
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f36b3f594826c27b7866d956c855aa3638db9cb4
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc
@@ -0,0 +1,449 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/type_util.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/array4d.h"
+#include "tensorflow/core/framework/kernel_def_builder.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/lib/math/math_util.h"
+
+namespace tensorflow {
+namespace {
+
+// We implement bilinear interpolation by upsampling followed by convolution.
+// The basic idea is as follows. To scale from NxN to RxR:
+//
+//    1. S := (N - 1) /  gcd(N-1, R-1)
+//    2. k := (R - 1) /  gcd(N-1, R-1)
+//    3. Convolution(kxk, stride=S, lhs_dilation=k, padding=k-1)
+//
+// For example, to Scale from 7x7 -> 15x15:
+//
+//    1. S := (7-1) / gcd(7-1, 15-1) = 6 / gcd(6, 14) = 6 / 2 = 3
+//    2. k := (15 - 1) / gcd(7-1, 15-1) = 14 / gcd(6, 14) = 14 / 2 = 7
+//    3. Convolution(7x7, stride=3, lhs_dilation=3, padding=2)
+//
+//
+// The 7x7 -> 15x15 case is much too large to write out in full as an
+// example. The smallest interesting example is 3x3 -> 4x4.
+//
+// S := 2
+// k := 3
+//
+// 00 03 06    00 00 00 00 00 00 00 00 00 00 00      00 02 04 06
+// 09 12 15 -> 00 00 00 00 00 00 00 00 00 00 00   -> 06 08 10 12
+// 18 21 24    00 00 00 00 00 03 00 00 06 00 00      12 14 16 18
+//             00 00 00 00 00 00 00 00 00 00 00      18 20 22 24
+//             00 00 00 00 00 00 00 00 00 00 00
+//             00 00 09 00 00 12 00 00 15 00 00
+//             00 00 00 00 00 00 00 00 00 00 00
+//             00 00 00 00 00 00 00 00 00 00 00
+//             00 00 18 00 00 21 00 00 24 00 00
+//             00 00 00 00 00 00 00 00 00 00 00
+//             00 00 00 00 00 00 00 00 00 00 00
+//
+// with the following convolutional kernel, with stride [2, 2]:
+//       1 2 3 2 1
+//       2 4 6 4 2
+// 1/9 * 3 6 9 6 3
+//       2 4 6 4 2
+//       1 2 3 2 1
+
+// Computes the size of the convolutional kernel and stride to use when resizing
+// from in_size to out_size.
+struct ResizeConvolutionDims {
+  // Size of the kernel to use.
+  std::vector<int64> kernel_size;
+
+  // Stride of the convolution to use.
+  std::vector<int64> stride;
+};
+ResizeConvolutionDims ComputeResizeConvolutionParameters(
+    gtl::ArraySlice<int64> in_size, gtl::ArraySlice<int64> out_size) {
+  CHECK_EQ(in_size.size(), out_size.size());
+  int num_spatial_dims = in_size.size();
+  ResizeConvolutionDims dims;
+  dims.kernel_size.resize(num_spatial_dims);
+  dims.stride.resize(num_spatial_dims);
+  for (int i = 0; i < num_spatial_dims; ++i) {
+    if (in_size[i] == 1) {
+      // We must handle input size 1 specially because XLA convolution does
+      // not allow stride 0.
+      dims.stride[i] = dims.kernel_size[i] = 1;
+    } else if (out_size[i] == 1) {
+      // If in_size[i] > 1 but out_size[i] == 1, then we slice out the first
+      // entry before resizing.
+      dims.stride[i] = dims.kernel_size[i] = 1;
+    } else {
+      int64 gcd = MathUtil::GCD(static_cast<uint64>(in_size[i] - 1),
+                                static_cast<uint64>(out_size[i] - 1));
+      dims.stride[i] = (in_size[i] - 1) / gcd;
+      dims.kernel_size[i] = (out_size[i] - 1) / gcd;
+    }
+  }
+  return dims;
+}
+
+xla::ComputationDataHandle MakeBilinearResizeKernel(
+    xla::ComputationBuilder* builder, gtl::ArraySlice<int64> kernel_size,
+    int64 channels) {
+  // Form a 2D convolution kernel like:
+  //       1 2 3 2 1
+  //       2 4 6 4 2
+  // 1/9 * 3 6 9 6 3
+  //       2 4 6 4 2
+  //       1 2 3 2 1
+  // by multiplying two 1D kernels of the form:
+  // 1/3 * [1 2 3 2 1]
+  auto make_1d_kernel = [](int64 n) {
+    std::vector<float> kernel(n * 2 - 1);
+    for (int64 i = 0; i < n; ++i) {
+      float v = (i + 1.0f) / n;
+      kernel[i] = v;
+      kernel[n * 2 - 2 - i] = v;
+    }
+    return kernel;
+  };
+
+  xla::ComputationDataHandle channels_iota;
+  // DT_INT32 Iota will always return status::OK().
+  TF_CHECK_OK(
+      XlaHelpers::Iota(builder, DataType::DT_INT32, channels, &channels_iota));
+
+  auto diag = builder->ConvertElementType(
+      builder->Eq(
+          builder->Broadcast(channels_iota, {2 * kernel_size[0] - 1,
+                                             2 * kernel_size[1] - 1, channels}),
+          channels_iota, /*broadcast_dimensions=*/{2}),
+      xla::PrimitiveType::F32);
+  return builder->Mul(
+      builder->Mul(diag,
+                   builder->ConstantR1<float>(make_1d_kernel(kernel_size[1])),
+                   /*broadcast_dimensions=*/{1}),
+      builder->ConstantR1<float>(make_1d_kernel(kernel_size[0])),
+      /*broadcast_dimensions=*/{0});
+}
+
+xla::ComputationDataHandle ResizeUsingDilationAndConvolution(
+    xla::ComputationBuilder* builder, const xla::ComputationDataHandle& input,
+    const int num_spatial_dims, std::vector<int64> in_size,
+    std::vector<int64> out_size, const int64 channels) {
+  // Picture for a 1x3 to 1x4 resize:
+  // stride = 2, kernel size = 3
+  // Input:
+  // 3 6 9
+  // Input with dilation and padding:
+  // 0 0 3 0 0 6 0 0 9 0 0
+  // Convolution kernel:
+  // 1/3 * [1 2 3 2 1]
+  // Output:
+  // 3 5 7 9
+  xla::ConvolutionDimensionNumbers dimension_numbers;
+  dimension_numbers.set_input_batch_dimension(0);
+  dimension_numbers.set_output_batch_dimension(0);
+  dimension_numbers.set_input_feature_dimension(3);
+  dimension_numbers.set_output_feature_dimension(3);
+  for (int i = 0; i < num_spatial_dims; ++i) {
+    dimension_numbers.add_input_spatial_dimensions(1 + i);
+    dimension_numbers.add_output_spatial_dimensions(1 + i);
+    dimension_numbers.add_kernel_spatial_dimensions(i);
+  }
+  dimension_numbers.set_kernel_input_feature_dimension(num_spatial_dims);
+  dimension_numbers.set_kernel_output_feature_dimension(num_spatial_dims + 1);
+
+  ResizeConvolutionDims dims =
+      ComputeResizeConvolutionParameters(in_size, out_size);
+  xla::ComputationDataHandle kernel =
+      MakeBilinearResizeKernel(builder, dims.kernel_size, channels);
+  xla::ComputationDataHandle output = builder->ConvGeneralDilated(
+      input, kernel, dims.stride,
+      /*padding=*/
+      {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1},
+       {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}},
+      /*lhs_dilation=*/dims.kernel_size,
+      /*rhs_dilation=*/{1, 1}, dimension_numbers);
+
+  // Add broadcasts to handle expanding from a size == 1 dimension to a
+  // size > 1 dimension.
+  for (int i = 0; i < num_spatial_dims; ++i) {
+    if (in_size[i] == 1 && out_size[i] > 1) {
+      output = builder->Add(output, builder->ConstantR1<float>(out_size[i], 0),
+                            /*broadcast_dimensions=*/{1 + i});
+    }
+  }
+  return output;
+}
+
+xla::ComputationDataHandle ResizeUsingDilationAndConvolutionGradOp(
+    xla::ComputationBuilder* builder, const xla::ComputationDataHandle& grad,
+    const int num_spatial_dims, std::vector<int64> in_size,
+    std::vector<int64> grad_size, const int64 channels) {
+  ResizeConvolutionDims dims =
+      ComputeResizeConvolutionParameters(in_size, grad_size);
+
+  // To form the backward convolution, we keep the kernel unchanged (it is
+  // already symmetric) and swap the roles of strides and LHS dilation.
+  xla::ConvolutionDimensionNumbers dimension_numbers;
+  dimension_numbers.set_input_batch_dimension(0);
+  dimension_numbers.set_output_batch_dimension(0);
+  dimension_numbers.set_input_feature_dimension(3);
+  dimension_numbers.set_output_feature_dimension(3);
+  for (int i = 0; i < num_spatial_dims; ++i) {
+    dimension_numbers.add_input_spatial_dimensions(1 + i);
+    dimension_numbers.add_output_spatial_dimensions(1 + i);
+    dimension_numbers.add_kernel_spatial_dimensions(i);
+  }
+  dimension_numbers.set_kernel_input_feature_dimension(num_spatial_dims);
+  dimension_numbers.set_kernel_output_feature_dimension(num_spatial_dims + 1);
+  xla::ComputationDataHandle kernel =
+      MakeBilinearResizeKernel(builder, dims.kernel_size, channels);
+
+  // Broadcast the input kernel where the forward op expanded from a size == 1
+  // dimension to a size > 1 dimension. This has the effect of summing the
+  // gradient contributions in that dimension.
+  for (int i = 0; i < num_spatial_dims; ++i) {
+    if (in_size[i] == 1 && grad_size[i] > 1) {
+      kernel = builder->Add(kernel, builder->ConstantR1<float>(grad_size[i], 0),
+                            /*broadcast_dimensions=*/{i});
+    }
+  }
+
+  xla::ComputationDataHandle output = builder->ConvGeneralDilated(
+      grad, kernel, /*window_strides=*/dims.kernel_size,
+      /*padding=*/
+      {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1},
+       {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}},
+      /*lhs_dilation=*/dims.stride,
+      /*rhs_dilation=*/{1, 1}, dimension_numbers);
+
+  // If in_size[i] > 1 and grad_size[i] == 1, pad the output in dimension i.
+  // Opposite of the slice performed by the forward op.
+  xla::PaddingConfig padding = xla::MakeNoPaddingConfig(4);
+  bool pad_output = false;
+  for (int i = 0; i < num_spatial_dims; ++i) {
+    if (in_size[i] > 1 && grad_size[i] == 1) {
+      pad_output = true;
+      padding.mutable_dimensions(1 + i)->set_edge_padding_high(in_size[i] - 1);
+    }
+  }
+  if (pad_output) {
+    output = builder->Pad(output, builder->ConstantR0<float>(0.0f), padding);
+  }
+  return output;
+}
+
+class ResizeBilinearOp : public XlaOpKernel {
+ public:
+  explicit ResizeBilinearOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("align_corners", &align_corners_));
+    OP_REQUIRES(
+        ctx, align_corners_ == true,
+        errors::Unimplemented(
+            "ResizeBilinear with align_corners=False is not yet implemented"));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::ComputationBuilder* b = ctx->builder();
+
+    TensorShape input_shape = ctx->InputShape(0);
+    OP_REQUIRES(ctx, input_shape.dims() == 4,
+                errors::InvalidArgument("input must be 4-dimensional",
+                                        input_shape.DebugString()));
+    const int64 batch = input_shape.dim_size(0);
+    std::vector<int64> in_size = {input_shape.dim_size(1),
+                                  input_shape.dim_size(2)};
+    const int64 channels = input_shape.dim_size(3);
+    OP_REQUIRES(ctx, in_size[0] > 0 && in_size[1] > 0,
+                errors::InvalidArgument("input size must be positive, got [",
+                                        in_size[0], ",", in_size[1], "]"));
+
+    std::vector<int64> out_size;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(1, &out_size));
+    OP_REQUIRES(ctx, out_size.size() == 2,
+                errors::InvalidArgument("output size must be length 2, got ",
+                                        out_size.size()));
+    OP_REQUIRES(ctx, out_size[0] > 0 && out_size[1] > 0,
+                errors::InvalidArgument("output size must be positive, got [",
+                                        out_size[0], ",", out_size[1], "]"));
+
+    const int num_spatial_dims = 2;
+
+    xla::ComputationDataHandle input = ctx->Input(0);
+
+    // If in_size[i] > 1 and out_size[i] == 1, slice out the first input in
+    // dimension i.
+    std::vector<int64> slice_size = in_size;
+    bool slice_input = false;
+    for (int i = 0; i < num_spatial_dims; ++i) {
+      if (in_size[i] > 1 && out_size[i] == 1) {
+        // If in_size[i] > 1 but out_size[i] == 1, then we slice out the first
+        // entry before resizing.
+        slice_input = true;
+        slice_size[i] = 1;
+      }
+    }
+    if (slice_input) {
+      input = b->Slice(input, {0, 0, 0, 0},
+                       {batch, slice_size[0], slice_size[1], channels},
+                       {1, 1, 1, 1});
+    }
+
+    // Output is always type float.
+    input = b->ConvertElementType(input, xla::F32);
+
+    // Special Case:
+    // Instead of doing a ResizeUsingDilationAndConvolution directly,
+    // while (out_size[0]-1) = c * 2^x * (in_size[0]-1) for x>1 c>1, resize the
+    // image to 2*(in_size[0]-1)+1 x-times and then resize by scale c(int here).
+    // Instead of resizing directly we resize it iteratively.
+    //
+    // Since bilinear resize can be broken down as 2 sequential linear
+    // operations along different dimensions.
+    // Given sufficient numerical stability and a<e<c and b<f<d, bilinear resize
+    // from image of size axb -> cxd is same as resizing axb -> exf -> cxd.
+    //
+    // This makes the convolutions kernels smaller and the operation faster.
+    xla::ComputationDataHandle output = input;
+    while (in_size != out_size) {
+      if (in_size[0] != 1 && in_size[1] != 1) {
+        std::vector<float> k = {
+            (static_cast<float>(out_size[0]) - 1) / ((in_size[0] - 1) * 2),
+            (static_cast<float>(out_size[1]) - 1) / ((in_size[1] - 1) * 2)};
+        if ((k[0] == std::floor(k[0])) && (k[1] == std::floor(k[1])) &&
+            k[0] > 1 && k[1] > 1) {
+          std::vector<int64> next_out_size = {(in_size[0] - 1) * 2 + 1,
+                                              (in_size[1] - 1) * 2 + 1};
+          output = ResizeUsingDilationAndConvolution(
+              b, input, num_spatial_dims, in_size, next_out_size, channels);
+          input = output;
+          in_size = next_out_size;
+        } else {
+          output = ResizeUsingDilationAndConvolution(
+              b, input, num_spatial_dims, in_size, out_size, channels);
+          in_size = out_size;
+        }
+      } else {
+        output = ResizeUsingDilationAndConvolution(b, input, num_spatial_dims,
+                                                   in_size, out_size, channels);
+        in_size = out_size;
+      }
+    }
+
+    ctx->SetOutput(0, output);
+  }
+
+ private:
+  bool align_corners_;
+};
+
+REGISTER_XLA_OP(Name("ResizeBilinear").CompileTimeConstInput("size"),
+                ResizeBilinearOp);
+
+class ResizeBilinearGradOp : public XlaOpKernel {
+ public:
+  explicit ResizeBilinearGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("align_corners", &align_corners_));
+    OP_REQUIRES(
+        ctx, align_corners_ == true,
+        errors::Unimplemented("ResizeBilinearGrad with align_corners=False is "
+                              "not yet implemented"));
+
+    DataType output_dtype;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("T", &output_dtype));
+    OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(output_dtype, &output_type_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::ComputationBuilder* b = ctx->builder();
+
+    TensorShape input_shape = ctx->InputShape(1);
+    OP_REQUIRES(ctx, input_shape.dims() == 4,
+                errors::InvalidArgument("input must be 4-dimensional",
+                                        input_shape.DebugString()));
+    const int64 batch = input_shape.dim_size(0);
+    std::vector<int64> in_size = {input_shape.dim_size(1),
+                                  input_shape.dim_size(2)};
+    const int64 channels = input_shape.dim_size(3);
+    OP_REQUIRES(ctx, in_size[0] > 0 && in_size[1] > 0,
+                errors::InvalidArgument("input size must be positive, got [",
+                                        in_size[0], ",", in_size[1], "]"));
+
+    TensorShape grad_shape = ctx->InputShape(0);
+    OP_REQUIRES(ctx, grad_shape.dims() == 4,
+                errors::InvalidArgument("gradient must be 4-dimensional",
+                                        grad_shape.DebugString()));
+    const int64 grad_batch = grad_shape.dim_size(0);
+    const std::vector<int64> grad_size = {grad_shape.dim_size(1),
+                                          grad_shape.dim_size(2)};
+    const int64 grad_channels = grad_shape.dim_size(3);
+    OP_REQUIRES(ctx, batch == grad_batch,
+                errors::InvalidArgument(
+                    "activations and gradients must have the same batch size (",
+                    batch, " vs. ", grad_batch, ")"));
+    OP_REQUIRES(ctx, grad_size[0] > 0 && grad_size[1] > 0,
+                errors::InvalidArgument("gradient size must be positive, got [",
+                                        grad_size[0], ",", grad_size[1], "]"));
+    OP_REQUIRES(
+        ctx, channels == grad_channels,
+        errors::InvalidArgument(
+            "activations and gradients must have the same number of channels (",
+            channels, " vs. ", grad_channels, ")"));
+
+    const int num_spatial_dims = 2;
+
+    xla::ComputationDataHandle grad = ctx->Input(0);
+
+    xla::ComputationDataHandle output = grad;
+    while (in_size != grad_size) {
+      if (in_size[0] != 1 && in_size[1] != 1) {
+        std::vector<float> k = {
+            (static_cast<float>(grad_size[0]) - 1) / ((in_size[0] - 1) * 2),
+            (static_cast<float>(grad_size[1]) - 1) / ((in_size[1] - 1) * 2)};
+        if ((k[0] == std::floor(k[0])) && (k[1] == std::floor(k[1])) &&
+            k[0] > 1 && k[1] > 1) {
+          std::vector<int64> next_grad_size = {(in_size[0] - 1) * 2 + 1,
+                                               (in_size[1] - 1) * 2 + 1};
+          output = ResizeUsingDilationAndConvolutionGradOp(
+              b, grad, num_spatial_dims, in_size, next_grad_size, channels);
+          grad = output;
+          in_size = next_grad_size;
+        } else {
+          output = ResizeUsingDilationAndConvolutionGradOp(
+              b, grad, num_spatial_dims, in_size, grad_size, channels);
+          in_size = grad_size;
+        }
+      } else {
+        output = ResizeUsingDilationAndConvolutionGradOp(
+            b, grad, num_spatial_dims, in_size, grad_size, channels);
+        in_size = grad_size;
+      }
+    }
+
+    output = b->ConvertElementType(output, output_type_);
+    ctx->SetOutput(0, output);
+  }
+
+ private:
+  bool align_corners_;
+  xla::PrimitiveType output_type_;
+};
+
+REGISTER_XLA_OP(Name("ResizeBilinearGrad"), ResizeBilinearGradOp);
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops.cc b/tensorflow/compiler/tf2xla/kernels/index_ops.cc
index e0dc1870f2a4934c35163f0cc10196e8fcbed9be..7bf4b435f526afa93d8a218b191928acb932cd6b 100644
--- a/tensorflow/compiler/tf2xla/kernels/index_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/index_ops.cc
@@ -80,7 +80,10 @@ void XlaArgMinMaxOp::Compile(XlaOpKernelContext* ctx) {
 
 XlaArgMaxOp::XlaArgMaxOp(OpKernelConstruction* ctx)
     : XlaArgMinMaxOp(ctx, /*is_min=*/false) {}
-REGISTER_XLA_OP(Name("ArgMax").Device(DEVICE_GPU_XLA_JIT), XlaArgMaxOp);
+REGISTER_XLA_OP(Name("ArgMax")
+                    .Device(DEVICE_GPU_XLA_JIT)
+                    .CompileTimeConstInput("dimension"),
+                XlaArgMaxOp);
 
 namespace {
 
@@ -90,7 +93,7 @@ class XlaArgMinOp : public XlaArgMinMaxOp {
 };
 XlaArgMinOp::XlaArgMinOp(OpKernelConstruction* ctx)
     : XlaArgMinMaxOp(ctx, /*is_min=*/true) {}
-REGISTER_XLA_OP(Name("ArgMin"), XlaArgMinOp);
+REGISTER_XLA_OP(Name("ArgMin").CompileTimeConstInput("dimension"), XlaArgMinOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc
index 20946e247a9459d7c8a0d8a666fef24bd32838f2..b1f3c3c298ce0cadf38b9bda715761fe7e2896d7 100644
--- a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc
+++ b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc
@@ -56,10 +56,10 @@ class ArgMaxCustomCallOp : public XlaOpKernel {
         errors::InvalidArgument("dim must be < input rank (",
                                 input_shape.dims(), "), but got: ", dim));
     const int64 dim_size = input_shape.dim_size(dim);
-    OP_REQUIRES(
-        ctx, dim_size > 0,
-        errors::InvalidArgument("Reduction axis ", dim, " is empty in shape: ",
-                                input_shape.DebugString()));
+    OP_REQUIRES(ctx, dim_size > 0,
+                errors::InvalidArgument(
+                    "Reduction axis ", dim,
+                    " is empty in shape: ", input_shape.DebugString()));
 
     // The output shape is the input shape contracted along dim.
     TensorShape output_shape;
@@ -113,9 +113,11 @@ class ArgMaxCustomCallOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(ArgMaxCustomCallOp);
 };
 
-REGISTER_XLA_OP(
-    Name("ArgMax").TypeConstraint("T", DT_FLOAT).Device(DEVICE_CPU_XLA_JIT),
-    ArgMaxCustomCallOp);
+REGISTER_XLA_OP(Name("ArgMax")
+                    .TypeConstraint("T", DT_FLOAT)
+                    .Device(DEVICE_CPU_XLA_JIT)
+                    .CompileTimeConstInput("dimension"),
+                ArgMaxCustomCallOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc
index fcef497e5845d9080bc83b54e92dcf2fdecf5f12..886baf8115243a22b7255a3961c914d4cf6c2ed5 100644
--- a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc
@@ -23,16 +23,18 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-constexpr std::array<DataType, 4> kMatmulTypes = {
-    {DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64}};
+constexpr std::array<DataType, 5> kMatmulTypes = {
+    {DT_HALF, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64}};
 
 class MatMulOp : public XlaOpKernel {
  public:
   explicit MatMulOp(OpKernelConstruction* ctx, bool is_sparse = false)
-      : XlaOpKernel(ctx) {
+      : XlaOpKernel(ctx), is_sparse_(is_sparse) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("transpose_a", &transpose_a_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("transpose_b", &transpose_b_));
     if (is_sparse) {
+      OP_REQUIRES_OK(ctx, ctx->GetAttr("Ta", &a_type_));
+      OP_REQUIRES_OK(ctx, ctx->GetAttr("Tb", &b_type_));
       // SparseMatMul is actually dense matmul with a hint that one or
       // both of the inputs may contain a lot of zeroes. On CPU these
       // inputs are dynamically converted to sparse representation
@@ -66,14 +68,25 @@ class MatMulOp : public XlaOpKernel {
 
     xla::ComputationDataHandle a = ctx->Input(0);
     xla::ComputationDataHandle b = ctx->Input(1);
+    if (is_sparse_) {
+      if (a_type_ == DT_BFLOAT16) {
+        a = ctx->builder()->ConvertElementType(a, xla::F32);
+      }
+      if (b_type_ == DT_BFLOAT16) {
+        b = ctx->builder()->ConvertElementType(b, xla::F32);
+      }
+    }
     auto lhs = (transpose_a_) ? ctx->builder()->Transpose(a, {1, 0}) : a;
     auto rhs = (transpose_b_) ? ctx->builder()->Transpose(b, {1, 0}) : b;
     ctx->SetOutput(0, ctx->builder()->Dot(lhs, rhs));
   }
 
  private:
+  bool is_sparse_;
   bool transpose_a_;
   bool transpose_b_;
+  DataType a_type_;
+  DataType b_type_;
 };
 
 REGISTER_XLA_OP(Name("MatMul").TypeConstraint("T", kMatmulTypes), MatMulOp);
@@ -85,10 +98,7 @@ class SparseMatMulOp : public MatMulOp {
   ~SparseMatMulOp() override = default;
 };
 
-REGISTER_XLA_OP(Name("SparseMatMul")
-                    .TypeConstraint("Ta", kFloatTypes)
-                    .TypeConstraint("Tb", kFloatTypes),
-                SparseMatMulOp);
+REGISTER_XLA_OP(Name("SparseMatMul"), SparseMatMulOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/mirror_pad_op.cc b/tensorflow/compiler/tf2xla/kernels/mirror_pad_op.cc
index bea1d1600b5b5fc0c44f0208d394f25061ecbb68..05a36a031ad73be289604da1b7e56203ff12fbf5 100644
--- a/tensorflow/compiler/tf2xla/kernels/mirror_pad_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/mirror_pad_op.cc
@@ -92,7 +92,8 @@ class MirrorPadOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(MirrorPadOp);
 };
 
-REGISTER_XLA_OP(Name("MirrorPad"), MirrorPadOp);
+REGISTER_XLA_OP(Name("MirrorPad").CompileTimeConstInput("paddings"),
+                MirrorPadOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/one_hot_op.cc b/tensorflow/compiler/tf2xla/kernels/one_hot_op.cc
index 2a9cfcb2eb86399bd446db8d591012a7a2f3d667..9f7c9913802d311895479b914b66553e135aa426 100644
--- a/tensorflow/compiler/tf2xla/kernels/one_hot_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/one_hot_op.cc
@@ -76,7 +76,7 @@ class OneHotOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(OneHotOp);
 };
 
-REGISTER_XLA_OP(Name("OneHot"), OneHotOp);
+REGISTER_XLA_OP(Name("OneHot").CompileTimeConstInput("depth"), OneHotOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/pad_op.cc b/tensorflow/compiler/tf2xla/kernels/pad_op.cc
index d841bd37b33c31dbc156fa824ff62a58169a99cb..791351637aee61c5fdd911dd8a48959990514395 100644
--- a/tensorflow/compiler/tf2xla/kernels/pad_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/pad_op.cc
@@ -83,8 +83,8 @@ class PadOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("Pad"), PadOp);
-REGISTER_XLA_OP(Name("PadV2"), PadOp);
+REGISTER_XLA_OP(Name("Pad").CompileTimeConstInput("paddings"), PadOp);
+REGISTER_XLA_OP(Name("PadV2").CompileTimeConstInput("paddings"), PadOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc
index 2b6053d19dd64a0c893b3613133c8f4691f9cd27..0b5a38967aeb5b4cd66de5220e2c764371440c2d 100644
--- a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc
@@ -455,14 +455,16 @@ class AvgPool2DGradOp : public AvgPoolGradOp {
                 errors::InvalidArgument("Invalid data format"));
   }
 };
-REGISTER_XLA_OP(Name("AvgPoolGrad"), AvgPool2DGradOp);
+REGISTER_XLA_OP(Name("AvgPoolGrad").CompileTimeConstInput("orig_input_shape"),
+                AvgPool2DGradOp);
 
 class AvgPool3DGradOp : public AvgPoolGradOp {
  public:
   explicit AvgPool3DGradOp(OpKernelConstruction* ctx)
       : AvgPoolGradOp(ctx, /*num_spatial_dims=*/3) {}
 };
-REGISTER_XLA_OP(Name("AvgPool3DGrad"), AvgPool3DGradOp);
+REGISTER_XLA_OP(Name("AvgPool3DGrad").CompileTimeConstInput("orig_input_shape"),
+                AvgPool3DGradOp);
 
 }  // anonymous namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/random_ops.cc b/tensorflow/compiler/tf2xla/kernels/random_ops.cc
index 2421825ead17a3acee9f145f00904d382fb656f4..c0994c434bca5174eaee7b9e63e10432d9c2ed8d 100644
--- a/tensorflow/compiler/tf2xla/kernels/random_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/random_ops.cc
@@ -52,7 +52,8 @@ class RandomUniformOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(RandomUniformOp);
 };
 
-REGISTER_XLA_OP(Name("RandomUniform"), RandomUniformOp);
+REGISTER_XLA_OP(Name("RandomUniform").CompileTimeConstInput("shape"),
+                RandomUniformOp);
 
 class RandomUniformIntOp : public XlaOpKernel {
  public:
@@ -83,7 +84,8 @@ class RandomUniformIntOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(RandomUniformIntOp);
 };
 
-REGISTER_XLA_OP(Name("RandomUniformInt"), RandomUniformIntOp);
+REGISTER_XLA_OP(Name("RandomUniformInt").CompileTimeConstInput("shape"),
+                RandomUniformIntOp);
 
 class RandomStandardNormalOp : public XlaOpKernel {
  public:
@@ -111,7 +113,8 @@ class RandomStandardNormalOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(RandomStandardNormalOp);
 };
 
-REGISTER_XLA_OP(Name("RandomStandardNormal"), RandomStandardNormalOp);
+REGISTER_XLA_OP(Name("RandomStandardNormal").CompileTimeConstInput("shape"),
+                RandomStandardNormalOp);
 
 class TruncatedNormalOp : public XlaOpKernel {
  public:
@@ -183,7 +186,8 @@ class TruncatedNormalOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("TruncatedNormal"), TruncatedNormalOp);
+REGISTER_XLA_OP(Name("TruncatedNormal").CompileTimeConstInput("shape"),
+                TruncatedNormalOp);
 
 }  // anonymous namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc b/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc
index 647b6274083cf8886af6c451b746416445a4a2b2..03b13b2924f4b81c1017804c91d5ffb81c44ea0b 100644
--- a/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc
@@ -35,7 +35,7 @@ class SumOp : public XlaReductionOp {
   }
 };
 
-REGISTER_XLA_OP(Name("Sum"), SumOp);
+REGISTER_XLA_OP(Name("Sum").CompileTimeConstInput("reduction_indices"), SumOp);
 
 class ProdOp : public XlaReductionOp {
  public:
@@ -53,7 +53,8 @@ class ProdOp : public XlaReductionOp {
   }
 };
 
-REGISTER_XLA_OP(Name("Prod"), ProdOp);
+REGISTER_XLA_OP(Name("Prod").CompileTimeConstInput("reduction_indices"),
+                ProdOp);
 
 class MinOp : public XlaReductionOp {
  public:
@@ -73,7 +74,7 @@ class MinOp : public XlaReductionOp {
   }
 };
 
-REGISTER_XLA_OP(Name("Min"), MinOp);
+REGISTER_XLA_OP(Name("Min").CompileTimeConstInput("reduction_indices"), MinOp);
 
 class MaxOp : public XlaReductionOp {
  public:
@@ -93,7 +94,7 @@ class MaxOp : public XlaReductionOp {
   }
 };
 
-REGISTER_XLA_OP(Name("Max"), MaxOp);
+REGISTER_XLA_OP(Name("Max").CompileTimeConstInput("reduction_indices"), MaxOp);
 
 class MeanOp : public XlaReductionOp {
  public:
@@ -115,7 +116,8 @@ class MeanOp : public XlaReductionOp {
   }
 };
 
-REGISTER_XLA_OP(Name("Mean"), MeanOp);
+REGISTER_XLA_OP(Name("Mean").CompileTimeConstInput("reduction_indices"),
+                MeanOp);
 
 class AllOp : public XlaReductionOp {
  public:
@@ -133,7 +135,7 @@ class AllOp : public XlaReductionOp {
   }
 };
 
-REGISTER_XLA_OP(Name("All"), AllOp);
+REGISTER_XLA_OP(Name("All").CompileTimeConstInput("reduction_indices"), AllOp);
 
 class AnyOp : public XlaReductionOp {
  public:
@@ -151,7 +153,7 @@ class AnyOp : public XlaReductionOp {
   }
 };
 
-REGISTER_XLA_OP(Name("Any"), AnyOp);
+REGISTER_XLA_OP(Name("Any").CompileTimeConstInput("reduction_indices"), AnyOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/reshape_op.cc b/tensorflow/compiler/tf2xla/kernels/reshape_op.cc
index 5952e752724d1e6953dd4dbb6a8099b847c64d08..af4d64b159c09ed7e01017f25a2b23e58542dc3c 100644
--- a/tensorflow/compiler/tf2xla/kernels/reshape_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/reshape_op.cc
@@ -95,7 +95,7 @@ class ReshapeOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("Reshape"), ReshapeOp);
+REGISTER_XLA_OP(Name("Reshape").CompileTimeConstInput("shape"), ReshapeOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/reverse_op.cc b/tensorflow/compiler/tf2xla/kernels/reverse_op.cc
index 7489321f72f50c8f55f8da9dabb9f4b5c7797195..e51d386926763ecbb5a943dfb6f872e78901dc69 100644
--- a/tensorflow/compiler/tf2xla/kernels/reverse_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/reverse_op.cc
@@ -16,7 +16,6 @@ limitations under the License.
 // XLA-specific reverse Op.
 
 #include "tensorflow/compiler/tf2xla/type_util.h"
-#include "tensorflow/compiler/tf2xla/xla_compilation_device.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -53,7 +52,8 @@ class ReverseOp : public XlaOpKernel {
     xla::Literal lax;
     OP_REQUIRES_OK(ctx, ctx->ConstantInputReshaped(1, {x_shape.dims()}, &lax));
     std::vector<bool> revdims(x_shape.dims());
-    std::copy(lax.preds().begin(), lax.preds().end(), revdims.begin());
+    std::copy(lax.data<bool>().begin(), lax.data<bool>().end(),
+              revdims.begin());
     std::vector<int64> dimensions;
 
     for (int d = 0; d < x_shape.dims(); ++d) {
@@ -66,7 +66,7 @@ class ReverseOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("Reverse"), ReverseOp);
+REGISTER_XLA_OP(Name("Reverse").CompileTimeConstInput("dims"), ReverseOp);
 
 class ReverseV2Op : public XlaOpKernel {
  public:
@@ -104,7 +104,7 @@ class ReverseV2Op : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("ReverseV2"), ReverseV2Op);
+REGISTER_XLA_OP(Name("ReverseV2").CompileTimeConstInput("axis"), ReverseV2Op);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ee4a94164c4a43828eb4feedbfa9d1a9e231ef8f
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
@@ -0,0 +1,147 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <vector>
+
+#include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/type_util.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/concat_lib.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace {
+
+// TODO(phawkins): implement double-sized windowed reductions in XLA and remove
+// the type constraint.
+constexpr std::array<DataType, 3> kScanOpTypes = {
+    {DT_HALF, DT_BFLOAT16, DT_FLOAT}};
+
+class ScanOp : public XlaOpKernel {
+ public:
+  ScanOp(OpKernelConstruction* ctx, bool sum) : XlaOpKernel(ctx), sum_(sum) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("reverse", &reverse_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("exclusive", &exclusive_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    const TensorShape input_shape = ctx->InputShape(0);
+    const TensorShape tensor_axis_shape = ctx->InputShape(1);
+
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(tensor_axis_shape),
+                errors::InvalidArgument("ScanOp: axis must be a scalar, not ",
+                                        tensor_axis_shape.DebugString()));
+
+    int64 axis;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(1, &axis));
+    if (axis < 0) {
+      axis += input_shape.dims();
+    }
+    OP_REQUIRES(
+        ctx, FastBoundsCheck(axis, input_shape.dims()),
+        errors::InvalidArgument("ScanOp: Expected scan axis in the range [",
+                                -input_shape.dims(), ", ", input_shape.dims(),
+                                "), but got ", axis));
+
+    DataType dtype = ctx->input_type(0);
+
+    if (input_shape.num_elements() == 0) {
+      // Exit early if there is nothing to compute.
+      ctx->SetOutput(0, ctx->Input(0));
+      return;
+    }
+
+    xla::ComputationBuilder* builder = ctx->builder();
+
+    std::vector<int64> window_strides(input_shape.dims(), 1);
+    std::vector<int64> window_dims(input_shape.dims(), 1);
+    window_dims[axis] = input_shape.dim_size(axis);
+
+    std::vector<std::pair<int64, int64>> padding(input_shape.dims(), {0, 0});
+    padding[axis].first = input_shape.dim_size(axis) - 1;
+    // In exclusive mode, add an extra padding element so there is a complete
+    // window of padding before the data starts.
+    if (exclusive_) {
+      ++padding[axis].first;
+    }
+    if (reverse_) {
+      std::swap(padding[axis].first, padding[axis].second);
+    }
+
+    xla::ComputationDataHandle input = ctx->Input(0);
+    xla::ComputationDataHandle init;
+    const xla::Computation* reducer;
+    if (sum_) {
+      init = XlaHelpers::Zero(builder, dtype);
+      reducer = ctx->GetOrCreateAdd(dtype);
+    } else {
+      init = XlaHelpers::One(builder, dtype);
+      reducer = ctx->GetOrCreateMul(dtype);
+    }
+    auto output = builder->ReduceWindowWithGeneralPadding(
+        ctx->Input(0), init, *reducer, window_dims, window_strides, padding);
+
+    // In exclusive mode, we have computed an extra element containing the sum
+    // of all the input elements. Slice off this extra "last" element.
+    if (exclusive_) {
+      if (reverse_) {
+        output = builder->SliceInDim(output, 1, input_shape.dim_size(axis) + 1,
+                                     1, axis);
+
+      } else {
+        output =
+            builder->SliceInDim(output, 0, input_shape.dim_size(axis), 1, axis);
+      }
+    }
+    ctx->SetOutput(0, output);
+  }
+
+ private:
+  const bool sum_;  // True=cumulative sum. False=cumulative product.
+  bool reverse_;
+  bool exclusive_;
+};
+
+class CumsumOp : public ScanOp {
+ public:
+  explicit CumsumOp(OpKernelConstruction* ctx) : ScanOp(ctx, /*sum=*/true) {}
+};
+REGISTER_XLA_OP(Name("Cumsum")
+                    .TypeConstraint("T", kScanOpTypes)
+                    .CompileTimeConstInput("axis"),
+                CumsumOp);
+
+class CumprodOp : public ScanOp {
+ public:
+  explicit CumprodOp(OpKernelConstruction* ctx) : ScanOp(ctx, /*sum=*/false) {}
+};
+REGISTER_XLA_OP(Name("Cumprod")
+                    .TypeConstraint("T", kScanOpTypes)
+                    .CompileTimeConstInput("axis"),
+                CumprodOp);
+
+}  // anonymous namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc b/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc
index 8a67c0b67fcd95f4841c5e011a4e51638eea5b0f..c220edd588071ef262621784015d34cd475b2918 100644
--- a/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc
@@ -43,24 +43,11 @@ xla::ComputationDataHandle XlaComputeScatterAddDynamicSlice(
   TensorShape out_shape(flat_shape);
   out_shape.set_dim(0, num_segments);
 
-  // TODO(b/37575001) The tensor in which we construct the output during
-  // the loop must have rank >= 3 as a workaround for lowering issues.
-  int64 extra_dims = 0;
-  if (out_shape.dims() < 3) {
-    extra_dims = 3 - out_shape.dims();
-  }
-  TensorShape loop_out_shape;
-  for (int64 k = 0; k < extra_dims; ++k) {
-    loop_out_shape.AddDim(1);
-  }
-  loop_out_shape.AppendShape(out_shape);
-
   // Slices from the input data are same shape as the input data, except dim 0.
   TensorShape slice_shape(flat_shape);
   slice_shape.set_dim(0, 1);
-  // slices are reshaped into the rank >= 3 shape of the loop-carried output
-  TensorShape loop_out_slice_shape(loop_out_shape);
-  loop_out_slice_shape.set_dim(extra_dims, 1);
+  TensorShape loop_out_slice_shape(out_shape);
+  loop_out_slice_shape.set_dim(0, 1);
 
   // Construct the initial values of the loop-carried variables
   // Flatten the indices into 1-D for ease of iteration.
@@ -70,7 +57,7 @@ xla::ComputationDataHandle XlaComputeScatterAddDynamicSlice(
 
   auto init_i = builder->ConstantR0<int32>(0);
   auto init_out = builder->Broadcast(XlaHelpers::Zero(builder, dtype),
-                                     loop_out_shape.dim_sizes());
+                                     out_shape.dim_sizes());
 
   xla::PrimitiveType ptype;
   TF_CHECK_OK(DataTypeToPrimitiveType(dtype, &ptype));
@@ -83,7 +70,7 @@ xla::ComputationDataHandle XlaComputeScatterAddDynamicSlice(
        // The scatter indices tensor is loop invariant.
        xla::ShapeUtil::MakeShape(xla::S32, {indices_shape.num_elements()}),
        // The output data array is updated each loop iteration.
-       xla::ShapeUtil::MakeShape(ptype, loop_out_shape.dim_sizes())});
+       xla::ShapeUtil::MakeShape(ptype, out_shape.dim_sizes())});
   xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(tuple_shapes);
 
   auto init = builder->Tuple({init_i, data_flat, indices_1d, init_out});
@@ -95,7 +82,6 @@ xla::ComputationDataHandle XlaComputeScatterAddDynamicSlice(
                condb.Parameter(0, tuple_shape, "ScatterAddWhileTuple"), 0),
            condb.ConstantR0<int32>(indices_shape.num_elements()));
   auto cond_status = condb.Build();
-  // TF_CHECK_OK(cond_status);
   auto cond = cond_status.ConsumeValueOrDie();
 
   // Construct the while loop body's function. The implementation of scatter is:
@@ -123,11 +109,9 @@ xla::ComputationDataHandle XlaComputeScatterAddDynamicSlice(
                       loop_out_slice_shape.dim_sizes());
 
     // Index into the output array.
-    // Construct the index into the R3+ output array 0, ..., <index>, 0, ...
-    std::vector<xla::ComputationDataHandle> out_index_vals(
-        loop_out_shape.dims(), zero);
-    out_index_vals[extra_dims] =
-        bodyb.DynamicSlice(idcs, bodyb.Reshape(i, {1}), {1});
+    std::vector<xla::ComputationDataHandle> out_index_vals(out_shape.dims(),
+                                                           zero);
+    out_index_vals[0] = bodyb.DynamicSlice(idcs, bodyb.Reshape(i, {1}), {1});
     auto out_index = bodyb.ConcatInDim(out_index_vals, 0);
 
     // Slice the output array, update value, and update the output slice.
@@ -142,12 +126,10 @@ xla::ComputationDataHandle XlaComputeScatterAddDynamicSlice(
     bodyb.Tuple({ip1, data, idcs, updated_output});
   }
   auto body_status = bodyb.Build();
-  // TF_CHECK_OK(body_status);
   auto body = body_status.ConsumeValueOrDie();
 
   auto gather_while = builder->While(cond, body, init);
-  auto updated_output = builder->GetTupleElement(gather_while, 3);
-  return builder->Reshape(updated_output, out_shape.dim_sizes());
+  return builder->GetTupleElement(gather_while, 3);
 }
 
 namespace {
diff --git a/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc b/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc
index c2b0e1bb4c1a141d0ab3f5b3ff5397d9da620bd8..2c31f8d90891924f6f86a54ccf548de4df87f3bd 100644
--- a/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc
@@ -138,7 +138,11 @@ class RangeOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("Range"), RangeOp);
+REGISTER_XLA_OP(Name("Range")
+                    .CompileTimeConstInput("start")
+                    .CompileTimeConstInput("limit")
+                    .CompileTimeConstInput("delta"),
+                RangeOp);
 
 class LinSpaceOp : public XlaOpKernel {
  public:
@@ -207,7 +211,11 @@ class LinSpaceOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("LinSpace"), LinSpaceOp);
+REGISTER_XLA_OP(Name("LinSpace")
+                    .CompileTimeConstInput("start")
+                    .CompileTimeConstInput("stop")
+                    .CompileTimeConstInput("num"),
+                LinSpaceOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/shape_op.cc b/tensorflow/compiler/tf2xla/kernels/shape_op.cc
index 24a99f253d6dc8bb699fff587c363b12c227e821..05354bca5bb089703fdcceb6f44648bbb98d004b 100644
--- a/tensorflow/compiler/tf2xla/kernels/shape_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/shape_op.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 // XLA-specific Shape Ops.
 
+#include "tensorflow/compiler/tf2xla/kernels/shape_util.h"
 #include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
@@ -27,56 +28,42 @@ namespace {
 
 class ShapeOp : public XlaOpKernel {
  public:
-  explicit ShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
+  explicit ShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_dtype_));
+  }
 
   void Compile(XlaOpKernelContext* ctx) override {
     const TensorShape input_shape = ctx->InputShape(0);
-    const int rank = input_shape.dims();
-    Tensor shape_constant(DT_INT32, TensorShape({rank}));
-    auto vec = shape_constant.vec<int32>();
-    // TODO(dga): support int64.  b/28119922.
-    for (int i = 0; i < rank; ++i) {
-      int64 dim_size = input_shape.dim_size(i);
-      OP_REQUIRES(
-          ctx, FastBoundsCheck(dim_size, std::numeric_limits<int32>::max()),
-          errors::InvalidArgument("Shape does not support tensors > int32max",
-                                  " but dim ", i, " is ", dim_size));
-      vec(i) = static_cast<int32>(dim_size);
-    }
-
+    Tensor shape_constant(out_dtype_, TensorShape({input_shape.dims()}));
+    OP_REQUIRES_OK(ctx, TensorShapeToConstant(input_shape, &shape_constant));
     ctx->SetConstantOutput(0, shape_constant);
   }
+
+ private:
+  DataType out_dtype_;
 };
 
 REGISTER_XLA_OP(Name("Shape"), ShapeOp);
 
 class ShapeNOp : public XlaOpKernel {
  public:
-  explicit ShapeNOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
+  explicit ShapeNOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_dtype_));
+  }
 
   void Compile(XlaOpKernelContext* ctx) override {
     for (int i = 0; i < ctx->num_inputs(); ++i) {
-      const TensorShape shape = ctx->InputShape(i);
-      const int dims = shape.dims();
-      Tensor shape_constant(DT_INT32, TensorShape({dims}));
-      auto vec = shape_constant.vec<int32>();
-
-      // TODO(dga): support int64.  b/28119922.
-      for (int j = 0; j < dims; ++j) {
-        int64 dim_size = shape.dim_size(j);
-        OP_REQUIRES(
-            ctx, FastBoundsCheck(dim_size, std::numeric_limits<int32>::max()),
-            errors::InvalidArgument("Shape does not support tensors > int32max",
-                                    " but shape ", i, " dim ", j, " is ",
-                                    dim_size));
-        vec(j) = static_cast<int32>(dim_size);
-      }
-
+      const TensorShape input_shape = ctx->InputShape(i);
+      Tensor shape_constant(out_dtype_, TensorShape({input_shape.dims()}));
+      OP_REQUIRES_OK(ctx, TensorShapeToConstant(input_shape, &shape_constant));
       ctx->SetConstantOutput(i, shape_constant);
     }
   }
 
   bool IsExpensive() override { return false; }
+
+ private:
+  DataType out_dtype_;
 };
 REGISTER_XLA_OP(Name("ShapeN"), ShapeNOp);
 
@@ -134,7 +121,7 @@ class ExpandDimsOp : public XlaOpKernel {
     xla::Literal literal;
     OP_REQUIRES_OK(ctx, ctx->ConstantInputReshaped(1, {1}, &literal));
 
-    int dim = literal.s32s(0);
+    int dim = literal.data<int32>()[0];
 
     OP_REQUIRES(ctx,
                 (dim >= -1 - input_shape.dims() && dim <= input_shape.dims()),
@@ -163,7 +150,7 @@ class ExpandDimsOp : public XlaOpKernel {
     ctx->SetOutput(0, ctx->builder()->Reshape(ctx->Input(0), new_shape));
   }
 };
-REGISTER_XLA_OP(Name("ExpandDims"), ExpandDimsOp);
+REGISTER_XLA_OP(Name("ExpandDims").CompileTimeConstInput("dim"), ExpandDimsOp);
 
 class SqueezeOp : public XlaOpKernel {
  public:
diff --git a/tensorflow/compiler/tf2xla/kernels/shape_util.cc b/tensorflow/compiler/tf2xla/kernels/shape_util.cc
new file mode 100644
index 0000000000000000000000000000000000000000..76ea5f525598f511f295eb5a30f3cf603fbf57aa
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/shape_util.cc
@@ -0,0 +1,48 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/kernels/shape_util.h"
+
+#include <limits>
+
+#include "tensorflow/core/kernels/bounds_check.h"
+
+namespace tensorflow {
+
+Status TensorShapeToConstant(const TensorShape& input_shape,
+                             Tensor* shape_constant) {
+  const int dims = input_shape.dims();
+  if (shape_constant->dtype() == DT_INT32) {
+    auto vec = shape_constant->vec<int32>();
+    for (int i = 0; i < dims; ++i) {
+      int64 dim_size = input_shape.dim_size(i);
+      if (!FastBoundsCheck(dim_size, std::numeric_limits<int32>::max())) {
+        return errors::InvalidArgument(
+            "Shape with out_type=int32 does not support tensors > int32max",
+            " but dim ", i, " is ", dim_size);
+      }
+      vec(i) = static_cast<int32>(dim_size);
+    }
+  } else {
+    auto vec = shape_constant->vec<int64>();
+    for (int i = 0; i < dims; ++i) {
+      int64 dim_size = input_shape.dim_size(i);
+      vec(i) = dim_size;
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/shape_util.h b/tensorflow/compiler/tf2xla/kernels/shape_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..575086e118080f6799a54d3ae6409b2b641c4341
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/shape_util.h
@@ -0,0 +1,34 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_TF2XLA_KERNELS_SHAPE_UTIL_H_
+#define TENSORFLOW_COMPILER_TF2XLA_KERNELS_SHAPE_UTIL_H_
+
+#include <limits>
+
+#include "tensorflow/core/framework/tensor.h"
+
+namespace tensorflow {
+
+// Converts a TensorShape to a constant Tensor.
+//
+// The input TensorShape input_shape is used to populate the elements of
+// shape_constant, which is modified in place.
+Status TensorShapeToConstant(const TensorShape& input_shape,
+                             Tensor* shape_constant);
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_TF2XLA_KERNELS_SHAPE_UTIL_H_
diff --git a/tensorflow/compiler/tf2xla/kernels/slice_op.cc b/tensorflow/compiler/tf2xla/kernels/slice_op.cc
index fbe8c78d8fb5f800967942555531a50937cad0ca..be1e97bf26fa4cde1b741c8d0b843a85ce33a59c 100644
--- a/tensorflow/compiler/tf2xla/kernels/slice_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/slice_op.cc
@@ -112,7 +112,9 @@ class SliceOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("Slice"), SliceOp);
+REGISTER_XLA_OP(
+    Name("Slice").CompileTimeConstInput("begin").CompileTimeConstInput("size"),
+    SliceOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc b/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc
index 83a87f19a718ce86a105e3c33ab9eaf0faff3a76..01b46e160d1f1f10a43faf7ca35afb42dfde6e33 100644
--- a/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc
@@ -162,7 +162,10 @@ class SpaceToBatchNDOp : public XlaOpKernel {
                  block_shape, paddings);
   }
 };
-REGISTER_XLA_OP(Name("SpaceToBatchND"), SpaceToBatchNDOp);
+REGISTER_XLA_OP(Name("SpaceToBatchND")
+                    .CompileTimeConstInput("paddings")
+                    .CompileTimeConstInput("block_shape"),
+                SpaceToBatchNDOp);
 
 class SpaceToBatchOp : public XlaOpKernel {
  public:
@@ -184,7 +187,8 @@ class SpaceToBatchOp : public XlaOpKernel {
  private:
   int block_size_;
 };
-REGISTER_XLA_OP(Name("SpaceToBatch"), SpaceToBatchOp);
+REGISTER_XLA_OP(Name("SpaceToBatch").CompileTimeConstInput("paddings"),
+                SpaceToBatchOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc b/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc
index 89befda346ec06fec23ab1d1c9d910ded8cd806d..806fda632cde64c1b37ae3b9199028d6b6b0a215 100644
--- a/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/core/util/tensor_format.h"
 
 namespace tensorflow {
 namespace {
@@ -23,6 +24,16 @@ namespace {
 class SpaceToDepthOp : public XlaOpKernel {
  public:
   explicit SpaceToDepthOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    string data_format_str;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(ctx, FormatFromString(data_format_str, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+
+    OP_REQUIRES(ctx, data_format_ == FORMAT_NCHW || data_format_ == FORMAT_NHWC,
+                errors::InvalidArgument("Unsupported data format ",
+                                        ToString(data_format_),
+                                        "; expected formats NHWC or NCHW"));
+
     OP_REQUIRES_OK(ctx, ctx->GetAttr("block_size", &block_size_));
     OP_REQUIRES(
         ctx, block_size_ > 1,
@@ -31,34 +42,100 @@ class SpaceToDepthOp : public XlaOpKernel {
 
   void Compile(XlaOpKernelContext* ctx) override {
     const TensorShape input_tensor_shape = ctx->InputShape(0);
-    // The input is presumed to be [batch, height, width, depth]
     int input_rank = input_tensor_shape.dims();
     static const int kRequiredDims = 4;
     OP_REQUIRES(ctx, kRequiredDims == input_rank,
-                errors::InvalidArgument("Input rank should be: ", kRequiredDims,
-                                        " instead of: ", input_rank));
+                errors::InvalidArgument("Input rank should be ", kRequiredDims,
+                                        "; got ", input_rank));
     const gtl::InlinedVector<int64, 4> input_shape =
         input_tensor_shape.dim_sizes();
 
     xla::ComputationBuilder* b = ctx->builder();
     xla::ComputationDataHandle input = ctx->Input(0);
 
+    int feature_dim = GetTensorFeatureDimIndex(input_rank, data_format_);
+    int num_spatial_dims = GetTensorSpatialDims(input_rank, data_format_);
+
+    std::vector<int64> reshaped_shape;
+    std::vector<int64> transpose_order;
+    std::vector<int64> output_shape;
+    reshaped_shape.reserve(input_rank);
+    transpose_order.reserve(input_rank);
+    output_shape.reserve(input_rank);
+    if (data_format_ == FORMAT_NHWC) {
+      int64 block_elems = 1;
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        OP_REQUIRES(ctx, input_shape[1 + i] % block_size_ == 0,
+                    errors::InvalidArgument(
+                        "input shape[", 1 + i, "]=", input_shape[1 + i],
+                        " is not divisible by block_size=", block_size_));
+        block_elems *= block_size_;
+      }
+
+      reshaped_shape.push_back(input_shape[0]);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(input_shape[1 + i] / block_size_);
+        reshaped_shape.push_back(block_size_);
+      }
+      reshaped_shape.push_back(input_shape[feature_dim]);
+
+      transpose_order.push_back(0);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(i * 2 + 1);
+      }
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(i * 2 + 2);
+      }
+      transpose_order.push_back(feature_dim + num_spatial_dims);
+
+      output_shape.push_back(input_shape[0]);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        output_shape.push_back(input_shape[1 + i] / block_size_);
+      }
+      output_shape.push_back(input_shape[feature_dim] * block_elems);
+    } else {
+      // FORMAT_NCHW
+      int64 block_elems = 1;
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        OP_REQUIRES(ctx, input_shape[2 + i] % block_size_ == 0,
+                    errors::InvalidArgument(
+                        "input shape[", 2 + i, "]=", input_shape[2 + i],
+                        " is not divisible by block_size=", block_size_));
+        block_elems *= block_size_;
+      }
+
+      reshaped_shape.push_back(input_shape[0]);
+      reshaped_shape.push_back(input_shape[feature_dim]);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        reshaped_shape.push_back(input_shape[2 + i] / block_size_);
+        reshaped_shape.push_back(block_size_);
+      }
+
+      transpose_order.push_back(0);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(i * 2 + 3);
+      }
+      transpose_order.push_back(feature_dim);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        transpose_order.push_back(i * 2 + 2);
+      }
+
+      output_shape.push_back(input_shape[0]);
+      output_shape.push_back(input_shape[feature_dim] * block_elems);
+      for (int i = 0; i < num_spatial_dims; ++i) {
+        output_shape.push_back(input_shape[2 + i] / block_size_);
+      }
+    }
+
+    // Note: comments are given in NHWC format; NCHW is similar with a different
+    // dimension order.
     // 1. Reshape `input` to `reshaped` of shape:
     //
     //      [batch,
     //       input_shape[1] / block_size_, block_size_,
     //       input_shape[2] / block_size_, block_size_,
     //       depth]
-    const int block_rank = 2;
-    for (int i = 0; i < block_rank; ++i) {
-      OP_REQUIRES(ctx, input_shape[1 + i] % block_size_ == 0,
-                  errors::InvalidArgument(
-                      "input shape[", 1 + i, "]=", input_shape[1 + i],
-                      " is not divisible by block_size=", block_size_));
-    }
-    xla::ComputationDataHandle reshaped = b->Reshape(
-        input, {input_shape[0], input_shape[1] / block_size_, block_size_,
-                input_shape[2] / block_size_, block_size_, input_shape[3]});
+    xla::ComputationDataHandle reshaped = b->Reshape(input, reshaped_shape);
 
     // 2. Permute dimensions of `reshaped` to produce
     //    `permuted_reshaped` of shape:
@@ -69,7 +146,7 @@ class SpaceToDepthOp : public XlaOpKernel {
     //       block_size_, block_size_,
     //       depth]
     xla::ComputationDataHandle permuted_reshaped =
-        b->Transpose(reshaped, {0, 1, 3, 2, 4, 5});
+        b->Transpose(reshaped, transpose_order);
 
     // 3. Reshape `permuted_reshaped` to flatten `block_shape` into the
     //    batch dimension, producing an output tensor of shape:
@@ -79,15 +156,14 @@ class SpaceToDepthOp : public XlaOpKernel {
     //       input_shape[2] / block_size_,
     //       block_size_ * block_size_ * depth]
     //
-    xla::ComputationDataHandle output = b->Reshape(
-        permuted_reshaped, {input_shape[0], input_shape[1] / block_size_,
-                            input_shape[2] / block_size_,
-                            block_size_ * block_size_ * input_shape[3]});
+    xla::ComputationDataHandle output =
+        b->Reshape(permuted_reshaped, output_shape);
 
     ctx->SetOutput(0, output);
   }
 
  private:
+  TensorFormat data_format_;
   int block_size_;
 };
 REGISTER_XLA_OP(Name("SpaceToDepth"), SpaceToDepthOp);
diff --git a/tensorflow/compiler/tf2xla/kernels/split_op.cc b/tensorflow/compiler/tf2xla/kernels/split_op.cc
index 795eb1794f577e0f7fd2a2068878e540ff0c1a1d..79c435c90a1f57250be90c2c2523bf3d7d231461 100644
--- a/tensorflow/compiler/tf2xla/kernels/split_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/split_op.cc
@@ -103,7 +103,7 @@ class SplitOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("Split"), SplitOp);
+REGISTER_XLA_OP(Name("Split").CompileTimeConstInput("split_dim"), SplitOp);
 
 class SplitVOp : public XlaOpKernel {
  public:
@@ -142,8 +142,9 @@ class SplitVOp : public XlaOpKernel {
     int neg_one_dim = -1;
     std::vector<int64> split_sizes_vec(num_split, -1);
     const TensorShape split_size_shape = ctx->InputShape(1);
-    OP_REQUIRES(ctx, split_size_shape.dims() == 1 &&
-                         split_size_shape.num_elements() == num_split,
+    OP_REQUIRES(ctx,
+                split_size_shape.dims() == 1 &&
+                    split_size_shape.num_elements() == num_split,
                 errors::InvalidArgument(
                     "shape of tensor describing "
                     " the output must have dimension 1 and the same "
@@ -171,10 +172,11 @@ class SplitVOp : public XlaOpKernel {
     }
 
     OP_REQUIRES(
-        ctx, (neg_one_dim == -1 &&
-              total_split_size == input_shape.dim_size(split_dim)) ||
-                 (neg_one_dim >= 0 &&
-                  total_split_size <= input_shape.dim_size(split_dim)),
+        ctx,
+        (neg_one_dim == -1 &&
+         total_split_size == input_shape.dim_size(split_dim)) ||
+            (neg_one_dim >= 0 &&
+             total_split_size <= input_shape.dim_size(split_dim)),
         errors::InvalidArgument("Determined shape must either match "
                                 "input shape along split_dim exactly if "
                                 "fully specified, or be less than the size of "
@@ -206,7 +208,10 @@ class SplitVOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("SplitV"), SplitVOp);
+REGISTER_XLA_OP(Name("SplitV")
+                    .CompileTimeConstInput("split_dim")
+                    .CompileTimeConstInput("size_splits"),
+                SplitVOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/stack_ops.cc b/tensorflow/compiler/tf2xla/kernels/stack_ops.cc
index bb7891b31f6d52fd84cf72579c343f50473e1632..d77fb768ef4d124c403a1dc9b321c4f29571d806 100644
--- a/tensorflow/compiler/tf2xla/kernels/stack_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/stack_ops.cc
@@ -40,7 +40,7 @@ namespace {
 
 Status GetStackShape(xla::ComputationBuilder* builder, XlaResource* resource,
                      TensorShape* stack_shape) {
-  auto shape_or_status = builder->GetShape(resource->value);
+  auto shape_or_status = builder->GetShape(resource->value());
   if (!shape_or_status.ok()) {
     return shape_or_status.status();
   }
@@ -63,22 +63,24 @@ Status GetStackShape(xla::ComputationBuilder* builder, XlaResource* resource,
 Status MaybeInitializeStack(xla::ComputationBuilder* builder,
                             XlaResource* resource, DataType dtype,
                             const TensorShape& elem_shape) {
-  if (resource->type != dtype) {
+  if (resource->type() != dtype) {
     return errors::InvalidArgument(
-        "Stack dtype is ", DataTypeString(resource->type), " but op has dtype ",
-        DataTypeString(dtype), ".");
+        "Stack dtype is ", DataTypeString(resource->type()),
+        " but op has dtype ", DataTypeString(dtype), ".");
   }
 
   TensorShape stack_shape;
-  stack_shape.AddDim(resource->tensor_array_size);
+  stack_shape.AddDim(resource->tensor_array_size());
   stack_shape.AppendShape(elem_shape);
 
-  if (resource->value.handle() == 0) {
+  if (!resource->initialized()) {
     // Stack has not been initialized.
-    xla::ComputationDataHandle zero = XlaHelpers::Zero(builder, resource->type);
-    resource->value =
+    xla::ComputationDataHandle zero =
+        XlaHelpers::Zero(builder, resource->type());
+    TF_RETURN_IF_ERROR(resource->SetValue(
+        dtype,
         builder->Tuple({builder->Broadcast(zero, stack_shape.dim_sizes()),
-                        builder->ConstantR0<int32>(0)});
+                        builder->ConstantR0<int32>(0)})));
   } else {
     // Checks the expected shape matches the actual shape.
     TensorShape actual_shape;
@@ -105,7 +107,9 @@ class StackOp : public XlaOpKernel {
     OP_REQUIRES(
         ctx, size >= 0,
         errors::InvalidArgument(
-            "XLA compilation requires a fixed stack size upper bound."));
+            "XLA compilation requires a fixed stack size upper bound. If "
+            "you are using tf.while_loop, set the maximum_iterations parameter "
+            "to fix this issue."));
 
     // We defer initializing the Stack resource until we see the first push.
     // Otherwise we do not know the shape of the stack elements.
@@ -116,7 +120,7 @@ class StackOp : public XlaOpKernel {
     OP_REQUIRES_OK(
         ctx, xc.CreateResource(XlaResource::kStack, -1, std::move(name), dtype_,
                                value, &resource));
-    resource->tensor_array_size = size;
+    resource->set_tensor_array_size(size);
     ctx->SetResourceOutput(0, resource);
   }
 
@@ -127,7 +131,7 @@ class StackOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(StackOp);
 };
 
-REGISTER_XLA_OP(Name("StackV2"), StackOp);
+REGISTER_XLA_OP(Name("StackV2").CompileTimeConstInput("max_size"), StackOp);
 
 class StackPushOp : public XlaOpKernel {
  public:
@@ -145,8 +149,8 @@ class StackPushOp : public XlaOpKernel {
     // Initializes the Stack, if the element shape was not already known.
     OP_REQUIRES_OK(ctx, MaybeInitializeStack(b, resource, dtype_, elem_shape));
 
-    xla::ComputationDataHandle ta = b->GetTupleElement(resource->value, 0);
-    xla::ComputationDataHandle index = b->GetTupleElement(resource->value, 1);
+    xla::ComputationDataHandle ta = b->GetTupleElement(resource->value(), 0);
+    xla::ComputationDataHandle index = b->GetTupleElement(resource->value(), 1);
     xla::ComputationDataHandle value = ctx->Input(1);
 
     // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
@@ -160,9 +164,11 @@ class StackPushOp : public XlaOpKernel {
 
     // TODO(phawkins): We don't check the index is in bounds --- there is no
     // error mechanism in XLA.
-    resource->value =
-        b->Tuple({b->DynamicUpdateSlice(ta, update, start_indices),
-                  b->Add(index, b->ConstantR0<int32>(1))});
+    OP_REQUIRES_OK(
+        ctx,
+        resource->SetValue(
+            dtype_, b->Tuple({b->DynamicUpdateSlice(ta, update, start_indices),
+                              b->Add(index, b->ConstantR0<int32>(1))})));
 
     ctx->SetOutput(0, value);
   }
@@ -187,27 +193,22 @@ class StackPopOp : public XlaOpKernel {
     XlaResource* resource;
     OP_REQUIRES_OK(ctx, ctx->GetResourceInput(0, &resource));
 
-    OP_REQUIRES(ctx, resource->type == dtype_,
-                errors::InvalidArgument(
-                    "Stack dtype is ", DataTypeString(resource->type),
-                    " but Op requested dtype ", DataTypeString(dtype_), "."));
-
     // There is a somewhat subtle issue here: here "uninitialized" means we have
     // not yet seen a pop in the order that we compile operators, not the order
     // that we run them. However, in practice the two orders should be the same
     // for the sole user of the stack operators (loop gradients).
-    OP_REQUIRES(ctx, resource->value.handle() != 0,
+    OP_REQUIRES(ctx, resource->initialized(),
                 errors::InvalidArgument("Stack pop on uninitialized stack"));
 
     TensorShape stack_shape;
     OP_REQUIRES_OK(ctx, GetStackShape(b, resource, &stack_shape));
 
-    xla::ComputationDataHandle state = resource->value;
+    xla::ComputationDataHandle state = resource->value();
     xla::ComputationDataHandle ta = b->GetTupleElement(state, 0);
     xla::ComputationDataHandle index = b->GetTupleElement(state, 1);
 
     index = b->Sub(index, b->ConstantR0<int32>(1));
-    resource->value = b->Tuple({ta, index});
+    OP_REQUIRES_OK(ctx, resource->SetValue(dtype_, b->Tuple({ta, index})));
 
     // start_indices of the DynamicSlice are [index, 0, 0, ..., 0].
     auto start_indices =
diff --git a/tensorflow/compiler/tf2xla/kernels/strided_slice_op.cc b/tensorflow/compiler/tf2xla/kernels/strided_slice_op.cc
index 6af4bd0496e0da926726e3f74376281f539e925a..f0525a5fb86d6d6f0aae954a916186cffc7f3a9f 100644
--- a/tensorflow/compiler/tf2xla/kernels/strided_slice_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/strided_slice_op.cc
@@ -106,7 +106,11 @@ class StridedSliceOp : public XlaOpKernel {
   DataType index_type_;
 };
 
-REGISTER_XLA_OP(Name("StridedSlice"), StridedSliceOp);
+REGISTER_XLA_OP(Name("StridedSlice")
+                    .CompileTimeConstInput("begin")
+                    .CompileTimeConstInput("end")
+                    .CompileTimeConstInput("strides"),
+                StridedSliceOp);
 
 class StridedSliceGradOp : public XlaOpKernel {
  public:
@@ -211,7 +215,12 @@ class StridedSliceGradOp : public XlaOpKernel {
   DataType index_type_;
 };
 
-REGISTER_XLA_OP(Name("StridedSliceGrad"), StridedSliceGradOp);
+REGISTER_XLA_OP(Name("StridedSliceGrad")
+                    .CompileTimeConstInput("shape")
+                    .CompileTimeConstInput("begin")
+                    .CompileTimeConstInput("end")
+                    .CompileTimeConstInput("strides"),
+                StridedSliceGradOp);
 
 class StridedSliceAssignOp : public XlaOpKernel {
  public:
@@ -320,7 +329,11 @@ class StridedSliceAssignOp : public XlaOpKernel {
   DataType index_type_;
 };
 
-REGISTER_XLA_OP(Name("ResourceStridedSliceAssign"), StridedSliceAssignOp);
+REGISTER_XLA_OP(Name("ResourceStridedSliceAssign")
+                    .CompileTimeConstInput("begin")
+                    .CompileTimeConstInput("end")
+                    .CompileTimeConstInput("strides"),
+                StridedSliceAssignOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
index 351fda251798e43b607fb445f2c98abd57b3d86b..9224072a3cb92b8ff0e99c79e568ca1a76966ed6 100644
--- a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
@@ -21,10 +21,10 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
 #include "tensorflow/compiler/tf2xla/type_util.h"
-#include "tensorflow/compiler/tf2xla/xla_compilation_device.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/tf2xla/xla_resource.h"
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
@@ -50,29 +50,30 @@ namespace {
 Status MaybeInitializeTensorArray(xla::ComputationBuilder* builder,
                                   XlaResource* resource, DataType dtype,
                                   const TensorShape& elem_shape) {
-  if (resource->kind != XlaResource::kTensorArray) {
+  if (resource->kind() != XlaResource::kTensorArray) {
     return errors::InvalidArgument("Unexpected non-TensorArray resource");
   }
 
-  if (resource->type != dtype) {
+  if (resource->type() != dtype) {
     return errors::InvalidArgument(
-        "TensorArray dtype is ", DataTypeString(resource->type),
+        "TensorArray dtype is ", DataTypeString(resource->type()),
         " but op has dtype ", DataTypeString(dtype), ".");
   }
 
-  TF_RET_CHECK(resource->tensor_array_size >= 0)
-      << resource->name << " size " << resource->tensor_array_size;
+  TF_RET_CHECK(resource->tensor_array_size() >= 0)
+      << resource->name() << " size " << resource->tensor_array_size();
   TensorShape ta_shape;
-  ta_shape.AddDim(resource->tensor_array_size);
+  ta_shape.AddDim(resource->tensor_array_size());
   ta_shape.AppendShape(elem_shape);
 
-  if (resource->value.handle() == 0) {
-    // TensorArray has not been initialized.
-    xla::ComputationDataHandle zero = XlaHelpers::Zero(builder, resource->type);
-    resource->value = builder->Broadcast(zero, ta_shape.dim_sizes());
+  if (!resource->initialized()) {
+    xla::ComputationDataHandle zero =
+        XlaHelpers::Zero(builder, resource->type());
+    TF_RETURN_IF_ERROR(resource->SetValue(
+        dtype, builder->Broadcast(zero, ta_shape.dim_sizes())));
   } else {
     // Checks the elem_shape matches the TensorArray shape.
-    auto shape_or_status = builder->GetShape(resource->value);
+    auto shape_or_status = builder->GetShape(resource->value());
     if (!shape_or_status.ok()) {
       return shape_or_status.status();
     }
@@ -93,19 +94,17 @@ Status MaybeInitializeTensorArray(xla::ComputationBuilder* builder,
 Status CheckTensorArrayIsInitialized(const string& op_name,
                                      const XlaResource* resource,
                                      DataType dtype) {
-  if (resource->kind != XlaResource::kTensorArray) {
+  if (resource->kind() != XlaResource::kTensorArray) {
     return errors::InvalidArgument(
-        "Unexpected non-TensorArray resource passed "
-        "to ",
-        op_name);
+        "Unexpected non-TensorArray resource passed to ", op_name);
   }
-  if (resource->value.handle() == 0) {
+  if (!resource->initialized()) {
     return errors::InvalidArgument("Uninitialized TensorArray passed to ",
                                    op_name);
   }
-  if (resource->type != dtype) {
+  if (resource->type() != dtype) {
     return errors::InvalidArgument(
-        "TensorArray dtype is ", DataTypeString(resource->type),
+        "TensorArray dtype is ", DataTypeString(resource->type()),
         " but op has dtype ", DataTypeString(dtype), ".");
   }
 
@@ -177,7 +176,7 @@ class TensorArrayOp : public XlaOpKernel {
     OP_REQUIRES_OK(
         ctx, xc.CreateResource(XlaResource::kTensorArray, -1, std::move(name),
                                dtype_, value, &var));
-    var->tensor_array_size = size;
+    var->set_tensor_array_size(size);
     ctx->SetResourceOutput(0, var);
 
     Tensor flow(DT_FLOAT, TensorShape({}));
@@ -193,7 +192,8 @@ class TensorArrayOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(TensorArrayOp);
 };
 
-REGISTER_XLA_OP(Name("TensorArrayV3"), TensorArrayOp);
+REGISTER_XLA_OP(Name("TensorArrayV3").CompileTimeConstInput("size"),
+                TensorArrayOp);
 
 class TensorArrayWriteOp : public XlaOpKernel {
  public:
@@ -213,7 +213,7 @@ class TensorArrayWriteOp : public XlaOpKernel {
     OP_REQUIRES_OK(ctx,
                    MaybeInitializeTensorArray(b, resource, dtype_, elem_shape));
 
-    xla::ComputationDataHandle ta = resource->value;
+    xla::ComputationDataHandle ta = resource->value();
     xla::ComputationDataHandle index = ctx->Input(1);
     xla::ComputationDataHandle value = ctx->Input(2);
     xla::ComputationDataHandle flow = ctx->Input(3);
@@ -230,7 +230,7 @@ class TensorArrayWriteOp : public XlaOpKernel {
     xla::ComputationDataHandle written =
         DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(), start_indices);
 
-    resource->value = written;
+    OP_REQUIRES_OK(ctx, resource->SetValue(dtype_, written));
     ctx->SetOutput(0, flow);
   }
 
@@ -259,7 +259,7 @@ class TensorArrayReadOp : public XlaOpKernel {
     TensorShape ta_shape;
     OP_REQUIRES_OK(ctx, GetTensorArrayShape(resource, b, &ta_shape));
 
-    xla::ComputationDataHandle ta = resource->value;
+    xla::ComputationDataHandle ta = resource->value();
     xla::ComputationDataHandle index = ctx->Input(1);
 
     // start_indices of the DynamicSlice are [index, 0, 0, ..., 0].
@@ -309,7 +309,33 @@ class TensorArrayGatherOp : public XlaOpKernel {
     auto indices = ctx->Input(1);
     DataType index_type = ctx->input_type(1);
 
-    xla::ComputationDataHandle ta = resource->value;
+    xla::ComputationDataHandle ta = resource->value();
+
+    // Look for the case where the gather takes a simple slice from the
+    // tensor array (0, 1, 2, 3, 4, ..., N)
+    std::vector<int64> const_indices;
+    Status status = ctx->ConstantInputAsIntVector(1, &const_indices);
+    if (status.ok()) {
+      bool gather_is_dense_slice = true;
+      for (auto i = 0; i < const_indices.size(); i++) {
+        if (const_indices[i] != i) {
+          gather_is_dense_slice = false;
+          break;
+        }
+      }
+
+      if (gather_is_dense_slice) {
+        std::vector<int64> begin(ta_shape.dims(), 0);
+        std::vector<int64> strides(ta_shape.dims(), 1);
+        std::vector<int64> end(ta_shape.dims(), 1);
+        end[0] = const_indices.size();
+        for (auto i = 1; i < ta_shape.dims(); i++) {
+          end[i] = ta_shape.dim_size(i);
+        }
+        ctx->SetOutput(0, b->Slice(ta, begin, end, strides));
+        return;
+      }
+    }
 
     xla::ComputationDataHandle gather = XlaComputeGatherDynamicSlice(
         ctx, ta, ta_shape, indices, indices_shape, 0, dtype_, index_type, b);
@@ -348,35 +374,54 @@ class TensorArrayScatterOp : public XlaOpKernel {
     const int num_indices = indices_shape.dim_size(0);
     const xla::ComputationDataHandle indices = ctx->Input(1);
 
-    xla::ComputationDataHandle ta = resource->value;
+    xla::ComputationDataHandle ta = resource->value();
     const xla::ComputationDataHandle value = ctx->Input(2);
     const xla::ComputationDataHandle flow = ctx->Input(3);
 
-    auto slice_dims = value_shape.dim_sizes();
-    slice_dims[0] = 1LL;
-
-    std::vector<int64> value_starts(value_shape.dims(), 0);
-    auto value_ends = value_shape.dim_sizes();
-
-    std::vector<int64> value_strides(value_shape.dims(), 1);
-
-    // For every (index, value) pair, update the corresponding TensorArray
-    // storage.
-    for (int i = 0; i < num_indices; ++i) {
-      // Slice out part of the value.
-      value_starts[0] = i;
-      value_ends[0] = i + 1;
-      auto slice = b->Slice(value, value_starts, value_ends, value_strides);
+    // Look for the case where the scatter is for each sub-tensor in order. The
+    // tensor array implementation allows for this to be a straight addition.
+    bool scatter_all_elements_in_order = false;
+    std::vector<int64> const_indices;
+    Status status = ctx->ConstantInputAsIntVector(1, &const_indices);
+    if (status.ok() && num_indices == value_shape.dim_size(0)) {
+      scatter_all_elements_in_order = true;
+      for (auto i = 0; i < num_indices; i++) {
+        if (const_indices[i] != i) {
+          scatter_all_elements_in_order = false;
+          break;
+        }
+      }
+    }
 
-      // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
-      auto index = b->Slice(indices, {i}, {i + 1}, {1});
-      auto start_indices =
-          b->Pad(b->Reshape(index, {1}), b->ConstantR0<int32>(0),
-                 xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
-      ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices);
+    if (scatter_all_elements_in_order) {
+      ta = b->Add(ta, value);
+    } else {
+      auto slice_dims = value_shape.dim_sizes();
+      slice_dims[0] = 1LL;
+
+      std::vector<int64> value_starts(value_shape.dims(), 0);
+      auto value_ends = value_shape.dim_sizes();
+
+      std::vector<int64> value_strides(value_shape.dims(), 1);
+
+      // For every (index, value) pair, update the corresponding TensorArray
+      // storage.
+      for (int i = 0; i < num_indices; ++i) {
+        // Slice out part of the value.
+        value_starts[0] = i;
+        value_ends[0] = i + 1;
+        auto slice = b->Slice(value, value_starts, value_ends, value_strides);
+
+        // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
+        auto index = b->Slice(indices, {i}, {i + 1}, {1});
+        auto start_indices =
+            b->Pad(b->Reshape(index, {1}), b->ConstantR0<int32>(0),
+                   xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
+        ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices);
+      }
     }
 
-    resource->value = ta;
+    OP_REQUIRES_OK(ctx, resource->SetValue(dtype_, ta));
     ctx->SetOutput(0, flow);
   }
 
@@ -405,7 +450,7 @@ class TensorArrayConcatOp : public XlaOpKernel {
     TensorShape ta_shape;
     OP_REQUIRES_OK(ctx, GetTensorArrayShape(resource, b, &ta_shape));
 
-    xla::ComputationDataHandle ta = resource->value;
+    xla::ComputationDataHandle ta = resource->value();
 
     auto ta_dims = ta_shape.dim_sizes();
     std::vector<int64> shape(ta_dims.begin() + 1, ta_dims.end());
@@ -460,16 +505,17 @@ class TensorArraySplitOp : public XlaOpKernel {
     OP_REQUIRES_OK(ctx, ctx->GetResourceInput(0, &resource));
     OP_REQUIRES_OK(ctx,
                    MaybeInitializeTensorArray(b, resource, dtype_, elem_shape));
-    xla::ComputationDataHandle ta = resource->value;
+    xla::ComputationDataHandle ta = resource->value();
 
     TensorShape ta_shape;
-    ta_shape.AddDim(resource->tensor_array_size);
+    ta_shape.AddDim(resource->tensor_array_size());
     ta_shape.AppendShape(elem_shape);
 
-    OP_REQUIRES(ctx, lengths.size() == resource->tensor_array_size,
-                errors::InvalidArgument(
-                    "TensorArray's size is not equal to the size of lengths (",
-                    lengths.size(), " vs. ", resource->tensor_array_size, ")"));
+    OP_REQUIRES(
+        ctx, lengths.size() == resource->tensor_array_size(),
+        errors::InvalidArgument(
+            "TensorArray's size is not equal to the size of lengths (",
+            lengths.size(), " vs. ", resource->tensor_array_size(), ")"));
 
     const xla::ComputationDataHandle value = ctx->Input(1);
     const xla::ComputationDataHandle flow = ctx->Input(3);
@@ -479,7 +525,9 @@ class TensorArraySplitOp : public XlaOpKernel {
                                         value_shape.DebugString(), " vs. ",
                                         ta_shape.DebugString()));
 
-    resource->value = b->Add(ta, b->Reshape(value, ta_shape.dim_sizes()));
+    OP_REQUIRES_OK(
+        ctx, resource->SetValue(
+                 dtype_, b->Add(ta, b->Reshape(value, ta_shape.dim_sizes()))));
 
     ctx->SetOutput(0, flow);
   }
@@ -490,7 +538,8 @@ class TensorArraySplitOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(TensorArraySplitOp);
 };
 
-REGISTER_XLA_OP(Name("TensorArraySplitV3"), TensorArraySplitOp);
+REGISTER_XLA_OP(Name("TensorArraySplitV3").CompileTimeConstInput("lengths"),
+                TensorArraySplitOp);
 
 class TensorArraySizeOp : public XlaOpKernel {
  public:
@@ -500,7 +549,8 @@ class TensorArraySizeOp : public XlaOpKernel {
     XlaResource* var;
     OP_REQUIRES_OK(ctx, ctx->GetResourceInput(0, &var));
     Tensor size_tensor(DT_INT32, {});
-    size_tensor.scalar<int32>()() = static_cast<int32>(var->tensor_array_size);
+    size_tensor.scalar<int32>()() =
+        static_cast<int32>(var->tensor_array_size());
     ctx->SetConstantOutput(0, size_tensor);
   }
 
@@ -523,7 +573,7 @@ class TensorArrayGradOp : public XlaOpKernel {
     OP_REQUIRES_OK(ctx, ctx->GetResourceInput(0, &resource));
 
     OP_REQUIRES_OK(
-        ctx, CheckTensorArrayIsInitialized(name(), resource, resource->type));
+        ctx, CheckTensorArrayIsInitialized(name(), resource, resource->type()));
     TensorShape ta_shape;
     OP_REQUIRES_OK(ctx, GetTensorArrayShape(resource, b, &ta_shape));
 
diff --git a/tensorflow/compiler/tf2xla/kernels/tile_ops.cc b/tensorflow/compiler/tf2xla/kernels/tile_ops.cc
index 9ee6bd892504e683a191484fb09259619759f36d..9aefcd4fc7f94a1dba1c56273c55d0b98fbbfaf2 100644
--- a/tensorflow/compiler/tf2xla/kernels/tile_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/tile_ops.cc
@@ -122,7 +122,7 @@ class TileOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(TileOp);
 };
 
-REGISTER_XLA_OP(Name("Tile"), TileOp);
+REGISTER_XLA_OP(Name("Tile").CompileTimeConstInput("multiples"), TileOp);
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/transpose_op.cc b/tensorflow/compiler/tf2xla/kernels/transpose_op.cc
index 2fc5d40d1059b868eef0a632071e7cccdecaf9f4..c167642174b328a968d7f7ce1f0ad6e0ab8a7a68 100644
--- a/tensorflow/compiler/tf2xla/kernels/transpose_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/transpose_op.cc
@@ -54,7 +54,8 @@ class TransposeOp : public XlaOpKernel {
     OP_REQUIRES_OK(ctx, ctx->ConstantInputReshaped(1, {dims}, &literal));
 
     std::vector<int32> perm(dims);
-    std::copy(literal.s32s().begin(), literal.s32s().end(), perm.begin());
+    std::copy(literal.data<int32>().begin(), literal.data<int32>().end(),
+              perm.begin());
 
     std::vector<int64> transposed_order;
     // Check whether permutation is a permutation of integers of [0 .. dims).
@@ -72,8 +73,9 @@ class TransposeOp : public XlaOpKernel {
       }
     }
     for (int i = 0; i < dims; ++i) {
-      OP_REQUIRES(ctx, bits[i], errors::InvalidArgument(
-                                    i, " is missing from 'perm' argument."));
+      OP_REQUIRES(
+          ctx, bits[i],
+          errors::InvalidArgument(i, " is missing from 'perm' argument."));
     }
 
     // 0-D, 1-D, and identity transposes do nothing.
@@ -87,7 +89,7 @@ class TransposeOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("Transpose"), TransposeOp);
+REGISTER_XLA_OP(Name("Transpose").CompileTimeConstInput("perm"), TransposeOp);
 
 // InvertPermutation frequently forms part of the gradient of Transpose.
 //
@@ -103,8 +105,9 @@ class InvertPermutationOp : public XlaOpKernel {
   explicit InvertPermutationOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
 
   void Compile(XlaOpKernelContext* ctx) override {
-    OP_REQUIRES(ctx, FastBoundsCheck(ctx->InputShape(0).num_elements(),
-                                     std::numeric_limits<int32>::max()),
+    OP_REQUIRES(ctx,
+                FastBoundsCheck(ctx->InputShape(0).num_elements(),
+                                std::numeric_limits<int32>::max()),
                 errors::InvalidArgument("permutation of nonnegative int32s "
                                         "must have <= int32 max elements"));
 
@@ -128,7 +131,9 @@ class InvertPermutationOp : public XlaOpKernel {
   }
 };
 
-REGISTER_XLA_OP(Name("InvertPermutation").TypeConstraint("T", DT_INT32),
+REGISTER_XLA_OP(Name("InvertPermutation")
+                    .TypeConstraint("T", DT_INT32)
+                    .CompileTimeConstInput("x"),
                 InvertPermutationOp);
 
 }  // namespace
diff --git a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc
index b19ea22f50d2dd44e8d1d81f5930263f364030e1..68847ae7a2cb926edd9d29007e24b0db7fb5a75f 100644
--- a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/tf2xla/kernels/cwise_ops.h"
 #include "tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h"
+#include "tensorflow/compiler/tf2xla/kernels/shape_util.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -22,6 +23,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/no_op.h"
 
 namespace tensorflow {
@@ -121,5 +123,26 @@ class ResourceGatherOp : public XlaOpKernel {
 REGISTER_XLA_OP(Name("ResourceGather").TypeConstraint("dtype", kNumericTypes),
                 ResourceGatherOp);
 
+class VariableShapeOp : public XlaOpKernel {
+ public:
+  explicit VariableShapeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_dtype_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    DataType variable_dtype;
+    TensorShape shape;
+    OP_REQUIRES_OK(ctx,
+                   ctx->GetVariableTypeAndShape(0, &variable_dtype, &shape));
+    Tensor shape_constant(out_dtype_, TensorShape({shape.dims()}));
+    OP_REQUIRES_OK(ctx, TensorShapeToConstant(shape, &shape_constant));
+    ctx->SetConstantOutput(0, shape_constant);
+  }
+
+ private:
+  DataType out_dtype_;
+};
+
+REGISTER_XLA_OP(Name("VariableShape"), VariableShapeOp);
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/while_op.cc b/tensorflow/compiler/tf2xla/kernels/while_op.cc
index ead26478ff2a3a1302e95e4ee5dbbf366b04efc6..4a711e4d9b7aedb166a8a0ec9fe9ec2390f01b17 100644
--- a/tensorflow/compiler/tf2xla/kernels/while_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/while_op.cc
@@ -39,7 +39,7 @@ Status MakeXlaCompilerArgumentsFromInputs(
   *has_uninitialized_vars = false;
   *has_tensor_arrays = false;
   for (int i = 0; i < ctx->num_inputs(); ++i) {
-    VLOG(2) << "  Input " << i
+    VLOG(2) << " Input " << i
             << " type: " << DataTypeString(ctx->input_type(i))
             << " shape: " << ctx->InputShape(i).DebugString();
     XlaCompiler::Argument& arg = (*args)[i];
@@ -50,25 +50,25 @@ Status MakeXlaCompilerArgumentsFromInputs(
       XlaResource* resource;
       TF_RETURN_IF_ERROR(ctx->GetResourceInput(i, &resource));
 
-      arg.initialized = resource->value.handle() > 0;
+      arg.initialized = resource->initialized();
       arg.kind = XlaCompiler::Argument::kResource;
-      arg.resource_kind = resource->kind;
+      arg.resource_kind = resource->kind();
       if (arg.resource_kind == XlaResource::kTensorArray) {
         *has_tensor_arrays = true;
       }
 
-      arg.type = resource->type;
+      arg.type = resource->type();
       if (arg.initialized) {
         TF_RETURN_IF_ERROR(resource->PackedShape(ctx->builder(), &arg.shape));
       } else {
         *has_uninitialized_vars = true;
       }
-      arg.tensor_array_size = resource->tensor_array_size;
-      for (const auto& gradient : resource->tensor_array_gradients) {
+      arg.tensor_array_size = resource->tensor_array_size();
+      for (const auto& gradient : resource->tensor_array_gradients()) {
         arg.tensor_array_gradients.insert(gradient.first);
       }
-      arg.name = resource->name;
-      VLOG(2) << "    resource " << resource->name
+      arg.name = resource->name();
+      VLOG(2) << "    resource " << resource->name()
               << " type: " << DataTypeString(arg.type)
               << " shape: " << xla::ShapeUtil::HumanString(arg.shape)
               << " initialized: " << arg.initialized;
@@ -120,6 +120,7 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
   body_options.use_tuple_arg = true;
   body_options.return_updated_values_for_all_resources = true;
   body_options.resolve_compile_time_constants = false;
+  body_options.is_entry_computation = false;
   XlaCompiler::CompilationResult body;
   OP_REQUIRES_OK(ctx, compiler->CompileFunction(body_options, body_name_attr_,
                                                 arguments, &body));
@@ -162,13 +163,14 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
         }
         std::unique_ptr<xla::Literal> zero =
             xla::Literal::CreateFromShape(shape);
-        resource->value = builder->ConstantLiteral(*zero);
+        OP_REQUIRES_OK(ctx, resource->SetValue(
+                                update.type, builder->ConstantLiteral(*zero)));
       }
 
       // Add any TensorArray gradients touched by the body to the enclosing
       // graph.
       for (const string& grad_source : update.tensor_array_gradients_accessed) {
-        VLOG(4) << "TensorArray " << resource->name << " accessed gradient "
+        VLOG(4) << "TensorArray " << resource->name() << " accessed gradient "
                 << grad_source;
         XlaResource* gradient;
         OP_REQUIRES_OK(ctx, resource->GetOrCreateTensorArrayGradient(
@@ -177,7 +179,7 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
 
       // Add all of the TensorArray gradients to the argument. For simplicity,
       // we always pass all known gradients.
-      for (const auto& gradient : resource->tensor_array_gradients) {
+      for (const auto& gradient : resource->tensor_array_gradients()) {
         arg.tensor_array_gradients.insert(gradient.first);
       }
 
@@ -196,14 +198,21 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
   XlaCompiler::CompileOptions cond_options;
   cond_options.use_tuple_arg = true;
   cond_options.resolve_compile_time_constants = false;
+  cond_options.is_entry_computation = false;
   XlaCompiler::CompilationResult cond;
   OP_REQUIRES_OK(ctx, compiler->CompileFunction(cond_options, cond_name_attr_,
                                                 arguments, &cond));
 
-  xla::Shape body_input_shape =
-      xla::ShapeUtil::MakeTupleShape(body.xla_input_shapes);
-  xla::Shape cond_input_shape =
-      xla::ShapeUtil::MakeTupleShape(cond.xla_input_shapes);
+  OP_REQUIRES(ctx, body.xla_input_shapes.size() == 1,
+              errors::FailedPrecondition("Expected one input shape"));
+  xla::Shape body_input_shape = body.xla_input_shapes[0];
+  OP_REQUIRES(ctx, xla::ShapeUtil::IsTuple(body_input_shape),
+              errors::FailedPrecondition("Expected tuple shape"));
+  OP_REQUIRES(ctx, cond.xla_input_shapes.size() == 1,
+              errors::FailedPrecondition("Expected one input shape"));
+  xla::Shape cond_input_shape = cond.xla_input_shapes[0];
+  OP_REQUIRES(ctx, xla::ShapeUtil::IsTuple(cond_input_shape),
+              errors::FailedPrecondition("Expected tuple shape"));
 
   VLOG(2) << "Body shape: " << xla::ShapeUtil::HumanString(body_input_shape)
           << " -> " << xla::ShapeUtil::HumanString(body.xla_output_shape);
@@ -283,10 +292,11 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
       OP_REQUIRES_OK(ctx,
                      resource->SetFromPack(
                          arguments[update.input_index].tensor_array_gradients,
-                         builder->GetTupleElement(while_result, pos), builder));
+                         builder->GetTupleElement(while_result, pos),
+                         /*reset_initial_values=*/false, builder));
     }
     VLOG(2) << "Loop-carried variable: pos: " << update.input_index
-            << " name: " << resource->name << " modified: " << update.modified
+            << " name: " << resource->name() << " modified: " << update.modified
             << " type: " << DataTypeString(update.type)
             << " shape: " << xla::ShapeUtil::HumanString(update.shape);
     // Copies the identity of the resource variable from input to output
diff --git a/tensorflow/compiler/tf2xla/lib/batch_dot.cc b/tensorflow/compiler/tf2xla/lib/batch_dot.cc
index 28a5e6a58bb312f4c4821bcce484a08160009d56..9b0e6174475c22e325c090bec5f1d56822e106bc 100644
--- a/tensorflow/compiler/tf2xla/lib/batch_dot.cc
+++ b/tensorflow/compiler/tf2xla/lib/batch_dot.cc
@@ -27,7 +27,6 @@ namespace tensorflow {
 
 // The current implementation simply unrolls the computation along the batch
 // dimension.
-// TODO(andydavis): add batching support to XLA's Dot operator.
 xla::StatusOr<xla::ComputationDataHandle> BatchDot(
     xla::ComputationBuilder* builder, xla::ComputationDataHandle x,
     xla::ComputationDataHandle y, bool transpose_x, bool transpose_y) {
@@ -52,26 +51,20 @@ xla::StatusOr<xla::ComputationDataHandle> BatchDot(
 
   // The batch dimensions must be equal and the matrix dimensions must be
   // valid.
-  std::vector<int64> dimensions;
-  int64 batch_count = 1;
+  std::vector<int64> batch_dimension_numbers;
   for (int i = 0; i < ndims - 2; ++i) {
-    int64 x_size = x_shape->dimensions(i);
-    int64 y_size = y_shape->dimensions(i);
-    if (x_size != y_size) {
+    if (x_shape->dimensions(i) != y_shape->dimensions(i)) {
       return errors::InvalidArgument(
           "Dimension ", i, " of inputs to BatchedDot must be equal: ",
           xla::ShapeUtil::HumanString(*x_shape), " vs ",
           xla::ShapeUtil::HumanString(*y_shape));
     }
-    dimensions.push_back(x_size);
-    batch_count *= x_size;
+    batch_dimension_numbers.push_back(i);
   }
 
   int x_inner_dim = transpose_x ? (ndims - 2) : (ndims - 1);
   int y_inner_dim = transpose_y ? (ndims - 1) : (ndims - 2);
-  int64 x_inner_dim_size = x_shape->dimensions(x_inner_dim);
-  int64 y_inner_dim_size = y_shape->dimensions(y_inner_dim);
-  if (x_inner_dim_size != y_inner_dim_size) {
+  if (x_shape->dimensions(x_inner_dim) != y_shape->dimensions(y_inner_dim)) {
     return errors::InvalidArgument(
         "Dimensions ", x_inner_dim, " and ", y_inner_dim,
         " of arguments to BatchedDot must be equal: ",
@@ -80,19 +73,22 @@ xla::StatusOr<xla::ComputationDataHandle> BatchDot(
         " transpose: ", transpose_y);
   }
 
-  // If there are no batch dimensions, use a regular Dot. This case exists
-  // to improve the readability of the emitted graphs.
-  if (dimensions.empty()) {
-    auto lhs = transpose_x ? builder->Transpose(x, {1, 0}) : x;
-    auto rhs = transpose_y ? builder->Transpose(y, {1, 0}) : y;
-    return builder->Dot(lhs, rhs);
+  // Check for zero lhs/rhs dim size.
+  if (xla::ShapeUtil::HasZeroElements(*x_shape) ||
+      xla::ShapeUtil::HasZeroElements(*y_shape)) {
+    std::vector<int64> dimensions(batch_dimension_numbers.size());
+    for (int i = 0; i < batch_dimension_numbers.size(); ++i) {
+      dimensions[i] = x_shape->dimensions(batch_dimension_numbers[i]);
+    }
+    int x_outer_dim = transpose_x ? (ndims - 1) : (ndims - 2);
+    int y_outer_dim = transpose_y ? (ndims - 2) : (ndims - 1);
+    dimensions.push_back(x_shape->dimensions(x_outer_dim));
+    dimensions.push_back(y_shape->dimensions(y_outer_dim));
+    return builder->Broadcast(
+        builder->ConstantLiteral(xla::Literal::Zero(x_shape->element_type())),
+        dimensions);
   }
 
-  int x_outer_dim = transpose_x ? (ndims - 1) : (ndims - 2);
-  int y_outer_dim = transpose_y ? (ndims - 2) : (ndims - 1);
-  dimensions.push_back(x_shape->dimensions(x_outer_dim));
-  dimensions.push_back(y_shape->dimensions(y_outer_dim));
-
   if (x_shape->element_type() == xla::C64 && transpose_x) {
     x = builder->Conj(x);
   }
@@ -100,55 +96,23 @@ xla::StatusOr<xla::ComputationDataHandle> BatchDot(
     y = builder->Conj(y);
   }
 
-  // Reshape input tensors into 3D tensors by flattening the batch
-  // dimensions. This makes it easier to unroll the batch dimension.
-  auto x_flat =
-      builder->Reshape(x, {batch_count, x_shape->dimensions(ndims - 2),
-                           x_shape->dimensions(ndims - 1)});
-  auto y_flat =
-      builder->Reshape(y, {batch_count, y_shape->dimensions(ndims - 2),
-                           y_shape->dimensions(ndims - 1)});
-
-  // Slice batches into individual matrices and multiply them.
-  std::vector<xla::ComputationDataHandle> out_slices;
-  for (int64 i = 0; i < batch_count; ++i) {
-    // Slice off individual matrices and reshape to 2D tensors.
-    auto x_slice = builder->Slice(
-        x_flat, {i, 0, 0},
-        {i + 1, x_shape->dimensions(ndims - 2), x_shape->dimensions(ndims - 1)},
-        {1, 1, 1});
-    x_slice = builder->Reshape(x_slice, {x_shape->dimensions(ndims - 2),
-                                         x_shape->dimensions(ndims - 1)});
-    auto y_slice = builder->Slice(
-        y_flat, {i, 0, 0},
-        {i + 1, y_shape->dimensions(ndims - 2), y_shape->dimensions(ndims - 1)},
-        {1, 1, 1});
-    y_slice = builder->Reshape(y_slice, {y_shape->dimensions(ndims - 2),
-                                         y_shape->dimensions(ndims - 1)});
-
-    // Transpose if needed.
-    auto lhs = transpose_x ? builder->Transpose(x_slice, {1, 0}) : x_slice;
-    auto rhs = transpose_y ? builder->Transpose(y_slice, {1, 0}) : y_slice;
-
-    // Multiply matrices and add an outer singleton dimension to the output
-    // so we can concatenate along the flattened batch dimension later.
-    auto out = builder->Dot(lhs, rhs);
-    out = builder->Reshape(out,
-                           {1, dimensions[ndims - 2], dimensions[ndims - 1]});
-    out_slices.push_back(out);
+  // If there are no batch dimensions, use a regular Dot.
+  // TODO(b/69062148) Remove this code when Dot emitters can be passed
+  // dimensions to transpose directly (i.e. without requiring a Transpose HLO).
+  if (batch_dimension_numbers.empty()) {
+    auto lhs = transpose_x ? builder->Transpose(x, {1, 0}) : x;
+    auto rhs = transpose_y ? builder->Transpose(y, {1, 0}) : y;
+    return builder->Dot(lhs, rhs);
   }
 
-  // Concatenate output slices and reshape to original number of dimensions.
-  xla::ComputationDataHandle data;
-  if (out_slices.empty()) {
-    // It is illegal to pass an empty list to ConcatInDim.
-    // The batch count is empty, so both inputs must have zero elements.
-    // Arbitrarily use the left input as the argument to Reshape().
-    data = x;
-  } else {
-    data = builder->ConcatInDim(out_slices, 0);
+  xla::DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(x_inner_dim);
+  dot_dnums.add_rhs_contracting_dimensions(y_inner_dim);
+  for (auto batch_dimension_number : batch_dimension_numbers) {
+    dot_dnums.add_lhs_batch_dimensions(batch_dimension_number);
+    dot_dnums.add_rhs_batch_dimensions(batch_dimension_number);
   }
-  return builder->Reshape(data, dimensions);
+  return builder->DotGeneral(x, y, dot_dnums);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/lib/util.cc b/tensorflow/compiler/tf2xla/lib/util.cc
index 7ffe0aa6df9b21c4311eb6c8d311fba1e115b3f4..ce24b61b5dc7176f3caa05e3eb9257399fef7926 100644
--- a/tensorflow/compiler/tf2xla/lib/util.cc
+++ b/tensorflow/compiler/tf2xla/lib/util.cc
@@ -28,7 +28,7 @@ limitations under the License.
 namespace tensorflow {
 
 xla::ComputationDataHandle Zeros(xla::ComputationBuilder* builder,
-                                 xla::Shape& shape) {
+                                 const xla::Shape& shape) {
   return builder->Broadcast(
       builder->ConstantLiteral(xla::Literal::Zero(shape.element_type())),
       xla::AsInt64Slice(shape.dimensions()));
@@ -40,6 +40,9 @@ xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* builder,
     case xla::F16:
       return builder->ConstantR0<xla::half>(static_cast<xla::half>(value));
       break;
+    case xla::BF16:
+      return builder->ConstantR0<bfloat16>(static_cast<bfloat16>(value));
+      break;
     case xla::F32:
       return builder->ConstantR0<float>(static_cast<float>(value));
       break;
diff --git a/tensorflow/compiler/tf2xla/lib/util.h b/tensorflow/compiler/tf2xla/lib/util.h
index 8fba6b5cf247e9b2c26533c53ece8b0d7d4f4c36..fb138b4f736500aac8184770d97fbf930ced69ea 100644
--- a/tensorflow/compiler/tf2xla/lib/util.h
+++ b/tensorflow/compiler/tf2xla/lib/util.h
@@ -25,7 +25,7 @@ namespace tensorflow {
 
 // Returns a zero-filled tensor with shape `shape`.
 xla::ComputationDataHandle Zeros(xla::ComputationBuilder* builder,
-                                 xla::Shape& shape);
+                                 const xla::Shape& shape);
 
 // Returns a floating point scalar constant of 'type' with 'value'.
 // If 'type' is complex, returns a real value with zero imaginary component.
diff --git a/tensorflow/compiler/tf2xla/literal_util.cc b/tensorflow/compiler/tf2xla/literal_util.cc
index 576cd9bf9abb43e29d9eb8f706e0f42ac2d038e9..fcbd157c6191655865d5e250fdf71338780bc2a6 100644
--- a/tensorflow/compiler/tf2xla/literal_util.cc
+++ b/tensorflow/compiler/tf2xla/literal_util.cc
@@ -23,17 +23,17 @@ limitations under the License.
 namespace tensorflow {
 
 Status HostTensorToLiteral(const Tensor& host_tensor, xla::Literal* literal) {
-  literal->Clear();
+  xla::Shape literal_shape;
   TF_RETURN_IF_ERROR(TensorShapeToXLAShape(
-      host_tensor.dtype(), host_tensor.shape(), literal->mutable_shape()));
+      host_tensor.dtype(), host_tensor.shape(), &literal_shape));
 
-  literal->Reserve(host_tensor.NumElements());
+  *literal = xla::Literal(literal_shape);
 
   // memcpy over the payload ...
   // TODO(phawkins): handle string types.
   size_t total_bytes = host_tensor.TotalBytes();
   if (total_bytes > 0) {
-    void* dst_ptr = literal->MutableInternalData();
+    void* dst_ptr = literal->untyped_data();
     const void* src_ptr = DMAHelper::base(&host_tensor);
     memcpy(dst_ptr, src_ptr, total_bytes);
   }
@@ -56,7 +56,7 @@ Status LiteralToHostTensor(const xla::Literal& literal, DataType target_type,
   *host_tensor = Tensor(target_type, shape);
   size_t total_bytes = host_tensor->TotalBytes();
   if (total_bytes > 0) {
-    const void* src_ptr = literal.InternalData();
+    const void* src_ptr = literal.untyped_data();
     void* dst_ptr = DMAHelper::base(host_tensor);
     memcpy(dst_ptr, src_ptr, total_bytes);
   }
diff --git a/tensorflow/compiler/tf2xla/sharding_util.cc b/tensorflow/compiler/tf2xla/sharding_util.cc
index d9c839b61019b92b6de3a77a7bec610ae848a9a4..1a0e09758f7cc6714793300c6ece14093a8ad246 100644
--- a/tensorflow/compiler/tf2xla/sharding_util.cc
+++ b/tensorflow/compiler/tf2xla/sharding_util.cc
@@ -14,34 +14,59 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/compiler/tf2xla/sharding_util.h"
 
+#include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/util/device_name_utils.h"
 
 namespace tensorflow {
+namespace {
+const char kDeviceSuffixReplicatedCore[] = "REPLICATED_CORE";
+const char kShardingAttribute[] = "_XlaSharding";
+}  // namespace
 
-static const char DEVICE_SUFFIX_REPLICATED_CORE[] = "REPLICATED_CORE";
+namespace {
+xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
+GetShardingFromNodeDef(const NodeDef& node_def) {
+  if (!HasNodeAttr(node_def, kShardingAttribute)) {
+    return tensorflow::gtl::optional<xla::OpSharding>();
+  }
+  string value;
+  xla::OpSharding sharding;
+  TF_RETURN_IF_ERROR(GetNodeAttr(node_def, kShardingAttribute, &value));
+  if (!sharding.ParseFromString(value)) {
+    return xla::InvalidArgument(
+        "Experimental _XlaSharding attribute was not a valid encoded "
+        "xla::OpSharding proto.");
+  }
+  return tensorflow::gtl::optional<xla::OpSharding>(sharding);
+}
 
-static Status CoreOutOfRangeError(int core, int num_cores_per_replica) {
+Status CoreOutOfRangeError(int core, int num_cores_per_replica) {
   return errors::InvalidArgument(
       "Invalid replicated core id: ", core,
       "; num_cores_per_replica=", num_cores_per_replica);
 }
+}  // namespace
 
 xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
-ParseShardingFromDevice(const string& device_name, int num_cores_per_replica) {
+ParseShardingFromDevice(
+    const string& device_name, int num_cores_per_replica,
+    tensorflow::gtl::optional<xla::OpSharding> explicit_sharding) {
   if (device_name.empty()) {
     return tensorflow::gtl::optional<xla::OpSharding>();
   }
-
   DeviceNameUtils::ParsedName parsed_device;
   if (!DeviceNameUtils::ParseFullName(device_name, &parsed_device)) {
     return errors::InvalidArgument("Malformed assigned device '", device_name,
                                    "'");
   }
-  if (!parsed_device.has_type ||
-      !StringPiece(parsed_device.type)
-           .ends_with(DEVICE_SUFFIX_REPLICATED_CORE)) {
+
+  if (explicit_sharding.has_value()) {
+    return explicit_sharding;
+  } else if (!parsed_device.has_type || !parsed_device.has_id ||
+             !StringPiece(parsed_device.type)
+                  .contains(kDeviceSuffixReplicatedCore)) {
     return tensorflow::gtl::optional<xla::OpSharding>();
   } else {
     const int core = parsed_device.id;
@@ -49,24 +74,38 @@ ParseShardingFromDevice(const string& device_name, int num_cores_per_replica) {
       return CoreOutOfRangeError(core, num_cores_per_replica);
     }
     return tensorflow::gtl::optional<xla::OpSharding>(
-        xla::ShardingBuilder::AssignDevice(core));
+        xla::sharding_builder::AssignDevice(core));
   }
 }
 
+xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
+ParseShardingFromDevice(const NodeDef& node_def, int num_cores_per_replica) {
+  const string& device_name = node_def.device();
+  TF_ASSIGN_OR_RETURN(tensorflow::gtl::optional<xla::OpSharding> sharding,
+                      GetShardingFromNodeDef(node_def));
+  return ParseShardingFromDevice(device_name, num_cores_per_replica, sharding);
+}
+
 xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
 ParseShardingFromDevice(const Node& node, int num_cores_per_replica) {
   string device_name = node.assigned_device_name();
   if (device_name.empty()) {
     device_name = node.requested_device();
   }
-  return ParseShardingFromDevice(device_name, num_cores_per_replica);
+  TF_ASSIGN_OR_RETURN(tensorflow::gtl::optional<xla::OpSharding> sharding,
+                      GetShardingFromNodeDef(node.def()));
+  return ParseShardingFromDevice(device_name, num_cores_per_replica, sharding);
 }
+
 void SetShardingDeviceAssignmentFromNode(const Node& src, Node* dst) {
   string device_name = src.assigned_device_name();
   if (device_name.empty()) {
     device_name = src.requested_device();
   }
   dst->set_assigned_device_name(device_name);
+  if (const AttrValue* attr = src.attrs().Find(kShardingAttribute)) {
+    dst->AddAttr(kShardingAttribute, *attr);
+  }
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/sharding_util.h b/tensorflow/compiler/tf2xla/sharding_util.h
index f6468bba9f950fec88dcc6b3ec760f014d3a0ef3..b1c817bdcc211648b16e395313ca171d1acb9ea9 100644
--- a/tensorflow/compiler/tf2xla/sharding_util.h
+++ b/tensorflow/compiler/tf2xla/sharding_util.h
@@ -17,7 +17,7 @@ limitations under the License.
 
 #include <string>
 
-#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/client/sharding_builder.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/lib/core/status.h"
@@ -29,14 +29,21 @@ namespace tensorflow {
 // - if the device name is invalid.
 // - the core is parsed and is out of the range [0, num_cores_per_replica).
 //
-// Otherwise, returns either a non-value or a sharding set as per
-// xla:ShardingBuilder::AssignDevice.
+// Otherwise, returns either:
+// - explicit_sharding if explicit_sharding.has_value()
+// - a non-value if there is no assigned core or
+// - a sharding set as per xla::sharding_builder::AssignDevice.
 xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
-ParseShardingFromDevice(const string& device_name, int num_cores_per_replica);
+ParseShardingFromDevice(const string& device_name, int num_cores_per_replica,
+                        tensorflow::gtl::optional<xla::OpSharding>
+                            explicit_sharding = tensorflow::gtl::nullopt);
 
 xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
 ParseShardingFromDevice(const Node& node, int num_cores_per_replica);
 
+xla::StatusOr<tensorflow::gtl::optional<xla::OpSharding>>
+ParseShardingFromDevice(const NodeDef& node_def, int num_cores_per_replica);
+
 void SetShardingDeviceAssignmentFromNode(const Node& src, Node* dst);
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/tf2xla.cc b/tensorflow/compiler/tf2xla/tf2xla.cc
index a14c93a2b9494b89f579bc20ee0510c136f8f01b..906f2290433face4cce3296b2f815d50d8c496ce 100644
--- a/tensorflow/compiler/tf2xla/tf2xla.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla.cc
@@ -253,8 +253,7 @@ Status CreateXlaArgs(const Graph& graph,
 // Converts the TensorFlow graph into an XLA computation, by executing the
 // graph symbolically, with each op building up the XLA HLO.
 Status ConvertGraphToXla(std::unique_ptr<Graph> graph, xla::Client* client,
-                         xla::Computation* computation,
-                         bool* requires_runtime_context) {
+                         xla::Computation* computation) {
   XlaOpRegistry::RegisterCompilationKernels();
   for (Node* node : graph->nodes()) {
     node->set_assigned_device_name(
@@ -277,7 +276,6 @@ Status ConvertGraphToXla(std::unique_ptr<Graph> graph, xla::Client* client,
   TF_RETURN_IF_ERROR(compiler.CompileGraph(XlaCompiler::CompileOptions(),
                                            "tfcompile", std::move(graph),
                                            xla_args, &result));
-  *requires_runtime_context = result.requires_runtime_context;
   *computation = std::move(*result.computation);
 
   int num_const_results = 0;
@@ -352,12 +350,10 @@ Status InitGraph(const GraphDef& graph_def, const tf2xla::Config& config,
 
 Status ConvertGraphDefToXla(const GraphDef& graph_def,
                             const tf2xla::Config& config, xla::Client* client,
-                            xla::Computation* computation,
-                            bool* requires_runtime_context) {
+                            xla::Computation* computation) {
   std::unique_ptr<Graph> graph;
   TF_RETURN_IF_ERROR(InitGraph(graph_def, config, &graph));
-  TF_RETURN_IF_ERROR(ConvertGraphToXla(std::move(graph), client, computation,
-                                       requires_runtime_context));
+  TF_RETURN_IF_ERROR(ConvertGraphToXla(std::move(graph), client, computation));
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/tf2xla/tf2xla.h b/tensorflow/compiler/tf2xla/tf2xla.h
index ab99beebf7946237425d4d304a858ac6817177b8..473c431b12d441c652f1d0d6c11c5e87836ab36d 100644
--- a/tensorflow/compiler/tf2xla/tf2xla.h
+++ b/tensorflow/compiler/tf2xla/tf2xla.h
@@ -30,13 +30,9 @@ namespace tensorflow {
 //
 // The computation is built in the context of the given `client`, which may
 // subsequently be used to compile or execute the computation.
-//
-// If `requires_runtime_context` is filled with true, this indicates the last
-// argument of the computation is XlaLocalRuntimeContext*.
 Status ConvertGraphDefToXla(const GraphDef& graph_def,
                             const tf2xla::Config& config, xla::Client* client,
-                            xla::Computation* computation,
-                            bool* requires_runtime_context);
+                            xla::Computation* computation);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/compiler/tf2xla/tf2xla_supported_ops.cc b/tensorflow/compiler/tf2xla/tf2xla_supported_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7aca889a266439538c4cd1c153460e6cc871b246
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/tf2xla_supported_ops.cc
@@ -0,0 +1,97 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/tf2xla_supported_ops.h"
+
+#include <algorithm>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/core/framework/kernel_def.pb.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/util/command_line_flags.h"
+
+namespace tensorflow {
+namespace tf2xla {
+namespace {
+
+void PrintSupportedOps(const string& device, const string& regen_run) {
+  XlaOpRegistry::RegisterCompilationKernels();
+
+  std::vector<const KernelDef*> kdefs =
+      XlaOpRegistry::DeviceKernels(device,
+                                   /*include_compilation_only_kernels=*/true);
+  std::sort(
+      kdefs.begin(), kdefs.end(),
+      [](const KernelDef* a, const KernelDef* b) { return a->op() < b->op(); });
+
+  std::cout << "**Supported operators for device: " << device << "**\n\n"
+            << "Operator | Type Constraint\n"
+            << "-------- | ---------------" << std::endl;
+  for (const KernelDef* kdef : kdefs) {
+    std::vector<string> constraints;
+    for (const KernelDef::AttrConstraint& constraint : kdef->constraint()) {
+      std::vector<string> types;
+      for (int type : constraint.allowed_values().list().type()) {
+        types.push_back(DataTypeString(static_cast<DataType>(type)));
+      }
+      std::sort(types.begin(), types.end());
+      constraints.push_back("`" + constraint.name() + "={" +
+                            str_util::Join(types, ",") + "}`");
+    }
+    std::cout << "`" << kdef->op() << "` | "
+              << str_util::Join(constraints, "<br>") << std::endl;
+  }
+
+  std::cout << "\nTo regenerate this table, run:\n\n```shell\n"
+            << regen_run << " --device=" << device << "\n```" << std::endl;
+}
+
+}  // namespace
+
+void SupportedOpsMain(int argc, char** argv, const char* regen_run) {
+  std::vector<string> device_names = XlaOpRegistry::BackendNames();
+  std::sort(device_names.begin(), device_names.end());
+
+  // Set up and parse flags.
+  string device;
+  std::vector<Flag> flag_list = {
+      {"device", &device,
+       "Name of the compilation device for which to print supported ops, "
+       "one of: " +
+           str_util::Join(device_names, ",")},
+  };
+  string usage = Flags::Usage(argv[0], flag_list);
+  bool parsed_flags_ok = Flags::Parse(&argc, argv, flag_list);
+  QCHECK(parsed_flags_ok) << "\n" << usage;
+  QCHECK(XlaOpRegistry::IsBackendRegistered(device))
+      << "\nUnknown device: " << device << "\n"
+      << usage;
+
+  // Run the program.
+  port::InitMain(usage.c_str(), &argc, &argv);
+  QCHECK(argc == 1) << "\nERROR: This command does not take any arguments "
+                       "other than flags\n\n"
+                    << usage;
+  PrintSupportedOps(device, regen_run);
+}
+
+}  // namespace tf2xla
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/tf2xla_supported_ops.h b/tensorflow/compiler/tf2xla/tf2xla_supported_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..1b45fb4cdd3b0173b04e130b7416874a9a406dc5
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/tf2xla_supported_ops.h
@@ -0,0 +1,33 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_TF2XLA_TF2XLA_SUPPORTED_OPS_H_
+#define TENSORFLOW_COMPILER_TF2XLA_TF2XLA_SUPPORTED_OPS_H_
+
+namespace tensorflow {
+namespace tf2xla {
+
+// The implementation of a main function for a binary that prints a table of
+// supported tf2xla operators for a given device, along with their type
+// constraints, to stdout.
+//
+// Pass the argc and argv from main, unmodified.  Use regen_run to specify the
+// command used to regenerate the table.
+void SupportedOpsMain(int argc, char** argv, const char* regen_run);
+
+}  // namespace tf2xla
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_TF2XLA_TF2XLA_SUPPORTED_OPS_H_
diff --git a/tensorflow/compiler/tf2xla/tf2xla_supported_ops_main.cc b/tensorflow/compiler/tf2xla/tf2xla_supported_ops_main.cc
new file mode 100644
index 0000000000000000000000000000000000000000..690666c2400d45e33c1a5d1818b68a86a70a5be3
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/tf2xla_supported_ops_main.cc
@@ -0,0 +1,22 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/tf2xla_supported_ops.h"
+
+int main(int argc, char** argv) {
+  const char* regen_run =
+      "bazel run -c opt -- tensorflow/compiler/tf2xla:tf2xla_supported_ops";
+  tensorflow::tf2xla::SupportedOpsMain(argc, argv, regen_run);
+}
diff --git a/tensorflow/compiler/tf2xla/tf2xla_test.cc b/tensorflow/compiler/tf2xla/tf2xla_test.cc
index ecd15652fe84b0c19d2f7fc18f877236547f9be9..a9978e697b091715ce120f0d18fdddd259e08b32 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_test.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_test.cc
@@ -70,10 +70,7 @@ TEST(ConvertGraphDefToXla, Sum) {
 
   xla::LocalClient* client = xla::ClientLibrary::LocalClientOrDie();
   xla::Computation computation;
-  bool requires_runtime_context;
-  TF_EXPECT_OK(ConvertGraphDefToXla(graph_def, config, client, &computation,
-                                    &requires_runtime_context));
-  ASSERT_FALSE(requires_runtime_context);
+  TF_EXPECT_OK(ConvertGraphDefToXla(graph_def, config, client, &computation));
 
   // Set up arguments.
   auto x_literal = xla::Literal::CreateR0<int32>(10);
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc
index 55f2f3149c6ba7bfa18608f961c8a76103a50756..f428a194328935fec1210ea96245344de859e611 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc
@@ -88,8 +88,8 @@ Status ValidateConfig(const tf2xla::Config& config) {
     TF_RETURN_IF_ERROR(CheckNameDuplicates("fetch", fetch.name(), &names));
   }
   TF_RETURN_IF_ERROR(CheckFeedFetchNameConflicts("fetch", names));
-  if (config.feed().empty() || config.fetch().empty()) {
-    return errors::InvalidArgument("feeds and fetches must be specified");
+  if (config.fetch().empty()) {
+    return errors::InvalidArgument("fetches must be specified");
   }
   return Status::OK();
 }
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
index 436039e154842443f779aba276bc571fc2ab7537..ed10d80609641b090cf78bf2e17364fe2fa89c31 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
@@ -58,24 +58,14 @@ TEST(ValidateConfig, Good) {
 
 TEST(ValidateConfig, BadEmpty) {
   tf2xla::Config config;
-  ExpectErrorContains(ValidateConfig(config),
-                      "feeds and fetches must be specified");
-}
-
-TEST(ValidateConfig, BadNoFeed) {
-  tf2xla::Config config;
-  tf2xla::Fetch* fetch = config.add_fetch();
-  fetch->mutable_id()->set_node_name("foo");
-  ExpectErrorContains(ValidateConfig(config),
-                      "feeds and fetches must be specified");
+  ExpectErrorContains(ValidateConfig(config), "fetches must be specified");
 }
 
 TEST(ValidateConfig, BadNoFetch) {
   tf2xla::Config config;
   tf2xla::Feed* feed = config.add_feed();
   feed->mutable_id()->set_node_name("foo");
-  ExpectErrorContains(ValidateConfig(config),
-                      "feeds and fetches must be specified");
+  ExpectErrorContains(ValidateConfig(config), "fetches must be specified");
 }
 
 TEST(ValidateConfig, BadFeedNodeName) {
diff --git a/tensorflow/compiler/tf2xla/xla_compilation_device.cc b/tensorflow/compiler/tf2xla/xla_compilation_device.cc
index 4f32c29954b2d809d31ef8c584b6a6c3dcdf5cef..fcb0a4e63814b4afc114bdaea312a92dd8396a2e 100644
--- a/tensorflow/compiler/tf2xla/xla_compilation_device.cc
+++ b/tensorflow/compiler/tf2xla/xla_compilation_device.cc
@@ -100,7 +100,7 @@ void XlaCompilationDevice::Compute(OpKernel* op_kernel,
   b->SetOpMetadata(metadata);
 
   auto sharding_parse_result = ParseShardingFromDevice(
-      op_kernel->requested_device(), std::numeric_limits<int>::max());
+      op_kernel->def(), std::numeric_limits<int>::max());
   OP_REQUIRES_OK(context, sharding_parse_result.status());
   tensorflow::gtl::optional<xla::OpSharding> op_sharding =
       sharding_parse_result.ValueOrDie();
@@ -135,98 +135,4 @@ void XlaExpression::set_constant_value(Tensor value) {
   constant_value_ = std::move(value);
 }
 
-Status XlaResource::GetXlaShape(xla::ComputationBuilder* builder,
-                                xla::Shape* shape) const {
-  auto shape_or_status = builder->GetShape(value);
-  if (!shape_or_status.ok()) {
-    return shape_or_status.status();
-  }
-  *shape = *shape_or_status.ValueOrDie();
-  return Status::OK();
-}
-
-Status XlaResource::GetShape(xla::ComputationBuilder* builder,
-                             TensorShape* shape) const {
-  xla::Shape xla_shape;
-  TF_RETURN_IF_ERROR(GetXlaShape(builder, &xla_shape));
-  TF_RETURN_IF_ERROR(XLAShapeToTensorShape(xla_shape, shape));
-  return Status::OK();
-}
-
-Status XlaResource::GetOrCreateTensorArrayGradient(
-    const string& source, xla::ComputationBuilder* builder,
-    XlaResource** gradient_out) {
-  VLOG(2) << "Gradient lookup for resource: " << name
-          << " gradient: " << source;
-  TF_RET_CHECK(kind == kTensorArray);
-  std::unique_ptr<XlaResource>& gradient = tensor_array_gradients[source];
-  if (!gradient) {
-    gradient.reset(new XlaResource);
-    gradient->kind = XlaResource::kTensorArray;
-    gradient->name = strings::StrCat("TensorArrayGrad: ", name);
-    gradient->type = type;
-    gradient->tensor_array_size = tensor_array_size;
-
-    TensorShape ta_shape;
-    TF_RETURN_IF_ERROR(GetShape(builder, &ta_shape));
-    gradient->value = builder->Broadcast(XlaHelpers::Zero(builder, type),
-                                         ta_shape.dim_sizes());
-    gradient->initial_value = gradient->value;
-  }
-  *gradient_out = gradient.get();
-  return Status::OK();
-}
-
-Status XlaResource::PackedShape(xla::ComputationBuilder* builder,
-                                xla::Shape* packed_shape) const {
-  if (tensor_array_gradients.empty()) {
-    return GetXlaShape(builder, packed_shape);
-  }
-  TF_RET_CHECK(kind == kTensorArray);
-  std::vector<xla::Shape> elem_shapes(1 + tensor_array_gradients.size());
-  int pos = 0;
-  TF_RETURN_IF_ERROR(GetXlaShape(builder, &elem_shapes[pos++]));
-  for (const auto& gradient : tensor_array_gradients) {
-    TF_RETURN_IF_ERROR(
-        gradient.second->GetXlaShape(builder, &elem_shapes[pos++]));
-  }
-  *packed_shape = xla::ShapeUtil::MakeTupleShape(elem_shapes);
-  return Status::OK();
-}
-
-Status XlaResource::Pack(xla::ComputationDataHandle* pack,
-                         xla::ComputationBuilder* builder) const {
-  if (tensor_array_gradients.empty()) {
-    *pack = value;
-  } else {
-    TF_RET_CHECK(kind == kTensorArray);
-    std::vector<xla::ComputationDataHandle> elems;
-    elems.push_back(value);
-    for (const auto& gradient : tensor_array_gradients) {
-      elems.push_back(gradient.second->value);
-    }
-    *pack = builder->Tuple(elems);
-  }
-  return Status::OK();
-}
-
-Status XlaResource::SetFromPack(const std::set<string>& gradient_sources,
-                                const xla::ComputationDataHandle& pack,
-                                xla::ComputationBuilder* builder) {
-  if (gradient_sources.empty()) {
-    value = pack;
-  } else {
-    TF_RET_CHECK(kind == kTensorArray);
-    int pos = 0;
-    value = builder->GetTupleElement(pack, pos++);
-    for (const auto& source : gradient_sources) {
-      XlaResource* gradient;
-      TF_RETURN_IF_ERROR(
-          GetOrCreateTensorArrayGradient(source, builder, &gradient));
-      gradient->value = builder->GetTupleElement(pack, pos++);
-    }
-  }
-  return Status::OK();
-}
-
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_compilation_device.h b/tensorflow/compiler/tf2xla/xla_compilation_device.h
index 6230acd718bc330f178007b575b5119de5b3d4f4..0243ee332fbdca0fe5e28b1a7d9530df4417f807 100644
--- a/tensorflow/compiler/tf2xla/xla_compilation_device.h
+++ b/tensorflow/compiler/tf2xla/xla_compilation_device.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <memory>
 
+#include "tensorflow/compiler/tf2xla/xla_resource.h"
 #include "tensorflow/compiler/xla/client/computation_builder.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/common_runtime/local_device.h"
@@ -66,87 +67,6 @@ class XlaCompilationDevice : public LocalDevice {
   std::unique_ptr<XlaCompilationAllocator> allocator_;
 };
 
-// Represents a resource, such as a Variable or TensorArray.
-// TODO(phawkins): make this into a properly abstracted class.
-struct XlaResource {
-  enum Kind {
-    kInvalid,
-    kVariable,
-    kTensorArray,
-    kStack,
-  };
-
-  Kind kind = kInvalid;
-
-  // If this resource is visible externally, what was its argument number?
-  int arg_num = -1;
-
-  // A descriptive name for the resource, used in error messages.
-  string name;
-
-  // Current type and value of the resource. Uninitialized resources are
-  // represented by a default (zero) handle and type DT_INVALID.
-  // While the type of a resource is notionally fixed during execution, when
-  // a resource is first initialized we do not yet know its type, so we keep
-  // track of its type dynamically.
-  DataType type = DT_INVALID;
-  xla::ComputationDataHandle value;
-
-  // Value of the resource at computation entry. Used to detect which
-  // variables have new values that need to be written back.
-  xla::ComputationDataHandle initial_value;
-
-  // TensorArray-specific fields
-
-  // 'tensor_array_size' stores the expected size of the TensorArray. We need
-  // to store this since sometimes TensorArrays must be initialized lazily since
-  // we do not know the element shape at construction time.
-  int64 tensor_array_size = -1;
-
-  // 'tensor_array_gradient' is a map from TensorArrayGradV3 'source' attributes
-  // to an XlaResource containing the gradient TensorArrays. We store a pointer
-  // here since there should only be one gradient TensorArray per 'source'
-  // string, irrespective of the number of calls to TensorArrayGrad. The map
-  // is ordered since values are packed into tuples by Pack() sorted by name
-  // order.
-  std::map<string, std::unique_ptr<XlaResource>> tensor_array_gradients;
-
-  // Returns the shape of the resource as an xla::Shape.
-  Status GetXlaShape(xla::ComputationBuilder* builder, xla::Shape* shape) const;
-
-  // Returns the shape of the resource as an TensorShape. Fails if the shape is
-  // not representable as a TensorShape.
-  Status GetShape(xla::ComputationBuilder* builder, TensorShape* shape) const;
-
-  // Looks up the gradient for `source`, or creates it if it does not already
-  // exist. The call target must be an initialized TensorArray resource. A
-  // TensorArray can have multiple named gradients; see the operator
-  // documentation for TensorArrayGradV3 for details.
-  Status GetOrCreateTensorArrayGradient(const string& source,
-                                        xla::ComputationBuilder* builder,
-                                        XlaResource** gradient_out);
-
-  // Packs a resource into a single XLA value `pack`, suitable for use as
-  // an XlaCompiler::Argument. For non-TensorArrays or TensorArrays without
-  // gradients, sets `*pack` to `value`.
-  // For TensorArrays with gradients, packs the value and its gradient values in
-  // a tuple; the gradients values are packed in order by source name.
-  Status Pack(xla::ComputationDataHandle* pack,
-              xla::ComputationBuilder* builder) const;
-
-  // Returns the shape of the `pack` value computed by `Pack()`.
-  Status PackedShape(xla::ComputationBuilder* builder,
-                     xla::Shape* packed_shape) const;
-
-  // Updates the resource with values from `pack`. If `gradient_sources` is
-  // non-empty, treats `pack` as a tuple that represents a TensorArray and
-  // its gradients, and unpacks and updates the gradient resources. Opposite
-  // of Pack().
-  Status SetFromPack(const std::set<string>& gradient_sources,
-                     const xla::ComputationDataHandle& pack,
-                     xla::ComputationBuilder* builder);
-};
-
 // A XlaExpression wraps an XLA computation. Each Tensor on an
 // XlaCompilationDevice contains an XlaExpression, and the shape of the Tensor
 // matches the shape of the subcomputation in the ComputationDataHandle. Each
diff --git a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc
index b5c17c5273bb15e20184b2fefd93880d4828105e..79da701fd244a461a60588153b601d5c1870fa89 100644
--- a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.cc
@@ -28,9 +28,10 @@ XlaCompiledCpuFunction::XlaCompiledCpuFunction(const StaticData& static_data,
       temps_(new void*[static_data.num_temps]),
       arg_names_(static_data.arg_names),
       result_names_(static_data.result_names),
-      program_shape_(static_data.program_shape) {
+      program_shape_(static_data.program_shape),
+      hlo_profile_printer_(static_data.hlo_profile_printer) {
   // Allocate arg and temp buffers.
-  if (alloc_mode == AllocMode::ARGS_RESULTS_AND_TEMPS) {
+  if (alloc_mode == AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS) {
     alloc_args_ = tensorflow::tfcompile::runtime::MallocContiguousBuffers(
         static_data.arg_sizes, static_data.num_args, args_,
         /*annotate_initialized=*/false);
@@ -39,9 +40,13 @@ XlaCompiledCpuFunction::XlaCompiledCpuFunction(const StaticData& static_data,
       static_data.temp_sizes, static_data.num_temps, temps_,
       /*annotate_initialized=*/true);
 
-  // The runtime context is always the last arg, if it is required.
-  if (static_data.requires_runtime_context) {
-    args_[static_data.num_args - 1] = &context_;
+  // If Hlo profiling is enabled the generated code expects an appropriately
+  // sized buffer to be passed in as the last argument.  If Hlo profiling is
+  // disabled the last function argument is still present in the function
+  // signature, but it is ignored by the generated code and we pass in null for
+  // it.
+  if (hlo_profiling_enabled()) {
+    profile_counters_ = new int64[static_data.profile_counters_size]();
   }
 }
 
@@ -50,6 +55,7 @@ XlaCompiledCpuFunction::~XlaCompiledCpuFunction() {
   tensorflow::tfcompile::runtime::FreeContiguous(alloc_temps_);
   delete[] args_;
   delete[] temps_;
+  delete[] profile_counters_;
 }
 
 namespace {
diff --git a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h
index f49a7889222ff989144217ab10b27595f89e4311..e0ae3ed9a811bcc49ce8862037a67d293e879e57 100644
--- a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h
+++ b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h
@@ -16,10 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_TF2XLA_XLA_COMPILED_CPU_FUNCTION_H_
 #define TENSORFLOW_COMPILER_TF2XLA_XLA_COMPILED_CPU_FUNCTION_H_
 
-#include <functional>
+#include <cassert>
 #include <string>
 
-#include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h"
 #include "tensorflow/compiler/xla/executable_run_options.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -27,6 +26,7 @@ limitations under the License.
 // never use this functionality.
 namespace xla {
 class ProgramShape;
+class HloProfilePrinter;
 }
 
 namespace tensorflow {
@@ -48,12 +48,10 @@ namespace tensorflow {
 class XlaCompiledCpuFunction {
  public:
   // Type of the raw function, produced by either JIT or AOT.
-  //
-  // TODO(toddw): Add support for hlo profiling, and replace std::function with
-  // a raw function pointer, for some codesize savings.
-  using RawFunction = std::function<void(
-      void* result, const xla::ExecutableRunOptions* run_options,
-      const void** args, void** temps)>;
+  using RawFunction = void (*)(void* result,
+                               const xla::ExecutableRunOptions* run_options,
+                               const void** args, void** temps,
+                               int64* profile_counters);
 
   // StaticData represents the state necessary to run an XLA-compiled
   // function. For JIT this is backed by data in XlaJitCompiledCpuFunction; for
@@ -71,9 +69,6 @@ class XlaCompiledCpuFunction {
     // The 0-based index of the result tuple, in the temp buffers.
     size_t result_index = 0;
 
-    // Is the final arg XlaLocalRuntimeContext?
-    bool requires_runtime_context = false;
-
     // [Optional] Arrays of arg and result names. These are arrays of C-style
     // strings, where the array is terminated by nullptr.
     const char** arg_names = nullptr;
@@ -81,21 +76,29 @@ class XlaCompiledCpuFunction {
 
     // [Optional] Arg and result shapes.
     const xla::ProgramShape* program_shape = nullptr;
+
+    // [Optional] Profile printer.  Null if profiling is disabled.
+    const xla::HloProfilePrinter* hlo_profile_printer = nullptr;
+
+    // [Optional] The number of profile counters expected in the profile counter
+    // buffer by the generated code and hlo_profile_printer.  0 if profiling is
+    // disabled.
+    int64 profile_counters_size = 0;
   };
 
   // AllocMode controls the buffer allocation mode.
   enum class AllocMode {
-    // Allocate all buffers - args, results and temps.
-    ARGS_RESULTS_AND_TEMPS,
+    // Allocate all buffers - args, results, profile and temps.
+    ARGS_RESULTS_PROFILES_AND_TEMPS,
 
-    // Only allocate result and temp buffers.
+    // Only allocate result, profile and temp buffers.
     // Use set_arg_data to set argument buffers before Run is called.
-    RESULTS_AND_TEMPS_ONLY,
+    RESULTS_PROFILES_AND_TEMPS_ONLY,
   };
 
   XlaCompiledCpuFunction(
       const StaticData& static_data,
-      AllocMode alloc_mode = AllocMode::ARGS_RESULTS_AND_TEMPS);
+      AllocMode alloc_mode = AllocMode::ARGS_RESULTS_PROFILES_AND_TEMPS);
   virtual ~XlaCompiledCpuFunction();
 
   XlaCompiledCpuFunction(const XlaCompiledCpuFunction&) = delete;
@@ -104,21 +107,22 @@ class XlaCompiledCpuFunction {
   // Sets the intra-op thread pool used to run individual ops concurrently.
   void set_thread_pool(const Eigen::ThreadPoolDevice* pool) {
     run_options_.set_intra_op_thread_pool(pool);
-    context_.thread_pool = pool;
   }
 
   // Runs the computation, with inputs read from arg buffers, and outputs
   // written to result buffers. Returns true on success and false on failure.
   bool Run() {
-    context_.error = false;
-    context_.error_msg.clear();
     raw_function_(temps_[result_index_], &run_options_,
-                  const_cast<const void**>(args_), temps_);
-    return !context_.error;
+                  const_cast<const void**>(args_), temps_, profile_counters_);
+    return true;
   }
 
   // Returns the error message from the previous failed Run call.
-  const string& error_msg() const { return context_.error_msg; }
+  //
+  // TODO(fschneider): For now this always returns an empty string because there
+  // is no support for error reporting in XLA. Remove this once all callers are
+  // updated.
+  string error_msg() const { return {}; }
 
   // ------------------------------
   // Arg methods for managing input buffers. Buffers are in row-major order.
@@ -141,10 +145,6 @@ class XlaCompiledCpuFunction {
   // tensorflow::tfcompile::runtime::kAlign. If possible, use the functions in
   // tensorflow/compiler/aot/runtime.h to ensure correct alignment.
   //
-  // If StaticData.requires_runtime_context==true, the final argument is an
-  // XlaLocalRuntimeContext, which is managed internally by this class, and
-  // should not be changed.
-  //
   // Aliasing of argument and result buffers is not allowed, and results in
   // undefined behavior.
   void set_arg_data(size_t index, void* data) { args_[index] = data; }
@@ -162,6 +162,16 @@ class XlaCompiledCpuFunction {
     return static_cast<const void* const*>(temps_[result_index_]);
   }
 
+  // Profile counters for this XLA computation.
+  //
+  // When Hlo profiling is enabled (`hlo_profiling_enabled()` return true in
+  // this case) these counters are non-null and are automatically populated by
+  // `Run`.  The counters can then be pretty-printed using
+  // `hlo_profile_printer()`.
+  //
+  // When Hlo profiling is disabled, this accessor returns null.
+  const int64* profile_counters() const { return profile_counters_; }
+
   // Returns the buffer for the positional result at the given `index`.
   void* result_data(size_t index) { return results()[index]; }
   const void* result_data(size_t index) const { return results()[index]; }
@@ -195,6 +205,12 @@ class XlaCompiledCpuFunction {
   // program shape isn't available.
   const xla::ProgramShape* ProgramShape() const { return program_shape_; }
 
+  bool hlo_profiling_enabled() const { return hlo_profile_printer_ != nullptr; }
+  const xla::HloProfilePrinter& hlo_profile_printer() const {
+    assert(hlo_profiling_enabled());
+    return *hlo_profile_printer_;
+  }
+
  private:
   const RawFunction raw_function_;
   const size_t result_index_;
@@ -208,14 +224,17 @@ class XlaCompiledCpuFunction {
   void* alloc_args_ = nullptr;
   void* alloc_temps_ = nullptr;
 
+  // Backing memory for profiling counters.
+  int64* profile_counters_ = nullptr;
+
   // Options and context passed to the compiled function.
   xla::ExecutableRunOptions run_options_;
-  tensorflow::XlaLocalRuntimeContext context_;
 
   // Optional metadata.
   const char** arg_names_ = nullptr;
   const char** result_names_ = nullptr;
   const xla::ProgramShape* program_shape_ = nullptr;
+  const xla::HloProfilePrinter* hlo_profile_printer_ = nullptr;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc
index 48cebdf74c71f974bf075e0255626ec57eb9a149..69b265436bb19bbbdd9deb872f4097d4bac7ea52 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler.cc
@@ -268,7 +268,8 @@ Status BuildArguments(const Graph& graph,
                       XlaContext* context, std::vector<int>* arg_cores,
                       std::vector<XlaExpression>* arg_expressions,
                       std::vector<int>* input_mapping,
-                      std::vector<xla::Shape>* input_shapes) {
+                      std::vector<xla::Shape>* input_shapes,
+                      bool is_entry_computation) {
   arg_expressions->resize(args.size());
   *arg_cores = std::vector<int>(args.size(), -1);
 
@@ -292,7 +293,7 @@ Status BuildArguments(const Graph& graph,
         TF_RETURN_IF_ERROR(
             context->CreateResource(arg.resource_kind, i, arg.name, arg.type,
                                     xla::ComputationDataHandle(), &resource));
-        resource->tensor_array_size = arg.tensor_array_size;
+        resource->set_tensor_array_size(arg.tensor_array_size);
         arg_expression.set_resource(resource);
         if (arg.initialized) {
           resources.push_back(i);
@@ -316,15 +317,22 @@ Status BuildArguments(const Graph& graph,
     return Status::OK();
   }
 
-  input_shapes->resize(parameters.size());
+  std::vector<xla::Shape> arg_shapes;
+  arg_shapes.reserve(parameters.size());
   input_mapping->resize(parameters.size());
   for (std::vector<int>::size_type i = 0; i < parameters.size(); ++i) {
     const XlaCompiler::Argument& arg = args[parameters[i]];
     // Computes the shapes of non-constant arguments.
-    (*input_shapes)[i] = arg.shape;
+    arg_shapes.push_back(arg.shape);
     (*input_mapping)[i] = parameters[i];
   }
 
+  if (use_tuple_arg) {
+    input_shapes->push_back(xla::ShapeUtil::MakeTupleShape(arg_shapes));
+  } else {
+    *input_shapes = arg_shapes;
+  }
+
   // Use the _Arg nodes in the graph to resolve core assignments.
   for (const Node* n : graph.nodes()) {
     if (StringPiece(n->type_string()) != "_Arg") continue;
@@ -348,14 +356,28 @@ Status BuildArguments(const Graph& graph,
   // Build parameter handles for non-constant arguments.
   std::vector<xla::ComputationDataHandle> arg_handles(parameters.size());
   if (use_tuple_arg) {
-    xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(*input_shapes);
-    xla::ComputationDataHandle tuple =
-        builder->Parameter(0, tuple_shape, "arg_tuple");
+    xla::ComputationDataHandle tuple;
+    if (is_entry_computation) {
+      xla::OpSharding tuple_sharding;
+      tuple_sharding.set_type(xla::OpSharding::Type::OpSharding_Type_TUPLE);
+      for (int64 parameter : parameters) {
+        const int core = (*arg_cores)[parameter];
+        const int root_device = 0;
+        *tuple_sharding.add_tuple_shardings() =
+            core == -1 ? xla::sharding_builder::AssignDevice(root_device)
+                       : xla::sharding_builder::AssignDevice(core);
+      }
+      xla::ScopedShardingAssignment assign_tuple_sharding(builder,
+                                                          tuple_sharding);
+      tuple = builder->Parameter(0, (*input_shapes)[0], "arg_tuple");
+    } else {
+      tuple = builder->Parameter(0, (*input_shapes)[0], "arg_tuple");
+    }
     for (std::vector<int>::size_type i = 0; i < parameters.size(); ++i) {
       const int core = (*arg_cores)[parameters[i]];
       xla::ScopedShardingAssignment assign_sharding(
           builder, core == -1 ? tensorflow::gtl::optional<xla::OpSharding>()
-                              : xla::ShardingBuilder::AssignDevice(core));
+                              : xla::sharding_builder::AssignDevice(core));
       arg_handles[i] = builder->GetTupleElement(tuple, i);
     }
   } else {
@@ -363,7 +385,7 @@ Status BuildArguments(const Graph& graph,
       const int core = (*arg_cores)[parameters[i]];
       xla::ScopedShardingAssignment assign_sharding(
           builder, core == -1 ? tensorflow::gtl::optional<xla::OpSharding>()
-                              : xla::ShardingBuilder::AssignDevice(core));
+                              : xla::sharding_builder::AssignDevice(core));
       arg_handles[i] =
           builder->Parameter(i, (*input_shapes)[i], strings::StrCat("arg", i));
     }
@@ -374,21 +396,18 @@ Status BuildArguments(const Graph& graph,
   for (std::vector<int>::size_type i = 0; i < parameters.size(); ++i) {
     const XlaCompiler::Argument& arg = args[parameters[i]];
     VLOG(2) << "  XLA arg " << i
-            << " shape: " << xla::ShapeUtil::HumanString((*input_shapes)[i])
+            << " shape: " << xla::ShapeUtil::HumanString(arg_shapes[i])
             << " name: " << arg.name << " TF arg " << parameters[i];
     XlaExpression& arg_expression = (*arg_expressions)[parameters[i]];
     switch (arg.kind) {
       case XlaCompiler::Argument::kResource: {
         TF_RET_CHECK(arg.initialized);
         XlaResource* resource = arg_expression.resource();
-        TF_RETURN_IF_ERROR(resource->SetFromPack(arg.tensor_array_gradients,
-                                                 arg_handles[i], builder));
+        TF_RETURN_IF_ERROR(
+            resource->SetFromPack(arg.tensor_array_gradients, arg_handles[i],
+                                  /*reset_initial_values=*/true, builder));
         VLOG(2) << "    resource: num_gradients: "
                 << arg.tensor_array_gradients.size();
-        resource->initial_value = resource->value;
-        for (const auto& gradient : resource->tensor_array_gradients) {
-          gradient.second->initial_value = gradient.second->value;
-        }
         break;
       }
       case XlaCompiler::Argument::kParameter:
@@ -439,43 +458,43 @@ Status BuildComputation(
   std::vector<const XlaResource*> arg_resources;
   arg_resources.reserve(resources.size());
   for (const auto& resource : resources) {
-    if (resource->arg_num >= 0) {
+    if (resource->arg_num() >= 0) {
       arg_resources.push_back(resource.get());
     }
   }
   std::sort(arg_resources.begin(), arg_resources.end(),
             [](const XlaResource* a, const XlaResource* b) {
-              return a->arg_num < b->arg_num;
+              return a->arg_num() < b->arg_num();
             });
 
   for (const XlaResource* resource : arg_resources) {
-    const XlaCompiler::Argument& arg = args[resource->arg_num];
-    const int core = arg_cores[resource->arg_num];
-    DCHECK_LT(resource->arg_num, arg_cores.size());
+    const XlaCompiler::Argument& arg = args[resource->arg_num()];
+    const int core = arg_cores[resource->arg_num()];
+    DCHECK_LT(resource->arg_num(), arg_cores.size());
     bool modified =
-        resource->value.handle() != resource->initial_value.handle();
+        resource->value().handle() != resource->initial_value().handle();
     // TensorArray gradients were modified if their values changed or there are
     // any newly created gradients.
-    for (const auto& grad : resource->tensor_array_gradients) {
-      modified =
-          modified ||
-          grad.second->value.handle() != grad.second->initial_value.handle() ||
-          arg.tensor_array_gradients.count(grad.first) == 0;
+    for (const auto& grad : resource->tensor_array_gradients()) {
+      modified = modified ||
+                 grad.second->value().handle() !=
+                     grad.second->initial_value().handle() ||
+                 arg.tensor_array_gradients.count(grad.first) == 0;
     }
     if (return_updated_values_for_all_resources || modified) {
       resource_updates->emplace_back();
       XlaCompiler::ResourceUpdate& update = resource_updates->back();
-      update.input_index = resource->arg_num;
-      update.type = resource->type;
+      update.input_index = resource->arg_num();
+      update.type = resource->type();
       update.modified = modified;
-      for (const auto& grad : resource->tensor_array_gradients) {
+      for (const auto& grad : resource->tensor_array_gradients()) {
         update.tensor_array_gradients_accessed.insert(grad.first);
       }
 
       // Request that the value be returned on a specific core.
       xla::ScopedShardingAssignment assign_sharding(
           builder, core == -1 ? tensorflow::gtl::optional<xla::OpSharding>()
-                              : xla::ShardingBuilder::AssignDevice(core));
+                              : xla::sharding_builder::AssignDevice(core));
 
       xla::ComputationDataHandle handle;
       TF_RETURN_IF_ERROR(resource->Pack(&handle, builder));
@@ -502,18 +521,6 @@ Status BuildComputation(
   return Status::OK();
 }
 
-void AssignMajorToMinorLayout(xla::Shape* shape) {
-  if (xla::ShapeUtil::IsTuple(*shape)) {
-    for (xla::Shape& elem_shape : *shape->mutable_tuple_shapes()) {
-      AssignMajorToMinorLayout(&elem_shape);
-    }
-  } else {
-    auto& minor_to_major = *shape->mutable_layout()->mutable_minor_to_major();
-    minor_to_major.Resize(xla::ShapeUtil::Rank(*shape), 0);
-    std::iota(minor_to_major.rbegin(), minor_to_major.rend(), 0);
-  }
-}
-
 }  // namespace
 
 Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options,
@@ -543,13 +550,12 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options,
                      options.resolve_compile_time_constants);
   core::ScopedUnref context_unref(context);
 
-  result->tuple_arg = options.use_tuple_arg;
-
   std::vector<XlaExpression> arg_expressions;
   std::vector<int> arg_cores;
-  TF_RETURN_IF_ERROR(BuildArguments(
-      *graph, args, options.use_tuple_arg, &builder, context, &arg_cores,
-      &arg_expressions, &result->input_mapping, &result->xla_input_shapes));
+  TF_RETURN_IF_ERROR(
+      BuildArguments(*graph, args, options.use_tuple_arg, &builder, context,
+                     &arg_cores, &arg_expressions, &result->input_mapping,
+                     &result->xla_input_shapes, options.is_entry_computation));
   context->set_args(std::move(arg_expressions));
 
   TF_RETURN_IF_ERROR(ExecuteGraph(context, std::move(graph), device_,
@@ -564,11 +570,6 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options,
       result->computation.get(), &num_computation_outputs,
       &num_nonconst_outputs, &result->resource_updates));
 
-  result->requires_runtime_context = context->has_context_parameter();
-
-  // Tuple arguments and runtime context parameters are incompatible.
-  TF_RET_CHECK(!(options.use_tuple_arg && result->requires_runtime_context));
-
   VLOG(2) << "Outputs: total: " << context->retvals().size()
           << " nonconstant: " << num_nonconst_outputs;
   result->outputs.resize(context->retvals().size());
@@ -596,7 +597,7 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options,
           << xla::ShapeUtil::HumanString(result->xla_output_shape);
 
   // Tensorflow expects a major-to-minor order of results.
-  AssignMajorToMinorLayout(&result->xla_output_shape);
+  xla::LayoutUtil::SetToDefaultLayout(&result->xla_output_shape);
 
   // Converts the output shapes to TensorShapes.
   int computation_output = 0;
diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h
index ac7d4cfb127d1de8c92f3a855191c45af77888ad..6a46e54f61cb4dbb2a2c1916696655a4e3d85fff 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.h
+++ b/tensorflow/compiler/tf2xla/xla_compiler.h
@@ -54,8 +54,6 @@ namespace tensorflow {
 //   +---------------------+-----------------------------------------+
 // Within each block, the arguments are arranged by the _Arg index from which
 // they were derived.
-// If `Options::requires_runtime_context` is true, then an additional runtime
-// context argument is passed as a final argument.
 //
 // The run-time outputs of the XLA computation are arranged in the following
 // order:
@@ -154,6 +152,10 @@ class XlaCompiler {
     // as Tensors at compile-time, rather than as run-time outputs of the
     // computation.
     bool resolve_compile_time_constants = true;
+
+    // True when compiling the entry computation, false for subcomputations
+    // (while, call, etc.)
+    bool is_entry_computation = true;
   };
 
   struct OutputDescription {
@@ -191,16 +193,9 @@ class XlaCompiler {
     // original arguments, and are not necessarily in the same order.)
     std::vector<int> input_mapping;
 
-    // Does the computation require the local runtime context to be passed as
-    // the last argument?
-    bool requires_runtime_context = false;
-
     // Input shapes of the computation.
     std::vector<xla::Shape> xla_input_shapes;
 
-    // Should the arguments be packed into a single tuple?
-    bool tuple_arg;
-
     // Output shape in XLA format. The output shape is always a tuple.
     xla::Shape xla_output_shape;
 
@@ -232,8 +227,7 @@ class XlaCompiler {
     int graph_def_version = TF_GRAPH_DEF_VERSION;
 
     // If 'allow_cpu_custom_calls' is true, kernels may make use of CustomCall()
-    // for CPU; additionally, an optional XlaLocalRuntimeContext* may be passed
-    // to the computation.
+    // for CPU.
     bool allow_cpu_custom_calls = false;
 
     // If not nullptr, populate_resource_manager is called with the
diff --git a/tensorflow/compiler/tf2xla/xla_compiler_test.cc b/tensorflow/compiler/tf2xla/xla_compiler_test.cc
index 93aae8485d157cd4afbf804d695d5c0ab8d7946c..7ebe4b75bc1e33e506624314b11163e36a2477de 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler_test.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler_test.cc
@@ -227,6 +227,42 @@ TEST_F(XlaCompilerTest, Simple) {
   xla::LiteralTestUtil::ExpectEqual(*expected_literal, *actual_literal);
 }
 
+TEST_F(XlaCompilerTest, HasSaneErrorOnNonCompileTimeConstantInputToReshape) {
+  // Builds a graph that adds reshapes a tensor, but with the shape not
+  // statically known.
+  Scope scope = Scope::NewRootScope().ExitOnError();
+  auto a = ops::_Arg(scope.WithOpName("A"), DT_INT32, 0);
+  auto b = ops::_Arg(scope.WithOpName("B"), DT_INT32, 1);
+  auto c = ops::Reshape(scope.WithOpName("C"), a, b);
+  auto d = ops::_Retval(scope.WithOpName("D"), c, 0);
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  TF_ASSERT_OK(scope.ToGraph(graph.get()));
+
+  // Builds a description of the arguments.
+  std::vector<XlaCompiler::Argument> args(2);
+  args[0].kind = XlaCompiler::Argument::kParameter;
+  args[0].type = DT_INT32;
+  args[0].shape = xla::ShapeUtil::MakeShape(xla::S32, {2});
+  args[1].kind = XlaCompiler::Argument::kParameter;
+  args[1].type = DT_INT32;
+  args[1].shape = xla::ShapeUtil::MakeShape(xla::S32, {2});
+
+  // Compiles the graph.
+  XlaCompiler compiler(DefaultOptions());
+
+  XlaCompiler::CompilationResult result;
+  Status status =
+      compiler.CompileGraph(XlaCompiler::CompileOptions(), "reshape",
+                            std::move(graph), args, &result);
+  EXPECT_FALSE(status.ok());
+  EXPECT_TRUE(
+      StringPiece(status.error_message()).contains("depends on a parameter"))
+      << status.error_message();
+  EXPECT_TRUE(
+      StringPiece(status.error_message()).contains("[[Node: C = Reshape"))
+      << status.error_message();
+}
+
 // Tests handling of compile-time constant outputs.
 TEST_F(XlaCompilerTest, ConstantOutputs) {
   // Builds a graph with one compile-time constant output and one data-dependent
diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc
index 651bafd6c5d946adfedd63ebbe93e4ea016f0b37..e8d17e2e0a1ba01f16d4bbbd2895b112f4dd1989 100644
--- a/tensorflow/compiler/tf2xla/xla_context.cc
+++ b/tensorflow/compiler/tf2xla/xla_context.cc
@@ -70,24 +70,6 @@ XlaContext::XlaContext(XlaCompiler* compiler, xla::ComputationBuilder* builder,
       allow_cpu_custom_calls_(allow_cpu_custom_calls),
       resolve_compile_time_constants_(resolve_compile_time_constants) {}
 
-const xla::ComputationDataHandle&
-XlaContext::GetOrCreateRuntimeContextParameter() {
-  CHECK(allow_cpu_custom_calls_);
-  if (has_context_parameter_) return context_parameter_;
-  has_context_parameter_ = true;
-
-  // Allocate the next available parameter for the context parameter.
-  int num_parameters = 0;
-  for (const XlaExpression& arg : args_) {
-    if (!arg.has_constant_value()) {
-      ++num_parameters;
-    }
-  }
-  context_parameter_ = builder_->Parameter(
-      num_parameters, xla::ShapeUtil::MakeOpaqueShape(), "tf_context");
-  return context_parameter_;
-}
-
 string XlaContext::DebugString() { return "TLA JIT context"; }
 
 // This is called by the Retval Op to associate a computed value
@@ -125,14 +107,9 @@ Status XlaContext::CreateResource(XlaResource::Kind kind, int arg_num,
                                   string name, DataType type,
                                   const xla::ComputationDataHandle& handle,
                                   XlaResource** resource) {
-  resources_.emplace_back(new XlaResource);
+  resources_.emplace_back(
+      new XlaResource(kind, arg_num, std::move(name), type, handle));
   *resource = resources_.back().get();
-  XlaResource& r = **resource;
-  r.kind = kind;
-  r.arg_num = arg_num;
-  r.name = std::move(name);
-  r.type = type;
-  r.initial_value = r.value = handle;
   return Status::OK();
 }
 
@@ -178,6 +155,20 @@ const xla::Computation* XlaContext::GetOrCreateAdd(const DataType type) {
   });
 }
 
+const xla::Computation* XlaContext::GetOrCreateMul(const DataType type) {
+  return LookupOrCreate(type, &mul_func_, [this, type] {
+    const string type_string = DataTypeString(type);
+    VLOG(1) << "Building Mul() for " << type_string;
+    xla::ComputationBuilder b(builder()->client(), "mul<" + type_string + ">");
+    xla::PrimitiveType xla_type;
+    TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type));
+    auto x = b.Parameter(0, xla::ShapeUtil::MakeShape(xla_type, {}), "x");
+    auto y = b.Parameter(1, xla::ShapeUtil::MakeShape(xla_type, {}), "y");
+    b.Mul(x, y);
+    return b.Build().ConsumeValueOrDie();
+  });
+}
+
 const xla::Computation* XlaContext::LookupOrCreate(
     DataType type, ComputationMap* out,
     const std::function<xla::Computation()>& create) {
diff --git a/tensorflow/compiler/tf2xla/xla_context.h b/tensorflow/compiler/tf2xla/xla_context.h
index de8aafa3628e6eebdabbc508cd95a2ac86e3472f..1a7dafe8cdb56cc9b8fcd3ba6e262c21c2a07d90 100644
--- a/tensorflow/compiler/tf2xla/xla_context.h
+++ b/tensorflow/compiler/tf2xla/xla_context.h
@@ -56,15 +56,10 @@ class XlaContext : public ResourceBase {
   xla::ComputationBuilder* builder();
 
   bool allow_cpu_custom_calls() const { return allow_cpu_custom_calls_; }
-  bool has_context_parameter() const { return has_context_parameter_; }
 
   const std::vector<XlaExpression>& args() const { return args_; }
   void set_args(std::vector<XlaExpression> args);
 
-  // Get the runtime context parameter, adding one if it does not already exist.
-  // Dies if not compiling a local executable.
-  const xla::ComputationDataHandle& GetOrCreateRuntimeContextParameter();
-
   const std::vector<XlaExpression>& retvals() { return retvals_; }
 
   // This is called by the Retval Op to associate a computed value
@@ -102,6 +97,11 @@ class XlaContext : public ResourceBase {
   // separate specialization of the computation for each DataType.
   const xla::Computation* GetOrCreateAdd(const DataType type);
 
+  // Get an XLA lambda to compute Mul. This is cached in the
+  // XlaContext since it may be used by multiple Ops. There is a
+  // separate specialization of the computation for each DataType.
+  const xla::Computation* GetOrCreateMul(const DataType type);
+
   // The name of the XlaContext resource during symbolic graph execution.
   static const char kXlaContextResourceName[];
 
@@ -116,16 +116,9 @@ class XlaContext : public ResourceBase {
   const bool allow_cpu_custom_calls_;
 
   // If true, constant return values are returned as Tensors instead of
-  // run-time computation outptus.
+  // run-time computation outputs.
   const bool resolve_compile_time_constants_;
 
-  // When 'has_context_parameter_' is true, this is the computation handle
-  // for an additional final parameter to the computation, through which will be
-  // passed a XlaLocalRuntimeContext* at runtime. Created on demand by
-  // GetOrCreateRuntimeContextParameter().
-  bool has_context_parameter_ = false;
-  xla::ComputationDataHandle context_parameter_;
-
   // Arguments to the Tensorflow graph, indexed by _Arg index.
   // Includes both compile-time constant arguments and runtime parameters.
   std::vector<XlaExpression> args_;
@@ -155,6 +148,9 @@ class XlaContext : public ResourceBase {
   // Cached computation to compute Sum of two elements, specialized by type.
   ComputationMap add_func_;
 
+  // Cached computation to compute Mul of two elements, specialized by type.
+  ComputationMap mul_func_;
+
   // Cached computation to compute Sigmoid of an element, specialized by type.
   ComputationMap sigmoid_func_;
 
diff --git a/tensorflow/compiler/tf2xla/xla_gpu_backend.cc b/tensorflow/compiler/tf2xla/xla_gpu_backend.cc
index d504613d232c779e47a506657d2825d052e726dc..8ca757e72355d890c13b8b448d35c327d3986696 100644
--- a/tensorflow/compiler/tf2xla/xla_gpu_backend.cc
+++ b/tensorflow/compiler/tf2xla/xla_gpu_backend.cc
@@ -21,8 +21,6 @@ namespace tensorflow {
 bool GpuOpFilter(KernelDef* kdef) {
   // TODO(b/31361304): The GPU backend does not parallelize PRNG ops, leading to
   // slow code.
-  // TODO(b/34969189) The implementation of TruncatedNormal generates illegal
-  // code on GPU.
   if (kdef->op() == "RandomStandardNormal" || kdef->op() == "RandomUniform" ||
       kdef->op() == "RandomUniformInt" || kdef->op() == "TruncatedNormal") {
     return false;
diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc
index 9c3e15d2fa4c84af94d137f2e03107bcc980f4cd..77e24162676045b88dc8b62d2c6a4ecc1e738e96 100644
--- a/tensorflow/compiler/tf2xla/xla_helpers.cc
+++ b/tensorflow/compiler/tf2xla/xla_helpers.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-// This file defines helper routines for Tla JIT compilation.
+// This file defines helper routines for XLA compilation.
 
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/lib/util.h"
@@ -121,6 +121,8 @@ xla::ComputationDataHandle XlaHelpers::One(xla::ComputationBuilder* b,
 xla::ComputationDataHandle XlaHelpers::Epsilon(xla::ComputationBuilder* b,
                                                DataType data_type) {
   switch (data_type) {
+    case DT_BFLOAT16:
+      return b->ConstantR0<bfloat16>(bfloat16::epsilon());
     case DT_FLOAT:
       return b->ConstantR0<float>(std::numeric_limits<float>::epsilon());
     case DT_DOUBLE:
@@ -138,40 +140,44 @@ xla::ComputationDataHandle XlaHelpers::IntegerLiteral(
   TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type));
   switch (type) {
     case xla::U8:
-      literal = *xla::Literal::CreateR0<uint8>(value);
+      literal = std::move(*xla::Literal::CreateR0<uint8>(value));
       break;
     case xla::U32:
-      literal = *xla::Literal::CreateR0<uint32>(value);
+      literal = std::move(*xla::Literal::CreateR0<uint32>(value));
       break;
     case xla::U64:
-      literal = *xla::Literal::CreateR0<uint64>(value);
+      literal = std::move(*xla::Literal::CreateR0<uint64>(value));
       break;
     case xla::S8:
-      literal = *xla::Literal::CreateR0<int8>(value);
+      literal = std::move(*xla::Literal::CreateR0<int8>(value));
       break;
     case xla::S32:
-      literal = *xla::Literal::CreateR0<int32>(value);
+      literal = std::move(*xla::Literal::CreateR0<int32>(value));
       break;
     case xla::S64:
-      literal = *xla::Literal::CreateR0<int64>(value);
+      literal = std::move(*xla::Literal::CreateR0<int64>(value));
       break;
     case xla::F32:
-      literal = *xla::Literal::CreateR0<float>(value);
+      literal = std::move(*xla::Literal::CreateR0<float>(value));
       break;
     case xla::F64:
-      literal = *xla::Literal::CreateR0<double>(value);
+      literal = std::move(*xla::Literal::CreateR0<double>(value));
       break;
     case xla::C64:
-      literal = *xla::Literal::CreateR0<complex64>(value);
+      literal = std::move(*xla::Literal::CreateR0<complex64>(value));
       break;
     case xla::PRED:
       LOG(FATAL) << "pred element type is not integral";
     case xla::S16:
     case xla::U16:
       LOG(FATAL) << "u16/s16 literals not yet implemented";
+    case xla::BF16:
+      literal = std::move(
+          *xla::Literal::CreateR0<bfloat16>(static_cast<bfloat16>(value)));
+      break;
     case xla::F16:
-      literal =
-          *xla::Literal::CreateR0<xla::half>(static_cast<xla::half>(value));
+      literal = std::move(
+          *xla::Literal::CreateR0<xla::half>(static_cast<xla::half>(value)));
       break;
     case xla::TUPLE:
       LOG(FATAL) << "tuple element type is not integral";
@@ -207,8 +213,8 @@ xla::ComputationDataHandle XlaHelpers::FloatLiteral(xla::ComputationBuilder* b,
         "elements.");
   }
 
-  *output = input;
-  output->mutable_shape()->Swap(&shape);
+  *output = input.Clone();
+  output->mutable_shape_do_not_use()->Swap(&shape);
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc b/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc
index 1dd454ea8d57e21526e5bcde0c8efc5514983b93..584417bc72c8f6645c05912e857b031cfb394e54 100644
--- a/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc
+++ b/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc
@@ -37,27 +37,14 @@ namespace {
 
 // Returns a vector of positional argument buffer sizes.
 xla::StatusOr<std::vector<intptr_t>> ComputeArgSizes(
-    const xla::ProgramShape& program_shape, bool requires_runtime_context) {
+    const xla::ProgramShape& program_shape) {
   std::vector<intptr_t> arg_sizes;
   const size_t num_args = program_shape.parameters_size();
   arg_sizes.reserve(num_args);
   for (int i = 0; i < num_args; ++i) {
     const xla::Shape& arg_shape = program_shape.parameters(i);
-    if (i == num_args - 1 && requires_runtime_context) {
-      // If the compiled function needs an XlaLocalRuntimeContext* arg, it's
-      // always last, and must be represented as an opaque type.
-      const xla::PrimitiveType type = arg_shape.element_type();
-      if (type != xla::OPAQUE) {
-        return errors::InvalidArgument(
-            "expected final context arg to be opaque, but got type: ",
-            xla::PrimitiveType_Name(type), ", from program shape: ",
-            xla::ShapeUtil::HumanString(program_shape));
-      }
-      arg_sizes.push_back(-1);
-    } else {
-      constexpr size_t kPointerSize = sizeof(void*);
-      arg_sizes.push_back(xla::ShapeUtil::ByteSizeOf(arg_shape, kPointerSize));
-    }
+    constexpr size_t kPointerSize = sizeof(void*);
+    arg_sizes.push_back(xla::ShapeUtil::ByteSizeOf(arg_shape, kPointerSize));
   }
   return std::move(arg_sizes);
 }
@@ -90,21 +77,6 @@ xla::StatusOr<size_t> ComputeResultIndex(
   return result_slice.index();
 }
 
-// Adapt ComputeFunctionType, which includes a final profile_counters arg, to
-// RawFunction, which doesn't include that final arg.
-//
-// TODO(toddw): Change RawFunction and AOT to also pass the final
-// profile_counters arg, and remove this adapter.
-XlaCompiledCpuFunction::RawFunction RawFunctionAdapter(
-    xla::cpu::CpuExecutable::ComputeFunctionType compute_function) {
-  return [compute_function](void* result,
-                            const xla::ExecutableRunOptions* run_options,
-                            const void** args, void** temps) {
-    return compute_function(result, run_options, args, temps,
-                            /*profile_counters=*/nullptr);
-  };
-}
-
 // Collect names from `entries`, where T is one of tf2xla::{Feed,Fetch}. We hold
 // the actual strings in nonempty_names, and hold arrays of pointers in
 // name_ptrs, terminated by a nullptr entry.
@@ -144,9 +116,8 @@ XlaJitCompiledCpuFunction::Compile(
   TF_ASSIGN_OR_RETURN(xla::LocalClient * client,
                       xla::ClientLibrary::GetOrCreateLocalClient());
   xla::Computation computation;
-  bool requires_runtime_context;
-  TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToXla(
-      graph_def, config, client, &computation, &requires_runtime_context));
+  TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToXla(graph_def, config, client,
+                                                      &computation));
 
   // Get and verify the program shape.
   TF_ASSIGN_OR_RETURN(std::unique_ptr<xla::ProgramShape> program_shape,
@@ -177,14 +148,13 @@ XlaJitCompiledCpuFunction::Compile(
   const xla::cpu::CpuExecutable* cpu_executable =
       static_cast<xla::cpu::CpuExecutable*>(executable->executable());
   XlaCompiledCpuFunction::RawFunction raw_function =
-      RawFunctionAdapter(cpu_executable->compute_function());
+      cpu_executable->compute_function();
   const xla::BufferAssignment& buffer_assignment =
       cpu_executable->buffer_assignment();
 
   // Compute buffer sizes and the result index, needed to run the raw function.
-  TF_ASSIGN_OR_RETURN(
-      std::vector<intptr_t> arg_sizes,
-      ComputeArgSizes(*program_shape, requires_runtime_context));
+  TF_ASSIGN_OR_RETURN(std::vector<intptr_t> arg_sizes,
+                      ComputeArgSizes(*program_shape));
   TF_ASSIGN_OR_RETURN(std::vector<intptr_t> temp_sizes,
                       ComputeTempSizes(buffer_assignment));
   TF_ASSIGN_OR_RETURN(size_t result_index,
@@ -203,7 +173,6 @@ XlaJitCompiledCpuFunction::Compile(
   jit->static_data_.temp_sizes = jit->temp_sizes_.data();
   jit->static_data_.num_temps = jit->temp_sizes_.size();
   jit->static_data_.result_index = result_index;
-  jit->static_data_.requires_runtime_context = requires_runtime_context;
   // Optional metadata is collected and set below.
   CollectNames(config.feed(), &jit->nonempty_arg_names_, &jit->arg_names_);
   CollectNames(config.fetch(), &jit->nonempty_result_names_,
@@ -211,6 +180,14 @@ XlaJitCompiledCpuFunction::Compile(
   jit->static_data_.arg_names = jit->arg_names_.data();
   jit->static_data_.result_names = jit->result_names_.data();
   jit->static_data_.program_shape = jit->program_shape_.get();
+
+  if (cpu_executable->hlo_profiling_enabled()) {
+    jit->static_data_.hlo_profile_printer =
+        &cpu_executable->hlo_profile_printer();
+    jit->static_data_.profile_counters_size =
+        cpu_executable->hlo_profile_printer().profile_counters_size();
+  }
+
   return std::move(jit_unique_ptr);
 }
 
diff --git a/tensorflow/compiler/tf2xla/xla_local_runtime_context.h b/tensorflow/compiler/tf2xla/xla_local_runtime_context.h
deleted file mode 100644
index dca420d6ee3fec45f88ac3b450ab0cb4fb83d38a..0000000000000000000000000000000000000000
--- a/tensorflow/compiler/tf2xla/xla_local_runtime_context.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_TF2XLA_XLA_LOCAL_RUNTIME_CONTEXT_H_
-#define TENSORFLOW_COMPILER_TF2XLA_XLA_LOCAL_RUNTIME_CONTEXT_H_
-
-#include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/platform/types.h"
-
-// Forward-declare the ThreadPoolDevice so that it can be ignored unless it's
-// actually used.  E.g. some ahead-of-time compiled computations don't need a
-// thread pool.
-namespace Eigen {
-struct ThreadPoolDevice;
-}
-
-namespace tensorflow {
-
-// An instance of this class is passed to each call from tensorflow into a
-// compiled XLA computation. See xla_launch_ops.cc.
-struct XlaLocalRuntimeContext {
- public:
-  XlaLocalRuntimeContext() {}
-
-  // Kernels implemented using custom call ops set this if they encounter an
-  // error. The error is checked after the entire XLA computation is
-  // complete.
-  //
-  // error+error_msg are used instead of Status to reduce the binary size
-  // overhead for ahead-of-time compiled binaries.
-  bool error = false;
-  string error_msg;
-
-  // Kernels that need a thread pool can get it from here.
-  const Eigen::ThreadPoolDevice* thread_pool = nullptr;
-
- private:
-  TF_DISALLOW_COPY_AND_ASSIGN(XlaLocalRuntimeContext);
-};
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_COMPILER_TF2XLA_XLA_LOCAL_RUNTIME_CONTEXT_H_
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
index 2b4cc9ba2d62b0e559e1456e6bfe6ab1e094e1df..c0c4251eabcd06d7c84ae76f349d657fa9f6d641 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
@@ -118,13 +118,36 @@ Status XlaOpKernelContext::ConstantInputReshaped(
   std::iota(layout_indices.rbegin(), layout_indices.rend(), 0);
   xla::Layout layout = xla::LayoutUtil::MakeLayout(layout_indices);
 
+  xla::StatusOr<bool> is_constant = builder()->IsConstant(handle);
+  if (!is_constant.ok()) {
+    Status status = is_constant.status();
+    errors::AppendToMessage(&status, "while evaluating input ", index, " of ",
+                            context_->op_kernel().type_string(),
+                            " operator as a compile-time constant.");
+    return status;
+  }
+
+  if (!is_constant.ValueOrDie()) {
+    return errors::InvalidArgument(
+        "Input ", index, " to ", context_->op_kernel().type_string(),
+        " operator must be a compile-time constant.\n"
+        "\n"
+        "XLA compilation requires that operator arguments that represent "
+        "shapes or dimensions be evaluated to concrete values at compile time. "
+        "This error means that a shape or dimension argument could not be "
+        "evaluated at compile time, usually because the value of the argument "
+        "depends on a parameter to the computation, on a variable, or on a "
+        "stateful operation such as a random number generator.");
+  }
+
   // Ask the XLA compiler to evaluate the data handle to a literal.
   xla::StatusOr<std::unique_ptr<xla::Literal>> computed =
       builder()->ComputeConstant(handle, &layout);
   if (!computed.ok()) {
-    return errors::InvalidArgument(
-        "Error evaluating ", context_->op_kernel().name(), " input ", index,
-        ": ", computed.status().error_message());
+    return errors::Internal("Error evaluating ", context_->op_kernel().name(),
+                            " input ", index,
+                            "as a compile-time constant.\nError: ",
+                            computed.status().error_message());
   }
   *constant_literal = std::move(*computed.ValueOrDie());
 
@@ -206,15 +229,15 @@ Status XlaOpKernelContext::ConstantInputAsInt64Literal(int index,
   xla::Literal literal;
   TF_RETURN_IF_ERROR(ConstantInput(index, &literal));
   switch (literal.shape().element_type()) {
-    case xla::S32:
-      out->Clear();
-      *out->mutable_shape() = literal.shape();
-      out->mutable_shape()->set_element_type(xla::S64);
-      for (int32 x : literal.s32s()) {
-        out->add_s64s(x);
+    case xla::S32: {
+      *out = xla::Literal(
+          xla::ShapeUtil::ChangeElementType(literal.shape(), xla::S64));
+      auto src_data = literal.data<int32>();
+      for (int64 i = 0; i < src_data.size(); ++i) {
+        out->data<int64>()[i] = src_data[i];
       }
       return Status::OK();
-
+    }
     case xla::S64:
       *out = std::move(literal);
       return Status::OK();
@@ -268,12 +291,12 @@ Status XlaOpKernelContext::ReadVariableInput(
   const XlaExpression* expression = CastExpressionFromTensor(tensor);
   XlaResource* variable = expression->resource();
   TF_RET_CHECK(variable != nullptr);
-  TF_RET_CHECK(variable->kind == XlaResource::kVariable);
-  if (variable->value.handle() == 0) {
+  TF_RET_CHECK(variable->kind() == XlaResource::kVariable);
+  if (!variable->initialized()) {
     return errors::InvalidArgument("Read of uninitialized variable ",
-                                   variable->name);
+                                   variable->name());
   }
-  *value = variable->value;
+  *value = variable->value();
   return Status::OK();
 }
 
@@ -283,13 +306,13 @@ Status XlaOpKernelContext::GetVariableTypeAndShape(int index, DataType* type,
   const XlaExpression* expression = CastExpressionFromTensor(tensor);
   XlaResource* variable = expression->resource();
   TF_RET_CHECK(variable != nullptr);
-  TF_RET_CHECK(variable->kind == XlaResource::kVariable);
-  if (variable->value.handle() == 0) {
+  TF_RET_CHECK(variable->kind() == XlaResource::kVariable);
+  if (!variable->initialized()) {
     return errors::InvalidArgument("Read of uninitialized variable ",
-                                   variable->name);
+                                   variable->name());
   }
-  *type = variable->type;
-  auto shape_or_status = builder()->GetShape(variable->value);
+  *type = variable->type();
+  auto shape_or_status = builder()->GetShape(variable->value());
   if (!shape_or_status.ok()) {
     return shape_or_status.status();
   }
@@ -381,16 +404,8 @@ Status XlaOpKernelContext::AssignVariable(
       CastExpressionFromTensor(context_->input(input_index));
   XlaResource* variable = expression->resource();
   TF_RET_CHECK(variable != nullptr);
-  TF_RET_CHECK(variable->kind == XlaResource::kVariable);
-  if (!((variable->type == DT_INVALID && type != DT_INVALID) ||
-        (variable->type == type))) {
-    return errors::InvalidArgument(
-        "Types of variables cannot change after initialization: old type was ",
-        DataTypeString(variable->type), ", new type is ", DataTypeString(type));
-  }
-  variable->type = type;
-  variable->value = handle;
-  return Status::OK();
+  TF_RET_CHECK(variable->kind() == XlaResource::kVariable);
+  return variable->SetValue(type, handle);
 }
 
 XlaCompiler* XlaOpKernelContext::compiler() const {
@@ -417,6 +432,11 @@ const xla::Computation* XlaOpKernelContext::GetOrCreateAdd(
   return XlaContext::Get(context_).GetOrCreateAdd(type);
 }
 
+const xla::Computation* XlaOpKernelContext::GetOrCreateMul(
+    const DataType type) {
+  return XlaContext::Get(context_).GetOrCreateMul(type);
+}
+
 XlaOpKernel::XlaOpKernel(OpKernelConstruction* context) : OpKernel(context) {}
 
 void XlaOpKernel::Compute(OpKernelContext* context) {
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h
index 76bcf594e6a0601763844847583c18ee26d8adf3..f1ae81a5aa9d507a3e0dd577568377385b1844e6 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.h
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h
@@ -178,7 +178,7 @@ class XlaOpKernelContext {
 
   // If this kernel invocation is within a function execution,
   // call_frame() returns the call frame for the function call.
-  FunctionCallFrame* call_frame() const { return context_->call_frame(); }
+  CallFrameInterface* call_frame() const { return context_->call_frame(); }
 
   FunctionLibraryRuntime* function_library() const {
     return context_->function_library();
@@ -210,6 +210,11 @@ class XlaOpKernelContext {
   // separate specialization of the computation for each DataType.
   const xla::Computation* GetOrCreateAdd(const DataType type);
 
+  // Gets an XLA lambda to compute Mul. This is cached in the
+  // XlaContext since it may be used by multiple Ops. There is a
+  // separate specialization of the computation for each DataType.
+  const xla::Computation* GetOrCreateMul(const DataType type);
+
  private:
   OpKernelContext* const context_;
 };
diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.cc b/tensorflow/compiler/tf2xla/xla_op_registry.cc
index 02318cf7fa1d4edc12507f6b4d66a8e897cbe100..0dde6a986c61bdd5b0b2e6d7a16b29ab95be98ab 100644
--- a/tensorflow/compiler/tf2xla/xla_op_registry.cc
+++ b/tensorflow/compiler/tf2xla/xla_op_registry.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/kernel_def.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op_def_util.h"
 #include "tensorflow/core/platform/mem.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 
@@ -82,6 +83,11 @@ XlaOpRegistry::~XlaOpRegistry() = default;
       return false;
     }
   }
+  if (x.compile_time_constant_inputs != y.compile_time_constant_inputs) {
+    LOG(WARNING) << "Registrations of " << x.name
+                 << " have incompatible compile time constant inputs.";
+    return false;
+  }
   return true;
 }
 
@@ -155,7 +161,14 @@ void XlaOpRegistry::RegisterCompilationKernels() {
     const string& op_name = op.first;
     const std::unique_ptr<OpRegistration>& op_registration = op.second;
     const OpDef* op_def;
-    TF_CHECK_OK(op_registry->LookUpOpDef(op_name, &op_def));
+    Status lookup_status = op_registry->LookUpOpDef(op_name, &op_def);
+    if (!lookup_status.ok()) {
+      LOG(ERROR) << lookup_status.error_message();
+      XLA_LOG_LINES(
+          ERROR, "Ops registered: \n" +
+                     dynamic_cast<OpRegistry*>(op_registry)->DebugString(true));
+    }
+    TF_CHECK_OK(lookup_status);
 
     std::unordered_set<string> type_attrs;
     for (const OpDef::AttrDef& attr_def : op_def->attr()) {
@@ -187,22 +200,39 @@ void XlaOpRegistry::RegisterCompilationKernels() {
 
       // Constrain each type attribute to the intersection of:
       // a) the types supported by the backend, and
-      // b) the attribute's type constraints.
-      // TODO(phawkins): it may be necessary to also take the intersection with
-      // the set of types supported by the OpDef.
+      // b) the types allowed by the OpDef, and
+      // c) the type constraints.
       for (const string& type_attr : type_attrs) {
         KernelDef::AttrConstraint* attr_constraint = kdef->add_constraint();
         attr_constraint->set_name(type_attr);
         auto* allowed_values =
             attr_constraint->mutable_allowed_values()->mutable_list();
 
-        auto it = op_registration->type_constraints.find(type_attr);
+        const OpDef::AttrDef& op_def_attr = *FindAttr(type_attr, *op_def);
+        const auto* op_def_allowed_types =
+            op_def_attr.has_allowed_values()
+                ? &op_def_attr.allowed_values().list().type()
+                : nullptr;
+        auto constraint_it = op_registration->type_constraints.find(type_attr);
+        const std::set<DataType>* type_constraints =
+            constraint_it != op_registration->type_constraints.end()
+                ? &constraint_it->second
+                : nullptr;
         for (DataType dtype : backend.second.supported_types) {
-          if (it == op_registration->type_constraints.end() ||
-              (it != op_registration->type_constraints.end() &&
-               it->second.find(dtype) != it->second.end())) {
-            allowed_values->add_type(dtype);
+          // Filter out types that aren't allowed by the OpDef.
+          if (op_def_allowed_types != nullptr &&
+              std::find(op_def_allowed_types->begin(),
+                        op_def_allowed_types->end(),
+                        dtype) == op_def_allowed_types->end()) {
+            continue;
+          }
+          // Filter out types based on the type constraints.
+          if (type_constraints != nullptr &&
+              type_constraints->find(dtype) == type_constraints->end()) {
+            continue;
           }
+          // Passed all the filters, this type is allowed.
+          allowed_values->add_type(dtype);
         }
         if (op_registration->allow_resource_types) {
           allowed_values->add_type(DT_RESOURCE);
@@ -245,6 +275,33 @@ std::vector<const KernelDef*> XlaOpRegistry::DeviceKernels(
   return kernels;
 }
 
+/* static */ const std::unordered_set<string>*
+XlaOpRegistry::CompileTimeConstantInputs(const string& op) {
+  XlaOpRegistry& registry = Instance();
+  mutex_lock lock(registry.mutex_);
+  auto it = registry.ops_.find(op);
+  if (it == registry.ops_.end()) {
+    return nullptr;
+  }
+  return &it->second->compile_time_constant_inputs;
+}
+
+std::vector<string> XlaOpRegistry::BackendNames() {
+  std::vector<string> names;
+  XlaOpRegistry& registry = Instance();
+  mutex_lock lock(registry.mutex_);
+  for (const auto& backend_pair : registry.backends_) {
+    names.push_back(backend_pair.first);
+  }
+  return names;
+}
+
+bool XlaOpRegistry::IsBackendRegistered(const string& name) {
+  XlaOpRegistry& registry = Instance();
+  mutex_lock lock(registry.mutex_);
+  return registry.backends_.find(name) != registry.backends_.end();
+}
+
 XlaOpRegistry& XlaOpRegistry::Instance() {
   static XlaOpRegistry* r = new XlaOpRegistry;
   return *r;
@@ -303,6 +360,12 @@ XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::TypeConstraint(
   return *this;
 }
 
+XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::CompileTimeConstInput(
+    StringPiece input_name) {
+  registration_->compile_time_constant_inputs.insert(input_name.ToString());
+  return *this;
+}
+
 std::unique_ptr<XlaOpRegistry::OpRegistration> XlaOpRegistrationBuilder::Build(
     XlaOpRegistry::Factory factory) {
   registration_->factory = factory;
diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.h b/tensorflow/compiler/tf2xla/xla_op_registry.h
index 6aee8c91cc01b4382ef867fa8e438eede008ac73..ff7453194af3a85bded86a5ce298f8779422dccb 100644
--- a/tensorflow/compiler/tf2xla/xla_op_registry.h
+++ b/tensorflow/compiler/tf2xla/xla_op_registry.h
@@ -45,11 +45,11 @@ extern const char* const DEVICE_GPU_XLA_JIT;  // "GPU_XLA_JIT"
 extern const char* const DEVICE_XLA_CPU;
 extern const char* const DEVICE_XLA_GPU;
 
-constexpr std::array<DataType, 3> kFloatTypes = {
-    {DT_HALF, DT_FLOAT, DT_DOUBLE}};
-constexpr std::array<DataType, 8> kNumericTypes = {
+constexpr std::array<DataType, 4> kFloatTypes = {
+    {DT_HALF, DT_FLOAT, DT_DOUBLE, DT_BFLOAT16}};
+constexpr std::array<DataType, 9> kNumericTypes = {
     {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE,
-     DT_COMPLEX64}};
+     DT_COMPLEX64, DT_BFLOAT16}};
 
 constexpr std::array<DataType, 8> kCpuAllTypes = {
     {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE,
@@ -97,6 +97,12 @@ class XlaOpRegistry {
                               gtl::ArraySlice<DataType> supported_types,
                               BackendOpFilter op_filter);
 
+  // Returns the names of the registered backends.
+  static std::vector<string> BackendNames();
+
+  // Returns true iff a backend with the given name is registered.
+  static bool IsBackendRegistered(const string& name);
+
   // Registers `device_name` for XLA compilation, using information from
   // `registration`.
   static void RegisterCompilationDevice(const string& device_name,
@@ -116,12 +122,17 @@ class XlaOpRegistry {
   static void RegisterCompilationKernels();
 
   // Returns KernelDefs for compilation ops registered on
-  // 'compilation_device_name'.
-  // Does not include kernels registered as CompilationOnly.
+  // 'compilation_device_name'.  Does not include kernels registered as
+  // CompilationOnly, iff include_compilation_only_kernels=false.
   static std::vector<const KernelDef*> DeviceKernels(
       const string& compilation_device_name,
       bool include_compilation_only_kernels);
 
+  // Returns the set of compile-time constant inputs to 'op'. Returns nullptr
+  // if the op is not registered.
+  static const std::unordered_set<string>* CompileTimeConstantInputs(
+      const string& op);
+
  private:
   friend class XlaBackendRegistrar;
   friend class XlaOpRegistrar;
@@ -175,6 +186,9 @@ class XlaOpRegistry {
     bool has_device_whitelist = false;
     std::unordered_set<string> device_whitelist;
 
+    // Names of arguments that must be compile-time constants.
+    std::unordered_set<string> compile_time_constant_inputs;
+
     // Factory used to build OpKernels that perform symbolic execution.
     Factory factory;
   };
@@ -236,6 +250,9 @@ class XlaOpRegistrationBuilder {
   // Allow DT_RESOURCE types for type parameters.
   XlaOpRegistrationBuilder& AllowResourceTypes();
 
+  // Mark 'input_name' as an argument whose value must be known at compile-time.
+  XlaOpRegistrationBuilder& CompileTimeConstInput(StringPiece input_name);
+
   std::unique_ptr<XlaOpRegistry::OpRegistration> Build(
       XlaOpRegistry::Factory factory);
 
diff --git a/tensorflow/compiler/tf2xla/xla_resource.cc b/tensorflow/compiler/tf2xla/xla_resource.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9abac8bdaa77c99a57b2f8ac66fe6ed06fbcd102
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/xla_resource.cc
@@ -0,0 +1,157 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/xla_resource.h"
+
+#include <functional>
+#include <memory>
+
+#include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/sharding_util.h"
+#include "tensorflow/compiler/tf2xla/xla_context.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+
+namespace tensorflow {
+
+XlaResource::XlaResource(Kind kind, int arg_num, string name,
+                         DataType initial_type,
+                         const xla::ComputationDataHandle& initial_value)
+    : kind_(kind),
+      arg_num_(arg_num),
+      name_(std::move(name)),
+      type_(initial_type),
+      value_(initial_value),
+      initial_value_(initial_value) {
+  CHECK(kind_ != kInvalid);
+}
+
+Status XlaResource::SetValue(DataType type,
+                             const xla::ComputationDataHandle& value) {
+  if (type_ == DT_INVALID && type == DT_INVALID) {
+    return errors::InvalidArgument("Attempted to initialized resource ", name_,
+                                   " to an invalid type");
+  }
+  if (type_ != DT_INVALID && type_ != type) {
+    return errors::InvalidArgument("Type of resource ", name_,
+                                   " cannot be changed after initialization: "
+                                   "old type was ",
+                                   DataTypeString(type_), ", new type is ",
+                                   DataTypeString(type));
+  }
+  type_ = type;
+  value_ = value;
+  return Status::OK();
+}
+
+Status XlaResource::GetXlaShape(xla::ComputationBuilder* builder,
+                                xla::Shape* shape) const {
+  auto shape_or_status = builder->GetShape(value_);
+  if (!shape_or_status.ok()) {
+    return shape_or_status.status();
+  }
+  *shape = *shape_or_status.ValueOrDie();
+  return Status::OK();
+}
+
+Status XlaResource::GetShape(xla::ComputationBuilder* builder,
+                             TensorShape* shape) const {
+  xla::Shape xla_shape;
+  TF_RETURN_IF_ERROR(GetXlaShape(builder, &xla_shape));
+  TF_RETURN_IF_ERROR(XLAShapeToTensorShape(xla_shape, shape));
+  return Status::OK();
+}
+
+Status XlaResource::GetOrCreateTensorArrayGradient(
+    const string& source, xla::ComputationBuilder* builder,
+    XlaResource** gradient_out) {
+  VLOG(2) << "Gradient lookup for resource: " << name_
+          << " gradient: " << source;
+  TF_RET_CHECK(kind_ == kTensorArray);
+  std::unique_ptr<XlaResource>& gradient = tensor_array_gradients_[source];
+  if (!gradient) {
+    TensorShape ta_shape;
+    TF_RETURN_IF_ERROR(GetShape(builder, &ta_shape));
+    xla::ComputationDataHandle gradient_value = builder->Broadcast(
+        XlaHelpers::Zero(builder, type_), ta_shape.dim_sizes());
+    gradient.reset(
+        new XlaResource(/*kind=*/kTensorArray, /*arg_num=*/-1,
+                        /*name=*/strings::StrCat("TensorArrayGrad: ", name_),
+                        type_, gradient_value));
+    gradient->tensor_array_size_ = tensor_array_size_;
+  }
+  *gradient_out = gradient.get();
+  return Status::OK();
+}
+
+Status XlaResource::PackedShape(xla::ComputationBuilder* builder,
+                                xla::Shape* packed_shape) const {
+  if (tensor_array_gradients_.empty()) {
+    return GetXlaShape(builder, packed_shape);
+  }
+  TF_RET_CHECK(kind_ == kTensorArray);
+  std::vector<xla::Shape> elem_shapes(1 + tensor_array_gradients_.size());
+  int pos = 0;
+  TF_RETURN_IF_ERROR(GetXlaShape(builder, &elem_shapes[pos++]));
+  for (const auto& gradient : tensor_array_gradients_) {
+    TF_RETURN_IF_ERROR(
+        gradient.second->GetXlaShape(builder, &elem_shapes[pos++]));
+  }
+  *packed_shape = xla::ShapeUtil::MakeTupleShape(elem_shapes);
+  return Status::OK();
+}
+
+Status XlaResource::Pack(xla::ComputationDataHandle* pack,
+                         xla::ComputationBuilder* builder) const {
+  if (tensor_array_gradients_.empty()) {
+    *pack = value_;
+  } else {
+    TF_RET_CHECK(kind_ == kTensorArray);
+    std::vector<xla::ComputationDataHandle> elems;
+    elems.push_back(value_);
+    for (const auto& gradient : tensor_array_gradients_) {
+      elems.push_back(gradient.second->value_);
+    }
+    *pack = builder->Tuple(elems);
+  }
+  return Status::OK();
+}
+
+Status XlaResource::SetFromPack(const std::set<string>& gradient_sources,
+                                const xla::ComputationDataHandle& pack,
+                                bool reset_initial_values,
+                                xla::ComputationBuilder* builder) {
+  if (gradient_sources.empty()) {
+    value_ = pack;
+  } else {
+    TF_RET_CHECK(kind_ == kTensorArray);
+    int pos = 0;
+    value_ = builder->GetTupleElement(pack, pos++);
+    for (const auto& source : gradient_sources) {
+      XlaResource* gradient;
+      TF_RETURN_IF_ERROR(
+          GetOrCreateTensorArrayGradient(source, builder, &gradient));
+      gradient->value_ = builder->GetTupleElement(pack, pos++);
+      if (reset_initial_values) {
+        gradient->initial_value_ = gradient->value_;
+      }
+    }
+  }
+  if (reset_initial_values) {
+    initial_value_ = value_;
+  }
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_resource.h b/tensorflow/compiler/tf2xla/xla_resource.h
new file mode 100644
index 0000000000000000000000000000000000000000..6b46089e4f5e10c195bb59f78c33305c2fa3f84d
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/xla_resource.h
@@ -0,0 +1,149 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_TF2XLA_XLA_RESOURCE_H_
+#define TENSORFLOW_COMPILER_TF2XLA_XLA_RESOURCE_H_
+
+#include <memory>
+
+#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+// Represents a resource, such as a Variable or TensorArray.
+class XlaResource {
+ public:
+  enum Kind {
+    kInvalid,
+    kVariable,
+    kTensorArray,
+    kStack,
+  };
+
+  XlaResource(Kind kind, int arg_num, string name, DataType initial_type,
+              const xla::ComputationDataHandle& initial_value);
+
+  XlaResource(const XlaResource&) = delete;
+  XlaResource(XlaResource&&) = delete;
+  XlaResource& operator=(const XlaResource&) = delete;
+  XlaResource& operator=(XlaResource&&) = delete;
+
+  Kind kind() const { return kind_; }
+
+  // If this resource is visible externally to the computation, what was its
+  // argument number?
+  // < 0 means "not visible externally".
+  int arg_num() const { return arg_num_; }
+
+  // A descriptive name for the resource, used in error messages.
+  const string& name() const { return name_; }
+
+  // Current type and value of the resource. Uninitialized resources are
+  // represented by a default (zero) handle and type DT_INVALID.
+  // While the type of a resource is notionally fixed during execution, when
+  // a resource is first initialized we do not yet know its type, so we keep
+  // track of its type dynamically.
+  DataType type() const { return type_; }
+  const xla::ComputationDataHandle& value() const { return value_; }
+
+  // Value of the resource at computation entry. Used to detect which
+  // variables have new values that need to be written back.
+  const xla::ComputationDataHandle& initial_value() const {
+    return initial_value_;
+  }
+
+  bool initialized() const { return value_.handle() > 0; }
+
+  // Sets the current type/value of the resource.
+  Status SetValue(DataType type, const xla::ComputationDataHandle& value);
+
+  // Returns the shape of the resource as an xla::Shape.
+  Status GetXlaShape(xla::ComputationBuilder* builder, xla::Shape* shape) const;
+
+  // Returns the shape of the resource as an TensorShape. Fails if the shape is
+  // not representable as a TensorShape.
+  Status GetShape(xla::ComputationBuilder* builder, TensorShape* shape) const;
+
+  // Looks up the gradient for `source`, or creates it if it does not already
+  // exist. The call target must be an initialized TensorArray resource. A
+  // TensorArray can have multiple named gradients; see the operator
+  // documentation for TensorArrayGradV3 for details.
+  Status GetOrCreateTensorArrayGradient(const string& source,
+                                        xla::ComputationBuilder* builder,
+                                        XlaResource** gradient_out);
+
+  // Packs a resource into a single XLA value `pack`, suitable for use as
+  // an XlaCompiler::Argument. For non-TensorArrays or TensorArrays without
+  // gradients, sets `*pack` to `value`.
+  // For TensorArrays with gradients, packs the value and its gradient values in
+  // a tuple; the gradients values are packed in order by source name.
+  Status Pack(xla::ComputationDataHandle* pack,
+              xla::ComputationBuilder* builder) const;
+
+  // Returns the shape of the `pack` value computed by `Pack()`.
+  Status PackedShape(xla::ComputationBuilder* builder,
+                     xla::Shape* packed_shape) const;
+
+  // Updates the resource with values from `pack`. If `gradient_sources` is
+  // non-empty, treats `pack` as a tuple that represents a TensorArray and
+  // its gradients, and unpacks and updates the gradient resources.
+  // If `reset_initial_values` is true, sets the initial_values as well as the
+  // values.
+  // Opposite of Pack().
+  Status SetFromPack(const std::set<string>& gradient_sources,
+                     const xla::ComputationDataHandle& pack,
+                     bool reset_initial_values,
+                     xla::ComputationBuilder* builder);
+
+  // TensorArray-specific fields
+
+  // 'tensor_array_size' stores the expected size of the TensorArray or Stack.
+  // We need to store this since sometimes TensorArrays must be initialized
+  // lazily since we do not know the element shape at construction time.
+  int64 tensor_array_size() const { return tensor_array_size_; }
+  void set_tensor_array_size(int64 size) { tensor_array_size_ = size; }
+
+  // 'tensor_array_gradient' is a map from TensorArrayGradV3 'source' attributes
+  // to an XlaResource containing the gradient TensorArrays. We store a pointer
+  // here since there should only be one gradient TensorArray per 'source'
+  // string, irrespective of the number of calls to TensorArrayGrad. The map
+  // is ordered since values are packed into tuples by Pack() sorted by name
+  // order.
+  const std::map<string, std::unique_ptr<XlaResource>>& tensor_array_gradients()
+      const {
+    return tensor_array_gradients_;
+  }
+
+ private:
+  const Kind kind_;
+  const int arg_num_;
+  const string name_;
+
+  DataType type_;
+  xla::ComputationDataHandle value_;
+  xla::ComputationDataHandle initial_value_;
+
+  int64 tensor_array_size_ = -1;
+
+  std::map<string, std::unique_ptr<XlaResource>> tensor_array_gradients_;
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_TF2XLA_XLA_RESOURCE_H_
diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index d3f292207fee396fb4248dede5c0eeb5cd2b87c9..438f1443f17717a3806827abcb36d4ccbbbf756c 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -20,6 +20,10 @@ package_group(
 load("//tensorflow:tensorflow.bzl", "cc_header_only_library")
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
 load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library")
+load(
+    "//tensorflow/core:platform/default/build_config.bzl",
+    "tf_proto_library_py",
+)
 
 # Filegroup used to collect source files for dependency checking.
 filegroup(
@@ -36,6 +40,12 @@ xla_proto_library(
     visibility = ["//visibility:public"],
 )
 
+tf_proto_library_py(
+    name = "xla_data_proto",  # bzl adds a _py suffix
+    srcs = ["xla_data.proto"],
+    visibility = ["//visibility:public"],
+)
+
 xla_proto_library(
     name = "xla_proto",
     srcs = ["xla.proto"],
@@ -250,6 +260,7 @@ tf_cc_test(
     srcs = ["shape_util_test.cc"],
     deps = [
         ":shape_util",
+        ":status_macros",
         ":test",
         ":test_helpers",
         ":types",
@@ -290,7 +301,9 @@ cc_library(
         ":array2d",
         ":array3d",
         ":array4d",
+        ":shape_tree",
         ":shape_util",
+        ":sparse_index_array",
         ":status_macros",
         ":types",
         ":util",
@@ -617,6 +630,28 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "sparse_index_array",
+    srcs = ["sparse_index_array.cc"],
+    hdrs = ["sparse_index_array.h"],
+    deps = [
+        ":array2d",
+        ":shape_util",
+        ":xla_data_proto",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "sparse_index_array_test",
+    srcs = ["sparse_index_array_test.cc"],
+    deps = [
+        ":sparse_index_array",
+        ":test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 # -----------------------------------------------------------------------------
 
 filegroup(
diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h
index 213e0bac6c77e9972de8d4dd7dfc8c7cf3a1b865..71aa057cd3a1c273c0e851497a78f94ba37c778e 100644
--- a/tensorflow/compiler/xla/array.h
+++ b/tensorflow/compiler/xla/array.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <initializer_list>
 #include <iterator>
 #include <memory>
+#include <numeric>
 #include <random>
 #include <type_traits>
 #include <vector>
diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD
index f953407a567b91fdf6ae727d6982a2a778c5873e..d6b4ebfc39ae039ff27fe9fb8a3487c870832f3e 100644
--- a/tensorflow/compiler/xla/client/BUILD
+++ b/tensorflow/compiler/xla/client/BUILD
@@ -186,6 +186,20 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "sharding_builder",
+    srcs = ["sharding_builder.cc"],
+    hdrs = ["sharding_builder.h"],
+    deps = [
+        "//tensorflow/compiler/xla:array",
+        "//tensorflow/compiler/xla:shape_tree",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+    ],
+)
+
 # -----------------------------------------------------------------------------
 
 filegroup(
diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc
index 66937d64aff18817bbd5310e0c24e19556e9d727..d15ccb0c28522c647617153aaa8e738d029dfaba 100644
--- a/tensorflow/compiler/xla/client/client.cc
+++ b/tensorflow/compiler/xla/client/client.cc
@@ -60,7 +60,7 @@ StatusOr<std::unique_ptr<Literal>> Client::Transfer(
         "server provided response without a literal in "
         "TransferToClient request");
   }
-  return MakeUnique<Literal>(response.literal());
+  return Literal::CreateFromProto(*response.mutable_literal());
 }
 
 StatusOr<std::unique_ptr<GlobalData>> Client::TransferToServer(
@@ -142,7 +142,7 @@ StatusOr<std::unique_ptr<Literal>> Client::TransferFromOutfeed(
         "TransferToClient request");
   }
 
-  return MakeUnique<Literal>(response.literal());
+  return Literal::CreateFromProto(response.literal());
 }
 
 Status Client::ResetDevice() {
diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc
index cce931000331e98b00f57025cb13a5d3982c2845..46f2ed4836eda6bf6d5b68f2e29ac6888cd1749b 100644
--- a/tensorflow/compiler/xla/client/computation_builder.cc
+++ b/tensorflow/compiler/xla/client/computation_builder.cc
@@ -34,25 +34,9 @@ limitations under the License.
 
 namespace xla {
 
-ComputationDataHandle ComputationBuilder::ParseOpResponse(
-    const Status& status, OpResponse* response) {
-  VLOG(2) << "done with op request";
-
-  if (!status.ok()) {
-    NoteError(status);
-    return ComputationDataHandle();
-  }
-
-  if (response->output().handle() == 0) {
-    NoteError(InternalError("No output handle"));
-    return ComputationDataHandle();
-  }
-  return response->output();
-}
-
 ComputationBuilder::ComputationBuilder(Client* client,
                                        const string& computation_name)
-    : name_(computation_name), first_error_(Status::OK()), client_(client) {}
+    : name_(computation_name), client_(client) {}
 
 ComputationBuilder::~ComputationBuilder() {}
 
@@ -76,9 +60,8 @@ std::unique_ptr<ComputationBuilder> ComputationBuilder::CreateSubBuilder(
 }
 
 Status ComputationBuilder::PrepareComputation() {
-  if (!first_error_.ok()) {
-    return first_error_;
-  }
+  TF_RETURN_IF_ERROR(first_error_);
+
   if (!computation_.IsNull()) {
     return Status::OK();
   }
@@ -100,6 +83,49 @@ Status ComputationBuilder::PrepareComputation() {
   return Status::OK();
 }
 
+Status ComputationBuilder::RunOp(OpRequest* op_request,
+                                 OpResponse* op_response) {
+  TF_RETURN_IF_ERROR(first_error_);
+  TF_RETURN_IF_ERROR(PrepareComputation());
+
+  // Fill in fields that are set on every OpRequest.
+  *op_request->mutable_computation() = computation_.handle();
+  *op_request->mutable_metadata() = metadata_;
+  if (sharding_) {
+    *op_request->mutable_sharding() = *sharding_;
+  }
+
+  const string& op_name =
+      OpRequest::descriptor()->FindFieldByNumber(op_request->op_case())->name();
+  VLOG(2) << "running op request: " << op_name;
+  Status status = client_->stub()->Op(op_request, op_response);
+  VLOG(2) << "done with op request: " << op_name;
+  return status;
+}
+
+void ComputationBuilder::RunOpAndNoteError(OpRequest* op_request) {
+  OpResponse op_response;
+  Status status = RunOp(op_request, &op_response);
+  if (!status.ok()) {
+    NoteError(status);
+  }
+}
+
+ComputationDataHandle ComputationBuilder::RunOpAndParseResponse(
+    OpRequest* op_request) {
+  OpResponse op_response;
+  Status status = RunOp(op_request, &op_response);
+  if (!status.ok()) {
+    NoteError(status);
+    return ComputationDataHandle();
+  }
+  if (op_response.output().handle() == 0) {
+    NoteError(InternalError("No output handle"));
+    return ComputationDataHandle();
+  }
+  return op_response.output();
+}
+
 bool ComputationBuilder::MakeWindow(
     tensorflow::gtl::ArraySlice<int64> window_dimensions,
     tensorflow::gtl::ArraySlice<int64> window_strides,
@@ -158,81 +184,55 @@ bool ComputationBuilder::MakeWindow(
   return true;
 }
 
-ComputationDataHandle ComputationBuilder::ConstantOp(
-    const PopulateLiteral& populate) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  ConstantRequest request;
-  Literal literal;
-  populate(&literal);
-  *request.mutable_literal() = literal.ToProto();
-  VLOG(3) << "created constant: " << request.literal().ShortDebugString();
-  OpRequest op_request;
-  *op_request.mutable_constant_request() = request;
-  *op_request.mutable_computation() = computation_.handle();
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making constant request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
-}
-
 ComputationDataHandle ComputationBuilder::ConstantLiteral(
     const Literal& literal) {
-  return ConstantOp(
-      [literal](Literal* mutable_literal) { *mutable_literal = literal; });
+  OpRequest op_request;
+  ConstantRequest* request = op_request.mutable_constant_request();
+  *request->mutable_literal() = literal.ToProto();
+  VLOG(3) << "created constant: " << request->literal().ShortDebugString();
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Parameter(int64 parameter_number,
                                                     const Shape& shape,
                                                     const string& name) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  ParameterRequest request;
-  *request.mutable_shape() = shape;
-  request.set_parameter(parameter_number);
-  request.set_name(name);
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_parameter_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making parameter request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  ParameterRequest* request = op_request.mutable_parameter_request();
+  *request->mutable_shape() = shape;
+  request->set_parameter(parameter_number);
+  request->set_name(name);
+  return RunOpAndParseResponse(&op_request);
 }
 
-StatusOr<std::unique_ptr<Shape>> ComputationBuilder::GetShape(
+StatusOr<std::unique_ptr<Shape>> ComputationBuilder::GetShapeWithoutNoteError(
     const ComputationDataHandle& operand) {
-  if (!first_error_.ok()) {
-    return first_error_;
-  }
-
   GetLocalShapeRequest request;
   *request.mutable_computation() = computation_.handle();
   *request.mutable_operand() = operand;
   GetLocalShapeResponse response;
 
   VLOG(2) << "making get-shape request";
-  Status s = client_->stub()->GetLocalShape(&request, &response);
+  TF_RETURN_IF_ERROR(client_->stub()->GetLocalShape(&request, &response));
   VLOG(2) << "done with request";
 
-  if (!s.ok()) {
-    NoteError(s);
-    return first_error_;
-  }
   TF_RET_CHECK(response.has_shape());
   std::unique_ptr<Shape> shape = WrapUnique(response.release_shape());
   TF_RET_CHECK(shape != nullptr);
   return std::move(shape);
 }
 
+StatusOr<std::unique_ptr<Shape>> ComputationBuilder::GetShape(
+    const ComputationDataHandle& operand) {
+  TF_RETURN_IF_ERROR(first_error_);
+
+  auto status_or_shape = GetShapeWithoutNoteError(operand);
+  if (!status_or_shape.ok()) {
+    NoteError(status_or_shape.status());
+    return first_error_;
+  }
+  return status_or_shape;
+}
+
 ComputationDataHandle ComputationBuilder::CheckShape(
     const ComputationDataHandle& operand, const Shape& expected_shape) {
   std::unique_ptr<Shape> actual_shape = GetShape(operand).ConsumeValueOrDie();
@@ -258,30 +258,19 @@ ComputationDataHandle ComputationBuilder::Slice(
     tensorflow::gtl::ArraySlice<int64> start_indices,
     tensorflow::gtl::ArraySlice<int64> limit_indices,
     tensorflow::gtl::ArraySlice<int64> strides) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  SliceRequest request;
-  *request.mutable_operand() = operand;
+  OpRequest op_request;
+  SliceRequest* request = op_request.mutable_slice_request();
+  *request->mutable_operand() = operand;
   for (int64 index : start_indices) {
-    request.add_start_indices(index);
+    request->add_start_indices(index);
   }
   for (int64 index : limit_indices) {
-    request.add_limit_indices(index);
+    request->add_limit_indices(index);
   }
   for (int64 index : strides) {
-    request.add_strides(index);
+    request->add_strides(index);
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_slice_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making slice request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::SliceInDim(
@@ -307,143 +296,78 @@ ComputationDataHandle ComputationBuilder::DynamicSlice(
     const ComputationDataHandle& operand,
     const ComputationDataHandle& start_indices,
     tensorflow::gtl::ArraySlice<int64> slice_sizes) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  DynamicSliceRequest request;
-  *request.mutable_operand() = operand;
-  *request.mutable_start_indices() = start_indices;
+  OpRequest op_request;
+  DynamicSliceRequest* request = op_request.mutable_dynamic_slice_request();
+  *request->mutable_operand() = operand;
+  *request->mutable_start_indices() = start_indices;
   for (int64 index : slice_sizes) {
-    request.add_slice_sizes(index);
+    request->add_slice_sizes(index);
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_dynamic_slice_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making dynamic slice request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::DynamicUpdateSlice(
     const ComputationDataHandle& operand, const ComputationDataHandle& update,
     const ComputationDataHandle& start_indices) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  DynamicUpdateSliceRequest request;
-  *request.mutable_operand() = operand;
-  *request.mutable_update() = update;
-  *request.mutable_start_indices() = start_indices;
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_dynamic_update_slice_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making dynamic update slice request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  DynamicUpdateSliceRequest* request =
+      op_request.mutable_dynamic_update_slice_request();
+  *request->mutable_operand() = operand;
+  *request->mutable_update() = update;
+  *request->mutable_start_indices() = start_indices;
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::ConcatInDim(
     tensorflow::gtl::ArraySlice<ComputationDataHandle> operands,
     int64 dimension) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  ConcatenateRequest request;
+  OpRequest op_request;
+  ConcatenateRequest* request = op_request.mutable_concatenate_request();
   for (const ComputationDataHandle& operand : operands) {
-    *request.add_operands() = operand;
+    *request->add_operands() = operand;
   }
-  request.set_dimension(dimension);
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_concatenate_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making concatenate request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  request->set_dimension(dimension);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Broadcast(
     const ComputationDataHandle& operand,
     tensorflow::gtl::ArraySlice<int64> broadcast_sizes) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  BroadcastRequest request;
-  *request.mutable_operand() = operand;
+  OpRequest op_request;
+  BroadcastRequest* request = op_request.mutable_broadcast_request();
+  *request->mutable_operand() = operand;
   for (int64 size : broadcast_sizes) {
-    request.add_broadcast_sizes(size);
+    request->add_broadcast_sizes(size);
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_broadcast_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making broadcast request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Pad(
     const ComputationDataHandle& operand,
     const ComputationDataHandle& padding_value,
     const PaddingConfig& padding_config) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  PadRequest request;
-  *request.mutable_operand() = operand;
-  *request.mutable_padding_value() = padding_value;
-  *request.mutable_padding_config() = padding_config;
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_pad_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making pad request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  PadRequest* request = op_request.mutable_pad_request();
+  *request->mutable_operand() = operand;
+  *request->mutable_padding_value() = padding_value;
+  *request->mutable_padding_config() = padding_config;
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Reshape(
     const ComputationDataHandle& operand,
     tensorflow::gtl::ArraySlice<int64> dimensions,
     tensorflow::gtl::ArraySlice<int64> new_sizes) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  ReshapeRequest request;
-  *request.mutable_operand() = operand;
+  OpRequest op_request;
+  ReshapeRequest* request = op_request.mutable_reshape_request();
+  *request->mutable_operand() = operand;
   for (int64 dimension : dimensions) {
-    request.add_dimensions(dimension);
+    request->add_dimensions(dimension);
   }
   for (int64 new_size : new_sizes) {
-    request.add_new_sizes(new_size);
+    request->add_new_sizes(new_size);
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_reshape_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making reshape request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Reshape(
@@ -455,7 +379,6 @@ ComputationDataHandle ComputationBuilder::Reshape(
 
   StatusOr<std::unique_ptr<Shape>> shape = GetShape(operand);
   if (!shape.ok()) {
-    first_error_ = shape.status();
     return ComputationDataHandle();
   }
   std::vector<int64> dimensions(shape.ValueOrDie()->dimensions().size());
@@ -485,7 +408,6 @@ ComputationDataHandle ComputationBuilder::Collapse(
   // dimensions by the product of their sizes.
   StatusOr<std::unique_ptr<Shape>> shape_or_status = GetShape(operand);
   if (!shape_or_status.ok()) {
-    first_error_ = shape_or_status.status();
     return ComputationDataHandle();
   }
   std::unique_ptr<Shape> original_shape = shape_or_status.ConsumeValueOrDie();
@@ -517,26 +439,11 @@ ComputationDataHandle ComputationBuilder::Collapse(
 
 void ComputationBuilder::Trace(const string& tag,
                                const ComputationDataHandle& operand) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return;
-  }
-
-  TraceRequest request;
-  request.set_tag(tag);
-  *request.mutable_operand() = operand;
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_trace_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making trace request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  VLOG(2) << "done with request";
-
-  if (!s.ok()) {
-    NoteError(s);
-  }
+  TraceRequest* request = op_request.mutable_trace_request();
+  request->set_tag(tag);
+  *request->mutable_operand() = operand;
+  RunOpAndNoteError(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Select(
@@ -547,44 +454,23 @@ ComputationDataHandle ComputationBuilder::Select(
 
 ComputationDataHandle ComputationBuilder::Tuple(
     tensorflow::gtl::ArraySlice<ComputationDataHandle> elements) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  VariadicOpRequest request;
-  request.set_varop(VAROP_TUPLE);
+  OpRequest op_request;
+  VariadicOpRequest* request = op_request.mutable_variadic_op_request();
+  request->set_varop(VAROP_TUPLE);
   for (const ComputationDataHandle& operand : elements) {
-    *request.add_operands() = operand;
+    *request->add_operands() = operand;
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_variadic_op_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making variadic op request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::GetTupleElement(
     const ComputationDataHandle& tuple_data, int64 index) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  GetTupleElementRequest request;
-  *request.mutable_operand() = tuple_data;
-  request.set_index(index);
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_get_tuple_element_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making get tuple element op request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  GetTupleElementRequest* request =
+      op_request.mutable_get_tuple_element_request();
+  *request->mutable_operand() = tuple_data;
+  request->set_index(index);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Eq(
@@ -625,16 +511,33 @@ ComputationDataHandle ComputationBuilder::Lt(
 
 ComputationDataHandle ComputationBuilder::Dot(
     const ComputationDataHandle& lhs, const ComputationDataHandle& rhs) {
-  return BinaryOp(BINOP_DOT, lhs, rhs, /*broadcast_dimensions=*/{});
+  StatusOr<std::unique_ptr<Shape>> lhs_shape_or_status = GetShape(lhs);
+  if (!lhs_shape_or_status.ok()) {
+    return ComputationDataHandle();
+  }
+  std::unique_ptr<Shape> lhs_shape = lhs_shape_or_status.ConsumeValueOrDie();
+
+  DotDimensionNumbers dimension_numbers;
+  dimension_numbers.add_lhs_contracting_dimensions(
+      lhs_shape->dimensions_size() == 1 ? 0 : 1);
+  dimension_numbers.add_rhs_contracting_dimensions(0);
+  return DotGeneral(lhs, rhs, dimension_numbers);
+}
+
+ComputationDataHandle ComputationBuilder::DotGeneral(
+    const ComputationDataHandle& lhs, const ComputationDataHandle& rhs,
+    const DotDimensionNumbers& dimension_numbers) {
+  OpRequest op_request;
+  DotRequest* request = op_request.mutable_dot_request();
+  *request->mutable_lhs() = lhs;
+  *request->mutable_rhs() = rhs;
+  *request->mutable_dimension_numbers() = dimension_numbers;
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Conv(
     const ComputationDataHandle& lhs, const ComputationDataHandle& rhs,
     tensorflow::gtl::ArraySlice<int64> window_strides, Padding padding) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
   return ConvWithGeneralDimensions(
       lhs, rhs, window_strides, padding,
       CreateDefaultConvDimensionNumbers(window_strides.size()));
@@ -644,10 +547,6 @@ ComputationDataHandle ComputationBuilder::ConvWithGeneralPadding(
     const ComputationDataHandle& lhs, const ComputationDataHandle& rhs,
     tensorflow::gtl::ArraySlice<int64> window_strides,
     tensorflow::gtl::ArraySlice<std::pair<int64, int64>> padding) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
   return ConvGeneral(lhs, rhs, window_strides, padding,
                      CreateDefaultConvDimensionNumbers(window_strides.size()));
 }
@@ -715,13 +614,11 @@ ComputationDataHandle ComputationBuilder::ConvWithGeneralDimensions(
 
   StatusOr<std::unique_ptr<Shape>> lhs_shape_or_status = GetShape(lhs);
   if (!lhs_shape_or_status.ok()) {
-    first_error_ = lhs_shape_or_status.status();
     return ComputationDataHandle();
   }
 
   StatusOr<std::unique_ptr<Shape>> rhs_shape_or_status = GetShape(rhs);
   if (!rhs_shape_or_status.ok()) {
-    first_error_ = rhs_shape_or_status.status();
     return ComputationDataHandle();
   }
 
@@ -776,13 +673,11 @@ ComputationDataHandle ComputationBuilder::ConvGeneralDilated(
 
   StatusOr<std::unique_ptr<Shape>> lhs_shape_or_status = GetShape(lhs);
   if (!lhs_shape_or_status.ok()) {
-    first_error_ = lhs_shape_or_status.status();
     return ComputationDataHandle();
   }
 
   StatusOr<std::unique_ptr<Shape>> rhs_shape_or_status = GetShape(rhs);
   if (!rhs_shape_or_status.ok()) {
-    first_error_ = rhs_shape_or_status.status();
     return ComputationDataHandle();
   }
 
@@ -800,122 +695,78 @@ ComputationDataHandle ComputationBuilder::ConvGeneralDilated(
         rhs_shape->dimensions(dimension_numbers.kernel_spatial_dimensions(i));
   }
 
-  ConvolveRequest request;
-  *request.mutable_lhs() = lhs;
-  *request.mutable_rhs() = rhs;
-  *request.mutable_dimension_numbers() = dimension_numbers;
+  OpRequest op_request;
+  ConvolveRequest* request = op_request.mutable_convolve_request();
+  *request->mutable_lhs() = lhs;
+  *request->mutable_rhs() = rhs;
+  *request->mutable_dimension_numbers() = dimension_numbers;
 
   if (!MakeWindow(window_dimensions, window_strides, padding, lhs_dilation,
-                  rhs_dilation, request.mutable_window())) {
+                  rhs_dilation, request->mutable_window())) {
     // Error is recorded in MakeWindow.
     return ComputationDataHandle();
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_convolve_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
 
-  VLOG(2) << "making convolve request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
-ComputationDataHandle ComputationBuilder::Infeed(const Shape& shape,
-                                                 const string& config) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
+ComputationDataHandle ComputationBuilder::Fft(
+    const ComputationDataHandle& operand, const FftType fft_type,
+    const tensorflow::gtl::ArraySlice<int64> fft_length) {
+  OpRequest op_request;
+  FftRequest* request = op_request.mutable_fft_request();
+  *request->mutable_operand() = operand;
+  request->set_fft_type(fft_type);
+  for (int64 dim_len : fft_length) {
+    request->add_fft_length(dim_len);
   }
+  return RunOpAndParseResponse(&op_request);
+}
 
-  InfeedRequest request;
-  *request.mutable_shape() = shape;
-  *request.mutable_config() = config;
+ComputationDataHandle ComputationBuilder::Infeed(const Shape& shape,
+                                                 const string& config) {
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_infeed_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making infeed op request";
-  Status s = client_->stub()->Op(&op_request, &response);
-
-  return ParseOpResponse(s, &response);
+  InfeedRequest* request = op_request.mutable_infeed_request();
+  *request->mutable_shape() = shape;
+  *request->mutable_config() = config;
+  return RunOpAndParseResponse(&op_request);
 }
 
 void ComputationBuilder::Outfeed(const ComputationDataHandle& operand,
                                  const Shape& shape,
                                  const string& outfeed_config) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return;
-  }
-
-  OutfeedRequest request;
-  request.set_outfeed_config(outfeed_config);
-  *request.mutable_operand() = operand;
-  *request.mutable_shape() = shape;
   OpRequest op_request;
-  *op_request.mutable_outfeed_request() = request;
-  *op_request.mutable_computation() = computation_.handle();
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making outfeed op request";
-  tensorflow::Status s = client_->stub()->Op(&op_request, &response);
-
-  if (!s.ok()) {
-    NoteError(s);
-    return;
-  }
+  OutfeedRequest* request = op_request.mutable_outfeed_request();
+  request->set_outfeed_config(outfeed_config);
+  *request->mutable_operand() = operand;
+  *request->mutable_shape() = shape;
+  RunOpAndNoteError(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Call(
     const Computation& computation,
     tensorflow::gtl::ArraySlice<ComputationDataHandle> operands) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  CallRequest request;
-  *request.mutable_to_apply() = computation.handle();
+  OpRequest op_request;
+  CallRequest* request = op_request.mutable_call_request();
+  *request->mutable_to_apply() = computation.handle();
   for (const ComputationDataHandle& operand : operands) {
-    *request.add_operands() = operand;
+    *request->add_operands() = operand;
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_call_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making call op request";
-  Status s = client_->stub()->Op(&op_request, &response);
-
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::CustomCall(
     const string& call_target_name,
     tensorflow::gtl::ArraySlice<ComputationDataHandle> operands,
     const Shape& shape) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  CustomCallRequest request;
-  request.set_call_target_name(call_target_name);
+  OpRequest op_request;
+  CustomCallRequest* request = op_request.mutable_custom_call_request();
+  request->set_call_target_name(call_target_name);
   for (const ComputationDataHandle& operand : operands) {
-    *request.add_operands() = operand;
+    *request->add_operands() = operand;
   }
-  *request.mutable_shape() = shape;
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_custom_call_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making custom call op request";
-  Status s = client_->stub()->Op(&op_request, &response);
-
-  return ParseOpResponse(s, &response);
+  *request->mutable_shape() = shape;
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Complex(
@@ -1080,47 +931,25 @@ ComputationDataHandle ComputationBuilder::IsFinite(
 ComputationDataHandle ComputationBuilder::Transpose(
     const ComputationDataHandle& operand,
     tensorflow::gtl::ArraySlice<int64> permutation) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
   TransposeRequest* request = op_request.mutable_transpose_request();
   *request->mutable_operand() = operand;
   for (int64 dimension : permutation) {
     request->add_dimensions(dimension);
   }
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making transpose request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Rev(
     const ComputationDataHandle& operand,
     tensorflow::gtl::ArraySlice<int64> dimensions) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  ReverseRequest request;
-  *request.mutable_operand() = operand;
+  OpRequest op_request;
+  ReverseRequest* request = op_request.mutable_reverse_request();
+  *request->mutable_operand() = operand;
   for (int64 dimension : dimensions) {
-    request.add_dimensions(dimension);
+    request->add_dimensions(dimension);
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_reverse_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making reverse op request";
-  Status s = client_->stub()->Op(&op_request, &response);
-
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Sort(
@@ -1148,24 +977,15 @@ ComputationDataHandle ComputationBuilder::ConvertElementType(
 
   StatusOr<std::unique_ptr<Shape>> shape_status = GetShape(operand);
   if (!shape_status.ok()) {
-    first_error_ = shape_status.status();
     return ComputationDataHandle();
   }
   std::unique_ptr<Shape> original = shape_status.ConsumeValueOrDie();
 
-  ConvertRequest request;
-  *request.mutable_operand() = operand;
-  request.set_new_element_type(new_element_type);
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_convert_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making convert request";
-  Status s = client_->stub()->Op(&op_request, &response);
-
-  return ParseOpResponse(s, &response);
+  ConvertRequest* request = op_request.mutable_convert_request();
+  *request->mutable_operand() = operand;
+  request->set_new_element_type(new_element_type);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::BitcastConvertType(
@@ -1176,24 +996,15 @@ ComputationDataHandle ComputationBuilder::BitcastConvertType(
 
   StatusOr<std::unique_ptr<Shape>> shape_status = GetShape(operand);
   if (!shape_status.ok()) {
-    first_error_ = shape_status.status();
     return ComputationDataHandle();
   }
   std::unique_ptr<Shape> original = shape_status.ConsumeValueOrDie();
 
-  ConvertRequest request;
-  *request.mutable_operand() = operand;
-  request.set_new_element_type(new_element_type);
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_bitcast_convert_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making bitcast convert request";
-  Status s = client_->stub()->Op(&op_request, &response);
-
-  return ParseOpResponse(s, &response);
+  ConvertRequest* request = op_request.mutable_bitcast_convert_request();
+  *request->mutable_operand() = operand;
+  request->set_new_element_type(new_element_type);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::SquareF32(
@@ -1221,107 +1032,57 @@ ComputationDataHandle ComputationBuilder::Clamp(
 
 ComputationDataHandle ComputationBuilder::UnaryOp(
     UnaryOperation unop, const ComputationDataHandle& operand) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  UnaryOpRequest request;
-  request.set_unop(unop);
-  *request.mutable_operand() = operand;
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_unary_op_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making unop request";
-  Status s = client_->stub()->Op(&op_request, &response);
-
-  return ParseOpResponse(s, &response);
+  UnaryOpRequest* request = op_request.mutable_unary_op_request();
+  request->set_unop(unop);
+  *request->mutable_operand() = operand;
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::BinaryOp(
     BinaryOperation binop, const ComputationDataHandle& lhs,
     const ComputationDataHandle& rhs,
     tensorflow::gtl::ArraySlice<int64> broadcast_dimensions) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  BinaryOpRequest request;
-  request.set_binop(binop);
-  *request.mutable_lhs() = lhs;
-  *request.mutable_rhs() = rhs;
+  OpRequest op_request;
+  BinaryOpRequest* request = op_request.mutable_binary_op_request();
+  request->set_binop(binop);
+  *request->mutable_lhs() = lhs;
+  *request->mutable_rhs() = rhs;
   for (int64 dimension : broadcast_dimensions) {
-    request.add_broadcast_dimensions(dimension);
+    request->add_broadcast_dimensions(dimension);
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_binary_op_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making binop request";
-  Status s = client_->stub()->Op(&op_request, &response);
-
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::RngOp(
     RandomDistribution distribution,
     tensorflow::gtl::ArraySlice<ComputationDataHandle> parameters,
     const Shape& shape) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  RngRequest request;
-  request.set_distribution(distribution);
+  OpRequest op_request;
+  RngRequest* request = op_request.mutable_rng_request();
+  request->set_distribution(distribution);
   for (const ComputationDataHandle& param : parameters) {
-    *request.add_parameter() = param;
+    *request->add_parameter() = param;
   }
-  *request.mutable_shape() = shape;
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_rng_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making rngop request";
-  Status s = client_->stub()->Op(&op_request, &response);
-
-  return ParseOpResponse(s, &response);
+  *request->mutable_shape() = shape;
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::TernaryOp(
     TernaryOperation triop, const ComputationDataHandle& lhs,
     const ComputationDataHandle& rhs, const ComputationDataHandle& ehs) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  TernaryOpRequest request;
-  request.set_triop(triop);
-  *request.mutable_lhs() = lhs;
-  *request.mutable_rhs() = rhs;
-  *request.mutable_ehs() = ehs;
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_ternary_op_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making triop request";
-  Status s = client_->stub()->Op(&op_request, &response);
-
-  return ParseOpResponse(s, &response);
+  TernaryOpRequest* request = op_request.mutable_ternary_op_request();
+  request->set_triop(triop);
+  *request->mutable_lhs() = lhs;
+  *request->mutable_rhs() = rhs;
+  *request->mutable_ehs() = ehs;
+  return RunOpAndParseResponse(&op_request);
 }
 
 Status ComputationBuilder::SetReturnValue(
     const ComputationDataHandle& operand) {
-  if (!first_error_.ok()) {
-    return first_error_;
-  }
+  TF_RETURN_IF_ERROR(first_error_);
 
   SetReturnValueRequest request;
   *request.mutable_computation() = computation_.handle();
@@ -1343,9 +1104,7 @@ Status ComputationBuilder::SetReturnValue(
 
 StatusOr<bool> ComputationBuilder::IsConstant(
     const ComputationDataHandle& operand, int64 num_parameters) {
-  if (!first_error_.ok()) {
-    return first_error_;
-  }
+  TF_RETURN_IF_ERROR(first_error_);
 
   IsConstantRequest request;
   *request.mutable_computation() = computation_.handle();
@@ -1366,9 +1125,7 @@ StatusOr<bool> ComputationBuilder::IsConstant(
 StatusOr<std::unique_ptr<Literal>> ComputationBuilder::ComputeConstant(
     const ComputationDataHandle& operand, const Layout* output_layout,
     tensorflow::gtl::ArraySlice<Literal> parameters) {
-  if (!first_error_.ok()) {
-    return first_error_;
-  }
+  TF_RETURN_IF_ERROR(first_error_);
 
   ComputeConstantRequest request;
   *request.mutable_computation() = computation_.handle();
@@ -1397,7 +1154,7 @@ StatusOr<std::unique_ptr<Literal>> ComputationBuilder::ComputeConstant(
         "no computed literal in the provided response in ComputeConstant "
         "request");
   }
-  return MakeUnique<Literal>(response.literal());
+  return Literal::CreateFromProto(response.literal());
 }
 
 ComputationDataHandle ComputationBuilder::Map(
@@ -1405,30 +1162,19 @@ ComputationDataHandle ComputationBuilder::Map(
     const Computation& computation,
     tensorflow::gtl::ArraySlice<int64> dimensions,
     tensorflow::gtl::ArraySlice<ComputationDataHandle> static_operands) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  MapRequest request;
+  OpRequest op_request;
+  MapRequest* request = op_request.mutable_map_request();
   for (const ComputationDataHandle& operand : operands) {
-    *request.add_operands() = operand;
+    *request->add_operands() = operand;
   }
-  *request.mutable_to_apply() = computation.handle();
+  *request->mutable_to_apply() = computation.handle();
   for (int64 dimension : dimensions) {
-    request.add_dimensions(dimension);
+    request->add_dimensions(dimension);
   }
   for (const ComputationDataHandle& sop : static_operands) {
-    *request.add_static_operands() = sop;
+    *request->add_static_operands() = sop;
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_map_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making Map request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::RngNormal(
@@ -1443,57 +1189,46 @@ ComputationDataHandle ComputationBuilder::RngUniform(
   return RngOp(RandomDistribution::RNG_UNIFORM, {a, b}, shape);
 }
 
-ComputationDataHandle ComputationBuilder::RngBernoulli(
-    const ComputationDataHandle& mean, const Shape& shape) {
-  return RngOp(RandomDistribution::RNG_BERNOULLI, {mean}, shape);
-}
-
 ComputationDataHandle ComputationBuilder::While(
     const Computation& condition, const Computation& body,
     const ComputationDataHandle& init) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  WhileRequest request;
-  *request.mutable_condition() = condition.handle();
-  *request.mutable_body() = body.handle();
-  *request.mutable_init() = init;
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_while_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making while request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  WhileRequest* request = op_request.mutable_while_request();
+  *request->mutable_condition() = condition.handle();
+  *request->mutable_body() = body.handle();
+  *request->mutable_init() = init;
+  return RunOpAndParseResponse(&op_request);
+}
+
+ComputationDataHandle ComputationBuilder::Conditional(
+    const ComputationDataHandle& predicate,
+    const ComputationDataHandle& true_operand,
+    const Computation& true_computation,
+    const ComputationDataHandle& false_operand,
+    const Computation& false_computation) {
+  OpRequest op_request;
+  ConditionalRequest* request = op_request.mutable_conditional_request();
+  *request->mutable_predicate() = predicate;
+  *request->mutable_true_operand() = true_operand;
+  *request->mutable_true_computation() = true_computation.handle();
+  *request->mutable_false_operand() = false_operand;
+  *request->mutable_false_computation() = false_computation.handle();
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Reduce(
     const ComputationDataHandle& operand,
     const ComputationDataHandle& init_value, const Computation& computation,
     tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  ReduceRequest request;
-  *request.mutable_operand() = operand;
-  *request.mutable_init_value() = init_value;
+  OpRequest op_request;
+  ReduceRequest* request = op_request.mutable_reduce_request();
+  *request->mutable_operand() = operand;
+  *request->mutable_init_value() = init_value;
   for (int64 dimension : dimensions_to_reduce) {
-    request.add_dimensions(dimension);
+    request->add_dimensions(dimension);
   }
-  *request.mutable_to_apply() = computation.handle();
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_reduce_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making reduce request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  *request->mutable_to_apply() = computation.handle();
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::ReduceAll(
@@ -1505,7 +1240,6 @@ ComputationDataHandle ComputationBuilder::ReduceAll(
 
   StatusOr<std::unique_ptr<Shape>> shape = GetShape(operand);
   if (!shape.ok()) {
-    first_error_ = shape.status();
     return ComputationDataHandle();
   }
 
@@ -1525,7 +1259,6 @@ ComputationDataHandle ComputationBuilder::ReduceWindow(
 
   StatusOr<std::unique_ptr<Shape>> shape = GetShape(operand);
   if (!shape.ok()) {
-    first_error_ = shape.status();
     return ComputationDataHandle();
   }
 
@@ -1551,84 +1284,50 @@ ComputationDataHandle ComputationBuilder::ReduceWindowWithGeneralPadding(
     tensorflow::gtl::ArraySlice<int64> window_dimensions,
     tensorflow::gtl::ArraySlice<int64> window_strides,
     tensorflow::gtl::ArraySlice<std::pair<int64, int64>> padding) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  ReduceWindowRequest request;
-  *request.mutable_operand() = operand;
-  *request.mutable_to_apply() = computation.handle();
-  *request.mutable_init_value() = init_value;
+  OpRequest op_request;
+  ReduceWindowRequest* request = op_request.mutable_reduce_window_request();
+  *request->mutable_operand() = operand;
+  *request->mutable_to_apply() = computation.handle();
+  *request->mutable_init_value() = init_value;
 
   if (!MakeWindow(window_dimensions, window_strides, padding, {}, {},
-                  request.mutable_window())) {
+                  request->mutable_window())) {
     NoteError(InternalError("failed to make window"));
     return ComputationDataHandle();
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_reduce_window_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
 
-  VLOG(2) << "making reduce-window request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::BatchNormTraining(
     const ComputationDataHandle& operand, const ComputationDataHandle& scale,
     const ComputationDataHandle& offset, float epsilon, int64 feature_index) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-  BatchNormTrainingRequest request;
-  *request.mutable_operand() = operand;
-  *request.mutable_scale() = scale;
-  *request.mutable_offset() = offset;
-  request.set_epsilon(epsilon);
-  request.set_feature_index(feature_index);
-
   OpRequest op_request;
-  *op_request.mutable_batch_norm_training_request() = request;
-  *op_request.mutable_computation() = computation_.handle();
-  AddCommonFieldsToOpRequest(&op_request);
-
-  OpResponse response;
-
-  VLOG(2) << "making BatchNormTraining request";
-
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  BatchNormTrainingRequest* request =
+      op_request.mutable_batch_norm_training_request();
+  *request->mutable_operand() = operand;
+  *request->mutable_scale() = scale;
+  *request->mutable_offset() = offset;
+  request->set_epsilon(epsilon);
+  request->set_feature_index(feature_index);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::BatchNormInference(
     const ComputationDataHandle& operand, const ComputationDataHandle& scale,
     const ComputationDataHandle& offset, const ComputationDataHandle& mean,
     const ComputationDataHandle& variance, float epsilon, int64 feature_index) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-  BatchNormInferenceRequest request;
-  *request.mutable_operand() = operand;
-  *request.mutable_scale() = scale;
-  *request.mutable_offset() = offset;
-  *request.mutable_mean() = mean;
-  *request.mutable_variance() = variance;
-  request.set_epsilon(epsilon);
-  request.set_feature_index(feature_index);
-
   OpRequest op_request;
-  *op_request.mutable_batch_norm_inference_request() = request;
-  *op_request.mutable_computation() = computation_.handle();
-  AddCommonFieldsToOpRequest(&op_request);
-
-  OpResponse response;
-
-  VLOG(2) << "making BatchNormInference request";
-
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  BatchNormInferenceRequest* request =
+      op_request.mutable_batch_norm_inference_request();
+  *request->mutable_operand() = operand;
+  *request->mutable_scale() = scale;
+  *request->mutable_offset() = offset;
+  *request->mutable_mean() = mean;
+  *request->mutable_variance() = variance;
+  request->set_epsilon(epsilon);
+  request->set_feature_index(feature_index);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::BatchNormGrad(
@@ -1636,49 +1335,25 @@ ComputationDataHandle ComputationBuilder::BatchNormGrad(
     const ComputationDataHandle& mean, const ComputationDataHandle& var,
     const ComputationDataHandle& grad_output, float epsilon,
     int64 feature_index) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-  BatchNormGradRequest request;
-  *request.mutable_operand() = operand;
-  *request.mutable_scale() = scale;
-  *request.mutable_mean() = mean;
-  *request.mutable_variance() = var;
-  *request.mutable_grad_output() = grad_output;
-  request.set_epsilon(epsilon);
-  request.set_feature_index(feature_index);
-
   OpRequest op_request;
-  *op_request.mutable_batch_norm_grad_request() = request;
-  *op_request.mutable_computation() = computation_.handle();
-  AddCommonFieldsToOpRequest(&op_request);
-
-  OpResponse response;
-
-  VLOG(2) << "making BatchNormGrad request";
-
-  Status s = client_->stub()->Op(&op_request, &response);
-
-  return ParseOpResponse(s, &response);
+  BatchNormGradRequest* request = op_request.mutable_batch_norm_grad_request();
+  *request->mutable_operand() = operand;
+  *request->mutable_scale() = scale;
+  *request->mutable_mean() = mean;
+  *request->mutable_variance() = var;
+  *request->mutable_grad_output() = grad_output;
+  request->set_epsilon(epsilon);
+  request->set_feature_index(feature_index);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::CrossReplicaSum(
     const ComputationDataHandle& operand) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  CrossReplicaSumRequest request;
-  *request.mutable_operand() = operand;
   OpRequest op_request;
-  *op_request.mutable_cross_replica_sum_request() = request;
-  *op_request.mutable_computation() = computation_.handle();
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making cross-replica-sum request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  CrossReplicaSumRequest* request =
+      op_request.mutable_cross_replica_sum_request();
+  *request->mutable_operand() = operand;
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::SelectAndScatter(
@@ -1693,7 +1368,6 @@ ComputationDataHandle ComputationBuilder::SelectAndScatter(
 
   StatusOr<std::unique_ptr<Shape>> shape = GetShape(operand);
   if (!shape.ok()) {
-    first_error_ = shape.status();
     return ComputationDataHandle();
   }
   return SelectAndScatterWithGeneralPadding(
@@ -1710,98 +1384,53 @@ ComputationDataHandle ComputationBuilder::SelectAndScatterWithGeneralPadding(
     tensorflow::gtl::ArraySlice<std::pair<int64, int64>> padding,
     const ComputationDataHandle& source,
     const ComputationDataHandle& init_value, const Computation& scatter) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  SelectAndScatterRequest request;
-  *request.mutable_operand() = operand;
-  *request.mutable_select() = select.handle();
-  *request.mutable_source() = source;
-  *request.mutable_init_value() = init_value;
-  *request.mutable_scatter() = scatter.handle();
+  OpRequest op_request;
+  SelectAndScatterRequest* request =
+      op_request.mutable_select_and_scatter_request();
+  *request->mutable_operand() = operand;
+  *request->mutable_select() = select.handle();
+  *request->mutable_source() = source;
+  *request->mutable_init_value() = init_value;
+  *request->mutable_scatter() = scatter.handle();
 
   if (!MakeWindow(window_dimensions, window_strides, padding, {}, {},
-                  request.mutable_window())) {
+                  request->mutable_window())) {
     NoteError(InternalError("failed to make window"));
     return ComputationDataHandle();
   }
-  OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_select_and_scatter_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
 
-  VLOG(2) << "making select-and-scatter request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  return RunOpAndParseResponse(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::ReducePrecision(
     const ComputationDataHandle& operand, const int exponent_bits,
     const int mantissa_bits) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  ReducePrecisionRequest request;
-  *request.mutable_operand() = operand;
-  request.set_exponent_bits(exponent_bits);
-  request.set_mantissa_bits(mantissa_bits);
   OpRequest op_request;
-  *op_request.mutable_computation() = computation_.handle();
-  *op_request.mutable_reduce_precision_request() = request;
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making reduce-precision request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  ReducePrecisionRequest* request =
+      op_request.mutable_reduce_precision_request();
+  *request->mutable_operand() = operand;
+  request->set_exponent_bits(exponent_bits);
+  request->set_mantissa_bits(mantissa_bits);
+  return RunOpAndParseResponse(&op_request);
 }
 
 void ComputationBuilder::Send(const ComputationDataHandle& operand,
                               const ChannelHandle& handle) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return;
-  }
-
-  SendRequest request;
-  *request.mutable_operand() = operand;
-  *request.mutable_channel_handle() = handle;
   OpRequest op_request;
-  *op_request.mutable_send_request() = request;
+  SendRequest* request = op_request.mutable_send_request();
+  *request->mutable_operand() = operand;
+  *request->mutable_channel_handle() = handle;
   *op_request.mutable_computation() = computation_.handle();
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making send request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  VLOG(2) << "done with op request";
-
-  if (!s.ok()) {
-    NoteError(s);
-    return;
-  }
+  RunOpAndNoteError(&op_request);
 }
 
 ComputationDataHandle ComputationBuilder::Recv(const Shape& shape,
                                                const ChannelHandle& handle) {
-  if (!first_error_.ok() || !PrepareComputation().ok()) {
-    return ComputationDataHandle();
-  }
-
-  RecvRequest request;
-  *request.mutable_shape() = shape;
-  *request.mutable_channel_handle() = handle;
   OpRequest op_request;
-  *op_request.mutable_recv_request() = request;
-  *op_request.mutable_computation() = computation_.handle();
-  AddCommonFieldsToOpRequest(&op_request);
-  OpResponse response;
-
-  VLOG(2) << "making recv request";
-  Status s = client_->stub()->Op(&op_request, &response);
-  return ParseOpResponse(s, &response);
+  RecvRequest* request = op_request.mutable_recv_request();
+  *request->mutable_shape() = shape;
+  *request->mutable_channel_handle() = handle;
+  return RunOpAndParseResponse(&op_request);
 }
 
 Computation ComputationBuilder::BuildAndNoteError() {
@@ -1830,13 +1459,6 @@ StatusOr<Computation> ComputationBuilder::Build() {
   return {std::move(computation_)};
 }
 
-void ComputationBuilder::AddCommonFieldsToOpRequest(OpRequest* request) const {
-  *request->mutable_metadata() = metadata_;
-  if (sharding_) {
-    *request->mutable_sharding() = *sharding_;
-  }
-}
-
 /* static */ ConvolutionDimensionNumbers
 ComputationBuilder::CreateDefaultConvDimensionNumbers(int num_spatial_dims) {
   ConvolutionDimensionNumbers dimension_numbers;
diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h
index d2dbbbbebbd5a9386f8841576de33a1fdb767000..d82ba63e8ad0b9ceac0eb5f0cd7720cac0cbe6d3 100644
--- a/tensorflow/compiler/xla/client/computation_builder.h
+++ b/tensorflow/compiler/xla/client/computation_builder.h
@@ -43,59 +43,6 @@ limitations under the License.
 
 namespace xla {
 
-class ShardingBuilder {
- public:
-  // A shaped array used to describe the assignment of tiles to devices.
-  using TileAssignment = Array<int64>;
-
-  // Creates a replicated sharding - replicate a tensor on every device.
-  static OpSharding Replicate() {
-    OpSharding result;
-    result.set_type(OpSharding::Type::OpSharding_Type_REPLICATED);
-    return result;
-  }
-  // Creates a sharding that assigns a tensor to just one device.
-  static OpSharding AssignDevice(int device) {
-    OpSharding result;
-    result.set_type(OpSharding::Type::OpSharding_Type_MAXIMAL);
-    result.add_tile_assignment_dimensions(1);
-    result.add_tile_assignment_devices(device);
-    return result;
-  }
-  // Creates a tiled sharding with the given tile shape and assignment of tiles
-  // to devices.
-  static OpSharding Tile(Shape tile_shape,
-                         const TileAssignment& tile_assignment) {
-    OpSharding result;
-    result.set_type(OpSharding::Type::OpSharding_Type_OTHER);
-    *result.mutable_tile_shape() = tile_shape;
-    for (int64 dim : tile_assignment.dimensions()) {
-      result.add_tile_assignment_dimensions(dim);
-    }
-    for (uint32 device : tile_assignment) {
-      result.add_tile_assignment_devices(device);
-    }
-    return result;
-  }
-  // Creates a sharding in one dimension, with the given tile shape which must
-  // be rank 1 and using devices 0..num_tiles.
-  static OpSharding Tile1D(Shape tile_shape, int64 num_tiles) {
-    OpSharding result;
-    result.set_type(OpSharding::Type::OpSharding_Type_OTHER);
-
-    CHECK_EQ(ShapeUtil::Rank(tile_shape), 1);
-    std::vector<int64> dimensions(1, num_tiles);
-    auto& tile_dimension = (*tile_shape.mutable_dimensions())[0];
-    tile_dimension = CeilOfRatio(static_cast<int64>(tile_dimension), num_tiles);
-    *result.mutable_tile_shape() = tile_shape;
-    result.add_tile_assignment_dimensions(num_tiles);
-    for (int64 i = 0; i < num_tiles; ++i) {
-      result.add_tile_assignment_devices(i);
-    }
-    return result;
-  }
-};
-
 // Wraps an XLA client with a convenient interface for building up
 // computations. Any errors encountered in building up the computation are
 // deferred from being handled until Build() is called.
@@ -393,6 +340,11 @@ class ComputationBuilder {
   ComputationDataHandle Dot(const ComputationDataHandle& lhs,
                             const ComputationDataHandle& rhs);
 
+  // Enqueues a general dot instruction onto the computation.
+  ComputationDataHandle DotGeneral(
+      const ComputationDataHandle& lhs, const ComputationDataHandle& rhs,
+      const DotDimensionNumbers& dimension_numbers);
+
   // Default dimension numbers used for a 2D convolution.
   static constexpr int64 kConvBatchDimension = 0;
   static constexpr int64 kConvFeatureDimension = 1;
@@ -458,14 +410,24 @@ class ComputationBuilder {
       tensorflow::gtl::ArraySlice<int64> rhs_dilation,
       const ConvolutionDimensionNumbers& dimension_numbers);
 
+  // Enqueues an FFT instruction onto the computation, of the given type and
+  // with the given FFT length.
+  ComputationDataHandle Fft(const ComputationDataHandle& operand,
+                            FftType fft_type,
+                            tensorflow::gtl::ArraySlice<int64> fft_length);
+
   // Enqueues an infeed instruction onto the computation, which writes data of
   // the given shape to the infeed buffer of the device.
   ComputationDataHandle Infeed(const Shape& shape, const string& config = "");
 
   // Enqueues an outfeed instruction onto the computation. This instruction
   // generates outgoing data transfers for the given data.
-  void Outfeed(const ComputationDataHandle& operand, const Shape& shape,
-               const string& outfeed_config);
+  //
+  // shape_with_layout communicates the laid out shape that we want to outfeed
+  // -- if !ShapeUtil::Compatible(GetShape(operand), shape_with_layout) an error
+  // will occur.
+  void Outfeed(const ComputationDataHandle& operand,
+               const Shape& shape_with_layout, const string& outfeed_config);
 
   // Enqueues a call instruction onto the computation.
   ComputationDataHandle Call(
@@ -726,16 +688,18 @@ class ComputationBuilder {
                                    const ComputationDataHandle& b,
                                    const Shape& shape);
 
-  // Enqueues a B(1, p) random number generation instruction onto the
-  // computation.
-  ComputationDataHandle RngBernoulli(const ComputationDataHandle& mean,
-                                     const Shape& shape);
-
   // Enqueues a while node onto the computation.
   ComputationDataHandle While(const Computation& condition,
                               const Computation& body,
                               const ComputationDataHandle& init);
 
+  // Enqueues a conditional node onto the computation.
+  ComputationDataHandle Conditional(const ComputationDataHandle& predicate,
+                                    const ComputationDataHandle& true_operand,
+                                    const Computation& true_computation,
+                                    const ComputationDataHandle& false_operand,
+                                    const Computation& false_computation);
+
   // Enqueues a ReducePrecision node onto the computation.
   ComputationDataHandle ReducePrecision(const ComputationDataHandle& operand,
                                         const int exponent_bits,
@@ -751,7 +715,7 @@ class ComputationBuilder {
   ComputationDataHandle Recv(const Shape& shape, const ChannelHandle& handle);
 
   // Returns true if 'operand' is a compile-time constant. A compile-time
-  // constant does not depend on parameters with higher index then
+  // constant does not depend on parameters with index greater than or equal to
   // `num_parameters`, or on stateful operators such as `RngNormal` or `Infeed`.
   // Unlike `ComputeConstant`, `IsConstant` tests whether a computation is a
   // compile-time constant without evaluating the computation.
@@ -811,7 +775,7 @@ class ComputationBuilder {
   // The operand must represent a constant value, which in this case
   // means that it must not statically depend on any parameter of the
   // computation that is being built other then the ones specified on the
-  // paramtere list. The parameters in the list will be indexed by their
+  // parameter list. The parameters in the list will be indexed by their
   // parameter id property so the number of parameters specified should be at
   // least as many as the largest used parameter index.
   //
@@ -870,8 +834,6 @@ class ComputationBuilder {
   Status first_error() const { return first_error_; }
 
  private:
-  using PopulateLiteral = std::function<void(Literal*)>;
-
   // Limited checking of convolution parameters. Returns false on
   // error.
   bool VerifyConvolution(const Shape& lhs_shape, const Shape& rhs_shape,
@@ -890,11 +852,6 @@ class ComputationBuilder {
                   tensorflow::gtl::ArraySlice<int64> rhs_dilation,
                   Window* window);
 
-  // Internal helper method that makes a request for a constant operation -- the
-  // provided function is used to populate the literal before sending the
-  // request.
-  ComputationDataHandle ConstantOp(const PopulateLiteral& populate);
-
   // Internal helper method that does the building for an arbitrary unary op.
   ComputationDataHandle UnaryOp(UnaryOperation binop,
                                 const ComputationDataHandle& operand);
@@ -924,19 +881,28 @@ class ComputationBuilder {
   // This is used before any given operation is enqueued.
   Status PrepareComputation();
 
-  // Helper function for parsing a method response and either returning the
-  // output computation data handle (on success) or a vacuous computation data
-  // handle (on failure).
-  ComputationDataHandle ParseOpResponse(const Status& status,
-                                        OpResponse* response);
-
   // Notes that the error occurred by:
   // * storing it internally and capturing a backtrace if it's the first error
   //   (this deferred value will be produced on the call to Build())
   // * dying if die_immediately_on_error_ is true
   void NoteError(const Status& error);
 
-  void AddCommonFieldsToOpRequest(OpRequest* request) const;
+  // Helper function that runs the given op_request, filling in op_response.
+  // Before the op is run, PrepareComputation is called, and common fields in
+  // the op_request are filled in.
+  Status RunOp(OpRequest* op_request, OpResponse* op_response);
+
+  // Helper function that calls RunOp and calls NoteError on failures.
+  void RunOpAndNoteError(OpRequest* op_request);
+
+  // Helper function that calls RunOp and either returns the output computation
+  // data handle (on success) or a vacuous computation data handle (on failure).
+  ComputationDataHandle RunOpAndParseResponse(OpRequest* op_request);
+
+  // Helper function that implements GetShape without noting errors. This makes
+  // it easier to ensure the real GetShape will note errors on every error path.
+  StatusOr<std::unique_ptr<Shape>> GetShapeWithoutNoteError(
+      const ComputationDataHandle& operand);
 
   string name_;  // Name to use for the built computation.
 
@@ -970,68 +936,66 @@ class ComputationBuilder {
 
 template <typename NativeT>
 ComputationDataHandle ComputationBuilder::ConstantR0(NativeT value) {
-  return ConstantOp([value](Literal* literal) { literal->PopulateR0(value); });
+  return ConstantLiteral(*Literal::CreateR0<NativeT>(value));
 }
 
 template <typename NativeT>
 ComputationDataHandle ComputationBuilder::ConstantR1(
     tensorflow::gtl::ArraySlice<NativeT> values) {
-  return ConstantOp(
-      [&values](Literal* literal) { literal->PopulateR1(values); });
+  return ConstantLiteral(*Literal::CreateR1<NativeT>(values));
 }
 
 template <typename NativeT>
 ComputationDataHandle ComputationBuilder::ConstantR1(int64 length,
                                                      NativeT value) {
-  return ConstantOp([length, value](Literal* literal) {
-    literal->PopulateWithValue(value, {length});
-  });
+  Literal literal(ShapeUtil::MakeShape(
+      primitive_util::NativeToPrimitiveType<NativeT>(), {length}));
+  literal.PopulateWithValue(value);
+  return ConstantLiteral(literal);
 }
 
 inline ComputationDataHandle ComputationBuilder::ConstantR1(
     const tensorflow::core::Bitmap& values) {
-  return ConstantOp(
-      [&values](Literal* literal) { literal->PopulateR1(values); });
+  return ConstantLiteral(*Literal::CreateR1(values));
 }
 
 template <typename NativeT>
 ComputationDataHandle ComputationBuilder::ConstantR2(
     std::initializer_list<std::initializer_list<NativeT>> values) {
-  return ConstantOp(
-      [&values](Literal* literal) { literal->PopulateR2(values); });
+  return ConstantLiteral(*Literal::CreateR2<NativeT>(values));
 }
 
 template <typename NativeT>
 ComputationDataHandle ComputationBuilder::ConstantFromArrayWithLayout(
     const Array<NativeT>& values, const Layout& layout) {
-  return ConstantOp([&values, &layout](Literal* literal) {
-    literal->PopulateFromArrayWithLayout(values, layout);
-  });
+  return ConstantLiteral(
+      *Literal::CreateFromArrayWithLayout<NativeT>(values, layout));
 }
 
 template <typename NativeT>
 ComputationDataHandle ComputationBuilder::ConstantFromArray(
     const Array<NativeT>& values) {
-  return ConstantOp(
-      [&values](Literal* literal) { literal->PopulateFromArray(values); });
+  return ConstantLiteral(*Literal::CreateFromArray<NativeT>(values));
 }
 
 template <typename NativeT>
 ComputationDataHandle ComputationBuilder::ConstantR2FromArray2DWithLayout(
     const Array2D<NativeT>& values, const Layout& layout) {
-  return ConstantFromArrayWithLayout(values, layout);
+  return ConstantLiteral(
+      *Literal::CreateFromArrayWithLayout<NativeT>(values, layout));
 }
 
 template <typename NativeT>
 ComputationDataHandle ComputationBuilder::ConstantR2FromArray2D(
     const Array2D<NativeT>& values) {
-  return ConstantFromArray(values);
+  return ConstantLiteral(*Literal::CreateR2FromArray2D<NativeT>(values));
 }
 
 template <typename NativeT>
 ComputationDataHandle ComputationBuilder::ConstantR3FromArray3DWithLayout(
     const Array3D<NativeT>& values, const Layout& layout) {
-  return ConstantFromArrayWithLayout(values, layout);
+  return ConstantLiteral(
+      *Literal::CreateR3FromArray3DWithLayout<NativeT>(values, layout));
 }
 
 template <typename NativeT>
diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc
index b051955f0fd85b7ca886bc0238068aeb94427209..523169fdd266d445c9d0d056ba20091f77610ad9 100644
--- a/tensorflow/compiler/xla/client/local_client.cc
+++ b/tensorflow/compiler/xla/client/local_client.cc
@@ -78,14 +78,14 @@ tensorflow::Status LocalExecutable::ValidateExecutionOptions(
   }
   for (int i = 0; i < arguments.size(); ++i) {
     if (!computation_layout.parameter_layout(i).MatchesLayoutInShape(
-            arguments[i]->shape())) {
+            arguments[i]->on_host_shape())) {
       return InvalidArgument(
           "argument does not match shape or layout of computation parameter "
           "%d: expected %s, got %s",
           i,
           ShapeUtil::HumanString(computation_layout.parameter_layout(i).shape())
               .c_str(),
-          ShapeUtil::HumanString(arguments[i]->shape()).c_str());
+          ShapeUtil::HumanString(arguments[i]->on_host_shape()).c_str());
     }
   }
 
@@ -184,7 +184,7 @@ StatusOr<std::unique_ptr<ScopedShapedBuffer>> LocalExecutable::Run(
   }
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<ShapedBuffer> result,
-      executable_->ExecuteOnStreamWrapper<std::unique_ptr<ShapedBuffer>>(
+      executable_->ExecuteOnStreamWrapper(
           &service_options, options.execution_profile(), arguments));
   return ScopedShapedBuffer::MakeScoped(result.get(),
                                         actual_options.allocator());
@@ -281,13 +281,9 @@ LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal,
   if (allocator == nullptr) {
     allocator = backend().memory_allocator();
   }
-  TF_ASSIGN_OR_RETURN(
-      auto scoped_buffer,
-      ScopedShapedBuffer::Allocate(
-          literal.shape(), allocator, device_ordinal,
-          [this](const Shape& shape) {
-            return backend().transfer_manager()->GetByteSizeRequirement(shape);
-          }));
+  TF_ASSIGN_OR_RETURN(auto scoped_buffer,
+                      backend().transfer_manager()->AllocateScopedShapedBuffer(
+                          literal.shape(), allocator, device_ordinal));
   TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
                       backend().stream_executor(device_ordinal));
   TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
@@ -322,4 +318,8 @@ StatusOr<std::unique_ptr<Literal>> LocalClient::TransferFromOutfeedLocal(
   return std::move(literal);
 }
 
+StatusOr<int> LocalClient::ReplicaNumberToDeviceOrdinal(int replica_number) {
+  return local_service_->ReplicaNumberToDeviceOrdinal(replica_number);
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h
index 3ca0d2ef5513cfb6b0dbfbc63b311f81a318356e..19fd14f76bc69d528193f7981a51a305f03f987e 100644
--- a/tensorflow/compiler/xla/client/local_client.h
+++ b/tensorflow/compiler/xla/client/local_client.h
@@ -176,6 +176,13 @@ class LocalClient : public Client {
   StatusOr<std::unique_ptr<Literal>> TransferFromOutfeedLocal(
       const Shape& shape, int device_ordinal);
 
+  // Returns the device ordinal that corresponds to the given replica number.
+  //
+  // This returns an error if there is not a one-to-one correspondence of
+  // replicas to device ordinals, but is useful as a short term mechanism for
+  // the "easy" case where a single replica is a single device.
+  StatusOr<int> ReplicaNumberToDeviceOrdinal(int replica_number);
+
   // Returns the platform that the underlying service targets.
   perftools::gputools::Platform* platform() const;
 
diff --git a/tensorflow/compiler/xla/client/sharding_builder.cc b/tensorflow/compiler/xla/client/sharding_builder.cc
new file mode 100644
index 0000000000000000000000000000000000000000..176802b33ef824a1f898255a19e44def3c1fc982
--- /dev/null
+++ b/tensorflow/compiler/xla/client/sharding_builder.cc
@@ -0,0 +1,76 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/client/sharding_builder.h"
+
+namespace xla {
+namespace sharding_builder {
+
+OpSharding Replicate() {
+  OpSharding result;
+  result.set_type(OpSharding::Type::OpSharding_Type_REPLICATED);
+  return result;
+}
+
+OpSharding AssignDevice(int device) {
+  OpSharding result;
+  result.set_type(OpSharding::Type::OpSharding_Type_MAXIMAL);
+  result.add_tile_assignment_dimensions(1);
+  result.add_tile_assignment_devices(device);
+  return result;
+}
+
+OpSharding Tile(const Shape& tile_shape,
+                const TileAssignment& tile_assignment) {
+  OpSharding result;
+  result.set_type(OpSharding::Type::OpSharding_Type_OTHER);
+  *result.mutable_tile_shape() = tile_shape;
+  for (int64 dim : tile_assignment.dimensions()) {
+    result.add_tile_assignment_dimensions(dim);
+  }
+  for (uint32 device : tile_assignment) {
+    result.add_tile_assignment_devices(device);
+  }
+  return result;
+}
+
+OpSharding Tile1D(const Shape& tile_shape, int64 num_tiles) {
+  OpSharding result;
+  result.set_type(OpSharding::Type::OpSharding_Type_OTHER);
+
+  CHECK_EQ(ShapeUtil::Rank(tile_shape), 1);
+  std::vector<int64> dimensions(1, num_tiles);
+  *result.mutable_tile_shape() = tile_shape;
+  auto& tile_dimension =
+      (*result.mutable_tile_shape()->mutable_dimensions())[0];
+  tile_dimension = CeilOfRatio(static_cast<int64>(tile_dimension), num_tiles);
+  result.add_tile_assignment_dimensions(num_tiles);
+  for (int64 i = 0; i < num_tiles; ++i) {
+    result.add_tile_assignment_devices(i);
+  }
+  return result;
+}
+
+OpSharding Tuple(const ShapeTree<OpSharding>& shardings) {
+  OpSharding result;
+  result.set_type(OpSharding::Type::OpSharding_Type_TUPLE);
+  for (const auto& index_to_sharding : shardings.leaves()) {
+    *result.add_tuple_shardings() = index_to_sharding.second;
+  }
+  return result;
+}
+
+}  // namespace sharding_builder
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/client/sharding_builder.h b/tensorflow/compiler/xla/client/sharding_builder.h
new file mode 100644
index 0000000000000000000000000000000000000000..34763e54d946690289ff42a7712b980168933eee
--- /dev/null
+++ b/tensorflow/compiler/xla/client/sharding_builder.h
@@ -0,0 +1,59 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_SHARDING_BUILDER_H_
+#define TENSORFLOW_COMPILER_XLA_CLIENT_SHARDING_BUILDER_H_
+
+#include <vector>
+
+#include "tensorflow/compiler/xla/array.h"
+#include "tensorflow/compiler/xla/shape_tree.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+
+namespace xla {
+namespace sharding_builder {
+// A shaped array used to describe the assignment of tiles to devices.
+using TileAssignment = Array<int64>;
+
+// Creates a replicated sharding - replicate a tensor on every device.
+OpSharding Replicate();
+
+// Creates a sharding that assigns a tensor to just one device.
+OpSharding AssignDevice(int device);
+
+// Creates a tiled sharding with the given tile shape and assignment of tiles
+// to devices.
+//
+// If tile_shape is not evenly divisible by the number of devices in
+// tile_assignment, operations behave as if implicit padding had been inserted.
+// The value of this padding is undefined.
+OpSharding Tile(const Shape& tile_shape, const TileAssignment& tile_assignment);
+
+// Creates a sharding in one dimension, with the given tile shape which must
+// be rank 1 and using devices [0..num_tiles).
+//
+// This is simply a convenience wrapper for Tile().
+OpSharding Tile1D(const Shape& tile_shape, int64 num_tiles);
+
+// Creates a tuple sharding from the given ShapeTree of element shardings.
+OpSharding Tuple(const ShapeTree<OpSharding>& shardings);
+
+}  // namespace sharding_builder
+}  // namespace xla
+
+#endif
diff --git a/tensorflow/compiler/xla/executable_run_options.cc b/tensorflow/compiler/xla/executable_run_options.cc
index 33d5b6f1d4d15d5143a3421c87eab9b7a7d11345..392ad9010ab81923a089c7b00a79ddc281af92bb 100644
--- a/tensorflow/compiler/xla/executable_run_options.cc
+++ b/tensorflow/compiler/xla/executable_run_options.cc
@@ -83,7 +83,7 @@ ExecutableRunOptions& ExecutableRunOptions::set_device_assignment(
   return *this;
 }
 
-DeviceAssignment* ExecutableRunOptions::device_assignment() const {
+const DeviceAssignment* ExecutableRunOptions::device_assignment() const {
   return device_assignment_;
 }
 
diff --git a/tensorflow/compiler/xla/executable_run_options.h b/tensorflow/compiler/xla/executable_run_options.h
index deb3ddb203d263d25bef0499a8a53a6098d0de0c..d4fcbf0493c936ebcd0639a432e56b62ee15672c 100644
--- a/tensorflow/compiler/xla/executable_run_options.h
+++ b/tensorflow/compiler/xla/executable_run_options.h
@@ -82,7 +82,7 @@ class ExecutableRunOptions {
 
   ExecutableRunOptions& set_device_assignment(
       DeviceAssignment* device_assignment);
-  DeviceAssignment* device_assignment() const;
+  const DeviceAssignment* device_assignment() const;
 
  private:
   DeviceMemoryAllocator* allocator_ = nullptr;
diff --git a/tensorflow/compiler/xla/index_util.cc b/tensorflow/compiler/xla/index_util.cc
index 76c0168f370ff1f0749759705b7ecff359a80341..ffd1fb79e986f82e1c2721f0eefbf3b4c0838e41 100644
--- a/tensorflow/compiler/xla/index_util.cc
+++ b/tensorflow/compiler/xla/index_util.cc
@@ -78,7 +78,7 @@ namespace xla {
   int64 scale = 1;
   int64 linear_index = 0;
   bool first = true;
-  for (auto dimension : shape.layout().minor_to_major()) {
+  for (auto dimension : LayoutUtil::MinorToMajor(shape)) {
     if (first) {
       // Avoid two multiplies on the first loop iteration
       linear_index = multi_index[dimension];
@@ -110,7 +110,7 @@ namespace xla {
 
   // Accumulated product D{L(0)} * D{L(1)} * ...
   int64 divisor = 1;
-  for (auto dimension : shape.layout().minor_to_major()) {
+  for (auto dimension : LayoutUtil::MinorToMajor(shape)) {
     multi_index[dimension] =
         (linear_index / divisor) % shape.dimensions(dimension);
     divisor *= shape.dimensions(dimension);
@@ -133,21 +133,49 @@ namespace xla {
 
 /* static */ int64 IndexUtil::GetDimensionStride(const Shape& shape,
                                                  int64 dimension) {
-  const Layout& layout = shape.layout();
-  int64 pdim_size = layout.padded_dimensions_size();
+  int64 pdim_size = LayoutUtil::PaddedDimensions(shape).size();
   int64 stride = 1;
   DCHECK(pdim_size == 0 || pdim_size == shape.dimensions_size());
-  for (auto dim : layout.minor_to_major()) {
+  for (auto dim : LayoutUtil::MinorToMajor(shape)) {
     if (dim == dimension) {
       break;
     }
     if (pdim_size == 0) {
       stride *= shape.dimensions(dim);
     } else {
-      stride *= layout.padded_dimensions(dim);
+      stride *= LayoutUtil::PaddedDimension(shape, dim);
     }
   }
   return stride;
 }
 
+/* static */ bool IndexUtil::IndexInBounds(
+    const Shape& shape, tensorflow::gtl::ArraySlice<int64> index) {
+  int64 rank = ShapeUtil::Rank(shape);
+  if (rank != index.size()) {
+    return false;
+  }
+  for (int64 d = 0; d < rank; ++d) {
+    if (index[d] >= shape.dimensions(d)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/* static */ int IndexUtil::CompareIndices(
+    tensorflow::gtl::ArraySlice<int64> lhs,
+    tensorflow::gtl::ArraySlice<int64> rhs) {
+  int64 rank = lhs.size();
+  CHECK_EQ(rhs.size(), rank);
+  for (int64 dim = 0; dim < rank; ++dim) {
+    if (lhs[dim] < rhs[dim]) {
+      return -1;
+    } else if (lhs[dim] > rhs[dim]) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/index_util.h b/tensorflow/compiler/xla/index_util.h
index c9838966a5b67397eb5fc4afe3ab9d98e82eb2b1..0b9188e8524d6f1367541496dc5a86a250a0d530 100644
--- a/tensorflow/compiler/xla/index_util.h
+++ b/tensorflow/compiler/xla/index_util.h
@@ -69,6 +69,18 @@ class IndexUtil {
   //    sizeof(dimension(3)) * sizeof(dimension(2)) == 4 * 10
   static int64 GetDimensionStride(const Shape& shape, int64 dimension);
 
+  // Returns true iff the given multi-index is contained in the bounds for the
+  // shape.
+  static bool IndexInBounds(const Shape& shape,
+                            tensorflow::gtl::ArraySlice<int64> index);
+
+  // Compares the given indices in lexicographic order.  lhs[0] and rhs[0] are
+  // compared first, and lhs[rank-1] and rhs[rank-1] last.  If lhs is larger,
+  // then -1 is returned. If rhs is larger, then 1 is returned.  Otherwise, 0 is
+  // returned.
+  static int CompareIndices(tensorflow::gtl::ArraySlice<int64> lhs,
+                            tensorflow::gtl::ArraySlice<int64> rhs);
+
  private:
   TF_DISALLOW_COPY_AND_ASSIGN(IndexUtil);
 };
diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc
index 5c2cc2a7a99cc51ded3d98c9dd5903e4b3078548..fdc4bbdd8b162b7115788e267c2a53e73c186123 100644
--- a/tensorflow/compiler/xla/layout_util.cc
+++ b/tensorflow/compiler/xla/layout_util.cc
@@ -57,17 +57,26 @@ void SetDefaultLayoutToContainer(
 /* static */ Layout LayoutUtil::MakeLayout(
     tensorflow::gtl::ArraySlice<int64> minor_to_major) {
   Layout layout;
+  layout.set_format(DENSE);
   for (int64 dimension_number : minor_to_major) {
     layout.add_minor_to_major(dimension_number);
   }
   return layout;
 }
 
+/* static */ Layout LayoutUtil::MakeSparseLayout(int64 max_sparse_elements) {
+  Layout layout;
+  layout.set_format(SPARSE);
+  layout.set_max_sparse_elements(max_sparse_elements);
+  return layout;
+}
+
 namespace {
 
 // Internal helper that creates a default layout for an array of the given rank.
 Layout CreateDefaultLayoutForRank(int64 rank) {
   Layout layout;
+  layout.set_format(DENSE);
   tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>*
       minor_to_major = layout.mutable_minor_to_major();
   minor_to_major->Resize(rank, 0);
@@ -105,7 +114,11 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
     for (auto& element_shape : *shape->mutable_tuple_shapes()) {
       SetToDefaultLayout(&element_shape);
     }
+    shape->clear_layout();
+  } else if (ShapeUtil::IsOpaque(*shape)) {
+    shape->clear_layout();
   } else {
+    shape->mutable_layout()->set_format(DENSE);
     tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>*
         minor_to_major = shape->mutable_layout()->mutable_minor_to_major();
     minor_to_major->Resize(shape->dimensions_size(), 0);
@@ -137,8 +150,10 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
       TF_RETURN_IF_ERROR(ValidateLayoutInShape(element_shape));
     }
     return tensorflow::Status::OK();
-  } else if (ShapeUtil::Rank(shape) == 0 && !shape.has_layout()) {
-    // A scalar without a layout is ok.
+  } else if (ShapeUtil::IsOpaque(shape)) {
+    if (shape.has_layout()) {
+      return InvalidArgument("opaque should not have a layout field");
+    }
     return tensorflow::Status::OK();
   } else {
     // Array shape.
@@ -156,46 +171,59 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
     return InvalidArgument("a single Layout is not valid for tuple shapes");
   }
 
-  if (layout.minor_to_major_size() != ShapeUtil::Rank(shape)) {
+  if (ShapeUtil::IsOpaque(shape)) {
+    return tensorflow::Status::OK();
+  }
+
+  if (layout.format() == INVALID_FORMAT) {
     return InvalidArgument(
-        "layout minor_to_major field contains %d elements, "
-        "but shape is rank %lld: {%s}; shape: %s",
-        layout.minor_to_major_size(), ShapeUtil::Rank(shape),
-        tensorflow::str_util::Join(layout.minor_to_major(), ", ").c_str(),
-        shape.ShortDebugString().c_str());
+        "Layout does not have a valid format: layout {%s}, shape {%s}",
+        layout.ShortDebugString().c_str(), shape.ShortDebugString().c_str());
   }
 
-  std::vector<bool> dimensions_in_layout(ShapeUtil::Rank(shape), false);
-  for (int64 i = 0; i < ShapeUtil::Rank(shape); ++i) {
-    int64 dim = layout.minor_to_major(i);
-    if (dim < 0 || dim >= ShapeUtil::Rank(shape)) {
+  if (layout.format() == DENSE) {
+    if (layout.minor_to_major_size() != ShapeUtil::Rank(shape)) {
       return InvalidArgument(
-          "layout minor_to_major field has out-of-bounds value: %s",
-          HumanString(layout).c_str());
+          "layout minor_to_major field contains %d elements, "
+          "but shape is rank %lld: {%s}; shape: %s",
+          layout.minor_to_major_size(), ShapeUtil::Rank(shape),
+          tensorflow::str_util::Join(layout.minor_to_major(), ", ").c_str(),
+          shape.ShortDebugString().c_str());
     }
-    if (dimensions_in_layout[dim]) {
-      return InvalidArgument(
-          "layout minor_to_major field has duplicate values: {%s}",
-          HumanString(layout).c_str());
-    }
-    dimensions_in_layout[dim] = true;
-  }
 
-  if (layout.padded_dimensions_size() > 0) {
-    if (layout.padded_dimensions_size() != ShapeUtil::Rank(shape)) {
-      return InvalidArgument(
-          "layout has %d padded dimensions, but shape is rank %lld",
-          layout.padded_dimensions_size(), ShapeUtil::Rank(shape));
+    std::vector<bool> dimensions_in_layout(ShapeUtil::Rank(shape), false);
+    for (int64 i = 0; i < ShapeUtil::Rank(shape); ++i) {
+      int64 dim = layout.minor_to_major(i);
+      if (dim < 0 || dim >= ShapeUtil::Rank(shape)) {
+        return InvalidArgument(
+            "layout minor_to_major field has out-of-bounds value: %s",
+            HumanString(layout).c_str());
+      }
+      if (dimensions_in_layout[dim]) {
+        return InvalidArgument(
+            "layout minor_to_major field has duplicate values: {%s}",
+            HumanString(layout).c_str());
+      }
+      dimensions_in_layout[dim] = true;
     }
-    for (int i = 0; i < layout.padded_dimensions_size(); ++i) {
-      if (layout.padded_dimensions(i) < shape.dimensions(i)) {
+
+    if (layout.padded_dimensions_size() > 0) {
+      if (layout.padded_dimensions_size() != ShapeUtil::Rank(shape)) {
         return InvalidArgument(
-            "for dimension %d, dimension padding (%lld) is smaller than "
-            "the dimension size (%lld) of the shape",
-            i, layout.padded_dimensions(i), shape.dimensions(i));
+            "layout has %d padded dimensions, but shape is rank %lld",
+            layout.padded_dimensions_size(), ShapeUtil::Rank(shape));
+      }
+      for (int i = 0; i < layout.padded_dimensions_size(); ++i) {
+        if (layout.padded_dimensions(i) < shape.dimensions(i)) {
+          return InvalidArgument(
+              "for dimension %d, dimension padding (%lld) is smaller than "
+              "the dimension size (%lld) of the shape",
+              i, layout.padded_dimensions(i), shape.dimensions(i));
+        }
       }
     }
   }
+
   return tensorflow::Status::OK();
 }
 
@@ -213,12 +241,23 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
   LayoutUtil::ClearLayout(program_shape->mutable_result());
 }
 
+/* static */ bool LayoutUtil::IsDenseArray(const Shape& shape) {
+  return ShapeUtil::IsArray(shape) && shape.has_layout() &&
+         IsDense(shape.layout());
+}
+
+/* static */ bool LayoutUtil::IsDense(const Layout& layout) {
+  return layout.format() == DENSE;
+}
+
 /* static */ bool LayoutUtil::IsMonotonicWithDim0Minor(const Layout& layout) {
+  CHECK(layout.format() == DENSE);
   return std::is_sorted(layout.minor_to_major().begin(),
                         layout.minor_to_major().end());
 }
 
 /* static */ bool LayoutUtil::IsMonotonicWithDim0Major(const Layout& layout) {
+  CHECK(layout.format() == DENSE);
   return std::is_sorted(layout.minor_to_major().begin(),
                         layout.minor_to_major().end(), std::greater<int64>());
 }
@@ -228,6 +267,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
       shape.layout().padded_dimensions_size() == 0) {
     return false;
   }
+  CHECK(IsDenseArray(shape));
   CHECK_EQ(shape.dimensions_size(), shape.layout().padded_dimensions_size());
   for (int64 i = 0; i < shape.dimensions_size(); ++i) {
     if (shape.layout().padded_dimensions(i) > shape.dimensions(i)) {
@@ -237,15 +277,46 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
   return false;
 }
 
+/* static */ tensorflow::gtl::ArraySlice<int64> LayoutUtil::PaddedDimensions(
+    const Shape& shape) {
+  CHECK(IsDenseArray(shape));
+  return AsInt64Slice(shape.layout().padded_dimensions());
+}
+
+/* static */ int64 LayoutUtil::PaddedDimension(const Shape& shape,
+                                               int64 index) {
+  CHECK(IsDenseArray(shape));
+  return shape.layout().padded_dimensions(index);
+}
+
+/* static */ PaddingValue LayoutUtil::GetPaddingValue(const Shape& shape) {
+  CHECK(IsDenseArray(shape));
+  return shape.layout().padding_value();
+}
+
+/* static */ bool LayoutUtil::IsSparseArray(const Shape& shape) {
+  return ShapeUtil::IsArray(shape) && shape.has_layout() &&
+         IsSparse(shape.layout());
+}
+
+/* static */ bool LayoutUtil::IsSparse(const Layout& layout) {
+  return layout.format() == SPARSE;
+}
+
+/* static */ int64 LayoutUtil::MaxSparseElements(const Layout& layout) {
+  CHECK(IsSparse(layout));
+  return layout.max_sparse_elements();
+}
+
 /* static */ bool LayoutUtil::HasLayout(const Shape& shape) {
   if (ShapeUtil::IsTuple(shape)) {
     // Tuple shape: all subshapes must have a layout.
     return std::all_of(shape.tuple_shapes().begin(), shape.tuple_shapes().end(),
                        [](const Shape& s) { return HasLayout(s); });
+  } else if (ShapeUtil::IsOpaque(shape)) {
+    return true;
   }
-  // A scalar trivially always has a layout.
-  return (ShapeUtil::Rank(shape) == 0 ||
-          (shape.has_layout() && (shape.layout().minor_to_major_size() > 0)));
+  return shape.has_layout() && shape.layout().format() != INVALID_FORMAT;
 }
 
 /* static */ bool LayoutUtil::HasLayout(const ProgramShape& program_shape) {
@@ -261,6 +332,18 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
   return protobuf_util::ProtobufEquals(lhs, rhs);
 }
 
+/* static */ tensorflow::gtl::ArraySlice<int64> LayoutUtil::MinorToMajor(
+    const Shape& shape) {
+  CHECK(IsDenseArray(shape));
+  return AsInt64Slice(shape.layout().minor_to_major());
+}
+
+/* static */ tensorflow::gtl::ArraySlice<int64> LayoutUtil::MinorToMajor(
+    const Layout& layout) {
+  CHECK(layout.format() == DENSE);
+  return AsInt64Slice(layout.minor_to_major());
+}
+
 /* static */ int64 LayoutUtil::Major(const Layout& layout,
                                      int64 physical_dimension_number) {
   CHECK_LE(0, physical_dimension_number);
@@ -271,6 +354,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
 
 /* static */ int64 LayoutUtil::Minor(const Layout& layout,
                                      int64 physical_dimension_number) {
+  CHECK_EQ(layout.format(), DENSE);
   CHECK_LE(0, physical_dimension_number);
   CHECK_LT(physical_dimension_number, layout.minor_to_major_size());
   return layout.minor_to_major(physical_dimension_number);
@@ -287,6 +371,11 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
 }
 
 /* static */ string LayoutUtil::HumanString(const Layout& layout) {
+  if (IsSparse(layout)) {
+    return tensorflow::strings::StrCat("sparse{", layout.max_sparse_elements(),
+                                       "}");
+  }
+  CHECK(IsDense(layout));
   return tensorflow::strings::StrCat(
       "{", tensorflow::str_util::Join(layout.minor_to_major(), ","), "}");
 }
@@ -356,6 +445,7 @@ tensorflow::Status LayoutUtil::CopyLayoutBetweenShapes(const Shape& src,
 
 /* static */ bool LayoutUtil::AreDimensionsConsecutive(
     const Layout& layout, tensorflow::gtl::ArraySlice<int64> dims) {
+  CHECK(IsDense(layout));
   std::vector<int64> positions_in_layout;
   for (int64 dim : dims) {
     positions_in_layout.push_back(
@@ -370,4 +460,9 @@ tensorflow::Status LayoutUtil::CopyLayoutBetweenShapes(const Shape& src,
   return true;
 }
 
+std::ostream& operator<<(std::ostream& out, const Layout& layout) {
+  out << LayoutUtil::HumanString(layout);
+  return out;
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h
index bc42e222292933be35e82d1fe50802e8830d16b3..6c54eb2201b66a4a0c5695bceb14bb2367133935 100644
--- a/tensorflow/compiler/xla/layout_util.h
+++ b/tensorflow/compiler/xla/layout_util.h
@@ -36,6 +36,10 @@ class LayoutUtil {
   // convenience function for protobuf construction.)
   static Layout MakeLayout(tensorflow::gtl::ArraySlice<int64> minor_to_major);
 
+  // Creates a sparse layout with the given maximum number of elements. (This is
+  // a convenience function for protobuf construction.)
+  static Layout MakeSparseLayout(int64 max_sparse_elements);
+
   // Returns default layout for the given shape.
   static Layout GetDefaultLayoutForShape(const Shape& shape);
 
@@ -71,6 +75,12 @@ class LayoutUtil {
   // Clears the layout on all Shapes within the given ProgramShape.
   static void ClearLayout(ProgramShape* program_shape);
 
+  // Returns whether the given Shape is an array and has a dense format layout.
+  static bool IsDenseArray(const Shape& shape);
+
+  // Returns whether the given Layout has a dense format.
+  static bool IsDense(const Layout& layout);
+
   // Returns whether the layout is monotonic and dim 0 is minor in the layout.
   // * R0 and R1: this is always trivially true.
   // * R2+: equivalent to column-major. Dimension 0 is the minor, dimension 1 is
@@ -88,6 +98,30 @@ class LayoutUtil {
   // dimension size).
   static bool IsPadded(const Shape& shape);
 
+  // Returns the padded_dimensions array for the given Shape.  Requires that the
+  // shape is an array and has a dense layout.
+  static tensorflow::gtl::ArraySlice<int64> PaddedDimensions(
+      const Shape& shape);
+
+  // Returns the given index of the padded_dimensions array for the given Shape.
+  // Requires that the shape is an array and has a dense layout.
+  static int64 PaddedDimension(const Shape& shape, int64 index);
+
+  // Returns the padding_value for the given Shape.  Requires that the shape is
+  // an array and has a dense layout.
+  static PaddingValue GetPaddingValue(const Shape& shape);
+
+  // Returns whether the given Shape is an array (i.e. not a tuple) and has a
+  // sparse format layout.
+  static bool IsSparseArray(const Shape& shape);
+
+  // Returns whether the given Layout has a sparse format.
+  static bool IsSparse(const Layout& layout);
+
+  // Returns the maximum number of elements that can be stored in a sparse
+  // layout.
+  static int64 MaxSparseElements(const Layout& layout);
+
   // Returns whether the given shape has a layout. For tuple shapes, true is
   // returned only if all elements have layouts.
   static bool HasLayout(const Shape& shape);
@@ -98,7 +132,12 @@ class LayoutUtil {
   // Returns whether lhs and rhs are identical.
   static bool Equal(const Layout& lhs, const Layout& rhs);
 
-  // Major(0) is the most major logical dimension number, major(1) is the
+  // Returns the minor_to_major array for the given Shape.  Requires that the
+  // shape is an array and has a dense layout.
+  static tensorflow::gtl::ArraySlice<int64> MinorToMajor(const Shape& shape);
+  static tensorflow::gtl::ArraySlice<int64> MinorToMajor(const Layout& layout);
+
+  // Major(0) is the most major logical dimension number, Major(1) is the
   // second-most-major logical dimension number and so on.
   //
   // This can be used to translate physical dimension numbers to logical
@@ -160,6 +199,8 @@ class LayoutUtil {
   TF_DISALLOW_COPY_AND_ASSIGN(LayoutUtil);
 };
 
+std::ostream& operator<<(std::ostream& out, const Layout& layout);
+
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_LAYOUT_UTIL_H_
diff --git a/tensorflow/compiler/xla/layout_util_test.cc b/tensorflow/compiler/xla/layout_util_test.cc
index 331bb9afa94e9e7c97d9c880dbac31c60ac0da18..4fd1d818e3e3b417eee9f6b14bb598bfb9480c6e 100644
--- a/tensorflow/compiler/xla/layout_util_test.cc
+++ b/tensorflow/compiler/xla/layout_util_test.cc
@@ -14,6 +14,9 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/layout_util.h"
+
+#include <sstream>
+
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/test.h"
 #include "tensorflow/compiler/xla/test_helpers.h"
@@ -30,6 +33,14 @@ class LayoutUtilTest : public ::testing::Test {
     *shape.mutable_layout() = LayoutUtil::MakeLayout(minor_to_major);
     return shape;
   }
+
+  Shape MakeShapeWithSparseLayout(PrimitiveType element_type,
+                                  tensorflow::gtl::ArraySlice<int64> dimensions,
+                                  int64 max_sparse_elements) {
+    Shape shape = ShapeUtil::MakeShape(element_type, dimensions);
+    *shape.mutable_layout() = LayoutUtil::MakeSparseLayout(max_sparse_elements);
+    return shape;
+  }
 };
 
 TEST_F(LayoutUtilTest, TupleLayoutComparison) {
@@ -81,6 +92,29 @@ TEST_F(LayoutUtilTest, CopyLayoutArray) {
   EXPECT_FALSE(dst.has_layout());
 }
 
+TEST_F(LayoutUtilTest, CopyLayoutSparse) {
+  Shape src = MakeShapeWithSparseLayout(F32, {2, 3}, 2);
+  Shape dst = MakeShapeWithLayout(F32, {2, 3}, {1, 0});
+
+  EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+  EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+  EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+
+  // Should work if destination has no layout.
+  dst.clear_layout();
+  EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+  EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+  EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+
+  // If source is cleared, then destination should be cleared.
+  src.clear_layout();
+  EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+  EXPECT_TRUE(dst.has_layout());
+  EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+  EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+  EXPECT_FALSE(dst.has_layout());
+}
+
 TEST_F(LayoutUtilTest, CopyLayoutTuple) {
   Shape src = ShapeUtil::MakeTupleShape(
       {MakeShapeWithLayout(F32, {2, 3}, {0, 1}),
@@ -100,6 +134,25 @@ TEST_F(LayoutUtilTest, CopyLayoutTuple) {
   EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
 }
 
+TEST_F(LayoutUtilTest, CopyLayoutTupleSparse) {
+  Shape src = ShapeUtil::MakeTupleShape(
+      {MakeShapeWithSparseLayout(F32, {2, 3}, 4),
+       MakeShapeWithSparseLayout(F32, {42, 123}, 4),
+       ShapeUtil::MakeTupleShape(
+           {MakeShapeWithLayout(F32, {}, {}),
+            MakeShapeWithSparseLayout(F32, {1, 2, 3}, 6)})});
+  Shape dst = ShapeUtil::MakeTupleShape(
+      {MakeShapeWithLayout(F32, {2, 3}, {1, 0}),
+       MakeShapeWithLayout(F32, {42, 123}, {1, 0}),
+       ShapeUtil::MakeTupleShape(
+           {MakeShapeWithLayout(F32, {}, {}),
+            MakeShapeWithLayout(F32, {1, 2, 3}, {1, 2, 0})})});
+
+  EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+  EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+  EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+}
+
 TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleSameRank) {
   Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1});
   Shape dst = MakeShapeWithLayout(F32, {2, 3, 5}, {1, 0});
@@ -107,6 +160,13 @@ TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleSameRank) {
   EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
 }
 
+TEST_F(LayoutUtilTest, CopyLayoutSparseNotCompatibleSameRank) {
+  Shape src = MakeShapeWithSparseLayout(F32, {123, 42, 7}, 6);
+  Shape dst = MakeShapeWithLayout(F32, {2, 3, 5}, {1, 0});
+  ASSERT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+  EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+}
+
 TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleDifferentRank) {
   Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1});
   Shape dst = MakeShapeWithLayout(F32, {2, 3}, {1, 0});
@@ -116,6 +176,15 @@ TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleDifferentRank) {
               ::testing::ContainsRegex("cannot copy layout from shape"));
 }
 
+TEST_F(LayoutUtilTest, CopyLayoutSparseNotCompatibleDifferentRank) {
+  Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1});
+  Shape dst = MakeShapeWithSparseLayout(F32, {2, 3}, 4);
+  auto status = LayoutUtil::CopyLayoutBetweenShapes(src, &dst);
+  EXPECT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              ::testing::ContainsRegex("cannot copy layout from shape"));
+}
+
 TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleTuple) {
   Shape src =
       ShapeUtil::MakeTupleShape({MakeShapeWithLayout(F32, {2, 3}, {0, 1}),
@@ -221,5 +290,16 @@ TEST_F(LayoutUtilTest, DefaultLayoutGettersMajorToMinor) {
                             ShapeUtil::MakeShape(F32, {10, 20, 30, 15, 25}))));
 }
 
+TEST_F(LayoutUtilTest, SparseLayoutMaxElements) {
+  EXPECT_EQ(LayoutUtil::MaxSparseElements(LayoutUtil::MakeSparseLayout(101)),
+            101);
+}
+
+TEST_F(LayoutUtilTest, StreamOut) {
+  std::ostringstream oss;
+  oss << LayoutUtil::MakeLayout({0, 1, 2});
+  EXPECT_EQ(oss.str(), "{0,1,2}");
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
index bfafef0a40f55e13ac94b2d1750df25146081784..e88bffd0ba2dacb837c568023f5da1338fea40f3 100644
--- a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
+++ b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
@@ -40,6 +40,10 @@ void SetDebugOptionsDefaults(DebugOptions* flags) {
   flags->set_xla_cpu_multi_thread_eigen(true);
   flags->set_xla_gpu_cuda_data_dir("./cuda_sdk_lib");
   flags->set_xla_eliminate_hlo_implicit_broadcast(true);
+
+  // Set cudnn batchnorm off by default; it does not provide a performance win
+  // on average.
+  flags->set_xla_gpu_use_cudnn_batchnorm(false);
 }
 
 // Allocates flag_values and flag_objects; this function must not be called more
@@ -96,179 +100,184 @@ void AllocateFlags() {
             option_proto, reduce_precision_option_value);
       };
 
-  flag_objects = new std::vector<tensorflow::Flag>(
-      {tensorflow::Flag(
-           "xla_generate_hlo_graph",
-           flag_values->mutable_xla_generate_hlo_graph(),
-           "HLO modules matching this regex will be dumped to a .dot file "
-           "throughout various stages in compilation."),
-       tensorflow::Flag(
-           "xla_hlo_graph_addresses",
-           bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses),
-           flag_values->xla_hlo_graph_addresses(),
-           "With xla_generate_hlo_graph, show addresses of HLO ops in "
-           "graph dump."),
-       tensorflow::Flag(
-           "xla_hlo_graph_path", flag_values->mutable_xla_hlo_graph_path(),
-           "With xla_generate_hlo_graph, dump the graphs into this path."),
-       tensorflow::Flag(
-           "xla_hlo_dump_as_graphdef",
-           bool_setter_for(&DebugOptions::set_xla_hlo_dump_as_graphdef),
-           flag_values->xla_hlo_dump_as_graphdef(),
-           "Dump HLO graphs as TensorFlow GraphDefs."),
-       tensorflow::Flag(
-           "xla_hlo_graph_sharding_color",
-           bool_setter_for(&DebugOptions::set_xla_hlo_graph_sharding_color),
-           flag_values->xla_hlo_graph_sharding_color(),
-           "Assign colors based on sharding assignments when generating the "
-           "HLO graphs."),
-       tensorflow::Flag(
-           "xla_hlo_tfgraph_device_scopes",
-           bool_setter_for(&DebugOptions::set_xla_hlo_tfgraph_device_scopes),
-           flag_values->xla_hlo_tfgraph_device_scopes(),
-           "When generating TensorFlow HLO graphs, if the HLO instructions "
-           "are assigned to a specific device, prefix the name scope with "
-           "\"devX\" with X being the device ordinal."),
-       tensorflow::Flag(
-           "xla_log_hlo_text", flag_values->mutable_xla_log_hlo_text(),
-           "HLO modules matching this regex will be dumped to LOG(INFO)."),
-       tensorflow::Flag(
-           "xla_generate_hlo_text_to",
-           flag_values->mutable_xla_generate_hlo_text_to(),
-           "Dump all HLO modules as text into the provided directory path."),
-       tensorflow::Flag(
-           "xla_enable_fast_math",
-           bool_setter_for(&DebugOptions::set_xla_enable_fast_math),
-           flag_values->xla_enable_fast_math(),
-           "Enable unsafe fast-math optimizations in the compiler; "
-           "this may produce faster code at the expense of some accuracy."),
-       tensorflow::Flag(
-           "xla_llvm_enable_alias_scope_metadata",
-           bool_setter_for(
-               &DebugOptions::set_xla_llvm_enable_alias_scope_metadata),
-           flag_values->xla_llvm_enable_alias_scope_metadata(),
-           "In LLVM-based backends, enable the emission of "
-           "!alias.scope metadata in the generated IR."),
-       tensorflow::Flag(
-           "xla_llvm_enable_noalias_metadata",
-           bool_setter_for(&DebugOptions::set_xla_llvm_enable_noalias_metadata),
-           flag_values->xla_llvm_enable_noalias_metadata(),
-           "In LLVM-based backends, enable the emission of "
-           "!noalias metadata in the generated IR."),
-       tensorflow::Flag(
-           "xla_llvm_enable_invariant_load_metadata",
-           bool_setter_for(
-               &DebugOptions::set_xla_llvm_enable_invariant_load_metadata),
-           flag_values->xla_llvm_enable_invariant_load_metadata(),
-           "In LLVM-based backends, enable the emission of "
-           "!invariant.load metadata in "
-           "the generated IR."),
-       tensorflow::Flag(
-           "xla_llvm_disable_expensive_passes",
-           bool_setter_for(
-               &DebugOptions::set_xla_llvm_disable_expensive_passes),
-           flag_values->xla_llvm_disable_expensive_passes(),
-           "In LLVM-based backends, disable a custom set of "
-           "expensive optimization passes."),
-       tensorflow::Flag(
-           "xla_backend_optimization_level",
-           int32_setter_for(&DebugOptions::set_xla_backend_optimization_level),
-           flag_values->xla_backend_optimization_level(),
-           "Numerical optimization level for the XLA compiler backend."),
-       tensorflow::Flag(
-           "xla_disable_hlo_passes", setter_for_xla_disable_hlo_passes, "",
-           "Comma-separated list of hlo passes to be disabled. These names "
-           "must exactly match the passes' names; no whitespace around "
-           "commas."),
-       tensorflow::Flag(
-           "xla_embed_ir_in_executable",
-           bool_setter_for(&DebugOptions::set_xla_embed_ir_in_executable),
-           flag_values->xla_embed_ir_in_executable(),
-           "Embed the compiler IR as a string in the executable."),
-       tensorflow::Flag(
-           "xla_dump_ir_to", flag_values->mutable_xla_dump_ir_to(),
-           "Dump the compiler IR into this directory as individual files."),
-       tensorflow::Flag(
-           "xla_eliminate_hlo_implicit_broadcast",
-           bool_setter_for(
-               &DebugOptions::set_xla_eliminate_hlo_implicit_broadcast),
-           flag_values->xla_eliminate_hlo_implicit_broadcast(),
-           "Eliminate implicit broadcasts when lowering user "
-           "computations to HLO instructions; use explicit "
-           "broadcast instead."),
-       tensorflow::Flag(
-           "xla_cpu_multi_thread_eigen",
-           bool_setter_for(&DebugOptions::set_xla_cpu_multi_thread_eigen),
-           flag_values->xla_cpu_multi_thread_eigen(),
-           "When generating calls to Eigen in the CPU backend, "
-           "use multi-threaded Eigen mode."),
-       tensorflow::Flag("xla_gpu_cuda_data_dir",
-                        flag_values->mutable_xla_gpu_cuda_data_dir(),
-                        "If non-empty, speficies a local directory containing "
-                        "ptxas and nvvm libdevice files; otherwise we use "
-                        "those from runfile directories."),
-       tensorflow::Flag("xla_gpu_ftz",
-                        bool_setter_for(&DebugOptions::set_xla_gpu_ftz),
-                        flag_values->xla_gpu_ftz(),
-                        "If true, flush-to-zero semantics are enabled in the "
-                        "code generated for GPUs."),
-       tensorflow::Flag(
-           "xla_gpu_disable_multi_streaming",
-           bool_setter_for(&DebugOptions::set_xla_gpu_disable_multi_streaming),
-           flag_values->xla_gpu_disable_multi_streaming(),
-           "If true, multi-streaming in the GPU backend is disabled."),
-       tensorflow::Flag(
-           "xla_dump_hlo_proto_to",
-           flag_values->mutable_xla_dump_hlo_proto_to(),
-           "Dump compilation artifacts as proto binary into this directory."),
-       tensorflow::Flag(
-           "xla_test_all_output_layouts",
-           bool_setter_for(&DebugOptions::set_xla_test_all_output_layouts),
-           flag_values->xla_test_all_output_layouts(),
-           "Let ClientLibraryTestBase::ComputeAndCompare* test "
-           "all permutations of output layouts. For example, with "
-           "a 3D shape, all permutations of the set {0, 1, 2} are "
-           "tried."),
-       tensorflow::Flag(
-           "xla_test_all_input_layouts",
-           bool_setter_for(&DebugOptions::set_xla_test_all_input_layouts),
-           flag_values->xla_test_all_input_layouts(),
-           "Let ClientLibraryTestBase::ComputeAndCompare* test "
-           "all permutations of *input* layouts. For example, for "
-           "2 input arguments with 2D shape and 4D shape, the "
-           "computation will run 2! * 4! times for every possible "
-           "layouts"),
-       tensorflow::Flag(
-           "xla_hlo_profile",
-           bool_setter_for(&DebugOptions::set_xla_hlo_profile),
-           flag_values->xla_hlo_profile(),
-           "Instrument the computation to collect per-HLO cycle counts"),
-       tensorflow::Flag("xla_dump_computations_to",
-                        flag_values->mutable_xla_dump_computations_to(),
-                        "Dump computations that XLA executes into the provided "
-                        "directory path"),
-       tensorflow::Flag("xla_dump_executions_to",
-                        flag_values->mutable_xla_dump_executions_to(),
-                        "Dump parameters and results of computations that XLA "
-                        "executes into the provided directory path"),
-       tensorflow::Flag("xla_backend_extra_options",
-                        setter_for_xla_backend_extra_options, "",
-                        "Extra options to pass to a backend; "
-                        "comma-separated list of 'key=val' strings (=val "
-                        "may be omitted); no whitespace around commas."),
-       tensorflow::Flag("xla_reduce_precision", setter_for_xla_reduce_precision,
-                        "",
-                        "Directions for adding reduce-precision operations. "
-                        "Format is 'LOCATION=E,M:OPS;NAMES' where LOCATION is "
-                        "the class of locations in which to insert the "
-                        "operations (e.g., 'OP_OUTPUTS'), E and M are the "
-                        "exponent and matissa bit counts respectively, and "
-                        "OPS and NAMES are comma-separated (no spaces) lists "
-                        "of the operation types and names to which to attach "
-                        "the reduce-precision operations.  The NAMES string "
-                        "and its preceding ';' may be omitted.  This option "
-                        "may be repeated to define multiple sets of added "
-                        "reduce-precision operations.")});
+  flag_objects = new std::vector<tensorflow::Flag>({
+      tensorflow::Flag(
+          "xla_generate_hlo_graph",
+          flag_values->mutable_xla_generate_hlo_graph(),
+          "HLO modules matching this regex will be dumped to a .dot file "
+          "throughout various stages in compilation."),
+      tensorflow::Flag(
+          "xla_hlo_graph_addresses",
+          bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses),
+          flag_values->xla_hlo_graph_addresses(),
+          "With xla_generate_hlo_graph, show addresses of HLO ops in "
+          "graph dump."),
+      tensorflow::Flag(
+          "xla_hlo_graph_path", flag_values->mutable_xla_hlo_graph_path(),
+          "With xla_generate_hlo_graph, dump the graphs into this path."),
+      tensorflow::Flag(
+          "xla_hlo_dump_as_graphdef",
+          bool_setter_for(&DebugOptions::set_xla_hlo_dump_as_graphdef),
+          flag_values->xla_hlo_dump_as_graphdef(),
+          "Dump HLO graphs as TensorFlow GraphDefs."),
+      tensorflow::Flag(
+          "xla_hlo_graph_sharding_color",
+          bool_setter_for(&DebugOptions::set_xla_hlo_graph_sharding_color),
+          flag_values->xla_hlo_graph_sharding_color(),
+          "Assign colors based on sharding assignments when generating the "
+          "HLO graphs."),
+      tensorflow::Flag(
+          "xla_hlo_tfgraph_device_scopes",
+          bool_setter_for(&DebugOptions::set_xla_hlo_tfgraph_device_scopes),
+          flag_values->xla_hlo_tfgraph_device_scopes(),
+          "When generating TensorFlow HLO graphs, if the HLO instructions "
+          "are assigned to a specific device, prefix the name scope with "
+          "\"devX\" with X being the device ordinal."),
+      tensorflow::Flag(
+          "xla_log_hlo_text", flag_values->mutable_xla_log_hlo_text(),
+          "HLO modules matching this regex will be dumped to LOG(INFO)."),
+      tensorflow::Flag(
+          "xla_generate_hlo_text_to",
+          flag_values->mutable_xla_generate_hlo_text_to(),
+          "Dump all HLO modules as text into the provided directory path."),
+      tensorflow::Flag(
+          "xla_enable_fast_math",
+          bool_setter_for(&DebugOptions::set_xla_enable_fast_math),
+          flag_values->xla_enable_fast_math(),
+          "Enable unsafe fast-math optimizations in the compiler; "
+          "this may produce faster code at the expense of some accuracy."),
+      tensorflow::Flag(
+          "xla_llvm_enable_alias_scope_metadata",
+          bool_setter_for(
+              &DebugOptions::set_xla_llvm_enable_alias_scope_metadata),
+          flag_values->xla_llvm_enable_alias_scope_metadata(),
+          "In LLVM-based backends, enable the emission of "
+          "!alias.scope metadata in the generated IR."),
+      tensorflow::Flag(
+          "xla_llvm_enable_noalias_metadata",
+          bool_setter_for(&DebugOptions::set_xla_llvm_enable_noalias_metadata),
+          flag_values->xla_llvm_enable_noalias_metadata(),
+          "In LLVM-based backends, enable the emission of "
+          "!noalias metadata in the generated IR."),
+      tensorflow::Flag(
+          "xla_llvm_enable_invariant_load_metadata",
+          bool_setter_for(
+              &DebugOptions::set_xla_llvm_enable_invariant_load_metadata),
+          flag_values->xla_llvm_enable_invariant_load_metadata(),
+          "In LLVM-based backends, enable the emission of "
+          "!invariant.load metadata in "
+          "the generated IR."),
+      tensorflow::Flag(
+          "xla_llvm_disable_expensive_passes",
+          bool_setter_for(&DebugOptions::set_xla_llvm_disable_expensive_passes),
+          flag_values->xla_llvm_disable_expensive_passes(),
+          "In LLVM-based backends, disable a custom set of "
+          "expensive optimization passes."),
+      tensorflow::Flag(
+          "xla_backend_optimization_level",
+          int32_setter_for(&DebugOptions::set_xla_backend_optimization_level),
+          flag_values->xla_backend_optimization_level(),
+          "Numerical optimization level for the XLA compiler backend."),
+      tensorflow::Flag(
+          "xla_disable_hlo_passes", setter_for_xla_disable_hlo_passes, "",
+          "Comma-separated list of hlo passes to be disabled. These names "
+          "must exactly match the passes' names; no whitespace around "
+          "commas."),
+      tensorflow::Flag(
+          "xla_embed_ir_in_executable",
+          bool_setter_for(&DebugOptions::set_xla_embed_ir_in_executable),
+          flag_values->xla_embed_ir_in_executable(),
+          "Embed the compiler IR as a string in the executable."),
+      tensorflow::Flag(
+          "xla_dump_ir_to", flag_values->mutable_xla_dump_ir_to(),
+          "Dump the compiler IR into this directory as individual files."),
+      tensorflow::Flag(
+          "xla_eliminate_hlo_implicit_broadcast",
+          bool_setter_for(
+              &DebugOptions::set_xla_eliminate_hlo_implicit_broadcast),
+          flag_values->xla_eliminate_hlo_implicit_broadcast(),
+          "Eliminate implicit broadcasts when lowering user "
+          "computations to HLO instructions; use explicit "
+          "broadcast instead."),
+      tensorflow::Flag(
+          "xla_cpu_multi_thread_eigen",
+          bool_setter_for(&DebugOptions::set_xla_cpu_multi_thread_eigen),
+          flag_values->xla_cpu_multi_thread_eigen(),
+          "When generating calls to Eigen in the CPU backend, "
+          "use multi-threaded Eigen mode."),
+      tensorflow::Flag("xla_gpu_cuda_data_dir",
+                       flag_values->mutable_xla_gpu_cuda_data_dir(),
+                       "If non-empty, speficies a local directory containing "
+                       "ptxas and nvvm libdevice files; otherwise we use "
+                       "those from runfile directories."),
+      tensorflow::Flag("xla_gpu_ftz",
+                       bool_setter_for(&DebugOptions::set_xla_gpu_ftz),
+                       flag_values->xla_gpu_ftz(),
+                       "If true, flush-to-zero semantics are enabled in the "
+                       "code generated for GPUs."),
+      tensorflow::Flag(
+          "xla_gpu_disable_multi_streaming",
+          bool_setter_for(&DebugOptions::set_xla_gpu_disable_multi_streaming),
+          flag_values->xla_gpu_disable_multi_streaming(),
+          "If true, multi-streaming in the GPU backend is disabled."),
+      tensorflow::Flag(
+          "xla_dump_hlo_proto_to", flag_values->mutable_xla_dump_hlo_proto_to(),
+          "Dump compilation artifacts as proto binary into this directory."),
+      tensorflow::Flag(
+          "xla_test_all_output_layouts",
+          bool_setter_for(&DebugOptions::set_xla_test_all_output_layouts),
+          flag_values->xla_test_all_output_layouts(),
+          "Let ClientLibraryTestBase::ComputeAndCompare* test "
+          "all permutations of output layouts. For example, with "
+          "a 3D shape, all permutations of the set {0, 1, 2} are "
+          "tried."),
+      tensorflow::Flag(
+          "xla_test_all_input_layouts",
+          bool_setter_for(&DebugOptions::set_xla_test_all_input_layouts),
+          flag_values->xla_test_all_input_layouts(),
+          "Let ClientLibraryTestBase::ComputeAndCompare* test "
+          "all permutations of *input* layouts. For example, for "
+          "2 input arguments with 2D shape and 4D shape, the "
+          "computation will run 2! * 4! times for every possible "
+          "layouts"),
+      tensorflow::Flag(
+          "xla_hlo_profile",
+          bool_setter_for(&DebugOptions::set_xla_hlo_profile),
+          flag_values->xla_hlo_profile(),
+          "Instrument the computation to collect per-HLO cycle counts"),
+      tensorflow::Flag("xla_dump_computations_to",
+                       flag_values->mutable_xla_dump_computations_to(),
+                       "Dump computations that XLA executes into the provided "
+                       "directory path"),
+      tensorflow::Flag("xla_dump_executions_to",
+                       flag_values->mutable_xla_dump_executions_to(),
+                       "Dump parameters and results of computations that XLA "
+                       "executes into the provided directory path"),
+      tensorflow::Flag("xla_backend_extra_options",
+                       setter_for_xla_backend_extra_options, "",
+                       "Extra options to pass to a backend; "
+                       "comma-separated list of 'key=val' strings (=val "
+                       "may be omitted); no whitespace around commas."),
+      tensorflow::Flag("xla_reduce_precision", setter_for_xla_reduce_precision,
+                       "",
+                       "Directions for adding reduce-precision operations. "
+                       "Format is 'LOCATION=E,M:OPS;NAMES' where LOCATION is "
+                       "the class of locations in which to insert the "
+                       "operations (e.g., 'OP_OUTPUTS'), E and M are the "
+                       "exponent and matissa bit counts respectively, and "
+                       "OPS and NAMES are comma-separated (no spaces) lists "
+                       "of the operation types and names to which to attach "
+                       "the reduce-precision operations.  The NAMES string "
+                       "and its preceding ';' may be omitted.  This option "
+                       "may be repeated to define multiple sets of added "
+                       "reduce-precision operations."),
+      tensorflow::Flag(
+          "xla_gpu_use_cudnn_batchnorm",
+          bool_setter_for(&DebugOptions::set_xla_gpu_use_cudnn_batchnorm),
+          flag_values->xla_gpu_use_cudnn_batchnorm(),
+          "Allows the GPU backend to implement batchnorm HLOs using cudnn, "
+          "rather than expanding them to a soup of HLOs."),
+  });
   ParseFlagsFromEnv(*flag_objects);
 }
 
diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc
index 93d3cd425f0a868b51677058796e9c40c2d3dff8..7f0201e74ab51f8f9906dd045ae7dfb96158f8e9 100644
--- a/tensorflow/compiler/xla/literal_util.cc
+++ b/tensorflow/compiler/xla/literal_util.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -27,14 +27,20 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/core/casts.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
+
+using tensorflow::strings::Printf;
+using tensorflow::strings::StrCat;
+
+namespace xla {
+
 namespace {
-using tensorflow::int64;
 
 constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__;
 
@@ -46,9 +52,8 @@ void ConvertEndianShort(char* bytes, int64 size) {
     std::swap(bytes[i], bytes[i + 1]);
   }
 }
-}  // namespace
 
-namespace xla {
+}  // namespace
 
 std::ostream& operator<<(std::ostream& out, const Literal& literal) {
   out << literal.ToString();
@@ -64,12 +69,12 @@ Literal::StrideConfig::StrideConfig(
   if (!dimensions.empty()) {
     // Selects the shape with the largest minor dimension as the one upon
     // which to run the tight stride loop.
-    if (dimensions[source_shape.layout().minor_to_major()[0]] >=
-        dimensions[dest_shape.layout().minor_to_major()[0]]) {
-      minor_dimension = source_shape.layout().minor_to_major()[0];
+    if (dimensions[LayoutUtil::Minor(source_shape.layout(), 0)] >=
+        dimensions[LayoutUtil::Minor(dest_shape.layout(), 0)]) {
+      minor_dimension = LayoutUtil::Minor(source_shape.layout(), 0);
       dest_stride = IndexUtil::GetDimensionStride(dest_shape, minor_dimension);
     } else {
-      minor_dimension = dest_shape.layout().minor_to_major()[0];
+      minor_dimension = LayoutUtil::Minor(dest_shape.layout(), 0);
       source_stride =
           IndexUtil::GetDimensionStride(source_shape, minor_dimension);
     }
@@ -78,52 +83,134 @@ Literal::StrideConfig::StrideConfig(
   }
 }
 
+Literal::Literal(const Shape& shape)
+    : Literal(shape, /*allocate_arrays=*/true) {}
+
+Literal::Literal(const Shape& shape, bool allocate_arrays)
+    : shape_(shape), pieces_(shape), owns_buffers_(true) {
+  CHECK(LayoutUtil::HasLayout(shape));
+  for (auto& pair : pieces_) {
+    const ShapeIndex& index = pair.first;
+    Piece& piece = pair.second;
+
+    piece.set_subshape(&ShapeUtil::GetSubshape(shape_, index));
+    const Shape& subshape = piece.subshape();
+    if (ShapeUtil::IsArray(subshape)) {
+      if (allocate_arrays) {
+        piece.set_buffer(new char[piece.size_bytes()]);
+        if (LayoutUtil::IsSparseArray(subshape)) {
+          piece.set_sparse_indices(new SparseIndexArray(
+              LayoutUtil::MaxSparseElements(subshape.layout()),
+              ShapeUtil::Rank(subshape)));
+        }
+      } else {
+        piece.set_buffer(nullptr);
+      }
+    }
+  }
+}
+
+Literal::~Literal() { DeallocateBuffers(); }
+
+void Literal::DeallocateBuffers() {
+  if (owns_buffers_) {
+    for (auto& pair : pieces_) {
+      Piece& piece = pair.second;
+      if (piece.buffer() != nullptr) {
+        delete[] piece.buffer();
+        delete piece.sparse_indices();
+      }
+    }
+  }
+}
+
+Literal::Literal(Literal&& other) {
+  shape_ = std::move(other.shape_);
+  pieces_ = std::move(other.pieces_);
+  // We need to iterate through the pieces to set the subshape pointer
+  // properly. It must refer to subshapes within shape_.
+  for (auto& pair : pieces_) {
+    const ShapeIndex& index = pair.first;
+    Piece& piece = pair.second;
+    piece.set_subshape(&ShapeUtil::GetSubshape(shape_, index));
+  }
+  owns_buffers_ = other.owns_buffers_;
+
+  other.shape_ = ShapeUtil::MakeNil();
+  other.pieces_ = ShapeTree<Piece>(other.shape_);
+  other.piece({}).set_subshape(&other.shape_);
+}
+
+Literal& Literal::operator=(Literal&& other) {
+  DeallocateBuffers();
+  shape_ = std::move(other.shape_);
+  pieces_ = std::move(other.pieces_);
+  // We need to iterate through the pieces to set the subshape pointer
+  // properly. It must refer to subshapes within shape_.
+  for (auto& pair : pieces_) {
+    const ShapeIndex& index = pair.first;
+    Piece& piece = pair.second;
+    piece.set_subshape(&ShapeUtil::GetSubshape(shape_, index));
+  }
+  owns_buffers_ = other.owns_buffers_;
+
+  other.shape_ = ShapeUtil::MakeNil();
+  other.pieces_ = ShapeTree<Piece>(other.shape_);
+  other.piece({}).set_subshape(&other.shape_);
+  return *this;
+}
+
 std::unique_ptr<Literal> Literal::CreateFromShape(const Shape& shape) {
-  auto literal = MakeUnique<Literal>();
-  *literal->mutable_shape() = shape;
-  if (ShapeUtil::IsTuple(shape)) {
-    int64 num_elements = ShapeUtil::TupleElementCount(shape);
-    literal->tuple_literals_.resize(num_elements);
-    for (int i = 0; i < num_elements; ++i) {
-      std::unique_ptr<Literal> elem =
-          CreateFromShape(ShapeUtil::GetTupleElementShape(shape, i));
-      literal->tuple_literals_[i] = std::move(*elem);
+  auto literal = MakeUnique<Literal>(shape);
+  for (auto& pair : literal->pieces_) {
+    Piece& piece = pair.second;
+    if (ShapeUtil::IsArray(piece.subshape())) {
+      memset(piece.untyped_data(), 0, piece.size_bytes());
     }
-  } else {
-    literal->Reserve(ShapeUtil::ElementsIn(literal->shape()));
   }
   return literal;
 }
 
+const SparseIndexArray* Literal::sparse_indices(
+    const ShapeIndex& shape_index) const {
+  return piece(shape_index).sparse_indices();
+}
+
+SparseIndexArray* Literal::sparse_indices(const ShapeIndex& shape_index) {
+  return piece(shape_index).sparse_indices();
+}
+
 /* static */ std::unique_ptr<Literal> Literal::CreateFromDimensions(
     PrimitiveType primitive_type,
     tensorflow::gtl::ArraySlice<int64> dimensions) {
   return CreateFromShape(ShapeUtil::MakeShape(primitive_type, dimensions));
 }
 
-template <typename T>
-Status Literal::CopyRange(const Literal& src_literal,
-                          tensorflow::gtl::ArraySlice<int64> src_base,
-                          tensorflow::gtl::ArraySlice<int64> dest_base,
-                          tensorflow::gtl::ArraySlice<int64> copy_size) {
-  const Shape& src_shape = src_literal.shape();
-  const Shape& dest_shape = shape();
-  tensorflow::gtl::ArraySlice<T> src_data = src_literal.GetArraySlice<T>();
-  tensorflow::gtl::MutableArraySlice<T> dest_data = GetMutableArraySlice<T>();
-
-  TF_RET_CHECK(ShapeUtil::Rank(src_shape) == src_base.size());
-  TF_RET_CHECK(ShapeUtil::Rank(dest_shape) == dest_base.size());
+template <typename NativeT>
+Status Literal::CopySliceFromInternal(
+    const Literal& src_literal, tensorflow::gtl::ArraySlice<int64> src_base,
+    tensorflow::gtl::ArraySlice<int64> dest_base,
+    tensorflow::gtl::ArraySlice<int64> copy_size) {
+  TF_RET_CHECK(ShapeUtil::Rank(src_literal.shape()) == src_base.size());
+  TF_RET_CHECK(ShapeUtil::Rank(shape()) == dest_base.size());
+
+  auto linear_index = [](const Shape& shape,
+                         tensorflow::gtl::ArraySlice<int64> multi_index) {
+    return IndexUtil::MultidimensionalIndexToLinearIndex(shape, multi_index);
+  };
 
-  if (ShapeUtil::Rank(src_shape) == 0 || ShapeUtil::Rank(dest_shape) == 0) {
+  if (ShapeUtil::Rank(src_literal.shape()) == 0 ||
+      ShapeUtil::Rank(shape()) == 0) {
     // If any of the two shapes are scalars, we can just call the StridedCopy()
     // directly, and we know we will be copying only one value.
     TF_RET_CHECK(copy_size.empty());
-    StridedCopy(dest_data, LinearIndex(dest_base), 0, src_data,
-                src_literal.LinearIndex(src_base), 0, 1);
-  } else if (!ShapeUtil::HasZeroElements(dest_shape) &&
-             !ShapeUtil::HasZeroElements(src_shape)) {
-    // Perform copy if neither src literal nor dest literal has dimensions with
-    // zero element, otherwise it's a no-op.
+    StridedCopy(data<NativeT>(), linear_index(shape(), dest_base), 0,
+                src_literal.data<NativeT>(),
+                linear_index(src_literal.shape(), src_base), 0, 1);
+  } else if (!ShapeUtil::HasZeroElements(shape()) &&
+             !ShapeUtil::HasZeroElements(src_literal.shape())) {
+    // Perform copy if neither src nor dest has dimensions with zero element,
+    // otherwise it's a no-op.
     TF_RET_CHECK(src_base.size() == dest_base.size());
     TF_RET_CHECK(src_base.size() == copy_size.size());
 
@@ -133,7 +220,8 @@ Status Literal::CopyRange(const Literal& src_literal,
     // proper stride size at the matching dimension.
     DimensionVector src_indexes(src_base.size(), 0);
     DimensionVector dest_indexes(dest_base.size(), 0);
-    StrideConfig stride_config(src_shape, dest_shape, copy_size);
+    Literal::StrideConfig stride_config(src_literal.shape(), shape(),
+                                        copy_size);
 
     auto copy_proc = [&](const std::vector<int64>& indexes) {
       // Map from multi-dimensional index, to source index.
@@ -143,89 +231,295 @@ Status Literal::CopyRange(const Literal& src_literal,
       std::transform(indexes.begin(), indexes.end(), dest_base.begin(),
                      dest_indexes.begin(), std::plus<int64>());
 
-      int64 src_index = src_literal.LinearIndex(src_indexes);
-      int64 dest_index = LinearIndex(dest_indexes);
+      int64 src_index = linear_index(src_literal.shape(), src_indexes);
+      int64 dest_index = linear_index(shape(), dest_indexes);
 
-      StridedCopy(dest_data, dest_index, stride_config.dest_stride, src_data,
-                  src_index, stride_config.source_stride,
-                  stride_config.minor_loop_size);
+      StridedCopy(data<NativeT>(), dest_index, stride_config.dest_stride,
+                  src_literal.data<NativeT>(), src_index,
+                  stride_config.source_stride, stride_config.minor_loop_size);
       return true;
     };
 
-    ShapeUtil::ForEachIndex(src_shape, stride_config.base,
+    ShapeUtil::ForEachIndex(src_literal.shape(), stride_config.base,
                             stride_config.dimensions, stride_config.step,
                             copy_proc);
   }
   return Status::OK();
 }
 
-Status Literal::Copy(const Literal& src_literal,
-                     tensorflow::gtl::ArraySlice<int64> src_base,
-                     tensorflow::gtl::ArraySlice<int64> dest_base,
-                     tensorflow::gtl::ArraySlice<int64> copy_size) {
+std::vector<Literal> Literal::DecomposeTuple() {
+  CHECK(ShapeUtil::IsTuple(shape()));
+  std::vector<Literal> elements;
+  for (int i = 0; i < ShapeUtil::TupleElementCount(shape()); ++i) {
+    elements.push_back(Literal(ShapeUtil::GetSubshape(shape(), {i}),
+                               /*allocate_arrays=*/false));
+    Literal& element = elements.back();
+    for (auto& pair : element.pieces_) {
+      const ShapeIndex& index = pair.first;
+      Piece& dest_piece = pair.second;
+      ShapeIndex src_index = {i};
+      for (int64 j : index) {
+        src_index.push_back(j);
+      }
+      Piece& src_piece = piece(src_index);
+
+      // Move the respective buffer and sparse indices over to the element
+      // Literal.
+      dest_piece.set_buffer(src_piece.buffer());
+      src_piece.set_buffer(nullptr);
+      dest_piece.set_sparse_indices(src_piece.sparse_indices());
+      src_piece.set_sparse_indices(nullptr);
+    }
+  }
+  // Set this literal to be nil-shaped.
+  *this = Literal();
+  return elements;
+}
+
+/* static */ Literal Literal::MoveIntoTuple(
+    tensorflow::gtl::MutableArraySlice<Literal> elements) {
+  std::vector<Shape> element_shapes;
+  for (const Literal& element : elements) {
+    element_shapes.push_back(element.shape());
+  }
+  Literal literal(ShapeUtil::MakeTupleShape(element_shapes),
+                  /*allocate_arrays=*/false);
+  for (int i = 0; i < elements.size(); ++i) {
+    TF_CHECK_OK(
+        literal.MoveFrom(std::move(elements[i]), /*dest_shape_index=*/{i}));
+  }
+  return literal;
+}
+
+namespace {
+
+// Copies the elements in 'src' to 'dest'. The shape and layout of the data in
+// the array slices are indicated by dest_shape and src_shape respectively.
+template <typename NativeT>
+void CopyElementsBetween(tensorflow::gtl::MutableArraySlice<NativeT> dest,
+                         tensorflow::gtl::ArraySlice<NativeT> src,
+                         const Shape& dest_shape, const Shape& src_shape) {
+  CHECK(ShapeUtil::Compatible(dest_shape, src_shape));
+  if (ShapeUtil::HasZeroElements(dest_shape)) {
+    return;
+  }
+  std::vector<int64> index(ShapeUtil::Rank(dest_shape));
+  do {
+    dest[IndexUtil::MultidimensionalIndexToLinearIndex(dest_shape, index)] =
+        src[IndexUtil::MultidimensionalIndexToLinearIndex(src_shape, index)];
+  } while (IndexUtil::BumpIndices(dest_shape, &index));
+}
+
+}  // namespace
+
+Status Literal::Piece::CopyFrom(const Literal::Piece& src) {
+  if (ShapeUtil::Equal(subshape(), src.subshape())) {
+    // If the layouts are equal it's faster just to memcpy.
+    memcpy(buffer(), src.buffer(), src.size_bytes());
+  } else {
+    TF_RET_CHECK(ShapeUtil::Compatible(src.subshape(), subshape()));
+    std::vector<int64> origin(ShapeUtil::Rank(subshape()), 0);
+    switch (subshape().element_type()) {
+#define COPY_ELEMENTS(XLA_T, NATIVE_T)                                    \
+  case (XLA_T):                                                           \
+    CopyElementsBetween<NATIVE_T>(data<NATIVE_T>(), src.data<NATIVE_T>(), \
+                                  subshape(), src.subshape());            \
+    break;
+      COPY_ELEMENTS(U8, uint8);
+      COPY_ELEMENTS(U16, uint16);
+      COPY_ELEMENTS(U32, uint32);
+      COPY_ELEMENTS(U64, uint64);
+      COPY_ELEMENTS(S8, int8);
+      COPY_ELEMENTS(S16, int16);
+      COPY_ELEMENTS(S32, int32);
+      COPY_ELEMENTS(S64, int64);
+      COPY_ELEMENTS(F16, half);
+      COPY_ELEMENTS(BF16, bfloat16);
+      COPY_ELEMENTS(F32, float);
+      COPY_ELEMENTS(F64, double);
+      COPY_ELEMENTS(C64, complex64);
+      COPY_ELEMENTS(PRED, bool);
+#undef COPY_ELEMENTS
+      default:
+        return Unimplemented(
+            "Unhandled primitive type %s",
+            PrimitiveType_Name(subshape().element_type()).c_str());
+    }
+  }
+  return Status::OK();
+}
+
+Status Literal::CopyFrom(const Literal& src_literal,
+                         const ShapeIndex& dest_shape_index,
+                         const ShapeIndex& src_shape_index) {
+  const Shape& dest_subshape =
+      ShapeUtil::GetSubshape(shape(), dest_shape_index);
+  const Shape& src_subshape =
+      ShapeUtil::GetSubshape(src_literal.shape(), src_shape_index);
+  if (!ShapeUtil::Compatible(dest_subshape, src_subshape)) {
+    return InvalidArgument(
+        "Destination subshape incompatible with source subshape: %s vs %s",
+        ShapeUtil::HumanString(dest_subshape).c_str(),
+        ShapeUtil::HumanString(src_subshape).c_str());
+  }
+
+  for (auto& pair : pieces_) {
+    const ShapeIndex& index = pair.first;
+    Piece& piece = pair.second;
+    if (!ShapeUtil::IsArray(piece.subshape())) {
+      continue;
+    }
+
+    // Determine if this index is in the part of this literal that we want to
+    // copy over from src_literal.
+    bool in_subtree_to_copy = true;
+    for (int i = 0; i < dest_shape_index.size(); ++i) {
+      if (index[i] != dest_shape_index[i]) {
+        in_subtree_to_copy = false;
+        break;
+      }
+    }
+    if (!in_subtree_to_copy) {
+      continue;
+    }
+
+    // Construct the index of the corresponding piece in the source literal.
+    ShapeIndex src_piece_index = src_shape_index;
+    for (int64 i = dest_shape_index.size(); i < index.size(); ++i) {
+      src_piece_index.push_back(index[i]);
+    }
+
+    TF_RETURN_IF_ERROR(piece.CopyFrom(src_literal.piece(src_piece_index)));
+  }
+  return Status::OK();
+}
+
+Status Literal::MoveFrom(Literal&& src_literal,
+                         const ShapeIndex& dest_shape_index) {
+  const Shape& dest_subshape =
+      ShapeUtil::GetSubshape(shape(), dest_shape_index);
+  if (!ShapeUtil::Equal(dest_subshape, src_literal.shape())) {
+    return InvalidArgument(
+        "Destination subshape not equal to source shape: %s vs %s",
+        ShapeUtil::HumanString(dest_subshape).c_str(),
+        ShapeUtil::HumanString(src_literal.shape()).c_str());
+  }
+
+  if (!(owns_buffers_ && src_literal.owns_buffers_)) {
+    return InvalidArgument(
+        "Source and destination literals must both own their buffers (ie, not "
+        "be views)");
+  }
+
+  for (auto& pair : src_literal.pieces_) {
+    const ShapeIndex& src_index = pair.first;
+    Piece& src_piece = pair.second;
+    if (!ShapeUtil::IsArray(src_piece.subshape())) {
+      continue;
+    }
+
+    ShapeIndex dest_index = dest_shape_index;
+    for (int64 i : src_index) {
+      dest_index.push_back(i);
+    }
+    Piece& dest_piece = piece(dest_index);
+    delete[] dest_piece.buffer();
+    dest_piece.set_buffer(src_piece.buffer());
+    delete dest_piece.sparse_indices();
+    dest_piece.set_sparse_indices(src_piece.sparse_indices());
+  }
+
+  src_literal.shape_ = ShapeUtil::MakeNil();
+  src_literal.pieces_ = ShapeTree<Piece>(src_literal.shape_);
+  src_literal.piece({}).set_subshape(&src_literal.shape_);
+  return Status::OK();
+}
+
+Status Literal::CopySliceFrom(const Literal& src_literal,
+                              tensorflow::gtl::ArraySlice<int64> src_base,
+                              tensorflow::gtl::ArraySlice<int64> dest_base,
+                              tensorflow::gtl::ArraySlice<int64> copy_size) {
+  TF_RET_CHECK(ShapeUtil::IsArray(shape())) << ShapeUtil::HumanString(shape());
+  TF_RET_CHECK(ShapeUtil::IsArray(src_literal.shape()))
+      << ShapeUtil::HumanString(src_literal.shape());
   TF_RET_CHECK(ShapeUtil::SameElementType(src_literal.shape(), shape()));
-  switch (src_literal.shape().element_type()) {
+
+  switch (shape().element_type()) {
     case U8:
-      return CopyRange<uint8>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<uint8>(src_literal, src_base, dest_base,
+                                          copy_size);
     case U16:
-      return CopyRange<uint16>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<uint16>(src_literal, src_base, dest_base,
+                                           copy_size);
     case U32:
-      return CopyRange<uint32>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<uint32>(src_literal, src_base, dest_base,
+                                           copy_size);
     case U64:
-      return CopyRange<uint64>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<uint64>(src_literal, src_base, dest_base,
+                                           copy_size);
     case S8:
-      return CopyRange<int8>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<int8>(src_literal, src_base, dest_base,
+                                         copy_size);
     case S16:
-      return CopyRange<int16>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<int16>(src_literal, src_base, dest_base,
+                                          copy_size);
     case S32:
-      return CopyRange<int32>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<int32>(src_literal, src_base, dest_base,
+                                          copy_size);
     case S64:
-      return CopyRange<int64>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<int64>(src_literal, src_base, dest_base,
+                                          copy_size);
     case F16:
-      return CopyRange<half>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<half>(src_literal, src_base, dest_base,
+                                         copy_size);
     case BF16:
-      return CopyRange<bfloat16>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<bfloat16>(src_literal, src_base, dest_base,
+                                             copy_size);
     case F32:
-      return CopyRange<float>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<float>(src_literal, src_base, dest_base,
+                                          copy_size);
     case F64:
-      return CopyRange<double>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<double>(src_literal, src_base, dest_base,
+                                           copy_size);
     case C64:
-      return CopyRange<complex64>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<complex64>(src_literal, src_base, dest_base,
+                                              copy_size);
     case PRED:
-      return CopyRange<bool>(src_literal, src_base, dest_base, copy_size);
+      return CopySliceFromInternal<bool>(src_literal, src_base, dest_base,
+                                         copy_size);
     default:
       break;
   }
-  return Unimplemented("Unhandled primitive type %d",
-                       src_literal.shape().element_type());
+  return Unimplemented("Unhandled primitive type %d", shape().element_type());
 }
 
 /* static */ Literal Literal::Zero(PrimitiveType primitive_type) {
   switch (primitive_type) {
     case U8:
-      return *Literal::CreateR0<uint8>(0);
+      return std::move(*Literal::CreateR0<uint8>(0));
     case U32:
-      return *Literal::CreateR0<uint32>(0);
+      return std::move(*Literal::CreateR0<uint32>(0));
     case U64:
-      return *Literal::CreateR0<uint64>(0);
+      return std::move(*Literal::CreateR0<uint64>(0));
     case S8:
-      return *Literal::CreateR0<int8>(0);
+      return std::move(*Literal::CreateR0<int8>(0));
     case S32:
-      return *Literal::CreateR0<int32>(0);
+      return std::move(*Literal::CreateR0<int32>(0));
     case S64:
-      return *Literal::CreateR0<int64>(0);
+      return std::move(*Literal::CreateR0<int64>(0));
     case F16:
-      return *Literal::CreateR0<half>(static_cast<half>(0.0f));
+      return std::move(*Literal::CreateR0<half>(static_cast<half>(0.0f)));
     case BF16:
-      return *Literal::CreateR0<bfloat16>(static_cast<bfloat16>(0.0f));
+      return std::move(
+          *Literal::CreateR0<bfloat16>(static_cast<bfloat16>(0.0f)));
     case F32:
-      return *Literal::CreateR0<float>(0);
+      return std::move(*Literal::CreateR0<float>(0));
     case F64:
-      return *Literal::CreateR0<double>(0);
+      return std::move(*Literal::CreateR0<double>(0));
     case C64:
-      return *Literal::CreateR0<complex64>(0);
+      return std::move(*Literal::CreateR0<complex64>(0));
     case PRED:
-      return *Literal::CreateR0<bool>(false);
+      return std::move(*Literal::CreateR0<bool>(false));
     case S16:
     case U16:
       LOG(FATAL) << "u16/s16 literals not yet implemented";
@@ -241,30 +535,33 @@ Status Literal::Copy(const Literal& src_literal,
 /* static */ Literal Literal::One(PrimitiveType primitive_type) {
   switch (primitive_type) {
     case U8:
-      return *Literal::CreateR0<uint8>(1);
+      return std::move(*Literal::CreateR0<uint8>(1));
     case U32:
-      return *Literal::CreateR0<uint32>(1);
+      return std::move(*Literal::CreateR0<uint32>(1));
     case U64:
-      return *Literal::CreateR0<uint64>(1);
+      return std::move(*Literal::CreateR0<uint64>(1));
     case S8:
-      return *Literal::CreateR0<int8>(1);
+      return std::move(*Literal::CreateR0<int8>(1));
     case S32:
-      return *Literal::CreateR0<int32>(1);
+      return std::move(*Literal::CreateR0<int32>(1));
     case S64:
-      return *Literal::CreateR0<int64>(1);
+      return std::move(*Literal::CreateR0<int64>(1));
+    case F16:
+      return std::move(*Literal::CreateR0<half>(static_cast<half>(1.0f)));
+    case BF16:
+      return std::move(
+          *Literal::CreateR0<bfloat16>(static_cast<bfloat16>(1.0f)));
     case F32:
-      return *Literal::CreateR0<float>(1);
+      return std::move(*Literal::CreateR0<float>(1));
     case F64:
-      return *Literal::CreateR0<double>(1);
+      return std::move(*Literal::CreateR0<double>(1));
     case C64:
-      return *Literal::CreateR0<complex64>(1);
+      return std::move(*Literal::CreateR0<complex64>(1));
     case PRED:
-      return *Literal::CreateR0<bool>(true);
+      return std::move(*Literal::CreateR0<bool>(true));
     case S16:
     case U16:
       LOG(FATAL) << "u16/s16 literals not yet implemented";
-    case F16:
-      return *Literal::CreateR0<half>(static_cast<half>(1.0f));
     case TUPLE:
       LOG(FATAL) << "tuple element type cannot take on value of 1";
     case OPAQUE:
@@ -277,35 +574,42 @@ Status Literal::Copy(const Literal& src_literal,
 /* static */ Literal Literal::MinValue(PrimitiveType primitive_type) {
   switch (primitive_type) {
     case U8:
-      return *Literal::CreateR0<uint8>(std::numeric_limits<uint8>::min());
+      return std::move(
+          *Literal::CreateR0<uint8>(std::numeric_limits<uint8>::min()));
     case U32:
-      return *Literal::CreateR0<uint32>(std::numeric_limits<uint32>::min());
+      return std::move(
+          *Literal::CreateR0<uint32>(std::numeric_limits<uint32>::min()));
     case U64:
-      return *Literal::CreateR0<uint64>(std::numeric_limits<uint64>::min());
+      return std::move(
+          *Literal::CreateR0<uint64>(std::numeric_limits<uint64>::min()));
     case S8:
-      return *Literal::CreateR0<int8>(std::numeric_limits<int8>::min());
+      return std::move(
+          *Literal::CreateR0<int8>(std::numeric_limits<int8>::min()));
     case S32:
-      return *Literal::CreateR0<int32>(std::numeric_limits<int32>::min());
+      return std::move(
+          *Literal::CreateR0<int32>(std::numeric_limits<int32>::min()));
     case S64:
-      return *Literal::CreateR0<int64>(std::numeric_limits<int64>::min());
+      return std::move(
+          *Literal::CreateR0<int64>(std::numeric_limits<int64>::min()));
     case F32:
-      return *Literal::CreateR0<float>(-std::numeric_limits<float>::infinity());
+      return std::move(
+          *Literal::CreateR0<float>(-std::numeric_limits<float>::infinity()));
     case F64:
-      return *Literal::CreateR0<double>(
-          -std::numeric_limits<double>::infinity());
+      return std::move(
+          *Literal::CreateR0<double>(-std::numeric_limits<double>::infinity()));
     case C64:
       LOG(FATAL) << "C64 element type has no minimum value";
     case PRED:
-      return *Literal::CreateR0<bool>(false);
+      return std::move(*Literal::CreateR0<bool>(false));
     case S16:
     case U16:
       LOG(FATAL) << "u16/s16 literals not yet implemented";
     case F16:
-      return *Literal::CreateR0<half>(
-          static_cast<half>(-std::numeric_limits<float>::infinity()));
+      return std::move(*Literal::CreateR0<half>(
+          static_cast<half>(-std::numeric_limits<float>::infinity())));
     case BF16:
-      return *Literal::CreateR0<bfloat16>(
-          static_cast<bfloat16>(-std::numeric_limits<float>::infinity()));
+      return std::move(*Literal::CreateR0<bfloat16>(
+          static_cast<bfloat16>(-std::numeric_limits<float>::infinity())));
     case TUPLE:
       LOG(FATAL) << "tuple element type has no minimum value";
     case OPAQUE:
@@ -318,33 +622,40 @@ Status Literal::Copy(const Literal& src_literal,
 /* static */ Literal Literal::MaxValue(PrimitiveType primitive_type) {
   switch (primitive_type) {
     case U8:
-      return *Literal::CreateR0<uint8>(std::numeric_limits<uint8>::max());
+      return std::move(
+          *Literal::CreateR0<uint8>(std::numeric_limits<uint8>::max()));
     case U32:
-      return *Literal::CreateR0<uint32>(std::numeric_limits<uint32>::max());
+      return std::move(
+          *Literal::CreateR0<uint32>(std::numeric_limits<uint32>::max()));
     case U64:
-      return *Literal::CreateR0<uint64>(std::numeric_limits<uint64>::max());
+      return std::move(
+          *Literal::CreateR0<uint64>(std::numeric_limits<uint64>::max()));
     case S8:
-      return *Literal::CreateR0<int8>(std::numeric_limits<int8>::max());
+      return std::move(
+          *Literal::CreateR0<int8>(std::numeric_limits<int8>::max()));
     case S32:
-      return *Literal::CreateR0<int32>(std::numeric_limits<int32>::max());
+      return std::move(
+          *Literal::CreateR0<int32>(std::numeric_limits<int32>::max()));
     case S64:
-      return *Literal::CreateR0<int64>(std::numeric_limits<int64>::max());
+      return std::move(
+          *Literal::CreateR0<int64>(std::numeric_limits<int64>::max()));
     case F32:
-      return *Literal::CreateR0<float>(std::numeric_limits<float>::infinity());
+      return std::move(
+          *Literal::CreateR0<float>(std::numeric_limits<float>::infinity()));
     case F64:
-      return *Literal::CreateR0<double>(
-          std::numeric_limits<double>::infinity());
+      return std::move(
+          *Literal::CreateR0<double>(std::numeric_limits<double>::infinity()));
     case PRED:
-      return *Literal::CreateR0<bool>(true);
+      return std::move(*Literal::CreateR0<bool>(true));
     case S16:
     case U16:
       LOG(FATAL) << "u16/s16 literals not yet implemented";
     case F16:
-      return *Literal::CreateR0<half>(
-          static_cast<half>(std::numeric_limits<float>::infinity()));
+      return std::move(*Literal::CreateR0<half>(
+          static_cast<half>(std::numeric_limits<float>::infinity())));
     case BF16:
-      return *Literal::CreateR0<bfloat16>(
-          static_cast<bfloat16>(std::numeric_limits<float>::infinity()));
+      return std::move(*Literal::CreateR0<bfloat16>(
+          static_cast<bfloat16>(std::numeric_limits<float>::infinity())));
     case TUPLE:
       LOG(FATAL) << "tuple element type has no maximum value";
     case OPAQUE:
@@ -356,17 +667,29 @@ Status Literal::Copy(const Literal& src_literal,
 
 /* static */ std::unique_ptr<Literal> Literal::CreateR1(
     const tensorflow::core::Bitmap& values) {
-  auto literal = MakeUnique<Literal>();
+  auto literal = MakeUnique<Literal>(
+      ShapeUtil::MakeShape(PRED, {static_cast<int64>(values.bits())}));
   literal->PopulateR1(values);
   return literal;
 }
 
+void Literal::PopulateR1(const tensorflow::core::Bitmap& values) {
+  CHECK(ShapeUtil::IsArray(shape()));
+  CHECK_EQ(ShapeUtil::Rank(shape()), 1);
+  CHECK_EQ(element_count(), values.bits());
+  CHECK_EQ(shape().element_type(), PRED);
+  for (int64 i = 0; i < static_cast<int64>(values.bits()); ++i) {
+    Set({i}, values.get(i));
+  }
+}
+
 /* static */ std::unique_ptr<Literal> Literal::CreateR1U8(
     tensorflow::StringPiece value) {
-  auto literal = MakeUnique<Literal>();
-  *literal->mutable_shape() =
-      ShapeUtil::MakeShape(U8, {static_cast<int64>(value.size())});
-  literal->set_u8s(tensorflow::StringPiece(value.ToString()));
+  auto literal = MakeUnique<Literal>(
+      ShapeUtil::MakeShape(U8, {static_cast<int64>(value.size())}));
+  for (int i = 0; i < value.size(); ++i) {
+    literal->Set<uint8>({i}, value[i]);
+  }
   return literal;
 }
 
@@ -380,46 +703,50 @@ Status Literal::Copy(const Literal& src_literal,
 
 std::unique_ptr<Literal> Literal::Relayout(
     const Layout& new_layout, const ShapeIndex& shape_index) const {
-  std::unique_ptr<Literal> outer_result = CloneToUnique();
-
-  const Literal* copy_from = this;
-  Literal* copy_to = outer_result.get();
-  for (int64 i = 0; i < shape_index.size(); i++) {
-    *ShapeUtil::GetMutableSubshape(copy_to->mutable_shape(), {shape_index, i})
-         ->mutable_layout() = new_layout;
-    copy_from = &copy_from->tuple_literals_[shape_index[i]];
-    copy_to = &copy_to->tuple_literals_[shape_index[i]];
-  }
-
-  DimensionVector base(ShapeUtil::Rank(copy_from->shape()), 0);
-  DimensionVector copy_size(copy_from->shape().dimensions().begin(),
-                            copy_from->shape().dimensions().end());
+  // Create new shape with 'new_layout' set at the given shape index.
+  Shape new_shape = shape();
+  Shape* subshape = ShapeUtil::GetMutableSubshape(&new_shape, shape_index);
+  TF_CHECK_OK(LayoutUtil::ValidateLayoutForShape(new_layout, *subshape));
+  *subshape->mutable_layout() = new_layout;
+  auto result = MakeUnique<Literal>(new_shape);
+  TF_CHECK_OK(result->CopyFrom(*this));
+  return result;
+}
 
-  CHECK(ShapeUtil::IsArray(copy_from->shape()));
-  CHECK(ShapeUtil::IsArray(copy_to->shape()));
-  *copy_to->mutable_shape()->mutable_layout() = new_layout;
-  TF_CHECK_OK(copy_to->Copy(*copy_from, base, base, copy_size));
-  return outer_result;
+std::unique_ptr<Literal> Literal::Relayout(
+    const Shape& shape_with_layout) const {
+  CHECK(ShapeUtil::Compatible(shape_with_layout, shape()))
+      << "Given shape_with_layout " << ShapeUtil::HumanString(shape_with_layout)
+      << " not compatible with literal shape "
+      << ShapeUtil::HumanString(shape());
+  std::unique_ptr<Literal> result = CreateFromShape(shape_with_layout);
+  ShapeUtil::ForEachSubshape(
+      result->shape(),
+      [this, &result](const Shape& subshape, const ShapeIndex& index) {
+        if (ShapeUtil::IsArray(subshape)) {
+          TF_CHECK_OK(result->CopyFrom(*this,
+                                       /*dest_shape_index=*/index,
+                                       /*src_shape_index=*/index));
+        }
+      });
+  return result;
 }
 
 StatusOr<std::unique_ptr<Literal>> Literal::Reshape(
     tensorflow::gtl::ArraySlice<int64> dimensions) const {
-  if (ShapeUtil::IsTuple(shape())) {
+  if (!ShapeUtil::IsArray(shape())) {
     return InvalidArgument("Reshape does not support tuples.");
   }
   std::unique_ptr<Literal> output;
   if (!LayoutUtil::IsMonotonicWithDim0Major(shape().layout())) {
-    std::vector<int64> minor_to_major(ShapeUtil::Rank(shape()));
-    std::iota(minor_to_major.rbegin(), minor_to_major.rend(),
-              static_cast<int64>(0));
-    output = Relayout(LayoutUtil::MakeLayout(minor_to_major));
+    output =
+        Relayout(LayoutUtil::GetDefaultLayoutForRank(ShapeUtil::Rank(shape())));
   } else {
     output = CloneToUnique();
   }
   // Because the layout is monotonic, we can simply reuse the same sequence of
   // values without changing their order.
-  *output->mutable_shape() =
-      ShapeUtil::MakeShape(shape().element_type(), dimensions);
+  output->shape_ = ShapeUtil::MakeShape(shape().element_type(), dimensions);
 
   int64 elements_before = ShapeUtil::ElementsIn(shape());
   int64 elements_after = ShapeUtil::ElementsIn(output->shape());
@@ -435,7 +762,7 @@ StatusOr<std::unique_ptr<Literal>> Literal::Reshape(
 
 std::unique_ptr<Literal> Literal::Transpose(
     tensorflow::gtl::ArraySlice<int64> permutation) const {
-  CHECK(!ShapeUtil::IsTuple(shape())) << "Tuple is not supported for transpose";
+  CHECK(ShapeUtil::IsArray(shape())) << "Tuple is not supported for transpose";
   CHECK(IsPermutation(permutation, ShapeUtil::Rank(shape())))
       << "Given permutation is not a permutation of dimension numbers";
   // To transpose the array, we just permute the dimensions and layout, and
@@ -458,23 +785,24 @@ std::unique_ptr<Literal> Literal::Transpose(
   // dimension has within the transposed array, a layout is affine if
   // MinMaj(Di) == TMinMaj(T(Di)), with TMinMaj() being the minor to major
   // vector of the affine layout.
+  CHECK(LayoutUtil::IsDenseArray(permuted_shape));
   Layout* layout = permuted_shape.mutable_layout();
   layout->clear_minor_to_major();
-  for (auto index : shape().layout().minor_to_major()) {
+  for (auto index : LayoutUtil::MinorToMajor(shape())) {
     layout->add_minor_to_major(inverse_permutation[index]);
   }
   std::unique_ptr<Literal> new_literal = CreateFromShape(permuted_shape);
   DCHECK_GE(ShapeUtil::ByteSizeOf(new_literal->shape()),
             ShapeUtil::ByteSizeOf(shape()));
-  std::memcpy(new_literal->MutableInternalData(), InternalData(),
-              ShapeUtil::ByteSizeOf(shape()));
+  std::memcpy(new_literal->root_piece().buffer(), root_piece().buffer(),
+              root_piece().size_bytes());
   return new_literal;
 }
 
 std::unique_ptr<Literal> Literal::Slice(
     tensorflow::gtl::ArraySlice<int64> start_indices,
     tensorflow::gtl::ArraySlice<int64> limit_indices) const {
-  CHECK(!ShapeUtil::IsTuple(shape())) << "tuple is not supported for reshape";
+  CHECK(ShapeUtil::IsArray(shape())) << "tuple is not supported for slice";
 
   DimensionVector result_dimensions;
   for (int64 dnum = 0; dnum < ShapeUtil::Rank(shape()); ++dnum) {
@@ -484,13 +812,11 @@ std::unique_ptr<Literal> Literal::Slice(
     CHECK_GT(dimension, 0);
     result_dimensions.push_back(dimension);
   }
-  const auto result_shape = ShapeUtil::MakeShapeWithLayout(
-      shape().element_type(), result_dimensions,
-      AsInt64Slice(shape().layout().minor_to_major()));
+  const auto result_shape =
+      ShapeUtil::MakeShapeWithLayout(shape().element_type(), result_dimensions,
+                                     LayoutUtil::MinorToMajor(shape()));
 
-  auto result_literal = MakeUnique<Literal>();
-  *result_literal->mutable_shape() = result_shape;
-  result_literal->Reserve(ShapeUtil::ElementsIn(result_shape));
+  auto result_literal = MakeUnique<Literal>(result_shape);
 
   DimensionVector new_indices(ShapeUtil::Rank(result_shape));
   switch (result_shape.element_type()) {
@@ -530,48 +856,116 @@ std::unique_ptr<Literal> Literal::Slice(
   }
 }
 
+Literal Literal::Clone() const {
+  Literal result(shape());
+  TF_CHECK_OK(result.CopyFrom(*this));
+  return result;
+}
+
 std::unique_ptr<Literal> Literal::CloneToUnique() const {
-  auto unique = MakeUnique<Literal>();
-  *unique = *this;
-  return unique;
+  auto result = MakeUnique<Literal>(shape());
+  TF_CHECK_OK(result->CopyFrom(*this));
+  return result;
 }
 
-string Literal::GetAsString(
-    tensorflow::gtl::ArraySlice<int64> multi_index) const {
-  switch (shape().element_type()) {
+string Literal::GetAsString(tensorflow::gtl::ArraySlice<int64> multi_index,
+                            const ShapeIndex& shape_index) const {
+  const Shape& subshape = ShapeUtil::GetSubshape(shape(), shape_index);
+  CHECK(LayoutUtil::IsDenseArray(subshape));
+  switch (subshape.element_type()) {
     case PRED:
-      return Get<bool>(multi_index) ? "true" : "false";
-    case U8:
-      return tensorflow::strings::StrCat(Get<uint8>(multi_index));
+      return Get<bool>(multi_index, shape_index) ? "true" : "false";
+    case S8:
+      return StrCat(Get<int8>(multi_index, shape_index));
+    case S16:
+      return StrCat(Get<int16>(multi_index, shape_index));
     case S32:
-      return tensorflow::strings::StrCat(Get<int32>(multi_index));
+      return StrCat(Get<int32>(multi_index, shape_index));
     case S64:
-      return tensorflow::strings::StrCat(Get<int64>(multi_index));
+      return StrCat(Get<int64>(multi_index, shape_index));
+    case U8:
+      return StrCat(Get<uint8>(multi_index, shape_index));
+    case U16:
+      return StrCat(Get<uint16>(multi_index, shape_index));
     case U32:
-      return tensorflow::strings::StrCat(Get<uint32>(multi_index));
+      return StrCat(Get<uint32>(multi_index, shape_index));
     case U64:
-      return tensorflow::strings::StrCat(Get<uint64>(multi_index));
+      return StrCat(Get<uint64>(multi_index, shape_index));
+    case F16:
+      return StrCat(Get<half>(multi_index, shape_index));
     case F32:
-      return tensorflow::strings::StrCat(Get<float>(multi_index));
+      return StrCat(Get<float>(multi_index, shape_index));
+    case BF16:
+      return StrCat(
+          static_cast<float>(Get<bfloat16>(multi_index, shape_index)));
     case F64:
-      return tensorflow::strings::StrCat(Get<double>(multi_index));
+      return StrCat(Get<double>(multi_index, shape_index));
     case C64: {
-      complex64 c = Get<complex64>(multi_index);
-      return tensorflow::strings::StrCat("(", c.real(), ", ", c.imag(), ")");
+      complex64 c = Get<complex64>(multi_index, shape_index);
+      return StrCat("(", c.real(), ", ", c.imag(), ")");
     }
+    default:
+      LOG(FATAL) << PrimitiveType_Name(subshape.element_type());
+  }
+}
+
+string Literal::GetSparseElementAsString(int64 sparse_element_number,
+                                         const ShapeIndex& shape_index) const {
+  const Shape& subshape = ShapeUtil::GetSubshape(shape(), shape_index);
+  CHECK(LayoutUtil::IsSparseArray(subshape));
+  switch (subshape.element_type()) {
+    case PRED:
+      return GetSparseElement<bool>(sparse_element_number, shape_index)
+                 ? "true"
+                 : "false";
+    case S8:
+      return StrCat(GetSparseElement<int8>(sparse_element_number, shape_index));
+    case S16:
+      return StrCat(
+          GetSparseElement<int16>(sparse_element_number, shape_index));
+    case S32:
+      return StrCat(
+          GetSparseElement<int32>(sparse_element_number, shape_index));
+    case S64:
+      return StrCat(
+          GetSparseElement<int64>(sparse_element_number, shape_index));
+    case U8:
+      return StrCat(
+          GetSparseElement<uint8>(sparse_element_number, shape_index));
+    case U16:
+      return StrCat(
+          GetSparseElement<uint16>(sparse_element_number, shape_index));
+    case U32:
+      return StrCat(
+          GetSparseElement<uint32>(sparse_element_number, shape_index));
+    case U64:
+      return StrCat(
+          GetSparseElement<uint64>(sparse_element_number, shape_index));
     case F16:
-      return tensorflow::strings::StrCat(Get<half>(multi_index));
+      return StrCat(GetSparseElement<half>(sparse_element_number, shape_index));
+    case F32:
+      return StrCat(
+          GetSparseElement<float>(sparse_element_number, shape_index));
     case BF16:
-      return tensorflow::strings::StrCat(
-          static_cast<float>(Get<bfloat16>(multi_index)));
+      return StrCat(static_cast<float>(
+          GetSparseElement<bfloat16>(sparse_element_number, shape_index)));
+    case F64:
+      return StrCat(
+          GetSparseElement<double>(sparse_element_number, shape_index));
+    case C64: {
+      complex64 c =
+          GetSparseElement<complex64>(sparse_element_number, shape_index);
+      return StrCat("(", c.real(), ", ", c.imag(), ")");
+    }
     default:
-      return tensorflow::strings::StrCat(
-          "[", PrimitiveType_Name(shape().element_type()), "]");
+      LOG(FATAL) << "Invalid element type for sparse arrays: "
+                 << PrimitiveType_Name(subshape.element_type());
   }
 }
 
 StatusOr<int64> Literal::GetIntegralAsS64(
     tensorflow::gtl::ArraySlice<int64> multi_index) const {
+  CHECK(LayoutUtil::IsDenseArray(shape()));
   switch (shape().element_type()) {
     case PRED:
       return Get<bool>(multi_index);
@@ -592,13 +986,83 @@ StatusOr<int64> Literal::GetIntegralAsS64(
   }
 }
 
-int64 Literal::LinearIndex(
-    tensorflow::gtl::ArraySlice<int64> multi_index) const {
-  return IndexUtil::MultidimensionalIndexToLinearIndex(shape(), multi_index);
+tensorflow::gtl::ArraySlice<int64> Literal::GetSparseIndex(
+    int64 sparse_element_number, const ShapeIndex& shape_index) const {
+  const Piece& p = piece(shape_index);
+  CHECK_GE(sparse_element_number, 0);
+  CHECK_LT(sparse_element_number, p.sparse_indices()->index_count());
+  return p.sparse_indices()->At(sparse_element_number);
 }
 
-string Literal::ToString(bool print_layout) const {
-  std::vector<string> pieces;
+void Literal::SortSparseElements(const ShapeIndex& shape_index) {
+  piece(shape_index).SortSparseElements();
+}
+
+void Literal::Piece::SortSparseElements() {
+  switch (subshape().element_type()) {
+    case PRED:
+      SortSparseElementsInternal<bool>();
+      break;
+    case S8:
+      SortSparseElementsInternal<int8>();
+      break;
+    case U8:
+      SortSparseElementsInternal<uint8>();
+      break;
+    case S16:
+      SortSparseElementsInternal<int16>();
+      break;
+    case U16:
+      SortSparseElementsInternal<uint16>();
+      break;
+    case S32:
+      SortSparseElementsInternal<int32>();
+      break;
+    case U32:
+      SortSparseElementsInternal<uint32>();
+      break;
+    case S64:
+      SortSparseElementsInternal<int64>();
+      break;
+    case U64:
+      SortSparseElementsInternal<uint64>();
+      break;
+    case F32:
+      SortSparseElementsInternal<float>();
+      break;
+    case F64:
+      SortSparseElementsInternal<double>();
+      break;
+    case C64:
+      SortSparseElementsInternal<complex64>();
+      break;
+    case F16:
+      SortSparseElementsInternal<half>();
+      break;
+    case BF16:
+      SortSparseElementsInternal<bfloat16>();
+      break;
+    default:
+      LOG(FATAL) << "Element type not valid for sparse array: "
+                 << PrimitiveType_Name(subshape().element_type());
+  }
+}
+
+template <typename NativeT>
+void Literal::Piece::SortSparseElementsInternal() {
+  CHECK(LayoutUtil::IsSparseArray(subshape()));
+  int64 num_elements = sparse_indices()->index_count();
+  auto values = data<NativeT>();
+  CHECK_LE(num_elements, values.size());
+  sparse_indices()->SortWithValues(
+      tensorflow::gtl::MutableArraySlice<NativeT>(values.data(), num_elements));
+}
+
+namespace {
+
+void ToStringHelper(const Literal& literal, const ShapeIndex& shape_index,
+                    bool print_layout, std::vector<string>* pieces) {
+  const Shape& subshape = ShapeUtil::GetSubshape(literal.shape(), shape_index);
 
   auto shape_to_string = [print_layout](const Shape& shape) {
     if (print_layout) {
@@ -608,272 +1072,186 @@ string Literal::ToString(bool print_layout) const {
     }
   };
 
+  // TODO(b/32894291): refactor this code to reduce code duplication.
+  if (ShapeUtil::IsTuple(subshape)) {
+    pieces->push_back(shape_to_string(subshape));
+    pieces->push_back(" (\n");
+    std::vector<string> tuple_pieces;
+    for (int i = 0; i < ShapeUtil::TupleElementCount(subshape); ++i) {
+      ShapeIndex element_index = shape_index;
+      element_index.push_back(i);
+      std::vector<string> element_pieces;
+      ToStringHelper(literal, element_index, print_layout, &element_pieces);
+      tuple_pieces.push_back(tensorflow::str_util::Join(element_pieces, ""));
+    }
+    pieces->push_back(tensorflow::str_util::Join(tuple_pieces, ",\n"));
+    pieces->push_back("\n)");
+    return;
+  }
+
+  if (LayoutUtil::IsSparseArray(subshape)) {
+    pieces->push_back(shape_to_string(subshape));
+    pieces->push_back("{");
+    int64 rank = ShapeUtil::Rank(subshape);
+    int64 num_elements = literal.sparse_element_count();
+    for (int64 i = 0; i < num_elements; ++i) {
+      if (i > 0) {
+        pieces->push_back(", ");
+      }
+      if (rank == 1) {
+        pieces->push_back(StrCat(literal.GetSparseIndex(i)[0]));
+        pieces->push_back(": ");
+      } else {
+        pieces->push_back("[");
+        pieces->push_back(
+            tensorflow::str_util::Join(literal.GetSparseIndex(i), ", "));
+        pieces->push_back("]: ");
+      }
+      pieces->push_back(literal.GetSparseElementAsString(i));
+    }
+    pieces->push_back("}");
+    return;
+  }
+
+  CHECK(LayoutUtil::IsDenseArray(subshape));
+
   auto element_to_string =
-      [this](tensorflow::gtl::ArraySlice<int64> indices) -> string {
-    PrimitiveType element_type = shape().element_type();
+      [&](tensorflow::gtl::ArraySlice<int64> indices) -> string {
+    PrimitiveType element_type = subshape.element_type();
     if (element_type == PRED) {
       // We display predicates in a densely packed form.
-      return Get<bool>(indices) ? "1" : "0";
+      return literal.Get<bool>(indices, shape_index) ? "1" : "0";
     }
     return ((!indices.empty() && indices.back() > 0) ? ", " : "") +
-           GetAsString(indices);
+           literal.GetAsString(indices, shape_index);
   };
 
-  // TODO(b/32894291): refactor this code to reduce code duplication.
-  if (ShapeUtil::IsTuple(shape())) {
-    pieces.push_back(shape_to_string(shape()));
-    pieces.push_back(" (\n");
-    pieces.push_back(tensorflow::str_util::Join(
-        tuple_literals(), ",\n", [](string* out, const Literal& element) {
-          tensorflow::strings::StrAppend(out, element.ToString());
-        }));
-    pieces.push_back("\n)");
-  } else if (ShapeUtil::Rank(shape()) == 0) {
-    pieces.push_back(GetAsString({}));
-  } else if (ShapeUtil::Rank(shape()) == 1) {
-    pieces.push_back("{");
-    for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) {
-      pieces.push_back(element_to_string({i0}));
+  if (ShapeUtil::Rank(subshape) == 0) {
+    pieces->push_back(literal.GetAsString({}, shape_index));
+  } else if (ShapeUtil::Rank(subshape) == 1) {
+    pieces->push_back("{");
+    for (int64 i0 = 0; i0 < subshape.dimensions(0); ++i0) {
+      pieces->push_back(element_to_string({i0}));
     }
-    pieces.push_back("}");
-  } else if (ShapeUtil::Rank(shape()) == 2) {
-    pieces.push_back(shape_to_string(shape()));
-    pieces.push_back(" {\n");
-    for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) {
-      pieces.push_back("  { ");
-      for (int64 i1 = 0; i1 < shape().dimensions(1); ++i1) {
-        pieces.push_back(element_to_string({i0, i1}));
+    pieces->push_back("}");
+  } else if (ShapeUtil::Rank(subshape) == 2) {
+    pieces->push_back(shape_to_string(subshape));
+    pieces->push_back(" {\n");
+    for (int64 i0 = 0; i0 < subshape.dimensions(0); ++i0) {
+      pieces->push_back("  { ");
+      for (int64 i1 = 0; i1 < subshape.dimensions(1); ++i1) {
+        pieces->push_back(element_to_string({i0, i1}));
       }
-      pieces.push_back(" ");
-      pieces.push_back(i0 == shape().dimensions(0) - 1 ? "}\n" : "},\n");
+      pieces->push_back(" ");
+      pieces->push_back(i0 == subshape.dimensions(0) - 1 ? "}\n" : "},\n");
     }
-    pieces.push_back("}");
-  } else if (ShapeUtil::Rank(shape()) == 3) {
-    pieces.push_back(shape_to_string(shape()));
-    pieces.push_back(" {\n");
-    for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) {
-      pieces.push_back(i0 > 0 ? ",\n{" : "{");
-      for (int64 i1 = 0; i1 < shape().dimensions(1); ++i1) {
-        pieces.push_back(i1 > 0 ? ",\n  { " : " { ");
-        for (int64 i2 = 0; i2 < shape().dimensions(2); ++i2) {
-          pieces.push_back(element_to_string({i0, i1, i2}));
+    pieces->push_back("}");
+  } else if (ShapeUtil::Rank(subshape) == 3) {
+    pieces->push_back(shape_to_string(subshape));
+    pieces->push_back(" {\n");
+    for (int64 i0 = 0; i0 < subshape.dimensions(0); ++i0) {
+      pieces->push_back(i0 > 0 ? ",\n{" : "{");
+      for (int64 i1 = 0; i1 < subshape.dimensions(1); ++i1) {
+        pieces->push_back(i1 > 0 ? ",\n  { " : " { ");
+        for (int64 i2 = 0; i2 < subshape.dimensions(2); ++i2) {
+          pieces->push_back(element_to_string({i0, i1, i2}));
         }
-        pieces.push_back(" }");
+        pieces->push_back(" }");
       }
-      pieces.push_back(" }");
+      pieces->push_back(" }");
     }
-    pieces.push_back("\n}");
-  } else if (ShapeUtil::Rank(shape()) == 4) {
-    pieces.push_back(shape_to_string(shape()));
-    pieces.push_back(" {\n");
-    for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) {
-      pieces.push_back(tensorflow::strings::Printf("  {  /*i0=%lld*/\n", i0));
-      for (int64 i1 = 0; i1 < shape().dimensions(1); ++i1) {
-        pieces.push_back(
-            tensorflow::strings::Printf("    {  /*i1=%lld*/\n", i1));
-        for (int64 i2 = 0; i2 < shape().dimensions(2); ++i2) {
-          pieces.push_back("      {");
-          for (int64 i3 = 0; i3 < shape().dimensions(3); ++i3) {
-            pieces.push_back(element_to_string({i0, i1, i2, i3}));
+    pieces->push_back("\n}");
+  } else if (ShapeUtil::Rank(subshape) == 4) {
+    pieces->push_back(shape_to_string(subshape));
+    pieces->push_back(" {\n");
+    for (int64 i0 = 0; i0 < subshape.dimensions(0); ++i0) {
+      pieces->push_back(Printf("  {  /*i0=%lld*/\n", i0));
+      for (int64 i1 = 0; i1 < subshape.dimensions(1); ++i1) {
+        pieces->push_back(Printf("    {  /*i1=%lld*/\n", i1));
+        for (int64 i2 = 0; i2 < subshape.dimensions(2); ++i2) {
+          pieces->push_back("      {");
+          for (int64 i3 = 0; i3 < subshape.dimensions(3); ++i3) {
+            pieces->push_back(element_to_string({i0, i1, i2, i3}));
           }
-          pieces.push_back(i2 == shape().dimensions(2) - 1 ? "}\n" : "},\n");
+          pieces->push_back(i2 == subshape.dimensions(2) - 1 ? "}\n" : "},\n");
         }
-        pieces.push_back(i1 == shape().dimensions(1) - 1 ? "    }\n"
-                                                         : "    },\n");
+        pieces->push_back(i1 == subshape.dimensions(1) - 1 ? "    }\n"
+                                                           : "    },\n");
       }
-      pieces.push_back(i0 == shape().dimensions(0) - 1 ? "  }\n" : "  },\n");
+      pieces->push_back(i0 == subshape.dimensions(0) - 1 ? "  }\n" : "  },\n");
     }
-    pieces.push_back("}");
-  } else if (ShapeUtil::Rank(shape()) == 5) {
-    pieces.push_back(shape_to_string(shape()));
-    pieces.push_back(" {\n");
-    for (int64 i0 = 0; i0 < shape().dimensions(0); ++i0) {
-      pieces.push_back(tensorflow::strings::Printf("  {  /*i0=%lld*/\n", i0));
-      for (int64 i1 = 0; i1 < shape().dimensions(1); ++i1) {
-        pieces.push_back(
-            tensorflow::strings::Printf("    {  /*i1=%lld*/\n", i1));
-        for (int64 i2 = 0; i2 < shape().dimensions(2); ++i2) {
-          pieces.push_back(
-              tensorflow::strings::Printf("      {  /*i2=%lld*/\n", i2));
-          for (int64 i3 = 0; i3 < shape().dimensions(3); ++i3) {
-            pieces.push_back("        {");
-            for (int64 i4 = 0; i4 < shape().dimensions(4); ++i4) {
-              pieces.push_back(element_to_string({i0, i1, i2, i3, i4}));
+    pieces->push_back("}");
+  } else if (ShapeUtil::Rank(subshape) == 5) {
+    pieces->push_back(shape_to_string(subshape));
+    pieces->push_back(" {\n");
+    for (int64 i0 = 0; i0 < subshape.dimensions(0); ++i0) {
+      pieces->push_back(Printf("  {  /*i0=%lld*/\n", i0));
+      for (int64 i1 = 0; i1 < subshape.dimensions(1); ++i1) {
+        pieces->push_back(Printf("    {  /*i1=%lld*/\n", i1));
+        for (int64 i2 = 0; i2 < subshape.dimensions(2); ++i2) {
+          pieces->push_back(Printf("      {  /*i2=%lld*/\n", i2));
+          for (int64 i3 = 0; i3 < subshape.dimensions(3); ++i3) {
+            pieces->push_back("        {");
+            for (int64 i4 = 0; i4 < subshape.dimensions(4); ++i4) {
+              pieces->push_back(element_to_string({i0, i1, i2, i3, i4}));
             }
-            pieces.push_back(i3 == shape().dimensions(3) - 1 ? "}\n" : "},\n");
+            pieces->push_back(i3 == subshape.dimensions(3) - 1 ? "}\n"
+                                                               : "},\n");
           }
-          pieces.push_back(i2 == shape().dimensions(2) - 1 ? "      }\n"
-                                                           : "      },\n");
+          pieces->push_back(i2 == subshape.dimensions(2) - 1 ? "      }\n"
+                                                             : "      },\n");
         }
-        pieces.push_back(i1 == shape().dimensions(1) - 1 ? "    }\n"
-                                                         : "    },\n");
+        pieces->push_back(i1 == subshape.dimensions(1) - 1 ? "    }\n"
+                                                           : "    },\n");
       }
-      pieces.push_back(i0 == shape().dimensions(0) - 1 ? "  }\n" : "  },\n");
+      pieces->push_back(i0 == subshape.dimensions(0) - 1 ? "  }\n" : "  },\n");
     }
-    pieces.push_back("}");
+    pieces->push_back("}");
   } else {
-    pieces.push_back(shape_to_string(shape()));
-    pieces.push_back(" {...}");
+    pieces->push_back(shape_to_string(subshape));
+    pieces->push_back(" {");
+    literal.EachCellAsString(
+        [&](tensorflow::gtl::ArraySlice<int64> indices, const string& value) {
+          pieces->push_back(" ");
+          pieces->push_back(value);
+        });
+    pieces->push_back("}");
   }
+}
+
+}  // namespace
+
+int64 Literal::sparse_element_count() const {
+  CHECK(LayoutUtil::IsSparseArray(shape()));
+  return sparse_indices()->index_count();
+}
 
+string Literal::ToString(bool print_layout) const {
+  std::vector<string> pieces;
+  ToStringHelper(*this, {}, print_layout, &pieces);
   return tensorflow::str_util::Join(pieces, "");
 }
 
 /* static */ std::unique_ptr<Literal> Literal::MakeTuple(
     tensorflow::gtl::ArraySlice<const Literal*> elements) {
-  auto literal = MakeUnique<Literal>();
-  std::vector<Shape> shape;
-  for (const Literal* tuple_element : elements) {
-    *literal->add_tuple_literals() = *tuple_element;
-    shape.push_back(tuple_element->shape());
+  std::vector<Shape> element_shapes;
+  for (const Literal* element : elements) {
+    element_shapes.push_back(element->shape());
+  }
+  auto literal = MakeUnique<Literal>(ShapeUtil::MakeTupleShape(element_shapes));
+  for (int i = 0; i < elements.size(); ++i) {
+    TF_CHECK_OK(literal->CopyFrom(*elements[i], /*dest_shape_index=*/{i}));
   }
-  *literal->mutable_shape() = ShapeUtil::MakeTupleShape(shape);
   return literal;
 }
 
 /* static */ std::unique_ptr<Literal> Literal::MakeTupleOwned(
     std::vector<std::unique_ptr<Literal>> elements) {
-  auto literal = MakeUnique<Literal>();
-  std::vector<Shape> shape;
-  for (auto& tuple_element : elements) {
-    shape.push_back(tuple_element->shape());
-    *literal->add_tuple_literals() = std::move(*tuple_element);
-  }
-  *literal->mutable_shape() = ShapeUtil::MakeTupleShape(shape);
-  return literal;
-}
-
-const void* Literal::InternalData() const {
-  return const_cast<const void*>(
-      const_cast<Literal*>(this)->MutableInternalData());
-}
-
-void* Literal::MutableInternalData() {
-  // NOTE: We access the vectors directly to avoid the const reference
-  // created by the accessor functions.
-  switch (shape().element_type()) {
-    case PRED:
-    case U8:
-      return reinterpret_cast<void*>(u8s_.data());
-    case S32:
-      return reinterpret_cast<void*>(s32s_.data());
-    case S64:
-      return reinterpret_cast<void*>(s64s_.data());
-    case U32:
-      return reinterpret_cast<void*>(u32s_.data());
-    case U64:
-      return reinterpret_cast<void*>(u64s_.data());
-    case F32:
-      return reinterpret_cast<void*>(f32s_.data());
-    case F64:
-      return reinterpret_cast<void*>(f64s_.data());
-    case C64:
-      return reinterpret_cast<void*>(c64s_.data());
-    case F16:
-      return reinterpret_cast<void*>(f16s_.data());
-    case BF16:
-      return reinterpret_cast<void*>(bf16s_.data());
-    default:
-      LOG(FATAL) << "primitive type not supported in literals: "
-                 << PrimitiveType_Name(shape().element_type());
-  }
-}
-
-void Literal::Reserve(int64 num_elements) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  switch (shape().element_type()) {
-    case PRED:
-      Resize<bool>(num_elements, false);
-      break;
-    case S8:
-      Resize<int8>(num_elements, 0);
-      break;
-    case U8:
-      Resize<uint8>(num_elements, 0);
-      break;
-    case S32:
-      Resize<int32>(num_elements, 0);
-      break;
-    case S64:
-      Resize<int64>(num_elements, 0);
-      break;
-    case U32:
-      Resize<uint32>(num_elements, 0);
-      break;
-    case U64:
-      Resize<uint64>(num_elements, 0);
-      break;
-    case F32:
-      Resize<float>(num_elements, 0);
-      break;
-    case F64:
-      Resize<double>(num_elements, 0);
-      break;
-    case C64:
-      Resize<complex64>(num_elements, 0);
-      break;
-    case F16:
-      Resize<half>(num_elements, static_cast<half>(0.0f));
-      break;
-    case BF16:
-      Resize<bfloat16>(num_elements, static_cast<bfloat16>(0.0f));
-      break;
-    default:
-      LOG(FATAL) << "primitive type not supported in literals: "
-                 << PrimitiveType_Name(shape().element_type());
-  }
-}
-
-tensorflow::Status Literal::ValidateLiteral() const {
-  TF_CHECK_OK(ShapeUtil::ValidateShape(shape()));
-  int64 expected = ShapeUtil::ElementsIn(shape());
-  int64 actual = -1;
-  switch (shape().element_type()) {
-    case PRED:
-    case U8:
-      actual = u8s_size();
-      break;
-    case S32:
-      actual = s32s_size();
-      break;
-    case U32:
-      actual = u32s_size();
-      break;
-    case S64:
-      actual = s64s_size();
-      break;
-    case U64:
-      actual = u64s_size();
-      break;
-    case F32:
-      actual = f32s_size();
-      break;
-    case F64:
-      actual = f64s_size();
-      break;
-    case C64:
-      actual = c64s_size();
-      break;
-    case F16:
-      actual = f16s().size() / sizeof(half);
-      break;
-    case BF16:
-      actual = bf16s().size();
-      break;
-    default:
-      return tensorflow::errors::Unimplemented(
-          "unhandled element type for literal validation: " +
-          PrimitiveType_Name(shape().element_type()));
-  }
-
-  if (expected != actual) {
-    return tensorflow::errors::InvalidArgument(tensorflow::strings::Printf(
-        "literal has bad number of elements for its shape %s: want %lld "
-        "got %lld",
-        ShapeUtil::HumanString(shape()).c_str(), expected, actual));
+  std::vector<const Literal*> element_ptrs;
+  for (const auto& element : elements) {
+    element_ptrs.push_back(element.get());
   }
-
-  return tensorflow::Status::OK();
+  return MakeTuple(element_ptrs);
 }
 
 void Literal::EachCellAsString(
@@ -892,17 +1270,13 @@ void Literal::EachCellAsString(
 namespace {
 template <typename NativeSrcT, typename NativeDestT>
 std::unique_ptr<Literal> ConvertBetweenNativeTypes(const Literal& src_literal) {
-  auto result_literal = MakeUnique<Literal>();
-  Shape* result_shape = result_literal->mutable_shape();
-  *result_shape = src_literal.shape();
-  result_shape->set_element_type(
-      primitive_util::NativeToPrimitiveType<NativeDestT>());
-  result_literal->Reserve(ShapeUtil::ElementsIn(*result_shape));
-  tensorflow::gtl::ArraySlice<NativeSrcT> src_data =
-      src_literal.GetArraySlice<NativeSrcT>();
-  tensorflow::gtl::MutableArraySlice<NativeDestT> dest_data =
-      result_literal->GetMutableArraySlice<NativeDestT>();
-  int64 num_elements = ShapeUtil::ElementsIn(src_literal.shape());
+  CHECK(ShapeUtil::IsArray(src_literal.shape()));
+  auto result_literal = MakeUnique<Literal>(ShapeUtil::ChangeElementType(
+      src_literal.shape(),
+      primitive_util::NativeToPrimitiveType<NativeDestT>()));
+  auto src_data = src_literal.data<NativeSrcT>();
+  auto dest_data = result_literal->template data<NativeDestT>();
+  int64 num_elements = src_literal.element_count();
 
   for (int64 i = 0; i < num_elements; ++i) {
     dest_data[i] = static_cast<NativeDestT>(src_data[i]);
@@ -912,18 +1286,16 @@ std::unique_ptr<Literal> ConvertBetweenNativeTypes(const Literal& src_literal) {
 
 template <PrimitiveType primitive_src_type>
 std::unique_ptr<Literal> ConvertToC64(const Literal& src_literal) {
-  auto result_literal = MakeUnique<Literal>();
-  Shape* result_shape = result_literal->mutable_shape();
-  *result_shape = src_literal.shape();
-  result_shape->set_element_type(C64);
-  result_literal->Reserve(ShapeUtil::ElementsIn(*result_shape));
+  CHECK(ShapeUtil::IsArray(src_literal.shape()));
+  auto result_literal = MakeUnique<Literal>(
+      ShapeUtil::ChangeElementType(src_literal.shape(), C64));
   using NativeSrcT =
       typename primitive_util::PrimitiveTypeToNative<primitive_src_type>::type;
   tensorflow::gtl::ArraySlice<NativeSrcT> src_data =
-      src_literal.GetArraySlice<NativeSrcT>();
+      src_literal.data<NativeSrcT>();
   tensorflow::gtl::MutableArraySlice<complex64> dest_data =
-      result_literal->GetMutableArraySlice<complex64>();
-  int64 num_elements = ShapeUtil::ElementsIn(src_literal.shape());
+      result_literal->data<complex64>();
+  int64 num_elements = src_literal.element_count();
   for (int64 i = 0; i < num_elements; ++i) {
     dest_data[i] = complex64(static_cast<float>(src_data[i]), 0);
   }
@@ -968,10 +1340,12 @@ StatusOr<std::unique_ptr<Literal>> ConvertIfDestTypeMatches(
           PrimitiveType_Name(primitive_dest_type).c_str());
   }
 }
+
 }  // namespace
 
 StatusOr<std::unique_ptr<Literal>> Literal::Convert(
     PrimitiveType primitive_dest_type) const {
+  TF_RET_CHECK(ShapeUtil::IsArray(shape()));
   switch (shape().element_type()) {
 #define CONVERT_IF_DEST_TYPE_MATCHES(type) \
   case (type):                             \
@@ -996,356 +1370,192 @@ StatusOr<std::unique_ptr<Literal>> Literal::Convert(
   }
 }
 
-namespace {
-
-// Helper function which compares whether the elements of literal1 are equal to
-// the elements of literal2. Recursively iterates through the entire
-// multidimensional index space and compares the literal elements
-// one-by-one. literal1 and literal2 must be compatible (same dimensions and
-// type).
 template <typename NativeT>
-bool EqualElements(const Literal& literal1, const Literal& literal2,
-                   int dimension, std::vector<int64>* multi_index) {
-  if (dimension == ShapeUtil::Rank(literal1.shape())) {
-    return (literal1.Get<NativeT>(*multi_index) ==
-            literal2.Get<NativeT>(*multi_index));
-  }
-  for (int64 i = 0; i < literal1.shape().dimensions(dimension); ++i) {
-    (*multi_index)[dimension] = i;
-    if (!EqualElements<NativeT>(literal1, literal2, dimension + 1,
-                                multi_index)) {
+bool Literal::Piece::EqualElementsInternal(
+    const Literal::Piece& other, std::vector<int64>* multi_index) const {
+  if (multi_index->size() == ShapeUtil::Rank(subshape())) {
+    return (Get<NativeT>(*multi_index) == other.Get<NativeT>(*multi_index));
+  }
+  for (int64 i = 0; i < subshape().dimensions(multi_index->size()); ++i) {
+    multi_index->push_back(i);
+    if (!EqualElementsInternal<NativeT>(other, multi_index)) {
       return false;
     }
+    multi_index->pop_back();
   }
   return true;
 }
 
-}  // namespace
+bool Literal::Piece::EqualElements(const Literal::Piece& other) const {
+  DCHECK(ShapeUtil::Compatible(subshape(), other.subshape()));
+
+  std::vector<int64> multi_index;
+  switch (subshape().element_type()) {
+    case PRED:
+      return EqualElementsInternal<bool>(other, &multi_index);
+    case U8:
+      return EqualElementsInternal<uint8>(other, &multi_index);
+    case S32:
+      return EqualElementsInternal<int32>(other, &multi_index);
+    case S64:
+      return EqualElementsInternal<int64>(other, &multi_index);
+    case U32:
+      return EqualElementsInternal<uint32>(other, &multi_index);
+    case U64:
+      return EqualElementsInternal<uint64>(other, &multi_index);
+    case F32:
+      return EqualElementsInternal<float>(other, &multi_index);
+    case F64:
+      return EqualElementsInternal<double>(other, &multi_index);
+    case F16:
+      return EqualElementsInternal<half>(other, &multi_index);
+    case BF16:
+      return EqualElementsInternal<bfloat16>(other, &multi_index);
+    case C64:
+      return EqualElementsInternal<complex64>(other, &multi_index);
+    default:
+      LOG(FATAL) << "Unimplemented: Literal::Piece::EqualElements for type "
+                 << PrimitiveType_Name(subshape().element_type());
+  }
+}
 
 bool Literal::operator==(const Literal& other) const {
   if (!ShapeUtil::Compatible(shape(), other.shape())) {
     return false;
   }
-  if (ShapeUtil::IsTuple(shape())) {
-    // Because the shapes are compatible, they must have the same number of
-    // tuple elements.
-    CHECK_EQ(tuple_literals_size(), other.tuple_literals_size());
-    for (int i = 0; i < tuple_literals_size(); ++i) {
-      if (tuple_literals(i) != other.tuple_literals(i)) {
-        return false;
-      }
+  for (const auto& pair : pieces_) {
+    const ShapeIndex& index = pair.first;
+    const Piece& piece = pair.second;
+    if (!ShapeUtil::IsArray(piece.subshape())) {
+      continue;
     }
-    return true;
-  } else {
-    std::vector<int64> multi_index(ShapeUtil::Rank(shape()), 0);
-    switch (shape().element_type()) {
-      case PRED:
-        return EqualElements<bool>(*this, other, 0, &multi_index);
-      case U8:
-        return EqualElements<uint8>(*this, other, 0, &multi_index);
-      case S32:
-        return EqualElements<int32>(*this, other, 0, &multi_index);
-      case S64:
-        return EqualElements<int64>(*this, other, 0, &multi_index);
-      case U32:
-        return EqualElements<uint32>(*this, other, 0, &multi_index);
-      case U64:
-        return EqualElements<uint64>(*this, other, 0, &multi_index);
-      case F32:
-        return EqualElements<float>(*this, other, 0, &multi_index);
-      case F64:
-        return EqualElements<double>(*this, other, 0, &multi_index);
-      case F16:
-        return EqualElements<half>(*this, other, 0, &multi_index);
-      case BF16:
-        return EqualElements<bfloat16>(*this, other, 0, &multi_index);
-      case C64:
-        return EqualElements<complex64>(*this, other, 0, &multi_index);
-      default:
-        LOG(FATAL) << "Unimplemented: Literal::Equal for type "
-                   << PrimitiveType_Name(shape().element_type());
+
+    const Piece& other_piece = other.piece(index);
+    if (!piece.EqualElements(other_piece)) {
+      return false;
     }
   }
+  return true;
 }
 
-template <>
-tensorflow::gtl::MutableArraySlice<bool> Literal::GetMutableArraySlice() {
-  auto values = mutable_preds();
-  return tensorflow::gtl::MutableArraySlice<bool>(
-      reinterpret_cast<bool*>(values->data()), values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<int8> Literal::GetMutableArraySlice() {
-  auto values = mutable_u8s();
-  return tensorflow::gtl::MutableArraySlice<int8>(
-      reinterpret_cast<int8*>(values->data()), values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<uint8> Literal::GetMutableArraySlice() {
-  auto values = mutable_u8s();
-  return tensorflow::gtl::MutableArraySlice<uint8>(values->data(),
-                                                   values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<int16> Literal::GetMutableArraySlice() {
-  auto values = mutable_s16s();
-  return tensorflow::gtl::MutableArraySlice<int16>(values->data(),
-                                                   values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<uint16> Literal::GetMutableArraySlice() {
-  auto values = mutable_u16s();
-  return tensorflow::gtl::MutableArraySlice<uint16>(values->data(),
-                                                    values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<int32> Literal::GetMutableArraySlice() {
-  auto values = mutable_s32s();
-  return tensorflow::gtl::MutableArraySlice<int32>(values->data(),
-                                                   values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<uint32> Literal::GetMutableArraySlice() {
-  auto values = mutable_u32s();
-  return tensorflow::gtl::MutableArraySlice<uint32>(values->data(),
-                                                    values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<int64> Literal::GetMutableArraySlice() {
-  static_assert(sizeof(int64) == sizeof(tensorflow::protobuf_int64) &&
-                    alignof(int64) == alignof(tensorflow::protobuf_int64),
-                "The int64 and tensorflow::protobuf_int64 types are not "
-                "compatible");
-  auto values = mutable_s64s();
-  // Because of the fact that tensorflow::protobuf_int64 is defined as int64_t
-  // while tensorflow::int64 is defined as long long, a reinterpret_cast<> is
-  // necessary from the raw data pointer returned by the mutable_data() API.
-  return tensorflow::gtl::MutableArraySlice<int64>(
-      reinterpret_cast<int64*>(values->data()), values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<uint64> Literal::GetMutableArraySlice() {
-  static_assert(sizeof(uint64) == sizeof(tensorflow::protobuf_uint64) &&
-                    alignof(uint64) == alignof(tensorflow::protobuf_uint64),
-                "The uint64 and tensorflow::protobuf_uint64 types are not "
-                "compatible");
-  auto values = mutable_u64s();
-  // Because of the fact that tensorflow::protobuf_uint64 is defined as uint64_t
-  // while tensorflow::uint64 is defined as unsigned long long, a
-  // reinterpret_cast<> is necessary from the raw data pointer returned by the
-  // mutable_data() API.
-  return tensorflow::gtl::MutableArraySlice<uint64>(
-      reinterpret_cast<uint64*>(values->data()), values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<float> Literal::GetMutableArraySlice() {
-  auto values = mutable_f32s();
-  return tensorflow::gtl::MutableArraySlice<float>(values->data(),
-                                                   values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<double> Literal::GetMutableArraySlice() {
-  auto values = mutable_f64s();
-  return tensorflow::gtl::MutableArraySlice<double>(values->data(),
-                                                    values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<complex64> Literal::GetMutableArraySlice() {
-  auto values = mutable_c64s();
-  return {values->data(), values->size()};
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<half> Literal::GetMutableArraySlice<half>() {
-  auto values = mutable_f16s();
-  return tensorflow::gtl::MutableArraySlice<half>(values->data(),
-                                                  values->size());
-}
-
-template <>
-tensorflow::gtl::MutableArraySlice<bfloat16>
-Literal::GetMutableArraySlice<bfloat16>() {
-  auto values = mutable_bf16s();
-  return {values->data(), values->size()};
-}
-
-template <>
-tensorflow::gtl::ArraySlice<bool> Literal::GetArraySlice<bool>() const {
-  CHECK_EQ(shape().element_type(), PRED);
-  return tensorflow::gtl::ArraySlice<bool>(
-      reinterpret_cast<const bool*>(preds().data()), preds().size());
-}
-
-template <>
-tensorflow::gtl::ArraySlice<uint8> Literal::GetArraySlice<uint8>() const {
-  CHECK_EQ(shape().element_type(), U8);
-  return tensorflow::gtl::ArraySlice<uint8>(
-      reinterpret_cast<const uint8*>(u8s().data()), u8s().size());
-}
-
-template <>
-tensorflow::gtl::ArraySlice<int8> Literal::GetArraySlice<int8>() const {
-  CHECK_EQ(shape().element_type(), S8);
-  return tensorflow::gtl::ArraySlice<int8>(
-      reinterpret_cast<const int8*>(u8s().data()), u8s().size());
-}
-
-template <>
-tensorflow::gtl::ArraySlice<uint16> Literal::GetArraySlice<uint16>() const {
-  CHECK_EQ(shape().element_type(), U16);
-  return tensorflow::gtl::ArraySlice<uint16>(u16s().data(), u16s().size());
-}
-
-template <>
-tensorflow::gtl::ArraySlice<int16> Literal::GetArraySlice<int16>() const {
-  CHECK_EQ(shape().element_type(), S16);
-  return tensorflow::gtl::ArraySlice<int16>(s16s().data(), s16s().size());
-}
-
-template <>
-tensorflow::gtl::ArraySlice<uint32> Literal::GetArraySlice<uint32>() const {
-  CHECK_EQ(shape().element_type(), U32);
-  return u32s();
-}
-
-template <>
-tensorflow::gtl::ArraySlice<uint64> Literal::GetArraySlice<uint64>() const {
-  CHECK_EQ(shape().element_type(), U64);
-  return u64s();
-}
-
-template <>
-tensorflow::gtl::ArraySlice<int32> Literal::GetArraySlice<int32>() const {
-  CHECK_EQ(shape().element_type(), S32);
-  return s32s();
-}
-
-template <>
-tensorflow::gtl::ArraySlice<int64> Literal::GetArraySlice<int64>() const {
-  CHECK_EQ(shape().element_type(), S64);
-  return s64s();
-}
-
-template <>
-tensorflow::gtl::ArraySlice<double> Literal::GetArraySlice<double>() const {
-  CHECK_EQ(shape().element_type(), F64);
-  return f64s();
-}
-
-template <>
-tensorflow::gtl::ArraySlice<half> Literal::GetArraySlice<half>() const {
-  CHECK_EQ(shape().element_type(), F16);
-  return tensorflow::gtl::ArraySlice<half>(f16s().data(),
-                                           f16s().size() / sizeof(half));
-}
-
-template <>
-tensorflow::gtl::ArraySlice<bfloat16> Literal::GetArraySlice<bfloat16>() const {
-  CHECK_EQ(shape().element_type(), BF16);
-  return {bf16s().data(), bf16s().size()};
-}
-
-template <>
-tensorflow::gtl::ArraySlice<complex64> Literal::GetArraySlice<complex64>()
-    const {
-  CHECK_EQ(shape().element_type(), C64);
-  return c64s();
-}
+namespace {
 
 template <typename NativeT>
-static bool AllElementsEqualValue(const Literal& literal, NativeT value) {
-  for (int64 i = 0; i < ShapeUtil::ElementsIn(literal.shape()); ++i) {
-    auto multi_index =
-        IndexUtil::LinearIndexToMultidimensionalIndex(literal.shape(), i);
-    if (literal.Get<NativeT>(multi_index) != value) {
+static bool AllElementsEqualValue(tensorflow::gtl::ArraySlice<NativeT> data,
+                                  NativeT value) {
+  for (int64 i = 0; i < data.size(); ++i) {
+    if (data[i] != value) {
       return false;
     }
   }
   return true;
 }
 
+}  // namespace
+
 bool Literal::IsAll(int8 value) const {
-  switch (shape().element_type()) {
-    case U8:
-      if (value >= 0) {
-        return AllElementsEqualValue<uint8>(*this, value);
-      }
-      return false;
-    case U32:
-      if (value >= 0) {
-        return AllElementsEqualValue<uint32>(*this, value);
-      }
-      return false;
-    case U64:
-      if (value >= 0) {
-        return AllElementsEqualValue<uint64>(*this, value);
-      }
-      return false;
-    case S8:
-      return AllElementsEqualValue<int8>(*this, value);
-    case S32:
-      return AllElementsEqualValue<int32>(*this, value);
-    case S64:
-      return AllElementsEqualValue<int64>(*this, value);
-    case F32:
-      return AllElementsEqualValue<float>(*this, value);
-    case F64:
-      return AllElementsEqualValue<double>(*this, value);
-    case F16:
-      return AllElementsEqualValue<half>(*this, static_cast<half>(value));
-    case BF16:
-      return AllElementsEqualValue<bfloat16>(*this,
-                                             static_cast<bfloat16>(value));
-    case PRED:
-      if (value == 0) {
-        return AllElementsEqualValue<bool>(*this, false);
-      }
-      if (value == 1) {
-        return AllElementsEqualValue<bool>(*this, true);
+  for (const auto& pair : pieces_) {
+    const Piece& piece = pair.second;
+    if (!ShapeUtil::IsArray(piece.subshape())) {
+      continue;
+    }
+
+    auto piece_is_all = [&]() {
+      switch (shape().element_type()) {
+        case U8:
+          if (value >= 0) {
+            return AllElementsEqualValue<uint8>(piece.data<uint8>(), value);
+          }
+          return false;
+        case U32:
+          if (value >= 0) {
+            return AllElementsEqualValue<uint32>(piece.data<uint32>(), value);
+          }
+          return false;
+        case U64:
+          if (value >= 0) {
+            return AllElementsEqualValue<uint64>(piece.data<uint64>(), value);
+          }
+          return false;
+        case S8:
+          return AllElementsEqualValue<int8>(piece.data<int8>(), value);
+        case S32:
+          return AllElementsEqualValue<int32>(piece.data<int32>(), value);
+        case S64:
+          return AllElementsEqualValue<int64>(piece.data<int64>(), value);
+        case F32:
+          return AllElementsEqualValue<float>(piece.data<float>(), value);
+        case F64:
+          return AllElementsEqualValue<double>(piece.data<double>(), value);
+        case F16:
+          return AllElementsEqualValue<half>(piece.data<half>(),
+                                             static_cast<half>(value));
+        case BF16:
+          return AllElementsEqualValue<bfloat16>(piece.data<bfloat16>(),
+                                                 static_cast<bfloat16>(value));
+        case PRED:
+          if (value == 0) {
+            return AllElementsEqualValue<bool>(piece.data<bool>(), false);
+          }
+          if (value == 1) {
+            return AllElementsEqualValue<bool>(piece.data<bool>(), true);
+          }
+          return false;
+        default:
+          return false;
       }
       return false;
-    default:
+    };
+
+    if (!piece_is_all()) {
       return false;
+    }
   }
+  return true;
 }
 
 bool Literal::IsAllFloat(float value) const {
-  switch (shape().element_type()) {
-    case F32:
-      return AllElementsEqualValue<float>(*this, value);
-    case F64:
-      return AllElementsEqualValue<double>(*this, value);
-    case F16:
-      return AllElementsEqualValue<half>(*this, static_cast<half>(value));
-    case BF16:
-      return AllElementsEqualValue<bfloat16>(*this,
-                                             static_cast<bfloat16>(value));
-    default:
+  for (const auto& pair : pieces_) {
+    const Piece& piece = pair.second;
+    if (!ShapeUtil::IsArray(piece.subshape())) {
+      continue;
+    }
+
+    auto piece_is_all = [&]() {
+      switch (shape().element_type()) {
+        case F32:
+          return AllElementsEqualValue<float>(piece.data<float>(), value);
+        case F64:
+          return AllElementsEqualValue<double>(piece.data<double>(), value);
+        case F16:
+          return AllElementsEqualValue<half>(piece.data<half>(),
+                                             static_cast<half>(value));
+        case BF16:
+          return AllElementsEqualValue<bfloat16>(piece.data<bfloat16>(),
+                                                 static_cast<bfloat16>(value));
+        default:
+          return false;
+      }
+    };
+    if (!piece_is_all()) {
       return false;
+    }
   }
+  return true;
 }
 
 bool Literal::IsAllComplex(complex64 value) const {
   switch (shape().element_type()) {
     case C64:
-      return AllElementsEqualValue<complex64>(*this, value);
+      return AllElementsEqualValue<complex64>(root_piece().data<complex64>(),
+                                              value);
     default:
       return false;
   }
 }
 
 bool Literal::IsZero(tensorflow::gtl::ArraySlice<int64> indices) const {
+  CHECK(ShapeUtil::IsArray(shape()));
   switch (shape().element_type()) {
     case U8:
       return Get<uint8>(indices) == 0;
@@ -1376,247 +1586,294 @@ bool Literal::IsZero(tensorflow::gtl::ArraySlice<int64> indices) const {
   }
 }
 
-template <>
-/* static */ void Literal::Resize<bool>(int64 num_elements, bool value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_preds()->resize(num_elements, value);
-}
-
-template <>
-void Literal::Resize<int8>(int64 num_elements, int8 value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_u8s()->resize(num_elements, value);
-}
-
-template <>
-void Literal::Resize<uint8>(int64 num_elements, uint8 value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_u8s()->resize(num_elements, value);
-}
-
-template <>
-void Literal::Resize<int32>(int64 num_elements, int32 value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_s32s()->resize(num_elements, value);
-}
-
-template <>
-void Literal::Resize<uint32>(int64 num_elements, uint32 value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_u32s()->resize(num_elements, value);
-}
-
-template <>
-void Literal::Resize<int64>(int64 num_elements, int64 value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_s64s()->resize(num_elements, value);
-}
-
-template <>
-void Literal::Resize<uint64>(int64 num_elements, uint64 value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_u64s()->resize(num_elements, value);
-}
-
-template <>
-void Literal::Resize<float>(int64 num_elements, float value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_f32s()->resize(num_elements, value);
-}
-
-template <>
-void Literal::Resize<double>(int64 num_elements, double value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_f64s()->resize(num_elements, value);
-}
-
-template <>
-void Literal::Resize<half>(int64 num_elements, half value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_f16s()->resize(num_elements, value);
-}
-
-template <>
-void Literal::Resize<bfloat16>(int64 num_elements, bfloat16 value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_bf16s()->resize(num_elements, value);
-}
-
-template <>
-void Literal::Resize<complex64>(int64 num_elements, complex64 value) {
-  CHECK_EQ(ShapeUtil::ElementsIn(shape()), num_elements);
-  mutable_c64s()->resize(num_elements, value);
-}
+namespace {
 
 template <typename RepeatedFieldT, typename NativeT>
 void CopyToRepeatedField(RepeatedFieldT* dest,
-                         const std::vector<NativeT>& src) {
+                         const tensorflow::gtl::ArraySlice<NativeT> src) {
   *dest = RepeatedFieldT(src.begin(), src.end());
 }
 
-template <>
-void CopyToRepeatedField<tensorflow::protobuf::RepeatedField<float>, complex64>(
-    tensorflow::protobuf::RepeatedField<float>* dest,
-    const std::vector<complex64>& src) {
-  *dest = tensorflow::protobuf::RepeatedField<float>(
-      reinterpret_cast<const float*>(src.data()),
-      reinterpret_cast<const float*>(src.data()) + src.size() * 2);
-}
+}  // namespace
 
-LiteralProto Literal::ToProto() const {
-  LiteralProto proto;
-  proto.Clear();
-  *proto.mutable_shape() = shape();
-  switch (shape().element_type()) {
+void Literal::Piece::WriteToProto(LiteralProto* proto) const {
+  *proto->mutable_shape() = subshape();
+  switch (subshape().element_type()) {
     case PRED:
-      CopyToRepeatedField(proto.mutable_preds(), preds());
+      CopyToRepeatedField(proto->mutable_preds(), data<bool>());
       break;
     case U8:
-      *proto.mutable_u8s() = u8s_string();
-      break;
-    case S32:
-      CopyToRepeatedField(proto.mutable_s32s(), s32s());
-      break;
-    case S64:
-      CopyToRepeatedField(proto.mutable_s64s(), s64s());
+      proto->set_u8s(static_cast<const unsigned char*>(data<uint8>().data()),
+                     element_count());
       break;
     case U32:
-      CopyToRepeatedField(proto.mutable_u32s(), u32s());
+      CopyToRepeatedField(proto->mutable_u32s(), data<uint32>());
       break;
     case U64:
-      CopyToRepeatedField(proto.mutable_u64s(), u64s());
+      CopyToRepeatedField(proto->mutable_u64s(), data<uint64>());
+      break;
+    case S32:
+      CopyToRepeatedField(proto->mutable_s32s(), data<int32>());
+      break;
+    case S64:
+      CopyToRepeatedField(proto->mutable_s64s(), data<int64>());
       break;
     case F16:
-      *proto.mutable_f16s() =
-          string(reinterpret_cast<const char*>(f16s_.data()),
-                 f16s_.size() * sizeof(half));
+      *proto->mutable_f16s() = string(
+          reinterpret_cast<const char*>(data<half>().data()), size_bytes());
       if (!kLittleEndian) {
-        ConvertEndianShort(const_cast<char*>(proto.mutable_f16s()->data()),
-                           proto.f16s().size());
+        ConvertEndianShort(const_cast<char*>(proto->mutable_f16s()->data()),
+                           proto->f16s().size());
       }
       break;
     case BF16:
-      *proto.mutable_bf16s() =
-          string(reinterpret_cast<const char*>(bf16s_.data()),
-                 bf16s_.size() * sizeof(bfloat16));
+      *proto->mutable_bf16s() = string(
+          reinterpret_cast<const char*>(data<bfloat16>().data()), size_bytes());
       if (!kLittleEndian) {
-        ConvertEndianShort(const_cast<char*>(proto.mutable_bf16s()->data()),
-                           proto.bf16s().size());
+        ConvertEndianShort(const_cast<char*>(proto->mutable_bf16s()->data()),
+                           proto->bf16s().size());
       }
       break;
     case F32:
-      CopyToRepeatedField(proto.mutable_f32s(), f32s());
+      CopyToRepeatedField(proto->mutable_f32s(), data<float>());
       break;
     case F64:
-      CopyToRepeatedField(proto.mutable_f64s(), f64s());
+      CopyToRepeatedField(proto->mutable_f64s(), data<double>());
       break;
     case C64:
-      CopyToRepeatedField(proto.mutable_c64s(), c64s());
-      break;
-    case TUPLE:
-      for (const auto& tuple : tuple_literals()) {
-        *proto.add_tuple_literals() = tuple.ToProto();
+      for (complex64 value : data<complex64>()) {
+        proto->add_c64s(value.real());
+        proto->add_c64s(value.imag());
       }
       break;
+    case TUPLE:
+      // Nothing to do but assign the shape which is done above.
+      return;
     default:
-      LOG(FATAL) << "Unhandled primitive type " << shape().element_type();
+      LOG(FATAL) << "Unhandled primitive type " << subshape().element_type();
   }
-
-  return proto;
 }
 
-template <typename RepeatedFieldT, typename NativeT>
-void CopyFromRepeatedField(std::vector<NativeT>* dest,
-                           const RepeatedFieldT& src) {
-  *dest = std::vector<NativeT>(src.begin(), src.end());
+const void* Literal::Piece::untyped_data() const {
+  CHECK(ShapeUtil::IsArray(subshape())) << ShapeUtil::HumanString(subshape());
+  return buffer();
 }
 
-template <>
-void CopyFromRepeatedField<tensorflow::protobuf::RepeatedField<float>,
-                           complex64>(
-    std::vector<complex64>* dest,
-    const tensorflow::protobuf::RepeatedField<float>& src) {
-  *dest = std::vector<complex64>(
-      reinterpret_cast<const complex64*>(src.data()),
-      reinterpret_cast<const complex64*>(src.data()) + src.size() / 2);
+void* Literal::Piece::untyped_data() {
+  CHECK(ShapeUtil::IsArray(subshape())) << ShapeUtil::HumanString(subshape());
+  return buffer();
 }
 
-void Literal::CopyFromProto(const LiteralProto& literal_proto) {
-  if (!literal_proto.has_shape()) {
-    return;
+namespace {
+
+template <typename RepeatedFieldT, typename NativeT>
+Status CopyFromRepeatedField(tensorflow::gtl::MutableArraySlice<NativeT> dest,
+                             const RepeatedFieldT& src) {
+  if (dest.size() != src.size()) {
+    return InvalidArgument(
+        "Expected %lu elements in LiteralProto repeated field, has %d",
+        dest.size(), src.size());
   }
+  std::copy(src.begin(), src.end(), dest.begin());
+  return Status::OK();
+}
 
-  *mutable_shape() = literal_proto.shape();
-  switch (shape().element_type()) {
+}  // namespace
+
+Status Literal::Piece::CopyFromProto(const LiteralProto& proto) {
+  // These conditions should have been checked in Literal::CreateFromProto.
+  TF_RET_CHECK(proto.has_shape());
+  TF_RET_CHECK(LayoutUtil::HasLayout(proto.shape()));
+  TF_RET_CHECK(ShapeUtil::Equal(proto.shape(), subshape()));
+
+  switch (subshape().element_type()) {
     case PRED:
-      CopyFromRepeatedField(mutable_preds(), literal_proto.preds());
-      break;
-    case U8:
-      set_u8s(literal_proto.u8s());
+      TF_RETURN_IF_ERROR(CopyFromRepeatedField(data<bool>(), proto.preds()));
       break;
+    case U8: {
+      auto u8_data = data<uint8>();
+      TF_RET_CHECK(proto.u8s().size() == u8_data.size());
+      std::copy(proto.u8s().begin(), proto.u8s().end(), u8_data.begin());
+    } break;
     case S32:
-      CopyFromRepeatedField(mutable_s32s(), literal_proto.s32s());
+      TF_RETURN_IF_ERROR(CopyFromRepeatedField(data<int32>(), proto.s32s()));
       break;
     case S64:
-      CopyFromRepeatedField(mutable_s64s(), literal_proto.s64s());
+      TF_RETURN_IF_ERROR(CopyFromRepeatedField(data<int64>(), proto.s64s()));
       break;
     case U32:
-      CopyFromRepeatedField(mutable_u32s(), literal_proto.u32s());
+      TF_RETURN_IF_ERROR(CopyFromRepeatedField(data<uint32>(), proto.u32s()));
       break;
     case U64:
-      CopyFromRepeatedField(mutable_u64s(), literal_proto.u64s());
+      TF_RETURN_IF_ERROR(CopyFromRepeatedField(data<uint64>(), proto.u64s()));
       break;
     case F16: {
-      const string& s(literal_proto.f16s());
-      CHECK_EQ(0, s.size() % sizeof(half));
-      f16s_ = std::vector<half>(s.size() / sizeof(half));
-      memcpy(f16s_.data(), s.data(), s.size());
-
+      const string& s(proto.f16s());
+      TF_RET_CHECK(data<half>().size() * sizeof(half) == s.size());
+      memcpy(untyped_data(), s.data(), s.size());
       if (!kLittleEndian) {
-        ConvertEndianShort(reinterpret_cast<char*>(f16s_.data()), s.size());
+        ConvertEndianShort(reinterpret_cast<char*>(untyped_data()), s.size());
       }
-      break;
-    }
-    case BF16: {
-      const string& s(literal_proto.bf16s());
-      CHECK_EQ(0, s.size() % sizeof(bfloat16));
-      bf16s_ = std::vector<bfloat16>(s.size() / sizeof(bfloat16));
-      memcpy(bf16s_.data(), s.data(), s.size());
+    } break;
 
+    case BF16: {
+      const string& s(proto.bf16s());
+      TF_RET_CHECK(data<bfloat16>().size() * sizeof(bfloat16) == s.size());
+      memcpy(untyped_data(), s.data(), s.size());
       if (!kLittleEndian) {
-        ConvertEndianShort(reinterpret_cast<char*>(bf16s_.data()), s.size());
+        ConvertEndianShort(reinterpret_cast<char*>(untyped_data()), s.size());
       }
-      break;
-    }
+    } break;
     case F32:
-      CopyFromRepeatedField(mutable_f32s(), literal_proto.f32s());
+      TF_RETURN_IF_ERROR(CopyFromRepeatedField(data<float>(), proto.f32s()));
       break;
     case F64:
-      CopyFromRepeatedField(mutable_f64s(), literal_proto.f64s());
+      TF_RETURN_IF_ERROR(CopyFromRepeatedField(data<double>(), proto.f64s()));
       break;
-    case C64:
-      CopyFromRepeatedField(mutable_c64s(), literal_proto.c64s());
-      break;
-    case TUPLE:
-      for (const auto& proto : literal_proto.tuple_literals()) {
-        mutable_tuple_literals()->push_back(Literal(proto));
+    case C64: {
+      auto complex_data = data<complex64>();
+      TF_RET_CHECK(proto.c64s_size() == complex_data.size() * 2);
+      for (int64 i = 0; i < complex_data.size(); ++i) {
+        complex_data[i] = complex64{proto.c64s(i * 2), proto.c64s(i * 2 + 1)};
       }
+    } break;
+    case TUPLE:
+      LOG(FATAL) << "Should not be called on tuple shapes: "
+                 << ShapeUtil::HumanString(subshape());
       break;
     default:
-      LOG(FATAL) << "Unhandled primitive type " << shape().element_type();
+      LOG(FATAL) << "Unhandled primitive type " << subshape().element_type();
   }
+  return Status::OK();
 }
 
-const Literal& Literal::GetSubliteral(const ShapeIndex& index) const {
-  return const_cast<Literal*>(this)->GetSubliteral(index);
+LiteralProto Literal::ToProto() const {
+  LiteralProto proto;
+  for (const auto& pair : pieces_) {
+    const ShapeIndex& index = pair.first;
+    const Piece& piece = pair.second;
+
+    LiteralProto* proto_piece = &proto;
+    for (int64 i : index) {
+      while (proto_piece->tuple_literals_size() <= i) {
+        proto_piece->add_tuple_literals();
+      }
+      proto_piece = proto_piece->mutable_tuple_literals(i);
+    }
+    piece.WriteToProto(proto_piece);
+  }
+
+  if (LayoutUtil::IsSparseArray(shape())) {
+    CopyToRepeatedField(proto.mutable_sparse_indices(),
+                        sparse_indices()->data());
+  }
+
+  return proto;
+}
+
+/* static */
+StatusOr<std::unique_ptr<Literal>> Literal::CreateFromProto(
+    const LiteralProto& proto) {
+  if (!proto.has_shape()) {
+    return InvalidArgument("LiteralProto has no shape");
+  }
+  if (!LayoutUtil::HasLayout(proto.shape())) {
+    return InvalidArgument("LiteralProto has no layout");
+  }
+
+  auto literal = MakeUnique<Literal>(proto.shape());
+
+  for (auto& pair : literal->pieces_) {
+    const ShapeIndex& index = pair.first;
+    Piece& piece = pair.second;
+    const LiteralProto* proto_element = &proto;
+    for (int64 i : index) {
+      TF_RET_CHECK(i < proto_element->tuple_literals_size());
+      proto_element = &proto_element->tuple_literals(i);
+    }
+
+    if (ShapeUtil::IsTuple(piece.subshape())) {
+      if (proto_element->tuple_literals_size() !=
+          ShapeUtil::TupleElementCount(piece.subshape())) {
+        return InvalidArgument(
+            "Expected %lld tuple elements in LiteralProto, has %d",
+            ShapeUtil::TupleElementCount(piece.subshape()),
+            proto_element->tuple_literals_size());
+      }
+      continue;
+    }
+
+    TF_RET_CHECK(ShapeUtil::IsArray(piece.subshape()));
+    TF_RETURN_IF_ERROR(piece.CopyFromProto(*proto_element));
+  }
+  return std::move(literal);
+}
+
+const void* Literal::untyped_data(const ShapeIndex& shape_index) const {
+  return piece(shape_index).untyped_data();
+}
+
+void* Literal::untyped_data(const ShapeIndex& shape_index) {
+  return piece(shape_index).untyped_data();
+}
+
+int64 Literal::size_bytes(const ShapeIndex& shape_index) const {
+  return piece(shape_index).size_bytes();
+}
+
+string Literal::GetR1U8AsString() const {
+  CHECK(ShapeUtil::IsArray(shape()));
+  CHECK_EQ(ShapeUtil::Rank(shape()), 1);
+  CHECK_EQ(shape().element_type(), U8);
+  return string(tensorflow::bit_cast<const char*>(data<uint8>().data()),
+                ShapeUtil::ElementsIn(shape()));
+}
+
+/* static */ const LiteralView LiteralView::Create(
+    const Literal& literal, const ShapeIndex& view_root) {
+  return LiteralView(literal, view_root);
+}
+
+LiteralView::LiteralView(const Literal& literal, const ShapeIndex& view_root) {
+  shape_ = ShapeUtil::GetSubshape(literal.shape(), view_root);
+  pieces_ = ShapeTree<Piece>(shape_);
+  owns_buffers_ = false;
+  for (auto& pair : pieces_) {
+    const ShapeIndex& index = pair.first;
+    Piece& piece = pair.second;
+
+    ShapeIndex src_index = view_root;
+    for (int64 i : index) {
+      src_index.push_back(i);
+    }
+    const Piece& src_piece = literal.piece(src_index);
+    piece.set_buffer(src_piece.buffer());
+    piece.set_sparse_indices(src_piece.sparse_indices());
+    piece.set_subshape(&ShapeUtil::GetSubshape(shape_, index));
+  }
+}
+
+LiteralView::~LiteralView() {}
+
+LiteralView::LiteralView(const LiteralView& other) { CopyFrom(other); }
+
+LiteralView& LiteralView::operator=(const LiteralView& other) {
+  CopyFrom(other);
+  return *this;
 }
 
-Literal& Literal::GetSubliteral(const ShapeIndex& index) {
-  Literal* subliteral = this;
-  for (int64 i : index) {
-    subliteral = &subliteral->tuple_literals_.at(i);
+void LiteralView::CopyFrom(const LiteralView& other) {
+  // We can't use the default copy-constructor/copy-assignment because
+  // Piece::subshape_ points to subshapes within the Shape of the owning
+  // Literal/LiteralView.
+  shape_ = other.shape();
+  pieces_ = other.pieces_;
+  for (auto& pair : pieces_) {
+    const ShapeIndex& index = pair.first;
+    Piece& piece = pair.second;
+    piece.set_subshape(&ShapeUtil::GetSubshape(shape_, index));
   }
-  return *subliteral;
+  owns_buffers_ = false;
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h
index f37e529caf54e3aded1a418d1f01c1440cd0f284..e0196509a7483abac3d9c0e59a54b591a327b980 100644
--- a/tensorflow/compiler/xla/literal_util.h
+++ b/tensorflow/compiler/xla/literal_util.h
@@ -34,7 +34,9 @@ limitations under the License.
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
 #include "tensorflow/compiler/xla/ptr_util.h"
+#include "tensorflow/compiler/xla/shape_tree.h"
 #include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/sparse_index_array.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -50,152 +52,70 @@ limitations under the License.
 
 namespace xla {
 
-// Utility class for dealing with XLA literal values.  Most methods are
-// templated by native (host) type which corresponds to a unique XLA
-// PrimitiveType. See ComputationBuilder for details.  Not all primitive types
-// defined in xla_data.proto have a corresponding native type or even have a
-// storage location in the Literal proto yet (for example, primitive type F16).
+// Class representing literal values in XLA.
+//
+// TODO(b/67651157): The methods in this class should be reduced to a minimal
+// set of methods which construct Literals and accessors methods. Other methods
+// which perform computation on Literals (Reshape, Slice, etc) should be moved
+// elsewhere, and perhaps combined with evaluator code which operates on
+// Literals.
 class Literal {
  public:
-  Literal() {}
+  Literal() : Literal(ShapeUtil::MakeNil()) {}
 
-  Literal(const Literal& other) = default;
-  Literal(Literal&&) = default;
+  // Create a literal of the given shape. The literal is allocated sufficient
+  // memory to hold the shape. Memory is uninitialized.
+  explicit Literal(const Shape& shape);
+  virtual ~Literal();
 
-  explicit Literal(const LiteralProto& other) { CopyFromProto(other); }
-
-  Literal& operator=(const Literal& other) = default;
-  Literal& operator=(Literal&&) = default;
+  // Literals are moveable, but not copyable. To copy a literal use
+  // Literal::Clone or Literal::CloneToUnique. This prevents inadvertent copies
+  // of literals which can be expensive.
+  Literal(const Literal& other) = delete;
+  Literal& operator=(const Literal& other) = delete;
+  Literal(Literal&& other);
+  Literal& operator=(Literal&& other);
 
   // Literals are equal if they have compatible shapes and the same data
-  // values. Layout is not checked.
+  // values. Layout is not compared.
   bool operator==(const Literal& other) const;
   bool operator!=(const Literal& other) const { return !(*this == other); }
 
+  // Serialize to and from a proto.
+  static StatusOr<std::unique_ptr<Literal>> CreateFromProto(
+      const LiteralProto& proto);
   LiteralProto ToProto() const;
 
-  bool has_shape() const {
-    return shape_.element_type() != PRIMITIVE_TYPE_INVALID;
-  }
-
-  // Basic accessor functions.  Names mirror the original protobuf
-  // functions for convenience.
-  string DebugString() const { return ToProto().DebugString(); }
-  string ShortDebugString() const { return ToProto().ShortDebugString(); }
-
-  // Return the nested literal at the given shape index.
-  const Literal& GetSubliteral(const ShapeIndex& index) const;
-  Literal& GetSubliteral(const ShapeIndex& index);
-
-  void Clear() {
-    shape_.Clear();
-    u8s_.clear();
-    s16s_.clear();
-    s32s_.clear();
-    s64s_.clear();
-    u16s_.clear();
-    u32s_.clear();
-    u64s_.clear();
-    f16s_.clear();
-    f32s_.clear();
-    f64s_.clear();
-    tuple_literals_.clear();
-  }
-
-  int preds_size() const { return u8s().size(); }
-  const std::vector<uint8>& preds() const {
-    static_assert(sizeof(uint8) == sizeof(bool),
-                  "The uint8 and bool types should be the same size");
-    return u8s_;
-  }
-  std::vector<uint8>* mutable_preds() {
-    static_assert(sizeof(uint8) == sizeof(bool),
-                  "The uint8 and bool types should be the same size");
-    return &u8s_;
-  }
-
-  int s16s_size() const { return s16s().size(); }
-  int32 s16s(int i) const { return s16s_[i]; }
-  const std::vector<int16>& s16s() const { return s16s_; }
-  std::vector<int16>* mutable_s16s() { return &s16s_; }
-
-  int s32s_size() const { return s32s().size(); }
-  int32 s32s(int i) const { return s32s_[i]; }
-  const std::vector<int32>& s32s() const { return s32s_; }
-  std::vector<int32>* mutable_s32s() { return &s32s_; }
-
-  int s64s_size() const { return s64s().size(); }
-  void add_s64s(int64 value) { s64s_.push_back(value); }
-  const std::vector<int64>& s64s() const { return s64s_; }
-  std::vector<int64>* mutable_s64s() { return &s64s_; }
-
-  int u16s_size() const { return u16s().size(); }
-  uint32 u16s(int i) const { return u16s_[i]; }
-  const std::vector<uint16>& u16s() const { return u16s_; }
-  std::vector<uint16>* mutable_u16s() { return &u16s_; }
-
-  int u32s_size() const { return u32s().size(); }
-  uint32 u32s(int i) const { return u32s_[i]; }
-  const std::vector<uint32>& u32s() const { return u32s_; }
-  std::vector<uint32>* mutable_u32s() { return &u32s_; }
-
-  int u64s_size() const { return u64s().size(); }
-  const std::vector<uint64>& u64s() const { return u64s_; }
-  std::vector<uint64>* mutable_u64s() { return &u64s_; }
-
-  int f16s_size() const { return f16s().size(); }
-  half f16s(int i) const { return f16s_[i]; }
-  const std::vector<half>& f16s() const { return f16s_; }
-  std::vector<half>* mutable_f16s() { return &f16s_; }
-
-  int f32s_size() const { return f32s().size(); }
-  float f32s(int i) const { return f32s_[i]; }
-  void add_f32s(float value) { f32s_.push_back(value); }
-  const std::vector<float>& f32s() const { return f32s_; }
-  std::vector<float>& f32s() { return f32s_; }
-  std::vector<float>* mutable_f32s() { return &f32s_; }
-
-  int f64s_size() const { return f64s().size(); }
-  const std::vector<double>& f64s() const { return f64s_; }
-  std::vector<double>* mutable_f64s() { return &f64s_; }
-
-  int c64s_size() const { return c64s().size(); }
-  const std::vector<complex64>& c64s() const { return c64s_; }
-  std::vector<complex64>* mutable_c64s() { return &c64s_; }
-
-  int bf16s_size() const { return bf16s().size(); }
-  bfloat16 bf16s(int i) const { return bf16s_[i]; }
-  const std::vector<bfloat16>& bf16s() const { return bf16s_; }
-  std::vector<bfloat16>* mutable_bf16s() { return &bf16s_; }
-
-  int tuple_literals_size() const { return tuple_literals().size(); }
-  const Literal& tuple_literals(int i) const { return tuple_literals_[i]; }
-  Literal* add_tuple_literals() {
-    tuple_literals_.push_back(Literal());
-    return &tuple_literals_.back();
-  }
-  std::vector<Literal>* mutable_tuple_literals() { return &tuple_literals_; }
-  const std::vector<Literal>& tuple_literals() const { return tuple_literals_; }
-
-  int u8s_size() const { return u8s().size(); }
-  const std::vector<uint8>& u8s() const { return u8s_; }
-  void set_u8s(const std::vector<uint8>& value) { u8s_ = value; }
-  void set_u8s(tensorflow::StringPiece value) {
-    u8s_ = std::vector<uint8>(value.size());
-    u8s_.clear();
-    append_u8s(value);
-  }
-
-  void append_u8s(tensorflow::StringPiece value) {
-    u8s_.insert(u8s_.end(), value.begin(), value.end());
-  }
-
-  string u8s_string() const { return string(u8s().begin(), u8s().end()); }
+  // Return the shape of the literal.
+  const Shape& shape() const { return shape_; }
 
-  std::vector<uint8>* mutable_u8s() { return &u8s_; }
+  // TODO(b/67651157): Remove this accessor. Literal users should not be able to
+  // mutate the shape as this can produce malformed Literals.
+  Shape* mutable_shape_do_not_use() { return &shape_; }
 
-  const Shape& shape() const { return shape_; }
-  Shape* mutable_shape() { return &shape_; }
+  // Returns a (Mutable)ArraySlice view of the array for this literal for the
+  // given NativeT (e.g., float). CHECKs if the subshape of the literal at the
+  // given ShapeIndex is not array. See primitive_util.h for the mapping from
+  // XLA type to native type.
+  template <typename NativeT>
+  tensorflow::gtl::ArraySlice<NativeT> data(
+      const ShapeIndex& shape_index = {}) const;
+  template <typename NativeT>
+  tensorflow::gtl::MutableArraySlice<NativeT> data(
+      const ShapeIndex& shape_index = {});
+
+  // Returns a pointer to the sparse index array. Returns nullptr if the literal
+  // is not a sparse array.
+  const SparseIndexArray* sparse_indices(
+      const ShapeIndex& shape_index = {}) const;
+  SparseIndexArray* sparse_indices(const ShapeIndex& shape_index = {});
+
+  // Returns a pointer to (or size of) the underlying buffer holding the array
+  // at the given shape index. CHECKs if the subshape of the literal at the
+  // given ShapeIndex is not array.
+  const void* untyped_data(const ShapeIndex& shape_index = {}) const;
+  void* untyped_data(const ShapeIndex& shape_index = {});
+  int64 size_bytes(const ShapeIndex& shape_index = {}) const;
 
   // Creates a new literal of a given rank. To minimize ambiguity (for users
   // and the compiler) these CreateR[0-2] methods should explicitly specify the
@@ -243,6 +163,60 @@ class Literal {
           values,
       const Layout& layout);
 
+  // Returns this literal's data as a string. This literal must be a rank-1 U8
+  // array.
+  string GetR1U8AsString() const;
+
+  // Creates a literal with a sparse layout and the given indices and values.
+  // The shape is initialized from the given dimensions.  The minor dimension of
+  // the indices array must equal the rank of the shape (i.e. size of the
+  // dimensions array). The major dimension of the indices array must equal the
+  // number of elements in the values array. The maximum number of elements in
+  // the array is taken from the max_indices() value of the index array.
+  //
+  // XLA assumes that sparse literals are in sorted order for all operations. If
+  // the `sort` argument is true, then the indices and values will be sorted
+  // while copying them into the literal. If you have ensured that the indices
+  // and values are already sorted, then you may set the `sort` argument to
+  // false to skip the sorting step.
+  //
+  // For example:
+  //
+  //   CreateSparse(
+  //     {12, 12, 12},
+  //     SparseIndexArray(10, 3,
+  //                      Array2D{
+  //                        {0, 1, 2},
+  //                        {3, 4, 5},
+  //                        {6, 7, 8},
+  //                        {9, 10, 11},
+  //                      }),
+  //     {1.0, 2.0 3.0, 4.0})
+  //
+  // This creates an array with shape F64[12,12,12]sparse{10}, that has the
+  // following non-zero values:
+  //
+  //     [0,  1,  2]: 1.0
+  //     [3,  4,  5]: 2.0
+  //     [6,  7,  8]: 3.0
+  //     [9, 10, 11]: 4.0
+  //
+  template <typename NativeT>
+  static std::unique_ptr<Literal> CreateSparse(
+      tensorflow::gtl::ArraySlice<int64> dimensions, SparseIndexArray indices,
+      tensorflow::gtl::ArraySlice<NativeT> values, bool sort = true);
+
+  // Populates a literal with a sparse layout with the given indices and values.
+  // Each index in the indices array is CHECKed against the dimensions in the
+  // literal's shape.  If sort is true, then the indices and values will be
+  // sorted.  If sort is false, then the indices and values are assumed to
+  // already be in sorted order.  See CreateSparse for an example of how data
+  // are populated.
+  template <typename NativeT>
+  void PopulateSparse(SparseIndexArray indices,
+                      tensorflow::gtl::ArraySlice<NativeT> values,
+                      bool sort = true);
+
   // Creates a new Literal object with the shape specified as parameter.
   // The content of the literal values is the default value of the primitive
   // type of literal itself (0 for numeric types, and false for predicates).
@@ -256,6 +230,23 @@ class Literal {
       PrimitiveType primitive_type,
       tensorflow::gtl::ArraySlice<int64> dimensions);
 
+  // Copy values from 'src_literal' rooted at 'src_shape_index' into this
+  // literal rooted at 'dest_shape_index'. The subshape of this literal rooted
+  // at 'dest_shape_index' must be compatible with the subshape of 'src_literal'
+  // rooted at 'src_shape_index', but need not be arrays.
+  Status CopyFrom(const Literal& src_literal,
+                  const ShapeIndex& dest_shape_index = {},
+                  const ShapeIndex& src_shape_index = {});
+
+  // Similar to CopyFrom, but with move semantincs. The subshape of this literal
+  // rooted at 'dest_shape_index' must be *equal* to the shape 'src_literal'
+  // (layouts and shapes must match), but need not be arrays. The memory
+  // allocated in this literal for the subshape at dest_shape_index is
+  // deallocated, and the respective buffers are replaced with those in
+  // src_literal. Upon return, src_literal is set to a nil shape (empty tuple).
+  Status MoveFrom(Literal&& src_literal,
+                  const ShapeIndex& dest_shape_index = {});
+
   // Copies the values from src_literal, starting at src_base shape indexes,
   // to this literal, starting at dest_base, where the copy size in each
   // dimension is specified by copy_size.
@@ -265,10 +256,24 @@ class Literal {
   // Note: if either src_literal or this literal contains dimensions with zero
   // element, then copy_size must be 0 in these dimensions while the
   // corresponding base indices being 0.
-  Status Copy(const Literal& src_literal,
-              tensorflow::gtl::ArraySlice<int64> src_base,
-              tensorflow::gtl::ArraySlice<int64> dest_base,
-              tensorflow::gtl::ArraySlice<int64> copy_size);
+  // This literal and 'src_literal' must be arrays.
+  Status CopySliceFrom(const Literal& src_literal,
+                       tensorflow::gtl::ArraySlice<int64> src_base,
+                       tensorflow::gtl::ArraySlice<int64> dest_base,
+                       tensorflow::gtl::ArraySlice<int64> copy_size);
+
+  // Returns a vector containing the tuple elements of this Literal as separate
+  // Literals. This Literal must be tuple-shaped and can be a nested tuple. The
+  // elements are moved into the new Literals; no data is copied. Upon return
+  // this Literal is set to a nil shape (empty tuple)
+  std::vector<Literal> DecomposeTuple();
+
+  // This operation is the inverse of DecomposeTuple. The given elements are
+  // moved into the tuple elements of a new tuple-shaped Literal which is
+  // returned. Upon return, each of the Literals in 'elements' is set to a nil
+  // shape (empty tuple).
+  static Literal MoveIntoTuple(
+      tensorflow::gtl::MutableArraySlice<Literal> elements);
 
   // Creates a new value that has the equivalent value as this literal, but
   // conforms to new_layout; e.g. a literal matrix that was in {0, 1}
@@ -285,11 +290,16 @@ class Literal {
   std::unique_ptr<Literal> Relayout(const Layout& new_layout,
                                     const ShapeIndex& shape_index = {}) const;
 
-  // Creates a new literal by reshaping this literal to have 'shape'. Both the
-  // original shape and 'shape' must contain the same number of elements. The
+  // An overload of Relayout which changes the layout of the entire shape rather
+  // than being limited to a single array within the shape.
+  std::unique_ptr<Literal> Relayout(const Shape& shape_with_layout) const;
+
+  // Creates a new literal by reshaping this literal to have the given
+  // dimensions. The total number of elements must not change; The
   // implementation currently only supports monotonic dim0-major layouts.
+  // This literal must be an array.
   StatusOr<std::unique_ptr<Literal>> Reshape(
-      tensorflow::gtl::ArraySlice<int64> shape) const;
+      tensorflow::gtl::ArraySlice<int64> dimensions) const;
 
   // Creates a new literal by reordering the dimensions of this literal.
   // The given `permutation` must be a permutation of the dimension numbers
@@ -297,6 +307,7 @@ class Literal {
   // in the result literal (i.e., new_order[i] = old_order[permutation[i]]).
   // For example, a transpose call on a literal of shape [3 x 8 x 4] and
   // `permutation` = {2, 0, 1} returns a new literal of shape [4 x 3 x 8].
+  // This literal must be an array.
   std::unique_ptr<Literal> Transpose(
       tensorflow::gtl::ArraySlice<int64> permutation) const;
 
@@ -305,6 +316,7 @@ class Literal {
   // same rank and layout as for the given literal. The number of indices in
   // start_indices and limit_indices must be the rank of the literal, and the
   // indices follow the order of the dimensions.
+  // This literal must be an array.
   std::unique_ptr<Literal> Slice(
       tensorflow::gtl::ArraySlice<int64> start_indices,
       tensorflow::gtl::ArraySlice<int64> limit_indices) const;
@@ -312,34 +324,35 @@ class Literal {
   // Creates a literal with a prepended dimension with bound "times"; e.g. a
   // f32[3x2] with times=4 will produce a f32[4x3x2] with the 3x2 from this
   // literal replicated four times.
+  // This literal must be an array.
   template <typename NativeT>
   std::unique_ptr<Literal> Replicate(int64 times) const;
 
   // Converts this literal to another primitive type. Returns an error if the
-  // conversion is not possible.
+  // conversion is not possible. This literal must be array-shaped.
   StatusOr<std::unique_ptr<Literal>> Convert(
       PrimitiveType primitive_dest_type) const;
 
-  // Creates a literal value zero of the given primitive type.
+  // Creates a scalar literal value zero of the given primitive type.
   static Literal Zero(PrimitiveType primitive_type);
 
-  // Creates a literal value one of the given primitive type.
+  // Creates a scalar literal value one of the given primitive type.
   static Literal One(PrimitiveType primitive_type);
 
-  // Creates a literal value containing the minimum value of the given
+  // Creates a scalar literal value containing the minimum value of the given
   // primitive type. For floating-point types, returns -inf.
   static Literal MinValue(PrimitiveType primitive_type);
 
-  // Creates a literal value containing the maximum value of the given
+  // Creates a scalar literal value containing the maximum value of the given
   // primitive type. For floating-point types, returns inf.
   static Literal MaxValue(PrimitiveType primitive_type);
 
   // Creates a literal of the given shape where each element is `value`.
   template <typename NativeT>
-  static std::unique_ptr<Literal> CreateFullWithMonotonicDim0MajorLayout(
+  static std::unique_ptr<Literal> CreateFullWithDescendingLayout(
       tensorflow::gtl::ArraySlice<int64> dimensions, NativeT value);
 
-  // Creates a new literal from an array. The variants not ending with
+  // Creates a new literal from an Array type. The variants not ending with
   // WithLayout use the default XLA layout for the literal's linear
   // representation in memory.
   template <typename NativeT>
@@ -388,35 +401,50 @@ class Literal {
       std::initializer_list<std::initializer_list<NativeT>> values,
       int64 projection_p, int64 projection_z);
 
-  // Clones this literal into an owned unique_ptr version.
+  // Clones this literal into a new Literal, or new std::unique_ptr<Literal>.
+  Literal Clone() const;
   std::unique_ptr<Literal> CloneToUnique() const;
 
-  // Returns the linear index of the given index within this literal's
-  // element_type repeated field.
-  int64 LinearIndex(tensorflow::gtl::ArraySlice<int64> multi_index) const;
+  // Gets or sets an element in the literal at the given index. The multi_index
+  // is CHECKed against the dimension sizes.
+  template <typename NativeT>
+  NativeT Get(tensorflow::gtl::ArraySlice<int64> multi_index,
+              const ShapeIndex& shape_index) const;
+  template <typename NativeT>
+  void Set(tensorflow::gtl::ArraySlice<int64> multi_index,
+           const ShapeIndex& shape_index, NativeT value);
 
-  // Gets or sets an element in the literal at the given index. The index is
-  // CHECKed against the dimension sizes.
+  // Overloads of Get and Set for array literals. CHECKs if the literal is not
+  // array-shaped and dense.
   template <typename NativeT>
   NativeT Get(tensorflow::gtl::ArraySlice<int64> multi_index) const;
   template <typename NativeT>
   void Set(tensorflow::gtl::ArraySlice<int64> multi_index, NativeT value);
 
-  // Returns a (Mutable)ArraySlice view of the array for this literal for the
-  // given NativeT (e.g., float). These functions map native type to XLA
-  // PrimitiveType via template specialization. The unspecialized forms below
-  // aborts to handle the error case where the given native type does not map to
-  // an XLA primitive type.
+  // Returns the multi-index of the element in a sparse literal at the given
+  // sparse element number.  The sparse element number is the position with in
+  // the sparse array's list of (index, value) pairs, and is checked against the
+  // total number of (index, value) pairs in the sparse array.
+  tensorflow::gtl::ArraySlice<int64> GetSparseIndex(
+      int64 sparse_element_number, const ShapeIndex& shape_index = {}) const;
+
+  // Returns the value of the element in a sparse literal at the given sparse
+  // element number.  The sparse element number is the position with in the
+  // sparse array's list of (index, value) pairs, and is checked against the
+  // total number of (index, value) pairs in the sparse array.
   template <typename NativeT>
-  tensorflow::gtl::ArraySlice<NativeT> GetArraySlice() const {
-    static_assert(!std::is_same<NativeT, NativeT>::value,
-                  "Cannot map native type to primitive type.");
-  }
+  NativeT GetSparseElement(int64 sparse_element_number,
+                           const ShapeIndex& shape_index = {}) const;
+
+  // Appends the given element to the literal.  If the elements are not appended
+  // in sorted order, then SortSparseElements should be called before calling
+  // other methods.  This literal must have a sparse layout.
   template <typename NativeT>
-  tensorflow::gtl::MutableArraySlice<NativeT> GetMutableArraySlice() {
-    static_assert(!std::is_same<NativeT, NativeT>::value,
-                  "Cannot map native type to primitive type.");
-  }
+  void AppendSparseElement(tensorflow::gtl::ArraySlice<int64> multi_index,
+                           NativeT value, const ShapeIndex& shape_index = {});
+
+  // Sorts the elements in a sparse array.
+  void SortSparseElements(const ShapeIndex& shape_index = {});
 
   // Returns the element value at index (0, ..., 0), however many zeroes are
   // required for that index.
@@ -425,10 +453,16 @@ class Literal {
 
   // As Get(), but determines the correct type and converts the value
   // into text.
-  string GetAsString(tensorflow::gtl::ArraySlice<int64> multi_index) const;
+  string GetAsString(tensorflow::gtl::ArraySlice<int64> multi_index,
+                     const ShapeIndex& shape_index = {}) const;
+
+  // As GetSparseElement(), but determines the correct type and converts the
+  // value into text.
+  string GetSparseElementAsString(int64 sparse_element_number,
+                                  const ShapeIndex& shape_index = {}) const;
 
   // As Get(), but determines the correct type and converts the value into
-  // int64.
+  // int64.  This literal must be an array.
   StatusOr<int64> GetIntegralAsS64(
       tensorflow::gtl::ArraySlice<int64> multi_index) const;
 
@@ -436,7 +470,8 @@ class Literal {
   template <typename NativeT>
   static std::unique_ptr<Literal> MakeIdentityR2(int64 size);
 
-  // Returns a tuple literal composed of given literals.
+  // Returns a tuple literal composed of given literals. Data is copied from the
+  // given elements into the returned literal.
   static std::unique_ptr<Literal> MakeTuple(
       tensorflow::gtl::ArraySlice<const Literal*> elements);
 
@@ -450,10 +485,6 @@ class Literal {
   static std::unique_ptr<Literal> MakeTupleOwned(
       std::vector<std::unique_ptr<Literal>> elements);
 
-  // Validates that the data payload of the literal matches the literal shape;
-  // if it does not, an appropriate status is returned.
-  tensorflow::Status ValidateLiteral() const;
-
   // Returns a string representation of the literal value.
   string ToString(bool print_layout = false) const;
 
@@ -464,6 +495,8 @@ class Literal {
   // This function is useful if you want a polymorphic representation
   // of the tensor's elements (turning it to a string for something
   // like representation in a protobuf).
+  //
+  // This literal must have a dense layout.
   void EachCellAsString(
       const std::function<void(tensorflow::gtl::ArraySlice<int64> indices,
                                const string& value)>& per_cell) const;
@@ -472,80 +505,45 @@ class Literal {
                                    NativeT value)>
                     per_cell) const;
 
-  // Templated methods which populate the given repeated field in this literal
-  // with the given value(s). The Shape field of this literal is set
-  // to match the array dimensions and type. Examples:
+  // Populate this literal with the given values. Examples:
   //
   //   // Populate with floats.
   //   Array2D<float> float_values = ...
   //   literal.PopulateR2FromArray2D(values);
   //
   //   // Populate with int32s.
-  //   literal.PopulateR2({{1, 2}, {3, 4}});
+  //   literal.PopulateR2<int32>({{1, 2}, {3, 4}});
   //
-  template <typename NativeT>
-  void PopulateR0(NativeT values);
+  // The shape and element type of this literal must match given values. For
+  // example, in the call above to literal.PopulateR2(), 'literal' must be a 2x2
+  // array of S32.
   template <typename NativeT>
   void PopulateR1(tensorflow::gtl::ArraySlice<NativeT> values);
   void PopulateR1(const tensorflow::core::Bitmap& values);
   template <typename NativeT>
   void PopulateR2(std::initializer_list<std::initializer_list<NativeT>> values);
   template <typename NativeT>
-  void PopulateR2WithLayout(
-      std::initializer_list<std::initializer_list<NativeT>> values,
-      const Layout& layout);
-  template <typename NativeT>
   void PopulateFromArray(const Array<NativeT>& values);
   template <typename NativeT>
-  void PopulateFromArrayWithLayout(const Array<NativeT>& values,
-                                   const Layout& layout);
-  template <typename NativeT>
   void PopulateR2FromArray2D(const Array2D<NativeT>& values);
   template <typename NativeT>
-  void PopulateR2FromArray2DWithLayout(const Array2D<NativeT>& values,
-                                       const Layout& layout);
-  template <typename NativeT>
   void PopulateR3FromArray3D(const Array3D<NativeT>& values);
   template <typename NativeT>
-  void PopulateR3FromArray3DWithLayout(const Array3D<NativeT>& values,
-                                       const Layout& layout);
-  template <typename NativeT>
   void PopulateR4FromArray4D(const Array4D<NativeT>& values);
-  template <typename NativeT>
-  void PopulateR4FromArray4DWithLayout(const Array4D<NativeT>& values,
-                                       const Layout& layout);
 
   // Populates literal values by calling the generator function for every cell
   // in this literal object.
   //
   // generator must be a callable of the type
   // NativeT(tensorflow::gtl::ArraySlice<int64> indexes) or compatible.
+  //
+  // This literal must have a dense layout.
   template <typename NativeT, typename FnType>
   Status Populate(const FnType& generator);
 
-  // Creates a Literal of the given dimensions with all elements set to the
-  // given value.
-  template <typename NativeT>
-  void PopulateWithValue(NativeT value,
-                         tensorflow::gtl::ArraySlice<int64> dimensions);
-
-  // Returns a pointer to the underlying vector corresponding to the Literal's
-  // shape.
-  const void* InternalData() const;
-  void* MutableInternalData();
-
-  // Allocates space in the underlying vector of this literal sufficient to hold
-  // num_elements of this literal's primitive type. Values in the vector are set
-  // to zero. num_elements must equal the number of elements in the literal's
-  // shape.
-  void Reserve(int64 num_elements);
-
-  // Allocates space in the underlying vector of this literal sufficient to hold
-  // num_elements of this literal's primitive type and sets each element in this
-  // literal to the given value. num_elements must equal the number of elements
-  // in this literal's shape.
+  // Fills this literal with the given value.
   template <typename NativeT>
-  void Resize(int64 num_elements, NativeT value);
+  void PopulateWithValue(NativeT value);
 
   // Returns whether every element in this literal is equal to value.
   //
@@ -555,7 +553,7 @@ class Literal {
   //
   // If value doesn't fit in this literal's type, returns false.  Values of 1/0
   // are considered equal to true/false; other values are not considered equal
-  // to true.
+  // to true. Also if this literal is not array-shaped false is returned.
   bool IsAll(int8 value) const;
 
   // Like IsAll(const Literal&, int8), except we check whether the literal is
@@ -566,7 +564,7 @@ class Literal {
   // This casts value to the type of literal, then compares using ==.  The usual
   // admonishments about floating-point equality checks apply.  We expect you to
   // use this to check for values that can be expressed precisely as a float,
-  // e.g. -0.5.
+  // e.g. -0.5.  Also if this literal is not array-shaped false is returned.
   bool IsAllFloat(float value) const;
 
   // Like IsAll(const Literal&, int8), except we check whether the literal is
@@ -578,23 +576,38 @@ class Literal {
   // admonishments about floating-point equality checks apply.  We expect you to
   // use this to check for complex values that can be expressed precisely as
   // float pairs e.g. (-0.5, 1.0).
+  //
+  // This literal must have a dense layout.
   bool IsAllComplex(complex64 value) const;
 
   // Returns whether this literal is zero at the specified index. This literal
-  // must be an array.
+  // must be an array with a dense layout.
   bool IsZero(tensorflow::gtl::ArraySlice<int64> indices) const;
 
- private:
-  // Copy from a LiteralProto instance.
-  void CopyFromProto(const LiteralProto& literal_proto);
+  // Return the count of the elements in the array at the given shape index in
+  // this literal.
+  int64 element_count(const ShapeIndex& index = {}) const {
+    return ShapeUtil::ElementsIn(ShapeUtil::GetSubshape(shape(), index));
+  }
+
+  // Return the count of the elements in the sparse array at the given shape
+  // index in this literal, which will be no larger than
+  // LayoutUtil::MaxSparseElements(SetSubshape(shape(), index).layout()).
+  int64 sparse_element_count() const;
+
+ protected:
+  // 'allocate_arrays' indicates whether to allocate memory for the arrays in
+  // the shape. If false, buffer pointers inside of the Literal::Pieces are set
+  // to nullptr.
+  Literal(const Shape& shape, bool allocate_arrays);
 
-  // Internal template helper for the Copy() API, matching its arguments one by
-  // one.
-  template <typename T>
-  Status CopyRange(const Literal& src_literal,
-                   tensorflow::gtl::ArraySlice<int64> src_base,
-                   tensorflow::gtl::ArraySlice<int64> dest_base,
-                   tensorflow::gtl::ArraySlice<int64> copy_size);
+  // Internal template helper for the Literal::CopySliceFrom(), matching its
+  // arguments one by one.
+  template <typename NativeT>
+  Status CopySliceFromInternal(const Literal& src_literal,
+                               tensorflow::gtl::ArraySlice<int64> src_base,
+                               tensorflow::gtl::ArraySlice<int64> dest_base,
+                               tensorflow::gtl::ArraySlice<int64> copy_size);
 
   // Utility structure which is used to create the optimal configuration for
   // a ShapeUtil::ForEachIndex() scan across two literals.
@@ -619,163 +632,243 @@ class Literal {
     int64 minor_loop_size = 1;
   };
 
-  Shape shape_;
-  std::vector<uint8> u8s_;
-  std::vector<int16> s16s_;
-  std::vector<int32> s32s_;
-  std::vector<int64> s64s_;
-  std::vector<uint16> u16s_;
-  std::vector<uint32> u32s_;
-  std::vector<uint64> u64s_;
-  std::vector<bfloat16> bf16s_;
-  std::vector<half> f16s_;
-  std::vector<float> f32s_;
-  std::vector<double> f64s_;
-  std::vector<complex64> c64s_;
-  std::vector<Literal> tuple_literals_;
-};
-
-std::ostream& operator<<(std::ostream& out, const Literal& literal);
-
-// Declarations of template specializations for GetArraySlice and
-// GetMutableArraySlice. The specializations map native type to XLA primitive
-// type.
-template <>
-tensorflow::gtl::ArraySlice<bool> Literal::GetArraySlice<bool>() const;
-
-template <>
-tensorflow::gtl::ArraySlice<uint8> Literal::GetArraySlice<uint8>() const;
-
-template <>
-tensorflow::gtl::ArraySlice<int8> Literal::GetArraySlice<int8>() const;
-
-template <>
-tensorflow::gtl::ArraySlice<uint16> Literal::GetArraySlice<uint16>() const;
+  // A data structure representing a subshape at a particular ShapeIndex within
+  // the literal. For array-shaped ShapeIndexes, this data structure holds the
+  // pointer to the memory allocated for the array data.
+  class Piece {
+   public:
+    // Return the buffer holding the array data for this piece as an array
+    // slice. This piece must be array-shaped.
+    template <typename NativeT>
+    tensorflow::gtl::ArraySlice<NativeT> data() const;
+    template <typename NativeT>
+    tensorflow::gtl::MutableArraySlice<NativeT> data();
+
+    // Return the buffer holding the array data for this piece as a void*. This
+    // piece must be array-shaped.
+    void* untyped_data();
+    const void* untyped_data() const;
+
+    // Gets or sets an element in the array at the given index. The multi_index
+    // is CHECKed against the dimension sizes of the array.  This piece must be
+    // array-shaped.
+    template <typename NativeT>
+    NativeT Get(tensorflow::gtl::ArraySlice<int64> index) const;
+    template <typename NativeT>
+    void Set(tensorflow::gtl::ArraySlice<int64> index, NativeT value);
+
+    // Gets/sets the buffer holding the array data.
+    char* buffer() const { return buffer_; }
+    void set_buffer(char* buffer) { buffer_ = buffer; }
+
+    // The array of multi-indices that provide the locations of non-zero
+    // elements in a sparse array.  Only used if
+    // LayoutUtil::IsSparseArray(shape()) is true.
+    SparseIndexArray* sparse_indices() const { return sparse_indices_; }
+    void set_sparse_indices(SparseIndexArray* sparse_indices) {
+      sparse_indices_ = sparse_indices;
+    }
 
-template <>
-tensorflow::gtl::ArraySlice<int16> Literal::GetArraySlice<int16>() const;
+    // Gets or sets the subshape of this piece. This reference points to a
+    // subshape within the shape in the containing Literal (Literal::shape_).
+    const Shape& subshape() const { return *subshape_; }
+    void set_subshape(const Shape* subshape) { subshape_ = subshape; }
 
-template <>
-tensorflow::gtl::ArraySlice<uint32> Literal::GetArraySlice<uint32>() const;
+    // Returns the size in bytes of the buffer holding the array data.
+    int64 size_bytes() const { return ShapeUtil::ByteSizeOf(subshape()); }
 
-template <>
-tensorflow::gtl::ArraySlice<uint64> Literal::GetArraySlice<uint64>() const;
+    // Returns the number of elements in this piece's array.
+    int64 element_count() const { return ShapeUtil::ElementsIn(subshape()); }
 
-template <>
-tensorflow::gtl::ArraySlice<int32> Literal::GetArraySlice<int32>() const;
+    // Copy the data from 'src' into this piece's buffer. Shapes of this piece
+    // and src must be compatible.
+    Status CopyFrom(const Piece& src);
 
-template <>
-tensorflow::gtl::ArraySlice<int64> Literal::GetArraySlice<int64>() const;
+    // Returns true if this piece and 'other' contain the same data. This piece
+    // and 'other' must be array-shaped and compatible.
+    bool EqualElements(const Piece& other) const;
 
-template <>
-inline tensorflow::gtl::ArraySlice<float> Literal::GetArraySlice<float>()
-    const {
-  DCHECK(shape().element_type() == F32);
-  return f32s();
-}
+    // Writes the shape and data (if array-shaped) into the given proto.
+    void WriteToProto(LiteralProto* proto) const;
 
-template <>
-tensorflow::gtl::ArraySlice<double> Literal::GetArraySlice<double>() const;
+    // Copies the data from the given proto into this piece. The shape of this
+    // piece must be equal (not just compatible) to the shape of the proto.
+    Status CopyFromProto(const LiteralProto& proto);
 
-template <>
-tensorflow::gtl::ArraySlice<half> Literal::GetArraySlice<half>() const;
+    // Sorts the elements in a sparse array.
+    void SortSparseElements();
 
-template <>
-tensorflow::gtl::ArraySlice<bfloat16> Literal::GetArraySlice<bfloat16>() const;
+   private:
+    // Recursive helper for EqualElements.
+    template <typename NativeT>
+    bool EqualElementsInternal(const Piece& other,
+                               std::vector<int64>* multi_index) const;
 
-template <>
-tensorflow::gtl::ArraySlice<complex64> Literal::GetArraySlice<complex64>()
-    const;
+    // Helper for SortSparseElements that has the element type as a template
+    // parameter.
+    template <typename NativeT>
+    void SortSparseElementsInternal();
 
-template <>
-tensorflow::gtl::MutableArraySlice<bool> Literal::GetMutableArraySlice();
+    // For array-shaped pieces, this is the buffer holding the literal data.
+    char* buffer_ = nullptr;
 
-template <>
-tensorflow::gtl::MutableArraySlice<int8> Literal::GetMutableArraySlice();
+    // For sparse arrays, this is the array of indices.
+    SparseIndexArray* sparse_indices_ = nullptr;
 
-template <>
-tensorflow::gtl::MutableArraySlice<uint8> Literal::GetMutableArraySlice();
-
-template <>
-tensorflow::gtl::MutableArraySlice<int16> Literal::GetMutableArraySlice();
+    // The shape of piece. This points into the shape of the containing Literal
+    // (Literal::shape_).
+    const Shape* subshape_ = nullptr;
+  };
 
-template <>
-tensorflow::gtl::MutableArraySlice<uint16> Literal::GetMutableArraySlice();
+  // Returns the piece at the given ShapeIndex.
+  Piece& piece(const ShapeIndex& shape_index) {
+    return *pieces_.mutable_element(shape_index);
+  }
+  const Piece& piece(const ShapeIndex& shape_index) const {
+    return pieces_.element(shape_index);
+  }
 
-template <>
-tensorflow::gtl::MutableArraySlice<int32> Literal::GetMutableArraySlice();
+  // Returns the piece at the root of the shape (empty ShapeIndex).
+  Piece& root_piece() { return piece({}); }
+  const Piece& root_piece() const { return piece({}); }
 
-template <>
-tensorflow::gtl::MutableArraySlice<uint32> Literal::GetMutableArraySlice();
+  // Deallocate the buffers held by this literal (if the literal owns the
+  // buffer).
+  void DeallocateBuffers();
 
-template <>
-tensorflow::gtl::MutableArraySlice<int64> Literal::GetMutableArraySlice();
+  Shape shape_;
+  ShapeTree<Piece> pieces_;
 
-template <>
-tensorflow::gtl::MutableArraySlice<uint64> Literal::GetMutableArraySlice();
+  // Whether the buffers held in pieces_ are owned by this Literal.
+  bool owns_buffers_;
 
-template <>
-tensorflow::gtl::MutableArraySlice<float> Literal::GetMutableArraySlice();
+  // LiteralView must access and manipulate Pieces of other Literals.
+  friend class LiteralView;
+};  // namespace xla
 
-template <>
-tensorflow::gtl::MutableArraySlice<double> Literal::GetMutableArraySlice();
+std::ostream& operator<<(std::ostream& out, const Literal& literal);
 
-template <>
-tensorflow::gtl::MutableArraySlice<half> Literal::GetMutableArraySlice();
+// A read-only view of a Literal. A LiteralView contains pointers to buffers
+// owned by the viewed Literal.
+//
+// TODO(b/71550060): Replace LiteralView with Literal slice classes (immutable
+// and mutable) similar to (Mutable)ArraySlice.
+class LiteralView : public Literal {
+ public:
+  // Create and return a view of the given literal rooted at the given shape
+  // index within the given literal. A factory is used rather than a public
+  // constructor because only const LiteralViews are supported. It's still
+  // possible to create non-const LiteralViews via the copy constructors, but
+  // the factory method makes it a bit less likely. Implementing literal slices
+  // will fix this undesirable situation (b/71550060).
+  static const LiteralView Create(const Literal& literal,
+                                  const ShapeIndex& view_root = {});
 
-template <>
-tensorflow::gtl::MutableArraySlice<bfloat16> Literal::GetMutableArraySlice();
+  LiteralView(const LiteralView& other);
+  LiteralView& operator=(const LiteralView& other);
 
-template <>
-tensorflow::gtl::MutableArraySlice<complex64> Literal::GetMutableArraySlice();
+  virtual ~LiteralView();
 
-template <>
-void Literal::Resize<bool>(int64 num_elements, bool value);
+ private:
+  LiteralView(const Literal& literal, const ShapeIndex& view_root);
 
-template <>
-void Literal::Resize<int8>(int64 num_elements, int8 value);
+  // Helper for the copy constructor and copy assignment operator.
+  void CopyFrom(const LiteralView& other);
+};
 
-template <>
-void Literal::Resize<uint8>(int64 num_elements, uint8 value);
+template <typename NativeT>
+tensorflow::gtl::ArraySlice<NativeT> Literal::Piece::data() const {
+  CHECK(ShapeUtil::IsArray(subshape())) << ShapeUtil::HumanString(subshape());
+  CHECK_EQ(subshape().element_type(),
+           primitive_util::NativeToPrimitiveType<NativeT>())
+      << "Attempting to access "
+      << PrimitiveType_Name(primitive_util::NativeToPrimitiveType<NativeT>())
+      << " type, but literal element type is "
+      << PrimitiveType_Name(subshape().element_type());
+  return tensorflow::gtl::ArraySlice<NativeT>(
+      reinterpret_cast<const NativeT*>(buffer()),
+      ShapeUtil::ElementsIn(subshape()));
+}
 
-template <>
-void Literal::Resize<int32>(int64 num_elements, int32 value);
+template <typename NativeT>
+tensorflow::gtl::MutableArraySlice<NativeT> Literal::Piece::data() {
+  CHECK(ShapeUtil::IsArray(subshape())) << ShapeUtil::HumanString(subshape());
+  CHECK_EQ(subshape().element_type(),
+           primitive_util::NativeToPrimitiveType<NativeT>())
+      << "Attempting to access "
+      << PrimitiveType_Name(primitive_util::NativeToPrimitiveType<NativeT>())
+      << " type, but literal element type is "
+      << PrimitiveType_Name(subshape().element_type());
+  return tensorflow::gtl::MutableArraySlice<NativeT>(
+      reinterpret_cast<NativeT*>(buffer()), ShapeUtil::ElementsIn(subshape()));
+}
 
-template <>
-void Literal::Resize<uint32>(int64 num_elements, uint32 value);
+template <typename NativeT>
+NativeT Literal::Piece::Get(
+    tensorflow::gtl::ArraySlice<int64> multi_index) const {
+  CHECK(LayoutUtil::IsDenseArray(subshape()));
+  return data<NativeT>()[IndexUtil::MultidimensionalIndexToLinearIndex(
+      subshape(), multi_index)];
+}
 
-template <>
-void Literal::Resize<int64>(int64 num_elements, int64 value);
+template <typename NativeT>
+void Literal::Piece::Set(tensorflow::gtl::ArraySlice<int64> multi_index,
+                         NativeT value) {
+  CHECK(LayoutUtil::IsDenseArray(subshape()));
+  data<NativeT>()[IndexUtil::MultidimensionalIndexToLinearIndex(
+      subshape(), multi_index)] = value;
+}
 
-template <>
-void Literal::Resize<uint64>(int64 num_elements, uint64 value);
+template <typename NativeT>
+tensorflow::gtl::ArraySlice<NativeT> Literal::data(
+    const ShapeIndex& shape_index) const {
+  return piece(shape_index).data<NativeT>();
+}
 
-template <>
-void Literal::Resize<float>(int64 num_elements, float value);
+template <typename NativeT>
+tensorflow::gtl::MutableArraySlice<NativeT> Literal::data(
+    const ShapeIndex& shape_index) {
+  return piece(shape_index).data<NativeT>();
+}
 
-template <>
-void Literal::Resize<double>(int64 num_elements, double value);
+template <typename NativeT>
+inline NativeT Literal::Get(tensorflow::gtl::ArraySlice<int64> multi_index,
+                            const ShapeIndex& shape_index) const {
+  return piece(shape_index).Get<NativeT>(multi_index);
+}
 
-template <>
-void Literal::Resize<half>(int64 num_elements, half value);
+template <typename NativeT>
+inline NativeT Literal::Get(
+    tensorflow::gtl::ArraySlice<int64> multi_index) const {
+  return root_piece().Get<NativeT>(multi_index);
+}
 
-template <>
-void Literal::Resize<bfloat16>(int64 num_elements, bfloat16 value);
+template <typename NativeT>
+inline void Literal::Set(tensorflow::gtl::ArraySlice<int64> multi_index,
+                         const ShapeIndex& shape_index, NativeT value) {
+  return piece(shape_index).Set<NativeT>(multi_index, value);
+}
 
-template <>
-void Literal::Resize<complex64>(int64 num_elements, complex64 value);
+template <typename NativeT>
+inline void Literal::Set(tensorflow::gtl::ArraySlice<int64> multi_index,
+                         NativeT value) {
+  return root_piece().Set<NativeT>(multi_index, value);
+}
 
 template <typename NativeT>
 /* static */ std::unique_ptr<Literal> Literal::CreateR0(NativeT value) {
-  auto literal = MakeUnique<Literal>();
-  literal->PopulateR0<NativeT>(value);
+  auto literal = MakeUnique<Literal>(ShapeUtil::MakeShape(
+      primitive_util::NativeToPrimitiveType<NativeT>(), {}));
+  literal->Set({}, value);
   return literal;
 }
 
 template <typename NativeT>
 /* static */ std::unique_ptr<Literal> Literal::CreateR1(
     tensorflow::gtl::ArraySlice<NativeT> values) {
-  auto literal = MakeUnique<Literal>();
+  auto literal = MakeUnique<Literal>(
+      ShapeUtil::MakeShape(primitive_util::NativeToPrimitiveType<NativeT>(),
+                           {static_cast<int64>(values.size())}));
   literal->PopulateR1(values);
   return literal;
 }
@@ -784,8 +877,12 @@ template <typename NativeT>
 /* static */ std::unique_ptr<Literal> Literal::CreateR2WithLayout(
     std::initializer_list<std::initializer_list<NativeT>> values,
     const Layout& layout) {
-  auto literal = MakeUnique<Literal>();
-  literal->PopulateR2WithLayout(values, layout);
+  auto literal = MakeUnique<Literal>(ShapeUtil::MakeShapeWithLayout(
+      primitive_util::NativeToPrimitiveType<NativeT>(),
+      {static_cast<int64>(values.size()),
+       static_cast<int64>(values.begin()->size())},
+      AsInt64Slice(layout.minor_to_major())));
+  literal->PopulateR2(values);
   return literal;
 }
 
@@ -858,6 +955,21 @@ template <typename NativeT>
   return CreateR4FromArray4DWithLayout(tmp, layout);
 }
 
+template <typename NativeT>
+/* static */ std::unique_ptr<Literal> Literal::CreateSparse(
+    tensorflow::gtl::ArraySlice<int64> dimensions, SparseIndexArray indices,
+    tensorflow::gtl::ArraySlice<NativeT> values, bool sort) {
+  int64 num_elements = values.size();
+  int64 rank = dimensions.size();
+  CHECK_EQ(num_elements, indices.index_count());
+  CHECK_EQ(rank, indices.rank());
+  auto literal = MakeUnique<Literal>(ShapeUtil::MakeShapeWithSparseLayout(
+      primitive_util::NativeToPrimitiveType<NativeT>(), dimensions,
+      indices.max_indices()));
+  literal->PopulateSparse(indices, values, sort);
+  return literal;
+}
+
 template <typename NativeT>
 /* static */ std::unique_ptr<Literal> Literal::CreateR4(
     std::initializer_list<std::initializer_list<
@@ -869,8 +981,10 @@ template <typename NativeT>
 template <typename NativeT>
 /* static */ std::unique_ptr<Literal> Literal::CreateFromArrayWithLayout(
     const Array<NativeT>& values, const Layout& layout) {
-  auto literal = MakeUnique<Literal>();
-  literal->PopulateFromArrayWithLayout(values, layout);
+  auto literal = MakeUnique<Literal>(ShapeUtil::MakeShapeWithLayout(
+      primitive_util::NativeToPrimitiveType<NativeT>(), values.dimensions(),
+      AsInt64Slice(layout.minor_to_major())));
+  literal->PopulateFromArray(values);
   return literal;
 }
 
@@ -970,81 +1084,33 @@ template <typename NativeT>
   return CreateFromArrayWithLayout(values, layout);
 }
 
-template <typename NativeT>
-NativeT Literal::Get(tensorflow::gtl::ArraySlice<int64> multi_index) const {
-  int64 linear_index = LinearIndex(multi_index);
-  return GetArraySlice<NativeT>().at(linear_index);
-}
-
 template <typename NativeT>
 NativeT Literal::GetFirstElement() const {
-  return GetArraySlice<NativeT>().at(0);
-}
-
-template <>
-inline uint8 Literal::Get<uint8>(
-    tensorflow::gtl::ArraySlice<int64> multi_index) const {
-  CHECK(shape().element_type() == U8);
-  int64 linear_index = LinearIndex(multi_index);
-  return u8s()[linear_index];
-}
-
-template <>
-inline int8 Literal::Get<int8>(
-    tensorflow::gtl::ArraySlice<int64> multi_index) const {
-  CHECK(shape().element_type() == S8);
-  int64 linear_index = LinearIndex(multi_index);
-  return u8s()[linear_index];
-}
-
-template <>
-inline half Literal::Get<half>(
-    tensorflow::gtl::ArraySlice<int64> multi_index) const {
-  CHECK(shape().element_type() == F16);
-  int64 linear_index = LinearIndex(multi_index);
-  return GetArraySlice<half>()[linear_index];
-}
-
-template <>
-inline bfloat16 Literal::Get<bfloat16>(
-    tensorflow::gtl::ArraySlice<int64> multi_index) const {
-  CHECK(shape().element_type() == BF16);
-  int64 linear_index = LinearIndex(multi_index);
-  return GetArraySlice<bfloat16>()[linear_index];
+  return data<NativeT>().at(0);
 }
 
 template <typename NativeT>
-void Literal::Set(tensorflow::gtl::ArraySlice<int64> multi_index,
-                  NativeT value) {
-  int64 linear_index = LinearIndex(multi_index);
-  GetMutableArraySlice<NativeT>().at(linear_index) = value;
-}
-
-template <>
-inline void Literal::Set(tensorflow::gtl::ArraySlice<int64> multi_index,
-                         uint8 value) {
-  int64 linear_index = LinearIndex(multi_index);
-  (*mutable_u8s())[linear_index] = value;
+NativeT Literal::GetSparseElement(int64 sparse_element_number,
+                                  const ShapeIndex& shape_index) const {
+  CHECK(
+      LayoutUtil::IsSparseArray(ShapeUtil::GetSubshape(shape(), shape_index)));
+  return data<NativeT>(shape_index)[sparse_element_number];
 }
 
-template <>
-inline void Literal::Set(tensorflow::gtl::ArraySlice<int64> multi_index,
-                         int8 value) {
-  return Set<uint8>(multi_index, value);
-}
-
-template <>
-inline void Literal::Set(tensorflow::gtl::ArraySlice<int64> multi_index,
-                         int64 value) {
-  int64 linear_index = LinearIndex(multi_index);
-  (*mutable_s64s())[linear_index] = value;
-}
-
-template <>
-/* static */ inline void Literal::Set(
-    tensorflow::gtl::ArraySlice<int64> multi_index, uint64 value) {
-  int64 linear_index = LinearIndex(multi_index);
-  (*mutable_u64s())[linear_index] = value;
+template <typename NativeT>
+void Literal::AppendSparseElement(
+    tensorflow::gtl::ArraySlice<int64> multi_index, NativeT value,
+    const ShapeIndex& shape_index) {
+  Piece& p = piece(shape_index);
+  const Shape& subshape = p.subshape();
+  CHECK(LayoutUtil::IsSparseArray(subshape));
+  int64 rank = ShapeUtil::Rank(subshape);
+  CHECK_EQ(multi_index.size(), rank);
+  int64 last_element = p.sparse_indices()->index_count();
+  CHECK_LT(last_element, LayoutUtil::MaxSparseElements(subshape.layout()));
+  p.sparse_indices()->Append(multi_index);
+  CHECK_LT(last_element, p.data<NativeT>().size());
+  p.data<NativeT>()[last_element] = value;
 }
 
 // Returns an identity matrix (rank 2) with the given row and column count.
@@ -1071,51 +1137,31 @@ void Literal::EachCell(
   } while (IndexUtil::BumpIndices(shape(), &indices));
 }
 
-template <typename NativeT>
-inline void Literal::PopulateR0(NativeT value) {
-  *mutable_shape() = ShapeUtil::MakeShape(
-      primitive_util::NativeToPrimitiveType<NativeT>(), {});
-  Resize<NativeT>(1, value);
-}
-
 template <typename NativeT>
 inline void Literal::PopulateR1(tensorflow::gtl::ArraySlice<NativeT> values) {
-  *mutable_shape() =
-      ShapeUtil::MakeShape(primitive_util::NativeToPrimitiveType<NativeT>(),
-                           {static_cast<int64>(values.size())});
-  Reserve(values.size());
+  CHECK(ShapeUtil::IsArray(shape()));
+  CHECK_EQ(ShapeUtil::Rank(shape()), 1);
+  CHECK_EQ(ShapeUtil::ElementsIn(shape()), values.size());
+  CHECK_EQ(shape().element_type(),
+           primitive_util::NativeToPrimitiveType<NativeT>());
   for (int64 i = 0; i < values.size(); ++i) {
     Set({i}, values[i]);
   }
 }
 
-inline void Literal::PopulateR1(const tensorflow::core::Bitmap& values) {
-  *mutable_shape() =
-      ShapeUtil::MakeShape(PRED, {static_cast<int64>(values.bits())});
-  Reserve(values.bits());
-  for (int64 i = 0; i < static_cast<int64>(values.bits()); ++i) {
-    Set({i}, values.get(i));
-  }
-}
-
 template <typename NativeT>
-void Literal::PopulateR2WithLayout(
-    std::initializer_list<std::initializer_list<NativeT>> values,
-    const Layout& layout) {
-  *mutable_shape() = ShapeUtil::MakeShapeWithLayout(
-      primitive_util::NativeToPrimitiveType<NativeT>(),
-      {static_cast<int64>(values.size()),
-       static_cast<int64>(values.begin()->size())},
-      AsInt64Slice(layout.minor_to_major()));
+void Literal::PopulateR2(
+    std::initializer_list<std::initializer_list<NativeT>> values) {
+  CHECK(ShapeUtil::IsArray(shape()));
+  CHECK_EQ(ShapeUtil::Rank(shape()), 2);
+  CHECK_EQ(shape().element_type(),
+           primitive_util::NativeToPrimitiveType<NativeT>());
 
   const int64 dim0_size = values.size();
   const int64 dim1_size = values.begin()->size();
   CHECK_EQ(dim0_size, shape().dimensions(0));
   CHECK_EQ(dim1_size, shape().dimensions(1));
 
-  const int64 num_elements = dim1_size * dim0_size;
-  Reserve(num_elements);
-
   int64 dim0 = 0;
   for (auto inner_list : values) {
     int64 dim1 = 0;
@@ -1129,69 +1175,65 @@ void Literal::PopulateR2WithLayout(
 }
 
 template <typename NativeT>
-void Literal::PopulateR2(
-    std::initializer_list<std::initializer_list<NativeT>> values) {
-  PopulateR2WithLayout(values, LayoutUtil::GetDefaultLayoutForR2());
-}
-
-template <typename NativeT>
-void Literal::PopulateFromArrayWithLayout(const Array<NativeT>& values,
-                                          const Layout& layout) {
-  *mutable_shape() = ShapeUtil::MakeShapeWithLayout(
-      primitive_util::NativeToPrimitiveType<NativeT>(), values.dimensions(),
-      AsInt64Slice(layout.minor_to_major()));
-  Reserve(values.num_elements());
+void Literal::PopulateFromArray(const Array<NativeT>& values) {
+  CHECK(ShapeUtil::IsArray(shape()));
+  CHECK_EQ(shape().element_type(),
+           primitive_util::NativeToPrimitiveType<NativeT>());
+  CHECK_EQ(ShapeUtil::Rank(shape()), values.num_dimensions());
+  for (int dim = 0; dim < values.num_dimensions(); ++dim) {
+    CHECK_EQ(values.dim(dim), shape().dimensions(dim));
+  }
   values.Each([this](tensorflow::gtl::ArraySlice<int64> indices,
                      NativeT value) { this->Set(indices, value); });
 }
 
-template <typename NativeT>
-void Literal::PopulateFromArray(const Array<NativeT>& values) {
-  PopulateFromArrayWithLayout(
-      values, LayoutUtil::GetDefaultLayoutForRank(values.num_dimensions()));
-}
-
-template <typename NativeT>
-void Literal::PopulateR2FromArray2DWithLayout(const Array2D<NativeT>& values,
-                                              const Layout& layout) {
-  PopulateFromArrayWithLayout(values, layout);
-}
-
 template <typename NativeT>
 void Literal::PopulateR2FromArray2D(const Array2D<NativeT>& values) {
   PopulateFromArray(values);
 }
 
-template <typename NativeT>
-void Literal::PopulateR3FromArray3DWithLayout(const Array3D<NativeT>& values,
-                                              const Layout& layout) {
-  PopulateFromArrayWithLayout(values, layout);
-}
-
 template <typename NativeT>
 void Literal::PopulateR3FromArray3D(const Array3D<NativeT>& values) {
   PopulateFromArray(values);
 }
 
 template <typename NativeT>
-void Literal::PopulateR4FromArray4DWithLayout(const Array4D<NativeT>& values,
-                                              const Layout& layout) {
-  PopulateFromArrayWithLayout(values, layout);
+void Literal::PopulateR4FromArray4D(const Array4D<NativeT>& values) {
+  PopulateFromArray(values);
 }
 
 template <typename NativeT>
-void Literal::PopulateR4FromArray4D(const Array4D<NativeT>& values) {
-  PopulateFromArray(values);
+void Literal::PopulateSparse(SparseIndexArray indices,
+                             tensorflow::gtl::ArraySlice<NativeT> values,
+                             bool sort) {
+  CHECK(LayoutUtil::IsSparseArray(shape()));
+  int rank = ShapeUtil::Rank(shape());
+  CHECK_EQ(indices.rank(), rank);
+  int64 max_elements = LayoutUtil::MaxSparseElements(shape().layout());
+  CHECK_LE(indices.max_indices(), max_elements);
+  int64 num_elements = values.size();
+  CHECK_LE(num_elements, max_elements);
+  CHECK_EQ(num_elements, indices.index_count());
+  auto root_data = root_piece().data<NativeT>();
+  root_data.remove_suffix(max_elements - values.size());
+  std::copy(values.begin(), values.end(), root_data.begin());
+  *this->root_piece().sparse_indices() = std::move(indices);
+  if (sort) {
+    auto root_data = this->root_piece().data<NativeT>();
+    root_data.remove_suffix(root_data.size() - num_elements);
+    this->root_piece().sparse_indices()->SortWithValues(root_data);
+  }
+  DCHECK(this->root_piece().sparse_indices()->Validate(shape()));
 }
 
 template <typename NativeT, typename FnType>
 Status Literal::Populate(const FnType& generator) {
   const Shape& this_shape = shape();
   const int64 rank = ShapeUtil::Rank(this_shape);
+  TF_RET_CHECK(LayoutUtil::IsDenseArray(this_shape));
   TF_RET_CHECK(this_shape.element_type() ==
                primitive_util::NativeToPrimitiveType<NativeT>());
-  tensorflow::gtl::MutableArraySlice<NativeT> data =
-      GetMutableArraySlice<NativeT>();
+  tensorflow::gtl::MutableArraySlice<NativeT> literal_data = data<NativeT>();
   if (rank > 0) {
     StrideConfig stride_config(this_shape, this_shape,
                                AsInt64Slice(this_shape.dimensions()));
@@ -1200,11 +1242,12 @@ Status Literal::Populate(const FnType& generator) {
         ShapeUtil::GetDimension(this_shape, stride_config.minor_dimension);
 
     auto init_function = [&](const std::vector<int64>& indexes) {
-      const int64 index = LinearIndex(indexes);
+      const int64 index =
+          IndexUtil::MultidimensionalIndexToLinearIndex(shape(), indexes);
       std::copy(indexes.begin(), indexes.end(), minor_scan_indexes.begin());
       for (int64 i = 0; i < minor_dimension_size; ++i) {
         minor_scan_indexes[stride_config.minor_dimension] = i;
-        data.at(index + i) = generator(minor_scan_indexes);
+        literal_data.at(index + i) = generator(minor_scan_indexes);
       }
       return true;
     };
@@ -1213,32 +1256,27 @@ Status Literal::Populate(const FnType& generator) {
                             init_function);
   } else {
     // For scalars.
-    data.at(0) = generator({});
+    literal_data.at(0) = generator({});
   }
   return Status::OK();
 }
 
 template <typename NativeT>
-void Literal::PopulateWithValue(NativeT value,
-                                tensorflow::gtl::ArraySlice<int64> dimensions) {
-  *mutable_shape() = ShapeUtil::MakeShape(
-      primitive_util::NativeToPrimitiveType<NativeT>(), dimensions);
-  Resize<NativeT>(ShapeUtil::ElementsIn(shape()), value);
+void Literal::PopulateWithValue(NativeT value) {
+  CHECK(ShapeUtil::IsArray(shape()));
+  CHECK_EQ(shape().element_type(),
+           primitive_util::NativeToPrimitiveType<NativeT>());
+  for (NativeT& element : data<NativeT>()) {
+    element = value;
+  }
 }
 
 template <typename NativeT>
-/* static */ std::unique_ptr<Literal>
-Literal::CreateFullWithMonotonicDim0MajorLayout(
+/* static */ std::unique_ptr<Literal> Literal::CreateFullWithDescendingLayout(
     tensorflow::gtl::ArraySlice<int64> dimensions, NativeT value) {
-  Shape this_shape = ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
-      primitive_util::NativeToPrimitiveType<NativeT>(), dimensions);
-  auto literal = MakeUnique<Literal>();
-  *literal->mutable_shape() = this_shape;
-  literal->Reserve(ShapeUtil::ElementsIn(this_shape));
-  std::vector<int64> index(dimensions.size(), 0);
-  do {
-    literal->Set(index, value);
-  } while (IndexUtil::BumpIndices(this_shape, &index));
+  auto literal = MakeUnique<Literal>(ShapeUtil::MakeShapeWithDescendingLayout(
+      primitive_util::NativeToPrimitiveType<NativeT>(), dimensions));
+  literal->PopulateWithValue(value);
   return literal;
 }
 
@@ -1249,14 +1287,12 @@ std::unique_ptr<Literal> Literal::Replicate(int64 times) const {
   for (int64 bound : shape().dimensions()) {
     bounds.push_back(bound);
   }
-  auto literal = MakeUnique<Literal>();
-  *literal->mutable_shape() =
-      ShapeUtil::MakeShape(shape().element_type(), bounds);
+  auto literal =
+      MakeUnique<Literal>(ShapeUtil::MakeShape(shape().element_type(), bounds));
   int64 elements = ShapeUtil::ElementsIn(literal->shape());
   if (elements == 0) {
     return literal;
   }
-  literal->Reserve(elements);
 
   DimensionVector output_indices(bounds.size(), 0);
   tensorflow::gtl::ArraySlice<int64> input_indices = output_indices;
diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc
index 816bb3c549eaae4e8fc2b7d438627266603272f9..b3583c2eb75de8297d5e7507430491f119bd4462 100644
--- a/tensorflow/compiler/xla/literal_util_test.cc
+++ b/tensorflow/compiler/xla/literal_util_test.cc
@@ -31,6 +31,7 @@ namespace xla {
 namespace {
 
 using ::testing::ElementsAre;
+using ::testing::HasSubstr;
 
 class LiteralUtilTest : public ::testing::Test {
  protected:
@@ -192,6 +193,34 @@ TEST_F(LiteralUtilTest, CreateR3FromArray3d) {
   ASSERT_EQ(expected, result);
 }
 
+TEST_F(LiteralUtilTest, CreateSparse) {
+  std::vector<int64> dimensions = {8, 8, 8};
+  Array2D<int64> indices = {
+      {3, 4, 5},
+      {1, 2, 3},
+      {2, 3, 4},
+      {3, 5, 6},
+  };
+  std::vector<int64> values = {7, 8, 9, 10};
+  auto literal = Literal::CreateSparse<int64>(
+      dimensions, SparseIndexArray(indices.n1() + 3, indices), values);
+
+  Array2D<int64> expected_indices = {
+      {1, 2, 3},
+      {2, 3, 4},
+      {3, 4, 5},
+      {3, 5, 6},
+  };
+  std::vector<int64> expected_values = {8, 9, 7, 10};
+
+  EXPECT_EQ(literal->sparse_indices()->data(),
+            tensorflow::gtl::ArraySlice<int64>(
+                expected_indices.data(), expected_indices.num_elements()));
+  EXPECT_EQ(tensorflow::gtl::ArraySlice<int64>(literal->data<int64>().data(),
+                                               expected_values.size()),
+            tensorflow::gtl::ArraySlice<int64>(expected_values));
+}
+
 TEST_F(LiteralUtilTest, LiteralR4F32ProjectedStringifies) {
   // clang-format off
   auto literal = Literal::CreateR4Projected<float>({
@@ -293,29 +322,28 @@ TEST_F(LiteralUtilTest, NonScalarEquality) {
   auto matrix_different = Literal::CreateR2<float>({{4.0, 3.0}, {1.0, 2.0}});
   auto vector_literal = Literal::CreateR1<float>({1.0, 2.0, 3.0, 4.0});
   auto scalar = Literal::CreateR0<float>(1.0);
+  Literal nil(ShapeUtil::MakeNil());
 
   EXPECT_EQ(*matrix, *matrix);
   EXPECT_EQ(*matrix, *matrix_clone);
   EXPECT_NE(*matrix, *matrix_different);
   EXPECT_NE(*matrix, *vector_literal);
   EXPECT_NE(*matrix, *scalar);
+  EXPECT_NE(*matrix, nil);
+  EXPECT_EQ(nil, nil);
 }
 
 TEST_F(LiteralUtilTest, DifferentLayoutEquality) {
   // Test equality with literals which have different layouts.
-  auto colmajor = MakeUnique<Literal>();
-  *colmajor->mutable_shape() = ShapeUtil::MakeShape(F32, {2, 2});
-  *colmajor->mutable_shape()->mutable_layout() = LayoutUtil::MakeLayout({0, 1});
-  colmajor->Reserve(4);
+  auto colmajor =
+      MakeUnique<Literal>(ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {0, 1}));
   colmajor->Set<float>({0, 0}, 1.0);
   colmajor->Set<float>({0, 1}, 2.0);
   colmajor->Set<float>({1, 0}, 3.0);
   colmajor->Set<float>({1, 1}, 4.0);
 
-  auto rowmajor = MakeUnique<Literal>();
-  *rowmajor->mutable_shape() = ShapeUtil::MakeShape(F32, {2, 2});
-  *rowmajor->mutable_shape()->mutable_layout() = LayoutUtil::MakeLayout({1, 0});
-  rowmajor->Reserve(4);
+  auto rowmajor =
+      MakeUnique<Literal>(ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {1, 0}));
   rowmajor->Set<float>({0, 0}, 1.0);
   rowmajor->Set<float>({0, 1}, 2.0);
   rowmajor->Set<float>({1, 0}, 3.0);
@@ -515,7 +543,7 @@ TYPED_TEST(LiteralUtilTestTemplated, Relayout2x2) {
 
 TEST_F(LiteralUtilTest, ReshapeR0) {
   auto original = Literal::CreateR0<float>(1.7f);
-  auto reshape = original->Reshape(/*shape=*/{}).ConsumeValueOrDie();
+  auto reshape = original->Reshape(/*dimensions=*/{}).ConsumeValueOrDie();
   EXPECT_EQ(*original, *reshape);
 }
 
@@ -597,24 +625,26 @@ TEST_F(LiteralUtilTest, TestR4RelayoutEquivalence) {
 
 TEST_F(LiteralUtilTest, TestR2LinearLayout) {
   // Test expected memory layout of R2 dim0-minor (column-major) literal.
-  auto mat_dim0minor = Literal::CreateR2WithLayout<int>({{1, 2, 3}, {4, 5, 6}},
-                                                        layout_r2_dim0minor_);
-  EXPECT_EQ(mat_dim0minor->s32s_size(), 6);
-  EXPECT_THAT(mat_dim0minor->s32s(), ElementsAre(1, 4, 2, 5, 3, 6));
+  auto mat_dim0minor = Literal::CreateR2WithLayout<int32>(
+      {{1, 2, 3}, {4, 5, 6}}, layout_r2_dim0minor_);
+  EXPECT_EQ(mat_dim0minor->element_count(), 6);
+  EXPECT_THAT(mat_dim0minor->data<int32>(), ElementsAre(1, 4, 2, 5, 3, 6));
 
   // Test expected memory layout when using Relayout to row major.
   auto relaid_mat_to_dim0major = mat_dim0minor->Relayout(layout_r2_dim0major_);
-  EXPECT_THAT(relaid_mat_to_dim0major->s32s(), ElementsAre(1, 2, 3, 4, 5, 6));
+  EXPECT_THAT(relaid_mat_to_dim0major->data<int32>(),
+              ElementsAre(1, 2, 3, 4, 5, 6));
 
   // Test expected memory layout of R2 created with dim0-major (row-major).
-  auto mat_dim0major = Literal::CreateR2WithLayout<int>({{1, 2, 3}, {4, 5, 6}},
-                                                        layout_r2_dim0major_);
-  EXPECT_EQ(mat_dim0major->s32s_size(), 6);
-  EXPECT_THAT(mat_dim0major->s32s(), ElementsAre(1, 2, 3, 4, 5, 6));
+  auto mat_dim0major = Literal::CreateR2WithLayout<int32>(
+      {{1, 2, 3}, {4, 5, 6}}, layout_r2_dim0major_);
+  EXPECT_EQ(mat_dim0major->element_count(), 6);
+  EXPECT_THAT(mat_dim0major->data<int32>(), ElementsAre(1, 2, 3, 4, 5, 6));
 
   // Test expected memory layout when using Relayout to column major.
   auto relaid_mat_to_dim0minor = mat_dim0major->Relayout(layout_r2_dim0minor_);
-  EXPECT_THAT(relaid_mat_to_dim0minor->s32s(), ElementsAre(1, 4, 2, 5, 3, 6));
+  EXPECT_THAT(relaid_mat_to_dim0minor->data<int32>(),
+              ElementsAre(1, 4, 2, 5, 3, 6));
 }
 
 TEST_F(LiteralUtilTest, TestR3LinearLayout) {
@@ -634,27 +664,27 @@ TEST_F(LiteralUtilTest, TestR3LinearLayout) {
   auto lit_dim0minor =
       Literal::CreateR3FromArray3DWithLayout<int>(arr3d, layout_r3_dim0minor_);
 
-  EXPECT_EQ(lit_dim0minor->s32s_size(), 12);
+  EXPECT_EQ(lit_dim0minor->element_count(), 12);
   std::vector<int> expected_dim0minor{1, 7, 4, 10, 2, 8, 5, 11, 3, 9, 6, 12};
-  EXPECT_THAT(lit_dim0minor->s32s(),
+  EXPECT_THAT(lit_dim0minor->data<int32>(),
               testing::ElementsAreArray(expected_dim0minor));
 
   // Test expected memory layout when using Relayout to row major.
   auto relaid_lit_to_dim0major = lit_dim0minor->Relayout(layout_r3_dim0major_);
   std::vector<int> expected_dim0major{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
-  EXPECT_THAT(relaid_lit_to_dim0major->s32s(),
+  EXPECT_THAT(relaid_lit_to_dim0major->data<int32>(),
               testing::ElementsAreArray(expected_dim0major));
 
   // Test expected memory layout of R3 created with dim0-major (row-major).
   auto lit_dim0major =
       Literal::CreateR3FromArray3DWithLayout<int>(arr3d, layout_r3_dim0major_);
-  EXPECT_EQ(lit_dim0major->s32s_size(), 12);
-  EXPECT_THAT(lit_dim0major->s32s(),
+  EXPECT_EQ(lit_dim0major->element_count(), 12);
+  EXPECT_THAT(lit_dim0major->data<int32>(),
               testing::ElementsAreArray(expected_dim0major));
 
   // Test expected memory layout when using Relayout to column major.
   auto relaid_lit_to_dim0minor = lit_dim0major->Relayout(layout_r3_dim0minor_);
-  EXPECT_THAT(relaid_lit_to_dim0minor->s32s(),
+  EXPECT_THAT(relaid_lit_to_dim0minor->data<int32>(),
               testing::ElementsAreArray(expected_dim0minor));
 }
 
@@ -687,28 +717,28 @@ TEST_F(LiteralUtilTest, SliceR3U32Full) {
 }
 
 TEST_F(LiteralUtilTest, PopulateR1S64) {
-  Literal output;
+  Literal output(ShapeUtil::MakeShape(S64, {1}));
   output.PopulateR1<int64>({77});
   auto expected = Literal::CreateR1<int64>({77});
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateR1U64) {
-  Literal output;
+  Literal output(ShapeUtil::MakeShape(U64, {2}));
   output.PopulateR1<uint64>({{77, 88}});
   auto expected = Literal::CreateR1<uint64>({{77, 88}});
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateR1C64) {
-  Literal output;
+  Literal output(ShapeUtil::MakeShape(C64, {1}));
   output.PopulateR1<complex64>({{77, 88}});
   auto expected = Literal::CreateR1<complex64>({{77, 88}});
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateR2C64) {
-  Literal output;
+  Literal output(ShapeUtil::MakeShape(C64, {2, 2}));
   output.PopulateR2<complex64>({{{7, 8}, {9, 10}}, {{1, 2}, {3, 4}}});
   auto expected =
       Literal::CreateR2<complex64>({{{7, 8}, {9, 10}}, {{1, 2}, {3, 4}}});
@@ -716,78 +746,78 @@ TEST_F(LiteralUtilTest, PopulateR2C64) {
 }
 
 TEST_F(LiteralUtilTest, PopulateWithValueR0BF16) {
-  Literal output;
+  Literal output(ShapeUtil::MakeShape(BF16, {}));
   bfloat16 h(0.25f);
-  output.PopulateWithValue<bfloat16>(h, {});
+  output.PopulateWithValue<bfloat16>(h);
   auto expected = Literal::CreateR0<bfloat16>(h);
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateWithValueR1BF16) {
-  Literal output;
+  Literal output(ShapeUtil::MakeShape(BF16, {3}));
   bfloat16 h(0.5f);
-  output.PopulateWithValue<bfloat16>(h, {3});
+  output.PopulateWithValue<bfloat16>(h);
   auto expected = Literal::CreateR1<bfloat16>({h, h, h});
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateWithValueR2BF16) {
-  Literal output;
+  Literal output(ShapeUtil::MakeShape(BF16, {2, 2}));
   bfloat16 h(2.0f);
-  output.PopulateWithValue<bfloat16>(h, {2, 2});
+  output.PopulateWithValue<bfloat16>(h);
   auto expected = Literal::CreateR2<bfloat16>({{h, h}, {h, h}});
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateWithValueR0F32) {
-  Literal output;
-  output.PopulateWithValue<float>(2.5f, {});
+  Literal output(ShapeUtil::MakeShape(F32, {}));
+  output.PopulateWithValue<float>(2.5f);
   auto expected = Literal::CreateR0<float>(2.5f);
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateWithValueR1S64) {
-  Literal output;
-  output.PopulateWithValue<int64>(-7, {3});
+  Literal output(ShapeUtil::MakeShape(S64, {3}));
+  output.PopulateWithValue<int64>(-7);
   auto expected = Literal::CreateR1<int64>({-7, -7, -7});
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateWithValueR2U64) {
-  Literal output;
-  output.PopulateWithValue<uint64>(42, {2, 2});
+  Literal output(ShapeUtil::MakeShape(U64, {2, 2}));
+  output.PopulateWithValue<uint64>(42);
   auto expected = Literal::CreateR2<uint64>({{42, 42}, {42, 42}});
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateWithValueR2C64) {
-  Literal output;
-  output.PopulateWithValue<complex64>({4, 2}, {2, 2});
+  Literal output(ShapeUtil::MakeShape(C64, {2, 2}));
+  output.PopulateWithValue<complex64>({4, 2});
   auto expected =
       Literal::CreateR2<complex64>({{{4, 2}, {4, 2}}, {{4, 2}, {4, 2}}});
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateWithValueR0F16) {
-  Literal output;
+  Literal output(ShapeUtil::MakeShape(F16, {}));
   half h(0.25f);
-  output.PopulateWithValue<half>(h, {});
+  output.PopulateWithValue<half>(h);
   auto expected = Literal::CreateR0<half>(h);
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateWithValueR1F16) {
-  Literal output;
+  Literal output(ShapeUtil::MakeShape(F16, {3}));
   half h(0.5f);
-  output.PopulateWithValue<half>(h, {3});
+  output.PopulateWithValue<half>(h);
   auto expected = Literal::CreateR1<half>({h, h, h});
   EXPECT_EQ(output, *expected);
 }
 
 TEST_F(LiteralUtilTest, PopulateWithValueR2F16) {
-  Literal output;
+  Literal output(ShapeUtil::MakeShape(F16, {2, 2}));
   half h(2.0f);
-  output.PopulateWithValue<half>(h, {2, 2});
+  output.PopulateWithValue<half>(h);
   auto expected = Literal::CreateR2<half>({{h, h}, {h, h}});
   EXPECT_EQ(output, *expected);
 }
@@ -803,7 +833,7 @@ TEST_F(LiteralUtilTest, ReplicateR2U32) {
   EXPECT_EQ(*output, *expected);
 }
 
-TEST_F(LiteralUtilTest, Copy) {
+TEST_F(LiteralUtilTest, CopySliceFrom) {
   const int64 dimensions[] = {17, 15, 34, 21};
   const int64 layouts[][4] = {
       {3, 2, 1, 0}, {0, 2, 1, 3}, {0, 1, 2, 3}, {2, 0, 3, 1}, {1, 3, 0, 2}};
@@ -826,7 +856,7 @@ TEST_F(LiteralUtilTest, Copy) {
     const int64 src_base[] = {3, 1, 5, 7};
     const int64 dest_base[] = {6, 4, 12, 2};
     const int64 copy_size[] = {7, 8, 11, 9};
-    TF_EXPECT_OK(blank->Copy(*source, src_base, dest_base, copy_size));
+    TF_EXPECT_OK(blank->CopySliceFrom(*source, src_base, dest_base, copy_size));
 
     std::vector<int64> source_indexes(TF_ARRAYSIZE(dimensions), 0);
     std::vector<int64> blank_indexes(TF_ARRAYSIZE(dimensions), 0);
@@ -849,16 +879,16 @@ TEST_F(LiteralUtilTest, Copy) {
   }
 }
 
-TEST_F(LiteralUtilTest, CopyScalars) {
+TEST_F(LiteralUtilTest, CopyFromScalars) {
   auto zero = Literal::CreateR0<uint32>(0);
   auto nine = Literal::CreateR0<uint32>(9);
-  TF_EXPECT_OK(zero->Copy(*nine, {}, {}, {}));
+  TF_EXPECT_OK(zero->CopyFrom(*nine));
   EXPECT_EQ(*zero, *nine);
 
   auto vect = Literal::CreateR1<uint32>({3, 4, 9, 12, 5, 17, 21});
-  TF_EXPECT_OK(zero->Copy(*vect, {5}, {}, {}));
+  TF_EXPECT_OK(zero->CopySliceFrom(*vect, {5}, {}, {}));
   EXPECT_EQ(zero->Get<uint32>({}), 17);
-  TF_EXPECT_OK(vect->Copy(*zero, {}, {4}, {}));
+  TF_EXPECT_OK(vect->CopySliceFrom(*zero, {}, {4}, {}));
   EXPECT_EQ(vect->Get<uint32>({4}), 17);
 }
 
@@ -872,7 +902,7 @@ TEST_F(LiteralUtilTest, CopyFromAndToZeroElement) {
     const auto empty = Literal::CreateFromShape(empty_r1_shape);
     auto nine = Literal::CreateR1<float>({9});
 
-    TF_EXPECT_OK(nine->Copy(*empty, {0}, {0}, {0}));
+    TF_EXPECT_OK(nine->CopySliceFrom(*empty, {0}, {0}, {0}));
     EXPECT_EQ(*nine, *const_nine);
   }
 
@@ -881,18 +911,101 @@ TEST_F(LiteralUtilTest, CopyFromAndToZeroElement) {
     const auto empty = Literal::CreateFromShape(empty_r1_shape);
     auto nine = Literal::CreateR1<float>({9});
 
-    TF_EXPECT_OK(empty->Copy(*nine, {0}, {0}, {0}));
+    TF_EXPECT_OK(empty->CopySliceFrom(*nine, {0}, {0}, {0}));
     EXPECT_EQ(*empty, *const_empty);
   }
 }
 
+TEST_F(LiteralUtilTest, CopyFromNilShape) {
+  Literal nil_literal0(ShapeUtil::MakeNil());
+  Literal nil_literal1(ShapeUtil::MakeNil());
+  // This doesn't actually do any copying, but it should succeed.
+  TF_ASSERT_OK(nil_literal0.CopyFrom(nil_literal1));
+}
+
+TEST_F(LiteralUtilTest, CopyFromArrays) {
+  auto scalar_42 = Literal::CreateR0<float>(42.0);
+  auto scalar_123 = Literal::CreateR0<float>(123.0);
+  EXPECT_NE(*scalar_42, *scalar_123);
+  TF_ASSERT_OK(scalar_42->CopyFrom(*scalar_123, /*dest_shape_index=*/{},
+                                   /*src_shape_index=*/{}));
+  EXPECT_EQ(*scalar_42, *scalar_123);
+  EXPECT_EQ(scalar_42->Get<float>({}), 123.0f);
+
+  auto matrix_1234 = Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
+  auto matrix_5678 = Literal::CreateR2<float>({{5.0, 6.0}, {7.0, 8.0}});
+  EXPECT_NE(*matrix_1234, *matrix_5678);
+  EXPECT_EQ(matrix_1234->Get<float>({0, 0}), 1.0f);
+  TF_ASSERT_OK(matrix_1234->CopyFrom(*matrix_5678, /*dest_shape_index=*/{},
+                                     /*src_shape_index=*/{}));
+  EXPECT_EQ(*matrix_1234, *matrix_5678);
+  EXPECT_EQ(matrix_1234->Get<float>({0, 0}), 5.0f);
+}
+
+TEST_F(LiteralUtilTest, CopyFromTuples) {
+  auto matrix = Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
+  Literal nil_literal(ShapeUtil::MakeNil());
+  auto nested_tuple = Literal::MakeTuple(
+      {matrix.get(),
+       Literal::MakeTuple({Literal::CreateR0<int32>(42).get(),
+                           Literal::CreateR1<double>({23.0, 44.0}).get(),
+                           &nil_literal})
+           .get()});
+  // Create a tuple the same shape as the inner tuple of nested_tuple but with
+  // different values..
+  auto tuple = Literal::MakeTuple({Literal::CreateR0<int32>(-5).get(),
+                                   Literal::CreateR1<double>({2.0, 4.0}).get(),
+                                   &nil_literal});
+
+  EXPECT_EQ(*matrix, LiteralView::Create(*nested_tuple, {0}));
+  EXPECT_EQ(nested_tuple->Get<int32>({}, {1, 0}), 42);
+  EXPECT_EQ(nested_tuple->Get<double>({0}, {1, 1}), 23.0);
+  EXPECT_EQ(nested_tuple->Get<double>({1}, {1, 1}), 44.0);
+
+  // Overwrite the inner tuple element of nested_tuple with the contents of
+  // 'tuple'.
+  TF_ASSERT_OK(nested_tuple->CopyFrom(*tuple, /*dest_shape_index=*/{1},
+                                      /*src_shape_index=*/{}));
+
+  // The matrix element should be unchanged.
+  EXPECT_EQ(*matrix, LiteralView::Create(*nested_tuple, {0}));
+
+  // The tuple element should have been copied from 'tuple'.
+  EXPECT_EQ(nested_tuple->Get<int32>({}, {1, 0}), -5);
+  EXPECT_EQ(nested_tuple->Get<double>({0}, {1, 1}), 2.0);
+  EXPECT_EQ(nested_tuple->Get<double>({1}, {1, 1}), 4.0);
+}
+TEST_F(LiteralUtilTest, CopyBetweenSameTuple) {
+  auto tuple = Literal::MakeTuple(
+      {Literal::CreateR0<int32>(-2).get(), Literal::CreateR0<int32>(4).get()});
+
+  EXPECT_EQ(tuple->Get<int32>({}, {0}), -2);
+  EXPECT_EQ(tuple->Get<int32>({}, {1}), 4);
+
+  // Copy from one element to the other.
+  TF_ASSERT_OK(tuple->CopyFrom(*tuple, /*dest_shape_index=*/{1},
+                               /*src_shape_index=*/{0}));
+
+  EXPECT_EQ(tuple->Get<int32>({}, {0}), -2);
+  EXPECT_EQ(tuple->Get<int32>({}, {1}), -2);
+}
+
+TEST_F(LiteralUtilTest, CopyFromDifferentShapes) {
+  auto matrix = Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
+  auto vector = Literal::CreateR1<float>({5.0, 7.0});
+  Status status = matrix->CopyFrom(*vector);
+  ASSERT_FALSE(status.ok());
+  ASSERT_THAT(status.error_message(),
+              HasSubstr("Destination subshape incompatible"));
+}
+
 TEST_F(LiteralUtilTest, F16) {
   // Verify that the internal data views are consistent and that they
   // are in little endian format
   // TODO - modify if we make the data format machine endianess dependent
   auto m1 = Literal::CreateFromShape(ShapeUtil::MakeShape(F16, {2, 2}));
   Literal* l1 = m1.get();
-  const char* d1 = static_cast<const char*>(l1->InternalData());
+  const char* d1 = reinterpret_cast<const char*>(l1->data<half>().data());
   EXPECT_EQ(d1[0], 0);
   EXPECT_EQ(d1[1], 0);
   EXPECT_EQ(d1[2], 0);
@@ -901,13 +1014,12 @@ TEST_F(LiteralUtilTest, F16) {
   EXPECT_EQ(d1[5], 0);
   EXPECT_EQ(d1[6], 0);
   EXPECT_EQ(d1[7], 0);
-  EXPECT_EQ(l1->InternalData(), l1->MutableInternalData());
 
   half h1(1.0f);
   half h2(2.0f);
   auto m2 = Literal::CreateR2<half>({{h1, h2}, {h2, h1}});
   Literal* l2 = m2.get();
-  const char* d2 = static_cast<const char*>(l2->InternalData());
+  const char* d2 = reinterpret_cast<const char*>(l2->data<half>().data());
   EXPECT_EQ(d2[0], 0);
   EXPECT_EQ(d2[1], 0x3C);
   EXPECT_EQ(d2[2], 0);
@@ -916,7 +1028,6 @@ TEST_F(LiteralUtilTest, F16) {
   EXPECT_EQ(d2[5], 0x40);
   EXPECT_EQ(d2[6], 0);
   EXPECT_EQ(d2[7], 0x3C);
-  EXPECT_EQ(l2->InternalData(), l2->MutableInternalData());
 }
 
 TEST_F(LiteralUtilTest, Populate) {
@@ -941,7 +1052,9 @@ TEST_F(LiteralUtilTest, Populate) {
     auto generator = [&](tensorflow::gtl::ArraySlice<int64> indexes) -> uint32 {
       // Offsets from linear index just to avoid R0 literals to be initialized
       // with zero.
-      return literal->LinearIndex(indexes) + 17;
+      return IndexUtil::MultidimensionalIndexToLinearIndex(literal->shape(),
+                                                           indexes) +
+             17;
     };
     TF_EXPECT_OK(literal->Populate<uint32>(generator));
 
@@ -1118,16 +1231,18 @@ TEST_F(LiteralUtilTest, CopyFromProto_Bool) {
   for (int len = 0; len < 25; ++len) {
     p.mutable_shape()->clear_dimensions();
     p.mutable_shape()->add_dimensions(len);
+    LayoutUtil::SetToDefaultLayout(p.mutable_shape());
     p.clear_preds();
     for (int i = 0; i < len; ++i) {
       p.add_preds((i % 2) == (len % 2));
     }
 
-    Literal literal(p);
-    ASSERT_EQ(len, literal.preds_size());
+    TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Literal> literal,
+                            Literal::CreateFromProto(p));
+    ASSERT_EQ(len, literal->data<bool>().size());
     int i = 0;
-    for (auto it = literal.preds().begin(); it < literal.preds().end(); ++it) {
-      EXPECT_EQ((i % 2) == (len % 2), *it);
+    for (bool value : literal->data<bool>()) {
+      EXPECT_EQ((i % 2) == (len % 2), value);
       ++i;
     }
   }
@@ -1141,8 +1256,7 @@ TEST_F(LiteralUtilTest, ToProto_f16) {
   auto m = Literal::CreateR2<half>({{h1, h2}, {h2, h1}});
   Literal* l = m.get();
   EXPECT_EQ(4, ShapeUtil::ElementsIn(l->shape()));
-  EXPECT_EQ(4, l->f16s().size());
-  EXPECT_EQ(4, l->f16s_size());
+  EXPECT_EQ(4, l->data<half>().size());
 
   LiteralProto p = l->ToProto();
   EXPECT_EQ(4, ShapeUtil::ElementsIn(p.shape()));
@@ -1168,17 +1282,12 @@ TEST_F(LiteralUtilTest, CopyFromProto_f16) {
   p.mutable_shape()->set_element_type(F16);
   p.mutable_shape()->clear_dimensions();
   p.mutable_shape()->add_dimensions(4);
+  LayoutUtil::SetToDefaultLayout(p.mutable_shape());
   p.clear_f16s();
   p.set_f16s(half_vals, 8);
-
-  Literal literal(p);
-  ASSERT_EQ(4, literal.f16s_size());
-  ASSERT_EQ(h1, literal.f16s(0));
-  ASSERT_EQ(h2, literal.f16s(1));
-  ASSERT_EQ(h2, literal.f16s(2));
-  ASSERT_EQ(h1, literal.f16s(3));
-
-  const std::vector<half>& r = literal.f16s();
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Literal> literal,
+                          Literal::CreateFromProto(p));
+  auto r = literal->data<half>();
   ASSERT_EQ(4, r.size());
   ASSERT_EQ(h1, r[0]);
   ASSERT_EQ(h2, r[1]);
@@ -1186,24 +1295,402 @@ TEST_F(LiteralUtilTest, CopyFromProto_f16) {
   ASSERT_EQ(h1, r[3]);
 }
 
-TEST_F(LiteralUtilTest, Subliterals) {
+TEST_F(LiteralUtilTest, LiteralViewTest) {
+  auto scalar = Literal::CreateR0<float>(1.0);
+  auto matrix = Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
+  auto tuple = Literal::MakeTuple({scalar.get(), matrix.get()});
+  auto nested_tuple = Literal::MakeTuple({tuple.get(), scalar.get()});
+  Literal nil(ShapeUtil::MakeNil());
+
+  EXPECT_EQ(LiteralView::Create(*scalar, {}), *scalar);
+  EXPECT_EQ(LiteralView::Create(*matrix, {}), *matrix);
+  EXPECT_EQ(LiteralView::Create(*tuple, {}), *tuple);
+  EXPECT_EQ(LiteralView::Create(*nested_tuple, {}), *nested_tuple);
+  EXPECT_EQ(LiteralView::Create(nil, {}), nil);
+
+  EXPECT_EQ(LiteralView::Create(*tuple, {0}), *scalar);
+  EXPECT_EQ(LiteralView::Create(*tuple, {1}), *matrix);
+
+  EXPECT_EQ(LiteralView::Create(*nested_tuple, {0}), *tuple);
+  EXPECT_EQ(LiteralView::Create(*nested_tuple, {0, 0}), *scalar);
+  EXPECT_EQ(LiteralView::Create(*nested_tuple, {0, 1}), *matrix);
+  EXPECT_EQ(LiteralView::Create(*nested_tuple, {1}), *scalar);
+}
+
+TEST_F(LiteralUtilTest, MutatingLiteralView) {
+  auto scalar = Literal::CreateR0<float>(1.0);
+  auto matrix = Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
+  auto tuple = Literal::MakeTuple({scalar.get(), matrix.get()});
+  auto nested_tuple = Literal::MakeTuple({tuple.get(), scalar.get()});
+  // Verify that changing the underlying data beneath the view changes the
+  // data of the view itself.
+  const auto nested_tuple_view = LiteralView::Create(*nested_tuple);
+  EXPECT_EQ(
+      nested_tuple->Get<float>(/*multi_index=*/{}, /*shape_index=*/{0, 0}),
+      1.0f);
+  EXPECT_EQ(nested_tuple_view.Get<float>(/*multi_index=*/{},
+                                         /*shape_index=*/{0, 0}),
+            1.0f);
+  nested_tuple->Set<float>(/*multi_index=*/{}, /*shape_index=*/{0, 0}, 555.0f);
+  EXPECT_EQ(
+      nested_tuple->Get<float>(/*multi_index=*/{}, /*shape_index=*/{0, 0}),
+      555.0f);
+  EXPECT_EQ(nested_tuple_view.Get<float>(/*multi_index=*/{},
+                                         /*shape_index=*/{0, 0}),
+            555.0f);
+}
+
+TEST_F(LiteralUtilTest, LiteralViewOfALiteralView) {
   auto scalar = Literal::CreateR0<float>(1.0);
   auto matrix = Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
   auto tuple = Literal::MakeTuple({scalar.get(), matrix.get()});
   auto nested_tuple = Literal::MakeTuple({tuple.get(), scalar.get()});
 
-  EXPECT_EQ(&scalar->GetSubliteral(/*index=*/{}), scalar.get());
-  EXPECT_EQ(&matrix->GetSubliteral(/*index=*/{}), matrix.get());
-  EXPECT_EQ(&tuple->GetSubliteral(/*index=*/{}), tuple.get());
-  EXPECT_EQ(&nested_tuple->GetSubliteral(/*index=*/{}), nested_tuple.get());
+  const auto nested_tuple_view = LiteralView::Create(*nested_tuple);
+  const auto tuple_view =
+      LiteralView::Create(nested_tuple_view, /*view_root=*/{0});
+  const auto matrix_view = LiteralView::Create(tuple_view, /*view_root=*/{1});
+  EXPECT_EQ(matrix_view, *Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}}));
+}
+
+TEST_F(LiteralUtilTest, LiteralMove) {
+  std::unique_ptr<Literal> matrix =
+      Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
+  Literal literal(std::move(*matrix));
+
+  EXPECT_TRUE(
+      ShapeUtil::Equal(ShapeUtil::MakeShape(F32, {2, 2}), literal.shape()));
+  EXPECT_EQ(literal.Get<float>({0, 0}), 1.0);
+  EXPECT_EQ(literal.Get<float>({0, 1}), 2.0);
+  EXPECT_EQ(literal.Get<float>({1, 0}), 3.0);
+  EXPECT_EQ(literal.Get<float>({1, 1}), 4.0);
+}
 
-  EXPECT_EQ(tuple->GetSubliteral(/*index=*/{0}), *scalar);
-  EXPECT_EQ(tuple->GetSubliteral(/*index=*/{1}), *matrix);
+TEST_F(LiteralUtilTest, DecomposeTuple) {
+  Literal nil_literal(ShapeUtil::MakeNil());
+  auto nested_tuple = Literal::MakeTuple(
+      {Literal::CreateR2<int32>({{1, 2}, {3, 4}}).get(),
+       Literal::MakeTuple({Literal::CreateR0<int32>(42).get(),
+                           Literal::CreateR1<double>({23.0, 44.0}).get(),
+                           &nil_literal})
+           .get(),
+       &nil_literal});
+
+  EXPECT_FALSE(ShapeUtil::IsNil(nested_tuple->shape()));
+  std::vector<Literal> elements = nested_tuple->DecomposeTuple();
+  EXPECT_TRUE(ShapeUtil::IsNil(nested_tuple->shape()));
+
+  ASSERT_EQ(elements.size(), 3);
+
+  EXPECT_TRUE(ShapeUtil::Compatible(elements[0].shape(),
+                                    ShapeUtil::MakeShape(S32, {2, 2})));
+  EXPECT_EQ(elements[0].Get<int32>({0, 0}), 1);
+  EXPECT_EQ(elements[0].Get<int32>({0, 1}), 2);
+  EXPECT_EQ(elements[0].Get<int32>({1, 0}), 3);
+  EXPECT_EQ(elements[0].Get<int32>({1, 1}), 4);
+
+  EXPECT_TRUE(ShapeUtil::Compatible(
+      elements[1].shape(),
+      ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(S32, {}),
+                                 ShapeUtil::MakeShape(F64, {2}),
+                                 ShapeUtil::MakeNil()})));
+  EXPECT_EQ(elements[1].Get<int32>({}, /*shape_index=*/{0}), 42);
+  EXPECT_EQ(elements[1].Get<double>({0}, /*shape_index=*/{1}), 23.0);
+  EXPECT_EQ(elements[1].Get<double>({1}, /*shape_index=*/{1}), 44.0);
+
+  EXPECT_TRUE(ShapeUtil::Compatible(elements[2].shape(), ShapeUtil::MakeNil()));
+}
+
+TEST_F(LiteralUtilTest, DecomposeEmptyTuple) {
+  Literal nil_literal(ShapeUtil::MakeNil());
+  std::vector<Literal> elements = nil_literal.DecomposeTuple();
+  EXPECT_EQ(elements.size(), 0);
+}
+
+TEST_F(LiteralUtilTest, MoveIntoTuple) {
+  std::vector<Literal> elements;
+  elements.push_back(std::move(*Literal::CreateR0<float>(1.0)));
+  elements.push_back(std::move(*Literal::CreateR1<int32>({4, 8})));
+  elements.push_back(std::move(
+      *Literal::MakeTuple({Literal::CreateR0<int32>(42).get(),
+                           Literal::CreateR1<double>({23.0, 44.0}).get()})
+
+          ));
+
+  Literal literal = Literal::MoveIntoTuple(&elements);
+  ASSERT_TRUE(ShapeUtil::IsTuple(literal.shape()));
+  ASSERT_EQ(ShapeUtil::TupleElementCount(literal.shape()), 3);
+
+  EXPECT_EQ(literal.Get<float>({}, /*shape_index=*/{0}), 1.0);
+  EXPECT_EQ(literal.Get<int32>({0}, /*shape_index=*/{1}), 4);
+  EXPECT_EQ(literal.Get<int32>({1}, /*shape_index=*/{1}), 8);
+  EXPECT_EQ(literal.Get<int32>({}, /*shape_index=*/{2, 0}), 42);
+  EXPECT_EQ(literal.Get<double>({0}, /*shape_index=*/{2, 1}), 23.0);
+  EXPECT_EQ(literal.Get<double>({1}, /*shape_index=*/{2, 1}), 44.0);
+
+  for (const Literal& element : elements) {
+    EXPECT_TRUE(ShapeUtil::IsNil(element.shape()));
+  }
+}
+
+TEST_F(LiteralUtilTest, MoveIntoEmptyTuple) {
+  Literal literal = Literal::MoveIntoTuple({});
+  ASSERT_TRUE(ShapeUtil::IsTuple(literal.shape()));
+  ASSERT_EQ(ShapeUtil::TupleElementCount(literal.shape()), 0);
+}
+
+TEST_F(LiteralUtilTest, LiteralMoveAssignment) {
+  Literal literal;
+  EXPECT_TRUE(ShapeUtil::Equal(ShapeUtil::MakeNil(), literal.shape()));
+
+  std::unique_ptr<Literal> matrix =
+      Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
+  literal = std::move(*matrix);
+
+  EXPECT_TRUE(
+      ShapeUtil::Equal(ShapeUtil::MakeShape(F32, {2, 2}), literal.shape()));
+  EXPECT_EQ(literal.Get<float>({0, 0}), 1.0);
+  EXPECT_EQ(literal.Get<float>({0, 1}), 2.0);
+  EXPECT_EQ(literal.Get<float>({1, 0}), 3.0);
+  EXPECT_EQ(literal.Get<float>({1, 1}), 4.0);
+}
+
+TEST_F(LiteralUtilTest, LiteralViewCopy) {
+  std::unique_ptr<Literal> matrix =
+      Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
+  const auto matrix_view = LiteralView::Create(*matrix);
+  LiteralView matrix_view_copy(matrix_view);
+
+  EXPECT_EQ(matrix_view_copy.Get<float>({0, 0}), 1.0);
+  EXPECT_EQ(matrix_view_copy.Get<float>({0, 1}), 2.0);
+  EXPECT_EQ(matrix_view_copy.Get<float>({1, 0}), 3.0);
+  EXPECT_EQ(matrix_view_copy.Get<float>({1, 1}), 4.0);
+}
+
+TEST_F(LiteralUtilTest, GetSetTuple) {
+  auto tuple = Literal::MakeTuple(
+      {Literal::CreateR0<float>(42.0).get(),
+       Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}}).get()});
+  EXPECT_EQ(tuple->Get<float>(/*multi_index=*/{}, /*shape_index=*/{0}), 42.0);
+  tuple->Set<float>(/*multi_index=*/{}, /*shape_index=*/{0}, -5.0);
+  EXPECT_EQ(tuple->Get<float>(/*multi_index=*/{}, /*shape_index=*/{0}), -5.0);
+
+  EXPECT_EQ(tuple->Get<float>(/*multi_index=*/{1, 0}, /*shape_index=*/{1}),
+            3.0);
+  tuple->Set<float>(/*multi_index=*/{1, 0}, /*shape_index=*/{1}, -4.0);
+  EXPECT_EQ(tuple->Get<float>(/*multi_index=*/{1, 0}, /*shape_index=*/{1}),
+            -4.0);
+}
+
+TEST_F(LiteralUtilTest, CreateFromShapeZeroInitialized) {
+  // Literals constructed using CreateFromShape should be zero initialized.
+  std::unique_ptr<Literal> scalar_f32 =
+      Literal::CreateFromShape(ShapeUtil::MakeShape(F32, {}));
+  EXPECT_EQ(scalar_f32->Get<float>({}), 0.0);
+  EXPECT_TRUE(scalar_f32->IsAll(0));
+
+  std::unique_ptr<Literal> vector_s32 =
+      Literal::CreateFromShape(ShapeUtil::MakeShape(S32, {3}));
+  EXPECT_EQ(vector_s32->Get<int32>({0}), 0);
+  EXPECT_EQ(vector_s32->Get<int32>({1}), 0);
+  EXPECT_EQ(vector_s32->Get<int32>({2}), 0);
+  EXPECT_TRUE(vector_s32->IsAll(0));
+
+  std::unique_ptr<Literal> tuple =
+      Literal::CreateFromShape(ShapeUtil::MakeTupleShape(
+          {ShapeUtil::MakeShape(F64, {}), ShapeUtil::MakeShape(PRED, {2}),
+           ShapeUtil::MakeShape(U64, {2, 1}), ShapeUtil::MakeShape(C64, {})}));
+
+  EXPECT_EQ(tuple->Get<double>({}, {0}), 0.0);
+  EXPECT_EQ(tuple->Get<bool>({0}, {1}), false);
+  EXPECT_EQ(tuple->Get<bool>({1}, {1}), false);
+  EXPECT_EQ(tuple->Get<uint64>({0, 0}, {2}), 0);
+  EXPECT_EQ(tuple->Get<uint64>({1, 0}, {2}), 0);
+  EXPECT_EQ(tuple->Get<complex64>({}, {3}), complex64(0.0f, 0.0f));
+}
+
+TEST_F(LiteralUtilTest, ProtoRoundTrip) {
+  // Test serializing then deserializing a Literal through a proto.
+  auto one_f32 = Literal::CreateR0<float>(1.0);
+  auto two_f32 = Literal::CreateR0<float>(2.0);
+  auto vector_int8 = Literal::CreateR1<int8>({-128, 0, 2, 4, 7, 56, 127});
+  auto vector_c64 = Literal::CreateR1<complex64>({{1.0, 2.0}, {3.0, 4.0}});
+  auto vector_bfloat16 = Literal::CreateR1<bfloat16>(
+      {bfloat16{-1.0}, bfloat16{2.0}, bfloat16{-3.0}});
+  auto vector_half =
+      Literal::CreateR1<half>({half{10.0}, half{20.0}, half{-30.0}});
+  auto matrix_pred =
+      Literal::CreateR2<bool>({{true, false, true}, {false, false, true}});
+  auto tuple = Literal::MakeTuple(
+      {one_f32.get(), vector_half.get(), matrix_pred.get(), matrix_pred.get()});
+  Literal nil_literal(ShapeUtil::MakeNil());
+  auto nested_tuple = Literal::MakeTuple(
+      {tuple.get(), vector_bfloat16.get(), tuple.get(), &nil_literal});
+
+  auto to_from_proto = [](const Literal& literal) -> Literal {
+    return std::move(*Literal::CreateFromProto(literal.ToProto()).ValueOrDie());
+  };
+
+  EXPECT_EQ(*one_f32, to_from_proto(*one_f32));
+  EXPECT_EQ(*vector_c64, to_from_proto(*vector_c64));
+  EXPECT_EQ(*vector_bfloat16, to_from_proto(*vector_bfloat16));
+  EXPECT_EQ(*matrix_pred, to_from_proto(*matrix_pred));
+  EXPECT_EQ(*tuple, to_from_proto(*tuple));
+  EXPECT_EQ(*nested_tuple, to_from_proto(*nested_tuple));
+  EXPECT_EQ(nil_literal, to_from_proto(nil_literal));
+
+  EXPECT_NE(*one_f32, *two_f32);
+  EXPECT_NE(*one_f32, to_from_proto(*two_f32));
+}
+
+TEST_F(LiteralUtilTest, InvalidProtoNoValues) {
+  // Proto contains a shape, but no values.
+  LiteralProto proto;
+  *proto.mutable_shape() = ShapeUtil::MakeShape(F32, {3});
+  Status status = Literal::CreateFromProto(proto).status();
+  ASSERT_FALSE(status.ok());
+  ASSERT_THAT(status.error_message(),
+              HasSubstr("Expected 3 elements in LiteralProto"));
+}
+
+TEST_F(LiteralUtilTest, InvalidProtoNoShape) {
+  // Proto contains values, but no shape.
+  LiteralProto proto;
+  proto.add_preds(false);
+  proto.add_preds(true);
+  proto.add_preds(false);
+  Status status = Literal::CreateFromProto(proto).status();
+  ASSERT_FALSE(status.ok());
+  ASSERT_THAT(status.error_message(), HasSubstr("LiteralProto has no shape"));
+}
+
+TEST_F(LiteralUtilTest, InvalidProtoWrongContainer) {
+  // Proto contains values in wrong container.
+  LiteralProto proto;
+  *proto.mutable_shape() = ShapeUtil::MakeShape(F32, {3});
+  proto.add_preds(false);
+  proto.add_preds(true);
+  proto.add_preds(false);
+  Status status = Literal::CreateFromProto(proto).status();
+  ASSERT_FALSE(status.ok());
+  ASSERT_THAT(status.error_message(),
+              HasSubstr("Expected 3 elements in LiteralProto"));
+}
+
+TEST_F(LiteralUtilTest, InvalidProtoTooFewValues) {
+  // Proto contains too few values.
+  LiteralProto proto;
+  *proto.mutable_shape() = ShapeUtil::MakeShape(F32, {42, 2});
+  proto.add_f32s(1.0);
+  proto.add_f32s(2.0);
+  proto.add_f32s(3.0);
+  Status status = Literal::CreateFromProto(proto).status();
+  ASSERT_FALSE(status.ok());
+  ASSERT_THAT(status.error_message(),
+              HasSubstr("Expected 84 elements in LiteralProto"));
+}
+
+TEST_F(LiteralUtilTest, InvalidProtoTooManyValues) {
+  // Proto contains too many values.
+  LiteralProto proto;
+  *proto.mutable_shape() = ShapeUtil::MakeShape(S32, {2});
+  proto.add_s32s(42);
+  proto.add_s32s(-10);
+  proto.add_s32s(100);
+  Status status = Literal::CreateFromProto(proto).status();
+  ASSERT_FALSE(status.ok());
+  ASSERT_THAT(status.error_message(),
+              HasSubstr("Expected 2 elements in LiteralProto"));
+}
+
+TEST_F(LiteralUtilTest, InvalidProtoMissingLayout) {
+  // Proto shape missing layout.
+  LiteralProto proto;
+  *proto.mutable_shape() = ShapeUtil::MakeShape(PRED, {2, 2});
+  LayoutUtil::ClearLayout(proto.mutable_shape());
+  proto.add_preds(true);
+  proto.add_preds(false);
+  proto.add_preds(true);
+  proto.add_preds(false);
+  Status status = Literal::CreateFromProto(proto).status();
+  ASSERT_FALSE(status.ok());
+  ASSERT_THAT(status.error_message(), HasSubstr("LiteralProto has no layout"));
+}
+
+TEST_F(LiteralUtilTest, InvalidProtoTooFewTupleElements) {
+  // Proto has the too few tuple elements.
+  LiteralProto proto;
+  *proto.mutable_shape() = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShape(PRED, {2}), ShapeUtil::MakeShape(F32, {})});
+  LiteralProto* element0 = proto.add_tuple_literals();
+  *element0->mutable_shape() =
+      ShapeUtil::GetTupleElementShape(proto.shape(), 0);
+  element0->add_preds(false);
+  element0->add_preds(true);
+
+  Status status = Literal::CreateFromProto(proto).status();
+  ASSERT_FALSE(status.ok());
+  ASSERT_THAT(status.error_message(), HasSubstr("Expected 2 tuple elements"));
+}
+
+TEST_F(LiteralUtilTest, InvalidProtoTooManyTupleElements) {
+  // Proto has the too many tuple elements.
+  LiteralProto proto;
+  *proto.mutable_shape() = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShape(PRED, {2}), ShapeUtil::MakeShape(F32, {})});
+  LiteralProto* element0 = proto.add_tuple_literals();
+  *element0->mutable_shape() =
+      ShapeUtil::GetTupleElementShape(proto.shape(), 0);
+  element0->add_preds(false);
+  element0->add_preds(true);
+  LiteralProto* element1 = proto.add_tuple_literals();
+  *element1->mutable_shape() =
+      ShapeUtil::GetTupleElementShape(proto.shape(), 1);
+  element1->add_f32s(42.0);
+  LiteralProto* element2 = proto.add_tuple_literals();
+  *element2->mutable_shape() = ShapeUtil::MakeShape(F32, {});
+  element2->add_f32s(123.0);
+
+  Status status = Literal::CreateFromProto(proto).status();
+  ASSERT_FALSE(status.ok());
+  ASSERT_THAT(status.error_message(), HasSubstr("Expected 2 tuple elements"));
+}
+
+TEST_F(LiteralUtilTest, SortSparseElements) {
+  auto literal =
+      Literal::CreateSparse<float>({10, 10, 10}, SparseIndexArray(10, 3), {});
+  literal->AppendSparseElement<float>({2, 3, 4}, 2.0);
+  literal->AppendSparseElement<float>({3, 4, 5}, 3.0);
+  literal->AppendSparseElement<float>({1, 2, 3}, 1.0);
+  literal->SortSparseElements();
+  ASSERT_EQ(literal->ToString(false),
+            "f32[10,10,10]{[1, 2, 3]: 1, [2, 3, 4]: 2, [3, 4, 5]: 3}");
+}
 
-  EXPECT_EQ(nested_tuple->GetSubliteral(/*index=*/{0}), *tuple);
-  EXPECT_EQ(nested_tuple->GetSubliteral(/*index=*/{0, 0}), *scalar);
-  EXPECT_EQ(nested_tuple->GetSubliteral(/*index=*/{0, 1}), *matrix);
-  EXPECT_EQ(nested_tuple->GetSubliteral(/*index=*/{1}), *scalar);
+TEST_F(LiteralUtilTest, GetSparseElementAsString) {
+  std::vector<int64> dimensions = {10, 10, 10};
+  SparseIndexArray indices(10, {{1, 2, 3}, {2, 3, 4}, {3, 4, 5}});
+
+  ASSERT_EQ(
+      Literal::CreateSparse<bool>(dimensions, indices, {true, false, true})
+          ->GetSparseElementAsString(1),
+      "false");
+  ASSERT_EQ(Literal::CreateSparse<int64>(dimensions, indices, {1, 2, 3})
+                ->GetSparseElementAsString(1),
+            tensorflow::strings::StrCat(int64{2}));
+  ASSERT_EQ(Literal::CreateSparse<double>(dimensions, indices, {1.0, 2.0, 3.0})
+                ->GetSparseElementAsString(1),
+            tensorflow::strings::StrCat(double{2.0}));
+  ASSERT_EQ(Literal::CreateSparse<half>(dimensions, indices,
+                                        {half{1.0}, half{2.0}, half{3.0}})
+                ->GetSparseElementAsString(1),
+            tensorflow::strings::StrCat(half{2.0}));
+  ASSERT_EQ(
+      Literal::CreateSparse<complex64>(
+          dimensions, indices,
+          std::vector<complex64>{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}})
+          ->GetSparseElementAsString(1),
+      tensorflow::strings::StrCat("(", float{3.0}, ", ", float{4.0}, ")"));
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/map_util.h b/tensorflow/compiler/xla/map_util.h
index 51d0d5f86f00c539951e8e2baa6296337a5a21e9..50659c12405f2a29c69b03b3c7de5bd6cb6af9c2 100644
--- a/tensorflow/compiler/xla/map_util.h
+++ b/tensorflow/compiler/xla/map_util.h
@@ -60,6 +60,12 @@ bool ContainsKey(const Collection& collection, const Key& key) {
   return collection.find(key) != collection.end();
 }
 
+// Inserts `value` into `set`. Dies if it was already present.
+template <class Set>
+void InsertOrDie(Set* const set, const typename Set::value_type& value) {
+  CHECK(set->insert(value).second) << "duplicate value: " << value;
+}
+
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_MAP_UTIL_H_
diff --git a/tensorflow/compiler/xla/packed_literal_reader.cc b/tensorflow/compiler/xla/packed_literal_reader.cc
index 70e0f5a74711c8ceef1b6d4225141aa1cc9c6219..857aae0a7982a57bb3057a6f267f5f033a0fdde4 100644
--- a/tensorflow/compiler/xla/packed_literal_reader.cc
+++ b/tensorflow/compiler/xla/packed_literal_reader.cc
@@ -44,11 +44,11 @@ StatusOr<std::unique_ptr<Literal>> PackedLiteralReader::Read(
   VLOG(3) << "reading shape from file: " << ShapeUtil::HumanString(shape)
           << " layout: "
           << (layout == nullptr ? "<none>" : layout->ShortDebugString());
-  auto result = MakeUnique<Literal>();
-  *result->mutable_shape() = shape;
+  Shape literal_shape = shape;
   if (layout != nullptr) {
-    TF_RETURN_IF_ERROR(LayoutUtil::ValidateLayoutForShape(*layout, shape));
-    *result->mutable_shape()->mutable_layout() = *layout;
+    TF_RETURN_IF_ERROR(
+        LayoutUtil::ValidateLayoutForShape(*layout, literal_shape));
+    *literal_shape.mutable_layout() = *layout;
   }
 
   if (shape.element_type() != F32) {
@@ -57,10 +57,12 @@ StatusOr<std::unique_ptr<Literal>> PackedLiteralReader::Read(
         PrimitiveType_Name(shape.element_type()).c_str());
   }
 
+  auto result = MakeUnique<Literal>(literal_shape);
+  result->PopulateWithValue(std::numeric_limits<float>::quiet_NaN());
+
   int64 elements = ShapeUtil::ElementsIn(shape);
-  result->Resize(elements, std::numeric_limits<float>::quiet_NaN());
-  std::vector<float>* field = result->mutable_f32s();
-  char* data = tensorflow::bit_cast<char*>(field->data());
+  tensorflow::gtl::ArraySlice<float> field = result->data<float>();
+  char* data = tensorflow::bit_cast<char*>(field.data());
   uint64 bytes = elements * sizeof(float);
   tensorflow::StringPiece sp;
   auto s = file_->Read(offset_, bytes, &sp, data);
diff --git a/tensorflow/compiler/xla/primitive_util.h b/tensorflow/compiler/xla/primitive_util.h
index 19c6a138885c61f1304bfae3d8bb5d958a1bb5bc..cb4583d198b454be1432134a9f6a77dbbbe5bdd8 100644
--- a/tensorflow/compiler/xla/primitive_util.h
+++ b/tensorflow/compiler/xla/primitive_util.h
@@ -26,6 +26,13 @@ limitations under the License.
 namespace xla {
 namespace primitive_util {
 
+// The number of exponent bits in a BF16 value.
+const int kBFloat16ExponentBits = 8;
+
+// The number of mantissa bits in a BF16 value. There is an implicit leading
+// 1, so there is an implicit additional bit of precision.
+const int kBFloat16MantissaBits = 7;
+
 // Returns the XLA primitive type (eg, F32) corresponding to the given
 // template parameter native type (eg, float).
 template <typename NativeT>
diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..a8ca0e3ea0115d412e96ebacb320cc0dde061dff
--- /dev/null
+++ b/tensorflow/compiler/xla/python/BUILD
@@ -0,0 +1,85 @@
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//tensorflow:internal"])
+
+load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
+
+py_library(
+    name = "xla_client",
+    srcs = ["xla_client.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":pywrap_xla",
+        "//tensorflow/compiler/xla:xla_data_proto_py",
+    ],
+)
+
+py_test(
+    name = "xla_client_test",
+    srcs = ["xla_client_test.py"],
+    main = "xla_client_test.py",
+    srcs_version = "PY2AND3",
+    deps = [
+        ":xla_client",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
+cc_library(
+    name = "numpy_bridge",
+    srcs = ["numpy_bridge.cc"],
+    hdrs = ["numpy_bridge.h"],
+    deps = [
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/core:lib",
+        "//tensorflow/python:numpy_lib",
+    ],
+)
+
+cc_library(
+    name = "local_computation_builder",
+    srcs = ["local_computation_builder.cc"],
+    hdrs = ["local_computation_builder.h"],
+    deps = [
+        "//tensorflow/compiler/xla:executable_run_options",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:client_library",
+        "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/client:local_client",
+        "//tensorflow/compiler/xla/service:shaped_buffer",
+        "//tensorflow/core:framework_lite",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_py_wrap_cc(
+    name = "pywrap_xla",
+    srcs = ["xla.i"],
+    swig_includes = [
+        "local_computation_builder.i",
+    ],
+    deps = [
+        ":local_computation_builder",
+        ":numpy_bridge",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service:cpu_plugin",
+    ],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/compiler/xla/python/__init__.py b/tensorflow/compiler/xla/python/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
new file mode 100644
index 0000000000000000000000000000000000000000..37f1eada2bc9f5ef72d99a835a17b4e78a354ae6
--- /dev/null
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -0,0 +1,537 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/python/local_computation_builder.h"
+#include "tensorflow/compiler/xla/executable_run_options.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/platform/default/thread_annotations.h"
+
+namespace xla {
+
+namespace swig {
+
+// TODO(b/34473877) Ideally XLA would support AllReduce among arbitrary sets of
+// device handles instead of needing to set the number of replicas at XLA
+// service initialization time.
+tensorflow::mutex g_local_client_mutex(tensorflow::LINKER_INITIALIZED);
+int g_replica_count GUARDED_BY(g_local_client_mutex) = 1;
+LocalClient* g_local_client GUARDED_BY(g_local_client_mutex) = nullptr;
+
+Status InitializeReplicaCount(int replica_count) {
+  if (replica_count < 1) {
+    return InvalidArgument("Replica count must be >= 1; got %d.",
+                           replica_count);
+  }
+  tensorflow::mutex_lock lock(g_local_client_mutex);
+  if (g_local_client != nullptr) {
+    return FailedPrecondition(
+        "Attempted to set the replica count to %d, but a local XLA service was "
+        "previously created with a replica count of %d.",
+        replica_count, g_replica_count);
+  }
+  g_replica_count = replica_count;
+  return Status::OK();
+}
+
+int GetReplicaCount() {
+  tensorflow::mutex_lock lock(g_local_client_mutex);
+  return g_replica_count;
+}
+
+LocalClient* GetOrCreateLocalClient() {
+  tensorflow::mutex_lock lock(g_local_client_mutex);
+  if (g_local_client != nullptr) {
+    return g_local_client;
+  }
+  LocalClientOptions options;
+  options.set_number_of_replicas(g_replica_count);
+  g_local_client = ClientLibrary::GetOrCreateLocalClient(options).ValueOrDie();
+  CHECK(g_local_client != nullptr);
+  return g_local_client;
+}
+
+Status TransferToInfeedLocal(const Literal& literal) {
+  VLOG(1) << "Infeeding literal without replica number; shape: "
+          << literal.shape();
+  LocalClient* client = GetOrCreateLocalClient();
+  return client->TransferToInfeedLocal(literal, /*device_ordinal=*/0);
+}
+
+Status TransferToInfeedLocalReplica(const Literal& literal,
+                                    int replica_number) {
+  VLOG(1) << "Infeeding shape " << literal.shape()
+          << " to replica number: " << replica_number;
+  LocalClient* client = GetOrCreateLocalClient();
+  TF_ASSIGN_OR_RETURN(int device_ordinal,
+                      client->ReplicaNumberToDeviceOrdinal(replica_number));
+  return client->TransferToInfeedLocal(literal, device_ordinal);
+}
+
+StatusOr<std::unique_ptr<Literal>> TransferFromOutfeedLocalReplica(
+    const Shape& shape, int replica_number) {
+  VLOG(1) << "Outfeeding literal from replica number: " << replica_number
+          << " shape: " << shape;
+  LocalClient* client = GetOrCreateLocalClient();
+  TF_ASSIGN_OR_RETURN(int device_ordinal,
+                      client->ReplicaNumberToDeviceOrdinal(replica_number));
+  return client->TransferFromOutfeedLocal(shape, device_ordinal);
+}
+
+LocalShapedBuffer::LocalShapedBuffer(
+    std::unique_ptr<ScopedShapedBuffer> shaped_buffer)
+    : shaped_buffer_(std::move(shaped_buffer)) {}
+
+const std::unique_ptr<ScopedShapedBuffer>& LocalShapedBuffer::shaped_buffer()
+    const {
+  return shaped_buffer_;
+}
+
+/* static */
+LocalShapedBuffer* LocalShapedBuffer::FromLiteral(const Literal& argument) {
+  LocalClient* client = GetOrCreateLocalClient();
+  std::unique_ptr<ScopedShapedBuffer> buf =
+      client
+          ->LiteralToShapedBuffer(argument,
+                                  /*device_ordinal=*/0,
+                                  client->backend().memory_allocator())
+          .ConsumeValueOrDie();
+  return new LocalShapedBuffer(std::move(buf));
+}
+
+std::unique_ptr<Literal> LocalShapedBuffer::ToLiteral() const {
+  LocalClient* client = GetOrCreateLocalClient();
+  return client->ShapedBufferToLiteral(*shaped_buffer()).ConsumeValueOrDie();
+}
+
+CompiledLocalComputation::CompiledLocalComputation(
+    std::unique_ptr<LocalExecutable> executable)
+    : executable_(std::move(executable)) {}
+
+StatusOr<std::unique_ptr<Literal>> CompiledLocalComputation::Execute(
+    const std::vector<Literal>& arguments) {
+  LocalClient* client = GetOrCreateLocalClient();
+
+  VLOG(1) << "Execution requested with " << GetReplicaCount() << " replicas.";
+
+  // Each replica populates a StatusOr result, but only replica zero actually
+  // retrieves its literal value.
+  std::vector<StatusOr<std::unique_ptr<Literal>>> results(GetReplicaCount());
+  {
+    tensorflow::thread::ThreadPool pool(tensorflow::Env::Default(), "xlarun",
+                                        GetReplicaCount());
+
+    for (int replica = 0; replica < GetReplicaCount(); ++replica) {
+      pool.Schedule([this, client, replica, &arguments, &results] {
+        StatusOr<int> device_ordinal_status =
+            client->ReplicaNumberToDeviceOrdinal(replica);
+        if (!device_ordinal_status.ok()) {
+          results[replica] = device_ordinal_status.status();
+          return;
+        }
+        const int device_ordinal = device_ordinal_status.ValueOrDie();
+        VLOG(3) << "Replica " << replica
+                << " mapped to device ordinal for execution: "
+                << device_ordinal;
+        // Transfer arguments in
+        std::vector<std::unique_ptr<ScopedShapedBuffer>> scoped_buffers;
+        scoped_buffers.reserve(arguments.size());
+        for (const Literal& argument : arguments) {
+          StatusOr<std::unique_ptr<ScopedShapedBuffer>> pushed =
+              client->LiteralToShapedBuffer(
+                  argument, device_ordinal,
+                  client->backend().memory_allocator());
+          if (!pushed.ok()) {
+            results[replica] = pushed.status();
+            return;
+          }
+          scoped_buffers.push_back(std::move(pushed).ValueOrDie());
+        }
+
+        // Execute
+        std::vector<const ShapedBuffer*> argument_buffers;
+        argument_buffers.reserve(scoped_buffers.size());
+        for (auto& buffer : scoped_buffers) {
+          argument_buffers.push_back(buffer.get());
+        }
+
+        DeviceAssignment device_assignment =
+            client->backend()
+                .computation_placer()
+                ->AssignDevices(GetReplicaCount(), /*computation_count=*/1)
+                .ConsumeValueOrDie();
+
+        ExecutableRunOptions options;
+        options.set_device_ordinal(device_ordinal);
+        options.set_allocator(client->backend().memory_allocator());
+        options.set_inter_op_thread_pool(
+            client->backend().inter_op_thread_pool());
+        options.set_intra_op_thread_pool(
+            client->backend().eigen_intra_op_thread_pool_device());
+        options.set_device_assignment(&device_assignment);
+        StatusOr<std::unique_ptr<ScopedShapedBuffer>> result_buffer_status =
+            executable_->Run(argument_buffers, options);
+        if (!result_buffer_status.ok()) {
+          results[replica] = result_buffer_status.status();
+          return;
+        }
+
+        // Transfer result out
+        results[replica] =
+            client->ShapedBufferToLiteral(*result_buffer_status.ValueOrDie());
+      });
+    }
+  }
+
+  for (int replica = 0; replica < GetReplicaCount(); ++replica) {
+    const auto& statusor = results[replica];
+    if (!statusor.ok()) {
+      return InternalError(
+          "Failed running replica %d (other replicas may have failed as well): "
+          "%s.",
+          replica, statusor.status().ToString().c_str());
+    }
+  }
+
+  return std::move(results[0]);
+}
+
+LocalShapedBuffer* CompiledLocalComputation::ExecuteWithShapedBuffers(
+    tensorflow::gtl::ArraySlice<LocalShapedBuffer*> argument_handles) {
+  LocalClient* client = GetOrCreateLocalClient();
+
+  std::vector<const ShapedBuffer*> argument_buffers;
+  argument_buffers.reserve(argument_handles.size());
+  for (auto& handle : argument_handles) {
+    argument_buffers.push_back(handle->shaped_buffer().get());
+  }
+
+  // Execute
+  ExecutableRunOptions options;
+  options.set_allocator(client->backend().memory_allocator());
+  options.set_inter_op_thread_pool(client->backend().inter_op_thread_pool());
+  options.set_intra_op_thread_pool(
+      client->backend().eigen_intra_op_thread_pool_device());
+  std::unique_ptr<ScopedShapedBuffer> result_buffer =
+      executable_->Run(argument_buffers, options).ConsumeValueOrDie();
+
+  return new LocalShapedBuffer(std::move(result_buffer));
+}
+
+LocalComputation::LocalComputation(Computation computation)
+    : computation_(std::move(computation)) {}
+
+StatusOr<CompiledLocalComputation*> LocalComputation::Compile(
+    const std::vector<Shape>& argument_shapes) {
+  std::vector<const Shape*> argument_shape_pointers;
+  argument_shape_pointers.reserve(argument_shapes.size());
+  for (auto& argument_shape : argument_shapes) {
+    argument_shape_pointers.push_back(&argument_shape);
+  }
+
+  LocalClient* client = GetOrCreateLocalClient();
+  ExecutableBuildOptions options;
+  TF_ASSIGN_OR_RETURN(
+      auto local_executable,
+      client->Compile(computation_, argument_shape_pointers, options));
+  return new CompiledLocalComputation(std::move(local_executable));
+}
+
+const Computation& LocalComputation::computation() const {
+  return computation_;
+}
+
+LocalComputationBuilder::LocalComputationBuilder(const string& computation_name)
+    : builder_(GetOrCreateLocalClient(), computation_name) {}
+
+void LocalComputationBuilder::SetOpMetadata(const OpMetadata& metadata) {
+  builder_.SetOpMetadata(metadata);
+}
+
+void LocalComputationBuilder::ClearOpMetadata() { builder_.ClearOpMetadata(); }
+
+StatusOr<LocalComputation*> LocalComputationBuilder::Build() {
+  TF_ASSIGN_OR_RETURN(Computation computation, builder_.Build());
+  return new LocalComputation(std::move(computation));
+}
+
+ComputationDataHandle LocalComputationBuilder::Parameter(int64 parameter_number,
+                                                         const Shape& shape,
+                                                         const string& name) {
+  return builder_.Parameter(parameter_number, shape, name);
+}
+
+std::unique_ptr<Shape> LocalComputationBuilder::GetShape(
+    const ComputationDataHandle& operand) {
+  return builder_.GetShape(operand).ConsumeValueOrDie();
+}
+
+ComputationDataHandle LocalComputationBuilder::Infeed(const Shape& shape) {
+  return builder_.Infeed(shape);
+}
+
+void LocalComputationBuilder::Outfeed(const ComputationDataHandle& operand,
+                                      const Shape& shape,
+                                      const string& outfeed_config) {
+  builder_.Outfeed(operand, shape, outfeed_config);
+}
+
+ComputationDataHandle LocalComputationBuilder::ConstantLiteral(
+    const Literal& literal) {
+  return builder_.ConstantLiteral(literal);
+}
+
+ComputationDataHandle LocalComputationBuilder::Broadcast(
+    const ComputationDataHandle& operand,
+    tensorflow::gtl::ArraySlice<int64> broadcast_sizes) {
+  return builder_.Broadcast(operand, broadcast_sizes);
+}
+
+ComputationDataHandle LocalComputationBuilder::Pad(
+    const ComputationDataHandle& operand,
+    const ComputationDataHandle& padding_value,
+    const PaddingConfig& padding_config) {
+  return builder_.Pad(operand, padding_value, padding_config);
+}
+
+ComputationDataHandle LocalComputationBuilder::Reshape(
+    const ComputationDataHandle& operand,
+    tensorflow::gtl::ArraySlice<int64> dimensions,
+    tensorflow::gtl::ArraySlice<int64> new_sizes) {
+  return builder_.Reshape(operand, dimensions, new_sizes);
+}
+
+ComputationDataHandle LocalComputationBuilder::Collapse(
+    const ComputationDataHandle& operand,
+    tensorflow::gtl::ArraySlice<int64> dimensions) {
+  return builder_.Collapse(operand, dimensions);
+}
+
+ComputationDataHandle LocalComputationBuilder::CrossReplicaSum(
+    const ComputationDataHandle& operand) {
+  return builder_.CrossReplicaSum(operand);
+}
+
+ComputationDataHandle LocalComputationBuilder::Slice(
+    const ComputationDataHandle& operand,
+    tensorflow::gtl::ArraySlice<int64> start_indices,
+    tensorflow::gtl::ArraySlice<int64> limit_indices,
+    tensorflow::gtl::ArraySlice<int64> strides) {
+  return builder_.Slice(operand, start_indices, limit_indices, strides);
+}
+
+ComputationDataHandle LocalComputationBuilder::DynamicSlice(
+    const ComputationDataHandle& operand,
+    const ComputationDataHandle& start_indices,
+    tensorflow::gtl::ArraySlice<int64> slice_sizes) {
+  return builder_.DynamicSlice(operand, start_indices, slice_sizes);
+}
+
+ComputationDataHandle LocalComputationBuilder::DynamicUpdateSlice(
+    const ComputationDataHandle& operand, const ComputationDataHandle& update,
+    const ComputationDataHandle& start_indices) {
+  return builder_.DynamicUpdateSlice(operand, update, start_indices);
+}
+
+ComputationDataHandle LocalComputationBuilder::ConcatInDim(
+    tensorflow::gtl::ArraySlice<ComputationDataHandle> operands,
+    int64 dimension) {
+  return builder_.ConcatInDim(operands, dimension);
+}
+
+ComputationDataHandle
+LocalComputationBuilder::SelectAndScatterWithGeneralPadding(
+    const ComputationDataHandle& operand, const LocalComputation& select,
+    tensorflow::gtl::ArraySlice<int64> window_dimensions,
+    tensorflow::gtl::ArraySlice<int64> window_strides,
+    tensorflow::gtl::ArraySlice<std::pair<int64, int64>> padding,
+    const ComputationDataHandle& source,
+    const ComputationDataHandle& init_value, const LocalComputation& scatter) {
+  return builder_.SelectAndScatterWithGeneralPadding(
+      operand, select.computation(), window_dimensions, window_strides, padding,
+      source, init_value, scatter.computation());
+}
+
+ComputationDataHandle LocalComputationBuilder::Select(
+    const ComputationDataHandle& pred, const ComputationDataHandle& on_true,
+    const ComputationDataHandle& on_false) {
+  return builder_.Select(pred, on_true, on_false);
+}
+
+ComputationDataHandle LocalComputationBuilder::Tuple(
+    tensorflow::gtl::ArraySlice<ComputationDataHandle> elements) {
+  return builder_.Tuple(elements);
+}
+
+ComputationDataHandle LocalComputationBuilder::GetTupleElement(
+    const ComputationDataHandle& tuple_data, int64 index) {
+  return builder_.GetTupleElement(tuple_data, index);
+}
+
+ComputationDataHandle LocalComputationBuilder::Dot(
+    const ComputationDataHandle& lhs, const ComputationDataHandle& rhs) {
+  return builder_.Dot(lhs, rhs);
+}
+
+ComputationDataHandle LocalComputationBuilder::ConvGeneralDilated(
+    const ComputationDataHandle& lhs, const ComputationDataHandle& rhs,
+    tensorflow::gtl::ArraySlice<int64> window_strides,
+    tensorflow::gtl::ArraySlice<std::pair<int64, int64>> padding,
+    tensorflow::gtl::ArraySlice<int64> lhs_dilation,
+    tensorflow::gtl::ArraySlice<int64> rhs_dilation,
+    const ConvolutionDimensionNumbers& dimension_numbers) {
+  return builder_.ConvGeneralDilated(lhs, rhs, window_strides, padding,
+                                     lhs_dilation, rhs_dilation,
+                                     dimension_numbers);
+}
+
+ComputationDataHandle LocalComputationBuilder::ConvertElementType(
+    const ComputationDataHandle& operand, PrimitiveType new_element_type) {
+  return builder_.ConvertElementType(operand, new_element_type);
+}
+
+ComputationDataHandle LocalComputationBuilder::Call(
+    const LocalComputation& local_computation,
+    tensorflow::gtl::ArraySlice<ComputationDataHandle> operands) {
+  return builder_.Call(local_computation.computation(), operands);
+}
+
+ComputationDataHandle LocalComputationBuilder::Transpose(
+    const ComputationDataHandle& operand,
+    tensorflow::gtl::ArraySlice<int64> permutation) {
+  return builder_.Transpose(operand, permutation);
+}
+
+ComputationDataHandle LocalComputationBuilder::Rev(
+    const ComputationDataHandle& operand,
+    tensorflow::gtl::ArraySlice<int64> dimensions) {
+  return builder_.Rev(operand, dimensions);
+}
+
+ComputationDataHandle LocalComputationBuilder::Map(
+    tensorflow::gtl::ArraySlice<ComputationDataHandle> operands,
+    const LocalComputation& local_computation,
+    tensorflow::gtl::ArraySlice<int64> dimensions,
+    tensorflow::gtl::ArraySlice<ComputationDataHandle> static_operands) {
+  return builder_.Map(operands, local_computation.computation(), dimensions,
+                      static_operands);
+}
+
+ComputationDataHandle LocalComputationBuilder::Reduce(
+    const ComputationDataHandle& operand,
+    const ComputationDataHandle& init_value,
+    const LocalComputation& local_computation,
+    tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce) {
+  return builder_.Reduce(operand, init_value, local_computation.computation(),
+                         dimensions_to_reduce);
+}
+
+ComputationDataHandle LocalComputationBuilder::ReduceWindowWithGeneralPadding(
+    const ComputationDataHandle& operand,
+    const ComputationDataHandle& init_value,
+    const LocalComputation& local_computation,
+    tensorflow::gtl::ArraySlice<int64> window_dimensions,
+    tensorflow::gtl::ArraySlice<int64> window_strides,
+    tensorflow::gtl::ArraySlice<std::pair<int64, int64>> padding) {
+  return builder_.ReduceWindowWithGeneralPadding(
+      operand, init_value, local_computation.computation(), window_dimensions,
+      window_strides, padding);
+}
+
+ComputationDataHandle LocalComputationBuilder::RngNormal(
+    const ComputationDataHandle& mu, const ComputationDataHandle& sigma,
+    const Shape& shape) {
+  return builder_.RngNormal(mu, sigma, shape);
+}
+
+ComputationDataHandle LocalComputationBuilder::RngUniform(
+    const ComputationDataHandle& a, const ComputationDataHandle& b,
+    const Shape& shape) {
+  return builder_.RngUniform(a, b, shape);
+}
+
+ComputationDataHandle LocalComputationBuilder::While(
+    const LocalComputation& condition, const LocalComputation& body,
+    const ComputationDataHandle& init) {
+  return builder_.While(condition.computation(), body.computation(), init);
+}
+
+#define _FORWARD(method_name, return_sig, args_sig, args)    \
+  return_sig LocalComputationBuilder::method_name args_sig { \
+    return builder_.method_name args;                        \
+  }
+
+#define _FORWARD_UNOP(method_name)             \
+  _FORWARD(method_name, ComputationDataHandle, \
+           (const ComputationDataHandle& operand), (operand))
+
+#define _FORWARD_BINOP(method_name)                                        \
+  _FORWARD(                                                                \
+      method_name, ComputationDataHandle,                                  \
+      (const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, \
+       tensorflow::gtl::ArraySlice<int64> broadcast_dimensions),           \
+      (lhs, rhs, broadcast_dimensions))
+
+_FORWARD_BINOP(Eq)
+_FORWARD_BINOP(Ne)
+_FORWARD_BINOP(Ge)
+_FORWARD_BINOP(Gt)
+_FORWARD_BINOP(Lt)
+_FORWARD_BINOP(Le)
+_FORWARD_BINOP(Add)
+_FORWARD_BINOP(Sub)
+_FORWARD_BINOP(Mul)
+_FORWARD_BINOP(Div)
+_FORWARD_BINOP(Rem)
+_FORWARD_BINOP(Max)
+_FORWARD_BINOP(Min)
+_FORWARD_BINOP(And)
+_FORWARD_BINOP(Or)
+_FORWARD_UNOP(Not)
+_FORWARD_UNOP(Abs)
+_FORWARD_UNOP(Exp)
+_FORWARD_UNOP(Floor)
+_FORWARD_UNOP(Ceil)
+_FORWARD_UNOP(Log)
+_FORWARD_UNOP(Sign)
+_FORWARD_UNOP(Cos)
+_FORWARD_UNOP(Sin)
+_FORWARD_UNOP(Tanh)
+_FORWARD_UNOP(SqrtF32)
+_FORWARD_UNOP(SquareF32)
+_FORWARD_BINOP(Pow)
+_FORWARD_UNOP(IsFinite)
+_FORWARD_UNOP(ReciprocalF32)
+_FORWARD_UNOP(Neg)
+_FORWARD_UNOP(Sort)
+
+#undef _FORWARD
+#undef _FORWARD_UNOP
+#undef _FORWARD_BINOP
+
+void DeleteLocalShapedBuffer(LocalShapedBuffer* local_shaped_buffer) {
+  delete local_shaped_buffer;
+}
+
+void DeleteCompiledLocalComputation(CompiledLocalComputation* computation) {
+  delete computation;
+}
+
+void DeleteLocalComputation(LocalComputation* computation) {
+  delete computation;
+}
+
+}  // namespace swig
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h
new file mode 100644
index 0000000000000000000000000000000000000000..e5503cd52fa60eff30eea38c83aafe0f0ff1efc8
--- /dev/null
+++ b/tensorflow/compiler/xla/python/local_computation_builder.h
@@ -0,0 +1,305 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_COMPUTATION_BUILDER_H_
+#define TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_COMPUTATION_BUILDER_H_
+
+#include "tensorflow/compiler/xla/client/client_library.h"
+#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/client/local_client.h"
+#include "tensorflow/compiler/xla/service/shaped_buffer.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+
+namespace xla {
+
+namespace swig {
+
+// Initializes the number of replicas that XLA will be initialized with (when
+// first obtaining a handle to the local XLA service). If this is called after
+// the handle to the local XLA service has been established, then an error is
+// returned.
+Status InitializeReplicaCount(int replica_count);
+
+// Returns the replica count that is currently set, regardless of whether the
+// local XLA service has been instantiated yet or not.
+int GetReplicaCount();
+
+// Wraps the local client's infeed-transfer function.
+//
+// The default device ordinal (0) is used.
+Status TransferToInfeedLocal(const Literal& literal);
+
+// Transfers the given literal to the infeed of the given replica.
+//
+// The replica number is resolved to an appropriate device ordinal.
+Status TransferToInfeedLocalReplica(const Literal& literal, int replica_number);
+
+// Transfers a literal of the given shape from the outfeed of the given replica.
+//
+// The replica number is resolved to an appropriate device ordinal.
+StatusOr<std::unique_ptr<Literal> > TransferFromOutfeedLocalReplica(
+    const Shape& shape, int replica_number);
+
+// Wraps a ScopedShapedBuffer produced by copying a literal "to
+// device," i.e. copying a literal to a scoped buffer via the local
+// client.
+class LocalShapedBuffer {
+ public:
+  static LocalShapedBuffer* FromLiteral(const Literal& argument);
+  LocalShapedBuffer(std::unique_ptr<ScopedShapedBuffer> shaped_buffer);
+  const std::unique_ptr<ScopedShapedBuffer>& shaped_buffer() const;
+  std::unique_ptr<Literal> ToLiteral() const;
+
+ private:
+  std::unique_ptr<ScopedShapedBuffer> shaped_buffer_;
+};
+
+// Wraps a LocalExecutable produced by compiling a
+// LocalComputation. The Execute method forwards to that of the
+// underlying LocalExecutable, and additionally handles tranferring
+// arguments and return values in and back out of the client library's
+// local client. This class is intended to be made available to Python
+// via SWIG.
+class CompiledLocalComputation {
+ public:
+  CompiledLocalComputation(std::unique_ptr<LocalExecutable> executable);
+  StatusOr<std::unique_ptr<Literal> > Execute(
+      const std::vector<Literal>& arguments);
+  LocalShapedBuffer* ExecuteWithShapedBuffers(
+      tensorflow::gtl::ArraySlice<LocalShapedBuffer*> argument_handles);
+
+ private:
+  std::unique_ptr<LocalExecutable> executable_;
+};
+
+// Wraps a Computation produced by a LocalComputationBuilder. The
+// Compile method compiles the computation to a (local) executable via
+// the client library's local client. This class is intended to be
+// made available to Python via SWIG.
+class LocalComputation {
+ public:
+  LocalComputation(Computation computation);
+  StatusOr<CompiledLocalComputation*> Compile(
+      const std::vector<Shape>& argument_shapes);
+  const Computation& computation() const;
+
+ private:
+  Computation computation_;
+};
+
+// Wraps the ComputationBuilder API in order to:
+// - Support consumption by SWIG in order to be made available to
+//   Python.
+// - Set up the underlying builder to use the client library's
+//   LocalClient.
+// - Wrap Computations in LocalComputations for Python access.
+// - Correspondingly unwrap incoming LocalComputations.
+class LocalComputationBuilder {
+ public:
+  LocalComputationBuilder(const string& computation_name);
+
+  void SetOpMetadata(const OpMetadata& metadata);
+  void ClearOpMetadata();
+
+  // Returns an owned LocalComputation to the caller on success.
+  StatusOr<LocalComputation*> Build();
+
+  ComputationDataHandle Parameter(int64 parameter_number, const Shape& shape,
+                                  const string& name);
+
+  std::unique_ptr<Shape> GetShape(const ComputationDataHandle& operand);
+
+  ComputationDataHandle Infeed(const Shape& shape);
+
+  void Outfeed(const ComputationDataHandle& operand, const Shape& shape,
+               const string& outfeed_config);
+
+  ComputationDataHandle ConstantLiteral(const Literal& literal);
+
+  ComputationDataHandle Broadcast(
+      const ComputationDataHandle& operand,
+      tensorflow::gtl::ArraySlice<int64> broadcast_sizes);
+
+  ComputationDataHandle Pad(const ComputationDataHandle& operand,
+                            const ComputationDataHandle& padding_value,
+                            const PaddingConfig& padding_config);
+
+  ComputationDataHandle Reshape(const ComputationDataHandle& operand,
+                                tensorflow::gtl::ArraySlice<int64> dimensions,
+                                tensorflow::gtl::ArraySlice<int64> new_sizes);
+
+  ComputationDataHandle Collapse(const ComputationDataHandle& operand,
+                                 tensorflow::gtl::ArraySlice<int64> dimensions);
+
+  ComputationDataHandle CrossReplicaSum(const ComputationDataHandle& operand);
+
+  ComputationDataHandle Slice(const ComputationDataHandle& operand,
+                              tensorflow::gtl::ArraySlice<int64> start_indices,
+                              tensorflow::gtl::ArraySlice<int64> limit_indices,
+                              tensorflow::gtl::ArraySlice<int64> strides);
+
+  ComputationDataHandle DynamicSlice(
+      const ComputationDataHandle& operand,
+      const ComputationDataHandle& start_indices,
+      tensorflow::gtl::ArraySlice<int64> slice_sizes);
+
+  ComputationDataHandle DynamicUpdateSlice(
+      const ComputationDataHandle& operand, const ComputationDataHandle& update,
+      const ComputationDataHandle& start_indices);
+
+  ComputationDataHandle ConcatInDim(
+      tensorflow::gtl::ArraySlice<ComputationDataHandle> operands,
+      int64 dimension);
+
+  ComputationDataHandle SelectAndScatterWithGeneralPadding(
+      const ComputationDataHandle& operand, const LocalComputation& select,
+      tensorflow::gtl::ArraySlice<int64> window_dimensions,
+      tensorflow::gtl::ArraySlice<int64> window_strides,
+      tensorflow::gtl::ArraySlice<std::pair<int64, int64> > padding,
+      const ComputationDataHandle& source,
+      const ComputationDataHandle& init_value, const LocalComputation& scatter);
+
+  ComputationDataHandle Select(const ComputationDataHandle& pred,
+                               const ComputationDataHandle& on_true,
+                               const ComputationDataHandle& on_false);
+
+  ComputationDataHandle Tuple(
+      tensorflow::gtl::ArraySlice<ComputationDataHandle> elements);
+
+  ComputationDataHandle GetTupleElement(const ComputationDataHandle& tuple_data,
+                                        int64 index);
+
+  ComputationDataHandle Dot(const ComputationDataHandle& lhs,
+                            const ComputationDataHandle& rhs);
+
+  ComputationDataHandle ConvGeneralDilated(
+      const ComputationDataHandle& lhs, const ComputationDataHandle& rhs,
+      tensorflow::gtl::ArraySlice<int64> window_strides,
+      tensorflow::gtl::ArraySlice<std::pair<int64, int64> > padding,
+      tensorflow::gtl::ArraySlice<int64> lhs_dilation,
+      tensorflow::gtl::ArraySlice<int64> rhs_dilation,
+      const ConvolutionDimensionNumbers& dimension_numbers);
+
+  ComputationDataHandle ConvertElementType(const ComputationDataHandle& operand,
+                                           PrimitiveType new_element_type);
+
+  ComputationDataHandle Call(
+      const LocalComputation& local_computation,
+      tensorflow::gtl::ArraySlice<ComputationDataHandle> operands);
+
+  ComputationDataHandle Transpose(
+      const ComputationDataHandle& operand,
+      tensorflow::gtl::ArraySlice<int64> permutation);
+
+  ComputationDataHandle Rev(const ComputationDataHandle& operand,
+                            tensorflow::gtl::ArraySlice<int64> dimensions);
+
+  ComputationDataHandle Map(
+      tensorflow::gtl::ArraySlice<ComputationDataHandle> operands,
+      const LocalComputation& local_computation,
+      tensorflow::gtl::ArraySlice<int64> dimensions,
+      tensorflow::gtl::ArraySlice<ComputationDataHandle> static_operands);
+
+  ComputationDataHandle Reduce(
+      const ComputationDataHandle& operand,
+      const ComputationDataHandle& init_value,
+      const LocalComputation& local_computation,
+      tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce);
+
+  ComputationDataHandle ReduceWindowWithGeneralPadding(
+      const ComputationDataHandle& operand,
+      const ComputationDataHandle& init_value,
+      const LocalComputation& local_computation,
+      tensorflow::gtl::ArraySlice<int64> window_dimensions,
+      tensorflow::gtl::ArraySlice<int64> window_strides,
+      tensorflow::gtl::ArraySlice<std::pair<int64, int64> > padding);
+
+  ComputationDataHandle RngNormal(const ComputationDataHandle& mu,
+                                  const ComputationDataHandle& sigma,
+                                  const Shape& shape);
+
+  ComputationDataHandle RngUniform(const ComputationDataHandle& a,
+                                   const ComputationDataHandle& b,
+                                   const Shape& shape);
+
+  ComputationDataHandle While(const LocalComputation& condition,
+                              const LocalComputation& body,
+                              const ComputationDataHandle& init);
+
+#define _FORWARD(method_name, return_sig, args_sig) \
+  return_sig method_name args_sig;
+
+#define _FORWARD_UNOP(method_name)             \
+  _FORWARD(method_name, ComputationDataHandle, \
+           (const ComputationDataHandle& operand))
+
+#define _FORWARD_BINOP(method_name)                                        \
+  _FORWARD(                                                                \
+      method_name, ComputationDataHandle,                                  \
+      (const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, \
+       tensorflow::gtl::ArraySlice<int64> broadcast_dimensions))
+
+  _FORWARD_BINOP(Eq)
+  _FORWARD_BINOP(Ne)
+  _FORWARD_BINOP(Ge)
+  _FORWARD_BINOP(Gt)
+  _FORWARD_BINOP(Lt)
+  _FORWARD_BINOP(Le)
+  _FORWARD_BINOP(Add)
+  _FORWARD_BINOP(Sub)
+  _FORWARD_BINOP(Mul)
+  _FORWARD_BINOP(Div)
+  _FORWARD_BINOP(Rem)
+  _FORWARD_BINOP(Max)
+  _FORWARD_BINOP(Min)
+  _FORWARD_BINOP(And)
+  _FORWARD_BINOP(Or)
+  _FORWARD_UNOP(Not)
+  _FORWARD_UNOP(Abs)
+  _FORWARD_UNOP(Exp)
+  _FORWARD_UNOP(Floor)
+  _FORWARD_UNOP(Ceil)
+  _FORWARD_UNOP(Log)
+  _FORWARD_UNOP(Sign)
+  _FORWARD_UNOP(Cos)
+  _FORWARD_UNOP(Sin)
+  _FORWARD_UNOP(Tanh)
+  _FORWARD_UNOP(SqrtF32)
+  _FORWARD_UNOP(SquareF32)
+  _FORWARD_BINOP(Pow)
+  _FORWARD_UNOP(IsFinite)
+  _FORWARD_UNOP(ReciprocalF32)
+  _FORWARD_UNOP(Neg)
+  _FORWARD_UNOP(Sort)
+
+#undef _FORWARD
+#undef _FORWARD_UNOP
+#undef _FORWARD_BINOP
+
+ private:
+  ComputationBuilder builder_;
+};
+
+// Functions for freeing resources from the Python side.
+void DeleteLocalShapedBuffer(LocalShapedBuffer* local_shaped_buffer);
+void DeleteCompiledLocalComputation(CompiledLocalComputation* computation);
+void DeleteLocalComputation(LocalComputation* computation);
+
+}  // namespace swig
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_COMPUTATION_BUILDER_H_
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i
new file mode 100644
index 0000000000000000000000000000000000000000..31789259609714e7d20247eec072e05a181715e6
--- /dev/null
+++ b/tensorflow/compiler/xla/python/local_computation_builder.i
@@ -0,0 +1,719 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// SWIG typemaps and declarations for building, compiling, and
+// executing XLA computations, wrapping most of what is declared in
+// local_computation_builder.h.
+//
+// The typemaps below implement/assert the following correspondences
+// (with elaborations below):
+//
+//    C++                                  Python
+// -------------------------------------+---------------------------------------
+//  ComputationDataHandle              <-> int
+//  ArraySlice<int64>                  <-  sequence of int
+//  ArraySlice<ComputationDataHandle>  <-  sequence of int
+//  Literal                            <-> (nested tuple of) numpy ndarray
+//  std::vector<Literal>               <-  sequence of (nested tuple of) ndarray
+//  Shape                              <-> pair holding (dtype, dimensions)
+//  std::vector<Shape>                 <-  sequence of shape information pairs
+//  PrimitiveType                      <-  int
+//  ArraySlice<pair<int64, in64>>      <-  sequence of int pairs
+//  PaddingConfig proto                <-  corresponding Python proto
+//  ConvolutionDimensionNumbers proto  <-  corresponding Python proto
+//
+// Arrows indicate whether a conversion only ever occurs in one
+// direction, or whether it is maintained bidirectionally.
+//
+// The Python objects corresponding to C++ Literals have the type:
+//
+//   T = ndarray | (T, ...)
+//
+// where a terminal numpy ndarray translates to a Literal with a
+// non-tuple Shape, an XLA primitive element type corresponding to the
+// ndarray's dtype. Meanwhile, a non-terminal "tuple of T" translates
+// to a tuple-shaped Literal whose tuple components are translated
+// recursively. For example, if x is a numpy ndarray in Python, with
+// shape (2, 3) and dtype of dtype('float32'), then x translates to a
+// Literal with rank 2, dimension 2 and 3, and XLA primitive type
+// F32. Meanwhile,
+//
+//   (x, (x, x), (x,)),
+//
+// translates to a tuple-shaped XLA Literal, whose component subshapes
+// are a 2x3 F32-shaped literal followed by two tuple-shaped literals.
+//
+// The Python objects corresponding to C++ Shapes have the type:
+//
+//   T            = (dtype, S)
+//   S            = DIMENSIONS | TUPLE_SHAPES
+//   DIMENSIONS   = (int, ...)
+//   TUPLE_SHAPES = (T, ...)
+//
+// In the pair described by the T rule, the terminal dtype determines
+// whether S expands as DIMENSIONS or TUPLE_SHAPES. Namely if it is
+// dtype('O'), numpy's object dtype, the structure represents a tuple
+// shape and the expansion of the non-terminal S is
+// TUPLE_SHAPES. Otherwise, dtype describes a primitive element type
+// and S expands into DIMENSIONS giving dimension sizes. For example:
+//
+//   (dtype('float32'), (3, 5, 7))
+//
+// describes a 3x5x7 array of F32s, and
+//
+//   (dtype('O'), ((dtype('float32'), (2, 3)),
+//                 (dtype('float64'), (4, 5))))
+//
+// describes a tuple shape with two subshapes: the first a 2x3 F32,
+// and the other a 4x5 F64.
+//
+// The Python int corresponding to a PrimitiveType enum must be valid
+// per xla_data.proto (e.g. xla_data.PRED, xla_data.F32).
+//
+// The SWIG object wrappers generated by this file are not intended
+// for end use, but rather for internal use in the Python XLA client,
+// xla_client.py.
+//
+// One central reason for the Python-side indirection is that the
+// Python-side objects produced by the typemaps in this file are
+// further packaged up by xla_client before being passed on. For
+// instance, xla_client wraps the long produced for a C++
+// ComputationDataHandle in a Python ComputationDataHandle proto,
+// rather than exposing a raw long outside of the client. Similarly,
+// the Python pair produced for a C++ Shape is further wrapped in a
+// Python class (xla_client.Shape) so as not to expose the raw pair
+// externally.
+//
+// Other SWIG object wrappers (e.g. of LocalComputation) are further
+// wrapped by xla_client in order to set up a custom destructor that
+// triggers memory deallocation on the C++ side.
+
+%module(threads="1") local_computation_builder
+
+// Keep the GIL except where explicitly specified.
+%nothread;
+
+%include "tensorflow/python/platform/base.i"
+
+%{
+// Must be included first
+#include "tensorflow/python/lib/core/numpy.h"
+
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/compiler/xla/python/numpy_bridge.h"
+#include "tensorflow/compiler/xla/python/local_computation_builder.h"
+
+using namespace xla;
+using namespace xla::swig;
+
+namespace xla {
+namespace swig {
+
+bool GetIntAttr(PyObject* o, const char* field, int64* result) {
+  PyObject* fo = PyObject_GetAttrString(o, field);
+  if (!fo) {
+    return false;
+  }
+  const int64 value = numpy::PyIntOrPyLongToLong(fo);
+  if (value == -1 && PyErr_Occurred()) {
+    Py_DECREF(fo);
+    return false;
+  }
+  Py_DECREF(fo);
+  *result = value;
+  return true;
+}
+
+}
+}
+%}
+
+// Required to use PyArray_* functions.
+%init %{
+tensorflow::ImportNumpy();
+%}
+
+// ComputationDataHandle
+
+%typemap(in) const ComputationDataHandle& (ComputationDataHandle temp) {
+  const int64 handle = numpy::PyIntOrPyLongToLong($input);
+  if (handle == -1 && PyErr_Occurred()) {
+    return NULL;
+  }
+  temp.set_handle(handle);
+  $1 = &temp;
+}
+
+%typemap(out) ComputationDataHandle {
+  $result = numpy::LongToPyIntOrPyLong($1.handle());
+}
+
+%typemap(out) StatusOr<xla::swig::CompiledLocalComputation*> {
+  if ($1.ok()) {
+    auto* value = $1.ValueOrDie();
+    {
+      auto* $1 = value;
+      $typemap(out, xla::swig::CompiledLocalComputation*)
+    }
+  } else {
+    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
+    return NULL;
+  }
+}
+
+%typemap(out) StatusOr<xla::swig::LocalComputation*> {
+  if ($1.ok()) {
+    auto* value = $1.ValueOrDie();
+    {
+      auto* $1 = value;
+      $typemap(out, xla::swig::LocalComputation*)
+    }
+  } else {
+    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
+    return NULL;
+  }
+}
+
+%typemap(out) Status {
+  if (!$1.ok()) {
+    PyErr_SetString(
+        PyExc_RuntimeError, $1.ToString().c_str());
+    return NULL;
+  }
+  $result = Py_None;
+}
+
+// ArraySlice<int64>
+
+%typemap(in) tensorflow::gtl::ArraySlice<int64>
+    (std::vector<int64> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    return NULL;
+  }
+  const int size = PySequence_Size($input);
+  temps.resize(size);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    PyObject* py_int = numpy::PyNumberToPyInt(o);
+    if (!py_int) {
+      PyErr_SetString(
+          PyExc_TypeError,
+          "Argument sequence element cannot be converted to int");
+      Py_DECREF(o);
+      return NULL;
+    }
+    temps[i] = numpy::PyIntOrPyLongToLong(py_int);
+    if (temps[i] == -1 && PyErr_Occurred()) {
+      Py_DECREF(py_int);
+      Py_DECREF(o);
+      return NULL;
+    }
+    Py_DECREF(py_int);
+    Py_DECREF(o);
+  }
+  $1 = temps;
+}
+
+// ComputationDataHandle
+
+%typemap(in) tensorflow::gtl::ArraySlice<ComputationDataHandle>
+    (std::vector<ComputationDataHandle> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    return NULL;
+  }
+  const int size = PySequence_Size($input);
+  temps.resize(size);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    PyObject* py_int = numpy::PyNumberToPyInt(o);
+    if (!py_int) {
+      PyErr_SetString(
+          PyExc_TypeError,
+          "Argument sequence element cannot be converted to int");
+      return NULL;
+    }
+    const int64 handle = numpy::PyIntOrPyLongToLong(py_int);
+    if (handle == -1 && PyErr_Occurred()) {
+      Py_DECREF(py_int);
+      Py_DECREF(o);
+      return NULL;
+    }
+    temps[i].set_handle(handle);
+    Py_DECREF(py_int);
+    Py_DECREF(o);
+  }
+  $1 = temps;
+}
+
+// LocalShapedBuffer*
+
+%typemap(in) tensorflow::gtl::ArraySlice<xla::swig::LocalShapedBuffer*>
+    (std::vector<LocalShapedBuffer*> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    return NULL;
+  }
+  const int size = PySequence_Size($input);
+  temps.reserve(size);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    LocalShapedBuffer* lsbp;
+    if ((SWIG_ConvertPtr(o, (void**) &lsbp, $descriptor(xla::swig::LocalShapedBuffer*),
+                         SWIG_POINTER_EXCEPTION)) == -1) {
+      return NULL;
+    }
+    temps.push_back(lsbp);
+    Py_DECREF(o);
+  }
+  $1 = temps;
+}
+
+// Literal
+
+%typemap(in) const Literal& (StatusOr< std::unique_ptr<Literal> > literal_status) {
+  literal_status = numpy::XlaLiteralFromPyObject($input);
+  if (!literal_status.ok()) {
+    PyErr_SetString(PyExc_RuntimeError, literal_status.status().ToString().c_str());
+    return NULL;
+  }
+  $1 = literal_status.ValueOrDie().get();
+}
+
+%typemap(out) std::unique_ptr<Literal> {
+  $result = numpy::PyObjectFromXlaLiteral(*$1);
+}
+
+%typemap(out) StatusOr< std::unique_ptr<Literal> > {
+  if (!$1.ok()) {
+    PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str());
+    return NULL;
+  }
+  $result = numpy::PyObjectFromXlaLiteral(*$1.ValueOrDie());
+}
+
+%typemap(in) const std::vector<Literal>& (std::vector<Literal> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    return NULL;
+  }
+  const int size = PySequence_Size($input);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    StatusOr< std::unique_ptr<Literal> > literal_status = numpy::XlaLiteralFromPyObject(o);
+    if (!literal_status.ok()) {
+      PyErr_SetString(PyExc_RuntimeError, literal_status.status().ToString().c_str());
+      Py_DECREF(o);
+      return NULL;
+    }
+    temps.push_back(std::move(*literal_status.ConsumeValueOrDie()));
+    Py_DECREF(o);
+  }
+  $1 = &temps;
+}
+
+// OpMetadata
+
+%typemap(in) const OpMetadata& (OpMetadata temp) {
+  StatusOr<OpMetadata> statusor = numpy::OpMetadataFromPyObject($input);
+  if (!statusor.ok()) {
+    PyErr_SetString(PyExc_RuntimeError, statusor.status().ToString().c_str());
+    return NULL;
+  }
+  temp = std::move(statusor).ValueOrDie();
+  $1 = &temp;
+}
+
+// Shape
+
+%typemap(in) const Shape& (Shape temp) {
+  Status shape_status = numpy::CheckPyShapeInfo($input);
+  if (!shape_status.ok()) {
+    PyErr_SetString(PyExc_RuntimeError, shape_status.ToString().c_str());
+    return NULL;
+  }
+  temp = numpy::XlaShapeFromPyShapeInfo($input);
+  $1 = &temp;
+}
+
+%typemap(out) std::unique_ptr<Shape> {
+  $result = numpy::PyShapeInfoFromXlaShape(*$1);
+}
+
+%typemap(in) const std::vector<Shape>& (std::vector<Shape> temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    return NULL;
+  }
+  const int size = PySequence_Size($input);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    Status shape_status = numpy::CheckPyShapeInfo(o);
+    if (!shape_status.ok()) {
+      PyErr_SetString(PyExc_RuntimeError, shape_status.ToString().c_str());
+      Py_DECREF(o);
+      return NULL;
+    }
+    temps.push_back(numpy::XlaShapeFromPyShapeInfo(o));
+    Py_DECREF(o);
+  }
+  $1 = &temps;
+}
+
+// PrimitiveType
+
+%typemap(in) PrimitiveType {
+  PyObject* py_int = numpy::PyNumberToPyInt($input);
+  if (!py_int) {
+    PyErr_SetString(PyExc_TypeError, "Argument cannot be converted to int");
+    return NULL;
+  }
+  const long value = numpy::PyIntOrPyLongToLong(py_int);
+  if (value == -1 && PyErr_Occurred()) {
+    Py_DECREF(py_int);
+    return NULL;
+  }
+  if (!PrimitiveType_IsValid(value)) {
+    PyErr_SetString(
+        PyExc_TypeError, "Argument not valid for PrimitiveType enum");
+    Py_DECREF(py_int);
+    return NULL;
+  }
+  $1 = static_cast<PrimitiveType>(value);
+}
+
+// ArraySlice<pair<int64, in64>>
+
+%typemap(in) tensorflow::gtl::ArraySlice<std::pair<int64, int64> >
+    (std::vector<std::pair<int64, int64> > temps) {
+  if (!PySequence_Check($input)) {
+    PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
+    return NULL;
+  }
+  const int size = PySequence_Size($input);
+  temps.reserve(size);
+  for (int i = 0; i < size; ++i) {
+    PyObject* o = PySequence_GetItem($input, i);
+    if (!o) {
+      return NULL;
+    }
+    PyObject* first = PyTuple_GetItem(o, 0);
+    if (!first) {
+      Py_DECREF(o);
+      return NULL;
+    }
+    PyObject* first_pyint = numpy::PyNumberToPyInt(first);
+    if (!first_pyint) {
+      PyErr_SetString(
+          PyExc_TypeError,
+          "First pair item cannot be converted to int");
+      Py_DECREF(o);
+      return NULL;
+    }
+    PyObject* second = PyTuple_GetItem(o, 1);
+    if (!second) {
+      Py_DECREF(o);
+      Py_DECREF(first_pyint);
+      return NULL;
+    }
+    PyObject* second_pyint = numpy::PyNumberToPyInt(second);
+    if (!second_pyint) {
+      PyErr_SetString(
+          PyExc_TypeError,
+          "Second pair item cannot be converted to int");
+      Py_DECREF(o);
+      Py_DECREF(first_pyint);
+      return NULL;
+    }
+    const int64 first_value = numpy::PyIntOrPyLongToLong(first_pyint);
+    if (first_value == -1 && PyErr_Occurred()) {
+      Py_DECREF(o);
+      Py_DECREF(first_pyint);
+      Py_DECREF(second_pyint);
+      return NULL;
+    }
+    const int64 second_value = numpy::PyIntOrPyLongToLong(second_pyint);
+    if (second_value == -1 && PyErr_Occurred()) {
+      Py_DECREF(o);
+      Py_DECREF(first_pyint);
+      Py_DECREF(second_pyint);
+      return NULL;
+    }
+    temps.push_back(std::make_pair(first_value, second_value));
+    Py_DECREF(o);
+  }
+  $1 = temps;
+}
+
+// PaddingConfig
+
+%typemap(in) const PaddingConfig&
+    (PaddingConfig padding_config) {
+  PyObject* dimensions = PyObject_GetAttrString($input, "dimensions");
+  if (!dimensions) {
+    return NULL;
+  }
+
+  int length = PySequence_Size(dimensions);
+  if (length == -1) {
+    Py_DECREF(dimensions);
+    return NULL;
+  }
+
+  for (int i = 0; i < length; ++i) {
+    PyObject* item = PySequence_GetItem(dimensions, i);
+    if (!item) {
+      Py_DECREF(dimensions);
+      return NULL;
+    }
+    int64 edge_padding_low, edge_padding_high, interior_padding;
+    if (!GetIntAttr(item, "edge_padding_low", &edge_padding_low)
+        || !GetIntAttr(item, "edge_padding_high", &edge_padding_high)
+        || !GetIntAttr(item, "interior_padding", &interior_padding)) {
+      Py_DECREF(item);
+      Py_DECREF(dimensions);
+      return NULL;
+    }
+    Py_DECREF(item);
+
+    PaddingConfig::PaddingConfigDimension* dimension =
+        padding_config.add_dimensions();
+    dimension->set_edge_padding_low(edge_padding_low);
+    dimension->set_edge_padding_high(edge_padding_high);
+    dimension->set_interior_padding(interior_padding);
+  }
+  Py_DECREF(dimensions);
+
+  $1 = &padding_config;
+}
+
+// ConvolutionDimensionNumbers
+
+%typemap(in) const ConvolutionDimensionNumbers&
+    (ConvolutionDimensionNumbers dimension_numbers) {
+  int64 value;
+
+  if (!GetIntAttr($input, "input_batch_dimension", &value)) {
+    return NULL;
+  }
+  dimension_numbers.set_input_batch_dimension(value);
+
+  if (!GetIntAttr($input, "input_feature_dimension", &value)) {
+    return NULL;
+  }
+  dimension_numbers.set_input_feature_dimension(value);
+
+  if (!GetIntAttr($input, "output_batch_dimension", &value)) {
+    return NULL;
+  }
+  dimension_numbers.set_output_batch_dimension(value);
+
+  if (!GetIntAttr($input, "output_feature_dimension", &value)) {
+    return NULL;
+  }
+  dimension_numbers.set_output_feature_dimension(value);
+
+  if (!GetIntAttr($input, "kernel_output_feature_dimension", &value)) {
+    return NULL;
+  }
+  dimension_numbers.set_kernel_output_feature_dimension(value);
+
+  if (!GetIntAttr($input, "kernel_input_feature_dimension", &value)) {
+    return NULL;
+  }
+  dimension_numbers.set_kernel_input_feature_dimension(value);
+
+  PyObject* o;
+  int length;
+
+  o = PyObject_GetAttrString($input, "input_spatial_dimensions");
+  if (!o) {
+    return NULL;
+  }
+  length = PySequence_Size(o);
+  if (length == -1) {
+    Py_DECREF(o);
+    return NULL;
+  }
+  for (int i = 0; i < length; ++i) {
+    PyObject* item = PySequence_GetItem(o, i);
+    if (!item) {
+      Py_DECREF(o);
+      return NULL;
+    }
+    const int64 dimension = numpy::PyIntOrPyLongToLong(item);
+    if (dimension == -1 && PyErr_Occurred()) {
+      Py_DECREF(item);
+      Py_DECREF(o);
+      return NULL;
+    }
+    dimension_numbers.add_input_spatial_dimensions(dimension);
+    Py_DECREF(item);
+  }
+  Py_DECREF(o);
+
+  o = PyObject_GetAttrString($input, "kernel_spatial_dimensions");
+  if (!o) {
+    return NULL;
+  }
+  length = PySequence_Size(o);
+  if (length == -1) {
+    Py_DECREF(o);
+    return NULL;
+  }
+  for (int i = 0; i < length; ++i) {
+    PyObject* item = PySequence_GetItem(o, i);
+    if (!item) {
+      Py_DECREF(o);
+      return NULL;
+    }
+    const int64 dimension = numpy::PyIntOrPyLongToLong(item);
+    if (dimension == -1 && PyErr_Occurred()) {
+      Py_DECREF(item);
+      Py_DECREF(o);
+      return NULL;
+    }
+    dimension_numbers.add_kernel_spatial_dimensions(dimension);
+    Py_DECREF(item);
+  }
+  Py_DECREF(o);
+
+  o = PyObject_GetAttrString($input, "output_spatial_dimensions");
+  if (!o) {
+    return NULL;
+  }
+  length = PySequence_Size(o);
+  if (length == -1) {
+    Py_DECREF(o);
+    return NULL;
+  }
+  for (int i = 0; i < length; ++i) {
+    PyObject* item = PySequence_GetItem(o, i);
+    if (!item) {
+      Py_DECREF(o);
+      return NULL;
+    }
+    const int64 dimension = numpy::PyIntOrPyLongToLong(item);
+    if (dimension == -1 && PyErr_Occurred()) {
+      Py_DECREF(item);
+      Py_DECREF(o);
+      return NULL;
+    }
+    dimension_numbers.add_output_spatial_dimensions(dimension);
+    Py_DECREF(item);
+  }
+  Py_DECREF(o);
+
+  $1 = &dimension_numbers;
+}
+
+%ignoreall
+%unignore xla;
+%unignore xla::swig;
+%unignore xla::swig::InitializeReplicaCount;
+%unignore xla::swig::GetReplicaCount;
+%unignore xla::swig::TransferToInfeedLocal;
+%unignore xla::swig::TransferToInfeedLocalReplica;
+%unignore xla::swig::TransferFromOutfeedLocalReplica;
+%unignore xla::swig::LocalShapedBuffer;
+%unignore xla::swig::LocalShapedBuffer::FromLiteral;
+%unignore xla::swig::LocalShapedBuffer::ToLiteral;
+%unignore xla::swig::CompiledLocalComputation;
+%unignore xla::swig::CompiledLocalComputation::Execute;
+%unignore xla::swig::CompiledLocalComputation::ExecuteWithShapedBuffers;
+%unignore xla::swig::LocalComputation;
+%unignore xla::swig::LocalComputation::Compile;
+%unignore xla::swig::LocalComputationBuilder;
+%unignore xla::swig::LocalComputationBuilder::LocalComputationBuilder;
+%unignore xla::swig::LocalComputationBuilder::Build;
+%unignore xla::swig::LocalComputationBuilder::SetOpMetadata;
+%unignore xla::swig::LocalComputationBuilder::ClearOpMetadata;
+%unignore xla::swig::LocalComputationBuilder::Parameter;
+%unignore xla::swig::LocalComputationBuilder::GetShape;
+%unignore xla::swig::LocalComputationBuilder::Infeed;
+%unignore xla::swig::LocalComputationBuilder::Outfeed;
+%unignore xla::swig::LocalComputationBuilder::ConstantLiteral;
+%unignore xla::swig::LocalComputationBuilder::ConstantR0;
+%unignore xla::swig::LocalComputationBuilder::Broadcast;
+%unignore xla::swig::LocalComputationBuilder::Pad;
+%unignore xla::swig::LocalComputationBuilder::Reshape;
+%unignore xla::swig::LocalComputationBuilder::Collapse;
+%unignore xla::swig::LocalComputationBuilder::CrossReplicaSum;
+%unignore xla::swig::LocalComputationBuilder::Slice;
+%unignore xla::swig::LocalComputationBuilder::DynamicSlice;
+%unignore xla::swig::LocalComputationBuilder::DynamicUpdateSlice;
+%unignore xla::swig::LocalComputationBuilder::ConcatInDim;
+%unignore xla::swig::LocalComputationBuilder::SelectAndScatterWithGeneralPadding;
+%unignore xla::swig::LocalComputationBuilder::Select;
+%unignore xla::swig::LocalComputationBuilder::Tuple;
+%unignore xla::swig::LocalComputationBuilder::GetTupleElement;
+%unignore xla::swig::LocalComputationBuilder::ConvertElementType;
+%unignore xla::swig::LocalComputationBuilder::Call;
+%unignore xla::swig::LocalComputationBuilder::Transpose;
+%unignore xla::swig::LocalComputationBuilder::Rev;
+%unignore xla::swig::LocalComputationBuilder::Map;
+%unignore xla::swig::LocalComputationBuilder::Reduce;
+%unignore xla::swig::LocalComputationBuilder::ReduceWindowWithGeneralPadding;
+%unignore xla::swig::LocalComputationBuilder::RngNormal;
+%unignore xla::swig::LocalComputationBuilder::RngUniform;
+%unignore xla::swig::LocalComputationBuilder::RngBernoulli;
+%unignore xla::swig::LocalComputationBuilder::While;
+%unignore xla::swig::LocalComputationBuilder::Eq;
+%unignore xla::swig::LocalComputationBuilder::Ne;
+%unignore xla::swig::LocalComputationBuilder::Ge;
+%unignore xla::swig::LocalComputationBuilder::Gt;
+%unignore xla::swig::LocalComputationBuilder::Lt;
+%unignore xla::swig::LocalComputationBuilder::Le;
+%unignore xla::swig::LocalComputationBuilder::Dot;
+%unignore xla::swig::LocalComputationBuilder::ConvGeneralDilated;
+%unignore xla::swig::LocalComputationBuilder::Add;
+%unignore xla::swig::LocalComputationBuilder::Sub;
+%unignore xla::swig::LocalComputationBuilder::Mul;
+%unignore xla::swig::LocalComputationBuilder::Div;
+%unignore xla::swig::LocalComputationBuilder::Rem;
+%unignore xla::swig::LocalComputationBuilder::Max;
+%unignore xla::swig::LocalComputationBuilder::Min;
+%unignore xla::swig::LocalComputationBuilder::And;
+%unignore xla::swig::LocalComputationBuilder::Or;
+%unignore xla::swig::LocalComputationBuilder::Not;
+%unignore xla::swig::LocalComputationBuilder::Abs;
+%unignore xla::swig::LocalComputationBuilder::Exp;
+%unignore xla::swig::LocalComputationBuilder::Floor;
+%unignore xla::swig::LocalComputationBuilder::Ceil;
+%unignore xla::swig::LocalComputationBuilder::Log;
+%unignore xla::swig::LocalComputationBuilder::Sign;
+%unignore xla::swig::LocalComputationBuilder::Cos;
+%unignore xla::swig::LocalComputationBuilder::Sin;
+%unignore xla::swig::LocalComputationBuilder::Tanh;
+%unignore xla::swig::LocalComputationBuilder::SqrtF32;
+%unignore xla::swig::LocalComputationBuilder::SquareF32;
+%unignore xla::swig::LocalComputationBuilder::Pow;
+%unignore xla::swig::LocalComputationBuilder::IsFinite;
+%unignore xla::swig::LocalComputationBuilder::ReciprocalF32;
+%unignore xla::swig::LocalComputationBuilder::Neg;
+%unignore xla::swig::LocalComputationBuilder::Sort;
+%unignore xla::swig::DeleteLocalShapedBuffer;
+%unignore xla::swig::DeleteLocalComputation;
+%unignore xla::swig::DeleteCompiledLocalComputation;
+
+%thread;
+%include "tensorflow/compiler/xla/python/local_computation_builder.h"
+%nothread;
+
+%unignoreall
diff --git a/tensorflow/compiler/xla/python/numpy_bridge.cc b/tensorflow/compiler/xla/python/numpy_bridge.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5c722623e318ece9eca6bdc8750195ce5fd5defb
--- /dev/null
+++ b/tensorflow/compiler/xla/python/numpy_bridge.cc
@@ -0,0 +1,495 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/python/numpy_bridge.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace xla {
+
+namespace swig {
+
+namespace numpy {
+
+int PrimitiveTypeToNumpyType(PrimitiveType primitive_type) {
+  switch (primitive_type) {
+    case PRED:
+      return NPY_BOOL;
+    case S8:
+      return NPY_INT8;
+    case S16:
+      return NPY_INT16;
+    case S32:
+      return NPY_INT32;
+    case S64:
+      return NPY_INT64;
+    case U8:
+      return NPY_UINT8;
+    case U16:
+      return NPY_UINT16;
+    case U32:
+      return NPY_UINT32;
+    case U64:
+      return NPY_UINT64;
+    case F16:
+      return NPY_FLOAT16;
+    case F32:
+      return NPY_FLOAT32;
+    case F64:
+      return NPY_FLOAT64;
+    case TUPLE:
+      return NPY_OBJECT;
+    default:
+      LOG(FATAL) << "No Numpy type for XLA primitive type " << primitive_type;
+  }
+}
+
+PrimitiveType NumpyTypeToPrimitiveType(int np_type) {
+  switch (np_type) {
+    case NPY_BOOL:
+      return PRED;
+    case NPY_INT8:
+      return S8;
+    case NPY_INT16:
+      return S16;
+    case NPY_INT32:
+      return S32;
+    case NPY_INT64:
+      return S64;
+    case NPY_UINT8:
+      return U8;
+    case NPY_UINT16:
+      return U16;
+    case NPY_UINT32:
+      return U32;
+    case NPY_UINT64:
+      return U64;
+    case NPY_FLOAT16:
+      return F16;
+    case NPY_FLOAT32:
+      return F32;
+    case NPY_FLOAT64:
+      return F64;
+    case NPY_OBJECT:
+      return TUPLE;
+    default:
+      LOG(FATAL) << "No XLA primitive type for Numpy type " << np_type;
+  }
+}
+
+bool NumpyTypeIsValid(int np_type) {
+  switch (np_type) {
+    case NPY_BOOL:
+    case NPY_INT8:
+    case NPY_INT16:
+    case NPY_INT32:
+    case NPY_INT64:
+    case NPY_UINT8:
+    case NPY_UINT16:
+    case NPY_UINT32:
+    case NPY_UINT64:
+    case NPY_FLOAT16:
+    case NPY_FLOAT32:
+    case NPY_FLOAT64:
+    case NPY_OBJECT:
+      return true;
+    default:
+      return false;
+  }
+}
+
+PyObject* PyShapeInfoFromXlaShape(const Shape& shape) {
+  int np_typenum = PrimitiveTypeToNumpyType(shape.element_type());
+  PyArray_Descr* np_dtype = PyArray_DescrFromType(np_typenum);
+
+  PyObject* dimensions;
+  if (ShapeUtil::IsTuple(shape)) {
+    int num_elements = ShapeUtil::TupleElementCount(shape);
+    dimensions = PyTuple_New(ShapeUtil::TupleElementCount(shape));
+    for (int i = 0; i < num_elements; ++i) {
+      PyTuple_SET_ITEM(
+          dimensions, i,
+          PyShapeInfoFromXlaShape(ShapeUtil::GetTupleElementShape(shape, i)));
+    }
+  } else {
+    int rank = ShapeUtil::Rank(shape);
+    dimensions = PyTuple_New(rank);
+    for (int i = 0; i < rank; ++i) {
+      PyTuple_SET_ITEM(dimensions, i,
+                       LongToPyIntOrPyLong(ShapeUtil::GetDimension(shape, i)));
+    }
+  }
+  return PyTuple_Pack(2, np_dtype, dimensions);
+}
+
+// Precondition: o->ob_type == &PyArrayDescr_Type
+static int NumpyTypenum(PyObject* o) {
+  return reinterpret_cast<PyArray_Descr*>(o)->type_num;
+}
+
+// Extracts the string held inside r and returns it as a C++ string.
+//
+// NOTE: this is an internal helper for conversion to a C++, and so decrefs r.
+static string ExtractStringAndDecref(PyObject* r) {
+  auto error = [r] {
+    return tensorflow::strings::Printf("<failed conversion of %p>", r);
+  };
+  if (r == nullptr) {
+    return error();
+  }
+#if PY_MAJOR_VERSION < 3
+  string result = PyString_AsString(r);
+#else
+  PyObject* bytes = PyUnicode_AsEncodedString(r, 0, 0);
+  if (bytes == nullptr) {
+    return error();
+  }
+  CHECK(PyBytes_Check(bytes));
+  string result = PyBytes_AsString(bytes);
+  Py_DECREF(bytes);
+#endif
+  Py_DECREF(r);
+  return result;
+}
+
+// Safely returns a str of the given Python object o as a C++ string.
+static string PyObjectCppStr(PyObject* o) {
+  PyObject* s = PyObject_Str(o);
+  return ExtractStringAndDecref(s);
+}
+
+// Safely returns a repr of the given Python object o as a C++ string.
+static string PyObjectCppRepr(PyObject* o) {
+  PyObject* r = PyObject_Repr(o);
+  return ExtractStringAndDecref(r);
+}
+
+Status CheckPyShapeInfo(PyObject* o) {
+  auto error = [o](const string& prefix) {
+    return InvalidArgument("%s; got %s", prefix.c_str(),
+                           PyObjectCppRepr(o).c_str());
+  };
+  // The object is a tuple (a pair)
+  if (!PyTuple_Check(o)) {
+    return error("Shape record must be a tuple");
+  }
+  if (PyTuple_Size(o) != 2) {
+    return error("Shape record tuple must be of length 2");
+  }
+
+  // It has a first element, which is a numpy dtype object
+  PyObject* first = PyTuple_GetItem(o, 0);
+  if (first == nullptr) {
+    return error("Tuple has no item 0 (shape dtype)");
+  }
+  if (first->ob_type != &PyArrayDescr_Type) {
+    return error(
+        "Shape record does not have a numpy dtype as its first element");
+  }
+  const int np_type = NumpyTypenum(first);
+  if (!NumpyTypeIsValid(np_type)) {
+    return error("Shape record has an invalid integer dtype");
+  }
+
+  // It has a second element, which is a tuple, either of shape
+  // records or of Python ints
+  PyObject* second = PyTuple_GetItem(o, 1);
+  if (!second) {
+    return error("Tuple has no item 0 (shape dimensions)");
+  }
+  if (!PyTuple_Check(second)) {
+    return error("Shape record does not have a tuple as its second element");
+  }
+  const int length = PyTuple_Size(second);
+  const PrimitiveType element_type = NumpyTypeToPrimitiveType(np_type);
+  for (int i = 0; i < length; i++) {
+    PyObject* dimension = PyTuple_GetItem(second, i);
+    if (element_type == TUPLE) {
+      VLOG(3) << "element_type is tuple, checking member: " << i;
+      Status result = CheckPyShapeInfo(dimension);
+      if (!result.ok()) {
+        return AddStatus(
+            result, tensorflow::strings::StrCat("Validating tuple member ", i,
+                                                " of ", PyObjectCppRepr(o)));
+      }
+    } else if (!CheckPyIntOrLong(dimension)) {
+      return error("Non-tuple shape record has a non-integer dimension");
+    }
+  }
+
+  return Status::OK();
+}
+
+// Precondition: CheckPyShapeInfo(o)
+Shape XlaShapeFromPyShapeInfo(PyObject* o) {
+  const int np_type = NumpyTypenum(PyTuple_GetItem(o, 0));
+  const PrimitiveType element_type = NumpyTypeToPrimitiveType(np_type);
+  PyObject* py_dimensions = PyTuple_GetItem(o, 1);
+  const int length = PyTuple_Size(py_dimensions);
+  if (element_type == TUPLE) {
+    std::vector<Shape> subshapes;
+    subshapes.reserve(length);
+    for (int i = 0; i < length; i++) {
+      subshapes.push_back(
+          XlaShapeFromPyShapeInfo(PyTuple_GetItem(py_dimensions, i)));
+    }
+    return ShapeUtil::MakeTupleShape(subshapes);
+  } else {
+    std::vector<int64> dimensions(length);
+    for (int i = 0; i < length; i++) {
+      dimensions[i] = PyIntOrPyLongToLong(PyTuple_GetItem(py_dimensions, i));
+      if (dimensions[i] == -1) {
+        CHECK(!PyErr_Occurred());
+      }
+    }
+    return ShapeUtil::MakeShape(element_type, dimensions);
+  }
+}
+
+// Helper that retrieves the member with attr_name, stringifies it if is not
+// None, and returns it as a C++ string.
+static tensorflow::gtl::optional<string> GetAttrAsString(
+    PyObject* o, const string& attr_name) {
+  if (!PyObject_HasAttrString(o, attr_name.c_str())) {
+    return tensorflow::gtl::nullopt;
+  }
+  PyObject* attr = PyObject_GetAttrString(o, attr_name.c_str());
+  if (attr == Py_None) {
+    Py_DECREF(attr);
+    return tensorflow::gtl::nullopt;
+  }
+  string result = PyObjectCppStr(attr);
+  Py_DECREF(attr);
+  return result;
+}
+
+// Helper that retrieves the member with attr_name, checks that it is an integer
+// if it is not None, and returns it as an int32 value.
+static tensorflow::gtl::optional<int32> GetAttrAsInt32(
+    PyObject* o, const string& attr_name) {
+  if (!PyObject_HasAttrString(o, attr_name.c_str())) {
+    return tensorflow::gtl::nullopt;
+  }
+  PyObject* attr = PyObject_GetAttrString(o, attr_name.c_str());
+  if (attr == Py_None) {
+    Py_DECREF(attr);
+    return tensorflow::gtl::nullopt;
+  }
+  if (!CheckPyIntOrLong(attr)) {
+    Py_DECREF(attr);
+    return tensorflow::gtl::nullopt;
+  }
+  long value = PyIntOrPyLongToLong(attr);  // NOLINT
+  Py_DECREF(attr);
+  if (value == -1 && PyErr_Occurred() != nullptr) {
+    return tensorflow::gtl::nullopt;
+  }
+  if (static_cast<int32>(value) != value) {
+    return tensorflow::gtl::nullopt;
+  }
+  return value;
+}
+
+StatusOr<OpMetadata> OpMetadataFromPyObject(PyObject* o) {
+  OpMetadata result;
+  tensorflow::gtl::optional<string> op_type = GetAttrAsString(o, "op_type");
+  if (op_type.has_value()) {
+    result.set_op_type(op_type.value());
+  }
+  tensorflow::gtl::optional<string> op_name = GetAttrAsString(o, "op_name");
+  if (op_name.has_value()) {
+    result.set_op_name(op_name.value());
+  }
+  tensorflow::gtl::optional<string> source_file =
+      GetAttrAsString(o, "source_file");
+  if (source_file.has_value()) {
+    result.set_source_file(source_file.value());
+  }
+  tensorflow::gtl::optional<int32> source_line =
+      GetAttrAsInt32(o, "source_line");
+  if (source_line.has_value()) {
+    result.set_source_line(source_line.value());
+  }
+  return result;
+}
+
+PyObject* PyObjectFromXlaLiteral(const Literal& literal) {
+  if (ShapeUtil::IsTuple(literal.shape())) {
+    int num_elements = ShapeUtil::TupleElementCount(literal.shape());
+    PyObject* tuple = PyTuple_New(num_elements);
+    for (int i = 0; i < num_elements; i++) {
+      PyTuple_SET_ITEM(
+          tuple, i, PyObjectFromXlaLiteral(LiteralView::Create(literal, {i})));
+    }
+    return tuple;
+  } else {
+    int rank = ShapeUtil::Rank(literal.shape());
+    std::vector<long> dimensions(rank);  // NOLINT - PyArray requires a long*
+    for (int i = 0; i < rank; i++) {
+      dimensions[i] = ShapeUtil::GetDimension(literal.shape(), i);
+    }
+    int np_type = PrimitiveTypeToNumpyType(literal.shape().element_type());
+    PyObject* array =
+        PyArray_EMPTY(rank, dimensions.data(), np_type, /*fortran=*/0);
+    CopyLiteralToNumpyArray(np_type, literal,
+                            reinterpret_cast<PyArrayObject*>(array));
+    return array;
+  }
+}
+
+StatusOr<std::unique_ptr<Literal>> XlaLiteralFromPyObject(PyObject* o) {
+  if (PyTuple_Check(o)) {
+    int num_elements = PyTuple_Size(o);
+    std::vector<std::unique_ptr<Literal>> elements;
+    elements.reserve(num_elements);
+    for (int i = 0; i < num_elements; i++) {
+      PyObject* element = PyTuple_GetItem(o, i);
+      TF_ASSIGN_OR_RETURN(auto literal, XlaLiteralFromPyObject(element));
+      elements.push_back(std::move(literal));
+    }
+    return Literal::MakeTupleOwned(std::move(elements));
+  } else if (PyArray_Check(o)) {
+    PyArrayObject* py_array = reinterpret_cast<PyArrayObject*>(o);
+    int rank = PyArray_NDIM(py_array);
+    std::vector<int64> dimensions(rank);
+    for (int i = 0; i < rank; i++) {
+      dimensions[i] = PyArray_DIM(py_array, i);
+    }
+    int np_type = PyArray_TYPE(py_array);
+    auto literal = Literal::CreateFromDimensions(
+        NumpyTypeToPrimitiveType(np_type), dimensions);
+    TF_RETURN_IF_ERROR(
+        CopyNumpyArrayToLiteral(np_type, py_array, literal.get()));
+    return std::move(literal);
+  } else {
+    return InvalidArgument(
+        "Non-tuple or Numpy array encountered in conversion to XLA literal.");
+  }
+}
+
+Status CopyNumpyArrayToLiteral(int np_type, PyArrayObject* py_array,
+                               Literal* literal) {
+  switch (np_type) {
+    case NPY_BOOL:
+      CopyNumpyArrayToLiteral<bool>(py_array, literal);
+      break;
+    case NPY_INT32:
+      CopyNumpyArrayToLiteral<int32>(py_array, literal);
+      break;
+    case NPY_INT64:
+      CopyNumpyArrayToLiteral<int64>(py_array, literal);
+      break;
+    case NPY_UINT8:
+      CopyNumpyArrayToLiteral<uint8>(py_array, literal);
+      break;
+    case NPY_UINT32:
+      CopyNumpyArrayToLiteral<uint32>(py_array, literal);
+      break;
+    case NPY_UINT64:
+      CopyNumpyArrayToLiteral<uint64>(py_array, literal);
+      break;
+    case NPY_FLOAT16:
+      CopyNumpyArrayToLiteral<half>(py_array, literal);
+      break;
+    case NPY_FLOAT32:
+      CopyNumpyArrayToLiteral<float>(py_array, literal);
+      break;
+    case NPY_FLOAT64:
+      CopyNumpyArrayToLiteral<double>(py_array, literal);
+      break;
+    default:
+      return InvalidArgument(
+          "No XLA literal container for Numpy type number: %d", np_type);
+  }
+  return Status::OK();
+}
+
+void CopyLiteralToNumpyArray(int np_type, const Literal& literal,
+                             PyArrayObject* py_array) {
+  switch (np_type) {
+    case NPY_BOOL:
+      CopyLiteralToNumpyArray<bool>(literal, py_array);
+      break;
+    case NPY_INT32:
+      CopyLiteralToNumpyArray<int32>(literal, py_array);
+      break;
+    case NPY_INT64:
+      CopyLiteralToNumpyArray<int64>(literal, py_array);
+      break;
+    case NPY_UINT8:
+      CopyLiteralToNumpyArray<uint8>(literal, py_array);
+      break;
+    case NPY_UINT32:
+      CopyLiteralToNumpyArray<uint32>(literal, py_array);
+      break;
+    case NPY_UINT64:
+      CopyLiteralToNumpyArray<uint64>(literal, py_array);
+      break;
+    case NPY_FLOAT16:
+      CopyLiteralToNumpyArray<half>(literal, py_array);
+      break;
+    case NPY_FLOAT32:
+      CopyLiteralToNumpyArray<float>(literal, py_array);
+      break;
+    case NPY_FLOAT64:
+      CopyLiteralToNumpyArray<double>(literal, py_array);
+      break;
+    default:
+      LOG(FATAL) << "No XLA literal container for Numpy type" << np_type;
+  }
+}
+
+PyObject* LongToPyIntOrPyLong(long x) {  // NOLINT
+#if PY_MAJOR_VERSION < 3
+  return PyInt_FromLong(x);
+#else
+  return PyLong_FromLong(x);
+#endif
+}
+
+long PyIntOrPyLongToLong(PyObject* o) {  // NOLINT
+#if PY_MAJOR_VERSION < 3
+  return PyInt_AsLong(o);
+#else
+  return PyLong_AsLong(o);
+#endif
+}
+
+bool CheckPyIntOrLong(PyObject* o) {
+#if PY_MAJOR_VERSION < 3
+  return PyInt_Check(o);
+#else
+  if (!PyLong_Check(o)) {
+    return false;
+  }
+  int overflow = 0;
+  PyLong_AsLongAndOverflow(o, &overflow);
+  return (overflow == 0);
+#endif
+}
+
+PyObject* PyNumberToPyInt(PyObject* o) {
+#if PY_MAJOR_VERSION < 3
+  return PyNumber_Int(o);
+#else
+  return PyNumber_Long(o);
+#endif
+}
+
+}  // namespace numpy
+
+}  // namespace swig
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/python/numpy_bridge.h b/tensorflow/compiler/xla/python/numpy_bridge.h
new file mode 100644
index 0000000000000000000000000000000000000000..6ff1c34cfc5e0323a6729bdfd5572239f4966211
--- /dev/null
+++ b/tensorflow/compiler/xla/python/numpy_bridge.h
@@ -0,0 +1,127 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// These functions transform Python/Numpy data structures to XLA data
+// structures and vice versa, performing copies where
+// appropriate. Python tuples and Numpy ndarrays translate to XLA
+// tuples and XLA literals, respectively, and Numpy shape/dtype
+// information is translated to XLA shape information.
+
+#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_NUMPY_BRIDGE_H_
+#define TENSORFLOW_COMPILER_XLA_PYTHON_NUMPY_BRIDGE_H_
+
+#include <algorithm>
+#include <memory>
+
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/python/lib/core/numpy.h"
+
+namespace xla {
+
+namespace swig {
+
+namespace numpy {
+
+// Maps XLA primitive types (PRED, S8, F32, ..., and TUPLE) to numpy
+// dtypes (NPY_BOOL, NPY_INT8, NPY_FLOAT32, ..., and NPY_OBJECT), and
+// vice versa.
+int PrimitiveTypeToNumpyType(PrimitiveType primitive_type);
+PrimitiveType NumpyTypeToPrimitiveType(int np_type);
+
+// Determines whether an integer-encoded Numpy dtype is valid,
+// i.e. has a supported conversion to an XLA PrimitiveType.
+bool NumpyTypeIsValid(int np_type);
+
+// Converts XLA shape information into a Python pair of the form
+// (numpy dtype, dimensions). If the XLA shape represents a tuple,
+// then the numpy dtype is NPY_OBJECT ('O') and `dimensions` is a
+// Python tuple of shape-description pairs, created
+// recursively. Otherwise, `dimensions` is a Python tuple-of-integers
+// providing the array dimensions.
+//
+// The return value is a new reference.
+PyObject* PyShapeInfoFromXlaShape(const Shape& shape);
+
+// Returns the outcome of a best-effort check that the Python object
+// is a pair of the form (numpy dtype, dimensions), as produced by
+// PyShapeInfoFromXlaShape.
+Status CheckPyShapeInfo(PyObject* o);
+
+// Performs the inverse conversion to that of PyShapeInfoFromXlaShape.
+//
+// The return value is a new reference.
+Shape XlaShapeFromPyShapeInfo(PyObject* o);
+
+// Converts a PyObject that represents operation metadata into protocol buffer
+// form.
+StatusOr<OpMetadata> OpMetadataFromPyObject(PyObject* o);
+
+// Converts an XLA literal to a Python object, either a Numpy ndarray
+// or a nested Python tuple thereof.
+//
+// To avoid transferring ownership of the data buffers that underlie
+// PyArrays and XLA literals, this function makes deep copies of all
+// array data.
+//
+// The return value is a new reference.
+PyObject* PyObjectFromXlaLiteral(const Literal& literal);
+
+// Converts a Numpy ndarray or a nested Python tuple thereof to a
+// corresponding XLA literal.
+//
+// To avoid transferring ownership of the data buffers that underlie
+// PyArrays and XLA literals, this function makes deep copies of all
+// array data.
+StatusOr<std::unique_ptr<Literal> > XlaLiteralFromPyObject(PyObject* o);
+
+// The following functions copy array data from the buffers underlying Numpy
+// ndarrays into those underlying XLA literals, and vice versa.
+
+Status CopyNumpyArrayToLiteral(int np_type, PyArrayObject* py_array,
+                               Literal* literal);
+
+void CopyLiteralToNumpyArray(int np_type, const Literal& literal,
+                             PyArrayObject* py_array);
+
+template <typename NativeT>
+void CopyNumpyArrayToLiteral(PyArrayObject* py_array, Literal* literal) {
+  NativeT* source = static_cast<NativeT*>(PyArray_DATA(py_array));
+  auto dest = literal->data<NativeT>();
+  std::copy(source, source + PyArray_SIZE(py_array), dest.data());
+}
+
+template <typename NativeT>
+void CopyLiteralToNumpyArray(const Literal& literal, PyArrayObject* py_array) {
+  NativeT* dest = static_cast<NativeT*>(PyArray_DATA(py_array));
+  auto source = literal.data<NativeT>();
+  std::copy(source.begin(), source.end(), dest);
+}
+
+// Workarounds for Python 2 and 3 interop
+
+PyObject* LongToPyIntOrPyLong(long x);  // NOLINT
+long PyIntOrPyLongToLong(PyObject* o);  // NOLINT
+bool CheckPyIntOrLong(PyObject* o);
+PyObject* PyNumberToPyInt(PyObject* o);
+
+}  // namespace numpy
+
+}  // namespace swig
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_PYTHON_NUMPY_BRIDGE_H_
diff --git a/tensorflow/compiler/xla/python/xla.i b/tensorflow/compiler/xla/python/xla.i
new file mode 100644
index 0000000000000000000000000000000000000000..1c4021a558d3fcff2abfdbdbad7f3928e86ed3b8
--- /dev/null
+++ b/tensorflow/compiler/xla/python/xla.i
@@ -0,0 +1,18 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+/* XLA-wide SWIG wrapper */
+
+%include "tensorflow/compiler/xla/python/local_computation_builder.i"
diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..5455adafcded90dbe38b4c444d2bc03fae445888
--- /dev/null
+++ b/tensorflow/compiler/xla/python/xla_client.py
@@ -0,0 +1,999 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""An in-process, local XLA client in Python, supporting AOT compilation."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import enum  # pylint: disable=g-bad-import-order
+import inspect
+import itertools
+import os
+
+import numpy as np
+
+from tensorflow.compiler.xla import xla_data_pb2
+from tensorflow.compiler.xla.python import pywrap_xla as c_api
+
+
+# Most functions are snake_case for consistency with other modules,
+# whereas method names of ComputationBuilder and LocalComputation are
+# CamelCase for consistency with XLA.
+# pylint: disable=invalid-name
+
+
+OpMetadata = collections.namedtuple(
+    'OpMetadata',
+    [
+        'op_type',
+        'op_name',
+        'source_file',
+        'source_line',
+    ],
+)
+
+
+def CurrentSourceInfoMetadata(op_type=None, op_name=None, skip_frames=1):
+  """Helper for use in source mapping that returns an OpMetadata object."""
+  full_filename, lineno = inspect.stack()[skip_frames][1:3]
+  filename = os.path.basename(full_filename)
+  return OpMetadata(
+      op_type=op_type,
+      op_name=op_name,
+      source_file=filename,
+      source_line=lineno)
+
+
+class PaddingType(enum.Enum):
+  VALID = 1
+  SAME = 2
+
+
+def _convert_padding_type_to_pad_values(padding_type, lhs_dims, rhs_dims,
+                                        window_strides):
+  """Maps PaddingType (VALID or SAME) to pad values (list of pairs of ints)."""
+  if padding_type == PaddingType.VALID:
+    return [(0, 0)] * len(window_strides)
+
+  out_shape = np.ceil(np.true_divide(lhs_dims, window_strides)).astype(int)
+  pad_sizes = [max((out_size - 1) * stride + filter_size - in_size, 0)
+               for out_size, stride, filter_size, in_size
+               in zip(out_shape, window_strides, rhs_dims, lhs_dims)]
+  return [(pad_size // 2, pad_size - pad_size // 2)
+          for pad_size in pad_sizes]
+
+
+_UNARY_OPS = [
+    'Not',
+    'Abs',
+    'Exp',
+    'Floor',
+    'Ceil',
+    'Log',
+    'Sign',
+    'Cos',
+    'Sin',
+    'Tanh',
+    'SqrtF32',
+    'SquareF32',
+    'IsFinite',
+    'ReciprocalF32',
+    'Neg',
+    'Sort',
+]
+
+_BINARY_OPS = [
+    'Eq',
+    'Ne',
+    'Ge',
+    'Gt',
+    'Lt',
+    'Le',
+    'Add',
+    'Sub',
+    'Mul',
+    'Div',
+    'Rem',
+    'Max',
+    'Min',
+    'And',
+    'Or',
+    'Pow',
+]
+
+XLA_ELEMENT_TYPE_TO_DTYPE = {
+    xla_data_pb2.F32: np.dtype(np.float32),
+    xla_data_pb2.F64: np.dtype(np.float64),
+    xla_data_pb2.S32: np.dtype(np.int32),
+    xla_data_pb2.S64: np.dtype(np.int64),
+    xla_data_pb2.U32: np.dtype(np.uint32),
+    xla_data_pb2.U64: np.dtype(np.uint64),
+    xla_data_pb2.PRED: np.dtype(np.bool),
+    xla_data_pb2.TUPLE: np.dtype(np.object),
+}
+
+# Note the conversion on the key. Numpy has a known issue wherein dtype hashing
+# doesn't work as expected (https://github.com/numpy/numpy/issues/7242). Thus,
+# when keying by dtype in this dict, we use the string form of dtypes.
+DTYPE_TO_XLA_ELEMENT_TYPE = {
+    str(v): k
+    for k, v in XLA_ELEMENT_TYPE_TO_DTYPE.items()
+}
+
+
+class LocalBuffer(object):
+  """Represents a handle to data owned by XLA.
+
+  The referent is ready for use in executing a local, compiled
+  Computation. On XLA platforms involving a device (e.g. GPU), this
+  means the referent is in device memory.
+  """
+
+  def __init__(self, c_local_shaped_buffer):
+    self.c_local_shaped_buffer = c_local_shaped_buffer
+    self._delete = c_api.DeleteLocalShapedBuffer
+
+  @staticmethod
+  def from_py(npval):
+    npval = require_numpy_array_layout(npval)
+    return LocalBuffer(c_api.LocalShapedBuffer.FromLiteral(npval))
+
+  def to_py(self):
+    return self.c_local_shaped_buffer.ToLiteral()
+
+  def delete(self):
+    if self.c_local_shaped_buffer is not None:
+      self._delete(self.c_local_shaped_buffer)
+      self.c_local_shaped_buffer = None
+
+  def is_deleted(self):
+    return self.c_local_shaped_buffer is None
+
+  def __del__(self):
+    self.delete()
+
+
+class Shape(object):
+  """XLA shape.
+
+  Represents an XLA shape by a corresponding Python/Numpy type and a
+  list of dimensions, which are themselves Shapes in case this one
+  represents an XLA tuple.
+  """
+
+  def __init__(self, np_dtype, dimensions):
+    self.np_dtype = np_dtype
+    self._dimensions = dimensions
+
+  def __repr__(self):
+    return 'xla_client.Shape(np_dtype={!r}, dimensions={!r})'.format(
+        self.np_dtype, self._dimensions)
+
+  def element_type(self):
+    return DTYPE_TO_XLA_ELEMENT_TYPE[str(self.np_dtype)]
+
+  def is_tuple(self):
+    return self.element_type() == xla_data_pb2.TUPLE
+
+  def dimensions(self):
+    if self.is_tuple():
+      raise ValueError('Tuple shape has no dimensions')
+    return self._dimensions
+
+  def tuple_shapes(self):
+    if not self.is_tuple():
+      raise ValueError('Shape is not a tuple shape')
+    return self._dimensions
+
+  @staticmethod
+  def from_numpy(npval):
+
+    def convert(npval):
+      if isinstance(npval, tuple):
+        return Shape(np.dtype('O'), tuple(convert(elt) for elt in npval))
+      else:
+        return Shape(npval.dtype, np.shape(npval))
+
+    return convert(require_numpy_array_layout(npval))
+
+
+def _wrap_shape(shape_info):
+  dtype, dims = shape_info
+  element_type = DTYPE_TO_XLA_ELEMENT_TYPE[str(dtype)]
+  if element_type == xla_data_pb2.TUPLE:
+    dims = [_wrap_shape(subshape_info) for subshape_info in dims]
+  return Shape(dtype, dims)
+
+
+def _unwrap_shape(shape):
+  if shape.is_tuple():
+    components = tuple(
+        _unwrap_shape(subshape) for subshape in shape.tuple_shapes())
+  else:
+    components = shape.dimensions()
+  return (shape.np_dtype, components)
+
+
+def _unwrap_shapes(shapes):
+  return [_unwrap_shape(shape) for shape in shapes]
+
+
+def _wrap_data_handle(handle):
+  cdh = xla_data_pb2.ComputationDataHandle()
+  cdh.handle = handle
+  return cdh
+
+
+def _unwrap_data_handle(handle_proto):
+  return handle_proto.handle
+
+
+def _unwrap_data_handles(handle_protos):
+  return [_unwrap_data_handle(cdh) for cdh in handle_protos]
+
+
+def require_numpy_array_layout(value):
+  if isinstance(value, tuple):
+    return tuple(require_numpy_array_layout(x) for x in value)
+  else:
+    return np.require(value, requirements=['C', 'A'])
+
+
+def transfer_to_infeed(value, replica_number=None):
+  """Transfers the given value into the XLA infeed queue.
+
+  XLA's infeed queue is a single queue that feeds the "XLA virtual machine" with
+  a totally ordered stream of values. This is dequeued from XLA computations via
+  the Infeed() operation.
+
+  Args:
+    value: the value that the caller would like to enqueue into the XLA infeed
+      queue
+    replica_number: the replica number to infeed the value to -- if not
+      provided, then the default replica (trivially replica 0) is used.
+  """
+  if replica_number is None:
+    c_api.TransferToInfeedLocal(require_numpy_array_layout(value))
+  else:
+    c_api.TransferToInfeedLocalReplica(
+        require_numpy_array_layout(value), replica_number)
+
+
+def transfer_from_outfeed(shape, replica_number=None):
+  """Transfers a literal of the given shape from replica_number's outfeed.
+
+  Args:
+    shape: The shape of the value to transfer from outfeed.
+    replica_number: The replica number ordinal to transfer the outfeed value
+      from. (Each replica has a distinct outfeed queue.)
+
+  Returns:
+    The literal value that is produced from the outfeed queue.
+  """
+  return c_api.TransferFromOutfeedLocalReplica(
+      _unwrap_shape(shape), replica_number or 0)
+
+
+class LocalComputation(object):
+  """Python wrapper for a local XLA Computation.
+
+  A LocalComputation can be executed if it is compiled. Otherwise, it
+  can still be used as a Computation where required by the
+  ComputationBuilder methods.
+  """
+
+  def __init__(self, c_local_computation, is_compiled):
+    self.c_local_computation = c_local_computation
+    self.is_compiled = is_compiled
+
+    # Ensure a reference to C-based destructor for use in __del__.
+    if is_compiled:
+      self._delete = c_api.DeleteCompiledLocalComputation
+    else:
+      self._delete = c_api.DeleteLocalComputation
+
+  def Compile(self, argument_shapes=()):
+    if self.is_compiled:
+      raise ValueError('Attempt to compile a compiled local XLA computation.')
+    return LocalComputation(
+        self.c_local_computation.Compile(_unwrap_shapes(argument_shapes)),
+        is_compiled=True)
+
+  def CompileWithExampleArguments(self, arguments=()):
+    return self.Compile(
+        argument_shapes=[Shape.from_numpy(arg) for arg in arguments])
+
+  def Execute(self, arguments=()):
+    if not self.is_compiled:
+      raise ValueError('Cannot execute an uncompiled local XLA computation.')
+    arguments = tuple(map(require_numpy_array_layout, arguments))
+    return self.c_local_computation.Execute(arguments)
+
+  def ExecuteWithLocalBuffers(self, arguments=()):
+    """Execute with LocalBuffer arguments and return value."""
+    if not self.is_compiled:
+      raise ValueError('Cannot execute an uncompiled local XLA computation.')
+    arguments = tuple(arguments)
+    if any(arg.is_deleted() for arg in arguments):
+      raise ValueError('Executing with deleted local buffer argument')
+    return LocalBuffer(
+        self.c_local_computation.ExecuteWithShapedBuffers(
+            [arg.c_local_shaped_buffer for arg in arguments]))
+
+  def __del__(self):
+    self._delete(self.c_local_computation)
+
+
+class ComputationBuilder(object):
+  """XLA computation builder.
+
+  Enqueues XLA ops in sequence and in order to build a
+  LocalComputation, which in turn can be compiled into a
+  CompiledLocalComputation, which in turn can be locally executed.
+  """
+
+  # The methods of this class map 1-to-1 onto the XLA C++
+  # computation builder API. Therefore, there's no need to laboriously list
+  # arguments and return values for every method, especially where it's obvious.
+  #
+  # pylint: disable=g-doc-return-or-yield
+  # pylint: disable=g-doc-args
+
+  def __init__(self, name):
+    self._client = c_api.LocalComputationBuilder(name.encode('utf8'))
+    self._parameter_numbering = itertools.count()
+
+  def Build(self):
+    return LocalComputation(self._client.Build(), is_compiled=False)
+
+  def SetOpMetadata(self, op_metadata):
+    """Set metadata for operations that are about to be enqueued."""
+    self._client.SetOpMetadata(op_metadata)
+
+  def ClearOpMetadata(self):
+    """Clear metadata for operations that are about to be enqueued."""
+    self._client.ClearOpMetadata()
+
+  def Infeed(self, shape):
+    """Enqueues an infeed op onto the computation.
+
+    Infeed operations dequeue data of the given shape from the device's infeed
+    queue for subsequent use in the computation.
+
+    Returns:
+      A  ComputationDataHandle message.
+    """
+    return _wrap_data_handle(self._client.Infeed(_unwrap_shape(shape)))
+
+  def Outfeed(self, operand):
+    """Enqueues an outfeed op onto the computation.
+
+    Outfeed operations enqueue data, using the given operand, onto the XLA
+    outfeed queue for subsequent dequeue via the client API.
+    """
+    self._client.Outfeed(
+        _unwrap_data_handle(operand), _unwrap_shape(self.GetShape(operand)),
+        ''.encode('utf-8'))
+
+  def Constant(self, value):
+    """Enqueues a constant op onto the computation.
+
+    Args:
+      value: value for the constant, as a np.array with an explicit dtype set
+             to one of the supported types.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    value = require_numpy_array_layout(value)
+    return _wrap_data_handle(self._client.ConstantLiteral(value))
+
+  def ConstantF32Scalar(self, value):
+    """Convenience method to enqueue a scalar F32 constant op.
+
+    Args:
+      value: a floating-point number.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.Constant(np.array(value, dtype=np.float32))
+
+  def ConstantF64Scalar(self, value):
+    """Convenience method to enqueue a scalar F32 constant op.
+
+    Args:
+      value: a floating-point number.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.Constant(np.array(value, dtype=np.float64))
+
+  def ConstantS32Scalar(self, value):
+    """Convenience method to enqueue a scalar S32 constant op.
+
+    Args:
+      value: a floating-point number.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.Constant(np.array(value, dtype=np.int32))
+
+  def ConstantS64Scalar(self, value):
+    """Convenience method to enqueue a scalar S64 constant op.
+
+    Args:
+      value: a floating-point number.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.Constant(np.array(value, dtype=np.int64))
+
+  def ConstantPredScalar(self, value):
+    """Convenience method to enqueue a scalar PRED constant op.
+
+    Args:
+      value: a boolean value.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.Constant(np.array(value, dtype=np.bool))
+
+  def ParameterWithShape(self, shape, name=None, parameter_num=None):
+    """Enqueues a Parameter op onto the computation, given a shape.
+
+    Args:
+      shape: the parameter's shape as a Shape object.
+      name: optional string name for the parameter.
+      parameter_num: parameter number in the computation function. If None,
+        the next linear parameter number is used. The default value capability
+        can be used for auto-numbering. If you're using auto-numbering for some
+        parameters, use it for *all* parameters to avoid clashes.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    if name is None:
+      name = ''
+    if parameter_num is None:
+      parameter_num = next(self._parameter_numbering)
+
+    return _wrap_data_handle(
+        self._client.Parameter(
+            parameter_num, _unwrap_shape(shape), name.encode('utf8')))
+
+  def ParameterFromNumpy(self, value, name=None, parameter_num=None):
+    """Enqueues a Parameter op onto the computation.
+
+    Args:
+      value: a Numpy array, or a nested tuple thereof, from which the
+        shape is inferred.
+      name: as in ParameterWithShape.
+      parameter_num: as in ParameterWithShape.
+
+    Returns:
+      A ComputationDataHandle message.
+    """
+    return self.ParameterWithShape(
+        Shape.from_numpy(value), name=name, parameter_num=parameter_num)
+
+  def Broadcast(self, operand, sizes):
+    """Enqueues a broadcast operation onto the computation.
+
+    Args:
+      operand: the operand ComputationDataHandle to broadcast.
+      sizes: an iterable of broadcast sizes.
+
+    Returns:
+      A ComputationDataHandle representing the added broadcast op.
+    """
+    return _wrap_data_handle(
+        self._client.Broadcast(_unwrap_data_handle(operand), sizes))
+
+  def Concatenate(self, operands, dimension):
+    """Enqueues a concatenate operation onto the computation.
+
+    Args:
+      operands: the operands to concatenate.
+      dimension: the dimension in which to perform the concatenation.
+
+    Returns:
+      A ComputationDataHandle representing the added concatenate op.
+    """
+    return _wrap_data_handle(
+        self._client.ConcatInDim(_unwrap_data_handles(operands), dimension))
+
+  def ConvertElementType(self, operand, new_element_type):
+    """Enqueues an element type conversion operation onto the computation.
+
+    Args:
+      operand: the operand to convert.
+      new_element_type: the target primitive type.
+
+    Returns:
+      A ComputationDataHandle representing the added conversion op.
+    """
+    return _wrap_data_handle(
+        self._client.ConvertElementType(
+            _unwrap_data_handle(operand), new_element_type))
+
+  def GetShape(self, operand):
+    return _wrap_shape(self._client.GetShape(_unwrap_data_handle(operand)))
+
+  def GetComputationStats(self):
+    raise NotImplementedError()
+
+  def Pad(self, operand, padding_value, padding_config):
+    """Enqueues a Pad operation onto the computation.
+
+    Args:
+      operand: ComputationDataHandle representing the array to pad.
+      padding_value: ComputationDataHandle representing the scalar pad value.
+      padding_config: either an xla_data_pb2.PaddingConfig or a list of integer
+        triples (edge_padding_low, edge_padding_high, interior_padding)
+        representing the configuration of the padding operation.
+
+    Returns:
+      A ComputationDataHandle representing the added pad op.
+    """
+    if not isinstance(padding_config, xla_data_pb2.PaddingConfig):
+      padding_config = self._GetPaddingConfigFromTriples(padding_config)
+    return _wrap_data_handle(
+        self._client.Pad(_unwrap_data_handle(operand),
+                         _unwrap_data_handle(padding_value),
+                         padding_config))
+
+  def _GetPaddingConfigFromTriples(self, triples):
+    """Create PaddingConfig proto from list of triples of integers."""
+    padding_config = xla_data_pb2.PaddingConfig()
+    for lo, hi, interior in triples:
+      dimension = padding_config.dimensions.add()
+      dimension.edge_padding_low = lo
+      dimension.edge_padding_high = hi
+      dimension.interior_padding = interior
+    return padding_config
+
+  def Reshape(self, operand, dimensions, new_sizes):
+    """Reshape op."""
+    return _wrap_data_handle(
+        self._client.Reshape(
+            _unwrap_data_handle(operand), dimensions, new_sizes))
+
+  def CrossReplicaSum(self, operand):
+    """CrossReplicaSum op.
+
+    Args:
+      operand: the operand to sum across replica instances.
+
+    Returns:
+      A ComputationDataHandle that has the sum of the value among all replicas.
+    """
+    return _wrap_data_handle(
+        self._client.CrossReplicaSum(_unwrap_data_handle(operand)))
+
+  def Collapse(self, operand, dimensions):
+    """Collapse op."""
+    return _wrap_data_handle(
+        self._client.Collapse(_unwrap_data_handle(operand), dimensions))
+
+  def Trans(self, operand):
+    """Specialized matrix transpose op."""
+    return _wrap_data_handle(
+        self._client.Transpose(_unwrap_data_handle(operand), [1, 0]))
+
+  def Transpose(self, operand, permutation):
+    """Transpose op."""
+    return _wrap_data_handle(
+        self._client.Transpose(_unwrap_data_handle(operand), permutation))
+
+  def Rev(self, operand, dimensions):
+    """Rev op."""
+    return _wrap_data_handle(
+        self._client.Rev(_unwrap_data_handle(operand), dimensions))
+
+  def SelectAndScatter(self, operand, select, window_dimensions, window_strides,
+                       padding, source, init_value, scatter):
+    """Select and scatter op, used by the gradient of ReduceWindow.
+
+    Args:
+      operand: ComputationDataHandle for array of dimension N and type T over
+        which the windows slide.
+      select: Computation of type (T, T) -> Pred to apply to the elements of
+        each window to indicate which element is selected.
+      window_dimensions: sequence of N integers for dimensions of the window.
+      window_strides: sequence of N integers for the strides of the window.
+      padding: PaddingType representing either 'SAME' or 'VALID ' padding.
+      source: ComputationDataHandle for array of type T with values to scatter.
+      init_value: ComputationDataHandle of scalar type T for initial out value.
+      scatter: Computation of type (T, T) -> T to apply to each scatter source
+        element with its destination element.
+
+    Returns:
+      A ComputationDataHandle representing the added SelectAndScatter op.
+    """
+    pads = _convert_padding_type_to_pad_values(
+        padding, self.GetShape(operand).dimensions(),
+        window_dimensions, window_strides)
+    return _wrap_data_handle(
+        self._client.SelectAndScatterWithGeneralPadding(
+            _unwrap_data_handle(operand), select.c_local_computation,
+            window_dimensions, window_strides, pads,
+            _unwrap_data_handle(source), _unwrap_data_handle(init_value),
+            scatter.c_local_computation))
+
+  def Select(self, pred, on_true, on_false):
+    """Element-wise selection op.
+
+    Constructs an output array from elements of two input arrays, based on the
+    values of a predicate array.
+    """
+    return _wrap_data_handle(
+        self._client.Select(
+            _unwrap_data_handle(pred),
+            _unwrap_data_handle(on_true),
+            _unwrap_data_handle(on_false)))
+
+  def Slice(self, operand, start_indices, limit_indices, strides=None):
+    """Enqueues a slice operation onto the computation.
+
+    Args:
+      operand: ComputationDataHandle for the N dimensional array to be sliced.
+      start_indices: iterable of N integers containing the starting indices of
+        the slice for each dimension.
+      limit_indices: iterable of N integers containing the ending indices
+        (exclusive) of the slice for each dimension.
+      strides: optional iterable of N integers containing the stride sizes for
+        each dimension.
+
+    Returns:
+      A ComputationDataHandle representing the added Slice op.
+    """
+    if strides is None:
+      start_indices = list(start_indices)
+      strides = [1] * len(start_indices)
+    return _wrap_data_handle(
+        self._client.Slice(
+            _unwrap_data_handle(operand),
+            start_indices,
+            limit_indices,
+            strides))
+
+  def DynamicSlice(self, operand, start_indices, slice_sizes):
+    """Enqueues a slice op with dynamic start indices onto the computation.
+
+    Args:
+      operand: ComputationDataHandle for the N dimensional array to be sliced.
+      start_indices: ComputationDataHandle for the 1D array of N integers
+        containing the starting indices of the slice.
+      slice_sizes: iterable of N integers containing the slice sizes in each
+        dimension.
+
+    Returns:
+      A ComputationDataHandle representing the added DynamicSlice op.
+    """
+    return _wrap_data_handle(
+        self._client.DynamicSlice(
+            _unwrap_data_handle(operand),
+            _unwrap_data_handle(start_indices),
+            slice_sizes))
+
+  def DynamicUpdateSlice(self, operand, update, start_indices):
+    """Enqueues a dynamic update slice operation onto the computation.
+
+    Args:
+      operand: ComputationDataHandle for the N dimensional array to be updated.
+      update: N dimensional array comprising the slice update.
+      start_indices: Rank-1 array of N integers comprising the starting indices
+        of the slice along each dimension.
+    Returns:
+      A ComputationDataHandle representing the added DynamicUpdateSlice op.
+    """
+    return _wrap_data_handle(
+        self._client.DynamicUpdateSlice(
+            _unwrap_data_handle(operand),
+            _unwrap_data_handle(update),
+            _unwrap_data_handle(start_indices)))
+
+  def Tuple(self, *ops):
+    """Enqueues a tuple operation onto the computation.
+
+    Args:
+      ops: a sequence of tuple operands (each a ComputationDataHandle).
+
+    Returns:
+      A ComputationDataHandle representing the added Tuple op.
+    """
+    return _wrap_data_handle(self._client.Tuple(_unwrap_data_handles(ops)))
+
+  def GetTupleElement(self, tup, index):
+    """Enqueues a 'get tuple element' operation onto the computation.
+
+    Args:
+      tup: the tuple operand (a ComputationDataHandle).
+      index: numeric index to select from the tuple.
+
+    Returns:
+      A ComputationDataHandle representing the added GetTupleElement op.
+    """
+    return _wrap_data_handle(
+        self._client.GetTupleElement(_unwrap_data_handle(tup), index))
+
+  def Call(self, computation_to_apply, operands):
+    """Enqueues a call operation onto the computation.
+
+    Args:
+      computation_to_apply: a Computation object.
+      operands: an iterable of ComputationDataHandle. The number and types of
+        operands must match the arity of computation_to_apply.
+
+    Returns:
+      A ComputationDataHandle representing the added call op.
+    """
+    return _wrap_data_handle(
+        self._client.Call(computation_to_apply.c_local_computation,
+                          _unwrap_data_handles(operands)))
+
+  def Map(self, operands, computation_to_apply, dimensions, static_operands=()):
+    """Enqueues a map operation onto the computation.
+
+    Args:
+      operands: an iterable of ComputationDataHandle.
+      computation_to_apply: a Computation object.
+      dimensions: dimensions over which to apply map the function.
+      static_operands: auxiliary arguments passed to the applied computation.
+
+    Returns:
+      A ComputationDataHandle representing the added Map op.
+    """
+    return _wrap_data_handle(
+        self._client.Map(
+            _unwrap_data_handles(operands),
+            computation_to_apply.c_local_computation,
+            dimensions,
+            _unwrap_data_handles(static_operands)))
+
+  def Reduce(self, operand, init_value, computation_to_apply, dimensions):
+    """Enqueues a reduction operation onto the computation.
+
+    Args:
+      operand: reduction operand (ComputationDataHandle).
+      init_value: reduction initial value (ComputationDataHandle).
+      computation_to_apply: a Computation object - binary reduction function.
+      dimensions: sequence of dimensions (integers) to reduce on.
+
+    Returns:
+      A ComputationDataHandle representing the added Reduce op.
+    """
+    return _wrap_data_handle(
+        self._client.Reduce(
+            _unwrap_data_handle(operand),
+            _unwrap_data_handle(init_value),
+            computation_to_apply.c_local_computation,
+            dimensions))
+
+  def ReduceWindow(self, operand, init_value, computation_to_apply,
+                   window_dimensions, window_strides, padding):
+    """Enqueues a windowed reduction operation onto the computation.
+
+    Args:
+      operand: reduction operand (ComputationDataHandle).
+      init_value: reduction initial value (ComputationDataHandle).
+      computation_to_apply: a binary reduction function (Computation).
+      window_dimensions: dimensions of window (sequence of integers).
+      window_strides: strides for window (sequence of integers).
+      padding: PaddingType representing either 'SAME' or 'VALID' padding.
+
+    Returns:
+      A ComputationDataHandle representing the added ReduceWindow op.
+    """
+    pads = _convert_padding_type_to_pad_values(
+        padding, self.GetShape(operand).dimensions(), window_dimensions,
+        window_strides)
+    return _wrap_data_handle(
+        self._client.ReduceWindowWithGeneralPadding(
+            _unwrap_data_handle(operand),
+            _unwrap_data_handle(init_value),
+            computation_to_apply.c_local_computation,
+            window_dimensions, window_strides, pads))
+
+  def RngNormal(self, mu, sigma, dims):
+    """Enqueues an RngNormal operation onto the computation.
+
+    Args:
+      mu: A ComputationDataHandle to an F32 scalar specifying the mean.
+      sigma: A ComputationDataHandle to an F32 scalar specifying the standard
+        deviation.
+      dims: A 1D array-like of nonnegative integers specifying the dimensions.
+
+    Returns: a ComputationDataHandle to the generated array of F32 values.
+    """
+    shape = Shape(self.GetShape(mu).np_dtype, dims)
+    return _wrap_data_handle(
+        self._client.RngNormal(
+            _unwrap_data_handle(mu), _unwrap_data_handle(sigma),
+            _unwrap_shape(shape)))
+
+  def RngUniform(self, a, b, dims):
+    """Enqueues an RngUniform operation onto the computation.
+
+    Args:
+      a: a ComputationDataHandle to an F32, S32, or U32 scalar (consistent with
+        the type of b) specifying the low end of the interval [a, b) over which
+        values are generated.
+      b: a ComputationDataHandle to an F32, S32, or U32 scalar (consistent with
+        the type of a) specifying the high end of the interval [a, b) over which
+        values are generated.
+      dims: A 1D array-like of nonnegative integers specifying the dimensions.
+
+    Returns: a ComputationDataHandle to the generated array of values with the
+      same numeric type (F32, S32, or U32) as the arguments a and b.
+    """
+    shape = Shape(self.GetShape(a).np_dtype, dims)
+    return _wrap_data_handle(
+        self._client.RngUniform(
+            _unwrap_data_handle(a), _unwrap_data_handle(b),
+            _unwrap_shape(shape)))
+
+  def While(self, cond, body, init):
+    """Enqueues a While operation onto the computation.
+
+    Args:
+      cond: a Computation for the loop condition, which has type T -> PRED
+      body: a Computation for the loop body, which has type T -> T
+      init: an ComputationDataHandle for the initial parameter, which has type T
+
+    Returns: a ComputationDataHandle representing the While operation.
+    """
+    return _wrap_data_handle(
+        self._client.While(cond.c_local_computation,
+                           body.c_local_computation,
+                           _unwrap_data_handle(init)))
+
+  def Dot(self, lhs, rhs):
+    """Matrix multiplication between lhs and rhs."""
+    return _wrap_data_handle(
+        self._client.Dot(_unwrap_data_handle(lhs), _unwrap_data_handle(rhs)))
+
+  def Conv(self, lhs, rhs, window_strides, padding):
+    """Enqueues a Conv operation onto the computation.
+
+    Args:
+      lhs: ComputationDataHandle for the rank N+2 array of inputs.
+      rhs: ComputationDataHandle for the rank N+2 array of kernel weights.
+      window_strides: length-N array-like of integer kernel strides.
+      padding: PaddingType representing either 'SAME' or 'VALID' padding.
+
+    Returns: a ComputationDataHandle representing the Conv operation.
+    """
+    pads = _convert_padding_type_to_pad_values(
+        padding, self.GetShape(lhs).dimensions()[2:],
+        self.GetShape(rhs).dimensions()[2:], window_strides)
+    dimension_numbers = self._GetConvDimensionNumbers(len(window_strides))
+    return _wrap_data_handle(
+        self._client.ConvGeneralDilated(_unwrap_data_handle(lhs),
+                                        _unwrap_data_handle(rhs),
+                                        window_strides,
+                                        pads,
+                                        (),
+                                        (),
+                                        dimension_numbers))
+
+  def ConvWithGeneralPadding(self, lhs, rhs, window_strides, padding,
+                             lhs_dilation, rhs_dilation):
+    """Enqueues a ConvWithGeneralPadding operation onto the computation.
+
+    Args:
+      lhs: ComputationDataHandle for the rank N+2 array of inputs.
+      rhs: ComputationDataHandle for the rank N+2 array of kernel weights.
+      window_strides: length-N array-like of kernel strides.
+      padding: length-N array-like of pairs of integers of (low, high) padding.
+      lhs_dilation: length-N array-like of dilation factors.
+      rhs_dilation: length-N array-like of dilation factors.
+
+    Returns:
+      A ComputationdataHandle representing the added ConvWithGeneralPadding op.
+    """
+    dimension_numbers = self._GetConvDimensionNumbers(len(window_strides))
+    return _wrap_data_handle(
+        self._client.ConvGeneralDilated(_unwrap_data_handle(lhs),
+                                        _unwrap_data_handle(rhs),
+                                        window_strides,
+                                        padding,
+                                        lhs_dilation,
+                                        rhs_dilation,
+                                        dimension_numbers))
+
+  def _GetConvDimensionNumbers(self, num_spatial_dims):
+    """Create ConvolutionDimensionNumbers proto for convolutions."""
+    nd = num_spatial_dims
+    dimension_numbers = xla_data_pb2.ConvolutionDimensionNumbers()
+    dimension_numbers.input_batch_dimension = 0
+    dimension_numbers.input_feature_dimension = 1
+    dimension_numbers.output_batch_dimension = 0
+    dimension_numbers.output_feature_dimension = 1
+    dimension_numbers.kernel_output_feature_dimension = 0
+    dimension_numbers.kernel_input_feature_dimension = 1
+    dimension_numbers.input_spatial_dimensions.extend(range(2, 2 + nd))
+    dimension_numbers.kernel_spatial_dimensions.extend(range(2, 2 + nd))
+    dimension_numbers.output_spatial_dimensions.extend(range(2, 2 + nd))
+    return dimension_numbers
+
+
+def _forward_methods_to_local_builder():
+  """Forward remaining ComputationBuilder methods to the C API.
+
+  Set up methods, corresponding to unary and binary XLA operations,
+  whose calls are forwarded in a boilerplate manner to the underlying
+  LocalComputationBuilder C-extension API.
+  """
+
+  def forward_to_local_builder_with_handles(target_method, is_binop=False):
+    """Generate a forwarding method that wraps/unwraps data handles."""
+
+    def forward(self, *args, **kwargs):
+      unwrapped_args = [_unwrap_data_handle(arg) for arg in args]
+
+      if is_binop and len(unwrapped_args) < 3:
+        unwrapped_args.append(kwargs.get('broadcast_dimensions', ()))
+
+      return _wrap_data_handle(
+          target_method(
+              self._client,  # pylint: disable=protected-access
+              *unwrapped_args))
+
+    return forward
+
+  for method_name in _UNARY_OPS:
+    forward = forward_to_local_builder_with_handles(
+        getattr(c_api.LocalComputationBuilder, method_name))
+    forward.__name__ = method_name
+    setattr(ComputationBuilder, method_name, forward)
+
+  for method_name in _BINARY_OPS:
+    forward = forward_to_local_builder_with_handles(
+        getattr(c_api.LocalComputationBuilder, method_name), is_binop=True)
+    forward.__name__ = method_name
+    setattr(ComputationBuilder, method_name, forward)
+
+
+_forward_methods_to_local_builder()
+
+
+def initialize_replica_count(replica_count):
+  """Initializes the desired replica count to use on XLA service init.
+
+  Args:
+    replica_count: number of replicas that are desired for set up during XLA
+      initalization.
+
+  Raises:
+    A runtime exception if the XLA service has already been initialized.
+  """
+  c_api.InitializeReplicaCount(replica_count)
+
+
+def get_replica_count():
+  """Returns the current replica count used for the XLA service.
+
+  Note: this will return a value whether the XLA service has been initialized
+  yet or not.
+  """
+  return c_api.GetReplicaCount()
diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0413b9bbc3b7f8b63e4cf7a8f24980322cffc47
--- /dev/null
+++ b/tensorflow/compiler/xla/python/xla_client_test.py
@@ -0,0 +1,1223 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the Python extension-based XLA client."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import itertools
+import threading
+
+import numpy as np
+
+from tensorflow.compiler.xla.python import xla_client
+import unittest
+
+
+class LocalComputationTest(unittest.TestCase):
+  """Base class for running an XLA Computation through the local client."""
+
+  def _NewComputation(self, name=None):
+    if name is None:
+      name = self.id()
+    return xla_client.ComputationBuilder(name)
+
+  def _Execute(self, c, arguments):
+    compiled_c = c.Build().CompileWithExampleArguments(arguments)
+    return compiled_c.Execute(arguments)
+
+  def _ExecuteAndAssertWith(self, assert_func, c, arguments, expected):
+    assert expected is not None
+    result = self._Execute(c, arguments)
+    # Numpy's comparison methods are a bit too lenient by treating inputs as
+    # "array-like", meaning that scalar 4 will be happily compared equal to
+    # [[4]]. We'd like to be more strict so assert shapes as well.
+    self.assertEqual(np.asanyarray(result).shape, np.asanyarray(expected).shape)
+    assert_func(result, expected)
+
+  def _ExecuteAndCompareExact(self, c, arguments=(), expected=None):
+    self._ExecuteAndAssertWith(np.testing.assert_equal, c, arguments, expected)
+
+  def _ExecuteAndCompareClose(self, c, arguments=(), expected=None):
+    self._ExecuteAndAssertWith(np.testing.assert_allclose, c, arguments,
+                               expected)
+
+
+def NumpyArrayF32(*args, **kwargs):
+  """Convenience wrapper to create Numpy arrays with a np.float32 dtype."""
+  return np.array(*args, dtype=np.float32, **kwargs)
+
+
+def NumpyArrayF64(*args, **kwargs):
+  """Convenience wrapper to create Numpy arrays with a np.float64 dtype."""
+  return np.array(*args, dtype=np.float64, **kwargs)
+
+
+def NumpyArrayS32(*args, **kwargs):
+  """Convenience wrapper to create Numpy arrays with a np.int32 dtype."""
+  return np.array(*args, dtype=np.int32, **kwargs)
+
+
+def NumpyArrayS64(*args, **kwargs):
+  """Convenience wrapper to create Numpy arrays with a np.int64 dtype."""
+  return np.array(*args, dtype=np.int64, **kwargs)
+
+
+def NumpyArrayBool(*args, **kwargs):
+  """Convenience wrapper to create Numpy arrays with a np.bool dtype."""
+  return np.array(*args, dtype=np.bool, **kwargs)
+
+
+class ComputationsWithConstantsTest(LocalComputationTest):
+  """Tests focusing on Constant ops."""
+
+  def testConstantScalarSumF32(self):
+    c = self._NewComputation()
+    c.Add(c.ConstantF32Scalar(1.11), c.ConstantF32Scalar(3.14))
+    self._ExecuteAndCompareClose(c, expected=4.25)
+
+  def testConstantScalarSumF64(self):
+    c = self._NewComputation()
+    c.Add(c.ConstantF64Scalar(1.11), c.ConstantF64Scalar(3.14))
+    self._ExecuteAndCompareClose(c, expected=4.25)
+
+  def testConstantScalarSumS32(self):
+    c = self._NewComputation()
+    c.Add(c.ConstantS32Scalar(1), c.ConstantS32Scalar(2))
+    self._ExecuteAndCompareClose(c, expected=3)
+
+  def testConstantScalarSumS64(self):
+    c = self._NewComputation()
+    c.Add(c.ConstantS64Scalar(1), c.ConstantS64Scalar(2))
+    self._ExecuteAndCompareClose(c, expected=3)
+
+  def testConstantVectorMulF32(self):
+    c = self._NewComputation()
+    c.Mul(
+        c.Constant(NumpyArrayF32([2.5, 3.3, -1.2, 0.7])),
+        c.Constant(NumpyArrayF32([-1.2, 2, -2, -3])))
+    self._ExecuteAndCompareClose(c, expected=[-3, 6.6, 2.4, -2.1])
+
+  def testConstantVectorMulF64(self):
+    c = self._NewComputation()
+    c.Mul(
+        c.Constant(NumpyArrayF64([2.5, 3.3, -1.2, 0.7])),
+        c.Constant(NumpyArrayF64([-1.2, 2, -2, -3])))
+    self._ExecuteAndCompareClose(c, expected=[-3, 6.6, 2.4, -2.1])
+
+  def testConstantVectorScalarDivF32(self):
+    c = self._NewComputation()
+    c.Div(
+        c.Constant(NumpyArrayF32([1.5, 2.5, 3.0, -10.8])),
+        c.ConstantF32Scalar(2.0))
+    self._ExecuteAndCompareClose(c, expected=[0.75, 1.25, 1.5, -5.4])
+
+  def testConstantVectorScalarDivF64(self):
+    c = self._NewComputation()
+    c.Div(
+        c.Constant(NumpyArrayF64([1.5, 2.5, 3.0, -10.8])),
+        c.ConstantF64Scalar(2.0))
+    self._ExecuteAndCompareClose(c, expected=[0.75, 1.25, 1.5, -5.4])
+
+  def testConstantVectorScalarPowF32(self):
+    c = self._NewComputation()
+    c.Pow(c.Constant(NumpyArrayF32([1.5, 2.5, 3.0])), c.ConstantF32Scalar(2.))
+    self._ExecuteAndCompareClose(c, expected=[2.25, 6.25, 9.])
+
+  def testConstantVectorScalarPowF64(self):
+    c = self._NewComputation()
+    c.Pow(c.Constant(NumpyArrayF64([1.5, 2.5, 3.0])), c.ConstantF64Scalar(2.))
+    self._ExecuteAndCompareClose(c, expected=[2.25, 6.25, 9.])
+
+  def testBooleanAnd(self):
+    c = self._NewComputation()
+    c.And(
+        c.Constant(NumpyArrayBool([True, False, True, False])),
+        c.Constant(NumpyArrayBool([True, True, False, False])))
+    self._ExecuteAndCompareExact(c, expected=[True, False, False, False])
+
+  def testBooleanOr(self):
+    c = self._NewComputation()
+    c.Or(
+        c.Constant(NumpyArrayBool([True, False, True, False])),
+        c.Constant(NumpyArrayBool([True, True, False, False])))
+    self._ExecuteAndCompareExact(c, expected=[True, True, True, False])
+
+  def testSum2DF32(self):
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF32([[1, 2, 3], [4, 5, 6]])),
+        c.Constant(NumpyArrayF32([[1, -1, 1], [-1, 1, -1]])))
+    self._ExecuteAndCompareClose(c, expected=[[2, 1, 4], [3, 6, 5]])
+
+  def testSum2DF64(self):
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF64([[1, 2, 3], [4, 5, 6]])),
+        c.Constant(NumpyArrayF64([[1, -1, 1], [-1, 1, -1]])))
+    self._ExecuteAndCompareClose(c, expected=[[2, 1, 4], [3, 6, 5]])
+
+  def testSum2DWith1DBroadcastDim0F32(self):
+    # sum of a 2D array with a 1D array where the latter is replicated across
+    # dimension 0 to match the former's shape.
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF32([10, 20, 30])),
+        broadcast_dimensions=(0,))
+    self._ExecuteAndCompareClose(
+        c, expected=[[11, 12, 13], [24, 25, 26], [37, 38, 39]])
+
+  def testSum2DWith1DBroadcastDim0F64(self):
+    # sum of a 2D array with a 1D array where the latter is replicated across
+    # dimension 0 to match the former's shape.
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF64([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF64([10, 20, 30])),
+        broadcast_dimensions=(0,))
+    self._ExecuteAndCompareClose(
+        c, expected=[[11, 12, 13], [24, 25, 26], [37, 38, 39]])
+
+  def testSum2DWith1DBroadcastDim1F32(self):
+    # sum of a 2D array with a 1D array where the latter is replicated across
+    # dimension 1 to match the former's shape.
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF32([10, 20, 30])),
+        broadcast_dimensions=(1,))
+    self._ExecuteAndCompareClose(
+        c, expected=[[11, 22, 33], [14, 25, 36], [17, 28, 39]])
+
+  def testSum2DWith1DBroadcastDim1F64(self):
+    # sum of a 2D array with a 1D array where the latter is replicated across
+    # dimension 1 to match the former's shape.
+    c = self._NewComputation()
+    c.Add(
+        c.Constant(NumpyArrayF64([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF64([10, 20, 30])),
+        broadcast_dimensions=(1,))
+    self._ExecuteAndCompareClose(
+        c, expected=[[11, 22, 33], [14, 25, 36], [17, 28, 39]])
+
+  def testConstantAxpyF32(self):
+    c = self._NewComputation()
+    c.Add(
+        c.Mul(
+            c.ConstantF32Scalar(2),
+            c.Constant(NumpyArrayF32([2.2, 3.3, 4.4, 5.5]))),
+        c.Constant(NumpyArrayF32([100, -100, 200, -200])))
+    self._ExecuteAndCompareClose(c, expected=[104.4, -93.4, 208.8, -189])
+
+  def testConstantAxpyF64(self):
+    c = self._NewComputation()
+    c.Add(
+        c.Mul(
+            c.ConstantF64Scalar(2),
+            c.Constant(NumpyArrayF64([2.2, 3.3, 4.4, 5.5]))),
+        c.Constant(NumpyArrayF64([100, -100, 200, -200])))
+    self._ExecuteAndCompareClose(c, expected=[104.4, -93.4, 208.8, -189])
+
+
+class ParametersTest(LocalComputationTest):
+  """Tests focusing on Parameter ops and argument-passing."""
+
+  def setUp(self):
+    self.f32_scalar_2 = NumpyArrayF32(2.0)
+    self.f32_4vector = NumpyArrayF32([-2.3, 3.3, -4.3, 5.3])
+    self.f64_scalar_2 = NumpyArrayF64(2.0)
+    self.f64_4vector = NumpyArrayF64([-2.3, 3.3, -4.3, 5.3])
+    self.s32_scalar_3 = NumpyArrayS32(3)
+    self.s32_4vector = NumpyArrayS32([10, 15, -2, 7])
+    self.s64_scalar_3 = NumpyArrayS64(3)
+    self.s64_4vector = NumpyArrayS64([10, 15, -2, 7])
+
+  def testScalarTimesVectorAutonumberF32(self):
+    c = self._NewComputation()
+    p0 = c.ParameterFromNumpy(self.f32_scalar_2)
+    p1 = c.ParameterFromNumpy(self.f32_4vector)
+    c.Mul(p0, p1)
+    self._ExecuteAndCompareClose(
+        c,
+        arguments=[self.f32_scalar_2, self.f32_4vector],
+        expected=[-4.6, 6.6, -8.6, 10.6])
+
+  def testScalarTimesVectorAutonumberF64(self):
+    c = self._NewComputation()
+    p0 = c.ParameterFromNumpy(self.f64_scalar_2)
+    p1 = c.ParameterFromNumpy(self.f64_4vector)
+    c.Mul(p0, p1)
+    self._ExecuteAndCompareClose(
+        c,
+        arguments=[self.f64_scalar_2, self.f64_4vector],
+        expected=[-4.6, 6.6, -8.6, 10.6])
+
+  def testScalarTimesVectorS32(self):
+    c = self._NewComputation()
+    p0 = c.ParameterFromNumpy(self.s32_scalar_3)
+    p1 = c.ParameterFromNumpy(self.s32_4vector)
+    c.Mul(p0, p1)
+    self._ExecuteAndCompareExact(
+        c,
+        arguments=[self.s32_scalar_3, self.s32_4vector],
+        expected=[30, 45, -6, 21])
+
+  def testScalarTimesVectorS64(self):
+    c = self._NewComputation()
+    p0 = c.ParameterFromNumpy(self.s64_scalar_3)
+    p1 = c.ParameterFromNumpy(self.s64_4vector)
+    c.Mul(p0, p1)
+    self._ExecuteAndCompareExact(
+        c,
+        arguments=[self.s64_scalar_3, self.s64_4vector],
+        expected=[30, 45, -6, 21])
+
+  def testScalarMinusVectorExplicitNumberingF32(self):
+    # Use explicit numbering and pass parameter_num first. Sub is used since
+    # it's not commutative and can help catch parameter reversal within the
+    # computation.
+    c = self._NewComputation()
+    p1 = c.ParameterFromNumpy(self.f32_4vector, parameter_num=1)
+    p0 = c.ParameterFromNumpy(self.f32_scalar_2, parameter_num=0)
+    c.Sub(p1, p0)
+    self._ExecuteAndCompareClose(
+        c,
+        arguments=[self.f32_scalar_2, self.f32_4vector],
+        expected=[-4.3, 1.3, -6.3, 3.3])
+
+  def testScalarMinusVectorExplicitNumberingF64(self):
+    # Use explicit numbering and pass parameter_num first. Sub is used since
+    # it's not commutative and can help catch parameter reversal within the
+    # computation.
+    c = self._NewComputation()
+    p1 = c.ParameterFromNumpy(self.f64_4vector, parameter_num=1)
+    p0 = c.ParameterFromNumpy(self.f64_scalar_2, parameter_num=0)
+    c.Sub(p1, p0)
+    self._ExecuteAndCompareClose(
+        c,
+        arguments=[self.f64_scalar_2, self.f64_4vector],
+        expected=[-4.3, 1.3, -6.3, 3.3])
+
+
+class LocalBufferTest(LocalComputationTest):
+  """Tests focusing on execution with LocalBuffers."""
+
+  def _Execute(self, c, arguments):
+    compiled_c = c.Build().CompileWithExampleArguments(arguments)
+    arg_buffers = [xla_client.LocalBuffer.from_py(arg) for arg in arguments]
+    result_buffer = compiled_c.ExecuteWithLocalBuffers(arg_buffers)
+    return result_buffer.to_py()
+
+  def testConstantSum(self):
+    c = self._NewComputation()
+    c.Add(c.ConstantF32Scalar(1.11), c.ConstantF32Scalar(3.14))
+    self._ExecuteAndCompareClose(c, expected=4.25)
+
+  def testOneParameterSum(self):
+    c = self._NewComputation()
+    c.Add(c.ParameterFromNumpy(NumpyArrayF32(0.)), c.ConstantF32Scalar(3.14))
+    self._ExecuteAndCompareClose(
+        c,
+        arguments=[NumpyArrayF32(1.11)],
+        expected=4.25)
+
+  def testTwoParameterSum(self):
+    c = self._NewComputation()
+    c.Add(c.ParameterFromNumpy(NumpyArrayF32(0.)),
+          c.ParameterFromNumpy(NumpyArrayF32(0.)))
+    self._ExecuteAndCompareClose(
+        c,
+        arguments=[NumpyArrayF32(1.11), NumpyArrayF32(3.14)],
+        expected=4.25)
+
+  def testCannotCallWithDeletedBuffers(self):
+    c = self._NewComputation()
+    c.Add(c.ParameterFromNumpy(NumpyArrayF32(0.)), c.ConstantF32Scalar(3.14))
+    arg = NumpyArrayF32(1.11)
+    compiled_c = c.Build().CompileWithExampleArguments([arg])
+    arg_buffer = xla_client.LocalBuffer.from_py(arg)
+    arg_buffer.delete()
+    with self.assertRaises(ValueError):
+      compiled_c.ExecuteWithLocalBuffers([arg_buffer])
+
+
+class SingleOpTest(LocalComputationTest):
+  """Tests for single ops.
+
+  The goal here is smoke testing - to exercise the most basic functionality of
+  single XLA ops. As minimal as possible number of additional ops are added
+  around the op being tested.
+  """
+
+  def testConcatenateF32(self):
+    c = self._NewComputation()
+    c.Concatenate(
+        (c.Constant(NumpyArrayF32([1.0, 2.0, 3.0])),
+         c.Constant(NumpyArrayF32([4.0, 5.0, 6.0]))),
+        dimension=0)
+    self._ExecuteAndCompareClose(c, expected=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
+
+  def testConcatenateF64(self):
+    c = self._NewComputation()
+    c.Concatenate(
+        (c.Constant(NumpyArrayF64([1.0, 2.0, 3.0])),
+         c.Constant(NumpyArrayF64([4.0, 5.0, 6.0]))),
+        dimension=0)
+    self._ExecuteAndCompareClose(c, expected=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
+
+  def testConvertElementType(self):
+    xla_types = {
+        np.bool: xla_client.xla_data_pb2.PRED,
+        np.int32: xla_client.xla_data_pb2.S32,
+        np.int64: xla_client.xla_data_pb2.S64,
+        np.float32: xla_client.xla_data_pb2.F32,
+        np.float64: xla_client.xla_data_pb2.F64,
+    }
+
+    def _ConvertAndTest(template, src_dtype, dst_dtype):
+      c = self._NewComputation()
+      x = c.Constant(np.array(template, dtype=src_dtype))
+      c.ConvertElementType(x, xla_types[dst_dtype])
+
+      result = c.Build().Compile().Execute()
+      expected = np.array(template, dtype=dst_dtype)
+
+      self.assertEqual(result.shape, expected.shape)
+      self.assertEqual(result.dtype, expected.dtype)
+      np.testing.assert_equal(result, expected)
+
+    x = [0, 1, 0, 0, 1]
+    for src_dtype, dst_dtype in itertools.product(xla_types, xla_types):
+      _ConvertAndTest(x, src_dtype, dst_dtype)
+
+  def testCrossReplicaSumOneReplica(self):
+    samples = [
+        NumpyArrayF32(42.0),
+        NumpyArrayF32([97.0]),
+        NumpyArrayF32([64.0, 117.0]),
+        NumpyArrayF32([[2.0, 3.0], [4.0, 5.0]]),
+    ]
+    for lhs in samples:
+      c = self._NewComputation()
+      c.CrossReplicaSum(c.Constant(lhs))
+      self._ExecuteAndCompareExact(c, expected=lhs)
+
+  def testDotMatrixVectorF32(self):
+    c = self._NewComputation()
+    lhs = NumpyArrayF32([[2.0, 3.0], [4.0, 5.0]])
+    rhs = NumpyArrayF32([[10.0], [20.0]])
+    c.Dot(c.Constant(lhs), c.Constant(rhs))
+    self._ExecuteAndCompareClose(c, expected=np.dot(lhs, rhs))
+
+  def testDotMatrixVectorF64(self):
+    c = self._NewComputation()
+    lhs = NumpyArrayF64([[2.0, 3.0], [4.0, 5.0]])
+    rhs = NumpyArrayF64([[10.0], [20.0]])
+    c.Dot(c.Constant(lhs), c.Constant(rhs))
+    self._ExecuteAndCompareClose(c, expected=np.dot(lhs, rhs))
+
+  def testDotMatrixMatrixF32(self):
+    c = self._NewComputation()
+    lhs = NumpyArrayF32([[2.0, 3.0], [4.0, 5.0]])
+    rhs = NumpyArrayF32([[10.0, 20.0], [100.0, 200.0]])
+    c.Dot(c.Constant(lhs), c.Constant(rhs))
+    self._ExecuteAndCompareClose(c, expected=np.dot(lhs, rhs))
+
+  def testDotMatrixMatrixF64(self):
+    c = self._NewComputation()
+    lhs = NumpyArrayF64([[2.0, 3.0], [4.0, 5.0]])
+    rhs = NumpyArrayF64([[10.0, 20.0], [100.0, 200.0]])
+    c.Dot(c.Constant(lhs), c.Constant(rhs))
+    self._ExecuteAndCompareClose(c, expected=np.dot(lhs, rhs))
+
+  def testConvF32Same(self):
+    c = self._NewComputation()
+    a = lambda *dims: np.arange(np.prod(dims)).reshape(dims).astype("float32")
+    lhs = a(1, 2, 3, 4)
+    rhs = a(1, 2, 1, 2) * 10
+    c.Conv(c.Constant(lhs), c.Constant(rhs),
+           [1, 1], xla_client.PaddingType.SAME)
+    result = np.array([[[[640., 700., 760., 300.],
+                         [880., 940., 1000., 380.],
+                         [1120., 1180., 1240., 460.]]]])
+    self._ExecuteAndCompareClose(c, expected=result)
+
+  def testConvF32Valid(self):
+    c = self._NewComputation()
+    a = lambda *dims: np.arange(np.prod(dims)).reshape(dims).astype("float32")
+    lhs = a(1, 2, 3, 4)
+    rhs = a(1, 2, 1, 2) * 10
+    c.Conv(c.Constant(lhs), c.Constant(rhs),
+           [2, 1], xla_client.PaddingType.VALID)
+    result = np.array([[[[640., 700., 760.],
+                         [1120., 1180., 1240.]]]])
+    self._ExecuteAndCompareClose(c, expected=result)
+
+  def testConvWithGeneralPaddingF32(self):
+    c = self._NewComputation()
+    a = lambda *dims: np.arange(np.prod(dims)).reshape(dims).astype("float32")
+    lhs = a(1, 1, 2, 3)
+    rhs = a(1, 1, 1, 2) * 10
+    strides = [1, 1]
+    pads = [(1, 0), (0, 1)]
+    lhs_dilation = (2, 1)
+    rhs_dilation = (1, 1)
+    c.ConvWithGeneralPadding(c.Constant(lhs), c.Constant(rhs),
+                             strides, pads, lhs_dilation, rhs_dilation)
+    result = np.array([[[[0., 0., 0.],
+                         [10., 20., 0.],
+                         [0., 0., 0.],
+                         [40., 50., 0.]]]])
+    self._ExecuteAndCompareClose(c, expected=result)
+
+  def testBooleanNot(self):
+    c = self._NewComputation()
+    arr = NumpyArrayBool([True, False, True])
+    c.Not(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=~arr)
+
+  def testExp(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Exp(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.exp(arr))
+
+  def testLog(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Log(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.log(arr))
+
+  def testNeg(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Neg(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=-arr)
+
+  def testFloor(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Floor(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.floor(arr))
+
+  def testCeil(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Ceil(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.ceil(arr))
+
+  def testAbs(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, -12.1, 2.4, -1.])
+    c.Abs(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.abs(arr))
+
+  def testTanh(self):
+    c = self._NewComputation()
+    arr = NumpyArrayF32([3.3, 12.1])
+    c.Tanh(c.Constant(arr))
+    self._ExecuteAndCompareClose(c, expected=np.tanh(arr))
+
+  def testTrans(self):
+
+    def _TransposeAndTest(array):
+      c = self._NewComputation()
+      c.Trans(c.Constant(array))
+      self._ExecuteAndCompareClose(c, expected=array.T)
+
+    # Test square and non-square matrices in both default (C) and F orders.
+    for array_fun in [NumpyArrayF32, NumpyArrayF64]:
+      _TransposeAndTest(array_fun([[1, 2, 3], [4, 5, 6]]))
+      _TransposeAndTest(array_fun([[1, 2, 3], [4, 5, 6]], order="F"))
+      _TransposeAndTest(array_fun([[1, 2], [4, 5]]))
+      _TransposeAndTest(array_fun([[1, 2], [4, 5]], order="F"))
+
+  def testTranspose(self):
+
+    def _TransposeAndTest(array, permutation):
+      c = self._NewComputation()
+      c.Transpose(c.Constant(array), permutation)
+      expected = np.transpose(array, permutation)
+      self._ExecuteAndCompareClose(c, expected=expected)
+
+    _TransposeAndTest(NumpyArrayF32([[1, 2, 3], [4, 5, 6]]), [0, 1])
+    _TransposeAndTest(NumpyArrayF32([[1, 2, 3], [4, 5, 6]]), [1, 0])
+    _TransposeAndTest(NumpyArrayF32([[1, 2], [4, 5]]), [0, 1])
+    _TransposeAndTest(NumpyArrayF32([[1, 2], [4, 5]]), [1, 0])
+
+    arr = np.random.RandomState(0).randn(2, 3, 4).astype(np.float32)
+    for permutation in itertools.permutations(range(arr.ndim)):
+      _TransposeAndTest(arr, permutation)
+      _TransposeAndTest(np.asfortranarray(arr), permutation)
+
+  def testEq(self):
+    c = self._NewComputation()
+    c.Eq(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4])),
+        c.Constant(NumpyArrayS32([4, 2, 3, 1])))
+    self._ExecuteAndCompareExact(c, expected=[False, True, True, False])
+
+  def testNe(self):
+    c = self._NewComputation()
+    c.Ne(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4])),
+        c.Constant(NumpyArrayS32([4, 2, 3, 1])))
+    self._ExecuteAndCompareExact(c, expected=[True, False, False, True])
+
+    c.Ne(
+        c.Constant(NumpyArrayF32([-2.0, 0.0,
+                                  float("nan"),
+                                  float("nan")])),
+        c.Constant(NumpyArrayF32([2.0, -0.0, 1.0, float("nan")])))
+    self._ExecuteAndAssertWith(
+        np.testing.assert_allclose, c, (), expected=[True, False, True, True])
+
+  def testGt(self):
+    c = self._NewComputation()
+    c.Gt(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4, 9])),
+        c.Constant(NumpyArrayS32([1, 0, 2, 7, 12])))
+    self._ExecuteAndCompareExact(c, expected=[False, True, True, False, False])
+
+  def testGe(self):
+    c = self._NewComputation()
+    c.Ge(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4, 9])),
+        c.Constant(NumpyArrayS32([1, 0, 2, 7, 12])))
+    self._ExecuteAndCompareExact(c, expected=[True, True, True, False, False])
+
+  def testLt(self):
+    c = self._NewComputation()
+    c.Lt(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4, 9])),
+        c.Constant(NumpyArrayS32([1, 0, 2, 7, 12])))
+    self._ExecuteAndCompareExact(c, expected=[False, False, False, True, True])
+
+  def testLe(self):
+    c = self._NewComputation()
+    c.Le(
+        c.Constant(NumpyArrayS32([1, 2, 3, 4, 9])),
+        c.Constant(NumpyArrayS32([1, 0, 2, 7, 12])))
+    self._ExecuteAndCompareExact(c, expected=[True, False, False, True, True])
+
+  def testMax(self):
+    c = self._NewComputation()
+    c.Max(
+        c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0, 9.0])),
+        c.Constant(NumpyArrayF32([1.0, 0.0, 2.0, 7.0, 12.0])))
+    self._ExecuteAndCompareExact(c, expected=[1.0, 2.0, 3.0, 7.0, 12.0])
+
+  def testMaxExplicitBroadcastDim0(self):
+    c = self._NewComputation()
+    c.Max(
+        c.Constant(NumpyArrayF32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF32([3, 4, 5])),
+        broadcast_dimensions=(0,))
+    self._ExecuteAndCompareExact(c, expected=[[3, 3, 3], [4, 5, 6], [7, 8, 9]])
+
+  def testMaxExplicitBroadcastDim1(self):
+    c = self._NewComputation()
+    c.Max(
+        c.Constant(NumpyArrayF32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayF32([3, 4, 5])),
+        broadcast_dimensions=(1,))
+    self._ExecuteAndCompareExact(c, expected=[[3, 4, 5], [4, 5, 6], [7, 8, 9]])
+
+  def testMin(self):
+    c = self._NewComputation()
+    c.Min(
+        c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0, 9.0])),
+        c.Constant(NumpyArrayF32([1.0, 0.0, 2.0, 7.0, 12.0])))
+    self._ExecuteAndCompareExact(c, expected=[1.0, 0.0, 2.0, 4.0, 9.0])
+
+  def testPad(self):
+    c = self._NewComputation()
+    c.Pad(
+        c.Constant(NumpyArrayF32([[1.0, 2.0], [3.0, 4.0]])),
+        c.Constant(NumpyArrayF32(0.0)),
+        [(1, 2, 1), (0, 1, 0)])
+    self._ExecuteAndCompareClose(c, expected=[[0.0, 0.0, 0.0],
+                                              [1.0, 2.0, 0.0],
+                                              [0.0, 0.0, 0.0],
+                                              [3.0, 4.0, 0.0],
+                                              [0.0, 0.0, 0.0],
+                                              [0.0, 0.0, 0.0]])
+
+  def testPadWithPaddingConfig(self):
+    c = self._NewComputation()
+    padding_config = xla_client.xla_data_pb2.PaddingConfig()
+    for lo, hi, interior in [(1, 2, 1), (0, 1, 0)]:
+      dimension = padding_config.dimensions.add()
+      dimension.edge_padding_low = lo
+      dimension.edge_padding_high = hi
+      dimension.interior_padding = interior
+    c.Pad(
+        c.Constant(NumpyArrayF32([[1.0, 2.0], [3.0, 4.0]])),
+        c.Constant(NumpyArrayF32(0.0)),
+        padding_config)
+    self._ExecuteAndCompareClose(c, expected=[[0.0, 0.0, 0.0],
+                                              [1.0, 2.0, 0.0],
+                                              [0.0, 0.0, 0.0],
+                                              [3.0, 4.0, 0.0],
+                                              [0.0, 0.0, 0.0],
+                                              [0.0, 0.0, 0.0]])
+
+  def testReshape(self):
+    c = self._NewComputation()
+    c.Reshape(
+        c.Constant(NumpyArrayS32([[1, 2], [3, 4], [5, 6]])),
+        dimensions=[0, 1],
+        new_sizes=[2, 3])
+    self._ExecuteAndCompareExact(c, expected=[[1, 2, 3], [4, 5, 6]])
+
+  def testCollapse(self):
+    c = self._NewComputation()
+    c.Collapse(
+        c.Constant(NumpyArrayS32([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])),
+        dimensions=[1, 2])
+    self._ExecuteAndCompareExact(c, expected=[[1, 2, 3, 4], [5, 6, 7, 8]])
+
+  def testRev(self):
+    c = self._NewComputation()
+    c.Rev(
+        c.Constant(NumpyArrayS32([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])),
+        dimensions=[0, 2])
+    self._ExecuteAndCompareExact(
+        c, expected=[[[6, 5], [8, 7]], [[2, 1], [4, 3]]])
+
+  def testSelect(self):
+    c = self._NewComputation()
+    c.Select(
+        c.Constant(NumpyArrayBool([True, False, False, True, False])),
+        c.Constant(NumpyArrayS32([1, 2, 3, 4, 5])),
+        c.Constant(NumpyArrayS32([-1, -2, -3, -4, -5])))
+    self._ExecuteAndCompareExact(c, expected=[1, -2, -3, 4, -5])
+
+  def testSlice(self):
+    c = self._NewComputation()
+    c.Slice(
+        c.Constant(NumpyArrayS32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])), [1, 0],
+        [3, 2])
+    self._ExecuteAndCompareExact(c, expected=[[4, 5], [7, 8]])
+
+  def testDynamicSlice(self):
+    c = self._NewComputation()
+    c.DynamicSlice(
+        c.Constant(NumpyArrayS32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayS32([1, 0])), [2, 2])
+    self._ExecuteAndCompareExact(c, expected=[[4, 5], [7, 8]])
+
+  def testDynamicUpdateSlice(self):
+    c = self._NewComputation()
+    c.DynamicUpdateSlice(
+        c.Constant(NumpyArrayS32([[1, 2, 3], [4, 5, 6], [7, 8, 9]])),
+        c.Constant(NumpyArrayS32([[1, 2], [3, 4]])),
+        c.Constant(NumpyArrayS32([1, 1])))
+    self._ExecuteAndCompareExact(c, expected=[[1, 2, 3], [4, 1, 2], [7, 3, 4]])
+
+  def testTuple(self):
+    c = self._NewComputation()
+    c.Tuple(
+        c.ConstantS32Scalar(42), c.Constant(NumpyArrayF32([1.0, 2.0])),
+        c.Constant(NumpyArrayBool([True, False, False, True])))
+    result = c.Build().Compile().Execute()
+    self.assertIsInstance(result, tuple)
+    np.testing.assert_equal(result[0], 42)
+    np.testing.assert_allclose(result[1], [1.0, 2.0])
+    np.testing.assert_equal(result[2], [True, False, False, True])
+
+  def testGetTupleElement(self):
+    c = self._NewComputation()
+    c.GetTupleElement(
+        c.Tuple(
+            c.ConstantS32Scalar(42), c.Constant(NumpyArrayF32([1.0, 2.0])),
+            c.Constant(NumpyArrayBool([True, False, False, True]))), 1)
+    self._ExecuteAndCompareClose(c, expected=[1.0, 2.0])
+
+  def testBroadcast(self):
+    c = self._NewComputation()
+    c.Broadcast(c.Constant(NumpyArrayS32([10, 20, 30, 40])), sizes=(3,))
+    self._ExecuteAndCompareExact(
+        c, expected=[[10, 20, 30, 40], [10, 20, 30, 40], [10, 20, 30, 40]])
+
+  def testRngNormal(self):
+    shape = (2, 3)
+    c = self._NewComputation()
+    c.RngNormal(c.Constant(NumpyArrayF32(0.)), c.Constant(NumpyArrayF32(1.)),
+                dims=shape)
+    result = c.Build().Compile().Execute()
+    # since the result is random, we just check shape and uniqueness
+    self.assertEqual(result.shape, shape)
+    self.assertEqual(len(np.unique(result)), np.prod(shape))
+
+  def testRngUniformF32(self):
+    lo, hi = 2., 4.
+    shape = (2, 3)
+    c = self._NewComputation()
+    c.RngUniform(c.Constant(NumpyArrayF32(lo)), c.Constant(NumpyArrayF32(hi)),
+                 dims=shape)
+    result = c.Build().Compile().Execute()
+    # since the result is random, we just check shape, uniqueness, and range
+    self.assertEqual(result.shape, shape)
+    self.assertEqual(len(np.unique(result)), np.prod(shape))
+    self.assertTrue(np.all(lo <= result))
+    self.assertTrue(np.all(result < hi))
+
+  def testRngUniformS32(self):
+    lo, hi = 2, 4
+    shape = (2, 3)
+    c = self._NewComputation()
+    c.RngUniform(c.Constant(NumpyArrayS32(lo)), c.Constant(NumpyArrayS32(hi)),
+                 dims=shape)
+    result = c.Build().Compile().Execute()
+    # since the result is random, we just check shape, integrality, and range
+    self.assertEqual(result.shape, shape)
+    self.assertEqual(result.dtype, np.int32)
+    self.assertTrue(np.all(lo <= result))
+    self.assertTrue(np.all(result < hi))
+
+
+class EmbeddedComputationsTest(LocalComputationTest):
+  """Tests for XLA graphs with embedded computations (such as maps)."""
+
+  def _CreateConstantS32Computation(self):
+    """Computation (f32) -> s32 that returns a constant 1 for any input."""
+    c = self._NewComputation("constant_s32_one")
+    # TODO(eliben): consider adding a nicer way to create new parameters without
+    # having to create dummy Numpy arrays or populating Shape messages. Perhaps
+    # we need our own (Python-client-own) way to represent Shapes conveniently.
+    c.ParameterFromNumpy(NumpyArrayF32(0))
+    c.ConstantS32Scalar(1)
+    return c.Build()
+
+  def _CreateConstantS64Computation(self):
+    """Computation (f64) -> s64 that returns a constant 1 for any input."""
+    c = self._NewComputation("constant_s64_one")
+    # TODO(eliben): consider adding a nicer way to create new parameters without
+    # having to create dummy Numpy arrays or populating Shape messages. Perhaps
+    # we need our own (Python-client-own) way to represent Shapes conveniently.
+    c.ParameterFromNumpy(NumpyArrayF64(0))
+    c.ConstantS64Scalar(1)
+    return c.Build()
+
+  def _CreateConstantF32Computation(self):
+    """Computation (f32) -> f32 that returns a constant 1.0 for any input."""
+    c = self._NewComputation("constant_f32_one")
+    c.ParameterFromNumpy(NumpyArrayF32(0))
+    c.ConstantF32Scalar(1.0)
+    return c.Build()
+
+  def _CreateConstantF64Computation(self):
+    """Computation (f64) -> f64 that returns a constant 1.0 for any input."""
+    c = self._NewComputation("constant_f64_one")
+    c.ParameterFromNumpy(NumpyArrayF64(0))
+    c.ConstantF64Scalar(1.0)
+    return c.Build()
+
+  def _CreateMulF32By2Computation(self):
+    """Computation (f32) -> f32 that multiplies its parameter by 2."""
+    c = self._NewComputation("mul_f32_by2")
+    c.Mul(c.ParameterFromNumpy(NumpyArrayF32(0)), c.ConstantF32Scalar(2.0))
+    return c.Build()
+
+  def _CreateMulF64By2Computation(self):
+    """Computation (f64) -> f64 that multiplies its parameter by 2."""
+    c = self._NewComputation("mul_f64_by2")
+    c.Mul(c.ParameterFromNumpy(NumpyArrayF64(0)), c.ConstantF64Scalar(2.0))
+    return c.Build()
+
+  def _CreateBinaryAddF32Computation(self):
+    """Computation (f32, f32) -> f32 that adds its two parameters."""
+    c = self._NewComputation("add_param0_by_param1")
+    c.Add(
+        c.ParameterFromNumpy(NumpyArrayF32(0)),
+        c.ParameterFromNumpy(NumpyArrayF32(0)))
+    return c.Build()
+
+  def _CreateBinaryAddF64Computation(self):
+    """Computation (f64, f64) -> f64 that adds its two parameters."""
+    c = self._NewComputation("add_param0_by_param1")
+    c.Add(
+        c.ParameterFromNumpy(NumpyArrayF64(0)),
+        c.ParameterFromNumpy(NumpyArrayF64(0)))
+    return c.Build()
+
+  def _CreateBinaryDivF32Computation(self):
+    """Computation (f32, f32) -> f32 that divides its two parameters."""
+    c = self._NewComputation("div_param0_by_param1")
+    c.Div(
+        c.ParameterFromNumpy(NumpyArrayF32(0)),
+        c.ParameterFromNumpy(NumpyArrayF32(0)))
+    return c.Build()
+
+  def _CreateBinaryDivF64Computation(self):
+    """Computation (f64, f64) -> f64 that divides its two parameters."""
+    c = self._NewComputation("div_param0_by_param1")
+    c.Div(
+        c.ParameterFromNumpy(NumpyArrayF64(0)),
+        c.ParameterFromNumpy(NumpyArrayF64(0)))
+    return c.Build()
+
+  def _CreateTestF32Lt10Computation(self):
+    """Computation (f32) -> bool that tests if its parameter is less than 10."""
+    c = self._NewComputation("test_f32_lt_10")
+    c.Lt(c.ParameterFromNumpy(NumpyArrayF32(0)), c.ConstantF32Scalar(10.))
+    return c.Build()
+
+  def _CreateTestF64Lt10Computation(self):
+    """Computation (f64) -> bool that tests if its parameter is less than 10."""
+    c = self._NewComputation("test_f64_lt_10")
+    c.Lt(c.ParameterFromNumpy(NumpyArrayF64(0)), c.ConstantF64Scalar(10.))
+    return c.Build()
+
+  def _CreateBinaryGeF32Computation(self):
+    """Computation (f32, f32) -> bool that tests first_param >= second_param."""
+    c = self._NewComputation("param0_lt_param1")
+    c.Ge(c.ParameterFromNumpy(NumpyArrayF32(0)),
+         c.ParameterFromNumpy(NumpyArrayF32(0)))
+    return c.Build()
+
+  def _CreateBinaryGeF64Computation(self):
+    """Computation (f64, f64) -> bool that tests first_param >= second_param."""
+    c = self._NewComputation("param0_lt_param1")
+    c.Ge(c.ParameterFromNumpy(NumpyArrayF64(0)),
+         c.ParameterFromNumpy(NumpyArrayF64(0)))
+    return c.Build()
+
+  def _MakeSample3DArrayF32(self):
+    return NumpyArrayF32([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]],
+                          [[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])
+
+  def _MakeSample3DArrayF64(self):
+    return NumpyArrayF64([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]],
+                          [[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])
+
+  def testCallF32(self):
+    c = self._NewComputation()
+    c.Call(
+        self._CreateMulF32By2Computation(),
+        operands=(c.ConstantF32Scalar(5.0),))
+    self._ExecuteAndCompareClose(c, expected=10.0)
+
+  def testCallF64(self):
+    c = self._NewComputation()
+    c.Call(
+        self._CreateMulF64By2Computation(),
+        operands=(c.ConstantF64Scalar(5.0),))
+    self._ExecuteAndCompareClose(c, expected=10.0)
+
+  def testMapEachElementToS32Constant(self):
+    c = self._NewComputation()
+    c.Map([c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0]))],
+          self._CreateConstantS32Computation(), [0])
+    self._ExecuteAndCompareExact(c, expected=[1, 1, 1, 1])
+
+  def testMapEachElementToS64Constant(self):
+    c = self._NewComputation()
+    c.Map([c.Constant(NumpyArrayF64([1.0, 2.0, 3.0, 4.0]))],
+          self._CreateConstantS64Computation(), [0])
+    self._ExecuteAndCompareExact(c, expected=[1, 1, 1, 1])
+
+  def testMapMulBy2F32(self):
+    c = self._NewComputation()
+    c.Map([c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0]))],
+          self._CreateMulF32By2Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[2.0, 4.0, 6.0, 8.0])
+
+  def testMapMulBy2F64(self):
+    c = self._NewComputation()
+    c.Map([c.Constant(NumpyArrayF64([1.0, 2.0, 3.0, 4.0]))],
+          self._CreateMulF64By2Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[2.0, 4.0, 6.0, 8.0])
+
+  def testSimpleMapChainF32(self):
+    # Chains a map of constant-f32 with a map of mul-by-2
+    c = self._NewComputation()
+    const_f32 = c.Map([c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0]))],
+                      self._CreateConstantF32Computation(), [0])
+    c.Map([const_f32], self._CreateMulF32By2Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[2.0, 2.0, 2.0, 2.0])
+
+  def testSimpleMapChainF64(self):
+    # Chains a map of constant-f64 with a map of mul-by-2
+    c = self._NewComputation()
+    const_f64 = c.Map([c.Constant(NumpyArrayF64([1.0, 2.0, 3.0, 4.0]))],
+                      self._CreateConstantF64Computation(), [0])
+    c.Map([const_f64], self._CreateMulF64By2Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[2.0, 2.0, 2.0, 2.0])
+
+  def testDivVectorsWithMapF32(self):
+    c = self._NewComputation()
+    c.Map((c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0])),
+           c.Constant(NumpyArrayF32([5.0, 5.0, 4.0, 4.0]))),
+          self._CreateBinaryDivF32Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[0.2, 0.4, 0.75, 1.0])
+
+  def testDivVectorsWithMapF64(self):
+    c = self._NewComputation()
+    c.Map((c.Constant(NumpyArrayF64([1.0, 2.0, 3.0, 4.0])),
+           c.Constant(NumpyArrayF64([5.0, 5.0, 4.0, 4.0]))),
+          self._CreateBinaryDivF64Computation(), [0])
+    self._ExecuteAndCompareClose(c, expected=[0.2, 0.4, 0.75, 1.0])
+
+  def testSelectAndScatterF32(self):
+    c = self._NewComputation()
+    c.SelectAndScatter(c.Constant(NumpyArrayF32([[1., 2., 6.], [4., 5., 3.]])),
+                       select=self._CreateBinaryGeF32Computation(),
+                       window_dimensions=(2, 1),
+                       window_strides=(1, 2),
+                       padding=xla_client.PaddingType.VALID,
+                       source=c.Constant(NumpyArrayF32([[0.1, 0.2]])),
+                       init_value=c.Constant(NumpyArrayF32(1)),
+                       scatter=self._CreateBinaryAddF32Computation())
+    self._ExecuteAndCompareClose(c, expected=[[1., 1., 1.2], [1.1, 1., 1.]])
+
+  def testSelectAndScatterF64(self):
+    c = self._NewComputation()
+    c.SelectAndScatter(c.Constant(NumpyArrayF64([[1., 2., 6.], [4., 5., 3.]])),
+                       select=self._CreateBinaryGeF64Computation(),
+                       window_dimensions=(2, 1),
+                       window_strides=(1, 2),
+                       padding=xla_client.PaddingType.VALID,
+                       source=c.Constant(NumpyArrayF64([[0.1, 0.2]])),
+                       init_value=c.Constant(NumpyArrayF64(1)),
+                       scatter=self._CreateBinaryAddF64Computation())
+    self._ExecuteAndCompareClose(c, expected=[[1., 1., 1.2], [1.1, 1., 1.]])
+
+  def testReduce1DtoScalarF32(self):
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0])),
+        init_value=c.ConstantF32Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF32Computation(),
+        dimensions=[0])
+    self._ExecuteAndCompareClose(c, expected=10)
+
+  def testReduce1DtoScalarF64(self):
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(NumpyArrayF64([1.0, 2.0, 3.0, 4.0])),
+        init_value=c.ConstantF64Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF64Computation(),
+        dimensions=[0])
+    self._ExecuteAndCompareClose(c, expected=10)
+
+  def testReduce2DTo1DDim0F32(self):
+    input_array = NumpyArrayF32([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(input_array),
+        init_value=c.ConstantF32Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF32Computation(),
+        dimensions=[0])
+    self._ExecuteAndCompareClose(c, expected=[5, 7, 9])
+
+  def testReduce2DTo1DDim0F64(self):
+    input_array = NumpyArrayF64([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(input_array),
+        init_value=c.ConstantF64Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF64Computation(),
+        dimensions=[0])
+    self._ExecuteAndCompareClose(c, expected=[5, 7, 9])
+
+  def testReduce2DTo1DDim1F32(self):
+    input_array = NumpyArrayF32([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(input_array),
+        init_value=c.ConstantF32Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF32Computation(),
+        dimensions=[1])
+    self._ExecuteAndCompareClose(c, expected=[6, 15])
+
+  def testReduce2DTo1DDim1F64(self):
+    input_array = NumpyArrayF64([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.Reduce(
+        operand=c.Constant(input_array),
+        init_value=c.ConstantF64Scalar(0),
+        computation_to_apply=self._CreateBinaryAddF64Computation(),
+        dimensions=[1])
+    self._ExecuteAndCompareClose(c, expected=[6, 15])
+
+  def testReduce3DAllPossibleWaysF32(self):
+    input_array = self._MakeSample3DArrayF32()
+
+    def _ReduceAndTest(*dims):
+      c = self._NewComputation()
+      c.Reduce(
+          operand=c.Constant(input_array),
+          init_value=c.ConstantF32Scalar(0),
+          computation_to_apply=self._CreateBinaryAddF32Computation(),
+          dimensions=dims)
+      self._ExecuteAndCompareClose(
+          c, expected=np.sum(input_array, axis=tuple(dims)))
+
+    _ReduceAndTest(0)
+    _ReduceAndTest(0)
+    _ReduceAndTest(0, 1)
+    _ReduceAndTest(0, 2)
+    _ReduceAndTest(1, 2)
+    _ReduceAndTest(0, 1, 2)
+
+  def testReduce3DAllPossibleWaysF64(self):
+    input_array = self._MakeSample3DArrayF64()
+
+    def _ReduceAndTest(*dims):
+      c = self._NewComputation()
+      c.Reduce(
+          operand=c.Constant(input_array),
+          init_value=c.ConstantF64Scalar(0),
+          computation_to_apply=self._CreateBinaryAddF64Computation(),
+          dimensions=dims)
+      self._ExecuteAndCompareClose(
+          c, expected=np.sum(input_array, axis=tuple(dims)))
+
+    _ReduceAndTest(0)
+    _ReduceAndTest(0)
+    _ReduceAndTest(0, 1)
+    _ReduceAndTest(0, 2)
+    _ReduceAndTest(1, 2)
+    _ReduceAndTest(0, 1, 2)
+
+  def testReduceWindowValidUnitStridesF32(self):
+    input_array = NumpyArrayF32([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.ReduceWindow(operand=c.Constant(input_array),
+                   init_value=c.ConstantF32Scalar(0),
+                   computation_to_apply=self._CreateBinaryAddF32Computation(),
+                   window_dimensions=(2, 1), window_strides=(1, 1),
+                   padding=xla_client.PaddingType.VALID)
+    self._ExecuteAndCompareClose(c, expected=[[5., 7., 9.]])
+
+  def testReduceWindowSameUnitStridesF32(self):
+    input_array = NumpyArrayF32([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.ReduceWindow(operand=c.Constant(input_array),
+                   init_value=c.ConstantF32Scalar(0),
+                   computation_to_apply=self._CreateBinaryAddF32Computation(),
+                   window_dimensions=(2, 1), window_strides=(1, 1),
+                   padding=xla_client.PaddingType.SAME)
+    self._ExecuteAndCompareClose(c, expected=[[5., 7., 9.], [4., 5., 6.]])
+
+  def testReduceWindowValidGeneralStridesF32(self):
+    input_array = NumpyArrayF32([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.ReduceWindow(operand=c.Constant(input_array),
+                   init_value=c.ConstantF32Scalar(0),
+                   computation_to_apply=self._CreateBinaryAddF32Computation(),
+                   window_dimensions=(2, 1), window_strides=(1, 2),
+                   padding=xla_client.PaddingType.VALID)
+    self._ExecuteAndCompareClose(c, expected=[[5., 9.]])
+
+  def testReduceWindowValidUnitStridesF64(self):
+    input_array = NumpyArrayF64([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.ReduceWindow(operand=c.Constant(input_array),
+                   init_value=c.ConstantF64Scalar(0),
+                   computation_to_apply=self._CreateBinaryAddF64Computation(),
+                   window_dimensions=(2, 1), window_strides=(1, 1),
+                   padding=xla_client.PaddingType.VALID)
+    self._ExecuteAndCompareClose(c, expected=[[5., 7., 9.]])
+
+  def testReduceWindowSameUnitStridesF64(self):
+    input_array = NumpyArrayF64([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.ReduceWindow(operand=c.Constant(input_array),
+                   init_value=c.ConstantF64Scalar(0),
+                   computation_to_apply=self._CreateBinaryAddF64Computation(),
+                   window_dimensions=(2, 1), window_strides=(1, 1),
+                   padding=xla_client.PaddingType.SAME)
+    self._ExecuteAndCompareClose(c, expected=[[5., 7., 9.], [4., 5., 6.]])
+
+  def testReduceWindowValidGeneralStridesF64(self):
+    input_array = NumpyArrayF64([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    c = self._NewComputation()
+    c.ReduceWindow(operand=c.Constant(input_array),
+                   init_value=c.ConstantF64Scalar(0),
+                   computation_to_apply=self._CreateBinaryAddF64Computation(),
+                   window_dimensions=(2, 1), window_strides=(1, 2),
+                   padding=xla_client.PaddingType.VALID)
+    self._ExecuteAndCompareClose(c, expected=[[5., 9.]])
+
+  def testWhileF32(self):
+    cond = self._CreateTestF32Lt10Computation()
+    body = self._CreateMulF32By2Computation()
+    c = self._NewComputation()
+    init = c.ConstantF32Scalar(1.)
+    c.While(cond, body, init)
+    self._ExecuteAndCompareClose(c, expected=16.)
+
+  def testWhileF64(self):
+    cond = self._CreateTestF64Lt10Computation()
+    body = self._CreateMulF64By2Computation()
+    c = self._NewComputation()
+    init = c.ConstantF64Scalar(1.)
+    c.While(cond, body, init)
+    self._ExecuteAndCompareClose(c, expected=16.)
+
+  def testInfeedS32Values(self):
+    to_infeed = NumpyArrayS32([1, 2, 3, 4])
+    c = self._NewComputation()
+    c.Infeed(xla_client.Shape.from_numpy(to_infeed[0]))
+    compiled_c = c.Build().CompileWithExampleArguments()
+    for item in to_infeed:
+      xla_client.transfer_to_infeed(item)
+
+    for item in to_infeed:
+      result = compiled_c.Execute()
+      self.assertEqual(result, item)
+
+  def testInfeedThenOutfeedS32(self):
+    to_round_trip = NumpyArrayS32([1, 2, 3, 4])
+    c = self._NewComputation()
+    x = c.Infeed(xla_client.Shape.from_numpy(to_round_trip[0]))
+    c.Outfeed(x)
+
+    compiled_c = c.Build().CompileWithExampleArguments()
+
+    for want in to_round_trip:
+      execution = threading.Thread(target=compiled_c.Execute)
+      execution.start()
+      xla_client.transfer_to_infeed(want)
+      got = xla_client.transfer_from_outfeed(
+          xla_client.Shape.from_numpy(to_round_trip[0]))
+      execution.join()
+      self.assertEqual(want, got)
+
+
+class ErrorTest(LocalComputationTest):
+
+  def setUp(self):
+    self.f32_scalar_2 = NumpyArrayF32(2.0)
+    self.s32_scalar_2 = NumpyArrayS32(2)
+
+  def testInvokeWithWrongElementType(self):
+    c = self._NewComputation()
+    c.SetOpMetadata(xla_client.CurrentSourceInfoMetadata())
+    c.ParameterFromNumpy(self.s32_scalar_2)
+    c.ClearOpMetadata()
+    self.assertRaisesRegexp(
+        RuntimeError, r"Invalid argument shape.*xla_client_test.py.*"
+        r"expected s32\[\], got f32\[\]",
+        lambda: c.Build().CompileWithExampleArguments([self.f32_scalar_2]))
+
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc
index 5bb81b80dde4c6d9324d33ddd5d6b6d6ad3cc1ac..a9acdae380af5b7f9efb3d08302fc717108f5e40 100644
--- a/tensorflow/compiler/xla/reference_util.cc
+++ b/tensorflow/compiler/xla/reference_util.cc
@@ -195,14 +195,26 @@ ReferenceUtil::ReduceWindow1DGeneric(
     const tensorflow::gtl::ArraySlice<int64>& window,
     const tensorflow::gtl::ArraySlice<int64>& stride, Padding padding) {
   std::vector<int64> dim_lengths{static_cast<int64>(operand.size())};
-  auto padding_both = xla::MakePadding(dim_lengths, window, stride, padding);
+  return ReduceWindow1DGeneric(
+      operand, init, reduce_func, window, stride,
+      xla::MakePadding(dim_lengths, window, stride, padding));
+}
 
+/* static  */ std::unique_ptr<std::vector<float>>
+ReferenceUtil::ReduceWindow1DGeneric(
+    const tensorflow::gtl::ArraySlice<float>& operand, float init,
+    const std::function<float(float, float)>& reduce_func,
+    const tensorflow::gtl::ArraySlice<int64>& window,
+    const tensorflow::gtl::ArraySlice<int64>& stride,
+    const tensorflow::gtl::ArraySlice<std::pair<int64, int64>>& padding) {
+  std::vector<int64> dim_lengths{static_cast<int64>(operand.size())};
   std::vector<int64> window_counts(window.size(), 0);
   std::vector<int64> pad_low(window.size(), 0);
   for (int64 i = 0; i < window.size(); ++i) {
+    int64 padded_width = padding[i].first + dim_lengths[i] + padding[i].second;
     window_counts[i] =
-        WindowCount(dim_lengths[i], window[i], stride[i], padding);
-    pad_low[i] = padding_both[i].first;
+        window_util::StridedBound(padded_width, window[i], stride[i]);
+    pad_low[i] = padding[i].first;
   }
   auto result = MakeUnique<std::vector<float>>(window_counts[0]);
 
@@ -269,6 +281,51 @@ ReferenceUtil::ReduceWindow1DAdd(
   return result;
 }
 
+/* static  */ std::unique_ptr<Array3D<float>> ReferenceUtil::ReduceWindow3DAdd(
+    const Array3D<float>& operand, float init,
+    const tensorflow::gtl::ArraySlice<int64>& window,
+    const tensorflow::gtl::ArraySlice<int64>& stride, Padding padding) {
+  std::vector<int64> dim_lengths{operand.n1(), operand.n2(), operand.n3()};
+  auto padding_both = xla::MakePadding(dim_lengths, window, stride, padding);
+
+  std::vector<int64> window_counts(window.size(), 0);
+  std::vector<int64> pad_low(window.size(), 0);
+  for (int64 i = 0; i < window.size(); ++i) {
+    window_counts[i] =
+        WindowCount(dim_lengths[i], window[i], stride[i], padding);
+    pad_low[i] = padding_both[i].first;
+  }
+  auto result = MakeUnique<Array3D<float>>(window_counts[0], window_counts[1],
+                                           window_counts[2]);
+
+  for (int64 i0 = 0; i0 < window_counts[0]; ++i0) {
+    for (int64 i1 = 0; i1 < window_counts[1]; ++i1) {
+      for (int64 i2 = 0; i2 < window_counts[2]; ++i2) {
+        int64 i0_base = i0 * stride[0] - pad_low[0];
+        int64 i1_base = i1 * stride[1] - pad_low[1];
+        int64 i2_base = i2 * stride[2] - pad_low[2];
+
+        float val = init;
+        for (int64 i0_win = 0; i0_win < window[0]; ++i0_win) {
+          for (int64 i1_win = 0; i1_win < window[1]; ++i1_win) {
+            for (int64 i2_win = 0; i2_win < window[2]; ++i2_win) {
+              if (i0_base + i0_win >= 0 && i1_base + i1_win >= 0 &&
+                  i2_base + i2_win >= 0 && i0_base + i0_win < operand.n1() &&
+                  i1_base + i1_win < operand.n2() &&
+                  i2_base + i2_win < operand.n3()) {
+                val += operand(i0_base + i0_win, i1_base + i1_win,
+                               i2_base + i2_win);
+              }
+            }
+          }
+        }
+        (*result)(i0, i1, i2) = val;
+      }
+    }
+  }
+  return result;
+}
+
 /* static */ std::unique_ptr<Array4D<float>>
 ReferenceUtil::ReduceWindow4DGeneric(
     const Array4D<float>& operand, float init,
@@ -520,7 +577,7 @@ ReferenceUtil::ConvArray4DGeneralDimensionsDilated(
 
   HloEvaluator evaluator;
   std::unique_ptr<Literal> result_literal =
-      evaluator.Evaluate(*computation, {}).ConsumeValueOrDie();
+      evaluator.Evaluate<const Literal*>(*computation, {}).ConsumeValueOrDie();
 
   CHECK_EQ(ShapeUtil::Rank(result_literal->shape()), 4);
   auto result =
@@ -594,8 +651,12 @@ ReferenceUtil::ReduceToRowArray2D(
                    i2 == 0 || (dim_set.count(2) && i2 < array.n3()); ++i2) {
                 for (int64 i3 = 0;
                      i3 == 0 || (dim_set.count(3) && i3 < array.n4()); ++i3) {
-                  accumulator = reduce_function(
-                      accumulator, array(a0 + i0, a1 + i1, a2 + i2, a3 + i3));
+                  // Handle zero-sized arrays.
+                  if (array.n1() > 0 && array.n2() > 0 && array.n3() > 0 &&
+                      array.n4() > 0) {
+                    accumulator = reduce_function(
+                        accumulator, array(a0 + i0, a1 + i1, a2 + i2, a3 + i3));
+                  }
                 }
               }
             }
diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h
index 62d455d71a70407e903a1e0be42a7e9f1898e523..3ec96f2f38b8f91e1549419b60481327fa9bbd5f 100644
--- a/tensorflow/compiler/xla/reference_util.h
+++ b/tensorflow/compiler/xla/reference_util.h
@@ -70,7 +70,7 @@ class ReferenceUtil {
   // dilation factors.
   static std::unique_ptr<Array4D<float>> ConvArray4DGeneralDimensionsDilated(
       const Array4D<float>& lhs, const Array4D<float>& rhs,
-      std::pair<int64, int64> stride, Padding padding,
+      std::pair<int64, int64> kernel_stride, Padding padding,
       std::pair<int64, int64> lhs_dilation,
       std::pair<int64, int64> rhs_dilation, ConvolutionDimensionNumbers dnums);
 
@@ -173,6 +173,10 @@ class ReferenceUtil {
       const Array2D<float>& operand, float init,
       const tensorflow::gtl::ArraySlice<int64>& window,
       const tensorflow::gtl::ArraySlice<int64>& stride, Padding padding);
+  static std::unique_ptr<Array3D<float>> ReduceWindow3DAdd(
+      const Array3D<float>& operand, float init,
+      const tensorflow::gtl::ArraySlice<int64>& window,
+      const tensorflow::gtl::ArraySlice<int64>& stride, Padding padding);
   static std::unique_ptr<Array4D<float>> ReduceWindow4DAdd(
       const Array4D<float>& operand, float init,
       const tensorflow::gtl::ArraySlice<int64>& window,
@@ -184,11 +188,18 @@ class ReferenceUtil {
       const std::function<float(float, float)>& reduce_func,
       const tensorflow::gtl::ArraySlice<int64>& window,
       const tensorflow::gtl::ArraySlice<int64>& stride, Padding padding);
+  static std::unique_ptr<std::vector<float>> ReduceWindow1DGeneric(
+      const tensorflow::gtl::ArraySlice<float>& operand, float init,
+      const std::function<float(float, float)>& reduce_func,
+      const tensorflow::gtl::ArraySlice<int64>& window,
+      const tensorflow::gtl::ArraySlice<int64>& stride,
+      const tensorflow::gtl::ArraySlice<std::pair<int64, int64>>& padding);
   static std::unique_ptr<Array4D<float>> ReduceWindow4DGeneric(
       const Array4D<float>& operand, float init,
       const std::function<float(float, float)>& reduce_func,
       const tensorflow::gtl::ArraySlice<int64>& window,
       const tensorflow::gtl::ArraySlice<int64>& stride, Padding padding);
+  // With arbitrary padding.
   static std::unique_ptr<Array4D<float>> ReduceWindow4DGeneric(
       const Array4D<float>& operand, float init,
       const std::function<float(float, float)>& reduce_func,
diff --git a/tensorflow/compiler/xla/reference_util_test.cc b/tensorflow/compiler/xla/reference_util_test.cc
index 846ccdc83df900e3afedb6ababe07ebb1bd68f41..9da9bc60a2025e63b57a3be9ed360d150f88d73c 100644
--- a/tensorflow/compiler/xla/reference_util_test.cc
+++ b/tensorflow/compiler/xla/reference_util_test.cc
@@ -86,6 +86,13 @@ TEST_F(ReferenceUtilTest, ReduceToRowArray2D) {
                                        ErrorSpec(0.0001));
 }
 
+TEST_F(ReferenceUtilTest, Reduce4Dto1DZeroSizedArray) {
+  auto result = Literal::CreateR1<float>(ReferenceUtil::Reduce4DTo1D(
+      Array4D<float>(1, 0, 1, 1), /*init=*/0, /*dims=*/{0, 1, 2},
+      [](float a, float b) { return a + b; }));
+  LiteralTestUtil::ExpectR1Equal<float>({0}, *result);
+}
+
 TEST_F(ReferenceUtilTest, MapArray2D) {
   auto identity = [](float value) { return log(exp(value)); };
   auto result = ReferenceUtil::MapArray2D(*matrix_, identity);
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index d3175c1e49974b060cc495d463d4995c925abcf7..71341c6f1e9a359a6d2a8aa9f2fb97b140ade23d 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -108,6 +108,7 @@ tf_cc_test(
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:reference_util",
         "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:test",
@@ -115,6 +116,7 @@ tf_cc_test(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/service:hlo_element_type_converter",
         "//tensorflow/compiler/xla/tests:hlo_verified_test_base",
         "//tensorflow/compiler/xla/tests:literal_test_util",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
@@ -1009,9 +1011,9 @@ tf_cc_test(
 )
 
 cc_library(
-    name = "batchnorm_rewriter",
-    srcs = ["batchnorm_rewriter.cc"],
-    hdrs = ["batchnorm_rewriter.h"],
+    name = "batchnorm_expander",
+    srcs = ["batchnorm_expander.cc"],
+    hdrs = ["batchnorm_expander.h"],
     deps = [
         ":hlo",
         ":hlo_pass",
@@ -1029,11 +1031,11 @@ cc_library(
 )
 
 tf_cc_test(
-    name = "batchnorm_rewriter_test",
+    name = "batchnorm_expander_test",
     size = "small",
-    srcs = ["batchnorm_rewriter_test.cc"],
+    srcs = ["batchnorm_expander_test.cc"],
     deps = [
-        ":batchnorm_rewriter",
+        ":batchnorm_expander",
         ":hlo",
         ":hlo_matchers",
         ":hlo_pass",
@@ -1099,6 +1101,8 @@ cc_library(
         ":hlo",
         ":hlo_evaluator",
         ":hlo_pass",
+        ":tuple_util",
+        ":while_util",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/core:lib",
     ],
@@ -1143,6 +1147,21 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "dot_decomposer",
+    srcs = ["dot_decomposer.cc"],
+    hdrs = ["dot_decomposer.h"],
+    deps = [
+        ":hlo",
+        ":hlo_pass",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/core:lib",
+    ],
+)
+
 cc_library(
     name = "tuple_simplifier",
     srcs = ["tuple_simplifier.cc"],
@@ -1663,6 +1682,7 @@ tf_cc_test(
         ":hlo",
         ":hlo_graph_dumper",
         ":hlo_matchers",
+        ":hlo_runner",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:test",
@@ -1670,7 +1690,6 @@ tf_cc_test(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/legacy_flags:debug_options_flags",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
-        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:test",
     ],
 )
@@ -1703,6 +1722,22 @@ cc_library(
     ],
 )
 
+tf_cc_test(
+    name = "hlo_verifier_test",
+    srcs = ["hlo_verifier_test.cc"],
+    deps = [
+        ":hlo",
+        ":hlo_verifier",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
+    ],
+)
+
 cc_library(
     name = "hlo_rematerialization",
     srcs = ["hlo_rematerialization.cc"],
@@ -1889,6 +1924,22 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "hlo_element_type_converter",
+    srcs = ["hlo_element_type_converter.cc"],
+    hdrs = ["hlo_element_type_converter.h"],
+    deps = [
+        ":hlo",
+        ":hlo_evaluator",
+        ":hlo_pass",
+        ":hlo_query",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/core:lib",
+    ],
+)
+
 cc_library(
     name = "device_memory_allocator",
     srcs = ["device_memory_allocator.cc"],
@@ -2021,6 +2072,7 @@ cc_library(
         "//tensorflow/compiler/xla:window_util",
         "//tensorflow/compiler/xla:xla_proto",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core:regexp_internal",
     ],
     alwayslink = 1,
@@ -2074,6 +2126,41 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "zero_sized_hlo_elimination",
+    srcs = ["zero_sized_hlo_elimination.cc"],
+    hdrs = ["zero_sized_hlo_elimination.h"],
+    deps = [
+        ":hlo",
+        ":hlo_pass",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "zero_sized_hlo_elimination_test",
+    srcs = ["zero_sized_hlo_elimination_test.cc"],
+    deps = [
+        ":hlo",
+        ":shape_inference",
+        ":zero_sized_hlo_elimination",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:test_helpers",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:lib",
+    ],
+)
+
 cc_library(
     name = "pool",
     hdrs = ["pool.h"],
@@ -2170,6 +2257,78 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "tuple_util",
+    srcs = ["tuple_util.cc"],
+    hdrs = ["tuple_util.h"],
+    deps = [
+        ":hlo",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "tuple_util_test",
+    srcs = ["tuple_util_test.cc"],
+    deps = [
+        ":tuple_util",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla/service:hlo_matchers",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/compiler/xla/tools/parser:hlo_parser",
+    ],
+)
+
+cc_library(
+    name = "while_util",
+    srcs = ["while_util.cc"],
+    hdrs = ["while_util.h"],
+    deps = [
+        ":call_inliner",
+        ":hlo",
+        ":tuple_util",
+    ],
+)
+
+tf_cc_test(
+    name = "while_util_test",
+    srcs = ["while_util_test.cc"],
+    deps = [
+        ":while_util",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla/service:hlo_matchers",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/compiler/xla/tools/parser:hlo_parser",
+    ],
+)
+
+cc_library(
+    name = "while_loop_invariant_code_motion",
+    srcs = ["while_loop_invariant_code_motion.cc"],
+    hdrs = ["while_loop_invariant_code_motion.h"],
+    deps = [
+        ":hlo",
+        ":hlo_pass",
+        ":tuple_util",
+        ":while_util",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "while_loop_invariant_code_motion_test",
+    srcs = ["while_loop_invariant_code_motion_test.cc"],
+    deps = [
+        ":hlo_matchers",
+        ":while_loop_invariant_code_motion",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla/tests:hlo_verified_test_base",
+        "//tensorflow/core:test",
+    ],
+)
+
 # -----------------------------------------------------------------------------
 
 filegroup(
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 71491218aa221cb26ea45f288ddc47173a15df3f..90a3f0b6748fc00c9cd9226700805bf243a1acdd 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -193,6 +193,33 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault {
         enable_dot_strength_reduction_(enable_dot_strength_reduction),
         enable_conv_simplification_(enable_conv_simplification) {}
 
+  // Transforms Dots where at least one input is a vector or has a degenerate
+  // dimension and converts it into a multiply and reduce. This should enable
+  // more fusion than leaving the nodes as Dot operations.
+  StatusOr<bool> HandleDotStrengthReduction(HloInstruction* dot);
+
+  // Reshapes an instruction to rank 1 if it is not already rank 1.
+  HloInstruction* Flatten(HloInstruction* hlo) {
+    if (ShapeUtil::Rank(hlo->shape()) == 1) {
+      return hlo;
+    }
+    return computation_->AddInstruction(HloInstruction::CreateReshape(
+        ShapeUtil::MakeShape(hlo->shape().element_type(),
+                             {ShapeUtil::ElementsIn(hlo->shape())}),
+        hlo));
+  }
+
+  // Helper method to perform and add reduction in a single dimension.
+  HloInstruction* AddReduce(HloInstruction* hlo, int64 dim) {
+    HloInstruction* zero = computation_->AddInstruction(
+        HloInstruction::CreateConstant(Literal::CreateR0(0.0f)));
+    HloComputation* AddReduce_computation = CreateScalarBinaryComputation(
+        computation_->parent(), F32, HloOpcode::kAdd);
+    Shape shape = ShapeUtil::DeleteDimension(dim, hlo->shape());
+    return computation_->AddInstruction(HloInstruction::CreateReduce(
+        shape, hlo, zero, {dim}, AddReduce_computation));
+  }
+
   // Convenience method for replacing an instruction with a bitcast.
   void ReplaceWithBitcast(HloInstruction* instruction);
 
@@ -252,6 +279,11 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
+  StatusOr<HloInstruction*> OptimizeDotOfConcat(HloInstruction* dot);
+  StatusOr<HloInstruction*> OptimizeDotOfConcatHelper(
+      const Shape& dot_shape, HloInstruction* lhs, int64 lhs_contracting_dim,
+      HloInstruction* rhs, int64 rhs_contracting_dim, bool swapped);
+
   // Current HloComputation instance the AlgebraicSimplifierVisitor is
   // traversing.
   HloComputation* computation_;
@@ -329,6 +361,39 @@ Status AlgebraicSimplifierVisitor::HandleAdd(HloInstruction* add) {
     return Status::OK();
   }
 
+  // Canonicalization: Put constants on the right.  This makes the reassociation
+  // rules below simpler.
+  VLOG(10) << "trying transform [Const + A => A + Const]";
+  if (lhs->IsConstant() && !rhs->IsConstant()) {
+    return ReplaceWithNewInstruction(
+        add,
+        HloInstruction::CreateBinary(add->shape(), HloOpcode::kAdd, rhs, lhs));
+  }
+
+  // Reassociate to allow constant folding.
+  //
+  // Note: This is not general.  For example, we won't reassociate
+  //
+  //   (A + C1) + (B + C2) =>  A + B + (C1 + C2).
+  //
+  VLOG(10) << "trying transform [(A + C1) + C2 => A + (C1 + C2)]";
+  if (rhs->IsConstant() && lhs->opcode() == HloOpcode::kAdd &&
+      !lhs->operand(0)->IsConstant() && lhs->operand(1)->IsConstant()) {
+    auto* c1 = lhs->mutable_operand(1);
+    auto* c2 = rhs;
+    TF_ASSIGN_OR_RETURN(
+        Shape sum_of_constants_shape,
+        ShapeInference::InferBinaryOpShape(HloOpcode::kAdd, c1, c2));
+
+    auto* sum_of_constants =
+        computation_->AddInstruction(HloInstruction::CreateBinary(
+            sum_of_constants_shape, HloOpcode::kAdd, c1, c2));
+    return ReplaceWithNewInstruction(
+        add, HloInstruction::CreateBinary(add->shape(), HloOpcode::kAdd,
+                                          lhs->mutable_operand(0),
+                                          sum_of_constants));
+  }
+
   return Status::OK();
 }
 
@@ -433,13 +498,14 @@ static HloInstruction* BuildTupleConstant(HloComputation* computation,
   if (ShapeUtil::IsTuple(literal.shape())) {
     std::vector<HloInstruction*> elems;
     elems.reserve(ShapeUtil::TupleElementCount(literal.shape()));
-    for (const Literal& child : literal.tuple_literals()) {
-      elems.push_back(BuildTupleConstant(computation, child));
+    for (int i = 0; i < ShapeUtil::TupleElementCount(literal.shape()); ++i) {
+      elems.push_back(
+          BuildTupleConstant(computation, LiteralView::Create(literal, {i})));
     }
     return computation->AddInstruction(HloInstruction::CreateTuple(elems));
   } else {
     return computation->AddInstruction(
-        HloInstruction::CreateConstant(MakeUnique<Literal>(literal)));
+        HloInstruction::CreateConstant(literal.CloneToUnique()));
   }
 }
 
@@ -462,6 +528,16 @@ Status AlgebraicSimplifierVisitor::HandleSubtract(HloInstruction* sub) {
     return Status::OK();
   }
 
+  // Canonicalize subtraction of a constant to addition.
+  VLOG(10) << "trying transform [A - Const => A + (-Const)]";
+  if (rhs->IsConstant() && !lhs->IsConstant()) {
+    HloInstruction* negative_const = computation_->AddInstruction(
+        HloInstruction::CreateUnary(rhs->shape(), HloOpcode::kNegate, rhs));
+    return ReplaceWithNewInstruction(
+        sub, HloInstruction::CreateBinary(sub->shape(), HloOpcode::kAdd, lhs,
+                                          negative_const));
+  }
+
   return Status::OK();
 }
 
@@ -523,6 +599,23 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) {
     return Status::OK();
   }
 
+  // A / Const => A * (1 / Const)
+  //
+  // (Backends can do this transformation, but generally only if the constant is
+  // a scalar.)
+  if (lhs->opcode() != HloOpcode::kConstant &&
+      rhs->opcode() == HloOpcode::kConstant) {
+    HloInstruction* one =
+        computation_->AddInstruction(HloInstruction::CreateConstant(
+            Literal::One(lhs->shape().element_type()).CloneToUnique()));
+    HloInstruction* inverse =
+        computation_->AddInstruction(HloInstruction::CreateBinary(
+            rhs->shape(), HloOpcode::kDivide, one, rhs));
+    return ReplaceWithNewInstruction(
+        divide, HloInstruction::CreateBinary(
+                    divide->shape(), HloOpcode::kMultiply, lhs, inverse));
+  }
+
   // (A / B) / (C / D)  =>  (A / B)*(D / C) => (A * D) / (B * C)
   if (lhs->opcode() == HloOpcode::kDivide &&
       rhs->opcode() == HloOpcode::kDivide) {
@@ -574,70 +667,72 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) {
   return Status::OK();
 }
 
-Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
-  auto lhs = dot->mutable_operand(0);
-  auto rhs = dot->mutable_operand(1);
+StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
+    HloInstruction* dot) {
+  HloInstruction* lhs = dot->mutable_operand(0);
+  HloInstruction* rhs = dot->mutable_operand(1);
+  int64 lhs_collapsing_dim =
+      dot->dot_dimension_numbers().lhs_contracting_dimensions(0);
+  if (lhs->IsRank2Transpose()) {
+    lhs = lhs->mutable_operand(0);
+    lhs_collapsing_dim = 1 - lhs_collapsing_dim;
+  }
+  const int64 lhs_kept_dim = 1 - lhs_collapsing_dim;
+
+  int64 rhs_collapsing_dim =
+      dot->dot_dimension_numbers().rhs_contracting_dimensions(0);
+  if (rhs->IsRank2Transpose()) {
+    rhs = rhs->mutable_operand(0);
+    rhs_collapsing_dim = 1 - rhs_collapsing_dim;
+  }
+  const int64 rhs_kept_dim = 1 - rhs_collapsing_dim;
+
+  auto reshape_if_necessary = [&](HloInstruction* hlo) {
+    if (ShapeUtil::SameDimensions(hlo->shape(), dot->shape())) {
+      return hlo;
+    }
+    return computation_->AddInstruction(
+        HloInstruction::CreateReshape(dot->shape(), hlo));
+  };
 
-  // Only optimize F32 dot operations where the dot, rhs and lhs are rank 2 or
-  // below.
-  if (dot->shape().element_type() != F32 || ShapeUtil::Rank(lhs->shape()) > 2 ||
-      ShapeUtil::Rank(rhs->shape()) > 2 || ShapeUtil::Rank(dot->shape()) > 2) {
-    return Status::OK();
-  }
+  auto broadcast_to_dim = [&](HloInstruction* hlo, const Shape& shape,
+                              int64 dim) {
+    return computation_->AddInstruction(
+        HloInstruction::CreateBroadcast(shape, hlo, {dim}));
+  };
 
-  // Replace a zero element dot with a broadcast of the constant 0.
-  if (ShapeUtil::HasZeroElements(dot->shape()) ||
-      ShapeUtil::HasZeroElements(lhs->shape()) ||
-      ShapeUtil::HasZeroElements(rhs->shape())) {
-    auto zero = computation_->AddInstruction(
-        HloInstruction::CreateConstant(Literal::CreateR0(0.0f)));
-    return ReplaceWithNewInstruction(
-        dot, HloInstruction::CreateBroadcast(dot->shape(), zero, {}));
-  }
+  auto multiply = [&](HloInstruction* local_lhs, HloInstruction* local_rhs) {
+    return computation_->AddInstruction(HloInstruction::CreateBinary(
+        local_lhs->shape(), HloOpcode::kMultiply, local_lhs, local_rhs));
+  };
 
-  // Simplify dot(transpose(a), transpose(b)) to transpose(dot(b,a)).
-  if (lhs->IsRank2Transpose() && rhs->IsRank2Transpose()) {
-    auto new_dot = computation_->AddInstruction(HloInstruction::CreateBinary(
-        ShapeUtil::PermuteDimensions({1, 0}, dot->shape()), HloOpcode::kDot,
-        rhs->mutable_operand(0), lhs->mutable_operand(0)));
-    return ReplaceWithNewInstruction(
-        dot, HloInstruction::CreateTranspose(dot->shape(), new_dot, {1, 0}));
+  // Strength reduce dot(a[K] , b[K]) =
+  //  reshape(result.shape,
+  //          reduce_sum(multiply(a, b), {0}))
+  if (ShapeUtil::Rank(rhs->shape()) == 1 &&
+      ShapeUtil::Rank(lhs->shape()) == 1) {
+    TF_RETURN_IF_ERROR(
+        ReplaceInstruction(dot, reshape_if_necessary(AddReduce(
+                                    multiply(Flatten(lhs), Flatten(rhs)), 0))));
+    return true;
   }
 
-  if (!enable_dot_strength_reduction_) {
-    return Status::OK();
+  if (ShapeUtil::IsEffectiveScalar(rhs->shape()) &&
+      ShapeUtil::IsEffectiveScalar(lhs->shape())) {
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, reshape_if_necessary(multiply(Flatten(lhs), Flatten(rhs)))));
+    return true;
   }
 
   // Simplify outer product into multiply with implicit broadcasting.
   //
   // A dot(a[M, 1], b[1, N]) = multiply(a [M,1], b [1, N])
-  if (ShapeUtil::Rank(rhs->shape()) == 2 && rhs->shape().dimensions(0) == 1) {
-    return ReplaceWithNewInstruction(
-        dot, HloInstruction::CreateBinary(dot->shape(), HloOpcode::kMultiply,
-                                          lhs, rhs));
-  }
-
-  // The following graph transformations take Dots where at least one input is a
-  // vector or has a degenerate dimension and converts it into a multiply and
-  // reduce. This should enable more fusion than leaving the nodes as Dot
-  // operations.
-
-  // Strength reduce dot(a[K] , b[K]) =
-  //  reshape(result.shape,
-  //          reduce_sum(multiply(a, b), {0}))
-  if (ShapeUtil::Rank(rhs->shape()) == 1 &&
-      ShapeUtil::Rank(lhs->shape()) == 1) {
-    auto multiply = computation_->AddInstruction(HloInstruction::CreateBinary(
-        rhs->shape(), HloOpcode::kMultiply, lhs, rhs));
-    HloComputation* add_reduce_computation = CreateScalarBinaryComputation(
-        computation_->parent(), F32, HloOpcode::kAdd);
-    auto zero = computation_->AddInstruction(
-        HloInstruction::CreateConstant(Literal::CreateR0(0.0f)));
-    auto reduce = computation_->AddInstruction(HloInstruction::CreateReduce(
-        ShapeUtil::MakeShape(dot->shape().element_type(), {}), multiply, zero,
-        {0}, add_reduce_computation));
-    return ReplaceWithNewInstruction(
-        dot, HloInstruction::CreateReshape(dot->shape(), reduce));
+  if (ShapeUtil::Rank(rhs->shape()) == 2 &&
+      rhs->shape().dimensions(rhs_collapsing_dim) == 1) {
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, multiply(broadcast_to_dim(Flatten(lhs), dot->shape(), 0),
+                      broadcast_to_dim(Flatten(rhs), dot->shape(), 1))));
+    return true;
   }
 
   // Strength reduce dot(a[1, K], b) =
@@ -648,35 +743,21 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
   //      )
   //    )
   if (ShapeUtil::Rank(lhs->shape()) == 1 ||
-      (ShapeUtil::Rank(lhs->shape()) == 2 && lhs->shape().dimensions(0) == 1)) {
-    auto new_lhs = computation_->AddInstruction(HloInstruction::CreateReshape(
-        ShapeUtil::MakeShape(lhs->shape().element_type(),
-                             {ShapeUtil::ElementsIn(lhs->shape())}),
-        lhs));
-    HloComputation* add_reduce_computation = CreateScalarBinaryComputation(
-        computation_->parent(), F32, HloOpcode::kAdd);
-    auto zero = computation_->AddInstruction(
-        HloInstruction::CreateConstant(Literal::CreateR0(0.0f)));
-    HloInstruction* reduce;
+      (ShapeUtil::Rank(lhs->shape()) == 2 &&
+       lhs->shape().dimensions(lhs_kept_dim) == 1)) {
     if (ShapeUtil::Rank(rhs->shape()) == 1) {
-      auto multiply = computation_->AddInstruction(HloInstruction::CreateBinary(
-          rhs->shape(), HloOpcode::kMultiply, new_lhs, rhs));
-      reduce = computation_->AddInstruction(HloInstruction::CreateReduce(
-          ShapeUtil::MakeShape(dot->shape().element_type(), {}), multiply, zero,
-          {0}, add_reduce_computation));
-    } else {
-      new_lhs = computation_->AddInstruction(
-          HloInstruction::CreateBroadcast(rhs->shape(), new_lhs, {0}));
-      auto multiply = computation_->AddInstruction(HloInstruction::CreateBinary(
-          rhs->shape(), HloOpcode::kMultiply, new_lhs, rhs));
-
-      reduce = computation_->AddInstruction(HloInstruction::CreateReduce(
-          ShapeUtil::MakeShape(dot->shape().element_type(),
-                               {rhs->shape().dimensions(1)}),
-          multiply, zero, {0}, add_reduce_computation));
+      TF_RETURN_IF_ERROR(ReplaceInstruction(
+          dot,
+          reshape_if_necessary(AddReduce(multiply(Flatten(lhs), rhs), 0))));
+      return true;
     }
-    return ReplaceWithNewInstruction(
-        dot, HloInstruction::CreateReshape(dot->shape(), reduce));
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, reshape_if_necessary(
+                 AddReduce(multiply(broadcast_to_dim(Flatten(lhs), rhs->shape(),
+                                                     rhs_collapsing_dim),
+                                    rhs),
+                           rhs_collapsing_dim))));
+    return true;
   }
 
   // Strength reduce dot(a, b[K, 1]) =
@@ -684,26 +765,208 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
   //    reduce_sum(multiply(a, broadcast(reshape([K],b), {1})), {0})
   //  )
   if (ShapeUtil::Rank(rhs->shape()) == 1 ||
-      (ShapeUtil::Rank(rhs->shape()) == 2 && rhs->shape().dimensions(1) == 1)) {
-    auto new_rhs = computation_->AddInstruction(HloInstruction::CreateReshape(
-        ShapeUtil::MakeShape(rhs->shape().element_type(),
-                             {ShapeUtil::ElementsIn(rhs->shape())}),
-        rhs));
-    new_rhs = computation_->AddInstruction(
-        HloInstruction::CreateBroadcast(lhs->shape(), new_rhs, {1}));
-    auto multiply = computation_->AddInstruction(HloInstruction::CreateBinary(
-        lhs->shape(), HloOpcode::kMultiply, lhs, new_rhs));
-    HloComputation* add_reduce_computation = CreateScalarBinaryComputation(
-        computation_->parent(), F32, HloOpcode::kAdd);
+      (ShapeUtil::Rank(rhs->shape()) == 2 &&
+       rhs->shape().dimensions(rhs_kept_dim) == 1)) {
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, reshape_if_necessary(AddReduce(
+                 multiply(lhs, broadcast_to_dim(Flatten(rhs), lhs->shape(),
+                                                lhs_collapsing_dim)),
+                 lhs_collapsing_dim))));
+    return true;
+  }
+  return false;
+}
+
+StatusOr<HloInstruction*> AlgebraicSimplifierVisitor::OptimizeDotOfConcat(
+    HloInstruction* dot) {
+  const DotDimensionNumbers& dnums = dot->dot_dimension_numbers();
+  if (dnums.lhs_contracting_dimensions_size() != 1 ||
+      dnums.lhs_batch_dimensions_size() != 0) {
+    return nullptr;
+  }
+
+  const int64 lhs_contracting_dim = dnums.lhs_contracting_dimensions(0);
+  const int64 rhs_contracting_dim = dnums.rhs_contracting_dimensions(0);
+  HloInstruction* lhs = dot->mutable_operand(0);
+  HloInstruction* rhs = dot->mutable_operand(1);
+
+  TF_ASSIGN_OR_RETURN(
+      HloInstruction * optimized_lhs_concat,
+      OptimizeDotOfConcatHelper(dot->shape(), lhs, lhs_contracting_dim, rhs,
+                                rhs_contracting_dim, /*swapped=*/false));
+  if (optimized_lhs_concat) {
+    return optimized_lhs_concat;
+  }
+
+  return OptimizeDotOfConcatHelper(dot->shape(), rhs, rhs_contracting_dim, lhs,
+                                   lhs_contracting_dim, /*swapped=*/true);
+}
+
+StatusOr<HloInstruction*> AlgebraicSimplifierVisitor::OptimizeDotOfConcatHelper(
+    const Shape& dot_shape, HloInstruction* lhs, int64 lhs_contracting_dim,
+    HloInstruction* rhs, int64 rhs_contracting_dim, bool swapped) {
+  bool can_optimize = lhs->opcode() == HloOpcode::kConcatenate &&
+                      lhs->concatenate_dimension() == lhs_contracting_dim &&
+                      rhs->opcode() == HloOpcode::kConstant;
+  if (!can_optimize) {
+    return nullptr;
+  }
+
+  // We're replacing this:
+  //
+  //   +-----+-----+-----+      +-------------------+
+  //   |     |     |     |      |                   |
+  //   |     |     |     |      |        R_0        |
+  //   |     |     |     |      |                   |
+  //   |     |     |     |      +-------------------+
+  //   |     |     |     |      |                   |
+  //   | L_0 | L_1 | L_2 |   *  |        R_1        |
+  //   |     |     |     |      |                   |
+  //   |     |     |     |      +-------------------+
+  //   |     |     |     |      |                   |
+  //   |     |     |     |      |        R_2        |
+  //   |     |     |     |      |                   |
+  //   +-----+-----+-----+      +-------------------+
+  //
+  // with this:
+  //
+  // [Sum over i]
+  //
+  //   +-----+     +-------------------+
+  //   |     |     |                   |
+  //   |     |  *  |        R_i        |
+  //   |     |     |                   |
+  //   |     |     +-------------------+
+  //   |     |
+  //   | L_i |
+  //   |     |
+  //   |     |
+  //   |     |
+  //   |     |
+  //   |     |
+  //   +-----+
+  //
+  // where the LHS is a concatenate operation (so we can "split" the LHS tensor
+  // for free) and the RHS is a constant tensor (and thus can be split at
+  // compile time).  In the future, we may also want to do this when both the
+  // LHS and the RHS are concatenate operations that line up along the dimension
+  // being contracted over.
+  //
+  // We should be able to generalize this transform to work on a non-constant
+  // RHS when/if we have in-place slices or support input-fusing slices into
+  // Dots.
+
+  // Dimension numbers for the new dot instructions we'll create (L_i * R_i in
+  // the diagram above).
+  DotDimensionNumbers new_dot_dnums;
+  new_dot_dnums.add_lhs_contracting_dimensions(swapped ? rhs_contracting_dim
+                                                       : lhs_contracting_dim);
+  new_dot_dnums.add_rhs_contracting_dimensions(swapped ? lhs_contracting_dim
+                                                       : rhs_contracting_dim);
+
+  // Here we use the MKN notation, where the contracted dimension has K
+  // elements and the two non-contracted dimensions have M and N elements.
+  HloInstruction* add_result = nullptr;
+  int64 rhs_contracting_dim_offset = 0;
+  int64 n = rhs->shape().dimensions(1 - rhs_contracting_dim);
+  for (HloInstruction* concat_op : lhs->operands()) {
+    int64 sub_k = concat_op->shape().dimensions(lhs_contracting_dim);
+    Shape rhs_slice_shape(rhs->shape());
+    rhs_slice_shape.set_dimensions(rhs_contracting_dim, sub_k);
+
+    std::array<int64, 2> start_indices;
+    start_indices[rhs_contracting_dim] = rhs_contracting_dim_offset;
+    start_indices[1 - rhs_contracting_dim] = 0;
+
+    std::array<int64, 2> limit_indices;
+    limit_indices[rhs_contracting_dim] = rhs_contracting_dim_offset + sub_k;
+    limit_indices[1 - rhs_contracting_dim] = n;
+
+    HloInstruction* rhs_slice =
+        computation_->AddInstruction(HloInstruction::CreateSlice(
+            rhs_slice_shape, rhs, /*start_indices=*/start_indices,
+            /*limit_indices=*/limit_indices, /*strides=*/{1, 1}));
+
+    // TODO(b/69062148): We can get rid of `swapped` once all backends support
+    // "non-canonical" contraction dimensions (that contracts dimension 1 of the
+    // LHS with dimension 0 of the RHS).  But for now we keep the same
+    // contraction dimensions as the incoming dot operation to ensure the new
+    // dot operations can be lowered.
+    HloInstruction *new_dot_lhs, *new_dot_rhs;
+    if (swapped) {
+      new_dot_lhs = rhs_slice;
+      new_dot_rhs = concat_op;
+    } else {
+      new_dot_lhs = concat_op;
+      new_dot_rhs = rhs_slice;
+    }
+
+    auto* new_dot = computation_->AddInstruction(HloInstruction::CreateDot(
+        dot_shape, new_dot_lhs, new_dot_rhs, new_dot_dnums));
+
+    if (add_result) {
+      add_result = computation_->AddInstruction(HloInstruction::CreateBinary(
+          dot_shape, HloOpcode::kAdd, add_result, new_dot));
+    } else {
+      add_result = new_dot;
+    }
+
+    rhs_contracting_dim_offset += sub_k;
+  }
+
+  return add_result;
+}
+
+Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
+  auto lhs = dot->mutable_operand(0);
+  auto rhs = dot->mutable_operand(1);
+
+  // Only optimize F32 dot operations where the dot, rhs and lhs are rank 2 or
+  // below.
+  if (dot->shape().element_type() != F32 || ShapeUtil::Rank(lhs->shape()) > 2 ||
+      ShapeUtil::Rank(rhs->shape()) > 2 || ShapeUtil::Rank(dot->shape()) > 2) {
+    return Status::OK();
+  }
+
+  // Replace a zero element dot with a broadcast of the constant 0.
+  if (ShapeUtil::HasZeroElements(dot->shape()) ||
+      ShapeUtil::HasZeroElements(lhs->shape()) ||
+      ShapeUtil::HasZeroElements(rhs->shape())) {
     auto zero = computation_->AddInstruction(
         HloInstruction::CreateConstant(Literal::CreateR0(0.0f)));
-    auto reduce = computation_->AddInstruction(HloInstruction::CreateReduce(
-        ShapeUtil::MakeShape(dot->shape().element_type(),
-                             {lhs->shape().dimensions(0)}),
-        multiply, zero, {1}, add_reduce_computation));
     return ReplaceWithNewInstruction(
-        dot, HloInstruction::CreateReshape(dot->shape(), reduce));
+        dot, HloInstruction::CreateBroadcast(dot->shape(), zero, {}));
+  }
+
+  TF_ASSIGN_OR_RETURN(HloInstruction * dot_of_concat_optimized,
+                      OptimizeDotOfConcat(dot));
+  if (dot_of_concat_optimized) {
+    VLOG(10) << "Replaced dot(concat(...), constant) with add(dot(..., "
+                "constant)...)";
+    return ReplaceInstruction(dot, dot_of_concat_optimized);
+  }
+
+  if (enable_dot_strength_reduction_ && !is_layout_sensitive_) {
+    TF_ASSIGN_OR_RETURN(bool did_strength_reduction,
+                        HandleDotStrengthReduction(dot));
+    if (did_strength_reduction) {
+      return Status::OK();
+    }
   }
+
+  // Simplify dot(transpose(a), transpose(b)) to transpose(dot(b,a)).
+  if (lhs->IsRank2Transpose() && rhs->IsRank2Transpose()) {
+    DotDimensionNumbers dot_dimension_numbers;
+    dot_dimension_numbers.add_lhs_contracting_dimensions(1);
+    dot_dimension_numbers.add_rhs_contracting_dimensions(0);
+    auto new_dot = computation_->AddInstruction(HloInstruction::CreateDot(
+        ShapeUtil::PermuteDimensions({1, 0}, dot->shape()),
+        rhs->mutable_operand(0), lhs->mutable_operand(0),
+        dot_dimension_numbers));
+    return ReplaceWithNewInstruction(
+        dot, HloInstruction::CreateTranspose(dot->shape(), new_dot, {1, 0}));
+  }
+
   return Status::OK();
 }
 
@@ -980,6 +1243,11 @@ Status AlgebraicSimplifierVisitor::HandleImag(HloInstruction* imag) {
 }
 
 Status AlgebraicSimplifierVisitor::HandlePad(HloInstruction* pad) {
+  if (ShapeUtil::HasZeroElements(pad->operand(0)->shape())) {
+    return ReplaceWithNewInstruction(
+        pad, HloInstruction::CreateBroadcast(pad->shape(),
+                                             pad->mutable_operand(1), {}));
+  }
   // Eliminate nop pads (padding all zero), and replace a pad with negative
   // padding with a pad with non-negative padding followed by a slice.
   bool all_zero = true;
@@ -1120,6 +1388,27 @@ Status AlgebraicSimplifierVisitor::HandlePower(HloInstruction* power) {
         power, HloInstruction::CreateBinary(power->shape(), HloOpcode::kDivide,
                                             broadcast_one, lhs));
   }
+
+  VLOG(10) << "trying transform [pow(pow(A, X), Y) => pow(A, X*Y)]: "
+           << power->ToString();
+
+  // Don't perform this optimization if either of the exponents is complex; this
+  // identity is true only for real-valued exponents.  In addition, we cowardly
+  // refuse to do this transformation if the two expontents have different
+  // element types.
+  if (lhs->opcode() == HloOpcode::kPower &&
+      !ShapeUtil::ElementIsComplex(lhs->operand(1)->shape()) &&
+      !ShapeUtil::ElementIsComplex(rhs->shape()) &&
+      ShapeUtil::SameElementType(lhs->operand(1)->shape(), rhs->shape())) {
+    auto exponent_product =
+        computation_->AddInstruction(HloInstruction::CreateBinary(
+            rhs->shape(), HloOpcode::kMultiply, lhs->mutable_operand(1), rhs));
+    return ReplaceWithNewInstruction(
+        power, HloInstruction::CreateBinary(power->shape(), HloOpcode::kPower,
+                                            lhs->mutable_operand(0),
+                                            exponent_product));
+  }
+
   return Status::OK();
 }
 
@@ -1173,7 +1462,7 @@ StatusOr<bool> AlgebraicSimplifierVisitor::
         ShapeUtil::MakeShapeWithLayout(
             user->shape().element_type(),
             AsInt64Slice(operand->shape().dimensions()),
-            AsInt64Slice(operand->shape().layout().minor_to_major())),
+            LayoutUtil::MinorToMajor(operand->shape())),
         new_user_operands));
     VLOG(4) << "  new user: " << new_user->ToString();
     HloInstruction* new_reshape_or_broadcast = nullptr;
@@ -1183,8 +1472,7 @@ StatusOr<bool> AlgebraicSimplifierVisitor::
               ShapeUtil::MakeShapeWithLayout(
                   user->shape().element_type(),
                   AsInt64Slice(reshape_or_broadcast->shape().dimensions()),
-                  AsInt64Slice(
-                      reshape_or_broadcast->shape().layout().minor_to_major())),
+                  LayoutUtil::MinorToMajor(reshape_or_broadcast->shape())),
               new_user));
     } else {
       TF_RET_CHECK(reshape_or_broadcast->opcode() == HloOpcode::kBroadcast);
@@ -1193,8 +1481,7 @@ StatusOr<bool> AlgebraicSimplifierVisitor::
               ShapeUtil::MakeShapeWithLayout(
                   user->shape().element_type(),
                   AsInt64Slice(reshape_or_broadcast->shape().dimensions()),
-                  AsInt64Slice(
-                      reshape_or_broadcast->shape().layout().minor_to_major())),
+                  LayoutUtil::MinorToMajor(reshape_or_broadcast->shape())),
               new_user, reshape_or_broadcast->dimensions()));
     }
     VLOG(4) << "  new reshape/broadcast: "
@@ -1403,6 +1690,12 @@ Status AlgebraicSimplifierVisitor::HandleReduce(HloInstruction* reduce) {
 
 Status AlgebraicSimplifierVisitor::HandleReduceWindow(
     HloInstruction* reduce_window) {
+  if (ShapeUtil::HasZeroElements(reduce_window->operand(0)->shape())) {
+    return ReplaceWithNewInstruction(
+        reduce_window,
+        HloInstruction::CreateBroadcast(reduce_window->shape(),
+                                        reduce_window->mutable_operand(1), {}));
+  }
   auto operand = reduce_window->mutable_operand(0);
   const Window& window = reduce_window->window();
   auto function = reduce_window->to_apply();
@@ -1473,7 +1766,6 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow(
 
 Status AlgebraicSimplifierVisitor::HandleTranspose(HloInstruction* transpose) {
   auto operand = transpose->mutable_operand(0);
-
   if (std::is_sorted(transpose->dimensions().begin(),
                      transpose->dimensions().end())) {
     VLOG(10) << "deleting no-op transpose";
@@ -1500,6 +1792,18 @@ Status AlgebraicSimplifierVisitor::HandleConvolution(
     HloInstruction* convolution) {
   auto lhs = convolution->mutable_operand(0);
   auto rhs = convolution->mutable_operand(1);
+  if (ShapeUtil::HasZeroElements(lhs->shape()) ||
+      ShapeUtil::HasZeroElements(rhs->shape())) {
+    return ReplaceWithNewInstruction(
+        convolution,
+        HloInstruction::CreateBroadcast(
+            convolution->shape(),
+            computation_->AddInstruction(HloInstruction::CreateConvert(
+                ShapeUtil::MakeShape(convolution->shape().element_type(), {}),
+                computation_->AddInstruction(
+                    HloInstruction::CreateConstant(Literal::CreateR0(0.0f))))),
+            {}));
+  }
   const auto& window = convolution->window();
   if (!enable_conv_simplification_) {
     return Status::OK();
@@ -1556,15 +1860,15 @@ Status AlgebraicSimplifierVisitor::HandleConvolution(
   // still convert Conv into more efficient Matmul with operand transposition
   // (such as the transposition flags in cuBLAS SGEMM).
   if (!LayoutUtil::Equal(input_shape.layout(), convolution_shape.layout()) ||
-      input_shape.layout().minor_to_major(0) !=
+      LayoutUtil::Minor(input_shape.layout(), 0) !=
           dnums.input_feature_dimension() ||
-      convolution_shape.layout().minor_to_major(0) !=
+      LayoutUtil::Minor(convolution_shape.layout(), 0) !=
           dnums.output_feature_dimension() ||
       // The input feature dimension should come later in the minor-to-major
       // order.
-      (PositionInContainer(filter_shape.layout().minor_to_major(),
+      (PositionInContainer(LayoutUtil::MinorToMajor(filter_shape),
                            dnums.kernel_input_feature_dimension()) <
-       PositionInContainer(filter_shape.layout().minor_to_major(),
+       PositionInContainer(LayoutUtil::MinorToMajor(filter_shape),
                            dnums.kernel_output_feature_dimension()))) {
     return Status::OK();
   }
@@ -1592,18 +1896,15 @@ Status AlgebraicSimplifierVisitor::HandleConvolution(
 
   // We already checked feature_dimension is most minor, so data in input_shape
   // and row-major {conv_width,input_channels} are bitwise identical.
-  const Shape new_input_shape =
-      ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
-          input_shape.element_type(), {conv_width, input_channels});
+  const Shape new_input_shape = ShapeUtil::MakeShapeWithDescendingLayout(
+      input_shape.element_type(), {conv_width, input_channels});
   // We already checked input_feature_dimension is more major than
   // output_feature_dimension, so data in filter_shape and row-major
   // {input_channels,output_channels} are bitwise identical.
-  const Shape new_filter_shape =
-      ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
-          filter_shape.element_type(), {input_channels, output_channels});
-  const Shape dot_output_shape =
-      ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
-          convolution_shape.element_type(), {conv_width, output_channels});
+  const Shape new_filter_shape = ShapeUtil::MakeShapeWithDescendingLayout(
+      filter_shape.element_type(), {input_channels, output_channels});
+  const Shape dot_output_shape = ShapeUtil::MakeShapeWithDescendingLayout(
+      convolution_shape.element_type(), {conv_width, output_channels});
 
   // We cannot insert bitcasts if the layouts will not be compatible.
   // TODO(b/33178038): Consider inserting a transpose if a bitcast would be
@@ -1616,8 +1917,11 @@ Status AlgebraicSimplifierVisitor::HandleConvolution(
 
   auto new_lhs = add_bitcast(new_input_shape, lhs);
   auto new_rhs = add_bitcast(new_filter_shape, rhs);
-  auto dot = computation_->AddInstruction(HloInstruction::CreateBinary(
-      dot_output_shape, HloOpcode::kDot, new_lhs, new_rhs));
+  DotDimensionNumbers dot_dimension_numbers;
+  dot_dimension_numbers.add_lhs_contracting_dimensions(1);
+  dot_dimension_numbers.add_rhs_contracting_dimensions(0);
+  auto dot = computation_->AddInstruction(HloInstruction::CreateDot(
+      dot_output_shape, new_lhs, new_rhs, dot_dimension_numbers));
   return ReplaceInstruction(convolution, add_bitcast(convolution_shape, dot));
 }
 
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 56dfb1cf0bc22ed62653d1f0772fdcae58498c27..e7c4dfb0a1690683bbdb7e61067392b48fdba8a5 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -71,6 +71,55 @@ TEST_F(AlgebraicSimplifierTest, AddZero) {
   EXPECT_EQ(root, param0);
 }
 
+// Test that Const + A is canonicalized to A + Const.
+TEST_F(AlgebraicSimplifierTest, AddConstOnLHS) {
+  Shape r0f32 = ShapeUtil::MakeShape(F32, {});
+  HloComputation::Builder builder(TestName());
+  HloInstruction* param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r0f32, "param0"));
+  HloInstruction* constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0(42.0f)));
+  builder.AddInstruction(
+      HloInstruction::CreateBinary(r0f32, HloOpcode::kAdd, constant, param0));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  HloInstruction* root = computation->root_instruction();
+  EXPECT_EQ(root->opcode(), HloOpcode::kAdd);
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  root = computation->root_instruction();
+  EXPECT_THAT(root, op::Add(param0, op::Constant()));
+}
+
+// Test that [(A + C1) + C2] => [A + (C1 + C2)] for constants C1 and C2.
+TEST_F(AlgebraicSimplifierTest, AddReassociateMergeConstants) {
+  Shape r0f32 = ShapeUtil::MakeShape(F32, {});
+  HloComputation::Builder builder(TestName());
+  HloInstruction* param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r0f32, "param0"));
+  HloInstruction* constant1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0(42.0f)));
+  HloInstruction* constant2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0(3.14159f)));
+
+  HloInstruction* add1 = builder.AddInstruction(
+      HloInstruction::CreateBinary(r0f32, HloOpcode::kAdd, param0, constant1));
+  builder.AddInstruction(
+      HloInstruction::CreateBinary(r0f32, HloOpcode::kAdd, add1, constant2));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  HloInstruction* root = computation->root_instruction();
+  EXPECT_EQ(root->opcode(), HloOpcode::kAdd);
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  root = computation->root_instruction();
+  EXPECT_THAT(root, op::Add(param0, op::Add(constant1, constant2)));
+}
+
 TEST_F(AlgebraicSimplifierTest, AddBroadcastZeroR0Operand) {
   Shape r2f32 = ShapeUtil::MakeShape(F32, {3, 2});
   HloComputation::Builder builder(TestName());
@@ -139,6 +188,28 @@ TEST_F(AlgebraicSimplifierTest, SubZero) {
   EXPECT_EQ(root, param0);
 }
 
+// Test that A - Const is canonicalized to A + (-Const).
+TEST_F(AlgebraicSimplifierTest, SubConstCanonicalization) {
+  Shape r0f32 = ShapeUtil::MakeShape(F32, {});
+  HloComputation::Builder builder(TestName());
+  HloInstruction* param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r0f32, "param0"));
+  HloInstruction* constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(42.0f)));
+  builder.AddInstruction(HloInstruction::CreateBinary(
+      r0f32, HloOpcode::kSubtract, param0, constant));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  HloInstruction* root = computation->root_instruction();
+  EXPECT_EQ(root->opcode(), HloOpcode::kSubtract);
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  root = computation->root_instruction();
+  EXPECT_THAT(root, op::Add(param0, op::Negate(constant)));
+}
+
 // Test that (A/B)/C is simplified to A/(B*C).
 TEST_F(AlgebraicSimplifierTest, LhsDivOfDiv) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
@@ -327,6 +398,78 @@ TEST_F(AlgebraicSimplifierTest, DivOfBroadcastingPower) {
   EXPECT_EQ(0, negate_shape.dimensions_size());
 }
 
+// A / Const => A * (1 / Const)
+TEST_F(AlgebraicSimplifierTest, DivideByConstant) {
+  Shape r1f32 = ShapeUtil::MakeShape(F32, {3});
+  HloComputation::Builder builder(TestName());
+  HloInstruction* param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r1f32, "param0"));
+  HloInstruction* constant =
+      builder.AddInstruction(HloInstruction::CreateConstant(
+          Literal::CreateR1<float>({0.f, 1.f, 2.f})));
+  builder.AddInstruction(HloInstruction::CreateBinary(r1f32, HloOpcode::kDivide,
+                                                      param0, constant));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+
+  EXPECT_THAT(computation->root_instruction(),
+              op::Multiply(param0, op::Divide(op::Constant(), constant)));
+}
+
+// pow(pow(A, X), Y) => pow(A, X*Y)
+TEST_F(AlgebraicSimplifierTest, PowerOfPower) {
+  Shape r0f32 = ShapeUtil::MakeShape(F32, {});
+  Shape r1f32 = ShapeUtil::MakeShape(F32, {7});
+  HloComputation::Builder builder(TestName());
+  HloInstruction* base = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r1f32, "param0"));
+  HloInstruction* exp1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, r0f32, "param1"));
+  HloInstruction* exp2 = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, r0f32, "param2"));
+  HloInstruction* inner_power = builder.AddInstruction(
+      HloInstruction::CreateBinary(r1f32, HloOpcode::kPower, base, exp1));
+  builder.AddInstruction(HloInstruction::CreateBinary(r1f32, HloOpcode::kPower,
+                                                      inner_power, exp2));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  EXPECT_THAT(computation->root_instruction(),
+              op::Power(base, op::Multiply(exp1, exp2)));
+}
+
+// Don't simplify pow(pow(A, X), Y) => pow(A, X*Y) if X and Y are complex
+// numbers.
+TEST_F(AlgebraicSimplifierTest, PowerOfPowerComplex) {
+  Shape r0c64 = ShapeUtil::MakeShape(C64, {});
+  Shape r1f32 = ShapeUtil::MakeShape(F32, {7});
+  HloComputation::Builder builder(TestName());
+  HloInstruction* base = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r1f32, "param0"));
+  HloInstruction* exp1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, r0c64, "param1"));
+  HloInstruction* exp2 = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, r0c64, "param2"));
+  HloInstruction* inner_power = builder.AddInstruction(
+      HloInstruction::CreateBinary(r1f32, HloOpcode::kPower, base, exp1));
+  builder.AddInstruction(HloInstruction::CreateBinary(r1f32, HloOpcode::kPower,
+                                                      inner_power, exp2));
+
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  ASSERT_FALSE(simplifier.Run(module.get()).ValueOrDie());
+}
+
 // Test that A/1 is simplified to A for a scalar.
 TEST_F(AlgebraicSimplifierTest, DivOneScalar) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
@@ -767,6 +910,120 @@ TEST_F(AlgebraicSimplifierTest, PowNegative1) {
             1);
 }
 
+TEST_F(AlgebraicSimplifierTest, ZeroSizedConvolution) {
+  auto builder = HloComputation::Builder(TestName());
+  HloInstruction* lhs = builder.AddInstruction(HloInstruction::CreateParameter(
+      0, ShapeUtil::MakeShape(F32, {3, 3, 0}), "lhs"));
+
+  HloInstruction* rhs = builder.AddInstruction(HloInstruction::CreateParameter(
+      1, ShapeUtil::MakeShape(F32, {3, 0, 3}), "rhs"));
+
+  ConvolutionDimensionNumbers dnums;
+  dnums.set_input_batch_dimension(0);
+  dnums.add_input_spatial_dimensions(1);
+  dnums.set_input_feature_dimension(2);
+
+  dnums.set_output_batch_dimension(0);
+  dnums.add_output_spatial_dimensions(1);
+  dnums.set_output_feature_dimension(2);
+
+  dnums.add_kernel_spatial_dimensions(0);
+  dnums.set_kernel_input_feature_dimension(1);
+  dnums.set_kernel_output_feature_dimension(2);
+  Window window;
+  WindowDimension* dim = window.add_dimensions();
+  dim->set_size(3);
+  dim->set_padding_low(0);
+  dim->set_padding_high(0);
+  dim->set_stride(1);
+  dim->set_window_dilation(1);
+  dim->set_base_dilation(1);
+  dim->set_window_reversal(false);
+  // Create add computation.
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  builder.AddInstruction(HloInstruction::CreateConvolve(
+      ShapeUtil::MakeShape(F32, {3, 3, 3}), lhs, rhs, window, dnums));
+  module->AddEntryComputation(builder.Build());
+  HloPassFix<AlgebraicSimplifier> simplifier(/*is_layout_sensitive=*/false,
+                                             non_bitcasting_callback());
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Convolution(lhs, rhs));
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Broadcast(op::Constant()));
+}
+
+TEST_F(AlgebraicSimplifierTest, ZeroSizedReduceWindow) {
+  auto builder = HloComputation::Builder(TestName());
+  HloInstruction* param =
+      builder.AddInstruction(HloInstruction::CreateParameter(
+          0, ShapeUtil::MakeShape(F32, {3, 0}), "op"));
+  Window window;
+  for (int64 i = 0; i < 2; ++i) {
+    WindowDimension* dim = window.add_dimensions();
+    dim->set_size(1);
+    dim->set_padding_low(1);
+    dim->set_padding_high(1);
+    dim->set_window_dilation(1);
+    dim->set_base_dilation(1);
+  }
+  // Create add computation.
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  HloComputation* add_computation = nullptr;
+  {
+    HloComputation::Builder builder(TestName() + ".add");
+    const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+    HloInstruction* p0 = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, scalar_shape, "p0"));
+    HloInstruction* p1 = builder.AddInstruction(
+        HloInstruction::CreateParameter(1, scalar_shape, "p1"));
+    builder.AddInstruction(
+        HloInstruction::CreateBinary(scalar_shape, HloOpcode::kAdd, p0, p1));
+    add_computation = module->AddEmbeddedComputation(builder.Build());
+  }
+  builder.AddInstruction(HloInstruction::CreateReduceWindow(
+      ShapeUtil::MakeShape(F32, {5, 2}), param,
+      builder.AddInstruction(
+          HloInstruction::CreateConstant(Literal::CreateR0<float>(0.0f))),
+      window, add_computation));
+  module->AddEntryComputation(builder.Build());
+  HloPassFix<AlgebraicSimplifier> simplifier(/*is_layout_sensitive=*/false,
+                                             non_bitcasting_callback());
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::ReduceWindow(param, op::Constant()));
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Broadcast(op::Constant()));
+}
+
+TEST_F(AlgebraicSimplifierTest, ZeroSizedPad) {
+  auto builder = HloComputation::Builder(TestName());
+  HloInstruction* param =
+      builder.AddInstruction(HloInstruction::CreateParameter(
+          0, ShapeUtil::MakeShape(F32, {3, 0}), "op"));
+  PaddingConfig padding;
+  for (int i = 0; i < 2; ++i) {
+    PaddingConfig::PaddingConfigDimension* dimension = padding.add_dimensions();
+    dimension->set_edge_padding_low(1);
+    dimension->set_edge_padding_high(1);
+    dimension->set_interior_padding(0);
+  }
+  builder.AddInstruction(HloInstruction::CreatePad(
+      ShapeUtil::MakeShape(F32, {5, 2}), param,
+      builder.AddInstruction(
+          HloInstruction::CreateConstant(Literal::CreateR0(0.0f))),
+      padding));
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Pad(param, op::Constant()));
+  HloPassFix<AlgebraicSimplifier> simplifier(/*is_layout_sensitive=*/false,
+                                             non_bitcasting_callback());
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Broadcast(op::Constant()));
+}
+
 TEST_F(AlgebraicSimplifierTest, ReshapeBroadcast) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
 
@@ -1260,7 +1517,7 @@ TEST_F(AlgebraicSimplifierTest, CopiesMerged) {
   HloComputation::Builder builder(TestName());
   HloInstruction* param0 =
       builder.AddInstruction(HloInstruction::CreateParameter(
-          0, ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(F32, {2, 2, 2}),
+          0, ShapeUtil::MakeShapeWithDescendingLayout(F32, {2, 2, 2}),
           "param0"));
 
   HloInstruction* copy1 = builder.AddInstruction(HloInstruction::CreateUnary(
@@ -2138,8 +2395,10 @@ TEST_F(AlgebraicSimplifierTest, IteratorInvalidation) {
       builder.AddInstruction(HloInstruction::CreateParameter(0, r1f32, "x"));
   HloInstruction* y =
       builder.AddInstruction(HloInstruction::CreateParameter(1, r1f32, "y"));
-  builder.AddInstruction(
-      HloInstruction::CreateBinary(r1f32, HloOpcode::kDot, x, y));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  builder.AddInstruction(HloInstruction::CreateDot(r1f32, x, y, dot_dnums));
   std::unique_ptr<HloComputation> dot_computation(builder.Build());
 
   HloComputation::Builder call_builder(TestName() + ".Call");
@@ -2236,5 +2495,210 @@ TEST_F(AlgebraicSimplifierTest, TrivialDynamicUpdateSlice) {
               op::DynamicSlice(op::Parameter(), op::Parameter()));
 }
 
+class DotStrengthReductionTest
+    : public AlgebraicSimplifierTest,
+      public ::testing::WithParamInterface<
+          ::testing::tuple<int, int, int, bool, bool>> {};
+TEST_P(DotStrengthReductionTest, DotStrengthReduction) {
+  int m, k, n;
+  bool transpose_lhs, transpose_rhs;
+  std::tie(m, k, n, transpose_lhs, transpose_rhs) = GetParam();
+
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {m, n});
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {m, k});
+  Shape transposed_lhs_shape = ShapeUtil::MakeShape(F32, {k, m});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {k, n});
+  Shape transposed_rhs_shape = ShapeUtil::MakeShape(F32, {n, k});
+  HloComputation::Builder builder(TestName());
+
+  auto lhs = builder.AddInstruction(HloInstruction::CreateParameter(
+      0, transpose_lhs ? transposed_lhs_shape : lhs_shape, "lhs"));
+  if (transpose_lhs) {
+    lhs = builder.AddInstruction(
+        HloInstruction::CreateTranspose(lhs_shape, lhs, {1, 0}));
+  }
+  auto rhs = builder.AddInstruction(HloInstruction::CreateParameter(
+      1, transpose_rhs ? transposed_rhs_shape : rhs_shape, "rhs"));
+  if (transpose_rhs) {
+    rhs = builder.AddInstruction(
+        HloInstruction::CreateTranspose(rhs_shape, rhs, {1, 0}));
+  }
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  builder.AddInstruction(
+      HloInstruction::CreateDot(dot_shape, lhs, rhs, dot_dnums));
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  TF_ASSERT_OK_AND_ASSIGN(bool changed, simplifier.Run(module.get()));
+  const bool dot_should_be_transformed = m == 1 || k == 1 || n == 1;
+  const bool computation_should_be_modified =
+      dot_should_be_transformed || (transpose_lhs && transpose_rhs);
+  EXPECT_EQ(changed, computation_should_be_modified);
+  bool has_no_dot = true;
+  for (const auto& hlo : computation->instructions()) {
+    if (hlo->opcode() == HloOpcode::kDot) {
+      has_no_dot = false;
+      break;
+    }
+  }
+  EXPECT_EQ(has_no_dot, dot_should_be_transformed);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    DotStrengthReductionTestInstantiation, DotStrengthReductionTest,
+    ::testing::Combine(::testing::Values(1, 2), ::testing::Values(1, 2),
+                       ::testing::Values(1, 2), ::testing::Bool(),
+                       ::testing::Bool()));
+
+struct DotOfConcatTestSpec {
+  int64 m;
+  int64 k;
+  int64 n;
+};
+
+class DotOfConcatSimplificationTest
+    : public HloTestBase,
+      public ::testing::WithParamInterface<DotOfConcatTestSpec> {};
+
+// Test that we transform
+//  dot(const, concat(A, B, C))
+// to
+//  add(dot(const_0, A), dot(const_1, B),  dot(const_2, C))
+TEST_P(DotOfConcatSimplificationTest, ConstantLHS) {
+  HloComputation::Builder builder(TestName());
+
+  DotOfConcatTestSpec spec = GetParam();
+
+  ASSERT_GE(spec.k, 3);
+
+  int64 k0 = spec.k / 3;
+  int64 k1 = spec.k / 3;
+  int64 k2 = spec.k - k0 - k1;
+
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {spec.m, spec.k});
+  auto* lhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR2F32Linspace(
+          /*from=*/10.0, /*to=*/10000.0, /*rows=*/spec.m, /*cols=*/spec.k)));
+
+  Shape rhs0_shape = ShapeUtil::MakeShape(F32, {k0, spec.n});
+  Shape rhs1_shape = ShapeUtil::MakeShape(F32, {k1, spec.n});
+  Shape rhs2_shape = ShapeUtil::MakeShape(F32, {k2, spec.n});
+
+  HloInstruction* rhs0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, rhs0_shape, "rhs0"));
+  HloInstruction* rhs1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, rhs1_shape, "rhs1"));
+  HloInstruction* rhs2 = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, rhs2_shape, "rhs2"));
+
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {spec.k, spec.n});
+  HloInstruction* rhs = builder.AddInstruction(
+      HloInstruction::CreateConcatenate(rhs_shape, {rhs0, rhs1, rhs2}, 0));
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {spec.m, spec.n});
+  builder.AddInstruction(
+      HloInstruction::CreateDot(dot_shape, lhs, rhs, dot_dnums));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(module.get()));
+  ASSERT_TRUE(run_successful);
+
+  EXPECT_TRUE(
+      ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape));
+
+  auto match_dot_0 = op::Dot(op::Slice(op::Constant()), op::Parameter(0));
+  auto match_dot_1 = op::Dot(op::Slice(op::Constant()), op::Parameter(1));
+  auto match_dot_2 = op::Dot(op::Slice(op::Constant()), op::Parameter(2));
+  EXPECT_THAT(computation->root_instruction(),
+              op::Add(op::Add(match_dot_0, match_dot_1), match_dot_2));
+}
+
+// Test that we transform
+//  dot(concat(A, B, C), const)
+// to
+//  add(dot(A, const_0), dot(B, const_1),  dot(C, const_2))
+TEST_P(DotOfConcatSimplificationTest, ConstantRHS) {
+  HloComputation::Builder builder(TestName());
+
+  DotOfConcatTestSpec spec = GetParam();
+
+  ASSERT_GE(spec.k, 4);
+
+  int64 k0 = spec.k / 4;
+  int64 k1 = spec.k / 4;
+  int64 k2 = spec.k / 4;
+  int64 k3 = spec.k - k0 - k1 - k2;
+
+  Shape lhs0_shape = ShapeUtil::MakeShape(F32, {spec.m, k0});
+  Shape lhs1_shape = ShapeUtil::MakeShape(F32, {spec.m, k1});
+  Shape lhs2_shape = ShapeUtil::MakeShape(F32, {spec.m, k2});
+  Shape lhs3_shape = ShapeUtil::MakeShape(F32, {spec.m, k3});
+
+  HloInstruction* lhs0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, lhs0_shape, "lhs0"));
+  HloInstruction* lhs1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, lhs1_shape, "lhs1"));
+  HloInstruction* lhs2 = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, lhs2_shape, "lhs2"));
+  HloInstruction* lhs3 = builder.AddInstruction(
+      HloInstruction::CreateParameter(3, lhs2_shape, "lhs3"));
+
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {spec.m, spec.k});
+  HloInstruction* lhs =
+      builder.AddInstruction(HloInstruction::CreateConcatenate(
+          lhs_shape, {lhs0, lhs1, lhs2, lhs3}, 1));
+
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {spec.k, spec.m});
+  auto* rhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR2F32Linspace(
+          /*from=*/10.0, /*to=*/10000.0, /*rows=*/spec.k, /*cols=*/spec.m)));
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {spec.m, spec.n});
+  builder.AddInstruction(
+      HloInstruction::CreateDot(dot_shape, lhs, rhs, dot_dnums));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(module.get()));
+  ASSERT_TRUE(run_successful);
+  EXPECT_TRUE(
+      ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape));
+
+  auto match_dot_0 = op::Dot(op::Parameter(0), op::Slice(op::Constant()));
+  auto match_dot_1 = op::Dot(op::Parameter(1), op::Slice(op::Constant()));
+  auto match_dot_2 = op::Dot(op::Parameter(2), op::Slice(op::Constant()));
+  auto match_dot_3 = op::Dot(op::Parameter(3), op::Slice(op::Constant()));
+  EXPECT_THAT(computation->root_instruction(),
+              op::Add(op::Add(op::Add(match_dot_0, match_dot_1), match_dot_2),
+                      match_dot_3));
+}
+
+DotOfConcatTestSpec kDotOfConcatTestSpecs[] = {
+    {/*m=*/3, /*k=*/9, /*n=*/3},    //
+    {/*m=*/3, /*k=*/20, /*n=*/3},   //
+    {/*m=*/1, /*k=*/18, /*n=*/5},   //
+    {/*m=*/20, /*k=*/20, /*n=*/1},  //
+    {/*m=*/1, /*k=*/16, /*n=*/1},   //
+};
+
+INSTANTIATE_TEST_CASE_P(DotOfConcatSimplificationTestInstantiation,
+                        DotOfConcatSimplificationTest,
+                        ::testing::ValuesIn(kDotOfConcatTestSpecs));
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc
index ad2fee2d39a8ca183b87212bdeea22c351aaa88a..4e80679c11dfdf7fdf8077a9f354139a4cab6803 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.cc
+++ b/tensorflow/compiler/xla/service/allocation_tracker.cc
@@ -27,191 +27,161 @@ limitations under the License.
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/stream_executor_no_cuda.h"
-
-namespace se = ::perftools::gputools;
 
 namespace xla {
 
-AllocationTracker::AllocationTracker() : next_handle_(1) {}
-
-GlobalDataHandle AllocationTracker::Register(Backend* backend,
-                                             int device_ordinal,
-                                             se::DeviceMemoryBase device_memory,
-                                             const Shape& shape,
-                                             const string& tag) {
-  tensorflow::mutex_lock lock(allocation_mutex_);
+StatusOr<GlobalDataHandle> AllocationTracker::Register(
+    std::unique_ptr<ShapedBuffer> shaped_buffer, const string& tag) {
+  tensorflow::mutex_lock lock(mutex_);
   VLOG(2) << "Register";
-  return RegisterInternal(backend, device_ordinal, device_memory, shape, tag,
-                          /*initial_ref_count=*/1);
+  return RegisterInternal(std::move(shaped_buffer), tag);
 }
 
-GlobalDataHandle AllocationTracker::RegisterInternal(
-    Backend* backend, int device_ordinal, se::DeviceMemoryBase device_memory,
-    const Shape& shape, const string& tag, int initial_ref_count) {
+StatusOr<GlobalDataHandle> AllocationTracker::RegisterInternal(
+    std::unique_ptr<ShapedBuffer> shaped_buffer, const string& tag) {
   VLOG(2) << "RegisterInternal("
           << "tag: \"" << tag << "\" "
-          << "device_ordinal: " << device_ordinal << " "
-          << "device_memory: " << device_memory.opaque() << " "
-          << "shape: " << shape.ShortDebugString() << ")";
-  TF_CHECK_OK(ShapeUtil::ValidateShape(shape));
-
-  int64 handle;
-  HandleMap& handle_map = GetOrCreateOpaqueToHandleMap(device_ordinal);
-  auto handle_it = handle_map.find(device_memory.opaque());
-  if (handle_it != handle_map.end()) {
-    handle = handle_it->second;
-    auto& allocation = FindOrDie(handle_to_allocation_, handle);
-    int ref_count = allocation->ref_count();
-    CHECK_GT(ref_count, 0);
-    VLOG(2) << "ref_count: " << ref_count << " -> " <<
-            (ref_count + initial_ref_count);
-    allocation->increment_ref_count(initial_ref_count);
-  } else {
-    handle = next_handle_++;
-    VLOG(2) << "ref_count: " << initial_ref_count;
-    InsertOrDie(&handle_map, device_memory.opaque(), handle);
-    auto inserted = handle_to_allocation_.emplace(
-        handle, MakeUnique<Allocation>(backend, device_ordinal, device_memory,
-                                       shape, tag, initial_ref_count));
-    CHECK(inserted.second);
+          << "shaped_buffer: " << *shaped_buffer;
+  if (shaped_buffer->platform() != backend_->platform()) {
+    return InvalidArgument(
+        "AllocationTracker for platform %s cannot register buffer from "
+        "platform %s",
+        backend_->platform()->Name().c_str(),
+        shaped_buffer->platform()->Name().c_str());
   }
 
+  int64 handle = next_handle_++;
+  std::vector<ShapeIndex> shape_indices;
+  ShapeUtil::ForEachSubshape(shaped_buffer->on_device_shape(),
+                             [this, &shape_indices](const Shape& /*subshape*/,
+                                                    const ShapeIndex& index) {
+                               shape_indices.push_back(index);
+                             });
+  for (const ShapeIndex& index : shape_indices) {
+    AddAllocationOrIncrementRefCount(shaped_buffer->buffer(index),
+                                     shaped_buffer->device_ordinal());
+  }
   GlobalDataHandle result;
   result.set_handle(handle);
+
+  handle_to_shaped_buffer_[handle] = std::move(shaped_buffer);
+
   VLOG(2) << "handle: " << handle;
 
   return result;
 }
 
 tensorflow::Status AllocationTracker::Unregister(const GlobalDataHandle& data) {
-  tensorflow::mutex_lock lock(allocation_mutex_);
-  TF_ASSIGN_OR_RETURN(Allocation * allocation, ResolveInternal(data));
-  std::set<void*> deallocated_buffers;
-  TF_RETURN_IF_ERROR(
-      DeallocateShape(allocation->backend(), allocation->device_ordinal(),
-                      allocation->mutable_device_memory(), allocation->shape(),
-                      &deallocated_buffers));
-  return tensorflow::Status::OK();
-}
-
-tensorflow::Status AllocationTracker::DeallocateShape(
-    Backend* backend, int device_ordinal, se::DeviceMemoryBase* device_memory,
-    const Shape& shape, std::set<void*>* deallocated_buffers) {
-  VLOG(2) << "DeallocateShape("
-          << "shape: \"" << shape.ShortDebugString() << "\" "
-          << "device_memory: " << device_memory->opaque() << ")";
-  if (ContainsKey(*deallocated_buffers, device_memory->opaque())) {
-    // Buffer has already been deallocated. Nothing to do.
-    VLOG(2) << "already deallocated";
-    return tensorflow::Status::OK();
-  }
-
-  // Add buffer to deallocated set so we do not try to deallocate it again
-  // if it is encountered again while traversing a tuple.
-  deallocated_buffers->insert(device_memory->opaque());
-
-  HandleMap& handle_map = GetOrCreateOpaqueToHandleMap(device_ordinal);
-  auto handle_it = handle_map.find(device_memory->opaque());
-  if (handle_it != handle_map.end()) {
-    int64 handle = handle_it->second;
-    auto& allocation = FindOrDie(handle_to_allocation_, handle);
-    int ref_count = allocation->ref_count();
-    VLOG(2) << "ref_count: " << ref_count << " -> " << ref_count - 1;
-    allocation->decrement_ref_count();
-    if (allocation->ref_count() > 0) {
-      // Buffer is referred to by another allocation. Don't deallocate it.
-      return tensorflow::Status::OK();
-    }
-    handle_map.erase(device_memory->opaque());
+  tensorflow::mutex_lock lock(mutex_);
+  VLOG(2) << "Unregister("
+          << "handle: " << data.handle() << ")";
+  TF_ASSIGN_OR_RETURN(ShapedBuffer * shaped_buffer, ResolveInternal(data));
+  std::vector<ShapeIndex> shape_indices;
+  ShapeUtil::ForEachSubshape(shaped_buffer->on_device_shape(),
+                             [this, &shape_indices](const Shape& /*subshape*/,
+                                                    const ShapeIndex& index) {
+                               shape_indices.push_back(index);
+                             });
+  for (const ShapeIndex& index : shape_indices) {
+    TF_RETURN_IF_ERROR(DecrementRefCount(shaped_buffer->buffer(index),
+                                         shaped_buffer->device_ordinal()));
   }
 
-  if (ShapeUtil::IsTuple(shape)) {
-    // Traverse into tuple recursively deallocating buffers.
-    TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
-                        backend->stream_executor(device_ordinal));
-    TF_ASSIGN_OR_RETURN(std::vector<se::DeviceMemoryBase> elements,
-                        backend->transfer_manager()->ShallowCopyTupleFromDevice(
-                            executor, *device_memory, shape));
-
-    TF_RET_CHECK(ShapeUtil::TupleElementCount(shape) == elements.size())
-        << "tuple has unexpected number of elements: " << elements.size()
-        << " != " << ShapeUtil::TupleElementCount(shape);
-    for (size_t i = 0; i < elements.size(); ++i) {
-      VLOG(2) << "recursing onto the tuple elements";
-      TF_RETURN_IF_ERROR(DeallocateShape(backend, device_ordinal, &elements[i],
-                                         shape.tuple_shapes(i),
-                                         deallocated_buffers));
-    }
-  }
+  // Keep a nullptr as a tombstone for unregistered handles. This enables better
+  // error messages. That is, "handle has been deallocated" versus "handle does
+  // not exist".
+  handle_to_shaped_buffer_.at(data.handle()).reset();
 
-  return backend->memory_allocator()->Deallocate(device_ordinal, device_memory);
+  return tensorflow::Status::OK();
 }
 
 StatusOr<std::vector<GlobalDataHandle>> AllocationTracker::DeconstructTuple(
     const GlobalDataHandle& data) {
-  tensorflow::mutex_lock lock(allocation_mutex_);
-  TF_ASSIGN_OR_RETURN(Allocation * allocation, ResolveInternal(data));
+  tensorflow::mutex_lock lock(mutex_);
 
-  if (!ShapeUtil::IsTuple(allocation->shape())) {
+  TF_ASSIGN_OR_RETURN(ShapedBuffer * shaped_buffer, ResolveInternal(data));
+  if (!ShapeUtil::IsTuple(shaped_buffer->on_host_shape())) {
     return InvalidArgument("global data handle %lld is not a tuple",
                            data.handle());
   }
+  // If the on-host representation is a tuple, then the on-device one should be
+  // as well.
+  TF_RET_CHECK(ShapeUtil::IsTuple(shaped_buffer->on_device_shape()));
 
-  if (ShapeUtil::IsNestedTuple(allocation->shape())) {
+  if (ShapeUtil::IsNestedTuple(shaped_buffer->on_device_shape())) {
     return Unimplemented("deconstructing nested tuples not yet supported");
   }
 
-  TF_ASSIGN_OR_RETURN(
-      se::StreamExecutor * executor,
-      allocation->backend()->stream_executor(allocation->device_ordinal()));
-  TF_ASSIGN_OR_RETURN(
-      std::vector<se::DeviceMemoryBase> element_bases,
-      allocation->backend()->transfer_manager()->ShallowCopyTupleFromDevice(
-          executor, allocation->device_memory(), allocation->shape()));
-
   std::vector<GlobalDataHandle> element_handles;
-  element_handles.reserve(element_bases.size());
-  for (int i = 0; i < element_bases.size(); ++i) {
-    element_handles.push_back(RegisterInternal(
-        allocation->backend(), allocation->device_ordinal(), element_bases[i],
-        ShapeUtil::GetSubshape(allocation->shape(), {i}),
-        tensorflow::strings::StrCat(allocation->tag(), ".element_", i),
-        /*initial_ref_count=*/2));
+  for (int i = 0;
+       i < ShapeUtil::TupleElementCount(shaped_buffer->on_device_shape());
+       ++i) {
+    auto element_buffer = MakeUnique<ShapedBuffer>(
+        ShapeUtil::GetTupleElementShape(shaped_buffer->on_host_shape(), i),
+        ShapeUtil::GetTupleElementShape(shaped_buffer->on_device_shape(), i),
+        shaped_buffer->platform(), shaped_buffer->device_ordinal());
+    element_buffer->set_buffer(shaped_buffer->buffer(/*index=*/{i}),
+                               /*index=*/{});
+    TF_ASSIGN_OR_RETURN(
+        GlobalDataHandle element_handle,
+        RegisterInternal(std::move(element_buffer), "deconstructed tuple"));
+
+    element_handles.push_back(element_handle);
   }
   return std::move(element_handles);
 }
 
-StatusOr<const Allocation*> AllocationTracker::Resolve(
+StatusOr<const ShapedBuffer*> AllocationTracker::Resolve(
     const GlobalDataHandle& data) {
-  tensorflow::mutex_lock lock(allocation_mutex_);
+  tensorflow::mutex_lock lock(mutex_);
   return AllocationTracker::ResolveInternal(data);
 }
 
-StatusOr<Allocation*> AllocationTracker::ResolveInternal(
+StatusOr<ShapedBuffer*> AllocationTracker::ResolveInternal(
     const GlobalDataHandle& data) {
   VLOG(2) << "resolve:" << data.handle();
-  auto it = handle_to_allocation_.find(data.handle());
-  if (it == handle_to_allocation_.end()) {
+  auto it = handle_to_shaped_buffer_.find(data.handle());
+  if (it == handle_to_shaped_buffer_.end()) {
     return NotFound("no allocation record for global data handle: %lld",
                     data.handle());
   }
-  Allocation* allocation = it->second.get();
+  ShapedBuffer* shaped_buffer = it->second.get();
 
-  if (allocation->is_deallocated()) {
+  if (shaped_buffer == nullptr) {
     return InvalidArgument("global data handle %lld was previously deallocated",
                            data.handle());
   }
 
-  return allocation;
+  return shaped_buffer;
+}
+
+void AllocationTracker::AddAllocationOrIncrementRefCount(
+    perftools::gputools::DeviceMemoryBase device_memory, int device_ordinal) {
+  AllocationMap& allocation_map = opaque_to_allocation_map_[device_ordinal];
+  auto it = allocation_map.find(device_memory.opaque());
+  if (it == allocation_map.end()) {
+    allocation_map[device_memory.opaque()] = {device_memory, device_ordinal,
+                                              /*ref_count=*/1};
+  } else {
+    it->second.ref_count++;
+  }
 }
 
-AllocationTracker::HandleMap& AllocationTracker::GetOrCreateOpaqueToHandleMap(
-    int device_ordinal) {
-  if (opaque_to_handle_.size() <= device_ordinal) {
-    opaque_to_handle_.resize(device_ordinal + 1);
+Status AllocationTracker::DecrementRefCount(
+    perftools::gputools::DeviceMemoryBase device_memory, int device_ordinal) {
+  AllocationMap& allocation_map = opaque_to_allocation_map_[device_ordinal];
+  auto it = allocation_map.find(device_memory.opaque());
+  TF_RET_CHECK(it != allocation_map.end());
+  Allocation& allocation = it->second;
+  TF_RET_CHECK(allocation.ref_count >= 1);
+  if (allocation.ref_count == 1) {
+    TF_RETURN_IF_ERROR(backend_->memory_allocator()->Deallocate(
+        device_ordinal, &device_memory));
+    allocation_map.erase(it);
+  } else {
+    allocation.ref_count--;
   }
-  return opaque_to_handle_[device_ordinal];
+  return tensorflow::Status::OK();
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h
index ebbf35b6fe87bc7322ccb99cfe8f8eed56de06b3..807af8694972083d097604a67ee46d2f73d9545a 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.h
+++ b/tensorflow/compiler/xla/service/allocation_tracker.h
@@ -28,147 +28,92 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/stream_executor_no_cuda.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
 
-// A global allocation in device space, tracked by the XLA service.
-class Allocation {
- public:
-  Allocation(Backend* backend, int device_ordinal,
-             perftools::gputools::DeviceMemoryBase device_memory,
-             const Shape& shape, const string& tag, int initial_ref_count)
-      : backend_(backend),
-        device_ordinal_(device_ordinal),
-        device_memory_(device_memory),
-        shape_(shape),
-        tag_(tag),
-        ref_count_(initial_ref_count) {}
-
-  Backend* backend() const { return backend_; }
-  int device_ordinal() const { return device_ordinal_; }
-  perftools::gputools::DeviceMemoryBase device_memory() const {
-    return device_memory_;
-  }
-  const Shape& shape() const { return shape_; }
-  const string& tag() const { return tag_; }
-
-  bool is_deallocated() const {
-    CHECK_GE(ref_count_, 0);
-    return ref_count_ == 0;
-  }
-  int ref_count() const {
-    CHECK_GE(ref_count_, 0);
-    return ref_count_;
-  }
-  void increment_ref_count(int inc) {
-    CHECK_GT(ref_count_, 0);
-    CHECK_LE(ref_count_, INT_MAX - inc);
-    ref_count_ += inc;
-  }
-  void decrement_ref_count() {
-    CHECK_GT(ref_count_, 0);
-    --ref_count_;
-  }
-  perftools::gputools::DeviceMemoryBase* mutable_device_memory() {
-    return &device_memory_;
-  }
-
- private:
-  // The backend that the memory is allocated on.
-  Backend* backend_;
-
-  // The device that the memory is allocated on.
-  int device_ordinal_;
-
-  // The pointer to this allocation.
-  perftools::gputools::DeviceMemoryBase device_memory_;
-
-  // The shape of this allocation.
-  Shape shape_;
-
-  // An informal description of this allocation shown in tools.
-  string tag_;
-
-  // This is the number of Allocation objects which refer to this memory
-  // allocation.
-  int ref_count_;
-
-  // Return a string representation of this allocation for debugging or logging
-  // purposes.
-  string ToString() const;
-};
-
 // Tracks allocations for the XLA service; allocations can be registered
 // with shape/device/tag and resolved from a handle for later use.
 class AllocationTracker {
  public:
-  AllocationTracker();
+  // The allocator is used for deallocating memory when allocations are
+  // deregistered. All registered allocations must have the same platform as the
+  // allocator.
+  AllocationTracker(Backend* backend) : backend_(backend), next_handle_(1) {}
 
-  // Registers device memory with a given shape, device identifier, and tag, and
-  // returns a corresponding handle that can be used for talking to XLA
-  // clients.
-  GlobalDataHandle Register(Backend* backend, int device_ordinal,
-                            perftools::gputools::DeviceMemoryBase device_memory,
-                            const Shape& shape, const string& tag);
+  // Registers a shaped buffer of device memory, and returns a corresponding
+  // handle that can be used for talking to XLA clients.
+  StatusOr<GlobalDataHandle> Register(
+      std::unique_ptr<ShapedBuffer> shaped_buffer, const string& tag);
 
   // Unregister the allocation for the given data handle.
-  tensorflow::Status Unregister(const GlobalDataHandle& data);
+  Status Unregister(const GlobalDataHandle& data);
 
   // Returns a vector of global data handles that point to the tuple elements.
   StatusOr<std::vector<GlobalDataHandle>> DeconstructTuple(
       const GlobalDataHandle& Data);
 
-  // Resolve a handle from an XLA client to an allocation, or provide an
-  // error status to say whether it was not found (or found, but found
-  // deallocated).
-  StatusOr<const Allocation*> Resolve(const GlobalDataHandle& data);
+  // Resolve a handle from an XLA client to a shaped buffer, or provide an error
+  // status to say whether it was not found (or found, but found deallocated).
+  StatusOr<const ShapedBuffer*> Resolve(const GlobalDataHandle& data);
 
  private:
-  // Internal helper which resolves the given GlobalDataHandle to an Allocation.
-  StatusOr<Allocation*> ResolveInternal(const GlobalDataHandle& data)
-      EXCLUSIVE_LOCKS_REQUIRED(allocation_mutex_);
-
-  GlobalDataHandle RegisterInternal(
-      Backend* backend, int device_ordinal,
-      perftools::gputools::DeviceMemoryBase device_memory, const Shape& shape,
-      const string& tag, int initial_ref_count)
-      EXCLUSIVE_LOCKS_REQUIRED(allocation_mutex_);
-
-  // Helper function which deallocates the memory buffer containing the given
-  // shape referred to by device_memory. Tuples are traversed recursively
-  // deallocating all nested buffers. The parameter deallocated_buffers contains
-  // the set of buffers deallocated so far stored as opaque values (void *) from
-  // DeviceMemoryBase. Keeping track of deallocated buffers prevents
-  // double-freeing of buffers which may be referred to more than once in a
-  // nested tuple.
-  tensorflow::Status DeallocateShape(
-      Backend* backend, int device_ordinal,
-      perftools::gputools::DeviceMemoryBase* device_memory, const Shape& shape,
-      std::set<void*>* deallocated_buffers)
-      EXCLUSIVE_LOCKS_REQUIRED(allocation_mutex_);
-
-  // Returns the opaque_to_handle_ map for the given device_ordinal, creating
-  // a new map if there is not one for the device_ordinal.
-  using HandleMap = std::map<void*, int64>;
-  HandleMap& GetOrCreateOpaqueToHandleMap(int device_ordinal)
-      EXCLUSIVE_LOCKS_REQUIRED(allocation_mutex_);
-
-  tensorflow::mutex allocation_mutex_;  // Guards the allocation mapping.
+  // Data structure encapsulating single memory allocation on the device.
+  struct Allocation {
+    // The pointer to this allocation.
+    perftools::gputools::DeviceMemoryBase device_memory;
+
+    // The device that the memory is allocated on.
+    int device_ordinal;
+
+    // This is the number of times this memory allocation is referred to by
+    // registered data handles.
+    int ref_count;
+  };
+
+  // Internal helper which resolves the given GlobalDataHandle to a
+  // ShapedBuffer.
+  StatusOr<ShapedBuffer*> ResolveInternal(const GlobalDataHandle& data)
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Internal helper which registers a shaped buffer.
+  StatusOr<GlobalDataHandle> RegisterInternal(
+      std::unique_ptr<ShapedBuffer> shaped_buffer, const string& tag)
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Adds the given device address to the allocation tracker, or if it already
+  // exists, then increment it's reference count.
+  void AddAllocationOrIncrementRefCount(
+      perftools::gputools::DeviceMemoryBase device_memory, int device_ordinal)
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Decrements the reference count of the given device memory. Then, if it is
+  // zero, deallocate the memory.
+  Status DecrementRefCount(perftools::gputools::DeviceMemoryBase device_memory,
+                           int device_ordinal) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // A map from device memory opaque value to allocation. One such map is
+  // maintained per device ordinal.
+  using AllocationMap = tensorflow::gtl::FlatMap<const void*, Allocation>;
+
+  tensorflow::mutex mutex_;
+
+  // Backend to use with this tracker. The backend supplies the memory allocator
+  // to use when deallocating memory.
+  Backend* backend_;
 
   // The next handle to assign to an allocation, guarded by the same mutex as
   // the mapping as they'll be mutated at the same time.
-  int64 next_handle_ GUARDED_BY(allocation_mutex_);
+  int64 next_handle_ GUARDED_BY(mutex_);
 
-  // A map from DeviceMemoryBase to handle for each device_ordinal.
-  std::vector<HandleMap> opaque_to_handle_ GUARDED_BY(allocation_mutex_);
+  // A map from device ordinal to AllocationMap.
+  tensorflow::gtl::FlatMap<int, AllocationMap> opaque_to_allocation_map_
+      GUARDED_BY(mutex_);
 
-  // Mapping from GlobalDataHandle handle to the corresponding registered
-  // Allocation object.
-  std::map<int64, std::unique_ptr<Allocation>> handle_to_allocation_
-      GUARDED_BY(allocation_mutex_);
+  // A map from data handle to ShapedBuffer.
+  tensorflow::gtl::FlatMap<int64, std::unique_ptr<ShapedBuffer>>
+      handle_to_shaped_buffer_ GUARDED_BY(mutex_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(AllocationTracker);
 };
diff --git a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc b/tensorflow/compiler/xla/service/batchnorm_expander.cc
similarity index 58%
rename from tensorflow/compiler/xla/service/batchnorm_rewriter.cc
rename to tensorflow/compiler/xla/service/batchnorm_expander.cc
index c6193b3fbbd651088a823605af3ba84bca4a77ee..27ddfd47aa3096afd3e245af1ac3cedd9b48ce4a 100644
--- a/tensorflow/compiler/xla/service/batchnorm_rewriter.cc
+++ b/tensorflow/compiler/xla/service/batchnorm_expander.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/batchnorm_rewriter.h"
+#include "tensorflow/compiler/xla/service/batchnorm_expander.h"
 
 #include <algorithm>
 #include <memory>
@@ -45,9 +45,9 @@ limitations under the License.
 
 namespace xla {
 
-// BatchNormRewriterVisitor traverses the HLO computation and rewrites BatchNorm
+// BatchNormExpanderVisitor traverses the HLO computation and rewrites BatchNorm
 // operations into smaller operations.
-class BatchNormRewriterVisitor : public DfsHloVisitorWithDefault {
+class BatchNormExpanderVisitor : public DfsHloVisitorWithDefault {
  public:
   // Default visitor action is to do nothing and return OK.
   Status DefaultAction(HloInstruction* /*hlo_instruction*/) override {
@@ -68,10 +68,10 @@ class BatchNormRewriterVisitor : public DfsHloVisitorWithDefault {
   // Returns whether any batch norm ops were rewritten.
   const bool changed() const { return changed_; }
 
-  ~BatchNormRewriterVisitor() override = default;
+  ~BatchNormExpanderVisitor() override = default;
 
  private:
-  explicit BatchNormRewriterVisitor(HloComputation* computation,
+  explicit BatchNormExpanderVisitor(HloComputation* computation,
                                     bool rewrite_training_op,
                                     bool rewrite_inference_op,
                                     bool rewrite_grad_op, bool use_fusion)
@@ -94,7 +94,7 @@ class BatchNormRewriterVisitor : public DfsHloVisitorWithDefault {
     return computation_->parent()->AddEmbeddedComputation(b.Build(scalar_op));
   }
 
-  // Current HloComputation instance the BatchNormRewriter is
+  // Current HloComputation instance the BatchNormExpander is
   // traversing.
   HloComputation* computation_;
 
@@ -130,11 +130,11 @@ class BatchNormRewriterVisitor : public DfsHloVisitorWithDefault {
   }
 };
 
-bool BatchNormRewriterVisitor::Run(HloComputation* computation,
+bool BatchNormExpanderVisitor::Run(HloComputation* computation,
                                    bool rewrite_training_op,
                                    bool rewrite_inference_op,
                                    bool rewrite_grad_op, bool use_fusion) {
-  BatchNormRewriterVisitor visitor(
+  BatchNormExpanderVisitor visitor(
       computation,
       /*rewrite_training_op=*/rewrite_training_op,
       /*rewrite_inference_op=*/rewrite_inference_op,
@@ -144,11 +144,20 @@ bool BatchNormRewriterVisitor::Run(HloComputation* computation,
   return visitor.changed_;
 }
 
-Status BatchNormRewriterVisitor::HandleBatchNormTraining(
+Status BatchNormExpanderVisitor::HandleBatchNormTraining(
     HloInstruction* batch_norm) {
   if (!rewrite_training_op_) {
     return Status::OK();
   }
+
+  std::vector<HloInstruction*> added_instructions;
+  auto add = [&](std::unique_ptr<HloInstruction> inst) {
+    HloInstruction* added_inst = computation_->AddInstruction(std::move(inst));
+    added_instructions.push_back(added_inst);
+    return added_inst;
+  };
+  int64 instruction_count_before = computation_->instruction_count();
+
   // Expand batch norm training into smaller HLO ops.
   HloInstruction* operand = batch_norm->mutable_operand(0);
   const Shape operand_shape = operand->shape();
@@ -160,7 +169,7 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining(
       Literal::CreateR0<float>(size_in_elements / feature_count);
   TF_ASSIGN_OR_RETURN(elements_per_feature_literal,
                       elements_per_feature_literal->Convert(ptype));
-  auto elements_per_feature = computation_->AddInstruction(
+  auto elements_per_feature = add(
       HloInstruction::CreateConstant(std::move(elements_per_feature_literal)));
 
   HloInstruction* scale = batch_norm->mutable_operand(1);
@@ -169,14 +178,12 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining(
 
   auto zero_literal = Literal::CreateR0(0.0f);
   TF_ASSIGN_OR_RETURN(zero_literal, zero_literal->Convert(ptype));
-  auto zero = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(zero_literal)));
+  auto zero = add(HloInstruction::CreateConstant(std::move(zero_literal)));
 
   auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon());
   TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype));
-  auto epsilon = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(epsilon_literal)));
-
+  auto epsilon =
+      add(HloInstruction::CreateConstant(std::move(epsilon_literal)));
   std::vector<int64> dimensions_without_feature;
 
   for (int64 i = 0; i < ShapeUtil::Rank(operand_shape); ++i) {
@@ -185,109 +192,116 @@ Status BatchNormRewriterVisitor::HandleBatchNormTraining(
     }
   }
 
-  auto scale_broadcasted = computation_->AddInstruction(
+  auto scale_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, scale, {feature_index}));
 
-  auto offset_broadcasted = computation_->AddInstruction(
+  auto offset_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, offset, {feature_index}));
 
   HloComputation* add_reduce_computation =
       GetScalarBinaryComputation(ptype, HloOpcode::kAdd);
 
   // X^2.
-  auto operand_squared =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kMultiply, operand, operand));
+  auto operand_squared = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kMultiply, operand, operand));
   // Sum[X].
-  auto sum = computation_->AddInstruction(HloInstruction::CreateReduce(
-      feature_shape, operand, zero, dimensions_without_feature,
-      add_reduce_computation));
+  auto sum = add(HloInstruction::CreateReduce(feature_shape, operand, zero,
+                                              dimensions_without_feature,
+                                              add_reduce_computation));
 
   // Sum[X^2].
-  auto squared_sum = computation_->AddInstruction(HloInstruction::CreateReduce(
+  auto squared_sum = add(HloInstruction::CreateReduce(
       feature_shape, operand_squared, zero, dimensions_without_feature,
       add_reduce_computation));
 
   // Fuse two parallel reduces together to improve performance.
-  if (use_fusion_) {
-    auto tuple = computation_->AddInstruction(
-        HloInstruction::CreateTuple({sum, squared_sum}));
+  if (use_fusion_ && !batch_norm->has_sharding()) {
+    auto tuple = add(HloInstruction::CreateTuple({sum, squared_sum}));
 
     auto fused = computation_->CreateFusionInstruction(
         {tuple, sum, squared_sum, operand_squared},
         HloInstruction::FusionKind::kInput);
 
-    sum = computation_->AddInstruction(
-        HloInstruction::CreateGetTupleElement(feature_shape, fused, 0));
+    sum = add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 0));
 
-    squared_sum = computation_->AddInstruction(
-        HloInstruction::CreateGetTupleElement(feature_shape, fused, 1));
+    squared_sum =
+        add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 1));
   }
 
   // E[X].
-  auto mean = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto mean = add(HloInstruction::CreateBinary(
       feature_shape, HloOpcode::kDivide, sum, elements_per_feature));
 
-  auto mean_broadcasted = computation_->AddInstruction(
+  auto mean_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, mean, {feature_index}));
 
   // E[X^2].
-  auto square_mean = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto square_mean = add(HloInstruction::CreateBinary(
       feature_shape, HloOpcode::kDivide, squared_sum, elements_per_feature));
 
   // E^2[X].
-  auto mean_square = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto mean_square = add(HloInstruction::CreateBinary(
       feature_shape, HloOpcode::kMultiply, mean, mean));
 
   // Var[X].
-  auto var = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto var = add(HloInstruction::CreateBinary(
       feature_shape, HloOpcode::kSubtract, square_mean, mean_square));
 
-  auto var_broadcasted = computation_->AddInstruction(
-      HloInstruction::CreateBroadcast(operand_shape, var, {feature_index}));
+  auto var_broadcasted =
+      add(HloInstruction::CreateBroadcast(operand_shape, var, {feature_index}));
 
   // Var[X] + epsilon.
-  auto var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon));
+  auto var_add_epsilon = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon));
 
   auto neg_half_literal = Literal::CreateR0(-0.5f);
   TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype));
-  auto neg_half = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(neg_half_literal)));
+  auto neg_half =
+      add(HloInstruction::CreateConstant(std::move(neg_half_literal)));
 
   // 1 / Sqrt[Var[X] + epsilon].
-  auto rsqrt_var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half));
+  auto rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half));
 
   // X - E[X].
-  auto operand_minus_mean =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted));
+  auto operand_minus_mean = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon].
-  auto normalized = computation_->AddInstruction(
+  auto normalized = add(
       HloInstruction::CreateBinary(operand_shape, HloOpcode::kMultiply,
                                    operand_minus_mean, rsqrt_var_add_epsilon));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale.
-  auto scaled_normalized =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted));
+  auto scaled_normalized = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale + offset.
-  auto shifted_normalized = computation_->AddInstruction(
-      HloInstruction::CreateBinary(operand_shape, HloOpcode::kAdd,
-                                   scaled_normalized, offset_broadcasted));
-
-  TF_CHECK_OK(ReplaceWithNewInstruction(
-      batch_norm,
-      HloInstruction::CreateTuple({shifted_normalized, mean, var})));
+  auto shifted_normalized = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kAdd, scaled_normalized, offset_broadcasted));
+
+  auto tuple = HloInstruction::CreateTuple({shifted_normalized, mean, var});
+
+  if (batch_norm->has_sharding()) {
+    int64 instruction_count_after = computation_->instruction_count();
+    CHECK_EQ(instruction_count_after,
+             instruction_count_before + added_instructions.size());
+    HloSharding operand_sharding =
+        batch_norm->sharding().GetAsShapeTree(batch_norm->shape()).element({0});
+    for (HloInstruction* inst : added_instructions) {
+      if (ShapeUtil::Equal(inst->shape(), operand_shape)) {
+        inst->set_sharding(operand_sharding);
+      } else {
+        inst->set_sharding(HloSharding::Replicate());
+      }
+    }
+    tuple->set_sharding(batch_norm->sharding());
+  }
+  TF_CHECK_OK(ReplaceWithNewInstruction(batch_norm, std::move(tuple)));
   return Status::OK();
 }
 
-Status BatchNormRewriterVisitor::HandleBatchNormInference(
+Status BatchNormExpanderVisitor::HandleBatchNormInference(
     HloInstruction* batch_norm) {
   if (!rewrite_inference_op_) {
     return Status::OK();
@@ -317,58 +331,75 @@ Status BatchNormRewriterVisitor::HandleBatchNormInference(
     }
   }
 
-  auto scale_broadcasted = computation_->AddInstruction(
+  std::vector<HloInstruction*> added_instructions;
+  auto add = [&](std::unique_ptr<HloInstruction> inst) {
+    HloInstruction* added_inst = computation_->AddInstruction(std::move(inst));
+    added_instructions.push_back(added_inst);
+    return added_inst;
+  };
+  int64 instruction_count_before = computation_->instruction_count();
+
+  auto scale_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, scale, {feature_index}));
 
-  auto offset_broadcasted = computation_->AddInstruction(
+  auto offset_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, offset, {feature_index}));
 
-  auto mean_broadcasted = computation_->AddInstruction(
+  auto mean_broadcasted = add(
       HloInstruction::CreateBroadcast(operand_shape, mean, {feature_index}));
 
-  auto var_broadcasted = computation_->AddInstruction(
-      HloInstruction::CreateBroadcast(operand_shape, var, {feature_index}));
+  auto var_broadcasted =
+      add(HloInstruction::CreateBroadcast(operand_shape, var, {feature_index}));
 
   // Var[X] + epsilon.
-  auto var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon));
+  auto var_add_epsilon = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kAdd, var_broadcasted, epsilon));
 
   auto neg_half_literal = Literal::CreateR0(-0.5f);
   TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype));
-  auto neg_half = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(neg_half_literal)));
+  auto neg_half =
+      add(HloInstruction::CreateConstant(std::move(neg_half_literal)));
 
   // 1 / Sqrt[Var[X] + epsilon].
-  auto rsqrt_var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half));
+  auto rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kPower, var_add_epsilon, neg_half));
 
   // X - E[X].
-  auto operand_minus_mean =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted));
+  auto operand_minus_mean = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kSubtract, operand, mean_broadcasted));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon].
-  auto normalized = computation_->AddInstruction(
+  auto normalized = add(
       HloInstruction::CreateBinary(operand_shape, HloOpcode::kMultiply,
                                    operand_minus_mean, rsqrt_var_add_epsilon));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale.
-  auto scaled_normalized =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted));
+  auto scaled_normalized = add(HloInstruction::CreateBinary(
+      operand_shape, HloOpcode::kMultiply, normalized, scale_broadcasted));
 
   // (X - E[X]) / Sqrt[Var[X] + epsilon] * scale + offset.
   auto shifted_normalized = HloInstruction::CreateBinary(
       operand_shape, HloOpcode::kAdd, scaled_normalized, offset_broadcasted);
 
+  int64 instruction_count_after = computation_->instruction_count();
+  CHECK_EQ(instruction_count_after,
+           instruction_count_before + added_instructions.size());
+  if (batch_norm->has_sharding()) {
+    for (HloInstruction* inst : added_instructions) {
+      if (ShapeUtil::Equal(inst->shape(), operand_shape)) {
+        inst->set_sharding(batch_norm->sharding());
+      } else {
+        inst->set_sharding(HloSharding::Replicate());
+      }
+    }
+    shifted_normalized->set_sharding(batch_norm->sharding());
+  }
   TF_CHECK_OK(
       ReplaceWithNewInstruction(batch_norm, std::move(shifted_normalized)));
   return Status::OK();
 }
 
-Status BatchNormRewriterVisitor::HandleBatchNormGrad(
+Status BatchNormExpanderVisitor::HandleBatchNormGrad(
     HloInstruction* batch_norm) {
   // Use the following formulas to calculate gradients:
   // scale_grad =
@@ -385,6 +416,13 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad(
   if (!rewrite_grad_op_) {
     return Status::OK();
   }
+  std::vector<HloInstruction*> added_instructions;
+  auto add = [&](std::unique_ptr<HloInstruction> inst) {
+    HloInstruction* added_inst = computation_->AddInstruction(std::move(inst));
+    added_instructions.push_back(added_inst);
+    return added_inst;
+  };
+  int64 instruction_count_before = computation_->instruction_count();
 
   HloInstruction* activation = batch_norm->mutable_operand(0);
   const Shape activation_shape = activation->shape();
@@ -403,23 +441,22 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad(
       Literal::CreateR0<float>(size_in_elements / feature_count);
   TF_ASSIGN_OR_RETURN(elements_per_feature_literal,
                       elements_per_feature_literal->Convert(ptype));
-  auto elements_per_feature = computation_->AddInstruction(
+  auto elements_per_feature = add(
       HloInstruction::CreateConstant(std::move(elements_per_feature_literal)));
 
   auto zero_literal = Literal::CreateR0(0.0f);
   TF_ASSIGN_OR_RETURN(zero_literal, zero_literal->Convert(ptype));
-  auto zero = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(zero_literal)));
+  auto zero = add(HloInstruction::CreateConstant(std::move(zero_literal)));
 
   auto neg_half_literal = Literal::CreateR0(-0.5f);
   TF_ASSIGN_OR_RETURN(neg_half_literal, neg_half_literal->Convert(ptype));
-  auto neg_half = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(neg_half_literal)));
+  auto neg_half =
+      add(HloInstruction::CreateConstant(std::move(neg_half_literal)));
 
   auto epsilon_literal = Literal::CreateR0(batch_norm->epsilon());
   TF_ASSIGN_OR_RETURN(epsilon_literal, epsilon_literal->Convert(ptype));
-  auto epsilon = computation_->AddInstruction(
-      HloInstruction::CreateConstant(std::move(epsilon_literal)));
+  auto epsilon =
+      add(HloInstruction::CreateConstant(std::move(epsilon_literal)));
 
   std::vector<int64> dimensions_without_feature;
 
@@ -429,141 +466,148 @@ Status BatchNormRewriterVisitor::HandleBatchNormGrad(
     }
   }
 
-  auto scale_broadcasted =
-      computation_->AddInstruction(HloInstruction::CreateBroadcast(
-          activation_shape, scale, {feature_index}));
-  auto variance_broadcasted =
-      computation_->AddInstruction(HloInstruction::CreateBroadcast(
-          activation_shape, variance, {feature_index}));
+  auto scale_broadcasted = add(HloInstruction::CreateBroadcast(
+      activation_shape, scale, {feature_index}));
+  auto variance_broadcasted = add(HloInstruction::CreateBroadcast(
+      activation_shape, variance, {feature_index}));
 
   // E[X].
-  auto mean_broadcasted = computation_->AddInstruction(
+  auto mean_broadcasted = add(
       HloInstruction::CreateBroadcast(activation_shape, mean, {feature_index}));
 
   // rsqrt[Var[X] + epsilon].
-  auto rsqrt_var_add_epsilon_broadcasted =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          activation_shape, HloOpcode::kPower,
-          computation_->AddInstruction(
-              HloInstruction::CreateBinary(activation_shape, HloOpcode::kAdd,
-                                           variance_broadcasted, epsilon)),
-          neg_half));
-
-  auto rsqrt_var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          feature_shape, HloOpcode::kPower,
-          computation_->AddInstruction(HloInstruction::CreateBinary(
-              feature_shape, HloOpcode::kAdd, variance, epsilon)),
-          neg_half));
+  auto rsqrt_var_add_epsilon_broadcasted = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kPower,
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kAdd,
+                                       variance_broadcasted, epsilon)),
+      neg_half));
+
+  auto rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary(
+      feature_shape, HloOpcode::kPower,
+      add(HloInstruction::CreateBinary(feature_shape, HloOpcode::kAdd, variance,
+                                       epsilon)),
+      neg_half));
 
   // X - E[X].
-  auto activation_minus_mean = computation_->AddInstruction(
-      HloInstruction::CreateBinary(activation_shape, HloOpcode::kSubtract,
-                                   activation, mean_broadcasted));
+  auto activation_minus_mean = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kSubtract, activation, mean_broadcasted));
 
   // Grad[Y] * (X - E[X]).
-  auto grad_output_times_activiation_minus_mean = computation_->AddInstruction(
-      HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
-                                   grad_output, activation_minus_mean));
+  auto grad_output_times_activiation_minus_mean =
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
+                                       grad_output, activation_minus_mean));
 
   HloComputation* add_reduce_computation =
       GetScalarBinaryComputation(ptype, HloOpcode::kAdd);
 
   // sum(Grad[Y] * (X - E[X])).
   auto sum_grad_output_times_activiation_minus_mean =
-      computation_->AddInstruction(HloInstruction::CreateReduce(
+      add(HloInstruction::CreateReduce(
           feature_shape, grad_output_times_activiation_minus_mean, zero,
           dimensions_without_feature, add_reduce_computation));
 
   // Grad[beta] = Sum(Grad[Y]).
-  auto grad_beta = computation_->AddInstruction(HloInstruction::CreateReduce(
+  auto grad_beta = add(HloInstruction::CreateReduce(
       feature_shape, grad_output, zero, dimensions_without_feature,
       add_reduce_computation));
 
-  if (use_fusion_) {
-    auto tuple = computation_->AddInstruction(HloInstruction::CreateTuple(
+  if (use_fusion_ && !batch_norm->has_sharding()) {
+    auto tuple = add(HloInstruction::CreateTuple(
         {sum_grad_output_times_activiation_minus_mean, grad_beta}));
 
     auto fused = computation_->CreateFusionInstruction(
         {tuple, sum_grad_output_times_activiation_minus_mean, grad_beta},
         HloInstruction::FusionKind::kInput);
 
-    sum_grad_output_times_activiation_minus_mean = computation_->AddInstruction(
-        HloInstruction::CreateGetTupleElement(feature_shape, fused, 0));
+    sum_grad_output_times_activiation_minus_mean =
+        add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 0));
 
-    grad_beta = computation_->AddInstruction(
-        HloInstruction::CreateGetTupleElement(feature_shape, fused, 1));
+    grad_beta =
+        add(HloInstruction::CreateGetTupleElement(feature_shape, fused, 1));
   }
 
   // Grad[scale] = Sum(Grad[Y] * (X - E[X]) * rsqrt[Var[X] + epsilon]).
-  auto grad_scale = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto grad_scale = add(HloInstruction::CreateBinary(
       feature_shape, HloOpcode::kMultiply,
       sum_grad_output_times_activiation_minus_mean, rsqrt_var_add_epsilon));
 
   // I2 = Sum(Grad[Y])
-  auto I2 = computation_->AddInstruction(HloInstruction::CreateBroadcast(
-      activation_shape, grad_beta, {feature_index}));
+  auto i2 = add(HloInstruction::CreateBroadcast(activation_shape, grad_beta,
+                                                {feature_index}));
 
   // I3 = Sum(Grad[Y] * (X - E[X]))
-  auto I3 = computation_->AddInstruction(HloInstruction::CreateBroadcast(
+  auto i3 = add(HloInstruction::CreateBroadcast(
       activation_shape, sum_grad_output_times_activiation_minus_mean,
       {feature_index}));
 
   // I4 = (X - E[X]) * I3
-  auto I4 = computation_->AddInstruction(HloInstruction::CreateBinary(
-      activation_shape, HloOpcode::kMultiply, I3, activation_minus_mean));
+  auto i4 = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kMultiply, i3, activation_minus_mean));
 
   // I5 = I4 / (Var[X] + epsilon)
-  auto I5 = computation_->AddInstruction(HloInstruction::CreateBinary(
-      activation_shape, HloOpcode::kDivide, I4,
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          activation_shape, HloOpcode::kAdd, variance_broadcasted, epsilon))));
+  auto i5 = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kDivide, i4,
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kAdd,
+                                       variance_broadcasted, epsilon))));
 
   // scale * rsqrt[Var[X] + epsilon] * 1/N
-  auto scale_times_rsqrt_var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          activation_shape, HloOpcode::kMultiply, scale_broadcasted,
-          rsqrt_var_add_epsilon_broadcasted));
+  auto scale_times_rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kMultiply, scale_broadcasted,
+      rsqrt_var_add_epsilon_broadcasted));
 
-  scale_times_rsqrt_var_add_epsilon =
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          activation_shape, HloOpcode::kDivide,
-          scale_times_rsqrt_var_add_epsilon, elements_per_feature));
+  scale_times_rsqrt_var_add_epsilon = add(HloInstruction::CreateBinary(
+      activation_shape, HloOpcode::kDivide, scale_times_rsqrt_var_add_epsilon,
+      elements_per_feature));
 
-  auto I1 = computation_->AddInstruction(
-      HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
-                                   grad_output, elements_per_feature));
+  auto i1 =
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
+                                       grad_output, elements_per_feature));
 
   // I6 = I1 - I2 - I5
-  auto I6 = computation_->AddInstruction(HloInstruction::CreateBinary(
+  auto i6 = add(HloInstruction::CreateBinary(
       activation_shape, HloOpcode::kSubtract,
-      computation_->AddInstruction(HloInstruction::CreateBinary(
-          activation_shape, HloOpcode::kSubtract, I1, I2)),
-      I5));
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kSubtract,
+                                       i1, i2)),
+      i5));
 
   // Grad[X] = scale * rsqrt[Var[X] + epsilon] * 1/N * I6.
-  auto grad_activation = computation_->AddInstruction(
-      HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
-                                   scale_times_rsqrt_var_add_epsilon, I6));
+  auto grad_activation =
+      add(HloInstruction::CreateBinary(activation_shape, HloOpcode::kMultiply,
+                                       scale_times_rsqrt_var_add_epsilon, i6));
+  auto tuple =
+      HloInstruction::CreateTuple({grad_activation, grad_scale, grad_beta});
+  if (batch_norm->has_sharding()) {
+    int64 instruction_count_after = computation_->instruction_count();
+    CHECK_EQ(instruction_count_after,
+             instruction_count_before + added_instructions.size());
+    HloSharding activation_sharding =
+        batch_norm->sharding().GetAsShapeTree(batch_norm->shape()).element({0});
+    for (HloInstruction* inst : added_instructions) {
+      if (ShapeUtil::Equal(inst->shape(), activation_shape)) {
+        inst->set_sharding(activation_sharding);
+      } else {
+        inst->set_sharding(HloSharding::Replicate());
+      }
+    }
+    tuple->set_sharding(batch_norm->sharding());
+  }
 
-  TF_CHECK_OK(ReplaceWithNewInstruction(
-      batch_norm,
-      HloInstruction::CreateTuple({grad_activation, grad_scale, grad_beta})));
+  TF_CHECK_OK(ReplaceWithNewInstruction(batch_norm, std::move(tuple)));
 
   return Status::OK();
 }
 
-StatusOr<bool> BatchNormRewriter::Run(HloModule* module) {
-  XLA_VLOG_LINES(2, "BatchNormRewriter::Run(), before:\n" + module->ToString());
+StatusOr<bool> BatchNormExpander::Run(HloModule* module) {
+  XLA_VLOG_LINES(2, "BatchNormExpander::Run(), before:\n" + module->ToString());
   bool changed = false;
   for (auto* comp : module->MakeNonfusionComputations()) {
-    if (BatchNormRewriterVisitor::Run(comp, rewrite_training_op_,
+    if (BatchNormExpanderVisitor::Run(comp, rewrite_training_op_,
                                       rewrite_inference_op_, rewrite_grad_op_,
                                       use_fusion_)) {
       changed = true;
     }
   }
-  XLA_VLOG_LINES(2, "BatchNormRewriter::Run(), after:\n" + module->ToString());
+  XLA_VLOG_LINES(2, "BatchNormExpander::Run(), after:\n" + module->ToString());
   return changed;
 }
 
diff --git a/tensorflow/compiler/xla/service/batchnorm_rewriter.h b/tensorflow/compiler/xla/service/batchnorm_expander.h
similarity index 83%
rename from tensorflow/compiler/xla/service/batchnorm_rewriter.h
rename to tensorflow/compiler/xla/service/batchnorm_expander.h
index f601741d964376058a2bafade311ede4c8567fd2..4ad987085da91684bb7891070afeefd19be4138f 100644
--- a/tensorflow/compiler/xla/service/batchnorm_rewriter.h
+++ b/tensorflow/compiler/xla/service/batchnorm_expander.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_REWRITER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_REWRITER_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_EXPANDER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_EXPANDER_H_
 
 #include <utility>
 
@@ -26,18 +26,18 @@ namespace xla {
 // A pass which rewrites batch norm operations into more operations. Breaking a
 // big operation into smaller operations helps leverage our generic fusion
 // logic.
-class BatchNormRewriter : public HloPassInterface {
+class BatchNormExpander : public HloPassInterface {
  public:
   // When use_fusion is set, a multi-output fusion node is created.
-  BatchNormRewriter(bool rewrite_training_op = false,
+  BatchNormExpander(bool rewrite_training_op = false,
                     bool rewrite_inference_op = false,
                     bool rewrite_grad_op = false, bool use_fusion = true)
       : rewrite_training_op_(rewrite_training_op),
         rewrite_inference_op_(rewrite_inference_op),
         rewrite_grad_op_(rewrite_grad_op),
         use_fusion_(use_fusion) {}
-  ~BatchNormRewriter() = default;
-  tensorflow::StringPiece name() const override { return "batchnorm_rewriter"; }
+  ~BatchNormExpander() = default;
+  tensorflow::StringPiece name() const override { return "batchnorm_expander"; }
 
   // Run operation expander on the given computation. Returns whether the
   // computation was changed.
@@ -52,4 +52,4 @@ class BatchNormRewriter : public HloPassInterface {
 
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_REWRITER_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_BATCHNORM_EXPANDER_H_
diff --git a/tensorflow/compiler/xla/service/batchnorm_rewriter_test.cc b/tensorflow/compiler/xla/service/batchnorm_expander_test.cc
similarity index 93%
rename from tensorflow/compiler/xla/service/batchnorm_rewriter_test.cc
rename to tensorflow/compiler/xla/service/batchnorm_expander_test.cc
index 590f79aee51ccf410823b91fd8ad09fc7c429c7d..aa36e64b07099a372dab67babc7a18a2d39596bc 100644
--- a/tensorflow/compiler/xla/service/batchnorm_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/batchnorm_expander_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/batchnorm_rewriter.h"
+#include "tensorflow/compiler/xla/service/batchnorm_expander.h"
 
 #include <memory>
 #include <utility>
@@ -36,10 +36,10 @@ limitations under the License.
 namespace xla {
 namespace {
 
-using BatchNormRewriterTest = HloTestBase;
+using BatchNormExpanderTest = HloTestBase;
 
 // Test that we expand BatchNormTraining.
-TEST_F(BatchNormRewriterTest, BatchNormTraining) {
+TEST_F(BatchNormExpanderTest, BatchNormTraining) {
   Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2, 2});
   Shape scale_shape = ShapeUtil::MakeShape(F32, {2});
   Shape offset_shape = ShapeUtil::MakeShape(F32, {2});
@@ -63,7 +63,7 @@ TEST_F(BatchNormRewriterTest, BatchNormTraining) {
   auto computation = module->AddEntryComputation(builder.Build());
   HloInstruction* root = computation->root_instruction();
   EXPECT_EQ(root->opcode(), HloOpcode::kBatchNormTraining);
-  BatchNormRewriter rewriter(/*rewrite_training_op=*/true,
+  BatchNormExpander rewriter(/*rewrite_training_op=*/true,
                              /*rewrite_inference_op=*/true,
                              /*rewrite_grad_op=*/true);
   ASSERT_TRUE(rewriter.Run(module.get()).ValueOrDie());
@@ -73,7 +73,7 @@ TEST_F(BatchNormRewriterTest, BatchNormTraining) {
 }
 
 // Test that we expand BatchNormGrad.
-TEST_F(BatchNormRewriterTest, BatchNormGrad) {
+TEST_F(BatchNormExpanderTest, BatchNormGrad) {
   Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2, 2});
   Shape scale_shape = ShapeUtil::MakeShape(F32, {2});
   Shape mean_shape = ShapeUtil::MakeShape(F32, {2});
@@ -105,7 +105,7 @@ TEST_F(BatchNormRewriterTest, BatchNormGrad) {
   auto computation = module->AddEntryComputation(builder.Build());
   HloInstruction* root = computation->root_instruction();
   EXPECT_EQ(root->opcode(), HloOpcode::kBatchNormGrad);
-  BatchNormRewriter rewriter(/*rewrite_training_op=*/true,
+  BatchNormExpander rewriter(/*rewrite_training_op=*/true,
                              /*rewrite_inference_op=*/true,
                              /*rewrite_grad_op=*/true);
   ASSERT_TRUE(rewriter.Run(module.get()).ValueOrDie());
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 19a9ff04def5fc3d0b3739bbcf546a74114759a6..33fe11b81db1a1db40285d5c77d8900722025d1c 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -73,9 +73,10 @@ void BufferAllocation::AddAssignment(const LogicalBuffer& buffer, int64 offset,
   CHECK_LE(offset, size_) << "LogicalBuffer " << buffer
                           << " offset out of range";
   CHECK_LE(offset + size, size_)
-      << "LogicalBuffer " << buffer << " size out of range";
+      << "LogicalBuffer " << buffer
+      << " size out of range at offset: " << offset << " with size: " << size;
   CHECK_EQ(buffer.color(), color())
-      << "Buffer color " << buffer.color()
+      << "Buffer color " << buffer.color() << " for buffer " << buffer
       << " does not match allocation color " << color() << ".";
   OffsetSize offset_size;
   offset_size.offset = offset;
@@ -581,6 +582,7 @@ Status GatherComputationsByAllocationType(
            instruction->called_computations()) {
         switch (instruction->opcode()) {
           case HloOpcode::kCall:
+          case HloOpcode::kConditional:
           case HloOpcode::kWhile:
             // Call and while must be called from a computation with global
             // allocations as they may return references to buffers inside the
@@ -976,8 +978,8 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering(
   const HloOrdering& hlo_ordering = assignment->liveness().hlo_ordering();
   if (run_whole_module_heap_simulation) {
     // Run the heap simulation over the whole module. This reduces memory usage,
-    // since buffers for kCall and kWhile sub-computations are only live for the
-    // duration of their calling instructions.
+    // since buffers for kCall, kWhile, and kConditional sub-computations are
+    // only live for the duration of their calling instructions.
     VLOG(1) << "Running whole-module heap simulation";
     SequentialHloOrdering::HloModuleSequence module_sequence;
     FlatSet<const LogicalBuffer*> all_buffers_to_assign;
@@ -1272,7 +1274,8 @@ const LogicalBuffer* AddBufferToColocatedSet(
 }  // namespace
 
 // Builds sets of buffers in 'colocated_buffer_sets' which should be colocated
-// in the same allocation (currently just supports kWhile and kCall).
+// in the same allocation (currently just supports kWhile, kCall, and
+// kConditional).
 void BufferAssigner::BuildColocatedBufferSets(
     const HloModule* module, const BufferLiveness& buffer_liveness,
     const LogicalBuffer::SizeFunction& buffer_size,
@@ -1336,6 +1339,26 @@ void BufferAssigner::BuildColocatedBufferSets(
                                       &colocated_set);
               AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets);
             });
+      } else if (opcode == HloOpcode::kConditional) {
+        const HloInstruction* conditional_hlo = instruction;
+        ShapeUtil::ForEachSubshape(
+            conditional_hlo->shape(),
+            [this, conditional_hlo, &points_to_analysis, colocated_buffer_sets](
+                const Shape& /*subshape*/, const ShapeIndex& index) {
+              std::vector<const LogicalBuffer*> colocated_set;
+              // Add conditional.result.
+              AddBufferToColocatedSet(conditional_hlo, index,
+                                      points_to_analysis, &colocated_set);
+              // Add conditional.true_computation.root.
+              AddBufferToColocatedSet(
+                  conditional_hlo->true_computation()->root_instruction(),
+                  index, points_to_analysis, &colocated_set);
+              // Add conditional.false_computation.root.
+              AddBufferToColocatedSet(
+                  conditional_hlo->false_computation()->root_instruction(),
+                  index, points_to_analysis, &colocated_set);
+              AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets);
+            });
       }
     }
   }
@@ -1363,14 +1386,15 @@ void BufferAssigner::AssignColocatedBufferSets(
     }
 
     for (const LogicalBuffer* buffer : colocated_buffer_set) {
+      const int64 buffer_size = assignment->buffer_size_(*buffer);
       if (allocation == nullptr) {
         // TODO(b/32491382) Avoid current trivial solution of using new
         // allocations for each colocated buffer set. When liveness has
         // module-level scope, we can allow buffers to be shared across
         // computations (in some cases).
-        allocation = assignment->NewAllocation(
-            *buffer, assignment->buffer_size_(*buffer),
-            /*is_thread_local=*/false, /*is_reusable=*/true);
+        allocation = assignment->NewAllocation(*buffer, buffer_size,
+                                               /*is_thread_local=*/false,
+                                               /*is_reusable=*/true);
         if (entry_parameter_number >= 0) {
           // This colocated buffer set contains an entry parameter and other
           // logical buffers which use the parameter as read-only in a while
@@ -1381,8 +1405,11 @@ void BufferAssigner::AssignColocatedBufferSets(
         }
         colocated_allocations->insert(allocation->index());
       } else {
+        CHECK_EQ(buffer_size, allocation->size())
+            << "Buffer: " << *buffer << " size mismatch in colocated buffer "
+            << "allocation: " << *allocation;
         assignment->AddAssignment(allocation, *buffer, /*offset=*/0,
-                                  assignment->buffer_size_(*buffer));
+                                  buffer_size);
       }
       colocated_buffers->insert(buffer);
     }
diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
index 8fba8ef5e5c799eaac429017f4a0ff6a0315ba7c..6fc9d783f1b34de8c0f93c6aa342591891d08eaf 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc
@@ -166,6 +166,15 @@ class BufferAssignmentTest : public HloTestBase {
     return builder.Build();
   }
 
+  std::unique_ptr<HloComputation> BuildR0F32UnaryOpComputation(
+      HloOpcode opcode, const string& name) {
+    auto builder = HloComputation::Builder(name);
+    auto param =
+        builder.AddInstruction(HloInstruction::CreateParameter(0, r0f32_, "x"));
+    builder.AddInstruction(HloInstruction::CreateUnary(r0f32_, opcode, param));
+    return builder.Build();
+  }
+
   // Verifies that the given instruction hlo has a valid input buffer assigned,
   // i.e., the parameter number matches the op's.
   const BufferAllocation& GetAssignedInputAllocation(
@@ -740,6 +749,56 @@ TEST_F(BufferAssignmentTest, ExampleWhile) {
             << " instructions; total buffer size " << size0 + sizec + sizeb;
 }
 
+TEST_F(BufferAssignmentTest, ExampleConditional) {
+  auto module = CreateNewModule();
+  auto true_computation = module->AddEmbeddedComputation(
+      BuildR0F32UnaryOpComputation(HloOpcode::kCeil, "Ceil"));
+  auto false_computation = module->AddEmbeddedComputation(
+      BuildR0F32UnaryOpComputation(HloOpcode::kFloor, "Floor"));
+
+  auto builder = HloComputation::Builder(TestName());
+  auto pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  auto const1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(56.4f)));
+  auto const2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(12.4f)));
+  auto conditional = builder.AddInstruction(HloInstruction::CreateConditional(
+      r0f32_, pred, const1, true_computation, const2, false_computation));
+  module->AddEntryComputation(builder.Build());
+
+  const std::vector<const HloInstruction*> conditional_instrs =
+      GetInstructions(conditional);
+  const std::vector<const HloInstruction*> true_instrs =
+      GetInstructions(true_computation->root_instruction());
+  const std::vector<const HloInstruction*> false_instrs =
+      GetInstructions(false_computation->root_instruction());
+  EXPECT_EQ(4, conditional_instrs.size());
+  EXPECT_EQ(2, true_instrs.size());
+  EXPECT_EQ(2, false_instrs.size());
+
+  auto buffers = RunBufferAssignment(module.get());
+  ValidateBuffers(conditional_instrs, *buffers);
+  ValidateBuffers(true_instrs, *buffers);
+  ValidateBuffers(false_instrs, *buffers);
+
+  EXPECT_FALSE(BuffersDistinct(conditional_instrs, true_instrs, *buffers))
+      << "Should be reuse between conditional and true computation.";
+  EXPECT_FALSE(BuffersDistinct(conditional_instrs, false_instrs, *buffers))
+      << "Should be reuse between conditional and false computation.";
+  EXPECT_FALSE(BuffersDistinct(true_instrs, false_instrs, *buffers))
+      << "Should be reuse between true and false computations.";
+
+  const BufferAllocation& conditional_buffer =
+      GetTopLevelAllocation(*buffers, conditional);
+  const BufferAllocation& true_buffer =
+      GetTopLevelAllocation(*buffers, true_computation->root_instruction());
+  const BufferAllocation& false_buffer =
+      GetTopLevelAllocation(*buffers, false_computation->root_instruction());
+  EXPECT_EQ(conditional_buffer.size(), true_buffer.size());
+  EXPECT_EQ(conditional_buffer.size(), false_buffer.size());
+}
+
 TEST_F(BufferAssignmentTest, UnaryOpReuseChain) {
   // param0[100] ---> (exp) ---> (tanh) ---> (exp) ---> (neg)
   auto builder = HloComputation::Builder(TestName());
@@ -1360,10 +1419,13 @@ TEST_F(BufferAssignmentTest, OneTempAllocation) {
       HloInstruction::CreateParameter(1, shape_3x4, "param_b"));
   auto param_c = builder.AddInstruction(
       HloInstruction::CreateParameter(2, shape_4x4, "param_c"));
-  auto dot_ab = builder.AddInstruction(HloInstruction::CreateBinary(
-      shape_2x4, HloOpcode::kDot, param_a, param_b));
-  auto dot_bc = builder.AddInstruction(HloInstruction::CreateBinary(
-      shape_3x4, HloOpcode::kDot, param_b, param_c));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  auto dot_ab = builder.AddInstruction(
+      HloInstruction::CreateDot(shape_2x4, param_a, param_b, dot_dnums));
+  auto dot_bc = builder.AddInstruction(
+      HloInstruction::CreateDot(shape_3x4, param_b, param_c, dot_dnums));
   builder.AddInstruction(
       HloInstruction::CreateConcatenate(shape_5x4, {dot_ab, dot_bc}, 1));
 
@@ -1708,9 +1770,8 @@ TEST_F(WhileBufferAssignmentTest, WhileLoopsInterferingResultRange) {
       BufferAssigner::Run(
           module.get(),
           xla::MakeUnique<SequentialHloOrdering>(module.get(), sequence),
-          ByteSizeOf,
-          [](LogicalBuffer::Color) { return 1; })
-      .ConsumeValueOrDie();
+          ByteSizeOf, [](LogicalBuffer::Color) { return 1; })
+          .ConsumeValueOrDie();
 
   EXPECT_TRUE(BuffersDistinct({while0}, {while1}, *assignment));
 }
diff --git a/tensorflow/compiler/xla/service/buffer_liveness.cc b/tensorflow/compiler/xla/service/buffer_liveness.cc
index 513bfa3b7f7b45696093d03c1dd8250c548d260a..e7749252ce44f0daf7016f72d80401695eaaacb9 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness.cc
+++ b/tensorflow/compiler/xla/service/buffer_liveness.cc
@@ -102,8 +102,8 @@ bool BufferLiveness::live_range_strictly_before(const LogicalBuffer& a,
     return false;
   }
 
-  // Every user of 'a' must be a predecessor of 'b' or 'b' itself.
   for (const BufferAlias& alias : points_to_analysis_->GetBufferAliases(a)) {
+    // Every user of 'a' must be a predecessor of 'b' or 'b' itself.
     for (auto user : alias.instruction()->users()) {
       if (DoesNotUseOperandBuffer(alias.instruction(), alias.index(), user,
                                   points_to_analysis())) {
@@ -114,6 +114,16 @@ bool BufferLiveness::live_range_strictly_before(const LogicalBuffer& a,
         return false;
       }
     }
+
+    // If the root instruction aliases the buffer 'a', the live range of 'a' is
+    // until the end of the computation and can never be strictly before another
+    // buffer. This is needed to prevent the root instruction's buffers from
+    // being reused by later instructions even when the root is not the last
+    // instruction in the schedule.
+    if (alias.instruction()->parent()->root_instruction() ==
+        alias.instruction()) {
+      return false;
+    }
   }
 
   // If 'b' is a user of 'a' then the buffers interfere unless 'a.instruction'
diff --git a/tensorflow/compiler/xla/service/buffer_liveness_test.cc b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
index bbb42d494b8003176d4911bacbe8a10dc5fc7c6a..f623aef67a4f98b447a9a15634a78deb60cfe6f1 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
@@ -167,11 +167,10 @@ TEST_F(BufferLivenessTest, MultipleEntryParameters_Sequential) {
 
   SequentialHloOrdering::HloModuleSequence sequence;
   sequence.insert({entry, {param0, negate, param1, exp, add}});
-  auto liveness = BufferLiveness::Run(
-                      module.get(),
-                      xla::MakeUnique<SequentialHloOrdering>(
-                          module.get(), sequence))
-                      .ConsumeValueOrDie();
+  auto liveness =
+      BufferLiveness::Run(module.get(), xla::MakeUnique<SequentialHloOrdering>(
+                                            module.get(), sequence))
+          .ConsumeValueOrDie();
 
   // Entry parameters interfere as if they are defined simultaneously at
   // the very beginning.
@@ -296,7 +295,7 @@ TEST_F(BufferLivenessTest, OverlappedBuffersSequentialOrder) {
   module_sequence.emplace(computation, order);
   auto liveness =
       BufferLiveness::Run(module.get(), xla::MakeUnique<SequentialHloOrdering>(
-          module.get(), module_sequence))
+                                            module.get(), module_sequence))
           .ConsumeValueOrDie();
 
   EXPECT_TRUE(InstructionsMayInterfere(*liveness, param, negate));
@@ -312,6 +311,48 @@ TEST_F(BufferLivenessTest, OverlappedBuffersSequentialOrder) {
   EXPECT_FALSE(InstructionsMayInterfere(*liveness, add, exp));
 }
 
+TEST_F(BufferLivenessTest, RootInstructionIsNotLastInSequentialOrder) {
+  // Tests that when the root instruction is not the last instruction in the
+  // schedule, the live range of its buffers interfere with the buffers of the
+  // later instructions.
+  //
+  // Two sets of independent instructions are executed in the computation.
+  // param --> add (root)
+  // recv --> recv-done --> send --> send-done
+  //
+  // Sequential order:
+  //  param, add (root), recv, recv-done, send, send-done
+  auto builder = HloComputation::Builder(TestName());
+  auto param =
+      builder.AddInstruction(HloInstruction::CreateParameter(0, vec_, "param"));
+  auto add = builder.AddInstruction(
+      HloInstruction::CreateBinary(vec_, HloOpcode::kAdd, param, param));
+  auto recv = builder.AddInstruction(
+      HloInstruction::CreateRecv(vec_, /*channel_id=*/0));
+  auto recv_done = builder.AddInstruction(HloInstruction::CreateRecvDone(recv));
+  auto send = builder.AddInstruction(
+      HloInstruction::CreateSend(recv_done, /*channel_id=*/1));
+  auto send_done = builder.AddInstruction(HloInstruction::CreateSendDone(send));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build(add));
+
+  SequentialHloOrdering::HloModuleSequence module_sequence;
+  std::vector<const HloInstruction*> order = {param,     add,  recv,
+                                              recv_done, send, send_done};
+  module_sequence.emplace(computation, order);
+  auto liveness =
+      BufferLiveness::Run(module.get(), xla::MakeUnique<SequentialHloOrdering>(
+                                            module.get(), module_sequence))
+          .ConsumeValueOrDie();
+
+  EXPECT_FALSE(InstructionsMayInterfere(*liveness, param, add));
+  // Check the root instruction (add) buffer interferes with the recv buffer.
+  EXPECT_TRUE(
+      liveness->MayInterfere(GetBuffer(*liveness, add, /*index=*/{}),
+                             GetBuffer(*liveness, recv, /*index=*/{0})));
+}
+
 TEST_F(BufferLivenessTest, TupleLiveOut) {
   // Verify MaybeLiveOut with nested tuples. Result of computation looks like:
   //
@@ -625,9 +666,8 @@ class FusedDynamicUpdateSliceLivenessTest : public BufferLivenessTest {
 
     // Run BufferLiveness on 'module'.
     auto liveness =
-        BufferLiveness::Run(module.get(),
-                            xla::MakeUnique<DependencyHloOrdering>(
-                                module.get()))
+        BufferLiveness::Run(
+            module.get(), xla::MakeUnique<DependencyHloOrdering>(module.get()))
             .ConsumeValueOrDie();
     // Return whether or not buffers interference is detected between
     // 'tuple_param0' and 'tuple_root' at shape index '{1}'.
@@ -738,9 +778,8 @@ class DynamicUpdateSliceLivenessTest : public BufferLivenessTest {
     module->AddEmbeddedComputation(builder.Build());
     // Run BufferLiveness on 'module'.
     auto liveness =
-        BufferLiveness::Run(module.get(),
-                            xla::MakeUnique<DependencyHloOrdering>(
-                                module.get()))
+        BufferLiveness::Run(
+            module.get(), xla::MakeUnique<DependencyHloOrdering>(module.get()))
             .ConsumeValueOrDie();
     // Return whether or not buffers interference is detected between
     // 'tuple_param0' and 'tuple_root' at shape index '{1}'.
diff --git a/tensorflow/compiler/xla/service/call_graph.cc b/tensorflow/compiler/xla/service/call_graph.cc
index 1adecdb939cb2c1259003d3be2c90b5a299b0f30..13eb02ca012f44b2b5ed7c6f5becb7d54b07c33c 100644
--- a/tensorflow/compiler/xla/service/call_graph.cc
+++ b/tensorflow/compiler/xla/service/call_graph.cc
@@ -54,6 +54,7 @@ std::ostream& operator<<(std::ostream& out, const CallContext& context) {
 CallContext GetInstructionCallContext(const HloInstruction* instruction) {
   switch (instruction->opcode()) {
     case HloOpcode::kCall:
+    case HloOpcode::kConditional:
     case HloOpcode::kWhile:
       return CallContext::kSequential;
     case HloOpcode::kMap:
diff --git a/tensorflow/compiler/xla/service/call_graph_test.cc b/tensorflow/compiler/xla/service/call_graph_test.cc
index 0395ea8c8b52315f7ca2221f412750ebadda2dd8..1ea7d538cd515c3098b6a1f03c6146d288330406 100644
--- a/tensorflow/compiler/xla/service/call_graph_test.cc
+++ b/tensorflow/compiler/xla/service/call_graph_test.cc
@@ -34,12 +34,13 @@ using ::testing::UnorderedElementsAre;
 class CallGraphTest : public HloTestBase {
  protected:
   // Build and return a trivial computation taking and returning a scalar.
-  std::unique_ptr<HloComputation> MakeScalarComputation() {
+  std::unique_ptr<HloComputation> MakeScalarComputation(
+      HloOpcode opcode = HloOpcode::kNegate) {
     HloComputation::Builder builder(TestName() + ".ScalarComputation");
     HloInstruction* param0 = builder.AddInstruction(
         HloInstruction::CreateParameter(0, kScalarShape, "param0"));
     builder.AddInstruction(
-        HloInstruction::CreateUnary(kScalarShape, HloOpcode::kNegate, param0));
+        HloInstruction::CreateUnary(kScalarShape, opcode, param0));
     return builder.Build();
   }
 
@@ -236,6 +237,54 @@ TEST_F(CallGraphTest, ContextBothComputations) {
   EXPECT_EQ(CallContext::kBoth, sub_node.context());
 }
 
+TEST_F(CallGraphTest, ComputationWithConditional) {
+  // Test a call graph of a module with a conditional.
+  auto module = CreateNewModule();
+  HloComputation* true_computation =
+      module->AddEmbeddedComputation(MakeScalarComputation(HloOpcode::kCeil));
+  HloComputation* false_computation =
+      module->AddEmbeddedComputation(MakeScalarComputation(HloOpcode::kFloor));
+
+  HloComputation::Builder builder(TestName());
+  HloInstruction* pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  HloInstruction* const1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(56.4f)));
+  HloInstruction* const2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(12.6f)));
+  HloInstruction* conditional =
+      builder.AddInstruction(HloInstruction::CreateConditional(
+          kScalarShape, pred, const1, true_computation, const2,
+          false_computation));
+  HloComputation* entry_computation =
+      module->AddEntryComputation(builder.Build());
+
+  std::unique_ptr<CallGraph> call_graph = CallGraph::Build(module.get());
+
+  EXPECT_EQ(3, call_graph->nodes().size());
+
+  const CallGraphNode& entry_node = call_graph->GetNode(entry_computation);
+  EXPECT_EQ(entry_computation, entry_node.computation());
+  EXPECT_EQ(1, entry_node.callsites().size());
+
+  const CallSite& conditional_callsite = entry_node.callsites()[0];
+  EXPECT_EQ(conditional, conditional_callsite.instruction());
+  EXPECT_THAT(conditional_callsite.called_computations(),
+              UnorderedElementsAre(true_computation, false_computation));
+  EXPECT_EQ(CallContext::kSequential, conditional_callsite.context());
+  EXPECT_EQ(entry_node.GetCallSite(conditional), &conditional_callsite);
+
+  const CallGraphNode& true_node = call_graph->GetNode(true_computation);
+  EXPECT_TRUE(true_node.callees().empty());
+  EXPECT_EQ(1, true_node.callers().size());
+  EXPECT_EQ(entry_computation, true_node.callers()[0]);
+
+  const CallGraphNode& false_node = call_graph->GetNode(false_computation);
+  EXPECT_TRUE(false_node.callees().empty());
+  EXPECT_EQ(1, false_node.callers().size());
+  EXPECT_EQ(entry_computation, false_node.callers()[0]);
+}
+
 TEST_F(CallGraphTest, ComplexGraph) {
   // Test a call graph of a module with several computation called in various
   // contexts. The call graph looks like:
diff --git a/tensorflow/compiler/xla/service/call_inliner.cc b/tensorflow/compiler/xla/service/call_inliner.cc
index 3aa7f5c4d5829ccc0e8df697c1363754128ff436..482ccc5b67109258f544e5657ecfa0e8f62192c0 100644
--- a/tensorflow/compiler/xla/service/call_inliner.cc
+++ b/tensorflow/compiler/xla/service/call_inliner.cc
@@ -82,6 +82,10 @@ class SubcomputationInsertionVisitor : public DfsHloVisitorWithDefault {
     return outer_->ReplaceInstruction(call_, new_root);
   }
 
+  CallInliner::InlinedInstructionMap ConsumeInstructionMap() {
+    return std::move(subcomputation_hlo_to_new_hlo_);
+  }
+
  private:
   // Resolves the callee subcomputation_hlo to the new (inline) HLO in the
   // caller computation, or returns a NotFound error if that subcomputation HLO
@@ -112,13 +116,13 @@ class SubcomputationInsertionVisitor : public DfsHloVisitorWithDefault {
 
   HloInstruction* call_;
   HloComputation* outer_;
-  std::unordered_map<HloInstruction*, HloInstruction*>
-      subcomputation_hlo_to_new_hlo_;
+  CallInliner::InlinedInstructionMap subcomputation_hlo_to_new_hlo_;
 };
 
 }  // namespace
 
-/* static */ Status CallInliner::Inline(HloInstruction* call) {
+/* static */ StatusOr<CallInliner::InlinedInstructionMap> CallInliner::Inline(
+    HloInstruction* call) {
   TF_RET_CHECK(call->opcode() == HloOpcode::kCall)
       << "Instruction was not a call op: " << call->opcode();
   const auto& callees = call->called_computations();
@@ -126,7 +130,8 @@ class SubcomputationInsertionVisitor : public DfsHloVisitorWithDefault {
   HloComputation* callee = callees[0];
   // We visit the callee, cloning its body into its caller.
   SubcomputationInsertionVisitor visitor(call);
-  return callee->Accept(&visitor);
+  TF_RETURN_IF_ERROR(callee->Accept(&visitor));
+  return visitor.ConsumeInstructionMap();
 }
 
 StatusOr<bool> CallInliner::Run(HloModule* module) {
@@ -140,7 +145,7 @@ StatusOr<bool> CallInliner::Run(HloModule* module) {
           VLOG(1) << "Visiting callsite: " << callsite.ToString();
           if (callsite.instruction()->opcode() == HloOpcode::kCall) {
             HloInstruction* call = callsite.instruction();
-            TF_RETURN_IF_ERROR(Inline(call));
+            TF_RETURN_IF_ERROR(Inline(call).status());
             did_mutate = true;
           }
         }
diff --git a/tensorflow/compiler/xla/service/call_inliner.h b/tensorflow/compiler/xla/service/call_inliner.h
index 2dbd38bf1ac90d3efa1453e6af6f791668d5e72a..a8345a394d46c90a48305313dac0bcd9b06938ac 100644
--- a/tensorflow/compiler/xla/service/call_inliner.h
+++ b/tensorflow/compiler/xla/service/call_inliner.h
@@ -27,8 +27,12 @@ namespace xla {
 // called function, and proceed recursively.
 class CallInliner : public HloPassInterface {
  public:
-  // Inlines one call instruction.
-  static Status Inline(HloInstruction* call);
+  using InlinedInstructionMap =
+      std::unordered_map<HloInstruction*, HloInstruction*>;
+
+  // Inlines one call instruction.  Returns a mapping from the original
+  // instructions to their inlined versions.
+  static StatusOr<InlinedInstructionMap> Inline(HloInstruction* call);
 
   ~CallInliner() override = default;
   tensorflow::StringPiece name() const override { return "CallInliner"; }
diff --git a/tensorflow/compiler/xla/service/call_inliner_test.cc b/tensorflow/compiler/xla/service/call_inliner_test.cc
index 865ed993da121d26ceb61123f1822d93814cbb9b..738d00881dd057fc13c115006c15e8f5b6d14a1d 100644
--- a/tensorflow/compiler/xla/service/call_inliner_test.cc
+++ b/tensorflow/compiler/xla/service/call_inliner_test.cc
@@ -135,7 +135,7 @@ TEST_F(CallInlinerTest, InlineWithoutRunningPass) {
       HloInstruction::CreateCall(pred, {}, false_computation));
   auto computation = module->AddEntryComputation(call_false_builder.Build());
 
-  TF_ASSERT_OK(CallInliner::Inline(call));
+  TF_ASSERT_OK(CallInliner::Inline(call).status());
   EXPECT_THAT(computation->root_instruction(), op::Constant());
   EXPECT_THAT(computation->root_instruction()->control_successors(),
               ElementsAre(op::Constant()));
diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index 3278fd5f064902459ded4d9367b5390cf8a63f27..128ee726ea6e4a8b63727fdc9762d865cee1c985 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_matchers.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/hlo_runner.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/test.h"
 #include "tensorflow/compiler/xla/test_helpers.h"
@@ -339,7 +340,7 @@ TEST_F(CopyInsertionTest, ElementOfNestedTupleParameter) {
            ShapeUtil::MakeShape(F32, {42})}),
       "param0"));
 
-  // The return value of the computation is the zero-th elemnt of the nested
+  // The return value of the computation is the zero-th element of the nested
   // tuple. This element is itself a tuple.
   auto gte = builder.AddInstruction(HloInstruction::CreateGetTupleElement(
       ShapeUtil::GetSubshape(param->shape(), {0}), param, 0));
@@ -1726,5 +1727,189 @@ void BM_ParallelWhiles(int num_iters, int num_whiles) {
 BENCHMARK(BM_SequentialWhiles)->Arg(512)->Arg(1024)->Arg(2048)->Arg(4096);
 BENCHMARK(BM_ParallelWhiles)->Arg(512)->Arg(1024)->Arg(2048)->Arg(4096);
 
+TEST_F(CopyInsertionTest, SimpleControlFlowTest) {
+  const string& hlo_string = R"(
+HloModule TestModule
+
+if-body.v5 {
+  constant.3 = s32[] constant(-1)
+  p.1 = (s32[], (s32[], s32[], s32[]), (s32[])) parameter(0)
+  get-tuple-element.18 = (s32[], s32[], s32[]) get-tuple-element(p.1), index=1
+  get-tuple-element.65 = s32[] get-tuple-element(get-tuple-element.18), index=0
+  get-tuple-element.66 = s32[] get-tuple-element(get-tuple-element.18), index=1
+  add.3 = s32[] add(get-tuple-element.65, get-tuple-element.66)
+  tuple.33 = (s32[]) tuple(add.3)
+  ROOT tuple.34 = (s32[], (s32[], s32[], s32[]), (s32[])) tuple(constant.3, get-tuple-element.18, tuple.33)
+}
+
+if-condition.v4 {
+  p.2 = (s32[], (s32[], s32[], s32[]), (s32[])) parameter(0)
+  get-tuple-element.67 = s32[] get-tuple-element(p.2), index=0
+  constant.4 = s32[] constant(0)
+  ROOT equal-to = pred[] equal-to(get-tuple-element.67, constant.4)
+}
+
+_functionalize_body_1__.v28 {
+  arg_tuple.4 = (s32[], s32[], s32[], s32[]) parameter(0)
+  get-tuple-element.68 = s32[] get-tuple-element(arg_tuple.4), index=0
+  constant.7 = s32[] constant(1)
+  add.4 = s32[] add(get-tuple-element.68, constant.7)
+  get-tuple-element.69 = s32[] get-tuple-element(arg_tuple.4), index=1
+  get-tuple-element.70 = s32[] get-tuple-element(arg_tuple.4), index=2
+  less-than-or-equal-to = pred[] less-than-or-equal-to(get-tuple-element.69, get-tuple-element.70)
+  constant.8 = s32[] constant(0)
+  select = s32[] select(less-than-or-equal-to, constant.8, constant.7)
+  get-tuple-element.71 = s32[] get-tuple-element(arg_tuple.4), index=3
+  tuple.35 = (s32[], s32[], s32[]) tuple(get-tuple-element.69, get-tuple-element.71, get-tuple-element.70)
+  tuple.36 = (s32[]) tuple(constant.8)
+  tuple.37 = (s32[], (s32[], s32[], s32[]), (s32[])) tuple(select, tuple.35, tuple.36)
+  while = (s32[], (s32[], s32[], s32[]), (s32[])) while(tuple.37), condition=if-condition.v4, body=if-body.v5
+  get-tuple-element.72 = (s32[]) get-tuple-element(while), index=2
+  get-tuple-element.73 = s32[] get-tuple-element(get-tuple-element.72), index=0
+  ROOT tuple.38 = (s32[], s32[], s32[], s32[]) tuple(add.4, get-tuple-element.69, get-tuple-element.70, get-tuple-element.73)
+}
+
+cond_wrapper.v3.1 {
+  inputs.1 = (s32[], s32[], s32[], s32[]) parameter(0)
+  get-tuple-element.75 = s32[] get-tuple-element(inputs.1), index=0
+  constant.11 = s32[] constant(7)
+  ROOT less-than.2 = pred[] less-than(get-tuple-element.75, constant.11)
+}
+
+_functionalize_body_2__.v25 {
+  arg_tuple.5 = (s32[], s32[], s32[], s32[], s32[]) parameter(0)
+  get-tuple-element.76 = s32[] get-tuple-element(arg_tuple.5), index=0
+  get-tuple-element.77 = s32[] get-tuple-element(arg_tuple.5), index=2
+  get-tuple-element.78 = s32[] get-tuple-element(arg_tuple.5), index=3
+  get-tuple-element.79 = s32[] get-tuple-element(arg_tuple.5), index=4
+  tuple.39 = (s32[], s32[], s32[], s32[]) tuple(get-tuple-element.76, get-tuple-element.77, get-tuple-element.78, get-tuple-element.79)
+  while.2 = (s32[], s32[], s32[], s32[]) while(tuple.39), condition=cond_wrapper.v3.1, body=_functionalize_body_1__.v28
+  get-tuple-element.80 = s32[] get-tuple-element(while.2), index=0
+  get-tuple-element.81 = s32[] get-tuple-element(arg_tuple.5), index=1
+  constant.12 = s32[] constant(1)
+  add.5 = s32[] add(get-tuple-element.81, constant.12)
+  get-tuple-element.82 = s32[] get-tuple-element(while.2), index=3
+  ROOT tuple.40 = (s32[], s32[], s32[], s32[], s32[]) tuple(get-tuple-element.80, add.5, get-tuple-element.77, get-tuple-element.78, get-tuple-element.82)
+}
+
+cond_wrapper.v3.2 {
+  inputs.2 = (s32[], s32[], s32[], s32[], s32[]) parameter(0)
+  get-tuple-element.83 = s32[] get-tuple-element(inputs.2), index=1
+  constant.13 = s32[] constant(5)
+  ROOT less-than.3 = pred[] less-than(get-tuple-element.83, constant.13)
+}
+
+ENTRY TestComputation {
+  arg_tuple.6 = (s32[], s32[], s32[], s32[], s32[]) parameter(0)
+  ROOT while.3 = (s32[], s32[], s32[], s32[], s32[]) while(arg_tuple.6), condition=cond_wrapper.v3.2, body=_functionalize_body_2__.v25
+}
+)";
+  auto module_or_status =
+      HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest());
+  auto module = module_or_status.ConsumeValueOrDie();
+  InsertCopies(module.get());
+}
+
+TEST_F(CopyInsertionTest, ControlFlowTest) {
+  const string& hlo_string = R"(
+HloModule TestModule
+
+if-body.v5 {
+  constant.3 = s32[] constant(-1)
+  p.1 = (s32[], (s32[], s32[], s32[]), (s32[])) parameter(0)
+  get-tuple-element.18 = (s32[], s32[], s32[]) get-tuple-element(p.1), index=1
+  get-tuple-element.65 = s32[] get-tuple-element(get-tuple-element.18), index=0
+  get-tuple-element.66 = s32[] get-tuple-element(get-tuple-element.18), index=1
+  add.3 = s32[] add(get-tuple-element.65, get-tuple-element.66)
+  tuple.33 = (s32[]) tuple(add.3)
+  ROOT tuple.34 = (s32[], (s32[], s32[], s32[]), (s32[])) tuple(constant.3, get-tuple-element.18, tuple.33)
+}
+
+if-condition.v4 {
+  p.2 = (s32[], (s32[], s32[], s32[]), (s32[])) parameter(0)
+  get-tuple-element.67 = s32[] get-tuple-element(p.2), index=0
+  constant.4 = s32[] constant(0)
+  ROOT equal-to = pred[] equal-to(get-tuple-element.67, constant.4)
+}
+
+if-body.v5.1 {
+  constant.5 = s32[] constant(-1)
+  p.3 = (s32[], (s32[], s32[], s32[]), (s32[])) parameter(0)
+  get-tuple-element.68 = (s32[], s32[], s32[]) get-tuple-element(p.3), index=1
+  get-tuple-element.70 = s32[] get-tuple-element(get-tuple-element.68), index=2
+  multiply.1 = s32[] multiply(get-tuple-element.70, get-tuple-element.70)
+  tuple.35 = (s32[]) tuple(multiply.1)
+  ROOT tuple.36 = (s32[], (s32[], s32[], s32[]), (s32[])) tuple(constant.5, get-tuple-element.68, tuple.35)
+}
+
+if-condition.v4.1 {
+  p.4 = (s32[], (s32[], s32[], s32[]), (s32[])) parameter(0)
+  get-tuple-element.71 = s32[] get-tuple-element(p.4), index=0
+  constant.6 = s32[] constant(1)
+  ROOT equal-to.1 = pred[] equal-to(get-tuple-element.71, constant.6)
+}
+
+_functionalize_body_1__.v28 {
+  arg_tuple.4 = (s32[], s32[], s32[], s32[]) parameter(0)
+  get-tuple-element.72 = s32[] get-tuple-element(arg_tuple.4), index=0
+  constant.7 = s32[] constant(1)
+  add.4 = s32[] add(get-tuple-element.72, constant.7)
+  get-tuple-element.73 = s32[] get-tuple-element(arg_tuple.4), index=1
+  get-tuple-element.74 = s32[] get-tuple-element(arg_tuple.4), index=2
+  less-than-or-equal-to = pred[] less-than-or-equal-to(get-tuple-element.73, get-tuple-element.74)
+  constant.8 = s32[] constant(0)
+  select = s32[] select(less-than-or-equal-to, constant.8, constant.7)
+  get-tuple-element.75 = s32[] get-tuple-element(arg_tuple.4), index=3
+  tuple.37 = (s32[], s32[], s32[]) tuple(get-tuple-element.73, get-tuple-element.75, get-tuple-element.74)
+  tuple.38 = (s32[]) tuple(constant.8)
+  tuple.39 = (s32[], (s32[], s32[], s32[]), (s32[])) tuple(select, tuple.37, tuple.38)
+  while = (s32[], (s32[], s32[], s32[]), (s32[])) while(tuple.39), condition=if-condition.v4, body=if-body.v5
+  while.1 = (s32[], (s32[], s32[], s32[]), (s32[])) while(while), condition=if-condition.v4.1, body=if-body.v5.1
+  get-tuple-element.76 = (s32[]) get-tuple-element(while.1), index=2
+  get-tuple-element.77 = s32[] get-tuple-element(get-tuple-element.76), index=0
+  ROOT tuple.40 = (s32[], s32[], s32[], s32[]) tuple(add.4, get-tuple-element.73, get-tuple-element.74, get-tuple-element.77)
+}
+
+cond_wrapper.v3.1 {
+  inputs.1 = (s32[], s32[], s32[], s32[]) parameter(0)
+  get-tuple-element.78 = s32[] get-tuple-element(inputs.1), index=0
+  constant.11 = s32[] constant(7)
+  ROOT less-than.2 = pred[] less-than(get-tuple-element.78, constant.11)
+}
+
+_functionalize_body_2__.v25 {
+  arg_tuple.5 = (s32[], s32[], s32[], s32[], s32[]) parameter(0)
+  get-tuple-element.79 = s32[] get-tuple-element(arg_tuple.5), index=0
+  get-tuple-element.80 = s32[] get-tuple-element(arg_tuple.5), index=2
+  get-tuple-element.81 = s32[] get-tuple-element(arg_tuple.5), index=3
+  get-tuple-element.82 = s32[] get-tuple-element(arg_tuple.5), index=4
+  tuple.41 = (s32[], s32[], s32[], s32[]) tuple(get-tuple-element.79, get-tuple-element.80, get-tuple-element.81, get-tuple-element.82)
+  while.2 = (s32[], s32[], s32[], s32[]) while(tuple.41), condition=cond_wrapper.v3.1, body=_functionalize_body_1__.v28
+  get-tuple-element.83 = s32[] get-tuple-element(while.2), index=0
+  get-tuple-element.84 = s32[] get-tuple-element(arg_tuple.5), index=1
+  constant.12 = s32[] constant(1)
+  add.5 = s32[] add(get-tuple-element.84, constant.12)
+  get-tuple-element.85 = s32[] get-tuple-element(while.2), index=3
+  ROOT tuple.42 = (s32[], s32[], s32[], s32[], s32[]) tuple(get-tuple-element.83, add.5, get-tuple-element.80, get-tuple-element.81, get-tuple-element.85)
+}
+
+cond_wrapper.v3.2 {
+  inputs.2 = (s32[], s32[], s32[], s32[], s32[]) parameter(0)
+  get-tuple-element.86 = s32[] get-tuple-element(inputs.2), index=1
+  constant.13 = s32[] constant(5)
+  ROOT less-than.3 = pred[] less-than(get-tuple-element.86, constant.13)
+}
+
+ENTRY TestComputation {
+  arg_tuple.6 = (s32[], s32[], s32[], s32[], s32[]) parameter(0)
+  ROOT while.3 = (s32[], s32[], s32[], s32[], s32[]) while(arg_tuple.6), condition=cond_wrapper.v3.2, body=_functionalize_body_2__.v25
+}
+)";
+  auto module_or_status =
+      HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest());
+  auto module = module_or_status.ConsumeValueOrDie();
+  InsertCopies(module.get());
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index e1eed498f6adfdae9df1dbf183f7c0505afd4ea2..2f0259163120dd5d62a5d1289deada8dc59c2c6c 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -81,14 +81,15 @@ cc_library(
         ":conv_canonicalization",
         ":cpu_copy_insertion",
         ":cpu_executable",
+        ":cpu_hlo_support_checker",
         ":cpu_instruction_fusion",
+        ":cpu_layout_assignment",
         ":cpu_options",
         ":cpu_parallelization_preparation",
         ":disassembler",
         ":dot_op_emitter",
         ":ir_emission_utils",
         ":ir_emitter",
-        ":layout_assignment",
         ":parallel_cpu_executable",
         ":parallel_task_assignment",
         ":simple_orc_jit",
@@ -100,16 +101,18 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:algebraic_simplifier",
-        "//tensorflow/compiler/xla/service:batchnorm_rewriter",
+        "//tensorflow/compiler/xla/service:batchnorm_expander",
         "//tensorflow/compiler/xla/service:buffer_assignment",
         "//tensorflow/compiler/xla/service:buffer_liveness",
         "//tensorflow/compiler/xla/service:call_inliner",
+        "//tensorflow/compiler/xla/service:dot_decomposer",
         "//tensorflow/compiler/xla/service:executable",
         "//tensorflow/compiler/xla/service:flatten_call_graph",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:hlo_constant_folding",
         "//tensorflow/compiler/xla/service:hlo_cse",
         "//tensorflow/compiler/xla/service:hlo_dce",
+        "//tensorflow/compiler/xla/service:hlo_element_type_converter",
         "//tensorflow/compiler/xla/service:hlo_ordering",
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
@@ -124,7 +127,9 @@ cc_library(
         "//tensorflow/compiler/xla/service:reshape_mover",
         "//tensorflow/compiler/xla/service:transpose_folding",
         "//tensorflow/compiler/xla/service:tuple_simplifier",
+        "//tensorflow/compiler/xla/service:while_loop_invariant_code_motion",
         "//tensorflow/compiler/xla/service:while_loop_simplifier",
+        "//tensorflow/compiler/xla/service:zero_sized_hlo_elimination",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",  # fixdeps: keep
         "//tensorflow/core:lib",  # fixdeps: keep
         "//tensorflow/core:stream_executor_no_cuda",
@@ -135,8 +140,6 @@ cc_library(
         "@llvm//:core",
         "@llvm//:mc",  # fixdeps: keep
         "@llvm//:object",
-        "@llvm//:powerpc_code_gen",  # fixdeps: keep
-        "@llvm//:powerpc_disassembler",  # fixdeps: keep
         "@llvm//:support",
         "@llvm//:target",  # fixdeps: keep
         "@llvm//:x86_code_gen",  # fixdeps: keep
@@ -147,7 +150,11 @@ cc_library(
 
 cc_library(
     name = "simple_orc_jit",
-    srcs = ["simple_orc_jit.cc"],
+    srcs = [
+        "simple_orc_jit.cc",
+        "windows_compatibility.cc",
+        "windows_compatibility.h",
+    ],
     hdrs = ["simple_orc_jit.h"],
     deps = [
         ":compiler_functor",
@@ -160,6 +167,7 @@ cc_library(
         ":external_constant_pool",
         ":orc_jit_memory_mapper",
         ":runtime_conv2d",
+        ":runtime_fft",
         ":runtime_fork_join",
         ":runtime_matmul",
         ":runtime_single_threaded_conv2d",
@@ -250,8 +258,11 @@ cc_library(
         ":dot_op_emitter",
         ":external_constant_pool",
         ":ir_emission_utils",
+        ":ir_function",
+        ":parallel_loop_emitter",
         ":shape_partition",
         ":simple_orc_jit",
+        ":target_machine_features",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
@@ -280,6 +291,54 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "target_machine_features",
+    srcs = [
+        "target_machine_features.cc",
+    ],
+    hdrs = ["target_machine_features.h"],
+    deps = [
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/core:lib",
+        "@llvm//:analysis",
+        "@llvm//:target",
+    ],
+)
+
+cc_library(
+    name = "ir_function",
+    srcs = ["ir_function.cc"],
+    hdrs = ["ir_function.h"],
+    deps = [
+        ":ir_emission_utils",
+        ":shape_partition",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla/service/cpu:cpu_runtime",
+        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
+        "//tensorflow/core:lib",
+        "@llvm//:core",
+    ],
+)
+
+cc_library(
+    name = "parallel_loop_emitter",
+    srcs = ["parallel_loop_emitter.cc"],
+    hdrs = ["parallel_loop_emitter.h"],
+    deps = [
+        ":ir_emission_utils",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
+        "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
+        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
+        "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter",
+        "//tensorflow/core:lib",
+        "@llvm//:core",
+    ],
+)
+
 cc_library(
     name = "dot_op_emitter",
     srcs = ["dot_op_emitter.cc"],
@@ -287,6 +346,8 @@ cc_library(
     deps = [
         ":cpu_options",
         ":cpu_runtime",
+        ":target_machine_features",
+        ":vector_support_library",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:types",
@@ -298,7 +359,6 @@ cc_library(
         "//tensorflow/compiler/xla/service/llvm_ir:kernel_support_library",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
-        "//tensorflow/compiler/xla/service/llvm_ir:vector_support_library",
         "//tensorflow/core:lib",
         "@llvm//:core",
     ],
@@ -336,7 +396,6 @@ cc_library(
         "@llvm//:mc",
         "@llvm//:mc_disassembler",
         "@llvm//:object",
-        "@llvm//:powerpc_disassembler",  # fixdeps: keep
         "@llvm//:support",
         "@llvm//:target",
         "@llvm//:x86_disassembler",  # fixdeps: keep
@@ -462,6 +521,24 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "runtime_fft",
+    srcs = [
+        "runtime_fft.cc",
+        "runtime_fft_impl.h",
+    ],
+    hdrs = ["runtime_fft.h"],
+    copts = runtime_copts(),
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/compiler/xla:executable_run_options",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_lite",
+        "//third_party/eigen3",
+    ],
+)
+
 cc_library(
     name = "runtime_matvec",
     srcs = ["runtime_matvec.cc"],
@@ -615,13 +692,14 @@ cc_library(
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:window_util",
         "//tensorflow/compiler/xla/service:hlo",
+        "@llvm//:core",
     ],
 )
 
 cc_library(
-    name = "layout_assignment",
-    srcs = ["layout_assignment.cc"],
-    hdrs = ["layout_assignment.h"],
+    name = "cpu_layout_assignment",
+    srcs = ["cpu_layout_assignment.cc"],
+    hdrs = ["cpu_layout_assignment.h"],
     deps = [
         ":dot_op_emitter",
         ":ir_emission_utils",
@@ -633,11 +711,11 @@ cc_library(
 )
 
 tf_cc_test(
-    name = "layout_assignment_test",
+    name = "cpu_layout_assignment_test",
     size = "small",
-    srcs = ["layout_assignment_test.cc"],
+    srcs = ["cpu_layout_assignment_test.cc"],
     deps = [
-        ":layout_assignment",
+        ":cpu_layout_assignment",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_layout",
         "//tensorflow/compiler/xla:shape_util",
@@ -763,6 +841,20 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "vector_support_library",
+    srcs = ["vector_support_library.cc"],
+    hdrs = ["vector_support_library.h"],
+    deps = [
+        ":target_machine_features",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
+        "@llvm//:core",
+    ],
+)
+
 tf_cc_test(
     name = "cpu_copy_insertion_test",
     srcs = ["cpu_copy_insertion_test.cc"],
@@ -783,6 +875,32 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "cpu_hlo_support_checker",
+    srcs = ["cpu_hlo_support_checker.cc"],
+    hdrs = ["cpu_hlo_support_checker.h"],
+    deps = [
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service:hlo_pass",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "cpu_hlo_support_checker_test",
+    srcs = ["cpu_hlo_support_checker_test.cc"],
+    deps = [
+        ":cpu_hlo_support_checker",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+    ],
+)
+
 # -----------------------------------------------------------------------------
 
 filegroup(
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index addd7284c593f3dcdd86b1745f9aef7b6a1c30c6..f0507982b3749b179dbd7d76c46d39a209640661 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -31,6 +31,7 @@ limitations under the License.
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -42,7 +43,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/protobuf_util.h"
 #include "tensorflow/compiler/xla/ptr_util.h"
 #include "tensorflow/compiler/xla/service/algebraic_simplifier.h"
-#include "tensorflow/compiler/xla/service/batchnorm_rewriter.h"
+#include "tensorflow/compiler/xla/service/batchnorm_expander.h"
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_inliner.h"
@@ -50,24 +51,27 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/cpu/conv_canonicalization.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_executable.h"
+#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h"
+#include "tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_options.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_parallelization_preparation.h"
 #include "tensorflow/compiler/xla/service/cpu/disassembler.h"
 #include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h"
 #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/cpu/ir_emitter.h"
-#include "tensorflow/compiler/xla/service/cpu/layout_assignment.h"
 #include "tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h"
 #include "tensorflow/compiler/xla/service/cpu/parallel_task_assignment.h"
 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
+#include "tensorflow/compiler/xla/service/dot_decomposer.h"
 #include "tensorflow/compiler/xla/service/flatten_call_graph.h"
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_constant_folding.h"
 #include "tensorflow/compiler/xla/service/hlo_cse.h"
 #include "tensorflow/compiler/xla/service/hlo_dce.h"
+#include "tensorflow/compiler/xla/service/hlo_element_type_converter.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/hlo_ordering.h"
@@ -83,7 +87,9 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/reshape_mover.h"
 #include "tensorflow/compiler/xla/service/transpose_folding.h"
 #include "tensorflow/compiler/xla/service/tuple_simplifier.h"
+#include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h"
 #include "tensorflow/compiler/xla/service/while_loop_simplifier.h"
+#include "tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -149,11 +155,6 @@ CpuCompiler::CpuCompiler() {
   LLVMInitializeAArch64TargetMC();
   LLVMInitializeAArch64AsmPrinter();
   LLVMInitializeAArch64Disassembler();
-  LLVMInitializePowerPCTarget();
-  LLVMInitializePowerPCTargetInfo();
-  LLVMInitializePowerPCTargetMC();
-  LLVMInitializePowerPCAsmPrinter();
-  LLVMInitializePowerPCDisassembler();
 }
 
 namespace {
@@ -166,42 +167,16 @@ namespace {
 // first module is compiled.
 std::once_flag llvm_command_line_options_initialized;
 
-void InitializeLLVMCommandLineOptions(const HloModuleConfig& config) {
-  auto options = config.debug_options().xla_backend_extra_options();
-  if (!options.empty()) {
-    std::vector<string> fake_argv_storage;
-    fake_argv_storage.push_back("");
-    for (const auto& it : options) {
-      // Skip options the XLA backend itself consumes.
-      if (!tensorflow::StringPiece(it.first).starts_with("xla_")) {
-        if (it.second.empty()) {
-          fake_argv_storage.push_back(it.first);
-        } else {
-          fake_argv_storage.push_back(it.first + "=" + it.second);
-        }
-      }
-    }
-
-    VLOG(2) << "Passing argv to LLVM:";
-    std::vector<const char*> fake_argv;
-    for (const auto& s : fake_argv_storage) {
-      fake_argv.push_back(s.c_str());
-      VLOG(2) << s;
-    }
-    llvm::cl::ParseCommandLineOptions(fake_argv.size(), &fake_argv[0]);
-  }
-}
-
 // This visitor records which HLO instructions should have profiling information
 // recorded.
 class CollectProfileCandidates : public DfsHloVisitorWithDefault {
  public:
-  static StatusOr<std::unordered_map<const HloInstruction*, size_t>>
+  static StatusOr<std::unordered_map<const HloInstruction*, int64>>
   GetCandidatesForComputation(
       HloComputation* computation,
       const std::unordered_map<const HloInstruction*, int64>&
           assigned_indices) {
-    std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx;
+    std::unordered_map<const HloInstruction*, int64> hlo_to_profile_idx;
     CollectProfileCandidates profile_candidates_for_computation(
         &hlo_to_profile_idx, assigned_indices);
     TF_RETURN_IF_ERROR(
@@ -211,7 +186,7 @@ class CollectProfileCandidates : public DfsHloVisitorWithDefault {
 
  private:
   CollectProfileCandidates(
-      std::unordered_map<const HloInstruction*, size_t>* hlo_to_profile_idx,
+      std::unordered_map<const HloInstruction*, int64>* hlo_to_profile_idx,
       const std::unordered_map<const HloInstruction*, int64>& assigned_indices)
       : hlo_to_profile_idx_(hlo_to_profile_idx),
         assigned_indices_(assigned_indices) {}
@@ -251,7 +226,7 @@ class CollectProfileCandidates : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
-  std::unordered_map<const HloInstruction*, size_t>* hlo_to_profile_idx_;
+  std::unordered_map<const HloInstruction*, int64>* hlo_to_profile_idx_;
   const std::unordered_map<const HloInstruction*, int64>& assigned_indices_;
 };
 }  // namespace
@@ -259,7 +234,8 @@ class CollectProfileCandidates : public DfsHloVisitorWithDefault {
 Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
   // Optimization pipeline.
   HloPassPipeline pipeline("CPU");
-  pipeline.AddInvariantChecker<HloVerifier>(ShapeSizeBytesFunction());
+  pipeline.AddInvariantChecker<HloVerifier>();
+  pipeline.AddPass<CpuHloSupportChecker>();
 
   ReducePrecisionInsertion::AddPasses(
       &pipeline, module->config().debug_options(),
@@ -272,14 +248,14 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
   // TODO(b/65775800): Fix wrong output bug in Call and remove the CallInliner
   // pass.
   pipeline.AddPass<CallInliner>();
-
+  pipeline.AddPass<DotDecomposer>();
   pipeline.AddPass<ConvCanonicalization>();
   {
     auto& pass =
         pipeline.AddPass<HloPassFix<HloPassPipeline>>("simplification");
-    pass.AddInvariantChecker<HloVerifier>(ShapeSizeBytesFunction());
+    pass.AddInvariantChecker<HloVerifier>();
 
-    pass.AddPass<BatchNormRewriter>(
+    pass.AddPass<BatchNormExpander>(
         /*rewrite_training_op=*/true,
         /*rewrite_inference_op=*/true,
         /*rewrite_grad_op=*/true,
@@ -288,6 +264,12 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
         /*is_layout_sensitive=*/false,
         [](const Shape&, const Shape&) { return false; },
         /*enable_dot_strength_reduction=*/false);
+
+    // BatchNormExpander can create zero-sized ops, so zero-sized HLO
+    // elimination has to come after that pass.
+    pipeline.AddPass<ZeroSizedHloElimination>();
+
+    pass.AddPass<WhileLoopInvariantCodeMotion>();
     pass.AddPass<TupleSimplifier>();
     pass.AddPass<WhileLoopSimplifier>();
     pass.AddPass<HloDCE>();
@@ -318,6 +300,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
       [](const Shape&, const Shape&) { return true; },
       /*enable_dot_strength_reduction=*/false);
   pipeline.AddPass<HloCSE>(/*is_layout_sensitive=*/true);
+  pipeline.AddPass<HloElementTypeConverter>(BF16, F32);
   // Outline ops in the entry computation into calls to subcomputations.
   const int max_parallelism =
       module->config().intra_op_parallelism_threads() > 0
@@ -435,6 +418,21 @@ Status InitializeModuleHooks(
   return Status::OK();
 }
 
+Status VerifyLlvmModule(const llvm::Module& llvm_module) {
+  XLA_SCOPED_LOGGING_TIMER("CpuCompiler - Running LLVM verifier");
+
+  std::string err;
+  llvm::raw_string_ostream err_stream(err);
+
+  // verifyModule() returns true if the module is broken.
+  TF_RET_CHECK(!llvm::verifyModule(llvm_module, &err_stream))
+      << "Invalid LLVM IR before optimizations:\n"
+      << err_stream.str()
+      << "\nThis probably indicates a bug in the HLO -> LLVM IR lowering. "
+         "Rerun with --xla_dump_ir_to to get the IR. ";
+  return Status::OK();
+}
+
 }  // namespace
 
 StatusOr<std::unique_ptr<HloModule>> CpuCompiler::RunHloPasses(
@@ -460,7 +458,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
   VLOG(1) << "Compiling: " << module->name();
   TF_RET_CHECK(stream_exec != nullptr);
   std::call_once(llvm_command_line_options_initialized,
-                 &InitializeLLVMCommandLineOptions, module->config());
+                 &llvm_ir::InitializeLLVMCommandLineOptions, module->config());
 
   ModuleHook pre_optimization_ir_hook;
   ModuleHook post_optimization_ir_hook;
@@ -483,17 +481,19 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
   llvm_module->setDataLayout(jit->data_layout());
   llvm_module->setTargetTriple(jit->target_triple().getTriple());
 
-  HloComputation* computation = module->entry_computation();
-  std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx;
+  HloComputation* entry_computation = module->entry_computation();
+  std::unordered_map<const HloInstruction*, int64> instruction_to_profile_idx;
+  std::unordered_map<const HloComputation*, int64> computation_to_profile_idx;
   std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map;
   std::unique_ptr<HloProfilePrinter> hlo_profile_printer;
   if (module->config().hlo_profiling_enabled()) {
     hlo_profile_index_map = MakeUnique<HloProfileIndexMap>(*module);
 
     TF_ASSIGN_OR_RETURN(
-        hlo_to_profile_idx,
+        instruction_to_profile_idx,
         CollectProfileCandidates::GetCandidatesForComputation(
-            computation, hlo_profile_index_map->instruction_to_profile_idx()));
+            entry_computation,
+            hlo_profile_index_map->instruction_to_profile_idx()));
 
     auto shape_size_bytes = [](const Shape& shape) {
       // On the cpu, opaques are pointers.
@@ -504,8 +504,11 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     };
 
     HloCostAnalysis cost_analysis(shape_size_bytes);
+    TF_RETURN_IF_ERROR(entry_computation->Accept(&cost_analysis));
     hlo_profile_printer =
         CreateHloProfilePrinter(*hlo_profile_index_map, cost_analysis);
+    computation_to_profile_idx =
+        hlo_profile_index_map->computation_to_profile_idx();
   }
 
   std::unique_ptr<Executable> cpu_executable;
@@ -528,9 +531,9 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     // uses data dependencies for determining order.
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<BufferAssignment> assignment,
-        BufferAssigner::Run(module.get(),
-                            xla::MakeUnique<DependencyHloOrdering>(module.get()),
-                            BufferSizeBytesFunction(), memory_alignment));
+        BufferAssigner::Run(
+            module.get(), xla::MakeUnique<DependencyHloOrdering>(module.get()),
+            BufferSizeBytesFunction(), memory_alignment));
     // BufferAssignment::ToString() includes a header, so no need for us to
     // print one ourselves.
     XLA_VLOG_LINES(2, assignment->ToString());
@@ -546,7 +549,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     std::map<HloComputation*, HloInstruction*> parallel_computations;
     std::unordered_map<const HloInstruction*, std::unique_ptr<unsigned char[]>>
         aligned_constants;
-    for (auto instruction : computation->MakeInstructionPostOrder()) {
+    for (auto instruction : entry_computation->MakeInstructionPostOrder()) {
       // Parameters and constants don't get their own computation.
       if (instruction->opcode() == HloOpcode::kParameter) {
         continue;
@@ -554,7 +557,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
       if (instruction->opcode() == HloOpcode::kConstant) {
         // Copy the constant out of the ProtocolBuffer so that we can give it a
         // higher alignment.
-        const void* data = instruction->literal().InternalData();
+        const void* data = instruction->literal().untyped_data();
         int64 size = CpuExecutable::ShapeSizeBytes(instruction->shape());
         auto iter = aligned_constants.emplace(
             instruction, xla::MakeUnique<unsigned char[]>(size));
@@ -571,22 +574,15 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
       parallel_computations.emplace(to_apply, instruction);
     }
 
-    // We always profile the entire computation as a whole, even if hlo
-    // profiling is disabled.  When hlo profiling is diabled, we pass in a
-    // profile counter array of just one element, which corresponds to the whole
-    // computation.
-    size_t entry_computation_profile_idx =
-        hlo_profile_index_map ? hlo_profile_index_map->GetProfileIndexFor(
-                                    *module->entry_computation())
-                              : 0;
     IrEmitter ir_emitter(*module, *assignment, llvm_module.get(),
-                         hlo_to_profile_idx, entry_computation_profile_idx,
+                         std::move(instruction_to_profile_idx),
+                         std::move(computation_to_profile_idx),
                          jit->target_machine(), jit->external_constant_pool());
 
     std::unique_ptr<HloInstructionMap<string>> function_names(
         new HloInstructionMap<string>());
     for (auto embedded_computation :
-         computation->MakeEmbeddedComputationsList()) {
+         entry_computation->MakeEmbeddedComputationsList()) {
       if (embedded_computation->IsFusionComputation()) {
         continue;
       }
@@ -600,7 +596,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
           llvm::Function * ir_function,
           ir_emitter.EmitComputation(
               embedded_computation, embedded_computation->name(),
-              /*is_entry_computation=*/computation_is_parallel,
+              /*is_top_level_computation=*/computation_is_parallel,
               /*instruction_order=*/nullptr));
       // If this computation is parallel, remember it in the function name map.
       // This way we know what function to execute when we try to run code for
@@ -616,6 +612,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     if (embed_ir_in_executable) {
       ir_module_string = llvm_ir::DumpModuleToString(*llvm_module);
     }
+    TF_RETURN_IF_ERROR(VerifyLlvmModule(*llvm_module));
 
     // JIT compile the LLVM IR module to in-memory machine code.
     jit->AddModule(std::move(llvm_module));
@@ -642,10 +639,10 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     // temporary buffers are required to run the computation.
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<BufferAssignment> assignment,
-        BufferAssigner::Run(
-            module.get(),
-            xla::MakeUnique<SequentialHloOrdering>(module.get(), module_sequence),
-            BufferSizeBytesFunction(), memory_alignment));
+        BufferAssigner::Run(module.get(),
+                            xla::MakeUnique<SequentialHloOrdering>(
+                                module.get(), module_sequence),
+                            BufferSizeBytesFunction(), memory_alignment));
     // BufferAssignment::ToString() includes a header, so no need for us to
     // print one ourselves.
     XLA_VLOG_LINES(2, assignment->ToString());
@@ -655,14 +652,6 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
       TF_RETURN_IF_ERROR(protobuf_util::DumpProtoToDirectory(
           proto, xla_dump_hlo_proto_to, module->name()));
     }
-    // We always profile the entire computation as a whole, even if hlo
-    // profiling is disabled.  When hlo profiling is diabled, we pass in a
-    // profile counter array of just one element, which corresponds to the whole
-    // computation.
-    size_t entry_computation_profile_idx =
-        hlo_profile_index_map ? hlo_profile_index_map->GetProfileIndexFor(
-                                    *module->entry_computation())
-                              : 0;
 
     // Each computation is a single function.  Emit all embedded computations
     // before the entry computation. The order of computations returned from
@@ -670,11 +659,12 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
     // before a caller computation.
 
     IrEmitter ir_emitter(*module, *assignment, llvm_module.get(),
-                         hlo_to_profile_idx, entry_computation_profile_idx,
+                         std::move(instruction_to_profile_idx),
+                         std::move(computation_to_profile_idx),
                          jit->target_machine(), jit->external_constant_pool());
 
     for (auto embedded_computation :
-         computation->MakeEmbeddedComputationsList()) {
+         entry_computation->MakeEmbeddedComputationsList()) {
       if (embedded_computation->IsFusionComputation()) {
         continue;
       }
@@ -682,23 +672,27 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
           ir_emitter
               .EmitComputation(embedded_computation,
                                embedded_computation->name(),
-                               /*is_entry_computation=*/false,
+                               /*is_top_level_computation=*/false,
                                &module_sequence.at(embedded_computation))
               .status());
     }
-    string function_name_prefix =
-        computation->name().empty() ? "__compute" : computation->name();
+    string function_name_prefix = entry_computation->name().empty()
+                                      ? "__compute"
+                                      : entry_computation->name();
     TF_ASSIGN_OR_RETURN(
         llvm::Function * entry_function,
-        ir_emitter.EmitComputation(computation, function_name_prefix,
-                                   /*is_entry_computation=*/true,
-                                   &module_sequence.at(computation)));
+        ir_emitter.EmitComputation(entry_computation, function_name_prefix,
+                                   /*is_top_level_computation=*/true,
+                                   &module_sequence.at(entry_computation)));
 
     string function_name = llvm_ir::AsString(entry_function->getName());
     string ir_module_string;
     if (embed_ir_in_executable) {
       ir_module_string = llvm_ir::DumpModuleToString(*llvm_module);
     }
+    TF_RETURN_IF_ERROR(VerifyLlvmModule(*llvm_module));
+
+    XLA_VLOG_LINES(2, "LLVM IR:\n" + llvm_ir::DumpModuleToString(*llvm_module));
 
     // JIT compile the LLVM IR module to in-memory machine code.
     jit->AddModule(std::move(llvm_module));
@@ -721,7 +715,8 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
                                 const AotCompilationOptions& aot_options) {
   TF_RET_CHECK(!modules.empty());
   std::call_once(llvm_command_line_options_initialized,
-                 &InitializeLLVMCommandLineOptions, modules[0]->config());
+                 &llvm_ir::InitializeLLVMCommandLineOptions,
+                 modules[0]->config());
 
   // We can pass just one llvm::TargetOptions when we compile the LLVM module,
   // so we bail if the configs have conflicting flags. At the moment, the only
@@ -824,7 +819,8 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<BufferAssignment> assignment,
         BufferAssigner::Run(
-            module, xla::MakeUnique<SequentialHloOrdering>(module, module_sequence),
+            module,
+            xla::MakeUnique<SequentialHloOrdering>(module, module_sequence),
             BufferSizeBytesFunction(), memory_alignment));
     // BufferAssignment::ToString() includes a header, so no need for us to
     // print one ourselves.
@@ -838,13 +834,13 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
           proto, xla_dump_hlo_proto_to, module->name()));
     }
 
-    IrEmitter ir_emitter(
-        *module, *assignment, &llvm_module,
-        /*hlo_to_profile_idx=*/
-        std::unordered_map<const HloInstruction*, size_t>{},
-        /*entry_computation_profile_idx=*/tensorflow::gtl::nullopt,
-        target_machine.get(),
-        /*external_constant_pool=*/nullptr);
+    IrEmitter ir_emitter(*module, *assignment, &llvm_module,
+                         /*instruction_to_profile_idx=*/
+                         std::unordered_map<const HloInstruction*, int64>{},
+                         /*computation_to_profile_idx=*/
+                         std::unordered_map<const HloComputation*, int64>{},
+                         target_machine.get(),
+                         /*external_constant_pool=*/nullptr);
     HloComputation* computation = module->entry_computation();
     for (auto embedded_computation :
          computation->MakeEmbeddedComputationsList()) {
@@ -855,7 +851,7 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
           ir_emitter
               .EmitComputation(embedded_computation,
                                embedded_computation->name(),
-                               /*is_entry_computation=*/false,
+                               /*is_top_level_computation=*/false,
                                &module_sequence.at(embedded_computation))
               .status());
     }
@@ -863,7 +859,7 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
     TF_ASSIGN_OR_RETURN(
         llvm::Function * entry_function,
         ir_emitter.EmitComputation(computation, entry_point_name,
-                                   /*is_entry_computation=*/true,
+                                   /*is_top_level_computation=*/true,
                                    &module_sequence.at(computation)));
 
     CHECK(entry_function->getName() == llvm_ir::AsStringRef(entry_point_name));
@@ -874,6 +870,16 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
         *module, user_pre_optimization_hook_, user_post_optimization_hook_,
         &pre_optimization_ir_dump_hook, &post_optimization_ir_dump_hook));
 
+    // Run the LLVM verifier over the unoptimized LLVM IR.  If it fails, run the
+    // pre-optimization IR dump hook before returning.
+    {
+      Status verify_status = VerifyLlvmModule(llvm_module);
+      if (!verify_status.ok() && pre_optimization_ir_dump_hook) {
+        pre_optimization_ir_dump_hook(llvm_module).IgnoreError();
+      }
+      TF_RETURN_IF_ERROR(verify_status);
+    }
+
     Disassembler disassembler(*target_machine);
     CompilerFunctor compiler_functor(
         target_machine.get(), &disassembler, opt_level,
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
index e956f478b86d9816615e2902f5bbeae6d6384162..f335bd1bbc7376d1cccc0fa6aa1c0a6d6ad559ab 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc
@@ -73,28 +73,6 @@ CpuExecutable::CpuExecutable(
       reinterpret_cast<ComputeFunctionType>(cantFail(sym.getAddress()));
 }
 
-// Given a pointer to an output buffer (following the CPU JIT calling
-// conventions), mark addresses that are "live". The initial pointer itself is
-// trivially live. If the shape of the buffer is a tuple, this analysis looks
-// into the tuple's elements and marks them live as well (since tuples keep
-// pointers to buffers) and also works recursively.  address is an in-memory
-// buffer address that contains some runtime XLA object.  shape is its
-// shape. marked_addresses is the set of live addresses to populate.
-static void MarkLiveAddressesInOutput(
-    const void* address, const Shape& shape,
-    std::unordered_set<const void*>* marked_addresses) {
-  marked_addresses->insert(address);
-  const uintptr_t* address_buffer = static_cast<const uintptr_t*>(address);
-  if (ShapeUtil::IsTuple(shape)) {
-    for (int i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) {
-      const uintptr_t* element_address = address_buffer + i;
-      const void* element = reinterpret_cast<const void*>(*element_address);
-      MarkLiveAddressesInOutput(
-          element, ShapeUtil::GetTupleElementShape(shape, i), marked_addresses);
-    }
-  }
-}
-
 Status CpuExecutable::AllocateBuffers(
     DeviceMemoryAllocator* memory_allocator, int device_ordinal,
     std::vector<perftools::gputools::DeviceMemoryBase>* buffers) {
@@ -148,20 +126,6 @@ Status CpuExecutable::ExecuteComputeFunction(
     tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
     HloExecutionProfile* hlo_execution_profile) {
-  std::vector<se::DeviceMemoryBase> argument_buffers;
-  argument_buffers.reserve(arguments.size());
-  for (const auto* argument : arguments) {
-    argument_buffers.push_back(argument->buffer(/*index=*/{}));
-  }
-  return ExecuteComputeFunction(run_options, argument_buffers, buffers,
-                                hlo_execution_profile);
-}
-
-Status CpuExecutable::ExecuteComputeFunction(
-    const ExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
-    HloExecutionProfile* hlo_execution_profile) {
   // The calling convention for JITed functions is:
   //
   //  void function(void* result, const void* run_options, void** args_array,
@@ -177,23 +141,19 @@ Status CpuExecutable::ExecuteComputeFunction(
   //               determined by buffer analysis.
   //
   std::vector<const void*> args_array;
-  for (se::DeviceMemoryBase arg_mem : arguments) {
-    args_array.push_back(arg_mem.opaque());
+  for (const ShapedBuffer* argument : arguments) {
+    args_array.push_back(argument->root_buffer().opaque());
   }
 
   uint64 start_micros = tensorflow::Env::Default()->NowMicros();
 
-  // Allocate profiling counters for each hlo instruction that we would like to
-  // profile.  Even when not Hlo profiling, we allocate a counter for the entire
-  // computation, which we use to update ExecutionProfile below.
-  std::vector<int64>* profile_counters = nullptr;
-  std::vector<int64> profile_counter_for_entry_computation;
-  if (hlo_execution_profile) {
-    profile_counters = hlo_execution_profile->mutable_profile_counters();
-  } else {
-    profile_counters = &profile_counter_for_entry_computation;
-    profile_counter_for_entry_computation.push_back(0);
-  }
+  size_t profile_counters_size =
+      hlo_execution_profile ? hlo_execution_profile->profile_counters().size()
+                            : 0;
+  int64* profile_counters =
+      hlo_execution_profile
+          ? hlo_execution_profile->mutable_profile_counters()->data()
+          : nullptr;
 
   // Call the computation function following the calling convention.
   std::vector<void*> buffer_pointers;
@@ -208,7 +168,7 @@ Status CpuExecutable::ExecuteComputeFunction(
     VLOG(3) << tensorflow::strings::Printf(
         "  func(void* result, void* params[%zu], void* temps[%zu], "
         "uint64 profile_counters[%zu])",
-        args_array.size(), buffer_pointers.size(), profile_counters->size());
+        args_array.size(), buffer_pointers.size(), profile_counters_size);
     VLOG(3) << tensorflow::strings::Printf("    result = %p", result_buffer);
     auto ptr_printer = [](string* out, const void* p) {
       tensorflow::strings::StrAppend(out, tensorflow::strings::Printf("%p", p));
@@ -220,11 +180,11 @@ Status CpuExecutable::ExecuteComputeFunction(
         "    temps = [%s]",
         tensorflow::str_util::Join(buffer_pointers, ", ", ptr_printer).c_str());
     VLOG(3) << tensorflow::strings::Printf("    profile_counters = %p",
-                                           profile_counters->data());
+                                           profile_counters);
   }
 
   compute_function_(result_buffer, run_options, args_array.data(),
-                    buffer_pointers.data(), profile_counters->data());
+                    buffer_pointers.data(), profile_counters);
 
   uint64 end_micros = tensorflow::Env::Default()->NowMicros();
 
@@ -232,13 +192,11 @@ Status CpuExecutable::ExecuteComputeFunction(
     tensorflow::mutex_lock lock(mutex_);
     const double nanoseconds = (end_micros - start_micros) * 1000.0;
     execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0));
-
+    // If hlo profiling was disabled then the cycle count is left empty.
     if (hlo_execution_profile) {
       execution_profile_.set_compute_cycle_count(
           hlo_execution_profile->total_cycles_executed(
               *module().entry_computation()));
-    } else {
-      execution_profile_.set_compute_cycle_count(profile_counters->back());
     }
   }
 
@@ -246,11 +204,23 @@ Status CpuExecutable::ExecuteComputeFunction(
 }
 
 static void LogLiveAddresses(
-    const std::unordered_set<const void*>& marked_addresses) {
+    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
+    const std::vector<bool>& buffers_in_result) {
+  if (!VLOG_IS_ON(3)) {
+    return;
+  }
+
+  CHECK_EQ(buffers.size(), buffers_in_result.size());
+  std::vector<const void*> live_out_buffers;
+  for (int i = 0; i < buffers.size(); ++i) {
+    if (buffers_in_result[i]) {
+      live_out_buffers.push_back(buffers[i].opaque());
+    }
+  }
   VLOG(3) << "Live addresses in output marking found "
-          << marked_addresses.size() << " addresses:\n"
+          << live_out_buffers.size() << " addresses:\n"
           << tensorflow::str_util::Join(
-                 marked_addresses, ", ", [](string* out, const void* address) {
+                 live_out_buffers, ", ", [](string* out, const void* address) {
                    tensorflow::strings::StrAppend(
                        out, tensorflow::strings::Printf("%p", address));
                  });
@@ -259,13 +229,12 @@ static void LogLiveAddresses(
 static Status DeallocateTempBuffers(
     DeviceMemoryAllocator* allocator, se::Stream* stream,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
-    const std::unordered_set<const void*>& marked_addresses) {
-  // Keep those marked live because they are referenced by the output of the
-  // computation and are needed by the service. They will be deallocated by the
-  // service.
+    const std::vector<bool>& buffers_in_result) {
+  // Keep those buffers in the output of the marked live because they are needed
+  // by the service. They will be deallocated by the service.
   for (size_t i = 0; i < buffers.size(); ++i) {
     se::DeviceMemoryBase alloc = buffers[i];
-    if (marked_addresses.count(alloc.opaque()) == 0 && !alloc.is_null()) {
+    if (!buffers_in_result[i] && !alloc.is_null()) {
       VLOG(3) << "CpuExecutable deallocating buffer #" << i << " ["
               << alloc.opaque() << "]";
       TF_RETURN_IF_ERROR(
@@ -276,33 +245,43 @@ static Status DeallocateTempBuffers(
   return Status::OK();
 }
 
-StatusOr<perftools::gputools::DeviceMemoryBase> CpuExecutable::ExecuteOnStream(
+StatusOr<std::unique_ptr<ShapedBuffer>> CpuExecutable::CreateResultShapedBuffer(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    HloExecutionProfile* hlo_execution_profile) {
+    tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
+        allocated_buffers,
+    std::vector<bool>* buffers_in_result) {
   se::Stream* stream = run_options->stream();
-  DeviceMemoryAllocator* memory_allocator = run_options->allocator();
-  std::vector<se::DeviceMemoryBase> buffers(assignment_->Allocations().size());
+  auto result_buffer = MakeUnique<ShapedBuffer>(
+      /*on_host_shape=*/result_shape(), /*on_device_shape=*/result_shape(),
+      stream->parent()->platform(), stream->parent()->device_ordinal());
 
-  TF_RETURN_IF_ERROR(AllocateBuffers(
-      memory_allocator, stream->parent()->device_ordinal(), &buffers));
-  TF_RETURN_IF_ERROR(ExecuteComputeFunction(
-      &run_options->run_options(), arguments, buffers, hlo_execution_profile));
-
-  // Mark the buffers that are actually live (used in the output) when the
-  // computation finishes executing.
-  std::unordered_set<const void*> marked_addresses;
-  TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice,
-                      assignment_->GetUniqueTopLevelOutputSlice());
-  se::DeviceMemoryBase top_level_output = buffers[result_slice.index()];
-  MarkLiveAddressesInOutput(top_level_output.opaque(), result_shape(),
-                            &marked_addresses);
-
-  LogLiveAddresses(marked_addresses);
-  TF_RETURN_IF_ERROR(DeallocateTempBuffers(memory_allocator, stream, buffers,
-                                           marked_addresses));
-
-  return top_level_output;
+  // Copy DeviceMemoryBase values which contain the array(s) of the result into
+  // the respective location in ShapedBuffer which is returned to the caller.
+  TF_RETURN_IF_ERROR(result_buffer->buffers().ForEachMutableElementWithStatus(
+      [&](const ShapeIndex& index, se::DeviceMemoryBase* device_memory) {
+        const auto& sources = this->GetRootPointsToSet().element(index);
+        // The points to set is unambiguous so the set should be a
+        // singleton.
+        CHECK_EQ(1, sources.size());
+        const LogicalBuffer* buffer_source = sources[0];
+        HloInstruction* src = buffer_source->instruction();
+
+        // The source for this result buffer can be a nested buffer such as
+        // a tuple element. The source instruction should have a
+        // non-parameter buffer assigned.
+        TF_ASSIGN_OR_RETURN(
+            const BufferAllocation::Slice slice,
+            this->assignment_->GetUniqueSlice(src, buffer_source->index()));
+        CHECK(!slice.allocation()->is_entry_computation_parameter());
+
+        const BufferAllocation::Index buffer_index = slice.index();
+        const se::DeviceMemoryBase& buffer = allocated_buffers[buffer_index];
+        CHECK(!buffer.is_null() || buffer.size() == 0);
+        *device_memory = buffer;
+        (*buffers_in_result)[buffer_index] = true;
+        return Status::OK();
+      }));
+  return std::move(result_buffer);
 }
 
 StatusOr<std::unique_ptr<ShapedBuffer>> CpuExecutable::ExecuteOnStream(
@@ -317,67 +296,26 @@ StatusOr<std::unique_ptr<ShapedBuffer>> CpuExecutable::ExecuteOnStream(
   DeviceMemoryAllocator* memory_allocator = run_options->allocator();
   std::vector<se::DeviceMemoryBase> buffers(assignment_->Allocations().size());
 
-  auto result_buffer =
-      MakeUnique<ShapedBuffer>(result_shape(), stream->parent()->platform(),
-                               stream->parent()->device_ordinal());
-
   TF_RETURN_IF_ERROR(AllocateBuffers(
       memory_allocator, stream->parent()->device_ordinal(), &buffers));
   TF_RETURN_IF_ERROR(ExecuteComputeFunction(
       &run_options->run_options(), arguments, buffers, hlo_execution_profile));
 
-  // Copy DeviceMemoryBase values which contain the array(s) of the result into
-  // the respective location in ShapedBuffer which is returned to the caller.
   std::vector<bool> buffers_in_result(assignment_->Allocations().size(), false);
-  TF_RETURN_IF_ERROR(
-      result_buffer->mutable_shape_index_to_buffer_entry()
-          ->ForEachMutableElementWithStatus(
-              [&buffers, &buffers_in_result, &result_buffer, this](
-                  const ShapeIndex& index, size_t* buffer_entry) {
-                const auto& sources = this->GetRootPointsToSet().element(index);
-                // The points to set is unambiguous so the set should be a
-                // singleton.
-                CHECK_EQ(1, sources.size());
-                const LogicalBuffer* buffer_source = sources[0];
-                HloInstruction* src = buffer_source->instruction();
-
-                // The source for this result buffer can be a nested buffer
-                // such as a tuple element.
-
-                // The source instruction should have a non-parameter buffer
-                // assigned.
-                TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice,
-                                    this->assignment_->GetUniqueSlice(
-                                        src, buffer_source->index()));
-                CHECK(!slice.allocation()->is_entry_computation_parameter());
-
-                const BufferAllocation::Index buffer_index = slice.index();
-                const se::DeviceMemoryBase& buffer = buffers[buffer_index];
-                CHECK(!buffer.is_null() || buffer.size() == 0);
-                *buffer_entry = result_buffer->mutable_buffers()->size();
-                result_buffer->mutable_buffers()->push_back(buffer);
-                buffers_in_result[buffer_index] = true;
-                return Status::OK();
-              }));
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<ShapedBuffer> result_buffer,
+      CreateResultShapedBuffer(run_options, buffers, &buffers_in_result));
 
   // Free all buffers not in the result.
-  for (size_t i = 0; i < buffers.size(); ++i) {
-    se::DeviceMemoryBase alloc = buffers[i];
-    if (!buffers_in_result[i] && !alloc.is_null()) {
-      VLOG(3) << "CpuExecutable deallocating buffer #" << i << " ["
-              << alloc.opaque() << "]";
-      TF_RETURN_IF_ERROR(memory_allocator->Deallocate(
-          stream->parent()->device_ordinal(), &alloc));
-    }
-  }
+  TF_RETURN_IF_ERROR(DeallocateTempBuffers(memory_allocator, stream, buffers,
+                                           buffers_in_result));
 
   return std::move(result_buffer);
 }
 
-StatusOr<perftools::gputools::DeviceMemoryBase>
-CpuExecutable::ExecuteAsyncOnStream(
+StatusOr<std::unique_ptr<ShapedBuffer>> CpuExecutable::ExecuteAsyncOnStream(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) {
   if (hlo_profiling_enabled()) {
     return Unimplemented(
         "Asynchronous execution on stream with hlo profiling is not yet "
@@ -393,29 +331,25 @@ CpuExecutable::ExecuteAsyncOnStream(
   TF_RETURN_IF_ERROR(AllocateBuffers(
       memory_allocator, stream->parent()->device_ordinal(), &buffers));
 
-  // Mark the buffers that are actually live (used in the output) when the
-  // computation finishes executing.
-  std::unordered_set<const void*> marked_addresses;
-  TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice,
-                      assignment_->GetUniqueTopLevelOutputSlice());
-  se::DeviceMemoryBase top_level_output = buffers[result_slice.index()];
-  MarkLiveAddressesInOutput(top_level_output.opaque(), result_shape(),
-                            &marked_addresses);
+  std::vector<bool> buffers_in_result(assignment_->Allocations().size(), false);
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<ShapedBuffer> result_buffer,
+      CreateResultShapedBuffer(run_options, buffers, &buffers_in_result));
 
-  LogLiveAddresses(marked_addresses);
+  LogLiveAddresses(buffers, buffers_in_result);
 
   host_stream->EnqueueTask([this, run_options, arguments, buffers,
-                            marked_addresses, memory_allocator, stream]() {
+                            buffers_in_result, memory_allocator, stream]() {
     // Failing a CHECK here is not great, but I don't see an obvious way to
     // return a failed Status asynchronously.
     TF_CHECK_OK(ExecuteComputeFunction(&run_options->run_options(), arguments,
                                        buffers,
                                        /*hlo_execution_profile=*/nullptr));
     TF_CHECK_OK(DeallocateTempBuffers(memory_allocator, stream, buffers,
-                                      marked_addresses));
+                                      buffers_in_result));
   });
 
-  return top_level_output;
+  return std::move(result_buffer);
 }
 
 /*static*/ int64 CpuExecutable::ShapeSizeBytes(const Shape& shape) {
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
index 17ee2d673ee7cde1847bf29e2399e6033cb7e30e..50443a59954e222f65fc935e83effdaf6d6c8bf0 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h
@@ -55,21 +55,14 @@ class CpuExecutable : public Executable {
                 std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
   ~CpuExecutable() override {}
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      HloExecutionProfile* hlo_execution_profile) override;
-
   StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       HloExecutionProfile* hlo_execution_profile) override;
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
       const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments) override;
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override;
 
   // This should be called after set_ir_module_string.
   const string& ir_module_string() const { return ir_module_string_; }
@@ -108,13 +101,6 @@ class CpuExecutable : public Executable {
 
   // Calls the generated function performing the computation with the given
   // arguments using the supplied buffers.
-  Status ExecuteComputeFunction(
-      const ExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          buffers,
-      HloExecutionProfile* hlo_execution_profile);
   Status ExecuteComputeFunction(
       const ExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
@@ -122,6 +108,18 @@ class CpuExecutable : public Executable {
           buffers,
       HloExecutionProfile* hlo_execution_profile);
 
+  // Create a ShapedBuffer for holding the result of the computation. The
+  // addresses (DeviceMemoryBases) are set according to buffer assignment.
+  // 'buffers_in_result' should point to a vector of the same size as
+  // 'allocated_buffers'. An element in buffers_in_result is set to true if the
+  // corresponding buffer is live out of the computation (and thus contained in
+  // the returned ShapedBuffer).
+  StatusOr<std::unique_ptr<ShapedBuffer>> CreateResultShapedBuffer(
+      const ServiceExecutableRunOptions* run_options,
+      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
+          allocated_buffers,
+      std::vector<bool>* buffers_in_result);
+
   // Returns the points-to set of the root instruction of the entry
   // computation. Uses points-to analysis from buffer assignment.
   const PointsToSet& GetRootPointsToSet() const;
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7bd4741a04b1135d9780e0cf765b7b33378526e1
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc
@@ -0,0 +1,48 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h"
+
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace xla {
+
+StatusOr<bool> CpuHloSupportChecker::Run(HloModule* module) {
+  for (auto* computation : module->computations()) {
+    for (const auto& instruction : computation->instructions()) {
+      TF_RETURN_IF_ERROR(
+          ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape()));
+      TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus(
+          instruction->shape(),
+          [&instruction](const Shape& subshape, const ShapeIndex&) {
+            if (LayoutUtil::IsSparseArray(subshape)) {
+              return xla::Unimplemented(
+                  "CPU backend does not support HLO instruction %s with shape "
+                  "containing a sparse layout: %s",
+                  instruction->ToString().c_str(),
+                  ShapeUtil::HumanStringWithLayout(instruction->shape())
+                      .c_str());
+            }
+            return Status::OK();
+          }));
+    }
+  }
+  return false;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h
new file mode 100644
index 0000000000000000000000000000000000000000..2271af7b247c2684d371010361308b4d7bcd6423
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// This pass should run early in the HLO pipeline and checks for HLO constructs
+// which are not supported by the CPU backend and cannot be removed via HLO
+// transformations (eg, sparse layouts).
+class CpuHloSupportChecker : public HloPassInterface {
+ public:
+  CpuHloSupportChecker() = default;
+  ~CpuHloSupportChecker() override = default;
+
+  tensorflow::StringPiece name() const override {
+    return "cpu_hlo_support_checker";
+  }
+
+  // Note: always returns false (no instructions are ever modified by this
+  // pass).
+  StatusOr<bool> Run(HloModule* module) override;
+};
+
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0f463e6de623fc6ab43d685ff2a5d6882ba7b8a2
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc
@@ -0,0 +1,72 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+using ::testing::HasSubstr;
+
+class CpuHloSupportCheckerTest : public HloTestBase {
+ protected:
+  CpuHloSupportChecker& checker() { return checker_; }
+
+ private:
+  CpuHloSupportChecker checker_;
+};
+
+TEST_F(CpuHloSupportCheckerTest, Add) {
+  HloComputation::Builder builder(TestName());
+  const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  HloInstruction* param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param0"));
+  HloInstruction* param1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, scalar_shape, "param1"));
+  builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape, HloOpcode::kAdd, param0, param1));
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  TF_ASSERT_OK(checker().Run(module.get()).status());
+}
+
+TEST_F(CpuHloSupportCheckerTest, SparseUnimplemented) {
+  HloComputation::Builder builder(TestName());
+  const Shape sparse_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {10}, 2);
+  HloInstruction* param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, sparse_shape, "param0"));
+  HloInstruction* param1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, sparse_shape, "param1"));
+  builder.AddInstruction(HloInstruction::CreateBinary(
+      sparse_shape, HloOpcode::kAdd, param0, param1));
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  Status status = checker().Run(module.get()).status();
+  ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED);
+  EXPECT_THAT(status.error_message(),
+              HasSubstr("CPU backend does not support"));
+  EXPECT_THAT(status.error_message(),
+              HasSubstr(ShapeUtil::HumanStringWithLayout(sparse_shape)));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
index f87ee3cecd932faac140636a3db7cd4aa0371b85..482e04052d5a914eab0e5bff2c7a83f3b698052f 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
@@ -26,7 +26,7 @@ int64 BytesInDimension(const Shape& shape, int64 dimension) {
          shape.dimensions(dimension);
 }
 
-bool IsFusile(const HloInstruction& hlo) {
+bool CanBeLoopFused(const HloInstruction& hlo) {
   // These are the only ones we fuse since we rely on effective elemental IR
   // generation.
   return hlo.IsElementwise() ||  //
@@ -42,6 +42,23 @@ bool IsFusile(const HloInstruction& hlo) {
          hlo.opcode() == HloOpcode::kTranspose;
 }
 
+bool IsMatrixVectorDot(const HloInstruction* hlo) {
+  const Shape& hlo_shape = hlo->shape();
+  return hlo->opcode() == HloOpcode::kDot && hlo_shape.dimensions_size() == 2 &&
+         (hlo_shape.dimensions(0) == 1 || hlo_shape.dimensions(1) == 1);
+}
+
+bool CanBeOutputFused(const HloInstruction* producer,
+                      const HloInstruction* consumer) {
+  return consumer->opcode() == HloOpcode::kAdd && IsMatrixVectorDot(producer) &&
+         producer->user_count() == 1;
+}
+
+bool CanBeOutputFusedIntoSomeOperand(const HloInstruction* consumer) {
+  return consumer->opcode() == HloOpcode::kAdd &&
+         (CanBeOutputFused(consumer->operand(0), consumer) ||
+          CanBeOutputFused(consumer->operand(1), consumer));
+}
 }  // namespace
 
 bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
@@ -52,7 +69,15 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
 
   constexpr int kFusionThresholdBytes = 16 * 1024;
 
-  if (!IsFusile(*producer)) {
+  if (CanBeOutputFused(producer, consumer)) {
+    return true;
+  }
+
+  if (CanBeOutputFusedIntoSomeOperand(producer)) {
+    return false;
+  }
+
+  if (!CanBeLoopFused(*producer)) {
     VLOG(2) << "Producer is not fusile.";
     return false;
   }
@@ -108,16 +133,13 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
     }
   }
 
-  if (consumer->opcode() == HloOpcode::kFusion) {
-    // InstructionFusion::ShouldFuse above only allows kLoop and kInput fusions.
-    // The CPU backend does not create kInput fusions, so we only expect to see
-    // kLoop here.
-    CHECK(consumer->fusion_kind() == HloInstruction::FusionKind::kLoop);
+  if (consumer->opcode() == HloOpcode::kFusion &&
+      consumer->fusion_kind() == HloInstruction::FusionKind::kLoop) {
     VLOG(2) << "Fusing: consumer is a fusion node.";
     return true;
   }
 
-  if (IsFusile(*consumer)) {
+  if (CanBeLoopFused(*consumer)) {
     VLOG(2) << "Fusing: consumer is elementwise or fusile.";
     return true;
   }
@@ -126,5 +148,11 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
   return false;
 }
 
+HloInstruction::FusionKind CpuInstructionFusion::ChooseKind(
+    const HloInstruction* producer, const HloInstruction* consumer) {
+  return CanBeOutputFused(producer, consumer)
+             ? HloInstruction::FusionKind::kOutput
+             : HloInstruction::FusionKind::kLoop;
+}
 }  // namespace cpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h
index 0eca4c3473e1454fe5dbd8bf855b4418cf553a94..07aff34974e0cfa6c7a129f82017b280fb1ccd59 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h
@@ -30,6 +30,8 @@ class CpuInstructionFusion : public InstructionFusion {
 
  protected:
   bool ShouldFuse(HloInstruction* consumer, int64 operand_index) override;
+  HloInstruction::FusionKind ChooseKind(
+      const HloInstruction* producer, const HloInstruction* consumer) override;
 };
 
 }  // namespace cpu
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
index b9e4d006d77ae76e33ac51440349400ea4eff118..595c3f55b321f47e2312b93e0c238c7637495d77 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
@@ -31,6 +31,14 @@ namespace {
 
 using InstructionFusionTest = HloTestBase;
 
+std::unique_ptr<HloInstruction> MakeDot(const Shape& shape, HloInstruction* lhs,
+                                        HloInstruction* rhs) {
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  return HloInstruction::CreateDot(shape, lhs, rhs, dot_dnums);
+}
+
 TEST_F(InstructionFusionTest, DotOperationFusion_Basic_0) {
   HloComputation::Builder builder(TestName());
   HloInstruction* arg0 = builder.AddInstruction(HloInstruction::CreateParameter(
@@ -40,8 +48,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Basic_0) {
 
   HloInstruction* exp0 = builder.AddInstruction(HloInstruction::CreateUnary(
       ShapeUtil::MakeShape(S32, {1024, 256}), HloOpcode::kExp, arg0));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1024, 1}), HloOpcode::kDot, exp0, arg1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1024, 1}), exp0, arg1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -59,8 +67,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Basic_1) {
 
   HloInstruction* exp1 = builder.AddInstruction(HloInstruction::CreateUnary(
       ShapeUtil::MakeShape(S32, {256, 1024}), HloOpcode::kExp, arg1));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1, 1024}), HloOpcode::kDot, arg0, exp1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1, 1024}), arg0, exp1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -80,8 +88,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Bitcast) {
       ShapeUtil::MakeShape(S32, {2, 512, 2, 128}), HloOpcode::kExp, arg0));
   HloInstruction* bitcast0 = builder.AddInstruction(HloInstruction::CreateUnary(
       ShapeUtil::MakeShape(S32, {1024, 256}), HloOpcode::kBitcast, exp0));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1024, 1}), HloOpcode::kDot, bitcast0, arg1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1024, 1}), bitcast0, arg1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -102,8 +110,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Reshape) {
   HloInstruction* reshape0 =
       builder.AddInstruction(HloInstruction::CreateReshape(
           ShapeUtil::MakeShape(S32, {1024, 256}), exp0));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1024, 1}), HloOpcode::kDot, reshape0, arg1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1024, 1}), reshape0, arg1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -121,8 +129,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_TooLarge) {
 
   HloInstruction* exp1 = builder.AddInstruction(HloInstruction::CreateUnary(
       ShapeUtil::MakeShape(S32, {256, 32 * 1024}), HloOpcode::kExp, arg1));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1, 32 * 1024}), HloOpcode::kDot, arg0, exp1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1, 32 * 1024}), arg0, exp1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -140,8 +148,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_ElementReuse) {
 
   HloInstruction* exp1 = builder.AddInstruction(HloInstruction::CreateUnary(
       ShapeUtil::MakeShape(S32, {256, 1024}), HloOpcode::kExp, arg1));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {2, 1024}), HloOpcode::kDot, arg0, exp1));
+  HloInstruction* dot = builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {2, 1024}), arg0, exp1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -162,8 +170,8 @@ TEST_F(InstructionFusionTest, DotOperationFusion_TransposeFusion) {
   HloInstruction* transpose1 =
       builder.AddInstruction(HloInstruction::CreateTranspose(
           ShapeUtil::MakeShape(S32, {256, 1024}), exp1, {1, 0}));
-  builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1, 1024}), HloOpcode::kDot, arg0, transpose1));
+  builder.AddInstruction(
+      MakeDot(ShapeUtil::MakeShape(F32, {1, 1024}), arg0, transpose1));
 
   auto module = CreateNewModule();
   auto computation = module->AddEntryComputation(builder.Build());
@@ -188,7 +196,9 @@ class OpcodeFusionTest : public InstructionFusionTest {
   // Runs CPU instruction fusion on the given module, and tests that the result
   // contains a fused op at the root with exactly the given multiset of opcodes.
   void RunFusionAndCheckOpcodesWereFused(
-      HloModule* module, const std::multiset<HloOpcode>& expected_opcodes) {
+      HloModule* module, const std::multiset<HloOpcode>& expected_opcodes,
+      HloInstruction::FusionKind fusion_kind =
+          HloInstruction::FusionKind::kLoop) {
     auto computation = module->entry_computation();
     auto did_fusion = CpuInstructionFusion().Run(module);
     ASSERT_TRUE(did_fusion.ok());
@@ -196,7 +206,7 @@ class OpcodeFusionTest : public InstructionFusionTest {
 
     HloInstruction* root = computation->root_instruction();
     ASSERT_THAT(root, op::Fusion());
-    EXPECT_EQ(root->fusion_kind(), HloInstruction::FusionKind::kLoop);
+    EXPECT_EQ(root->fusion_kind(), fusion_kind);
 
     std::vector<HloOpcode> fused_opcodes(root->fused_instruction_count());
     std::transform(root->fused_instructions().begin(),
@@ -608,6 +618,88 @@ TEST_F(OpcodeFusionTest, ReuseViaImplicitBroadcastBinary) {
               Not(op::Fusion()));
 }
 
+void CreateComputationForDotAddOutputFusionTest(const string& test_name,
+                                                HloModule* module, int m, int k,
+                                                int n,
+                                                bool add_extra_use_for_dot) {
+  HloComputation::Builder builder(test_name);
+
+  Shape dot_lhs_shape = ShapeUtil::MakeShape(F32, {m, k});
+  Shape dot_rhs_shape = ShapeUtil::MakeShape(F32, {k, n});
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {m, n});
+
+  auto* dot_lhs = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, dot_lhs_shape, "param0"));
+  auto* dot_rhs = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, dot_rhs_shape, "param1"));
+  auto* addend = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, dot_shape, "param2"));
+
+  auto* dot = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(dot_shape, dot_lhs, dot_rhs));
+  builder.AddInstruction(
+      HloInstruction::CreateBinary(dot_shape, HloOpcode::kAdd, dot, addend));
+
+  if (add_extra_use_for_dot) {
+    builder.AddInstruction(
+        HloInstruction::CreateOutfeed(dot_shape, dot, "no_config"));
+  }
+
+  module->AddEntryComputation(builder.Build());
+}
+
+TEST_F(OpcodeFusionTest, DotAddOutputFusion_1x50x19) {
+  auto module = CreateNewModule();
+  CreateComputationForDotAddOutputFusionTest(TestName(), module.get(), /*m=*/1,
+                                             /*k=*/50, /*n=*/19,
+                                             /*add_extra_use_for_dot=*/false);
+
+  RunFusionAndCheckOpcodesWereFused(
+      module.get(),
+      {HloOpcode::kDot, HloOpcode::kAdd, HloOpcode::kParameter,
+       HloOpcode::kParameter, HloOpcode::kParameter},
+      HloInstruction::FusionKind::kOutput);
+}
+
+TEST_F(OpcodeFusionTest, DotAddOutputFusion_19x50x1) {
+  auto module = CreateNewModule();
+  CreateComputationForDotAddOutputFusionTest(TestName(), module.get(), /*m=*/19,
+                                             /*k=*/50, /*n=*/1,
+                                             /*add_extra_use_for_dot=*/false);
+
+  RunFusionAndCheckOpcodesWereFused(
+      module.get(),
+      {HloOpcode::kDot, HloOpcode::kAdd, HloOpcode::kParameter,
+       HloOpcode::kParameter, HloOpcode::kParameter},
+      HloInstruction::FusionKind::kOutput);
+}
+
+TEST_F(OpcodeFusionTest, DotAddOutputFusion_19x50x19) {
+  auto module = CreateNewModule();
+  CreateComputationForDotAddOutputFusionTest(TestName(), module.get(), /*m=*/19,
+                                             /*k=*/50, /*n=*/19,
+                                             /*add_extra_use_for_dot=*/false);
+
+  TF_ASSERT_OK_AND_ASSIGN(bool fused_something,
+                          CpuInstructionFusion().Run(module.get()));
+  EXPECT_FALSE(fused_something);
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              Not(op::Fusion()));
+}
+
+TEST_F(OpcodeFusionTest, DotAddOutputFusion_19x50x1_multi_use) {
+  auto module = CreateNewModule();
+  CreateComputationForDotAddOutputFusionTest(TestName(), module.get(), /*m=*/19,
+                                             /*k=*/50, /*n=*/1,
+                                             /*add_extra_use_for_dot=*/true);
+
+  TF_ASSERT_OK_AND_ASSIGN(bool fused_something,
+                          CpuInstructionFusion().Run(module.get()));
+  EXPECT_FALSE(fused_something);
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              Not(op::Fusion()));
+}
+
 }  // namespace
 }  // namespace cpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
similarity index 55%
rename from tensorflow/compiler/xla/service/cpu/layout_assignment.cc
rename to tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
index 3f2d101959db50d9f775097f01d5a2ba25a0da8c..e8117377e61a4e21b8c45b929c518a18878fcb60 100644
--- a/tensorflow/compiler/xla/service/cpu/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/cpu/layout_assignment.h"
+#include "tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h"
 
 #include <numeric>
 
@@ -25,58 +25,77 @@ limitations under the License.
 namespace xla {
 namespace cpu {
 
-Status CpuLayoutAssignment::AddBackendConstraints(
-    LayoutConstraints* constraints) {
-  auto row_major_shape = [](const Shape& old_shape) {
-    Shape new_shape(old_shape);
-    std::vector<int64> dimension_order(new_shape.dimensions_size());
-    std::iota(dimension_order.rbegin(), dimension_order.rend(), 0);
-    *new_shape.mutable_layout() = LayoutUtil::MakeLayout(dimension_order);
-    return new_shape;
-  };
-  auto col_major_shape = [](const Shape& old_shape) {
-    Shape new_shape(old_shape);
-    std::vector<int64> dimension_order(new_shape.dimensions_size());
-    std::iota(dimension_order.begin(), dimension_order.end(), 0);
-    *new_shape.mutable_layout() = LayoutUtil::MakeLayout(dimension_order);
-    return new_shape;
-  };
-
-  // We want to change the layout of constant arrays to be column major when all
-  // of their users are dot operations that can be made faster with the flipped
-  // layout.  To avoid going quadriatic over the # of instructions, we cache
-  // this property in should_make_rhs_col_major -- it maps a constant to true if
-  // all of the users of said constant are dot operations that can be sped up.
-  // This cache is populated lazily as we encounter dot operations traversing
-  // the instruction stream.
-  tensorflow::gtl::FlatMap<const HloInstruction*, bool>
-      should_make_rhs_col_major_cache;
-  auto should_make_rhs_col_major = [&](const HloInstruction& instruction) {
-    if (ProfitableToImplementDotInUntiledLlvmIr(instruction) !=
-        DotInLlvmIrProfitable::kWithColumnMajorRhs) {
+// We want to change the layout of constant arrays to be column major when all
+// of their users are dot operations that can be made faster with the flipped
+// layout.  To avoid going quadriatic over the # of instructions, we cache this
+// property in should_make_rhs_col_major -- it maps a constant to true if all of
+// the users of said constant are dot operations that can be sped up.  This
+// cache is populated lazily as we encounter dot operations traversing the
+// instruction stream.
+
+namespace {
+using ::tensorflow::gtl::nullopt;
+using ::tensorflow::gtl::optional;
+
+using ShouldMakeOperandColMajorCache =
+    tensorflow::gtl::FlatMap<const HloInstruction*, bool>;
+}  // namespace
+
+static bool ShouldMakeAllUsersColMajor(const HloInstruction* instruction) {
+  for (auto* user : instruction->users()) {
+    optional<int64> operand_idx = ProfitableToMakeDotOperandColumnMajor(*user);
+    if (!operand_idx || user->operand(*operand_idx) != instruction ||
+        std::count(user->operands().begin(), user->operands().end(),
+                   instruction) != 1) {
       return false;
     }
+  }
+  return true;
+}
 
-    const auto* rhs = instruction.operand(1);
-    if (rhs->opcode() != HloOpcode::kConstant) {
-      return false;
-    }
+static optional<int64> ShouldMakeOperandColumnMajor(
+    ShouldMakeOperandColMajorCache* cache, const HloInstruction& instruction) {
+  optional<int64> operand_idx =
+      ProfitableToMakeDotOperandColumnMajor(instruction);
+  if (!operand_idx) {
+    return nullopt;
+  }
 
-    auto it = should_make_rhs_col_major_cache.find(rhs);
-    if (it != should_make_rhs_col_major_cache.end()) {
-      return it->second;
-    }
+  const HloInstruction* operand = instruction.operand(*operand_idx);
+  if (operand->opcode() != HloOpcode::kConstant) {
+    return nullopt;
+  }
 
-    bool result = std::all_of(
-        rhs->users().begin(), rhs->users().end(), [&](HloInstruction* user) {
-          return ProfitableToImplementDotInUntiledLlvmIr(*user) ==
-                     DotInLlvmIrProfitable::kWithColumnMajorRhs &&
-                 user->operand(0) != rhs;
-        });
+  auto it = cache->find(operand);
+  if (it == cache->end()) {
+    auto insert_result =
+        cache->insert({operand, ShouldMakeAllUsersColMajor(operand)});
+    CHECK(insert_result.second);
+    it = insert_result.first;
+  }
 
-    InsertOrDie(&should_make_rhs_col_major_cache, rhs, result);
-    return result;
-  };
+  return it->second ? operand_idx : nullopt;
+}
+
+static Shape RowMajorShape(const Shape& old_shape) {
+  Shape new_shape(old_shape);
+  std::vector<int64> dimension_order(new_shape.dimensions_size());
+  std::iota(dimension_order.rbegin(), dimension_order.rend(), 0);
+  *new_shape.mutable_layout() = LayoutUtil::MakeLayout(dimension_order);
+  return new_shape;
+}
+
+static Shape ColMajorShape(const Shape& old_shape) {
+  Shape new_shape(old_shape);
+  std::vector<int64> dimension_order(new_shape.dimensions_size());
+  std::iota(dimension_order.begin(), dimension_order.end(), 0);
+  *new_shape.mutable_layout() = LayoutUtil::MakeLayout(dimension_order);
+  return new_shape;
+}
+
+Status CpuLayoutAssignment::AddBackendConstraints(
+    LayoutConstraints* constraints) {
+  ShouldMakeOperandColMajorCache cache;
 
   const HloComputation* computation = constraints->computation();
   for (auto* instruction : computation->instructions()) {
@@ -91,9 +110,9 @@ Status CpuLayoutAssignment::AddBackendConstraints(
       //
       // These constraints are not hard constraints. Ideally, we should decide
       // which layouts to choose according to some cost model.
-      Shape output_shape(row_major_shape(convolution->shape()));
-      Shape input_shape(row_major_shape(lhs_instruction->shape()));
-      Shape filter_shape(row_major_shape(rhs_instruction->shape()));
+      Shape output_shape(RowMajorShape(convolution->shape()));
+      Shape input_shape(RowMajorShape(lhs_instruction->shape()));
+      Shape filter_shape(RowMajorShape(rhs_instruction->shape()));
 
       // Set layouts of the instructions' shapes.
       TF_RETURN_IF_ERROR(
@@ -102,11 +121,11 @@ Status CpuLayoutAssignment::AddBackendConstraints(
           constraints->SetOperandLayout(filter_shape, convolution, 1));
       TF_RETURN_IF_ERROR(
           constraints->SetInstructionLayout(output_shape, convolution));
-    } else if (should_make_rhs_col_major(*instruction)) {
-      auto* dot = instruction;
-      const auto& rhs_shape = dot->operand(1)->shape();
-      TF_RETURN_IF_ERROR(
-          constraints->SetOperandLayout(col_major_shape(rhs_shape), dot, 1));
+    } else if (optional<int64> op_idx =
+                   ShouldMakeOperandColumnMajor(&cache, *instruction)) {
+      const HloInstruction* op = instruction->operand(*op_idx);
+      TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
+          ColMajorShape(op->shape()), instruction, *op_idx));
     } else if (PotentiallyImplementedAsEigenDot(*instruction)) {
       const HloInstruction* dot = instruction;
       // In order to implement `dot` with Eigen dot, the layouts of the lhs,
@@ -114,17 +133,17 @@ Status CpuLayoutAssignment::AddBackendConstraints(
       //
       // These constraints are not hard constraints. Ideally, we should decide
       // which layouts to choose according to some cost model.
-      Shape output_shape(row_major_shape(dot->shape()));
+      Shape output_shape(RowMajorShape(dot->shape()));
 
       const HloInstruction* lhs_instruction = dot->operand(0);
-      Shape lhs_shape(row_major_shape(lhs_instruction->shape()));
+      Shape lhs_shape(RowMajorShape(lhs_instruction->shape()));
       TF_RETURN_IF_ERROR(constraints->SetOperandLayout(lhs_shape, dot, 0));
 
       // dot is a kDot or a kTransposeDot fusion node.  In the latter case, if
       // it represents X @ X, it may have just one operand.
       if (dot->operand_count() > 1) {
         const HloInstruction* rhs_instruction = dot->operand(1);
-        Shape rhs_shape(row_major_shape(rhs_instruction->shape()));
+        Shape rhs_shape(RowMajorShape(rhs_instruction->shape()));
         TF_RETURN_IF_ERROR(constraints->SetOperandLayout(rhs_shape, dot, 1));
       }
 
@@ -141,8 +160,12 @@ Status CpuLayoutAssignment::AddBackendConstraints(
         if (constraints->OperandBufferForwarded(instruction, operand_no)) {
           continue;
         }
+        // Skip operands with non-array shapes.
+        if (!ShapeUtil::IsArray(instruction->operand(operand_no)->shape())) {
+          continue;
+        }
         Shape operand_shape(
-            row_major_shape(instruction->operand(operand_no)->shape()));
+            RowMajorShape(instruction->operand(operand_no)->shape()));
         TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
             operand_shape, instruction, operand_no));
       }
diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment.h b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
similarity index 86%
rename from tensorflow/compiler/xla/service/cpu/layout_assignment.h
rename to tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
index 4fd8d68dd6b4f2a8b16f6c048743a996ea76a560..c8edbb9e15a5b6f9c574f5fe9d130d149499ebd2 100644
--- a/tensorflow/compiler/xla/service/cpu/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_LAYOUT_ASSIGNMENT_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_LAYOUT_ASSIGNMENT_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_LAYOUT_ASSIGNMENT_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_LAYOUT_ASSIGNMENT_H_
 
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/layout_assignment.h"
@@ -38,4 +38,4 @@ class CpuLayoutAssignment : public LayoutAssignment {
 }  // namespace cpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_LAYOUT_ASSIGNMENT_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_LAYOUT_ASSIGNMENT_H_
diff --git a/tensorflow/compiler/xla/service/cpu/layout_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
similarity index 54%
rename from tensorflow/compiler/xla/service/cpu/layout_assignment_test.cc
rename to tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
index 1ea5e8c7fc4896512e62396d0a756cda44785f11..6ba030fff3bbc5f413bfb133114ceb5309b77672 100644
--- a/tensorflow/compiler/xla/service/cpu/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/cpu/layout_assignment.h"
+#include "tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h"
 
 #include <initializer_list>
 #include <memory>
@@ -40,6 +40,8 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 
+namespace op = xla::testing::opcode_matchers;
+
 namespace xla {
 namespace {
 
@@ -61,8 +63,8 @@ TEST_F(CpuLayoutAssignmentTest, DotWithConstantRhsTensor) {
       HloInstruction::CreateParameter(0, lhs_shape, "param0"));
   auto dot_rhs = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateFromShape(rhs_shape)));
-  auto result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_shape, HloOpcode::kDot, dot_lhs, dot_rhs));
+  auto result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_shape, dot_lhs, dot_rhs));
 
   auto module = CreateNewModule();
   HloComputation* computation = module->AddEntryComputation(builder.Build());
@@ -98,10 +100,10 @@ TEST_F(CpuLayoutAssignmentTest, MultipleDotsWithSameConstantRhsTensor0) {
       HloInstruction::CreateParameter(1, lhs_shape, "param1"));
   auto dot_rhs = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateFromShape(rhs_shape)));
-  auto dot_a_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_shape, HloOpcode::kDot, dot_a_lhs, dot_rhs));
-  auto dot_b_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_shape, HloOpcode::kDot, dot_b_lhs, dot_rhs));
+  auto dot_a_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_shape, dot_a_lhs, dot_rhs));
+  auto dot_b_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_shape, dot_b_lhs, dot_rhs));
   builder.AddInstruction(HloInstruction::CreateBinary(
       result_shape, HloOpcode::kAdd, dot_a_result, dot_b_result));
 
@@ -142,10 +144,10 @@ TEST_F(CpuLayoutAssignmentTest, MultipleDotsWithSameConstantRhsTensor1) {
       HloInstruction::CreateParameter(1, lhs_b_shape, "param1"));
   auto dot_rhs = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateFromShape(rhs_shape)));
-  auto dot_a_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_a_shape, HloOpcode::kDot, dot_a_lhs, dot_rhs));
-  auto dot_b_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_b_shape, HloOpcode::kDot, dot_b_lhs, dot_rhs));
+  auto dot_a_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_a_shape, dot_a_lhs, dot_rhs));
+  auto dot_b_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_b_shape, dot_b_lhs, dot_rhs));
   auto tuple_result = builder.AddInstruction(
       HloInstruction::CreateTuple({dot_a_result, dot_b_result}));
 
@@ -180,8 +182,8 @@ TEST_F(CpuLayoutAssignmentTest, DotWithConstantLhsTensor) {
       HloInstruction::CreateConstant(Literal::CreateFromShape(lhs_shape)));
   auto dot_rhs = builder.AddInstruction(
       HloInstruction::CreateParameter(0, rhs_shape, "param0"));
-  auto dot_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_shape, HloOpcode::kDot, dot_lhs, dot_rhs));
+  auto dot_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_shape, dot_lhs, dot_rhs));
 
   auto module = CreateNewModule();
   HloComputation* computation = module->AddEntryComputation(builder.Build());
@@ -220,8 +222,8 @@ TEST_F(CpuLayoutAssignmentTest, DotWithConstantRhsTensorThroughGTE) {
       HloInstruction::CreateParameter(0, lhs_shape, "param0"));
   auto dot_rhs = builder.AddInstruction(
       HloInstruction::CreateGetTupleElement(rhs_shape, constant, 1));
-  auto dot_result = builder.AddInstruction(HloInstruction::CreateBinary(
-      result_shape, HloOpcode::kDot, dot_lhs, dot_rhs));
+  auto dot_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(result_shape, dot_lhs, dot_rhs));
 
   auto module = CreateNewModule();
   HloComputation* computation = module->AddEntryComputation(builder.Build());
@@ -241,5 +243,172 @@ TEST_F(CpuLayoutAssignmentTest, DotWithConstantRhsTensorThroughGTE) {
     EXPECT_NE(instruction->opcode(), HloOpcode::kCopy);
   }
 }
+
+struct DotOutputFusionLayoutAssignmentResult {
+  bool layout_assignment_changed_something;
+  const HloInstruction* dot_lhs_fusion_param;
+  const HloInstruction* dot_rhs_fusion_param;
+  const HloInstruction* addend_fusion_param;
+};
+
+static StatusOr<DotOutputFusionLayoutAssignmentResult> RunDotOutputFusion(
+    HloModule* module, const string& test_name, int m, int k, int n,
+    const int64 dot_operand_idx_in_add) {
+  DotOutputFusionLayoutAssignmentResult result;
+
+  CHECK(dot_operand_idx_in_add == 0 || dot_operand_idx_in_add == 1);
+
+  auto builder = HloComputation::Builder(test_name);
+
+  Shape dot_lhs_shape = ShapeUtil::MakeShape(F32, {m, k});
+  Shape dot_rhs_shape = ShapeUtil::MakeShape(F32, {k, n});
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {m, n});
+
+  HloInstruction* dot_lhs = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, dot_lhs_shape, "param0"));
+  HloInstruction* addend = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, dot_shape, "param1"));
+  HloInstruction* dot_rhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateFromShape(dot_rhs_shape)));
+  HloInstruction* dot_result = builder.AddInstruction(
+      HloInstruction::CreateCanonicalDot(dot_shape, dot_lhs, dot_rhs));
+  HloInstruction* add_result;
+  if (dot_operand_idx_in_add == 0) {
+    add_result = builder.AddInstruction(HloInstruction::CreateBinary(
+        dot_shape, HloOpcode::kAdd, dot_result, addend));
+  } else {
+    add_result = builder.AddInstruction(HloInstruction::CreateBinary(
+        dot_shape, HloOpcode::kAdd, addend, dot_result));
+  }
+
+  HloComputation* computation = module->AddEntryComputation(builder.Build());
+
+  HloInstruction* fusion_instruction =
+      module->entry_computation()->AddInstruction(HloInstruction::CreateFusion(
+          dot_shape, HloInstruction::FusionKind::kOutput, add_result));
+  TF_RETURN_IF_ERROR(
+      computation->ReplaceInstruction(add_result, fusion_instruction));
+
+  HloInstruction* fused_add =
+      fusion_instruction->fused_instructions_computation()->root_instruction();
+  HloInstruction* fused_dot = fusion_instruction->FuseInstruction(dot_result);
+
+  TF_RETURN_IF_ERROR(
+      computation->RemoveInstructionAndUnusedOperands(dot_result));
+
+  ComputationLayout computation_layout(computation->ComputeProgramShape());
+  *computation_layout.mutable_parameter_layout(0) =
+      ShapeLayout(LayoutUtil::GetWithDefaultLayout(dot_lhs_shape));
+  *computation_layout.mutable_parameter_layout(1) =
+      ShapeLayout(LayoutUtil::GetWithDefaultLayout(dot_shape));
+  *computation_layout.mutable_result_layout() =
+      ShapeLayout(LayoutUtil::GetWithDefaultLayout(dot_shape));
+
+  result.dot_lhs_fusion_param =
+      fusion_instruction->operand(fused_dot->operand(0)->parameter_number());
+  result.dot_rhs_fusion_param =
+      fusion_instruction->operand(fused_dot->operand(1)->parameter_number());
+  result.addend_fusion_param = fusion_instruction->operand(
+      fused_add->operand(1 - dot_operand_idx_in_add)->parameter_number());
+
+  cpu::CpuLayoutAssignment layout_assignment(&computation_layout);
+  TF_ASSIGN_OR_RETURN(result.layout_assignment_changed_something,
+                      layout_assignment.Run(module));
+
+  return result;
+}
+
+static void AssertCorrectLayoutForDotOutputFusion(
+    const HloComputation* computation,
+    const DotOutputFusionLayoutAssignmentResult& layout_assignment_result,
+    bool expect_col_major_dot_rhs) {
+  Layout expected_dot_rhs_layout = expect_col_major_dot_rhs
+                                       ? LayoutUtil::MakeLayout({0, 1})
+                                       : LayoutUtil::MakeLayout({1, 0});
+  EXPECT_TRUE(LayoutUtil::Equal(
+      expected_dot_rhs_layout,
+      layout_assignment_result.dot_rhs_fusion_param->shape().layout()));
+
+  EXPECT_TRUE(LayoutUtil::Equal(
+      LayoutUtil::MakeLayout({1, 0}),
+      layout_assignment_result.dot_lhs_fusion_param->shape().layout()));
+
+  EXPECT_TRUE(LayoutUtil::Equal(
+      LayoutUtil::MakeLayout({1, 0}),
+      layout_assignment_result.addend_fusion_param->shape().layout()));
+  EXPECT_THAT(computation->instructions(), Each(Not(op::Copy())));
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_1x50x19_dot_idx_0) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/1, /*k=*/50, /*n=*/19,
+                         /*dot_operand_idx_in_add=*/0));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/true);
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_1x50x19_dot_idx_1) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/1, /*k=*/50, /*n=*/19,
+                         /*dot_operand_idx_in_add=*/1));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/true);
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_19x50x1_dot_idx_0) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/19, /*k=*/50, /*n=*/1,
+                         /*dot_operand_idx_in_add=*/0));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/false);
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_19x50x1_dot_idx_1) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/19, /*k=*/50, /*n=*/1,
+                         /*dot_operand_idx_in_add=*/1));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/false);
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_19x50x19_dot_idx_0) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/19, /*k=*/50, /*n=*/19,
+                         /*dot_operand_idx_in_add=*/0));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/false);
+}
+
+TEST_F(CpuLayoutAssignmentTest, DotOutputFusion_19x50x19_dot_idx_1) {
+  std::unique_ptr<HloModule> module = CreateNewModule();
+  TF_ASSERT_OK_AND_ASSIGN(
+      DotOutputFusionLayoutAssignmentResult layout_assignment_result,
+      RunDotOutputFusion(module.get(), TestName(), /*m=*/19, /*k=*/50, /*n=*/19,
+                         /*dot_operand_idx_in_add=*/1));
+  ASSERT_TRUE(layout_assignment_result.layout_assignment_changed_something);
+  AssertCorrectLayoutForDotOutputFusion(module->entry_computation(),
+                                        layout_assignment_result,
+                                        /*expect_col_major_dot_rhs=*/false);
+}
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
index 7908dc173d79a4a9dcb6127ac344267e27d2b5f2..1ef45dbec39a0880ebb123ba3fcd1fd6c89eb39a 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
@@ -37,6 +37,7 @@ extern const char* const kEigenMatMulF64SymbolName =
     "__xla_cpu_runtime_EigenMatMulF64";
 extern const char* const kEigenConvF32SymbolName =
     "__xla_cpu_runtime_EigenConvF32";
+extern const char* const kEigenFftSymbolName = "__xla_cpu_runtime_EigenFft";
 extern const char* const kEigenSingleThreadedMatMulF32SymbolName =
     "__xla_cpu_runtime_EigenSingleThreadedMatMulF32";
 extern const char* const kEigenSingleThreadedMatMulF64SymbolName =
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
index 2ade455b8a0a43dda8c93bbb79891439da2e4f75..3e1f08071119c938619d02777513e5b834077118 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h
@@ -44,6 +44,7 @@ namespace runtime {
 extern const char* const kEigenMatMulF32SymbolName;
 extern const char* const kEigenMatMulF64SymbolName;
 extern const char* const kEigenConvF32SymbolName;
+extern const char* const kEigenFftSymbolName;
 extern const char* const kEigenSingleThreadedMatMulF32SymbolName;
 extern const char* const kEigenSingleThreadedMatMulF64SymbolName;
 extern const char* const kEigenSingleThreadedConvF32SymbolName;
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.cc
index 181deedde71bab3cb9ef1820a88de557131b9311..b1c1142e8d988be2ca00809b4be505466071c72f 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.cc
@@ -19,7 +19,7 @@ limitations under the License.
 
 #include "third_party/eigen3/Eigen/Core"
 
-#ifdef __AVX__
+#ifdef TF_XLA_HAS_AVX
 xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_ExpV8F32AVX(
     xla::cpu::runtime::V8F32AVX x) {
   return Eigen::internal::pexp(x);
@@ -29,7 +29,7 @@ xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_LogV8F32AVX(
     xla::cpu::runtime::V8F32AVX x) {
   return Eigen::internal::plog(x);
 }
-#endif  // __AVX__
+#endif  // TF_XLA_HAS_AVX
 
 namespace xla {
 namespace cpu {
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h
index acfada8540d89bb098bb0b04e109441e2123e678..e5c782f93f54dc9f8f76fce7e4735a60e8847583 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h
@@ -24,6 +24,11 @@ limitations under the License.
 
 #include "tensorflow/core/platform/macros.h"
 
+#if defined(__AVX__)
+#include <immintrin.h>
+#define TF_XLA_HAS_AVX
+#endif
+
 namespace xla {
 namespace cpu {
 namespace runtime {
@@ -31,21 +36,25 @@ namespace runtime {
 extern const char *const kExpV8F32AVXSymbolName;
 extern const char *const kLogV8F32AVXSymbolName;
 
-typedef float V8F32AVX __attribute__((__vector_size__(32)));
+#ifdef TF_XLA_HAS_AVX
+typedef __m256 V8F32AVX;
+#endif
 }  // namespace runtime
 }  // namespace cpu
 }  // namespace xla
 
 extern "C" {
 
+#ifdef TF_XLA_HAS_AVX
 // The following functions are vectorized versions of a selection of libm
 // library functions.
 // References to these functions are created by the LLVM vectorizer.
 xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_ExpV8F32AVX(
-    xla::cpu::runtime::V8F32AVX x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V8F32AVX x);
 
 xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_LogV8F32AVX(
-    xla::cpu::runtime::V8F32AVX x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V8F32AVX x);
+#endif
 }
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_AVX_H_
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.cc
index abe792b2787ce8baf56ee62585a0ab886d922a23..8099b722f10ecb83f7cf6c58ba2abb783478b97f 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.cc
@@ -19,7 +19,7 @@ limitations under the License.
 
 #include "third_party/eigen3/Eigen/Core"
 
-#ifdef __ARM_NEON__
+#ifdef TF_XLA_HAS_NEON
 
 xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_ExpV4F32NEON(
     xla::cpu::runtime::V4F32NEON x) {
@@ -32,7 +32,7 @@ xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_LogV4F32NEON(
   return Eigen::internal::plog(p);
 }
 
-#endif  // __ARM_NEON__
+#endif  // TF_XLA_HAS_NEON
 
 namespace xla {
 namespace cpu {
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h
index 75cb16b273973d2bf665d378084343fd612a2941..2f5d1a872aaf3868d6d27f88a4f05c778d45660f 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h
@@ -27,6 +27,7 @@ limitations under the License.
 // __attribute__((__vector_size__(*))).  Unfortunately, the typedef for the ARM
 // NEON SIMD types is not portable, so the type has to come from <arm_neon.h>
 #include <arm_neon.h>
+#define TF_XLA_HAS_NEON
 #endif  // __ARM_NEON__
 
 namespace xla {
@@ -36,12 +37,9 @@ namespace runtime {
 extern const char *const kExpV4F32NEONSymbolName;
 extern const char *const kLogV4F32NEONSymbolName;
 
-#ifdef __ARM_NEON__
+#ifdef TF_XLA_HAS_NEON
 typedef float32x4_t V4F32NEON;
-#else
-// On non-ARM platforms ensure the declaration is present
-struct V4F32NEON;
-#endif  // __ARM_NEON__
+#endif  // TF_XLA_HAS_NEON
 
 }  // namespace runtime
 }  // namespace cpu
@@ -49,14 +47,16 @@ struct V4F32NEON;
 
 extern "C" {
 
+#ifdef TF_XLA_HAS_NEON
 // The following functions are vectorized versions of a selection of libm
 // library functions.
 // References to these functions are created by the LLVM vectorizer.
 xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_ExpV4F32NEON(
-    xla::cpu::runtime::V4F32NEON x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V4F32NEON x);
 
 xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_LogV4F32NEON(
-    xla::cpu::runtime::V4F32NEON x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V4F32NEON x);
+#endif  // TF_XLA_HAS_NEON
 }
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_NEON_H_
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.cc
index a9a45db5a424d2faecbd437542c41fbd7fdf0bb8..d8ecf231cc8c859ac88e1ef1478f7107cd86a052 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.cc
@@ -19,7 +19,7 @@ limitations under the License.
 
 #include "third_party/eigen3/Eigen/Core"
 
-#ifdef __SSE4_1__
+#ifdef TF_XLA_HAS_SSE4_1
 
 xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_ExpV4F32SSE(
     xla::cpu::runtime::V4F32SSE x) {
@@ -33,7 +33,7 @@ xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_LogV4F32SSE(
   return Eigen::internal::plog(p);
 }
 
-#endif  // __SSE4_1__
+#endif  // TF_XLA_HAS_SSE4_1
 
 namespace xla {
 namespace cpu {
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
index 96587d10d2b86e14ff6a7400fdf14ca0d994ddc5..aeb1eda23f76a6b5cb520b6673e0a011fa1130c7 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h
@@ -24,6 +24,13 @@ limitations under the License.
 
 #include "tensorflow/core/platform/macros.h"
 
+// MSVC does not have __SSE4_1__ macro. Eigen enables EIGEN_VECTORIZE_SSE4_1
+// when __AVX__ is defined, we should do the same.
+#if defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__))
+#include <smmintrin.h>
+#define TF_XLA_HAS_SSE4_1
+#endif
+
 namespace xla {
 namespace cpu {
 namespace runtime {
@@ -31,7 +38,9 @@ namespace runtime {
 extern const char *const kExpV4F32SSESymbolName;
 extern const char *const kLogV4F32SSESymbolName;
 
-typedef float V4F32SSE __attribute__((__vector_size__(16)));
+#ifdef TF_XLA_HAS_SSE4_1
+typedef __m128 V4F32SSE;
+#endif
 
 }  // namespace runtime
 }  // namespace cpu
@@ -39,14 +48,16 @@ typedef float V4F32SSE __attribute__((__vector_size__(16)));
 
 extern "C" {
 
+#ifdef TF_XLA_HAS_SSE4_1
 // The following functions are vectorized versions of a selection of libm
 // library functions.
 // References to these functions are created by the LLVM vectorizer.
 xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_ExpV4F32SSE(
-    xla::cpu::runtime::V4F32SSE x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V4F32SSE x);
 
 xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_LogV4F32SSE(
-    xla::cpu::runtime::V4F32SSE x) TF_ATTRIBUTE_WEAK;
+    xla::cpu::runtime::V4F32SSE x);
+#endif
 }
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_SSE4_1_H_
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc
index b53719fcc260d706eab3d7460c42af4a1b5e775f..f5e61aef534da57ce13d3ee9bbeaeaec31f53d2e 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc
@@ -98,7 +98,7 @@ Status CpuTransferManager::TransferLiteralToInfeed(se::StreamExecutor* executor,
 
   if (!ShapeUtil::IsTuple(shape)) {
     int64 size = GetByteSizeRequirement(shape);
-    return TransferBufferToInfeed(executor, size, literal.InternalData());
+    return TransferBufferToInfeed(executor, size, literal.untyped_data());
   }
 
   if (ShapeUtil::IsNestedTuple(shape)) {
@@ -111,20 +111,20 @@ Status CpuTransferManager::TransferLiteralToInfeed(se::StreamExecutor* executor,
   // enqueue the resulting destination device addresses with the
   // infeed manager.
   std::vector<cpu::runtime::XfeedBuffer*> buffers;
-  buffers.reserve(literal.tuple_literals_size());
+  buffers.reserve(ShapeUtil::TupleElementCount(shape));
   auto cleanup = tensorflow::gtl::MakeCleanup([&buffers]() {
     for (cpu::runtime::XfeedBuffer* b : buffers) {
       b->Done(Cancelled("Failed to infeed buffer to device."));
     }
   });
 
-  for (const auto& tuple_element : literal.tuple_literals()) {
-    const Shape& tuple_element_shape = tuple_element.shape();
+  for (int64 i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) {
+    const Shape& tuple_element_shape = ShapeUtil::GetSubshape(shape, {i});
     int64 tuple_element_size = GetByteSizeRequirement(tuple_element_shape);
     TF_ASSIGN_OR_RETURN(
         cpu::runtime::XfeedBuffer * buffer,
         TransferBufferToInfeedInternal(executor, tuple_element_size,
-                                       tuple_element.InternalData()));
+                                       literal.untyped_data({i})));
     buffers.push_back(buffer);
   }
 
@@ -187,14 +187,14 @@ Status CpuTransferManager::TransferLiteralFromOutfeed(
         literal_shape.element_type(), dimensions));
     TF_ASSIGN_OR_RETURN(Shape received_shape,
                         TransferArrayBufferFromOutfeed(
-                            executor, literal->MutableInternalData(), size));
+                            executor, literal->untyped_data(), size));
     TF_RET_CHECK(ShapeUtil::Compatible(received_shape, literal->shape()))
         << "Shape received from outfeed "
         << ShapeUtil::HumanString(received_shape)
         << " did not match the shape that was requested for outfeed: "
         << ShapeUtil::HumanString(literal_shape);
     TF_RET_CHECK(size == GetByteSizeRequirement(received_shape));
-    *literal->mutable_shape() = received_shape;
+    *literal->mutable_shape_do_not_use() = received_shape;
     return Status::OK();
   }
 
@@ -217,7 +217,7 @@ Status CpuTransferManager::TransferLiteralFromOutfeed(
     auto empty = Literal::CreateFromDimensions(
         tuple_element_shape.element_type(), dimensions);
     int64 size = GetByteSizeRequirement(tuple_element_shape);
-    buffer_data.push_back({empty->MutableInternalData(), size});
+    buffer_data.push_back({empty->untyped_data(), size});
     elements.push_back(std::move(empty));
   }
 
@@ -233,7 +233,7 @@ Status CpuTransferManager::TransferLiteralFromOutfeed(
                GetByteSizeRequirement(received_shape));
 
   for (int64 i = 0; i < literal_shape.tuple_shapes_size(); ++i) {
-    *elements[i]->mutable_shape() = received_shape.tuple_shapes(i);
+    *elements[i]->mutable_shape_do_not_use() = received_shape.tuple_shapes(i);
   }
   *literal = std::move(*Literal::MakeTupleOwned(std::move(elements)));
   TF_RET_CHECK(ShapeUtil::Equal(literal->shape(), literal_shape));
diff --git a/tensorflow/compiler/xla/service/cpu/disassembler.h b/tensorflow/compiler/xla/service/cpu/disassembler.h
index b6feaa7e45cee26eb7f850081bd1fad2cb63b15c..5e302f88990ee4a3c37758881ecec4d6f71dd8e6 100644
--- a/tensorflow/compiler/xla/service/cpu/disassembler.h
+++ b/tensorflow/compiler/xla/service/cpu/disassembler.h
@@ -37,7 +37,7 @@ struct DisassemblerResult {
   DisassemblerResult(const string& text, size_t code_size_bytes)
       : text(text), code_size_bytes(code_size_bytes) {}
 
-  // The dissassembled text sections of the object file.
+  // The disassembled text sections of the object file.
   string text;
   // The total number of bytes of executable code in the object file.
   uint64_t code_size_bytes;
@@ -53,7 +53,7 @@ class Disassembler {
   // Returns a DisassemblerResult for the given object file, containing the
   // disassembled code.
   //
-  // If we couldnt' retrieve a disassembler for this platform, an error status
+  // If we couldn't retrieve a disassembler for this platform, an error status
   // is returned.
   StatusOr<DisassemblerResult> DisassembleObjectFile(
       const llvm::object::ObjectFile& object_file) const;
diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
index 4c40dae5122b0853a72d6428fc120220e3a69237..c9fc586b9a4c06eb9e1f111d8f9bd2f717990aab 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
@@ -23,10 +23,11 @@ limitations under the License.
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Value.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h"
+#include "tensorflow/compiler/xla/service/cpu/target_machine_features.h"
+#include "tensorflow/compiler/xla/service/cpu/vector_support_library.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
-#include "tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -143,7 +144,8 @@ class ColumnMajorMatrixVectorProductEmitter {
   ColumnMajorMatrixVectorProductEmitter(PrimitiveType scalar_type,
                                         int64 tile_rows, int64 tile_cols,
                                         int64 m, int64 k, llvm::Value* lhs,
-                                        llvm::Value* rhs, llvm::Value* result,
+                                        llvm::Value* rhs, llvm::Value* addend,
+                                        llvm::Value* result,
                                         llvm::IRBuilder<>* ir_builder)
       : scalar_type_(scalar_type),
         tile_rows_(tile_rows),
@@ -152,6 +154,7 @@ class ColumnMajorMatrixVectorProductEmitter {
         k_(k),
         lhs_(lhs),
         rhs_(rhs),
+        addend_(addend),
         result_(result),
         ir_builder_(ir_builder),
         ksl_(ir_builder_),
@@ -173,7 +176,7 @@ class ColumnMajorMatrixVectorProductEmitter {
   }
 
   // Load a tile of values from the RHS.  For the RHS a "tile" is a contiguous
-  // sequnce of `count` values, each one broadcasted to the vector width.
+  // sequence of `count` values, each one broadcasted to the vector width.
   std::vector<llvm::Value*> LoadRhsTile(llvm::Value* offset, int64 count) {
     llvm::Value* base_pointer = vsl_.ComputeOffsetPointer(rhs_, offset);
     std::vector<llvm::Value*> result;
@@ -198,6 +201,7 @@ class ColumnMajorMatrixVectorProductEmitter {
   int64 k_;
   llvm::Value* lhs_;
   llvm::Value* rhs_;
+  llvm::Value* addend_;
   llvm::Value* result_;
   llvm::IRBuilder<>* ir_builder_;
   KernelSupportLibrary ksl_;
@@ -242,9 +246,10 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopTiled(
            /*step=*/tile_rows_, [&](llvm::Value* row) {
              std::vector<llvm::Value*> lhs_tile =
                  lhs_tile_loader->LoadTile(/*minor_dim_offset=*/row);
-             llvm::Value* accumulator = is_first_column
-                                            ? vsl_.GetZeroVector()
-                                            : vsl_.LoadVector(result_, row);
+             llvm::Value* accumulator =
+                 is_first_column ? (addend_ ? vsl_.LoadVector(addend_, row)
+                                            : vsl_.GetZeroVector())
+                                 : vsl_.LoadVector(result_, row);
              for (int i = 0; i < columns; i++) {
                accumulator = vsl_.MulAdd(lhs_tile[i], rhs_tile[i], accumulator);
              }
@@ -288,7 +293,18 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue(
                   ir_builder_->getInt1(is_first_tiled_column));
               ksl_.If(
                   setting_result_first_time,
-                  [&]() { vsl_.StoreScalar(product, result_, scalar_row); },
+                  /*true_block_generator=*/
+                  [&]() {
+                    if (addend_) {
+                      vsl_.StoreScalar(
+                          vsl_.Add(vsl_.LoadScalar(addend_, scalar_row),
+                                   product),
+                          result_, scalar_row);
+                    } else {
+                      vsl_.StoreScalar(product, result_, scalar_row);
+                    }
+                  },
+                  /*false_block_generator=*/
                   [&]() {
                     vsl_.StoreScalar(
                         vsl_.Add(vsl_.LoadScalar(result_, scalar_row), product),
@@ -353,7 +369,7 @@ class RowMajorMatrixVectorProductEmitter {
   RowMajorMatrixVectorProductEmitter(PrimitiveType scalar_type, int64 tile_rows,
                                      int64 tile_cols, int64 m, int64 k,
                                      llvm::Value* lhs, llvm::Value* rhs,
-                                     llvm::Value* result,
+                                     llvm::Value* addend, llvm::Value* result,
                                      llvm::IRBuilder<>* ir_builder)
       : scalar_type_(scalar_type),
         tile_rows_(tile_rows),
@@ -362,6 +378,7 @@ class RowMajorMatrixVectorProductEmitter {
         k_(k),
         lhs_(lhs),
         rhs_(rhs),
+        addend_(addend),
         result_(result),
         ir_builder_(ir_builder),
         ksl_(ir_builder_),
@@ -394,6 +411,7 @@ class RowMajorMatrixVectorProductEmitter {
   int64 k_;
   llvm::Value* lhs_;
   llvm::Value* rhs_;
+  llvm::Value* addend_;
   llvm::Value* result_;
   llvm::IRBuilder<>* ir_builder_;
   KernelSupportLibrary ksl_;
@@ -415,11 +433,32 @@ void RowMajorMatrixVectorProductEmitter::EmitOuterLoopBody(llvm::Value* row,
   EmitInnerLoopEpilogue(/*current_tile_row=*/row, /*rows=*/row_count,
                         &scalar_accumulators);
 
+  std::vector<llvm::Value*> accumulator_values;
+  std::transform(
+      vector_accumulators.begin(), vector_accumulators.end(),
+      std::back_inserter(accumulator_values),
+      [](const VectorVariable& vector_var) { return vector_var.Get(); });
+
+  std::vector<llvm::Value*> horizontal_sums;
+  if (row_count == vsl_.vector_size()) {
+    if (addend_) {
+      horizontal_sums = vsl_.ComputeHorizontalSums(
+          std::move(accumulator_values), vsl_.LoadVector(addend_, row));
+    } else {
+      horizontal_sums =
+          vsl_.ComputeHorizontalSums(std::move(accumulator_values));
+    }
+  } else {
+    horizontal_sums = vsl_.ComputeHorizontalSums(std::move(accumulator_values));
+  }
+
   for (int i = 0; i < row_count; i++) {
     llvm::Value* result_value =
-        vsl_.Add(vsl_.AddReduce(vector_accumulators[i].Get()),
-                 scalar_accumulators[i].Get());
+        vsl_.Add(horizontal_sums[i], scalar_accumulators[i].Get());
     llvm::Value* offset = ir_builder_->CreateAdd(ir_builder_->getInt64(i), row);
+    if (addend_ && row_count != vsl_.vector_size()) {
+      result_value = vsl_.Add(vsl_.LoadScalar(addend_, offset), result_value);
+    }
     vsl_.StoreScalar(result_value, result_, offset);
   }
 }
@@ -483,49 +522,52 @@ void RowMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue(
 
 }  // namespace
 
-DotOpEmitter::DotOpEmitter(const HloInstruction& dot, bool transpose_lhs,
-                           bool transpose_rhs,
-                           const llvm_ir::IrArray& target_array,
-                           const llvm_ir::IrArray& lhs_array,
-                           const llvm_ir::IrArray& rhs_array,
-                           llvm::Value* executable_run_options_value,
-                           llvm::IRBuilder<>* ir_builder,
-                           const HloModuleConfig& hlo_module_config)
+DotOpEmitter::DotOpEmitter(
+    const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs,
+    const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& lhs_array,
+    const llvm_ir::IrArray& rhs_array, const llvm_ir::IrArray* addend_array,
+    llvm::Value* executable_run_options_value, llvm::IRBuilder<>* ir_builder,
+    const HloModuleConfig& hlo_module_config,
+    const TargetMachineFeatures& target_machine_features)
     : dot_(dot),
       transpose_lhs_(transpose_lhs),
       transpose_rhs_(transpose_rhs),
       target_array_(target_array),
       lhs_array_(lhs_array),
       rhs_array_(rhs_array),
+      addend_array_(addend_array),
       executable_run_options_value_(executable_run_options_value),
       ir_builder_(ir_builder),
-      hlo_module_config_(hlo_module_config) {}
+      hlo_module_config_(hlo_module_config),
+      target_machine_features_(target_machine_features) {}
 
 /* static */ tensorflow::Status DotOpEmitter::EmitDotOperation(
     const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs,
     const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& lhs_array,
-    const llvm_ir::IrArray& rhs_array,
+    const llvm_ir::IrArray& rhs_array, const llvm_ir::IrArray* addend_array,
     llvm::Value* executable_run_options_value, llvm::IRBuilder<>* ir_builder,
-    const HloModuleConfig& hlo_module_config) {
+    const HloModuleConfig& hlo_module_config,
+    const TargetMachineFeatures& target_machine_features) {
   PrimitiveType type = target_array.GetShape().element_type();
   TF_RET_CHECK(F32 == type || F64 == type || C64 == type);
   DotOpEmitter dot_emitter(dot, transpose_lhs, transpose_rhs, target_array,
-                           lhs_array, rhs_array, executable_run_options_value,
-                           ir_builder, hlo_module_config);
+                           lhs_array, rhs_array, addend_array,
+                           executable_run_options_value, ir_builder,
+                           hlo_module_config, target_machine_features);
   return dot_emitter.Emit();
 }
 
 bool DotOpEmitter::ShapesAreLegalForRuntimeDot() const { return true; }
 
 bool DotOpEmitter::EmitLlvmIrDotIfProfitable() {
-  if (dot_.shape().dimensions_size() != 2 ||
-      ProfitableToImplementDotInUntiledLlvmIr(dot_) ==
-          DotInLlvmIrProfitable::kYes) {
+  if (dot_.shape().dimensions_size() != 2) {
     return false;
   }
 
-  if (!primitive_util::IsFloatingPointType(dot_.shape().element_type()) &&
-      !primitive_util::IsIntegralType(dot_.shape().element_type())) {
+  PrimitiveType primitive_type = dot_.shape().element_type();
+
+  if (!primitive_util::IsFloatingPointType(primitive_type) &&
+      !primitive_util::IsIntegralType(primitive_type)) {
     return false;
   }
 
@@ -575,30 +617,76 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() {
   int64 tiling_factor = GetGemvTilingFactor();
   CHECK_GT(tiling_factor, 0);
 
+  llvm::Value* result_op = target_array_.GetBasePointer();
+  llvm::Value* lhs_op =
+      swap_operands ? rhs_array_.GetBasePointer() : lhs_array_.GetBasePointer();
+  llvm::Value* rhs_op =
+      swap_operands ? lhs_array_.GetBasePointer() : rhs_array_.GetBasePointer();
+
+  const bool enable_fast_math =
+      hlo_module_config_.debug_options().xla_enable_fast_math();
+  const bool optimize_for_size =
+      options::OptimizeForSizeRequested(hlo_module_config_);
+
+  const int target_vector_register_element_size =
+      target_machine_features_.vector_register_num_elements(
+          *ir_builder_->GetInsertBlock()->getParent(), primitive_type);
+
+  // We may not always know the vector register size for the target we're
+  // compiling against, in which case target_vector_register_element_size is 0.
+  // In these cases we choose a default LLVM IR register size.
+  const int kUnknownTargetVectorRegisterSize = 4;
+  const int vector_register_element_size =
+      target_vector_register_element_size == 0
+          ? kUnknownTargetVectorRegisterSize
+          : target_vector_register_element_size;
+
   if (is_column_major_matrix_vector) {
     VLOG(2) << "Emitting column major matrix-vector multiply with m = " << m
             << " and k = " << k;
-    ColumnMajorMatrixVectorProductEmitter emitter(
-        dot_.shape().element_type(), /*tile_rows=*/8,
-        /*tile_cols=*/tiling_factor, m, k,
-        swap_operands ? rhs_array_.GetBasePointer()
-                      : lhs_array_.GetBasePointer(),
-        swap_operands ? lhs_array_.GetBasePointer()
-                      : rhs_array_.GetBasePointer(),
-        target_array_.GetBasePointer(), ir_builder_);
-    emitter.Emit();
+    int64 tile_rows = vector_register_element_size;
+    int64 tile_cols = tiling_factor;
+
+    string kernel_name = tensorflow::strings::StrCat(
+        "col_major_gemv_", PrimitiveType_Name(primitive_type), "_", tile_rows,
+        "_", tile_cols, "_", m, "_", k, addend_array_ ? "_with_addend" : "");
+
+    KernelSupportLibrary::EmitAndCallOutlinedKernel(
+        /*enable_fast_math=*/enable_fast_math,
+        /*optimize_for_size=*/optimize_for_size, ir_builder_, kernel_name,
+        lhs_op, rhs_op,
+        addend_array_ ? addend_array_->GetBasePointer() : nullptr, result_op,
+        [this, tile_rows, tile_cols, m, k, primitive_type](
+            llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* addend_op,
+            llvm::Value* result_op) {
+          ColumnMajorMatrixVectorProductEmitter emitter(
+              primitive_type, tile_rows, tile_cols, m, k, lhs_op, rhs_op,
+              addend_op, result_op, ir_builder_);
+          emitter.Emit();
+        });
   } else {
     VLOG(2) << "Emitting row major matrix-vector multiply with m = " << m
             << " and k = " << k;
-    RowMajorMatrixVectorProductEmitter emitter(
-        dot_.shape().element_type(), /*tile_rows=*/tiling_factor,
-        /*tile_cols=*/8, m, k,
-        swap_operands ? rhs_array_.GetBasePointer()
-                      : lhs_array_.GetBasePointer(),
-        swap_operands ? lhs_array_.GetBasePointer()
-                      : rhs_array_.GetBasePointer(),
-        target_array_.GetBasePointer(), ir_builder_);
-    emitter.Emit();
+    int64 tile_rows = tiling_factor;
+    int64 tile_cols = vector_register_element_size;
+
+    string kernel_name = tensorflow::strings::StrCat(
+        "row_major_gemv_", PrimitiveType_Name(primitive_type), "_", tile_rows,
+        "_", tile_cols, "_", m, "_", k, addend_array_ ? "_with_addend" : "");
+
+    KernelSupportLibrary::EmitAndCallOutlinedKernel(
+        /*enable_fast_math=*/enable_fast_math,
+        /*optimize_for_size=*/optimize_for_size, ir_builder_, kernel_name,
+        lhs_op, rhs_op,
+        addend_array_ ? addend_array_->GetBasePointer() : nullptr, result_op,
+        [this, tile_rows, tile_cols, m, k, primitive_type](
+            llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* addend_op,
+            llvm::Value* result_op) {
+          RowMajorMatrixVectorProductEmitter emitter(
+              primitive_type, tile_rows, tile_cols, m, k, lhs_op, rhs_op,
+              addend_op, result_op, ir_builder_);
+          emitter.Emit();
+        });
   }
 
   return true;
@@ -641,6 +729,8 @@ tensorflow::Status DotOpEmitter::Emit() {
     return Status::OK();
   }
 
+  CHECK_EQ(addend_array_, nullptr);
+
   if (PotentiallyImplementedAsEigenDot(dot_)) {
     return EmitCallToRuntime();
   }
@@ -915,8 +1005,8 @@ DotOpEmitter::MatMultDims DotOpEmitter::GetMatMultDims() const {
   return {lhs_shape.dimensions(transpose_lhs_ ? 1 : 0),
           lhs_shape.dimensions(transpose_lhs_ ? 0 : 1),
           rhs_shape.dimensions(transpose_rhs_ ? 0 : 1),
-          lhs_shape.layout().minor_to_major(0) == 0,
-          rhs_shape.layout().minor_to_major(0) == 0};
+          LayoutUtil::Minor(lhs_shape.layout(), 0) == 0,
+          LayoutUtil::Minor(rhs_shape.layout(), 0) == 0};
 }
 
 llvm_ir::IrArray::Index DotOpEmitter::EmitOperandArrayLoopNest(
@@ -927,8 +1017,8 @@ llvm_ir::IrArray::Index DotOpEmitter::EmitOperandArrayLoopNest(
   // reduction dimension.
   std::vector<int64> dimensions;
   const Shape& shape = operand_array.GetShape();
-  for (int i = shape.layout().minor_to_major_size() - 1; i >= 0; --i) {
-    int64 dimension = shape.layout().minor_to_major(i);
+  for (int i = LayoutUtil::MinorToMajor(shape).size() - 1; i >= 0; --i) {
+    int64 dimension = LayoutUtil::Minor(shape.layout(), i);
     if (dimension != reduction_dimension) {
       dimensions.push_back(dimension);
     }
@@ -977,9 +1067,7 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) {
       return false;
     }
 
-    if (ProfitableToImplementDotInUntiledLlvmIr(hlo) ==
-            DotInLlvmIrProfitable::kYes ||
-        ProfitableToImplementDotInTiledLlvmIr(hlo)) {
+    if (ProfitableToImplementDotInTiledLlvmIr(hlo)) {
       return false;
     }
 
@@ -1010,46 +1098,42 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) {
   return false;
 }
 
-DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr(
-    const HloInstruction& dot) {
-  if (dot.opcode() == HloOpcode::kDot && dot.shape().dimensions_size() == 2) {
-    const Shape& result_shape = dot.shape();
-    // kReductionDimensionThresholdBytes was chosen to be 1/4 of a typical L1
-    // cache line size, so that we can have the reduction dimension of both the
-    // LHS and RHS matrices and still have some space "left over".  This needs
-    // to be tuned further.
-    const int64 kReductionDimensionThresholdBytes = 8 * 1024;
-    const bool single_threaded_eigen =
-        !dot.GetModule()->config().debug_options().xla_cpu_multi_thread_eigen();
-
-    // This is the point at which it is better to call into Eigen and shard the
-    // dot across multiple worker threads.  This is a rough estimate by running
-    // a matmult benchmark on my local machine, and it can be tuned further.
-    const int64 kMaxSingleThreadedFlops = 16 * 1024;
-
-    const int64 M = result_shape.dimensions(0);
-    const int64 N = result_shape.dimensions(1);
-    const int64 K = dot.operand(1)->shape().dimensions(0);
-    const int64 primitive_type_size =
-        ShapeUtil::ByteSizeOfPrimitiveType(result_shape.element_type());
-    if (M == 1 &&
-        K * primitive_type_size <= kReductionDimensionThresholdBytes &&
-        (single_threaded_eigen || M * K * N <= kMaxSingleThreadedFlops)) {
-      // Heuristics:
-      //
-      //  - Look for a configuration where we will likely be able to keep LHS in
-      //    L1 and do a cache-optimal traversal of RHS.
-      //
-      //  - Bail out on matrices that are large enough that Eigen can profitably
-      //    shard the computation across multiple cores.  This only applies when
-      //    multi-threading is enabled.
-      return LayoutUtil::IsMonotonicWithDim0Major(
-                 dot.operand(1)->shape().layout())
-                 ? DotInLlvmIrProfitable::kWithColumnMajorRhs
-                 : DotInLlvmIrProfitable::kYes;
+// For vector-matrix dot products, it is always profitable to make the Rhs
+// column major.
+tensorflow::gtl::optional<int64> ProfitableToMakeDotOperandColumnMajor(
+    const HloInstruction& hlo) {
+  if (hlo.opcode() == HloOpcode::kDot && hlo.shape().dimensions_size() == 2 &&
+      hlo.shape().dimensions(0) == 1) {
+    if (hlo.dot_dimension_numbers().rhs_contracting_dimensions(0) == 0) {
+      return 1;
+    }
+    return {};
+  }
+
+  if (hlo.opcode() == HloOpcode::kFusion &&
+      hlo.fusion_kind() == HloInstruction::FusionKind::kOutput) {
+    auto* fusion_root =
+        hlo.fused_instructions_computation()->root_instruction();
+    if (fusion_root->opcode() != HloOpcode::kAdd) {
+      return {};
+    }
+
+    for (auto* fusion_root_op : fusion_root->operands()) {
+      if (fusion_root_op->opcode() != HloOpcode::kDot) {
+        continue;
+      }
+      if (auto operand_num =
+              ProfitableToMakeDotOperandColumnMajor(*fusion_root_op)) {
+        auto* operand = fusion_root_op->operand(*operand_num);
+        if (operand->opcode() == HloOpcode::kParameter &&
+            operand->user_count() == 1) {
+          return operand->parameter_number();
+        }
+      }
     }
   }
-  return DotInLlvmIrProfitable::kNo;
+
+  return {};
 }
 
 bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot) {
diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h
index c9168ccc0f6629c2a2bfbc7d4dc9c7ebab0a5708..9d748eb81f7850f3ccdb10f076eecfdc8326c05f 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include "llvm/IR/IRBuilder.h"
 #include "tensorflow/compiler/xla/service/cpu/cpu_options.h"
+#include "tensorflow/compiler/xla/service/cpu/target_machine_features.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
@@ -32,19 +33,11 @@ namespace cpu {
 
 bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo);
 
-enum class DotInLlvmIrProfitable { kYes, kNo, kWithColumnMajorRhs };
-
-// Returns a value to indicate if (and under what conditions) will lowering
-// |dot| as a untiled LLVM IR dot operation be profitable over calling into
-// Eigen or emitting a tiled LLVM IR implementation.  Possible return values
-// are:
-//
-//  * DotInLlvmIrProfitable::kYes - always profitable.
-//  * DotInLlvmIrProfitable::kNo - never profitable.
-//  * DotInLlvmIrProfitable::kWithColumnMajorRhs - only if we can manage to make
-//    the Rhs layout column major.
-DotInLlvmIrProfitable ProfitableToImplementDotInUntiledLlvmIr(
-    const HloInstruction& dot);
+// Returns the index for an operand to `hlo` that should ideally be column
+// major.  Returns nullopt if there is no such operand or if `hlo` is not a dot
+// or a fusion containing a dot.
+tensorflow::gtl::optional<int64> ProfitableToMakeDotOperandColumnMajor(
+    const HloInstruction& hlo);
 
 // Returns true to indicate that we can generate a tiled LLVM IR implementation
 // for |dot|.
@@ -57,21 +50,29 @@ class DotOpEmitter {
   // place the result in target_array. IR is emitted at current insert point of
   // the builder. Upon completion of the method, the insert point is set to the
   // end of all instructions emitted for this operation.
+  //
+  // If `addend_array` is not nullptr then it must be an array of the same
+  // dimensions as the result, and the result is computed as `addend_array` +
+  // dot(`lhs_array`, `rhs_array`).  A non-null `addend_array` is only supported
+  // for Matrix-vector products.
   static tensorflow::Status EmitDotOperation(
       const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs,
       const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& lhs_array,
-      const llvm_ir::IrArray& rhs_array,
+      const llvm_ir::IrArray& rhs_array, const llvm_ir::IrArray* addend_array,
       llvm::Value* executable_run_options_value, llvm::IRBuilder<>* ir_builder,
-      const HloModuleConfig& hlo_module_config);
+      const HloModuleConfig& hlo_module_config,
+      const TargetMachineFeatures& target_machine_features);
 
  private:
   DotOpEmitter(const HloInstruction& dot, bool transpose_lhs,
                bool transpose_rhs, const llvm_ir::IrArray& target_array,
                const llvm_ir::IrArray& lhs_array,
                const llvm_ir::IrArray& rhs_array,
+               const llvm_ir::IrArray* addend_array,
                llvm::Value* executable_run_options_value,
                llvm::IRBuilder<>* ir_builder,
-               const HloModuleConfig& hlo_module_config);
+               const HloModuleConfig& hlo_module_config,
+               const TargetMachineFeatures& target_machine_features);
 
   // Emits the IR to perform the dot operation.
   tensorflow::Status Emit();
@@ -140,9 +141,11 @@ class DotOpEmitter {
   const llvm_ir::IrArray& target_array_;
   const llvm_ir::IrArray& lhs_array_;
   const llvm_ir::IrArray& rhs_array_;
+  const llvm_ir::IrArray* addend_array_;
   llvm::Value* executable_run_options_value_;
   llvm::IRBuilder<>* ir_builder_;
   const HloModuleConfig& hlo_module_config_;
+  const TargetMachineFeatures& target_machine_features_;
 };
 
 }  // namespace cpu
diff --git a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc
index ba693ec89ab7c4090f8c9d1e4d65f17a80d0ac55..ebd96c4c42759b71b79408c73814605301af03c1 100644
--- a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc
@@ -44,15 +44,11 @@ StatusOr<llvm::Value*> CpuElementalIrEmitter::EmitFloatUnaryOp(
         default:
           return Unimplemented("tanh");
       }
-      // Create function type for the function.
-      llvm::FunctionType* function_type = llvm::FunctionType::get(
-          llvm_ir::PrimitiveTypeToIrType(element_type, module_),
-          llvm_ir::PrimitiveTypeToIrType(element_type, module_),
-          /*isVarArg=*/false);
       // Create function declaration for 'tanhf'.
       llvm::Function* function =
           llvm::cast<llvm::Function>(module_->getOrInsertFunction(
-              llvm_ir::AsStringRef(function_name), function_type));
+              llvm_ir::AsStringRef(function_name), operand_value->getType(),
+              operand_value->getType()));
       function->setCallingConv(llvm::CallingConv::C);
       function->setDoesNotThrow();
       function->setDoesNotAccessMemory();
@@ -64,6 +60,31 @@ StatusOr<llvm::Value*> CpuElementalIrEmitter::EmitFloatUnaryOp(
   }
 }
 
+StatusOr<llvm::Value*> CpuElementalIrEmitter::EmitAtan2(
+    PrimitiveType prim_type, llvm::Value* lhs, llvm::Value* rhs) const {
+  string function_name;
+  switch (prim_type) {
+    case F32:
+      function_name = "atan2f";
+      break;
+    case F64:
+      function_name = "atan2";
+      break;
+    default:
+      return Unimplemented("atan2");
+  }
+  // Create function declaration for 'atan2'.
+  llvm::Function* function =
+      llvm::cast<llvm::Function>(module_->getOrInsertFunction(
+          llvm_ir::AsStringRef(function_name), lhs->getType(), lhs->getType(),
+          rhs->getType()));
+  function->setCallingConv(llvm::CallingConv::C);
+  function->setDoesNotThrow();
+  function->setDoesNotAccessMemory();
+  // Create instruction to call 'atan2'.
+  return ir_builder_->CreateCall(function, {lhs, rhs});
+}
+
 llvm_ir::ElementGenerator CpuElementalIrEmitter::MakeElementGenerator(
     const HloInstruction* hlo,
     const HloToElementGeneratorMap& operand_to_generator) const {
diff --git a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h
index 7e9f27befb456c17581f556868712f92fd8fd083..4446dfd2821fb4b6e75f33694367392ecbcdd8bf 100644
--- a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h
@@ -41,6 +41,8 @@ class CpuElementalIrEmitter : public ElementalIrEmitter {
  protected:
   StatusOr<llvm::Value*> EmitFloatUnaryOp(
       const HloInstruction* op, llvm::Value* operand_value) const override;
+  StatusOr<llvm::Value*> EmitAtan2(PrimitiveType prim_type, llvm::Value* lhs,
+                                   llvm::Value* rhs) const override;
 
   IrEmitter* ir_emitter_;
 };
diff --git a/tensorflow/compiler/xla/service/cpu/external_constant_pool.cc b/tensorflow/compiler/xla/service/cpu/external_constant_pool.cc
index c9f8e5584965d0c73771750e26bd63c401d5b0c0..7dcc4ca7fa08b478f24065275ffa69725dc51682 100644
--- a/tensorflow/compiler/xla/service/cpu/external_constant_pool.cc
+++ b/tensorflow/compiler/xla/service/cpu/external_constant_pool.cc
@@ -33,15 +33,12 @@ void ExternalConstantPool::Insert(string name, const Literal& literal,
   CHECK(entries_.find(name) == entries_.end());
 
   int64 literal_size = ShapeUtil::ByteSizeOf(literal.shape());
-  void* raw_pointer;
-  CHECK_EQ(
-      posix_memalign(&raw_pointer, std::max<size_t>(alignment, sizeof(void*)),
-                     literal_size),
-      0)
-      << "failed to allocate " << literal_size << " bytes with alignment of "
-      << alignment;
-
-  std::memcpy(raw_pointer, literal.InternalData(), literal_size);
+  void* raw_pointer = tensorflow::port::AlignedMalloc(
+      literal_size, std::max<size_t>(alignment, sizeof(void*)));
+  CHECK(raw_pointer != nullptr) << "failed to allocate " << literal_size
+                                << " bytes with alignment of " << alignment;
+
+  std::memcpy(raw_pointer, literal.untyped_data(), literal_size);
   entries_.emplace(std::move(name), static_cast<uint8*>(raw_pointer));
 }
 
diff --git a/tensorflow/compiler/xla/service/cpu/external_constant_pool.h b/tensorflow/compiler/xla/service/cpu/external_constant_pool.h
index ade28cbcbcfda05a9ad0adab1139bf316720e11f..9c00d476b1fca6c3174af4ebb62dbbde324fd0ea 100644
--- a/tensorflow/compiler/xla/service/cpu/external_constant_pool.h
+++ b/tensorflow/compiler/xla/service/cpu/external_constant_pool.h
@@ -20,6 +20,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
+#include "tensorflow/core/platform/mem.h"
 
 namespace xla {
 namespace cpu {
@@ -49,10 +50,10 @@ class ExternalConstantPool {
   const uint8* Find(const string& name);
 
  private:
-  // We need to `free()` pointers allocated into `entries_` since we allocate
-  // them with `posix_memalign`.
+  // We need to `AlignedFree` pointers allocated into `entries_` since we
+  // allocate them with `AlignedMalloc`.
   struct FreeDeleter {
-    void operator()(void* ptr) { free(ptr); }
+    void operator()(void* ptr) { tensorflow::port::AlignedFree(ptr); }
   };
 
   tensorflow::gtl::FlatMap<string, std::unique_ptr<uint8, FreeDeleter>>
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
index 3993779da636e519f8d8fded468c3271d27ee093..788217aab6172b4e548452b3f6ffd4197c163ce4 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc
@@ -44,6 +44,9 @@ bool PotentiallyImplementedAsEigenConvolution(
       ShapeUtil::ElementIsComplex(kernel_shape)) {
     return false;
   }
+  if (window_util::HasWindowReversal(convolution.window())) {
+    return false;
+  }
 
   const ConvolutionDimensionNumbers& dnums =
       convolution.convolution_dimension_numbers();
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h
index ac361ddfb4c8d253ffb1c99200939f6324cad2bb..34b2003916933f5ec0a15d9e219063c0a912fa40 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_EMISSION_UTILS_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_EMISSION_UTILS_H_
 
+#include "llvm/IR/Value.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 
 namespace xla {
@@ -23,6 +24,19 @@ namespace cpu {
 
 bool PotentiallyImplementedAsEigenConvolution(
     const HloInstruction& convolution);
+
+// Dynamic loop bounds are specified as an array of dimension index
+// [start, limit) pairs of ir values (one for each partitioned outer dimension).
+//
+// EX: Let 'shape' = [8, 16, 32], with the loop bounds of the two-most major
+//     dimensions dynamic. Then 'dynamic_loop_bounds' will contain the
+//     following ir values for the two most-major dimensions:
+//       [dim0_index_start_ir_value, dim0_index_limit_ir_value]
+//       [dim1_index_start_ir_value, dim1_index_limit_ir_value]
+//
+// See IrFunction and ParallelLoopEmitter for details.
+using DynamicLoopBounds = std::vector<std::pair<llvm::Value*, llvm::Value*>>;
+
 }  // namespace cpu
 }  // namespace xla
 
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 502dd2e7387d701e69e1c7ecb67fbdac26c6b5de..cfdf9f4ebc5a5ae2b0188c86edcdc70e3a596971 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -42,6 +43,8 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h"
 #include "tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h"
 #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/service/cpu/ir_function.h"
+#include "tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h"
 #include "tensorflow/compiler/xla/service/cpu/shape_partition.h"
 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
 #include "tensorflow/compiler/xla/service/elemental_ir_emitter.h"
@@ -76,16 +79,16 @@ namespace cpu {
 IrEmitter::IrEmitter(
     const HloModule& hlo_module, const BufferAssignment& assignment,
     llvm::Module* llvm_module,
-    std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx,
-    tensorflow::gtl::optional<size_t> entry_computation_profile_idx,
+    std::unordered_map<const HloInstruction*, int64> instruction_to_profile_idx,
+    std::unordered_map<const HloComputation*, int64> computation_to_profile_idx,
     llvm::TargetMachine* target_machine,
     ExternalConstantPool* external_constant_pool)
     : assignment_(assignment),
       module_(llvm_module),
       arch_type_(llvm::Triple(llvm_module->getTargetTriple()).getArch()),
       ir_builder_(llvm_module->getContext()),
-      hlo_to_profile_idx_(std::move(hlo_to_profile_idx)),
-      entry_computation_profile_idx_(std::move(entry_computation_profile_idx)),
+      instruction_to_profile_idx_(std::move(instruction_to_profile_idx)),
+      computation_to_profile_idx_(std::move(computation_to_profile_idx)),
       alias_analysis_(hlo_module, assignment, &llvm_module->getContext()),
       hlo_module_config_(hlo_module.config()),
       parallel_cpu_backend_(
@@ -117,138 +120,33 @@ StatusOr<llvm::Function*> IrEmitter::EmitComputation(
   // readcyclecounter if it is unavailable.
   bool use_rdtscp = arch_type_ == llvm::Triple::ArchType::x86 ||
                     arch_type_ == llvm::Triple::ArchType::x86_64;
-  profiling_state_ = ProfilingState(is_top_level_computation_, use_rdtscp,
-                                    GetProfileCountersArgument());
+  profiling_state_ = ProfilingState(use_rdtscp, GetProfileCountersArgument());
   if (instruction_order == nullptr) {
     TF_RETURN_IF_ERROR(computation->Accept(this));
   } else {
     TF_RETURN_IF_ERROR(computation->AcceptOrdered(this, *instruction_order));
   }
-  InsertOrDie(&emitted_functions_, computation, compute_function_);
-
-  return compute_function_;
-}
-
-static llvm::Argument* GetArg(llvm::Function* f, int idx) {
-  llvm::Function::arg_iterator arg_iter = f->arg_begin();
-  std::advance(arg_iter, idx);
-  return &*arg_iter;
+  llvm::Function* ir_function = compute_function_->function();
+  InsertOrDie(&emitted_functions_, computation, ir_function);
+  // Delete 'compute_function', finalizing 'ir_function' and restoring caller
+  // IR insert point.
+  compute_function_.reset();
+  return ir_function;
 }
 
 void IrEmitter::InitializeIrFunction(const string& function_name) {
-  // The function signature is:
-  //   void function(i8* retval, i8* run_options, i8** params, i8** temps,
-  //                 i64* dynamic_loop_bounds, i64* prof_counters)
-  //
-  // retval: points to the returned value.
-  // params: address of an array with pointers to parameters.
-  // temps: address of an array with pointers to temporary buffers.
-  //
-  // Therefore, the generated function's signature (FunctionType) is statically
-  // determined - parameter unpacking is done in code generated into the
-  // function, rather than by a prologue dictated by the platform ABI.
-  //
-  //                      /--------------\
-  //   retval ----------> | return value |
-  //                      \--------------/
-  //
-  //                      /-------------------------------\
-  //   run_options -----> | xla::ExecutableRunOptions |
-  //                      \-------------------------------/
-  //
-  //                     /---------------------------------------------\
-  //   params -------->  |  param 0  |  param 1  | ..... |  param N-1  |
-  //                     |   addr    |   addr    |       |   addr      |
-  //                     \---------------------------------------------/
-  //                          |           |                   |
-  //                          |           |                   |
-  //                          V           V                   V
-  //                     /---------\  /---------\         /-----------\
-  //                     | param 0 |  | param 1 |         | param N-1 |
-  //                     \---------/  \---------/         \-----------/
-  //
-  //                     /---------------------------------------------\
-  //   temps --------->  |  temp  0  |  temp  1  | ..... |  temp  N-1  |
-  //                     |   addr    |   addr    |       |   addr      |
-  //                     \---------------------------------------------/
-  //                          |           |                   |
-  //                          |           |                   |
-  //                          V           V                   V
-  //                     /---------\  /---------\         /-----------\
-  //                     | temp  0 |  | temp  1 |         | temp  N-1 |
-  //                     \---------/  \---------/         \-----------/
-  //
-  //                        /--------------------------------------------\
-  // dynamic loop bounds -> | outer_dim0_start | outer_dim0_limit | .....|
-  //  (elided for aot)      \--------------------------------------------/
-  //
-  //                     /---------------------------------------------\
-  //   prof counters ->  | counter 0 | counter 1 | ..... | counter N-1 |
-  //  (elided for aot)   \---------------------------------------------/
-
-  // Even though the type of params and temps is void** in the host's view, in
-  // LLVM IR this is represented by i8*, similarly to void*. It's up to the code
-  // to use GEPs to unravel the indirection layers.
-  llvm::FunctionType* compute_function_type = llvm::FunctionType::get(
-      /*Result=*/llvm::Type::getVoidTy(module_->getContext()),
-      /*Params=*/GetComputeFunctionParams(),
-      /*isVarArg=*/false);
-
   // Functions with local linkage get an inlining bonus.  Because we know
   // a-priori that embedded functions (non-entry functions) will not have its
   // name resolved, give it local linkage.
   llvm::Function::LinkageTypes linkage =
       is_top_level_computation_ ? llvm::GlobalValue::ExternalLinkage
                                 : llvm::GlobalValue::InternalLinkage;
-  compute_function_ =
-      llvm::Function::Create(/*Ty=*/compute_function_type,
-                             /*Linkage=*/linkage,
-                             /*Name=*/AsStringRef(function_name),
-                             /*Module=*/module_);
-  compute_function_->setCallingConv(llvm::CallingConv::C);
-
-  // Set meaningful names for the function's arguments: useful for debugging.
-  llvm::Function::arg_iterator arg_iter = compute_function_->arg_begin();
-  arg_iter->setName("retval");
-  (++arg_iter)->setName("run_options");
-  (++arg_iter)->setName("params");
-  (++arg_iter)->setName("temps");
-  if (num_dynamic_loop_bounds_ > 0) {
-    (++arg_iter)->setName("dynamic_loop_bounds");
-  }
-  (++arg_iter)->setName("prof_counters");
-
-  // We know a-priori that the function arguments are guaranteed to point to
-  // disjoint objects.
-  llvm::Argument* retval = GetResultArgument();
-  for (llvm::Argument& argument : compute_function_->args()) {
-    // However, the return buffer aliases the temporaries and thus cannot be
-    // marked noalias.
-    if (&argument == retval) {
-      continue;
-    }
-    compute_function_->addAttribute(argument.getArgNo() + 1,
-                                    llvm::Attribute::NoAlias);
-  }
-
-  // Add the optize attribute to the function if optimizing for size. This
-  // controls internal behavior of some optimization passes (e.g. loop
-  // unrolling).
-  if (options::OptimizeForSizeRequested(hlo_module_config_)) {
-    compute_function_->addFnAttr(llvm::Attribute::OptimizeForSize);
-  }
-
-  if (hlo_module_config_.debug_options().xla_enable_fast_math()) {
-    compute_function_->addFnAttr("unsafe-fp-math", "true");
-    compute_function_->addFnAttr("no-infs-fp-math", "true");
-    compute_function_->addFnAttr("no-nans-fp-math", "true");
-    compute_function_->addFnAttr("no-signed-zeros-fp-math", "true");
-  }
-
-  ir_builder_.SetInsertPoint(llvm::BasicBlock::Create(
-      /*Context=*/module_->getContext(),
-      /*Name=*/"entry",
-      /*Parent=*/compute_function_));
+  // Create and initialize new IrFunction.
+  compute_function_.reset(
+      new IrFunction(function_name, linkage,
+                     options::OptimizeForSizeRequested(hlo_module_config_),
+                     hlo_module_config_.debug_options().xla_enable_fast_math(),
+                     module_, &ir_builder_, num_dynamic_loop_bounds_));
 }
 
 IrEmitter::~IrEmitter() {}
@@ -344,11 +242,12 @@ int IrEmitter::MinimumAlignmentForBufferSize(int64 buffer_size) {
 
 // Calculate the alignment of a buffer allocated for a given primitive type.
 int IrEmitter::MinimumAlignmentForPrimitiveType(PrimitiveType primitive_type) {
-  int64 buffer_size = ShapeUtil::ByteSizeOfPrimitiveType(primitive_type);
-  DCHECK_GE(buffer_size, 0);
-  DCHECK_LE(buffer_size, SIZE_MAX);
-
-  return MinimumAlignmentForBufferSize(buffer_size);
+  int64 byte_size = ShapeUtil::ByteSizeOfPrimitiveType(primitive_type);
+  DCHECK_GE(byte_size, 0);
+  // Largest scalar is a complex64 so we don't need to worry about the
+  // int64->int truncation here.
+  DCHECK_LE(byte_size, 8);
+  return byte_size;
 }
 
 int64 IrEmitter::ByteSizeOf(const Shape& shape) const {
@@ -357,6 +256,10 @@ int64 IrEmitter::ByteSizeOf(const Shape& shape) const {
 
 // Calculate the alignment of a buffer allocated for a given shape.
 int IrEmitter::MinimumAlignmentForShape(const Shape& shape) {
+  if (ShapeUtil::IsScalar(shape)) {
+    return MinimumAlignmentForPrimitiveType(shape.element_type());
+  }
+
   int64 buffer_size = ByteSizeOf(shape);
   DCHECK_GE(buffer_size, 0);
   DCHECK_LE(buffer_size, SIZE_MAX);
@@ -612,7 +515,7 @@ Status IrEmitter::HandleReduceWindow(HloInstruction* reduce_window) {
   HloComputation* function = reduce_window->to_apply();
   TF_RETURN_IF_ERROR(ElementTypesSameAndSupported(
       /*instruction=*/*reduce_window, /*operands=*/{operand},
-      /*supported_types=*/{F32}));
+      /*supported_types=*/{F32, BF16}));
 
   // TODO(b/31410564): Implement dilation for reduce-window.
   if (window_util::HasDilation(window)) {
@@ -898,6 +801,24 @@ Status IrEmitter::HandleDot(HloInstruction* dot) {
   TF_RETURN_IF_ERROR(ElementTypesSameAndSupported(
       /*instruction=*/*dot, /*operands=*/{lhs, rhs},
       /*supported_types=*/{F32, F64, C64}));
+  const DotDimensionNumbers& dnums = dot->dot_dimension_numbers();
+  if (dnums.lhs_batch_dimensions_size() > 0 ||
+      dnums.rhs_batch_dimensions_size() > 0) {
+    return Unimplemented("Dot with batch dimensions not implemented.");
+  }
+
+  if (dnums.lhs_contracting_dimensions_size() != 1) {
+    // This is disallowed by ShapeInference today.
+    return Unimplemented(
+        "Dot with multiple contracting dimensions not implemented.");
+  }
+
+  if (dnums.lhs_contracting_dimensions(0) !=
+          std::min(lhs->shape().dimensions_size() - 1, 1) ||
+      dnums.rhs_contracting_dimensions(0) != 0) {
+    return Unimplemented(
+        "Dot with non-standard contracting dimensions not implemented.");
+  }
 
   llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs));
   llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs));
@@ -916,8 +837,9 @@ Status IrEmitter::HandleDot(HloInstruction* dot) {
   // Dot operation is complicated so we delegate to a helper class.
   return DotOpEmitter::EmitDotOperation(
       *dot, /*transpose_lhs=*/false, /*transpose_rhs=*/false, target_array,
-      lhs_array, rhs_array, GetExecutableRunOptionsArgument(), &ir_builder_,
-      hlo_module_config_);
+      lhs_array, rhs_array, /*addend_array=*/nullptr,
+      GetExecutableRunOptionsArgument(), &ir_builder_, hlo_module_config_,
+      target_machine_features_);
 }
 
 Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
@@ -1189,8 +1111,14 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
         llvm_ir::IrArray kernel_array(GetIrArrayFor(rhs));
         llvm_ir::IrArray::Index kernel_index(num_dims);
         for (int i = 0; i < num_spatial_dims; ++i) {
-          kernel_index[dnums.kernel_spatial_dimensions(i)] = kernel_spatial[i];
+          kernel_index[dnums.kernel_spatial_dimensions(i)] =
+              window.dimensions(i).window_reversal()
+                  ? ir_builder_.CreateNSWSub(
+                        ir_builder_.getInt64(window.dimensions(i).size() - 1),
+                        kernel_spatial[i])
+                  : kernel_spatial[i];
         }
+
         kernel_index[dnums.kernel_input_feature_dimension()] = input_feature;
         kernel_index[dnums.kernel_output_feature_dimension()] = output_feature;
 
@@ -1207,10 +1135,67 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
       });
 }
 
+Status IrEmitter::HandleFft(HloInstruction* fft) {
+  auto operand = fft->operand(0);
+  TF_RETURN_IF_ERROR(ElementTypesSameAndSupported(
+      /*instruction=*/*fft, /*operands=*/{operand},
+      /*supported_types=*/{F32, C64}));
+  TF_RET_CHECK(LayoutUtil::IsMonotonicWithDim0Major(operand->shape().layout()));
+  TF_RET_CHECK(LayoutUtil::IsMonotonicWithDim0Major(fft->shape().layout()));
+  VLOG(3) << "operand=" << ShapeUtil::HumanStringWithLayout(operand->shape());
+  VLOG(3) << "fft=" << ShapeUtil::HumanStringWithLayout(fft->shape());
+
+  llvm::Value* operand_address = GetEmittedValueFor(operand);
+  TF_RETURN_IF_ERROR(EmitTargetAddressForOp(fft));
+
+  const std::vector<int64>& fft_length = fft->fft_length();
+  int64 input_batch = 1;
+  for (int i = 0; i < fft->shape().dimensions_size() - fft_length.size(); i++) {
+    input_batch *= fft->shape().dimensions(i);
+  }
+
+  // Args have been computed, make the call.
+  llvm::Type* int8_ptr_type = ir_builder_.getInt8Ty()->getPointerTo();
+  llvm::Type* int32_type = ir_builder_.getInt32Ty();
+  llvm::Type* int64_type = ir_builder_.getInt64Ty();
+  llvm::FunctionType* fft_type = llvm::FunctionType::get(
+      ir_builder_.getVoidTy(),
+      {int8_ptr_type, int8_ptr_type, int8_ptr_type, int32_type, int32_type,
+       int64_type, int64_type, int64_type, int64_type},
+      /*isVarArg=*/false);
+  const char* fn_name = runtime::kEigenFftSymbolName;
+  llvm::Function* fft_func = llvm::cast<llvm::Function>(
+      module_->getOrInsertFunction(fn_name, fft_type));
+  fft_func->setCallingConv(llvm::CallingConv::C);
+  fft_func->setDoesNotThrow();
+  fft_func->setOnlyAccessesInaccessibleMemOrArgMem();
+  const int fft_rank = fft_length.size();
+  ir_builder_.CreateCall(
+      fft_func,
+      {GetExecutableRunOptionsArgument(),
+       ir_builder_.CreateBitCast(GetEmittedValueFor(fft), int8_ptr_type),
+       ir_builder_.CreateBitCast(operand_address, int8_ptr_type),
+       ir_builder_.getInt32(fft->fft_type()), ir_builder_.getInt32(fft_rank),
+       ir_builder_.getInt64(input_batch),
+       ir_builder_.getInt64(fft_rank > 0 ? fft_length[0] : 0),
+       ir_builder_.getInt64(fft_rank > 1 ? fft_length[1] : 0),
+       ir_builder_.getInt64(fft_rank > 2 ? fft_length[2] : 0)});
+
+  return Status::OK();
+}
+
 Status IrEmitter::HandleCrossReplicaSum(HloInstruction* crs) {
+  if (hlo_module_config_.replica_count() == 1) {
+    // When there is a single replica, a cross replica sum is the identity
+    // function, and the buffer assignment expects a copy (we could eliminate
+    // these at the HLO level as an optimization).
+    TF_RETURN_IF_ERROR(EmitTargetAddressForOp(crs));
+    return EmitMemcpy(*crs->operand(0), *crs);
+  }
+
   // TODO(b/33011107): Support cross replica sum on CPU.
   return Unimplemented(
-      "Cross replica sum not implemented on CPU. See b/33011107.");
+      "Cross replica sum is not implemented on CPU. See b/33011107.");
 }
 
 // Fills up the free variables in 'index_with_free_var' with values from
@@ -1452,15 +1437,20 @@ Status IrEmitter::HandleParameter(HloInstruction* parameter) {
   //
   // Where Param is the actual element type of the underlying buffer (for
   // example, float for an XLA F32 element type).
-  llvm::Argument* params = GetArg(compute_function_, 2);
+  llvm::Value* params = compute_function_->parameters_arg();
   llvm::Value* param_address_offset =
       llvm_ir::EmitBufferIndexingGEP(params, param_number, &ir_builder_);
   llvm::LoadInst* param_address_untyped =
       ir_builder_.CreateLoad(param_address_offset);
   param_address_untyped->setName(AsStringRef(IrName(parameter, "untyped")));
-  if (hlo_module_config_.debug_options()
+  if (is_top_level_computation_ &&
+      hlo_module_config_.debug_options()
           .xla_llvm_enable_invariant_load_metadata()) {
-    // We never reassign parameters, so this load is invariant.
+    // In the entry computation the parameter slots in the %params argument are
+    // invariant through program execution.  In computations that are called
+    // from the entry computation (via kWhile, kCall and kConditional) the
+    // parameter slots are *not* invariant since they're written to by their
+    // callers.
     param_address_untyped->setMetadata(
         llvm::LLVMContext::MD_invariant_load,
         llvm::MDNode::get(param_address_untyped->getContext(), /*MDs=*/{}));
@@ -1587,13 +1577,9 @@ IrEmitter::ReductionGenerator IrEmitter::MatchReductionGenerator(
 
 IrEmitter::ShardedVectorType IrEmitter::CreateShardedVectorType(
     PrimitiveType element_type, unsigned element_count) {
-  // Here we assume that the largest register is a vector register.
-  int max_vector_register_size_in_bytes =
-      target_machine_features_.largest_register_size_in_bytes(
-          compute_function_);
-
   int vector_register_size_in_elements =
-      max_vector_register_size_in_bytes /
+      target_machine_features_.vector_register_byte_size(
+          *compute_function_->function()) /
       ShapeUtil::ByteSizeOfPrimitiveType(element_type);
 
   ShardedVectorType sharded_vector_type;
@@ -1748,19 +1734,6 @@ void IrEmitter::EmitShardedVectorStore(
   }
 }
 
-namespace {
-// TODO(sanjoy): This is duplicated in tensorflow/core/lib/core/arena.cc.
-// Extract out a common implementation to tensorflow/core/lib/math/math_util.h
-uint32 GCD(uint32 x, uint32 y) {
-  while (y != 0) {
-    uint32 r = x % y;
-    x = y;
-    y = r;
-  }
-  return x;
-}
-}  // namespace
-
 StatusOr<bool> IrEmitter::EmitVectorizedReduce(
     HloInstruction* reduce, HloInstruction* arg, HloInstruction* init_value,
     tensorflow::gtl::ArraySlice<int64> dimensions, HloComputation* function,
@@ -1781,11 +1754,12 @@ StatusOr<bool> IrEmitter::EmitVectorizedReduce(
 
   bool is_reduction_over_minor_dimension =
       std::find(dimensions.begin(), dimensions.end(),
-                arg->shape().layout().minor_to_major(0)) != dimensions.end();
+                LayoutUtil::Minor(arg->shape().layout(), 0)) !=
+      dimensions.end();
 
-  unsigned element_alignment =
-      GCD(ShapeUtil::ByteSizeOfPrimitiveType(reduce->shape().element_type()),
-          MinimumAlignmentForPrimitiveType(reduce->shape().element_type()));
+  unsigned element_alignment = tensorflow::MathUtil::GCD<unsigned>(
+      ShapeUtil::ByteSizeOfPrimitiveType(reduce->shape().element_type()),
+      MinimumAlignmentForPrimitiveType(reduce->shape().element_type()));
 
   if (is_reduction_over_minor_dimension) {
     // TODO(sanjoy): Implement vectorized reduction over the minor dimension.
@@ -1818,8 +1792,9 @@ StatusOr<bool> IrEmitter::EmitVectorizedReduce(
 
   llvm_ir::ForLoopNest loop_nest(IrName(reduce), &ir_builder_);
   llvm_ir::IrArray::Index array_index(reduce->shape().dimensions_size());
-  for (int i = reduce->shape().layout().minor_to_major_size() - 1; i > 0; --i) {
-    int64 dimension = reduce->shape().layout().minor_to_major(i);
+  for (int i = LayoutUtil::MinorToMajor(reduce->shape()).size() - 1; i > 0;
+       --i) {
+    int64 dimension = LayoutUtil::Minor(reduce->shape().layout(), i);
     int64 start_index = 0;
     int64 end_index = reduce->shape().dimensions(dimension);
     std::unique_ptr<llvm_ir::ForLoop> loop =
@@ -1828,7 +1803,7 @@ StatusOr<bool> IrEmitter::EmitVectorizedReduce(
     array_index[dimension] = loop->GetIndVarValue();
   }
 
-  int64 innermost_dimension = reduce->shape().layout().minor_to_major(0);
+  int64 innermost_dimension = LayoutUtil::Minor(reduce->shape().layout(), 0);
   int64 innermost_dimension_size =
       reduce->shape().dimensions(innermost_dimension);
 
@@ -1864,10 +1839,10 @@ StatusOr<bool> IrEmitter::EmitVectorizedReduce(
                            target_array);
 
     if (auto exit_terminator = loop->GetExitBasicBlock()->getTerminator()) {
-      CHECK_GT(reduce->shape().layout().minor_to_major_size(), 1);
+      CHECK_GT(LayoutUtil::MinorToMajor(reduce->shape()).size(), 1);
       ir_builder_.SetInsertPoint(exit_terminator);
     } else {
-      CHECK_EQ(reduce->shape().layout().minor_to_major_size(), 1);
+      CHECK_EQ(LayoutUtil::MinorToMajor(reduce->shape()).size(), 1);
       ir_builder_.SetInsertPoint(loop->GetExitBasicBlock());
     }
   }
@@ -1995,7 +1970,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice) {
   VLOG(2) << "HandleSlice: " << slice->ToString();
   auto operand = slice->operand(0);
   // The code below emits a sequential loop nest. For the parallel backend, use
-  // EmitParallelTargetElementLoop() which respects dynamic loop bounds.
+  // ParallelLoopEmitter which respects dynamic loop bounds.
   if (ShouldEmitParallelLoopFor(*slice)) {
     return DefaultAction(slice);
   }
@@ -2027,7 +2002,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice) {
   // * Implement the memcpy within the innermost loop.
 
   tensorflow::gtl::FlatSet<int64> inner_dims;
-  for (int64 dim : layout.minor_to_major()) {
+  for (int64 dim : LayoutUtil::MinorToMajor(layout)) {
     if (operand->shape().dimensions(dim) != slice->shape().dimensions(dim)) {
       break;
     }
@@ -2054,7 +2029,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice) {
 
   // memcpy_dim is the innermost (in terms of layout) dimension for which the
   // slice does *not* just copy all the elements along the dimension.
-  const int64 memcpy_dim = layout.minor_to_major(inner_dims.size());
+  const int64 memcpy_dim = LayoutUtil::Minor(layout, inner_dims.size());
 
   const bool memcpy_is_contiguous = slice->slice_strides(memcpy_dim) == 1;
   // The number of logical elements that can be copied in a single call
@@ -2263,8 +2238,8 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) {
     TF_RETURN_IF_ERROR(DotOpEmitter::EmitDotOperation(
         *root, root->operand(0)->IsRank2Transpose(),
         root->operand(1)->IsRank2Transpose(), target_array, lhs_array,
-        rhs_array, GetExecutableRunOptionsArgument(), &ir_builder_,
-        hlo_module_config_));
+        rhs_array, /*addend_array=*/nullptr, GetExecutableRunOptionsArgument(),
+        &ir_builder_, hlo_module_config_, target_machine_features_));
     return Status::OK();
   } else if (llvm_ir::CanEmitFusedDynamicUpdateSliceInPlace(fusion,
                                                             assignment_)) {
@@ -2285,6 +2260,35 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) {
     TF_RETURN_IF_ERROR(fusion->fused_expression_root()->Accept(&fused_emitter));
 
     return EmitTargetElementLoop(fusion, fused_emitter.GetRootGenerator());
+  } else if (fusion->fusion_kind() == HloInstruction::FusionKind::kOutput) {
+    VLOG(3) << "HandleFusion kOutput";
+    int64 dot_op_index = root->operand(0)->opcode() == HloOpcode::kDot ? 0 : 1;
+    const HloInstruction* dot = root->operand(dot_op_index);
+    CHECK_EQ(dot->opcode(), HloOpcode::kDot)
+        << dot->ToString() << "  "
+        << fusion->fused_instructions_computation()->ToString();
+
+    int64 dot_lhs_param_number = dot->operand(0)->parameter_number();
+    int64 dot_rhs_param_number = dot->operand(1)->parameter_number();
+    int64 addend_param_number =
+        root->operand(1 - dot_op_index)->parameter_number();
+
+    Shape target_shape = fusion->shape();
+    TF_RETURN_IF_ERROR(EmitTargetAddressForOp(fusion));
+    llvm_ir::IrArray target_array = GetIrArrayFor(fusion);
+
+    llvm_ir::IrArray lhs_array(
+        GetIrArrayFor(fusion->operand(dot_lhs_param_number)));
+    llvm_ir::IrArray rhs_array(
+        GetIrArrayFor(fusion->operand(dot_rhs_param_number)));
+    llvm_ir::IrArray addend_array(
+        GetIrArrayFor(fusion->operand(addend_param_number)));
+
+    TF_RETURN_IF_ERROR(DotOpEmitter::EmitDotOperation(
+        *dot, /*transpose_lhs=*/false, /*transpose_rhs=*/false, target_array,
+        lhs_array, rhs_array, &addend_array, GetExecutableRunOptionsArgument(),
+        &ir_builder_, hlo_module_config_, target_machine_features_));
+    return Status::OK();
   } else {
     return Unimplemented("Fusion kind not implemented on CPU");
   }
@@ -2305,9 +2309,17 @@ Status IrEmitter::HandleCall(HloInstruction* call) {
       !parallel_cpu_backend_) {
     // ParallelTaskAssignment assigned partitions, emit call to
     // ParallelForkJoin.
-    TF_RETURN_IF_ERROR(EmitParallelForkJoin(parameter_addresses,
-                                            emitted_value_[call], computation,
-                                            call_ir_function));
+    std::vector<llvm::Value*> call_args = GetArrayFunctionCallArguments(
+        parameter_addresses, &ir_builder_, computation->name(),
+        /*return_value_buffer=*/emitted_value_[call],
+        /*exec_run_options_arg=*/GetExecutableRunOptionsArgument(),
+        /*temp_buffers_arg=*/GetTempBuffersArgument(),
+        /*profile_counters_arg=*/GetProfileCountersArgument());
+
+    HloInstruction* root = computation->root_instruction();
+    TF_RETURN_IF_ERROR(EmitCallToParallelForkJoin(
+        call_args, root->shape(), root->outer_dimension_partitions(),
+        &ir_builder_, call_ir_function, computation->name()));
   } else {
     EmitArrayFunctionCallInto(call_ir_function, parameter_addresses,
                               emitted_value_[call], computation->name());
@@ -2410,7 +2422,7 @@ Status IrEmitter::HandleWhile(HloInstruction* xla_while) {
   // Terminates the current block with a branch to a while header.
   llvm::BasicBlock* header_bb = llvm::BasicBlock::Create(
       module_->getContext(), AsStringRef(IrName(xla_while, "header")),
-      compute_function_);
+      compute_function_->function());
   ir_builder_.CreateBr(header_bb);
   ir_builder_.SetInsertPoint(header_bb);
 
@@ -2427,7 +2439,7 @@ Status IrEmitter::HandleWhile(HloInstruction* xla_while) {
   // Branches to the body or to the while exit depending on the condition.
   llvm::BasicBlock* body_bb = llvm::BasicBlock::Create(
       module_->getContext(), AsStringRef(IrName(xla_while, "body")),
-      compute_function_);
+      compute_function_->function());
   llvm::BasicBlock* exit_bb = llvm::BasicBlock::Create(
       module_->getContext(), AsStringRef(IrName(xla_while, "exit")));
   ir_builder_.CreateCondBr(while_predicate, body_bb, exit_bb);
@@ -2442,7 +2454,7 @@ Status IrEmitter::HandleWhile(HloInstruction* xla_while) {
   ir_builder_.CreateBr(header_bb);
 
   // Adds the exit block to the function and sets the insert point there.
-  compute_function_->getBasicBlockList().push_back(exit_bb);
+  compute_function_->function()->getBasicBlockList().push_back(exit_bb);
   ir_builder_.SetInsertPoint(exit_bb);
 
   return Status::OK();
@@ -2478,14 +2490,13 @@ StatusOr<bool> IrEmitter::EmitFastConcatenate(
 
   int64 concat_dim = concatenate->dimensions(0);
   const Layout& output_layout = output_shape.layout();
+  auto output_min2maj = LayoutUtil::MinorToMajor(output_layout);
   auto concat_dim_layout_itr =
-      std::find(output_layout.minor_to_major().begin(),
-                output_layout.minor_to_major().end(), concat_dim);
+      std::find(output_min2maj.begin(), output_min2maj.end(), concat_dim);
 
-  std::vector<int64> inner_dims(output_layout.minor_to_major().begin(),
-                                concat_dim_layout_itr);
+  std::vector<int64> inner_dims(output_min2maj.begin(), concat_dim_layout_itr);
   std::vector<int64> outer_dims(std::next(concat_dim_layout_itr),
-                                output_layout.minor_to_major().end());
+                                output_min2maj.end());
 
   llvm::Type* i8_ptr_type = ir_builder_.getInt8PtrTy();
   llvm::Type* i8_type = ir_builder_.getInt8Ty();
@@ -2560,7 +2571,7 @@ void IrEmitter::EmitTransferElements(llvm::Value* target, llvm::Value* source,
                                      const llvm_ir::IrArray& source_array) {
   unsigned primitive_type_size =
       ShapeUtil::ByteSizeOfPrimitiveType(primitive_type);
-  unsigned element_alignment = GCD(
+  unsigned element_alignment = tensorflow::MathUtil::GCD<unsigned>(
       primitive_type_size, MinimumAlignmentForPrimitiveType(primitive_type));
   llvm::Type* primitive_ptr_type = llvm::PointerType::getUnqual(
       llvm_ir::PrimitiveTypeToIrType(primitive_type, module_));
@@ -2607,6 +2618,65 @@ Status IrEmitter::HandleConcatenate(HloInstruction* concatenate) {
   return DefaultAction(concatenate);
 }
 
+Status IrEmitter::HandleConditional(HloInstruction* conditional) {
+  auto pred = conditional->operand(0);
+  auto true_arg = conditional->operand(1);
+  auto false_arg = conditional->operand(2);
+  TF_RET_CHECK(ShapeUtil::IsScalar(pred->shape()) &&
+               pred->shape().element_type() == PRED)
+      << "Predicate on a Conditional must be bool; got: "
+      << ShapeUtil::HumanString(pred->shape());
+
+  HloComputation* true_computation = conditional->true_computation();
+  HloComputation* false_computation = conditional->false_computation();
+  TF_RET_CHECK(ShapeUtil::Equal(conditional->shape(),
+                                true_computation->root_instruction()->shape()))
+      << "Shape of conditional should be same as the shape of the true "
+      << "computation; got: " << ShapeUtil::HumanString(conditional->shape())
+      << " and "
+      << ShapeUtil::HumanString(true_computation->root_instruction()->shape());
+
+  TF_RET_CHECK(ShapeUtil::Equal(conditional->shape(),
+                                false_computation->root_instruction()->shape()))
+      << "Shape of conditional should be same as the shape of the false "
+      << "computation; got: " << ShapeUtil::HumanString(conditional->shape())
+      << " and "
+      << ShapeUtil::HumanString(false_computation->root_instruction()->shape());
+
+  llvm::Function* true_function =
+      FindOrDie(emitted_functions_, true_computation);
+  llvm::Function* false_function =
+      FindOrDie(emitted_functions_, false_computation);
+
+  TF_RETURN_IF_ERROR(EmitTargetAddressForOp(conditional));
+  llvm::Value* conditional_result = GetEmittedValueFor(conditional);
+
+  // Generating:
+  //   if (pred)
+  //     cond_result = true_computation(true_operand)
+  //   else
+  //     cond_result = false_computation(false_operand)
+  llvm::LoadInst* pred_value = ir_builder_.CreateLoad(
+      GetIrArrayFor(pred).GetBasePointer(), "load_predicate_value");
+  llvm::Value* pred_cond = ir_builder_.CreateICmpNE(
+      pred_value,
+      llvm::ConstantInt::get(llvm_ir::PrimitiveTypeToIrType(PRED, module_), 0),
+      "boolean_predicate");
+  llvm_ir::LlvmIfData if_data =
+      llvm_ir::EmitIfThenElse(pred_cond, "conditional", &ir_builder_);
+
+  SetToFirstInsertPoint(if_data.true_block, &ir_builder_);
+  EmitArrayFunctionCallInto(true_function, {GetEmittedValueFor(true_arg)},
+                            conditional_result, IrName(conditional, "_true"));
+
+  SetToFirstInsertPoint(if_data.false_block, &ir_builder_);
+  EmitArrayFunctionCallInto(false_function, {GetEmittedValueFor(false_arg)},
+                            conditional_result, IrName(conditional, "_false"));
+
+  SetToFirstInsertPoint(if_data.after_block, &ir_builder_);
+  return Status::OK();
+}
+
 Status IrEmitter::FinishVisit(HloInstruction* root) {
   // When this method is called, we should have already emitted an IR value for
   // the root (return) op. The IR value holds the address of the buffer holding
@@ -2618,57 +2688,51 @@ Status IrEmitter::FinishVisit(HloInstruction* root) {
   llvm::Value* root_value = GetEmittedValueFor(root);
   VLOG(2) << "  value: " << llvm_ir::DumpToString(*root_value);
 
-  llvm::Value* prof_counter = [&]() {
-    // For the parallel cpu backend, we record the total for each embedded
-    // computation callee with its caller kCall HLO.
-    if (parallel_cpu_backend_ && is_top_level_computation_) {
-      auto* computation = root->parent();
-      auto* entry_computation = computation->parent()->entry_computation();
-      if (computation != entry_computation) {
-        for (HloInstruction* instruction : entry_computation->instructions()) {
-          if (instruction->opcode() == HloOpcode::kCall &&
-              instruction->to_apply()->root_instruction() == root) {
-            return GetProfileCounterFor(*instruction);
-          }
+  auto record_complete_computation = [&](llvm::Value* prof_counter) {
+    if (prof_counter) {
+      profiling_state_.RecordCompleteComputation(&ir_builder_, prof_counter);
+    }
+  };
+
+  // For the parallel cpu backend, we record the total for each embedded
+  // computation callee with its caller kCall HLO.
+  if (parallel_cpu_backend_ && is_top_level_computation_) {
+    auto* computation = root->parent();
+    auto* entry_computation = computation->parent()->entry_computation();
+    if (computation != entry_computation) {
+      for (HloInstruction* instruction : entry_computation->instructions()) {
+        if (instruction->opcode() == HloOpcode::kCall &&
+            instruction->to_apply()->root_instruction() == root) {
+          record_complete_computation(GetProfileCounterFor(*instruction));
+          return Status::OK();
         }
       }
     }
-
-    // Otherwise we record the total computation cycles in a dedicated slot for
-    // the entry computation.
-    return GetProfileCounterForEntryComputation();
-  }();
-
-  if (prof_counter) {
-    profiling_state_.RecordCompleteComputation(&ir_builder_, prof_counter);
   }
-  ir_builder_.CreateRetVoid();
+
+  // For the entry computation this increment is cumulative of embedded
+  // computations since it includes cycles spent in computations invoked by
+  // While, Call etc.
+  record_complete_computation(GetProfileCounterFor(*root->parent()));
   return Status::OK();
 }
 
-llvm::Value* IrEmitter::GetProfileCounterFor(const HloInstruction& hlo) {
-  auto it = hlo_to_profile_idx_.find(&hlo);
-  if (it == hlo_to_profile_idx_.end()) {
+template <typename T>
+llvm::Value* IrEmitter::GetProfileCounterCommon(
+    const T& hlo,
+    const std::unordered_map<const T*, int64>& profile_index_map) {
+  auto it = profile_index_map.find(&hlo);
+  if (it == profile_index_map.end()) {
     return nullptr;
   }
 
-  size_t prof_counter_idx = it->second;
+  int64 prof_counter_idx = it->second;
   string counter_name = IrName("prof_counter", hlo.name());
   return ir_builder_.CreateGEP(GetProfileCountersArgument(),
                                ir_builder_.getInt64(prof_counter_idx),
                                AsStringRef(counter_name));
 }
 
-llvm::Value* IrEmitter::GetProfileCounterForEntryComputation() {
-  if (entry_computation_profile_idx_) {
-    return ir_builder_.CreateGEP(
-        GetProfileCountersArgument(),
-        ir_builder_.getInt64(*entry_computation_profile_idx_),
-        "prof_counter.computation");
-  }
-  return nullptr;
-}
-
 void IrEmitter::ProfilingState::UpdateProfileCounter(
     llvm::IRBuilder<>* ir_builder, llvm::Value* prof_counter,
     llvm::Value* cycle_end, llvm::Value* cycle_start) {
@@ -2731,8 +2795,7 @@ void IrEmitter::ProfilingState::RecordCycleDelta(llvm::IRBuilder<>* ir_builder,
 
 void IrEmitter::ProfilingState::RecordCompleteComputation(
     llvm::IRBuilder<>* ir_builder, llvm::Value* prof_counter) {
-  if (is_top_level_computation_ && last_read_cycle_end_ &&
-      first_read_cycle_start_) {
+  if (last_read_cycle_end_ && first_read_cycle_start_) {
     UpdateProfileCounter(ir_builder, prof_counter, last_read_cycle_end_,
                          first_read_cycle_start_);
   }
@@ -2740,7 +2803,7 @@ void IrEmitter::ProfilingState::RecordCompleteComputation(
 
 Status IrEmitter::Preprocess(HloInstruction* hlo) {
   VLOG(3) << "Visiting: " << hlo->ToString();
-  if (hlo_to_profile_idx_.count(hlo)) {
+  if (instruction_to_profile_idx_.count(hlo)) {
     profiling_state_.RecordCycleStart(&ir_builder_, hlo);
   }
   return Status::OK();
@@ -2783,43 +2846,16 @@ llvm::Type* IrEmitter::IrShapeType(const Shape& shape) {
   return llvm_ir::ShapeToIrType(shape, module_);
 }
 
-std::vector<llvm::Type*> IrEmitter::GetComputeFunctionParams() {
-  llvm::Type* i8_ptr_type = llvm::Type::getInt8PtrTy(module_->getContext());
-  llvm::Type* i8_ptr_ptr_type = i8_ptr_type->getPointerTo();
-  llvm::Type* i64_ptr_type = llvm::Type::getInt64PtrTy(module_->getContext());
-  std::vector<llvm::Type*> compute_function_params(
-      {i8_ptr_type, i8_ptr_type, i8_ptr_ptr_type, i8_ptr_ptr_type});
-  if (num_dynamic_loop_bounds_ > 0) {
-    compute_function_params.push_back(i64_ptr_type);
-  }
-  compute_function_params.push_back(i64_ptr_type);
-  return compute_function_params;
-}
-
-llvm::Argument* IrEmitter::GetResultArgument() {
-  return GetArg(compute_function_, 0);
-}
-
-llvm::Argument* IrEmitter::GetProfileCountersArgument() {
-  const int64 arg_index = num_dynamic_loop_bounds_ > 0 ? 5 : 4;
-  return GetArg(compute_function_, arg_index);
+llvm::Value* IrEmitter::GetProfileCountersArgument() {
+  return compute_function_->profile_counters_arg();
 }
 
 llvm::Value* IrEmitter::GetTempBuffersArgument() {
-  return GetArg(compute_function_, 3);
-}
-
-llvm::Value* IrEmitter::GetDynamicLoopBound(const int64 offset) {
-  CHECK_GT(num_dynamic_loop_bounds_, 0);
-  CHECK_LT(offset, num_dynamic_loop_bounds_ * 2);
-  llvm::Argument* loop_bounds_arg = GetArg(compute_function_, 4);
-  string name = tensorflow::strings::StrCat("dynamic_loop_bound_", offset);
-  return ir_builder_.CreateLoad(ir_builder_.CreateGEP(
-      loop_bounds_arg, ir_builder_.getInt64(offset), AsStringRef(name)));
+  return compute_function_->temp_buffers_arg();
 }
 
 llvm::Value* IrEmitter::GetExecutableRunOptionsArgument() {
-  return GetArg(compute_function_, 1);
+  return compute_function_->exec_run_options_arg();
 }
 
 llvm::Value* IrEmitter::EmitTempBufferPointer(
@@ -2850,10 +2886,14 @@ llvm::Value* IrEmitter::EmitTempBufferPointer(
       GetTempBuffersArgument(), slice.index(), &ir_builder_);
   llvm::LoadInst* tempbuf_address_base =
       ir_builder_.CreateLoad(tempbuf_address_ptr);
-  if (hlo_module_config_.debug_options()
+  if (is_top_level_computation_ &&
+      hlo_module_config_.debug_options()
           .xla_llvm_enable_invariant_load_metadata()) {
-    // Loading the address of a buffer is invariant of the point at which the
-    // load is executed in the program because we never reassign buffers.
+    // In the entry computation the parameter slots in the %params argument are
+    // invariant through program execution.  In computations that are called
+    // from the entry computation (via kWhile, kCall and kConditional) the
+    // parameter slots are *not* invariant since they're written to by their
+    // callers.
     tempbuf_address_base->setMetadata(
         llvm::LLVMContext::MD_invariant_load,
         llvm::MDNode::get(tempbuf_address_base->getContext(), /*MDs=*/{}));
@@ -2884,42 +2924,6 @@ llvm::Value* IrEmitter::EmitElementFunctionCall(
       AsStringRef(tensorflow::strings::StrCat(name, "_return_value")));
 }
 
-// Emits code to allocate an array of parameter address pointers, and store
-// each address from 'parameter_addresses'.
-// Returns an array of compute function call arguments (including parameter
-// address buffer).
-std::vector<llvm::Value*> IrEmitter::GetArrayFunctionCallArguments(
-    tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
-    llvm::Value* return_value_buffer, tensorflow::StringPiece name) {
-  llvm::Value* parameter_addresses_buffer =
-      llvm_ir::EmitAllocaAtFunctionEntryWithCount(
-          ir_builder_.getInt8PtrTy(),
-          ir_builder_.getInt32(parameter_addresses.size()),
-          tensorflow::strings::StrCat(name, "_parameter_addresses"),
-          &ir_builder_);
-  for (size_t i = 0; i < parameter_addresses.size(); ++i) {
-    llvm::Value* parameter_as_i8ptr = ir_builder_.CreateBitCast(
-        parameter_addresses[i], ir_builder_.getInt8PtrTy(),
-        AsStringRef(tensorflow::strings::StrCat(name, "_parameter_", i,
-                                                "_address_as_i8ptr")));
-    llvm::Value* slot_in_param_adresses = ir_builder_.CreateInBoundsGEP(
-        parameter_addresses_buffer, {ir_builder_.getInt64(i)});
-    ir_builder_.CreateStore(parameter_as_i8ptr, slot_in_param_adresses);
-  }
-
-  const auto to_int8_ptr = [this](llvm::Value* ptr) {
-    return ir_builder_.CreatePointerCast(ptr, ir_builder_.getInt8PtrTy());
-  };
-  std::vector<llvm::Value*> arguments{
-      to_int8_ptr(return_value_buffer),
-      to_int8_ptr(GetExecutableRunOptionsArgument()),
-      parameter_addresses_buffer, GetTempBuffersArgument()};
-  if (auto* profile_counters = GetProfileCountersArgument()) {
-    arguments.push_back(profile_counters);
-  }
-  return arguments;
-}
-
 // Emits a core function call based on the following pseudo-code.
 //
 //   char** parameter_addresses_buffer =
@@ -2935,8 +2939,12 @@ void IrEmitter::EmitArrayFunctionCallInto(
     tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
     llvm::Value* return_value_buffer, tensorflow::StringPiece name) {
   ir_builder_.CreateCall(
-      function, GetArrayFunctionCallArguments(parameter_addresses,
-                                              return_value_buffer, name));
+      function, GetArrayFunctionCallArguments(
+                    parameter_addresses, &ir_builder_, name,
+                    /*return_value_buffer=*/return_value_buffer,
+                    /*exec_run_options_arg=*/GetExecutableRunOptionsArgument(),
+                    /*temp_buffers_arg=*/GetTempBuffersArgument(),
+                    /*profile_counters_arg=*/GetProfileCountersArgument()));
 }
 
 llvm::Value* IrEmitter::EmitArrayFunctionCall(
@@ -2956,117 +2964,13 @@ llvm::Value* IrEmitter::EmitArrayFunctionCall(
   return return_value_buffer;
 }
 
-// Emits a call to a runtime fork/join function which dispatches parallel
-// calls to 'parallel_function' (and joins threads before returning).
-Status IrEmitter::EmitParallelForkJoin(
-    tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
-    llvm::Value* output_address, HloComputation* computation,
-    llvm::Function* parallel_function) {
-  HloInstruction* root = computation->root_instruction();
-
-  // Build ParallelForkJoin function type.
-  std::vector<llvm::Type*> compute_function_params = GetComputeFunctionParams();
-  // Number of parallel compute functions.
-  compute_function_params.push_back(ir_builder_.getInt32Ty());
-  // Array of partitions. There is an array element for each
-  // partition x partition_dim x 2 (for dimension start and limit).
-  compute_function_params.push_back(
-      llvm::Type::getInt64PtrTy(module_->getContext()));
-  // Number of partitioned most-major dimensions in 'root.shape'.
-  compute_function_params.push_back(ir_builder_.getInt32Ty());
-  // Function pointer for compute function to be dispatched in parallel.
-  compute_function_params.push_back(
-      llvm::Type::getInt8PtrTy(module_->getContext()));
-
-  llvm::FunctionType* fork_join_type = llvm::FunctionType::get(
-      /*Result=*/llvm::Type::getVoidTy(module_->getContext()),
-      /*Params=*/compute_function_params,
-      /*isVarArg=*/false);
-
-  llvm::Function* fork_join_func =
-      llvm::cast<llvm::Function>(module_->getOrInsertFunction(
-          runtime::kParallelForkJoinSymbolName, fork_join_type));
-  fork_join_func->setCallingConv(llvm::CallingConv::C);
-  fork_join_func->setDoesNotThrow();
-
-  // Add common compute function arguments.
-  const string name = computation->name();
-  std::vector<llvm::Value*> arguments =
-      GetArrayFunctionCallArguments(parameter_addresses, output_address, name);
-
-  // Create ShapePartitionIterator to generate all partitions of 'root.shape'.
-  ShapePartitionIterator partition_iterator(root->shape(),
-                                            root->outer_dimension_partitions());
-  const int64 num_partitions = partition_iterator.GetTotalPartitionCount();
-  // Add argument specifying the number of parallel partitions.
-  arguments.push_back(ir_builder_.getInt32(num_partitions));
-
-  // The number of partitioned most-major dimensions in 'root.shape'.
-  const int32 num_partitioned_dims = root->outer_dimension_partitions().size();
-  // A dimension partition consists of two elements: [start_index, limit_index).
-  const int32 dim_partition_size = 2;
-  // Calculate array partition stride.
-  const int32 array_partition_stride =
-      num_partitioned_dims * dim_partition_size;
-  // Calculate the total number of elements in the partition array.
-  const int32 partition_array_size =
-      dim_partition_size * num_partitioned_dims * num_partitions;
-
-  // Store dimension partition values as llvm constants in 'partitions'.
-  // See comments in runtime_fork_join.cc for array layout description.
-  std::vector<llvm::Constant*> partitions(partition_array_size);
-  for (int32 i = 0; i < num_partitions; ++i) {
-    std::vector<std::pair<int64, int64>> dim_partitions =
-        partition_iterator.GetPartition(i);
-    CHECK_EQ(num_partitioned_dims, dim_partitions.size());
-    const int32 partition_index = i * array_partition_stride;
-    for (int32 j = 0; j < num_partitioned_dims; ++j) {
-      const std::pair<int64, int64>& dim_partition = dim_partitions[j];
-      const int32 index = partition_index + j * dim_partition_size;
-      // Store partition [dim_start, dim_limit) intervals for each dimension.
-      partitions[index] = ir_builder_.getInt64(dim_partition.first);
-      partitions[index + 1] =
-          ir_builder_.getInt64(dim_partition.first + dim_partition.second);
-    }
-  }
-
-  // Create global variable out of dimension partitions in 'partitions'.
-  llvm::ArrayType* partitions_array_type =
-      llvm::ArrayType::get(ir_builder_.getInt64Ty(), partition_array_size);
-  llvm::Constant* partitions_array =
-      llvm::ConstantArray::get(partitions_array_type, partitions);
-  llvm::GlobalVariable* global_partitions_array = new llvm::GlobalVariable(
-      /*Module=*/*module_,
-      /*Type=*/partitions_array_type,
-      /*isConstant=*/true,
-      /*Linkage=*/llvm::GlobalValue::PrivateLinkage,
-      /*Initializer=*/partitions_array,
-      /*Name=*/
-      AsStringRef(
-          tensorflow::strings::StrCat(name, "_parallel_dimension_partitions")));
-
-  // Add argument specifying parallel dimension partitions.
-  arguments.push_back(ir_builder_.CreateBitCast(
-      global_partitions_array,
-      llvm::Type::getInt64PtrTy(module_->getContext())));
-  // Add argument specifying the number of partitioned most-major dimensions.
-  arguments.push_back(ir_builder_.getInt32(num_partitioned_dims));
-  // Add argument for parallel compute function pointer.
-  arguments.push_back(
-      ir_builder_.CreateBitCast(parallel_function, ir_builder_.getInt8PtrTy()));
-  // Emit call to parallel fork/join.
-  ir_builder_.CreateCall(fork_join_func, arguments);
-
-  return Status::OK();
-}
-
 Status IrEmitter::EmitTargetAddressForOp(const HloInstruction* op) {
   llvm::Value* addr;
   const Shape& target_shape = op->shape();
   if (op == op->parent()->root_instruction()) {
     // For the root node, we write directly to the output buffer of the
     // function.
-    llvm::Argument* retval = GetResultArgument();
+    llvm::Argument* retval = compute_function_->result_arg();
     if (!ShapeUtil::IsNil(target_shape)) {
       llvm::AttrBuilder attr_builder;
       attr_builder.addAlignmentAttr(MinimumAlignmentForShape(target_shape));
@@ -3127,8 +3031,13 @@ Status IrEmitter::EmitTargetElementLoop(
 
   } else {
     if (ShouldEmitParallelLoopFor(*target_op)) {
-      TF_RETURN_IF_ERROR(EmitParallelTargetElementLoop(
-          target_shape, element_generator, IrName(target_op), &target_array));
+      // Emit code to read dynamic loop bounds from compute function argument.
+      std::vector<std::pair<llvm::Value*, llvm::Value*>> dynamic_loop_bounds =
+          compute_function_->GetDynamicLoopBounds();
+      // Emit parallel loop with dynamic loop bounds for most-major dimensions.
+      TF_RETURN_IF_ERROR(ParallelLoopEmitter(element_generator, target_array,
+                                             &dynamic_loop_bounds, &ir_builder_)
+                             .EmitLoop(IrName(target_op)));
     } else {
       TF_RETURN_IF_ERROR(
           llvm_ir::LoopEmitter(element_generator, target_array, &ir_builder_)
@@ -3138,60 +3047,6 @@ Status IrEmitter::EmitTargetElementLoop(
   return Status::OK();
 }
 
-Status IrEmitter::EmitParallelTargetElementLoop(
-    const Shape& target_shape,
-    const llvm_ir::ElementGenerator& element_generator,
-    tensorflow::StringPiece loop_name, llvm_ir::IrArray* target_array) {
-  CHECK(!ShapeUtil::IsTuple(target_shape));
-  CHECK(!ShapeUtil::IsScalar(target_shape));
-
-  // Emit code to read dynamic loop bounds from function argument 4.
-  std::vector<llvm::Value*> dynamic_loop_bounds(2 * num_dynamic_loop_bounds_);
-  for (int i = 0; i < 2 * num_dynamic_loop_bounds_; ++i) {
-    dynamic_loop_bounds[i] = GetDynamicLoopBound(i);
-  }
-
-  llvm_ir::ForLoopNest loop_nest(loop_name, &ir_builder_);
-  const int64 num_dims = target_shape.dimensions_size();
-  llvm_ir::IrArray::Index array_index(num_dims);
-
-  // Add loops from outer-most to inner-most dimensions.
-  for (int i = target_shape.layout().minor_to_major_size() - 1; i >= 0; --i) {
-    const int64 dimension = target_shape.layout().minor_to_major(i);
-    const int bounds_index = num_dims - 1 - i;
-    if (bounds_index < num_dynamic_loop_bounds_) {
-      // Emit dynamic loop bounds for this dimension. Dynamic loop bounds
-      // are read from ir function dynamic loop bounds argument.
-      llvm::Value* start_index = dynamic_loop_bounds[bounds_index * 2 + 0];
-      llvm::Value* end_index = dynamic_loop_bounds[bounds_index * 2 + 1];
-
-      std::unique_ptr<llvm_ir::ForLoop> loop = loop_nest.AddLoop(
-          /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension),
-          start_index, end_index);
-      array_index[dimension] = loop->GetIndVarValue();
-    } else {
-      // Emit static loop bounds for this dimension.
-      std::unique_ptr<llvm_ir::ForLoop> loop = loop_nest.AddLoop(
-          /*start_index=*/0,
-          /*end_index=*/target_shape.dimensions(dimension),
-          /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension));
-      array_index[dimension] = loop->GetIndVarValue();
-    }
-  }
-  // Point IR builder at inner loop BB.
-  SetToFirstInsertPoint(loop_nest.GetInnerLoopBodyBasicBlock(), &ir_builder_);
-
-  // Emit loop body.
-  TF_ASSIGN_OR_RETURN(llvm::Value * target_element,
-                      element_generator(array_index));
-  target_array->EmitWriteArrayElement(array_index, target_element,
-                                      &ir_builder_);
-  // Point IR builder at outer loop exit BB.
-  SetToFirstInsertPoint(loop_nest.GetOuterLoopExitBasicBlock(), &ir_builder_);
-
-  return Status::OK();
-}
-
 Status IrEmitter::EmitMemcpy(const HloInstruction& source,
                              const HloInstruction& destination) {
   llvm::Value* source_value = GetEmittedValueFor(&source);
@@ -3249,37 +3104,5 @@ StatusOr<llvm::Value*> IrEmitter::EmitScalarCall(
                                  ShapeUtil::MakeShape(return_type, {}),
                                  argument_addrs, name);
 }
-
-unsigned TargetMachineFeatures::largest_register_size_in_bytes(
-    llvm::Function* function) {
-  auto itr = largest_register_size_in_bytes_.find(function);
-  if (itr != largest_register_size_in_bytes_.end()) {
-    return itr->second;
-  }
-
-  int result = largest_register_size_in_bytes_impl(function);
-
-  InsertOrDie(&largest_register_size_in_bytes_, function, result);
-  DCHECK_EQ(result, largest_register_size_in_bytes_.begin()->second);
-  return result;
-}
-
-unsigned TargetMachineFeatures::largest_register_size_in_bytes_impl(
-    llvm::Function* function) const {
-  auto register_info =
-      target_machine_->getSubtargetImpl(*function)->getRegisterInfo();
-
-  unsigned largest_register_size = 0;
-  for (const llvm::TargetRegisterClass* register_class :
-       register_info->regclasses()) {
-    if (register_class->isAllocatable()) {
-      largest_register_size =
-          std::max(largest_register_size,
-                   register_info->getRegSizeInBits(*register_class));
-    }
-  }
-
-  return largest_register_size / 8;
-}
 }  // namespace cpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index 351c95278c17f536e56d9f085b938a9baea9cde1..66f2aeeab33dbaa34297c8dc6a37c3ad481820d8 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <stddef.h>
 #include <map>
+#include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -30,6 +31,8 @@ limitations under the License.
 #include "llvm/Target/TargetMachine.h"
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/cpu/external_constant_pool.h"
+#include "tensorflow/compiler/xla/service/cpu/ir_function.h"
+#include "tensorflow/compiler/xla/service/cpu/target_machine_features.h"
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -49,49 +52,6 @@ limitations under the License.
 
 namespace xla {
 namespace cpu {
-
-// Wraps an llvm::TargetMachine and parses out some information that feeds into
-// code LLVM IR generation decisions.
-//
-// Ideally we'd be able to use llvm::TargetTransformInfo here (since its
-// interface is pretty much a perfect fit for our use case), but obtaining an
-// instance of llvm::TargetTransformInfo outside an LLVM pass pipeline without
-// super-ugly hacks is difficult.
-//
-// TODO(b/66049221): See if the LLVM community will be receptive to exposing an
-// API that lets us directly create and use llvm::TargetTransformInfo instances
-// outside of a pass manager.
-class TargetMachineFeatures {
- public:
-  TargetMachineFeatures(llvm::TargetMachine* target_machine)
-      : target_machine_(target_machine) {}
-
-  // Return the vectorization factor, which is the number of bytes of data
-  // explicitly vectorized routines will try to process at once.
-  int vectorization_factor_in_bytes() const {
-    // Ideally this should be a function of the cache line size (which we can
-    // get from llvm::TargetTransformInfo::getCacheLineSize) of the target
-    // machine.  Guess a value of 128 bytes for now.
-    return 128;
-  }
-
-  // Return the size of the largest register size in bytes.  We need to pass in
-  // "function" since llvm functions can contain annotations for specializing
-  // them to specific micro-architectures (though currently XLA does not use
-  // this functionality).
-  //
-  // Ideally we should have been able to use
-  // llvm::TargetTransformInfo::getRegisterBitWidth(true) here.
-  unsigned largest_register_size_in_bytes(llvm::Function* function);
-
- private:
-  unsigned largest_register_size_in_bytes_impl(llvm::Function* function) const;
-
-  tensorflow::gtl::FlatMap<llvm::Function*, int>
-      largest_register_size_in_bytes_;
-  llvm::TargetMachine* target_machine_;
-};
-
 // This class is the top-level API for the XLA HLO --> LLVM IR compiler.  It
 // implements the DfsHloVisitor interface and emits HLO computations as LLVM IR
 // functions.
@@ -103,20 +63,21 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   // assignment: a BufferAssignment from which we know which temporary buffers
   //             are used by the HLO nodes.
   // llvm_module: the LLVM module to emit IR into.
-  // hlo_to_profile_idx: the mapping from HLO to its index in the profiling
-  //                     array.
-  // entry_computation_profile_idx: the index in the profiling array
-  //                                for the entry computation.
+  // instruction_to_profile_idx: the mapping from HLO instructions to their
+  //              index in the profiling array.
+  // computation_to_profile_idx: the mapping from HLO computations to their
+  //              index in the profiling array.
   // external_constant_pool: if non-null, points to an ExternalConstantPool
   //                         instance into which the Ir emitter can spill
   //                         constants.
-  IrEmitter(
-      const HloModule& hlo_module, const BufferAssignment& assignment,
-      llvm::Module* llvm_module,
-      std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx,
-      tensorflow::gtl::optional<size_t> entry_computation_profile_idx,
-      llvm::TargetMachine* target_machine,
-      ExternalConstantPool* external_constant_pool);
+  IrEmitter(const HloModule& hlo_module, const BufferAssignment& assignment,
+            llvm::Module* llvm_module,
+            std::unordered_map<const HloInstruction*, int64>
+                instruction_to_profile_idx,
+            std::unordered_map<const HloComputation*, int64>
+                computation_to_profile_idx,
+            llvm::TargetMachine* target_machine,
+            ExternalConstantPool* external_constant_pool);
   ~IrEmitter() override;
 
   // Emit and return the given HLO computation as an LLVM IR
@@ -163,6 +124,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   Status HandleSelect(HloInstruction* select) override;
   Status HandleDot(HloInstruction* dot) override;
   Status HandleConvolution(HloInstruction* convolution) override;
+  Status HandleFft(HloInstruction* fft) override;
   Status HandleBatchNormTraining(HloInstruction* batch_norm_training) override;
   Status HandleBatchNormGrad(HloInstruction* batch_norm_grad) override;
   Status HandleCrossReplicaSum(HloInstruction* crs) override;
@@ -189,6 +151,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   Status HandleCustomCall(HloInstruction* custom_call) override;
   Status HandleWhile(HloInstruction* xla_while) override;
   Status HandleConcatenate(HloInstruction* concatenate) override;
+  Status HandleConditional(HloInstruction* conditional) override;
   Status FinishVisit(HloInstruction* root) override;
 
   Status Preprocess(HloInstruction* hlo) override;
@@ -198,14 +161,23 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   // Private helper to initialize an IR function for the computation.
   void InitializeIrFunction(const string& function_name);
 
-  // Convenience function to generate a GEP into the profile counter parameter
-  // which would correspond to the index for a given HLO.
-  llvm::Value* GetProfileCounterFor(const HloInstruction& hlo);
+  template <typename T>
+  llvm::Value* GetProfileCounterCommon(
+      const T& hlo,
+      const std::unordered_map<const T*, int64>& profile_index_map);
+
+  // Convenience functions to generate a GEP into the profile counter parameter
+  // which would correspond to the index for a given HLO instruction or
+  // computation.
+  llvm::Value* GetProfileCounterFor(const HloInstruction& instruction) {
+    return GetProfileCounterCommon<HloInstruction>(instruction,
+                                                   instruction_to_profile_idx_);
+  }
 
-  // Convenience function to generate a GEP into the profile counter parameter
-  // corresponding to the index for the entry computation.  Returns nullptr if
-  // profiling the entry computation is disabled.
-  llvm::Value* GetProfileCounterForEntryComputation();
+  llvm::Value* GetProfileCounterFor(const HloComputation& computation) {
+    return GetProfileCounterCommon<HloComputation>(computation,
+                                                   computation_to_profile_idx_);
+  }
 
   // Gets the IR Value emitted previously for the given hlo.
   //
@@ -233,16 +205,9 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   // Convenience function to get the IR type matching the given shape.
   llvm::Type* IrShapeType(const Shape& shape);
 
-  // Returns an array of compute function parameter types.
-  std::vector<llvm::Type*> GetComputeFunctionParams();
-
-  // Get the llvm::Value* that represents the "retval" argument of the
-  // computation function being emitted by this emitter.
-  llvm::Argument* GetResultArgument();
-
   // Get the llvm::Value* that represents the "prof_counters" argument of the
   // computation function being emitted by this emitter.
-  llvm::Argument* GetProfileCountersArgument();
+  llvm::Value* GetProfileCountersArgument();
 
   // Get the xla::ExecutableRunOptions that represents the "run_options"
   // argument of the computation function being emitted by this emitter.
@@ -252,11 +217,6 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   // computation function being emitted by this emitter.
   llvm::Value* GetTempBuffersArgument();
 
-  // Emit ir to read and return the ir value for the dynamic loop bound at
-  // 'offset' from the "dynamic_loop_bounds" argument of the computation
-  // function being emitted by this emitter.
-  llvm::Value* GetDynamicLoopBound(const int64 offset);
-
   // Emits code that computes the address of the given temporary buffer to the
   // function. target_shape is the shape of this temporary buffer.
   // The returned Value's type is a pointer to element_type.
@@ -310,18 +270,6 @@ class IrEmitter : public DfsHloVisitorWithDefault {
       tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
       tensorflow::StringPiece name);
 
-  // Returns an array of compute function call arguments.
-  std::vector<llvm::Value*> GetArrayFunctionCallArguments(
-      tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
-      llvm::Value* return_value_buffer, tensorflow::StringPiece name);
-
-  // Emits a call to a runtime fork/join function which dispatches parallel
-  // calls to 'parallel_function' (and joins threads before returning).
-  Status EmitParallelForkJoin(
-      tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
-      llvm::Value* output_address, HloComputation* computation,
-      llvm::Function* parallel_function);
-
   // Verifies that the element types of all of the given operand instructions
   // match and are of one of the given supported types.
   Status ElementTypesSameAndSupported(
@@ -346,15 +294,6 @@ class IrEmitter : public DfsHloVisitorWithDefault {
       HloInstruction* target_op, tensorflow::StringPiece desc,
       const llvm_ir::ElementGenerator& element_generator);
 
-  // Emit IR to perform a computation for every element in a partition/slice of
-  // 'target_shape'. The loop bounds for the outer-dimension partitions are
-  // passed into the compute function as a runtime argument (accessible from
-  // GetDynamicLoopBound).
-  Status EmitParallelTargetElementLoop(
-      const Shape& target_shape,
-      const llvm_ir::ElementGenerator& element_generator,
-      tensorflow::StringPiece loop_name, llvm_ir::IrArray* target_array);
-
   // Emits a memcpy from the source instruction's result value to the
   // destination's.  Both source and destination must have an entry in the
   // emitted_value_ table.
@@ -476,13 +415,19 @@ class IrEmitter : public DfsHloVisitorWithDefault {
       thread_local_buffers_;
 
   // The following fields track the IR emission state. According to LLVM memory
-  // management rules, their memory is owned by the module.
-  llvm::Function* compute_function_;
+  // management rules, their memory is owned by the module (Note that IrFunction
+  // creates the encapsulated llvm::Function s.t. it is added to the llvm
+  // module's function list).
+  std::unique_ptr<IrFunction> compute_function_;
   llvm::IRBuilder<> ir_builder_;
 
-  // Maps HLOs to their index into the profile counter array.
-  std::unordered_map<const HloInstruction*, size_t> hlo_to_profile_idx_;
-  const tensorflow::gtl::optional<size_t> entry_computation_profile_idx_;
+  // Maps HLO instructions to their index into the profile counter array.
+  const std::unordered_map<const HloInstruction*, int64>
+      instruction_to_profile_idx_;
+
+  // Maps HLO computations to their index into the profile counter array.
+  const std::unordered_map<const HloComputation*, int64>
+      computation_to_profile_idx_;
 
   // Maps HLOs to Values emitted for them.
   std::unordered_map<const HloInstruction*, llvm::Value*> emitted_value_;
@@ -490,7 +435,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   llvm_ir::AliasAnalysis alias_analysis_;
 
   // The number of root instruction outer dimensions used in parallel loop
-  // emission (EmitParallelTargetElementLoop).
+  // emission (ParallelLoopEmitter).
   int64 num_dynamic_loop_bounds_ = 0;
 
   // Returns whether the given instruction should be emitted as a parallel loop.
@@ -505,15 +450,9 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   // profiling a computation.
   class ProfilingState {
    public:
-    ProfilingState()
-        : is_top_level_computation_(false),
-          use_rdtscp_(false),
-          prof_counters_(nullptr) {}
-    ProfilingState(bool is_top_level_computation, bool use_rdtscp,
-                   llvm::Argument* prof_counters)
-        : is_top_level_computation_(is_top_level_computation),
-          use_rdtscp_(use_rdtscp),
-          prof_counters_(prof_counters) {}
+    ProfilingState() : use_rdtscp_(false), prof_counters_(nullptr) {}
+    ProfilingState(bool use_rdtscp, llvm::Value* prof_counters)
+        : use_rdtscp_(use_rdtscp), prof_counters_(prof_counters) {}
 
     // Record the cycle counter before an HLO executes.
     void RecordCycleStart(llvm::IRBuilder<>* ir_builder, HloInstruction* hlo);
@@ -535,15 +474,12 @@ class IrEmitter : public DfsHloVisitorWithDefault {
                               llvm::Value* cycle_start);
 
    private:
-    // Is this IrEmitter for a top-level computation?
-    bool is_top_level_computation_;
-
     // Should we use the x86-specific rdtscp or the generic readcyclecounter
     // intrinsic?
     bool use_rdtscp_;
 
     // The argument which corresponds to the profile counter buffer.
-    llvm::Argument* prof_counters_;
+    llvm::Value* prof_counters_;
 
     // The first read cycle counter in the program.
     llvm::Value* first_read_cycle_start_ = nullptr;
diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.cc b/tensorflow/compiler/xla/service/cpu/ir_function.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ca8c290dd1c4959e42026c3917d37f8fc95a1011
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/ir_function.cc
@@ -0,0 +1,333 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <iterator>
+
+#include "tensorflow/compiler/xla/service/cpu/ir_function.h"
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h"
+#include "tensorflow/compiler/xla/service/cpu/shape_partition.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+
+namespace xla {
+
+namespace {
+using llvm_ir::AsStringRef;
+}  // namespace
+
+namespace cpu {
+
+static std::vector<llvm::Type*> GetComputeFunctionParams(
+    llvm::Module* llvm_module, const int64 num_dynamic_loop_bounds) {
+  llvm::Type* i8_ptr_type = llvm::Type::getInt8PtrTy(llvm_module->getContext());
+  llvm::Type* i8_ptr_ptr_type = i8_ptr_type->getPointerTo();
+  llvm::Type* i64_ptr_type =
+      llvm::Type::getInt64PtrTy(llvm_module->getContext());
+  std::vector<llvm::Type*> compute_function_params(
+      {i8_ptr_type, i8_ptr_type, i8_ptr_ptr_type, i8_ptr_ptr_type});
+  if (num_dynamic_loop_bounds > 0) {
+    compute_function_params.push_back(i64_ptr_type);
+  }
+  compute_function_params.push_back(i64_ptr_type);
+  return compute_function_params;
+}
+
+IrFunction::IrFunction(const string& function_name,
+                       llvm::Function::LinkageTypes linkage,
+                       const bool optimize_for_size_requested,
+                       const bool enable_fast_math, llvm::Module* llvm_module,
+                       llvm::IRBuilder<>* ir_builder,
+                       int64 num_dynamic_loop_bounds)
+    : ir_builder_(ir_builder),
+      llvm_module_(llvm_module),
+      caller_insert_point_guard_(*ir_builder),
+      num_dynamic_loop_bounds_(num_dynamic_loop_bounds) {
+  Initialize(function_name, linkage, optimize_for_size_requested,
+             enable_fast_math);
+}
+
+IrFunction::~IrFunction() {
+  // Emit function return value.
+  ir_builder_->CreateRetVoid();
+}
+
+DynamicLoopBounds IrFunction::GetDynamicLoopBounds() {
+  DynamicLoopBounds dynamic_loop_bounds(num_dynamic_loop_bounds_);
+  for (int i = 0; i < num_dynamic_loop_bounds_; ++i) {
+    dynamic_loop_bounds[i].first = GetDynamicLoopBound(i * 2 + 0);
+    dynamic_loop_bounds[i].second = GetDynamicLoopBound(i * 2 + 1);
+  }
+  return dynamic_loop_bounds;
+}
+
+void IrFunction::Initialize(const string& function_name,
+                            llvm::Function::LinkageTypes linkage,
+                            const bool optimize_for_size_requested,
+                            const bool enable_fast_math) {
+  // The function signature is:
+  //   void function(i8* retval, i8* run_options, i8** params, i8** temps,
+  //                 i64* dynamic_loop_bounds, i64* prof_counters)
+  //
+  // retval: points to the returned value.
+  // params: address of an array with pointers to parameters.
+  // temps: address of an array with pointers to temporary buffers.
+  //
+  // Therefore, the generated function's signature (FunctionType) is statically
+  // determined - parameter unpacking is done in code generated into the
+  // function, rather than by a prologue dictated by the platform ABI.
+  //
+  //                      /--------------\
+  //   retval ----------> | return value |
+  //                      \--------------/
+  //
+  //                      /-------------------------------\
+  //   run_options -----> | xla::ExecutableRunOptions |
+  //                      \-------------------------------/
+  //
+  //                     /---------------------------------------------\
+  //   params -------->  |  param 0  |  param 1  | ..... |  param N-1  |
+  //                     |   addr    |   addr    |       |   addr      |
+  //                     \---------------------------------------------/
+  //                          |           |                   |
+  //                          |           |                   |
+  //                          V           V                   V
+  //                     /---------\  /---------\         /-----------\
+  //                     | param 0 |  | param 1 |         | param N-1 |
+  //                     \---------/  \---------/         \-----------/
+  //
+  //                     /---------------------------------------------\
+  //   temps --------->  |  temp  0  |  temp  1  | ..... |  temp  N-1  |
+  //                     |   addr    |   addr    |       |   addr      |
+  //                     \---------------------------------------------/
+  //                          |           |                   |
+  //                          |           |                   |
+  //                          V           V                   V
+  //                     /---------\  /---------\         /-----------\
+  //                     | temp  0 |  | temp  1 |         | temp  N-1 |
+  //                     \---------/  \---------/         \-----------/
+  //
+  //                        /--------------------------------------------\
+  // dynamic loop bounds -> | outer_dim0_start | outer_dim0_limit | .....|
+  //  (elided for aot)      \--------------------------------------------/
+  //
+  //                     /---------------------------------------------\
+  //   prof counters ->  | counter 0 | counter 1 | ..... | counter N-1 |
+  //                     \---------------------------------------------/
+
+  // Even though the type of params and temps is void** in the host's view, in
+  // LLVM IR this is represented by i8*, similarly to void*. It's up to the code
+  // to use GEPs to unravel the indirection layers.
+  llvm::FunctionType* function_type = llvm::FunctionType::get(
+      /*Result=*/llvm::Type::getVoidTy(llvm_module_->getContext()),
+      /*Params=*/
+      GetComputeFunctionParams(llvm_module_, num_dynamic_loop_bounds_),
+      /*isVarArg=*/false);
+
+  // Functions with local linkage get an inlining bonus.  Because we know
+  // a-priori that embedded functions (non-entry functions) will not have its
+  // name resolved, give it local linkage.
+  function_ =
+      llvm_ir::CreateFunction(function_type, linkage,
+                              /*enable_fast_math=*/enable_fast_math,
+                              /*optimize_for_size=*/optimize_for_size_requested,
+                              function_name, llvm_module_);
+
+  // Set meaningful names for the function's arguments: useful for debugging.
+  llvm::Function::arg_iterator arg_iter = function_->arg_begin();
+  arg_iter->setName("retval");
+  result_arg_ = &*arg_iter;
+  (++arg_iter)->setName("run_options");
+  exec_run_options_arg_ = &*arg_iter;
+  (++arg_iter)->setName("params");
+  parameters_arg_ = &*arg_iter;
+  (++arg_iter)->setName("temps");
+  temp_buffers_arg_ = &*arg_iter;
+  if (num_dynamic_loop_bounds_ > 0) {
+    (++arg_iter)->setName("dynamic_loop_bounds");
+    dynamic_loop_bounds_arg_ = &*arg_iter;
+  }
+  (++arg_iter)->setName("prof_counters");
+  profile_counters_arg_ = &*arg_iter;
+
+  // We know a-priori that the function arguments are guaranteed to point to
+  // disjoint objects.
+  llvm::Argument* retval = result_arg();
+  for (llvm::Argument& argument : function_->args()) {
+    // However, the return buffer aliases the temporaries and thus cannot be
+    // marked noalias.
+    if (&argument == retval) {
+      continue;
+    }
+    function_->addAttribute(argument.getArgNo() + 1, llvm::Attribute::NoAlias);
+  }
+
+  ir_builder_->SetInsertPoint(llvm::BasicBlock::Create(
+      /*Context=*/llvm_module_->getContext(),
+      /*Name=*/"entry",
+      /*Parent=*/function_));
+}
+
+llvm::Value* IrFunction::GetDynamicLoopBound(const int64 offset) {
+  CHECK_GT(num_dynamic_loop_bounds_, 0);
+  CHECK_LT(offset, num_dynamic_loop_bounds_ * 2);
+  string name = tensorflow::strings::StrCat("dynamic_loop_bound_", offset);
+  return ir_builder_->CreateLoad(
+      ir_builder_->CreateGEP(CHECK_NOTNULL(dynamic_loop_bounds_arg_),
+                             ir_builder_->getInt64(offset), AsStringRef(name)));
+}
+
+// Emits code to allocate an array of parameter address pointers, and store
+// each address from 'parameter_addresses'.
+// Returns an array of compute function call arguments (including parameter
+// address buffer).
+std::vector<llvm::Value*> GetArrayFunctionCallArguments(
+    tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
+    llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece name,
+    llvm::Value* return_value_buffer, llvm::Value* exec_run_options_arg,
+    llvm::Value* temp_buffers_arg, llvm::Value* profile_counters_arg) {
+  llvm::Value* parameter_addresses_buffer =
+      llvm_ir::EmitAllocaAtFunctionEntryWithCount(
+          ir_builder->getInt8PtrTy(),
+          ir_builder->getInt32(parameter_addresses.size()),
+          tensorflow::strings::StrCat(name, "_parameter_addresses"),
+          ir_builder);
+  for (size_t i = 0; i < parameter_addresses.size(); ++i) {
+    llvm::Value* parameter_as_i8ptr = ir_builder->CreateBitCast(
+        parameter_addresses[i], ir_builder->getInt8PtrTy(),
+        AsStringRef(tensorflow::strings::StrCat(name, "_parameter_", i,
+                                                "_address_as_i8ptr")));
+    llvm::Value* slot_in_param_adresses = ir_builder->CreateInBoundsGEP(
+        parameter_addresses_buffer, {ir_builder->getInt64(i)});
+    ir_builder->CreateStore(parameter_as_i8ptr, slot_in_param_adresses);
+  }
+
+  const auto to_int8_ptr = [=](llvm::Value* ptr) {
+    return ir_builder->CreatePointerCast(ptr, ir_builder->getInt8PtrTy());
+  };
+  std::vector<llvm::Value*> arguments{
+      to_int8_ptr(return_value_buffer), to_int8_ptr(exec_run_options_arg),
+      parameter_addresses_buffer, temp_buffers_arg};
+  if (profile_counters_arg != nullptr) {
+    arguments.push_back(profile_counters_arg);
+  }
+  return arguments;
+}
+
+// Emits a call to a runtime fork/join function which dispatches parallel
+// calls to 'parallel_function' (and joins threads before returning).
+Status EmitCallToParallelForkJoin(
+    const std::vector<llvm::Value*>& arguments, const Shape& shape,
+    const std::vector<int64>& dimension_partition_counts,
+    llvm::IRBuilder<>* ir_builder, llvm::Function* parallel_function,
+    const string& name) {
+  llvm::Module* module = ir_builder->GetInsertBlock()->getModule();
+
+  // Build ParallelForkJoin function type.
+  std::vector<llvm::Type*> compute_function_params =
+      GetComputeFunctionParams(module, /*num_dynamic_loop_bounds=*/0);
+  // Number of parallel compute functions.
+  compute_function_params.push_back(ir_builder->getInt32Ty());
+  // Array of partitions. There is an array element for each
+  // partition x partition_dim x 2 (for dimension start and limit).
+  compute_function_params.push_back(
+      llvm::Type::getInt64PtrTy(module->getContext()));
+  // Number of partitioned most-major dimensions in 'shape'.
+  compute_function_params.push_back(ir_builder->getInt32Ty());
+  // Function pointer for compute function to be dispatched in parallel.
+  compute_function_params.push_back(
+      llvm::Type::getInt8PtrTy(module->getContext()));
+
+  llvm::FunctionType* fork_join_type = llvm::FunctionType::get(
+      /*Result=*/llvm::Type::getVoidTy(module->getContext()),
+      /*Params=*/compute_function_params,
+      /*isVarArg=*/false);
+
+  llvm::Function* fork_join_func =
+      llvm::cast<llvm::Function>(module->getOrInsertFunction(
+          runtime::kParallelForkJoinSymbolName, fork_join_type));
+  fork_join_func->setCallingConv(llvm::CallingConv::C);
+  fork_join_func->setDoesNotThrow();
+
+  // Add common compute function arguments.
+  std::vector<llvm::Value*> fork_join_arguments(arguments);
+
+  // Create ShapePartitionIterator to generate all partitions of 'shape'.
+  ShapePartitionIterator partition_iterator(shape, dimension_partition_counts);
+  const int64 num_partitions = partition_iterator.GetTotalPartitionCount();
+  // Add argument specifying the number of parallel partitions.
+  fork_join_arguments.push_back(ir_builder->getInt32(num_partitions));
+
+  // The number of partitioned most-major dimensions in 'shape'.
+  const int32 num_partitioned_dims = dimension_partition_counts.size();
+  // A dimension partition consists of two elements: [start_index, limit_index).
+  const int32 dim_partition_size = 2;
+  // Calculate array partition stride.
+  const int32 array_partition_stride =
+      num_partitioned_dims * dim_partition_size;
+  // Calculate the total number of elements in the partition array.
+  const int32 partition_array_size =
+      dim_partition_size * num_partitioned_dims * num_partitions;
+
+  // Store dimension partition values as llvm constants in 'partitions'.
+  // See comments in runtime_fork_join.cc for array layout description.
+  std::vector<llvm::Constant*> partitions(partition_array_size);
+  for (int32 i = 0; i < num_partitions; ++i) {
+    std::vector<std::pair<int64, int64>> dim_partitions =
+        partition_iterator.GetPartition(i);
+    CHECK_EQ(num_partitioned_dims, dim_partitions.size());
+    const int32 partition_index = i * array_partition_stride;
+    for (int32 j = 0; j < num_partitioned_dims; ++j) {
+      const std::pair<int64, int64>& dim_partition = dim_partitions[j];
+      const int32 index = partition_index + j * dim_partition_size;
+      // Store partition [dim_start, dim_limit) intervals for each dimension.
+      partitions[index] = ir_builder->getInt64(dim_partition.first);
+      partitions[index + 1] =
+          ir_builder->getInt64(dim_partition.first + dim_partition.second);
+    }
+  }
+
+  // Create global variable out of dimension partitions in 'partitions'.
+  llvm::ArrayType* partitions_array_type =
+      llvm::ArrayType::get(ir_builder->getInt64Ty(), partition_array_size);
+  llvm::Constant* partitions_array =
+      llvm::ConstantArray::get(partitions_array_type, partitions);
+  llvm::GlobalVariable* global_partitions_array = new llvm::GlobalVariable(
+      /*M=*/*module,
+      /*Ty=*/partitions_array_type,
+      /*isConstant=*/true,
+      /*Linkage=*/llvm::GlobalValue::PrivateLinkage,
+      /*Initializer=*/partitions_array,
+      /*Name=*/
+      AsStringRef(
+          tensorflow::strings::StrCat(name, "_parallel_dimension_partitions")));
+
+  // Add argument specifying parallel dimension partitions.
+  fork_join_arguments.push_back(ir_builder->CreateBitCast(
+      global_partitions_array,
+      llvm::Type::getInt64PtrTy(module->getContext())));
+  // Add argument specifying the number of partitioned most-major dimensions.
+  fork_join_arguments.push_back(ir_builder->getInt32(num_partitioned_dims));
+  // Add argument for parallel compute function pointer.
+  fork_join_arguments.push_back(
+      ir_builder->CreateBitCast(parallel_function, ir_builder->getInt8PtrTy()));
+  // Emit call to parallel fork/join.
+  ir_builder->CreateCall(fork_join_func, fork_join_arguments);
+
+  return Status::OK();
+}
+
+}  // namespace cpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.h b/tensorflow/compiler/xla/service/cpu/ir_function.h
new file mode 100644
index 0000000000000000000000000000000000000000..1fd2da4dce23982ed030f3aa8ec604182d0ebab8
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/ir_function.h
@@ -0,0 +1,134 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_FUNCTION_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_FUNCTION_H_
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+
+namespace xla {
+namespace cpu {
+
+// IrFunction creates and encapsulates an llvm::Function, exposing methods to
+// emitters for function and function argument access.
+// The llvm::Function is created with the standard function signature
+// used in the XLA CPU backend (see ir_function.cc for argument details).
+// In addtion IrFunction saves the callers IR insert point during contruction,
+// and restores it after desctruction.
+//
+// Example usage:
+//
+//    // Create and initialize new IrFunction.
+//    std::unique_ptr<IrFunction> compute_function(new IrFunction(...));
+//    // Emit IR for function body using IrFunction helper methods.
+//    ...
+//    // Store reference to llvm::Function for future invocation.
+//    ir_functions.push_back(compute_function.function());
+//    // Delete IrFunction (finalizes IR function and restores caller insertion
+//    // point).
+//    compute_function.reset();
+//
+
+class IrFunction {
+ public:
+  IrFunction(const string& function_name, llvm::Function::LinkageTypes linkage,
+             const bool optimize_for_size_requested,
+             const bool enable_fast_math, llvm::Module* llvm_module,
+             llvm::IRBuilder<>* ir_builder, int64 num_dynamic_loop_bounds);
+  ~IrFunction();
+
+  // Emit ir to read and return the set of ir values representing the dynamic
+  // loop bounds argument of this function.
+  // Each element in returned vector is a pair of ir values representing
+  // the loop bounds for a specific dimension, where the first element of the
+  // pair is the dimension start index, and the second element of the pair
+  // is the dimension limit.
+  // EX: [dimension_i_index_start_ir_value, dimension_i_index_limit_ir_value]
+  //
+  DynamicLoopBounds GetDynamicLoopBounds();
+
+  // Returns the encapculated llvm::Function.
+  llvm::Function* function() { return function_; }
+
+  // Get the llvm::Value* that represents this functions "retval" argument.
+  llvm::Argument* result_arg() { return result_arg_; }
+
+  // Get the xla::ExecutableRunOptions that represents this functions
+  // "run_options" argument.
+  llvm::Value* exec_run_options_arg() { return exec_run_options_arg_; }
+
+  // Get the llvm::Value* that represents this functions parameters argument.
+  llvm::Value* parameters_arg() { return parameters_arg_; }
+
+  // Get the llvm::Value* that represents this functions "temps" argument.
+  llvm::Value* temp_buffers_arg() { return temp_buffers_arg_; }
+
+  // Get the llvm::Value* that represents this functions "prof_counters"
+  // argument.
+  llvm::Value* profile_counters_arg() { return profile_counters_arg_; }
+
+ private:
+  // Initialize an llvm::Function with standard signature based on arguments.
+  void Initialize(const string& function_name,
+                  llvm::Function::LinkageTypes linkage,
+                  bool optimize_for_size_requested, bool enable_fast_math);
+
+  // Emit ir to read and return the ir value for the dynamic loop bound at
+  // 'offset' from the "dynamic_loop_bounds" argument of this function.
+  llvm::Value* GetDynamicLoopBound(int64 offset);
+
+  llvm::IRBuilder<>* ir_builder_;
+  llvm::Module* llvm_module_;
+  llvm::IRBuilder<>::InsertPointGuard caller_insert_point_guard_;
+
+  int64 num_dynamic_loop_bounds_ = 0;
+  // Encapsulated llvm::Function.
+  llvm::Function* function_;
+  // Function argument IR values.
+  llvm::Argument* result_arg_;
+  llvm::Value* exec_run_options_arg_;
+  llvm::Value* parameters_arg_;
+  llvm::Value* temp_buffers_arg_;
+  llvm::Value* dynamic_loop_bounds_arg_ = nullptr;
+  llvm::Value* profile_counters_arg_;
+};
+
+// Returns an array of compute function call argument ir values.
+std::vector<llvm::Value*> GetArrayFunctionCallArguments(
+    tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses,
+    llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece name,
+    llvm::Value* return_value_buffer, llvm::Value* exec_run_options_arg,
+    llvm::Value* temp_buffers_arg, llvm::Value* profile_counters_arg);
+
+// Emits a call to a runtime fork/join function which dispatches parallel
+// calls to 'parallel_function' (and joins threads before returning).
+Status EmitCallToParallelForkJoin(
+    const std::vector<llvm::Value*>& arguments, const Shape& shape,
+    const std::vector<int64>& dimension_partition_counts,
+    llvm::IRBuilder<>* ir_builder, llvm::Function* parallel_function,
+    const string& name);
+
+}  // namespace cpu
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_IR_FUNCTION_H_
diff --git a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc
index 81c29e4726c7be53b433be896f558f502e43c885..0336fa61312e5cd626ae38ddd29875bff256212a 100644
--- a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc
+++ b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc
@@ -64,14 +64,14 @@ llvm::Function* EmitVectorF32TanhIfNeeded(llvm::Module* module,
                             &ir_builder),
       llvm::ConstantFP::get(vector_type, 9.0), &ir_builder);
 
-  std::array<float, 7> numerator_coeffs(
-      {{-2.76076847742355e-16f, 2.00018790482477e-13f, -8.60467152213735e-11f,
-        5.12229709037114e-08f, 1.48572235717979e-05f, 6.37261928875436e-04f,
-        4.89352455891786e-03f}});
-
-  std::array<float, 4> denominator_coeffs(
-      {{1.19825839466702e-06f, 1.18534705686654e-04f, 2.26843463243900e-03f,
-        4.89352518554385e-03f}});
+  std::array<float, 7> numerator_coeffs{
+      -2.76076847742355e-16f, 2.00018790482477e-13f, -8.60467152213735e-11f,
+      5.12229709037114e-08f,  1.48572235717979e-05f, 6.37261928875436e-04f,
+      4.89352455891786e-03f};
+
+  std::array<float, 4> denominator_coeffs{
+      1.19825839466702e-06f, 1.18534705686654e-04f, 2.26843463243900e-03f,
+      4.89352518554385e-03f};
 
   llvm::Value* input_squared =
       ir_builder.CreateFMul(input_clamped, input_clamped);
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc
index 0077e344e2bd34aa598ee076220fee678f31b4ad..d1b88b27f068962fb86477fcad3e4390b1636c2b 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc
+++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc
@@ -376,19 +376,6 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions(
     tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
     HloExecutionProfile* hlo_execution_profile) {
-  std::vector<se::DeviceMemoryBase> argument_buffers(arguments.size());
-  for (int i = 0; i < arguments.size(); ++i) {
-    argument_buffers[i] = arguments[i]->buffer(/*index=*/{});
-  }
-  return ExecuteComputeFunctions(run_options, argument_buffers, buffers,
-                                 hlo_execution_profile);
-}
-
-Status ParallelCpuExecutable::ExecuteComputeFunctions(
-    const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers,
-    HloExecutionProfile* hlo_execution_profile) {
   // Allocate profiling counters for each hlo instruction that we would like to
   // profile.
   std::vector<int64>* profile_counters = nullptr;
@@ -428,8 +415,9 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions(
     // just copy the existing buffer into the map containing instruction
     // results..
     if (instruction->opcode() == HloOpcode::kParameter) {
-      InsertOrDie(&results, instruction,
-                  arguments[instruction->parameter_number()].opaque());
+      InsertOrDie(
+          &results, instruction,
+          arguments[instruction->parameter_number()]->root_buffer().opaque());
     } else if (instruction->opcode() == HloOpcode::kConstant) {
       unsigned char* aligned_data =
           FindOrDie(aligned_constants_, instruction).get();
@@ -461,69 +449,6 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions(
   return Status::OK();
 }
 
-StatusOr<perftools::gputools::DeviceMemoryBase>
-ParallelCpuExecutable::ExecuteOnStream(
-    const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    HloExecutionProfile* hlo_execution_profile) {
-  se::Stream* stream = run_options->stream();
-  DeviceMemoryAllocator* memory_allocator = run_options->allocator();
-  VLOG(3) << "ExecuteOnStream arg size: " << arguments.size();
-  if (!arguments.empty()) {
-    VLOG(3) << "ExecuteOnStream arg[0]: " << arguments.at(0).opaque();
-  }
-
-  // Allocate the temporary buffers required for the computation.
-  se::StreamExecutor* stream_executor = stream->parent();
-  int device_ordinal = stream_executor->device_ordinal();
-  int64 buffer_count = assignment_->Allocations().size();
-  VLOG(3) << "temp buffer count: " << buffer_count;
-
-  std::vector<se::DeviceMemoryBase> device_allocations(
-      assignment_->Allocations().size());
-  TF_RETURN_IF_ERROR(AllocateBuffers(memory_allocator,
-                                     stream->parent()->device_ordinal(),
-                                     &device_allocations));
-
-  TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice,
-                      assignment_->GetUniqueTopLevelOutputSlice());
-  const BufferAllocation::Index result_index = result_slice.index();
-  VLOG(3) << "result index: " << result_index;
-
-  TF_RETURN_IF_ERROR(ExecuteComputeFunctions(
-      run_options, arguments, device_allocations, hlo_execution_profile));
-
-  // Mark the buffers that are actually live (used in the output) when the
-  // computation finishes executing.
-  std::unordered_set<const void*> marked_addresses;
-  MarkLiveAddressesInOutput(device_allocations[result_index].opaque(),
-                            result_shape(), &marked_addresses);
-
-  VLOG(3) << "Live addresses in output marking found "
-          << marked_addresses.size() << " addresses:\n"
-          << tensorflow::str_util::Join(
-                 marked_addresses, ", ", [](string* out, const void* address) {
-                   tensorflow::strings::StrAppend(
-                       out, tensorflow::strings::Printf("%p", address));
-                 });
-
-  // Computation is done - deallocate temp buffers. Keep those marked
-  // live because they are referenced by the output of the computation
-  // and are needed by the service. They will be deallocated by the
-  // service.
-  for (size_t i = 0; i < device_allocations.size(); ++i) {
-    auto alloc = device_allocations[i];
-    if (marked_addresses.count(alloc.opaque()) == 0 &&
-        alloc.opaque() != nullptr) {
-      VLOG(3) << "ParallelCpuExecutable deallocating buffer #" << i << " ["
-              << alloc.opaque() << "]";
-      TF_RETURN_IF_ERROR(memory_allocator->Deallocate(device_ordinal, &alloc));
-    }
-  }
-
-  return device_allocations[result_index];
-}
-
 StatusOr<std::unique_ptr<ShapedBuffer>> ParallelCpuExecutable::ExecuteOnStream(
     const ServiceExecutableRunOptions* run_options,
     tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
@@ -536,9 +461,9 @@ StatusOr<std::unique_ptr<ShapedBuffer>> ParallelCpuExecutable::ExecuteOnStream(
   DeviceMemoryAllocator* memory_allocator = run_options->allocator();
   std::vector<se::DeviceMemoryBase> buffers(assignment_->Allocations().size());
 
-  auto result_buffer =
-      MakeUnique<ShapedBuffer>(result_shape(), stream->parent()->platform(),
-                               stream->parent()->device_ordinal());
+  auto result_buffer = MakeUnique<ShapedBuffer>(
+      /*on_host_shape=*/result_shape(), /*on_device_shape=*/result_shape(),
+      stream->parent()->platform(), stream->parent()->device_ordinal());
 
   TF_RETURN_IF_ERROR(AllocateBuffers(
       memory_allocator, stream->parent()->device_ordinal(), &buffers));
@@ -549,37 +474,30 @@ StatusOr<std::unique_ptr<ShapedBuffer>> ParallelCpuExecutable::ExecuteOnStream(
   // Copy DeviceMemoryBase values which into the respective location in
   // ShapedBuffer which is returned to the caller.
   std::vector<bool> buffers_in_result(assignment_->Allocations().size(), false);
-  TF_RETURN_IF_ERROR(
-      result_buffer->mutable_shape_index_to_buffer_entry()
-          ->ForEachMutableElementWithStatus(
-              [&buffers, &buffers_in_result, &result_buffer, this](
-                  const ShapeIndex& index, size_t* buffer_entry) {
-                  const auto& sources =
-                      this->GetRootPointsToSet().element(index);
-                  // The points to set is unambiguous so the set should be a
-                  // singleton.
-                  CHECK_EQ(1, sources.size());
-                  const LogicalBuffer* buffer_source = sources[0];
-                  HloInstruction* src = buffer_source->instruction();
-
-                  // The source for this result buffer can be a nested buffer
-                  // such as a tuple element.
-
-                  // The source instruction should have a non-parameter buffer
-                  // assigned.
-                  TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice,
-                                      this->assignment_->GetUniqueSlice(
-                                          src, buffer_source->index()));
-                  CHECK(!slice.allocation()->is_entry_computation_parameter());
-
-                  const BufferAllocation::Index buffer_index = slice.index();
-                  const se::DeviceMemoryBase& buffer = buffers[buffer_index];
-                  CHECK(!buffer.is_null() || buffer.size() == 0);
-                  *buffer_entry = result_buffer->mutable_buffers()->size();
-                  result_buffer->mutable_buffers()->push_back(buffer);
-                  buffers_in_result[buffer_index] = true;
-                return Status::OK();
-              }));
+  TF_RETURN_IF_ERROR(result_buffer->buffers().ForEachMutableElementWithStatus(
+      [&](const ShapeIndex& index, se::DeviceMemoryBase* device_memory) {
+        const auto& sources = this->GetRootPointsToSet().element(index);
+
+        // The points to set is unambiguous so the set should be a singleton.
+        CHECK_EQ(1, sources.size());
+        const LogicalBuffer* buffer_source = sources[0];
+        HloInstruction* src = buffer_source->instruction();
+
+        // The source for this result buffer can be a nested buffer such as a
+        // tuple element. The source instruction should have a non-parameter
+        // buffer assigned.
+        TF_ASSIGN_OR_RETURN(
+            const BufferAllocation::Slice slice,
+            this->assignment_->GetUniqueSlice(src, buffer_source->index()));
+        CHECK(!slice.allocation()->is_entry_computation_parameter());
+
+        const BufferAllocation::Index buffer_index = slice.index();
+        const se::DeviceMemoryBase& buffer = buffers[buffer_index];
+        CHECK(!buffer.is_null() || buffer.size() == 0);
+        *device_memory = buffer;
+        buffers_in_result[buffer_index] = true;
+        return Status::OK();
+      }));
 
   // Free all buffers not in the result.
   for (size_t i = 0; i < buffers.size(); ++i) {
@@ -595,10 +513,10 @@ StatusOr<std::unique_ptr<ShapedBuffer>> ParallelCpuExecutable::ExecuteOnStream(
   return std::move(result_buffer);
 }
 
-StatusOr<perftools::gputools::DeviceMemoryBase>
+StatusOr<std::unique_ptr<ShapedBuffer>>
 ParallelCpuExecutable::ExecuteAsyncOnStream(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) {
   // TODO(b/30671675): Implement asynchronous execution mode.
   return Unimplemented(
       "Asynchronous execution on stream is not yet supported on CPU.");
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h
index d65e3f42f3cb34eff005f34b51b81fd5c42974a3..90ac94ef9288b2e860cb30c47ed44a7b96e4825d 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h
+++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h
@@ -59,21 +59,14 @@ class ParallelCpuExecutable : public Executable {
       std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
   ~ParallelCpuExecutable() override {}
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      HloExecutionProfile* hlo_execution_profile) override;
-
   StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       HloExecutionProfile* hlo_execution_profile) override;
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
       const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments) override;
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override;
 
   // This should be called after set_ir_module_string.
   const string& ir_module_string() const { return ir_module_string_; }
@@ -108,13 +101,6 @@ class ParallelCpuExecutable : public Executable {
 
   // Calls the generated functions in 'function_names_', performing the
   // computation with the given arguments using the supplied buffers.
-  Status ExecuteComputeFunctions(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          buffers,
-      HloExecutionProfile* hlo_execution_profile);
   Status ExecuteComputeFunctions(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1e439cde11cf74272101b80c867a308e51ab26a6
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.cc
@@ -0,0 +1,76 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h"
+
+#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace xla {
+namespace cpu {
+
+ParallelLoopEmitter::ParallelLoopEmitter(
+    const llvm_ir::ElementGenerator& target_element_generator,
+    const llvm_ir::IrArray& target_array,
+    const DynamicLoopBounds* dynamic_loop_bounds, llvm::IRBuilder<>* ir_builder)
+    : LoopEmitter(target_element_generator, target_array, ir_builder),
+      dynamic_loop_bounds_(dynamic_loop_bounds) {}
+
+llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock(
+    tensorflow::StringPiece loop_name) {
+  CHECK(!ShapeUtil::IsTuple(shape_));
+  CHECK(!ShapeUtil::IsScalar(shape_));
+
+  llvm_ir::ForLoopNest loop_nest(loop_name, ir_builder_);
+  const int64 num_dims = shape_.dimensions_size();
+  llvm_ir::IrArray::Index array_index(num_dims);
+
+  // Add loops from outer-most to inner-most dimensions.
+  for (int i = LayoutUtil::MinorToMajor(shape_).size() - 1; i >= 0; --i) {
+    const int64 dimension = LayoutUtil::Minor(shape_.layout(), i);
+    const int bounds_index = num_dims - 1 - i;
+    if (bounds_index < dynamic_loop_bounds_->size()) {
+      // Emit dynamic loop bounds for this dimension. Dynamic loop bounds
+      // are read from ir function dynamic loop bounds argument.
+      llvm::Value* start_index = (*dynamic_loop_bounds_)[bounds_index].first;
+      llvm::Value* end_index = (*dynamic_loop_bounds_)[bounds_index].second;
+
+      std::unique_ptr<llvm_ir::ForLoop> loop = loop_nest.AddLoop(
+          /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension),
+          start_index, end_index);
+      array_index[dimension] = loop->GetIndVarValue();
+    } else {
+      // Emit static loop bounds for this dimension.
+      std::unique_ptr<llvm_ir::ForLoop> loop = loop_nest.AddLoop(
+          /*start_index=*/0,
+          /*end_index=*/shape_.dimensions(dimension),
+          /*suffix=*/tensorflow::strings::Printf("dim.%lld", dimension));
+      array_index[dimension] = loop->GetIndVarValue();
+    }
+  }
+  // Point IR builder at inner loop BB.
+  llvm_ir::SetToFirstInsertPoint(loop_nest.GetInnerLoopBodyBasicBlock(),
+                                 ir_builder_);
+
+  // Set exit_bb_ to the exit block of the loop nest.
+  exit_bb_ = loop_nest.GetOuterLoopExitBasicBlock();
+  CHECK(exit_bb_ != nullptr);
+
+  return array_index;
+}
+
+}  // namespace cpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h
new file mode 100644
index 0000000000000000000000000000000000000000..9335d2818e99eb3588537d80dabddda08c1c020e
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/parallel_loop_emitter.h
@@ -0,0 +1,73 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_PARALLEL_LOOP_EMITTER_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_PARALLEL_LOOP_EMITTER_H_
+
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Value.h"
+#include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h"
+
+namespace xla {
+namespace cpu {
+
+// ParallelLoopEmitter emits a loop nest for the target array shape.
+// The outer loop bounds of the loop nest are passed as ir values at runtime
+// (specified in 'dynamic_loop_bounds'), and the inner loop bounds are static.
+// Dynamic loop bounds are specified as an array of dimension index
+// [start, limit) pairs of ir values (one for each partitioned outer dimension).
+//
+// EX: Let 'shape' = [8, 16, 32], with the loop bounds of the two-most major
+//     dimensions dynamic. Then 'dynamic_loop_bounds' will contain the
+//     following ir values for the two most-major dimensions:
+//       [dim0_index_start_ir_value, dim0_index_limit_ir_value]
+//       [dim1_index_start_ir_value, dim1_index_limit_ir_value]
+//
+// Code emitted by ParallelLoopEmitter will be called in a multi-threaded
+// context where each thread will be assigned a different set of outer dimension
+// partitions, and where all threads will collectively iterate over the
+// entire target array shape.
+//
+// Outer dimension partitions can be generated using the ShapePartitionAssigner
+// and ShapePartitionIterator utility classes from shape_partition.cc.
+//
+class ParallelLoopEmitter : public llvm_ir::LoopEmitter {
+ public:
+  // Constructs a ParallelLoopEmitter which uses 'target_element_generator' to
+  // generate elements, 'dynamic_loop_bounds' to set the loop bounds of the
+  // most-major dimensions, and 'target_array.' shape to set the static loop
+  // bounds for the most-minor dimensions.
+  ParallelLoopEmitter(const llvm_ir::ElementGenerator& target_element_generator,
+                      const llvm_ir::IrArray& target_array,
+                      const DynamicLoopBounds* dynamic_loop_bounds,
+                      llvm::IRBuilder<>* ir_builder);
+
+  ParallelLoopEmitter(const ParallelLoopEmitter&) = delete;
+  ParallelLoopEmitter& operator=(const ParallelLoopEmitter&) = delete;
+  ~ParallelLoopEmitter() override = default;
+
+  llvm_ir::IrArray::Index EmitIndexAndSetExitBasicBlock(
+      tensorflow::StringPiece loop_name) override;
+
+ private:
+  const DynamicLoopBounds* dynamic_loop_bounds_;
+};
+
+}  // namespace cpu
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_PARALLEL_LOOP_EMITTER_H_
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc
index 4b44ac8941e222d5954121bbb9654062e41f55d6..deb21bf4ef5895cfdbec5c2449b6ce7b306a7008 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc
+++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc
@@ -126,7 +126,7 @@ int64 ParallelTaskAssignment::GetTargetParallelTaskCount(
     HloInstruction* instruction) {
   // Currently, we do not assign parallel tasks to instructions with at least
   // one of the following properties:
-  // *) Internal threading (library calls to kConv, kDot, and kCustomCall).
+  // *) Internal threading (library calls to kConv, kDot, kFft, kCustomCall).
   // *) Emit custom loops (kSelectAndScatter, FusionKind::kTransposeDot).
   // *) Tuple-shaped.
   // TODO(b/27458679) Parallelize instructions which are skipped here.
@@ -137,6 +137,7 @@ int64 ParallelTaskAssignment::GetTargetParallelTaskCount(
       instruction->opcode() == HloOpcode::kSelectAndScatter ||
       instruction->opcode() == HloOpcode::kGetTupleElement ||
       instruction->opcode() == HloOpcode::kBitcast ||
+      instruction->opcode() == HloOpcode::kFft ||
       (instruction->opcode() == HloOpcode::kConvolution &&
        PotentiallyImplementedAsEigenConvolution(*instruction)) ||
       PotentiallyImplementedAsEigenDot(*instruction) ||
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fft.cc b/tensorflow/compiler/xla/service/cpu/runtime_fft.cc
new file mode 100644
index 0000000000000000000000000000000000000000..848d2d22414e8fc9bca82de90f7676011d8992fd
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/runtime_fft.cc
@@ -0,0 +1,37 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/runtime_fft.h"
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/compiler/xla/executable_run_options.h"
+#include "tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h"
+#include "tensorflow/core/platform/dynamic_annotations.h"
+#include "tensorflow/core/platform/types.h"
+
+using tensorflow::int32;
+using tensorflow::int64;
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenFft(
+    const void* run_options_ptr, void* out, void* operand, int32 fft_type,
+    int32 fft_rank, int64 input_batch, int64 fft_length0, int64 fft_length1,
+    int64 fft_length2) {
+  const xla::ExecutableRunOptions* run_options =
+      static_cast<const xla::ExecutableRunOptions*>(run_options_ptr);
+  tensorflow::xla::EigenFftImpl(*run_options->intra_op_thread_pool(), out,
+                                operand, fft_type, fft_rank, input_batch,
+                                fft_length0, fft_length1, fft_length2);
+}
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fft.h b/tensorflow/compiler/xla/service/cpu/runtime_fft.h
new file mode 100644
index 0000000000000000000000000000000000000000..f20c5aa0aa2dcbc700f47c718e75baae18650d1a
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/runtime_fft.h
@@ -0,0 +1,31 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_FFT_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_FFT_H_
+
+#include "tensorflow/core/platform/types.h"
+
+extern "C" {
+
+extern void __xla_cpu_runtime_EigenFft(
+    const void* /* xla::ExecutableRunOptions* */ run_options_ptr, void* out,
+    void* operand, tensorflow::int32 fft_type, tensorflow::int32 fft_rank,
+    tensorflow::int64 input_batch, tensorflow::int64 fft_length0,
+    tensorflow::int64 fft_length1, tensorflow::int64 fft_length2);
+
+}  // extern "C"
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_FFT_H_
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..984cb0616e02475babad7160d0f43bb23de0b50e
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/runtime_fft_impl.h
@@ -0,0 +1,240 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_FFT_IMPL_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_FFT_IMPL_H_
+
+#include <array>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/framework/numeric_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/platform/types.h"
+
+// 'tensorflow' namespace is used so that int64 and other types don't require
+// qualification.
+namespace tensorflow {
+namespace xla {
+
+namespace internal {
+
+// Computes either a forward or reverse complex-to-complex FFT.
+template <bool Forward, int FFTRank, typename EigenDevice>
+void EigenFftC2C(const EigenDevice& device, complex64* out, complex64* operand,
+                 int64 input_batch, int64 fft_length0, int64 fft_length1,
+                 int64 fft_length2) {
+  // Create the axes (which are always trailing).
+  const auto axes = Eigen::ArrayXi::LinSpaced(FFTRank, 1, FFTRank);
+  constexpr auto direction = Forward ? Eigen::FFT_FORWARD : Eigen::FFT_REVERSE;
+
+  const std::array<int64, 3> fft_shape = {
+      {fft_length0, fft_length1, fft_length2}};
+
+  Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> dims;
+  dims[0] = input_batch;
+  for (int i = 0; i < FFTRank; i++) {
+    dims[i + 1] = fft_shape[i];
+  }
+  const Eigen::TensorMap<Eigen::Tensor<complex64, FFTRank + 1, Eigen::RowMajor>,
+                         Eigen::Aligned>
+      input(operand, dims);
+  Eigen::TensorMap<Eigen::Tensor<complex64, FFTRank + 1, Eigen::RowMajor>,
+                   Eigen::Aligned>
+      output(out, dims);
+  output.device(device) = input.template fft<Eigen::BothParts, direction>(axes);
+}
+
+// Computes a forward real->complex FFT, slicing out redundant negative
+// frequencies from the innermost dimension.
+template <int FFTRank, typename EigenDevice>
+void EigenFftR2C(const EigenDevice& device, complex64* out, float* operand,
+                 int64 input_batch, int64 fft_length0, int64 fft_length1,
+                 int64 fft_length2) {
+  const std::array<int64, 3> fft_shape = {
+      {fft_length0, fft_length1, fft_length2}};
+
+  Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> in_dims;
+  in_dims[0] = input_batch;
+  Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> out_dims;
+  out_dims[0] = input_batch;
+  TensorShape temp_shape{input_batch};
+  for (int i = 0; i < FFTRank; i++) {
+    in_dims[i + 1] = fft_shape[i];
+    out_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i];
+    temp_shape.AddDim(fft_shape[i]);
+  }
+  const Eigen::TensorMap<Eigen::Tensor<float, FFTRank + 1, Eigen::RowMajor>,
+                         Eigen::Aligned>
+      input(operand, in_dims);
+  Eigen::TensorMap<Eigen::Tensor<complex64, FFTRank + 1, Eigen::RowMajor>,
+                   Eigen::Aligned>
+      output(out, out_dims);
+
+  // Create the axes (which are always trailing).
+  const auto axes = Eigen::ArrayXi::LinSpaced(FFTRank, 1, FFTRank);
+
+  // Compute the full FFT using a temporary tensor.
+  Tensor temp(DataTypeToEnum<complex64>::v(), temp_shape);
+  auto full_fft = temp.flat_inner_dims<complex64, FFTRank + 1>();
+  const Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> zero_start_indices;
+  full_fft.device(device) =
+      input.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(axes);
+
+  // Slice away the negative frequency components.
+  output.device(device) = full_fft.slice(zero_start_indices, out_dims);
+}
+
+// Computes a reverse complex->real FFT, reconstructing redundant negative
+// frequencies using reverse conjugate on innermost dimension after doing IFFT
+// on outer dimensions.
+template <int FFTRank, typename EigenDevice>
+void EigenFftC2R(const EigenDevice& device, float* out, complex64* operand,
+                 int64 input_batch, int64 fft_length0, int64 fft_length1,
+                 int64 fft_length2) {
+  const std::array<int64, 3> fft_shape = {
+      {fft_length0, fft_length1, fft_length2}};
+
+  Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> in_dims;
+  in_dims[0] = input_batch;
+  Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> out_dims;
+  out_dims[0] = input_batch;
+  TensorShape temp_shape{input_batch};
+  for (int i = 0; i < FFTRank; i++) {
+    in_dims[i + 1] = i == FFTRank - 1 ? fft_shape[i] / 2 + 1 : fft_shape[i];
+    out_dims[i + 1] = fft_shape[i];
+    temp_shape.AddDim(fft_shape[i]);
+  }
+  const Eigen::TensorMap<Eigen::Tensor<complex64, FFTRank + 1, Eigen::RowMajor>,
+                         Eigen::Aligned>
+      input(operand, in_dims);
+  Eigen::TensorMap<Eigen::Tensor<float, FFTRank + 1, Eigen::RowMajor>,
+                   Eigen::Aligned>
+      output(out, out_dims);
+
+  // Calculate the shape of the temporary tensor for the full FFT and the
+  // region we will slice from input given fft_shape. We slice input to
+  // fft_shape on its inner-most dimensions, except the last (which we
+  // slice to fft_shape[-1] / 2 + 1).
+  Tensor temp(DataTypeToEnum<complex64>::v(), temp_shape);
+  auto full_fft = temp.flat_inner_dims<complex64, FFTRank + 1>();
+
+  // Calculate the starting point and range of the source of
+  // negative frequency part.
+  auto neg_sizes = in_dims;
+  neg_sizes[FFTRank] = fft_shape[FFTRank - 1] - in_dims[FFTRank];
+  Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> neg_target_indices;
+  neg_target_indices[FFTRank] = in_dims[FFTRank];
+
+  const Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> zero_start_indices;
+  Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> neg_start_indices;
+  neg_start_indices[FFTRank] = 1;
+
+  full_fft.slice(zero_start_indices, in_dims).device(device) = input;
+
+  // First, conduct IFFTs on outer dimensions. We save computation (and
+  // avoid touching uninitialized memory) by slicing full_fft to the
+  // subregion we wrote input to.
+  if (FFTRank > 1) {
+    const auto outer_axes =
+        Eigen::ArrayXi::LinSpaced(FFTRank - 1, 1, FFTRank - 1);
+    full_fft.slice(zero_start_indices, in_dims).device(device) =
+        full_fft.slice(zero_start_indices, in_dims)
+            .template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(outer_axes);
+  }
+
+  // Reconstruct the full FFT by appending reversed and conjugated
+  // spectrum as the negative frequency part.
+  Eigen::array<bool, FFTRank + 1> reverse_last_axis;
+  for (auto i = 0; i <= FFTRank; i++) {
+    reverse_last_axis[i] = i == FFTRank;
+  }
+
+  if (neg_sizes[FFTRank] != 0) {
+    full_fft.slice(neg_target_indices, neg_sizes).device(device) =
+        full_fft.slice(neg_start_indices, neg_sizes)
+            .reverse(reverse_last_axis)
+            .conjugate();
+  }
+
+  auto inner_axis = Eigen::array<int, 1>{FFTRank};
+  output.device(device) =
+      full_fft.template fft<Eigen::RealPart, Eigen::FFT_REVERSE>(inner_axis);
+}
+
+template <int FFTRank, typename EigenDevice>
+void EigenFftWithRank(const EigenDevice& device, void* out, void* operand,
+                      int32 fft_type, int64 input_batch, int64 fft_length0,
+                      int64 fft_length1, int64 fft_length2) {
+  CHECK(::xla::FftType_IsValid(fft_type)) << fft_type;
+  switch (fft_type) {
+    case ::xla::FftType::FFT:
+      EigenFftC2C<true, FFTRank, EigenDevice>(
+          device, static_cast<complex64*>(out),
+          static_cast<complex64*>(operand), input_batch, fft_length0,
+          fft_length1, fft_length2);
+      break;
+    case ::xla::FftType::IFFT:
+      EigenFftC2C<false, FFTRank, EigenDevice>(
+          device, static_cast<complex64*>(out),
+          static_cast<complex64*>(operand), input_batch, fft_length0,
+          fft_length1, fft_length2);
+      break;
+    case ::xla::FftType::RFFT:
+      EigenFftR2C<FFTRank, EigenDevice>(
+          device, static_cast<complex64*>(out), static_cast<float*>(operand),
+          input_batch, fft_length0, fft_length1, fft_length2);
+      break;
+    case ::xla::FftType::IRFFT:
+      EigenFftC2R<FFTRank, EigenDevice>(
+          device, static_cast<float*>(out), static_cast<complex64*>(operand),
+          input_batch, fft_length0, fft_length1, fft_length2);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported FFT type: " << fft_type;
+  }
+}
+
+}  // namespace internal
+
+template <typename EigenDevice>
+void EigenFftImpl(const EigenDevice& device, void* out, void* operand,
+                  int32 fft_type, int32 fft_rank, int64 input_batch,
+                  int64 fft_length0, int64 fft_length1, int64 fft_length2) {
+  switch (fft_rank) {
+    case 1:
+      internal::EigenFftWithRank<1, EigenDevice>(
+          device, out, operand, fft_type, input_batch, fft_length0, 0, 0);
+      break;
+    case 2:
+      internal::EigenFftWithRank<2, EigenDevice>(device, out, operand, fft_type,
+                                                 input_batch, fft_length0,
+                                                 fft_length1, 0);
+      break;
+    case 3:
+      internal::EigenFftWithRank<3, EigenDevice>(device, out, operand, fft_type,
+                                                 input_batch, fft_length0,
+                                                 fft_length1, fft_length2);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported FFT rank " << fft_rank;
+  }
+}
+
+}  // namespace xla
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_FFT_IMPL_H_
diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
index cda2783307925b77ac6d8cfe679c5b325db2befc..5403bf48b748c587802c6ed7abb4699e8395ca67 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
 
-#include <dlfcn.h>
 #include <stdint.h>
 #include <algorithm>
 #include <list>
@@ -34,10 +33,12 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h"
 #include "tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d.h"
+#include "tensorflow/compiler/xla/service/cpu/runtime_fft.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_fork_join.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h"
+#include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -102,9 +103,21 @@ llvm::StringRef GetHostCpuName() {
 
 CompilerFunctor::VectorIntrinsics GetAvailableIntrinsics() {
   CompilerFunctor::VectorIntrinsics intrinsics;
-  intrinsics.sse_intrinsics = (&__xla_cpu_runtime_ExpV4F32SSE != nullptr);
-  intrinsics.avx_intrinsics = (&__xla_cpu_runtime_ExpV8F32AVX != nullptr);
-  intrinsics.neon_intrinsics = (&__xla_cpu_runtime_ExpV4F32NEON != nullptr);
+#ifdef TF_XLA_HAS_SSE4_1
+  intrinsics.sse_intrinsics = true;
+#else
+  intrinsics.sse_intrinsics = false;
+#endif
+#ifdef TF_XLA_HAS_AVX
+  intrinsics.avx_intrinsics = true;
+#else
+  intrinsics.avx_intrinsics = false;
+#endif
+#ifdef TF_XLA_HAS_NEON
+  intrinsics.neon_intrinsics = true;
+#else
+  intrinsics.neon_intrinsics = false;
+#endif
   return intrinsics;
 }
 
@@ -196,17 +209,24 @@ bool RegisterKnownJITSymbols() {
   REGISTER_CPU_RUNTIME_SYMBOL(AcquireInfeedBufferForDequeue);
   REGISTER_CPU_RUNTIME_SYMBOL(AcquireOutfeedBufferForPopulation);
   REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF32);
+  REGISTER_CPU_RUNTIME_SYMBOL(EigenFft);
   REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF32);
   REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF64);
   REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32);
   REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32);
   REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64);
+#ifdef TF_XLA_HAS_NEON
   REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32NEON);
-  REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE);
-  REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX);
   REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32NEON);
+#endif
+#ifdef TF_XLA_HAS_SSE4_1
+  REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE);
   REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32SSE);
+#endif
+#ifdef TF_XLA_HAS_AVX
+  REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX);
   REGISTER_CPU_RUNTIME_SYMBOL(LogV8F32AVX);
+#endif
   REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin);
   REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue);
   REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation);
@@ -275,7 +295,11 @@ bool RegisterKnownJITSymbols() {
   REGISTER_LIBM_SYMBOL(scalbln, double (*)(double, long));
   REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int));
   REGISTER_LIBM_SYMBOL(sin, double (*)(double));
+#ifdef __APPLE__
+  REGISTER_LIBM_SYMBOL(__sincos, void (*)(double, double*, double*));
+#else
   REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
+#endif
   REGISTER_LIBM_SYMBOL(sinh, double (*)(double));
   REGISTER_LIBM_SYMBOL(sqrt, double (*)(double));
   REGISTER_LIBM_SYMBOL(tan, double (*)(double));
diff --git a/tensorflow/compiler/xla/service/cpu/target_machine_features.cc b/tensorflow/compiler/xla/service/cpu/target_machine_features.cc
new file mode 100644
index 0000000000000000000000000000000000000000..eeb049737dddd11ef2ce229df772baec3ac03dd8
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/target_machine_features.cc
@@ -0,0 +1,35 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/target_machine_features.h"
+
+namespace xla {
+namespace cpu {
+
+llvm::TargetTransformInfo* TargetMachineFeatures::GetTargetTransformInfoFor(
+    const llvm::Function& function) const {
+  auto it = target_transform_info_cache_.find(&function);
+  if (it == target_transform_info_cache_.end()) {
+    auto emplace_result = target_transform_info_cache_.emplace(
+        &function, target_machine_->getTargetTransformInfo(function));
+    CHECK(emplace_result.second);
+    it = emplace_result.first;
+  }
+
+  return &it->second;
+}
+
+}  // namespace cpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/target_machine_features.h b/tensorflow/compiler/xla/service/cpu/target_machine_features.h
new file mode 100644
index 0000000000000000000000000000000000000000..703942615e552dccde7ddec8c8b90e8a486652af
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/target_machine_features.h
@@ -0,0 +1,84 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "tensorflow/compiler/xla/primitive_util.h"
+#include "tensorflow/core/lib/gtl/flatmap.h"
+
+namespace xla {
+namespace cpu {
+
+// Wraps an llvm::TargetMachine and parses out some information that feeds into
+// LLVM IR code generation decisions.
+class TargetMachineFeatures {
+ public:
+  static constexpr int kX86AvxVectorByteSize = 32;
+
+  TargetMachineFeatures(llvm::TargetMachine* target_machine)
+      : target_machine_(target_machine) {}
+
+  // Return the vectorization factor, which is the number of bytes of data
+  // explicitly vectorized routines will try to process at once.
+  int vectorization_factor_in_bytes() const {
+    // Ideally this should be a function of the cache line size (which we can
+    // get from llvm::TargetTransformInfo::getCacheLineSize) of the target
+    // machine.  Guess a value of 128 bytes for now.
+    return 128;
+  }
+
+  // Return the size of the largest vector size in bytes.  We need to pass in
+  // "function" since llvm functions can contain annotations for specializing
+  // them to specific micro-architectures (though currently XLA does not use
+  // this functionality).
+  int vector_register_byte_size(const llvm::Function& function) const {
+    llvm::TargetTransformInfo* tti = GetTargetTransformInfoFor(function);
+    return tti->getRegisterBitWidth(/*Vector=*/true) / 8;
+  }
+
+  // Return the number of elements of type `type` that can fit into the largest
+  // vector register available.  We need to pass in "function" since llvm
+  // functions can contain annotations for specializing them to specific
+  // micro-architectures (though currently XLA does not use this functionality).
+  int vector_register_num_elements(const llvm::Function& function,
+                                   PrimitiveType type) const {
+    return vector_register_byte_size(function) /
+           (primitive_util::BitWidth(type) / 8);
+  }
+
+ private:
+  llvm::TargetTransformInfo* GetTargetTransformInfoFor(
+      const llvm::Function& function) const;
+
+  // This cache saves us from having to create a llvm::TargetTransformInfo for
+  // every call to GetTargetTransformInfoFor (creating a TargetTransformInfo
+  // costs one heap allocation on X86).
+  //
+  // This is mutated from within `GetTargetTransformInfoFor` which is
+  // semantically a getter (and thus `const`); and is therefore declared
+  // mutable.  Making this mutable is okay because it has cache semantics.
+  mutable tensorflow::gtl::FlatMap<const llvm::Function*,
+                                   llvm::TargetTransformInfo>
+      target_transform_info_cache_;
+  llvm::TargetMachine* target_machine_;
+};
+
+}  // namespace cpu
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_
diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc b/tensorflow/compiler/xla/service/cpu/vector_support_library.cc
similarity index 51%
rename from tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc
rename to tensorflow/compiler/xla/service/cpu/vector_support_library.cc
index e8c6a83618eaa8430521197f1c166cb7eb11a28e..128b465be239130918687d8e2ba0458684086ee1 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.cc
+++ b/tensorflow/compiler/xla/service/cpu/vector_support_library.cc
@@ -13,11 +13,13 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h"
+#include "tensorflow/compiler/xla/service/cpu/vector_support_library.h"
 
+#include "tensorflow/compiler/xla/service/cpu/target_machine_features.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 
 namespace xla {
+namespace cpu {
 VectorSupportLibrary::VectorSupportLibrary(PrimitiveType primitive_type,
                                            int64 vector_size,
                                            llvm::IRBuilder<>* ir_builder,
@@ -34,6 +36,12 @@ VectorSupportLibrary::VectorSupportLibrary(PrimitiveType primitive_type,
 }
 
 llvm::Value* VectorSupportLibrary::Mul(llvm::Value* lhs, llvm::Value* rhs) {
+  CHECK(lhs->getType() == scalar_type() || lhs->getType() == vector_type());
+  return MulInternal(lhs, rhs);
+}
+
+llvm::Value* VectorSupportLibrary::MulInternal(llvm::Value* lhs,
+                                               llvm::Value* rhs) {
   if (scalar_type_->isFloatingPointTy()) {
     return ir_builder()->CreateFMul(lhs, rhs, name());
   } else {
@@ -42,6 +50,12 @@ llvm::Value* VectorSupportLibrary::Mul(llvm::Value* lhs, llvm::Value* rhs) {
 }
 
 llvm::Value* VectorSupportLibrary::Add(llvm::Value* lhs, llvm::Value* rhs) {
+  CHECK(lhs->getType() == scalar_type() || lhs->getType() == vector_type());
+  return AddInternal(lhs, rhs);
+}
+
+llvm::Value* VectorSupportLibrary::AddInternal(llvm::Value* lhs,
+                                               llvm::Value* rhs) {
   if (scalar_type_->isFloatingPointTy()) {
     return ir_builder()->CreateFAdd(lhs, rhs, name());
   } else {
@@ -129,6 +143,123 @@ llvm::Value* VectorSupportLibrary::AddReduce(llvm::Value* vector) {
                                             name());
 }
 
+llvm::Value* VectorSupportLibrary::AvxStyleHorizontalAdd(llvm::Value* lhs,
+                                                         llvm::Value* rhs) {
+  CHECK_EQ(lhs->getType(), vector_type());
+  CHECK_EQ(rhs->getType(), vector_type());
+  CHECK_EQ(vector_size() % 2, 0);
+
+  llvm::SmallVector<llvm::Constant*, 32> mask_a, mask_b;
+
+  // Adding the values shuffled using mask_a and mask_b gives us the
+  // AVX-style horizontal add we want.  The masks work as documented
+  // in https://llvm.org/docs/LangRef.html#shufflevector-instruction
+  //
+  // Here are the masks for vector_width() == 8:
+  //
+  //    index: |0 |1 |2 | 3 |4 |5 | 6 | 7
+  //   --------+--+--+--+---+--+--+---+---
+  //   mask_a: |0 |2 |8 |10 |4 |6 |12 |14
+  //   mask_b: |1 |3 |9 |11 |5 |7 |13 |16
+  //
+  // So, as an example, the value at lane 3 of the result vector is
+  // the result of adding lane 10 and lane 11 in the combined lhs++rhs
+  // vector, which are the lanes 2 and 3 in the rhs vector.
+  for (int i = 0; i < vector_size(); i += 2) {
+    int increment = i < vector_size() / 2 ? 0 : (vector_size() / 2);
+    mask_a.push_back(ir_builder()->getInt32(increment + i));
+    mask_b.push_back(ir_builder()->getInt32(increment + i + 1));
+  }
+  for (int i = 0; i < vector_size(); i += 2) {
+    int increment = i < vector_size() / 2 ? (vector_size() / 2) : vector_size();
+    mask_a.push_back(ir_builder()->getInt32(increment + i));
+    mask_b.push_back(ir_builder()->getInt32(increment + i + 1));
+  }
+
+  llvm::Value* shuffle_0 = ir_builder()->CreateShuffleVector(
+      lhs, rhs, llvm::ConstantVector::get(mask_a));
+  llvm::Value* shuffle_1 = ir_builder()->CreateShuffleVector(
+      lhs, rhs, llvm::ConstantVector::get(mask_b));
+
+  return Add(shuffle_0, shuffle_1);
+}
+
+llvm::Value* VectorSupportLibrary::ExtractLowHalf(llvm::Value* vector) {
+  llvm::SmallVector<llvm::Constant*, 32> mask;
+  for (int i = 0; i < vector_size() / 2; i++) {
+    mask.push_back(ir_builder()->getInt32(i));
+  }
+
+  return ir_builder()->CreateShuffleVector(vector,
+                                           llvm::UndefValue::get(vector_type()),
+                                           llvm::ConstantVector::get(mask));
+}
+
+llvm::Value* VectorSupportLibrary::ExtractHighHalf(llvm::Value* vector) {
+  llvm::SmallVector<llvm::Constant*, 32> mask;
+  for (int i = 0; i < vector_size() / 2; i++) {
+    mask.push_back(ir_builder()->getInt32(i + vector_size() / 2));
+  }
+
+  return ir_builder()->CreateShuffleVector(vector,
+                                           llvm::UndefValue::get(vector_type()),
+                                           llvm::ConstantVector::get(mask));
+}
+
+std::vector<llvm::Value*> VectorSupportLibrary::ComputeHorizontalSums(
+    std::vector<llvm::Value*> vectors, llvm::Value* init_values) {
+  const int x86_avx_vector_elements =
+      TargetMachineFeatures::kX86AvxVectorByteSize / scalar_byte_size();
+  if (vector_size() == x86_avx_vector_elements &&
+      vectors.size() == x86_avx_vector_elements) {
+    return ComputeAvxOptimizedHorizontalSums(std::move(vectors), init_values);
+  }
+
+  std::vector<llvm::Value*> result;
+  std::transform(vectors.begin(), vectors.end(), std::back_inserter(result),
+                 [this](llvm::Value* vector) { return AddReduce(vector); });
+  if (init_values) {
+    for (int64 i = 0, e = result.size(); i < e; i++) {
+      result[i] = Add(result[i], ir_builder()->CreateExtractElement(
+                                     init_values, ir_builder()->getInt32(i)));
+    }
+  }
+  return result;
+}
+
+std::vector<llvm::Value*>
+VectorSupportLibrary::ComputeAvxOptimizedHorizontalSums(
+    std::vector<llvm::Value*> vectors, llvm::Value* init_values) {
+  while (vectors.size() != 2) {
+    std::vector<llvm::Value*> new_vectors;
+    for (int i = 0; i < vectors.size(); i += 2) {
+      new_vectors.push_back(AvxStyleHorizontalAdd(vectors[i], vectors[i + 1]));
+    }
+
+    vectors = std::move(new_vectors);
+  }
+
+  llvm::Value* low =
+      AddInternal(ExtractLowHalf(vectors[0]), ExtractHighHalf(vectors[0]));
+  if (init_values) {
+    low = AddInternal(ExtractLowHalf(init_values), low);
+  }
+  llvm::Value* high =
+      AddInternal(ExtractLowHalf(vectors[1]), ExtractHighHalf(vectors[1]));
+  if (init_values) {
+    high = AddInternal(ExtractHighHalf(init_values), high);
+  }
+
+  std::vector<llvm::Value*> results;
+  for (int i = 0; i < 8; i++) {
+    llvm::Value* scalar_result = ir_builder()->CreateExtractElement(
+        i < 4 ? low : high, ir_builder()->getInt32(i % 4), name());
+    results.push_back(scalar_result);
+  }
+
+  return results;
+}
+
 llvm::Value* VectorSupportLibrary::GetZeroVector() {
   return llvm::Constant::getNullValue(vector_type());
 }
@@ -142,9 +273,12 @@ LlvmVariable::LlvmVariable(llvm::Type* type, llvm::IRBuilder<>* ir_builder)
   alloca_ = llvm_ir::EmitAllocaAtFunctionEntry(type, "", ir_builder_);
 }
 
-llvm::Value* LlvmVariable::Get() { return ir_builder_->CreateLoad(alloca_); }
+llvm::Value* LlvmVariable::Get() const {
+  return ir_builder_->CreateLoad(alloca_);
+}
 
 void LlvmVariable::Set(llvm::Value* new_value) {
   ir_builder_->CreateStore(new_value, alloca_);
 }
+}  // namespace cpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h b/tensorflow/compiler/xla/service/cpu/vector_support_library.h
similarity index 76%
rename from tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h
rename to tensorflow/compiler/xla/service/cpu/vector_support_library.h
index 3072677ab05aa91c736baaa0dc3023329d810a52..8fbac2a6670f8ef18c00877a1566bd4ab896a7c8 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/vector_support_library.h
+++ b/tensorflow/compiler/xla/service/cpu/vector_support_library.h
@@ -13,17 +13,19 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_
-#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_VECTOR_SUPPORT_LIBRARY_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_VECTOR_SUPPORT_LIBRARY_H_
 
 #include <string>
 
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Value.h"
+#include "tensorflow/compiler/xla/primitive_util.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 
 namespace xla {
+namespace cpu {
 // A thin wrapper around llvm_util.h to make code generating vector math flow
 // more readable.
 class VectorSupportLibrary {
@@ -111,7 +113,12 @@ class VectorSupportLibrary {
     return LoadBroadcast(base_pointer, ir_builder()->getInt64(offset_elements));
   }
 
-  llvm::Value* AddReduce(llvm::Value* vector);
+  // Compute the horizontal sum of each vector in `vectors`.  The i'th element
+  // in the result vector is the (scalar) horizontal sum of the i'th vector in
+  // `vectors`.  If `init_values` is not nullptr then the value in the i'th lane
+  // in `init_values` is added to the i'th horizontal sum.
+  std::vector<llvm::Value*> ComputeHorizontalSums(
+      std::vector<llvm::Value*> vectors, llvm::Value* init_values = nullptr);
 
   llvm::Value* GetZeroVector();
   llvm::Value* GetZeroScalar();
@@ -122,10 +129,40 @@ class VectorSupportLibrary {
   llvm::Type* vector_pointer_type() const { return vector_pointer_type_; }
   llvm::Type* scalar_type() const { return scalar_type_; }
   llvm::Type* scalar_pointer_type() const { return scalar_pointer_type_; }
+  int64 scalar_byte_size() const {
+    return primitive_util::BitWidth(primitive_type_) / 8;
+  }
 
   const std::string& name() const { return name_; }
 
  private:
+  llvm::Value* ExtractLowHalf(llvm::Value*);
+  llvm::Value* ExtractHighHalf(llvm::Value*);
+
+  llvm::Value* MulInternal(llvm::Value* lhs, llvm::Value* rhs);
+  llvm::Value* AddInternal(llvm::Value* lhs, llvm::Value* rhs);
+
+  llvm::Value* AddReduce(llvm::Value* vector);
+
+  // Perform an X86 AVX style horizontal add between `lhs` and `rhs`.  The
+  // resulting IR for an 8-float wide vector is expected to lower to a single
+  // vhaddps instruction on a CPU that supports vhaddps, and not be too bad in
+  // other cases.
+  //
+  // For a vector width of 8, the result vector is computed as:
+  //   Result[0] = Lhs[0] + Lhs[1]
+  //   Result[1] = Lhs[2] + Lhs[3]
+  //   Result[2] = Rhs[0] + Rhs[1]
+  //   Result[3] = Rhs[2] + Rhs[3]
+  //   Result[4] = Lhs[4] + Lhs[5]
+  //   Result[5] = Lhs[6] + Lhs[7]
+  //   Result[6] = Rhs[4] + Rhs[5]
+  //   Result[7] = Rhs[6] + Rhs[7]
+  llvm::Value* AvxStyleHorizontalAdd(llvm::Value* lhs, llvm::Value* rhs);
+
+  std::vector<llvm::Value*> ComputeAvxOptimizedHorizontalSums(
+      std::vector<llvm::Value*> vectors, llvm::Value* init_values);
+
   int64 vector_size_;
   PrimitiveType primitive_type_;
   llvm::IRBuilder<>* ir_builder_;
@@ -142,7 +179,7 @@ class LlvmVariable {
  public:
   LlvmVariable(llvm::Type*, llvm::IRBuilder<>* ir_builder);
 
-  llvm::Value* Get();
+  llvm::Value* Get() const;
   void Set(llvm::Value* new_value);
 
  private:
@@ -169,6 +206,7 @@ class ScalarVariable : public LlvmVariable {
     Set(initial_value);
   }
 };
+}  // namespace cpu
 }  // namespace xla
 
-#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_VECTOR_SUPPORT_LIBRARY_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_VECTOR_SUPPORT_LIBRARY_H_
diff --git a/tensorflow/compiler/xla/service/cpu/windows_compatibility.cc b/tensorflow/compiler/xla/service/cpu/windows_compatibility.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ab308ee6cb16ba95e24694b59a4b5737765bbb8b
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/windows_compatibility.cc
@@ -0,0 +1,32 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h"
+
+#ifdef _MSC_VER
+
+#include <math.h>
+
+void sincos(double x, double *sinv, double *cosv) {
+  *sinv = sin(x);
+  *cosv = cos(x);
+}
+
+void sincosf(float x, float *sinv, float *cosv) {
+  *sinv = sinf(x);
+  *cosv = cosf(x);
+}
+
+#endif  // _MSC_VER
diff --git a/tensorflow/compiler/xla/service/cpu/windows_compatibility.h b/tensorflow/compiler/xla/service/cpu/windows_compatibility.h
new file mode 100644
index 0000000000000000000000000000000000000000..262f379d8b6017f4a7e0156b724bfee7e8ec5b9a
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/windows_compatibility.h
@@ -0,0 +1,31 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_WINDOWS_COMPATIBILITY_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_WINDOWS_COMPATIBILITY_H_
+
+#ifdef _MSC_VER
+
+extern "C" {
+
+// MSVC does not have sincos[f].
+void sincos(double x, double *sinv, double *cosv);
+void sincosf(float x, float *sinv, float *cosv);
+
+}
+
+#endif  // _MSC_VER
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_WINDOWS_COMPATIBILITY_H_
diff --git a/tensorflow/compiler/xla/service/cpu/xfeed_manager.cc b/tensorflow/compiler/xla/service/cpu/xfeed_manager.cc
index d0f214202908266371639af8f431ad8269ad0e35..47543b2082f55cf7b8cf60f1c5bbb16a0a609912 100644
--- a/tensorflow/compiler/xla/service/cpu/xfeed_manager.cc
+++ b/tensorflow/compiler/xla/service/cpu/xfeed_manager.cc
@@ -41,6 +41,8 @@ void XfeedQueueManager::EnqueueBuffersAtomically(
   tensorflow::mutex_lock l(mu_);
   bool was_empty = enqueued_buffers_.empty();
   for (XfeedBuffer* b : buffers) {
+    VLOG(3) << "Enqueueing " << queue_name_ << " buffer (of " << buffers.size()
+            << " buffers) with length: " << b->length();
     enqueued_buffers_.push_back(b);
   }
   if (was_empty && !buffers.empty()) {
@@ -54,9 +56,11 @@ void XfeedQueueManager::EnqueueBuffersAtomically(
 
 XfeedBuffer* XfeedQueueManager::BlockingDequeueBuffer() {
   tensorflow::mutex_lock l(mu_);
+  VLOG(3) << "Waiting for an available buffer.";
   while (enqueued_buffers_.empty()) {
     cv_.wait(l);
   }
+  VLOG(3) << "A buffer is available!";
   CHECK(current_buffer_ == nullptr);
   current_buffer_ = enqueued_buffers_.front();
   enqueued_buffers_.pop_front();
@@ -65,6 +69,9 @@ XfeedBuffer* XfeedQueueManager::BlockingDequeueBuffer() {
 
 void XfeedQueueManager::ReleaseCurrentBuffer(int32 length, void* data,
                                              StatusOr<Shape> shape) {
+  VLOG(3) << "Releasing buffer with shape: "
+          << (shape.ok() ? ShapeUtil::HumanString(shape.ValueOrDie())
+                         : "<error status>");
   tensorflow::mutex_lock l(mu_);
   CHECK(current_buffer_ != nullptr);
   CHECK_EQ(length, current_buffer_->length());
diff --git a/tensorflow/compiler/xla/service/cpu/xfeed_manager.h b/tensorflow/compiler/xla/service/cpu/xfeed_manager.h
index 6af55700052007a2ca419d52b63dddea2052bd0b..b4ace232607e14fbfec01d48946f0031d96cd027 100644
--- a/tensorflow/compiler/xla/service/cpu/xfeed_manager.h
+++ b/tensorflow/compiler/xla/service/cpu/xfeed_manager.h
@@ -50,7 +50,7 @@ class XfeedBuffer {
 // Reusable component for managing the infeed and outfeed queue state.
 class XfeedQueueManager {
  public:
-  XfeedQueueManager() = default;
+  XfeedQueueManager(string queue_name) : queue_name_(queue_name) {}
 
   // Calls the completion callback for any enqueued buffers that have
   // not been dequeued by the runtime, and empties the
@@ -86,6 +86,8 @@ class XfeedQueueManager {
   void ReleaseCurrentBuffer(int32 length, void* data, StatusOr<Shape> shape);
 
  private:
+  const string queue_name_;
+
   tensorflow::mutex mu_;
 
   // Condition variable that is signaled every time a buffer is
@@ -112,8 +114,8 @@ class XfeedManager {
   XfeedQueueManager* outfeed() { return &outfeed_; }
 
  private:
-  XfeedQueueManager infeed_;
-  XfeedQueueManager outfeed_;
+  XfeedQueueManager infeed_ = {"infeed"};
+  XfeedQueueManager outfeed_ = {"outfeed"};
 };
 
 }  // namespace runtime
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
index 91086fd4a5f68211ef56c2417bb0ef4a38de2cff..a803b3171f9afa6297553c5507c4f9aa45e420ab 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
@@ -103,6 +103,7 @@ class DfsHloVisitorBase {
     return HandleElementwiseBinary(hlo);
   }
   virtual Status HandleConvolution(HloInstructionPtr hlo) = 0;
+  virtual Status HandleFft(HloInstructionPtr fft) = 0;
   virtual Status HandleCrossReplicaSum(HloInstructionPtr hlo) = 0;
   virtual Status HandleCompare(HloInstructionPtr hlo) {
     return HandleElementwiseBinary(hlo);
@@ -247,6 +248,10 @@ class DfsHloVisitorBase {
   // affecting correctness.
   void ReserveVisitStates(int num) { visit_state_.Reserve(num); }
 
+  // Useful when we want to visit the same computation more than once with the
+  // same visitor.
+  void ResetVisitStates() { visit_state_.Reset(); }
+
   void SetVisitState(int id, VisitState state) {
     visit_state_.SetState(id, state);
   }
@@ -326,6 +331,7 @@ class DfsHloVisitorBase {
       *w = (*w & ~mask) | (static_cast<uint64>(state) << shift);
       DCHECK_EQ(GetState(id), state);
     }
+    void Reset() { states_.clear(); }
 
    private:
     static const uint32 kStatesPerWord = sizeof(uint64) / 2 /*bits per entry*/;
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
index 133aa2509405738de8388708b0c61a82023e2738..170adb3d241b3648bc53f96dde9866f0b794f80a 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
@@ -85,6 +85,9 @@ class DfsHloVisitorWithDefaultBase
   Status HandleConvolution(HloInstructionPtr convolution) override {
     return DefaultAction(convolution);
   }
+  Status HandleFft(HloInstructionPtr fft) override {
+    return DefaultAction(fft);
+  }
   Status HandleCrossReplicaSum(HloInstructionPtr crs) override {
     return DefaultAction(crs);
   }
diff --git a/tensorflow/compiler/xla/service/dot_decomposer.cc b/tensorflow/compiler/xla/service/dot_decomposer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..12faed69677cd99c6ed82c8d13dad3138d9461b7
--- /dev/null
+++ b/tensorflow/compiler/xla/service/dot_decomposer.cc
@@ -0,0 +1,185 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/dot_decomposer.h"
+
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace xla {
+
+namespace {
+
+// TODO(b/69062148) Remove this code when all backends support BatchDot
+// natively.
+Status DecomposeBatchDot(HloInstruction* dot) {
+  auto computation = dot->parent();
+  const DotDimensionNumbers& dnums = dot->dot_dimension_numbers();
+  HloInstruction* lhs = dot->mutable_operand(0);
+  HloInstruction* rhs = dot->mutable_operand(1);
+  const Shape& lhs_shape = lhs->shape();
+  const Shape& rhs_shape = rhs->shape();
+  const Shape& dot_shape = dot->shape();
+
+  // ShapeInference should guarantee that lhs/rhs batch dimensions match.
+  CHECK_EQ(dnums.lhs_batch_dimensions_size(),
+           dnums.rhs_batch_dimensions_size());
+  const int64 num_batch_dims = dnums.lhs_batch_dimensions_size();
+  // Calculate total batch size (note that ShapeInference requires that
+  // the batch dimensions are most-major).
+  int64 batch_size = 1;
+  for (int i = 0; i < num_batch_dims; ++i) {
+    CHECK_EQ(lhs_shape.dimensions(dnums.lhs_batch_dimensions(i)),
+             rhs_shape.dimensions(dnums.rhs_batch_dimensions(i)));
+    batch_size *= lhs_shape.dimensions(dnums.lhs_batch_dimensions(i));
+  }
+
+  // Set lhs/rhs_transpose.
+  CHECK_EQ(1, dnums.lhs_contracting_dimensions_size());
+  const int64 lhs_contracting_dim_number = dnums.lhs_contracting_dimensions(0);
+  const bool lhs_transpose = (lhs_contracting_dim_number - num_batch_dims) == 0;
+
+  CHECK_EQ(1, dnums.rhs_contracting_dimensions_size());
+  const int64 rhs_contracting_dim_number = dnums.rhs_contracting_dimensions(0);
+  const bool rhs_transpose = (rhs_contracting_dim_number - num_batch_dims) == 1;
+
+  // Compute R3 and R3 shapes for lhs.
+  PrimitiveType lhs_type = lhs_shape.element_type();
+  const int64 lhs_rows = lhs_shape.dimensions(num_batch_dims + 0);
+  const int64 lhs_cols = lhs_shape.dimensions(num_batch_dims + 1);
+  Shape lhs_shape_r3 =
+      ShapeUtil::MakeShape(lhs_type, {batch_size, lhs_rows, lhs_cols});
+  Shape lhs_slice_shape_r3 =
+      ShapeUtil::MakeShape(lhs_type, {1, lhs_rows, lhs_cols});
+  Shape lhs_slice_shape_r2 =
+      ShapeUtil::MakeShape(lhs_type, {lhs_rows, lhs_cols});
+
+  // Compute R3 and R3 shapes for rhs.
+  PrimitiveType rhs_type = rhs_shape.element_type();
+  const int64 rhs_rows = rhs_shape.dimensions(num_batch_dims + 0);
+  const int64 rhs_cols = rhs_shape.dimensions(num_batch_dims + 1);
+  Shape rhs_shape_r3 =
+      ShapeUtil::MakeShape(rhs_type, {batch_size, rhs_rows, rhs_cols});
+  Shape rhs_slice_shape_r3 =
+      ShapeUtil::MakeShape(rhs_type, {1, rhs_rows, rhs_cols});
+  Shape rhs_slice_shape_r2 =
+      ShapeUtil::MakeShape(rhs_type, {rhs_rows, rhs_cols});
+
+  // Compute R3 and R3 shapes for dot output.
+  PrimitiveType dot_type = dot_shape.element_type();
+  const int64 dot_rows = dot_shape.dimensions(num_batch_dims + 0);
+  const int64 dot_cols = dot_shape.dimensions(num_batch_dims + 1);
+  Shape dot_shape_r2 = ShapeUtil::MakeShape(dot_type, {dot_rows, dot_cols});
+  Shape dot_shape_r3 = ShapeUtil::MakeShape(dot_type, {1, dot_rows, dot_cols});
+  Shape concat_shape_r3 =
+      ShapeUtil::MakeShape(dot_type, {batch_size, dot_rows, dot_cols});
+
+  // Reshape lhs/rhs into R3.
+  auto lhs_r3 = computation->AddInstruction(
+      HloInstruction::CreateReshape(lhs_shape_r3, lhs));
+  auto rhs_r3 = computation->AddInstruction(
+      HloInstruction::CreateReshape(rhs_shape_r3, rhs));
+
+  // Loop through batch size, slicing out required lhs/rhs to compute each Dot.
+  std::vector<HloInstruction*> output_slices(batch_size);
+  for (int64 i = 0; i < batch_size; ++i) {
+    // Slice R3 shape from 'lhs' and reshape to R2.
+    auto lhs_slice_r3 = computation->AddInstruction(
+        HloInstruction::CreateSlice(lhs_slice_shape_r3, lhs_r3, {i, 0, 0},
+                                    {i + 1, lhs_rows, lhs_cols}, {1, 1, 1}));
+    auto lhs_slice_r2 = computation->AddInstruction(
+        HloInstruction::CreateReshape(lhs_slice_shape_r2, lhs_slice_r3));
+
+    // Slice R3 shape from 'rhs' and reshape to R2.
+    auto rhs_slice_r3 = computation->AddInstruction(
+        HloInstruction::CreateSlice(rhs_slice_shape_r3, rhs_r3, {i, 0, 0},
+                                    {i + 1, rhs_rows, rhs_cols}, {1, 1, 1}));
+    auto rhs_slice_r2 = computation->AddInstruction(
+        HloInstruction::CreateReshape(rhs_slice_shape_r2, rhs_slice_r3));
+
+    // Transpose lhs/rhs (if needed).
+    if (lhs_transpose) {
+      Shape lhs_slice_shape_r2_transpose =
+          ShapeUtil::MakeShape(lhs_type, {lhs_cols, lhs_rows});
+      lhs_slice_r2 =
+          computation->AddInstruction(HloInstruction::CreateTranspose(
+              lhs_slice_shape_r2_transpose, lhs_slice_r2, {1, 0}));
+    }
+    if (rhs_transpose) {
+      Shape rhs_slice_shape_r2_transpose =
+          ShapeUtil::MakeShape(rhs_type, {rhs_cols, rhs_rows});
+      rhs_slice_r2 =
+          computation->AddInstruction(HloInstruction::CreateTranspose(
+              rhs_slice_shape_r2_transpose, rhs_slice_r2, {1, 0}));
+    }
+
+    // Compute Dot of lhs/rhs R2 slices.
+    DotDimensionNumbers dot_dnums;
+    dot_dnums.add_lhs_contracting_dimensions(1);
+    dot_dnums.add_rhs_contracting_dimensions(0);
+    auto dot_r2 = computation->AddInstruction(HloInstruction::CreateDot(
+        dot_shape_r2, lhs_slice_r2, rhs_slice_r2, dot_dnums));
+
+    // Reshape Dot to R3 so we can concat along batch dimension.
+    auto dot_r3 = computation->AddInstruction(
+        HloInstruction::CreateReshape(dot_shape_r3, dot_r2));
+
+    output_slices[i] = dot_r3;
+  }
+
+  // Concatenate slices from 'output_slices' along batch dimension.
+  auto concat = computation->AddInstruction(
+      HloInstruction::CreateConcatenate(concat_shape_r3, output_slices, 0));
+  // Reshape output 'new_dot' to original dimensions.
+  auto new_dot = computation->AddInstruction(
+      HloInstruction::CreateReshape(dot_shape, concat));
+
+  // Replace all uses of 'dot' in 'computation' with 'new_dot'.
+  return computation->ReplaceInstruction(dot, new_dot);
+}
+
+}  // namespace
+
+StatusOr<bool> DotDecomposer::Run(HloModule* module) {
+  XLA_VLOG_LINES(2, "DotDecomposer ENTRY\n" + module->ToString());
+  // Gather all batch Dot operations.
+  std::vector<HloInstruction*> batch_dots;
+  for (auto* computation : module->MakeNonfusionComputations()) {
+    for (auto* instruction : computation->instructions()) {
+      if (instruction->opcode() != HloOpcode::kDot) {
+        continue;
+      }
+      const DotDimensionNumbers& dnums = instruction->dot_dimension_numbers();
+      if (dnums.lhs_batch_dimensions_size() > 0 && decompose_batch_dot_) {
+        batch_dots.push_back(instruction);
+      }
+    }
+  }
+  // Decompose each batch Dot in 'batch_dots'.
+  bool changed = false;
+  for (auto* dot : batch_dots) {
+    TF_RETURN_IF_ERROR(DecomposeBatchDot(dot));
+    changed = true;
+  }
+  XLA_VLOG_LINES(2, "DotDecompose EXIT\n" + module->ToString());
+  return changed;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/dot_decomposer.h b/tensorflow/compiler/xla/service/dot_decomposer.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ff0ab34eac0cd0fbc264b408c57653c944402a6
--- /dev/null
+++ b/tensorflow/compiler/xla/service/dot_decomposer.h
@@ -0,0 +1,44 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_DOT_DECOMPOSER_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_DOT_DECOMPOSER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// DotDecomposer is a pass which decomposes batch Dot operations into a
+// sequence of smaller (R2) Dot operations.
+class DotDecomposer : public HloPassInterface {
+ public:
+  // Decomposes batch Dot operations when 'decompose_batch_dot' is true.
+  DotDecomposer(bool decompose_batch_dot = true)
+      : decompose_batch_dot_(decompose_batch_dot) {}
+  ~DotDecomposer() = default;
+  tensorflow::StringPiece name() const override { return "dot_decomposer"; }
+
+  // Run DotDecomposer pass on computations in 'module'.
+  // Returns whether the 'module' was changed.
+  StatusOr<bool> Run(HloModule* module) override;
+
+ private:
+  bool decompose_batch_dot_;
+};
+
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_DOT_DECOMPOSER_H_
diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
index b9407818cd8bc82aabd32ed02f61ef66fe442625..9780bac16ec17eed2c1df64f01bcb753e26b46f0 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
@@ -50,11 +50,161 @@ using llvm_ir::IrName;
 using llvm_ir::SetToFirstInsertPoint;
 using tensorflow::strings::StrCat;
 
+namespace {
+
+llvm::Value* EmitReducePrecisionFloat(llvm::Value* x, int64 exponent_bits,
+                                      int64 mantissa_bits,
+                                      llvm::IRBuilder<>* ir_builder) {
+  // Integer and float types for casting and constant generation.
+  llvm::Type* float_type = x->getType();
+  llvm::IntegerType* int_type = ir_builder->getInt32Ty();
+
+  // Cast the input value to an integer for bitwise manipulation.
+  llvm::Value* x_as_int = ir_builder->CreateBitCast(x, int_type);
+
+  if (mantissa_bits < 23) {
+    // Last remaining mantissa bit.
+    const uint32_t last_mantissa_bit_mask = 1u << (23 - mantissa_bits);
+
+    // Compute rounding bias for round-to-nearest with ties to even.  This is
+    // equal to a base value of 0111... plus one bit if the last remaining
+    // mantissa bit is 1.
+    const uint32_t base_rounding_bias = (last_mantissa_bit_mask >> 1) - 1;
+    llvm::Value* x_last_mantissa_bit = ir_builder->CreateLShr(
+        ir_builder->CreateAnd(
+            x_as_int, llvm::ConstantInt::get(int_type, last_mantissa_bit_mask)),
+        (23 - mantissa_bits));
+    llvm::Value* x_rounding_bias = ir_builder->CreateAdd(
+        x_last_mantissa_bit,
+        llvm::ConstantInt::get(int_type, base_rounding_bias));
+
+    // Add rounding bias, and mask out truncated bits.  Note that the case
+    // where adding the rounding bias overflows into the exponent bits is
+    // correct; the non-masked mantissa bits will all be zero, and the
+    // exponent will be incremented by one.
+    const uint32_t truncation_mask = ~(last_mantissa_bit_mask - 1);
+    x_as_int = ir_builder->CreateAdd(x_as_int, x_rounding_bias);
+    x_as_int = ir_builder->CreateAnd(
+        x_as_int, llvm::ConstantInt::get(int_type, truncation_mask));
+  }
+
+  if (exponent_bits < 8) {
+    // Masks for f32 values.
+    const uint32_t f32_sign_bit_mask = 1u << 31;
+    const uint32_t f32_exp_bits_mask = 0xffu << 23;
+
+    // An exponent of 2^(n-1)-1 -- that is, 0111... with the zero in the most-
+    // significant bit -- is equal to 1.0f for all exponent sizes.  Adding
+    // 2^(n-1)-1 to this gives us the highest non-infinite exponent for a bit-
+    // size of n, and subtracting 2^(n-1)-1 from this gives us the lowest'
+    // exponent (corresponding to 0.0f).
+    //
+    // Thus, the f32 exponent corresponding to the highest non-infinite
+    // exponent for a bit size of n is (2^7-1) + 2^(n-1)-1, and the f32
+    // exponent corresponding to the lowest exponent for a bit size of n is
+    // (2^7-1) - 2^(n-1)-1.
+    //
+    // Note that we have already checked that exponents_bits >= 1.
+    const uint32_t f32_exponent_bias = (1 << 7) - 1;
+    const uint32_t reduced_exponent_bias = (1 << (exponent_bits - 1)) - 1;
+    const uint32_t reduced_max_exponent =
+        f32_exponent_bias + reduced_exponent_bias;
+    const uint32_t reduced_min_exponent =
+        f32_exponent_bias - reduced_exponent_bias;
+
+    // Do we overflow or underflow?
+    llvm::Value* x_exponent = ir_builder->CreateAnd(
+        x_as_int, llvm::ConstantInt::get(int_type, f32_exp_bits_mask));
+    llvm::Value* x_overflows = ir_builder->CreateICmpUGT(
+        x_exponent,
+        llvm::ConstantInt::get(int_type, reduced_max_exponent << 23));
+    llvm::Value* x_underflows = ir_builder->CreateICmpULE(
+        x_exponent,
+        llvm::ConstantInt::get(int_type, reduced_min_exponent << 23));
+
+    // Compute appropriately-signed values of zero and infinity.
+    llvm::Value* x_signed_zero = ir_builder->CreateAnd(
+        x_as_int, llvm::ConstantInt::get(int_type, f32_sign_bit_mask));
+    llvm::Value* x_signed_inf = ir_builder->CreateOr(
+        x_signed_zero, llvm::ConstantInt::get(int_type, f32_exp_bits_mask));
+
+    // Force to zero or infinity if overflow or underflow.  (Note that this
+    // truncates all denormal values to zero, rather than rounding them.)
+    x_as_int = ir_builder->CreateSelect(x_overflows, x_signed_inf, x_as_int);
+    x_as_int = ir_builder->CreateSelect(x_underflows, x_signed_zero, x_as_int);
+  }
+
+  // Cast the result back to a floating-point type.
+  llvm::Value* result = ir_builder->CreateBitCast(x_as_int, float_type);
+
+  // Correct result for NaN inputs.
+  //
+  // The exponent handling will "normalize" NaN values to infinities, which is
+  // undesirable (except in the case with no mantissa bits, in which case it
+  // is mandatory).  This logic also handles cases where mantissa-rounding
+  // causes a NaN's mantissa to overflow into the exponent bits, which would
+  // otherwise create an erroneous zero value.
+  //
+  // If the fast-math flags are set to assume no NaNs, the comparison is likely
+  // to be optimized away, so there's no point in even emitting it.
+  if (!ir_builder->getFastMathFlags().noNaNs()) {
+    llvm::Value* x_is_nan = ir_builder->CreateFCmpUNO(x, x);
+
+    if (mantissa_bits > 0) {
+      result = ir_builder->CreateSelect(x_is_nan, x, result);
+    } else {
+      result = ir_builder->CreateSelect(
+          x_is_nan, llvm::ConstantFP::getInfinity(float_type), result);
+    }
+  }
+  return result;
+}
+
+llvm::Value* EmitF32ToBF16(llvm::Value* f32_value,
+                           llvm::IRBuilder<>* ir_builder) {
+  auto reduced_precision = EmitReducePrecisionFloat(
+      f32_value,
+      /*exponent_bits=*/primitive_util::kBFloat16ExponentBits,
+      /*mantissa_bits=*/primitive_util::kBFloat16MantissaBits, ir_builder);
+  auto as_int32 =
+      ir_builder->CreateBitCast(reduced_precision, ir_builder->getInt32Ty());
+  auto shifted = ir_builder->CreateLShr(as_int32, 16);
+  auto truncated = ir_builder->CreateTrunc(shifted, ir_builder->getInt16Ty());
+  return ir_builder->CreateBitCast(truncated, ir_builder->getInt16Ty());
+}
+
+llvm::Value* EmitBF16ToF32(llvm::Value* bf16_value,
+                           llvm::IRBuilder<>* ir_builder) {
+  auto as_int16 =
+      ir_builder->CreateBitCast(bf16_value, ir_builder->getInt16Ty());
+  auto as_int32 = ir_builder->CreateZExt(as_int16, ir_builder->getInt32Ty());
+  auto shifted = ir_builder->CreateShl(as_int32, 16);
+  return ir_builder->CreateBitCast(shifted, ir_builder->getFloatTy());
+}
+
+llvm::Value* EmitIntegralToFloating(llvm::Value* integer_value,
+                                    PrimitiveType from_type,
+                                    PrimitiveType to_type, llvm::Module* module,
+                                    llvm::IRBuilder<>* ir_builder) {
+  if (primitive_util::IsSignedIntegralType(from_type)) {
+    return ir_builder->CreateSIToFP(
+        integer_value, llvm_ir::PrimitiveTypeToIrType(to_type, module));
+  } else {
+    CHECK(primitive_util::IsUnsignedIntegralType(from_type) ||
+          from_type == PRED);
+    return ir_builder->CreateUIToFP(
+        integer_value, llvm_ir::PrimitiveTypeToIrType(to_type, module));
+  }
+}
+
+}  // namespace
+
 StatusOr<llvm::Value*> ElementalIrEmitter::EmitUnaryOp(
     const HloInstruction* op, llvm::Value* operand_value) const {
   if (op->opcode() == HloOpcode::kCopy) {
     return operand_value;
-  } else if (operand_value->getType()->isIntegerTy()) {
+  } else if (ShapeUtil::ElementIsIntegral(op->operand(0)->shape()) ||
+             op->operand(0)->shape().element_type() == PRED) {
     return EmitIntegerUnaryOp(op, operand_value);
   } else if (ShapeUtil::ElementIsComplex(op->operand(0)->shape())) {
     return EmitComplexUnaryOp(op, operand_value);
@@ -79,15 +229,14 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitIntegerUnaryOp(
             primitive_util::IsSignedIntegralType(to_type));
       }
       if (primitive_util::IsFloatingPointType(to_type)) {
-        if (primitive_util::IsSignedIntegralType(from_type)) {
-          return ir_builder_->CreateSIToFP(
-              operand_value, llvm_ir::PrimitiveTypeToIrType(to_type, module_));
-        }
-        if (primitive_util::IsUnsignedIntegralType(from_type) ||
-            from_type == PRED) {
-          return ir_builder_->CreateUIToFP(
-              operand_value, llvm_ir::PrimitiveTypeToIrType(to_type, module_));
+        if (to_type == BF16) {
+          return EmitF32ToBF16(
+              EmitIntegralToFloating(operand_value, from_type, F32, module_,
+                                     ir_builder_),
+              ir_builder_);
         }
+        return EmitIntegralToFloating(operand_value, from_type, to_type,
+                                      module_, ir_builder_);
       }
       if (primitive_util::IsComplexType(to_type)) {
         auto to_ir_component_type = llvm_ir::PrimitiveTypeToIrType(
@@ -207,6 +356,17 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatUnaryOp(
                 llvm_ir::PrimitiveTypeToIrType(to_component_type, module_)),
             nullptr);
       }
+      if (from_type == BF16) {
+        TF_RET_CHECK(to_type != BF16);
+        operand_value = EmitBF16ToF32(operand_value, ir_builder_);
+        from_type = F32;
+        if (from_type == to_type) {
+          return operand_value;
+        }
+      }
+      if (from_type == F32 && to_type == BF16) {
+        return EmitF32ToBF16(operand_value, ir_builder_);
+      }
       if (primitive_util::IsFloatingPointType(to_type)) {
         return ir_builder_->CreateFPCast(
             operand_value, llvm_ir::PrimitiveTypeToIrType(to_type, module_));
@@ -244,21 +404,13 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatUnaryOp(
           primitive_util::BitWidth(to_type));
     }
     case HloOpcode::kExp:
-      return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {operand_value},
-                                          {operand_value->getType()},
-                                          ir_builder_);
+      return EmitExp(op->shape().element_type(), operand_value);
     case HloOpcode::kLog:
-      return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::log, {operand_value},
-                                          {operand_value->getType()},
-                                          ir_builder_);
+      return EmitLog(op->shape().element_type(), operand_value);
     case HloOpcode::kCos:
-      return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::cos, {operand_value},
-                                          {operand_value->getType()},
-                                          ir_builder_);
+      return EmitCos(op->shape().element_type(), operand_value);
     case HloOpcode::kSin:
-      return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::sin, {operand_value},
-                                          {operand_value->getType()},
-                                          ir_builder_);
+      return EmitSin(op->shape().element_type(), operand_value);
     case HloOpcode::kFloor:
       return llvm_ir::EmitCallToIntrinsic(
           llvm::Intrinsic::floor, {operand_value}, {operand_value->getType()},
@@ -309,9 +461,25 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatUnaryOp(
 
 StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexUnaryOp(
     const HloInstruction* op, llvm::Value* operand_value) const {
+  PrimitiveType input_type = op->operand(0)->shape().element_type();
+  PrimitiveType component_type =
+      primitive_util::IsComplexType(input_type)
+          ? primitive_util::ComplexComponentType(input_type)
+          : input_type;
   switch (op->opcode()) {
-    // TODO(b/65209142): Angle/Log require atan2.
-    // case HloOpcode::kLog:  // log(a+bi) = .5*log(a^2+b^2) + i*atan2(b, a)
+    case HloOpcode::kLog: {
+      // log(a+bi) = .5*log(a^2+b^2) + i*atan2(b, a)
+      auto a = EmitExtractReal(operand_value);
+      auto b = EmitExtractImag(operand_value);
+      llvm::Type* llvm_ty = a->getType();
+      auto sum_sq = ir_builder_->CreateFAdd(ir_builder_->CreateFMul(a, a),
+                                            ir_builder_->CreateFMul(b, b));
+      TF_ASSIGN_OR_RETURN(auto log_sum_sq, EmitLog(component_type, sum_sq));
+      TF_ASSIGN_OR_RETURN(auto angle, EmitAtan2(component_type, b, a));
+      auto one_half = llvm::ConstantFP::get(llvm_ty, 0.5);
+      return EmitComposeComplex(
+          op, ir_builder_->CreateFMul(one_half, log_sum_sq), angle);
+    }
     case HloOpcode::kConvert: {
       PrimitiveType from_type = op->operand(0)->shape().element_type();
       TF_RET_CHECK(primitive_util::IsComplexType(from_type));
@@ -333,15 +501,12 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexUnaryOp(
     }
     case HloOpcode::kExp: {
       // e^(a+bi) = e^a*(cos(b)+sin(b)i)
-      auto exp_a = llvm_ir::EmitCallToIntrinsic(
-          llvm::Intrinsic::exp, {EmitExtractReal(operand_value)},
-          {EmitExtractReal(operand_value)->getType()}, ir_builder_);
-      auto cos_b = llvm_ir::EmitCallToIntrinsic(
-          llvm::Intrinsic::cos, {EmitExtractImag(operand_value)},
-          {EmitExtractImag(operand_value)->getType()}, ir_builder_);
-      auto sin_b = llvm_ir::EmitCallToIntrinsic(
-          llvm::Intrinsic::sin, {EmitExtractImag(operand_value)},
-          {EmitExtractImag(operand_value)->getType()}, ir_builder_);
+      TF_ASSIGN_OR_RETURN(
+          auto exp_a, EmitExp(component_type, EmitExtractReal(operand_value)));
+      TF_ASSIGN_OR_RETURN(
+          auto cos_b, EmitCos(component_type, EmitExtractImag(operand_value)));
+      TF_ASSIGN_OR_RETURN(
+          auto sin_b, EmitSin(component_type, EmitExtractImag(operand_value)));
       return EmitComposeComplex(op, ir_builder_->CreateFMul(exp_a, cos_b),
                                 ir_builder_->CreateFMul(exp_a, sin_b));
     }
@@ -356,16 +521,13 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexUnaryOp(
       auto a = EmitExtractReal(operand_value);
       auto b = EmitExtractImag(operand_value);
       auto type = a->getType();
-      auto exp_b = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {b},
-                                                {type}, ir_builder_);
+      TF_ASSIGN_OR_RETURN(auto exp_b, EmitExp(component_type, b));
       auto half_exp_b =
           ir_builder_->CreateFMul(llvm::ConstantFP::get(type, 0.5), exp_b);
       auto half_exp_neg_b =
           ir_builder_->CreateFDiv(llvm::ConstantFP::get(type, 0.5), exp_b);
-      auto cos_a = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::cos, {a},
-                                                {type}, ir_builder_);
-      auto sin_a = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::sin, {a},
-                                                {type}, ir_builder_);
+      TF_ASSIGN_OR_RETURN(auto cos_a, EmitCos(component_type, a));
+      TF_ASSIGN_OR_RETURN(auto sin_a, EmitSin(component_type, a));
       return EmitComposeComplex(
           op,
           ir_builder_->CreateFMul(
@@ -386,16 +548,13 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexUnaryOp(
       auto a = EmitExtractReal(operand_value);
       auto b = EmitExtractImag(operand_value);
       auto type = a->getType();
-      auto exp_b = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {b},
-                                                {type}, ir_builder_);
+      TF_ASSIGN_OR_RETURN(auto exp_b, EmitExp(component_type, b));
       auto half_exp_b =
           ir_builder_->CreateFMul(llvm::ConstantFP::get(type, 0.5), exp_b);
       auto half_exp_neg_b =
           ir_builder_->CreateFDiv(llvm::ConstantFP::get(type, 0.5), exp_b);
-      auto cos_a = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::cos, {a},
-                                                {type}, ir_builder_);
-      auto sin_a = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::sin, {a},
-                                                {type}, ir_builder_);
+      TF_ASSIGN_OR_RETURN(auto cos_a, EmitCos(component_type, a));
+      TF_ASSIGN_OR_RETURN(auto sin_a, EmitSin(component_type, a));
       return EmitComposeComplex(
           op,
           ir_builder_->CreateFMul(
@@ -403,6 +562,58 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexUnaryOp(
           ir_builder_->CreateFMul(
               cos_a, ir_builder_->CreateFSub(half_exp_b, half_exp_neg_b)));
     }
+    case HloOpcode::kTanh: {
+      /*
+      tanh=(exp(x)-exp(-x)) / (exp(x)+exp(-x))
+      e^(a+bi) = e^a*(cos(b)+sin(b)i)
+      so tanh=(((cos(b)+sin(b)i)e^a - (cos(-b)+sin(-b)i)e^-a)) /
+              (((cos(b)+sin(b)i)e^a + (cos(-b)+sin(-b)i)e^-a))
+      cos(b)=cos(-b), sin(-b)=-sin(b)
+      so tanh=(((cos(b)+sin(b)i)e^a - (cos(b)-sin(b)i)e^-a)) /
+              (((cos(b)+sin(b)i)e^a + (cos(b)-sin(b)i)e^-a))
+             =(cos(b)e^a+i*sin(b)e^a + cos(b)(-e^-a)+i*sin(b)e^-a) /
+              (cos(b)e^a+i*sin(b)e^a + cos(b)e^-a+i*sin(b)(-e^-a))
+             =(cos(b)(e^a-e^-a) + i*sin(b)(e^a+e^-a)) /
+              (cos(b)(e^a+e^-a) + i*sin(b)(e^a-e^-a))
+      This is a complex division, so we can multiply by denom_conj/denom_conj
+             =(cos(b)(e^a-e^-a) + i*sin(b)(e^a+e^-a)) *
+              (cos(b)(e^a+e^-a) - i*sin(b)(e^a-e^-a)) /
+              ((cos(b)(e^a+e^-a))^2 + (sin(b)(e^a-e^-a))^2)
+             =(cos(b)^2(e^(2a)-e^(-2a)) + sin(b)^2(e^(2a)-e^(-2a)) +
+               i*(cos(b)sin(b)(e^a+e^-a)^2 - cos(b)sin(b)(e^a-e^-a)^2)) /
+              ((cos(b)(e^a+e^-a))^2 + (sin(b)(e^a-e^-a))^2)
+      */
+      auto a = EmitExtractReal(operand_value);
+      auto b = EmitExtractImag(operand_value);
+      TF_ASSIGN_OR_RETURN(auto exp_a, EmitExp(component_type, a));
+      TF_ASSIGN_OR_RETURN(auto cos_b, EmitCos(component_type, b));
+      TF_ASSIGN_OR_RETURN(auto sin_b, EmitSin(component_type, b));
+      auto exp_neg_a = ir_builder_->CreateFDiv(
+          llvm::ConstantFP::get(exp_a->getType(), 1), exp_a);
+      auto exp_2a_minus_exp_neg_2a = ir_builder_->CreateFSub(
+          ir_builder_->CreateFMul(exp_a, exp_a),
+          ir_builder_->CreateFMul(exp_neg_a, exp_neg_a));
+      auto cos_b_sq = ir_builder_->CreateFMul(cos_b, cos_b);
+      auto sin_b_sq = ir_builder_->CreateFMul(sin_b, sin_b);
+      auto real_num = ir_builder_->CreateFAdd(
+          ir_builder_->CreateFMul(cos_b_sq, exp_2a_minus_exp_neg_2a),
+          ir_builder_->CreateFMul(sin_b_sq, exp_2a_minus_exp_neg_2a));
+      auto cos_b_sin_b = ir_builder_->CreateFMul(cos_b, sin_b);
+      auto exp_a_plus_exp_neg_a = ir_builder_->CreateFAdd(exp_a, exp_neg_a);
+      auto exp_a_plus_exp_neg_a_sq =
+          ir_builder_->CreateFMul(exp_a_plus_exp_neg_a, exp_a_plus_exp_neg_a);
+      auto exp_a_minus_exp_neg_a = ir_builder_->CreateFSub(exp_a, exp_neg_a);
+      auto exp_a_minus_exp_neg_a_sq =
+          ir_builder_->CreateFMul(exp_a_minus_exp_neg_a, exp_a_minus_exp_neg_a);
+      auto imag_num = ir_builder_->CreateFMul(
+          cos_b_sin_b, ir_builder_->CreateFSub(exp_a_plus_exp_neg_a_sq,
+                                               exp_a_minus_exp_neg_a_sq));
+      auto denom = ir_builder_->CreateFAdd(
+          ir_builder_->CreateFMul(cos_b_sq, exp_a_plus_exp_neg_a_sq),
+          ir_builder_->CreateFMul(sin_b_sq, exp_a_minus_exp_neg_a_sq));
+      return EmitComposeComplex(op, ir_builder_->CreateFDiv(real_num, denom),
+                                ir_builder_->CreateFDiv(imag_num, denom));
+    }
     case HloOpcode::kAbs: {
       auto sum_sq = ir_builder_->CreateFAdd(
           ir_builder_->CreateFMul(EmitExtractReal(operand_value),
@@ -449,7 +660,8 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitBinaryOp(
     const HloInstruction* op, llvm::Value* lhs_value,
     llvm::Value* rhs_value) const {
   PrimitiveType operand_type = op->operand(0)->shape().element_type();
-  if (lhs_value->getType()->isIntegerTy()) {
+  if (ShapeUtil::ElementIsIntegral(op->operand(0)->shape()) ||
+      operand_type == PRED) {
     return EmitIntegerBinaryOp(
         op, lhs_value, rhs_value,
         primitive_util::IsSignedIntegralType(operand_type));
@@ -464,7 +676,6 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatBinaryOp(
     const HloInstruction* op, llvm::Value* lhs_value,
     llvm::Value* rhs_value) const {
   switch (op->opcode()) {
-    // case HloOpcode::kAtan2:  // TODO(b/65209142): CPU atan2 support
     case HloOpcode::kComplex:
       return EmitComposeComplex(op, lhs_value, rhs_value);
     case HloOpcode::kAdd:
@@ -508,10 +719,9 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatBinaryOp(
     case HloOpcode::kMinimum:
       return EmitFloatMin(lhs_value, rhs_value);
     case HloOpcode::kPower:
-      return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::pow,
-                                          {lhs_value, rhs_value},
-                                          {lhs_value->getType()}, ir_builder_);
-
+      return EmitPow(op->shape().element_type(), lhs_value, rhs_value);
+    case HloOpcode::kAtan2:
+      return EmitAtan2(op->shape().element_type(), lhs_value, rhs_value);
     default:
       return Unimplemented("binary floating point op '%s'",
                            HloOpcodeString(op->opcode()).c_str());
@@ -607,9 +817,40 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitComplexBinaryOp(
                                   EmitExtractImag(lhs_value),
                                   EmitExtractImag(rhs_value), ir_builder_));
 
-    // TODO(b/65209142): requires arg(z) -> requires atan|atan2 intrinsic
-    // case HloOpcode::kPower:
-    // // (a+bi)^(c+di) = exp(i(c+di)*arg(a+bi)) * (a*a+b*b)^(c/2+di/2)
+    case HloOpcode::kPower: {
+      // (a+bi)^(c+di) =
+      //    (a*a+b*b)^(0.5c) * exp(-d*atan2(b,a)) * (cos(q) + i*sin(q)),
+      //    where q = c*atan2(b,a)+0.5d*ln(a*a+b*b)
+      PrimitiveType component_type =
+          primitive_util::ComplexComponentType(op->shape().element_type());
+      auto a = EmitExtractReal(lhs_value);
+      auto b = EmitExtractImag(lhs_value);
+      auto c = EmitExtractReal(rhs_value);
+      auto d = EmitExtractImag(rhs_value);
+      auto aa_p_bb = ir_builder_->CreateFAdd(ir_builder_->CreateFMul(a, a),
+                                             ir_builder_->CreateFMul(b, b));
+      auto one_half = llvm::ConstantFP::get(a->getType(), 0.5);
+      auto half_c = ir_builder_->CreateFMul(one_half, c);
+
+      TF_ASSIGN_OR_RETURN(auto aa_p_bb_to_half_c,
+                          EmitPow(component_type, aa_p_bb, half_c));
+      auto neg_d = ir_builder_->CreateFNeg(d);
+      TF_ASSIGN_OR_RETURN(auto arg_lhs, EmitAtan2(component_type, b, a));
+      auto neg_d_arg_lhs = ir_builder_->CreateFMul(neg_d, arg_lhs);
+      TF_ASSIGN_OR_RETURN(auto e_to_neg_d_arg_lhs,
+                          EmitExp(component_type, neg_d_arg_lhs));
+      auto coeff =
+          ir_builder_->CreateFMul(aa_p_bb_to_half_c, e_to_neg_d_arg_lhs);
+      TF_ASSIGN_OR_RETURN(auto ln_aa_p_bb, EmitLog(component_type, aa_p_bb));
+      auto half_d = ir_builder_->CreateFMul(one_half, d);
+      auto q =
+          ir_builder_->CreateFAdd(ir_builder_->CreateFMul(c, arg_lhs),
+                                  ir_builder_->CreateFMul(half_d, ln_aa_p_bb));
+      TF_ASSIGN_OR_RETURN(auto cos_q, EmitCos(component_type, q));
+      TF_ASSIGN_OR_RETURN(auto sin_q, EmitSin(component_type, q));
+      return EmitComposeComplex(op, ir_builder_->CreateFMul(coeff, cos_q),
+                                ir_builder_->CreateFMul(coeff, sin_q));
+    }
     default:
       return Unimplemented("binary complex op '%s'",
                            HloOpcodeString(op->opcode()).c_str());
@@ -712,116 +953,51 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitErfcInv(
   return EmitErfInv(prim_type, ir_builder_->CreateFSub(one, value));
 }
 
-StatusOr<llvm::Value*> ElementalIrEmitter::EmitReducePrecision(
-    const HloInstruction* hlo, llvm::Value* x) const {
-  if (hlo->operand(0)->shape().element_type() != F32) {
-    return Unimplemented("reduce-precision only implemented for F32");
-  }
-
-  // Integer and float types for casting and constant generation.
-  llvm::Type* float_type = x->getType();
-  llvm::IntegerType* int_type = ir_builder_->getInt32Ty();
-
-  // Cast the input value to an integer for bitwise manipulation.
-  llvm::Value* x_as_int = ir_builder_->CreateBitCast(x, int_type);
-
-  if (hlo->mantissa_bits() < 23) {
-    // Last remaining mantissa bit.
-    const uint32_t last_mantissa_bit_mask = 1u << (23 - hlo->mantissa_bits());
-
-    // Compute rounding bias for round-to-nearest with ties to even.  This is
-    // equal to a base value of 0111... plus one bit if the last remaining
-    // mantissa bit is 1.
-    const uint32_t base_rounding_bias = (last_mantissa_bit_mask >> 1) - 1;
-    llvm::Value* x_last_mantissa_bit = ir_builder_->CreateLShr(
-        ir_builder_->CreateAnd(
-            x_as_int, llvm::ConstantInt::get(int_type, last_mantissa_bit_mask)),
-        (23 - hlo->mantissa_bits()));
-    llvm::Value* x_rounding_bias = ir_builder_->CreateAdd(
-        x_last_mantissa_bit,
-        llvm::ConstantInt::get(int_type, base_rounding_bias));
-
-    // Add rounding bias, and mask out truncated bits.  Note that the case
-    // where adding the rounding bias overflows into the exponent bits is
-    // correct; the non-masked mantissa bits will all be zero, and the
-    // exponent will be incremented by one.
-    const uint32_t truncation_mask = ~(last_mantissa_bit_mask - 1);
-    x_as_int = ir_builder_->CreateAdd(x_as_int, x_rounding_bias);
-    x_as_int = ir_builder_->CreateAnd(
-        x_as_int, llvm::ConstantInt::get(int_type, truncation_mask));
-  }
-
-  if (hlo->exponent_bits() < 8) {
-    // Masks for f32 values.
-    const uint32_t f32_sign_bit_mask = 1u << 31;
-    const uint32_t f32_exp_bits_mask = 0xffu << 23;
-
-    // An exponent of 2^(n-1)-1 -- that is, 0111... with the zero in the most-
-    // significant bit -- is equal to 1.0f for all exponent sizes.  Adding
-    // 2^(n-1)-1 to this gives us the highest non-infinite exponent for a bit-
-    // size of n, and subtracting 2^(n-1)-1 from this gives us the lowest'
-    // exponent (corresponding to 0.0f).
-    //
-    // Thus, the f32 exponent corresponding to the highest non-infinite
-    // exponent for a bit size of n is (2^7-1) + 2^(n-1)-1, and the f32
-    // exponent corresponding to the lowest exponent for a bit size of n is
-    // (2^7-1) - 2^(n-1)-1.
-    //
-    // Note that we have already checked that exponents_bits >= 1.
-    const uint32_t f32_exponent_bias = (1 << 7) - 1;
-    const uint32_t reduced_exponent_bias =
-        (1 << (hlo->exponent_bits() - 1)) - 1;
-    const uint32_t reduced_max_exponent =
-        f32_exponent_bias + reduced_exponent_bias;
-    const uint32_t reduced_min_exponent =
-        f32_exponent_bias - reduced_exponent_bias;
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitLog(PrimitiveType prim_type,
+                                                   llvm::Value* value) const {
+  return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::log, {value},
+                                      {value->getType()}, ir_builder_);
+}
 
-    // Do we overflow or underflow?
-    llvm::Value* x_exponent = ir_builder_->CreateAnd(
-        x_as_int, llvm::ConstantInt::get(int_type, f32_exp_bits_mask));
-    llvm::Value* x_overflows = ir_builder_->CreateICmpUGT(
-        x_exponent,
-        llvm::ConstantInt::get(int_type, reduced_max_exponent << 23));
-    llvm::Value* x_underflows = ir_builder_->CreateICmpULE(
-        x_exponent,
-        llvm::ConstantInt::get(int_type, reduced_min_exponent << 23));
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitSin(PrimitiveType prim_type,
+                                                   llvm::Value* value) const {
+  return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::sin, {value},
+                                      {value->getType()}, ir_builder_);
+}
 
-    // Compute appropriately-signed values of zero and infinity.
-    llvm::Value* x_signed_zero = ir_builder_->CreateAnd(
-        x_as_int, llvm::ConstantInt::get(int_type, f32_sign_bit_mask));
-    llvm::Value* x_signed_inf = ir_builder_->CreateOr(
-        x_signed_zero, llvm::ConstantInt::get(int_type, f32_exp_bits_mask));
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitCos(PrimitiveType prim_type,
+                                                   llvm::Value* value) const {
+  return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::cos, {value},
+                                      {value->getType()}, ir_builder_);
+}
 
-    // Force to zero or infinity if overflow or underflow.  (Note that this
-    // truncates all denormal values to zero, rather than rounding them.)
-    x_as_int = ir_builder_->CreateSelect(x_overflows, x_signed_inf, x_as_int);
-    x_as_int = ir_builder_->CreateSelect(x_underflows, x_signed_zero, x_as_int);
-  }
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitExp(PrimitiveType prim_type,
+                                                   llvm::Value* value) const {
+  return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::exp, {value},
+                                      {value->getType()}, ir_builder_);
+}
 
-  // Cast the result back to a floating-point type.
-  llvm::Value* result = ir_builder_->CreateBitCast(x_as_int, float_type);
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitPow(PrimitiveType prim_type,
+                                                   llvm::Value* lhs,
+                                                   llvm::Value* rhs) const {
+  return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::pow, {lhs, rhs},
+                                      {lhs->getType()}, ir_builder_);
+}
 
-  // Correct result for NaN inputs.
-  //
-  // The exponent handling will "normalize" NaN values to infinities, which is
-  // undesirable (except in the case with no mantissa bits, in which case it
-  // is mandatory).  This logic also handles cases where mantissa-rounding
-  // causes a NaN's mantissa to overflow into the exponent bits, which would
-  // otherwise create an erroneous zero value.
-  //
-  // If the fast-math flags are set to assume no NaNs, the comparison is likely
-  // to be optimized away, so there's no point in even emitting it.
-  if (!ir_builder_->getFastMathFlags().noNaNs()) {
-    llvm::Value* x_is_nan = ir_builder_->CreateFCmpUNO(x, x);
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitAtan2(PrimitiveType prim_type,
+                                                     llvm::Value* lhs,
+                                                     llvm::Value* rhs) const {
+  return Unimplemented("atan2");
+}
 
-    if (hlo->mantissa_bits() > 0) {
-      result = ir_builder_->CreateSelect(x_is_nan, x, result);
-    } else {
-      result = ir_builder_->CreateSelect(
-          x_is_nan, llvm::ConstantFP::getInfinity(float_type), result);
-    }
+StatusOr<llvm::Value*> ElementalIrEmitter::EmitReducePrecision(
+    const HloInstruction* hlo, llvm::Value* x) const {
+  if (hlo->operand(0)->shape().element_type() != F32) {
+    return Unimplemented("reduce-precision only implemented for F32");
   }
-  return result;
+  return EmitReducePrecisionFloat(x, /*exponent_bits=*/hlo->exponent_bits(),
+                                  /*mantissa_bits=*/hlo->mantissa_bits(),
+                                  ir_builder_);
 }
 
 StatusOr<llvm::Value*> ElementalIrEmitter::EmitIntegerBinaryOp(
@@ -1088,14 +1264,6 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeRngElementGenerator(
                             get_next_uniform_float())));
         return ir_builder_->CreateFAdd(ir_builder_->CreateFMul(r, s), m);
       }
-      case RNG_BERNOULLI: {
-        TF_ASSIGN_OR_RETURN(llvm::Value * p,
-                            operand_to_generator.at(hlo->operand(0))(index));
-        return ir_builder_->CreateZExt(
-            ir_builder_->CreateFCmpOLT(get_next_uniform_float(), p),
-            llvm_ir::PrimitiveTypeToIrType(hlo->shape().element_type(),
-                                           module_));
-      }
       default:
         return InvalidArgument(
             "unhandled distribution %s",
diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/elemental_ir_emitter.h
index cccb498f82936283a215370787907b293827ff2d..1a48eb5fcb960b60d524ea56a43e15269576db76 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.h
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.h
@@ -39,7 +39,7 @@ class ElementalIrEmitter {
         module_(module),
         hlo_module_config_(hlo_module_config) {}
 
-  virtual ~ElementalIrEmitter() {}
+  virtual ~ElementalIrEmitter() = default;
 
   virtual StatusOr<llvm::Value*> EmitUnaryOp(const HloInstruction* op,
                                              llvm::Value* operand_value) const;
@@ -92,6 +92,26 @@ class ElementalIrEmitter {
   virtual StatusOr<llvm::Value*> EmitErfcInv(PrimitiveType prim_type,
                                              llvm::Value* value) const;
 
+  virtual StatusOr<llvm::Value*> EmitAtan2(PrimitiveType prim_type,
+                                           llvm::Value* lhs,
+                                           llvm::Value* rhs) const;
+
+  virtual StatusOr<llvm::Value*> EmitLog(PrimitiveType prim_type,
+                                         llvm::Value* value) const;
+
+  virtual StatusOr<llvm::Value*> EmitSin(PrimitiveType prim_type,
+                                         llvm::Value* value) const;
+
+  virtual StatusOr<llvm::Value*> EmitCos(PrimitiveType prim_type,
+                                         llvm::Value* value) const;
+
+  virtual StatusOr<llvm::Value*> EmitExp(PrimitiveType prim_type,
+                                         llvm::Value* value) const;
+
+  virtual StatusOr<llvm::Value*> EmitPow(PrimitiveType prim_type,
+                                         llvm::Value* lhs,
+                                         llvm::Value* rhs) const;
+
   virtual StatusOr<llvm::Value*> EmitReducePrecision(const HloInstruction* hlo,
                                                      llvm::Value* x) const;
 
diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc
index 9c96d9eb30b5f9e51b7f5d82391c6b9f366898d6..21e7fbea291721dfc446bae2a7002a8ec2520be4 100644
--- a/tensorflow/compiler/xla/service/executable.cc
+++ b/tensorflow/compiler/xla/service/executable.cc
@@ -24,25 +24,25 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
 
+using tensorflow::gtl::ArraySlice;
+
 namespace xla {
 
-StatusOr<std::vector<perftools::gputools::DeviceMemoryBase>>
+StatusOr<std::vector<std::unique_ptr<ShapedBuffer>>>
 Executable::ExecuteOnStreams(
-    tensorflow::gtl::ArraySlice<const ServiceExecutableRunOptions> run_options,
-    tensorflow::gtl::ArraySlice<
-        tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>>
-        arguments) {
+    ArraySlice<const ServiceExecutableRunOptions> run_options,
+    ArraySlice<ArraySlice<const ShapedBuffer*>> arguments) {
   TF_RET_CHECK(run_options.size() == arguments.size());
 
+  std::vector<std::unique_ptr<ShapedBuffer>> return_values(run_options.size());
+
   if (run_options.size() == 1) {
-    TF_ASSIGN_OR_RETURN(auto result,
+    TF_ASSIGN_OR_RETURN(return_values[0],
                         ExecuteOnStream(&run_options[0], arguments[0],
                                         /*hlo_execution_profile=*/nullptr));
-    return std::vector<perftools::gputools::DeviceMemoryBase>({result});
+    return std::move(return_values);
   }
 
-  std::vector<perftools::gputools::DeviceMemoryBase> return_values(
-      run_options.size());
   for (size_t i = 0; i < run_options.size(); ++i) {
     // We cannot BlockHostUntilDone() on the already-launched executions in case
     // of error, since if the executions communicate, the initially launched
@@ -52,9 +52,77 @@ Executable::ExecuteOnStreams(
   }
   for (const auto& options : run_options) {
     TF_RET_CHECK(options.stream() != nullptr);
-    options.stream()->BlockHostUntilDone();
+    TF_RETURN_IF_ERROR(options.stream()->BlockHostUntilDone());
+  }
+  return std::move(return_values);
+}
+
+StatusOr<std::unique_ptr<ShapedBuffer>> Executable::ExecuteOnStreamWrapper(
+    const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
+    ArraySlice<const ShapedBuffer*> arguments) {
+  perftools::gputools::Stream* stream = run_options->stream();
+  std::unique_ptr<perftools::gputools::Timer> timer;
+  if (profile != nullptr) {
+    timer.reset(new perftools::gputools::Timer(stream->parent()));
+    stream->InitTimer(timer.get()).ThenStartTimer(timer.get());
   }
-  return return_values;
+
+  VLOG(1) << "enqueueing executable on stream...";
+  // If the profiling flag isn't enabled, we pass nullptr as the profile to
+  // indicate profiling is not requested.
+  std::unique_ptr<HloExecutionProfile> profile_ptr =
+      module_config().debug_options().xla_hlo_profile() &&
+              hlo_profiling_enabled()
+          ? MakeUnique<HloExecutionProfile>(&hlo_profile_printer(),
+                                            &hlo_profile_index_map())
+          : nullptr;
+
+  StatusOr<std::unique_ptr<ShapedBuffer>> return_value =
+      ExecuteOnStream(run_options, arguments, profile_ptr.get());
+
+  if (profile != nullptr) {
+    VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
+    stream->ThenStopTimer(timer.get());
+    TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
+    VLOG(1) << "done with block-host-until-done";
+
+    // Merge in run-time profile information from execution_profile.
+    //
+    // TODO(b/71713097): This is buggy -- even though the mutex takes care of
+    // C++ level races, some other concurrent ExecuteOnStreamWrapper call could
+    // have rewritten the execution_profile before we get to it.
+    profile->MergeFrom(execution_profile());
+
+    // Overall execution time (in nanoseconds) from the executor timer.
+    if (stream->ok()) {
+      // Don't read timer->Nanoseconds() if the stream isn't OK -- that's
+      // illegal.
+      profile->set_compute_and_transfer_time_ns(timer->Nanoseconds());
+    }
+
+    // TODO(b/28123297): On GPU we end up including transfer time in
+    // the compute time this way. Instead, we should get the correct
+    // value by measuring it. Setting the field here at least lets
+    // benchmarks provide *some* value for GPU computations.
+    //
+    // TODO(b/28447609): The value in compute_and_transfer_time_ns is actually
+    // the compute time without the transfer time, so this way we get the
+    // correct compute time. We should instead have the correct value for
+    // compute_and_transfer_time and set compute_time to the compute time.
+    if (profile->compute_time_ns() == 0) {
+      profile->set_compute_time_ns(profile->compute_and_transfer_time_ns());
+    }
+  }
+
+  if (profile_ptr != nullptr) {
+    XLA_LOG_LINES(
+        tensorflow::INFO,
+        profile_ptr->ToString(stream->parent()->GetDeviceDescription()));
+    hlo_graph_dumper::MaybeDumpHloModule(module(), "Service::Execute",
+                                         profile_ptr.get());
+  }
+
+  return return_value;
 }
 
 Status Executable::DumpSessionModule() {
diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h
index 08862308c90af736c1adcaa9438973f858852506..5ecfdffe211c571b1bb2bc30ff2acd3021c735ae 100644
--- a/tensorflow/compiler/xla/service/executable.h
+++ b/tensorflow/compiler/xla/service/executable.h
@@ -61,16 +61,7 @@ class Executable {
   // If the hlo_execution_profile is provided as non-nullptr, profiling will be
   // enabled.
   //
-  // Returns the device memory region that a successful execution would
-  // populate.
-  virtual StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      HloExecutionProfile* hlo_execution_profile) = 0;
-
-  // Overload of ExecuteOnStream which returns and takes arguments as
-  // ShapedBuffers. Used for LocalService execution.
+  // Returns a shaped buffer containing the result of the computation.
   virtual StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
@@ -78,21 +69,19 @@ class Executable {
 
   // Same as ExecuteOnStream(), but this call is non-blocking and returns as
   // soon as all of the operations are enqueued for launch on the stream.
-  virtual StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+  virtual StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
       const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments) = 0;
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) = 0;
 
   // Same as ExecuteOnStream(), but runs this executable on multiple
   // streams. arguments[i] contains the arguments to the execution on
   // run_options[i]->stream() and the returned value is at index i of the
   // returned vector.
-  virtual StatusOr<std::vector<perftools::gputools::DeviceMemoryBase>>
-  ExecuteOnStreams(
+  virtual StatusOr<std::vector<std::unique_ptr<ShapedBuffer>>> ExecuteOnStreams(
       tensorflow::gtl::ArraySlice<const ServiceExecutableRunOptions>
           run_options,
       tensorflow::gtl::ArraySlice<
-          tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>>
+          tensorflow::gtl::ArraySlice<const ShapedBuffer*>>
           arguments);
 
   // Populates `hlo_execution_profile` from `executor`. This is implicit in any
@@ -107,13 +96,10 @@ class Executable {
 
   // Convenience wrapper for calling Executable::ExecuteOnStream. Sets up a
   // timer for the execution, sets up HLO profiling if enabled, and fills in the
-  // given ExecutionProfile if non-null.  The ExecuteOnStream overloads have
-  // different argument types and return types, so this method is templated on
-  // argument type and return type of the execute function.
-  template <typename ReturnT, typename ArgT>
-  StatusOr<ReturnT> ExecuteOnStreamWrapper(
+  // given ExecutionProfile if non-null.
+  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStreamWrapper(
       const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
-      const ArgT& arguments);
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments);
 
   // Returns the ExecutionProfile from executing on the device. This includes
   // the number of cycles taken for the computation or the compilation time.
@@ -197,66 +183,6 @@ class Executable {
   std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map_;
 };
 
-template <typename ReturnT, typename ArgT>
-StatusOr<ReturnT> Executable::ExecuteOnStreamWrapper(
-    const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
-    const ArgT& arguments) {
-  perftools::gputools::Stream* stream = run_options->stream();
-  std::unique_ptr<perftools::gputools::Timer> timer;
-  if (profile != nullptr) {
-    timer.reset(new perftools::gputools::Timer(stream->parent()));
-    stream->InitTimer(timer.get()).ThenStartTimer(timer.get());
-  }
-
-  VLOG(1) << "enqueueing executable on stream...";
-  // If the profiling flag isn't enabled, we pass nullptr as the profile to
-  // indicate profiling is not requested.
-  std::unique_ptr<HloExecutionProfile> profile_ptr =
-      module_config().debug_options().xla_hlo_profile() &&
-              hlo_profiling_enabled()
-          ? MakeUnique<HloExecutionProfile>(&hlo_profile_printer(),
-                                            &hlo_profile_index_map())
-          : nullptr;
-
-  auto return_value =
-      ExecuteOnStream(run_options, arguments, profile_ptr.get());
-
-  if (profile != nullptr) {
-    VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
-    stream->ThenStopTimer(timer.get()).BlockHostUntilDone();
-    VLOG(1) << "done with block-host-until-done";
-
-    // Merge in run-time profile information from execution_profile.
-    profile->MergeFrom(execution_profile());
-
-    // Overall execution time (in nanoseconds) from the executor timer.
-    profile->set_compute_and_transfer_time_ns(timer->Nanoseconds());
-
-    // TODO(b/28123297): On GPU we end up including transfer time in
-    // the compute time this way. Instead, we should get the correct
-    // value by measuring it. Setting the field here at least lets
-    // benchmarks provide *some* value for GPU computations.
-    //
-    // TODO(b/28447609): The value in compute_and_transfer_time_ns is actually
-    // the compute time without the transfer time, so this way we get the
-    // correct compute time. We should instead have the correct value for
-    // compute_and_transfer_time and set compute_time to the compute time.
-    if (profile->compute_time_ns() == 0) {
-      profile->set_compute_time_ns(profile->compute_and_transfer_time_ns());
-    }
-  }
-
-  if (profile_ptr != nullptr) {
-    XLA_LOG_LINES(
-        tensorflow::INFO,
-        profile_ptr->ToString(stream->parent()->GetDeviceDescription()));
-    hlo_graph_dumper::MaybeDumpHloModule(module(), "Service::Execute",
-                                         profile_ptr.get());
-  }
-
-  return return_value;
-}
-
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_
diff --git a/tensorflow/compiler/xla/service/execution_tracker.cc b/tensorflow/compiler/xla/service/execution_tracker.cc
index c225e62e3e11d2d01251b0f92272b0949eff8af1..2f0b9ed2bd98fbea4e67c0a30d5aa41ff6a06979 100644
--- a/tensorflow/compiler/xla/service/execution_tracker.cc
+++ b/tensorflow/compiler/xla/service/execution_tracker.cc
@@ -39,9 +39,7 @@ AsyncExecution::AsyncExecution(Backend* backend,
 
 tensorflow::Status AsyncExecution::BlockUntilDone() const {
   for (auto& stream : streams_) {
-    if (!stream->BlockHostUntilDone()) {
-      return InternalError("failed to block until done");
-    }
+    TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
   }
   return tensorflow::Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/flatten_call_graph.cc b/tensorflow/compiler/xla/service/flatten_call_graph.cc
index dfba22a6c4c5cf071c2cd8621643b8da6587ee3b..2b6caa149439a86d6d047605099bc3ff7b295a8e 100644
--- a/tensorflow/compiler/xla/service/flatten_call_graph.cc
+++ b/tensorflow/compiler/xla/service/flatten_call_graph.cc
@@ -26,7 +26,10 @@ namespace xla {
 
 namespace {
 
-// Helper to replace the called computation at a while- or call-instruction.
+// Helper to replace the called computation at a while-, call-, or
+// conditional-instruction. This function replaces exactly one instance of
+// 'computation' with 'new_computation' even if 'instruction' calls
+// 'computation' more than once.
 void ReplaceCalledComputation(HloInstruction* instruction,
                               HloComputation* computation,
                               HloComputation* new_computation) {
@@ -45,6 +48,15 @@ void ReplaceCalledComputation(HloInstruction* instruction,
       instruction->set_to_apply(new_computation);
       break;
     }
+    case HloOpcode::kConditional: {
+      if (computation == instruction->true_computation()) {
+        instruction->set_true_computation(new_computation);
+      } else {
+        CHECK_EQ(computation, instruction->false_computation());
+        instruction->set_false_computation(new_computation);
+      }
+      break;
+    }
     default:
       LOG(FATAL) << "unexpected opcode: "
                  << HloOpcodeString(instruction->opcode());
diff --git a/tensorflow/compiler/xla/service/flatten_call_graph_test.cc b/tensorflow/compiler/xla/service/flatten_call_graph_test.cc
index a68e90b7d009890012f94baa790d911871c9c960..d3854b40de3572a60df1ad99d8a4589f59ad7194 100644
--- a/tensorflow/compiler/xla/service/flatten_call_graph_test.cc
+++ b/tensorflow/compiler/xla/service/flatten_call_graph_test.cc
@@ -223,5 +223,35 @@ TEST_F(FlattenCallGraphTest, FlattenCalls) {
   EXPECT_EQ(1, b_node.caller_callsites().size());
 }
 
+TEST_F(FlattenCallGraphTest, FlattenCallsInConditional) {
+  auto module = CreateNewModule();
+  HloComputation* sub_computation =
+      module->AddEmbeddedComputation(MakeScalarComputation());
+
+  // Create entry computation, which is a conditional that has the same
+  // computation in the true and false branch.
+  HloComputation::Builder builder(TestName());
+  auto pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(true)));
+  auto constant1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(56.0f)));
+  auto constant2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(12.0f)));
+  builder.AddInstruction(HloInstruction::CreateConditional(
+      kScalarShape, pred, constant1, sub_computation, constant2,
+      sub_computation));
+  module->AddEntryComputation(builder.Build());
+  EXPECT_EQ(2, module->computation_count());
+
+  TF_ASSERT_OK_AND_ASSIGN(bool result, RunFlattenCallGraph(module.get()));
+  EXPECT_TRUE(result);
+  std::unique_ptr<CallGraph> call_graph = CallGraph::Build(module.get());
+  // The true and false computations must now be different.
+  EXPECT_EQ(3, module->computation_count());
+
+  const CallGraphNode& sub_node = call_graph->GetNode(sub_computation);
+  EXPECT_EQ(1, sub_node.caller_callsites().size());
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
index 74aa77b4f165be76fbc0a8aa1a4a7e90a8e9acec..78dc0ad4fcd167c93f19d0c2b18ea72d666897ef 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
@@ -51,83 +51,7 @@ se::Platform::Id GenericTransferManager::PlatformId() const {
   return platform_id_;
 }
 
-Status GenericTransferManager::TransferLiteralFromDevice(
-    se::StreamExecutor* executor, const se::DeviceMemoryBase& source,
-    const Shape& device_shape, const Shape& literal_shape, Literal* literal) {
-  VLOG(2) << "transferring literal shape from device: "
-          << ShapeUtil::HumanString(literal_shape)
-          << "; device location: " << source.opaque();
-  TF_RET_CHECK(ShapeUtil::Compatible(device_shape, literal_shape));
-
-  // Tuples are a special case and contain one or more shapes inside of them to
-  // an arbitrary nesting depth.
-  if (device_shape.element_type() == TUPLE) {
-    *literal->mutable_shape() = literal_shape;
-    TF_ASSIGN_OR_RETURN(
-        std::vector<se::DeviceMemoryBase> element_buffers,
-        ShallowCopyTupleFromDevice(executor, source, device_shape));
-    TF_RET_CHECK(element_buffers.size() ==
-                 ShapeUtil::TupleElementCount(device_shape));
-    for (int64 i = 0; i < element_buffers.size(); ++i) {
-      const Shape& element_device_shape = device_shape.tuple_shapes(i);
-      const Shape& element_literal_shape = literal_shape.tuple_shapes(i);
-      Literal* element_literal = literal->add_tuple_literals();
-      // Recursively call TransferFromDevice to copy over the data in the
-      // element array.
-      TF_RETURN_IF_ERROR(TransferLiteralFromDevice(
-          executor, element_buffers[i], /*device_shape=*/element_device_shape,
-          /*literal_shape=*/element_literal_shape, element_literal));
-    }
-    return Status::OK();
-  }
-
-  *literal->mutable_shape() = device_shape;
-  literal->Reserve(ShapeUtil::ElementsIn(device_shape));
-  TF_RETURN_IF_ERROR(TransferBufferFromDevice(
-      executor, source, /*size=*/ShapeUtil::ByteSizeOf(device_shape),
-      /*destination=*/literal->MutableInternalData()));
-  if (!ShapeUtil::Equal(literal_shape, device_shape)) {
-    *literal = std::move(*literal->Relayout(literal_shape.layout()));
-  }
-  TF_RET_CHECK(ShapeUtil::Equal(literal_shape, literal->shape()));
-  return Status::OK();
-}
-
-StatusOr<std::vector<se::DeviceMemoryBase>>
-GenericTransferManager::ShallowCopyTupleFromDevice(
-    se::StreamExecutor* executor, const se::DeviceMemoryBase& source,
-    const Shape& shape) {
-  TF_RET_CHECK(ShapeUtil::IsTuple(shape));
-
-  // For devices which use the GenericTransferManager, a tuple is stored as an
-  // array of pointers to buffers. Copy the contents of the tuple buffer into
-  // a vector of void* pointers.
-  std::vector<void*> element_pointers(ShapeUtil::TupleElementCount(shape),
-                                      nullptr);
-  int64 tuple_size = ShapeUtil::ByteSizeOf(shape, pointer_size_);
-  auto copy_status = executor->SynchronousMemcpyD2H(source, tuple_size,
-                                                    element_pointers.data());
-  if (!copy_status.ok()) {
-    return AddStatus(
-        Status(static_cast<tensorflow::error::Code>(copy_status.code()),
-               copy_status.error_message()),
-        "failed transfer of tuple buffer " + ShapeUtil::HumanString(shape));
-  }
-
-  // Create a DeviceMemoryBase from each void* pointer.
-  std::vector<se::DeviceMemoryBase> destination;
-  for (size_t i = 0; i < element_pointers.size(); ++i) {
-    if (element_pointers[i] == nullptr &&
-        !ShapeUtil::HasZeroElements(shape.tuple_shapes(i))) {
-      return FailedPrecondition("tuple contains nullptr at element %lu", i);
-    }
-    destination.emplace_back(element_pointers[i],
-                             GetByteSizeRequirement(shape.tuple_shapes(i)));
-  }
-  return std::move(destination);
-}
-
-Status GenericTransferManager::WriteTuplePointersToDevice(
+Status GenericTransferManager::WriteSingleTupleIndexTable(
     perftools::gputools::StreamExecutor* executor,
     tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> elements,
     const Shape& shape, perftools::gputools::DeviceMemoryBase* region) {
@@ -145,16 +69,19 @@ StatusOr<std::unique_ptr<Literal>>
 GenericTransferManager::TransferLiteralFromDevice(
     se::StreamExecutor* executor, const ShapedBuffer& device_buffer) {
   VLOG(2) << "transferring literal from device ordinal "
-          << executor->device_ordinal() << "; device shape: "
-          << ShapeUtil::HumanStringWithLayout(device_buffer.shape())
-          << "; opaque: " << device_buffer.buffer(/*index=*/{}).opaque();
+          << executor->device_ordinal() << "; device buffer: " << device_buffer;
   TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal());
 
+  // The on-host and on-device shape should always be the same for the generic
+  // transfer manager.
+  TF_RET_CHECK(ShapeUtil::Equal(device_buffer.on_device_shape(),
+                                device_buffer.on_host_shape()));
+
   std::unique_ptr<Literal> literal =
-      Literal::CreateFromShape(device_buffer.shape());
+      Literal::CreateFromShape(device_buffer.on_host_shape());
 
   TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus(
-      device_buffer.shape(),
+      device_buffer.on_host_shape(),
       [&](const Shape& subshape, const ShapeIndex& index) -> Status {
         if (!ShapeUtil::IsTuple(subshape)) {
           TF_RETURN_IF_ERROR(TransferBufferFromDevice(
@@ -162,7 +89,7 @@ GenericTransferManager::TransferLiteralFromDevice(
               /*source=*/device_buffer.buffer(index),
               /*size=*/GetByteSizeRequirement(subshape),
               /*destination=*/
-              literal->GetSubliteral(index).MutableInternalData()));
+              literal->untyped_data(index)));
         }
 
         return Status::OK();
@@ -175,33 +102,39 @@ Status GenericTransferManager::TransferLiteralToDevice(
     const ShapedBuffer& device_buffer) {
   const Shape& shape = literal.shape();
   VLOG(2) << "transferring literal shape to device: "
-          << ShapeUtil::HumanString(shape) << "; device location: "
-          << device_buffer.buffer(/*index=*/{}).opaque();
+          << ShapeUtil::HumanString(shape)
+          << "; device buffer: " << device_buffer;
+
+  // The on-host and on-device shape should always be the same for the generic
+  // transfer manager.
+  TF_RET_CHECK(ShapeUtil::Equal(device_buffer.on_device_shape(),
+                                device_buffer.on_host_shape()));
 
-  TF_RET_CHECK(ShapeUtil::Compatible(literal.shape(), device_buffer.shape()));
+  TF_RET_CHECK(
+      ShapeUtil::Compatible(literal.shape(), device_buffer.on_host_shape()));
   TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal());
 
   TF_RETURN_IF_ERROR(WriteTupleIndexTables(executor, device_buffer));
 
   return ShapeUtil::ForEachSubshapeWithStatus(
-      device_buffer.shape(),
+      device_buffer.on_host_shape(),
       [&](const Shape& device_subshape, const ShapeIndex& index) -> Status {
         se::DeviceMemoryBase device_memory = device_buffer.buffer(index);
         if (ShapeUtil::IsArray(device_subshape)) {
           TF_RET_CHECK(GetByteSizeRequirement(device_subshape) ==
                        device_memory.size());
           // Element is array-shaped: transfer array data to device buffer.
-          const Literal& subliteral = literal.GetSubliteral(index);
+          const auto subliteral = LiteralView::Create(literal, index);
           std::unique_ptr<Literal> relayed_out_literal;
           const void* source;
           if (LayoutUtil::Equal(device_subshape.layout(),
                                 subliteral.shape().layout())) {
-            source = subliteral.InternalData();
+            source = subliteral.untyped_data();
           } else {
             // Relayout data before transferring.
             relayed_out_literal = subliteral.Relayout(device_subshape.layout(),
                                                       /*shape_index=*/{});
-            source = relayed_out_literal->InternalData();
+            source = relayed_out_literal->untyped_data();
           }
           return TransferBufferToDevice(
               executor,
@@ -212,33 +145,6 @@ Status GenericTransferManager::TransferLiteralToDevice(
       });
 }
 
-Status GenericTransferManager::TransferLiteralToDevice(
-    se::StreamExecutor* executor, const Literal& literal,
-    se::DeviceMemoryBase* destination) {
-  const Shape& shape = literal.shape();
-  VLOG(2) << "transferring literal shape to device: "
-          << ShapeUtil::HumanString(shape)
-          << "; device location: " << destination->opaque();
-
-  if (ShapeUtil::IsTuple(literal.shape())) {
-    std::vector<void*> tuple_elements_on_device;
-    for (const Literal& tuple_element : literal.tuple_literals()) {
-      se::DeviceMemoryBase allocation = executor->AllocateArray<uint8>(
-          GetByteSizeRequirement(tuple_element.shape()));
-      TF_RETURN_IF_ERROR(
-          TransferLiteralToDevice(executor, tuple_element, &allocation));
-      tuple_elements_on_device.push_back(allocation.opaque());
-    }
-    return TransferBufferToDevice(
-        executor, tuple_elements_on_device.size() * sizeof(void*),
-        tuple_elements_on_device.data(), destination);
-  }
-
-  return TransferBufferToDevice(executor,
-                                /*size=*/GetByteSizeRequirement(shape),
-                                /*source=*/literal.InternalData(), destination);
-}
-
 Status GenericTransferManager::TransferLiteralToInfeed(
     se::StreamExecutor* executor, const Literal& literal) {
   return Unimplemented("Generic transfer to Infeed");
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.h b/tensorflow/compiler/xla/service/generic_transfer_manager.h
index 50dca6aec5012f0b02cb54846b622f008600e48e..63a7c820cf4e5fbbdf870086a4fb5316ac50d10b 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.h
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.h
@@ -42,16 +42,6 @@ class GenericTransferManager : public TransferManager {
 
   perftools::gputools::Platform::Id PlatformId() const override;
 
-  Status TransferLiteralFromDevice(
-      perftools::gputools::StreamExecutor* executor,
-      const perftools::gputools::DeviceMemoryBase& source,
-      const Shape& device_shape, const Shape& literal_shape,
-      Literal* literal) override;
-
-  Status TransferLiteralToDevice(
-      perftools::gputools::StreamExecutor* executor, const Literal& literal,
-      perftools::gputools::DeviceMemoryBase* destination) override;
-
   StatusOr<std::unique_ptr<Literal>> TransferLiteralFromDevice(
       perftools::gputools::StreamExecutor* executor,
       const ShapedBuffer& device_buffer) override;
@@ -62,9 +52,6 @@ class GenericTransferManager : public TransferManager {
 
   Status TransferLiteralToInfeed(perftools::gputools::StreamExecutor* executor,
                                  const Literal& literal) override;
-  Status TransferBufferToInfeed(perftools::gputools::StreamExecutor* executor,
-                                int64 size, const void* source) override;
-
   Status TransferLiteralFromOutfeed(
       perftools::gputools::StreamExecutor* executor, const Shape& literal_shape,
       Literal* literal) override;
@@ -73,16 +60,13 @@ class GenericTransferManager : public TransferManager {
       tensorflow::gtl::ArraySlice<perftools::gputools::StreamExecutor*>
           executors) override;
 
-  StatusOr<std::vector<perftools::gputools::DeviceMemoryBase>>
-  ShallowCopyTupleFromDevice(
-      perftools::gputools::StreamExecutor* executor,
-      const perftools::gputools::DeviceMemoryBase& source,
-      const Shape& shape) override;
-
   int64 GetByteSizeRequirement(const Shape& shape) const override;
 
  protected:
-  Status WriteTuplePointersToDevice(
+  Status TransferBufferToInfeed(perftools::gputools::StreamExecutor* executor,
+                                int64 size, const void* source) override;
+
+  Status WriteSingleTupleIndexTable(
       perftools::gputools::StreamExecutor* executor,
       tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
           elements,
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index e57558b5788965214cadf5eab1024860f1a39ca1..d7ca0f6846834ae77569930325d3fc6b9fd5cca8 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -23,6 +23,15 @@ filegroup(
 
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
 
+cc_library(
+    name = "gpu_constants",
+    srcs = ["gpu_constants.cc"],
+    hdrs = ["gpu_constants.h"],
+    deps = [
+        "//tensorflow/compiler/xla:types",
+    ],
+)
+
 cc_library(
     name = "partition_assignment",
     srcs = [
@@ -123,6 +132,7 @@ cc_library(
     ],
     deps = [
         ":elemental_ir_emitter",
+        ":gpu_constants",
         ":gpu_executable",
         ":hlo_to_ir_bindings",
         ":ir_emission_utils",
@@ -203,6 +213,7 @@ cc_library(
     srcs = ["buffer_allocations.cc"],
     hdrs = ["buffer_allocations.h"],
     deps = [
+        ":gpu_constants",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
@@ -219,6 +230,8 @@ cc_library(
     srcs = [
         "convolution_thunk.cc",
         "copy_thunk.cc",
+        "cudnn_batchnorm_thunk.cc",
+        "fft_thunk.cc",
         "for_thunk.cc",
         "gemm_thunk.cc",
         "gpu_executable.cc",
@@ -232,6 +245,8 @@ cc_library(
     hdrs = [
         "convolution_thunk.h",
         "copy_thunk.h",
+        "cudnn_batchnorm_thunk.h",
+        "fft_thunk.h",
         "for_thunk.h",
         "gemm_thunk.h",
         "gpu_executable.h",
@@ -246,6 +261,7 @@ cc_library(
     deps = [
         ":buffer_allocations",
         ":infeed_manager",
+        ":ir_emission_utils",
         ":partition_assignment",
         ":stream_assignment",
         "//tensorflow/compiler/xla:array2d",
@@ -269,6 +285,7 @@ cc_library(
         "//tensorflow/core:stream_executor_no_cuda",
         "//tensorflow/core/platform/default/build_config:cublas_plugin",
         "//tensorflow/core/platform/default/build_config:cudnn_plugin",
+        "//tensorflow/core/platform/default/build_config:cufft_plugin",
         "//tensorflow/core/platform/default/build_config:stream_executor_cuda",  # build_cleaner: keep
     ],
 )
@@ -429,13 +446,15 @@ cc_library(
     deps = [
         ":convolution_folding",
         ":fusion_merger",
+        ":gpu_constants",
         ":gpu_copy_insertion",
         ":gpu_executable",
+        ":gpu_hlo_support_checker",
+        ":gpu_layout_assignment",
         ":hlo_schedule",
         ":instruction_fusion",
         ":ir_emission_utils",
         ":ir_emitter",
-        ":layout_assignment",
         ":pad_insertion",
         ":partition_assignment",
         ":stream_assignment",
@@ -445,10 +464,11 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla/service:algebraic_simplifier",
-        "//tensorflow/compiler/xla/service:batchnorm_rewriter",
+        "//tensorflow/compiler/xla/service:batchnorm_expander",
         "//tensorflow/compiler/xla/service:buffer_assignment",
         "//tensorflow/compiler/xla/service:buffer_liveness",
         "//tensorflow/compiler/xla/service:call_inliner",
+        "//tensorflow/compiler/xla/service:dot_decomposer",
         "//tensorflow/compiler/xla/service:executable",
         "//tensorflow/compiler/xla/service:flatten_call_graph",
         "//tensorflow/compiler/xla/service:hlo",
@@ -467,11 +487,14 @@ cc_library(
         "//tensorflow/compiler/xla/service:transpose_folding",
         "//tensorflow/compiler/xla/service:tuple_simplifier",
         "//tensorflow/compiler/xla/service:while_loop_simplifier",
+        "//tensorflow/compiler/xla/service:zero_sized_hlo_elimination",
+        "//tensorflow/compiler/xla/service/gpu:cudnn_batchnorm_rewriter",
         "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
         "//tensorflow/core:cuda_libdevice_path",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "//tensorflow/core:regexp_internal",
         "//tensorflow/core:stream_executor_no_cuda",
         "@llvm//:core",
         "@llvm//:support",
@@ -479,6 +502,19 @@ cc_library(
     alwayslink = True,  # Contains compiler registration
 )
 
+cc_library(
+    name = "cudnn_batchnorm_rewriter",
+    srcs = ["cudnn_batchnorm_rewriter.cc"],
+    hdrs = ["cudnn_batchnorm_rewriter.h"],
+    deps = [
+        ":ir_emission_utils",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_pass",
+        "@llvm//:core",
+    ],
+)
+
 cc_library(
     name = "infeed_manager",
     srcs = ["infeed_manager.cc"],
@@ -492,9 +528,9 @@ cc_library(
 )
 
 cc_library(
-    name = "layout_assignment",
-    srcs = ["layout_assignment.cc"],
-    hdrs = ["layout_assignment.h"],
+    name = "gpu_layout_assignment",
+    srcs = ["gpu_layout_assignment.cc"],
+    hdrs = ["gpu_layout_assignment.h"],
     deps = [
         ":ir_emission_utils",
         "//tensorflow/compiler/xla:shape_util",
@@ -508,17 +544,18 @@ cc_library(
 )
 
 tf_cc_test(
-    name = "layout_assignment_test",
-    srcs = ["layout_assignment_test.cc"],
+    name = "gpu_layout_assignment_test",
+    srcs = ["gpu_layout_assignment_test.cc"],
     deps = [
-        ":layout_assignment",
+        ":gpu_layout_assignment",
+        ":ir_emission_utils",
         "//tensorflow/compiler/xla:shape_layout",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:computation_layout",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
-        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
     ],
 )
 
@@ -586,6 +623,32 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "gpu_hlo_support_checker",
+    srcs = ["gpu_hlo_support_checker.cc"],
+    hdrs = ["gpu_hlo_support_checker.h"],
+    deps = [
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service:hlo_pass",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "gpu_hlo_support_checker_test",
+    srcs = ["gpu_hlo_support_checker_test.cc"],
+    deps = [
+        ":gpu_hlo_support_checker",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+    ],
+)
+
 # -----------------------------------------------------------------------------
 
 filegroup(
diff --git a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc
index 9fdf717b5d463010e2709b6209c070f25555de72..ed78fef4113bd9f7048ca3c8c2d4e38c5ec4762a 100644
--- a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc
+++ b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc
@@ -19,6 +19,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/ptr_util.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_constants.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -48,6 +49,15 @@ StatusOr<std::unique_ptr<BufferAllocations>> BufferAllocations::Builder::Build(
     // If buffer #i's address is already registered (e.g. external arguments or
     // result buffers), use that registered buffer.
     if (registered_buffers_.count(i)) {
+      se::DeviceMemoryBase address = FindOrDie(registered_buffers_, i);
+      if (reinterpret_cast<uintptr_t>(address.opaque()) %
+              kCudaMallocAlignBytes !=
+          0) {
+        return InternalError(
+            "Address of registered buffer %lld must be a multiple of %llx, but "
+            "was %p",
+            i, kCudaMallocAlignBytes, address.opaque());
+      }
       buffer_allocations->SetBuffer(i, FindOrDie(registered_buffers_, i));
       continue;
     }
@@ -67,6 +77,14 @@ StatusOr<std::unique_ptr<BufferAllocations>> BufferAllocations::Builder::Build(
               tensorflow::strings::HumanReadableNumBytes(buffer_size).c_str(),
               i);
         }
+        if (reinterpret_cast<uintptr_t>(buffer_address.opaque()) %
+                kCudaMallocAlignBytes !=
+            0) {
+          return InternalError(
+              "Address returned by memory_allocator->Allocate must be a "
+              "multiple of %llx, but was %p",
+              kCudaMallocAlignBytes, buffer_address.opaque());
+        }
       }
       buffer_allocations->SetBuffer(i, buffer_address);
       if (allocation.IsPreallocatedTempBuffer()) {
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
index 828ae675d7ba60b4cee1c3f5312b069263d5a814..b0626ca3bc9f843e513d4727932f0e2d5fa37748 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc
@@ -55,19 +55,7 @@ MatchBackwardFilter(HloInstruction* conv) {
   //               v       v
   //              Convolution
   //                 conv
-  //                   |
-  //                   v
-  //               Transpose (optional if identity transposition)
   CHECK_EQ(HloOpcode::kConvolution, conv->opcode());
-  // If the forward convolution is followed by a transpose, we can fuse the
-  // transpose into the backward convolution as well.
-  HloInstruction* transpose = nullptr;
-  if (conv->user_count() == 1) {
-    HloInstruction* single_user = *conv->users().begin();
-    if (single_user->opcode() == HloOpcode::kTranspose) {
-      transpose = single_user;
-    }
-  }
 
   // Step 2: match paddings and dimension numbers of the forward convolution.
   const ConvolutionDimensionNumbers& conv_dnums =
@@ -75,6 +63,9 @@ MatchBackwardFilter(HloInstruction* conv) {
   auto input_batch_dim = conv_dnums.input_batch_dimension();
   auto input_feature_dim = conv_dnums.input_feature_dimension();
   auto input_spatial_dims = conv_dnums.input_spatial_dimensions();
+  auto kernel_input_feature_dim = conv_dnums.kernel_input_feature_dimension();
+  auto kernel_output_feature_dim = conv_dnums.kernel_output_feature_dimension();
+  auto kernel_spatial_dims = conv_dnums.kernel_spatial_dimensions();
   auto output_batch_dim = conv_dnums.output_batch_dimension();
   auto output_feature_dim = conv_dnums.output_feature_dimension();
   auto output_spatial_dims = conv_dnums.output_spatial_dimensions();
@@ -96,9 +87,14 @@ MatchBackwardFilter(HloInstruction* conv) {
       VLOG(1) << "Padding low should be non-negative.";
       return no_match_result;
     }
+    if (window_dim.window_reversal()) {
+      VLOG(1) << "Window reversal field not supported";
+      return no_match_result;
+    }
     // Padding high will be checked in Step 3.
   }
-  if (transpose == nullptr && !window_util::HasWindowDilation(conv->window())) {
+  if (input_batch_dim == output_batch_dim &&
+      !window_util::HasWindowDilation(conv->window())) {
     VLOG(1) << conv->ToString()
             << " is a regular forward convolution. No need "
                "to fold it to a backward filter convolution.";
@@ -169,53 +165,32 @@ MatchBackwardFilter(HloInstruction* conv) {
     }
   }
 
-  // To make future HLO passes easier, we canonicalize the fused expression by
-  // adding an identity transposition if it's omitted in the pattern.
-  if (transpose == nullptr) {
-    // Create an identity transposition with the same rank as the forward
-    // convolution.
-    HloComputation* parent_computation = conv->parent();
-    std::vector<int64> transpose_dimensions(ShapeUtil::Rank(conv->shape()));
-    std::iota(transpose_dimensions.begin(), transpose_dimensions.end(), 0);
-    transpose =
-        parent_computation->AddInstruction(HloInstruction::CreateTranspose(
-            conv->shape(), conv, transpose_dimensions));
-    TF_CHECK_OK(conv->ReplaceAllUsesWith(transpose));
-  }
-
   // Restore the dimension numbers of the backward convolution from the forward
   // convolution. The two activation dimensions are reversed (batch and
   // feature).
   ConvolutionDimensionNumbers backward_conv_dnums;
   backward_conv_dnums.set_input_batch_dimension(input_feature_dim);
   backward_conv_dnums.set_input_feature_dimension(input_batch_dim);
-  backward_conv_dnums.set_output_batch_dimension(output_feature_dim);
-  backward_conv_dnums.set_output_feature_dimension(output_batch_dim);
   for (int i = 0; i < input_spatial_dims.size(); ++i) {
     backward_conv_dnums.add_input_spatial_dimensions(input_spatial_dims[i]);
   }
-  for (int i = 0; i < output_spatial_dims.size(); ++i) {
-    backward_conv_dnums.add_output_spatial_dimensions(output_spatial_dims[i]);
+  backward_conv_dnums.set_output_batch_dimension(kernel_input_feature_dim);
+  backward_conv_dnums.set_output_feature_dimension(kernel_output_feature_dim);
+  for (int i = 0; i < kernel_spatial_dims.size(); ++i) {
+    backward_conv_dnums.add_output_spatial_dimensions(kernel_spatial_dims[i]);
   }
   // The dimension numbering of the output of the forward convolution (before
   // transposition) is the same as that of the activations (according to the
   // semantics of kConvolution). The batch dimension of the activations should
   // be treated as the input feature dimension, and the feature dimension should
   // be treated as the output feature.
-  //
-  // The output of the forward convolution needs to be transposed to fit into
-  // the dimension numbering of the weight gradients. This transposition maps
-  // dimension i to PositionInContainer(transpose->dimensions(), i).
-  backward_conv_dnums.set_kernel_input_feature_dimension(
-      PositionInContainer(transpose->dimensions(), output_batch_dim));
-  backward_conv_dnums.set_kernel_output_feature_dimension(
-      PositionInContainer(transpose->dimensions(), output_feature_dim));
+  backward_conv_dnums.set_kernel_input_feature_dimension(output_batch_dim);
+  backward_conv_dnums.set_kernel_output_feature_dimension(output_feature_dim);
   for (int i = 0; i < output_spatial_dims.size(); ++i) {
-    backward_conv_dnums.add_kernel_spatial_dimensions(
-        PositionInContainer(transpose->dimensions(), output_spatial_dims[i]));
+    backward_conv_dnums.add_kernel_spatial_dimensions(output_spatial_dims[i]);
   }
 
-  return std::make_tuple(true, std::vector<HloInstruction*>({transpose, conv}),
+  return std::make_tuple(true, std::vector<HloInstruction*>({conv}),
                          backward_conv_window, backward_conv_dnums);
 }
 
@@ -275,6 +250,10 @@ MatchBackwardInput(HloInstruction* conv) {
               << " should have no window dilation.";
       return no_match_result;
     }
+    if (window_dim.window_reversal()) {
+      VLOG(1) << "Window reversal field not supported";
+      return no_match_result;
+    }
   }
 
   const auto& input_spatial_dims = dnums.input_spatial_dimensions();
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc
index 112c496e1f6bd17f89ac389ccf0256846dfa1971..34e6bdb117d47a3d7e1eb3bae5806e130e94ea79 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_folding_test.cc
@@ -46,18 +46,18 @@ class ConvolutionFoldingTest : public HloTestBase {
     //
     // TODO(jingyue): Add more tests on NCHW input order which TF also supports.
     tf_default_dnums_for_backward_filter_.set_input_batch_dimension(3);
-    tf_default_dnums_for_backward_filter_.set_output_batch_dimension(3);
     tf_default_dnums_for_backward_filter_.set_input_feature_dimension(0);
-    tf_default_dnums_for_backward_filter_.set_output_feature_dimension(0);
     tf_default_dnums_for_backward_filter_.add_input_spatial_dimensions(1);
-    tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(1);
     tf_default_dnums_for_backward_filter_.add_input_spatial_dimensions(2);
-    tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(2);
     tf_default_dnums_for_backward_filter_.set_kernel_input_feature_dimension(0);
     tf_default_dnums_for_backward_filter_.set_kernel_output_feature_dimension(
         3);
     tf_default_dnums_for_backward_filter_.add_kernel_spatial_dimensions(1);
     tf_default_dnums_for_backward_filter_.add_kernel_spatial_dimensions(2);
+    tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(0);
+    tf_default_dnums_for_backward_filter_.add_output_spatial_dimensions(1);
+    tf_default_dnums_for_backward_filter_.set_output_batch_dimension(2);
+    tf_default_dnums_for_backward_filter_.set_output_feature_dimension(3);
 
     tf_default_dnums_for_backward_input_.set_input_batch_dimension(0);
     tf_default_dnums_for_backward_input_.set_output_batch_dimension(0);
@@ -86,7 +86,7 @@ class ConvolutionFoldingTest : public HloTestBase {
   ConvolutionDimensionNumbers tf_default_dnums_for_backward_input_;
 };
 
-TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithoutTranspose) {
+TEST_F(ConvolutionFoldingTest, BackwardFilterConvolve) {
   HloComputation::Builder builder(TestName());
   HloInstruction* activations =
       builder.AddInstruction(HloInstruction::CreateParameter(
@@ -136,7 +136,7 @@ TEST_F(ConvolutionFoldingTest,
 
   auto module = CreateNewModule();
   module->AddEntryComputation(builder.Build());
-  EXPECT_FALSE(FoldConvolution(module.get()));
+  EXPECT_TRUE(FoldConvolution(module.get()));
 }
 
 // Extracted from block35 training.
@@ -155,13 +155,9 @@ TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithPaddedActivations) {
     conv_window.mutable_dimensions(i)->set_padding_low(1);
     conv_window.mutable_dimensions(i)->set_padding_high(1);
   }
-  HloInstruction* convolution =
-      builder.AddInstruction(HloInstruction::CreateConvolve(
-          ShapeUtil::MakeShape(F32, {32, 3, 3, 32}), activations, gradients,
-          conv_window, tf_default_dnums_for_backward_filter_));
-
-  builder.AddInstruction(HloInstruction::CreateTranspose(
-      ShapeUtil::MakeShape(F32, {3, 3, 32, 32}), convolution, {1, 2, 3, 0}));
+  builder.AddInstruction(HloInstruction::CreateConvolve(
+      ShapeUtil::MakeShape(F32, {32, 3, 3, 32}), activations, gradients,
+      conv_window, tf_default_dnums_for_backward_filter_));
 
   auto module = CreateNewModule();
   HloComputation* entry_computation =
@@ -189,13 +185,9 @@ TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithPaddedGradients) {
     conv_window.mutable_dimensions(i)->set_padding_high(-1);
     conv_window.mutable_dimensions(i)->set_window_dilation(2);
   }
-  HloInstruction* convolution =
-      builder.AddInstruction(HloInstruction::CreateConvolve(
-          ShapeUtil::MakeShape(F32, {320, 3, 3, 192}), activations, gradients,
-          conv_window, tf_default_dnums_for_backward_filter_));
-
-  builder.AddInstruction(HloInstruction::CreateTranspose(
-      ShapeUtil::MakeShape(F32, {3, 3, 192, 320}), convolution, {1, 2, 3, 0}));
+  builder.AddInstruction(HloInstruction::CreateConvolve(
+      ShapeUtil::MakeShape(F32, {320, 3, 3, 192}), activations, gradients,
+      conv_window, tf_default_dnums_for_backward_filter_));
 
   auto module = CreateNewModule();
   HloComputation* entry_computation =
@@ -222,13 +214,9 @@ TEST_F(ConvolutionFoldingTest, BackwardFilterConvolveWithUnevenPadding) {
     // Uneven padding: padding_low=0, padding_high=1
     conv_window.mutable_dimensions(i)->set_padding_high(1);
   }
-  HloInstruction* convolution =
-      builder.AddInstruction(HloInstruction::CreateConvolve(
-          ShapeUtil::MakeShape(F32, {32, 2, 2, 32}), activations, gradients,
-          conv_window, tf_default_dnums_for_backward_filter_));
-
-  builder.AddInstruction(HloInstruction::CreateTranspose(
-      ShapeUtil::MakeShape(F32, {2, 2, 32, 32}), convolution, {1, 2, 3, 0}));
+  builder.AddInstruction(HloInstruction::CreateConvolve(
+      ShapeUtil::MakeShape(F32, {32, 2, 2, 32}), activations, gradients,
+      conv_window, tf_default_dnums_for_backward_filter_));
 
   auto module = CreateNewModule();
   HloComputation* entry_computation =
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
index 037eec8ef59e1aeccdfc43dbb5c1a852403780d1..899cc5c83b99f1bb6154f883ca17871863e1f457 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
@@ -314,7 +314,9 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune(
     const ConvolutionDescriptor& convolution_descriptor,
     const BufferAllocations& buffer_allocations, se::Stream* stream) {
   // TODO(b/29126320): Try cudnn v5's new auto-tuner when it's rolled out.
-  if (best_algorithm_.algorithm().is_default()) {
+  if (!best_algorithm_.has_value()) {
+    best_algorithm_.emplace();
+
     // Auto-tuning either is disabled or only happens in the first run of this
     // function.
     VLOG(2) << "Profiling for best convolution algorithm used for "
@@ -363,35 +365,35 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune(
     }
 
     if (best_result.is_valid()) {
-      best_algorithm_.set_algorithm(best_result.algorithm());
+      best_algorithm_->set_algorithm(best_result.algorithm());
     } else {
       LOG(ERROR) << "No convolution algorithm works with profiling. Fall back "
                     "to the default algorithm.";
-      best_algorithm_.set_algorithm(AlgorithmDesc());
+      best_algorithm_->set_algorithm(AlgorithmDesc());
     }
 
     if (best_result_without_scratch.is_valid()) {
-      best_algorithm_.set_algorithm_no_scratch(
+      best_algorithm_->set_algorithm_no_scratch(
           best_result_without_scratch.algorithm());
     } else {
       LOG(ERROR) << "No convolution algorithm without scratch works with "
                     "profiling. Fall back "
                     "to the default algorithm.";
-      best_algorithm_.set_algorithm_no_scratch(AlgorithmDesc());
+      best_algorithm_->set_algorithm_no_scratch(AlgorithmDesc());
     }
   }
 
   {
     VLOG(2) << "Using convolution algorithm ("
-            << AlgorithmToString(best_algorithm_.algorithm()) << ", "
-            << AlgorithmToString(best_algorithm_.algorithm_no_scratch())
+            << AlgorithmToString(best_algorithm_->algorithm()) << ", "
+            << AlgorithmToString(best_algorithm_->algorithm_no_scratch())
             << ") for ConvolutionThunk: " << this;
     ConvolveScratchAllocator scratch_allocator(
         buffer_allocations.device_ordinal(),
         buffer_allocations.memory_allocator());
     return Convolve(input_descriptor, input_data, filter_descriptor,
                     filter_data, output_descriptor, output_data,
-                    convolution_descriptor, best_algorithm_, stream,
+                    convolution_descriptor, *best_algorithm_, stream,
                     &scratch_allocator, nullptr);
   }
 }
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
index 5ac5db2f04b6796c6013a7f87dd40b485233baa6..46c94d0bf1e486fb91e63109efb8e4ba778c4120 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 
 namespace xla {
@@ -87,6 +88,34 @@ class ConvolutionThunk : public Thunk {
       const BufferAllocations& buffer_allocations,
       perftools::gputools::Stream* stream) override;
 
+  // Returns true if the next run of ExecuteOnStream will do autotuning.  If so,
+  // we want the GPU to be quiescent during autotuning, so as not to introduce
+  // noise in our results.
+  bool ShouldHaltAllActivityBeforeRunning(
+      perftools::gputools::Stream*) override {
+    return !best_algorithm_.has_value();
+  }
+
+  // Return true if scratch memory is needed to execute the thunk, that is
+  // either the best algorithm hasn't been chosen or the best algorithm is not
+  // the same as the no-scratch algorithm. This is because that the execution
+  // of the thunk is asynchronous, and the scratch allocator goes out of
+  // scope before the thunk finishes execution. Returning true tells the stream
+  // executor to make future thunks wait for this thunk to avoid reusing the
+  // deallocated scratch memory until this thunk is done with it.
+  bool ShouldBlockFutureThunks() {
+    if (!best_algorithm_.has_value()) {
+      return true;
+    }
+
+    const perftools::gputools::dnn::AlgorithmDesc& best_alg =
+        best_algorithm_->algorithm();
+    const perftools::gputools::dnn::AlgorithmDesc& no_scratch_best_alg =
+        best_algorithm_->algorithm_no_scratch();
+    return (!best_alg.is_default() || !no_scratch_best_alg.is_default() ||
+            !(best_alg == no_scratch_best_alg));
+  }
+
  private:
   tensorflow::Status ConvolveWithTune(
       const perftools::gputools::dnn::BatchDescriptor& input_descriptor,
@@ -121,9 +150,10 @@ class ConvolutionThunk : public Thunk {
 
   // Fastest cuDNN convolution algorithm for this thunk learned from
   // auto-tuning. If auto-tuning is disabled or failed, best_algorithm_ is set
-  // to the default value indicating cuDNN's convolution will choose
-  // the best algorithm from some heuristics based on its parameters.
-  perftools::gputools::dnn::AlgorithmConfig best_algorithm_;
+  // to the default value, indicating cuDNN's convolution will choose the best
+  // algorithm from some heuristics based on its parameters.
+  tensorflow::gtl::optional<perftools::gputools::dnn::AlgorithmConfig>
+      best_algorithm_;
 
   const ConvolutionKind convolution_kind_;
 
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc
new file mode 100644
index 0000000000000000000000000000000000000000..db6924c742e4a949a3e939b6d6659e92c2d1e312
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc
@@ -0,0 +1,219 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
+
+namespace xla {
+namespace gpu {
+namespace {
+
+class Visitor : public DfsHloVisitorWithDefault {
+ public:
+  explicit Visitor(HloComputation* computation) : computation_(computation) {}
+
+  static bool Run(HloComputation* computation) {
+    Visitor visitor(computation);
+    TF_CHECK_OK(computation->Accept(&visitor));
+    return visitor.changed_;
+  }
+
+  Status DefaultAction(HloInstruction* /*hlo_instruction*/) override {
+    return Status::OK();
+  }
+
+  Status HandleBatchNormInference(HloInstruction* batch_norm) override;
+  Status HandleBatchNormTraining(HloInstruction* batch_norm) override;
+  Status HandleBatchNormGrad(HloInstruction* batch_norm) override;
+
+ private:
+  bool changed_ = false;
+  HloComputation* computation_;
+};
+
+// cudnn defines CUDNN_BN_MIN_EPSILON = 1e-5 as the minimum acceptable epsilon
+// for calls to its batchnorm ops.
+bool EpsilonInRange(HloInstruction* batch_norm) {
+  return batch_norm->epsilon() >= 1e-5;
+}
+
+Status Visitor::HandleBatchNormInference(HloInstruction* batch_norm) {
+  if (batch_norm->operand(0)->shape().element_type() != F32) {
+    VLOG(1) << "Not rewriting op with non-F32 element type: "
+            << batch_norm->ToString();
+    return Status::OK();
+  }
+
+  // cudnn errors out on zero-sized inputs.
+  if (ShapeUtil::ElementsIn(batch_norm->operand(0)->shape()) == 0) {
+    return Status::OK();
+  }
+
+  if (!EpsilonInRange(batch_norm)) {
+    return Status::OK();
+  }
+
+  HloInstruction* epsilon = computation_->AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0(batch_norm->epsilon())));
+  HloInstruction* feature_index =
+      computation_->AddInstruction(HloInstruction::CreateConstant(
+          Literal::CreateR0(batch_norm->feature_index())));
+
+  std::vector<HloInstruction*> operands(batch_norm->operands().begin(),
+                                        batch_norm->operands().end());
+  operands.push_back(epsilon);
+  operands.push_back(feature_index);
+
+  std::unique_ptr<HloInstruction> libcall = HloInstruction::CreateCustomCall(
+      batch_norm->shape(), operands, kCudnnBatchNormForwardInferenceCallTarget);
+  TF_RETURN_IF_ERROR(
+      computation_->ReplaceWithNewInstruction(batch_norm, std::move(libcall)));
+  changed_ = true;
+  return Status::OK();
+}
+
+Status Visitor::HandleBatchNormTraining(HloInstruction* batch_norm) {
+  if (batch_norm->operand(0)->shape().element_type() != F32) {
+    VLOG(1) << "Not rewriting op with non-F32 element type: "
+            << batch_norm->ToString();
+    return Status::OK();
+  }
+
+  // cudnn errors out on zero-sized inputs.
+  if (ShapeUtil::ElementsIn(batch_norm->operand(0)->shape()) == 0) {
+    return Status::OK();
+  }
+
+  if (!EpsilonInRange(batch_norm)) {
+    return Status::OK();
+  }
+
+  HloInstruction* epsilon = computation_->AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0(batch_norm->epsilon())));
+  HloInstruction* feature_index =
+      computation_->AddInstruction(HloInstruction::CreateConstant(
+          Literal::CreateR0(batch_norm->feature_index())));
+
+  std::vector<HloInstruction*> operands(batch_norm->operands().begin(),
+                                        batch_norm->operands().end());
+  operands.push_back(epsilon);
+  operands.push_back(feature_index);
+
+  HloInstruction* libcall =
+      computation_->AddInstruction(HloInstruction::CreateCustomCall(
+          batch_norm->shape(), operands,
+          kCudnnBatchNormForwardTrainingCallTarget));
+
+  // The cudnn libcall returns a tuple
+  //   {output, mean, rsqrt(variance + epsilon)},
+  // but the batchnorm HLO returns {output, mean, variance}.  Fix it up.
+  HloInstruction* inverse_stddev =
+      computation_->AddInstruction(HloInstruction::CreateGetTupleElement(
+          libcall->shape().tuple_shapes(2), libcall, 2));
+  HloInstruction* variance_plus_epsilon =
+      computation_->AddInstruction(HloInstruction::CreateBinary(
+          inverse_stddev->shape(), HloOpcode::kPower, inverse_stddev,
+          computation_->AddInstruction(
+              HloInstruction::CreateConstant(Literal::CreateR0<float>(-2)))));
+  HloInstruction* variance =
+      computation_->AddInstruction(HloInstruction::CreateBinary(
+          variance_plus_epsilon->shape(), HloOpcode::kSubtract,
+          variance_plus_epsilon, epsilon));
+
+  // Repackage the results.
+  std::unique_ptr<HloInstruction> new_tuple = HloInstruction::CreateTuple({
+      computation_->AddInstruction(HloInstruction::CreateGetTupleElement(
+          libcall->shape().tuple_shapes(0), libcall, 0)),
+      computation_->AddInstruction(HloInstruction::CreateGetTupleElement(
+          libcall->shape().tuple_shapes(1), libcall, 1)),
+      variance,
+  });
+
+  TF_RETURN_IF_ERROR(computation_->ReplaceWithNewInstruction(
+      batch_norm, std::move(new_tuple)));
+  changed_ = true;
+  return Status::OK();
+}
+
+Status Visitor::HandleBatchNormGrad(HloInstruction* batch_norm) {
+  if (batch_norm->operand(0)->shape().element_type() != F32) {
+    VLOG(1) << "Not rewriting op with non-F32 element type: "
+            << batch_norm->ToString();
+    return Status::OK();
+  }
+
+  // cudnn errors out on zero-sized inputs.
+  if (ShapeUtil::ElementsIn(batch_norm->operand(0)->shape()) == 0) {
+    return Status::OK();
+  }
+
+  if (!EpsilonInRange(batch_norm)) {
+    return Status::OK();
+  }
+
+  HloInstruction* epsilon = computation_->AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0(batch_norm->epsilon())));
+  HloInstruction* feature_index =
+      computation_->AddInstruction(HloInstruction::CreateConstant(
+          Literal::CreateR0(batch_norm->feature_index())));
+
+  // The cudnn libcall expects its input to be rsqrt(variance + epsilon), but
+  // the batchnorm HLO takes plain variance as input.  Fix it up.
+  HloInstruction* var_plus_epsilon =
+      computation_->AddInstruction(HloInstruction::CreateBinary(
+          batch_norm->operand(3)->shape(), HloOpcode::kAdd,
+          batch_norm->mutable_operand(3), epsilon));
+  HloInstruction* inverse_stddev =
+      computation_->AddInstruction(HloInstruction::CreateBinary(
+          var_plus_epsilon->shape(), HloOpcode::kPower, var_plus_epsilon,
+          computation_->AddInstruction(
+              HloInstruction::CreateConstant(Literal::CreateR0<float>(-.5)))));
+
+  std::vector<HloInstruction*> operands(batch_norm->operands().begin(),
+                                        batch_norm->operands().end());
+  operands[3] = inverse_stddev;
+  operands.push_back(epsilon);
+  operands.push_back(feature_index);
+
+  std::unique_ptr<HloInstruction> libcall = HloInstruction::CreateCustomCall(
+      batch_norm->shape(), operands, kCudnnBatchNormBackwardCallTarget);
+
+  TF_RETURN_IF_ERROR(
+      computation_->ReplaceWithNewInstruction(batch_norm, std::move(libcall)));
+  changed_ = true;
+  return Status::OK();
+}
+
+}  // anonymous namespace
+
+StatusOr<bool> CudnnBatchNormRewriter::Run(HloModule* module) {
+  VLOG(2) << "CudnnBatchNormRewriter::Run(), before:";
+  XLA_VLOG_LINES(2, module->ToString());
+
+  bool changed = false;
+  for (auto* comp : module->MakeNonfusionComputations()) {
+    if (Visitor::Run(comp)) {
+      changed = true;
+    }
+  }
+
+  VLOG(2) << "CudnnBatchNormRewriter::Run(), after:";
+  XLA_VLOG_LINES(2, module->ToString());
+  return changed;
+}
+
+}  // namespace gpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..e09cde9abf85454c7a020566cd8c2671ae12ffc3
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h
@@ -0,0 +1,66 @@
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_BATCHNORM_REWRITER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_BATCHNORM_REWRITER_H_
+
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+namespace gpu {
+
+// Rewrites BatchNorm HLOs into calls into cudnn where possible.
+//
+// A call into cudnn for performing a batchnorm op is represented as a
+// CustomCall HLO with custom_call_target equal to one of
+//
+//   - kCudnnBatchNormForwardInferenceCallTarget
+//   - kCudnnBatchNormForwardTrainingCallTarget, or
+//   - kCudnnBatchNormBackwardCallTarget.
+//
+// A CustomCall created by this pass has the same operands corresponding
+// batchnorm HLO, except the epsilon() and feature_index() properties of the
+// batchnorm HLO are converted into proper operands, added to the end of the
+// CustomCall's operands list.
+//
+// The inputs/outputs of the cudnn calls for BatchNormTraining and BatchNormGrad
+// do not correspond exactly to the HLOs.  In particular, the training cudnn
+// call returns 1/sqrt(variance + epsilon), while the HLO returns plain
+// variance.  Similarly, the grad cudnn call expects 1/sqrt(variance + epsilon)
+// as input, whereas the HLO expects plain variance.
+//
+// This pass adds HLOs in front of / behind the CustomCalls to fix up the
+// inputs/outputs as appropriate, and we rely on the AlgebraicSimplifier to
+// remove these where possible.
+//
+// Currently batchnorm ops over F32s are converted into cudnn calls, so long as
+// epsilon is not too small.  This pass leaves other batchnorm ops unmodified.
+//
+// The GPU backend does not implement a lowering for the batchnorm HLOs -- it
+// expects them to be lowered to cudnn calls via this pass or to HLO soup via
+// BatchNormRewriter.
+class CudnnBatchNormRewriter : public HloPassInterface {
+ public:
+  tensorflow::StringPiece name() const override {
+    return "cudnn_batchnorm_rewriter";
+  }
+  StatusOr<bool> Run(HloModule* module) override;
+};
+
+}  // namespace gpu
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_BATCHNORM_REWRITER_H_
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc
new file mode 100644
index 0000000000000000000000000000000000000000..58d9c8caff31e878487fbef01afce566e6187fd9
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc
@@ -0,0 +1,285 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h"
+
+#include <string>
+
+#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
+
+namespace xla {
+namespace gpu {
+
+namespace se = ::perftools::gputools;
+namespace dnn = se::dnn;
+
+static std::pair<dnn::BatchDescriptor /*input_desc*/,
+                 dnn::BatchDescriptor /*scale_offset_desc*/>
+MakeDescriptors(const Shape& shape, int64 feature_index) {
+  std::vector<int64> logical_to_physical =
+      LayoutUtil::MakeLogicalToPhysical(shape.layout());
+
+  auto physical_dim_size = [&](int64 physical_dim) {
+    return shape.dimensions(LayoutUtil::Major(shape.layout(), physical_dim));
+  };
+
+  // Batchnorm only cares about the location of the depth (aka "feature") dim.
+  // The other dims are all treated the same.  Thus we can use the kBatchDepthYX
+  // cudnn layout for any XLA shape+layout, even XLA shapes that don't have
+  // exactly 4 dimensions: We put everything that comes before the feature dim
+  // into "batch", and everything that comes after the feature dim into "Y".
+  int64 batch_size = 1;
+  int64 y_size = 1;
+  int64 physical_dim;
+  for (physical_dim = 0; physical_dim != logical_to_physical[feature_index];
+       ++physical_dim) {
+    CHECK_LT(physical_dim, shape.dimensions_size());
+    batch_size *= physical_dim_size(physical_dim);
+  }
+  ++physical_dim;  // Skip the feature dimension.
+  for (; physical_dim < shape.dimensions_size(); ++physical_dim) {
+    y_size *= physical_dim_size(physical_dim);
+  }
+
+  dnn::BatchDescriptor input_desc;
+  input_desc.set_layout(dnn::DataLayout::kBatchDepthYX)
+      .set_count(batch_size)
+      .set_feature_map_count(shape.dimensions(feature_index))
+      .set_height(y_size)
+      .set_width(1);
+
+  dnn::BatchDescriptor scale_offset_desc;
+  scale_offset_desc.set_layout(dnn::DataLayout::kBatchDepthYX)
+      .set_feature_map_count(input_desc.feature_map_count())
+      .set_height(1)
+      .set_width(1)
+      .set_count(1);
+
+  return std::make_pair(input_desc, scale_offset_desc);
+}
+
+CudnnBatchNormForwardInferenceThunk::CudnnBatchNormForwardInferenceThunk(
+    const BufferAllocation::Slice& operand,
+    const BufferAllocation::Slice& scale, const BufferAllocation::Slice& offset,
+    const BufferAllocation::Slice& mean,
+    const BufferAllocation::Slice& variance, float epsilon, int64 feature_index,
+    const BufferAllocation::Slice& output, const HloInstruction* hlo)
+    : Thunk(Thunk::Kind::kCudnnBatchNormForwardInference, hlo),
+      operand_(operand),
+      scale_(scale),
+      offset_(offset),
+      mean_(mean),
+      variance_(variance),
+      epsilon_(epsilon),
+      feature_index_(feature_index),
+      output_(output) {
+  CHECK_EQ(hlo->opcode(), HloOpcode::kCustomCall);
+  CHECK_EQ(hlo->custom_call_target(),
+           kCudnnBatchNormForwardInferenceCallTarget);
+  CHECK(
+      LayoutUtil::LayoutsInShapesEqual(hlo->shape(), hlo->operand(0)->shape()));
+  CHECK_EQ(hlo->shape().element_type(), F32) << "Not yet implemented";
+}
+
+Status CudnnBatchNormForwardInferenceThunk::ExecuteOnStream(
+    const BufferAllocations& buffer_allocations, se::Stream* stream) {
+  dnn::BatchDescriptor operand_desc;
+  dnn::BatchDescriptor scale_offset_desc;
+  std::tie(operand_desc, scale_offset_desc) =
+      MakeDescriptors(hlo_instruction()->shape(), feature_index_);
+
+  se::DeviceMemory<float> output(buffer_allocations.GetDeviceAddress(output_));
+  stream->ThenBatchNormalizationForward(
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(operand_)),
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(scale_)),
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(offset_)),
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(mean_)),
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(variance_)),
+      operand_desc,                //
+      scale_offset_desc,           //
+      epsilon_,                    //
+      &output,                     //
+      /*batch_mean=*/nullptr,      //
+      /*batch_var=*/nullptr,       //
+      /*saved_mean=*/nullptr,      //
+      /*saved_inv_var=*/nullptr,   //
+      /*is_training=*/false,       //
+      /*var_to_inv_var=*/nullptr,  //
+      /*inv_var_to_var=*/nullptr);
+  if (!stream->ok()) {
+    return InternalError("BatchNormalizationForward call failed.");
+  }
+  return Status::OK();
+}
+
+CudnnBatchNormForwardTrainingThunk::CudnnBatchNormForwardTrainingThunk(
+    const BufferAllocation::Slice& operand,
+    const BufferAllocation::Slice& scale, const BufferAllocation::Slice& offset,
+    float epsilon, int64 feature_index,
+    const BufferAllocation::Slice& output_data,
+    const BufferAllocation::Slice& output_mean,
+    const BufferAllocation::Slice& output_inv_stddev,
+    const BufferAllocation::Slice& output_tuple, const HloInstruction* hlo)
+    : Thunk(Thunk::Kind::kCudnnBatchNormForwardTraining, hlo),
+      operand_(operand),
+      scale_(scale),
+      offset_(offset),
+      epsilon_(epsilon),
+      feature_index_(feature_index),
+      output_data_(output_data),
+      output_mean_(output_mean),
+      output_inv_stddev_(output_inv_stddev),
+      output_tuple_(output_tuple) {
+  CHECK_EQ(hlo->opcode(), HloOpcode::kCustomCall);
+  CHECK_EQ(hlo->custom_call_target(), kCudnnBatchNormForwardTrainingCallTarget);
+  CHECK_EQ(hlo->shape().tuple_shapes_size(), 3);
+  CHECK(LayoutUtil::LayoutsInShapesEqual(hlo->shape().tuple_shapes(0),
+                                         hlo->operand(0)->shape()));
+  for (const auto& tuple_shape : hlo->shape().tuple_shapes()) {
+    CHECK_EQ(tuple_shape.element_type(), F32) << "Not yet implemented";
+  }
+}
+
+Status CudnnBatchNormForwardTrainingThunk::ExecuteOnStream(
+    const BufferAllocations& buffer_allocations, se::Stream* stream) {
+  dnn::BatchDescriptor operand_desc;
+  dnn::BatchDescriptor scale_offset_desc;
+  // The BatchNormTraining HLO outputs a tuple of three elements: output data,
+  // batch mean, and batch variance.  We want to make our descriptors based on
+  // the shape of the output data.
+  std::tie(operand_desc, scale_offset_desc) = MakeDescriptors(
+      hlo_instruction()->shape().tuple_shapes(0), feature_index_);
+
+  se::DeviceMemory<float> output_data(
+      buffer_allocations.GetDeviceAddress(output_data_));
+  se::DeviceMemory<float> output_mean(
+      buffer_allocations.GetDeviceAddress(output_mean_));
+  se::DeviceMemory<float> output_inv_stddev(
+      buffer_allocations.GetDeviceAddress(output_inv_stddev_));
+
+  se::DeviceMemory<float> null_device_ptr(nullptr);
+  stream->ThenBatchNormalizationForward(
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(operand_)),
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(scale_)),
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(offset_)),
+      /*estimated_mean=*/null_device_ptr,
+      /*estimated_variance=*/null_device_ptr,
+      operand_desc,                          //
+      scale_offset_desc,                     //
+      epsilon_,                              //
+      &output_data,                          //
+      /*batch_mean=*/&null_device_ptr,       //
+      /*batch_var=*/&null_device_ptr,        //
+      /*saved_mean=*/&output_mean,           //
+      /*saved_inv_var=*/&output_inv_stddev,  //
+      /*is_training=*/true,                  //
+      /*var_to_inv_var=*/nullptr,            //
+      /*inv_var_to_var=*/nullptr);
+
+  // Write the tuple.
+  void* ptrs[] = {output_data.opaque(), output_mean.opaque(),
+                  output_inv_stddev.opaque()};
+  se::DeviceMemory<void*> tuple_addr(
+      buffer_allocations.GetDeviceAddress(output_tuple_));
+  stream->ThenMemcpyH2D<void*>(ptrs, &tuple_addr);
+
+  if (!stream->ok()) {
+    return InternalError("BatchNormalizationTraining call failed.");
+  }
+  return Status::OK();
+}
+
+CudnnBatchNormBackwardThunk::CudnnBatchNormBackwardThunk(
+    const BufferAllocation::Slice& operand,
+    const BufferAllocation::Slice& scale, const BufferAllocation::Slice& mean,
+    const BufferAllocation::Slice& inv_stddev,
+    const BufferAllocation::Slice& grad_output, float epsilon,
+    int64 feature_index, const BufferAllocation::Slice& output_grad_data,
+    const BufferAllocation::Slice& output_grad_scale,
+    const BufferAllocation::Slice& output_grad_offset,
+    const BufferAllocation::Slice& output_tuple, const HloInstruction* hlo)
+    : Thunk(Thunk::Kind::kCudnnBatchNormBackward, hlo),
+      operand_(operand),
+      scale_(scale),
+      mean_(mean),
+      inv_stddev_(inv_stddev),
+      grad_output_(grad_output),
+      epsilon_(epsilon),
+      feature_index_(feature_index),
+      output_grad_data_(output_grad_data),
+      output_grad_scale_(output_grad_scale),
+      output_grad_offset_(output_grad_offset),
+      output_tuple_(output_tuple) {
+  CHECK_EQ(hlo->opcode(), HloOpcode::kCustomCall);
+  CHECK_EQ(hlo->custom_call_target(), kCudnnBatchNormBackwardCallTarget);
+  CHECK_EQ(hlo->shape().tuple_shapes_size(), 3);
+  CHECK(LayoutUtil::LayoutsInShapesEqual(hlo->shape().tuple_shapes(0),
+                                         hlo->operand(0)->shape()));
+  CHECK(LayoutUtil::LayoutsInShapesEqual(hlo->shape().tuple_shapes(0),
+                                         hlo->operand(4)->shape()));
+  for (const auto& tuple_shape : hlo->shape().tuple_shapes()) {
+    CHECK_EQ(tuple_shape.element_type(), F32) << "Not yet implemented";
+  }
+}
+
+Status CudnnBatchNormBackwardThunk::ExecuteOnStream(
+    const BufferAllocations& buffer_allocations, se::Stream* stream) {
+  dnn::BatchDescriptor operand_desc;
+  dnn::BatchDescriptor scale_offset_desc;
+
+  // This call outputs a tuple of three elements: grad data, grad offset, and
+  // grad scale.  We want to make our descriptors based on the shape of the grad
+  // data.
+  std::tie(operand_desc, scale_offset_desc) = MakeDescriptors(
+      hlo_instruction()->shape().tuple_shapes(0), feature_index_);
+
+  se::DeviceMemory<float> output_grad_data(
+      buffer_allocations.GetDeviceAddress(output_grad_data_));
+  se::DeviceMemory<float> output_grad_scale(
+      buffer_allocations.GetDeviceAddress(output_grad_scale_));
+  se::DeviceMemory<float> output_grad_offset(
+      buffer_allocations.GetDeviceAddress(output_grad_offset_));
+
+  stream->ThenBatchNormalizationBackward(
+      se::DeviceMemory<float>(
+          buffer_allocations.GetDeviceAddress(grad_output_)),
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(operand_)),
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(scale_)),
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(mean_)),
+      se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(inv_stddev_)),
+      operand_desc, scale_offset_desc, epsilon_, &output_grad_data,
+      &output_grad_scale, &output_grad_offset);
+
+  // Write the output tuple.
+  void* ptrs[] = {output_grad_data.opaque(), output_grad_scale.opaque(),
+                  output_grad_offset.opaque()};
+  se::DeviceMemory<void*> tuple_addr(
+      buffer_allocations.GetDeviceAddress(output_tuple_));
+  stream->ThenMemcpyH2D<void*>(ptrs, &tuple_addr);
+
+  if (!stream->ok()) {
+    return InternalError("BatchNormalizationBackward call failed.");
+  }
+  return Status::OK();
+}
+
+}  // namespace gpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h
new file mode 100644
index 0000000000000000000000000000000000000000..c5fbb6d8a3912d380172d496d8d35e80dc9f5c71
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h
@@ -0,0 +1,145 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_BATCHNORM_THUNK_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_BATCHNORM_THUNK_H_
+
+#include "tensorflow/compiler/xla/service/buffer_assignment.h"
+#include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
+#include "tensorflow/compiler/xla/service/gpu/thunk.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace xla {
+namespace gpu {
+
+// This file contains thunks which call into cudnn to run the various flavors of
+// batch normalization: BatchNormInference, BatchNormTraining, and
+// BatchNormGrad, known to cudnn as BatchNormForwardInference,
+// BatchNormForwardTraining, and BatchNormBackward.
+//
+// As an alternative to using these thunks, XLA can decompose batchnorm HLOs
+// into smaller components using the BatchNormRewriter pass.  This can result in
+// faster code because those individual components can fuse into their
+// inputs/outputs, but it may also be slower if cudnn's batchnorm implementation
+// outperforms the code XLA generates for these components.
+//
+// Currently these thunks require that their inputs are F32s.
+//
+// Note that these thunks do not take full advantage of the cudnn batchnorm
+// functions.  For example, cudnn lets you bias and/or scale the input/output,
+// but these thunks don't currently support that.
+
+class CudnnBatchNormForwardInferenceThunk : public Thunk {
+ public:
+  CudnnBatchNormForwardInferenceThunk(const BufferAllocation::Slice& operand,
+                                      const BufferAllocation::Slice& scale,
+                                      const BufferAllocation::Slice& offset,
+                                      const BufferAllocation::Slice& mean,
+                                      const BufferAllocation::Slice& variance,
+                                      float epsilon, int64 feature_index,
+                                      const BufferAllocation::Slice& output,
+                                      const HloInstruction* hlo);
+
+  CudnnBatchNormForwardInferenceThunk(
+      const CudnnBatchNormForwardInferenceThunk&) = delete;
+  CudnnBatchNormForwardInferenceThunk& operator=(
+      const CudnnBatchNormForwardInferenceThunk&) = delete;
+
+  Status ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                         perftools::gputools::Stream* stream) override;
+
+ private:
+  BufferAllocation::Slice operand_;
+  BufferAllocation::Slice scale_;
+  BufferAllocation::Slice offset_;
+  BufferAllocation::Slice mean_;
+  BufferAllocation::Slice variance_;
+  float epsilon_;
+  int64 feature_index_;
+  BufferAllocation::Slice output_;
+};
+
+class CudnnBatchNormForwardTrainingThunk : public Thunk {
+ public:
+  CudnnBatchNormForwardTrainingThunk(
+      const BufferAllocation::Slice& operand,
+      const BufferAllocation::Slice& scale,
+      const BufferAllocation::Slice& offset, float epsilon, int64 feature_index,
+      const BufferAllocation::Slice& output_data,
+      const BufferAllocation::Slice& output_mean,
+      const BufferAllocation::Slice& output_inv_stddev,
+      const BufferAllocation::Slice& output_tuple, const HloInstruction* hlo);
+
+  CudnnBatchNormForwardTrainingThunk(
+      const CudnnBatchNormForwardTrainingThunk&) = delete;
+  CudnnBatchNormForwardTrainingThunk& operator=(
+      const CudnnBatchNormForwardTrainingThunk&) = delete;
+
+  Status ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                         perftools::gputools::Stream* stream) override;
+
+ private:
+  BufferAllocation::Slice operand_;
+  BufferAllocation::Slice scale_;
+  BufferAllocation::Slice offset_;
+  float epsilon_;
+  int64 feature_index_;
+  BufferAllocation::Slice output_data_;
+  BufferAllocation::Slice output_mean_;
+  BufferAllocation::Slice output_inv_stddev_;
+  BufferAllocation::Slice output_tuple_;
+};
+
+class CudnnBatchNormBackwardThunk : public Thunk {
+ public:
+  CudnnBatchNormBackwardThunk(const BufferAllocation::Slice& operand,
+                              const BufferAllocation::Slice& scale,
+                              const BufferAllocation::Slice& mean,
+                              const BufferAllocation::Slice& inv_stddev,
+                              const BufferAllocation::Slice& grad_output,
+                              float epsilon, int64 feature_index,
+                              const BufferAllocation::Slice& output_grad_data,
+                              const BufferAllocation::Slice& output_grad_scale,
+                              const BufferAllocation::Slice& output_grad_offset,
+                              const BufferAllocation::Slice& output_tuple,
+                              const HloInstruction* hlo);
+
+  CudnnBatchNormBackwardThunk(const CudnnBatchNormBackwardThunk&) = delete;
+  CudnnBatchNormBackwardThunk& operator=(const CudnnBatchNormBackwardThunk&) =
+      delete;
+
+  Status ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                         perftools::gputools::Stream* stream) override;
+
+ private:
+  BufferAllocation::Slice operand_;
+  BufferAllocation::Slice scale_;
+  BufferAllocation::Slice mean_;
+  BufferAllocation::Slice inv_stddev_;
+  BufferAllocation::Slice grad_output_;
+  float epsilon_;
+  int64 feature_index_;
+  BufferAllocation::Slice output_grad_data_;
+  BufferAllocation::Slice output_grad_scale_;
+  BufferAllocation::Slice output_grad_offset_;
+  BufferAllocation::Slice output_tuple_;
+};
+
+}  // namespace gpu
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_BATCHNORM_THUNK_H_
diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
index 6bf00cfb8a53723ae9608093480bf2eed10144dd..4b511cb4bb94addfae53d6b2e6d6f86d5b9afd84 100644
--- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
@@ -135,10 +135,6 @@ StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitFloatBinaryOp(
   PrimitiveType rhs_input_type = op->operand(1)->shape().element_type();
   PrimitiveType output_type = op->shape().element_type();
   switch (op->opcode()) {
-    case HloOpcode::kAtan2:
-      return EmitLibdeviceMathCall("__nv_atan2", {lhs_value, rhs_value},
-                                   {lhs_input_type, rhs_input_type},
-                                   output_type);
     case HloOpcode::kRemainder: {
       return EmitLibdeviceMathCall("__nv_fmod", {lhs_value, rhs_value},
                                    {lhs_input_type, rhs_input_type},
@@ -199,29 +195,50 @@ StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitErfcInv(
   return EmitLibdeviceMathCall("__nv_erfcinv", {value}, {prim_type}, prim_type);
 }
 
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitLog(
+    PrimitiveType prim_type, llvm::Value* value) const {
+  return EmitLibdeviceMathCall("__nv_log", {value}, {prim_type}, prim_type);
+}
+
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitSin(
+    PrimitiveType prim_type, llvm::Value* value) const {
+  return EmitLibdeviceMathCall("__nv_sin", {value}, {prim_type}, prim_type);
+}
+
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitCos(
+    PrimitiveType prim_type, llvm::Value* value) const {
+  return EmitLibdeviceMathCall("__nv_cos", {value}, {prim_type}, prim_type);
+}
+
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitExp(
+    PrimitiveType prim_type, llvm::Value* value) const {
+  return EmitLibdeviceMathCall("__nv_exp", {value}, {prim_type}, prim_type);
+}
+
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitPow(PrimitiveType prim_type,
+                                                      llvm::Value* lhs,
+                                                      llvm::Value* rhs) const {
+  return EmitLibdeviceMathCall("__nv_pow", {lhs, rhs}, {prim_type, prim_type},
+                               prim_type);
+}
+
+StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitAtan2(
+    PrimitiveType prim_type, llvm::Value* lhs, llvm::Value* rhs) const {
+  return EmitLibdeviceMathCall("__nv_atan2", {lhs, rhs}, {prim_type, prim_type},
+                               prim_type);
+}
+
 StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitFloatUnaryOp(
     const HloInstruction* op, llvm::Value* operand_value) const {
   PrimitiveType input_type = op->operand(0)->shape().element_type();
   PrimitiveType output_type = op->shape().element_type();
   switch (op->opcode()) {
-    case HloOpcode::kExp:
-      return EmitLibdeviceMathCall("__nv_exp", {operand_value}, {input_type},
-                                   output_type);
     case HloOpcode::kFloor:
       return EmitLibdeviceMathCall("__nv_floor", {operand_value}, {input_type},
                                    output_type);
     case HloOpcode::kCeil:
       return EmitLibdeviceMathCall("__nv_ceil", {operand_value}, {input_type},
                                    output_type);
-    case HloOpcode::kLog:
-      return EmitLibdeviceMathCall("__nv_log", {operand_value}, {input_type},
-                                   output_type);
-    case HloOpcode::kCos:
-      return EmitLibdeviceMathCall("__nv_cos", {operand_value}, {input_type},
-                                   output_type);
-    case HloOpcode::kSin:
-      return EmitLibdeviceMathCall("__nv_sin", {operand_value}, {input_type},
-                                   output_type);
     case HloOpcode::kTanh:
       return EmitLibdeviceMathCall("__nv_tanh", {operand_value}, {input_type},
                                    output_type);
@@ -230,224 +247,6 @@ StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitFloatUnaryOp(
   }
 }
 
-StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitComplexBinaryOp(
-    const HloInstruction* op, llvm::Value* lhs_value,
-    llvm::Value* rhs_value) const {
-  PrimitiveType input_type = op->operand(0)->shape().element_type();
-  TF_RET_CHECK(primitive_util::IsComplexType(input_type));
-  PrimitiveType component_type =
-      primitive_util::ComplexComponentType(input_type);
-  switch (op->opcode()) {
-    case HloOpcode::kPower: {
-      // (a+bi)^(c+di) =
-      //    (a*a+b*b)^(0.5c) * exp(-d*atan2(b,a)) * (cos(q) + i*sin(q)),
-      //    where q = c*atan2(b,a)+0.5d*ln(a*a+b*b)
-      auto a = EmitExtractReal(lhs_value);
-      auto b = EmitExtractImag(lhs_value);
-      auto c = EmitExtractReal(rhs_value);
-      auto d = EmitExtractImag(rhs_value);
-      auto aa_p_bb = ir_builder_->CreateFAdd(ir_builder_->CreateFMul(a, a),
-                                             ir_builder_->CreateFMul(b, b));
-      auto one_half = llvm::ConstantFP::get(a->getType(), 0.5);
-      auto half_c = ir_builder_->CreateFMul(one_half, c);
-
-      TF_ASSIGN_OR_RETURN(
-          auto aa_p_bb_to_half_c,
-          EmitLibdeviceMathCall("__nv_pow", {aa_p_bb, half_c},
-                                {component_type, component_type},
-                                component_type));
-      auto neg_d = ir_builder_->CreateFNeg(d);
-      TF_ASSIGN_OR_RETURN(
-          auto arg_lhs, EmitLibdeviceMathCall("__nv_atan2", {b, a},
-                                              {component_type, component_type},
-                                              component_type));
-      auto neg_d_arg_lhs = ir_builder_->CreateFMul(neg_d, arg_lhs);
-      TF_ASSIGN_OR_RETURN(
-          auto e_to_neg_d_arg_lhs,
-          EmitLibdeviceMathCall("__nv_exp", {neg_d_arg_lhs}, {component_type},
-                                component_type));
-      auto coeff =
-          ir_builder_->CreateFMul(aa_p_bb_to_half_c, e_to_neg_d_arg_lhs);
-      TF_ASSIGN_OR_RETURN(
-          auto ln_aa_p_bb,
-          EmitLibdeviceMathCall("__nv_log", {aa_p_bb}, {component_type},
-                                component_type));
-      auto half_d = ir_builder_->CreateFMul(one_half, d);
-      auto q =
-          ir_builder_->CreateFAdd(ir_builder_->CreateFMul(c, arg_lhs),
-                                  ir_builder_->CreateFMul(half_d, ln_aa_p_bb));
-      TF_ASSIGN_OR_RETURN(
-          auto cos_q, EmitLibdeviceMathCall("__nv_cos", {q}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto sin_q, EmitLibdeviceMathCall("__nv_sin", {q}, {component_type},
-                                            component_type));
-      return EmitComposeComplex(op, ir_builder_->CreateFMul(coeff, cos_q),
-                                ir_builder_->CreateFMul(coeff, sin_q));
-    }
-    default:
-      return ElementalIrEmitter::EmitComplexBinaryOp(op, lhs_value, rhs_value);
-  }
-}
-
-StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitComplexUnaryOp(
-    const HloInstruction* op, llvm::Value* operand_value) const {
-  PrimitiveType input_type = op->operand(0)->shape().element_type();
-  PrimitiveType component_type =
-      primitive_util::IsComplexType(input_type)
-          ? primitive_util::ComplexComponentType(input_type)
-          : input_type;
-
-  switch (op->opcode()) {
-    case HloOpcode::kLog: {
-      // log(a+bi) = .5*log(a^2+b^2) + i*atan2(b, a)
-      auto a = EmitExtractReal(operand_value);
-      auto b = EmitExtractImag(operand_value);
-      llvm::Type* llvm_ty = a->getType();
-      auto sum_sq = ir_builder_->CreateFAdd(ir_builder_->CreateFMul(a, a),
-                                            ir_builder_->CreateFMul(b, b));
-      TF_ASSIGN_OR_RETURN(
-          auto log_sum_sq,
-          EmitLibdeviceMathCall("__nv_log", {sum_sq}, {component_type},
-                                component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto angle, EmitLibdeviceMathCall("__nv_atan2", {b, a},
-                                            {component_type, component_type},
-                                            component_type));
-      auto one_half = llvm::ConstantFP::get(llvm_ty, 0.5);
-      return EmitComposeComplex(
-          op, ir_builder_->CreateFMul(one_half, log_sum_sq), angle);
-    }
-    case HloOpcode::kExp: {
-      // e^(a+bi) = e^a*(cos(b)+sin(b)i)
-      auto b = EmitExtractImag(operand_value);
-      TF_ASSIGN_OR_RETURN(
-          auto exp_a,
-          EmitLibdeviceMathCall("__nv_exp", {EmitExtractReal(operand_value)},
-                                {component_type}, component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto cos_b, EmitLibdeviceMathCall("__nv_cos", {b}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto sin_b, EmitLibdeviceMathCall("__nv_sin", {b}, {component_type},
-                                            component_type));
-      return EmitComposeComplex(op, ir_builder_->CreateFMul(exp_a, cos_b),
-                                ir_builder_->CreateFMul(exp_a, sin_b));
-    }
-    case HloOpcode::kCos: {
-      // cos(a+bi) = .5(cos(a)*(e^-b+e^b) + i*sin(a)*(e^-b-e^b))
-      auto a = EmitExtractReal(operand_value);
-      auto llvm_ty = a->getType();
-      TF_ASSIGN_OR_RETURN(
-          auto exp_b,
-          EmitLibdeviceMathCall("__nv_exp", {EmitExtractImag(operand_value)},
-                                {component_type}, component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto cos_a, EmitLibdeviceMathCall("__nv_cos", {a}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto sin_a, EmitLibdeviceMathCall("__nv_sin", {a}, {component_type},
-                                            component_type));
-      auto half_exp_b =
-          ir_builder_->CreateFMul(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b);
-      auto half_exp_neg_b =
-          ir_builder_->CreateFDiv(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b);
-      return EmitComposeComplex(
-          op,
-          ir_builder_->CreateFMul(
-              cos_a, ir_builder_->CreateFAdd(half_exp_neg_b, half_exp_b)),
-          ir_builder_->CreateFMul(
-              sin_a, ir_builder_->CreateFSub(half_exp_neg_b, half_exp_b)));
-    }
-
-    case HloOpcode::kSin: {
-      // sin(a+bi) = 0.5(sin(a)*(e^b+e^-b) + i*cos(a)*(e^b-e^-b)
-      auto a = EmitExtractReal(operand_value);
-      auto llvm_ty = a->getType();
-      TF_ASSIGN_OR_RETURN(
-          auto exp_b,
-          EmitLibdeviceMathCall("__nv_exp", {EmitExtractImag(operand_value)},
-                                {component_type}, component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto cos_a, EmitLibdeviceMathCall("__nv_cos", {a}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto sin_a, EmitLibdeviceMathCall("__nv_sin", {a}, {component_type},
-                                            component_type));
-      auto half_exp_b =
-          ir_builder_->CreateFMul(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b);
-      auto half_exp_neg_b =
-          ir_builder_->CreateFDiv(llvm::ConstantFP::get(llvm_ty, 0.5), exp_b);
-      return EmitComposeComplex(
-          op,
-          ir_builder_->CreateFMul(
-              sin_a, ir_builder_->CreateFAdd(half_exp_b, half_exp_neg_b)),
-          ir_builder_->CreateFMul(
-              cos_a, ir_builder_->CreateFSub(half_exp_b, half_exp_neg_b)));
-    }
-    case HloOpcode::kTanh: {
-      /*
-      tanh=(exp(x)-exp(-x)) / (exp(x)+exp(-x))
-      e^(a+bi) = e^a*(cos(b)+sin(b)i)
-      so tanh=(((cos(b)+sin(b)i)e^a - (cos(-b)+sin(-b)i)e^-a)) /
-              (((cos(b)+sin(b)i)e^a + (cos(-b)+sin(-b)i)e^-a))
-      cos(b)=cos(-b), sin(-b)=-sin(b)
-      so tanh=(((cos(b)+sin(b)i)e^a - (cos(b)-sin(b)i)e^-a)) /
-              (((cos(b)+sin(b)i)e^a + (cos(b)-sin(b)i)e^-a))
-             =(cos(b)e^a+i*sin(b)e^a + cos(b)(-e^-a)+i*sin(b)e^-a) /
-              (cos(b)e^a+i*sin(b)e^a + cos(b)e^-a+i*sin(b)(-e^-a))
-             =(cos(b)(e^a-e^-a) + i*sin(b)(e^a+e^-a)) /
-              (cos(b)(e^a+e^-a) + i*sin(b)(e^a-e^-a))
-      This is a complex division, so we can multiply by denom_conj/denom_conj
-             =(cos(b)(e^a-e^-a) + i*sin(b)(e^a+e^-a)) *
-              (cos(b)(e^a+e^-a) - i*sin(b)(e^a-e^-a)) /
-              ((cos(b)(e^a+e^-a))^2 + (sin(b)(e^a-e^-a))^2)
-             =(cos(b)^2(e^(2a)-e^(-2a)) + sin(b)^2(e^(2a)-e^(-2a)) +
-               i*(cos(b)sin(b)(e^a+e^-a)^2 - cos(b)sin(b)(e^a-e^-a)^2)) /
-              ((cos(b)(e^a+e^-a))^2 + (sin(b)(e^a-e^-a))^2)
-      */
-      auto a = EmitExtractReal(operand_value);
-      auto b = EmitExtractImag(operand_value);
-      TF_ASSIGN_OR_RETURN(
-          auto exp_a, EmitLibdeviceMathCall("__nv_exp", {a}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto cos_b, EmitLibdeviceMathCall("__nv_cos", {b}, {component_type},
-                                            component_type));
-      TF_ASSIGN_OR_RETURN(
-          auto sin_b, EmitLibdeviceMathCall("__nv_sin", {b}, {component_type},
-                                            component_type));
-      auto exp_neg_a = ir_builder_->CreateFDiv(
-          llvm::ConstantFP::get(exp_a->getType(), 1), exp_a);
-      auto exp_2a_minus_exp_neg_2a = ir_builder_->CreateFSub(
-          ir_builder_->CreateFMul(exp_a, exp_a),
-          ir_builder_->CreateFMul(exp_neg_a, exp_neg_a));
-      auto cos_b_sq = ir_builder_->CreateFMul(cos_b, cos_b);
-      auto sin_b_sq = ir_builder_->CreateFMul(sin_b, sin_b);
-      auto real_num = ir_builder_->CreateFAdd(
-          ir_builder_->CreateFMul(cos_b_sq, exp_2a_minus_exp_neg_2a),
-          ir_builder_->CreateFMul(sin_b_sq, exp_2a_minus_exp_neg_2a));
-      auto cos_b_sin_b = ir_builder_->CreateFMul(cos_b, sin_b);
-      auto exp_a_plus_exp_neg_a = ir_builder_->CreateFAdd(exp_a, exp_neg_a);
-      auto exp_a_plus_exp_neg_a_sq =
-          ir_builder_->CreateFMul(exp_a_plus_exp_neg_a, exp_a_plus_exp_neg_a);
-      auto exp_a_minus_exp_neg_a = ir_builder_->CreateFSub(exp_a, exp_neg_a);
-      auto exp_a_minus_exp_neg_a_sq =
-          ir_builder_->CreateFMul(exp_a_minus_exp_neg_a, exp_a_minus_exp_neg_a);
-      auto imag_num = ir_builder_->CreateFMul(
-          cos_b_sin_b, ir_builder_->CreateFSub(exp_a_plus_exp_neg_a_sq,
-                                               exp_a_minus_exp_neg_a_sq));
-      auto denom = ir_builder_->CreateFAdd(
-          ir_builder_->CreateFMul(cos_b_sq, exp_a_plus_exp_neg_a_sq),
-          ir_builder_->CreateFMul(sin_b_sq, exp_a_minus_exp_neg_a_sq));
-      return EmitComposeComplex(op, ir_builder_->CreateFDiv(real_num, denom),
-                                ir_builder_->CreateFDiv(imag_num, denom));
-    }
-    default:
-      return ElementalIrEmitter::EmitComplexUnaryOp(op, operand_value);
-  }
-}
-
 llvm::Value* GpuElementalIrEmitter::EmitDeviceFunctionCall(
     const string& callee_name,
     tensorflow::gtl::ArraySlice<llvm::Value*> operands,
diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h
index 6a537d015209bc507af36b13eeb5d69ce58d8fea..77d4569b1e8e398005e8f517ff086a77aedd382d 100644
--- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h
+++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h
@@ -54,20 +54,31 @@ class GpuElementalIrEmitter : public ElementalIrEmitter {
   StatusOr<llvm::Value*> EmitFloatUnaryOp(
       const HloInstruction* op, llvm::Value* operand_value) const override;
 
-  StatusOr<llvm::Value*> EmitComplexUnaryOp(
-      const HloInstruction* op, llvm::Value* operand_value) const override;
-
   StatusOr<llvm::Value*> EmitFloatBinaryOp(
       const HloInstruction* op, llvm::Value* lhs_value,
       llvm::Value* rhs_value) const override;
 
-  StatusOr<llvm::Value*> EmitComplexBinaryOp(
-      const HloInstruction* op, llvm::Value* lhs_value,
-      llvm::Value* rhs_value) const override;
-
   StatusOr<llvm::Value*> EmitErfcInv(PrimitiveType prim_type,
                                      llvm::Value* value) const override;
 
+  StatusOr<llvm::Value*> EmitLog(PrimitiveType prim_type,
+                                 llvm::Value* value) const override;
+
+  StatusOr<llvm::Value*> EmitSin(PrimitiveType prim_type,
+                                 llvm::Value* value) const override;
+
+  StatusOr<llvm::Value*> EmitCos(PrimitiveType prim_type,
+                                 llvm::Value* value) const override;
+
+  StatusOr<llvm::Value*> EmitExp(PrimitiveType prim_type,
+                                 llvm::Value* value) const override;
+
+  StatusOr<llvm::Value*> EmitPow(PrimitiveType prim_type, llvm::Value* lhs,
+                                 llvm::Value* rhs) const override;
+
+  StatusOr<llvm::Value*> EmitAtan2(PrimitiveType prim_type, llvm::Value* lhs,
+                                   llvm::Value* rhs) const override;
+
   llvm::Value* EmitThreadId() const override;
 
  private:
diff --git a/tensorflow/compiler/xla/service/gpu/fft_thunk.cc b/tensorflow/compiler/xla/service/gpu/fft_thunk.cc
new file mode 100644
index 0000000000000000000000000000000000000000..66931bdc8b1030b2b2e7731ce6327c1e908d4ee6
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/fft_thunk.cc
@@ -0,0 +1,234 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/fft_thunk.h"
+
+#include <string>
+
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
+
+namespace se = ::perftools::gputools;
+
+namespace xla {
+namespace gpu {
+
+FftScratchAllocator::FftScratchAllocator(
+    int device_ordinal, DeviceMemoryAllocator* memory_allocator)
+    : device_ordinal_(device_ordinal), memory_allocator_(memory_allocator) {}
+
+FftScratchAllocator::~FftScratchAllocator() {
+  for (auto& allocated_buffer : allocated_buffers_) {
+    if (!memory_allocator_->Deallocate(device_ordinal_, &allocated_buffer)
+             .ok()) {
+      // The program can still continue with failed deallocation.
+      LOG(ERROR) << "Failed to deallocate the allocated buffer: "
+                 << allocated_buffer.opaque();
+    }
+  }
+}
+
+int64 FftScratchAllocator::GetMemoryLimitInBytes(se::Stream* stream) {
+  constexpr int64 kFftScratchSize = 1LL << 32;  // 4GB by default.
+  return kFftScratchSize;
+}
+
+se::port::StatusOr<se::DeviceMemory<uint8>> FftScratchAllocator::AllocateBytes(
+    se::Stream* stream, int64 byte_size) {
+  CHECK_GE(byte_size, 0) << "byte_size must be positive.";
+  if (byte_size > GetMemoryLimitInBytes(stream)) {
+    return se::port::Status(
+        se::port::error::RESOURCE_EXHAUSTED,
+        tensorflow::strings::Printf(
+            "Allocating %lld bytes exceeds the memory limit of %lld bytes.",
+            byte_size, GetMemoryLimitInBytes(stream)));
+  }
+
+  auto status_or_memory =
+      memory_allocator_->Allocate(device_ordinal_, byte_size,
+                                  /*retry_on_failure=*/false);
+  if (!status_or_memory.ok()) {
+    return tensorflow::errors::ResourceExhausted(
+        "Failed to allocate %lld bytes on device %d.", byte_size,
+        device_ordinal_);
+  }
+  se::DeviceMemoryBase allocated_buffer = status_or_memory.ValueOrDie();
+  allocated_buffers_.push_back(allocated_buffer);
+  total_allocated_bytes_ += byte_size;
+  return se::DeviceMemory<uint8>(allocated_buffer);
+}
+
+namespace {
+
+se::fft::Type FftTypeToSeType(FftType type) {
+  switch (type) {
+    case FftType::FFT:
+      return se::fft::Type::kC2CForward;
+    case FftType::IFFT:
+      return se::fft::Type::kC2CInverse;
+    case FftType::IRFFT:
+      return se::fft::Type::kC2R;
+    case FftType::RFFT:
+      return se::fft::Type::kR2C;
+    default:
+      LOG(FATAL) << "unsupported fft type";
+  }
+}
+
+string FftTypeToString(se::fft::Type type) {
+  switch (type) {
+    case se::fft::Type::kC2CForward:
+      return "FFT";
+    case se::fft::Type::kC2CInverse:
+      return "IFFT";
+    case se::fft::Type::kC2R:
+      return "IRFFT";
+    case se::fft::Type::kR2C:
+      return "RFFT";
+    default:
+      LOG(FATAL) << "unknown fft type";
+  }
+}
+
+}  // namespace
+
+FftThunk::FftThunk(FftType fft_type,
+                   tensorflow::gtl::ArraySlice<int64> fft_length,
+                   const BufferAllocation::Slice& input_buffer,
+                   const BufferAllocation::Slice& output_buffer,
+                   const Shape& input_shape, const Shape& output_shape,
+                   const HloInstruction* hlo)
+    : Thunk(Kind::kFft, hlo),
+      fft_type_(FftTypeToSeType(fft_type)),
+      fft_length_(fft_length.begin(), fft_length.end()),
+      scale_factor_(1.0f),
+      input_buffer_(input_buffer),
+      output_buffer_(output_buffer),
+      input_shape_(input_shape),
+      output_shape_(output_shape) {}
+
+tensorflow::Status FftThunk::ExecuteOnStream(
+    const BufferAllocations& buffer_allocations, se::Stream* stream) {
+  VLOG(3) << "FFT type: " << FftTypeToString(fft_type_);
+  VLOG(3) << "Input shape: " << ShapeUtil::HumanStringWithLayout(input_shape_);
+  VLOG(3) << "Output shape: "
+          << ShapeUtil::HumanStringWithLayout(output_shape_);
+
+  FftScratchAllocator scratch_allocator(buffer_allocations.device_ordinal(),
+                                        buffer_allocations.memory_allocator());
+
+  if (fft_plan_ == nullptr) {
+    const int64 fft_rank = fft_length_.size();
+    CHECK_LE(fft_rank, 3);
+    int batch_size = 1;
+    for (int i = 0; i < input_shape_.dimensions_size() - fft_rank; ++i) {
+      batch_size *= input_shape_.dimensions(i);
+    }
+    uint64 fft_length[3];
+    uint64 input_embed[3];
+    const uint64 input_stride = 1;
+    uint64 input_distance = 1;
+    uint64 output_embed[3];
+    const uint64 output_stride = 1;
+    uint64 output_distance = 1;
+
+    for (int i = 0; i < fft_rank; ++i) {
+      auto dim_offset = input_shape_.dimensions_size() - fft_rank + i;
+      fft_length[i] = static_cast<uint64>(fft_length_[i]);
+      input_embed[i] = input_shape_.dimensions(dim_offset);
+      input_distance *= input_shape_.dimensions(dim_offset);
+      output_embed[i] = output_shape_.dimensions(dim_offset);
+      output_distance *= output_shape_.dimensions(dim_offset);
+    }
+
+    constexpr bool kInPlaceFft = false;
+    fft_plan_ =
+        stream->parent()->AsFft()->CreateBatchedPlanWithScratchAllocator(
+            stream, fft_rank, fft_length, input_embed, input_stride,
+            input_distance, output_embed, output_stride, output_distance,
+            fft_type_, kInPlaceFft, batch_size, &scratch_allocator);
+    scale_factor_ = 1.0f / output_distance;
+  } else {
+    stream->parent()->AsFft()->UpdatePlanWithScratchAllocator(
+        stream, fft_plan_.get(), &scratch_allocator);
+  }
+
+  bool launch_ok;
+  switch (fft_type_) {
+    case se::fft::Type::kC2CForward: {
+      se::DeviceMemory<complex64> input_data(
+          buffer_allocations.GetDeviceAddress(input_buffer_));
+      se::DeviceMemory<complex64> output_data(
+          buffer_allocations.GetDeviceAddress(output_buffer_));
+      launch_ok =
+          stream->ThenFft(fft_plan_.get(), input_data, &output_data).ok();
+      break;
+    }
+    case se::fft::Type::kC2CInverse: {
+      se::DeviceMemory<complex64> input_data(
+          buffer_allocations.GetDeviceAddress(input_buffer_));
+      se::DeviceMemory<complex64> output_data(
+          buffer_allocations.GetDeviceAddress(output_buffer_));
+      launch_ok =
+          stream->ThenFft(fft_plan_.get(), input_data, &output_data).ok();
+      if (launch_ok) {
+        launch_ok =
+            stream
+                ->ThenBlasScal(ShapeUtil::ElementsIn(output_shape_),
+                               complex64(scale_factor_), &output_data, 1)
+                .ok();
+      }
+      break;
+    }
+    case se::fft::Type::kR2C: {
+      se::DeviceMemory<float> input_data(
+          buffer_allocations.GetDeviceAddress(input_buffer_));
+      se::DeviceMemory<complex64> output_data(
+          buffer_allocations.GetDeviceAddress(output_buffer_));
+      launch_ok =
+          stream->ThenFft(fft_plan_.get(), input_data, &output_data).ok();
+      break;
+    }
+    case se::fft::Type::kC2R: {
+      se::DeviceMemory<complex64> input_data(
+          buffer_allocations.GetDeviceAddress(input_buffer_));
+      se::DeviceMemory<float> output_data(
+          buffer_allocations.GetDeviceAddress(output_buffer_));
+      launch_ok =
+          stream->ThenFft(fft_plan_.get(), input_data, &output_data).ok();
+      if (launch_ok) {
+        launch_ok = stream
+                        ->ThenBlasScal(ShapeUtil::ElementsIn(output_shape_),
+                                       scale_factor_, &output_data, 1)
+                        .ok();
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "unsupported fft type";
+  }
+  if (launch_ok) {
+    return tensorflow::Status::OK();
+  }
+  return InternalError("Unable to launch fft for thunk %p with type %s", this,
+                       FftTypeToString(fft_type_).c_str());
+}
+
+}  // namespace gpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/fft_thunk.h b/tensorflow/compiler/xla/service/gpu/fft_thunk.h
new file mode 100644
index 0000000000000000000000000000000000000000..52fb8c376d7acea0f15aaa865c23fa2382717338
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/fft_thunk.h
@@ -0,0 +1,98 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_FFT_THUNK_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_FFT_THUNK_H_
+
+#include "tensorflow/compiler/xla/service/buffer_assignment.h"
+#include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
+#include "tensorflow/compiler/xla/service/gpu/thunk.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/optional.h"
+#include "tensorflow/core/platform/stream_executor_no_cuda.h"
+
+namespace xla {
+namespace gpu {
+
+// A one-time scratch allocator for FFT. The scratch buffers allocated are
+// released on destruction.
+//
+// Not thread-safe in that AllocateBytes, destructor are not locked.
+class FftScratchAllocator : public perftools::gputools::ScratchAllocator {
+ public:
+  FftScratchAllocator(int device_ordinal,
+                      DeviceMemoryAllocator* memory_allocator);
+
+  ~FftScratchAllocator() override;
+
+  int64 GetMemoryLimitInBytes(perftools::gputools::Stream* stream) override;
+
+  int64 TotalAllocatedBytes() { return total_allocated_bytes_; }
+
+  perftools::gputools::port::StatusOr<perftools::gputools::DeviceMemory<uint8>>
+  AllocateBytes(perftools::gputools::Stream* stream, int64 byte_size) override;
+
+ private:
+  const int device_ordinal_;
+  DeviceMemoryAllocator* memory_allocator_;
+  std::vector<perftools::gputools::DeviceMemoryBase> allocated_buffers_;
+  int64 total_allocated_bytes_ = 0;
+};
+
+// This class stores everything that StreamExecutor needs to launch an FFT.
+// It is generated by IrEmitter.
+//
+// This is thread-compatible.
+class FftThunk : public Thunk {
+ public:
+  // Constructs a thunk for launching an FFT on a stream.
+  // Semantics of null hlo_instruction argument are as in Thunk.
+  FftThunk(FftType fft_type, tensorflow::gtl::ArraySlice<int64> fft_length,
+           const BufferAllocation::Slice& input_buffer,
+           const BufferAllocation::Slice& output_buffer,
+           const Shape& input_shape, const Shape& output_shape,
+           const HloInstruction* hlo);
+
+  FftThunk(const FftThunk&) = delete;             // Cannot share fft_plan_
+  FftThunk& operator=(const FftThunk&) = delete;  // Cannot share fft_plan_
+
+  // Does the FFT for the thunk on "stream".
+  tensorflow::Status ExecuteOnStream(
+      const BufferAllocations& buffer_allocations,
+      perftools::gputools::Stream* stream) override;
+
+ private:
+  const perftools::gputools::fft::Type fft_type_;
+  const std::vector<int64> fft_length_;
+
+  float scale_factor_;
+
+  std::unique_ptr<perftools::gputools::fft::Plan> fft_plan_;
+
+  const BufferAllocation::Slice input_buffer_;
+  const BufferAllocation::Slice output_buffer_;
+
+  const Shape input_shape_;
+  const Shape output_shape_;
+};
+
+}  // namespace gpu
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_FFT_THUNK_H_
diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc
index e784046450ed1cca088770c65c786e80adda869f..8e3aebbc12b5e6d746700956b9743bc94db50167 100644
--- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc
@@ -264,9 +264,9 @@ tensorflow::Status GemmThunk::ExecuteOnStream(
 
   auto make_descriptor = [this](se::DeviceMemoryBase data, const Shape& shape,
                                 bool transpose) -> MatrixDescriptor {
-    bool is_row_major = shape.layout().minor_to_major(0) != 0;
-    bool layout_mismatch = shape.layout().minor_to_major(0) !=
-                           output_shape_.layout().minor_to_major(0);
+    bool is_row_major = LayoutUtil::Minor(shape.layout(), 0) != 0;
+    bool layout_mismatch = LayoutUtil::Minor(shape.layout(), 0) !=
+                           LayoutUtil::Minor(output_shape_.layout(), 0);
     return MatrixDescriptor(data, transpose ^ layout_mismatch,
                             shape.dimensions(is_row_major),
                             shape.dimensions(!is_row_major));
@@ -320,7 +320,7 @@ tensorflow::Status GemmThunk::ExecuteOnStream(
   };
 
   bool launch_ok;
-  if (output_shape_.layout().minor_to_major(0) == 0) {
+  if (LayoutUtil::Minor(output_shape_.layout(), 0) == 0) {
     launch_ok = launch(
         lhs_descriptor, rhs_descriptor,
         MatrixDescriptor(output_data, false, output_num_rows, output_num_cols),
diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.h b/tensorflow/compiler/xla/service/gpu/gemm_thunk.h
index 983cb872924f22be0dfad8aa9ad86f233b909c46..8c6a1f51a8a09ef78950dfe7e89994a3fe247f49 100644
--- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.h
@@ -52,6 +52,15 @@ class GemmThunk : public Thunk {
       const BufferAllocations& buffer_allocations,
       perftools::gputools::Stream* stream) override;
 
+  // Returns true if we'll perform autotuning if run on the given stream.  If
+  // so, we want the GPU to be quiescent during autotuning, so as not to
+  // introduce noise in our results.
+  bool ShouldHaltAllActivityBeforeRunning(
+      perftools::gputools::Stream* stream) override {
+    return autotune_results_.count(
+               stream->parent()->GetDeviceDescription().name()) != 0;
+  }
+
  private:
   const BufferAllocation::Slice lhs_buffer_;
   const BufferAllocation::Slice rhs_buffer_;
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index fcd73fd37a2d9ae3c24b56970e3e992da5944682..89acac2c3ff77a93b6cf3b871a130dcd7edecf30 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -18,30 +18,36 @@ limitations under the License.
 #include <stdlib.h>
 #include <atomic>
 #include <functional>
+#include <mutex>  // NOLINT(build/c++11): only using std::call_once, not mutex.
 #include <utility>
 
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
 #include "tensorflow/compiler/xla/protobuf_util.h"
 #include "tensorflow/compiler/xla/ptr_util.h"
 #include "tensorflow/compiler/xla/service/algebraic_simplifier.h"
-#include "tensorflow/compiler/xla/service/batchnorm_rewriter.h"
+#include "tensorflow/compiler/xla/service/batchnorm_expander.h"
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_inliner.h"
+#include "tensorflow/compiler/xla/service/dot_decomposer.h"
 #include "tensorflow/compiler/xla/service/flatten_call_graph.h"
 #include "tensorflow/compiler/xla/service/gpu/convolution_folding.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h"
 #include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_constants.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h"
 #include "tensorflow/compiler/xla/service/gpu/hlo_schedule.h"
 #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emitter.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emitter_context.h"
-#include "tensorflow/compiler/xla/service/gpu/layout_assignment.h"
 #include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.h"
 #include "tensorflow/compiler/xla/service/gpu/pad_insertion.h"
 #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h"
@@ -64,6 +70,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/transpose_folding.h"
 #include "tensorflow/compiler/xla/service/tuple_simplifier.h"
 #include "tensorflow/compiler/xla/service/while_loop_simplifier.h"
+#include "tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -74,9 +81,11 @@ limitations under the License.
 #include "tensorflow/core/platform/cuda_libdevice_path.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/regexp.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 #include "tensorflow/core/platform/subprocess.h"
 #include "tensorflow/core/platform/tracing.h"
+#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
 
 namespace se = ::perftools::gputools;
 
@@ -90,14 +99,6 @@ namespace gpu {
 namespace {
 
 using tensorflow::port::Tracing;
-using tensorflow::strings::StrCat;
-
-// Any address of a variable residing in global memory or returned by one of the
-// memory allocation routines from the driver or runtime API is always aligned
-// to at least 256 bytes.
-//
-// http://docs.nvidia.com/cuda/cuda-c-programming-guide/#device-memory-accesses
-constexpr int64 kMemoryAlignment = 256;
 
 // Returns the directory containing nvvm libdevice files.  config_cuda_data_dir
 // should be equal to config().debug_options().xla_gpu_cuda_data_dir() of the
@@ -125,31 +126,39 @@ string GetLibdeviceDir(const string& config_cuda_data_dir) {
 }
 
 // Runs optimization passes on the given HLO module.
-tensorflow::Status OptimizeHloModule(
-    HloModule* hlo_module,
-    const HloCostAnalysis::ShapeSizeFunction& shape_size_function) {
+tensorflow::Status OptimizeHloModule(HloModule* hlo_module) {
   {
     HloPassPipeline pipeline("optimization");
-    pipeline.AddInvariantChecker<HloVerifier>(shape_size_function);
+    pipeline.AddInvariantChecker<HloVerifier>();
+    pipeline.AddPass<GpuHloSupportChecker>();
     ReducePrecisionInsertion::AddPasses(
         &pipeline, hlo_module->config().debug_options(),
         ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION);
 
     // TODO(b/64094172): make Call work on GPU instead of inlining.
     pipeline.AddPass<CallInliner>();
-
+    pipeline.AddPass<DotDecomposer>();
     {
       auto& pass =
           pipeline.AddPass<HloPassFix<HloPassPipeline>>("simplification");
-      pass.AddInvariantChecker<HloVerifier>(shape_size_function);
+      pass.AddInvariantChecker<HloVerifier>();
 
-      // TODO(b/62764704): Do not rewrite on GPU, use cuDNN's BatchNorm APIs
-      // instead.
-      pass.AddPass<BatchNormRewriter>(
+      // If cudnn batchnorms are enabled, rewrite batchnorm HLOs to cudnn calls
+      // where possible.  Not every batchnorm op can be implemented as a call to
+      // cudnn, so decompose any remaining batchnorm ops into a soup of HLOs.
+      if (hlo_module->config().debug_options().xla_gpu_use_cudnn_batchnorm()) {
+        pass.AddPass<CudnnBatchNormRewriter>();
+      }
+      pass.AddPass<BatchNormExpander>(
           /*rewrite_training_op=*/true,
           /*rewrite_inference_op=*/true,
           /*rewrite_grad_op=*/true,
           /*use_fusion=*/false);
+
+      // BatchNormExpander can create zero-sized ops, so zero-sized HLO
+      // elimination has to come after that pass.
+      pipeline.AddPass<ZeroSizedHloElimination>();
+
       pass.AddPass<AlgebraicSimplifier>(
           /*is_layout_sensitive=*/false,
           [](const Shape&, const Shape&) { return false; });
@@ -173,14 +182,14 @@ tensorflow::Status OptimizeHloModule(
   }
   {
     HloPassFix<HloPassPipeline> fusion("fusion");
-    fusion.AddInvariantChecker<HloVerifier>(shape_size_function);
+    fusion.AddInvariantChecker<HloVerifier>();
     fusion.AddPass<GpuInstructionFusion>(/*may_duplicate=*/false);
     fusion.AddPass<GpuInstructionFusion>(/*may_duplicate=*/true);
     fusion.AddPass<FusionMerger>();
     TF_RETURN_IF_ERROR(fusion.Run(hlo_module).status());
 
     HloPassPipeline reduce_pipeline("reduce-precision");
-    reduce_pipeline.AddInvariantChecker<HloVerifier>(shape_size_function);
+    reduce_pipeline.AddInvariantChecker<HloVerifier>();
     ReducePrecisionInsertion::AddPasses(
         &reduce_pipeline, hlo_module->config().debug_options(),
         ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
@@ -198,16 +207,14 @@ tensorflow::Status OptimizeHloModule(
 
 // Modifies the given HLO module so that it will be accepted by IrEmitter.
 // Unlike optimization passes, the passes are necessary for correctness.
-tensorflow::Status PrepareHloModuleForIrEmitting(
-    HloModule* hlo_module,
-    const HloCostAnalysis::ShapeSizeFunction& shape_size_function) {
+tensorflow::Status PrepareHloModuleForIrEmitting(HloModule* hlo_module) {
   // In some cases, we have to place the result of an instruction in a temporary
   // buffer. For instance, the buffer that holds an external parameter is
   // assumed immutable at this point, and should not be reused for output
   // (b/27180329). Therefore, in that case, we set the output to be a copy of
   // the parameter.
   HloPassPipeline pipeline("GPU-ir-emit-prepare");
-  pipeline.AddInvariantChecker<HloVerifier>(shape_size_function);
+  pipeline.AddInvariantChecker<HloVerifier>();
   pipeline.AddPass<PadInsertion>();
   pipeline.AddPass<GpuLayoutAssignment>(
       hlo_module->mutable_entry_computation_layout());
@@ -229,6 +236,93 @@ tensorflow::Status PrepareHloModuleForIrEmitting(
   return pipeline.Run(hlo_module).status();
 }
 
+// Prints a warning if the ptxas at ptxas_path has known bugs.
+//
+// Only prints a warning the first time it's called for a particular value of
+// ptxas_path.
+void WarnIfBadPtxasVersion(const string& ptxas_path) {
+  static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED);
+  static std::unordered_set<string>* seen_ptxas_paths GUARDED_BY(mu) =
+      new std::unordered_set<string>();
+
+  tensorflow::mutex_lock lock(mu);
+  if (!seen_ptxas_paths->insert(ptxas_path).second) {
+    // Already checked this ptx binary, nothing to do.
+    return;
+  }
+
+  tensorflow::SubProcess ptxas;
+  ptxas.SetProgram(ptxas_path, {ptxas_path, "--version"});
+  ptxas.SetChannelAction(tensorflow::CHAN_STDOUT, tensorflow::ACTION_PIPE);
+  if (!ptxas.Start()) {
+    LOG(WARNING) << "Couldn't invoke " << ptxas_path << " --version";
+    return;
+  }
+
+  string out;
+  int exit_code = ptxas.Communicate(/*stdin_input=*/nullptr, &out,
+                                    /*stderr_output=*/nullptr);
+  if (exit_code != 0) {
+    LOG(WARNING) << "Running " << ptxas_path << " --version returned "
+                 << exit_code;
+    return;
+  }
+
+  int64 vmaj, vmin, vdot;
+  string vmaj_str, vmin_str, vdot_str;
+  if (!RE2::PartialMatch(out, R"(\bV(\d+)\.(\d+)\.(\d+)\b)", &vmaj_str,
+                         &vmin_str, &vdot_str) ||
+      !tensorflow::strings::safe_strto64(vmaj_str, &vmaj) ||
+      !tensorflow::strings::safe_strto64(vmin_str, &vmin) ||
+      !tensorflow::strings::safe_strto64(vdot_str, &vdot)) {
+    LOG(WARNING) << "Couldn't parse ptxas version in output of " << ptxas_path
+                 << " --version:\n"
+                 << out;
+    return;
+  }
+
+  // ptxas 9.0 before 9.0.276 miscompiles some address calculations with large
+  // offsets (e.g. "load ptr + large_constant"), b/70245379.
+  if (vmaj == 9 && vmin == 0 && vdot < 276) {
+    LOG(WARNING) << "*** WARNING *** You are using ptxas " << vmaj << "."
+                 << vmin << "." << vdot
+                 << ", which is in range [9.0.0, 9.0.276). These versions are "
+                    "known to miscompile XLA code, leading to incorrect "
+                    "results or invalid-address errors.";
+  }
+}
+
+// Prints a warning if the ptx->sass JIT in the driver has known bugs.
+//
+// Using such a driver only a problem if we fail to use ptxas to compile our ptx
+// and have to use the driver instead, so you should only call this function if
+// we're going to use the driver JIT.
+//
+// Only prints a warning the first time it's called.
+void WarnIfBadDriverJITVersion() {
+  static std::once_flag run_once;
+  std::call_once(run_once, [] {
+    auto version_or_status = se::cuda::Diagnostician::FindKernelDriverVersion();
+    if (!version_or_status.ok()) {
+      LOG(WARNING) << "Couldn't read CUDA driver version.";
+      return;
+    }
+    se::cuda::DriverVersion version = version_or_status.ValueOrDie();
+
+    // The driver JIT in 384 before 384.108 miscompiles some address
+    // calculations with large offsets (e.g. "load ptr + large_constant"),
+    // b/70245379.
+    if (std::get<0>(version) == 384 && std::get<1>(version) < 108) {
+      LOG(WARNING)
+          << "*** WARNING *** Invoking the PTX->SASS JIT from driver version "
+          << se::cuda::DriverVersionToString(version)
+          << ", which is in range [384.0.0, 384.108.0). These versions are "
+             "known to miscompile XLA code, leading to incorrect results or "
+             "invalid-address errors.";
+    }
+  });
+}
+
 // Compiles the given PTX string using ptxas and returns the resulting machine
 // code (i.e. a cubin) as a byte array.
 StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
@@ -240,6 +334,8 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
   auto env = tensorflow::Env::Default();
   TF_RETURN_IF_ERROR(env->FileExists(ptxas_path));
 
+  WarnIfBadPtxasVersion(ptxas_path);
+
   // Write ptx into a temporary file.
   string ptx_path;
   if (!env->LocalTempFilename(&ptx_path)) {
@@ -263,8 +359,9 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
     tensorflow::Env::Default()->DeleteFile(cubin_path).IgnoreError();
   });
   tensorflow::SubProcess ptxas_info_dumper;
-  std::vector<string> ptxas_args = {ptxas_path, ptx_path, "-o", cubin_path,
-                                    StrCat("-arch=sm_", cc_major, cc_minor)};
+  std::vector<string> ptxas_args = {
+      ptxas_path, ptx_path, "-o", cubin_path,
+      tensorflow::strings::StrCat("-arch=sm_", cc_major, cc_minor)};
   if (VLOG_IS_ON(2)) {
     ptxas_args.push_back("-v");
   }
@@ -294,14 +391,15 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
 }  // namespace
 
 GpuCompiler::GpuCompiler()
-    : pointer_size_(llvm::DataLayout(kDataLayout).getPointerSize()) {}
+    : pointer_size_(llvm::DataLayout(kDataLayout)
+                        .getPointerSize(0 /* default address space */)) {}
 
 StatusOr<std::unique_ptr<HloModule>> GpuCompiler::RunHloPasses(
     std::unique_ptr<HloModule> module, se::StreamExecutor* /*stream_exec*/) {
   XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunHloPasses");
   Tracing::TraceMe annotation("HLO Transforms", module->name(),
                               /*is_expensive=*/true);
-  TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), ShapeSizeBytesFunction()));
+  TF_RETURN_IF_ERROR(OptimizeHloModule(module.get()));
   return std::move(module);
 }
 
@@ -311,8 +409,7 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
 
   TF_RET_CHECK(stream_exec != nullptr);
 
-  TF_RETURN_IF_ERROR(
-      PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction()));
+  TF_RETURN_IF_ERROR(PrepareHloModuleForIrEmitting(module.get()));
 
   llvm::LLVMContext llvm_context;
   std::string buffer;
@@ -343,8 +440,9 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<BufferAssignment> buffer_assignment,
       BufferAssigner::Run(module.get(), hlo_schedule->ConsumeHloOrdering(),
-                          BufferSizeBytesFunction(), [](LogicalBuffer::Color) {
-                            return kMemoryAlignment;
+                          BufferSizeBytesFunction(),
+                          /*color_alignment=*/[](LogicalBuffer::Color) {
+                            return kCudaMallocAlignBytes;
                           }));
   // BufferAssignment::ToString() includes a header, so no need for us to
   // print one ourselves.
@@ -393,6 +491,20 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
         /*optimized=*/false));
   }
 
+  {
+    XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunBackend - Running LLVM verifier");
+
+    std::string err;
+    llvm::raw_string_ostream err_stream(err);
+
+    // verifyModule() returns true if the module is broken.
+    TF_RET_CHECK(!llvm::verifyModule(llvm_module, &err_stream))
+        << "Invalid LLVM IR before optimizations:\n"
+        << err_stream.str()
+        << "\nThis probably indicates a bug in the HLO -> LLVM IR lowering. "
+           "Rerun with --xla_dump_ir_to to get the IR. ";
+  }
+
   string libdevice_dir;
   {
     tensorflow::mutex_lock lock(mutex_);
@@ -443,7 +555,7 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
   // Write PTX to IR dump directory, if IR dumping was requested.
   if (!ir_dump_directory.empty()) {
     const string ptx_outfile = tensorflow::io::JoinPath(
-        ir_dump_directory, StrCat(module->name(), ".ptx"));
+        ir_dump_directory, tensorflow::strings::StrCat(module->name(), ".ptx"));
     auto status = [&] {
       auto* env = tensorflow::Env::Default();
       TF_RETURN_IF_ERROR(env->RecursivelyCreateDir(ir_dump_directory));
@@ -470,6 +582,7 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
 
   if (module->config().hlo_profiling_enabled()) {
     HloCostAnalysis cost_analysis(ShapeSizeBytesFunction());
+    TF_RETURN_IF_ERROR(module->entry_computation()->Accept(&cost_analysis));
     profile_index_map = MakeUnique<HloProfileIndexMap>(*module);
     profile_printer =
         CreateHloProfilePrinter(*profile_index_map, cost_analysis);
@@ -541,6 +654,10 @@ std::vector<uint8> GpuCompiler::CompilePtxOrGetCachedResult(const string& ptx,
                    "GPU driver compile the ptx. "
                 << maybe_cubin.status();
           }
+
+          // We're going to use the driver to JIT our PTX->SASS, so warn if
+          // the JIT in the driver has known bugs.
+          WarnIfBadDriverJITVersion();
         }
       }
       cache_value->compilation_done = true;
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_constants.cc b/tensorflow/compiler/xla/service/gpu/gpu_constants.cc
new file mode 100644
index 0000000000000000000000000000000000000000..aa360c7f73de2f0f9cf59c22b552b8e60ddb3a87
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_constants.cc
@@ -0,0 +1,25 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/gpu_constants.h"
+
+namespace xla {
+namespace gpu {
+
+// http://docs.nvidia.com/cuda/cuda-c-programming-guide/#device-memory-accesses
+const int64 kCudaMallocAlignBytes = 256;
+
+}  // namespace gpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_constants.h b/tensorflow/compiler/xla/service/gpu/gpu_constants.h
new file mode 100644
index 0000000000000000000000000000000000000000..572c85628278752f924b90dbb7134c5fc8fb9740
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_constants.h
@@ -0,0 +1,31 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_CONSTANTS_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_CONSTANTS_H_
+
+#include "tensorflow/compiler/xla/types.h"
+
+namespace xla {
+namespace gpu {
+
+// Minimum alignment of cudaMalloc.  We require that buffers created by our
+// DeviceMemoryAllocator, and all input/output buffers, have this alignment.
+extern const int64 kCudaMallocAlignBytes;
+
+}  // namespace gpu
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_CONSTANTS_H_
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
index 33d739b79d3664fec3586bbc924b7fa2e10d3256..e67087d822e2f3367c48b08be66f5f60791be638 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
@@ -55,20 +55,33 @@ StatusOr<bool> GpuCopyInsertion::Run(HloModule* module) {
   // in IR.
   for (HloInstruction* hlo :
        module->entry_computation()->MakeInstructionPostOrder()) {
-    if (ImplementedAsLibraryCall(*hlo)) {
+    // Inserts a copy of hlo->operand(n) if it's a constant.
+    auto copy_operand_if_constant = [&](int64 n) -> Status {
+      HloInstruction* operand = hlo->mutable_operand(n);
+      TF_RET_CHECK(ShapeUtil::IsArray(operand->shape()));
+      const auto& values = dataflow->GetValueSet(operand).values();
+      if (std::any_of(values.begin(), values.end(), [](const HloValue* value) {
+            return value->defining_instruction()->opcode() ==
+                   HloOpcode::kConstant;
+          })) {
+        TF_ASSIGN_OR_RETURN(HloInstruction * copy, FindOrInsertCopy(operand));
+        TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(n, copy));
+        changed = true;
+      }
+      return Status::OK();
+    };
+
+    if (IsCustomCallToDnnBatchNorm(*hlo)) {
+      // The epsilon and feature_index operands to a CUDNN batchnorm op don't
+      // need to be materialized in memory -- in fact, they must be constants.
+      // These are the last two operands of all three batchnorm ops.
+      for (int64 i = 0; i < hlo->operand_count() - 2; ++i) {
+        TF_RETURN_IF_ERROR(copy_operand_if_constant(i));
+      }
+    } else if (ImplementedAsLibraryCall(*hlo)) {
+      // For all other library calls, materialize all the operands into memory.
       for (int64 i = 0; i < hlo->operand_count(); ++i) {
-        HloInstruction* operand = hlo->mutable_operand(i);
-        TF_RET_CHECK(ShapeUtil::IsArray(operand->shape()));
-        const auto& values = dataflow->GetValueSet(operand).values();
-        if (std::any_of(values.begin(), values.end(),
-                        [](const HloValue* value) {
-                          return value->defining_instruction()->opcode() ==
-                                 HloOpcode::kConstant;
-                        })) {
-          TF_ASSIGN_OR_RETURN(HloInstruction * copy, FindOrInsertCopy(operand));
-          TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(i, copy));
-          changed = true;
-        }
+        TF_RETURN_IF_ERROR(copy_operand_if_constant(i));
       }
     }
   }
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index 0fd85e4fb057f144df93d53485570d67c66af0d4..51d164cdf427f9513bc340e090832a9b064b999c 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -66,10 +66,12 @@ class HloExecutionProfiler {
 
   // If profiling is enabled, sets the total cycle count on the profile from the
   // execution timer.
-  ~HloExecutionProfiler() {
+  void FinishExecution() {
+    CHECK(!finished_execution_) << "Call FinishExecution only once!";
+    finished_execution_ = true;
     if (do_profile_) {
       stream_->ThenStopTimer(execution_timer_.get());
-      stream_->BlockHostUntilDone();
+      stream_->BlockHostUntilDone().IgnoreError();
       profile_->set_total_cycles_executed(
           *computation_, execution_timer_->Nanoseconds() * clock_rate_ghz_);
     }
@@ -87,7 +89,7 @@ class HloExecutionProfiler {
   void FinishOperation(const HloInstruction* hlo_instruction) {
     if (do_profile_) {
       stream_->ThenStopTimer(per_op_timer_.get());
-      stream_->BlockHostUntilDone();
+      stream_->BlockHostUntilDone().IgnoreError();
       profile_->SetCyclesTakenBy(
           hlo_instruction, per_op_timer_->Nanoseconds() * clock_rate_ghz_);
     }
@@ -101,6 +103,7 @@ class HloExecutionProfiler {
   const HloComputation* computation_;
   std::unique_ptr<se::Timer> execution_timer_;
   std::unique_ptr<se::Timer> per_op_timer_;
+  bool finished_execution_ = false;
 };
 
 }  // namespace
@@ -143,9 +146,12 @@ Status GpuExecutable::ExecuteThunks(
   if (do_profile) {
     LOG(WARNING) << "PROFILING: profiling is enabled";
   }
+
   HloExecutionProfiler profiler(do_profile, hlo_execution_profile, main_stream,
                                 hlo_module_->entry_computation());
 
+  uint64 start_micros = tensorflow::Env::Default()->NowMicros();
+
   // Stream 0 indicates `main_stream` and substreams start from stream 1.
   std::vector<Pool<se::Stream>::SmartPtr> sub_streams;
   while (sub_streams.size() + 1 < thunk_schedule_->StreamCount()) {
@@ -155,6 +161,9 @@ Status GpuExecutable::ExecuteThunks(
         run_options->BorrowStream(main_stream->parent()->device_ordinal()));
   }
 
+  // The next event enqueued on stream N must not run until the thunk at
+  // last_blocking_thunk_for_stream[N] completes.
+  std::map<int32, const Thunk*> last_blocking_thunk_for_stream;
   std::map<const Thunk*, std::unique_ptr<se::Event>> thunk_to_finish_event;
   for (Thunk* thunk : thunk_schedule_->TotalOrder()) {
     TF_RETURN_IF_ERROR(thunk->Initialize(*this));
@@ -167,15 +176,41 @@ Status GpuExecutable::ExecuteThunks(
       stream->ThenWaitFor(FindOrDie(thunk_to_finish_event, dependency).get());
     }
 
+    if (last_blocking_thunk_for_stream.count(stream_no)) {
+      stream->ThenWaitFor(FindOrDie(thunk_to_finish_event,
+                                    last_blocking_thunk_for_stream[stream_no])
+                              .get());
+      last_blocking_thunk_for_stream.erase(stream_no);
+    }
+
+    // If this thunk requests it, wait for all currently-executing thunks to
+    // finish.  This is useful e.g. if the thunk is about to perform autotuning.
+    if (thunk->ShouldHaltAllActivityBeforeRunning(stream)) {
+      TF_RETURN_IF_ERROR(main_stream->BlockHostUntilDone());
+      last_blocking_thunk_for_stream.clear();
+    }
+
     profiler.StartOperation();
     VLOG(2) << "Executing the thunk for "
-            << thunk->hlo_instruction()->ToString();
+            << thunk->hlo_instruction()->ToString() << " on stream "
+            << stream_no;
     TF_RETURN_IF_ERROR(thunk->ExecuteOnStream(buffer_allocations, stream));
-    if (thunk_schedule_->Depended(thunk)) {
+    if (thunk_schedule_->Depended(thunk) || thunk->ShouldBlockFutureThunks()) {
       auto finish_event = MakeUnique<se::Event>(main_stream->parent());
       finish_event->Init();
       stream->ThenRecordEvent(finish_event.get());
       thunk_to_finish_event[thunk] = std::move(finish_event);
+
+      if (thunk->ShouldBlockFutureThunks()) {
+        // Set last_blocking_thunk_for_stream on all streams other than this one
+        // so that all other streams will wait for this thunk to complete before
+        // executing any events that occur later in the total order.
+        for (int32 i = 0; i < sub_streams.size() + 1; ++i) {
+          if (i != stream_no) {
+            last_blocking_thunk_for_stream[i] = thunk;
+          }
+        }
+      }
     }
     profiler.FinishOperation(thunk->hlo_instruction());
   }
@@ -184,90 +219,32 @@ Status GpuExecutable::ExecuteThunks(
   // Make sure kernels are completed before deallocating temporary buffers.
   // TODO(b/30100571): we could potentially postpone deallocating the temp
   // buffers until a different computation is executed.
-  if (block_host_until_done && !main_stream->BlockHostUntilDone()) {
-    return InternalError("Failed to complete all kernels launched on stream %p",
-                         main_stream);
+  if (block_host_until_done) {
+    Status block_status = main_stream->BlockHostUntilDone();
+    if (!block_status.ok()) {
+      return InternalError(
+          "Failed to complete all kernels launched on stream %p: %s",
+          main_stream, block_status.error_message().c_str());
+    }
   }
 
-  return Status::OK();
-}
+  profiler.FinishExecution();
+  uint64 end_micros = tensorflow::Env::Default()->NowMicros();
 
-StatusOr<se::DeviceMemoryBase> GpuExecutable::ExecuteOnStream(
-    const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    HloExecutionProfile* hlo_execution_profile) {
-  se::Stream* stream = run_options->stream();
-  DeviceMemoryAllocator* memory_allocator = run_options->allocator();
+  {
+    tensorflow::mutex_lock lock(mutex_);
+    const double nanoseconds = (end_micros - start_micros) * 1000.0;
+    execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0));
 
-  BufferAllocations::Builder buffer_allocations_builder;
-  for (BufferAllocation::Index i = 0; i < assignment_->Allocations().size();
-       ++i) {
-    const BufferAllocation& allocation = assignment_->GetAllocation(i);
-    if (allocation.is_entry_computation_parameter()) {
-      buffer_allocations_builder.RegisterBuffer(
-          i, arguments[allocation.parameter_number()]);
+    // If hlo profiling was disabled then the cycle count is left empty.
+    if (do_profile) {
+      execution_profile_.set_compute_cycle_count(
+          hlo_execution_profile->total_cycles_executed(
+              *module().entry_computation()));
     }
   }
-  se::StreamExecutor* executor = stream->parent();
-  TF_ASSIGN_OR_RETURN(
-      auto buffer_allocations,
-      buffer_allocations_builder.Build(*assignment_, executor->device_ordinal(),
-                                       memory_allocator));
 
-  bool block_host_until_done =
-      !memory_allocator->AllowsAsynchronousDeallocation();
-  TF_RETURN_IF_ERROR(ExecuteThunks(run_options, *buffer_allocations,
-                                   block_host_until_done,
-                                   hlo_execution_profile));
-
-  HloInstruction* root = hlo_module_->entry_computation()->root_instruction();
-  TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice output_slice,
-                      assignment_->GetUniqueTopLevelOutputSlice());
-  se::DeviceMemoryBase output_buffer_address =
-      buffer_allocations->GetDeviceAddress(output_slice.index());
-
-  if (ShapeUtil::IsTuple(root->shape())) {
-    std::set<se::DeviceMemoryBase> referred_by_output;
-    if (GetRootPointsToSet().IsAmbiguous()) {
-      // The points-to set of the root is ambiguous so we need to examine the
-      // result data to determine which buffers are contained in the result.
-      TF_ASSIGN_OR_RETURN(
-          TransferManager * transfer_manager,
-          TransferManager::GetForPlatform(executor->platform()));
-      TF_ASSIGN_OR_RETURN(referred_by_output,
-                          transfer_manager->GatherBufferPointersFromTuple(
-                              executor, output_buffer_address, root->shape()));
-    } else {
-      // The points-to set of the root is unambiguous so it's known statically
-      // which buffers are in the result. Gather these buffers using the root's
-      // points-to set.
-      TF_RETURN_IF_ERROR(GetRootPointsToSet().ForEachElementWithStatus(
-          [&referred_by_output, &buffer_allocations, this](
-              const ShapeIndex& /*index*/,
-              const PointsToSet::BufferList& buffers) {
-            // The points to set is unambiguous so the set should be a
-            // singleton. That is, we know exactly which instruction produced
-            // the array at this element.
-            CHECK_EQ(1, buffers.size());
-            HloInstruction* hlo = buffers[0]->instruction();
-            TF_ASSIGN_OR_RETURN(
-                const BufferAllocation::Slice slice,
-                this->assignment_->GetUniqueSlice(hlo, buffers[0]->index()));
-            CHECK(!slice.allocation()->is_entry_computation_parameter());
-            referred_by_output.insert(
-                buffer_allocations->GetDeviceAddress(slice.index()));
-            return Status::OK();
-          }));
-    }
-    TF_RETURN_IF_ERROR(
-        buffer_allocations->TearDown(referred_by_output, *assignment_));
-  } else {
-    // If the computation result is not a tuple, we can delete all temporary
-    // buffers that are not the output.
-    TF_RETURN_IF_ERROR(
-        buffer_allocations->TearDown({output_buffer_address}, *assignment_));
-  }
-  return output_buffer_address;
+  return Status::OK();
 }
 
 StatusOr<std::unique_ptr<ShapedBuffer>> GpuExecutable::ExecuteOnStream(
@@ -287,7 +264,7 @@ StatusOr<std::unique_ptr<ShapedBuffer>> GpuExecutable::ExecuteOnStream(
     if (allocation.is_entry_computation_parameter()) {
       auto param_no = allocation.parameter_number();
       buffer_allocations_builder.RegisterBuffer(
-          i, arguments[param_no]->buffer(/*index=*/{}));
+          i, arguments[param_no]->root_buffer());
     }
   }
   se::StreamExecutor* executor = run_options->stream()->parent();
@@ -305,50 +282,46 @@ StatusOr<std::unique_ptr<ShapedBuffer>> GpuExecutable::ExecuteOnStream(
   HloInstruction* root = hlo_module_->entry_computation()->root_instruction();
   auto device_ordinal = executor->device_ordinal();
   auto shaped_buffer = MakeUnique<ShapedBuffer>(
-      root->shape(), executor->platform(), device_ordinal);
+      root->shape(), root->shape(), executor->platform(), device_ordinal);
 
   // Copy DeviceMemoryBase values which contain the array(s) of the result into
   // the respective location in ShapedBuffer.
   std::set<se::DeviceMemoryBase> buffers_in_result;
-  TF_RETURN_IF_ERROR(
-      shaped_buffer->mutable_shape_index_to_buffer_entry()
-          ->ForEachMutableElementWithStatus(
-              [&buffer_allocations, &buffers_in_result, &shaped_buffer, this](
-                  const ShapeIndex& index, size_t* buffer_entry) {
-                const auto& sources = this->GetRootPointsToSet().element(index);
-                // The points-to set is unambiguous so the set should be a
-                // singleton. That is, we know exactly which instruction
-                // produced the array at this element.
-                CHECK_EQ(1, sources.size());
-                auto src_hlo = sources[0]->instruction();
-
-                VLOG(4) << "Looking at: " << sources[0];
-
-                // The source instruction should have a non-parameter buffer
-                // assigned.
-                TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice,
-                                    this->assignment_->GetUniqueSlice(
-                                        src_hlo, sources[0]->index()));
-                CHECK(!slice.allocation()->is_entry_computation_parameter());
-
-                perftools::gputools::DeviceMemoryBase src_base =
-                    buffer_allocations->GetDeviceAddress(slice.index());
-                CHECK(!src_base.is_null() || src_base.size() == 0);
-                shaped_buffer->mutable_buffers()->push_back(src_base);
-                *buffer_entry = shaped_buffer->mutable_buffers()->size() - 1;
-
-                buffers_in_result.insert(src_base);
-                return Status::OK();
-              }));
+  TF_RETURN_IF_ERROR(shaped_buffer->buffers().ForEachMutableElementWithStatus(
+      [&buffer_allocations, &buffers_in_result, &shaped_buffer, this](
+          const ShapeIndex& index, se::DeviceMemoryBase* device_memory) {
+        const auto& sources = this->GetRootPointsToSet().element(index);
+        // The points-to set is unambiguous so the set should be a
+        // singleton. That is, we know exactly which instruction
+        // produced the array at this element.
+        CHECK_EQ(1, sources.size());
+        auto src_hlo = sources[0]->instruction();
+
+        VLOG(4) << "Looking at: " << sources[0];
+
+        // The source instruction should have a non-parameter buffer
+        // assigned.
+        TF_ASSIGN_OR_RETURN(
+            const BufferAllocation::Slice slice,
+            this->assignment_->GetUniqueSlice(src_hlo, sources[0]->index()));
+        CHECK(!slice.allocation()->is_entry_computation_parameter());
+
+        perftools::gputools::DeviceMemoryBase src_base =
+            buffer_allocations->GetDeviceAddress(slice.index());
+        CHECK(!src_base.is_null() || src_base.size() == 0);
+        *device_memory = src_base;
+        buffers_in_result.insert(src_base);
+        return Status::OK();
+      }));
   TF_RETURN_IF_ERROR(
       buffer_allocations->TearDown(buffers_in_result, *assignment_));
 
   return std::move(shaped_buffer);
 }
 
-StatusOr<se::DeviceMemoryBase> GpuExecutable::ExecuteAsyncOnStream(
+StatusOr<std::unique_ptr<ShapedBuffer>> GpuExecutable::ExecuteAsyncOnStream(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) {
   // TODO(b/30671675): Implement asynchronous execution mode.
   return Unimplemented(
       "Asynchronous execution on stream is not yet supported on GPU.");
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
index e7307e07c0b5608e31f15597d31d11c50f81c6d5..00da64dfade8ddb0694c0ee7ac158c9f2e15a508 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
@@ -72,24 +72,16 @@ class GpuExecutable : public Executable {
   // empty, in which case compilation is left up to the GPU driver.
   const std::vector<uint8>& cubin() const { return cubin_; }
 
-  // Both overloads of ExecuteOnStream will fail if the compute capability of
-  // the stream doesn't match the compute capability passed to this object's
-  // constructor.
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      HloExecutionProfile* hlo_execution_profile) override;
-
+  // ExecuteOnStream will fail if the compute capability of the stream doesn't
+  // match the compute capability passed to this object's constructor.
   StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       HloExecutionProfile* hlo_execution_profile) override;
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
       const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments) override;
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override;
 
   const Status EqualOrFail(const Executable& executable) {
     // TODO(b/62952745) Implement equality test on GPU executable.
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4944c41f7d8dc7a78a3cd094aee4d7087c74857e
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc
@@ -0,0 +1,48 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h"
+
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace xla {
+
+StatusOr<bool> GpuHloSupportChecker::Run(HloModule* module) {
+  for (auto* computation : module->computations()) {
+    for (const auto& instruction : computation->instructions()) {
+      TF_RETURN_IF_ERROR(
+          ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape()));
+      TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus(
+          instruction->shape(),
+          [&instruction](const Shape& subshape, const ShapeIndex&) {
+            if (LayoutUtil::IsSparseArray(subshape)) {
+              return xla::Unimplemented(
+                  "GPU backend does not support HLO instruction %s with shape "
+                  "containing a sparse layout: %s",
+                  instruction->ToString().c_str(),
+                  ShapeUtil::HumanStringWithLayout(instruction->shape())
+                      .c_str());
+            }
+            return Status::OK();
+          }));
+    }
+  }
+  return false;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h
new file mode 100644
index 0000000000000000000000000000000000000000..d9550f81b591ead3f6e8d3de4f62896ee04d2f82
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// his pass should run early in the HLO pipeline and checks for HLO constructs
+// which are not supported by the GPU backend and cannot be removed via HLO
+// transformations (eg, sparse layouts).
+class GpuHloSupportChecker : public HloPassInterface {
+ public:
+  GpuHloSupportChecker() = default;
+  ~GpuHloSupportChecker() override = default;
+
+  tensorflow::StringPiece name() const override {
+    return "gpu_hlo_support_checker";
+  }
+
+  // Note: always returns false (no instructions are ever modified by this
+  // pass).
+  StatusOr<bool> Run(HloModule* module) override;
+};
+
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0a4089df4c954cafcbe241189ee79a0995683513
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc
@@ -0,0 +1,72 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+using ::testing::HasSubstr;
+
+class GpuHloSupportCheckerTest : public HloTestBase {
+ protected:
+  GpuHloSupportChecker& checker() { return checker_; }
+
+ private:
+  GpuHloSupportChecker checker_;
+};
+
+TEST_F(GpuHloSupportCheckerTest, Add) {
+  HloComputation::Builder builder(TestName());
+  const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  HloInstruction* param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param0"));
+  HloInstruction* param1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, scalar_shape, "param1"));
+  builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape, HloOpcode::kAdd, param0, param1));
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  TF_ASSERT_OK(checker().Run(module.get()).status());
+}
+
+TEST_F(GpuHloSupportCheckerTest, SparseUnimplemented) {
+  HloComputation::Builder builder(TestName());
+  const Shape sparse_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {10}, 2);
+  HloInstruction* param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, sparse_shape, "param0"));
+  HloInstruction* param1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, sparse_shape, "param1"));
+  builder.AddInstruction(HloInstruction::CreateBinary(
+      sparse_shape, HloOpcode::kAdd, param0, param1));
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  Status status = checker().Run(module.get()).status();
+  ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED);
+  EXPECT_THAT(status.error_message(),
+              HasSubstr("GPU backend does not support"));
+  EXPECT_THAT(status.error_message(),
+              HasSubstr(ShapeUtil::HumanStringWithLayout(sparse_shape)));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
similarity index 57%
rename from tensorflow/compiler/xla/service/gpu/layout_assignment.cc
rename to tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index d475c4171b56ceedf5fdbda8b4d6221af844261c..58915f1f62f0c0f320443058a798333c498ffe47 100644
--- a/tensorflow/compiler/xla/service/gpu/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/layout_assignment.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h"
 
 #include <memory>
 
@@ -149,5 +149,106 @@ Status GpuLayoutAssignment::AddBackendConstraints(
   return Status::OK();
 }
 
+bool GpuLayoutAssignment::CustomCallRequiresMajorFirstLayout(
+    const HloInstruction* instruction) {
+  // Inputs to cudnn batchnorm custom calls don't need the major-first layout
+  // (i.e. {n, n-1, ...0}) -- we can handle any layout.
+  return !IsCustomCallToDnnBatchNorm(*instruction);
+}
+
+Status GpuLayoutAssignment::PropagateOperandConstraint(
+    const OperandLayoutConstraint& layout_constraint,
+    LayoutConstraints* constraints) {
+  const HloInstruction* instruction = layout_constraint.instruction();
+
+  // cudnn batchnorm forward inference's result must have the same layout as its
+  // operand 0.
+  if (instruction->opcode() == HloOpcode::kCustomCall &&
+      instruction->custom_call_target() ==
+          kCudnnBatchNormForwardInferenceCallTarget &&
+      layout_constraint.operand_no() == 0) {
+    TF_RETURN_IF_ERROR(constraints->SetInstructionLayout(
+        layout_constraint.shape_layout().shape(), instruction));
+  }
+
+  // cudnn batchnorm forward training returns a tuple {output, mean,
+  // inverse-stddev}.  mean and inverse-stddev are rank 1 and so have only one
+  // possible layout, but output is not (necessarily) rank 1, and, like in
+  // batchnorm forward inference, must have the same layout as operand 0.
+  if (instruction->opcode() == HloOpcode::kCustomCall &&
+      instruction->custom_call_target() ==
+          kCudnnBatchNormForwardTrainingCallTarget &&
+      layout_constraint.operand_no() == 0) {
+    TF_ASSIGN_OR_RETURN(const LogicalBuffer* out_buf,
+                        constraints->points_to_analysis().GetBufferDefinedAt(
+                            instruction, /*index=*/{0}));
+    TF_RETURN_IF_ERROR(constraints->SetBufferLayout(
+        layout_constraint.shape_layout().layout(), *out_buf));
+  }
+
+  // Like forward training, cudnn batchnorm backward returns a tuple {output,
+  // mean, inverse-stddev}, and its operand 0 and 'output' must have the same
+  // layout.  In addition, its operand 0 and operand 4 -- the 'operand' and
+  // 'grad_output' parameters -- must have the same layout.
+  if (instruction->opcode() == HloOpcode::kCustomCall &&
+      instruction->custom_call_target() == kCudnnBatchNormBackwardCallTarget &&
+      (layout_constraint.operand_no() == 0 ||
+       layout_constraint.operand_no() == 4)) {
+    TF_ASSIGN_OR_RETURN(const LogicalBuffer* out_buf,
+                        constraints->points_to_analysis().GetBufferDefinedAt(
+                            instruction, /*index=*/{0}));
+    TF_RETURN_IF_ERROR(constraints->SetBufferLayout(
+        layout_constraint.shape_layout().layout(), *out_buf));
+
+    int64 operand_to_set = layout_constraint.operand_no() == 0 ? 4 : 0;
+    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
+        layout_constraint.shape_layout().shape(), instruction, operand_to_set));
+  }
+
+  return LayoutAssignment::PropagateOperandConstraint(layout_constraint,
+                                                      constraints);
+}
+
+Status GpuLayoutAssignment::PropagateBufferConstraint(
+    const BufferLayoutConstraint& buffer_constraint,
+    LayoutConstraints* constraints) {
+  const LogicalBuffer& buf = buffer_constraint.buffer();
+  const HloInstruction* instruction = buf.instruction();
+
+  Shape shape_with_layout = buf.shape();
+  *shape_with_layout.mutable_layout() = buffer_constraint.layout();
+
+  // Propagate output constraints to the operands of cudnn batchnorm ops.  This
+  // is the same as PropagateOperandConstraint, just in the other direction.  We
+  // need to both to fulfill our contract to LayoutAssignment.
+  if (instruction->opcode() == HloOpcode::kCustomCall &&
+      instruction->custom_call_target() ==
+          kCudnnBatchNormForwardInferenceCallTarget) {
+    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
+        shape_with_layout, instruction, /*operand_no=*/0));
+  }
+
+  if (instruction->opcode() == HloOpcode::kCustomCall &&
+      instruction->custom_call_target() ==
+          kCudnnBatchNormForwardTrainingCallTarget &&
+      buf.index() == ShapeIndex({0})) {
+    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
+        shape_with_layout, instruction, /*operand_no=*/0));
+  }
+  if (instruction->opcode() == HloOpcode::kCustomCall &&
+      instruction->custom_call_target() == kCudnnBatchNormBackwardCallTarget &&
+      buf.index() == ShapeIndex({0})) {
+    // batchnorm backward has two operands, "operand" and "grad_output" whose
+    // layouts must both match that of the result at tuple-index 0.
+    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
+        shape_with_layout, instruction, /*operand_no=*/0));
+    TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
+        shape_with_layout, instruction, /*operand_no=*/4));
+  }
+
+  return LayoutAssignment::PropagateBufferConstraint(buffer_constraint,
+                                                     constraints);
+}
+
 }  // namespace gpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
similarity index 70%
rename from tensorflow/compiler/xla/service/gpu/layout_assignment.h
rename to tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
index 169041eb85c633cb4f1f679bcea127714828308f..86a3a7111fd79494e469beecf3234f6cec9adb9c 100644
--- a/tensorflow/compiler/xla/service/gpu/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_LAYOUT_ASSIGNMENT_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_LAYOUT_ASSIGNMENT_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_LAYOUT_ASSIGNMENT_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_LAYOUT_ASSIGNMENT_H_
 
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/layout_assignment.h"
@@ -33,9 +33,17 @@ class GpuLayoutAssignment : public LayoutAssignment {
 
  protected:
   Status AddBackendConstraints(LayoutConstraints* constraints) override;
+  Status PropagateOperandConstraint(
+      const OperandLayoutConstraint& layout_constraint,
+      LayoutConstraints* constraints) override;
+  Status PropagateBufferConstraint(
+      const BufferLayoutConstraint& buffer_constraint,
+      LayoutConstraints* constraints) override;
+  bool CustomCallRequiresMajorFirstLayout(
+      const HloInstruction* instruction) override;
 };
 
 }  // namespace gpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_LAYOUT_ASSIGNMENT_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_LAYOUT_ASSIGNMENT_H_
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4c45d2e94aebce5496da94841f6a1ae9015615c1
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
@@ -0,0 +1,328 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h"
+
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/service/computation_layout.h"
+#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/shape_layout.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+
+namespace xla {
+namespace gpu {
+namespace {
+
+using LayoutAssignmentTest = HloTestBase;
+
+TEST_F(LayoutAssignmentTest, Elementwise) {
+  Shape ashape = ShapeUtil::MakeShape(F32, {42, 12});
+  Shape ashape_in_row_major(ashape);
+  Shape ashape_in_col_major(ashape);
+  *ashape_in_row_major.mutable_layout() = LayoutUtil::MakeLayout({1, 0});
+  *ashape_in_col_major.mutable_layout() = LayoutUtil::MakeLayout({0, 1});
+
+  // Enumerate all possible combinations of layouts.
+  for (const Shape& lhs_shape_with_layout :
+       {ashape_in_row_major, ashape_in_col_major}) {
+    for (const Shape& rhs_shape_with_layout :
+         {ashape_in_row_major, ashape_in_col_major}) {
+      for (const Shape& result_shape_with_layout :
+           {ashape_in_row_major, ashape_in_col_major}) {
+        // GpuLayoutAssignment should assign the same layout to "add" and its
+        // two operands.
+        auto builder = HloComputation::Builder(TestName());
+        auto x = builder.AddInstruction(
+            HloInstruction::CreateParameter(0, ashape, "x"));
+        auto y = builder.AddInstruction(
+            HloInstruction::CreateParameter(1, ashape, "y"));
+        auto add = builder.AddInstruction(
+            HloInstruction::CreateBinary(ashape, HloOpcode::kAdd, x, y));
+        auto module = CreateNewModule();
+        HloComputation* computation =
+            module->AddEntryComputation(builder.Build(add));
+
+        ComputationLayout computation_layout(
+            computation->ComputeProgramShape());
+        *computation_layout.mutable_parameter_layout(0) =
+            ShapeLayout(lhs_shape_with_layout);
+        *computation_layout.mutable_parameter_layout(1) =
+            ShapeLayout(rhs_shape_with_layout);
+        *computation_layout.mutable_result_layout() =
+            ShapeLayout(result_shape_with_layout);
+
+        GpuLayoutAssignment layout_assignment(&computation_layout);
+        EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
+
+        for (const HloInstruction* operand : add->operands()) {
+          EXPECT_TRUE(LayoutUtil::Equal(add->shape().layout(),
+                                        operand->shape().layout()));
+        }
+      }
+    }
+  }
+}
+
+// Returns a list shapes with all the possible layouts of this shape, including
+// a shape with no layout.
+std::vector<Shape> AllLayoutsOf(const Shape& s) {
+  std::vector<int64> layout_vec(s.dimensions_size());
+  std::iota(layout_vec.begin(), layout_vec.end(), 0);
+
+  std::vector<Shape> shapes;
+  shapes.push_back(s);
+  shapes.back().clear_layout();
+
+  do {
+    shapes.push_back(s);
+    *shapes.back().mutable_layout() = LayoutUtil::MakeLayout(layout_vec);
+  } while (std::next_permutation(layout_vec.begin(), layout_vec.end()));
+
+  return shapes;
+}
+
+TEST_F(LayoutAssignmentTest, BatchNormInference) {
+  const int64 kFeatureIndex = 1;
+
+  // The shape of the data operand to BatchNormInference and of the output of
+  // the BatchNormInference call.
+  Shape shape = ShapeUtil::MakeShape(F32, {42, 12, 1, 100});
+
+  // The shape of the scale, offset, mean, and variance inputs to
+  // BatchNormTraining.  These are rank 1, with as many elements are in the
+  // kFeatureIndex dim of shape.
+  Shape aux_shape =
+      ShapeUtil::MakeShape(F32, {shape.dimensions(kFeatureIndex)});
+
+  for (const Shape& input_shape : AllLayoutsOf(shape)) {
+    for (const Shape& result_shape : AllLayoutsOf(shape)) {
+      SCOPED_TRACE(tensorflow::strings::StrCat(
+          "input_shape=", ShapeUtil::HumanStringWithLayout(input_shape),
+          ", result_shape=", ShapeUtil::HumanStringWithLayout(result_shape)));
+
+      auto builder = HloComputation::Builder(TestName());
+      auto* operand = builder.AddInstruction(
+          HloInstruction::CreateParameter(0, shape, "operand"));
+      auto* scale = builder.AddInstruction(
+          HloInstruction::CreateParameter(1, aux_shape, "scale"));
+      auto* offset = builder.AddInstruction(
+          HloInstruction::CreateParameter(2, aux_shape, "offset"));
+      auto* mean = builder.AddInstruction(
+          HloInstruction::CreateParameter(3, aux_shape, "mean"));
+      auto* variance = builder.AddInstruction(
+          HloInstruction::CreateParameter(4, aux_shape, "variance"));
+
+      auto* epsilon = builder.AddInstruction(
+          HloInstruction::CreateConstant(Literal::CreateR0<float>(1)));
+      auto* feature_index =
+          builder.AddInstruction(HloInstruction::CreateConstant(
+              Literal::CreateR0<int64>(kFeatureIndex)));
+
+      auto* batchnorm = builder.AddInstruction(HloInstruction::CreateCustomCall(
+          shape,
+          {operand, scale, offset, mean, variance, epsilon, feature_index},
+          kCudnnBatchNormForwardInferenceCallTarget));
+
+      auto module = CreateNewModule();
+      HloComputation* computation =
+          module->AddEntryComputation(builder.Build(batchnorm));
+
+      ComputationLayout computation_layout(computation->ComputeProgramShape());
+
+      if (input_shape.has_layout()) {
+        *computation_layout.mutable_parameter_layout(0) =
+            ShapeLayout(input_shape);
+      }
+
+      if (result_shape.has_layout()) {
+        *computation_layout.mutable_result_layout() = ShapeLayout(result_shape);
+      }
+
+      GpuLayoutAssignment layout_assignment(&computation_layout);
+      EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
+
+      // The first operand to batchnorm should have the same layout as the
+      // result.
+      EXPECT_TRUE(LayoutUtil::Equal(batchnorm->operand(0)->shape().layout(),
+                                    batchnorm->shape().layout()))
+          << batchnorm->ToString();
+    }
+  }
+}
+
+TEST_F(LayoutAssignmentTest, BatchNormTraining) {
+  const int64 kFeatureIndex = 1;
+
+  // The shape of the data operand to BatchNormTraining.
+  Shape shape = ShapeUtil::MakeShape(F32, {42, 12, 1, 100});
+
+  // The shape of the offset and scale inputs to BatchNormTraining.  These are
+  // rank 1, with as many elements are in the kFeatureIndex dim of shape.
+  Shape offset_scale_shape =
+      ShapeUtil::MakeShape(F32, {shape.dimensions(kFeatureIndex)});
+
+  // Shape of the output of our BatchNormTraining op.
+  Shape batchnorm_shape = ShapeUtil::MakeTupleShape(
+      {shape, offset_scale_shape, offset_scale_shape});
+
+  // Enumerate all combinations of shapes.
+  for (const Shape& input_shape : AllLayoutsOf(shape)) {
+    for (const Shape& result_shape : AllLayoutsOf(shape)) {
+      SCOPED_TRACE(tensorflow::strings::StrCat(
+          "input_shape=", ShapeUtil::HumanStringWithLayout(input_shape),
+          ", result_shape=", ShapeUtil::HumanStringWithLayout(result_shape)));
+
+      auto builder = HloComputation::Builder(TestName());
+      auto* operand = builder.AddInstruction(
+          HloInstruction::CreateParameter(0, shape, "operand"));
+      auto* scale = builder.AddInstruction(
+          HloInstruction::CreateParameter(1, offset_scale_shape, "scale"));
+      auto* offset = builder.AddInstruction(
+          HloInstruction::CreateParameter(2, offset_scale_shape, "offset"));
+
+      auto* epsilon = builder.AddInstruction(
+          HloInstruction::CreateConstant(Literal::CreateR0<float>(1)));
+      auto* feature_index =
+          builder.AddInstruction(HloInstruction::CreateConstant(
+              Literal::CreateR0<int64>(kFeatureIndex)));
+
+      auto* batchnorm = builder.AddInstruction(HloInstruction::CreateCustomCall(
+          batchnorm_shape, {operand, scale, offset, epsilon, feature_index},
+          kCudnnBatchNormForwardTrainingCallTarget));
+
+      auto module = CreateNewModule();
+      HloComputation* computation =
+          module->AddEntryComputation(builder.Build(batchnorm));
+
+      ComputationLayout computation_layout(computation->ComputeProgramShape());
+
+      if (input_shape.has_layout()) {
+        *computation_layout.mutable_parameter_layout(0) =
+            ShapeLayout(input_shape);
+      }
+
+      if (result_shape.has_layout()) {
+        *computation_layout.mutable_result_layout() =
+            ShapeLayout(ShapeUtil::MakeTupleShape(
+                {result_shape, offset_scale_shape, offset_scale_shape}));
+      }
+
+      GpuLayoutAssignment layout_assignment(&computation_layout);
+      EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
+
+      // The first operand to batchnorm should have the same layout as the
+      // first element of the result tuple.
+      EXPECT_TRUE(
+          LayoutUtil::Equal(batchnorm->operand(0)->shape().layout(),
+                            batchnorm->shape().tuple_shapes(0).layout()))
+          << batchnorm->ToString();
+    }
+  }
+}
+
+TEST_F(LayoutAssignmentTest, BatchNormGrad) {
+  const int64 kFeatureIndex = 1;
+
+  // The shape of the data operand to BatchNormTraining.
+  Shape shape = ShapeUtil::MakeShape(F32, {42, 12, 1, 100});
+
+  // The shape of the scale, mean, and variance inputs to BatchNormGrad.  These
+  // are rank 1, with as many elements are in the kFeatureIndex dim of shape.
+  Shape scale_shape =
+      ShapeUtil::MakeShape(F32, {shape.dimensions(kFeatureIndex)});
+
+  // Shape of the output of our BatchNormGrad op.
+  Shape batchnorm_shape =
+      ShapeUtil::MakeTupleShape({shape, scale_shape, scale_shape});
+
+  // Enumerate all combinations of shapes plus whether we're constraining param
+  // 0 or param 4.
+  for (const Shape& input_shape : AllLayoutsOf(shape)) {
+    for (const Shape& result_shape : AllLayoutsOf(shape)) {
+      for (int constrained_param_no : {0, 4}) {
+        SCOPED_TRACE(tensorflow::strings::StrCat(
+            "input_shape=", ShapeUtil::HumanStringWithLayout(input_shape),
+            ", result_shape=", ShapeUtil::HumanStringWithLayout(result_shape)));
+
+        auto builder = HloComputation::Builder(TestName());
+        auto* operand = builder.AddInstruction(
+            HloInstruction::CreateParameter(0, shape, "operand"));
+        auto* scale = builder.AddInstruction(
+            HloInstruction::CreateParameter(1, scale_shape, "scale"));
+        auto* mean = builder.AddInstruction(
+            HloInstruction::CreateParameter(2, scale_shape, "mean"));
+        auto* var = builder.AddInstruction(
+            HloInstruction::CreateParameter(3, scale_shape, "var"));
+        auto* grad_offset = builder.AddInstruction(
+            HloInstruction::CreateParameter(4, shape, "var"));
+
+        auto* epsilon = builder.AddInstruction(
+            HloInstruction::CreateConstant(Literal::CreateR0<float>(1)));
+        auto* feature_index =
+            builder.AddInstruction(HloInstruction::CreateConstant(
+                Literal::CreateR0<int64>(kFeatureIndex)));
+
+        auto* batchnorm =
+            builder.AddInstruction(HloInstruction::CreateCustomCall(
+                batchnorm_shape,
+                {operand, scale, mean, var, grad_offset, epsilon,
+                 feature_index},
+                kCudnnBatchNormBackwardCallTarget));
+
+        auto module = CreateNewModule();
+        HloComputation* computation =
+            module->AddEntryComputation(builder.Build(batchnorm));
+
+        ComputationLayout computation_layout(
+            computation->ComputeProgramShape());
+
+        if (input_shape.has_layout()) {
+          *computation_layout.mutable_parameter_layout(constrained_param_no) =
+              ShapeLayout(input_shape);
+        }
+
+        if (result_shape.has_layout()) {
+          *computation_layout.mutable_result_layout() =
+              ShapeLayout(ShapeUtil::MakeTupleShape(
+                  {result_shape, scale_shape, scale_shape}));
+        }
+
+        GpuLayoutAssignment layout_assignment(&computation_layout);
+        EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
+
+        // The first and fourth operands to the batchnorm call should have the
+        // same layout as the first element of the result tuple.
+        EXPECT_TRUE(
+            LayoutUtil::Equal(batchnorm->operand(0)->shape().layout(),
+                              batchnorm->shape().tuple_shapes(0).layout()))
+            << batchnorm->ToString();
+        EXPECT_TRUE(
+            LayoutUtil::Equal(batchnorm->operand(4)->shape().layout(),
+                              batchnorm->shape().tuple_shapes(0).layout()))
+            << batchnorm->ToString();
+      }
+    }
+  }
+}
+
+}  // namespace
+}  // namespace gpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
index f0f036f7f381db15b84db85d3efeec5d8141884e..af9897769fda371e47af06c19abce9a06015e094 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
@@ -44,7 +44,7 @@ GpuTransferManager::GpuTransferManager()
     : GenericTransferManager(
           se::cuda::kCudaPlatformId,
           /*pointer_size=*/llvm::DataLayout(gpu::GpuCompiler::kDataLayout)
-              .getPointerSize()) {}
+              .getPointerSize(0 /* default address space */)) {}
 
 Status GpuTransferManager::TransferLiteralToInfeed(se::StreamExecutor* executor,
                                                    const Literal& literal) {
@@ -54,7 +54,7 @@ Status GpuTransferManager::TransferLiteralToInfeed(se::StreamExecutor* executor,
 
   if (!ShapeUtil::IsTuple(shape)) {
     int64 size = GetByteSizeRequirement(shape);
-    return TransferBufferToInfeed(executor, size, literal.InternalData());
+    return TransferBufferToInfeed(executor, size, literal.untyped_data());
   }
 
   if (ShapeUtil::IsNestedTuple(shape)) {
@@ -67,20 +67,21 @@ Status GpuTransferManager::TransferLiteralToInfeed(se::StreamExecutor* executor,
   // enqueue the resulting destination device addresses with the
   // infeed manager.
   std::vector<gpu::InfeedBuffer*> buffers;
-  buffers.reserve(literal.tuple_literals_size());
+  buffers.reserve(ShapeUtil::TupleElementCount(shape));
   auto cleanup = tensorflow::gtl::MakeCleanup([buffers]() {
     for (gpu::InfeedBuffer* b : buffers) {
       b->Done();
     }
   });
 
-  for (const auto& tuple_element : literal.tuple_literals()) {
-    const Shape& tuple_element_shape = tuple_element.shape();
+  for (int64 i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) {
+    const Shape& tuple_element_shape =
+        ShapeUtil::GetTupleElementShape(shape, i);
     int64 tuple_element_size = GetByteSizeRequirement(tuple_element_shape);
     TF_ASSIGN_OR_RETURN(
         gpu::InfeedBuffer * buffer,
         TransferBufferToInfeedInternal(executor, tuple_element_size,
-                                       tuple_element.InternalData()));
+                                       literal.untyped_data({i})));
     buffers.push_back(buffer);
   }
 
@@ -105,12 +106,13 @@ Status GpuTransferManager::EnqueueBuffersToInfeed(
   // infeed requests, blocking on the stream might be
   // heavy-handed. Figure out if finer-grained acknowledgement is
   // possible.
-  if (!stream->BlockHostUntilDone()) {
+  Status block_status = stream->BlockHostUntilDone();
+  if (!block_status.ok()) {
     for (gpu::InfeedBuffer* b : buffers) {
       b->Done();
     }
-    return InternalError("Failed to complete data transfer on stream %p",
-                         stream);
+    return InternalError("Failed to complete data transfer on stream %p: %s",
+                         stream, block_status.error_message().c_str());
   }
 
   infeed_manager->EnqueueBuffers(buffers);
diff --git a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
index e33e904692ca5ad41e17d2e165dbb40b6bd4aa33..2ac95ceb692447c7ac6dbbcd8b9a38876f7a77b6 100644
--- a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
@@ -30,9 +30,8 @@ InfeedThunk::InfeedThunk(
                              tuple_element_buffers.end()),
       destination_buffer_(destination_buffer) {}
 
-tensorflow::Status InfeedThunk::ExecuteOnStream(
-    const BufferAllocations& buffer_allocations,
-    perftools::gputools::Stream* stream) {
+Status InfeedThunk::ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                                    perftools::gputools::Stream* stream) {
   VLOG(2) << "Infeeding to GPU ";
 
   perftools::gputools::DeviceMemoryBase destination_address =
@@ -66,15 +65,16 @@ tensorflow::Status InfeedThunk::ExecuteOnStream(
                        buffer->length());
   }
 
-  if (!stream->BlockHostUntilDone()) {
-    return InternalError("Failed to complete data transfer on stream %p",
-                         stream);
+  Status block_status = stream->BlockHostUntilDone();
+  if (!block_status.ok()) {
+    return InternalError("Failed to complete data transfer on stream %p: %s",
+                         stream, block_status.error_message().c_str());
   }
 
   infeed_manager->ReleaseBuffers(infeed_buffers);
 
   VLOG(2) << "Infeeding to GPU complete";
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
 }  // namespace gpu
diff --git a/tensorflow/compiler/xla/service/gpu/infeed_thunk.h b/tensorflow/compiler/xla/service/gpu/infeed_thunk.h
index 371d71f9dbdd21cb5f36cc3108c8f398a4a91c29..86918705fa0305217f11753e383200c7bd71474b 100644
--- a/tensorflow/compiler/xla/service/gpu/infeed_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/infeed_thunk.h
@@ -43,9 +43,8 @@ class InfeedThunk : public Thunk {
   InfeedThunk(const InfeedThunk&) = delete;
   InfeedThunk& operator=(const InfeedThunk&) = delete;
 
-  tensorflow::Status ExecuteOnStream(
-      const BufferAllocations& buffer_allocations,
-      perftools::gputools::Stream* stream) override;
+  Status ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                         perftools::gputools::Stream* stream) override;
 
  private:
   const std::vector<BufferAllocation::Slice> tuple_element_buffers_;
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
index 658fd05cd4b63c923d21b4a1de16468c0aeec65d..76566a9e3dbbc936ff90fe3f440ede14bf4e5233 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
@@ -110,6 +110,10 @@ bool ImplementedAsDnnConvolution(const HloInstruction& hlo) {
       return false;
     }
 
+    if (window_util::HasWindowReversal(hlo.window())) {
+      return false;
+    }
+
     return true;
   }
 
@@ -123,8 +127,26 @@ bool ImplementedAsDnnConvolution(const HloInstruction& hlo) {
   return false;
 }
 
+const char* const kCudnnBatchNormForwardInferenceCallTarget =
+    "__cudnn$batchNormalizationForwardInference";
+const char* const kCudnnBatchNormForwardTrainingCallTarget =
+    "__cudnn$batchNormalizationForwardTraining";
+const char* const kCudnnBatchNormBackwardCallTarget =
+    "__cudnn$batchNormalizationBackward";
+
+bool IsCustomCallToDnnBatchNorm(const HloInstruction& hlo) {
+  if (hlo.opcode() != HloOpcode::kCustomCall) {
+    return false;
+  }
+  const auto& target = hlo.custom_call_target();
+  return target == kCudnnBatchNormForwardInferenceCallTarget ||
+         target == kCudnnBatchNormForwardTrainingCallTarget ||
+         target == kCudnnBatchNormBackwardCallTarget;
+}
+
 bool ImplementedAsLibraryCall(const HloInstruction& hlo) {
-  return ImplementedAsGemm(hlo) || ImplementedAsDnnConvolution(hlo);
+  return ImplementedAsGemm(hlo) || ImplementedAsDnnConvolution(hlo) ||
+         IsCustomCallToDnnBatchNorm(hlo);
 }
 
 bool IsReductionToVector(const HloInstruction& reduce) {
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
index 06c3205296e4546e39525ec093cc17e2fc375d0d..d24ed9879d084e96862885efaae2f79a256cd71d 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
@@ -33,6 +33,31 @@ bool ImplementedAsGemm(const HloInstruction& hlo);
 // Returns true if `hlo` will be implemented as a call to cuDNN convolution.
 bool ImplementedAsDnnConvolution(const HloInstruction& hlo);
 
+// A call to cuDNN for batch normalization is represented as CustomCall HLO with
+// a call target equal to one of these strings.
+//
+// The operands to and outputs of these calls are the same as those of the
+// corresponding HLOs, except:
+//
+//  - epsilon and feature_index are proper operands, at the end of the operands
+//    list.  They must be HLO constants.
+//  - The cuDNN forward training call returns inv_stddev =
+//    1/sqrt(variance + epsilon) in place of plain variance.
+//  - Similarly, BatchNormGrad accepts inv_stddev in place of the variance
+//    operand.
+extern const char* const kCudnnBatchNormForwardInferenceCallTarget;
+extern const char* const kCudnnBatchNormForwardTrainingCallTarget;
+extern const char* const kCudnnBatchNormBackwardCallTarget;
+
+// Returns true if `hlo` will be implemented as a call to a cuDNN batch
+// normalization routine.
+//
+// This returns true if `hlo` is a CustomCall HLO with a call target equal to
+// one of the kCudnnBatchNormFoo constants above, but returns *false* for HLOs
+// with one of the kBatchNorm opcodes, because these are lowered either to a
+// sequence of generic HLOs or to a cuDNN CustomCall.
+bool IsCustomCallToDnnBatchNorm(const HloInstruction& hlo);
+
 // Returns true if `hlo` will be implemented as a library call, e.g. cuBLAS gemm
 // or cuDNN convolution.
 bool ImplementedAsLibraryCall(const HloInstruction& hlo);
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
index 6e2bd4e11d3c4ff576edb0df3b724abebfc0e424..095c3df3bfc75cae999edc7fdd800f6e399546dd 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
@@ -173,7 +173,7 @@ Status IrEmitter::EmitCallToNestedComputation(
   return Status::OK();
 }
 
-bool IrEmitter::MaybeEmitSpecialAtomicOperation(
+bool IrEmitter::MaybeEmitDirectAtomicOperation(
     const HloComputation& computation, llvm::Value* output_address,
     llvm::Value* source_address) {
   CHECK_EQ(2, computation.num_parameters());
@@ -233,102 +233,189 @@ bool IrEmitter::MaybeEmitSpecialAtomicOperation(
   return false;
 }
 
-Status IrEmitter::EmitAtomicOperationForNestedComputation(
-    const HloComputation& computation, llvm::Value* output_address,
-    llvm::Value* source_address) {
-  if (computation.num_parameters() != 2) {
-    // TODO(b/30258929): We only accept binary computations so far.
-    return Unimplemented(
-        "We only support atomic functions with exactly two parameters, but "
-        "computation %s has %lld.",
-        computation.name().c_str(), computation.num_parameters());
-  }
+// Implements atomic binary operations using atomic compare-and-swap
+// (atomicCAS) as follows:
+//   1. Reads the value from the memory pointed to by output_address and
+//     records it as old_output.
+//   2. Uses old_output as one of the source operand to perform the binary
+//     operation and stores the result in new_output.
+//   3. Calls atomicCAS which implements compare-and-swap as an atomic
+//     operation. In particular, atomicCAS reads the value from the memory
+//     pointed to by output_address, and compares the value with old_output. If
+//     the two values equal, new_output is written to the same memory location
+//     and true is returned to indicate that the atomic operation succeeds.
+//     Otherwise, the new value read from the memory is returned. In this case,
+//     the new value is copied to old_output, and steps 2. and 3. are repeated
+//     until atomicCAS succeeds.
+//
+// On Nvidia GPUs, atomicCAS can only operate on 32 bit and 64 bit integers. If
+// the element type of the binary operation is 32 bits or 64 bits, the integer
+// type of the same size is used for the atomicCAS operation. On the other hand,
+// if the element type is smaller than 32 bits, int32 is used for the atomicCAS
+// operation. In this case, atomicCAS reads and writes 32 bit values from
+// the memory, which is larger than the memory size required by the original
+// atomic binary operation. We mask off the last two bits of the output_address
+// and use the result as an address to read the 32 bit values from the memory.
+// This can avoid out of bound memory accesses if tensor buffers are 4 byte
+// aligned and have a size of 4N, an assumption that the runtime can guarantee.
+//
+// The pseudo code is shown below. Variables *_address are pointers to a memory
+// region with a size equal to the size of the atomicCAS operation, with the
+// exception that new_output_address is a pointer to a memory region with a size
+// equal to the element size of the binary operation.
+//
+//   element_size = sizeof(element_type);
+//   atomic_size = max(32, element_size);
+//   cas_new_output_address = alloca(atomic_size);
+//   cas_old_output_address = alloca(atomic_size);
+//   if (atomic_size != element_size) {
+//     atomic_address = output_address & ((int64)(-4));
+//     new_output_address = cas_new_output_address + (output_address & 3);
+//   } else {
+//     atomic_address = output_address;
+//     new_output_address = cas_new_output_address;
+//   }
+//
+//   *cas_old_output_address = *atomic_address;
+//   do {
+//     *cas_new_output_address = *cas_old_output_address;
+//     *new_output_address = operation(*new_output_address, *source_address);
+//     (*cas_old_output_address, success) =
+//       atomicCAS(atomic_address, *cas_old_output_address,
+//       *cas_new_output_address);
+//   } while (!success);
+//
+Status IrEmitter::EmitAtomicOperationUsingCAS(const HloComputation& computation,
+                                              llvm::Value* output_address,
+                                              llvm::Value* source_address) {
+  llvm::PointerType* output_address_type =
+      llvm::dyn_cast<llvm::PointerType>(output_address->getType());
+  CHECK_NE(output_address_type, nullptr);
+
+  // element_type is the data type for the binary operation.
+  llvm::Type* element_type = output_address_type->getPointerElementType();
+  int element_size = llvm_ir::GetSizeInBits(element_type);
+  llvm::Type* element_address_type = element_type->getPointerTo();
+
+  int atomic_size = (element_size < 32) ? 32 : element_size;
+  llvm::Type* atomic_type = ir_builder_.getIntNTy(atomic_size);
+  llvm::Type* atomic_address_type =
+      atomic_type->getPointerTo(output_address_type->getPointerAddressSpace());
+
+  // cas_old_output_address and cas_new_output_address point to the scratch
+  // memory where we store the old and new values for the repeated atomicCAS
+  // operations.
+  llvm::Value* cas_old_output_address = ir_builder_.CreateAlloca(
+      atomic_type, /*ArraySize=*/nullptr, "cas_old_output_address");
+  llvm::Value* cas_new_output_address = ir_builder_.CreateAlloca(
+      atomic_type, /*ArraySize=*/nullptr, "cas_new_output_address");
 
-  if (MaybeEmitSpecialAtomicOperation(computation, output_address,
-                                      source_address)) {
-    return Status::OK();
-  }
-
-  // Other binary computations can be made atomic as following (labels are basic
-  // block names used in the IR emitting code later).
-  //
-  // atomic_op_loop_preheader:
-  //   ...
-  //   source = *source_address;
-  //   old_output = *output_address;
-  //   do {
-  // atomic_op_loop_body_entry:
-  //     new_output = computation(old_output, source);
-  //     (old_output, success) =
-  //         atomicCAS(output_address, old_output, new_output);
-  //   } while (!success);
-  //
-  // atomic_op_loop_exit:
-  //   ...
-  //
-  // TODO(jingyue): Consider encapsulate the logic of emitting control flow to
-  // something similar to llvm_ir::ForLoop.
-  //
   // Emit preparation code to the preheader.
   llvm::BasicBlock* loop_preheader_bb = ir_builder_.GetInsertBlock();
-  llvm::Type* element_ir_type =
-      output_address->getType()->getPointerElementType();
-  // old_output = *output_address;
-  llvm::Value* old_output_location = ir_builder_.CreateAlloca(
-      element_ir_type, /*ArraySize=*/nullptr, "old_output_location");
-  ir_builder_.CreateStore(ir_builder_.CreateLoad(output_address, "old_output"),
-                          old_output_location);
+
+  llvm::Value* atomic_memory_address;
+  // binop_output_address points to the scratch memory that stores the
+  // result of the binary operation.
+  llvm::Value* binop_output_address;
+  if (element_size < 32) {
+    // Assume the element size is an integer number of bytes.
+    CHECK_EQ((element_size % sizeof(char)), 0);
+    llvm::Type* address_int_type =
+        module_->getDataLayout().getIntPtrType(output_address_type);
+    atomic_memory_address =
+        ir_builder_.CreatePtrToInt(output_address, address_int_type);
+    llvm::Value* mask = llvm::ConstantInt::get(address_int_type, 3);
+    llvm::Value* offset = ir_builder_.CreateAnd(atomic_memory_address, mask);
+    mask = llvm::ConstantInt::get(address_int_type, -4);
+    atomic_memory_address = ir_builder_.CreateAnd(atomic_memory_address, mask);
+    atomic_memory_address =
+        ir_builder_.CreateIntToPtr(atomic_memory_address, atomic_address_type);
+    binop_output_address = ir_builder_.CreateAdd(
+        ir_builder_.CreatePtrToInt(cas_new_output_address, address_int_type),
+        offset);
+    binop_output_address =
+        ir_builder_.CreateIntToPtr(binop_output_address, element_address_type);
+  } else {
+    atomic_memory_address =
+        ir_builder_.CreateBitCast(output_address, atomic_address_type);
+    binop_output_address =
+        ir_builder_.CreateBitCast(cas_new_output_address, element_address_type);
+  }
+
+  // Use the value from the memory that atomicCAS operates on to initialize
+  // cas_old_output.
+  llvm::Value* cas_old_output =
+      ir_builder_.CreateLoad(atomic_memory_address, "cas_old_output");
+  ir_builder_.CreateStore(cas_old_output, cas_old_output_address);
+
   llvm::BasicBlock* loop_exit_bb = loop_preheader_bb->splitBasicBlock(
       ir_builder_.GetInsertPoint(), "atomic_op_loop_exit");
-
-  // Emit the body of the loop that repeatedly invokes atomicCAS.
   llvm::BasicBlock* loop_body_bb =
       llvm::BasicBlock::Create(ir_builder_.getContext(), "atomic_op_loop_body",
                                ir_builder_.GetInsertBlock()->getParent());
   ir_builder_.SetInsertPoint(loop_body_bb);
   // Change preheader's successor from loop_exit_bb to loop_body_bb.
   loop_preheader_bb->getTerminator()->setSuccessor(0, loop_body_bb);
-  // new_output = computation(old_output, source);
-  llvm::Value* new_output_location = ir_builder_.CreateAlloca(
-      element_ir_type, /*ArraySize=*/nullptr, "new_output_location");
+
+  // Emit the body of the loop that repeatedly invokes atomicCAS.
+  //
+  // Use cas_old_output to initialize cas_new_output.
+  cas_old_output =
+      ir_builder_.CreateLoad(cas_old_output_address, "cas_old_output");
+  ir_builder_.CreateStore(cas_old_output, cas_new_output_address);
+  // Emits code to calculate new_output = operation(old_output, source);
   TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
-      computation, {old_output_location, source_address}, new_output_location));
-
-  // (old_output, success) = atomicCAS(output_address, old_output, new_output);
-  int num_bits = llvm_ir::GetSizeInBits(element_ir_type);
-  llvm::Type* element_int_ir_type = ir_builder_.getIntNTy(num_bits);
-  // cmpxchg accepts integer only, and bitcast refuses to operate on aggregate
-  // types, so we bitcast load and store addresses to intN* of the same bit
-  // width.
-  llvm::Value* old_output = ir_builder_.CreateLoad(
-      ir_builder_.CreateBitCast(old_output_location,
-                                element_int_ir_type->getPointerTo()),
-      "old_output");
-  llvm::Value* new_output = ir_builder_.CreateLoad(
-      ir_builder_.CreateBitCast(new_output_location,
-                                element_int_ir_type->getPointerTo()),
-      "new_output");
+      computation, {binop_output_address, source_address},
+      binop_output_address));
+
+  llvm::Value* cas_new_output =
+      ir_builder_.CreateLoad(cas_new_output_address, "cas_new_output");
+
+  // Emit code to perform the atomicCAS operation
+  // (cas_old_output, success) = atomicCAS(memory_address, cas_old_output,
+  //                                       cas_new_output);
   llvm::Value* ret_value = ir_builder_.CreateAtomicCmpXchg(
-      ir_builder_.CreateBitCast(output_address,
-                                element_int_ir_type->getPointerTo()),
-      old_output, new_output, llvm::AtomicOrdering::SequentiallyConsistent,
+      atomic_memory_address, cas_old_output, cas_new_output,
+      llvm::AtomicOrdering::SequentiallyConsistent,
       llvm::AtomicOrdering::SequentiallyConsistent);
-  // cmpxchg returns a pair. The first element is the original value at
-  // output_address and the second element is whether the swap is successful.
+
+  // Extract the memory value returned from atomicCAS and store it as
+  // cas_old_output.
   ir_builder_.CreateStore(
-      ir_builder_.CreateExtractValue(ret_value, 0, "old_output"),
-      ir_builder_.CreateBitCast(old_output_location,
-                                element_int_ir_type->getPointerTo()));
+      ir_builder_.CreateExtractValue(ret_value, 0, "cas_old_output"),
+      cas_old_output_address);
+  // Extract the success bit returned from atomicCAS and generate a
+  // conditional branch on the success bit.
   ir_builder_.CreateCondBr(
       ir_builder_.CreateExtractValue(ret_value, 1, "success"), loop_exit_bb,
       loop_body_bb);
 
-  // Restore the insertion point to the exit basic block so that the caller of
+  // Set the insertion point to the exit basic block so that the caller of
   // this method can continue emitting code to the right place.
   SetToFirstInsertPoint(loop_exit_bb, &ir_builder_);
   return Status::OK();
 }
 
+Status IrEmitter::EmitAtomicOperationForNestedComputation(
+    const HloComputation& computation, llvm::Value* output_address,
+    llvm::Value* source_address) {
+  if (computation.num_parameters() != 2) {
+    // TODO(b/30258929): We only accept binary computations so far.
+    return Unimplemented(
+        "We only support atomic functions with exactly two parameters, but "
+        "computation %s has %lld.",
+        computation.name().c_str(), computation.num_parameters());
+  }
+
+  if (MaybeEmitDirectAtomicOperation(computation, output_address,
+                                     source_address)) {
+    return Status::OK();
+  }
+
+  return EmitAtomicOperationUsingCAS(computation, output_address,
+                                     source_address);
+}
+
 Status IrEmitter::HandleSelect(HloInstruction* select) {
   auto pred = select->operand(0);
   auto on_true = select->operand(1);
@@ -518,6 +605,14 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
       "Hit a case for convolution that is not implemented on GPU.");
 }
 
+Status IrEmitter::HandleFft(HloInstruction* fft) {
+  if (ShapeUtil::HasZeroElements(fft->shape())) {
+    // Emit no code for an empty output.
+    return Status::OK();
+  }
+  return Unimplemented("Hit a case for fft that is not implemented on GPU.");
+}
+
 Status IrEmitter::HandleCrossReplicaSum(HloInstruction* crs) {
   // TODO(b/33011107): Support cross replica sum on GPU.
   return Unimplemented(
@@ -640,6 +735,60 @@ Status IrEmitter::HandleRng(HloInstruction* random) {
       .EmitLoop(IrName(random));
 }
 
+Status IrEmitter::HandleBatchNormInference(HloInstruction*) {
+  return Unimplemented(
+      "The GPU backend does not implement BatchNormInference directly.  It "
+      "should be lowered before IR emission to HLO-soup using "
+      "BatchNormRewriter or to a cudnn CustomCall using "
+      "CudnnBatchNormRewriter.");
+}
+
+Status IrEmitter::HandleBatchNormTraining(HloInstruction*) {
+  return Unimplemented(
+      "The GPU backend does not implement BatchNormTraining directly.  It "
+      "should be lowered before IR emission to HLO-soup using "
+      "BatchNormRewriter or to a cudnn CustomCall using "
+      "CudnnBatchNormRewriter.");
+}
+
+Status IrEmitter::HandleBatchNormGrad(HloInstruction*) {
+  return Unimplemented(
+      "The GPU backend does not implement BatchNormGrad directly.  It should "
+      "be lowered before IR emission to HLO-soup (using BatchNormRewriter) or "
+      "to a cudnn CustomCall using CudnnBatchNormRewriter.");
+}
+
+Status IrEmitter::HandleConditional(HloInstruction* conditional) {
+  auto pred = conditional->operand(0);
+  auto true_arg = conditional->operand(1);
+  auto false_arg = conditional->operand(2);
+
+  llvm::Value* conditional_result = GetBasePointer(*conditional);
+
+  llvm::LoadInst* pred_value = ir_builder_.CreateLoad(
+      GetBasePointer(*pred),
+      llvm_ir::AsStringRef(IrName(conditional, "load_predicate_value")));
+  llvm::Value* pred_cond = ir_builder_.CreateICmpNE(
+      pred_value,
+      llvm::ConstantInt::get(llvm_ir::PrimitiveTypeToIrType(PRED, module_), 0),
+      llvm_ir::AsStringRef(IrName(conditional, "boolean_predicate")));
+  llvm_ir::LlvmIfData if_data = llvm_ir::EmitIfThenElse(
+      pred_cond, IrName(conditional, "if_then_else"), &ir_builder_);
+
+  SetToFirstInsertPoint(if_data.true_block, &ir_builder_);
+  TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
+      *conditional->true_computation(), {GetBasePointer(*true_arg)},
+      conditional_result));
+
+  SetToFirstInsertPoint(if_data.false_block, &ir_builder_);
+  TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
+      *conditional->false_computation(), {GetBasePointer(*false_arg)},
+      conditional_result));
+
+  SetToFirstInsertPoint(if_data.after_block, &ir_builder_);
+  return Status::OK();
+}
+
 llvm_ir::IrArray::Index IrEmitter::EmitOperandArrayLoopNest(
     const llvm_ir::IrArray& operand_array, int64 reduction_dimension,
     tensorflow::StringPiece name_suffix, llvm_ir::ForLoopNest* loop_nest) {
@@ -648,8 +797,8 @@ llvm_ir::IrArray::Index IrEmitter::EmitOperandArrayLoopNest(
   // reduction dimension.
   std::vector<int64> dimensions;
   const Shape& shape = operand_array.GetShape();
-  for (int i = shape.layout().minor_to_major_size() - 1; i >= 0; --i) {
-    int64 dimension = shape.layout().minor_to_major(i);
+  for (int i = 0; i < LayoutUtil::MinorToMajor(shape).size(); ++i) {
+    int64 dimension = LayoutUtil::Major(shape.layout(), i);
     if (dimension != reduction_dimension) {
       dimensions.push_back(dimension);
     }
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h
index 9c01f5b7c72f429822300af28bfd5261150d33d1..39bafaa34656a35f24444dc7f3665c1250833921 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h
@@ -79,6 +79,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   Status HandleGetTupleElement(HloInstruction* get_tuple_element) override;
   Status HandleDot(HloInstruction* dot) override;
   Status HandleConvolution(HloInstruction* convolution) override;
+  Status HandleFft(HloInstruction* fft) override;
   Status HandleCrossReplicaSum(HloInstruction* crs) override;
   Status HandleInfeed(HloInstruction* infeed) override;
   Status HandleOutfeed(HloInstruction* outfeed) override;
@@ -95,6 +96,10 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   Status HandleCall(HloInstruction* call) override;
   Status HandleCustomCall(HloInstruction* custom_call) override;
   Status HandleRng(HloInstruction* random) override;
+  Status HandleConditional(HloInstruction* conditional) override;
+  Status HandleBatchNormInference(HloInstruction* batch_norm) override;
+  Status HandleBatchNormTraining(HloInstruction* batch_norm) override;
+  Status HandleBatchNormGrad(HloInstruction* batch_norm) override;
 
   Status FinishVisit(HloInstruction* root) override { return Status::OK(); }
 
@@ -185,9 +190,16 @@ class IrEmitter : public DfsHloVisitorWithDefault {
   // be simply implemented using an LLVM atomic instruction. If "computation" is
   // one of this kind, emits code to do that and returns true; otherwise,
   // returns false.
-  bool MaybeEmitSpecialAtomicOperation(const HloComputation& computation,
-                                       llvm::Value* output_address,
-                                       llvm::Value* source_address);
+  bool MaybeEmitDirectAtomicOperation(const HloComputation& computation,
+                                      llvm::Value* output_address,
+                                      llvm::Value* source_address);
+
+  // A helper method for EmitAtomicOperationForNestedComputation. It implements
+  // binary atomic operations using atomicCAS with special handling to support
+  // small data types.
+  Status EmitAtomicOperationUsingCAS(const HloComputation& computation,
+                                     llvm::Value* output_address,
+                                     llvm::Value* source_address);
 
   StatusOr<llvm::Value*> ComputeNestedElement(
       const HloComputation& computation,
@@ -227,8 +239,11 @@ class IrEmitterUnnested : public IrEmitter {
   // IrEmitterUnnested handles the following instructions differently from
   // IrEmitter.
   Status HandleCopy(HloInstruction* copy) override;
+  Status HandleConditional(HloInstruction* conditional) override;
   Status HandleConvolution(HloInstruction* convolution) override;
+  Status HandleCustomCall(HloInstruction* custom_call) override;
   Status HandleDot(HloInstruction* dot) override;
+  Status HandleFft(HloInstruction* fft) override;
   Status HandleFusion(HloInstruction* fusion) override;
   Status HandleGetTupleElement(HloInstruction* get_tuple_element) override;
   Status HandleReduce(HloInstruction* reduce) override;
@@ -292,6 +307,12 @@ class IrEmitterUnnested : public IrEmitter {
                           const llvm_ir::ElementGenerator& init_value_gen,
                           HloComputation* reducer);
 
+  // Emits code that reduces a tensor of arbitrary rank to a scalar.
+  Status EmitReductionToScalar(HloInstruction* reduce, const Shape& input_shape,
+                               const llvm_ir::ElementGenerator& input_gen,
+                               const llvm_ir::ElementGenerator& init_value_gen,
+                               HloComputation* reducer);
+
   // Figures out whether `reduce` is a row or column reduction, and which
   // dimensions to reduce, and calls either `EmitRowReduction` or
   // `EmitColumnReduction` as appropriate. `input_shape` is the shape of the
@@ -319,6 +340,9 @@ class IrEmitterUnnested : public IrEmitter {
   // Returns a ConvolutionThunk that calls DNN to implement `inst`.
   std::unique_ptr<Thunk> BuildConvolutionThunk(const HloInstruction* inst);
 
+  // Returns a FftThunk that calls cuFFT to implement `inst`.
+  std::unique_ptr<Thunk> BuildFftThunk(const HloInstruction* inst);
+
   // Returns a GemmThunk that calls gemm to implement `inst`. The caller needs
   // to make sure `inst` outlives the lifetime of the returned Thunk object.
   std::unique_ptr<Thunk> BuildGemmThunk(const HloInstruction* inst);
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index 1b863c9e3c51d6e757751154abd653cd1fdcb8a7..be35351e8727ce15998460e41f21a53ebe427c3b 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -30,8 +30,11 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor.h"
 #include "tensorflow/compiler/xla/service/gpu/convolution_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/copy_thunk.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h"
+#include "tensorflow/compiler/xla/service/gpu/fft_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/for_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/gemm_thunk.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_constants.h"
 #include "tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h"
 #include "tensorflow/compiler/xla/service/gpu/infeed_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
@@ -123,10 +126,12 @@ void UpdateLaunchDimensions(const LaunchDimensions& launch_dims, Thunk* thunk,
   llvm::ConstantInt* threads_per_block_ir_value = llvm::ConstantInt::get(
       llvm::IntegerType::get(llvm_context, /*NumBits=*/32),
       launch_dims.threads_per_block());
+  // Our launch bounds are exact, so we can specify them as reqntidx rather than
+  // maxntidx.
   nvvm_annotations_node->addOperand(llvm::MDNode::get(
       llvm_context,
       {llvm::ConstantAsMetadata::get(ir_kernel),
-       llvm::MDString::get(llvm_context, "maxntidx"),
+       llvm::MDString::get(llvm_context, "reqntidx"),
        llvm::ConstantAsMetadata::get(threads_per_block_ir_value)}));
 }
 }  // namespace
@@ -181,15 +186,15 @@ llvm::Function* IrEmitterUnnested::BuildKernelPrototype(
   string kernel_name = ir_emitter_context_->name_uniquer()->GetUniqueName(
       llvm_ir::SanitizeFunctionName(inst.name()));
 
-  // Create the kernel and adds it to the module.
+  // Create the kernel and add it to the module.
   llvm::Module* module = ir_emitter_context_->llvm_module();
   llvm::LLVMContext& context = module->getContext();
   int num_escaped_hlos = escaped_hlos.size();
   llvm::FunctionType* kernel_type = llvm::FunctionType::get(
-      llvm::Type::getVoidTy(context),  // The type of function result.
+      /*Result=*/llvm::Type::getVoidTy(context),
       std::vector<llvm::Type*>(num_escaped_hlos + 1,
                                ir_builder_.getInt8PtrTy()),
-      false);  // Not a variadic argument function.
+      /*isVarArg=*/false);
   llvm::Function* kernel =
       llvm::Function::Create(kernel_type, llvm::GlobalValue::ExternalLinkage,
                              kernel_name.c_str(), module);
@@ -214,7 +219,14 @@ llvm::Function* IrEmitterUnnested::BuildKernelPrototype(
     kernel->addDereferenceableAttr(temp_buffer_arg_no + 1,
                                    temp_allocation_total_size);
   }
-  kernel->addAttribute(temp_buffer_arg_no + 1, llvm::Attribute::NoAlias);
+  kernel->addParamAttr(temp_buffer_arg_no, llvm::Attribute::NoAlias);
+
+  // All arguments to a kernel must be aligned to kCudaMallocAlignBytes.
+  for (int64 i = 0; i < kernel->arg_size(); ++i) {
+    kernel->addParamAttr(
+        i, llvm::Attribute::get(context, llvm::Attribute::Alignment,
+                                kCudaMallocAlignBytes));
+  }
 
   // TODO(b/65380986): Investigate if adding fast math flags for generated
   // kernels makes sense.
@@ -246,6 +258,11 @@ Status IrEmitterUnnested::DefaultAction(HloInstruction* hlo) {
 }
 
 Status IrEmitterUnnested::HandleDot(HloInstruction* dot) {
+  const DotDimensionNumbers& dnums = dot->dot_dimension_numbers();
+  if (dnums.lhs_batch_dimensions_size() > 0 ||
+      dnums.rhs_batch_dimensions_size() > 0) {
+    return Unimplemented("Dot with batch dimensions not implemented.");
+  }
   if (ImplementedAsGemm(*dot)) {
     thunk_sequence_->emplace_back(BuildGemmThunk(dot));
     return Status::OK();
@@ -254,6 +271,11 @@ Status IrEmitterUnnested::HandleDot(HloInstruction* dot) {
   return IrEmitter::HandleDot(dot);
 }
 
+Status IrEmitterUnnested::HandleConditional(HloInstruction* conditional) {
+  thunk_sequence_->push_back(BuildKernelThunk(conditional));
+  return IrEmitter::HandleConditional(conditional);
+}
+
 Status IrEmitterUnnested::HandleConvolution(HloInstruction* convolution) {
   if (ImplementedAsDnnConvolution(*convolution)) {
     thunk_sequence_->emplace_back(BuildConvolutionThunk(convolution));
@@ -263,6 +285,111 @@ Status IrEmitterUnnested::HandleConvolution(HloInstruction* convolution) {
   return IrEmitter::HandleConvolution(convolution);
 }
 
+Status IrEmitterUnnested::HandleCustomCall(HloInstruction* custom_call) {
+  // A CustomCall on the GPU backend can either be a custom-call to a
+  // user-supplied kernel, or a call into a library like cudnn.
+
+  // Lower custom-calls to cudnn batchnorm ops to specialized thunks.  It's part
+  // of the contract of these cudnn batchnorm calls that the epsilon and
+  // feature_index operands be constants.
+  if (custom_call->custom_call_target() ==
+      kCudnnBatchNormForwardInferenceCallTarget) {
+    const HloInstruction* epsilon = custom_call->operand(5);
+    CHECK(epsilon->IsConstant());
+    float epsilon_value = epsilon->literal().Get<float>({});
+
+    const HloInstruction* feature_index = custom_call->operand(6);
+    CHECK(feature_index->IsConstant());
+    int64 feature_index_value = feature_index->literal().Get<int64>({});
+
+    thunk_sequence_->emplace_back(
+        MakeUnique<CudnnBatchNormForwardInferenceThunk>(
+            /*operand=*/GetAllocationSlice(*custom_call->operand(0)),
+            /*scale=*/GetAllocationSlice(*custom_call->operand(1)),
+            /*offset=*/GetAllocationSlice(*custom_call->operand(2)),
+            /*mean=*/GetAllocationSlice(*custom_call->operand(3)),
+            /*variance=*/GetAllocationSlice(*custom_call->operand(4)),
+            /*epsilon=*/epsilon_value,
+            /*feature_index=*/feature_index_value,
+            /*output=*/GetAllocationSlice(*custom_call),
+            /*hlo=*/custom_call));
+    return Status::OK();
+  }
+
+  if (custom_call->custom_call_target() ==
+      kCudnnBatchNormForwardTrainingCallTarget) {
+    const HloInstruction* epsilon = custom_call->operand(3);
+    CHECK(epsilon->IsConstant());
+    float epsilon_value = epsilon->literal().Get<float>({});
+
+    const HloInstruction* feature_index = custom_call->operand(4);
+    CHECK(feature_index->IsConstant());
+    int64 feature_index_value = feature_index->literal().Get<int64>({});
+
+    // BatchNormTraining returns a tuple of three elements: data, calculated
+    // mean, and calculated 1/sqrt(variance + epsilon).
+    const auto& assn = ir_emitter_context_->buffer_assignment();
+    auto output_data = assn.GetUniqueSlice(custom_call, {0}).ValueOrDie();
+    auto output_mean = assn.GetUniqueSlice(custom_call, {1}).ValueOrDie();
+    auto output_inv_stddev = assn.GetUniqueSlice(custom_call, {2}).ValueOrDie();
+    thunk_sequence_->emplace_back(
+        MakeUnique<CudnnBatchNormForwardTrainingThunk>(
+            /*operand=*/GetAllocationSlice(*custom_call->operand(0)),
+            /*scale=*/GetAllocationSlice(*custom_call->operand(1)),
+            /*offset=*/GetAllocationSlice(*custom_call->operand(2)),
+            /*epsilon=*/epsilon_value,
+            /*feature_index=*/feature_index_value,
+            /*output_data=*/output_data,
+            /*output_mean=*/output_mean,
+            /*output_inv_stddev=*/output_inv_stddev,
+            /*output_tuple=*/GetAllocationSlice(*custom_call),
+            /*hlo=*/custom_call));
+    return Status::OK();
+  }
+
+  if (custom_call->custom_call_target() == kCudnnBatchNormBackwardCallTarget) {
+    const HloInstruction* epsilon = custom_call->operand(5);
+    CHECK(epsilon->IsConstant());
+    float epsilon_value = epsilon->literal().Get<float>({});
+
+    const HloInstruction* feature_index = custom_call->operand(6);
+    CHECK(feature_index->IsConstant());
+    int64 feature_index_value = feature_index->literal().Get<int64>({});
+
+    // BatchNormGrad returns a tuple of three elements: grad_data, grad_scale,
+    // grad_offset.
+    const auto& assn = ir_emitter_context_->buffer_assignment();
+    auto output_grad_data = assn.GetUniqueSlice(custom_call, {0}).ValueOrDie();
+    auto output_grad_scale = assn.GetUniqueSlice(custom_call, {1}).ValueOrDie();
+    auto output_grad_offset =
+        assn.GetUniqueSlice(custom_call, {2}).ValueOrDie();
+    thunk_sequence_->emplace_back(MakeUnique<CudnnBatchNormBackwardThunk>(
+        /*operand=*/GetAllocationSlice(*custom_call->operand(0)),
+        /*scale=*/GetAllocationSlice(*custom_call->operand(1)),
+        /*mean=*/GetAllocationSlice(*custom_call->operand(2)),
+        /*inv_stddev=*/GetAllocationSlice(*custom_call->operand(3)),
+        /*grad_output=*/GetAllocationSlice(*custom_call->operand(4)),
+        /*epsilon=*/epsilon_value,
+        /*feature_index=*/feature_index_value,
+        /*output_grad_data=*/output_grad_data,
+        /*output_grad_scale=*/output_grad_scale,
+        /*output_grad_offset=*/output_grad_offset,
+        /*output_tuple=*/GetAllocationSlice(*custom_call),
+        /*hlo=*/custom_call));
+    return Status::OK();
+  }
+
+  return IrEmitter::HandleCustomCall(custom_call);
+}
+
+Status IrEmitterUnnested::HandleFft(HloInstruction* fft) {
+  TF_RET_CHECK(
+      LayoutUtil::IsMonotonicWithDim0Major(fft->operand(0)->shape().layout()));
+  TF_RET_CHECK(LayoutUtil::IsMonotonicWithDim0Major(fft->shape().layout()));
+  thunk_sequence_->emplace_back(BuildFftThunk(fft));
+  return Status::OK();
+}
+
 Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) {
   HloInstruction* root = fusion->fused_expression_root();
   // HandleFusion specializes reduction from a multi-dimensional array to a 1D
@@ -407,8 +534,8 @@ Shape MergeDimensions(tensorflow::gtl::ArraySlice<size_t> segs,
             (segs.size() == i ? shape.dimensions().size() : segs[i]),
         1, std::multiplies<int64>()));
   }
-  return ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(shape.element_type(),
-                                                          dimensions);
+  return ShapeUtil::MakeShapeWithDescendingLayout(shape.element_type(),
+                                                  dimensions);
 }
 
 // Returns whether the given shapes and permutation are a 0-2-1 transpose, and
@@ -421,20 +548,22 @@ std::tuple<bool, Shape, Shape> IsTranspose021(const Shape& a, const Shape& b) {
   CHECK(ShapeUtil::Compatible(a, b));
   std::vector<int64> perm(a.dimensions().size());
   {
-    std::vector<int64> layout_a(a.layout().minor_to_major().rbegin(),
-                                a.layout().minor_to_major().rend());
-    std::vector<int64> layout_b(b.layout().minor_to_major().rbegin(),
-                                b.layout().minor_to_major().rend());
+    auto layout_a_orig = LayoutUtil::MinorToMajor(a);
+    std::vector<int64> layout_a(layout_a_orig.rbegin(), layout_a_orig.rend());
+    auto layout_b_orig = LayoutUtil::MinorToMajor(b);
+    std::vector<int64> layout_b(layout_b_orig.rbegin(), layout_b_orig.rend());
     for (size_t i = 0; i < perm.size(); ++i) {
       perm[i] = PositionInContainer(layout_b, layout_a[i]);
     }
   }
   auto segs = ConsecutiveSegments(perm);
-  Shape norm_a = ShapeUtil::NormalizeShapeToMonotonicDim0MajorLayout(a);
-  Shape norm_b = ShapeUtil::NormalizeShapeToMonotonicDim0MajorLayout(b);
+  Shape norm_a =
+      ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(a);
+  Shape norm_b =
+      ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(b);
   if (3 == segs.size() && 0 == perm[0]) {
     Shape reduced_a = MergeDimensions(segs, norm_a);
-    Shape reduced_b = ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
+    Shape reduced_b = ShapeUtil::MakeShapeWithDescendingLayout(
         b.element_type(),
         Permute({0, 2, 1}, AsInt64Slice(reduced_a.dimensions())));
     return std::make_tuple(true, reduced_a, reduced_b);
@@ -448,10 +577,11 @@ std::tuple<bool, Shape, Shape> IsTranspose021(const Shape& a, const Shape& b) {
 bool AreShapesForTranspose021(const Shape& a, const Shape& b) {
   return 3 == b.dimensions().size() &&
          ShapeUtil::Compatible(
-             ShapeUtil::NormalizeShapeToMonotonicDim0MajorLayout(a),
+             ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(a),
              ShapeUtil::PermuteDimensions(
                  {0, 2, 1},
-                 ShapeUtil::NormalizeShapeToMonotonicDim0MajorLayout(b)));
+                 ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
+                     b)));
 }
 
 // Emits a tiled 0-2-1 transpose, assuming both input and output lain out from
@@ -483,9 +613,11 @@ int64 EmitTranspose021Tiled(llvm_ir::IrArray input, llvm_ir::IrArray output,
   CHECK(AreShapesForTranspose021(input.GetShape(), output.GetShape()));
 
   Shape input_shape =
-      ShapeUtil::NormalizeShapeToMonotonicDim0MajorLayout(input.GetShape());
+      ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
+          input.GetShape());
   Shape output_shape =
-      ShapeUtil::NormalizeShapeToMonotonicDim0MajorLayout(output.GetShape());
+      ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
+          output.GetShape());
   input = input.CastToShape(input_shape, builder);
   output = output.CastToShape(output_shape, builder);
 
@@ -603,7 +735,7 @@ int64 EmitTranspose021Tiled(llvm_ir::IrArray input, llvm_ir::IrArray output,
                   llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_x, {}, {},
                   builder))),
           builder->getInt64Ty(), /*isSigned=*/true, "block.id.x"),
-      ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
+      ShapeUtil::MakeShapeWithDescendingLayout(
           PRED /*arbitrary*/, AsInt64Slice(input_dims_in_tiles)),
       builder);
   const llvm_ir::IrArray::Index input_tile_origin = ({
@@ -706,6 +838,194 @@ Status IrEmitterUnnested::HandleCopy(HloInstruction* copy) {
   return IrEmitter::HandleCopy(copy);
 }
 
+Status IrEmitterUnnested::EmitReductionToScalar(
+    HloInstruction* reduce, const Shape& input_shape,
+    const llvm_ir::ElementGenerator& input_gen,
+    const llvm_ir::ElementGenerator& init_value_gen, HloComputation* reducer) {
+  // Number of elements processed by a single thread.
+  constexpr int64 kTileSize = 16;
+  int64 num_elems = ShapeUtil::ElementsIn(input_shape);
+
+  // Round up the number of tiles to a multiple of the warp size.  This is
+  // necessary for correctness.  We launch one thread per tile, and if the
+  // number of threads isn't a multiple of the number of the warp size, our
+  // shuffles will read from inactive threads, producing undefined values.
+  int64 num_tiles =
+      RoundUpToNearest(CeilOfRatio(num_elems, kTileSize), kWarpSize);
+
+  // Check whether every thread will process a full tile's worth of elements
+  // without reading outside the bounds of the input.  If this is true, we can
+  // skip some bounds checks in the final algorithm.
+  bool all_threads_in_bounds = num_tiles * kTileSize == num_elems;
+
+  // __global__ void full_reduce_kernel() {
+  //   x_in_tiles = threadIdx.x + blockIdx.x * blockDim.x;
+  //   x = x_in_tiles * kTileSize;
+  //
+  //   partial_result = init_value;
+  //   if (all_threads_in_bounds || x + kTileSize <= num_elems) {
+  //     for (i = 0; i < kTileSize; ++i) {
+  //       partial_result = Reducer(partial_result, input[x + i]);
+  //     }
+  //   } else {
+  //     for (i = 0; i < kTileSize; ++i) {
+  //       if (x + i < num_elems) {
+  //         partial_result = Reducer(partial_result, input[x + i]);
+  //       }
+  //     }
+  //   }
+  //   for (i = warpSize / 2; i > 0; i /= 2) {
+  //     partial_result = Reducer(partial_result,
+  //                              __shfl_down(partial_result, i));
+  //   }
+  //   if (lane_id == 0) {
+  //     AtomicReducer(&output[y], partial_result);
+  //   }
+  // }
+  //
+  // // Choose num_blocks and threads_per_block such that:
+  // //
+  // //   num_blocks * threads_per_block =
+  // //     RoundUpToNextMultipleOf(Ceil(num_elems / kTileSize), warpSize),
+  // //
+  // // and threads_per_block is a multiple of warpSize.
+  // reduce_kernel<<<num_blocks, threads_per_block>>>();
+  //
+  auto loop_body_emitter =
+      [=](const llvm_ir::IrArray::Index& tile_index) -> Status {
+    llvm::Type* element_ir_type =
+        llvm_ir::PrimitiveTypeToIrType(input_shape.element_type(), module_);
+    llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca(
+        element_ir_type, /*ArraySize=*/nullptr, "partial_reduction_result");
+    {
+      TF_ASSIGN_OR_RETURN(llvm::Value * init_ir_value,
+                          init_value_gen(llvm_ir::IrArray::Index({})));
+      ir_builder_.CreateStore(init_ir_value, partial_reduction_result_address);
+    }
+
+    llvm::Value* x_in_tiles = tile_index[0];
+
+    // Emit an inner for-loop that reduces the elements in the tile.
+    auto emit_tile_element_loop = [=](bool tile_in_bounds) -> Status {
+      std::unique_ptr<llvm_ir::ForLoop> tile_element_loop =
+          llvm_ir::ForLoop::EmitForLoop("element_id_in_tile",
+                                        ir_builder_.getInt64(0),
+                                        ir_builder_.getInt64(kTileSize),
+                                        ir_builder_.getInt64(1), &ir_builder_);
+
+      // Emit the body of the partial reduction loop.
+      llvm_ir::SetToFirstInsertPoint(tile_element_loop->GetBodyBasicBlock(),
+                                     &ir_builder_);
+      llvm::Value* x = ir_builder_.CreateNSWAdd(
+          ir_builder_.CreateNSWMul(x_in_tiles, ir_builder_.getInt64(kTileSize)),
+          tile_element_loop->GetIndVarValue());
+      // Unless we know the tile is entirely in bounds, we have to emit a
+      // x-in-bounds check before reading from the input.
+      if (!tile_in_bounds) {
+        llvm_ir::LlvmIfData if_data = llvm_ir::EmitIfThenElse(
+            ir_builder_.CreateICmpULT(x, ir_builder_.getInt64(num_elems)),
+            "x_in_bounds", &ir_builder_);
+
+        // Emit code that reads the input element and accumulates it to
+        // the partial reduction result.
+        llvm_ir::SetToFirstInsertPoint(if_data.true_block, &ir_builder_);
+      }
+      llvm_ir::IrArray::Index input_index(
+          /*linear=*/x, input_shape, &ir_builder_);
+      llvm::Value* input_address = ir_builder_.CreateAlloca(element_ir_type);
+      TF_ASSIGN_OR_RETURN(llvm::Value * input_ir_value, input_gen(input_index));
+      ir_builder_.CreateStore(input_ir_value, input_address);
+      return (EmitCallToNestedComputation(
+          *reducer, {partial_reduction_result_address, input_address},
+          partial_reduction_result_address));
+    };
+
+    // x_end = kTileSize + x_in_tiles * kTileSize, i.e., the location that's
+    // immediately beyond the tile.
+    llvm::Value* x_end = ir_builder_.CreateNSWAdd(
+        ir_builder_.getInt64(kTileSize),
+        ir_builder_.CreateNSWMul(x_in_tiles, ir_builder_.getInt64(kTileSize)));
+    // The tile is entirely in bound if all_threads_in_bounds or
+    // x_end <= num_elems.
+    llvm::Value* tile_in_bounds = ir_builder_.CreateOr(
+        ir_builder_.CreateICmpULE(x_end, ir_builder_.getInt64(num_elems)),
+        ir_builder_.getInt1(all_threads_in_bounds));
+    llvm_ir::LlvmIfData if_tile_in_bounds_data =
+        llvm_ir::EmitIfThenElse(tile_in_bounds, "tile_in_bounds", &ir_builder_);
+    llvm_ir::SetToFirstInsertPoint(if_tile_in_bounds_data.true_block,
+                                   &ir_builder_);
+    TF_RETURN_IF_ERROR(emit_tile_element_loop(/*tile_in_bounds=*/true));
+    llvm_ir::SetToFirstInsertPoint(if_tile_in_bounds_data.false_block,
+                                   &ir_builder_);
+    TF_RETURN_IF_ERROR(emit_tile_element_loop(/*tile_in_bounds=*/false));
+
+    // After the if-then-else statement on tile_in_bounds, emit calls to
+    // shfl_down that accumulate the partial reduction results of all threads
+    // from the warp.
+    llvm_ir::SetToFirstInsertPoint(if_tile_in_bounds_data.after_block,
+                                   &ir_builder_);
+    int bit_width = llvm_ir::GetSizeInBits(element_ir_type);
+    // bitcast cannot be applied to aggregate types (even packed ones), so we
+    // instead bitcast addresses of load/store to intN* of the same bit-width.
+    llvm::Type* shuffle_ir_type = element_ir_type->isStructTy()
+                                      ? ir_builder_.getIntNTy(bit_width)
+                                      : element_ir_type;
+    for (int shuffle_distance = kWarpSize / 2; shuffle_distance >= 1;
+         shuffle_distance /= 2) {
+      llvm::Value* partial_reduction_result = ir_builder_.CreateLoad(
+          ir_builder_.CreateBitCast(partial_reduction_result_address,
+                                    shuffle_ir_type->getPointerTo()),
+          "partial_reduction_result");
+      llvm::Value* result_from_other_lane = ir_builder_.CreateAlloca(
+          element_ir_type, nullptr, "result_from_other_lane");
+      ir_builder_.CreateStore(
+          EmitShuffleDown(partial_reduction_result,
+                          ir_builder_.getInt32(shuffle_distance), &ir_builder_),
+          ir_builder_.CreateBitCast(result_from_other_lane,
+                                    shuffle_ir_type->getPointerTo()));
+      TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
+          *reducer, {partial_reduction_result_address, result_from_other_lane},
+          partial_reduction_result_address));
+    }
+
+    const HloInstruction* output =
+        reduce->IsFused() ? reduce->parent()->FusionInstruction() : reduce;
+
+    // Emit an atomic operation that accumulates the partial reduction result of
+    // lane 0 (which holds the partially accumulated result for its warp) to the
+    // output element.
+    llvm::Value* lane_id = ir_builder_.CreateURem(
+        x_in_tiles, ir_builder_.getInt64(kWarpSize), "lane_id");
+    llvm_ir::LlvmIfData if_lane_id_is_zero_data = llvm_ir::EmitIfThenElse(
+        ir_builder_.CreateICmpEQ(lane_id, ir_builder_.getInt64(0)),
+        "lane_id_is_zero", &ir_builder_);
+    llvm_ir::SetToFirstInsertPoint(if_lane_id_is_zero_data.true_block,
+                                   &ir_builder_);
+    llvm::Value* output_address =
+        GetIrArray(*output, *output)
+            .EmitArrayElementAddress(
+                llvm_ir::IrArray::Index(/*linear=*/ir_builder_.getInt64(0),
+                                        output->shape(), &ir_builder_),
+                &ir_builder_, "output_element_address");
+    return EmitAtomicOperationForNestedComputation(
+        *reducer, output_address, partial_reduction_result_address);
+  };
+
+  // Emit a parallel loop that iterates through all input tiles, one per thread.
+  Shape tiled_input_shape = ShapeUtil::MakeShapeWithLayout(
+      reduce->shape().element_type(), {num_tiles}, {0});
+  LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
+      tiled_input_shape, ir_emitter_context_->device_description());
+  CHECK(LastThunk()->kind() == Thunk::Kind::kSequential);
+  UpdateLaunchDimensions(
+      launch_dimensions,
+      static_cast<SequentialThunk*>(LastThunk())->thunks().back().get(),
+      ir_emitter_context_->llvm_module());
+  return ParallelLoopEmitter(loop_body_emitter, tiled_input_shape,
+                             launch_dimensions, &ir_builder_)
+      .EmitLoop(IrName(reduce));
+}
+
 Status IrEmitterUnnested::EmitColumnReduction(
     int64 height, int64 width, HloInstruction* reduce, const Shape& input_shape,
     const llvm_ir::ElementGenerator& input_gen,
@@ -799,14 +1119,15 @@ Status IrEmitterUnnested::EmitColumnReduction(
         // input_shape to normalized_input_shape and a reshape from
         // normalized_input_shape to input_matrix_shape.
         const Shape normalized_input_shape =
-            ShapeUtil::NormalizeShapeToMonotonicDim0MajorLayout(input_shape);
+            ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
+                input_shape);
+        auto input_shape_min2maj = LayoutUtil::MinorToMajor(input_shape);
         const std::vector<int64> transpose_dimension_mapping(
-            input_shape.layout().minor_to_major().rbegin(),
-            input_shape.layout().minor_to_major().rend());
+            input_shape_min2maj.rbegin(), input_shape_min2maj.rend());
 
         const Shape input_matrix_shape =
-            ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
-                input_shape.element_type(), {height, width});
+            ShapeUtil::MakeShapeWithDescendingLayout(input_shape.element_type(),
+                                                     {height, width});
         const llvm_ir::IrArray::Index input_matrix_index(
             {y, x}, input_matrix_shape, &ir_builder_);
         const llvm_ir::IrArray::Index input_index =
@@ -901,7 +1222,7 @@ Status IrEmitterUnnested::EmitRowReduction(
   //
   // Three optimizations are performed.
   //
-  // 1. To coalesc global memory accesses, dilate the tile with a factor of 32
+  // 1. To coalesce global memory accesses, dilate the tile with a factor of 32
   // (i.e. the warp size). For example, suppose the width is 8x32=256. Instead
   // of making each tile consecutive, we let make tile 0 column
   // [0,32,64,...,224], tile 1 column [1,33,65,...,225], and so on. This ensures
@@ -1042,13 +1363,14 @@ Status IrEmitterUnnested::EmitRowReduction(
         // from input_shape to normalized_input_shape and a reshape from
         // normalized_input_shape to input_3d_tensor_shape.
         const Shape normalized_input_shape =
-            ShapeUtil::NormalizeShapeToMonotonicDim0MajorLayout(input_shape);
+            ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
+                input_shape);
+        auto input_shape_min2maj = LayoutUtil::MinorToMajor(input_shape);
         const std::vector<int64> transpose_dimension_mapping(
-            input_shape.layout().minor_to_major().rbegin(),
-            input_shape.layout().minor_to_major().rend());
+            input_shape_min2maj.rbegin(), input_shape_min2maj.rend());
         const Shape input_3d_tensor_shape =
-            ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
-                input_shape.element_type(), {depth, height, width});
+            ShapeUtil::MakeShapeWithDescendingLayout(input_shape.element_type(),
+                                                     {depth, height, width});
         const llvm_ir::IrArray::Index input_3d_tensor_index(
             {z, y, x}, input_3d_tensor_shape, &ir_builder_);
         const llvm_ir::IrArray::Index input_index =
@@ -1177,9 +1499,9 @@ Status IrEmitterUnnested::EmitReductionToVector(
   // whether another dimension is major or minor of them.
   std::sort(input_dims_to_keep.begin(), input_dims_to_keep.end(),
             [&input_shape](int64 dim_a, int64 dim_b) {
-              return PositionInContainer(input_shape.layout().minor_to_major(),
+              return PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                                          dim_a) <
-                     PositionInContainer(input_shape.layout().minor_to_major(),
+                     PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                                          dim_b);
             });
   // Now, if output rank is at least 1, `input_dims_to_keep.front()` is
@@ -1189,14 +1511,11 @@ Status IrEmitterUnnested::EmitReductionToVector(
   // the dimensions to keep are contiguous, by prerequisite of
   // `EmitReductionToVector`, we only need to check whether the minormost
   // dimension of the input is to keep.
-  //
-  // If the output is scalar, we could emit either a row or a column reduction.
-  // Some tests have shown scalar reduction is no more efficient as row
-  // reduction, and is simpler to emit as column reduction, so we emit a column
-  // reduction in this case.
-  if (input_dims_to_keep.empty() ||
-      input_dims_to_keep.front() ==
-          LayoutUtil::Minor(input_shape.layout(), 0)) {
+  if (input_dims_to_keep.empty()) {
+    return EmitReductionToScalar(reduce, input_shape, input_gen, init_value_gen,
+                                 reducer);
+  } else if (input_dims_to_keep.front() ==
+             LayoutUtil::Minor(input_shape.layout(), 0)) {
     // Column reduction. Treat the result of "input" as a matrix whose width
     // is the most minor dimension and height the product of other dimensions,
     // and treat "reduce" as a column reduction of the input matrix.
@@ -1224,14 +1543,14 @@ Status IrEmitterUnnested::EmitReductionToVector(
     int64 width = 1;
     for (int64 input_dim = 0; input_dim < ShapeUtil::Rank(input_shape);
          ++input_dim) {
-      if (PositionInContainer(input_shape.layout().minor_to_major(),
+      if (PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                               input_dim) >
-          PositionInContainer(input_shape.layout().minor_to_major(),
+          PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                               input_dims_to_keep.back())) {
         depth *= input_shape.dimensions(input_dim);
-      } else if (PositionInContainer(input_shape.layout().minor_to_major(),
+      } else if (PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                                      input_dim) <
-                 PositionInContainer(input_shape.layout().minor_to_major(),
+                 PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
                                      input_dims_to_keep.front())) {
         width *= input_shape.dimensions(input_dim);
       }
@@ -1611,7 +1930,7 @@ std::unique_ptr<Thunk> IrEmitterUnnested::BuildHostToDeviceCopyThunk(
   const HloInstruction* operand = inst->operand(0);
   CHECK_EQ(HloOpcode::kConstant, operand->opcode());
   return MakeUnique<HostToDeviceCopyThunk>(
-      /*source_address=*/operand->literal().InternalData(),
+      /*source_address=*/operand->literal().untyped_data(),
       /*destination_buffer=*/GetAllocationSlice(*inst),
       /*mem_size=*/
       llvm_ir::ByteSizeOf(operand->shape(),
@@ -1738,6 +2057,16 @@ std::unique_ptr<Thunk> IrEmitterUnnested::BuildConvolutionThunk(
   }
 }
 
+std::unique_ptr<Thunk> IrEmitterUnnested::BuildFftThunk(
+    const HloInstruction* inst) {
+  const HloInstruction* operand = inst->operand(0);
+  return MakeUnique<FftThunk>(inst->fft_type(), inst->fft_length(),
+                              /*input_buffer=*/GetAllocationSlice(*operand),
+                              /*output_buffer=*/GetAllocationSlice(*inst),
+                              /*input_shape=*/operand->shape(),
+                              /*output_shape=*/inst->shape(), inst);
+}
+
 Status IrEmitterUnnested::EmitInitializer(const HloInstruction* hlo,
                                           KernelThunk* thunk) {
   bool fused = HloOpcode::kFusion == hlo->opcode();
diff --git a/tensorflow/compiler/xla/service/gpu/layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/layout_assignment_test.cc
deleted file mode 100644
index ac206b89d329d7e4ac91ee51162c9694f6899d78..0000000000000000000000000000000000000000
--- a/tensorflow/compiler/xla/service/gpu/layout_assignment_test.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/gpu/layout_assignment.h"
-
-#include "tensorflow/compiler/xla/layout_util.h"
-#include "tensorflow/compiler/xla/service/computation_layout.h"
-#include "tensorflow/compiler/xla/service/hlo_computation.h"
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-#include "tensorflow/compiler/xla/service/hlo_module.h"
-#include "tensorflow/compiler/xla/service/hlo_opcode.h"
-#include "tensorflow/compiler/xla/shape_layout.h"
-#include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
-#include "tensorflow/compiler/xla/xla_data.pb.h"
-
-namespace xla {
-namespace gpu {
-namespace {
-
-using LayoutAssignmentTest = HloTestBase;
-
-TEST_F(LayoutAssignmentTest, Elementwise) {
-  Shape ashape = ShapeUtil::MakeShape(F32, {42, 12});
-  Shape ashape_in_row_major(ashape);
-  Shape ashape_in_col_major(ashape);
-  *ashape_in_row_major.mutable_layout() = LayoutUtil::MakeLayout({1, 0});
-  *ashape_in_col_major.mutable_layout() = LayoutUtil::MakeLayout({0, 1});
-
-  // Enumerate all possible combinations of layouts.
-  for (const Shape& lhs_shape_with_layout :
-       {ashape_in_row_major, ashape_in_col_major}) {
-    for (const Shape& rhs_shape_with_layout :
-         {ashape_in_row_major, ashape_in_col_major}) {
-      for (const Shape& result_shape_with_layout :
-           {ashape_in_row_major, ashape_in_col_major}) {
-        // GpuLayoutAssignment should assign the same layout to "add" and its
-        // two operands.
-        auto builder = HloComputation::Builder(TestName());
-        auto x = builder.AddInstruction(
-            HloInstruction::CreateParameter(0, ashape, "x"));
-        auto y = builder.AddInstruction(
-            HloInstruction::CreateParameter(1, ashape, "y"));
-        auto add = builder.AddInstruction(
-            HloInstruction::CreateBinary(ashape, HloOpcode::kAdd, x, y));
-        auto module = CreateNewModule();
-        HloComputation* computation =
-            module->AddEntryComputation(builder.Build(add));
-
-        ComputationLayout computation_layout(
-            computation->ComputeProgramShape());
-        *computation_layout.mutable_parameter_layout(0) =
-            ShapeLayout(lhs_shape_with_layout);
-        *computation_layout.mutable_parameter_layout(1) =
-            ShapeLayout(rhs_shape_with_layout);
-        *computation_layout.mutable_result_layout() =
-            ShapeLayout(result_shape_with_layout);
-
-        GpuLayoutAssignment layout_assignment(&computation_layout);
-        EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
-
-        for (const HloInstruction* operand : add->operands()) {
-          EXPECT_TRUE(LayoutUtil::Equal(add->shape().layout(),
-                                        operand->shape().layout()));
-        }
-      }
-    }
-  }
-}
-
-}  // namespace
-}  // namespace gpu
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
index 059943d48cd34b0ac487b91c3f3079ee3f761229..cfabae791d26d0eb49826085ad7ad166a19109a1 100644
--- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
+++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
@@ -440,7 +440,7 @@ StatusOr<string> CompileModuleToPtx(llvm::Module* module,
 
 // One-time module initializer.
 // Must be called only once -- DO NOT CALL DIRECTLY.
-void GPUBackendInit() {
+void GPUBackendInit(const HloModuleConfig& hlo_module_config) {
   // Feed all customized flags here, so we can override them with llvm_cl_opts
   // without redeploy the compiler for development purpose.
 
@@ -466,6 +466,8 @@ void GPUBackendInit() {
   // between those loads.
   FeedLLVMWithFlags({"-memdep-block-scan-limit=500"});
 
+  llvm_ir::InitializeLLVMCommandLineOptions(hlo_module_config);
+
   // Initialize the NVPTX target; it's the only target we link with, so call its
   // specific initialization functions instead of the catch-all InitializeAll*.
   LLVMInitializeNVPTXTarget();
@@ -485,7 +487,7 @@ StatusOr<string> CompileToPtx(llvm::Module* module,
                               const HloModuleConfig& hlo_module_config,
                               const string& libdevice_dir_path) {
   static std::once_flag backend_init_flag;
-  std::call_once(backend_init_flag, GPUBackendInit);
+  std::call_once(backend_init_flag, GPUBackendInit, hlo_module_config);
 
   string ptx;
   {
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
index 11290eda4ffcd579c03acd531b493bb7b1d34ed4..c29fee0879c02021fdc23ac0e02ab398cf40f99e 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
@@ -202,8 +202,7 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution(
   //   ABCD0 = Pad(ABCD, padding_high=1)
   //   BackwardFilterConv(ABCD0, xyz, padding_low=pading_high=1)
   // We choose the lesser of padding_low and padding_high as the new padding.
-  HloInstruction* transpose = backward_conv->fused_expression_root();
-  HloInstruction* forward_conv = transpose->mutable_operand(0);
+  HloInstruction* forward_conv = backward_conv->fused_expression_root();
   HloInstruction* input = backward_conv->mutable_operand(0);
   Window new_forward_conv_window = forward_conv->window();
   Window new_backward_conv_window = backward_conv->window();
@@ -269,19 +268,10 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution(
               .ConsumeValueOrDie(),
           padded_input, output, new_forward_conv_window, forward_conv_dnums));
 
-  HloInstruction* new_transpose =
-      computation->AddInstruction(HloInstruction::CreateTranspose(
-          ShapeInference::InferTransposeShape(new_forward_conv->shape(),
-                                              transpose->dimensions())
-              .ConsumeValueOrDie(),
-          new_forward_conv, transpose->dimensions()));
-
-  // Fuse the new forward convolution and the new transpose to the new backward
-  // convolution.
+  // Fuse the new forward convolution to the new backward convolution.
   HloInstruction* new_backward_conv =
       computation->CreateFusionInstructionForBackwardConvolution(
-          {new_transpose, new_forward_conv},
-          HloInstruction::FusionKind::kConvBackwardFilter,
+          {new_forward_conv}, HloInstruction::FusionKind::kConvBackwardFilter,
           new_backward_conv_window, backward_conv_dnums);
 
   VLOG(1) << "Canonicalizing backward filter conv";
diff --git a/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc b/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc
index 457e6094d90413440658452937bff2ccfe6cbe5c..388dcc008b07a76ff9ed07df04181e49a8734f51 100644
--- a/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc
@@ -88,6 +88,23 @@ llvm_ir::IrArray::Index ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock(
           /*HasNUW=*/true, /*HasNSW=*/true),
       thread_id, "linear_index", /*HasNUW=*/true, /*HasNSW=*/true);
 
+  // Add an @llvm.assume(linear_index < threads_per_block * num_blocks).
+  //
+  // This might seem obvious from the computation above, but LLVM does not
+  // currently determine the range of linear_index precisely.  InstCombine uses
+  // known-bits, which, when applied to the task of determining a value's range,
+  // is imprecise for everything other than powers of 2.  And
+  // CorrelatedValuePropagation is, as a cost-saving measure, disabled for
+  // conditions in the same basic block as their operands.
+  llvm_ir::EmitCallToIntrinsic(
+      llvm::Intrinsic::assume,
+      {ir_builder_->CreateICmpULT(
+          linear_index,
+          ir_builder_->getInt64(launch_dimensions_.threads_per_block() *
+                                launch_dimensions_.block_count()),
+          "linear_index_in_range")},
+      {}, ir_builder_);
+
   auto if_in_bounds = llvm_ir::EmitIfThenElse(
       ir_builder_->CreateICmpULT(
           linear_index, ir_builder_->getInt64(ShapeUtil::ElementsIn(shape_))),
diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
index d0d2deee24848184278e3e51dcaa3bb673b5fadc..6cf280df05496716a0780d61ded92efd9982734c 100644
--- a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
@@ -44,37 +44,41 @@ std::ostream& operator<<(std::ostream& out,
 
 // Calculates the launch dimensions used to invoke `hlo`.
 LaunchDimensions CalculateLaunchDimensions(
-    const Shape& shape, const se::DeviceDescription& device_desc,
-    PartitionStrategy partition_strategy) {
-  int64 warp_size = device_desc.threads_per_warp();
-
+    const Shape& shape, const se::DeviceDescription& device_desc) {
   int64 num_elements = ShapeUtil::ElementsIn(shape);
   if (num_elements <= 1) {
     return LaunchDimensions();
   }
 
-  // Calculate the number of threads per block.
-  // Initialize threads_per_block as the threads-per-block limit.
-  int64 threads_per_block = device_desc.threads_per_block_limit();
-  VLOG(2) << "Initial # of threads per block = " << threads_per_block;
-
-  if (partition_strategy == PartitionStrategy::kLatency) {
-    // Limit the thread count to allow maximum number of registers per thread.
-    // TODO(b/28560520): We don't have to assume the emitted kernel will use up
-    // all the registers. We could use ptxas to examine the actual number of
-    // register used, and set the thread count accordingly.
-    int64 threads_per_block_limit_due_to_registers =
-        device_desc.registers_per_core_limit() /
-        device_desc.registers_per_thread_limit();
-    CHECK_NE(0, threads_per_block_limit_due_to_registers);
-    if (threads_per_block_limit_due_to_registers < threads_per_block) {
-      threads_per_block =
-          // Make `threads_per_block` a multiple of warp size to use GPU
-          // efficiently.
-          warp_size *
-          std::max(1LL, threads_per_block_limit_due_to_registers / warp_size);
-      VLOG(2) << "Update # of threads per block due to register pressure = "
-              << threads_per_block;
+  // Since we don't do any inter-warp communication, we're free to choose any
+  // block size we want, subject to hardware constraints.  We choose the
+  // smallest block size that allows the GPU to reach full occupancy (assuming
+  // the kernel uses sufficiently few registers).  This gives us max performance
+  // when the kernel uses few registers, and lets us scale down gracefully as
+  // the kernel uses more registers.
+  //
+  // Specifically, we choose the number of threads per block such that
+  //
+  //   <num threads per block> * <max blocks per core> = <max threads per core>
+
+  auto threads_per_core = device_desc.threads_per_core_limit();
+  auto blocks_per_core = device_desc.blocks_per_core_limit();
+  int64 threads_per_block;
+  if (threads_per_core != 0 && blocks_per_core != 0) {
+    threads_per_block = device_desc.threads_per_core_limit() /
+                        device_desc.blocks_per_core_limit();
+  } else {
+    static std::atomic<int64> log_count{0};
+    if (log_count.fetch_add(1) < 8) {
+      LOG(WARNING) << "Attempting to calculate launch dimensions for GPU "
+                      "without full information about its capabilities.  "
+                      "StreamExecutor's PopulateDeviceDescription should be "
+                      "updated for this device.";
+    }
+    threads_per_block = device_desc.threads_per_warp();
+    if (threads_per_block == 0) {
+      // Fall back to *something* if we can't even get num threads per warp.
+      threads_per_block = 32;
     }
   }
 
@@ -84,8 +88,6 @@ LaunchDimensions CalculateLaunchDimensions(
             << threads_per_block << ") because the latter is smaller.";
   }
 
-  // Calculate the block count. We copy the strategy used by Eigen:
-  // eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
   int64 block_count = CeilOfRatio(num_elements, threads_per_block);
   VLOG(2) << tensorflow::strings::Printf(
       "Initialized the block count to ceil(# of elements / threads per "
diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.h b/tensorflow/compiler/xla/service/gpu/partition_assignment.h
index 8f7fce884acc93fd39510ad0826b819a6d9731a7..0bf463a6ef95d5a32784838c08ad239752fd1acf 100644
--- a/tensorflow/compiler/xla/service/gpu/partition_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.h
@@ -30,14 +30,6 @@ limitations under the License.
 namespace xla {
 namespace gpu {
 
-enum class PartitionStrategy {
-  // Optimized for latency by allowing maximum number of registers per thread.
-  kLatency,
-  // Optimized for throughput. This may limit registers per thread and cause
-  // longer latency.
-  kThroughput
-};
-
 // Encapsulates the launch dimensions of a kernel, e.g., the block count and the
 // number of threads per block.
 class LaunchDimensions {
@@ -66,8 +58,7 @@ std::ostream& operator<<(std::ostream& out,
 
 LaunchDimensions CalculateLaunchDimensions(
     const Shape& shape,
-    const perftools::gputools::DeviceDescription& device_desc,
-    PartitionStrategy partition_strategy = PartitionStrategy::kLatency);
+    const perftools::gputools::DeviceDescription& device_desc);
 
 }  // namespace gpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/thunk.h b/tensorflow/compiler/xla/service/gpu/thunk.h
index 0ff27888ad72f8190400c22a9086d1965448662c..625c3f8bea418b7942145a05ba42b9ea9b14543b 100644
--- a/tensorflow/compiler/xla/service/gpu/thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/thunk.h
@@ -43,6 +43,10 @@ class Thunk {
   enum class Kind {
     kConvolution,
     kCopy,
+    kCudnnBatchNormBackward,
+    kCudnnBatchNormForwardInference,
+    kCudnnBatchNormForwardTraining,
+    kFft,
     kGemm,
     kInfeed,
     kKernel,
@@ -70,6 +74,29 @@ class Thunk {
     return tensorflow::Status::OK();
   }
 
+  // Users of Thunk should call ShouldHaltAllActivityBeforeRunning(stream)
+  // before calling ExecuteOnStream(stream).  If it returns true, it's the
+  // user's responsibility to wait for all activity on the GPU to finish before
+  // calling ExecuteOnStream.
+  //
+  // This value is not required to be constant for a given Thunk.  For example,
+  // a Thunk that performs autotuning may return true for its first run and
+  // false thereafter.
+  virtual bool ShouldHaltAllActivityBeforeRunning(
+      perftools::gputools::Stream* /*stream*/) {
+    return false;
+  }
+
+  // Indicates whether thunks scheduled after this one should wait for this one
+  // to complete before running. For example, a convolution thunk creates a
+  // scratch allocator, then kicks off a convolution in cudnn via the stream
+  // executor. When the stream executor call returns, the scratch allocator goes
+  // out of scope, and the scratch memory is deallocated. In this case, the
+  // convolution thunk needs to return true so that future thunks wait for the
+  // convolution thunk to avoid reusing the deallocated memory until the
+  // convolution thunk is done with it.
+  virtual bool ShouldBlockFutureThunks() { return false; }
+
   // Execute the kernel for the thunk on the given stream. This method must be
   // called after Initialize and can be called multiple times over Thunk's
   // lifetime. Stream argument must be non-null.
diff --git a/tensorflow/compiler/xla/service/gpu/while_thunk.cc b/tensorflow/compiler/xla/service/gpu/while_thunk.cc
index 0d2412096abf7838b7b0e7617811c789f507a4a1..c21559af6d2e5dfb5aaf62afcdcaed514e0914c9 100644
--- a/tensorflow/compiler/xla/service/gpu/while_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/while_thunk.cc
@@ -34,16 +34,14 @@ WhileThunk::WhileThunk(
       body_thunk_sequence_(
           MakeUnique<SequentialThunk>(std::move(*body_thunk_sequence), hlo)) {}
 
-tensorflow::Status WhileThunk::Initialize(const GpuExecutable& executable) {
+Status WhileThunk::Initialize(const GpuExecutable& executable) {
   TF_RETURN_IF_ERROR(condition_thunk_sequence_->Initialize(executable));
   TF_RETURN_IF_ERROR(body_thunk_sequence_->Initialize(executable));
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status WhileThunk::ExecuteOnStream(
-    const BufferAllocations& buffer_allocations,
-    perftools::gputools::Stream* stream) {
-
+Status WhileThunk::ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                                   perftools::gputools::Stream* stream) {
   perftools::gputools::DeviceMemoryBase condition_result_data =
       buffer_allocations.GetDeviceAddress(condition_result_buffer_index_);
 
@@ -55,9 +53,11 @@ tensorflow::Status WhileThunk::ExecuteOnStream(
     // Copy the result of condition computation and break the loop if 'false'.
     bool condition_result;
     stream->ThenMemcpy(&condition_result, condition_result_data, sizeof(bool));
-    if (!stream->BlockHostUntilDone()) {
+    Status block_status = stream->BlockHostUntilDone();
+    if (!block_status.ok()) {
       return InternalError(
-          "Failed to complete all kernels launched on stream %p", stream);
+          "Failed to complete all kernels launched on stream %p: %s", stream,
+          block_status.error_message().c_str());
     }
 
     if (!condition_result) {
@@ -68,7 +68,7 @@ tensorflow::Status WhileThunk::ExecuteOnStream(
     TF_RETURN_IF_ERROR(
         body_thunk_sequence_->ExecuteOnStream(buffer_allocations, stream));
   }
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
 }  // namespace gpu
diff --git a/tensorflow/compiler/xla/service/gpu/while_thunk.h b/tensorflow/compiler/xla/service/gpu/while_thunk.h
index 95ed5497cea4fa3ba5dcdc6762cbd53cec88339a..4c9f45de9e42494df58706d0a4a3eb0c4220b8b8 100644
--- a/tensorflow/compiler/xla/service/gpu/while_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/while_thunk.h
@@ -45,10 +45,9 @@ class WhileThunk : public Thunk {
   WhileThunk(const WhileThunk&) = delete;
   WhileThunk& operator=(const WhileThunk&) = delete;
 
-  tensorflow::Status Initialize(const GpuExecutable& executable) override;
-  tensorflow::Status ExecuteOnStream(
-      const BufferAllocations& buffer_allocations,
-      perftools::gputools::Stream* stream) override;
+  Status Initialize(const GpuExecutable& executable) override;
+  Status ExecuteOnStream(const BufferAllocations& buffer_allocations,
+                         perftools::gputools::Stream* stream) override;
 
  private:
   const BufferAllocation::Slice condition_result_buffer_index_;
diff --git a/tensorflow/compiler/xla/service/gpu/while_transformer.cc b/tensorflow/compiler/xla/service/gpu/while_transformer.cc
index ccdd1717593e4fa7c1d1deb3f0f9ebfab1bf7209..e6caec8625f0d622dbb92bcc20802d254fe23f94 100644
--- a/tensorflow/compiler/xla/service/gpu/while_transformer.cc
+++ b/tensorflow/compiler/xla/service/gpu/while_transformer.cc
@@ -44,7 +44,7 @@ namespace {
 //
 //            Parameter
 //               |
-//   Const  GetTupleElemet
+//   Const  GetTupleElement
 //      \   /
 //       Add (root)
 //
@@ -62,7 +62,7 @@ namespace {
 //                                &tagged_instructions));
 //
 // Instructions that are "tagged" with a context-specific string will
-// be returned in 'tagged_instructions' for further procesing (i.e. parsing
+// be returned in 'tagged_instructions' for further processing (i.e. parsing
 // constants or recording the tuple_index).
 //
 class ExprTree {
diff --git a/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc b/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc
index f16daa0b5481474e754c880ead1945297ca50168..2f290f61bd527e9827472a78256f015e066e44be 100644
--- a/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc
@@ -117,9 +117,7 @@ class WhileTransformerTest : public HloTestBase {
   }
 
   void RunCopyInsertionPass() {
-    HloVerifier verifier([](const Shape& shape) {
-      return ShapeUtil::ByteSizeOf(shape, /*pointer_size=*/sizeof(void*));
-    });
+    HloVerifier verifier;
     TF_ASSERT_OK(verifier.Run(module_.get()).status());
     CopyInsertion copy_insertion;
     TF_ASSERT_OK(copy_insertion.Run(module_.get()).status());
diff --git a/tensorflow/compiler/xla/service/graphviz_example.cc b/tensorflow/compiler/xla/service/graphviz_example.cc
index 049e8d80d80c835bca4a4d38592564ba82a3ecf9..05017008e2ddbe0b9e78d06275fdec5d08d94bfa 100644
--- a/tensorflow/compiler/xla/service/graphviz_example.cc
+++ b/tensorflow/compiler/xla/service/graphviz_example.cc
@@ -108,8 +108,11 @@ std::unique_ptr<HloModule> MakeBigGraph() {
       HloInstruction::CreateUnary(vshape, HloOpcode::kCopy, param_v0));
   auto clamp = builder.AddInstruction(HloInstruction::CreateTernary(
       vshape, HloOpcode::kClamp, copy, param_v1, param_v2));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(vshape, HloOpcode::kDot, clamp, param_v0));
+      HloInstruction::CreateDot(vshape, clamp, param_v0, dot_dnums));
   auto tuple = builder.AddInstruction(
       HloInstruction::CreateTuple({dot, param_s, clamp}));
   auto scalar = builder.AddInstruction(
diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h
index a03ad2f37cf5ede35275ea019ab3d5998fb85d0a..88a8698d16132372fc8f4e87eba3b99125aab876 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.h
+++ b/tensorflow/compiler/xla/service/heap_simulator.h
@@ -264,7 +264,7 @@ class LazyBestFitHeap : public HeapAlgorithm {
   enum { kLazyAllocOffset = -1 };
 
   struct OrderChunkByIncreasingSize {
-    bool operator()(const Chunk& a, const Chunk& b) {
+    bool operator()(const Chunk& a, const Chunk& b) const {
       if (a.size != b.size) return a.size < b.size;
       return a.offset < b.offset;
     }
diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc
index 17b926c8748e45b55f380e7595711b9e7a748f64..387b649a731ebcbfd8307807469f39f22d192b06 100644
--- a/tensorflow/compiler/xla/service/heap_simulator_test.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc
@@ -259,8 +259,11 @@ TEST_F(HeapSimulatorTest, MultiplyDot) {
       HloInstruction::CreateParameter(2, f32scalar_, "paramY"));
   auto mul = builder.AddInstruction(HloInstruction::CreateBinary(
       f32vec4_, HloOpcode::kMultiply, paramA, paramX));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY));
+      HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums));
 
   // The buffer for dot is the output, and it cannot be shared with the buffer
   // for mul, since dot isn't elementwise.
@@ -292,8 +295,11 @@ TEST_F(HeapSimulatorTest, MultiplyDotAdd) {
       HloInstruction::CreateParameter(2, f32scalar_, "paramY"));
   auto mul = builder.AddInstruction(HloInstruction::CreateBinary(
       f32vec4_, HloOpcode::kMultiply, paramA, paramX));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY));
+      HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums));
   auto add = builder.AddInstruction(
       HloInstruction::CreateBinary(f32vec4_, HloOpcode::kAdd, dot, paramA));
 
@@ -327,10 +333,13 @@ TEST_F(HeapSimulatorTest, MultiplyDotDot) {
       HloInstruction::CreateParameter(2, f32scalar_, "paramY"));
   auto mul = builder.AddInstruction(HloInstruction::CreateBinary(
       f32vec4_, HloOpcode::kMultiply, paramA, paramX));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot0 = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY));
+      HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums));
   auto dot1 = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, dot0, paramY));
+      HloInstruction::CreateDot(f32vec4_, dot0, paramY, dot_dnums));
 
   // The buffer for dot1 is the output.  No buffers can be shared.  The buffer
   // for mul is freed before the end, since it's no longer used after dot0
@@ -365,10 +374,13 @@ TEST_F(HeapSimulatorTest, MultiplyDotDotTuple) {
       HloInstruction::CreateParameter(2, f32scalar_, "paramY"));
   auto mul = builder.AddInstruction(HloInstruction::CreateBinary(
       f32vec4_, HloOpcode::kMultiply, paramA, paramX));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot0 = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, mul, paramY));
+      HloInstruction::CreateDot(f32vec4_, mul, paramY, dot_dnums));
   auto dot1 = builder.AddInstruction(
-      HloInstruction::CreateBinary(f32vec4_, HloOpcode::kDot, dot0, paramY));
+      HloInstruction::CreateDot(f32vec4_, dot0, paramY, dot_dnums));
   auto tuple =
       builder.AddInstruction(HloInstruction::CreateTuple({dot0, dot1}));
 
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index e984bdb5f75f714fb7b4453a97178158d9b8a8b8..0e9a852788e978f79fa6f6c802f855a4c476583f 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -36,6 +36,9 @@ option cc_enable_arenas = true;
 
 // Serialization of HloInstruction.
 message HloInstructionProto {
+  reserved 10;
+  reserved "parameter_name";
+
   string name = 1;
   string opcode = 2;
   xla.Shape shape = 3;
@@ -50,9 +53,8 @@ message HloInstructionProto {
   // Literal, only present for kConstant.
   xla.LiteralProto literal = 8;
 
-  // Parameter info, only present for kParameter.
+  // Parameter number is only present for kParameter.
   int64 parameter_number = 9;
-  string parameter_name = 10;
 
   // Fusion state, only present for kFusion.
   string fusion_kind = 11;
@@ -118,6 +120,15 @@ message HloInstructionProto {
 
   // Shape of outfeed request.
   xla.Shape outfeed_shape = 29;
+
+  // Describes the dimension numbers used for a dot operation
+  xla.DotDimensionNumbers dot_dimension_numbers = 30;
+
+  // FFT type (FFT, IFFT, etc).
+  xla.FftType fft_type = 31;
+
+  // FFT length.
+  repeated int64 fft_length = 32;
 }
 
 // Serialization of HloComputation.
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index c215cc48d60b93a88d64b7c4aecb2aa3bb460443..a63affa06caf75f1ccab084bd114e39ba7c91a38 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -131,9 +131,9 @@ Status HloComputation::RemoveParameter(int64 param_no) {
 
   while (param_no < param_instructions_.size()) {
     param_instruction = param_instructions_[param_no];
-    string param_name = param_instruction->parameter_name();
+    string param_name = param_instruction->name();
     // Fusion parameters are named foo.param_1, bar.param_2, etc. We are
-    // renumbering the parameters so replace the final number in the name with
+    // renumbering the parameters, so replace the final number in the name with
     // the updated value.
     const string param_underscore = ".param_";
     size_t index = param_name.rfind(param_underscore);
@@ -176,10 +176,6 @@ bool HloComputation::IsRemovable(const HloInstruction* instruction) {
     return false;
   }
 
-  if (instruction->HasSideEffect()) {
-    return false;
-  }
-
   return true;
 }
 
@@ -207,7 +203,8 @@ Status HloComputation::RemoveInstructionAndUnusedOperands(
     worklist.pop();
 
     if (removed.count(item) != 0 || item->user_count() != 0 ||
-        item == root_instruction() || !IsRemovable(item)) {
+        item == root_instruction() || !IsRemovable(item) ||
+        item->HasSideEffect()) {
       continue;
     }
     for (int i = 0; i < item->operand_count(); ++i) {
@@ -367,26 +364,27 @@ std::list<HloComputation*> HloComputation::MakeEmbeddedComputationsList()
   return post_order;
 }
 
-string HloComputation::ToString(int nested_level,
-                                bool include_large_constants) const {
+string HloComputation::ToString(const HloPrintOptions& options) const {
   std::ostringstream s;
-  for (int i = 0; i < nested_level; i++) {
+  for (int i = 0; i < options.indent_amount(); i++) {
     s << "    ";
   }
-  s << "%" << name() << " " << ShapeUtil::HumanString(ComputeProgramShape())
-    << " {\n";
+  if (options.print_percent()) {
+    s << "%";
+  }
+  s << name();
+  if (options.print_program_shape()) {
+    s << " " << ShapeUtil::HumanString(ComputeProgramShape());
+  }
+  s << " {\n";
   for (const HloInstruction* instruction : MakeInstructionPostOrder()) {
-    for (int i = 0; i < nested_level; i++) {
+    for (int i = 0; i < options.indent_amount(); i++) {
       s << "    ";
     }
     s << "  " << (instruction == root_instruction_ ? "ROOT " : "")
-      << instruction->ToString(
-             /*compact_operands=*/false,
-             /*include_metadata=*/true,
-             /*include_large_constants=*/include_large_constants)
-      << "\n";
+      << instruction->ToString(options) << "\n";
   }
-  for (int i = 0; i < nested_level; i++) {
+  for (int i = 0; i < options.indent_amount(); i++) {
     s << "    ";
   }
   s << "}";
@@ -543,7 +541,7 @@ ProgramShape HloComputation::ComputeProgramShape() const {
 
   for (auto* param_instruction : param_instructions_) {
     *program_shape.add_parameters() = param_instruction->shape();
-    *program_shape.add_parameter_names() = param_instruction->parameter_name();
+    *program_shape.add_parameter_names() = param_instruction->name();
   }
   *program_shape.mutable_result() = root_instruction_->shape();
 
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index 353b30bc69d98556311635d6097e3d6ad5fb2aaa..6436815f910405477ec21a33dec75ef71df08602 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -138,8 +138,11 @@ class HloComputation {
   void UniquifyName(NameUniquer* name_uniquer);
 
   // Return a string representation of the computation.
-  string ToString(int nested_level = 0,
-                  bool include_large_constants = false) const;
+  //
+  // (We express the default options using an overload rather than a default
+  // param because gdb ignores default params, but does resolve overloads.)
+  string ToString() const { return ToString(HloPrintOptions()); }
+  string ToString(const HloPrintOptions& options) const;
 
   // Returns a serialized representation of this computation.
   HloComputationProto ToProto() const;
@@ -313,11 +316,17 @@ class HloComputation {
           replacements,
       HloModule* module = nullptr, const string& suffix = "clone");
 
-  // Returns true if the given instruction can be removed from the
-  // computation. Instructions such as parameters and send/receive instructions
-  // cannot be removed without violating invariants of the HLO computation or
-  // module with the exception of fusion computation.  A parameter instruction
-  // is removable for a fusion computation.
+  // Returns true if the given instruction can be removed from the computation.
+  // Parameter instructions cannot be removed without violating invariants of
+  // the HLO computation with the exception of fusion computation. A parameter
+  // instruction is removable for a fusion computation.
+  //
+  // Note that IsRemovable() is a necessariy condition to remove an instruction
+  // rather than a sufficient condition. For example, instructions with
+  // side-effect (e.g., Send, Infeed) may be removed from a computation, but the
+  // transformation must guarantee the invariants relevant to the instructions
+  // still hold (e.g., Send and Recv must be removed together to make each
+  // channel complete).
   bool IsRemovable(const HloInstruction* instruction);
 
   // Returns true if this computation has a side effect. A computation has a
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
index 6fcc01dd64d1ac041e99eedb8b1de476409b257d..cd54eb74d18d0be714b5b56fc8ae0dfa55ff31a0 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
@@ -201,10 +201,11 @@ Status HloCostAnalysis::HandleCopy(const HloInstruction*) {
 Status HloCostAnalysis::HandleDot(const HloInstruction* dot) {
   const Shape& lhs_shape = dot->operand(0)->shape();
   const Shape& rhs_shape = dot->operand(1)->shape();
+  const DotDimensionNumbers& dnums = dot->dot_dimension_numbers();
   // Count of elements along the reduction dimension (last dimension for the
   // rhs).
-  int64 reduction_width = lhs_shape.dimensions(ShapeUtil::Rank(lhs_shape) - 1);
-
+  int64 reduction_width =
+      lhs_shape.dimensions(dnums.lhs_contracting_dimensions(0));
   // First divide by reduction width before multiplying by rhs elements to avoid
   // overflow.
   int64 fma_count;
@@ -391,13 +392,35 @@ Status HloCostAnalysis::HandleConvolution(const HloInstruction* convolution) {
   return Status::OK();
 }
 
+Status HloCostAnalysis::HandleFft(const HloInstruction* fft) {
+  auto real_shape =
+      ShapeUtil::IsTuple(fft->operand(0)->shape())
+          ? ShapeUtil::GetTupleElementShape(fft->operand(0)->shape(), 0)
+          : fft->operand(0)->shape();
+  constexpr int kFmaPerComplexMul = 4;
+  int64 log_factors = 1;
+  for (int64 dim : fft->fft_length()) {
+    log_factors *= tensorflow::Log2Floor(dim);
+  }
+  current_properties_[kFlopsKey] = kFmaFlops * kFmaPerComplexMul * log_factors *
+                                   ShapeUtil::ElementsIn(real_shape);
+  return Status::OK();
+}
+
 Status HloCostAnalysis::HandleCrossReplicaSum(const HloInstruction* crs) {
   // We assume 2 replicas, so that each output element is the sum of two input
   // elements.
   //
   // TODO(b/33004697): Compute correct cost here, taking the actual number of
   // replicas into account.
-  current_properties_[kFlopsKey] = ShapeUtil::ElementsIn(crs->shape());
+  double flops = 0.0;
+  ShapeUtil::ForEachSubshape(
+      crs->shape(), [&, this](const Shape& subshape, const ShapeIndex&) {
+        if (ShapeUtil::IsArray(subshape)) {
+          flops += ShapeUtil::ElementsIn(subshape);
+        }
+      });
+  current_properties_[kFlopsKey] = flops;
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
index fade19522cf0c30eab037aa355de1f9203f80014..e5783539e5436f09fa58bf7889118380ee90fea0 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
@@ -67,6 +67,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor {
   Status HandleCopy(const HloInstruction* copy) override;
   Status HandleDot(const HloInstruction* dot) override;
   Status HandleConvolution(const HloInstruction* convolution) override;
+  Status HandleFft(const HloInstruction* fft) override;
   Status HandleCrossReplicaSum(const HloInstruction* crs) override;
   Status HandleInfeed(const HloInstruction* infeed) override;
   Status HandleOutfeed(const HloInstruction* outfeed) override;
diff --git a/tensorflow/compiler/xla/service/hlo_cse.cc b/tensorflow/compiler/xla/service/hlo_cse.cc
index d35ba19a730555433099072c51ca5cf3774d4b99..7feda2b3b040de1f0a14303ce1adcd21c6624c8b 100644
--- a/tensorflow/compiler/xla/service/hlo_cse.cc
+++ b/tensorflow/compiler/xla/service/hlo_cse.cc
@@ -32,6 +32,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
 
 namespace xla {
 
@@ -91,6 +92,10 @@ bool CombineConstants(HloComputation* computation, bool is_layout_sensitive) {
 
 StatusOr<bool> HloCSE::Run(HloModule* module) {
   bool changed = false;
+  const std::function<bool(const HloInstruction*, const HloInstruction*)>
+      eq_instructions = std::equal_to<const HloInstruction*>();
+  const std::function<bool(const HloComputation*, const HloComputation*)>
+      eq_computations = std::equal_to<const HloComputation*>();
   for (auto* computation : module->computations()) {
     changed |= CombineConstants(computation, is_layout_sensitive_);
 
@@ -110,9 +115,11 @@ StatusOr<bool> HloCSE::Run(HloModule* module) {
       // of this instruction.
       const HloInstruction* operand = instruction->operand(0);
 
-      std::vector<HloInstruction*> equivalent_instructions;
+      tensorflow::gtl::InlinedVector<HloInstruction*, 8>
+          equivalent_instructions;
       for (HloInstruction* user : operand->users()) {
-        if (user != instruction && user->Identical(*instruction) &&
+        if (user != instruction &&
+            user->Identical(*instruction, eq_instructions, eq_computations) &&
             (!is_layout_sensitive_ ||
              ShapeUtil::Equal(user->shape(), instruction->shape()))) {
           equivalent_instructions.push_back(user);
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index 3f34b9ceb34abc89fca5b896bb8fbe3a06cd6ed4..d25fc5d7418ae40c7167f88d6172906482a58925 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -154,7 +154,11 @@ bool HloDataflowAnalysis::Phi(
     tensorflow::gtl::ArraySlice<const InstructionValueSet*> inputs) {
   CHECK(ssa_form_);
   VLOG(4) << "Phi(" << instruction->name() << ")";
-
+  VLOG(5) << "instruction value set = "
+          << GetInstructionValueSet(instruction).ToString();
+  for (const InstructionValueSet* input : inputs) {
+    VLOG(5) << "input value set = " << input->ToString();
+  }
   for (const InstructionValueSet* input : inputs) {
     DCHECK(ShapeUtil::Compatible(instruction->shape(), input->shape()));
   }
@@ -171,9 +175,14 @@ bool HloDataflowAnalysis::Phi(
         value_set.values().size() == 1 ? value_set.values()[0] : nullptr;
 
     // Construct a vector of unique value IDs of the inputs.
+    // Don't add value ids where the input is equal to the definition.
     std::vector<HloValue::Id> input_value_ids;
     for (const InstructionValueSet* input : inputs) {
       for (const HloValue* value : input->element(index).values()) {
+        if (value->defining_instruction() == instruction &&
+            value->defining_index() == index) {
+          continue;
+        }
         input_value_ids.push_back(value->id());
       }
     }
@@ -190,6 +199,7 @@ bool HloDataflowAnalysis::Phi(
          current_value->defining_instruction() == instruction &&
          current_value->defining_index() == index);
     if (current_value_defined_here) {
+      VLOG(5) << "current_value_defined_here: " << current_value->ToString();
       CHECK(current_value->is_phi());
       auto it = std::find(input_value_ids.begin(), input_value_ids.end(),
                           current_value->id());
@@ -197,7 +207,7 @@ bool HloDataflowAnalysis::Phi(
         input_value_ids.erase(it);
       }
     }
-
+    VLOG(5) << "after input_value_ids.size = " << input_value_ids.size();
     if (input_value_ids.empty()) {
       // A value set which has at least one element should never have its value
       // set reduced to zero elements. During dataflow value sets only can go
@@ -276,6 +286,23 @@ bool HloDataflowAnalysis::UpdateBitcastValueSet(HloInstruction* bitcast) {
   return false;
 }
 
+bool HloDataflowAnalysis::UpdateSliceValueSet(HloInstruction* slice) {
+  CHECK_EQ(slice->opcode(), HloOpcode::kSlice);
+  if (!slice->IsInPlaceSlice()) {
+    return false;
+  }
+  // If this slice is lowered to an in-place version, then it forwards the
+  // operand value to the output.
+  const InstructionValueSet& operand_set =
+      GetInstructionValueSet(slice->operand(0));
+  InstructionValueSet& slice_set = GetInstructionValueSet(slice);
+  if (operand_set != slice_set) {
+    slice_set = operand_set;
+    return true;
+  }
+  return false;
+}
+
 bool HloDataflowAnalysis::UpdateSendValueSet(HloInstruction* send) {
   CHECK_EQ(send->opcode(), HloOpcode::kSend);
   bool changed = false;
@@ -333,6 +360,21 @@ bool HloDataflowAnalysis::UpdateCallValueSet(HloInstruction* call) {
   return false;
 }
 
+bool HloDataflowAnalysis::UpdateConditionalValueSet(
+    HloInstruction* conditional) {
+  CHECK_EQ(conditional->opcode(), HloOpcode::kConditional);
+  std::vector<const InstructionValueSet*> inputs = {
+      &GetInstructionValueSet(
+          conditional->true_computation()->root_instruction()),
+      &GetInstructionValueSet(
+          conditional->false_computation()->root_instruction())};
+  // A phi-node is not defined for a kConditional instruction even though it
+  // represents a join point. This is because the current approach is to define
+  // a phi-node only for kWhile to account for the dataflow through back-edges
+  // and deal with the ambiguity in other cases.
+  return GetInstructionValueSet(conditional).AssignUnionOf(inputs);
+}
+
 bool HloDataflowAnalysis::UpdateCopyValueSet(HloInstruction* copy) {
   CHECK_EQ(copy->opcode(), HloOpcode::kCopy);
   bool changed = false;
@@ -394,7 +436,7 @@ bool HloDataflowAnalysis::UpdateParameterValueSet(HloInstruction* parameter) {
   CHECK_EQ(call_graph_node.context(), CallContext::kSequential);
 
   std::vector<const InstructionValueSet*> inputs;
-  bool called_from_while = false;
+  bool need_phi = false;
   for (const CallSite& callsite : call_graph_node.caller_callsites()) {
     if (callsite.instruction()->opcode() == HloOpcode::kCall) {
       // The operand values of a call instruction are forwarded to the
@@ -416,14 +458,32 @@ bool HloDataflowAnalysis::UpdateParameterValueSet(HloInstruction* parameter) {
         inputs.push_back(&GetInstructionValueSet(
             callsite.instruction()->while_body()->root_instruction()));
       }
-      called_from_while = true;
+      need_phi = true;
+    } else if (callsite.instruction()->opcode() == HloOpcode::kConditional) {
+      CHECK_EQ(parameter->parameter_number(), 0);
+      auto conditional = callsite.instruction();
+      // Conditional has 3 operands. Operand 0 is the predicate, operand 1 is
+      // the argument to the true computation and operand 2 is the argument to
+      // the false computation.
+      //
+      // If the parameter belongs to conditional's true computation, then
+      // operand 1 is forwarded to this parameter instruction. If the parameter
+      // belongs to conditional's false computation, then operand 2 is forwarded
+      // to this parameter instruction.
+      if (parameter->parent() == conditional->true_computation()) {
+        inputs.push_back(&GetInstructionValueSet(conditional->operand(1)));
+      } else {
+        CHECK_EQ(parameter->parent(), conditional->false_computation());
+        inputs.push_back(&GetInstructionValueSet(conditional->operand(2)));
+      }
+      need_phi = true;
     } else {
       LOG(FATAL) << "CallContext::kSequential computations should only be "
-                    "called from call or while instructions";
+                    "called from call, while, or conditional instructions";
     }
   }
 
-  if (ssa_form_ && called_from_while) {
+  if (ssa_form_ && need_phi) {
     return Phi(parameter, inputs);
   } else {
     return GetInstructionValueSet(parameter).AssignUnionOf(inputs);
@@ -494,6 +554,8 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet(
   switch (instruction->opcode()) {
     case HloOpcode::kBitcast:
       return UpdateBitcastValueSet(instruction);
+    case HloOpcode::kSlice:
+      return UpdateSliceValueSet(instruction);
     case HloOpcode::kCopy:
       return UpdateCopyValueSet(instruction);
     case HloOpcode::kGetTupleElement:
@@ -512,6 +574,8 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet(
       return UpdateSendValueSet(instruction);
     case HloOpcode::kRecvDone:
       return UpdateRecvDoneValueSet(instruction);
+    case HloOpcode::kConditional:
+      return UpdateConditionalValueSet(instruction);
     default:
       // Instruction does not forward HloValues (it defines all values in its
       // output). No update is necessary.
@@ -550,13 +614,31 @@ void HloDataflowAnalysis::Propagate() {
 
       // If user sequentially calls a computation, then the respective
       // parameter(s) of the computation need to be updated.
-      for (HloComputation* called_computation : user->called_computations()) {
-        const CallGraphNode& call_graph_node =
-            call_graph_->GetNode(called_computation);
-        if (call_graph_node.context() == CallContext::kSequential) {
-          for (int64 operand_number : user->OperandIndices(instruction)) {
-            worklist.push(
-                called_computation->parameter_instruction(operand_number));
+      if (user->opcode() == HloOpcode::kConditional) {
+        // If operand 0 is the use of instruction, then no parameters need to be
+        // updated, since that is the predicate of the conditional.
+        // If operand 1 is the use of instruction, then the true_computation's
+        // parameter need to be updated.
+        // If operand 2 is the use of instruction, then the false_computation's
+        // parameter need to be updated.
+        //
+        // Note that the same instruction can be used in both operand 1 and
+        // operand 2.
+        if (user->operand(1) == instruction) {
+          worklist.push(user->true_computation()->parameter_instruction(0));
+        }
+        if (user->operand(2) == instruction) {
+          worklist.push(user->false_computation()->parameter_instruction(0));
+        }
+      } else {
+        for (HloComputation* called_computation : user->called_computations()) {
+          const CallGraphNode& call_graph_node =
+              call_graph_->GetNode(called_computation);
+          if (call_graph_node.context() == CallContext::kSequential) {
+            for (int64 operand_number : user->OperandIndices(instruction)) {
+              worklist.push(
+                  called_computation->parameter_instruction(operand_number));
+            }
           }
         }
       }
@@ -568,7 +650,8 @@ void HloDataflowAnalysis::Propagate() {
       const CallGraphNode& call_graph_node =
           call_graph_->GetNode(instruction->parent());
       for (const CallSite& callsite : call_graph_node.caller_callsites()) {
-        if (callsite.instruction()->opcode() == HloOpcode::kCall) {
+        if ((callsite.instruction()->opcode() == HloOpcode::kCall) ||
+            (callsite.instruction()->opcode() == HloOpcode::kConditional)) {
           worklist.push(callsite.instruction());
         } else if (callsite.instruction()->opcode() == HloOpcode::kWhile) {
           // Add the while itself, and the body and condition parameters.
@@ -634,8 +717,14 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() {
             define_all_values();
           }
           break;
+        case HloOpcode::kSlice:
+          if (!instruction->IsInPlaceSlice()) {
+            define_all_values();
+          }
+          break;
         case HloOpcode::kWhile:
         case HloOpcode::kCall:
+        case HloOpcode::kConditional:
         case HloOpcode::kGetTupleElement:
           // These instructions define no values. The values in their output
           // flow from their operands or from cross computation dataflow.
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
index dfd81ae951042f7a4d6d3c24af4d5b7e046c272d..89d318188f0855c7924836a51cfe98d531e08cb4 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
@@ -145,7 +145,9 @@ class HloDataflowAnalysis {
   // Updates the value set for a particular instruction type. Returns whether
   // the instruction value set changed.
   bool UpdateBitcastValueSet(HloInstruction* bitcast);
+  bool UpdateSliceValueSet(HloInstruction* slice);
   bool UpdateCallValueSet(HloInstruction* call);
+  bool UpdateConditionalValueSet(HloInstruction* conditional);
   bool UpdateCopyValueSet(HloInstruction* copy);
   bool UpdateGetTupleElementValueSet(HloInstruction* gte);
   bool UpdateParameterValueSet(HloInstruction* parameter);
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
index f08f0b1d6833b028baa5f997929a17eb5abae205..e714b2567fd1b3eab607a19f0bb7e3288150dc64 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
@@ -34,6 +34,7 @@ limitations under the License.
 namespace xla {
 namespace {
 
+using ::testing::ElementsAre;
 using ::testing::UnorderedElementsAre;
 
 // Test is parameterized on a bool which is whether the dataflow analysis is
@@ -77,11 +78,23 @@ class HloDataflowAnalysisTest : public HloTestBase,
                                  analysis_->GetValueDefinedAt(b), *analysis_);
   }
 
+  std::unique_ptr<HloComputation> CreateR0F32UnaryOpComputation(
+      HloOpcode opcode) {
+    HloComputation::Builder builder(TestName() + "." + HloOpcodeString(opcode));
+    HloInstruction* param0 = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, scalar_shape_, "param0"));
+    builder.AddInstruction(
+        HloInstruction::CreateUnary(scalar_shape_, opcode, param0));
+    return builder.Build();
+  }
+
   std::unique_ptr<HloModule> module_;
   std::unique_ptr<HloDataflowAnalysis> analysis_;
 
   const Shape scalar_shape_ = ShapeUtil::MakeShape(F32, {});
   const Shape vector_shape_ = ShapeUtil::MakeShape(F32, {42});
+  const Shape tuple_shape_ = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShape(F32, {}), ShapeUtil::MakeShape(F32, {})});
 };
 
 TEST_P(HloDataflowAnalysisTest, BinaryOperation) {
@@ -1528,6 +1541,315 @@ TEST_P(HloDataflowAnalysisTest, EmbeddedComputationInterference) {
   EXPECT_TRUE(InstructionsMayInterfere(ordering, negate, embedded_log));
 }
 
+TEST_P(HloDataflowAnalysisTest, ConditionalWithIdentity) {
+  // Test conditional with identity computations in both true and false cases.
+  //
+  // true_computation(F32[] %true_param):
+  //   return %true_param
+  //
+  // false_computation(F32[] %false_param):
+  //   return %false_param
+  //
+  // entry:
+  //   %pred = Constant(true)
+  //   %constant1 = Constant(56.0)
+  //   %constant2 = Constant(12.0)
+  //   return Conditional(%pred, %constant1, true_computation,
+  //                      %constant2, false_computation)
+
+  auto true_builder = HloComputation::Builder(TestName() + "_true");
+  auto true_param = true_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "true_param"));
+  HloComputation* true_computation =
+      module_->AddEmbeddedComputation(true_builder.Build());
+
+  auto false_builder = HloComputation::Builder(TestName() + "_false");
+  auto false_param = false_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "false_param"));
+  HloComputation* false_computation =
+      module_->AddEmbeddedComputation(false_builder.Build());
+
+  auto builder = HloComputation::Builder(TestName());
+  auto pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(true)));
+  auto constant1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(56.0f)));
+  auto constant2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(12.0f)));
+  auto conditional = builder.AddInstruction(HloInstruction::CreateConditional(
+      scalar_shape_, pred, constant1, true_computation, constant2,
+      false_computation));
+  module_->AddEntryComputation(builder.Build());
+
+  const HloDataflowAnalysis& analysis = RunAnalysis(GetParam());
+
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(pred));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant1));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant2));
+
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(true_param));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(false_param));
+
+  EXPECT_EQ(analysis.GetUniqueValueAt(true_param),
+            analysis.GetValueDefinedAt(constant1));
+  EXPECT_EQ(analysis.GetUniqueValueAt(false_param),
+            analysis.GetValueDefinedAt(constant2));
+
+  EXPECT_THAT(analysis.GetValueDefinedAt(pred).uses(),
+              ElementsAre(HloUse{conditional, 0, {}}));
+  EXPECT_THAT(analysis.GetValueDefinedAt(constant1).uses(),
+              ElementsAre(HloUse{conditional, 1, {}}));
+  EXPECT_THAT(analysis.GetValueDefinedAt(constant2).uses(),
+              ElementsAre(HloUse{conditional, 2, {}}));
+
+  EXPECT_EQ(analysis.values().size(), 3);
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional));
+  EXPECT_THAT(HloValuesAt(conditional),
+              UnorderedElementsAre(analysis.GetValueDefinedAt(constant1),
+                                   analysis.GetValueDefinedAt(constant2)));
+}
+
+TEST_P(HloDataflowAnalysisTest, ConditionalTakingTupleOperand) {
+  // Test conditional with true and false computations taking a tuple operand.
+  //
+  // true_computation((F32[], F32[]) %true_param):
+  //   %true_x = GetTupleElement(%true_param, 0)
+  //   %true_y = GetTupleElement(%true_param, 1)
+  //   return Add(%true_x, %true_y)
+  //
+  // false_computation((F32[], F32[]) %false_param):
+  //   %false_x = GetTupleElement(%false_param, 0)
+  //   %false_y = GetTupleElement(%false_param, 1)
+  //   return Subtract(%false_x, %false_y)
+  //
+  // entry:
+  //   %pred = Constant(true)
+  //   %constant1 = Constant(56.0)
+  //   %constant2 = Constant(12.0)
+  //   %tuple_operand = Tuple(%constant1, %constant2)
+  //   return Conditional(%pred, %tuple_operand, true_computation,
+  //                      %tuple_operand, false_computation)
+
+  auto true_builder = HloComputation::Builder(TestName() + "_true");
+  auto true_param = true_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape_, "true_param"));
+  auto true_x = true_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, true_param, 0));
+  auto true_y = true_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, true_param, 1));
+  auto add = true_builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape_, HloOpcode::kAdd, true_x, true_y));
+  HloComputation* true_computation =
+      module_->AddEmbeddedComputation(true_builder.Build());
+
+  auto false_builder = HloComputation::Builder(TestName() + "_false");
+  auto false_param = false_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape_, "false_param"));
+  auto false_x = false_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, false_param, 0));
+  auto false_y = false_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, false_param, 1));
+  auto sub = false_builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape_, HloOpcode::kSubtract, false_x, false_y));
+  HloComputation* false_computation =
+      module_->AddEmbeddedComputation(false_builder.Build());
+
+  auto builder = HloComputation::Builder(TestName());
+  auto pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(true)));
+  auto constant1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(56.0f)));
+  auto constant2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(12.0f)));
+  auto tuple_operand = builder.AddInstruction(
+      HloInstruction::CreateTuple({constant1, constant2}));
+  auto conditional = builder.AddInstruction(HloInstruction::CreateConditional(
+      scalar_shape_, pred, tuple_operand, true_computation, tuple_operand,
+      false_computation));
+  module_->AddEntryComputation(builder.Build());
+
+  const HloDataflowAnalysis& analysis = RunAnalysis(GetParam());
+
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(pred));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant1));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant2));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(tuple_operand));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(add));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(sub));
+
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(true_param));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(false_param));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(true_x));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(true_y));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(false_x));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(false_y));
+
+  EXPECT_EQ(analysis.GetUniqueValueAt(true_param),
+            analysis.GetValueDefinedAt(tuple_operand));
+  EXPECT_EQ(analysis.GetUniqueValueAt(false_param),
+            analysis.GetValueDefinedAt(tuple_operand));
+  EXPECT_EQ(analysis.GetUniqueValueAt(true_x),
+            analysis.GetValueDefinedAt(constant1));
+  EXPECT_EQ(analysis.GetUniqueValueAt(true_y),
+            analysis.GetValueDefinedAt(constant2));
+  EXPECT_EQ(analysis.GetUniqueValueAt(false_x),
+            analysis.GetValueDefinedAt(constant1));
+  EXPECT_EQ(analysis.GetUniqueValueAt(false_y),
+            analysis.GetValueDefinedAt(constant2));
+
+  EXPECT_THAT(analysis.GetValueDefinedAt(pred).uses(),
+              ElementsAre(HloUse{conditional, 0, {}}));
+  EXPECT_THAT(analysis.GetValueDefinedAt(constant1).uses(),
+              UnorderedElementsAre(HloUse{conditional, 1, {0}},
+                                   HloUse{conditional, 2, {0}},
+                                   HloUse{add, 0, {}}, HloUse{sub, 0, {}}));
+  EXPECT_THAT(analysis.GetValueDefinedAt(constant2).uses(),
+              UnorderedElementsAre(HloUse{conditional, 1, {1}},
+                                   HloUse{conditional, 2, {1}},
+                                   HloUse{add, 1, {}}, HloUse{sub, 1, {}}));
+  EXPECT_THAT(analysis.GetValueDefinedAt(tuple_operand).uses(),
+              UnorderedElementsAre(
+                  HloUse{conditional, 1, {}}, HloUse{conditional, 2, {}},
+                  HloUse{true_x, 0, {}}, HloUse{true_y, 0, {}},
+                  HloUse{false_x, 0, {}}, HloUse{false_y, 0, {}}));
+
+  EXPECT_EQ(analysis.values().size(), 6);
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional));
+  EXPECT_THAT(HloValuesAt(conditional),
+              UnorderedElementsAre(analysis.GetValueDefinedAt(add),
+                                   analysis.GetValueDefinedAt(sub)));
+}
+
+TEST_P(HloDataflowAnalysisTest, NestedConditionals) {
+  // computation1(F32[] %param1):
+  //   %ceil = Ceil(%param1)
+  //   return %ceil
+  //
+  // computation2(F32[] %param2):
+  //   %floor = Floor(%param2)
+  //   return %floor
+  //
+  // computation3(F32[] %param3):
+  //   %negate = Negate(%param3)
+  //   return %negate
+  //
+  // inner_conditional((PRED, F32[], F32[]) %param_cond):
+  //   %pred_cond = GetTupleElement(%param_cond, 0)
+  //   %true_operand_cond = GetTupleElement(%param_cond, 1)
+  //   %false_opearnd_cond = GetTupleElement(%param_cond, 2)
+  //   return Conditional(%pred_cond, %true_operand_cond, computation1,
+  //                      %false_operand_cond, computation2)
+  //
+  // entry:
+  //   %pred1 = Constant(true)
+  //   %pred2 = Constant(false)
+  //   %constant1 = Constant(1.1);
+  //   %constant2 = Constant(2.2);
+  //   %constant3 = Constant(3.3);
+  //   return Conditional(%pred1, (%pred2, %constant1, %constant2),
+  //                      inner_conditional, %constant3, computation3)
+
+  auto computation1 = module_->AddEmbeddedComputation(
+      CreateR0F32UnaryOpComputation(HloOpcode::kCeil));
+  auto computation2 = module_->AddEmbeddedComputation(
+      CreateR0F32UnaryOpComputation(HloOpcode::kFloor));
+  auto computation3 = module_->AddEmbeddedComputation(
+      CreateR0F32UnaryOpComputation(HloOpcode::kNegate));
+
+  // Build inner_conditional computation.
+  const Shape scalar_bool_shape = ShapeUtil::MakeShape(PRED, {});
+  const Shape tuple_param_shape = ShapeUtil::MakeTupleShape(
+      {scalar_bool_shape, scalar_shape_, scalar_shape_});
+  auto inner_builder =
+      HloComputation::Builder(TestName() + "_inner_conditional");
+  auto param_cond = inner_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_param_shape, "param_cond"));
+  auto pred_cond = inner_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_bool_shape, param_cond, 0));
+  auto true_operand_cond = inner_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param_cond, 1));
+  auto false_operand_cond = inner_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param_cond, 2));
+  auto inner_conditional =
+      inner_builder.AddInstruction(HloInstruction::CreateConditional(
+          scalar_shape_, pred_cond, true_operand_cond, computation1,
+          false_operand_cond, computation2));
+  auto inner_conditional_computation =
+      module_->AddEmbeddedComputation(inner_builder.Build());
+
+  // Build entry computation.
+  auto builder = HloComputation::Builder(TestName());
+  auto pred1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(true)));
+  auto pred2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+  auto constant1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(1.1f)));
+  auto constant2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(2.2f)));
+  auto constant3 = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(3.3f)));
+  auto tuple_operand = builder.AddInstruction(
+      HloInstruction::CreateTuple({pred2, constant1, constant2}));
+  auto conditional = builder.AddInstruction(HloInstruction::CreateConditional(
+      scalar_shape_, pred1, tuple_operand, inner_conditional_computation,
+      constant3, computation3));
+  module_->AddEntryComputation(builder.Build());
+
+  const HloDataflowAnalysis& analysis = RunAnalysis(GetParam());
+
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(pred1));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(pred2));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant1));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant2));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(constant3));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(tuple_operand));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(computation1->root_instruction()));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(computation2->root_instruction()));
+  EXPECT_TRUE(analysis.ValueIsDefinedAt(computation3->root_instruction()));
+
+  auto computation1_param = computation1->parameter_instruction(0);
+  auto computation2_param = computation2->parameter_instruction(0);
+  auto computation3_param = computation3->parameter_instruction(0);
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(computation1_param));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(computation2_param));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(computation3_param));
+  EXPECT_EQ(analysis.GetUniqueValueAt(computation1_param),
+            analysis.GetValueDefinedAt(constant1));
+  EXPECT_EQ(analysis.GetUniqueValueAt(computation2_param),
+            analysis.GetValueDefinedAt(constant2));
+  EXPECT_EQ(analysis.GetUniqueValueAt(computation3_param),
+            analysis.GetValueDefinedAt(constant3));
+
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(param_cond));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(pred_cond));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(true_operand_cond));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(false_operand_cond));
+  EXPECT_EQ(analysis.GetUniqueValueAt(param_cond),
+            analysis.GetValueDefinedAt(tuple_operand));
+  EXPECT_EQ(analysis.GetUniqueValueAt(pred_cond),
+            analysis.GetValueDefinedAt(pred2));
+  EXPECT_EQ(analysis.GetUniqueValueAt(true_operand_cond),
+            analysis.GetValueDefinedAt(constant1));
+  EXPECT_EQ(analysis.GetUniqueValueAt(false_operand_cond),
+            analysis.GetValueDefinedAt(constant2));
+
+  EXPECT_EQ(analysis.values().size(), 9);
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(inner_conditional));
+  EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional));
+  EXPECT_THAT(
+      HloValuesAt(inner_conditional),
+      UnorderedElementsAre(
+          analysis.GetValueDefinedAt(computation1->root_instruction()),
+          analysis.GetValueDefinedAt(computation2->root_instruction())));
+  EXPECT_THAT(
+      HloValuesAt(conditional),
+      UnorderedElementsAre(
+          analysis.GetValueDefinedAt(computation1->root_instruction()),
+          analysis.GetValueDefinedAt(computation2->root_instruction()),
+          analysis.GetValueDefinedAt(computation3->root_instruction())));
+}
+
 INSTANTIATE_TEST_CASE_P(HloDataflowAnalysisInstantiation,
                         HloDataflowAnalysisTest,
                         ::testing::Values(false, true));
diff --git a/tensorflow/compiler/xla/service/hlo_dce.cc b/tensorflow/compiler/xla/service/hlo_dce.cc
index 40e67c87807b3e13d8ac09206bf6be02e4f9ff31..1e5f0f797a13fd7e7ce1cc934387a274a74153bc 100644
--- a/tensorflow/compiler/xla/service/hlo_dce.cc
+++ b/tensorflow/compiler/xla/service/hlo_dce.cc
@@ -55,7 +55,8 @@ StatusOr<bool> HloDCE::Run(HloModule* module) {
     for (auto* instruction : computation->instructions()) {
       if (instruction->user_count() == 0 &&
           live_instructions.count(instruction) == 0 &&
-          computation->IsRemovable(instruction)) {
+          computation->IsRemovable(instruction) &&
+          !instruction->HasSideEffect()) {
         dead_roots.push_back(instruction);
       }
     }
diff --git a/tensorflow/compiler/xla/service/hlo_dce_test.cc b/tensorflow/compiler/xla/service/hlo_dce_test.cc
index d54b9a27087a42fd23eab0bd06e8deaca567312b..5a56607a665c4cbeb7b2572f182b88e890602968 100644
--- a/tensorflow/compiler/xla/service/hlo_dce_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_dce_test.cc
@@ -70,6 +70,26 @@ TEST_F(HloDceTest, NoDeadCode) {
   EXPECT_EQ(3, computation->instruction_count());
 }
 
+TEST_F(HloDceTest, InstructionsWithSideEffect) {
+  // Verify that side-effect instructions (Send in this test) are not removed.
+  auto builder = HloComputation::Builder(TestName());
+  auto constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(42.0f)));
+  builder.AddInstruction(
+      HloInstruction::CreateSend(constant, /*channel_id=*/0));
+  builder.AddInstruction(HloInstruction::CreateTuple({}));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+
+  EXPECT_EQ(3, computation->instruction_count());
+
+  HloDCE dce;
+  EXPECT_FALSE(dce.Run(module.get()).ValueOrDie());
+
+  EXPECT_EQ(3, computation->instruction_count());
+}
+
 TEST_F(HloDceTest, DeadParameters) {
   // Verify that dead parameters are not removed, but use of the dead parameters
   // are.
diff --git a/tensorflow/compiler/xla/service/hlo_element_type_converter.cc b/tensorflow/compiler/xla/service/hlo_element_type_converter.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1773bb401d380031f6c860d295e76d2f62c9e5ff
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_element_type_converter.cc
@@ -0,0 +1,137 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_element_type_converter.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_evaluator.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/hlo_query.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace xla {
+namespace {
+
+HloInstruction* ToElementType(HloInstruction* hlo, PrimitiveType type) {
+  if (hlo->shape().element_type() != type) {
+    Shape shape = ShapeUtil::ChangeElementType(hlo->shape(), type);
+    hlo = hlo->parent()->AddInstruction(
+        HloInstruction::CreateConvert(shape, hlo));
+  }
+  CHECK_EQ(hlo->shape().element_type(), type);
+  return hlo;
+}
+
+bool HasOperandType(HloInstruction* hlo, PrimitiveType type) {
+  for (HloInstruction* operand : hlo->operands()) {
+    if (operand->shape().element_type() == type) {
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace
+
+HloElementTypeConverter::HloElementTypeConverter(
+    PrimitiveType eliminate_type, PrimitiveType replace_with_type)
+    : eliminate_type_(eliminate_type), replace_with_type_(replace_with_type) {}
+
+StatusOr<bool> HloElementTypeConverter::Run(HloModule* module) {
+  XLA_VLOG_LINES(
+      3, "HloElementTypeConverter::Run(), before:\n" + module->ToString());
+  bool changed = false;
+  for (auto* computation : module->computations()) {
+    for (auto* hlo : computation->MakeInstructionPostOrder()) {
+      // These are ops where it does not make sense to convert them.
+      if (hlo->opcode() == HloOpcode::kParameter ||
+          hlo->opcode() == HloOpcode::kConstant ||
+          hlo->opcode() == HloOpcode::kTuple ||
+          hlo->opcode() == HloOpcode::kConvert ||
+          hlo->opcode() == HloOpcode::kGetTupleElement ||
+          hlo->opcode() == HloOpcode::kInfeed ||
+          hlo->opcode() == HloOpcode::kOutfeed) {
+        continue;
+      }
+
+      // We cannot change a CustomCall since we have no way of adjusting the
+      // called binary to expect the updated type.
+      if (hlo->opcode() == HloOpcode::kCustomCall) {
+        continue;
+      }
+
+      // These are ops with embedded computations where it suffices to convert
+      // the embedded computations instead of converting the ops themselves.
+      if (hlo->opcode() == HloOpcode::kWhile ||
+          hlo->opcode() == HloOpcode::kCall ||
+          hlo->opcode() == HloOpcode::kFusion ||
+          hlo->opcode() == HloOpcode::kMap ||
+          hlo->opcode() == HloOpcode::kReduce ||
+          hlo->opcode() == HloOpcode::kReduceWindow ||
+          hlo->opcode() == HloOpcode::kSelectAndScatter ||
+          hlo->opcode() == HloOpcode::kConditional) {
+        continue;
+      }
+      TF_RET_CHECK(hlo->called_computations().empty()) << hlo->ToString();
+
+      if (!HasOperandType(hlo, eliminate_type_)) {
+        // If this CHECK fires, then this was an instruction that does not take
+        // the elimination type as an operand but it does return it. This pass
+        // does not have a feature to change the output type in that case, so
+        // instead of silently failing to eliminate the type, it fails loudly.
+        TF_RET_CHECK(hlo->shape().element_type() != eliminate_type_);
+        continue;
+      }
+
+      std::vector<HloInstruction*> new_operands;
+      for (HloInstruction* operand : hlo->operands()) {
+        if (operand->shape().element_type() == eliminate_type_) {
+          operand = ToElementType(operand, replace_with_type_);
+        }
+        new_operands.push_back(operand);
+      }
+
+      HloInstruction* new_hlo;
+      if (hlo->shape().element_type() == eliminate_type_) {
+        Shape shape =
+            ShapeUtil::ChangeElementType(hlo->shape(), replace_with_type_);
+        new_hlo = computation->AddInstruction(
+            hlo->CloneWithNewOperands(shape, new_operands, hlo->GetModule()));
+        new_hlo = ToElementType(new_hlo, eliminate_type_);
+      } else {
+        new_hlo = computation->AddInstruction(hlo->CloneWithNewOperands(
+            hlo->shape(), new_operands, hlo->GetModule()));
+      }
+      TF_RETURN_IF_ERROR(computation->ReplaceInstruction(hlo, new_hlo));
+      changed = true;
+    }
+  }
+  XLA_VLOG_LINES(
+      2, "HloElementTypeConverter::Run(), after:\n" + module->ToString());
+  return changed;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_element_type_converter.h b/tensorflow/compiler/xla/service/hlo_element_type_converter.h
new file mode 100644
index 0000000000000000000000000000000000000000..2b109225d0b192e5c9e4f6d841377ffad8078dc2
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_element_type_converter.h
@@ -0,0 +1,49 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ELEMENT_TYPE_CONVERTER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ELEMENT_TYPE_CONVERTER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// A pass that eliminates certain element types as the input or output of ops by
+// inserting Convert ops. This allows a backend to support an element type while
+// only actually implementing the Convert op for that element type. This is
+// generally not the fastest approach, but it works.
+class HloElementTypeConverter : public HloPassInterface {
+ public:
+  // eliminate_type is the type to eliminate as the input or output of ops,
+  // using Convert ops to replace it with replace_with_type.
+  HloElementTypeConverter(PrimitiveType eliminate_type,
+                          PrimitiveType replace_with_type);
+
+  tensorflow::StringPiece name() const override {
+    return "element_type_converter";
+  }
+
+  // Returns the pass on the module and returns whether the module was modified.
+  StatusOr<bool> Run(HloModule* module) override;
+
+ private:
+  PrimitiveType eliminate_type_;
+  PrimitiveType replace_with_type_;
+};
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ELEMENT_TYPE_CONVERTER_H_
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index e693d167a1f96f65b894d07fb2c8f33e61ff8c49..3a846a752988efd618a1d6b9ed3c9e7a27627eee 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
 #include "tensorflow/compiler/xla/ptr_util.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/hlo_query.h"
 #include "tensorflow/compiler/xla/service/shape_inference.h"
@@ -167,11 +168,37 @@ StatusOr<std::unique_ptr<Literal>> ElementWiseUnaryOpImpl(
 
 }  // namespace
 
-template <typename ReturnT>
+template <typename ReturnT, typename ElementwiseT>
 class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
  public:
   explicit TypedVisitor(HloEvaluator* p) : parent_(p) {}
 
+  // The following higher-order functions convert a function with ElementwiseT
+  // to a function with ReturnT.
+  std::function<ReturnT(ReturnT)> ConvertUnaryFunction(
+      const std::function<ElementwiseT(ElementwiseT)>& unary_op) {
+    return [&unary_op](ReturnT arg) {
+      return static_cast<ReturnT>(unary_op(static_cast<ElementwiseT>(arg)));
+    };
+  }
+  std::function<ReturnT(ReturnT, ReturnT)> ConvertBinaryFunction(
+      const std::function<ElementwiseT(ElementwiseT, ElementwiseT)>&
+          binary_op) {
+    return [&binary_op](ReturnT arg1, ReturnT arg2) {
+      return static_cast<ReturnT>(binary_op(static_cast<ElementwiseT>(arg1),
+                                            static_cast<ElementwiseT>(arg2)));
+    };
+  }
+  std::function<ReturnT(ReturnT, ReturnT, ReturnT)> ConvertTernaryFunction(
+      const std::function<ElementwiseT(ElementwiseT, ElementwiseT,
+                                       ElementwiseT)>& ternary_op) {
+    return [&ternary_op](ReturnT arg1, ReturnT arg2, ReturnT arg3) {
+      return static_cast<ReturnT>(ternary_op(static_cast<ElementwiseT>(arg1),
+                                             static_cast<ElementwiseT>(arg2),
+                                             static_cast<ElementwiseT>(arg3)));
+    };
+  }
+
   Status DefaultAction(HloInstruction* hlo_instruction) override {
     return Unimplemented("unhandled HLO ops for HloEvaluator: %s.",
                          HloOpcodeString(hlo_instruction->opcode()).c_str());
@@ -197,24 +224,25 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                               is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleAbs(HloInstruction* abs) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[abs],
-                        ElementWiseUnaryOp(abs, [](NativeT elem_operand) {
+                        ElementWiseUnaryOp(abs, [](ElementwiseT elem_operand) {
                           return std::abs(elem_operand);
                         }));
     return Status::OK();
   }
 
   Status HandleAbs(HloInstruction* abs) override {
-    return HandleAbs<ReturnT>(abs);
+    return HandleAbs<ElementwiseT>(abs);
   }
 
   template <
       typename NativeT,
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleRound(HloInstruction* round) {
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[round],
-                        ElementWiseUnaryOp(round, [](ReturnT elem_operand) {
-                          return std::round(elem_operand);
-                        }));
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[round],
+        ElementWiseUnaryOp(round, [](ElementwiseT elem_operand) {
+          return std::round(elem_operand);
+        }));
     return Status::OK();
   }
 
@@ -233,7 +261,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     parent_->evaluated_[broadcast] =
         Literal::CreateFromShape(broadcast->shape());
     auto output = parent_->evaluated_[broadcast].get();
-    auto operand_to_broadcast =
+    const Literal& operand_to_broadcast =
         parent_->GetEvaluatedLiteralFor(broadcast->operand(0));
     std::vector<int64> broadcast_indices(
         ShapeUtil::Rank(broadcast->operand(0)->shape()), 0);
@@ -264,7 +292,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleCeil(HloInstruction* ceil) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[ceil],
-                        ElementWiseUnaryOp(ceil, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(ceil, [](ElementwiseT elem_operand) {
                           return std::ceil(elem_operand);
                         }));
     return Status::OK();
@@ -299,7 +327,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
 
   Status HandleExp(HloInstruction* exp) override {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[exp],
-                        ElementWiseUnaryOp(exp, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(exp, [](ElementwiseT elem_operand) {
                           return std::exp(elem_operand);
                         }));
     return Status::OK();
@@ -309,10 +337,11 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       typename NativeT,
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleFloor(HloInstruction* floor) {
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[floor],
-                        ElementWiseUnaryOp(floor, [](ReturnT elem_operand) {
-                          return std::floor(elem_operand);
-                        }));
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[floor],
+        ElementWiseUnaryOp(floor, [](ElementwiseT elem_operand) {
+          return std::floor(elem_operand);
+        }));
     return Status::OK();
   }
 
@@ -329,7 +358,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
 
   Status HandleLog(HloInstruction* log) override {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[log],
-                        ElementWiseUnaryOp(log, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(log, [](ElementwiseT elem_operand) {
                           return std::log(elem_operand);
                         }));
     return Status::OK();
@@ -341,7 +370,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                 !std::is_same<NativeT, bool>::value>::type* = nullptr>
   Status HandleNot(HloInstruction* not_) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_],
-                        ElementWiseUnaryOp(not_, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(not_, [](ElementwiseT elem_operand) {
                           return ~elem_operand;
                         }));
     return Status::OK();
@@ -351,7 +380,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                                   NativeT>::value>::type* = nullptr>
   Status HandleNot(HloInstruction* not_) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_],
-                        ElementWiseUnaryOp(not_, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(not_, [](ElementwiseT elem_operand) {
                           return !elem_operand;
                         }));
     return Status::OK();
@@ -362,7 +391,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                 nullptr>
   Status HandleNot(HloInstruction* not_) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_],
-                        ElementWiseUnaryOp(not_, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(not_, [](ElementwiseT elem_operand) {
                           return !elem_operand;
                         }));
     return Status::OK();
@@ -376,7 +405,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleNot(HloInstruction* not_) override {
-    return HandleNot<ReturnT>(not_);
+    return HandleNot<ElementwiseT>(not_);
   }
 
   template <typename NativeT,
@@ -385,10 +414,11 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                 !std::is_floating_point<NativeT>::value>::type* = nullptr>
   Status HandleNegate(HloInstruction* negate) {
     using type = typename std::make_unsigned<NativeT>::type;
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[negate],
-                        ElementWiseUnaryOp(negate, [](ReturnT elem_operand) {
-                          return NativeT(-type(elem_operand));
-                        }));
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[negate],
+        ElementWiseUnaryOp(negate, [](ElementwiseT elem_operand) {
+          return NativeT(-type(elem_operand));
+        }));
     return Status::OK();
   }
 
@@ -397,10 +427,10 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                 !std::is_signed<NativeT>::value ||
                 std::is_floating_point<NativeT>::value>::type* = nullptr>
   Status HandleNegate(HloInstruction* negate) {
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[negate],
-                        ElementWiseUnaryOp(negate, [](ReturnT elem_operand) {
-                          return -elem_operand;
-                        }));
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[negate],
+        ElementWiseUnaryOp(
+            negate, [](ElementwiseT elem_operand) { return -elem_operand; }));
     return Status::OK();
   }
 
@@ -413,9 +443,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleSign(HloInstruction* sign) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[sign],
-                        ElementWiseUnaryOp(sign, [](ReturnT elem_operand) {
-                          return (ReturnT(0) < elem_operand) -
-                                 (elem_operand < ReturnT(0));
+                        ElementWiseUnaryOp(sign, [](ElementwiseT elem_operand) {
+                          return (ElementwiseT(0) < elem_operand) -
+                                 (elem_operand < ElementwiseT(0));
                         }));
     return Status::OK();
   }
@@ -425,9 +455,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleSign(HloInstruction* sign) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[sign],
-                        ElementWiseUnaryOp(sign, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(sign, [](ElementwiseT elem_operand) {
                           auto abs_val = std::abs(elem_operand);
-                          return 0 == abs_val ? ReturnT(0)
+                          return 0 == abs_val ? ElementwiseT(0)
                                               : elem_operand / abs_val;
                         }));
     return Status::OK();
@@ -437,9 +467,30 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return HandleSign<ReturnT>(sign);
   }
 
+  template <typename NativeT, typename std::enable_if<std::is_floating_point<
+                                  NativeT>::value>::type* = nullptr>
+  Status HandleAtan2(HloInstruction* atan2) {
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[atan2],
+                        ElementWiseBinaryOp(atan2, [](ElementwiseT lhs_elem,
+                                                      ElementwiseT rhs_elem) {
+                          return std::atan2(lhs_elem, rhs_elem);
+                        }));
+    return Status::OK();
+  }
+
+  template <typename NativeT, typename std::enable_if<!std::is_floating_point<
+                                  NativeT>::value>::type* = nullptr>
+  Status HandleAtan2(HloInstruction* atan2) {
+    return InvalidArgument("Unsupported type for Atan2");
+  }
+
+  Status HandleAtan2(HloInstruction* atan2) override {
+    return HandleAtan2<ElementwiseT>(atan2);
+  }
+
   Status HandleTanh(HloInstruction* tanh) override {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[tanh],
-                        ElementWiseUnaryOp(tanh, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(tanh, [](ElementwiseT elem_operand) {
                           return std::tanh(elem_operand);
                         }));
     return Status::OK();
@@ -453,9 +504,10 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     using type = typename std::make_unsigned<NativeT>::type;
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[multiply],
-        ElementWiseBinaryOp(multiply, [](ReturnT lhs_elem, ReturnT rhs_elem) {
-          return NativeT(type(lhs_elem) * type(rhs_elem));
-        }));
+        ElementWiseBinaryOp(multiply,
+                            [](ElementwiseT lhs_elem, ElementwiseT rhs_elem) {
+                              return NativeT(type(lhs_elem) * type(rhs_elem));
+                            }));
     return Status::OK();
   }
 
@@ -467,40 +519,42 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleMultiply(HloInstruction* multiply) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[multiply],
-        ElementWiseBinaryOp(multiply, [](ReturnT lhs_elem, ReturnT rhs_elem) {
-          return lhs_elem * rhs_elem;
-        }));
+        ElementWiseBinaryOp(multiply,
+                            [](ElementwiseT lhs_elem, ElementwiseT rhs_elem) {
+                              return lhs_elem * rhs_elem;
+                            }));
     return Status::OK();
   }
 
   Status HandleMultiply(HloInstruction* multiply) override {
-    return HandleMultiply<ReturnT>(multiply);
+    return HandleMultiply<ElementwiseT>(multiply);
   }
 
   Status HandleSubtract(HloInstruction* subtract) override {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[subtract],
-        ElementWiseBinaryOp(subtract, [](ReturnT lhs_elem, ReturnT rhs_elem) {
-          return lhs_elem - rhs_elem;
-        }));
+        ElementWiseBinaryOp(subtract,
+                            [](ElementwiseT lhs_elem, ElementwiseT rhs_elem) {
+                              return lhs_elem - rhs_elem;
+                            }));
     return Status::OK();
   }
 
   Status HandleAdd(HloInstruction* add) override {
-    TF_ASSIGN_OR_RETURN(
-        parent_->evaluated_[add],
-        ElementWiseBinaryOp(add, [](ReturnT lhs_elem, ReturnT rhs_elem) {
-          return lhs_elem + rhs_elem;
-        }));
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[add],
+                        ElementWiseBinaryOp(add, [](ElementwiseT lhs_elem,
+                                                    ElementwiseT rhs_elem) {
+                          return lhs_elem + rhs_elem;
+                        }));
     return Status::OK();
   }
 
   Status HandleDivide(HloInstruction* divide) override {
-    TF_ASSIGN_OR_RETURN(
-        parent_->evaluated_[divide],
-        ElementWiseBinaryOp(divide, [](ReturnT lhs_elem, ReturnT rhs_elem) {
-          return lhs_elem / rhs_elem;
-        }));
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[divide],
+                        ElementWiseBinaryOp(divide, [](ElementwiseT lhs_elem,
+                                                       ElementwiseT rhs_elem) {
+                          return lhs_elem / rhs_elem;
+                        }));
     return Status::OK();
   }
 
@@ -510,7 +564,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleMaximum(HloInstruction* maximum) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[maximum],
-        ElementWiseBinaryOp(maximum, [](ReturnT lhs, ReturnT rhs) {
+        ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) {
           return std::fmax(lhs, rhs);
         }));
     return Status::OK();
@@ -524,18 +578,18 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleMaximum(HloInstruction* maximum) override {
-    return HandleMaximum<ReturnT>(maximum);
+    return HandleMaximum<ElementwiseT>(maximum);
   }
 
   template <
       typename NativeT,
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleMinimum(HloInstruction* minimum) {
-    TF_ASSIGN_OR_RETURN(
-        parent_->evaluated_[minimum],
-        ElementWiseBinaryOp(minimum, [](ReturnT lhs_el, ReturnT rhs_el) {
-          return std::fmin(lhs_el, rhs_el);
-        }));
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[minimum],
+                        ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el,
+                                                        ElementwiseT rhs_el) {
+                          return std::fmin(lhs_el, rhs_el);
+                        }));
     return Status::OK();
   }
 
@@ -547,15 +601,15 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleMinimum(HloInstruction* minimum) override {
-    return HandleMinimum<ReturnT>(minimum);
+    return HandleMinimum<ElementwiseT>(minimum);
   }
 
   Status HandlePower(HloInstruction* power) override {
-    TF_ASSIGN_OR_RETURN(
-        parent_->evaluated_[power],
-        ElementWiseBinaryOp(power, [](ReturnT lhs_el, ReturnT rhs_el) {
-          return std::pow(lhs_el, rhs_el);
-        }));
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[power],
+                        ElementWiseBinaryOp(power, [](ElementwiseT lhs_el,
+                                                      ElementwiseT rhs_el) {
+                          return std::pow(lhs_el, rhs_el);
+                        }));
     return Status::OK();
   }
 
@@ -563,11 +617,11 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       typename NativeT,
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleRemainder(HloInstruction* remainder) {
-    TF_ASSIGN_OR_RETURN(
-        parent_->evaluated_[remainder],
-        ElementWiseBinaryOp(remainder, [](ReturnT lhs_el, ReturnT rhs_el) {
-          return std::fmod(lhs_el, rhs_el);
-        }));
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[remainder],
+                        ElementWiseBinaryOp(remainder, [](ElementwiseT lhs_el,
+                                                          ElementwiseT rhs_el) {
+                          return std::fmod(lhs_el, rhs_el);
+                        }));
     return Status::OK();
   }
 
@@ -579,7 +633,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleRemainder(HloInstruction* remainder) override {
-    return HandleRemainder<ReturnT>(remainder);
+    return HandleRemainder<ElementwiseT>(remainder);
   }
 
   template <typename NativeT,
@@ -588,7 +642,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleAnd(HloInstruction* and_) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[and_],
-        ElementWiseBinaryOp(and_, [](ReturnT lhs_el, ReturnT rhs_el) {
+        ElementWiseBinaryOp(and_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) {
           return lhs_el & rhs_el;
         }));
     return Status::OK();
@@ -599,7 +653,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleAnd(HloInstruction* and_) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[and_],
-        ElementWiseBinaryOp(and_, [](ReturnT lhs_el, ReturnT rhs_el) {
+        ElementWiseBinaryOp(and_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) {
           return lhs_el && rhs_el;
         }));
     return Status::OK();
@@ -613,7 +667,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleAnd(HloInstruction* and_) override {
-    return HandleAnd<ReturnT>(and_);
+    return HandleAnd<ElementwiseT>(and_);
   }
 
   template <typename NativeT,
@@ -622,7 +676,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleOr(HloInstruction* or_) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[or_],
-        ElementWiseBinaryOp(or_, [](ReturnT lhs_el, ReturnT rhs_el) {
+        ElementWiseBinaryOp(or_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) {
           return lhs_el | rhs_el;
         }));
     return Status::OK();
@@ -633,7 +687,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   Status HandleOr(HloInstruction* or_) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[or_],
-        ElementWiseBinaryOp(or_, [](ReturnT lhs_el, ReturnT rhs_el) {
+        ElementWiseBinaryOp(or_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) {
           return lhs_el || rhs_el;
         }));
     return Status::OK();
@@ -647,7 +701,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleOr(HloInstruction* or_) override {
-    return HandleOr<ReturnT>(or_);
+    return HandleOr<ElementwiseT>(or_);
   }
 
   template <typename NativeT,
@@ -672,7 +726,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleShiftLeft(HloInstruction* shl) override {
-    return HandleShiftLeft<ReturnT>(shl);
+    return HandleShiftLeft<ElementwiseT>(shl);
   }
   template <typename NativeT,
             typename std::enable_if<
@@ -698,7 +752,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleShiftRightArithmetic(HloInstruction* shra) override {
-    return HandleShiftRightArithmetic<ReturnT>(shra);
+    return HandleShiftRightArithmetic<ElementwiseT>(shra);
   }
 
   template <typename NativeT,
@@ -725,19 +779,21 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleShiftRightLogical(HloInstruction* shrl) override {
-    return HandleShiftRightLogical<ReturnT>(shrl);
+    return HandleShiftRightLogical<ElementwiseT>(shrl);
   }
 
   template <
       typename NativeT,
       typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleClamp(HloInstruction* clamp) {
-    std::function<ReturnT(ReturnT, ReturnT, ReturnT)> clamp_op =
-        [](ReturnT low, ReturnT value, ReturnT high) {
+    std::function<ElementwiseT(ElementwiseT, ElementwiseT, ElementwiseT)>
+        clamp_op = [](ElementwiseT low, ElementwiseT value, ElementwiseT high) {
           return std::fmax(low, std::fmin(value, high));
         };
-    TF_ASSIGN_OR_RETURN(parent_->evaluated_[clamp],
-                        ElementWiseTernaryOp(clamp, std::move(clamp_op)));
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[clamp],
+        ElementwiseTernaryOp(clamp,
+                             std::move(ConvertTernaryFunction(clamp_op))));
     return Status::OK();
   }
 
@@ -749,7 +805,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleClamp(HloInstruction* clamp) override {
-    return HandleClamp<ReturnT>(clamp);
+    return HandleClamp<ElementwiseT>(clamp);
   }
 
   Status HandleSelect(HloInstruction* select) override {
@@ -762,7 +818,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
           return on_false;
         };
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[select],
-                        ElementWiseTernaryOp(select, std::move(select_op)));
+                        ElementwiseTernaryOp(select, std::move(select_op)));
     return Status::OK();
   }
 
@@ -780,7 +836,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
         << " but is inferred to be: "
         << ShapeUtil::HumanString(inferred_return_shape);
 
-    auto operand_literal = parent_->GetEvaluatedLiteralFor(operand);
+    const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand);
     auto result = Literal::CreateFromShape(result_shape);
 
     TF_RETURN_IF_ERROR(result->Populate<ReturnT>(
@@ -860,7 +916,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     DimensionVector rhs_spatial_index(dnums.kernel_spatial_dimensions_size());
 
     auto func = [&](tensorflow::gtl::ArraySlice<int64> out_index) {
-      ReturnT result_val = static_cast<ReturnT>(0);
+      ElementwiseT result_val = static_cast<ElementwiseT>(0);
 
       std::fill(lhs_index.begin(), lhs_index.end(), 0);
       std::fill(rhs_index.begin(), rhs_index.end(), 0);
@@ -911,13 +967,14 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                     : rhs_spatial_index[ki];
           }
 
-          result_val += lhs_literal.Get<ReturnT>(lhs_index) *
-                        rhs_literal.Get<ReturnT>(rhs_index);
+          result_val +=
+              static_cast<ElementwiseT>(lhs_literal.Get<ReturnT>(lhs_index)) *
+              static_cast<ElementwiseT>(rhs_literal.Get<ReturnT>(rhs_index));
         }
       cnt : {}
       } while (IndexUtil::BumpIndices(window_shape, &rhs_spatial_index));
 
-      return result_val;
+      return static_cast<ReturnT>(result_val);
     };
 
     auto result = Literal::CreateFromShape(result_shape);
@@ -967,7 +1024,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     auto result = Literal::CreateFromShape(dot->shape());
     TF_RETURN_IF_ERROR(result->Populate<ReturnT>(
         [&](tensorflow::gtl::ArraySlice<int64> multi_index) {
-          ReturnT result_val = static_cast<ReturnT>(0);
+          ElementwiseT result_val = static_cast<ElementwiseT>(0);
 
           std::vector<int64> lhs_index(lhs_rank, 0);
           std::vector<int64> rhs_index(rhs_rank, 0);
@@ -984,11 +1041,12 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
             lhs_index[lhs_contracted_dimension] = i;
             rhs_index[rhs_contracted_dimension] = i;
 
-            result_val += lhs_literal.Get<ReturnT>(lhs_index) *
-                          rhs_literal.Get<ReturnT>(rhs_index);
+            result_val +=
+                static_cast<ElementwiseT>(lhs_literal.Get<ReturnT>(lhs_index)) *
+                static_cast<ElementwiseT>(rhs_literal.Get<ReturnT>(rhs_index));
           }
 
-          return result_val;
+          return static_cast<ReturnT>(result_val);
         }));
 
     parent_->evaluated_[dot] = std::move(result);
@@ -1021,7 +1079,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
           return scalar;
         }));
 
-    auto evaluated_operand = parent_->GetEvaluatedLiteralFor(pad->operand(0));
+    const Literal& evaluated_operand =
+        parent_->GetEvaluatedLiteralFor(pad->operand(0));
 
     std::vector<int64> input_index(ShapeUtil::Rank(evaluated_operand.shape()),
                                    0);
@@ -1174,6 +1233,97 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
+  template <typename NativeT>
+  StatusOr<std::unique_ptr<Literal>> MapImpl(HloInstruction* map) {
+    auto operands = map->operands();
+    HloComputation* computation = map->to_apply();
+
+    auto result = Literal::CreateFromShape(map->shape());
+
+    HloEvaluator embedded_evaluator;
+    TF_RETURN_IF_ERROR(result->Populate<ReturnT>(
+        [&](tensorflow::gtl::ArraySlice<int64> multi_index) {
+          std::vector<std::unique_ptr<Literal>> arg_literals;
+          arg_literals.reserve(operands.size());
+
+          // Construct scalar literal parameters to be passed to the map
+          // computation.
+          for (auto operand : operands) {
+            const Literal& arg_literal =
+                parent_->GetEvaluatedLiteralFor(operand);
+
+            auto curr_val = arg_literal.Get<NativeT>(multi_index);
+            auto curr_val_literal = Literal::CreateR0<NativeT>(curr_val);
+
+            arg_literals.push_back(std::move(curr_val_literal));
+          }
+
+          std::unique_ptr<Literal> computed_result =
+              embedded_evaluator
+                  .Evaluate<std::unique_ptr<Literal>>(*computation,
+                                                      arg_literals)
+                  .ConsumeValueOrDie();
+          // Clear visit states so that the we can use the evaluate again on
+          // the same computation.
+          embedded_evaluator.ResetVisitStates();
+
+          return computed_result->Get<ReturnT>({});
+        }));
+    return std::move(result);
+  }
+
+  Status HandleMap(HloInstruction* map) override {
+    switch (map->operand(0)->shape().element_type()) {
+      case PRED: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<bool>(map));
+        break;
+      }
+      case U8: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<uint8>(map));
+        break;
+      }
+      case U32: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<uint32>(map));
+        break;
+      }
+      case U64: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<uint64>(map));
+        break;
+      }
+      case S8: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<int8>(map));
+        break;
+      }
+      case S32: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<int32>(map));
+        break;
+      }
+      case S64: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<int64>(map));
+        break;
+      }
+      case F32: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<float>(map));
+        break;
+      }
+      case F64: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<double>(map));
+        break;
+      }
+      case C64: {
+        TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl<complex64>(map));
+        break;
+      }
+      default:
+        LOG(FATAL) << "HandleMap: unhandled primitive type for "
+                      "input operand: "
+                   << PrimitiveType_Name(
+                          map->operand(0)->shape().element_type());
+    }
+
+    return Status::OK();
+  }
+
   Status HandleReduce(HloInstruction* reduce) override {
     auto arg = reduce->operand(0);
     auto init_value = reduce->operand(1);
@@ -1220,6 +1370,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
       }
     }
 
+    HloEvaluator embedded_evaluator;
     // For each resulting dimension, calculate and assign computed value.
     TF_RETURN_IF_ERROR(result->Populate<ReturnT>(
         [&](tensorflow::gtl::ArraySlice<int64> multi_index) {
@@ -1239,13 +1390,12 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
             std::vector<const Literal*> args = {curr_val_literal.get(),
                                                 result_val_literal.get()};
 
-            // We need a new visitor for each evaluation, so that the same
-            // computation can be visited more than once (with different
-            // inputs).
-            HloEvaluator embedded_evaluator;
             std::unique_ptr<Literal> computed_result =
-                embedded_evaluator.Evaluate(*function, args)
+                embedded_evaluator.Evaluate<const Literal*>(*function, args)
                     .ConsumeValueOrDie();
+            // Clear visit states so that the we can use the evaluate again on
+            // the same computation.
+            embedded_evaluator.ResetVisitStates();
 
             // Assign computed result to result_val.
             result_val = computed_result->Get<ReturnT>({});
@@ -1302,6 +1452,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     DimensionVector window_index(window.dimensions_size());
     DimensionVector operand_index(ShapeUtil::Rank(operand_literal.shape()));
 
+    HloEvaluator embedded_evaluator;
     // For each resulting dimension, calculate and assign computed value.
     TF_RETURN_IF_ERROR(result->Populate<ReturnT>(
         [&](tensorflow::gtl::ArraySlice<int64> output_index) {
@@ -1311,8 +1462,6 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
           std::fill(operand_index.begin(), operand_index.end(), 0);
 
           do {
-            // Set curr_val to 0 if out of bound (padded).
-            ReturnT curr_val = static_cast<ReturnT>(0);
             bool out_of_bound = false;
             for (int i = 0; i < operand_index.size(); ++i) {
               operand_index[i] =
@@ -1325,23 +1474,25 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
               }
             }
             if (!out_of_bound) {
-              curr_val = operand_literal.Get<ReturnT>(operand_index);
+              auto curr_val = operand_literal.Get<ReturnT>(operand_index);
+
+              // Evaluate computation with specified literal operands.
+              const auto curr_val_literal =
+                  Literal::CreateR0<ReturnT>(curr_val);
+              const auto result_val_literal =
+                  Literal::CreateR0<ReturnT>(result_val);
+              const std::vector<const Literal*> args = {
+                  curr_val_literal.get(), result_val_literal.get()};
+              std::unique_ptr<Literal> computed_result =
+                  embedded_evaluator.Evaluate<const Literal*>(*function, args)
+                      .ConsumeValueOrDie();
+
+              // Clear visit states so that the we can use the evaluate again on
+              // the same computation.
+              embedded_evaluator.ResetVisitStates();
+
+              result_val = computed_result->Get<ReturnT>({});
             }
-            // Evaluate computation with specified literal operands.
-            const auto curr_val_literal = Literal::CreateR0<ReturnT>(curr_val);
-            const auto result_val_literal =
-                Literal::CreateR0<ReturnT>(result_val);
-            const std::vector<const Literal*> args = {curr_val_literal.get(),
-                                                      result_val_literal.get()};
-            // We need a new visitor for each evaluation, so that the same
-            // computation can be visited more than once (with different
-            // inputs).
-            HloEvaluator embedded_evaluator;
-            std::unique_ptr<Literal> computed_result =
-                embedded_evaluator.Evaluate(*function, args)
-                    .ConsumeValueOrDie();
-
-            result_val = computed_result->Get<ReturnT>({});
           } while (IndexUtil::BumpIndices(window_shape, &window_index));
 
           return result_val;
@@ -1364,7 +1515,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
         << ShapeUtil::HumanString(inferred_return_shape);
 
     const int64 rank = ShapeUtil::Rank(operand->shape());
-    auto operand_literal = parent_->GetEvaluatedLiteralFor(operand);
+    const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand);
     auto func = [&](tensorflow::gtl::ArraySlice<int64> out_index) {
       DimensionVector operand_index(rank);
       for (int64 i = 0; i < rank; ++i) {
@@ -1385,7 +1536,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                                   NativeT>::value>::type* = nullptr>
   Status HandleSin(HloInstruction* sin) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[sin],
-                        ElementWiseUnaryOp(sin, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(sin, [](ElementwiseT elem_operand) {
                           return std::sin(elem_operand);
                         }));
     return Status::OK();
@@ -1400,14 +1551,14 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleSin(HloInstruction* sin) override {
-    return HandleSin<ReturnT>(sin);
+    return HandleSin<ElementwiseT>(sin);
   }
 
   template <typename NativeT, typename std::enable_if<std::is_floating_point<
                                   NativeT>::value>::type* = nullptr>
   Status HandleCos(HloInstruction* cos) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[cos],
-                        ElementWiseUnaryOp(cos, [](ReturnT elem_operand) {
+                        ElementWiseUnaryOp(cos, [](ElementwiseT elem_operand) {
                           return std::cos(elem_operand);
                         }));
     return Status::OK();
@@ -1422,7 +1573,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleCos(HloInstruction* cos) override {
-    return HandleCos<ReturnT>(cos);
+    return HandleCos<ElementwiseT>(cos);
   }
 
  private:
@@ -1430,8 +1581,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   StatusOr<std::unique_ptr<Literal>> DynamicSlice(
       const Literal& operand_literal, const Literal& start_indices_literal,
       const Shape& result_shape) {
-    const auto& start_indices_typed =
-        start_indices_literal.GetArraySlice<IndexT>();
+    auto start_indices_typed = start_indices_literal.data<IndexT>();
     std::vector<int64> start(start_indices_typed.begin(),
                              start_indices_typed.end());
 
@@ -1459,12 +1609,11 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
   StatusOr<std::unique_ptr<Literal>> DynamicUpdateSlice(
       const Literal& operand_literal, const Literal& update_literal,
       const Literal& start_indices_literal) {
-    const auto& start_indices_typed =
-        start_indices_literal.GetArraySlice<IndexT>();
+    auto start_indices_typed = start_indices_literal.data<IndexT>();
     const std::vector<int64> start(start_indices_typed.begin(),
                                    start_indices_typed.end());
 
-    auto result = MakeUnique<Literal>(operand_literal);
+    auto result = operand_literal.CloneToUnique();
     std::vector<int64> result_index(ShapeUtil::Rank(result->shape()), 0);
 
     auto func = [&](const std::vector<int64>& update_index) {
@@ -1487,22 +1636,27 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
 
   StatusOr<std::unique_ptr<Literal>> ElementWiseUnaryOp(
       HloInstruction* instruction,
-      const std::function<ReturnT(ReturnT)>& unary_op) {
+      const std::function<ElementwiseT(ElementwiseT)>& unary_op) {
     const Literal& operand_literal =
         parent_->GetEvaluatedLiteralFor(instruction->operand(0));
-    return ElementWiseUnaryOpImpl<ReturnT, ReturnT>(instruction, unary_op,
-                                                    operand_literal);
+    TF_ASSIGN_OR_RETURN(
+        auto result_literal,
+        (ElementWiseUnaryOpImpl<ReturnT, ReturnT>(
+            instruction, ConvertUnaryFunction(unary_op), operand_literal)));
+
+    return std::move(result_literal);
   }
 
   StatusOr<std::unique_ptr<Literal>> ElementWiseBinaryOp(
       HloInstruction* instruction,
-      const std::function<ReturnT(ReturnT, ReturnT)>& binary_op) {
+      const std::function<ElementwiseT(ElementwiseT, ElementwiseT)>&
+          binary_op) {
     const auto shape = instruction->shape();
     const auto* lhs = instruction->operand(0);
     const auto* rhs = instruction->operand(1);
 
-    // TODO(b/35950897, b/27796129): add DCHECK back once implicit broadcast is
-    // removed.
+    // TODO(b/35950897, b/27796129): add DCHECK back once implicit broadcast
+    // is removed.
     if (!(ShapeUtil::SameDimensions(shape, rhs->shape()) &&
           ShapeUtil::SameDimensions(lhs->shape(), rhs->shape()))) {
       return Unimplemented(
@@ -1520,14 +1674,15 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
 
     TF_RETURN_IF_ERROR(result->Populate<ReturnT>(
         [&](tensorflow::gtl::ArraySlice<int64> multi_index) {
-          return binary_op(lhs_literal.Get<ReturnT>(multi_index),
-                           rhs_literal.Get<ReturnT>(multi_index));
+          return ConvertBinaryFunction(binary_op)(
+              lhs_literal.Get<ReturnT>(multi_index),
+              rhs_literal.Get<ReturnT>(multi_index));
         }));
     return std::move(result);
   }
 
   template <typename LhsType, typename RhsType, typename EhsType>
-  StatusOr<std::unique_ptr<Literal>> ElementWiseTernaryOp(
+  StatusOr<std::unique_ptr<Literal>> ElementwiseTernaryOp(
       HloInstruction* instruction,
       const std::function<ReturnT(LhsType, RhsType, EhsType)>& ternary_op) {
     const auto shape = instruction->shape();
@@ -1589,9 +1744,11 @@ HloEvaluator::HloEvaluator() {
   typed_visitors_[F64] = MakeUnique<TypedVisitor<double>>(this);
   typed_visitors_[C64] = MakeUnique<TypedVisitor<complex64>>(this);
 
-  typed_visitors_[BF16] = MakeUnique<FunctionVisitor>([](HloInstruction*) {
-    return Unimplemented("HloEvaluator: unhandled primitive type: BF16.");
-  });
+  // Most of the evaluator computations we use don't support BF16 (e.g.,
+  // std::ceil, std::tanh). To make evaluator work with BF16, we set all
+  // elementwise computations to be done in F32 and do BF16<->F32 conversion
+  // around the input and the output of the computations.
+  typed_visitors_[BF16] = MakeUnique<TypedVisitor<bfloat16, float>>(this);
   typed_visitors_[TUPLE] = MakeUnique<FunctionVisitor>([](HloInstruction*) {
     return Unimplemented("HloEvaluator: unhandled primitive type: TUPLE.");
   });
@@ -1600,41 +1757,53 @@ HloEvaluator::HloEvaluator() {
   });
 }
 
+template <typename LiteralPtr>
 StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate(
     const HloModule& module,
-    tensorflow::gtl::ArraySlice<const Literal*> arg_literals) {
+    tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals) {
   XLA_VLOG_LINES(2, "HloEvaluator::Evaluate module:\n" + module.ToString());
 
-  arg_literals_ = arg_literals;
   evaluated_.clear();
+  arg_literals_.clear();
+  for (const auto& literal_ptr : arg_literals) {
+    arg_literals_.push_back(&*literal_ptr);
+  }
 
   TF_RETURN_IF_ERROR(module.entry_computation()->Accept(this));
 
-  return MakeUnique<Literal>(
-      GetEvaluatedLiteralFor(module.entry_computation()->root_instruction()));
+  return GetEvaluatedLiteralFor(module.entry_computation()->root_instruction())
+      .CloneToUnique();
 }
 
+template <typename LiteralPtr>
 StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate(
     const HloComputation& computation,
-    tensorflow::gtl::ArraySlice<const Literal*> arg_literals) {
+    tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals) {
   XLA_VLOG_LINES(
       2, "HloEvaluator::Evaluate computation:\n" + computation.ToString());
-  arg_literals_ = arg_literals;
+
   evaluated_.clear();
+  arg_literals_.clear();
+  for (const auto& literal_ptr : arg_literals) {
+    arg_literals_.push_back(&*literal_ptr);
+  }
 
   TF_RETURN_IF_ERROR(computation.Accept(this));
-  return MakeUnique<Literal>(
-      GetEvaluatedLiteralFor(computation.root_instruction()));
+  return GetEvaluatedLiteralFor(computation.root_instruction()).CloneToUnique();
 }
 
+template <typename LiteralPtr>
 StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate(
     HloInstruction* instruction,
-    tensorflow::gtl::ArraySlice<const Literal*> operands) {
+    tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals) {
   TF_RET_CHECK(hlo_query::AllOperandsAreParametersOrConstants(*instruction));
   TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(instruction->shape()));
 
-  arg_literals_ = operands;
   evaluated_.clear();
+  arg_literals_.clear();
+  for (const auto& literal_ptr : arg_literals) {
+    arg_literals_.push_back(&*literal_ptr);
+  }
 
   // Evaluate operands of Parameter type against the input literals which
   // caches the evaluated literal results.
@@ -1645,14 +1814,14 @@ StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate(
               << input_literal->ToString();
       TF_RET_CHECK(ShapeUtil::Equal(operand->shape(), input_literal->shape()));
 
-      evaluated_[operand] = MakeUnique<Literal>(*input_literal);
+      evaluated_[operand] = input_literal->CloneToUnique();
     }
   }
 
   TF_RETURN_IF_ERROR(Preprocess(instruction));
   TF_RETURN_IF_ERROR(instruction->Visit(this));
   TF_RETURN_IF_ERROR(Postprocess(instruction));
-  return MakeUnique<Literal>(GetEvaluatedLiteralFor(instruction));
+  return GetEvaluatedLiteralFor(instruction).CloneToUnique();
 }
 
 StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate(
@@ -1673,7 +1842,7 @@ StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate(
   TF_RETURN_IF_ERROR(Preprocess(instruction));
   TF_RETURN_IF_ERROR(instruction->Visit(this));
   TF_RETURN_IF_ERROR(Postprocess(instruction));
-  return MakeUnique<Literal>(GetEvaluatedLiteralFor(instruction));
+  return GetEvaluatedLiteralFor(instruction).CloneToUnique();
 }
 
 std::unique_ptr<Literal> HloEvaluator::TryEvaluate(
@@ -1722,11 +1891,15 @@ StatusOr<std::unique_ptr<Literal>> HloEvaluator::EvaluateWithSubstitutions(
 }
 
 Status HloEvaluator::HandleParameter(HloInstruction* parameter) {
+  CHECK_LT(parameter->parameter_number(), arg_literals_.size());
   const Literal* input_literal = arg_literals_[parameter->parameter_number()];
   VLOG(2) << "Parameter evaluated to: " << input_literal->ToString();
-  DCHECK(ShapeUtil::Equal(parameter->shape(), input_literal->shape()));
+  DCHECK(ShapeUtil::Equal(parameter->shape(), input_literal->shape()))
+      << "parameter shape is: " << ShapeUtil::HumanString(parameter->shape())
+      << ", but input literal shape is: "
+      << ShapeUtil::HumanString(input_literal->shape());
 
-  evaluated_[parameter] = MakeUnique<Literal>(*input_literal);
+  evaluated_[parameter] = input_literal->CloneToUnique();
   return Status::OK();
 }
 
@@ -1749,8 +1922,8 @@ Status HloEvaluator::HandleTranspose(HloInstruction* transpose) {
 Status HloEvaluator::HandleConcatenate(HloInstruction* concatenate) {
   tensorflow::gtl::ArraySlice<HloInstruction*> operands(
       concatenate->operands());
-  // The result concatenate dimension is going to be the sum of all concatenate
-  // dimensions of the operands taking part of the operation.
+  // The result concatenate dimension is going to be the sum of all
+  // concatenate dimensions of the operands taking part of the operation.
   const Shape& reference_shape = operands[0]->shape();
   CHECK(!ShapeUtil::IsTuple(reference_shape));
   const int64 rank = ShapeUtil::Rank(reference_shape);
@@ -1777,7 +1950,7 @@ Status HloEvaluator::HandleConcatenate(HloInstruction* concatenate) {
 
   for (auto operand : operands) {
     const Shape& operand_shape = operand->shape();
-    TF_RETURN_IF_ERROR(result_literal->Copy(
+    TF_RETURN_IF_ERROR(result_literal->CopySliceFrom(
         GetEvaluatedLiteralFor(operand), source_indices, dest_indices,
         AsInt64Slice(operand_shape.dimensions())));
     dest_indices[concat_dim] +=
@@ -1935,16 +2108,17 @@ Status HloEvaluator::HandleGetTupleElement(HloInstruction* get_tuple_element) {
 
   const Literal& operand_tuple_literal = GetEvaluatedLiteralFor(operand);
 
-  evaluated_[get_tuple_element] =
-      MakeUnique<Literal>(operand_tuple_literal.tuple_literals(index));
-
-  return Status::OK();
+  evaluated_[get_tuple_element] = MakeUnique<Literal>(
+      ShapeUtil::GetTupleElementShape(operand->shape(), index));
+  return evaluated_[get_tuple_element]->CopyFrom(operand_tuple_literal,
+                                                 /*dest_shape_index=*/{},
+                                                 /*src_shape_index=*/{index});
 }
 
 Status HloEvaluator::HandleCopy(HloInstruction* copy) {
   TF_RET_CHECK(ShapeUtil::Compatible(copy->shape(), copy->operand(0)->shape()));
 
-  auto result = MakeUnique<Literal>(GetEvaluatedLiteralFor(copy->operand(0)));
+  auto result = GetEvaluatedLiteralFor(copy->operand(0)).CloneToUnique();
   evaluated_[copy] = std::move(result);
   return Status::OK();
 }
@@ -1960,4 +2134,30 @@ Status HloEvaluator::Postprocess(HloInstruction* hlo) {
   return Status::OK();
 }
 
+// Explicit instantiation of templatized Evaluate* methods.
+//
+template StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate<
+    const Literal*>(const HloModule& module,
+                    tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+template StatusOr<std::unique_ptr<Literal>>
+HloEvaluator::Evaluate<std::unique_ptr<Literal>>(
+    const HloModule& module,
+    tensorflow::gtl::ArraySlice<std::unique_ptr<Literal>> arg_literals);
+
+template StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate<
+    const Literal*>(const HloComputation& computation,
+                    tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+template StatusOr<std::unique_ptr<Literal>>
+HloEvaluator::Evaluate<std::unique_ptr<Literal>>(
+    const HloComputation& computation,
+    tensorflow::gtl::ArraySlice<std::unique_ptr<Literal>> arg_literals);
+
+template StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate<
+    const Literal*>(HloInstruction* instruction,
+                    tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+template StatusOr<std::unique_ptr<Literal>>
+HloEvaluator::Evaluate<std::unique_ptr<Literal>>(
+    HloInstruction* instruction,
+    tensorflow::gtl::ArraySlice<std::unique_ptr<Literal>> arg_literals);
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h
index 7557aaa2484d184555411a79d8dce2c9241427b0..02bb8b0a47065c359603a113f49626bf3ad344d8 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.h
@@ -42,9 +42,12 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // Precondition: The indices of arg_literals correspond to the parameter
   // numbers of the HLO parameters in the computation. See comment below for an
   // example.
+  // `LiteralPtr` accepts either std::unique_ptr<Literal> or const Literal*
+  // type.
+  template <typename LiteralPtr>
   StatusOr<std::unique_ptr<Literal>> Evaluate(
       const HloModule& module,
-      tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+      tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals);
 
   // Evaluates an HLO computation and an array of pointers to literals.
   // Returns the evaluated result as a literal if successful.
@@ -62,9 +65,12 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // where Parameter0 has parameter_number 0 and Parameter1 has parameter_number
   // 1 in this computation. The input literals array will then have its first
   // literal map to Parameter0 and the second map to Parameter1.
+  // `LiteralPtr` accepts either std::unique_ptr<Literal> or const Literal*
+  // type.
+  template <typename LiteralPtr>
   StatusOr<std::unique_ptr<Literal>> Evaluate(
       const HloComputation& computation,
-      tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+      tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals);
 
   // Evaluates a single HLO instruction and an array of pointers to literals.
   // Return the evaluated result as literal if successful.
@@ -72,10 +78,12 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // 1. argument literals correspond to the input instruction's parameters in
   // their post-ordering.
   // 2. the instruction's operands must be of either Parameter or Constant type.
-  // TODO(b/35950897): implement more ops other than element-wise ops.
+  // `LiteralPtr` accepts either std::unique_ptr<Literal> or const Literal*
+  // type.
+  template <typename LiteralPtr>
   StatusOr<std::unique_ptr<Literal>> Evaluate(
       HloInstruction* instruction,
-      tensorflow::gtl::ArraySlice<const Literal*> arg_literals);
+      tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals);
 
   // Evaluates a single HLO instruction with constant operands.
   // Returns the evaluated result as literal if successful.
@@ -100,12 +108,16 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
  protected:
   // Templated DfsHloVisitor. Typically ReturnT here indicates the resulting
   // literal type of each evaluated Handle* method of a TypedVisitor.
-  // There are however a few notable exceptions to this is rule, notably:
+  // There are however a few notable exceptions to this rule, notably:
   // - HandleCompare and HandleIsFinite: where the resulting literal type is
   // always boolean.
   // These operations are handled outside of the parent HloEvaluator handlers
   // instead of from within TypedVisitor.
-  template <typename ReturnT>
+  //
+  // Type params:
+  //   - ReturnT: The type of input and output of each operation.
+  //   - ElementwiseT: The type in which internal computation are done.
+  template <typename ReturnT, typename ElementwiseT = ReturnT>
   class TypedVisitor;
 
   // Wraps around instruction handling to infer types before dispatching to
@@ -134,6 +146,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   Status HandleIsFinite(HloInstruction* is_finite) override;
 
   Status HandleCompare(HloInstruction* compare) override;
+
   Status HandleTuple(HloInstruction* tuple) override;
 
   Status HandleGetTupleElement(HloInstruction* get_tuple_element) override;
@@ -167,13 +180,15 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // TODO(b/35950897): have better memory management here to free instructions
   // that are no longer a parent for any other subsequent instruction in
   // post-orderring.
+  // Must be cleared for each evaluation.
   tensorflow::gtl::FlatMap<const HloInstruction*, std::unique_ptr<Literal>>
       evaluated_;
 
-  // Stores input literals, assuming they are in post-order. Literals are not
-  // owned by this class, and they must outlive the lifetime of the instance of
-  // this class.
-  tensorflow::gtl::ArraySlice<const Literal*> arg_literals_;
+  // Caches pointers to input literals, assuming they are in post-order.
+  // Literals are not owned by this class, and they must outlive the lifetime of
+  // each invocation to the Evaluate* method.
+  // Must be cleared for each evaluation.
+  std::vector<const Literal*> arg_literals_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(HloEvaluator);
 };
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index b2c4351896764fa8683e91396f526d97ba208df6..97765d65909cee192f65069777f8f195081603b2 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -25,8 +25,10 @@ limitations under the License.
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/reference_util.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_element_type_converter.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/test.h"
@@ -35,15 +37,33 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
 namespace {
 
-class HloEvaluatorTest : public HloVerifiedTestBase {
+static std::array<bool, 2> use_bf16_params{true, false};
+
+class HloEvaluatorTest : public ::testing::WithParamInterface<bool>,
+                         public HloVerifiedTestBase {
  protected:
-  HloEvaluatorTest() { evaluator_ = MakeUnique<HloEvaluator>(); }
+  HloEvaluatorTest() : use_bfloat16_(GetParam()) {
+    evaluator_ = MakeUnique<HloEvaluator>();
+  }
+
+  std::unique_ptr<Literal> Evaluate(
+      tensorflow::gtl::ArraySlice<const Literal*> arg_literals = {}) {
+    if (use_bfloat16_) {
+      // In BF16 mode, we convert all F32 type to BF16 and evaluate the module.
+      auto type_converter = HloElementTypeConverter(F32, BF16);
+      type_converter.Run(&module()).ValueOrDie();
+    }
+    return evaluator_->Evaluate(*module().entry_computation(), arg_literals)
+        .ConsumeValueOrDie();
+  }
 
   std::unique_ptr<HloEvaluator> evaluator_;
 
@@ -52,12 +72,11 @@ class HloEvaluatorTest : public HloVerifiedTestBase {
     HloComputation::Builder b(TestName());
     auto c1 =
         b.AddInstruction(HloInstruction::CreateConstant(std::move(input)));
-    auto instruction = b.AddInstruction(
+    b.AddInstruction(
         HloInstruction::CreateUnary(expected->shape(), opcode, c1));
     module().AddEntryComputation(b.Build());
 
-    std::unique_ptr<Literal> result =
-        evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+    std::unique_ptr<Literal> result = Evaluate();
 
     auto element_type = expected->shape().element_type();
     if (element_type == F32 || element_type == F64) {
@@ -74,20 +93,24 @@ class HloEvaluatorTest : public HloVerifiedTestBase {
     HloComputation::Builder b(TestName());
     auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs)));
     auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs)));
-    auto instruction = b.AddInstruction(
+    b.AddInstruction(
         HloInstruction::CreateBinary(expected->shape(), opcode, c1, c2));
     module().AddEntryComputation(b.Build());
 
-    std::unique_ptr<Literal> result =
-        evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+    std::unique_ptr<Literal> result = Evaluate();
 
     LiteralTestUtil::ExpectEqual(*expected, *result);
   }
+
+  bool use_bfloat16_;
 };
 
+#define XLA_TYPED_TEST_P(test_case_name, test_name, test_type1) \
+  TEST_P(test_case_name, test_name)
+
 // Verifies that HloEvaluator evaluates a HLO instruction that performs clamp
 // with 3 operands.
-TEST_F(HloEvaluatorTest, DoesClamp) {
+TEST_P(HloEvaluatorTest, DoesClamp) {
   auto low = Literal::CreateR2<float>({{0.f, 2.f}, {2.f, 4.f}});
   auto value = Literal::CreateR2<float>({{0.f, 5.f}, {0.f, 4.f}});
   auto high = Literal::CreateR2<float>({{2.f, 4.f}, {4.f, 4.f}});
@@ -97,19 +120,18 @@ TEST_F(HloEvaluatorTest, DoesClamp) {
   auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(low)));
   auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(value)));
   auto c3 = b.AddInstruction(HloInstruction::CreateConstant(std::move(high)));
-  auto instruction = b.AddInstruction(
+  b.AddInstruction(
       HloInstruction::CreateTernary(shape, HloOpcode::kClamp, c1, c2, c3));
   module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({{0, 4}, {2, 4}});
 
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DISABLED_DoesClampSpecialBroadcast) {
+TEST_P(HloEvaluatorTest, DISABLED_DoesClampSpecialBroadcast) {
   auto low = Literal::CreateR0<float>(0.f);
   auto value = Literal::CreateR2<float>({{-1.f, 0.f}, {1.f, 2.f}});
   auto high = Literal::CreateR0<float>(1.f);
@@ -119,12 +141,11 @@ TEST_F(HloEvaluatorTest, DISABLED_DoesClampSpecialBroadcast) {
   auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(low)));
   auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(value)));
   auto c3 = b.AddInstruction(HloInstruction::CreateConstant(std::move(high)));
-  auto instruction = b.AddInstruction(
+  b.AddInstruction(
       HloInstruction::CreateTernary(shape, HloOpcode::kClamp, c1, c2, c3));
   module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({{0, 0}, {1, 1}});
 
@@ -133,7 +154,7 @@ TEST_F(HloEvaluatorTest, DISABLED_DoesClampSpecialBroadcast) {
 
 // Verifies that HloEvaluator evaluates a HLO instruction that performs select
 // with 3 operands.
-TEST_F(HloEvaluatorTest, DoesSelect) {
+TEST_P(HloEvaluatorTest, DoesSelect) {
   auto pred = Literal::CreateR2<bool>({{true, false}, {false, true}});
   auto on_true = Literal::CreateR2<float>({{2.f, 4.f}, {4.f, 4.f}});
   auto on_false = Literal::CreateR2<float>({{0.f, 5.f}, {0.f, 4.f}});
@@ -145,12 +166,11 @@ TEST_F(HloEvaluatorTest, DoesSelect) {
       b.AddInstruction(HloInstruction::CreateConstant(std::move(on_true)));
   auto c3 =
       b.AddInstruction(HloInstruction::CreateConstant(std::move(on_false)));
-  auto instruction = b.AddInstruction(
+  b.AddInstruction(
       HloInstruction::CreateTernary(shape, HloOpcode::kSelect, c1, c2, c3));
   module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(instruction, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate({});
 
   auto expected = Literal::CreateR2<float>({{2, 5}, {0, 4}});
 
@@ -159,7 +179,7 @@ TEST_F(HloEvaluatorTest, DoesSelect) {
 
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise addition with 2 operands.
-TEST_F(HloEvaluatorTest, DoesAdd) {
+TEST_P(HloEvaluatorTest, DoesAdd) {
   auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
   auto expected = Literal::CreateR2<int64>({{3, 4}, {-96, 8}});
@@ -168,7 +188,7 @@ TEST_F(HloEvaluatorTest, DoesAdd) {
 }
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise and with 2 operands.
-TEST_F(HloEvaluatorTest, DoesAnd) {
+TEST_P(HloEvaluatorTest, DoesAnd) {
   auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
   auto expected = Literal::CreateR2<int64>({{0, 0}, {4, 4}});
@@ -177,7 +197,7 @@ TEST_F(HloEvaluatorTest, DoesAnd) {
 }
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise or with 2 operands.
-TEST_F(HloEvaluatorTest, DoesOr) {
+TEST_P(HloEvaluatorTest, DoesOr) {
   auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
   auto expected = Literal::CreateR2<int64>({{3, 4}, {-100, 4}});
@@ -186,7 +206,7 @@ TEST_F(HloEvaluatorTest, DoesOr) {
 }
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise multiply with 2 operands.
-TEST_F(HloEvaluatorTest, DoesMultiply) {
+TEST_P(HloEvaluatorTest, DoesMultiply) {
   auto lhs = Literal::CreateR2<int32>({{-1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int32>(
       {{std::numeric_limits<int32>::min(), 4}, {4, 4}});
@@ -197,14 +217,14 @@ TEST_F(HloEvaluatorTest, DoesMultiply) {
 }
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise divide with 2 operands.
-TEST_F(HloEvaluatorTest, DoesDivideInt64) {
+TEST_P(HloEvaluatorTest, DoesDivideInt64) {
   auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
   auto expected = Literal::CreateR2<int64>({{0, 0}, {-25, 1}});
   TestBinaryOp(HloOpcode::kDivide, std::move(expected), std::move(lhs),
                std::move(rhs));
 }
-TEST_F(HloEvaluatorTest, DoesDivideDouble) {
+TEST_P(HloEvaluatorTest, DoesDivideDouble) {
   auto lhs = Literal::CreateR2<double>({{1.0, 0.0}, {-100.0, 4.0}});
   auto rhs = Literal::CreateR2<double>({{2.2, 4.0}, {4.0, 4.0}});
   auto expected =
@@ -215,40 +235,41 @@ TEST_F(HloEvaluatorTest, DoesDivideDouble) {
 
 // Verifies that HloEvaluator evaluates a HLO instruction that performs
 // element-wise abs op with 1 operand.
-TEST_F(HloEvaluatorTest, DoesAbsR2) {
+TEST_P(HloEvaluatorTest, DoesAbsR2) {
   auto operand = Literal::CreateR2<int64>({{1, -20}, {-100, 4}});
   auto expected = Literal::CreateR2<int64>({{1, 20}, {100, 4}});
   TestUnaryOp(HloOpcode::kAbs, std::move(expected), std::move(operand));
 }
-TEST_F(HloEvaluatorTest, DoesAbsR0) {
+TEST_P(HloEvaluatorTest, DoesAbsR0) {
   auto operand = Literal::CreateR0<float>(-1.0f);
   auto expected = Literal::CreateR0<float>(1.0f);
   TestUnaryOp(HloOpcode::kAbs, std::move(expected), std::move(operand));
 }
-TEST_F(HloEvaluatorTest, DoesAbsR1WithZeroSize) {
+TEST_P(HloEvaluatorTest, DoesAbsR1WithZeroSize) {
   auto operand = Literal::CreateR1<float>({});
   auto expected = Literal::CreateR1<float>({});
   TestUnaryOp(HloOpcode::kAbs, std::move(expected), std::move(operand));
 }
-TEST_F(HloEvaluatorTest, DoesNegateR2) {
+TEST_P(HloEvaluatorTest, DoesNegateR2) {
   auto operand = Literal::CreateR2<int32>(
       {{0, std::numeric_limits<int32>::min()}, {-1, 4}});
   auto expected =
       Literal::CreateR2<int32>({{0, std::numeric_limits<int>::min()}, {1, -4}});
   TestUnaryOp(HloOpcode::kNegate, std::move(expected), std::move(operand));
 }
-TEST_F(HloEvaluatorTest, DoesCosR2) {
+TEST_P(HloEvaluatorTest, DoesCosR2) {
   auto operand = Literal::CreateR2<float>({{0, M_PI}, {-M_PI, 2 * M_PI}});
   auto expected = Literal::CreateR2<float>({{1, -1}, {-1, 1}});
-  TestUnaryOp(HloOpcode::kCos, std::move(expected), std::move(operand));
+  TestUnaryOp(HloOpcode::kCos, std::move(expected), std::move(operand),
+              use_bfloat16_ ? 0x1.0P-5 : 0x1.0P-20);
 }
-TEST_F(HloEvaluatorTest, DoesSinR2) {
+TEST_P(HloEvaluatorTest, DoesSinR2) {
   auto operand = Literal::CreateR2<float>({{0, M_PI}, {-M_PI, 2 * M_PI}});
   auto expected = Literal::CreateR2<float>({{0, 0}, {0, 0}});
   TestUnaryOp(HloOpcode::kSin, std::move(expected), std::move(operand),
-              0x1.0P-20);
+              use_bfloat16_ ? 0x1.0P-5 : 0x1.0P-20);
 }
-TEST_F(HloEvaluatorTest, DoesNotR2) {
+TEST_P(HloEvaluatorTest, DoesNotR2) {
   auto operand =
       Literal::CreateR2<int32>({{0, std::numeric_limits<int>::min()},
                                 {-1, std::numeric_limits<int>::max()}});
@@ -259,7 +280,7 @@ TEST_F(HloEvaluatorTest, DoesNotR2) {
 }
 // Verifies that HloEvaluator evaluates a HLO Computation with non-parameter nor
 // constant operands.
-TEST_F(HloEvaluatorTest, DoesTraverseInstructions) {
+TEST_P(HloEvaluatorTest, DoesTraverseInstructions) {
   auto lhs = Literal::CreateR2<int64>({{1, 0}, {-100, 4}});
   auto rhs = Literal::CreateR2<int64>({{2, 4}, {4, 4}});
   auto rhs2 = Literal::CreateR2<int64>({{1, -20}, {-100, 4}});
@@ -279,10 +300,9 @@ TEST_F(HloEvaluatorTest, DoesTraverseInstructions) {
       b.AddInstruction(HloInstruction::CreateParameter(2, shape, "rhs2"));
   b.AddInstruction(HloInstruction::CreateBinary(shape, HloOpcode::kAdd,
                                                 lhs_instruction, param_rhs2));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, args).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate(args);
 
   auto expected = Literal::CreateR2<int64>({{4, -16}, {-196, 12}});
 
@@ -290,7 +310,7 @@ TEST_F(HloEvaluatorTest, DoesTraverseInstructions) {
 }
 
 // Verifies Reshape operation is correctly evaluated.
-TEST_F(HloEvaluatorTest, DoesReshape) {
+TEST_P(HloEvaluatorTest, DoesReshape) {
   HloComputation::Builder b(TestName());
   const int64 dimensions[] = {11, 8, 7, 5, 9};
   TF_ASSERT_OK_AND_ASSIGN(auto literal,
@@ -304,21 +324,20 @@ TEST_F(HloEvaluatorTest, DoesReshape) {
   const int64 permutation[] = {1, 2, 0, 4, 3};
   b.AddInstruction(
       HloInstruction::CreateTranspose(shape, literal_instruction, permutation));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate({});
 
   using NativeT = typename primitive_util::PrimitiveTypeToNative<F32>::type;
   result->EachCell<NativeT>(
       [&](tensorflow::gtl::ArraySlice<int64> indices, NativeT value) {
         std::vector<int64> rindexes = Permute(permutation, indices);
-        EXPECT_TRUE(value == literal_clone->Get<NativeT>(rindexes));
+        EXPECT_NEAR(value, literal_clone->Get<NativeT>(rindexes), 0x1.0P-5);
       });
 }
 
 // Verifies Broadcast operation is correctly evaluated.
-TEST_F(HloEvaluatorTest, DoesBroadcast) {
+TEST_P(HloEvaluatorTest, DoesBroadcast) {
   HloComputation::Builder b(TestName());
   auto input_literal = Literal::CreateR2<int32>({{1, 2}, {3, 4}, {5, 6}});
   auto output_literal = Literal::CreateR3<int32>(
@@ -327,15 +346,14 @@ TEST_F(HloEvaluatorTest, DoesBroadcast) {
       HloInstruction::CreateConstant(std::move(input_literal)));
   b.AddInstruction(HloInstruction::CreateBroadcast(
       output_literal->shape(), literal_instruction, {1, 2}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate({});
 
   LiteralTestUtil::ExpectEqual(*result, *output_literal);
 }
 
-TEST_F(HloEvaluatorTest, DoesBroadcastScalar) {
+TEST_P(HloEvaluatorTest, DoesBroadcastScalar) {
   HloComputation::Builder b(TestName());
   auto input_literal = Literal::CreateR0<int32>(111);
   auto output_literal = Literal::CreateR2<int32>(
@@ -347,15 +365,14 @@ TEST_F(HloEvaluatorTest, DoesBroadcastScalar) {
   b.AddInstruction(HloInstruction::CreateBroadcast(
       output_literal->shape(), literal_instruction,
       /*broadcast_dimensions=*/{}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate({});
 
   LiteralTestUtil::ExpectEqual(*result, *output_literal);
 }
 
-TEST_F(HloEvaluatorTest, DoesConcatenateSimple) {
+TEST_P(HloEvaluatorTest, DoesConcatenateSimple) {
   HloComputation::Builder b(TestName());
 
   HloInstruction* operand1 = b.AddInstruction(HloInstruction::CreateConstant(
@@ -368,17 +385,16 @@ TEST_F(HloEvaluatorTest, DoesConcatenateSimple) {
   Shape shape = ShapeUtil::MakeShape(S64, {4, 2});
   b.AddInstruction(HloInstruction::CreateConcatenate(shape, operands, 0));
 
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected =
       Literal::CreateR2<int64>({{-1, -2}, {100, 200}, {-2, -3}, {-100, -200}});
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ConcatenateHandlesShapeWithZeroElement) {
+TEST_P(HloEvaluatorTest, ConcatenateHandlesShapeWithZeroElement) {
   HloComputation::Builder b(TestName());
 
   HloInstruction* operand1 = b.AddInstruction(
@@ -391,16 +407,15 @@ TEST_F(HloEvaluatorTest, ConcatenateHandlesShapeWithZeroElement) {
   Shape shape = ShapeUtil::MakeShape(S64, {2});
   b.AddInstruction(HloInstruction::CreateConcatenate(shape, operands, 0));
 
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR1<int64>({100, 200});
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ConvertWithSameLayout) {
+TEST_P(HloEvaluatorTest, ConvertWithSameLayout) {
   HloComputation::Builder b(TestName());
 
   auto input_literal = Literal::CreateR2<int32>({{1, 2}, {3, 4}, {5, 6}});
@@ -412,15 +427,14 @@ TEST_F(HloEvaluatorTest, ConvertWithSameLayout) {
   HloInstruction* constant = b.AddInstruction(
       HloInstruction::CreateConstant(std::move(input_literal)));
   b.AddInstruction(HloInstruction::CreateConvert(expected->shape(), constant));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   LiteralTestUtil::ExpectEqual(*result, *expected);
 }
 
-TEST_F(HloEvaluatorTest, ConvertWithDifferentLayout) {
+TEST_P(HloEvaluatorTest, ConvertWithDifferentLayout) {
   HloComputation::Builder b(TestName());
 
   auto input_literal = Literal::CreateR2WithLayout<int32>(
@@ -433,10 +447,9 @@ TEST_F(HloEvaluatorTest, ConvertWithDifferentLayout) {
   HloInstruction* constant = b.AddInstruction(
       HloInstruction::CreateConstant(std::move(input_literal)));
   b.AddInstruction(HloInstruction::CreateConvert(expected->shape(), constant));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   LiteralTestUtil::ExpectEqual(*result, *expected);
 }
@@ -454,7 +467,7 @@ PaddingConfig CreatePaddingConfig(
   return padding_config;
 }
 
-TEST_F(HloEvaluatorTest, Pad2DIntegerArrayWithZeroDimension) {
+TEST_P(HloEvaluatorTest, Pad2DIntegerArrayWithZeroDimension) {
   auto operand = Literal::CreateR2<int32>({{}, {}});
   HloComputation::Builder b(TestName());
   auto operand_instruction =
@@ -467,11 +480,11 @@ TEST_F(HloEvaluatorTest, Pad2DIntegerArrayWithZeroDimension) {
 
   auto padding_config = CreatePaddingConfig({{{1, 0, 2}}, {{0, 2, 1}}});
   Shape shape = ShapeUtil::MakeShape(S32, {5, 2});
-  auto pad_instruction = b.AddInstruction(HloInstruction::CreatePad(
+  b.AddInstruction(HloInstruction::CreatePad(
       shape, operand_instruction, padding_value_instruction, padding_config));
   module().AddEntryComputation(b.Build());
 
-  auto result = evaluator_->Evaluate(pad_instruction).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<int32>(
       {{10, 10}, {10, 10}, {10, 10}, {10, 10}, {10, 10}});
@@ -479,7 +492,7 @@ TEST_F(HloEvaluatorTest, Pad2DIntegerArrayWithZeroDimension) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, Pad4DFloatArrayWithInteriorPadding) {
+TEST_P(HloEvaluatorTest, Pad4DFloatArrayWithInteriorPadding) {
   HloComputation::Builder b(TestName());
 
   Array4D<float> input_array(3, 2, 1, 1, {1, 2, 3, 4, 5, 6});
@@ -496,10 +509,9 @@ TEST_F(HloEvaluatorTest, Pad4DFloatArrayWithInteriorPadding) {
       CreatePaddingConfig({{{1, 0, 2}}, {{0, 2, 1}}, {{0, 0, 0}}, {{0, 0, 0}}});
   b.AddInstruction(HloInstruction::CreatePad(
       shape, input_instruction, pad_instruction, r4_padding_on_dim0_dim1));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected_array = MakeUnique<Array4D<float>>(8, 5, 1, 1);
   expected_array->Fill(kPadValue);
@@ -515,7 +527,7 @@ TEST_F(HloEvaluatorTest, Pad4DFloatArrayWithInteriorPadding) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, NegativePadding2D) {
+TEST_P(HloEvaluatorTest, NegativePadding2D) {
   HloComputation::Builder b(TestName());
 
   // input_array:
@@ -541,10 +553,9 @@ TEST_F(HloEvaluatorTest, NegativePadding2D) {
                                              pad_value_instruction,
                                              r2_padding_on_dim0_dim1));
 
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   // f32[1,5] { 7.0, 2.718, 2.718, 2.718, 2.718 }
   auto expected_array = MakeUnique<Array2D<float>>(1, 5);
@@ -555,10 +566,10 @@ TEST_F(HloEvaluatorTest, NegativePadding2D) {
   (*expected_array)(0, 4) = 2.718f;
   auto expected = Literal::CreateR2FromArray2D<float>(*expected_array);
 
-  LiteralTestUtil::ExpectEqual(*expected, *result);
+  LiteralTestUtil::ExpectNear(*expected, *result, ErrorSpec(0x1.0P-5));
 }
 
-TEST_F(HloEvaluatorTest, NegativeAndInteriorPadding2D) {
+TEST_P(HloEvaluatorTest, NegativeAndInteriorPadding2D) {
   HloComputation::Builder b(TestName());
 
   // f32[4,3] {
@@ -587,10 +598,9 @@ TEST_F(HloEvaluatorTest, NegativeAndInteriorPadding2D) {
                                              pad_value_instruction,
                                              r2_padding_on_dim0_dim1));
 
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected_array = MakeUnique<Array2D<float>>(0, 9);
   auto expected = Literal::CreateR2FromArray2D<float>(*expected_array);
@@ -598,7 +608,7 @@ TEST_F(HloEvaluatorTest, NegativeAndInteriorPadding2D) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DotRank2AndRank1) {
+TEST_P(HloEvaluatorTest, DotRank2AndRank1) {
   HloComputation::Builder b(TestName());
 
   // lhs:
@@ -621,12 +631,14 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank1) {
       b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal)));
 
   Shape shape = ShapeUtil::MakeShape(F32, {4, 2});
-  b.AddInstruction(HloInstruction::CreateBinary(
-      shape, HloOpcode::kDot, lhs_instruction, rhs_instruction));
-  auto computation = module().AddEntryComputation(b.Build());
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction,
+                                             rhs_instruction, dot_dnums));
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   // clang-format off
   auto expected_array = Array2D<float>({
@@ -641,7 +653,7 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank1) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DotRank1AndRank2) {
+TEST_P(HloEvaluatorTest, DotRank1AndRank2) {
   HloComputation::Builder b(TestName());
 
   // lhs:
@@ -664,19 +676,21 @@ TEST_F(HloEvaluatorTest, DotRank1AndRank2) {
       b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal)));
 
   Shape shape = ShapeUtil::MakeShape(F32, {2});
-  b.AddInstruction(HloInstruction::CreateBinary(
-      shape, HloOpcode::kDot, lhs_instruction, rhs_instruction));
-  auto computation = module().AddEntryComputation(b.Build());
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction,
+                                             rhs_instruction, dot_dnums));
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR1<float>({22.f, 28.f});
 
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DotRank2AndRank2) {
+TEST_P(HloEvaluatorTest, DotRank2AndRank2) {
   HloComputation::Builder b(TestName());
 
   // lhs:
@@ -705,12 +719,14 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank2) {
       b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal)));
 
   Shape shape = ShapeUtil::MakeShape(F32, {4, 2});
-  b.AddInstruction(HloInstruction::CreateBinary(
-      shape, HloOpcode::kDot, lhs_instruction, rhs_instruction));
-  auto computation = module().AddEntryComputation(b.Build());
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  b.AddInstruction(HloInstruction::CreateDot(shape, lhs_instruction,
+                                             rhs_instruction, dot_dnums));
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected_array = Array2D<float>({
       {22.f, 28.f},
@@ -723,7 +739,7 @@ TEST_F(HloEvaluatorTest, DotRank2AndRank2) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, SimpleConv1D) {
+TEST_P(HloEvaluatorTest, SimpleConv1D) {
   HloComputation::Builder b(TestName());
 
   Array3D<float> lhs_array = {{{1, 2, 3}}};
@@ -761,10 +777,9 @@ TEST_F(HloEvaluatorTest, SimpleConv1D) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 3});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   Array3D<float> expected_array = {{{11.f, 18.f, 9.f}}};
   auto expected = Literal::CreateR3FromArray3D<float>(expected_array);
@@ -772,7 +787,7 @@ TEST_F(HloEvaluatorTest, SimpleConv1D) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) {
+TEST_P(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) {
   HloComputation::Builder b(TestName());
 
   Array4D<float> lhs_array(1, 1, 4, 4);
@@ -816,10 +831,9 @@ TEST_F(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   Array4D<float> expected_array(1, 1, 4, 4);
   // clang-format off
@@ -835,7 +849,7 @@ TEST_F(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) {
+TEST_P(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) {
   HloComputation::Builder b(TestName());
 
   // clang-format off
@@ -900,21 +914,22 @@ TEST_F(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   // clang-format off
   // Result dimensions: [feature=1, height=1, batch=1, width=2]
   Array4D<float> expected_array({{{{2514, 2685}}}});
+  Array4D<float> expected_array_bf16({{{{2512, 2672}}}});
   // clang-format on
-  auto expected = Literal::CreateR4FromArray4D<float>(expected_array);
+  auto expected = Literal::CreateR4FromArray4D<float>(
+      use_bfloat16_ ? expected_array_bf16 : expected_array);
 
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, Conv2DGeneralDimensions) {
+TEST_P(HloEvaluatorTest, Conv2DGeneralDimensions) {
   HloComputation::Builder b(TestName());
 
   // clang-format off
@@ -976,21 +991,22 @@ TEST_F(HloEvaluatorTest, Conv2DGeneralDimensions) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   // clang-format off
   // Result dimensions: [feature=1, height=1, batch=1, width=2]
   Array4D<float> expected_array({{{{2514, 2685}}}});
+  Array4D<float> expected_array_bf16({{{{2512, 2672}}}});
   // clang-format on
-  auto expected = Literal::CreateR4FromArray4D<float>(expected_array);
+  auto expected = Literal::CreateR4FromArray4D<float>(
+      use_bfloat16_ ? expected_array_bf16 : expected_array);
 
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithHighPadding) {
+TEST_P(HloEvaluatorTest, DilatedBaseConv2DWithHighPadding) {
   HloComputation::Builder b(TestName());
 
   Array4D<float> lhs_array(1, 1, 4, 4);
@@ -1034,10 +1050,9 @@ TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithHighPadding) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 7, 7});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   Array4D<float> expected_array(1, 1, 7, 7);
   expected_array.FillWithYX(Array2D<float>({
@@ -1054,7 +1069,7 @@ TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithHighPadding) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithLowAndHighPadding) {
+TEST_P(HloEvaluatorTest, DilatedBaseConv2DWithLowAndHighPadding) {
   HloComputation::Builder b(TestName());
 
   Array4D<float> lhs_array(1, 1, 4, 4);
@@ -1098,10 +1113,9 @@ TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithLowAndHighPadding) {
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 8, 8});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   Array4D<float> expected_array(1, 1, 8, 8);
   expected_array.FillWithYX(Array2D<float>({
@@ -1119,7 +1133,7 @@ TEST_F(HloEvaluatorTest, DilatedBaseConv2DWithLowAndHighPadding) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest,
+TEST_P(HloEvaluatorTest,
        DilatedWindowAndBaseConv2DWithDifferentLowAndHighPaddingAndStrides) {
   HloComputation::Builder b(TestName());
 
@@ -1170,10 +1184,9 @@ TEST_F(HloEvaluatorTest,
   const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 9, 3});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, window, dnums));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   Array4D<float> expected_array(1, 1, 9, 3);
   expected_array.FillWithYX(Array2D<float>({
@@ -1192,7 +1205,7 @@ TEST_F(HloEvaluatorTest,
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ReduceAdd) {
+TEST_P(HloEvaluatorTest, ReduceAdd) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1225,17 +1238,16 @@ TEST_F(HloEvaluatorTest, ReduceAdd) {
       HloInstruction::CreateReduce(shape, arg_instruction, init_value,
                                    /*dimensions_to_reduce=*/{1}, add_func));
 
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR1<float>({6, 18});
 
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ReduceWindowMax) {
+TEST_P(HloEvaluatorTest, ReduceWindowMax) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1278,15 +1290,15 @@ TEST_F(HloEvaluatorTest, ReduceWindowMax) {
   b.AddInstruction(HloInstruction::CreateReduceWindow(
       shape, arg_instruction, init_value, window, max_func));
 
-  auto computation = module().AddEntryComputation(b.Build());
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({{6, 7}});
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ReduceWindowAdd) {
+TEST_P(HloEvaluatorTest, ReduceWindowAdd) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1335,21 +1347,21 @@ TEST_F(HloEvaluatorTest, ReduceWindowAdd) {
   b.AddInstruction(HloInstruction::CreateReduceWindow(
       shape, arg_instruction, init_value, window, add_func));
 
-  auto computation = module().AddEntryComputation(b.Build());
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({{1, 3, 5}, {5, 11, 13}});
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, ReduceWindowAdd6D) {
+TEST_P(HloEvaluatorTest, ReduceWindowAdd6D) {
   HloComputation::Builder b(TestName());
 
   // arg: f32[4,4,4,4,4,4] full of ones. Using small dims to limit run-time.
   std::vector<int64> input_dims(6, 4);
   std::unique_ptr<Literal> arg_literal =
-      Literal::CreateFullWithMonotonicDim0MajorLayout<float>(input_dims, 1.0f);
+      Literal::CreateFullWithDescendingLayout<float>(input_dims, 1.0f);
 
   HloInstruction* arg_instruction =
       b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal)));
@@ -1396,17 +1408,17 @@ TEST_F(HloEvaluatorTest, ReduceWindowAdd6D) {
   b.AddInstruction(HloInstruction::CreateReduceWindow(
       shape, arg_instruction, init_value, window, add_func));
 
-  auto computation = module().AddEntryComputation(b.Build());
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result = Evaluate();
 
   std::vector<int64> output_dims = {4, 3, 3, 3, 4, 4};
   std::unique_ptr<Literal> result_literal =
-      Literal::CreateFullWithMonotonicDim0MajorLayout<float>(output_dims, 8.0f);
+      Literal::CreateFullWithDescendingLayout<float>(output_dims, 8.0f);
   LiteralTestUtil::ExpectEqual(*result_literal, *result);
 }
 
-TEST_F(HloEvaluatorTest, StridedSlice) {
+TEST_P(HloEvaluatorTest, StridedSlice) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1427,10 +1439,9 @@ TEST_F(HloEvaluatorTest, StridedSlice) {
                                                /*start_indices=*/{0, 2},
                                                /*limit_indices=*/{3, 5},
                                                /*strides=*/{2, 3}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({
       {3},
@@ -1440,7 +1451,7 @@ TEST_F(HloEvaluatorTest, StridedSlice) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DynamicSlice) {
+TEST_P(HloEvaluatorTest, DynamicSlice) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1461,10 +1472,9 @@ TEST_F(HloEvaluatorTest, DynamicSlice) {
   Shape shape = ShapeUtil::MakeShape(F32, {2, 3});
   b.AddInstruction(HloInstruction::CreateDynamicSlice(shape, operand,
                                                       start_indices, {2, 3}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({
       {2, 3, 4},
@@ -1476,7 +1486,7 @@ TEST_F(HloEvaluatorTest, DynamicSlice) {
 
 // Verifies that the HloEvaluator's implementation goes along with existing
 // backends' behavior, although this is not required by the spec.
-TEST_F(HloEvaluatorTest, DynamicSliceModSlice) {
+TEST_P(HloEvaluatorTest, DynamicSliceModSlice) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1497,10 +1507,9 @@ TEST_F(HloEvaluatorTest, DynamicSliceModSlice) {
   Shape shape = ShapeUtil::MakeShape(F32, {2, 3});
   b.AddInstruction(HloInstruction::CreateDynamicSlice(shape, operand,
                                                       start_indices, {2, 3}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<float>({
       {2, 3, 4},
@@ -1510,7 +1519,7 @@ TEST_F(HloEvaluatorTest, DynamicSliceModSlice) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, DynamicSliceUpdate) {
+TEST_P(HloEvaluatorTest, DynamicSliceUpdate) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1534,10 +1543,9 @@ TEST_F(HloEvaluatorTest, DynamicSliceUpdate) {
   Shape shape = ShapeUtil::MakeShape(F64, {2, 3});
   b.AddInstruction(HloInstruction::CreateDynamicUpdateSlice(
       shape, operand, update, start_indices));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<double>({
       {1, -2, -3},
@@ -1547,7 +1555,7 @@ TEST_F(HloEvaluatorTest, DynamicSliceUpdate) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, SetAndGetTuples) {
+TEST_P(HloEvaluatorTest, SetAndGetTuples) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1570,9 +1578,9 @@ TEST_F(HloEvaluatorTest, SetAndGetTuples) {
   Shape shape = ShapeUtil::MakeShape(F64, {2, 3});
   b.AddInstruction(HloInstruction::CreateGetTupleElement(shape, tuple, 1));
 
-  auto computation = module().AddEntryComputation(b.Build());
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto expected = Literal::CreateR2<double>({
       {1, 2, 3},
@@ -1582,7 +1590,7 @@ TEST_F(HloEvaluatorTest, SetAndGetTuples) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, SetAndGetNestedTuples) {
+TEST_P(HloEvaluatorTest, SetAndGetNestedTuples) {
   HloComputation::Builder b(TestName());
 
   // arg:
@@ -1609,9 +1617,9 @@ TEST_F(HloEvaluatorTest, SetAndGetNestedTuples) {
   b.AddInstruction(
       HloInstruction::CreateGetTupleElement(tuple2->shape(), outer_tuple, 1));
 
-  auto computation = module().AddEntryComputation(b.Build());
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result = Evaluate();
 
   auto result_inner_literal =
       Literal::CreateR2FromArray2D<double>(*operand_array);
@@ -1623,7 +1631,7 @@ TEST_F(HloEvaluatorTest, SetAndGetNestedTuples) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, Reverse) {
+TEST_P(HloEvaluatorTest, Reverse) {
   HloComputation::Builder b(TestName());
 
   // Input shape is float[4x3x2x1].
@@ -1649,10 +1657,9 @@ TEST_F(HloEvaluatorTest, Reverse) {
 
   const Shape shape = ShapeUtil::MakeShape(F32, {4, 3, 2, 1});
   b.AddInstruction(HloInstruction::CreateReverse(shape, operand, {0, 1}));
-  auto computation = module().AddEntryComputation(b.Build());
+  module().AddEntryComputation(b.Build());
 
-  std::unique_ptr<Literal> result =
-      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+  std::unique_ptr<Literal> result = Evaluate();
 
   // clang-format off
   auto expected = Literal::CreateR4FromArray4D<float>({
@@ -1677,7 +1684,7 @@ TEST_F(HloEvaluatorTest, Reverse) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
-TEST_F(HloEvaluatorTest, EvaluateWithSubstitutions) {
+TEST_P(HloEvaluatorTest, EvaluateWithSubstitutions) {
   HloComputation::Builder b(TestName());
   Shape shape = ShapeUtil::MakeShape(F32, {4});
 
@@ -1700,7 +1707,7 @@ TEST_F(HloEvaluatorTest, EvaluateWithSubstitutions) {
 
 // Check that EvaluateWithSubstitutions works if one of the operands to the op
 // we're evaluating is a constant.
-TEST_F(HloEvaluatorTest, EvaluateWithSubstitutionsWithConstantOperand) {
+TEST_P(HloEvaluatorTest, EvaluateWithSubstitutionsWithConstantOperand) {
   HloComputation::Builder b(TestName());
   Shape shape = ShapeUtil::MakeShape(F32, {4});
 
@@ -1722,5 +1729,8 @@ TEST_F(HloEvaluatorTest, EvaluateWithSubstitutionsWithConstantOperand) {
                                *result.ValueOrDie());
 }
 
+INSTANTIATE_TEST_CASE_P(HloEvaluatorTest_Instantiation, HloEvaluatorTest,
+                        ::testing::ValuesIn(use_bf16_params));
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
index ba75e2ef1b485f015f3b8f8dbd76f214d6ab0130..849aac0b12b096e5f7c4a5c441fc019c48a27060 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -32,7 +32,7 @@ HloProfileIndexMap::HloProfileIndexMap(const HloModule& module) {
     InsertOrDie(&computation_to_profile_idx_, computation,
                 current_profile_index++);
     for (const HloInstruction* instruction : computation->instructions()) {
-      // For simplicity we track all instrutions here, but we could skip
+      // For simplicity we track all instructions here, but we could skip
       // non-executing instructions like constants and parameters.
       InsertOrDie(&instruction_to_profile_idx_, instruction,
                   current_profile_index++);
@@ -76,8 +76,8 @@ std::unique_ptr<HloProfilePrinter> CreateHloProfilePrinter(
       HloProfilePrinter::HloInstructionInfo* instruction_info =
           &computation_info->instructions[instruction_index_in_static_data++];
       instruction_info->long_name = strdup(hlo->ToString().c_str());
-      instruction_info->short_name =
-          strdup(hlo->ToString(/*compact_operands=*/true).c_str());
+      instruction_info->short_name = strdup(
+          hlo->ToString(HloPrintOptions().set_compact_operands(true)).c_str());
       instruction_info->category = strdup(hlo->ToCategory().c_str());
       instruction_info->flop_count = cost_analysis.flop_count(*hlo);
       instruction_info->transcendental_count =
@@ -109,7 +109,8 @@ std::unique_ptr<HloProfilePrinter> CreateHloProfilePrinter(
   };
 
   return MakeUnique<HloProfilePrinter>(
-      computation_infos, hlo_profile_index_map.computation_count(), deleter);
+      computation_infos, hlo_profile_index_map.computation_count(),
+      /*profile_counters_size=*/max_profile_index, deleter);
 }
 
 HloExecutionProfile::HloExecutionProfile(
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.h b/tensorflow/compiler/xla/service/hlo_execution_profile.h
index 470fd4ce3c205d84152238f4b18daad77e403f68..1a6b069609cb58bcc9659b4457453758a277bc0e 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.h
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.h
@@ -125,6 +125,9 @@ class HloExecutionProfile {
   }
 
   std::vector<int64>* mutable_profile_counters() { return &profile_counters_; }
+  const std::vector<int64>& profile_counters() const {
+    return profile_counters_;
+  }
 
  private:
   const HloProfilePrinter& hlo_profile_printer_;
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index 84187d578346eafd5e32727a15f5eab9cc79feef..f7c6435002d278d93cc0814041a7e055e5573e3e 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -34,6 +34,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/window_util.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/numbers.h"
@@ -508,8 +509,17 @@ stylesheet="
 
     // The "to_node" value may be a NULL, indicating that this points to the
     // "root" tag rather than a normal node.
-    int64 from_node_id = node_ids_.at(from_node);
-    int64 to_node_id = to_node ? node_ids_.at(to_node) : root_node_id_;
+    int64 from_node_id =
+        tensorflow::gtl::FindWithDefault(node_ids_, from_node, -1);
+    if (from_node_id == -1) {
+      LOG(FATAL) << from_node->name() << " was added to edges but not to nodes";
+    }
+    int64 to_node_id =
+        to_node ? tensorflow::gtl::FindWithDefault(node_ids_, to_node, -1)
+                : root_node_id_;
+    if (to_node != nullptr && to_node_id == -1) {
+      LOG(FATAL) << to_node->name() << " was added to edges but not to nodes";
+    }
 
     add_hover_css_rule("node", from_node_id, kBlue);
     add_hover_css_rule("node", to_node_id, kRed);
@@ -653,12 +663,15 @@ string HloDotDumper::DumpComputation(const HloComputation* comp) {
 
 string HloDotDumper::DumpRootTag() {
   const HloInstruction* from = GetNodeForEdge(computation_->root_instruction());
-  auto from_id = InstructionId(from);
 
-  if (!filter_.Show(from)) {
+  // We didn't display constants as separate nodes; so if the root is a
+  // constant, we don't add root tag or edge for it.
+  if (!filter_.Show(from) || from->opcode() == HloOpcode::kConstant) {
     return "";
   }
 
+  auto from_id = InstructionId(from);
+
   // The ID of the root computation is otherwise unused, so it makes a good ID
   // to use for the root-tag node.  However, the edge_ids_ map requires a
   // HloInstruction* pointer for the 'to' value, so we use a NULL value there
@@ -784,7 +797,7 @@ string HloDotDumper::GetInstructionNodeInlinedOperands(
 
     // Otherwise, print e.g. "%constant.42 (s32[100])".
     string constant_name;
-    if (tensorflow::StringPiece(constant->name()).starts_with("%constant")) {
+    if (tensorflow::StringPiece(constant->name()).starts_with("constant")) {
       constant_name = constant->name();
     } else {
       constant_name = StrCat("constant ", constant->name());
@@ -948,6 +961,7 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) {
       return kGreen;
     case HloOpcode::kConvolution:
     case HloOpcode::kDot:
+    case HloOpcode::kFft:
       return kDarkBlue;
     case HloOpcode::kReducePrecision:
       return kRed;
@@ -1000,7 +1014,7 @@ string HloDotDumper::GetInstructionNodeLabel(const HloInstruction* instr) {
   // The HLO instruction name contains usually the opcode, e.g. "%add.42" is
   // an add instruction.  In this case we render just the name.
   if (tensorflow::StringPiece(instr->name())
-          .starts_with(StrCat("%", HloOpcodeString(instr->opcode())))) {
+          .starts_with(HloOpcodeString(instr->opcode()))) {
     return Printf("<b>%s</b>", HtmlLikeStringSanitize(instr->name()));
   }
   string extended_opcode =
@@ -1036,50 +1050,15 @@ string HloDotDumper::GetInstructionNodeMetadata(const HloInstruction* instr) {
 }
 
 string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) {
-  string opcode_specific_info = [&]() -> string {
-    switch (instr->opcode()) {
-      case HloOpcode::kRng:
-        return RandomDistribution_Name(instr->random_distribution());
-      case HloOpcode::kConvolution:
-        return StrCat(
-            HtmlLikeStringSanitize(
-                instr->ConvolutionDimensionNumbersToString()),
-            "<br/>",
-            HtmlLikeStringSanitize(window_util::ToString(instr->window())));
-      case HloOpcode::kBroadcast:
-      case HloOpcode::kTranspose:
-      case HloOpcode::kReduce:
-        return Printf("dims={%s}", Join(instr->dimensions(), ","));
-      case HloOpcode::kGetTupleElement:
-        return Printf("index=%lld", instr->tuple_index());
-      case HloOpcode::kBatchNormTraining:
-      case HloOpcode::kBatchNormGrad:
-        return Printf("feature_index=%lld", instr->feature_index());
-      case HloOpcode::kCustomCall:
-        return Printf("custom_call_target=%s", instr->custom_call_target());
-      case HloOpcode::kSlice:
-        return std::all_of(instr->slice_strides().begin(),
-                           instr->slice_strides().end(),
-                           [](int64 stride) { return stride == 1; })
-                   ? ""
-                   : StrCat("stride=", VectorString(instr->slice_strides()));
-      case HloOpcode::kSend:
-      case HloOpcode::kSendDone:
-      case HloOpcode::kRecv:
-      case HloOpcode::kRecvDone:
-        return StrCat("channel_id=", instr->channel_id());
-      default:
-        return "";
-    }
-  }();
-
   std::vector<string> lines;
-  if (!opcode_specific_info.empty()) {
-    lines.push_back(opcode_specific_info);
-  }
-  if (instr->has_sharding()) {
-    lines.push_back(StrCat("sharding=", instr->sharding().ToString()));
+
+  // Get the instruction's extra attributes excluding the names of its
+  // subcomputations, since those are drawn explicitly in the graph.
+  for (const auto& line : instr->ExtraAttributesToString(
+           HloPrintOptions().set_print_subcomputation_references(false))) {
+    lines.push_back(HtmlLikeStringSanitize(line));
   }
+
   // Show the shape and layout of the instruction, unless it's an inlined fusion
   // node -- there the shape and layout is present in the output node.
   if (instr->opcode() != HloOpcode::kFusion ||
@@ -1091,7 +1070,7 @@ string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) {
         instr->shape().dimensions_size() > 1 &&
         !ShapeUtil::IsTuple(instr->shape())) {
       StrAppend(&instr_shape, "{",
-                Join(instr->shape().layout().minor_to_major(), ","), "}");
+                Join(LayoutUtil::MinorToMajor(instr->shape()), ","), "}");
     }
 
     // Some instructions have giant tuples as their shapes, so truncate the
@@ -1353,19 +1332,16 @@ string SaveGraph(const string& graph,
       file_extension = ".pbtxt";
       break;
   }
-  string path = JoinPath(
-      dest_path, StrCat("hlo_graph_", output_num++, ".XXXXXX", file_extension));
+  string path = JoinPath(dest_path, StrCat("hlo_graph_", output_num++, "."));
   auto status = Status::OK();
-  int fd = mkstemps(&path[0], file_extension.length());
-  if (fd < 0) {
+  auto env = tensorflow::Env::Default();
+  if (!env->CreateUniqueFileName(&path, file_extension)) {
     status =
         Status(tensorflow::error::Code::UNKNOWN,
                StrCat("Failed to create temporary file to dump HLO graph: ",
                       strerror(errno)));
   } else {
-    status =
-        tensorflow::WriteStringToFile(tensorflow::Env::Default(), path, graph);
-    close(fd);
+    status = tensorflow::WriteStringToFile(env, path, graph);
   }
   if (!status.ok()) {
     LOG(WARNING) << "Saving HLO graph failed: " << status;
@@ -1438,7 +1414,8 @@ void DumpText(const HloModule& module, const string& label,
       do_prefix ? StrCat(prefix, "-", label, ".txt") : StrCat(label, ".txt");
   string path = JoinPath(directory_path, filename);
   TF_CHECK_OK(WriteStringToFile(
-      env, path, module.ToString(/*include_large_constants=*/true)));
+      env, path,
+      module.ToString(HloPrintOptions().set_print_large_constants(true))));
   LOG(INFO) << "dumping module '" << module.name() << "' to " << path;
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc
index 8e1531c87f9c6e133e2d6763b046b1d5dcbcd09f..1f00aa41dc783f9e5657f5fa654884a31fae0fe7 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc
@@ -117,5 +117,18 @@ TEST(HloGraphDumperTest, NestedFusion) {
       HasSubstr(inner_sum->name()));
 }
 
+TEST(HloGraphDumperTest, Constant) {
+  HloComputation::Builder b("b");
+  auto instruction = b.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(-42)));
+  instruction->set_name("i_am_a_constant_root_instruction");
+  HloModule m(TestName());
+  HloComputation* root_computation = m.AddEntryComputation(b.Build());
+  string graph = hlo_graph_dumper::DumpGraph(
+      *root_computation, /*label=*/"an_empty_graph", DebugOptions());
+  EXPECT_THAT(graph, HasSubstr("an_empty_graph"));
+  EXPECT_THAT(graph, Not(HasSubstr("i_am_a_constant_root_instruction")));
+}
+
 }  // anonymous namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index c30c4326547bbeae4f7054974f0d3fade65e3382..90121f7ffe11b379bea9e83a483c7e752c97998c 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -101,10 +101,10 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
 
   instruction->metadata_ = proto.metadata();
   if (proto.has_literal()) {
-    instruction->literal_ = MakeUnique<Literal>(proto.literal());
+    TF_ASSIGN_OR_RETURN(instruction->literal_,
+                        Literal::CreateFromProto(proto.literal()));
   }
   instruction->parameter_number_ = proto.parameter_number();
-  instruction->parameter_name_ = proto.parameter_name();
 
   instruction->tuple_index_ = proto.tuple_index();
   for (int64 dimension : proto.dimensions()) {
@@ -118,6 +118,10 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
         MakeUnique<ConvolutionDimensionNumbers>(
             proto.convolution_dimension_numbers());
   }
+  if (proto.has_dot_dimension_numbers()) {
+    instruction->dot_dimension_numbers_ =
+        MakeUnique<DotDimensionNumbers>(proto.dot_dimension_numbers());
+  }
   for (const HloInstructionProto::SliceDimensions& slice_dimensions :
        proto.slice_dimensions()) {
     instruction->slice_starts_.push_back(slice_dimensions.start());
@@ -141,6 +145,10 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
   instruction->infeed_config_ = proto.infeed_config();
   instruction->custom_call_target_ = proto.custom_call_target();
   instruction->outfeed_shape_ = proto.outfeed_shape();
+  instruction->fft_type_ = proto.fft_type();
+  for (int64 fft_len : proto.fft_length()) {
+    instruction->fft_length_.push_back(fft_len);
+  }
 
   return std::move(instruction);
 }
@@ -150,7 +158,6 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
   auto instruction =
       WrapUnique(new HloInstruction(HloOpcode::kParameter, shape));
   instruction->parameter_number_ = parameter_number;
-  instruction->parameter_name_ = name;
   instruction->name_ = name;
   return instruction;
 }
@@ -160,8 +167,7 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
   auto instruction =
       WrapUnique(new HloInstruction(HloOpcode::kTrace, ShapeUtil::MakeNil()));
   instruction->operands_.push_back(operand);
-  instruction->literal_.reset(new Literal);
-  instruction->literal_->append_u8s(tag);
+  instruction->literal_ = Literal::CreateR1U8(tag);
   return instruction;
 }
 
@@ -332,6 +338,41 @@ HloInstruction::CreateGetTupleElement(const Shape& shape,
   return instruction;
 }
 
+/* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateFft(
+    const Shape& shape, HloInstruction* operand, FftType fft_type,
+    tensorflow::gtl::ArraySlice<int64> fft_length) {
+  auto instruction = WrapUnique(new HloInstruction(HloOpcode::kFft, shape));
+  instruction->AppendOperand(operand);
+  instruction->fft_type_ = fft_type;
+  instruction->fft_length_.assign(fft_length.begin(), fft_length.end());
+  return instruction;
+}
+
+/* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateDot(
+    const Shape& shape, HloInstruction* lhs, HloInstruction* rhs,
+    const DotDimensionNumbers& dimension_numbers) {
+  auto instruction = WrapUnique(new HloInstruction(HloOpcode::kDot, shape));
+  instruction->AppendOperand(lhs);
+  instruction->AppendOperand(rhs);
+  instruction->dot_dimension_numbers_ =
+      MakeUnique<DotDimensionNumbers>(dimension_numbers);
+  return instruction;
+}
+
+/* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateCanonicalDot(
+    const Shape& shape, HloInstruction* lhs, HloInstruction* rhs) {
+  CHECK_EQ(ShapeUtil::Rank(lhs->shape()), 2);
+  CHECK_EQ(ShapeUtil::Rank(rhs->shape()), 2);
+
+  auto instruction = WrapUnique(new HloInstruction(HloOpcode::kDot, shape));
+  instruction->AppendOperand(lhs);
+  instruction->AppendOperand(rhs);
+  instruction->dot_dimension_numbers_ = MakeUnique<DotDimensionNumbers>();
+  instruction->dot_dimension_numbers_->add_lhs_contracting_dimensions(1);
+  instruction->dot_dimension_numbers_->add_rhs_contracting_dimensions(0);
+  return instruction;
+}
+
 /* static */ std::unique_ptr<HloInstruction>
 HloInstruction::CreateReducePrecision(const Shape& shape,
                                       HloInstruction* operand,
@@ -346,12 +387,9 @@ HloInstruction::CreateReducePrecision(const Shape& shape,
 }
 
 /* static */ std::unique_ptr<HloInstruction>
-HloInstruction::CreateCrossReplicaSum(const Shape& shape,
-                                      HloInstruction* operand) {
-  auto instruction =
-      WrapUnique(new HloInstruction(HloOpcode::kCrossReplicaSum, shape));
-  instruction->AppendOperand(operand);
-  return instruction;
+HloInstruction::CreateCrossReplicaSum(
+    const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands) {
+  return CreateNary(shape, HloOpcode::kCrossReplicaSum, operands);
 }
 
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateInfeed(
@@ -670,10 +708,26 @@ HloInstruction::CreateSelectAndScatter(
   return instruction;
 }
 
+// We put the fusion kind into the instruction's name for transpose-dot and
+// backward-conv fusions, since those fusions are really just describing a type
+// of dot/conv rather than generating a novel computation.
+static string FusionNodeName(HloInstruction::FusionKind fusion_kind) {
+  switch (fusion_kind) {
+    case HloInstruction::FusionKind::kTransposeDot:
+      return "dot_fusion";
+    case HloInstruction::FusionKind::kConvBackwardInput:
+    case HloInstruction::FusionKind::kConvBackwardFilter:
+      return "conv_fusion";
+    default:
+      return "fusion";
+  }
+}
+
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateFusion(
     const Shape& shape, FusionKind fusion_kind, HloInstruction* fused_root) {
   auto instruction = WrapUnique(new HloInstruction(HloOpcode::kFusion, shape));
   instruction->fusion_kind_ = fusion_kind;
+  instruction->name_ = FusionNodeName(fusion_kind);
   instruction->set_parent(fused_root->parent());
   instruction->set_metadata(fused_root->metadata());
   instruction->CloneAndFuseInternal(fused_root);
@@ -689,6 +743,7 @@ HloInstruction::CreateSelectAndScatter(
     instruction->AppendOperand(operand);
   }
   instruction->fusion_kind_ = fusion_kind;
+  instruction->name_ = FusionNodeName(fusion_kind);
   instruction->called_computations_.push_back(fusion_computation);
   fusion_computation->SetFusionInstruction(instruction.get());
   return instruction;
@@ -985,6 +1040,7 @@ bool HloInstruction::HasSideEffect() const {
     case HloOpcode::kSendDone:
     case HloOpcode::kRecv:
     case HloOpcode::kRecvDone:
+    case HloOpcode::kRng:
     case HloOpcode::kInfeed:
     case HloOpcode::kOutfeed:
     case HloOpcode::kTrace:
@@ -1086,7 +1142,6 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
     case HloOpcode::kLe:
     case HloOpcode::kLt:
     case HloOpcode::kNe:
-    case HloOpcode::kDot:
     case HloOpcode::kMaximum:
     case HloOpcode::kMinimum:
     case HloOpcode::kPower:
@@ -1138,9 +1193,16 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
       clone = CreateConvolve(shape, new_operands[0], new_operands[1], *window_,
                              *convolution_dimension_numbers_);
       break;
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kDot:
+      CHECK_EQ(new_operands.size(), 2);
+      clone = CreateDot(shape, new_operands[0], new_operands[1],
+                        *dot_dimension_numbers_);
+      break;
+    case HloOpcode::kFft:
       CHECK_EQ(new_operands.size(), 1);
-      clone = CreateCrossReplicaSum(shape, new_operands[0]);
+      return CreateFft(shape, new_operands[0], fft_type_, fft_length_);
+    case HloOpcode::kCrossReplicaSum:
+      clone = CreateCrossReplicaSum(shape, new_operands);
       break;
     case HloOpcode::kGetTupleElement:
       CHECK_EQ(new_operands.size(), 1);
@@ -1215,7 +1277,7 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
       clone = CloneFusionWithNewOperands(shape, new_operands, module);
       break;
     case HloOpcode::kParameter:
-      clone = CreateParameter(parameter_number_, shape, parameter_name_);
+      clone = CreateParameter(parameter_number_, shape, name_);
       break;
     case HloOpcode::kBatchNormTraining:
       CHECK_EQ(new_operands.size(), 3);
@@ -1244,10 +1306,27 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
                                   new_operands[4], epsilon(), feature_index());
       break;
     case HloOpcode::kConditional:
-    case HloOpcode::kRecv:
-    case HloOpcode::kRecvDone:
+      CHECK_EQ(new_operands.size(), 3);
+      clone = CreateConditional(shape, new_operands[0], new_operands[1],
+                                true_computation(), new_operands[2],
+                                false_computation());
+      break;
     case HloOpcode::kSend:
+      CHECK_EQ(new_operands.size(), 1);
+      clone = CreateSend(new_operands[0], channel_id());
+      break;
     case HloOpcode::kSendDone:
+      CHECK_EQ(new_operands.size(), 1);
+      clone = CreateSendDone(new_operands[0]);
+      break;
+    case HloOpcode::kRecv:
+      CHECK_EQ(new_operands.size(), 0);
+      clone = CreateRecv(shape, channel_id());
+      break;
+    case HloOpcode::kRecvDone:
+      CHECK_EQ(new_operands.size(), 1);
+      clone = CreateRecvDone(new_operands[0]);
+      break;
     case HloOpcode::kTrace:
       LOG(FATAL) << "Not yet implemented, clone: " << HloOpcodeString(opcode_);
   }
@@ -1492,7 +1571,7 @@ bool HloInstruction::HasConstantOperand() const {
 
 bool HloInstruction::IdenticalSlowPath(
     const HloInstruction& other,
-    std::function<bool(const HloComputation*, const HloComputation*)>
+    const std::function<bool(const HloComputation*, const HloComputation*)>&
         eq_computations) const {
   // Perform opcode specific checks.
   switch (opcode()) {
@@ -1509,7 +1588,6 @@ bool HloInstruction::IdenticalSlowPath(
     case HloOpcode::kCos:
     case HloOpcode::kCrossReplicaSum:
     case HloOpcode::kDivide:
-    case HloOpcode::kDot:
     case HloOpcode::kEq:
     case HloOpcode::kExp:
     case HloOpcode::kFloor:
@@ -1582,6 +1660,15 @@ bool HloInstruction::IdenticalSlowPath(
              protobuf_util::ProtobufEquals(
                  convolution_dimension_numbers(),
                  other.convolution_dimension_numbers());
+    // Check dot dimension numbers.
+    case HloOpcode::kDot:
+      return protobuf_util::ProtobufEquals(dot_dimension_numbers(),
+                                           other.dot_dimension_numbers());
+
+    // FFT has various types & lengths.
+    case HloOpcode::kFft:
+      return fft_type() == other.fft_type() &&
+             fft_length() == other.fft_length();
 
     // Reduction results are determined by the reduction dimension and the
     // reduction computation.
@@ -1636,9 +1723,11 @@ bool HloInstruction::IdenticalSlowPath(
       return custom_call_target_ == other.custom_call_target_;
     case HloOpcode::kReverse:
       return dimensions() == other.dimensions();
+    case HloOpcode::kConditional:
+      return eq_computations(true_computation(), other.true_computation()) &&
+             eq_computations(false_computation(), other.false_computation());
 
     // These opcodes are not yet supported.
-    case HloOpcode::kConditional:
     case HloOpcode::kInfeed:
     case HloOpcode::kOutfeed:
     case HloOpcode::kSort:
@@ -1882,16 +1971,23 @@ string HloInstruction::SignatureString() const {
   return StrCat("(", operands, ") -> ", ShapeUtil::HumanString(shape()));
 }
 
-string HloInstruction::ToString(bool compact_operands, bool include_metadata,
-                                bool include_large_constants) const {
+namespace {
+
+string PrintName(const string& name, const HloPrintOptions& options) {
+  return StrCat(options.print_percent() ? "%" : "", name);
+}
+
+}  // namespace
+
+string HloInstruction::ToString(const HloPrintOptions& options) const {
   string result =
-      StrCat("%", name(), " = ", ShapeUtil::HumanStringWithLayout(shape()), " ",
-             HloOpcodeString(opcode()), "(",
-             OperandsToString(compact_operands, include_large_constants), ")");
-  for (const string& extra : ExtraAttributesToString()) {
+      StrCat(PrintName(name(), options), " = ",
+             ShapeUtil::HumanStringWithLayout(shape()), " ",
+             HloOpcodeString(opcode()), "(", OperandsToString(options), ")");
+  for (const string& extra : ExtraAttributesToString(options)) {
     StrAppend(&result, ", ", extra);
   }
-  if (include_metadata &&
+  if (options.print_metadata() &&
       (!metadata_.op_type().empty() || !metadata_.op_name().empty() ||
        !metadata_.source_file().empty())) {
     StrAppend(&result, ", metadata={", xla::OpMetadataToString(metadata_), "}");
@@ -1899,14 +1995,13 @@ string HloInstruction::ToString(bool compact_operands, bool include_metadata,
   return result;
 }
 
-string HloInstruction::OperandsToString(bool compact,
-                                        bool include_large_constants) const {
+string HloInstruction::OperandsToString(const HloPrintOptions& options) const {
   string operands;
   if (opcode() == HloOpcode::kConstant) {
     // For constants, show the actual value in place of an empty operand list.
     if ((!ShapeUtil::IsTuple(shape()) &&
          ShapeUtil::ElementsIn(shape()) <= 10) ||
-        include_large_constants) {
+        options.print_large_constants()) {
       // Literal::ToString emits multidimensional arrays over multiple
       // lines. Compact this into one line by stripping out white space.
       string tmp = literal().ToString();
@@ -1931,14 +2026,19 @@ string HloInstruction::OperandsToString(bool compact,
   } else {
     tensorflow::gtl::ArraySlice<HloInstruction*> slice(operands_);
     const int64 kMaxOperandsToShowIfCompact = 4;
-    if (compact && slice.size() > kMaxOperandsToShowIfCompact) {
+    if (options.compact_operands() &&
+        slice.size() > kMaxOperandsToShowIfCompact) {
       slice.remove_suffix(slice.size() - kMaxOperandsToShowIfCompact);
     }
     operands = Join(slice, ", ", [&](string* out, HloInstruction* operand) {
-      *out += ShapeUtil::HumanStringWithLayout(operand->shape());
-      if (!compact) {
-        StrAppend(out, " %", operand->name());
+      std::vector<string> str;
+      if (options.print_operand_shape()) {
+        str.push_back(ShapeUtil::HumanStringWithLayout(operand->shape()));
+      }
+      if (!options.compact_operands()) {
+        str.push_back(PrintName(operand->name(), options));
       }
+      StrAppend(out, Join(str, " "));
     });
     const int64 remaining = operands_.size() - slice.size();
     if (slice.size() != operands_.size()) {
@@ -1948,7 +2048,8 @@ string HloInstruction::OperandsToString(bool compact,
   return operands;
 }
 
-std::vector<string> HloInstruction::ExtraAttributesToString() const {
+std::vector<string> HloInstruction::ExtraAttributesToString(
+    const HloPrintOptions& options) const {
   std::vector<string> extra;
   if (opcode() == HloOpcode::kFusion) {
     extra.push_back(StrCat("kind=", xla::ToString(fusion_kind())));
@@ -1990,23 +2091,42 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
   if (convolution_dimension_numbers_ != nullptr) {
     extra.push_back(ConvolutionDimensionNumbersToString());
   }
-
-  if (opcode() == HloOpcode::kWhile) {
-    extra.push_back(StrCat("condition=%", while_condition()->name()));
-    extra.push_back(StrCat("body=%", while_body()->name()));
-  } else if (opcode() == HloOpcode::kSelectAndScatter) {
-    extra.push_back(StrCat("select=%", select()->name()));
-    extra.push_back(StrCat("scatter=%", scatter()->name()));
-  } else if (opcode() == HloOpcode::kCall || opcode() == HloOpcode::kMap ||
-             opcode() == HloOpcode::kReduceWindow ||
-             opcode() == HloOpcode::kReduce) {
-    extra.push_back(StrCat("to_apply=%", to_apply()->name()));
-  } else if (!called_computations().empty()) {
-    extra.push_back(StrCat(
-        "calls=", Join(called_computations(), ", ",
-                       [](string* out, const HloComputation* computation) {
-                         StrAppend(out, "%", computation->name());
-                       })));
+  if (dot_dimension_numbers_ != nullptr) {
+    extra.push_back(DotDimensionNumbersToString());
+  }
+  if (opcode() == HloOpcode::kFft) {
+    extra.push_back(StrCat("fft_type=", FftType_Name(fft_type())));
+    extra.push_back(StrCat("fft_length={", Join(fft_length(), ","), "}"));
+  }
+
+  if (options.print_subcomputation_references()) {
+    if (opcode() == HloOpcode::kWhile) {
+      extra.push_back(
+          StrCat("condition=", PrintName(while_condition()->name(), options)));
+      extra.push_back(
+          StrCat("body=", PrintName(while_body()->name(), options)));
+    } else if (opcode() == HloOpcode::kSelectAndScatter) {
+      extra.push_back(StrCat("select=", PrintName(select()->name(), options)));
+      extra.push_back(
+          StrCat("scatter=", PrintName(scatter()->name(), options)));
+    } else if (opcode() == HloOpcode::kConditional) {
+      extra.push_back(StrCat("true_computation=",
+                             PrintName(true_computation()->name(), options)));
+      extra.push_back(StrCat("false_computation=",
+                             PrintName(false_computation()->name(), options)));
+    } else if (opcode() == HloOpcode::kCall || opcode() == HloOpcode::kMap ||
+               opcode() == HloOpcode::kReduceWindow ||
+               opcode() == HloOpcode::kReduce) {
+      extra.push_back(
+          StrCat("to_apply=", PrintName(to_apply()->name(), options)));
+    } else if (!called_computations().empty()) {
+      extra.push_back(StrCat(
+          "calls=", Join(called_computations(), ", ",
+                         [&](string* out, const HloComputation* computation) {
+                           StrAppend(out,
+                                     PrintName(computation->name(), options));
+                         })));
+    }
   }
 
   if (opcode() == HloOpcode::kSend || opcode() == HloOpcode::kRecv ||
@@ -2023,8 +2143,9 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
   if (!control_predecessors_.empty()) {
     extra.push_back(StrCat("control-predecessors={",
                            Join(control_predecessors_, ", ",
-                                [](string* out, HloInstruction* pre) {
-                                  StrAppend(out, "%", pre->name());
+                                [&](string* out, HloInstruction* pre) {
+                                  StrAppend(out,
+                                            PrintName(pre->name(), options));
                                 }),
                            "}"));
   }
@@ -2035,6 +2156,22 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
     extra.push_back(
         StrCat("outfeed_config=\"", CEscape(outfeed_config_), "\""));
   }
+  if (opcode() == HloOpcode::kRng) {
+    extra.push_back(
+        StrCat("distribution=", RandomDistributionToString(distribution_)));
+  }
+  if (opcode() == HloOpcode::kReducePrecision) {
+    extra.push_back(StrCat("exponent_bits=", exponent_bits_));
+    extra.push_back(StrCat("mantissa_bits=", mantissa_bits_));
+  }
+
+  // By contract, we print the custom call target even if
+  // !options.print_subcomputation_references(), because the call target is not
+  // an HloComputation.
+  if (opcode() == HloOpcode::kCustomCall) {
+    extra.push_back(
+        StrCat("custom_call_target=\"", CEscape(custom_call_target_), "\""));
+  }
   return extra;
 }
 
@@ -2064,7 +2201,6 @@ HloInstructionProto HloInstruction::ToProto() const {
     *proto.mutable_literal() = literal_->ToProto();
   }
   proto.set_parameter_number(parameter_number_);
-  proto.set_parameter_name(parameter_name_);
   if (opcode() == HloOpcode::kFusion) {
     proto.set_fusion_kind(xla::ToString(fusion_kind()));
     *proto.mutable_fused_instructions_computation() =
@@ -2086,6 +2222,9 @@ HloInstructionProto HloInstruction::ToProto() const {
     *proto.mutable_convolution_dimension_numbers() =
         *convolution_dimension_numbers_;
   }
+  if (dot_dimension_numbers_ != nullptr) {
+    *proto.mutable_dot_dimension_numbers() = *dot_dimension_numbers_;
+  }
   for (int i = 0; i < slice_starts_.size(); ++i) {
     auto* slice_dimension = proto.add_slice_dimensions();
     slice_dimension->set_start(slice_starts_[i]);
@@ -2110,6 +2249,10 @@ HloInstructionProto HloInstruction::ToProto() const {
   proto.set_infeed_config(infeed_config_);
   proto.set_custom_call_target(custom_call_target_);
   *proto.mutable_outfeed_shape() = outfeed_shape_;
+  proto.set_fft_type(fft_type_);
+  for (int64 fft_len : fft_length_) {
+    proto.add_fft_length(fft_len);
+  }
 
   return proto;
 }
@@ -2120,7 +2263,7 @@ string HloInstruction::ToCategory() const {
     return "data formatting";
   }
 
-  if (opcode() == HloOpcode::kConvolution) {
+  auto conv_category = [&] {
     string category = "convolution";
     if (window_util::HasBaseDilation(window())) {
       category += " base-dilated";
@@ -2129,44 +2272,36 @@ string HloInstruction::ToCategory() const {
       category += " window-dilated";
     }
     return category;
+  };
+
+  if (opcode() == HloOpcode::kConvolution) {
+    return conv_category();
   }
 
+  // Give transpose-dot and backwards-conv fusions the categories "dot" and
+  // "convolution" so they match the categories of proper kDot and kConvolution
+  // ops.  These fusion categories are really just a way of expressing a
+  // particular kind of dot or conv, so they should have the same category as a
+  // vanilla dot/conv.
   if (opcode() == HloOpcode::kFusion) {
-    if (operands().size() == 2) {
-      bool saw_rank_1 = false;
-      bool saw_higher_rank = false;
-      for (const auto* operand : operands()) {
-        if (!ShapeUtil::IsTuple(operand->shape())) {
-          saw_rank_1 |= ShapeUtil::Rank(operand->shape()) == 1;
-          saw_higher_rank |= ShapeUtil::Rank(operand->shape()) > 1;
-        }
-      }
-      if (saw_rank_1 && saw_higher_rank) {
-        return "rank-1-broadcast binary fusion";
-      }
-    }
     switch (fusion_kind()) {
       case FusionKind::kLoop:
-        if (IsElementwise()) {
-          return "elementwise fusion";
-        } else {
-          return "non-elementwise fusion";
-        }
+        return "loop fusion";
       case FusionKind::kInput:
         return "input fusion";
       case FusionKind::kOutput:
         return "output fusion";
       case FusionKind::kTransposeDot:
-        return "dot fusion";
+        return "dot";
       case FusionKind::kConvBackwardFilter:
       case FusionKind::kConvBackwardInput:
-        return "convolution fusion";
+        return conv_category();
       case FusionKind::kCustom:
         return "custom fusion";
     }
   }
 
-  if (IsElementwise() && opcode() != HloOpcode::kFusion) {
+  if (IsElementwise()) {
     return "non-fusion elementwise";
   }
 
@@ -2182,7 +2317,7 @@ void HloInstruction::set_tracing(HloInstruction* trace_instruction) {
 string HloInstruction::TracingTag() const {
   CHECK_EQ(HloOpcode::kTrace, opcode());
   CHECK(literal_ != nullptr);
-  return literal_->u8s_string();
+  return literal_->GetR1U8AsString();
 }
 
 bool HloInstruction::IsFused() const { return parent_->IsFusionComputation(); }
@@ -2325,6 +2460,8 @@ Status HloInstruction::Visit(DfsHloVisitorBase<HloInstructionPtr>* visitor) {
       return visitor->HandleSelect(this);
     case HloOpcode::kConvolution:
       return visitor->HandleConvolution(this);
+    case HloOpcode::kFft:
+      return visitor->HandleFft(this);
     case HloOpcode::kCrossReplicaSum:
       return visitor->HandleCrossReplicaSum(this);
     case HloOpcode::kTuple:
@@ -3001,6 +3138,28 @@ string OpMetadataToString(const OpMetadata& metadata) {
   return Join(result, " ");
 }
 
+string RandomDistributionToString(const RandomDistribution& distribution) {
+  return tensorflow::str_util::Lowercase(RandomDistribution_Name(distribution));
+}
+
+StatusOr<RandomDistribution> StringToRandomDistribution(const string& name) {
+  static std::unordered_map<string, RandomDistribution>* map = [] {
+    static auto* map = new std::unordered_map<string, RandomDistribution>;
+    for (int i = 0; i < RandomDistribution_ARRAYSIZE; i++) {
+      if (RandomDistribution_IsValid(i)) {
+        auto value = static_cast<RandomDistribution>(i);
+        (*map)[RandomDistributionToString(value)] = value;
+      }
+    }
+    return map;
+  }();
+  auto found = map->find(tensorflow::str_util::Lowercase(name));
+  if (found == map->end()) {
+    return InvalidArgument("Unknown distribution");
+  }
+  return found->second;
+}
+
 std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind) {
   return os << ToString(kind);
 }
@@ -3051,6 +3210,29 @@ string HloInstruction::ConvolutionDimensionNumbersToString() const {
   return result;
 }
 
+string HloInstruction::DotDimensionNumbersToString() const {
+  std::vector<string> result;
+  if (dot_dimension_numbers_ == nullptr) {
+    return "";
+  }
+  const DotDimensionNumbers& dnums = *dot_dimension_numbers_;
+  if (!dnums.lhs_batch_dimensions().empty()) {
+    result.push_back(StrCat("lhs_batch_dims={",
+                            Join(dnums.lhs_batch_dimensions(), ","), "}"));
+  }
+  result.push_back(StrCat("lhs_contracting_dims={",
+                          Join(dnums.lhs_contracting_dimensions(), ","), "}"));
+
+  if (!dnums.rhs_batch_dimensions().empty()) {
+    result.push_back(StrCat("rhs_batch_dims={",
+                            Join(dnums.rhs_batch_dimensions(), ","), "}"));
+  }
+  result.push_back(StrCat("rhs_contracting_dims={",
+                          Join(dnums.rhs_contracting_dimensions(), ","), "}"));
+
+  return Join(result, ", ");
+}
+
 bool HloInstruction::CouldBeBitcast() const {
   switch (opcode_) {
     case HloOpcode::kTranspose:
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index cda8b07c61e2b36a83184648f6f3744deeb86812..e700ec1d2903ac0bb77e36097c3e1e582206e4d5 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -25,6 +25,7 @@ limitations under the License.
 #include <iosfwd>
 #include <list>
 #include <memory>
+#include <set>
 #include <string>
 #include <tuple>
 #include <unordered_map>
@@ -56,6 +57,107 @@ namespace xla {
 class HloComputation;
 class HloModule;
 
+// A bunch of switches that control how the hlo text should be printed.
+class HloPrintOptions {
+ public:
+  // Constructs the default print options: don't print large constants, don't
+  // compact operands, no indentation.
+  HloPrintOptions()
+      : print_large_constants_(false),
+        print_subcomputation_references_(true),
+        print_metadata_(true),
+        compact_operands_(false),
+        print_operand_shape_(true),
+        print_program_shape_(true),
+        print_percent_(true),
+        indent_amount_(0) {}
+
+  static HloPrintOptions ShortParsable() {
+    return HloPrintOptions()
+        .set_print_large_constants(true)
+        .set_print_subcomputation_references(true)
+        .set_print_metadata(false)
+        .set_print_operand_shape(false)
+        .set_print_program_shape(false)
+        .set_print_percent(false);
+  }
+
+  // If true, large constants will be printed out.
+  HloPrintOptions& set_print_large_constants(bool value) {
+    print_large_constants_ = value;
+    return *this;
+  }
+
+  // If true, the names of subcomputations (e.g. a fusion node's fused
+  // computation) won't be printed.  This makes the resulting text not parsable.
+  //
+  // A CustomCall's call target is printed even if
+  // print_subcomputation_references is false, because the call target isn't an
+  // HloComputation.
+  HloPrintOptions& set_print_subcomputation_references(bool value) {
+    print_subcomputation_references_ = value;
+    return *this;
+  }
+
+  // If true, metatdata will be printed.
+  HloPrintOptions& set_print_metadata(bool value) {
+    print_metadata_ = value;
+    return *this;
+  }
+
+  // If true, operands' shapes will be printed.
+  HloPrintOptions& set_print_operand_shape(bool value) {
+    print_operand_shape_ = value;
+    return *this;
+  }
+
+  // If true, program shape of hlo computations will be printed.
+  HloPrintOptions& set_print_program_shape(bool value) {
+    print_program_shape_ = value;
+    return *this;
+  }
+
+  // If true, names will be printed with prefix '%'.
+  HloPrintOptions& set_print_percent(bool value) {
+    print_percent_ = value;
+    return *this;
+  }
+
+  // If true, only a part of operands will be printed out, and their names will
+  // be omitted (note that in this case the text will not be parsable).
+  HloPrintOptions& set_compact_operands(bool value) {
+    compact_operands_ = value;
+    return *this;
+  }
+
+  // The indent of the hlo text block.
+  HloPrintOptions& set_indent_amount(int value) {
+    indent_amount_ = value;
+    return *this;
+  }
+
+  bool print_large_constants() const { return print_large_constants_; }
+  bool print_subcomputation_references() const {
+    return print_subcomputation_references_;
+  }
+  bool print_metadata() const { return print_metadata_; }
+  bool compact_operands() const { return compact_operands_; }
+  bool print_operand_shape() const { return print_operand_shape_; }
+  bool print_program_shape() const { return print_program_shape_; }
+  bool print_percent() const { return print_percent_; }
+  int indent_amount() const { return indent_amount_; }
+
+ private:
+  bool print_large_constants_;
+  bool print_subcomputation_references_;
+  bool print_metadata_;
+  bool compact_operands_;
+  bool print_operand_shape_;
+  bool print_program_shape_;
+  bool print_percent_;
+  int indent_amount_;
+};
+
 // HLO instructions are the IR used by the high-level compiler.
 class HloInstruction {
  public:
@@ -160,6 +262,23 @@ class HloInstruction {
       const Window& window,
       const ConvolutionDimensionNumbers& dimension_numbers);
 
+  // Creates an FFT op, of the type indicated by fft_type.
+  static std::unique_ptr<HloInstruction> CreateFft(
+      const Shape& shape, HloInstruction* operand, FftType fft_type,
+      tensorflow::gtl::ArraySlice<int64> fft_length);
+
+  // Creates a dot op with operands 'lhs' and 'rhs' with contracting and batch
+  // dimensions specified in 'dimension_numbers'.
+  static std::unique_ptr<HloInstruction> CreateDot(
+      const Shape& shape, HloInstruction* lhs, HloInstruction* rhs,
+      const DotDimensionNumbers& dimension_numbers);
+
+  // Creates a dot op with operands 'lhs' and 'rhs' that contracts dimension 1
+  // of the LHS with dimension 0 of the RHS with no batch dimensions.  Both LHS
+  // and the RHS must be of rank 2.
+  static std::unique_ptr<HloInstruction> CreateCanonicalDot(
+      const Shape& shape, HloInstruction* lhs, HloInstruction* rhs);
+
   // Creates a reduce-precision op, where operand is the data to reduce in
   // precision, and exponent_bits and mantissa_bits describe the precision to
   // reduce it to.
@@ -169,7 +288,8 @@ class HloInstruction {
 
   // Creates a cross replica sum op.
   static std::unique_ptr<HloInstruction> CreateCrossReplicaSum(
-      const Shape& shape, HloInstruction* operand);
+      const Shape& shape,
+      tensorflow::gtl::ArraySlice<HloInstruction*> operands);
 
   // Creates a conversion instruction, where operand is the data to convert and
   // shape is the target shape for the conversion.
@@ -421,7 +541,7 @@ class HloInstruction {
   Status RemoveControlDependencyTo(HloInstruction* instruction);
 
   // Returns the set of control predecessors (successors) of this
-  // instruction. Control predecessors (sucessors) must execute before (after)
+  // instruction. Control predecessors (successors) must execute before (after)
   // the current instruction.
   const std::vector<HloInstruction*>& control_predecessors() const {
     return control_predecessors_;
@@ -434,9 +554,9 @@ class HloInstruction {
   // Layout of the instructions' output array is not considered.
   bool Identical(
       const HloInstruction& other,
-      std::function<bool(const HloInstruction*, const HloInstruction*)>
+      const std::function<bool(const HloInstruction*, const HloInstruction*)>&
           eq_operands = std::equal_to<const HloInstruction*>(),
-      std::function<bool(const HloComputation*, const HloComputation*)>
+      const std::function<bool(const HloComputation*, const HloComputation*)>&
           eq_computations = std::equal_to<const HloComputation*>()) const {
     // An instruction is always identical to itself.
     if (this == &other) {
@@ -446,11 +566,19 @@ class HloInstruction {
     // Identical instruction must have the same opcode and identical operands.
     // In general, there is no need to check shape because shape is inferred
     // from the shape of the operands.
-    if (opcode() != other.opcode() ||
-        !ContainersEqual(operands(), other.operands(),
-                         std::move(eq_operands))) {
+    if (opcode() != other.opcode()) {
+      return false;
+    }
+    if (operands().size() != other.operands().size()) {
       return false;
     }
+    // Use an explicit loop rather than ContainerEquals, because copying around
+    // std::functions may be too expensive in some cases.
+    for (size_t i = 0; i < operands().size(); ++i) {
+      if (!eq_operands(operand(i), other.operand(i))) {
+        return false;
+      }
+    }
 
     return IdenticalSlowPath(other, eq_computations);
   }
@@ -540,16 +668,6 @@ class HloInstruction {
     return parameter_number_;
   }
 
-  const string& parameter_name() const {
-    CHECK_EQ(HloOpcode::kParameter, opcode_);
-    return parameter_name_;
-  }
-
-  void set_parameter_name(const string& str) {
-    CHECK_EQ(HloOpcode::kParameter, opcode_);
-    parameter_name_ = str;
-  }
-
   // Returns the dimension sizes or numbers associated with this instruction.
   //
   // Precondition: opcode() is one of: concatenate, reduce, broadcast, reshape,
@@ -637,18 +755,20 @@ class HloInstruction {
   string SignatureString() const;
 
   // Returns a debugging string that represents this instruction.
-  string ToString(bool compact_operands = false, bool include_metadata = true,
-                  bool include_large_constants = false) const;
+  //
+  // (We express the default options using an overload rather than a default
+  // param because gdb ignores default params, but does resolve overloads.)
+  string ToString() const { return ToString(HloPrintOptions()); }
+  string ToString(const HloPrintOptions& options) const;
 
   // Components of the ToString() representation:
 
   // Returns a string representation of the operand list.
-  string OperandsToString(bool compact, bool include_large_constants) const;
+  string OperandsToString(const HloPrintOptions& options) const;
 
   // Returns string representation of op-specific attributes.
-  std::vector<string> ExtraAttributesToString() const;
-
-  string ToStringNoMetadata() const { return ToString(false, false); }
+  std::vector<string> ExtraAttributesToString(
+      const HloPrintOptions& options) const;
 
   // As ToString, but returns a shorter string.
   string ToShortString() const;
@@ -676,13 +796,15 @@ class HloInstruction {
   // Returns feature_index field associated with the instruction. The index
   // represents the index of the feature dimension.
   //
-  // Precondition: opcode() == HloOpcode::kBatchNormTraining
+  // Precondition: opcode() is one of kBatchNormTraining, kBatchNormInference,
+  // or kBatchNormGrad.
   int64 feature_index() const { return feature_index_; }
 
   // Returns a epsilon value associated with the instruction. The is a small
   // number added to the variance to avoid divide-by-zero error.
   //
-  // Precondition: opcode() == HloOpcode::kBatchNormTraining
+  // Precondition: opcode() is one of kBatchNormTraining, kBatchNormInference,
+  // or kBatchNormGrad.
   float epsilon() const { return epsilon_; }
 
   // Returns the infeed configuration string. The infeed configuration includes
@@ -856,6 +978,17 @@ class HloInstruction {
   }
   const std::vector<int64>& slice_strides() const { return slice_strides_; }
 
+  // Returns the flag that describes whether a slice must be lowered into an
+  // offset into the original operand.
+  bool IsInPlaceSlice() const { return is_in_place_slice_; }
+
+  // Sets and returns the flag that describes whether a slice must be lowered
+  // into an offset into the original operand.
+  bool SetIsInPlaceSlice(bool value) {
+    is_in_place_slice_ = value;
+    return value;
+  }
+
   // Returns the size of the slice in the given dimension for a dynamic
   // slice node.
   //
@@ -912,9 +1045,28 @@ class HloInstruction {
     return *convolution_dimension_numbers_;
   }
 
+  FftType fft_type() const {
+    CHECK_EQ(HloOpcode::kFft, opcode_);
+    return fft_type_;
+  }
+
+  const std::vector<int64>& fft_length() const {
+    CHECK_EQ(HloOpcode::kFft, opcode_);
+    return fft_length_;
+  }
+
   // Returns the dump string of the convolution dimension numbers.
   string ConvolutionDimensionNumbersToString() const;
 
+  // Returns data on the dimension numbers used for a dot operation.
+  const DotDimensionNumbers& dot_dimension_numbers() const {
+    CHECK(dot_dimension_numbers_ != nullptr);
+    return *dot_dimension_numbers_;
+  }
+
+  // Returns the dump string of the dot dimension numbers.
+  string DotDimensionNumbersToString() const;
+
   // Returns the random distribution for this rng node.
   //
   // Precondition: opcode() == HloOpcode::kRng
@@ -1006,10 +1158,9 @@ class HloInstruction {
   std::tuple<bool, std::vector<int64>, std::vector<int64>>
   ReshapeMerelyInsertsOrDeletes1SizedDimensions() const;
 
-  // Returns a string identifier for this instruction. If no string identifier
-  // has been explicitly set, then the identifier is the serialized pointer to
-  // this instruction.
+  // Gets/sets the string identifier for this instruction.
   const string& name() const { return name_; }
+  void set_name(tensorflow::StringPiece name) { name_ = name.ToString(); }
 
   // Use the given NameUniquer to select a unique name for the instruction based
   // on the instruction's existing name.
@@ -1070,7 +1221,7 @@ class HloInstruction {
   // See comments on Identical().
   bool IdenticalSlowPath(
       const HloInstruction& other,
-      std::function<bool(const HloComputation*, const HloComputation*)>
+      const std::function<bool(const HloComputation*, const HloComputation*)>&
           eq_computations) const;
 
   // Creates an n-ary elementwise operation.
@@ -1173,11 +1324,23 @@ class HloInstruction {
   // Describes the dimension numbers used for a convolution.
   std::unique_ptr<ConvolutionDimensionNumbers> convolution_dimension_numbers_;
 
+  // Describes the dimension numbers used for a dot.
+  std::unique_ptr<DotDimensionNumbers> dot_dimension_numbers_;
+
+  // Describes FFT type for an FFT instruction.
+  FftType fft_type_ = FftType::FFT;
+
+  // Indicates the FFT length for an FFT instruction.
+  std::vector<int64> fft_length_;
+
   // Describes the [begin, end) index range for a slice.
   std::vector<int64> slice_starts_;
   std::vector<int64> slice_limits_;
   std::vector<int64> slice_strides_;
 
+  // Describes whether the slice can be lowered to an offset into the operand.
+  bool is_in_place_slice_ = false;
+
   // The bit sizes for a reduce-precision operation.
   int32 exponent_bits_ = 0;
   int32 mantissa_bits_ = 0;
@@ -1198,7 +1361,6 @@ class HloInstruction {
 
   // For parameter instructions this field holds the parameter number.
   int64 parameter_number_ = 0;
-  string parameter_name_;
 
   // Name of a global symbol to call, only present for kCustomCall.
   string custom_call_target_;
@@ -1267,9 +1429,12 @@ string ToString(HloInstruction::FusionKind kind);
 StatusOr<HloInstruction::FusionKind> StringToFusionKind(
     const string& kind_name);
 
-// Custom stringification functions for protos that live inside HloInstruction.
+// Custom (de)stringification functions for protos that live inside
+// HloInstruction.
 string PaddingConfigToString(const PaddingConfig& padding);
 string OpMetadataToString(const OpMetadata& metadata);
+string RandomDistributionToString(const RandomDistribution& distribution);
+StatusOr<RandomDistribution> StringToRandomDistribution(const string& name);
 
 std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind);
 
@@ -1295,6 +1460,10 @@ template <typename ValueT>
 using ConstHloInstructionMap =
     std::map<const HloInstruction*, ValueT, HloPtrComparator>;
 
+using HloInstructionSet = std::set<HloInstruction*, HloPtrComparator>;
+using ConstHloInstructionSet =
+    std::set<const HloInstruction*, HloPtrComparator>;
+
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INSTRUCTION_H_
diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
index 76b12fc8d3aadc0a874ce059851666fbcd6a4e94..3af3b29cedd06996dd4a175fdb1584c705ceea87 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
@@ -1068,8 +1068,11 @@ TEST_F(HloInstructionTest, CloneOfFusionPreservesShape) {
       builder.AddInstruction(HloInstruction::CreateParameter(1, s2, "y"));
   HloInstruction* reshape =
       builder.AddInstruction(HloInstruction::CreateTranspose(s2t, y, {1, 0}));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   HloInstruction* dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(sout, HloOpcode::kDot, x, reshape));
+      HloInstruction::CreateDot(sout, x, reshape, dot_dnums));
 
   HloModule module(TestName());
   auto* computation = module.AddEntryComputation(builder.Build());
@@ -1088,48 +1091,6 @@ TEST_F(HloInstructionTest, CloneOfFusionPreservesShape) {
                                root2->operand(1)->operand(0)->shape()));
 }
 
-TEST_F(HloInstructionTest, IsRandomFusable) {
-  auto shape = ShapeUtil::MakeShape(F32, {2, 2});
-  {
-    auto builder = HloComputation::Builder(TestName());
-    auto hlo_module = CreateNewModule();
-    auto const0 = builder.AddInstruction(HloInstruction::CreateConstant(
-        Literal::CreateR0<float>(0.0)));
-    auto const1 = builder.AddInstruction(HloInstruction::CreateConstant(
-        Literal::CreateR0<float>(1.0)));
-    auto rng = builder.AddInstruction(HloInstruction::CreateRng(
-        shape, RandomDistribution::RNG_NORMAL, {const0, const1}));
-
-    auto* computation = hlo_module->AddEntryComputation(builder.Build());
-    computation->CreateFusionInstruction({rng, const0, const1},
-      HloInstruction::FusionKind::kLoop);
-
-    auto* root = computation->root_instruction();
-
-    EXPECT_EQ(HloOpcode::kFusion, root->opcode());
-  }
-  {
-    auto builder = HloComputation::Builder(TestName());
-    auto hlo_module = CreateNewModule();
-    auto const0 = builder.AddInstruction(HloInstruction::CreateConstant(
-        Literal::CreateR0<float>(0.0)));
-    auto const1 = builder.AddInstruction(HloInstruction::CreateConstant(
-        Literal::CreateR0<float>(1.0)));
-    auto rng = builder.AddInstruction(HloInstruction::CreateRng(
-        shape, RandomDistribution::RNG_NORMAL, {const0, const1}));
-    builder.AddInstruction(HloInstruction::CreateUnary(
-        shape, HloOpcode::kNegate, rng));
-    auto* computation = hlo_module->AddEntryComputation(builder.Build());
-    computation->CreateFusionInstruction({rng, const0, const1},
-      HloInstruction::FusionKind::kLoop);
-
-    auto* root = computation->root_instruction();
-
-    EXPECT_EQ(HloOpcode::kFusion, root->operand(0)->opcode());
-  }
-}
-
-
 TEST_F(HloInstructionTest, CloneSuffixNames) {
   // Test that the suffix string added to cloned instructions is not
   // duplicated. Rather a numeric incrementing value should be appended. That
@@ -1169,7 +1130,7 @@ TEST_F(HloInstructionTest, CloneSuffixNames) {
 }
 
 TEST_F(HloInstructionTest, Stringification) {
-  // Tests stringification of a simple op, fusion, and while.
+  // Tests stringification of a simple op, fusion, while, and conditional.
   const Shape s1 = ShapeUtil::MakeShape(F32, {5, 10});
   const Shape s2 = ShapeUtil::MakeShape(F32, {20, 10});
   const Shape s2t = ShapeUtil::MakeShape(F32, {10, 20});
@@ -1182,12 +1143,17 @@ TEST_F(HloInstructionTest, Stringification) {
       builder.AddInstruction(HloInstruction::CreateParameter(1, s2, "y"));
   HloInstruction* reshape =
       builder.AddInstruction(HloInstruction::CreateTranspose(s2t, y, {1, 0}));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   HloInstruction* dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(sout, HloOpcode::kDot, x, reshape));
+      HloInstruction::CreateDot(sout, x, reshape, dot_dnums));
+
+  auto options = HloPrintOptions().set_print_metadata(false);
 
-  EXPECT_EQ(dot->ToString(false, false),
+  EXPECT_EQ(dot->ToString(options),
             "%dot = f32[5,20]{1,0} dot(f32[5,10]{1,0} %x, f32[10,20]{1,0} "
-            "%transpose)");
+            "%transpose), lhs_contracting_dims={1}, rhs_contracting_dims={0}");
 
   HloModule module(TestName());
   auto* computation = module.AddEntryComputation(builder.Build());
@@ -1195,15 +1161,25 @@ TEST_F(HloInstructionTest, Stringification) {
       {dot, reshape}, HloInstruction::FusionKind::kTransposeDot);
 
   EXPECT_EQ(
-      fusion->ToString(false, false),
-      "%fusion = f32[5,20]{1,0} fusion(f32[5,10]{1,0} %x, "
+      fusion->ToString(options),
+      "%dot_fusion = f32[5,20]{1,0} fusion(f32[5,10]{1,0} %x, "
       "f32[20,10]{1,0} %y), kind=kTransposeDot, calls=%fused_computation");
 
   HloInstruction* loop = builder.AddInstruction(
       HloInstruction::CreateWhile(sout, computation, computation, x));
-  EXPECT_EQ(loop->ToString(false, false),
+  EXPECT_EQ(loop->ToString(options),
             "%while = f32[5,20]{1,0} while(f32[5,10]{1,0} %x), "
             "condition=%TransposeDot, body=%TransposeDot");
+
+  auto pred = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(true)));
+  HloInstruction* conditional =
+      builder.AddInstruction(HloInstruction::CreateConditional(
+          sout, pred, x, computation, x, computation));
+  EXPECT_EQ(conditional->ToString(options),
+            "%conditional = f32[5,20]{1,0} conditional(pred[] %constant, "
+            "f32[5,10]{1,0} %x, f32[5,10]{1,0} %x), "
+            "true_computation=%TransposeDot, false_computation=%TransposeDot");
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index faaf73ea1ce5c77b0522cb3276b4efd78aabde16..58bb94221149c9a8b550add900dff52a53565985 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -35,14 +35,15 @@ namespace xla {
 HloModule::HloModule(const string& name,
                      const VersionedComputationHandle& entry_computation_handle,
                      const HloModuleConfig& config)
-    : name_(name),
+    : name_(NameUniquer::GetSanitizedName(name)),
       config_(config),
       has_entry_computation_handle_(true),
       entry_computation_handle_(entry_computation_handle) {}
 
-HloModule::HloModule(const string& name) : name_(name) {}
+HloModule::HloModule(const string& name)
+    : name_(NameUniquer::GetSanitizedName(name)) {}
 HloModule::HloModule(const string& name, const HloModuleConfig& config)
-    : name_(name), config_(config) {}
+    : name_(NameUniquer::GetSanitizedName(name)), config_(config) {}
 
 HloComputation* HloModule::AddComputationInternal(
     std::unique_ptr<HloComputation> computation, bool is_entry,
@@ -170,17 +171,14 @@ void HloModule::ReplaceComputations(
   computations_ = std::move(new_computations);
 }
 
-string HloModule::ToString(bool include_large_constants) const {
+string HloModule::ToString(const HloPrintOptions& options) const {
   std::ostringstream s;
-  s << "HloModule " << name() << ":\n\n";
+  s << "HloModule " << name() << "\n\n";
   for (const HloComputation* computation : MakeComputationPostOrder()) {
     if (computation == entry_computation()) {
       s << "ENTRY ";
     }
-    s << computation->ToString(
-             /*nested_level=*/0,
-             /*include_large_constants=*/include_large_constants)
-      << "\n\n";
+    s << computation->ToString(options) << "\n\n";
   }
   return s.str();
 }
@@ -232,8 +230,8 @@ StatusOr<ProgramShape> ProgramShapeFromProto(const HloModuleProto& module) {
           << "Entry computation has more than one parameter instruction "
              "with parameter number "
           << instruction.parameter_number();
-      parameters[instruction.parameter_number()] = {
-          instruction.parameter_name(), &instruction.shape()};
+      parameters[instruction.parameter_number()] = {instruction.name(),
+                                                    &instruction.shape()};
     }
   }
   TF_RET_CHECK(root != nullptr)
@@ -459,6 +457,14 @@ HloInstruction* HloModule::OutlineExpressionFromComputation(
   return call;
 }
 
+int64 HloModule::instruction_count() const {
+  int64 n = 0;
+  for (const auto& computation : computations_) {
+    n += computation->instruction_count();
+  }
+  return n;
+}
+
 std::list<HloComputation*> HloModule::MakeComputationPostOrder() const {
   // First determine all root computations by building a set of nonroot
   // computations (computations which are called by an instruction in the
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index 5141e7bc8d4cf0ef4cd83310772e0c5d66b5da12..e377654d024819d00f73f43a70d363bd902dc981 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -98,6 +98,10 @@ class HloModule {
     return config_.mutable_entry_computation_layout();
   }
 
+  ComputationLayout entry_computation_layout() const {
+    return config_.entry_computation_layout();
+  }
+
   const VersionedComputationHandle& entry_computation_handle() const {
     return entry_computation_handle_;
   }
@@ -125,6 +129,9 @@ class HloModule {
   // Gets the number of computations in this module.
   int64 computation_count() const { return computations_.size(); }
 
+  // Gets the number of instructions in this module.
+  int64 instruction_count() const;
+
   // Compute and return a post order of all computations in the module. The sort
   // is defined like so: if computation A has an instruction which calls
   // computation B, then A will appear after B in the sort.
@@ -143,7 +150,12 @@ class HloModule {
 
   const HloModuleConfig& config() const { return config_; }
 
-  string ToString(bool include_large_constants = false) const;
+  // Return a string representation of the module.
+  //
+  // (We express the default options using an overload rather than a default
+  // param because gdb ignores default params, but does resolve overloads.)
+  string ToString() const { return ToString(HloPrintOptions()); }
+  string ToString(const HloPrintOptions& options) const;
 
   // Convert an HloModule to or from a proto.
   HloModuleProto ToProto() const;
diff --git a/tensorflow/compiler/xla/service/hlo_module_test.cc b/tensorflow/compiler/xla/service/hlo_module_test.cc
index bf6440d66cac0d3a929c377202b212aba262f887..0f5d3dccb74e6e3c88e51685392171f940c03596 100644
--- a/tensorflow/compiler/xla/service/hlo_module_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_test.cc
@@ -135,14 +135,15 @@ TEST_F(HloModuleTest, LargeConstantToString) {
   module->AddEntryComputation(builder.Build());
 
   EXPECT_EQ(
-      "HloModule LargeConstantToString:\n\nENTRY %Constant () -> f32[16] {\n  "
+      "HloModule LargeConstantToString\n\nENTRY %Constant () -> f32[16] {\n  "
       "ROOT %constant = f32[16]{0} constant({...})\n}\n\n",
-      module->ToString(/*include_large_constants=*/false));
+      module->ToString(HloPrintOptions().set_print_large_constants(false)));
+
   EXPECT_EQ(
-      "HloModule LargeConstantToString:\n\nENTRY %Constant () -> f32[16] {\n  "
+      "HloModule LargeConstantToString\n\nENTRY %Constant () -> f32[16] {\n  "
       "ROOT %constant = f32[16]{0} constant({42, 42, 42, 42, 42, 42, 42, 42, "
       "42, 42, 42, 42, 42, 42, 42, 42})\n}\n\n",
-      module->ToString(/*include_large_constants=*/true));
+      module->ToString(HloPrintOptions().set_print_large_constants(true)));
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h
index f3f79357582ac7661a532e94031acdbca0b86784..3d64523a79fc50638fdf378b5d521a5cd4482b90 100644
--- a/tensorflow/compiler/xla/service/hlo_opcode.h
+++ b/tensorflow/compiler/xla/service/hlo_opcode.h
@@ -73,6 +73,7 @@ namespace xla {
   V(kDynamicUpdateSlice, "dynamic-update-slice")             \
   V(kEq, "equal-to", kHloOpcodeIsComparison)                 \
   V(kExp, "exponential")                                     \
+  V(kFft, "fft")                                             \
   V(kFloor, "floor")                                         \
   V(kFusion, "fusion", kHloOpcodeIsVariadic)                 \
   V(kGe, "greater-than-or-equal-to", kHloOpcodeIsComparison) \
diff --git a/tensorflow/compiler/xla/service/hlo_profile_printer.h b/tensorflow/compiler/xla/service/hlo_profile_printer.h
index 316753a82ab2a9b5459b71c723a8e817ee2cacbf..2f056490ae027872570f7a0821ee63114f49fab8 100644
--- a/tensorflow/compiler/xla/service/hlo_profile_printer.h
+++ b/tensorflow/compiler/xla/service/hlo_profile_printer.h
@@ -65,9 +65,11 @@ class HloProfilePrinter {
 
   HloProfilePrinter(
       HloComputationInfo* computation_infos, int64 computation_infos_size,
+      int64 profile_counters_size,
       std::function<void(HloComputationInfo*, int64)> deleter = nullptr)
       : computation_infos_(computation_infos),
         computation_infos_size_(computation_infos_size),
+        profile_counters_size_(profile_counters_size),
         deleter_(std::move(deleter)) {}
 
   HloProfilePrinter(HloProfilePrinter&& other) {
@@ -79,10 +81,13 @@ class HloProfilePrinter {
   HloProfilePrinter(const HloProfilePrinter&) = delete;
   HloProfilePrinter& operator=(const HloProfilePrinter&) = delete;
 
-  // Convert the profile counter sequence `counters` to a human readable string
+  // Converts the profile counter sequence `counters` to a human readable string
   // representation.
   string ToString(const int64* counters, double clock_rate_ghz) const;
 
+  // Returns the size of the profile buffer expected by this printer.
+  int64 profile_counters_size() const { return profile_counters_size_; }
+
   ~HloProfilePrinter();
 
  private:
@@ -90,6 +95,7 @@ class HloProfilePrinter {
   // is manifested as the deleter_ function.
   HloComputationInfo* computation_infos_ = nullptr;
   int64 computation_infos_size_ = 0;
+  int64 profile_counters_size_ = 0;
   std::function<void(HloComputationInfo*, int64)> deleter_;
 };
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.cc b/tensorflow/compiler/xla/service/hlo_proto_util.cc
index 727ad0178c6227cd2e64c31a4618e781671b9393..78e6a101c10a1e812e3e2631d520139fd0bc425c 100644
--- a/tensorflow/compiler/xla/service/hlo_proto_util.cc
+++ b/tensorflow/compiler/xla/service/hlo_proto_util.cc
@@ -19,15 +19,20 @@ namespace xla {
 
 HloProto MakeHloProto(const HloModule& module,
                       const BufferAssignment& assignment) {
-  HloModuleProto proto_module = module.ToProto();
   HloOrderingProto proto_ordering =
       assignment.liveness().hlo_ordering().ToProto();
   BufferAssignmentProto proto_assignment = assignment.ToProto();
-  HloProto proto;
-  proto.mutable_hlo_module()->Swap(&proto_module);
+  HloProto proto = MakeHloProto(module);
   proto.mutable_hlo_ordering()->Swap(&proto_ordering);
   proto.mutable_buffer_assignment()->Swap(&proto_assignment);
   return proto;
 }
 
+HloProto MakeHloProto(const HloModule& module) {
+  HloModuleProto proto_module = module.ToProto();
+  HloProto proto;
+  proto.mutable_hlo_module()->Swap(&proto_module);
+  return proto;
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.h b/tensorflow/compiler/xla/service/hlo_proto_util.h
index 603259a11fcdca59f58653d9a7a164c983711a57..320288fdb9aa0810b306b1d78bd1ff4cfc366ed2 100644
--- a/tensorflow/compiler/xla/service/hlo_proto_util.h
+++ b/tensorflow/compiler/xla/service/hlo_proto_util.h
@@ -31,6 +31,10 @@ namespace xla {
 HloProto MakeHloProto(const HloModule& module,
                       const BufferAssignment& assignment);
 
+// Returns a serialized representation of the HLO state, but buffer assignment
+// will not be included in the output.
+HloProto MakeHloProto(const HloModule& module);
+
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PROTO_UTIL_H_
diff --git a/tensorflow/compiler/xla/service/hlo_reachability.h b/tensorflow/compiler/xla/service/hlo_reachability.h
index d7bdac9c86579f19afbba133772c2c50894853d1..553ec11f6f9a2997ab7113f9b8241e04c7fe20d5 100644
--- a/tensorflow/compiler/xla/service/hlo_reachability.h
+++ b/tensorflow/compiler/xla/service/hlo_reachability.h
@@ -30,11 +30,17 @@ namespace xla {
 
 class HloInstruction;
 
-// A class for computing and representing reachability between HloInstructions.
+// A class for representing reachability between HloInstructions.
+//
+// !!! THIS CLASS DOES NOT COMPUTE REACHABILITY !!! It has an adjacency matrix
+// and it is up to the user of the class to set the adjacency matrix such that
+// it represents reachability, i.e. such that it is transitive. That the graph
+// be transitive is thus not an invariant of this class, but it is required for
+// the name of the class and its methods to make sense.
 class HloReachabilityMap {
  public:
-  // Sets up an empty reachable matrix for the full set of instructions
-  // specified in 'instructions'.
+  // Sets up a graph with no edges and where the nodes correspond to the given
+  // instructions.
   explicit HloReachabilityMap(const std::list<HloInstruction*>& instructions);
 
   // Set the reachability set of 'instruction' to the union of the reachability
@@ -42,17 +48,33 @@ class HloReachabilityMap {
   // 'x' is not 'instruction' will return true iff IsReachable(x, input) is true
   // for some 'input' in 'inputs'. Also sets 'instruction' to be reachable from
   // itself. Returns whether the reachability set of 'instruction' changed.
+  //
+  // !!! THIS FUNCTION DOES NOT COMPUTE REACHABILITY !!! It sets the adjacency
+  // vector in the internal graph of this HloReachabilityMap for the given
+  // instruction and does not transitively update any other part of the
+  // adjacency matrix.
   bool SetReachabilityToUnion(
       tensorflow::gtl::ArraySlice<const HloInstruction*> inputs,
       const HloInstruction* instruction);
 
   // Sets entry so that IsReachable(a, b) will return true
+  //
+  // !!! THIS FUNCTION DOES NOT COMPUTE REACHABILITY !!! It sets the adjacency
+  // matrix in the internal graph of this HloReachabilityMap to have an edge
+  // from a to b and does not transitively update any other part of the
+  // adjacency matrix.
   void SetReachable(const HloInstruction* a, const HloInstruction* b);
 
   // Returns true if "b" is reachable from "a"
+  //
+  // Note that this function only correctly answers queries about reachability
+  // if the set of edges that have been provided to this class are transitive.
   bool IsReachable(const HloInstruction* a, const HloInstruction* b) const;
 
   // Returns true if "b" is reachable from "a" or "a" is reachable from "b"
+  //
+  // Note that this function only correctly answers queries about reachability
+  // if the set of edges that have been provided to this class are transitive.
   bool IsConnected(const HloInstruction* a, const HloInstruction* b) const;
 
  private:
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index 017f996bc4d1902c81f96425b7bc28d52622df0f..c6b4dc0368d92fd477decdfb38045f74f8696803 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -566,7 +566,9 @@ Status MemoryUsageTracker::BeginInstruction(Item* item) {
   VLOG(3) << "  memory usage = " << memory_usage_;
   VLOG(10) << ToString();
 
-  DCHECK(Check());
+  if (VLOG_IS_ON(1)) {
+    DCHECK(Check());
+  }
   return Status::OK();
 }
 
@@ -603,8 +605,9 @@ Status MemoryUsageTracker::EndInstruction() {
   VLOG(3) << "  memory usage = " << memory_usage_;
   VLOG(10) << ToString();
 
-  DCHECK(Check());
-
+  if (VLOG_IS_ON(1)) {
+    DCHECK(Check());
+  }
   return Status::OK();
 }
 
@@ -1021,7 +1024,9 @@ StatusOr<bool> HloRematerialization::RematerializeComputation(
 
       HloInstruction* best = best_item->instruction;
       VLOG(1) << "Rematerializing instruction " << best->name() << " (saving "
-              << memory_tracker.MemoryReducedIfRematerialized(best_item) << ")";
+              << HumanReadableNumBytes(
+                     memory_tracker.MemoryReducedIfRematerialized(best_item))
+              << ")";
       changed = true;
       remat_count++;
 
@@ -1101,8 +1106,8 @@ StatusOr<bool> HloRematerialization::RematerializeComputation(
         net_instructions_added++;
       }
 
-      VLOG(3) << "memory_usage after rematerialization = "
-              << memory_tracker.memory_usage();
+      VLOG(1) << "memory_usage after rematerialization = "
+              << HumanReadableNumBytes(memory_tracker.memory_usage());
     }
 
     const CallSite* callsite = call_graph_node.GetCallSite(instruction);
@@ -1208,11 +1213,12 @@ StatusOr<bool> HloRematerialization::Run(
 
   XLA_VLOG_LINES(3, "Before HloRematerialization:\n" + module->ToString());
   // Create initial sequence of HLO instructions.
-  TF_ASSIGN_OR_RETURN(*sequence,
-                      CreateMemoryMinimizingSequence(
-                          *module, [this](const LogicalBuffer& buffer) {
-                            return size_function_(buffer.shape());
-                          }));
+  TF_ASSIGN_OR_RETURN(*sequence, CreateMemoryMinimizingSequence(
+                                     *module,
+                                     [this](const LogicalBuffer& buffer) {
+                                       return size_function_(buffer.shape());
+                                     },
+                                     scheduler_algorithm_));
   // Compute peak memory usage of all computations in the module called in a
   // sequential context.
   call_graph_ = CallGraph::Build(module);
@@ -1313,9 +1319,10 @@ StatusOr<bool> HloRematerialization::Run(
 /* static */ StatusOr<bool> HloRematerialization::RematerializeAndSchedule(
     const HloRematerialization::ShapeSizeFunction& size_function,
     int64 memory_limit_bytes, HloModule* hlo_module,
+    SchedulerAlgorithm scheduler_algorithm,
     SequentialHloOrdering::HloModuleSequence* sequence,
     RematerializationSizes* sizes) {
-  HloRematerialization remat(size_function);
+  HloRematerialization remat(scheduler_algorithm, size_function);
   return remat.Run(hlo_module, sequence, memory_limit_bytes, sizes);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h
index 11f79a6d4158c6251c2faf63e9cac4e742440863..52553439033a3bcfa4b472f13f9cd4b1ecf5ed96 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.h
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h
@@ -20,6 +20,7 @@
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_scheduling.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 
 namespace xla {
@@ -65,12 +66,15 @@ class HloRematerialization {
   // code generation.
   static StatusOr<bool> RematerializeAndSchedule(
       const ShapeSizeFunction& size_function, int64 memory_limit_bytes,
-      HloModule* hlo_module, SequentialHloOrdering::HloModuleSequence* sequence,
+      HloModule* hlo_module, SchedulerAlgorithm scheduler_algorithm,
+      SequentialHloOrdering::HloModuleSequence* sequence,
       RematerializationSizes* sizes = nullptr);
 
  protected:
-  HloRematerialization(const ShapeSizeFunction& size_function)
-      : size_function_(size_function) {}
+  HloRematerialization(SchedulerAlgorithm scheduler_algorithm,
+                       const ShapeSizeFunction& size_function)
+      : scheduler_algorithm_(scheduler_algorithm),
+        size_function_(size_function) {}
   ~HloRematerialization() {}
 
   // Runs rematerialization on the given module. Returns whether the module was
@@ -103,6 +107,9 @@ class HloRematerialization {
   StatusOr<int64> CalledComputationsMemoryUsage(
       const HloInstruction* instruction) const;
 
+  // Selects an algorithm to use for HLO scheduling.
+  SchedulerAlgorithm scheduler_algorithm_;
+
   // Function which computes the size of the top-level buffer of a shape.
   const ShapeSizeFunction size_function_;
 
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc
index d88aa4bb567c6c5f6eab54f12239bf7040339c39..1b7d26dde501a6a0955d62ea0938e0683a32d49d 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc
@@ -158,11 +158,11 @@ TEST_F(HloRematerializationTest, SingleComputation) {
   SequentialHloOrdering::HloModuleSequence sequence;
   // Computation requires 16KB without rematerialization, but uses only 12KB
   // with rematerialization so pick a memory limit between these values (14KB).
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/14 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/14 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   EXPECT_TRUE(changed);
 
   // Root should not have changed.
@@ -191,11 +191,11 @@ TEST_F(HloRematerializationTest, SingleComputationNoRematerialization) {
   EXPECT_EQ(computation->instruction_count(), 7);
 
   SequentialHloOrdering::HloModuleSequence sequence;
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/20 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/20 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
 
   // No instructions should have been materialized.
   EXPECT_FALSE(changed);
@@ -232,11 +232,11 @@ TEST_F(HloRematerializationTest, RematerializeAroundWhile) {
   // while so the peak memory use of the module is 18KB. Set the memory limit a
   // bit lower (17KB) to force rematerialization of the entry computation.
   SequentialHloOrdering::HloModuleSequence sequence;
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/17 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/17 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   EXPECT_TRUE(changed);
 
   // Only the entry computation should have a rematerialized instruction added.
@@ -268,11 +268,11 @@ TEST_F(HloRematerializationTest, RematerializeEntryAndWhileBody) {
   EXPECT_EQ(body_computation->instruction_count(), 7);
 
   SequentialHloOrdering::HloModuleSequence sequence;
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/15 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/15 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   EXPECT_TRUE(changed);
 
   // Both computations should have a rematerialized instruction added.
@@ -310,11 +310,11 @@ TEST_F(HloRematerializationTest, RematerializeNestedComputations) {
   // If all computations are maximally rematerialized then peak memory usage is
   // ~12K so pick something slightly larger.
   SequentialHloOrdering::HloModuleSequence sequence;
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/13 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/13 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   EXPECT_TRUE(changed);
 
   // All computations should have a rematerialized instruction added.
@@ -323,6 +323,76 @@ TEST_F(HloRematerializationTest, RematerializeNestedComputations) {
   EXPECT_EQ(inner_computation->instruction_count(), 8);
 }
 
+TEST_F(HloRematerializationTest, RngNotRematerialized) {
+  // Test that a single rng is not rematerialized:
+  //
+  // Entry computation:
+  //   F32[] %param = {...}
+  //   F32[1024] rng = rng(param)
+  //   F32[1024] tanh = tanh(rng)
+  //   F32[1024] exp = exp(rng)
+  //   F32[1024] add_0 = add(rng, tanh)              // LIVE: add_0 + rng +
+  //                                                 //       tanh + exp
+  //
+  //   F32[1024] add_1 = add(rng, add(exp, add_0))   // LIVE: add_1 + add_0 +
+  //                                                 //       rng + tanh + exp
+  //
+  //   F32[1024] add_2 = add(rng, add(tanh, add_1))  // LIVE: add_2 + add_1 +
+  //                                                 //       rng + tanh + exp
+  auto module = CreateNewModule();
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape_, "param"));
+  auto rng = builder.AddInstruction(HloInstruction::CreateRng(
+      vec1024_shape_, RandomDistribution::RNG_UNIFORM, {param, param}));
+  auto tanh = builder.AddInstruction(
+      HloInstruction::CreateUnary(vec1024_shape_, HloOpcode::kTanh, rng));
+  auto exp = builder.AddInstruction(
+      HloInstruction::CreateUnary(vec1024_shape_, HloOpcode::kExp, rng));
+  auto add_0 = builder.AddInstruction(
+      HloInstruction::CreateBinary(vec1024_shape_, HloOpcode::kAdd, rng, tanh));
+  auto add_1 = builder.AddInstruction(HloInstruction::CreateBinary(
+      vec1024_shape_, HloOpcode::kAdd, rng,
+      builder.AddInstruction(HloInstruction::CreateBinary(
+          vec1024_shape_, HloOpcode::kAdd, exp, add_0))));
+  builder.AddInstruction(HloInstruction::CreateBinary(
+      vec1024_shape_, HloOpcode::kAdd, rng,
+      builder.AddInstruction(HloInstruction::CreateBinary(
+          vec1024_shape_, HloOpcode::kAdd, tanh, add_1))));
+  HloComputation* entry_computation =
+      module->AddEntryComputation(builder.Build());
+
+  auto count_rngs = [](const HloComputation* computation) {
+    int64 rng_count = 0;
+    for (auto* instruction : computation->instructions()) {
+      if (instruction->opcode() == HloOpcode::kRng) {
+        ++rng_count;
+      }
+    }
+    return rng_count;
+  };
+  // Before rematerialization there should be a single broadcast rng in
+  // the graph.
+  ASSERT_EQ(count_rngs(entry_computation), 1);
+  const int64 original_instruction_count =
+      entry_computation->instruction_count();
+  SequentialHloOrdering::HloModuleSequence sequence;
+  // Pick a memory limit some where between 24KB (initial peak memory including
+  // parameter and output) and 20KB (peak memory possible with
+  // rematerialization).
+  TF_ASSERT_OK_AND_ASSIGN(
+      bool changed, HloRematerialization::RematerializeAndSchedule(
+                        ByteSizeOf,
+                        /*memory_limit_bytes=*/4 * ByteSizeOf(vec1024_shape_),
+                        module.get(), SchedulerAlgorithm::kAuto, &sequence));
+  EXPECT_TRUE(changed);
+  // The rng should not have been rematerialized.
+  EXPECT_EQ(count_rngs(entry_computation), 1);
+  // There should have been rematerialization.
+  EXPECT_GT(entry_computation->instruction_count(), original_instruction_count);
+}
+
 TEST_F(HloRematerializationTest, InstructionRematerializedMultipleTimes) {
   // Test that a single instruction is rematerialized several times. Module:
   //
@@ -406,11 +476,11 @@ TEST_F(HloRematerializationTest, InstructionRematerializedMultipleTimes) {
   // Pick a memory limit some where between 24KB (initial peak memory including
   // parameter and output) and 20KB (peak memory possible with
   // rematerialization).
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/22 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/22 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   EXPECT_TRUE(changed);
 
   // The broadcast should have been rematerialized 3 times.
@@ -503,11 +573,11 @@ TEST_P(IndirectUseTest, IndirectUseNotRematerialized) {
   // Pick a memory limit some where between 24KB (initial peak memory including
   // parameter and output) and 20KB (peak memory possible with
   // rematerialization).
-  TF_ASSERT_OK_AND_ASSIGN(
-      bool changed,
-      HloRematerialization::RematerializeAndSchedule(
-          ByteSizeOf,
-          /*memory_limit_bytes=*/22 * 1024, module.get(), &sequence));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed,
+                          HloRematerialization::RematerializeAndSchedule(
+                              ByteSizeOf,
+                              /*memory_limit_bytes=*/22 * 1024, module.get(),
+                              SchedulerAlgorithm::kAuto, &sequence));
   // Rematerialization should only occur if the rematerializable instruction has
   // no indirect uses.
   if (indirectly_used) {
diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc
index 6b6d48233a7da50927207b8334186ee5105db268..204a8bf748685af71ac82be0d102cf7f76c7b38f 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.cc
+++ b/tensorflow/compiler/xla/service/hlo_runner.cc
@@ -39,6 +39,14 @@ namespace se = ::perftools::gputools;
 
 namespace xla {
 
+/*static*/ StatusOr<std::unique_ptr<HloModule>>
+HloRunner::CreateModuleFromString(const tensorflow::StringPiece hlo_string,
+                                  const DebugOptions& debug_options) {
+  HloModuleConfig config;
+  config.set_debug_options(debug_options);
+  return tools::Parse(hlo_string, config);
+}
+
 /*static*/ StatusOr<std::unique_ptr<HloModule>>
 HloRunner::ReadModuleFromHloProtoFile(const std::string& filename,
                                       const DebugOptions& debug_options) {
@@ -104,17 +112,12 @@ HloRunner::HloRunner(se::Platform* platform) {
   VLOG(1) << "Created HloRunner for platform: " << platform->Name();
 }
 
-HloRunner::~HloRunner() {
-  // Deallocate all the memory allocated during the tests.
-  for (auto& allocation : allocations_) {
-    backend().default_stream_executor()->Deallocate(&allocation);
-  }
-}
+HloRunner::~HloRunner() {}
 
-StatusOr<se::DeviceMemoryBase> HloRunner::Execute(
+StatusOr<std::unique_ptr<Literal>> HloRunner::ExecuteInternal(
     std::unique_ptr<HloModule> module,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    Shape* result_shape, bool run_hlo_passes) {
+    const tensorflow::gtl::ArraySlice<Literal*> arguments,
+    bool run_hlo_passes) {
   if (run_hlo_passes) {
     TF_ASSIGN_OR_RETURN(
         module, backend().compiler()->RunHloPasses(
@@ -129,6 +132,7 @@ StatusOr<se::DeviceMemoryBase> HloRunner::Execute(
   stream.Init();
 
   ExecutableRunOptions run_options;
+  run_options.set_device_ordinal(backend().default_device_ordinal());
   run_options.set_stream(&stream);
   run_options.set_allocator(backend().memory_allocator());
   run_options.set_inter_op_thread_pool(backend().inter_op_thread_pool());
@@ -138,73 +142,43 @@ StatusOr<se::DeviceMemoryBase> HloRunner::Execute(
   ServiceExecutableRunOptions service_run_options(
       run_options, backend().StreamBorrower(),
       backend().inter_op_thread_pool());
-  TF_ASSIGN_OR_RETURN(
-      se::DeviceMemoryBase result,
-      executable->ExecuteOnStream(&service_run_options, arguments,
-                                  /*hlo_execution_profile=*/nullptr));
-  TF_RET_CHECK(stream.BlockHostUntilDone());
-
-  allocations_.push_back(result);
-
-  *result_shape = executable->result_shape();
 
-  if (ShapeUtil::IsTuple(*result_shape)) {
-    // We must record element buffers of tuples as well to avoid leaks.
-    DCHECK(!ShapeUtil::IsNestedTuple(*result_shape));
+  // Copy arguments to device.
+  std::vector<std::unique_ptr<ScopedShapedBuffer>> argument_buffers;
+  std::vector<ShapedBuffer*> argument_buffer_ptrs;
+  for (Literal* argument : arguments) {
     TF_ASSIGN_OR_RETURN(
-        std::vector<se::DeviceMemoryBase> element_buffers,
-        backend().transfer_manager()->ShallowCopyTupleFromDevice(
-            backend().default_stream_executor(), result, *result_shape));
-
-    // A tuple may contain the same buffer in more than one element. Keep track
-    // of the buffers already added to avoid duplicates in allocations_.
-    std::set<void*> added_opaques;
-    for (auto element_buffer : element_buffers) {
-      if (added_opaques.count(element_buffer.opaque()) == 0) {
-        CHECK(element_buffer.opaque() != nullptr);
-        added_opaques.insert(element_buffer.opaque());
-        allocations_.push_back(element_buffer);
-      }
-    }
+        std::unique_ptr<ScopedShapedBuffer> argument_buffer,
+        backend().transfer_manager()->AllocateScopedShapedBuffer(
+            argument->shape(), run_options.allocator(),
+            run_options.device_ordinal()));
+    TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
+        stream.parent(), *argument, *argument_buffer));
+    argument_buffers.push_back(std::move(argument_buffer));
+    argument_buffer_ptrs.push_back(argument_buffers.back().get());
   }
 
-  return result;
-}
-
-StatusOr<se::DeviceMemoryBase> HloRunner::TransferToDevice(
-    const Literal& literal) {
-  // Allocate memory on the device using the stream executor.
-  int64 allocation_size =
-      backend().transfer_manager()->GetByteSizeRequirement(literal.shape());
-  se::DeviceMemoryBase allocation =
-      backend().default_stream_executor()->AllocateArray<uint8>(
-          allocation_size);
-  allocations_.push_back(allocation);
-
-  TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
-      backend().default_stream_executor(), literal, &allocation));
-
-  return allocation;
-}
-
-StatusOr<std::unique_ptr<Literal>> HloRunner::TransferFromDevice(
-    const Shape& shape, se::DeviceMemoryBase device_base) {
-  auto literal = MakeUnique<Literal>();
-  TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralFromDevice(
-      backend().default_stream_executor(), device_base, shape, shape,
-      literal.get()));
-  return std::move(literal);
-}
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<ShapedBuffer> result,
+      executable->ExecuteOnStream(&service_run_options, argument_buffer_ptrs,
+                                  /*hlo_execution_profile=*/nullptr));
 
-StatusOr<std::unique_ptr<Literal>> HloRunner::ExecuteAndTransfer(
-    std::unique_ptr<HloModule> module,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
-    bool run_hlo_passes) {
-  Shape result_shape;
+  // Create a ScopedShapedBuffer of the result to manage deallocation. This will
+  // deallocate all the device memory when it goes out of scope.
   TF_ASSIGN_OR_RETURN(
-      se::DeviceMemoryBase device_base,
-      Execute(std::move(module), arguments, &result_shape, run_hlo_passes));
-  return TransferFromDevice(result_shape, device_base);
+      std::unique_ptr<ScopedShapedBuffer> scoped_result,
+      ScopedShapedBuffer::MakeScoped(result.get(), run_options.allocator()));
+
+  auto result_literal = backend().transfer_manager()->TransferLiteralFromDevice(
+      stream.parent(), *scoped_result);
+  if (result_literal.ok()) {
+    VLOG(4) << "Executed binary and got result: "
+            << result_literal.ValueOrDie()->ToString();
+  } else {
+    VLOG(4) << "Executed binary and got status: "
+            << result_literal.status().ToString();
+  }
+  return result_literal;
 }
 
 Backend& HloRunner::backend() {
diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h
index 95cddafc91ff40948efc4b0744343d994cf84f3a..d4b221fb52dff64dda264a931df6fd19b86e5260 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.h
+++ b/tensorflow/compiler/xla/service/hlo_runner.h
@@ -35,7 +35,8 @@ namespace xla {
 
 // A base class for running an HloModule. This executes the given HloModule on a
 // certain backend directly without using the client interface. HloModule can be
-// explicitly built, or loaded from a serialization file (e.g., hlo proto file).
+// explicitly built, or loaded from a serialization file (e.g., hlo proto
+// file), or parsed from a hlo textual IR string.
 class HloRunner {
  public:
   HloRunner();
@@ -44,6 +45,12 @@ class HloRunner {
 
   ~HloRunner();
 
+  // Converts an HloModule from the given hlo textual IR string (in
+  // HloModule::ToString format).
+  static StatusOr<std::unique_ptr<HloModule>> CreateModuleFromString(
+      const tensorflow::StringPiece hlo_string,
+      const DebugOptions& debug_options);
+
   // Reads the proto file in xla.HloProto format, creates and returns the
   // HloModule. Will try to parse the filename as binary proto, then try as
   // text proto if that fails.
@@ -65,35 +72,13 @@ class HloRunner {
   // Executes the given module with given literals as input and returns the
   // result as a Literal. The LiteralPtr type accepts Literal* or
   // std::unique_ptr<Literal>.
-  // If run_hlo_passes is true, the module will be executed without Hlo
+  //
+  // If run_hlo_passes is false, the module will be executed without Hlo
   // optimization.
   template <typename LiteralPtr>
   StatusOr<std::unique_ptr<Literal>> Execute(
       std::unique_ptr<HloModule> module,
-      const tensorflow::gtl::ArraySlice<LiteralPtr> literals,
-      bool run_hlo_passes = true);
-
-  // Executes the given module and returns a global data handle.
-  StatusOr<perftools::gputools::DeviceMemoryBase> Execute(
-      std::unique_ptr<HloModule> module,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      Shape* result_shape, bool run_hlo_passes = true);
-
-  // Transfers the given literal to the device and returns the data handle.
-  StatusOr<perftools::gputools::DeviceMemoryBase> TransferToDevice(
-      const Literal& literal);
-
-  // Transfers the array referred to by the given handle from the device and
-  // returns as a Literal.
-  StatusOr<std::unique_ptr<Literal>> TransferFromDevice(
-      const Shape& shape, perftools::gputools::DeviceMemoryBase device_base);
-
-  // Executes the given module and return the result as a Literal.
-  StatusOr<std::unique_ptr<Literal>> ExecuteAndTransfer(
-      std::unique_ptr<HloModule> module,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
+      const tensorflow::gtl::ArraySlice<LiteralPtr> arguments,
       bool run_hlo_passes = true);
 
   // If backend is not created in the constructor, creates and returns the
@@ -104,9 +89,12 @@ class HloRunner {
   Backend& backend();
 
  private:
-  struct EigenThreadPoolWrapper;
+  StatusOr<std::unique_ptr<Literal>> ExecuteInternal(
+      std::unique_ptr<HloModule> module,
+      const tensorflow::gtl::ArraySlice<Literal*> arguments,
+      bool run_hlo_passes = true);
 
-  std::vector<perftools::gputools::DeviceMemoryBase> allocations_;
+  struct EigenThreadPoolWrapper;
 
   std::unique_ptr<EigenThreadPoolWrapper> thread_pool_wrapper_;
 
@@ -116,15 +104,14 @@ class HloRunner {
 template <typename LiteralPtr>
 StatusOr<std::unique_ptr<Literal>> HloRunner::Execute(
     std::unique_ptr<HloModule> module,
-    const tensorflow::gtl::ArraySlice<LiteralPtr> literals,
+    const tensorflow::gtl::ArraySlice<LiteralPtr> arguments,
     bool run_hlo_passes) {
-  std::vector<perftools::gputools::DeviceMemoryBase> arguments;
-  for (const auto& literal : literals) {
-    TF_ASSIGN_OR_RETURN(perftools::gputools::DeviceMemoryBase argument,
-                        TransferToDevice(*literal));
-    arguments.push_back(argument);
+  // Construct a vector of plain pointers for the arguments.
+  std::vector<Literal*> argument_pointers;
+  for (const auto& argument : arguments) {
+    argument_pointers.push_back(&*argument);
   }
-  return ExecuteAndTransfer(std::move(module), arguments, run_hlo_passes);
+  return ExecuteInternal(std::move(module), argument_pointers, run_hlo_passes);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc
index 8ccbcaeee4a9c9e94b344231953e20ac8f4b2053..2594c29efd717b3bead34d326c28c7efdf093c50 100644
--- a/tensorflow/compiler/xla/service/hlo_scheduling.cc
+++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc
@@ -31,6 +31,8 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
 
+using ::tensorflow::strings::HumanReadableNumBytes;
+
 namespace xla {
 
 StatusOr<int64> MinimumMemoryForSequence(
@@ -367,7 +369,17 @@ StatusOr<int64> MinimumMemoryForComputation(
 StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
-    const LogicalBuffer::SizeFunction& size_function) {
+    const LogicalBuffer::SizeFunction& size_function,
+    SchedulerAlgorithm algorithm) {
+  VLOG(2) << "Computation: " << computation.name();
+  if (algorithm == SchedulerAlgorithm::kListSchedule) {
+    return ListScheduler::Run(computation, points_to_analysis, size_function);
+  }
+  if (algorithm == SchedulerAlgorithm::kDfsSchedule) {
+    return RunDFSMemoryScheduler(computation, points_to_analysis,
+                                 size_function);
+  }
+
   // We try both a list-scheduler based ordering and a DFS based ordering, and
   // choose whichever returns a lower min-memory, not accounting for
   // fragmentation.
@@ -382,7 +394,7 @@ StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
       const int64 list_memory,
       MinimumMemoryForComputation(computation, list_sequence,
                                   points_to_analysis, size_function));
-  VLOG(2) << "Min-memory list sequence: " << list_memory << " bytes";
+  VLOG(2) << "Min-memory list sequence: " << HumanReadableNumBytes(list_memory);
 
   TF_ASSIGN_OR_RETURN(
       std::vector<const HloInstruction*> dfs_sequence,
@@ -391,13 +403,15 @@ StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
       const int64 dfs_memory,
       MinimumMemoryForComputation(computation, dfs_sequence, points_to_analysis,
                                   size_function));
-  VLOG(2) << "Min-memory dfs sequence: " << dfs_memory << " bytes";
+  VLOG(2) << "Min-memory dfs sequence: " << HumanReadableNumBytes(dfs_memory);
 
   if (list_memory <= dfs_memory) {
-    VLOG(2) << "Chose min-memory list sequence: " << list_memory << " bytes";
+    VLOG(2) << "Chose min-memory list sequence: "
+            << HumanReadableNumBytes(list_memory);
     return list_sequence;
   } else {
-    VLOG(2) << "Chose min-memory dfs sequence: " << dfs_memory << " bytes";
+    VLOG(2) << "Chose min-memory dfs sequence: "
+            << HumanReadableNumBytes(dfs_memory);
     return dfs_sequence;
   }
 }
@@ -405,27 +419,30 @@ StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
 }  // namespace
 
 StatusOr<SequentialHloOrdering::HloModuleSequence>
-CreateMemoryMinimizingSequence(
-    const HloModule& module, const LogicalBuffer::SizeFunction& size_function) {
+CreateMemoryMinimizingSequence(const HloModule& module,
+                               const LogicalBuffer::SizeFunction& size_function,
+                               SchedulerAlgorithm algorithm) {
   SequentialHloOrdering::HloModuleSequence sequence;
   TF_ASSIGN_OR_RETURN(std::unique_ptr<TuplePointsToAnalysis> points_to_analysis,
                       TuplePointsToAnalysis::Run(&module));
   for (const auto* computation : module.MakeNonfusionComputations()) {
-    TF_ASSIGN_OR_RETURN(sequence[computation],
-                        CreateMemoryMinimizingSequence(
-                            *computation, *points_to_analysis, size_function));
+    TF_ASSIGN_OR_RETURN(
+        sequence[computation],
+        CreateMemoryMinimizingSequence(*computation, *points_to_analysis,
+                                       size_function, algorithm));
   }
   return sequence;
 }
 
 StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
     const HloComputation& computation,
-    const LogicalBuffer::SizeFunction& size_function) {
+    const LogicalBuffer::SizeFunction& size_function,
+    SchedulerAlgorithm algorithm) {
   CHECK(!computation.IsFusionComputation());
   TF_ASSIGN_OR_RETURN(std::unique_ptr<TuplePointsToAnalysis> points_to_analysis,
                       TuplePointsToAnalysis::Run(computation.parent()));
   return CreateMemoryMinimizingSequence(computation, *points_to_analysis,
-                                        size_function);
+                                        size_function, algorithm);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.h b/tensorflow/compiler/xla/service/hlo_scheduling.h
index ec92a56b962152b15981f868369683144aa7c76a..1d1eb1e064f75c2220b39e84b010e720a0c37880 100644
--- a/tensorflow/compiler/xla/service/hlo_scheduling.h
+++ b/tensorflow/compiler/xla/service/hlo_scheduling.h
@@ -33,17 +33,28 @@ StatusOr<int64> MinimumMemoryForSequence(
     const SequentialHloOrdering::HloModuleSequence& module_sequence,
     const LogicalBuffer::SizeFunction& size_function);
 
+enum class SchedulerAlgorithm {
+  kListSchedule,
+  kDfsSchedule,
+
+  // Selects the available scheduler algorithm that had the minimum memory in
+  // the resulting sequence (a la MinimumMemoryForSequence).
+  kAuto,
+};
+
 // Returns an HloModuleSequence which seeks to minimize the memory required for
 // the computation. size_function is the function returning the number of bytes
 // required for a LogicalBuffer.
 StatusOr<SequentialHloOrdering::HloModuleSequence>
 CreateMemoryMinimizingSequence(
-    const HloModule& module, const LogicalBuffer::SizeFunction& size_function);
+    const HloModule& module, const LogicalBuffer::SizeFunction& size_function,
+    SchedulerAlgorithm algorithm = SchedulerAlgorithm::kAuto);
 
 // Overload of above that computes the sequence for a single computation.
 StatusOr<std::vector<const HloInstruction*>> CreateMemoryMinimizingSequence(
     const HloComputation& computation,
-    const LogicalBuffer::SizeFunction& size_function);
+    const LogicalBuffer::SizeFunction& size_function,
+    SchedulerAlgorithm algorithm = SchedulerAlgorithm::kAuto);
 
 }  // namespace xla
 
diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc
index d1adec31c21fe55001db4d522ddda27dd538bc95..447c2446668253c932b44b51b2db22bfd47f9957 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding.cc
@@ -246,7 +246,8 @@ Status HloSharding::ValidateNonTuple(const Shape& shape,
   // The tile rank must be the same as the input rank.
   if (ShapeUtil::Rank(shape) != ShapeUtil::Rank(tile_shape_)) {
     return tensorflow::errors::InvalidArgument(
-        "Tile rank is different to the input rank");
+        "Tile rank is different to the input rank. sharding=", ToString(),
+        ", input_shape=", ShapeUtil::HumanString(shape));
   }
 
   // The tile shape must not be the same as the input shape without maximal_
diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h
index 1a6988a2dc872a39ff6b0551adf7ddb871f0d72a..7263198385cf0c84b1dac1e15177dcac99adaafb 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.h
+++ b/tensorflow/compiler/xla/service/hlo_sharding.h
@@ -80,6 +80,17 @@ class HloSharding {
     return HloSharding(flattened_list);
   }
 
+  // Creates a new sharding for a tuple type. The requested tuple shape must not
+  // be nested. For nested tuples, use the ShapeTree overload.
+  static HloSharding Tuple(const Shape& tuple_shape,
+                           tensorflow::gtl::ArraySlice<HloSharding> shardings) {
+    CHECK(ShapeUtil::IsTuple(tuple_shape));
+    CHECK(!ShapeUtil::IsNestedTuple(tuple_shape));
+    std::vector<HloSharding> flattened_list(shardings.begin(), shardings.end());
+    CHECK_EQ(flattened_list.size(), ShapeUtil::TupleElementCount(tuple_shape));
+    return HloSharding(flattened_list);
+  }
+
   // Create a new sharding from a protobuf OpSharding.
   static StatusOr<HloSharding> FromProto(const OpSharding& proto);
 
diff --git a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc
index 101a710d1cad9401134fdfe1d0ec9df241bc01e1..3dc733940fc89952bd5e75a9b28d9cbf356f8000 100644
--- a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc
+++ b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc
@@ -166,7 +166,7 @@ void HloTfGraphBuilder::SetNodeAttrs(const HloInstruction* instruction,
       layout_string = ShapeUtil::HumanStringWithLayout(instruction->shape());
     } else {
       layout_string = StrCat(
-          "{", Join(instruction->shape().layout().minor_to_major(), ","), "}");
+          "{", Join(LayoutUtil::MinorToMajor(instruction->shape()), ","), "}");
     }
     attrs["layout"].set_s(layout_string);
   }
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 15188c4057eca8eea1805e599cd020c045fdd10a..9d9cf0c0f67f50a13f6d966079b3f9748b0a52e9 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -14,412 +14,400 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/hlo_verifier.h"
-#include "tensorflow/compiler/xla/service/shape_inference.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
-namespace {
+Status ShapeVerifier::HandleElementwiseUnary(HloInstruction* hlo) {
+  return CheckUnaryShape(hlo);
+}
 
-// Visitor which verifies that the output shape is correctly set. Verifies
-// against the inferred shape for the instruction.
-// TODO(b/26024837): Check output shape for all instruction types.
-class ShapeVerifier : public DfsHloVisitor {
- public:
-  explicit ShapeVerifier(
-      const std::function<int64(const Shape&)>& shape_size_fn)
-      : shape_size_fn_(shape_size_fn) {}
+Status ShapeVerifier::HandleElementwiseBinary(HloInstruction* hlo) {
+  return CheckBinaryShape(hlo);
+}
 
-  Status HandleElementwiseUnary(HloInstruction* hlo) override {
-    return CheckUnaryShape(hlo);
-  }
+Status ShapeVerifier::HandleClamp(HloInstruction* clamp) {
+  return CheckTernaryShape(clamp);
+}
 
-  Status HandleElementwiseBinary(HloInstruction* hlo) override {
-    return CheckBinaryShape(hlo);
-  }
+Status ShapeVerifier::HandleSelect(HloInstruction* select) {
+  return CheckTernaryShape(select);
+}
 
-  Status HandleClamp(HloInstruction* clamp) override {
-    return CheckTernaryShape(clamp);
+Status ShapeVerifier::HandleConcatenate(HloInstruction* concatenate) {
+  std::vector<const Shape*> operand_shapes;
+  for (const HloInstruction* operand : concatenate->operands()) {
+    operand_shapes.push_back(&operand->shape());
   }
+  return CheckShape(concatenate,
+                    ShapeInference::InferConcatOpShape(
+                        operand_shapes, concatenate->concatenate_dimension()));
+}
 
-  Status HandleSelect(HloInstruction* select) override {
-    return CheckTernaryShape(select);
-  }
+Status ShapeVerifier::HandleConvert(HloInstruction* convert) {
+  return CheckShape(convert, ShapeInference::InferConvertShape(
+                                 convert->operand(0)->shape(),
+                                 convert->shape().element_type()));
+}
 
-  Status HandleConcatenate(HloInstruction* concatenate) override {
-    std::vector<const Shape*> operand_shapes;
-    for (const HloInstruction* operand : concatenate->operands()) {
-      operand_shapes.push_back(&operand->shape());
-    }
-    return CheckShape(
-        concatenate, ShapeInference::InferConcatOpShape(
-                         operand_shapes, concatenate->concatenate_dimension()));
-  }
+Status ShapeVerifier::HandleBitcastConvert(HloInstruction* convert) {
+  return CheckShape(convert, ShapeInference::InferBitcastConvertShape(
+                                 convert->operand(0)->shape(),
+                                 convert->shape().element_type()));
+}
 
-  Status HandleConvert(HloInstruction* convert) override {
-    return CheckShape(convert, ShapeInference::InferConvertShape(
-                                   convert->operand(0)->shape(),
-                                   convert->shape().element_type()));
-  }
+Status ShapeVerifier::HandleCopy(HloInstruction* copy) {
+  return CheckUnaryShape(copy);
+}
 
-  Status HandleBitcastConvert(HloInstruction* convert) override {
-    return CheckShape(convert, ShapeInference::InferBitcastConvertShape(
-                                   convert->operand(0)->shape(),
-                                   convert->shape().element_type()));
-  }
+Status ShapeVerifier::HandleDot(HloInstruction* dot) {
+  TF_ASSIGN_OR_RETURN(const Shape expected,
+                      ShapeInference::InferDotOpShape(
+                          dot->operand(0)->shape(), dot->operand(1)->shape(),
+                          dot->dot_dimension_numbers()));
+  return CheckShape(dot, expected);
+}
 
-  Status HandleCopy(HloInstruction* copy) override {
-    return CheckUnaryShape(copy);
-  }
+Status ShapeVerifier::HandleConvolution(HloInstruction* convolution) {
+  TF_ASSIGN_OR_RETURN(
+      const Shape expected,
+      ShapeInference::InferConvolveShape(
+          convolution->operand(0)->shape(), convolution->operand(1)->shape(),
+          convolution->window(), convolution->convolution_dimension_numbers()));
+  return CheckShape(convolution, expected);
+}
 
-  Status HandleDot(HloInstruction* dot) override {
-    return CheckBinaryShape(dot);
-  }
+Status ShapeVerifier::HandleFft(HloInstruction* fft) {
+  TF_ASSIGN_OR_RETURN(
+      const Shape expected,
+      ShapeInference::InferFftShape(fft->operand(0)->shape(), fft->fft_type(),
+                                    fft->fft_length()));
+  return CheckShape(fft, expected);
+}
 
-  Status HandleConvolution(HloInstruction* convolution) override {
-    TF_ASSIGN_OR_RETURN(
-        const Shape expected,
-        ShapeInference::InferConvolveShape(
-            convolution->operand(0)->shape(), convolution->operand(1)->shape(),
-            convolution->window(),
-            convolution->convolution_dimension_numbers()));
-    return CheckShape(convolution, expected);
+Status ShapeVerifier::HandleCrossReplicaSum(HloInstruction* crs) {
+  std::vector<const Shape*> operand_shapes;
+  for (const HloInstruction* operand : crs->operands()) {
+    operand_shapes.push_back(&operand->shape());
   }
+  return CheckShape(crs,
+                    ShapeInference::InferCrossReplicaSumShape(operand_shapes));
+}
 
-  Status HandleCrossReplicaSum(HloInstruction* crs) override {
-    return CheckShape(crs, ShapeInference::InferCrossReplicaSumShape(
-                               crs->operand(0)->shape()));
-  }
+Status ShapeVerifier::HandleReducePrecision(HloInstruction* reduce_precision) {
+  return CheckShape(reduce_precision, ShapeInference::InferReducePrecisionShape(
+                                          reduce_precision->operand(0)->shape(),
+                                          reduce_precision->exponent_bits(),
+                                          reduce_precision->mantissa_bits()));
+}
 
-  Status HandleReducePrecision(HloInstruction* reduce_precision) override {
-    return CheckShape(reduce_precision,
-                      ShapeInference::InferReducePrecisionShape(
-                          reduce_precision->operand(0)->shape(),
-                          reduce_precision->exponent_bits(),
-                          reduce_precision->mantissa_bits()));
-  }
+Status ShapeVerifier::HandleInfeed(HloInstruction*) {
+  return tensorflow::Status::OK();
+}
 
-  Status HandleInfeed(HloInstruction*) override {
-    return tensorflow::Status::OK();
-  }
+Status ShapeVerifier::HandleOutfeed(HloInstruction*) {
+  return tensorflow::Status::OK();
+}
 
-  Status HandleOutfeed(HloInstruction*) override {
-    return tensorflow::Status::OK();
-  }
+Status ShapeVerifier::HandleRng(HloInstruction*) {
+  return tensorflow::Status::OK();
+}
 
-  Status HandleRng(HloInstruction*) override {
-    return tensorflow::Status::OK();
-  }
+Status ShapeVerifier::HandleReverse(HloInstruction* reverse) {
+  return CheckShape(
+      reverse, ShapeInference::InferReverseShape(reverse->operand(0)->shape(),
+                                                 reverse->dimensions()));
+}
 
-  Status HandleReverse(HloInstruction* reverse) override {
-    return CheckShape(
-        reverse, ShapeInference::InferReverseShape(reverse->operand(0)->shape(),
-                                                   reverse->dimensions()));
-  }
+Status ShapeVerifier::HandleSort(HloInstruction* sort) {
+  return CheckUnaryShape(sort);
+}
 
-  Status HandleSort(HloInstruction* sort) override {
-    return CheckUnaryShape(sort);
-  }
+Status ShapeVerifier::HandleConstant(HloInstruction* constant) {
+  return CheckShape(constant, constant->literal().shape());
+}
 
-  Status HandleConstant(HloInstruction* constant) override {
-    return CheckShape(constant, constant->literal().shape());
-  }
+Status ShapeVerifier::HandleGetTupleElement(HloInstruction* get_tuple_element) {
+  return CheckShape(get_tuple_element,
+                    ShapeInference::InferGetTupleElementShape(
+                        get_tuple_element->operand(0)->shape(),
+                        get_tuple_element->tuple_index()));
+}
 
-  Status HandleGetTupleElement(HloInstruction* get_tuple_element) override {
-    return CheckShape(get_tuple_element,
-                      ShapeInference::InferGetTupleElementShape(
-                          get_tuple_element->operand(0)->shape(),
-                          get_tuple_element->tuple_index()));
-  }
+Status ShapeVerifier::HandleReduce(HloInstruction* reduce) {
+  return CheckShape(
+      reduce,
+      ShapeInference::InferReduceShape(
+          reduce->operand(0)->shape(), reduce->operand(1)->shape(),
+          reduce->dimensions(), reduce->to_apply()->ComputeProgramShape()));
+}
 
-  Status HandleReduce(HloInstruction* reduce) override {
-    return CheckShape(
-        reduce,
-        ShapeInference::InferReduceShape(
-            reduce->operand(0)->shape(), reduce->operand(1)->shape(),
-            reduce->dimensions(), reduce->to_apply()->ComputeProgramShape()));
-  }
+Status ShapeVerifier::HandleBitcast(HloInstruction* bitcast) {
+  return tensorflow::Status::OK();
+}
 
-  Status HandleBitcast(HloInstruction* bitcast) override {
-    // Bitcasts can be any shape, as long as the size matches the operand size.
-    TF_RET_CHECK(shape_size_fn_(bitcast->shape()) ==
-                 shape_size_fn_(bitcast->operand(0)->shape()));
-    return tensorflow::Status::OK();
+Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) {
+  // HLO broadcast has no exact analog at the proto level so there is no
+  // ShapeInference method. Check the output shape explicitly.
+  const Shape& operand_shape = broadcast->operand(0)->shape();
+  TF_RET_CHECK(ShapeUtil::Rank(operand_shape) ==
+               broadcast->dimensions().size());
+  for (int64 operand_dimension = 0;
+       operand_dimension < ShapeUtil::Rank(operand_shape);
+       ++operand_dimension) {
+    int64 output_dimension = broadcast->dimensions()[operand_dimension];
+    TF_RET_CHECK(broadcast->shape().dimensions(output_dimension) ==
+                 operand_shape.dimensions(operand_dimension));
   }
+  return tensorflow::Status::OK();
+}
 
-  Status HandleBroadcast(HloInstruction* broadcast) override {
-    // HLO broadcast has no exact analog at the proto level so there is no
-    // ShapeInference method. Check the output shape explicitly.
-    const Shape& operand_shape = broadcast->operand(0)->shape();
-    TF_RET_CHECK(ShapeUtil::Rank(operand_shape) ==
-                 broadcast->dimensions().size());
-    for (int64 operand_dimension = 0;
-         operand_dimension < ShapeUtil::Rank(operand_shape);
-         ++operand_dimension) {
-      int64 output_dimension = broadcast->dimensions()[operand_dimension];
-      TF_RET_CHECK(broadcast->shape().dimensions(output_dimension) ==
-                   operand_shape.dimensions(operand_dimension));
-    }
-    return tensorflow::Status::OK();
-  }
+Status ShapeVerifier::HandleReshape(HloInstruction* reshape) {
+  TF_RET_CHECK(ShapeUtil::ElementsIn(reshape->shape()) ==
+               ShapeUtil::ElementsIn(reshape->operand(0)->shape()));
+  return tensorflow::Status::OK();
+}
 
-  Status HandleReshape(HloInstruction* reshape) override {
-    TF_RET_CHECK(ShapeUtil::ElementsIn(reshape->shape()) ==
-                 ShapeUtil::ElementsIn(reshape->operand(0)->shape()));
-    return tensorflow::Status::OK();
-  }
+Status ShapeVerifier::HandleTranspose(HloInstruction* transpose) {
+  return CheckShape(
+      transpose, ShapeInference::InferTransposeShape(
+                     transpose->operand(0)->shape(), transpose->dimensions()));
+}
 
-  Status HandleTranspose(HloInstruction* transpose) override {
-    return CheckShape(transpose, ShapeInference::InferTransposeShape(
-                                     transpose->operand(0)->shape(),
-                                     transpose->dimensions()));
-  }
+Status ShapeVerifier::HandleParameter(HloInstruction*) {
+  return tensorflow::Status::OK();
+}
 
-  Status HandleParameter(HloInstruction*) override {
-    return tensorflow::Status::OK();
-  }
+Status ShapeVerifier::HandleFusion(HloInstruction*) {
+  return tensorflow::Status::OK();
+}
 
-  Status HandleFusion(HloInstruction*) override {
-    return tensorflow::Status::OK();
-  }
+Status ShapeVerifier::HandleCall(HloInstruction* call) {
+  // The shape of kCall should match the shape of the computation it calls.
+  return CheckShape(call, call->to_apply()->ComputeProgramShape().result());
+}
 
-  Status HandleCall(HloInstruction* call) override {
-    // The shape of kCall should match the shape of the computation it calls.
-    return CheckShape(call, call->to_apply()->ComputeProgramShape().result());
-  }
+Status ShapeVerifier::HandleCustomCall(HloInstruction*) {
+  return tensorflow::Status::OK();
+}
 
-  Status HandleCustomCall(HloInstruction*) override {
-    return tensorflow::Status::OK();
-  }
+Status ShapeVerifier::HandleSlice(HloInstruction* slice) {
+  return CheckShape(slice,
+                    ShapeInference::InferSliceShape(
+                        slice->operand(0)->shape(), slice->slice_starts(),
+                        slice->slice_limits(), slice->slice_strides()));
+}
 
-  Status HandleSlice(HloInstruction* slice) override {
-    return CheckShape(slice,
-                      ShapeInference::InferSliceShape(
-                          slice->operand(0)->shape(), slice->slice_starts(),
-                          slice->slice_limits(), slice->slice_strides()));
-  }
+Status ShapeVerifier::HandleDynamicSlice(HloInstruction* dynamic_slice) {
+  return CheckShape(dynamic_slice, ShapeInference::InferDynamicSliceShape(
+                                       dynamic_slice->operand(0)->shape(),
+                                       dynamic_slice->operand(1)->shape(),
+                                       dynamic_slice->dynamic_slice_sizes()));
+}
 
-  Status HandleDynamicSlice(HloInstruction* dynamic_slice) override {
-    return CheckShape(dynamic_slice, ShapeInference::InferDynamicSliceShape(
-                                         dynamic_slice->operand(0)->shape(),
-                                         dynamic_slice->operand(1)->shape(),
-                                         dynamic_slice->dynamic_slice_sizes()));
-  }
+Status ShapeVerifier::HandleDynamicUpdateSlice(
+    HloInstruction* dynamic_update_slice) {
+  return CheckShape(dynamic_update_slice,
+                    ShapeInference::InferDynamicUpdateSliceShape(
+                        dynamic_update_slice->operand(0)->shape(),
+                        dynamic_update_slice->operand(1)->shape(),
+                        dynamic_update_slice->operand(2)->shape()));
+}
 
-  Status HandleDynamicUpdateSlice(
-      HloInstruction* dynamic_update_slice) override {
-    return CheckShape(dynamic_update_slice,
-                      ShapeInference::InferDynamicUpdateSliceShape(
-                          dynamic_update_slice->operand(0)->shape(),
-                          dynamic_update_slice->operand(1)->shape(),
-                          dynamic_update_slice->operand(2)->shape()));
-  }
+Status ShapeVerifier::HandleTuple(HloInstruction* tuple) {
+  return CheckVariadicShape(tuple);
+}
 
-  Status HandleTuple(HloInstruction* tuple) override {
-    return CheckVariadicShape(tuple);
-  }
+Status ShapeVerifier::HandleMap(HloInstruction* map) {
+  std::vector<const Shape*> operand_shapes;
+  int64 max_operand_rank = 0;
+  for (const HloInstruction* operand : map->operands()) {
+    operand_shapes.push_back(&operand->shape());
+    max_operand_rank =
+        std::max(max_operand_rank, ShapeUtil::Rank(operand->shape()));
+  }
+  // TODO(b/65689298) Remove code below once Map is generalized to accept
+  // arbitrary map dimensions.
+  std::vector<int64> map_dims(max_operand_rank);
+  std::iota(map_dims.begin(), map_dims.end(), 0);
+  return CheckShape(map, ShapeInference::InferMapShape(
+                             operand_shapes,
+                             map->to_apply()->ComputeProgramShape(), map_dims));
+}
 
-  Status HandleMap(HloInstruction* map) override {
-    std::vector<const Shape*> operand_shapes;
-    int64 max_operand_rank = 0;
-    for (const HloInstruction* operand : map->operands()) {
-      operand_shapes.push_back(&operand->shape());
-      max_operand_rank =
-          std::max(max_operand_rank, ShapeUtil::Rank(operand->shape()));
-    }
-    // TODO(b/65689298) Remove code below once Map is generalized to accept
-    // arbitrary map dimensions.
-    std::vector<int64> map_dims(max_operand_rank);
-    std::iota(map_dims.begin(), map_dims.end(), 0);
-    return CheckShape(
-        map,
-        ShapeInference::InferMapShape(
-            operand_shapes, map->to_apply()->ComputeProgramShape(), map_dims));
-  }
+Status ShapeVerifier::HandleReduceWindow(HloInstruction* reduce_window) {
+  return CheckShape(
+      reduce_window,
+      ShapeInference::InferReduceWindowShape(
+          reduce_window->operand(0)->shape(),
+          reduce_window->operand(1)->shape(), reduce_window->window(),
+          reduce_window->to_apply()->ComputeProgramShape()));
+}
 
-  Status HandleReduceWindow(HloInstruction* reduce_window) override {
-    return CheckShape(
-        reduce_window,
-        ShapeInference::InferReduceWindowShape(
-            reduce_window->operand(0)->shape(),
-            reduce_window->operand(1)->shape(), reduce_window->window(),
-            reduce_window->to_apply()->ComputeProgramShape()));
-  }
+Status ShapeVerifier::HandleSelectAndScatter(HloInstruction* instruction) {
+  return CheckShape(
+      instruction,
+      ShapeInference::InferSelectAndScatterShape(
+          instruction->operand(0)->shape(),
+          instruction->select()->ComputeProgramShape(), instruction->window(),
+          instruction->operand(1)->shape(), instruction->operand(2)->shape(),
+          instruction->scatter()->ComputeProgramShape()));
+}
 
-  Status HandleSelectAndScatter(HloInstruction* instruction) override {
-    return CheckShape(
-        instruction,
-        ShapeInference::InferSelectAndScatterShape(
-            instruction->operand(0)->shape(),
-            instruction->select()->ComputeProgramShape(), instruction->window(),
-            instruction->operand(1)->shape(), instruction->operand(2)->shape(),
-            instruction->scatter()->ComputeProgramShape()));
-  }
+Status ShapeVerifier::HandleWhile(HloInstruction* xla_while) {
+  // The shape of kWhile should match the shape of the body computation it
+  // calls.
+  return CheckShape(xla_while,
+                    xla_while->while_body()->ComputeProgramShape().result());
+}
 
-  Status HandleWhile(HloInstruction* xla_while) override {
-    // The shape of kWhile should match the shape of the body computation it
-    // calls.
-    return CheckShape(xla_while,
-                      xla_while->while_body()->ComputeProgramShape().result());
-  }
+Status ShapeVerifier::HandleConditional(HloInstruction* conditional) {
+  TF_RETURN_IF_ERROR(CheckShape(
+      conditional,
+      conditional->true_computation()->ComputeProgramShape().result()));
+  return CheckShape(
+      conditional,
+      conditional->false_computation()->ComputeProgramShape().result());
+}
 
-  Status HandleConditional(HloInstruction* conditional) override {
-    TF_RETURN_IF_ERROR(CheckShape(
-        conditional,
-        conditional->true_computation()->ComputeProgramShape().result()));
-    return CheckShape(
-        conditional,
-        conditional->false_computation()->ComputeProgramShape().result());
-  }
+Status ShapeVerifier::HandlePad(HloInstruction* pad) {
+  return CheckShape(pad, ShapeInference::InferPadShape(pad->operand(0)->shape(),
+                                                       pad->operand(1)->shape(),
+                                                       pad->padding_config()));
+}
 
-  Status HandlePad(HloInstruction* pad) override {
-    return CheckShape(pad,
-                      ShapeInference::InferPadShape(pad->operand(0)->shape(),
-                                                    pad->operand(1)->shape(),
-                                                    pad->padding_config()));
-  }
+Status ShapeVerifier::HandleSend(HloInstruction* send) {
+  TF_RET_CHECK(send->users().size() == 1);
+  const HloInstruction* send_done = send->users().front();
+  TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone);
+  TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done));
+  return CheckShape(
+      send, ShapeUtil::MakeTupleShape(
+                {send->operand(0)->shape(), ShapeUtil::MakeShape(U32, {})}));
+}
 
-  Status HandleSend(HloInstruction* send) override {
-    TF_RET_CHECK(send->users().size() == 1);
-    const HloInstruction* send_done = send->users().front();
-    TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone);
-    TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done));
-    return CheckShape(
-        send, ShapeUtil::MakeTupleShape(
-                  {send->operand(0)->shape(), ShapeUtil::MakeShape(U32, {})}));
-  }
+Status ShapeVerifier::HandleSendDone(HloInstruction* send_done) {
+  TF_RET_CHECK(send_done->operands().size() == 1);
+  const HloInstruction* send = send_done->operand(0);
+  TF_RET_CHECK(send->opcode() == HloOpcode::kSend);
+  TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done));
+  return CheckShape(send_done, ShapeUtil::MakeNil());
+}
 
-  Status HandleSendDone(HloInstruction* send_done) override {
-    TF_RET_CHECK(send_done->operands().size() == 1);
-    const HloInstruction* send = send_done->operand(0);
-    TF_RET_CHECK(send->opcode() == HloOpcode::kSend);
-    TF_RETURN_IF_ERROR(CheckSameChannel(send, send_done));
-    return CheckShape(send_done, ShapeUtil::MakeNil());
-  }
+Status ShapeVerifier::HandleRecv(HloInstruction* recv) {
+  TF_RET_CHECK(recv->users().size() == 1);
+  const HloInstruction* recv_done = recv->users().front();
+  TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone);
+  TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done));
+  return CheckShape(recv,
+                    ShapeUtil::MakeTupleShape(
+                        {recv_done->shape(), ShapeUtil::MakeShape(U32, {})}));
+}
 
-  Status HandleRecv(HloInstruction* recv) override {
-    TF_RET_CHECK(recv->users().size() == 1);
-    const HloInstruction* recv_done = recv->users().front();
-    TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone);
-    TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done));
-    return CheckShape(recv,
-                      ShapeUtil::MakeTupleShape(
-                          {recv_done->shape(), ShapeUtil::MakeShape(U32, {})}));
-  }
+Status ShapeVerifier::HandleRecvDone(HloInstruction* recv_done) {
+  TF_RET_CHECK(recv_done->operands().size() == 1);
+  const HloInstruction* recv = recv_done->operand(0);
+  TF_RET_CHECK(recv->opcode() == HloOpcode::kRecv);
+  TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done));
+  return CheckShape(recv_done, recv->shape().tuple_shapes(0));
+}
 
-  Status HandleRecvDone(HloInstruction* recv_done) override {
-    TF_RET_CHECK(recv_done->operands().size() == 1);
-    const HloInstruction* recv = recv_done->operand(0);
-    TF_RET_CHECK(recv->opcode() == HloOpcode::kRecv);
-    TF_RETURN_IF_ERROR(CheckSameChannel(recv, recv_done));
-    return CheckShape(recv_done, recv->shape().tuple_shapes(0));
-  }
+Status ShapeVerifier::HandleBatchNormTraining(
+    HloInstruction* batch_norm_training) {
+  return CheckShape(batch_norm_training,
+                    ShapeInference::InferBatchNormTrainingShape(
+                        batch_norm_training->operand(0)->shape(),
+                        batch_norm_training->operand(1)->shape(),
+                        batch_norm_training->operand(2)->shape(),
+                        batch_norm_training->feature_index()));
+}
 
-  Status HandleBatchNormTraining(HloInstruction* batch_norm_training) override {
-    return CheckShape(batch_norm_training,
-                      ShapeInference::InferBatchNormTrainingShape(
-                          batch_norm_training->operand(0)->shape(),
-                          batch_norm_training->operand(1)->shape(),
-                          batch_norm_training->operand(2)->shape(),
-                          batch_norm_training->feature_index()));
-  }
+Status ShapeVerifier::HandleBatchNormInference(
+    HloInstruction* batch_norm_inference) {
+  return CheckShape(batch_norm_inference,
+                    ShapeInference::InferBatchNormInferenceShape(
+                        batch_norm_inference->operand(0)->shape(),
+                        batch_norm_inference->operand(1)->shape(),
+                        batch_norm_inference->operand(2)->shape(),
+                        batch_norm_inference->operand(3)->shape(),
+                        batch_norm_inference->operand(4)->shape(),
+                        batch_norm_inference->feature_index()));
+}
 
-  Status HandleBatchNormInference(
-      HloInstruction* batch_norm_inference) override {
-    return CheckShape(batch_norm_inference,
-                      ShapeInference::InferBatchNormInferenceShape(
-                          batch_norm_inference->operand(0)->shape(),
-                          batch_norm_inference->operand(1)->shape(),
-                          batch_norm_inference->operand(2)->shape(),
-                          batch_norm_inference->operand(3)->shape(),
-                          batch_norm_inference->operand(4)->shape(),
-                          batch_norm_inference->feature_index()));
-  }
+Status ShapeVerifier::HandleBatchNormGrad(HloInstruction* batch_norm_grad) {
+  return CheckShape(batch_norm_grad, ShapeInference::InferBatchNormGradShape(
+                                         batch_norm_grad->operand(0)->shape(),
+                                         batch_norm_grad->operand(1)->shape(),
+                                         batch_norm_grad->operand(2)->shape(),
+                                         batch_norm_grad->operand(3)->shape(),
+                                         batch_norm_grad->operand(4)->shape(),
+                                         batch_norm_grad->feature_index()));
+}
 
-  Status HandleBatchNormGrad(HloInstruction* batch_norm_grad) override {
-    return CheckShape(batch_norm_grad, ShapeInference::InferBatchNormGradShape(
-                                           batch_norm_grad->operand(0)->shape(),
-                                           batch_norm_grad->operand(1)->shape(),
-                                           batch_norm_grad->operand(2)->shape(),
-                                           batch_norm_grad->operand(3)->shape(),
-                                           batch_norm_grad->operand(4)->shape(),
-                                           batch_norm_grad->feature_index()));
+Status ShapeVerifier::CheckShape(const HloInstruction* instruction,
+                                 const Shape& expected_shape) {
+  if (!ShapeUtil::Compatible(instruction->shape(), expected_shape)) {
+    return InvalidArgument(
+        "Expected instruction to have shape compatible with %s, actual "
+        "shape is %s:\n%s",
+        ShapeUtil::HumanString(expected_shape).c_str(),
+        ShapeUtil::HumanString(instruction->shape()).c_str(),
+        instruction->ToString().c_str());
   }
+  return tensorflow::Status::OK();
+}
 
-  Status FinishVisit(HloInstruction*) override {
-    return tensorflow::Status::OK();
+Status ShapeVerifier::CheckShape(const HloInstruction* instruction,
+                                 const StatusOr<Shape>& expected_shape_status) {
+  if (!expected_shape_status.ok()) {
+    Status s = expected_shape_status.status();
+    tensorflow::errors::AppendToMessage(&s, ", for instruction ",
+                                        instruction->ToString());
+    return s;
   }
+  return CheckShape(instruction, expected_shape_status.ValueOrDie());
+}
 
- private:
-  // Check the instruction's shape against the given expected shape and return
-  // an appropriate error if there is a mismatch.
-  Status CheckShape(const HloInstruction* instruction,
-                    const Shape& expected_shape) {
-    if (!ShapeUtil::Compatible(instruction->shape(), expected_shape)) {
-      return InvalidArgument(
-          "Expected instruction to have shape compatible with %s, actual "
-          "shape is %s:\n%s",
-          ShapeUtil::HumanString(expected_shape).c_str(),
-          ShapeUtil::HumanString(instruction->shape()).c_str(),
-          instruction->ToString().c_str());
-    }
-    return tensorflow::Status::OK();
-  }
+Status ShapeVerifier::CheckUnaryShape(const HloInstruction* instruction) {
+  return CheckShape(instruction,
+                    ShapeInference::InferUnaryOpShape(instruction->opcode(),
+                                                      instruction->operand(0)));
+}
 
-  // Overload which takes a StatusOr to reduce boilerplate in the caller.
-  Status CheckShape(const HloInstruction* instruction,
-                    const StatusOr<Shape>& expected_shape_status) {
-    if (!expected_shape_status.ok()) {
-      Status s = expected_shape_status.status();
-      tensorflow::errors::AppendToMessage(&s, ", for instruction ",
-                                          instruction->ToString());
-      return s;
-    }
-    return CheckShape(instruction, expected_shape_status.ValueOrDie());
-  }
+Status ShapeVerifier::CheckBinaryShape(const HloInstruction* instruction) {
+  return CheckShape(
+      instruction, ShapeInference::InferBinaryOpShape(instruction->opcode(),
+                                                      instruction->operand(0),
+                                                      instruction->operand(1)));
+}
 
-  // Check a unary (binary, etc) instruction's shape against the inferred shape.
-  Status CheckUnaryShape(const HloInstruction* instruction) {
-    return CheckShape(instruction,
-                      ShapeInference::InferUnaryOpShape(
-                          instruction->opcode(), instruction->operand(0)));
-  }
-  Status CheckBinaryShape(const HloInstruction* instruction) {
-    return CheckShape(instruction,
-                      ShapeInference::InferBinaryOpShape(
-                          instruction->opcode(), instruction->operand(0),
-                          instruction->operand(1)));
-  }
-  Status CheckTernaryShape(const HloInstruction* instruction) {
-    return CheckShape(instruction,
-                      ShapeInference::InferTernaryOpShape(
-                          instruction->opcode(), instruction->operand(0),
-                          instruction->operand(1), instruction->operand(2)));
-  }
-  Status CheckVariadicShape(const HloInstruction* instruction) {
-    return CheckShape(instruction,
-                      ShapeInference::InferVariadicOpShape(
-                          instruction->opcode(), instruction->operands()));
-  }
+Status ShapeVerifier::CheckTernaryShape(const HloInstruction* instruction) {
+  return CheckShape(instruction,
+                    ShapeInference::InferTernaryOpShape(
+                        instruction->opcode(), instruction->operand(0),
+                        instruction->operand(1), instruction->operand(2)));
+}
 
-  // Checks if the given two instructions shares the same channel id.
-  Status CheckSameChannel(const HloInstruction* instr1,
-                          const HloInstruction* instr2) {
-    if (instr1->channel_id() != instr2->channel_id()) {
-      return FailedPrecondition(
-          "Expected to have the same channel id, actual channel ids are: %s "
-          "(%lld), %s (%lld)",
-          instr1->ToString().c_str(), instr1->channel_id(),
-          instr2->ToString().c_str(), instr2->channel_id());
-    }
-    return tensorflow::Status::OK();
-  }
+Status ShapeVerifier::CheckVariadicShape(const HloInstruction* instruction) {
+  return CheckShape(instruction,
+                    ShapeInference::InferVariadicOpShape(
+                        instruction->opcode(), instruction->operands()));
+}
 
-  // Returns the size of a Shape in bytes.
-  const std::function<int64(const Shape&)> shape_size_fn_;
-};
+// Checks if the given two instructions shares the same channel id.
+Status ShapeVerifier::CheckSameChannel(const HloInstruction* instr1,
+                                       const HloInstruction* instr2) {
+  if (instr1->channel_id() != instr2->channel_id()) {
+    return FailedPrecondition(
+        "Expected to have the same channel id, actual channel ids are: %s "
+        "(%lld), %s (%lld)",
+        instr1->ToString().c_str(), instr1->channel_id(),
+        instr2->ToString().c_str(), instr2->channel_id());
+  }
+  return tensorflow::Status::OK();
+}
 
 string ComputationsToString(
     tensorflow::gtl::ArraySlice<HloComputation*> computations) {
@@ -429,7 +417,62 @@ string ComputationsToString(
       });
 }
 
-}  // namespace
+// Verifies various invariants about the structure of the HLO:
+//
+// (1) each instruction has a non-null parent() set to the HloComputation which
+//     contains it.
+//
+// (2) each computation has a non-null parent() set to the HloModule which
+//     contains it.
+//
+// (3) the operands of each instruction are in the same computation as the
+//     instruction.
+Status VerifyHloStructure(HloModule* module) {
+  for (const HloComputation* computation : module->computations()) {
+    if (computation->parent() == nullptr) {
+      return FailedPrecondition("Computation %s has a null parent pointer",
+                                computation->name().c_str());
+    }
+    if (computation->parent() != module) {
+      return FailedPrecondition(
+          "Computation %s parent() does not point to parent module",
+          computation->name().c_str());
+    }
+
+    for (const HloInstruction* instruction : computation->instructions()) {
+      if (instruction->parent() == nullptr) {
+        return FailedPrecondition("Instruction %s has a null parent pointer",
+                                  instruction->name().c_str());
+      }
+      if (instruction->parent() != computation) {
+        return FailedPrecondition(
+            "Instruction %s parent() does not point to parent computation",
+            instruction->name().c_str());
+      }
+    }
+  }
+
+  // Check that operands are in the same computation separately from verifying
+  // parent() correctness so conditions like a null HloInstruction::parent() are
+  // identified and reported explicitly above rather than reporting a mismatched
+  // operand.
+  for (const HloComputation* computation : module->computations()) {
+    for (const HloInstruction* instruction : computation->instructions()) {
+      for (int i = 0; i < instruction->operand_count(); ++i) {
+        const HloInstruction* operand = instruction->operand(i);
+        if (operand->parent() != instruction->parent()) {
+          return FailedPrecondition(
+              "Operand %d (%s) of instruction %s is in a different "
+              "computation: %s vs %s",
+              i, operand->name().c_str(), instruction->name().c_str(),
+              operand->parent()->name().c_str(),
+              instruction->parent()->name().c_str());
+        }
+      }
+    }
+  }
+  return tensorflow::Status::OK();
+}
 
 Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const {
   // The parent fusion instruction of the fusion computation must be 'fusion'.
@@ -549,8 +592,9 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const {
 }
 
 StatusOr<bool> HloVerifier::Run(HloModule* module) {
+  TF_RETURN_IF_ERROR(VerifyHloStructure(module));
+
   tensorflow::gtl::FlatMap<string, const HloInstruction*> instructions;
-  ShapeVerifier shape_verifier(shape_size_fn_);
 
   for (auto* computation : module->computations()) {
     for (const auto& instruction : computation->instructions()) {
@@ -630,7 +674,7 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
       instructions[instruction->name()] = instruction;
     }
 
-    TF_RETURN_IF_ERROR(computation->Accept(&shape_verifier));
+    TF_RETURN_IF_ERROR(computation->Accept(shape_verifier_.get()));
   }
 
   return false;
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index e35a7f3642ccf91df37f69a3a11bd8c8e428b846..6368611f323ad7c1ebade4941260e12ed2c6e45f 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -18,14 +18,98 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 
+#include "tensorflow/compiler/xla/service/shape_inference.h"
+
 namespace xla {
 
+// Visitor which verifies that the output shape is correctly set. Verifies
+// against the inferred shape for the instruction.
+// TODO(b/26024837): Check output shape for all instruction types.
+class ShapeVerifier : public DfsHloVisitor {
+ public:
+  Status HandleElementwiseUnary(HloInstruction* hlo) override;
+  Status HandleElementwiseBinary(HloInstruction* hlo) override;
+  Status HandleClamp(HloInstruction* clamp) override;
+  Status HandleSelect(HloInstruction* select) override;
+  Status HandleConcatenate(HloInstruction* concatenate) override;
+  Status HandleConvert(HloInstruction* convert) override;
+  Status HandleBitcastConvert(HloInstruction* convert) override;
+  Status HandleCopy(HloInstruction* copy) override;
+  Status HandleDot(HloInstruction* dot) override;
+  Status HandleConvolution(HloInstruction* convolution) override;
+  Status HandleFft(HloInstruction* fft) override;
+  Status HandleCrossReplicaSum(HloInstruction* crs) override;
+  Status HandleReducePrecision(HloInstruction* reduce_precision) override;
+  Status HandleInfeed(HloInstruction*) override;
+  Status HandleOutfeed(HloInstruction*) override;
+  Status HandleRng(HloInstruction*) override;
+  Status HandleReverse(HloInstruction* reverse) override;
+  Status HandleSort(HloInstruction* sort) override;
+  Status HandleConstant(HloInstruction* constant) override;
+  Status HandleGetTupleElement(HloInstruction* get_tuple_element) override;
+  Status HandleReduce(HloInstruction* reduce) override;
+  Status HandleBitcast(HloInstruction* bitcast) override;
+  Status HandleBroadcast(HloInstruction* broadcast) override;
+  Status HandleReshape(HloInstruction* reshape) override;
+  Status HandleTranspose(HloInstruction* transpose) override;
+  Status HandleParameter(HloInstruction*) override;
+  Status HandleFusion(HloInstruction*) override;
+  Status HandleCall(HloInstruction* call) override;
+  Status HandleCustomCall(HloInstruction*) override;
+  Status HandleSlice(HloInstruction* slice) override;
+  Status HandleDynamicSlice(HloInstruction* dynamic_slice) override;
+  Status HandleDynamicUpdateSlice(
+      HloInstruction* dynamic_update_slice) override;
+  Status HandleTuple(HloInstruction* tuple) override;
+  Status HandleMap(HloInstruction* map) override;
+  Status HandleReduceWindow(HloInstruction* reduce_window) override;
+  Status HandleSelectAndScatter(HloInstruction* instruction) override;
+  Status HandleWhile(HloInstruction* xla_while) override;
+  Status HandleConditional(HloInstruction* conditional) override;
+  Status HandlePad(HloInstruction* pad) override;
+  Status HandleSend(HloInstruction* send) override;
+  Status HandleSendDone(HloInstruction* send_done) override;
+  Status HandleRecv(HloInstruction* recv) override;
+  Status HandleRecvDone(HloInstruction* recv_done) override;
+  Status HandleBatchNormTraining(HloInstruction* batch_norm_training) override;
+  Status HandleBatchNormInference(
+      HloInstruction* batch_norm_inference) override;
+  Status HandleBatchNormGrad(HloInstruction* batch_norm_grad) override;
+
+  Status FinishVisit(HloInstruction*) override {
+    return tensorflow::Status::OK();
+  }
+
+ protected:
+  // Check the instruction's shape against the given expected shape and return
+  // an appropriate error if there is a mismatch.
+  Status CheckShape(const HloInstruction* instruction,
+                    const Shape& expected_shape);
+
+  // Overload which takes a StatusOr to reduce boilerplate in the caller.
+  Status CheckShape(const HloInstruction* instruction,
+                    const StatusOr<Shape>& expected_shape_status);
+
+  // Check a unary (binary, etc) instruction's shape against the inferred shape.
+  Status CheckUnaryShape(const HloInstruction* instruction);
+  Status CheckBinaryShape(const HloInstruction* instruction);
+  Status CheckTernaryShape(const HloInstruction* instruction);
+  Status CheckVariadicShape(const HloInstruction* instruction);
+
+  // Checks if the given two instructions shares the same channel id.
+  Status CheckSameChannel(const HloInstruction* instr1,
+                          const HloInstruction* instr2);
+};
+
 // HLO pass that verifies invariants of HLO instructions for each computation in
 // the module.
 class HloVerifier : public HloPassInterface {
  public:
-  explicit HloVerifier(const std::function<int64(const Shape&)>& shape_size_fn)
-      : shape_size_fn_(shape_size_fn) {}
+  // Uses standard shape inference.
+  explicit HloVerifier() : shape_verifier_(MakeUnique<ShapeVerifier>()) {}
+  // Uses custom shape verification.
+  explicit HloVerifier(std::unique_ptr<ShapeVerifier> shape_verifier)
+      : shape_verifier_(std::move(shape_verifier)) {}
   ~HloVerifier() override = default;
   tensorflow::StringPiece name() const override { return "verifier"; }
 
@@ -37,8 +121,8 @@ class HloVerifier : public HloPassInterface {
   // CHECKs various invariants of a fusion instruction.
   Status CheckFusionInstruction(HloInstruction* fusion) const;
 
-  // Returns the size of a Shape in bytes.
-  const std::function<int64(const Shape&)> shape_size_fn_;
+  // Verifies shapes match inferred expectations.
+  std::unique_ptr<ShapeVerifier> shape_verifier_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_verifier_test.cc b/tensorflow/compiler/xla/service/hlo_verifier_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2a3b55decc5289e7e576d3c5897b333c0b1bc922
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_verifier_test.cc
@@ -0,0 +1,101 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_verifier.h"
+
+#include <memory>
+#include <utility>
+
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+using ::testing::HasSubstr;
+
+using HloVerifierTest = HloTestBase;
+
+TEST_F(HloVerifierTest, NullInstructionParent) {
+  HloComputation::Builder builder(TestName());
+  const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  HloInstruction* param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param"));
+  HloInstruction* negate = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape, HloOpcode::kNegate, param));
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  TF_ASSERT_OK(verifier().Run(module.get()).status());
+
+  negate->set_parent(nullptr);
+
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(), HasSubstr("has a null parent pointer"));
+}
+
+TEST_F(HloVerifierTest, NullComputationParent) {
+  HloComputation::Builder builder(TestName());
+  const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  HloInstruction* param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param"));
+  builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape, HloOpcode::kNegate, param));
+  auto module = CreateNewModule();
+  HloComputation* computation = module->AddEntryComputation(builder.Build());
+
+  TF_ASSERT_OK(verifier().Run(module.get()).status());
+
+  computation->set_parent(nullptr);
+
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(), HasSubstr("has a null parent pointer"));
+}
+
+TEST_F(HloVerifierTest, DifferentOperandParents) {
+  HloComputation::Builder builder(TestName());
+  const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  HloInstruction* param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param"));
+  HloInstruction* negate = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape, HloOpcode::kNegate, param));
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  HloComputation::Builder emb_builder(TestName());
+  HloInstruction* emb_param = emb_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param"));
+  module->AddEmbeddedComputation(emb_builder.Build());
+
+  TF_ASSERT_OK(verifier().Run(module.get()).status());
+  TF_ASSERT_OK(negate->ReplaceOperandWith(0, emb_param));
+
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              HasSubstr("is in a different computation"));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/human_readable_profile_builder.cc b/tensorflow/compiler/xla/service/human_readable_profile_builder.cc
index b7c40fdeeb157fc74900bd9cf9d68a06a2cb1d56..13e4557317f74b3fb46f07fb91c339fd2f34752f 100644
--- a/tensorflow/compiler/xla/service/human_readable_profile_builder.cc
+++ b/tensorflow/compiler/xla/service/human_readable_profile_builder.cc
@@ -25,6 +25,7 @@ namespace xla {
 using tensorflow::strings::Appendf;
 using tensorflow::strings::HumanReadableElapsedTime;
 using tensorflow::strings::HumanReadableNumBytes;
+using tensorflow::strings::Printf;
 using tensorflow::strings::StrAppend;
 
 string HumanReadableProfileBuilder::ToString() const {
@@ -43,7 +44,12 @@ string HumanReadableProfileBuilder::ToString() const {
     } else {
       bytes_per_sec =
           HumanReadableNumBytes(op.bytes_accessed / CyclesToSeconds(op.cycles));
-      bytes_per_cycle = HumanReadableNumBytes(op.bytes_accessed / op.cycles);
+      if (op.bytes_accessed > op.cycles) {
+        bytes_per_cycle = HumanReadableNumBytes(op.bytes_accessed / op.cycles);
+      } else {
+        bytes_per_cycle =
+            Printf("%.3fB", static_cast<float>(op.bytes_accessed) / op.cycles);
+      }
     }
 
     double cycles_percent = 0;
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index ba901b99e4f3c72c84c1ecdf4e19e58ad9ab6506..90e1f0acdc4cdeda280dabaab2df66b181d0f407 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -100,6 +100,7 @@ namespace xla {
     case HloOpcode::kDivide:
     case HloOpcode::kDot:
     case HloOpcode::kExp:
+    case HloOpcode::kFft:
     case HloOpcode::kFusion:
     case HloOpcode::kLog:
     case HloOpcode::kMap:
diff --git a/tensorflow/compiler/xla/service/interpreter/BUILD b/tensorflow/compiler/xla/service/interpreter/BUILD
index 2704a805a91b93c69b751cdb61305ea7780f0ef2..0819ab3b90b2360c6b0b2afaa89f322afe566eb3 100644
--- a/tensorflow/compiler/xla/service/interpreter/BUILD
+++ b/tensorflow/compiler/xla/service/interpreter/BUILD
@@ -92,6 +92,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_execution_profile",
         "//tensorflow/compiler/xla/service:hlo_module_config",
         "//tensorflow/compiler/xla/service:shaped_buffer",
+        "//tensorflow/compiler/xla/service:transfer_manager",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
     ],
diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc
index 9183a1d1bfb8c2f6e1933c004f9c9f5f9ad8eced..0cb9b5d8107cd8bf468b07d5fe2a22930d9e8b8c 100644
--- a/tensorflow/compiler/xla/service/interpreter/executable.cc
+++ b/tensorflow/compiler/xla/service/interpreter/executable.cc
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_evaluator.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/interpreter/executor.h"
+#include "tensorflow/compiler/xla/service/transfer_manager.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -38,7 +39,6 @@ namespace xla {
 namespace interpreter {
 
 namespace se = ::perftools::gputools;
-namespace sep = ::perftools::gputools::interpreter;
 
 InterpreterExecutable::InterpreterExecutable(
     std::unique_ptr<const HloModule> hlo_module)
@@ -47,44 +47,18 @@ InterpreterExecutable::InterpreterExecutable(
 
 InterpreterExecutable::~InterpreterExecutable() {}
 
-static se::DeviceMemoryBase AllocateSingleOutput(
-    sep::InterpreterExecutor* executor, const Literal& literal) {
-  int64 size(xla::ShapeUtil::ByteSizeOf(literal.shape()));
-  void* buf = executor->Allocate(size);
-  const void* src = literal.InternalData();
-  memcpy(buf, src, size);
-  return se::DeviceMemoryBase(buf, size);
-}
-
-static se::DeviceMemoryBase AllocateOutputBuffer(
-    sep::InterpreterExecutor* executor, const Literal& literal) {
-  const Shape& shape = literal.shape();
-  if (shape.element_type() != xla::TUPLE) {
-    return AllocateSingleOutput(executor, literal);
-  } else {
-    int64 size(xla::ShapeUtil::ByteSizeOf(shape, sizeof(void*)));
-    void** buf = reinterpret_cast<void**>(executor->Allocate(size));
-    void** buf_rc = buf;
-    for (int64 n = 0; n < xla::ShapeUtil::TupleElementCount(shape); n++) {
-      se::DeviceMemoryBase out =
-          AllocateSingleOutput(executor, literal.tuple_literals(n));
-      *buf++ = out.opaque();
-    }
-
-    return se::DeviceMemoryBase(buf_rc, size);
-  }
-}
-
-StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteOnStream(
+StatusOr<std::unique_ptr<ShapedBuffer>> InterpreterExecutable::ExecuteOnStream(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments,
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     HloExecutionProfile* hlo_execution_profile) {
   se::Stream* stream = run_options->stream();
+  se::StreamExecutor* executor = stream->parent();
+  const se::Platform* platform = executor->platform();
 
   VLOG(1) << "Execute " << module().name();
   if (VLOG_IS_ON(2)) {
     for (const auto& a : arguments) {
-      VLOG(2) << "-- argument " << a.opaque();
+      VLOG(2) << "-- argument " << *a;
     }
   }
 
@@ -96,33 +70,32 @@ StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteOnStream(
         "Mismatch between argument count and graph parameter count.");
   }
 
-  // Create the arguments as an vector of XLA literals
+  TF_ASSIGN_OR_RETURN(TransferManager * transfer_manager,
+                      TransferManager::GetForPlatform(platform));
+
+  // Transform the ShapedBuffer arguments into literals which the evaluator
+  // consumes.
   std::vector<std::unique_ptr<Literal>> arg_literals;
-  std::vector<Literal*> arg_literals_ptrs;
   for (int64 p = 0; p < computation->num_parameters(); ++p) {
-    // Create the input literal for the parameter
-    HloInstruction* param = computation->parameter_instruction(p);
-    arg_literals.emplace_back(Literal::CreateFromShape(param->shape()));
-    arg_literals_ptrs.push_back(arg_literals.back().get());
-
-    // Copy in the data from the stream_executor buffers
-    void* buffer = arg_literals.back()->MutableInternalData();
-    memcpy(buffer, arguments[p].opaque(),
-           ShapeUtil::ByteSizeOf(param->shape()));
+    TF_ASSIGN_OR_RETURN(
+        std::unique_ptr<Literal> arg_literal,
+        transfer_manager->TransferLiteralFromDevice(executor, *arguments[p]));
+    arg_literals.push_back(std::move(arg_literal));
   }
 
   // Execute the graph using the HloEvaluator.
   HloEvaluator evaluator;
-  TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> output,
-                      evaluator.Evaluate(*computation, arg_literals_ptrs));
-
-  // Copy the result into the return buffer
-  perftools::gputools::StreamExecutor* executor(stream->parent());
-  sep::InterpreterExecutor* interpreter_executor(
-      static_cast<sep::InterpreterExecutor*>(executor->implementation()));
-
-  se::DeviceMemoryBase ret =
-      AllocateOutputBuffer(interpreter_executor, *(output.get()));
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<Literal> result_literal,
+      evaluator.Evaluate<std::unique_ptr<Literal>>(*computation, arg_literals));
+
+  // Transform the result literal back into a ShapedBuffer.
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<ShapedBuffer> result,
+                      transfer_manager->AllocateShapedBuffer(
+                          result_literal->shape(), run_options->allocator(),
+                          run_options->device_ordinal()));
+  TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDevice(
+      executor, *result_literal, *result));
 
   uint64 end_micros = tensorflow::Env::Default()->NowMicros();
 
@@ -132,20 +105,13 @@ StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteOnStream(
     execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0));
   }
 
-  return ret;
-}
-
-StatusOr<std::unique_ptr<ShapedBuffer>> InterpreterExecutable::ExecuteOnStream(
-    const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
-    HloExecutionProfile* hlo_execution_profile) {
-  return tensorflow::errors::Unimplemented(
-      "ExecuteOnStream is not yet supported on Interpreter.");
+  return std::move(result);
 }
 
-StatusOr<se::DeviceMemoryBase> InterpreterExecutable::ExecuteAsyncOnStream(
+StatusOr<std::unique_ptr<ShapedBuffer>>
+InterpreterExecutable::ExecuteAsyncOnStream(
     const ServiceExecutableRunOptions* run_options,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) {
   return tensorflow::errors::Unimplemented(
       "ExecuteAsyncOnStream is not yet supported on Interpreter.");
 }
diff --git a/tensorflow/compiler/xla/service/interpreter/executable.h b/tensorflow/compiler/xla/service/interpreter/executable.h
index 0e87eb90bff4b896fc4bc0efc4fa7b851631be6f..410110a1adf04c83001c38ed03f5d60dd203dc7e 100644
--- a/tensorflow/compiler/xla/service/interpreter/executable.h
+++ b/tensorflow/compiler/xla/service/interpreter/executable.h
@@ -43,21 +43,14 @@ class InterpreterExecutable : public Executable {
   InterpreterExecutable(std::unique_ptr<const HloModule> hlo_module);
   ~InterpreterExecutable() override;
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteOnStream(
-      const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      HloExecutionProfile* hlo_execution_profile) override;
-
   StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteOnStream(
       const ServiceExecutableRunOptions* run_options,
       tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       HloExecutionProfile* hlo_execution_profile) override;
 
-  StatusOr<perftools::gputools::DeviceMemoryBase> ExecuteAsyncOnStream(
+  StatusOr<std::unique_ptr<ShapedBuffer>> ExecuteAsyncOnStream(
       const ServiceExecutableRunOptions* run_options,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments) override;
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override;
 
   static int64 ShapeSizeBytes(const Shape& shape);
 
diff --git a/tensorflow/compiler/xla/service/interpreter/executor.cc b/tensorflow/compiler/xla/service/interpreter/executor.cc
index 0bb3259ef43915067e614e72038387e8300ecc41..68371910d76f42c0b6d4b1adad9d6a83bdb858e6 100644
--- a/tensorflow/compiler/xla/service/interpreter/executor.cc
+++ b/tensorflow/compiler/xla/service/interpreter/executor.cc
@@ -85,7 +85,7 @@ bool InterpreterExecutor::HostCallback(Stream *stream,
 bool InterpreterExecutor::CreateStreamDependency(Stream *dependent,
                                                  Stream *other) {
   AsExecutorStream(dependent)->EnqueueTask(
-      [other]() { other->BlockHostUntilDone(); });
+      [other]() { SE_CHECK_OK(other->BlockHostUntilDone()); });
   AsExecutorStream(dependent)->BlockUntilDone();
   return true;
 }
@@ -100,9 +100,9 @@ bool InterpreterExecutor::StopTimer(Stream *stream, Timer *timer) {
   return true;
 }
 
-bool InterpreterExecutor::BlockHostUntilDone(Stream *stream) {
+port::Status InterpreterExecutor::BlockHostUntilDone(Stream *stream) {
   AsExecutorStream(stream)->BlockUntilDone();
-  return true;
+  return port::Status::OK();
 }
 
 DeviceDescription *InterpreterExecutor::PopulateDeviceDescription() const {
diff --git a/tensorflow/compiler/xla/service/interpreter/executor.h b/tensorflow/compiler/xla/service/interpreter/executor.h
index c59b2ccb1505b78be0c459ac9311428d65cc7e44..c5d07e906dafb033905c50c604069e80e1ce80cd 100644
--- a/tensorflow/compiler/xla/service/interpreter/executor.h
+++ b/tensorflow/compiler/xla/service/interpreter/executor.h
@@ -157,7 +157,7 @@ class InterpreterExecutor : public internal::StreamExecutorInterface {
   bool StartTimer(Stream *stream, Timer *timer) override;
   bool StopTimer(Stream *stream, Timer *timer) override;
 
-  bool BlockHostUntilDone(Stream *stream) override;
+  port::Status BlockHostUntilDone(Stream *stream) override;
 
   int PlatformDeviceCount() override { return 1; }
 
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 7eda7c2284c2457703fcfcd4226172e41dd4ae01..f80dace8775c5ed31addb4a3d134f53005c6df71 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -369,8 +369,9 @@ string LayoutConstraints::ToString() const {
 }
 
 Status LayoutAssignment::AddMandatoryConstraints(
-    const ComputationLayout& computation_layout, HloComputation* computation,
-    LayoutConstraints* constraints) {
+    const ComputationLayout& computation_layout,
+    const ChannelLayoutConstraints* channel_constraints,
+    HloComputation* computation, LayoutConstraints* constraints) {
   VLOG(3) << "Adding mandatory layout constraints to computation "
           << computation->name();
 
@@ -403,6 +404,37 @@ Status LayoutAssignment::AddMandatoryConstraints(
       TF_RETURN_IF_ERROR(
           constraints->SetInstructionLayout(*shape_with_layout, instruction));
     }
+
+    if (instruction->opcode() == HloOpcode::kSend ||
+        instruction->opcode() == HloOpcode::kRecv) {
+      CHECK(channel_constraints)
+          << "Multi-module layout assignment requires ChannelLayoutConstraints";
+      int64 channel_id = instruction->channel_id();
+      if (!channel_constraints->IsChannelConstrained(channel_id)) {
+        continue;
+      }
+      if (instruction->opcode() == HloOpcode::kSend) {
+        // TODO(b/68493863): Change to use SetOperandLayout().
+        const Shape send_buffer_shape = instruction->operand(0)->shape();
+        TF_RET_CHECK(ShapeUtil::IsArray(send_buffer_shape));
+        Shape new_buffer_shape = channel_constraints->LayoutShapeForChannel(
+            send_buffer_shape, instruction->channel_id());
+        TF_RETURN_IF_ERROR(constraints->SetInstructionLayout(
+            new_buffer_shape, instruction->operand(0)));
+      } else {
+        const Shape recv_buffer_shape =
+            ShapeUtil::GetTupleElementShape(instruction->shape(), 0);
+        TF_RET_CHECK(ShapeUtil::IsArray(recv_buffer_shape));
+        TF_ASSIGN_OR_RETURN(
+            const LogicalBuffer* buffer,
+            constraints->points_to_analysis().GetBufferDefinedAt(instruction,
+                                                                 {0}));
+        Shape new_shape = channel_constraints->LayoutShapeForChannel(
+            recv_buffer_shape, instruction->channel_id());
+        TF_RETURN_IF_ERROR(constraints->SetBufferLayout(
+            new_shape.layout(), *buffer, /*mandatory=*/true));
+      }
+    }
   }
 
   // Constrain layouts of instructions which call computations which have
@@ -476,17 +508,14 @@ Status LayoutAssignment::AddMandatoryConstraints(
           body_layout.result_shape(), instruction, 0,
           /*mandatory=*/true));
     } else if (instruction->opcode() == HloOpcode::kCustomCall) {
+      if (!CustomCallRequiresMajorFirstLayout(instruction)) {
+        continue;
+      }
       // Add constraints for kCustomCall instruction operands and instructions.
-      // For now we only support row major layouts for all inputs and outputs.
-      auto row_major_shape = [](const Shape& old_shape) {
-        Shape new_shape(old_shape);
-        std::vector<int64> dimension_order(new_shape.dimensions_size());
-        std::iota(dimension_order.rbegin(), dimension_order.rend(), 0);
-        *new_shape.mutable_layout() = LayoutUtil::MakeLayout(dimension_order);
-        return new_shape;
-      };
-
-      Shape result_shape(row_major_shape(instruction->shape()));
+      // For now we only support major-first layouts for all inputs and outputs.
+      Shape result_shape = ShapeUtil::MakeShapeWithDescendingLayout(
+          instruction->shape().element_type(),
+          AsInt64Slice(instruction->shape().dimensions()));
       TF_RETURN_IF_ERROR(
           constraints->SetInstructionLayout(result_shape, instruction));
       for (int64 i = 0; i < instruction->operand_count(); ++i) {
@@ -496,7 +525,10 @@ Status LayoutAssignment::AddMandatoryConstraints(
           continue;
         }
 
-        Shape row_major_operand_shape(row_major_shape(operand_shape));
+        Shape row_major_operand_shape =
+            ShapeUtil::MakeShapeWithDescendingLayout(
+                operand_shape.element_type(),
+                AsInt64Slice(operand_shape.dimensions()));
         TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
             row_major_operand_shape, instruction, i, /*mandatory=*/true));
       }
@@ -530,9 +562,11 @@ Status CheckCallLayout(HloInstruction* call,
 Status CheckCustomCallLayout(HloInstruction* custom_call) {
   for (const HloInstruction* operand : custom_call->operands()) {
     TF_RET_CHECK(
+        ShapeUtil::IsOpaque(operand->shape()) ||
         LayoutUtil::IsMonotonicWithDim0Major(operand->shape().layout()));
   }
   TF_RET_CHECK(
+      ShapeUtil::IsOpaque(custom_call->shape()) ||
       LayoutUtil::IsMonotonicWithDim0Major(custom_call->shape().layout()));
   return Status::OK();
 }
@@ -601,11 +635,9 @@ Status CheckConstantLayout(HloInstruction* constant) {
   return Status::OK();
 }
 
-// Check that all layouts in the module have been set and satisfy all necessary
-// conditions.
-Status CheckLayouts(
-    HloModule* module,
-    const std::map<HloComputation*, ComputationLayout>& computation_layouts) {
+}  // namespace
+
+Status LayoutAssignment::CheckLayouts(HloModule* module) {
   TF_ASSIGN_OR_RETURN(auto points_to_analysis,
                       TuplePointsToAnalysis::Run(module));
   for (auto* computation : module->MakeNonfusionComputations()) {
@@ -649,10 +681,12 @@ Status CheckLayouts(
         case HloOpcode::kCall:
           TF_RETURN_IF_ERROR(CheckCallLayout(
               instruction,
-              FindOrDie(computation_layouts, instruction->to_apply())));
+              FindOrDie(computation_layouts_, instruction->to_apply())));
           break;
         case HloOpcode::kCustomCall:
-          TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction));
+          if (CustomCallRequiresMajorFirstLayout(instruction)) {
+            TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction));
+          }
           break;
         case HloOpcode::kFusion:
           TF_RETURN_IF_ERROR(CheckFusionLayout(instruction));
@@ -660,7 +694,7 @@ Status CheckLayouts(
         case HloOpcode::kParameter:
           TF_RETURN_IF_ERROR(CheckParameterLayout(
               instruction,
-              FindOrDie(computation_layouts, instruction->parent())));
+              FindOrDie(computation_layouts_, instruction->parent())));
           break;
         case HloOpcode::kConstant:
           TF_RETURN_IF_ERROR(CheckConstantLayout(instruction));
@@ -668,8 +702,8 @@ Status CheckLayouts(
         case HloOpcode::kWhile:
           TF_RETURN_IF_ERROR(CheckWhileLayout(
               instruction,
-              FindOrDie(computation_layouts, instruction->while_condition()),
-              FindOrDie(computation_layouts, instruction->while_body())));
+              FindOrDie(computation_layouts_, instruction->while_condition()),
+              FindOrDie(computation_layouts_, instruction->while_body())));
           break;
         default:
           break;
@@ -681,17 +715,18 @@ Status CheckLayouts(
   // computation root.
   TF_RET_CHECK(ShapeUtil::Equal(
       module->entry_computation()->root_instruction()->shape(),
-      FindOrDie(computation_layouts, module->entry_computation())
+      FindOrDie(computation_layouts_, module->entry_computation())
           .result_layout()
           .shape()));
 
   return Status::OK();
 }
 
-}  // namespace
-
-LayoutAssignment::LayoutAssignment(ComputationLayout* entry_computation_layout)
-    : entry_computation_layout_(entry_computation_layout) {
+LayoutAssignment::LayoutAssignment(
+    ComputationLayout* entry_computation_layout,
+    ChannelLayoutConstraints* channel_constraints)
+    : entry_computation_layout_(entry_computation_layout),
+      channel_layout_constraints_(channel_constraints) {
   VLOG(1) << "entry computation layout given to layout assignment: "
           << entry_computation_layout_->ToString();
   // Layouts of all parameter instructions must be set.
@@ -711,8 +746,8 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOperandLayoutFromOutputLayout(
     int64 operand_no) {
   const HloInstruction* operand = instruction->operand(operand_no);
 
-  CHECK(ShapeUtil::IsArray(instruction->shape()) &&
-        ShapeUtil::IsArray(operand->shape()));
+  CHECK(ShapeUtil::IsArray(instruction->shape()));
+  CHECK(ShapeUtil::IsArray(operand->shape()));
 
   if (instruction->IsElementwiseOnOperand(operand_no) &&
       !ShapeUtil::IsScalar(operand->shape()) &&
@@ -742,7 +777,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOperandLayoutFromOutputLayout(
     const Shape& output_shape = instruction->shape();
     Shape output_shape_with_layout = ShapeUtil::MakeShapeWithLayout(
         output_shape.element_type(), AsInt64Slice(output_shape.dimensions()),
-        AsInt64Slice(output_layout.minor_to_major()));
+        LayoutUtil::MinorToMajor(output_layout));
     Shape operand_shape = operand->shape();
     *operand_shape.mutable_layout() =
         LayoutUtil::GetDefaultLayoutForShape(operand_shape);
@@ -771,7 +806,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOperandLayoutFromOutputLayout(
     int64 rank = ShapeUtil::Rank(instruction->shape());
     std::vector<int64> new_minor_to_major(rank);
     for (int64 i = 0; i < rank; ++i) {
-      int64 output_dim = output_layout.minor_to_major(i);
+      int64 output_dim = LayoutUtil::Minor(output_layout, i);
       int64 operand_dim = instruction->dimensions(output_dim);
       new_minor_to_major[i] = operand_dim;
     }
@@ -814,7 +849,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOutputLayoutFromOperandLayout(
     Shape operand_shape_with_layout = ShapeUtil::MakeShapeWithLayout(
         operand->shape().element_type(),
         AsInt64Slice(operand->shape().dimensions()),
-        AsInt64Slice(operand_layout.minor_to_major()));
+        LayoutUtil::MinorToMajor(operand_layout));
     Shape output_shape = user->shape();
     *output_shape.mutable_layout() =
         LayoutUtil::GetDefaultLayoutForShape(output_shape);
@@ -844,7 +879,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOutputLayoutFromOperandLayout(
     std::vector<int64> new_minor_to_major(rank);
     auto inverse_dimensions = InversePermutation(user->dimensions());
     for (int64 i = 0; i < rank; ++i) {
-      int64 operand_dim = operand_layout.minor_to_major(i);
+      int64 operand_dim = LayoutUtil::Minor(operand_layout, i);
       int64 user_dim = inverse_dimensions[operand_dim];
       new_minor_to_major[i] = user_dim;
     }
@@ -1303,8 +1338,8 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints,
     TF_RET_CHECK(LayoutUtil::HasLayout(instruction->shape()));
   }
 
-  // Copy the root instrucion's result if the it does not match the result
-  // layout constraint
+  // Copy the root instruction's result if its layout does not match the result
+  // layout constraint.
   if (constraints.ResultLayout() != nullptr &&
       !constraints.ResultLayout()->MatchesLayoutInShape(
           computation->root_instruction()->shape())) {
@@ -1321,20 +1356,35 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints,
 Status LayoutAssignment::RunOnComputation(
     const ComputationLayout& computation_layout,
     const TuplePointsToAnalysis& points_to_analysis,
-    HloComputation* computation) {
+    HloComputation* computation,
+    ChannelLayoutConstraints* channel_constraints) {
   DCHECK(computation_layout.LayoutIsSet());
   InsertOrDie(&computation_layouts_, computation, computation_layout);
   VLOG(2) << "LayoutAssignment::RunOnComputation(" << computation->name()
           << ")";
   VLOG(2) << "  ComputationLayout = " << computation_layout.ToString();
 
+  // Clear existing layouts of the instructions. All layouts must be assigned by
+  // the LayoutAssignment pass, except for Infeed, Outfeed, Parameters and the
+  // computation result. The latter two are specified in computation_layout, so
+  // we only need to keep the existing layouts for Infeed and Outfeed. Clearing
+  // the layouts here avoids hiding potential bugs in the layout assignment pass
+  // that may accidently use the existing layout.
+  for (HloInstruction* instruction : computation->instructions()) {
+    if (instruction->opcode() == HloOpcode::kInfeed ||
+        instruction->opcode() == HloOpcode::kOutfeed) {
+      continue;
+    }
+    LayoutUtil::ClearLayout(instruction->mutable_shape());
+  }
+
   // Construct LayoutConstraints with all layout constraints of the computation.
   LayoutConstraints constraints(points_to_analysis, computation);
 
   // Add constraints required for correctness on all backends (eg, entry
   // parameter layout constraints).
-  TF_RETURN_IF_ERROR(
-      AddMandatoryConstraints(computation_layout, computation, &constraints));
+  TF_RETURN_IF_ERROR(AddMandatoryConstraints(
+      computation_layout, channel_constraints, computation, &constraints));
 
   // Add any backend-specific constraints.
   TF_RETURN_IF_ERROR(AddBackendConstraints(&constraints));
@@ -1373,7 +1423,20 @@ Status LayoutAssignment::RunOnComputation(
   // All logical buffers should have constraints at this point. All that
   // remains is assign the constraints to the buffers and infer layouts for
   // aliased buffers.
-  return AssignLayouts(constraints, computation);
+  TF_RETURN_IF_ERROR(AssignLayouts(constraints, computation));
+
+  // Record the layouts assigned for any communication ops in
+  // channel_constraints so that they are constrained for future modules.
+  for (HloInstruction* instruction : computation->instructions()) {
+    if (instruction->opcode() == HloOpcode::kSend) {
+      channel_constraints->ConstrainChannel(
+          instruction->channel_id(), instruction->operand(0)->shape().layout());
+    } else if (instruction->opcode() == HloOpcode::kRecvDone) {
+      channel_constraints->ConstrainChannel(instruction->channel_id(),
+                                            instruction->shape().layout());
+    }
+  }
+  return Status::OK();
 }
 
 StatusOr<bool> LayoutAssignment::Run(HloModule* module) {
@@ -1393,9 +1456,9 @@ StatusOr<bool> LayoutAssignment::Run(HloModule* module) {
   // all callers of a computation will agree.
   for (auto* computation : module->MakeComputationPostOrder()) {
     if (computation == module->entry_computation()) {
-      TF_RETURN_IF_ERROR(RunOnComputation(*entry_computation_layout_,
-                                          *points_to_analysis,
-                                          module->entry_computation()));
+      TF_RETURN_IF_ERROR(RunOnComputation(
+          *entry_computation_layout_, *points_to_analysis,
+          module->entry_computation(), channel_layout_constraints_));
     } else if (computation->IsFusionComputation()) {
       continue;
     } else {
@@ -1404,11 +1467,12 @@ StatusOr<bool> LayoutAssignment::Run(HloModule* module) {
       // suboptimal.
       computation_layout.SetToDefaultLayout();
       TF_RETURN_IF_ERROR(RunOnComputation(computation_layout,
-                                          *points_to_analysis, computation));
+                                          *points_to_analysis, computation,
+                                          channel_layout_constraints_));
     }
   }
 
-  TF_RETURN_IF_ERROR(CheckLayouts(module, computation_layouts_));
+  TF_RETURN_IF_ERROR(CheckLayouts(module));
 
   VLOG(3) << "After layout assignment:";
   XLA_VLOG_LINES(3, module->ToString());
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index 0b97fba744923b8afc3fb539566b68f1bca47d38..6bfae2998609c0482b91368f1891ce1e8e43fa23 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -215,13 +215,62 @@ class LayoutConstraints {
   HloComputation* computation_;
 };
 
+// Contains constraints on the layout of channels; sends and recvs.
+class ChannelLayoutConstraints {
+ public:
+  // Construct an empty constraint set.
+  ChannelLayoutConstraints() {}
+
+  // Returns true if channel_id has a layout constraint.
+  bool IsChannelConstrained(int64 channel_id) const {
+    return constraints_.count(channel_id) > 0;
+  }
+
+  // Given `shape`, apply the layout for `channel_id`. `channel_id` must already
+  // be constrained.
+  Shape LayoutShapeForChannel(Shape shape, int64 channel_id) const {
+    CHECK(IsChannelConstrained(channel_id));
+    *shape.mutable_layout() = constraints_.at(channel_id);
+    return shape;
+  }
+
+  // Returns the layout constraint for `channel_id`, which must already be
+  // constrained.
+  Layout LayoutForChannel(int64 channel_id) const {
+    CHECK(IsChannelConstrained(channel_id));
+    return constraints_.at(channel_id);
+  }
+
+  // Adds a new layout constraint for `channel_id`. If a constraint for
+  // `channel_id` already exists, this operation requires that the new layout is
+  // the same as the previously constrained layout.
+  void ConstrainChannel(int64 channel_id, const Layout& layout) {
+    CHECK(!IsChannelConstrained(channel_id) ||
+          LayoutUtil::Equal(layout, constraints_[channel_id]));
+    constraints_[channel_id] = layout;
+  }
+
+ private:
+  std::unordered_map<int64, Layout> constraints_;
+};
+
 // HLO pass which assigns layouts to all instructions in the HLO module while
 // satisfying all necessary invariants and minimizing cost.
 class LayoutAssignment : public HloPassInterface {
  public:
   // entry_computation_layout is modified to populate a layout for the result in
   // the case that no particular layout is requested.
-  explicit LayoutAssignment(ComputationLayout* entry_computation_layout);
+  //
+  // channel_constraints is both an input and output. Any sends or recvs that
+  // are present in channel_constraints will be layed out as constrained. Any
+  // unconstrained sends or recvs will be layed out as locally optimal and their
+  // layout will be added as a constraint to channel_constraints.
+  //
+  // If channel_constraints is nullptr, no kSend or kRecvs must be contained
+  // within any module passed to `Run`.
+  explicit LayoutAssignment(
+      ComputationLayout* entry_computation_layout,
+      ChannelLayoutConstraints* channel_constraints = nullptr);
   ~LayoutAssignment() override {}
   tensorflow::StringPiece name() const override { return "layout-assignment"; }
 
@@ -247,6 +296,19 @@ class LayoutAssignment : public HloPassInterface {
       const ResultLayoutConstraint& layout_constraint,
       LayoutConstraints* constraints);
 
+  // By default LayoutAssignment ensures that inputs and outputs of CustomCalls
+  // have the "major-first" layout (i.e.  {n, n-1, ..., 0}).
+  //
+  // If this function returns true, LayoutAssignment does not set a layout for
+  // the given CustomCall.  It's up to the backend to set one in
+  // AddBackendConstraints, if necessary.
+  //
+  // Precondition: instruction->opcode() == HloOpcode::kCustomCall.
+  virtual bool CustomCallRequiresMajorFirstLayout(
+      const HloInstruction* /*instruction*/) {
+    return true;
+  }
+
   // Called after layouts of an instruction have been finalized to allow
   // subclasses to check for platform specific assumptions.
   virtual Status Verify(const HloInstruction* instruction) {
@@ -283,9 +345,10 @@ class LayoutAssignment : public HloPassInterface {
  private:
   // Adds constraints which must be satisfied for correctness on all
   // backends. Called once prior to propagating constraints.
-  Status AddMandatoryConstraints(const ComputationLayout& computation_layout,
-                                 HloComputation* computation,
-                                 LayoutConstraints* constraints);
+  Status AddMandatoryConstraints(
+      const ComputationLayout& computation_layout,
+      const ChannelLayoutConstraints* channel_constraints,
+      HloComputation* computation, LayoutConstraints* constraints);
 
   // This method can be overridden to add backend-specific constraints to the
   // layout of the instructions of a computation. This method is called after
@@ -301,7 +364,8 @@ class LayoutAssignment : public HloPassInterface {
   // constrained.
   Status RunOnComputation(const ComputationLayout& computation_layout,
                           const TuplePointsToAnalysis& points_to_analysis,
-                          HloComputation* computation);
+                          HloComputation* computation,
+                          ChannelLayoutConstraints* channel_constraints);
 
   // Assign layouts to the instructions of a computation which satisfy the given
   // layout constraints. Copies may be added to satisfy the constraints. The
@@ -315,7 +379,12 @@ class LayoutAssignment : public HloPassInterface {
   // required for correctness.
   Status PropagateConstraints(LayoutConstraints* constraints);
 
+  // Check that all layouts in the module have been set and satisfy all
+  // necessary conditions.
+  Status CheckLayouts(HloModule* module);
+
   ComputationLayout* entry_computation_layout_;
+  ChannelLayoutConstraints* channel_layout_constraints_;
 
  protected:
   // Map containing the layouts of all computations assigned so
diff --git a/tensorflow/compiler/xla/service/liveness_util_test.cc b/tensorflow/compiler/xla/service/liveness_util_test.cc
index 476e86fa72ad691cda52097c953ba15132f206a7..2c2a02f6375343d67dfb155bbb03729ff6e490d2 100644
--- a/tensorflow/compiler/xla/service/liveness_util_test.cc
+++ b/tensorflow/compiler/xla/service/liveness_util_test.cc
@@ -277,8 +277,11 @@ TEST_F(CanShareOperandBufferWithUserTest, FusedDotAdd) {
   auto b = builder.AddInstruction(HloInstruction::CreateConstant(
       Literal::CreateR2<float>({{2.0, 2.0}, {2.0, 2.0}})));
 
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(data_shape, HloOpcode::kDot, a, b));
+      HloInstruction::CreateDot(data_shape, a, b, dot_dnums));
 
   auto one = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
@@ -312,8 +315,11 @@ TEST_F(CanShareOperandBufferWithUserTest, FusedTransposeDotAdd) {
   auto b_t = builder.AddInstruction(
       HloInstruction::CreateTranspose(data_shape, b, {1, 0}));
 
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto dot = builder.AddInstruction(
-      HloInstruction::CreateBinary(data_shape, HloOpcode::kDot, a, b_t));
+      HloInstruction::CreateDot(data_shape, a, b_t, dot_dnums));
 
   auto one = builder.AddInstruction(
       HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD
index d878061f724de1c82f8285b0f082d0be4d5778df..ffc78bd5cfac3df1001d8125327607c85169ae92 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/BUILD
+++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD
@@ -48,6 +48,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_module_config",
         "//tensorflow/compiler/xla/service:name_uniquer",
         "//tensorflow/core:lib",
         "@llvm//:core",
@@ -156,18 +157,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "vector_support_library",
-    srcs = ["vector_support_library.cc"],
-    hdrs = ["vector_support_library.h"],
-    deps = [
-        "//tensorflow/compiler/xla:types",
-        "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
-        "@llvm//:core",
-    ],
-)
-
 cc_library(
     name = "kernel_support_library",
     srcs = ["kernel_support_library.cc"],
diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc
index 7224bd689842d89563b374f3db3d4e314be18764..6384c7f46f5ebbedaeda232b40095611a5d738a4 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc
@@ -39,13 +39,27 @@ IrArray::Index::Index(llvm::Value* linear, const Shape& shape,
       << "Shape " << ShapeUtil::HumanStringWithLayout(shape)
       << " should have a layout.";
   int64 divisor = 1;
-  for (int64 dimension : layout_.minor_to_major()) {
+  for (int64 i = 0; i < layout_.minor_to_major_size(); ++i) {
+    int64 dimension = layout_.minor_to_major(i);
     int64 size_of_current_dimension = shape.dimensions(dimension);
-    // Emit IR instructions that compute
-    //   (linear_index / divisor) % current_dimension
-    multidim_[dimension] = ir_builder->CreateURem(
-        ir_builder->CreateUDiv(linear, ir_builder->getInt64(divisor)),
-        ir_builder->getInt64(size_of_current_dimension));
+
+    // If i is not the last dimension, compute
+    //   (linear_index / divisor) % current_dimension.
+    // If i is the last dimension, we can skip the mod, because we assume that
+    // linear is in bounds.
+    //
+    // TODO(jlebar): We could add bounds checks here and elsewhere in this file,
+    // guarded under some sort of xla-memcheck flag.  This might be particularly
+    // useful because cuda-memcheck can't help us much in XLA: Most of our
+    // memory lives in one big allocation, so cuda-memcheck can't detect
+    // out-of-bounds accesses.
+    auto* quot = ir_builder->CreateUDiv(linear, ir_builder->getInt64(divisor));
+    if (i < layout_.minor_to_major_size() - 1) {
+      multidim_[dimension] = ir_builder->CreateURem(
+          quot, ir_builder->getInt64(size_of_current_dimension));
+    } else {
+      multidim_[dimension] = quot;
+    }
     divisor *= size_of_current_dimension;
   }
 }
@@ -244,8 +258,8 @@ llvm::Value* IrArray::EmitArrayElementAddress(
   //
   //   getelementptr base_ptr_, 0, most major index, ..., most minor index
   std::vector<llvm::Value*> gep_indices(1, ir_builder->getInt64(0));
-  for (int64 i = shape_->layout().minor_to_major_size() - 1; i >= 0; --i) {
-    int64 dimension = shape_->layout().minor_to_major(i);
+  for (int64 i = 0; i < LayoutUtil::MinorToMajor(*shape_).size(); ++i) {
+    int64 dimension = LayoutUtil::Major(shape_->layout(), i);
     gep_indices.push_back(actual_index[dimension]);
   }
   return ir_builder->CreateInBoundsGEP(base_ptr_, gep_indices,
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
index 29cc0f81bd2c06538e28d1b593ee6a897fea0f27..23d2d4e87d26f4988ebddcf20f5a27af6a7fe0d6 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h"
 
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 
 namespace xla {
 void KernelSupportLibrary::For(
@@ -62,4 +63,72 @@ void KernelSupportLibrary::If(
   false_block_generator();
   llvm_ir::SetToLastInsertPoint(if_data.after_block, ir_builder_);
 }
+
+void KernelSupportLibrary::EmitAndCallOutlinedKernel(
+    bool enable_fast_math, bool optimize_for_size,
+    llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
+    KernelSupportLibrary::ArgumentVector arguments,
+    const std::function<void(KernelSupportLibrary::ArgumentVector)>&
+        kernel_body_generator) {
+  llvm::Module* module = ir_builder->GetInsertBlock()->getModule();
+  llvm::Function* function =
+      module->getFunction(llvm_ir::AsStringRef(kernel_name));
+
+  int64 null_arg_idx = -1;
+  std::vector<llvm::Value*> sanitized_args;
+  sanitized_args.reserve(arguments.size());
+  for (int64 i = 0, e = arguments.size(); i < e; i++) {
+    if (arguments[i]) {
+      sanitized_args.push_back(arguments[i]);
+    } else {
+      CHECK_EQ(null_arg_idx, -1);
+      null_arg_idx = i;
+    }
+  }
+
+  if (!function) {
+    VLOG(2) << "Generating kernel for " << kernel_name;
+    std::vector<llvm::Type*> arg_types;
+    std::transform(sanitized_args.begin(), sanitized_args.end(),
+                   std::back_inserter(arg_types),
+                   [](llvm::Value* arg) { return arg->getType(); });
+
+    auto* function_type = llvm::FunctionType::get(
+        ir_builder->getVoidTy(), arg_types, /*isVarArg=*/false);
+
+    function = llvm_ir::CreateFunction(
+        function_type, llvm::GlobalValue::InternalLinkage,
+        /*enable_fast_math=*/enable_fast_math,
+        /*optimize_for_size=*/optimize_for_size, kernel_name, module);
+
+    llvm::IRBuilder<>::InsertPointGuard guard(*ir_builder);
+
+    auto* entry_bb =
+        llvm::BasicBlock::Create(ir_builder->getContext(), "entry", function);
+    auto* return_inst = llvm::ReturnInst::Create(ir_builder->getContext(),
+                                                 /*retVal=*/nullptr, entry_bb);
+    // Set the insert point to before return_inst.
+    ir_builder->SetInsertPoint(return_inst);
+
+    std::vector<llvm::Value*> arg_values;
+    /*
+     * clang on OSX doesn't like std::transform or range for loop here.
+     * See https://github.com/tensorflow/tensorflow/issues/15196
+     */
+    for (llvm::Function::arg_iterator arg = function->arg_begin(),
+                                      arg_e = function->arg_end();
+         arg != arg_e; ++arg) {
+      arg_values.push_back(arg);
+    }
+    if (null_arg_idx != -1) {
+      arg_values.insert(arg_values.begin() + null_arg_idx, nullptr);
+    }
+    kernel_body_generator(arg_values);
+  } else {
+    VLOG(3) << "Re-using kernel for " << kernel_name;
+  }
+
+  ir_builder->CreateCall(function, llvm_ir::AsArrayRef(sanitized_args));
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
index 9bafb7b57740b7acd0286c113c8a0585c0f93689..827e092a3fa9116c461716b27c309033f7988745 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
@@ -118,6 +118,60 @@ class KernelSupportLibrary {
           const std::function<void()>& true_block_generator,
           const std::function<void()>& false_block_generator = []() {});
 
+  using ArgumentVector = tensorflow::gtl::ArraySlice<llvm::Value*>;
+
+  // Generates the following control flow structure:
+  //
+  //  define @`kernel_name`(arg0, arg1, ... arg`arguments.size()`) {
+  //    kernel_body_generator({arg0, arg1, ... arg`arguments.size()`});
+  //  }
+  //
+  //  ...
+  //  call @`kernel_name`(arguments[0], arguments[1] ...)
+  //  ...
+  //
+  // If a function called `kernel_name` is already present in the module then
+  // that function is re-used.  In that sense we're using the llvm::Module as a
+  // cache of outlined kernels, keyed by function name.
+  //
+  // If any of the values in `arguments` is nullptr (i.e. a nullptr
+  // llvm::Value*) then we ignore it when generating LLVM IR, and instead pass
+  // in a nullptr llvm::Value* in its position to `kernel_body_generator`.
+  // Currently we only support at most one nullptr value in `arguments`.
+  static void EmitAndCallOutlinedKernel(
+      bool enable_fast_math, bool optimize_for_size,
+      llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
+      ArgumentVector arguments,
+      const std::function<void(ArgumentVector)>& kernel_body_generator);
+
+  // Thin wrappers around the more general EmitAndCallOutlinedKernel above.
+  static void EmitAndCallOutlinedKernel(
+      bool enable_fast_math, bool optimize_for_size,
+      llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
+      llvm::Value* arg0, llvm::Value* arg1, llvm::Value* arg2,
+      const std::function<void(llvm::Value*, llvm::Value*, llvm::Value*)>&
+          kernel_body_generator) {
+    EmitAndCallOutlinedKernel(
+        enable_fast_math, optimize_for_size, ir_builder, kernel_name,
+        {arg0, arg1, arg2}, [&](ArgumentVector args) {
+          kernel_body_generator(args[0], args[1], args[2]);
+        });
+  }
+
+  static void EmitAndCallOutlinedKernel(
+      bool enable_fast_math, bool optimize_for_size,
+      llvm::IRBuilder<>* ir_builder, tensorflow::StringPiece kernel_name,
+      llvm::Value* arg0, llvm::Value* arg1, llvm::Value* arg2,
+      llvm::Value* arg3,
+      const std::function<void(llvm::Value*, llvm::Value*, llvm::Value*,
+                               llvm::Value*)>& kernel_body_generator) {
+    EmitAndCallOutlinedKernel(
+        enable_fast_math, optimize_for_size, ir_builder, kernel_name,
+        {arg0, arg1, arg2, arg3}, [&](ArgumentVector args) {
+          kernel_body_generator(args[0], args[1], args[2], args[3]);
+        });
+  }
+
  private:
   llvm::IRBuilder<>* ir_builder_;
   bool prevent_unrolling_;
diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
index cd0c4a371e2b1cd0e1c52b77e47e8b081ab8e836..d2bcb38d09218c72183c7cece95bef6371006555 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
@@ -142,6 +142,13 @@ llvm::Type* PrimitiveTypeToIrType(PrimitiveType element_type,
       return llvm::Type::getInt8Ty(module->getContext());
     case S16:
     case U16:
+    case BF16:
+      // For BF16 we just need some type that is 16 bits wide so that it will
+      // take up the right amount of space in memory. LLVM does not have a BF16
+      // type (the LLVM half type is IEEE 16 bit floating point, not bfloat), so
+      // we can't map it directly to an LLVM type. We will not map a BF16
+      // addition to an addition on this type (int16) - this is just the type
+      // used for storage.
       return llvm::Type::getInt16Ty(module->getContext());
     case S32:
     case U32:
@@ -200,8 +207,8 @@ llvm::Type* ShapeToIrType(const Shape& shape, llvm::Module* module) {
   if (ShapeUtil::IsTuple(shape)) {
     // A tuple buffer is an array of pointers.
     result_type = llvm::ArrayType::get(result_type, shape.tuple_shapes_size());
-  } else {
-    for (int64 dimension : shape.layout().minor_to_major()) {
+  } else if (ShapeUtil::IsArray(shape)) {
+    for (int64 dimension : LayoutUtil::MinorToMajor(shape)) {
       result_type =
           llvm::ArrayType::get(result_type, shape.dimensions(dimension));
     }
@@ -280,6 +287,11 @@ llvm::Constant* LiteralToConstant(const Literal& literal, int64 dimension_index,
         value = llvm::ConstantFP::get(ir_element_type,
                                       literal.Get<float>(*multi_index));
         break;
+      case BF16:
+        value = llvm::ConstantInt::get(
+            ir_element_type,
+            tensorflow::bit_cast<uint16>(literal.Get<bfloat16>(*multi_index)));
+        break;
       case F64:
         value = llvm::ConstantFP::get(ir_element_type,
                                       literal.Get<double>(*multi_index));
@@ -304,7 +316,7 @@ llvm::Constant* LiteralToConstant(const Literal& literal, int64 dimension_index,
   // decrements with each recursive call. We want to iterate through the
   // dimensions in major-to-minor order as we recurse so just index into
   // minor_to_major to get the dimension number for this level of the recursion.
-  int64 dimension = shape.layout().minor_to_major(dimension_index);
+  int64 dimension = LayoutUtil::Minor(shape.layout(), dimension_index);
 
   // Recursively call LiteralToConstant to construct subarrays for the
   // more-minor dimensions. Gather the subarrays into a vector for bundling into
@@ -320,7 +332,7 @@ llvm::Constant* LiteralToConstant(const Literal& literal, int64 dimension_index,
   if (elements.empty()) {
     element_type = ir_element_type;
     for (int i = 0; i < dimension_index; ++i) {
-      int64 index = shape.layout().minor_to_major(i);
+      int64 index = LayoutUtil::Minor(shape.layout(), i);
       element_type =
           llvm::ArrayType::get(element_type, shape.dimensions(index));
     }
@@ -676,5 +688,58 @@ Status DumpIRToDirectory(const string& directory_name,
   return f->Close();
 }
 
+llvm::Function* CreateFunction(llvm::FunctionType* function_type,
+                               llvm::GlobalValue::LinkageTypes linkage,
+                               bool enable_fast_math, bool optimize_for_size,
+                               tensorflow::StringPiece name,
+                               llvm::Module* module) {
+  llvm::Function* function =
+      llvm::Function::Create(function_type, linkage, AsStringRef(name), module);
+  function->setCallingConv(llvm::CallingConv::C);
+  function->addFnAttr("no-frame-pointer-elim", "false");
+
+  if (enable_fast_math) {
+    function->addFnAttr("unsafe-fp-math", "true");
+    function->addFnAttr("no-infs-fp-math", "true");
+    function->addFnAttr("no-nans-fp-math", "true");
+    function->addFnAttr("no-signed-zeros-fp-math", "true");
+  }
+
+  // Add the optize attribute to the function if optimizing for size. This
+  // controls internal behavior of some optimization passes (e.g. loop
+  // unrolling).
+  if (optimize_for_size) {
+    function->addFnAttr(llvm::Attribute::OptimizeForSize);
+  }
+
+  return function;
+}
+
+void InitializeLLVMCommandLineOptions(const HloModuleConfig& config) {
+  auto options = config.debug_options().xla_backend_extra_options();
+  if (!options.empty()) {
+    std::vector<string> fake_argv_storage;
+    fake_argv_storage.push_back("");
+    for (const auto& it : options) {
+      // Skip options the XLA backend itself consumes.
+      if (!tensorflow::StringPiece(it.first).starts_with("xla_")) {
+        if (it.second.empty()) {
+          fake_argv_storage.push_back(it.first);
+        } else {
+          fake_argv_storage.push_back(it.first + "=" + it.second);
+        }
+      }
+    }
+
+    VLOG(2) << "Passing argv to LLVM:";
+    std::vector<const char*> fake_argv;
+    for (const auto& s : fake_argv_storage) {
+      fake_argv.push_back(s.c_str());
+      VLOG(2) << s;
+    }
+    llvm::cl::ParseCommandLineOptions(fake_argv.size(), &fake_argv[0]);
+  }
+}
+
 }  // namespace llvm_ir
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h
index 063ead2b647d8fc5cc4f67004aaded80a2191fe9..4a10ec466dae6fdb56546fb8d8b353dcff6a5b8d 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h
@@ -29,6 +29,7 @@ limitations under the License.
 #include "llvm/Support/raw_ostream.h"
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_module_config.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
@@ -281,6 +282,16 @@ Status DumpIRToDirectory(const string& directory_name,
                          const string& hlo_module_name,
                          const llvm::Module& llvm_module, bool optimized);
 
+llvm::Function* CreateFunction(llvm::FunctionType* function_type,
+                               llvm::GlobalValue::LinkageTypes linkage,
+                               bool enable_fast_math, bool optimize_for_size,
+                               tensorflow::StringPiece name,
+                               llvm::Module* module);
+
+// Extracts the xla_backend_extra_options from `config` and passes those that
+// don't start with xla_ to LLVM.
+void InitializeLLVMCommandLineOptions(const HloModuleConfig& config);
+
 }  // namespace llvm_ir
 }  // namespace xla
 
diff --git a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc
index 6fa4cd08c9e0ac30b83c0e2b49d98d930c2e15df..a5f7c850c33757fe8d48567ade35544d81224e46 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc
@@ -99,8 +99,8 @@ IrArray::Index LoopEmitter::EmitIndexAndSetExitBasicBlock(
   // dimension (of the target shape).
   ForLoopNest loop_nest(loop_name, ir_builder_);
   IrArray::Index array_index(shape_.dimensions_size());
-  for (int i = shape_.layout().minor_to_major_size() - 1; i >= 0; --i) {
-    int64 dimension = shape_.layout().minor_to_major(i);
+  for (int i = 0; i < LayoutUtil::MinorToMajor(shape_).size(); ++i) {
+    int64 dimension = LayoutUtil::Major(shape_.layout(), i);
     std::unique_ptr<ForLoop> loop = loop_nest.AddLoop(
         /*start_index=*/0,
         /*end_index=*/shape_.dimensions(dimension),
diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc
index 06f43bd3cb2376d34a3104133c868c4f4e5cc730..2194d24257d0ccd04f3c9625412116eba01acd8c 100644
--- a/tensorflow/compiler/xla/service/local_service.cc
+++ b/tensorflow/compiler/xla/service/local_service.cc
@@ -84,15 +84,30 @@ StatusOr<std::unique_ptr<Executable>> LocalService::CompileExecutable(
   // Validate incoming layouts.
   if (argument_layouts.size() != program_shape->parameters_size()) {
     return InvalidArgument(
-        "invalid number of arguments for computation: expected %d, got %zu",
+        "Invalid number of arguments for computation: expected %d, got %zu.",
         program_shape->parameters_size(), argument_layouts.size());
   }
   for (int i = 0; i < argument_layouts.size(); ++i) {
     const Shape& argument_shape = *argument_layouts[i];
     TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(argument_shape));
     if (!ShapeUtil::Compatible(argument_shape, program_shape->parameters(i))) {
+      tensorflow::gtl::optional<const OpMetadata*> metadata =
+          user_computation->ParameterMetadata(i);
+      auto metadata_string = [&metadata]() -> string {
+        if (!metadata.has_value()) {
+          return "";
+        }
+        CHECK(metadata.value() != nullptr);
+        const OpMetadata& m = *metadata.value();
+        if (!m.source_file().empty()) {
+          return tensorflow::strings::Printf(
+              " (%s:%d)", m.source_file().c_str(), m.source_line());
+        }
+        return "";
+      };
       return InvalidArgument(
-          "invalid argument shape for argument %d, expected %s, got %s", i,
+          "Invalid argument shape for argument %d%s, expected %s, got %s.", i,
+          metadata_string().c_str(),
           ShapeUtil::HumanString(program_shape->parameters(i)).c_str(),
           ShapeUtil::HumanString(argument_shape).c_str());
     }
@@ -118,10 +133,14 @@ StatusOr<std::unique_ptr<Executable>> LocalService::CompileExecutable(
   TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
                       execute_backend_->stream_executor(device_ordinal));
 
-  std::vector<perftools::gputools::DeviceMemoryBase> argument_buffers(
-      argument_layouts.size());
   return BuildExecutable(versioned_handle, std::move(module_config),
-                         argument_buffers, execute_backend_.get(), executor);
+                         execute_backend_.get(), executor);
+}
+
+StatusOr<int> LocalService::ReplicaNumberToDeviceOrdinal(int replica_number) {
+  return backend().computation_placer()->DeviceId(
+      replica_number, /*computation=*/0, options_.number_of_replicas(),
+      /*computation_count=*/1);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/local_service.h b/tensorflow/compiler/xla/service/local_service.h
index 52c4346385eb663baa6e7579d7b3883ba084205b..acbc7268252881958190f416ab936d64430166e1 100644
--- a/tensorflow/compiler/xla/service/local_service.h
+++ b/tensorflow/compiler/xla/service/local_service.h
@@ -47,6 +47,13 @@ class LocalService : public Service {
       const tensorflow::gtl::ArraySlice<const Shape*> argument_layouts,
       const Shape* result_layout, int device_ordinal);
 
+  // Returns the device ordinal that corresponds to the given replica number.
+  //
+  // This returns an error if there is not a one-to-one correspondence of
+  // replicas to device ordinals, but is useful as a short term mechanism for
+  // the "easy" case where a single replica is a single device.
+  StatusOr<int> ReplicaNumberToDeviceOrdinal(int replica_number);
+
  private:
   explicit LocalService(const ServiceOptions& options,
                         std::unique_ptr<Backend> backend);
diff --git a/tensorflow/compiler/xla/service/name_uniquer.cc b/tensorflow/compiler/xla/service/name_uniquer.cc
index a0d08c288dbcc45e83a36ce7b094b04a9dbae532..7d8c05fffa4ab11d7dbf9956d2cb7ebd5bcdd3c4 100644
--- a/tensorflow/compiler/xla/service/name_uniquer.cc
+++ b/tensorflow/compiler/xla/service/name_uniquer.cc
@@ -17,12 +17,44 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
 
+namespace {
+
+bool IsAllowed(char character) {
+  auto c = static_cast<unsigned char>(character);
+  return (isalnum(c) != 0) || c == '_' || c == '.' || c == '-';
+}
+
+}  // namespace
+
+NameUniquer::NameUniquer(const string& separator) {
+  CHECK(std::all_of(separator.begin(), separator.end(), IsAllowed))
+      << "separator should comprises allowed characters only";
+  separator_ = separator;
+}
+
+/*static*/ string NameUniquer::GetSanitizedName(const string& name) {
+  string result = name;
+  CHECK(!result.empty()) << "name should not be empty";
+  char c = static_cast<unsigned char>(result[0]);
+  if (!isalpha(c) && c != '_') {
+    result[0] = '_';
+  }
+  for (int i = 1; i < result.length(); i++) {
+    if (!IsAllowed(result[i])) {
+      result[i] = '_';
+    }
+  }
+  return result;
+}
+
 string NameUniquer::GetUniqueName(tensorflow::StringPiece prefix) {
   string root = prefix.empty() ? "name" : prefix.ToString();
+  root = GetSanitizedName(root);
 
   // Strip away numeric suffix (if any). Only recognize separator if it is in
   // the middle of the name.
diff --git a/tensorflow/compiler/xla/service/name_uniquer.h b/tensorflow/compiler/xla/service/name_uniquer.h
index ed379b52258463b960dea788721c2c4325ef0260..4139c2700b25e8600182a034a8ac6f4f041c12e6 100644
--- a/tensorflow/compiler/xla/service/name_uniquer.h
+++ b/tensorflow/compiler/xla/service/name_uniquer.h
@@ -28,14 +28,21 @@ namespace xla {
 // Simple stateful class that helps generate "unique" names. To use it, simply
 // call GetUniqueName as many times as needed. The names returned by
 // GetUniqueName are guaranteed to be distinct for this instance of the class.
+// Note that the names will be sanitized to match regexp
+// "[a-zA-Z_][a-zA-Z0-9_.-]*".
 class NameUniquer {
  public:
-  explicit NameUniquer(const string& separator = "__")
-      : separator_(separator) {}
+  // The separator must contain allowed characters only: "[a-zA-Z0-9_.-]".
+  explicit NameUniquer(const string& separator = "__");
 
-  // Get a unique name in a string, with an optional prefix for convenience.
+  // Get a sanitized unique name in a string, with an optional prefix for
+  // convenience.
   string GetUniqueName(tensorflow::StringPiece prefix = "");
 
+  // Sanitizes and returns the name. Unallowed characters will be replaced with
+  // '_'. The result will match the regexp "[a-zA-Z_][a-zA-Z0-9_.-]*".
+  static string GetSanitizedName(const string& name);
+
  private:
   // The string to use to separate the prefix of the name from the uniquing
   // integer value.
diff --git a/tensorflow/compiler/xla/service/name_uniquer_test.cc b/tensorflow/compiler/xla/service/name_uniquer_test.cc
index 9f0747a6e2175a968d8f3661ac51512009e86f29..4258cf16876ab46dce6df062ab701b1b1a4a7580 100644
--- a/tensorflow/compiler/xla/service/name_uniquer_test.cc
+++ b/tensorflow/compiler/xla/service/name_uniquer_test.cc
@@ -60,12 +60,30 @@ TEST_F(NameUniquerTest, NumericSuffixes) {
   EXPECT_EQ("bar", uniquer.GetUniqueName("bar.-1000"));
   EXPECT_EQ("bar.1", uniquer.GetUniqueName("bar.-2000"));
   EXPECT_EQ("bar.2", uniquer.GetUniqueName("bar.1"));
+}
+
+TEST_F(NameUniquerTest, Sanitize) {
+  NameUniquer uniquer("_");
+
+  EXPECT_EQ("foo", uniquer.GetUniqueName("foo"));
+  EXPECT_EQ("foo_1", uniquer.GetUniqueName("foo"));
+  EXPECT_EQ("foo.54", uniquer.GetUniqueName("foo.54"));
+  EXPECT_EQ("foo_54", uniquer.GetUniqueName("foo_54"));
+  EXPECT_EQ("foo_54.1", uniquer.GetUniqueName("foo_54.1"));
+  EXPECT_EQ("foo_55", uniquer.GetUniqueName("foo"));
+
+  // Invalid characters will be replaced with '_'.
+  EXPECT_EQ("bar", uniquer.GetUniqueName("bar<-1000"));
+  EXPECT_EQ("bar_1", uniquer.GetUniqueName("bar<-2000"));
+  EXPECT_EQ("bar_2", uniquer.GetUniqueName("bar_1"));
 
   // Separator is only recognized in the middle of the prefix.
-  EXPECT_EQ(".10", uniquer.GetUniqueName(".10"));
-  EXPECT_EQ(".10.1", uniquer.GetUniqueName(".10"));
-  EXPECT_EQ("foobar.", uniquer.GetUniqueName("foobar."));
-  EXPECT_EQ("foobar..1", uniquer.GetUniqueName("foobar."));
+  EXPECT_EQ("_10", uniquer.GetUniqueName(
+                       ".10"));  // the leading '.' is replaced with '_'.
+  EXPECT_EQ("_10_1", uniquer.GetUniqueName(".10"));
+  EXPECT_EQ("_10_2", uniquer.GetUniqueName("_10"));
+  EXPECT_EQ("foobar_", uniquer.GetUniqueName("foobar_"));
+  EXPECT_EQ("foobar__1", uniquer.GetUniqueName("foobar_"));
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/service/platform_util.cc b/tensorflow/compiler/xla/service/platform_util.cc
index 63f3bfb36cedeb44b190e1e8a5584d334f94b585..aa974ee61a27de9c19e97d8a6eb48f9261ce4bd9 100644
--- a/tensorflow/compiler/xla/service/platform_util.cc
+++ b/tensorflow/compiler/xla/service/platform_util.cc
@@ -33,10 +33,32 @@ namespace se = ::perftools::gputools;
 
 namespace xla {
 
+using tensorflow::str_util::Lowercase;
+
 // Minimum supported CUDA compute capability is 3.5.
 constexpr int kMinCudaComputeCapabilityMajor = 3;
 constexpr int kMinCudaComputeCapabilityMinor = 5;
 
+// The name of the interpreter platform.
+constexpr char kInterpreter[] = "interpreter";
+
+namespace {
+
+string CanonicalPlatformName(const string& name) {
+  string platform_str = Lowercase(name);
+  // "cpu" and "host" mean the same thing.
+  if (platform_str == "cpu") {
+    platform_str = "host";
+  }
+  // "gpu" and "cuda" mean the same thing.
+  if (platform_str == "gpu") {
+    platform_str = "cuda";
+  }
+  return platform_str;
+}
+
+}  // namespace
+
 /* static */ StatusOr<std::vector<se::Platform*>>
 PlatformUtil::GetSupportedPlatforms() {
   se::MultiPlatformManager::PlatformMap platform_map;
@@ -78,7 +100,7 @@ PlatformUtil::GetSupportedPlatforms() {
   return platforms;
 }
 
-/* static */ StatusOr<se::Platform*> PlatformUtil::GetDefaultPlatform() {
+/* static */ StatusOr<se::Platform*> PlatformUtil::GetSolePlatform() {
   TF_ASSIGN_OR_RETURN(auto platforms, GetSupportedPlatforms());
   if (platforms.empty()) {
     return NotFound("no platforms found");
@@ -87,26 +109,42 @@ PlatformUtil::GetSupportedPlatforms() {
   }
 
   // Multiple platforms present and we can't pick a reasonable default.
-  auto l = [](string* out, const se::Platform* p) { out->append(p->Name()); };
-  string platforms_string = tensorflow::str_util::Join(platforms, ", ", l);
+  string platforms_string = tensorflow::str_util::Join(
+      platforms, ", ",
+      [](string* out, const se::Platform* p) { out->append(p->Name()); });
   return InvalidArgument(
       "must specify platform because more than one platform found: %s",
       platforms_string.c_str());
 }
 
-/*static*/ StatusOr<se::Platform*> PlatformUtil::GetPlatform(
-    const string& platform_name) {
-  using tensorflow::str_util::Lowercase;
-  string platform_str = Lowercase(platform_name);
-  // "cpu" and "host" mean the same thing.
-  if (platform_str == "cpu") {
-    platform_str = "host";
-  }
-  // "gpu" and "cuda" mean the same thing.
-  if (platform_str == "gpu") {
-    platform_str = "cuda";
+/* static */ StatusOr<se::Platform*> PlatformUtil::GetDefaultPlatform() {
+  TF_ASSIGN_OR_RETURN(auto platforms, GetSupportedPlatforms());
+  if (platforms.empty()) {
+    return NotFound("no platforms found");
+  } else if (platforms.size() == 1) {
+    return platforms[0];
+  } else if (platforms.size() == 2) {
+    for (int i = 0; i < 2; i++) {
+      if (Lowercase(platforms[i]->Name()) == kInterpreter &&
+          Lowercase(platforms[1 - i]->Name()) != kInterpreter) {
+        return platforms[1 - i];
+      }
+    }
   }
 
+  // Multiple platforms present and we can't pick a reasonable default.
+  string platforms_string = tensorflow::str_util::Join(
+      platforms, ", ",
+      [](string* out, const se::Platform* p) { out->append(p->Name()); });
+  return InvalidArgument(
+      "must specify platform because more than one platform (except for the "
+      "interpreter platform) found: %s",
+      platforms_string.c_str());
+}
+
+/*static*/ StatusOr<se::Platform*> PlatformUtil::GetPlatform(
+    const string& platform_name) {
+  string platform_str = CanonicalPlatformName(platform_name);
   TF_ASSIGN_OR_RETURN(auto platforms, PlatformUtil::GetSupportedPlatforms());
   for (se::Platform* platform : platforms) {
     if (Lowercase(platform->Name()) == platform_str) {
@@ -116,6 +154,32 @@ PlatformUtil::GetSupportedPlatforms() {
   return InvalidArgument("platform %s not found", platform_name.c_str());
 }
 
+/*static*/ StatusOr<se::Platform*> PlatformUtil::GetPlatformExceptFor(
+    const string& platform_name) {
+  string platform_str = CanonicalPlatformName(platform_name);
+
+  TF_ASSIGN_OR_RETURN(auto platforms, PlatformUtil::GetSupportedPlatforms());
+  std::vector<se::Platform*> matched;
+  for (se::Platform* platform : platforms) {
+    if (Lowercase(platform->Name()) != platform_name) {
+      matched.push_back(platform);
+    }
+  }
+  if (matched.empty()) {
+    return InvalidArgument("unable to find platform that is not %s",
+                           platform_name.c_str());
+  }
+  if (matched.size() == 1) {
+    return matched[0];
+  }
+  string matched_string = tensorflow::str_util::Join(
+      matched, ", ",
+      [](string* out, const se::Platform* p) { out->append(p->Name()); });
+  return InvalidArgument(
+      "found multiple platforms %s, but expected one platform except for %s",
+      matched_string.c_str(), platform_name.c_str());
+}
+
 // Returns whether the device underlying the given StreamExecutor is supported
 // by XLA.
 static bool IsDeviceSupported(se::StreamExecutor* executor) {
diff --git a/tensorflow/compiler/xla/service/platform_util.h b/tensorflow/compiler/xla/service/platform_util.h
index a59d4ffe87f568ac786e4b2d3bf6983bc0d4695a..69188820a70707d9c9be10b20fb7de92ad4d9873 100644
--- a/tensorflow/compiler/xla/service/platform_util.h
+++ b/tensorflow/compiler/xla/service/platform_util.h
@@ -37,16 +37,28 @@ class PlatformUtil {
   static StatusOr<std::vector<perftools::gputools::Platform*>>
   GetSupportedPlatforms();
 
-  // Convenience function which returns the default supported platform. If
+  // Convenience function which returns the default supported platform for
+  // tests. If exactly one supported platform is present, then this platform is
+  // the default platform. If exactly two platforms are present and one of them
+  // is the interpreter platform, then the other platform is the default
+  // platform. Otherwise returns an error.
+  static StatusOr<perftools::gputools::Platform*> GetDefaultPlatform();
+
+  // Convenience function which returns the sole supported platform. If
   // exactly one supported platform is present, then this platform is the
   // default platform. Otherwise returns an error.
-  static StatusOr<perftools::gputools::Platform*> GetDefaultPlatform();
+  static StatusOr<perftools::gputools::Platform*> GetSolePlatform();
 
   // Returns the platform according to the given name. Returns error if there is
   // no such platform.
   static StatusOr<perftools::gputools::Platform*> GetPlatform(
       const string& platform_name);
 
+  // Returns exactly one platform that does not have given name. Returns error
+  // if there is no such platform, or there are multiple such platforms.
+  static StatusOr<perftools::gputools::Platform*> GetPlatformExceptFor(
+      const string& platform_name);
+
   // Returns a vector of StreamExecutors for the given platform. The vector is
   // indexed by device ordinal (device numbering used by StreamExecutor). If an
   // element is nullptr, then the device is present by not supported by XLA.
diff --git a/tensorflow/compiler/xla/service/reshape_mover.cc b/tensorflow/compiler/xla/service/reshape_mover.cc
index 0fb90230f2f39a841973361f63d17af579a1342b..e62bafc50b0e1270702621c9ea7b2ee43e001fe0 100644
--- a/tensorflow/compiler/xla/service/reshape_mover.cc
+++ b/tensorflow/compiler/xla/service/reshape_mover.cc
@@ -101,8 +101,9 @@ HloInstruction* FirstNonScalarAndNonTrivialReshapeOperand(
         IsReshapeOrTranspose(operand) &&
         !CanTriviallyChangeShape(operand->operand(0))) {
       VLOG(5) << "Found first non-scalar and non-trivial reshape operand of "
-              << hlo->ToStringNoMetadata() << ":\n\t"
-              << operand->ToStringNoMetadata();
+              << hlo->ToString(HloPrintOptions().set_print_metadata(false))
+              << ":\n\t"
+              << operand->ToString(HloPrintOptions().set_print_metadata(false));
       return operand;
     }
   }
@@ -133,8 +134,9 @@ bool AreEquivalentReshapes(const HloInstruction* a, const HloInstruction* b) {
 bool AllOperandsHaveEasyShapeChanges(
     const HloInstruction* instruction,
     const HloInstruction* first_reshape_operand) {
+  auto print_no_metadata = HloPrintOptions().set_print_metadata(false);
   VLOG(3) << "** Checking whether all operands have easy shape changes: "
-          << instruction->ToStringNoMetadata();
+          << instruction->ToString(print_no_metadata);
   // Check whether all operands:
   //    0. Have the same dimensions as the output -- if not, it may be
   //       implicitly broadcast, which can confound the movement's
@@ -151,21 +153,21 @@ bool AllOperandsHaveEasyShapeChanges(
       VLOG(5) << "Operand shape differs from output shape; may be "
                  "implicitly broadcast, so preventing "
                  "movement\n\toperand: "
-              << operand->ToStringNoMetadata()
-              << "\n\tinstruction: " << instruction->ToStringNoMetadata();
+              << operand->ToString(print_no_metadata) << "\n\tinstruction: "
+              << instruction->ToString(print_no_metadata);
       return false;
     }
 
     if (AreEquivalentReshapes(first_reshape_operand, operand)) {
       VLOG(5) << "Are equivalent reshapes:\n\tfirst_reshape_operand: "
-              << first_reshape_operand->ToStringNoMetadata()
-              << "\n\toperand: " << operand->ToStringNoMetadata();
+              << first_reshape_operand->ToString(print_no_metadata)
+              << "\n\toperand: " << operand->ToString(print_no_metadata);
       continue;
     }
 
     if (CanTriviallyChangeShape(operand)) {
       VLOG(5) << "Operand can trivially change shape: "
-              << operand->ToStringNoMetadata();
+              << operand->ToString(print_no_metadata);
       continue;
     }
 
@@ -173,12 +175,12 @@ bool AllOperandsHaveEasyShapeChanges(
     // well.
     VLOG(5) << "Operand is neither equalivant to the first Reshape operand"
                "nor can trivially change shape: "
-            << operand->ToStringNoMetadata();
+            << operand->ToString(print_no_metadata);
     return false;
   }
 
   VLOG(3) << "All operands have easy shape changes: "
-          << instruction->ToStringNoMetadata();
+          << instruction->ToString(print_no_metadata);
   return true;
 }
 
@@ -250,11 +252,13 @@ StatusOr<bool> TrySinkReshapeOrTranspose(HloComputation* computation,
     return false;
   }
 
+  auto print_no_metadata = HloPrintOptions().set_print_metadata(false);
   // At this point we've decided to sink reshape/transpose operands.
   const Shape& new_operand_shape = first_reshape_operand->operand(0)->shape();
   VLOG(3) << "** Sinking reshape or transpose: "
-          << instruction->ToStringNoMetadata() << "\n\tfirst reshape operand: "
-          << first_reshape_operand->ToStringNoMetadata()
+          << instruction->ToString(print_no_metadata)
+          << "\n\tfirst reshape operand: "
+          << first_reshape_operand->ToString(print_no_metadata)
           << "\n\tnew operand shape: "
           << ShapeUtil::HumanString(new_operand_shape);
 
@@ -267,7 +271,7 @@ StatusOr<bool> TrySinkReshapeOrTranspose(HloComputation* computation,
       continue;
     }
     VLOG(3) << "Updating operand #" << i << ": "
-            << operands[i]->ToStringNoMetadata();
+            << operands[i]->ToString(print_no_metadata);
     operands[i] = UpdateOperand(computation, first_reshape_operand,
                                 new_operand_shape, operands[i]);
   }
@@ -298,7 +302,7 @@ StatusOr<bool> TrySinkReshapeOrTranspose(HloComputation* computation,
   switch (first_reshape_operand->opcode()) {
     case HloOpcode::kReshape:
       VLOG(3) << "Creating new reshape for new elementwise op: "
-              << new_elementwise->ToStringNoMetadata();
+              << new_elementwise->ToString(print_no_metadata);
       new_reshape =
           HloInstruction::CreateReshape(instruction->shape(), new_elementwise);
       break;
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index d997cab83f8c2bc74632e49f23e690ffb17b901a..fc848bdb036125e5dadb471be431d3d2523c6770 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -60,41 +60,32 @@ namespace xla {
 
 namespace {
 
-// Copies the contents of an Allocation into a Literal proto.
-tensorflow::Status LiteralFromAllocation(const Allocation* allocation,
-                                         const Shape& literal_shape,
-                                         Literal* literal) {
-  TF_ASSIGN_OR_RETURN(
-      se::StreamExecutor * executor,
-      allocation->backend()->stream_executor(allocation->device_ordinal()));
-  return allocation->backend()->transfer_manager()->TransferLiteralFromDevice(
-      executor, allocation->device_memory(), allocation->shape(), literal_shape,
-      literal);
-}
-
 // Records the arguments used to invoke a computation in a SessionModule
 // proto.
 tensorflow::Status RecordArguments(
-    const tensorflow::gtl::ArraySlice<const Allocation*> arg_allocations,
+    const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
+    se::StreamExecutor* executor, TransferManager* transfer_manager,
     SessionModule* module) {
   module->clear_arguments();
-  for (const Allocation* allocation : arg_allocations) {
-    Literal argument;
-    TF_RETURN_IF_ERROR(
-        LiteralFromAllocation(allocation, allocation->shape(), &argument));
-    *module->add_arguments() = argument.ToProto();
+  for (const ShapedBuffer* argument : arguments) {
+    TF_ASSIGN_OR_RETURN(
+        std::unique_ptr<Literal> literal,
+        transfer_manager->TransferLiteralFromDevice(executor, *argument));
+    *module->add_arguments() = literal->ToProto();
   }
   return tensorflow::Status::OK();
 }
 
 // Records the result of a computation in a SessionModule proto.
-tensorflow::Status RecordResult(const Allocation* result_allocation,
+tensorflow::Status RecordResult(const ShapedBuffer& result,
+                                se::StreamExecutor* executor,
+                                TransferManager* transfer_manager,
                                 SessionModule* module) {
   module->clear_result();
-  Literal result;
-  TF_RETURN_IF_ERROR(LiteralFromAllocation(
-      result_allocation, result_allocation->shape(), &result));
-  *module->mutable_result() = result.ToProto();
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<Literal> literal,
+      transfer_manager->TransferLiteralFromDevice(executor, result));
+  *module->mutable_result() = literal->ToProto();
   return tensorflow::Status::OK();
 }
 
@@ -152,7 +143,9 @@ int ServiceOptions::intra_op_parallelism_threads() const {
 
 Service::Service(const ServiceOptions& options,
                  std::unique_ptr<Backend> execute_backend)
-    : options_(options), execute_backend_(std::move(execute_backend)) {
+    : options_(options),
+      allocation_tracker_(execute_backend.get()),
+      execute_backend_(std::move(execute_backend)) {
   CHECK_GT(options_.number_of_replicas(), 0);
   if (execute_backend_) {
     if (execute_backend_->device_count() > 0) {
@@ -235,35 +228,33 @@ tensorflow::Status Service::ValidateResultShapeWithLayout(
   return ShapeUtil::ValidateShape(shape_with_layout);
 }
 
-StatusOr<std::vector<const Allocation*>> Service::ResolveAndValidateArguments(
+StatusOr<std::vector<const ShapedBuffer*>> Service::ResolveAndValidateArguments(
     tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
-    const Backend* backend, int device_ordinal) {
-  std::vector<const Allocation*> allocations;
+    int device_ordinal) {
+  std::vector<const ShapedBuffer*> shaped_buffers;
   for (size_t i = 0; i < arguments.size(); ++i) {
-    auto allocation_status = allocation_tracker_.Resolve(*arguments[i]);
-    if (!allocation_status.ok()) {
-      return Status(allocation_status.status().code(),
-                    StrCat(allocation_status.status().error_message(), ", ",
+    auto buffer_status = allocation_tracker_.Resolve(*arguments[i]);
+    if (!buffer_status.ok()) {
+      return Status(buffer_status.status().code(),
+                    StrCat(buffer_status.status().error_message(), ", ",
                            "failed to resolve allocation for parameter ", i));
     }
-    const Allocation* allocation = allocation_status.ValueOrDie();
+    const ShapedBuffer* shaped_buffer = buffer_status.ValueOrDie();
 
     // Verify allocation is same platform and device as the execution.
-    if (allocation->backend() != backend ||
-        allocation->device_ordinal() != device_ordinal) {
+    if (shaped_buffer->platform() != execute_backend_->platform() ||
+        shaped_buffer->device_ordinal() != device_ordinal) {
       return InvalidArgument(
-          "argument %lu is on device %s but computation will be executed "
+          "argument %lu is on device %s:%d but computation will be executed "
           "on device %s",
-          i,
-          allocation->backend()
-              ->device_name(allocation->device_ordinal())
-              .c_str(),
-          backend->device_name(device_ordinal).c_str());
+          i, shaped_buffer->platform()->Name().c_str(),
+          shaped_buffer->device_ordinal(),
+          execute_backend_->device_name(device_ordinal).c_str());
     }
 
-    allocations.push_back(allocation);
+    shaped_buffers.push_back(shaped_buffer);
   }
-  return allocations;
+  return shaped_buffers;
 }
 
 StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig(
@@ -325,11 +316,11 @@ StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig(
 
 StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig(
     const ProgramShape& program_shape,
-    tensorflow::gtl::ArraySlice<const Allocation*> arguments,
+    tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     const ExecutionOptions& execution_options) {
   std::vector<const Shape*> argument_shapes;
   for (const auto* arg : arguments) {
-    argument_shapes.push_back(&arg->shape());
+    argument_shapes.push_back(&arg->on_host_shape());
   }
   return CreateModuleConfig(program_shape, argument_shapes, &execution_options);
 }
@@ -398,8 +389,6 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> Service::BuildExecutables(
 StatusOr<std::unique_ptr<Executable>> Service::BuildExecutable(
     const VersionedComputationHandle& versioned_handle,
     std::unique_ptr<HloModuleConfig> module_config,
-    const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-        arguments,
     Backend* backend, se::StreamExecutor* executor) {
   VLOG(1) << Printf("BuildExecutable on service %p with handle %s", this,
                     versioned_handle.ToString().c_str());
@@ -447,8 +436,6 @@ StatusOr<std::unique_ptr<Executable>> Service::BuildExecutable(
 StatusOr<std::shared_ptr<Executable>> Service::BuildAndCacheExecutable(
     const VersionedComputationHandle& versioned_handle,
     std::unique_ptr<HloModuleConfig> module_config,
-    const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-        arguments,
     Backend* backend, perftools::gputools::StreamExecutor* executor,
     ExecutionProfile* profile) {
   std::shared_ptr<Executable> executable =
@@ -471,8 +458,8 @@ StatusOr<std::shared_ptr<Executable>> Service::BuildAndCacheExecutable(
   HloModuleConfig original_module_config = *module_config;
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<Executable> executable_unique_ptr,
-      BuildExecutable(versioned_handle, std::move(module_config), arguments,
-                      backend, executor));
+      BuildExecutable(versioned_handle, std::move(module_config), backend,
+                      executor));
 
   if (profile != nullptr) {
     uint64 end_micros = tensorflow::Env::Default()->NowMicros();
@@ -489,9 +476,7 @@ StatusOr<std::shared_ptr<Executable>> Service::BuildAndCacheExecutable(
 StatusOr<std::vector<GlobalDataHandle>>
 Service::ExecuteParallelAndRegisterResult(
     tensorflow::gtl::ArraySlice<Executable*> executables,
-    tensorflow::gtl::ArraySlice<
-        std::vector<perftools::gputools::DeviceMemoryBase>>
-        arguments,
+    tensorflow::gtl::ArraySlice<std::vector<const ShapedBuffer*>> arguments,
     Backend* backend, tensorflow::gtl::ArraySlice<DeviceHandle> device_handles,
     tensorflow::gtl::ArraySlice<string> result_tags,
     ExecutionProfile* profile) {
@@ -547,7 +532,7 @@ Service::ExecuteParallelAndRegisterResult(
 
       // Asynchronously launch the computation.
       TF_ASSIGN_OR_RETURN(
-          perftools::gputools::DeviceMemoryBase result,
+          std::unique_ptr<ShapedBuffer> result,
           executables[i]->ExecuteAsyncOnStream(&run_options, arguments[i]));
 
       if (replica == 0 && profile != nullptr) {
@@ -557,17 +542,20 @@ Service::ExecuteParallelAndRegisterResult(
       // All replicas share the same device address for the result allocation,
       // so only one of the replicas need to register the result handle.
       if (replica == 0) {
-        result_handles.push_back(allocation_tracker_.Register(
-            backend, replicas[0]->device_ordinal(), result,
-            executables[i]->result_shape(), result_tags[i]));
+        TF_ASSIGN_OR_RETURN(
+            GlobalDataHandle handle,
+            allocation_tracker_.Register(std::move(result), result_tags[i]));
+        result_handles.push_back(handle);
       }
     }
   }
 
   // Wait for all executions to complete.
   for (int64 i = 0; i < streams.size(); ++i) {
-    if (!streams[i]->BlockHostUntilDone()) {
-      return InternalError("failed to complete execution for stream %lld", i);
+    Status block_status = streams[i]->BlockHostUntilDone();
+    if (!block_status.ok()) {
+      return InternalError("failed to complete execution for stream %lld: %s",
+                           i, block_status.error_message().c_str());
     }
   }
 
@@ -625,8 +613,7 @@ Service::ExecuteParallelAndRegisterResult(
 
 StatusOr<GlobalDataHandle> Service::ExecuteAndRegisterResult(
     Executable* executable,
-    const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-        arguments,
+    const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
     Backend* backend, perftools::gputools::StreamExecutor* executor,
     const string& result_tag, ExecutionProfile* profile) {
   // Set up streams.
@@ -651,6 +638,7 @@ StatusOr<GlobalDataHandle> Service::ExecuteAndRegisterResult(
   for (const Pool<se::Stream>::SmartPtr& stream : streams) {
     ExecutableRunOptions options;
     options.set_stream(stream.get());
+    options.set_device_ordinal(stream->parent()->device_ordinal());
     options.set_allocator(backend->memory_allocator());
     options.set_inter_op_thread_pool(backend->inter_op_thread_pool());
     options.set_intra_op_thread_pool(
@@ -660,24 +648,21 @@ StatusOr<GlobalDataHandle> Service::ExecuteAndRegisterResult(
                              backend->inter_op_thread_pool());
   }
 
-  perftools::gputools::DeviceMemoryBase result;
+  std::unique_ptr<ShapedBuffer> result;
   if (options_.number_of_replicas() == 1) {
-    TF_ASSIGN_OR_RETURN(
-        result, executable->ExecuteOnStreamWrapper<se::DeviceMemoryBase>(
-                    &run_options[0], profile, arguments));
+    TF_ASSIGN_OR_RETURN(result, executable->ExecuteOnStreamWrapper(
+                                    &run_options[0], profile, arguments));
   } else {
-    std::vector<
-        tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>>
+    // TODO(b/69985541): Support profiling also on this path.
+    std::vector<tensorflow::gtl::ArraySlice<const ShapedBuffer*>>
         repeated_arguments(options_.number_of_replicas(), arguments);
 
     TF_ASSIGN_OR_RETURN(auto results, executable->ExecuteOnStreams(
                                           run_options, repeated_arguments));
     TF_RET_CHECK(!results.empty());
-    result = results[0];
+    result = std::move(results[0]);
   }
-  return allocation_tracker_.Register(backend, executor->device_ordinal(),
-                                      result, executable->result_shape(),
-                                      result_tag);
+  return allocation_tracker_.Register(std::move(result), result_tag);
 }
 
 tensorflow::Status Service::SetReturnValue(const SetReturnValueRequest* arg,
@@ -691,7 +676,7 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg,
                                             ExecuteParallelResponse* result) {
   VLOG(1) << "running execute-parallel request: " << arg->ShortDebugString();
 
-  std::vector<std::vector<se::DeviceMemoryBase>> all_arguments;
+  std::vector<std::vector<const ShapedBuffer*>> all_arguments;
   std::vector<std::vector<perftools::gputools::StreamExecutor*>> all_executors;
   std::vector<VersionedComputationHandle> versioned_handles;
   std::vector<std::unique_ptr<HloModuleConfig>> module_configs;
@@ -748,19 +733,14 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg,
     // In the case of partitioned computations, assume all arguments go on the
     // zeroth core.
     TF_ASSIGN_OR_RETURN(
-        std::vector<const Allocation*> arg_allocations,
-        ResolveAndValidateArguments(request.arguments(), execute_backend_.get(),
+        std::vector<const ShapedBuffer*> arguments,
+        ResolveAndValidateArguments(request.arguments(),
                                     executors[0]->device_ordinal()));
-    std::vector<se::DeviceMemoryBase> arguments;
-    arguments.reserve(arg_allocations.size());
-    for (const Allocation* allocation : arg_allocations) {
-      arguments.push_back(allocation->device_memory());
-    }
 
     // Create an HloModuleConfig object for the computation, given the shape of
     // the program and the argument allocations.
     TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModuleConfig> module_config,
-                        CreateModuleConfig(*program_shape, arg_allocations,
+                        CreateModuleConfig(*program_shape, arguments,
                                            request.execution_options()));
     VLOG(3) << "ExecuteParallel created HloModuleConfig computation layout: "
             << module_config->entry_computation_layout().ToString();
@@ -863,35 +843,30 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg,
       user_computation->ComputeProgramShape(versioned_handle.version));
 
   TF_ASSIGN_OR_RETURN(
-      std::vector<const Allocation*> arg_allocations,
-      ResolveAndValidateArguments(arg->arguments(), execute_backend_.get(),
+      std::vector<const ShapedBuffer*> arguments,
+      ResolveAndValidateArguments(arg->arguments(),
                                   execute_backend_->default_device_ordinal()));
 
-  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModuleConfig> module_config,
-                      CreateModuleConfig(*program_shape, arg_allocations,
-                                         arg->execution_options()));
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<HloModuleConfig> module_config,
+      CreateModuleConfig(*program_shape, arguments, arg->execution_options()));
 
   VLOG(3) << "Execute created HloModuleConfig computation layout: "
           << module_config->entry_computation_layout().ToString();
 
-  std::vector<se::DeviceMemoryBase> arguments;
-  arguments.reserve(arg_allocations.size());
-  for (const Allocation* allocation : arg_allocations) {
-    arguments.push_back(allocation->device_memory());
-  }
-
   TF_ASSIGN_OR_RETURN(
       std::shared_ptr<Executable> executable,
       BuildAndCacheExecutable(versioned_handle, std::move(module_config),
-                              arguments, execute_backend_.get(),
+                              execute_backend_.get(),
                               execute_backend_->default_stream_executor(),
                               result->mutable_profile()));
 
   if (executable->dumping()) {
     executable->session_module()->set_execution_platform(
         execute_backend_->platform()->Name());
-    TF_RETURN_IF_ERROR(
-        RecordArguments(arg_allocations, executable->session_module()));
+    TF_RETURN_IF_ERROR(RecordArguments(
+        arguments, execute_backend_->default_stream_executor(),
+        execute_backend_->transfer_manager(), executable->session_module()));
   }
 
   TF_ASSIGN_OR_RETURN(
@@ -902,10 +877,11 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg,
           "result of " + user_computation->name(), result->mutable_profile()));
 
   if (executable->dumping()) {
-    TF_ASSIGN_OR_RETURN(const Allocation* result_allocation,
+    TF_ASSIGN_OR_RETURN(const ShapedBuffer* result_buffer,
                         allocation_tracker_.Resolve(result->output()));
-    TF_RETURN_IF_ERROR(
-        RecordResult(result_allocation, executable->session_module()));
+    TF_RETURN_IF_ERROR(RecordResult(
+        *result_buffer, execute_backend_->default_stream_executor(),
+        execute_backend_->transfer_manager(), executable->session_module()));
     TF_RETURN_IF_ERROR(executable->DumpSessionModule());
   }
 
@@ -931,31 +907,24 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg,
       user_computation->ComputeProgramShape(versioned_handle.version));
 
   TF_ASSIGN_OR_RETURN(
-      std::vector<const Allocation*> arg_allocations,
-      ResolveAndValidateArguments(arg->arguments(), execute_backend_.get(),
+      std::vector<const ShapedBuffer*> arguments,
+      ResolveAndValidateArguments(arg->arguments(),
                                   execute_backend_->default_device_ordinal()));
 
-  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModuleConfig> module_config,
-                      CreateModuleConfig(*program_shape, arg_allocations,
-                                         arg->execution_options()));
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<HloModuleConfig> module_config,
+      CreateModuleConfig(*program_shape, arguments, arg->execution_options()));
 
   VLOG(3) << "ExecuteAsync created HloModuleConfig computation layout: "
           << module_config->entry_computation_layout().ToString();
 
-  std::vector<se::DeviceMemoryBase> arguments;
-  arguments.reserve(arg_allocations.size());
-  for (const Allocation* allocation : arg_allocations) {
-    arguments.push_back(allocation->device_memory());
-  }
-
   ExecutionProfile profile;
 
   TF_ASSIGN_OR_RETURN(
       std::shared_ptr<Executable> executable,
-      BuildAndCacheExecutable(versioned_handle, std::move(module_config),
-                              arguments, execute_backend_.get(),
-                              execute_backend_->default_stream_executor(),
-                              &profile));
+      BuildAndCacheExecutable(
+          versioned_handle, std::move(module_config), execute_backend_.get(),
+          execute_backend_->default_stream_executor(), &profile));
 
   TF_ASSIGN_OR_RETURN(auto replicas, Replicas(*execute_backend_,
                                               SingleComputationDeviceHandle()));
@@ -970,7 +939,7 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg,
     streams.push_back(std::move(stream));
   }
 
-  perftools::gputools::DeviceMemoryBase result_data;
+  std::unique_ptr<ShapedBuffer> result_buffer;
   for (const Pool<se::Stream>::SmartPtr& stream : streams) {
     ExecutableRunOptions options;
     options.set_stream(stream.get());
@@ -983,19 +952,19 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg,
         options, execute_backend_->StreamBorrower());
 
     TF_ASSIGN_OR_RETURN(
-        perftools::gputools::DeviceMemoryBase this_result_data,
+        std::unique_ptr<ShapedBuffer> this_result_buffer,
         executable->ExecuteAsyncOnStream(&service_options, arguments));
 
     // Take the first result.
-    if (result_data == nullptr) {
-      result_data = this_result_data;
+    if (result_buffer == nullptr) {
+      result_buffer = std::move(this_result_buffer);
     }
   }
 
-  auto output = allocation_tracker_.Register(
-      execute_backend_.get(), execute_backend_->default_device_ordinal(),
-      result_data, executable->result_shape(),
-      "result of " + user_computation->name());
+  TF_ASSIGN_OR_RETURN(
+      GlobalDataHandle output,
+      allocation_tracker_.Register(std::move(result_buffer),
+                                   "result of " + user_computation->name()));
 
   *result->mutable_execution() = execution_tracker_.Register(
       execute_backend_.get(), std::move(streams), profile, output);
@@ -1022,37 +991,58 @@ tensorflow::Status Service::WaitForExecution(const WaitForExecutionRequest* arg,
 
 tensorflow::Status Service::TransferToClient(const TransferToClientRequest* arg,
                                              TransferToClientResponse* result) {
-  TF_ASSIGN_OR_RETURN(const Allocation* allocation,
+  TF_ASSIGN_OR_RETURN(const ShapedBuffer* shaped_buffer,
                       allocation_tracker_.Resolve(arg->data()));
 
-  const Shape* literal_shape;
+  const Shape* return_shape;
   if (arg->has_shape_with_layout()) {
     if (!LayoutUtil::HasLayout(arg->shape_with_layout())) {
       return InvalidArgument("shape_with_layout must have layout if present.");
     }
-    literal_shape = &arg->shape_with_layout();
+    return_shape = &arg->shape_with_layout();
   } else {
-    literal_shape = &allocation->shape();
+    return_shape = &shaped_buffer->on_host_shape();
   }
 
-  Literal literal;
-  TF_RETURN_IF_ERROR(
-      LiteralFromAllocation(allocation, *literal_shape, &literal));
-  *result->mutable_literal() = literal.ToProto();
+  TF_ASSIGN_OR_RETURN(
+      se::StreamExecutor * executor,
+      execute_backend_->stream_executor(shaped_buffer->device_ordinal()));
+
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<Literal> result_literal,
+      execute_backend_->transfer_manager()->TransferLiteralFromDevice(
+          executor, *shaped_buffer));
+
+  if (LayoutUtil::LayoutsInShapesEqual(*return_shape,
+                                       result_literal->shape())) {
+    *result->mutable_literal() = result_literal->ToProto();
+  } else {
+    *result->mutable_literal() =
+        result_literal->Relayout(*return_shape)->ToProto();
+  }
   return tensorflow::Status::OK();
 }
 
+namespace {
+
+// Creates a clone of the given shaped buffer with the given device ordinal. The
+// shape and DeviceMemoryBase values of the clone are identical to the original.
+std::unique_ptr<ShapedBuffer> CloneShapedBufferOnDevice(
+    const ShapedBuffer& shaped_buffer, int device_ordinal) {
+  auto clone = MakeUnique<ShapedBuffer>(
+      shaped_buffer.on_host_shape(), shaped_buffer.on_device_shape(),
+      shaped_buffer.platform(), device_ordinal);
+  clone->buffers() = shaped_buffer.buffers();
+  return clone;
+}
+
+}  // namespace
+
 tensorflow::Status Service::TransferToServer(const TransferToServerRequest* arg,
                                              TransferToServerResponse* result) {
-  Literal literal = Literal(arg->literal());
-  const Shape& shape = literal.shape();
-
-  if (ShapeUtil::IsTuple(shape) && options_.number_of_replicas() > 1) {
-    // TODO(b/32990684): Tuple transfers to host end up allocating further
-    // buffers - implement that correctly.
-    return Unimplemented(
-        "Tuple transfers to the device not supported with replication.");
-  }
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> literal,
+                      Literal::CreateFromProto(arg->literal()));
+  const Shape& shape = literal->shape();
 
   std::vector<se::StreamExecutor*> replicas;
   if (arg->has_device_handle()) {
@@ -1063,25 +1053,38 @@ tensorflow::Status Service::TransferToServer(const TransferToServerRequest* arg,
         replicas, Replicas(*execute_backend_, SingleComputationDeviceHandle()));
   }
 
-  // Allocate memory on the device, using the stream executor. The size of the
-  // allocation is obtained by examining the shape of the literal passed from
-  // the client. An allocation handle is returned in the response.
-  int64 allocation_size =
-      execute_backend_->transfer_manager()->GetByteSizeRequirement(shape);
-
-  TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase allocation,
-                      execute_backend_->memory_allocator()->Allocate(
-                          replicas[0]->device_ordinal(), allocation_size));
-
-  *result->mutable_data() = allocation_tracker_.Register(
-      execute_backend_.get(), replicas[0]->device_ordinal(), allocation, shape,
-      StrCat("TransferToServer literal of size ", allocation_size));
+  // All memory allocation is done on the first replica. The allocations in all
+  // other replicas mirror the firsts'.
+  int master_device_ordinal = replicas[0]->device_ordinal();
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<ShapedBuffer> shaped_buffer,
+      execute_backend_->transfer_manager()->AllocateShapedBuffer(
+          shape, execute_backend_->memory_allocator(), master_device_ordinal));
 
+  // Transfer the data to the replicas.
   for (se::StreamExecutor* executor : replicas) {
-    TF_RETURN_IF_ERROR(
-        execute_backend_->transfer_manager()->TransferLiteralToDevice(
-            executor, literal, &allocation));
+    if (executor->device_ordinal() == master_device_ordinal) {
+      TF_RETURN_IF_ERROR(
+          execute_backend_->transfer_manager()->TransferLiteralToDevice(
+              executor, *literal, *shaped_buffer));
+    } else {
+      // The replica is not the master. Create an cloned shaped buffer with
+      // the replica's device ordinal. This is required because
+      // TransferLiteralToDevice verifies that the device ordinal of the shaped
+      // buffer matches that of the executor.
+      std::unique_ptr<ShapedBuffer> clone =
+          CloneShapedBufferOnDevice(*shaped_buffer, executor->device_ordinal());
+      TF_RETURN_IF_ERROR(
+          execute_backend_->transfer_manager()->TransferLiteralToDevice(
+              executor, *literal, *clone));
+    }
   }
+  TF_ASSIGN_OR_RETURN(
+      *result->mutable_data(),
+      allocation_tracker_.Register(std::move(shaped_buffer),
+                                   StrCat("TransferToServer literal of shape ",
+                                          ShapeUtil::HumanString(shape))));
+
   return tensorflow::Status::OK();
 }
 
@@ -1109,8 +1112,10 @@ tensorflow::Status Service::TransferToInfeed(const TransferToInfeedRequest* arg,
     executor = replicas[arg->replica_id()];
   }
 
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> literal,
+                      Literal::CreateFromProto(arg->literal()));
   return execute_backend_->transfer_manager()->TransferLiteralToInfeed(
-      executor, Literal(arg->literal()));
+      executor, *literal);
 }
 
 tensorflow::Status Service::TransferFromOutfeed(
@@ -1185,7 +1190,22 @@ tensorflow::Status Service::ComputeConstant(const ComputeConstantRequest* arg,
       bool is_constant,
       user_computation->IsConstant(arg->operand(), arg->parameters_size()));
   if (!is_constant) {
-    return InvalidArgument("Operand to ComputeConstant depends on parameter.");
+    StatusOr<const OperationRequest*> op_request_status =
+        user_computation->LookUpRequestForErrorReporting(arg->operand());
+    string op_request_string = "<unknown operation>";
+    if (op_request_status.ok()) {
+      op_request_string = op_request_status.ValueOrDie()->ShortDebugString();
+    }
+    return InvalidArgument(
+        "Operand to ComputeConstant depends on a parameter.\n\n"
+        "  op requested for constant evaluation: %s\n\n"
+        "This is an internal error that typically happens when the XLA user "
+        "(e.g. TensorFlow) is attempting to determine a value that must be a "
+        "compile-time constant (e.g. an array dimension) but it is not capable "
+        "of being evaluated at XLA compile time.\n\n"
+        "Please file a usability bug with the framework being used (e.g. "
+        "TensorFlow).",
+        op_request_string.c_str());
   }
 
   // We can't use ComputeProgramShape because it checks that all parameter
@@ -1222,18 +1242,16 @@ tensorflow::Status Service::ComputeConstant(const ComputeConstantRequest* arg,
                                           /*include_unreachable_instructions=*/
                                           false));
 
-  std::vector<Literal> parameters(arg->parameters_size());
+  std::vector<std::unique_ptr<Literal>> parameters(arg->parameters_size());
   for (int64 i = 0; i < arg->parameters_size(); ++i) {
-    parameters[i] = Literal(arg->parameters(i));
+    TF_ASSIGN_OR_RETURN(parameters[i],
+                        Literal::CreateFromProto(arg->parameters(i)));
   }
-  std::vector<const Literal*> parameter_ptrs;
-  std::transform(parameters.begin(), parameters.end(),
-                 std::back_inserter(parameter_ptrs),
-                 [](const Literal& literal) { return &literal; });
-
   HloEvaluator evaluator;
-  TF_ASSIGN_OR_RETURN(auto result_literal,
-                      evaluator.Evaluate(*module, parameter_ptrs));
+  TF_ASSIGN_OR_RETURN(
+      auto result_literal,
+      evaluator.Evaluate<std::unique_ptr<Literal>>(*module, parameters));
+
   // Since the shape_with_output_layout option in ExecutionOption is
   // non-effective to the Evaluator results, explicit relayout here.
   if (arg->has_output_layout()) {
@@ -1246,9 +1264,9 @@ tensorflow::Status Service::ComputeConstant(const ComputeConstantRequest* arg,
 
 tensorflow::Status Service::GetShape(const GetShapeRequest* arg,
                                      GetShapeResponse* result) {
-  TF_ASSIGN_OR_RETURN(const Allocation* allocation,
+  TF_ASSIGN_OR_RETURN(const ShapedBuffer* buffer,
                       allocation_tracker_.Resolve(arg->data()));
-  *result->mutable_shape() = allocation->shape();
+  *result->mutable_shape() = buffer->on_host_shape();
   return tensorflow::Status::OK();
 }
 
@@ -1357,6 +1375,17 @@ tensorflow::Status Service::Op(const OpRequest* arg, OpResponse* result) {
       handle_status =
           computation->AddConcatenateInstruction(arg->concatenate_request());
       break;
+    case OpRequest::kConditionalRequest: {
+      TF_ASSIGN_OR_RETURN(UserComputation * true_computation,
+                          computation_tracker_.Resolve(
+                              arg->conditional_request().true_computation()));
+      TF_ASSIGN_OR_RETURN(UserComputation * false_computation,
+                          computation_tracker_.Resolve(
+                              arg->conditional_request().false_computation()));
+      handle_status = computation->AddConditionalInstruction(
+          arg->conditional_request(), *true_computation, *false_computation);
+      break;
+    }
     case OpRequest::kConstantRequest:
       handle_status =
           computation->AddConstantInstruction(arg->constant_request());
@@ -1381,6 +1410,9 @@ tensorflow::Status Service::Op(const OpRequest* arg, OpResponse* result) {
       handle_status =
           computation->AddCustomCallInstruction(arg->custom_call_request());
       break;
+    case OpRequest::kDotRequest:
+      handle_status = computation->AddDotInstruction(arg->dot_request());
+      break;
     case OpRequest::kDynamicSliceRequest:
       handle_status =
           computation->AddDynamicSliceInstruction(arg->dynamic_slice_request());
@@ -1389,6 +1421,9 @@ tensorflow::Status Service::Op(const OpRequest* arg, OpResponse* result) {
       handle_status = computation->AddDynamicUpdateSliceInstruction(
           arg->dynamic_update_slice_request());
       break;
+    case OpRequest::kFftRequest:
+      handle_status = computation->AddFftInstruction(arg->fft_request());
+      break;
     case OpRequest::kGetTupleElementRequest:
       handle_status = computation->AddGetTupleElementInstruction(
           arg->get_tuple_element_request());
@@ -1501,8 +1536,10 @@ tensorflow::Status Service::Op(const OpRequest* arg, OpResponse* result) {
       handle_status = computation->AddRecvInstruction(arg->recv_request());
       break;
     }
+    case OpRequest::OP_NOT_SET:
+      return InvalidArgument("XLA service received OpRequest with OP_NOT_SET");
     default:
-      return InvalidArgument("Unsupported operation");
+      return InvalidArgument("Unsupported operation in XLA service");
   }
   TF_ASSIGN_OR_RETURN(*result->mutable_output(), handle_status);
 
diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h
index 47f4f0ade594089aa71717ef1e122886b0a6c7ac..f962d0cdc7d41e1aeab55da5abcb1b40215b4144 100644
--- a/tensorflow/compiler/xla/service/service.h
+++ b/tensorflow/compiler/xla/service/service.h
@@ -250,7 +250,7 @@ class Service : public ServiceInterface {
   // class.
   StatusOr<std::unique_ptr<HloModuleConfig>> CreateModuleConfig(
       const ProgramShape& program_shape,
-      tensorflow::gtl::ArraySlice<const Allocation*> arguments,
+      tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       const ExecutionOptions& execution_options);
 
  protected:
@@ -265,10 +265,10 @@ class Service : public ServiceInterface {
 
   // Resolves the given argument handles in the allocation tracker and returns
   // the corresponding allocations. The function also verifies that each
-  // allocation matches the given backend and device ordinal.
-  StatusOr<std::vector<const Allocation*>> ResolveAndValidateArguments(
+  // allocation matches the execution platform and device ordinal.
+  StatusOr<std::vector<const ShapedBuffer*>> ResolveAndValidateArguments(
       tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
-      const Backend* backend, int device_ordinal);
+      int device_ordinal);
 
   // Create a Hlo module config for the given program shape and arguments.
   // execution_options is optional; if not given a default is used.
@@ -281,8 +281,6 @@ class Service : public ServiceInterface {
   StatusOr<std::unique_ptr<Executable>> BuildExecutable(
       const VersionedComputationHandle& versioned_handle,
       std::unique_ptr<HloModuleConfig> module_config,
-      const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
       Backend* backend, perftools::gputools::StreamExecutor* executor);
 
   // Same as BuildExecutable() above, but builds a list of Executables for the
@@ -299,8 +297,6 @@ class Service : public ServiceInterface {
   StatusOr<std::shared_ptr<Executable>> BuildAndCacheExecutable(
       const VersionedComputationHandle& versioned_handle,
       std::unique_ptr<HloModuleConfig> module_config,
-      const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
       Backend* backend, perftools::gputools::StreamExecutor* executor,
       ExecutionProfile* profile);
 
@@ -310,8 +306,7 @@ class Service : public ServiceInterface {
   // ExecutionProfile object which will be filled in with profile data.
   StatusOr<GlobalDataHandle> ExecuteAndRegisterResult(
       Executable* executable,
-      const tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
+      const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
       Backend* backend, perftools::gputools::StreamExecutor* executor,
       const string& result_tag, ExecutionProfile* profile);
 
@@ -320,9 +315,7 @@ class Service : public ServiceInterface {
   // from the tracker are returned.
   StatusOr<std::vector<GlobalDataHandle>> ExecuteParallelAndRegisterResult(
       tensorflow::gtl::ArraySlice<Executable*> executables,
-      tensorflow::gtl::ArraySlice<
-          std::vector<perftools::gputools::DeviceMemoryBase>>
-          arguments,
+      tensorflow::gtl::ArraySlice<std::vector<const ShapedBuffer*>> arguments,
       Backend* backend,
       tensorflow::gtl::ArraySlice<DeviceHandle> device_handles,
       tensorflow::gtl::ArraySlice<string> result_tags,
diff --git a/tensorflow/compiler/xla/service/service_executable_run_options.h b/tensorflow/compiler/xla/service/service_executable_run_options.h
index 017e5ef09ed2f52b862821e9408540d188a1edf5..6c1f8feac7ed4423051cf2737be57dcfab508671 100644
--- a/tensorflow/compiler/xla/service/service_executable_run_options.h
+++ b/tensorflow/compiler/xla/service/service_executable_run_options.h
@@ -30,6 +30,9 @@ class ServiceExecutableRunOptions {
   using StreamBorrower =
       std::function<StatusOr<Pool<perftools::gputools::Stream>::SmartPtr>(int)>;
 
+  ServiceExecutableRunOptions()
+      : ServiceExecutableRunOptions(ExecutableRunOptions()) {}
+
   explicit ServiceExecutableRunOptions(
       ExecutableRunOptions run_options, StreamBorrower borrow_stream = nullptr,
       tensorflow::thread::ThreadPool* xla_intra_op_thread_pool = nullptr)
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 3df1911d07cf0cd123604b1fac63923a725a37c6..a6d6c8b27f81045a4bee09e056c5c8f8e8a330c7 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -90,8 +91,6 @@ BinaryOperation OpcodeToBinaryOperation(HloOpcode opcode) {
       return BINOP_ATAN2;
     case HloOpcode::kComplex:
       return BINOP_COMPLEX;
-    case HloOpcode::kDot:
-      return BINOP_DOT;
     case HloOpcode::kMultiply:
       return BINOP_MUL;
     case HloOpcode::kAdd:
@@ -549,8 +548,113 @@ StatusOr<Shape> InferWindowOutputShape(const Shape& base_shape,
   return ShapeUtil::MakeShape(operand_shape.element_type(), dimensions);
 }
 
-/* static */ StatusOr<Shape> ShapeInference::InferDotOpShape(const Shape& lhs,
-                                                             const Shape& rhs) {
+// Current DotDimensionNumbers Requirements:
+//
+// Contracting Dimensions:
+// *) Exactly one contracting dimension on both lhs and rhs.
+// *) Contracting dimension size must be the same on both lhs and rhs.
+// *) Contracting dimension numbers do not need to be the same (i.e. transposes
+//    are passed on to emitter implementations).
+//
+// Batch Dimensions:
+// *) Same number of batch dimensions on both lhs and rhs.
+// *) Same batch dimension numbers (and sizes) on both lhs and rhs.
+// *) Batch dimension numbers must be ordered before contracting and
+//    non-contracting/non-batch dimension numbers.
+//
+// Non-Contracting-Non-Batch Dimensions:
+// *) Can be 0 (matrix-vector) or 1 (matrix-matrix).
+//
+
+namespace {
+
+Status ValidateDotDimensionNumbers(
+    const Shape& lhs, const Shape& rhs,
+    const DotDimensionNumbers& dimension_numbers) {
+  // Check that dimension numbers are in range.
+  auto dims_in_range =
+      [](const int64 rank, tensorflow::gtl::ArraySlice<int64> contracting_dims,
+         tensorflow::gtl::ArraySlice<int64> batch_dims) -> bool {
+    auto in_range = [&rank](int64 i) -> bool { return 0 <= i && i < rank; };
+    return std::all_of(contracting_dims.begin(), contracting_dims.end(),
+                       in_range) &&
+           std::all_of(batch_dims.begin(), batch_dims.end(), in_range);
+  };
+
+  tensorflow::gtl::ArraySlice<int64> lhs_contracting_dimensions =
+      AsInt64Slice(dimension_numbers.lhs_contracting_dimensions());
+  tensorflow::gtl::ArraySlice<int64> rhs_contracting_dimensions =
+      AsInt64Slice(dimension_numbers.rhs_contracting_dimensions());
+  tensorflow::gtl::ArraySlice<int64> lhs_batch_dimensions =
+      AsInt64Slice(dimension_numbers.lhs_batch_dimensions());
+  tensorflow::gtl::ArraySlice<int64> rhs_batch_dimensions =
+      AsInt64Slice(dimension_numbers.rhs_batch_dimensions());
+
+  if (!dims_in_range(ShapeUtil::Rank(lhs), lhs_contracting_dimensions,
+                     lhs_batch_dimensions) ||
+      !dims_in_range(ShapeUtil::Rank(rhs), rhs_contracting_dimensions,
+                     rhs_batch_dimensions)) {
+    return InvalidArgument("A dimension number is out of range in dot: %s",
+                           dimension_numbers.DebugString().c_str());
+  }
+
+  // Check that dimension numbers are unique.
+  auto dims_unique = [](tensorflow::gtl::ArraySlice<int64> contracting_dims,
+                        tensorflow::gtl::ArraySlice<int64> batch_dims) -> bool {
+    tensorflow::gtl::FlatSet<int64> dim_set;
+    auto is_unique = [&dim_set](int64 i) -> bool {
+      return dim_set.insert(i).second;
+    };
+    return std::all_of(contracting_dims.begin(), contracting_dims.end(),
+                       is_unique) &&
+           std::all_of(batch_dims.begin(), batch_dims.end(), is_unique);
+  };
+
+  if (!dims_unique(lhs_contracting_dimensions, lhs_batch_dimensions) ||
+      !dims_unique(rhs_contracting_dimensions, rhs_batch_dimensions)) {
+    return InvalidArgument("A dimension number is not unique in dot: %s",
+                           dimension_numbers.DebugString().c_str());
+  }
+
+  // Check that the count of non-contracting-non-batch dimensions is in {0, 1}.
+  const int64 lhs_non_contracting_non_batch_dims =
+      ShapeUtil::Rank(lhs) -
+      dimension_numbers.lhs_contracting_dimensions_size() -
+      dimension_numbers.lhs_batch_dimensions_size();
+  const int64 rhs_non_contracting_non_batch_dims =
+      ShapeUtil::Rank(rhs) -
+      dimension_numbers.rhs_contracting_dimensions_size() -
+      dimension_numbers.rhs_batch_dimensions_size();
+  if (lhs_non_contracting_non_batch_dims < 0 ||
+      lhs_non_contracting_non_batch_dims > 1 ||
+      rhs_non_contracting_non_batch_dims < 0 ||
+      rhs_non_contracting_non_batch_dims > 1) {
+    return InvalidArgument(
+        "batch and contracting dimension number mismatch "
+        "with rank ");
+  }
+
+  // Check that batch dimension numbers are ordered before all others, and
+  // that they are monotonically increasing.
+  std::vector<int64> batch_dim_numbers(lhs_batch_dimensions.size());
+  std::iota(batch_dim_numbers.begin(), batch_dim_numbers.end(), 0);
+  if (!std::equal(batch_dim_numbers.begin(), batch_dim_numbers.end(),
+                  lhs_batch_dimensions.begin()) ||
+      !std::equal(batch_dim_numbers.begin(), batch_dim_numbers.end(),
+                  rhs_batch_dimensions.begin())) {
+    return InvalidArgument(
+        "batch dimension numbers must precede non-batch dimensions and be"
+        "monotonically increasing.");
+  }
+
+  return Status::OK();
+}
+
+}  // namespace
+
+/* static */ StatusOr<Shape> ShapeInference::InferDotOpShape(
+    const Shape& lhs, const Shape& rhs,
+    const DotDimensionNumbers& dimension_numbers) {
   TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(lhs, "lhs of dot"));
   TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(rhs, "rhs of dot"));
 
@@ -570,37 +674,62 @@ StatusOr<Shape> InferWindowOutputShape(const Shape& base_shape,
     return fail("element types do not match");
   }
 
-  if (ShapeUtil::Rank(lhs) < 1 || ShapeUtil::Rank(lhs) > 2 ||
-      ShapeUtil::Rank(rhs) < 1 || ShapeUtil::Rank(rhs) > 2) {
-    return fail("dot only supports rank 1 or 2");
+  if ((ShapeUtil::Rank(lhs) < 1) || (ShapeUtil::Rank(rhs) < 1)) {
+    return fail("dot only supports rank 1 or above.");
   }
 
-  // Determine the index of the contracted dimensions for input tensors.
-  // dimensions -1 of lhs and dimension 0 of rhs are contracted.
-  int64 lhs_contracted_dimension = ShapeUtil::GetDimensionNumber(lhs, -1);
-  int64 rhs_contracted_dimension = 0;
+  // Validate basic properties of dot dimension numbers.
+  TF_RETURN_IF_ERROR(ValidateDotDimensionNumbers(lhs, rhs, dimension_numbers));
+
+  // Check that there is only one contracting dimension for both lhs and rhs.
+  if (dimension_numbers.lhs_contracting_dimensions_size() !=
+          dimension_numbers.rhs_contracting_dimensions_size() ||
+      dimension_numbers.lhs_contracting_dimensions_size() != 1) {
+    return fail("must specify one contracting dimension for both lhs and rhs.");
+  }
 
-  // Check if the contracted dimension sizes are the same.
-  if ((lhs_contracted_dimension < ShapeUtil::Rank(lhs) &&
-       rhs_contracted_dimension < ShapeUtil::Rank(rhs)) &&
-      lhs.dimensions(lhs_contracted_dimension) !=
-          rhs.dimensions(rhs_contracted_dimension)) {
-    return fail("contracted dimensions mismatch");
+  // Check that contracting dimension sizes match.
+  const int64 lhs_contracting_dimension =
+      dimension_numbers.lhs_contracting_dimensions(0);
+  const int64 rhs_contracting_dimension =
+      dimension_numbers.rhs_contracting_dimensions(0);
+  if (lhs.dimensions(lhs_contracting_dimension) !=
+      rhs.dimensions(rhs_contracting_dimension)) {
+    return fail("contracting dimension sizes do not match.");
+  }
+
+  // Check that number of batch dimensions match.
+  if (dimension_numbers.lhs_batch_dimensions_size() !=
+      dimension_numbers.rhs_batch_dimensions_size()) {
+    return fail("must the same number of batch dimensions for lhs and rhs.");
+  }
+
+  // Check that batch dimension numbers and sizes match.
+  for (int64 i = 0; i < dimension_numbers.lhs_batch_dimensions_size(); ++i) {
+    if (dimension_numbers.lhs_batch_dimensions(i) !=
+            dimension_numbers.rhs_batch_dimensions(i) ||
+        lhs.dimensions(dimension_numbers.lhs_batch_dimensions(i)) !=
+            rhs.dimensions(dimension_numbers.rhs_batch_dimensions(i))) {
+      return fail("batch dimension numbers and sizes must match for lhs/rhs.");
+    }
   }
 
   // The ranks of lhs and rhs are decremented by 1 respectively due to the
   // contraction, and added for the rank of the result. When an input tensor is
   // a scalar, its contribution to the rank of the result is 0.
   // Generate the result dimensions in order, rhs dimensions followed by lhs
-  // dimensions except the contracted dimensions.
+  // dimensions except the contracted and batch dimensions.
   std::vector<int64> dimensions;
+  std::unordered_set<int64> rhs_batch_dims(
+      dimension_numbers.rhs_batch_dimensions().begin(),
+      dimension_numbers.rhs_batch_dimensions().end());
   for (int64 i = 0; i < ShapeUtil::Rank(lhs); i++) {
-    if (i != lhs_contracted_dimension) {
+    if (i != lhs_contracting_dimension) {
       dimensions.push_back(lhs.dimensions(i));
     }
   }
   for (int64 i = 0; i < ShapeUtil::Rank(rhs); i++) {
-    if (i != rhs_contracted_dimension) {
+    if (i != rhs_contracting_dimension && rhs_batch_dims.count(i) == 0) {
       dimensions.push_back(rhs.dimensions(i));
     }
   }
@@ -816,8 +945,6 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
       rhs, tensorflow::strings::StrCat("rhs of binary operation ",
                                        BinaryOperation_Name(operation))));
   switch (operation) {
-    case BINOP_DOT:
-      return InferDotOpShape(lhs, rhs);
     case BINOP_MAX:
     case BINOP_MIN:
     case BINOP_SUB:
@@ -1588,11 +1715,103 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
   return ShapeUtil::MakeShape(lhs.element_type(), dimensions);
 }
 
+/* static */ StatusOr<Shape> ShapeInference::InferFftShape(
+    const Shape& in, const FftType fft_type,
+    const tensorflow::gtl::ArraySlice<int64> fft_length) {
+  const int64 fft_rank = fft_length.size();
+  if (fft_rank < 1 || fft_rank > 3) {
+    return InvalidArgument("FFT only supports ranks 1-3, but got %lld",
+                           fft_rank);
+  }
+#define RET_CHECK_RANK(x)                              \
+  if (x.dimensions_size() < fft_rank) {                \
+    return InvalidArgument(                            \
+        "FFT of rank %lld requires input of at least " \
+        "same rank; got input of rank %d",             \
+        fft_rank, x.dimensions_size());                \
+  }
+  switch (fft_type) {
+    case FFT:
+    case IFFT:
+      if (in.element_type() != C64) {
+        return InvalidArgument("%s requires C64 input type, found %s",
+                               FftType_Name(fft_type).c_str(),
+                               PrimitiveType_Name(in.element_type()).c_str());
+      }
+      RET_CHECK_RANK(in);
+      return in;
+    case RFFT: {
+      if (in.element_type() != F32) {
+        return InvalidArgument("RFFT requires F32 input type, found %s",
+                               PrimitiveType_Name(in.element_type()).c_str());
+      }
+      RET_CHECK_RANK(in);
+      for (int i = 0; i < fft_rank; i++) {
+        if (in.dimensions(in.dimensions_size() - fft_rank + i) !=
+            fft_length[i]) {
+          return InvalidArgument(
+              "RFFT requires innermost dimensions match fft_length but "
+              "dimension %lld is %lld and should be %lld",
+              in.dimensions_size() - fft_rank + i,
+              in.dimensions(in.dimensions_size() - fft_rank + i),
+              fft_length[i]);
+        }
+      }
+      Shape result = ShapeUtil::ChangeElementType(in, C64);
+      result.set_dimensions(result.dimensions_size() - 1,
+                            fft_length[fft_rank - 1] / 2 + 1);
+      return result;
+    }
+    case IRFFT: {
+      if (in.element_type() != C64) {
+        return InvalidArgument("IRFFT requires C64 input type, found %s",
+                               PrimitiveType_Name(in.element_type()).c_str());
+      }
+      RET_CHECK_RANK(in);
+      Shape result = ShapeUtil::ComplexComponentShape(in);
+      for (int i = 0; i < fft_rank - 1; i++) {
+        if (in.dimensions(in.dimensions_size() - fft_rank + i) !=
+            fft_length[i]) {
+          return InvalidArgument(
+              "IRFFT requires all but one innermost dimensions match "
+              "fft_length, but dimension %lld is %lld and should be %lld",
+              in.dimensions_size() - fft_rank + i,
+              in.dimensions(in.dimensions_size() - fft_rank + i),
+              fft_length[i]);
+        }
+      }
+      if (in.dimensions(in.dimensions_size() - 1) !=
+          fft_length[fft_rank - 1] / 2 + 1) {
+        return InvalidArgument(
+            "IRFFT requires innermost dimension matches fft_length/2+1, but "
+            "dimension %d is %lld and should be %lld",
+            in.dimensions_size() - 1, in.dimensions(in.dimensions_size() - 1),
+            fft_length[fft_rank - 1] / 2 + 1);
+      }
+      result.set_dimensions(result.dimensions_size() - 1,
+                            fft_length[fft_rank - 1]);
+      return result;
+    }
+    default:
+      LOG(FATAL) << "Unexpected fft_type: " << fft_type;
+  }
+#undef RET_CHECK_RANK
+}
+
 /* static */ StatusOr<Shape> ShapeInference::InferCrossReplicaSumShape(
-    const Shape& operand) {
-  TF_RETURN_IF_ERROR(
-      ExpectNotTupleOrOpaque(operand, "operand of cross replica sum"));
-  return operand;
+    tensorflow::gtl::ArraySlice<const Shape*> operand_shapes) {
+  for (const Shape* operand_shape : operand_shapes) {
+    TF_RETURN_IF_ERROR(
+        ExpectNotTupleOrOpaque(*operand_shape, "operand of cross replica sum"));
+  }
+  if (operand_shapes.size() == 1) {
+    return *operand_shapes[0];
+  }
+  std::vector<Shape> operand_shape_values;
+  for (const Shape* operand_shape : operand_shapes) {
+    operand_shape_values.push_back(*operand_shape);
+  }
+  return ShapeUtil::MakeTupleShape(operand_shape_values);
 }
 
 /* static */ StatusOr<Shape> ShapeInference::InferReduceShape(
@@ -1958,6 +2177,64 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
   return init;
 }
 
+/* static */ StatusOr<Shape> ShapeInference::InferConditionalShape(
+    const Shape& predicate, const Shape& true_operand,
+    const Shape& false_operand, const ProgramShape& true_computation,
+    const ProgramShape& false_computation) {
+  if (!ShapeUtil::ShapeIs(predicate, PRED, {})) {
+    return InvalidArgument("predicate must be a boolean; got %s.",
+                           ShapeUtil::HumanString(predicate).c_str());
+  }
+
+  if (true_computation.parameters_size() != 1) {
+    return InvalidArgument("true_computation must take 1 argument; got %d.",
+                           true_computation.parameters_size());
+  }
+  if (!ShapeUtil::Compatible(true_computation.parameters(0), true_operand)) {
+    auto true_shape_string = [&]() {
+      return tensorflow::strings::Printf(
+          "true_operand: %s; true_computation: %s",
+          ShapeUtil::HumanString(true_operand).c_str(),
+          ShapeUtil::HumanString(true_computation).c_str());
+    };
+    return InvalidArgument(
+        "true_operand must match the shape of the only parameter of "
+        "true_computation: got %s.",
+        true_shape_string().c_str());
+  }
+
+  if (false_computation.parameters_size() != 1) {
+    return InvalidArgument("false_computation must take 1 argument; got %d.",
+                           false_computation.parameters_size());
+  }
+  if (!ShapeUtil::Compatible(false_computation.parameters(0), false_operand)) {
+    auto false_shape_string = [&]() {
+      return tensorflow::strings::Printf(
+          "false_operand: %s; false_computation: %s",
+          ShapeUtil::HumanString(false_operand).c_str(),
+          ShapeUtil::HumanString(false_computation).c_str());
+    };
+    return InvalidArgument(
+        "false_operand must match the shape of the only parameter of "
+        "false_computation: got %s.",
+        false_shape_string().c_str());
+  }
+  if (!ShapeUtil::Compatible(true_computation.result(),
+                             false_computation.result())) {
+    auto shape_string = [&]() {
+      return tensorflow::strings::Printf(
+          "true_computation result: %s; false_computation result: %s.",
+          ShapeUtil::HumanString(true_computation.result()).c_str(),
+          ShapeUtil::HumanString(false_computation.result()).c_str());
+    };
+    return InvalidArgument(
+        "the result of true_computation and false_computation must have the "
+        "same shape: got %s.",
+        shape_string().c_str());
+  }
+  return true_computation.result();
+}
+
 /* static */ StatusOr<Shape> ShapeInference::InferBroadcastShape(
     const Shape& operand, tensorflow::gtl::ArraySlice<int64> broadcast_sizes) {
   TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(operand, "operand of broadcast"));
diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h
index 0aadb98a407c2160b60e686f6f3ea250bb9e838f..b39151ebbc19f5d0b702a80da5069f58c8dfb07d 100644
--- a/tensorflow/compiler/xla/service/shape_inference.h
+++ b/tensorflow/compiler/xla/service/shape_inference.h
@@ -109,8 +109,15 @@ class ShapeInference {
       const Shape& lhs, const Shape& rhs, const Window& window,
       const ConvolutionDimensionNumbers& dimension_numbers);
 
-  // Infers the shape produced a cross replica sum with the given operand shape.
-  static StatusOr<Shape> InferCrossReplicaSumShape(const Shape& operand);
+  // Infers the shape produced by the given FFT type on the given operand.
+  static StatusOr<Shape> InferFftShape(
+      const Shape& in, FftType fft_type,
+      tensorflow::gtl::ArraySlice<int64> fft_length);
+
+  // Infers the shape produced a cross replica sum with the given operand
+  // shapes.
+  static StatusOr<Shape> InferCrossReplicaSumShape(
+      tensorflow::gtl::ArraySlice<const Shape*> operand_shapes);
 
   // Infers the shape produced by applying the given reduction computation
   // shape to the given input operand shape.
@@ -178,6 +185,12 @@ class ShapeInference {
                                          const ProgramShape& body,
                                          const Shape& init);
 
+  // Infers the shape produced by a conditional operation.
+  static StatusOr<Shape> InferConditionalShape(
+      const Shape& predicate, const Shape& true_operand,
+      const Shape& false_operand, const ProgramShape& true_computation,
+      const ProgramShape& false_computation);
+
   // Infers the shape produced by a broadcast operation.
   static StatusOr<Shape> InferBroadcastShape(
       const Shape& operand, tensorflow::gtl::ArraySlice<int64> broadcast_sizes);
@@ -229,11 +242,13 @@ class ShapeInference {
       tensorflow::gtl::ArraySlice<const Shape*> arg_shapes,
       const ProgramShape& to_apply);
 
- private:
   // Helper that infers the shape produced by performing a dot operation with
   // the given LHS and RHS shapes.
-  static StatusOr<Shape> InferDotOpShape(const Shape& lhs, const Shape& rhs);
+  static StatusOr<Shape> InferDotOpShape(
+      const Shape& lhs, const Shape& rhs,
+      const DotDimensionNumbers& dimension_numbers);
 
+ private:
   // Helper that infers the shape produced by performing an element-wise binary
   // operation with the given LHS and RHS shapes.
   // Note: By "element-wise" we mean operations that look at a single element in
diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc
index be93c879c0b7fd74c3b93e28c6dc0f5c656a522a..99d87f3b550ae72befe254f23fad080dd210aaf4 100644
--- a/tensorflow/compiler/xla/service/shape_inference_test.cc
+++ b/tensorflow/compiler/xla/service/shape_inference_test.cc
@@ -898,8 +898,11 @@ TEST_F(ShapeInferenceTest, BroadcastScalar) {
 
 // scalar <dot> vector: error
 TEST_F(ShapeInferenceTest, ScalarDotVector) {
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto inferred_status =
-      ShapeInference::InferBinaryOpShape(BINOP_DOT, f32_, vector_32_, {});
+      ShapeInference::InferDotOpShape(f32_, vector_32_, dot_dnums);
   ASSERT_FALSE(inferred_status.ok());
   ASSERT_THAT(inferred_status.status().error_message(),
               HasSubstr("dot only supports rank"));
@@ -907,61 +910,199 @@ TEST_F(ShapeInferenceTest, ScalarDotVector) {
 
 // 3D <dot> 2D: error
 TEST_F(ShapeInferenceTest, DotWithRankHigherThanTwo) {
-  auto inferred_status = ShapeInference::InferBinaryOpShape(
-      BINOP_DOT, ShapeUtil::MakeShape(F32, {32, 32, 32}), matrix_32_64_, {});
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  auto inferred_status = ShapeInference::InferDotOpShape(
+      ShapeUtil::MakeShape(F32, {32, 32, 32}), matrix_32_64_, dot_dnums);
   ASSERT_FALSE(inferred_status.ok());
   ASSERT_THAT(inferred_status.status().error_message(),
-              HasSubstr("dot only supports rank"));
+              HasSubstr("batch and contracting dimension number mismatch"));
 }
 
 // vector <dot> vector -> scalar
 TEST_F(ShapeInferenceTest, VectorDotVector) {
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_contracting_dimensions(0);
   auto inferred_status =
-      ShapeInference::InferBinaryOpShape(BINOP_DOT, vector_64_, vector_64_, {});
+      ShapeInference::InferDotOpShape(vector_64_, vector_64_, dot_dnums);
   ASSERT_IS_OK(inferred_status.status());
   ASSERT_TRUE(ShapeUtil::Equal(f32_, inferred_status.ValueOrDie()));
   auto inferred_status_mismatch =
-      ShapeInference::InferBinaryOpShape(BINOP_DOT, vector_64_, vector_32_, {});
+      ShapeInference::InferDotOpShape(vector_64_, vector_32_, dot_dnums);
   ASSERT_FALSE(inferred_status_mismatch.ok());
 }
 
 // matrix <dot> vector -> vector
 TEST_F(ShapeInferenceTest, MatrixDotVector) {
-  auto inferred_status = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, matrix_32_64_, vector_64_, {});
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(matrix_32_64_, vector_64_, dot_dnums);
   ASSERT_IS_OK(inferred_status.status());
   ASSERT_TRUE(ShapeUtil::Equal(inferred_status.ValueOrDie(), vector_32_));
-  auto inferred_status_mismatch = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, matrix_32_64_, vector_32_, {});
+  auto inferred_status_mismatch =
+      ShapeInference::InferDotOpShape(matrix_32_64_, vector_32_, dot_dnums);
   ASSERT_FALSE(inferred_status_mismatch.ok());
 }
 
 // vector <dot> matrix -> vector
 TEST_F(ShapeInferenceTest, VectorDotMatrix) {
-  auto inferred_status = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, vector_32_, matrix_32_64_, {});
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(vector_32_, matrix_32_64_, dot_dnums);
   ASSERT_IS_OK(inferred_status.status());
   ASSERT_TRUE(ShapeUtil::Equal(inferred_status.ValueOrDie(), vector_64_));
-  auto inferred_status_mismatch = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, vector_64_, matrix_32_64_, {});
+  auto inferred_status_mismatch =
+      ShapeInference::InferDotOpShape(vector_64_, matrix_32_64_, dot_dnums);
   ASSERT_FALSE(inferred_status_mismatch.ok());
 }
 
 // matrix <dot> matrix -> matrix
 TEST_F(ShapeInferenceTest, MatrixDotMatrix) {
-  auto inferred_status_match = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, matrix_32_64_, matrix_64_48_, {});
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  auto inferred_status_match =
+      ShapeInference::InferDotOpShape(matrix_32_64_, matrix_64_48_, dot_dnums);
   ASSERT_IS_OK(inferred_status_match.status());
   ASSERT_TRUE(
       ShapeUtil::Equal(inferred_status_match.ValueOrDie(), matrix_32_48_))
       << "inferred: "
       << ShapeUtil::HumanString(inferred_status_match.ValueOrDie())
       << " expected: " << ShapeUtil::HumanString(matrix_64_48_);
-  auto inferred_status_mismatch = ShapeInference::InferBinaryOpShape(
-      BinaryOperation::BINOP_DOT, matrix_32_64_, matrix_32_64_, {});
+  auto inferred_status_mismatch =
+      ShapeInference::InferDotOpShape(matrix_32_64_, matrix_32_64_, dot_dnums);
   ASSERT_FALSE(inferred_status_mismatch.ok());
 }
 
+// BatchMatMul with two batch dimensions and one contracting dimension.
+TEST_F(ShapeInferenceTest, DotGeneral) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {5, 2, 11, 3});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {5, 2, 3, 14});
+  Shape output_shape = ShapeUtil::MakeShape(F32, {5, 2, 11, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(3);
+  dot_dnums.add_lhs_batch_dimensions(0);
+  dot_dnums.add_lhs_batch_dimensions(1);
+
+  dot_dnums.add_rhs_contracting_dimensions(2);
+  dot_dnums.add_rhs_batch_dimensions(0);
+  dot_dnums.add_rhs_batch_dimensions(1);
+
+  auto inferred_status_match =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_IS_OK(inferred_status_match.status());
+  ASSERT_TRUE(
+      ShapeUtil::Equal(inferred_status_match.ValueOrDie(), output_shape))
+      << "inferred: "
+      << ShapeUtil::HumanString(inferred_status_match.ValueOrDie())
+      << " expected: " << ShapeUtil::HumanString(output_shape);
+}
+
+// BatchMatMul with two contracting dimensions fails.
+TEST_F(ShapeInferenceTest, DotWithTwoContractingDimsFails) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3, 2});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 3, 14});
+  Shape output_shape = ShapeUtil::MakeShape(F32, {2, 11, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(2);
+  dot_dnums.add_lhs_contracting_dimensions(3);
+  dot_dnums.add_lhs_batch_dimensions(0);
+
+  dot_dnums.add_rhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_batch_dimensions(0);
+
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_FALSE(inferred_status.ok());
+  ASSERT_THAT(inferred_status.status().error_message(),
+              HasSubstr("must specify one contracting dimension for both "
+                        "lhs and rhs"));
+}
+
+// BatchMatMul with different batch dimension sizes fails.
+TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimSizesFails) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {3, 3, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(2);
+  dot_dnums.add_lhs_batch_dimensions(0);
+
+  dot_dnums.add_rhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_batch_dimensions(0);
+
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_FALSE(inferred_status.ok());
+  ASSERT_THAT(inferred_status.status().error_message(),
+              HasSubstr("batch dimension numbers and sizes must match"));
+}
+
+// BatchMatMul with different batch dimension numbers fails.
+TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimNumbersFails) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {3, 2, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(2);
+  dot_dnums.add_lhs_batch_dimensions(0);
+
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_batch_dimensions(1);
+
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_FALSE(inferred_status.ok());
+  ASSERT_THAT(inferred_status.status().error_message(),
+              HasSubstr("batch dimension numbers must precede non-batch"));
+}
+
+// BatchMatMul with out-of-range dimension numbers fails.
+TEST_F(ShapeInferenceTest, DotWithContractingDimNumberOutOfRange) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 3, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(3);
+  dot_dnums.add_lhs_batch_dimensions(0);
+
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_batch_dimensions(1);
+
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_FALSE(inferred_status.ok());
+  ASSERT_THAT(inferred_status.status().error_message(),
+              HasSubstr("A dimension number is out of range"));
+}
+
+// BatchMatMul with non-unique dimension numbers fails.
+TEST_F(ShapeInferenceTest, DotWithContractingNonUniqueDimNumber) {
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {2, 3, 14});
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(0);
+  dot_dnums.add_lhs_batch_dimensions(0);
+
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  dot_dnums.add_rhs_batch_dimensions(1);
+
+  auto inferred_status =
+      ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums);
+  ASSERT_FALSE(inferred_status.ok());
+  ASSERT_THAT(inferred_status.status().error_message(),
+              HasSubstr("A dimension number is not unique"));
+}
+
 TEST_F(ShapeInferenceTest, BinOpBroadcastMatrixVector) {
   // Test variations of broadcasting a vector for a binary add with a
   // matrix.
@@ -1296,5 +1437,80 @@ TEST_F(ShapeInferenceTest, Transpose) {
                                     ShapeUtil::MakeShape(F32, {3, 4, 5, 2})));
 }
 
+TEST_F(ShapeInferenceTest, Conditional) {
+  auto inferred_status0 = ShapeInference::InferConditionalShape(
+      pred_, vector_32_, vector_64_,
+      ShapeUtil::MakeProgramShape({vector_32_}, f32_),
+      ShapeUtil::MakeProgramShape({vector_64_}, f32_));
+  EXPECT_IS_OK(inferred_status0.status());
+  EXPECT_TRUE(ShapeUtil::Equal(f32_, inferred_status0.ValueOrDie()));
+
+  auto inferred_status1 = ShapeInference::InferConditionalShape(
+      pred_, matrix_32_48_, vector_32_,
+      ShapeUtil::MakeProgramShape({matrix_32_48_}, vector_64_),
+      ShapeUtil::MakeProgramShape({vector_32_}, vector_64_));
+  EXPECT_IS_OK(inferred_status1.status());
+  EXPECT_TRUE(ShapeUtil::Equal(vector_64_, inferred_status1.ValueOrDie()));
+
+  auto tuple_f32_v32 = ShapeUtil::MakeTupleShape({f32_, vector_32_});
+  auto inferred_status2 = ShapeInference::InferConditionalShape(
+      pred_, matrix_32_48_, tuple_f32_v32,
+      ShapeUtil::MakeProgramShape({matrix_32_48_}, vector_32_),
+      ShapeUtil::MakeProgramShape({tuple_f32_v32}, vector_32_));
+  EXPECT_IS_OK(inferred_status2.status());
+  EXPECT_TRUE(ShapeUtil::Equal(vector_32_, inferred_status2.ValueOrDie()));
+
+  auto inferred_status_error0 = ShapeInference::InferConditionalShape(
+      s32_, vector_32_, vector_64_,
+      ShapeUtil::MakeProgramShape({vector_32_}, f32_),
+      ShapeUtil::MakeProgramShape({vector_64_}, f32_));
+  EXPECT_FALSE(inferred_status_error0.ok());
+  EXPECT_THAT(inferred_status_error0.status().error_message(),
+              HasSubstr("predicate must be a boolean"));
+
+  auto inferred_status_error1 = ShapeInference::InferConditionalShape(
+      pred_, ShapeUtil::MakeTupleShape({f32_, vector_32_}), matrix_32_48_,
+      ShapeUtil::MakeProgramShape({f32_, vector_32_}, vector_32_),
+      ShapeUtil::MakeProgramShape({matrix_32_48_}, vector_32_));
+  EXPECT_FALSE(inferred_status_error1.ok());
+  EXPECT_THAT(inferred_status_error1.status().error_message(),
+              HasSubstr("true_computation must take 1 argument"));
+
+  auto inferred_status_error2 = ShapeInference::InferConditionalShape(
+      pred_, vector_32_, vector_64_,
+      ShapeUtil::MakeProgramShape({vector_64_}, f32_),
+      ShapeUtil::MakeProgramShape({vector_64_}, f32_));
+  EXPECT_FALSE(inferred_status_error2.ok());
+  EXPECT_THAT(inferred_status_error2.status().error_message(),
+              HasSubstr("true_operand must match the shape of the only "
+                        "parameter of true_computation"));
+
+  auto inferred_status_error3 = ShapeInference::InferConditionalShape(
+      pred_, matrix_32_48_, ShapeUtil::MakeTupleShape({f32_, vector_32_}),
+      ShapeUtil::MakeProgramShape({matrix_32_48_}, vector_32_),
+      ShapeUtil::MakeProgramShape({f32_, vector_32_}, vector_32_));
+  EXPECT_FALSE(inferred_status_error3.ok());
+  EXPECT_THAT(inferred_status_error3.status().error_message(),
+              HasSubstr("false_computation must take 1 argument"));
+
+  auto inferred_status_error4 = ShapeInference::InferConditionalShape(
+      pred_, vector_32_, vector_64_,
+      ShapeUtil::MakeProgramShape({vector_32_}, f32_),
+      ShapeUtil::MakeProgramShape({vector_32_}, f32_));
+  EXPECT_FALSE(inferred_status_error4.ok());
+  EXPECT_THAT(inferred_status_error4.status().error_message(),
+              HasSubstr("false_operand must match the shape of the only "
+                        "parameter of false_computation"));
+
+  auto inferred_status_error5 = ShapeInference::InferConditionalShape(
+      pred_, vector_32_, vector_64_,
+      ShapeUtil::MakeProgramShape({vector_32_}, f32_),
+      ShapeUtil::MakeProgramShape({vector_64_}, vector_32_));
+  EXPECT_FALSE(inferred_status_error5.ok());
+  EXPECT_THAT(inferred_status_error5.status().error_message(),
+              HasSubstr("the result of true_computation and false_computation "
+                        "must have the same shape"));
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc
index a7539a1a11d2bbd62c780890c6730dbb212307c4..c679d401c3691b14a43ce77cbe953cd4c64a9e92 100644
--- a/tensorflow/compiler/xla/service/shaped_buffer.cc
+++ b/tensorflow/compiler/xla/service/shaped_buffer.cc
@@ -34,58 +34,32 @@ namespace xla {
 
 using ::tensorflow::strings::Appendf;
 
-/* static */ StatusOr<std::unique_ptr<ShapedBuffer>>
-ShapedBuffer::MakeArrayShapedBuffer(const Shape& shape,
-                                    const se::Platform* platform,
-                                    int device_ordinal,
-                                    const se::DeviceMemoryBase& buffer) {
-  if (ShapeUtil::IsTuple(shape)) {
-    return InvalidArgument("Shape must be an array: %s",
-                           ShapeUtil::HumanStringWithLayout(shape).c_str());
-  }
-  auto shaped_buffer =
-      MakeUnique<ShapedBuffer>(shape, platform, device_ordinal);
-  *shaped_buffer->mutable_shape_index_to_buffer_entry()->mutable_element({}) =
-      0;
-  *shaped_buffer->mutable_buffers() = {buffer};
-  return std::move(shaped_buffer);
-}
-
-ShapedBuffer::ShapedBuffer(const Shape& shape, const se::Platform* platform,
-                           int device_ordinal)
-    : shape_(shape),
+ShapedBuffer::ShapedBuffer(const Shape& on_host_shape,
+                           const Shape& on_device_shape,
+                           const se::Platform* platform, int device_ordinal)
+    : on_host_shape_(on_host_shape),
+      on_device_shape_(on_device_shape),
       platform_(platform),
       device_ordinal_(device_ordinal),
-      shape_index_to_buffer_entry_(shape) {}
+      buffers_(on_device_shape) {}
 
 void ShapedBuffer::clear() {
-  for (se::DeviceMemoryBase& memory_base : buffers_) {
+  for (auto& pair : buffers_) {
     // A default constructed DeviceMemoryBase is a null pointer.
-    memory_base = se::DeviceMemoryBase();
+    pair.second = se::DeviceMemoryBase();
   }
 }
 
-void ShapedBuffer::AddBufferAtIndex(
-    const perftools::gputools::DeviceMemoryBase& buffer,
-    const ShapeIndex& shape_index) {
-  *mutable_shape_index_to_buffer_entry()->mutable_element(shape_index) =
-      buffers().size();
-  mutable_buffers()->push_back(buffer);
-}
-
-const se::DeviceMemoryBase& ShapedBuffer::buffer(
-    const ShapeIndex& index) const {
-  return buffers_[shape_index_to_buffer_entry_.element(index)];
-}
-
-se::DeviceMemoryBase* ShapedBuffer::mutable_buffer(const ShapeIndex& index) {
-  return &buffers_[shape_index_to_buffer_entry_.element(index)];
-}
-
 string ShapedBuffer::ToString() const {
-  string s = "ShapedBuffer(" + platform_->Name() + "):\n";
+  string s = tensorflow::strings::StrCat(
+      "ShapedBuffer(", platform_->Name(), ":", device_ordinal(),
+      "), on-host shape=" + ShapeUtil::HumanStringWithLayout(on_host_shape()),
+      ", on-device shape=" +
+          ShapeUtil::HumanStringWithLayout(on_device_shape()),
+      ":\n");
   ShapeUtil::ForEachSubshape(
-      shape(), [this, &s](const Shape& subshape, const ShapeIndex& index) {
+      on_device_shape(),
+      [this, &s](const Shape& subshape, const ShapeIndex& index) {
         string shape_str;
         if (ShapeUtil::IsTuple(subshape)) {
           shape_str = "tuple";
@@ -105,53 +79,24 @@ std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer) {
   return out;
 }
 
-/* static */ StatusOr<std::unique_ptr<ScopedShapedBuffer>>
-ScopedShapedBuffer::Allocate(
-    const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal,
-    const std::function<int64(const Shape&)>& shape_size_fn) {
-  if (!LayoutUtil::HasLayout(shape)) {
-    return InvalidArgument("Shape must have a layout: %s",
-                           ShapeUtil::HumanStringWithLayout(shape).c_str());
-  }
-  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(shape));
-  auto shaped_buffer =
-      WrapUnique(new ScopedShapedBuffer(shape, allocator, device_ordinal));
-
-  // Allocate an appropriate sized buffer for each element in the shape
-  // including the tuple pointer arrays.
-  for (auto& pair : shaped_buffer->shape_index_to_buffer_entry_) {
-    const ShapeIndex& index = pair.first;
-    size_t& buffer_entry = pair.second;
-    TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase memory_base,
-                        shaped_buffer->allocator_->Allocate(
-                            shaped_buffer->device_ordinal(),
-                            shape_size_fn(ShapeUtil::GetSubshape(
-                                shaped_buffer->shape(), index))));
-    shaped_buffer->buffers_.push_back(memory_base);
-    buffer_entry = shaped_buffer->buffers_.size() - 1;
-  }
-
-  return std::move(shaped_buffer);
-}
-
 /* static */
 StatusOr<std::unique_ptr<ScopedShapedBuffer>> ScopedShapedBuffer::MakeScoped(
     ShapedBuffer* shaped_buffer, DeviceMemoryAllocator* allocator) {
   auto scoped_buffer = WrapUnique(new ScopedShapedBuffer(
-      shaped_buffer->shape(), allocator, shaped_buffer->device_ordinal()));
+      shaped_buffer->on_host_shape(), shaped_buffer->on_device_shape(),
+      allocator, shaped_buffer->device_ordinal()));
   scoped_buffer->buffers_ = shaped_buffer->buffers();
-  scoped_buffer->shape_index_to_buffer_entry_ =
-      shaped_buffer->shape_index_to_buffer_entry();
-
   shaped_buffer->clear();
 
   return std::move(scoped_buffer);
 }
 
-ScopedShapedBuffer::ScopedShapedBuffer(const Shape& shape,
+ScopedShapedBuffer::ScopedShapedBuffer(const Shape& on_host_shape,
+                                       const Shape& on_device_shape,
                                        DeviceMemoryAllocator* allocator,
                                        int device_ordinal)
-    : ShapedBuffer(shape, allocator->platform(), device_ordinal),
+    : ShapedBuffer(on_host_shape, on_device_shape, allocator->platform(),
+                   device_ordinal),
       allocator_(allocator) {}
 
 ScopedShapedBuffer::~ScopedShapedBuffer() {
@@ -159,7 +104,8 @@ ScopedShapedBuffer::~ScopedShapedBuffer() {
   // in the shape (eg, a tuple with a repeated element) so keep track of what
   // has been deallocated.
   std::set<void*> deallocated_opaques;
-  for (se::DeviceMemoryBase& memory_base : buffers_) {
+  for (auto& pair : buffers_) {
+    se::DeviceMemoryBase& memory_base = pair.second;
     if (!memory_base.is_null() &&
         deallocated_opaques.count(memory_base.opaque()) == 0) {
       deallocated_opaques.insert(memory_base.opaque());
@@ -170,13 +116,10 @@ ScopedShapedBuffer::~ScopedShapedBuffer() {
 }
 
 std::unique_ptr<ShapedBuffer> ScopedShapedBuffer::release() {
-  auto shaped_buffer =
-      MakeUnique<ShapedBuffer>(shape(), platform(), device_ordinal());
-
-  *shaped_buffer->mutable_buffers() = buffers();
-  *shaped_buffer->mutable_shape_index_to_buffer_entry() =
-      shape_index_to_buffer_entry();
+  auto shaped_buffer = MakeUnique<ShapedBuffer>(
+      on_host_shape(), on_device_shape(), platform(), device_ordinal());
 
+  shaped_buffer->buffers() = buffers();
   clear();
 
   return shaped_buffer;
diff --git a/tensorflow/compiler/xla/service/shaped_buffer.h b/tensorflow/compiler/xla/service/shaped_buffer.h
index fa88caa13ff734995e8ab0925f17d0d3c26b8fda..d397e47d2ca734458c7dc99baa5c81b16d0fd72b 100644
--- a/tensorflow/compiler/xla/service/shaped_buffer.h
+++ b/tensorflow/compiler/xla/service/shaped_buffer.h
@@ -31,61 +31,68 @@ limitations under the License.
 namespace xla {
 
 // Class which encapsulates a buffer or set of buffers containing data of a
-// particular XLA shape. Used for zero-copy execution interface for a
-// XLA client running in the same process as the service (LocalClient),
+// particular XLA shape.
 class ShapedBuffer {
  public:
-  // Convenience method which creates a ShapedBuffer of array shape (not a
-  // tuple). Its single buffer pointer is set to the given value "buffer". The
-  // given buffer must be large enough to store the given shape as given by
-  // ShapeUtil::ByteSizeOf.
-  static StatusOr<std::unique_ptr<ShapedBuffer>> MakeArrayShapedBuffer(
-      const Shape& shape, const perftools::gputools::Platform* platform,
-      int device_ordinal, const perftools::gputools::DeviceMemoryBase& buffer);
-
-  ShapedBuffer(const Shape& shape,
+  // Construct a ShapedBuffer with null DeviceMemoryBases at each index. The
+  // shape of the data on the host and the device may differ because the device
+  // may have a different representation for different data types. Therefore,
+  // both the on-host and on-device shape are required. The on-device shape
+  // determines the number of device allocations (DeviceMemoryBase) held by the
+  // ShapedBuffer.
+  ShapedBuffer(const Shape& on_host_shape, const Shape& on_device_shape,
                const perftools::gputools::Platform* platform,
                int device_ordinal);
 
-  const Shape& shape() const { return shape_; }
+  // Returns the shape of the on-host representation of the data held by this
+  // ShapedBuffer.
+  const Shape& on_host_shape() const { return on_host_shape_; }
+
+  // Returns the shape of the on-device representation of the data held by this
+  // ShapedBuffer.
+  const Shape& on_device_shape() const { return on_device_shape_; }
+
   const perftools::gputools::Platform* platform() const { return platform_; }
   int device_ordinal() const { return device_ordinal_; }
 
+  // Return the root buffer of the shape (shape index {}).
+  const perftools::gputools::DeviceMemoryBase& root_buffer() const {
+    return buffer(/*index=*/{});
+  }
+
   // Returns the buffer at the given shape index where index is defined as in
   // ShapeUtil::GetSubshape.
   const perftools::gputools::DeviceMemoryBase& buffer(
-      const ShapeIndex& index) const;
-  perftools::gputools::DeviceMemoryBase* mutable_buffer(
-      const ShapeIndex& index);
-
-  // Returns the underlying structure which stores the buffer pointers.
-  const std::vector<perftools::gputools::DeviceMemoryBase>& buffers() const {
-    return buffers_;
+      const ShapeIndex& index) const {
+    return buffers_.element(index);
   }
-  std::vector<perftools::gputools::DeviceMemoryBase>* mutable_buffers() {
-    return &buffers_;
+
+  // Sets the device memory buffer at the given index.
+  void set_buffer(const perftools::gputools::DeviceMemoryBase& buffer,
+                  const ShapeIndex& index) {
+    *buffers_.mutable_element(index) = buffer;
   }
 
-  // Returns the tree of indices which map to buffer pointers.
-  const ShapeTree<size_t>& shape_index_to_buffer_entry() const {
-    return shape_index_to_buffer_entry_;
+  // Returns the underlying ShapeTree containing all the device addresses in the
+  // ShapedBuffer.
+  const ShapeTree<perftools::gputools::DeviceMemoryBase>& buffers() const {
+    return buffers_;
   }
-  ShapeTree<size_t>* mutable_shape_index_to_buffer_entry() {
-    return &shape_index_to_buffer_entry_;
+  ShapeTree<perftools::gputools::DeviceMemoryBase>& buffers() {
+    return buffers_;
   }
 
   // Set all device memory pointers in the object to null.
   void clear();
 
-  // Adds a new buffer at the given shape index.
-  void AddBufferAtIndex(const perftools::gputools::DeviceMemoryBase& buffer,
-                        const ShapeIndex& shape_index);
-
   string ToString() const;
 
  protected:
-  // The shape of the device buffer with layout.
-  const Shape shape_;
+  // The shape of the data when represented on the host.
+  const Shape on_host_shape_;
+
+  // The shape of the data on the device.
+  const Shape on_device_shape_;
 
   // The platform the memory is allocated on.
   const perftools::gputools::Platform* platform_;
@@ -93,14 +100,8 @@ class ShapedBuffer {
   // The device the memory is allocated on.
   const int device_ordinal_;
 
-  // The list of DeviceMemoryBase pointers representing this shape.
-  // Note that there can be a many to one relationship between tuple elements
-  // and buffers.  To account for this, shape_index_to_buffer_entry_ allows us
-  // to make from a position in a shape to an index into this list.
-  std::vector<perftools::gputools::DeviceMemoryBase> buffers_;
-
-  // The tree of indices into buffers_.
-  ShapeTree<size_t> shape_index_to_buffer_entry_;
+  // The tree of device buffers. Its shape is on_device_shape().
+  ShapeTree<perftools::gputools::DeviceMemoryBase> buffers_;
 };
 
 std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer);
@@ -110,20 +111,16 @@ std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer);
 // destructed.
 class ScopedShapedBuffer : public ShapedBuffer {
  public:
-  // Return a newly allocated ScopedShapedBuffer of an arbitrary shape. Array
-  // buffers (leaves in the shape) are allocated and uninitialized. Tuple
-  // buffers (if any) are allocated and initialized to the backend-specific
-  // representation of an array of pointers to the tuple elements.
-  static StatusOr<std::unique_ptr<ScopedShapedBuffer>> Allocate(
-      const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal,
-      const std::function<int64(const Shape&)>& shape_size_fn);
-
   // Takes a ShapedBuffer and returns a ScopedShapedBuffer which manages the
   // deallocation of the device memory held in the shaped buffer. All device
   // memory pointers in the given ShapedBuffer are set to null.
   static StatusOr<std::unique_ptr<ScopedShapedBuffer>> MakeScoped(
       ShapedBuffer* shaped_buffer, DeviceMemoryAllocator* allocator);
 
+  // Create a ScopedShapedBuffer with null DeviceMemoryBases at each index.
+  ScopedShapedBuffer(const Shape& on_host_shape, const Shape& on_device_shape,
+                     DeviceMemoryAllocator* allocator, int device_ordinal);
+
   // Return the allocator used to allocate the device memory held in this
   // ScopedShapedBuffer.
   DeviceMemoryAllocator* memory_allocator() const { return allocator_; }
@@ -138,8 +135,6 @@ class ScopedShapedBuffer : public ShapedBuffer {
   virtual ~ScopedShapedBuffer();
 
  protected:
-  ScopedShapedBuffer(const Shape& shape, DeviceMemoryAllocator* allocator,
-                     int device_ordinal);
   ScopedShapedBuffer(const ScopedShapedBuffer&) = delete;
   void operator=(const ScopedShapedBuffer&) = delete;
 
diff --git a/tensorflow/compiler/xla/service/transfer_manager.cc b/tensorflow/compiler/xla/service/transfer_manager.cc
index d5f53ad56fb019d0ae7c27fc28706f05614ece68..2f36e2b16e0f2eed10aef811dd3cceeba6a5b8a9 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/transfer_manager.cc
@@ -40,6 +40,45 @@ TransferManager::GetPlatformTransferManagers() {
   return r;
 }
 
+Status TransferManager::TransferArrayToDevice(
+    perftools::gputools::StreamExecutor* executor, const Literal& literal,
+    const perftools::gputools::DeviceMemoryBase& dest) {
+  const Shape on_device_shape = HostShapeToDeviceShape(literal.shape());
+  TF_RET_CHECK(ShapeUtil::IsArray(on_device_shape))
+      << "On-device representation of "
+      << ShapeUtil::HumanString(literal.shape())
+      << " is not an array: " << ShapeUtil::HumanString(on_device_shape);
+  if (dest.size() < GetByteSizeRequirement(on_device_shape)) {
+    return FailedPrecondition(
+        "Allocation on device not large enough for array: "
+        "%lld < %lld",
+        dest.size(), GetByteSizeRequirement(on_device_shape));
+  }
+  ShapedBuffer shaped_buffer(/*on_host_shape=*/literal.shape(), on_device_shape,
+                             executor->platform(), executor->device_ordinal());
+  shaped_buffer.set_buffer(dest, /*index=*/{});
+  return TransferLiteralToDevice(executor, literal, shaped_buffer);
+}
+
+StatusOr<std::unique_ptr<Literal>> TransferManager::TransferArrayFromDevice(
+    perftools::gputools::StreamExecutor* executor, const Shape& shape,
+    const perftools::gputools::DeviceMemoryBase& source) {
+  TF_RET_CHECK(ShapeUtil::Equal(HostShapeToDeviceShape(shape), shape))
+      << "Shape " << ShapeUtil::HumanString(shape)
+      << " has a differently shaped representation on-device: "
+      << ShapeUtil::HumanString(HostShapeToDeviceShape(shape));
+  if (source.size() < GetByteSizeRequirement(shape)) {
+    return FailedPrecondition(
+        "Allocation on device not large enough for array: "
+        "%lld < %lld",
+        source.size(), GetByteSizeRequirement(shape));
+  }
+  ShapedBuffer shaped_buffer(/*on_host_shape=*/shape, shape,
+                             executor->platform(), executor->device_ordinal());
+  shaped_buffer.set_buffer(source, /*index=*/{});
+  return TransferLiteralFromDevice(executor, shaped_buffer);
+}
+
 /* static */ void TransferManager::RegisterTransferManager(
     se::Platform::Id platform_id,
     TransferManagerCreationFunction creation_function) {
@@ -75,14 +114,12 @@ TransferManager::GetPlatformTransferManagers() {
 Status TransferManager::WriteTupleIndexTables(
     perftools::gputools::StreamExecutor* executor,
     const ShapedBuffer& device_buffer) {
-  VLOG(2) << "Writing tuple index tables to ShapedBuffer rooted at "
-          << device_buffer.buffer(/*index=*/{}).opaque()
-          << "; shape: " << ShapeUtil::HumanString(device_buffer.shape());
+  VLOG(2) << "Writing tuple index tables for " << device_buffer;
 
   TF_RET_CHECK(executor->device_ordinal() == device_buffer.device_ordinal());
 
   return ShapeUtil::ForEachSubshapeWithStatus(
-      device_buffer.shape(),
+      device_buffer.on_device_shape(),
       [&](const Shape& device_subshape, const ShapeIndex& index) -> Status {
         if (ShapeUtil::IsTuple(device_subshape)) {
           se::DeviceMemoryBase device_memory = device_buffer.buffer(index);
@@ -97,7 +134,7 @@ Status TransferManager::WriteTupleIndexTables(
             elements.push_back(device_buffer.buffer(element_index));
             element_index.pop_back();
           }
-          return WriteTuplePointersToDevice(executor, elements, device_subshape,
+          return WriteSingleTupleIndexTable(executor, elements, device_subshape,
                                             &device_memory);
         }
 
@@ -143,31 +180,43 @@ Status TransferManager::TransferBufferToDevice(
   return Status::OK();
 }
 
-StatusOr<std::set<se::DeviceMemoryBase>>
-TransferManager::GatherBufferPointersFromTuple(
-    se::StreamExecutor* executor, const se::DeviceMemoryBase& source,
-    const Shape& shape) {
-  TF_RET_CHECK(ShapeUtil::IsTuple(shape));
-
-  std::set<se::DeviceMemoryBase> buffer_pointers;
-  buffer_pointers.insert(source);
-
-  TF_ASSIGN_OR_RETURN(std::vector<se::DeviceMemoryBase> tuple_elements,
-                      ShallowCopyTupleFromDevice(executor, source, shape));
-  for (auto i = 0; i < tuple_elements.size(); ++i) {
-    const Shape& element_shape = shape.tuple_shapes(i);
-    if (ShapeUtil::IsTuple(element_shape)) {
-      TF_ASSIGN_OR_RETURN(
-          std::set<se::DeviceMemoryBase> buffer_pointers_in_element,
-          GatherBufferPointersFromTuple(executor, tuple_elements[i],
-                                        element_shape));
-      buffer_pointers.insert(buffer_pointers_in_element.begin(),
-                             buffer_pointers_in_element.end());
-    } else {
-      buffer_pointers.insert(tuple_elements[i]);
-    }
+StatusOr<std::unique_ptr<ShapedBuffer>> TransferManager::AllocateShapedBuffer(
+    const Shape& on_host_shape, DeviceMemoryAllocator* allocator,
+    int device_ordinal) {
+  if (!LayoutUtil::HasLayout(on_host_shape)) {
+    return InvalidArgument(
+        "Shape must have a layout: %s",
+        ShapeUtil::HumanStringWithLayout(on_host_shape).c_str());
+  }
+  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(on_host_shape));
+  const Shape on_device_shape = HostShapeToDeviceShape(on_host_shape);
+  TF_RET_CHECK(LayoutUtil::HasLayout(on_device_shape));
+
+  auto shaped_buffer = WrapUnique(new ShapedBuffer(
+      on_host_shape, on_device_shape, allocator->platform(), device_ordinal));
+
+  // Allocate an appropriate sized buffer for each element in the shape
+  // including the tuple pointer arrays.
+  for (auto& pair : shaped_buffer->buffers()) {
+    const ShapeIndex& index = pair.first;
+    se::DeviceMemoryBase& memory_base = pair.second;
+    const Shape& subshape = ShapeUtil::GetSubshape(on_device_shape, index);
+    TF_ASSIGN_OR_RETURN(memory_base,
+                        allocator->Allocate(shaped_buffer->device_ordinal(),
+                                            GetByteSizeRequirement(subshape)));
   }
-  return std::move(buffer_pointers);
+
+  return std::move(shaped_buffer);
+}
+
+StatusOr<std::unique_ptr<ScopedShapedBuffer>>
+TransferManager::AllocateScopedShapedBuffer(const Shape& on_host_shape,
+                                            DeviceMemoryAllocator* allocator,
+                                            int device_ordinal) {
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<ShapedBuffer> unscoped_buffer,
+      AllocateShapedBuffer(on_host_shape, allocator, device_ordinal));
+  return ScopedShapedBuffer::MakeScoped(unscoped_buffer.get(), allocator);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h
index fdc123e54eb7f754c12510bef551b98da01b585d..9f2b5c4aecf0b52f610171e0c2755de577b2bd9e 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.h
+++ b/tensorflow/compiler/xla/service/transfer_manager.h
@@ -44,55 +44,47 @@ class TransferManager {
   // Returns the ID of the platform that this transfer manager acts on.
   virtual perftools::gputools::Platform::Id PlatformId() const = 0;
 
-  // Transfers the region into the provided literal using the provided
-  // executor. device_shape is the shape, including layout, of the data on the
-  // device, while literal_shape will be the shape for the literal. device_shape
-  // and literal_shape must be compatible, but need not have the same layout.
-  // TODO(b/66694934): Remove TransferLiteral* methods which accept bare
-  // DeviceMemoryBase.
-  virtual Status TransferLiteralFromDevice(
-      perftools::gputools::StreamExecutor* executor,
-      const perftools::gputools::DeviceMemoryBase& region,
-      const Shape& device_shape, const Shape& literal_shape,
-      Literal* literal) = 0;
-
-  // Transfers the given literal into the provided region output parameter,
-  // using the given executor.
-  virtual Status TransferLiteralToDevice(
-      perftools::gputools::StreamExecutor* executor, const Literal& literal,
-      perftools::gputools::DeviceMemoryBase* region) = 0;
-
-  // Transfers the data held in the given ShapedBuffer into the provided literal
-  // using the provided executor. literal_shape will be the shape for the
-  // literal. The shape of the ShapedBuffer and literal_shape must be
-  // compatible, but need not have the same layout.
+  // Returns the shape of the on-device representation for the given shape on
+  // the host. This is intended for use with ShapedBuffer where buffers are
+  // pre-allocated by the host, e.g. TransferLiteralToDevice, without the user
+  // needing to consider device-specific behaviors.
+  virtual Shape HostShapeToDeviceShape(const Shape& host_shape) const {
+    return host_shape;
+  }
+
+  // Returns a literal containing the data held in the given ShapedBuffer.
+  // using the provided executor. The optional literal_shape will be the shape
+  // for the literal. The shape of the ShapedBuffer and
+  // DeviceShape(literal_shape) must be compatible, but need not have the same
+  // layout.
   virtual StatusOr<std::unique_ptr<Literal>> TransferLiteralFromDevice(
       perftools::gputools::StreamExecutor* executor,
       const ShapedBuffer& device_buffer) = 0;
 
   // Transfers the given literal into the previously allocated device memory
-  // represented by the given ShapedBuffer using the given executor.
+  // represented by the given ShapedBuffer using the given executor. The shape
+  // of the ShapedBuffer and DeviceShape(literal.shape()) must be compatible,
+  // but need not have the same layout
   virtual Status TransferLiteralToDevice(
       perftools::gputools::StreamExecutor* executor, const Literal& literal,
       const ShapedBuffer& device_buffer) = 0;
 
+  // Convenience methods for transferring an array to or from the device at a
+  // known address. This avoids having to construct a ShapedBuffer just to
+  // transfer an array at a known address.
+  Status TransferArrayToDevice(
+      perftools::gputools::StreamExecutor* executor, const Literal& literal,
+      const perftools::gputools::DeviceMemoryBase& dest);
+  StatusOr<std::unique_ptr<Literal>> TransferArrayFromDevice(
+      perftools::gputools::StreamExecutor* executor, const Shape& shape,
+      const perftools::gputools::DeviceMemoryBase& source);
+
   // Transfers the given literal into the Infeed interface of the device,
   // using the given executor.
   virtual Status TransferLiteralToInfeed(
       perftools::gputools::StreamExecutor* executor,
       const Literal& literal) = 0;
 
-  // Transfer a memory block of the given size from 'source' buffer to the
-  // Infeed interface of the device using the given executor.
-  //
-  // size is the size to transfer from source in bytes.
-  //
-  // source is the source data that must be in the target-dependent layout that
-  // the Infeed HLO used in the computation expects.
-  virtual Status TransferBufferToInfeed(
-      perftools::gputools::StreamExecutor* executor, int64 size,
-      const void* source) = 0;
-
   // Transfers the given literal from the Outfeed interface of the device,
   // using the given executor.
   virtual Status TransferLiteralFromOutfeed(
@@ -104,37 +96,26 @@ class TransferManager {
       tensorflow::gtl::ArraySlice<perftools::gputools::StreamExecutor*>
           executor) = 0;
 
-  // Shallow copy a tuple from the device and create a DeviceMemoryBase object
-  // for each element in the tuple. A DeviceMemoryBase object refers to the
-  // buffer containing the data of that element. The DeviceMemoryBase objects
-  // are returned as a vector.
-  virtual StatusOr<std::vector<perftools::gputools::DeviceMemoryBase>>
-  ShallowCopyTupleFromDevice(
-      perftools::gputools::StreamExecutor* executor,
-      const perftools::gputools::DeviceMemoryBase& source,
-      const Shape& shape) = 0;
-
   // Given an allocated ShapedBuffer, constructs the tuple index table(s) in
   // each buffer of the given ShapedBuffer corresponding to tuple shapes. If the
   // ShapedBuffer is array-shaped this method does nothing.
   Status WriteTupleIndexTables(perftools::gputools::StreamExecutor* executor,
                                const ShapedBuffer& device_buffer);
 
-  // Returns all buffer pointers that the tuple `source` refers to. Unlike
-  // ShallowCopyTupleFromDevice, this function gather buffer pointers in nested
-  // tuples as well. Also, the returned DeviceMemoryBase objects are
-  // deduplicated.
-  StatusOr<std::set<perftools::gputools::DeviceMemoryBase>>
-  GatherBufferPointersFromTuple(
-      perftools::gputools::StreamExecutor* executor,
-      const perftools::gputools::DeviceMemoryBase& source, const Shape& shape);
-
   // Determines the byte size requirement for the given shape on the underlying
   // architecture. This will be used to allocate an appropriately sized memory
   // region for a host-to-device transfer.
   virtual int64 GetByteSizeRequirement(const Shape& shape) const = 0;
 
-  typedef std::unique_ptr<TransferManager> (*TransferManagerCreationFunction)();
+  // Allocate a ShapedBuffer which can hold data with the given on-host
+  // shape. The on-device shape may be different as indicated by
+  // HostShapeToDeviceShape.
+  StatusOr<std::unique_ptr<ShapedBuffer>> AllocateShapedBuffer(
+      const Shape& on_host_shape, DeviceMemoryAllocator* allocator,
+      int device_ordinal);
+  StatusOr<std::unique_ptr<ScopedShapedBuffer>> AllocateScopedShapedBuffer(
+      const Shape& on_host_shape, DeviceMemoryAllocator* allocator,
+      int device_ordinal);
 
   /////
   // The TransferManager class also serves as a point to register objects for
@@ -144,6 +125,7 @@ class TransferManager {
   // assumed to be a singleton, so no ownership is transferred.
   //
   // Precondition: a platform kind must not be registered more than once.
+  typedef std::unique_ptr<TransferManager> (*TransferManagerCreationFunction)();
   static void RegisterTransferManager(
       perftools::gputools::Platform::Id platform_id,
       TransferManagerCreationFunction transfer_manager);
@@ -154,6 +136,17 @@ class TransferManager {
       const perftools::gputools::Platform* platform);
 
  protected:
+  // Transfer a memory block of the given size from 'source' buffer to the
+  // Infeed interface of the device using the given executor.
+  //
+  // size is the size to transfer from source in bytes.
+  //
+  // source is the source data that must be in the target-dependent layout that
+  // the Infeed HLO used in the computation expects.
+  virtual Status TransferBufferToInfeed(
+      perftools::gputools::StreamExecutor* executor, int64 size,
+      const void* source) = 0;
+
   // Transfer a memory block of the given size from the device source into the
   // 'destination' buffer.
   //
@@ -172,10 +165,9 @@ class TransferManager {
       const void* source, perftools::gputools::DeviceMemoryBase* destination);
 
   // Writes the given device-memory pointers in 'elements' to the given region
-  // to construct a tuple in the platform-specific tuple representation. This
-  // can handle nested tuples as well. In the nested case, the element
-  // DeviceMemoryBase points to another array of pointers on the device.
-  virtual Status WriteTuplePointersToDevice(
+  // to construct a tuple index table in the platform-specific tuple
+  // representation.
+  virtual Status WriteSingleTupleIndexTable(
       perftools::gputools::StreamExecutor* executor,
       tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
           elements,
diff --git a/tensorflow/compiler/xla/service/transpose_folding.cc b/tensorflow/compiler/xla/service/transpose_folding.cc
index fb55d4e5433ce666a061256691ea08ee56fde396..83185ac49e9b7c386d10d1cbc4e20dcdfdfd6cae 100644
--- a/tensorflow/compiler/xla/service/transpose_folding.cc
+++ b/tensorflow/compiler/xla/service/transpose_folding.cc
@@ -42,7 +42,7 @@ TransposeFolding::OperandIndices CanFoldOperandsIntoDot(
   TransposeFolding::OperandIndices operand_set;
   for (int64 i = 0; i < dot.operand_count(); ++i) {
     auto& operand = *dot.operand(i);
-    if (operand.IsRank2Transpose() && operand.user_count() == 1) {
+    if (operand.IsRank2Transpose()) {
       operand_set.push_back(i);
     }
   }
@@ -61,8 +61,7 @@ TransposeFolding::OperandIndices CanFoldOperandsIntoConvolution(
   TransposeFolding::OperandIndices operand_set;
   for (int64 i = 0; i < convolution.operand_count(); ++i) {
     auto& operand = *convolution.operand(i);
-    if (operand.opcode() == HloOpcode::kTranspose &&
-        operand.user_count() == 1) {
+    if (operand.opcode() == HloOpcode::kTranspose) {
       operand_set.push_back(i);
     }
   }
@@ -102,6 +101,10 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) {
   auto& convolution = *pair.first;
   auto& operand_indices = pair.second;
 
+  if (operand_indices.empty()) {
+    return false;
+  }
+
   const ConvolutionDimensionNumbers& dnums =
       convolution.convolution_dimension_numbers();
   ConvolutionDimensionNumbers new_dnums = dnums;
@@ -121,8 +124,9 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) {
         transpose_dimensions[dnums.input_batch_dimension()]);
     new_dnums.set_input_feature_dimension(
         transpose_dimensions[dnums.input_feature_dimension()]);
-    for (const auto& spatial_dimension : dnums.input_spatial_dimensions()) {
-      CHECK_EQ(spatial_dimension, transpose_dimensions[spatial_dimension]);
+    for (auto& input_spatial_dimension :
+         *new_dnums.mutable_input_spatial_dimensions()) {
+      input_spatial_dimension = transpose_dimensions[input_spatial_dimension];
     }
     new_lhs = &transpose_operand;
   } else {
diff --git a/tensorflow/compiler/xla/service/transpose_folding_test.cc b/tensorflow/compiler/xla/service/transpose_folding_test.cc
index 6ac32e88f1f4af4743990daecd6c1f66a4e32763..caa1a111ad880b9dee62c1c94e32e8275c196fbf 100644
--- a/tensorflow/compiler/xla/service/transpose_folding_test.cc
+++ b/tensorflow/compiler/xla/service/transpose_folding_test.cc
@@ -64,9 +64,12 @@ TEST_F(TransposeFoldingTest, FoldDotTranspose) {
   HloInstruction* transpose_y =
       builder.AddInstruction(HloInstruction::CreateTranspose(
           ShapeUtil::MakeShape(F32, {3, 2}), y, {1, 0}));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {2, 2}), /*opcode=*/HloOpcode::kDot,
-      /*lhs=*/x, /*rhs=*/transpose_y));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  HloInstruction* dot = builder.AddInstruction(
+      HloInstruction::CreateDot(ShapeUtil::MakeShape(F32, {2, 2}), /*lhs=*/x,
+                                /*rhs=*/transpose_y, dot_dnums));
 
   HloModule module("test_module");
   HloComputation* entry_computation =
@@ -104,9 +107,12 @@ TEST_F(TransposeFoldingTest, FoldDotTransposeConstant) {
   HloInstruction* transpose1 =
       builder.AddInstruction(HloInstruction::CreateTranspose(
           ShapeUtil::MakeShape(F32, {2, 3}), const1, {1, 0}));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {1, 3}), /*opcode=*/HloOpcode::kDot,
-      /*lhs=*/transpose0, /*rhs=*/transpose1));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateDot(
+      ShapeUtil::MakeShape(F32, {1, 3}),
+      /*lhs=*/transpose0, /*rhs=*/transpose1, dot_dnums));
 
   HloModule module("test_module");
   HloComputation* entry_computation =
@@ -169,9 +175,12 @@ TEST_F(TransposeFoldingTest, FoldDotTransposeInWhile) {
   HloInstruction* transpose_y =
       builder.AddInstruction(HloInstruction::CreateTranspose(
           ShapeUtil::MakeShape(F32, {3, 2}), y, {1, 0}));
-  HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(F32, {2, 2}), /*opcode=*/HloOpcode::kDot,
-      /*lhs=*/x, /*rhs=*/transpose_y));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(1);
+  dot_dnums.add_rhs_contracting_dimensions(0);
+  HloInstruction* dot = builder.AddInstruction(
+      HloInstruction::CreateDot(ShapeUtil::MakeShape(F32, {2, 2}), /*lhs=*/x,
+                                /*rhs=*/transpose_y, dot_dnums));
 
   HloModule module("test_module");
   HloComputation* entry_computation =
@@ -376,5 +385,69 @@ TEST_F(TransposeFoldingTest, FoldConvTransposeLhs) {
       new_conv->convolution_dimension_numbers().output_spatial_dimensions(1));
 }
 
+// Test that a transpose of every dimension in the activations gets folded into
+// convolution.
+TEST_F(TransposeFoldingTest, FoldConvComplexTransposeLhs) {
+  auto builder = HloComputation::Builder("entry_computation");
+  HloInstruction* x = builder.AddInstruction(HloInstruction::CreateParameter(
+      /*parameter_number=*/0, ShapeUtil::MakeShape(F32, {3, 2, 1, 1}),
+      /*name=*/"x"));
+  HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter(
+      /*parameter_number=*/1, ShapeUtil::MakeShape(F32, {2, 3, 1, 1}),
+      /*name=*/"y"));
+  HloInstruction* transpose_x =
+      builder.AddInstruction(HloInstruction::CreateTranspose(
+          ShapeUtil::MakeShape(F32, {2, 3, 1, 1}), x, {1, 0, 3, 2}));
+  auto dnums = ComputationBuilder::CreateDefaultConvDimensionNumbers();
+  Window window;
+  for (int i = 0; i < 2; ++i) {
+    WindowDimension* dim = window.add_dimensions();
+    dim->set_padding_low(0);
+    dim->set_padding_high(0);
+    dim->set_base_dilation(1);
+    dim->set_window_dilation(1);
+    dim->set_stride(1);
+    dim->set_size(y->shape().dimensions(dnums.kernel_spatial_dimensions(i)));
+  }
+  StatusOr<Shape> conv_shape = ShapeInference::InferConvolveShape(
+      transpose_x->shape(), y->shape(), window, dnums);
+  EXPECT_IS_OK(conv_shape);
+  HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
+      conv_shape.ValueOrDie(), transpose_x, y, window, dnums));
+
+  HloModule module("test_module");
+  HloComputation* entry_computation =
+      module.AddEntryComputation(builder.Build(conv));
+  FoldTranspose(&module);
+
+  // Instructions after folding: x, y, and the convolution.
+  std::unordered_set<HloInstruction*> instruction_set(
+      entry_computation->instructions().begin(),
+      entry_computation->instructions().end());
+  EXPECT_EQ(1, instruction_set.erase(x)) << "x is not in entry_computation.";
+  EXPECT_EQ(1, instruction_set.erase(y)) << "y is not in entry_computation.";
+  EXPECT_EQ(1, instruction_set.size())
+      << "entry_computation should contain exactly 3 instructions.";
+  HloInstruction* new_conv = *instruction_set.begin();
+  EXPECT_EQ(HloOpcode::kConvolution, new_conv->opcode());
+  EXPECT_EQ(dnums.input_feature_dimension(),
+            new_conv->convolution_dimension_numbers().input_batch_dimension());
+  EXPECT_EQ(
+      dnums.input_batch_dimension(),
+      new_conv->convolution_dimension_numbers().input_feature_dimension());
+  EXPECT_EQ(
+      dnums.input_spatial_dimensions(0),
+      new_conv->convolution_dimension_numbers().input_spatial_dimensions(1));
+  EXPECT_EQ(
+      dnums.input_spatial_dimensions(1),
+      new_conv->convolution_dimension_numbers().input_spatial_dimensions(0));
+  EXPECT_EQ(
+      dnums.output_spatial_dimensions(0),
+      new_conv->convolution_dimension_numbers().output_spatial_dimensions(0));
+  EXPECT_EQ(
+      dnums.output_spatial_dimensions(1),
+      new_conv->convolution_dimension_numbers().output_spatial_dimensions(1));
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
index 0c848566478a25d4862cb0698e029dacd71f7a6a..657a8fe09ae9df906d695f7f49df72500d611792 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
@@ -273,6 +273,16 @@ Status TuplePointsToAnalysis::HandleBitcast(HloInstruction* bitcast) {
   return Status::OK();
 }
 
+Status TuplePointsToAnalysis::HandleSlice(HloInstruction* slice) {
+  // A kSlice instruction aliases its operand if the backend lowers it to an
+  // in-place implementation.
+  if (slice->IsInPlaceSlice()) {
+    CreateCopiedPointsToSet(slice, slice->operand(0));
+    return Status::OK();
+  }
+  return DefaultAction(slice);
+}
+
 Status TuplePointsToAnalysis::HandleRecvDone(HloInstruction* recv_done) {
   // RecvDone aliases its input (Recv) tuple element {0} to its output.
   PointsToSet& points_to_set = CreateEmptyPointsToSet(recv_done);
@@ -427,10 +437,15 @@ bool TuplePointsToAnalysis::InstructionDefinesBufferAtIndex(
 
 Status TuplePointsToAnalysis::VerifyBuffer(const LogicalBuffer& buffer) const {
   if (!InstructionDefinesBufferAtIndex(buffer.instruction(), buffer.index())) {
-    return FailedPrecondition(
-        "LogicalBuffer %s is ill-defined: instruction %s does not define a "
-        "buffer at that index",
-        buffer.ToString().c_str(), buffer.instruction()->name().c_str());
+    // kSlice ops that are lowered to an in-place version are expected to not
+    // define their output buffer.
+    if (buffer.instruction()->opcode() != HloOpcode::kSlice ||
+        !buffer.instruction()->IsInPlaceSlice()) {
+      return FailedPrecondition(
+          "LogicalBuffer %s is ill-defined: instruction %s does not define a "
+          "buffer at that index",
+          buffer.ToString().c_str(), buffer.instruction()->name().c_str());
+    }
   }
 
   if (buffer.id() < 0 ||
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
index 8928de107eed8c40bbe2130e26fe83ca3802d2f6..c3743b150168ebcf1051050dc511e50c43108c4f 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
@@ -199,12 +199,10 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault {
   StatusOr<const LogicalBuffer*> GetBufferDefinedAt(
       const HloInstruction* instruction, const ShapeIndex& index) const;
 
-  // Return a vector containing all BufferAliases of the given logical buffer
-  // This trivially includes the BufferAlias with same instruction and index as
-  // the logical buffer itself, so the returned vector is never empty.  The
-  // buffer alias set is the inverse of the points-to set. That is,
-  // LogicalBuffer B is in the points-to set of instruction I at index N iff
-  // instruction I, index N is a BufferAlias of B.
+  // Return a (possibly empty) vector containing all BufferAliases of the given
+  // logical buffer The buffer alias set is the inverse of the points-to set.
+  // That is, LogicalBuffer B is in the points-to set of instruction I at index
+  // N iff instruction I, index N is a BufferAlias of B.
   using BufferAliasVector = tensorflow::gtl::InlinedVector<BufferAlias, 1>;
   const BufferAliasVector& GetBufferAliases(const LogicalBuffer& buffer) const;
 
@@ -250,6 +248,7 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault {
   Status HandleTuple(HloInstruction* tuple) override;
   Status HandleGetTupleElement(HloInstruction* get_tuple_element) override;
   Status HandleBitcast(HloInstruction* bitcast) override;
+  Status HandleSlice(HloInstruction* slice) override;
   Status HandleCopy(HloInstruction* copy) override;
   Status HandleRecvDone(HloInstruction* recv_done) override;
   Status HandleSend(HloInstruction* send) override;
diff --git a/tensorflow/compiler/xla/service/tuple_util.cc b/tensorflow/compiler/xla/service/tuple_util.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4a530bb0b20582b303f4af969514748b46fd5064
--- /dev/null
+++ b/tensorflow/compiler/xla/service/tuple_util.cc
@@ -0,0 +1,61 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/tuple_util.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+
+namespace xla {
+
+/*static*/ HloInstruction* TupleUtil::ExtractPrefix(HloInstruction* input_tuple,
+                                                    int64 elements) {
+  CHECK(ShapeUtil::IsTuple(input_tuple->shape()));
+
+  HloComputation* computation = input_tuple->parent();
+  const Shape& input_shape = input_tuple->shape();
+
+  std::vector<HloInstruction*> tuple_elements;
+  tuple_elements.reserve(elements);
+  for (int i = 0; i < elements; i++) {
+    tuple_elements.push_back(
+        computation->AddInstruction(HloInstruction::CreateGetTupleElement(
+            input_shape.tuple_shapes(i), input_tuple, i)));
+  }
+
+  return computation->AddInstruction(
+      HloInstruction::CreateTuple(tuple_elements));
+}
+
+/*static*/ HloInstruction* TupleUtil::AppendSuffix(
+    HloInstruction* input_tuple,
+    tensorflow::gtl::ArraySlice<HloInstruction*> trailing_values) {
+  CHECK(ShapeUtil::IsTuple(input_tuple->shape()));
+
+  HloComputation* computation = input_tuple->parent();
+  const Shape& input_shape = input_tuple->shape();
+  std::vector<HloInstruction*> tuple_elements;
+  tuple_elements.reserve(input_shape.tuple_shapes_size());
+  for (int i = 0; i < input_shape.tuple_shapes_size(); i++) {
+    tuple_elements.push_back(
+        computation->AddInstruction(HloInstruction::CreateGetTupleElement(
+            input_shape.tuple_shapes(i), input_tuple, i)));
+  }
+  tuple_elements.insert(tuple_elements.end(), trailing_values.begin(),
+                        trailing_values.end());
+  return computation->AddInstruction(
+      HloInstruction::CreateTuple(tuple_elements));
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/tuple_util.h b/tensorflow/compiler/xla/service/tuple_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..e5ff9aaa8357fe8e4777d6dee37bbec72e144c06
--- /dev/null
+++ b/tensorflow/compiler/xla/service/tuple_util.h
@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_TUPLE_UTIL_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_TUPLE_UTIL_H_
+
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+
+namespace xla {
+class TupleUtil {
+ public:
+  // Generates HLO instructions to get a prefix tuple from `input_tuple` (which
+  // must be of tuple shape) of length `elements`.  Returns the root of the
+  // graph of instructions generated.
+  //
+  // The instructions are generated into the computation containing
+  // `input_tuple`.
+  static HloInstruction* ExtractPrefix(HloInstruction* input_tuple,
+                                       int64 elements);
+
+  // Generates HLO instructions to create a tuple that consists of the values in
+  // `trailing_values` appended to `input_tuple` (which must be of tuple shape).
+  // Returns the root of the graph of instructions generated.
+  //
+  // The instructions are generated into the computation containing
+  // `input_tuple`.
+  static HloInstruction* AppendSuffix(
+      HloInstruction* input_tuple,
+      tensorflow::gtl::ArraySlice<HloInstruction*> trailing_values);
+};
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_TUPLE_UTIL_H_
diff --git a/tensorflow/compiler/xla/service/tuple_util_test.cc b/tensorflow/compiler/xla/service/tuple_util_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..754fd8ef169231827eeb5bfd72aeb596644ca767
--- /dev/null
+++ b/tensorflow/compiler/xla/service/tuple_util_test.cc
@@ -0,0 +1,81 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/tuple_util.h"
+
+#include "tensorflow/compiler/xla/service/hlo_matchers.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h"
+
+namespace xla {
+namespace {
+
+namespace op = ::xla::testing::opcode_matchers;
+
+StatusOr<std::unique_ptr<HloModule>> GetParsedModule(
+    HloComputation** entry_computation, HloInstruction** param0,
+    HloInstruction** param1) {
+  const char* const hlo_string = R"(
+HloModule Module
+
+ENTRY entry {
+  p0 = (f32[32,32]{1,0},f32[32,32]{1,0},f32[32,32]{1,0}) parameter(0)
+  ROOT p1 = f32[32,32]{1,0} parameter(1)
+}
+)";
+
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
+                      tools::Parse(hlo_string));
+
+  *entry_computation = module->entry_computation();
+  *param0 = (*entry_computation)->parameter_instruction(0);
+  *param1 = (*entry_computation)->parameter_instruction(1);
+
+  return std::move(module);
+}
+
+TEST(TupleUtilTest, ExtractPrefix) {
+  HloInstruction *param0, *param1;
+  HloComputation* entry_computation;
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<HloModule> module,
+      GetParsedModule(&entry_computation, &param0, &param1));
+
+  HloInstruction* prefix = TupleUtil::ExtractPrefix(param0, 2);
+
+  EXPECT_THAT(prefix, op::Tuple(op::GetTupleElement(op::Parameter(0), 0),
+                                op::GetTupleElement(op::Parameter(0), 1)));
+}
+
+TEST(TupleUtilTest, AppendSuffix) {
+  HloInstruction *param0, *param1;
+  HloComputation* entry_computation;
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<HloModule> module,
+      GetParsedModule(&entry_computation, &param0, &param1));
+
+  HloInstruction* with_suffix =
+      TupleUtil::AppendSuffix(param0, {param1, param1});
+
+  EXPECT_THAT(with_suffix, op::Tuple(op::GetTupleElement(op::Parameter(0), 0),
+                                     op::GetTupleElement(op::Parameter(0), 1),
+                                     op::GetTupleElement(op::Parameter(0), 2),
+                                     op::Parameter(1), op::Parameter(1)));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc
index 4e90491b55a5688e37cbabae0843f584578add55..7882b70ab7765ad528b68f97c115e3ae5f19e48a 100644
--- a/tensorflow/compiler/xla/service/user_computation.cc
+++ b/tensorflow/compiler/xla/service/user_computation.cc
@@ -88,8 +88,6 @@ HloOpcode BinaryOperationToHloOpcode(BinaryOperation binop) {
       return HloOpcode::kAtan2;
     case BINOP_COMPLEX:
       return HloOpcode::kComplex;
-    case BINOP_DOT:
-      return HloOpcode::kDot;
     case BINOP_MUL:
       return HloOpcode::kMultiply;
     case BINOP_ADD:
@@ -371,14 +369,6 @@ StatusOr<ComputationDataHandle> UserComputation::AddRngInstruction(
 
   // Check the number of parameters per RNG distribution.
   switch (rng_request.distribution()) {
-    case RandomDistribution::RNG_BERNOULLI:
-      if (rng_request.parameter_size() != 1) {
-        return InvalidArgument(
-            "RNG distribution (%s) expects 1 parameters, but got %d",
-            RandomDistribution_Name(rng_request.distribution()).c_str(),
-            rng_request.parameter_size());
-      }
-      break;
     case RandomDistribution::RNG_NORMAL:
     case RandomDistribution::RNG_UNIFORM:
       if (rng_request.parameter_size() != 2) {
@@ -765,6 +755,54 @@ StatusOr<ComputationDataHandle> UserComputation::AddWhileInstruction(
   return handle;
 }
 
+StatusOr<ComputationDataHandle> UserComputation::AddConditionalInstruction(
+    const ConditionalRequest& conditional_request,
+    const UserComputation& true_computation,
+    const UserComputation& false_computation) {
+  tensorflow::mutex_lock lock(mutex_);
+
+  TF_ASSIGN_OR_RETURN(const OperationRequest* pred,
+                      LookUpRequest(conditional_request.predicate()));
+  TF_ASSIGN_OR_RETURN(const OperationRequest* true_operand,
+                      LookUpRequest(conditional_request.true_operand()));
+  TF_ASSIGN_OR_RETURN(const OperationRequest* false_operand,
+                      LookUpRequest(conditional_request.false_operand()));
+
+  VersionedComputationHandle::Version true_computation_version =
+      true_computation.version();
+  TF_ASSIGN_OR_RETURN(
+      std::shared_ptr<const ProgramShape> true_computation_shape,
+      true_computation.ComputeProgramShape(true_computation_version));
+
+  VersionedComputationHandle::Version false_computation_version =
+      false_computation.version();
+  TF_ASSIGN_OR_RETURN(
+      std::shared_ptr<const ProgramShape> false_computation_shape,
+      false_computation.ComputeProgramShape(false_computation_version));
+
+  TF_ASSIGN_OR_RETURN(Shape inferred_shape,
+                      ShapeInference::InferConditionalShape(
+                          pred->output_shape(), true_operand->output_shape(),
+                          false_operand->output_shape(),
+                          *true_computation_shape, *false_computation_shape));
+
+  ComputationDataHandle handle = CreateComputationDataHandle();
+
+  OperationRequest& request =
+      (*session_computation_.mutable_requests())[handle.handle()];
+  *request.mutable_output_handle() = handle;
+  *request.mutable_output_shape() = inferred_shape;
+  request.add_embedded_computation_versions(true_computation_version);
+  request.add_embedded_computation_versions(false_computation_version);
+  *request.mutable_request()->mutable_conditional_request() =
+      conditional_request;
+
+  VLOG(1) << "AddConditionalInstruction (" << GetVersionedHandleInternal()
+          << "), data handle " << handle.handle() << ": "
+          << conditional_request.ShortDebugString();
+  return handle;
+}
+
 StatusOr<ComputationDataHandle> UserComputation::AddBroadcastInstruction(
     const BroadcastRequest& broadcast_request) {
   tensorflow::mutex_lock lock(mutex_);
@@ -1075,6 +1113,31 @@ StatusOr<ComputationDataHandle> UserComputation::AddConvolveInstruction(
   return handle;
 }
 
+StatusOr<ComputationDataHandle> UserComputation::AddFftInstruction(
+    const FftRequest& fft_request) {
+  tensorflow::mutex_lock lock(mutex_);
+
+  TF_ASSIGN_OR_RETURN(const OperationRequest* operand,
+                      LookUpRequest(fft_request.operand()));
+  TF_ASSIGN_OR_RETURN(Shape shape,
+                      ShapeInference::InferFftShape(
+                          operand->output_shape(), fft_request.fft_type(),
+                          AsInt64Slice(fft_request.fft_length())));
+
+  const ComputationDataHandle handle = CreateComputationDataHandle();
+
+  OperationRequest& request =
+      (*session_computation_.mutable_requests())[handle.handle()];
+  *request.mutable_output_handle() = handle;
+  *request.mutable_output_shape() = shape;
+  *request.mutable_request()->mutable_fft_request() = fft_request;
+
+  VLOG(1) << "AddFftInstruction (" << GetVersionedHandleInternal()
+          << "), data handle " << handle.handle() << ": "
+          << fft_request.ShortDebugString();
+  return handle;
+}
+
 StatusOr<ComputationDataHandle> UserComputation::AddCrossReplicaSumInstruction(
     const CrossReplicaSumRequest& cross_replica_sum_request) {
   tensorflow::mutex_lock lock(mutex_);
@@ -1082,7 +1145,7 @@ StatusOr<ComputationDataHandle> UserComputation::AddCrossReplicaSumInstruction(
   TF_ASSIGN_OR_RETURN(const OperationRequest* operand,
                       LookUpRequest(cross_replica_sum_request.operand()));
   TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferCrossReplicaSumShape(
-                                       operand->output_shape()));
+                                       {&operand->output_shape()}));
 
   ComputationDataHandle handle = CreateComputationDataHandle();
 
@@ -1192,6 +1255,14 @@ StatusOr<ComputationDataHandle> UserComputation::AddCustomCallInstruction(
     TF_RETURN_IF_ERROR(LookUpRequest(handle).status());
   }
 
+  if (tensorflow::StringPiece(custom_call_request.call_target_name())
+          .starts_with("$")) {
+    return InvalidArgument(
+        "Invalid custom_call_target \"%s\": Call targets that start with '$' "
+        "are reserved for internal use.",
+        custom_call_request.call_target_name().c_str());
+  }
+
   const ComputationDataHandle handle = CreateComputationDataHandle();
 
   OperationRequest& request =
@@ -1207,6 +1278,33 @@ StatusOr<ComputationDataHandle> UserComputation::AddCustomCallInstruction(
   return handle;
 }
 
+StatusOr<ComputationDataHandle> UserComputation::AddDotInstruction(
+    const DotRequest& dot_request) {
+  tensorflow::mutex_lock lock(mutex_);
+
+  TF_ASSIGN_OR_RETURN(const OperationRequest* lhs,
+                      LookUpRequest(dot_request.lhs()));
+  TF_ASSIGN_OR_RETURN(const OperationRequest* rhs,
+                      LookUpRequest(dot_request.rhs()));
+
+  TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferDotOpShape(
+                                       lhs->output_shape(), rhs->output_shape(),
+                                       dot_request.dimension_numbers()));
+
+  const ComputationDataHandle handle = CreateComputationDataHandle();
+
+  OperationRequest& request =
+      (*session_computation_.mutable_requests())[handle.handle()];
+  *request.mutable_output_handle() = handle;
+  *request.mutable_output_shape() = shape;
+  *request.mutable_request()->mutable_dot_request() = dot_request;
+
+  VLOG(1) << "AddDotInstruction (" << GetVersionedHandleInternal()
+          << "), data handle " << handle.handle() << ": "
+          << dot_request.ShortDebugString();
+  return handle;
+}
+
 StatusOr<ComputationDataHandle> UserComputation::AddUnaryInstruction(
     const UnaryOpRequest& unary_request) {
   tensorflow::mutex_lock lock(mutex_);
@@ -1433,7 +1531,7 @@ StatusOr<const OperationRequest*> LookUpRequest(
   return &session_computation.requests().at(handle_value);
 }
 
-// Returns the OperationRequestion corresponding to the root (result) of the
+// Returns the OperationRequest corresponding to the root (result) of the
 // session computation.
 StatusOr<const OperationRequest*> GetRoot(
     VersionedComputationHandle::Version version,
@@ -1479,8 +1577,8 @@ UserComputation::ComputeProgramShape(
             request.request().parameter_request();
         int64 param_no = parameter_request.parameter();
         // Parameters may be out of order so expand ProgramShape parameters
-        // until
-        // it is at least large enough to hold the current parameter number.
+        // until it is at least large enough to hold the current parameter
+        // number.
         while (program_shape->parameters_size() <= param_no) {
           program_shape->add_parameters();
           program_shape->add_parameter_names();
@@ -1594,6 +1692,13 @@ void PureFunctionalVisitor(const SessionComputation& session_computation,
       break;
     }
 
+    case OpRequest::kFftRequest: {
+      const FftRequest& fft_request = request.request().fft_request();
+      PureFunctionalVisitor(session_computation, fft_request.operand(),
+                            num_parameters, visited, is_functional);
+      break;
+    }
+
     case OpRequest::kCrossReplicaSumRequest: {
       // TODO(b/33009255): Implmement constant folding for cross replica sum.
       *is_functional = false;
@@ -1629,6 +1734,15 @@ void PureFunctionalVisitor(const SessionComputation& session_computation,
       break;
     }
 
+    case OpRequest::kDotRequest: {
+      const DotRequest& dot_request = request.request().dot_request();
+      PureFunctionalVisitor(session_computation, dot_request.lhs(),
+                            num_parameters, visited, is_functional);
+      PureFunctionalVisitor(session_computation, dot_request.rhs(),
+                            num_parameters, visited, is_functional);
+      break;
+    }
+
     case OpRequest::kSendRequest: {
       *is_functional = false;
       break;
@@ -1757,6 +1871,23 @@ void PureFunctionalVisitor(const SessionComputation& session_computation,
       break;
     }
 
+    case OpRequest::kConditionalRequest: {
+      const ConditionalRequest& conditional_request =
+          request.request().conditional_request();
+      PureFunctionalVisitor(session_computation,
+                            conditional_request.predicate(), num_parameters,
+                            visited, is_functional);
+      PureFunctionalVisitor(session_computation,
+                            conditional_request.true_operand(), num_parameters,
+                            visited, is_functional);
+      PureFunctionalVisitor(session_computation,
+                            conditional_request.false_operand(), num_parameters,
+                            visited, is_functional);
+      // TODO(b/32495713): We aren't checking the true and false computations
+      // themselves.
+      break;
+    }
+
     case OpRequest::kTernaryOpRequest: {
       const TernaryOpRequest& ternary_op_request =
           request.request().ternary_op_request();
@@ -1985,6 +2116,21 @@ UserComputation::GetEmbeddedComputations(
           break;
         }
 
+        case OpRequest::kConditionalRequest: {
+          CHECK_EQ(2, request.embedded_computation_versions_size());
+          const ConditionalRequest& conditional_request =
+              request.request().conditional_request();
+          const VersionedComputationHandle true_computation_versioned_handle = {
+              conditional_request.true_computation(),
+              request.embedded_computation_versions(0)};
+          computations.push_back(true_computation_versioned_handle);
+          const VersionedComputationHandle false_computation_versioned_handle =
+              {conditional_request.false_computation(),
+               request.embedded_computation_versions(1)};
+          computations.push_back(false_computation_versioned_handle);
+          break;
+        }
+
         default:
           // No embedded computation.
           break;
@@ -2000,6 +2146,24 @@ UserComputation::GetEmbeddedComputations(
   return computations;
 }
 
+StatusOr<const OperationRequest*>
+UserComputation::LookUpRequestForErrorReporting(
+    const ComputationDataHandle& handle) const {
+  tensorflow::mutex_lock lock(mutex_);
+  return LookUpRequest(handle);
+}
+
+tensorflow::gtl::optional<const OpMetadata*> UserComputation::ParameterMetadata(
+    int parameter_number) const {
+  tensorflow::mutex_lock lock(mutex_);
+  auto it = parameters_.find(parameter_number);
+  if (it == parameters_.end()) {
+    return tensorflow::gtl::nullopt;
+  }
+  OperationRequest* op = it->second;
+  return &op->request().metadata();
+}
+
 Status UserComputation::RemapEmbeddedComputations(
     const std::map<int64, ComputationHandle>& old_to_new) {
   auto update = [&old_to_new](ComputationHandle* to_update) -> Status {
@@ -2071,6 +2235,16 @@ Status UserComputation::RemapEmbeddedComputations(
         TF_RETURN_IF_ERROR(update(while_request->mutable_body()));
         break;
       }
+      case OpRequest::kConditionalRequest: {
+        TF_RET_CHECK(2 == request.embedded_computation_versions_size());
+        ConditionalRequest* conditional_request =
+            request.mutable_request()->mutable_conditional_request();
+        TF_RETURN_IF_ERROR(
+            update(conditional_request->mutable_true_computation()));
+        TF_RETURN_IF_ERROR(
+            update(conditional_request->mutable_false_computation()));
+        break;
+      }
       default:
         // No embedded computation.
         TF_RET_CHECK(0 == request.embedded_computation_versions_size());
@@ -2274,6 +2448,12 @@ static void ForEachOperand(
       break;
     }
 
+    case OpRequest::kFftRequest: {
+      const FftRequest& fft_request = request.request().fft_request();
+      apply(fft_request.operand());
+      break;
+    }
+
     case OpRequest::kBatchNormTrainingRequest: {
       const BatchNormTrainingRequest& batch_norm_training_request =
           request.request().batch_norm_training_request();
@@ -2417,6 +2597,15 @@ static void ForEachOperand(
       break;
     }
 
+    case OpRequest::kConditionalRequest: {
+      const ConditionalRequest& conditional_request =
+          request.request().conditional_request();
+      apply(conditional_request.predicate());
+      apply(conditional_request.true_operand());
+      apply(conditional_request.false_operand());
+      break;
+    }
+
     case OpRequest::kTernaryOpRequest: {
       const TernaryOpRequest& ternary_op_request =
           request.request().ternary_op_request();
@@ -2453,6 +2642,13 @@ static void ForEachOperand(
       break;
     }
 
+    case OpRequest::kDotRequest: {
+      const DotRequest& dot_request = request.request().dot_request();
+      apply(dot_request.rhs());
+      apply(dot_request.lhs());
+      break;
+    }
+
     case OpRequest::kUnaryOpRequest: {
       const UnaryOpRequest& unary_op_request =
           request.request().unary_op_request();
@@ -2653,7 +2849,8 @@ void ComputationLowerer::Visit(
       const ConstantRequest& constant_request =
           request.request().constant_request();
       hlo_instruction = add_instruction(HloInstruction::CreateConstant(
-          Literal(constant_request.literal()).CloneToUnique()));
+          Literal::CreateFromProto(constant_request.literal())
+              .ConsumeValueOrDie()));
       break;
     }
 
@@ -2732,13 +2929,31 @@ void ComputationLowerer::Visit(
       break;
     }
 
+    case OpRequest::kFftRequest: {
+      const FftRequest& fft_request = request.request().fft_request();
+      HloInstruction* operand = lookup_instruction(fft_request.operand());
+      hlo_instruction = add_instruction(HloInstruction::CreateFft(
+          request.output_shape(), operand, fft_request.fft_type(),
+          AsInt64Slice(fft_request.fft_length())));
+      break;
+    }
+
+    case OpRequest::kDotRequest: {
+      const DotRequest& dot_request = request.request().dot_request();
+      HloInstruction* lhs = lookup_instruction(dot_request.lhs());
+      HloInstruction* rhs = lookup_instruction(dot_request.rhs());
+      hlo_instruction = add_instruction(HloInstruction::CreateDot(
+          request.output_shape(), lhs, rhs, dot_request.dimension_numbers()));
+      break;
+    }
+
     case OpRequest::kCrossReplicaSumRequest: {
       const CrossReplicaSumRequest& cross_replica_sum_request =
           request.request().cross_replica_sum_request();
       HloInstruction* operand =
           lookup_instruction(cross_replica_sum_request.operand());
       hlo_instruction = add_instruction(HloInstruction::CreateCrossReplicaSum(
-          request.output_shape(), operand));
+          request.output_shape(), {operand}));
       break;
     }
 
@@ -3021,6 +3236,30 @@ void ComputationLowerer::Visit(
       break;
     }
 
+    case OpRequest::kConditionalRequest: {
+      const ConditionalRequest& conditional_request =
+          request.request().conditional_request();
+      CHECK_EQ(2, request.embedded_computation_versions_size());
+      VersionedComputationHandle::Version true_computation_version =
+          request.embedded_computation_versions(0);
+      HloComputation* true_computation = ResolveComputation(
+          conditional_request.true_computation(), true_computation_version);
+      VersionedComputationHandle::Version false_computation_version =
+          request.embedded_computation_versions(1);
+      HloComputation* false_computation = ResolveComputation(
+          conditional_request.false_computation(), false_computation_version);
+      HloInstruction* predicate =
+          lookup_instruction(conditional_request.predicate());
+      HloInstruction* true_operand =
+          lookup_instruction(conditional_request.true_operand());
+      HloInstruction* false_operand =
+          lookup_instruction(conditional_request.false_operand());
+      hlo_instruction = add_instruction(HloInstruction::CreateConditional(
+          request.output_shape(), predicate, true_operand, true_computation,
+          false_operand, false_computation));
+      break;
+    }
+
     case OpRequest::kTernaryOpRequest: {
       const TernaryOpRequest& ternary_op_request =
           request.request().ternary_op_request();
@@ -3151,8 +3390,7 @@ void ComputationLowerer::Visit(
         lhs = (lhs == operand_to_broadcast) ? broadcasted_operand : lhs;
         rhs = (rhs == operand_to_broadcast) ? broadcasted_operand : rhs;
       }
-      if (debug_options_.xla_eliminate_hlo_implicit_broadcast() &&
-          binary_op_request.binop() != BINOP_DOT) {
+      if (debug_options_.xla_eliminate_hlo_implicit_broadcast()) {
         if (!ShapeUtil::SameDimensions(request.output_shape(), lhs->shape())) {
           // lhs side is being implicitly broadcast. Change to explicit.
           lhs =
diff --git a/tensorflow/compiler/xla/service/user_computation.h b/tensorflow/compiler/xla/service/user_computation.h
index 317c631dca2e1ebe6f3c8fbaf1a3e94106034f79..4f92e58877a1d06728fdd250744ca2ce7b57d9ad 100644
--- a/tensorflow/compiler/xla/service/user_computation.h
+++ b/tensorflow/compiler/xla/service/user_computation.h
@@ -133,6 +133,10 @@ class UserComputation {
   StatusOr<ComputationDataHandle> AddConvolveInstruction(
       const ConvolveRequest& convolve_request);
 
+  // Enqueues an FFT instruction onto this user computation.
+  StatusOr<ComputationDataHandle> AddFftInstruction(
+      const FftRequest& fft_request);
+
   // Enqueues a cross replica sum instruction onto this user computation.
   StatusOr<ComputationDataHandle> AddCrossReplicaSumInstruction(
       const CrossReplicaSumRequest& cross_replica_sum_request);
@@ -153,6 +157,10 @@ class UserComputation {
   StatusOr<ComputationDataHandle> AddCustomCallInstruction(
       const CustomCallRequest& custom_call_request);
 
+  // Enqueues a dot instruction onto this user computation.
+  StatusOr<ComputationDataHandle> AddDotInstruction(
+      const DotRequest& dot_request);
+
   // Enqueues a broadcast instruction onto this user computation.
   StatusOr<ComputationDataHandle> AddBroadcastInstruction(
       const BroadcastRequest& broadcast_request);
@@ -216,6 +224,12 @@ class UserComputation {
       const UserComputation& condition_computation,
       const UserComputation& body_computation);
 
+  // Enqueues a conditional instruction on this user computation.
+  StatusOr<ComputationDataHandle> AddConditionalInstruction(
+      const ConditionalRequest& conditional_request,
+      const UserComputation& true_computation,
+      const UserComputation& false_computation);
+
   // Enqueues a Send instruction onto this user computation.
   Status AddSendInstruction(const SendRequest& send_request);
 
@@ -307,6 +321,23 @@ class UserComputation {
   SessionComputation CloneSessionComputation(
       VersionedComputationHandle::Version version) const;
 
+  // Warning: typically we don't want to look up computation data handles until
+  // the computation is finished being built, for consistency purposes. We
+  // expose this routine for error reporting purposes so that we can provide
+  // more meaningful error messages from the XLA service layer.
+  //
+  // Returns the operation request that the handle comes from.
+  StatusOr<const OperationRequest*> LookUpRequestForErrorReporting(
+      const ComputationDataHandle& handle) const;
+
+  // Retrieves the parameter metadata for the given parameter number.
+  //
+  // If the parameter number is invalid for this computation, nullopt is
+  // returned. When the return value has_value(), nullptr will never be
+  // the held value.
+  tensorflow::gtl::optional<const OpMetadata*> ParameterMetadata(
+      int parameter_number) const;
+
  private:
   // Warning: dangerous mutating operation that doesn't respect versioning.
   // This is only used at initialization time when constructing from a
diff --git a/tensorflow/compiler/xla/service/user_computation_test.cc b/tensorflow/compiler/xla/service/user_computation_test.cc
index 5afaf226ae0cce7e9afc966c6b4adf838aeebc91..ca02115863e6906ef709ba63259024877e0dcef4 100644
--- a/tensorflow/compiler/xla/service/user_computation_test.cc
+++ b/tensorflow/compiler/xla/service/user_computation_test.cc
@@ -65,6 +65,7 @@ TEST_F(UserComputationTest, SimpleComputation) {
 
   OutfeedRequest outfeed_request;
   *outfeed_request.mutable_operand() = constant_handle;
+  *outfeed_request.mutable_shape() = kVectorShape;
   outfeed_request.set_outfeed_config("abc");
   TF_ASSERT_OK(computation.AddOutfeedInstruction(outfeed_request));
 
@@ -334,50 +335,5 @@ TEST_F(UserComputationTest, EliminateDegenerateBroadcastAfterIndimBroadcast) {
               operands[1]->opcode() == HloOpcode::kBroadcast);
 }
 
-TEST_F(UserComputationTest, SkipDotInEliminatingImplicitBroadcast) {
-  auto debug_options = DebugOptions();
-  debug_options.set_xla_eliminate_hlo_implicit_broadcast(true);
-
-  //  %a = Param({1, 3});
-  //  %b = Param({3, 1});
-  //  %dot = Dot(%a, %b);
-  ComputationHandle handle;
-  handle.set_handle(123);
-  UserComputation computation("TheComputation", handle);
-
-  ParameterRequest a_request;
-  *a_request.mutable_shape() = ShapeUtil::MakeShape(F32, {1, 3});
-  a_request.set_name("a");
-  a_request.set_parameter(0);
-  TF_ASSERT_OK_AND_ASSIGN(ComputationDataHandle a_handle,
-                          computation.AddParameterInstruction(a_request));
-
-  ParameterRequest b_request;
-  *b_request.mutable_shape() = ShapeUtil::MakeShape(F32, {3, 1});
-  b_request.set_name("b");
-  b_request.set_parameter(1);
-  TF_ASSERT_OK_AND_ASSIGN(ComputationDataHandle b_handle,
-                          computation.AddParameterInstruction(b_request));
-
-  BinaryOpRequest dot;
-  dot.set_binop(BINOP_DOT);
-  *dot.mutable_lhs() = a_handle;
-  *dot.mutable_rhs() = b_handle;
-  TF_ASSERT_OK(computation.AddBinaryInstruction(dot).status());
-
-  auto hlo_resolver = [](const VersionedComputationHandle& handle) {
-    return nullptr;
-  };
-  VersionedComputationHandle latest_version = computation.GetVersionedHandle();
-
-  // Build the HLO computation.
-  TF_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<HloComputation> hlo_computation,
-      computation.BuildHloComputation(latest_version.version, hlo_resolver,
-                                      debug_options));
-
-  EXPECT_EQ(3, hlo_computation->instruction_count());
-}
-
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a5f9b01f011ce04f1114c74391a967c62f015221
--- /dev/null
+++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
@@ -0,0 +1,296 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h"
+#include "tensorflow/compiler/xla/service/tuple_util.h"
+#include "tensorflow/compiler/xla/service/while_util.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/gtl/flatmap.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
+
+namespace xla {
+
+using tensorflow::gtl::FlatMap;
+using tensorflow::gtl::FlatSet;
+using tensorflow::gtl::InlinedVector;
+
+// Copies `to_hoist` to the computation containing `while_instr`, hoisting its
+// operands as needed.  All of its transitive operands are expected to be either
+// in `hoisted_instructions` or `unhoisted_invariant_instructions`.  This
+// function hoists the operands in `unhoisted_invariant_instructions` and moves
+// them into `hoisted_instructions`.
+static void CreateLoopInvariantCopy(
+    FlatMap<HloInstruction*, HloInstruction*>* hoisted_instructions,
+    FlatSet<HloInstruction*>* unhoisted_invariant_instructions,
+    HloInstruction* while_instr, HloInstruction* to_hoist) {
+  HloComputation* parent_of_while = while_instr->parent();
+  HloComputation* while_body = while_instr->while_body();
+
+  struct DFSFrame {
+    HloInstruction* instruction;
+    int64 operand_index;
+  };
+
+  InlinedVector<DFSFrame, 8> dfs_stack;
+  dfs_stack.push_back({to_hoist, 0});
+
+  HloInstruction* while_body_param = while_body->parameter_instruction(0);
+  HloInstruction* while_operand = while_instr->mutable_operand(0);
+
+  do {
+    DFSFrame* frame = &dfs_stack.back();
+    if (frame->operand_index == frame->instruction->operand_count()) {
+      HloInstruction* old_instruction = frame->instruction;
+
+      // All of the operands for old_instruction have been cloned, so it is
+      // time to clone old_instruction itself.
+
+      auto get_new_operand = [&](HloInstruction* old_operand) {
+        return old_operand == while_body_param
+                   ? while_operand
+                   : FindOrDie(*hoisted_instructions, old_operand);
+      };
+
+      InlinedVector<HloInstruction*, 4> new_operands;
+      c_transform(old_instruction->operands(), std::back_inserter(new_operands),
+                  get_new_operand);
+
+      HloInstruction* new_instruction =
+          parent_of_while->AddInstruction(old_instruction->CloneWithNewOperands(
+              old_instruction->shape(), new_operands));
+
+      InsertOrDie(hoisted_instructions, old_instruction, new_instruction);
+
+      // Approximately half of the instructions that would normally be present
+      // in unhoisted_invariant_instructions are constants.  We save a bit of
+      // compile time by not putting these in the hashtable.
+      CHECK_EQ(unhoisted_invariant_instructions->erase(old_instruction),
+               to_hoist != old_instruction &&
+                   old_instruction->opcode() != HloOpcode::kConstant);
+      dfs_stack.pop_back();
+      continue;
+    }
+
+    HloInstruction* next_operand =
+        frame->instruction->mutable_operand(frame->operand_index++);
+    if (hoisted_instructions->count(next_operand) ||
+        next_operand == while_body_param) {
+      continue;
+    }
+
+    dfs_stack.push_back({next_operand, 0});
+  } while (!dfs_stack.empty());
+}
+
+// Returns true if `instruction` is worth hoisting only if it lets us hoist some
+// instruction using it.  The rationale is that hoisting these instructions will
+// prevent simplification and fusion in the while body.
+static bool NotWorthHoistingIndividually(const HloInstruction& instruction) {
+  switch (instruction.opcode()) {
+    default:
+      return false;
+
+    case HloOpcode::kBitcast:
+    case HloOpcode::kBroadcast:
+    case HloOpcode::kConstant:
+    case HloOpcode::kReverse:
+    case HloOpcode::kSlice:
+    case HloOpcode::kTuple:
+      return true;
+
+    case HloOpcode::kTranspose:
+      return ShapeUtil::TransposeIsBitcast(
+          /*input_shape=*/instruction.operand(0)->shape(),
+          /*output_shape=*/instruction.shape(), instruction.dimensions());
+
+    case HloOpcode::kReshape:
+      return ShapeUtil::ReshapeIsBitcast(
+          /*input_shape=*/instruction.operand(0)->shape(),
+          /*output_shape=*/instruction.shape());
+  }
+}
+
+// Populates `gte_set` with the GetTupleElement instructions in `while_body`
+// that access elements in the parameter tuple that don't change across
+// iterations.  Assumes `while_body` is the body computation of the while loop
+// in question.
+static void GatherInvariantGTEs(HloComputation* while_body,
+                                FlatSet<HloInstruction*>* gte_set) {
+  const HloInstruction::InstructionVector root_operands =
+      while_body->root_instruction()->operands();
+  for (int i = 0; i < root_operands.size(); i++) {
+    HloInstruction* instr = root_operands[i];
+    if (instr->opcode() == HloOpcode::kGetTupleElement &&
+        instr->tuple_index() == i &&
+        instr->operand(0) == while_body->parameter_instruction(0) &&
+        ShapeUtil::IsArray(instr->shape())) {
+      InsertOrDie(gte_set, instr);
+    }
+  }
+}
+
+static StatusOr<bool> TryHoistingInvariantInstructionsFromWhileBody(
+    HloInstruction* while_instr) {
+  auto print_no_metadata = HloPrintOptions{}.set_print_metadata(false);
+
+  if (!ShapeUtil::IsTuple(while_instr->shape())) {
+    // This restriction leaves one interesting pattern on the table:
+    //
+    //  while_body(f32[1024, 1024] %param) {
+    //    %value = expensive_op(%param)
+    //    outfeed(%value)
+    //    ROOT = %param
+    //  }
+    //
+    // If we see that pattern in the while, instead of generalizing this
+    // algorithm to work with non-tuples, we should instead add a pass that
+    // canonicalizes while loops like the above to use a tuple state.
+    return false;
+  }
+
+  string while_instr_name = while_instr->ToString(print_no_metadata);
+  VLOG(2) << "Trying to hoist from " << while_instr_name;
+
+  HloComputation* while_body = while_instr->while_body();
+
+  // Maps instructions in the while body to instructions hoisted outside the
+  // while that compute the same value.
+  FlatMap<HloInstruction*, HloInstruction*> hoisted_instructions;
+
+  // Contains instructions that can be legally hoisted, but were deemed to be
+  // unprofitable to be hoisted alone by NotWorthHoistingIndividually.  When we
+  // hoist an instruction in this set, we move it from
+  // unhoisted_invariant_instructions to hoisted_instructions.
+  FlatSet<HloInstruction*> unhoisted_invariant_instructions;
+
+  // Invariant GTE's axiomatically satisfy the constraints for
+  // unhoisted_invariant_instructions -- they can be legally hoisted, but there
+  // is no benefit to hoisting them unless something that uses it is also
+  // hoisted.
+  GatherInvariantGTEs(while_body, &unhoisted_invariant_instructions);
+
+  if (unhoisted_invariant_instructions.empty()) {
+    // There are no obviously loop invariant elements in the state being
+    // threaded through the while loop so give up.  In theory this precondition
+    // is too strong -- we could have code that e.g. permutes the elements in
+    // the while state but uses a select to pick the same value on every
+    // iteration.
+    return false;
+  }
+
+  // instructions_to_replace[i] is hoisted into a loop invariant instruction
+  // replacement_instructions[i].
+  std::vector<HloInstruction*> instructions_to_replace;
+  std::vector<HloInstruction*> replacement_instructions;
+
+  for (auto* instruction : while_body->MakeInstructionPostOrder()) {
+    if (instruction->HasSideEffect() ||
+        instruction->opcode() == HloOpcode::kParameter ||
+        !instruction->control_predecessors().empty() ||
+        !instruction->control_successors().empty()) {
+      continue;
+    }
+
+    auto is_invariant = [&](HloInstruction* op) {
+      return hoisted_instructions.find(op) != hoisted_instructions.end() ||
+             unhoisted_invariant_instructions.count(op) ||
+             op->opcode() == HloOpcode::kConstant;
+    };
+
+    if (!c_all_of(instruction->operands(), is_invariant)) {
+      continue;
+    }
+
+    if (NotWorthHoistingIndividually(*instruction)) {
+      VLOG(2) << "Adding " << instruction->ToString(print_no_metadata)
+              << " to unhoisted invariant set.";
+      // Approximately half of the instructions that reach this point are
+      // constants.  We save a bit of compile time by not putting these in the
+      // hashtable.
+      if (instruction->opcode() != HloOpcode::kConstant) {
+        InsertOrDie(&unhoisted_invariant_instructions, instruction);
+      }
+      continue;
+    }
+
+    VLOG(2) << "Hoisting " << instruction->ToString(print_no_metadata);
+
+    CreateLoopInvariantCopy(&hoisted_instructions,
+                            &unhoisted_invariant_instructions, while_instr,
+                            instruction);
+
+    instructions_to_replace.push_back(instruction);
+    replacement_instructions.push_back(
+        FindOrDie(hoisted_instructions, instruction));
+  }
+
+  if (instructions_to_replace.empty()) {
+    return false;
+  }
+
+  TF_ASSIGN_OR_RETURN(
+      WhileUtil::MakeInstructionsLiveInResult live_in_instructions_result,
+      WhileUtil::MakeInstructionsLiveIn(while_instr, replacement_instructions));
+
+  HloComputation* new_while_body =
+      live_in_instructions_result.new_while_instr->while_body();
+
+  for (int i = 0; i < instructions_to_replace.size(); i++) {
+    HloInstruction* instruction_to_replace_in_new_while =
+        FindOrDie(live_in_instructions_result.while_body_instruction_map,
+                  instructions_to_replace[i]);
+    TF_RETURN_IF_ERROR(new_while_body->ReplaceInstruction(
+        instruction_to_replace_in_new_while,
+        live_in_instructions_result.while_body_live_in_values[i]));
+  }
+
+  VLOG(1) << "Hoisted " << instructions_to_replace.size()
+          << " instructions from " << while_instr_name;
+
+  return true;
+}
+
+StatusOr<bool> WhileLoopInvariantCodeMotion::Run(HloModule* module) {
+  bool changed = false;
+  std::vector<HloInstruction*> while_instrs;
+  for (auto* comp : module->computations()) {
+    c_copy_if(comp->instructions(), std::back_inserter(while_instrs),
+              [](const HloInstruction* instr) {
+                return instr->opcode() == HloOpcode::kWhile;
+              });
+  }
+
+  for (HloInstruction* while_instr : while_instrs) {
+    // Right now we only hoist computations from the while body, but
+    // TryHoistingInvariantInstructionsFromWhileBody can be generalized to
+    // optimize the condition computation too, if needed.
+    //
+    // The transform we do here is a pessmization for while loops that execute
+    // zero times*, but at this time we expect those to be rare.  If this
+    // becomes a problem we can consider using the conditional HLO to avoid
+    // doing extra work for while loops with zero trip count.
+    //
+    // * We delete while loops that have a zero trip count, so this would have
+    //   to be a while loop with a somewhat opaque condition expression.
+
+    TF_ASSIGN_OR_RETURN(
+        bool result,
+        TryHoistingInvariantInstructionsFromWhileBody(while_instr));
+    changed |= result;
+  }
+  return changed;
+}
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h
new file mode 100644
index 0000000000000000000000000000000000000000..8c4b765b0003c48cfacb9d28e7c8259ac0927d66
--- /dev/null
+++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h
@@ -0,0 +1,39 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_WHILE_LOOP_INVARIANT_CODE_MOTION_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_WHILE_LOOP_INVARIANT_CODE_MOTION_H_
+
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+#include "tensorflow/compiler/xla/statusor.h"
+
+namespace xla {
+
+// HLO pass that rewrites while loops to hoist loop invariant instructions in
+// the while body into the computation that contains the while instruction.
+
+class WhileLoopInvariantCodeMotion : public HloPassInterface {
+ public:
+  ~WhileLoopInvariantCodeMotion() override = default;
+
+  tensorflow::StringPiece name() const override {
+    return "while-loop-invariant-code-motion";
+  }
+  StatusOr<bool> Run(HloModule* module) override;
+};
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_WHILE_LOOP_INVARIANT_CODE_MOTION_H_
diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion_test.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..799340fda905fb7d40b19b4cb79bb0fcb5629fd3
--- /dev/null
+++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion_test.cc
@@ -0,0 +1,442 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h"
+
+#include "tensorflow/compiler/xla/service/hlo_matchers.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+namespace op = xla::testing::opcode_matchers;
+
+class WhileLoopInvariantCodeMotionTest : public HloVerifiedTestBase {
+ public:
+  // Makes a computation which has one parameter, of the given shape, and always
+  // returns PRED[]{true}.  This is useful as a dummy loop condition.
+  HloComputation* MakeAlwaysTrueComputation(const Shape& param_shape,
+                                            HloModule* module);
+};
+
+static void FindOnlyWhileInstruction(HloComputation* computation,
+                                     HloInstruction** while_instruction) {
+  *while_instruction = nullptr;
+  for (auto* instr : computation->instructions()) {
+    if (instr->opcode() == HloOpcode::kWhile) {
+      ASSERT_EQ(*while_instruction, nullptr);
+      *while_instruction = instr;
+    }
+  }
+
+  ASSERT_NE(*while_instruction, nullptr);
+}
+
+HloComputation* WhileLoopInvariantCodeMotionTest::MakeAlwaysTrueComputation(
+    const Shape& param_shape, HloModule* module) {
+  HloComputation::Builder builder(TestName() + ".always_true");
+  builder.AddInstruction(
+      HloInstruction::CreateParameter(0, param_shape, "param"));
+  builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<bool>(true)));
+  return module->AddEmbeddedComputation(builder.Build());
+}
+
+TEST_F(WhileLoopInvariantCodeMotionTest, HoistOneInvariantOperation) {
+  auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  Shape while_shape =
+      ShapeUtil::MakeTupleShape({scalar_s32, scalar_s32, scalar_s32});
+
+  HloComputation* while_body = [&]() {
+    HloComputation::Builder builder(TestName() + ".while_body");
+    HloInstruction* param = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, while_shape, "param"));
+    HloInstruction* gte_0 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 0));
+    HloInstruction* gte_1 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 1));
+    HloInstruction* add_result =
+        builder.AddInstruction(HloInstruction::CreateBinary(
+            scalar_s32, HloOpcode::kAdd, gte_0, gte_1));
+    builder.AddInstruction(
+        HloInstruction::CreateTuple({gte_0, gte_1, add_result}));
+
+    return module().AddEmbeddedComputation(builder.Build());
+  }();
+
+  HloComputation::Builder builder(TestName());
+  auto* init_value = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, while_shape, "init_value"));
+  builder.AddInstruction(HloInstruction::CreateWhile(
+      while_shape, MakeAlwaysTrueComputation(while_shape, &module()),
+      while_body, init_value));
+  HloComputation* entry_computation =
+      module().AddEntryComputation(builder.Build());
+  TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop,
+                          WhileLoopInvariantCodeMotion{}.Run(&module()));
+  EXPECT_TRUE(simplified_loop);
+
+  HloInstruction* transformed_while;
+  FindOnlyWhileInstruction(entry_computation, &transformed_while);
+
+  EXPECT_THAT(entry_computation->instructions(), Contains(op::Add()));
+  EXPECT_THAT(transformed_while->while_body()->instructions(),
+              Each(Not(op::Add())));
+}
+
+TEST_F(WhileLoopInvariantCodeMotionTest, HoistInvariantOperationTree) {
+  auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  Shape while_shape =
+      ShapeUtil::MakeTupleShape({scalar_s32, scalar_s32, scalar_s32});
+
+  HloComputation* while_body = [&]() {
+    HloComputation::Builder builder(TestName() + ".while_body");
+    HloInstruction* param = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, while_shape, "param"));
+    HloInstruction* gte_0 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 0));
+    HloInstruction* gte_1 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 1));
+    HloInstruction* gte_2_loop_variant = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 2));
+
+    HloInstruction* add_result =
+        builder.AddInstruction(HloInstruction::CreateBinary(
+            scalar_s32, HloOpcode::kAdd, gte_0, gte_1));
+    HloInstruction* mul_result =
+        builder.AddInstruction(HloInstruction::CreateBinary(
+            scalar_s32, HloOpcode::kMultiply, add_result, gte_1));
+    HloInstruction* negate_result =
+        builder.AddInstruction(HloInstruction::CreateUnary(
+            scalar_s32, HloOpcode::kNegate, mul_result));
+    HloInstruction* constant = builder.AddInstruction(
+        HloInstruction::CreateConstant(Literal::CreateR0<int32>(4)));
+    HloInstruction* sub_result =
+        builder.AddInstruction(HloInstruction::CreateBinary(
+            scalar_s32, HloOpcode::kSubtract, negate_result, constant));
+    HloInstruction* divide_result =
+        builder.AddInstruction(HloInstruction::CreateBinary(
+            scalar_s32, HloOpcode::kDivide, sub_result, gte_2_loop_variant));
+    builder.AddInstruction(
+        HloInstruction::CreateTuple({gte_0, gte_1, divide_result}));
+
+    return module().AddEmbeddedComputation(builder.Build());
+  }();
+
+  HloComputation::Builder builder(TestName());
+  auto* init_value = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, while_shape, "init_value"));
+  builder.AddInstruction(HloInstruction::CreateWhile(
+      while_shape, MakeAlwaysTrueComputation(while_shape, &module()),
+      while_body, init_value));
+  HloComputation* entry_computation =
+      module().AddEntryComputation(builder.Build());
+  TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop,
+                          WhileLoopInvariantCodeMotion{}.Run(&module()));
+  EXPECT_TRUE(simplified_loop);
+
+  HloInstruction* transformed_while;
+  FindOnlyWhileInstruction(entry_computation, &transformed_while);
+
+  EXPECT_THAT(entry_computation->instructions(),
+              AllOf(Contains(op::Add()), Contains(op::Multiply()),
+                    Contains(op::Negate()), Contains(op::Subtract()),
+                    Contains(op::Constant()),
+
+                    // The division had a loop varying operand so that better
+                    // not be hoisted.
+                    Not(Contains(op::Divide()))));
+
+  EXPECT_THAT(transformed_while->while_body()->instructions(),
+              Each(Not(AnyOf(op::Add(), op::Multiply(), op::Negate(),
+                             op::Subtract(), op::Constant()))));
+
+  EXPECT_THAT(transformed_while->while_body()->instructions(),
+              Contains(op::Divide()));
+}
+
+TEST_F(WhileLoopInvariantCodeMotionTest,
+       DontHoistTriviallyLoopVaryingComputation) {
+  // Basic negative test: the add expression is not loop invariant.
+  auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  Shape while_shape = ShapeUtil::MakeTupleShape({scalar_s32, scalar_s32});
+
+  HloComputation* while_body = [&]() {
+    HloComputation::Builder builder(TestName() + ".while_body");
+    HloInstruction* param = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, while_shape, "param"));
+    HloInstruction* gte_0 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 0));
+    HloInstruction* gte_1 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 1));
+    HloInstruction* add_result =
+        builder.AddInstruction(HloInstruction::CreateBinary(
+            scalar_s32, HloOpcode::kAdd, gte_0, gte_1));
+    builder.AddInstruction(HloInstruction::CreateTuple({gte_0, add_result}));
+
+    return module().AddEmbeddedComputation(builder.Build());
+  }();
+
+  HloComputation::Builder builder(TestName());
+  auto* init_value = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, while_shape, "init_value"));
+  auto* while_inst = builder.AddInstruction(HloInstruction::CreateWhile(
+      while_shape, MakeAlwaysTrueComputation(while_shape, &module()),
+      while_body, init_value));
+
+  module().AddEntryComputation(builder.Build());
+
+  TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop,
+                          WhileLoopInvariantCodeMotion{}.Run(&module()));
+  EXPECT_FALSE(simplified_loop);
+
+  EXPECT_THAT(while_inst->while_body()->instructions(), Contains(op::Add()));
+}
+
+TEST_F(WhileLoopInvariantCodeMotionTest,
+       DontHoistLoopVaryingComputationWithAlternatingTuples) {
+  auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  Shape while_shape =
+      ShapeUtil::MakeTupleShape({scalar_s32, scalar_s32, scalar_s32});
+
+  HloComputation* while_body = [&]() {
+    HloComputation::Builder builder(TestName() + ".while_body");
+    HloInstruction* param = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, while_shape, "param"));
+    HloInstruction* gte_0 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 0));
+    HloInstruction* gte_1 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 1));
+    HloInstruction* add_result =
+        builder.AddInstruction(HloInstruction::CreateBinary(
+            scalar_s32, HloOpcode::kAdd, gte_0, gte_1));
+    builder.AddInstruction(
+        HloInstruction::CreateTuple({gte_1, gte_0, add_result}));
+
+    return module().AddEmbeddedComputation(builder.Build());
+  }();
+
+  HloComputation::Builder builder(TestName());
+  auto* init_value = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, while_shape, "init_value"));
+  auto* while_inst = builder.AddInstruction(HloInstruction::CreateWhile(
+      while_shape, MakeAlwaysTrueComputation(while_shape, &module()),
+      while_body, init_value));
+
+  module().AddEntryComputation(builder.Build());
+  TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop,
+                          WhileLoopInvariantCodeMotion{}.Run(&module()));
+  EXPECT_FALSE(simplified_loop);
+
+  EXPECT_THAT(while_inst->while_body()->instructions(), Contains(op::Add()));
+}
+
+TEST_F(WhileLoopInvariantCodeMotionTest, DontHoistInstructionWithSideEffects) {
+  auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  Shape while_shape = ShapeUtil::MakeTupleShape({scalar_s32, scalar_s32});
+
+  HloComputation* while_body = [&]() {
+    HloComputation::Builder builder(TestName() + ".while_body");
+    HloInstruction* param = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, while_shape, "param"));
+    HloInstruction* gte_0 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 0));
+    HloInstruction* gte_1 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 1));
+    builder.AddInstruction(
+        HloInstruction::CreateOutfeed(scalar_s32, gte_0, ""));
+    builder.AddInstruction(HloInstruction::CreateTuple({gte_0, gte_1}));
+
+    return module().AddEmbeddedComputation(builder.Build());
+  }();
+
+  HloComputation::Builder builder(TestName());
+  auto* init_value = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, while_shape, "init_value"));
+  auto* while_inst = builder.AddInstruction(HloInstruction::CreateWhile(
+      while_shape, MakeAlwaysTrueComputation(while_shape, &module()),
+      while_body, init_value));
+
+  module().AddEntryComputation(builder.Build());
+
+  TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop,
+                          WhileLoopInvariantCodeMotion{}.Run(&module()));
+  EXPECT_FALSE(simplified_loop);
+
+  EXPECT_THAT(while_inst->while_body()->instructions(),
+              Contains(op::Outfeed()));
+}
+
+TEST_F(WhileLoopInvariantCodeMotionTest, DontHoistBitcastAlone) {
+  // The bitcast's user, an outfeed, can't be hoisted, so don't hoist the
+  // bitcast either.
+  auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  auto scalar_f32 = ShapeUtil::MakeShape(F32, {});
+  Shape while_shape = ShapeUtil::MakeTupleShape({scalar_s32, scalar_s32});
+
+  HloComputation* while_body = [&]() {
+    HloComputation::Builder builder(TestName() + ".while_body");
+    HloInstruction* param = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, while_shape, "param"));
+    HloInstruction* gte_0 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 0));
+    HloInstruction* gte_1 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 1));
+    HloInstruction* bitcast_inst = builder.AddInstruction(
+        HloInstruction::CreateUnary(scalar_f32, HloOpcode::kBitcast, gte_0));
+    builder.AddInstruction(
+        HloInstruction::CreateOutfeed(scalar_f32, bitcast_inst, ""));
+    builder.AddInstruction(HloInstruction::CreateTuple({gte_0, gte_1}));
+
+    return module().AddEmbeddedComputation(builder.Build());
+  }();
+
+  HloComputation::Builder builder(TestName());
+  auto* init_value = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, while_shape, "init_value"));
+  auto* while_inst = builder.AddInstruction(HloInstruction::CreateWhile(
+      while_shape, MakeAlwaysTrueComputation(while_shape, &module()),
+      while_body, init_value));
+
+  module().AddEntryComputation(builder.Build());
+
+  TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop,
+                          WhileLoopInvariantCodeMotion{}.Run(&module()));
+  EXPECT_FALSE(simplified_loop);
+
+  EXPECT_THAT(while_inst->while_body()->instructions(),
+              Contains(op::Outfeed()));
+  EXPECT_THAT(while_inst->while_body()->instructions(),
+              Contains(op::Bitcast()));
+}
+
+TEST_F(WhileLoopInvariantCodeMotionTest, HoistBitcastIfNeeded) {
+  // The bitcast's user can be hoisted, so hoist the bitcast too.
+  auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  auto scalar_f32 = ShapeUtil::MakeShape(F32, {});
+  Shape while_shape =
+      ShapeUtil::MakeTupleShape({scalar_s32, scalar_f32, scalar_f32});
+
+  HloComputation* while_body = [&]() {
+    HloComputation::Builder builder(TestName() + ".while_body");
+    HloInstruction* param = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, while_shape, "param"));
+    HloInstruction* gte_0 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 0));
+    HloInstruction* gte_1 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_f32, param, 1));
+    HloInstruction* bitcast_inst = builder.AddInstruction(
+        HloInstruction::CreateUnary(scalar_f32, HloOpcode::kBitcast, gte_0));
+    HloInstruction* add_inst =
+        builder.AddInstruction(HloInstruction::CreateBinary(
+            scalar_f32, HloOpcode::kAdd, bitcast_inst, gte_1));
+    builder.AddInstruction(
+        HloInstruction::CreateTuple({gte_0, gte_1, add_inst}));
+
+    return module().AddEmbeddedComputation(builder.Build());
+  }();
+
+  HloComputation::Builder builder(TestName());
+  auto* init_value = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, while_shape, "init_value"));
+  builder.AddInstruction(HloInstruction::CreateWhile(
+      while_shape, MakeAlwaysTrueComputation(while_shape, &module()),
+      while_body, init_value));
+
+  HloComputation* entry_computation =
+      module().AddEntryComputation(builder.Build());
+
+  TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop,
+                          WhileLoopInvariantCodeMotion{}.Run(&module()));
+  EXPECT_TRUE(simplified_loop);
+
+  HloInstruction* transformed_while;
+  FindOnlyWhileInstruction(entry_computation, &transformed_while);
+
+  EXPECT_THAT(transformed_while->while_body()->instructions(),
+              Each(Not(op::Add())));
+  EXPECT_THAT(transformed_while->while_body()->instructions(),
+              Each(Not(op::Bitcast())));
+  EXPECT_THAT(entry_computation->instructions(), Contains(op::Add()));
+  EXPECT_THAT(entry_computation->instructions(), Contains(op::Bitcast()));
+}
+
+TEST_F(WhileLoopInvariantCodeMotionTest, DontHoistControlDependencies) {
+  auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  Shape while_shape =
+      ShapeUtil::MakeTupleShape({scalar_s32, scalar_s32, scalar_s32});
+
+  HloComputation* while_body;
+  {
+    HloComputation::Builder builder(TestName() + ".while_body");
+    HloInstruction* param = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, while_shape, "param"));
+    HloInstruction* gte_0 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 0));
+    HloInstruction* gte_1 = builder.AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 1));
+    HloInstruction* add_result =
+        builder.AddInstruction(HloInstruction::CreateBinary(
+            scalar_s32, HloOpcode::kAdd, gte_0, gte_1));
+    TF_ASSERT_OK(param->AddControlDependencyTo(add_result));
+    builder.AddInstruction(
+        HloInstruction::CreateTuple({gte_0, gte_1, add_result}));
+
+    while_body = module().AddEmbeddedComputation(builder.Build());
+  }
+
+  HloComputation::Builder builder(TestName());
+  auto* init_value = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, while_shape, "init_value"));
+  builder.AddInstruction(HloInstruction::CreateWhile(
+      while_shape, MakeAlwaysTrueComputation(while_shape, &module()),
+      while_body, init_value));
+  module().AddEntryComputation(builder.Build());
+  TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop,
+                          WhileLoopInvariantCodeMotion{}.Run(&module()));
+  EXPECT_FALSE(simplified_loop);
+}
+
+TEST_F(WhileLoopInvariantCodeMotionTest, BodyHasNonTupleRoot) {
+  auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  Shape while_shape = ShapeUtil::MakeTupleShape({scalar_s32, scalar_s32});
+
+  HloComputation* while_body = [&]() {
+    HloComputation::Builder builder(TestName() + ".passthrough");
+    HloInstruction* param = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, while_shape, "param"));
+    HloComputation* result = module().AddEmbeddedComputation(builder.Build());
+
+    result->AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 1));
+    return result;
+  }();
+
+  HloComputation::Builder builder(TestName());
+  auto* init_value = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, while_shape, "init_value"));
+  builder.AddInstruction(HloInstruction::CreateWhile(
+      while_shape, MakeAlwaysTrueComputation(while_shape, &module()),
+      while_body, init_value));
+  module().AddEntryComputation(builder.Build());
+  TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop,
+                          WhileLoopInvariantCodeMotion{}.Run(&module()));
+  EXPECT_FALSE(simplified_loop);
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
index b38ee907d70e29093c5cef718e1432663015728b..87a7f86f4ec9844de3e350d7774093dd6248dd83 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
@@ -236,7 +236,7 @@ static optional<int64> GetLoopTripCount(HloInstruction* while_op) {
       VLOG(2) << "Couldn't evaluate while cond: " << result.status();
       return nullopt;
     }
-    return result.ValueOrDie()->GetArraySlice<bool>() ==
+    return result.ValueOrDie()->data<bool>() ==
            tensorflow::gtl::ArraySlice<bool>{true};
   };
 
@@ -289,7 +289,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
   // Don't try this transformation if the while loop isn't removable, since if
   // it succeeds ultimately we're going to have to replace the old while loop
   // with a new one.
-  if (!while_op->parent()->IsRemovable(while_op)) {
+  if (!while_op->parent()->IsRemovable(while_op) || while_op->HasSideEffect()) {
     VLOG(2) << "Can't remove dead parameters from non-removable while op.";
     return false;
   }
@@ -306,6 +306,13 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
     return false;
   }
 
+  if (while_body_root->opcode() != HloOpcode::kTuple) {
+    VLOG(2) << "While body's root is not a tuple(...) instruction.";
+    return false;
+  }
+
+  auto print_no_metadata = HloPrintOptions().set_print_metadata(false);
+
   // Bail if param0 of while_cond or while_body has users which aren't of type
   // get-tuple-element.
   for (const HloInstruction* instr : {while_body->parameter_instruction(0),
@@ -313,9 +320,10 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
     for (const HloInstruction* user : instr->users()) {
       if (user->opcode() != HloOpcode::kGetTupleElement) {
         VLOG(2) << "Cowardly refusing to analyze while loop with "
-                << instr->ToStringNoMetadata()
-                << " used by non-GTE instruction " << user->ToStringNoMetadata()
-                << " in computation " << instr->parent()->name();
+                << instr->ToString(print_no_metadata)
+                << " used by non-GTE instruction "
+                << user->ToString(print_no_metadata) << " in computation "
+                << instr->parent()->name();
         return false;
       }
     }
@@ -351,7 +359,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
 
       used_tuple_indices.insert(user->tuple_index());
       if (used_tuple_indices.size() == tuple_size) {
-        VLOG(2) << "Loop " << while_op->ToStringNoMetadata()
+        VLOG(2) << "Loop " << while_op->ToString(print_no_metadata)
                 << " uses all of its inputs; no simplification possible.";
         return false;
       }
@@ -375,7 +383,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
       used_tuple_indices.insert(i);
 
       if (used_tuple_indices.size() == tuple_size) {
-        VLOG(2) << "Loop " << while_op->ToStringNoMetadata()
+        VLOG(2) << "Loop " << while_op->ToString(print_no_metadata)
                 << " uses all of its inputs; no simplification possible.";
         return false;
       }
@@ -387,7 +395,8 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
   CHECK_LT(used_tuple_indices.size(), tuple_size);
 
   VLOG(1) << "Eliminating " << tuple_size - used_tuple_indices.size()
-          << " elements from tuple of " << while_op->ToStringNoMetadata();
+          << " elements from tuple of "
+          << while_op->ToString(print_no_metadata);
 
   // Build up maps from the old/new to the new/old tuple indices.
   std::vector<int64> new_to_old_tuple_idx(used_tuple_indices.begin(),
@@ -431,7 +440,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
         continue;
       }
       CHECK_EQ(user->opcode(), HloOpcode::kGetTupleElement)
-          << user->ToStringNoMetadata();
+          << user->ToString(print_no_metadata);
 
       int64 old_idx = user->tuple_index();
       auto new_idx_iter = old_to_new_tuple_idx.find(old_idx);
@@ -446,14 +455,14 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
         CHECK(user->user_count() == 0 ||
               user->user_count() == 1 &&
                   user->users().front() == while_body_root)
-            << "Instruction " << user->ToStringNoMetadata()
+            << "Instruction " << user->ToString(print_no_metadata)
             << " should be unused (except by root of while body), but has "
                "users: {"
             << tensorflow::str_util::Join(
                    user->users(), ", ",
-                   [](string* out, const HloInstruction* instr) {
+                   [&](string* out, const HloInstruction* instr) {
                      tensorflow::strings::StrAppend(
-                         out, instr->ToStringNoMetadata());
+                         out, instr->ToString(print_no_metadata));
                    })
             << "}";
 
@@ -558,7 +567,7 @@ static StatusOr<bool> TryRemoveWhileLoop(HloInstruction* while_op) {
   // the loop aren't removed, just cloned and added back to the loop.
   // Nevertheless our infrastructure sees loop simplification as removal of
   // these nodes and currently doesn't allow it.
-  if (!while_op->parent()->IsRemovable(while_op)) {
+  if (!while_op->parent()->IsRemovable(while_op) || while_op->HasSideEffect()) {
     VLOG(2) << "Not attempting to remove while loop it is not removable: "
             << while_op->ToShortString();
     return false;
@@ -586,7 +595,9 @@ static StatusOr<bool> TryRemoveWhileLoop(HloInstruction* while_op) {
     auto call_op = computation->AddInstruction(HloInstruction::CreateCall(
         while_op->shape(), while_op->operands(), while_op->while_body()));
     TF_RETURN_IF_ERROR(computation->ReplaceInstruction(while_op, call_op));
-    TF_RETURN_IF_ERROR(CallInliner::Inline(call_op));
+    TF_ASSIGN_OR_RETURN(auto inlined_instructions_map,
+                        CallInliner::Inline(call_op));
+    (void)inlined_instructions_map;
     return true;
   }
   return false;
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc
index d99b31dc0037968bc88d5f22d53309a6a4546963..c5183f8d3aee99696ed4114c3f7e451888222137 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc
@@ -418,5 +418,32 @@ TEST_F(WhileLoopSimplifierTest, RemoveUnusedOperand) {
                      op::GetTupleElement(op::Parameter(0), /*tuple_index=*/1)));
 }
 
+TEST_F(WhileLoopSimplifierTest, BodyHasNonTupleRoot) {
+  auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  Shape while_shape = ShapeUtil::MakeTupleShape({scalar_s32, scalar_s32});
+
+  HloComputation* while_body = [&]() {
+    HloComputation::Builder builder(TestName() + ".passthrough");
+    HloInstruction* param = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, while_shape, "param"));
+    HloComputation* result = module().AddEmbeddedComputation(builder.Build());
+
+    result->AddInstruction(
+        HloInstruction::CreateGetTupleElement(scalar_s32, param, 1));
+    return result;
+  }();
+
+  HloComputation::Builder builder(TestName());
+  auto* init_value = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, while_shape, "init_value"));
+  builder.AddInstruction(HloInstruction::CreateWhile(
+      while_shape, MakeAlwaysTrueComputation(while_shape, &module()),
+      while_body, init_value));
+  module().AddEntryComputation(builder.Build());
+  TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop,
+                          WhileLoopSimplifier{}.Run(&module()));
+  EXPECT_FALSE(simplified_loop);
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/while_util.cc b/tensorflow/compiler/xla/service/while_util.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e20b25e4a08a946f6b58575a4d4e557744f8035c
--- /dev/null
+++ b/tensorflow/compiler/xla/service/while_util.cc
@@ -0,0 +1,140 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/while_util.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/tuple_util.h"
+
+namespace xla {
+
+static StatusOr<HloComputation*> WidenWhileCondition(
+    HloComputation* narrow_condition, const Shape& wide_shape) {
+  const Shape& narrow_shape =
+      narrow_condition->parameter_instruction(0)->shape();
+
+  HloComputation* wide_while_cond = [&]() {
+    HloComputation::Builder builder(
+        tensorflow::strings::StrCat("wide.", narrow_condition->name()));
+    builder.AddInstruction(
+        HloInstruction::CreateParameter(0, wide_shape, "wide_param"));
+
+    // This is needed so that the root instruction is shaped as a PRED[] -- we
+    // need to get this right to begin with since we can't mutate the type of
+    // the root instruction later.  We later change the root instruction to
+    // something more appropriate.
+    builder.AddInstruction(
+        HloInstruction::CreateConstant(Literal::CreateR0<bool>(false)));
+    return narrow_condition->parent()->AddEmbeddedComputation(builder.Build());
+  }();
+
+  HloInstruction* truncated_parameter =
+      TupleUtil::ExtractPrefix(wide_while_cond->parameter_instruction(0),
+                               narrow_shape.tuple_shapes_size());
+  HloInstruction* call_narrow_cond = wide_while_cond->AddInstruction(
+      HloInstruction::CreateCall(ShapeUtil::MakeShape(PRED, {}),
+                                 {truncated_parameter}, narrow_condition));
+
+  wide_while_cond->set_root_instruction(call_narrow_cond);
+
+  TF_RETURN_IF_ERROR(CallInliner::Inline(call_narrow_cond).status());
+  return wide_while_cond;
+}
+
+static StatusOr<std::pair<HloComputation*, CallInliner::InlinedInstructionMap>>
+WidenWhileBody(HloComputation* narrow_body, const Shape& wide_shape) {
+  const Shape& narrow_shape = narrow_body->parameter_instruction(0)->shape();
+
+  HloComputation* wide_while_body = [&]() {
+    HloComputation::Builder builder(
+        tensorflow::strings::StrCat("wide.", narrow_body->name()));
+    builder.AddInstruction(
+        HloInstruction::CreateParameter(0, wide_shape, "wide_param"));
+    return narrow_body->parent()->AddEmbeddedComputation(builder.Build());
+  }();
+
+  HloInstruction* wide_parameter = wide_while_body->parameter_instruction(0);
+  HloInstruction* truncated_parameter = TupleUtil::ExtractPrefix(
+      wide_parameter, narrow_shape.tuple_shapes_size());
+  HloInstruction* call_narrow_body =
+      wide_while_body->AddInstruction(HloInstruction::CreateCall(
+          narrow_shape, {truncated_parameter}, narrow_body));
+
+  std::vector<HloInstruction*> live_through_values;
+  for (int i = narrow_shape.tuple_shapes_size();
+       i < wide_shape.tuple_shapes_size(); i++) {
+    live_through_values.push_back(
+        wide_while_body->AddInstruction(HloInstruction::CreateGetTupleElement(
+            wide_shape.tuple_shapes(i), wide_parameter, i)));
+  }
+
+  wide_while_body->set_root_instruction(
+      TupleUtil::AppendSuffix(call_narrow_body, live_through_values));
+
+  TF_ASSIGN_OR_RETURN(auto inlined_instructions_map,
+                      CallInliner::Inline(call_narrow_body));
+  return {{wide_while_body, std::move(inlined_instructions_map)}};
+}
+
+/*static*/ StatusOr<WhileUtil::MakeInstructionsLiveInResult>
+WhileUtil::MakeInstructionsLiveIn(
+    HloInstruction* while_instr,
+    tensorflow::gtl::ArraySlice<HloInstruction*> instructions) {
+  CHECK(ShapeUtil::IsTuple(while_instr->shape()));
+
+  int64 elements_in_old_while_shape = while_instr->shape().tuple_shapes_size();
+  Shape new_while_shape = while_instr->shape();
+  for (auto* instruction : instructions) {
+    *new_while_shape.add_tuple_shapes() = instruction->shape();
+  }
+
+  TF_ASSIGN_OR_RETURN(
+      HloComputation * new_while_condition,
+      WidenWhileCondition(while_instr->while_condition(), new_while_shape));
+
+  HloComputation* new_while_body;
+  CallInliner::InlinedInstructionMap inlined_instructions_map;
+  TF_ASSIGN_OR_RETURN(
+      std::tie(new_while_body, inlined_instructions_map),
+      WidenWhileBody(while_instr->while_body(), new_while_shape));
+
+  HloInstruction* new_while_init =
+      TupleUtil::AppendSuffix(while_instr->mutable_operand(0), instructions);
+  HloComputation* containing_computation = while_instr->parent();
+  HloInstruction* new_while = containing_computation->AddInstruction(
+      HloInstruction::CreateWhile(new_while_shape, new_while_condition,
+                                  new_while_body, new_while_init));
+  TF_RETURN_IF_ERROR(containing_computation->ReplaceInstruction(
+      while_instr, TupleUtil::ExtractPrefix(
+                       new_while, while_instr->shape().tuple_shapes_size())));
+
+  HloInstruction* while_body_param = new_while_body->parameter_instruction(0);
+  std::vector<HloInstruction*> live_in_instructions;
+  for (int64 i = elements_in_old_while_shape;
+       i < new_while_shape.tuple_shapes_size(); i++) {
+    live_in_instructions.push_back(
+        new_while_body->AddInstruction(HloInstruction::CreateGetTupleElement(
+            instructions[i - elements_in_old_while_shape]->shape(),
+            while_body_param, i)));
+  }
+
+  WhileUtil::MakeInstructionsLiveInResult result;
+
+  result.new_while_instr = new_while;
+  result.while_body_live_in_values = std::move(live_in_instructions);
+  result.while_body_instruction_map = std::move(inlined_instructions_map);
+
+  return std::move(result);
+}
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/while_util.h b/tensorflow/compiler/xla/service/while_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..3600b5a80d26e37fdb7d5173c3b8743734306390
--- /dev/null
+++ b/tensorflow/compiler/xla/service/while_util.h
@@ -0,0 +1,58 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_WHILE_UTIL_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_WHILE_UTIL_H_
+
+#include "tensorflow/compiler/xla/service/call_inliner.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+
+namespace xla {
+class WhileUtil {
+ public:
+  // Holds a return value from MakeInstructionsLiveIn.
+  struct MakeInstructionsLiveInResult {
+    // The new while operation that has the requested values live in.
+    HloInstruction* new_while_instr;
+
+    // The i'th element of `while_body_live_in_values` is an instruction in the
+    // while body that holds the i'th *newly added* live in value at runtime.
+    std::vector<HloInstruction*> while_body_live_in_values;
+
+    // `while_body_instruction_map` maps instructions in the original while body
+    // to the corresponding instructions in the body for the newly created while
+    // operation.
+    CallInliner::InlinedInstructionMap while_body_instruction_map;
+  };
+
+  // Replaces `while_instr` with a new while instruction that is equivalent to
+  // `while_instr`, except that it has all of the HLO instructions in
+  // `instructions` as live-in, loop invariant values.  These new live in values
+  // are represented as new elements appended to the parameter of the while
+  // loop, which must be of tuple shape.  GetTupleElement instructions computing
+  // each new live in value is returned in the `while_body_live_in_values`
+  // vector.
+  //
+  // Precondition: `while_instr` must have a tuple shaped state.
+  //
+  // Every instruction in `instructions` must be contained in the computation
+  // that contains `while_instr`.
+  static StatusOr<MakeInstructionsLiveInResult> MakeInstructionsLiveIn(
+      HloInstruction* while_instr,
+      tensorflow::gtl::ArraySlice<HloInstruction*> instructions);
+};
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_WHILE_UTIL_H_
diff --git a/tensorflow/compiler/xla/service/while_util_test.cc b/tensorflow/compiler/xla/service/while_util_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..cf0d0db99bd92b6b364b4e28e56a0902d4065963
--- /dev/null
+++ b/tensorflow/compiler/xla/service/while_util_test.cc
@@ -0,0 +1,130 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/while_util.h"
+
+#include "tensorflow/compiler/xla/service/hlo_matchers.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h"
+
+namespace xla {
+namespace {
+
+namespace op = ::xla::testing::opcode_matchers;
+
+StatusOr<std::unique_ptr<HloModule>> GetParsedModule(
+    HloComputation** entry_computation, HloInstruction** param0,
+    HloInstruction** param1, HloInstruction** param2) {
+  const char* const hlo_string = R"(
+HloModule ModuleWithWhile
+
+while_body {
+  ROOT p_body = (f32[32,32]{1,0}, f32[32,32]{1,0}) parameter(0)
+}
+
+while_condition {
+  p_cond = f32[32,32]{1,0} parameter(0)
+  ROOT result = pred[] constant(true)
+}
+
+ENTRY entry {
+  p_entry_0 = f32[32,32]{1,0} parameter(0)
+  p_entry_1 = s32[32,32]{1,0} parameter(1)
+  p_entry_2 = s64[32,32]{1,0} parameter(2)
+  while_init = (f32[32,32]{1,0}, f32[32,32]{1,0}) tuple(p_entry_0, p_entry_0)
+  ROOT while = (f32[32,32]{1,0}, f32[32,32]{1,0}) while(while_init), condition=while_condition, body=while_body
+}
+)";
+
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
+                      tools::Parse(hlo_string));
+
+  *entry_computation = module->entry_computation();
+  *param0 = (*entry_computation)->parameter_instruction(0);
+  *param1 = (*entry_computation)->parameter_instruction(1);
+  *param2 = (*entry_computation)->parameter_instruction(2);
+
+  return std::move(module);
+}
+
+TEST(WhileUtil, MakeZeroInstructionsLiveOp) {
+  HloInstruction *param0, *param1, *param2;
+  HloComputation* entry_computation;
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<HloModule> module,
+      GetParsedModule(&entry_computation, &param0, &param1, &param2));
+
+  HloInstruction* while_instr = entry_computation->root_instruction();
+  ASSERT_EQ(while_instr->opcode(), HloOpcode::kWhile);
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      WhileUtil::MakeInstructionsLiveInResult make_live_in_result,
+      WhileUtil::MakeInstructionsLiveIn(while_instr, /*instructions=*/{}));
+
+  HloInstruction* new_while_instr = make_live_in_result.new_while_instr;
+
+  EXPECT_THAT(
+      entry_computation->root_instruction(),
+      op::Tuple(op::GetTupleElement(::testing::Eq(new_while_instr), 0),
+                op::GetTupleElement(::testing::Eq(new_while_instr), 1)));
+
+  auto param_reconstructed =
+      op::Tuple(op::GetTupleElement(op::Parameter(0), 0),
+                op::GetTupleElement(op::Parameter(0), 1));
+
+  EXPECT_THAT(new_while_instr->while_body()->root_instruction(),
+              op::Tuple(op::GetTupleElement(param_reconstructed, 0),
+                        op::GetTupleElement(param_reconstructed, 1)));
+}
+
+TEST(WhileUtilTest, MakeTwoInstructionsLive) {
+  HloInstruction *param0, *param1, *param2;
+  HloComputation* entry_computation;
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<HloModule> module,
+      GetParsedModule(&entry_computation, &param0, &param1, &param2));
+
+  HloInstruction* while_instr = entry_computation->root_instruction();
+  ASSERT_EQ(while_instr->opcode(), HloOpcode::kWhile);
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      WhileUtil::MakeInstructionsLiveInResult make_live_in_result,
+      WhileUtil::MakeInstructionsLiveIn(while_instr,
+                                        /*instructions=*/{param0, param1}));
+
+  HloInstruction* new_while_instr = make_live_in_result.new_while_instr;
+
+  XLA_VLOG_LINES(3, module->ToString());
+
+  EXPECT_THAT(
+      entry_computation->root_instruction(),
+      op::Tuple(op::GetTupleElement(::testing::Eq(new_while_instr), 0),
+                op::GetTupleElement(::testing::Eq(new_while_instr), 1)));
+
+  auto first_half_param_reconstructed =
+      op::Tuple(op::GetTupleElement(op::Parameter(0), 0),
+                op::GetTupleElement(op::Parameter(0), 1));
+
+  EXPECT_THAT(new_while_instr->while_body()->root_instruction(),
+              op::Tuple(op::GetTupleElement(first_half_param_reconstructed, 0),
+                        op::GetTupleElement(first_half_param_reconstructed, 1),
+                        op::GetTupleElement(op::Parameter(0), 2),
+                        op::GetTupleElement(op::Parameter(0), 3)));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.cc b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.cc
new file mode 100644
index 0000000000000000000000000000000000000000..aa40b5cb264803097f52966d6f61f1f41b6b3017
--- /dev/null
+++ b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.cc
@@ -0,0 +1,50 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h"
+
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace xla {
+
+StatusOr<bool> ZeroSizedHloElimination::Run(HloModule* module) {
+  bool changed = false;
+  for (HloComputation* comp : module->MakeNonfusionComputations()) {
+    for (HloInstruction* instruction : comp->MakeInstructionPostOrder()) {
+      if (instruction->HasSideEffect() ||
+          ShapeUtil::IsTuple(instruction->shape())) {
+        continue;
+      }
+      if (comp->IsRemovable(instruction) &&
+          ShapeUtil::HasZeroElements(instruction->shape())) {
+        TF_RETURN_IF_ERROR(comp->ReplaceWithNewInstruction(
+            instruction, HloInstruction::CreateConstant(
+                             Literal::CreateFromShape(instruction->shape()))));
+        changed = true;
+      }
+    }
+  }
+  return changed;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h
new file mode 100644
index 0000000000000000000000000000000000000000..63afab4206eb072e84745ced3307295c0516da7b
--- /dev/null
+++ b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h
@@ -0,0 +1,32 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_ZERO_SIZED_HLO_ELIMINATION_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_ZERO_SIZED_HLO_ELIMINATION_H_
+
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+// HLO pass that replaces zero sized Hlos with an zero sized constant literal.
+namespace xla {
+class ZeroSizedHloElimination : public HloPassInterface {
+ public:
+  StatusOr<bool> Run(HloModule* module) override;
+  tensorflow::StringPiece name() const override {
+    return "zero_sized_hlo_elimination";
+  }
+};
+}  // namespace xla
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_ZERO_SIZED_HLO_ELIMINATION_H_
diff --git a/tensorflow/compiler/xla/service/zero_sized_hlo_elimination_test.cc b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4f8cdc1e0e73cdaa8675fc945ba3dbe19ce3da7d
--- /dev/null
+++ b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination_test.cc
@@ -0,0 +1,77 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h"
+
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/shape_inference.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/test_helpers.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace xla {
+namespace {
+class ZeroSizedHloEliminationTest : public HloTestBase {
+ protected:
+  ZeroSizedHloEliminationTest()
+      : HloTestBase(),
+        builder_("zero_sized_computation"),
+        zero_sized_param_(
+            builder_.AddInstruction(HloInstruction::CreateParameter(
+                0, ShapeUtil::MakeShape(F32, {3, 0}), "zero sized param"))) {}
+
+  StatusOr<bool> RunZeroSizedElimination() {
+    HloModule module("zero_sized_elimination_test_module");
+    module.AddEntryComputation(builder_.Build());
+    return ZeroSizedHloElimination{}.Run(&module);
+  }
+
+  HloComputation::Builder builder_;
+  HloInstruction* zero_sized_param_;
+};
+
+TEST_F(ZeroSizedHloEliminationTest, EliminatedZeroSizedOp) {
+  builder_.AddInstruction(HloInstruction::CreateUnary(
+      zero_sized_param_->shape(), HloOpcode::kTanh, zero_sized_param_));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed, RunZeroSizedElimination());
+  EXPECT_TRUE(changed);
+}
+
+TEST_F(ZeroSizedHloEliminationTest, DoesNotEliminateParameter) {
+  TF_ASSERT_OK_AND_ASSIGN(bool changed, RunZeroSizedElimination());
+  EXPECT_FALSE(changed);
+}
+
+TEST_F(ZeroSizedHloEliminationTest, DoesNotEliminateSideEffects) {
+  builder_.AddInstruction(HloInstruction::CreateSend(zero_sized_param_, 0));
+  TF_ASSERT_OK_AND_ASSIGN(bool changed, RunZeroSizedElimination());
+  EXPECT_FALSE(changed);
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/shape_layout.cc b/tensorflow/compiler/xla/shape_layout.cc
index 5bf9842a6ce7be747f58c10f302f85c6f82ac6f9..789eba5780d37e1fd4d80ec881855951c8bba0eb 100644
--- a/tensorflow/compiler/xla/shape_layout.cc
+++ b/tensorflow/compiler/xla/shape_layout.cc
@@ -32,13 +32,13 @@ tensorflow::Status ShapeLayout::CopyLayoutFromShape(const Shape& other_shape) {
   return tensorflow::Status::OK();
 }
 
-tensorflow::Status ShapeLayout::AssignLayoutToShape(Shape* other_shape) const {
-  if (!ShapeUtil::Compatible(*other_shape, shape_)) {
+tensorflow::Status ShapeLayout::AssignLayoutToShape(Shape* to_shape) const {
+  if (!ShapeUtil::Compatible(*to_shape, shape_)) {
     return InvalidArgument("Shape %s is not compatible with shape %s",
-                           ShapeUtil::HumanString(*other_shape).c_str(),
+                           ShapeUtil::HumanString(*to_shape).c_str(),
                            ShapeUtil::HumanString(shape()).c_str());
   }
-  *other_shape = shape_;
+  *to_shape = shape_;
   return tensorflow::Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/shape_layout.h b/tensorflow/compiler/xla/shape_layout.h
index 92564660f21bf1b596c4b9ca04c07eaca27ed192..4c83750f3e6f3c735db66d8e0b86ae3f43e5ca11 100644
--- a/tensorflow/compiler/xla/shape_layout.h
+++ b/tensorflow/compiler/xla/shape_layout.h
@@ -38,18 +38,19 @@ class ShapeLayout {
   explicit ShapeLayout(const Shape& shape) : shape_(shape) {}
 
   // Assigns the layouts in this ShapeLayout to the Layout fields of the given
-  // shape. 'shape' and the shape of the ShapeLayout object must be compatible.
-  tensorflow::Status AssignLayoutToShape(Shape* shape) const;
+  // shape. 'to_shape' and the shape of the ShapeLayout object must be
+  // compatible.
+  tensorflow::Status AssignLayoutToShape(Shape* to_shape) const;
 
   // Returns true if the Layouts in this ShapeLayout match the layouts in the
   // given shape. Returns false otherwise. If the given shape is not compatible
   // with the ShapeLayout's shape, then false is returned.
   bool MatchesLayoutInShape(const Shape& shape) const;
 
-  // Copies the layout from the given shape into this ShapeLayout. 'shape' must
-  // be compatible with the ShapeLayout's shape, and 'shape' must have a layout
-  // (LayoutUtil::HasLayout).
-  tensorflow::Status CopyLayoutFromShape(const Shape& shape);
+  // Copies the layout from the given shape into this ShapeLayout. 'other_shape'
+  // must be compatible with the ShapeLayout's shape, and 'other_shape' must
+  // have a layout (LayoutUtil::HasLayout).
+  tensorflow::Status CopyLayoutFromShape(const Shape& other_shape);
 
   // Clears (Layout::Clear) all the Layouts stored in this object.
   void Clear();
diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h
index bf8d19015079f2ce0bd450594040ed818f94b66b..d752619bd65751779c24f061e44e206d66b01465 100644
--- a/tensorflow/compiler/xla/shape_tree.h
+++ b/tensorflow/compiler/xla/shape_tree.h
@@ -238,7 +238,7 @@ class ShapeTree {
   //           (or compatible).
   //   index : the index of the element in the shape. See ShapeUtil::GetSubshape
   //           for definition of index.
-  //   data : The data value at this elemnt.
+  //   data : The data value at this element.
   template <typename Fn>
   void ForEachElement(const Fn& func) const;
 
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 74fa0b2f2e740310be23661caef3f19e24e4087b..cba73322fa924785fbc73a4e931b5f27227d89b9 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <algorithm>
 #include <functional>
 #include <numeric>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -58,36 +59,47 @@ std::ostream& operator<<(std::ostream& out, const ShapeIndex& shape_index) {
   return out;
 }
 
+std::ostream& operator<<(std::ostream& out, const ShapeIndexView& shape_index) {
+  out << shape_index.ToString();
+  return out;
+}
+
 namespace {
 
 // Recursive helper for comparing the equality of two shapes. Returns true if
 // the shapes are the same. If compare_layouts is true, then layouts must also
 // match.
 bool CompareShapes(const Shape& lhs, const Shape& rhs, bool compare_layouts) {
-  if (ShapeUtil::IsTuple(lhs)) {
-    return ShapeUtil::IsTuple(rhs) &&
+  if (ShapeUtil::IsTuple(lhs) || ShapeUtil::IsTuple(rhs)) {
+    return ShapeUtil::IsTuple(lhs) && ShapeUtil::IsTuple(rhs) &&
            ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(),
                            [=](const Shape& l, const Shape& r) {
                              return CompareShapes(l, r, compare_layouts);
                            });
+  } else if (ShapeUtil::IsOpaque(lhs) || ShapeUtil::IsOpaque(rhs)) {
+    return ShapeUtil::IsOpaque(lhs) && ShapeUtil::IsOpaque(rhs);
   }
-  // Explicitly compare the fields rather than using MessageDifferencer because
-  // we want empty layouts to be treated identically to missing layouts.
+
   if (compare_layouts) {
-    if (!ContainersEqual(lhs.layout().minor_to_major(),
-                         rhs.layout().minor_to_major())) {
-      VLOG(3) << "CompareShapes: lhs layout != rhs layout";
-      return false;
-    }
-    if (!ContainersEqual(lhs.layout().padded_dimensions(),
-                         rhs.layout().padded_dimensions())) {
-      VLOG(3)
-          << "CompareShapes: lhs padded_dimensions != rhs padded_dimensions";
+    if (lhs.layout().format() != rhs.layout().format()) {
       return false;
     }
-    if (lhs.layout().padding_value() != rhs.layout().padding_value()) {
-      VLOG(3) << "CompareShapes: lhs padding value != rhs padding_value";
-      return false;
+    if (LayoutUtil::IsDenseArray(lhs)) {
+      if (!ContainersEqual(LayoutUtil::MinorToMajor(lhs),
+                           LayoutUtil::MinorToMajor(rhs))) {
+        VLOG(3) << "CompareShapes: lhs layout != rhs layout";
+        return false;
+      }
+      if (!ContainersEqual(lhs.layout().padded_dimensions(),
+                           rhs.layout().padded_dimensions())) {
+        VLOG(3)
+            << "CompareShapes: lhs padded_dimensions != rhs padded_dimensions";
+        return false;
+      }
+      if (lhs.layout().padding_value() != rhs.layout().padding_value()) {
+        VLOG(3) << "CompareShapes: lhs padding value != rhs padding_value";
+        return false;
+      }
     }
   }
 
@@ -141,7 +153,8 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
 }
 
 /* static */ int64 ShapeUtil::Rank(const Shape& shape) {
-  CHECK(!ShapeUtil::IsTuple(shape)) << "Tuples do not have a rank";
+  CHECK(!ShapeUtil::IsTuple(shape))
+      << "Tuples do not have a rank, shape: " << shape;
   return shape.dimensions_size();
 }
 
@@ -182,20 +195,32 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
       .ValueOrDie();
 }
 
-/* static */ Shape ShapeUtil::MakeShapeWithMonotonicDim0MajorLayout(
+/* static */ Shape ShapeUtil::MakeShapeWithDescendingLayout(
     PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions) {
   std::vector<int64> layout(dimensions.size());
   std::iota(layout.rbegin(), layout.rend(), static_cast<int64>(0));
   return MakeShapeWithLayout(element_type, dimensions, layout);
 }
 
-/* static */ Shape ShapeUtil::NormalizeShapeToMonotonicDim0MajorLayout(
+/* static */ Shape ShapeUtil::MakeShapeWithSparseLayout(
+    PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions,
+    int64 max_sparse_elements) {
+  DCHECK_NE(TUPLE, element_type);
+  DCHECK_NE(OPAQUE, element_type);
+  Shape shape = ShapeUtil::MakeShape(element_type, dimensions);
+  *shape.mutable_layout() = LayoutUtil::MakeSparseLayout(max_sparse_elements);
+  TF_DCHECK_OK(ShapeUtil::ValidateShape(shape));
+  return shape;
+}
+
+/* static */ Shape
+ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
     const Shape& shape) {
   std::vector<int64> dims(shape.dimensions_size());
   for (int i = 0; i < shape.dimensions_size(); ++i) {
     dims[i] = shape.dimensions(LayoutUtil::Major(shape.layout(), i));
   }
-  return MakeShapeWithMonotonicDim0MajorLayout(shape.element_type(), dims);
+  return MakeShapeWithDescendingLayout(shape.element_type(), dims);
 }
 
 /* static */ void ShapeUtil::PopulateShape(
@@ -235,6 +260,7 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
 }
 
 /* static */ void ShapeUtil::AppendMajorDimension(int bound, Shape* shape) {
+  CHECK(LayoutUtil::IsDenseArray(*shape));
   shape->mutable_layout()->add_minor_to_major(Rank(*shape));
   shape->add_dimensions(bound);
   TF_DCHECK_OK(ValidateShape(*shape));
@@ -329,6 +355,14 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
   return MakeTupleShape(new_elements);
 }
 
+// Returns the shape of a real or imaginary component.
+/* static */ Shape ShapeUtil::ComplexComponentShape(
+    const Shape& complex_shape) {
+  CHECK(ElementIsComplex(complex_shape)) << HumanString(complex_shape);
+  return ChangeElementType(complex_shape, primitive_util::ComplexComponentType(
+                                              complex_shape.element_type()));
+}
+
 /* static */ bool ShapeUtil::ShapeIs(const Shape& shape,
                                      PrimitiveType element_type,
                                      std::initializer_list<int64> dimensions) {
@@ -336,7 +370,7 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
 }
 
 /* static */ int64 ShapeUtil::ElementsIn(const Shape& shape) {
-  CHECK(!IsTuple(shape));
+  CHECK(!IsTuple(shape)) << ShapeUtil::HumanString(shape);
   CHECK_EQ(shape.dimensions_size(), Rank(shape));
   return std::accumulate<decltype(shape.dimensions().begin()), int64>(
       shape.dimensions().begin(), shape.dimensions().end(), 1LL,
@@ -352,7 +386,7 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
 }
 
 /* static */ string ShapeUtil::HumanString(const Shape& shape) {
-  if (shape.element_type() == TUPLE) {
+  if (IsTuple(shape)) {
     string text = "(";
     const char* prefix = "";
     for (const Shape& elem_shape : shape.tuple_shapes()) {
@@ -396,10 +430,30 @@ const string& LowercasePrimitiveTypeName(PrimitiveType s) {
   static PrimitiveTypeNameGenerator* gen = new PrimitiveTypeNameGenerator();
   return gen->LowercaseName(s);
 }
+
+StatusOr<PrimitiveType> StringToPrimitiveType(const string& name) {
+  static std::unordered_map<string, PrimitiveType>* name_to_type = [] {
+    static auto* map = new std::unordered_map<string, PrimitiveType>;
+    for (int i = 0; i < PrimitiveType_ARRAYSIZE; i++) {
+      if (PrimitiveType_IsValid(i)) {
+        auto value = static_cast<PrimitiveType>(i);
+        (*map)[LowercasePrimitiveTypeName(value)] = value;
+      }
+    }
+    return map;
+  }();
+  auto found = name_to_type->find(name);
+  if (found == name_to_type->end()) {
+    return InvalidArgument("Invalid element type string: \"%s\".",
+                           name.c_str());
+  }
+  return found->second;
+}
+
 }  // namespace
 
 /* static */ string ShapeUtil::HumanStringWithLayout(const Shape& shape) {
-  if (shape.element_type() == TUPLE) {
+  if (IsTuple(shape)) {
     string text = "(";
     const char* prefix = "";
     for (const Shape& elem_shape : shape.tuple_shapes()) {
@@ -470,26 +524,35 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
 
   string element_type_string;
   string dimensions_string;
+  string format_string;
   string layout_string;
   // tensorflow::StringPiece is not compatible with internal RE2 StringPiece, so
   // we convert in to the RE2-consumable type and then consume the corresponding
   // amount from our StringPiece type.
   tensorflow::RegexpStringPiece s_consumable(s->data(), s->size());
-  if (RE2::Consume(&s_consumable,
-                   "^(\\w*\\d*)\\[([\\d,]*)\\](?:\\s*{([\\d,]*)})?",
-                   &element_type_string, &dimensions_string, &layout_string)) {
+  if (RE2::Consume(
+          &s_consumable,
+          "^(\\w*\\d*)\\[([\\d,]*)\\](?:\\s*(dense|sparse)?\\s*{([\\d,]+)})?",
+          &element_type_string, &dimensions_string, &format_string,
+          &layout_string)) {
     size_t consumed = s->size() - s_consumable.size();
     s->remove_prefix(consumed);
+    auto string_to_int64 = [&s](const string& input) -> StatusOr<int64> {
+      int64 element;
+      if (!tensorflow::strings::safe_strto64(input.c_str(), &element)) {
+        return InvalidArgument(
+            "Invalid s64 value in parsed shape string: \"%s\" in \"%s\"",
+            input.c_str(), s->ToString().c_str());
+      }
+      return element;
+    };
+
     auto comma_list_to_int64s =
-        [&s](const string& input) -> StatusOr<std::vector<int64>> {
+        [&s,
+         string_to_int64](const string& input) -> StatusOr<std::vector<int64>> {
       std::vector<int64> results;
       for (const string& piece : tensorflow::str_util::Split(input, ',')) {
-        int64 element;
-        if (!tensorflow::strings::safe_strto64(piece.c_str(), &element)) {
-          return InvalidArgument(
-              "Invalid s64 value in parsed shape string: \"%s\" in \"%s\"",
-              piece.c_str(), s->ToString().c_str());
-        }
+        TF_ASSIGN_OR_RETURN(int64 element, string_to_int64(piece));
         results.push_back(element);
       }
       return results;
@@ -500,31 +563,32 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
                         comma_list_to_int64s(dimensions_string));
 
     // Extract the primitive element type.
-    PrimitiveType primitive_type = PRIMITIVE_TYPE_INVALID;
-    for (PrimitiveType i =
-             static_cast<PrimitiveType>(PRIMITIVE_TYPE_INVALID + 1);
-         i < TUPLE; i = static_cast<PrimitiveType>(i + 1)) {
-      if (tensorflow::str_util::Lowercase(PrimitiveType_Name(i)) ==
-          element_type_string) {
-        primitive_type = i;
-        break;
-      }
-    }
-    if (primitive_type == PRIMITIVE_TYPE_INVALID) {
+    TF_ASSIGN_OR_RETURN(const PrimitiveType primitive_type,
+                        StringToPrimitiveType(element_type_string));
+    if (primitive_type == PRIMITIVE_TYPE_INVALID || primitive_type == TUPLE ||
+        primitive_type == OPAQUE) {
       return InvalidArgument("Invalid element type string: \"%s\".",
                              element_type_string.c_str());
     }
 
     Shape result;
-    if (layout_string.empty()) {
+    if (format_string.empty() && layout_string.empty()) {
       // Create a shape without a layout set.
       result = ShapeUtil::MakeShape(primitive_type, dimensions);
-    } else {
+    } else if (format_string == "sparse") {
+      TF_ASSIGN_OR_RETURN(int64 max_elements, string_to_int64(layout_string));
+      result = ShapeUtil::MakeShapeWithSparseLayout(primitive_type, dimensions,
+                                                    max_elements);
+    } else if (format_string.empty() || format_string == "dense") {
       // Extract the layout minor-to-major and set it.
       TF_ASSIGN_OR_RETURN(std::vector<int64> min2maj,
                           comma_list_to_int64s(layout_string));
       TF_ASSIGN_OR_RETURN(result, MakeShapeWithLayoutInternal(
                                       primitive_type, dimensions, min2maj));
+    } else {
+      // This should not be reached.
+      LOG(FATAL) << "Unhandled condition when parsing shape; format: \""
+                 << format_string << "\", layout: \"" << layout_string << "\"";
     }
     TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(result));
     return std::move(result);
@@ -537,7 +601,12 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
 
 /* static */ StatusOr<Shape> ShapeUtil::ParseShapeString(
     tensorflow::StringPiece s) {
-  return ParseShapeStringInternal(&s);
+  TF_ASSIGN_OR_RETURN(Shape shape, ParseShapeStringInternal(&s));
+  if (!s.empty()) {
+    return InvalidArgument("Invalid shape string to parse: \"%s\"",
+                           s.ToString().c_str());
+  }
+  return shape;
 }
 
 /* static */ bool ShapeUtil::SameDimensions(const Shape& lhs,
@@ -622,23 +691,55 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
   TF_DCHECK_OK(ValidateShape(shape));
   DCHECK_NE(OPAQUE, shape.element_type());
   if (shape.element_type() == TUPLE) {
-    CHECK_GT(pointer_size, 0);
-    return pointer_size * shape.tuple_shapes_size();
+    return ByteSizeOfTupleIndexTable(shape, pointer_size);
   }
+  int64 byte_size = ByteSizeOfElements(shape);
+  if (LayoutUtil::IsSparseArray(shape)) {
+    byte_size += ByteSizeOfSparseIndices(shape);
+  }
+  return byte_size;
+}
+
+/* static */ int64 ShapeUtil::ByteSizeOfTupleIndexTable(const Shape& shape,
+                                                        int64 pointer_size) {
+  TF_DCHECK_OK(ValidateShape(shape));
+  DCHECK_EQ(TUPLE, shape.element_type());
+  CHECK_GT(pointer_size, 0);
+  return pointer_size * shape.tuple_shapes_size();
+}
+
+/* static */ int64 ShapeUtil::ByteSizeOfElements(const Shape& shape) {
+  TF_DCHECK_OK(ValidateShape(shape));
+  DCHECK(ShapeUtil::IsArray(shape));
   int64 allocated_element_count;
-  if (shape.layout().padded_dimensions_size() > 0) {
-    CHECK_EQ(Rank(shape), shape.layout().padded_dimensions_size());
-    allocated_element_count = 1;
-    for (int64 dimension_size : shape.layout().padded_dimensions()) {
-      allocated_element_count *= dimension_size;
-    }
+
+  if (LayoutUtil::IsSparseArray(shape)) {
+    allocated_element_count = LayoutUtil::MaxSparseElements(shape.layout());
   } else {
-    allocated_element_count = ElementsIn(shape);
+    CHECK(LayoutUtil::IsDenseArray(shape));
+    tensorflow::gtl::ArraySlice<int64> padded_dimensions =
+        LayoutUtil::PaddedDimensions(shape);
+    if (!padded_dimensions.empty()) {
+      CHECK_EQ(Rank(shape), padded_dimensions.size());
+      allocated_element_count = 1;
+      for (int64 dimension_size : padded_dimensions) {
+        allocated_element_count *= dimension_size;
+      }
+    } else {
+      allocated_element_count = ElementsIn(shape);
+    }
   }
   return allocated_element_count *
          ByteSizeOfPrimitiveType(shape.element_type());
 }
 
+/* static */ int64 ShapeUtil::ByteSizeOfSparseIndices(const Shape& shape) {
+  TF_DCHECK_OK(ValidateShape(shape));
+  DCHECK(LayoutUtil::IsSparseArray(shape));
+  return LayoutUtil::MaxSparseElements(shape.layout()) *
+         ShapeUtil::Rank(shape) * sizeof(int64);
+}
+
 /* static */ Status ShapeUtil::ValidateShapeWithOptionalLayoutInternal(
     const Shape& shape) {
   if (shape.element_type() == TUPLE) {
@@ -694,9 +795,9 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
   return LayoutUtil::ValidateLayoutInShape(shape);
 }
 
-/* static */ Shape ShapeUtil::ChangeElementType(const Shape& shape,
+/* static */ Shape ShapeUtil::ChangeElementType(const Shape& original,
                                                 PrimitiveType type) {
-  Shape new_shape = shape;
+  Shape new_shape = original;
   new_shape.set_element_type(type);
   return new_shape;
 }
@@ -705,7 +806,8 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
                                                  ShapeIndexView index) {
   const Shape* return_shape = &shape;
   for (auto i : index) {
-    CHECK(IsTuple(*return_shape));
+    CHECK(IsTuple(*return_shape))
+        << "Invalid index " << index << " for shape " << shape;
     return_shape = &return_shape->tuple_shapes(i);
   }
   return *return_shape;
@@ -863,7 +965,9 @@ Status ForEachMutableSubshapeHelper(
     new_shape.add_dimensions(dim);
   }
   if (shape.has_layout()) {
+    CHECK(LayoutUtil::IsDenseArray(shape));
     Layout* new_layout = new_shape.mutable_layout();
+    new_layout->set_format(DENSE);
     new_layout->clear_minor_to_major();
     for (auto index : Permute(permutation, shape.layout().minor_to_major())) {
       new_layout->add_minor_to_major(index);
@@ -1117,9 +1221,9 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape,
     // as input_shape/output_shape and the dimension-0-major layout. These two
     // shapes are used for conversion between logical linear indices and
     // multi-dimensional indices.
-    Shape input_shape_dim0_major = MakeShapeWithMonotonicDim0MajorLayout(
+    Shape input_shape_dim0_major = MakeShapeWithDescendingLayout(
         input_shape.element_type(), AsInt64Slice(input_shape.dimensions()));
-    Shape output_shape_dim0_major = MakeShapeWithMonotonicDim0MajorLayout(
+    Shape output_shape_dim0_major = MakeShapeWithDescendingLayout(
         output_shape.element_type(), AsInt64Slice(output_shape.dimensions()));
 
     for (int64 input_dim = 0; input_dim < Rank(input_shape); ++input_dim) {
@@ -1290,6 +1394,7 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape,
   shape.mutable_dimensions()->erase(shape.dimensions().begin() + dim_to_delete);
   if (LayoutUtil::HasLayout(shape)) {
     Layout* layout = shape.mutable_layout();
+    layout->set_format(DENSE);
     for (size_t i = 0; i < layout->minor_to_major().size();) {
       if (layout->minor_to_major(i) == dim_to_delete) {
         layout->mutable_minor_to_major()->erase(
@@ -1319,4 +1424,9 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape,
   return shape;
 }
 
+std::ostream& operator<<(std::ostream& out, const Shape& shape) {
+  out << ShapeUtil::HumanString(shape);
+  return out;
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 2ea1bd95cb571134ab1e1dda37fbc887a1fa06b2..453d4ec04726a4dd3851b8becb439bb7506e4ca9 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <initializer_list>
 #include <string>
 
+#include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
@@ -133,6 +134,7 @@ class ShapeIndexView {
 };
 
 std::ostream& operator<<(std::ostream& out, const ShapeIndex& shape_index);
+std::ostream& operator<<(std::ostream& out, const ShapeIndexView& shape_index);
 
 // Namespaced collection of (static) shape utilities.
 //
@@ -141,7 +143,10 @@ std::ostream& operator<<(std::ostream& out, const ShapeIndex& shape_index);
 class ShapeUtil {
  public:
   // Returns the number of elements are contained within the provided shape;
-  // e.g. for rank 0 (scalars) the result is always 1.
+  // e.g. for rank 0 (scalars) the result is always 1. Note that sparse shapes
+  // may not actually be able to store this number of elements. See
+  // LayoutUtil::MaxSparseElements(shape) to obtain the maximum number of
+  // elements that can be stored in a sparse shape.
   // Precondition: !IsTuple(shape)
   static int64 ElementsIn(const Shape& shape);
 
@@ -162,6 +167,27 @@ class ShapeUtil {
   // Precondition: !ShapeUtil::IsOpaque(shape) && !ShapeUtil::IsTuple(shape)
   static int64 ByteSizeOfPrimitiveType(PrimitiveType primitive_type);
 
+  // Returns the number of bytes required to store the tuple member pointers for
+  // a allocation of shape. The `shape` must be a TUPLE shape, and
+  // `pointer_size` must be larger than zero.
+  static int64 ByteSizeOfTupleIndexTable(const Shape& shape,
+                                         int64 pointer_size);
+
+  // Returns the number of bytes required for the elements in an allocation of
+  // `shape`, which must be an array shape. The return value does not include
+  // the bytes needed to store sparse indices. Dense shapes use a separate
+  // memory location for each element, and so for these shapes,
+  // `ByteSizeOf(shape) == ByteSizeOfElements(shape)`. For dense shapes, this
+  // size also includes padding if present in the layout. For sparse shapes,
+  // `ByteSizeOf(shape) == ByteSizeOfElements(shape) +
+  // ByteSizeOfSparseindices(shape)`.
+  static int64 ByteSizeOfElements(const Shape& shape);
+
+  // Returns the number of bytes required for the sparse indices in an
+  // allocation of shape. The shape must be an array shape. The return value
+  // does not include the bytes needed to store sparse indices.
+  static int64 ByteSizeOfSparseIndices(const Shape& shape);
+
   // Returns a human-readable string that represents the given shape, with or
   // without layout. e.g. "f32[42x12] {0, 1}" or "f32[64]".
   static string HumanString(const Shape& shape);
@@ -170,7 +196,7 @@ class ShapeUtil {
   // As above, but for program shapes, returns a string for the form:
   //
   // (param_name: f32[42x12], ...) -> f32[24x42]
-  static string HumanString(const ProgramShape& shape);
+  static string HumanString(const ProgramShape& program_shape);
 
   // Parses a ShapeUtil::HumanString-format shape string back into a shape
   // object.
@@ -267,14 +293,22 @@ class ShapeUtil {
       PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions,
       tensorflow::gtl::ArraySlice<int64> minor_to_major);
 
-  // Constructs a new shape with major-first layout.
-  static Shape MakeShapeWithMonotonicDim0MajorLayout(
+  static Shape MakeShapeWithSparseLayout(
+      PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions,
+      int64 max_sparse_elements);
+
+  // Constructs a new shape with major-first layout (i.e. {n, n-1, ..., 0}).
+  static Shape MakeShapeWithDescendingLayout(
       PrimitiveType element_type,
       tensorflow::gtl::ArraySlice<int64> dimensions);
 
-  // Returns a new shape with major-first layout that has the same layout of
-  // elements with a different shape.
-  static Shape NormalizeShapeToMonotonicDim0MajorLayout(const Shape& shape);
+  // Returns a new Shape based on the given Shape with low-dimension-major
+  // layout (i.e. {n, n-1, ..., 0}, like Fortran), and with the dimensions
+  // rearranged so that it has the same in-memory layout as the given shape.
+  //
+  // For example, transforms f32[B,H,W,C]{0,3,2,1} to f32[H,W,C,B]{3,2,1,0}.
+  static Shape MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
+      const Shape& shape);
 
   // As MakeShape, but the object to write to is passed in.
   static void PopulateShape(PrimitiveType element_type,
@@ -324,7 +358,8 @@ class ShapeUtil {
     return shape.element_type() == OPAQUE;
   }
 
-  // Returns whether the shape is an array.
+  // Returns whether the shape is an array.  Note that scalars are considered
+  // arrays.
   static bool IsArray(const Shape& shape) {
     return !IsTuple(shape) && !IsOpaque(shape);
   }
@@ -351,6 +386,10 @@ class ShapeUtil {
   // shape. E.g. a tuple like (f32, s32, u32) would slice via 1,3 to (s32, u32).
   static Shape SliceTuple(const Shape& tuple, int64 start, int64 limit);
 
+  // Returns the shape of the real/imaginary components of the given complex
+  // shape.
+  static Shape ComplexComponentShape(const Shape& complex_shape);
+
   // Shorthand for testing whether a shape is of a given element type and
   // sequence of dimensions.
   //
@@ -502,8 +541,7 @@ class ShapeUtil {
     CHECK_EQ(Rank(shape), base.size());
     CHECK_EQ(incr.size(), base.size());
     CHECK_EQ(count.size(), base.size());
-    const Layout& layout = shape.layout();
-    const int64 rank = layout.minor_to_major_size();
+    const int64 rank = LayoutUtil::MinorToMajor(shape).size();
     // Allows handling R0 arrays, such that the visitor function will be called
     // once with the proper empty indexes.
     int64 n = -1;
@@ -511,7 +549,7 @@ class ShapeUtil {
     while (n < rank && visitor_function(indexes)) {
       // Increments dimensions in minor to major order.
       for (n = 0; n < rank; ++n) {
-        int64 dim = layout.minor_to_major(n);
+        int64 dim = LayoutUtil::Minor(shape.layout(), n);
         indexes[dim] += incr[dim];
         if (indexes[dim] < base[dim] + count[dim]) {
           break;
@@ -529,6 +567,8 @@ class ShapeUtil {
   TF_DISALLOW_COPY_AND_ASSIGN(ShapeUtil);
 };
 
+std::ostream& operator<<(std::ostream& out, const Shape& shape);
+
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_SHAPE_UTIL_H_
diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc
index 4bce7ca51d0534cbcad6faac12818c5f3e94b29e..81ba7afb95265398e830e26122cd0056a32daee3 100644
--- a/tensorflow/compiler/xla/shape_util_test.cc
+++ b/tensorflow/compiler/xla/shape_util_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/shape_util.h"
 
 #include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/test.h"
 #include "tensorflow/compiler/xla/test_helpers.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -71,7 +72,8 @@ TEST(ShapeUtilTest, Rank4DimensionIndexing) {
 
 TEST(ShapeUtilTest, ParseShapeStringR2F32) {
   string shape_string = "f32[123,456]";
-  Shape actual = ShapeUtil::ParseShapeString(shape_string).ValueOrDie();
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
+                          ShapeUtil::ParseShapeString(shape_string));
   Shape expected = ShapeUtil::MakeShape(F32, {123, 456});
   ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
       << "expected: " << ShapeUtil::HumanString(expected)
@@ -80,7 +82,8 @@ TEST(ShapeUtilTest, ParseShapeStringR2F32) {
 
 TEST(ShapeUtilTest, ParseShapeStringTupleOfArrays) {
   string shape_string = "(f32[1572864],s8[5120,1024])";
-  Shape actual = ShapeUtil::ParseShapeString(shape_string).ValueOrDie();
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
+                          ShapeUtil::ParseShapeString(shape_string));
   Shape expected =
       ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {1572864}),
                                  ShapeUtil::MakeShape(S8, {5120, 1024})});
@@ -91,7 +94,8 @@ TEST(ShapeUtilTest, ParseShapeStringTupleOfArrays) {
 
 TEST(ShapeUtilTest, ParseShapeStringNestedTuple) {
   string shape_string = "(f32[1],(f32[2]), f32[3])";
-  Shape actual = ShapeUtil::ParseShapeString(shape_string).ValueOrDie();
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
+                          ShapeUtil::ParseShapeString(shape_string));
   Shape expected = ShapeUtil::MakeTupleShape({
       ShapeUtil::MakeShape(F32, {1}),
       ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {2})}),
@@ -102,6 +106,47 @@ TEST(ShapeUtilTest, ParseShapeStringNestedTuple) {
       << "actual:   " << ShapeUtil::HumanString(actual);
 }
 
+TEST(ShapeUtilTest, ParseShapeStringWithLayout) {
+  string shape_string = "f32[123,456]{0,1}";
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
+                          ShapeUtil::ParseShapeString(shape_string));
+  Shape expected = ShapeUtil::MakeShapeWithLayout(F32, {123, 456}, {0, 1});
+  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
+      << "expected: " << ShapeUtil::HumanString(expected)
+      << "actual:   " << ShapeUtil::HumanString(actual);
+}
+
+TEST(ShapeUtilTest, ParseShapeStringWithExplicitDenseLayout) {
+  string shape_string = "f32[123,456]dense{0,1}";
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
+                          ShapeUtil::ParseShapeString(shape_string));
+  Shape expected = ShapeUtil::MakeShapeWithLayout(F32, {123, 456}, {0, 1});
+  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
+      << "expected: " << ShapeUtil::HumanString(expected)
+      << "actual:   " << ShapeUtil::HumanString(actual);
+}
+
+TEST(ShapeUtilTest, ParseShapeStringWithSparseLayout) {
+  string shape_string = "f32[123,456]sparse{10}";
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
+                          ShapeUtil::ParseShapeString(shape_string));
+  Shape expected = ShapeUtil::MakeShapeWithSparseLayout(F32, {123, 456}, 10);
+  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
+      << "expected: " << ShapeUtil::HumanString(expected)
+      << "actual: " << ShapeUtil::HumanString(actual);
+}
+
+TEST(ShapeUtilTest, ParseInvalidShapeString) {
+  string shape_strings[] = {
+      "f32[123,456]foobar{0,1}", "f32[123,456]sparse{0,1}", "f32[123,456]{foo}",
+      "f32[123,456]dense{foo}",  "f32[123,456]sparse{foo}",
+  };
+  for (const string& shape_string : shape_strings) {
+    StatusOr<Shape> result = ShapeUtil::ParseShapeString(shape_string);
+    ASSERT_FALSE(result.ok()) << "shape: " << shape_string;
+  }
+}
+
 TEST(ShapeUtilTest, CompatibleIdenticalShapes) {
   Shape shape1 = ShapeUtil::MakeShape(F32, {3, 2});
   Shape shape2 = ShapeUtil::MakeShape(F32, {3, 2});
@@ -165,20 +210,6 @@ TEST(ShapeUtilTest, IncompatibleTuplesWithDifferentDimensions) {
   EXPECT_FALSE(ShapeUtil::Compatible(tuple1, tuple2));
 }
 
-TEST(ShapeUtilTest, EmptyLayoutEqualsMissingLayout) {
-  // A shape with a missing layout should be equal to a shape with an empty
-  // layout.
-  Shape scalar1 = ShapeUtil::MakeShape(F32, {});
-  Shape scalar2 = ShapeUtil::MakeShape(F32, {});
-
-  EXPECT_TRUE(ShapeUtil::Equal(scalar1, scalar2));
-
-  scalar1.clear_layout();    // Remove layout field.
-  scalar2.mutable_layout();  // Create empty layout field.
-
-  EXPECT_TRUE(ShapeUtil::Equal(scalar1, scalar2));
-}
-
 TEST(ShapeUtilTest, CompareShapesWithPaddedDimensionsMismatch) {
   Shape shape1 = ShapeUtil::MakeShape(F32, {20, 30});
   shape1.mutable_layout()->add_padded_dimensions(10);
@@ -199,17 +230,17 @@ TEST(ShapeUtilTest, CompareShapesWithPaddingValueMismatch) {
   EXPECT_FALSE(ShapeUtil::Equal(shape1, shape2));
 }
 
-TEST(ShapeUtilTest, ScalarUnpopulatedLayoutEqualsScalarLayout) {
-  Shape scalar_unpopulated = ShapeUtil::MakeShape(F32, {});
-  scalar_unpopulated.clear_layout();
-  ASSERT_FALSE(scalar_unpopulated.has_layout())
-      << ShapeUtil::HumanStringWithLayout(scalar_unpopulated);
+TEST(ShapeUtilTest, ScalarDefaultLayoutEqualsScalarEmptyMin2Maj) {
+  Shape scalar_default_layout = ShapeUtil::MakeShape(F32, {});
+  ASSERT_TRUE(scalar_default_layout.has_layout())
+      << ShapeUtil::HumanStringWithLayout(scalar_default_layout);
 
-  const Shape scalar_populated = ShapeUtil::MakeShapeWithLayout(F32, {}, {});
-  ASSERT_TRUE(scalar_populated.has_layout())
-      << ShapeUtil::HumanStringWithLayout(scalar_populated);
+  const Shape scalar_empty_min2maj =
+      ShapeUtil::MakeShapeWithLayout(F32, {}, {});
+  ASSERT_TRUE(scalar_empty_min2maj.has_layout())
+      << ShapeUtil::HumanStringWithLayout(scalar_empty_min2maj);
 
-  EXPECT_TRUE(ShapeUtil::Equal(scalar_unpopulated, scalar_populated));
+  EXPECT_TRUE(ShapeUtil::Equal(scalar_default_layout, scalar_empty_min2maj));
 }
 
 TEST(ShapeUtilTest, ByteSizeOfWithoutPadding) {
diff --git a/tensorflow/compiler/xla/sparse_index_array.cc b/tensorflow/compiler/xla/sparse_index_array.cc
new file mode 100644
index 0000000000000000000000000000000000000000..31844abd89a020c87c403353374a80fb639a3244
--- /dev/null
+++ b/tensorflow/compiler/xla/sparse_index_array.cc
@@ -0,0 +1,110 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/sparse_index_array.h"
+
+#include "tensorflow/compiler/xla/index_util.h"
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+
+namespace xla {
+
+SparseIndexArray::SparseIndexArray() : rank_(0), max_indices_(0) {}
+
+SparseIndexArray::SparseIndexArray(int64 max_indices, int64 rank,
+                                   std::vector<int64> indices)
+    : indices_(std::move(indices)), rank_(rank), max_indices_(max_indices) {
+  CHECK_GT(rank_, 0);
+  CHECK_EQ(indices_.size() % rank_, 0)
+      << "indices_.size(): " << indices_.size() << ", rank_: " << rank_;
+  CHECK_LT(index_count(), max_indices_);
+}
+
+SparseIndexArray::SparseIndexArray(int64 max_indices, int64 rank,
+                                   tensorflow::gtl::ArraySlice<int64> indices)
+    : SparseIndexArray(max_indices, rank,
+                       std::vector<int64>(indices.begin(), indices.end())) {}
+
+SparseIndexArray::SparseIndexArray(int64 max_indices,
+                                   const Array2D<int64>& indices)
+    : SparseIndexArray(max_indices, indices.n2(),
+                       std::vector<int64>(indices.begin(), indices.end())) {}
+
+int64 SparseIndexArray::index_count() const {
+  CHECK_GT(rank_, 0);
+  CHECK_EQ(indices_.size() % rank_, 0);
+  return indices_.size() / rank_;
+}
+
+tensorflow::gtl::ArraySlice<int64> SparseIndexArray::At(
+    int64 sparse_element_number) const {
+  CHECK_GT(rank_, 0);
+  CHECK_GE(sparse_element_number, 0);
+  CHECK_LE(rank_ * sparse_element_number + rank_, indices_.size());
+  return tensorflow::gtl::ArraySlice<int64>(
+      indices_.data() + rank_ * sparse_element_number, rank_);
+}
+
+tensorflow::gtl::MutableArraySlice<int64> SparseIndexArray::At(
+    int64 sparse_element_number) {
+  CHECK_GT(rank_, 0);
+  CHECK_GE(sparse_element_number, 0);
+  CHECK_LE(rank_ * sparse_element_number + rank_, indices_.size());
+  return tensorflow::gtl::MutableArraySlice<int64>(
+      indices_.data() + rank_ * sparse_element_number, rank_);
+}
+
+void SparseIndexArray::Append(tensorflow::gtl::ArraySlice<int64> index) {
+  CHECK_GT(rank_, 0);
+  CHECK_EQ(index.size(), rank_);
+  indices_.insert(indices_.end(), index.begin(), index.end());
+}
+
+void SparseIndexArray::Clear() { indices_.clear(); }
+
+void SparseIndexArray::Resize(int64 num_indices) {
+  CHECK_GT(rank_, 0);
+  indices_.resize(rank_ * num_indices);
+}
+
+bool SparseIndexArray::Validate(const Shape& shape) const {
+  if (rank_ == 0 || rank_ != ShapeUtil::Rank(shape)) {
+    return false;
+  }
+  int64 num_indices = index_count();
+  if (num_indices > LayoutUtil::MaxSparseElements(shape.layout())) {
+    return false;
+  }
+  if (num_indices < 2) {
+    return true;
+  }
+  tensorflow::gtl::ArraySlice<int64> last = At(0);
+  if (!IndexUtil::IndexInBounds(shape, last)) {
+    return false;
+  }
+  for (int64 n = 1; n < num_indices; ++n) {
+    tensorflow::gtl::ArraySlice<int64> next = At(n);
+    if (!IndexUtil::IndexInBounds(shape, next)) {
+      return false;
+    }
+    if (IndexUtil::CompareIndices(last, next) >= 0) {
+      return false;
+    }
+    last = next;
+  }
+  return true;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/sparse_index_array.h b/tensorflow/compiler/xla/sparse_index_array.h
new file mode 100644
index 0000000000000000000000000000000000000000..903fee525520205dbd516897fe451b0fd59d3872
--- /dev/null
+++ b/tensorflow/compiler/xla/sparse_index_array.h
@@ -0,0 +1,176 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Utility class for managing sparse array indices.
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_
+
+#include <vector>
+
+#include "tensorflow/compiler/xla/array2d.h"
+#include "tensorflow/compiler/xla/index_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+
+namespace xla {
+
+// Encapsulates the array of indices for a sparse array.  A SparseIndexArray
+// contain indices for up to `max_indices` elements of a sparse array.  Each
+// sparse index is an array of `rank` int64 value that gives the location of a
+// value within a sparse array.  Note that the dimensions of the array are not
+// checked (except for the rank).  To avoid confusion, we refer to the position
+// of an index within a SparseIndexArray as a sparse index number.
+class SparseIndexArray {
+ public:
+  SparseIndexArray();
+  SparseIndexArray(const SparseIndexArray&) = default;
+  SparseIndexArray(SparseIndexArray&&) = default;
+  SparseIndexArray& operator=(const SparseIndexArray&) = default;
+  SparseIndexArray& operator=(SparseIndexArray&&) = default;
+
+  // Constructs a SparseIndexArray that can hold up to `max_indices` sparse
+  // indices, with an initial contents obtained from the given array.  The rank
+  // is taken from the minor dimension of the array.  The major dimension of the
+  // array must not exceed `max_indices`.
+  SparseIndexArray(int64 max_indices, const Array2D<int64>& indices);
+
+  // Like above, but the array is flattened.  For example, the following are
+  // equivalent:
+  //
+  //  SparseIndexArray(10, 3,
+  //                   Array2D{
+  //                     {0, 1, 2},
+  //                     {3, 4, 5},
+  //                     {6, 7, 8},
+  //                     {9, 10, 11},
+  //                   })
+  //
+  //  SparseIndexArray(10, 3,
+  //                   {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+  //
+  SparseIndexArray(int64 max_indices, int64 rank,
+                   std::vector<int64> indices = {});
+  SparseIndexArray(int64 max_indices, int64 rank,
+                   tensorflow::gtl::ArraySlice<int64> indices);
+
+  // Returns the number of elements represented by the indices stored in the
+  // array.
+  int64 index_count() const;
+
+  // Returns a slice that refers to the given sparse index number. The argument
+  // must be in the range [0, element_count()).
+  tensorflow::gtl::ArraySlice<int64> At(int64 sparse_element_number) const;
+  tensorflow::gtl::MutableArraySlice<int64> At(int64 sparse_element_number);
+
+  // Adds the given index at the end of the array.  The new size of the
+  // SparseIndexArray must not exceed `max_indices`.
+  void Append(tensorflow::gtl::ArraySlice<int64> index);
+
+  // Removes all indices from the array.
+  void Clear();
+
+  // Resizes the array to contain the given number of sparse indices.  The new
+  // size must be smaller than `max_indices`.  If the new size is larger than
+  // the old size, the value of the new indices is not specified.
+  void Resize(int64 num_indices);
+
+  // Returns true iff all indices are unique and occur in sorted order, and are
+  // valid for the given shape.
+  bool Validate(const Shape& shape) const;
+
+  int64 rank() const { return rank_; }
+  int64 max_indices() const { return max_indices_; }
+
+  // Returns a pointer to the int64 array that holds the sparse indices.
+  tensorflow::gtl::MutableArraySlice<int64> mutable_data() { return &indices_; }
+  tensorflow::gtl::ArraySlice<int64> data() const { return indices_; }
+
+  // Sorts this sparse index array along with the set of corresponding values.
+  // The indices and values are sorted in the lexicographic order of the
+  // indices, from smallest to largest.
+  //
+  // For example:
+  //
+  //   std::vector<float> v{10.0, 11.0, 12.0};
+  //   SparseIndexArray a(10, 3,
+  //                      {{3, 4, 5},
+  //                       {1, 2, 3},
+  //                       {2, 3, 4}});
+  //   a.SortWithValues(&v);
+  //   // Prints "11.0, 12.0, 10.0":
+  //   std::cout << v[0] << ", " << v[1] << ", " << v[2] << std::endl;
+  //
+  template <typename NativeT>
+  void SortWithValues(tensorflow::gtl::MutableArraySlice<NativeT> values);
+
+ private:
+  std::vector<int64> indices_;
+  int64 rank_;
+  int64 max_indices_;
+};
+
+template <typename NativeT>
+void SparseIndexArray::SortWithValues(
+    tensorflow::gtl::MutableArraySlice<NativeT> values) {
+  int64 num_elements = index_count();
+  CHECK_EQ(values.size(), num_elements);
+  std::vector<int64> sort_order;
+  sort_order.reserve(num_elements);
+  for (int64 i = 0; i < num_elements; ++i) {
+    sort_order.push_back(i);
+  }
+  auto sort_order_less = [this](int64 lhs, int64 rhs) {
+    return IndexUtil::CompareIndices(At(lhs), At(rhs)) < 0;
+  };
+  std::sort(sort_order.begin(), sort_order.end(), sort_order_less);
+
+  // Reorder the array elements according to sort_order.  Work through the array
+  // and follow cycles so we can do the reorder in-place.
+  tensorflow::gtl::InlinedVector<int64, 8> saved_index(rank());
+  for (int64 i = 0; i < num_elements; ++i) {
+    // sort_order[i] == -1 indicates the element has already been copied.
+    if (sort_order[i] < 0) {
+      continue;
+    } else if (i == sort_order[i]) {
+      // The element is already in sorted order.
+      sort_order[i] = -1;
+      continue;
+    }
+
+    std::copy_n(At(i).begin(), rank(), saved_index.begin());
+    NativeT saved_value = values[i];
+    int64 j = i;
+    for (;;) {
+      if (sort_order[j] == i) {
+        std::copy_n(saved_index.begin(), rank(), At(j).begin());
+        values[j] = saved_value;
+        sort_order[j] = -1;
+        break;
+      }
+
+      std::copy_n(At(sort_order[j]).begin(), rank(), At(j).begin());
+      values[j] = values[sort_order[j]];
+
+      int64 k = sort_order[j];
+      sort_order[j] = -1;
+      j = k;
+    }
+  }
+}
+
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_
diff --git a/tensorflow/compiler/xla/sparse_index_array_test.cc b/tensorflow/compiler/xla/sparse_index_array_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7377f88958dcb7daf3d3f4f0e07966fdc9294580
--- /dev/null
+++ b/tensorflow/compiler/xla/sparse_index_array_test.cc
@@ -0,0 +1,43 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/sparse_index_array.h"
+
+#include <vector>
+
+#include "tensorflow/compiler/xla/test.h"
+
+namespace xla {
+namespace {
+
+TEST(SparseIndexArrayTest, Sort) {
+  SparseIndexArray a(10, 3);
+  a.Append({2, 3, 4});
+  a.Append({3, 4, 5});
+  a.Append({1, 2, 3});
+  a.Append({5, 6, 7});
+  a.Append({4, 5, 6});
+  a.Append({6, 7, 8});
+  std::vector<double> values = {
+      12.0, 13.0, 11.0, 15.0, 14.0, 16.0,
+  };
+  a.SortWithValues<double>(&values);
+  ASSERT_EQ(a.data(), std::vector<int64>({1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 6, 5,
+                                          6, 7, 6, 7, 8}));
+  ASSERT_EQ(values, std::vector<double>({11.0, 12.0, 13.0, 14.0, 15.0, 16.0}));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/status_macros.h b/tensorflow/compiler/xla/status_macros.h
index 5e5550563d02de99ddefbeb8ee8e1bf98afdcdbf..e51dd64e2a3dc7c359918cb08c6c94b2b4d9e91b 100644
--- a/tensorflow/compiler/xla/status_macros.h
+++ b/tensorflow/compiler/xla/status_macros.h
@@ -196,18 +196,8 @@ class StatusAdaptorForMacros {
 #define TF_STATUS_MACROS_CONCAT_NAME(x, y) TF_STATUS_MACROS_CONCAT_IMPL(x, y)
 #define TF_STATUS_MACROS_CONCAT_IMPL(x, y) x##y
 
-#define TF_ASSIGN_OR_RETURN(...)                                             \
-  TF_STATUS_MACRO_GET_VARIADIC_IMPL(__VA_ARGS__, TF_ASSIGN_OR_RETURN_IMPL_3, \
-                                    TF_ASSIGN_OR_RETURN_IMPL_2)              \
-  (__VA_ARGS__)
-
-#define TF_STATUS_MACRO_GET_VARIADIC_IMPL(_1, _2, _3, NAME, ...) NAME
-
-#define TF_ASSIGN_OR_RETURN_IMPL_2(lhs, rexpr) \
-  TF_ASSIGN_OR_RETURN_IMPL_3(lhs, rexpr)
-
-#define TF_ASSIGN_OR_RETURN_IMPL_3(lhs, rexpr) \
-  TF_ASSIGN_OR_RETURN_IMPL(                    \
+#define TF_ASSIGN_OR_RETURN(lhs, rexpr) \
+  TF_ASSIGN_OR_RETURN_IMPL(             \
       TF_STATUS_MACROS_CONCAT_NAME(_status_or_value, __COUNTER__), lhs, rexpr)
 
 #define TF_ASSIGN_OR_RETURN_IMPL(statusor, lhs, rexpr) \
diff --git a/tensorflow/compiler/xla/statusor_test.cc b/tensorflow/compiler/xla/statusor_test.cc
index 5fa2211ac66177514ac8ecabfa8791e7c8c014a2..f9d25945bc617507735fb6c4d011c39723497f69 100644
--- a/tensorflow/compiler/xla/statusor_test.cc
+++ b/tensorflow/compiler/xla/statusor_test.cc
@@ -32,26 +32,26 @@ namespace {
 class Base1 {
  public:
   virtual ~Base1() {}
-  int pad;
+  int pad_;
 };
 
 class Base2 {
  public:
   virtual ~Base2() {}
-  int yetotherpad;
+  int yetotherpad_;
 };
 
 class Derived : public Base1, public Base2 {
  public:
   ~Derived() override {}
-  int evenmorepad;
+  int evenmorepad_;
 };
 
 class CopyNoAssign {
  public:
-  explicit CopyNoAssign(int value) : foo(value) {}
-  CopyNoAssign(const CopyNoAssign& other) : foo(other.foo) {}
-  int foo;
+  explicit CopyNoAssign(int value) : foo_(value) {}
+  CopyNoAssign(const CopyNoAssign& other) : foo_(other.foo_) {}
+  int foo_;
 
  private:
   const CopyNoAssign& operator=(const CopyNoAssign&);
@@ -253,7 +253,7 @@ TEST(StatusOr, TestCopyCtorNonAssignable) {
   StatusOr<CopyNoAssign> original(value);
   StatusOr<CopyNoAssign> copy(original);
   EXPECT_EQ(copy.status(), original.status());
-  EXPECT_EQ(original.ValueOrDie().foo, copy.ValueOrDie().foo);
+  EXPECT_EQ(original.ValueOrDie().foo_, copy.ValueOrDie().foo_);
 }
 
 TEST(StatusOr, TestCopyCtorStatusOKConverting) {
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index addce9019b340f9489a25dbdd2437f4d71740b95..3922c779a0979c493df84431bf97c1da57717443 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -69,6 +69,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_dataflow_analysis",
         "//tensorflow/compiler/xla/service:hlo_verifier",
         "//tensorflow/compiler/xla/service:transfer_manager",
         "//tensorflow/core:lib",
@@ -104,7 +105,9 @@ cc_library(
     hdrs = ["hlo_test_base.h"],
     deps = [
         ":literal_test_util",
+        ":test_utils",
         "//tensorflow/compiler/xla:shape_layout",
+        "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
@@ -114,6 +117,10 @@ cc_library(
         "//tensorflow/compiler/xla/service:computation_layout",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:hlo_runner",
+        "//tensorflow/compiler/xla/service:hlo_verifier",
+        "//tensorflow/compiler/xla/service:interpreter_plugin",  # reference backend
+        "//tensorflow/compiler/xla/service:platform_util",
+        "//tensorflow/compiler/xla/tools/parser:hlo_parser",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
         "//tensorflow/core:test",
@@ -338,6 +345,23 @@ xla_test(
     ],
 )
 
+xla_test(
+    name = "xla_hlo_profile_test",
+    srcs = ["xla_hlo_profile_test.cc"],
+    deps = [
+        "//tensorflow/compiler/xla:array2d",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/client:local_client",
+        "//tensorflow/compiler/xla/service:platform_util",
+        "//tensorflow/compiler/xla/tests:client_library_test_base",
+        "//tensorflow/compiler/xla/tests:test_utils",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:regexp_internal",
+        "//tensorflow/core:test",
+    ],
+)
+
 xla_test(
     name = "axpy_simple_test",
     srcs = ["axpy_simple_test.cc"],
@@ -354,6 +378,7 @@ xla_test(
 xla_test(
     name = "map_test",
     srcs = ["map_test.cc"],
+    tags = ["enable_for_xla_interpreter"],
     deps = [
         "//tensorflow/compiler/xla:array2d",
         "//tensorflow/compiler/xla:literal_util",
@@ -382,6 +407,7 @@ xla_test(
     name = "params_test",
     srcs = ["params_test.cc"],
     shard_count = 30,
+    tags = ["optonly"],
     deps = [
         "//tensorflow/compiler/xla:array2d",
         "//tensorflow/compiler/xla:literal_util",
@@ -430,6 +456,22 @@ xla_test(
     ],
 )
 
+xla_test(
+    name = "conditional_test",
+    srcs = ["conditional_test.cc"],
+    deps = [
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/client:global_data",
+        "//tensorflow/compiler/xla/client:local_client",
+        "//tensorflow/compiler/xla/tests:client_library_test_base",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+    ],
+)
+
 xla_test(
     name = "unary_op_test",
     srcs = ["unary_op_test.cc"],
@@ -774,8 +816,6 @@ xla_test(
     name = "bfloat16_test",
     srcs = ["bfloat16_test.cc"],
     blacklisted_backends = [
-        "cpu",
-        "cpu_parallel",
         "gpu",
     ],
     shard_count = 40,
@@ -961,7 +1001,10 @@ xla_test(
     name = "reduce_window_test",
     timeout = "long",
     srcs = [],
-    tags = ["optonly"],
+    tags = [
+        "enable_for_xla_interpreter",
+        "optonly",
+    ],
     xla_test_library_deps = [":reduce_window_test_library"],
     deps = [],
 )
@@ -1036,9 +1079,10 @@ xla_test(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry",
+        "//tensorflow/compiler/xla/tests:client_library_test_base",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
         "//tensorflow/compiler/xla/tests:literal_test_util",
-        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
     ],
@@ -1364,6 +1408,31 @@ xla_test(
     ],
 )
 
+xla_test(
+    name = "execution_profile_test",
+    srcs = ["execution_profile_test.cc"],
+    deps = [
+        ":client_library_test_base",
+        "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/client:global_data",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
+    ],
+)
+
+xla_test(
+    name = "execution_profile_test_with_xla_hlo_profile",
+    srcs = ["execution_profile_test.cc"],
+    args = ["--xla_hlo_profile"],
+    deps = [
+        ":client_library_test_base",
+        "//tensorflow/compiler/xla/client:computation_builder",
+        "//tensorflow/compiler/xla/client:global_data",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
+    ],
+)
+
 xla_test(
     name = "replay_test",
     srcs = ["replay_test.cc"],
@@ -1676,6 +1745,45 @@ xla_test(
     ],
 )
 
+# A demo of textual IR based test.
+xla_test(
+    name = "sample_text_test",
+    srcs = ["sample_text_test.cc"],
+    # You can leave this empty if you want to test all supported backends.
+    backends = [
+        "cpu",
+        "gpu",
+    ],
+    deps = [
+        ":hlo_test_base",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:lib",
+    ],
+)
+
+# A demo of test that loads an hlo module from a file and compares results on gpu and cpu.
+tf_cc_test(
+    name = "sample_file_test",
+    srcs = ["sample_file_test.cc"],
+    data = ["isolated_convolution.hlo"],
+    tags = ["requires-gpu-sm35"],
+    deps = [
+        ":hlo_test_base",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla/service:cpu_plugin",  # reference backend
+        "//tensorflow/compiler/xla/service:gpu_plugin",  # test backend
+        "//tensorflow/compiler/xla/service:platform_util",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+    ],
+)
+
 # -----------------------------------------------------------------------------
 
 filegroup(
diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc
index c6e8b24d1211743d07878d388522feacf9c0e7f1..56fc21d019bb823f8f4631420a15fd607ef46a9a 100644
--- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc
+++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc
@@ -1971,6 +1971,18 @@ XLA_TEST_F(ArrayElementwiseOpTest, SinF32s) {
                              error_spec_);
 }
 
+XLA_TEST_F(ArrayElementwiseOpTest, Atan2F32s) {
+  ComputationBuilder builder(client_, TestName());
+  auto a = builder.ConstantR1<float>({0.0f, 5.0f, 0.0f, -3.0f, 2.0f, -8.0f});
+  auto b = builder.ConstantR1<float>({6.0f, 0.0f, -4.0f, 0.0f, 2.0f, 8.0f});
+  auto atan = builder.Atan2(a, b);
+
+  ComputeAndCompareR1<float>(
+      &builder,
+      {0.0f, 1.57079633f, 3.14159265f, -1.57079633f, 0.78539816f, -0.78539816f},
+      {}, error_spec_);
+}
+
 XLA_TEST_F(ArrayElementwiseOpTest, TanhF32s) {
   ComputationBuilder builder(client_, TestName());
   auto a = builder.ConstantR1<float>({-2.5f, 3.14f, 2.25f});
@@ -2520,9 +2532,8 @@ XLA_TEST_F(ArrayElementwiseOpTest, R4_16x16x2x2_Plus_R1_16) {
   std::iota(r1.begin(), r1.end(), 1.0);
 
   ComputationBuilder builder(client_, TestName());
-  std::unique_ptr<Literal> a_literal = Literal::CreateR4FromArray4D(r4);
-  *a_literal->mutable_shape()->mutable_layout() =
-      LayoutUtil::MakeLayout({0, 1, 2, 3});
+  std::unique_ptr<Literal> a_literal = Literal::CreateR4FromArray4DWithLayout(
+      r4, LayoutUtil::MakeLayout({0, 1, 2, 3}));
   auto a = builder.ConstantLiteral(*a_literal);
   auto b = builder.ConstantR1<float>(r1);
   builder.Add(a, b, {1});
diff --git a/tensorflow/compiler/xla/tests/batch_normalization_test.cc b/tensorflow/compiler/xla/tests/batch_normalization_test.cc
index 028d1251b455b82a291c236f7866e52e27d3590e..28ab9654997728fbafd6610af840e721e72cce5a 100644
--- a/tensorflow/compiler/xla/tests/batch_normalization_test.cc
+++ b/tensorflow/compiler/xla/tests/batch_normalization_test.cc
@@ -39,6 +39,8 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/test_utils.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/math/math_util.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
@@ -46,9 +48,13 @@ limitations under the License.
 namespace xla {
 namespace {
 
-class BatchNormalizationTest : public ClientLibraryTestBase {
+class BatchNormalizationTest
+    : public ClientLibraryTestBase,
+      public ::testing::WithParamInterface<bool /*use_cudnn_batchnorm*/> {
  protected:
   BatchNormalizationTest() : input_array_(kSamples, kZ, kY, kX) {
+    mutable_debug_options()->set_xla_gpu_use_cudnn_batchnorm(GetParam());
+
     Array2D<float> pz({
         // z0 z1
         {-1.0f, 4.1f},  // p0
@@ -56,7 +62,7 @@ class BatchNormalizationTest : public ClientLibraryTestBase {
         {5.0f, 4.4f},   // p2
     });
     input_array_.FillWithPZ(pz);
-    input_literal_ = *Literal::CreateR4FromArray4D(input_array_);
+    input_literal_ = std::move(*Literal::CreateR4FromArray4D(input_array_));
     CHECK_EQ(kSamples, input_array_.planes());
     CHECK_EQ(kZ, input_array_.depth());
     CHECK_EQ(kY, input_array_.height());
@@ -73,7 +79,18 @@ class BatchNormalizationTest : public ClientLibraryTestBase {
   const ErrorSpec error_spec_{0.001, 0.001};
 };
 
-TEST_F(BatchNormalizationTest, SubtractInZ) {
+// If testing the GPU backend, run the tests twice, with and without cudnn
+// batchnorm.  Otherwise, just run the tests once -- the value of this flag
+// doesn't matter.
+#ifdef XLA_TEST_BACKEND_GPU
+INSTANTIATE_TEST_CASE_P(BatchNormalizationTestInstance, BatchNormalizationTest,
+                        ::testing::Bool());
+#else
+INSTANTIATE_TEST_CASE_P(BatchNormalizationTestInstance, BatchNormalizationTest,
+                        ::testing::Values(false));
+#endif
+
+XLA_TEST_P(BatchNormalizationTest, SubtractInZ) {
   ComputationBuilder builder(client_, "subtract_in_z_one_sample");
   auto x = builder.ConstantLiteral(input_literal_);
   auto y = builder.ConstantR1<float>({3.14, 4.25});
@@ -89,22 +106,24 @@ TEST_F(BatchNormalizationTest, SubtractInZ) {
   ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
 }
 
-TEST_F(BatchNormalizationTest, SquareTesseractElementwise) {
+XLA_TEST_P(BatchNormalizationTest, SquareTesseractElementwise) {
   ComputationBuilder builder(client_, "square_tesseract_elementwise");
   auto x = builder.ConstantLiteral(input_literal_);
   builder.SquareF32(x);
 
+  using tensorflow::MathUtil;
+
   Array4D<float> expected(kSamples, kZ, kY, kX);
   Array2D<float> expected_pz({
-      {std::pow(-1.0f, 2.0f), std::pow(4.1f, 2.0f)},
-      {std::pow(2.0f, 2.0f), std::pow(4.1f, 2.0f)},
-      {std::pow(5.0f, 2.0f), std::pow(4.4f, 2.0f)},
+      {MathUtil::IPow(-1.0f, 2), MathUtil::IPow(4.1f, 2)},
+      {MathUtil::IPow(2.0f, 2), MathUtil::IPow(4.1f, 2)},
+      {MathUtil::IPow(5.0f, 2), MathUtil::IPow(4.4f, 2)},
   });
   expected.FillWithPZ(expected_pz);
   ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
 }
 
-TEST_F(BatchNormalizationTest, SumToZ) {
+XLA_TEST_P(BatchNormalizationTest, SumToZ) {
   ComputationBuilder builder(client_, "sum_to_z");
   auto input_activations = builder.ConstantLiteral(input_literal_);
   Computation add = CreateScalarAddComputation(F32, &builder);
@@ -116,7 +135,7 @@ TEST_F(BatchNormalizationTest, SumToZ) {
   ComputeAndCompareR1<float>(&builder, expected, {}, error_spec_);
 }
 
-TEST_F(BatchNormalizationTest, SquareAndReduce) {
+XLA_TEST_P(BatchNormalizationTest, SquareAndReduce) {
   ComputationBuilder builder(client_, "square_and_reduce");
   auto input_activations = builder.ConstantLiteral(input_literal_);
   auto set_means = builder.ConstantR1<float>({2.f, 4.2f});
@@ -131,7 +150,7 @@ TEST_F(BatchNormalizationTest, SquareAndReduce) {
   ComputeAndCompareR1<float>(&builder, expected, {}, error_spec_);
 }
 
-TEST_F(BatchNormalizationTest, VarianceToStddev) {
+XLA_TEST_P(BatchNormalizationTest, VarianceToStddev) {
   ComputationBuilder builder(client_, "variance_to_stddev");
   auto variance = builder.ConstantR1<float>({6.f, .02f});
   auto sqrt = builder.SqrtF32(variance);
@@ -142,7 +161,7 @@ TEST_F(BatchNormalizationTest, VarianceToStddev) {
 
 // Compare against a forward batch normalization example in the NN spec
 // reference.
-TEST_F(BatchNormalizationTest, SpecComparisonForward) {
+XLA_TEST_P(BatchNormalizationTest, SpecComparisonForward) {
   ComputationBuilder builder(client_, "batch_normalize_per_spec");
   auto input_activations =
       builder.CheckShape(builder.ConstantLiteral(input_literal_),
@@ -198,19 +217,227 @@ TEST_F(BatchNormalizationTest, SpecComparisonForward) {
   ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
 }
 
+XLA_TEST_P(BatchNormalizationTest, BasicTraining) {
+  const int kFeatureIndex = 3;
+  ComputationBuilder builder(client_, TestName());
+
+  auto operand = builder.ConstantR4FromArray4D<float>(
+      {{{{1.f, 2.f}}, {{3.f, 4.f}}}, {{{5.f, 6.f}}, {{7.f, 8.f}}}});
+
+  auto scale = builder.ConstantR1<float>({2.0f, 3.0f});
+
+  auto offset = builder.ConstantR1<float>({1.0f, 2.0f});
+
+  auto tuple = builder.BatchNormTraining(operand, scale, offset,
+                                         /*epsilon=*/0.001, kFeatureIndex);
+
+  auto expected = Literal::MakeTuple(
+      {Literal::CreateR4<float>({{{{-1.6f, -2.0f}}, {{0.1f, 0.6f}}},
+                                 {{{1.9f, 3.3f}}, {{3.7f, 6.0f}}}})
+           .get(),
+       Literal::CreateR1<float>({4, 5}).get(),
+       Literal::CreateR1<float>({5, 5}).get()});
+
+  ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.1));
+}
+
+XLA_TEST_P(BatchNormalizationTest, BasicTrainingOnSublane) {
+  const int kFeatureIndex = 2;
+  ComputationBuilder builder(client_, TestName());
+
+  auto operand = builder.ConstantR4FromArray4D<float>(
+      {{{{1.f}, {2.f}}, {{3.f}, {4.f}}}, {{{5.f}, {6.f}}, {{7.f}, {8.f}}}});
+
+  auto scale = builder.ConstantR1<float>({2.0f, 3.0f});
+
+  auto offset = builder.ConstantR1<float>({1.0f, 2.0f});
+
+  auto tuple = builder.BatchNormTraining(operand, scale, offset,
+                                         /*epsilon=*/0.001, kFeatureIndex);
+
+  auto expected = Literal::MakeTuple(
+      {Literal::CreateR4<float>({{{{-1.6f}, {-2.0f}}, {{0.1f}, {0.6f}}},
+                                 {{{1.9f}, {3.3f}}, {{3.7f}, {6.0f}}}})
+           .get(),
+       Literal::CreateR1<float>({4, 5}).get(),
+       Literal::CreateR1<float>({5, 5}).get()});
+
+  ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.1));
+}
+
+XLA_TEST_P(BatchNormalizationTest, TrainingWithFeatureOnLowDimension) {
+  // Use 0 dimension as feature, tests layout analyzer.
+  const int kFeatureIndex = 0;
+  ComputationBuilder builder(client_, TestName());
+
+  ComputationDataHandle h0;
+  auto operand = CreateR3Parameter<float>(Array3D<float>(260, 2, 2, 1.0f),
+                                          /*parameter_number=*/0, "operand",
+                                          &builder, &h0);
+  ComputationDataHandle h1;
+  auto scale =
+      CreateR1Parameter<float>(std::vector<float>(260, 1.0f),
+                               /*parameter_number=*/1, "scale", &builder, &h1);
+  ComputationDataHandle h2;
+  auto offset =
+      CreateR1Parameter<float>(std::vector<float>(260, 1.0f),
+                               /*parameter_number=*/2, "offset", &builder, &h2);
+
+  auto tuple = builder.BatchNormTraining(h0, h1, h2,
+                                         /*epsilon=*/1, kFeatureIndex);
+
+  auto expected = Literal::MakeTuple(
+      {Literal::CreateR3FromArray3D<float>(Array3D<float>(260, 2, 2, 1.0f))
+           .get(),
+       Literal::CreateR1<float>(std::vector<float>(260, 1.0f)).get(),
+       Literal::CreateR1<float>(std::vector<float>(260, 0.0f)).get()});
+
+  ComputeAndCompareTuple(&builder, *expected,
+                         {operand.get(), scale.get(), offset.get()},
+                         ErrorSpec(0.1));
+}
+
+XLA_TEST_P(BatchNormalizationTest, LargeEpsilonTest) {
+  // Test the correctness of choosing a large epsilon value.
+  const int kFeatureIndex = 2;
+  ComputationBuilder builder(client_, TestName());
+
+  ComputationDataHandle h0;
+  auto operand = CreateR3Parameter<float>({{{0.0f}, {10.0f}, {20.0f}, {30.0f}}},
+                                          /*parameter_number=*/0, "operand",
+                                          &builder, &h0);
+  ComputationDataHandle h1;
+  auto scale =
+      CreateR1Parameter<float>(std::vector<float>(1, 1.0f),
+                               /*parameter_number=*/1, "scale", &builder, &h1);
+  ComputationDataHandle h2;
+  auto offset =
+      CreateR1Parameter<float>(std::vector<float>(1, 0.0f),
+                               /*parameter_number=*/2, "offset", &builder, &h2);
+
+  // var = 125, mean = 15, epsilon = -100
+  auto tuple = builder.BatchNormTraining(h0, h1, h2,
+                                         /*epsilon=*/-100, kFeatureIndex);
+
+  auto expected = Literal::MakeTuple(
+      {Literal::CreateR3FromArray3D<float>({{{-3.0f}, {-1.0f}, {1.0f}, {3.0f}}})
+           .get(),
+       Literal::CreateR1<float>(std::vector<float>(1, 15.0f)).get(),
+       Literal::CreateR1<float>(std::vector<float>(1, 125.0f)).get()});
+
+  ComputeAndCompareTuple(&builder, *expected,
+                         {operand.get(), scale.get(), offset.get()},
+                         ErrorSpec(0.1));
+}
+
+XLA_TEST_P(BatchNormalizationTest, BatchNormGradBasic) {
+  const int kFeatureIndex = 2;
+  ComputationBuilder builder(client_, TestName());
+
+  auto operand =
+      builder.ConstantR4FromArray4D<float>(Array4D<float>(2, 2, 2, 1, 0.0f));
+
+  auto scale = builder.ConstantR1<float>({1.0f, 1.0f});
+
+  auto mean = builder.ConstantR1<float>({0.0f, 0.0f});
+
+  auto var = builder.ConstantR1<float>({1.0f, 1.0f});
+
+  auto grad_output = builder.ConstantR4FromArray4D<float>(
+      {{{{1.f}, {2.f}}, {{3.f}, {4.f}}}, {{{5.f}, {6.f}}, {{7.f}, {8.f}}}});
+
+  builder.BatchNormGrad(operand, scale, mean, var, grad_output,
+                        /*epsilon=*/0.0, kFeatureIndex);
+
+  auto expected = Literal::MakeTuple(
+      {Literal::CreateR4<float>({{{{-3.f}, {-3.f}}, {{-1.f}, {-1.f}}},
+                                 {{{1.f}, {1.f}}, {{3.f}, {3.f}}}})
+           .get(),
+       Literal::CreateR1<float>({0, 0}).get(),
+       Literal::CreateR1<float>({16, 20}).get()});
+
+  ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.1));
+}
+
 struct BatchNormTestParam {
   std::vector<int64> bounds;
   int64 feature_index;
   float random_value_mean;
   float random_value_var;
+  bool use_cudnn_batchnorm;
+
+  friend ::std::ostream& operator<<(::std::ostream& os,
+                                    const BatchNormTestParam& p) {
+    os << "bounds={" << tensorflow::str_util::Join(p.bounds, ", ") << "}, ";
+    os << "feature_index=" << p.feature_index << ", ";
+    os << "random_value_mean=" << p.random_value_mean << ", ";
+    os << "random_value_var=" << p.random_value_var;
+
+    // Don't print use_cudnn_batchnorm when it's false, because most backends
+    // never set it to true.
+    if (p.use_cudnn_batchnorm) {
+      os << ", use_cudnn_batchnorm=true";
+    }
+    return os;
+  }
 };
 
 // Tests to test the fused operation of BatchNorm.
-class BatchNormTest : public ClientLibraryTestBase,
-                      public ::testing::WithParamInterface<BatchNormTestParam> {
+class BatchNormTestManySizes
+    : public ClientLibraryTestBase,
+      public ::testing::WithParamInterface<BatchNormTestParam> {
+ public:
+  BatchNormTestManySizes() {
+    mutable_debug_options()->set_xla_gpu_use_cudnn_batchnorm(
+        GetParam().use_cudnn_batchnorm);
+  }
 };
 
-XLA_TEST_P(BatchNormTest, RandomizedTests) {
+std::vector<BatchNormTestParam> BuildBatchNormTestParams() {
+  std::vector<BatchNormTestParam> params;
+
+  auto add_testcase = [&](std::vector<int64> bounds, int64 feature_index,
+                          float random_value_mean, float random_value_var) {
+    BatchNormTestParam p{bounds, feature_index, random_value_mean,
+                         random_value_var, /*use_cudnn_batchnorm=*/false};
+    params.push_back(p);
+
+    // If testing the GPU backend, also run with cudnn batchnorm enabled.
+#ifdef XLA_TEST_BACKEND_GPU
+    p.use_cudnn_batchnorm = true;
+    params.push_back(p);
+#endif
+  };
+
+  add_testcase({2, 2, 2, 2}, 0, 100.2f, 200.0f);
+  add_testcase({2, 2, 2, 2}, 3, 300.f, 400.0f);
+
+  add_testcase({1, 10, 1, 1}, 0, 10.1f, 20.1f);
+  add_testcase({10, 10, 10, 10}, 1, 3.14f, 314.15f);
+  add_testcase({10, 10, 10, 10}, 2, 666.6f, 777.7f);
+  add_testcase({10, 10, 10, 10}, 1, -666.6f, 777.7f);
+  add_testcase({10, 10, 10, 10}, 2, 0.f, 777.7f);
+  add_testcase({1, 1, 10, 130}, 2, 0.f, 777.7f);
+  add_testcase({1, 1, 130, 11}, 2, 0.f, 777.7f);
+  add_testcase({1, 1, 10, 1}, 3, 888.8f, 9.9f);
+
+  add_testcase({24, 129, 1, 2}, 2, 10000, 10000);
+  add_testcase({24, 129, 1, 2}, 3, 10000, 10000);
+
+  // Feature on low dimension to trigger relayout, check that internal logical
+  // to physical dimension calculation is correct after relayout.
+  add_testcase({1, 2, 3, 4}, 0, 100, 100);
+
+  // Zero-sized tensor.
+  add_testcase({1, 0, 100, 42}, 0, 100, 100);
+
+  return params;
+}
+
+INSTANTIATE_TEST_CASE_P(BatchNormTest_Instantiation, BatchNormTestManySizes,
+                        ::testing::ValuesIn(BuildBatchNormTestParams()));
+
+XLA_TEST_P(BatchNormTestManySizes, RandomizedTrainingTests) {
   float epsilon = 0.001;
   ComputationBuilder builder(client_, TestName());
   const std::vector<int64>& bounds = GetParam().bounds;
@@ -286,9 +513,9 @@ XLA_TEST_P(BatchNormTest, RandomizedTests) {
   auto offset_activations =
       builder.Parameter(2, offset_literal->shape(), "scale");
 
-  auto expected = *Literal::MakeTuple({expected_normalized.get(),
-                                       Literal::CreateR1<float>(mean).get(),
-                                       Literal::CreateR1<float>(var).get()});
+  auto expected = Literal::MakeTuple({expected_normalized.get(),
+                                      Literal::CreateR1<float>(mean).get(),
+                                      Literal::CreateR1<float>(var).get()});
 
   std::unique_ptr<GlobalData> input_data =
       client_->TransferToServer(*input_literal).ConsumeValueOrDie();
@@ -300,13 +527,17 @@ XLA_TEST_P(BatchNormTest, RandomizedTests) {
   builder.BatchNormTraining(input_activations, scale_activations,
                             offset_activations, epsilon, feature_index);
 
+  // Run all HLO passes during this test.  In particular, ClientLibraryTestBase
+  // disables constant folding, but we want it enabled for our zero-sized tensor
+  // testcase.
+  execution_options_.mutable_debug_options()->clear_xla_disable_hlo_passes();
   ComputeAndCompareTuple(
-      &builder, expected,
+      &builder, *expected,
       {input_data.get(), scale_data.get(), offset_data.get()},
       ErrorSpec(0.01, 1));
 }
 
-XLA_TEST_P(BatchNormTest, RandomizedInferencingTests) {
+XLA_TEST_P(BatchNormTestManySizes, RandomizedInferencingTests) {
   float epsilon = 0.001;
   ComputationBuilder builder(client_, TestName());
   const std::vector<int64>& bounds = GetParam().bounds;
@@ -402,6 +633,11 @@ XLA_TEST_P(BatchNormTest, RandomizedInferencingTests) {
                              offset_activations, mean_activations,
                              variance_activations, epsilon, feature_index);
 
+  // Run all HLO passes during this test.  In particular, ClientLibraryTestBase
+  // disables constant folding, but we want it enabled for our zero-sized tensor
+  // testcase.
+  execution_options_.mutable_debug_options()->clear_xla_disable_hlo_passes();
+
   ComputeAndCompareR4<float>(
       &builder, expected,
       {input_data.get(), scale_data.get(), offset_data.get(), mean_data.get(),
@@ -409,7 +645,7 @@ XLA_TEST_P(BatchNormTest, RandomizedInferencingTests) {
       ErrorSpec(0.01, 1));
 }
 
-XLA_TEST_P(BatchNormTest, RandomizedGradTests) {
+XLA_TEST_P(BatchNormTestManySizes, RandomizedGradTests) {
   float epsilon = 0.001;
   ComputationBuilder builder(client_, TestName());
   const std::vector<int64>& bounds = GetParam().bounds;
@@ -447,7 +683,11 @@ XLA_TEST_P(BatchNormTest, RandomizedGradTests) {
   std::vector<float> mean(feature_bound);
 
   for (int64 i = 0; i < feature_bound; ++i) {
-    mean[i] = sum[i] / num_elements_per_feature;
+    if (num_elements_per_feature > 0) {
+      mean[i] = sum[i] / num_elements_per_feature;
+    } else {
+      mean[i] = 0;
+    }
   }
 
   std::vector<float> mean_square(feature_bound);
@@ -457,7 +697,11 @@ XLA_TEST_P(BatchNormTest, RandomizedGradTests) {
 
   std::vector<float> square_mean(feature_bound);
   for (int64 i = 0; i < feature_bound; ++i) {
-    square_mean[i] = sum_squared[i] / num_elements_per_feature;
+    if (num_elements_per_feature > 0) {
+      square_mean[i] = sum_squared[i] / num_elements_per_feature;
+    } else {
+      square_mean[i] = 0;
+    }
   }
 
   std::vector<float> var(feature_bound);
@@ -535,8 +779,12 @@ XLA_TEST_P(BatchNormTest, RandomizedGradTests) {
       grad_activation, scale4D, [](float a, float b) { return a * b; });
 
   grad_activation = *ReferenceUtil::MapArray4D(
-      grad_activation, rsqrt_var_add_epsilon,
-      [=](float a, float b) { return a * b / num_elements_per_feature; });
+      grad_activation, rsqrt_var_add_epsilon, [=](float a, float b) {
+        if (num_elements_per_feature > 0) {
+          return a * b / num_elements_per_feature;
+        }
+        return 0.f;
+      });
 
   auto expected_grad_activation =
       Literal::CreateR4FromArray4D<float>(grad_activation);
@@ -571,179 +819,20 @@ XLA_TEST_P(BatchNormTest, RandomizedGradTests) {
                                  grad_output_parameter, epsilon, feature_index);
 
   auto expected =
-      *Literal::MakeTuple({expected_grad_activation.get(),
-                           Literal::CreateR1<float>(grad_scale).get(),
-                           Literal::CreateR1<float>(grad_offset).get()});
+      Literal::MakeTuple({expected_grad_activation.get(),
+                          Literal::CreateR1<float>(grad_scale).get(),
+                          Literal::CreateR1<float>(grad_offset).get()});
+
+  // Run all HLO passes during this test.  In particular, ClientLibraryTestBase
+  // disables constant folding, but we want it enabled for our zero-sized tensor
+  // testcase.
+  execution_options_.mutable_debug_options()->clear_xla_disable_hlo_passes();
 
-  ComputeAndCompareTuple(&builder, expected,
+  ComputeAndCompareTuple(&builder, *expected,
                          {input_data.get(), scale_data.get(), mean_data.get(),
                           var_data.get(), grad_output_data.get()},
                          ErrorSpec(0.01, 1));
 }
 
-INSTANTIATE_TEST_CASE_P(
-    BatchNormTest_Instantiation, BatchNormTest,
-    ::testing::Values(BatchNormTestParam{{2, 2, 2, 2}, 0, 100.2f, 200.0f},
-                      BatchNormTestParam{{2, 2, 2, 2}, 3, 300.f, 400.0f},
-
-                      BatchNormTestParam{{1, 10, 1, 1}, 0, 10.1f, 20.1f},
-                      BatchNormTestParam{{10, 10, 10, 10}, 1, 3.14f, 314.15f},
-                      BatchNormTestParam{{10, 10, 10, 10}, 2, 666.6f, 777.7f},
-                      BatchNormTestParam{{10, 10, 10, 10}, 1, -666.6f, 777.7f},
-                      BatchNormTestParam{{10, 10, 10, 10}, 2, 0.f, 777.7f},
-                      BatchNormTestParam{{1, 1, 10, 130}, 2, 0.f, 777.7f},
-                      BatchNormTestParam{{1, 1, 130, 11}, 2, 0.f, 777.7f},
-                      BatchNormTestParam{{1, 1, 10, 1}, 3, 888.8f, 9.9f},
-
-                      BatchNormTestParam{{24, 129, 1, 2}, 2, 10000, 10000},
-                      BatchNormTestParam{{24, 129, 1, 2}, 3, 10000, 10000},
-
-                      // Feature on low dimension to trigger relayout, test
-                      // internal logical to physical dimension calculation
-                      // is correct after relayout.
-                      BatchNormTestParam{{1, 2, 3, 4}, 0, 100, 100}));
-
-XLA_TEST_F(BatchNormTest, BasicTraining) {
-  const int kFeatureIndex = 3;
-  ComputationBuilder builder(client_, TestName());
-
-  auto operand = builder.ConstantR4FromArray4D<float>(
-      {{{{1.f, 2.f}}, {{3.f, 4.f}}}, {{{5.f, 6.f}}, {{7.f, 8.f}}}});
-
-  auto scale = builder.ConstantR1<float>({2.0f, 3.0f});
-
-  auto offset = builder.ConstantR1<float>({1.0f, 2.0f});
-
-  auto tuple = builder.BatchNormTraining(operand, scale, offset,
-                                         /*epsilon=*/0.001, kFeatureIndex);
-
-  auto expected = *Literal::MakeTuple(
-      {Literal::CreateR4<float>({{{{-1.6f, -2.0f}}, {{0.1f, 0.6f}}},
-                                 {{{1.9f, 3.3f}}, {{3.7f, 6.0f}}}})
-           .get(),
-       Literal::CreateR1<float>({4, 5}).get(),
-       Literal::CreateR1<float>({5, 5}).get()});
-
-  ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.1));
-}
-
-XLA_TEST_F(BatchNormTest, BasicTrainingOnSublane) {
-  const int kFeatureIndex = 2;
-  ComputationBuilder builder(client_, TestName());
-
-  auto operand = builder.ConstantR4FromArray4D<float>(
-      {{{{1.f}, {2.f}}, {{3.f}, {4.f}}}, {{{5.f}, {6.f}}, {{7.f}, {8.f}}}});
-
-  auto scale = builder.ConstantR1<float>({2.0f, 3.0f});
-
-  auto offset = builder.ConstantR1<float>({1.0f, 2.0f});
-
-  auto tuple = builder.BatchNormTraining(operand, scale, offset,
-                                         /*epsilon=*/0.001, kFeatureIndex);
-
-  auto expected = *Literal::MakeTuple(
-      {Literal::CreateR4<float>({{{{-1.6f}, {-2.0f}}, {{0.1f}, {0.6f}}},
-                                 {{{1.9f}, {3.3f}}, {{3.7f}, {6.0f}}}})
-           .get(),
-       Literal::CreateR1<float>({4, 5}).get(),
-       Literal::CreateR1<float>({5, 5}).get()});
-
-  ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.1));
-}
-
-XLA_TEST_F(BatchNormTest, DISABLED_ON_GPU(TrainingWithFeatureOnLowDimension)) {
-  // Use 0 dimension as feature, tests layout analyzer.
-  const int kFeatureIndex = 0;
-  ComputationBuilder builder(client_, TestName());
-
-  ComputationDataHandle h0;
-  auto operand = CreateR3Parameter<float>(Array3D<float>(260, 2, 2, 1.0f),
-                                          /*parameter_number=*/0, "operand",
-                                          &builder, &h0);
-  ComputationDataHandle h1;
-  auto scale =
-      CreateR1Parameter<float>(std::vector<float>(260, 1.0f),
-                               /*parameter_number=*/1, "scale", &builder, &h1);
-  ComputationDataHandle h2;
-  auto offset =
-      CreateR1Parameter<float>(std::vector<float>(260, 1.0f),
-                               /*parameter_number=*/2, "offset", &builder, &h2);
-
-  auto tuple = builder.BatchNormTraining(h0, h1, h2,
-                                         /*epsilon=*/1, kFeatureIndex);
-
-  auto expected = *Literal::MakeTuple(
-      {Literal::CreateR3FromArray3D<float>(Array3D<float>(260, 2, 2, 1.0f))
-           .get(),
-       Literal::CreateR1<float>(std::vector<float>(260, 1.0f)).get(),
-       Literal::CreateR1<float>(std::vector<float>(260, 0.0f)).get()});
-
-  ComputeAndCompareTuple(&builder, expected,
-                         {operand.get(), scale.get(), offset.get()},
-                         ErrorSpec(0.1));
-}
-
-XLA_TEST_F(BatchNormTest, LargeEpsilonTest) {
-  // Test the correctness of choosing a large epsilon value.
-  const int kFeatureIndex = 2;
-  ComputationBuilder builder(client_, TestName());
-
-  ComputationDataHandle h0;
-  auto operand = CreateR3Parameter<float>({{{0.0f}, {10.0f}, {20.0f}, {30.0f}}},
-                                          /*parameter_number=*/0, "operand",
-                                          &builder, &h0);
-  ComputationDataHandle h1;
-  auto scale =
-      CreateR1Parameter<float>(std::vector<float>(1, 1.0f),
-                               /*parameter_number=*/1, "scale", &builder, &h1);
-  ComputationDataHandle h2;
-  auto offset =
-      CreateR1Parameter<float>(std::vector<float>(1, 0.0f),
-                               /*parameter_number=*/2, "offset", &builder, &h2);
-
-  // var = 125, mean = 15, epsilon = -100
-  auto tuple = builder.BatchNormTraining(h0, h1, h2,
-                                         /*epsilon=*/-100, kFeatureIndex);
-
-  auto expected = *Literal::MakeTuple(
-      {Literal::CreateR3FromArray3D<float>({{{-3.0f}, {-1.0f}, {1.0f}, {3.0f}}})
-           .get(),
-       Literal::CreateR1<float>(std::vector<float>(1, 15.0f)).get(),
-       Literal::CreateR1<float>(std::vector<float>(1, 125.0f)).get()});
-
-  ComputeAndCompareTuple(&builder, expected,
-                         {operand.get(), scale.get(), offset.get()},
-                         ErrorSpec(0.1));
-}
-
-XLA_TEST_F(BatchNormTest, BatchNormGradBasic) {
-  const int kFeatureIndex = 2;
-  ComputationBuilder builder(client_, TestName());
-
-  auto operand =
-      builder.ConstantR4FromArray4D<float>(Array4D<float>(2, 2, 2, 1, 0.0f));
-
-  auto scale = builder.ConstantR1<float>({1.0f, 1.0f});
-
-  auto mean = builder.ConstantR1<float>({0.0f, 0.0f});
-
-  auto var = builder.ConstantR1<float>({1.0f, 1.0f});
-
-  auto grad_output = builder.ConstantR4FromArray4D<float>(
-      {{{{1.f}, {2.f}}, {{3.f}, {4.f}}}, {{{5.f}, {6.f}}, {{7.f}, {8.f}}}});
-
-  builder.BatchNormGrad(operand, scale, mean, var, grad_output,
-                        /*epsilon=*/0.0, kFeatureIndex);
-
-  auto expected = *Literal::MakeTuple(
-      {Literal::CreateR4<float>({{{{-3.f}, {-3.f}}, {{-1.f}, {-1.f}}},
-                                 {{{1.f}, {1.f}}, {{3.f}, {3.f}}}})
-           .get(),
-       Literal::CreateR1<float>({0, 0}).get(),
-       Literal::CreateR1<float>({16, 20}).get()});
-
-  ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.1));
-}
-
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/bfloat16_test.cc b/tensorflow/compiler/xla/tests/bfloat16_test.cc
index a1c53ef2aa95c7d2a9d46483dfda22a05ff0cf1a..e47fcad475bb176a7b4598daf2c98897eb34182b 100644
--- a/tensorflow/compiler/xla/tests/bfloat16_test.cc
+++ b/tensorflow/compiler/xla/tests/bfloat16_test.cc
@@ -61,6 +61,15 @@ XLA_TEST_F(Bfloat16Test, ScalarOperation) {
                                 error_spec_);
 }
 
+XLA_TEST_F(Bfloat16Test, LogOperation) {
+  ComputationBuilder builder(client_, TestName());
+  auto x = builder.ConstantR0<bfloat16>(static_cast<bfloat16>(4.0f));
+  builder.Log(x);
+
+  ComputeAndCompareR0<bfloat16>(&builder, static_cast<bfloat16>(1.387f), {},
+                                error_spec_);
+}
+
 XLA_TEST_F(Bfloat16Test, NegateScalarF16) {
   ComputationBuilder builder(client_, TestName());
   builder.Neg(builder.ConstantR0<bfloat16>(static_cast<bfloat16>(2.1f)));
@@ -88,7 +97,7 @@ XLA_TEST_F(Bfloat16Test, BatchNormTraining) {
   auto tuple = builder.BatchNormTraining(operand, scale, offset,
                                          /*epsilon=*/0.001, kFeatureIndex);
 
-  auto expected = *Literal::MakeTuple(
+  auto expected = Literal::MakeTuple(
       {Literal::CreateR4<bfloat16>(
            {{{{static_cast<bfloat16>(-1.7f)}, {static_cast<bfloat16>(-2.04f)}},
              {{static_cast<bfloat16>(0.105f)}, {static_cast<bfloat16>(0.65f)}}},
@@ -102,7 +111,7 @@ XLA_TEST_F(Bfloat16Test, BatchNormTraining) {
            {static_cast<bfloat16>(5), static_cast<bfloat16>(5)})
            .get()});
 
-  ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.01));
+  ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.01));
 }
 
 XLA_TEST_F(Bfloat16Test, BatchNormGrad) {
@@ -130,7 +139,7 @@ XLA_TEST_F(Bfloat16Test, BatchNormGrad) {
   builder.BatchNormGrad(operand, scale, mean, var, grad_output,
                         /*epsilon=*/0.0, kFeatureIndex);
 
-  auto expected = *Literal::MakeTuple(
+  auto expected = Literal::MakeTuple(
       {Literal::CreateR4<bfloat16>(
            {{{{static_cast<bfloat16>(-3.f)}, {static_cast<bfloat16>(-3.f)}},
              {{static_cast<bfloat16>(-1.f)}, {static_cast<bfloat16>(-1.f)}}},
@@ -144,7 +153,7 @@ XLA_TEST_F(Bfloat16Test, BatchNormGrad) {
            {static_cast<bfloat16>(16), static_cast<bfloat16>(20)})
            .get()});
 
-  ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.01));
+  ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.01));
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/tests/broadcast_test.cc b/tensorflow/compiler/xla/tests/broadcast_test.cc
index 0294628a127c9d506e6387d0b80f3da583c5a174..6ebbf7191833ef85ee4a48cc96c0a3be38c71228 100644
--- a/tensorflow/compiler/xla/tests/broadcast_test.cc
+++ b/tensorflow/compiler/xla/tests/broadcast_test.cc
@@ -87,11 +87,11 @@ XLA_TEST_F(BroadcastTest, BroadcastVectorTo2D) {
 
   LiteralTestUtil::ExpectNear(
       *Literal::CreateR2<float>({{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}}),
-      result->tuple_literals(0), error_spec_);
+      LiteralView::Create(*result, {0}), error_spec_);
 
   LiteralTestUtil::ExpectNear(
       *Literal::CreateR2<float>({{1.0, 2.0, 3.0}, {1.0, 2.0, 3.0}}),
-      result->tuple_literals(1), error_spec_);
+      LiteralView::Create(*result, {1}), error_spec_);
 }
 
 XLA_TEST_F(BroadcastTest, Broadcast2DTo2D) {
diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc
index 15bd273e9b69f9c177a4ec6b5c9f0e1dccee7fc1..7c9494f133f3db3733fc2ffa4dacfb9a71dd01d8 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.cc
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc
@@ -251,8 +251,17 @@ ClientLibraryTestBase::ComputeAndCompareLiteralWithAllInputLayouts(
 
 tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus(
     ComputationBuilder* builder, const Literal& expected,
-    tensorflow::gtl::ArraySlice<GlobalData*> arguments,
+    tensorflow::gtl::ArraySlice<GlobalData*> arguments_passed_in,
     const Shape* shape_with_layout) {
+  std::vector<GlobalData*> arguments(arguments_passed_in.begin(),
+                                     arguments_passed_in.end());
+  if (!arguments_.empty()) {
+    CHECK(arguments.empty());
+    for (const auto& argument : arguments_) {
+      arguments.push_back(argument.get());
+    }
+  }
+
   TF_ASSIGN_OR_RETURN(auto computation, builder->Build());
   if (ShapeUtil::ElementIsFloating(expected.shape()) ||
       ShapeUtil::ElementIsComplex(expected.shape())) {
@@ -267,12 +276,17 @@ tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus(
   const Literal* expected_ptr = &expected;
   std::unique_ptr<Literal> converted_expected;
   Shape layout_shape;
-  if (expected.shape().element_type() == F32 && use_bfloat16_) {
+  if (use_bfloat16_) {
     converted_expected = LiteralTestUtil::ConvertF32ToBF16(expected);
     expected_ptr = converted_expected.get();
     if (shape_with_layout != nullptr) {
       layout_shape = *shape_with_layout;
-      layout_shape.set_element_type(BF16);
+      ShapeUtil::ForEachMutableSubshape(
+          &layout_shape, [&](Shape* subshape, const ShapeIndex& /*index*/) {
+            if (subshape->element_type() == F32) {
+              subshape->set_element_type(BF16);
+            }
+          });
       shape_with_layout = &layout_shape;
     }
   }
@@ -295,8 +309,17 @@ tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus(
 
 tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus(
     ComputationBuilder* builder, const Literal& expected,
-    tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error,
-    const Shape* shape_with_layout) {
+    tensorflow::gtl::ArraySlice<GlobalData*> arguments_passed_in,
+    ErrorSpec error, const Shape* shape_with_layout) {
+  std::vector<GlobalData*> arguments(arguments_passed_in.begin(),
+                                     arguments_passed_in.end());
+  if (!arguments_.empty()) {
+    CHECK(arguments.empty());
+    for (const auto& argument : arguments_) {
+      arguments.push_back(argument.get());
+    }
+  }
+
   TF_RET_CHECK(ShapeUtil::ElementIsFloating(expected.shape()) ||
                ShapeUtil::ElementIsComplex(expected.shape()));
   TF_ASSIGN_OR_RETURN(auto computation, builder->Build());
@@ -305,13 +328,17 @@ tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus(
   const Literal* expected_ptr = &expected;
   std::unique_ptr<Literal> converted_expected;
   Shape layout_shape;
-  if (expected.shape().element_type() == F32 && use_bfloat16_) {
+  if (use_bfloat16_) {
     converted_expected = LiteralTestUtil::ConvertF32ToBF16(expected);
     expected_ptr = converted_expected.get();
-    layout_shape.set_element_type(BF16);
     if (shape_with_layout != nullptr) {
       layout_shape = *shape_with_layout;
-      layout_shape.set_element_type(BF16);
+      ShapeUtil::ForEachMutableSubshape(
+          &layout_shape, [&](Shape* subshape, const ShapeIndex& /*index*/) {
+            if (subshape->element_type() == F32) {
+              subshape->set_element_type(BF16);
+            }
+          });
       shape_with_layout = &layout_shape;
     }
   }
@@ -348,7 +375,7 @@ void ClientLibraryTestBase::ComputeAndCompareR1U8(
   VLOG(1) << "expected: " << expected_literal->ToString();
   VLOG(1) << "actual:   " << actual->ToString();
 
-  EXPECT_EQ(expected, actual->u8s_string());
+  EXPECT_EQ(expected, actual->GetR1U8AsString());
 }
 
 void ClientLibraryTestBase::ComputeAndCompareTuple(
@@ -499,17 +526,41 @@ std::unique_ptr<GlobalData>
 ClientLibraryTestBase::CreateParameterAndTransferLiteral(
     int64 parameter_number, const Literal& literal, const string& name,
     ComputationBuilder* builder, ComputationDataHandle* data_handle) {
+  return CreateParameterAndTransferLiteral(parameter_number, literal, name,
+                                           nullptr, builder, data_handle);
+}
+
+std::unique_ptr<GlobalData>
+ClientLibraryTestBase::CreateParameterAndTransferLiteral(
+    int64 parameter_number, const Literal& literal, const string& name,
+    const DeviceHandle* device_handle, ComputationBuilder* builder,
+    ComputationDataHandle* data_handle) {
   const Literal* param_literal = &literal;
   std::unique_ptr<Literal> converted_literal;
-  if (use_bfloat16_ && literal.shape().element_type() == F32) {
+  if (use_bfloat16_) {
     converted_literal = LiteralTestUtil::ConvertF32ToBF16(literal);
     param_literal = converted_literal.get();
   }
   std::unique_ptr<GlobalData> data =
-      client_->TransferToServer(*param_literal).ConsumeValueOrDie();
+      client_->TransferToServer(*param_literal, device_handle)
+          .ConsumeValueOrDie();
   *data_handle =
       builder->Parameter(parameter_number, param_literal->shape(), name);
   return data;
 }
 
+ComputationDataHandle ClientLibraryTestBase::AddParam(
+    const Literal& argument, ComputationBuilder* builder) {
+  ComputationDataHandle data_handle;
+  arguments_.push_back(CreateParameterAndTransferLiteral(
+      arguments_.size(), argument, "", builder, &data_handle));
+  return data_handle;
+}
+
+ComputationDataHandle ClientLibraryTestBase::CreateConstantFromLiteral(
+    const Literal& literal, ComputationBuilder* builder) {
+  return builder->ConstantLiteral(
+      use_bfloat16_ ? *LiteralTestUtil::ConvertF32ToBF16(literal) : literal);
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h
index 1d27880fb1413adbbe691b5d12cadcd85fbe5d92..a559a653df89f3b99bd87665a7f2ccf99afa54e0 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.h
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.h
@@ -43,6 +43,23 @@ limitations under the License.
 
 namespace xla {
 
+// Sets the use_bfloat16 on a container of test cases according to the values in
+// use_bfloat16_params. Generates one set of test cases for each values in
+// use_bfloat16_params with that value. Returns the result.
+template <typename TestCase>
+std::vector<TestCase> ExpandUseBfloat16(
+    tensorflow::gtl::ArraySlice<bool> use_bfloat16_params,
+    tensorflow::gtl::ArraySlice<TestCase> specs) {
+  std::vector<TestCase> expanded;
+  for (bool use_bfloat16 : use_bfloat16_params) {
+    for (const auto& spec : specs) {
+      expanded.push_back(spec);
+      expanded.back().use_bfloat16 = use_bfloat16;
+    }
+  }
+  return expanded;
+}
+
 // A client library test establishes an in-process XLA client connection.
 class ClientLibraryTestBase : public ::testing::Test {
  protected:
@@ -194,7 +211,7 @@ class ClientLibraryTestBase : public ::testing::Test {
       tensorflow::gtl::ArraySlice<GlobalData*> arguments);
   void ComputeAndCompareTuple(
       ComputationBuilder* builder, const Literal& expected,
-      tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec abs_error);
+      tensorflow::gtl::ArraySlice<GlobalData*> arguments, ErrorSpec error);
 
   // Convenience method for running a built computation and comparing the result
   // with the HloEvaluator.
@@ -253,6 +270,51 @@ class ClientLibraryTestBase : public ::testing::Test {
       int64 parameter_number, const Literal& literal, const string& name,
       ComputationBuilder* builder, ComputationDataHandle* data_handle);
 
+  // As above, but the caller can specify the device that the literal is
+  // transferred to. If device_handle is nullptr, the literal will be
+  // transferred to the default device.
+  std::unique_ptr<GlobalData> CreateParameterAndTransferLiteral(
+      int64 parameter_number, const Literal& literal, const string& name,
+      const DeviceHandle* device_handle, ComputationBuilder* builder,
+      ComputationDataHandle* data_handle);
+
+  // Creates a parameter instruction and sets the value that will be passed to
+  // the computation as specified. This function must be used for all parameters
+  // or none and no parameters must be passed when invoking the computation if
+  // using this mechanism. If using this mechanism, then each parameter must be
+  // set exactly once. The first added parameter gets index 0, then 1 and so on.
+  ComputationDataHandle AddParam(const Literal& argument,
+                                 ComputationBuilder* builder);
+
+  template <class T>
+  ComputationDataHandle AddParam(const Array<T>& argument,
+                                 ComputationBuilder* builder) {
+    return AddParam(*Literal::CreateFromArray(argument), builder);
+  }
+
+  // Creates a constant instruction with the given literal. When the
+  // use_bfloat16 flag is set but the literal has F32 elements, the elements
+  // will be converted to BF16s.
+  ComputationDataHandle CreateConstantFromLiteral(const Literal& literal,
+                                                  ComputationBuilder* builder);
+
+  // Creates a constant instruction with the given array. When the use_bfloat16
+  // flag is set but the array has float elements, the elements will be
+  // converted to bfloat16s.
+  template <typename NativeT>
+  ComputationDataHandle CreateConstantFromArray(const Array<NativeT>& array,
+                                                ComputationBuilder* builder) {
+    return CreateConstantFromLiteral(*Literal::CreateFromArray(array), builder);
+  }
+
+  // Same as CreateConstantFromArray, but for scalars.
+  template <typename NativeT>
+  ComputationDataHandle CreateConstantFromScalar(NativeT value,
+                                                 ComputationBuilder* builder) {
+    return CreateConstantFromLiteral(*Literal::CreateR0<NativeT>(value),
+                                     builder);
+  }
+
   // Creates a parameter instruction that wraps a given value and then stores
   // into "data_handle" the global handle for that parameter.
   //
@@ -315,6 +377,9 @@ class ClientLibraryTestBase : public ::testing::Test {
   bool use_bfloat16() const { return use_bfloat16_; }
   void set_use_bfloat16(bool value) { use_bfloat16_ = value; }
 
+  // The float type used in this test, BF16 or F32 according to use_bfloat16.
+  PrimitiveType FloatType() const { return use_bfloat16_ ? BF16 : F32; }
+
   Client* client_;
   ExecutionOptions execution_options_;
 
@@ -344,6 +409,9 @@ class ClientLibraryTestBase : public ::testing::Test {
   // Whether to run tests with all float-type input/output converted to
   // bfloat16.
   bool use_bfloat16_ = false;
+
+  // Arguments to be passed to the computation when it runs.
+  std::vector<std::unique_ptr<GlobalData>> arguments_;
 };
 
 template <typename NativeT>
diff --git a/tensorflow/compiler/xla/tests/client_test.cc b/tensorflow/compiler/xla/tests/client_test.cc
index 8853ed9e5780672d4006c326291767b8b5253f56..045148cdd11da94ae4789a753efca95c6aaa1f27 100644
--- a/tensorflow/compiler/xla/tests/client_test.cc
+++ b/tensorflow/compiler/xla/tests/client_test.cc
@@ -36,7 +36,7 @@ namespace {
 
 class ClientTest : public ClientLibraryTestBase {};
 
-TEST_F(ClientTest, ExecuteWithLayout) {
+XLA_TEST_F(ClientTest, ExecuteWithLayout) {
   ComputationBuilder b(client_, TestName());
 
   std::vector<std::vector<int64>> layouts = {{0, 1}, {1, 0}};
@@ -68,7 +68,7 @@ TEST_F(ClientTest, ExecuteWithLayout) {
   }
 }
 
-TEST_F(ClientTest, ExecuteWithTupleLayout) {
+XLA_TEST_F(ClientTest, ExecuteWithTupleLayout) {
   ComputationBuilder b(client_, TestName());
 
   b.Tuple({b.ConstantR2<int32>({{1, 2}, {3, 4}}),
@@ -90,9 +90,9 @@ TEST_F(ClientTest, ExecuteWithTupleLayout) {
       auto result,
       client_->ExecuteAndTransfer(computation, {}, &execution_options));
   LiteralTestUtil::ExpectR2Equal<int32>({{1, 2}, {3, 4}},
-                                        result->tuple_literals(0));
+                                        LiteralView::Create(*result, {0}));
   LiteralTestUtil::ExpectR2Equal<int32>({{10, 20}, {30, 40}},
-                                        result->tuple_literals(1));
+                                        LiteralView::Create(*result, {1}));
 
   EXPECT_TRUE(ShapeUtil::IsTuple(result->shape()));
   EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->shape()));
@@ -107,7 +107,8 @@ TEST_F(ClientTest, ExecuteWithTupleLayout) {
                                      /*minor_to_major=*/{1, 0})));
 }
 
-TEST_F(ClientTest, DISABLED_ON_CPU_PARALLEL(DISABLED_ON_GPU(ExecuteParallel))) {
+XLA_TEST_F(ClientTest,
+        DISABLED_ON_CPU_PARALLEL(DISABLED_ON_GPU(ExecuteParallel))) {
   Computation add_with_one_arg, mul_with_two_args, dot_with_one_arg;
   Shape shape = ShapeUtil::MakeShape(S32, {2, 2});
 
diff --git a/tensorflow/compiler/xla/tests/compute_constant_test.cc b/tensorflow/compiler/xla/tests/compute_constant_test.cc
index 5226a78386824a94572d3e5cc3329677108a910a..ec2c580670cfac14ba42e8c9a836c86551af4b89 100644
--- a/tensorflow/compiler/xla/tests/compute_constant_test.cc
+++ b/tensorflow/compiler/xla/tests/compute_constant_test.cc
@@ -149,7 +149,7 @@ TEST_F(ComputeConstantTest, Param) {
     auto computation = b.Add(param, b.ConstantR0<float>(1.5f));
 
     std::vector<Literal> arguments;
-    arguments.emplace_back(*Literal::CreateR0(42.5f));
+    arguments.push_back(std::move(*Literal::CreateR0(42.5f)));
     EXPECT_TRUE(IsConstant(computation, &b, arguments.size()));
 
     auto value =
@@ -168,7 +168,7 @@ TEST_F(ComputeConstantTest, DirectParamMissing) {
 
     auto value = ComputeConstantScalar<float>(client, computation, &b);
     EXPECT_TRUE(tensorflow::StringPiece(value.status().ToString())
-                    .contains("depends on parameter"))
+                    .contains("depends on a parameter"))
         << value.status();
   }
 }
@@ -184,7 +184,7 @@ TEST_F(ComputeConstantTest, IndirectParamMissing) {
 
     auto value = ComputeConstantScalar<float>(client, computation, &b);
     EXPECT_TRUE(tensorflow::StringPiece(value.status().ToString())
-                    .contains("depends on parameter"))
+                    .contains("depends on a parameter"))
         << value.status();
   }
 }
diff --git a/tensorflow/compiler/xla/tests/conditional_test.cc b/tensorflow/compiler/xla/tests/conditional_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0016b6cc614469d7ac9b40b740d163a7a4f32abf
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/conditional_test.cc
@@ -0,0 +1,553 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+
+namespace xla {
+namespace {
+
+class ConditionalOpTest : public ClientLibraryTestBase {
+ protected:
+  Computation CreateR0ConstantComputation(float value) {
+    ComputationBuilder builder(client_, "Constant");
+    builder.Parameter(0, empty_tuple_, "tuple");
+    builder.ConstantR0<float>(value);
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateR0IdentityComputation() {
+    ComputationBuilder builder(client_, "Identity");
+    builder.Parameter(0, r0f32_, "x");
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateCeilComputation(const Shape& shape) {
+    ComputationBuilder builder(client_, "Ceil");
+    auto param = builder.Parameter(0, shape, "param");
+    builder.Ceil(param);
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateR0CeilComputation() {
+    return CreateCeilComputation(r0f32_);
+  }
+
+  Computation CreateR1CeilComputation() {
+    return CreateCeilComputation(r1s2f32_);
+  }
+
+  Computation CreateFloorComputation(const Shape& shape) {
+    ComputationBuilder builder(client_, "Floor");
+    auto param = builder.Parameter(0, shape, "param");
+    builder.Floor(param);
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateR0FloorComputation() {
+    return CreateFloorComputation(r0f32_);
+  }
+
+  Computation CreateR1FloorComputation() {
+    return CreateFloorComputation(r1s2f32_);
+  }
+
+  Computation CreateTupleCeilComputation(const string& computation_name,
+                                         const Shape& tuple_shape) {
+    ComputationBuilder builder(client_, computation_name);
+    auto tuple = builder.Parameter(0, tuple_shape, "tuple");
+    auto x = builder.GetTupleElement(tuple, 0);
+    auto y = builder.GetTupleElement(tuple, 1);
+    auto x_ceil = builder.Ceil(x);
+    auto y_ceil = builder.Ceil(y);
+    builder.Tuple({x_ceil, y_ceil});
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateR0TupleCeilComputation() {
+    return CreateTupleCeilComputation("CeilR0", tuple_2_r0f32_);
+  }
+
+  Computation CreateR1TupleCeilComputation() {
+    return CreateTupleCeilComputation("CeilR1", tuple_2_r1s2f32_);
+  }
+
+  Computation CreateTupleFloorComputation(const string& computation_name,
+                                          const Shape& tuple_shape) {
+    ComputationBuilder builder(client_, computation_name);
+    auto tuple = builder.Parameter(0, tuple_shape, "tuple");
+    auto x = builder.GetTupleElement(tuple, 0);
+    auto y = builder.GetTupleElement(tuple, 1);
+    auto x_floor = builder.Floor(x);
+    auto y_floor = builder.Floor(y);
+    builder.Tuple({x_floor, y_floor});
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateR0TupleFloorComputation() {
+    return CreateTupleFloorComputation("FloorR0", tuple_2_r0f32_);
+  }
+
+  Computation CreateR1TupleFloorComputation() {
+    return CreateTupleFloorComputation("FloorR1", tuple_2_r1s2f32_);
+  }
+
+  Computation CreateTupleAddComputation(const string& computation_name,
+                                        const Shape& tuple_shape) {
+    ComputationBuilder builder(client_, computation_name);
+    auto tuple = builder.Parameter(0, tuple_shape, "tuple");
+    auto x = builder.GetTupleElement(tuple, 0);
+    auto y = builder.GetTupleElement(tuple, 1);
+    builder.Add(x, y);
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateR0TupleAddComputation() {
+    return CreateTupleAddComputation("AddR0", tuple_2_r0f32_);
+  }
+
+  Computation CreateR1TupleAddComputation() {
+    return CreateTupleAddComputation("AddR1", tuple_2_r1s2f32_);
+  }
+
+  Computation CreateTupleSubComputation(const string& computation_name,
+                                        const Shape& tuple_shape) {
+    ComputationBuilder builder(client_, computation_name);
+    auto tuple = builder.Parameter(0, tuple_shape, "tuple");
+    auto x = builder.GetTupleElement(tuple, 0);
+    auto y = builder.GetTupleElement(tuple, 1);
+    builder.Sub(x, y);
+    auto build_status = builder.Build();
+    EXPECT_IS_OK(build_status.status());
+    return build_status.ConsumeValueOrDie();
+  }
+
+  Computation CreateR0TupleSubComputation() {
+    return CreateTupleSubComputation("SubR0", tuple_2_r0f32_);
+  }
+
+  Computation CreateR1TupleSubComputation() {
+    return CreateTupleSubComputation("SubR1", tuple_2_r1s2f32_);
+  }
+
+  Shape r0f32_ = ShapeUtil::MakeShape(F32, {});
+  Shape r1s2f32_ = ShapeUtil::MakeShape(F32, {2});
+  Shape tuple_2_r0f32_ = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShape(F32, {}), ShapeUtil::MakeShape(F32, {})});
+  Shape tuple_2_r1s2f32_ = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShape(F32, {2}), ShapeUtil::MakeShape(F32, {2})});
+  Shape empty_tuple_ = ShapeUtil::MakeTupleShape({});
+  ErrorSpec error_spec_{0.001};
+};
+
+// Test true and false computations that do not take any parameters.
+XLA_TEST_F(ConditionalOpTest, Parameters0) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(true);
+  auto operands = builder.Tuple({});
+  auto true_computation = CreateR0ConstantComputation(56.0f);
+  auto false_computation = CreateR0ConstantComputation(12.0f);
+  auto result = builder.Conditional(pred, operands, true_computation, operands,
+                                    false_computation);
+
+  ComputeAndCompareR0<float>(&builder, 56.0f, {}, error_spec_);
+}
+
+// Test true and false computations that take in 1 parameter.
+XLA_TEST_F(ConditionalOpTest, Parameters1) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.0f);
+  auto operand2 = builder.ConstantR0<float>(12.0f);
+  auto identity = CreateR0IdentityComputation();
+  auto result =
+      builder.Conditional(pred, operand1, identity, operand2, identity);
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test conditional with two different computations in the true and false cases
+// that take in different arguments.
+XLA_TEST_F(ConditionalOpTest, DiffComputationsDiffArgs) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.4f);
+  auto operand2 = builder.ConstantR0<float>(12.6f);
+  auto result = builder.Conditional(pred, operand1, CreateR0CeilComputation(),
+                                    operand2, CreateR0FloorComputation());
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test conditional with two different computations in the true and false cases
+// that take in the same arguments.
+XLA_TEST_F(ConditionalOpTest, DiffComputationsSameArg) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand = builder.ConstantR0<float>(12.6f);
+  auto result = builder.Conditional(pred, operand, CreateR0CeilComputation(),
+                                    operand, CreateR0FloorComputation());
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test conditional with the same computation in the true and false cases but
+// take in different arguments.
+XLA_TEST_F(ConditionalOpTest, SameComputationDiffArgs) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.4f);
+  auto operand2 = builder.ConstantR0<float>(12.6f);
+  auto floor = CreateR0FloorComputation();
+  auto result = builder.Conditional(pred, operand1, floor, operand2, floor);
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test conditional with the same computation in the true and false cases that
+// take in the same arguments.
+XLA_TEST_F(ConditionalOpTest, SameComputationSameArg) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand = builder.ConstantR0<float>(12.6f);
+  auto floor = CreateR0FloorComputation();
+  auto result = builder.Conditional(pred, operand, floor, operand, floor);
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test conditional with different instances of the same computation in the true
+// and false cases.
+XLA_TEST_F(ConditionalOpTest, SameComputationDiffInstances) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.4f);
+  auto operand2 = builder.ConstantR0<float>(12.6f);
+  auto result = builder.Conditional(pred, operand1, CreateR0FloorComputation(),
+                                    operand2, CreateR0FloorComputation());
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test the case when a call invokes a computation that contains a conditional.
+XLA_TEST_F(ConditionalOpTest, ConditionalWithCall) {
+  Shape r0bool = ShapeUtil::MakeShape(PRED, {});
+  ComputationBuilder inner_builder(client_, TestName() + ".inner_conditional");
+  auto pred_cond = inner_builder.Parameter(0, r0bool, "param0");
+  auto true_operand = inner_builder.Parameter(1, r0f32_, "param1");
+  auto false_operand = inner_builder.Parameter(2, r0f32_, "param2");
+  inner_builder.Conditional(pred_cond, true_operand, CreateR0CeilComputation(),
+                            false_operand, CreateR0FloorComputation());
+  auto inner_builder_result = inner_builder.Build();
+
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.4f);
+  auto operand2 = builder.ConstantR0<float>(12.6f);
+  builder.Call(inner_builder_result.ConsumeValueOrDie(),
+               {pred, operand1, operand2});
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test true and false computations that take in 2 parameters and predicate is
+// true.
+XLA_TEST_F(ConditionalOpTest, Parameters2TrueBranch) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(true);
+  auto operand1 = builder.ConstantR0<float>(56.0f);
+  auto operand2 = builder.ConstantR0<float>(12.0f);
+  auto operands = builder.Tuple({operand1, operand2});
+  auto result =
+      builder.Conditional(pred, operands, CreateR0TupleAddComputation(),
+                          operands, CreateR0TupleSubComputation());
+
+  ComputeAndCompareR0<float>(&builder, 68.0f, {}, error_spec_);
+}
+
+// Test true and false computations that take in 2 parameters and predicate is
+// false.
+XLA_TEST_F(ConditionalOpTest, Parameters2FalseBranch) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(56.0f);
+  auto operand2 = builder.ConstantR0<float>(12.0f);
+  auto operands = builder.Tuple({operand1, operand2});
+  auto result =
+      builder.Conditional(pred, operands, CreateR0TupleAddComputation(),
+                          operands, CreateR0TupleSubComputation());
+
+  ComputeAndCompareR0<float>(&builder, 44.0f, {}, error_spec_);
+}
+
+// Test true and false computations that take in 2 array parameters and
+// predicate is true.
+XLA_TEST_F(ConditionalOpTest, Parameters2ArrayTrueBranch) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(true);
+  auto operand1 = builder.ConstantR1<float>({24.0f, 56.0f});
+  auto operand2 = builder.ConstantR1<float>({10.0f, 11.0f});
+  auto operands = builder.Tuple({operand1, operand2});
+  auto result =
+      builder.Conditional(pred, operands, CreateR1TupleAddComputation(),
+                          operands, CreateR1TupleSubComputation());
+
+  ComputeAndCompareR1<float>(&builder, {34.0f, 67.0f}, {}, error_spec_);
+}
+
+// Test true and false computations that take in 2 array parameters and
+// predicate is false.
+XLA_TEST_F(ConditionalOpTest, Parameters2ArrayFalseBranch) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR1<float>({24.0f, 56.0f});
+  auto operand2 = builder.ConstantR1<float>({10.0f, 11.0f});
+  auto operands = builder.Tuple({operand1, operand2});
+  auto result =
+      builder.Conditional(pred, operands, CreateR1TupleAddComputation(),
+                          operands, CreateR1TupleSubComputation());
+
+  ComputeAndCompareR1<float>(&builder, {14.0f, 45.0f}, {}, error_spec_);
+}
+
+// Test true and false computations that return a tuple of scalars.
+XLA_TEST_F(ConditionalOpTest, ReturnTupleOfScalars) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operands = builder.Tuple(
+      {builder.ConstantR0<float>(12.2f), builder.ConstantR0<float>(25.6f)});
+  builder.Conditional(pred, operands, CreateR0TupleCeilComputation(), operands,
+                      CreateR0TupleFloorComputation());
+
+  ComputeAndCompareTuple(
+      &builder,
+      *Literal::MakeTuple({Literal::CreateR0<float>(12.0f).get(),
+                           Literal::CreateR0<float>(25.0f).get()}),
+      {}, error_spec_);
+}
+
+// Test true and false computations that return a tuple of arrays.
+// TODO(b/71715476): Returning tuples from Conditional fails in GPU backend.
+XLA_TEST_F(ConditionalOpTest, DISABLED_ON_GPU(ReturnTupleOfArrays)) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(true);
+  auto operands = builder.Tuple({builder.ConstantR1<float>({12.2f, 15.8f}),
+                                 builder.ConstantR1<float>({25.6f, 29.2f})});
+  builder.Conditional(pred, operands, CreateR1TupleCeilComputation(), operands,
+                      CreateR1TupleFloorComputation());
+
+  ComputeAndCompareTuple(
+      &builder,
+      *Literal::MakeTuple({Literal::CreateR1<float>({13.0f, 16.0f}).get(),
+                           Literal::CreateR1<float>({26.0f, 30.0f}).get()}),
+      {}, error_spec_);
+}
+
+// Test true and false computations that return a tuple of a predicate, a
+// scalar, and an array.
+// TODO(b/71715476): Returning tuples from Conditional fails in GPU backend.
+XLA_TEST_F(ConditionalOpTest,
+           DISABLED_ON_GPU(ReturnTupleofPredicateScalarArray)) {
+  ComputationBuilder true_builder(client_, TestName() + ".true");
+  {
+    true_builder.Parameter(0, empty_tuple_, "tuple");
+    auto true_pred = true_builder.ConstantR0<bool>(true);
+    auto true_scalar = true_builder.ConstantR0<float>(12.2f);
+    auto true_array = true_builder.ConstantR1<float>({12.8f, 14.6f});
+    true_builder.Tuple({true_pred, true_scalar, true_array});
+  }
+  auto true_builder_result = true_builder.Build();
+  EXPECT_IS_OK(true_builder_result.status());
+
+  ComputationBuilder false_builder(client_, TestName() + ".false");
+  {
+    false_builder.Parameter(0, empty_tuple_, "tuple");
+    auto false_pred = false_builder.ConstantR0<bool>(false);
+    auto false_scalar = false_builder.ConstantR0<float>(25.6f);
+    auto false_array = false_builder.ConstantR1<float>({26.4f, 32.6f});
+    false_builder.Tuple({false_pred, false_scalar, false_array});
+  }
+  auto false_builder_result = false_builder.Build();
+  EXPECT_IS_OK(false_builder_result.status());
+
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(true);
+  auto operands = builder.Tuple({});
+  builder.Conditional(pred, operands, true_builder_result.ConsumeValueOrDie(),
+                      operands, false_builder_result.ConsumeValueOrDie());
+
+  ComputeAndCompareTuple(
+      &builder,
+      *Literal::MakeTuple({Literal::CreateR0<bool>(true).get(),
+                           Literal::CreateR0<float>(12.2f).get(),
+                           Literal::CreateR1<float>({12.8f, 14.6f}).get()}),
+      {}, error_spec_);
+}
+
+// Test true and false computations that return a nested tuple.
+// TODO(b/71715476): Returning tuples from Conditional fails in GPU backend.
+XLA_TEST_F(ConditionalOpTest, DISABLED_ON_GPU(ReturnNestedTuple)) {
+  ComputationBuilder true_builder(client_, TestName() + ".true");
+  {
+    true_builder.Parameter(0, empty_tuple_, "tuple");
+    auto true_constant1 = true_builder.ConstantR0<float>(12.2f);
+    auto true_constant2 = true_builder.ConstantR1<float>({12.8f, 14.6f});
+    auto true_constant3 = true_builder.ConstantR1<float>({25.4f, 29.8f});
+    auto true_constant4 = true_builder.ConstantR0<float>(35.6f);
+    true_builder.Tuple({true_builder.Tuple({true_constant1, true_constant2}),
+                        true_builder.Tuple({true_constant3, true_constant4})});
+  }
+  auto true_builder_result = true_builder.Build();
+  EXPECT_IS_OK(true_builder_result.status());
+
+  ComputationBuilder false_builder(client_, TestName() + ".false");
+  {
+    false_builder.Parameter(0, empty_tuple_, "tuple");
+    auto false_constant1 = false_builder.ConstantR0<float>(46.6f);
+    auto false_constant2 = false_builder.ConstantR1<float>({54.4f, 58.4f});
+    auto false_constant3 = false_builder.ConstantR1<float>({62.1f, 67.4f});
+    auto false_constant4 = false_builder.ConstantR0<float>(9.3f);
+    false_builder.Tuple(
+        {false_builder.Tuple({false_constant1, false_constant2}),
+         false_builder.Tuple({false_constant3, false_constant4})});
+  }
+  auto false_builder_result = false_builder.Build();
+  EXPECT_IS_OK(false_builder_result.status());
+
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(false);
+  auto operands = builder.Tuple({});
+  builder.Conditional(pred, operands, true_builder_result.ConsumeValueOrDie(),
+                      operands, false_builder_result.ConsumeValueOrDie());
+
+  ComputeAndCompareTuple(
+      &builder,
+      *Literal::MakeTuple(
+          {Literal::MakeTuple({Literal::CreateR0<float>(46.6f).get(),
+                               Literal::CreateR1<float>({54.4f, 58.4f}).get()})
+               .get(),
+           Literal::MakeTuple({Literal::CreateR1<float>({62.1f, 67.4f}).get(),
+                               Literal::CreateR0<float>(9.3f).get()})
+               .get()}),
+      {}, error_spec_);
+}
+
+// Test conditional that takes in scalar operands in the form of external
+// params.
+XLA_TEST_F(ConditionalOpTest, ScalarOperandsFromExternalParams) {
+  Shape r0bool = ShapeUtil::MakeShape(PRED, {});
+  ComputationBuilder builder(client_, TestName());
+
+  ComputationDataHandle pred, operand1, operand2;
+  auto pred_arg = CreateR0Parameter<bool>(true, 0, "pred", &builder, &pred);
+  auto operand1_param =
+      CreateR0Parameter<float>(56.3f, 1, "operand1", &builder, &operand1);
+  auto operand2_param =
+      CreateR0Parameter<float>(12.7f, 2, "operand2", &builder, &operand2);
+  auto result = builder.Conditional(pred, operand1, CreateR0CeilComputation(),
+                                    operand2, CreateR0FloorComputation());
+
+  ComputeAndCompareR0<float>(
+      &builder, 57.0f,
+      {pred_arg.get(), operand1_param.get(), operand2_param.get()},
+      error_spec_);
+}
+
+// Test conditional that takes in array operands in the form of external params.
+XLA_TEST_F(ConditionalOpTest, ArrayOperandsFromExternalParams) {
+  Shape r0bool = ShapeUtil::MakeShape(PRED, {});
+  ComputationBuilder builder(client_, TestName());
+
+  ComputationDataHandle pred, operand1, operand2;
+  auto pred_arg = CreateR0Parameter<bool>(false, 0, "pred", &builder, &pred);
+  auto operand1_param = CreateR1Parameter<float>({24.3f, 56.7f}, 1, "operand1",
+                                                 &builder, &operand1);
+  auto operand2_param = CreateR1Parameter<float>({10.2f, 11.6f}, 2, "operand2",
+                                                 &builder, &operand2);
+  auto result = builder.Conditional(pred, operand1, CreateR1CeilComputation(),
+                                    operand2, CreateR1FloorComputation());
+
+  ComputeAndCompareR1<float>(
+      &builder, {10.0f, 11.0f},
+      {pred_arg.get(), operand1_param.get(), operand2_param.get()},
+      error_spec_);
+}
+
+// Test the case where one conditional is nested within another.
+XLA_TEST_F(ConditionalOpTest, NestedConditionals) {
+  ComputationBuilder inner_builder(client_, TestName() + ".inner_conditional");
+  {
+    Shape r0bool = ShapeUtil::MakeShape(PRED, {});
+    Shape tuple_shape = ShapeUtil::MakeTupleShape({r0bool, r0f32_, r0f32_});
+    auto param0 = inner_builder.Parameter(0, tuple_shape, "param0");
+    auto pred_cond = inner_builder.GetTupleElement(param0, 0);
+    auto true_operand = inner_builder.GetTupleElement(param0, 1);
+    auto false_operand = inner_builder.GetTupleElement(param0, 2);
+    inner_builder.Conditional(pred_cond, true_operand,
+                              CreateR0CeilComputation(), false_operand,
+                              CreateR0FloorComputation());
+  }
+  auto inner_builder_result = inner_builder.Build();
+  EXPECT_IS_OK(inner_builder_result.status());
+
+  ComputationBuilder builder(client_, TestName());
+  auto pred1 = builder.ConstantR0<bool>(true);
+  auto pred2 = builder.ConstantR0<bool>(false);
+  auto operand1 = builder.ConstantR0<float>(1.1f);
+  auto operand2 = builder.ConstantR0<float>(12.2f);
+  auto operand3 = builder.ConstantR0<float>(43.3f);
+  auto tuple_operand = builder.Tuple({pred2, operand1, operand2});
+  builder.Conditional(pred1, tuple_operand,
+                      inner_builder_result.ConsumeValueOrDie(), operand3,
+                      CreateR0IdentityComputation());
+
+  ComputeAndCompareR0<float>(&builder, 12.0f, {}, error_spec_);
+}
+
+// Test a mismatch in the shape of the true operand and true computation.
+XLA_TEST_F(ConditionalOpTest, ShapeMismatch) {
+  ComputationBuilder builder(client_, TestName());
+  auto pred = builder.ConstantR0<bool>(true);
+  auto operand1 = builder.ConstantR0<float>(56.0f);
+  auto operand2 = builder.ConstantR0<float>(12.0f);
+  auto operands = builder.Tuple({operand1, operand2});
+  builder.Conditional(pred, operands, CreateR1TupleAddComputation(), operands,
+                      CreateR0TupleSubComputation());
+
+  auto result = builder.Build();
+  EXPECT_FALSE(result.ok());
+  EXPECT_THAT(result.status().error_message(),
+              ::testing::HasSubstr("true_operand must match the shape of the "
+                                   "only parameter of true_computation"));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/constants_test.cc b/tensorflow/compiler/xla/tests/constants_test.cc
index 97bd1553664a6c0fcb097b441ec42efb4eaa9cc2..35aa3f6d696297efb7d95d826ed75a504a24529d 100644
--- a/tensorflow/compiler/xla/tests/constants_test.cc
+++ b/tensorflow/compiler/xla/tests/constants_test.cc
@@ -141,11 +141,12 @@ TEST_F(ConstantsTest, Small_3x2x1x1) {
       {5.0f, 4.4f},   // p2
   });
   input_array.FillWithPZ(pz);
-  Literal input_literal = *Literal::CreateR4FromArray4D(input_array);
+  std::unique_ptr<Literal> input_literal =
+      Literal::CreateR4FromArray4D(input_array);
 
   {
     ComputationBuilder builder(client_, TestName());
-    builder.ConstantLiteral(input_literal);
+    builder.ConstantLiteral(*input_literal);
     ComputeAndCompareR4<float>(&builder, input_array, {}, error_spec_);
   }
 
@@ -165,10 +166,10 @@ TEST_F(ConstantsTest, DISABLED_TupleConstant) {
 
   std::unique_ptr<Literal> result = ExecuteAndTransferOrDie(&builder, {});
 
-  LiteralTestUtil::ExpectR2Near<float>({{1.0}, {2.0}},
-                                       result->tuple_literals(0), error_spec_);
-  LiteralTestUtil::ExpectR1Near<float>({2.0, 42.0}, result->tuple_literals(1),
-                                       error_spec_);
+  LiteralTestUtil::ExpectR2Near<float>(
+      {{1.0}, {2.0}}, LiteralView::Create(*result, {0}), error_spec_);
+  LiteralTestUtil::ExpectR1Near<float>(
+      {2.0, 42.0}, LiteralView::Create(*result, {1}), error_spec_);
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc
index 2924c08615fa706bb19addf04bf58e1d5dd5a659..a10e17dbf34b3a6fe503f156fab496708b833c07 100644
--- a/tensorflow/compiler/xla/tests/convolution_test.cc
+++ b/tensorflow/compiler/xla/tests/convolution_test.cc
@@ -105,8 +105,8 @@ TEST_F(ConvolutionTest, Convolve_1x1x1x2_1x1x1x2_Valid) {
   }));
 
   ComputeAndCompare(&builder, conv,
-                    {*Literal::CreateFromArray(input_data),
-                     *Literal::CreateFromArray(filter_data)},
+                    {std::move(*Literal::CreateFromArray(input_data)),
+                     std::move(*Literal::CreateFromArray(filter_data))},
                     error_spec_);
 }
 
@@ -136,8 +136,8 @@ TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Valid) {
   }));
   // clang-format on
   ComputeAndCompare(&builder, conv,
-                    {*Literal::CreateFromArray(input_data),
-                     *Literal::CreateFromArray(filter_data)},
+                    {std::move(*Literal::CreateFromArray(input_data)),
+                     std::move(*Literal::CreateFromArray(filter_data))},
                     error_spec_);
 }
 
@@ -167,8 +167,8 @@ TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Same) {
   }));
   // clang-format on
   ComputeAndCompare(&builder, conv,
-                    {*Literal::CreateFromArray(input_data),
-                     *Literal::CreateFromArray(filter_data)},
+                    {std::move(*Literal::CreateFromArray(input_data)),
+                     std::move(*Literal::CreateFromArray(filter_data))},
                     error_spec_);
 }
 
@@ -200,8 +200,8 @@ TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x3x3_Same) {
   }));
   // clang-format on
   ComputeAndCompare(&builder, conv,
-                    {*Literal::CreateFromArray(input_data),
-                     *Literal::CreateFromArray(filter_data)},
+                    {std::move(*Literal::CreateFromArray(input_data)),
+                     std::move(*Literal::CreateFromArray(filter_data))},
                     error_spec_);
 }
 
@@ -501,10 +501,10 @@ XLA_TEST_P(ConvolveWithAndWithoutCanonicalization,
   Array2D<float> expected_result(29, 10);
   expected_result.Fill(0);
 
-  ComputeAndCompare(
-      &builder, conv,
-      {*Literal::CreateFromArray(param0), *Literal::CreateFromArray(param1)},
-      error_spec_);
+  ComputeAndCompare(&builder, conv,
+                    {std::move(*Literal::CreateFromArray(param0)),
+                     std::move(*Literal::CreateFromArray(param1))},
+                    error_spec_);
 }
 
 INSTANTIATE_TEST_CASE_P(ConvolveWithAndWithoutCanonicalization_Instantiation,
diff --git a/tensorflow/compiler/xla/tests/copy_test.cc b/tensorflow/compiler/xla/tests/copy_test.cc
index bcb85b04eefa349df1c055e010d584b85b55a4a8..ece7c3b05e7fafa299db7f9cbf50610c8204f95e 100644
--- a/tensorflow/compiler/xla/tests/copy_test.cc
+++ b/tensorflow/compiler/xla/tests/copy_test.cc
@@ -40,7 +40,7 @@ class CopyOpTest : public HloTestBase {
   void TestCopyOp(const Literal& literal) {
     auto builder = HloComputation::Builder(TestName());
     auto constant = builder.AddInstruction(
-        HloInstruction::CreateConstant(MakeUnique<Literal>(literal)));
+        HloInstruction::CreateConstant(literal.CloneToUnique()));
     builder.AddInstruction(HloInstruction::CreateUnary(
         constant->shape(), HloOpcode::kCopy, constant));
     auto computation = builder.Build();
@@ -56,9 +56,13 @@ class CopyOpTest : public HloTestBase {
                                 tensorflow::gtl::ArraySlice<int64> permutation);
 };
 
-XLA_TEST_F(CopyOpTest, CopyR0Bool) { TestCopyOp(*Literal::CreateR0<bool>(true)); }
+XLA_TEST_F(CopyOpTest, CopyR0Bool) {
+  TestCopyOp(*Literal::CreateR0<bool>(true));
+}
 
-XLA_TEST_F(CopyOpTest, CopyR1S0U32) { TestCopyOp(*Literal::CreateR1<uint32>({})); }
+XLA_TEST_F(CopyOpTest, CopyR1S0U32) {
+  TestCopyOp(*Literal::CreateR1<uint32>({}));
+}
 
 XLA_TEST_F(CopyOpTest, CopyR1S3U32) {
   TestCopyOp(*Literal::CreateR1<uint32>({1, 2, 3}));
@@ -85,7 +89,6 @@ XLA_TEST_F(CopyOpTest, CopyParameterScalar) {
   // Copy literal to device to use as parameter.
   auto literal = Literal::CreateR0<float>(42.0);
   Shape shape = literal->shape();
-  auto constant_device_base = TransferToDevice(*literal);
 
   auto param0 = builder.AddInstruction(
       HloInstruction::CreateParameter(0, shape, "param0"));
@@ -98,7 +101,7 @@ XLA_TEST_F(CopyOpTest, CopyParameterScalar) {
   module->AddEntryComputation(std::move(computation));
 
   std::unique_ptr<Literal> result =
-      ExecuteAndTransfer(std::move(module), {constant_device_base});
+      ExecuteAndTransfer(std::move(module), {literal.get()});
   LiteralTestUtil::ExpectR0Near<float>(42.0f, *result, error_spec_);
 }
 
@@ -129,7 +132,8 @@ XLA_TEST_F(CopyOpTest, CopyConstantR2DifferentLayouts) {
   std::unique_ptr<Literal> literal =
       Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
   // Reverse the minor-to-major order of the literal.
-  Layout* literal_layout = literal->mutable_shape()->mutable_layout();
+  Layout* literal_layout =
+      literal->mutable_shape_do_not_use()->mutable_layout();
   ASSERT_EQ(2, literal_layout->minor_to_major_size());
   literal_layout->mutable_minor_to_major()->SwapElements(0, 1);
 
diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc
index 74f73a1ddc15be033e52b0b45f9961e5dc3a1ecb..2d847a66b0ae7c8f09fa0cb181a4c84ea99be5b1 100644
--- a/tensorflow/compiler/xla/tests/custom_call_test.cc
+++ b/tensorflow/compiler/xla/tests/custom_call_test.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
 #include "tensorflow/compiler/xla/tests/literal_test_util.h"
 #include "tensorflow/compiler/xla/tests/test_macros.h"
@@ -128,5 +129,19 @@ XLA_TEST_F(CustomCallTest,
       Array3D<float>{{{2, 3}, {4, 5}}, {{3, 4}, {5, 6}}}, *result);
 }
 
+class CustomCallClientAPITest : public ClientLibraryTestBase {};
+
+// When using the client API, CustomCall targets can't begin with '$' -- these
+// are reserved for internal use.
+XLA_TEST_F(CustomCallClientAPITest, IllegalCustomCallTarget) {
+  ComputationBuilder builder(client_, TestName());
+  auto call = builder.CustomCall("$illegal", /*operands=*/{},
+                                 ShapeUtil::MakeShape(F32, {1}));
+
+  StatusOr<std::unique_ptr<GlobalData>> result =
+      Execute(&builder, /*arguments=*/{});
+  EXPECT_FALSE(result.ok());
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc
index bfb04fd9f9bf6887c4462cb00fee00250517f5c4..cc683701e6305510d202721fe645310f1009081c 100644
--- a/tensorflow/compiler/xla/tests/dot_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc
@@ -51,8 +51,6 @@ class DotOperationTest : public ClientLibraryTestBase {
   template <typename Element>
   void TestNonsquareMatrixDot(bool lhs_row_major = false,
                               bool rhs_row_major = false);
-  void TestMatrixDot(int M, int K, int N, bool lhs_row_major = false,
-                     bool rhs_row_major = false);
 };
 
 XLA_TEST_F(DotOperationTest, ZeroElementVectorDotF32) {
@@ -199,158 +197,182 @@ void DotOperationTest::TestSquareMatrixDot(bool lhs_row_major,
       &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_);
 }
 
-void DotOperationTest::TestMatrixDot(int M, int K, int N, bool lhs_row_major,
-                                     bool rhs_row_major) {
-  std::unique_ptr<Array2D<float>> lhs_data =
-      MakeLinspaceArray2D(0.0, 1.0, M, K);
-  std::unique_ptr<Literal> lhs_lit = Literal::CreateR2FromArray2DWithLayout(
-      *lhs_data,
-      LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)));
-  auto lhs_handle = client_->TransferToServer(*lhs_lit).ConsumeValueOrDie();
+struct DotTestParam {
+  int m;
+  int k;
+  int n;
+  bool dot_lhs_row_major;
+  bool dot_rhs_row_major;
+  bool has_addend;
+  bool addend_row_major;
+};
 
-  std::unique_ptr<Array2D<float>> rhs_data =
-      MakeLinspaceArray2D(0.0, 1.0, K, N);
-  std::unique_ptr<Literal> rhs_lit = Literal::CreateR2FromArray2DWithLayout(
-      *rhs_data,
-      LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)));
-  auto rhs_handle = client_->TransferToServer(*rhs_lit).ConsumeValueOrDie();
+string PrintDotTestParam(
+    const ::testing::TestParamInfo<DotTestParam>& test_param) {
+  const DotTestParam& param = test_param.param;
+  if (param.has_addend) {
+    return tensorflow::strings::StrCat(param.m, "x", param.k, "x", param.n,
+                                       "_MajorToMinor",
+                                       param.dot_lhs_row_major ? "T" : "F",
+                                       param.dot_rhs_row_major ? "T" : "F",
+                                       param.addend_row_major ? "T" : "F");
+  } else {
+    return tensorflow::strings::StrCat(param.m, "x", param.k, "x", param.n,
+                                       "_MajorToMinor",
+                                       param.dot_lhs_row_major ? "T" : "F",
+                                       param.dot_rhs_row_major ? "T" : "F");
+  }
+}
+
+class ParametricDotTest : public DotOperationTest,
+                          public ::testing::WithParamInterface<DotTestParam> {};
+
+XLA_TEST_P(ParametricDotTest, TestF32) {
+  DotTestParam param = GetParam();
+
+  std::unique_ptr<Array2D<float>> dot_lhs_data =
+      MakeLinspaceArray2D(0.0, 1.0, param.m, param.k);
+  std::unique_ptr<Literal> dot_lhs_lit = Literal::CreateR2FromArray2DWithLayout(
+      *dot_lhs_data, LayoutUtil::MakeLayout(
+                         MinorToMajorForIsRowMajor(param.dot_lhs_row_major)));
+  std::unique_ptr<GlobalData> dot_lhs_handle =
+      client_->TransferToServer(*dot_lhs_lit).ConsumeValueOrDie();
+
+  std::unique_ptr<Array2D<float>> dot_rhs_data =
+      MakeLinspaceArray2D(0.0, 1.0, param.k, param.n);
+  std::unique_ptr<Literal> dot_rhs_lit = Literal::CreateR2FromArray2DWithLayout(
+      *dot_rhs_data, LayoutUtil::MakeLayout(
+                         MinorToMajorForIsRowMajor(param.dot_rhs_row_major)));
+  std::unique_ptr<GlobalData> dot_rhs_handle =
+      client_->TransferToServer(*dot_rhs_lit).ConsumeValueOrDie();
+
+  std::unique_ptr<Array2D<float>> addend_data;
+  std::unique_ptr<Literal> addend_lit;
+  std::unique_ptr<GlobalData> addend_handle;
+
+  if (param.has_addend) {
+    addend_data = MakeLinspaceArray2D(0.0, 1.0, param.m, param.n);
+    addend_lit = Literal::CreateR2FromArray2DWithLayout(
+        *addend_data, LayoutUtil::MakeLayout(
+                          MinorToMajorForIsRowMajor(param.addend_row_major)));
+    addend_handle = client_->TransferToServer(*addend_lit).ConsumeValueOrDie();
+  }
 
   ComputationBuilder builder(client_, TestName());
   auto prim_type = primitive_util::NativeToPrimitiveType<float>();
   auto result = builder.Dot(
-      builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {M, K}), "lhs"),
-      builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {K, N}), "rhs"));
-
-  std::unique_ptr<Array2D<float>> expected =
-      ReferenceUtil::MatmulArray2D(*lhs_data, *rhs_data);
-
-  ComputeAndCompareR2<float>(&builder, *expected,
-                             {lhs_handle.get(), rhs_handle.get()},
-                             ErrorSpec(0.3, 3e-3));
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_12_117_7_MinorToMajorTF) {
-  TestMatrixDot(12, 117, 7, true, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_12_117_7_MinorToMajorFT) {
-  TestMatrixDot(12, 117, 7, false, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_12_117_7_MinorToMajorTT) {
-  TestMatrixDot(12, 117, 7, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_12_117_7_MinorToMajorFF) {
-  TestMatrixDot(12, 117, 7, false, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_270_270_520_MinorToMajorTT) {
-  TestMatrixDot(270, 270, 520, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_270_270_520_MinorToMajorTF) {
-  TestMatrixDot(270, 270, 520, true, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_270_270_520_MinorToMajorFT) {
-  TestMatrixDot(270, 270, 520, false, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_270_270_520_MinorToMajorFF) {
-  TestMatrixDot(270, 270, 520, false, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_260_3_520_MinorToMajorTT) {
-  TestMatrixDot(269, 3, 520, true, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_260_3_520_MinorToMajorTF) {
-  TestMatrixDot(260, 3, 520, true, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_260_3_520_MinorToMajorFT) {
-  TestMatrixDot(260, 3, 520, false, true);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixDotF32_260_3_520_MinorToMajorFF) {
-  TestMatrixDot(260, 3, 520, false, false);
-}
-
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x8) {
-  TestMatrixDot(1, 8, 8, true, true);
-}
+      builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {param.m, param.k}),
+                        "dot_lhs"),
+      builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {param.k, param.n}),
+                        "dot_rhs"));
+
+  if (param.has_addend) {
+    result = builder.Add(
+        result,
+        builder.Parameter(
+            2, ShapeUtil::MakeShape(prim_type, {param.m, param.n}), "addend"));
+  }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x130x8) {
-  TestMatrixDot(1, 130, 8, true, true);
-}
+  std::unique_ptr<Array2D<float>> expected;
+  if (param.has_addend) {
+    expected = ReferenceUtil::ApplyElementwise2D(
+        std::plus<float>(),
+        *ReferenceUtil::MatmulArray2D(*dot_lhs_data, *dot_rhs_data),
+        *addend_data);
+  } else {
+    expected = ReferenceUtil::MatmulArray2D(*dot_lhs_data, *dot_rhs_data);
+  }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x130) {
-  TestMatrixDot(1, 8, 130, true, true);
-}
+  std::vector<GlobalData*> args = {dot_lhs_handle.get(), dot_rhs_handle.get()};
+  if (param.has_addend) {
+    args.push_back(addend_handle.get());
+  }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x290x130) {
-  TestMatrixDot(1, 290, 130, true, true);
+  ComputeAndCompareR2<float>(&builder, *expected, args, ErrorSpec(0.3, 3e-3));
 }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_2x1x1) {
-  TestMatrixDot(2, 1, 1, true, true);
-}
+std::vector<DotTestParam> CreateDotTestParameters() {
+  std::vector<DotTestParam> params;
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_8x8x1) {
-  TestMatrixDot(8, 8, 1, true, true);
-}
+  auto add_matrix_matrix_dot_test = [&](int m, int k, int n) {
+    for (bool lhs_row_major : {true, false}) {
+      for (bool rhs_row_major : {true, false}) {
+        params.push_back({/*m=*/m, /*k=*/k, /*n=*/n,
+                          /*dot_lhs_row_major=*/lhs_row_major,
+                          /*dot_rhs_row_major=*/rhs_row_major,
+                          /*has_addend=*/false, /*addend_row_major=*/true});
+      }
+    }
+  };
+
+  auto add_matrix_vector_dot_test = [&](int k, int n) {
+    for (bool has_addend : {false, true}) {
+      params.push_back({/*m=*/1, /*k=*/k, /*n=*/n,
+                        /*dot_lhs_row_major=*/true, /*dot_rhs_row_major=*/true,
+                        /*has_addend=*/has_addend, /*addend_row_major=*/true});
+      if (n != 1) {
+        params.push_back(
+            {/*m=*/n, /*k=*/k, /*n=*/1,
+             /*dot_lhs_row_major=*/true, /*dot_rhs_row_major=*/true,
+             /*has_addend=*/has_addend, /*addend_row_major=*/true});
+      }
+    }
+  };
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_16x1x1) {
-  TestMatrixDot(16, 1, 1, true, true);
-}
+  add_matrix_matrix_dot_test(/*m=*/12, /*k=*/117, /*n=*/7);
+  add_matrix_matrix_dot_test(/*m=*/270, /*k=*/270, /*n=*/520);
+  add_matrix_matrix_dot_test(/*m=*/260, /*k=*/3, /*n=*/520);
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_16x3x1) {
-  TestMatrixDot(16, 3, 1, true, true);
-}
+  add_matrix_vector_dot_test(/*k=*/8, /*n=*/8);
+  add_matrix_vector_dot_test(/*k=*/130, /*n=*/8);
+  add_matrix_vector_dot_test(/*k=*/8, /*n=*/130);
+  add_matrix_vector_dot_test(/*k=*/290, /*n=*/130);
+  add_matrix_vector_dot_test(/*k=*/1, /*n=*/1);
+  add_matrix_vector_dot_test(/*k=*/1, /*n=*/16);
+  add_matrix_vector_dot_test(/*k=*/3, /*n=*/16);
+  add_matrix_vector_dot_test(/*k=*/3, /*n=*/3);
+  add_matrix_vector_dot_test(/*k=*/29, /*n=*/29);
+  add_matrix_vector_dot_test(/*k=*/8, /*n=*/2);
+  add_matrix_vector_dot_test(/*k=*/2, /*n=*/8);
+  add_matrix_vector_dot_test(/*k=*/259, /*n=*/258);
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_3x3x1) {
-  TestMatrixDot(3, 3, 1, true, true);
+  return params;
 }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_29x29x1) {
-  TestMatrixDot(29, 29, 1, true, true);
-}
+INSTANTIATE_TEST_CASE_P(DotTests, ParametricDotTest,
+                        ::testing::ValuesIn(CreateDotTestParameters()),
+                        PrintDotTestParam);
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x8x2) {
-  TestMatrixDot(1, 8, 2, true, true);
+XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFF) {
+  TestSquareMatrixDot<float>(false, false);
 }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_1x2x8) {
-  TestMatrixDot(1, 2, 8, true, true);
+XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFT) {
+  TestSquareMatrixDot<float>(false, true);
 }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1) {
-  TestMatrixDot(259, 258, 1, true, true);
+XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTF) {
+  TestSquareMatrixDot<float>(true, false);
 }
 
-XLA_TEST_F(DotOperationTest, MatrixVectorDotF32_259x258x1_FT) {
-  TestMatrixDot(259, 258, 1, false, true);
+XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTT) {
+  TestSquareMatrixDot<float>(true, true);
 }
 
-XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFF) {
-  constexpr bool kLhsRowMajor = false;
-  constexpr bool kRhsRowMajor = false;
-  TestSquareMatrixDot<float>(kLhsRowMajor, kRhsRowMajor);
+XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorFF) {
+  TestSquareMatrixDot<complex64>(false, false);
 }
 
-XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFT) {
-  TestSquareMatrixDot<float>(false, true);
+XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorFT) {
+  TestSquareMatrixDot<complex64>(false, true);
 }
 
-XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTF) {
-  TestSquareMatrixDot<float>(true, false);
+XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorTF) {
+  TestSquareMatrixDot<complex64>(true, false);
 }
 
-TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTT) {
-  constexpr bool kLhsRowMajor = true;
-  constexpr bool kRhsRowMajor = true;
-  TestSquareMatrixDot<float>(kLhsRowMajor, kRhsRowMajor);
+XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorTT) {
+  TestSquareMatrixDot<complex64>(true, true);
 }
 
 XLA_TEST_F(DotOperationTest, SquareMatrixDotF64) {
@@ -561,5 +583,95 @@ TEST_F(DotOperationTest, TransposeFolding) {
   }
 }
 
+TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstLHS) {
+  auto prim_type = primitive_util::NativeToPrimitiveType<float>();
+
+  std::unique_ptr<Array2D<float>> constant_lhs_array(new Array2D<float>(
+      {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}}));
+
+  ComputationBuilder builder(client_, TestName());
+  auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array);
+  auto rhs_arg_0 = builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}),
+                                     "rhs_arg_0");
+  auto rhs_arg_1 = builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {3, 2}),
+                                     "rhs_arg_1");
+  auto rhs_arg_2 = builder.Parameter(2, ShapeUtil::MakeShape(prim_type, {1, 2}),
+                                     "rhs_arg_2");
+  auto result = builder.Dot(
+      lhs_constant, builder.ConcatInDim({rhs_arg_0, rhs_arg_1, rhs_arg_2}, 0));
+
+  std::unique_ptr<Array2D<float>> arg_0_value_array(
+      new Array2D<float>({{1.0, 2.0}, {3.0, 4.0}}));
+  std::unique_ptr<Array2D<float>> arg_1_value_array(
+      new Array2D<float>({{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}));
+  std::unique_ptr<Array2D<float>> arg_2_value_array(
+      new Array2D<float>({{1.0, 2.0}}));
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_0_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_0_value_array)));
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_1_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_1_value_array)));
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_2_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_2_value_array)));
+
+  Array2D<float> expected({{53.0, 74.0}, {45.0, 66.0}});
+  ComputeAndCompareR2<float>(
+      &builder, expected,
+      {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_);
+}
+
+TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstRHS) {
+  auto prim_type = primitive_util::NativeToPrimitiveType<float>();
+
+  std::unique_ptr<Array2D<float>> constant_rhs_array(
+      new Array2D<float>({{1.0, 2.0},
+                          {3.0, 4.0},
+                          {5.0, 6.0},
+                          {6.0, 5.0},
+                          {4.0, 3.0},
+                          {2.0, 1.0}}));
+
+  ComputationBuilder builder(client_, TestName());
+  auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array);
+  auto lhs_arg_0 = builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}),
+                                     "lhs_arg_0");
+  auto lhs_arg_1 = builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {2, 3}),
+                                     "lhs_arg_1");
+  auto lhs_arg_2 = builder.Parameter(2, ShapeUtil::MakeShape(prim_type, {2, 1}),
+                                     "lhs_arg_2");
+  auto result = builder.Dot(
+      builder.ConcatInDim({lhs_arg_0, lhs_arg_1, lhs_arg_2}, 1), rhs_constant);
+
+  std::unique_ptr<Array2D<float>> arg_0_value_array(
+      new Array2D<float>({{1.0, 2.0}, {3.0, 4.0}}));
+  std::unique_ptr<Array2D<float>> arg_1_value_array(
+      new Array2D<float>({{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}));
+  std::unique_ptr<Array2D<float>> arg_2_value_array(
+      new Array2D<float>({{1.0}, {2.0}}));
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_0_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_0_value_array)));
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_1_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_1_value_array)));
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto arg_2_value,
+      client_->TransferToServer(
+          *Literal::CreateR2FromArray2D<float>(*arg_2_value_array)));
+
+  Array2D<float> expected({{38.0, 36.0}, {93.0, 91.0}});
+  ComputeAndCompareR2<float>(
+      &builder, expected,
+      {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_);
+}
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc
index 8baaf39e3cf8fa7f6fa4a0224c1297f82e0d92aa..ae3f887240d0ccffcc9c51a2c409de457a94f967 100644
--- a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc
+++ b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc
@@ -51,12 +51,16 @@ class DynamicSliceTest : public ClientLibraryTestBase {
     RunR1<IndexT, DataT>({0, 1, 2, 3, 4, 5, 6, 7}, {2}, {3}, {2, 3, 4});
     // Slice at dimension boundaries.
     RunR1<IndexT, DataT>({0, 1, 2, 3, 4, 5, 6, 7}, {5}, {3}, {5, 6, 7});
-    // Slice at dimension boundaries, but with sizes that cause indices to wrap.
-    RunR1<IndexT, DataT>({0, 1, 2, 3, 4, 5, 6, 7}, {6}, {4}, {6, 7, 0, 1});
     // Zero element slice.
     RunR1<IndexT, DataT>({0, 1, 2, 3, 4, 5, 6, 7}, {2}, {0}, {});
   }
 
+  template <typename IndexT, typename DataT>
+  void TestR1Wrap() {
+    // Slice at dimension boundaries, but with sizes that cause indices to wrap.
+    RunR1<IndexT, DataT>({0, 1, 2, 3, 4, 5, 6, 7}, {6}, {4}, {6, 7, 0, 1});
+  }
+
   template <typename IndexT, typename DataT>
   void TestR2() {
     // Slice at dimension start.
@@ -68,15 +72,19 @@ class DynamicSliceTest : public ClientLibraryTestBase {
     // Slice at dimension boundaries.
     RunR2<IndexT, DataT>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}, {1, 1}, {2, 1},
                          {{5}, {8}});
-    // Slice at dimension boundaries, but with sizes that cause indices to wrap.
-    RunR2<IndexT, DataT>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}, {1, 1}, {3, 3},
-                         {{5, 6, 4}, {8, 9, 7}, {2, 3, 1}});
     // Zero element slice: 2x0.
     RunR2<IndexT, DataT>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}, {0, 0}, {2, 0},
                          {{}, {}});
     // Zero element slice: 0x2.
     RunR2<IndexT, DataT>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}, {0, 0}, {0, 2},
-                         Array2D<DataT>(0, 2));
+                         Array2D<int>(0, 2));
+  }
+
+  template <typename IndexT, typename DataT>
+  void TestR2Wrap() {
+    // Slice at dimension boundaries, but with sizes that cause indices to wrap.
+    RunR2<IndexT, DataT>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}, {1, 1}, {3, 3},
+                         {{5, 6, 4}, {8, 9, 7}, {2, 3, 1}});
   }
 
   template <typename IndexT, typename DataT>
@@ -97,85 +105,119 @@ class DynamicSliceTest : public ClientLibraryTestBase {
        {{7, 8}, {9, 10}, {11, 12}}},
       {0, 1, 1}, {2, 2, 1},
       {{{4}, {6}}, {{10}, {12}}});
+    // clang-format on
+  }
 
+  template <typename IndexT, typename DataT>
+  void TestR3Wrap() {
     // Slice at dimension boundaries, but with sizes that cause indices to wrap.
     RunR3<IndexT, DataT>(
       {{{1, 2}, {3, 4}, {5, 6}},
        {{7, 8}, {9, 10}, {11, 12}}},
       {0, 2, 1}, {2, 1, 2},
       {{{6, 5}}, {{12, 11}}});
-
-    // clang-format on
   }
 
   template <typename IndexT, typename DataT>
-  void RunR1(tensorflow::gtl::ArraySlice<DataT> input_values,
+  void RunR1(tensorflow::gtl::ArraySlice<int> input_values_int,
              const std::vector<IndexT> slice_starts,
              const std::vector<int64>& slice_sizes,
-             tensorflow::gtl::ArraySlice<DataT> expected_values) {
+             tensorflow::gtl::ArraySlice<int> expected_values_int) {
+    // bfloat16 has explicit constructors, so it does not implicitly convert the
+    // way built-in types do, which is why we can't take the parameter as an
+    // ArraySlice<DataT>. We also can't convert it to a vector, because
+    // vector<bool> is special so that it cannot be an ArraySlice<bool>, which
+    // is what the code below wants. So instead we do this.
+    Literal input_values =
+        std::move(*Literal::CreateR1(input_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+    Literal expected_values =
+        std::move(*Literal::CreateR1(expected_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+
     ComputationBuilder builder(client_, TestName());
     // Initialize and transfer dynamic slice start indices parameter.
     ComputationDataHandle starts;
     std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>(
         slice_starts, 0, "slice_starts", &builder, &starts);
     // Build dynamic slice computation.
-    auto input = builder.ConstantR1<DataT>(input_values);
+    auto input = builder.ConstantLiteral(input_values);
     builder.DynamicSlice(input, starts, slice_sizes);
     // Run computation and compare against expected values.
-    ComputeAndCompareR1<DataT>(&builder, expected_values, {start_data.get()});
+    ComputeAndCompareLiteral(&builder, expected_values, {start_data.get()});
   }
 
   template <typename IndexT, typename DataT>
-  void RunR2(const Array2D<DataT>& input_values,
+  void RunR2(const Array2D<int>& input_values_int,
              const std::vector<IndexT> slice_starts,
              const std::vector<int64>& slice_sizes,
-             const Array2D<DataT>& expected_values) {
+             const Array2D<int>& expected_values_int) {
+    Literal input_values =
+        std::move(*Literal::CreateR2FromArray2D(input_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+    Literal expected_values =
+        std::move(*Literal::CreateR2FromArray2D(expected_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+
     ComputationBuilder builder(client_, TestName());
     // Initialize and transfer dynamic slice start indices parameter.
     ComputationDataHandle starts;
     std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>(
         slice_starts, 0, "slice_starts", &builder, &starts);
     // Build dynamic slice computation.
-    auto input = builder.ConstantR2FromArray2D<DataT>(input_values);
+    auto input = builder.ConstantLiteral(input_values);
     builder.DynamicSlice(input, starts, slice_sizes);
     // Run computation and compare against expected values.
-    ComputeAndCompareR2<DataT>(&builder, expected_values, {start_data.get()});
+    ComputeAndCompareLiteral(&builder, expected_values, {start_data.get()});
   }
 
   template <typename IndexT, typename DataT>
-  void RunR3(const Array3D<DataT>& input_values,
+  void RunR3(const Array3D<int>& input_values_int,
              const std::vector<IndexT> slice_starts,
              const std::vector<int64>& slice_sizes,
-             const Array3D<DataT>& expected_values) {
+             const Array3D<int>& expected_values_int) {
+    Literal input_values =
+        std::move(*Literal::CreateR3FromArray3D(input_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+    Literal expected_values =
+        std::move(*Literal::CreateR3FromArray3D(expected_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+
     ComputationBuilder builder(client_, TestName());
     // Initialize and transfer dynamic slice start indices parameter.
     ComputationDataHandle starts;
     std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>(
         slice_starts, 0, "slice_starts", &builder, &starts);
     // Build dynamic slice computation.
-    auto input = builder.ConstantR3FromArray3D<DataT>(input_values);
+    auto input = builder.ConstantLiteral(input_values);
     builder.DynamicSlice(input, starts, slice_sizes);
     // Run computation and compare against expected values.
-    ComputeAndCompareR3<DataT>(&builder, expected_values, {start_data.get()});
+    ComputeAndCompareLiteral(&builder, expected_values, {start_data.get()});
   }
 };
 
+XLA_TEST_F(DynamicSliceTest, Int32R1BF16) { TestR1<int32, bfloat16>(); }
 XLA_TEST_F(DynamicSliceTest, Int32R1) { TestR1<int32, int32>(); }
-
+XLA_TEST_F(DynamicSliceTest, Int32R1Wrap) { TestR1Wrap<int32, int32>(); }
 XLA_TEST_F(DynamicSliceTest, Int64R1) { TestR1<int64, float>(); }
-
 XLA_TEST_F(DynamicSliceTest, UInt64R1) { TestR1<uint64, double>(); }
 
-XLA_TEST_F(DynamicSliceTest, Int32R2) { TestR2<int32, float>(); }
-
+XLA_TEST_F(DynamicSliceTest, Int32R2BF16) { TestR2<int32, bfloat16>(); }
+XLA_TEST_F(DynamicSliceTest, Int32R2) { TestR2<int32, int32>(); }
+XLA_TEST_F(DynamicSliceTest, Int32R2Wrap) { TestR2Wrap<int32, int32>(); }
 XLA_TEST_F(DynamicSliceTest, Int64R2) { TestR2<int64, double>(); }
-
 XLA_TEST_F(DynamicSliceTest, UInt64R2) { TestR2<uint64, int32>(); }
 
-XLA_TEST_F(DynamicSliceTest, Int32R3) { TestR3<int32, int32>(); }
-
+XLA_TEST_F(DynamicSliceTest, Int32R3BF16) { TestR3<int32, bfloat16>(); }
+XLA_TEST_F(DynamicSliceTest, Int32R3) { TestR3<int32, float>(); }
+XLA_TEST_F(DynamicSliceTest, Int32R3Wrap) { TestR3Wrap<int32, float>(); }
 XLA_TEST_F(DynamicSliceTest, Int64R3) { TestR3<int64, float>(); }
-
 XLA_TEST_F(DynamicSliceTest, UInt64R3) { TestR3<uint64, double>(); }
 
 XLA_TEST_F(DynamicSliceTest, Int32R1Pred) {
@@ -213,7 +255,7 @@ XLA_TEST_F(DynamicSliceTest, Int32R2Pred) {
   // Zero element slice: 0x2.
   RunR2<int32, bool>(
       {{true, false, true}, {false, false, true}, {true, true, false}}, {0, 0},
-      {0, 2}, Array2D<bool>(0, 2));
+      {0, 2}, Array2D<int>(0, 2));
 }
 
 XLA_TEST_F(DynamicSliceTest, Int32R3Pred) {
@@ -300,107 +342,154 @@ class DynamicUpdateSliceTest : public ClientLibraryTestBase {
   }
 
   template <typename IndexT, typename DataT>
-  void RunR1(tensorflow::gtl::ArraySlice<DataT> input_values,
-             tensorflow::gtl::ArraySlice<DataT> update_values,
+  void RunR1(tensorflow::gtl::ArraySlice<int> input_values_int,
+             tensorflow::gtl::ArraySlice<int> update_values_int,
              const std::vector<IndexT> slice_starts,
-             tensorflow::gtl::ArraySlice<DataT> expected_values) {
+             tensorflow::gtl::ArraySlice<int> expected_values_int) {
+    Literal input_values =
+        std::move(*Literal::CreateR1(input_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+    Literal update_values =
+        std::move(*Literal::CreateR1(update_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+    Literal expected_values =
+        std::move(*Literal::CreateR1(expected_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+
     ComputationBuilder builder(client_, TestName());
     // Initialize and transfer dynamic slice start indices parameter.
     ComputationDataHandle starts;
     std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>(
         slice_starts, 0, "slice_starts", &builder, &starts);
     // Build dynamic slice computation.
-    auto input = builder.ConstantR1<DataT>(input_values);
-    auto update = builder.ConstantR1<DataT>(update_values);
+    auto input = builder.ConstantLiteral(input_values);
+    auto update = builder.ConstantLiteral(update_values);
     builder.DynamicUpdateSlice(input, update, starts);
     // Run computation and compare against expected values.
-    ComputeAndCompareR1<DataT>(&builder, expected_values, {start_data.get()});
+    ComputeAndCompareLiteral(&builder, expected_values, {start_data.get()});
   }
 
   template <typename IndexT, typename DataT>
-  void RunR2(const Array2D<DataT>& input_values,
-             const Array2D<DataT>& update_values,
+  void RunR2(const Array2D<int>& input_values_int,
+             const Array2D<int>& update_values_int,
              const std::vector<IndexT> slice_starts,
-             const Array2D<DataT>& expected_values) {
+             const Array2D<int>& expected_values_int) {
+    Literal input_values =
+        std::move(*Literal::CreateR2FromArray2D(input_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+    Literal update_values =
+        std::move(*Literal::CreateR2FromArray2D(update_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+    Literal expected_values =
+        std::move(*Literal::CreateR2FromArray2D(expected_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+
     ComputationBuilder builder(client_, TestName());
     // Initialize and transfer dynamic slice start indices parameter.
     ComputationDataHandle starts;
     std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>(
         slice_starts, 0, "slice_starts", &builder, &starts);
     // Build dynamic slice computation.
-    auto input = builder.ConstantR2FromArray2D<DataT>(input_values);
-    auto update = builder.ConstantR2FromArray2D<DataT>(update_values);
+    auto input = builder.ConstantLiteral(input_values);
+    auto update = builder.ConstantLiteral(update_values);
     builder.DynamicUpdateSlice(input, update, starts);
     // Run computation and compare against expected values.
-    ComputeAndCompareR2<DataT>(&builder, expected_values, {start_data.get()});
+    ComputeAndCompareLiteral(&builder, expected_values, {start_data.get()});
   }
 
   template <typename IndexT, typename DataT>
-  void RunR3(const Array3D<DataT>& input_values,
-             const Array3D<DataT>& update_values,
+  void RunR3(const Array3D<int>& input_values_int,
+             const Array3D<int>& update_values_int,
              const std::vector<IndexT> slice_starts,
-             const Array3D<DataT>& expected_values) {
+             const Array3D<int>& expected_values_int) {
+    Literal input_values =
+        std::move(*Literal::CreateR3FromArray3D(input_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+    Literal update_values =
+        std::move(*Literal::CreateR3FromArray3D(update_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+    Literal expected_values =
+        std::move(*Literal::CreateR3FromArray3D(expected_values_int)
+                       ->Convert(primitive_util::NativeToPrimitiveType<DataT>())
+                       .ValueOrDie());
+
     ComputationBuilder builder(client_, TestName());
     // Initialize and transfer dynamic slice start indices parameter.
     ComputationDataHandle starts;
     std::unique_ptr<GlobalData> start_data = CreateR1Parameter<IndexT>(
         slice_starts, 0, "slice_starts", &builder, &starts);
     // Build dynamic slice computation.
-    auto input = builder.ConstantR3FromArray3D<DataT>(input_values);
-    auto update = builder.ConstantR3FromArray3D<DataT>(update_values);
+    auto input = builder.ConstantLiteral(input_values);
+    auto update = builder.ConstantLiteral(update_values);
     builder.DynamicUpdateSlice(input, update, starts);
     // Run computation and compare against expected values.
-    ComputeAndCompareR3<DataT>(&builder, expected_values, {start_data.get()});
+    ComputeAndCompareLiteral(&builder, expected_values, {start_data.get()});
   }
 
+  template <class T>
   void RunR3Contiguous(std::vector<int32> operand_shape, int32 index,
                        int32 size) {
+#ifdef XLA_TEST_BACKEND_CPU_PARALLEL
+    // TODO(b/71820067): The CPU parallel backend failed for this on 2018-01-10.
+    if (std::is_same<bfloat16, T>::value) {
+      return;
+    }
+#endif
+
     const int32 kSeq = operand_shape[0];
     const int32 kBatch = operand_shape[1];
     const int32 kDim = operand_shape[2];
-    Array3D<float> input_values(kSeq, kBatch, kDim);
-    Array3D<float> update_values(size, kBatch, kDim);
-    Array3D<float> expected_values(kSeq, kBatch, kDim);
+    Array3D<T> input_values(kSeq, kBatch, kDim);
+    Array3D<T> update_values(size, kBatch, kDim);
+    Array3D<T> expected_values(kSeq, kBatch, kDim);
 
-    input_values.FillIota(0);
-    float val = 1000;
-    update_values.FillIota(val);
+    input_values.FillIota(static_cast<T>(0));
+    T value = static_cast<T>(10);
+    update_values.FillIota(static_cast<T>(value));
 
     // TODO(b/34128753) Expected values may vary depending on backend when
     // the update wraps. According to documentation, the results are technically
     // implementation specific where the update is out of bounds, and hence
     // we don't really know what to pass into ComputeAndCompareR3.
-    expected_values.FillIota(0);
+    expected_values.FillIota(static_cast<T>(0));
     for (int i = 0; i < size; i++) {
       for (int j = 0; j < kBatch; j++) {
         for (int k = 0; k < kDim; k++) {
-          expected_values((index + i) % kSeq, j, k) = val++;
+          expected_values((index + i) % kSeq, j, k) = value++;
         }
       }
     }
     if (VLOG_IS_ON(1)) {
-      DumpArray<float>("input", input_values);
-      DumpArray<float>("update", update_values);
-      DumpArray<float>("expected", expected_values);
+      DumpArray<T>("input", input_values);
+      DumpArray<T>("update", update_values);
+      DumpArray<T>("expected", expected_values);
     }
 
     // Build dynamic slice computation.
     ComputationBuilder builder(client_, TestName());
     // Initialize and transfer input parameter.
     ComputationDataHandle input;
-    std::unique_ptr<GlobalData> input_data = CreateR3Parameter<float>(
-        input_values, 0, "input_values", &builder, &input);
+    std::unique_ptr<GlobalData> input_data =
+        CreateR3Parameter<T>(input_values, 0, "input_values", &builder, &input);
     // Initialize and transfer update parameter.
     ComputationDataHandle update;
-    std::unique_ptr<GlobalData> update_data = CreateR3Parameter<float>(
+    std::unique_ptr<GlobalData> update_data = CreateR3Parameter<T>(
         update_values, 1, "update_values", &builder, &update);
     auto starts = builder.ConstantR1<int32>({index, 0, 0});
     builder.DynamicUpdateSlice(input, update, starts);
 
     // Run computation and compare against expected values.
-    ComputeAndCompareR3<float>(&builder, expected_values,
-                               {input_data.get(), update_data.get()},
-                               ErrorSpec(0.000001));
+    ComputeAndCompareR3<T>(&builder, expected_values,
+                           {input_data.get(), update_data.get()},
+                           ErrorSpec(0.000001));
   }
 
   template <typename NativeT>
@@ -411,28 +500,35 @@ class DynamicUpdateSliceTest : public ClientLibraryTestBase {
   }
 };
 
+// TODO(b/71820067): The CPU parallel backend failed for this on 2018-01-10.
+XLA_TEST_F(DynamicUpdateSliceTest, DISABLED_ON_CPU_PARALLEL(Int32R1BF16)) {
+  TestR1<int32, bfloat16>();
+}
 XLA_TEST_F(DynamicUpdateSliceTest, Int32R1) { TestR1<int32, float>(); }
-
 XLA_TEST_F(DynamicUpdateSliceTest, Int64R1) { TestR1<int64, float>(); }
-
 XLA_TEST_F(DynamicUpdateSliceTest, UInt64R1) { TestR1<uint64, double>(); }
 
+// TODO(b/71820067): The CPU parallel backend failed for this on 2018-01-10.
+XLA_TEST_F(DynamicUpdateSliceTest, DISABLED_ON_CPU_PARALLEL(Int32R2BF16)) {
+  TestR2<int32, bfloat16>();
+}
 XLA_TEST_F(DynamicUpdateSliceTest, Int32R2) { TestR2<int32, float>(); }
-
 XLA_TEST_F(DynamicUpdateSliceTest, Int64R2) { TestR2<int64, int64>(); }
-
 XLA_TEST_F(DynamicUpdateSliceTest, UInt64R2) { TestR2<uint64, int32>(); }
 
+// TODO(b/71820067): The CPU parallel backend failed for this on 2018-01-10.
+XLA_TEST_F(DynamicUpdateSliceTest, DISABLED_ON_CPU_PARALLEL(Int32R3BF16)) {
+  TestR3<int32, bfloat16>();
+}
 XLA_TEST_F(DynamicUpdateSliceTest, Int32R3) { TestR3<int32, float>(); }
-
 XLA_TEST_F(DynamicUpdateSliceTest, Int64R3) { TestR3<int64, int64>(); }
-
 XLA_TEST_F(DynamicUpdateSliceTest, UInt64R3) { TestR3<uint64, uint64>(); }
 
+XLA_TEST_F(DynamicUpdateSliceTest, DISABLED_ON_CPU_PARALLEL(Int32WrapBF16)) {
+  TestWrap<int32, bfloat16>();
+}
 XLA_TEST_F(DynamicUpdateSliceTest, Int32Wrap) { TestWrap<int32, float>(); }
-
 XLA_TEST_F(DynamicUpdateSliceTest, Int64Wrap) { TestWrap<int64, int64>(); }
-
 XLA_TEST_F(DynamicUpdateSliceTest, UInt64Wrap) { TestWrap<uint64, uint64>(); }
 
 XLA_TEST_F(DynamicUpdateSliceTest, Int32R1Pred) {
@@ -498,36 +594,42 @@ XLA_TEST_F(DynamicUpdateSliceTest, Int32R3Pred) {
 XLA_TEST_F(DynamicUpdateSliceTest, R3ContiguousSingleElement) {
   // Single element, no wrap.
   std::vector<int32> operand_shape({4, 5, 2});
-  RunR3Contiguous(operand_shape, /*index=*/1, /*size=*/1);
+  RunR3Contiguous<float>(operand_shape, /*index=*/1, /*size=*/1);
+  RunR3Contiguous<bfloat16>(operand_shape, /*index=*/1, /*size=*/1);
 }
 
 XLA_TEST_F(DynamicUpdateSliceTest, R3ContiguousMultipleElements) {
   // Multiple element, no wrap.
   std::vector<int32> operand_shape({4, 5, 2});
-  RunR3Contiguous(operand_shape, /*index=*/1, /*size=*/2);
+  RunR3Contiguous<float>(operand_shape, /*index=*/1, /*size=*/2);
+  RunR3Contiguous<bfloat16>(operand_shape, /*index=*/1, /*size=*/2);
 }
 
 XLA_TEST_F(DynamicUpdateSliceTest, R3ContiguousMultipleWrapping) {
   // Multiple element, wrapping.
   std::vector<int32> operand_shape({4, 5, 2});
-  RunR3Contiguous(operand_shape, /*index=*/3, /*size=*/2);
+  RunR3Contiguous<float>(operand_shape, /*index=*/3, /*size=*/2);
+  RunR3Contiguous<bfloat16>(operand_shape, /*index=*/3, /*size=*/2);
 }
 
 XLA_TEST_F(DynamicUpdateSliceTest, R3ContiguousTooLarge) {
   // Multiple element, update size larger than operand.
   std::vector<int32> operand_shape({4, 5, 2});
-  RunR3Contiguous(operand_shape, /*index=*/5, /*size=*/2);
+  RunR3Contiguous<float>(operand_shape, /*index=*/5, /*size=*/2);
+  RunR3Contiguous<bfloat16>(operand_shape, /*index=*/5, /*size=*/2);
 }
 
 XLA_TEST_F(DynamicUpdateSliceTest, R3ContiguousUnaligned) {
   std::vector<int32> operand_shape({3, 123, 247});
-  RunR3Contiguous(operand_shape, /*index=*/1, /*size=*/1);
+  RunR3Contiguous<float>(operand_shape, /*index=*/1, /*size=*/1);
+  RunR3Contiguous<bfloat16>(operand_shape, /*index=*/1, /*size=*/1);
 }
 
 // TODO(b/34134076) Disabled on GPU 2016-01-06 due to out-of-memory error.
 XLA_TEST_F(DynamicUpdateSliceTest, DISABLED_ON_GPU(R3ContiguousLarger)) {
   std::vector<int32> operand_shape({32, 128, 1024});
-  RunR3Contiguous(operand_shape, /*index=*/7, /*size=*/1);
+  RunR3Contiguous<float>(operand_shape, /*index=*/7, /*size=*/1);
+  RunR3Contiguous<bfloat16>(operand_shape, /*index=*/7, /*size=*/1);
 }
 
 void BM_DynamicSlice(int num_iters) {
@@ -559,20 +661,20 @@ void BM_DynamicSlice(int num_iters) {
   auto computation = builder.Build().ConsumeValueOrDie();
 
   // Initialize and transfer parameter buffer.
-  auto shape_size_fn = [client](const Shape& shape) {
-    return client->backend().transfer_manager()->GetByteSizeRequirement(shape);
-  };
-  auto buffer = ScopedShapedBuffer::Allocate(start_indices_shape, &allocator, 0,
-                                             shape_size_fn)
+  auto buffer = client->backend()
+                    .transfer_manager()
+                    ->AllocateScopedShapedBuffer(
+                        start_indices_shape, &allocator, /*device_ordinal=*/0)
                     .ConsumeValueOrDie();
 
   auto start_indices_literal = Literal::CreateR1<int32>({0, 1, 2, 3});
   ASSERT_IS_OK(transfer_manager->TransferLiteralToDevice(
-      executors[device_ordinal], *start_indices_literal,
-      buffer->mutable_buffer({})));
+      executors[device_ordinal], *start_indices_literal, *buffer));
 
   std::unique_ptr<LocalExecutable> executable =
-      client->Compile(computation, {&buffer->shape()}, ExecutableBuildOptions())
+      client
+          ->Compile(computation, {&buffer->on_host_shape()},
+                    ExecutableBuildOptions())
           .ConsumeValueOrDie();
 
   // Run some warm-up executions.
diff --git a/tensorflow/compiler/xla/tests/execution_profile_test.cc b/tensorflow/compiler/xla/tests/execution_profile_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..644cbbf40f296eb2a574ae568b4f32aa3d0bd12f
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/execution_profile_test.cc
@@ -0,0 +1,71 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/client/global_data.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace xla {
+namespace {
+
+class ExecutionProfileTest : public ClientLibraryTestBase {};
+
+XLA_TEST_F(ExecutionProfileTest,
+           DISABLED_ON_CPU_PARALLEL(ExecuteWithExecutionProfile)) {
+  Shape shape = ShapeUtil::MakeShape(F32, {256, 256});
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<GlobalData> input,
+      client_->TransferToServer(
+          *Literal::CreateR2F32Linspace(1e0, 1e5, 256, 256)));
+
+  ComputationBuilder b(client_, TestName() + ".add");
+  b.Dot(b.Parameter(0, shape, "param_0"), b.Parameter(1, shape, "param_1"));
+  TF_ASSERT_OK_AND_ASSIGN(Computation dot_product, b.Build());
+
+  ExecutionProfile execution_profile;
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<GlobalData> data,
+      client_->Execute(dot_product, {input.get(), input.get()},
+                       &execution_options_, &execution_profile));
+
+  VLOG(3) << "execution_profile.compute_cycle_count() = "
+          << execution_profile.compute_cycle_count();
+  VLOG(3) << "execution_profile.compute_and_transfer_time_ns() = "
+          << execution_profile.compute_and_transfer_time_ns();
+  VLOG(3) << "execution_profile.compute_time_ns() = "
+          << execution_profile.compute_time_ns();
+
+  bool hlo_profiling_enabled =
+      execution_options_.debug_options().xla_hlo_profile();
+
+  // If HLO profiling is enabled we always expect cycle count to be populated.
+  // If HLO profiling is disabled then depending on the backend the cycle count
+  // may or may not be populated.
+  if (hlo_profiling_enabled) {
+    EXPECT_GT(execution_profile.compute_cycle_count(), 0);
+  }
+
+  EXPECT_GT(execution_profile.compute_and_transfer_time_ns(), 0);
+  EXPECT_GT(execution_profile.compute_time_ns(), 0);
+
+  TF_ASSERT_OK_AND_ASSIGN(auto computed, client_->Transfer(*data, &shape));
+  (void)computed;
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc
index 2686afccc216095345dbb7b43e916fbbe7c8ea39..a292eab1d198fbf69c6dc81c780487ea46756f72 100644
--- a/tensorflow/compiler/xla/tests/fusion_test.cc
+++ b/tensorflow/compiler/xla/tests/fusion_test.cc
@@ -816,7 +816,8 @@ void BM_ParallelFusion(int num_iters) {
   std::unique_ptr<LocalExecutable> executable =
       client
           ->Compile(computation,
-                    {&buffer0->shape(), &buffer1->shape(), &buffer2->shape()},
+                    {&buffer0->on_host_shape(), &buffer1->on_host_shape(),
+                     &buffer2->on_host_shape()},
                     ExecutableBuildOptions())
           .ConsumeValueOrDie();
 
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc
index d73c05ff92578209143e0679558848160cae99bd..7c1a993b478a0e0878e85c0e4192da053e33619f 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.cc
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc
@@ -15,13 +15,22 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
 
+#include <memory>
 #include <set>
 #include <string>
 #include <utility>
 
+#include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"
 #include "tensorflow/compiler/xla/ptr_util.h"
+#include "tensorflow/compiler/xla/service/platform_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_utils.h"
+#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h"
 #include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
@@ -30,44 +39,235 @@ namespace se = ::perftools::gputools;
 
 namespace xla {
 
+namespace {
+
+using tensorflow::StringPiece;
+using tensorflow::gtl::ArraySlice;
+using tensorflow::gtl::optional;
+
+constexpr char kInterpreter[] = "interpreter";
+
+// Helper functions to get test and reference platforms.
+se::Platform* GetReferencePlatform() {
+  auto result = PlatformUtil::GetPlatform(kInterpreter);
+  TF_CHECK_OK(result.status()) << "could not get interpreter platform";
+  return result.ValueOrDie();
+}
+
+se::Platform* GetTestPlatform() {
+  auto result = PlatformUtil::GetDefaultPlatform();
+  TF_CHECK_OK(result.status()) << "could not get test platform";
+  return result.ValueOrDie();
+}
+
+bool ProgramShapesEqual(const ProgramShape& lhs, const ProgramShape& rhs) {
+  if (lhs.parameters_size() != rhs.parameters_size()) {
+    return false;
+  }
+  for (int i = 0; i < lhs.parameters_size(); i++) {
+    if (!ShapeUtil::Equal(lhs.parameters(i), rhs.parameters(i))) {
+      return false;
+    }
+  }
+  return ShapeUtil::Equal(lhs.result(), rhs.result());
+}
+
+ProgramShape GetProgramShapeWithLayout(const HloModule& module) {
+  ProgramShape program_shape;
+  const auto* entry = module.entry_computation();
+  for (const auto* param : entry->parameter_instructions()) {
+    *program_shape.add_parameters() = param->shape();
+    *program_shape.add_parameter_names() = param->name();
+  }
+  *program_shape.mutable_result() = entry->root_instruction()->shape();
+  return program_shape;
+}
+
+}  // namespace
+
+HloTestBase::HloTestBase()
+    : HloTestBase(GetTestPlatform(), GetReferencePlatform()) {}
+
+HloTestBase::HloTestBase(se::Platform* test_platform,
+                         se::Platform* reference_platform)
+    : test_runner_(test_platform), reference_runner_(reference_platform) {
+  hlo_verifier_ = MakeUnique<HloVerifier>();
+}
+
 /* static */
 std::unique_ptr<HloModule> HloTestBase::CreateNewModule() {
   HloModuleConfig config;
+  config.set_debug_options(GetDebugOptionsForTest());
+  return MakeUnique<HloModule>(TestName(), VersionedComputationHandle(),
+                               config);
+}
 
+/*static*/ DebugOptions HloTestBase::GetDebugOptionsForTest() {
   auto debug_options = legacy_flags::GetDebugOptionsFromFlags();
   // TODO(b/38354253): Change tests to use Parameters instead of Constants.
   debug_options.add_xla_disable_hlo_passes("constant_folding");
+  return debug_options;
+}
 
-  config.set_debug_options(debug_options);
-
-  return MakeUnique<HloModule>(TestName(), VersionedComputationHandle(),
-                               config);
+StatusOr<std::unique_ptr<Literal>> HloTestBase::Execute(
+    std::unique_ptr<HloModule> module,
+    tensorflow::gtl::ArraySlice<Literal*> arguments) {
+  return test_runner_.Execute(std::move(module), arguments);
 }
 
-StatusOr<perftools::gputools::DeviceMemoryBase> HloTestBase::Execute(
+std::unique_ptr<Literal> HloTestBase::ExecuteAndTransfer(
     std::unique_ptr<HloModule> module,
-    tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-        arguments,
-    Shape* result_shape) {
-  return runner_.Execute(std::move(module), arguments, result_shape);
+    tensorflow::gtl::ArraySlice<Literal*> arguments) {
+  return test_runner_.Execute(std::move(module), arguments).ValueOrDie();
 }
 
-se::DeviceMemoryBase HloTestBase::TransferToDevice(const Literal& literal) {
-  return runner_.TransferToDevice(literal).ValueOrDie();
+StatusOr<std::unique_ptr<HloModule>> HloTestBase::MakeReferenceModule(
+    const HloModule& test_module,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  std::unique_ptr<HloModule> reference_module = test_module.Clone();
+  const auto& program_shape = GetProgramShapeWithLayout(test_module);
+
+  if (reference_preprocessor != nullptr) {
+    reference_preprocessor(reference_module.get());
+    if (!ProgramShapesEqual(program_shape,
+                            GetProgramShapeWithLayout(*reference_module))) {
+      return InvalidArgument(
+          "reference preprocessor must not modify the program shape");
+    }
+  }
+  TF_RETURN_IF_ERROR(VerifyHloModule(*reference_runner_.backend().platform(),
+                                     reference_module.get()));
+  return std::move(reference_module);
 }
 
-std::unique_ptr<Literal> HloTestBase::TransferFromDevice(
-    const Shape& shape, se::DeviceMemoryBase device_base) {
-  return runner_.TransferFromDevice(shape, device_base).ValueOrDie();
+template <typename LiteralPtr>
+StatusOr<::testing::AssertionResult> HloTestBase::RunAndCompareInternal(
+    std::unique_ptr<HloModule> module, const ArraySlice<LiteralPtr> arguments,
+    const optional<ErrorSpec>& error, bool run_hlo_passes,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  static_assert(
+      std::is_same<Literal*, LiteralPtr>::value ||
+          std::is_same<std::unique_ptr<Literal>, LiteralPtr>::value,
+      "The LiteralPtr type only accepts Literal* or std::unique_ptr<Literal>.");
+  TF_RETURN_IF_ERROR(
+      VerifyHloModule(*test_runner_.backend().platform(), module.get()));
+  TF_ASSIGN_OR_RETURN(auto reference_module,
+                      MakeReferenceModule(*module, reference_preprocessor));
+
+  // Execute on two backends.
+  TF_ASSIGN_OR_RETURN(
+      auto test,
+      test_runner_.Execute(std::move(module), arguments, run_hlo_passes));
+  TF_ASSIGN_OR_RETURN(auto reference,
+                      reference_runner_.Execute(std::move(reference_module),
+                                                arguments, run_hlo_passes));
+  return LiteralTestUtil::NearOrEqual(/*expected=*/*reference, /*actual=*/*test,
+                                      error);
 }
 
-std::unique_ptr<Literal> HloTestBase::ExecuteAndTransfer(
-    std::unique_ptr<HloModule> module,
-    tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> arguments) {
-  return runner_.ExecuteAndTransfer(std::move(module), arguments).ValueOrDie();
+template <typename LiteralPtr>
+::testing::AssertionResult HloTestBase::RunAndCompare(
+    std::unique_ptr<HloModule> module, const ArraySlice<LiteralPtr> arguments,
+    const optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto result =
+      RunAndCompareInternal(std::move(module), arguments, error,
+                            /*run_hlo_passes=*/true, reference_preprocessor);
+  if (!result.ok()) {
+    return ::testing::AssertionFailure() << result.status();
+  }
+  return result.ValueOrDie();
+}
+
+template <typename LiteralPtr>
+::testing::AssertionResult HloTestBase::RunAndCompareNoHloPasses(
+    std::unique_ptr<HloModule> module, const ArraySlice<LiteralPtr> arguments,
+    const optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto result =
+      RunAndCompareInternal(std::move(module), arguments, error,
+                            /*run_hlo_passes=*/false, reference_preprocessor);
+  if (!result.ok()) {
+    return ::testing::AssertionFailure() << result.status();
+  }
+  return result.ValueOrDie();
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompare(
+    std::unique_ptr<HloModule> module, const optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  const auto& fake_arguments =
+      MakeFakeArguments(module.get()).ConsumeValueOrDie();
+  return RunAndCompare<std::unique_ptr<Literal>>(
+      std::move(module), fake_arguments, error, reference_preprocessor);
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompareNoHloPasses(
+    std::unique_ptr<HloModule> module, const optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  const auto& fake_arguments =
+      MakeFakeArguments(module.get()).ConsumeValueOrDie();
+  return RunAndCompareNoHloPasses<std::unique_ptr<Literal>>(
+      std::move(module), fake_arguments, error, reference_preprocessor);
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompare(
+    const StringPiece hlo_string,
+    const tensorflow::gtl::optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto module_or_status =
+      HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest());
+  if (!module_or_status.ok()) {
+    return ::testing::AssertionFailure()
+           << "Error while parsing HLO text format: "
+           << module_or_status.status().ToString();
+  }
+  return RunAndCompare(module_or_status.ConsumeValueOrDie(), error,
+                       reference_preprocessor);
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompareFromFile(
+    const string& filename, const tensorflow::gtl::optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto module_or_status =
+      HloRunner::ReadModule(filename, GetDebugOptionsForTest());
+  if (!module_or_status.ok()) {
+    return ::testing::AssertionFailure()
+           << "failed reading hlo module from file";
+  }
+  return RunAndCompare(module_or_status.ConsumeValueOrDie(), error,
+                       reference_preprocessor);
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompareNoHloPasses(
+    const StringPiece hlo_string,
+    const tensorflow::gtl::optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto module_or_status =
+      HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest());
+  if (!module_or_status.ok()) {
+    return ::testing::AssertionFailure()
+           << "Error while parsing HLO text format: "
+           << module_or_status.status().ToString();
+  }
+  return RunAndCompareNoHloPasses(module_or_status.ConsumeValueOrDie(), error,
+                                  reference_preprocessor);
+}
+
+::testing::AssertionResult HloTestBase::RunAndCompareNoHloPassesFromFile(
+    const string& filename, const tensorflow::gtl::optional<ErrorSpec>& error,
+    const std::function<void(HloModule*)>& reference_preprocessor) {
+  auto module_or_status =
+      HloRunner::ReadModule(filename, GetDebugOptionsForTest());
+  if (!module_or_status.ok()) {
+    return ::testing::AssertionFailure()
+           << "failed reading hlo module from file";
+  }
+  return RunAndCompareNoHloPasses(module_or_status.ConsumeValueOrDie(), error,
+                                  reference_preprocessor);
 }
 
-Backend& HloTestBase::backend() { return runner_.backend(); }
+Backend& HloTestBase::backend() { return test_runner_.backend(); }
 
 /* static */
 string HloTestBase::TestName() {
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h
index 7f068dce36be3546298de2f06bf6d33446d07ca2..4aea9fc9fd027231106e529eb16bcd43f23fbe1c 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.h
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.h
@@ -24,52 +24,150 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_runner.h"
+#include "tensorflow/compiler/xla/service/hlo_verifier.h"
+#include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/compiler/xla/shape_layout.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace xla {
 
-// A base class for tests which build and run HLO code. This is a lower level of
-// abstraction than using the client interface and enables, for one, explicitly
-// building a graph of HLO instructions to run.
+// A base class for tests which build and/or run HLO code. The class includes
+// support for running an HLO module on two platforms and compare the results.
+// This is a lower level of abstraction than using the client interface and
+// enables, for one, explicitly building a graph of HLO instructions to run.
+//
+// This can also be used to write text/file-based test cases. Note that the test
+// target is responsible for linking the needed backends. A covenient way to do
+// this is to make it an xla_test: it will generate test targets linking with
+// the respective backends, which will be used as the test backend; the
+// interpreter backend is already linked with hlo_test_base so it will be the
+// default reference backend. For example, if you want to compare both cpu vs.
+// interpreter, and gpu vs. interpreter, you can:
+//
+//  xla_test (
+//    name = "sample_text_test",
+//    srcs = ["sample_text_test.cc"],
+//    backends = [
+//      "cpu",
+//      "gpu",
+//    ],
+//    deps = [
+//      "//third_party/tensorflow/compiler/xla/tests:hlo_test_base",
+//      ...
+//    ],
+//  )
+//
+// For a more detailed example, see "../tests/sample_text_test.cc".
 class HloTestBase : public ::testing::Test {
  protected:
-  HloTestBase() {}
+  // This uses the interpreter backend as the reference backend and
+  // automatically finds another supported backend as the test backend. If the
+  // interpreter is the only supported backend, it will be both the test backend
+  // and the reference backend.
+  HloTestBase();
+
+  // If your test doesn't use interpreter as the reference backend, you can use
+  // this constructor. Note that your test target is responsible for linking in
+  // both needed backends.
+  HloTestBase(::perftools::gputools::Platform* test_platform,
+              ::perftools::gputools::Platform* reference_platform);
 
   ~HloTestBase() override {}
 
   // Creates a new HLO module for a test. The module created will have
   // TestName() for its name; it will also automatically populate its debug
-  // options from command-line flags. It's recommended to use this method to
-  // create all HloModules for tests.
+  // options from command-line flags. If you want a fresh HloModule object and
+  // then add HloComputations to it, it's recommended to use this method in your
+  // tests.
   static std::unique_ptr<HloModule> CreateNewModule();
 
-  // Executes the given module and returns a global data handle.
-  StatusOr<perftools::gputools::DeviceMemoryBase> Execute(
+  // Populates debug options from command-line flags and adjusts the options for
+  // testing. It is recommended to use this when you need to pass in
+  // DebugOptions, e.g. when creating a module from a string or a file.
+  static DebugOptions GetDebugOptionsForTest();
+
+  // Executes the given module and return the result as a Literal.
+  StatusOr<std::unique_ptr<Literal>> Execute(
       std::unique_ptr<HloModule> module,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments,
-      Shape* result_shape);
+      tensorflow::gtl::ArraySlice<Literal*> arguments);
 
-  // Transfers the given literal to the device and returns the data handle.
-  perftools::gputools::DeviceMemoryBase TransferToDevice(
-      const Literal& literal);
+  std::unique_ptr<Literal> ExecuteAndTransfer(
+      std::unique_ptr<HloModule> module,
+      tensorflow::gtl::ArraySlice<Literal*> arguments);
+
+  // Executes the given hlo module on two backends and compares results.
+  //
+  // 'arguments': the input of the hlo module. The LiteralPtr type accepts
+  // Literal* or std::unique_ptr<Literal>.
+  //
+  // 'error': if has value, expects the results to be near (within the error
+  // bound). Otherwise, expects the results to be equal.
+  //
+  // 'reference_preprocessor': the module should be ready to run on the test
+  // backend, but it might need to be tailored so that it is able to run on the
+  // reference backend. Note that the program shape of the module must not be
+  // modified.
+  template <typename LiteralPtr>
+  ::testing::AssertionResult RunAndCompare(
+      std::unique_ptr<HloModule> module,
+      const tensorflow::gtl::ArraySlice<LiteralPtr> arguments,
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+
+  // Same as above, except that the module will be executed without Hlo
+  // optimization.
+  template <typename LiteralPtr>
+  ::testing::AssertionResult RunAndCompareNoHloPasses(
+      std::unique_ptr<HloModule> module,
+      const tensorflow::gtl::ArraySlice<LiteralPtr> arguments,
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
 
-  // Transfers the array referred to by the given handle from the device and
-  // returns as a Literal.
-  std::unique_ptr<Literal> TransferFromDevice(
-      const Shape& shape, perftools::gputools::DeviceMemoryBase device_base);
+  // Executes an hlo module with fake inputs and compares the results.
+  ::testing::AssertionResult RunAndCompare(
+      std::unique_ptr<HloModule> module,
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
 
-  // Executes the given module and return the result as a Literal.
-  std::unique_ptr<Literal> ExecuteAndTransfer(
+  // Same as above, except that the module will be executed without Hlo
+  // optimization.
+  ::testing::AssertionResult RunAndCompareNoHloPasses(
       std::unique_ptr<HloModule> module,
-      tensorflow::gtl::ArraySlice<perftools::gputools::DeviceMemoryBase>
-          arguments);
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+
+  // Convenient wrappers for executing and comparing an hlo module with fake
+  // input. Module can be passed in directly, or parsed from an hlo_string,
+  // or loaded from a file.
+  ::testing::AssertionResult RunAndCompare(
+      const tensorflow::StringPiece hlo_string,
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+  ::testing::AssertionResult RunAndCompareFromFile(
+      const string& filename, const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+  ::testing::AssertionResult RunAndCompareNoHloPasses(
+      const tensorflow::StringPiece hlo_string,
+      const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
+  ::testing::AssertionResult RunAndCompareNoHloPassesFromFile(
+      const string& filename, const tensorflow::gtl::optional<ErrorSpec>& error,
+      const std::function<void(HloModule*)>& reference_preprocessor = nullptr)
+      TF_MUST_USE_RESULT;
 
   // Convenience method to force the layout of a given parameter in a module.
   // The layout of parameter number 'param_no' in the 'module' is set to
@@ -99,14 +197,38 @@ class HloTestBase : public ::testing::Test {
         ->Clear();
   }
 
+  // Return an HLO verifier constructed for the test backend.
+  HloVerifier& verifier() const { return *hlo_verifier_; }
+
   static string TestName();
 
-  // Returns the backend owned by the HloRunner.
+  // Returns the backend owned by the test runner.
   Backend& backend();
 
-  HloRunner runner_;
+  HloRunner test_runner_;
+  HloRunner reference_runner_;
+
+  std::unique_ptr<HloVerifier> hlo_verifier_;
 
   ErrorSpec error_spec_{0.0001};
+
+ private:
+  // Given the test module, makes a reference module that is ready to run on the
+  // reference platform. This assumes that the given module is ready to run on
+  // the test platform.
+  StatusOr<std::unique_ptr<HloModule>> MakeReferenceModule(
+      const HloModule& test_module,
+      const std::function<void(HloModule*)>& reference_preprocessor);
+
+  // Runs the module on two platforms with or without running hlo passes and
+  // compares the results. Returns whether the results are near or equal. If any
+  // error happens before the results are computed, returns the error status.
+  template <typename LiteralPtr>
+  StatusOr<::testing::AssertionResult> RunAndCompareInternal(
+      std::unique_ptr<HloModule> module,
+      const tensorflow::gtl::ArraySlice<LiteralPtr> arguments,
+      const tensorflow::gtl::optional<ErrorSpec>& error, bool run_hlo_passes,
+      const std::function<void(HloModule*)>& reference_preprocessor);
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc
index 31060b9e80fcd50aefdedca27c70ec8a9b8be743..506091ddd8d1d8e6519525bb7031f4e8b296b5fb 100644
--- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc
+++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc
@@ -23,15 +23,8 @@ limitations under the License.
 
 namespace xla {
 
-/*static*/ int64 HloVerifiedTestBase::DefaultShapeSize(const Shape& shape) {
-  constexpr int64 kPointerSize = sizeof(void*);
-  if (ShapeUtil::IsOpaque(shape)) {
-    return kPointerSize;
-  }
-  return ShapeUtil::ByteSizeOf(shape, kPointerSize);
-}
-
-HloVerifiedTestBase::HloVerifiedTestBase() : shape_size_fn_(DefaultShapeSize) {}
+HloVerifiedTestBase::HloVerifiedTestBase()
+    : shape_verifier_(MakeUnique<ShapeVerifier>()) {}
 
 HloVerifiedTestBase::~HloVerifiedTestBase() {
   // We can't call the ASSERT or EXPECT test macros in destructors, so we
@@ -47,7 +40,7 @@ void HloVerifiedTestBase::TearDown() {
       << "TearDown called more than once; it should be called exactly once.";
   tear_down_called_ = true;
   if (module_) {
-    HloVerifier verifier(shape_size_fn_);
+    HloVerifier verifier;
     xla::StatusOr<bool> mutated = verifier.Run(module_.get());
     if (!mutated.ok()) {
       ADD_FAILURE() << "HloVerifier failed: " << mutated.status();
diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h
index b3d6b5af3b46f932707abf309669d23c327d1334..492688bf7d682cf991cb8c09399492a0437f651b 100644
--- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h
+++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h
@@ -28,14 +28,13 @@ namespace xla {
 // A base class for HLO tests that stores a default HloModule, and automatically
 // performs verification on that module on tear-down.
 class HloVerifiedTestBase : public HloTestBase {
- public:
-  // Returns the size in bytes of the given shape, using a default pointer size.
-  static int64 DefaultShapeSize(const Shape& shape);
-
  protected:
   HloVerifiedTestBase();
   ~HloVerifiedTestBase() override;
 
+  // Constructs a default shape verifier.
+  std::unique_ptr<ShapeVerifier> MakeShapeVerifier();
+
   // Performs verification on the default HloModule returned by module().
   // Automatically called by the testing framework for each test.
   //
@@ -47,14 +46,14 @@ class HloVerifiedTestBase : public HloTestBase {
   HloModule& module();
 
   // Sets the shape-size function used during hlo verification. If this isn't
-  // called, DefaultShapeSize is used instead.
-  void SetShapeSizeFn(std::function<int64(const Shape&)> shape_size_fn) {
-    shape_size_fn_ = std::move(shape_size_fn);
+  // called, a default ShapeVerifier is used instead.
+  void SetShapeVerifier(std::unique_ptr<ShapeVerifier> shape_verifier) {
+    shape_verifier_ = std::move(shape_verifier);
   }
 
  private:
   std::unique_ptr<HloModule> module_;  // Lazily populated. Access via module().
-  std::function<int64(const Shape&)> shape_size_fn_;
+  std::unique_ptr<ShapeVerifier> shape_verifier_;
   bool tear_down_called_ = false;
 };
 
diff --git a/tensorflow/compiler/xla/tests/isolated_convolution.hlo b/tensorflow/compiler/xla/tests/isolated_convolution.hlo
new file mode 100644
index 0000000000000000000000000000000000000000..9452780930efbb1ecc13b35cd4ab53678d36c37f
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/isolated_convolution.hlo
@@ -0,0 +1,8 @@
+HloModule convolution.167:
+
+ENTRY %convolution.167 (parameter.0: f32[16,28,28,128], parameter.1: f32[3,3,128,128]) -> f32[16,28,28,128] {
+  %parameter.0 = f32[16,28,28,128]{3,0,2,1} parameter(0)
+  %parameter.1 = f32[3,3,128,128]{3,2,1,0} parameter(1)
+  ROOT %convolution.167 = f32[16,28,28,128]{3,0,2,1} convolution(f32[16,28,28,128]{3,0,2,1} %parameter.0, f32[3,3,128,128]{3,2,1,0} %parameter.1), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_01oi->b01f
+}
+
diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc
index 6aa27e5470d22a8c6698389a720a38e9ea254617..e5b96c51ce303819e33d67f5f383c119d313bae1 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util.cc
+++ b/tensorflow/compiler/xla/tests/literal_test_util.cc
@@ -57,7 +57,8 @@ namespace xla {
     }
     for (int i = 0; i < expected.tuple_shapes_size(); ++i) {
       ::testing::AssertionResult result =
-          EqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i));
+          EqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i))
+          << "mismatch in tuple index " << i;
       if (!result) {
         return result;
       }
@@ -100,36 +101,57 @@ namespace xla {
   ASSERT_EQ(expected.ShortDebugString(), actual.ShortDebugString());
 }
 
+namespace {
+
+// Return a literal with all arrays of type FromNativeT converted to type
+// ToNativeT in the given literal.
+template <typename FromNativeT, typename ToNativeT>
+std::unique_ptr<Literal> ConvertType(const Literal& literal) {
+  // First construct shape of the result.
+  Shape result_shape(literal.shape());
+  ShapeUtil::ForEachMutableSubshape(
+      &result_shape, [](Shape* subshape, const ShapeIndex&) {
+        if (subshape->element_type() ==
+            primitive_util::NativeToPrimitiveType<FromNativeT>()) {
+          subshape->set_element_type(
+              primitive_util::NativeToPrimitiveType<ToNativeT>());
+        }
+      });
+  auto result = MakeUnique<Literal>(result_shape);
+
+  // Then copy over the data from 'literal' converting FromNativeT values to
+  // ToNativeT values as necessary.
+  ShapeUtil::ForEachSubshape(
+      literal.shape(),
+      [&](const Shape& subshape, const ShapeIndex& shape_index) {
+        if (ShapeUtil::IsArray(subshape)) {
+          if (subshape.element_type() ==
+              primitive_util::NativeToPrimitiveType<FromNativeT>()) {
+            auto src = literal.data<FromNativeT>(shape_index);
+            auto dest = result->data<ToNativeT>(shape_index);
+            for (int64 i = 0; i < src.size(); ++i) {
+              dest[i] = static_cast<ToNativeT>(src[i]);
+            }
+          } else {
+            TF_CHECK_OK(result->CopyFrom(literal,
+                                         /*dest_shape_index=*/shape_index,
+                                         /*src_shape_index=*/shape_index));
+          }
+        }
+      });
+  return result;
+}
+
+}  // namespace
+
 /* static */ std::unique_ptr<Literal> LiteralTestUtil::ConvertBF16ToF32(
-    const Literal& bf16_literal) {
-  CHECK_EQ(bf16_literal.shape().element_type(), BF16);
-  Shape converted_shape = bf16_literal.shape();
-  converted_shape.set_element_type(F32);
-  auto converted = Literal::CreateFromShape(converted_shape);
-  if (!ShapeUtil::HasZeroElements(converted_shape)) {
-    std::vector<int64> index(converted_shape.dimensions_size(), 0);
-    do {
-      converted->Set<float>(
-          index, static_cast<float>(bf16_literal.Get<bfloat16>(index)));
-    } while (IndexUtil::BumpIndices(converted_shape, &index));
-  }
-  return converted;
+    const Literal& literal) {
+  return ConvertType<bfloat16, float>(literal);
 }
 
 /* static */ std::unique_ptr<Literal> LiteralTestUtil::ConvertF32ToBF16(
-    const Literal& f32_literal) {
-  CHECK_EQ(f32_literal.shape().element_type(), F32);
-  Shape converted_shape = f32_literal.shape();
-  converted_shape.set_element_type(BF16);
-  auto converted = Literal::CreateFromShape(converted_shape);
-  if (!ShapeUtil::HasZeroElements(converted_shape)) {
-    std::vector<int64> index(converted_shape.dimensions_size(), 0);
-    do {
-      converted->Set<bfloat16>(
-          index, static_cast<bfloat16>(f32_literal.Get<float>(index)));
-    } while (IndexUtil::BumpIndices(converted_shape, &index));
-  }
-  return converted;
+    const Literal& literal) {
+  return ConvertType<float, bfloat16>(literal);
 }
 
 namespace {
@@ -290,9 +312,10 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual,
       break;
     case TUPLE: {
       bool tuple_match = true;
-      for (int i = 0; i < actual.tuple_literals_size(); ++i) {
-        auto result =
-            Equal(expected.tuple_literals(i), actual.tuple_literals(i));
+      for (int i = 0; i < ShapeUtil::TupleElementCount(expected.shape()); ++i) {
+        // Create LiteralViews of the expected and actual elements.
+        auto result = Equal(LiteralView::Create(expected, {i}),
+                            LiteralView::Create(actual, {i}));
         tuple_match = tuple_match ? !!result : false;
       }
       match = tuple_match;
@@ -313,23 +336,45 @@ bool ExpectLiteralsEqual(const Literal& expected, const Literal& actual,
   return result;
 }
 
-/* static */ void LiteralTestUtil::ExpectEqualTuple(const Literal& expected,
-                                                    const Literal& actual) {
+/* static */ ::testing::AssertionResult LiteralTestUtil::EqualTuple(
+    const Literal& expected, const Literal& actual) {
   VLOG(1) << "expected: " << expected.ToString();
   VLOG(1) << "actual:   " << actual.ToString();
 
-  ASSERT_TRUE(ShapeUtil::IsTuple(expected.shape()));
-  ASSERT_TRUE(ShapeUtil::IsTuple(actual.shape()));
+  if (!ShapeUtil::IsTuple(expected.shape()) ||
+      !ShapeUtil::IsTuple(actual.shape())) {
+    return ::testing::AssertionFailure()
+           << "tuples expected shape = " << expected.shape().ShortDebugString()
+           << " actual shape = " << actual.shape().ShortDebugString();
+  }
   AssertEqualShapes(expected.shape(), actual.shape());
-  for (uint64 i = 0; i < expected.tuple_literals_size(); ++i) {
-    const auto& expected_element = expected.tuple_literals(i);
-    const auto& actual_element = actual.tuple_literals(i);
-    if (ShapeUtil::IsTuple(expected_element.shape())) {
-      ExpectEqualTuple(expected_element, actual_element);
-    } else {
-      ExpectEqual(expected_element, actual_element);
+
+  ::testing::AssertionResult err = ::testing::AssertionSuccess();
+  for (int64 i = 0; i < ShapeUtil::TupleElementCount(expected.shape()); ++i) {
+    SCOPED_TRACE(tensorflow::strings::StrCat(
+        "Tuple index ", i, " in ", ShapeUtil::HumanString(expected.shape())));
+    const auto expected_element = LiteralView::Create(expected, {i});
+    const auto actual_element = LiteralView::Create(actual, {i});
+
+    ::testing::AssertionResult res = [&] {
+      if (ShapeUtil::IsTuple(expected_element.shape())) {
+        return EqualTuple(expected_element, actual_element);
+      } else {
+        return Equal(expected_element, actual_element);
+      }
+    }();
+
+    if (!res && err) {
+      err = res;
     }
   }
+
+  return err;
+}
+
+/* static */ void LiteralTestUtil::ExpectEqualTuple(const Literal& expected,
+                                                    const Literal& actual) {
+  EXPECT_TRUE(EqualTuple(expected, actual));
 }
 
 namespace {
@@ -365,10 +410,7 @@ class NearComparator {
     abs_expected_miscompare_sum_ = 0.0;
     max_rel_err_ = 0.0;
     max_abs_err_ = 0.0;
-    *miscompares_.mutable_shape() =
-        ShapeUtil::ChangeElementType(actual.shape(), PRED);
-    miscompares_.mutable_preds()->resize(
-        ShapeUtil::ElementsIn(miscompares_.shape()), false);
+    miscompares_ = Literal(ShapeUtil::ChangeElementType(actual.shape(), PRED));
     multi_index_.resize(expected.shape().dimensions_size(), 0);
 
     switch (expected.shape().element_type()) {
@@ -595,33 +637,33 @@ bool NearComparator::ExpectValuesNear<bfloat16>(bfloat16 expected,
   if (!ShapeUtil::IsTuple(expected.shape()) ||
       !ShapeUtil::IsTuple(actual.shape())) {
     return ::testing::AssertionFailure()
-           << "tuples expected expected shape = "
-           << expected.shape().ShortDebugString()
+           << "tuples expected shape = " << expected.shape().ShortDebugString()
            << " actual shape = " << actual.shape().ShortDebugString();
   }
   AssertEqualShapes(expected.shape(), actual.shape());
-  for (uint64 i = 0; i < expected.tuple_literals_size(); ++i) {
-    const auto& expected_element = expected.tuple_literals(i);
-    const auto& actual_element = actual.tuple_literals(i);
-    if (ShapeUtil::IsTuple(expected_element.shape())) {
-      auto ret = NearTuple(expected_element, actual_element, error);
-      if (!ret) {
-        return ret;
-      }
-    } else if (ShapeUtil::ElementIsFloating(expected_element.shape())) {
-      auto ret = Near(expected_element, actual_element, error);
-      if (!ret) {
-        return ret;
-      }
-    } else {
-      auto ret = Equal(expected_element, actual_element);
-      if (!ret) {
-        return ret;
+
+  ::testing::AssertionResult err = ::testing::AssertionSuccess();
+  for (int64 i = 0; i < ShapeUtil::TupleElementCount(expected.shape()); ++i) {
+    SCOPED_TRACE(tensorflow::strings::StrCat(
+        "Tuple index ", i, " in ", ShapeUtil::HumanString(expected.shape())));
+    const auto expected_element = LiteralView::Create(expected, {i});
+    const auto actual_element = LiteralView::Create(actual, {i});
+
+    ::testing::AssertionResult res = [&] {
+      if (ShapeUtil::IsTuple(expected_element.shape())) {
+        return NearTuple(expected_element, actual_element, error);
+      } else if (ShapeUtil::ElementIsFloating(expected_element.shape())) {
+        return Near(expected_element, actual_element, error);
+      } else {
+        return Equal(expected_element, actual_element);
       }
+    }();
+
+    if (err && !res) {
+      err = res;
     }
   }
-
-  return ::testing::AssertionSuccess();
+  return err;
 }
 
 /* static */ void LiteralTestUtil::ExpectNearTuple(const Literal& expected,
@@ -630,6 +672,32 @@ bool NearComparator::ExpectValuesNear<bfloat16>(bfloat16 expected,
   EXPECT_TRUE(NearTuple(expected, actual, error));
 }
 
+/*static*/ ::testing::AssertionResult LiteralTestUtil::NearOrEqual(
+    const Literal& expected, const Literal& actual,
+    const tensorflow::gtl::optional<ErrorSpec>& error) {
+  bool is_tuple = ShapeUtil::IsTuple(expected.shape());
+  if (error.has_value()) {
+    if (is_tuple) {
+      VLOG(1) << "Expects near tuple";
+      return NearTuple(expected, actual, *error);
+    }
+    VLOG(1) << "Expects near";
+    return Near(expected, actual, *error);
+  }
+  if (is_tuple) {
+    VLOG(1) << "Expects equal tuple";
+    return EqualTuple(expected, actual);
+  }
+  VLOG(1) << "Expects equal";
+  return Equal(expected, actual);
+}
+
+/*static*/ void LiteralTestUtil::ExpectNearOrEqual(
+    const Literal& expected, const Literal& actual,
+    const tensorflow::gtl::optional<ErrorSpec>& error) {
+  EXPECT_TRUE(NearOrEqual(expected, actual, error));
+}
+
 /* static */ string LiteralTestUtil::MultiIndexAsString(
     tensorflow::gtl::ArraySlice<int64> multi_index) {
   return tensorflow::strings::StrCat(
@@ -645,9 +713,8 @@ bool NearComparator::ExpectValuesNear<bfloat16>(bfloat16 expected,
   }
   CHECK_EQ(ShapeUtil::ElementsIn(literal.shape()), new_num_elements);
 
-  auto new_literal = MakeUnique<Literal>();
-  *new_literal->mutable_shape() =
-      ShapeUtil::MakeShape(literal.shape().element_type(), new_dimensions);
+  auto new_literal = MakeUnique<Literal>(
+      ShapeUtil::MakeShape(literal.shape().element_type(), new_dimensions));
 
   // Create a new shape with the given minor-to-major layout. This shape is used
   // solely for converting linear address to multi-dimensional addresses when
@@ -655,9 +722,6 @@ bool NearComparator::ExpectValuesNear<bfloat16>(bfloat16 expected,
   Shape shape_with_layout = new_literal->shape();
   *shape_with_layout.mutable_layout() = LayoutUtil::MakeLayout(minor_to_major);
 
-  // Allocate space in the new literal.
-  new_literal->Reserve(ShapeUtil::ElementsIn(literal.shape()));
-
   // Copy data into new literal, element-by-element.
   for (int64 i = 0; i < ShapeUtil::ElementsIn(literal.shape()); ++i) {
     std::vector<int64> from_multi_index =
diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h
index 6e4add2690fd958d555eab3cef51cdbbd01819c9..f53553c70170bdcda717e72ffd791016effd0774 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util.h
+++ b/tensorflow/compiler/xla/tests/literal_test_util.h
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
@@ -59,10 +60,14 @@ class LiteralTestUtil {
   static void AssertEqualShapesAndLayouts(const Shape& expected,
                                           const Shape& actual);
 
-  // Converts a bfloat16 literal to a float literal.
+  // If the given literal's data type is bfloat16, converts it to a float
+  // literal; otherwise, returns a copy of it. If the literal is a tuple,
+  // recursively converts its elements.
   static std::unique_ptr<Literal> ConvertBF16ToF32(const Literal& bf16_literal);
 
-  // Converts a float literal to a bfloat16 literal.
+  // If the given literal's data type is float, converts it to a bfloat16
+  // literal; otherwise, returns a copy of it. If the literal is a tuple,
+  // recursively converts its elements.
   static std::unique_ptr<Literal> ConvertF32ToBF16(const Literal& f32_literal);
 
   // Asserts that the expected and actual literals are (bitwise) equal for all
@@ -106,6 +111,10 @@ class LiteralTestUtil {
   static void ExpectR4EqualArray4D(const Array4D<NativeT>& expected,
                                    const Literal& actual);
 
+  // Returns whether the two tuples are equal.
+  static ::testing::AssertionResult EqualTuple(
+      const Literal& expected, const Literal& actual) TF_MUST_USE_RESULT;
+
   // Expects that the values of the elements in the expected and actual tuples
   // are equal. Tuples are matched recursively.
   static void ExpectEqualTuple(const Literal& expected, const Literal& actual);
@@ -173,6 +182,19 @@ class LiteralTestUtil {
   static void ExpectNearTuple(const Literal& expected, const Literal& actual,
                               const ErrorSpec& error);
 
+  // If the error spec is given, returns whether the expected and the actual are
+  // within the error bound; otherwise, returns whether they are equal. Tuples
+  // will be compared recursively.
+  static ::testing::AssertionResult NearOrEqual(
+      const Literal& expected, const Literal& actual,
+      const tensorflow::gtl::optional<ErrorSpec>& error) TF_MUST_USE_RESULT;
+
+  // If the error spec is given, expects the expected and the actual to be near;
+  // otherwise, expects them to be equal. Tuples will be compared recursively.
+  static void ExpectNearOrEqual(
+      const Literal& expected, const Literal& actual,
+      const tensorflow::gtl::optional<ErrorSpec>& error);
+
   // Returns a multi-dimensional index as a string. For example: '{7, 8}' will
   // be returned for a 2-dimensional index with dimension 0 index equal to 7,
   // dimension 1 equal to 8.
diff --git a/tensorflow/compiler/xla/tests/literal_test_util_test.cc b/tensorflow/compiler/xla/tests/literal_test_util_test.cc
index 2acf27ed390b0732ba40fcf505c746bd7d8b651e..e477784557a3b9340cff644a3695485389d8cc22 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util_test.cc
+++ b/tensorflow/compiler/xla/tests/literal_test_util_test.cc
@@ -83,13 +83,14 @@ TEST(LiteralTestUtilTest, ExpectNearFailurePlacesResultsInTemporaryDirectory) {
     LiteralProto literal_proto;
     TF_CHECK_OK(tensorflow::ReadBinaryProto(tensorflow::Env::Default(), result,
                                             &literal_proto));
-    Literal literal(literal_proto);
+    std::unique_ptr<Literal> literal =
+        Literal::CreateFromProto(literal_proto).ConsumeValueOrDie();
     if (result.find("expected") != string::npos) {
-      EXPECT_EQ("2", literal.ToString());
+      EXPECT_EQ("2", literal->ToString());
     } else if (result.find("actual") != string::npos) {
-      EXPECT_EQ("4", literal.ToString());
+      EXPECT_EQ("4", literal->ToString());
     } else if (result.find("miscompares") != string::npos) {
-      EXPECT_EQ("true", literal.ToString());
+      EXPECT_EQ("true", literal->ToString());
     } else {
       FAIL() << "unknown file in temporary directory: " << result;
     }
diff --git a/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc b/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc
index 0cd44a72b5818c1bf66fd4cd1929572038596b47..4d3b513b092e0b447a1452a3809fb7099e54dbb9 100644
--- a/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc
+++ b/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc
@@ -63,8 +63,6 @@ int main(int argc, char** argv) {
     triple_string = "x86_64-apple-macosx";
   } else if (target_cpu == "arm") {
     triple_string = "aarch64-none-linux-gnu";
-  } else if (target_cpu == "ppc") {
-    triple_string = "powerpc64le-unknown-linux-gnu";
   } else if (target_cpu == "local") {
     triple_string = xla::llvm_ir::AsString(llvm::sys::getDefaultTargetTriple());
   } else {
diff --git a/tensorflow/compiler/xla/tests/local_client_execute_test.cc b/tensorflow/compiler/xla/tests/local_client_execute_test.cc
index ad71d40197fe48b4343ee5f5f7f71b282a05cbf5..2462ea39f914b1dbb525ea777a48d9ce66035638 100644
--- a/tensorflow/compiler/xla/tests/local_client_execute_test.cc
+++ b/tensorflow/compiler/xla/tests/local_client_execute_test.cc
@@ -138,13 +138,13 @@ XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentInputLayouts) {
   // Create x as a col-major array.
   auto x_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout(
       {{1.0f, 2.0f}, {3.0f, 4.0f}}, LayoutUtil::MakeLayout({0, 1})));
-  EXPECT_TRUE(LayoutUtil::Equal(x_array->shape().layout(),
+  EXPECT_TRUE(LayoutUtil::Equal(x_array->on_device_shape().layout(),
                                 LayoutUtil::MakeLayout({0, 1})));
 
   // Create y as a row-major array.
   auto y_array = LiteralToShapedBuffer(*Literal::CreateR2WithLayout(
       {{10.0f, 20.0f}, {30.0f, 40.0f}}, LayoutUtil::MakeLayout({1, 0})));
-  EXPECT_TRUE(LayoutUtil::Equal(y_array->shape().layout(),
+  EXPECT_TRUE(LayoutUtil::Equal(y_array->on_device_shape().layout(),
                                 LayoutUtil::MakeLayout({1, 0})));
 
   std::unique_ptr<ScopedShapedBuffer> result_colmaj =
@@ -179,7 +179,7 @@ XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentOutputLayouts) {
       DefaultExecutableBuildOptions().set_result_layout(
           ShapeUtil::MakeShapeWithLayout(F32, /*dimensions=*/{2, 2}, {0, 1})),
       DefaultExecutableRunOptions());
-  EXPECT_TRUE(LayoutUtil::Equal(result_colmaj->shape().layout(),
+  EXPECT_TRUE(LayoutUtil::Equal(result_colmaj->on_device_shape().layout(),
                                 LayoutUtil::MakeLayout({0, 1})));
   LiteralTestUtil::ExpectR2Near<float>({{11.0f, 22.0f}, {33.0f, 44.0f}},
                                        *ShapedBufferToLiteral(*result_colmaj),
@@ -191,7 +191,7 @@ XLA_TEST_F(LocalClientExecuteTest, AddArraysWithDifferentOutputLayouts) {
       DefaultExecutableBuildOptions().set_result_layout(
           ShapeUtil::MakeShapeWithLayout(F32, /*dimensions=*/{2, 2}, {1, 0})),
       DefaultExecutableRunOptions());
-  EXPECT_TRUE(LayoutUtil::Equal(result_rowmaj->shape().layout(),
+  EXPECT_TRUE(LayoutUtil::Equal(result_rowmaj->on_device_shape().layout(),
                                 LayoutUtil::MakeLayout({1, 0})));
   LiteralTestUtil::ExpectR2Near<float>({{11.0f, 22.0f}, {33.0f, 44.0f}},
                                        *ShapedBufferToLiteral(*result_rowmaj),
@@ -213,16 +213,17 @@ XLA_TEST_F(LocalClientExecuteTest, TupleResult) {
   std::unique_ptr<ScopedShapedBuffer> result =
       ExecuteLocallyOrDie(computation, {x_array.get(), y_array.get()});
 
-  EXPECT_TRUE(ShapeUtil::IsTuple(result->shape()));
-  EXPECT_EQ(3, ShapeUtil::TupleElementCount(result->shape()));
+  EXPECT_TRUE(ShapeUtil::IsTuple(result->on_host_shape()));
+  EXPECT_EQ(3, ShapeUtil::TupleElementCount(result->on_host_shape()));
 
   std::unique_ptr<Literal> result_literal = ShapedBufferToLiteral(*result);
-  LiteralTestUtil::ExpectR2Equal<float>({{1.0f, 2.0f}, {3.0f, 4.0f}},
-                                        result_literal->tuple_literals(0));
-  LiteralTestUtil::ExpectR2Equal<float>({{10.0f, 20.0f}, {30.0f, 40.0f}},
-                                        result_literal->tuple_literals(1));
-  LiteralTestUtil::ExpectR2Equal<float>({{1.0f, 2.0f}, {3.0f, 4.0f}},
-                                        result_literal->tuple_literals(2));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{1.0f, 2.0f}, {3.0f, 4.0f}}, LiteralView::Create(*result_literal, {0}));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{10.0f, 20.0f}, {30.0f, 40.0f}},
+      LiteralView::Create(*result_literal, {1}));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{1.0f, 2.0f}, {3.0f, 4.0f}}, LiteralView::Create(*result_literal, {2}));
 }
 
 XLA_TEST_F(LocalClientExecuteTest, NestedTupleResult) {
@@ -241,19 +242,21 @@ XLA_TEST_F(LocalClientExecuteTest, NestedTupleResult) {
   std::unique_ptr<ScopedShapedBuffer> result =
       ExecuteLocallyOrDie(computation, {x_array.get(), y_array.get()});
 
-  EXPECT_TRUE(ShapeUtil::IsTuple(result->shape()));
-  EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->shape()));
+  EXPECT_TRUE(ShapeUtil::IsTuple(result->on_host_shape()));
+  EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->on_host_shape()));
 
   std::unique_ptr<Literal> result_literal = ShapedBufferToLiteral(*result);
-  LiteralTestUtil::ExpectR2Equal<float>({{1.0f, 2.0f}, {3.0f, 4.0f}},
-                                        result_literal->tuple_literals(1));
-  const Literal& inner_tuple_literal = result_literal->tuple_literals(0);
-  LiteralTestUtil::ExpectR2Equal<float>({{1.0f, 2.0f}, {3.0f, 4.0f}},
-                                        inner_tuple_literal.tuple_literals(0));
-  LiteralTestUtil::ExpectR2Equal<float>({{10.0f, 20.0f}, {30.0f, 40.0f}},
-                                        inner_tuple_literal.tuple_literals(1));
-  LiteralTestUtil::ExpectR2Equal<float>({{1.0f, 2.0f}, {3.0f, 4.0f}},
-                                        inner_tuple_literal.tuple_literals(2));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{1.0f, 2.0f}, {3.0f, 4.0f}}, LiteralView::Create(*result_literal, {1}));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{1.0f, 2.0f}, {3.0f, 4.0f}},
+      LiteralView::Create(*result_literal, {0, 0}));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{10.0f, 20.0f}, {30.0f, 40.0f}},
+      LiteralView::Create(*result_literal, {0, 1}));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{1.0f, 2.0f}, {3.0f, 4.0f}},
+      LiteralView::Create(*result_literal, {0, 2}));
 }
 
 XLA_TEST_F(LocalClientExecuteTest, TupleResultWithLayout) {
@@ -278,10 +281,10 @@ XLA_TEST_F(LocalClientExecuteTest, TupleResultWithLayout) {
       DefaultExecutableRunOptions());
 
   std::unique_ptr<Literal> result_literal = ShapedBufferToLiteral(*result);
-  LiteralTestUtil::ExpectR2Equal<float>({{1.0f, 2.0f}, {3.0f, 4.0f}},
-                                        result_literal->tuple_literals(0));
-  LiteralTestUtil::ExpectR2Equal<float>({{1.0f, 2.0f}, {3.0f, 4.0f}},
-                                        result_literal->tuple_literals(1));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{1.0f, 2.0f}, {3.0f, 4.0f}}, LiteralView::Create(*result_literal, {0}));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{1.0f, 2.0f}, {3.0f, 4.0f}}, LiteralView::Create(*result_literal, {1}));
 }
 
 XLA_TEST_F(LocalClientExecuteTest, TupleArguments) {
@@ -320,14 +323,15 @@ XLA_TEST_F(LocalClientExecuteTest, TupleArguments) {
   std::unique_ptr<ScopedShapedBuffer> result =
       ExecuteLocallyOrDie(computation, {x_buffer.get(), y_buffer.get()});
 
-  EXPECT_TRUE(ShapeUtil::IsTuple(result->shape()));
-  EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->shape()));
+  EXPECT_TRUE(ShapeUtil::IsTuple(result->on_host_shape()));
+  EXPECT_EQ(2, ShapeUtil::TupleElementCount(result->on_host_shape()));
 
   std::unique_ptr<Literal> result_literal = ShapedBufferToLiteral(*result);
-  LiteralTestUtil::ExpectR2Equal<float>({{56.0f, 46.0f}, {36.0f, 26.0f}},
-                                        result_literal->tuple_literals(0));
-  LiteralTestUtil::ExpectR1Equal<float>({40.0f, 71.0f, 117.0f},
-                                        result_literal->tuple_literals(1));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{56.0f, 46.0f}, {36.0f, 26.0f}},
+      LiteralView::Create(*result_literal, {0}));
+  LiteralTestUtil::ExpectR1Equal<float>(
+      {40.0f, 71.0f, 117.0f}, LiteralView::Create(*result_literal, {1}));
 }
 
 XLA_TEST_F(LocalClientExecuteTest, NestedTupleArgument) {
@@ -365,10 +369,10 @@ XLA_TEST_F(LocalClientExecuteTest, NestedTupleArgument) {
       ExecuteLocallyOrDie(computation, {arg_buffer.get()});
 
   std::unique_ptr<Literal> result_literal = ShapedBufferToLiteral(*result);
-  LiteralTestUtil::ExpectR2Equal<float>({{-1.0, -2.0}, {-3.0, -4}},
-                                        result_literal->tuple_literals(0));
-  LiteralTestUtil::ExpectR1Equal<float>({264.0, 73.0, 133.0},
-                                        result_literal->tuple_literals(1));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{-1.0, -2.0}, {-3.0, -4}}, LiteralView::Create(*result_literal, {0}));
+  LiteralTestUtil::ExpectR1Equal<float>(
+      {264.0, 73.0, 133.0}, LiteralView::Create(*result_literal, {1}));
 }
 
 XLA_TEST_F(LocalClientExecuteTest, PassingTupleResultBackIntoComputation) {
@@ -395,18 +399,19 @@ XLA_TEST_F(LocalClientExecuteTest, PassingTupleResultBackIntoComputation) {
   std::unique_ptr<ScopedShapedBuffer> result_0 =
       ExecuteLocallyOrDie(computation, {arg_buffer.get()});
   std::unique_ptr<Literal> result_0_literal = ShapedBufferToLiteral(*result_0);
-  LiteralTestUtil::ExpectR2Equal<float>({{-1.0, -2.0}, {-3.0, -4.0}},
-                                        result_0_literal->tuple_literals(0));
-  LiteralTestUtil::ExpectR2Equal<float>({{22.0, 6.0}, {8.0, 10}},
-                                        result_0_literal->tuple_literals(1));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{-1.0, -2.0}, {-3.0, -4.0}},
+      LiteralView::Create(*result_0_literal, {0}));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{22.0, 6.0}, {8.0, 10}}, LiteralView::Create(*result_0_literal, {1}));
 
   std::unique_ptr<ScopedShapedBuffer> result_1 =
       ExecuteLocallyOrDie(computation, {result_0.get()});
   std::unique_ptr<Literal> result_1_literal = ShapedBufferToLiteral(*result_1);
-  LiteralTestUtil::ExpectR2Equal<float>({{1.0, 2.0}, {3.0, 4.0}},
-                                        result_1_literal->tuple_literals(0));
-  LiteralTestUtil::ExpectR2Equal<float>({{44.0, 12.0}, {16.0, 20}},
-                                        result_1_literal->tuple_literals(1));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{1.0, 2.0}, {3.0, 4.0}}, LiteralView::Create(*result_1_literal, {0}));
+  LiteralTestUtil::ExpectR2Equal<float>(
+      {{44.0, 12.0}, {16.0, 20}}, LiteralView::Create(*result_1_literal, {1}));
 }
 
 XLA_TEST_F(LocalClientExecuteTest, LargeTuple) {
@@ -455,7 +460,8 @@ XLA_TEST_F(LocalClientExecuteTest, LargeTuple) {
 
   for (int i = 0; i < kElementCount; ++i) {
     LiteralTestUtil::ExpectR1Near<float>(
-        {2.0f * i, 0.0f}, result_literal->tuple_literals(i), error_spec_);
+        {2.0f * i, 0.0f}, LiteralView::Create(*result_literal, {i}),
+        error_spec_);
   }
 }
 
@@ -512,8 +518,8 @@ XLA_TEST_F(LocalClientExecuteTest, DISABLED_ON_CPU_PARALLEL(LargeNestedTuple)) {
   for (int i = 0; i < kFanout; ++i) {
     for (int j = 0; j < kFanout; ++j) {
       LiteralTestUtil::ExpectR0Near<float>(
-          i + j + i * kFanout + j,
-          result_literal->tuple_literals(i).tuple_literals(j), error_spec_);
+          i + j + i * kFanout + j, LiteralView::Create(*result_literal, {i, j}),
+          error_spec_);
     }
   }
 }
@@ -554,11 +560,12 @@ XLA_TEST_F(LocalClientExecuteTest, DeepTuple) {
       ExecuteLocallyOrDie(computation, {arg_buffer.get()});
   std::unique_ptr<Literal> result_literal = ShapedBufferToLiteral(*result);
 
-  const Literal* result_element = result_literal.get();
+  ShapeIndex index;
   for (int i = 0; i < kTupleDepth; ++i) {
-    result_element = &result_element->tuple_literals(0);
+    index.push_back(0);
   }
-  LiteralTestUtil::ExpectR0Equal<float>(165.0, *result_element);
+  LiteralTestUtil::ExpectR0Equal<float>(
+      165.0, LiteralView::Create(*result_literal, index));
 }
 
 XLA_TEST_F(LocalClientExecuteTest, InvalidNumberOfArguments) {
@@ -575,7 +582,7 @@ XLA_TEST_F(LocalClientExecuteTest, InvalidNumberOfArguments) {
 
   EXPECT_FALSE(execute_status.ok());
   EXPECT_THAT(execute_status.status().error_message(),
-              ContainsRegex("invalid number of arguments"));
+              ContainsRegex("Invalid number of arguments"));
 }
 
 XLA_TEST_F(LocalClientExecuteTest, IncorrectArgumentShape) {
@@ -591,7 +598,7 @@ XLA_TEST_F(LocalClientExecuteTest, IncorrectArgumentShape) {
 
   EXPECT_FALSE(execute_status.ok());
   EXPECT_THAT(execute_status.status().error_message(),
-              ContainsRegex("invalid argument shape"))
+              ContainsRegex("Invalid argument shape"))
       << execute_status.status();
 }
 
@@ -763,10 +770,10 @@ XLA_TEST_F(LocalClientExecuteTest, SelectBetweenTuples) {
   std::unique_ptr<ScopedShapedBuffer> result =
       ExecuteLocallyOrDie(builder.Build().ValueOrDie(), {});
   std::unique_ptr<Literal> tuple_literal = ShapedBufferToLiteral(*result);
-  LiteralTestUtil::ExpectR1Equal<float>({2.0f, 4.0f, 6.0f},
-                                        tuple_literal->tuple_literals(0));
-  LiteralTestUtil::ExpectR1Equal<float>({1.0f, 2.0f, 3.0f},
-                                        tuple_literal->tuple_literals(1));
+  LiteralTestUtil::ExpectR1Equal<float>(
+      {2.0f, 4.0f, 6.0f}, LiteralView::Create(*tuple_literal, {0}));
+  LiteralTestUtil::ExpectR1Equal<float>(
+      {1.0f, 2.0f, 3.0f}, LiteralView::Create(*tuple_literal, {1}));
 }
 
 XLA_TEST_F(LocalClientExecuteTest, CompileExecutable) {
@@ -906,20 +913,18 @@ void BM_LocalClientOverhead(int num_iters) {
   builder.Add(x, x);
   auto computation = builder.Build().ConsumeValueOrDie();
 
-  auto shape_size_fn = [client](const Shape& shape) {
-    return client->backend().transfer_manager()->GetByteSizeRequirement(shape);
-  };
-  auto buffer = ScopedShapedBuffer::Allocate(
-                    shape, &allocator, /*device_ordinal=*/0, shape_size_fn)
-                    .ConsumeValueOrDie();
+  auto buffer =
+      transfer_manager
+          ->AllocateScopedShapedBuffer(shape, &allocator, /*device_ordinal=*/0)
+          .ConsumeValueOrDie();
   auto literal = Literal::CreateR2<float>({{0, 0, 0}, {0, 0, 0}});
   ASSERT_IS_OK(transfer_manager->TransferLiteralToDevice(
-      executors[device_ordinal], *literal, buffer->mutable_buffer({})));
+      executors[device_ordinal], *literal, *buffer));
 
   const int kWarmups = 2;
 
-  auto executable_status = client->Compile(computation, {&buffer->shape()},
-                                           ExecutableBuildOptions());
+  auto executable_status = client->Compile(
+      computation, {&buffer->on_host_shape()}, ExecutableBuildOptions());
   ASSERT_IS_OK(executable_status);
   std::unique_ptr<LocalExecutable> executable =
       executable_status.ConsumeValueOrDie();
diff --git a/tensorflow/compiler/xla/tests/local_client_test_base.cc b/tensorflow/compiler/xla/tests/local_client_test_base.cc
index 062a9246e49598d5d03dce8c1f437138923449bf..96b976d25d75d35f46adfd104a03aceb363661eb 100644
--- a/tensorflow/compiler/xla/tests/local_client_test_base.cc
+++ b/tensorflow/compiler/xla/tests/local_client_test_base.cc
@@ -188,7 +188,7 @@ LocalClientTestBase::ExecuteLocally(
     const ExecutableRunOptions& run_options) {
   std::vector<const Shape*> argument_layouts(arguments.size());
   for (int i = 0; i < arguments.size(); ++i) {
-    argument_layouts[i] = &arguments[i]->shape();
+    argument_layouts[i] = &arguments[i]->on_host_shape();
   }
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<LocalExecutable> executable,
diff --git a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
index 22d2b917a1d55f4f453e21c2d8fea38e32ff796b..6e6cb7ff1e2ac74dc54f14d8811c9a5d3662bbd2 100644
--- a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
+++ b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
@@ -76,8 +76,11 @@ class MultiOutputFusionTest : public HloTestBase {
         elem_shape2, HloOpcode::kAdd, broadcast, param1));
     HloInstruction* sub = builder.AddInstruction(HloInstruction::CreateBinary(
         elem_shape2, HloOpcode::kSubtract, param1, broadcast));
+    DotDimensionNumbers dot_dnums;
+    dot_dnums.add_lhs_contracting_dimensions(1);
+    dot_dnums.add_rhs_contracting_dimensions(0);
     HloInstruction* dot = builder.AddInstruction(
-        HloInstruction::CreateBinary(elem_shape2, HloOpcode::kDot, sub, add2));
+        HloInstruction::CreateDot(elem_shape2, sub, add2, dot_dnums));
     auto computation = hlo_module->AddEntryComputation(builder.Build(dot));
 
     if (manual_fusion) {
@@ -96,14 +99,13 @@ class MultiOutputFusionTest : public HloTestBase {
           nullptr);
     }
 
-    Literal input;
-    input.PopulateWithValue<float>(2.5f, {size, size});
-    auto p1 = TransferToDevice(input);
-    auto p0 = TransferToDevice(*Literal::CreateR0<float>(-9.0f));
+    Literal arg1(ShapeUtil::MakeShape(F32, {size, size}));
+    arg1.PopulateWithValue<float>(2.5f);
 
-    Literal expect;
-    expect.PopulateWithValue<float>(size * 1.5f * 3.5f, {size, size});
-    auto actual = ExecuteAndTransfer(std::move(hlo_module), {p0, p1});
+    Literal expect(ShapeUtil::MakeShape(F32, {size, size}));
+    expect.PopulateWithValue<float>(size * 1.5f * 3.5f);
+    auto actual = ExecuteAndTransfer(
+        std::move(hlo_module), {Literal::CreateR0<float>(-9.0f).get(), &arg1});
     LiteralTestUtil::ExpectNear(expect, *actual, error_spec_);
   }
 
@@ -133,8 +135,11 @@ class MultiOutputFusionTest : public HloTestBase {
     HloInstruction* reshape =
         builder.AddInstruction(HloInstruction::CreateReshape(
             ShapeUtil::MakeShape(F32, {size, 1}), add));
-    HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary(
-        ShapeUtil::MakeShape(F32, {1}), HloOpcode::kDot, sub, reshape));
+    DotDimensionNumbers dot_dnums;
+    dot_dnums.add_lhs_contracting_dimensions(0);
+    dot_dnums.add_rhs_contracting_dimensions(0);
+    HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateDot(
+        ShapeUtil::MakeShape(F32, {1}), sub, reshape, dot_dnums));
     auto computation = hlo_module->AddEntryComputation(builder.Build(dot));
 
     if (manual_fusion) {
@@ -154,14 +159,13 @@ class MultiOutputFusionTest : public HloTestBase {
                nullptr);
     }
 
-    Literal input0, input1;
-    input0.PopulateWithValue<float>(2.5f, {size});
-    input1.PopulateWithValue<double>(1, {size});
-    auto p0 = TransferToDevice(input0);
-    auto p1 = TransferToDevice(input1);
+    Literal input0(ShapeUtil::MakeShape(F32, {size}));
+    input0.PopulateWithValue(2.5f);
+    Literal input1(ShapeUtil::MakeShape(F64, {size}));
+    input1.PopulateWithValue(1.);
 
-    Literal expect = *Literal::CreateR1<float>({size * 1.5f * 3.5f});
-    auto actual = ExecuteAndTransfer(std::move(hlo_module), {p0, p1});
+    Literal expect = std::move(*Literal::CreateR1<float>({size * 1.5f * 3.5f}));
+    auto actual = ExecuteAndTransfer(std::move(hlo_module), {&input0, &input1});
     LiteralTestUtil::ExpectNear(expect, *actual, error_spec_);
   }
 };
diff --git a/tensorflow/compiler/xla/tests/params_test.cc b/tensorflow/compiler/xla/tests/params_test.cc
index b7f62b8aa167b2d9ef1bb2fa83af5aaeda1d6652..bb7e800df84121f2045141bc366c34b94ba694ea 100644
--- a/tensorflow/compiler/xla/tests/params_test.cc
+++ b/tensorflow/compiler/xla/tests/params_test.cc
@@ -334,10 +334,109 @@ XLA_TEST_F(ParamsTest, DISABLED_ON_CPU(DISABLED_ON_GPU(
   ComputeAndCompareTuple(&builder, *Literal::MakeTuple(ptrs), param_data);
 }
 
+// Test large number of parameters flowing into a while-loop.
+// Construct conceptually the following HLO graph:
+//
+// p0 = parameter(0)
+// p1 = parameter(1)
+// ...
+// pN = parameter(N)
+// result = while (false) {
+//   p0 += (1, 1);
+//   p1 += (1, 1);
+//   ...
+//   pN += (1, 1)
+// }
+// result = {p0, p1, ..., pN}
+//
+// TODO(b/70173746): Times out during compilation on GPU and CPU backends as of
+// 2017-12-12.
+XLA_TEST_F(ParamsTest,
+           DISABLED_ON_CPU(DISABLED_ON_GPU(ManyParametersIntoWhileLoop))) {
+  ComputationBuilder builder(client_, TestName());
+
+  std::vector<std::unique_ptr<GlobalData>> param_data_owner;
+  constexpr int kParamCount = 1900;
+  std::vector<ComputationDataHandle> params;
+  std::vector<Shape> parameter_shapes;
+  for (int i = 0; i < kParamCount; ++i) {
+    std::unique_ptr<Literal> literal = Literal::CreateR1<int32>({i, i});
+    param_data_owner.push_back(
+        std::move(client_->TransferToServer(*literal)).ValueOrDie());
+    ComputationDataHandle param =
+        builder.Parameter(i, literal->shape(), "param");
+    params.push_back(param);
+    parameter_shapes.push_back(literal->shape());
+  }
+
+  // Add bool parameter for the loop condition. Use a parameter HLO instead of a
+  // constant because DCE may eliminate the while-body otherwise.
+  std::unique_ptr<Literal> bool_literal = Literal::CreateR0<bool>(false);
+  param_data_owner.push_back(
+      std::move(client_->TransferToServer(*bool_literal)).ValueOrDie());
+  ComputationDataHandle bool_param =
+      builder.Parameter(kParamCount, bool_literal->shape(), "bool_param");
+  params.push_back(bool_param);
+  parameter_shapes.push_back(bool_literal->shape());
+
+  auto init = builder.Tuple(params);
+
+  // Create a computation for the condition: while(bool_param).
+  Shape while_shape = ShapeUtil::MakeTupleShape(parameter_shapes);
+  Computation condition;
+  {
+    ComputationBuilder builder(client_, "condition");
+    auto condition_parameter =
+        builder.Parameter(0, while_shape, "condition_parameter");
+    builder.GetTupleElement(condition_parameter, kParamCount);
+    condition = builder.Build().ConsumeValueOrDie();
+  }
+
+  // Create a computation for the body.
+  // Add {1, 1} to the each tuple element.
+  Computation body;
+  {
+    ComputationBuilder builder(client_, "body");
+    auto body_parameter = builder.Parameter(0, while_shape, "body_parameter");
+    std::vector<ComputationDataHandle> updates;
+    for (int i = 0; i < kParamCount; ++i) {
+      auto add = builder.Add(builder.GetTupleElement(body_parameter, i),
+                             builder.ConstantR1<int32>({1, 1}));
+      updates.push_back(add);
+    }
+    // Add bool parameter.
+    updates.push_back(builder.GetTupleElement(body_parameter, kParamCount));
+
+    builder.Tuple(updates);
+    body = builder.Build().ConsumeValueOrDie();
+  }
+
+  auto loop = builder.While(condition, body, init);
+
+  std::vector<ComputationDataHandle> outputs;
+  for (int i = 0; i < kParamCount; ++i) {
+    outputs.push_back(builder.GetTupleElement(loop, i));
+  }
+  builder.Tuple(outputs);
+
+  std::vector<GlobalData*> param_data;
+  param_data.reserve(param_data_owner.size());
+  for (const std::unique_ptr<GlobalData>& data : param_data_owner) {
+    param_data.push_back(data.get());
+  }
+
+  std::vector<std::unique_ptr<Literal>> elements;
+  std::vector<const Literal*> ptrs;
+  for (int i = 0; i < kParamCount; ++i) {
+    elements.push_back(Literal::CreateR1<int32>({i, i}));
+    ptrs.push_back(elements.back().get());
+  }
+  ComputeAndCompareTuple(&builder, *Literal::MakeTuple(ptrs), param_data);
+}
+
 #endif
 
-XLA_TEST_F(ParamsTest,
-           DISABLED_ON_CPU_PARALLEL(TupleOfR1ParametersAddedTogether)) {
+XLA_TEST_F(ParamsTest, TupleOfR1ParametersAddedTogether) {
   ComputationBuilder builder(client_, TestName());
 
   Shape r1f32_3 = ShapeUtil::MakeShape(F32, {3});
@@ -363,10 +462,8 @@ XLA_TEST_F(ParamsTest,
 // Verifies that passing a 2x2 with {0, 1} layout returns the same value back
 // when (transferred to the server and) passed through a parameter.
 XLA_TEST_F(ParamsTest, R2_2x2_Layout_01) {
-  std::unique_ptr<Literal> literal = Literal::CreateR2<float>({
-      {1, 2}, {3, 4},
-  });
-  *literal->mutable_shape()->mutable_layout() = LayoutUtil::MakeLayout({0, 1});
+  std::unique_ptr<Literal> literal = Literal::CreateR2WithLayout<float>(
+      {{1, 2}, {3, 4}}, LayoutUtil::MakeLayout({0, 1}));
   ComputationBuilder builder(client_, TestName());
   builder.Parameter(0, literal->shape(), "input");
 
@@ -377,10 +474,8 @@ XLA_TEST_F(ParamsTest, R2_2x2_Layout_01) {
 
 // As above, but for {1, 0} layout.
 XLA_TEST_F(ParamsTest, R2_2x2_Layout_10) {
-  std::unique_ptr<Literal> literal = Literal::CreateR2<float>({
-      {1, 3}, {2, 4},
-  });
-  *literal->mutable_shape()->mutable_layout() = LayoutUtil::MakeLayout({1, 0});
+  std::unique_ptr<Literal> literal = Literal::CreateR2WithLayout<float>(
+      {{1, 3}, {2, 4}}, LayoutUtil::MakeLayout({1, 0}));
   ComputationBuilder builder(client_, TestName());
   builder.Parameter(0, literal->shape(), "input");
 
@@ -401,7 +496,7 @@ XLA_TEST_F(ParamsTest, R2_2x2_TryToPassReverseLayoutToParameter) {
         original.layout().minor_to_major().begin(),
         original.layout().minor_to_major().end());
     std::reverse(original_layout.begin(), original_layout.end());
-    *literal->mutable_shape()->mutable_layout() =
+    *literal->mutable_shape_do_not_use()->mutable_layout() =
         LayoutUtil::MakeLayout(original_layout);
     ASSERT_EQ(2, literal->Get<float>({0, 1}));
   }
diff --git a/tensorflow/compiler/xla/tests/prng_test.cc b/tensorflow/compiler/xla/tests/prng_test.cc
index 209f063cc5a34648453d12deae79f261b95dc3b4..6489eee9f34c6c4426d52e166f7b401d5948742f 100644
--- a/tensorflow/compiler/xla/tests/prng_test.cc
+++ b/tensorflow/compiler/xla/tests/prng_test.cc
@@ -37,6 +37,8 @@ class PrngTest : public ClientLibraryTestBase {
  protected:
   template <typename T>
   void UniformTest(T a, T b, tensorflow::gtl::ArraySlice<int64> dims);
+
+  template <typename T>
   void BernoulliTest(float p, tensorflow::gtl::ArraySlice<int64> dims);
 
   // Computes the χ² statistic of a sample of the discrete uniform distribution
@@ -62,37 +64,6 @@ void PrngTest::UniformTest(T a, T b, tensorflow::gtl::ArraySlice<int64> dims) {
   });
 }
 
-void PrngTest::BernoulliTest(float p, tensorflow::gtl::ArraySlice<int64> dims) {
-  ComputationBuilder builder(client_, TestName());
-  auto shape = ShapeUtil::MakeShape(U32, dims);
-  builder.RngBernoulli(builder.ConstantR0<float>(p), shape);
-
-  TF_ASSERT_OK_AND_ASSIGN(auto computation, builder.Build());
-  ExecutionOptions execution_options = execution_options_;
-  execution_options.set_seed(42);
-  TF_ASSERT_OK_AND_ASSIGN(
-      auto actual, client_->ExecuteAndTransfer(computation, /*arguments=*/{},
-                                               &execution_options));
-  EXPECT_THAT(dims, ::testing::ElementsAreArray(actual->shape().dimensions()));
-  int32 sum = 0;
-  actual->EachCell<uint32>(
-      [&sum](tensorflow::gtl::ArraySlice<int64>, uint32 value) {
-        EXPECT_TRUE(value == 0 || value == 1);
-        sum += value;
-      });
-  int32 total = ShapeUtil::ElementsIn(shape);
-  float p_tilde = sum / static_cast<float>(total);
-
-  // Test within expected range using normal approximation. The test uses a
-  // fixed seed and has a fixed output per p and backend. Using the normal
-  // approximation as this test is invoked for different `p` and the different
-  // backends could use different random number generators and produce different
-  // values. Choose 95% confidence level, so that z_{1-\alpha/2} = 1.96.
-  float normal_approximation_term = 1.96 * sqrt(p * (1 - p) / total);
-  EXPECT_GE(p_tilde, p - normal_approximation_term);
-  EXPECT_LE(p_tilde, p + normal_approximation_term);
-}
-
 // Uniform random number generation tests
 XLA_TEST_F(PrngTest, ScalarU01) { UniformTest<float>(0, 1, {}); }
 XLA_TEST_F(PrngTest, ZeroValuesU01) { UniformTest<float>(0, 1, {0}); }
@@ -181,10 +152,12 @@ XLA_TEST_F(PrngTest, MapUsingRng) {
                        computation,
                        /*arguments=*/{param0_data.get()}, &execution_options));
 
-  EXPECT_EQ(actual->f32s_size(), param0_literal->f32s_size());
-  for (int i = 0; i < param0_literal->f32s_size(); ++i) {
-    EXPECT_GE(actual->f32s(i), param0_literal->f32s(i));
-    EXPECT_LT(actual->f32s(i), param0_literal->f32s(i) + 1.0f);
+  EXPECT_EQ(ShapeUtil::ElementsIn(actual->shape()),
+            ShapeUtil::ElementsIn(param0_literal->shape()));
+  for (int i = 0; i < ShapeUtil::ElementsIn(actual->shape()); ++i) {
+    EXPECT_GE(actual->data<float>()[i], param0_literal->data<float>()[i]);
+    EXPECT_LT(actual->data<float>()[i],
+              param0_literal->data<float>()[i] + 1.0f);
   }
 }
 
@@ -250,10 +223,6 @@ XLA_TEST_F(PrngTest, PassInGlobalRngSeed) {
   LiteralTestUtil::ExpectNotEqual(*result5, *result6);
 }
 
-// Bernoulli random number generation tests
-XLA_TEST_F(PrngTest, HundredValuesB10p5) { BernoulliTest(0.5, {100}); }
-XLA_TEST_F(PrngTest, HundredValuesB10p1) { BernoulliTest(0.1, {100}); }
-
 XLA_TEST_F(PrngTest, TenValuesN01) {
   ComputationBuilder builder(client_, TestName());
   builder.RngNormal(builder.ConstantR0<float>(0), builder.ConstantR0<float>(1),
diff --git a/tensorflow/compiler/xla/tests/reduce_test.cc b/tensorflow/compiler/xla/tests/reduce_test.cc
index 7bc3185c367f076c9a7d211c9799557e1a91d92f..a766fa2db0e193c52171490981855843ab3ee158 100644
--- a/tensorflow/compiler/xla/tests/reduce_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_test.cc
@@ -143,6 +143,55 @@ class ReduceTest : public ClientLibraryTestBase {
     ComputeAndCompareR0<bool>(&builder, expected, {input_global_data.get()});
   }
 
+  // Reduce predicate tensor with dimension rows * cols to dimension cols, to
+  // test the implementation of atomic operations on misaligned small data
+  // types.
+  template <int64 cols>
+  void RunR2ToR1PredTest(bool and_reduce, int64 rows, int64 minor = 1,
+                         int64 major = 0) {
+    ComputationBuilder builder(client_, TestName());
+    const Shape input_shape = ShapeUtil::MakeShape(U8, {rows, cols});
+    auto input = builder.Parameter(0, input_shape, "input");
+    auto input_pred = builder.Eq(input, builder.ConstantR0<uint8>(1));
+
+    ComputationDataHandle init_value;
+    Computation reduce_op;
+    if (and_reduce) {
+      init_value = builder.ConstantR0<bool>(true);
+      reduce_op = CreateScalarAndComputation(&builder);
+    } else {
+      init_value = builder.ConstantR0<bool>(false);
+      reduce_op = CreateScalarOrComputation(&builder);
+    }
+
+    builder.Reduce(input_pred, init_value, reduce_op,
+                   /*dimensions_to_reduce=*/{0});
+
+    Array2D<uint8> input_data(rows, cols);
+    input_data.FillRandom(0, 1);
+    std::unique_ptr<Literal> input_literal =
+        Literal::CreateR2FromArray2D(input_data);
+    input_literal =
+        input_literal->Relayout(LayoutUtil::MakeLayout({minor, major}));
+    std::unique_ptr<GlobalData> input_global_data =
+        client_->TransferToServer(*input_literal).ConsumeValueOrDie();
+
+    std::array<bool, cols> expected;
+    for (int64 colno = 0; colno < cols; ++colno) {
+      bool column_sum = and_reduce ? true : false;
+      for (int64 rowno = 0; rowno < rows; ++rowno) {
+        if (and_reduce) {
+          column_sum = column_sum && input_data(rowno, colno);
+        } else {
+          column_sum = column_sum || input_data(rowno, colno);
+        }
+      }
+      expected[colno] = column_sum;
+    }
+
+    ComputeAndCompareR1<bool>(&builder, expected, {input_global_data.get()});
+  }
+
   // Runs an R2 => R0 reduction test with the given number of (rows, cols).
   void RunR2ToR0Test(int64 rows, int64 cols, int64 minor = 1, int64 major = 0) {
     ComputationBuilder builder(client_, TestName());
@@ -352,15 +401,13 @@ XLA_TEST_F(ReduceTest, ReduceR2_111x50_01_To_R1) {
 XLA_TEST_F(ReduceTest, ReduceR2_1024x1024_To_R1) { RunR2ToR1Test(1024, 1024); }
 XLA_TEST_F(ReduceTest, ReduceR2_1000x1500_To_R1) { RunR2ToR1Test(1000, 1500); }
 
-// TODO(b/34969189): Invalid CAS generated on GPU.
-XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(AndReduceAllOnesR1_10_Pred)) {
+XLA_TEST_F(ReduceTest, AndReduceAllOnesR1_10_Pred) {
   constexpr int element_count = 10;
   std::vector<int> input(element_count, 1);
   RunR1ToR0PredTest(/*and_reduce=*/true, input);
 }
 
-// TODO(b/34969189): Invalid CAS generated on GPU.
-XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(AndReduceOnesAndZerosR1_10_Pred)) {
+XLA_TEST_F(ReduceTest, AndReduceOnesAndZerosR1_10_Pred) {
   constexpr int element_count = 10;
   std::vector<int> input(element_count);
   for (int i = 0; i < element_count; ++i) {
@@ -369,15 +416,13 @@ XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(AndReduceOnesAndZerosR1_10_Pred)) {
   RunR1ToR0PredTest(/*and_reduce=*/true, input);
 }
 
-// TODO(b/34969189): Invalid CAS generated on GPU.
-XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(OrReduceAllOnesR1_10_Pred)) {
+XLA_TEST_F(ReduceTest, OrReduceAllOnesR1_10_Pred) {
   constexpr int element_count = 10;
   std::vector<int> input(element_count, 1);
   RunR1ToR0PredTest(/*and_reduce=*/false, input);
 }
 
-// TODO(b/34969189): Invalid CAS generated on GPU.
-XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(OrReduceOnesAndZerosR1_10_Pred)) {
+XLA_TEST_F(ReduceTest, OrReduceOnesAndZerosR1_10_Pred) {
   constexpr int element_count = 10;
   std::vector<int> input(element_count);
   for (int i = 0; i < element_count; ++i) {
@@ -812,5 +857,12 @@ XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(OperationOnConstantAsInitValue)) {
   ComputeAndCompareR0<float>(&builder, 4.0f, {b_data.get()});
 }
 
+XLA_TEST_F(ReduceTest, ReduceAndPredR2_128x64_To_R1) {
+  RunR2ToR1PredTest</*cols=64*/ 64>(/*and_reduce=true*/ true, /*rows=128*/ 128);
+}
+XLA_TEST_F(ReduceTest, ReduceOrPredR2_64x32_To_R1) {
+  RunR2ToR1PredTest</*cols=32*/ 32>(/*and_reduce=false*/ false, /*rows=64*/ 64);
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc
index 0601a1466bd87ab721443e0da725006e2d73e392..01f23efcd52e3b227309df3b7d965f3b4c3a0cdf 100644
--- a/tensorflow/compiler/xla/tests/reduce_window_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc
@@ -41,16 +41,40 @@ limitations under the License.
 namespace xla {
 namespace {
 
-class ReduceWindowTest : public ClientLibraryTestBase {
+#ifdef XLA_BACKEND_SUPPORTS_BFLOAT16
+// Tests both F32 and BF16.
+static std::array<bool, 2> use_bfloat16_params{false, true};
+#else
+// Only tests F32.
+static std::array<bool, 1> use_bfloat16_params{false};
+#endif
+
+class ReduceWindowTestBase : public ClientLibraryTestBase {
  public:
-  ReduceWindowTest() : builder_(client_, TestName()) {}
+  ErrorSpec DefaultErrorSpec() const {
+    if (use_bfloat16()) {
+      return ErrorSpec(1e-1, 5e-2);
+    } else {
+      return ErrorSpec(1e-3, 1e-3);
+    }
+  }
+};
+
+class ReduceWindowTest : public ::testing::WithParamInterface<bool>,
+                         public ReduceWindowTestBase {
+ public:
+  ReduceWindowTest() : builder_(client_, TestName()) {
+    set_use_bfloat16(GetParam());
+  }
 
   void ReduceWindowAdd(const ComputationDataHandle& input,
                        tensorflow::gtl::ArraySlice<int64> window_dimensions,
                        tensorflow::gtl::ArraySlice<int64> window_strides,
                        Padding padding) {
-    builder_.ReduceWindow(input, builder_.ConstantR0<float>(0.0f),
-                          CreateScalarAddComputation(F32, &builder_),
+    auto init =
+        CreateConstantFromLiteral(*Literal::CreateR0<float>(0.0f), &builder_);
+    builder_.ReduceWindow(input, init,
+                          CreateScalarAddComputation(FloatType(), &builder_),
                           window_dimensions, window_strides, padding);
   }
 
@@ -58,30 +82,32 @@ class ReduceWindowTest : public ClientLibraryTestBase {
                        tensorflow::gtl::ArraySlice<int64> window_dimensions,
                        tensorflow::gtl::ArraySlice<int64> window_strides,
                        Padding padding) {
-    builder_.ReduceWindow(
-        input, builder_.ConstantLiteral(Literal::MinValue(F32)),
-        CreateScalarMax(), window_dimensions, window_strides, padding);
+    auto init = CreateConstantFromLiteral(Literal::MinValue(F32), &builder_);
+    builder_.ReduceWindow(input, init, CreateScalarMax(), window_dimensions,
+                          window_strides, padding);
   }
 
   void ReduceWindowMin(const ComputationDataHandle& input,
                        tensorflow::gtl::ArraySlice<int64> window_dimensions,
                        tensorflow::gtl::ArraySlice<int64> window_strides,
                        Padding padding) {
-    builder_.ReduceWindow(input,
-                          builder_.ConstantLiteral(Literal::MaxValue(F32)),
-                          CreateScalarMinComputation(F32, &builder_),
+    auto init = CreateConstantFromLiteral(Literal::MaxValue(F32), &builder_);
+    builder_.ReduceWindow(input, init,
+                          CreateScalarMinComputation(FloatType(), &builder_),
                           window_dimensions, window_strides, padding);
   }
 
   ComputationBuilder builder_;
 };
 
-TEST_F(ReduceWindowTest, MismatchedRanksGivesErrorStatus) {
-  const auto input = builder_.ConstantR1<float>({1, 1, 1, 1});
-  const auto init_value = builder_.ConstantR0<float>(0);
+TEST_P(ReduceWindowTest, MismatchedRanksGivesErrorStatus) {
+  const auto input = CreateConstantFromLiteral(
+      *Literal::CreateR1<float>({1, 1, 1, 1}), &builder_);
+  const auto init_value =
+      CreateConstantFromLiteral(*Literal::CreateR0<float>(0), &builder_);
   TF_ASSERT_OK(builder_.first_error());
   builder_.ReduceWindow(input, init_value,
-                        CreateScalarAddComputation(F32, &builder_),
+                        CreateScalarAddComputation(FloatType(), &builder_),
                         /*window_dimensions=*/{1, 2},
                         /*window_strides=*/{1}, Padding::kValid);
   ASSERT_EQ(builder_.first_error().code(), tensorflow::error::INVALID_ARGUMENT)
@@ -91,88 +117,106 @@ TEST_F(ReduceWindowTest, MismatchedRanksGivesErrorStatus) {
 }
 
 // Regression test for b/68964348.
-TEST_F(ReduceWindowTest, R0ReduceWindow) {
-  auto input = builder_.ConstantR0<float>(42);
-  auto init = builder_.ConstantR0<float>(1.0);
-  builder_.ReduceWindow(input, init, CreateScalarAddComputation(F32, &builder_),
+TEST_P(ReduceWindowTest, R0ReduceWindow) {
+  const auto input =
+      CreateConstantFromLiteral(*Literal::CreateR0<float>(42.0), &builder_);
+  const auto init =
+      CreateConstantFromLiteral(*Literal::CreateR0<float>(1.0), &builder_);
+  builder_.ReduceWindow(input, init,
+                        CreateScalarAddComputation(FloatType(), &builder_),
                         /*window_dimensions=*/{},
                         /*window_strides=*/{}, Padding::kSame);
-  ComputeAndCompareR0<float>(&builder_, 43, {}, ErrorSpec(0.00001));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateR0<float>(43.0), {},
+                           ErrorSpec(0.00001));
 }
 
-TEST_F(ReduceWindowTest, Min3In5Stride2) {
-  const auto input = builder_.ConstantR1<float>({10000, 1000, 100, 10, 1});
+TEST_P(ReduceWindowTest, Min3In5Stride2) {
+  const auto input = CreateConstantFromLiteral(
+      *Literal::CreateR1<float>({10000, 1000, 100, 10, 1}), &builder_);
   ReduceWindowMin(input, {3}, {2}, Padding::kValid);
-  ComputeAndCompareR1<float>(&builder_, {100, 1}, {}, ErrorSpec(0.0001));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateR1<float>({100, 1}), {},
+                           ErrorSpec(0.00001));
 }
 
-XLA_TEST_F(ReduceWindowTest, ZeroElementSmall) {
-  Array4D<float> input_array(1, 0, 2, 1);
+TEST_P(ReduceWindowTest, Min3In5Stride1WithSamePadding) {
+  const auto input = CreateConstantFromLiteral(
+      *Literal::CreateR1<float>({10000, 1000, 100, 10, 1}), &builder_);
+  ReduceWindowMin(input, /*window_dimensions=*/{3}, /*window_strides=*/{1},
+                  Padding::kSame);
+  ComputeAndCompareLiteral(&builder_,
+                           *Literal::CreateR1<float>({1000, 100, 10, 1, 1}), {},
+                           ErrorSpec(0.00001));
+}
 
-  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
+XLA_TEST_P(ReduceWindowTest, ZeroElementSmall) {
+  Array4D<float> input_array(1, 0, 2, 1);
+  const auto input = CreateConstantFromArray(input_array, &builder_);
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {1, 1, 2, 1}, {1, 1, 1, 1}, padding);
 
   auto res = ReferenceUtil::ReduceWindow4DAdd(input_array, 0.0f, {1, 1, 2, 1},
                                               {1, 1, 1, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, NonSquareSmall) {
+TEST_P(ReduceWindowTest, NonSquareSmall) {
   Array4D<float> input_array(1, 2, 2, 1);
-  input_array.FillRandom(2.f);
+  input_array.FillRandom(2.f, 2.f);
+  const auto input = CreateConstantFromArray(input_array, &builder_);
 
-  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {1, 1, 2, 1}, {1, 1, 1, 1}, padding);
 
   auto res = ReferenceUtil::ReduceWindow4DAdd(input_array, 0.0f, {1, 1, 2, 1},
                                               {1, 1, 1, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, MiddleDimsSmall) {
+TEST_P(ReduceWindowTest, MiddleDimsSmall) {
   Array4D<float> input_array(1, 3, 3, 1);
-  input_array.FillRandom(2.f);
-
-  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
+  input_array.FillRandom(2.f, 2.f);
+  const auto input = CreateConstantFromArray(input_array, &builder_);
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {1, 1, 1, 1}, {1, 2, 2, 1}, padding);
 
   auto res = ReferenceUtil::ReduceWindow4DAdd(input_array, 0.0f, {1, 1, 1, 1},
                                               {1, 2, 2, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, Along2ndMinorDim) {
+TEST_P(ReduceWindowTest, Along2ndMinorDim) {
   Array4D<float> input_array(3, 6, 7, 32);
-  input_array.FillRandom(2.f);
+  input_array.FillRandom(2.f, 2.f);
+  const auto input = CreateConstantFromArray(input_array, &builder_);
 
   // The parameters of this reduction mimic feature norm (e.g. LRN).
   int lrn_diameter = 7;  // diameter = 2*radius + 1 --> must be odd
-  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {1, 1, lrn_diameter, 1}, {1, 1, 1, 1}, padding);
 
   auto res = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {1, 1, lrn_diameter, 1}, {1, 1, 1, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, AmongMajor2Dims) {
+TEST_P(ReduceWindowTest, AmongMajor2Dims) {
   Array4D<float> input_array(4, 4, 6, 8);
   input_array.FillWithMinorDimNum();
+  const auto input_data_handle =
+      CreateConstantFromArray(input_array, &builder_);
 
   int win_len = 3;
   int win_stride = 1;
 
   Padding padding = Padding::kSame;
-  const auto input_data_handle =
-      builder_.ConstantR4FromArray4D<float>(input_array);
   // Reduce only along the x and y dimensions, according to the win_len.
   ReduceWindowAdd(input_data_handle, {win_len, win_len, 1, 1},
                   {win_stride, win_stride, 1, 1}, padding);
@@ -180,18 +224,20 @@ TEST_F(ReduceWindowTest, AmongMajor2Dims) {
   auto result = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {win_len, win_len, 1, 1},
       {win_stride, win_stride, 1, 1}, padding);
-  ComputeAndCompareR4<float>(&builder_, *result, {}, ErrorSpec(1e-3, 1e-3));
+
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*result), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, AmongMajor2DimsMediumSize) {
+TEST_P(ReduceWindowTest, AmongMajor2DimsMediumSize) {
   Array4D<float> input_array(9, 12, 4, 89);
-  input_array.FillRandom(2.0f);
+  input_array.FillRandom(2.f, 2.f);
 
   int win_len = 3;
   int win_stride = 2;
 
   const auto input_data_handle =
-      builder_.ConstantR4FromArray4D<float>(input_array);
+      CreateConstantFromArray(input_array, &builder_);
 
   Padding padding = Padding::kSame;
   // Reduce only along the x and y dimensions, according to the win_len.
@@ -202,116 +248,34 @@ TEST_F(ReduceWindowTest, AmongMajor2DimsMediumSize) {
       input_array, 0.0f, {win_len, win_len, 1, 1},
       {win_stride, win_stride, 1, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *result, {}, ErrorSpec(1e-3, 1e-3));
-}
-
-// TODO(b/32173947): Test support for arbitrary-sized padding.
-TEST_F(ReduceWindowTest, DISABLED_AmongMajor2DimsMediumSizeLargePadding) {
-  Array4D<float> input_array(9, 12, 4, 89);  // simulate Dim0IsMinor layout
-  input_array.FillRandom(2.0f);
-
-  int64 rank = 4;
-  int win_len = 3;
-  int win_stride = 2;
-
-  const auto input_data_handle =
-      builder_.ConstantR4FromArray4D<float>(input_array);
-
-  Padding padding = Padding::kSame;
-  // Reduce only along the x and y dimensions, according to the win_len.
-  // Create padding vector with large padding values in the reduction dims.
-  std::vector<std::pair<int64, int64>> low_high_padding;
-  low_high_padding.resize(rank, {4, 4});
-
-  builder_.ReduceWindowWithGeneralPadding(
-      input_data_handle, builder_.ConstantR0<float>(0.0f),
-      CreateScalarAddComputation(F32, &builder_), {win_len, win_len, 1, 1},
-      {win_stride, win_stride, 1, 1}, low_high_padding);
-
-  auto result = ReferenceUtil::ReduceWindow4DAdd(
-      input_array, 0.0f, {win_len, win_len, 1, 1},
-      {win_stride, win_stride, 1, 1}, padding);
-
-  ComputeAndCompareR4<float>(&builder_, *result, {}, ErrorSpec(1e-3, 1e-3));
-}
-
-XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x2) {
-  Array3D<float> input_array(2, 1, 2);
-  input_array(0, 0, 0) = 1000;
-  input_array(0, 0, 1) = 100;
-  input_array(1, 0, 0) = 10;
-  input_array(1, 0, 1) = 1;
-  auto input = builder_.ConstantR3FromArray3D<float>(input_array);
-
-  ReduceWindowAdd(input, {1, 1, 2}, {1, 1, 1}, Padding::kValid);
-
-  Array3D<float> expected(2, 1, 1);
-  expected(0, 0, 0) = 1100;
-  expected(1, 0, 0) = 11;
-  ComputeAndCompareR3<float>(&builder_, expected, {}, ErrorSpec(0.0001));
-}
-
-XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x3Stride1x1x2) {
-  Array3D<float> input_array(2, 1, 3);
-  input_array(0, 0, 0) = 100;
-  input_array(0, 0, 1) = 10;
-  input_array(0, 0, 2) = 1;
-  input_array(1, 0, 0) = 500;
-  input_array(1, 0, 1) = 50;
-  input_array(1, 0, 2) = 5;
-  auto input = builder_.ConstantR3FromArray3D<float>(input_array);
-
-  ReduceWindowAdd(input, {1, 1, 2}, {1, 1, 2}, Padding::kValid);
-
-  Array3D<float> expected(2, 1, 1);
-  expected(0, 0, 0) = 110;
-  expected(1, 0, 0) = 550;
-  ComputeAndCompareR3<float>(&builder_, expected, {}, ErrorSpec(0.0001));
-}
-
-XLA_TEST_F(ReduceWindowTest, Add1x1x2In2x1x3SamePad) {
-  Array3D<float> input_array(2, 1, 3);
-  input_array(0, 0, 0) = 100;
-  input_array(0, 0, 1) = 10;
-  input_array(0, 0, 2) = 1;
-  input_array(1, 0, 0) = 500;
-  input_array(1, 0, 1) = 50;
-  input_array(1, 0, 2) = 5;
-  auto input = builder_.ConstantR3FromArray3D<float>(input_array);
-
-  ReduceWindowAdd(input, {1, 1, 2}, {1, 1, 1}, Padding::kSame);
-
-  Array3D<float> expected(2, 1, 3);
-  expected(0, 0, 0) = 110;
-  expected(0, 0, 1) = 11;
-  expected(0, 0, 2) = 1;
-  expected(1, 0, 0) = 550;
-  expected(1, 0, 1) = 55;
-  expected(1, 0, 2) = 5;
-  ComputeAndCompareR3<float>(&builder_, expected, {}, ErrorSpec(0.0001));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*result), {},
+                           DefaultErrorSpec());
 }
 
 // Tests a reduction function that is not a simple add/min/max/etc.
-XLA_TEST_F(ReduceWindowTest, NonstandardReduceFunction) {
+XLA_TEST_P(ReduceWindowTest, NonstandardReduceFunction) {
   Array4D<float> input_array(1, 2, 2, 1);
   input_array(0, 0, 0, 0) = 1;
   input_array(0, 0, 1, 0) = 2;
   input_array(0, 1, 0, 0) = 3;
   input_array(0, 1, 1, 0) = 4;
+  const auto input = CreateConstantFromArray(input_array, &builder_);
 
-  const auto input = builder_.ConstantR4FromArray4D<float>(input_array);
   Padding padding = Padding::kValid;
-
-  const Shape scalar = ShapeUtil::MakeShape(F32, {});
+  const Shape scalar = ShapeUtil::MakeShape(FloatType(), {});
   auto b = builder_.CreateSubBuilder("unusual");
   auto lhs = b->Parameter(0, scalar, "lhs");
   auto rhs = b->Parameter(1, scalar, "rhs");
-  b->Min(b->Add(lhs, rhs), b->ConstantR0<float>(8.0f));
+  b->Min(b->Add(lhs, rhs),
+         CreateConstantFromLiteral(*Literal::CreateR0<float>(8.0f), b.get()));
   Computation reduce_fn = b->BuildAndNoteError();
 
-  builder_.ReduceWindow(input, builder_.ConstantR0<float>(3.0f), reduce_fn,
-                        /*window_dimensions=*/{1, 1, 2, 1},
-                        /*window_strides=*/{1, 1, 1, 1}, padding);
+  builder_.ReduceWindow(
+      input,
+      CreateConstantFromLiteral(*Literal::CreateR0<float>(3.0f), &builder_),
+      reduce_fn,
+      /*window_dimensions=*/{1, 1, 2, 1},
+      /*window_strides=*/{1, 1, 1, 1}, padding);
 
   const auto reduce_func = [](float arg1, float arg2) {
     return std::min<float>(arg1 + arg2, 8.0f);
@@ -322,17 +286,19 @@ XLA_TEST_F(ReduceWindowTest, NonstandardReduceFunction) {
                                            /*window=*/{1, 1, 2, 1},
                                            /*stride=*/{1, 1, 1, 1}, padding);
 
-  ComputeAndCompareR4<float>(&builder_, *expected, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*expected), {},
+                           DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, R4UnitWindow) {
+TEST_P(ReduceWindowTest, R4UnitWindow) {
   Array4D<float> input_array(13, 12, 8, 15);
-  input_array.Fill(1.0f);
+  input_array.FillRandom(2.f, 2.f);
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input_array, LayoutUtil::MakeLayout({0, 3, 2, 1}));
-  ComputationDataHandle input =
-      builder_.Parameter(0, input_literal->shape(), "operand");
+  ComputationDataHandle input;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "parameter", &builder_, &input);
 
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {1, 1, 7, 1}, {1, 4, 1, 1}, padding);
@@ -340,15 +306,11 @@ TEST_F(ReduceWindowTest, R4UnitWindow) {
   auto res = ReferenceUtil::ReduceWindow4DAdd(input_array, 0.0f, {1, 1, 7, 1},
                                               {1, 4, 1, 1}, padding);
 
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_data,
-                          client_->TransferToServer(*input_literal));
-  ComputeAndCompareR4<float>(&builder_, *res, {input_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res),
+                           {input_data.get()}, DefaultErrorSpec());
 }
 
-XLA_TEST_F(HloTestBase, R6AddMultipleStrides) {
-  auto b = HloComputation::Builder(TestName());
-
+XLA_TEST_P(ReduceWindowTest, R6AddMultipleStrides) {
   std::vector<int64> input_dims(6, 8);
   auto shape = ShapeUtil::MakeShape(F32, input_dims);
 
@@ -358,56 +320,15 @@ XLA_TEST_F(HloTestBase, R6AddMultipleStrides) {
   };
   TF_EXPECT_OK(arg_literal->Populate<float>(generator));
 
-  auto input =
-      b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal)));
-
-  auto init_value = b.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0<float>(0.f)));
-
-  HloComputation::Builder add_computation("add");
-  Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
-  auto param_lhs = add_computation.AddInstruction(
-      HloInstruction::CreateParameter(0, scalar_shape, "lhs"));
-  auto param_rhs = add_computation.AddInstruction(
-      HloInstruction::CreateParameter(1, scalar_shape, "rhs"));
-  add_computation.AddInstruction(HloInstruction::CreateBinary(
-      scalar_shape, HloOpcode::kAdd, param_lhs, param_rhs));
-
-  auto module = CreateNewModule();
-  auto add_func = module->AddEmbeddedComputation(add_computation.Build());
-
-  WindowDimension trivial_dim;
-  trivial_dim.set_size(1);
-  trivial_dim.set_stride(1);
-  trivial_dim.set_padding_low(0);
-  trivial_dim.set_padding_high(0);
-  trivial_dim.set_window_dilation(1);
-  trivial_dim.set_base_dilation(1);
-
-  WindowDimension active_dim;
-  active_dim.set_size(3);
-  active_dim.set_stride(1);
-  active_dim.set_padding_low(0);
-  active_dim.set_padding_high(0);
-  active_dim.set_window_dilation(1);
-  active_dim.set_base_dilation(1);
-
-  Window window;
-  *window.add_dimensions() = active_dim;
-  *window.add_dimensions() = trivial_dim;
-  *window.add_dimensions() = active_dim;
-  *window.add_dimensions() = active_dim;
-  *window.add_dimensions() = trivial_dim;
-  *window.add_dimensions() = trivial_dim;
-
-  // Non-monotonic output layout with minor dims trivial.
+  const auto input = CreateConstantFromLiteral(*arg_literal, &builder_);
+
+  Padding padding = Padding::kValid;
+  ReduceWindowAdd(input, {3, 1, 3, 3, 1, 1}, {1, 1, 1, 1, 1, 1}, padding);
+
   std::vector<int64> output_layout = {1, 5, 3, 2, 0, 4};
   std::vector<int64> output_dims = {6, 8, 6, 6, 8, 8};
   Shape result_shape =
       ShapeUtil::MakeShapeWithLayout(F32, output_dims, output_layout);
-  b.AddInstruction(HloInstruction::CreateReduceWindow(
-      result_shape, input, init_value, window, add_func));
-
   std::unique_ptr<Literal> expected = Literal::CreateFromShape(result_shape);
   auto out_generator =
       [&](tensorflow::gtl::ArraySlice<int64> indexes) -> float {
@@ -415,82 +336,37 @@ XLA_TEST_F(HloTestBase, R6AddMultipleStrides) {
   };
   TF_EXPECT_OK(expected->Populate<float>(out_generator));
 
-  module->AddEntryComputation(b.Build());
-  auto actual = ExecuteAndTransfer(std::move(module), {});
-
-  LiteralTestUtil::ExpectNear(*actual, *expected, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *expected, {}, DefaultErrorSpec());
 }
 
-XLA_TEST_F(HloTestBase, R6Add) {
-  auto b = HloComputation::Builder(TestName());
-
+XLA_TEST_P(ReduceWindowTest, R6Add) {
   std::vector<int64> input_dims(6, 8);
+  auto shape = ShapeUtil::MakeShape(F32, input_dims);
+
   std::unique_ptr<Literal> arg_literal =
-      Literal::CreateFullWithMonotonicDim0MajorLayout<float>(input_dims, 1.0f);
-  auto input =
-      b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal)));
-
-  auto init_value = b.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0<float>(0.f)));
-
-  HloComputation::Builder add_computation("add");
-  Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
-  auto param_lhs = add_computation.AddInstruction(
-      HloInstruction::CreateParameter(0, scalar_shape, "lhs"));
-  auto param_rhs = add_computation.AddInstruction(
-      HloInstruction::CreateParameter(1, scalar_shape, "rhs"));
-  add_computation.AddInstruction(HloInstruction::CreateBinary(
-      scalar_shape, HloOpcode::kAdd, param_lhs, param_rhs));
-
-  auto module = CreateNewModule();
-  auto add_func = module->AddEmbeddedComputation(add_computation.Build());
-
-  WindowDimension trivial_dim;
-  trivial_dim.set_size(1);
-  trivial_dim.set_stride(1);
-  trivial_dim.set_padding_low(0);
-  trivial_dim.set_padding_high(0);
-  trivial_dim.set_window_dilation(1);
-  trivial_dim.set_base_dilation(1);
-
-  WindowDimension active_dim;
-  active_dim.set_size(3);
-  active_dim.set_stride(1);
-  active_dim.set_padding_low(0);
-  active_dim.set_padding_high(0);
-  active_dim.set_window_dilation(1);
-  active_dim.set_base_dilation(1);
-
-  Window window;
-  *window.add_dimensions() = trivial_dim;
-  *window.add_dimensions() = trivial_dim;
-  *window.add_dimensions() = active_dim;
-  *window.add_dimensions() = active_dim;
-  *window.add_dimensions() = trivial_dim;
-  *window.add_dimensions() = trivial_dim;
-
-  Shape shape = ShapeUtil::MakeShape(F32, {8, 8, 6, 6, 8, 8});
-  b.AddInstruction(HloInstruction::CreateReduceWindow(shape, input, init_value,
-                                                      window, add_func));
+      Literal::CreateFullWithDescendingLayout<float>(input_dims, 1.0f);
+
+  const auto input = CreateConstantFromLiteral(*arg_literal, &builder_);
+
+  Padding padding = Padding::kValid;
+  ReduceWindowAdd(input, {1, 1, 3, 3, 1, 1}, {1, 1, 1, 1, 1, 1}, padding);
 
   std::vector<int64> output_dims = {8, 8, 6, 6, 8, 8};
   std::unique_ptr<Literal> expected =
-      Literal::CreateFullWithMonotonicDim0MajorLayout<float>(output_dims, 9.0f);
-
-  module->AddEntryComputation(b.Build());
-  auto actual = ExecuteAndTransfer(std::move(module), {});
+      Literal::CreateFullWithDescendingLayout<float>(output_dims, 9.0f);
 
-  LiteralTestUtil::ExpectNear(*actual, *expected, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *expected, {}, DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, R4SecondMinorStride) {
+XLA_TEST_P(ReduceWindowTest, R4SecondMinorStride) {
   Array4D<float> input_array(2, 1, 27, 119);
   input_array.FillRandom(2.0f);
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input_array, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  ComputationDataHandle input =
-      builder_.Parameter(0, input_literal->shape(), "operand");
+  ComputationDataHandle input;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "parameter", &builder_, &input);
 
   int win_len = 1;
   int stride = 8;
@@ -500,20 +376,19 @@ XLA_TEST_F(ReduceWindowTest, R4SecondMinorStride) {
   auto res = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {1, 1, win_len, 1}, {1, 1, stride, 1}, padding);
 
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_data,
-                          client_->TransferToServer(*input_literal));
-  ComputeAndCompareR4<float>(&builder_, *res, {input_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res),
+                           {input_data.get()}, DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, R4SecondMinorUnitStride) {
+XLA_TEST_P(ReduceWindowTest, R4SecondMinorUnitStride) {
   Array4D<float> input_array(3, 2, 4, 64);
   input_array.FillRandom(2.0f);
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input_array, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  ComputationDataHandle input =
-      builder_.Parameter(0, input_literal->shape(), "operand");
+  ComputationDataHandle input;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "parameter", &builder_, &input);
 
   int win_len = 3;
   int stride = 1;
@@ -523,20 +398,19 @@ XLA_TEST_F(ReduceWindowTest, R4SecondMinorUnitStride) {
   auto res = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {1, 1, win_len, 1}, {1, 1, stride, 1}, padding);
 
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_data,
-                          client_->TransferToServer(*input_literal));
-  ComputeAndCompareR4<float>(&builder_, *res, {input_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res),
+                           {input_data.get()}, DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, R4SecondMinorWin) {
+XLA_TEST_P(ReduceWindowTest, R4SecondMinorWin) {
   Array4D<float> input_array(1, 3, 12, 200);
   input_array.FillRandom(2.0f);
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input_array, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  ComputationDataHandle input =
-      builder_.Parameter(0, input_literal->shape(), "operand");
+  ComputationDataHandle input;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "parameter", &builder_, &input);
 
   int win_len = 8;
   int stride = 5;
@@ -546,13 +420,11 @@ XLA_TEST_F(ReduceWindowTest, R4SecondMinorWin) {
   auto res = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {1, 1, win_len, 1}, {1, 1, stride, 1}, padding);
 
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_data,
-                          client_->TransferToServer(*input_literal));
-  ComputeAndCompareR4<float>(&builder_, *res, {input_data.get()},
-                             ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*res),
+                           {input_data.get()}, DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, AmongMajor2DimsMultipleMinor) {
+TEST_P(ReduceWindowTest, AmongMajor2DimsMultipleMinor) {
   Array4D<float> input_array(6, 4, 10, 130);
   input_array.FillRandom(2.0f);
 
@@ -561,7 +433,7 @@ TEST_F(ReduceWindowTest, AmongMajor2DimsMultipleMinor) {
 
   Padding padding = Padding::kSame;
   const auto input_data_handle =
-      builder_.ConstantR4FromArray4D<float>(input_array);
+      CreateConstantFromArray(input_array, &builder_);
   // Reduce only along the x and y dimensions, according to the win_len.
   ReduceWindowAdd(input_data_handle, {win_len, win_len, 1, 1},
                   {win_stride, win_stride, 1, 1}, padding);
@@ -569,36 +441,59 @@ TEST_F(ReduceWindowTest, AmongMajor2DimsMultipleMinor) {
   auto result = ReferenceUtil::ReduceWindow4DAdd(
       input_array, 0.0f, {win_len, win_len, 1, 1},
       {win_stride, win_stride, 1, 1}, padding);
-  ComputeAndCompareR4<float>(&builder_, *result, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray(*result), {},
+                           DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, Add24In1152_NoOverlap) {
+XLA_TEST_P(ReduceWindowTest, Add24In1152_NoOverlap) {
   std::vector<float> input_vector(128 * 9, 1);
-  const auto input = builder_.ConstantR1<float>(input_vector);
+  const auto input = CreateConstantFromLiteral(
+      *Literal::CreateR1<float>(input_vector), &builder_);
   ReduceWindowAdd(input, {32}, {128}, Padding::kValid);
-  ComputeAndCompareR1<float>(&builder_, {32, 32, 32, 32, 32, 32, 32, 32, 32},
-                             {}, ErrorSpec(0.0001));
+  ComputeAndCompareLiteral(
+      &builder_,
+      *Literal::CreateR1<float>({32, 32, 32, 32, 32, 32, 32, 32, 32}), {},
+      DefaultErrorSpec());
 }
 
-XLA_TEST_F(ReduceWindowTest, Add128In128Stride128) {
-  const auto input = builder_.ConstantR1<float>(
-      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+XLA_TEST_P(ReduceWindowTest, Add128In128Stride128) {
+  std::vector<float> input_vector{
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  const auto input = CreateConstantFromLiteral(
+      *Literal::CreateR1<float>(input_vector), &builder_);
   ReduceWindowAdd(input, {128}, {128}, Padding::kValid);
-  ComputeAndCompareR1<float>(&builder_, {1088}, {}, ErrorSpec(0.0001));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateR1<float>({1088}), {},
+                           DefaultErrorSpec());
+}
+
+XLA_TEST_P(ReduceWindowTest, Add128In128) {
+  std::vector<float> input_vector{
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  const auto input = CreateConstantFromLiteral(
+      *Literal::CreateR1<float>(input_vector), &builder_);
+  ReduceWindowAdd(input, {128}, {1}, Padding::kValid);
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateR1<float>({1088}), {},
+                           DefaultErrorSpec());
 }
 
 // Regression test for a bug that appeared in Inception (b/34784899).
-TEST_F(ReduceWindowTest, R2ReduceWindowInceptionFromBroadcast) {
+TEST_P(ReduceWindowTest, R2ReduceWindowInceptionFromBroadcast) {
   Array2D<float> input_array(14, 14, 1.0f);
-  ComputationDataHandle input =
-      builder_.Broadcast(builder_.ConstantLiteral(Literal::One(F32)), {14, 14});
+  const auto input = CreateConstantFromArray(input_array, &builder_);
 
   int win_len = 3;
   int stride = 1;
@@ -608,13 +503,14 @@ TEST_F(ReduceWindowTest, R2ReduceWindowInceptionFromBroadcast) {
   auto res = ReferenceUtil::ReduceWindow2DAdd(
       input_array, 0.0f, {win_len, win_len}, {stride, stride}, padding);
 
-  ComputeAndCompareR2<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray<float>(*res),
+                           {}, DefaultErrorSpec());
 }
 
-TEST_F(ReduceWindowTest, R2ReduceWindowNonOverlappingFromBroadcast) {
+TEST_P(ReduceWindowTest, R2ReduceWindowNonOverlappingFromBroadcast) {
   Array2D<float> input_array(6, 4, 1.0f);
-  ComputationDataHandle input =
-      builder_.Broadcast(builder_.ConstantLiteral(Literal::One(F32)), {6, 4});
+  ComputationDataHandle input = builder_.Broadcast(
+      CreateConstantFromLiteral(Literal::One(F32), &builder_), {6, 4});
 
   Padding padding = Padding::kSame;
   ReduceWindowAdd(input, {4, 2}, {3, 3}, padding);
@@ -622,9 +518,13 @@ TEST_F(ReduceWindowTest, R2ReduceWindowNonOverlappingFromBroadcast) {
   auto res = ReferenceUtil::ReduceWindow2DAdd(input_array, 0.0f, {4, 2}, {3, 3},
                                               padding);
 
-  ComputeAndCompareR2<float>(&builder_, *res, {}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&builder_, *Literal::CreateFromArray<float>(*res),
+                           {}, DefaultErrorSpec());
 }
 
+INSTANTIATE_TEST_CASE_P(ReduceWindowTestInstance, ReduceWindowTest,
+                        ::testing::ValuesIn(use_bfloat16_params));
+
 enum Reducer { kAdd, kMax };
 
 struct R4ReduceWindowTestData {
@@ -638,30 +538,36 @@ struct R4ReduceWindowTestData {
 };
 
 string R4ReduceWindowTestDataToString(
-    const ::testing::TestParamInfo<R4ReduceWindowTestData>& data) {
+    const ::testing::TestParamInfo<
+        ::testing::tuple<R4ReduceWindowTestData, bool>>& data) {
+  const auto& param = ::testing::get<0>(data.param);
   string str = tensorflow::strings::StrCat(
-      "base_bounds_",
-      tensorflow::str_util::Join(data.param.base_bounds, "x"),  //
+      "base_bounds_", tensorflow::str_util::Join(param.base_bounds, "x"),  //
       "__window_bounds_",
-      tensorflow::str_util::Join(data.param.window_bounds, "x"),            //
-      "__strides_", tensorflow::str_util::Join(data.param.strides, "x"),    //
-      "__pad_low_", tensorflow::str_util::Join(data.param.pad_low, "x"),    //
-      "__pad_high_", tensorflow::str_util::Join(data.param.pad_high, "x"),  //
-      (data.param.reducer == kAdd) ? "add" : "max");
-  CHECK(data.param.reducer == kAdd || data.param.reducer == kMax);
+      tensorflow::str_util::Join(param.window_bounds, "x"),            //
+      "__strides_", tensorflow::str_util::Join(param.strides, "x"),    //
+      "__pad_low_", tensorflow::str_util::Join(param.pad_low, "x"),    //
+      "__pad_high_", tensorflow::str_util::Join(param.pad_high, "x"),  //
+      (param.reducer == kAdd) ? "add" : "max");
+  CHECK(param.reducer == kAdd || param.reducer == kMax);
 
   // Test names are not allowed to contain the '-' character.
   std::replace(str.begin(), str.end(), '-', 'n');
+  if (::testing::get<1>(data.param)) {
+    str = tensorflow::strings::StrCat(str, "_bfloat16");
+  }
   return str;
 }
 
-class R4ReduceWindowTest
-    : public ClientLibraryTestBase,
-      public ::testing::WithParamInterface<R4ReduceWindowTestData> {
+class R4ReduceWindowTest : public ReduceWindowTestBase,
+                           public ::testing::WithParamInterface<
+                               ::testing::tuple<R4ReduceWindowTestData, bool>> {
  protected:
+  R4ReduceWindowTest() { set_use_bfloat16(::testing::get<1>(GetParam())); }
+
   void DoIt() {
     ComputationBuilder b(client_, TestName());
-    const auto& param = GetParam();
+    const auto& param = ::testing::get<0>(GetParam());
 
     const float kInitValue = 0.0f;
 
@@ -670,23 +576,24 @@ class R4ReduceWindowTest
     input.FillIota(1);
     std::unique_ptr<Literal> input_literal =
         Literal::CreateR4FromArray4D(input);
-    TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_arg,
-                            client_->TransferToServer(*input_literal));
+    ComputationDataHandle parameter;
+    auto input_arg = CreateParameterAndTransferLiteral(0, *input_literal, "p0",
+                                                       &b, &parameter);
 
     std::vector<std::pair<int64, int64>> padding(4);
     for (int i = 0; i < 4; ++i) {
       padding[i] = {param.pad_low[i], param.pad_high[i]};
     }
 
-    auto parameter = b.Parameter(0, input_literal->shape(), "p0");
-    auto pad_value = b.ConstantR0<float>(kInitValue);
+    auto init_value =
+        CreateConstantFromLiteral(*Literal::CreateR0(kInitValue), &b);
     CHECK(param.reducer == kAdd || param.reducer == kMax);
     auto computation = param.reducer == kAdd
-                           ? CreateScalarAddComputation(F32, &b)
-                           : CreateScalarMaxComputation(F32, &b);
+                           ? CreateScalarAddComputation(FloatType(), &b)
+                           : CreateScalarMaxComputation(FloatType(), &b);
     b.ReduceWindowWithGeneralPadding(
         /*operand=*/parameter,
-        /*init_value=*/pad_value,
+        /*init_value=*/init_value,
         /*computation=*/computation,
         /*window_dimensions=*/param.window_bounds,
         /*window_strides=*/param.strides,
@@ -704,8 +611,8 @@ class R4ReduceWindowTest
             /*window=*/param.window_bounds,
             /*stride=*/param.strides,
             /*padding=*/padding);
-    ComputeAndCompareR4<float>(&b, *expected, {input_arg.get()},
-                               ErrorSpec(1e-3, 1e-3));
+    ComputeAndCompareLiteral(&b, *Literal::CreateFromArray(*expected),
+                             {input_arg.get()}, DefaultErrorSpec());
   }
 };
 
@@ -721,6 +628,14 @@ const R4ReduceWindowTestData kR4ReduceWindowTestValues[] = {
                            /*pad_high=*/{0, 0, 0, 0},
                            /*reducer=*/kAdd},
 
+    // Arbitrary padding (not kSame or kValid).
+    R4ReduceWindowTestData{/*base_bounds=*/{9, 12, 4, 89},
+                           /*window_bounds=*/{3, 3, 1, 1},
+                           /*strides=*/{2, 2, 1, 1},
+                           /*pad_low=*/{4, 4, 0, 0},
+                           /*pad_high=*/{4, 4, 0, 0},
+                           /*reducer=*/kAdd},
+
     // Zero base bound edge case.
     R4ReduceWindowTestData{/*base_bounds=*/{1, 0, 1, 1},
                            /*window_bounds=*/{1, 1, 1, 1},
@@ -834,13 +749,15 @@ const R4ReduceWindowTestData kR4ReduceWindowTestValues[] = {
                            /*reducer=*/kAdd},
 };
 
-INSTANTIATE_TEST_CASE_P(R4ReduceWindowTestInstantiation, R4ReduceWindowTest,
-                        ::testing::ValuesIn(kR4ReduceWindowTestValues),
-                        R4ReduceWindowTestDataToString);
+INSTANTIATE_TEST_CASE_P(
+    R4ReduceWindowTestInstantiation, R4ReduceWindowTest,
+    ::testing::Combine(::testing::ValuesIn(kR4ReduceWindowTestValues),
+                       ::testing::ValuesIn(use_bfloat16_params)),
+    R4ReduceWindowTestDataToString);
 
 class R4ReduceWindowLargeTest : public R4ReduceWindowTest {};
 
-XLA_TEST_P(R4ReduceWindowLargeTest, DoIt) { DoIt(); }
+XLA_TEST_P(R4ReduceWindowLargeTest, DISABLED_ON_INTERPRETER(DoIt)) { DoIt(); }
 
 // Test cases that are large/slow/failed.
 const R4ReduceWindowTestData kR4ReduceWindowLargeTestValues[] = {
@@ -859,10 +776,103 @@ const R4ReduceWindowTestData kR4ReduceWindowLargeTestValues[] = {
                            /*reducer=*/kAdd},
 };
 
-INSTANTIATE_TEST_CASE_P(R4ReduceWindowLargeTestInstantiation,
-                        R4ReduceWindowLargeTest,
-                        ::testing::ValuesIn(kR4ReduceWindowLargeTestValues),
-                        R4ReduceWindowTestDataToString);
+INSTANTIATE_TEST_CASE_P(
+    R4ReduceWindowLargeTestInstantiation, R4ReduceWindowLargeTest,
+    ::testing::Combine(::testing::ValuesIn(kR4ReduceWindowLargeTestValues),
+                       ::testing::ValuesIn(use_bfloat16_params)),
+    R4ReduceWindowTestDataToString);
+
+struct R3ReduceWindowTestData {
+  int64 base_bounds[3];
+  int64 window_bounds[3];
+  int64 strides[3];
+  int64 layout[3];
+  Padding padding;
+  Reducer reducer;
+} kR3TestCases[] = {
+    {/*base_bounds=*/{2, 1, 2}, /*window_bounds=*/{1, 1, 2},
+     /*strides=*/{1, 1, 1}, /*layout=*/{2, 1, 0},
+     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+    {/*base_bounds=*/{4, 3, 3}, /*window_bounds=*/{2, 2, 2},
+     /*strides=*/{2, 2, 2}, /*layout=*/{2, 1, 0},
+     /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd},
+    {/*base_bounds=*/{4, 3, 3}, /*window_bounds=*/{2, 2, 2},
+     /*strides=*/{2, 2, 2}, /*layout=*/{2, 1, 0},
+     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+    {/*base_bounds=*/{6, 21, 3}, /*window_bounds=*/{2, 3, 2},
+     /*strides=*/{1, 2, 2}, /*layout=*/{2, 1, 0},
+     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+    {/*base_bounds=*/{10, 21, 129}, /*window_bounds=*/{2, 9, 1},
+     /*strides=*/{5, 2, 1}, /*layout=*/{2, 1, 0},
+     /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd},
+    {/*base_bounds=*/{6, 21, 3}, /*window_bounds=*/{2, 3, 2},
+     /*strides=*/{1, 2, 2}, /*layout=*/{0, 1, 2},
+     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+    {/*base_bounds=*/{6, 21, 3}, /*window_bounds=*/{2, 3, 2},
+     /*strides=*/{1, 2, 2}, /*layout=*/{1, 0, 2},
+     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+};
+
+string R3ReduceWindowTestDataToString(
+    const ::testing::TestParamInfo<
+        ::testing::tuple<R3ReduceWindowTestData, bool>>& data) {
+  const auto& param = ::testing::get<0>(data.param);
+  string str = tensorflow::strings::StrCat(
+      "base_bounds_", tensorflow::str_util::Join(param.base_bounds, "x"),
+      "__window_bounds_", tensorflow::str_util::Join(param.window_bounds, "x"),
+      "__strides_", tensorflow::str_util::Join(param.strides, "x"),
+      "__padding_", param.padding == Padding::kSame ? "same" : "valid",
+      "__layout_", param.layout[0], "_", param.layout[1], "_", param.layout[2],
+      "__reducer_", param.reducer == kAdd ? "add" : "max");
+  if (::testing::get<1>(data.param)) {
+    str = tensorflow::strings::StrCat(str, "_bfloat16");
+  }
+  return str;
+}
+
+class R3ReduceWindowTest : public ReduceWindowTestBase,
+                           public ::testing::WithParamInterface<
+                               ::testing::tuple<R3ReduceWindowTestData, bool>> {
+ protected:
+  R3ReduceWindowTest() { set_use_bfloat16(::testing::get<1>(GetParam())); }
+};
+
+TEST_P(R3ReduceWindowTest, Add) {
+  ComputationBuilder b(client_, TestName());
+  const auto& param = ::testing::get<0>(GetParam());
+  CHECK(param.reducer == kAdd);
+
+  const float kInitValue = 0.0f;
+  Array3D<float> input(param.base_bounds[0], param.base_bounds[1],
+                       param.base_bounds[2], 1.0f);
+  std::unique_ptr<Literal> input_literal =
+      Literal::CreateR3FromArray3DWithLayout(
+          input, LayoutUtil::MakeLayout(param.layout));
+
+  ComputationDataHandle parameter;
+  auto input_arg = CreateParameterAndTransferLiteral(0, *input_literal, "p0",
+                                                     &b, &parameter);
+  auto init_value =
+      CreateConstantFromLiteral(*Literal::CreateR0(kInitValue), &b);
+  b.ReduceWindow(/*operand=*/parameter,
+                 /*init_value=*/init_value,
+                 /*computation=*/CreateScalarAddComputation(FloatType(), &b),
+                 /*window_dimensions=*/param.window_bounds,
+                 /*window_strides=*/param.strides, /*padding=*/param.padding);
+
+  auto expected = ReferenceUtil::ReduceWindow3DAdd(
+      /*operand=*/input, /*init=*/kInitValue, /*window=*/param.window_bounds,
+      /*stride=*/param.strides, /*padding=*/param.padding);
+
+  ComputeAndCompareLiteral(&b, *Literal::CreateFromArray(*expected),
+                           {input_arg.get()}, DefaultErrorSpec());
+}
+
+INSTANTIATE_TEST_CASE_P(
+    R3ReduceWindowTestInstantiation, R3ReduceWindowTest,
+    ::testing::Combine(::testing::ValuesIn(kR3TestCases),
+                       ::testing::ValuesIn(use_bfloat16_params)),
+    R3ReduceWindowTestDataToString);
 
 struct R2ReduceWindowTestData {
   int64 base_bounds[2];
@@ -910,26 +920,33 @@ struct R2ReduceWindowTestData {
 };
 
 string R2ReduceWindowTestDataToString(
-    const ::testing::TestParamInfo<R2ReduceWindowTestData>& data) {
+    const ::testing::TestParamInfo<
+        ::testing::tuple<R2ReduceWindowTestData, bool>>& data) {
+  const auto& param = ::testing::get<0>(data.param);
   string str = tensorflow::strings::StrCat(
-      "base_bounds_",
-      tensorflow::str_util::Join(data.param.base_bounds, "x"),  //
+      "base_bounds_", tensorflow::str_util::Join(param.base_bounds, "x"),  //
       "__window_bounds_",
-      tensorflow::str_util::Join(data.param.window_bounds, "x"),              //
-      "__strides_", tensorflow::str_util::Join(data.param.strides, "x"),      //
-      "__padding_", data.param.padding == Padding::kSame ? "same" : "valid",  //
-      "__layout_", data.param.layout[0], "_", data.param.layout[1],           //
-      "__reducer_", data.param.reducer == kAdd ? "add" : "max");
+      tensorflow::str_util::Join(param.window_bounds, "x"),              //
+      "__strides_", tensorflow::str_util::Join(param.strides, "x"),      //
+      "__padding_", param.padding == Padding::kSame ? "same" : "valid",  //
+      "__layout_", param.layout[0], "_", param.layout[1],                //
+      "__reducer_", param.reducer == kAdd ? "add" : "max");
+  if (::testing::get<1>(data.param)) {
+    str = tensorflow::strings::StrCat(str, "_bfloat16");
+  }
   return str;
 }
 
-class R2ReduceWindowTest
-    : public ClientLibraryTestBase,
-      public ::testing::WithParamInterface<R2ReduceWindowTestData> {};
+class R2ReduceWindowTest : public ReduceWindowTestBase,
+                           public ::testing::WithParamInterface<
+                               ::testing::tuple<R2ReduceWindowTestData, bool>> {
+ protected:
+  R2ReduceWindowTest() { set_use_bfloat16(::testing::get<1>(GetParam())); }
+};
 
 TEST_P(R2ReduceWindowTest, Add) {
   ComputationBuilder b(client_, TestName());
-  const auto& param = GetParam();
+  const auto& param = ::testing::get<0>(GetParam());
   CHECK(param.reducer == kAdd);
 
   const float kInitValue = 0.0f;
@@ -937,12 +954,15 @@ TEST_P(R2ReduceWindowTest, Add) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR2FromArray2DWithLayout(
           input, LayoutUtil::MakeLayout(param.layout));
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_arg,
-                          client_->TransferToServer(*input_literal));
-  b.ReduceWindow(/*operand=*/
-                 b.Parameter(0, input_literal->shape(), "p0"),
-                 /*init_value=*/b.ConstantR0<float>(kInitValue),
-                 /*computation=*/CreateScalarAddComputation(F32, &b),
+
+  ComputationDataHandle parameter;
+  auto input_arg = CreateParameterAndTransferLiteral(0, *input_literal, "p0",
+                                                     &b, &parameter);
+  auto init_value =
+      CreateConstantFromLiteral(*Literal::CreateR0(kInitValue), &b);
+  b.ReduceWindow(/*operand=*/parameter,
+                 /*init_value=*/init_value,
+                 /*computation=*/CreateScalarAddComputation(FloatType(), &b),
                  /*window_dimensions=*/param.window_bounds,
                  /*window_strides=*/param.strides, /*padding=*/param.padding);
 
@@ -950,90 +970,145 @@ TEST_P(R2ReduceWindowTest, Add) {
       /*operand=*/input, /*init=*/kInitValue, /*window=*/param.window_bounds,
       /*stride=*/param.strides, /*padding=*/param.padding);
 
-  ComputeAndCompareR2<float>(&b, *expected, {input_arg.get()},
-                             ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&b, *Literal::CreateFromArray(*expected),
+                           {input_arg.get()}, DefaultErrorSpec());
 }
 
-INSTANTIATE_TEST_CASE_P(R2ReduceWindowTestInstantiation, R2ReduceWindowTest,
-                        ::testing::ValuesIn(kR2TestCases),
-                        R2ReduceWindowTestDataToString);
+INSTANTIATE_TEST_CASE_P(
+    R2ReduceWindowTestInstantiation, R2ReduceWindowTest,
+    ::testing::Combine(::testing::ValuesIn(kR2TestCases),
+                       ::testing::ValuesIn(use_bfloat16_params)),
+    R2ReduceWindowTestDataToString);
 
 struct R1ReduceWindowTestData {
   int64 base_bounds[1];
   int64 window_bounds[1];
   int64 strides[1];
-  Padding padding;
+  int64 pad_low[1];
+  int64 pad_high[1];
   Reducer reducer;
 } kR1TestCases[] = {
     {/*base_bounds=*/{1}, /*window_bounds=*/{1},
      /*strides=*/{1},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({1}, {1}, {1}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({1}, {1}, {1}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{3}, /*window_bounds=*/{3},
      /*strides=*/{1},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({3}, {3}, {1}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({3}, {3}, {1}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{3}, /*window_bounds=*/{2},
      /*strides=*/{1},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({3}, {2}, {1}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({3}, {2}, {1}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{5}, /*window_bounds=*/{1},
      /*strides=*/{1},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kMax},
+     /*pad_low=*/{xla::MakePadding({5}, {1}, {1}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({5}, {1}, {1}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kMax},
 
     {/*base_bounds=*/{16}, /*window_bounds=*/{4},
      /*strides=*/{4},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kMax},
+     /*pad_low=*/{xla::MakePadding({16}, {4}, {4}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({16}, {4}, {4}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kMax},
 
     {/*base_bounds=*/{16}, /*window_bounds=*/{4},
      /*strides=*/{3},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({16}, {4}, {3}, Padding::kValid)[0].first},
+     /*pad_high=*/{xla::MakePadding({16}, {4}, {3}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
-    {/*base_bounds=*/{128 * 2}, /*window_bounds=*/{30},
+    {/*base_bounds=*/{128 * 2},
+     /*window_bounds=*/{30},
      /*strides=*/{27},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
-
-    {/*base_bounds=*/{128 * 17}, /*window_bounds=*/{7},
+     /*pad_low=*/
+     {xla::MakePadding({128 * 2}, {30}, {27}, Padding::kValid)[0].first},
+     /*pad_high=*/
+     {xla::MakePadding({128 * 2}, {30}, {27}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
+
+    {/*base_bounds=*/{128 * 17},
+     /*window_bounds=*/{7},
      /*strides=*/{64},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
-
-    {/*base_bounds=*/{128 * 2}, /*window_bounds=*/{32},
+     /*pad_low=*/
+     {xla::MakePadding({128 * 17}, {7}, {64}, Padding::kValid)[0].first},
+     /*pad_high=*/
+     {xla::MakePadding({128 * 17}, {7}, {64}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
+
+    {/*base_bounds=*/{128 * 2},
+     /*window_bounds=*/{32},
      /*strides=*/{56},
-     /*padding=*/Padding::kValid, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/
+     {xla::MakePadding({128 * 2}, {32}, {56}, Padding::kValid)[0].first},
+     /*pad_high=*/
+     {xla::MakePadding({128 * 2}, {32}, {56}, Padding::kValid)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{3}, /*window_bounds=*/{2},
      /*strides=*/{1},
-     /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({3}, {2}, {1}, Padding::kSame)[0].first},
+     /*pad_high=*/{xla::MakePadding({3}, {2}, {1}, Padding::kSame)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{5}, /*window_bounds=*/{3},
      /*strides=*/{2},
-     /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({5}, {3}, {2}, Padding::kSame)[0].first},
+     /*pad_high=*/{xla::MakePadding({5}, {3}, {2}, Padding::kSame)[0].second},
+     /*reducer=*/Reducer::kAdd},
 
     {/*base_bounds=*/{16}, /*window_bounds=*/{4},
      /*strides=*/{3},
-     /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd},
+     /*pad_low=*/{xla::MakePadding({16}, {4}, {3}, Padding::kSame)[0].first},
+     /*pad_high=*/{xla::MakePadding({16}, {4}, {3}, Padding::kSame)[0].second},
+     /*reducer=*/Reducer::kAdd},
+
+    {/*base_bounds=*/{5}, /*window_bounds=*/{5},
+     /*strides=*/{1},
+     /*pad_low=*/{0},
+     /*pad_high=*/{5},
+     /*reducer=*/Reducer::kAdd},
+
+    {/*base_bounds=*/{5}, /*window_bounds=*/{5},
+     /*strides=*/{1},
+     /*pad_low=*/{5},
+     /*pad_high=*/{0},
+     /*reducer=*/Reducer::kAdd},
 };
 
 string R1ReduceWindowTestDataToString(
-    const ::testing::TestParamInfo<R1ReduceWindowTestData>& data) {
+    const ::testing::TestParamInfo<
+        ::testing::tuple<R1ReduceWindowTestData, bool>>& data) {
+  const auto& param = ::testing::get<0>(data.param);
   string str = tensorflow::strings::StrCat(
-      "base_bounds_",
-      tensorflow::str_util::Join(data.param.base_bounds, "x"),  //
-      "__window_bounds_",
-      tensorflow::str_util::Join(data.param.window_bounds, "x"),              //
-      "__strides_", tensorflow::str_util::Join(data.param.strides, "x"),      //
-      "__padding_", data.param.padding == Padding::kSame ? "same" : "valid",  //
-      "__reducer_", data.param.reducer == kAdd ? "add" : "max");
+      "base_bounds_", tensorflow::str_util::Join(param.base_bounds, "x"),
+      "__window_bounds_", tensorflow::str_util::Join(param.window_bounds, "x"),
+      "__strides_", tensorflow::str_util::Join(param.strides, "x"),
+      "__pad_low_", tensorflow::str_util::Join(param.pad_low, "x"),
+      "__pad_high_", tensorflow::str_util::Join(param.pad_high, "x"),
+      "__reducer_", param.reducer == kAdd ? "add" : "max");
+  if (::testing::get<1>(data.param)) {
+    str = tensorflow::strings::StrCat(str, "_bfloat16");
+  }
   return str;
 }
 
-class R1ReduceWindowTest
-    : public ClientLibraryTestBase,
-      public ::testing::WithParamInterface<R1ReduceWindowTestData> {};
+class R1ReduceWindowTest : public ReduceWindowTestBase,
+                           public ::testing::WithParamInterface<
+                               ::testing::tuple<R1ReduceWindowTestData, bool>> {
+ protected:
+  R1ReduceWindowTest() { set_use_bfloat16(::testing::get<1>(GetParam())); }
+};
 
 TEST_P(R1ReduceWindowTest, DoIt) {
   ComputationBuilder b(client_, TestName());
-  const auto& param = GetParam();
+  const auto& param = ::testing::get<0>(GetParam());
   CHECK(param.reducer == kAdd || param.reducer == kMax);
 
   const float kInitValue = 0.0f;
@@ -1041,18 +1116,24 @@ TEST_P(R1ReduceWindowTest, DoIt) {
   std::iota(std::begin(input_vector), std::end(input_vector), 0);
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR1(tensorflow::gtl::ArraySlice<float>(input_vector));
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GlobalData> input_arg,
-                          client_->TransferToServer(*input_literal));
+  ComputationDataHandle parameter;
+  auto input_arg = CreateParameterAndTransferLiteral(0, *input_literal, "p0",
+                                                     &b, &parameter);
+
+  std::vector<std::pair<int64, int64>> padding(1);
+  padding[0] = {param.pad_low[0], param.pad_high[0]};
 
   auto computation = param.reducer == kAdd
-                         ? CreateScalarAddComputation(F32, &b)
-                         : CreateScalarMaxComputation(F32, &b);
-  b.ReduceWindow(/*operand=*/
-                 b.Parameter(0, input_literal->shape(), "p0"),
-                 /*init_value=*/b.ConstantR0<float>(kInitValue),
-                 /*computation=*/computation,
-                 /*window_dimensions=*/param.window_bounds,
-                 /*window_strides=*/param.strides, /*padding=*/param.padding);
+                         ? CreateScalarAddComputation(FloatType(), &b)
+                         : CreateScalarMaxComputation(FloatType(), &b);
+  auto init_value =
+      CreateConstantFromLiteral(*Literal::CreateR0(kInitValue), &b);
+  b.ReduceWindowWithGeneralPadding(
+      /*operand=*/parameter,
+      /*init_value=*/init_value,
+      /*computation=*/computation,
+      /*window_dimensions=*/param.window_bounds,
+      /*window_strides=*/param.strides, /*padding=*/padding);
 
   auto reduce_func = param.reducer == kAdd
                          ? +[](float a, float b) { return a + b; }
@@ -1062,14 +1143,73 @@ TEST_P(R1ReduceWindowTest, DoIt) {
       /*init=*/kInitValue,
       /*reduce_func=*/reduce_func,
       /*window=*/param.window_bounds,
-      /*stride=*/param.strides, /*padding=*/param.padding);
+      /*stride=*/param.strides,
+      /*padding=*/padding);
 
-  ComputeAndCompareR1<float>(&b, tensorflow::gtl::ArraySlice<float>(*expected),
-                             {input_arg.get()}, ErrorSpec(1e-3, 1e-3));
+  ComputeAndCompareLiteral(&b, *Literal::CreateR1<float>(*expected),
+                           {input_arg.get()}, DefaultErrorSpec());
+}
+
+INSTANTIATE_TEST_CASE_P(
+    R1ReduceWindowTestInstantiation, R1ReduceWindowTest,
+    ::testing::Combine(::testing::ValuesIn(kR1TestCases),
+                       ::testing::ValuesIn(use_bfloat16_params)),
+    R1ReduceWindowTestDataToString);
+
+// Test class for text-based test cases. Note that this compares with the
+// results on the interpreter backend.
+class ReduceWindowTextTest : public HloTestBase {};
+
+TEST_F(ReduceWindowTextTest, R2General256x384) {
+  const string& hlo_string = R"(
+HloModule R2Window
+mul {
+  lhs = f32[] parameter(0)
+  rhs = f32[] parameter(1)
+  ROOT mul = f32[] multiply(lhs, rhs)
+}
+ENTRY R2Window {
+  operand = f32[256,384]{1,0} parameter(0)
+  constant = f32[] constant(1)
+  ROOT reduce-window = f32[256,384]{1,0} reduce-window(operand, constant), window={size=2x3 pad=0_1x1_1}, to_apply=mul
+}
+)";
+  EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0.001}));
+}
+
+TEST_F(ReduceWindowTextTest, R2General256x384Layout01) {
+  const string& hlo_string = R"(
+HloModule R2Window
+mul {
+lhs = f32[] parameter(0)
+rhs = f32[] parameter(1)
+ROOT mul = f32[] multiply(lhs, rhs)
+}
+ENTRY R2Window {
+operand = f32[256,384]{0,1} parameter(0)
+constant = f32[] constant(1)
+ROOT reduce-window = f32[256,384]{0,1} reduce-window(operand, constant), window={size=2x3 pad=0_1x1_1}, to_apply=mul
+}
+)";
+  EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0.001}));
+}
+
+TEST_F(ReduceWindowTextTest, R2General2x5) {
+  const string& hlo_string = R"(
+HloModule R2Window
+mul {
+  lhs = f32[] parameter(0)
+  rhs = f32[] parameter(1)
+  ROOT mul = f32[] multiply(lhs, rhs)
+}
+ENTRY R2Window {
+  operand = f32[2,5]{1,0} parameter(0)
+  constant = f32[] constant(1)
+  ROOT reduce-window = f32[3,5]{1,0} reduce-window(operand, constant), window={size=2x1 pad=0_2x0_0}, to_apply=mul
+}
+)";
+  EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0.001}));
 }
 
-INSTANTIATE_TEST_CASE_P(R1ReduceWindowTestInstantiation, R1ReduceWindowTest,
-                        ::testing::ValuesIn(kR1TestCases),
-                        R1ReduceWindowTestDataToString);
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc
index d235b9a1580ecbd6b82a69fca53d259912ff375e..f7b04debd4f5c40a904e32c832b6fc384a03c33b 100644
--- a/tensorflow/compiler/xla/tests/reshape_test.cc
+++ b/tensorflow/compiler/xla/tests/reshape_test.cc
@@ -41,326 +41,467 @@ limitations under the License.
 namespace xla {
 namespace {
 
-class ReshapeTest : public ClientLibraryTestBase {
+// Use a bool parameter to indicate whether to use bfloat16.
+class ReshapeTest : public ::testing::WithParamInterface<bool>,
+                    public ClientLibraryTestBase {
  public:
+  ReshapeTest() { set_use_bfloat16(GetParam()); }
+
   ErrorSpec zero_error_spec_{0.0};
 };
 
 // Collapses 2-dimensional pseudo-scalar (single-element array) to 1 dimension.
-XLA_TEST_F(ReshapeTest, CollapseTrivial1x1) {
+XLA_TEST_P(ReshapeTest, CollapseTrivial1x1) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2<float>({{1.0}});
-  builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {1.0f}, {}, zero_error_spec_);
+  Array2D<float> input_array(1, 1);
+  input_array.Fill(1.0f);
+  auto input_literal = Literal::CreateR2FromArray2D(input_array);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+
+  auto expected_literal = Literal::CreateR1<float>({1.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, CollapseTrivialR1EmptyDims) {
+XLA_TEST_P(ReshapeTest, CollapseTrivialR1EmptyDims) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR1<float>({1.0});
-  builder.Collapse(/*operand=*/a, /*dimensions=*/{});
-
-  ComputeAndCompareR1<float>(&builder, {1.0f}, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateR1<float>({1.0f});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{});
+
+  auto expected_literal = Literal::CreateR1<float>({1.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, CollapseTrivialR1OnlyDim) {
+XLA_TEST_P(ReshapeTest, CollapseTrivialR1OnlyDim) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR1<float>({1.0});
-  builder.Collapse(/*operand=*/a, /*dimensions=*/{0});
-
-  ComputeAndCompareR1<float>(&builder, {1.0f}, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateR1<float>({1.0f});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0});
+
+  auto expected_literal = Literal::CreateR1<float>({1.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Collapses 2-dimensional pseudo-scalar (single-element array) to scalar.
-XLA_TEST_F(ReshapeTest, SingleElementArrayToScalar) {
+XLA_TEST_P(ReshapeTest, SingleElementArrayToScalar) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2<float>({{1.0}});
-  auto reshape =
-      builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1}, /*new_sizes=*/{});
+  Array2D<float> input_array(1, 1);
+  input_array.Fill(1.0f);
+  auto input_literal = Literal::CreateR2FromArray2D(input_array);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "parameter",
+                                                 &builder, &parameter);
+  auto reshape = builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1},
+                                 /*new_sizes=*/{});
   auto new_shape = builder.GetShape(reshape).ConsumeValueOrDie();
 
-  ComputeAndCompareR0<float>(&builder, 1.0f, {}, zero_error_spec_);
+  auto expected_literal = Literal::CreateR0<float>(1.0f);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, ScalarToSingleElementArray) {
+XLA_TEST_P(ReshapeTest, ScalarToSingleElementArray) {
   ComputationBuilder builder(client_, TestName());
 
   std::unique_ptr<Literal> param0_literal = Literal::CreateR0<float>(1.0f);
-  std::unique_ptr<GlobalData> param0_data =
-      client_->TransferToServer(*param0_literal).ConsumeValueOrDie();
-
-  auto a = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "param0");
-  a = builder.Neg(a);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *param0_literal, "param0",
+                                                 &builder, &parameter);
+  auto a = builder.Neg(parameter);
   auto reshape =
       builder.Reshape(/*operand=*/a, /*dimensions=*/{}, /*new_sizes=*/{1});
 
-  ComputeAndCompareR1<float>(&builder, {-1.0f}, {param0_data.get()},
-                             zero_error_spec_);
+  auto expected_literal = Literal::CreateR1<float>({-1.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, Trivial0x3) {
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 3));
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {}, {}, zero_error_spec_);
+  Array2D<float> input_array(0, 3);
+  auto input_literal = Literal::CreateR2FromArray2D(input_array);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+  auto expected_literal = Literal::CreateR1<float>({});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // TODO(b/29185393): Make this work with the GPU backend. The GPU backend
 // does not handle zero-sized shapes correctly. Failed last on 2017-05-15
 // with an incorrect result rank.
-XLA_TEST_F(ReshapeTest, DISABLED_ON_GPU(Trivial0x3WithParameter)) {
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3WithParameter)) {
   ComputationBuilder builder(client_, TestName());
 
   std::unique_ptr<Literal> param0_literal =
       Literal::CreateR2FromArray2D<float>(Array2D<float>(0, 3));
-  std::unique_ptr<GlobalData> param0_data =
-      client_->TransferToServer(*param0_literal).ConsumeValueOrDie();
-
-  auto a = builder.Parameter(0, ShapeUtil::MakeShape(F32, {0, 3}), "param0");
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {}, {param0_data.get()},
-                             zero_error_spec_);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *param0_literal, "param0",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+  auto expected_literal = Literal::CreateR1<float>({});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, Trivial3x0) {
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial3x0)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(3, 0));
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {}, {}, zero_error_spec_);
+  Array2D<float> input_array(3, 0);
+  auto input_literal = Literal::CreateR2FromArray2D(input_array);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+  auto expected_literal = Literal::CreateR1<float>({});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Collapses a 2-dimensional row vector to 1 dimension.
-XLA_TEST_F(ReshapeTest, Trivial1x3) {
+XLA_TEST_P(ReshapeTest, Trivial1x3) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2<float>({{1.0f, 2.0f, 3.0f}});
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {1.0f, 2.0f, 3.0f}, {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateR2<float>({{1.0f, 2.0f, 3.0f}});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+  auto expected_literal = Literal::CreateR1<float>({1.0f, 2.0f, 3.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Collapses a 2-dimensional column vector to 1 dimension.
-XLA_TEST_F(ReshapeTest, Trivial3x1) {
+XLA_TEST_P(ReshapeTest, Trivial3x1) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2<float>({{1.0f}, {2.0f}, {3.0f}});
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1});
-
-  ComputeAndCompareR1<float>(&builder, {1.0f, 2.0f, 3.0f}, {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateR2<float>({{1.0f}, {2.0f}, {3.0f}});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{0, 1});
+  auto expected_literal = Literal::CreateR1<float>({1.0f, 2.0f, 3.0f});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+//
 // Splits an empty vector into an empty matrix.
-XLA_TEST_F(ReshapeTest, R1ToR2_0_To_2x0) {
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(R1ToR2_0_To_2x0)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR1<float>({});
-  auto result =
-      builder.Reshape(/*operand=*/a, /*dimensions=*/{0}, /*new_sizes=*/{2, 0});
-  ComputeAndCompareR2<float>(&builder, Array2D<float>(2, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateR1<float>({});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0},
+                  /*new_sizes=*/{2, 0});
+  auto expected_literal = Literal::CreateR2<float>({{}, {}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Splits a vector into a matrix.
-XLA_TEST_F(ReshapeTest, R1ToR2_6_To_2x3) {
+XLA_TEST_P(ReshapeTest, R1ToR2_6_To_2x3) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR1<float>({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f});
-  auto result =
-      builder.Reshape(/*operand=*/a, /*dimensions=*/{0}, /*new_sizes=*/{2, 3});
-  Array2D<float> expected_2x3({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}});
-  ComputeAndCompareR2<float>(&builder, expected_2x3, {}, zero_error_spec_);
+  auto input_literal =
+      Literal::CreateR1<float>({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0},
+                  /*new_sizes=*/{2, 3});
+  auto expected_literal =
+      Literal::CreateR2<float>({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+//
 // Transposes a 2x0 array to a 0x2 array.
-XLA_TEST_F(ReshapeTest, Reshape0x2To2x0) {
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Reshape0x2To2x0)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 2));
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1},
-                                /*new_sizes=*/{2, 0});
-
-  ComputeAndCompareR2<float>(&builder, Array2D<float>(2, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(Array2D<float>(0, 2));
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1},
+                  /*new_sizes=*/{2, 0});
+  auto expected_literal = Literal::CreateR2<float>({{}, {}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Transposes a 2-dimensional row vector to a column vector.
-XLA_TEST_F(ReshapeTest, ReshapeRowToCol) {
+XLA_TEST_P(ReshapeTest, ReshapeRowToCol) {
   ComputationBuilder builder(client_, TestName());
   auto simple = MakeLinspaceArray2D(1.0f, 3.0f, 1, 3);
-  auto a = builder.ConstantR2FromArray2D<float>(*simple);
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1},
-                                /*new_sizes=*/{3, 1});
+  auto input_literal = Literal::CreateFromArray(*simple);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1},
+                  /*new_sizes=*/{3, 1});
 
   auto expected = ReferenceUtil::TransposeArray2D(*simple);
-  ComputeAndCompareR2<float>(&builder, *expected, {}, zero_error_spec_);
+  auto expected_literal = Literal::CreateFromArray(*expected);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Transposes a 2-dimensional array.
-XLA_TEST_F(ReshapeTest, TransposeAsReshape) {
+XLA_TEST_P(ReshapeTest, TransposeAsReshape) {
   ComputationBuilder builder(client_, TestName());
   auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3);
-  auto a = builder.ConstantR2FromArray2D<float>(*a4x3);
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{1, 0},
-                                /*new_sizes=*/{3, 4});
-
-  auto expected3x4 = ReferenceUtil::TransposeArray2D(*a4x3);
-  ComputeAndCompareR2<float>(&builder, *expected3x4, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(*a4x3);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 0},
+                  /*new_sizes=*/{3, 4});
+
+  auto expected = ReferenceUtil::TransposeArray2D(*a4x3);
+  auto expected_literal = Literal::CreateFromArray(*expected);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+//
 // Transposes a 0x4 array with ComputationBuilder::Trans.
-XLA_TEST_F(ReshapeTest, Transpose0x4) {
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Transpose0x4)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 4));
-  auto result = builder.Transpose(a, {1, 0});
-
-  ComputeAndCompareR2<float>(&builder, Array2D<float>(4, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(Array2D<float>(0, 4));
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Transpose(parameter, {1, 0});
+  auto expected_literal = Literal::CreateR2<float>({{}, {}, {}, {}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Transposes a 2-dimensional array with ComputationBuilder::Trans.
-XLA_TEST_F(ReshapeTest, Transpose4x3) {
+XLA_TEST_P(ReshapeTest, Transpose4x3) {
   ComputationBuilder builder(client_, TestName());
   auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3);
-  auto a = builder.ConstantR2FromArray2D<float>(*a4x3);
-  auto result = builder.Transpose(a, {1, 0});
-
-  auto expected3x4 = ReferenceUtil::TransposeArray2D(*a4x3);
-  ComputeAndCompareR2<float>(&builder, *expected3x4, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(*a4x3);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Transpose(parameter, {1, 0});
+
+  auto expected = ReferenceUtil::TransposeArray2D(*a4x3);
+  auto expected_literal = Literal::CreateFromArray(*expected);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+//
 // Reshapes an empty 2-dimensional array with dimensions that are not just a
 // rearrangement of the originals (split), but no reordering (no shuffle).
-XLA_TEST_F(ReshapeTest, ReshapeSplitNoShuffleZeroElements) {
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitNoShuffleZeroElements)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(6, 0));
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1},
-                                /*new_sizes=*/{2, 3, 0, 0});
-
-  ComputeAndCompareR4<float>(&builder, Array4D<float>(2, 3, 0, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(Array2D<float>(6, 0));
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1},
+                  /*new_sizes=*/{2, 3, 0, 0});
+  auto expected_literal = Literal::CreateFromArray(Array4D<float>(2, 3, 0, 0));
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, ReshapeR4ToR2ZeroElements) {
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeR4ToR2ZeroElements)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR4FromArray4D<float>(Array4D<float>(2, 3, 4, 0));
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1, 2, 3},
-                                /*new_sizes=*/{24, 0});
-
-  ComputeAndCompareR2<float>(&builder, Array2D<float>(24, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(Array4D<float>(2, 3, 4, 0));
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2, 3},
+                  /*new_sizes=*/{24, 0});
+  auto expected_literal = Literal::CreateFromArray(Array2D<float>(24, 0));
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Reshapes a 2-dimensional array with dimensions that are not just a
 // rearrangement of the originals (split), but no reordering (no shuffle).
-XLA_TEST_F(ReshapeTest, ReshapeSplitNoShuffle) {
+XLA_TEST_P(ReshapeTest, ReshapeSplitNoShuffle) {
   ComputationBuilder builder(client_, TestName());
   auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3);
-  auto a = builder.ConstantR2FromArray2D<float>(*a4x3);
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1},
-                                /*new_sizes=*/{2, 6});
-
-  auto expected2x6 = MakeLinspaceArray2D(1.0f, 12.0f, 2, 6);
-  ComputeAndCompareR2<float>(&builder, *expected2x6, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(*a4x3);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1},
+                  /*new_sizes=*/{2, 6});
+
+  auto expected = MakeLinspaceArray2D(1.0f, 12.0f, 2, 6);
+  auto expected_literal = Literal::CreateFromArray(*expected);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-// Reshapes a 2-dimensional array with dimensions that are not just a
-// rearrangement of the originals (split), and reorder the input (shuffle).
-XLA_TEST_F(ReshapeTest, ReshapeSplitAndShuffleZeroElements) {
+// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
+// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
+// with an incorrect result rank.
+//
+XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitAndShuffleZeroElements)) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.ConstantR2FromArray2D<float>(Array2D<float>(0, 6));
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{1, 0},
-                                /*new_sizes=*/{3, 0});
-
-  ComputeAndCompareR2<float>(&builder, Array2D<float>(3, 0), {},
-                             zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(Array2D<float>(0, 6));
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 0},
+                  /*new_sizes=*/{3, 0});
+  auto expected_literal = Literal::CreateFromArray(Array2D<float>(3, 0));
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Reshapes a 2-dimensional array with dimensions that are not just a
 // rearrangement of the originals (split), and reorder the input (shuffle).
-XLA_TEST_F(ReshapeTest, ReshapeSplitAndShuffle) {
+XLA_TEST_P(ReshapeTest, ReshapeSplitAndShuffle) {
   ComputationBuilder builder(client_, TestName());
   auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3);
-  auto a = builder.ConstantR2FromArray2D<float>(*a4x3);
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{1, 0},
-                                /*new_sizes=*/{2, 6});
-
-  Array2D<float> expected2x6({{1.0f, 4.0f, 7.0f, 10.0f, 2.0f, 5.0f},
-                              {8.0f, 11.0f, 3.0f, 6.0f, 9.0f, 12.0f}});
-  ComputeAndCompareR2<float>(&builder, expected2x6, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(*a4x3);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 0},
+                  /*new_sizes=*/{2, 6});
+  Array2D<float> expected({{1.0f, 4.0f, 7.0f, 10.0f, 2.0f, 5.0f},
+                           {8.0f, 11.0f, 3.0f, 6.0f, 9.0f, 12.0f}});
+  auto expected_literal = Literal::CreateFromArray(expected);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // The following tests use the same input 3D array; they test the examples we
 // show for the Reshape operation in the operation_semantics document.
 // TODO(b/34503277): find a way to show this code in the documentation without
 // duplication on the TF documentation server.
-Array3D<int> v_array_for_doc_R3_tests({{{10, 11, 12}, {15, 16, 17}},
-                                       {{20, 21, 22}, {25, 26, 27}},
-                                       {{30, 31, 32}, {35, 36, 37}},
-                                       {{40, 41, 42}, {45, 46, 47}}});
-
-XLA_TEST_F(ReshapeTest, DocR3_R1_Collapse_012) {
-  ComputationBuilder builder(client_, TestName());
-  auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests);
-  auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{0, 1, 2},
-                                /*new_sizes=*/{24});
-  ComputeAndCompareR1<int>(&builder,
-                           {10, 11, 12, 15, 16, 17, 20, 21, 22, 25, 26, 27,
-                            30, 31, 32, 35, 36, 37, 40, 41, 42, 45, 46, 47},
-                           {});
-}
-
-XLA_TEST_F(ReshapeTest, DocR3_R2_Collapse_012_Refine_83) {
-  ComputationBuilder builder(client_, TestName());
-  auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests);
-  auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{0, 1, 2},
-                                /*new_sizes=*/{8, 3});
-  Array2D<int> expected({{10, 11, 12},
-                         {15, 16, 17},
-                         {20, 21, 22},
-                         {25, 26, 27},
-                         {30, 31, 32},
-                         {35, 36, 37},
-                         {40, 41, 42},
-                         {45, 46, 47}});
-  ComputeAndCompareR2<int>(&builder, expected, {});
-}
-
-XLA_TEST_F(ReshapeTest, DocR3_R1_Collapse_120) {
-  ComputationBuilder builder(client_, TestName());
-  auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests);
-  auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{1, 2, 0},
-                                /*new_sizes=*/{24});
-  ComputeAndCompareR1<int>(&builder,
-                           {10, 20, 30, 40, 11, 21, 31, 41, 12, 22, 32, 42,
-                            15, 25, 35, 45, 16, 26, 36, 46, 17, 27, 37, 47},
-                           {});
-}
-
-XLA_TEST_F(ReshapeTest, DocR3_R2_Collapse_120_Refine_83) {
-  ComputationBuilder builder(client_, TestName());
-  auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests);
-  auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{1, 2, 0},
-                                /*new_sizes=*/{8, 3});
-  Array2D<int> expected({{10, 20, 30},
-                         {40, 11, 21},
-                         {31, 41, 12},
-                         {22, 32, 42},
-                         {15, 25, 35},
-                         {45, 16, 26},
-                         {36, 46, 17},
-                         {27, 37, 47}});
-  ComputeAndCompareR2<int>(&builder, expected, {});
-}
-
-XLA_TEST_F(ReshapeTest, DocR3_R3_Collapse_120_Refine_262) {
-  ComputationBuilder builder(client_, TestName());
-  auto v = builder.ConstantR3FromArray3D<int>(v_array_for_doc_R3_tests);
-  auto result = builder.Reshape(/*operand=*/v, /*dimensions=*/{1, 2, 0},
-                                /*new_sizes=*/{2, 6, 2});
-  Array3D<int> expected(
+static Array3D<float> ArrayForDocR3Tests() {
+  return Array3D<float>({{{10, 11, 12}, {15, 16, 17}},
+                         {{20, 21, 22}, {25, 26, 27}},
+                         {{30, 31, 32}, {35, 36, 37}},
+                         {{40, 41, 42}, {45, 46, 47}}});
+}
+
+XLA_TEST_P(ReshapeTest, DocR3_R1_Collapse_012) {
+  ComputationBuilder builder(client_, TestName());
+  auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests());
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2},
+                  /*new_sizes=*/{24});
+  auto expected_literal = Literal::CreateR1<float>(
+      {10, 11, 12, 15, 16, 17, 20, 21, 22, 25, 26, 27,
+       30, 31, 32, 35, 36, 37, 40, 41, 42, 45, 46, 47});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
+}
+
+XLA_TEST_P(ReshapeTest, DocR3_R2_Collapse_012_Refine_83) {
+  ComputationBuilder builder(client_, TestName());
+  auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests());
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2},
+                  /*new_sizes=*/{8, 3});
+  auto expected_literal = Literal::CreateR2<float>({{10, 11, 12},
+                                                    {15, 16, 17},
+                                                    {20, 21, 22},
+                                                    {25, 26, 27},
+                                                    {30, 31, 32},
+                                                    {35, 36, 37},
+                                                    {40, 41, 42},
+                                                    {45, 46, 47}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
+}
+
+XLA_TEST_P(ReshapeTest, DocR3_R1_Collapse_120) {
+  ComputationBuilder builder(client_, TestName());
+  auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests());
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 2, 0},
+                  /*new_sizes=*/{24});
+  auto expected_literal = Literal::CreateR1<float>(
+      {10, 20, 30, 40, 11, 21, 31, 41, 12, 22, 32, 42,
+       15, 25, 35, 45, 16, 26, 36, 46, 17, 27, 37, 47});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
+}
+
+XLA_TEST_P(ReshapeTest, DocR3_R2_Collapse_120_Refine_83) {
+  ComputationBuilder builder(client_, TestName());
+  auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests());
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 2, 0},
+                  /*new_sizes=*/{8, 3});
+  auto expected_literal = Literal::CreateR2<float>({{10, 20, 30},
+                                                    {40, 11, 21},
+                                                    {31, 41, 12},
+                                                    {22, 32, 42},
+                                                    {15, 25, 35},
+                                                    {45, 16, 26},
+                                                    {36, 46, 17},
+                                                    {27, 37, 47}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
+}
+
+XLA_TEST_P(ReshapeTest, DocR3_R3_Collapse_120_Refine_262) {
+  ComputationBuilder builder(client_, TestName());
+  auto input_literal = Literal::CreateFromArray(ArrayForDocR3Tests());
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 2, 0},
+                  /*new_sizes=*/{2, 6, 2});
+  auto expected_literal = Literal::CreateR3<float>(
       {{{10, 20}, {30, 40}, {11, 21}, {31, 41}, {12, 22}, {32, 42}},
        {{15, 25}, {35, 45}, {16, 26}, {36, 46}, {17, 27}, {37, 47}}});
-  ComputeAndCompareR3<int>(&builder, expected, {});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Collapses the low dimensions of a 4D tensor to get a 2D matrix, without
@@ -378,23 +519,26 @@ XLA_TEST_F(ReshapeTest, DocR3_R3_Collapse_120_Refine_262) {
 // Then we collapse Z be collapsed so we just end up with planes:
 //
 // 1 2 3 4 5 6 1 2 3 4 5 6
-XLA_TEST_F(ReshapeTest, FullyConnectedCollapse) {
+XLA_TEST_P(ReshapeTest, FullyConnectedCollapse) {
   ComputationBuilder builder(client_, TestName());
   Array4D<float> t2x2x2x3(2, 2, 2, 3);
   auto filler2x3 = MakeLinspaceArray2D(1.0f, 6.0f, 2, 3);
   t2x2x2x3.FillWithYX(*filler2x3);
-  auto a = builder.ConstantR4FromArray4D<float>(t2x2x2x3);
-  auto result = builder.Collapse(/*operand=*/a, /*dimensions=*/{1, 2, 3});
-
-  Array2D<float> expected2x12(
+  auto input_literal = Literal::CreateFromArray(t2x2x2x3);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Collapse(/*operand=*/parameter, /*dimensions=*/{1, 2, 3});
+  auto expected_literal = Literal::CreateR2<float>(
       {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f},
        {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
         6.0f}});
-  ComputeAndCompareR2<float>(&builder, expected2x12, {}, zero_error_spec_);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // As above, but uses reshape directly.
-XLA_TEST_F(ReshapeTest, FullyConnectedCollapseDesugared) {
+XLA_TEST_P(ReshapeTest, FullyConnectedCollapseDesugared) {
   ComputationBuilder builder(client_, TestName());
   Array4D<float> t(2, 1, 2, 2);
   t(0, 0, 0, 0) = 0;
@@ -405,52 +549,68 @@ XLA_TEST_F(ReshapeTest, FullyConnectedCollapseDesugared) {
   t(1, 0, 0, 1) = 5;
   t(1, 0, 1, 0) = 6;
   t(1, 0, 1, 1) = 7;
-  auto a = builder.ConstantR4FromArray4D<float>(t);
-  auto result = builder.Reshape(/*operand=*/a, /*dimensions=*/{0, 1, 2, 3},
-                                /*new_sizes=*/{2, 4});
-
-  Array2D<float> expected({{0, 1, 2, 3}, {4, 5, 6, 7}});
-  ComputeAndCompareR2<float>(&builder, expected, {}, zero_error_spec_);
+  auto input_literal = Literal::CreateFromArray(t);
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1, 2, 3},
+                  /*new_sizes=*/{2, 4});
+
+  auto expected_literal =
+      Literal::CreateR2<float>({{0, 1, 2, 3}, {4, 5, 6, 7}});
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Reshape various ranks to a scalar.
-XLA_TEST_F(ReshapeTest, ToScalar) {
+XLA_TEST_P(ReshapeTest, ToScalar) {
   for (int rank = 0; rank < 8; ++rank) {
     ComputationBuilder b(client_, TestName());
-    auto input = Literal::CreateR1<float>({83.0f});
     std::vector<int64> ones(rank, 1);  // this is {1, ..., 1}.
     std::vector<int64> dimensions(rank);
     std::iota(dimensions.begin(), dimensions.end(), 0);
-    *input->mutable_shape() = ShapeUtil::MakeShape(F32, ones);
-    b.Reshape(b.ConstantLiteral(*input), dimensions, {});
+    Literal input_literal(ShapeUtil::MakeShape(F32, ones));
+    std::vector<int64> zeros(rank, 0);  // this is {0, ..., 0}.
+    input_literal.Set<float>(zeros, 83.0f);
 
-    ComputeAndCompareR0<float>(&b, 83.0f, {}, zero_error_spec_);
+    ComputationDataHandle parameter;
+    auto input = CreateParameterAndTransferLiteral(0, input_literal, "input",
+                                                   &b, &parameter);
+    b.Reshape(parameter, dimensions, {});
+
+    auto expected_literal = Literal::CreateR0<float>(83.0f);
+    ComputeAndCompareLiteral(&b, *expected_literal, {input.get()},
+                             zero_error_spec_);
   }
 }
 
-XLA_TEST_F(ReshapeTest, BadDimensions) {
+XLA_TEST_P(ReshapeTest, BadDimensions) {
   ComputationBuilder b(client_, TestName());
-  b.Reshape(b.ConstantR1<int32>({1}), {}, {});
+  auto input_literal = Literal::CreateR1<float>({1.0f});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &b,
+                                                 &parameter);
+  b.Reshape(parameter, {}, {});
   EXPECT_THAT(
       ExecuteToString(&b, {}),
       ::testing::HasSubstr("not a permutation of the operand dimensions"));
 }
 
-XLA_TEST_F(ReshapeTest, BadNewSizes) {
+XLA_TEST_P(ReshapeTest, BadNewSizes) {
   ComputationBuilder b(client_, TestName());
-  b.Reshape(b.ConstantR1<int32>({1, 2}), {1}, {});
+  auto input_literal = Literal::CreateR1<float>({1.0f, 2.0f});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &b,
+                                                 &parameter);
+  b.Reshape(parameter, {1}, {});
   EXPECT_THAT(ExecuteToString(&b, {}),
               ::testing::HasSubstr("mismatched element counts"));
 }
 
-XLA_TEST_F(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) {
-  const Shape parameter_shape = ShapeUtil::MakeShape(F32, {2, 2, 2, 2});
+XLA_TEST_P(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) {
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, parameter_shape, "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{2, 8});
-
   // clang-format off
-  auto literal = Literal::CreateR4FromArray4DWithLayout(Array4D<float>{
+  auto input_literal = Literal::CreateR4FromArray4DWithLayout(Array4D<float>{
     {
       {
         {0, 1},
@@ -474,8 +634,12 @@ XLA_TEST_F(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) {
   },
        LayoutUtil::MakeLayout({0, 1, 2, 3}));
   // clang-format on
-  std::unique_ptr<GlobalData> input =
-      client_->TransferToServer(*literal).ConsumeValueOrDie();
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{2, 8});
+
   Array2D<float> expected_array({
       {0, 1, 2, 3, 100, 101, 102, 103},
       {222, 333, 444, 555, 666, 777, 888, 999},
@@ -484,72 +648,75 @@ XLA_TEST_F(ReshapeTest, R4Dim0MinorLayoutToR2Dim0MajorLayout) {
   Computation computation = builder.Build().ConsumeValueOrDie();
   ExecutionOptions execution_options = execution_options_;
   *execution_options.mutable_shape_with_output_layout() =
-      ShapeUtil::MakeShapeWithLayout(F32, {2, 8}, {1, 0});
+      ShapeUtil::MakeShapeWithLayout(use_bfloat16() ? BF16 : F32, {2, 8},
+                                     {1, 0});
   std::unique_ptr<Literal> actual =
       client_
           ->ExecuteAndTransfer(computation, {input.get()}, &execution_options)
           .ConsumeValueOrDie();
   std::unique_ptr<Literal> expected =
       Literal::CreateR2FromArray2D<float>(expected_array);
+  if (use_bfloat16()) {
+    expected = LiteralTestUtil::ConvertF32ToBF16(*expected);
+  }
   LiteralTestUtil::ExpectEqual(*expected, *actual);
 }
 
-XLA_TEST_F(ReshapeTest, R2ToR4_3x8_To_3x2x1x4) {
-  std::unique_ptr<Literal> input = Literal::CreateR2<float>({
+XLA_TEST_P(ReshapeTest, R2ToR4_3x8_To_3x2x1x4) {
+  ComputationBuilder builder(client_, TestName());
+  std::unique_ptr<Literal> input_literal = Literal::CreateR2<float>({
       {0, 1, 2, 3, 4, 5, 6, 7},
       {100, 101, 102, 103, 104, 105, 106, 107},
       {200, 201, 202, 203, 204, 205, 206, 207},
   });
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1}, /*new_sizes=*/{3, 2, 1, 4});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1}, /*new_sizes=*/{3, 2, 1, 4});
 
   // clang-format off
-  Array4D<float> expected = {
+  auto expected_literal = Literal::CreateR4<float>({
     {{{0, 1, 2, 3}},
      {{4, 5, 6, 7}}},
     {{{100, 101, 102, 103}},
      {{104, 105, 106, 107}}},
     {{{200, 201, 202, 203}},
      {{204, 205, 206, 207}}}
-  };
+  });
   // clang-format on
-  ComputeAndCompareR4<float>(&builder, expected, {input_data.get()},
-                             zero_error_spec_);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
 // Tests R2->R4 reshape with the reshape dimensions {1, 0}.
-XLA_TEST_F(ReshapeTest, R2ToR4_3x8_To_3x2x1x4_Dimensions_10) {
-  std::unique_ptr<Literal> input = Literal::CreateR2<float>({
+XLA_TEST_P(ReshapeTest, R2ToR4_3x8_To_3x2x1x4_Dimensions_10) {
+  ComputationBuilder builder(client_, TestName());
+  std::unique_ptr<Literal> input_literal = Literal::CreateR2<float>({
       {0, 1, 2, 3, 4, 5, 6, 7},
       {100, 101, 102, 103, 104, 105, 106, 107},
       {200, 201, 202, 203, 204, 205, 206, 207},
   });
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{1, 0}, /*new_sizes=*/{3, 2, 1, 4});
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{1, 0}, /*new_sizes=*/{3, 2, 1, 4});
 
   // clang-format off
-  Array4D<float> expected = {
+  auto expected_literal = Literal::CreateR4<float>({
     {{{0, 100, 200, 1}},
      {{101, 201, 2, 102}}},
     {{{202, 3, 103, 203}},
      {{4, 104, 204, 5}}},
     {{{105, 205, 6, 106}},
      {{206, 7, 107, 207}}}
-  };
+  });
   // clang-format on
-  ComputeAndCompareR4<float>(&builder, expected, {input_data.get()},
-                             zero_error_spec_);
+  ComputeAndCompareLiteral(&builder, *expected_literal, {input.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) {
+XLA_TEST_P(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) {
+  ComputationBuilder builder(client_, TestName());
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   Array4D<float> input(2, 1, 1, 1);
@@ -559,12 +726,10 @@ XLA_TEST_F(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{2, 1});
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{2, 1});
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape({2, 1}, {1, 0}, *input_literal);
@@ -572,7 +737,8 @@ XLA_TEST_F(ReshapeTest, R4ToR2_2x1x1x1_To_2x1) {
                            zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) {
+XLA_TEST_P(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) {
+  ComputationBuilder builder(client_, TestName());
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   Array4D<float> input(2, 1, 4, 1);
@@ -582,12 +748,10 @@ XLA_TEST_F(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{4, 2});
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 2, 3}, /*new_sizes=*/{4, 2});
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape({4, 2}, {1, 0}, *input_literal);
@@ -596,7 +760,8 @@ XLA_TEST_F(ReshapeTest, R4ToR2_2x1x4x1_To_4x2) {
 }
 
 // Tests R4->R2 reshape with the reshape dimensions {0, 2, 1, 3}.
-XLA_TEST_F(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) {
+XLA_TEST_P(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) {
+  ComputationBuilder builder(client_, TestName());
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   Array4D<float> input(5, 10, 2, 3);
@@ -606,12 +771,11 @@ XLA_TEST_F(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 2, 1, 3}, /*new_sizes=*/{5, 60});
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 2, 1, 3},
+                  /*new_sizes=*/{5, 60});
 
   Array2D<float> expected_array(5, 60);
   input.Each([&](tensorflow::gtl::ArraySlice<int64> indices, float* cell) {
@@ -619,10 +783,12 @@ XLA_TEST_F(ReshapeTest, R4ToR2_5x10x2x3_To_5x60_Dimensions_0213) {
         *cell;
   });
   auto expected = Literal::CreateR2FromArray2D(expected_array);
-  ComputeAndCompareLiteral(&builder, *expected, {input_data.get()});
+  ComputeAndCompareLiteral(&builder, *expected, {input_data.get()},
+                           zero_error_spec_);
 }
 
-XLA_TEST_F(ReshapeTest, NoopReshape) {
+XLA_TEST_P(ReshapeTest, NoopReshape) {
+  ComputationBuilder builder(client_, TestName());
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   Array4D<float> input_array(2, 3, 5, 7);
@@ -632,18 +798,17 @@ XLA_TEST_F(ReshapeTest, NoopReshape) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input_array, LayoutUtil::MakeLayout({1, 2, 3, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
-  ComputationBuilder builder(client_, TestName());
-  auto input = builder.Parameter(0, input_literal->shape(), "input");
-  builder.Reshape(input, /*dimensions=*/{3, 0, 1, 2},
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{3, 0, 1, 2},
                   /*new_sizes=*/{7, 2, 3, 5});
   Computation computation = builder.Build().ConsumeValueOrDie();
 
   ExecutionOptions execution_options = execution_options_;
   *execution_options.mutable_shape_with_output_layout() =
-      ShapeUtil::MakeShapeWithLayout(F32, {7, 2, 3, 5}, {2, 3, 0, 1});
+      ShapeUtil::MakeShapeWithLayout(use_bfloat16() ? BF16 : F32, {7, 2, 3, 5},
+                                     {2, 3, 0, 1});
   std::unique_ptr<Literal> output_literal =
       client_
           ->ExecuteAndTransfer(computation, {input_data.get()},
@@ -652,35 +817,43 @@ XLA_TEST_F(ReshapeTest, NoopReshape) {
 
   // Since the reshape is a no-op, verify that it does not change the underlying
   // data.
-  EXPECT_EQ(tensorflow::gtl::ArraySlice<float>(input_literal->f32s()),
-            tensorflow::gtl::ArraySlice<float>(output_literal->f32s()));
+  if (use_bfloat16()) {
+    auto expected = LiteralTestUtil::ConvertF32ToBF16(*input_literal);
+    EXPECT_EQ(expected->data<bfloat16>(), output_literal->data<bfloat16>());
+  } else {
+    EXPECT_EQ(input_literal->data<float>(), output_literal->data<float>());
+  }
 }
 
-XLA_TEST_F(ReshapeTest, R4ToR4Reshape_Trivial) {
-  auto literal_1x2x3x4 = Literal::CreateR4(
+XLA_TEST_P(ReshapeTest, R4ToR4Reshape_Trivial) {
+  ComputationBuilder builder(client_, TestName());
+  auto literal_1x2x3x4 = Literal::CreateR4<float>(
       {{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}},
         {{13, 14, 15, 16}, {17, 18, 19, 20}, {21, 22, 23, 24}}}});
 
-  ComputationBuilder builder(client_, TestName());
-  auto input = builder.ConstantLiteral(*literal_1x2x3x4);
-  builder.Reshape(input, /*dimensions=*/{0, 1, 2, 3},
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *literal_1x2x3x4, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 2, 3},
                   /*new_sizes=*/{1, 2, 3, 4});
 
-  ComputeAndCompareLiteral(&builder, *literal_1x2x3x4, {});
+  ComputeAndCompareLiteral(&builder, *literal_1x2x3x4, {input.get()});
 }
 
-XLA_TEST_F(ReshapeTest, R4ToR4Reshape) {
-  auto literal_1x2x3x4 = Literal::CreateR4(
+XLA_TEST_P(ReshapeTest, R4ToR4Reshape) {
+  auto literal_1x2x3x4 = Literal::CreateR4<float>(
       {{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}},
         {{13, 14, 15, 16}, {17, 18, 19, 20}, {21, 22, 23, 24}}}});
 
   ComputationBuilder builder(client_, TestName());
-  auto input = builder.ConstantLiteral(*literal_1x2x3x4);
-  builder.Reshape(input, /*dimensions=*/{1, 3, 2, 0},
+  ComputationDataHandle parameter;
+  auto input = CreateParameterAndTransferLiteral(0, *literal_1x2x3x4, "input",
+                                                 &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{1, 3, 2, 0},
                   /*new_sizes=*/{2, 4, 3, 1});
 
   // clang-format off
-  auto expected_2x4x3x1 = Literal::CreateR4(
+  auto expected_2x4x3x1 = Literal::CreateR4<float>(
       {{{{1}, {5}, {9}},
         {{2}, {6}, {10}},
         {{3}, {7}, {11}},
@@ -691,10 +864,10 @@ XLA_TEST_F(ReshapeTest, R4ToR4Reshape) {
         {{16}, {20}, {24}}}});
   // clang-format on
 
-  ComputeAndCompareLiteral(&builder, *expected_2x4x3x1, {});
+  ComputeAndCompareLiteral(&builder, *expected_2x4x3x1, {input.get()});
 }
 
-XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeSimple) {
+XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeSimple) {
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   std::vector<int64> bounds = {2, 2, 2, 2};
@@ -706,12 +879,12 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeSimple) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds);
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2},
+                  /*new_sizes=*/new_bounds);
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal)
@@ -723,7 +896,7 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeSimple) {
                            zero_error_spec_, &expected->shape());
 }
 
-XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstEffectiveR2) {
+XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeMajorFirstEffectiveR2) {
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   std::vector<int64> bounds = {1, 1, 250, 300};
@@ -735,12 +908,12 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstEffectiveR2) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds);
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2},
+                  /*new_sizes=*/new_bounds);
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal)
@@ -752,7 +925,7 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstEffectiveR2) {
                            zero_error_spec_, &expected->shape());
 }
 
-XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1) {
+XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1) {
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   std::vector<int64> bounds = {5, 5, 1, 10};
@@ -764,12 +937,12 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds);
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2},
+                  /*new_sizes=*/new_bounds);
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal)
@@ -781,7 +954,7 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1) {
                            zero_error_spec_, &expected->shape());
 }
 
-XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1InR2) {
+XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1InR2) {
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   // This happens in NN-Builder MNIST.
@@ -794,12 +967,12 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1InR2) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({3, 2, 1, 0}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{0, 1, 3, 2}, /*new_sizes=*/new_bounds);
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{0, 1, 3, 2},
+                  /*new_sizes=*/new_bounds);
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape(new_bounds, {2, 3, 1, 0}, *input_literal)
@@ -811,7 +984,7 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeMajorFirstMinorEffectiveR1InR2) {
                            zero_error_spec_, &expected->shape());
 }
 
-XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeTrivialR2) {
+XLA_TEST_P(ReshapeTest, R4TwoMinorTransposeTrivialR2) {
   std::mt19937 rng;
   std::uniform_real_distribution<float> distribution;
   std::vector<int64> bounds = {3, 3, 1, 3};
@@ -823,12 +996,12 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeTrivialR2) {
   std::unique_ptr<Literal> input_literal =
       Literal::CreateR4FromArray4DWithLayout(
           input, LayoutUtil::MakeLayout({0, 1, 2, 3}));
-  std::unique_ptr<GlobalData> input_data =
-      client_->TransferToServer(*input_literal).ConsumeValueOrDie();
-
   ComputationBuilder builder(client_, TestName());
-  auto a = builder.Parameter(0, input_literal->shape(), "a");
-  builder.Reshape(a, /*dimensions=*/{1, 0, 2, 3}, /*new_sizes=*/new_bounds);
+  ComputationDataHandle parameter;
+  auto input_data = CreateParameterAndTransferLiteral(
+      0, *input_literal, "input", &builder, &parameter);
+  builder.Reshape(parameter, /*dimensions=*/{1, 0, 2, 3},
+                  /*new_sizes=*/new_bounds);
 
   std::unique_ptr<Literal> expected =
       LiteralTestUtil::Reshape(new_bounds, {1, 0, 2, 3}, *input_literal)
@@ -840,5 +1013,12 @@ XLA_TEST_F(ReshapeTest, R4TwoMinorTransposeTrivialR2) {
                            zero_error_spec_, &expected->shape());
 }
 
+#ifdef XLA_BACKEND_SUPPORTS_BFLOAT16
+INSTANTIATE_TEST_CASE_P(ReshapeTestInstance, ReshapeTest, ::testing::Bool());
+#else
+INSTANTIATE_TEST_CASE_P(ReshapeTestInstance, ReshapeTest,
+                        ::testing::ValuesIn(std::vector<bool>{false}));
+#endif
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/reverse_test.cc b/tensorflow/compiler/xla/tests/reverse_test.cc
index 1f6cfc85ccd25bb22db51411f7376489c14c3603..8fc841f14087cdea02fe44cdaea521ff92122aec 100644
--- a/tensorflow/compiler/xla/tests/reverse_test.cc
+++ b/tensorflow/compiler/xla/tests/reverse_test.cc
@@ -28,56 +28,89 @@ limitations under the License.
 namespace xla {
 namespace {
 
-class ReverseTest : public ClientLibraryTestBase {};
-
-// Tests the reverse operation on a scalar.
-XLA_TEST_F(ReverseTest, ReverseScalar) {
-  ComputationBuilder b(client_, TestName());
-  float input = 3.5f;
-  b.Rev(b.ConstantR0<float>(input), {});
-  ComputeAndCompareR0<float>(&b, input, {});
-}
-
-// Tests the reverse operation on a 0x0 float array on both dimensions.
-XLA_TEST_F(ReverseTest, Reverse0x0FloatArray) {
-  ComputationBuilder b(client_, TestName());
-  b.Rev(b.ConstantR2FromArray2D<float>(Array2D<float>(0, 0)), {0, 1});
-  ComputeAndCompareR2<float>(&b, Array2D<float>(0, 0), {});
-}
-
-// Tests the reverse operation on a 0x1 float array on both dimensions.
-XLA_TEST_F(ReverseTest, Reverse0x1FloatArray) {
-  ComputationBuilder b(client_, TestName());
-  b.Rev(b.ConstantR2FromArray2D<float>(Array2D<float>(0, 1)), {0, 1});
-  ComputeAndCompareR2<float>(&b, Array2D<float>(0, 1), {});
+#ifdef XLA_BACKEND_SUPPORTS_BFLOAT16
+// Tests both F32 and BF16.
+static std::array<bool, 2> use_bfloat16_params{false, true};
+#else
+// Only tests F32.
+static std::array<bool, 1> use_bfloat16_params{false};
+#endif
+
+struct ReverseSpec {
+  tensorflow::gtl::ArraySlice<int64> input_dims;
+  tensorflow::gtl::ArraySlice<int64> reversal;
+  bool use_bfloat16;
+
+  string ToTestCaseName() const {
+    return tensorflow::strings::Printf(
+        "reverse_%s_in_dims_%s_%s",
+        tensorflow::str_util::Join(input_dims, "x").c_str(),
+        tensorflow::str_util::Join(reversal, "x").c_str(),
+        use_bfloat16 ? "bf16" : "f32");
+  }
+};
+
+static std::vector<ReverseSpec> GetTestCases() {
+  // clang-format off
+  return ExpandUseBfloat16<ReverseSpec>(
+      use_bfloat16_params,
+      {{{}, {}},
+        {{0, 0}, {0, 1}},
+        {{0, 1}, {0, 1}},
+        {{1, 0}, {0, 1}},
+        {{1, 1}, {0, 1}},
+        {{2, 0, 4, 3}, {0, 2}},
+        {{2, 0, 4, 3}, {1, 3}},
+        {{1, 2, 3, 4}, {0, 3}},
+        {{4, 3, 2, 1}, {0, 1}},
+      });
+  // clang-format on
 }
 
-// Tests the reverse operation on a 1x0 float array on both dimensions.
-XLA_TEST_F(ReverseTest, Reverse1x0FloatArray) {
-  ComputationBuilder b(client_, TestName());
-  b.Rev(b.ConstantR2FromArray2D<float>(Array2D<float>(1, 0)), {0, 1});
-  ComputeAndCompareR2<float>(&b, Array2D<float>(1, 0), {});
+void PrintTo(const ReverseSpec& spec, std::ostream* os) {
+  *os << spec.ToTestCaseName();
 }
 
-// Tests the reverse operation on a 1x1 float array on both dimensions.
-XLA_TEST_F(ReverseTest, Reverse1x1FloatArray) {
-  ComputationBuilder b(client_, TestName());
-  Array2D<float> input({{3.5f}});
-  b.Rev(b.ConstantR2FromArray2D<float>(input), {0, 1});
-  ComputeAndCompareR2<float>(&b, input, {});
+class FloatReverseTest : public ClientLibraryTestBase,
+                         public ::testing::WithParamInterface<ReverseSpec> {
+ public:
+  FloatReverseTest() { set_use_bfloat16(GetParam().use_bfloat16); }
+};
+
+TEST_P(FloatReverseTest, Reverses) {
+  const ReverseSpec& spec = GetParam();
+  std::vector<float> input_vector(
+      ShapeUtil::ElementsIn(ShapeUtil::MakeShape(F32, spec.input_dims)));
+  std::iota(input_vector.begin(), input_vector.end(), 0.0);
+  auto r1_literal = Literal::CreateR1<float>(input_vector);
+  auto input_literal = r1_literal->Reshape(spec.input_dims).ConsumeValueOrDie();
+
+  ComputationBuilder builder(client_, TestName());
+  auto a = AddParam(*input_literal, &builder);
+  builder.Rev(a, spec.reversal);
+
+  std::unique_ptr<Literal> expected = input_literal->CloneToUnique();
+  std::vector<int64> output_indices(spec.input_dims.size());
+  expected->EachCell<float>(
+      [&](tensorflow::gtl::ArraySlice<int64> indices, float) {
+        for (int64 i = 0; i < indices.size(); ++i) {
+          output_indices[i] = indices[i];
+        }
+        float value = input_literal->Get<float>(indices);
+        for (int64 dim : spec.reversal) {
+          output_indices[dim] = (spec.input_dims[dim] - 1) - indices[dim];
+        }
+        expected->Set<float>(output_indices, value);
+      });
+  ComputeAndCompareLiteral(&builder, *expected, {});
 }
 
-XLA_TEST_F(ReverseTest, Reverse2x0x4x3FloatArrayDim02) {
-  ComputationBuilder b(client_, TestName());
-  b.Rev(b.ConstantR4FromArray4D<float>(Array4D<float>(2, 0, 4, 3)), {0, 2});
-  ComputeAndCompareR4<float>(&b, Array4D<float>(2, 0, 4, 3), {});
-}
+INSTANTIATE_TEST_CASE_P(FloatReverseInstance, FloatReverseTest,
+                        ::testing::ValuesIn(GetTestCases()),
+                        ::testing::PrintToStringParamName());
 
-XLA_TEST_F(ReverseTest, Reverse2x0x4x3FloatArrayDim13) {
-  ComputationBuilder b(client_, TestName());
-  b.Rev(b.ConstantR4FromArray4D<float>(Array4D<float>(2, 0, 4, 3)), {1, 3});
-  ComputeAndCompareR4<float>(&b, Array4D<float>(2, 0, 4, 3), {});
-}
+// A simple test class which not templated by float precision.
+class ReverseTest : public ClientLibraryTestBase {};
 
 // Tests the reverse operation on a 4D U8 array on dimension 0 and 3.
 XLA_TEST_F(ReverseTest, Reverse4DU8ArrayOnDim23) {
diff --git a/tensorflow/compiler/xla/tests/sample_file_test.cc b/tensorflow/compiler/xla/tests/sample_file_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..31b104f4e37f77d47f56ff8183ee1de1cc22e44d
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/sample_file_test.cc
@@ -0,0 +1,51 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This demonstrates how to use hlo_test_base to create a file based testcase
+// and compare results on gpu and cpu.
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/compiler/xla/service/platform_util.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace {
+
+class SampleFileTest : public HloTestBase {
+ protected:
+  SampleFileTest()
+      : HloTestBase(
+            /*test_platform=*/PlatformUtil::GetPlatform("gpu").ValueOrDie(),
+            /*reference_platform=*/PlatformUtil::GetPlatform("cpu")
+                .ValueOrDie()) {}
+};
+
+TEST_F(SampleFileTest, Convolution) {
+  const string& filename = "compiler/xla/tests/isolated_convolution.hlo";
+  string test_srcdir = tensorflow::testing::TensorFlowSrcRoot();
+  EXPECT_TRUE(RunAndCompareFromFile(
+      tensorflow::io::JoinPath(test_srcdir, filename), ErrorSpec{0.01}));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/sample_text_test.cc b/tensorflow/compiler/xla/tests/sample_text_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b4f2b74e3dc9e80f50454b28eb6f2502cef3e681
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/sample_text_test.cc
@@ -0,0 +1,66 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This demonstrates how to use hlo_test_base to create textual IR based
+// testcases.
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/gtl/optional.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace {
+
+using tensorflow::gtl::nullopt;
+
+class SampleTextTest : public HloTestBase {};
+
+TEST_F(SampleTextTest, Axpy) {
+  const string& hlo_string = R"(
+HloModule axpy_module:
+ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] {
+  %alpha = f32[] parameter(0)
+  %broadcast = f32[2,4]{1,0} broadcast(f32[] %alpha), dimensions={}
+  %x = f32[2,4]{1,0} parameter(1)
+  %multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0} %broadcast, f32[2,4]{1,0} %x)
+  %y = f32[2,4]{1,0} parameter(2)
+  ROOT %add = f32[2,4]{1,0} add(f32[2,4]{1,0} %multiply, f32[2,4]{1,0} %y)
+}
+)";
+  EXPECT_TRUE(RunAndCompareNoHloPasses(hlo_string, ErrorSpec{0.0001}));
+}
+
+TEST_F(SampleTextTest, Tuple) {
+  const string& hlo_string = R"(
+HloModule TupleCreate_module:
+ENTRY %TupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f32[2,3]) {
+  %v1 = f32[] parameter(0)
+  %v2 = f32[3]{0} parameter(1)
+  %v3 = f32[2,3]{1,0} parameter(2)
+  ROOT %tuple = (f32[], f32[3]{0}, f32[2,3]{1,0}) tuple(f32[] %v1, f32[3]{0} %v2, f32[2,3]{1,0} %v3)
+}
+)";
+  EXPECT_TRUE(RunAndCompare(hlo_string, nullopt));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/scalar_computations_test.cc b/tensorflow/compiler/xla/tests/scalar_computations_test.cc
index b5e7570778ffeca66cc15d7cd2b153639637a647..debf2d2d317fe64ca1ef86cb1f2978e76af1b55d 100644
--- a/tensorflow/compiler/xla/tests/scalar_computations_test.cc
+++ b/tensorflow/compiler/xla/tests/scalar_computations_test.cc
@@ -69,6 +69,13 @@ class ScalarComputationsTest : public ClientLibraryTestBase {
   }
 };
 
+XLA_TEST_F(ScalarComputationsTest, ReturnScalarF32) {
+  ComputationBuilder builder(client_, TestName());
+  builder.ConstantR0<float>(2.1f);
+
+  ComputeAndCompareR0<float>(&builder, 2.1f, {}, error_spec_);
+}
+
 XLA_TEST_F(ScalarComputationsTest, NegateScalarF32) {
   ComputationBuilder builder(client_, TestName());
   builder.Neg(builder.ConstantR0<float>(2.1f));
diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc
index c21124750ad512cad69b1483e708613ee2857ac0..ac163df127e0087c02777fa3d5ce7970c51b97b9 100644
--- a/tensorflow/compiler/xla/tests/slice_test.cc
+++ b/tensorflow/compiler/xla/tests/slice_test.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/literal_test_util.h"
 #include "tensorflow/compiler/xla/tests/test_macros.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -33,7 +34,6 @@ namespace xla {
 namespace {
 
 using ::tensorflow::str_util::Join;
-using ::tensorflow::strings::StrCat;
 
 class SliceTest : public ClientLibraryTestBase {};
 
@@ -211,6 +211,13 @@ class SliceR1Test : public ClientLibraryTestBase,
   }
 };
 
+string SliceR1TestDataToString(const ::testing::TestParamInfo<R1Spec>& data) {
+  const R1Spec& spec = data.param;
+  return ::tensorflow::strings::Printf("%lld_%lld_%lld_%lld", spec.input_dim0,
+                                       spec.slice_start, spec.slice_limit,
+                                       spec.slice_stride);
+}
+
 XLA_TEST_P(SliceR1Test, DoIt_F32) { Run<float>(GetParam()); }
 
 XLA_TEST_P(SliceR1Test, DoIt_F64) { Run<double>(GetParam()); }
@@ -223,30 +230,66 @@ XLA_TEST_P(SliceR1Test, DoIt_U64) { Run<uint64>(GetParam()); }
 
 XLA_TEST_P(SliceR1Test, DoIt_S64) { Run<int64>(GetParam()); }
 
-INSTANTIATE_TEST_CASE_P(                          //
-    SliceR1TestInstantiation,                     //
-    SliceR1Test,                                  //
-    ::testing::Values(                            //
-        R1Spec{10, 0, 0, 1},                      //
-        R1Spec{10, 7, 7, 1},                      //
-        R1Spec{10, 2, 4, 1},                      //
-        R1Spec{10, 2, 4, 2},                      //
-        R1Spec{10, 0, 10, 1},                     //
-        R1Spec{1024, 1024 - 4, 1024, 1},          //
-        R1Spec{4096, 7, 7 + 1024, 1},             //
-        R1Spec{10, 0, 10, 2},                     //
-        R1Spec{10, 0, 10, 3},                     //
-        R1Spec{10, 0, 10, 4},                     //
-        R1Spec{10, 0, 10, 5},                     //
-        R1Spec{10, 0, 10, 10},                    //
-        R1Spec{500, 200, 400, 7},                 //
-        R1Spec{4096, 1, 4095, 3},                 //
-        R1Spec{2047, 1024 - 24, 1024 + 160, 31},  //
-        R1Spec{2047, 1, 2046, 3 * 128},           //
-        R1Spec{4096, 1024 + 3, 4095, 500},        //
-        R1Spec{8192, 0, 8192, 1024 * 3 + 400}     //
-        )                                         //
+// Tests for R1 slice ops.
+// The format for each testcase is {input size, start, limit, stride}.
+// clang-format off
+INSTANTIATE_TEST_CASE_P(
+    SliceR1TestInstantiation,
+    SliceR1Test,
+    ::testing::Values(
+        R1Spec{10, 0, 0, 1},
+        R1Spec{10, 7, 7, 1},
+        R1Spec{10, 0, 5, 1},
+        R1Spec{10, 3, 5, 1},
+        R1Spec{10, 0, 10, 1},
+        R1Spec{1024, 0, 5, 1},
+        R1Spec{1024, 3, 5, 1},
+        R1Spec{1024 + 17, 0, 5, 1},
+        R1Spec{1024 + 17, 3, 5, 1},
+        R1Spec{1024 + 17, 1024, 1024 + 6, 1},
+        R1Spec{1024 + 17, 1024 + 1, 1024 + 6, 1},
+        R1Spec{1024, 1024 - 4, 1024, 1},
+        R1Spec{4 * 1024, 7, 7 + 1024, 1},
+        R1Spec{4 * 1024, 0, 4 * 1024, 1},
+        R1Spec{4 * 1024, 1, 4 * 1024 - 1, 1},
+        R1Spec{4 * 1024, 1024, 3 * 1024, 1},
+        R1Spec{4 * 1024, 1024 + 1, 3 * 1024 - 1, 1},
+        R1Spec{16 * 1024, 0, 5, 1},
+        R1Spec{16 * 1024, 3, 5, 1},
+        R1Spec{16 * 1024 + 17, 0, 5, 1},
+        R1Spec{16 * 1024 + 17, 3, 5, 1},
+        R1Spec{16 * 1024 + 17, 16 * 1024, 16 * 1024 + 6, 1},
+        R1Spec{16 * 1024 + 17, 16 * 1024 + 1, 16 * 1024 + 6, 1},
+        R1Spec{16 * 1024, 4 * 1024 - 17, 8 * 1024 - 18, 1},
+        R1Spec{64 * 1024, 0, 64 * 1024, 1},
+        R1Spec{64 * 1024, 1, 64 * 1024 - 1, 1},
+        R1Spec{64 * 1024, 1024, 63 * 1024, 1},
+        R1Spec{64 * 1024, 1024 + 1, 63 * 1024 - 1, 1},
+        R1Spec{64 * 1024, 32 * 1024, 33 * 1024, 1},
+        R1Spec{64 * 1024, 32 * 1024 + 1, 33 * 1024 - 1, 1},
+        R1Spec{64 * 1024, 32 * 1024 - 17, 36 * 1024 - 18, 1},
+// TODO(b/69425338): This uses too much memory on GPU.
+#ifndef XLA_TEST_BACKEND_GPU
+        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1},
+        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1},
+        R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1},
+#endif
+        R1Spec{10, 2, 4, 2},
+        R1Spec{10, 0, 10, 2},
+        R1Spec{10, 0, 10, 3},
+        R1Spec{10, 0, 10, 4},
+        R1Spec{10, 0, 10, 5},
+        R1Spec{10, 0, 10, 10},
+        R1Spec{500, 200, 400, 7},
+        R1Spec{4096, 1, 4095, 3},
+        R1Spec{2047, 1024 - 24, 1024 + 160, 31},
+        R1Spec{2047, 1, 2046, 3 * 128},
+        R1Spec{4096, 1024 + 3, 4095, 500},
+        R1Spec{8192, 0, 8192, 1024 * 3 + 400}
+        ),
+    SliceR1TestDataToString
 );
+// clang-format on
 
 struct R2Spec {
   int64 input_dim0;
@@ -339,7 +382,7 @@ struct R4Spec {
 
 string R4SpecToString(const ::testing::TestParamInfo<R4Spec>& data) {
   const R4Spec& spec = data.param;
-  return StrCat(                                   //
+  return tensorflow::strings::StrCat(              //
       "input_", Join(spec.input_dims, "x"),        //
       "__layout_", Join(spec.input_layout, ""),    //
       "__starts_", Join(spec.slice_starts, "x"),   //
diff --git a/tensorflow/compiler/xla/tests/test_macros.h b/tensorflow/compiler/xla/tests/test_macros.h
index 28a2d0198a707cec1aa5e0fbed341ee9b2a927f7..cc4eaf62f50d1fa622c705fab810fe1e1b0fbf08 100644
--- a/tensorflow/compiler/xla/tests/test_macros.h
+++ b/tensorflow/compiler/xla/tests/test_macros.h
@@ -36,6 +36,7 @@ limitations under the License.
 #define DISABLED_ON_CPU(X) X
 #define DISABLED_ON_CPU_PARALLEL(X) X
 #define DISABLED_ON_GPU(X) X
+#define DISABLED_ON_INTERPRETER(X) X
 
 // We need this macro instead of pasting directly to support nesting
 // the DISABLED_ON_FOO macros, as in the definition of DISABLED_ON_CPU.
@@ -62,6 +63,11 @@ limitations under the License.
 # define DISABLED_ON_GPU(X) XLA_TEST_PASTE(DISABLED_, X)
 #endif  // XLA_TEST_BACKEND_GPU
 
+#ifdef XLA_TEST_BACKEND_INTERPRETER
+# undef DISABLED_ON_INTERPRETER
+# define DISABLED_ON_INTERPRETER(X) XLA_TEST_PASTE(DISABLED_, X)
+#endif  // XLA_TEST_BACKEND_INTERPRETER
+
 // clang-format on
 
 namespace xla {
diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc
index 0d56c9f48363d0569921d7c76050dcc66208931b..8b10aef5b81c18648b6e255445d66a6d195f8a76 100644
--- a/tensorflow/compiler/xla/tests/test_utils.cc
+++ b/tensorflow/compiler/xla/tests/test_utils.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/tests/test_utils.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
+#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
 #include "tensorflow/compiler/xla/service/hlo_verifier.h"
 #include "tensorflow/compiler/xla/service/transfer_manager.h"
 
@@ -27,10 +28,31 @@ void PopulateWithRandomFloatingPointData(Literal* literal) {
   CHECK_EQ(literal->shape().element_type(),
            primitive_util::NativeToPrimitiveType<FloatT>());
   std::minstd_rand0 engine;
-  std::uniform_real_distribution<FloatT> generator(0.0f, 1.0f);
+  // Create uniform numbers between 1 and 1.125 ot avoid creating denormal
+  // numbers.
+  std::uniform_real_distribution<FloatT> generator(1.0f, 1.125f);
   TF_CHECK_OK(literal->Populate<FloatT>(
+      [&](tensorflow::gtl::ArraySlice<int64> indices) {
+        // Generate a random uniforma number from -0.0625 and 0.0625 and bias it
+        // with  a position dependent nubmer with mean 0.037109375. These number
+        // should allow for long chains of accumulation without being too close
+        // to zero or to large to accumulate all numbers accurately.
+        return (generator(engine) - 1.0625) +
+               static_cast<FloatT>(Product(indices) % 113 - 47) /
+                   static_cast<FloatT>(256.0f);
+      }));
+}
+
+// The standard library does not have a case for bfloat16, unsurprisingly, so we
+// handle that one specially.
+template <>
+void PopulateWithRandomFloatingPointData<bfloat16>(Literal* literal) {
+  CHECK_EQ(literal->shape().element_type(), BF16);
+  std::minstd_rand0 engine;
+  std::uniform_real_distribution<float> generator(-0.9f, 1.0f);
+  TF_CHECK_OK(literal->Populate<bfloat16>(
       [&](tensorflow::gtl::ArraySlice<int64> /*indices*/) {
-        return generator(engine);
+        return static_cast<bfloat16>(generator(engine));
       }));
 }
 
@@ -47,42 +69,136 @@ void PopulateWithRandomIntegralData(Literal* literal) {
       }));
 }
 
-bool LooksLikeSum(const HloInstruction& instruction) {
-  return instruction.opcode() == HloOpcode::kAdd &&
-         instruction.operand(0)->opcode() == HloOpcode::kParameter &&
-         instruction.operand(1)->opcode() == HloOpcode::kParameter &&
-         instruction.operand(0) != instruction.operand(1);
+// Matches binary addition computations.
+bool LooksLikeSum(const HloComputation& computation) {
+  const HloInstruction* const root = computation.root_instruction();
+  return root->opcode() == HloOpcode::kAdd &&
+         computation.num_parameters() == 2 &&
+         root->operand(0)->opcode() == HloOpcode::kParameter &&
+         root->operand(1)->opcode() == HloOpcode::kParameter &&
+         root->operand(0) != root->operand(1);
+}
+
+// Reduce, ReduceWindow, and SelectAndScatter ops may use binary addition,
+// which requires an init_value of 0 rather than a random value.
+bool NeedsZeroInitValue(const HloUse& use) {
+  const HloInstruction* const instruction = use.instruction;
+  const HloOpcode opcode = instruction->opcode();
+  const int64 op_num = use.operand_number;
+  return (
+      ((opcode == HloOpcode::kReduce || opcode == HloOpcode::kReduceWindow) &&
+       op_num == 1 && LooksLikeSum(*instruction->to_apply())) ||
+      (opcode == HloOpcode::kSelectAndScatter && op_num == 2 &&
+       LooksLikeSum(*instruction->scatter())));
 }
 
-// Given an instruction and operand number, replace the given operand with
-// a Literal Constant Zero. Handle the case of a fusion instruction by
-// replacing the fusion's parent's parameter with a Literal Constant Zero,
-// unless the fusion's parent is itself a fusion.
-Status MaybeReplaceParameterInputWithZero(HloInstruction* const instruction,
-                                          const int64 operand_number) {
-  CHECK_LT(operand_number, instruction->operand_count());
-  if (instruction->operand(operand_number)->opcode() != HloOpcode::kParameter) {
-    return Status::OK();
+// Generate random values that are constrained to the input_shape minus the
+// output_shape so as not to produce wrapping slices, for instance.
+std::unique_ptr<Literal> MakeRandomNonwrappingSliceIndex(
+    const Shape& input_shape, const Shape& slice_shape) {
+  const int64 rank = ShapeUtil::Rank(input_shape);
+  std::vector<int32> start_indices(rank);
+  std::minstd_rand0 engine;
+  for (int i = 0; i < rank; ++i) {
+    const int32 upper_bound = ShapeUtil::GetDimension(input_shape, i) -
+                              ShapeUtil::GetDimension(slice_shape, i);
+    std::uniform_int_distribution<int32> generator(0, upper_bound);
+    start_indices[i] = generator(engine);
   }
+  return Literal::CreateR1<int32>(start_indices);
+}
 
-  HloComputation* const computation = instruction->parent();
-  std::unique_ptr<HloInstruction> zero = HloInstruction::CreateConstant(
-      MakeUnique<Literal>(Literal::Zero(instruction->shape().element_type())));
+// Use dataflow analysis on each parameter to see if there are uses that would
+// be problematic when generating input data.  Returns the list of instructions
+// that correspond to their uses.
+//
+// Should be paired with the CreateLiteralForConstrainedUses() function below.
+std::vector<HloInstruction*> FindConstrainedUses(
+    const HloDataflowAnalysis& dataflow, const HloInstruction& param) {
+  std::vector<HloInstruction*> constrained_uses;
+  for (const auto& pair : dataflow.GetInstructionValueSet(&param)) {
+    const HloValue& value = dataflow.GetUniqueValueAt(&param, pair.first);
+    for (const HloUse& use : value.uses()) {
+      HloInstruction* instruction = use.instruction;
+      const HloOpcode opcode = instruction->opcode();
+      const int64 op_num = use.operand_number;
+      if ((opcode == HloOpcode::kDynamicSlice && op_num == 1) ||
+          (opcode == HloOpcode::kDynamicUpdateSlice && op_num == 2)) {
+        constrained_uses.push_back(instruction);
+      } else if (opcode == HloOpcode::kFusion) {
+        const HloInstruction* const to_analyze =
+            instruction->fused_parameter(op_num);
+        auto fused_uses = FindConstrainedUses(dataflow, *to_analyze);
+        constrained_uses.insert(constrained_uses.end(), fused_uses.begin(),
+                                fused_uses.end());
+      } else if (NeedsZeroInitValue(use)) {
+        constrained_uses.push_back(instruction);
+      } else if (opcode == HloOpcode::kConvert ||
+                 opcode == HloOpcode::kReducePrecision) {
+        auto converted_uses = FindConstrainedUses(dataflow, *instruction);
+        constrained_uses.insert(constrained_uses.end(), converted_uses.begin(),
+                                converted_uses.end());
+      }
+    }
+  }
+  return constrained_uses;
+}
 
-  if (computation->IsFusionComputation()) {
-    HloInstruction* const fusion_instruction = computation->FusionInstruction();
-    if (fusion_instruction->IsFused()) {
-      return Unimplemented(
-          "Unable to replace fused parameter of fusion instruction");
+// Given a parameter, generate a random Literal to use as input if there exist
+// no constrained uses in the dataflow graph.  If such constraints exist,
+// generate a constrained literal (either bounded in the case of indices, or
+// zero in the case of init_values for reductions).
+StatusOr<std::unique_ptr<Literal>> CreateLiteralForConstrainedUses(
+    const tensorflow::gtl::ArraySlice<HloInstruction*> constrained_uses,
+    const HloInstruction& param) {
+  HloInstruction* needs_index = nullptr;
+  HloInstruction* needs_zero = nullptr;
+  for (HloInstruction* use : constrained_uses) {
+    switch (use->opcode()) {
+      case HloOpcode::kDynamicSlice:
+      case HloOpcode::kDynamicUpdateSlice:
+        if (needs_index != nullptr &&
+            !ShapeUtil::Equal(needs_index->shape(), use->shape())) {
+          return Unimplemented(
+              "Conflicting operand generation slice index constraints\n");
+        }
+        needs_index = use;
+        break;
+
+      case HloOpcode::kReduce:
+      case HloOpcode::kReduceWindow:
+      case HloOpcode::kSelectAndScatter:
+        needs_zero = use;
+        break;
+
+      default:
+        return Unimplemented(
+            "Constrained operand generation not implemented for %s.",
+            use->ToString().c_str());
     }
-    TF_RETURN_IF_ERROR(fusion_instruction->ReplaceOperandWith(
-        instruction->operand(operand_number)->parameter_number(),
-        fusion_instruction->parent()->AddInstruction(std::move(zero))));
+  }
+  if (needs_index != nullptr && needs_zero != nullptr) {
+    return Unimplemented(
+        "Conflicting operand generation constraints.\nNeeds index: %s\nNeeds "
+        "zero: %s\n",
+        needs_index->ToString().c_str(), needs_zero->ToString().c_str());
+  }
+  if (needs_index != nullptr) {
+    return MakeRandomNonwrappingSliceIndex(needs_index->operand(0)->shape(),
+                                           needs_index->shape());
+  } else if (needs_zero != nullptr) {
+    return Literal::CreateFromShape(param.shape());
   } else {
-    TF_RETURN_IF_ERROR(instruction->ReplaceOperandWith(
-        operand_number, computation->AddInstruction(std::move(zero))));
+    return MakeFakeLiteral(param.shape());
   }
-  return Status::OK();
+}
+
+// Given a module entry parameter, use the dataflow analysis to see if a
+// special case literal must be created, or if we can generate fake data.
+StatusOr<std::unique_ptr<Literal>> MakeConstrainedArgument(
+    const HloDataflowAnalysis& dataflow, const HloInstruction& param) {
+  const auto constrained_uses = FindConstrainedUses(dataflow, param);
+  return CreateLiteralForConstrainedUses(constrained_uses, param);
 }
 
 }  // namespace
@@ -99,6 +215,9 @@ StatusOr<std::unique_ptr<Literal>> MakeFakeLiteral(const Shape& shape) {
   }
   std::unique_ptr<Literal> literal = Literal::CreateFromShape(shape);
   switch (shape.element_type()) {
+    case BF16:
+      PopulateWithRandomFloatingPointData<bfloat16>(literal.get());
+      break;
     case F32:
       PopulateWithRandomFloatingPointData<float>(literal.get());
       break;
@@ -146,42 +265,20 @@ StatusOr<std::unique_ptr<Literal>> MakeFakeLiteral(const Shape& shape) {
 }
 
 StatusOr<std::vector<std::unique_ptr<Literal>>> MakeFakeArguments(
-    const HloModule& module) {
-  std::vector<std::unique_ptr<Literal>> arguments;
-  for (const ShapeLayout& shape_layout :
-       module.config().entry_computation_layout().parameter_layouts()) {
-    TF_ASSIGN_OR_RETURN(auto literal, MakeFakeLiteral(shape_layout.shape()));
-    arguments.push_back(std::move(literal));
+    HloModule* const module) {
+  TF_ASSIGN_OR_RETURN(auto dataflow, HloDataflowAnalysis::Run(module));
+  const auto params = module->entry_computation()->parameter_instructions();
+  std::vector<std::unique_ptr<Literal>> arguments(params.size());
+  for (int i = 0; i < params.size(); ++i) {
+    TF_ASSIGN_OR_RETURN(arguments[i],
+                        MakeConstrainedArgument(*dataflow, *params[i]));
   }
   return std::move(arguments);
 }
 
-Status ReplaceInitsWithConstants(HloModule* const module) {
-  for (HloComputation* const computation : module->computations()) {
-    for (HloInstruction* const instruction : computation->instructions()) {
-      const HloOpcode opcode = instruction->opcode();
-      if ((opcode == HloOpcode::kReduce ||
-           opcode == HloOpcode::kReduceWindow) &&
-          LooksLikeSum(*instruction->to_apply()->root_instruction())) {
-        TF_RETURN_IF_ERROR(MaybeReplaceParameterInputWithZero(instruction, 1));
-      } else if (opcode == HloOpcode::kSelectAndScatter &&
-                 LooksLikeSum(*instruction->scatter()->root_instruction())) {
-        TF_RETURN_IF_ERROR(MaybeReplaceParameterInputWithZero(instruction, 2));
-      }
-    }
-  }
-  return Status::OK();
-}
-
 Status VerifyHloModule(const perftools::gputools::Platform& platform,
                        HloModule* const module) {
-  return HloVerifier(
-             std::bind(
-                 &TransferManager::GetByteSizeRequirement,
-                 TransferManager::GetForPlatform(&platform).ConsumeValueOrDie(),
-                 std::placeholders::_1))
-      .Run(module)
-      .status();
+  return HloVerifier().Run(module).status();
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/test_utils.h b/tensorflow/compiler/xla/tests/test_utils.h
index 9aca162a185e5b22888229555b7bce88769c79a6..0fb024ffb074f1c90b75022bc7f5a8b58b03c0c2 100644
--- a/tensorflow/compiler/xla/tests/test_utils.h
+++ b/tensorflow/compiler/xla/tests/test_utils.h
@@ -60,13 +60,11 @@ StatusOr<std::unique_ptr<Literal>> MakeFakeLiteral(const Shape& shape);
 
 // Generates a vector of arguments containing fake data. The number, shape and
 // layout of the arguments is appropriate for given HLO module.
+//
+// Will handle special cases such as making sure that indices used for dynamic
+// slices are bounded, reduces that call adds use 0 as an init value, etc.
 StatusOr<std::vector<std::unique_ptr<Literal>>> MakeFakeArguments(
-    const HloModule& module);
-
-// Reductions using Adds, ReduceWindow, and SelectAndScatter, require their
-// init_value to be replaced with the constant 0.0f when testing, otherwise we
-// may generate a bad init_value when looking at the op in isolation.
-Status ReplaceInitsWithConstants(HloModule* const module);
+    HloModule* const module);
 
 // Check that a given module satisfies various constraints before trying to
 // execute it.
diff --git a/tensorflow/compiler/xla/tests/transfer_manager_test.cc b/tensorflow/compiler/xla/tests/transfer_manager_test.cc
index f2a64749482e5f5a8c5d72034fb7a4eee07baf48..268ba338f2e6740a1d1a046d5a85494f3cf2e9f8 100644
--- a/tensorflow/compiler/xla/tests/transfer_manager_test.cc
+++ b/tensorflow/compiler/xla/tests/transfer_manager_test.cc
@@ -46,9 +46,10 @@ class TransferManagerTest : public LocalClientTestBase {
   ~TransferManagerTest() override = default;
 
   std::unique_ptr<ScopedShapedBuffer> AllocateDeviceBuffer(const Shape& shape) {
-    return ScopedShapedBuffer::Allocate(
-               shape, GetOrCreateAllocator(local_client_->platform()),
-               /*device_ordinal=*/0, shape_size_fn_)
+    return transfer_manager_
+        ->AllocateScopedShapedBuffer(
+            shape, GetOrCreateAllocator(local_client_->platform()),
+            /*device_ordinal=*/0)
         .ValueOrDie();
   }
 
@@ -118,7 +119,7 @@ XLA_TEST_F(TransferManagerTest, TransferR1U8) {
                           transfer_manager_->TransferLiteralFromDevice(
                               stream_executor_, *device_buffer));
 
-  EXPECT_EQ(result->u8s_string(), test_string);
+  EXPECT_EQ(result->GetR1U8AsString(), test_string);
 }
 
 XLA_TEST_F(TransferManagerTest, TransferR2F32) {
@@ -211,5 +212,39 @@ XLA_TEST_F(TransferManagerTest, TransferNestedTuple) {
   LiteralTestUtil::ExpectEqual(*literal, *result);
 }
 
+XLA_TEST_F(TransferManagerTest, TransferComplexValue) {
+  std::unique_ptr<Literal> literal = Literal::CreateR1<complex64>(
+      {complex64(1.0f, 2.0f), complex64(42.0f, -123.4f)});
+  auto device_buffer = AllocateDeviceBuffer(literal->shape());
+
+  // Round trip literal through device.
+  ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice(
+      stream_executor_, *literal, *device_buffer));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Literal> result,
+                          transfer_manager_->TransferLiteralFromDevice(
+                              stream_executor_, *device_buffer));
+
+  LiteralTestUtil::ExpectEqual(*literal, *result);
+}
+
+XLA_TEST_F(TransferManagerTest, TransferComplexValueInTuple) {
+  std::unique_ptr<Literal> literal = Literal::MakeTuple(
+      {Literal::CreateR1<complex64>(
+           {complex64(1.0f, 2.0f), complex64(42.0f, -123.4f)})
+           .get(),
+       Literal::CreateR1<int32>({1, 2, 3, 4, 5, 6}).get(),
+       Literal::CreateR0<complex64>(complex64(0.3f, -0.4f)).get()});
+  auto device_buffer = AllocateDeviceBuffer(literal->shape());
+
+  // Round trip literal through device.
+  ASSERT_IS_OK(transfer_manager_->TransferLiteralToDevice(
+      stream_executor_, *literal, *device_buffer));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Literal> result,
+                          transfer_manager_->TransferLiteralFromDevice(
+                              stream_executor_, *device_buffer));
+
+  LiteralTestUtil::ExpectEqual(*literal, *result);
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc
index 5a012c93d64f6a6fca73aa422e20cf238c945ce9..a8bca70d85ddf168bc441231d6f43bead019b10a 100644
--- a/tensorflow/compiler/xla/tests/tuple_test.cc
+++ b/tensorflow/compiler/xla/tests/tuple_test.cc
@@ -57,6 +57,20 @@ XLA_TEST_F(TupleTest, TupleConstant) {
   ComputeAndCompareTuple(&builder, *value, {}, error_spec_);
 }
 
+// Tests a tuple made of scalar constants.
+XLA_TEST_F(TupleTest, TupleScalarConstant) {
+  ComputationBuilder builder(client_, TestName());
+
+  const float constant_scalar1 = 7.3f;
+  const float constant_scalar2 = 1.2f;
+  auto value =
+      Literal::MakeTuple({Literal::CreateR0<float>(constant_scalar1).get(),
+                          Literal::CreateR0<float>(constant_scalar2).get()});
+
+  auto result = builder.ConstantLiteral(*value);
+  ComputeAndCompareTuple(&builder, *value, {}, error_spec_);
+}
+
 // Tests the creation of tuple data.
 XLA_TEST_F(TupleTest, TupleCreate) {
   ComputationBuilder builder(client_, TestName());
@@ -445,5 +459,61 @@ XLA_TEST_F(TupleTest, GetTupleElementOfNestedTuple) {
   ComputeAndCompareR1<float>(&builder, expected, arguments, ErrorSpec(1e-5));
 }
 
+XLA_TEST_F(TupleTest, ComplexTuples) {
+  ComputationBuilder builder(client_, TestName());
+  {
+    Shape c64r0 = ShapeUtil::MakeShape(C64, {});
+    Shape c64r1 = ShapeUtil::MakeShape(C64, {2});
+    Shape c64r2 = ShapeUtil::MakeShape(C64, {3, 2});
+    Shape arg0_shape = ShapeUtil::MakeTupleShape(
+        {c64r0, ShapeUtil::MakeTupleShape({c64r1, c64r2})});
+    auto input0 = builder.Parameter(0, arg0_shape, "input0");
+    auto t0 = builder.GetTupleElement(input0, 0);
+    auto t1 = builder.GetTupleElement(input0, 1);
+    auto t10 = builder.GetTupleElement(t1, 0);
+    auto t11 = builder.GetTupleElement(t1, 1);
+    auto sum = builder.Add(builder.Add(t10, t11, {1}), t0);
+    auto input1 = builder.Parameter(1, c64r1, "input1");
+    auto prod = builder.Mul(input1, sum, {1});
+    builder.Tuple({builder.Tuple({prod, sum}),
+                   builder.ConstantR0<complex64>({123, 456})});
+  }
+
+  std::unique_ptr<GlobalData> arg0 =
+      client_
+          ->TransferToServer(*Literal::MakeTuple(
+              {Literal::CreateR0<complex64>({1, 2}).get(),
+               Literal::MakeTuple(
+                   {Literal::CreateR1<complex64>({{10, 20}, {30, 40}}).get(),
+                    Literal::CreateR2<complex64>(
+                        {{{100, 200}, {300, 400}},
+                         {{1000, 2000}, {3000, 4000}},
+                         {{10000, 20000}, {30000, 40000}}})
+                        .get()})
+                   .get()}))
+          .ConsumeValueOrDie();
+  std::unique_ptr<GlobalData> arg1 =
+      client_
+          ->TransferToServer(*Literal::CreateR1<complex64>({{1, 2}, {1, -2}}))
+          .ConsumeValueOrDie();
+  auto sum = Literal::CreateR2<complex64>({{{111, 222}, {331, 442}},
+                                           {{1011, 2022}, {3031, 4042}},
+                                           {{10011, 20022}, {30031, 40042}}});
+  auto prod = Literal::CreateFromShape(sum->shape());
+  ASSERT_TRUE(prod->Populate<complex64>(
+                      [&sum](tensorflow::gtl::ArraySlice<int64> indexes) {
+                        return sum->Get<complex64>(indexes) *
+                               (indexes[indexes.size() - 1] == 0
+                                    ? complex64(1, 2)
+                                    : complex64(1, -2));
+                      })
+                  .ok());
+  auto expected =
+      Literal::MakeTuple({Literal::MakeTuple({prod.get(), sum.get()}).get(),
+                          Literal::CreateR0<complex64>({123, 456}).get()});
+  ComputeAndCompareTuple(&builder, *expected, {arg0.get(), arg1.get()},
+                         error_spec_);
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc
index 49f673f5f0bf9b844ab4030383784208b4e2c58a..52157b837c383205f77a030ef98b2fd03a41aff5 100644
--- a/tensorflow/compiler/xla/tests/while_test.cc
+++ b/tensorflow/compiler/xla/tests/while_test.cc
@@ -357,8 +357,7 @@ TEST_F(WhileTest, WhileWithVectorResultIntoTuple) {
   ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001));
 }
 
-// TODO(b/63003356): 11-06-2017: fails on all back-ends with incorrect result.
-TEST_F(WhileTest, DISABLED_WhileWithPermutationAndTupleResult) {
+TEST_F(WhileTest, WhileWithPermutationAndTupleResult) {
   std::vector<Shape> shape_elements = {
       ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}),
       ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})};
@@ -411,8 +410,7 @@ TEST_F(WhileTest, DISABLED_WhileWithPermutationAndTupleResult) {
   ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001));
 }
 
-// TODO(b/63003356): 11-06-2017: fails on all back-ends with incorrect result.
-TEST_F(WhileTest, DISABLED_WhileWithPermutationAndVectorResult) {
+TEST_F(WhileTest, WhileWithPermutationAndVectorResult) {
   std::vector<Shape> shape_elements = {
       ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}),
       ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})};
@@ -565,6 +563,53 @@ TEST_F(WhileTest, WhileWithPredicateTupleResult) {
   ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0));
 }
 
+TEST_F(WhileTest, WhileWithTupleConstantScalarResult) {
+  std::vector<Shape> shape_elements = {ShapeUtil::MakeShape(S32, {}),
+                                       ShapeUtil::MakeShape(S32, {})};
+  Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements);
+
+  // Create a computation for the condition.
+  // Repeat for 5 iterations.
+  Computation condition;
+  {
+    ComputationBuilder builder(client_, "condition");
+    auto prev = builder.Parameter(0, result_shape, "prev");
+    auto iteration = builder.GetTupleElement(prev, 0);
+    builder.Gt(builder.ConstantR0<int32>(5), iteration);
+    condition = builder.Build().ConsumeValueOrDie();
+  }
+
+  // Create a computation for the body.
+  // Add 1 to the iteration variable and set the other tuple element to a
+  // constant.
+  Computation body;
+  {
+    ComputationBuilder builder(client_, "body");
+    auto prev = builder.Parameter(0, result_shape, "prev");
+    auto iteration = builder.GetTupleElement(prev, 0);
+    auto result =
+        builder.Tuple({builder.Add(iteration, builder.ConstantR0<int32>(1)),
+                       builder.ConstantR0<int32>(7)});
+    body = builder.Build().ConsumeValueOrDie();
+  }
+
+  // Create a While node with computations for the condition and the body.
+  ComputationBuilder builder(client_, "while");
+  auto init = builder.Tuple(
+      {builder.ConstantR0<int32>(0), builder.ConstantR0<int32>(7)});
+  auto result = builder.While(condition, body, init);
+  VLOG(2) << "while = "
+          << ShapeUtil::HumanString(
+                 *builder.GetShape(result).ConsumeValueOrDie());
+
+  auto expected_counter = Literal::CreateR0<int32>(5);
+  auto expected_data = Literal::CreateR0<int32>(7);
+  auto expected =
+      Literal::MakeTuple({expected_counter.get(), expected_data.get()});
+  VLOG(2) << "expected = " << ShapeUtil::HumanString(expected->shape());
+  ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001));
+}
+
 // Tests two while nodes when the result type T is a Tuple and the second
 // while node uses the result of the first while node which is used in two
 // nodes.
@@ -913,8 +958,7 @@ TEST_F(WhileTest, WhileWithPrngScalarResult) {
   }
 }
 
-// TODO(b/34969189) Fails with bad AtomicCmpSwap on GPU on 2017-09-11.
-TEST_F(WhileTest, DISABLED_ON_GPU(WhileThatSwapsParameterWithTupleElement)) {
+TEST_F(WhileTest, WhileThatSwapsParameterWithTupleElement) {
   auto element_shape = ShapeUtil::MakeShape(F32, {2});
 
   ComputationBuilder outer(client_, "outer");
@@ -950,8 +994,7 @@ TEST_F(WhileTest, DISABLED_ON_GPU(WhileThatSwapsParameterWithTupleElement)) {
                          ErrorSpec(1e-6));
 }
 
-// TODO(b/34969189) Fails with bad AtomicCmpSwap on GPU on 2017-09-11.
-TEST_F(WhileTest, DISABLED_ON_GPU(WhileThatSwapsParameterWithBroadcast)) {
+TEST_F(WhileTest, WhileThatSwapsParameterWithBroadcast) {
   auto element_shape = ShapeUtil::MakeShape(F32, {2});
 
   ComputationBuilder outer(client_, "outer");
@@ -1164,6 +1207,50 @@ TEST_F(WhileTest, WhileWithCallInsideCondition) {
   ComputeAndCompareR0<int32>(&builder, 5, {});
 }
 
+TEST_F(WhileTest, WhileWithLoopInvariantOperation) {
+  auto matrix_shape = ShapeUtil::MakeShape(F32, {2, 2});
+  auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
+  auto while_shape = ShapeUtil::MakeTupleShape(
+      {scalar_s32, matrix_shape, matrix_shape, matrix_shape});
+
+  // Create a computation for the condition: repeat for 5 iterations.
+  Computation condition;
+  {
+    ComputationBuilder builder(client_, "condition");
+    auto state = builder.Parameter(0, while_shape, "state");
+    builder.Gt(builder.ConstantR0<int32>(5), builder.GetTupleElement(state, 0));
+    TF_ASSERT_OK_AND_ASSIGN(condition, builder.Build());
+  }
+
+  Computation body;
+  {
+    ComputationBuilder builder(client_, "body");
+    auto state = builder.Parameter(0, while_shape, "state");
+    auto indvar = builder.GetTupleElement(state, 0);
+    auto input_0 = builder.GetTupleElement(state, 1);
+    auto input_1 = builder.GetTupleElement(state, 2);
+    auto output = builder.Tanh(builder.Dot(input_0, input_1));
+    auto indvar_next = builder.Add(indvar, builder.ConstantR0<int32>(1));
+    auto tuple_result = builder.Tuple({indvar_next, input_0, input_1, output});
+    TF_ASSERT_OK_AND_ASSIGN(body, builder.Build());
+  }
+
+  ComputationBuilder builder(client_, TestName());
+  auto matrix_input = builder.Parameter(0, matrix_shape, "matrix");
+  auto init = builder.Tuple(
+      {builder.ConstantR0<int32>(0), matrix_input, matrix_input, matrix_input});
+  auto while_instruction = builder.While(condition, body, init);
+  builder.GetTupleElement(while_instruction, 3);
+
+  TF_ASSERT_OK_AND_ASSIGN(auto param_value,
+                          client_->TransferToServer(*Literal::CreateR2<float>(
+                              {{1.0, 2.0}, {-1.0, -2.0}})));
+
+  ComputeAndCompareR2<float>(
+      &builder, {{-0.76159416, -0.96402758}, {0.76159416, 0.96402758}},
+      {param_value.get()}, ErrorSpec(4e-5));
+}
+
 void BM_WhileLoop(int num_iters) {
   // Benchmark a simple kernel to measure while loop overheads.
   tensorflow::testing::StopTiming();
diff --git a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..146fbadcb68e6c5d0fa0856c1c98b399df72051f
--- /dev/null
+++ b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc
@@ -0,0 +1,308 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/compiler/xla/array2d.h"
+#include "tensorflow/compiler/xla/client/computation_builder.h"
+#include "tensorflow/compiler/xla/client/local_client.h"
+#include "tensorflow/compiler/xla/service/platform_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/compiler/xla/tests/test_utils.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/regexp.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace {
+namespace se = ::perftools::gputools;
+
+class HloProfileTest : public ClientLibraryTestBase {};
+
+struct ParsedProfileOutputLine {
+  int64 cycles;
+  string cycles_percentage;
+  double usec;
+  string flops;
+  string trops;
+  string bytes_per_sec;
+  string bytes_per_cycle;
+  string name;
+};
+
+StatusOr<ParsedProfileOutputLine> ParseProfileOutputLine(const string& line,
+                                                         bool expect_flops,
+                                                         bool expect_trops) {
+  string separator = "[^:]*:: +";
+  string match_percentage = "\\d+\\.\\d\\d%";
+  string match_cycles = "(\\d+) cycles +\\( *(" + match_percentage + ")\\)";
+  string match_usecs = "([0-9.]+) usec";
+  string match_flops = expect_flops ? "([0-9.TGMk]+)FLOP/s" : "(<none>)";
+  string match_trops = expect_trops ? "([0-9.TGMk]+)TROP/s" : "(<none>)";
+  string match_bytes_per_sec = "([0-9.TGMKi]+)B/s";
+  string match_bytes_per_cycle = "([0-9.TGMKi]+)B/cycle";
+  string regexp_pattern = tensorflow::strings::StrCat(
+      " +", match_cycles, separator, match_usecs, separator, match_flops,
+      separator, match_trops, separator, match_bytes_per_sec, separator,
+      match_bytes_per_cycle, separator, "(.*)");
+
+  RE2 pattern(regexp_pattern);
+  ParsedProfileOutputLine parsed_line;
+  bool matched = RE2::FullMatch(
+      line, pattern, &parsed_line.cycles, &parsed_line.cycles_percentage,
+      &parsed_line.usec, &parsed_line.flops, &parsed_line.trops,
+      &parsed_line.bytes_per_sec, &parsed_line.bytes_per_cycle,
+      &parsed_line.name);
+  if (!matched) {
+    return tensorflow::errors::InvalidArgument(
+        "Input did not match regexp.  Input: ", line,
+        ", Regexp: ", regexp_pattern);
+  }
+
+  return parsed_line;
+}
+
+// Returns void so that we can ASSERT.
+void ExecuteAndFetchProfile(string* profile_output, LocalClient* client,
+                            const Computation& computation,
+                            const Shape& lhs_arg_shape,
+                            const Shape& rhs_arg_shape) {
+  LocalService* service = ClientLibrary::GetXlaService(client->platform());
+  Backend* backend = service->mutable_backend();
+  se::StreamExecutor* executor = backend->default_stream_executor();
+  DeviceMemoryAllocator* allocator = backend->memory_allocator();
+  auto* transfer_manager = backend->transfer_manager();
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<ScopedShapedBuffer> lhs_arg,
+      transfer_manager->AllocateScopedShapedBuffer(
+          lhs_arg_shape, allocator, backend->default_device_ordinal()));
+  TF_ASSERT_OK(transfer_manager->TransferLiteralToDevice(
+      executor, *Literal::CreateFromShape(lhs_arg_shape), *lhs_arg));
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<ScopedShapedBuffer> rhs_arg,
+      transfer_manager->AllocateScopedShapedBuffer(
+          rhs_arg_shape, allocator, backend->default_device_ordinal()));
+  TF_ASSERT_OK(transfer_manager->TransferLiteralToDevice(
+      executor, *Literal::CreateFromShape(rhs_arg_shape), *rhs_arg));
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<LocalExecutable> local_executable,
+      client->Compile(computation, {&lhs_arg_shape, &rhs_arg_shape},
+                      ExecutableBuildOptions()));
+
+  Executable* executable = local_executable->executable();
+  HloExecutionProfile hlo_execution_profile(
+      &executable->hlo_profile_printer(), &executable->hlo_profile_index_map());
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      Backend::StreamPtr stream_ptr,
+      backend->BorrowStream(backend->default_device_ordinal()));
+  ExecutableRunOptions exec_run_options;
+  exec_run_options.set_stream(stream_ptr.get());
+  exec_run_options.set_allocator(backend->memory_allocator());
+  exec_run_options.set_intra_op_thread_pool(
+      backend->eigen_intra_op_thread_pool_device());
+  ServiceExecutableRunOptions run_options(
+      exec_run_options, /*borrow_stream=*/nullptr,
+      backend->eigen_intra_op_thread_pool());
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto execution_result,
+      executable->ExecuteOnStream(&run_options, {lhs_arg.get(), rhs_arg.get()},
+                                  &hlo_execution_profile));
+  (void)execution_result;
+
+  *profile_output =
+      hlo_execution_profile.ToString(executor->GetDeviceDescription());
+
+  XLA_VLOG_LINES(4, *profile_output);
+}
+
+// TODO(b/71364943): This test exposes a bug in the parallel CPU backend.
+XLA_TEST_F(HloProfileTest, DISABLED_ON_CPU_PARALLEL(ProfileSingleComputation)) {
+  const int64 m = 256, k = 256, n = 256;
+  Shape lhs_shape = ShapeUtil::MakeShape(F32, {m, k});
+  Shape rhs_shape = ShapeUtil::MakeShape(F32, {m, k});
+
+  TF_ASSERT_OK_AND_ASSIGN(se::Platform * platform,
+                          PlatformUtil::GetDefaultPlatform());
+  TF_ASSERT_OK_AND_ASSIGN(LocalClient * client,
+                          ClientLibrary::GetOrCreateLocalClient(platform));
+
+  ComputationBuilder builder(client, TestName());
+  auto result = builder.Tanh(builder.Dot(
+      builder.Parameter(0, ShapeUtil::MakeShape(F32, {m, k}), "dot_lhs"),
+      builder.Parameter(1, ShapeUtil::MakeShape(F32, {k, n}), "dot_rhs")));
+
+  TF_ASSERT_OK_AND_ASSIGN(auto computation, builder.Build());
+
+  string profile_output;
+  ExecuteAndFetchProfile(&profile_output, client, computation, lhs_shape,
+                         rhs_shape);
+
+  std::vector<string> profile_output_lines =
+      tensorflow::str_util::Split(profile_output, '\n');
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      ParsedProfileOutputLine total_profile,
+      ParseProfileOutputLine(profile_output_lines[1], /*expect_flops=*/true,
+                             /*expect_trops=*/true));
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      ParsedProfileOutputLine dot_profile,
+      ParseProfileOutputLine(profile_output_lines[2], /*expect_flops=*/true,
+                             /*expect_trops=*/false));
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      ParsedProfileOutputLine tanh_profile,
+      ParseProfileOutputLine(profile_output_lines[3], /*expect_flops=*/false,
+                             /*expect_trops=*/true));
+
+  EXPECT_GT(total_profile.cycles, 0);
+  EXPECT_EQ(total_profile.cycles_percentage, "100.00%");
+
+  EXPECT_GT(total_profile.cycles, dot_profile.cycles);
+  EXPECT_NE(dot_profile.cycles_percentage, "0.00%");
+  EXPECT_NE(dot_profile.cycles_percentage, "100.00%");
+
+  EXPECT_GT(total_profile.cycles, tanh_profile.cycles);
+  EXPECT_NE(tanh_profile.cycles_percentage, "0.00%");
+  EXPECT_NE(tanh_profile.cycles_percentage, "100.00%");
+}
+
+// TODO(b/71364943): This test exposes a bug in the parallel CPU backend.
+//
+// TODO(b/71544591): The GPU backend does not record cycles spent in on Hlo
+// instructions "interior" to while nodes.
+XLA_TEST_F(HloProfileTest,
+           DISABLED_ON_GPU(DISABLED_ON_CPU_PARALLEL(ProfileWhileComputation))) {
+  const int64 size = 256;
+  Shape matrix_shape = ShapeUtil::MakeShape(F32, {size, size});
+  Shape while_result_shape =
+      ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(S32, {}), matrix_shape});
+
+  TF_ASSERT_OK_AND_ASSIGN(se::Platform * platform,
+                          PlatformUtil::GetDefaultPlatform());
+  TF_ASSERT_OK_AND_ASSIGN(LocalClient * client,
+                          ClientLibrary::GetOrCreateLocalClient(platform));
+
+  Computation condition;
+  {
+    ComputationBuilder builder(client, "condition");
+    auto state = builder.Parameter(0, while_result_shape, "state");
+    auto iteration = builder.GetTupleElement(state, 0);
+    builder.Gt(builder.ConstantR0<int32>(5), iteration);
+    TF_ASSERT_OK_AND_ASSIGN(condition, builder.Build());
+  }
+
+  Computation body;
+  {
+    ComputationBuilder builder(client, "body");
+    auto state = builder.Parameter(0, while_result_shape, "state");
+    auto matrix = builder.GetTupleElement(state, 1);
+    auto next_iteration = builder.Add(builder.GetTupleElement(state, 0),
+                                      builder.ConstantR0<int32>(1));
+    builder.Tuple({next_iteration, builder.Dot(matrix, matrix)});
+    TF_ASSERT_OK_AND_ASSIGN(body, builder.Build());
+  }
+
+  ComputationBuilder builder(client, TestName());
+  auto initial_while_state =
+      builder.Tuple({builder.ConstantR0<int32>(0),
+                     builder.Parameter(0, matrix_shape, "initial_value")});
+  auto while_result = builder.While(condition, body, initial_while_state);
+  builder.Add(builder.GetTupleElement(while_result, 1),
+              builder.Parameter(1, matrix_shape, "other_value"));
+
+  TF_ASSERT_OK_AND_ASSIGN(auto computation, builder.Build());
+
+  string profile_output;
+  ExecuteAndFetchProfile(&profile_output, client, computation, matrix_shape,
+                         matrix_shape);
+
+  std::vector<string> profile_output_lines =
+      tensorflow::str_util::Split(profile_output, '\n');
+
+  auto while_body_profile_start =
+      std::find_if(profile_output_lines.begin(), profile_output_lines.end(),
+                   [](tensorflow::StringPiece s) {
+                     return s.starts_with("Execution profile for body");
+                   });
+
+  ASSERT_NE(while_body_profile_start, profile_output_lines.end());
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      ParsedProfileOutputLine total_while_body_profile,
+      ParseProfileOutputLine(*std::next(while_body_profile_start, 1),
+                             /*expect_flops=*/false,
+                             /*expect_trops=*/false));
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      ParsedProfileOutputLine dot_profile,
+      ParseProfileOutputLine(*std::next(while_body_profile_start, 2),
+                             /*expect_flops=*/false,
+                             /*expect_trops=*/false));
+
+  EXPECT_GT(total_while_body_profile.cycles, 0);
+  EXPECT_EQ(total_while_body_profile.name, "[total]");
+  EXPECT_EQ(total_while_body_profile.cycles_percentage, "100.00%");
+
+  EXPECT_GT(total_while_body_profile.cycles, dot_profile.cycles);
+  EXPECT_NE(dot_profile.cycles_percentage, "0.00%");
+  EXPECT_NE(dot_profile.cycles_percentage, "100.00%");
+}
+}  // namespace
+}  // namespace xla
+
+static std::pair<int, char**> AddXlaHloProfileFlag(int argc, char** argv) {
+  // Intentional "leak".
+  char** new_argv = new char*[argc + 2];
+  for (int i = 0; i < argc; i++) {
+    new_argv[i] = argv[i];
+  }
+
+  // We do it this way (as opposed to piping in a modified DebugOptions
+  // instance) for better end-to-end integration testing.
+  new_argv[argc] = strdup("--xla_hlo_profile");
+
+  // Fusion can change the Hlo instructions that show up in the final Hlo
+  // executable, so block it here.
+  new_argv[argc + 1] = strdup("--xla_disable_hlo_passes=fusion");
+  return {argc + 2, new_argv};
+}
+
+GTEST_API_ int main(int argc, char** argv) {
+  std::vector<tensorflow::Flag> flag_list;
+  xla::legacy_flags::AppendDebugOptionsFlags(&flag_list);
+  std::tie(argc, argv) = AddXlaHloProfileFlag(argc, argv);
+
+  auto usage = tensorflow::Flags::Usage(argv[0], flag_list);
+  if (!tensorflow::Flags::Parse(&argc, argv, flag_list)) {
+    LOG(ERROR) << "\n" << usage;
+    return 2;
+  }
+
+  testing::InitGoogleTest(&argc, argv);
+  if (argc > 1) {
+    LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
+    return 2;
+  }
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/compiler/xla/text_literal_reader.cc b/tensorflow/compiler/xla/text_literal_reader.cc
index 4d060895d357493327ec50b38016478c65fef94d..6fa4c48e11d1102367b21bc21d4734466495ef0e 100644
--- a/tensorflow/compiler/xla/text_literal_reader.cc
+++ b/tensorflow/compiler/xla/text_literal_reader.cc
@@ -102,9 +102,9 @@ StatusOr<std::unique_ptr<Literal>> TextLiteralReader::ReadAllLines() {
         ShapeUtil::HumanString(shape).c_str());
   }
 
-  auto result = MakeUnique<Literal>();
+  auto result = MakeUnique<Literal>(shape);
   const float fill = std::numeric_limits<float>::quiet_NaN();
-  result->PopulateWithValue<float>(fill, AsInt64Slice(shape.dimensions()));
+  result->PopulateWithValue<float>(fill);
   std::vector<tensorflow::StringPiece> pieces;
   std::vector<tensorflow::StringPiece> coordinates;
   std::vector<int64> coordinate_values;
diff --git a/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc b/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc
index 78d8fb1f4330aed899ca917e66fae819a002b3a9..24417a0cb8212e59cc0af53bd5bb21afcf3e134b 100644
--- a/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc
+++ b/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc
@@ -69,7 +69,7 @@ void RealMain(tensorflow::gtl::ArraySlice<char*> args, bool compile) {
 
       fprintf(stdout, "HLO compiled for %s backend:\n%s\n",
               local_service->backend().platform()->Name().c_str(),
-              module.ToString().c_str());
+              module.ToString(HloPrintOptions::ShortParsable()).c_str());
     } else {
       const ComputationTracker& tracker = local_service->computation_tracker();
       UserComputation* user_computation =
@@ -80,7 +80,8 @@ void RealMain(tensorflow::gtl::ArraySlice<char*> args, bool compile) {
           tracker.BuildHloModule(versioned_handle, HloModuleConfig())
               .ConsumeValueOrDie();
 
-      fprintf(stdout, "%s\n", module->ToString().c_str());
+      fprintf(stdout, "%s\n",
+              module->ToString(HloPrintOptions::ShortParsable()).c_str());
     }
   }
 }
diff --git a/tensorflow/compiler/xla/tools/parser/BUILD b/tensorflow/compiler/xla/tools/parser/BUILD
index ce936af6c3376387c1ed9fa48da23b8af537f6e5..97aacf6b39f83978e732060817cd93ede81ca782 100644
--- a/tensorflow/compiler/xla/tools/parser/BUILD
+++ b/tensorflow/compiler/xla/tools/parser/BUILD
@@ -34,9 +34,9 @@ cc_library(
     deps = [
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/core:lib",
         "//tensorflow/core:regexp_internal",
     ],
diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/tools/parser/README.md
index 6232967f5f04cbf316d985357ae84c28335531e2..f0f3dd7785c13e505e1eb6d4c8cd4bad157c4993 100644
--- a/tensorflow/compiler/xla/tools/parser/README.md
+++ b/tensorflow/compiler/xla/tools/parser/README.md
@@ -1,24 +1,26 @@
-# HloModule string syntax
-
-TODO: Support all subcomputations (for fusion, reduce, ...).
-
-TODO: Support all extra attributes, e.g. dimensions, strides.
+# HLO Text Syntax
 
 ```yacc
 hlo_module
   : 'HloModule' name computations
   ;
 
+/* If no computation is marked as ENTRY, the last computation will be the entry
+computation of the module.*/
 computations
   : computation
   | computation computations
   ;
 
 computation
-  : 'ENTRY' name param_list '->' shape instruction_list
-  | name param_list '->' shape instruction_list
+  : 'ENTRY' name param_list_to_shape instruction_list
+  | name param_list_to_shape instruction_list
+  | 'ENTRY' name instruction_list
+  | name instruction_list
   ;
 
+/* If no instruction is marked as ROOT, the last instruction will be the root of
+its computation. */
 instruction_list
   : '{' instruction_list1 '}'
   ;
@@ -41,6 +43,7 @@ operands1
   ;
 operand
   : shape name
+  | name
   ;
 
 attributes
@@ -60,6 +63,10 @@ attribute_value
   | '{' sub_attributes '}'
   ;
 
+param_list_to_shape
+  : param_list '->' shape
+  ;
+
 param_list
   : '(' param_list1 ')'
   ;
@@ -84,6 +91,7 @@ tuple_elements
 name
   : identifier ':'
   | '%' identifier
+  | identifier
   ;
 
 identifier
@@ -108,7 +116,29 @@ non_tuple
   | rank2345
   ;
 rank2345
-  : shape nested_array
+  : shape sparse_or_nested_array
+  ;
+sparse_or_nested_array
+  : sparse_array
+  | nested_array
+  ;
+sparse_array
+  : '{' sparse_array1 '}'
+  ;
+sparse_array1
+  : sparse_array_item
+  | sparse_array1 ',' sparse_array_item
+  ;
+sparse_array_item
+  : multi_index ':' scalar
+  ;
+multi_index
+  : kInt
+  | '[' multi_index1 ']'
+  ;
+multi_index1
+  : kInt
+  | multi_index1 ',' kInt
   ;
 
 ```
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
index 56744440db1b17aa1cc8823feb1bad279f8f4f75..fc0e4444521247734fc240a03da669244fe1a6a4 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <unordered_map>
 
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -153,21 +152,21 @@ TokKind HloLexer::LexToken() {
   }
 }
 
-// Lex a shape, name, keyword, opcode, attribute name, or the dim labels
-// pattern.
+// Lex a shape, name, keyword, attribute name, the dim labels pattern, and
+// other identifiers.
 //
 // shape    ::= ([a-zA-Z0-9_]*[0-9]*)\[([0-9,]*)\](?:\s*{([0-9,]*)})?
 // name     ::= [a-zA-Z_][a-zA-Z0-9_.-]*:
 // keyword  ::= HloModule, ENTRY, ...
-// opcode   ::= add, greater-than, ...
 // attribute_name ::= condition, body, dimensions, ...
 // dim_labels_pattern ::= [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
+// identifiers ::= other cases that match [a-zA-Z_][a-zA-Z0-9_.-]*
 TokKind HloLexer::LexIdentifier() {
   {
     auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
     // 'consumable' will be advanced iff its prefix matches the pattern.
     static LazyRE2 shape_pattern = {
-        R"(^(\w*\d*)\[([\d,]*)\](?:{([\d,]*)})?)"};
+        R"(^(\w*\d*)\[([\d,]*)\](?:(dense|sparse)?{([\d,]+)})?)"};
     if (RE2::Consume(&consumable, *shape_pattern)) {
       auto status_or_shape = ShapeUtil::ParseShapeString(
           StringPieceFromPointers(token_start_, consumable.begin()));
@@ -220,20 +219,6 @@ TokKind HloLexer::LexIdentifier() {
 
 #undef KEYWORD
 
-  // See if this is an opcode.
-  auto opcode = StringToHloOpcode(identifier.ToString());
-  if (opcode.ok()) {
-    opcode_val_ = opcode.ValueOrDie();
-    return TokKind::kOpcode;
-  }
-
-  // See if this is an fusion kind.
-  auto kind = xla::StringToFusionKind(identifier.ToString());
-  if (kind.ok()) {
-    fusion_kind_val_ = kind.ValueOrDie();
-    return TokKind::kFusionKind;
-  }
-
   {
     auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
     static LazyRE2 dim_labels_pattern = {
@@ -244,8 +229,9 @@ TokKind HloLexer::LexIdentifier() {
       return TokKind::kDimLabels;
     }
   }
-  current_ptr_ = token_start_ + 1;
-  return TokKind::kError;
+
+  str_val_ = identifier.ToString();
+  return TokKind::kIdent;
 }
 
 // Lex names after a % character.
@@ -271,7 +257,8 @@ TokKind HloLexer::LexPercent() {
 // fp without exp ::= [-]?([0-9]+[.][0-9]*|[0-9]*[.][0-9]+)
 // dim_labels_pattern ::= [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
 // dxd_pattern ::= [0-9]+(x[0-9]+)+
-// pad_pattern ::= [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*
+// pad_pattern ::=
+//   [-]?[0-9]+_[-]?[0-9]+(_[0-9]+)?(x[-]?[0-9]+_[-]?[0-9]+(_[0-9]+)?)*
 // int ::=  [-]?[0-9]+
 // negative inf ::= '-inf'
 TokKind HloLexer::LexNumberOrPattern() {
@@ -289,7 +276,7 @@ TokKind HloLexer::LexNumberOrPattern() {
       R"([0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,})"};
   static LazyRE2 dxd_pattern = {R"([0-9]+(x[0-9]+)+)"};
   static LazyRE2 pad_pattern = {
-      R"([0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*)"};
+      R"([-]?[0-9]+_[-]?[0-9]+(_[0-9]+)?(x[-]?[0-9]+_[-]?[0-9]+(_[0-9]+)?)*)"};
 
   if (RE2::Consume(&consumable, *dim_labels_pattern)) {
     current_ptr_ = consumable.begin();
@@ -326,18 +313,43 @@ TokKind HloLexer::LexNumberOrPattern() {
   return TokKind::kError;
 }
 
-StringPiece HloLexer::GetCurrentLine() const {
-  const char* start = token_start_;
-  const char* end = current_ptr_;
-  if (!CanDereference(start) || !CanDereference(end)) {
-    return "LINE OUT OF RANGE";
+std::pair<unsigned, unsigned> HloLexer::GetLineAndColumn(LocTy location) const {
+  unsigned line_no = 1;
+  const char* start = buf_.begin();
+  const char* ptr = start;
+  if (line_no_cache_.last_query && CanDereference(line_no_cache_.last_query) &&
+      line_no_cache_.last_query <= location) {
+    ptr = line_no_cache_.last_query;
+    line_no = line_no_cache_.line_no_of_query;
   }
-  while (start > buf_.begin() && *start != '\n') {
-    start--;
+  for (; ptr != location; ptr++) {
+    if (*ptr == '\n') {
+      line_no++;
+    }
   }
-  while (end < buf_.end() && *end != '\n') {
-    end++;
+
+  // Update the line number cache.
+  line_no_cache_.last_query = ptr;
+  line_no_cache_.line_no_of_query = line_no;
+  size_t line_offset = StringPieceFromPointers(start, ptr).rfind('\n');
+  if (line_offset == StringPiece::npos) {
+    line_offset = 0;
   }
+  return {line_no, ptr - start - line_offset};
+}
+
+StringPiece HloLexer::GetLine(LocTy loc) const {
+  if (!CanDereference(loc)) {
+    return "LINE OUT OF RANGE";
+  }
+  size_t line_start =
+      StringPieceFromPointers(buf_.begin(), loc + 1).rfind('\n');
+  const char* start = line_start == StringPiece::npos
+                          ? buf_.begin()
+                          : buf_.begin() + line_start + 1;
+  size_t line_end = StringPieceFromPointers(loc, buf_.end()).find('\n');
+  const char* end = line_end == StringPiece::npos ? buf_.end() : loc + line_end;
+
   return StringPieceFromPointers(start, end);
 }
 
@@ -428,14 +440,12 @@ string TokKindToString(TokKind kind) {
       return "kDxD";
     case TokKind::kPad:
       return "kPad";
+    case TokKind::kIdent:
+      return "kIdent";
     case TokKind::kString:
       return "kString";
     case TokKind::kShape:
       return "kShape";
-    case TokKind::kOpcode:
-      return "kOpcode";
-    case TokKind::kFusionKind:
-      return "kFusionKind";
     case TokKind::kInt:
       return "kInt";
     case TokKind::kDecimal:
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h
index 5c9d1bf3912584040dc5260cc6730247d439fd60..27880b9b8afbfa58abfedc3b2cecd5236b78a6d6 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h
+++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.h
@@ -18,9 +18,8 @@ limitations under the License.
 
 #include <string>
 
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-#include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/tools/parser/hlo_token.h"
+#include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/logging.h"
@@ -48,6 +47,7 @@ class HloLexer {
       case TokKind::kDxD:
       case TokKind::kPad:
       case TokKind::kString:
+      case TokKind::kIdent:
         return str_val_;
       default:
         LOG(FATAL) << "This token does not have string value";
@@ -57,14 +57,6 @@ class HloLexer {
     CHECK(GetKind() == TokKind::kShape);
     return shape_val_;
   }
-  HloOpcode GetOpcodeVal() const {
-    CHECK(GetKind() == TokKind::kOpcode);
-    return opcode_val_;
-  }
-  HloInstruction::FusionKind GetFusionKindVal() const {
-    CHECK(GetKind() == TokKind::kFusionKind);
-    return fusion_kind_val_;
-  }
   int64 GetInt64Val() const {
     CHECK(GetKind() == TokKind::kInt);
     return int64_val_;
@@ -74,8 +66,16 @@ class HloLexer {
     return decimal_val_;
   }
 
-  // Returns the line of text that is currently being lexed.
-  tensorflow::StringPiece GetCurrentLine() const;
+  typedef const char* LocTy;
+
+  // Returns the location of the current token.
+  LocTy GetLoc() const { return token_start_; }
+
+  // Returns the line and column of a location in the buffer.
+  std::pair<unsigned, unsigned> GetLineAndColumn(LocTy location) const;
+
+  // Returns the whole line given the location.
+  tensorflow::StringPiece GetLine(LocTy loc) const;
 
  private:
   // Returns the current character. If it's neither the end of input buffer nor
@@ -114,10 +114,15 @@ class HloLexer {
   TokKind current_kind_;
   string str_val_;
   Shape shape_val_;
-  HloOpcode opcode_val_;
-  HloInstruction::FusionKind fusion_kind_val_;
   int64 int64_val_;
   double decimal_val_;
+
+  struct LineNoCacheTy {
+    const char* last_query;
+    unsigned line_no_of_query;
+  };
+  // This caches the line number of the previous query.
+  mutable LineNoCacheTy line_no_cache_{nullptr, 0};
 };
 
 }  // namespace tools
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
index 47979ec6f361789f29e8f7ff47793747330551fc..1c68e271e0f75d8facc36bd0878190f3db512972 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tools/parser/hlo_parser.h"
 
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -40,6 +41,8 @@ const double kF16max = 65504;
 // Parser for the HloModule::ToString() format text.
 class HloParser {
  public:
+  using LocTy = HloLexer::LocTy;
+
   explicit HloParser(StringPiece str, const HloModuleConfig& config)
       : lexer_(str), config_(config) {}
 
@@ -56,7 +59,7 @@ class HloParser {
   // ParseXXX returns false if an error occurred.
   bool ParseHloModule();
   bool ParseComputations();
-  bool ParseComputation();
+  bool ParseComputation(HloComputation** entry_computation);
   bool ParseInstructionList(HloComputation::Builder* builder,
                             string* root_name);
   bool ParseInstruction(HloComputation::Builder* builder, string* root_name);
@@ -65,6 +68,13 @@ class HloParser {
   bool ParseTupleLiteral(std::unique_ptr<Literal>* literal, const Shape& shape);
   bool ParseNonTupleLiteral(std::unique_ptr<Literal>* literal,
                             const Shape& shape);
+  bool ParseDenseLiteral(std::unique_ptr<Literal>* literal, const Shape& shape);
+  bool ParseSparseLiteral(std::unique_ptr<Literal>* literal,
+                          const Shape& shape);
+  template <typename LiteralNativeT>
+  bool ParseSparseLiteralHelper(std::unique_ptr<Literal>* literal,
+                                const Shape& shape);
+
   // Sets the sub-value of literal at the given index to the given value. The
   // literal's shape must have the default layout.
   bool SetValueInLiteral(int64 value, int64 linear_index, Literal* literal);
@@ -96,6 +106,7 @@ class HloParser {
     kString,
     kBracedInt64List,
     kHloComputation,
+    kFftType,
     kWindow,
     kConvolutionDimensionNumbers,
     kSharding,
@@ -104,6 +115,7 @@ class HloParser {
     kPaddingConfig,
     kMetadata,
     kFusionKind,
+    kDistribution,
   };
 
   struct AttrConfig {
@@ -167,20 +179,30 @@ class HloParser {
   bool ParseInt64List(const TokKind start, const TokKind end,
                       const TokKind delim, std::vector<int64>* result);
 
+  bool ParseParamListToShape(Shape* shape, LocTy* shape_loc);
   bool ParseParamList();
   bool ParseName(string* result);
   bool ParseAttributeName(string* result);
   bool ParseString(string* result);
   bool ParseShape(Shape* result);
   bool ParseOpcode(HloOpcode* result);
+  bool ParseFftType(FftType* result);
   bool ParseFusionKind(HloInstruction::FusionKind* result);
+  bool ParseRandomDistribution(RandomDistribution* result);
   bool ParseInt64(int64* result);
   bool ParseDouble(double* result);
   bool ParseBool(bool* result);
   bool ParseToken(TokKind kind, const string& msg);
 
+  // Returns true if the current token is the beginning of a shape.
+  bool CanBeShape();
+  // Returns true if the current token is the beginning of a
+  // param_list_to_shape.
+  bool CanBeParamListToShape();
+
   // Logs the current parsing line and the given message. Always returns false.
   bool TokenError(StringPiece msg);
+  bool Error(LocTy loc, StringPiece msg);
 
   // If the current token is 'kind', eats it (i.e. lexes the next token) and
   // returns true.
@@ -191,10 +213,12 @@ class HloParser {
 
   // Adds the instruction to the pool. Returns false and emits an error if the
   // instruction already exists.
-  bool AddInstruction(const string& name, HloInstruction* instruction);
+  bool AddInstruction(const string& name, HloInstruction* instruction,
+                      LocTy name_loc);
   // Adds the computation to the pool. Returns false and emits an error if the
   // computation already exists.
-  bool AddComputation(const string& name, HloComputation* computation);
+  bool AddComputation(const string& name, HloComputation* computation,
+                      LocTy name_loc);
 
   // The map from the instruction name to the instruction. This does not own the
   // instructions.
@@ -203,19 +227,30 @@ class HloParser {
 
   HloLexer lexer_;
   std::unique_ptr<HloModule> module_;
+  std::vector<std::unique_ptr<HloComputation>> computations_;
   const HloModuleConfig config_;
   std::vector<string> error_;
 };
 
-bool HloParser::TokenError(StringPiece msg) {
-  const string error =
-      StrCat("was parsing \"", lexer_.GetCurrentLine(), "\"; token ",
-             TokKindToString(lexer_.GetKind()), "; ", msg);
-  VLOG(1) << "TokenError: " << error;
-  error_.push_back(error);
+bool HloParser::Error(LocTy loc, StringPiece msg) {
+  auto line_col = lexer_.GetLineAndColumn(loc);
+  const unsigned line = line_col.first;
+  const unsigned col = line_col.second;
+  std::vector<string> error_lines;
+  error_lines.push_back(
+      StrCat("was parsing ", line, ":", col, ": error: ", msg));
+  error_lines.push_back(lexer_.GetLine(loc).ToString());
+  error_lines.push_back(col == 0 ? "" : StrCat(string(col - 1, ' '), "^"));
+
+  error_.push_back(tensorflow::str_util::Join(error_lines, "\n"));
+  VLOG(1) << "Error: " << error_.back();
   return false;
 }
 
+bool HloParser::TokenError(StringPiece msg) {
+  return Error(lexer_.GetLoc(), msg);
+}
+
 bool HloParser::Run() {
   lexer_.Lex();
   return ParseHloModule();
@@ -241,27 +276,67 @@ bool HloParser::ParseHloModule() {
 
 // computations ::= (computation)+
 bool HloParser::ParseComputations() {
+  HloComputation* entry_computation = nullptr;
   do {
-    if (!ParseComputation()) {
+    if (!ParseComputation(&entry_computation)) {
       return false;
     }
   } while (lexer_.GetKind() != TokKind::kEof);
+
+  for (int i = 0; i < computations_.size(); i++) {
+    // If entry_computation is not nullptr, it means the computation it pointed
+    // to is marked with "ENTRY"; otherwise, no computation is marked with
+    // "ENTRY", and we use the last computation as the entry computation. We
+    // add the non-entry computations as embedded computations to the module.
+    if ((entry_computation != nullptr &&
+         computations_[i].get() != entry_computation) ||
+        (entry_computation == nullptr && i != computations_.size() - 1)) {
+      module_->AddEmbeddedComputation(std::move(computations_[i]));
+      continue;
+    }
+    auto computation =
+        module_->AddEntryComputation(std::move(computations_[i]));
+    // The parameters and result layouts were set to default layout. Here we
+    // set the layouts to what the hlo text says.
+    for (int p = 0; p < computation->num_parameters(); p++) {
+      const Shape& param_shape = computation->parameter_instruction(p)->shape();
+      if (param_shape.has_layout()) {
+        module_->mutable_entry_computation_layout()
+            ->mutable_parameter_layout(p)
+            ->ResetLayout(param_shape.layout());
+      }
+    }
+    const Shape& result_shape = computation->root_instruction()->shape();
+    if (result_shape.has_layout()) {
+      module_->mutable_entry_computation_layout()
+          ->mutable_result_layout()
+          ->ResetLayout(result_shape.layout());
+    }
+  }
+
   return true;
 }
 
-// computation ::= ('ENTRY')? name param_list '->' shape instruction_list
-bool HloParser::ParseComputation() {
+// computation ::= ('ENTRY')? name (param_list_to_shape)? instruction_list
+bool HloParser::ParseComputation(HloComputation** entry_computation) {
+  LocTy maybe_entry_loc = lexer_.GetLoc();
   const bool is_entry_computation = EatIfPresent(TokKind::kw_ENTRY);
+
   string name;
+  LocTy name_loc = lexer_.GetLoc();
   if (!ParseName(&name)) {
     return false;
   }
   auto builder = MakeUnique<HloComputation::Builder>(name);
 
+  LocTy shape_loc = nullptr;
   Shape shape;
+  if (CanBeParamListToShape() && !ParseParamListToShape(&shape, &shape_loc)) {
+    return false;
+  }
+
   string root_name;
-  if (!ParseParamList() || !ParseToken(TokKind::kArrow, "expects '->'") ||
-      !ParseShape(&shape) || !ParseInstructionList(builder.get(), &root_name)) {
+  if (!ParseInstructionList(builder.get(), &root_name)) {
     return false;
   }
 
@@ -273,14 +348,37 @@ bool HloParser::ParseComputation() {
     LOG(FATAL) << "instruction " << root_name
                << " was marked as ROOT but the parser has not seen it before";
   }
+
   // Now root can be either an existing instruction or a nullptr. If it's a
   // nullptr, the implementation of Builder will set the last instruction as
   // root instruction.
-  HloComputation* computation =
-      is_entry_computation
-          ? module_->AddEntryComputation(builder->Build(root))
-          : module_->AddEmbeddedComputation(builder->Build(root));
-  return AddComputation(name, computation);
+  computations_.emplace_back(builder->Build(root));
+  HloComputation* computation = computations_.back().get();
+
+  if (!root) {
+    root = computation->root_instruction();
+  } else {
+    CHECK_EQ(root, computation->root_instruction());
+  }
+
+  // If param_list_to_shape was present, check compatibility.
+  if (shape_loc != nullptr && !ShapeUtil::Compatible(root->shape(), shape)) {
+    return Error(
+        shape_loc,
+        StrCat("Shape of computation ", name, ", ",
+               ShapeUtil::HumanString(shape),
+               ", is not compatible with that of its root instruction ",
+               root_name, ", ", ShapeUtil::HumanString(root->shape())));
+  }
+
+  if (is_entry_computation) {
+    if (*entry_computation != nullptr) {
+      return Error(maybe_entry_loc, "expects only one ENTRY");
+    }
+    *entry_computation = computation;
+  }
+
+  return AddComputation(name, computation, name_loc);
 }
 
 // instruction_list ::= '{' instruction_list1 '}'
@@ -307,13 +405,21 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
   Shape shape;
   HloOpcode opcode;
   std::vector<HloInstruction*> operands;
+
+  LocTy maybe_root_loc = lexer_.GetLoc();
   bool is_root = EatIfPresent(TokKind::kw_ROOT);
+
+  const LocTy name_loc = lexer_.GetLoc();
   if (!ParseName(&name) ||
       !ParseToken(TokKind::kEqual, "expects '=' in instruction") ||
       !ParseShape(&shape) || !ParseOpcode(&opcode)) {
     return false;
   }
+
   if (is_root) {
+    if (!root_name->empty()) {
+      return Error(maybe_root_loc, "one computation should have only one ROOT");
+    }
     *root_name = name;
   }
 
@@ -395,7 +501,6 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
     case HloOpcode::kLe:
     case HloOpcode::kLt:
     case HloOpcode::kNe:
-    case HloOpcode::kDot:
     case HloOpcode::kMaximum:
     case HloOpcode::kMinimum:
     case HloOpcode::kPower:
@@ -444,12 +549,11 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
       break;
     }
     case HloOpcode::kCrossReplicaSum: {
-      if (!ParseOperands(&operands, /*expected_size=*/1) ||
-          !ParseAttributes(attrs)) {
+      if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
         return false;
       }
       instruction = builder->AddInstruction(
-          HloInstruction::CreateCrossReplicaSum(shape, operands[0]));
+          HloInstruction::CreateCrossReplicaSum(shape, operands));
       break;
     }
     case HloOpcode::kReshape: {
@@ -590,6 +694,20 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
           shape, /*lhs=*/operands[0], /*rhs=*/operands[1], *window, *dnums));
       break;
     }
+    case HloOpcode::kFft: {
+      optional<FftType> fft_type;
+      optional<std::vector<int64>> fft_length;
+      attrs["fft_type"] = {/*required=*/true, AttrTy::kFftType, &fft_type};
+      attrs["fft_length"] = {/*required=*/true, AttrTy::kBracedInt64List,
+                             &fft_length};
+      if (!ParseOperands(&operands, /*expected_size=*/1) ||
+          !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction = builder->AddInstruction(HloInstruction::CreateFft(
+          shape, operands[0], *fft_type, *fft_length));
+      break;
+    }
     case HloOpcode::kBroadcast: {
       optional<std::vector<int64>> broadcast_dimensions;
       attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List,
@@ -816,15 +934,110 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
           shape, operands[0], config ? *config : ""));
       break;
     }
-    case HloOpcode::kConditional:
-    case HloOpcode::kCustomCall:
-    case HloOpcode::kReducePrecision:
-    case HloOpcode::kRng:
+    case HloOpcode::kRng: {
+      optional<RandomDistribution> distribution;
+      attrs["distribution"] = {/*required=*/true, AttrTy::kDistribution,
+                               &distribution};
+      if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction = builder->AddInstruction(
+          HloInstruction::CreateRng(shape, *distribution, operands));
+      break;
+    }
+    case HloOpcode::kReducePrecision: {
+      optional<int64> exponent_bits;
+      optional<int64> mantissa_bits;
+      attrs["exponent_bits"] = {/*required=*/true, AttrTy::kInt64,
+                                &exponent_bits};
+      attrs["mantissa_bits"] = {/*required=*/true, AttrTy::kInt64,
+                                &mantissa_bits};
+      if (!ParseOperands(&operands, /*expected_size=*/1) ||
+          !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction =
+          builder->AddInstruction(HloInstruction::CreateReducePrecision(
+              shape, operands[0], static_cast<int>(*exponent_bits),
+              static_cast<int>(*mantissa_bits)));
+      break;
+    }
+    case HloOpcode::kConditional: {
+      optional<HloComputation*> true_computation;
+      optional<HloComputation*> false_computation;
+      attrs["true_computation"] = {/*required=*/true, AttrTy::kHloComputation,
+                                   &true_computation};
+      attrs["false_computation"] = {/*required=*/true, AttrTy::kHloComputation,
+                                    &false_computation};
+      if (!ParseOperands(&operands, /*expected_size=*/3) ||
+          !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction = builder->AddInstruction(HloInstruction::CreateConditional(
+          shape, /*pred=*/operands[0],
+          /*true_computation_arg=*/operands[1], *true_computation,
+          /*false_computation_arg=*/operands[2], *false_computation));
+      break;
+    }
+    case HloOpcode::kCustomCall: {
+      optional<string> custom_call_target;
+      attrs["custom_call_target"] = {/*required=*/true, AttrTy::kString,
+                                     &custom_call_target};
+      if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction = builder->AddInstruction(HloInstruction::CreateCustomCall(
+          shape, operands, *custom_call_target));
+      break;
+    }
+    case HloOpcode::kDot: {
+      optional<std::vector<int64>> lhs_contracting_dims;
+      attrs["lhs_contracting_dims"] = {
+          /*required=*/false, AttrTy::kBracedInt64List, &lhs_contracting_dims};
+      optional<std::vector<int64>> rhs_contracting_dims;
+      attrs["rhs_contracting_dims"] = {
+          /*required=*/false, AttrTy::kBracedInt64List, &rhs_contracting_dims};
+      optional<std::vector<int64>> lhs_batch_dims;
+      attrs["lhs_batch_dims"] = {/*required=*/false, AttrTy::kBracedInt64List,
+                                 &lhs_batch_dims};
+      optional<std::vector<int64>> rhs_batch_dims;
+      attrs["rhs_batch_dims"] = {/*required=*/false, AttrTy::kBracedInt64List,
+                                 &rhs_batch_dims};
+
+      if (!ParseOperands(&operands, /*expected_size=*/2) ||
+          !ParseAttributes(attrs)) {
+        return false;
+      }
+
+      DotDimensionNumbers dnum;
+      if (lhs_contracting_dims) {
+        *dnum.mutable_lhs_contracting_dimensions() = {
+            lhs_contracting_dims->begin(), lhs_contracting_dims->end()};
+      }
+      if (rhs_contracting_dims) {
+        *dnum.mutable_rhs_contracting_dimensions() = {
+            rhs_contracting_dims->begin(), rhs_contracting_dims->end()};
+      }
+      if (lhs_batch_dims) {
+        *dnum.mutable_lhs_batch_dimensions() = {lhs_batch_dims->begin(),
+                                                lhs_batch_dims->end()};
+      }
+      if (rhs_batch_dims) {
+        *dnum.mutable_rhs_batch_dimensions() = {rhs_batch_dims->begin(),
+                                                rhs_batch_dims->end()};
+      }
+
+      instruction = builder->AddInstruction(
+          HloInstruction::CreateDot(shape, operands[0], operands[1], dnum));
+      break;
+    }
     case HloOpcode::kTrace:
       return TokenError(StrCat("parsing not yet implemented for op: ",
                                HloOpcodeString(opcode)));
   }
 
+  instruction->set_name(name);
+
   // Add common attrs (sharding, control predecessors) to the instruction, if
   // they were seen.
   if (sharding) {
@@ -835,15 +1048,15 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
     for (auto* pre : *predecessors) {
       Status status = pre->AddControlDependencyTo(instruction);
       if (!status.ok()) {
-        return TokenError(StrCat("error adding control dependency for: ", name,
-                                 " status: ", status.ToString()));
+        return Error(name_loc, StrCat("error adding control dependency for: ",
+                                      name, " status: ", status.ToString()));
       }
     }
   }
   if (metadata) {
     instruction->set_metadata(*metadata);
   }
-  return AddInstruction(name, instruction);
+  return AddInstruction(name, instruction, name_loc);
 }  // NOLINT(readability/fn_size)
 
 // ::= '{' (single_sharding | tuple_sharding) '}'
@@ -889,6 +1102,7 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding,
     return false;
   }
 
+  LocTy loc = lexer_.GetLoc();
   bool maximal = false;
   bool replicated = false;
   std::vector<int64> devices;
@@ -956,34 +1170,35 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding,
 
   if (replicated) {
     if (!devices.empty()) {
-      return TokenError(
-          "replicated shardings should not have any devices assigned");
+      return Error(loc,
+                   "replicated shardings should not have any devices assigned");
     }
     if (!ShapeUtil::Equal(tile_shape, Shape())) {
-      return TokenError(
-          "replicated shardings should not have any tile shape set");
+      return Error(loc,
+                   "replicated shardings should not have any tile shape set");
     }
     sharding->set_type(OpSharding::Type::OpSharding_Type_REPLICATED);
   } else if (maximal) {
     if (devices.size() != 1) {
-      return TokenError(
-          "maximal shardings should have exactly one device assigned");
+      return Error(loc,
+                   "maximal shardings should have exactly one device assigned");
     }
     if (!ShapeUtil::Equal(tile_shape, Shape())) {
-      return TokenError("maximal shardings should not have any tile shape set");
+      return Error(loc, "maximal shardings should not have any tile shape set");
     }
     sharding->set_type(OpSharding::Type::OpSharding_Type_MAXIMAL);
     sharding->add_tile_assignment_devices(devices[0]);
   } else {
     if (devices.size() <= 1) {
-      return TokenError(
-          "non-maximal shardings must have more than one device assigned");
+      return Error(
+          loc, "non-maximal shardings must have more than one device assigned");
     }
     if (ShapeUtil::Equal(tile_shape, Shape())) {
-      return TokenError("non-maximal shardings should have a tile shape set");
+      return Error(loc, "non-maximal shardings should have a tile shape set");
     }
     if (tile_assignment_dimensions.empty()) {
-      return TokenError(
+      return Error(
+          loc,
           "non-maximal shardings must have a tile assignment list including "
           "dimensions");
     }
@@ -1008,10 +1223,11 @@ bool HloParser::ParseInstructionNames(
                   "expects '{' at the beginning of instruction name list")) {
     return false;
   }
+  LocTy loc = lexer_.GetLoc();
   do {
     string name;
     if (!ParseName(&name)) {
-      return TokenError("expects a instruction name");
+      return Error(loc, "expects a instruction name");
     }
     HloInstruction* instr =
         tensorflow::gtl::FindPtrOrNull(instruction_pool_, name);
@@ -1023,7 +1239,7 @@ bool HloParser::ParseInstructionNames(
   } while (EatIfPresent(TokKind::kComma));
 
   return ParseToken(TokKind::kRbrace,
-                    "expects '}' at the end of control instructions");
+                    "expects '}' at the end of instruction name list");
 }
 
 bool HloParser::SetValueInLiteral(int64 value, int64 linear_index,
@@ -1058,6 +1274,8 @@ bool HloParser::SetValueInLiteral(double value, int64 linear_index,
   switch (shape.element_type()) {
     case F16:
       return SetValueInLiteralHelper<half>(value, linear_index, literal);
+    case BF16:
+      return SetValueInLiteralHelper<bfloat16>(value, linear_index, literal);
     case F32:
       return SetValueInLiteralHelper<float>(value, linear_index, literal);
     case F64:
@@ -1096,7 +1314,8 @@ bool HloParser::SetValueInLiteralHelper(ParsedElemT value, int64 linear_index,
        (std::numeric_limits<ParsedElemT>::infinity() == value ||
         -std::numeric_limits<ParsedElemT>::infinity() == value))) {
     // Skip range checking for non-finite value.
-  } else if (literal->shape().element_type() == F16) {
+  } else if (literal->shape().element_type() == F16 ||
+             literal->shape().element_type() == BF16) {
     if (value > kF16max || value < -kF16max) {
       return TokenError(StrCat(
           "value ", value, " is out of range for literal's primitive type ",
@@ -1112,7 +1331,7 @@ bool HloParser::SetValueInLiteralHelper(ParsedElemT value, int64 linear_index,
         PrimitiveType_Name(literal->shape().element_type())));
   }
 
-  literal->GetMutableArraySlice<LiteralNativeT>().at(linear_index) =
+  literal->data<LiteralNativeT>().at(linear_index) =
       static_cast<LiteralNativeT>(value);
   return true;
 }
@@ -1179,9 +1398,19 @@ bool HloParser::ParseTupleLiteral(std::unique_ptr<Literal>* literal,
 // non_tuple
 //   ::= rank01
 //   ::= rank2345
-// rank2345 ::= shape nested_array
+// rank2345 ::= shape sparse_or_nested_array
 bool HloParser::ParseNonTupleLiteral(std::unique_ptr<Literal>* literal,
                                      const Shape& shape) {
+  if (LayoutUtil::IsSparseArray(shape)) {
+    return ParseSparseLiteral(literal, shape);
+  }
+
+  CHECK(LayoutUtil::IsDenseArray(shape));
+  return ParseDenseLiteral(literal, shape);
+}
+
+bool HloParser::ParseDenseLiteral(std::unique_ptr<Literal>* literal,
+                                  const Shape& shape) {
   const int64 rank = ShapeUtil::Rank(shape);
   if (rank > 1 && !EatShapeAndCheckCompatible(shape)) {
     return false;
@@ -1282,26 +1511,28 @@ bool HloParser::ParseNonTupleLiteral(std::unique_ptr<Literal>* literal,
           }
           lexer_.Lex();
         } else if (primitive_util::IsIntegralType(shape.element_type())) {
+          LocTy loc = lexer_.GetLoc();
           int64 value;
           if (!ParseInt64(&value)) {
-            return TokenError(StrCat("expects integer for primitive type: ",
+            return Error(loc, StrCat("expects integer for primitive type: ",
                                      PrimitiveType_Name(shape.element_type())));
           }
           if (!SetValueInLiteral(value, linear_index++, literal->get())) {
             return false;
           }
         } else if (primitive_util::IsFloatingPointType(shape.element_type())) {
+          LocTy loc = lexer_.GetLoc();
           double value;
           if (!ParseDouble(&value)) {
-            return TokenError(
-                StrCat("expect floating point value for primitive type: ",
-                       PrimitiveType_Name(shape.element_type())));
+            return Error(
+                loc, StrCat("expect floating point value for primitive type: ",
+                            PrimitiveType_Name(shape.element_type())));
           }
           if (!SetValueInLiteral(value, linear_index++, literal->get())) {
             return false;
           }
         } else {
-          return TokenError(StrCat("unsupported premitive type ",
+          return TokenError(StrCat("unsupported primitive type ",
                                    PrimitiveType_Name(shape.element_type())));
         }
         break;
@@ -1313,11 +1544,147 @@ bool HloParser::ParseNonTupleLiteral(std::unique_ptr<Literal>* literal,
   return true;
 }
 
+bool HloParser::ParseSparseLiteral(std::unique_ptr<Literal>* literal,
+                                   const Shape& shape) {
+  if (!EatShapeAndCheckCompatible(shape)) {
+    return false;
+  }
+
+  switch (shape.element_type()) {
+    case PRED:
+      return ParseSparseLiteralHelper<uint8>(literal, shape);
+    case S8:
+      return ParseSparseLiteralHelper<int8>(literal, shape);
+    case S16:
+      return ParseSparseLiteralHelper<int16>(literal, shape);
+    case S32:
+      return ParseSparseLiteralHelper<int32>(literal, shape);
+    case S64:
+      return ParseSparseLiteralHelper<int64>(literal, shape);
+    case U8:
+      return ParseSparseLiteralHelper<uint8>(literal, shape);
+    case U16:
+      return ParseSparseLiteralHelper<uint16>(literal, shape);
+    case U32:
+      return ParseSparseLiteralHelper<uint32>(literal, shape);
+    case U64:
+      return ParseSparseLiteralHelper<uint64>(literal, shape);
+    case F16:
+      return ParseSparseLiteralHelper<half>(literal, shape);
+    case F32:
+      return ParseSparseLiteralHelper<float>(literal, shape);
+    case BF16:
+      return ParseSparseLiteralHelper<bfloat16>(literal, shape);
+    case F64:
+      return ParseSparseLiteralHelper<double>(literal, shape);
+    default:
+      return Error(lexer_.GetLoc(),
+                   StrCat("invalid primitive type for sparse literal: ",
+                          PrimitiveType_Name(shape.element_type())));
+  }
+}
+
+template <typename LiteralNativeT>
+bool HloParser::ParseSparseLiteralHelper(std::unique_ptr<Literal>* literal,
+                                         const Shape& shape) {
+  std::vector<int64> index;
+
+  int64 rank = ShapeUtil::Rank(shape);
+
+  *literal = MakeUnique<Literal>(shape);
+
+  if (!ParseToken(TokKind::kLbrace,
+                  "expects '{' at the beginning of a sparse literal")) {
+    return false;
+  }
+
+  for (;;) {
+    if (lexer_.GetKind() == TokKind::kRbrace) {
+      lexer_.Lex();
+      break;
+    }
+
+    LocTy index_loc = lexer_.GetLoc();
+    index.clear();
+    if (lexer_.GetKind() == TokKind::kInt) {
+      int64 single_index = lexer_.GetInt64Val();
+      lexer_.Lex();
+      if (rank != 1) {
+        return Error(
+            index_loc,
+            StrCat("invalid single-dimensional index for shape with rank ",
+                   rank, ": ", single_index));
+      }
+      index.push_back(single_index);
+    } else {
+      if (!ParseInt64List(TokKind::kLsquare, TokKind::kRsquare, TokKind::kComma,
+                          &index)) {
+        return false;
+      }
+      if (index.size() != rank) {
+        return Error(
+            index_loc,
+            StrCat("invalid multi-dimension index for shape with rank ", rank,
+                   ": [", tensorflow::str_util::Join(index, ", "), "]"));
+      }
+    }
+    if (!ParseToken(TokKind::kColon,
+                    "expects ':' after after the sparse array index and before "
+                    "the sparse array value")) {
+      return false;
+    }
+    LocTy value_loc = lexer_.GetLoc();
+    LiteralNativeT value;
+    if (lexer_.GetKind() == TokKind::kw_true ||
+        lexer_.GetKind() == TokKind::kw_false) {
+      value = static_cast<LiteralNativeT>(lexer_.GetKind() == TokKind::kw_true);
+      lexer_.Lex();
+    } else if (primitive_util::IsIntegralType(shape.element_type())) {
+      int64 value_s64;
+      if (!ParseInt64(&value_s64)) {
+        return Error(value_loc,
+                     StrCat("expects integer for primitive type: ",
+                            PrimitiveType_Name(shape.element_type())));
+      }
+      value = static_cast<LiteralNativeT>(value_s64);
+    } else if (primitive_util::IsFloatingPointType(shape.element_type())) {
+      double value_f64;
+      if (!ParseDouble(&value_f64)) {
+        return Error(value_loc,
+                     StrCat("expects floating point value for primitive type: ",
+                            PrimitiveType_Name(shape.element_type())));
+      }
+      value = static_cast<LiteralNativeT>(value_f64);
+    } else {
+      LOG(FATAL) << "Unexpected element type: "
+                 << PrimitiveType_Name(shape.element_type());
+    }
+    if (lexer_.GetKind() != TokKind::kRbrace &&
+        !ParseToken(TokKind::kComma,
+                    "expects ',' separator between sparse array elements")) {
+      return false;
+    }
+
+    if ((*literal)->sparse_element_count() + 1 ==
+        LayoutUtil::MaxSparseElements(shape.layout())) {
+      return Error(
+          lexer_.GetLoc(),
+          StrCat("number of sparse elements exceeds maximum for layout: ",
+                 ShapeUtil::HumanStringWithLayout(shape)));
+    }
+
+    (*literal)->AppendSparseElement(index, value);
+  }
+
+  (*literal)->SortSparseElements();
+  return true;
+}
+
 // operands ::= '(' operands1 ')'
 // operands1
 //   ::= /*empty*/
 //   ::= operand (, operand)*
-// operand ::= shape name
+// operand ::= (shape)? name
 bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands) {
   if (!ParseToken(TokKind::kLparen,
                   "expects '(' at the beginning of operands")) {
@@ -1327,15 +1694,21 @@ bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands) {
     // empty
   } else {
     do {
-      Shape shape;
+      LocTy loc = lexer_.GetLoc();
       string name;
-      if (!ParseShape(&shape) || !ParseName(&name)) {
+      if (CanBeShape()) {
+        Shape shape;
+        if (!ParseShape(&shape)) {
+          return false;
+        }
+      }
+      if (!ParseName(&name)) {
         return false;
       }
       HloInstruction* instruction =
           tensorflow::gtl::FindPtrOrNull(instruction_pool_, name);
       if (!instruction) {
-        return TokenError(StrCat("instruction does not exist: ", name));
+        return Error(loc, StrCat("instruction does not exist: ", name));
       }
       operands->push_back(instruction);
     } while (EatIfPresent(TokKind::kComma));
@@ -1345,11 +1718,12 @@ bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands) {
 
 bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands,
                               const int expected_size) {
+  LocTy loc = lexer_.GetLoc();
   if (!ParseOperands(operands)) {
     return false;
   }
   if (expected_size != operands->size()) {
-    return TokenError(StrCat("expects ", expected_size, " operands, but has ",
+    return Error(loc, StrCat("expects ", expected_size, " operands, but has ",
                              operands->size(), " operands"));
   }
   return true;
@@ -1358,6 +1732,7 @@ bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands,
 // sub_attributes ::= '{' (','? attribute)* '}'
 bool HloParser::ParseSubAttributes(
     const std::unordered_map<string, AttrConfig>& attrs) {
+  LocTy loc = lexer_.GetLoc();
   if (!ParseToken(TokKind::kLbrace, "expects '{' to start sub attributes")) {
     return false;
   }
@@ -1376,7 +1751,7 @@ bool HloParser::ParseSubAttributes(
   for (const auto& attr_it : attrs) {
     if (attr_it.second.required &&
         seen_attrs.find(attr_it.first) == seen_attrs.end()) {
-      return TokenError(Printf("sub-attribute %s is expected but not seen",
+      return Error(loc, Printf("sub-attribute %s is expected but not seen",
                                attr_it.first.c_str()));
     }
   }
@@ -1386,6 +1761,7 @@ bool HloParser::ParseSubAttributes(
 // attributes ::= (',' attribute)*
 bool HloParser::ParseAttributes(
     const std::unordered_map<string, AttrConfig>& attrs) {
+  LocTy loc = lexer_.GetLoc();
   std::unordered_set<string> seen_attrs;
   while (EatIfPresent(TokKind::kComma)) {
     if (!ParseAttributeHelper(attrs, &seen_attrs)) {
@@ -1396,7 +1772,7 @@ bool HloParser::ParseAttributes(
   for (const auto& attr_it : attrs) {
     if (attr_it.second.required &&
         seen_attrs.find(attr_it.first) == seen_attrs.end()) {
-      return TokenError(Printf("attribute %s is expected but not seen",
+      return Error(loc, Printf("attribute %s is expected but not seen",
                                attr_it.first.c_str()));
     }
   }
@@ -1406,21 +1782,23 @@ bool HloParser::ParseAttributes(
 bool HloParser::ParseAttributeHelper(
     const std::unordered_map<string, AttrConfig>& attrs,
     std::unordered_set<string>* seen_attrs) {
+  LocTy loc = lexer_.GetLoc();
   string name;
   if (!ParseAttributeName(&name)) {
-    return TokenError("error parsing attributes");
+    return Error(loc, "error parsing attributes");
   }
   VLOG(1) << "Parsing attribute " << name;
   if (!seen_attrs->insert(name).second) {
-    return TokenError(Printf("attribute %s already exists", name.c_str()));
+    return Error(loc, Printf("attribute %s already exists", name.c_str()));
   }
   auto attr_it = attrs.find(name);
   if (attr_it == attrs.end()) {
-    return TokenError(Printf("unexpected attribute %s", name.c_str()));
+    return Error(loc, Printf("unexpected attribute %s", name.c_str()));
   }
   AttrTy attr_type = attr_it->second.attr_type;
   void* attr_out_ptr = attr_it->second.result;
   bool success = [&] {
+    LocTy attr_loc = lexer_.GetLoc();
     switch (attr_type) {
       case AttrTy::kInt64: {
         int64 result;
@@ -1436,7 +1814,7 @@ bool HloParser::ParseAttributeHelper(
           return false;
         }
         if (result != static_cast<int32>(result)) {
-          return TokenError("value out of range for int32");
+          return Error(attr_loc, "value out of range for int32");
         }
         static_cast<optional<int32>*>(attr_out_ptr)
             ->emplace(static_cast<int32>(result));
@@ -1449,7 +1827,7 @@ bool HloParser::ParseAttributeHelper(
         }
         if (result > std::numeric_limits<float>::max() ||
             result < std::numeric_limits<float>::lowest()) {
-          return TokenError("value out of range for float");
+          return Error(attr_loc, "value out of range for float");
         }
         static_cast<optional<float>*>(attr_out_ptr)
             ->emplace(static_cast<float>(result));
@@ -1463,6 +1841,14 @@ bool HloParser::ParseAttributeHelper(
         static_cast<optional<HloComputation*>*>(attr_out_ptr)->emplace(result);
         return true;
       }
+      case AttrTy::kFftType: {
+        FftType result;
+        if (!ParseFftType(&result)) {
+          return false;
+        }
+        static_cast<optional<FftType>*>(attr_out_ptr)->emplace(result);
+        return true;
+      }
       case AttrTy::kWindow: {
         Window result;
         if (!ParseWindow(&result)) {
@@ -1548,22 +1934,32 @@ bool HloParser::ParseAttributeHelper(
         static_cast<optional<OpMetadata>*>(attr_out_ptr)->emplace(result);
         return true;
       }
+      case AttrTy::kDistribution: {
+        RandomDistribution result;
+        if (!ParseRandomDistribution(&result)) {
+          return false;
+        }
+        static_cast<optional<RandomDistribution>*>(attr_out_ptr)
+            ->emplace(result);
+        return true;
+      }
     }
   }();
   if (!success) {
-    return TokenError(Printf("error parsing attribute %s", name.c_str()));
+    return Error(loc, Printf("error parsing attribute %s", name.c_str()));
   }
   return true;
 }
 
 bool HloParser::ParseComputationName(HloComputation** value) {
   string name;
+  LocTy loc = lexer_.GetLoc();
   if (!ParseName(&name)) {
-    return TokenError("expects computation name");
+    return Error(loc, "expects computation name");
   }
   *value = tensorflow::gtl::FindPtrOrNull(computation_pool_, name);
   if (*value == nullptr) {
-    return TokenError(StrCat("computation does not exist: ", name));
+    return Error(loc, StrCat("computation does not exist: ", name));
   }
   return true;
 }
@@ -1572,6 +1968,7 @@ bool HloParser::ParseComputationName(HloComputation** value) {
 // The subattributes can appear in any order. 'size=' is required, others are
 // optional.
 bool HloParser::ParseWindow(Window* window) {
+  LocTy loc = lexer_.GetLoc();
   if (!ParseToken(TokKind::kLbrace, "expected '{' to start window attribute")) {
     return false;
   }
@@ -1581,10 +1978,12 @@ bool HloParser::ParseWindow(Window* window) {
   std::vector<std::vector<int64>> pad;
   std::vector<int64> lhs_dilate;
   std::vector<int64> rhs_dilate;
+  std::vector<int64> rhs_reversal;
   while (lexer_.GetKind() != TokKind::kRbrace) {
+    LocTy attr_loc = lexer_.GetLoc();
     string field_name;
     if (!ParseAttributeName(&field_name)) {
-      return TokenError("expects sub-attributes in window");
+      return Error(attr_loc, "expects sub-attributes in window");
     }
     bool ok = [&] {
       if (field_name == "size") {
@@ -1602,7 +2001,10 @@ bool HloParser::ParseWindow(Window* window) {
       if (field_name == "pad") {
         return ParseWindowPad(&pad);
       }
-      return TokenError(StrCat("unexpected attribute name: ", field_name));
+      if (field_name == "rhs_reversal") {
+        return ParseDxD("rhs_reversal", &rhs_reversal);
+      }
+      return Error(attr_loc, StrCat("unexpected attribute name: ", field_name));
     }();
     if (!ok) {
       return false;
@@ -1610,20 +2012,20 @@ bool HloParser::ParseWindow(Window* window) {
   }
 
   if (size.empty()) {
-    return TokenError(
-        "sub-attribute 'size=' is required in the window attribute");
+    return Error(loc,
+                 "sub-attribute 'size=' is required in the window attribute");
   }
   if (!stride.empty() && stride.size() != size.size()) {
-    return TokenError("expects 'stride=' has the same size as 'size='");
+    return Error(loc, "expects 'stride=' has the same size as 'size='");
   }
   if (!lhs_dilate.empty() && lhs_dilate.size() != size.size()) {
-    return TokenError("expects 'lhs_dilate=' has the same size as 'size='");
+    return Error(loc, "expects 'lhs_dilate=' has the same size as 'size='");
   }
   if (!rhs_dilate.empty() && rhs_dilate.size() != size.size()) {
-    return TokenError("expects 'rhs_dilate=' has the same size as 'size='");
+    return Error(loc, "expects 'rhs_dilate=' has the same size as 'size='");
   }
   if (!pad.empty() && pad.size() != size.size()) {
-    return TokenError("expects 'pad=' has the same size as 'size='");
+    return Error(loc, "expects 'pad=' has the same size as 'size='");
   }
 
   for (int i = 0; i < size.size(); i++) {
@@ -1638,6 +2040,8 @@ bool HloParser::ParseWindow(Window* window) {
         lhs_dilate.empty() ? 1 : lhs_dilate[i]);
     window->mutable_dimensions(i)->set_window_dilation(
         rhs_dilate.empty() ? 1 : rhs_dilate[i]);
+    window->mutable_dimensions(i)->set_window_reversal(
+        rhs_reversal.empty() ? false : (rhs_reversal[i] == 1));
   }
   return ParseToken(TokKind::kRbrace, "expected '}' to end window attribute");
 }
@@ -1783,20 +2187,19 @@ bool HloParser::ParseSliceRanges(SliceRanges* result) {
     return ParseToken(TokKind::kRbrace, "expects '}' to end ranges");
   }
   do {
+    LocTy loc = lexer_.GetLoc();
     ranges.emplace_back();
     if (!ParseInt64List(TokKind::kLsquare, TokKind::kRsquare, TokKind::kColon,
                         &ranges.back())) {
       return false;
     }
-  } while (EatIfPresent(TokKind::kComma));
-
-  for (const auto& range : ranges) {
+    const auto& range = ranges.back();
     if (range.size() != 2 && range.size() != 3) {
-      return TokenError(Printf(
-          "expects [start:limit:step] or [start:limit], but sees %ld elements.",
-          range.size()));
+      return Error(loc, Printf("expects [start:limit:step] or [start:limit], "
+                               "but sees %ld elements.",
+                               range.size()));
     }
-  }
+  } while (EatIfPresent(TokKind::kComma));
 
   for (const auto& range : ranges) {
     result->starts.push_back(range[0]);
@@ -1832,6 +2235,19 @@ bool HloParser::ParseInt64List(const TokKind start, const TokKind end,
       end, StrCat("expects an int64 list to end with ", TokKindToString(end)));
 }
 
+// param_list_to_shape ::= param_list '->' shape
+bool HloParser::ParseParamListToShape(Shape* shape, LocTy* shape_loc) {
+  if (!ParseParamList() || !ParseToken(TokKind::kArrow, "expects '->'")) {
+    return false;
+  }
+  *shape_loc = lexer_.GetLoc();
+  return ParseShape(shape);
+}
+
+bool HloParser::CanBeParamListToShape() {
+  return lexer_.GetKind() == TokKind::kLparen;
+}
+
 // param_list ::= '(' param_list1 ')'
 // param_list1
 //   ::= /*empty*/
@@ -1848,8 +2264,8 @@ bool HloParser::ParseParamList() {
   } else {
     do {
       Shape shape;
-      if (!ParseToken(TokKind::kName, "expects name in parameter") ||
-          !ParseShape(&shape)) {
+      string name;
+      if (!ParseName(&name) || !ParseShape(&shape)) {
         return false;
       }
     } while (EatIfPresent(TokKind::kComma));
@@ -1888,9 +2304,17 @@ bool HloParser::ParseShape(Shape* result) {
   return true;
 }
 
+bool HloParser::CanBeShape() {
+  // A non-tuple shape starts with a kShape token; a tuple shape starts with
+  // '('.
+  return lexer_.GetKind() == TokKind::kShape ||
+         lexer_.GetKind() == TokKind::kLparen;
+}
+
 bool HloParser::ParseName(string* result) {
   VLOG(1) << "ParseName";
-  if (lexer_.GetKind() != TokKind::kName) {
+  if (lexer_.GetKind() != TokKind::kIdent &&
+      lexer_.GetKind() != TokKind::kName) {
     return TokenError("expects name");
   }
   *result = lexer_.GetStrVal();
@@ -1918,15 +2342,16 @@ bool HloParser::ParseString(string* result) {
 }
 
 bool HloParser::ParseDxD(const string& name, std::vector<int64>* result) {
+  LocTy loc = lexer_.GetLoc();
   if (!result->empty()) {
-    return TokenError(
-        Printf("sub-attribute '%s=' already exists", name.c_str()));
+    return Error(loc,
+                 Printf("sub-attribute '%s=' already exists", name.c_str()));
   }
   // 1D
   if (lexer_.GetKind() == TokKind::kInt) {
     int64 number;
     if (!ParseInt64(&number)) {
-      return TokenError(Printf("expects sub-attribute '%s=i'", name.c_str()));
+      return Error(loc, Printf("expects sub-attribute '%s=i'", name.c_str()));
     }
     result->push_back(number);
     return true;
@@ -1935,8 +2360,8 @@ bool HloParser::ParseDxD(const string& name, std::vector<int64>* result) {
   if (lexer_.GetKind() == TokKind::kDxD) {
     string str = lexer_.GetStrVal();
     if (!SplitAndParseAsInts(str, 'x', result)) {
-      return TokenError(
-          Printf("expects sub-attribute '%s=ixj...'", name.c_str()));
+      return Error(loc,
+                   Printf("expects sub-attribute '%s=ixj...'", name.c_str()));
     }
     lexer_.Lex();
     return true;
@@ -1945,8 +2370,9 @@ bool HloParser::ParseDxD(const string& name, std::vector<int64>* result) {
 }
 
 bool HloParser::ParseWindowPad(std::vector<std::vector<int64>>* pad) {
+  LocTy loc = lexer_.GetLoc();
   if (!pad->empty()) {
-    return TokenError("sub-attribute 'pad=' already exists");
+    return Error(loc, "sub-attribute 'pad=' already exists");
   }
   if (lexer_.GetKind() != TokKind::kPad) {
     return TokenError("expects window pad pattern, e.g., '0_0x3_3'");
@@ -1957,8 +2383,8 @@ bool HloParser::ParseWindowPad(std::vector<std::vector<int64>>* pad) {
     std::vector<int64> low_high;
     if (!SplitAndParseAsInts(padding_str[i], '_', &low_high) ||
         low_high.size() != 2) {
-      return TokenError(
-          "expects padding_low and padding_high separated by '_'");
+      return Error(loc,
+                   "expects padding_low and padding_high separated by '_'");
     }
     pad->push_back(low_high);
   }
@@ -1974,15 +2400,16 @@ bool HloParser::ParsePaddingConfig(PaddingConfig* padding) {
   if (lexer_.GetKind() != TokKind::kPad) {
     return TokenError("expects padding config, e.g., '0_0_0x3_3_1'");
   }
+  LocTy loc = lexer_.GetLoc();
   string str = lexer_.GetStrVal();
   std::vector<string> padding_str = Split(str, 'x');
   for (const auto& padding_dim_str : padding_str) {
     std::vector<int64> padding_dim;
     if (!SplitAndParseAsInts(padding_dim_str, '_', &padding_dim) ||
         (padding_dim.size() != 2 && padding_dim.size() != 3)) {
-      return TokenError(
-          "expects padding config pattern like 'low_high_interior' or "
-          "'low_high'");
+      return Error(loc,
+                   "expects padding config pattern like 'low_high_interior' or "
+                   "'low_high'");
     }
     auto* dim = padding->add_dimensions();
     dim->set_edge_padding_low(padding_dim[0]);
@@ -2024,20 +2451,64 @@ bool HloParser::ParseMetadata(OpMetadata* metadata) {
 
 bool HloParser::ParseOpcode(HloOpcode* result) {
   VLOG(1) << "ParseOpcode";
-  if (lexer_.GetKind() != TokKind::kOpcode) {
+  if (lexer_.GetKind() != TokKind::kIdent) {
     return TokenError("expects opcode");
   }
-  *result = lexer_.GetOpcodeVal();
+  string val = lexer_.GetStrVal();
+  auto status_or_result = StringToHloOpcode(val);
+  if (!status_or_result.ok()) {
+    return TokenError(
+        Printf("expects opcode but sees: %s, error: %s", val.c_str(),
+               status_or_result.status().error_message().c_str()));
+  }
+  *result = status_or_result.ValueOrDie();
+  lexer_.Lex();
+  return true;
+}
+
+bool HloParser::ParseFftType(FftType* result) {
+  VLOG(1) << "ParseFftType";
+  if (lexer_.GetKind() != TokKind::kIdent) {
+    return TokenError("expects fft type");
+  }
+  string val = lexer_.GetStrVal();
+  if (!FftType_Parse(val, result) || !FftType_IsValid(*result)) {
+    return TokenError(Printf("expects fft type but sees: %s", val.c_str()));
+  }
   lexer_.Lex();
   return true;
 }
 
 bool HloParser::ParseFusionKind(HloInstruction::FusionKind* result) {
   VLOG(1) << "ParseFusionKind";
-  if (lexer_.GetKind() != TokKind::kFusionKind) {
+  if (lexer_.GetKind() != TokKind::kIdent) {
     return TokenError("expects fusion kind");
   }
-  *result = lexer_.GetFusionKindVal();
+  string val = lexer_.GetStrVal();
+  auto status_or_result = StringToFusionKind(val);
+  if (!status_or_result.ok()) {
+    return TokenError(
+        Printf("expects fusion kind but sees: %s, error: %s", val.c_str(),
+               status_or_result.status().error_message().c_str()));
+  }
+  *result = status_or_result.ValueOrDie();
+  lexer_.Lex();
+  return true;
+}
+
+bool HloParser::ParseRandomDistribution(RandomDistribution* result) {
+  VLOG(1) << "ParseRandomDistribution";
+  if (lexer_.GetKind() != TokKind::kIdent) {
+    return TokenError("expects random distribution");
+  }
+  string val = lexer_.GetStrVal();
+  auto status_or_result = StringToRandomDistribution(val);
+  if (!status_or_result.ok()) {
+    return TokenError(
+        Printf("expects random distribution but sees: %s, error: %s",
+               val.c_str(), status_or_result.status().error_message().c_str()));
+  }
+  *result = status_or_result.ValueOrDie();
   lexer_.Lex();
   return true;
 }
@@ -2103,20 +2574,20 @@ bool HloParser::EatIfPresent(TokKind kind) {
   return true;
 }
 
-bool HloParser::AddInstruction(const string& name,
-                               HloInstruction* instruction) {
+bool HloParser::AddInstruction(const string& name, HloInstruction* instruction,
+                               LocTy name_loc) {
   auto result = instruction_pool_.insert({name, instruction});
   if (!result.second) {
-    return TokenError(StrCat("instruction already exists: ", name));
+    return Error(name_loc, StrCat("instruction already exists: ", name));
   }
   return true;
 }
 
-bool HloParser::AddComputation(const string& name,
-                               HloComputation* computation) {
+bool HloParser::AddComputation(const string& name, HloComputation* computation,
+                               LocTy name_loc) {
   auto result = computation_pool_.insert({name, computation});
   if (!result.second) {
-    return TokenError(StrCat("computation already exists: ", name));
+    return Error(name_loc, StrCat("computation already exists: ", name));
   }
   return true;
 }
@@ -2127,7 +2598,7 @@ StatusOr<std::unique_ptr<HloModule>> Parse(StringPiece str,
                                            const HloModuleConfig& config) {
   HloParser parser(str, config);
   if (!parser.Run()) {
-    return InvalidArgument("Syntax error: %s", parser.GetError().c_str());
+    return InvalidArgument("Syntax error:\n%s", parser.GetError().c_str());
   }
   return parser.ConsumeHloModule();
 }
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
index 90cdb87a1ebcf59d291eebd52963a130f19f4403..dd76d8d0fee7cdfa22829fe92ff889e44157216e 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc
@@ -25,7 +25,6 @@ namespace tools {
 namespace {
 
 using tensorflow::StringPiece;
-using tensorflow::strings::StrCat;
 
 struct TestData {
   string test_name;
@@ -46,7 +45,7 @@ std::vector<TestData> CreateTestCases() {
 // ax + y
 {
 "AxpyParam",
-R"(HloModule axpy_module:
+R"(HloModule axpy_module
 
 ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] {
   %alpha = f32[] parameter(0)
@@ -62,7 +61,7 @@ ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] {
 // pred constant
 {
 "ConstantPred",
-R"(HloModule constant_pred_module:
+R"(HloModule constant_pred_module
 
 ENTRY %constant_pred () -> pred[] {
   ROOT %constant = pred[] constant(true), metadata={op_type="const" op_name="\"it\'s not a problem\n" source_file="path/to/test.cc" source_line=68}
@@ -73,7 +72,7 @@ ENTRY %constant_pred () -> pred[] {
 // s32 constant
 {
 "ConstantS32",
-R"(HloModule constant_s32_module:
+R"(HloModule constant_s32_module
 
 ENTRY %constant_s32 () -> s32[] {
   ROOT %constant = s32[] constant(-42)
@@ -84,7 +83,7 @@ ENTRY %constant_s32 () -> s32[] {
 // f32 constant, but the value is not a decimal
 {
 "ConstantF32",
-R"(HloModule ConstantF32_module:
+R"(HloModule ConstantF32_module
 
 ENTRY %ConstantF32.v4 () -> f32[] {
   ROOT %constant = f32[] constant(42)
@@ -95,7 +94,7 @@ ENTRY %ConstantF32.v4 () -> f32[] {
 // f32 constant, rank 1 empty array.
 {
 "ConstantF32R1Empty",
-R"(HloModule ConstantF32Empty_module:
+R"(HloModule ConstantF32Empty_module
 
 ENTRY %ConstantF32Empty.v4 () -> f32[0] {
   ROOT %constant = f32[0]{0} constant({})
@@ -106,7 +105,7 @@ ENTRY %ConstantF32Empty.v4 () -> f32[0] {
 // f32 constant, rank 4 empty array.
 {
 "ConstantF32R4Empty",
-R"(HloModule ConstantF32R4Empty_module:
+R"(HloModule ConstantF32R4Empty_module
 
 ENTRY %ConstantF32R4Empty.v4 () -> f32[2,0,4,3] {
   ROOT %constant = f32[2,0,4,3]{3,2,1,0} constant(f32[2,0,4,3] { { /*i0=0*/ }, { /*i0=1*/ } })
@@ -117,7 +116,7 @@ ENTRY %ConstantF32R4Empty.v4 () -> f32[2,0,4,3] {
 // constant 4D
 {
 "Constant4D",
-R"(HloModule Small_3x2x1x1_module:
+R"(HloModule Small_3x2x1x1_module
 
 ENTRY %Small_3x2x1x1.v1 () -> f32[3,2,1,1] {
   ROOT %constant = f32[3,2,1,1]{3,2,1,0} constant(f32[3,2,1,1] { { /*i0=0*/ { /*i1=0*/ {-1} }, { /*i1=1*/ {4.1} } }, { /*i0=1*/ { /*i1=0*/ {2} }, { /*i1=1*/ {4.1} } }, { /*i0=2*/ { /*i1=0*/ {5} }, { /*i1=1*/ {4.4} } } })
@@ -128,7 +127,7 @@ ENTRY %Small_3x2x1x1.v1 () -> f32[3,2,1,1] {
 // non-finite constants: nan, inf, -inf
 {
 "ConstantNonFinite",
-R"(HloModule IsFiniteR1F32s_module:
+R"(HloModule IsFiniteR1F32s_module
 
 ENTRY %IsFiniteR1F32s.v2 () -> pred[6] {
   %constant = f32[6]{0} constant({nan, 7, nan, -1, inf, -inf})
@@ -140,18 +139,29 @@ ENTRY %IsFiniteR1F32s.v2 () -> pred[6] {
 // constant f16
 {
 "ConstantF16",
-R"(HloModule ConstantF16_module:
+R"(HloModule ConstantF16_module
 
 ENTRY %ConstantF16.v4 () -> f16[] {
   ROOT %constant = f16[] constant(500)
 }
 
+)"
+},
+// bf16
+{
+"BF16",
+R"(HloModule BF16
+
+ENTRY %BF16.v4 () -> bf16[] {
+  ROOT %constant = bf16[] constant(500)
+}
+
 )"
 },
 // constant + constant
 {
 "AddConstants",
-R"(HloModule add_constants_module:
+R"(HloModule add_constants_module
 
 ENTRY %add_constants () -> f32[] {
   %constant = f32[] constant(3.14)
@@ -163,7 +173,7 @@ ENTRY %add_constants () -> f32[] {
 // tuple constant
 {
 "TupleConstant",
-R"(HloModule TupleConstant_module:
+R"(HloModule TupleConstant_module
 
 ENTRY %TupleConstant.v1 () -> (f32[2,1], f32[2]) {
   ROOT %constant = (f32[2,1]{1,0}, f32[2]{0}) constant((f32[2,1], f32[2]) ( f32[2,1] { { 1 }, { 2 } }, {2, 42} ))
@@ -174,7 +184,7 @@ ENTRY %TupleConstant.v1 () -> (f32[2,1], f32[2]) {
 // v1 > v2 ? v1 : v2
 {
 "SelectR1F32",
-R"(HloModule SelectR1F32WithCmpR1F32sFromParamsSmall_module:
+R"(HloModule SelectR1F32WithCmpR1F32sFromParamsSmall_module
 
 ENTRY %SelectR1F32WithCmpR1F32sFromParamsSmall.v4 (v1: f32[4], v2: f32[4]) -> f32[4] {
   %v1 = f32[4]{0} parameter(0), sharding={maximal device=1}
@@ -188,7 +198,7 @@ ENTRY %SelectR1F32WithCmpR1F32sFromParamsSmall.v4 (v1: f32[4], v2: f32[4]) -> f3
 // empty tuple
 {
 "EmptyTupleCreate",
-R"(HloModule EmptyTupleCreate_module:
+R"(HloModule EmptyTupleCreate_module
 
 ENTRY %EmptyTupleCreate.v1 () -> () {
   ROOT %tuple = () tuple()
@@ -199,7 +209,7 @@ ENTRY %EmptyTupleCreate.v1 () -> () {
 // tuple
 {
 "TupleCreate",
-R"(HloModule TupleCreate_module:
+R"(HloModule TupleCreate_module
 
 ENTRY %TupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f32[2,3]) {
   %v1 = f32[] parameter(0)
@@ -212,7 +222,7 @@ ENTRY %TupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f
 },
 {
 "ShardedTupleCreate",
-R"(HloModule ShardedTupleCreate_module:
+R"(HloModule ShardedTupleCreate_module
 
 ENTRY %ShardedTupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f32[3], f32[2,3]) {
   %v1 = f32[] parameter(0)
@@ -227,7 +237,7 @@ ENTRY %ShardedTupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f3
 // while (result < 5) { result = result + 1; }
 {
 "WhileWithScalarS32Result",
-R"(HloModule WhileWithScalarS32Result_module:
+R"(HloModule WhileWithScalarS32Result_module
 
 %body.v3 (prev.1: s32[]) -> s32[] {
   %constant = s32[] constant(1)
@@ -251,7 +261,7 @@ ENTRY %WhileWithScalarS32Result.v2 () -> s32[] {
 // send and recv
 {
 "SendRecv",
-R"(HloModule TwoSendRecvBothWayRecvFist_module:
+R"(HloModule TwoSendRecvBothWayRecvFist_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
   %recv = (f32[], u32[]) recv(), channel_id=15, sharding={maximal device=1}
@@ -266,7 +276,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
 // get-tuple-element
 {
 "GetTupleElement",
-R"(HloModule GetTupleElement_module:
+R"(HloModule GetTupleElement_module
 
 ENTRY %GetTupleElement.v4 () -> s32[2,3] {
   %constant = f32[3]{0} constant({1, 2, 3})
@@ -280,7 +290,7 @@ ENTRY %GetTupleElement.v4 () -> s32[2,3] {
 // call
 {
 "Call",
-R"(HloModule CallR0F32IdentityScalar_module:
+R"(HloModule CallR0F32IdentityScalar_module
 
 %Identity.v1 (x: f32[]) -> f32[] {
   ROOT %x = f32[] parameter(0)
@@ -296,7 +306,7 @@ ENTRY %CallR0F32IdentityScalar.v2 () -> f32[] {
 // reduce window
 {
 "ReduceWindow",
-R"(HloModule R4UnitWindow_module:
+R"(HloModule R4UnitWindow_module
 
 %add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] {
   %lhs = f32[] parameter(0)
@@ -315,7 +325,7 @@ ENTRY %R4UnitWindow.v3 (operand: f32[13,12,8,15]) -> f32[13,3,8,15] {
 // reduce window on scalar
 {
 "ReduceWindowScalar",
-R"(HloModule reduce_window_scalar:
+R"(HloModule reduce_window_scalar
 
 %add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] {
   %lhs = f32[] parameter(0)
@@ -334,7 +344,7 @@ ENTRY %R4UnitWindowScalar () -> f32[] {
 // convolution
 {
 "Convolution",
-R"(HloModule Convolve1D1Window_0_module:
+R"(HloModule Convolve1D1Window_0_module
 
 ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] {
   %input = f32[1,2,1]{2,1,0} parameter(0)
@@ -348,7 +358,7 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2
 // convolution rank 2
 {
 "ConvolutionR2",
-R"(HloModule ConvolveR2_module:
+R"(HloModule ConvolveR2_module
 
 ENTRY %ConvolveR2.v3 (input: f32[1,2], filter: f32[1,1]) -> f32[1,2] {
   %input = f32[1,2]{1,0} parameter(0)
@@ -356,12 +366,25 @@ ENTRY %ConvolveR2.v3 (input: f32[1,2], filter: f32[1,1]) -> f32[1,2] {
   ROOT %convolution = f32[1,2]{0,1} convolution(f32[1,2]{1,0} %input, f32[1,1]{1,0} %filter), dim_labels=bf_io->bf
 }
 
+)"
+},
+// convolution backward
+{
+"ConvolutionBackward",
+R"(HloModule ConvolveBackward_module
+
+ENTRY %ConvolveBackward (input: f32[128,7,7,512], filter: f32[3,3,512,512]) -> f32[128,14,14,512] {
+  %input = f32[128,7,7,512]{0,3,2,1} parameter(0)
+  %filter = f32[3,3,512,512]{3,2,1,0} parameter(1)
+  ROOT %convolution-base-dilated = f32[128,14,14,512]{0,3,2,1} convolution(f32[128,7,7,512]{0,3,2,1} %input, f32[3,3,512,512]{3,2,1,0} %filter), window={size=3x3 pad=1_2x1_2 lhs_dilate=2x2 rhs_reversal=1x1}, dim_labels=b01f_01oi->b01f
+}
+
 )"
 },
 // reverse(constant)
 {
 "Reverse4D",
-R"(HloModule Reverse4DFloatArrayOnDim01_module:
+R"(HloModule Reverse4DFloatArrayOnDim01_module
 
 ENTRY %Reverse4DFloatArrayOnDim01.v2 () -> f32[4,3,2,1] {
   %constant = f32[4,3,2,1]{0,1,2,3} constant(f32[4,3,2,1] { { /*i0=0*/ { /*i1=0*/ {1}, {2} }, { /*i1=1*/ {3}, {4} }, { /*i1=2*/ {5}, {6} } }, { /*i0=1*/ { /*i1=0*/ {7}, {8} }, { /*i1=1*/ {9}, {10} }, { /*i1=2*/ {11}, {12} } }, { /*i0=2*/ { /*i1=0*/ {13}, {14} }, { /*i1=1*/ {15}, {16} }, { /*i1=2*/ {17}, {18} } }, { /*i0=3*/ { /*i1=0*/ {19}, {20} }, { /*i1=1*/ {21}, {22} }, { /*i1=2*/ {23}, {24} } } })
@@ -373,7 +396,7 @@ ENTRY %Reverse4DFloatArrayOnDim01.v2 () -> f32[4,3,2,1] {
 // concat
 {
 "Concat",
-R"(HloModule Concat2x3With2x5_module:
+R"(HloModule Concat2x3With2x5_module
 
 ENTRY %Concat2x3With2x5.v3 () -> f32[2,8] {
   %constant = f32[2,3]{1,0} constant(f32[2,3] { { 0, 1, 2 }, { 1000, 1001, 1002 } })
@@ -381,50 +404,12 @@ ENTRY %Concat2x3With2x5.v3 () -> f32[2,8] {
   ROOT %concatenate = f32[2,8]{1,0} concatenate(f32[2,3]{1,0} %constant, f32[2,5]{1,0} %constant.1), dimensions={1}
 }
 
-)"
-},
-// map
-{
-"Map",
-R"(HloModule MapBinaryAdder_module:
-
-%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] {
-  %lhs = f32[] parameter(0)
-  %rhs = f32[] parameter(1)
-  ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs)
-}
-
-ENTRY %MapBinaryAdder.v3 (param0: f32[4], param1: f32[4]) -> f32[4] {
-  %param0 = f32[4]{0} parameter(0)
-  %param1 = f32[4]{0} parameter(1)
-  ROOT %map = f32[4]{0} map(f32[4]{0} %param0, f32[4]{0} %param1), to_apply=%add_F32.v3
-}
-
-)"
-},
-// reduce
-{
-"Reduce",
-R"(HloModule ReduceR3ToR2_module:
-
-%add_F32.v3 (lhs: f32[], rhs: f32[]) -> f32[] {
-  %lhs = f32[] parameter(0)
-  %rhs = f32[] parameter(1)
-  ROOT %add = f32[] add(f32[] %lhs, f32[] %rhs)
-}
-
-ENTRY %ReduceR3ToR2.v3 (input: f32[8,16,256]) -> f32[8,16] {
-  %input = f32[8,16,256]{2,1,0} parameter(0)
-  %constant = f32[] constant(0)
-  ROOT %reduce = f32[8,16]{1,0} reduce(f32[8,16,256]{2,1,0} %input, f32[] %constant), dimensions={2}, to_apply=%add_F32.v3
-}
-
 )"
 },
 // select and scatter
 {
 "SelectAndScatter",
-R"(HloModule R4F32OverlapSmall_module:
+R"(HloModule R4F32OverlapSmall_module
 
 %ge_F32.v3 (lhs: f32[], rhs: f32[]) -> pred[] {
   %lhs = f32[] parameter(0)
@@ -450,7 +435,7 @@ ENTRY %R4F32OverlapSmall.v4 () -> f32[4,5,1,1] {
 // select and scatter on scalar
 {
 "SelectAndScatterScalar",
-R"(HloModule select_and_scatter_scalar:
+R"(HloModule select_and_scatter_scalar
 
 %ge_F32.v3 (lhs: f32[], rhs: f32[]) -> pred[] {
   %lhs = f32[] parameter(0)
@@ -476,7 +461,7 @@ ENTRY %SelectAndScatterScalar () -> f32[] {
 // slice
 {
 "Slice",
-R"(HloModule slice_module:
+R"(HloModule slice_module
 
 ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] {
   %p0 = f32[3,3,4,4]{3,2,1,0} parameter(0)
@@ -488,7 +473,7 @@ ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] {
 // slice, no stride
 {
 "SliceNoStride",
-R"(HloModule Slice3x3x3_To_1x3x3_F32_module:
+R"(HloModule Slice3x3x3_To_1x3x3_F32_module
 
 ENTRY %Slice3x3x3_To_1x3x3_F32.v2 () -> f32[1,3,3] {
   %constant = f32[3,3,3]{2,1,0} constant(f32[3,3,3] { { { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 } }, { { 9, 10, 11 }, { 12, 13, 14 }, { 15, 16, 17 } }, { { 18, 19, 20 }, { 21, 22, 23 }, { 24, 25, 26 } } })
@@ -500,7 +485,7 @@ ENTRY %Slice3x3x3_To_1x3x3_F32.v2 () -> f32[1,3,3] {
 // slice R0
 {
 "SliceR0",
-R"(HloModule SliceR0_module:
+R"(HloModule SliceR0_module
 
 ENTRY %SliceR0.v2 () -> s32[] {
   %constant = s32[] constant(1)
@@ -512,7 +497,7 @@ ENTRY %SliceR0.v2 () -> s32[] {
 // transpose
 {
 "Transpose",
-R"(HloModule Transpose_module:
+R"(HloModule Transpose_module
 
 ENTRY %Transpose.v2 () -> s32[1,2,3] {
   %constant = s32[1,2,3]{2,1,0} constant(s32[1,2,3] { { { 1, 2, 3 }, { 4, 5, 6 } } })
@@ -524,7 +509,7 @@ ENTRY %Transpose.v2 () -> s32[1,2,3] {
 // Dynamic slice
 {
 "DynamicSlice",
-R"(HloModule DynamicSlice_module:
+R"(HloModule DynamicSlice_module
 
 ENTRY %DynamicSlice.v5 (original_parameter: s32[2,2,258], start_index: s32[1]) -> s32[2,2,258] {
   %original_parameter = s32[2,2,258]{2,1,0} parameter(0)
@@ -539,7 +524,7 @@ ENTRY %DynamicSlice.v5 (original_parameter: s32[2,2,258], start_index: s32[1]) -
 // Dynamic update slice
 {
 "DynamicUpdateSlice",
-R"(HloModule DynamicUpdateSlice_module:
+R"(HloModule DynamicUpdateSlice_module
 
 ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_indices: s32[4]) -> s32[1,1,25,1] {
   %input = s32[1,1,25,1]{3,2,1,0} parameter(0)
@@ -553,7 +538,7 @@ ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_
 // batch norm training
 {
 "BatchNormTraining",
-R"(HloModule BasicTraining_module:
+R"(HloModule BasicTraining_module
 
 ENTRY %BasicTraining.v4 () -> (f32[2,2,1,2], f32[2], f32[2]) {
   %constant = f32[2,2,1,2]{3,2,1,0} constant(f32[2,2,1,2] { { /*i0=0*/ { /*i1=0*/ {1, 2} }, { /*i1=1*/ {3, 4} } }, { /*i0=1*/ { /*i1=0*/ {5, 6} }, { /*i1=1*/ {7, 8} } } })
@@ -567,7 +552,7 @@ ENTRY %BasicTraining.v4 () -> (f32[2,2,1,2], f32[2], f32[2]) {
 // batch norm inference
 {
 "BatchNormInference",
-R"(HloModule BatchNormInference_module:
+R"(HloModule BatchNormInference_module
 
 ENTRY %BatchNormInference.v6 (input: f32[2,2,2,2], offset: f32[2], scale: f32[2], mean: f32[2], variance: f32[2]) -> f32[2,2,2,2] {
   %input = f32[2,2,2,2]{3,2,1,0} parameter(0)
@@ -583,7 +568,7 @@ ENTRY %BatchNormInference.v6 (input: f32[2,2,2,2], offset: f32[2], scale: f32[2]
 // batch norm grad
 {
 "BatchNormGrad",
-R"(HloModule BatchNormGrad_module:
+R"(HloModule BatchNormGrad_module
 
 ENTRY %BatchNormGrad.v4 (input: f32[2,2,2,2], scale: f32[2], mean: f32[2], variance: f32[2], grad_output: f32[2,2,2,2]) -> (f32[2,2,2,2], f32[2], f32[2]) {
   %input = f32[2,2,2,2]{3,2,1,0} parameter(0)
@@ -594,12 +579,60 @@ ENTRY %BatchNormGrad.v4 (input: f32[2,2,2,2], scale: f32[2], mean: f32[2], varia
   ROOT %batch-norm-grad = (f32[2,2,2,2]{3,2,1,0}, f32[2]{0}, f32[2]{0}) batch-norm-grad(f32[2,2,2,2]{3,2,1,0} %input, f32[2]{0} %scale, f32[2]{0} %mean, f32[2]{0} %variance, f32[2,2,2,2]{3,2,1,0} %grad_output), epsilon=0.001, feature_index=0
 }
 
+)"
+},
+// fft
+{
+"Fft",
+R"(HloModule Fft_module
+
+ENTRY %Fft (input: c64[8,32]) -> c64[8,32] {
+  %input = c64[8,32]{1,0} parameter(0)
+  ROOT %fft = c64[8,32]{1,0} fft(c64[8,32]{1,0} %input), fft_type=FFT, fft_length={32}
+}
+
+)"
+},
+// ifft
+{
+"Ifft2d",
+R"(HloModule Ifft2d_module
+
+ENTRY %Ifft2d (input: c64[5,8,32]) -> c64[5,8,32] {
+  %input = c64[5,8,32]{2,1,0} parameter(0)
+  ROOT %fft = c64[5,8,32]{2,1,0} fft(c64[5,8,32]{2,1,0} %input), fft_type=IFFT, fft_length={8,32}
+}
+
+)"
+},
+// rfft2d
+{
+"Rfft2d",
+R"(HloModule Rfft2d_module
+
+ENTRY %Rfft2d (input: f32[5,64,32]) -> c64[5,64,17] {
+  %input = f32[5,64,32]{2,1,0} parameter(0)
+  ROOT %fft = c64[5,64,17]{2,1,0} fft(f32[5,64,32]{2,1,0} %input), fft_type=RFFT, fft_length={64,32}
+}
+
+)"
+},
+// irfft3d
+{
+"Irfft3d",
+R"(HloModule Irfft3d_module
+
+ENTRY %Irfft3d (input: c64[5,64,128,33]) -> f32[5,64,128,64] {
+  %input = c64[5,64,128,33]{3,2,1,0} parameter(0)
+  ROOT %fft = f32[5,64,128,64]{3,2,1,0} fft(c64[5,64,128,33]{3,2,1,0} %input), fft_type=IRFFT, fft_length={64,128,64}
+}
+
 )"
 },
 // pad
 {
 "Pad",
-R"(HloModule Pad1DS3Array_module:
+R"(HloModule Pad1DS3Array_module
 
 ENTRY %Pad1DS3Array.v3 () -> f32[8] {
   %constant = f32[3]{0} constant({1, 2, 3})
@@ -612,7 +645,7 @@ ENTRY %Pad1DS3Array.v3 () -> f32[8] {
 // pad has interior
 {
 "PadHasInterior",
-R"(HloModule PadHasInterior_module:
+R"(HloModule PadHasInterior_module
 
 ENTRY %PadHasInterior.v3 (input: f32[1,25,7,7]) -> f32[1,25,17,11] {
   %input = f32[1,25,7,7]{3,2,1,0} parameter(0)
@@ -620,12 +653,25 @@ ENTRY %PadHasInterior.v3 (input: f32[1,25,7,7]) -> f32[1,25,17,11] {
   ROOT %pad = f32[1,25,17,11]{3,2,1,0} pad(f32[1,25,7,7]{3,2,1,0} %input, f32[] %constant), padding=0_0_0x0_0_0x2_2_1x2_2_0
 }
 
+)"
+},
+// Negative padding
+{
+"PadHasNegativePadding",
+R"(HloModule PadHasNegativePadding_module
+
+ENTRY %PadHasNegativePadding (input: f32[1,25,7,7,10]) -> f32[1,15,6,3,29] {
+  %input = f32[1,25,7,7,10]{4,3,2,1,0} parameter(0)
+  %constant = f32[] constant(-5.123)
+  ROOT %pad = f32[1,15,6,3,29]{4,3,2,1,0} pad(f32[1,25,7,7,10]{4,3,2,1,0} %input, f32[] %constant), padding=0_0_0x0_-10_0x0_-1_0x-2_-2_0x-1_-1_3
+}
+
 )"
 },
 // fusion
 {
 "Fusion",
-R"(HloModule fusion_module:
+R"(HloModule fusion_module
 
 %fused_computation (constant.param_0: f32[3,2,1,1], constant.1.param_1: f32[2]) -> f32[3,2,1,1] {
   %constant.param_0 = f32[3,2,1,1]{3,2,1,0} parameter(0)
@@ -640,22 +686,182 @@ ENTRY %fusion.v3 () -> f32[3,2,1,1] {
   ROOT %fusion = f32[3,2,1,1]{3,2,1,0} fusion(f32[3,2,1,1]{3,2,1,0} %constant, f32[2]{0} %constant.1), kind=kLoop, calls=%fused_computation
 }
 
+)"
+},
+{
+"Sparse",
+R"(HloModule sparse_f32
+
+ENTRY %sparse () -> f32[2,3,4] {
+  ROOT %foo = f32[2,3,4]sparse{10} constant(f32[2,3,4]{[0, 1, 2]: 1, [1, 2, 3]: 2, [2, 3, 4]: 3})
+}
+
+)"
+},
+{
+"SparseEmpty",
+R"(HloModule sparse_f32_empty
+
+ENTRY %sparse_f32_empty () -> f32[2,3,4] {
+  ROOT %foo = f32[2,3,4]sparse{10} constant(f32[2,3,4]{})
+}
+
+)"
+},
+{
+"SparseR1",
+R"(HloModule sparse_f32_r1
+
+ENTRY %sparse_f32_r1 () -> f32[9] {
+  ROOT %foo = f32[9]sparse{10} constant(f32[9]{1: 2, 3: 4, 5: 6})
+}
+
+)"
+},
+  });
+  // clang-format on
+}
+
+std::vector<TestData> CreateShortTestCases() {
+  // clang-format off
+  return std::vector<TestData>({
+// map
+{
+"Map",
+R"(HloModule MapBinaryAdder_module
+
+add_F32.v3 {
+  lhs = f32[] parameter(0)
+  rhs = f32[] parameter(1)
+  ROOT add = f32[] add(lhs, rhs)
+}
+
+ENTRY MapBinaryAdder.v3 {
+  param0 = f32[4]{0} parameter(0)
+  param1 = f32[4]{0} parameter(1)
+  ROOT map = f32[4]{0} map(param0, param1), to_apply=add_F32.v3
+}
+
+)"
+},
+// reduce
+{
+"Reduce",
+R"(HloModule ReduceR3ToR2_module
+
+add_F32.v3 {
+  lhs = f32[] parameter(0)
+  rhs = f32[] parameter(1)
+  ROOT add = f32[] add(lhs, rhs)
+}
+
+ENTRY ReduceR3ToR2.v3 {
+  input = f32[8,16,256]{2,1,0} parameter(0)
+  constant = f32[] constant(0)
+  ROOT reduce = f32[8,16]{1,0} reduce(input, constant), dimensions={2}, to_apply=add_F32.v3
+}
+
 )"
 },
 // infeed/outfeed
 {
 "InfeedOutfeed",
-R"(HloModule outfeed_module:
+R"(HloModule outfeed_module
+
+ENTRY InfeedToOutfeed {
+  infeed = (u32[3]{0}, pred[]) infeed()
+  outfeed = () outfeed(infeed)
+  ROOT infeed.1 = (u32[3]{0}, pred[]) infeed()
+  outfeed.1 = () outfeed(infeed.1)
+}
+
+)"
+},
+// Rng
+{
+"Rng",
+R"(HloModule rng_module
 
-ENTRY %InfeedToOutfeed () -> (u32[3], pred[]) {
-  %infeed = (u32[3]{0}, pred[]) infeed()
-  %outfeed = () outfeed((u32[3]{0}, pred[]) %infeed)
-  ROOT %infeed.1 = (u32[3]{0}, pred[]) infeed()
-  %outfeed.1 = () outfeed((u32[3]{0}, pred[]) %infeed.1)
+ENTRY Rng {
+  constant = f32[] constant(0)
+  constant.1 = f32[] constant(1)
+  ROOT rng = f32[8]{0} rng(constant, constant.1), distribution=rng_uniform
 }
 
 )"
+},
+// Reduce precision
+{
+"ReducePrevison",
+R"(HloModule reduce_precision
+
+ENTRY ReducePrecision {
+  constant = f32[1]{0} constant({3.14159})
+  ROOT reduce-precision = f32[1]{0} reduce-precision(constant), exponent_bits=8, mantissa_bits=10
 }
+
+)"
+},
+// Conditional
+{
+"Conditional",
+R"(HloModule conditional
+
+Negate {
+  x = f32[] parameter(0)
+  ROOT negate = f32[] negate(x)
+}
+
+Identity {
+  y = f32[] parameter(0)
+  ROOT copy = f32[] copy(y)
+}
+
+ENTRY Parameters1.v4 {
+  constant = pred[] constant(true)
+  constant.1 = f32[] constant(56)
+  constant.2 = f32[] constant(12)
+  ROOT conditional = f32[] conditional(constant, constant.1, constant.2), true_computation=Negate, false_computation=Identity
+}
+
+)"
+},
+// CustomCall
+{
+"CustomCall",
+R"(HloModule custom_call
+
+ENTRY CustomCall {
+  constant = f32[1]{0} constant({12345})
+  ROOT custom-call = f32[1,2,3]{0,2,1} custom-call(constant), custom_call_target="foo\"bar"
+}
+
+)"
+},
+// Variables with non-default names
+{
+"NonDefaultNames",
+R"(HloModule add_constants_module
+
+ENTRY add_constants {
+  foo = f32[] constant(3.14)
+  ROOT bar = f32[] add(foo, foo)
+}
+
+)"
+},
+{
+"Dot",
+R"(HloModule dot
+
+ENTRY dot {
+  a = f32[2,10]{1,0} parameter(0)
+  b = f32[10,3]{1,0} parameter(1)
+  ROOT dot = f32[2,3]{1,0} dot(a, b), lhs_batch_dims={0}, lhs_contracting_dims={1}, rhs_contracting_dims={0}
+}
+
+)"
+},
   });
   // clang-format on
 }
@@ -674,18 +880,35 @@ class HloParserTest : public ::testing::Test,
   void ExpectEqual() {
     const string& original = GetParam().module_string;
     auto result = Parse(original);
-    TF_EXPECT_OK(result.status());
+    TF_ASSERT_OK(result.status());
+    EXPECT_EQ(original, result.ValueOrDie()->ToString(
+                            HloPrintOptions().set_print_large_constants(true)));
+  }
+};
+
+class HloParserShortTest : public HloParserTest {
+ protected:
+  void ExpectEqualShort() {
+    const string& original = GetParam().module_string;
+    auto result = Parse(original);
+    TF_ASSERT_OK(result.status());
     EXPECT_EQ(original,
-              result.ValueOrDie()->ToString(/*include_large_constants=*/true));
+              result.ValueOrDie()->ToString(HloPrintOptions::ShortParsable()));
   }
 };
 
 TEST_P(HloParserTest, Run) { ExpectEqual(); }
 
+TEST_P(HloParserShortTest, Run) { ExpectEqualShort(); }
+
 INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTest,
                         ::testing::ValuesIn(CreateTestCases()),
                         TestDataToString);
 
+INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserShortTest,
+                        ::testing::ValuesIn(CreateShortTestCases()),
+                        TestDataToString);
+
 TEST_F(HloParserTest, Empty) {
   const string original = "";
   auto result = Parse(original);
@@ -749,7 +972,7 @@ ENTRY %blabla (x: f32[]) -> pred[] {
 }
 
 TEST_F(HloParserTest, MoreConstants) {
-  const string original = R"(HloModule SelectScalarS32True_module:
+  const string original = R"(HloModule SelectScalarS32True_module
 
 ENTRY %SelectScalarS32True.v4 () -> s32[] {
   %constant.2 = pred[] constant(true)
@@ -766,7 +989,7 @@ ENTRY %SelectScalarS32True.v4 () -> s32[] {
 }
 
 TEST_F(HloParserTest, LiteralDimensionsMismatch_1) {
-  const string original = R"(HloModule some_2_module:
+  const string original = R"(HloModule some_2_module
 
 ENTRY %some_2 () -> f32[2] {
   ROOT %constant = f32[2]{0} constant({1,{2}})
@@ -780,7 +1003,7 @@ ENTRY %some_2 () -> f32[2] {
 }
 
 TEST_F(HloParserTest, LiteralDimensionsMismatch_2) {
-  const string original = R"(HloModule some_2x3_module:
+  const string original = R"(HloModule some_2x3_module
 
 ENTRY %some_2x3 () -> f32[2,3] {
   ROOT %constant = f32[2,3]{1,0} constant(f32[2,3] {1, 2, 3, 4, 5, 6})
@@ -794,7 +1017,7 @@ ENTRY %some_2x3 () -> f32[2,3] {
 }
 
 TEST_F(HloParserTest, LiteralDimensionsMismatch_3) {
-  const string original = R"(HloModule some_2x3x2_module:
+  const string original = R"(HloModule some_2x3x2_module
 
 ENTRY %some_2x3x2 () -> f32[2,3,2] {
   ROOT %constant = f32[2,3,2]{2,1,0} constant(f32[2,3,2] {{{1, 2}, {3, 4}, {5, 6}, {7, 8}, {9, 10}, {11, 12}}})
@@ -809,7 +1032,7 @@ ENTRY %some_2x3x2 () -> f32[2,3,2] {
 
 TEST_F(HloParserTest, ConstantF16Overflow) {
   const string original =
-      R"(HloModule ConstantF16Overflow_module:
+      R"(HloModule ConstantF16Overflow_module
 
 ENTRY %ConstantF16Overflow.v4 () -> f16[] {
   ROOT %constant = f16[] constant(-65505)
@@ -823,7 +1046,7 @@ ENTRY %ConstantF16Overflow.v4 () -> f16[] {
 }
 
 TEST_F(HloParserTest, ConstantWithExp) {
-  const string original = R"(HloModule ConstantWithExp_module:
+  const string original = R"(HloModule ConstantWithExp_module
 
 ENTRY %ConstantWithExp.v4 () -> f32[] {
   %constant.1 = f32[] constant(3e+2)
@@ -838,7 +1061,7 @@ ENTRY %ConstantWithExp.v4 () -> f32[] {
 }
 
 TEST_F(HloParserTest, AttibutesAnyOrder) {
-  const string original = R"(HloModule any_order_module:
+  const string original = R"(HloModule any_order_module
 
 ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] {
   %input = f32[1,2,1]{2,1,0} parameter(0)
@@ -852,7 +1075,7 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2
 }
 
 TEST_F(HloParserTest, InvalidDimLabels) {
-  string prefix = R"(HloModule invalid_dim_labels_module:
+  string prefix = R"(HloModule invalid_dim_labels_module
 
 ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] {
   %input = f32[1,2,1]{2,1,0} parameter(0)
@@ -864,19 +1087,21 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2
 
 )";
 
-  ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=00_01_10", suffix))
-                      .status()
-                      .error_message(),
-                  "expects dim labels pattern");
+  ExpectHasSubstr(
+      Parse(tensorflow::strings::StrCat(prefix, ",dim_labels=00_01_10", suffix))
+          .status()
+          .error_message(),
+      "expects dim labels pattern");
 
-  ExpectHasSubstr(Parse(StrCat(prefix, ",dim_labels=010_1100->010", suffix))
+  ExpectHasSubstr(Parse(tensorflow::strings::StrCat(
+                            prefix, ",dim_labels=010_1100->010", suffix))
                       .status()
                       .error_message(),
                   "must have the same rank");
 }
 
 TEST_F(HloParserTest, UnexpectedAttribute) {
-  const string original = R"(HloModule unexpected_attr_module:
+  const string original = R"(HloModule unexpected_attr_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
   %recv = (f32[], u32[]) recv(), channel_id=15
@@ -892,7 +1117,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
 }
 
 TEST_F(HloParserTest, MissingAttribute) {
-  const string original = R"(HloModule missing_attr_module:
+  const string original = R"(HloModule missing_attr_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
   %recv = (f32[], u32[]) recv(), channel_id=15
@@ -908,7 +1133,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
 }
 
 TEST_F(HloParserTest, PredecessorUndefined) {
-  const string original = R"(HloModule pre_not_found_module:
+  const string original = R"(HloModule pre_not_found_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
   %recv = (f32[], u32[]) recv(), channel_id=15
@@ -924,7 +1149,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
 }
 
 TEST_F(HloParserTest, SliceAllowOmitStride1) {
-  const string original = R"(HloModule slice_module:
+  const string original = R"(HloModule slice_module
 
 ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] {
   %p0 = f32[3,3,4,4]{3,2,1,0} parameter(0)
@@ -936,7 +1161,7 @@ ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] {
 }
 
 TEST_F(HloParserTest, PaddingConfigIsNotWindowPad) {
-  const string original = R"(HloModule window_pad_module:
+  const string original = R"(HloModule window_pad_module
 
 ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2,1] {
   %input = f32[1,2,1]{2,1,0} parameter(0)
@@ -951,7 +1176,7 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2
 }
 
 TEST_F(HloParserTest, CommaBetweenSubAttributes) {
-  const string original = R"(HloModule test_comma_module:
+  const string original = R"(HloModule test_comma_module
 
 ENTRY %test_comma.v4 () -> f32[] {
   ROOT %constant = f32[] constant(-4.2), metadata={source_line=5, op_type="::const"}
@@ -961,6 +1186,95 @@ ENTRY %test_comma.v4 () -> f32[] {
   TF_EXPECT_OK(Parse(original).status());
 }
 
+TEST_F(HloParserTest, ComputationShapeDoesNotMatchRootShape) {
+  const string original = R"(HloModule custom_call:
+
+ENTRY %CustomCall () -> f32[1] {
+  %constant = f32[1]{0} constant({12345})
+  ROOT %foo = f32[1,2,3]{0,2,1} custom-call(f32[1]{0} %constant), custom_call_target="foo\"bar"
+})";
+  ExpectHasSubstr(Parse(original).status().error_message(),
+                  "Shape of computation CustomCall, f32[1], is not compatible "
+                  "with that of its root instruction foo, f32[1,2,3]");
+}
+
+TEST_F(HloParserTest, EntryComputationWithLayout) {
+  const string original = R"(HloModule layout:
+add_F32.v3 {
+  lhs = f32[] parameter(0)
+  rhs = f32[] parameter(1)
+  ROOT add = f32[] add(lhs, rhs)
+}
+
+ENTRY %Reduce (input: f32[8,16,256]) -> f32[8,16] {
+  input = f32[8,16,256]{0,1,2} parameter(0)
+  constant = f32[] constant(0)
+  ROOT reduce = f32[8,16]{0,1} reduce(input, constant), dimensions={2}, to_apply=add_F32.v3
+})";
+
+  auto module = Parse(original);
+  TF_ASSERT_OK(module.status());
+  auto program_layout = module.ValueOrDie()->entry_computation_layout();
+  ASSERT_EQ(program_layout.parameter_count(), 1);
+  auto param_layout = program_layout.parameter_layout(0).layout();
+  auto result_layout = program_layout.result_layout().layout();
+  EXPECT_TRUE(
+      LayoutUtil::Equal(LayoutUtil::MakeLayout({0, 1, 2}), param_layout))
+      << "actual layout of parameter(0) is "
+      << LayoutUtil::HumanString(param_layout);
+  EXPECT_TRUE(LayoutUtil::Equal(LayoutUtil::MakeLayout({0, 1}), result_layout))
+      << "actual layout of result is "
+      << LayoutUtil::HumanString(result_layout);
+}
+
+TEST_F(HloParserTest, NoEntry) {
+  const string original = R"(HloModule no_entry:
+c1 {
+  const1 = f32[1]{0} constant({12345})
+}
+c2 {
+  const2 = f32[1]{0} constant({67890})
+})";
+  auto module = Parse(original);
+  TF_ASSERT_OK(module.status());
+  EXPECT_EQ(module.ValueOrDie()->entry_computation()->name(), "c2");
+}
+
+TEST_F(HloParserTest, NoRoot) {
+  const string original = R"(HloModule no_root:
+ENTRY consts {
+  first = f32[1]{0} constant({12345})
+  last = f32[1]{0} constant({67890})
+})";
+  auto module = Parse(original);
+  TF_ASSERT_OK(module.status());
+  EXPECT_EQ(
+      module.ValueOrDie()->entry_computation()->root_instruction()->name(),
+      "last");
+}
+
+TEST_F(HloParserTest, MultipleEntries) {
+  const string original = R"(HloModule multiple_entries:
+ENTRY c1 {
+  const1 = f32[1]{0} constant({12345})
+}
+ENTRY c2 {
+  const2 = f32[1]{0} constant({67890})
+})";
+  ExpectHasSubstr(Parse(original).status().error_message(),
+                  "expects only one ENTRY");
+}
+
+TEST_F(HloParserTest, MultipleRoots) {
+  const string original = R"(HloModule multiple_roots:
+ENTRY consts {
+  ROOT const1 = f32[1]{0} constant({12345})
+  ROOT const2 = f32[1]{0} constant({12345})
+})";
+  ExpectHasSubstr(Parse(original).status().error_message(),
+                  "one computation should have only one ROOT");
+}
+
 }  // namespace
 }  // namespace tools
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/tools/parser/hlo_token.h
index 07e48804d053f31bdff6678f09ee2c1e3b731e0f..7928bee5c2097f353b182095a555c334d7b69c95 100644
--- a/tensorflow/compiler/xla/tools/parser/hlo_token.h
+++ b/tensorflow/compiler/xla/tools/parser/hlo_token.h
@@ -18,6 +18,9 @@ limitations under the License.
 
 #include <string>
 
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/platform/types.h"
+
 namespace xla {
 namespace tools {
 
@@ -60,10 +63,9 @@ enum class TokKind {
   kDimLabels,      // [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
   kDxD,            // [0-9]+(x[0-9]+)+
   kPad,            // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*
+  kIdent,          // other identifiers
   kString,         // "abcd\"\n"
   kShape,          // f32[2,3]{1,0}
-  kOpcode,         // add
-  kFusionKind,     // kLoop, kOutput, ...
   kInt,            // 42
   kDecimal,        // 4.2
 };
diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc
index ec3f6a0471e2ae965846f5ef7560e448fe9d8073..eda5effbb92db92c9317a956497a00c0ec15c27c 100644
--- a/tensorflow/compiler/xla/tools/replay_computation.cc
+++ b/tensorflow/compiler/xla/tools/replay_computation.cc
@@ -59,25 +59,33 @@ namespace xla {
 namespace tools {
 namespace {
 
+// Command-line opts to this tool.  See main() for descriptions of these
+// fields.
+struct Options {
+  string fake_infeed_shape;
+  bool use_fake_data = false;
+  bool print_result = true;
+  int num_runs = 1;
+};
+
 // Invokes the given computation passing arbitrary data for every (unbound)
 // parameter if use_fake_data, Otherwise use recorded data if available.
 //
 // Similarly, infeeds fake data of shape fake_infeed_shape if it is provided;
 // otherwise, no infeed is performed.
 StatusOr<std::unique_ptr<Literal>> ReplayComputation(
-    const SessionModule& module, int num_runs,
-    tensorflow::StringPiece fake_infeed_shape, bool use_fake_data,
-    Client* client) {
+    const SessionModule& module, Client* client, const Options& opts) {
   TF_ASSIGN_OR_RETURN(Computation computation, client->LoadSnapshot(module));
 
   std::vector<std::unique_ptr<GlobalData>> arguments;
-  if (use_fake_data) {
+  if (opts.use_fake_data) {
     arguments = MakeFakeArgumentsOrDie(computation, client);
   } else {  // use recorded data if available
     for (const auto& proto : module.arguments()) {
-      Literal literal(proto);
+      TF_ASSIGN_OR_RETURN(std::unique_ptr<xla::Literal> literal,
+                          Literal::CreateFromProto(proto));
       TF_ASSIGN_OR_RETURN(std::unique_ptr<GlobalData> data,
-                          client->TransferToServer(literal));
+                          client->TransferToServer(*literal));
       arguments.push_back(std::move(data));
     }
   }
@@ -86,12 +94,12 @@ StatusOr<std::unique_ptr<Literal>> ReplayComputation(
   // concurrent infeed occur via the fake_infeed_shape.
   tensorflow::gtl::optional<tensorflow::thread::ThreadPool> pool;
 
-  if (!fake_infeed_shape.empty()) {
+  if (!opts.fake_infeed_shape.empty()) {
     pool.emplace(tensorflow::Env::Default(), "infeed",
                  /*num_threads=*/1);
-    pool->Schedule([fake_infeed_shape, client]() {
+    pool->Schedule([opts, client]() {
       StatusOr<Shape> shape_status =
-          ShapeUtil::ParseShapeString(fake_infeed_shape);
+          ShapeUtil::ParseShapeString(opts.fake_infeed_shape);
       TF_CHECK_OK(shape_status.status());
       Shape shape = std::move(shape_status).ValueOrDie();
       StatusOr<std::unique_ptr<Literal>> data_status = MakeFakeLiteral(shape);
@@ -112,19 +120,19 @@ StatusOr<std::unique_ptr<Literal>> ReplayComputation(
   // Run the computation num_runs times, and return the result from the last
   // execution.
   std::unique_ptr<Literal> result;
-  for (int i = 0; i < num_runs; ++i) {
+  for (int i = 0; i < opts.num_runs; ++i) {
     ExecutionProfile profile;
-    if (use_fake_data) {
-      // If using fake data, execute the computation but don't bother retrieving
-      // the result -- presumably it's uninteresting, since our data is fake.
+    if (opts.print_result) {
+      TF_ASSIGN_OR_RETURN(result, client->ExecuteAndTransfer(
+                                      computation, execute_arguments,
+                                      /*execution_options=*/nullptr, &profile));
+    } else {
+      // If we're not printing the result, execute the computation but don't
+      // bother retrieving the result.  This can be a significant speedup.
       TF_RETURN_IF_ERROR(client
                              ->Execute(computation, execute_arguments,
                                        /*execution_options=*/nullptr, &profile)
                              .status());
-    } else {
-      TF_ASSIGN_OR_RETURN(result, client->ExecuteAndTransfer(
-                                      computation, execute_arguments,
-                                      /*execution_options=*/nullptr, &profile));
     }
     LOG(INFO) << "Execution took "
               << static_cast<double>(profile.compute_time_ns()) / 1e9 << "s";
@@ -133,16 +141,15 @@ StatusOr<std::unique_ptr<Literal>> ReplayComputation(
   return std::move(result);
 }
 
-int RealMain(tensorflow::gtl::ArraySlice<char*> args, int num_runs,
-             tensorflow::StringPiece fake_infeed_shape, bool use_fake_data) {
+int RealMain(tensorflow::gtl::ArraySlice<char*> args, const Options& opts) {
   Client* client = ClientLibrary::LocalClientOrDie();
   tensorflow::Env* env = tensorflow::Env::Default();
   int exit_status = EXIT_SUCCESS;
   for (char* arg : args) {
     SessionModule module;
     TF_CHECK_OK(tensorflow::ReadBinaryProto(env, arg, &module));
-    StatusOr<std::unique_ptr<Literal>> result_status = ReplayComputation(
-        module, num_runs, fake_infeed_shape, use_fake_data, client);
+    StatusOr<std::unique_ptr<Literal>> result_status =
+        ReplayComputation(module, client, opts);
     if (!result_status.ok()) {
       fprintf(stderr, "%s: error: %s\n", arg,
               result_status.status().ToString().c_str());
@@ -156,12 +163,16 @@ int RealMain(tensorflow::gtl::ArraySlice<char*> args, int num_runs,
               ShapeUtil::HumanString(result->shape()).c_str(),
               result->ToString().c_str());
       if (module.has_result()) {
+        std::unique_ptr<Literal> literal =
+            Literal::CreateFromProto(module.result()).ConsumeValueOrDie();
         fprintf(stdout, "was %s:%s\n",
                 ShapeUtil::HumanString(module.result().shape()).c_str(),
-                Literal(module.result()).ToString().c_str());
+                literal->ToString().c_str());
       }
     }
   }
+
+  ClientLibrary::DestroyLocalInstances();
   return exit_status;
 }
 
@@ -170,16 +181,15 @@ int RealMain(tensorflow::gtl::ArraySlice<char*> args, int num_runs,
 }  // namespace xla
 
 int main(int argc, char** argv) {
-  // Flags
-  xla::string fake_infeed_shape;
-  bool use_fake_data = false;
-  int num_runs = 1;
+  xla::tools::Options opts;
   const std::vector<tensorflow::Flag> flag_list = {
-      tensorflow::Flag("use_fake_data", &use_fake_data,
+      tensorflow::Flag("use_fake_data", &opts.use_fake_data,
                        "Replay computation using fake data"),
-      tensorflow::Flag("num_runs", &num_runs,
+      tensorflow::Flag("print_result", &opts.print_result,
+                       "Print the result of the computation to stdout"),
+      tensorflow::Flag("num_runs", &opts.num_runs,
                        "Number of times to run each computation"),
-      tensorflow::Flag("fake_infeed_shape", &fake_infeed_shape,
+      tensorflow::Flag("fake_infeed_shape", &opts.fake_infeed_shape,
                        "Shape of fake data to construct for (infinite) infeed"),
   };
   xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
@@ -191,5 +201,5 @@ int main(int argc, char** argv) {
 
   tensorflow::gtl::ArraySlice<char*> args(argv, argc);
   args.pop_front();  // Pop off the binary name, argv[0]
-  return xla::tools::RealMain(args, num_runs, fake_infeed_shape, use_fake_data);
+  return xla::tools::RealMain(args, opts);
 }
diff --git a/tensorflow/compiler/xla/tools/show_literal.cc b/tensorflow/compiler/xla/tools/show_literal.cc
index b50cb5e28eac14ed99af566939f8bd64e393ff64..fe8e72ba32bb4493b2751cfdfeb977f271092f9c 100644
--- a/tensorflow/compiler/xla/tools/show_literal.cc
+++ b/tensorflow/compiler/xla/tools/show_literal.cc
@@ -40,7 +40,8 @@ int main(int argc, char **argv) {
   xla::LiteralProto literal_proto;
   TF_CHECK_OK(tensorflow::ReadBinaryProto(tensorflow::Env::Default(), argv[1],
                                           &literal_proto));
-  xla::Literal literal(literal_proto);
+  std::unique_ptr<xla::Literal> literal =
+      xla::Literal::CreateFromProto(literal_proto).ConsumeValueOrDie();
   LOG(INFO) << "literal: " << literal_proto.ShortDebugString();
-  fprintf(stderr, "%s\n", literal.ToString().c_str());
+  fprintf(stderr, "%s\n", literal->ToString().c_str());
 }
diff --git a/tensorflow/compiler/xla/tools/show_text_literal.cc b/tensorflow/compiler/xla/tools/show_text_literal.cc
index bbe9902aa17a585c4bad5b732330305dfdd45302..8525873e913185554d18df8c8c3584bfcdcdcabe 100644
--- a/tensorflow/compiler/xla/tools/show_text_literal.cc
+++ b/tensorflow/compiler/xla/tools/show_text_literal.cc
@@ -39,13 +39,13 @@ int main(int argc, char **argv) {
   std::unique_ptr<xla::Literal> literal =
       xla::TextLiteralReader::ReadPath(argv[1]).ConsumeValueOrDie();
 
-  LOG(INFO) << "literal: " << literal->ShortDebugString();
+  LOG(INFO) << "literal: " << *literal;
   fprintf(stderr, "%s\n", literal->ToString().c_str());
   if (literal->shape().element_type() == xla::F32) {
-    float min =
-        *std::min_element(literal->f32s().begin(), literal->f32s().end());
-    float max =
-        *std::max_element(literal->f32s().begin(), literal->f32s().end());
+    float min = *std::min_element(literal->data<float>().begin(),
+                                  literal->data<float>().end());
+    float max = *std::max_element(literal->data<float>().begin(),
+                                  literal->data<float>().end());
     fprintf(stderr, "min: %a=%f\n", min, min);
     fprintf(stderr, "max: %a=%f\n", max, max);
   }
diff --git a/tensorflow/compiler/xla/util.cc b/tensorflow/compiler/xla/util.cc
index e595df3052c3de64de503d7627eff72dcba177ee..fe5d29a6b655a89d559eb1214c2b8dd54d34094c 100644
--- a/tensorflow/compiler/xla/util.cc
+++ b/tensorflow/compiler/xla/util.cc
@@ -191,9 +191,9 @@ std::vector<int64> ComposePermutations(tensorflow::gtl::ArraySlice<int64> p1,
   return output;
 }
 
-bool IsIdentityPermutation(tensorflow::gtl::ArraySlice<int64> p) {
-  for (int64 i = 0; i < p.size(); ++i) {
-    if (p[i] != i) {
+bool IsIdentityPermutation(tensorflow::gtl::ArraySlice<int64> permutation) {
+  for (int64 i = 0; i < permutation.size(); ++i) {
+    if (permutation[i] != i) {
       return false;
     }
   }
diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h
index b722095d1f38bf8a984c3ce9092a65f8e0baa911..bb2db2010c5e0da6ed3fde628eb5928d555815b2 100644
--- a/tensorflow/compiler/xla/util.h
+++ b/tensorflow/compiler/xla/util.h
@@ -239,11 +239,14 @@ std::vector<T> Permute(tensorflow::gtl::ArraySlice<int64> permutation,
 
 // Override of the above that works around compile failures with gcc 7.1.1.
 // For details see https://github.com/tensorflow/tensorflow/issues/10843
+// Hide this workaround from MSVC as it causes ambiguous error.
+#ifndef _MSC_VER
 template <typename T>
 std::vector<T> Permute(tensorflow::gtl::ArraySlice<int64> permutation,
                        const std::vector<T>& input) {
   return Permute<std::vector, T>(permutation, input);
 }
+#endif
 
 // Inverts a permutation, i.e., output_permutation[input_permutation[i]] = i.
 std::vector<int64> InversePermutation(
@@ -395,6 +398,31 @@ std::vector<std::pair<int64, int64>> CommonFactors(
 // Removes illegal characters from filenames.
 string SanitizeFileName(string file_name);
 
+// Simple wrapper around std::all_of.
+template <typename Container, typename Predicate>
+bool c_all_of(Container container, Predicate predicate) {
+  return std::all_of(std::begin(container), std::end(container), predicate);
+}
+
+// Simple wrapper around std::transform.
+template <typename InputContainer, typename OutputIterator,
+          typename UnaryOperation>
+OutputIterator c_transform(InputContainer input_container,
+                           OutputIterator output_iterator,
+                           UnaryOperation unary_op) {
+  return std::transform(std::begin(input_container), std::end(input_container),
+                        output_iterator, unary_op);
+}
+
+// Simple wrapper around std::copy_if.
+template <class InputContainer, class OutputIterator, class UnaryPredicate>
+OutputIterator c_copy_if(InputContainer input_container,
+                         OutputIterator output_iterator,
+                         UnaryPredicate predicate) {
+  return std::copy_if(std::begin(input_container), std::end(input_container),
+                      output_iterator, predicate);
+}
+
 }  // namespace xla
 
 #define XLA_LOG_LINES(SEV, STRING) \
diff --git a/tensorflow/compiler/xla/window_util.cc b/tensorflow/compiler/xla/window_util.cc
index 2e0eba8de0100fb4e7e45348618febd778c88c9a..224eb2a20c8fc5ac4bfe2bb92a65a3bd178dbaf6 100644
--- a/tensorflow/compiler/xla/window_util.cc
+++ b/tensorflow/compiler/xla/window_util.cc
@@ -88,6 +88,11 @@ string ToString(const Window& window) {
       return StrCat(dim.window_dilation());
     });
   }
+  if (HasWindowReversal(window)) {
+    add_field(" rhs_reversal", [](const WindowDimension& dim) {
+      return StrCat(dim.window_reversal() ? 1 : 0);
+    });
+  }
   return str;
 }
 
@@ -141,10 +146,25 @@ bool HasWindowDilation(const Window& window) {
   return false;
 }
 
+bool HasWindowReversal(const Window& window) {
+  for (const auto& dim : window.dimensions()) {
+    if (dim.window_reversal()) {
+      return true;
+    }
+  }
+  return false;
+}
+
 bool HasDilation(const Window& window) {
   return HasBaseDilation(window) || HasWindowDilation(window);
 }
 
+bool IsInactiveWindowDimension(const Window& window, int64 logical_dim) {
+  const WindowDimension& window_dim = window.dimensions(logical_dim);
+  return window_dim.size() == 1 && window_dim.stride() == 1 &&
+         window_dim.padding_low() == 0 && window_dim.padding_high() == 0;
+}
+
 int64 DilatedBound(int64 bound, int64 dilation) {
   CHECK_GE(bound, 0);
   CHECK_GE(dilation, 1);
diff --git a/tensorflow/compiler/xla/window_util.h b/tensorflow/compiler/xla/window_util.h
index 235cb2d59d451a25dc4f824ab488f8cef6b03bfb..17c388fc0b551ec227802434b7db435c4d25d985 100644
--- a/tensorflow/compiler/xla/window_util.h
+++ b/tensorflow/compiler/xla/window_util.h
@@ -39,6 +39,12 @@ bool HasBaseDilation(const Window& window);
 bool HasWindowDilation(const Window& window);
 bool HasDilation(const Window& window);
 
+bool HasWindowReversal(const Window& window);
+
+// Returns true if the given logical dimension is inactive in the sense that it
+// has window bound 1, no striding and no padding.
+bool IsInactiveWindowDimension(const Window& window, int64 logical_dim);
+
 // Returns the new bound after dilation.
 //
 // If a window with the given bound in some dimension is dilated with the given
diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto
index 127e5e81ac6d21945c7125ef913d236e8892758e..fda1a4c27b6dea1b7e4dee76de976f93ba61c007 100644
--- a/tensorflow/compiler/xla/xla.proto
+++ b/tensorflow/compiler/xla/xla.proto
@@ -175,6 +175,10 @@ message DebugOptions {
   // assignments, if available.
   bool xla_hlo_tfgraph_device_scopes = 93;
 
+  // If true, the GPU backend is free to use cudnn for HLO batch normalization
+  // ops.
+  bool xla_gpu_use_cudnn_batchnorm = 94;
+
   // Extra options to pass to the compilation backend; specific interpretation
   // of these values is left to the backend.
   map<string, string> xla_backend_extra_options = 500;
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index 2ba1a2d904e45e582ee4e8a4ea889ee69d55e747..3aea0217539b89b5d60ecfaf2605eee4b69af728 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -114,6 +114,17 @@ message PaddingConfig {
   repeated PaddingConfigDimension dimensions = 1;
 }
 
+// A format specifies the method used by a layout to store an array in memory.
+enum Format {
+  INVALID_FORMAT = 0;
+  // The default layout, with exactly one storage location per element (ignoring
+  // padding).
+  DENSE = 1;
+  // A sparsely encoded layout, providing only the index/value pairs of non-zero
+  // elements.
+  SPARSE = 2;
+}
+
 // A layout describes how the array is placed in (1D) memory space.  This
 // includes the minor-to-major ordering of dimensions within a shape, as well as
 // any padding present in those dimensions.
@@ -124,21 +135,30 @@ message PaddingConfig {
 //
 // See the XLA documentation for more information on shapes and layouts.
 message Layout {
+  // The method used to store the data in memory. The format determines which of
+  // the other fields are used by the layout.
+  Format format = 4;
+
   // Sequence of dimension numbers, from minor (fastest varying index) to major
   // (slowest varying index). This field is required.
   repeated int64 minor_to_major = 1;
 
-  // The width to which the layout of each dimension is padded up
-  // to. If present, the size of the padded_dimensions must equal the
-  // rank of the shape. The padding appears at the end of a dimension,
-  // not at the beginning. This kind of padding, unlike padding in
-  // e.g. convolution, is not part of the shape.
+  // The width to which the layout of each dimension is padded up to. If
+  // present, the size of the padded_dimensions must equal the rank of the
+  // shape. The padding appears at the end of a dimension, not at the
+  // beginning. This kind of padding, unlike padding in e.g. convolution, is not
+  // part of the shape. This field must be unset unless the format is DENSE.
   repeated int64 padded_dimensions = 2;
 
-  // Describes the values in the padding specified by
-  // padded_dimensions.
+  // Describes the values in the padding specified by padded_dimensions. This
+  // field must be unset unless the format is DENSE.
   PaddingValue padding_value = 3;
 
+  // The maximum number of elements that can be stored for SPARSE formats.  This
+  // can be used to determine the maximum size in bytes of arrays stored in
+  // memory.  This field must be unset unless the format is SPARSE.
+  int64 max_sparse_elements = 5;
+
   // Important: if any field is added, be sure to modify ShapeUtil::Equal()
   // appropriately to account for the new field.
 }
@@ -321,7 +341,8 @@ message LiteralProto {
   // The F16s and BF16s are encoded in little endian byte order
   bytes f16s = 11;
   bytes bf16s = 13;
-  // Next = 14
+  repeated int64 sparse_indices = 14;
+  // Next = 15
 }
 
 message WindowDimension {
@@ -498,6 +519,23 @@ message CustomCallRequest {
   Shape shape = 4;
 }
 
+message DotDimensionNumbers {
+  // The dimension numbers that represent the 'lhs' contracting dimensions.
+  repeated int64 lhs_contracting_dimensions = 1;
+  // The dimension numbers that represent the 'rhs' contracting dimensions.
+  repeated int64 rhs_contracting_dimensions = 2;
+  // The dimension numbers that represent the 'lhs' batch dimensions.
+  repeated int64 lhs_batch_dimensions = 3;
+  // The dimension numbers that represent the 'rhs' batch dimensions.
+  repeated int64 rhs_batch_dimensions = 4;
+};
+
+message DotRequest {
+  ComputationDataHandle lhs = 2;
+  ComputationDataHandle rhs = 3;
+  DotDimensionNumbers dimension_numbers = 4;
+}
+
 message MapRequest {
   repeated ComputationDataHandle operands = 2;
   ComputationHandle to_apply = 3;
@@ -651,6 +689,14 @@ message ConcatenateRequest {
   int64 dimension = 3;
 }
 
+message ConditionalRequest {
+  ComputationDataHandle predicate = 2;
+  ComputationDataHandle true_operand = 3;
+  ComputationHandle true_computation = 4;
+  ComputationDataHandle false_operand = 5;
+  ComputationHandle false_computation = 6;
+}
+
 message WhileRequest {
   ComputationHandle condition = 2;
   ComputationHandle body = 3;
@@ -732,9 +778,6 @@ enum BinaryOperation {
   BINOP_LT = 9;
   BINOP_NE = 10;
 
-  // Dot product, matrix multiply.
-  BINOP_DOT = 12;
-
   // Element-wise maximum.
   BINOP_MAX = 14;
 
@@ -780,9 +823,7 @@ enum RandomDistribution {
   // parameter[0] and standard deviation parameter[1].
   RNG_NORMAL = 2;
 
-  // Creates a Bernoulli-distribution-generated random number with mean
-  // parameter[0].
-  RNG_BERNOULLI = 3;
+  // Next: 4
 }
 
 message RngRequest {
@@ -885,6 +926,7 @@ message OpRequest {
     ConvolveRequest convolve_request = 8;
     CrossReplicaSumRequest cross_replica_sum_request = 9;
     CustomCallRequest custom_call_request = 10;
+    DotRequest dot_request = 43;
     DynamicSliceRequest dynamic_slice_request = 11;
     DynamicUpdateSliceRequest dynamic_update_slice_request = 12;
     GetTupleElementRequest get_tuple_element_request = 13;
@@ -914,7 +956,8 @@ message OpRequest {
     BatchNormInferenceRequest batch_norm_inference_request = 38;
     FftRequest fft_request = 41;
     ConvertRequest bitcast_convert_request = 42;
-    // Next: 43
+    ConditionalRequest conditional_request = 44;
+    // Next: 45
   }
 }
 
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 61f7821519bc1d053ee3b273a6b36b9dbd973245..8bed0fabd743c9cf9a51fe574401ae42730d15b4 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -6,10 +6,16 @@ licenses(["notice"])  # Apache 2.0
 package(default_visibility = ["//tensorflow:__subpackages__"])
 
 load("//third_party/mpi:mpi.bzl", "if_mpi")
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
 
 py_library(
     name = "contrib_py",
-    srcs = glob(["**/*.py"]),
+    srcs = glob(
+        ["**/*.py"],
+        exclude = [
+            "**/*_test.py",
+        ],
+    ),
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
     deps = [
@@ -19,6 +25,7 @@ py_library(
         "//tensorflow/contrib/boosted_trees:init_py",
         "//tensorflow/contrib/cloud:cloud_py",
         "//tensorflow/contrib/cluster_resolver:cluster_resolver_py",
+        "//tensorflow/contrib/coder:coder_ops_py",
         "//tensorflow/contrib/compiler:compiler_py",
         "//tensorflow/contrib/copy_graph:copy_graph_py",
         "//tensorflow/contrib/crf:crf_py",
@@ -48,6 +55,7 @@ py_library(
         "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/contrib/learn",
         "//tensorflow/contrib/legacy_seq2seq:seq2seq_py",
+        "//tensorflow/contrib/libsvm",
         "//tensorflow/contrib/linalg:linalg_py",
         "//tensorflow/contrib/linear_optimizer:sdca_estimator_py",
         "//tensorflow/contrib/linear_optimizer:sdca_ops_py",
@@ -68,6 +76,7 @@ py_library(
         "//tensorflow/contrib/predictor",
         "//tensorflow/contrib/quantization:quantization_py",
         "//tensorflow/contrib/quantize:quantize_graph",
+        "//tensorflow/contrib/receptive_field:receptive_field_py",
         "//tensorflow/contrib/reduce_slice_ops:reduce_slice_ops_py",
         "//tensorflow/contrib/remote_fused_graph/pylib:remote_fused_graph_ops_py",
         "//tensorflow/contrib/resampler:resampler_py",
@@ -95,7 +104,7 @@ py_library(
         "//tensorflow/contrib/training:training_py",
         "//tensorflow/contrib/util:util_py",
         "//tensorflow/python:util",
-    ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_ops_py"]),
+    ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]),
 )
 
 cc_library(
@@ -104,11 +113,11 @@ cc_library(
     deps = [
         "//tensorflow/contrib/batching:batch_ops_kernels",
         "//tensorflow/contrib/boosted_trees:boosted_trees_kernels",
+        "//tensorflow/contrib/coder:all_kernels",
         "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_kernels",
         "//tensorflow/contrib/factorization/kernels:all_kernels",
         "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels",
         "//tensorflow/contrib/layers:sparse_feature_cross_op_kernel",
-        "//tensorflow/contrib/nccl:nccl_kernels",
         "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_kernels",
         "//tensorflow/contrib/rnn:all_kernels",
         "//tensorflow/contrib/seq2seq:beam_search_ops_kernels",
@@ -116,7 +125,9 @@ cc_library(
         "//tensorflow/contrib/tensor_forest:stats_ops_kernels",
         "//tensorflow/contrib/tensor_forest:tensor_forest_kernels",
         "//tensorflow/contrib/text:all_kernels",
-    ],
+    ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_cuda([
+        "//tensorflow/contrib/nccl:nccl_kernels",
+    ]),
 )
 
 cc_library(
@@ -125,6 +136,7 @@ cc_library(
     deps = [
         "//tensorflow/contrib/batching:batch_ops_op_lib",
         "//tensorflow/contrib/boosted_trees:boosted_trees_ops_op_lib",
+        "//tensorflow/contrib/coder:all_ops",
         "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_ops_op_lib",
         "//tensorflow/contrib/factorization:all_ops",
         "//tensorflow/contrib/framework:all_ops",
diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py
index 08247c6b38a4df663ad28a6b4d3c41a1da41a020..f600a8a99816586d6bd7d7ab51354888c435e739 100644
--- a/tensorflow/contrib/__init__.py
+++ b/tensorflow/contrib/__init__.py
@@ -22,6 +22,7 @@ from __future__ import print_function
 from tensorflow.contrib import bayesflow
 from tensorflow.contrib import cloud
 from tensorflow.contrib import cluster_resolver
+from tensorflow.contrib import coder
 from tensorflow.contrib import compiler
 from tensorflow.contrib import copy_graph
 from tensorflow.contrib import crf
@@ -82,13 +83,14 @@ from tensorflow.contrib import util
 from tensorflow.contrib.eager.python import tfe as eager
 from tensorflow.contrib.lite.python import lite
 from tensorflow.contrib.ndlstm import python as ndlstm
+from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field
 from tensorflow.contrib.remote_fused_graph import pylib as remote_fused_graph
 from tensorflow.contrib.specs import python as specs
 from tensorflow.contrib.summary import summary
 
 from tensorflow.python.util.lazy_loader import LazyLoader
-ffmpeg = LazyLoader("ffmpeg",
-                    globals(), "tensorflow.contrib.ffmpeg")
+ffmpeg = LazyLoader("ffmpeg", globals(),
+                    "tensorflow.contrib.ffmpeg")
 del LazyLoader
 
 del absolute_import
diff --git a/tensorflow/contrib/all_reduce/python/all_reduce.py b/tensorflow/contrib/all_reduce/python/all_reduce.py
index a5057da9fd43a88575813613d6ac9d17fd2b2e28..28f60b34996945d573facc665c01d0bc10cf5cd1 100644
--- a/tensorflow/contrib/all_reduce/python/all_reduce.py
+++ b/tensorflow/contrib/all_reduce/python/all_reduce.py
@@ -744,13 +744,13 @@ def _build_nccl_hybrid(input_tensors, red_op, upper_level_f):
   level_2_output = upper_level_f(up_values)
   # Third stage: propagate within each worker using NCCL Broadcast
   for w in range(0, num_workers):
-    dst_devices = per_worker_devices[w][1:]
-    send_op, dst_tensors = nccl.broadcast(level_2_output[w], dst_devices)
-    # NOTE: need control dependency to ensure send_op executes
-    with ops.control_dependencies([send_op]):
-      with ops.device(per_worker_devices[w][0]):
-        dst_tensors.insert(0, array_ops.identity(level_2_output[w]))
-        down_values[w] = dst_tensors
+    dst_tensors = []
+    with ops.device(per_worker_devices[w][0]):
+      broadcast_src = nccl.broadcast(array_ops.identity(level_2_output[w]))
+    for d in per_worker_devices[w]:
+      with ops.device(d):
+        dst_tensors.append(array_ops.identity(broadcast_src))
+    down_values[w] = dst_tensors
   output_tensors = [v for sublist in down_values for v in sublist]
   if len(shape) > 1:
     output_tensors = _reshape_tensors(output_tensors, shape)
diff --git a/tensorflow/contrib/android/README.md b/tensorflow/contrib/android/README.md
index f49e5857fe5255c2459793cb1389052a2ff5f88f..b8d73bf24ce60e0b3850d4f39ac9e6d6c2194a02 100644
--- a/tensorflow/contrib/android/README.md
+++ b/tensorflow/contrib/android/README.md
@@ -15,9 +15,9 @@ For prebuilt libraries, see the
 page for a recent build.
 
 The TensorFlow Inference Interface is also available as a
-[JCenter package](https://bintray.com/google/tensorflow/tensorflow-android) and
-can be included quite simply in your android project with a couple of lines in
-the project's `build.gradle` file:
+[JCenter package](https://bintray.com/google/tensorflow/tensorflow)
+(see the tensorflow-android directory) and can be included quite simply in your
+android project with a couple of lines in the project's `build.gradle` file:
 
 ```
 allprojects {
@@ -32,9 +32,9 @@ dependencies {
 ```
 
 This will tell Gradle to use the
-[latest version](https://bintray.com/google/tensorflow/tensorflow-android/_latestVersion)
+[latest version](https://bintray.com/google/tensorflow/tensorflow/_latestVersion)
 of the TensorFlow AAR that has been released to
-[https://bintray.com/google/tensorflow/tensorflow-android](https://bintray.com/google/tensorflow/tensorflow-android).
+[JCenter](https://jcenter.bintray.com/org/tensorflow/tensorflow-android/).
 You may replace the `+` with an explicit version label if you wish to
 use a specific release of TensorFlow in your app.
 
diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt
index aba356d6167658f125001cbed6e3190c716ee7d6..a115d1610e2334a6626f29674f3dd195e3a3c648 100644
--- a/tensorflow/contrib/android/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/android/cmake/CMakeLists.txt
@@ -34,6 +34,8 @@ add_library(lib_tf STATIC IMPORTED )
 set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION
         ${PREBUILT_DIR}/lib/libtensorflow-core.a)
 # Change to compile flags should be replicated into bazel build file
+# TODO: Consider options other than -O2 for binary size.
+#       e.g. -Os for gcc, and -Oz for clang.
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \
                      -std=c++11 -fno-rtti -fno-exceptions \
                      -O2 -Wno-narrowing -fomit-frame-pointer \
diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD
index a111cfecb366fe245150cc71d2c43662d0d69090..cd98f0e70335db715b8cb6c76a9d7df3e2280552 100644
--- a/tensorflow/contrib/batching/BUILD
+++ b/tensorflow/contrib/batching/BUILD
@@ -12,7 +12,7 @@ cc_library(
     name = "batch_scheduler_hdrs",
     hdrs = ["batch_scheduler.h"],
     deps = [
-        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core/kernels/batching_util:batch_scheduler_hdrs",
     ],
 )
 
@@ -20,18 +20,7 @@ cc_library(
     name = "batch_scheduler",
     hdrs = ["batch_scheduler.h"],
     deps = [
-        "//tensorflow/core:lib",
-    ],
-)
-
-tf_cc_test(
-    name = "batch_scheduler_test",
-    srcs = ["batch_scheduler_test.cc"],
-    deps = [
-        ":batch_scheduler",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
+        "//tensorflow/core/kernels/batching_util:batch_scheduler",
     ],
 )
 
@@ -39,9 +28,7 @@ cc_library(
     name = "shared_batch_scheduler_hdrs",
     hdrs = ["shared_batch_scheduler.h"],
     deps = [
-        ":batch_scheduler_hdrs",
-        "//tensorflow/contrib/batching/util:periodic_function_dynamic",
-        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core/kernels/batching_util:shared_batch_scheduler_hdrs",
     ],
 )
 
@@ -49,46 +36,16 @@ cc_library(
     name = "shared_batch_scheduler",
     hdrs = ["shared_batch_scheduler.h"],
     deps = [
-        ":batch_scheduler",
-        "//tensorflow/contrib/batching/util:periodic_function_dynamic",
-        "//tensorflow/core:lib",
+        "//tensorflow/core/kernels/batching_util:shared_batch_scheduler",
     ],
     alwayslink = 1,
 )
 
-tf_cc_test(
-    name = "shared_batch_scheduler_test",
-    srcs = ["shared_batch_scheduler_test.cc"],
-    deps = [
-        ":shared_batch_scheduler",
-        "//tensorflow/contrib/batching/test_util:fake_clock_env",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-    ],
-)
-
 cc_library(
     name = "adaptive_shared_batch_scheduler",
     hdrs = ["adaptive_shared_batch_scheduler.h"],
     deps = [
-        ":batch_scheduler",
-        "//tensorflow/contrib/batching/util:periodic_function_dynamic",
-        "//tensorflow/core:lib",
-    ],
-)
-
-tf_cc_test(
-    name = "adaptive_shared_batch_scheduler_test",
-    srcs = ["adaptive_shared_batch_scheduler_test.cc"],
-    tags = ["manual"],  # b/69013768
-    deps = [
-        ":adaptive_shared_batch_scheduler",
-        "//tensorflow/contrib/batching/test_util:fake_clock_env",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
+        "//tensorflow/core/kernels/batching_util:adaptive_shared_batch_scheduler",
     ],
 )
 
@@ -96,34 +53,7 @@ cc_library(
     name = "basic_batch_scheduler",
     hdrs = ["basic_batch_scheduler.h"],
     deps = [
-        ":shared_batch_scheduler",
-    ],
-)
-
-tf_cc_test(
-    name = "basic_batch_scheduler_test",
-    srcs = ["basic_batch_scheduler_test.cc"],
-    deps = [
-        ":basic_batch_scheduler",
-        ":batch_scheduler",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-    ],
-)
-
-tf_cc_test(
-    name = "basic_batch_scheduler_benchmark",
-    srcs = ["basic_batch_scheduler_benchmark.cc"],
-    tags = [
-        "local",
-        "manual",
-    ],
-    deps = [
-        ":basic_batch_scheduler",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:tensorflow",
-        "//tensorflow/core:test",
+        "//tensorflow/core/kernels/batching_util:basic_batch_scheduler",
     ],
 )
 
diff --git a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
index 6ed177e001758ad8c566c7965e1ec10ae5235fc8..60861f83f450d3f67f21a46bdfa3fda223b9d2b4 100644
--- a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
+++ b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
@@ -16,447 +16,6 @@ limitations under the License.
 #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
 #define THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
 
-#include <functional>
-#include <memory>
-#include <queue>
-#include <unordered_map>
-#include <vector>
-
-#include "tensorflow/contrib/batching/batch_scheduler.h"
-#include "tensorflow/contrib/batching/util/periodic_function.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/threadpool.h"
-#include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/thread_annotations.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace tensorflow {
-namespace serving {
-namespace internal {
-template <typename TaskType>
-class ASBSBatch;
-
-template <typename TaskType>
-class ASBSQueue;
-}  // namespace internal
-
-// Shared batch scheduler designed to minimize latency. The scheduler keeps
-// track of a number of queues (one per model or model version) which are
-// continuously enqueuing requests. The scheduler groups the requests into
-// batches which it periodically sends off for processing (see
-// shared_batch_scheduler.h for more details). The AdaptiveSharedBatchScheduler
-// prioritizes batches by age (i.e. the batch's oldest request) irrespective of
-// queue. The scheduler will process the oldest batch at an adjustable rate,
-// regardless of batch size. The user can provide feedback to help set this rate
-// to achieve some goal (i.e. minimize overall latency, limit cpu usage, etc).
-//
-// The rate (or rather, the corresponding period) is adjusted each time a batch
-// is processed, using an exponentially weighted moving average to smooth
-// potentially noisy feedback:
-// ewma_feedback = ((N - 1) * ewma_feedback + feedback()) / N
-// period *= (1 + K * emwa_feedback)
-//
-// Some potential use cases:
-// Hardware Accelerators (GPUs & TPUs) - If some phase of batch processing
-//   involves serial processing by a device, from a latency perspective it is
-//   desirable to keep the device evenly loaded, avoiding the need to wait for
-//   the device to process prior batches.
-//   feedback = num_pending_on_device() - desired_pending.
-// CPU utilization - If the batch processing is cpu dominated, you can reap
-//   latency gains when underutilized by increasing the processing rate, but
-//   back the rate off when the load increases to avoid overload.
-//   feedback = cpu_rate() - desired_cpu_rate.
-
-template <typename TaskType>
-class AdaptiveSharedBatchScheduler
-    : public std::enable_shared_from_this<
-          AdaptiveSharedBatchScheduler<TaskType>> {
- public:
-  struct Options {
-    // The name to use for the pool of batch threads.
-    string thread_pool_name = {"batch_threads"};
-    // Number of batch processing threads; equivalently the maximum number of
-    // concurrently running batches.
-    int64 num_batch_threads = port::NumSchedulableCPUs();
-    // The environment to use (typically only overridden by test code).
-    Env* env = Env::Default();
-    // Initial batch scheduling period in microseconds. Will be altered for
-    // non-zero rate_feedback.
-    double initial_scheduling_period_micros = 500;
-    // Minimum batch scheduling period in microseconds. Recommend setting this
-    // value greater than 0, otherwise it may take a while to recover from a
-    // sustained time of negative scheduling_period_feedback (which may occur
-    // under low load).
-    double min_scheduling_period_micros = 100;
-    // Maximum batch scheduling period in microseconds.
-    double max_scheduling_period_micros = 10000;
-    // Feedback function used to modify the scheduling period each time a batch
-    // is scheduled.  Should return values roughly O(1), with positive values
-    // resulting in an increased period.
-    std::function<double()> scheduling_period_feedback{[] { return 0.; }};
-    // To handle potentially noisy scheduling_period_feedback, the period is
-    // adjusted using an exponentially weighted moving average over the previous
-    // feedback_smoothing_batches batches.  Must be greater than 0.
-    int64 feedback_smoothing_batches = 10;
-  };
-
-  // Ownership is shared between the caller of Create() and any queues created
-  // via AddQueue().
-  static Status Create(
-      const Options& options,
-      std::shared_ptr<AdaptiveSharedBatchScheduler<TaskType>>* scheduler);
-
-  struct QueueOptions {
-    // Maximum size of each batch.
-    int max_batch_size = 1000;
-    // Maximum number of enqueued (i.e. non-scheduled) batches.
-    int max_enqueued_batches = 10;
-  };
-
-  using BatchProcessor = std::function<void(std::unique_ptr<Batch<TaskType>>)>;
-
-  // Adds queue (and its callback) to be managed by this scheduler.
-  Status AddQueue(const QueueOptions& options,
-                  BatchProcessor process_batch_callback,
-                  std::unique_ptr<BatchScheduler<TaskType>>* queue);
-
- private:
-  // access to AddBatch, RemoveQueue, GetEnv.
-  friend class internal::ASBSQueue<TaskType>;
-
-  explicit AdaptiveSharedBatchScheduler(const Options& options);
-
-  // Batch scheduling function which runs every scheduling_period_ microseconds.
-  void ProcessOneBatch();
-
-  // Notifies scheduler of non-empty batch which is eligible for processing.
-  void AddBatch(internal::ASBSBatch<TaskType>*);
-
-  // Removes queue from scheduler.
-  void RemoveQueue(const internal::ASBSQueue<TaskType>* queue);
-
-  Env* GetEnv() const { return options_.env; }
-
-  const Options options_;
-
-  struct BatchCompare {
-    bool operator()(const internal::ASBSBatch<TaskType>* a,
-                    const internal::ASBSBatch<TaskType>* b);
-  };
-
-  // Collection of batches added by AddBatch, ordered by age. Owned by scheduler
-  // until they are released for processing.
-  std::priority_queue<const internal::ASBSBatch<TaskType>*,
-                      std::vector<internal::ASBSBatch<TaskType>*>, BatchCompare>
-      batches_ GUARDED_BY(mu_);
-
-  // Unowned queues and callbacks added by AddQueue.
-  std::unordered_map<const internal::ASBSQueue<TaskType>*, BatchProcessor>
-      queues_and_callbacks_ GUARDED_BY(mu_);
-
-  mutex mu_;
-
-  // Responsible for running ProcessOneBatch. PeriodicFunction was used in order
-  // to check for deletion so that the thread can be shut down.
-  std::unique_ptr<PeriodicFunction> scheduling_thread_;
-
-  // Responsible for running the batch processing callbacks.
-  std::unique_ptr<thread::ThreadPool> batch_thread_pool_;
-
-  // Time interval in microseconds between successive ProcessOneBatch calls.
-  double scheduling_period_;
-
-  // Exponentially weighted moving average of
-  // options_.scheduling_period_feedback() evaluated in each ProcessOneBatch
-  // call.
-  double ewma_feedback_ = 0;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(AdaptiveSharedBatchScheduler);
-};
-
-//////////////////////////////////////////////////////////
-// Implementation details follow. API users need not read.
-
-namespace internal {
-// Consolidates tasks into batches, passing them off to the
-// AdaptiveSharedBatchScheduler for processing.
-template <typename TaskType>
-class ASBSQueue : public BatchScheduler<TaskType> {
- public:
-  using QueueOptions =
-      typename AdaptiveSharedBatchScheduler<TaskType>::QueueOptions;
-
-  ASBSQueue(std::shared_ptr<AdaptiveSharedBatchScheduler<TaskType>> scheduler,
-            const QueueOptions& options);
-
-  ~ASBSQueue() override;
-
-  // Adds task to current batch. Fails if the task size is larger than the batch
-  // size or if the current batch is full and this queue's number of outstanding
-  // batches is at its maximum.
-  Status Schedule(std::unique_ptr<TaskType>* task) override;
-
-  // Number of tasks waiting to be scheduled.
-  size_t NumEnqueuedTasks() const override;
-
-  // Number of size 1 tasks which could currently be scheduled without failing.
-  size_t SchedulingCapacity() const override;
-
-  // Notifies queue that a batch is about to be scheduled; the queue should not
-  // place any more tasks in this batch.
-  void ReleaseBatch(const ASBSBatch<TaskType>* batch);
-
- private:
-  std::shared_ptr<AdaptiveSharedBatchScheduler<TaskType>> scheduler_;
-  const QueueOptions options_;
-  // Owned by scheduler_.
-  ASBSBatch<TaskType>* current_batch_ GUARDED_BY(mu_) = nullptr;
-  int64 num_enqueued_batches_ GUARDED_BY(mu_) = 0;
-  int64 num_enqueued_tasks_ GUARDED_BY(mu_) = 0;
-  mutable mutex mu_;
-  TF_DISALLOW_COPY_AND_ASSIGN(ASBSQueue);
-};
-
-// Batch which remembers when and by whom it was created.
-template <typename TaskType>
-class ASBSBatch : public Batch<TaskType> {
- public:
-  ASBSBatch(ASBSQueue<TaskType>* queue, int64 creation_time_micros)
-      : queue_(queue), creation_time_micros_(creation_time_micros) {}
-
-  ~ASBSBatch() override {}
-
-  ASBSQueue<TaskType>* queue() const { return queue_; }
-
-  int64 creation_time_micros() const { return creation_time_micros_; }
-
- private:
-  ASBSQueue<TaskType>* queue_;
-  const int64 creation_time_micros_;
-  TF_DISALLOW_COPY_AND_ASSIGN(ASBSBatch);
-};
-}  // namespace internal
-
-// ---------------- AdaptiveSharedBatchScheduler ----------------
-
-template <typename TaskType>
-Status AdaptiveSharedBatchScheduler<TaskType>::Create(
-    const Options& options,
-    std::shared_ptr<AdaptiveSharedBatchScheduler<TaskType>>* scheduler) {
-  if (options.num_batch_threads < 1) {
-    return errors::InvalidArgument("num_batch_threads must be positive; was ",
-                                   options.num_batch_threads);
-  }
-  if (options.min_scheduling_period_micros < 0) {
-    return errors::InvalidArgument(
-        "min_scheduling_period_micros must be >= 0; was ",
-        options.min_scheduling_period_micros);
-  }
-  if (options.min_scheduling_period_micros >
-      options.initial_scheduling_period_micros) {
-    return errors::InvalidArgument(
-        "initial_scheduling_period_micros (",
-        options.initial_scheduling_period_micros,
-        ") must be >= min_scheduling_period_micros (",
-        options.min_scheduling_period_micros, ")");
-  }
-  if (options.initial_scheduling_period_micros >
-      options.max_scheduling_period_micros) {
-    return errors::InvalidArgument(
-        "initial_scheduling_period_micros (",
-        options.initial_scheduling_period_micros,
-        ") must be <= max_scheduling_period_micros (",
-        options.max_scheduling_period_micros, ")");
-  }
-  if (options.feedback_smoothing_batches < 1) {
-    return errors::InvalidArgument(
-        "feedback_smoothing_batches must be positive; was ",
-        options.feedback_smoothing_batches);
-  }
-  scheduler->reset(new AdaptiveSharedBatchScheduler<TaskType>(options));
-  return Status::OK();
-}
-
-template <typename TaskType>
-AdaptiveSharedBatchScheduler<TaskType>::AdaptiveSharedBatchScheduler(
-    const Options& options)
-    : options_(options),
-      scheduling_period_(options.initial_scheduling_period_micros) {
-  PeriodicFunction::Options opts;
-  opts.thread_name_prefix = "scheduling_thread";
-  opts.env = GetEnv();
-  scheduling_thread_.reset(
-      new PeriodicFunction([this] { ProcessOneBatch(); }, 0, opts));
-  batch_thread_pool_.reset(new thread::ThreadPool(
-      GetEnv(), options.thread_pool_name, options.num_batch_threads));
-}
-
-template <typename TaskType>
-Status AdaptiveSharedBatchScheduler<TaskType>::AddQueue(
-    const QueueOptions& options, BatchProcessor process_batch_callback,
-    std::unique_ptr<BatchScheduler<TaskType>>* queue) {
-  if (options.max_batch_size <= 0) {
-    return errors::InvalidArgument("max_batch_size must be positive; was ",
-                                   options.max_batch_size);
-  }
-  if (options.max_enqueued_batches <= 0) {
-    return errors::InvalidArgument(
-        "max_enqueued_batches must be positive; was ",
-        options.max_enqueued_batches);
-  }
-  internal::ASBSQueue<TaskType>* asbs_queue_raw;
-  queue->reset(asbs_queue_raw = new internal::ASBSQueue<TaskType>(
-                   this->shared_from_this(), options));
-  mutex_lock l(mu_);
-  queues_and_callbacks_[asbs_queue_raw] = process_batch_callback;
-  return Status::OK();
-}
-
-template <typename TaskType>
-void AdaptiveSharedBatchScheduler<TaskType>::AddBatch(
-    internal::ASBSBatch<TaskType>* batch) {
-  mutex_lock l(mu_);
-  batches_.push(batch);
-}
-
-template <typename TaskType>
-void AdaptiveSharedBatchScheduler<TaskType>::RemoveQueue(
-    const internal::ASBSQueue<TaskType>* queue) {
-  mutex_lock l(mu_);
-  queues_and_callbacks_.erase(queue);
-}
-
-template <typename TaskType>
-void AdaptiveSharedBatchScheduler<TaskType>::ProcessOneBatch() {
-  static const double kFeedbackMultiplier = .001;
-  internal::ASBSBatch<TaskType>* batch = nullptr;
-  BatchProcessor callback;
-  const int64 start_time_micros = GetEnv()->NowMicros();
-  {
-    mutex_lock l(mu_);
-    if (!batches_.empty()) {
-      batch = batches_.top();
-      batches_.pop();
-      callback = queues_and_callbacks_[batch->queue()];
-    }
-  }
-  if (batch != nullptr) {
-    double feedback = options_.scheduling_period_feedback();
-    const int64 N = options_.feedback_smoothing_batches;
-    ewma_feedback_ = ((N - 1) * ewma_feedback_ + feedback) / N;
-    scheduling_period_ *= (1 + kFeedbackMultiplier * ewma_feedback_);
-    if (scheduling_period_ < options_.min_scheduling_period_micros) {
-      scheduling_period_ = options_.min_scheduling_period_micros;
-    } else if (scheduling_period_ > options_.max_scheduling_period_micros) {
-      scheduling_period_ = options_.max_scheduling_period_micros;
-    }
-    // Queue may destroy itself after ReleaseBatch is called.
-    batch->queue()->ReleaseBatch(batch);
-    batch_thread_pool_->Schedule([callback, batch] {
-      callback(std::unique_ptr<Batch<TaskType>>(batch));
-    });
-  }
-  const int64 sleep_time =
-      scheduling_period_ - (GetEnv()->NowMicros() - start_time_micros);
-  if (sleep_time > 0) {
-    GetEnv()->SleepForMicroseconds(sleep_time);
-  }
-}
-
-template <typename TaskType>
-bool AdaptiveSharedBatchScheduler<TaskType>::BatchCompare::operator()(
-    const internal::ASBSBatch<TaskType>* a,
-    const internal::ASBSBatch<TaskType>* b) {
-  return a->creation_time_micros() > b->creation_time_micros();
-}
-
-// ---------------- ASBSQueue ----------------
-
-namespace internal {
-template <typename TaskType>
-ASBSQueue<TaskType>::ASBSQueue(
-    std::shared_ptr<AdaptiveSharedBatchScheduler<TaskType>> scheduler,
-    const QueueOptions& options)
-    : scheduler_(scheduler), options_(options) {}
-
-template <typename TaskType>
-ASBSQueue<TaskType>::~ASBSQueue() {
-  // Wait until last batch has been scheduled.
-  const int kSleepMicros = 1000;
-  for (;;) {
-    {
-      mutex_lock l(mu_);
-      if (num_enqueued_batches_ == 0) {
-        break;
-      }
-    }
-    scheduler_->GetEnv()->SleepForMicroseconds(kSleepMicros);
-  }
-  scheduler_->RemoveQueue(this);
-}
-
-template <typename TaskType>
-Status ASBSQueue<TaskType>::Schedule(std::unique_ptr<TaskType>* task) {
-  ASBSBatch<TaskType>* new_batch = nullptr;
-  size_t size = (*task)->size();
-  if (size > options_.max_batch_size) {
-    return errors::InvalidArgument("Task size ", size,
-                                   " is larger than maximum batch size ",
-                                   options_.max_batch_size);
-  }
-  {
-    mutex_lock l(mu_);
-    // Current batch is full, create another if allowed.
-    if (current_batch_ &&
-        current_batch_->size() + size > options_.max_batch_size) {
-      if (num_enqueued_batches_ >= options_.max_enqueued_batches) {
-        return errors::Unavailable("The batch scheduling queue is full");
-      }
-      current_batch_->Close();
-      current_batch_ = nullptr;
-    }
-    if (!current_batch_) {
-      num_enqueued_batches_++;
-      current_batch_ = new_batch =
-          new ASBSBatch<TaskType>(this, scheduler_->GetEnv()->NowMicros());
-    }
-    current_batch_->AddTask(std::move(*task));
-    num_enqueued_tasks_++;
-  }
-  if (new_batch != nullptr) scheduler_->AddBatch(new_batch);
-  return Status::OK();
-}
-
-template <typename TaskType>
-void ASBSQueue<TaskType>::ReleaseBatch(const ASBSBatch<TaskType>* batch) {
-  mutex_lock l(mu_);
-  num_enqueued_batches_--;
-  num_enqueued_tasks_ -= batch->num_tasks();
-  if (batch == current_batch_) {
-    current_batch_->Close();
-    current_batch_ = nullptr;
-  }
-}
-
-template <typename TaskType>
-size_t ASBSQueue<TaskType>::NumEnqueuedTasks() const {
-  mutex_lock l(mu_);
-  return num_enqueued_tasks_;
-}
-
-template <typename TaskType>
-size_t ASBSQueue<TaskType>::SchedulingCapacity() const {
-  mutex_lock l(mu_);
-  const int current_batch_capacity =
-      current_batch_ ? options_.max_batch_size - current_batch_->size() : 0;
-  const int spare_batches =
-      options_.max_enqueued_batches - num_enqueued_batches_;
-  return spare_batches * options_.max_batch_size + current_batch_capacity;
-}
-}  // namespace internal
-}  // namespace serving
-}  // namespace tensorflow
+#include "tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h"
 
 #endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/basic_batch_scheduler.h b/tensorflow/contrib/batching/basic_batch_scheduler.h
index 9d3805fbaf39978159dd2f4a754e6d41a07acf6a..63ba8fcf45d8e6caad14c267bb19c0bc4eea20bf 100644
--- a/tensorflow/contrib/batching/basic_batch_scheduler.h
+++ b/tensorflow/contrib/batching/basic_batch_scheduler.h
@@ -16,249 +16,6 @@ limitations under the License.
 #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_
 #define THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_
 
-#include <stddef.h>
-#include <cstddef>
-#include <functional>
-#include <memory>
-#include <string>
-
-#include "tensorflow/contrib/batching/shared_batch_scheduler.h"
-
-namespace tensorflow {
-namespace serving {
-
-// A BatchScheduler implementation geared toward handling a single request type
-// running on a specific set of hardware resources. A typical scenario is one in
-// which all requests invoke the same machine-learned model on one GPU.
-//
-// If there are, say, two GPUs and two models each bound to one of the GPUs, one
-// could use two BasicBatchScheduler instances to schedule the two model/GPU
-// combinations independently. If multiple models must share a given GPU or
-// other hardware resource, consider using SharedBatchScheduler instead.
-//
-//
-// PARAMETERS AND BEHAVIOR:
-//
-// BasicBatchScheduler runs a fixed pool of threads, which it uses to process
-// batches of tasks. It enforces a maximum batch size, and enqueues a bounded
-// number of tasks. If the queue is nearly empty, such that a full batch cannot
-// be formed, when a thread becomes free, it anyway schedules a batch
-// immediately if a task has been in the queue for longer than a given timeout
-// parameter. If the timeout parameter is set to 0, then the batch threads will
-// always be kept busy (unless there are zero tasks waiting to be processed).
-//
-// For online serving, it is recommended to set the maximum number of enqueued
-// batches worth of tasks equal to the number of batch threads, which allows
-// enqueuing of enough tasks s.t. if every thread becomes available it can be
-// kept busy, but no more. For bulk processing jobs and throughput-oriented
-// benchmarks, you may want to set it much higher.
-//
-// When Schedule() is called, if the queue is full the call will fail with an
-// UNAVAILABLE error (after which the client may retry again later). If the call
-// succeeds, the maximum time the task will spend in the queue before being
-// placed in a batch and assigned to a thread for processing, is the greater of:
-//  - the maximum time to process ceil(max_enqueued_batches/num_batch_threads)
-//    (1 in the recommended configuration) batches of previously-submitted tasks
-//  - the configured timeout parameter (which can be 0, as mentioned above)
-//
-// Unlike StreamingBatchScheduler, when BasicBatchScheduler assigns a batch to a
-// thread, it closes the batch. The process-batch callback may assume that every
-// batch it receives is closed at the outset.
-//
-//
-// RECOMMENDED USE-CASES:
-//
-// BasicBatchScheduler is suitable for use-cases that feature a single kind of
-// request (e.g. a server performing inference with a single machine-learned
-// model, possibly evolving over time), with loose versioning semantics.
-// Concretely, the following conditions should hold:
-//
-//  A. All requests batched onto a given resource (e.g. a hardware accelerator,
-//     or a pool accelerators) are of the same type. For example, they all
-//     invoke the same machine-learned model.
-//
-//     These variations are permitted:
-//      - The model may reside in a single servable, or it may be spread across
-//        multiple servables that are used in unison (e.g. a vocabulary lookup
-//        table servable and a tensorflow session servable).
-//      - The model's servable(s) may be static, or they may evolve over time
-//        (successive servable versions).
-//      - Zero or more of the servables are used in the request thread; the rest
-//        are used in the batch thread. In our running example, the vocabulary
-//        lookups and tensorflow runs may both be performed in the batch thread,
-//        or alternatively the vocabulary lookup may occur in the request thread
-//        with only the tensorflow run performed in the batch thread.
-//
-//     In contrast, BasicBatchScheduler is not a good fit if the server
-//     hosts multiple distinct models running on a pool accelerators, with each
-//     request specifying which model it wants to use. BasicBatchScheduler
-//     has no facility to time-multiplex the batch threads across multiple
-//     models in a principled way. More basically, it cannot ensure that a given
-//     batch doesn't contain a mixture of requests for different models.
-//
-//  B. Requests do not specify a particular version of the servable(s) that must
-//     be used. Instead, each request is content to use the "latest" version.
-//
-//     BasicBatchScheduler does not constrain which requests get grouped
-//     together into a batch, so using this scheduler there is no way to achieve
-//     cohesion of versioned requests to version-specific batches.
-//
-//  C. No servable version coordination needs to be performed between the
-//     request threads and the batch threads. Often, servables are only used in
-//     the batch threads, in which case this condition trivially holds. If
-//     servables are used in both threads, then the use-case must tolerate
-//     version skew across the servables used in the two kinds of threads.
-//
-//
-// EXAMPLE USE-CASE FLOW:
-//
-// For such use-cases, request processing via BasicBatchScheduler generally
-// follows this flow (given for illustration; variations are possible):
-//  1. Optionally perform some pre-processing on each request in the request
-//     threads.
-//  2. Route the requests to the batch scheduler, as batching::Task objects.
-//     (Since all requests are of the same type and are not versioned, the
-//     scheduler is free to group them into batches arbitrarily.)
-//  3. Merge the requests into a single batched representation B.
-//  4. Obtain handles to the servable(s) needed to process B. The simplest
-//     approach is to obtain the latest version of each servable. Alternatively,
-//     if cross-servable consistency is required (e.g. the vocabulary lookup
-//     table's version number must match that of the tensorflow session),
-//     identify an appropriate version number and obtain the servable handles
-//     accordingly.
-//  5. Process B using the obtained servable handles, and split the result into
-//     individual per-request units.
-//  6. Perform any post-processing in the batch thread and/or request thread.
-//
-//
-// PERFORMANCE TUNING: See README.md.
-//
-template <typename TaskType>
-class BasicBatchScheduler : public BatchScheduler<TaskType> {
- public:
-  // TODO(b/25089730): Tune defaults based on best practices as they develop.
-  // (Keep them mirrored to the ones in SharedBatchScheduler::QueueOptions and
-  // SharedBatchScheduler::Options.)
-  struct Options {
-    // The maximum size of each batch.
-    //
-    // The scheduler may form batches of any size between 1 and this number
-    // (inclusive). If there is a need to quantize the batch sizes, i.e. only
-    // submit batches whose size is in a small set of allowed sizes, that can be
-    // done by adding padding in the process-batch callback.
-    int max_batch_size = 1000;
-
-    // If a task has been enqueued for this amount of time (in microseconds),
-    // and a thread is available, the scheduler will immediately form a batch
-    // from enqueued tasks and assign the batch to the thread for processing,
-    // even if the batch's size is below 'max_batch_size'.
-    //
-    // This parameter offers a way to bound queue latency, so that a task isn't
-    // stuck in the queue indefinitely waiting for enough tasks to arrive to
-    // make a full batch. (The latency bound is given in the class documentation
-    // above.)
-    //
-    // The goal is to smooth out batch sizes under low request rates, and thus
-    // avoid latency spikes.
-    int64 batch_timeout_micros = 0;
-
-    // The name to use for the pool of batch threads.
-    string thread_pool_name = {"batch_threads"};
-
-    // The number of threads to use to process batches.
-    // Must be >= 1, and should be tuned carefully.
-    int num_batch_threads = port::NumSchedulableCPUs();
-
-    // The maximum allowable number of enqueued (accepted by Schedule() but
-    // not yet being processed on a batch thread) tasks in terms of batches.
-    // If this limit is reached, Schedule() will return an UNAVAILABLE error.
-    // See the class documentation above for guidelines on how to tune this
-    // parameter.
-    int max_enqueued_batches = 10;
-
-    // The following options are typically only overridden by test code.
-
-    // The environment to use.
-    Env* env = Env::Default();
-  };
-  static Status Create(const Options& options,
-                       std::function<void(std::unique_ptr<Batch<TaskType>>)>
-                           process_batch_callback,
-                       std::unique_ptr<BasicBatchScheduler>* scheduler);
-
-  ~BasicBatchScheduler() override = default;
-
-  Status Schedule(std::unique_ptr<TaskType>* task) override;
-  size_t NumEnqueuedTasks() const override;
-  size_t SchedulingCapacity() const override;
-
- private:
-  explicit BasicBatchScheduler(
-      std::unique_ptr<BatchScheduler<TaskType>> shared_scheduler_queue);
-
-  // This class is merely a thin wrapper around a SharedBatchScheduler with a
-  // single queue.
-  std::unique_ptr<BatchScheduler<TaskType>> shared_scheduler_queue_;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(BasicBatchScheduler);
-};
-
-//////////
-// Implementation details follow. API users need not read.
-
-template <typename TaskType>
-Status BasicBatchScheduler<TaskType>::Create(
-    const Options& options,
-    std::function<void(std::unique_ptr<Batch<TaskType>>)>
-        process_batch_callback,
-    std::unique_ptr<BasicBatchScheduler>* scheduler) {
-  typename SharedBatchScheduler<TaskType>::Options shared_scheduler_options;
-  shared_scheduler_options.thread_pool_name = options.thread_pool_name;
-  shared_scheduler_options.num_batch_threads = options.num_batch_threads;
-  shared_scheduler_options.env = options.env;
-  std::shared_ptr<SharedBatchScheduler<TaskType>> shared_scheduler;
-  TF_RETURN_IF_ERROR(SharedBatchScheduler<TaskType>::Create(
-      shared_scheduler_options, &shared_scheduler));
-
-  typename SharedBatchScheduler<TaskType>::QueueOptions
-      shared_scheduler_queue_options;
-  shared_scheduler_queue_options.max_batch_size = options.max_batch_size;
-  shared_scheduler_queue_options.batch_timeout_micros =
-      options.batch_timeout_micros;
-  shared_scheduler_queue_options.max_enqueued_batches =
-      options.max_enqueued_batches;
-  std::unique_ptr<BatchScheduler<TaskType>> shared_scheduler_queue;
-  TF_RETURN_IF_ERROR(shared_scheduler->AddQueue(shared_scheduler_queue_options,
-                                                process_batch_callback,
-                                                &shared_scheduler_queue));
-
-  scheduler->reset(
-      new BasicBatchScheduler<TaskType>(std::move(shared_scheduler_queue)));
-  return Status::OK();
-}
-
-template <typename TaskType>
-Status BasicBatchScheduler<TaskType>::Schedule(
-    std::unique_ptr<TaskType>* task) {
-  return shared_scheduler_queue_->Schedule(task);
-}
-
-template <typename TaskType>
-size_t BasicBatchScheduler<TaskType>::NumEnqueuedTasks() const {
-  return shared_scheduler_queue_->NumEnqueuedTasks();
-}
-
-template <typename TaskType>
-size_t BasicBatchScheduler<TaskType>::SchedulingCapacity() const {
-  return shared_scheduler_queue_->SchedulingCapacity();
-}
-
-template <typename TaskType>
-BasicBatchScheduler<TaskType>::BasicBatchScheduler(
-    std::unique_ptr<BatchScheduler<TaskType>> shared_scheduler_queue)
-    : shared_scheduler_queue_(std::move(shared_scheduler_queue)) {}
-
-}  // namespace serving
-}  // namespace tensorflow
+#include "tensorflow/core/kernels/batching_util/basic_batch_scheduler.h"
 
 #endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/batch_scheduler.h b/tensorflow/contrib/batching/batch_scheduler.h
index a5072f439abad3c5db79a514a7f2baff0b021b39..3afce2761f748136f4d556017823db8dbd4af50e 100644
--- a/tensorflow/contrib/batching/batch_scheduler.h
+++ b/tensorflow/contrib/batching/batch_scheduler.h
@@ -13,264 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-// Abstractions for processing small tasks in a batched fashion, to reduce
-// processing times and costs that can be amortized across multiple tasks.
-//
-// The core class is BatchScheduler, which groups tasks into batches.
-//
-// BatchScheduler encapsulates logic for aggregating multiple tasks into a
-// batch, and kicking off processing of a batch on a thread pool it manages.
-//
-// This file defines an abstract BatchScheduler class.
-
 #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_BATCH_SCHEDULER_H_
 #define THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_BATCH_SCHEDULER_H_
 
-#include <stddef.h>
-#include <algorithm>
-#include <functional>
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "tensorflow/core/lib/core/notification.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/thread_annotations.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace tensorflow {
-namespace serving {
-
-// The abstract superclass for a unit of work to be done as part of a batch.
-//
-// An implementing subclass typically contains (or points to):
-//  (a) input data;
-//  (b) a thread-safe completion signal (e.g. a Notification);
-//  (c) a place to store the outcome (success, or some error), upon completion;
-//  (d) a place to store the output data, upon success.
-//
-// Items (b), (c) and (d) are typically non-owned pointers to data homed
-// elsewhere, because a task's ownership gets transferred to a BatchScheduler
-// (see below) and it may be deleted as soon as it is done executing.
-class BatchTask {
- public:
-  virtual ~BatchTask() = default;
-
-  // Returns the size of the task, in terms of how much it contributes to the
-  // size of a batch. (A batch's size is the sum of its task sizes.)
-  virtual size_t size() const = 0;
-};
-
-// A thread-safe collection of BatchTasks, to be executed together in some
-// fashion.
-//
-// At a given time, a batch is either "open" or "closed": an open batch can
-// accept new tasks; a closed one cannot. A batch is monotonic: initially it is
-// open and tasks can be added to it; then it is closed and its set of tasks
-// remains fixed for the remainder of its life. A closed batch cannot be re-
-// opened. Tasks can never be removed from a batch.
-//
-// Type parameter TaskType must be a subclass of BatchTask.
-template <typename TaskType>
-class Batch {
- public:
-  Batch() = default;
-  virtual ~Batch();  // Blocks until the batch is closed.
-
-  // Appends 'task' to the batch. After calling AddTask(), the newly-added task
-  // can be accessed via task(num_tasks()-1) or mutable_task(num_tasks()-1).
-  // Dies if the batch is closed.
-  void AddTask(std::unique_ptr<TaskType> task);
-
-  // Removes the most recently added task. Returns nullptr if the batch is
-  // empty.
-  std::unique_ptr<TaskType> RemoveTask();
-
-  // Returns the number of tasks in the batch.
-  int num_tasks() const;
-
-  // Returns true iff the batch contains 0 tasks.
-  bool empty() const;
-
-  // Returns a reference to the ith task (in terms of insertion order).
-  const TaskType& task(int i) const;
-
-  // Returns a pointer to the ith task (in terms of insertion order).
-  TaskType* mutable_task(int i);
-
-  // Returns the sum of the task sizes.
-  size_t size() const;
-
-  // Returns true iff the batch is currently closed.
-  bool IsClosed() const;
-
-  // Blocks until the batch is closed.
-  void WaitUntilClosed() const;
-
-  // Marks the batch as closed. Dies if called more than once.
-  void Close();
-
- private:
-  mutable mutex mu_;
-
-  // The tasks in the batch.
-  std::vector<std::unique_ptr<TaskType>> tasks_ GUARDED_BY(mu_);
-
-  // The sum of the sizes of the tasks in 'tasks_'.
-  size_t size_ GUARDED_BY(mu_) = 0;
-
-  // Whether the batch has been closed.
-  Notification closed_;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(Batch);
-};
-
-// An abstract batch scheduler class. Collects individual tasks into batches,
-// and processes each batch on a pool of "batch threads" that it manages. The
-// actual logic for processing a batch is accomplished via a callback.
-//
-// Type parameter TaskType must be a subclass of BatchTask.
-template <typename TaskType>
-class BatchScheduler {
- public:
-  virtual ~BatchScheduler() = default;
-
-  // Submits a task to be processed as part of a batch.
-  //
-  // Ownership of '*task' is transferred to the callee iff the method returns
-  // Status::OK. In that case, '*task' is left as nullptr. Otherwise, '*task' is
-  // left as-is.
-  //
-  // If no batch processing capacity is available to process this task at the
-  // present time, and any task queue maintained by the implementing subclass is
-  // full, this method returns an UNAVAILABLE error code. The client may retry
-  // later.
-  //
-  // Other problems, such as the task size being larger than the maximum batch
-  // size, yield other, permanent error types.
-  //
-  // In all cases, this method returns "quickly" without blocking for any
-  // substantial amount of time. If the method returns Status::OK, the task is
-  // processed asynchronously, and any errors that occur during the processing
-  // of the batch that includes the task can be reported to 'task'.
-  virtual Status Schedule(std::unique_ptr<TaskType>* task) = 0;
-
-  // Returns the number of tasks that have been scheduled (i.e. accepted by
-  // Schedule()), but have yet to be handed to a thread for execution as part of
-  // a batch. Note that this returns the number of tasks, not the aggregate task
-  // size (so if there is one task of size 3 and one task of size 5, this method
-  // returns 2 rather than 8).
-  virtual size_t NumEnqueuedTasks() const = 0;
-
-  // Returns a guaranteed number of size 1 tasks that can be Schedule()d without
-  // getting an UNAVAILABLE error. In a typical implementation, returns the
-  // available space on a queue.
-  //
-  // There are two important caveats:
-  //  1. The guarantee does not extend to varying-size tasks due to possible
-  //     internal fragmentation of batches.
-  //  2. The guarantee only holds in a single-thread environment or critical
-  //     section, i.e. if an intervening thread cannot call Schedule().
-  //
-  // This method is useful for monitoring, or for guaranteeing a future slot in
-  // the schedule (but being mindful about the caveats listed above).
-  virtual size_t SchedulingCapacity() const = 0;
-};
-
-//////////
-// Implementation details follow. API users need not read.
-
-template <typename TaskType>
-Batch<TaskType>::~Batch() {
-  WaitUntilClosed();
-}
-
-template <typename TaskType>
-void Batch<TaskType>::AddTask(std::unique_ptr<TaskType> task) {
-  DCHECK(!IsClosed());
-  {
-    mutex_lock l(mu_);
-    size_ += task->size();
-    tasks_.push_back(std::move(task));
-  }
-}
-
-template <typename TaskType>
-std::unique_ptr<TaskType> Batch<TaskType>::RemoveTask() {
-  {
-    mutex_lock l(mu_);
-    if (tasks_.empty()) {
-      return nullptr;
-    }
-    std::unique_ptr<TaskType> task = std::move(tasks_.back());
-    tasks_.pop_back();
-    return task;
-  }
-}
-
-template <typename TaskType>
-int Batch<TaskType>::num_tasks() const {
-  {
-    mutex_lock l(mu_);
-    return tasks_.size();
-  }
-}
-
-template <typename TaskType>
-bool Batch<TaskType>::empty() const {
-  {
-    mutex_lock l(mu_);
-    return tasks_.empty();
-  }
-}
-
-template <typename TaskType>
-const TaskType& Batch<TaskType>::task(int i) const {
-  DCHECK_GE(i, 0);
-  {
-    mutex_lock l(mu_);
-    DCHECK_LT(i, tasks_.size());
-    return *tasks_[i].get();
-  }
-}
-
-template <typename TaskType>
-TaskType* Batch<TaskType>::mutable_task(int i) {
-  DCHECK_GE(i, 0);
-  {
-    mutex_lock l(mu_);
-    DCHECK_LT(i, tasks_.size());
-    return tasks_[i].get();
-  }
-}
-
-template <typename TaskType>
-size_t Batch<TaskType>::size() const {
-  {
-    mutex_lock l(mu_);
-    return size_;
-  }
-}
-
-template <typename TaskType>
-bool Batch<TaskType>::IsClosed() const {
-  return const_cast<Notification*>(&closed_)->HasBeenNotified();
-}
-
-template <typename TaskType>
-void Batch<TaskType>::WaitUntilClosed() const {
-  const_cast<Notification*>(&closed_)->WaitForNotification();
-}
-
-template <typename TaskType>
-void Batch<TaskType>::Close() {
-  closed_.Notify();
-}
-
-}  // namespace serving
-}  // namespace tensorflow
+#include "tensorflow/core/kernels/batching_util/batch_scheduler.h"
 
 #endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/shared_batch_scheduler.h b/tensorflow/contrib/batching/shared_batch_scheduler.h
index 41a3f99137ade2552432fee62ddce17d064148a4..7eb1e20c42283a38564f7686db0015f153f469ed 100644
--- a/tensorflow/contrib/batching/shared_batch_scheduler.h
+++ b/tensorflow/contrib/batching/shared_batch_scheduler.h
@@ -16,685 +16,6 @@ limitations under the License.
 #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_
 #define THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_
 
-#include <stddef.h>
-#include <deque>
-#include <functional>
-#include <list>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "tensorflow/contrib/batching/batch_scheduler.h"
-#include "tensorflow/contrib/batching/util/periodic_function.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/strings/strcat.h"
-#include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/thread_annotations.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace tensorflow {
-namespace serving {
-namespace internal {
-template <typename TaskType>
-class Queue;
-}  // namespace internal
-}  // namespace serving
-}  // namespace tensorflow
-
-namespace tensorflow {
-namespace serving {
-
-// A batch scheduler for server instances that service multiple request types
-// (e.g. multiple machine-learned models, or multiple versions of a model served
-// concurrently), or even multiple distinct tasks for a given request. The
-// scheduler multiplexes batches of different kinds of tasks onto a fixed-size
-// thread pool (each batch contains tasks of a single type), in a carefully
-// controlled manner. A common configuration is to set the number of threads
-// equal to the number of hardware accelerator units, in which case the
-// scheduler takes care of multiplexing the task types onto the shared hardware,
-// in a manner that is both fair and efficient.
-//
-// Semantically, SharedBatchScheduler behaves like having N instances of
-// BasicBatchScheduler (see basic_batch_scheduler.h), one per task type. The
-// difference is that under the covers there is a single shared thread pool,
-// instead of N independent ones, with their sharing deliberately coordinated.
-//
-// SharedBatchScheduler does not implement the BatchScheduler API; rather, it
-// presents an abstraction of "queues", where each queue coresponds to one type
-// of task. Tasks submitted to a given queue are placed in their own batches,
-// and cannot be mixed with other tasks. Queues can be added and deleted
-// dynamically, to accommodate e.g. versions of a model being brought up and
-// down over the lifetime of a server.
-//
-// The batch thread pool round-robins through the queues, running one batch
-// from a queue and then moving to the next queue. Each queue behaves like a
-// BasicBatchScheduler instance, in the sense that it has maximum batch size and
-// timeout parameters, which govern when a batch is eligible to be processed.
-//
-// Each queue is independently configured with a maximum size (in terms of the
-// maximum number of batches worth of enqueued tasks). For online serving, it is
-// recommended that the queue sizes be configured such that the sum of the sizes
-// of the active queues roughly equal the number of batch threads. (The idea is
-// that if all threads become available at roughly the same time, there will be
-// enough enqueued work for them to take on, but no more.)
-//
-// If queue sizes are configured in the manner suggested above, the maximum time
-// a task can spend in a queue before being placed in a batch and assigned to a
-// thread for processing, is the greater of:
-//  - the maximum time to process one batch of tasks from any active queue
-//  - the configured timeout parameter for the task's queue (which can be 0)
-//
-// For bulk processing jobs and throughput-oriented benchmarks, you may want to
-// set the maximum queue size to a large value.
-//
-// TODO(b/26539183): Support queue servicing policies other than round-robin.
-// E.g. let each queue specify a "share" (an int >= 1), so e.g. with queues A
-// and B having shares 1 and 2 respectively, the servicing pattern is ABBABB...
-//
-//
-// PERFORMANCE TUNING: See README.md.
-//
-template <typename TaskType>
-class SharedBatchScheduler
-    : public std::enable_shared_from_this<SharedBatchScheduler<TaskType>> {
- public:
-  // TODO(b/25089730): Tune defaults based on best practices as they develop.
-  struct Options {
-    // The name to use for the pool of batch threads.
-    string thread_pool_name = {"batch_threads"};
-
-    // The number of threads to use to process batches.
-    // Must be >= 1, and should be tuned carefully.
-    int num_batch_threads = port::NumSchedulableCPUs();
-
-    // The environment to use.
-    // (Typically only overridden by test code.)
-    Env* env = Env::Default();
-  };
-  // Ownership is shared between the caller of Create() and any queues created
-  // via AddQueue().
-  static Status Create(
-      const Options& options,
-      std::shared_ptr<SharedBatchScheduler<TaskType>>* scheduler);
-
-  ~SharedBatchScheduler();
-
-  // Adds a queue to which tasks may be submitted. The returned queue implements
-  // the BatchScheduler API. Each queue has its own set of scheduling options,
-  // and its own callback to process batches of tasks submitted to the queue.
-  //
-  // The returned queue's destructor blocks until all tasks submitted to it have
-  // been processed.
-  struct QueueOptions {
-    // The maximum size of each batch.
-    //
-    // The scheduler may form batches of any size between 1 and this number
-    // (inclusive). If there is a need to quantize the batch sizes, i.e. only
-    // submit batches whose size is in a small set of allowed sizes, that can be
-    // done by adding padding in the process-batch callback.
-    int max_batch_size = 1000;
-
-    // If a task has been enqueued for this amount of time (in microseconds),
-    // and a thread is available, the scheduler will immediately form a batch
-    // from enqueued tasks and assign the batch to the thread for processing,
-    // even if the batch's size is below 'max_batch_size'.
-    //
-    // This parameter offers a way to bound queue latency, so that a task isn't
-    // stuck in the queue indefinitely waiting for enough tasks to arrive to
-    // make a full batch. (The latency bound is given in the class documentation
-    // above.)
-    //
-    // The goal is to smooth out batch sizes under low request rates, and thus
-    // avoid latency spikes.
-    int64 batch_timeout_micros = 0;
-
-    // The maximum allowable number of enqueued (accepted by Schedule() but
-    // not yet being processed on a batch thread) tasks in terms of batches.
-    // If this limit is reached, Schedule() will return an UNAVAILABLE error.
-    // See the class documentation above for guidelines on how to tune this
-    // parameter.
-    int max_enqueued_batches = 10;
-  };
-  Status AddQueue(const QueueOptions& options,
-                  std::function<void(std::unique_ptr<Batch<TaskType>>)>
-                      process_batch_callback,
-                  std::unique_ptr<BatchScheduler<TaskType>>* queue);
-
- private:
-  explicit SharedBatchScheduler(const Options& options);
-
-  // The code executed in 'batch_threads_'. Obtains a batch to process from the
-  // queue pointed to by 'next_queue_to_schedule_', and processes it. If that
-  // queue declines to provide a batch to process, moves onto the next queue. If
-  // no queues provide a batch to process, just sleeps briefly and exits.
-  void ThreadLogic();
-
-  const Options options_;
-
-  mutex mu_;
-
-  // A list of queues. (We use std::list instead of std::vector to ensure that
-  // iterators are not invalidated by adding/removing elements. It also offers
-  // efficient removal of elements from the middle.)
-  using QueueList = std::list<std::unique_ptr<internal::Queue<TaskType>>>;
-
-  // All "active" queues, i.e. ones that either:
-  //  - have not been removed, or
-  //  - have been removed but are not yet empty.
-  QueueList queues_ GUARDED_BY(mu_);
-
-  // An iterator over 'queues_', pointing to the queue from which the next
-  // available batch thread should grab work.
-  typename QueueList::iterator next_queue_to_schedule_ GUARDED_BY(mu_);
-
-  // Used by idle batch threads to wait for work to enter the system. Notified
-  // whenever a batch becomes schedulable.
-  condition_variable schedulable_batch_cv_;
-
-  // Threads that process batches obtained from the queues.
-  std::vector<std::unique_ptr<PeriodicFunction>> batch_threads_;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(SharedBatchScheduler);
-};
-
-//////////
-// Implementation details follow. API users need not read.
-
-namespace internal {
-
-// A task queue for SharedBatchScheduler. Accepts tasks and accumulates them
-// into batches, and dispenses those batches to be processed via a "pull"
-// interface. The queue's behavior is governed by maximum batch size, timeout
-// and maximum queue length parameters; see their documentation in
-// SharedBatchScheduler.
-//
-// The queue is implemented as a deque of batches, with these invariants:
-//  - The number of batches is between 1 and 'options_.max_enqueued_batches'.
-//  - The back-most batch is open; the rest are closed.
-//
-// Submitted tasks are added to the open batch. If that batch doesn't have room
-// but the queue isn't full, then that batch is closed and a new open batch is
-// started.
-//
-// Batch pull requests are handled by dequeuing the front-most batch if it is
-// closed. If the front-most batch is open (i.e. the queue contains only one
-// batch) and has reached the timeout, it is immediately closed and returned;
-// otherwise no batch is returned for the request.
-template <typename TaskType>
-class Queue {
- public:
-  using ProcessBatchCallback =
-      std::function<void(std::unique_ptr<Batch<TaskType>>)>;
-  using SchedulableBatchCallback = std::function<void()>;
-  Queue(const typename SharedBatchScheduler<TaskType>::QueueOptions& options,
-        Env* env, ProcessBatchCallback process_batch_callback,
-        SchedulableBatchCallback schdulable_batch_callback);
-
-  // Illegal to destruct unless the queue is empty.
-  ~Queue();
-
-  // Submits a task to the queue, with the same semantics as
-  // BatchScheduler::Schedule().
-  Status Schedule(std::unique_ptr<TaskType>* task);
-
-  // Returns the number of enqueued tasks, with the same semantics as
-  // BatchScheduler::NumEnqueuedTasks().
-  size_t NumEnqueuedTasks() const;
-
-  // Returns the queue capacity, with the same semantics as
-  // BatchScheduler::SchedulingCapacity().
-  size_t SchedulingCapacity() const;
-
-  // Called by a thread that is ready to process a batch, to request one from
-  // this queue. Either returns a batch that is ready to be processed, or
-  // nullptr if the queue declines to schedule a batch at this time. If it
-  // returns a batch, the batch is guaranteed to be closed.
-  std::unique_ptr<Batch<TaskType>> ScheduleBatch();
-
-  // Processes a batch that has been returned earlier by ScheduleBatch().
-  void ProcessBatch(std::unique_ptr<Batch<TaskType>> batch);
-
-  // Determines whether the queue is empty, i.e. has no tasks waiting or being
-  // processed.
-  bool IsEmpty() const;
-
-  // Marks the queue closed, and waits until it is empty.
-  void CloseAndWaitUntilEmpty();
-
-  bool closed() const {
-    mutex_lock l(mu_);
-    return closed_;
-  }
-
- private:
-  // Same as IsEmpty(), but assumes the caller already holds a lock on 'mu_'.
-  bool IsEmptyInternal() const EXCLUSIVE_LOCKS_REQUIRED(mu_);
-
-  // Closes the open batch residing at the back of 'batches_', and inserts a
-  // fresh open batch behind it.
-  void StartNewBatch() EXCLUSIVE_LOCKS_REQUIRED(mu_);
-
-  // Determines whether the open batch residing at the back of 'batches_' is
-  // currently schedulable.
-  bool IsOpenBatchSchedulable() const EXCLUSIVE_LOCKS_REQUIRED(mu_);
-
-  const typename SharedBatchScheduler<TaskType>::QueueOptions options_;
-
-  // The environment to use.
-  Env* env_;
-
-  // A callback invoked to processes a batch of work units. Always invoked from
-  // a batch thread.
-  ProcessBatchCallback process_batch_callback_;
-
-  // A callback invoked to notify the scheduler that a new batch has become
-  // schedulable.
-  SchedulableBatchCallback schedulable_batch_callback_;
-
-  mutable mutex mu_;
-
-  // Whether this queue can accept new tasks. This variable is monotonic: it
-  // starts as false, and then at some point gets set to true and remains true
-  // for the duration of this object's life.
-  bool closed_ GUARDED_BY(mu_) = false;
-
-  // The enqueued batches. See the invariants in the class comments above.
-  std::deque<std::unique_ptr<Batch<TaskType>>> batches_ GUARDED_BY(mu_);
-
-  // The time at which the first task was added to the open (back-most) batch
-  // in 'batches_'. Valid iff that batch contains at least one task.
-  uint64 open_batch_start_time_micros_ GUARDED_BY(mu_);
-
-  // Whether this queue contains a batch that is eligible to be scheduled. Used
-  // to keep track of when to call 'schedulable_batch_callback_'.
-  bool schedulable_batch_ GUARDED_BY(mu_) = false;
-
-  // The number of batches currently being processed by batch threads.
-  // Incremented in ScheduleBatch() and decremented in ProcessBatch().
-  int num_batches_being_processed_ GUARDED_BY(mu_) = 0;
-
-  // Used by CloseAndWaitUntilEmpty() to wait until the queue is empty, for the
-  // case in which the queue is not empty when CloseAndWaitUntilEmpty() starts.
-  // When ProcessBatch() dequeues the last batch and makes the queue empty, if
-  // 'empty_notification_' is non-null it calls 'empty_notification_->Notify()'.
-  Notification* empty_notification_ GUARDED_BY(mu_) = nullptr;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(Queue);
-};
-
-// A RAII-style object that points to a Queue and implements
-// the BatchScheduler API. To be handed out to clients who call AddQueue().
-template <typename TaskType>
-class QueueHandle : public BatchScheduler<TaskType> {
- public:
-  QueueHandle(std::shared_ptr<SharedBatchScheduler<TaskType>> scheduler,
-              Queue<TaskType>* queue);
-  ~QueueHandle() override;
-
-  Status Schedule(std::unique_ptr<TaskType>* task) override;
-  size_t NumEnqueuedTasks() const override;
-  size_t SchedulingCapacity() const override;
-
- private:
-  // The scheduler that owns 'queue_'.
-  std::shared_ptr<SharedBatchScheduler<TaskType>> scheduler_;
-
-  // The queue this handle wraps. Owned by 'scheduler_', which keeps it alive at
-  // least until this class's destructor closes it.
-  Queue<TaskType>* queue_;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(QueueHandle);
-};
-
-}  // namespace internal
-
-template <typename TaskType>
-Status SharedBatchScheduler<TaskType>::Create(
-    const Options& options,
-    std::shared_ptr<SharedBatchScheduler<TaskType>>* scheduler) {
-  if (options.num_batch_threads < 1) {
-    return errors::InvalidArgument("num_batch_threads must be positive; was ",
-                                   options.num_batch_threads);
-  }
-  scheduler->reset(new SharedBatchScheduler<TaskType>(options));
-  return Status::OK();
-}
-
-template <typename TaskType>
-SharedBatchScheduler<TaskType>::~SharedBatchScheduler() {
-  // Wait until the batch threads finish clearing out and deleting the closed
-  // queues.
-  for (;;) {
-    {
-      mutex_lock l(mu_);
-      if (queues_.empty()) {
-        break;
-      }
-    }
-    const int64 kSleepTimeMicros = 100;
-    options_.env->SleepForMicroseconds(kSleepTimeMicros);
-  }
-  // Delete the batch threads before allowing state the threads may access (e.g.
-  // 'mu_') to be deleted.
-  batch_threads_.clear();
-}
-
-template <typename TaskType>
-Status SharedBatchScheduler<TaskType>::AddQueue(
-    const QueueOptions& options,
-    std::function<void(std::unique_ptr<Batch<TaskType>>)>
-        process_batch_callback,
-    std::unique_ptr<BatchScheduler<TaskType>>* queue) {
-  if (options.max_batch_size <= 0) {
-    return errors::InvalidArgument("max_batch_size must be positive; was ",
-                                   options.max_batch_size);
-  }
-  if (options.batch_timeout_micros < 0) {
-    return errors::InvalidArgument(
-        "batch_timeout_micros must be non-negative; was ",
-        options.batch_timeout_micros);
-  }
-  if (options.max_enqueued_batches < 0) {
-    return errors::InvalidArgument(
-        "max_enqueued_batches must be non-negative; was ",
-        options.max_enqueued_batches);
-  }
-
-  auto schedulable_batch_callback = [this] {
-    mutex_lock l(mu_);
-    schedulable_batch_cv_.notify_one();
-  };
-  auto internal_queue =
-      std::unique_ptr<internal::Queue<TaskType>>(new internal::Queue<TaskType>(
-          options, options_.env, process_batch_callback,
-          schedulable_batch_callback));
-  auto handle = std::unique_ptr<BatchScheduler<TaskType>>(
-      new internal::QueueHandle<TaskType>(this->shared_from_this(),
-                                          internal_queue.get()));
-  {
-    mutex_lock l(mu_);
-    queues_.push_back(std::move(internal_queue));
-    if (next_queue_to_schedule_ == queues_.end()) {
-      next_queue_to_schedule_ = queues_.begin();
-    }
-  }
-  *queue = std::move(handle);
-  return Status::OK();
-}
-
-template <typename TaskType>
-SharedBatchScheduler<TaskType>::SharedBatchScheduler(const Options& options)
-    : options_(options), next_queue_to_schedule_(queues_.end()) {
-  // Kick off the batch threads.
-  PeriodicFunction::Options periodic_fn_options;
-  periodic_fn_options.thread_name_prefix =
-      strings::StrCat(options.thread_pool_name, "_");
-  for (int i = 0; i < options.num_batch_threads; ++i) {
-    std::unique_ptr<PeriodicFunction> thread(new PeriodicFunction(
-        [this] { this->ThreadLogic(); },
-        0 /* function invocation interval time */, periodic_fn_options));
-    batch_threads_.push_back(std::move(thread));
-  }
-}
-
-template <typename TaskType>
-void SharedBatchScheduler<TaskType>::ThreadLogic() {
-  // A batch to process next (or nullptr if no work to do).
-  std::unique_ptr<Batch<TaskType>> batch_to_process;
-  // The queue with which 'batch_to_process' is associated.
-  internal::Queue<TaskType>* queue_for_batch = nullptr;
-  {
-    mutex_lock l(mu_);
-
-    const int num_queues = queues_.size();
-    for (int num_queues_tried = 0;
-         batch_to_process == nullptr && num_queues_tried < num_queues;
-         ++num_queues_tried) {
-      DCHECK(next_queue_to_schedule_ != queues_.end());
-
-      // If a closed queue responds to ScheduleBatch() with nullptr, the queue
-      // will never yield any further batches so we can drop it. To avoid a
-      // race, we take a snapshot of the queue's closedness state *before*
-      // calling ScheduleBatch().
-      const bool queue_closed = (*next_queue_to_schedule_)->closed();
-
-      // Ask '*next_queue_to_schedule_' if it wants us to process a batch.
-      batch_to_process = (*next_queue_to_schedule_)->ScheduleBatch();
-      if (batch_to_process != nullptr) {
-        queue_for_batch = next_queue_to_schedule_->get();
-      }
-
-      // Advance 'next_queue_to_schedule_'.
-      if (queue_closed && (*next_queue_to_schedule_)->IsEmpty() &&
-          batch_to_process == nullptr) {
-        // We've encountered a closed queue with no work to do. Drop it.
-        DCHECK_NE(queue_for_batch, next_queue_to_schedule_->get());
-        next_queue_to_schedule_ = queues_.erase(next_queue_to_schedule_);
-      } else {
-        ++next_queue_to_schedule_;
-      }
-      if (next_queue_to_schedule_ == queues_.end() && !queues_.empty()) {
-        // We've hit the end. Wrap to the first queue.
-        next_queue_to_schedule_ = queues_.begin();
-      }
-    }
-
-    if (batch_to_process == nullptr) {
-      // We couldn't find any work to do. Wait until a new batch becomes
-      // schedulable, or some time has elapsed, before checking again.
-      const int64 kTimeoutMillis = 1;  // The smallest accepted granule of time.
-      WaitForMilliseconds(&l, &schedulable_batch_cv_, kTimeoutMillis);
-      return;
-    }
-  }
-
-  queue_for_batch->ProcessBatch(std::move(batch_to_process));
-}
-
-namespace internal {
-
-template <typename TaskType>
-Queue<TaskType>::Queue(
-    const typename SharedBatchScheduler<TaskType>::QueueOptions& options,
-    Env* env, ProcessBatchCallback process_batch_callback,
-    SchedulableBatchCallback schedulable_batch_callback)
-    : options_(options),
-      env_(env),
-      process_batch_callback_(process_batch_callback),
-      schedulable_batch_callback_(schedulable_batch_callback) {
-  // Create an initial, open batch.
-  batches_.emplace_back(new Batch<TaskType>);
-}
-
-template <typename TaskType>
-Queue<TaskType>::~Queue() {
-  mutex_lock l(mu_);
-  DCHECK(IsEmptyInternal());
-
-  // Close the (empty) open batch, so its destructor doesn't block.
-  batches_.back()->Close();
-}
-
-template <typename TaskType>
-Status Queue<TaskType>::Schedule(std::unique_ptr<TaskType>* task) {
-  if ((*task)->size() > options_.max_batch_size) {
-    return errors::InvalidArgument("Task size ", (*task)->size(),
-                                   " is larger than maximum batch size ",
-                                   options_.max_batch_size);
-  }
-
-  bool notify_of_schedulable_batch = false;
-  {
-    mutex_lock l(mu_);
-
-    DCHECK(!closed_);
-
-    if (batches_.back()->size() + (*task)->size() > options_.max_batch_size) {
-      if (batches_.size() >= options_.max_enqueued_batches) {
-        return errors::Unavailable(
-            "The batch scheduling queue to which this task was submitted is "
-            "full");
-      }
-      StartNewBatch();
-    }
-    if (batches_.back()->empty()) {
-      open_batch_start_time_micros_ = env_->NowMicros();
-    }
-    batches_.back()->AddTask(std::move(*task));
-
-    if (!schedulable_batch_) {
-      if (batches_.size() > 1 || IsOpenBatchSchedulable()) {
-        schedulable_batch_ = true;
-        notify_of_schedulable_batch = true;
-      }
-    }
-  }
-
-  if (notify_of_schedulable_batch) {
-    schedulable_batch_callback_();
-  }
-
-  return Status::OK();
-}
-
-template <typename TaskType>
-size_t Queue<TaskType>::NumEnqueuedTasks() const {
-  mutex_lock l(mu_);
-  size_t num_enqueued_tasks = 0;
-  for (const auto& batch : batches_) {
-    num_enqueued_tasks += batch->num_tasks();
-  }
-  return num_enqueued_tasks;
-}
-
-template <typename TaskType>
-size_t Queue<TaskType>::SchedulingCapacity() const {
-  mutex_lock l(mu_);
-  const int num_new_batches_schedulable =
-      options_.max_enqueued_batches - batches_.size();
-  const int open_batch_capacity =
-      options_.max_batch_size - batches_.back()->size();
-  return (num_new_batches_schedulable * options_.max_batch_size) +
-         open_batch_capacity;
-}
-
-template <typename TaskType>
-std::unique_ptr<Batch<TaskType>> Queue<TaskType>::ScheduleBatch() {
-  // The batch to schedule, which we may populate below. (If left as nullptr,
-  // that means we are electing not to schedule a batch at this time.)
-  std::unique_ptr<Batch<TaskType>> batch_to_schedule;
-
-  {
-    mutex_lock l(mu_);
-
-    // Consider closing the open batch at this time, to schedule it.
-    if (batches_.size() == 1 && IsOpenBatchSchedulable()) {
-      StartNewBatch();
-    }
-
-    if (batches_.size() >= 2) {
-      // There is at least one closed batch that is ready to be scheduled.
-      ++num_batches_being_processed_;
-      batch_to_schedule = std::move(batches_.front());
-      batches_.pop_front();
-    } else {
-      schedulable_batch_ = false;
-    }
-  }
-
-  return batch_to_schedule;
-}
-
-template <typename TaskType>
-void Queue<TaskType>::ProcessBatch(std::unique_ptr<Batch<TaskType>> batch) {
-  process_batch_callback_(std::move(batch));
-
-  {
-    mutex_lock l(mu_);
-    --num_batches_being_processed_;
-    if (empty_notification_ != nullptr && IsEmptyInternal()) {
-      empty_notification_->Notify();
-    }
-  }
-}
-
-template <typename TaskType>
-bool Queue<TaskType>::IsEmpty() const {
-  mutex_lock l(mu_);
-  return IsEmptyInternal();
-}
-
-template <typename TaskType>
-void Queue<TaskType>::CloseAndWaitUntilEmpty() {
-  Notification empty;
-  {
-    mutex_lock l(mu_);
-    closed_ = true;
-    if (IsEmptyInternal()) {
-      empty.Notify();
-    } else {
-      // Arrange for ProcessBatch() to notify when the queue becomes empty.
-      empty_notification_ = &empty;
-    }
-  }
-  empty.WaitForNotification();
-}
-
-template <typename TaskType>
-bool Queue<TaskType>::IsEmptyInternal() const {
-  return num_batches_being_processed_ == 0 && batches_.size() == 1 &&
-         batches_.back()->empty();
-}
-
-template <typename TaskType>
-void Queue<TaskType>::StartNewBatch() {
-  batches_.back()->Close();
-  batches_.emplace_back(new Batch<TaskType>);
-}
-
-template <typename TaskType>
-bool Queue<TaskType>::IsOpenBatchSchedulable() const {
-  Batch<TaskType>* open_batch = batches_.back().get();
-  if (open_batch->empty()) {
-    return false;
-  }
-  return closed_ || open_batch->size() >= options_.max_batch_size ||
-         env_->NowMicros() >=
-             open_batch_start_time_micros_ + options_.batch_timeout_micros;
-}
-
-template <typename TaskType>
-QueueHandle<TaskType>::QueueHandle(
-    std::shared_ptr<SharedBatchScheduler<TaskType>> scheduler,
-    Queue<TaskType>* queue)
-    : scheduler_(scheduler), queue_(queue) {}
-
-template <typename TaskType>
-QueueHandle<TaskType>::~QueueHandle() {
-  queue_->CloseAndWaitUntilEmpty();
-}
-
-template <typename TaskType>
-Status QueueHandle<TaskType>::Schedule(std::unique_ptr<TaskType>* task) {
-  return queue_->Schedule(task);
-}
-
-template <typename TaskType>
-size_t QueueHandle<TaskType>::NumEnqueuedTasks() const {
-  return queue_->NumEnqueuedTasks();
-}
-
-template <typename TaskType>
-size_t QueueHandle<TaskType>::SchedulingCapacity() const {
-  return queue_->SchedulingCapacity();
-}
-
-}  // namespace internal
-
-}  // namespace serving
-}  // namespace tensorflow
+#include "tensorflow/core/kernels/batching_util/shared_batch_scheduler.h"
 
 #endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/test_util/BUILD b/tensorflow/contrib/batching/test_util/BUILD
index d1ced0d8c367f44b520a9bba2db8a3e0969bab4c..6db627faad1df4a4b73082e74e7754829ff2b514 100644
--- a/tensorflow/contrib/batching/test_util/BUILD
+++ b/tensorflow/contrib/batching/test_util/BUILD
@@ -22,11 +22,9 @@ filegroup(
 cc_library(
     name = "fake_clock_env",
     testonly = 1,
-    srcs = ["fake_clock_env.cc"],
     hdrs = ["fake_clock_env.h"],
     visibility = ["//visibility:public"],
     deps = [
-        "//tensorflow/core:lib",
-        "//tensorflow/core:tensorflow",
+        "//tensorflow/core/kernels/batching_util:fake_clock_env",
     ],
 )
diff --git a/tensorflow/contrib/batching/test_util/fake_clock_env.h b/tensorflow/contrib/batching/test_util/fake_clock_env.h
index 35cafcb73c51feb4e9e15a61d1830c8ef6bc3e0f..ced27a88336324fb8c4be490138291d9234693f9 100644
--- a/tensorflow/contrib/batching/test_util/fake_clock_env.h
+++ b/tensorflow/contrib/batching/test_util/fake_clock_env.h
@@ -16,61 +16,6 @@ limitations under the License.
 #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_
 #define THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_
 
-#include <functional>
-#include <string>
-#include <vector>
-
-#include "tensorflow/core/lib/core/notification.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/thread_annotations.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace tensorflow {
-namespace serving {
-namespace test_util {
-
-// An Env implementation with a fake clock for NowMicros() and
-// SleepForMicroseconds(). The clock doesn't advance on its own; it advances via
-// an explicit Advance() method.
-// All other Env virtual methods pass through to a wrapped Env.
-class FakeClockEnv : public EnvWrapper {
- public:
-  explicit FakeClockEnv(Env* wrapped);
-  ~FakeClockEnv() override = default;
-
-  // Advance the clock by a certain number of microseconds.
-  void AdvanceByMicroseconds(int micros);
-
-  // Blocks until there is a sleeping thread that is scheduled to wake up at
-  // the given (absolute) time.
-  void BlockUntilSleepingThread(uint64 wake_time);
-
-  // Blocks until there are at least num_threads sleeping.
-  void BlockUntilThreadsAsleep(int num_threads);
-
-  // Methods that this class implements.
-  uint64 NowMicros() override;
-  void SleepForMicroseconds(int64 micros) override;
-
- private:
-  mutex mu_;
-
-  uint64 current_time_ GUARDED_BY(mu_) = 0;
-
-  struct SleepingThread {
-    uint64 wake_time;
-    Notification* wake_notification;
-  };
-  std::vector<SleepingThread> sleeping_threads_ GUARDED_BY(mu_);
-
-  TF_DISALLOW_COPY_AND_ASSIGN(FakeClockEnv);
-};
-
-}  // namespace test_util
-}  // namespace serving
-}  // namespace tensorflow
+#include "tensorflow/core/kernels/batching_util/fake_clock_env.h"
 
 #endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_
diff --git a/tensorflow/contrib/batching/util/BUILD b/tensorflow/contrib/batching/util/BUILD
index f33a08cb817e9f2832be953ef6ff1aba04c4c288..2a84a7712a8fa66e89db41ff4e7ebe4f620029ca 100644
--- a/tensorflow/contrib/batching/util/BUILD
+++ b/tensorflow/contrib/batching/util/BUILD
@@ -22,12 +22,11 @@ filegroup(
 
 cc_library(
     name = "periodic_function_dynamic",
-    srcs = ["periodic_function.cc"],
     hdrs = ["periodic_function.h"],
     visibility = ["//visibility:public"],
     deps = [
-        "//tensorflow/core:framework_headers_lib",
-        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/kernels/batching_util:periodic_function_dynamic",
+        "//third_party/eigen3",
     ],
 )
 
@@ -36,17 +35,6 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":periodic_function_dynamic",
-        "//tensorflow/core:lib",
-    ],
-)
-
-tf_cc_test(
-    name = "periodic_function_test",
-    srcs = ["periodic_function_test.cc"],
-    deps = [
-        ":periodic_function_dynamic",
-        "//tensorflow/contrib/batching/test_util:fake_clock_env",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
+        "//tensorflow/core/kernels/batching_util:periodic_function",
     ],
 )
diff --git a/tensorflow/contrib/batching/util/periodic_function.h b/tensorflow/contrib/batching/util/periodic_function.h
index 2c032d802fe5f23a267db28dc869a253f16afc34..fb61bc2eea2ec6eb560670148611c66ddc3d73df 100644
--- a/tensorflow/contrib/batching/util/periodic_function.h
+++ b/tensorflow/contrib/batching/util/periodic_function.h
@@ -12,121 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-
-// PeriodicFunction will periodically call the given function with a specified
-// period in a background thread.  After Start() returns, the thread is
-// guaranteed to have started. The destruction of the class causes the
-// background thread to be destroyed as well.  Start() should not be called more
-// than once.
-//
-// PeriodicFunction runs the function as soon as any previous run both is
-// complete and was started more than "interval_micros" earlier.  Thus, runs are
-// both serialized, and normally have a period of "interval_micros" if no run
-// exceeds the time.
-//
-// Note that, if the function takes longer than two interval_micross to finish,
-// then PeriodicFunction will "skip" at least one call to the function.  For
-// instance, if the period is 50ms and the function starts runs at time 0 for
-// 150ms, then the function will immediately start executing again at time 150,
-// but there will be no function runs corresponding to times 50 or 100.  This is
-// especially important to remember when using an environment with a simulated
-// clock: advancing simulated time atomically over N interval_micross will not
-// cause the function to be called N times.
-//
-// This object is thread-safe.
-//
-// Example:
-//
-//   class Foo {
-//    public:
-//     Foo() : periodic_function_([this]() { Bar(); },
-//                               1000 /* 1000us == 1ms*/) {
-//     }
-//
-//    private:
-//     void Bar() { ... }
-//
-//     PeriodicFunction periodic_function_;
-//   };
-
 #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_
 #define THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_
 
-#include <functional>
-#include <memory>
-#include <string>
-
-#include "tensorflow/core/lib/core/notification.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/thread_annotations.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace tensorflow {
-namespace serving {
-
-namespace internal {
-class PeriodicFunctionTestAccess;
-}
-
-class PeriodicFunction {
- public:
-  // Provides the ability to customize several aspects of the PeriodicFunction.
-  // Passed to constructor of PeriodicFunction.
-  struct Options {
-    Options() {}
-
-    // Any standard thread options, such as stack size, should
-    // be passed via "thread_options".
-    ThreadOptions thread_options;
-
-    // Specifies the thread name prefix (see the description in class
-    // Thread).
-    string thread_name_prefix = "periodic_function";
-
-    // The environment to use. Does not take ownership, but must remain alive
-    // for as long as the PeriodicFunction exists.
-    Env* env = Env::Default();
-
-    // Specifies the length of sleep before the first invocation of the
-    // function.
-    // This can be used for adding a random jitter to avoid synchronous behavior
-    // across multiple periodic functions.
-    int64 startup_delay_micros = 0;
-  };
-
-  // Also starts the background thread which will be calling the function.
-  PeriodicFunction(const std::function<void()>& function, int64 interval_micros,
-                   const Options& options = Options());
-
-  ~PeriodicFunction();
-
- private:
-  friend class internal::PeriodicFunctionTestAccess;
-
-  // Notifies the background thread to stop.
-  void NotifyStop();
-
-  // (Blocking.) Loops forever calling "function_" every "interval_micros_".
-  void RunLoop(int64 start) LOCKS_EXCLUDED(mutex_);
-
-  const std::function<void()> function_;  // Actual client function
-  const int64 interval_micros_;    // Interval between calls.
-  const Options options_;
-
-  // Protects state below.
-  mutable mutex mutex_;
-  // Used to notify the thread to stop.
-  Notification stop_thread_;
-
-  // Thread for running "function_"
-  std::unique_ptr<Thread> thread_ GUARDED_BY(mutex_) = nullptr;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(PeriodicFunction);
-};
-
-}  // namespace serving
-}  // namespace tensorflow
+#include "tensorflow/core/kernels/batching_util/periodic_function.h"
 
 #endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_
diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD
index a262d4aecdbb69dfcd8b88bc0a09060500d6b1c9..11c3c037c4e8b4ba41eae60d28d6aac49f1488f2 100644
--- a/tensorflow/contrib/bayesflow/BUILD
+++ b/tensorflow/contrib/bayesflow/BUILD
@@ -99,6 +99,25 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "layers_conv_variational_test",
+    size = "small",
+    srcs = ["python/kernel_tests/layers_conv_variational_test.py"],
+    additional_deps = [
+        ":bayesflow_py",
+        "//third_party/py/numpy",
+        "//tensorflow/contrib/distributions:distributions_py",
+        "//tensorflow/python/ops/distributions",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:linalg_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:nn_ops",
+    ],
+)
+
 cuda_py_test(
     name = "layers_dense_variational_test",
     size = "small",
@@ -200,6 +219,28 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "variational_sgd_optimizer_test",
+    size = "small",
+    srcs = ["python/kernel_tests/variational_sgd_optimizer_test.py"],
+    additional_deps = [
+        ":bayesflow_py",
+        "//third_party/py/numpy",
+        "//tensorflow/contrib/distributions:distributions_py",
+        "//tensorflow/contrib/layers:layers_py",
+        "//tensorflow/python/ops/distributions",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:random_seed",
+    ],
+    tags = ["notsan"],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
index b1f108e5f01e4945ee83d8262f1d99877f0fe9f0..cbc66b6dc13db62c25952de6b6c13b2fdfe27f12 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for Hamiltonian Monte Carlo.
-"""
+"""Tests for Hamiltonian Monte Carlo."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -27,6 +26,7 @@ from tensorflow.contrib.bayesflow.python.ops import hmc
 
 from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import test
@@ -46,6 +46,9 @@ class HMCTest(test.TestCase):
     random_seed.set_random_seed(10003)
     np.random.seed(10003)
 
+  def assertAllFinite(self, x):
+    self.assertAllEqual(np.ones_like(x).astype(bool), np.isfinite(x))
+
   def _log_gamma_log_prob(self, x, event_dims=()):
     """Computes log-pdf of a log-gamma random variable.
 
@@ -345,5 +348,97 @@ class HMCTest(test.TestCase):
   def testAIS12(self):
     self._ais_gets_correct_log_normalizer_wrapper([1, 2])
 
+  def testNanRejection(self):
+    """Tests that an update that yields NaN potentials gets rejected.
+
+    We run HMC with a target distribution that returns NaN
+    log-likelihoods if any element of x < 0, and unit-scale
+    exponential log-likelihoods otherwise. The exponential potential
+    pushes x towards 0, ensuring that any reasonably large update will
+    push us over the edge into NaN territory.
+    """
+    def _unbounded_exponential_log_prob(x):
+      """An exponential distribution with log-likelihood NaN for x < 0."""
+      per_element_potentials = array_ops.where(x < 0,
+                                               np.nan * array_ops.ones_like(x),
+                                               -x)
+      return math_ops.reduce_sum(per_element_potentials)
+
+    with self.test_session() as sess:
+      initial_x = math_ops.linspace(0.01, 5, 10)
+      updated_x, acceptance_probs, _, _ = hmc.kernel(
+          2., 5, initial_x, _unbounded_exponential_log_prob, [0])
+      initial_x_val, updated_x_val, acceptance_probs_val = sess.run(
+          [initial_x, updated_x, acceptance_probs])
+
+      logging.vlog(1, 'initial_x = {}'.format(initial_x_val))
+      logging.vlog(1, 'updated_x = {}'.format(updated_x_val))
+      logging.vlog(1, 'acceptance_probs = {}'.format(acceptance_probs_val))
+
+      self.assertAllEqual(initial_x_val, updated_x_val)
+      self.assertEqual(acceptance_probs_val, 0.)
+
+  def testNanFromGradsDontPropagate(self):
+    """Test that update with NaN gradients does not cause NaN in results."""
+    def _nan_log_prob_with_nan_gradient(x):
+      return np.nan * math_ops.reduce_sum(x)
+
+    with self.test_session() as sess:
+      initial_x = math_ops.linspace(0.01, 5, 10)
+      updated_x, acceptance_probs, new_log_prob, new_grad = hmc.kernel(
+          2., 5, initial_x, _nan_log_prob_with_nan_gradient, [0])
+      initial_x_val, updated_x_val, acceptance_probs_val = sess.run(
+          [initial_x, updated_x, acceptance_probs])
+
+      logging.vlog(1, 'initial_x = {}'.format(initial_x_val))
+      logging.vlog(1, 'updated_x = {}'.format(updated_x_val))
+      logging.vlog(1, 'acceptance_probs = {}'.format(acceptance_probs_val))
+
+      self.assertAllEqual(initial_x_val, updated_x_val)
+      self.assertEqual(acceptance_probs_val, 0.)
+
+      self.assertAllFinite(
+          gradients_impl.gradients(updated_x, initial_x)[0].eval())
+      self.assertTrue(
+          gradients_impl.gradients(new_grad, initial_x)[0] is None)
+
+      # Gradients of the acceptance probs and new log prob are not finite.
+      _ = new_log_prob  # Prevent unused arg error.
+      # self.assertAllFinite(
+      #     gradients_impl.gradients(acceptance_probs, initial_x)[0].eval())
+      # self.assertAllFinite(
+      #     gradients_impl.gradients(new_log_prob, initial_x)[0].eval())
+
+  def testChainWorksIn64Bit(self):
+    def log_prob(x):
+      return - math_ops.reduce_sum(x * x, axis=-1)
+    states, acceptance_probs = hmc.chain(
+        n_iterations=10,
+        step_size=np.float64(0.01),
+        n_leapfrog_steps=10,
+        initial_x=np.zeros(5).astype(np.float64),
+        target_log_prob_fn=log_prob,
+        event_dims=[-1])
+    with self.test_session() as sess:
+      states_, acceptance_probs_ = sess.run([states, acceptance_probs])
+    self.assertEqual(np.float64, states_.dtype)
+    self.assertEqual(np.float64, acceptance_probs_.dtype)
+
+  def testChainWorksIn16Bit(self):
+    def log_prob(x):
+      return - math_ops.reduce_sum(x * x, axis=-1)
+    states, acceptance_probs = hmc.chain(
+        n_iterations=10,
+        step_size=np.float16(0.01),
+        n_leapfrog_steps=10,
+        initial_x=np.zeros(5).astype(np.float16),
+        target_log_prob_fn=log_prob,
+        event_dims=[-1])
+    with self.test_session() as sess:
+      states_, acceptance_probs_ = sess.run([states, acceptance_probs])
+    self.assertEqual(np.float16, states_.dtype)
+    self.assertEqual(np.float16, acceptance_probs_.dtype)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..750afb6654311fea30a1dc6b31b20aa3b4160ae2
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py
@@ -0,0 +1,521 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for convolutional Bayesian layers."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.bayesflow.python.ops import layers_conv_variational as prob_layers_lib
+from tensorflow.contrib.bayesflow.python.ops import layers_util as prob_layers_util
+from tensorflow.contrib.distributions.python.ops import independent as independent_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops.distributions import normal as normal_lib
+from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.platform import test
+
+
+class Counter(object):
+  """Helper class to manage incrementing a counting `int`."""
+
+  def __init__(self):
+    self._value = -1
+
+  @property
+  def value(self):
+    return self._value
+
+  def __call__(self):
+    self._value += 1
+    return self._value
+
+
+class MockDistribution(independent_lib.Independent):
+  """Monitors layer calls to the underlying distribution."""
+
+  def __init__(self, result_sample, result_log_prob, loc=None, scale=None):
+    self.result_sample = result_sample
+    self.result_log_prob = result_log_prob
+    self.result_loc = loc
+    self.result_scale = scale
+    self.result_distribution = normal_lib.Normal(loc=0.0, scale=1.0)
+    if loc is not None and scale is not None:
+      self.result_distribution = normal_lib.Normal(loc=self.result_loc,
+                                                   scale=self.result_scale)
+    self.called_log_prob = Counter()
+    self.called_sample = Counter()
+    self.called_loc = Counter()
+    self.called_scale = Counter()
+
+  def log_prob(self, *args, **kwargs):
+    self.called_log_prob()
+    return self.result_log_prob
+
+  def sample(self, *args, **kwargs):
+    self.called_sample()
+    return self.result_sample
+
+  @property
+  def distribution(self):  # for dummy check on Independent(Normal)
+    return self.result_distribution
+
+  @property
+  def loc(self):
+    self.called_loc()
+    return self.result_loc
+
+  @property
+  def scale(self):
+    self.called_scale()
+    return self.result_scale
+
+
+class MockKLDivergence(object):
+  """Monitors layer calls to the divergence implementation."""
+
+  def __init__(self, result):
+    self.result = result
+    self.args = []
+    self.called = Counter()
+
+  def __call__(self, *args, **kwargs):
+    self.called()
+    self.args.append(args)
+    return self.result
+
+
+class ConvVariational(test.TestCase):
+
+  def _testKLPenaltyKernel(self, layer_class):
+    with self.test_session():
+      layer = layer_class(filters=2, kernel_size=3)
+      if layer_class in (prob_layers_lib.Conv1DReparameterization,
+                         prob_layers_lib.Conv1DFlipout):
+        inputs = random_ops.random_uniform([2, 3, 1], seed=1)
+      elif layer_class in (prob_layers_lib.Conv2DReparameterization,
+                           prob_layers_lib.Conv2DFlipout):
+        inputs = random_ops.random_uniform([2, 3, 3, 1], seed=1)
+      elif layer_class in (prob_layers_lib.Conv3DReparameterization,
+                           prob_layers_lib.Conv3DFlipout):
+        inputs = random_ops.random_uniform([2, 3, 3, 3, 1], seed=1)
+
+      # No keys.
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 0)
+      self.assertListEqual(layer.losses, losses)
+
+      _ = layer(inputs)
+
+      # Yes keys.
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 1)
+      self.assertListEqual(layer.losses, losses)
+
+  def _testKLPenaltyBoth(self, layer_class):
+    def _make_normal(dtype, *args):  # pylint: disable=unused-argument
+      return normal_lib.Normal(
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.))
+    with self.test_session():
+      layer = layer_class(
+          filters=2,
+          kernel_size=3,
+          bias_posterior_fn=prob_layers_util.default_mean_field_normal_fn(),
+          bias_prior_fn=_make_normal)
+      if layer_class in (prob_layers_lib.Conv1DReparameterization,
+                         prob_layers_lib.Conv1DFlipout):
+        inputs = random_ops.random_uniform([2, 3, 1], seed=1)
+      elif layer_class in (prob_layers_lib.Conv2DReparameterization,
+                           prob_layers_lib.Conv2DFlipout):
+        inputs = random_ops.random_uniform([2, 3, 3, 1], seed=1)
+      elif layer_class in (prob_layers_lib.Conv3DReparameterization,
+                           prob_layers_lib.Conv3DFlipout):
+        inputs = random_ops.random_uniform([2, 3, 3, 3, 1], seed=1)
+
+      # No keys.
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 0)
+      self.assertListEqual(layer.losses, losses)
+
+      _ = layer(inputs)
+
+      # Yes keys.
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 2)
+      self.assertListEqual(layer.losses, losses)
+
+  def _testConvSetUp(self, layer_class, batch_size, depth=None,
+                     height=None, width=None, channels=None, filters=None,
+                     **kwargs):
+    seed = Counter()
+    if layer_class in (prob_layers_lib.Conv1DReparameterization,
+                       prob_layers_lib.Conv1DFlipout):
+      inputs = random_ops.random_uniform(
+          [batch_size, width, channels], seed=seed())
+      kernel_size = (2,)
+    elif layer_class in (prob_layers_lib.Conv2DReparameterization,
+                         prob_layers_lib.Conv2DFlipout):
+      inputs = random_ops.random_uniform(
+          [batch_size, height, width, channels], seed=seed())
+      kernel_size = (2, 2)
+    elif layer_class in (prob_layers_lib.Conv3DReparameterization,
+                         prob_layers_lib.Conv3DFlipout):
+      inputs = random_ops.random_uniform(
+          [batch_size, depth, height, width, channels], seed=seed())
+      kernel_size = (2, 2, 2)
+
+    kernel_shape = kernel_size + (channels, filters)
+    kernel_posterior = MockDistribution(
+        loc=random_ops.random_uniform(kernel_shape, seed=seed()),
+        scale=random_ops.random_uniform(kernel_shape, seed=seed()),
+        result_log_prob=random_ops.random_uniform(kernel_shape, seed=seed()),
+        result_sample=random_ops.random_uniform(kernel_shape, seed=seed()))
+    kernel_prior = MockDistribution(
+        result_log_prob=random_ops.random_uniform(kernel_shape, seed=seed()),
+        result_sample=random_ops.random_uniform(kernel_shape, seed=seed()))
+    kernel_divergence = MockKLDivergence(
+        result=random_ops.random_uniform(kernel_shape, seed=seed()))
+
+    bias_size = (filters,)
+    bias_posterior = MockDistribution(
+        result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
+        result_sample=random_ops.random_uniform(bias_size, seed=seed()))
+    bias_prior = MockDistribution(
+        result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
+        result_sample=random_ops.random_uniform(bias_size, seed=seed()))
+    bias_divergence = MockKLDivergence(
+        result=random_ops.random_uniform(bias_size, seed=seed()))
+
+    layer = layer_class(
+        filters=filters,
+        kernel_size=kernel_size,
+        padding="SAME",
+        kernel_posterior_fn=lambda *args: kernel_posterior,
+        kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
+        kernel_prior_fn=lambda *args: kernel_prior,
+        kernel_divergence_fn=kernel_divergence,
+        bias_posterior_fn=lambda *args: bias_posterior,
+        bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
+        bias_prior_fn=lambda *args: bias_prior,
+        bias_divergence_fn=bias_divergence,
+        **kwargs)
+
+    outputs = layer(inputs)
+
+    kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+    return (kernel_posterior, kernel_prior, kernel_divergence,
+            bias_posterior, bias_prior, bias_divergence,
+            layer, inputs, outputs, kl_penalty, kernel_shape)
+
+  def _testConvReparameterization(self, layer_class):
+    batch_size, depth, height, width, channels, filters = 2, 4, 4, 4, 3, 5
+    with self.test_session() as sess:
+      (kernel_posterior, kernel_prior, kernel_divergence,
+       bias_posterior, bias_prior, bias_divergence, layer, inputs,
+       outputs, kl_penalty, kernel_shape) = self._testConvSetUp(
+           layer_class, batch_size,
+           depth=depth, height=height, width=width, channels=channels,
+           filters=filters)
+
+      convolution_op = nn_ops.Convolution(
+          tensor_shape.TensorShape(inputs.shape),
+          filter_shape=tensor_shape.TensorShape(kernel_shape),
+          padding="SAME")
+      expected_outputs = convolution_op(inputs, kernel_posterior.result_sample)
+      expected_outputs = nn.bias_add(expected_outputs,
+                                     bias_posterior.result_sample,
+                                     data_format="NHWC")
+
+      [
+          expected_outputs_, actual_outputs_,
+          expected_kernel_, actual_kernel_,
+          expected_kernel_divergence_, actual_kernel_divergence_,
+          expected_bias_, actual_bias_,
+          expected_bias_divergence_, actual_bias_divergence_,
+      ] = sess.run([
+          expected_outputs, outputs,
+          kernel_posterior.result_sample, layer.kernel_posterior_tensor,
+          kernel_divergence.result, kl_penalty[0],
+          bias_posterior.result_sample, layer.bias_posterior_tensor,
+          bias_divergence.result, kl_penalty[1],
+      ])
+
+      self.assertAllClose(
+          expected_kernel_, actual_kernel_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_bias_, actual_bias_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_outputs_, actual_outputs_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_kernel_divergence_, actual_kernel_divergence_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_bias_divergence_, actual_bias_divergence_,
+          rtol=1e-6, atol=0.)
+
+      self.assertAllEqual(
+          [[kernel_posterior.distribution,
+            kernel_prior.distribution,
+            kernel_posterior.result_sample]],
+          kernel_divergence.args)
+
+      self.assertAllEqual(
+          [[bias_posterior.distribution,
+            bias_prior.distribution,
+            bias_posterior.result_sample]],
+          bias_divergence.args)
+
+  def _testConvFlipout(self, layer_class):
+    batch_size, depth, height, width, channels, filters = 2, 4, 4, 4, 3, 5
+    with self.test_session() as sess:
+      (kernel_posterior, kernel_prior, kernel_divergence,
+       bias_posterior, bias_prior, bias_divergence, layer, inputs,
+       outputs, kl_penalty, kernel_shape) = self._testConvSetUp(
+           layer_class, batch_size,
+           depth=depth, height=height, width=width, channels=channels,
+           filters=filters, seed=44)
+
+      convolution_op = nn_ops.Convolution(
+          tensor_shape.TensorShape(inputs.shape),
+          filter_shape=tensor_shape.TensorShape(kernel_shape),
+          padding="SAME")
+
+      expected_kernel_posterior_affine = normal_lib.Normal(
+          loc=array_ops.zeros_like(kernel_posterior.result_loc),
+          scale=kernel_posterior.result_scale)
+      expected_kernel_posterior_affine_tensor = (
+          expected_kernel_posterior_affine.sample(seed=42))
+
+      expected_outputs = convolution_op(
+          inputs, kernel_posterior.distribution.loc)
+
+      input_shape = array_ops.shape(inputs)
+      output_shape = array_ops.shape(expected_outputs)
+      batch_shape = array_ops.expand_dims(input_shape[0], 0)
+      channels = input_shape[-1]
+      rank = len(inputs.get_shape()) - 2
+
+      sign_input = random_ops.random_uniform(
+          array_ops.concat([batch_shape,
+                            array_ops.expand_dims(channels, 0)], 0),
+          minval=0,
+          maxval=2,
+          dtype=dtypes.int32,
+          seed=layer.seed)
+      sign_input = math_ops.cast(2 * sign_input - 1, inputs.dtype)
+      sign_output = random_ops.random_uniform(
+          array_ops.concat([batch_shape,
+                            array_ops.expand_dims(filters, 0)], 0),
+          minval=0,
+          maxval=2,
+          dtype=dtypes.int32,
+          seed=distribution_util.gen_new_seed(
+              layer.seed, salt="conv_flipout"))
+      sign_output = math_ops.cast(2 * sign_output - 1, inputs.dtype)
+      for _ in range(rank):
+        sign_input = array_ops.expand_dims(sign_input, 1)  # 2D ex: (B, 1, 1, C)
+        sign_output = array_ops.expand_dims(sign_output, 1)
+
+      sign_input = array_ops.tile(  # tile for element-wise op broadcasting
+          sign_input,
+          [1] + [input_shape[i + 1] for i in range(rank)] + [1])
+      sign_output = array_ops.tile(
+          sign_output,
+          [1] + [output_shape[i + 1] for i in range(rank)] + [1])
+
+      perturbed_inputs = convolution_op(
+          inputs * sign_input, expected_kernel_posterior_affine_tensor)
+      perturbed_inputs *= sign_output
+
+      expected_outputs += perturbed_inputs
+      expected_outputs = nn.bias_add(expected_outputs,
+                                     bias_posterior.result_sample,
+                                     data_format="NHWC")
+
+      [
+          expected_outputs_, actual_outputs_,
+          expected_kernel_divergence_, actual_kernel_divergence_,
+          expected_bias_, actual_bias_,
+          expected_bias_divergence_, actual_bias_divergence_,
+      ] = sess.run([
+          expected_outputs, outputs,
+          kernel_divergence.result, kl_penalty[0],
+          bias_posterior.result_sample, layer.bias_posterior_tensor,
+          bias_divergence.result, kl_penalty[1],
+      ])
+
+      self.assertAllClose(
+          expected_bias_, actual_bias_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_outputs_, actual_outputs_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_kernel_divergence_, actual_kernel_divergence_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_bias_divergence_, actual_bias_divergence_,
+          rtol=1e-6, atol=0.)
+
+      self.assertAllEqual(
+          [[kernel_posterior.distribution, kernel_prior.distribution, None]],
+          kernel_divergence.args)
+
+      self.assertAllEqual(
+          [[bias_posterior.distribution,
+            bias_prior.distribution,
+            bias_posterior.result_sample]],
+          bias_divergence.args)
+
+  def _testRandomConvFlipout(self, layer_class):
+    batch_size, depth, height, width, channels, filters = 2, 4, 4, 4, 3, 5
+    with self.test_session() as sess:
+      seed = Counter()
+      if layer_class in (prob_layers_lib.Conv1DReparameterization,
+                         prob_layers_lib.Conv1DFlipout):
+        inputs = random_ops.random_uniform(
+            [batch_size, width, channels], seed=seed())
+        kernel_size = (2,)
+      elif layer_class in (prob_layers_lib.Conv2DReparameterization,
+                           prob_layers_lib.Conv2DFlipout):
+        inputs = random_ops.random_uniform(
+            [batch_size, height, width, channels], seed=seed())
+        kernel_size = (2, 2)
+      elif layer_class in (prob_layers_lib.Conv3DReparameterization,
+                           prob_layers_lib.Conv3DFlipout):
+        inputs = random_ops.random_uniform(
+            [batch_size, depth, height, width, channels], seed=seed())
+        kernel_size = (2, 2, 2)
+
+      kernel_shape = kernel_size + (channels, filters)
+      bias_size = (filters,)
+
+      kernel_posterior = MockDistribution(
+          loc=random_ops.random_uniform(
+              kernel_shape, seed=seed()),
+          scale=random_ops.random_uniform(
+              kernel_shape, seed=seed()),
+          result_log_prob=random_ops.random_uniform(
+              kernel_shape, seed=seed()),
+          result_sample=random_ops.random_uniform(
+              kernel_shape, seed=seed()))
+      bias_posterior = MockDistribution(
+          loc=random_ops.random_uniform(
+              bias_size, seed=seed()),
+          scale=random_ops.random_uniform(
+              bias_size, seed=seed()),
+          result_log_prob=random_ops.random_uniform(
+              bias_size, seed=seed()),
+          result_sample=random_ops.random_uniform(
+              bias_size, seed=seed()))
+      layer_one = layer_class(
+          filters=filters,
+          kernel_size=kernel_size,
+          padding="SAME",
+          kernel_posterior_fn=lambda *args: kernel_posterior,
+          kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
+          bias_posterior_fn=lambda *args: bias_posterior,
+          bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
+          seed=44)
+      layer_two = layer_class(
+          filters=filters,
+          kernel_size=kernel_size,
+          padding="SAME",
+          kernel_posterior_fn=lambda *args: kernel_posterior,
+          kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
+          bias_posterior_fn=lambda *args: bias_posterior,
+          bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
+          seed=45)
+
+      outputs_one = layer_one(inputs)
+      outputs_two = layer_two(inputs)
+
+      outputs_one_, outputs_two_ = sess.run([
+          outputs_one, outputs_two])
+
+      self.assertLess(np.sum(np.isclose(outputs_one_, outputs_two_)),
+                      np.prod(outputs_one_.shape))
+
+  def testKLPenaltyKernelConv1DReparameterization(self):
+    self._testKLPenaltyKernel(prob_layers_lib.Conv1DReparameterization)
+
+  def testKLPenaltyKernelConv2DReparameterization(self):
+    self._testKLPenaltyKernel(prob_layers_lib.Conv2DReparameterization)
+
+  def testKLPenaltyKernelConv3DReparameterization(self):
+    self._testKLPenaltyKernel(prob_layers_lib.Conv3DReparameterization)
+
+  def testKLPenaltyKernelConv1DFlipout(self):
+    self._testKLPenaltyKernel(prob_layers_lib.Conv1DFlipout)
+
+  def testKLPenaltyKernelConv2DFlipout(self):
+    self._testKLPenaltyKernel(prob_layers_lib.Conv2DFlipout)
+
+  def testKLPenaltyKernelConv3DFlipout(self):
+    self._testKLPenaltyKernel(prob_layers_lib.Conv3DFlipout)
+
+  def testKLPenaltyBothConv1DReparameterization(self):
+    self._testKLPenaltyBoth(prob_layers_lib.Conv1DReparameterization)
+
+  def testKLPenaltyBothConv2DReparameterization(self):
+    self._testKLPenaltyBoth(prob_layers_lib.Conv2DReparameterization)
+
+  def testKLPenaltyBothConv3DReparameterization(self):
+    self._testKLPenaltyBoth(prob_layers_lib.Conv3DReparameterization)
+
+  def testKLPenaltyBothConv1DFlipout(self):
+    self._testKLPenaltyBoth(prob_layers_lib.Conv1DFlipout)
+
+  def testKLPenaltyBothConv2DFlipout(self):
+    self._testKLPenaltyBoth(prob_layers_lib.Conv2DFlipout)
+
+  def testKLPenaltyBothConv3DFlipout(self):
+    self._testKLPenaltyBoth(prob_layers_lib.Conv3DFlipout)
+
+  def testConv1DReparameterization(self):
+    self._testConvReparameterization(prob_layers_lib.Conv1DReparameterization)
+
+  def testConv2DReparameterization(self):
+    self._testConvReparameterization(prob_layers_lib.Conv2DReparameterization)
+
+  def testConv3DReparameterization(self):
+    self._testConvReparameterization(prob_layers_lib.Conv3DReparameterization)
+
+  def testConv1DFlipout(self):
+    self._testConvFlipout(prob_layers_lib.Conv1DFlipout)
+
+  def testConv2DFlipout(self):
+    self._testConvFlipout(prob_layers_lib.Conv2DFlipout)
+
+  def testConv3DFlipout(self):
+    self._testConvFlipout(prob_layers_lib.Conv3DFlipout)
+
+  def testRandomConv1DFlipout(self):
+    self._testRandomConvFlipout(prob_layers_lib.Conv1DFlipout)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
index 50358fd1c2b7635ffe2d08c5af3219bb0a11498b..342f38ccec7ec74db1b393d6cdc22300205cc547 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
@@ -18,11 +18,18 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.bayesflow.python.ops import layers_dense_variational_impl as prob_layers_lib
+import numpy as np
+
+from tensorflow.contrib.bayesflow.python.ops import layers_dense_variational as prob_layers_lib
+from tensorflow.contrib.bayesflow.python.ops import layers_util as prob_layers_util
+from tensorflow.contrib.distributions.python.ops import independent as independent_lib
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import normal as normal_lib
+from tensorflow.python.ops.distributions import util as distribution_util
 from tensorflow.python.platform import test
 
 
@@ -41,14 +48,18 @@ class Counter(object):
     return self._value
 
 
-class MockDistribution(normal_lib.Normal):
-  """Monitors DenseVariational calls to the underlying distribution."""
+class MockDistribution(independent_lib.Independent):
+  """Monitors layer calls to the underlying distribution."""
 
   def __init__(self, result_sample, result_log_prob, loc=None, scale=None):
     self.result_sample = result_sample
     self.result_log_prob = result_log_prob
     self.result_loc = loc
     self.result_scale = scale
+    self.result_distribution = normal_lib.Normal(loc=0.0, scale=1.0)
+    if loc is not None and scale is not None:
+      self.result_distribution = normal_lib.Normal(loc=self.result_loc,
+                                                   scale=self.result_scale)
     self.called_log_prob = Counter()
     self.called_sample = Counter()
     self.called_loc = Counter()
@@ -62,6 +73,10 @@ class MockDistribution(normal_lib.Normal):
     self.called_sample()
     return self.result_sample
 
+  @property
+  def distribution(self):  # for dummy check on Independent(Normal)
+    return self.result_distribution
+
   @property
   def loc(self):
     self.called_loc()
@@ -74,7 +89,7 @@ class MockDistribution(normal_lib.Normal):
 
 
 class MockKLDivergence(object):
-  """Monitors DenseVariational calls to the divergence implementation."""
+  """Monitors layer calls to the divergence implementation."""
 
   def __init__(self, result):
     self.result = result
@@ -87,94 +102,125 @@ class MockKLDivergence(object):
     return self.result
 
 
-class DenseVariationalLocalReparametrization(test.TestCase):
+class DenseVariational(test.TestCase):
 
-  def testKLPenaltyKernel(self):
+  def _testKLPenaltyKernel(self, layer_class):
     with self.test_session():
-      dense_vi = prob_layers_lib.DenseVariational(units=2)
+      layer = layer_class(units=2)
       inputs = random_ops.random_uniform([2, 3], seed=1)
 
       # No keys.
-      loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-      self.assertEqual(len(loss_keys), 0)
-      self.assertListEqual(dense_vi.losses, loss_keys)
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 0)
+      self.assertListEqual(layer.losses, losses)
 
-      _ = dense_vi(inputs)
+      _ = layer(inputs)
 
       # Yes keys.
-      loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-      self.assertEqual(len(loss_keys), 1)
-      self.assertListEqual(dense_vi.losses, loss_keys)
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 1)
+      self.assertListEqual(layer.losses, losses)
 
-  def testKLPenaltyBoth(self):
+  def _testKLPenaltyBoth(self, layer_class):
     def _make_normal(dtype, *args):  # pylint: disable=unused-argument
       return normal_lib.Normal(
           loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.))
     with self.test_session():
-      dense_vi = prob_layers_lib.DenseVariational(
+      layer = layer_class(
           units=2,
-          bias_posterior_fn=prob_layers_lib.default_mean_field_normal_fn(),
+          bias_posterior_fn=prob_layers_util.default_mean_field_normal_fn(),
           bias_prior_fn=_make_normal)
       inputs = random_ops.random_uniform([2, 3], seed=1)
 
       # No keys.
-      loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-      self.assertEqual(len(loss_keys), 0)
-      self.assertListEqual(dense_vi.losses, loss_keys)
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 0)
+      self.assertListEqual(layer.losses, losses)
 
-      _ = dense_vi(inputs)
+      _ = layer(inputs)
 
       # Yes keys.
-      loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-      self.assertEqual(len(loss_keys), 2)
-      self.assertListEqual(dense_vi.losses, loss_keys)
-
-  def testVariationalNonLocal(self):
+      losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      self.assertEqual(len(losses), 2)
+      self.assertListEqual(layer.losses, losses)
+
+  def _testDenseSetUp(self, layer_class, batch_size, in_size, out_size,
+                      **kwargs):
+    seed = Counter()
+    inputs = random_ops.random_uniform([batch_size, in_size], seed=seed())
+
+    kernel_size = [in_size, out_size]
+    kernel_posterior = MockDistribution(
+        loc=random_ops.random_uniform(kernel_size, seed=seed()),
+        scale=random_ops.random_uniform(kernel_size, seed=seed()),
+        result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
+        result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
+    kernel_prior = MockDistribution(
+        result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
+        result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
+    kernel_divergence = MockKLDivergence(
+        result=random_ops.random_uniform(kernel_size, seed=seed()))
+
+    bias_size = [out_size]
+    bias_posterior = MockDistribution(
+        result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
+        result_sample=random_ops.random_uniform(bias_size, seed=seed()))
+    bias_prior = MockDistribution(
+        result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
+        result_sample=random_ops.random_uniform(bias_size, seed=seed()))
+    bias_divergence = MockKLDivergence(
+        result=random_ops.random_uniform(bias_size, seed=seed()))
+
+    layer = layer_class(
+        units=out_size,
+        kernel_posterior_fn=lambda *args: kernel_posterior,
+        kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
+        kernel_prior_fn=lambda *args: kernel_prior,
+        kernel_divergence_fn=kernel_divergence,
+        bias_posterior_fn=lambda *args: bias_posterior,
+        bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
+        bias_prior_fn=lambda *args: bias_prior,
+        bias_divergence_fn=bias_divergence,
+        **kwargs)
+
+    outputs = layer(inputs)
+
+    kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+    return (kernel_posterior, kernel_prior, kernel_divergence,
+            bias_posterior, bias_prior, bias_divergence,
+            layer, inputs, outputs, kl_penalty)
+
+  def testKLPenaltyKernelReparameterization(self):
+    self._testKLPenaltyKernel(prob_layers_lib.DenseReparameterization)
+
+  def testKLPenaltyKernelLocalReparameterization(self):
+    self._testKLPenaltyKernel(prob_layers_lib.DenseLocalReparameterization)
+
+  def testKLPenaltyKernelFlipout(self):
+    self._testKLPenaltyKernel(prob_layers_lib.DenseFlipout)
+
+  def testKLPenaltyBothReparameterization(self):
+    self._testKLPenaltyBoth(prob_layers_lib.DenseReparameterization)
+
+  def testKLPenaltyBothLocalReparameterization(self):
+    self._testKLPenaltyBoth(prob_layers_lib.DenseLocalReparameterization)
+
+  def testKLPenaltyBothFlipout(self):
+    self._testKLPenaltyBoth(prob_layers_lib.DenseFlipout)
+
+  def testDenseReparameterization(self):
     batch_size, in_size, out_size = 2, 3, 4
     with self.test_session() as sess:
-      seed = Counter()
-      inputs = random_ops.random_uniform([batch_size, in_size], seed=seed())
-
-      kernel_size = [in_size, out_size]
-      kernel_posterior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
-          result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
-      kernel_prior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
-          result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
-      kernel_divergence = MockKLDivergence(
-          result=random_ops.random_uniform(kernel_size, seed=seed()))
-
-      bias_size = [out_size]
-      bias_posterior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
-          result_sample=random_ops.random_uniform(bias_size, seed=seed()))
-      bias_prior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
-          result_sample=random_ops.random_uniform(bias_size, seed=seed()))
-      bias_divergence = MockKLDivergence(
-          result=random_ops.random_uniform(bias_size, seed=seed()))
+      (kernel_posterior, kernel_prior, kernel_divergence,
+       bias_posterior, bias_prior, bias_divergence, layer, inputs,
+       outputs, kl_penalty) = self._testDenseSetUp(
+           prob_layers_lib.DenseReparameterization,
+           batch_size, in_size, out_size)
 
       expected_outputs = (
           math_ops.matmul(inputs, kernel_posterior.result_sample) +
           bias_posterior.result_sample)
 
-      dense_vi = prob_layers_lib.DenseVariational(
-          units=2,
-          kernel_use_local_reparameterization=False,
-          kernel_posterior_fn=lambda *args: kernel_posterior,
-          kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
-          kernel_prior_fn=lambda *args: kernel_prior,
-          kernel_divergence_fn=kernel_divergence,
-          bias_posterior_fn=lambda *args: bias_posterior,
-          bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
-          bias_prior_fn=lambda *args: bias_prior,
-          bias_divergence_fn=bias_divergence)
-
-      outputs = dense_vi(inputs)
-
-      kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-
       [
           expected_outputs_, actual_outputs_,
           expected_kernel_, actual_kernel_,
@@ -183,9 +229,9 @@ class DenseVariationalLocalReparametrization(test.TestCase):
           expected_bias_divergence_, actual_bias_divergence_,
       ] = sess.run([
           expected_outputs, outputs,
-          kernel_posterior.result_sample, dense_vi.kernel.posterior_tensor,
+          kernel_posterior.result_sample, layer.kernel_posterior_tensor,
           kernel_divergence.result, kl_penalty[0],
-          bias_posterior.result_sample, dense_vi.bias.posterior_tensor,
+          bias_posterior.result_sample, layer.bias_posterior_tensor,
           bias_divergence.result, kl_penalty[1],
       ])
 
@@ -206,40 +252,25 @@ class DenseVariationalLocalReparametrization(test.TestCase):
           rtol=1e-6, atol=0.)
 
       self.assertAllEqual(
-          [[kernel_posterior, kernel_prior, kernel_posterior.result_sample]],
+          [[kernel_posterior.distribution,
+            kernel_prior.distribution,
+            kernel_posterior.result_sample]],
           kernel_divergence.args)
 
       self.assertAllEqual(
-          [[bias_posterior, bias_prior, bias_posterior.result_sample]],
+          [[bias_posterior.distribution,
+            bias_prior.distribution,
+            bias_posterior.result_sample]],
           bias_divergence.args)
 
-  def testVariationalLocal(self):
+  def testDenseLocalReparameterization(self):
     batch_size, in_size, out_size = 2, 3, 4
     with self.test_session() as sess:
-      seed = Counter()
-      inputs = random_ops.random_uniform([batch_size, in_size], seed=seed())
-
-      kernel_size = [in_size, out_size]
-      kernel_posterior = MockDistribution(
-          loc=random_ops.random_uniform(kernel_size, seed=seed()),
-          scale=random_ops.random_uniform(kernel_size, seed=seed()),
-          result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
-          result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
-      kernel_prior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
-          result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
-      kernel_divergence = MockKLDivergence(
-          result=random_ops.random_uniform(kernel_size, seed=seed()))
-
-      bias_size = [out_size]
-      bias_posterior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
-          result_sample=random_ops.random_uniform(bias_size, seed=seed()))
-      bias_prior = MockDistribution(
-          result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
-          result_sample=random_ops.random_uniform(bias_size, seed=seed()))
-      bias_divergence = MockKLDivergence(
-          result=random_ops.random_uniform(bias_size, seed=seed()))
+      (kernel_posterior, kernel_prior, kernel_divergence,
+       bias_posterior, bias_prior, bias_divergence, layer, inputs,
+       outputs, kl_penalty) = self._testDenseSetUp(
+           prob_layers_lib.DenseLocalReparameterization,
+           batch_size, in_size, out_size)
 
       expected_kernel_posterior_affine = normal_lib.Normal(
           loc=math_ops.matmul(inputs, kernel_posterior.result_loc),
@@ -250,21 +281,80 @@ class DenseVariationalLocalReparametrization(test.TestCase):
       expected_outputs = (expected_kernel_posterior_affine_tensor +
                           bias_posterior.result_sample)
 
-      dense_vi = prob_layers_lib.DenseVariational(
-          units=2,
-          kernel_use_local_reparameterization=True,
-          kernel_posterior_fn=lambda *args: kernel_posterior,
-          kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
-          kernel_prior_fn=lambda *args: kernel_prior,
-          kernel_divergence_fn=kernel_divergence,
-          bias_posterior_fn=lambda *args: bias_posterior,
-          bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
-          bias_prior_fn=lambda *args: bias_prior,
-          bias_divergence_fn=bias_divergence)
+      [
+          expected_outputs_, actual_outputs_,
+          expected_kernel_divergence_, actual_kernel_divergence_,
+          expected_bias_, actual_bias_,
+          expected_bias_divergence_, actual_bias_divergence_,
+      ] = sess.run([
+          expected_outputs, outputs,
+          kernel_divergence.result, kl_penalty[0],
+          bias_posterior.result_sample, layer.bias_posterior_tensor,
+          bias_divergence.result, kl_penalty[1],
+      ])
 
-      outputs = dense_vi(inputs)
+      self.assertAllClose(
+          expected_bias_, actual_bias_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_outputs_, actual_outputs_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_kernel_divergence_, actual_kernel_divergence_,
+          rtol=1e-6, atol=0.)
+      self.assertAllClose(
+          expected_bias_divergence_, actual_bias_divergence_,
+          rtol=1e-6, atol=0.)
+
+      self.assertAllEqual(
+          [[kernel_posterior.distribution,
+            kernel_prior.distribution,
+            None]],
+          kernel_divergence.args)
+
+      self.assertAllEqual(
+          [[bias_posterior.distribution,
+            bias_prior.distribution,
+            bias_posterior.result_sample]],
+          bias_divergence.args)
+
+  def testDenseFlipout(self):
+    batch_size, in_size, out_size = 2, 3, 4
+    with self.test_session() as sess:
+      (kernel_posterior, kernel_prior, kernel_divergence,
+       bias_posterior, bias_prior, bias_divergence, layer, inputs,
+       outputs, kl_penalty) = self._testDenseSetUp(
+           prob_layers_lib.DenseFlipout,
+           batch_size, in_size, out_size, seed=44)
+
+      expected_kernel_posterior_affine = normal_lib.Normal(
+          loc=array_ops.zeros_like(kernel_posterior.result_loc),
+          scale=kernel_posterior.result_scale)
+      expected_kernel_posterior_affine_tensor = (
+          expected_kernel_posterior_affine.sample(seed=42))
 
-      kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+      sign_input = random_ops.random_uniform(
+          [batch_size, in_size],
+          minval=0,
+          maxval=2,
+          dtype=dtypes.int32,
+          seed=layer.seed)
+      sign_input = math_ops.cast(2 * sign_input - 1, inputs.dtype)
+      sign_output = random_ops.random_uniform(
+          [batch_size, out_size],
+          minval=0,
+          maxval=2,
+          dtype=dtypes.int32,
+          seed=distribution_util.gen_new_seed(
+              layer.seed, salt="dense_flipout"))
+      sign_output = math_ops.cast(2 * sign_output - 1, inputs.dtype)
+      perturbed_inputs = math_ops.matmul(
+          inputs * sign_input, expected_kernel_posterior_affine_tensor)
+      perturbed_inputs *= sign_output
+
+      expected_outputs = math_ops.matmul(inputs, kernel_posterior.result_loc)
+      expected_outputs += perturbed_inputs
+      expected_outputs += bias_posterior.result_sample
 
       [
           expected_outputs_, actual_outputs_,
@@ -274,7 +364,7 @@ class DenseVariationalLocalReparametrization(test.TestCase):
       ] = sess.run([
           expected_outputs, outputs,
           kernel_divergence.result, kl_penalty[0],
-          bias_posterior.result_sample, dense_vi.bias.posterior_tensor,
+          bias_posterior.result_sample, layer.bias_posterior_tensor,
           bias_divergence.result, kl_penalty[1],
       ])
 
@@ -292,13 +382,62 @@ class DenseVariationalLocalReparametrization(test.TestCase):
           rtol=1e-6, atol=0.)
 
       self.assertAllEqual(
-          [[kernel_posterior, kernel_prior, None]],
+          [[kernel_posterior.distribution, kernel_prior.distribution, None]],
           kernel_divergence.args)
 
       self.assertAllEqual(
-          [[bias_posterior, bias_prior, bias_posterior.result_sample]],
+          [[bias_posterior.distribution,
+            bias_prior.distribution,
+            bias_posterior.result_sample]],
           bias_divergence.args)
 
+  def testRandomDenseFlipout(self):
+    batch_size, in_size, out_size = 2, 3, 4
+    with self.test_session() as sess:
+      seed = Counter()
+      inputs = random_ops.random_uniform([batch_size, in_size], seed=seed())
+
+      kernel_posterior = MockDistribution(
+          loc=random_ops.random_uniform(
+              [in_size, out_size], seed=seed()),
+          scale=random_ops.random_uniform(
+              [in_size, out_size], seed=seed()),
+          result_log_prob=random_ops.random_uniform(
+              [in_size, out_size], seed=seed()),
+          result_sample=random_ops.random_uniform(
+              [in_size, out_size], seed=seed()))
+      bias_posterior = MockDistribution(
+          loc=random_ops.random_uniform(
+              [out_size], seed=seed()),
+          scale=random_ops.random_uniform(
+              [out_size], seed=seed()),
+          result_log_prob=random_ops.random_uniform(
+              [out_size], seed=seed()),
+          result_sample=random_ops.random_uniform(
+              [out_size], seed=seed()))
+      layer_one = prob_layers_lib.DenseFlipout(
+          units=out_size,
+          kernel_posterior_fn=lambda *args: kernel_posterior,
+          kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
+          bias_posterior_fn=lambda *args: bias_posterior,
+          bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
+          seed=44)
+      layer_two = prob_layers_lib.DenseFlipout(
+          units=out_size,
+          kernel_posterior_fn=lambda *args: kernel_posterior,
+          kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
+          bias_posterior_fn=lambda *args: bias_posterior,
+          bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
+          seed=45)
+
+      outputs_one = layer_one(inputs)
+      outputs_two = layer_two(inputs)
+
+      outputs_one_, outputs_two_ = sess.run([
+          outputs_one, outputs_two])
+
+      self.assertLess(np.sum(np.isclose(outputs_one_, outputs_two_)), out_size)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py
index 66793383fdd5c71f136900197a91be6966e2f8c7..756c25683bd4b0c8c77e9e28485ca2a85582999c 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py
@@ -1,4 +1,4 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -36,9 +36,9 @@ class SGLDOptimizerTest(test.TestCase):
         grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
         grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
         decay_rate = 0.53
-        sgd_op = SGLDOptimizer(
-            3.0, preconditioner_decay_rate=decay_rate).apply_gradients(
-                zip([grads0, grads1], [var0, var1]))
+        sgd_optimizer = SGLDOptimizer(3.0, preconditioner_decay_rate=decay_rate)
+        sgd_op = sgd_optimizer.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
         # Fetch params to validate initial values
         self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval())
@@ -54,6 +54,7 @@ class SGLDOptimizerTest(test.TestCase):
             decay_rate + (1 - decay_rate) * 0.01**2 + 1e-8))
         self.assertAllCloseAccordingToType(
             [3.0 - 3.0 * grads_scaled, 4.0 - 3.0 * grads_scaled], var1.eval())
+        self.assertAllCloseAccordingToType(1, sgd_optimizer._counter.eval())
 
   def testBasicMultiInstance(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
@@ -102,6 +103,8 @@ class SGLDOptimizerTest(test.TestCase):
                             sgd_optimizer2.variable_scope)
         self.assertNotEqual(sgd_optimizer.variable_scope.name,
                             sgd_optimizer2.variable_scope.name)
+        self.assertAllCloseAccordingToType(1, sgd_optimizer._counter.eval())
+        self.assertAllCloseAccordingToType(1, sgd_optimizer2._counter.eval())
 
   def testTensorLearningRate(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/variational_sgd_optimizer_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/variational_sgd_optimizer_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..83c64dbe0fd586edcb784a5c09a4c133aaa99cff
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/variational_sgd_optimizer_test.py
@@ -0,0 +1,268 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional test for GradientDescent."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from tensorflow.contrib.bayesflow.python.ops.optimizers import VariationalSGDOptimizer
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class VariationalSGDOptimizerTest(test.TestCase):
+
+  def testBasic(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = variables.Variable([1.1, 2.1], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        decay_rate = 0.53
+        sgd_op = VariationalSGDOptimizer(
+            1,
+            1,
+            preconditioner_decay_rate=decay_rate,
+            max_learning_rate=3.0,
+            burnin_max_learning_rate=3.0,
+            use_single_learning_rate=True).apply_gradients(
+                zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        self.assertAllCloseAccordingToType([1.1 - 3.0 * 0.1, 2.1 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+
+  def testBasicMultiInstance(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = variables.Variable([1.1, 2.1], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        vara = variables.Variable([1.1, 2.1], dtype=dtype)
+        varb = variables.Variable([3.0, 4.0], dtype=dtype)
+        gradsa = constant_op.constant([0.1, 0.1], dtype=dtype)
+        gradsb = constant_op.constant([0.01, 0.01], dtype=dtype)
+        decay_rate = 0.5
+        batch_size = 2
+        total_num_examples = 10
+        optimizer = VariationalSGDOptimizer(
+            batch_size,
+            total_num_examples,
+            max_learning_rate=1.0,
+            burnin_max_learning_rate=3.0,
+            preconditioner_decay_rate=decay_rate)
+        sgd_op = optimizer.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        optimizer2 = VariationalSGDOptimizer(
+            batch_size,
+            total_num_examples,
+            max_learning_rate=1.0,
+            burnin_max_learning_rate=10.0,
+            burnin=0,
+            preconditioner_decay_rate=decay_rate)
+        sgd_op2 = optimizer2.apply_gradients(
+            zip([gradsa, gradsb], [vara, varb]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        self.assertAllCloseAccordingToType([1.1, 2.1], vara.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], varb.eval())
+
+        # Run 1 step of sgd
+        sgd_op.run()
+        sgd_op2.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([1.1 - 3. * 0.1, 2.1 - 3. * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([1.1 - 0.1, 2.1 - 0.1], vara.eval())
+
+        self.assertAllCloseAccordingToType([3.0 - 3. * 0.01, 4.0 - 3. * 0.01],
+                                           var1.eval())
+        self.assertAllCloseAccordingToType([3.0 - 0.01, 4.0 - 0.01],
+                                           varb.eval())
+        self.assertNotEqual(optimizer.variable_scope,
+                            optimizer2.variable_scope)
+        self.assertNotEqual(optimizer.variable_scope.name,
+                            optimizer2.variable_scope.name)
+        self.assertAllCloseAccordingToType(1, optimizer._counter.eval())
+        self.assertAllCloseAccordingToType(1, optimizer2._counter.eval())
+
+  def testTensorLearningRate(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = variables.Variable([1.1, 2.1], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        lrate = constant_op.constant(3.0)
+        decay_rate = 0.5
+        batch_size = 2
+        total_num_examples = 10
+        sgd_op = VariationalSGDOptimizer(
+            batch_size,
+            total_num_examples,
+            max_learning_rate=lrate,
+            burnin=0,
+            preconditioner_decay_rate=decay_rate).apply_gradients(
+                zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([1.1 - 3.0 * 0.1, 2.1 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+
+  def testTensorDecayLearningRate(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = variables.Variable([1.1, 2.1], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        lrate = variables.Variable(3.0)
+        lrate_decay_op = lrate.assign_add(-3.)
+        decay_rate = 0.5
+        batch_size = 2
+        total_num_examples = 10
+        optimizer = VariationalSGDOptimizer(
+            batch_size,
+            total_num_examples,
+            max_learning_rate=lrate,
+            burnin=0,
+            preconditioner_decay_rate=decay_rate)
+        sgd_op = optimizer.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([1.1 - 3.0 * 0.1, 2.1 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+        # Update learning rate to 0
+        lrate_decay_op.eval()
+        sgd_op.run()
+        # Validate params haven't changed
+        self.assertAllCloseAccordingToType([1.1 - 3.0 * 0.1, 2.1 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+        lrate_decay_op.eval()
+
+        with self.assertRaises(errors.InvalidArgumentError):
+          sgd_op.run()
+
+  def testGradWrtRef(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        opt = VariationalSGDOptimizer(1, 1, max_learning_rate=1.0)
+        values = [1.0, 3.0]
+        vars_ = [variables.Variable([v], dtype=dtype) for v in values]
+        grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1], vars_)
+        variables.global_variables_initializer().run()
+        for grad, _ in grads_and_vars:
+          self.assertAllCloseAccordingToType([1.0], grad.eval())
+
+  def testWithGlobalStep(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        global_step = variables.Variable(0, trainable=False)
+        var0 = variables.Variable([1.1, 2.1], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        decay_rate = 0.1
+        batch_size = 2
+        total_num_examples = 10
+        sgd_optimizer = VariationalSGDOptimizer(
+            batch_size,
+            total_num_examples,
+            max_learning_rate=3.0,
+            burnin=0,
+            preconditioner_decay_rate=decay_rate)
+        sgd_op = sgd_optimizer.apply_gradients(
+            zip([grads0, grads1], [var0, var1]), global_step=global_step)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+
+        # Validate updated params and global_step
+        self.assertAllCloseAccordingToType([1.1 - 3.0 * 0.1, 2.1 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+        self.assertAllCloseAccordingToType(1, global_step.eval())
+        self.assertAllCloseAccordingToType(1, sgd_optimizer._counter.eval())
+
+  def testSparseBasic(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = variables.Variable([[1.1], [2.1]], dtype=dtype)
+        var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
+            constant_op.constant([0]), constant_op.constant([2, 1]))
+        grads1 = ops.IndexedSlices(
+            constant_op.constant([0.01], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]), constant_op.constant([2, 1]))
+        decay_rate = 0.1
+        batch_size = 2
+        total_num_examples = 10
+        sgd_op = VariationalSGDOptimizer(
+            batch_size,
+            total_num_examples,
+            max_learning_rate=3.0,
+            burnin=0,
+            preconditioner_decay_rate=decay_rate).apply_gradients(
+                zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.1], [2.1]], var0.eval())
+        self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([[1.1 - 3.0 * 0.1], [2.1]],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType(
+            [[3.0 - 3.0 * 0], [4.0 - 3.0 * 0.01]], var1.eval())
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py b/tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py
index ee3719232d8796c338247320fd8ef832a41df12b..fdc12e3b21466a2c552124d6c6a339a0c25f9f46 100644
--- a/tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py
+++ b/tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py
@@ -43,7 +43,7 @@ def custom_gradient(fx, gx, x, axis=(),
   h(x) = x * stop_gradient(g(x)) + stop_gradient(f(x) - x * g(x))
   ```
 
-  is such that `h(x) = stop(f(x))` and `grad[h(x), x] = stop_gradient(g(x)).`
+  is such that `h(x) = stop_gradient(f(x))` and `grad[h(x), x] = stop_gradient(g(x)).`
 
   In addition to scalar-domain/scalar-range functions, this function also
   supports tensor-domain/scalar-range functions. However, in the latter case it
diff --git a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py b/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
index 333dce929530adceb30dcb63653a5bd009c059e0..5685a942e98800a39ec718adc67bcfd43aeafd52 100644
--- a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
+++ b/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
@@ -27,6 +27,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -174,9 +175,11 @@ def chain(n_iterations, step_size, n_leapfrog_steps, initial_x,
 
     potential_and_grad = _make_potential_and_grad(target_log_prob_fn)
     potential, grad = potential_and_grad(initial_x)
-    return functional_ops.scan(body, array_ops.zeros(n_iterations),
-                               (initial_x, array_ops.zeros(non_event_shape),
-                                -potential, -grad))[:2]
+    return functional_ops.scan(
+        body, array_ops.zeros(n_iterations, dtype=initial_x.dtype),
+        (initial_x,
+         array_ops.zeros(non_event_shape, dtype=initial_x.dtype),
+         -potential, -grad))[:2]
 
 
 def ais_chain(n_iterations, step_size, n_leapfrog_steps, initial_x,
@@ -298,8 +301,9 @@ def ais_chain(n_iterations, step_size, n_leapfrog_steps, initial_x,
       return updated_x, acceptance_probs, w
 
     x, acceptance_probs, w = functional_ops.scan(
-        _body, beta_series, (initial_x, array_ops.zeros(non_event_shape),
-                             array_ops.zeros(non_event_shape)))
+        _body, beta_series,
+        (initial_x, array_ops.zeros(non_event_shape, dtype=initial_x.dtype),
+         array_ops.zeros(non_event_shape, dtype=initial_x.dtype)))
   return w[-1], x[-1], acceptance_probs[-1]
 
 
@@ -446,9 +450,10 @@ def kernel(step_size, n_leapfrog_steps, x, target_log_prob_fn, event_dims=(),
   """
   with ops.name_scope(name, 'hmc_kernel', [step_size, n_leapfrog_steps, x]):
     potential_and_grad = _make_potential_and_grad(target_log_prob_fn)
+    x = ops.convert_to_tensor(x, name='x')
 
     x_shape = array_ops.shape(x)
-    m = random_ops.random_normal(x_shape)
+    m = random_ops.random_normal(x_shape, dtype=x.dtype)
 
     kinetic_0 = 0.5 * math_ops.reduce_sum(math_ops.square(m), event_dims)
 
@@ -468,26 +473,33 @@ def kernel(step_size, n_leapfrog_steps, x, target_log_prob_fn, event_dims=(),
 
     kinetic_1 = 0.5 * math_ops.reduce_sum(math_ops.square(new_m), event_dims)
 
-    # TODO(mhoffman): It seems like there may be an opportunity for nans here.
-    # I'm delaying addressing this because we're going to refactor this part
-    # to use the more general Metropolis abstraction anyway.
-    acceptance_probs = math_ops.exp(math_ops.minimum(0., log_potential_0 -
-                                                     log_potential_1 +
-                                                     kinetic_0 - kinetic_1))
-    accepted = math_ops.cast(
-        random_ops.random_uniform(array_ops.shape(acceptance_probs)) <
-        acceptance_probs, np.float32)
-    new_log_prob = (-log_potential_0 * (1. - accepted) -
-                    log_potential_1 * accepted)
+    energy_change = log_potential_1 - log_potential_0 + kinetic_1 - kinetic_0
+    # Treat NaN as infinite energy (and therefore guaranteed rejection).
+    energy_change = array_ops.where(
+        math_ops.is_nan(energy_change),
+        array_ops.fill(array_ops.shape(energy_change),
+                       energy_change.dtype.as_numpy_dtype(np.inf)),
+        energy_change)
+    acceptance_probs = math_ops.exp(math_ops.minimum(-energy_change, 0.))
+    accepted = (
+        random_ops.random_uniform(
+            array_ops.shape(acceptance_probs), dtype=x.dtype)
+        < acceptance_probs)
+    new_log_prob = -array_ops.where(accepted, log_potential_1, log_potential_0)
 
     # TODO(b/65738010): This should work, but it doesn't for now.
     # reduced_shape = math_ops.reduced_shape(x_shape, event_dims)
     reduced_shape = array_ops.shape(math_ops.reduce_sum(x, event_dims,
                                                         keep_dims=True))
     accepted = array_ops.reshape(accepted, reduced_shape)
-    new_x = x * (1. - accepted) + new_x * accepted
-    new_grad = -grad_0 * (1. - accepted) - grad_1 * accepted
-
+    accepted = math_ops.logical_or(
+        accepted, math_ops.cast(array_ops.zeros_like(x), dtypes.bool))
+    new_x = array_ops.where(accepted, new_x, x)
+    new_grad = -array_ops.where(accepted, grad_1, grad_0)
+
+  # TODO(langmore) Gradients of acceptance_probs and new_log_prob with respect
+  # to initial_x will propagate NaNs (see testNanFromGradsDontPropagate).  This
+  # should be fixed.
   return new_x, acceptance_probs, new_log_prob, new_grad
 
 
@@ -525,6 +537,7 @@ def leapfrog_integrator(step_size, n_steps, initial_position, initial_momentum,
       Has shape matching `initial_position`.
 
   Example: Simple quadratic potential.
+
   ```python
   def potential_and_grad(position):
     return tf.reduce_sum(0.5 * tf.square(position)), position
@@ -600,6 +613,7 @@ def leapfrog_step(step_size, position, momentum, potential_and_grad, grad,
       Has shape matching `position`.
 
   Example: Simple quadratic potential.
+
   ```python
   def potential_and_grad(position):
     # Simple quadratic potential
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers.py b/tensorflow/contrib/bayesflow/python/ops/layers.py
index dcead38af826a12e776160bdb251ba021e6b953c..a742b7c1aa593d6c08bf9d8d597c99c9fc4e7aed 100644
--- a/tensorflow/contrib/bayesflow/python/ops/layers.py
+++ b/tensorflow/contrib/bayesflow/python/ops/layers.py
@@ -23,13 +23,43 @@ from __future__ import print_function
 
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
-from tensorflow.contrib.bayesflow.python.ops.layers_dense_variational_impl import *
+from tensorflow.contrib.bayesflow.python.ops.layers_conv_variational import *
+from tensorflow.contrib.bayesflow.python.ops.layers_dense_variational import *
+from tensorflow.contrib.bayesflow.python.ops.layers_util import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
-    'DenseVariational',
-    'dense_variational',
+    'Convolution1DReparameterization',
+    'Convolution2DReparameterization',
+    'Convolution3DReparameterization',
+    'Convolution1DFlipout',
+    'Convolution2DFlipout',
+    'Convolution3DFlipout',
+    'Conv1DReparameterization',
+    'Conv2DReparameterization',
+    'Conv3DReparameterization',
+    'Conv1DFlipout',
+    'Conv2DFlipout',
+    'Conv3DFlipout',
+    'convolution1d_reparameterization',
+    'convolution2d_reparameterization',
+    'convolution3d_reparameterization',
+    'convolution1d_flipout',
+    'convolution2d_flipout',
+    'convolution3d_flipout',
+    'conv1d_reparameterization',
+    'conv2d_reparameterization',
+    'conv3d_reparameterization',
+    'conv1d_flipout',
+    'conv2d_flipout',
+    'conv3d_flipout',
+    'DenseReparameterization',
+    'DenseLocalReparameterization',
+    'DenseFlipout',
+    'dense_reparameterization',
+    'dense_local_reparameterization',
+    'dense_flipout',
     'default_loc_scale_fn',
     'default_mean_field_normal_fn',
 ]
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py b/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py
new file mode 100644
index 0000000000000000000000000000000000000000..7723cfb442712626ff415f1412e3362f2392ce9f
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py
@@ -0,0 +1,2943 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Convolutional variational layer classes and their functional aliases.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.bayesflow.python.ops import layers_util
+from tensorflow.contrib.distributions.python.ops import independent as independent_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.layers import base as layers_lib
+from tensorflow.python.layers import utils
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import standard_ops
+from tensorflow.python.ops.distributions import kullback_leibler as kl_lib
+from tensorflow.python.ops.distributions import normal as normal_lib
+from tensorflow.python.ops.distributions import util as distribution_util
+
+
+class _ConvVariational(layers_lib.Layer):
+  """Abstract nD convolution layer (private, used as implementation base).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of n integers, specifying the
+      length of the convolution window.
+    strides: An integer or tuple/list of n integers,
+      specifying the stride length of the convolution.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, ..., channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, ...)`.
+    dilation_rate: An integer or tuple/list of n integers, specifying
+      the dilation rate to use for dilated convolution.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any `strides` value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    name: A string, the name of the layer.
+
+  Properties:
+    rank: Python integer, dimensionality of convolution.
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+  """
+
+  def __init__(
+      self,
+      rank,
+      filters,
+      kernel_size,
+      strides=1,
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=1,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(_ConvVariational, self).__init__(
+        trainable=trainable,
+        name=name,
+        activity_regularizer=activity_regularizer,
+        **kwargs)
+    self.rank = rank
+    self.filters = filters
+    self.kernel_size = utils.normalize_tuple(kernel_size, rank, "kernel_size")
+    self.strides = utils.normalize_tuple(strides, rank, "strides")
+    self.padding = utils.normalize_padding(padding)
+    self.data_format = utils.normalize_data_format(data_format)
+    self.dilation_rate = utils.normalize_tuple(
+        dilation_rate, rank, "dilation_rate")
+    self.activation = activation
+    self.input_spec = layers_lib.InputSpec(ndim=self.rank + 2)
+    self.kernel_posterior_fn = kernel_posterior_fn
+    self.kernel_posterior_tensor_fn = kernel_posterior_tensor_fn
+    self.kernel_prior_fn = kernel_prior_fn
+    self.kernel_divergence_fn = kernel_divergence_fn
+    self.bias_posterior_fn = bias_posterior_fn
+    self.bias_posterior_tensor_fn = bias_posterior_tensor_fn
+    self.bias_prior_fn = bias_prior_fn
+    self.bias_divergence_fn = bias_divergence_fn
+
+  def build(self, input_shape):
+    input_shape = tensor_shape.TensorShape(input_shape)
+    if self.data_format == "channels_first":
+      channel_axis = 1
+    else:
+      channel_axis = -1
+    if input_shape[channel_axis].value is None:
+      raise ValueError("The channel dimension of the inputs "
+                       "should be defined. Found `None`.")
+    input_dim = input_shape[channel_axis].value
+    kernel_shape = self.kernel_size + (input_dim, self.filters)
+    dtype = dtypes.as_dtype(self.dtype)
+
+    # Must have a posterior kernel.
+    self.kernel_posterior = self.kernel_posterior_fn(
+        dtype, kernel_shape, "kernel_posterior",
+        self.trainable, self.add_variable)
+
+    if self.kernel_prior_fn is None:
+      self.kernel_prior = None
+    else:
+      self.kernel_prior = self.kernel_prior_fn(
+          dtype, kernel_shape, "kernel_prior",
+          self.trainable, self.add_variable)
+    self._built_kernel_divergence = False
+
+    if self.bias_posterior_fn is None:
+      self.bias_posterior = None
+    else:
+      self.bias_posterior = self.bias_posterior_fn(
+          dtype, (self.filters,), "bias_posterior",
+          self.trainable, self.add_variable)
+
+    if self.bias_prior_fn is None:
+      self.bias_prior = None
+    else:
+      self.bias_prior = self.bias_prior_fn(
+          dtype, (self.filters,), "bias_prior",
+          self.trainable, self.add_variable)
+    self._built_bias_divergence = False
+
+    self.input_spec = layers_lib.InputSpec(ndim=self.rank + 2,
+                                           axes={channel_axis: input_dim})
+    self._convolution_op = nn_ops.Convolution(
+        input_shape,
+        filter_shape=tensor_shape.TensorShape(kernel_shape),
+        dilation_rate=self.dilation_rate,
+        strides=self.strides,
+        padding=self.padding.upper(),
+        data_format=utils.convert_data_format(self.data_format,
+                                              self.rank + 2))
+
+    self.built = True
+
+  def call(self, inputs):
+    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
+
+    outputs = self._apply_variational_kernel(inputs)
+    outputs = self._apply_variational_bias(outputs)
+    if self.activation is not None:
+      outputs = self.activation(outputs)
+    if not self._built_kernel_divergence:
+      kernel_posterior = self.kernel_posterior
+      kernel_prior = self.kernel_prior
+      if isinstance(self.kernel_posterior, independent_lib.Independent):
+        kernel_posterior = kernel_posterior.distribution
+      if isinstance(self.kernel_prior, independent_lib.Independent):
+        kernel_prior = kernel_prior.distribution
+      self._apply_divergence(self.kernel_divergence_fn,
+                             kernel_posterior,
+                             kernel_prior,
+                             self.kernel_posterior_tensor,
+                             name="divergence_kernel")
+      self._built_kernel_divergence = True
+    if not self._built_bias_divergence:
+      bias_posterior = self.bias_posterior
+      bias_prior = self.bias_prior
+      if isinstance(self.bias_posterior, independent_lib.Independent):
+        bias_posterior = bias_posterior.distribution
+      if isinstance(self.bias_prior, independent_lib.Independent):
+        bias_prior = bias_prior.distribution
+      self._apply_divergence(self.bias_divergence_fn,
+                             bias_posterior,
+                             bias_prior,
+                             self.bias_posterior_tensor,
+                             name="divergence_bias")
+      self._built_bias_divergence = True
+    return outputs
+
+  def _apply_variational_bias(self, inputs):
+    if self.bias_posterior is None:
+      self.bias_posterior_tensor = None
+      return inputs
+    self.bias_posterior_tensor = self.bias_posterior_tensor_fn(
+        self.bias_posterior)
+    outputs = inputs
+    if self.data_format == "channels_first":
+      if self.rank == 1:
+        # nn.bias_add does not accept a 1D input tensor.
+        bias = array_ops.reshape(self.bias_posterior_tensor,
+                                 (1, self.filters, 1))
+        outputs += bias
+      if self.rank == 2:
+        outputs = nn.bias_add(outputs,
+                              self.bias_posterior_tensor,
+                              data_format="NCHW")
+      if self.rank == 3:
+        # As of Mar 2017, direct addition is significantly slower than
+        # bias_add when computing gradients. To use bias_add, we collapse Z
+        # and Y into a single dimension to obtain a 4D input tensor.
+        outputs_shape = outputs.shape.as_list()
+        outputs_4d = array_ops.reshape(outputs,
+                                       [outputs_shape[0], outputs_shape[1],
+                                        outputs_shape[2] * outputs_shape[3],
+                                        outputs_shape[4]])
+        outputs_4d = nn.bias_add(outputs_4d,
+                                 self.bias_posterior_tensor,
+                                 data_format="NCHW")
+        outputs = array_ops.reshape(outputs_4d, outputs_shape)
+    else:
+      outputs = nn.bias_add(outputs,
+                            self.bias_posterior_tensor,
+                            data_format="NHWC")
+    return outputs
+
+  def _apply_divergence(self, divergence_fn, posterior, prior,
+                        posterior_tensor, name):
+    if (divergence_fn is None or
+        posterior is None or
+        prior is None):
+      divergence = None
+      return
+    divergence = standard_ops.identity(
+        divergence_fn(
+            posterior, prior, posterior_tensor),
+        name=name)
+    self.add_loss(divergence)
+
+  def _compute_output_shape(self, input_shape):
+    input_shape = tensor_shape.TensorShape(input_shape).as_list()
+    if self.data_format == "channels_last":
+      space = input_shape[1:-1]
+      new_space = []
+      for i in range(len(space)):
+        new_dim = utils.conv_output_length(
+            space[i],
+            self.kernel_size[i],
+            padding=self.padding,
+            stride=self.strides[i],
+            dilation=self.dilation_rate[i])
+        new_space.append(new_dim)
+      return tensor_shape.TensorShape([input_shape[0]] + new_space +
+                                      [self.filters])
+    else:
+      space = input_shape[2:]
+      new_space = []
+      for i in range(len(space)):
+        new_dim = utils.conv_output_length(
+            space[i],
+            self.kernel_size[i],
+            padding=self.padding,
+            stride=self.strides[i],
+            dilation=self.dilation_rate[i])
+        new_space.append(new_dim)
+      return tensor_shape.TensorShape([input_shape[0], self.filters] +
+                                      new_space)
+
+
+class _ConvReparameterization(_ConvVariational):
+  """Abstract nD convolution layer (private, used as implementation base).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the reparameterization
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of n integers, specifying the
+      length of the convolution window.
+    strides: An integer or tuple/list of n integers,
+      specifying the stride length of the convolution.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, ..., channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, ...)`.
+    dilation_rate: An integer or tuple/list of n integers, specifying
+      the dilation rate to use for dilated convolution.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any `strides` value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    name: A string, the name of the layer.
+
+  Properties:
+    rank: Python integer, dimensionality of convolution.
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  [1]: "Auto-Encoding Variational Bayes."
+        Diederik P. Kingma, Max Welling.
+        International Conference on Learning Representations, 2014.
+  """
+
+  def __init__(
+      self,
+      rank,
+      filters,
+      kernel_size,
+      strides=1,
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=1,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(_ConvReparameterization, self).__init__(
+        rank=rank,
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name, **kwargs)
+
+  def _apply_variational_kernel(self, inputs):
+    self.kernel_posterior_tensor = self.kernel_posterior_tensor_fn(
+        self.kernel_posterior)
+    self.kernel_posterior_affine = None
+    self.kernel_posterior_affine_tensor = None
+    outputs = self._convolution_op(inputs, self.kernel_posterior_tensor)
+    return outputs
+
+
+class Conv1DReparameterization(_ConvReparameterization):
+  """1D convolution layer (e.g. temporal convolution).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the reparameterization
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of a single integer, specifying the
+      length of the 1D convolution window.
+    strides: An integer or tuple/list of a single integer,
+      specifying the stride length of the convolution.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, length, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, length)`.
+    dilation_rate: An integer or tuple/list of a single integer, specifying
+      the dilation rate to use for dilated convolution.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any `strides` value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+
+  Properties:
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 128, 1])
+  net = tfp.layers.Conv1DReparameterization(64,
+                                            kernel_size=5,
+                                            padding="SAME",
+                                            activation=tf.nn.relu)(net)
+  net = tf.reshape(net, [-1, 128 * 64])
+  logits = tfp.layers.DenseReparameterization(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Auto-Encoding Variational Bayes."
+        Diederik P. Kingma, Max Welling.
+        International Conference on Learning Representations, 2014.
+  """
+
+  def __init__(
+      self,
+      filters,
+      kernel_size,
+      strides=1,
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=1,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(Conv1DReparameterization, self).__init__(
+        rank=1,
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name, **kwargs)
+
+
+def conv1d_reparameterization(
+    inputs,
+    filters,
+    kernel_size,
+    strides=1,
+    padding="valid",
+    data_format="channels_last",
+    dilation_rate=1,
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    name=None,
+    reuse=None):
+  """Functional interface for 1D convolution layer (e.g. temporal convolution).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the reparameterization
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    inputs: Tensor input.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of a single integer, specifying the
+      length of the 1D convolution window.
+    strides: An integer or tuple/list of a single integer,
+      specifying the stride length of the convolution.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, length, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, length)`.
+    dilation_rate: An integer or tuple/list of a single integer, specifying
+      the dilation rate to use for dilated convolution.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any `strides` value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+    reuse: Boolean, whether to reuse the weights of a previous layer
+      by the same name.
+
+  Returns:
+    Output tensor.
+
+  Raises:
+    ValueError: if eager execution is enabled.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 128, 1])
+  net = tfp.layers.conv1d_reparameterization(net,
+                                             filters=64,
+                                             kernel_size=5,
+                                             padding="SAME",
+                                             activation=tf.nn.relu)
+  net = tf.reshape(net, [-1, 128 * 64])
+  logits = tfp.layers.dense_reparameterization(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Auto-Encoding Variational Bayes."
+        Diederik P. Kingma, Max Welling.
+        International Conference on Learning Representations, 2014.
+  """
+  layer = Conv1DReparameterization(
+      filters=filters,
+      kernel_size=kernel_size,
+      strides=strides,
+      padding=padding,
+      data_format=data_format,
+      dilation_rate=dilation_rate,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+class Conv2DReparameterization(_ConvReparameterization):
+  """2D convolution layer (e.g. spatial convolution over images).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the reparameterization
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 2 integers, specifying the
+      height and width of the 2D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 2 integers,
+      specifying the strides of the convolution along the height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, height, width, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, height, width)`.
+
+    dilation_rate: An integer or tuple/list of 2 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+
+  Properties:
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 32, 32, 3])
+  net = tfp.layers.Conv2DReparameterization(64,
+                                            kernel_size=5,
+                                            padding="SAME",
+                                            activation=tf.nn.relu)(net)
+  net = tf.layers.MaxPooling2D(pool_size=2,
+                               strides=2,
+                               padding="SAME")(net)
+  net = tf.reshape(net, [-1, 8 * 8 * 64])
+  logits = tfp.layers.DenseReparameterization(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Auto-Encoding Variational Bayes."
+        Diederik P. Kingma, Max Welling.
+        International Conference on Learning Representations, 2014.
+  """
+
+  def __init__(
+      self,
+      filters,
+      kernel_size,
+      strides=(1, 1),
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=(1, 1),
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(Conv2DReparameterization, self).__init__(
+        rank=2,
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name, **kwargs)
+
+
+def conv2d_reparameterization(
+    inputs,
+    filters,
+    kernel_size,
+    strides=(1, 1),
+    padding="valid",
+    data_format="channels_last",
+    dilation_rate=(1, 1),
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    name=None,
+    reuse=None):
+  """Functional interface for the 2D convolution layer.
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the reparameterization
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    inputs: Tensor input.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 2 integers, specifying the
+      height and width of the 2D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 2 integers,
+      specifying the strides of the convolution along the height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, height, width, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, height, width)`.
+
+    dilation_rate: An integer or tuple/list of 2 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+    reuse: Boolean, whether to reuse the weights of a previous layer
+      by the same name.
+
+  Returns:
+    Output tensor.
+
+  Raises:
+    ValueError: if eager execution is enabled.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 32, 32, 3])
+  net = tfp.layers.conv2d_reparameterization(net,
+                                             filters=64,
+                                             kernel_size=5,
+                                             padding="SAME",
+                                             activation=tf.nn.relu)
+  net = tf.layers.max_pooling2d(net,
+                                pool_size=2,
+                                strides=2,
+                                padding="SAME")
+  net = tf.reshape(net, [-1, 8 * 8 * 64])
+  logits = tfp.layers.dense_reparameterization(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Auto-Encoding Variational Bayes."
+        Diederik P. Kingma, Max Welling.
+        International Conference on Learning Representations, 2014.
+  """
+  layer = Conv2DReparameterization(
+      filters=filters,
+      kernel_size=kernel_size,
+      strides=strides,
+      padding=padding,
+      data_format=data_format,
+      dilation_rate=dilation_rate,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+class Conv3DReparameterization(_ConvReparameterization):
+  """3D convolution layer (e.g. spatial convolution over volumes).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the reparameterization
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 3 integers, specifying the
+      depth, height and width of the 3D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 3 integers,
+      specifying the strides of the convolution along the depth,
+      height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, depth, height, width, channels)` while `channels_first`
+      corresponds to inputs with shape
+      `(batch, channels, depth, height, width)`.
+    dilation_rate: An integer or tuple/list of 3 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+
+  Properties:
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 256, 32, 32, 3])
+  net = tfp.layers.Conv3DReparameterization(64,
+                                            kernel_size=5,
+                                            padding="SAME",
+                                            activation=tf.nn.relu)(net)
+  net = tf.layers.MaxPooling2D(pool_size=2,
+                               strides=2,
+                               padding="SAME")(net)
+  net = tf.reshape(net, [-1, 256 * 8 * 8 * 64])
+  logits = tfp.layers.DenseReparameterization(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Auto-Encoding Variational Bayes."
+        Diederik P. Kingma, Max Welling.
+        International Conference on Learning Representations, 2014.
+  """
+
+  def __init__(
+      self,
+      filters,
+      kernel_size,
+      strides=(1, 1, 1),
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=(1, 1, 1),
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(Conv3DReparameterization, self).__init__(
+        rank=3,
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name, **kwargs)
+
+
+def conv3d_reparameterization(
+    inputs,
+    filters,
+    kernel_size,
+    strides=(1, 1, 1),
+    padding="valid",
+    data_format="channels_last",
+    dilation_rate=(1, 1, 1),
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    name=None,
+    reuse=None):
+  """Functional interface for the 3D convolution layer.
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the reparameterization
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    inputs: Tensor input.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 3 integers, specifying the
+      depth, height and width of the 3D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 3 integers,
+      specifying the strides of the convolution along the depth,
+      height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, depth, height, width, channels)` while `channels_first`
+      corresponds to inputs with shape
+      `(batch, channels, depth, height, width)`.
+    dilation_rate: An integer or tuple/list of 3 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    name: A string, the name of the layer.
+    reuse: Boolean, whether to reuse the weights of a previous layer
+      by the same name.
+
+  Returns:
+    Output tensor.
+
+  Raises:
+    ValueError: if eager execution is enabled.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 256, 32, 32, 3])
+  net = tfp.layers.conv3d_reparameterization(net,
+                                             filters=64,
+                                             kernel_size=5,
+                                             padding="SAME",
+                                             activation=tf.nn.relu)
+  net = tf.layers.max_pooling2d(net,
+                                pool_size=2,
+                                strides=2,
+                                padding="SAME")
+  net = tf.reshape(net, [-1, 256 * 8 * 8 * 64])
+  logits = tfp.layers.dense_reparameterization(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Auto-Encoding Variational Bayes."
+        Diederik P. Kingma, Max Welling.
+        International Conference on Learning Representations, 2014.
+  """
+  layer = Conv3DReparameterization(
+      filters=filters,
+      kernel_size=kernel_size,
+      strides=strides,
+      padding=padding,
+      data_format=data_format,
+      dilation_rate=dilation_rate,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+class _ConvFlipout(_ConvVariational):
+  """Abstract nD convolution layer (private, used as implementation base).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the Flipout
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`. Flipout uses
+  roughly twice as many floating point operations as the
+  reparameterization estimator but has the advantage of significantly
+  lower variance.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of n integers, specifying the
+      length of the convolution window.
+    strides: An integer or tuple/list of n integers,
+      specifying the stride length of the convolution.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, ..., channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, ...)`.
+    dilation_rate: An integer or tuple/list of n integers, specifying
+      the dilation rate to use for dilated convolution.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any `strides` value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    seed: Python scalar `int` which initializes the random number
+      generator. Default value: `None` (i.e., use global seed).
+    name: A string, the name of the layer.
+
+  Properties:
+    rank: Python integer, dimensionality of convolution.
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+    seed: Python integer, used to create random seeds.
+
+  [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
+        Mini-Batches."
+        Anonymous. OpenReview, 2017.
+        https://openreview.net/forum?id=rJnpifWAb
+  """
+
+  def __init__(
+      self,
+      rank,
+      filters,
+      kernel_size,
+      strides=1,
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=1,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      seed=None,
+      name=None,
+      **kwargs):
+    super(_ConvFlipout, self).__init__(
+        rank=rank,
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name, **kwargs)
+    self.seed = seed
+
+  def _apply_variational_kernel(self, inputs):
+    if (not isinstance(self.kernel_posterior, independent_lib.Independent) or
+        not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)):
+      raise TypeError(
+          "`{}` requires "
+          "`kernel_posterior_fn` produce an instance of "
+          "`tf.distributions.Independent(tf.distributions.Normal)` "
+          "(saw: \"{}\").".format(
+              type(self).__name__, self.kernel_posterior.name))
+    self.kernel_posterior_affine = normal_lib.Normal(
+        loc=array_ops.zeros_like(self.kernel_posterior.distribution.loc),
+        scale=self.kernel_posterior.distribution.scale)
+    self.kernel_posterior_affine_tensor = (
+        self.kernel_posterior_tensor_fn(self.kernel_posterior_affine))
+    self.kernel_posterior_tensor = None
+
+    outputs = self._convolution_op(
+        inputs, self.kernel_posterior.distribution.loc)
+
+    input_shape = array_ops.shape(inputs)
+    output_shape = array_ops.shape(outputs)
+    batch_shape = array_ops.expand_dims(input_shape[0], 0)
+    channels = input_shape[-1]
+
+    sign_input = layers_util.random_sign(
+        array_ops.concat([batch_shape,
+                          array_ops.expand_dims(channels, 0)], 0),
+        dtype=inputs.dtype,
+        seed=self.seed)
+    sign_output = layers_util.random_sign(
+        array_ops.concat([batch_shape,
+                          array_ops.expand_dims(self.filters, 0)], 0),
+        dtype=inputs.dtype,
+        seed=distribution_util.gen_new_seed(
+            self.seed, salt="conv_flipout"))
+    for _ in range(self.rank):
+      sign_input = array_ops.expand_dims(sign_input, 1)  # 2D ex: (B, 1, 1, C)
+      sign_output = array_ops.expand_dims(sign_output, 1)
+
+    sign_input = array_ops.tile(  # tile for element-wise op broadcasting
+        sign_input,
+        [1] + [input_shape[i + 1] for i in range(self.rank)] + [1])
+    sign_output = array_ops.tile(
+        sign_output,
+        [1] + [output_shape[i + 1] for i in range(self.rank)] + [1])
+
+    perturbed_inputs = self._convolution_op(
+        inputs * sign_input, self.kernel_posterior_affine_tensor) * sign_output
+
+    outputs += perturbed_inputs
+    return outputs
+
+
+class Conv1DFlipout(_ConvFlipout):
+  """1D convolution layer (e.g. temporal convolution) with Flipout.
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the Flipout
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`. Flipout uses
+  roughly twice as many floating point operations as the
+  reparameterization estimator but has the advantage of significantly
+  lower variance.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of a single integer, specifying the
+      length of the 1D convolution window.
+    strides: An integer or tuple/list of a single integer,
+      specifying the stride length of the convolution.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, length, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, length)`.
+    dilation_rate: An integer or tuple/list of a single integer, specifying
+      the dilation rate to use for dilated convolution.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any `strides` value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    seed: Python scalar `int` which initializes the random number
+      generator. Default value: `None` (i.e., use global seed).
+    name: A string, the name of the layer.
+
+  Properties:
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+    seed: Python integer, used to create random seeds.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 128, 1])
+  net = tfp.layers.Conv1DFlipout(64,
+                                 kernel_size=5,
+                                 padding="SAME",
+                                 activation=tf.nn.relu)(net)
+  net = tf.reshape(net, [-1, 128 * 64])
+  logits = tfp.layers.DenseFlipout(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses the Flipout gradient estimator to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
+        Mini-Batches."
+        Anonymous. OpenReview, 2017.
+        https://openreview.net/forum?id=rJnpifWAb
+  """
+
+  def __init__(
+      self,
+      filters,
+      kernel_size,
+      strides=1,
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=1,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      seed=None,
+      name=None,
+      **kwargs):
+    super(Conv1DFlipout, self).__init__(
+        rank=1,
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        seed=seed,
+        name=name, **kwargs)
+
+
+def conv1d_flipout(
+    inputs,
+    filters,
+    kernel_size,
+    strides=1,
+    padding="valid",
+    data_format="channels_last",
+    dilation_rate=1,
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    seed=None,
+    name=None,
+    reuse=None):
+  """Functional interface for 1D convolution layer (e.g. temporal convolution).
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the Flipout
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`. Flipout uses
+  roughly twice as many floating point operations as the
+  reparameterization estimator but has the advantage of significantly
+  lower variance.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    inputs: Tensor input.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of a single integer, specifying the
+      length of the 1D convolution window.
+    strides: An integer or tuple/list of a single integer,
+      specifying the stride length of the convolution.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, length, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, length)`.
+    dilation_rate: An integer or tuple/list of a single integer, specifying
+      the dilation rate to use for dilated convolution.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any `strides` value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    seed: Python scalar `int` which initializes the random number
+      generator. Default value: `None` (i.e., use global seed).
+    name: A string, the name of the layer.
+    reuse: Boolean, whether to reuse the weights of a previous layer
+      by the same name.
+
+  Returns:
+    Output tensor.
+
+  Raises:
+    ValueError: if eager execution is enabled.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 128, 1])
+  net = tfp.layers.conv1d_flipout(net,
+                                  filters=64,
+                                  kernel_size=5,
+                                  padding="SAME",
+                                  activation=tf.nn.relu)
+  net = tf.reshape(net, [-1, 128 * 64])
+  logits = tfp.layers.dense_flipout(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses the Flipout gradient estimator to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
+        Mini-Batches."
+        Anonymous. OpenReview, 2017.
+        https://openreview.net/forum?id=rJnpifWAb
+  """
+  layer = Conv1DFlipout(
+      filters=filters,
+      kernel_size=kernel_size,
+      strides=strides,
+      padding=padding,
+      data_format=data_format,
+      dilation_rate=dilation_rate,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      seed=seed,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+class Conv2DFlipout(_ConvFlipout):
+  """2D convolution layer (e.g. spatial convolution over images) with Flipout.
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the Flipout
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`. Flipout uses
+  roughly twice as many floating point operations as the
+  reparameterization estimator but has the advantage of significantly
+  lower variance.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 2 integers, specifying the
+      height and width of the 2D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 2 integers,
+      specifying the strides of the convolution along the height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, height, width, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, height, width)`.
+
+    dilation_rate: An integer or tuple/list of 2 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    seed: Python scalar `int` which initializes the random number
+      generator. Default value: `None` (i.e., use global seed).
+    name: A string, the name of the layer.
+
+  Properties:
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+    seed: Python integer, used to create random seeds.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 32, 32, 3])
+  net = tfp.layers.Conv2DFlipout(64,
+                                 kernel_size=5,
+                                 padding="SAME",
+                                 activation=tf.nn.relu)(net)
+  net = tf.layers.MaxPooling2D(pool_size=2,
+                               strides=2,
+                               padding="SAME")(net)
+  net = tf.reshape(net, [-1, 8 * 8 * 64])
+  logits = tfp.layers.DenseFlipout(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses the Flipout gradient estimator to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
+        Mini-Batches."
+        Anonymous. OpenReview, 2017.
+        https://openreview.net/forum?id=rJnpifWAb
+  """
+
+  def __init__(
+      self,
+      filters,
+      kernel_size,
+      strides=(1, 1),
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=(1, 1),
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      seed=None,
+      name=None,
+      **kwargs):
+    super(Conv2DFlipout, self).__init__(
+        rank=2,
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        seed=seed,
+        name=name, **kwargs)
+
+
+def conv2d_flipout(
+    inputs,
+    filters,
+    kernel_size,
+    strides=(1, 1),
+    padding="valid",
+    data_format="channels_last",
+    dilation_rate=(1, 1),
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    seed=None,
+    name=None,
+    reuse=None):
+  """Functional interface for the 2D convolution layer.
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the Flipout
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`. Flipout uses
+  roughly twice as many floating point operations as the
+  reparameterization estimator but has the advantage of significantly
+  lower variance.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    inputs: Tensor input.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 2 integers, specifying the
+      height and width of the 2D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 2 integers,
+      specifying the strides of the convolution along the height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, height, width, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, height, width)`.
+
+    dilation_rate: An integer or tuple/list of 2 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    seed: Python scalar `int` which initializes the random number
+      generator. Default value: `None` (i.e., use global seed).
+    name: A string, the name of the layer.
+    reuse: Boolean, whether to reuse the weights of a previous layer
+      by the same name.
+
+  Returns:
+    Output tensor.
+
+  Raises:
+    ValueError: if eager execution is enabled.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 32, 32, 3])
+  net = tfp.layers.conv2d_flipout(net,
+                                  filters=64,
+                                  kernel_size=5,
+                                  padding="SAME",
+                                  activation=tf.nn.relu)
+  net = tf.layers.max_pooling2d(net,
+                                pool_size=2,
+                                strides=2,
+                                padding="SAME")
+  net = tf.reshape(net, [-1, 8 * 8 * 64])
+  logits = tfp.layers.dense_flipout(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses the Flipout gradient estimator to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
+        Mini-Batches."
+        Anonymous. OpenReview, 2017.
+        https://openreview.net/forum?id=rJnpifWAb
+  """
+  layer = Conv2DFlipout(
+      filters=filters,
+      kernel_size=kernel_size,
+      strides=strides,
+      padding=padding,
+      data_format=data_format,
+      dilation_rate=dilation_rate,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      seed=seed,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+class Conv3DFlipout(_ConvFlipout):
+  """3D convolution layer (e.g. spatial convolution over volumes) with Flipout.
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the Flipout
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`. Flipout uses
+  roughly twice as many floating point operations as the
+  reparameterization estimator but has the advantage of significantly
+  lower variance.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 3 integers, specifying the
+      depth, height and width of the 3D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 3 integers,
+      specifying the strides of the convolution along the depth,
+      height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, depth, height, width, channels)` while `channels_first`
+      corresponds to inputs with shape
+      `(batch, channels, depth, height, width)`.
+    dilation_rate: An integer or tuple/list of 3 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    seed: Python scalar `int` which initializes the random number
+      generator. Default value: `None` (i.e., use global seed).
+    name: A string, the name of the layer.
+
+  Properties:
+    filters: Python integer, dimensionality of the output space.
+    kernel_size: Size of the convolution window.
+    strides: Stride length of convolution.
+    padding: Python string describing padding approach.
+    data_format: Python string describing input data's dimensions.
+    dilation_rate: Dilation rate for an atrous convolution.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+    seed: Python integer, used to create random seeds.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 256, 32, 32, 3])
+  net = tfp.layers.Conv3DFlipout(64,
+                                 kernel_size=5,
+                                 padding="SAME",
+                                 activation=tf.nn.relu)(net)
+  net = tf.layers.MaxPooling2D(pool_size=2,
+                               strides=2,
+                               padding="SAME")(net)
+  net = tf.reshape(net, [-1, 256 * 8 * 8 * 64])
+  logits = tfp.layers.DenseFlipout(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses the Flipout gradient estimator to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
+        Mini-Batches."
+        Anonymous. OpenReview, 2017.
+        https://openreview.net/forum?id=rJnpifWAb
+  """
+
+  def __init__(
+      self,
+      filters,
+      kernel_size,
+      strides=(1, 1, 1),
+      padding="valid",
+      data_format="channels_last",
+      dilation_rate=(1, 1, 1),
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      seed=None,
+      name=None,
+      **kwargs):
+    super(Conv3DFlipout, self).__init__(
+        rank=3,
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        seed=seed,
+        name=name, **kwargs)
+
+
+def conv3d_flipout(
+    inputs,
+    filters,
+    kernel_size,
+    strides=(1, 1, 1),
+    padding="valid",
+    data_format="channels_last",
+    dilation_rate=(1, 1, 1),
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    seed=None,
+    name=None,
+    reuse=None):
+  """Functional interface for the 3D convolution layer.
+
+  This layer creates a convolution kernel that is convolved
+  (actually cross-correlated) with the layer input to produce a tensor of
+  outputs. It may also include a bias addition and activation function
+  on the outputs. It assumes the `kernel` and/or `bias` are drawn from
+  distributions.
+
+  By default, the layer implements a stochastic forward pass via
+  sampling from the kernel and bias posteriors,
+  ```none
+  outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
+  ```
+  where f denotes the layer's calculation. It uses the Flipout
+  estimator [1], which performs a Monte Carlo approximation of the
+  distribution integrating over the `kernel` and `bias`. Flipout uses
+  roughly twice as many floating point operations as the
+  reparameterization estimator but has the advantage of significantly
+  lower variance.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Arguments:
+    inputs: Tensor input.
+    filters: Integer, the dimensionality of the output space (i.e. the number
+      of filters in the convolution).
+    kernel_size: An integer or tuple/list of 3 integers, specifying the
+      depth, height and width of the 3D convolution window.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+    strides: An integer or tuple/list of 3 integers,
+      specifying the strides of the convolution along the depth,
+      height and width.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Specifying any stride value != 1 is incompatible with specifying
+      any `dilation_rate` value != 1.
+    padding: One of `"valid"` or `"same"` (case-insensitive).
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, depth, height, width, channels)` while `channels_first`
+      corresponds to inputs with shape
+      `(batch, channels, depth, height, width)`.
+    dilation_rate: An integer or tuple/list of 3 integers, specifying
+      the dilation rate to use for dilated convolution.
+      Can be a single integer to specify the same value for
+      all spatial dimensions.
+      Currently, specifying any `dilation_rate` value != 1 is
+      incompatible with specifying any stride value != 1.
+    activation: Activation function. Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Optional regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+    seed: Python scalar `int` which initializes the random number
+      generator. Default value: `None` (i.e., use global seed).
+    name: A string, the name of the layer.
+    reuse: Boolean, whether to reuse the weights of a previous layer
+      by the same name.
+
+  Returns:
+    Output tensor.
+
+  Raises:
+    ValueError: if eager execution is enabled.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tf.reshape(features, [-1, 256, 32, 32, 3])
+  net = tfp.layers.conv3d_flipout(net,
+                                  filters=64,
+                                  kernel_size=5,
+                                  padding="SAME",
+                                  activation=tf.nn.relu)
+  net = tf.layers.max_pooling2d(net,
+                                pool_size=2,
+                                strides=2,
+                                padding="SAME")
+  net = tf.reshape(net, [-1, 256 * 8 * 8 * 64])
+  logits = tfp.layers.dense_flipout(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses the Flipout gradient estimator to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
+        Mini-Batches."
+        Anonymous. OpenReview, 2017.
+        https://openreview.net/forum?id=rJnpifWAb
+  """
+  layer = Conv3DFlipout(
+      filters=filters,
+      kernel_size=kernel_size,
+      strides=strides,
+      padding=padding,
+      data_format=data_format,
+      dilation_rate=dilation_rate,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      seed=seed,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+# Aliases
+
+Convolution1DReparameterization = Conv1DReparameterization
+Convolution2DReparameterization = Conv2DReparameterization
+Convolution3DReparameterization = Conv3DReparameterization
+convolution1d_reparameterization = conv1d_reparameterization
+convolution2d_reparameterization = conv2d_reparameterization
+convolution3d_reparameterization = conv3d_reparameterization
+Convolution1DFlipout = Conv1DFlipout
+Convolution2DFlipout = Conv2DFlipout
+Convolution3DFlipout = Conv3DFlipout
+convolution1d_flipout = conv1d_flipout
+convolution2d_flipout = conv2d_flipout
+convolution3d_flipout = conv3d_flipout
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational.py b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational.py
new file mode 100644
index 0000000000000000000000000000000000000000..591a8e553de0c194786c7ee8693665f762711b2d
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational.py
@@ -0,0 +1,1176 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Dense Bayesian layer using KL-divergence based variational inference.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.bayesflow.python.ops import layers_util
+from tensorflow.contrib.distributions.python.ops import independent as independent_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.layers import base as layers_lib
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import standard_ops
+from tensorflow.python.ops.distributions import kullback_leibler as kl_lib
+from tensorflow.python.ops.distributions import normal as normal_lib
+from tensorflow.python.ops.distributions import util as distribution_util
+
+
+class _DenseVariational(layers_lib.Layer):
+  """Abstract densely-connected class (private, used as implementation base).
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Properties:
+    units: Python integer, dimensionality of the output space.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+  """
+
+  def __init__(
+      self,
+      units,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(_DenseVariational, self).__init__(
+        trainable=trainable,
+        name=name,
+        activity_regularizer=activity_regularizer,
+        **kwargs)
+    self.units = units
+    self.activation = activation
+    self.input_spec = layers_lib.InputSpec(min_ndim=2)
+    self.kernel_posterior_fn = kernel_posterior_fn
+    self.kernel_posterior_tensor_fn = kernel_posterior_tensor_fn
+    self.kernel_prior_fn = kernel_prior_fn
+    self.kernel_divergence_fn = kernel_divergence_fn
+    self.bias_posterior_fn = bias_posterior_fn
+    self.bias_posterior_tensor_fn = bias_posterior_tensor_fn
+    self.bias_prior_fn = bias_prior_fn
+    self.bias_divergence_fn = bias_divergence_fn
+
+  def build(self, input_shape):
+    input_shape = tensor_shape.TensorShape(input_shape)
+    in_size = input_shape.with_rank_at_least(2)[-1].value
+    if in_size is None:
+      raise ValueError("The last dimension of the inputs to `Dense` "
+                       "should be defined. Found `None`.")
+    self._input_spec = layers_lib.InputSpec(min_ndim=2, axes={-1: in_size})
+    dtype = dtypes.as_dtype(self.dtype)
+
+    # Must have a posterior kernel.
+    self.kernel_posterior = self.kernel_posterior_fn(
+        dtype, [in_size, self.units], "kernel_posterior",
+        self.trainable, self.add_variable)
+
+    if self.kernel_prior_fn is None:
+      self.kernel_prior = None
+    else:
+      self.kernel_prior = self.kernel_prior_fn(
+          dtype, [in_size, self.units], "kernel_prior",
+          self.trainable, self.add_variable)
+    self._built_kernel_divergence = False
+
+    if self.bias_posterior_fn is None:
+      self.bias_posterior = None
+    else:
+      self.bias_posterior = self.bias_posterior_fn(
+          dtype, [self.units], "bias_posterior",
+          self.trainable, self.add_variable)
+
+    if self.bias_prior_fn is None:
+      self.bias_prior = None
+    else:
+      self.bias_prior = self.bias_prior_fn(
+          dtype, [self.units], "bias_prior",
+          self.trainable, self.add_variable)
+    self._built_bias_divergence = False
+
+    self.built = True
+
+  def call(self, inputs):
+    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
+
+    outputs = self._apply_variational_kernel(inputs)
+    outputs = self._apply_variational_bias(outputs)
+    if self.activation is not None:
+      outputs = self.activation(outputs)  # pylint: disable=not-callable
+    if not self._built_kernel_divergence:
+      kernel_posterior = self.kernel_posterior
+      kernel_prior = self.kernel_prior
+      if isinstance(self.kernel_posterior, independent_lib.Independent):
+        kernel_posterior = kernel_posterior.distribution
+      if isinstance(self.kernel_prior, independent_lib.Independent):
+        kernel_prior = kernel_prior.distribution
+      self._apply_divergence(self.kernel_divergence_fn,
+                             kernel_posterior,
+                             kernel_prior,
+                             self.kernel_posterior_tensor,
+                             name="divergence_kernel")
+      self._built_kernel_divergence = True
+    if not self._built_bias_divergence:
+      bias_posterior = self.bias_posterior
+      bias_prior = self.bias_prior
+      if isinstance(self.bias_posterior, independent_lib.Independent):
+        bias_posterior = bias_posterior.distribution
+      if isinstance(self.bias_prior, independent_lib.Independent):
+        bias_prior = bias_prior.distribution
+      self._apply_divergence(self.bias_divergence_fn,
+                             bias_posterior,
+                             bias_prior,
+                             self.bias_posterior_tensor,
+                             name="divergence_bias")
+      self._built_bias_divergence = True
+    return outputs
+
+  def _apply_variational_bias(self, inputs):
+    if self.bias_posterior is None:
+      self.bias_posterior_tensor = None
+      return inputs
+    self.bias_posterior_tensor = self.bias_posterior_tensor_fn(
+        self.bias_posterior)
+    return nn.bias_add(inputs, self.bias_posterior_tensor)
+
+  def _apply_divergence(self, divergence_fn, posterior, prior,
+                        posterior_tensor, name):
+    if (divergence_fn is None or
+        posterior is None or
+        prior is None):
+      divergence = None
+      return
+    divergence = standard_ops.identity(
+        divergence_fn(
+            posterior, prior, posterior_tensor),
+        name=name)
+    self.add_loss(divergence)
+
+  def _matmul(self, inputs, kernel):
+    if inputs.shape.ndims <= 2:
+      return standard_ops.matmul(inputs, kernel)
+    # To handle broadcasting, we must use `tensordot`.
+    return standard_ops.tensordot(inputs, kernel, axes=[[-1], [0]])
+
+  def _compute_output_shape(self, input_shape):
+    input_shape = tensor_shape.TensorShape(input_shape).with_rank_at_least(2)
+    if input_shape[-1].value is None:
+      raise ValueError(
+          "The innermost dimension of input_shape must be defined, "
+          "but saw: {}".format(input_shape))
+    return input_shape[:-1].concatenate(self.units)
+
+
+class DenseReparameterization(_DenseVariational):
+  """Densely-connected layer class with reparameterization estimator.
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  It uses the reparameterization estimator [1], which performs a Monte Carlo
+  approximation of the distribution integrating over the `kernel` and
+  `bias`.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Properties:
+    units: Python integer, dimensionality of the output space.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.DenseReparameterization(
+      512, activation=tf.nn.relu)(features)
+  logits = tfp.layers.DenseReparameterization(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Auto-Encoding Variational Bayes."
+        Diederik P. Kingma, Max Welling.
+        International Conference on Learning Representations, 2014.
+  """
+
+  def __init__(
+      self,
+      units,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(
+          is_singular=True),
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(DenseReparameterization, self).__init__(
+        units=units,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name,
+        **kwargs)
+
+  def _apply_variational_kernel(self, inputs):
+    self.kernel_posterior_tensor = self.kernel_posterior_tensor_fn(
+        self.kernel_posterior)
+    self.kernel_posterior_affine = None
+    self.kernel_posterior_affine_tensor = None
+    return self._matmul(inputs, self.kernel_posterior_tensor)
+
+
+def dense_reparameterization(
+    inputs,
+    units,
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True),  # pylint: disable=line-too-long
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    name=None,
+    reuse=None):
+  """Densely-connected layer with reparameterization estimator.
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  It uses the reparameterization estimator [1], which performs a Monte Carlo
+  approximation of the distribution integrating over the `kernel` and
+  `bias`.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    inputs: Tensor input.
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Returns:
+    output: `Tensor` representing a the affine transformed input under a random
+      draw from the surrogate posterior distribution.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.dense_reparameterization(
+      features, 512, activation=tf.nn.relu)
+  logits = tfp.layers.dense_reparameterization(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Auto-Encoding Variational Bayes."
+        Diederik P. Kingma, Max Welling.
+        International Conference on Learning Representations, 2014.
+  """
+  layer = DenseReparameterization(
+      units,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+class DenseLocalReparameterization(_DenseVariational):
+  """Densely-connected layer class with local reparameterization estimator.
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  It uses the local reparameterization estimator [1], which performs a
+  Monte Carlo approximation of the distribution on the hidden units
+  induced by the `kernel` and `bias`.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Properties:
+    units: Python integer, dimensionality of the output space.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.DenseLocalReparameterization(
+      512, activation=tf.nn.relu)(features)
+  logits = tfp.layers.DenseLocalReparameterization(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses local reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Variational Dropout and the Local Reparameterization Trick."
+        Diederik P. Kingma, Tim Salimans, Max Welling.
+        Neural Information Processing Systems, 2015.
+  """
+
+  def __init__(
+      self,
+      units,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(
+          is_singular=True),
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      name=None,
+      **kwargs):
+    super(DenseLocalReparameterization, self).__init__(
+        units=units,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name,
+        **kwargs)
+
+  def _apply_variational_kernel(self, inputs):
+    if (not isinstance(self.kernel_posterior, independent_lib.Independent) or
+        not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)):
+      raise TypeError(
+          "`DenseLocalReparameterization` requires "
+          "`kernel_posterior_fn` produce an instance of "
+          "`tf.distributions.Independent(tf.distributions.Normal)` "
+          "(saw: \"{}\").".format(self.kernel_posterior.name))
+    self.kernel_posterior_affine = normal_lib.Normal(
+        loc=self._matmul(inputs, self.kernel_posterior.distribution.loc),
+        scale=standard_ops.sqrt(self._matmul(
+            standard_ops.square(inputs),
+            standard_ops.square(self.kernel_posterior.distribution.scale))))
+    self.kernel_posterior_affine_tensor = (
+        self.kernel_posterior_tensor_fn(self.kernel_posterior_affine))
+    self.kernel_posterior_tensor = None
+    return self.kernel_posterior_affine_tensor
+
+
+def dense_local_reparameterization(
+    inputs,
+    units,
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(
+        is_singular=True),
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    name=None,
+    reuse=None):
+  """Densely-connected layer with local reparameterization estimator.
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  It uses the local reparameterization estimator [1], which performs a
+  Monte Carlo approximation of the distribution on the hidden units
+  induced by the `kernel` and `bias`.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    inputs: Tensor input.
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Returns:
+    output: `Tensor` representing a the affine transformed input under a random
+      draw from the surrogate posterior distribution.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.dense_local_reparameterization(
+      features, 512, activation=tf.nn.relu)
+  logits = tfp.layers.dense_local_reparameterization(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses local reparameterization gradients to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Variational Dropout and the Local Reparameterization Trick."
+        Diederik P. Kingma, Tim Salimans, Max Welling.
+        Neural Information Processing Systems, 2015.
+  """
+  layer = DenseLocalReparameterization(
+      units,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
+
+
+class DenseFlipout(_DenseVariational):
+  """Densely-connected layer class with Flipout estimator.
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  It uses the Flipout estimator [1], which performs a Monte Carlo
+  approximation of the distribution integrating over the `kernel` and
+  `bias`. Flipout uses roughly twice as many floating point operations
+  as the reparameterization estimator but has the advantage of
+  significantly lower variance.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    seed: Python scalar `int` which initializes the random number
+      generator. Default value: `None` (i.e., use global seed).
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Properties:
+    units: Python integer, dimensionality of the output space.
+    activation: Activation function (`callable`).
+    activity_regularizer: Regularizer function for the output.
+    kernel_posterior_fn: `callable` returning posterior.
+    kernel_posterior_tensor_fn: `callable` operating on posterior.
+    kernel_prior_fn: `callable` returning prior.
+    kernel_divergence_fn: `callable` returning divergence.
+    bias_posterior_fn: `callable` returning posterior.
+    bias_posterior_tensor_fn: `callable` operating on posterior.
+    bias_prior_fn: `callable` returning prior.
+    bias_divergence_fn: `callable` returning divergence.
+    seed: Python integer, used to create random seeds.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.DenseFlipout(
+      512, activation=tf.nn.relu)(features)
+  logits = tfp.layers.DenseFlipout(10)(net)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses the Flipout gradient estimator to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
+        Mini-Batches."
+        Anonymous. OpenReview, 2017.
+        https://openreview.net/forum?id=rJnpifWAb
+  """
+
+  def __init__(
+      self,
+      units,
+      activation=None,
+      activity_regularizer=None,
+      trainable=True,
+      kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+      kernel_posterior_tensor_fn=lambda d: d.sample(),
+      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      bias_posterior_fn=layers_util.default_mean_field_normal_fn(
+          is_singular=True),
+      bias_posterior_tensor_fn=lambda d: d.sample(),
+      bias_prior_fn=None,
+      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+      seed=None,
+      name=None,
+      **kwargs):
+    super(DenseFlipout, self).__init__(
+        units=units,
+        activation=activation,
+        activity_regularizer=activity_regularizer,
+        trainable=trainable,
+        kernel_posterior_fn=kernel_posterior_fn,
+        kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+        kernel_prior_fn=kernel_prior_fn,
+        kernel_divergence_fn=kernel_divergence_fn,
+        bias_posterior_fn=bias_posterior_fn,
+        bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+        bias_prior_fn=bias_prior_fn,
+        bias_divergence_fn=bias_divergence_fn,
+        name=name,
+        **kwargs)
+    self.seed = seed
+
+  def _apply_variational_kernel(self, inputs):
+    if (not isinstance(self.kernel_posterior, independent_lib.Independent) or
+        not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)):
+      raise TypeError(
+          "`DenseFlipout` requires "
+          "`kernel_posterior_fn` produce an instance of "
+          "`tf.distributions.Independent(tf.distributions.Normal)` "
+          "(saw: \"{}\").".format(self.kernel_posterior.name))
+    self.kernel_posterior_affine = normal_lib.Normal(
+        loc=array_ops.zeros_like(self.kernel_posterior.distribution.loc),
+        scale=self.kernel_posterior.distribution.scale)
+    self.kernel_posterior_affine_tensor = (
+        self.kernel_posterior_tensor_fn(self.kernel_posterior_affine))
+    self.kernel_posterior_tensor = None
+
+    input_shape = array_ops.shape(inputs)
+    batch_shape = input_shape[:-1]
+
+    sign_input = layers_util.random_sign(
+        input_shape,
+        dtype=inputs.dtype,
+        seed=self.seed)
+    sign_output = layers_util.random_sign(
+        array_ops.concat([batch_shape,
+                          array_ops.expand_dims(self.units, 0)], 0),
+        dtype=inputs.dtype,
+        seed=distribution_util.gen_new_seed(
+            self.seed, salt="dense_flipout"))
+    perturbed_inputs = self._matmul(
+        inputs * sign_input, self.kernel_posterior_affine_tensor) * sign_output
+
+    outputs = self._matmul(inputs, self.kernel_posterior.distribution.loc)
+    outputs += perturbed_inputs
+    return outputs
+
+
+def dense_flipout(
+    inputs,
+    units,
+    activation=None,
+    activity_regularizer=None,
+    trainable=True,
+    kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
+    kernel_posterior_tensor_fn=lambda d: d.sample(),
+    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
+        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
+    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    bias_posterior_fn=layers_util.default_mean_field_normal_fn(
+        is_singular=True),
+    bias_posterior_tensor_fn=lambda d: d.sample(),
+    bias_prior_fn=None,
+    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
+    seed=None,
+    name=None,
+    reuse=None):
+  """Densely-connected layer with Flipout estimator.
+
+  This layer implements the Bayesian variational inference analogue to
+  a dense layer by assuming the `kernel` and/or the `bias` are drawn
+  from distributions. By default, the layer implements a stochastic
+  forward pass via sampling from the kernel and bias posteriors,
+
+  ```none
+  kernel, bias ~ posterior
+  outputs = activation(matmul(inputs, kernel) + bias)
+  ```
+
+  It uses the Flipout estimator [1], which performs a Monte Carlo
+  approximation of the distribution integrating over the `kernel` and
+  `bias`. Flipout uses roughly twice as many floating point operations
+  as the reparameterization estimator but has the advantage of
+  significantly lower variance.
+
+  The arguments permit separate specification of the surrogate posterior
+  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
+  distributions.
+
+  Args:
+    inputs: Tensor input.
+    units: Integer or Long, dimensionality of the output space.
+    activation: Activation function (`callable`). Set it to None to maintain a
+      linear activation.
+    activity_regularizer: Regularizer function for the output.
+    trainable: Boolean, if `True` also add variables to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+    kernel_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `kernel` parameter. Default value:
+      `default_mean_field_normal_fn()`.
+    kernel_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    kernel_prior_fn: Python `callable` which creates `tf.distributions`
+      instance. See `default_mean_field_normal_fn` docstring for required
+      parameter signature.
+      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
+    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    bias_posterior_fn: Python `callable` which creates
+      `tf.distributions.Distribution` instance representing the surrogate
+      posterior of the `bias` parameter. Default value:
+      `default_mean_field_normal_fn(is_singular=True)` (which creates an
+      instance of `tf.distributions.Deterministic`).
+    bias_posterior_tensor_fn: Python `callable` which takes a
+      `tf.distributions.Distribution` instance and returns a representative
+      value. Default value: `lambda d: d.sample()`.
+    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
+      See `default_mean_field_normal_fn` docstring for required parameter
+      signature. Default value: `None` (no prior, no variational inference)
+    bias_divergence_fn: Python `callable` which takes the surrogate posterior
+      distribution, prior distribution and random variate sample(s) from the
+      surrogate posterior and computes or approximates the KL divergence. The
+      distributions are `tf.distributions.Distribution`-like instances and the
+      sample is a `Tensor`.
+    seed: Python scalar `int` which initializes the random number
+      generator. Default value: `None` (i.e., use global seed).
+    name: Python `str`, the name of the layer. Layers with the same name will
+      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
+      such cases.
+    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
+      layer by the same name.
+
+  Returns:
+    output: `Tensor` representing a the affine transformed input under a random
+      draw from the surrogate posterior distribution.
+
+  #### Examples
+
+  We illustrate a Bayesian neural network with [variational inference](
+  https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
+  assuming a dataset of `features` and `labels`.
+
+  ```python
+  tfp = tf.contrib.bayesflow
+
+  net = tfp.layers.dense_flipout(
+      features, 512, activation=tf.nn.relu)
+  logits = tfp.layers.dense_flipout(net, 10)
+  neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
+  kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+  loss = neg_log_likelihood + kl
+  train_op = tf.train.AdamOptimizer().minimize(loss)
+  ```
+
+  It uses the Flipout gradient estimator to minimize the
+  Kullback-Leibler divergence up to a constant, also known as the
+  negative Evidence Lower Bound. It consists of the sum of two terms:
+  the expected negative log-likelihood, which we approximate via
+  Monte Carlo; and the KL divergence, which is added via regularizer
+  terms which are arguments to the layer.
+
+  [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
+        Mini-Batches."
+        Anonymous. OpenReview, 2017.
+        https://openreview.net/forum?id=rJnpifWAb
+  """
+  layer = DenseFlipout(
+      units,
+      activation=activation,
+      activity_regularizer=activity_regularizer,
+      trainable=trainable,
+      kernel_posterior_fn=kernel_posterior_fn,
+      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
+      kernel_prior_fn=kernel_prior_fn,
+      kernel_divergence_fn=kernel_divergence_fn,
+      bias_posterior_fn=bias_posterior_fn,
+      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
+      bias_prior_fn=bias_prior_fn,
+      bias_divergence_fn=bias_divergence_fn,
+      seed=seed,
+      name=name,
+      dtype=inputs.dtype.base_dtype,
+      _scope=name,
+      _reuse=reuse)
+  return layer.apply(inputs)
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py
deleted file mode 100644
index b05ce0ffc1dd55ffb029b339a846a9aa5c877620..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational_impl.py
+++ /dev/null
@@ -1,797 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Dense Bayesian layer using KL-divergence based variational inference.
-
-@@DenseVariational
-@@dense_variational
-
-@@default_loc_scale_fn
-@@default_mean_field_normal_fn
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.distributions.python.ops import deterministic as deterministic_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.layers import base as layers_lib
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import standard_ops
-from tensorflow.python.ops.distributions import kullback_leibler as kl_lib
-from tensorflow.python.ops.distributions import normal as normal_lib
-
-
-__all__ = [
-    "DenseVariational",
-    "dense_variational",
-    "default_loc_scale_fn",
-    "default_mean_field_normal_fn",
-]
-
-
-def default_loc_scale_fn(
-    is_singular=False,
-    loc_initializer=init_ops.random_normal_initializer(stddev=0.1),
-    untransformed_scale_initializer=init_ops.random_normal_initializer(
-        mean=-3., stddev=0.1),
-    loc_regularizer=None,
-    untransformed_scale_regularizer=None,
-    loc_constraint=None,
-    untransformed_scale_constraint=None):
-  """Makes closure which creates `loc`, `scale` params from `tf.get_variable`.
-
-  This function produces a closure which produces `loc`, `scale` using
-  `tf.get_variable`. The closure accepts the following arguments:
-
-    dtype: Type of parameter's event.
-    shape: Python `list`-like representing the parameter's event shape.
-    name: Python `str` name prepended to any created (or existing)
-      `tf.Variable`s.
-    trainable: Python `bool` indicating all created `tf.Variable`s should be
-      added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
-    add_variable_fn: `tf.get_variable`-like `callable` used to create (or
-      access existing) `tf.Variable`s.
-
-  Args:
-    is_singular: Python `bool` indicating if `scale is None`. Default: `False`.
-    loc_initializer: Initializer function for the `loc` parameters.
-      The default is `tf.random_normal_initializer(mean=0., stddev=0.1)`.
-    untransformed_scale_initializer: Initializer function for the `scale`
-      parameters. Default value: `tf.random_normal_initializer(mean=-3.,
-      stddev=0.1)`. This implies the softplus transformed result has mean
-      approximately `0.05` and std. deviation approximately `0.005`.
-    loc_regularizer: Regularizer function for the `loc` parameters.
-      The default (`None`) is to use the `tf.get_variable` default.
-    untransformed_scale_regularizer: Regularizer function for the `scale`
-      parameters. The default (`None`) is to use the `tf.get_variable` default.
-    loc_constraint: An optional projection function to be applied to the
-      loc after being updated by an `Optimizer`. The function must take as input
-      the unprojected variable and must return the projected variable (which
-      must have the same shape). Constraints are not safe to use when doing
-      asynchronous distributed training.
-      The default (`None`) is to use the `tf.get_variable` default.
-    untransformed_scale_constraint: An optional projection function to be
-      applied to the `scale` parameters after being updated by an `Optimizer`
-      (e.g. used to implement norm constraints or value constraints). The
-      function must take as input the unprojected variable and must return the
-      projected variable (which must have the same shape). Constraints are not
-      safe to use when doing asynchronous distributed training. The default
-      (`None`) is to use the `tf.get_variable` default.
-
-  Returns:
-    default_loc_scale_fn: Python `callable` which instantiates `loc`, `scale`
-    parameters from args: `dtype, shape, name, trainable, add_variable_fn`.
-  """
-  def _fn(dtype, shape, name, trainable, add_variable_fn):
-    """Creates `loc`, `scale` parameters."""
-    loc = add_variable_fn(
-        name=name + "_loc",
-        shape=shape,
-        initializer=loc_initializer,
-        regularizer=loc_regularizer,
-        constraint=loc_constraint,
-        dtype=dtype,
-        trainable=trainable)
-    if is_singular:
-      return loc, None
-    untransformed_scale = add_variable_fn(
-        name=name + "_untransformed_scale",
-        shape=shape,
-        initializer=untransformed_scale_initializer,
-        regularizer=untransformed_scale_regularizer,
-        constraint=untransformed_scale_constraint,
-        dtype=dtype,
-        trainable=trainable)
-    scale = (np.finfo(dtype.as_numpy_dtype).eps +
-             nn_ops.softplus(untransformed_scale))
-    return loc, scale
-  return _fn
-
-
-def default_mean_field_normal_fn(
-    is_singular=False,
-    loc_initializer=None,
-    untransformed_scale_initializer=None,
-    loc_regularizer=None,
-    untransformed_scale_regularizer=None,
-    loc_constraint=None,
-    untransformed_scale_constraint=None):
-  """Creates a function to build Normal distributions with trainable params.
-
-  This function produces a closure which produces `tf.distributions.Normal`
-  parameterized by a loc` and `scale` each created using `tf.get_variable`. The
-  produced closure accepts the following arguments:
-
-    name: Python `str` name prepended to any created (or existing)
-      `tf.Variable`s.
-    shape: Python `list`-like representing the parameter's event shape.
-    dtype: Type of parameter's event.
-    trainable: Python `bool` indicating all created `tf.Variable`s should be
-      added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
-    add_variable_fn: `tf.get_variable`-like `callable` used to create (or
-      access existing) `tf.Variable`s.
-
-  Args:
-    is_singular: Python `bool` if `True`, forces the special case limit of
-      `scale->0`, i.e., a `Deterministic` distribution.
-    loc_initializer: Initializer function for the `loc` parameters.
-      If `None` (default), values are initialized using the default
-      initializer used by `tf.get_variable`.
-    untransformed_scale_initializer: Initializer function for the `scale`
-      parameters. If `None` (default), values are initialized using the default
-      initializer used by `tf.get_variable`.
-    loc_regularizer: Regularizer function for the `loc` parameters.
-    untransformed_scale_regularizer: Regularizer function for the `scale`
-      parameters.
-    loc_constraint: An optional projection function to be applied to the
-      loc after being updated by an `Optimizer`. The function must take as input
-      the unprojected variable and must return the projected variable (which
-      must have the same shape). Constraints are not safe to use when doing
-      asynchronous distributed training.
-    untransformed_scale_constraint: An optional projection function to be
-      applied to the `scale` parameters after being updated by an `Optimizer`
-      (e.g. used to implement norm constraints or value constraints). The
-      function must take as input the unprojected variable and must return the
-      projected variable (which must have the same shape). Constraints are not
-      safe to use when doing asynchronous distributed training.
-
-  Returns:
-    make_normal_fn: Python `callable` which creates a `tf.distributions.Normal`
-      using from args: `dtype, shape, name, trainable, add_variable_fn`.
-  """
-  loc_scale_fn_ = default_loc_scale_fn(
-      is_singular,
-      loc_initializer,
-      untransformed_scale_initializer,
-      loc_regularizer,
-      untransformed_scale_regularizer,
-      loc_constraint,
-      untransformed_scale_constraint)
-  def _fn(dtype, shape, name, trainable, add_variable_fn):
-    """Creates a batch of `Deterministic` or `Normal` distributions."""
-    loc, scale = loc_scale_fn_(dtype, shape, name, trainable, add_variable_fn)
-    if scale is None:
-      return deterministic_lib.Deterministic(loc=loc)
-    return normal_lib.Normal(loc=loc, scale=scale)
-  return _fn
-
-
-class DenseVariational(layers_lib.Layer):
-  """Densely-connected variational class.
-
-  This layer implements the Bayesian variational inference analogue to:
-  `outputs = activation(matmul(inputs, kernel) + bias)`
-  by assuming the `kernel` and/or the `bias` are random variables.
-
-  The layer implements a stochastic dense calculation by making a Monte Carlo
-  approximation of a [variational Bayesian method based on KL divergence](
-  https://en.wikipedia.org/wiki/Variational_Bayesian_methods), i.e.,
-
-  ```none
-  -log p(y|x) = -log int_{R**d} p(y|x,w) p(w) dw
-              = -log int_{R**d} p(y,w|x) q(w|x) / q(w|x) dw
-             <= E_q(W|x)[-log p(y,W|x) + log q(W|x)]       # Jensen's
-              = E_q(W|x)[-log p(y|x,W)] + KL[q(W|x), p(W)]
-             ~= m**-1 sum{ -log(y|x,w[j]) : w[j] ~ q(W|x), j=1..m }
-                 + KL[q(W|x), p(W)]
-  ```
-
-  where `W` denotes the (independent) `kernel` and `bias` random variables, `w`
-  is a random variate or outcome of `W`, `y` is the label, `x` is the evidence`,
-  and `~=` denotes an approximation which becomes exact as `m->inf`. The above
-  bound is sometimes referred to as the negative Evidence Lower BOund or
-  negative [ELBO](https://arxiv.org/abs/1601.00670). In context of a DNN, this
-  layer is appropriate to use when the final loss is a negative log-likelihood.
-
-  The Monte-Carlo sum portion is used for the feed-forward calculation of the
-  DNN. The KL divergence portion can be added to the final loss via:
-  `loss += sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))`.
-
-  The arguments permit separate specification of the surrogate posterior
-  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
-  random variables (which together comprise `W`).
-
-  Args:
-    units: Integer or Long, dimensionality of the output space.
-    activation: Activation function (`callable`). Set it to None to maintain a
-      linear activation.
-    activity_regularizer: Regularizer function for the output.
-    trainable: Boolean, if `True` also add variables to the graph collection
-      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
-    kernel_use_local_reparameterization: Python `bool` indicating whether
-      `kernel` calculation should employ the Local Reparameterization Trick.
-      When `True`, `kernel_posterior_fn` must create an instance of
-      `tf.distributions.Normal`.
-    kernel_posterior_fn: Python `callable` which creates
-      `tf.distributions.Distribution` instance representing the surrogate
-      posterior of the `kernel` parameter. Default value:
-      `default_mean_field_normal_fn()`.
-    kernel_posterior_tensor_fn: Python `callable` which takes a
-      `tf.distributions.Distribution` instance and returns a representative
-      value. Default value: `lambda d: d.sample()`.
-    kernel_prior_fn: Python `callable` which creates `tf.distributions`
-      instance. See `default_mean_field_normal_fn` docstring for required
-      parameter signature.
-      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
-    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
-      distribution, prior distribution and random variate sample(s) from the
-      surrogate posterior and computes or approximates the KL divergence. The
-      distributions are `tf.distributions.Distribution`-like instances and the
-      sample is a `Tensor`.
-    bias_posterior_fn: Python `callable` which creates
-      `tf.distributions.Distribution` instance representing the surrogate
-      posterior of the `bias` parameter. Default value:
-      `default_mean_field_normal_fn(is_singular=True)` (which creates an
-      instance of `tf.distributions.Deterministic`).
-    bias_posterior_tensor_fn: Python `callable` which takes a
-      `tf.distributions.Distribution` instance and returns a representative
-      value. Default value: `lambda d: d.sample()`.
-    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
-      See `default_mean_field_normal_fn` docstring for required parameter
-      signature. Default value: `None` (no prior, no variational inference)
-    bias_divergence_fn: Python `callable` which takes the surrogate posterior
-      distribution, prior distribution and random variate sample(s) from the
-      surrogate posterior and computes or approximates the KL divergence. The
-      distributions are `tf.distributions.Distribution`-like instances and the
-      sample is a `Tensor`.
-    name: Python `str`, the name of the layer. Layers with the same name will
-      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
-      such cases.
-    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
-      layer by the same name.
-
-  Properties:
-    units: Python integer, dimensionality of the output space.
-    activation: Activation function (`callable`).
-    activity_regularizer: Regularizer function for the output.
-    kernel_use_local_reparameterization: Python `bool` indicating whether
-      `kernel` calculation should employ the Local Reparameterization Trick.
-    kernel: `VariationalKernelParamater` instance containing all `kernel`
-      related properties and `callable`s.
-    bias: `VariationalParameter` instance containing all `kernel`
-      related properties and `callable`s.
-  """
-
-  def __init__(
-      self,
-      units,
-      activation=None,
-      activity_regularizer=None,
-      trainable=True,
-      kernel_use_local_reparameterization=True,
-      kernel_posterior_fn=default_mean_field_normal_fn(),
-      kernel_posterior_tensor_fn=lambda d: d.sample(),
-      kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
-          loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
-      kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
-      bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
-      bias_posterior_tensor_fn=lambda d: d.sample(),
-      bias_prior_fn=None,
-      bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
-      name=None,
-      **kwargs):
-    super(DenseVariational, self).__init__(
-        trainable=trainable,
-        name=name,
-        activity_regularizer=activity_regularizer,
-        **kwargs)
-    self._units = units
-    self._activation = activation
-    self._input_spec = layers_lib.InputSpec(min_ndim=2)
-    self._kernel_use_local_reparameterization = (
-        kernel_use_local_reparameterization)
-    self._kernel = VariationalKernelParameter(
-        kernel_posterior_fn,
-        kernel_posterior_tensor_fn,
-        kernel_prior_fn,
-        kernel_divergence_fn)
-    self._bias = VariationalParameter(
-        bias_posterior_fn,
-        bias_posterior_tensor_fn,
-        bias_prior_fn,
-        bias_divergence_fn)
-
-  @property
-  def units(self):
-    return self._units
-
-  @property
-  def activation(self):
-    return self._activation
-
-  @property
-  def input_spec(self):
-    return self._input_spec
-
-  @input_spec.setter
-  def input_spec(self, value):
-    self._input_spec = value
-
-  @property
-  def kernel_use_local_reparameterization(self):
-    return self._kernel_use_local_reparameterization
-
-  @property
-  def kernel(self):
-    return self._kernel
-
-  @property
-  def bias(self):
-    return self._bias
-
-  def build(self, input_shape):
-    input_shape = tensor_shape.TensorShape(input_shape)
-    in_size = input_shape.with_rank_at_least(2)[-1].value
-    if in_size is None:
-      raise ValueError("The last dimension of the inputs to `Dense` "
-                       "should be defined. Found `None`.")
-    self._input_spec = layers_lib.InputSpec(min_ndim=2, axes={-1: in_size})
-    dtype = dtypes.as_dtype(self.dtype)
-
-    # Must have a posterior kernel.
-    self.kernel.posterior = self.kernel.posterior_fn(
-        dtype, [in_size, self.units], "kernel_posterior",
-        self.trainable, self.add_variable)
-
-    if self.kernel.prior_fn is None:
-      self.kernel_prior = None
-    else:
-      self.kernel.prior = self.kernel.prior_fn(
-          dtype, [in_size, self.units], "kernel_prior",
-          self.trainable, self.add_variable)
-    self._built_kernel_divergence = False
-
-    if self.bias.posterior_fn is None:
-      self.bias.posterior = None
-    else:
-      self.bias.posterior = self.bias.posterior_fn(
-          dtype, [self.units], "bias_posterior",
-          self.trainable, self.add_variable)
-
-    if self.bias.prior_fn is None:
-      self.bias.prior = None
-    else:
-      self.bias.prior = self.bias.prior_fn(
-          dtype, [self.units], "bias_prior",
-          self.trainable, self.add_variable)
-    self._built_bias_divergence = False
-
-    self.built = True
-
-  def call(self, inputs):
-    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
-
-    outputs = self._apply_variational_kernel(inputs)
-    outputs = self._apply_variational_bias(outputs)
-    if self.activation is not None:
-      outputs = self.activation(outputs)  # pylint: disable=not-callable
-    if not self._built_kernel_divergence:
-      self._apply_divergence(self.kernel, name="divergence_kernel")
-      self._built_kernel_divergence = True
-    if not self._built_bias_divergence:
-      self._apply_divergence(self.bias, name="divergence_bias")
-      self._built_bias_divergence = True
-    return outputs
-
-  def _apply_variational_kernel(self, inputs):
-    if not self.kernel_use_local_reparameterization:
-      self.kernel.posterior_tensor = self.kernel.posterior_tensor_fn(
-          self.kernel.posterior)
-      self.kernel.posterior_affine = None
-      self.kernel.posterior_affine_tensor = None
-      return self._matmul(inputs, self.kernel.posterior_tensor)
-    if not isinstance(self.kernel.posterior, normal_lib.Normal):
-      raise TypeError("`kernel_use_local_reparameterization=True` requires "
-                      "`kernel_posterior_fn` produce an instance of "
-                      "`tf.distributions.Normal` (saw: \"{}\").".format(
-                          type(self.kernel.posterior).__name__))
-    self.kernel.posterior_affine = normal_lib.Normal(
-        loc=self._matmul(inputs, self.kernel.posterior.loc),
-        scale=standard_ops.sqrt(self._matmul(
-            standard_ops.square(inputs),
-            standard_ops.square(self.kernel.posterior.scale))))
-    self.kernel.posterior_affine_tensor = (
-        self.kernel.posterior_tensor_fn(self.kernel.posterior_affine))
-    self.kernel.posterior_tensor = None
-    return self.kernel.posterior_affine_tensor
-
-  def _apply_variational_bias(self, inputs):
-    if self.bias.posterior is None:
-      self.bias.posterior_tensor = None
-      return inputs
-    self.bias.posterior_tensor = self.bias.posterior_tensor_fn(
-        self.bias.posterior)
-    return nn.bias_add(inputs, self.bias.posterior_tensor)
-
-  def _apply_divergence(self, param, name):
-    if (param.divergence_fn is None or
-        param.posterior is None or
-        param.prior is None):
-      param.divergence = None
-      return
-    param.divergence = standard_ops.identity(
-        param.divergence_fn(
-            param.posterior, param.prior, param.posterior_tensor),
-        name=name)
-    self.add_loss(param.divergence)
-
-  def _matmul(self, inputs, kernel):
-    if inputs.shape.ndims <= 2:
-      return standard_ops.matmul(inputs, kernel)
-    # To handle broadcasting, we must use `tensordot`.
-    return standard_ops.tensordot(inputs, kernel, axes=[[-1], [0]])
-
-  def _compute_output_shape(self, input_shape):
-    input_shape = tensor_shape.TensorShape(input_shape).with_rank_at_least(2)
-    if input_shape[-1].value is None:
-      raise ValueError(
-          "The innermost dimension of input_shape must be defined, "
-          "but saw: {}".format(input_shape))
-    return input_shape[:-1].concatenate(self.units)
-
-
-def dense_variational(
-    inputs,
-    units,
-    activation=None,
-    activity_regularizer=None,
-    trainable=True,
-    kernel_use_local_reparameterization=True,
-    kernel_posterior_fn=default_mean_field_normal_fn(),
-    kernel_posterior_tensor_fn=lambda d: d.sample(),
-    kernel_prior_fn=lambda dtype, *args: normal_lib.Normal(  # pylint: disable=g-long-lambda
-        loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
-    kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
-    bias_posterior_fn=default_mean_field_normal_fn(is_singular=True),
-    bias_posterior_tensor_fn=lambda d: d.sample(),
-    bias_prior_fn=None,
-    bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
-    name=None,
-    reuse=None):
-  """Densely-connected variational layer.
-
-  This layer implements the Bayesian variational inference analogue to:
-  `outputs = activation(matmul(inputs, kernel) + bias)`
-  by assuming the `kernel` and/or the `bias` are random variables.
-
-  The layer implements a stochastic dense calculation by making a Monte Carlo
-  approximation of a [variational Bayesian method based on KL divergence](
-  https://en.wikipedia.org/wiki/Variational_Bayesian_methods), i.e.,
-
-  ```none
-  -log p(y|x) = -log int_{R**d} p(y|x,w) p(w) dw
-              = -log int_{R**d} p(y,w|x) q(w|x) / q(w|x) dw
-             <= E_q(W|x)[-log p(y,W|x) + log q(W|x)]       # Jensen's
-              = E_q(W|x)[-log p(y|x,W)] + KL[q(W|x), p(W)]
-             ~= m**-1 sum{ -log(y|x,w[j]) : w[j] ~ q(W|x), j=1..m }
-                 + KL[q(W|x), p(W)]
-  ```
-
-  where `W` denotes the (independent) `kernel` and `bias` random variables, `w`
-  is a random variate or outcome of `W`, `y` is the label, `x` is the evidence`,
-  and `~=` denotes an approximation which becomes exact as `m->inf`. The above
-  bound is sometimes referred to as the negative Evidence Lower BOund or
-  negative [ELBO](https://arxiv.org/abs/1601.00670). In context of a DNN, this
-  layer is appropriate to use when the final loss is a negative log-likelihood.
-
-  The Monte-Carlo sum portion is used for the feed-forward calculation of the
-  DNN. The KL divergence portion can be added to the final loss via:
-  `loss += sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))`.
-
-  The arguments permit separate specification of the surrogate posterior
-  (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
-  random variables (which together comprise `W`).
-
-  Args:
-    inputs: Tensor input.
-    units: Integer or Long, dimensionality of the output space.
-    activation: Activation function (`callable`). Set it to None to maintain a
-      linear activation.
-    activity_regularizer: Regularizer function for the output.
-    trainable: Boolean, if `True` also add variables to the graph collection
-      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
-    kernel_use_local_reparameterization: Python `bool` indicating whether
-      `kernel` calculation should employ the Local Reparameterization Trick.
-      When `True`, `kernel_posterior_fn` must create an instance of
-      `tf.distributions.Normal`.
-    kernel_posterior_fn: Python `callable` which creates
-      `tf.distributions.Distribution` instance representing the surrogate
-      posterior of the `kernel` parameter. Default value:
-      `default_mean_field_normal_fn()`.
-    kernel_posterior_tensor_fn: Python `callable` which takes a
-      `tf.distributions.Distribution` instance and returns a representative
-      value. Default value: `lambda d: d.sample()`.
-    kernel_prior_fn: Python `callable` which creates `tf.distributions`
-      instance. See `default_mean_field_normal_fn` docstring for required
-      parameter signature.
-      Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
-    kernel_divergence_fn: Python `callable` which takes the surrogate posterior
-      distribution, prior distribution and random variate sample(s) from the
-      surrogate posterior and computes or approximates the KL divergence. The
-      distributions are `tf.distributions.Distribution`-like instances and the
-      sample is a `Tensor`.
-    bias_posterior_fn: Python `callable` which creates
-      `tf.distributions.Distribution` instance representing the surrogate
-      posterior of the `bias` parameter. Default value:
-      `default_mean_field_normal_fn(is_singular=True)` (which creates an
-      instance of `tf.distributions.Deterministic`).
-    bias_posterior_tensor_fn: Python `callable` which takes a
-      `tf.distributions.Distribution` instance and returns a representative
-      value. Default value: `lambda d: d.sample()`.
-    bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
-      See `default_mean_field_normal_fn` docstring for required parameter
-      signature. Default value: `None` (no prior, no variational inference)
-    bias_divergence_fn: Python `callable` which takes the surrogate posterior
-      distribution, prior distribution and random variate sample(s) from the
-      surrogate posterior and computes or approximates the KL divergence. The
-      distributions are `tf.distributions.Distribution`-like instances and the
-      sample is a `Tensor`.
-    name: Python `str`, the name of the layer. Layers with the same name will
-      share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
-      such cases.
-    reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
-      layer by the same name.
-
-  Returns:
-    output: `Tensor` representing a the affine transformed input under a random
-      draw from the surrogate posterior distribution.
-  """
-  layer = DenseVariational(
-      units,
-      activation=activation,
-      activity_regularizer=activity_regularizer,
-      trainable=trainable,
-      kernel_use_local_reparameterization=(
-          kernel_use_local_reparameterization),
-      kernel_posterior_fn=kernel_posterior_fn,
-      kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
-      kernel_prior_fn=kernel_prior_fn,
-      kernel_divergence_fn=kernel_divergence_fn,
-      bias_posterior_fn=bias_posterior_fn,
-      bias_posterior_tensor_fn=bias_posterior_tensor_fn,
-      bias_prior_fn=bias_prior_fn,
-      bias_divergence_fn=bias_divergence_fn,
-      name=name,
-      dtype=inputs.dtype.base_dtype,
-      _scope=name,
-      _reuse=reuse)
-  return layer.apply(inputs)
-
-
-class NotSet(object):
-  """Helper to track whether a `VariationalParameter` value has been set."""
-  pass
-
-
-class VariationalParameter(object):
-  """Struct-like container of variational parameter properties.
-
-  A `VariationalParameter` is intitialized with Python `callable`s which set the
-  value of correspondingly named members. Corresponding values have "set once"
-  semantics, i.e., once set to any value they are immutable.
-  """
-
-  def __init__(
-      self,
-      posterior_fn,
-      posterior_tensor_fn,
-      prior_fn,
-      divergence_fn):
-    """Creates the `VariationalParameter` struct-like object.
-
-    Args:
-      posterior_fn: Python `callable` which creates a
-        `tf.distribution.Distribution` like object representing the posterior
-        distribution. See `VariationalParameter.posterior_fn` for `callable`'s
-        required parameters.
-      posterior_tensor_fn: Python `callable` which computes a `Tensor`
-        which represents the `posterior`.
-      prior_fn: Python `callable` which creates a
-        `tf.distribution.Distribution` like object representing the prior
-        distribution. See `VariationalParameter.prior_fn` for `callable`'s
-        required parameters.
-      divergence_fn: Python `callable` which computes the KL divergence from
-        `posterior` to `prior`. See `VariationalParameter.divergence_fn` for
-        required `callable`'s parameters.
-    """
-    self._posterior_fn = posterior_fn
-    self._posterior = NotSet()
-    self._posterior_tensor_fn = posterior_tensor_fn
-    self._posterior_tensor = NotSet()
-    self._prior_fn = prior_fn
-    self._prior = NotSet()
-    self._divergence_fn = divergence_fn
-    self._divergence = NotSet()
-    self._init_helper()
-
-  @property
-  def posterior_fn(self):
-    """`callable` which creates `tf.distributions.Distribution`-like posterior.
-
-    The `callable` must accept the following parameters:
-      name: Python `str` name prepended to any created (or existing)
-        `tf.Variable`s.
-      shape: Python `list`-like representing the parameter's event shape.
-      dtype: Type of parameter's event.
-      trainable: Python `bool` indicating all created `tf.Variable`s should be
-        added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
-      add_variable_fn: `tf.get_variable`-like `callable` used to create (or
-        access existing) `tf.Variable`s.
-
-    Returns:
-      posterior_fn: The Python `callable` specified in `__init__`.
-    """
-    return self._posterior_fn
-
-  @property
-  def posterior(self):
-    """`tf.distributions.Distribution`-like instance representing posterior."""
-    return self._posterior
-
-  @posterior.setter
-  def posterior(self, value):
-    """One-time setter of the `posterior` distribution."""
-    if not isinstance(self._posterior, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._posterior = value
-
-  @property
-  def posterior_tensor_fn(self):
-    """Creates `Tensor` representing the `posterior` distribution.
-
-    The `callable` must accept the following parameters:
-      posterior: `tf.distributions.Distribution`-like instance.
-
-    Returns:
-      posterior_tensor_fn: The Python `callable` specified in
-        `__init__`.
-    """
-    return self._posterior_tensor_fn
-
-  @property
-  def posterior_tensor(self):
-    """`Tensor` representing the `posterior` distribution."""
-    return self._posterior_tensor
-
-  @posterior_tensor.setter
-  def posterior_tensor(self, value):
-    """One-time setter of the `posterior_tensor`."""
-    if not isinstance(self._posterior_tensor, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._posterior_tensor = value
-
-  @property
-  def prior_fn(self):
-    """`callable` which creates `tf.distributions.Distribution`-like prior.
-
-    The `callable` must accept the following parameters:
-      name: Python `str` name prepended to any created (or existing)
-        `tf.Variable`s.
-      shape: Python `list`-like representing the parameter's event shape.
-      dtype: Type of parameter's event.
-      trainable: Python `bool` indicating all created `tf.Variable`s should be
-        added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
-      add_variable_fn: `tf.get_variable`-like `callable` used to create (or
-        access existing) `tf.Variable`s.
-
-    Returns:
-      prior_fn: The Python `callable` specified in `__init__`.
-    """
-    return self._prior_fn
-
-  @property
-  def prior(self):
-    """`tf.distributions.Distribution`-like instance representing posterior."""
-    return self._prior
-
-  @prior.setter
-  def prior(self, value):
-    """One-time setter of the `prior` distribution."""
-    if not isinstance(self._prior, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._prior = value
-
-  @property
-  def divergence_fn(self):
-    """`callable` which computes KL-divergence `Tensor` from posterior to prior.
-
-    The `callable` must accept the following parameters:
-      posterior: `tf.distributions.Distribution`-like instance.
-      prior: `tf.distributions.Distribution`-like instance.
-      posterior_tensor: `Tensor` representing value of posterior.
-
-    Returns:
-      divergence_fn: The Python `callable` specified in `__init__`.
-    """
-    return self._divergence_fn
-
-  @property
-  def divergence(self):
-    """`Tensor` representing KL-divergence from posterior to prior."""
-    return self._divergence
-
-  @divergence.setter
-  def divergence(self, value):
-    """One-time setter of the `divergence`."""
-    if not isinstance(self._divergence, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._divergence = value
-
-  def _init_helper(self):
-    pass
-
-
-class VariationalKernelParameter(VariationalParameter):
-  """Struct-like container of variational kernel properties.
-
-  A `VariationalKernelParameter` is intitialized with Python `callable`s which
-  set the value of correspondingly named members. Corresponding values have "set
-  once" semantics, i.e., once set to any value they are immutable.
-  """
-
-  @property
-  def posterior_affine(self):
-    """`tf.distributions.Distribution` affine transformed posterior."""
-    return self._posterior_affine
-
-  @posterior_affine.setter
-  def posterior_affine(self, value):
-    """One-time setter of `posterior_affine`."""
-    if not isinstance(self._posterior_affine, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._posterior_affine = value
-
-  @property
-  def posterior_affine_tensor(self):
-    """`Tensor` representing the `posterior_affine` distribution."""
-    return self._posterior_affine_tensor
-
-  @posterior_affine_tensor.setter
-  def posterior_affine_tensor(self, value):
-    """One-time setter of the `posterior_affine_tensor`."""
-    if not isinstance(self._posterior_affine_tensor, NotSet):
-      raise ValueError("Cannot override already set attribute.")
-    self._posterior_affine_tensor = value
-
-  def _init_helper(self):
-    self._posterior_affine = NotSet()
-    self._posterior_affine_tensor = NotSet()
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_util.py b/tensorflow/contrib/bayesflow/python/ops/layers_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c1fb203f7328e8260e49b4326d813fbe133613e
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/python/ops/layers_util.py
@@ -0,0 +1,191 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities for probabilistic layers.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.distributions.python.ops import deterministic as deterministic_lib
+from tensorflow.contrib.distributions.python.ops import independent as independent_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops.distributions import normal as normal_lib
+
+
+def default_loc_scale_fn(
+    is_singular=False,
+    loc_initializer=init_ops.random_normal_initializer(stddev=0.1),
+    untransformed_scale_initializer=init_ops.random_normal_initializer(
+        mean=-3., stddev=0.1),
+    loc_regularizer=None,
+    untransformed_scale_regularizer=None,
+    loc_constraint=None,
+    untransformed_scale_constraint=None):
+  """Makes closure which creates `loc`, `scale` params from `tf.get_variable`.
+
+  This function produces a closure which produces `loc`, `scale` using
+  `tf.get_variable`. The closure accepts the following arguments:
+
+    dtype: Type of parameter's event.
+    shape: Python `list`-like representing the parameter's event shape.
+    name: Python `str` name prepended to any created (or existing)
+      `tf.Variable`s.
+    trainable: Python `bool` indicating all created `tf.Variable`s should be
+      added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
+    add_variable_fn: `tf.get_variable`-like `callable` used to create (or
+      access existing) `tf.Variable`s.
+
+  Args:
+    is_singular: Python `bool` indicating if `scale is None`. Default: `False`.
+    loc_initializer: Initializer function for the `loc` parameters.
+      The default is `tf.random_normal_initializer(mean=0., stddev=0.1)`.
+    untransformed_scale_initializer: Initializer function for the `scale`
+      parameters. Default value: `tf.random_normal_initializer(mean=-3.,
+      stddev=0.1)`. This implies the softplus transformed result has mean
+      approximately `0.05` and std. deviation approximately `0.005`.
+    loc_regularizer: Regularizer function for the `loc` parameters.
+      The default (`None`) is to use the `tf.get_variable` default.
+    untransformed_scale_regularizer: Regularizer function for the `scale`
+      parameters. The default (`None`) is to use the `tf.get_variable` default.
+    loc_constraint: An optional projection function to be applied to the
+      loc after being updated by an `Optimizer`. The function must take as input
+      the unprojected variable and must return the projected variable (which
+      must have the same shape). Constraints are not safe to use when doing
+      asynchronous distributed training.
+      The default (`None`) is to use the `tf.get_variable` default.
+    untransformed_scale_constraint: An optional projection function to be
+      applied to the `scale` parameters after being updated by an `Optimizer`
+      (e.g. used to implement norm constraints or value constraints). The
+      function must take as input the unprojected variable and must return the
+      projected variable (which must have the same shape). Constraints are not
+      safe to use when doing asynchronous distributed training. The default
+      (`None`) is to use the `tf.get_variable` default.
+
+  Returns:
+    default_loc_scale_fn: Python `callable` which instantiates `loc`, `scale`
+    parameters from args: `dtype, shape, name, trainable, add_variable_fn`.
+  """
+  def _fn(dtype, shape, name, trainable, add_variable_fn):
+    """Creates `loc`, `scale` parameters."""
+    loc = add_variable_fn(
+        name=name + "_loc",
+        shape=shape,
+        initializer=loc_initializer,
+        regularizer=loc_regularizer,
+        constraint=loc_constraint,
+        dtype=dtype,
+        trainable=trainable)
+    if is_singular:
+      return loc, None
+    untransformed_scale = add_variable_fn(
+        name=name + "_untransformed_scale",
+        shape=shape,
+        initializer=untransformed_scale_initializer,
+        regularizer=untransformed_scale_regularizer,
+        constraint=untransformed_scale_constraint,
+        dtype=dtype,
+        trainable=trainable)
+    scale = (np.finfo(dtype.as_numpy_dtype).eps +
+             nn_ops.softplus(untransformed_scale))
+    return loc, scale
+  return _fn
+
+
+def default_mean_field_normal_fn(
+    is_singular=False,
+    loc_initializer=None,
+    untransformed_scale_initializer=None,
+    loc_regularizer=None,
+    untransformed_scale_regularizer=None,
+    loc_constraint=None,
+    untransformed_scale_constraint=None):
+  """Creates a function to build Normal distributions with trainable params.
+
+  This function produces a closure which produces `tf.distributions.Normal`
+  parameterized by a loc` and `scale` each created using `tf.get_variable`. The
+  produced closure accepts the following arguments:
+
+    name: Python `str` name prepended to any created (or existing)
+      `tf.Variable`s.
+    shape: Python `list`-like representing the parameter's event shape.
+    dtype: Type of parameter's event.
+    trainable: Python `bool` indicating all created `tf.Variable`s should be
+      added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
+    add_variable_fn: `tf.get_variable`-like `callable` used to create (or
+      access existing) `tf.Variable`s.
+
+  Args:
+    is_singular: Python `bool` if `True`, forces the special case limit of
+      `scale->0`, i.e., a `Deterministic` distribution.
+    loc_initializer: Initializer function for the `loc` parameters.
+      If `None` (default), values are initialized using the default
+      initializer used by `tf.get_variable`.
+    untransformed_scale_initializer: Initializer function for the `scale`
+      parameters. If `None` (default), values are initialized using the default
+      initializer used by `tf.get_variable`.
+    loc_regularizer: Regularizer function for the `loc` parameters.
+    untransformed_scale_regularizer: Regularizer function for the `scale`
+      parameters.
+    loc_constraint: An optional projection function to be applied to the
+      loc after being updated by an `Optimizer`. The function must take as input
+      the unprojected variable and must return the projected variable (which
+      must have the same shape). Constraints are not safe to use when doing
+      asynchronous distributed training.
+    untransformed_scale_constraint: An optional projection function to be
+      applied to the `scale` parameters after being updated by an `Optimizer`
+      (e.g. used to implement norm constraints or value constraints). The
+      function must take as input the unprojected variable and must return the
+      projected variable (which must have the same shape). Constraints are not
+      safe to use when doing asynchronous distributed training.
+
+  Returns:
+    make_normal_fn: Python `callable` which creates a `tf.distributions.Normal`
+      using from args: `dtype, shape, name, trainable, add_variable_fn`.
+  """
+  loc_scale_fn_ = default_loc_scale_fn(
+      is_singular,
+      loc_initializer,
+      untransformed_scale_initializer,
+      loc_regularizer,
+      untransformed_scale_regularizer,
+      loc_constraint,
+      untransformed_scale_constraint)
+  def _fn(dtype, shape, name, trainable, add_variable_fn):
+    """Creates multivariate `Deterministic` or `Normal` distribution."""
+    loc, scale = loc_scale_fn_(dtype, shape, name, trainable, add_variable_fn)
+    if scale is None:
+      dist = deterministic_lib.Deterministic(loc=loc)
+    else:
+      dist = normal_lib.Normal(loc=loc, scale=scale)
+    reinterpreted_batch_ndims = array_ops.shape(dist.batch_shape_tensor())[0]
+    return independent_lib.Independent(
+        dist, reinterpreted_batch_ndims=reinterpreted_batch_ndims)
+  return _fn
+
+
+def random_sign(shape, dtype=dtypes.float32, seed=None):
+  """Draw values from {-1, 1} uniformly, i.e., Rademacher distribution."""
+  random_bernoulli = random_ops.random_uniform(shape, minval=0, maxval=2,
+                                               dtype=dtypes.int32,
+                                               seed=seed)
+  return math_ops.cast(2 * random_bernoulli - 1, dtype)
diff --git a/tensorflow/contrib/bayesflow/python/ops/optimizers.py b/tensorflow/contrib/bayesflow/python/ops/optimizers.py
index ee32e6b5c3d9efaeaf73436638c5eea55f2cfc70..fb70628d1083836281e9327e83e109493276c64f 100644
--- a/tensorflow/contrib/bayesflow/python/ops/optimizers.py
+++ b/tensorflow/contrib/bayesflow/python/ops/optimizers.py
@@ -24,11 +24,13 @@ from __future__ import print_function
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
 from tensorflow.contrib.bayesflow.python.ops.sgld_optimizer import *
+from tensorflow.contrib.bayesflow.python.ops.variational_sgd_optimizer import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
     'SGLDOptimizer',
+    'VariationalSGDOptimizer',
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py b/tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py
index 5d36ea7a2b51aa45cdc253992a2a58634c068987..7786656398e3c87704227be95b3cd23a38785249 100644
--- a/tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py
+++ b/tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py
@@ -189,6 +189,10 @@ class SGLDOptimizer(optimizer.Optimizer):
         new_grad,
         use_locking=self._use_locking).op
 
+  def _finish(self, update_ops, name_scope):
+    update_ops.append([self._counter.assign_add(1)])
+    return control_flow_ops.group(*update_ops, name=name_scope)
+
   @property
   def variable_scope(self):
     """Variable scope of all calls to `tf.get_variable`."""
diff --git a/tensorflow/contrib/bayesflow/python/ops/variational_sgd_optimizer.py b/tensorflow/contrib/bayesflow/python/ops/variational_sgd_optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d5f0cfe9713a011b32c5aba8d429847d81f33e2
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/python/ops/variational_sgd_optimizer.py
@@ -0,0 +1,279 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""An optimizer module for constant stochastic gradient descent."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope as varscope_ops
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import training_ops
+
+
+class VariationalSGDOptimizer(optimizer.Optimizer):
+  """An optimizer module for constant stochastic gradient descent.
+
+  This implements an optimizer module for the constant stochastic gradient
+  descent algorithm [1].  The optimization variable is regarded as an
+  approximate sample from the posterior .
+
+  Note: If a prior is included in the loss, it should be scaled by
+  `1/num_pseudo_batches`, where num_pseudo_batches is the number of minibatches
+  in the data.  I.e., it should be divided by the `num_pseudo_batches` term
+  described below.
+
+  [1]: "Stochastic Gradient Descent as Approximate Bayesian Inference
+       Stephan Mandt, Matthew D. Hoffman, David M. Blei.
+       ArXiv:1704.04289, 2017. https://arxiv.org/abs/1704.04289
+
+  Args:
+    batch_size: Scalar `int`-like `Tensor`. The number of examples in a
+      minibatch in the data set. Note: Assumes the loss is taken as the mean
+      over a minibatch. Otherwise if the sum was taken set this to 1.
+    total_num_examples: Scalar `int`-like `Tensor`. The total number of examples
+      in the data set.
+    max_learning_rate: Scalar `float`-like `Tensor`. A maximum allowable
+      effective coordinate-wise learning rate. The algorithm scales down any
+      effective learning rate (i.e. after preconditioning) that is larger than
+      this. (Default: `1`)
+    preconditioner_decay_rate: Scalar `float`-like `Tensor`. The exponential
+      decay rate of the rescaling of the preconditioner (RMSprop). (This is
+      "alpha" in [1]). Should be smaller than but nearly `1` to approximate
+      sampling from the posterior. (Default: `0.95`)
+    burnin: Scalar `int`-like `Tensor`. The number of iterations to collect
+      gradient statistics to update the preconditioner before starting to draw
+      noisy samples. (Default: `25`)
+    burnin_max_learning_rate: Scalar `float`-like `Tensor`. Maximum learning
+      rate to use during the burnin period.
+      (Default: `1e-8`)
+    use_single_learning_rate: Boolean Indicates whether one single learning
+      rate is used or coordinate_wise learning rates are used.
+      (Default: `False`)
+    name: Python `str` describing ops managed by this function.
+      (Default: `"VariationalSGDOptimizer"`)
+    variable_scope: Variable scope used for calls to `tf.get_variable`.
+      If `None`, a new variable scope is created using name
+      `ops.get_default_graph().unique_name(name or default_name)`.
+
+  Raises:
+    InvalidArgumentError: If preconditioner_decay_rate is a `Tensor` not in
+      `(0,1]`.
+  """
+
+  def __init__(self,
+               batch_size,
+               total_num_examples,
+               max_learning_rate=1.0,
+               preconditioner_decay_rate=0.95,
+               burnin=25,
+               burnin_max_learning_rate=1e-6,
+               use_single_learning_rate=False,
+               name=None,
+               variable_scope=None):
+    default_name = 'VariationalSGDOptimizer'
+    with ops.name_scope(name, default_name, [
+        max_learning_rate, preconditioner_decay_rate, batch_size, burnin,
+        burnin_max_learning_rate
+    ]):
+      if variable_scope is None:
+        var_scope_name = ops.get_default_graph().unique_name(
+            name or default_name)
+        with varscope_ops.variable_scope(var_scope_name) as scope:
+          self._variable_scope = scope
+      else:
+        self._variable_scope = variable_scope
+
+      self._preconditioner_decay_rate = ops.convert_to_tensor(
+          preconditioner_decay_rate, name='preconditioner_decay_rate')
+      self._batch_size = ops.convert_to_tensor(batch_size, name='batch_size')
+      self._total_num_examples = ops.convert_to_tensor(
+          total_num_examples, name='total_num_examples')
+      self._burnin = ops.convert_to_tensor(burnin, name='burnin')
+      self._burnin_max_learning_rate = ops.convert_to_tensor(
+          burnin_max_learning_rate, name='burnin_max_learning_rate')
+      self._max_learning_rate = ops.convert_to_tensor(
+          max_learning_rate, name='max_learning_rate')
+      self._use_single_learning_rate = use_single_learning_rate
+
+      with varscope_ops.variable_scope(self._variable_scope):
+        self._counter = varscope_ops.get_variable(
+            'counter', initializer=0, trainable=False)
+
+      self._preconditioner_decay_rate = control_flow_ops.with_dependencies([
+          check_ops.assert_non_negative(
+              self._preconditioner_decay_rate,
+              message='`preconditioner_decay_rate` must be non-negative'),
+          check_ops.assert_less_equal(
+              self._preconditioner_decay_rate,
+              1.,
+              message='`preconditioner_decay_rate` must be at most 1.'),
+      ], self._preconditioner_decay_rate)
+
+      self._batch_size = control_flow_ops.with_dependencies([
+          check_ops.assert_greater(
+              self._batch_size,
+              0,
+              message='`batch_size` must be greater than zero')
+      ], self._batch_size)
+
+      self._total_num_examples = control_flow_ops.with_dependencies([
+          check_ops.assert_greater(
+              self._total_num_examples,
+              0,
+              message='`total_num_examples` must be greater than zero')
+      ], self._total_num_examples)
+
+      self._burnin = control_flow_ops.with_dependencies([
+          check_ops.assert_non_negative(
+              self._burnin, message='`burnin` must be non-negative'),
+          check_ops.assert_integer(
+              self._burnin, message='`burnin` must be an integer')
+      ], self._burnin)
+
+      self._burnin_max_learning_rate = control_flow_ops.with_dependencies([
+          check_ops.assert_non_negative(
+              self._burnin_max_learning_rate,
+              message='`burnin_max_learning_rate` must be non-negative')
+      ], self._burnin_max_learning_rate)
+
+      self._max_learning_rate = control_flow_ops.with_dependencies([
+          check_ops.assert_non_negative(
+              self._max_learning_rate,
+              message='`max_learning_rate` must be non-negative')
+      ], self._max_learning_rate)
+
+      super(VariationalSGDOptimizer, self).__init__(
+          use_locking=False, name=name or default_name)
+
+  def _create_slots(self, var_list):
+    for v in var_list:
+      init_moment = init_ops.zeros_initializer(dtype=v.dtype)
+      self._get_or_make_slot_with_initializer(
+          v, init_moment, v.get_shape(), v.dtype, 'first_moment', self._name)
+      self._get_or_make_slot_with_initializer(
+          v, init_moment, v.get_shape(), v.dtype, 'second_moment', self._name)
+
+  def _prepare(self):
+    self._decay_tensor = ops.convert_to_tensor(
+        self._preconditioner_decay_rate, name='preconditioner_decay_rate')
+    self._batch_size_tensor = ops.convert_to_tensor(
+        self._batch_size, name='batch_size_tensor')
+
+    super(VariationalSGDOptimizer, self)._prepare()
+
+  def _get_coordinatewise_learning_rate(self, grad, var):
+    # Compute the learning rate using a moving average for the diagonal of BB^T
+    avg_first = self.get_slot(var, 'first_moment')
+    avg_second = self.get_slot(var, 'second_moment')
+    decay_tensor = math_ops.cast(self._decay_tensor, var.dtype)
+    batch_size = math_ops.cast(self._batch_size_tensor, var.dtype)
+
+    # Create an estimator for the moving average of gradient mean and variance
+    # via Welford's algorithm
+    if isinstance(grad, ops.Tensor):
+      delta = grad - avg_first
+      first_moment_update = avg_first.assign_add(
+          array_ops.where(self._counter < 1, math_ops.cast(1, var.dtype),
+                          1. - decay_tensor) * delta)
+
+      with ops.control_dependencies([first_moment_update]):
+        second_moment_update = avg_second.assign_add(
+            math_ops.cast(self._counter < 1, var.dtype) *
+            -(1. - decay_tensor) * (
+                avg_second - decay_tensor  * math_ops.square(delta)))
+      diag_preconditioner = control_flow_ops.with_dependencies(
+          [second_moment_update],
+          clip_ops.clip_by_value(avg_second, 1e-12, 1e12))
+    elif isinstance(grad, ops.IndexedSlices):
+      delta = grad.values - array_ops.gather_nd(avg_first, grad.indices)
+      first_moment_update = state_ops.scatter_add(
+          avg_first,
+          grad.indices,
+          array_ops.where(self._counter < 1,
+                          math_ops.cast(1., var.dtype),
+                          1. - decay_tensor) * delta)
+
+      with ops.control_dependencies([first_moment_update]):
+        avg_second = state_ops.scatter_add(
+            avg_second,
+            grad.indices,
+            math_ops.cast(self._counter < 1, var.dtype) *
+            -(1. - decay_tensor) * (
+                array_ops.gather_nd(avg_second, grad.indices) - decay_tensor *
+                math_ops.square(delta)))
+        avg_second = array_ops.gather_nd(avg_second, grad.indices)
+        # TODO(b/70783772)
+        diag_preconditioner = clip_ops.clip_by_value(avg_second, 1e-12, 1e12)
+    else:
+      raise errors.InvalidArgumentError(
+          None, None, 'grad must of type Tensor or IndexedSlice')
+
+    diag_preconditioner *= batch_size
+
+    if self._use_single_learning_rate:
+      diag_preconditioner = math_ops.reduce_mean(diag_preconditioner)
+
+    # From Theorem 2 Corollary 1 of Mandt et al. 2017
+    return 2. * batch_size / (
+        math_ops.cast(self._total_num_examples, var.dtype.base_dtype) *
+        diag_preconditioner)
+
+  def _apply_dense(self, grad, var):
+
+    max_learning_rate = array_ops.where(self._counter < self._burnin,
+                                        self._burnin_max_learning_rate,
+                                        self._max_learning_rate)
+
+    learn_rates = clip_ops.clip_by_value(
+        self._get_coordinatewise_learning_rate(grad, var), 0.0,
+        math_ops.cast(max_learning_rate, var.dtype.base_dtype))
+
+    newgrad = grad * learn_rates
+    return training_ops.apply_gradient_descent(
+        var,
+        math_ops.cast(1.0, var.dtype),
+        newgrad,
+        use_locking=self._use_locking).op
+
+  def _apply_sparse(self, grad, var):
+
+    max_learning_rate = array_ops.where(self._counter < self._burnin,
+                                        self._burnin_max_learning_rate,
+                                        self._max_learning_rate)
+
+    learn_rate = clip_ops.clip_by_value(
+        self._get_coordinatewise_learning_rate(grad, var), 0.0,
+        math_ops.cast(max_learning_rate, var.dtype))
+    delta = grad.values * learn_rate
+
+    return state_ops.scatter_sub(var, grad.indices, delta,
+                                 use_locking=self._use_locking)
+
+  def _finish(self, update_ops, name_scope):
+    update_ops.append([self._counter.assign_add(1)])
+    return control_flow_ops.group(*update_ops, name=name_scope)
+
+  @property
+  def variable_scope(self):
+    """Variable scope of all calls to `tf.get_variable`."""
+    return self._variable_scope
diff --git a/tensorflow/contrib/boosted_trees/BUILD b/tensorflow/contrib/boosted_trees/BUILD
index 7072f56420ac9e576b20b62c0aa67498857403a7..392ac7fa1ce600a64ee3b941b70b01447645e4aa 100644
--- a/tensorflow/contrib/boosted_trees/BUILD
+++ b/tensorflow/contrib/boosted_trees/BUILD
@@ -601,6 +601,7 @@ py_library(
         ":init_py",
         "//tensorflow/contrib/boosted_trees:gbdt_batch",
         "//tensorflow/contrib/boosted_trees/estimator_batch:custom_export_strategy",
+        "//tensorflow/contrib/boosted_trees/estimator_batch:dnn_tree_combined_estimator",
         "//tensorflow/contrib/boosted_trees/estimator_batch:init_py",
         "//tensorflow/contrib/boosted_trees/estimator_batch:trainer_hooks",
         "//tensorflow/contrib/boosted_trees/lib:categorical_split_handler",
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
index 7792c7127c0285dc2eb5b213da054674f6a81d64..289f5bb3140974d8c37f4938ceef27275b099f9a 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD
@@ -50,6 +50,7 @@ py_library(
     deps = [
         "//tensorflow/contrib/learn",
         "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:training",
@@ -129,3 +130,38 @@ py_library(
         "//tensorflow/python:math_ops",
     ],
 )
+
+py_library(
+    name = "dnn_tree_combined_estimator",
+    srcs = ["dnn_tree_combined_estimator.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":trainer_hooks",
+        "//tensorflow/contrib/boosted_trees:gbdt_batch",
+        "//tensorflow/contrib/boosted_trees:model_ops_py",
+        "//tensorflow/contrib/learn",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:training",
+    ],
+)
+
+py_test(
+    name = "dnn_tree_combined_estimator_test",
+    size = "small",
+    srcs = ["dnn_tree_combined_estimator_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_gpu",
+        "no_pip_gpu",
+        "notsan",
+    ],
+    deps = [
+        ":dnn_tree_combined_estimator",
+        "//tensorflow/contrib/boosted_trees:gbdt_batch",
+        "//tensorflow/contrib/layers:layers_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+    ],
+)
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
index ef8dee91b6cc05c4c3dd5eb3c81de4fb65b473e3..31f5c444817b9b82723c86bea3504d4934e57eb8 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
@@ -33,6 +33,8 @@ from tensorflow.python.platform import gfile
 from tensorflow.python.saved_model import loader as saved_model_loader
 from tensorflow.python.saved_model import tag_constants
 
+_SPARSE_FLOAT_FEATURE_NAME_TEMPLATE = "%s_%d"
+
 
 def make_custom_export_strategy(name,
                                 convert_fn,
@@ -147,13 +149,15 @@ def convert_to_universal_format(dtec, sorted_feature_names,
           inequality_test.threshold.float_value = split.threshold
         elif node_type == "sparse_float_binary_split_default_left":
           split = gtflow_node.sparse_float_binary_split_default_left.split
-          node.default_direction = (
-              generic_tree_model_pb2.BinaryNode.LEFT)
-          # TODO(nponomareva): adjust this id assignement when we allow multi-
-          # column sparse tensors.
+          node.default_direction = (generic_tree_model_pb2.BinaryNode.LEFT)
           feature_id = split.feature_column + num_dense
           inequality_test = node.inequality_left_child_test
-          inequality_test.feature_id.id.value = sorted_feature_names[feature_id]
+          inequality_test.feature_id.id.value = (
+              _SPARSE_FLOAT_FEATURE_NAME_TEMPLATE %
+              (sorted_feature_names[feature_id], split.dimension_id))
+          model_and_features.features.pop(sorted_feature_names[feature_id])
+          (model_and_features.features[inequality_test.feature_id.id.value]
+           .SetInParent())
           inequality_test.type = (
               generic_tree_model_pb2.InequalityTest.LESS_OR_EQUAL)
           inequality_test.threshold.float_value = split.threshold
@@ -165,7 +169,12 @@ def convert_to_universal_format(dtec, sorted_feature_names,
           # column sparse tensors.
           feature_id = split.feature_column + num_dense
           inequality_test = node.inequality_left_child_test
-          inequality_test.feature_id.id.value = sorted_feature_names[feature_id]
+          inequality_test.feature_id.id.value = (
+              _SPARSE_FLOAT_FEATURE_NAME_TEMPLATE %
+              (sorted_feature_names[feature_id], split.dimension_id))
+          model_and_features.features.pop(sorted_feature_names[feature_id])
+          (model_and_features.features[inequality_test.feature_id.id.value]
+           .SetInParent())
           inequality_test.type = (
               generic_tree_model_pb2.InequalityTest.LESS_OR_EQUAL)
           inequality_test.threshold.float_value = split.threshold
@@ -201,10 +210,14 @@ def _get_feature_importances(dtec, feature_names, num_dense_floats,
         split_column = feature_names[split.feature_column]
       elif node_type == "sparse_float_binary_split_default_left":
         split = tree_node.sparse_float_binary_split_default_left.split
-        split_column = feature_names[split.feature_column + num_dense_floats]
+        split_column = _SPARSE_FLOAT_FEATURE_NAME_TEMPLATE % (
+            feature_names[split.feature_column + num_dense_floats],
+            split.dimension_id)
       elif node_type == "sparse_float_binary_split_default_right":
         split = tree_node.sparse_float_binary_split_default_right.split
-        split_column = feature_names[split.feature_column + num_dense_floats]
+        split_column = _SPARSE_FLOAT_FEATURE_NAME_TEMPLATE % (
+            feature_names[split.feature_column + num_dense_floats],
+            split.dimension_id)
       elif node_type == "categorical_id_binary_split":
         split = tree_node.categorical_id_binary_split
         split_column = feature_names[split.feature_column + num_dense_floats +
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py
index 4ed18b2d34c5af47826ab1c058f5d13797593bd4..67ec0e16bf815e9dbea6567cc87c3980a825a004 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the conversion code from GTFlow format to Chauffeur."""
+"""Tests for the conversion code and for feature importances export.
+
+Tests that cover conversion from TFBT format to a tensorflow.contrib.
+decision_tree generic_tree_model format and feature importances export.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -95,10 +99,31 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
           }
         }
       }
+      nodes {
+        sparse_float_binary_split_default_right {
+          split {
+            feature_column: 1
+            dimension_id:3
+            threshold: -0.4
+            left_id: 7
+            right_id: 8
+          }
+        }
+        node_metadata {
+            gain: 3600
+        }
+      }
+      nodes {
+        leaf {
+          vector {
+            value: 0.36
+          }
+        }
+      }
       nodes {
         leaf {
           vector {
-            value: 0.3
+            value: 18
           }
         }
       }
@@ -108,17 +133,25 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
     """
     dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
     text_format.Merge(dtec_str, dtec)
-    feature_columns = ["feature_b", "feature_a", "feature_d"]
+    feature_columns = [
+        "feature_b",
+        "feature_a",
+        "feature_a_m",
+        "feature_d",
+    ]
     return dtec, feature_columns
 
   def testConvertModel(self):
     dtec, feature_columns = self._make_trees()
+    # Assume 2 sparse float columns, one with 1 dimension, the second one with
+    # 5 dimensions.
     # The feature columns in the order they were added.
     out = custom_export_strategy.convert_to_universal_format(
-        dtec, feature_columns, 1, 1,
-        1)
+        dtec, feature_columns, 1, 2, 1)
+    # Features a and a_m are sparse float features, a_m is multidimensional.
     expected_tree = """
-    features { key: "feature_a" }
+    features { key: "feature_a_0" }
+    features { key: "feature_a_m_3" }
     features { key: "feature_b" }
     features { key: "feature_d" }
     model {
@@ -169,7 +202,6 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
                   }
                 }
               }
-
               nodes {
                 node_id {
                   value: 1
@@ -196,7 +228,7 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
                   inequality_left_child_test {
                     feature_id {
                       id {
-                        value: "feature_a"
+                        value: "feature_a_0"
                       }
                     }
                     threshold {
@@ -259,14 +291,51 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
                 node_id {
                   value: 6
                 }
+                binary_node {
+                  left_child_id {
+                    value: 7
+                  }
+                  right_child_id {
+                    value: 8
+                  }
+                  default_direction: RIGHT
+                  inequality_left_child_test {
+                      feature_id {
+                        id {
+                          value: "feature_a_m_3"
+                        }
+                      }
+                      threshold {
+                        float_value: -0.4
+                      }
+                  }
+                }
+              }
+              nodes {
+                node_id {
+                  value: 7
+                }
                 leaf {
                   vector {
                     value {
-                      float_value: 0.03
+                      float_value: 0.036
                     }
                   }
                 }
               }
+              nodes {
+                node_id {
+                  value: 8
+                }
+                leaf {
+                  vector {
+                    value {
+                      float_value: 1.8
+                    }
+                  }
+                }
+              }
+
             }
           }
           submodel_id {
@@ -280,12 +349,15 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
   def testFeatureImportance(self):
     dtec, feature_columns = self._make_trees()
     feature_importances = custom_export_strategy._get_feature_importances(
-        dtec, feature_columns, 1, 1, 1)
-    self.assertItemsEqual(["feature_b", "feature_a", "feature_d"],
-                          feature_importances.keys())
+        dtec, feature_columns, 1, 2, 1)
+    self.assertItemsEqual(
+        ["feature_b", "feature_a_0", "feature_a_m_3", "feature_d"],
+        feature_importances.keys())
     self.assertAlmostEqual(50.0, feature_importances["feature_b"], places=4)
-    self.assertAlmostEqual(50.0, feature_importances["feature_a"], places=4)
+    self.assertAlmostEqual(50.0, feature_importances["feature_a_0"], places=4)
     self.assertAlmostEqual(50.0, feature_importances["feature_d"], places=4)
+    self.assertAlmostEqual(
+        360.0, feature_importances["feature_a_m_3"], places=4)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py
new file mode 100644
index 0000000000000000000000000000000000000000..cec3892b57655dc967b4e7926f7f5a6a30084487
--- /dev/null
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py
@@ -0,0 +1,515 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TensorFlow estimators for combined DNN + GBDT training model.
+
+The combined model trains a DNN first, then trains boosted trees to boost the
+logits of the DNN. The input layer of the DNN (including the embeddings learned
+over sparse features) can optionally be provided to the boosted trees as
+an additional input feature.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+
+from tensorflow.contrib import layers
+from tensorflow.contrib.boosted_trees.estimator_batch import trainer_hooks
+from tensorflow.contrib.boosted_trees.python.ops import model_ops
+from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch
+from tensorflow.contrib.layers.python.layers import optimizers
+from tensorflow.contrib.learn.python.learn.estimators import estimator
+from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
+from tensorflow.contrib.learn.python.learn.estimators import model_fn
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.summary import summary
+from tensorflow.python.training import training_util
+
+
+_DNN_LEARNING_RATE = 0.001
+
+
+def _get_optimizer(optimizer):
+  if callable(optimizer):
+    return optimizer()
+  else:
+    return optimizer
+
+
+def _add_hidden_layer_summary(value, tag):
+  summary.scalar("%s_fraction_of_zero_values" % tag, nn.zero_fraction(value))
+  summary.histogram("%s_activation" % tag, value)
+
+
+def _dnn_tree_combined_model_fn(
+    features, labels, mode, head, dnn_hidden_units,
+    dnn_feature_columns, tree_learner_config, num_trees,
+    tree_examples_per_layer,
+    config=None, dnn_optimizer="Adagrad",
+    dnn_activation_fn=nn.relu, dnn_dropout=None,
+    dnn_input_layer_partitioner=None,
+    dnn_input_layer_to_tree=True, dnn_steps_to_train=10000,
+    tree_feature_columns=None,
+    tree_center_bias=True):
+  """DNN and GBDT combined model_fn.
+
+  Args:
+    features: `dict` of `Tensor` objects.
+    labels: Labels used to train on.
+    mode: Mode we are in. (TRAIN/EVAL/INFER)
+    head: A `Head` instance.
+    dnn_hidden_units: List of hidden units per layer.
+    dnn_feature_columns: An iterable containing all the feature columns
+      used by the model's DNN.
+    tree_learner_config: A config for the tree learner.
+    num_trees: Number of trees to grow model to after training DNN.
+    tree_examples_per_layer: Number of examples to accumulate before
+      growing the tree a layer. This value has a big impact on model
+      quality and should be set equal to the number of examples in
+      training dataset if possible. It can also be a function that computes
+      the number of examples based on the depth of the layer that's
+      being built.
+    config: `RunConfig` of the estimator.
+    dnn_optimizer: string, `Optimizer` object, or callable that defines the
+      optimizer to use for training the DNN. If `None`, will use the Adagrad
+      optimizer with default learning rate of 0.001.
+    dnn_activation_fn: Activation function applied to each layer of the DNN.
+      If `None`, will use `tf.nn.relu`.
+    dnn_dropout: When not `None`, the probability to drop out a given
+      unit in the DNN.
+    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
+      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
+    as a feature to the tree.
+    dnn_steps_to_train: Number of steps to train dnn for before switching
+      to gbdt.
+    tree_feature_columns: An iterable containing all the feature columns
+      used by the model's boosted trees. If dnn_input_layer_to_tree is
+      set to True, these features are in addition to dnn_feature_columns.
+    tree_center_bias: Whether a separate tree should be created for
+      first fitting the bias.
+
+  Returns:
+    A `ModelFnOps` object.
+  Raises:
+    ValueError: if inputs are not valid.
+  """
+  if not isinstance(features, dict):
+    raise ValueError("features should be a dictionary of `Tensor`s. "
+                     "Given type: {}".format(type(features)))
+
+  if not dnn_feature_columns:
+    raise ValueError("dnn_feature_columns must be specified")
+
+  # Build DNN Logits.
+  dnn_parent_scope = "dnn"
+  dnn_partitioner = dnn_input_layer_partitioner or (
+      partitioned_variables.min_max_variable_partitioner(
+          max_partitions=config.num_ps_replicas,
+          min_slice_size=64 << 20))
+
+  with variable_scope.variable_scope(
+      dnn_parent_scope,
+      values=tuple(six.itervalues(features)),
+      partitioner=dnn_partitioner):
+
+    with variable_scope.variable_scope(
+        "input_from_feature_columns",
+        values=tuple(six.itervalues(features)),
+        partitioner=dnn_partitioner) as input_layer_scope:
+      input_layer = layers.input_from_feature_columns(
+          columns_to_tensors=features,
+          feature_columns=dnn_feature_columns,
+          weight_collections=[dnn_parent_scope],
+          scope=input_layer_scope)
+    previous_layer = input_layer
+    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
+      with variable_scope.variable_scope(
+          "hiddenlayer_%d" % layer_id,
+          values=(previous_layer,)) as hidden_layer_scope:
+        net = layers.fully_connected(
+            previous_layer,
+            num_hidden_units,
+            activation_fn=dnn_activation_fn,
+            variables_collections=[dnn_parent_scope],
+            scope=hidden_layer_scope)
+        if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
+          net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
+      _add_hidden_layer_summary(net, hidden_layer_scope.name)
+      previous_layer = net
+    with variable_scope.variable_scope(
+        "logits",
+        values=(previous_layer,)) as logits_scope:
+      dnn_logits = layers.fully_connected(
+          previous_layer,
+          head.logits_dimension,
+          activation_fn=None,
+          variables_collections=[dnn_parent_scope],
+          scope=logits_scope)
+    _add_hidden_layer_summary(dnn_logits, logits_scope.name)
+
+    def _dnn_train_op_fn(loss):
+      """Returns the op to optimize the loss."""
+      return optimizers.optimize_loss(
+          loss=loss,
+          global_step=training_util.get_global_step(),
+          learning_rate=_DNN_LEARNING_RATE,
+          optimizer=_get_optimizer(dnn_optimizer),
+          name=dnn_parent_scope,
+          variables=ops.get_collection(
+              ops.GraphKeys.TRAINABLE_VARIABLES,
+              scope=dnn_parent_scope),
+          # Empty summaries to prevent optimizers from logging training_loss.
+          summaries=[])
+
+  # Build Tree Logits.
+  global_step = training_util.get_global_step()
+  with ops.device(global_step.device):
+    ensemble_handle = model_ops.tree_ensemble_variable(
+        stamp_token=0,
+        tree_ensemble_config="",  # Initialize an empty ensemble.
+        name="ensemble_model")
+
+  tree_features = features.copy()
+  if dnn_input_layer_to_tree:
+    tree_features["dnn_input_layer"] = input_layer
+    tree_feature_columns.append(layers.real_valued_column("dnn_input_layer"))
+  gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
+      is_chief=config.is_chief,
+      num_ps_replicas=config.num_ps_replicas,
+      ensemble_handle=ensemble_handle,
+      center_bias=tree_center_bias,
+      examples_per_layer=tree_examples_per_layer,
+      learner_config=tree_learner_config,
+      feature_columns=tree_feature_columns,
+      logits_dimension=head.logits_dimension,
+      features=tree_features)
+
+  with ops.name_scope("gbdt"):
+    predictions_dict = gbdt_model.predict(mode)
+    tree_logits = predictions_dict["predictions"]
+
+    def _tree_train_op_fn(loss):
+      """Returns the op to optimize the loss."""
+      update_op = gbdt_model.train(loss, predictions_dict, labels)
+      with ops.control_dependencies(
+          [update_op]), (ops.colocate_with(global_step)):
+        update_op = state_ops.assign_add(global_step, 1).op
+        return update_op
+
+  tree_train_logits = dnn_logits + tree_logits
+
+  def _no_train_op_fn(loss):
+    """Returns a no-op."""
+    del loss
+    return control_flow_ops.no_op()
+
+  model_fn_ops = head.create_model_fn_ops(
+      features=features,
+      mode=mode,
+      labels=labels,
+      train_op_fn=_no_train_op_fn,
+      logits=tree_train_logits)
+  dnn_train_op = head.create_model_fn_ops(
+      features=features,
+      mode=mode,
+      labels=labels,
+      train_op_fn=_dnn_train_op_fn,
+      logits=dnn_logits).train_op
+  tree_train_op = head.create_model_fn_ops(
+      features=tree_features,
+      mode=mode,
+      labels=labels,
+      train_op_fn=_tree_train_op_fn,
+      logits=tree_train_logits).train_op
+
+  if tree_center_bias:
+    num_trees += 1
+  finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()
+
+  model_fn_ops.training_hooks.extend([
+      trainer_hooks.SwitchTrainOp(
+          dnn_train_op, dnn_steps_to_train, tree_train_op),
+      trainer_hooks.StopAfterNTrees(
+          num_trees, attempted_trees, finalized_trees)])
+
+  return model_fn_ops
+
+
+class DNNBoostedTreeCombinedClassifier(estimator.Estimator):
+  """A classifier that uses a combined DNN/GBDT model."""
+
+  def __init__(self,
+               dnn_hidden_units,
+               dnn_feature_columns,
+               tree_learner_config,
+               num_trees,
+               tree_examples_per_layer,
+               n_classes=2,
+               weight_column_name=None,
+               model_dir=None,
+               config=None,
+               label_name=None,
+               label_keys=None,
+               feature_engineering_fn=None,
+               dnn_optimizer="Adagrad",
+               dnn_activation_fn=nn.relu,
+               dnn_dropout=None,
+               dnn_input_layer_partitioner=None,
+               dnn_input_layer_to_tree=True,
+               dnn_steps_to_train=10000,
+               tree_feature_columns=None,
+               tree_center_bias=True):
+    """Initializes a DNNBoostedTreeCombinedClassifier instance.
+
+    Args:
+      dnn_hidden_units: List of hidden units per layer for DNN.
+      dnn_feature_columns: An iterable containing all the feature columns
+        used by the model's DNN.
+      tree_learner_config: A config for the tree learner.
+      num_trees: Number of trees to grow model to after training DNN.
+      tree_examples_per_layer: Number of examples to accumulate before
+        growing the tree a layer. This value has a big impact on model
+        quality and should be set equal to the number of examples in
+        training dataset if possible. It can also be a function that computes
+        the number of examples based on the depth of the layer that's
+        being built.
+      n_classes: The number of label classes.
+      weight_column_name: The name of weight column.
+      model_dir: Directory for model exports.
+      config: `RunConfig` of the estimator.
+      label_name: String, name of the key in label dict. Can be null if label
+        is a tensor (single headed models).
+      label_keys: Optional list of strings with size `[n_classes]` defining the
+        label vocabulary. Only supported for `n_classes` > 2.
+      feature_engineering_fn: Feature engineering function. Takes features and
+        labels which are the output of `input_fn` and returns features and
+        labels which will be fed into the model.
+      dnn_optimizer: string, `Optimizer` object, or callable that defines the
+        optimizer to use for training the DNN. If `None`, will use the Adagrad
+        optimizer with default learning rate.
+      dnn_activation_fn: Activation function applied to each layer of the DNN.
+        If `None`, will use `tf.nn.relu`.
+      dnn_dropout: When not `None`, the probability to drop out a given
+        unit in the DNN.
+      dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
+        Defaults to `min_max_variable_partitioner` with `min_slice_size`
+        64 << 20.
+      dnn_input_layer_to_tree: Whether to provide the DNN's input layer
+      as a feature to the tree.
+      dnn_steps_to_train: Number of steps to train dnn for before switching
+        to gbdt.
+      tree_feature_columns: An iterable containing all the feature columns
+        used by the model's boosted trees. If dnn_input_layer_to_tree is
+        set to True, these features are in addition to dnn_feature_columns.
+      tree_center_bias: Whether a separate tree should be created for
+        first fitting the bias.
+    """
+    head = head_lib.multi_class_head(
+        n_classes=n_classes,
+        label_name=label_name,
+        label_keys=label_keys,
+        weight_column_name=weight_column_name,
+        enable_centered_bias=False)
+
+    def _model_fn(features, labels, mode, config):
+      return _dnn_tree_combined_model_fn(
+          features, labels, mode, head, dnn_hidden_units, dnn_feature_columns,
+          tree_learner_config, num_trees, tree_examples_per_layer, config,
+          dnn_optimizer, dnn_activation_fn, dnn_dropout,
+          dnn_input_layer_partitioner, dnn_input_layer_to_tree,
+          dnn_steps_to_train,
+          tree_feature_columns, tree_center_bias)
+
+    super(DNNBoostedTreeCombinedClassifier, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir,
+        config=config, feature_engineering_fn=feature_engineering_fn)
+
+
+class DNNBoostedTreeCombinedRegressor(estimator.Estimator):
+  """A regressor that uses a combined DNN/GBDT model."""
+
+  def __init__(self,
+               dnn_hidden_units,
+               dnn_feature_columns,
+               tree_learner_config,
+               num_trees,
+               tree_examples_per_layer,
+               weight_column_name=None,
+               model_dir=None,
+               config=None,
+               label_name=None,
+               label_dimension=1,
+               feature_engineering_fn=None,
+               dnn_optimizer="Adagrad",
+               dnn_activation_fn=nn.relu,
+               dnn_dropout=None,
+               dnn_input_layer_partitioner=None,
+               dnn_input_layer_to_tree=True,
+               dnn_steps_to_train=10000,
+               tree_feature_columns=None,
+               tree_center_bias=True):
+    """Initializes a DNNBoostedTreeCombinedRegressor instance.
+
+    Args:
+      dnn_hidden_units: List of hidden units per layer for DNN.
+      dnn_feature_columns: An iterable containing all the feature columns
+        used by the model's DNN.
+      tree_learner_config: A config for the tree learner.
+      num_trees: Number of trees to grow model to after training DNN.
+      tree_examples_per_layer: Number of examples to accumulate before
+        growing the tree a layer. This value has a big impact on model
+        quality and should be set equal to the number of examples in
+        training dataset if possible. It can also be a function that computes
+        the number of examples based on the depth of the layer that's
+        being built.
+      weight_column_name: The name of weight column.
+      model_dir: Directory for model exports.
+      config: `RunConfig` of the estimator.
+      label_name: String, name of the key in label dict. Can be null if label
+        is a tensor (single headed models).
+      label_dimension: Number of regression labels per example. This is the size
+        of the last dimension of the labels `Tensor` (typically, this has shape
+        `[batch_size, label_dimension]`).
+      feature_engineering_fn: Feature engineering function. Takes features and
+        labels which are the output of `input_fn` and returns features and
+        labels which will be fed into the model.
+      dnn_optimizer: string, `Optimizer` object, or callable that defines the
+        optimizer to use for training the DNN. If `None`, will use the Adagrad
+        optimizer with default learning rate.
+      dnn_activation_fn: Activation function applied to each layer of the DNN.
+        If `None`, will use `tf.nn.relu`.
+      dnn_dropout: When not `None`, the probability to drop out a given
+        unit in the DNN.
+      dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
+        Defaults to `min_max_variable_partitioner` with `min_slice_size`
+        64 << 20.
+      dnn_input_layer_to_tree: Whether to provide the DNN's input layer
+      as a feature to the tree.
+      dnn_steps_to_train: Number of steps to train dnn for before switching
+        to gbdt.
+      tree_feature_columns: An iterable containing all the feature columns
+        used by the model's boosted trees. If dnn_input_layer_to_tree is
+        set to True, these features are in addition to dnn_feature_columns.
+      tree_center_bias: Whether a separate tree should be created for
+        first fitting the bias.
+    """
+    head = head_lib.regression_head(
+        label_name=label_name,
+        label_dimension=label_dimension,
+        weight_column_name=weight_column_name,
+        enable_centered_bias=False)
+
+    # num_classes needed for GradientBoostedDecisionTreeModel
+    if label_dimension == 1:
+      tree_learner_config.num_classes = 2
+    else:
+      tree_learner_config.num_classes = label_dimension
+
+    def _model_fn(features, labels, mode, config):
+      return _dnn_tree_combined_model_fn(
+          features, labels, mode, head, dnn_hidden_units, dnn_feature_columns,
+          tree_learner_config, num_trees, tree_examples_per_layer, config,
+          dnn_optimizer, dnn_activation_fn, dnn_dropout,
+          dnn_input_layer_partitioner, dnn_input_layer_to_tree,
+          dnn_steps_to_train, tree_feature_columns, tree_center_bias)
+
+    super(DNNBoostedTreeCombinedRegressor, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir,
+        config=config, feature_engineering_fn=feature_engineering_fn)
+
+
+class DNNBoostedTreeCombinedEstimator(estimator.Estimator):
+  """An estimator that uses a combined DNN/GBDT model.
+
+  Useful for training with user specified `Head`.
+  """
+
+  def __init__(self,
+               dnn_hidden_units,
+               dnn_feature_columns,
+               tree_learner_config,
+               num_trees,
+               tree_examples_per_layer,
+               head,
+               model_dir=None,
+               config=None,
+               feature_engineering_fn=None,
+               dnn_optimizer="Adagrad",
+               dnn_activation_fn=nn.relu,
+               dnn_dropout=None,
+               dnn_input_layer_partitioner=None,
+               dnn_input_layer_to_tree=True,
+               dnn_steps_to_train=10000,
+               tree_feature_columns=None,
+               tree_center_bias=True):
+    """Initializes a DNNBoostedTreeCombinedEstimator instance.
+
+    Args:
+      dnn_hidden_units: List of hidden units per layer for DNN.
+      dnn_feature_columns: An iterable containing all the feature columns
+        used by the model's DNN.
+      tree_learner_config: A config for the tree learner.
+      num_trees: Number of trees to grow model to after training DNN.
+      tree_examples_per_layer: Number of examples to accumulate before
+        growing the tree a layer. This value has a big impact on model
+        quality and should be set equal to the number of examples in
+        training dataset if possible. It can also be a function that computes
+        the number of examples based on the depth of the layer that's
+        being built.
+      head: `Head` instance.
+      model_dir: Directory for model exports.
+      config: `RunConfig` of the estimator.
+      feature_engineering_fn: Feature engineering function. Takes features and
+        labels which are the output of `input_fn` and returns features and
+        labels which will be fed into the model.
+      dnn_optimizer: string, `Optimizer` object, or callable that defines the
+        optimizer to use for training the DNN. If `None`, will use the Adagrad
+        optimizer with default learning rate.
+      dnn_activation_fn: Activation function applied to each layer of the DNN.
+        If `None`, will use `tf.nn.relu`.
+      dnn_dropout: When not `None`, the probability to drop out a given
+        unit in the DNN.
+      dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
+        Defaults to `min_max_variable_partitioner` with `min_slice_size`
+        64 << 20.
+      dnn_input_layer_to_tree: Whether to provide the DNN's input layer
+      as a feature to the tree.
+      dnn_steps_to_train: Number of steps to train dnn for before switching
+        to gbdt.
+      tree_feature_columns: An iterable containing all the feature columns
+        used by the model's boosted trees. If dnn_input_layer_to_tree is
+        set to True, these features are in addition to dnn_feature_columns.
+      tree_center_bias: Whether a separate tree should be created for
+        first fitting the bias.
+    """
+    def _model_fn(features, labels, mode, config):
+      return _dnn_tree_combined_model_fn(
+          features, labels, mode, head, dnn_hidden_units, dnn_feature_columns,
+          tree_learner_config, num_trees, tree_examples_per_layer, config,
+          dnn_optimizer, dnn_activation_fn, dnn_dropout,
+          dnn_input_layer_partitioner, dnn_input_layer_to_tree,
+          dnn_steps_to_train,
+          tree_feature_columns, tree_center_bias)
+
+    super(DNNBoostedTreeCombinedEstimator, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir,
+        config=config, feature_engineering_fn=feature_engineering_fn)
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..83d58c561008e8a5a69eb503d1605bb9e940f281
--- /dev/null
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
@@ -0,0 +1,105 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for combined DNN + GBDT estimators."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tempfile
+
+from tensorflow.contrib.boosted_trees.estimator_batch import dnn_tree_combined_estimator as estimator
+from tensorflow.contrib.boosted_trees.proto import learner_pb2
+from tensorflow.contrib.layers.python.layers import feature_column
+from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils
+from tensorflow.contrib.learn.python.learn.estimators import run_config
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import googletest
+
+
+def _train_input_fn():
+  features = {
+      "x": constant_op.constant([[2.], [1.], [1.]])
+  }
+  label = constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)
+  return features, label
+
+
+def _eval_input_fn():
+  features = {
+      "x": constant_op.constant([[1.], [2.], [2.]])
+  }
+  label = constant_op.constant([[0], [1], [1]], dtype=dtypes.int32)
+  return features, label
+
+
+class DNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase):
+
+  def testClassifierContract(self):
+    estimator_test_utils.assert_estimator_contract(
+        self, estimator.DNNBoostedTreeCombinedClassifier)
+
+  def testRegressorContract(self):
+    estimator_test_utils.assert_estimator_contract(
+        self, estimator.DNNBoostedTreeCombinedRegressor)
+
+  def testEstimatorContract(self):
+    estimator_test_utils.assert_estimator_contract(
+        self, estimator.DNNBoostedTreeCombinedEstimator)
+
+  def testNoDNNFeatureColumns(self):
+    learner_config = learner_pb2.LearnerConfig()
+    learner_config.num_classes = 2
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        "dnn_feature_columns must be specified"):
+      classifier = estimator.DNNBoostedTreeCombinedClassifier(
+          dnn_hidden_units=[1],
+          dnn_feature_columns=[],
+          tree_learner_config=learner_config,
+          num_trees=1,
+          tree_examples_per_layer=3,
+          n_classes=2)
+      classifier.fit(input_fn=_train_input_fn, steps=5)
+
+  def testFitAndEvaluateDontThrowException(self):
+    learner_config = learner_pb2.LearnerConfig()
+    learner_config.num_classes = 2
+    learner_config.constraints.max_tree_depth = 1
+    model_dir = tempfile.mkdtemp()
+    config = run_config.RunConfig()
+
+    classifier = estimator.DNNBoostedTreeCombinedClassifier(
+        dnn_hidden_units=[1],
+        dnn_feature_columns=[feature_column.real_valued_column("x")],
+        tree_learner_config=learner_config,
+        num_trees=1,
+        tree_examples_per_layer=3,
+        n_classes=2,
+        model_dir=model_dir,
+        config=config,
+        dnn_steps_to_train=10,
+        dnn_input_layer_to_tree=False,
+        tree_feature_columns=[feature_column.real_valued_column("x")])
+
+    classifier.fit(input_fn=_train_input_fn, steps=15)
+    classifier.evaluate(input_fn=_eval_input_fn, steps=1)
+
+
+if __name__ == "__main__":
+  googletest.main()
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/trainer_hooks.py b/tensorflow/contrib/boosted_trees/estimator_batch/trainer_hooks.py
index 79193fffc3d3fa97e20a12181bf20e6ad86dcb58..2e4151cac40f770e2bece70d752122eb7f34dd40 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/trainer_hooks.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/trainer_hooks.py
@@ -24,6 +24,7 @@ from tensorflow.contrib.learn.python.learn import session_run_hook
 from tensorflow.contrib.learn.python.learn.session_run_hook import SessionRunArgs
 from tensorflow.core.framework.summary_pb2 import Summary
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import training_util
 from tensorflow.python.training.summary_io import SummaryWriterCache
@@ -175,3 +176,40 @@ class StopAfterNTrees(session_run_hook.SessionRunHook):
       logging.info("Requesting stop since we have reached %d trees.",
                    num_finalized_trees)
       run_context.request_stop()
+
+
+class SwitchTrainOp(session_run_hook.SessionRunHook):
+  """Hook that switches the train op after specified number of steps.
+
+  Hook that replaces the train op depending on the number of steps of training
+  that have taken place. The first_train_op is used till train_steps steps
+  are reached. Thereafter the second_train_op is used.
+  """
+
+  def __init__(self, first_train_op, train_steps, second_train_op):
+    """Initializes a `SwitchTrainOp`."""
+    self._first_train_op = first_train_op
+    self._second_train_op = second_train_op
+    self._train_steps = train_steps
+
+  def _get_train_op_for_global_step(self, current_step):
+    """Gets train_op for current global step."""
+    if current_step < self._train_steps:
+      return self._first_train_op
+    return self._second_train_op
+
+  def begin(self):
+    self._global_step_tensor = training_util.get_global_step()
+    self._current_train_op = control_flow_ops.no_op()
+    if self._global_step_tensor is None:
+      raise RuntimeError(
+          "Global step should be created to use SwitchTrainOp.")
+
+  def before_run(self, run_context):  # pylint: disable=unused-argument
+    return session_run_hook.SessionRunArgs(
+        {"global_step": self._global_step_tensor,
+         "train_op": self._current_train_op})
+
+  def after_run(self, run_context, run_values):
+    self._current_train_op = self._get_train_op_for_global_step(
+        run_values.results["global_step"])
diff --git a/tensorflow/contrib/boosted_trees/examples/boston_combined.py b/tensorflow/contrib/boosted_trees/examples/boston_combined.py
new file mode 100644
index 0000000000000000000000000000000000000000..e04b56afbfd266dc13a5b0d78d171ea273415ee3
--- /dev/null
+++ b/tensorflow/contrib/boosted_trees/examples/boston_combined.py
@@ -0,0 +1,165 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Regression on Boston housing data using DNNBoostedTreeCombinedRegressor.
+
+  Example Usage:
+
+  python tensorflow/contrib/boosted_trees/examples/boston_combined.py \
+  --batch_size=404 --output_dir="/tmp/boston" \
+  --dnn_hidden_units="8,4" --dnn_steps_to_train=1000 \
+  --tree_depth=4 --tree_learning_rate=0.1 \
+  --num_trees=100 --tree_l2=0.001 --num_eval_steps=1 \
+  --vmodule=training_ops=1
+
+  When training is done, mean squared error on eval data is reported.
+  Point tensorboard to the directory for the run to see how the training
+  progresses:
+
+  tensorboard --logdir=/tmp/boston
+
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import sys
+import tensorflow as tf
+
+from tensorflow.contrib.boosted_trees.estimator_batch.dnn_tree_combined_estimator import DNNBoostedTreeCombinedRegressor
+from tensorflow.contrib.boosted_trees.proto import learner_pb2
+from tensorflow.contrib.layers.python.layers import feature_column
+from tensorflow.contrib.learn.python.learn import learn_runner
+from tensorflow.contrib.learn.python.learn.utils import input_fn_utils
+from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils
+
+_BOSTON_NUM_FEATURES = 13
+
+
+def _get_estimator(output_dir, feature_cols):
+  """Configures DNNBoostedTreeCombinedRegressor based on flags."""
+  learner_config = learner_pb2.LearnerConfig()
+  learner_config.learning_rate_tuner.fixed.learning_rate = (
+      FLAGS.tree_learning_rate)
+  learner_config.regularization.l1 = 0.0
+  learner_config.regularization.l2 = FLAGS.tree_l2
+  learner_config.constraints.max_tree_depth = FLAGS.tree_depth
+
+  run_config = tf.contrib.learn.RunConfig(save_summary_steps=1)
+
+  # Create a DNNBoostedTreeCombinedRegressor estimator.
+  estimator = DNNBoostedTreeCombinedRegressor(
+      dnn_hidden_units=[int(x) for x in FLAGS.dnn_hidden_units.split(",")],
+      dnn_feature_columns=feature_cols,
+      tree_learner_config=learner_config,
+      num_trees=FLAGS.num_trees,
+      # This should be the number of examples. For large datasets it can be
+      # larger than the batch_size.
+      tree_examples_per_layer=FLAGS.batch_size,
+      model_dir=output_dir,
+      config=run_config,
+      dnn_input_layer_to_tree=True,
+      dnn_steps_to_train=FLAGS.dnn_steps_to_train)
+  return estimator
+
+
+def _make_experiment_fn(output_dir):
+  """Creates experiment for DNNBoostedTreeCombinedRegressor."""
+  (x_train, y_train), (x_test,
+                       y_test) = tf.keras.datasets.boston_housing.load_data()
+
+  train_input_fn = tf.estimator.inputs.numpy_input_fn(
+      x={"x": x_train},
+      y=y_train,
+      batch_size=FLAGS.batch_size,
+      num_epochs=None,
+      shuffle=True)
+  eval_input_fn = tf.estimator.inputs.numpy_input_fn(
+      x={"x": x_test}, y=y_test, num_epochs=1, shuffle=False)
+
+  feature_columns = [
+      feature_column.real_valued_column("x", dimension=_BOSTON_NUM_FEATURES)
+  ]
+  feature_spec = tf.contrib.layers.create_feature_spec_for_parsing(
+      feature_columns)
+  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)
+  export_strategies = [
+      saved_model_export_utils.make_export_strategy(serving_input_fn)]
+  return tf.contrib.learn.Experiment(
+      estimator=_get_estimator(output_dir, feature_columns),
+      train_input_fn=train_input_fn,
+      eval_input_fn=eval_input_fn,
+      train_steps=None,
+      eval_steps=FLAGS.num_eval_steps,
+      eval_metrics=None,
+      export_strategies=export_strategies)
+
+
+def main(unused_argv):
+  learn_runner.run(
+      experiment_fn=_make_experiment_fn,
+      output_dir=FLAGS.output_dir,
+      schedule="train_and_evaluate")
+
+
+if __name__ == "__main__":
+  tf.logging.set_verbosity(tf.logging.INFO)
+  parser = argparse.ArgumentParser()
+  # Define the list of flags that users can change.
+  parser.add_argument(
+      "--batch_size",
+      type=int,
+      default=1000,
+      help="The batch size for reading data.")
+  parser.add_argument(
+      "--output_dir",
+      type=str,
+      required=True,
+      help="Choose the dir for the output.")
+  parser.add_argument(
+      "--num_eval_steps",
+      type=int,
+      default=1,
+      help="The number of steps to run evaluation for.")
+  # Flags for configuring DNNBoostedTreeCombinedRegressor.
+  parser.add_argument(
+      "--dnn_hidden_units",
+      type=str,
+      default="8,4",
+      help="Hidden layers for DNN.")
+  parser.add_argument(
+      "--dnn_steps_to_train",
+      type=int,
+      default=1000,
+      help="Number of steps to train DNN.")
+  parser.add_argument(
+      "--tree_depth", type=int, default=4, help="Maximum depth of trees.")
+  parser.add_argument(
+      "--tree_l2", type=float, default=1.0, help="l2 regularization per batch.")
+  parser.add_argument(
+      "--tree_learning_rate",
+      type=float,
+      default=0.1,
+      help=("Learning rate (shrinkage weight) with which each "
+            "new tree is added."))
+  parser.add_argument(
+      "--num_trees",
+      type=int,
+      default=None,
+      required=True,
+      help="Number of trees to grow before stopping.")
+
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc
index c77d90e243c304ec8e9a10a0b63401f9bd825c3e..7f8dea1d3c2a04b725843f6e2932a0cdfbc7733c 100644
--- a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc
+++ b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc
@@ -361,10 +361,27 @@ class GrowTreeEnsembleOp : public OpKernel {
     // Increment attempt stats.
     ensemble_resource->IncrementAttempts();
 
+    // In case we want to do feature selection and we have reached the limit,
+    // build a list of handlers used so far to avoid adding new features.
+    std::vector<int64> allowed_handlers;
+    if (learner_config_.constraints().max_number_of_unique_feature_columns() >
+        0) {
+      allowed_handlers = ensemble_resource->GetUsedHandlers();
+      // TODO(soroush): We can disable handlers that are not going to be used to
+      // avoid unnecessary computations.
+      if (allowed_handlers.size() <
+          learner_config_.constraints()
+              .max_number_of_unique_feature_columns()) {
+        // We have not reached the limit yet. Empty the list of allow features
+        // which means we can keep adding new features.
+        allowed_handlers.clear();
+      }
+    }
+
     // Find best splits for each active partition.
     std::map<int32, SplitCandidate> best_splits;
-    FindBestSplitsPerPartition(context, partition_ids_list, gains_list,
-                               splits_list, &best_splits);
+    FindBestSplitsPerPartition(context, allowed_handlers, partition_ids_list,
+                               gains_list, splits_list, &best_splits);
 
     // No-op if no new splits can be considered.
     if (best_splits.empty()) {
@@ -381,7 +398,8 @@ class GrowTreeEnsembleOp : public OpKernel {
 
     // Split tree nodes.
     for (auto& split_entry : best_splits) {
-      SplitTreeNode(split_entry.first, &split_entry.second, tree_config);
+      SplitTreeNode(split_entry.first, &split_entry.second, tree_config,
+                    ensemble_resource);
     }
 
     // Post-prune finalized tree if needed.
@@ -403,12 +421,20 @@ class GrowTreeEnsembleOp : public OpKernel {
   // Helper method which effectively does a reduce over all split candidates
   // and finds the best split for each partition.
   void FindBestSplitsPerPartition(
-      OpKernelContext* const context, const OpInputList& partition_ids_list,
-      const OpInputList& gains_list, const OpInputList& splits_list,
+      OpKernelContext* const context,
+      const std::vector<int64>& allowed_handlers,  // Empty means all handlers.
+      const OpInputList& partition_ids_list, const OpInputList& gains_list,
+      const OpInputList& splits_list,
       std::map<int32, SplitCandidate>* best_splits) {
     // Find best split per partition going through every feature candidate.
     // TODO(salehay): Is this worth parallelizing?
     for (int64 handler_id = 0; handler_id < num_handlers_; ++handler_id) {
+      if (!allowed_handlers.empty()) {
+        if (!std::binary_search(allowed_handlers.begin(),
+                                allowed_handlers.end(), handler_id)) {
+          continue;
+        }
+      }
       const auto& partition_ids = partition_ids_list[handler_id].vec<int32>();
       const auto& gains = gains_list[handler_id].vec<float>();
       const auto& splits = splits_list[handler_id].vec<string>();
@@ -592,8 +618,10 @@ class GrowTreeEnsembleOp : public OpKernel {
 
   // Helper method to split a tree node and append its respective
   // leaf children given the split candidate.
-  void SplitTreeNode(const int32 node_id, SplitCandidate* split,
-                     boosted_trees::trees::DecisionTreeConfig* tree_config) {
+  void SplitTreeNode(
+      const int32 node_id, SplitCandidate* split,
+      boosted_trees::trees::DecisionTreeConfig* tree_config,
+      boosted_trees::models::DecisionTreeEnsembleResource* ensemble_resource) {
     // No-op if we have no real node.
     CHECK(node_id < tree_config->nodes_size())
         << "Invalid node " << node_id << " to split.";
@@ -633,6 +661,9 @@ class GrowTreeEnsembleOp : public OpKernel {
     // Replace node in tree.
     (*tree_config->mutable_nodes(node_id)) =
         *split->split_info.mutable_split_node();
+    if (learner_config_.constraints().max_number_of_unique_feature_columns()) {
+      ensemble_resource->MaybeAddUsedHandler(split->handler_id);
+    }
   }
 
   void PruneTree(boosted_trees::trees::DecisionTreeConfig* tree_config) {
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py
index 72e20aaa127cda592bd314786cddb925cc87a075..7df514cd207c5e781f3b4abaa2020016b197669d 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py
+++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py
@@ -436,7 +436,7 @@ def dense_make_stats_update(is_active, are_buckets_ready, float_column,
     quantized_feature = quantile_ops.quantiles([float_column], [],
                                                [quantile_buckets], [], [])
     quantized_feature = math_ops.cast(quantized_feature[0], dtypes.int64)
-    quantized_feature = array_ops.squeeze(quantized_feature)
+    quantized_feature = array_ops.squeeze(quantized_feature, axis=0)
     return (example_partition_ids, quantized_feature, gradients, hessians)
 
   def not_ready_inputs_fn():
@@ -468,7 +468,7 @@ def sparse_make_stats_update(
                                                [sparse_column_indices])
 
     quantized_feature = math_ops.cast(quantized_feature[1], dtypes.int64)
-    quantized_feature = array_ops.squeeze(quantized_feature)
+    quantized_feature = array_ops.squeeze(quantized_feature, axis=0)
 
     example_indices, _ = array_ops.split(
         sparse_column_indices, num_or_size_splits=2, axis=1)
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py
index ee16a5f838a65f20db4436eb86527518621b6d8d..54d03018d9e266beabbbabd78ebbb80cfe689c04 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py
+++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py
@@ -1121,6 +1121,87 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase):
     self.assertEqual(len(gains), 0)
     self.assertEqual(len(splits), 0)
 
+  def testDegenerativeCase(self):
+    with self.test_session() as sess:
+      # One data example only, one leaf and thus one quantile bucket.The same
+      # situation is when all examples have the same values. This case was
+      # causing before a failure.
+      gradients = array_ops.constant([0.2])
+      hessians = array_ops.constant([0.12])
+      example_partitions = array_ops.constant([1], dtype=dtypes.int32)
+      indices = array_ops.constant([[0, 0]], dtype=dtypes.int64)
+      values = array_ops.constant([0.58])
+      sparse_column = sparse_tensor.SparseTensor(indices, values, [1, 1])
+
+      gradient_shape = tensor_shape.scalar()
+      hessian_shape = tensor_shape.scalar()
+      class_id = -1
+
+      split_handler = ordinal_split_handler.SparseSplitHandler(
+          l1_regularization=0,
+          l2_regularization=2,
+          tree_complexity_regularization=0,
+          min_node_weight=0,
+          epsilon=0.01,
+          num_quantiles=2,
+          feature_column_group_id=0,
+          sparse_float_column=sparse_column,
+          init_stamp_token=0,
+          gradient_shape=gradient_shape,
+          hessian_shape=hessian_shape,
+          multiclass_strategy=learner_pb2.LearnerConfig.TREE_PER_CLASS)
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      empty_gradients, empty_hessians = get_empty_tensors(
+          gradient_shape, hessian_shape)
+      example_weights = array_ops.ones([1, 1], dtypes.float32)
+
+      update_1 = split_handler.update_stats_sync(
+          0,
+          example_partitions,
+          gradients,
+          hessians,
+          empty_gradients,
+          empty_hessians,
+          example_weights,
+          is_active=array_ops.constant([True, True]))
+      with ops.control_dependencies([update_1]):
+        are_splits_ready = split_handler.make_splits(0, 1, class_id)[0]
+
+      with ops.control_dependencies([are_splits_ready]):
+        update_2 = split_handler.update_stats_sync(
+            1,
+            example_partitions,
+            gradients,
+            hessians,
+            empty_gradients,
+            empty_hessians,
+            example_weights,
+            is_active=array_ops.constant([True, True]))
+      with ops.control_dependencies([update_2]):
+        are_splits_ready2, partitions, gains, splits = (
+            split_handler.make_splits(1, 2, class_id))
+        are_splits_ready, are_splits_ready2, partitions, gains, splits = (
+            sess.run([
+                are_splits_ready, are_splits_ready2, partitions, gains, splits
+            ]))
+
+    # During the first iteration, inequality split handlers are not going to
+    # have any splits. Make sure that we return not_ready in that case.
+    self.assertFalse(are_splits_ready)
+    self.assertTrue(are_splits_ready2)
+
+    self.assertAllEqual([1], partitions)
+    self.assertAllEqual([0.0], gains)
+
+    split_info = split_info_pb2.SplitInfo()
+    split_info.ParseFromString(splits[0])
+    split_node = split_info.split_node.sparse_float_binary_split_default_left
+
+    self.assertEqual(0, split_node.split.feature_column)
+
+    self.assertAllClose(0.58, split_node.split.threshold)
+
 
 if __name__ == "__main__":
   googletest.main()
diff --git a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc
index 0d46565a1962b88cbb267f3d6043610758790578..1297aa884938f2f099a32568acc80c6cd8162651 100644
--- a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc
+++ b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable.cc
@@ -51,7 +51,7 @@ class IndicesRowIterator
     return tmp;
   }
 
-  reference operator*() { return iter_->ix()(row_idx_, 0); }
+  reference operator*() const { return iter_->ix()(row_idx_, 0); }
 
   pointer operator->() { return &iter_->ix()(row_idx_, 0); }
 
@@ -97,7 +97,7 @@ class IndicesRowIterator
   }
 
   bool operator<(const IndicesRowIterator& other) const {
-	return (row_idx_ < other.row_idx_);
+    return (row_idx_ < other.row_idx_);
   }
 
   bool operator==(const IndicesRowIterator& other) const {
diff --git a/tensorflow/contrib/boosted_trees/proto/learner.proto b/tensorflow/contrib/boosted_trees/proto/learner.proto
index 919e7cd81427c27cf892bc77998f52406d2bcf15..d84ba7438e7f03685d5bafca52ff8283f0fce898 100644
--- a/tensorflow/contrib/boosted_trees/proto/learner.proto
+++ b/tensorflow/contrib/boosted_trees/proto/learner.proto
@@ -22,6 +22,10 @@ message TreeConstraintsConfig {
 
   // Min hessian weight per node.
   float min_node_weight = 2;
+
+  // Maximum number of unique features used in the tree. Zero means there is no
+  // limit.
+  int64 max_number_of_unique_feature_columns = 3;
 }
 
 // LearningRateConfig describes all supported learning rate tuners.
diff --git a/tensorflow/contrib/boosted_trees/proto/tree_config.proto b/tensorflow/contrib/boosted_trees/proto/tree_config.proto
index fc570c1083d01a65760a456c109dad93afd9f62a..4407c4d981785a279b6296f4726a221cacb4c5b1 100644
--- a/tensorflow/contrib/boosted_trees/proto/tree_config.proto
+++ b/tensorflow/contrib/boosted_trees/proto/tree_config.proto
@@ -128,6 +128,10 @@ message GrowingMetadata {
   // Number of layers that we have attempted to build. After pruning, these
   // layers might have been removed.
   int64 num_layers_attempted = 2;
+
+  // Sorted list of column handlers that have been used in at least one split
+  // so far.
+  repeated int64 used_handler_ids = 3;
 }
 
 // DecisionTreeEnsembleConfig describes an ensemble of decision trees.
diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py
index c2e65b643df90e88aadb0bb9acaf692da35b1a16..8ca1aabacaf53b66aaba184962922294427d6803 100644
--- a/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py
+++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py
@@ -63,7 +63,7 @@ def _gen_learner_config(num_classes,
   if dropout_prob_of_skipping is not None:
     config.learning_rate_tuner.dropout.dropout_prob_of_skipping = (
         dropout_prob_of_skipping)
-  return config.SerializeToString()
+  return config
 
 
 def _gen_dense_split_info(fc, threshold, left_weight, right_weight):
@@ -145,7 +145,7 @@ class CenterTreeEnsembleBiasOpTest(test_util.TensorFlowTestCase):
           pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE,
           growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE,
           # Dropout does not change anything here.
-          dropout_probability=0.5)
+          dropout_probability=0.5).SerializeToString()
 
       # Center bias for the initial step.
       grads = constant_op.constant([0.4, -0.3])
@@ -296,7 +296,7 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase):
           pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE,
           growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE,
           # Dropout does not change anything here, tree is not finalized.
-          dropout_probability=0.5)
+          dropout_probability=0.5).SerializeToString()
 
       # Prepare handler inputs.
       # Note that handlers 1 & 3 have the same gain but different splits.
@@ -443,7 +443,7 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase):
           pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE,
           growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE,
           # Dropout does not change anything here - tree is not finalized.
-          dropout_probability=0.5)
+          dropout_probability=0.5).SerializeToString()
 
       # Prepare handler inputs.
       # Handler 1 only has a candidate for partition 1, handler 2 has candidates
@@ -632,7 +632,8 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase):
           max_depth=1,
           min_node_weight=0,
           pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE,
-          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE)
+          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE).SerializeToString(
+          )
 
       # Prepare handler inputs.
       handler1_partitions = np.array([0], dtype=np.int32)
@@ -772,7 +773,8 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase):
           max_depth=1,
           min_node_weight=0,
           pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE,
-          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE)
+          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE).SerializeToString(
+          )
 
       # Prepare handler inputs.
       # All handlers have negative gain.
@@ -837,7 +839,8 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase):
           max_depth=1,
           min_node_weight=0,
           pruning_mode=learner_pb2.LearnerConfig.POST_PRUNE,
-          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE)
+          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE).SerializeToString(
+          )
 
       # Prepare handler inputs.
       # Note that handlers 1 & 3 have the same gain but different splits.
@@ -943,7 +946,8 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase):
           max_depth=2,
           min_node_weight=0,
           pruning_mode=learner_pb2.LearnerConfig.POST_PRUNE,
-          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE)
+          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE).SerializeToString(
+          )
 
       # Prepare handler inputs.
       # All handlers have negative gain.
@@ -1090,7 +1094,8 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase):
           max_depth=2,
           min_node_weight=0,
           pruning_mode=learner_pb2.LearnerConfig.POST_PRUNE,
-          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE)
+          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE).SerializeToString(
+          )
 
       # Prepare handler inputs.
       # Second handler has positive gain.
@@ -1330,7 +1335,7 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase):
           pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE,
           growing_mode=learner_pb2.LearnerConfig.LAYER_BY_LAYER,
           # Dropout will have no effect, since the tree will not be fully grown.
-          dropout_probability=1.0)
+          dropout_probability=1.0).SerializeToString()
 
       # Prepare handler inputs.
       # Handler 1 only has a candidate for partition 1, handler 2 has candidates
@@ -1538,7 +1543,7 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase):
           min_node_weight=0,
           pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE,
           growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE,
-          dropout_probability=1.0)
+          dropout_probability=1.0).SerializeToString()
 
       # Prepare handler inputs.
       handler1_partitions = np.array([0], dtype=np.int32)
@@ -1583,6 +1588,301 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase):
       self.assertEqual(
           2, tree_ensemble_config.tree_metadata[2].num_tree_weight_updates)
 
+  def testGrowExistingEnsembleTreeWithFeatureSelectionCanStillGrow(self):
+    """Test growing a tree with feature selection."""
+    with self.test_session() as session:
+      # Create existing ensemble with one root split and one bias tree.
+      tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
+      text_format.Merge("""
+        trees {
+          nodes {
+            leaf {
+              vector {
+                value: -0.32
+                value: 0.28
+              }
+            }
+          }
+        }
+        trees {
+          nodes {
+            categorical_id_binary_split {
+              feature_column: 3
+              feature_id: 7
+              left_id: 1
+              right_id: 2
+            }
+            node_metadata {
+              gain: 1.3
+            }
+          }
+          nodes {
+            leaf {
+              sparse_vector {
+                index: 0
+                value: 2.3
+              }
+            }
+          }
+          nodes {
+            leaf {
+              sparse_vector {
+                index: 0
+                value: -0.9
+              }
+            }
+          }
+        }
+        tree_weights: 0.7
+        tree_weights: 1
+        tree_metadata {
+          num_tree_weight_updates: 1
+          num_layers_grown: 1
+          is_finalized: true
+        }
+        tree_metadata {
+          num_tree_weight_updates: 5
+          num_layers_grown: 1
+          is_finalized: true
+        }
+        growing_metadata {
+          num_trees_attempted: 2
+          num_layers_attempted: 2
+          used_handler_ids: 2
+          used_handler_ids: 5
+        }
+      """, tree_ensemble_config)
+      tree_ensemble_handle = model_ops.tree_ensemble_variable(
+          stamp_token=0,
+          tree_ensemble_config=tree_ensemble_config.SerializeToString(),
+          name="tree_ensemble")
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      # Prepare learner config.
+      learner_config = _gen_learner_config(
+          num_classes=2,
+          l1_reg=0,
+          l2_reg=0,
+          tree_complexity=0,
+          max_depth=1,
+          min_node_weight=0,
+          pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE,
+          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE)
+      # There are 2 handler_ids in used_handler_ids already but one of them
+      # is handler 2, so we can still grow trees.
+      learner_config.constraints.max_number_of_unique_feature_columns = 2
+      learner_config = learner_config.SerializeToString()
+      # Prepare handler inputs.
+      handler1_partitions = np.array([0], dtype=np.int32)
+      handler1_gains = np.array([7.62], dtype=np.float32)
+      handler1_split = [_gen_dense_split_info(5, 0.52, -4.375, 7.143)]
+      handler2_partitions = np.array([0], dtype=np.int32)
+      handler2_gains = np.array([0.63], dtype=np.float32)
+      handler2_split = [_gen_dense_split_info(2, 0.23, -0.6, 0.24)]
+      handler3_partitions = np.array([0], dtype=np.int32)
+      handler3_gains = np.array([7.62], dtype=np.float32)
+      handler3_split = [_gen_categorical_split_info(8, 7, -4.375, 7.143)]
+
+      # Grow tree ensemble.
+      grow_op = training_ops.grow_tree_ensemble(
+          tree_ensemble_handle,
+          stamp_token=0,
+          next_stamp_token=1,
+          learning_rate=1,
+          partition_ids=[
+              handler1_partitions, handler2_partitions, handler3_partitions
+          ],
+          gains=[handler1_gains, handler2_gains, handler3_gains],
+          splits=[handler1_split, handler2_split, handler3_split],
+          learner_config=learner_config,
+          dropout_seed=123,
+          center_bias=True)
+      session.run(grow_op)
+
+      # Expect a new tree to be added with the split from handler 1.
+      _, serialized = session.run(
+          model_ops.tree_ensemble_serialize(tree_ensemble_handle))
+      tree_ensemble_config.ParseFromString(serialized)
+      self.assertEqual(3, len(tree_ensemble_config.trees))
+      self.assertEqual(
+          2, len(tree_ensemble_config.growing_metadata.used_handler_ids))
+
+  def testGrowExistingEnsembleTreeWithFeatureSelectionEmptyEnsemble(self):
+    """Test growing a tree with feature selection with empty ensemble."""
+    with self.test_session() as session:
+      # Create existing ensemble with one root split and one bias tree.
+      tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
+      tree_ensemble_handle = model_ops.tree_ensemble_variable(
+          stamp_token=0,
+          tree_ensemble_config=tree_ensemble_config.SerializeToString(),
+          name="tree_ensemble")
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      # Prepare learner config.
+      learner_config = _gen_learner_config(
+          num_classes=2,
+          l1_reg=0,
+          l2_reg=0,
+          tree_complexity=0,
+          max_depth=1,
+          min_node_weight=0,
+          pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE,
+          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE)
+      learner_config.constraints.max_number_of_unique_feature_columns = 2
+      learner_config = learner_config.SerializeToString()
+      # Prepare handler inputs.
+      handler1_partitions = np.array([0], dtype=np.int32)
+      handler1_gains = np.array([7.62], dtype=np.float32)
+      handler1_split = [_gen_dense_split_info(5, 0.52, -4.375, 7.143)]
+      handler2_partitions = np.array([0], dtype=np.int32)
+      handler2_gains = np.array([0.63], dtype=np.float32)
+      handler2_split = [_gen_dense_split_info(2, 0.23, -0.6, 0.24)]
+      handler3_partitions = np.array([0], dtype=np.int32)
+      handler3_gains = np.array([7.62], dtype=np.float32)
+      handler3_split = [_gen_categorical_split_info(8, 7, -4.375, 7.143)]
+
+      # Grow tree ensemble.
+      grow_op = training_ops.grow_tree_ensemble(
+          tree_ensemble_handle,
+          stamp_token=0,
+          next_stamp_token=1,
+          learning_rate=1,
+          partition_ids=[
+              handler1_partitions, handler2_partitions, handler3_partitions
+          ],
+          gains=[handler1_gains, handler2_gains, handler3_gains],
+          splits=[handler1_split, handler2_split, handler3_split],
+          learner_config=learner_config,
+          dropout_seed=123,
+          center_bias=True)
+      session.run(grow_op)
+
+      _, serialized = session.run(
+          model_ops.tree_ensemble_serialize(tree_ensemble_handle))
+      tree_ensemble_config.ParseFromString(serialized)
+      self.assertEqual(1, len(tree_ensemble_config.trees))
+      self.assertEqual(
+          1, len(tree_ensemble_config.growing_metadata.used_handler_ids))
+
+  def testGrowExistingEnsembleTreeWithFeatureSelectionCantGrow(self):
+    """Test growing a tree with feature selection with empty ensemble."""
+    with self.test_session() as session:
+      # Create existing ensemble with one root split and one bias tree.
+      tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
+      text_format.Merge("""
+        trees {
+          nodes {
+            leaf {
+              vector {
+                value: -0.32
+                value: 0.28
+              }
+            }
+          }
+        }
+        trees {
+          nodes {
+            categorical_id_binary_split {
+              feature_column: 3
+              feature_id: 7
+              left_id: 1
+              right_id: 2
+            }
+            node_metadata {
+              gain: 1.3
+            }
+          }
+          nodes {
+            leaf {
+              sparse_vector {
+                index: 0
+                value: 2.3
+              }
+            }
+          }
+          nodes {
+            leaf {
+              sparse_vector {
+                index: 0
+                value: -0.9
+              }
+            }
+          }
+        }
+        tree_weights: 0.7
+        tree_weights: 1
+        tree_metadata {
+          num_tree_weight_updates: 1
+          num_layers_grown: 1
+          is_finalized: true
+        }
+        tree_metadata {
+          num_tree_weight_updates: 5
+          num_layers_grown: 1
+          is_finalized: true
+        }
+        growing_metadata {
+          num_trees_attempted: 2
+          num_layers_attempted: 2
+          used_handler_ids: 4
+          used_handler_ids: 5
+        }
+      """, tree_ensemble_config)
+      tree_ensemble_handle = model_ops.tree_ensemble_variable(
+          stamp_token=0,
+          tree_ensemble_config=tree_ensemble_config.SerializeToString(),
+          name="tree_ensemble")
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      # Prepare learner config.
+      learner_config = _gen_learner_config(
+          num_classes=2,
+          l1_reg=0,
+          l2_reg=0,
+          tree_complexity=0,
+          max_depth=1,
+          min_node_weight=0,
+          pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE,
+          growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE)
+      learner_config.constraints.max_number_of_unique_feature_columns = 2
+      learner_config = learner_config.SerializeToString()
+      # Prepare handler inputs.
+      handler1_partitions = np.array([0], dtype=np.int32)
+      handler1_gains = np.array([7.62], dtype=np.float32)
+      handler1_split = [_gen_dense_split_info(5, 0.52, -4.375, 7.143)]
+      handler2_partitions = np.array([0], dtype=np.int32)
+      handler2_gains = np.array([0.63], dtype=np.float32)
+      handler2_split = [_gen_dense_split_info(2, 0.23, -0.6, 0.24)]
+      handler3_partitions = np.array([0], dtype=np.int32)
+      handler3_gains = np.array([7.62], dtype=np.float32)
+      handler3_split = [_gen_categorical_split_info(8, 7, -4.375, 7.143)]
+
+      # Grow tree ensemble.
+      grow_op = training_ops.grow_tree_ensemble(
+          tree_ensemble_handle,
+          stamp_token=0,
+          next_stamp_token=1,
+          learning_rate=1,
+          partition_ids=[
+              handler1_partitions, handler2_partitions, handler3_partitions
+          ],
+          gains=[handler1_gains, handler2_gains, handler3_gains],
+          splits=[handler1_split, handler2_split, handler3_split],
+          learner_config=learner_config,
+          dropout_seed=123,
+          center_bias=True)
+      session.run(grow_op)
+
+      _, serialized = session.run(
+          model_ops.tree_ensemble_serialize(tree_ensemble_handle))
+      tree_ensemble_config.ParseFromString(serialized)
+      # We can't grow a tree since we have reached the limit of 2 unique
+      # features [4, 5] and the only available splits are from
+      # handlers [0, 1, 2].
+      self.assertEqual(2, len(tree_ensemble_config.trees))
+      self.assertEqual(
+          2, len(tree_ensemble_config.growing_metadata.used_handler_ids))
+
 
 if __name__ == "__main__":
   googletest.main()
diff --git a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py
index 7e8e15e7d8c89d1adaa472b1da7e8bb3c73ca17e..294e04002adac62fc123a3242a05a1b36f422433 100644
--- a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py
+++ b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py
@@ -45,6 +45,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject):
                init_stamp_token,
                epsilon,
                num_quantiles,
+               max_elements=None,
                name=None,
                container=None):
     """Creates a QuantileAccumulator object.
@@ -53,6 +54,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject):
       init_stamp_token: The initial value for the stamp token.
       epsilon: Error bound on the quantile computation.
       num_quantiles: Number of quantiles to produce from the final summary.
+      max_elements: Maximum number of elements added to the accumulator.
       name: the name to save the accumulator under.
       container: An optional `string`. Defaults to `""`
     """
@@ -67,6 +69,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject):
           self._quantile_accumulator_handle,
           init_stamp_token,
           epsilon=epsilon,
+          max_elements=max_elements,
           num_quantiles=num_quantiles)
       is_initialized_op = gen_quantile_ops.quantile_accumulator_is_initialized(
           self._quantile_accumulator_handle)
diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
index 6094dae6b59d8b05bb12a28cf167a536e6825287..b95956dae2a62b28643cd31815c5f5650eca337b 100644
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
@@ -322,9 +322,11 @@ class GradientBoostedDecisionTreeModel(object):
     self._feature_columns = feature_columns
     self._learner_config_serialized = learner_config.SerializeToString()
     self._attempted_trees = variables.Variable(
-        initial_value=array_ops.zeros([], dtypes.int64), trainable=False)
+        initial_value=array_ops.zeros([], dtypes.int64), trainable=False,
+        name="attempted_trees")
     self._finalized_trees = variables.Variable(
-        initial_value=array_ops.zeros([], dtypes.int64), trainable=False)
+        initial_value=array_ops.zeros([], dtypes.int64), trainable=False,
+        name="finalized_trees")
     if not features:
       raise ValueError("Features dictionary must be specified.")
     (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
index 16e24d97ddee0751e0b808b89080074c1b4baba7..dba51d4f527792d2a8dedc693f74c07119fd231d 100644
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
@@ -912,8 +912,10 @@ class GbdtTest(test_util.TensorFlowTestCase):
       self.assertEqual(1,
                        len(output.trees[0].nodes[2].leaf.sparse_vector.index))
       self.assertEqual(3, output.trees[0].nodes[2].leaf.sparse_vector.index[0])
-      self.assertAlmostEqual(
-          0.893284678459, output.trees[0].nodes[2].leaf.sparse_vector.value[0])
+      self.assertAllClose(
+          0.893284678459,
+          output.trees[0].nodes[2].leaf.sparse_vector.value[0],
+          atol=1e-4, rtol=1e-4)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/boosted_trees/resources/decision_tree_ensemble_resource.h b/tensorflow/contrib/boosted_trees/resources/decision_tree_ensemble_resource.h
index 284ad5cdb9abf374650940ade7bb36663d72c0dd..ad9c8961aaadbc4c1ff6bdc7793171d0ad48d75f 100644
--- a/tensorflow/contrib/boosted_trees/resources/decision_tree_ensemble_resource.h
+++ b/tensorflow/contrib/boosted_trees/resources/decision_tree_ensemble_resource.h
@@ -111,6 +111,35 @@ class DecisionTreeEnsembleResource : public StampedResource {
     return decision_tree_ensemble_->tree_weights(index);
   }
 
+  void MaybeAddUsedHandler(const int32 handler_id) {
+    protobuf::RepeatedField<protobuf_int64>* used_ids =
+        decision_tree_ensemble_->mutable_growing_metadata()
+            ->mutable_used_handler_ids();
+    protobuf::RepeatedField<protobuf_int64>::iterator first =
+        std::lower_bound(used_ids->begin(), used_ids->end(), handler_id);
+    if (first == used_ids->end()) {
+      used_ids->Add(handler_id);
+      return;
+    }
+    if (handler_id == *first) {
+      // It is a duplicate entry.
+      return;
+    }
+    used_ids->Add(handler_id);
+    std::rotate(first, used_ids->end() - 1, used_ids->end());
+  }
+
+  std::vector<int64> GetUsedHandlers() const {
+    std::vector<int64> result;
+    result.reserve(
+        decision_tree_ensemble_->growing_metadata().used_handler_ids().size());
+    for (int64 h :
+         decision_tree_ensemble_->growing_metadata().used_handler_ids()) {
+      result.push_back(h);
+    }
+    return result;
+  }
+
   // Sets the weight of i'th tree, and increment num_updates in tree_metadata.
   void SetTreeWeight(const int32 index, const float weight,
                      const int32 increment_num_updates) {
diff --git a/tensorflow/contrib/cloud/BUILD b/tensorflow/contrib/cloud/BUILD
index aa8f5ed12bc6f779e3c1a923b9225ec283189747..fe8bd072afd43a64fa62a65bd8900b5a98dbe761 100644
--- a/tensorflow/contrib/cloud/BUILD
+++ b/tensorflow/contrib/cloud/BUILD
@@ -60,9 +60,7 @@ tf_py_test(
     size = "small",
     srcs = ["python/ops/bigquery_reader_ops_test.py"],
     additional_deps = [
-        ":bigquery_reader_ops_op_lib",
         ":cloud_py",
-        "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc
index 51821f6653550afd2d2e8a49b7337ff8ba0b5489..deb324634b6edc17c9725996115d80c5bd11cbde 100644
--- a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc
+++ b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc
@@ -202,22 +202,21 @@ Status BigQueryTableAccessor::ReadRow(int64* row_id, Example* example) {
     std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
     std::vector<char> output_buffer;
     output_buffer.reserve(kBufferSize);
-    TF_RETURN_IF_ERROR(request->Init());
 
     // The first time that we access BigQuery there is no page token. After that
     // we use the page token (which returns rows faster).
     if (!next_page_token_.empty()) {
-      TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
+      request->SetUri(strings::StrCat(
           BigQueryUriPrefix(), "data?maxResults=", ComputeMaxResultsArg(),
-          "&pageToken=", request->EscapeString(next_page_token_))));
+          "&pageToken=", request->EscapeString(next_page_token_)));
       first_buffered_row_index_ += row_buffer_.size();
     } else {
-      TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
+      request->SetUri(strings::StrCat(
           BigQueryUriPrefix(), "data?maxResults=", ComputeMaxResultsArg(),
-          "&startIndex=", first_buffered_row_index_)));
+          "&startIndex=", first_buffered_row_index_));
     }
-    TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-    TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
+    request->AddAuthBearerHeader(auth_token);
+    request->SetResultBuffer(&output_buffer);
     TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading rows from ",
                                     FullTableName());
 
@@ -293,10 +292,9 @@ Status BigQueryTableAccessor::ReadSchema() {
   std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
   std::vector<char> output_buffer;
   output_buffer.reserve(kBufferSize);
-  TF_RETURN_IF_ERROR(request->Init());
-  TF_RETURN_IF_ERROR(request->SetUri(BigQueryUriPrefix()));
-  TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-  TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
+  request->SetUri(BigQueryUriPrefix());
+  request->AddAuthBearerHeader(auth_token);
+  request->SetResultBuffer(&output_buffer);
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading schema for ",
                                   FullTableName());
 
diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
index c74da9cabd6816bc9c7891e32937534cff2d677d..2e75ac226ea74e879edda5e03dff3d53c8a76569 100644
--- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
+++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
@@ -18,6 +18,10 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+
+from six.moves.urllib.request import Request
+from six.moves.urllib.request import urlopen
+
 from tensorflow.contrib.cluster_resolver.python.training.cluster_resolver import ClusterResolver
 from tensorflow.python.training.server_lib import ClusterSpec
 
@@ -38,10 +42,16 @@ class TPUClusterResolver(ClusterResolver):
   Cloud Platform project.
   """
 
+  def _requestComputeMetadata(self, path):
+    req = Request('http://metadata/computeMetadata/v1/%s' % path,
+                  headers={'Metadata-Flavor': 'Google'})
+    resp = urlopen(req)
+    return resp.read()
+
   def __init__(self,
-               project,
-               zone,
                tpu_names,
+               zone=None,
+               project=None,
                job_name='tpu_worker',
                credentials='default',
                service=None):
@@ -51,9 +61,13 @@ class TPUClusterResolver(ClusterResolver):
     for the IP addresses and ports of each Cloud TPU listed.
 
     Args:
-      project: Name of the GCP project containing Cloud TPUs
-      zone: Zone where the TPUs are located
       tpu_names: A list of names of the target Cloud TPUs.
+      zone: Zone where the TPUs are located. If omitted or empty, we will assume
+        that the zone of the TPU is the same as the zone of the GCE VM, which we
+        will try to discover from the GCE metadata service.
+      project: Name of the GCP project containing Cloud TPUs. If omitted or
+        empty, we will try to discover the project name of the GCE VM from the
+        GCE metadata service.
       job_name: Name of the TensorFlow job the TPUs belong to.
       credentials: GCE Credentials. If None, then we use default credentials
         from the oauth2client
@@ -65,6 +79,13 @@ class TPUClusterResolver(ClusterResolver):
       ImportError: If the googleapiclient is not installed.
     """
 
+    if not project:
+      project = self._requestComputeMetadata('/project/project-id')
+
+    if not zone:
+      zone_path = self._requestComputeMetadata('/instance/zone')
+      zone = zone_path.split('/')[-1]
+
     self._project = project
     self._zone = zone
     self._tpu_names = tpu_names
diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py
index db7419be06b58e1c5737f69f2c7fd9fee44b9d95..0c4730613af4ad9ca87deb6200ab4bb93d3f6a53 100644
--- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py
+++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py
@@ -48,6 +48,15 @@ class MockNodeClass(object):
     return MockRequestClass(name, self._tpu_map)
 
 
+def mock_request_compute_metadata(cls, *args, **kwargs):
+  del cls, kwargs  # Unused.
+  if args[0] == '/project/project-id':
+    return 'test-project'
+  elif args[0] == '/instance/zone':
+    return 'projects/test-project/locations/us-central1-c'
+  return ''
+
+
 class TPUClusterResolverTest(test.TestCase):
 
   def _verifyClusterSpecEquality(self, cluster_spec, expected_proto):
@@ -89,6 +98,30 @@ class TPUClusterResolverTest(test.TestCase):
 
     return mock_client
 
+  @mock.patch.object(TPUClusterResolver,
+                     '_requestComputeMetadata',
+                     mock_request_compute_metadata)
+  def testRetrieveProjectAndZoneFromMetadata(self):
+    tpu_map = {
+        'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
+            'ipAddress': '10.1.2.3',
+            'port': '8470'
+        }
+    }
+
+    tpu_cluster_resolver = TPUClusterResolver(
+        project=None,
+        zone=None,
+        tpu_names=['test-tpu-1'],
+        credentials=None,
+        service=self.mock_service_client(tpu_map=tpu_map))
+
+    actual_cluster_spec = tpu_cluster_resolver.cluster_spec()
+    expected_proto = """
+    job { name: 'tpu_worker' tasks { key: 0 value: '10.1.2.3:8470' } }
+    """
+    self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
+
   def testSimpleSuccessfulRetrieval(self):
     tpu_map = {
         'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index ba708673b0d562f928230f427406147ab22f0007..817e96f5da0e7512a9fd99cc9a4b4c6025d7dd68 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -18,7 +18,6 @@ cmake_policy(SET CMP0022 NEW)
 
 # Options
 option(tensorflow_VERBOSE "Enable for verbose output" OFF)
-option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
 option(tensorflow_ENABLE_SSL_SUPPORT "Enable boringssl support" OFF)
 option(tensorflow_ENABLE_GRPC_SUPPORT "Enable gRPC support" ON)
 option(tensorflow_ENABLE_HDFS_SUPPORT "Enable HDFS support" OFF)
@@ -34,6 +33,13 @@ option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF)
 option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON)
 option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions")
 option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON)
+option(tensorflow_DISABLE_EIGEN_FORCEINLINE "Disable forceinline, to speed up build on windows." OFF)
+
+# GPU, CUDA and cuDNN options
+option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
+set(tensorflow_CUDA_VERSION "9.0" CACHE STRING "CUDA version to build against")
+set(tensorflow_CUDNN_VERSION "7" CACHE STRING "cuDNN version to build against")
+
 if(HAIKU)
 	option(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE "Enable PIE support" OFF)
 else()
@@ -53,7 +59,15 @@ if (NOT WIN32)
     set(tensorflow_CUDNN_INCLUDE /usr/include)
   endif (NOT tensorflow_CUDNN_INCLUDE)
   option(tensorflow_PATH_CUDNN_STATIC_LIB "Override PATH_STATIC_LIB for libcudnn_static.a" ${tensorflow_PATH_STATIC_LIB})
+  if (NOT tensorflow_PATH_CUDNN_STATIC_LIB)
+    # option's default value is OFF. Fill it with real default values
+    set (tensorflow_PATH_CUDNN_STATIC_LIB ${tensorflow_PATH_STATIC_LIB})
+  endif (NOT tensorflow_PATH_CUDNN_STATIC_LIB)
   option(tensorflow_PATH_NCCL_STATIC_LIB "Override PATH_STATIC_LIB for libnccl_static.a" ${tensorflow_PATH_STATIC_LIB})
+  if (NOT tensorflow_PATH_NCCL_STATIC_LIB)
+    # option's default value is OFF. Fill it with real default values
+    set (tensorflow_PATH_NCCL_STATIC_LIB ${tensorflow_PATH_STATIC_LIB})
+  endif (NOT tensorflow_PATH_NCCL_STATIC_LIB)
   option(tensorflow_CUDA_LIBRARY_PATH "Designate the default CUDA library paths" /usr/local/cuda/lib64)
   if (NOT tensorflow_CUDA_LIBRARY_PATH)
     # option's default value is OFF. Fill it with real default values
@@ -92,6 +106,13 @@ else()
 	set(CMAKE_POSITION_INDEPENDENT_CODE OFF)
 endif()
 
+# TODO(jart): We should make this only apply to snapfn.cc
+add_definitions(-DSQLITE_OMIT_LOAD_EXTENSION)
+
+if (tensorflow_DISABLE_EIGEN_FORCEINLINE)
+  add_definitions(-DEIGEN_STRONG_INLINE=inline)
+endif()
+
 add_definitions(-DEIGEN_AVOID_STL_ARRAY)
 if(WIN32)
   add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC)
@@ -160,7 +181,6 @@ include(protobuf)
 include(re2)
 include(cub)
 include(sqlite)
-include(double_conversion)
 if (tensorflow_BUILD_CC_TESTS)
   include(googletest)
 endif()
@@ -179,7 +199,6 @@ set(tensorflow_EXTERNAL_LIBRARIES
     ${protobuf_STATIC_LIBRARIES}
     ${re2_STATIC_LIBRARIES}
     ${sqlite_STATIC_LIBRARIES}
-    ${double_conversion_STATIC_LIBRARIES}
 )
 set(tensorflow_EXTERNAL_DEPENDENCIES
     zlib_copy_headers_to_destination
@@ -198,7 +217,6 @@ set(tensorflow_EXTERNAL_DEPENDENCIES
     fft2d
     re2
     sqlite_copy_headers_to_destination
-    double_conversion
 )
 
 include_directories(
@@ -221,7 +239,6 @@ include_directories(
     ${PROTOBUF_INCLUDE_DIRS}
     ${re2_INCLUDE_DIR}
     ${sqlite_INCLUDE_DIR}
-    ${double_conversion_INCLUDE_DIR}
 )
 
 if(tensorflow_ENABLE_SSL_SUPPORT)
@@ -266,7 +283,7 @@ if (tensorflow_ENABLE_GPU)
     list(APPEND CMAKE_LIBRARY_PATH "${tensorflow_CUDA_LIBRARY_PATH}/stubs")
   endif (NOT WIN32)
 
-  find_package(CUDA 8.0 REQUIRED)
+  find_package(CUDA ${tensorflow_CUDA_VERSION} REQUIRED)
 
   # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
   # CUDA_NVCC_FLAGS and cuda_config.h below
@@ -320,13 +337,16 @@ if (tensorflow_ENABLE_GPU)
       ${CUDA_curand_LIBRARY} ${CUDA_cupti_LIBRARY} ${CUDA_cusolver_LIBRARY} ${cudnn_STATIC_LIBRARY} ${culibos_STATIC_LIBRARY} ${nccl_STATIC_LIBRARY})
   endif (WIN32)
 
+  # Remove "." from CUDA version variable.
+  string(REPLACE "." "" short_CUDA_VER ${tensorflow_CUDA_VERSION})
+
   # create cuda_config.h
   FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
     "#ifndef CUDA_CUDA_CONFIG_H_\n"
     "#define CUDA_CUDA_CONFIG_H_\n"
     "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.0\"),CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
-    "#define TF_CUDA_VERSION \"64_80\"\n"
-    "#define TF_CUDNN_VERSION \"64_6\"\n"
+    "#define TF_CUDA_VERSION \"64_${short_CUDA_VER}\"\n"
+    "#define TF_CUDNN_VERSION \"64_${tensorflow_CUDNN_VERSION}\"\n"
     "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n"
     "#endif  // CUDA_CUDA_CONFIG_H_\n"
   )
@@ -364,15 +384,15 @@ if (tensorflow_ENABLE_GPU)
   if(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
       msvcp_dll_name=msvcp140.dll
-      cudart_dll_name=cudart64_80.dll
-      cuda_version_number=8.0
+      cudart_dll_name=cudart64_${short_CUDA_VER}.dll
+      cuda_version_number=${tensorflow_CUDA_VERSION}
       nvcuda_dll_name=nvcuda.dll
-      cudnn_dll_name=cudnn64_6.dll
-      cudnn_version_number=6)
+      cudnn_dll_name=cudnn64_${tensorflow_CUDNN_VERSION}.dll
+      cudnn_version_number=${tensorflow_CUDNN_VERSION})
   else(WIN32)
     set(tensorflow_BUILD_INFO_FLAGS --build_config cuda --key_value
-      cuda_version_number=8.0
-      cudnn_version_number=6)
+	    cuda_version_number=${tensorflow_CUDA_VERSION}
+	    cudnn_version_number=${tensorflow_CUDNN_VERSION})
   endif(WIN32)
 else(tensorflow_ENABLE_GPU)
   set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value
@@ -387,10 +407,8 @@ endif()
 
 # Let's get to work!
 include(tf_core_framework.cmake)
-# NOTE: Disabled until issue #3996 is fixed.
-# include(tf_stream_executor.cmake)
 if (tensorflow_ENABLE_GPU)
-    include(tf_stream_executor.cmake)
+  include(tf_stream_executor.cmake)
 endif()
 
 include(tf_core_cpu.cmake)
diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 4ddfec5960d2b759bacb376202cd8dab6ef2b024..8f85a75ee466dbac524a1266dc2522109ca77cd5 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -19,23 +19,6 @@ for instructions on how to install a pre-built TensorFlow package on Windows.
 ### Current known limitations
 * It is not possible to load a custom Op library.
 * GCS file system is not supported.
-* The following Ops are not currently implemented:
- - Dequantize
- - QuantizeAndDequantize
- - QuantizedAvgPool
- - QuantizedBatchNomWithGlobalNormalization
- - QuantizedBiasAdd
- - QuantizedConcat
- - QuantizedConv2D
- - QuantizedMatmul
- - QuantizedMaxPoo
- - QuantizeDownAndShrinkRange
- - QuantizedRelu
- - QuantizedRelu6
- - QuantizedReshape
- - QuantizeV2
- - RequantizationRange
- - Requantize
 
 ## Building with CMake
 
@@ -47,7 +30,7 @@ bindings.
 
 * CMake version 3.5 or later.
 
-* [Git](http://git-scm.com)
+* [Git](https://git-scm.com)
 
 * [SWIG](http://www.swig.org/download.html)
 
@@ -65,7 +48,7 @@ bindings.
 
 * Microsoft Windows 10
   - Microsoft Visual Studio Enterprise 2015 with Visual C++ 2015
-  - [Anaconda 4.1.1 (Python 3.5 64-bit)](https://www.continuum.io/downloads)
+  - [Anaconda 4.1.1 (Python 3.5 64-bit)](https://www.anaconda.com/download/)
   - [Git for Windows version 2.9.2.windows.1](https://git-scm.com/download/win)
   - [swigwin-3.0.10](http://www.swig.org/download.html)
   - [NVidia CUDA Toolkit 8.0](https://developer.nvidia.com/cuda-downloads)
diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake
index cca8444e2ae9952ea7c69a9392580ead715d363b..5ad477fdff68feab4adf0c0072c68c8e55390ab8 100644
--- a/tensorflow/contrib/cmake/external/boringssl.cmake
+++ b/tensorflow/contrib/cmake/external/boringssl.cmake
@@ -39,11 +39,7 @@ ExternalProject_Add(boringssl
     # BUILD_IN_SOURCE 1
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
-        if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-        	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-        else()
-        	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-        endif()
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE}
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
 )
diff --git a/tensorflow/contrib/cmake/external/double_conversion.cmake b/tensorflow/contrib/cmake/external/double_conversion.cmake
deleted file mode 100644
index 527ccdc8d887cb4c2e7d2412c99a8bc682568472..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/cmake/external/double_conversion.cmake
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-include (ExternalProject)
-
-set(double_conversion_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/double_conversion/src/double_conversion)
-set(double_conversion_URL https://github.com/google/double-conversion.git)
-set(double_conversion_TAG 5664746)
-set(double_conversion_BUILD ${double_conversion_INCLUDE_DIR})
-set(double_conversion_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.so)
-set(double_conversion_INCLUDES ${double_conversion_BUILD})
-
-if(WIN32)
-  set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/$(Configuration)/double-conversion.lib)
-else()
-  set(double_conversion_STATIC_LIBRARIES ${double_conversion_BUILD}/double-conversion/libdouble-conversion.a)
-endif()
-
-set(double_conversion_HEADERS
-    "${double_conversion_INCLUDE_DIR}/double-conversion/bignum-dtoa.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/cached-powers.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/double-conversion.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/fixed-dtoa.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/strtod.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/bignum.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/diy-fp.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/fast-dtoa.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/ieee.h"
-    "${double_conversion_INCLUDE_DIR}/double-conversion/utils.h"
-)
-
-ExternalProject_Add(double_conversion
-    PREFIX double_conversion
-    GIT_REPOSITORY ${double_conversion_URL}
-    GIT_TAG ${double_conversion_TAG}
-    DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
-    BUILD_IN_SOURCE 1
-    INSTALL_COMMAND ""
-    CMAKE_CACHE_ARGS
-        -DCMAKE_BUILD_TYPE:STRING=Release
-        -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-)
diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake
index 3b146657bfc9bdd54db14839195af45972e67aff..a235442dc5c0a07e249653381436eeae81575883 100644
--- a/tensorflow/contrib/cmake/external/gemmlowp.cmake
+++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake
@@ -14,8 +14,8 @@
 # ==============================================================================
 include (ExternalProject)
 
-set(gemmlowp_URL https://mirror.bazel.build/github.com/google/gemmlowp/archive/010bb3e71a26ca1d0884a167081d092b43563996.zip)
-set(gemmlowp_HASH SHA256=dd2557072bde12141419cb8320a9c25e6ec41a8ae53c2ac78c076a347bb46d9d)
+set(gemmlowp_URL https://github.com/google/gemmlowp/archive/6a2a90822e8546fc2bfa7044de0faf1c1cb4862f.zip)
+set(gemmlowp_HASH SHA256=3447948d219f3270383766bbe08942888c0eb4e0ca6663c0e0548502ec5bb77d)
 set(gemmlowp_BUILD ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
 set(gemmlowp_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gemmlowp/src/gemmlowp)
 
diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake
index 41ea0b48a4600d7ca2dd2f4a61c14ec0cc5b4734..28adb4fe84423bb5a21c78dac4e757505ce87d1d 100644
--- a/tensorflow/contrib/cmake/external/grpc.cmake
+++ b/tensorflow/contrib/cmake/external/grpc.cmake
@@ -17,7 +17,7 @@ include (ExternalProject)
 set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include)
 set(GRPC_URL https://github.com/grpc/grpc.git)
 set(GRPC_BUILD ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc)
-set(GRPC_TAG 54e8f37e537794c2d814c1604c1282125f64f093)
+set(GRPC_TAG 730b778632e79cc3c96ad237f282d687ee325ce7)
 
 if(WIN32)
   set(grpc_STATIC_LIBRARIES
diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake
index d2ae4c76e8cd175cdc3ba41fdf4e4009f8237309..861201f97edbce2d9d70a833ce5a8cad46f2470a 100644
--- a/tensorflow/contrib/cmake/external/jsoncpp.cmake
+++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake
@@ -42,11 +42,7 @@ ExternalProject_Add(jsoncpp
     BUILD_IN_SOURCE 1
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
-  	  if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-  	      -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-  	  else()
-   	    	-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-   	 endif()
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE}
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
 )
diff --git a/tensorflow/contrib/cmake/external/lmdb.cmake b/tensorflow/contrib/cmake/external/lmdb.cmake
index e41384f023ca9fc4cba697917b491af5a9db92bc..41b314e2857577581eb27eb6c6480b757d0b436c 100644
--- a/tensorflow/contrib/cmake/external/lmdb.cmake
+++ b/tensorflow/contrib/cmake/external/lmdb.cmake
@@ -29,11 +29,7 @@ ExternalProject_Add(lmdb
     INSTALL_DIR ${lmdb_INSTALL}
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE}
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DCMAKE_INSTALL_PREFIX:STRING=${lmdb_INSTALL}
diff --git a/tensorflow/contrib/cmake/external/nsync.cmake b/tensorflow/contrib/cmake/external/nsync.cmake
index 155c91cb97dbe5ef33c318efb5544a9fa22166c7..05080060479b6240edb8ab9f65160b3dd182feb9 100644
--- a/tensorflow/contrib/cmake/external/nsync.cmake
+++ b/tensorflow/contrib/cmake/external/nsync.cmake
@@ -16,7 +16,7 @@ include (ExternalProject)
 
 set(nsync_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/public)
 set(nsync_URL https://github.com/google/nsync)
-set(nsync_TAG 93815892dddafe9146a5f7e7042281d59d0f4323)
+set(nsync_TAG 8502189abfa44c249c01c2cad64e6ed660a9a668)
 set(nsync_BUILD ${CMAKE_CURRENT_BINARY_DIR}/nsync/src/nsync)
 set(nsync_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/nsync/install)
 
diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake
index aad6618f52f909096fd2388e867ef3a965d033cb..b277be5690387b06876ca89eb88becbf885486a4 100644
--- a/tensorflow/contrib/cmake/external/png.cmake
+++ b/tensorflow/contrib/cmake/external/png.cmake
@@ -41,11 +41,7 @@ ExternalProject_Add(png
     INSTALL_DIR ${png_INSTALL}
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE}
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DCMAKE_INSTALL_PREFIX:STRING=${png_INSTALL}
diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
index b53857a47bfbf797af02fe7f69474263119161cd..aedb793d2aef4bf6950cd074cd065909667eaf75 100644
--- a/tensorflow/contrib/cmake/external/protobuf.cmake
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -44,11 +44,7 @@ ExternalProject_Add(protobuf
         ${PROTOBUF_ADDITIONAL_CMAKE_OPTIONS}
     INSTALL_COMMAND ""
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE}
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DZLIB_ROOT:STRING=${ZLIB_INSTALL}
diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake
index d10f5959f71dd350e6e2bcb81be8882b203fb231..371d8447f93735e7af2a5a2b16f128a47b5a082a 100644
--- a/tensorflow/contrib/cmake/external/re2.cmake
+++ b/tensorflow/contrib/cmake/external/re2.cmake
@@ -38,11 +38,7 @@ ExternalProject_Add(re2
     BUILD_IN_SOURCE 1
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE}
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_INSTALL_PREFIX:STRING=${re2_INSTALL}
         -DRE2_BUILD_TESTING:BOOL=OFF
diff --git a/tensorflow/contrib/cmake/external/snappy.cmake b/tensorflow/contrib/cmake/external/snappy.cmake
index 926c271fd9ea6e2a30251aa408bd49859ae95070..013b3a862f13fd9017fade500d391ecc2bd27fae 100644
--- a/tensorflow/contrib/cmake/external/snappy.cmake
+++ b/tensorflow/contrib/cmake/external/snappy.cmake
@@ -40,11 +40,7 @@ ExternalProject_Add(snappy
     LOG_CONFIGURE ON
     LOG_BUILD ON
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE}
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DSNAPPY_BUILD_TESTS:BOOL=OFF
diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake
index 785039a46983747557607562675349c150e064ad..8297c60712c49ed6f47a9750691eee1325a5b55e 100644
--- a/tensorflow/contrib/cmake/external/sqlite.cmake
+++ b/tensorflow/contrib/cmake/external/sqlite.cmake
@@ -28,6 +28,7 @@ endif()
 
 set(sqlite_HEADERS
     "${sqlite_BUILD}/sqlite3.h"
+    "${sqlite_BUILD}/sqlite3ext.h"
 )
 
 if (WIN32)
@@ -53,11 +54,7 @@ else()
         INSTALL_DIR ${sqlite_INSTALL}
         DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
         CMAKE_CACHE_ARGS
-			if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-				-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-			else()
-				-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-			endif()
+            -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE}
             -DCMAKE_BUILD_TYPE:STRING=Release
             -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
             -DCMAKE_INSTALL_PREFIX:STRING=${sqlite_INSTALL}
diff --git a/tensorflow/contrib/cmake/external/zlib.cmake b/tensorflow/contrib/cmake/external/zlib.cmake
index f10f84336e8b1c0a2c7de7ea1f8b8af7c21f8b51..5bec14fb00a50f6e6e8c7d8b703bde681e9d02ae 100644
--- a/tensorflow/contrib/cmake/external/zlib.cmake
+++ b/tensorflow/contrib/cmake/external/zlib.cmake
@@ -42,11 +42,7 @@ ExternalProject_Add(zlib
     BUILD_IN_SOURCE 1
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     CMAKE_CACHE_ARGS
-		if(tensorflow_ENABLE_POSITION_INDEPENDENT_CODE)
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-		else()
-			-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=OFF
-		endif()
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE}
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_INSTALL_PREFIX:STRING=${ZLIB_INSTALL}
 )
diff --git a/tensorflow/tools/ci_build/install/install_cmake_for_clang.sh b/tensorflow/contrib/cmake/make.sh
similarity index 83%
rename from tensorflow/tools/ci_build/install/install_cmake_for_clang.sh
rename to tensorflow/contrib/cmake/make.sh
index 3e626a69ab5e6b7f8d1b4997b459301606501a8e..eed3c34aba1f0326ec741169a187eb2982f253a3 100755
--- a/tensorflow/tools/ci_build/install/install_cmake_for_clang.sh
+++ b/tensorflow/contrib/cmake/make.sh
@@ -14,6 +14,13 @@
 # limitations under the License.
 # ==============================================================================
 
-CMAKE_URL="https://cmake.org/files/v3.7/cmake-3.7.2-Linux-x86_64.tar.gz"
+(
+cd "$(dirname "$0")"
+mkdir -p _build
 
-wget -O - "${CMAKE_URL}" | tar xzf - -C /usr/local --strip-components=1
+(
+cd _build
+rm -rf -- *
+cmake ..
+)
+)
diff --git a/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt
index 594c2492d4fd68b50c8493321a2c4dcc2d41917e..aaae18a313dd082b428654091c9411600c981ec9 100644
--- a/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt
@@ -158,12 +158,21 @@ if (NOT "${NSYNC_LANGUAGE}X" STREQUAL "c++11X")
   elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "NetBSDX")
     include_directories ("${PROJECT_SOURCE_DIR}/platform/netbsd")
     set (NSYNC_POSIX ON)
+    set (NSYNC_OS_EXTRA_SRC
+      "platform/posix/src/nsync_semaphore_mutex.c"
+    )
   elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "FreeBSDX")
     include_directories ("${PROJECT_SOURCE_DIR}/platform/freebsd")
     set (NSYNC_POSIX ON)
+    set (NSYNC_OS_EXTRA_SRC
+      "platform/posix/src/nsync_semaphore_mutex.c"
+    )
   elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "OpenBSDX")
     include_directories ("${PROJECT_SOURCE_DIR}/platform/openbsd")
     set (NSYNC_POSIX ON)
+    set (NSYNC_OS_EXTRA_SRC
+      "platform/posix/src/nsync_semaphore_mutex.c"
+    )
   endif ()
 endif ()
 
diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e37d059a84cb3d75cebf2473e7880f6d6cb20a69
--- /dev/null
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -0,0 +1,440 @@
+tensorflow
+tensorflow/core
+tensorflow/core/example
+tensorflow/core/framework
+tensorflow/core/lib
+tensorflow/core/lib/core
+tensorflow/core/protobuf
+tensorflow/core/util
+tensorflow/examples
+tensorflow/examples/tutorials
+tensorflow/examples/tutorials/mnist
+tensorflow/python
+tensorflow/python/client
+tensorflow/python/data
+tensorflow/python/data/ops
+tensorflow/python/data/util
+tensorflow/python/debug
+tensorflow/python/debug/cli
+tensorflow/python/debug/examples
+tensorflow/python/debug/lib
+tensorflow/python/debug/wrappers
+tensorflow/python/eager
+tensorflow/python/estimator
+tensorflow/python/estimator/canned
+tensorflow/python/estimator/export
+tensorflow/python/estimator/inputs
+tensorflow/python/estimator/inputs/queues
+tensorflow/python/feature_column
+tensorflow/python/framework
+tensorflow/python/grappler
+tensorflow/python/keras
+tensorflow/python/keras/activations
+tensorflow/python/keras/applications
+tensorflow/python/keras/applications/inception_resnet_v2
+tensorflow/python/keras/applications/inception_v3
+tensorflow/python/keras/applications/mobilenet
+tensorflow/python/keras/applications/resnet50
+tensorflow/python/keras/applications/vgg16
+tensorflow/python/keras/applications/vgg19
+tensorflow/python/keras/applications/xception
+tensorflow/python/keras/backend
+tensorflow/python/keras/callbacks
+tensorflow/python/keras/constraints
+tensorflow/python/keras/datasets
+tensorflow/python/keras/datasets/boston_housing
+tensorflow/python/keras/datasets/cifar10
+tensorflow/python/keras/datasets/cifar100
+tensorflow/python/keras/datasets/fashion_mnist
+tensorflow/python/keras/datasets/imdb
+tensorflow/python/keras/datasets/mnist
+tensorflow/python/keras/datasets/reuters
+tensorflow/python/keras/estimator
+tensorflow/python/keras/initializers
+tensorflow/python/keras/layers
+tensorflow/python/keras/losses
+tensorflow/python/keras/metrics
+tensorflow/python/keras/models
+tensorflow/python/keras/optimizers
+tensorflow/python/keras/preprocessing
+tensorflow/python/keras/preprocessing/image
+tensorflow/python/keras/preprocessing/sequence
+tensorflow/python/keras/preprocessing/text
+tensorflow/python/keras/regularizers
+tensorflow/python/keras/utils
+tensorflow/python/keras/wrappers
+tensorflow/python/keras/wrappers/scikit_learn
+tensorflow/python/keras/_impl
+tensorflow/python/keras/_impl/keras
+tensorflow/python/keras/_impl/keras/applications
+tensorflow/python/keras/_impl/keras/datasets
+tensorflow/python/keras/_impl/keras/engine
+tensorflow/python/keras/_impl/keras/layers
+tensorflow/python/keras/_impl/keras/preprocessing
+tensorflow/python/keras/_impl/keras/utils
+tensorflow/python/keras/_impl/keras/wrappers
+tensorflow/python/kernel_tests
+tensorflow/python/kernel_tests/distributions
+tensorflow/python/kernel_tests/linalg
+tensorflow/python/kernel_tests/random
+tensorflow/python/layers
+tensorflow/python/lib
+tensorflow/python/lib/core
+tensorflow/python/lib/io
+tensorflow/python/ops
+tensorflow/python/ops/distributions
+tensorflow/python/ops/linalg
+tensorflow/python/ops/losses
+tensorflow/python/platform
+tensorflow/python/profiler
+tensorflow/python/profiler/internal
+tensorflow/python/saved_model
+tensorflow/python/summary
+tensorflow/python/summary/writer
+tensorflow/python/tools
+tensorflow/python/training
+tensorflow/python/user_ops
+tensorflow/python/util
+tensorflow/python/util/protobuf
+tensorflow/tools
+tensorflow/tools/graph_transforms
+tensorflow/contrib
+tensorflow/contrib/all_reduce
+tensorflow/contrib/all_reduce/python
+tensorflow/contrib/android
+tensorflow/contrib/android/java
+tensorflow/contrib/android/java/org
+tensorflow/contrib/android/java/org/tensorflow
+tensorflow/contrib/android/java/org/tensorflow/contrib
+tensorflow/contrib/android/java/org/tensorflow/contrib/android
+tensorflow/contrib/android/jni
+tensorflow/contrib/batching
+tensorflow/contrib/batching/kernels
+tensorflow/contrib/batching/python
+tensorflow/contrib/batching/python/ops
+tensorflow/contrib/bayesflow
+tensorflow/contrib/bayesflow/python
+tensorflow/contrib/bayesflow/python/ops
+tensorflow/contrib/boosted_trees
+tensorflow/contrib/boosted_trees/estimator_batch
+tensorflow/contrib/boosted_trees/kernels
+tensorflow/contrib/boosted_trees/ops
+tensorflow/contrib/boosted_trees/proto
+tensorflow/contrib/boosted_trees/python
+tensorflow/contrib/boosted_trees/python/ops
+tensorflow/contrib/cloud
+tensorflow/contrib/cloud/kernels
+tensorflow/contrib/cloud/ops
+tensorflow/contrib/cloud/python
+tensorflow/contrib/cloud/python/ops
+tensorflow/contrib/cluster_resolver
+tensorflow/contrib/cluster_resolver/python
+tensorflow/contrib/cluster_resolver/python/training
+tensorflow/contrib/coder
+tensorflow/contrib/coder/kernels
+tensorflow/contrib/coder/ops
+tensorflow/contrib/coder/python
+tensorflow/contrib/coder/python/ops
+tensorflow/contrib/compiler
+tensorflow/contrib/copy_graph
+tensorflow/contrib/copy_graph/python
+tensorflow/contrib/copy_graph/python/util
+tensorflow/contrib/crf
+tensorflow/contrib/crf/python
+tensorflow/contrib/crf/python/ops
+tensorflow/contrib/cudnn_rnn
+tensorflow/contrib/cudnn_rnn/kernels
+tensorflow/contrib/cudnn_rnn/ops
+tensorflow/contrib/cudnn_rnn/python
+tensorflow/contrib/cudnn_rnn/python/layers
+tensorflow/contrib/cudnn_rnn/python/ops
+tensorflow/contrib/data
+tensorflow/contrib/data/kernels
+tensorflow/contrib/data/python
+tensorflow/contrib/data/python/kernel_tests
+tensorflow/contrib/data/python/ops
+tensorflow/contrib/decision_trees
+tensorflow/contrib/decision_trees/proto
+tensorflow/contrib/deprecated
+tensorflow/contrib/distributions
+tensorflow/contrib/distributions/python
+tensorflow/contrib/distributions/python/ops
+tensorflow/contrib/distributions/python/ops/bijectors
+tensorflow/contrib/eager
+tensorflow/contrib/eager/python
+tensorflow/contrib/estimator
+tensorflow/contrib/estimator/python
+tensorflow/contrib/estimator/python/estimator
+tensorflow/contrib/factorization
+tensorflow/contrib/factorization/examples
+tensorflow/contrib/factorization/kernels
+tensorflow/contrib/factorization/ops
+tensorflow/contrib/factorization/python
+tensorflow/contrib/factorization/python/ops
+tensorflow/contrib/ffmpeg
+tensorflow/contrib/ffmpeg/default
+tensorflow/contrib/framework
+tensorflow/contrib/framework/kernels
+tensorflow/contrib/framework/ops
+tensorflow/contrib/framework/python
+tensorflow/contrib/framework/python/framework
+tensorflow/contrib/framework/python/ops
+tensorflow/contrib/fused_conv
+tensorflow/contrib/fused_conv/kernels
+tensorflow/contrib/fused_conv/python
+tensorflow/contrib/fused_conv/python/ops
+tensorflow/contrib/gan
+tensorflow/contrib/gan/python
+tensorflow/contrib/gan/python/estimator
+tensorflow/contrib/gan/python/estimator/python
+tensorflow/contrib/gan/python/eval
+tensorflow/contrib/gan/python/eval/python
+tensorflow/contrib/gan/python/features
+tensorflow/contrib/gan/python/features/python
+tensorflow/contrib/gan/python/losses
+tensorflow/contrib/gan/python/losses/python
+tensorflow/contrib/graph_editor
+tensorflow/contrib/graph_editor/examples
+tensorflow/contrib/grid_rnn
+tensorflow/contrib/grid_rnn/python
+tensorflow/contrib/grid_rnn/python/ops
+tensorflow/contrib/hooks
+tensorflow/contrib/hooks/python
+tensorflow/contrib/image
+tensorflow/contrib/image/kernels
+tensorflow/contrib/image/ops
+tensorflow/contrib/image/python
+tensorflow/contrib/image/python/ops
+tensorflow/contrib/input_pipeline
+tensorflow/contrib/input_pipeline/kernels
+tensorflow/contrib/input_pipeline/ops
+tensorflow/contrib/input_pipeline/python
+tensorflow/contrib/input_pipeline/python/ops
+tensorflow/contrib/integrate
+tensorflow/contrib/integrate/python
+tensorflow/contrib/integrate/python/ops
+tensorflow/contrib/keras
+tensorflow/contrib/keras/api
+tensorflow/contrib/keras/api/keras
+tensorflow/contrib/keras/api/keras/activations
+tensorflow/contrib/keras/api/keras/applications
+tensorflow/contrib/keras/api/keras/applications/inception_v3
+tensorflow/contrib/keras/api/keras/applications/mobilenet
+tensorflow/contrib/keras/api/keras/applications/resnet50
+tensorflow/contrib/keras/api/keras/applications/vgg16
+tensorflow/contrib/keras/api/keras/applications/vgg19
+tensorflow/contrib/keras/api/keras/applications/xception
+tensorflow/contrib/keras/api/keras/backend
+tensorflow/contrib/keras/api/keras/callbacks
+tensorflow/contrib/keras/api/keras/constraints
+tensorflow/contrib/keras/api/keras/datasets
+tensorflow/contrib/keras/api/keras/datasets/boston_housing
+tensorflow/contrib/keras/api/keras/datasets/cifar10
+tensorflow/contrib/keras/api/keras/datasets/cifar100
+tensorflow/contrib/keras/api/keras/datasets/imdb
+tensorflow/contrib/keras/api/keras/datasets/mnist
+tensorflow/contrib/keras/api/keras/datasets/reuters
+tensorflow/contrib/keras/api/keras/initializers
+tensorflow/contrib/keras/api/keras/layers
+tensorflow/contrib/keras/api/keras/losses
+tensorflow/contrib/keras/api/keras/metrics
+tensorflow/contrib/keras/api/keras/models
+tensorflow/contrib/keras/api/keras/optimizers
+tensorflow/contrib/keras/api/keras/preprocessing
+tensorflow/contrib/keras/api/keras/preprocessing/image
+tensorflow/contrib/keras/api/keras/preprocessing/sequence
+tensorflow/contrib/keras/api/keras/preprocessing/text
+tensorflow/contrib/keras/api/keras/regularizers
+tensorflow/contrib/keras/api/keras/utils
+tensorflow/contrib/keras/api/keras/wrappers
+tensorflow/contrib/keras/api/keras/wrappers/scikit_learn
+tensorflow/contrib/kernel_methods
+tensorflow/contrib/kernel_methods/python
+tensorflow/contrib/kernel_methods/python/mappers
+tensorflow/contrib/kfac
+tensorflow/contrib/kfac/examples
+tensorflow/contrib/kfac/python
+tensorflow/contrib/kfac/python/ops
+tensorflow/contrib/labeled_tensor
+tensorflow/contrib/labeled_tensor/python
+tensorflow/contrib/labeled_tensor/python/ops
+tensorflow/contrib/layers
+tensorflow/contrib/layers/kernels
+tensorflow/contrib/layers/ops
+tensorflow/contrib/layers/python
+tensorflow/contrib/layers/python/layers
+tensorflow/contrib/layers/python/ops
+tensorflow/contrib/learn
+tensorflow/contrib/learn/python
+tensorflow/contrib/learn/python/learn
+tensorflow/contrib/learn/python/learn/datasets
+tensorflow/contrib/learn/python/learn/datasets/data
+tensorflow/contrib/learn/python/learn/estimators
+tensorflow/contrib/learn/python/learn/learn_io
+tensorflow/contrib/learn/python/learn/ops
+tensorflow/contrib/learn/python/learn/preprocessing
+tensorflow/contrib/learn/python/learn/utils
+tensorflow/contrib/legacy_seq2seq
+tensorflow/contrib/legacy_seq2seq/python
+tensorflow/contrib/legacy_seq2seq/python/ops
+tensorflow/contrib/libsvm
+tensorflow/contrib/libsvm/python
+tensorflow/contrib/libsvm/python/kernel_tests
+tensorflow/contrib/libsvm/python/ops
+tensorflow/contrib/linalg
+tensorflow/contrib/linalg/python
+tensorflow/contrib/linalg/python/ops
+tensorflow/contrib/linear_optimizer
+tensorflow/contrib/linear_optimizer/kernels
+tensorflow/contrib/linear_optimizer/kernels/g3doc
+tensorflow/contrib/linear_optimizer/python
+tensorflow/contrib/linear_optimizer/python/ops
+# TODO(drpngx): Fix failing imports
+# tensorflow/contrib/lite/python
+# tensorflow/contrib/lite/toco/python
+tensorflow/contrib/lookup
+tensorflow/contrib/losses
+tensorflow/contrib/losses/python
+tensorflow/contrib/losses/python/losses
+tensorflow/contrib/losses/python/metric_learning
+tensorflow/contrib/makefile
+tensorflow/contrib/memory_stats
+tensorflow/contrib/memory_stats/kernels
+tensorflow/contrib/memory_stats/ops
+tensorflow/contrib/memory_stats/python
+tensorflow/contrib/memory_stats/python/ops
+tensorflow/contrib/meta_graph_transform
+tensorflow/contrib/metrics
+tensorflow/contrib/metrics/python
+tensorflow/contrib/metrics/python/metrics
+tensorflow/contrib/metrics/python/ops
+tensorflow/contrib/model_pruning
+tensorflow/contrib/model_pruning/examples
+tensorflow/contrib/model_pruning/examples/cifar10
+tensorflow/contrib/model_pruning/python
+tensorflow/contrib/model_pruning/python/layers
+tensorflow/contrib/nccl
+tensorflow/contrib/nccl/kernels
+tensorflow/contrib/nccl/ops
+tensorflow/contrib/nccl/python
+tensorflow/contrib/nccl/python/ops
+tensorflow/contrib/ndlstm
+tensorflow/contrib/ndlstm/python
+tensorflow/contrib/nearest_neighbor/kernels
+tensorflow/contrib/nearest_neighbor/ops
+tensorflow/contrib/nearest_neighbor/python
+tensorflow/contrib/nearest_neighbor/python/ops
+tensorflow/contrib/nn
+tensorflow/contrib/nn/python
+tensorflow/contrib/nn/python/ops
+tensorflow/contrib/opt
+tensorflow/contrib/opt/python
+tensorflow/contrib/opt/python/training
+tensorflow/contrib/pi_examples
+tensorflow/contrib/pi_examples/camera
+tensorflow/contrib/pi_examples/label_image
+tensorflow/contrib/pi_examples/label_image/data
+tensorflow/contrib/periodic_resample
+tensorflow/contrib/periodic_resample/python
+tensorflow/contrib/periodic_resample/python/ops
+tensorflow/contrib/predictor
+tensorflow/contrib/quantization
+tensorflow/contrib/quantization/python
+tensorflow/contrib/quantize
+tensorflow/contrib/quantize/python
+tensorflow/contrib/receptive_field
+tensorflow/contrib/receptive_field/python
+tensorflow/contrib/receptive_field/python/util
+tensorflow/contrib/receptive_field/python/util/examples
+tensorflow/contrib/reduce_slice_ops
+tensorflow/contrib/reduce_slice_ops/kernels
+tensorflow/contrib/reduce_slice_ops/ops
+tensorflow/contrib/reduce_slice_ops/python
+tensorflow/contrib/reduce_slice_ops/python/ops
+tensorflow/contrib/remote_fused_graph/pylib
+tensorflow/contrib/remote_fused_graph/pylib/python
+tensorflow/contrib/remote_fused_graph/pylib/python/ops
+tensorflow/contrib/resampler
+tensorflow/contrib/resampler/kernels
+tensorflow/contrib/resampler/ops
+tensorflow/contrib/resampler/python
+tensorflow/contrib/resampler/python/ops
+tensorflow/contrib/rnn
+tensorflow/contrib/rnn/kernels
+tensorflow/contrib/rnn/ops
+tensorflow/contrib/rnn/python
+tensorflow/contrib/rnn/python/kernel_tests
+tensorflow/contrib/rnn/python/ops
+tensorflow/contrib/saved_model
+tensorflow/contrib/saved_model/python
+tensorflow/contrib/saved_model/python/saved_model
+tensorflow/contrib/seq2seq
+tensorflow/contrib/seq2seq/kernels
+tensorflow/contrib/seq2seq/ops
+tensorflow/contrib/seq2seq/python
+tensorflow/contrib/seq2seq/python/ops
+tensorflow/contrib/session_bundle
+tensorflow/contrib/session_bundle/example
+tensorflow/contrib/signal
+tensorflow/contrib/signal/python
+tensorflow/contrib/signal/python/ops
+tensorflow/contrib/slim
+tensorflow/contrib/slim/python
+tensorflow/contrib/slim/python/slim
+tensorflow/contrib/slim/python/slim/data
+tensorflow/contrib/slim/python/slim/nets
+tensorflow/contrib/solvers
+tensorflow/contrib/solvers/python
+tensorflow/contrib/solvers/python/ops
+tensorflow/contrib/sparsemax
+tensorflow/contrib/sparsemax/python
+tensorflow/contrib/sparsemax/python/ops
+tensorflow/contrib/specs
+tensorflow/contrib/specs/python
+tensorflow/contrib/staging
+tensorflow/contrib/stat_summarizer
+tensorflow/contrib/stat_summarizer/python
+tensorflow/contrib/stateless
+tensorflow/contrib/stateless/python
+tensorflow/contrib/summary
+tensorflow/contrib/tensorboard
+tensorflow/contrib/tensorboard/plugins
+tensorflow/contrib/tensorboard/plugins/projector
+tensorflow/contrib/tensor_forest
+tensorflow/contrib/tensor_forest/client
+tensorflow/contrib/tensor_forest/hybrid
+tensorflow/contrib/tensor_forest/hybrid/core
+tensorflow/contrib/tensor_forest/hybrid/core/ops
+tensorflow/contrib/tensor_forest/hybrid/python
+tensorflow/contrib/tensor_forest/hybrid/python/layers
+tensorflow/contrib/tensor_forest/hybrid/python/models
+tensorflow/contrib/tensor_forest/hybrid/python/ops
+tensorflow/contrib/tensor_forest/kernels
+tensorflow/contrib/tensor_forest/python
+tensorflow/contrib/tensor_forest/python/ops
+tensorflow/contrib/testing
+tensorflow/contrib/testing/python
+tensorflow/contrib/testing/python/framework
+tensorflow/contrib/text
+tensorflow/contrib/text/kernels
+tensorflow/contrib/text/ops
+tensorflow/contrib/text/python
+tensorflow/contrib/text/python/ops
+tensorflow/contrib/tfprof
+tensorflow/contrib/timeseries
+tensorflow/contrib/timeseries/examples
+tensorflow/contrib/timeseries/examples/data
+tensorflow/contrib/timeseries/python
+tensorflow/contrib/timeseries/python/timeseries
+tensorflow/contrib/timeseries/python/timeseries/state_space_models
+tensorflow/contrib/tpu
+tensorflow/contrib/tpu/ops
+tensorflow/contrib/tpu/profiler
+tensorflow/contrib/tpu/python
+tensorflow/contrib/tpu/python/ops
+tensorflow/contrib/tpu/python/profiler
+tensorflow/contrib/tpu/python/tpu
+tensorflow/contrib/training
+tensorflow/contrib/training/python
+tensorflow/contrib/training/python/training
+tensorflow/contrib/util
diff --git a/tensorflow/contrib/cmake/python_protos.txt b/tensorflow/contrib/cmake/python_protos.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8a9c406d8b118c10ddcaafb0e4fc242aa79cdb57
--- /dev/null
+++ b/tensorflow/contrib/cmake/python_protos.txt
@@ -0,0 +1,19 @@
+tensorflow/core
+tensorflow/core/profiler
+tensorflow/python
+tensorflow/contrib/boosted_trees/proto
+tensorflow/contrib/cloud/kernels
+tensorflow/contrib/decision_trees/proto
+tensorflow/contrib/gdr
+tensorflow/contrib/lite/toco
+tensorflow/contrib/mpi
+tensorflow/contrib/mpi_collectives
+tensorflow/contrib/session_bundle
+tensorflow/contrib/tensor_forest/proto
+tensorflow/contrib/tensorboard/graph_explorer/proto
+tensorflow/contrib/tensorboard/plugins/projector
+tensorflow/contrib/tensorboard/plugins/trace
+tensorflow/contrib/tpu/proto
+tensorflow/contrib/tpu/profiler
+tensorflow/contrib/training/python/training
+tensorflow/contrib/verbs
diff --git a/tensorflow/contrib/cmake/python_protos_cc.txt b/tensorflow/contrib/cmake/python_protos_cc.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d4a257b25c814a1464308d0e6ce3ce65d21f6a36
--- /dev/null
+++ b/tensorflow/contrib/cmake/python_protos_cc.txt
@@ -0,0 +1,5 @@
+tensorflow/core/profiler
+tensorflow/python
+tensorflow/contrib/session_bundle
+tensorflow/contrib/tensorboard
+tensorflow/contrib/training
diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake
index 6e2ac203f9a7f96cb14752a91483840a9eb6b451..f3cf3e70441de67ef79bc9cedf85549315170c29 100644
--- a/tensorflow/contrib/cmake/tf_cc_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake
@@ -83,7 +83,7 @@ foreach(tf_cc_op_lib_name ${tf_cc_op_lib_names})
                ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc
                ${cc_ops_target_dir}/${tf_cc_op_lib_name}_internal.h
                ${cc_ops_target_dir}/${tf_cc_op_lib_name}_internal.cc
-        COMMAND ${tf_cc_op_lib_name}_gen_cc ${cc_ops_target_dir}/${tf_cc_op_lib_name}.h ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc ${tensorflow_source_dir}/tensorflow/cc/ops/op_gen_overrides.pbtxt ${cc_ops_include_internal} ${tensorflow_source_dir}/tensorflow/core/api_def/base_api
+        COMMAND ${tf_cc_op_lib_name}_gen_cc ${cc_ops_target_dir}/${tf_cc_op_lib_name}.h ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc ${cc_ops_include_internal} ${tensorflow_source_dir}/tensorflow/core/api_def/base_api
         DEPENDS ${tf_cc_op_lib_name}_gen_cc create_cc_ops_header_dir
     )
 
diff --git a/tensorflow/contrib/cmake/tf_core_cpu.cmake b/tensorflow/contrib/cmake/tf_core_cpu.cmake
index 5c01ca382fb9cc7a01a6f2b60a510c59f0aa7119..e4213ea2a47da2a7381cccd0504235ad62018d4e 100644
--- a/tensorflow/contrib/cmake/tf_core_cpu.cmake
+++ b/tensorflow/contrib/cmake/tf_core_cpu.cmake
@@ -63,7 +63,7 @@ if (tensorflow_ENABLE_GPU)
   file(GLOB_RECURSE tf_core_gpu_srcs
     "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/*.cc"
     "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu/cupti_wrapper.cc"
-    "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu_tracer.cc"
+    "${tensorflow_source_dir}/tensorflow/core/platform/default/device_tracer.cc"
     "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu_device_factory.cc"
     "${tensorflow_source_dir}/tensorflow/core/grappler/devices.h"
     "${tensorflow_source_dir}/tensorflow/core/grappler/devices.cc"
diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake
index c607546f4a5244fb6e7cd12db874f07a962f6f4d..24d7fb82a268623be06c2b98b5857b6b9b95c3a1 100644
--- a/tensorflow/contrib/cmake/tf_core_framework.cmake
+++ b/tensorflow/contrib/cmake/tf_core_framework.cmake
@@ -191,10 +191,6 @@ file(GLOB_RECURSE tf_core_lib_srcs
     "${tensorflow_source_dir}/tensorflow/core/lib/*.h"
     "${tensorflow_source_dir}/tensorflow/core/lib/*.cc"
     "${tensorflow_source_dir}/tensorflow/core/public/*.h"
-    # TODO(@jart): Move StatusOr into core.
-    "${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.cc"
-    "${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.h"
-    "${tensorflow_source_dir}/tensorflow/compiler/xla/statusor_internals.h"
 )
 
 file(GLOB tf_core_platform_srcs
@@ -211,7 +207,7 @@ if (NOT tensorflow_ENABLE_GPU)
   list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_gpu_srcs})
 else()
   file(GLOB tf_core_platform_srcs_exclude
-      "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu_tracer.cc")
+      "${tensorflow_source_dir}/tensorflow/core/platform/default/device_tracer.cc")
   list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_srcs_exclude})
 endif()
 
@@ -317,8 +313,15 @@ file(GLOB_RECURSE tf_core_framework_exclude_srcs
     "${tensorflow_source_dir}/tensorflow/core/util/*test*.cc"
     "${tensorflow_source_dir}/tensorflow/core/util/*main.cc"
     "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/*test*.cc"
+    "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/loader.cc"
+    "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/vacuum.cc"
 )
 
+# TODO(jart): Why doesn't this work?
+# set_source_files_properties(
+#     ${tensorflow_source_dir}/tensorflow/contrib/tensorboard/db/snapfn.cc
+#     PROPERTIES COMPILE_FLAGS -DSQLITE_OMIT_LOAD_EXTENSION)
+
 list(REMOVE_ITEM tf_core_framework_srcs ${tf_core_framework_exclude_srcs})
 
 add_library(tf_core_framework OBJECT
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index 2d015908a890fd7757bf212573f4ebce8ba8b30d..6927bf03f08b68a1f13f6a0978af629af45575e8 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -63,6 +63,10 @@ if(tensorflow_BUILD_CONTRIB_KERNELS)
       "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/split_handler_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/stats_accumulator_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/training_ops.cc"
+      "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/range_coder.cc"
+      "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/range_coder_ops.cc"
+      "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/range_coder_ops_util.cc"
+      "${tensorflow_source_dir}/tensorflow/contrib/coder/ops/coder_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/kernels/cudnn_rnn_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/prefetching_kernels.cc"
@@ -79,12 +83,15 @@ if(tensorflow_BUILD_CONTRIB_KERNELS)
       "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/bipartite_match_op.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/image_ops.cc"
+      "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/segmentation_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/single_image_random_dot_stereograms_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/image/ops/distort_image_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/image/ops/image_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc"
+      "${tensorflow_source_dir}/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc"
+      "${tensorflow_source_dir}/tensorflow/contrib/libsvm/ops/libsvm_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_manager.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_ops.cc"
       "${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc"
@@ -150,9 +157,6 @@ list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_exclude_srcs})
 if(WIN32)
   file(GLOB_RECURSE tf_core_kernels_windows_exclude_srcs
       # not working on windows yet
-      "${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*"
-      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
-      "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
       "${tensorflow_source_dir}/tensorflow/core/kernels/neon/*"
       # not in core - those are loaded dynamically as dll
       "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc"
diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake
index e8c2cd347327843d10d13c1d24a800ff776aa8c1..6f56e9d0869bc0d3311ffbc68326f8ab43758019 100644
--- a/tensorflow/contrib/cmake/tf_core_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_core_ops.cmake
@@ -26,6 +26,7 @@ set(tf_op_lib_names
     "image_ops"
     "io_ops"
     "linalg_ops"
+		"list_ops"
     "lookup_ops"
     "logging_ops"
     "math_ops"
@@ -80,6 +81,7 @@ GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_training "${tensorflow_source_dir}/ten
 GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_prediction "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_quantiles "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_stats_accumulator "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/stats_accumulator_ops.cc")
+GENERATE_CONTRIB_OP_LIBRARY(coder "${tensorflow_source_dir}/tensorflow/contrib/coder/ops/coder_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(cudnn_rnn "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(data_prefetching "${tensorflow_source_dir}/tensorflow/contrib/data/ops/prefetching_ops.cc")
 GENERATE_CONTRIB_OP_LIBRARY(factorization_clustering "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/clustering_ops.cc")
diff --git a/tensorflow/contrib/cmake/tf_core_profiler.cmake b/tensorflow/contrib/cmake/tf_core_profiler.cmake
index 61ed6a1e145299125d037b48b8b644cae1ce96e7..b91a7f43e5c03e933d10572e54e0c8c914c55f71 100644
--- a/tensorflow/contrib/cmake/tf_core_profiler.cmake
+++ b/tensorflow/contrib/cmake/tf_core_profiler.cmake
@@ -17,6 +17,8 @@
 ########################################################
 file(GLOB_RECURSE tf_core_profiler_srcs
     "${tensorflow_source_dir}/tensorflow/core/profiler/*.proto"
+    "${tensorflow_source_dir}/tensorflow/core/profiler/tfprof_options.h"
+    "${tensorflow_source_dir}/tensorflow/core/profiler/tfprof_options.cc"
     "${tensorflow_source_dir}/tensorflow/core/profiler/internal/*.h"
     "${tensorflow_source_dir}/tensorflow/core/profiler/internal/*.cc"
     "${tensorflow_source_dir}/tensorflow/core/profiler/internal/advisor/*.h"
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 5e15a972d6272151e128c37dfe398225e3b4f44e..17bbdb1a86f4a1b026b6d159a7b8adad9a3d1f57 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -120,33 +120,44 @@ function(RELATIVE_PROTOBUF_GENERATE_CPP SRCS HDRS ROOT_DIR)
   set(${HDRS} ${${HDRS}} PARENT_SCOPE)
 endfunction()
 
-file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir}
-    "${tensorflow_source_dir}/tensorflow/core/*.proto"
-    "${tensorflow_source_dir}/tensorflow/core/profiler/*.proto"
-    "${tensorflow_source_dir}/tensorflow/python/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/decision_trees/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tpu/proto/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tpu/profiler/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/training/*.proto"
-)
+FILE(READ python_protos.txt python_protos)
+# Convert file contents into a CMake list (where each element in the list is one line of the file)
+STRING(REGEX REPLACE ";" "\\\\;" python_protos "${python_protos}")
+STRING(REGEX REPLACE "\n" ";" python_protos "${python_protos}")
+
+foreach(python_proto ${python_protos})
+  if(NOT python_proto MATCHES "\#")
+    if(NOT EXISTS "${tensorflow_source_dir}/${python_proto}")
+      message(SEND_ERROR "Python proto directory not found: ${python_proto}")
+    endif()
+    file(GLOB_RECURSE tf_python_protos_src RELATIVE ${tensorflow_source_dir}
+        "${tensorflow_source_dir}/${python_proto}/*.proto"
+    )
+    list(APPEND tf_python_protos_srcs ${tf_python_protos_src})
+  endif()
+endforeach(python_proto)
+
 RELATIVE_PROTOBUF_GENERATE_PYTHON(
-    ${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_protos_python_srcs}
+    ${tensorflow_source_dir} PYTHON_PROTO_GENFILES ${tf_python_protos_srcs}
 )
 
-# NOTE(mrry): Avoid regenerating the tensorflow/core protos because this
-# can cause benign-but-failing-on-Windows-due-to-file-locking conflicts
-# when two rules attempt to generate the same file.
-file(GLOB_RECURSE tf_python_protos_cc_srcs RELATIVE ${tensorflow_source_dir}
-    "${tensorflow_source_dir}/tensorflow/core/profiler/*.proto"
-    "${tensorflow_source_dir}/tensorflow/python/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/session_bundle/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/tensorboard/*.proto"
-    "${tensorflow_source_dir}/tensorflow/contrib/training/*.proto"
-)
+FILE(READ python_protos_cc.txt python_protos_cc)
+# Convert file contents into a CMake list (where each element in the list is one line of the file)
+STRING(REGEX REPLACE ";" "\\\\;" python_protos_cc "${python_protos_cc}")
+STRING(REGEX REPLACE "\n" ";" python_protos_cc "${python_protos_cc}")
+
+foreach(python_proto_cc ${python_protos_cc})
+  if(NOT python_proto_cc MATCHES "\#")
+    if(NOT EXISTS "${tensorflow_source_dir}/${python_proto_cc}")
+      message(SEND_ERROR "Python proto CC directory not found: ${python_proto_cc}")
+    endif()
+    file(GLOB_RECURSE tf_python_protos_cc_src RELATIVE ${tensorflow_source_dir}
+        "${tensorflow_source_dir}/${python_proto_cc}/*.proto"
+    )
+    list(APPEND tf_python_protos_cc_srcs ${tf_python_protos_cc_src})
+  endif()
+endforeach(python_proto_cc)
+
 RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS
     ${tensorflow_source_dir} ${tf_python_protos_cc_srcs}
 )
@@ -192,315 +203,20 @@ function(add_python_module MODULE_NAME)
     endif()
 endfunction()
 
-add_python_module("tensorflow")
-add_python_module("tensorflow/core")
-add_python_module("tensorflow/core/example")
-add_python_module("tensorflow/core/framework")
-add_python_module("tensorflow/core/lib")
-add_python_module("tensorflow/core/lib/core")
-add_python_module("tensorflow/core/protobuf")
-add_python_module("tensorflow/core/util")
-add_python_module("tensorflow/examples")
-add_python_module("tensorflow/examples/tutorials")
-add_python_module("tensorflow/examples/tutorials/mnist")
-add_python_module("tensorflow/python")
-add_python_module("tensorflow/python/client")
-add_python_module("tensorflow/python/data")
-add_python_module("tensorflow/python/data/ops")
-add_python_module("tensorflow/python/data/util")
-add_python_module("tensorflow/python/debug")
-add_python_module("tensorflow/python/debug/cli")
-add_python_module("tensorflow/python/debug/examples")
-add_python_module("tensorflow/python/debug/lib")
-add_python_module("tensorflow/python/debug/wrappers")
-add_python_module("tensorflow/python/eager")
-add_python_module("tensorflow/python/estimator")
-add_python_module("tensorflow/python/estimator/canned")
-add_python_module("tensorflow/python/estimator/export")
-add_python_module("tensorflow/python/estimator/inputs")
-add_python_module("tensorflow/python/estimator/inputs/queues")
-add_python_module("tensorflow/python/feature_column")
-add_python_module("tensorflow/python/framework")
-add_python_module("tensorflow/python/grappler")
-add_python_module("tensorflow/python/keras")
-add_python_module("tensorflow/python/keras/activations")
-add_python_module("tensorflow/python/keras/applications")
-add_python_module("tensorflow/python/keras/applications/inception_resnet_v2")
-add_python_module("tensorflow/python/keras/applications/inception_v3")
-add_python_module("tensorflow/python/keras/applications/mobilenet")
-add_python_module("tensorflow/python/keras/applications/resnet50")
-add_python_module("tensorflow/python/keras/applications/vgg16")
-add_python_module("tensorflow/python/keras/applications/vgg19")
-add_python_module("tensorflow/python/keras/applications/xception")
-add_python_module("tensorflow/python/keras/backend")
-add_python_module("tensorflow/python/keras/callbacks")
-add_python_module("tensorflow/python/keras/constraints")
-add_python_module("tensorflow/python/keras/datasets")
-add_python_module("tensorflow/python/keras/datasets/boston_housing")
-add_python_module("tensorflow/python/keras/datasets/cifar10")
-add_python_module("tensorflow/python/keras/datasets/cifar100")
-add_python_module("tensorflow/python/keras/datasets/fashion_mnist")
-add_python_module("tensorflow/python/keras/datasets/imdb")
-add_python_module("tensorflow/python/keras/datasets/mnist")
-add_python_module("tensorflow/python/keras/datasets/reuters")
-add_python_module("tensorflow/python/keras/estimator")
-add_python_module("tensorflow/python/keras/initializers")
-add_python_module("tensorflow/python/keras/layers")
-add_python_module("tensorflow/python/keras/losses")
-add_python_module("tensorflow/python/keras/metrics")
-add_python_module("tensorflow/python/keras/models")
-add_python_module("tensorflow/python/keras/optimizers")
-add_python_module("tensorflow/python/keras/preprocessing")
-add_python_module("tensorflow/python/keras/preprocessing/image")
-add_python_module("tensorflow/python/keras/preprocessing/sequence")
-add_python_module("tensorflow/python/keras/preprocessing/text")
-add_python_module("tensorflow/python/keras/regularizers")
-add_python_module("tensorflow/python/keras/utils")
-add_python_module("tensorflow/python/keras/wrappers")
-add_python_module("tensorflow/python/keras/wrappers/scikit_learn")
-add_python_module("tensorflow/python/keras/_impl")
-add_python_module("tensorflow/python/keras/_impl/keras")
-add_python_module("tensorflow/python/keras/_impl/keras/applications")
-add_python_module("tensorflow/python/keras/_impl/keras/datasets")
-add_python_module("tensorflow/python/keras/_impl/keras/engine")
-add_python_module("tensorflow/python/keras/_impl/keras/layers")
-add_python_module("tensorflow/python/keras/_impl/keras/preprocessing")
-add_python_module("tensorflow/python/keras/_impl/keras/utils")
-add_python_module("tensorflow/python/keras/_impl/keras/wrappers")
-add_python_module("tensorflow/python/kernel_tests")
-add_python_module("tensorflow/python/kernel_tests/distributions")
-add_python_module("tensorflow/python/kernel_tests/linalg")
-add_python_module("tensorflow/python/layers")
-add_python_module("tensorflow/python/lib")
-add_python_module("tensorflow/python/lib/core")
-add_python_module("tensorflow/python/lib/io")
-add_python_module("tensorflow/python/ops")
-add_python_module("tensorflow/python/ops/distributions")
-add_python_module("tensorflow/python/ops/linalg")
-add_python_module("tensorflow/python/ops/losses")
-add_python_module("tensorflow/python/platform")
-add_python_module("tensorflow/python/platform/default")
-add_python_module("tensorflow/python/platform/summary")
-add_python_module("tensorflow/python/profiler/")
-add_python_module("tensorflow/python/profiler/internal")
-add_python_module("tensorflow/python/saved_model")
-add_python_module("tensorflow/python/summary")
-add_python_module("tensorflow/python/summary/writer")
-add_python_module("tensorflow/python/tools")
-add_python_module("tensorflow/python/training")
-add_python_module("tensorflow/python/user_ops")
-add_python_module("tensorflow/python/util")
-add_python_module("tensorflow/python/util/protobuf")
-add_python_module("tensorflow/tools")
-add_python_module("tensorflow/tools/graph_transforms")
-add_python_module("tensorflow/contrib")
-add_python_module("tensorflow/contrib/all_reduce")
-add_python_module("tensorflow/contrib/all_reduce/python")
-add_python_module("tensorflow/contrib/android")
-add_python_module("tensorflow/contrib/android/java")
-add_python_module("tensorflow/contrib/android/java/org")
-add_python_module("tensorflow/contrib/android/java/org/tensorflow")
-add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib")
-add_python_module("tensorflow/contrib/android/java/org/tensorflow/contrib/android")
-add_python_module("tensorflow/contrib/android/jni")
-add_python_module("tensorflow/contrib/bayesflow")
-add_python_module("tensorflow/contrib/bayesflow/examples")
-add_python_module("tensorflow/contrib/bayesflow/examples/reinforce_simple")
-add_python_module("tensorflow/contrib/bayesflow/python")
-add_python_module("tensorflow/contrib/bayesflow/python/kernel_tests")
-add_python_module("tensorflow/contrib/bayesflow/python/ops")
-add_python_module("tensorflow/contrib/boosted_trees")
-add_python_module("tensorflow/contrib/boosted_trees/estimator_batch")
-add_python_module("tensorflow/contrib/boosted_trees/ops")
-add_python_module("tensorflow/contrib/boosted_trees/proto")
-add_python_module("tensorflow/contrib/boosted_trees/python")
-add_python_module("tensorflow/contrib/boosted_trees/python/kernel_tests")
-add_python_module("tensorflow/contrib/boosted_trees/python/ops")
-add_python_module("tensorflow/contrib/cloud")
-add_python_module("tensorflow/contrib/cloud/kernels")
-add_python_module("tensorflow/contrib/cloud/ops")
-add_python_module("tensorflow/contrib/cloud/python")
-add_python_module("tensorflow/contrib/cloud/python/ops")
-add_python_module("tensorflow/contrib/cluster_resolver")
-add_python_module("tensorflow/contrib/cluster_resolver/python")
-add_python_module("tensorflow/contrib/cluster_resolver/python/training")
-add_python_module("tensorflow/contrib/compiler")
-add_python_module("tensorflow/contrib/copy_graph")
-add_python_module("tensorflow/contrib/copy_graph/python")
-add_python_module("tensorflow/contrib/copy_graph/python/util")
-add_python_module("tensorflow/contrib/crf")
-add_python_module("tensorflow/contrib/crf/python")
-add_python_module("tensorflow/contrib/crf/python/kernel_tests")
-add_python_module("tensorflow/contrib/crf/python/ops")
-add_python_module("tensorflow/contrib/cudnn_rnn")
-add_python_module("tensorflow/contrib/cudnn_rnn/kernels")
-add_python_module("tensorflow/contrib/cudnn_rnn/ops")
-add_python_module("tensorflow/contrib/cudnn_rnn/python")
-add_python_module("tensorflow/contrib/cudnn_rnn/python/kernel_tests")
-add_python_module("tensorflow/contrib/cudnn_rnn/python/layers")
-add_python_module("tensorflow/contrib/cudnn_rnn/python/ops")
-add_python_module("tensorflow/contrib/data")
-add_python_module("tensorflow/contrib/data/python")
-add_python_module("tensorflow/contrib/data/python/kernel_tests")
-add_python_module("tensorflow/contrib/data/python/ops")
-add_python_module("tensorflow/contrib/decision_trees")
-add_python_module("tensorflow/contrib/decision_trees/proto")
-add_python_module("tensorflow/contrib/deprecated")
-add_python_module("tensorflow/contrib/distributions")
-add_python_module("tensorflow/contrib/distributions/python")
-add_python_module("tensorflow/contrib/distributions/python/kernel_tests")
-add_python_module("tensorflow/contrib/distributions/python/ops")
-add_python_module("tensorflow/contrib/distributions/python/ops/bijectors")
-add_python_module("tensorflow/contrib/eager")
-add_python_module("tensorflow/contrib/eager/python")
-add_python_module("tensorflow/contrib/estimator")
-add_python_module("tensorflow/contrib/estimator/python")
-add_python_module("tensorflow/contrib/estimator/python/estimator")
-add_python_module("tensorflow/contrib/factorization")
-add_python_module("tensorflow/contrib/factorization/examples")
-add_python_module("tensorflow/contrib/factorization/kernels")
-add_python_module("tensorflow/contrib/factorization/ops")
-add_python_module("tensorflow/contrib/factorization/python")
-add_python_module("tensorflow/contrib/factorization/python/kernel_tests")
-add_python_module("tensorflow/contrib/factorization/python/ops")
-add_python_module("tensorflow/contrib/ffmpeg")
-add_python_module("tensorflow/contrib/ffmpeg/default")
-add_python_module("tensorflow/contrib/ffmpeg/testdata")
-add_python_module("tensorflow/contrib/framework")
-add_python_module("tensorflow/contrib/framework/kernels")
-add_python_module("tensorflow/contrib/framework/ops")
-add_python_module("tensorflow/contrib/framework/python")
-add_python_module("tensorflow/contrib/framework/python/framework")
-add_python_module("tensorflow/contrib/framework/python/ops")
-add_python_module("tensorflow/contrib/gan")
-add_python_module("tensorflow/contrib/gan/python")
-add_python_module("tensorflow/contrib/gan/python/eval")
-add_python_module("tensorflow/contrib/gan/python/eval/python")
-add_python_module("tensorflow/contrib/gan/python/features")
-add_python_module("tensorflow/contrib/gan/python/features/python")
-add_python_module("tensorflow/contrib/gan/python/estimator")
-add_python_module("tensorflow/contrib/gan/python/estimator/python")
-add_python_module("tensorflow/contrib/gan/python/losses")
-add_python_module("tensorflow/contrib/gan/python/losses/python")
-add_python_module("tensorflow/contrib/graph_editor")
-add_python_module("tensorflow/contrib/graph_editor/examples")
-add_python_module("tensorflow/contrib/graph_editor/tests")
-add_python_module("tensorflow/contrib/grid_rnn")
-add_python_module("tensorflow/contrib/grid_rnn/python")
-add_python_module("tensorflow/contrib/grid_rnn/python/kernel_tests")
-add_python_module("tensorflow/contrib/grid_rnn/python/ops")
-add_python_module("tensorflow/contrib/hooks")
-add_python_module("tensorflow/contrib/image")
-add_python_module("tensorflow/contrib/image/ops")
-add_python_module("tensorflow/contrib/image/python")
-add_python_module("tensorflow/contrib/image/python/ops")
-add_python_module("tensorflow/contrib/input_pipeline")
-add_python_module("tensorflow/contrib/input_pipeline/ops")
-add_python_module("tensorflow/contrib/input_pipeline/python")
-add_python_module("tensorflow/contrib/input_pipeline/python/ops")
-add_python_module("tensorflow/contrib/integrate")
-add_python_module("tensorflow/contrib/integrate/python")
-add_python_module("tensorflow/contrib/integrate/python/ops")
-add_python_module("tensorflow/contrib/ios_examples")
-add_python_module("tensorflow/contrib/ios_examples/benchmark")
-add_python_module("tensorflow/contrib/ios_examples/benchmark/benchmark.xcodeproj")
-add_python_module("tensorflow/contrib/ios_examples/benchmark/data")
-add_python_module("tensorflow/contrib/ios_examples/camera")
-add_python_module("tensorflow/contrib/ios_examples/camera/camera_example.xcodeproj")
-add_python_module("tensorflow/contrib/ios_examples/camera/en.lproj")
-add_python_module("tensorflow/contrib/ios_examples/simple")
-add_python_module("tensorflow/contrib/ios_examples/simple/data")
-add_python_module("tensorflow/contrib/ios_examples/simple/tf_ios_makefile_example.xcodeproj")
-add_python_module("tensorflow/contrib/keras")
-add_python_module("tensorflow/contrib/keras/api")
-add_python_module("tensorflow/contrib/keras/api/keras")
-add_python_module("tensorflow/contrib/keras/api/keras/activations")
-add_python_module("tensorflow/contrib/keras/api/keras/applications")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/inception_v3")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/mobilenet")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/resnet50")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/vgg16")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/vgg19")
-add_python_module("tensorflow/contrib/keras/api/keras/applications/xception")
-add_python_module("tensorflow/contrib/keras/api/keras/backend")
-add_python_module("tensorflow/contrib/keras/api/keras/callbacks")
-add_python_module("tensorflow/contrib/keras/api/keras/constraints")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/boston_housing")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/cifar10")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/cifar100")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/imdb")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/mnist")
-add_python_module("tensorflow/contrib/keras/api/keras/datasets/reuters")
-add_python_module("tensorflow/contrib/keras/api/keras/initializers")
-add_python_module("tensorflow/contrib/keras/api/keras/layers")
-add_python_module("tensorflow/contrib/keras/api/keras/losses")
-add_python_module("tensorflow/contrib/keras/api/keras/metrics")
-add_python_module("tensorflow/contrib/keras/api/keras/models")
-add_python_module("tensorflow/contrib/keras/api/keras/optimizers")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing/image")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing/sequence")
-add_python_module("tensorflow/contrib/keras/api/keras/preprocessing/text")
-add_python_module("tensorflow/contrib/keras/api/keras/regularizers")
-add_python_module("tensorflow/contrib/keras/api/keras/utils")
-add_python_module("tensorflow/contrib/keras/api/keras/wrappers")
-add_python_module("tensorflow/contrib/keras/api/keras/wrappers/scikit_learn")
-add_python_module("tensorflow/contrib/keras/python")
-add_python_module("tensorflow/contrib/keras/python/keras")
-add_python_module("tensorflow/contrib/keras/python/keras/applications")
-add_python_module("tensorflow/contrib/keras/python/keras/datasets")
-add_python_module("tensorflow/contrib/keras/python/keras/engine")
-add_python_module("tensorflow/contrib/keras/python/keras/layers")
-add_python_module("tensorflow/contrib/keras/python/keras/preprocessing")
-add_python_module("tensorflow/contrib/keras/python/keras/utils")
-add_python_module("tensorflow/contrib/keras/python/keras/wrappers")
-add_python_module("tensorflow/contrib/kernel_methods")
-add_python_module("tensorflow/contrib/kernel_methods/python")
-add_python_module("tensorflow/contrib/kernel_methods/python/mappers")
-add_python_module("tensorflow/contrib/kfac")
-add_python_module("tensorflow/contrib/kfac/examples")
-add_python_module("tensorflow/contrib/kfac/python")
-add_python_module("tensorflow/contrib/kfac/python/ops")
-add_python_module("tensorflow/contrib/labeled_tensor")
-add_python_module("tensorflow/contrib/labeled_tensor/python")
-add_python_module("tensorflow/contrib/labeled_tensor/python/ops")
-add_python_module("tensorflow/contrib/layers")
-add_python_module("tensorflow/contrib/layers/kernels")
-add_python_module("tensorflow/contrib/layers/ops")
-add_python_module("tensorflow/contrib/layers/python")
-add_python_module("tensorflow/contrib/layers/python/kernel_tests")
-add_python_module("tensorflow/contrib/layers/python/layers")
-add_python_module("tensorflow/contrib/layers/python/ops")
-add_python_module("tensorflow/contrib/learn")
-add_python_module("tensorflow/contrib/learn/python")
-add_python_module("tensorflow/contrib/learn/python/learn")
-add_python_module("tensorflow/contrib/learn/python/learn/dataframe")
-add_python_module("tensorflow/contrib/learn/python/learn/dataframe/queues")
-add_python_module("tensorflow/contrib/learn/python/learn/dataframe/transforms")
-add_python_module("tensorflow/contrib/learn/python/learn/datasets")
-add_python_module("tensorflow/contrib/learn/python/learn/datasets/data")
-add_python_module("tensorflow/contrib/learn/python/learn/estimators")
-add_python_module("tensorflow/contrib/learn/python/learn/learn_io")
-add_python_module("tensorflow/contrib/learn/python/learn/ops")
-add_python_module("tensorflow/contrib/learn/python/learn/preprocessing")
-add_python_module("tensorflow/contrib/learn/python/learn/preprocessing/tests")
-add_python_module("tensorflow/contrib/learn/python/learn/tests")
-add_python_module("tensorflow/contrib/learn/python/learn/tests/dataframe")
-add_python_module("tensorflow/contrib/learn/python/learn/utils")
-add_python_module("tensorflow/contrib/legacy_seq2seq")
-add_python_module("tensorflow/contrib/legacy_seq2seq/python")
-add_python_module("tensorflow/contrib/legacy_seq2seq/python/ops")
-add_python_module("tensorflow/contrib/linalg")
-add_python_module("tensorflow/contrib/linalg/python")
-add_python_module("tensorflow/contrib/linalg/python/ops")
-add_python_module("tensorflow/contrib/linalg/python/kernel_tests")
-add_python_module("tensorflow/contrib/linear_optimizer")
-add_python_module("tensorflow/contrib/linear_optimizer/kernels")
-add_python_module("tensorflow/contrib/linear_optimizer/kernels/g3doc")
-add_python_module("tensorflow/contrib/linear_optimizer/python")
-add_python_module("tensorflow/contrib/linear_optimizer/python/kernel_tests")
-add_python_module("tensorflow/contrib/linear_optimizer/python/ops")
+FILE(READ python_modules.txt python_modules)
+# Convert file contents into a CMake list (where each element in the list is one line of the file)
+STRING(REGEX REPLACE ";" "\\\\;" python_modules "${python_modules}")
+STRING(REGEX REPLACE "\n" ";" python_modules "${python_modules}")
+
+foreach(python_module ${python_modules})
+  if(NOT python_module MATCHES "\#")
+    if(NOT EXISTS "${tensorflow_source_dir}/${python_module}")
+      message(SEND_ERROR "Python module not found: ${python_module}")
+    endif()
+    add_python_module(${python_module})
+  endif()
+endforeach(python_module)
+
 add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
     COMMAND ${CMAKE_COMMAND} -E make_directory
     "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/lite")
@@ -514,161 +230,6 @@ add_custom_command(
     TARGET tf_python_copy_scripts_to_destination PRE_BUILD
     COMMAND ${CMAKE_COMMAND} -E touch
     ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/lite/python/lite.py)
-add_python_module("tensorflow/contrib/lookup")
-add_python_module("tensorflow/contrib/losses")
-add_python_module("tensorflow/contrib/losses/python")
-add_python_module("tensorflow/contrib/losses/python/losses")
-add_python_module("tensorflow/contrib/losses/python/metric_learning")
-add_python_module("tensorflow/contrib/makefile")
-add_python_module("tensorflow/contrib/makefile/test")
-add_python_module("tensorflow/contrib/memory_stats")
-add_python_module("tensorflow/contrib/memory_stats/kernels")
-add_python_module("tensorflow/contrib/memory_stats/ops")
-add_python_module("tensorflow/contrib/memory_stats/python")
-add_python_module("tensorflow/contrib/memory_stats/python/kernel_tests")
-add_python_module("tensorflow/contrib/memory_stats/python/ops")
-add_python_module("tensorflow/contrib/meta_graph_transform")
-add_python_module("tensorflow/contrib/metrics")
-add_python_module("tensorflow/contrib/metrics/kernels")
-add_python_module("tensorflow/contrib/metrics/ops")
-add_python_module("tensorflow/contrib/metrics/python")
-add_python_module("tensorflow/contrib/metrics/python/kernel_tests")
-add_python_module("tensorflow/contrib/metrics/python/metrics")
-add_python_module("tensorflow/contrib/metrics/python/ops")
-add_python_module("tensorflow/contrib/model_pruning")
-add_python_module("tensorflow/contrib/model_pruning/examples")
-add_python_module("tensorflow/contrib/model_pruning/examples/cifar10")
-add_python_module("tensorflow/contrib/model_pruning/python")
-add_python_module("tensorflow/contrib/model_pruning/python/layers")
-add_python_module("tensorflow/contrib/ndlstm")
-add_python_module("tensorflow/contrib/ndlstm/python")
-add_python_module("tensorflow/contrib/nn")
-add_python_module("tensorflow/contrib/nn/python")
-add_python_module("tensorflow/contrib/nn/python/ops")
-add_python_module("tensorflow/contrib/nccl")
-add_python_module("tensorflow/contrib/nccl/kernels")
-add_python_module("tensorflow/contrib/nccl/ops")
-add_python_module("tensorflow/contrib/nccl/python")
-add_python_module("tensorflow/contrib/nccl/python/ops")
-add_python_module("tensorflow/contrib/nearest_neighbor/kernels")
-add_python_module("tensorflow/contrib/nearest_neighbor/ops")
-add_python_module("tensorflow/contrib/nearest_neighbor/python")
-add_python_module("tensorflow/contrib/nearest_neighbor/python/kernel_tests")
-add_python_module("tensorflow/contrib/nearest_neighbor/python/ops")
-add_python_module("tensorflow/contrib/opt")
-add_python_module("tensorflow/contrib/opt/python")
-add_python_module("tensorflow/contrib/opt/python/training")
-add_python_module("tensorflow/contrib/pi_examples")
-add_python_module("tensorflow/contrib/pi_examples/camera")
-add_python_module("tensorflow/contrib/pi_examples/label_image")
-add_python_module("tensorflow/contrib/pi_examples/label_image/data")
-add_python_module("tensorflow/contrib/periodic_resample")
-add_python_module("tensorflow/contrib/periodic_resample/python")
-add_python_module("tensorflow/contrib/periodic_resample/python/ops")
-add_python_module("tensorflow/contrib/periodic_resample/python/kernel_tests")
-add_python_module("tensorflow/contrib/predictor")
-add_python_module("tensorflow/contrib/quantization")
-add_python_module("tensorflow/contrib/quantization/python")
-add_python_module("tensorflow/contrib/quantize")
-add_python_module("tensorflow/contrib/quantize/python")
-add_python_module("tensorflow/contrib/remote_fused_graph/pylib")
-add_python_module("tensorflow/contrib/remote_fused_graph/pylib/python")
-add_python_module("tensorflow/contrib/remote_fused_graph/pylib/python/ops")
-add_python_module("tensorflow/contrib/resampler")
-add_python_module("tensorflow/contrib/resampler/kernels")
-add_python_module("tensorflow/contrib/resampler/ops")
-add_python_module("tensorflow/contrib/resampler/python")
-add_python_module("tensorflow/contrib/resampler/python/ops")
-add_python_module("tensorflow/contrib/rnn")
-add_python_module("tensorflow/contrib/rnn/kernels")
-add_python_module("tensorflow/contrib/rnn/ops")
-add_python_module("tensorflow/contrib/rnn/python")
-add_python_module("tensorflow/contrib/rnn/python/kernel_tests")
-add_python_module("tensorflow/contrib/rnn/python/ops")
-add_python_module("tensorflow/contrib/saved_model")
-add_python_module("tensorflow/contrib/saved_model/python")
-add_python_module("tensorflow/contrib/saved_model/python/saved_model")
-add_python_module("tensorflow/contrib/seq2seq")
-add_python_module("tensorflow/contrib/seq2seq/kernels")
-add_python_module("tensorflow/contrib/seq2seq/ops")
-add_python_module("tensorflow/contrib/seq2seq/python")
-add_python_module("tensorflow/contrib/seq2seq/python/kernel_tests")
-add_python_module("tensorflow/contrib/seq2seq/python/ops")
-add_python_module("tensorflow/contrib/session_bundle")
-add_python_module("tensorflow/contrib/session_bundle/example")
-add_python_module("tensorflow/contrib/session_bundle/testdata")
-add_python_module("tensorflow/contrib/signal")
-add_python_module("tensorflow/contrib/signal/python")
-add_python_module("tensorflow/contrib/signal/python/ops")
-add_python_module("tensorflow/contrib/slim")
-add_python_module("tensorflow/contrib/slim/python")
-add_python_module("tensorflow/contrib/slim/python/slim")
-add_python_module("tensorflow/contrib/slim/python/slim/data")
-add_python_module("tensorflow/contrib/slim/python/slim/nets")
-add_python_module("tensorflow/contrib/solvers")
-add_python_module("tensorflow/contrib/solvers/python")
-add_python_module("tensorflow/contrib/solvers/python/ops")
-add_python_module("tensorflow/contrib/sparsemax")
-add_python_module("tensorflow/contrib/sparsemax/python")
-add_python_module("tensorflow/contrib/sparsemax/python/ops")
-add_python_module("tensorflow/contrib/specs")
-add_python_module("tensorflow/contrib/specs/python")
-add_python_module("tensorflow/contrib/staging")
-add_python_module("tensorflow/contrib/stat_summarizer")
-add_python_module("tensorflow/contrib/stateless")
-add_python_module("tensorflow/contrib/tensorboard")
-add_python_module("tensorflow/contrib/tensorboard/plugins")
-add_python_module("tensorflow/contrib/tensorboard/plugins/projector")
-add_python_module("tensorflow/contrib/tensor_forest")
-add_python_module("tensorflow/contrib/tensor_forest/client")
-add_python_module("tensorflow/contrib/tensor_forest/core")
-add_python_module("tensorflow/contrib/tensor_forest/core/ops")
-add_python_module("tensorflow/contrib/tensor_forest/data")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/core")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/core/ops")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/ops")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/kernel_tests")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/layers")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/models")
-add_python_module("tensorflow/contrib/tensor_forest/hybrid/python/ops")
-add_python_module("tensorflow/contrib/tensor_forest/python")
-add_python_module("tensorflow/contrib/tensor_forest/python/kernel_tests")
-add_python_module("tensorflow/contrib/tensor_forest/python/ops")
-add_python_module("tensorflow/contrib/testing")
-add_python_module("tensorflow/contrib/testing/python")
-add_python_module("tensorflow/contrib/testing/python/framework")
-add_python_module("tensorflow/contrib/text")
-add_python_module("tensorflow/contrib/text/kernels")
-add_python_module("tensorflow/contrib/text/ops")
-add_python_module("tensorflow/contrib/text/python")
-add_python_module("tensorflow/contrib/text/python/ops")
-add_python_module("tensorflow/contrib/tfprof")
-add_python_module("tensorflow/contrib/timeseries")
-add_python_module("tensorflow/contrib/timeseries/examples")
-add_python_module("tensorflow/contrib/timeseries/examples/data")
-add_python_module("tensorflow/contrib/timeseries/python")
-add_python_module("tensorflow/contrib/timeseries/python/timeseries")
-add_python_module("tensorflow/contrib/timeseries/python/timeseries/state_space_models")
-add_python_module("tensorflow/contrib/tpu")
-add_python_module("tensorflow/contrib/tpu/ops")
-add_python_module("tensorflow/contrib/tpu/profiler")
-add_python_module("tensorflow/contrib/tpu/python")
-add_python_module("tensorflow/contrib/tpu/python/ops")
-add_python_module("tensorflow/contrib/tpu/python/profiler")
-add_python_module("tensorflow/contrib/tpu/python/tpu")
-add_python_module("tensorflow/contrib/training")
-add_python_module("tensorflow/contrib/training/python")
-add_python_module("tensorflow/contrib/training/python/training")
-add_python_module("tensorflow/contrib/util")
-add_python_module("tensorflow/contrib/reduce_slice_ops")
-add_python_module("tensorflow/contrib/reduce_slice_ops/kernels")
-add_python_module("tensorflow/contrib/reduce_slice_ops/ops")
-add_python_module("tensorflow/contrib/reduce_slice_ops/python")
-add_python_module("tensorflow/contrib/reduce_slice_ops/python/kernel_tests")
-add_python_module("tensorflow/contrib/reduce_slice_ops/python/ops")
-add_python_module("tensorflow/contrib/summary")
 
 # Generate the tensorflow.python.platform.build_info module.
 set(BUILD_INFO_PY "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/platform/build_info.py")
@@ -743,7 +304,7 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
     # containing the wrappers.
     add_custom_command(
       OUTPUT ${GENERATE_PYTHON_OP_LIB_DESTINATION}
-      COMMAND ${tf_python_op_lib_name}_gen_python @${tensorflow_source_dir}/tensorflow/python/ops/hidden_ops.txt ${require_shape_fn} > ${GENERATE_PYTHON_OP_LIB_DESTINATION}
+      COMMAND ${tf_python_op_lib_name}_gen_python ${tensorflow_source_dir}/tensorflow/core/api_def/base_api,${tensorflow_source_dir}/tensorflow/core/api_def/python_api @${tensorflow_source_dir}/tensorflow/python/ops/hidden_ops.txt ${require_shape_fn} > ${GENERATE_PYTHON_OP_LIB_DESTINATION}
       DEPENDS ${tf_python_op_lib_name}_gen_python
     )
 
@@ -766,6 +327,7 @@ GENERATE_PYTHON_OP_LIB("dataset_ops")
 GENERATE_PYTHON_OP_LIB("image_ops")
 GENERATE_PYTHON_OP_LIB("io_ops")
 GENERATE_PYTHON_OP_LIB("linalg_ops")
+GENERATE_PYTHON_OP_LIB("list_ops")
 GENERATE_PYTHON_OP_LIB("logging_ops")
 GENERATE_PYTHON_OP_LIB("lookup_ops")
 GENERATE_PYTHON_OP_LIB("nn_ops")
@@ -797,6 +359,8 @@ GENERATE_PYTHON_OP_LIB("contrib_boosted_trees_quantiles_ops"
   DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/boosted_trees/python/ops/gen_quantile_ops.py)
 GENERATE_PYTHON_OP_LIB("contrib_boosted_trees_stats_accumulator_ops"
   DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/boosted_trees/python/ops/gen_stats_accumulator_ops.py)
+GENERATE_PYTHON_OP_LIB("contrib_coder_ops"
+  DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/coder/python/ops/gen_coder_ops.py)
 GENERATE_PYTHON_OP_LIB("contrib_cudnn_rnn_ops"
   DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cudnn_rnn/ops/gen_cudnn_rnn_ops.py)
 GENERATE_PYTHON_OP_LIB("contrib_data_prefetching_ops"
@@ -896,6 +460,8 @@ set (pywrap_tensorflow_internal_src
     "${tensorflow_source_dir}/tensorflow/python/framework/cpp_shape_inference.cc"
     "${tensorflow_source_dir}/tensorflow/python/framework/python_op_gen.h"
     "${tensorflow_source_dir}/tensorflow/python/framework/python_op_gen.cc"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/bfloat16.h"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/bfloat16.cc"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/numpy.h"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/numpy.cc"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/ndarray_tensor.h"
@@ -906,6 +472,8 @@ set (pywrap_tensorflow_internal_src
     "${tensorflow_source_dir}/tensorflow/python/lib/core/py_func.cc"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/py_seq_tensor.h"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/py_seq_tensor.cc"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/py_util.h"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/py_util.cc"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/safe_ptr.h"
     "${tensorflow_source_dir}/tensorflow/python/lib/core/safe_ptr.cc"
     "${tensorflow_source_dir}/tensorflow/python/lib/io/py_record_reader.h"
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 7884631d6d27bd5375b80d7eb5593d10d709e450..2e79eadf7f566690a7742757ceb56e147ebd6ea0 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -139,17 +139,21 @@ if (tensorflow_BUILD_PYTHON_TESTS)
 
   file(GLOB_RECURSE tf_test_src_py
     ${tf_test_rnn_src_py}
+    "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/*.py"
     "${tensorflow_source_dir}/tensorflow/python/debug/cli/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/debug/lib/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/debug/wrappers/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/estimator/python/estimator/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/kernel_tests/*.py"
     "${tensorflow_source_dir}/tensorflow/python/meta_graph_transform/*_test.py"
+    "${tensorflow_source_dir}/tensorflow/python/ops/quantized_conv_ops_test.py"
+    "${tensorflow_source_dir}/tensorflow/python/ops/quantized_ops_test.py"
     "${tensorflow_source_dir}/tensorflow/python/platform/build_info_test.py"
     "${tensorflow_source_dir}/tensorflow/python/profiler/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/profiler/internal/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/saved_model/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/training/*_test.py"
+    "${tensorflow_source_dir}/tensorflow/contrib/coder/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/data/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/factorization/*_test.py"
     "${tensorflow_source_dir}/tensorflow/contrib/image/*_test.py"
@@ -187,6 +191,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
     "${tensorflow_source_dir}/tensorflow/python/profiler/pprof_profiler_test.py"
     # flaky test
     "${tensorflow_source_dir}/tensorflow/python/profiler/internal/run_metadata_test.py"
+    "${tensorflow_source_dir}/tensorflow/python/profiler/model_analyzer_test.py"
     # Fails because uses data dependencies with bazel
     "${tensorflow_source_dir}/tensorflow/python/saved_model/saved_model_test.py"
     # requires scipy
@@ -217,16 +222,20 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       # TFDBG grpc:// mode is not yet available on Windows.
       "${tensorflow_source_dir}/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py"
       "${tensorflow_source_dir}/tensorflow/python/debug/lib/session_debug_grpc_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/debug/lib/source_remote_test.py"
       # stl on windows handles overflows different
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/as_string_op_test.py"
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/string_to_number_op_test.py"
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/clip_ops_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/list_ops_test.py"  # Needs portpicker.
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/tensor_array_ops_test.py"  # Needs portpicker.
       # Numerical issues, calculations off.
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/concat_op_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/factorization/python/ops/wals_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py"
       "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/backend_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py"
       # Float division by zero
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/benchmark_test.py"
       # Flaky, for unknown reasons. Cannot reproduce in terminal. Revisit once we can get stack traces.
@@ -235,11 +244,11 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       "${tensorflow_source_dir}/tensorflow/python/training/sync_replicas_optimizer_test.py"
       "${tensorflow_source_dir}/tensorflow/python/debug/lib/session_debug_grpc_test.py"
       "${tensorflow_source_dir}tensorflow/python/training/localhost_cluster_performance_test.py"
-      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/iterator_ops_cluster_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py"
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/functional_ops_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/iterator_ops_cluster_test.py"
       # Type error in testRemoteIteratorUsingRemoteCallOpDirectSessionGPUCPU.
-      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/iterator_ops_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/iterator_ops_test.py"
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py"
       # IteratorGetMax OutOfRangeError
@@ -263,9 +272,9 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/linalg_grad_test.py"  # cudaSolver handle creation fails.
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/array_ops_test.py"  # depends on python/framework/test_ops
       # Dataset tests
-      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/dataset_constructor_op_test.py"  # Segfaults on windows
+      "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py"  # Segfaults on windows
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py"  # Segfaults on Windows.
-      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/iterator_ops_cluster_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py"
       # Broken tensorboard test due to cmake issues.
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/iterator_ops_cluster_test.py"  # Needs portpicker
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/sloppy_transformation_dataset_op_test.py"  # b/65430561
@@ -296,6 +305,11 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       # Test should only be run manually
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/reduction_ops_test_big.py"
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/svd_op_test.py"
+      # Depends on python/framework/test_ops
+      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/array_ops_test.py"
+      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/control_flow_util_test.py"
+      # Flaky replicate_model_fn_test
+      "${tensorflow_source_dir}/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py"  # b/71901810
   )
   endif()
   list(REMOVE_ITEM tf_test_src_py ${tf_test_src_py_exclude})
@@ -363,7 +377,6 @@ if (tensorflow_BUILD_CC_TESTS)
     "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/tensor_coding_test.cc"
     "${tensorflow_source_dir}/tensorflow/core/kernels/remote_fused_graph_rewriter_transform_test.cc"
     "${tensorflow_source_dir}/tensorflow/core/kernels/hexagon/graph_transferer_test.cc"
-    "${tensorflow_source_dir}/tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc"
   )
 
   if (NOT tensorflow_ENABLE_GPU)
diff --git a/tensorflow/contrib/coder/BUILD b/tensorflow/contrib/coder/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..ec3d550b70d2aaa23b989c44f3d86fa87cffb335
--- /dev/null
+++ b/tensorflow/contrib/coder/BUILD
@@ -0,0 +1,167 @@
+# Description:
+#   Contains entropy coding related modules.
+
+package(default_visibility = [
+    "//learning/brain:__subpackages__",
+    "//tensorflow:__subpackages__",
+])
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_cc_test",
+    "tf_custom_op_library",
+    "tf_custom_op_py_library",
+    "tf_gen_op_libs",
+    "tf_gen_op_wrapper_py",
+    "tf_kernel_library",
+    "tf_py_test",
+)
+
+cc_library(
+    name = "range_coder",
+    srcs = [
+        "kernels/range_coder.cc",
+    ],
+    hdrs = [
+        "kernels/range_coder.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "range_coder_test",
+    size = "small",
+    srcs = ["kernels/range_coder_test.cc"],
+    deps = [
+        ":range_coder",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
+tf_gen_op_libs(
+    op_lib_names = ["coder_ops"],
+    deps = [
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_kernel_library(
+    name = "range_coder_ops",
+    srcs = [
+        "kernels/range_coder_ops.cc",
+        "kernels/range_coder_ops_util.cc",
+    ],
+    hdrs = [
+        "kernels/range_coder_ops_util.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":coder_ops_op_lib",
+        ":range_coder",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ],
+    alwayslink = 1,
+)
+
+tf_cc_test(
+    name = "range_coder_ops_test",
+    size = "small",
+    srcs = ["kernels/range_coder_ops_test.cc"],
+    deps = [
+        ":range_coder",
+        ":range_coder_ops",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels:ops_testutil",
+    ],
+)
+
+cc_library(
+    name = "all_ops",
+    deps = [":coder_ops_op_lib"],
+)
+
+cc_library(
+    name = "all_kernels",
+    deps = [":range_coder_ops"],
+)
+
+tf_custom_op_library(
+    name = "python/ops/_coder_ops.so",
+    srcs = [
+        "kernels/range_coder.cc",
+        "kernels/range_coder.h",
+        "kernels/range_coder_ops.cc",
+        "kernels/range_coder_ops_util.cc",
+        "kernels/range_coder_ops_util.h",
+        "ops/coder_ops.cc",
+    ],
+)
+
+tf_gen_op_wrapper_py(
+    name = "gen_coder_ops",
+    out = "python/ops/gen_coder_ops.py",
+    deps = [":coder_ops_op_lib"],
+)
+
+tf_custom_op_py_library(
+    name = "coder_ops_py",
+    srcs = [
+        "__init__.py",
+        "python/ops/coder_ops.py",
+    ],
+    dso = [
+        ":python/ops/_coder_ops.so",
+    ],
+    kernels = [
+        ":all_kernels",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":gen_coder_ops",
+        "//tensorflow/contrib/util:util_py",
+    ],
+)
+
+tf_py_test(
+    name = "coder_ops_py_test",
+    srcs = [
+        "python/ops/coder_ops_test.py",
+    ],
+    additional_deps = [
+        ":coder_ops_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
+    ],
+    main = "python/ops/coder_ops_test.py",
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+)
diff --git a/tensorflow/contrib/coder/README.md b/tensorflow/contrib/coder/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e1e867db5aa701eb73ee43a47cd3dcc2dc783a04
--- /dev/null
+++ b/tensorflow/contrib/coder/README.md
@@ -0,0 +1,73 @@
+# Entropy coder
+
+This module contains range encoder and range decoder which can encode integer
+data into string with cumulative distribution functions (CDF).
+
+## Data and CDF values
+
+The data to be encoded should be non-negative integers in half-open interval
+`[0, m)`. Then a CDF is represented as an integral vector of length `m + 1`
+where `CDF(i) = f(Pr(X < i) * 2^precision)` for i = 0,1,...,m, and `precision`
+is an attribute in range `0 < precision <= 16`. The function `f` maps real
+values into integers, e.g., round or floor. It is important that to encode a
+number `i`, `CDF(i + 1) - CDF(i)` cannot be zero.
+
+Note that we used `Pr(X < i)` not `Pr(X <= i)`, and therefore CDF(0) = 0 always.
+
+## RangeEncode: data shapes and CDF shapes
+
+For each data element, its CDF has to be provided. Therefore if the shape of CDF
+should be `data.shape + (m + 1,)` in NumPy-like notation. For example, if `data`
+is a 2-D tensor of shape (10, 10) and its elements are in `[0, 64)`, then the
+CDF tensor should have shape (10, 10, 65).
+
+This may make CDF tensor too large, and in many applications all data elements
+may have the same probability distribution. To handle this, `RangeEncode`
+supports limited broadcasting CDF into data. Broadcasting is limited in the
+following sense:
+
+- All CDF axes but the last one is broadcasted into data but not the other way
+  around,
+- The number of CDF axes does not extend, i.e., `CDF.ndim == data.ndim + 1`.
+
+In the previous example where data has shape (10, 10), the followings are
+acceptable CDF shapes:
+
+- (10, 10, 65)
+- (1, 10, 65)
+- (10, 1, 65)
+- (1, 1, 65)
+
+## RangeDecode
+
+`RangeEncode` encodes neither data shape nor termination character. Therefore
+the decoder should know how many characters are encoded into the string, and
+`RangeDecode` takes the encoded data shape as the second argument. The same
+shape restrictions as `RangeEncode` inputs apply here.
+
+## Example
+
+```python
+data = tf.random_uniform((128, 128), 0, 10, dtype=tf.int32)
+
+histogram = tf.bincount(data, minlength=10, maxlength=10)
+cdf = tf.cumsum(histogram, exclusive=False)
+# CDF should have length m + 1.
+cdf = tf.pad(cdf, [[1, 0]])
+# CDF axis count must be one more than data.
+cdf = tf.reshape(cdf, [1, 1, -1])
+
+# Note that data has 2^14 elements, and therefore the sum of CDF is 2^14.
+data = tf.cast(data, tf.int16)
+encoded = coder.range_encode(data, cdf, precision=14)
+decoded = coder.range_decode(encoded, tf.shape(data), cdf, precision=14)
+
+# data and decoded should be the same.
+sess = tf.Session()
+x, y = sess.run((data, decoded))
+assert np.all(x == y)
+```
+
+## Authors
+Sung Jin Hwang (github: [ssjhv](https://github.com/ssjhv)) and Nick Johnston
+(github: [nmjohn](https://github.com/nmjohn))
diff --git a/tensorflow/contrib/coder/__init__.py b/tensorflow/contrib/coder/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7e663e6f1359f399cdaa80e037635a8f7546b37
--- /dev/null
+++ b/tensorflow/contrib/coder/__init__.py
@@ -0,0 +1,26 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Entropy code operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# pylint: disable=wildcard-import
+from tensorflow.contrib.coder.python.ops.coder_ops import *
+# pylint: enable=wildcard-import
+
+from tensorflow.python.util.all_util import remove_undocumented
+remove_undocumented(__name__)
diff --git a/tensorflow/contrib/coder/kernels/range_coder.cc b/tensorflow/contrib/coder/kernels/range_coder.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f4f076b6c4e0c82cc297266bedc63034d5f5bf8b
--- /dev/null
+++ b/tensorflow/contrib/coder/kernels/range_coder.cc
@@ -0,0 +1,374 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Range coder implementation, based on [1].
+//
+// [1] G. N. N. Martin, "Range coding: an algorithm for removing redundancy from
+// a digitised message", presented to the Video & Data Recording Conference,
+// held in Southampton, July 24-27, 1979.
+//
+#include "tensorflow/contrib/coder/kernels/range_coder.h"
+
+#include <limits>
+#include <string>
+
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+RangeEncoder::RangeEncoder(int precision) : precision_(precision) {
+  CHECK_GT(precision, 0);
+  CHECK_LE(precision, 16);
+}
+
+void RangeEncoder::Encode(int32 lower, int32 upper, string* sink) {
+  // Input requirement: 0 <= lower < upper <= 2^precision.
+  DCHECK_LE(0, lower);
+  DCHECK_LT(lower, upper);
+  DCHECK_LE(upper, 1 << precision_);
+
+  // `base` and `size` represent a half-open interval [base, base + size).
+  // Loop invariant: 2^16 <= size <= 2^32.
+  //
+  // Note that keeping size above 2^16 is important. Since the interval sizes
+  // are quantized to up to 16 bits, the smallest interval size the encode may
+  // handle is 2^-16. If size is smaller than 2^16, a small interval input may
+  // collapse the encoder range into an empty interval.
+  const uint64 size = static_cast<uint64>(size_minus1_) + 1;
+  DCHECK_NE(size >> 16, 0);
+
+  // For short notation, let u := lower and v := upper.
+  //
+  // The input u, v represents a half-open interval [u, v) / 2^precision.
+  // This narrows the current interval roughly to
+  // [base + (size * u) / 2^precision, base + (size * v) / 2^precision).
+  //
+  // TODO(sjhwang): Try rounding if it helps improve compression ratio, at the
+  // expense of more operations. In the test using Zipf distribution, the
+  // overhead over the theoretical compression ratio was ~0.01%.
+  // NOTE: The max value of `size` is 2^32 and size > 0. Therefore `size * u`
+  // can be rewritten as `(size - 1) * u + u` and all the computation can be
+  // done in 32-bit mode. If 32-bit multiply is faster, then rewrite.
+  const uint32 a = (size * static_cast<uint64>(lower)) >> precision_;
+  const uint32 b = ((size * static_cast<uint64>(upper)) >> precision_) - 1;
+  DCHECK_LE(a, b);
+
+  // Let's confirm the RHS of a, b fit in uint32 type.
+  // Recall that 0 <= u < 2^precision, and size <= 2^32. Therefore
+  //   (size * u) / 2^precision < size <= 2^32,
+  // and the value of a fits in uint32 type. Similarly, since v <= 2^precision,
+  //   (size * v) / 2^precision - 1 <= size - 1 < 2^32.
+  // For lower bound of b, note that 1 <= v, 2^16 <= size, and 16 <= precision.
+  // Therefore (size * v) / 2^precision - 1 >= 2^16 / 2^precision - 1 >= 0.
+
+  // The new interval is [base + a, base + b] = [base + a, base + b + 1).
+  base_ += a;  // May overflow.
+  size_minus1_ = b - a;
+  const bool base_overflow = (base_ < a);
+
+  // The encoder has two states. Let's call them state 0 and state 1.
+  // State 0 is when base < base + size <= 2^32.
+  // State 1 is when base < 2^32 < base + size.
+  //
+  // The encoder initially starts in state 0, with base = 0, size = 2^32.
+  //
+  // TODO(sjhwang): Requires some profiling, but the encoder stays in state 0
+  // most of the time. Should optimize code for state 0.
+  //
+  // Each Encode() has up to two places where the interval changes:
+  //   #1. Refine the interval. [base, base + size) -> [base + a, base + b + 1).
+  //   #2. Expand interval if the new size is too small,
+  // and each change may cause a state transition.
+  //
+  // First, consider when the current state is 0.
+  //
+  // In this case, the next state after #1 is always state 0, since refining
+  // interval only shrinks the interval, therefore new_base + new_size <= 2^32.
+  //
+  // Let us explain #2.
+  //
+  // Recall that at the beginning of each Encode(), the encoder requires
+  // 2^16 < size <= 2^32. As precision <= 16, the new interval size can be as
+  // small as 1, but never zero.
+  //
+  // To keep size above 2^16, if new size is smaller than or equal to 2^16, the
+  // encoder would left-shift base and size by 16 bits: size' <- size * 2^16.
+  // Note that new size' is now in the range [2^16, 2^32].
+  //
+  // Since size is left-shifted, the same should be applied to base as well.
+  // However, after the left-shift, base will then contain 48 bits instead of 32
+  // bits. Therefore prior to the shift, The upper 16 bits in base should be
+  // stored somewhere else.
+  //
+  // If the upper 16 bits of all values in the interval were the same, i.e., if
+  // base[32:16] == (base + size - 1)[32:16], then base[32:16] can be written
+  // out to `output` string, since any further Encode() only narrows down the
+  // interval and that 16 bits would never change.
+  //
+  // If the upper 16 bits were not all the same, since this happens only when
+  // size <= 2^16, the upper 16 bits may differ only by one, i.e.,
+  // base[32:16] + 1 == (base + size - 1)[32:16]. At this stage, it is not
+  // determined yet whether base[32:16] should be written to the output  or
+  // (base[32:16] + 1) should be written to the output. In this case,
+  // (base[32:16] + 1) is temporarily stored in `delay`, and base is
+  // left-shifted by 16 bits.
+  //
+  // In the latter case, the condition implies that (base // 2^16) and
+  // ((base + size - 1) // 2^16) were different. Therefore after left-shift by
+  // 16 bits, the new (base + size) is greater than 2^32, i.e., the encoder
+  // transition to state 1.
+  //
+  // ==== Summary ====
+  // To detect the current encoder state,
+  //   state 0: delay == 0 iff (base mod 2^32) < (base + size) mod 2^32,
+  //   state 1: delay != 0 iff (base + size) mod 2^32 <= base mod 2^32,
+  // because size <= 2^32.
+  //
+  // ==== Summary for state 0 ====
+  // 1. Interval refinement does not cause state transition.
+  // 2. Interval expansion may cause state transition, depending on the upper 16
+  // bits of base and base + size - 1.
+  //
+  // Now suppose the previous state was 1. This means that
+  // base <= 2^32 < base + size.
+  //
+  // When in state 1, an interval refinement may trigger state transition.
+  // After Encode() refines the interval, there are three possibilities:
+  //   #1. base <= 2^32 < base + size (unchanged),
+  //   #2. 2^32 <= base < base + size (base overflowed),
+  //   #3. base < base + size <= 2^32 (base + size - 1 underflowed).
+  //
+  // In case #1, the encoder remains in state 1.
+  // In case #2 or #3, the encoder state changes to state 0.
+  //
+  // ==== State transition for interval refinement ====
+  // 1. state 0 -> state 0,
+  // 2. state 1 -> state 0 or state 1.
+  //
+  // Therefore if the new state is 1, then the previous state must have been
+  // state 1.
+  if (base_ + size_minus1_ < base_) {
+    // If statement checked if 2^32 < base + size. The new state is 1, hence the
+    // previous state was also state 1.
+    DCHECK_NE(((base_ - a) + size) >> 32, 0);
+    DCHECK_NE(delay_ & 0xFFFF, 0);
+
+    // Like in state 0, if the new size is <= 2^16, then base and size should
+    // be left-shifted by 16 bits. Combine the conditions
+    // base <= 2^32 < base + size and size <= 2^16 to conclude that
+    // base[32:16] >= 0xFFFF and (base + size - 1)[32:16] = 0x0000.
+    //
+    // Note that 2^32 - base < size, and since base is at least 0xFFFF0000,
+    // 2^16 - base[16:0] < size. Let base' and size' be the new base and size
+    // after the bit-shift. Then 2^32 - base' < size' => 2^32 < base' + size'.
+    // Therefore the encoder remains in state 1.
+    //
+    // Lastly, `delay` is modified. Conceptually, delay has to be changed to
+    //   delay' <- delay * 2^16 + (base + size - 1)[32:16].
+    // Since we know above that (base + size - 1)[32:16] = 0x0000, there is no
+    // need to explicitly do the computation above, but rather store how many
+    // trailing zeros there were. For this reason, the lower 16 bits of
+    // `delay` stores the delayed value when state changed from 0 to 1, and
+    // delay[32:16] stores the # of trailing zeros (in bytes).
+    //
+    // ==== State transition for interval expansion ====
+    // 1. state 0 -> state 0 or state 1,
+    // 2. state 1 -> state 1.
+    if (size_minus1_ >> 16 == 0) {
+      DCHECK_EQ(base_ >> 16, 0xFFFF);
+      base_ <<= 16;
+      size_minus1_ <<= 16;
+      size_minus1_ |= 0xFFFF;
+      // TODO(sjhwang): It is possible that for very long input, delay
+      // overflow during below. If overflow is detected, this delay is too
+      // long the encoder should forcefully move to state 0. In such case,
+      // base can be raised to 2^32 (force case #2), or (base + size) can be
+      // lowered to 2^32 (force case #3), depending on which transition
+      // keeps size larger.
+      CHECK_LT(delay_, static_cast<uint64>(1) << 62);
+      delay_ += 0x20000;  // Two more bytes of zeros. Check overflow?
+    }
+    return;
+  }
+
+  // If reached here, the current state is 0.
+  // First handle the case when the previous state was state 1.
+  if (delay_ != 0) {
+    // In case #2 or #3, the encoder state changes to state 0. Recall that when
+    // the encoder state changed from state 0 to state 1, the top 16 bits of
+    // (base + size - 1) was temporarily stored in `delay`, because the output
+    // could be either (delay - 1) or (delay).
+    //
+    // And from above, the delayed value encoded in `delay` is
+    //   delay' <- delay[16:0] * 2^(8 * delay[MAX:16])
+    //
+    // In case #2, the interval moved below 2^32. So (delay' - 1) is the
+    // converged value after interval refinements. Write out
+    // (delay[16:0] - 1) and write (8 * delay[MAX:16]) bytes of 0xFF.
+    //
+    // In case #3, the interval moved above 2^32. So delay' is the converged
+    // value after interval refinement. Write out delay[16:0] and write
+    // (8 * delay[MAX:16]) bytes of 0x00.
+    if (base_overflow) {
+      // Case #2.
+      DCHECK_NE((static_cast<uint64>(base_ - a) + a) >> 32, 0);
+      sink->push_back(static_cast<char>(delay_ >> 8));
+      sink->push_back(static_cast<char>(delay_ >> 0));
+      sink->append(delay_ >> 16, static_cast<char>(0));
+    } else {
+      // Case #3.
+      DCHECK_EQ(static_cast<uint64>(base_ + size_minus1_) >> 32, 0);
+      --delay_;
+      sink->push_back(static_cast<char>(delay_ >> 8));
+      sink->push_back(static_cast<char>(delay_ >> 0));
+      sink->append(delay_ >> 16, static_cast<char>(0xFF));
+    }
+    // Reset to state 0.
+    delay_ = 0;
+  }
+
+  if (size_minus1_ >> 16 == 0) {
+    const uint32 top = base_ >> 16;
+
+    base_ <<= 16;
+    size_minus1_ <<= 16;
+    size_minus1_ |= 0xFFFF;
+
+    if (base_ <= base_ + size_minus1_) {
+      // Still in state 0. Write the top 16 bits.
+      sink->push_back(static_cast<char>(top >> 8));
+      sink->push_back(static_cast<char>(top));
+    } else {
+      // New state is 1.
+      DCHECK_LT(top, 0xFFFF);
+      delay_ = top + 1;
+    }
+  }
+}
+
+void RangeEncoder::Finalize(string* sink) {
+  // Finalize the encode by writing out any number in the interval
+  // [base, base + size).
+  //
+  // Trailing zeros are not explicitly written out as decoder can fill in zeros
+  // by default.
+  if (delay_ != 0) {
+    // The last state was state 1. Since base < 2^32 < base + size, pick 2^32
+    // (state 1, case #3).
+    // NOTE: It is a bit difficult to trigger this code path on purpose.
+    // TODO(sjhwang): Find a way to trigger this code path for test coverage.
+    sink->push_back(static_cast<char>(delay_ >> 8));
+    if ((delay_ & 0xFF) != 0) {
+      sink->push_back(static_cast<char>(delay_));
+    }
+  } else if (base_ != 0) {
+    // If base == 0, then pick 0 from [base, base + size) and no zeros are
+    // explcitly written.
+    //
+    // Otherwise, pick (base + (2^16 - base[16:0])), i.e., round up base to the
+    // next multiple of 2^16. As 2^16 < size, this value should be in the
+    // interval [base, base + size).
+    const uint32 mid = ((base_ - 1) >> 16) + 1;
+    DCHECK_EQ(mid & 0xFFFF, mid);
+    sink->push_back(static_cast<char>(mid >> 8));
+    if ((mid & 0xFF) != 0) {
+      sink->push_back(static_cast<char>(mid >> 0));
+    }
+  }
+
+  base_ = 0;
+  size_minus1_ = std::numeric_limits<uint32>::max();
+  delay_ = 0;
+}
+
+RangeDecoder::RangeDecoder(const string& source, int precision)
+    : current_(source.begin()),
+      begin_(source.begin()),
+      end_(source.end()),
+      precision_(precision) {
+  CHECK_LE(precision, 16);
+
+  Read16BitValue();
+  Read16BitValue();
+}
+
+int32 RangeDecoder::Decode(tensorflow::gtl::ArraySlice<int32> cdf) {
+  const uint64 size = static_cast<uint64>(size_minus1_) + 1;
+  const uint64 offset =
+      ((static_cast<uint64>(value_ - base_) + 1) << precision_) - 1;
+
+  // This is similar to std::lower_range() with std::less_equal as comparison.
+  // After the binary search, `pv` points to the smallest number v that
+  // satisfies offset < (size * v) / 2^precision.
+
+  // Assumes that cdf[0] == 0. Therefore (size * cdf[0]) / 2^precision is always
+  // less than or equal to offset.
+  const int32* pv = cdf.data() + 1;
+  // `len` can be cdf.size() - 2 if there is guarantee that the last element of
+  // cdf is 2^precision.
+  auto len = cdf.size() - 1;
+  DCHECK_GT(len, 0);
+
+  do {
+    const auto half = len / 2;
+    const int32* mid = pv + half;
+    DCHECK_GE(*mid, 0);
+    DCHECK_LE(*mid, 1 << precision_);
+    if (size * static_cast<uint64>(*mid) <= offset) {
+      pv = mid + 1;
+      len -= half + 1;
+    } else {
+      len = half;
+    }
+  } while (len > 0);
+
+  // If (size * v) / 2^precision <= offset for all v in cdf, then pv points to
+  // one after the last element of cdf. That is a decoding error.
+  //
+  // TODO(sjhwang): Consider returning -1 to indicate error. Or start len =
+  // cdf.size() - 2 instead and give up detecting this error.
+  CHECK_LT(pv, cdf.data() + cdf.size());
+
+  const uint32 a = (size * static_cast<uint64>(*(pv - 1))) >> precision_;
+  const uint32 b = ((size * static_cast<uint64>(*pv)) >> precision_) - 1;
+  DCHECK_LE(a, offset >> precision_);
+  DCHECK_LE(offset >> precision_, b);
+
+  base_ += a;
+  size_minus1_ = b - a;
+
+  if (size_minus1_ >> 16 == 0) {
+    base_ <<= 16;
+    size_minus1_ <<= 16;
+    size_minus1_ |= 0xFFFF;
+
+    Read16BitValue();
+  }
+
+  return pv - cdf.data() - 1;
+}
+
+void RangeDecoder::Read16BitValue() {
+  value_ <<= 8;
+  if (current_ != end_) {
+    value_ |= static_cast<uint8>(*current_++);
+  }
+  value_ <<= 8;
+  if (current_ != end_) {
+    value_ |= static_cast<uint8>(*current_++);
+  }
+}
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/coder/kernels/range_coder.h b/tensorflow/contrib/coder/kernels/range_coder.h
new file mode 100644
index 0000000000000000000000000000000000000000..c24fb707fc9f1776a4e6e7be7df3245c0cdccb0b
--- /dev/null
+++ b/tensorflow/contrib/coder/kernels/range_coder.h
@@ -0,0 +1,109 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_H_
+
+#include <limits>
+#include <string>
+
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+class RangeEncoder {
+ public:
+  // `precision` determines the granularity of probability masses passed to
+  // Encode() function below.
+  //
+  // REQUIRES: 0 < precision <= 16.
+  explicit RangeEncoder(int precision);
+
+  // Encodes a half-open interval [lower / 2^precision, upper / 2^precision).
+  // Suppose each character to be encoded is from an integer-valued
+  // distribution. When encoding a random character x0, the arguments lower and
+  // upper represent
+  //   Pr(X < x0) = lower / 2^precision,
+  //   Pr(X < x0 + 1) = upper / 2^precision,
+  // where X is a random variable following the distribution.
+  //
+  // For example, assume that the distribution has possible outputs 0, 1, 2, ...
+  // To encode value 0, lower = 0 and upper = Pr(X = 0).
+  // To encode value 1, lower = Pr(X = 0) and upper = Pr(X = 0 or 1).
+  // To encode value 2, lower = Pr(X = 0 or 1) and upper = Pr(X = 0, 1, or 2).
+  // ...
+  //
+  // REQUIRES: 0 <= lower < upper <= 2^precision.
+  void Encode(int32 lower, int32 upper, string* sink);
+
+  // The encode may contain some under-determined values from previous encoding.
+  // After Encode() calls, Finalize() must be called. Otherwise the encoded
+  // string may not be decoded.
+  void Finalize(string* sink);
+
+ private:
+  uint32 base_ = 0;
+  uint32 size_minus1_ = std::numeric_limits<uint32>::max();
+  uint64 delay_ = 0;
+
+  const int precision_;
+};
+
+class RangeDecoder {
+ public:
+  // Holds a reference to `source`. The caller has to make sure that `source`
+  // outlives the decoder object.
+  //
+  // REQUIRES: `precision` must be the same as the encoder's precision.
+  // REQUIRES: 0 < precision <= 16.
+  RangeDecoder(const string& source, int precision);
+
+  // Decodes a character from `source` using CDF. The size of `cdf` should be
+  // one more than the number of the character in the alphabet.
+  //
+  // If x0, x1, x2, ... are the possible characters (in increasing order) from
+  // the distribution, then
+  //   cdf[0] = 0
+  //   cdf[1] = Pr(X <= x0),
+  //   cdf[2] = Pr(X <= x1),
+  //   cdf[3] = Pr(X <= x2),
+  //   ...
+  //
+  // The returned value is an index to `cdf` where the decoded character
+  // corresponds to.
+  //
+  // REQUIRES: cdf.size() > 1.
+  // REQUIRES: cdf[i] <= cdf[i + 1] for i = 0, 1, ..., cdf.size() - 2.
+  // REQUIRES: cdf[cdf.size() - 1] <= 2^precision.
+  //
+  // In practice the last element of `cdf` should equal to 2^precision.
+  int32 Decode(gtl::ArraySlice<int32> cdf);
+
+ private:
+  void Read16BitValue();
+
+  uint32 base_ = 0;
+  uint32 size_minus1_ = std::numeric_limits<uint32>::max();
+  uint32 value_ = 0;
+
+  string::const_iterator current_;
+  const string::const_iterator begin_;
+  const string::const_iterator end_;
+
+  const int precision_;
+};
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_H_
diff --git a/tensorflow/contrib/coder/kernels/range_coder_ops.cc b/tensorflow/contrib/coder/kernels/range_coder_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..cde7982530fea6407aaf074f7af4a22263d50da3
--- /dev/null
+++ b/tensorflow/contrib/coder/kernels/range_coder_ops.cc
@@ -0,0 +1,307 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define EIGEN_USE_THREADS
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
+#include "tensorflow/contrib/coder/kernels/range_coder.h"
+#include "tensorflow/contrib/coder/kernels/range_coder_ops_util.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace {
+// A helper class to iterate over data and cdf simultaneously, while cdf is
+// broadcasted to data.
+// NOTE: Moving this class out of anonymous namespace impacts compiler
+// optimization and affects performance. When moving this code around (e.g.,
+// into a library header), be sure to check the benchmark tests.
+template <typename T, typename U, int N>
+class BroadcastRange {
+ public:
+  BroadcastRange(T* data_pointer, gtl::ArraySlice<int64> data_shape,
+                 const U* cdf_pointer, gtl::ArraySlice<int64> cdf_shape)
+      : data_pointer_(data_pointer), cdf_pointer_(cdf_pointer) {
+    CHECK(!data_shape.empty());
+    CHECK_EQ(data_shape.size(), N);
+    CHECK_EQ(cdf_shape.size(), N + 1);
+
+    std::copy(data_shape.begin(), data_shape.end(), &data_shape_[0]);
+    data_index_.fill(0);
+
+    const int64 innermost_stride = cdf_shape[N];
+    cdf_displace_.fill(innermost_stride);
+
+    // Pre-compute the pointer displacement for cdf.
+    int64 stride = innermost_stride;
+    for (int i = N - 1; i >= 0; --i) {
+      const bool broadcasting = (cdf_shape[i] <= 1);
+
+      // When the data linear index advances by one, the cdf linear index
+      // advances by `innermost_stride`.
+      //
+      // Suppose that the i-th axis coordinate of data increased by one, and
+      // that i-th axis is broadcasting. The cdf linear index should be wound
+      // back by i-th axis stride, so that i-th axis coordinate of cdf is
+      // effectively kept at 0.
+      if (broadcasting) {
+        cdf_displace_[i] -= stride;
+      }
+      stride *= cdf_shape[i];
+    }
+  }
+
+  // Returns the pointers to the current iterating locations to data and cdf
+  // tensors.
+  //
+  // Note that this function does not track whether data pointer is running past
+  // the end of data buffer. The caller has to make sure Next() is called no
+  // more than that.
+  std::pair<T*, const U*> Next() {
+    std::pair<T*, const U*> return_value = {data_pointer_, cdf_pointer_};
+
+    int i = N - 1;
+    for (; i > 0; --i) {
+      ++data_index_[i];
+      if (data_index_[i] < data_shape_[i]) {
+        break;
+      }
+      data_index_[i] = 0;
+    }
+
+    // Advance data pointer by one.
+    data_pointer_ += 1;
+
+    // For cdf pointer, it's more complicated because of broadcasting. When i-th
+    // coordinate increase by one, and if i-th axis is broadcasting, then we
+    // need to rewind back the pointer so that the effective i-th axis
+    // coordinate for cdf is always 0. This value is precomputed as
+    // cdf_displace_.
+    cdf_pointer_ += cdf_displace_[i];
+    return return_value;
+  }
+
+ private:
+  std::array<int64, N> data_shape_;
+  std::array<int64, N> cdf_displace_;
+  std::array<int64, N> data_index_;
+
+  T* data_pointer_;
+  const U* cdf_pointer_;
+};
+
+Status CheckCdfShape(const TensorShape& data_shape,
+                     const TensorShape& cdf_shape) {
+  if (TF_PREDICT_FALSE(cdf_shape.dims() != data_shape.dims() + 1)) {
+    return errors::InvalidArgument(
+        "`cdf` should have one more axis than `data`: data shape=",
+        data_shape.DebugString(), ", cdf shape=", cdf_shape.DebugString());
+  }
+
+  if (TF_PREDICT_FALSE(cdf_shape.dim_size(cdf_shape.dims() - 1) <= 1)) {
+    return errors::InvalidArgument(
+        "The last dimension of `cdf` should be > 1: ", cdf_shape.DebugString());
+  }
+
+  return Status::OK();
+}
+
+// Non-incremental encoder op -------------------------------------------------
+class RangeEncodeOp : public OpKernel {
+ public:
+  explicit RangeEncodeOp(OpKernelConstruction* context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("precision", &precision_));
+    OP_REQUIRES(context, 0 < precision_ && precision_ <= 16,
+                errors::InvalidArgument("`precision` must be in [1, 16]: ",
+                                        precision_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& data = context->input(0);
+    const Tensor& cdf = context->input(1);
+
+    OP_REQUIRES_OK(context, CheckCdfShape(data.shape(), cdf.shape()));
+
+    std::vector<int64> data_shape, cdf_shape;
+    OP_REQUIRES_OK(
+        context, MergeAxes(data.shape(), cdf.shape(), &data_shape, &cdf_shape));
+
+    Tensor* output_tensor;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, TensorShape{}, &output_tensor));
+    string* output = &output_tensor->scalar<string>()();
+
+    switch (data_shape.size()) {
+#define RANGE_ENCODE_CASE(dims)                                                \
+  case dims: {                                                                 \
+    RangeEncodeImpl<dims>(data.flat<int16>(), data_shape,                      \
+                          cdf.flat_inner_dims<int32, 2>(), cdf_shape, output); \
+  } break
+      RANGE_ENCODE_CASE(1);
+      RANGE_ENCODE_CASE(2);
+      RANGE_ENCODE_CASE(3);
+      RANGE_ENCODE_CASE(4);
+      RANGE_ENCODE_CASE(5);
+      RANGE_ENCODE_CASE(6);
+#undef RANGE_ENCODE_CASE
+      default:
+        context->CtxFailure(errors::InvalidArgument(
+            "Irregular broadcast pattern: ", data.shape().DebugString(), ", ",
+            cdf.shape().DebugString()));
+        return;
+    }
+  }
+
+ private:
+  template <int N>
+  void RangeEncodeImpl(TTypes<int16>::ConstFlat data,
+                       gtl::ArraySlice<int64> data_shape,
+                       TTypes<int32>::ConstMatrix cdf,
+                       gtl::ArraySlice<int64> cdf_shape, string* output) const {
+    const int64 data_size = data.size();
+    const int64 cdf_size = cdf.size();
+    const int64 chip_size = cdf.dimension(1);
+
+    BroadcastRange<const int16, int32, N> view{data.data(), data_shape,
+                                               cdf.data(), cdf_shape};
+    RangeEncoder encoder{precision_};
+    for (int64 linear = 0; linear < data_size; ++linear) {
+      const auto pair = view.Next();
+
+      const int64 index = *pair.first;
+      DCHECK_GE(index, 0);
+      DCHECK_LT(index + 1, chip_size);
+
+      const int32* cdf_slice = pair.second;
+      DCHECK_LE(cdf_slice + chip_size, cdf.data() + cdf_size);
+
+      const int32 lower = cdf_slice[index];
+      const int32 upper = cdf_slice[index + 1];
+      encoder.Encode(lower, upper, output);
+    }
+
+    encoder.Finalize(output);
+  }
+
+  int precision_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("RangeEncode").Device(DEVICE_CPU), RangeEncodeOp);
+
+// Non-incremental decoder op -------------------------------------------------
+class RangeDecodeOp : public OpKernel {
+ public:
+  explicit RangeDecodeOp(OpKernelConstruction* context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("precision", &precision_));
+    OP_REQUIRES(context, 0 < precision_ && precision_ <= 16,
+                errors::InvalidArgument("`precision` must be in [1, 16]: ",
+                                        precision_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& encoded_tensor = context->input(0);
+    const Tensor& shape = context->input(1);
+    const Tensor& cdf = context->input(2);
+
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(encoded_tensor.shape()),
+                errors::InvalidArgument("Invalid `encoded` shape: ",
+                                        encoded_tensor.shape().DebugString()));
+    OP_REQUIRES(context, TensorShapeUtils::IsVector(shape.shape()),
+                errors::InvalidArgument("Invalid `shape` shape: ",
+                                        shape.shape().DebugString()));
+    TensorShape output_shape;
+    OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(shape.vec<int32>(),
+                                                        &output_shape));
+    OP_REQUIRES_OK(context, CheckCdfShape(output_shape, cdf.shape()));
+
+    std::vector<int64> data_shape, cdf_shape;
+    OP_REQUIRES_OK(
+        context, MergeAxes(output_shape, cdf.shape(), &data_shape, &cdf_shape));
+
+    const string& encoded = encoded_tensor.scalar<string>()();
+
+    Tensor* output;
+    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
+
+    switch (data_shape.size()) {
+#define RANGE_DECODE_CASE(dim)                                              \
+  case dim: {                                                               \
+    RangeDecodeImpl<dim>(output->flat<int16>(), data_shape,                 \
+                         cdf.flat_inner_dims<int32>(), cdf_shape, encoded); \
+  } break
+      RANGE_DECODE_CASE(1);
+      RANGE_DECODE_CASE(2);
+      RANGE_DECODE_CASE(3);
+      RANGE_DECODE_CASE(4);
+      RANGE_DECODE_CASE(5);
+      RANGE_DECODE_CASE(6);
+#undef RANGE_DECODE_CASE
+      default:
+        context->CtxFailure(errors::InvalidArgument(
+            "Irregular broadcast pattern: ", output_shape.DebugString(), ", ",
+            cdf.shape().DebugString()));
+        return;
+    }
+  }
+
+ private:
+  template <int N>
+  void RangeDecodeImpl(TTypes<int16>::Flat output,
+                       gtl::ArraySlice<int64> output_shape,
+                       TTypes<int32>::ConstMatrix cdf,
+                       gtl::ArraySlice<int64> cdf_shape,
+                       const string& encoded) const {
+    BroadcastRange<int16, int32, N> view{output.data(), output_shape,
+                                         cdf.data(), cdf_shape};
+
+    RangeDecoder decoder{encoded, precision_};
+
+    const int64 output_size = output.size();
+    const int64 cdf_size = cdf.size();
+    const auto chip_size =
+        static_cast<gtl::ArraySlice<int32>::size_type>(cdf.dimension(1));
+
+    for (int64 i = 0; i < output_size; ++i) {
+      const auto pair = view.Next();
+
+      int16* data = pair.first;
+      DCHECK_LT(data, output.data() + output_size);
+
+      const int32* cdf_slice = pair.second;
+      DCHECK_LE(cdf_slice + chip_size, cdf.data() + cdf_size);
+
+      *data = decoder.Decode(gtl::ArraySlice<int32>{cdf_slice, chip_size});
+    }
+  }
+
+  int precision_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("RangeDecode").Device(DEVICE_CPU), RangeDecodeOp);
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/coder/kernels/range_coder_ops_test.cc b/tensorflow/contrib/coder/kernels/range_coder_ops_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ae4d9d2836a0f89a9765004a85bc3c292b0e484f
--- /dev/null
+++ b/tensorflow/contrib/coder/kernels/range_coder_ops_test.cc
@@ -0,0 +1,521 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/contrib/coder/kernels/range_coder.h"
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/common_runtime/shape_refiner.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/framework/versions.pb.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/graph/testlib.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/lib/core/bits.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/lib/random/simple_philox.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+#include "tensorflow/core/public/session.h"
+#include "tensorflow/core/public/session_options.h"
+
+namespace tensorflow {
+namespace {
+int LogUniform(random::SimplePhilox* gen, uint32 n) {
+  CHECK_GT(n, 0);
+
+  // Split [0, n) into {0}, [1, 2), [2, 4), [4, 8), ..., [2^(m-1), n).
+  const int m = Log2Ceiling(n);
+
+  int outcome;
+  do {
+    // Uniform() consumes at least 32 bits per call, therefore this is somewhat
+    // wasteful implementation. Since this is used only for test, we do not
+    // refine this implementation further.
+    const int k = gen->Uniform(m + 1) - 1;
+    // If k == -1, then sample from {0}.
+    // If k == 0, then sample from [1, 2).
+    // If k == 1, then sample from [2, 4), ... and so on.
+    if (k < 1) {
+      outcome = k + 1;
+    } else {
+      outcome = (1 << k) + gen->Uniform(1 << k);
+    }
+  } while (n <= outcome);
+  return outcome;
+}
+
+std::vector<int64> ComputeStrides(const TensorShape& shape) {
+  std::vector<int64> stride(shape.dims());
+  int64 current = 1;
+  for (int i = shape.dims() - 1; i >= 0; --i) {
+    stride[i] = current;
+    current *= shape.dim_size(i);
+  }
+  return stride;
+}
+
+class RangeCoderOpsTest : public OpsTestBase {
+ protected:
+  Status RunEncodeOp(int precision, gtl::ArraySlice<Tensor> input,
+                     Tensor* output) {
+    TF_RETURN_IF_ERROR(NodeDefBuilder("encode", "RangeEncode")
+                           .Input(tensorflow::FakeInput(DT_INT16))
+                           .Input(tensorflow::FakeInput(DT_INT32))
+                           .Attr("precision", precision)
+                           .Finalize(node_def()));
+    TF_RETURN_IF_ERROR(InitOp());
+
+    inputs_.clear();
+    std::vector<Tensor> copies(input.size());
+    for (int i = 0; i < input.size(); ++i) {
+      copies[i] = input[i];
+      inputs_.emplace_back(&copies[i]);
+    }
+
+    TF_RETURN_IF_ERROR(RunOpKernel());
+
+    *output = *GetOutput(0);
+    inputs_.clear();
+
+    return Status::OK();
+  }
+
+  Status RunDecodeOp(int precision, gtl::ArraySlice<Tensor> input,
+                     Tensor* output) {
+    TF_RETURN_IF_ERROR(NodeDefBuilder("decode", "RangeDecode")
+                           .Input(tensorflow::FakeInput(DT_STRING))
+                           .Input(tensorflow::FakeInput(DT_INT32))
+                           .Input(tensorflow::FakeInput(DT_INT32))
+                           .Attr("precision", precision)
+                           .Finalize(node_def()));
+    TF_RETURN_IF_ERROR(InitOp());
+
+    inputs_.clear();
+    std::vector<Tensor> copies(input.size());
+    for (int i = 0; i < input.size(); ++i) {
+      copies[i] = input[i];
+      inputs_.emplace_back(&copies[i]);
+    }
+
+    TF_RETURN_IF_ERROR(RunOpKernel());
+
+    *output = *GetOutput(0);
+    inputs_.clear();
+
+    return Status::OK();
+  }
+
+  void TestEncodeAndDecode(int precision, const Tensor& data,
+                           const Tensor& cdf) {
+    Tensor encoded;
+    TF_ASSERT_OK(RunEncodeOp(precision, {data, cdf}, &encoded));
+
+    const TensorShape& data_shape = data.shape();
+    Tensor shape{DT_INT32, {data_shape.dims()}};
+    for (int i = 0; i < data_shape.dims(); ++i) {
+      shape.flat<int32>()(i) = data_shape.dim_size(i);
+    }
+
+    Tensor decoded;
+    TF_ASSERT_OK(RunDecodeOp(precision, {encoded, shape, cdf}, &decoded));
+
+    EXPECT_EQ(decoded.dtype(), data.dtype());
+    EXPECT_EQ(decoded.shape(), data.shape());
+    EXPECT_EQ(decoded.tensor_data(), data.tensor_data());
+  }
+
+  void PopulateMaxValues(random::SimplePhilox* gen, Tensor* maxvalue_tensor,
+                         int min_maxvalue, int max_maxvalue) {
+    const int range = max_maxvalue - min_maxvalue;
+    TTypes<int16>::Flat flat = maxvalue_tensor->flat<int16>();
+
+    for (int64 i = 0; i < flat.size(); ++i) {
+      flat(i) = min_maxvalue + gen->Uniform(range);
+    }
+  }
+
+  void BuildCdf(random::SimplePhilox* gen, Tensor* data_tensor,
+                Tensor* cdf_tensor, const Tensor& maxvalue_tensor) {
+    CHECK(TensorShapeUtils::StartsWith(cdf_tensor->shape(),
+                                       maxvalue_tensor.shape()));
+    CHECK_EQ(cdf_tensor->dims(), maxvalue_tensor.dims() + 1);
+    const int64 chip_size = cdf_tensor->dim_size(cdf_tensor->dims() - 1);
+
+    std::vector<int64> data_stride = ComputeStrides(data_tensor->shape());
+    std::vector<int64> cdf_stride = ComputeStrides(cdf_tensor->shape());
+
+    for (int i = 0; i < cdf_tensor->dims(); ++i) {
+      if (cdf_tensor->dim_size(i) == 1) {
+        cdf_stride[i] = 0;
+      }
+    }
+
+    Tensor histogram_tensor{DT_INT32, cdf_tensor->shape()};
+    TTypes<int16>::Flat data = data_tensor->flat<int16>();
+    TTypes<int32>::Flat histogram = histogram_tensor.flat<int32>();
+    TTypes<int16>::ConstFlat maxvalue = maxvalue_tensor.flat<int16>();
+    histogram.setZero();
+
+    for (int64 index = 0; index < data.size(); ++index) {
+      int64 temp = index;
+      int64 offset = 0;
+      for (int dim = 0; dim < data_stride.size(); ++dim) {
+        const int64 coord = temp / data_stride[dim];
+        offset += coord * cdf_stride[dim];
+        temp -= coord * data_stride[dim];
+      }
+      ASSERT_EQ(temp, 0);
+
+      const int64 maxvalue_offset = offset / chip_size;
+      CHECK_EQ(maxvalue_offset * chip_size, offset);
+      CHECK_LT(maxvalue(maxvalue_offset) + 1, chip_size);
+      const int value = LogUniform(gen, maxvalue(maxvalue_offset));
+      data(index) = value;
+      histogram(offset + value + 1) += 1;
+    }
+
+    cdf_tensor->flat_inner_dims<int32, 2>() =
+        histogram_tensor.flat_inner_dims<int32, 2>().cumsum(1);
+  }
+};
+
+TEST_F(RangeCoderOpsTest, NoBroadcast) {
+  constexpr int kPrecision = 14;
+  constexpr int kMaxValue = 10;
+
+  Tensor data{DT_INT16, {1, 32, 32, 16}};
+  Tensor temp{DT_INT32, {1, 1, 1, 1, kMaxValue + 2}};
+  Tensor maxvalue{DT_INT16, {1, 1, 1, 1}};
+  maxvalue.flat<int16>()(0) = kMaxValue;
+
+  ASSERT_LE(data.shape().num_elements(), 1 << kPrecision);
+
+  random::PhiloxRandom philox(random::New64(), random::New64());
+  random::SimplePhilox gen(&philox);
+  BuildCdf(&gen, &data, &temp, maxvalue);
+
+  const Eigen::array<int32, 5> broadcast = {1, 32, 32, 16, 1};
+
+  Tensor cdf{DT_INT32, {1, 32, 32, 16, kMaxValue + 2}};
+  cdf.tensor<int32, 5>() = temp.tensor<int32, 5>().broadcast(broadcast);
+
+  TestEncodeAndDecode(kPrecision, data, cdf);
+}
+
+TEST_F(RangeCoderOpsTest, Broadcast1Axis) {
+  constexpr int kPrecision = 9;
+  constexpr int kDimensionSize = 1 << kPrecision;
+  constexpr int kMinMaxValue = 10;
+  constexpr int kMaxMaxValue = 64;
+
+  random::PhiloxRandom philox(random::New64(), random::New64());
+  random::SimplePhilox gen(&philox);
+  Tensor data{DT_INT16, {1, kDimensionSize, kDimensionSize}};
+
+  Tensor maxvalue{DT_INT16, {kDimensionSize}};
+  PopulateMaxValues(&gen, &maxvalue, kMinMaxValue, kMaxMaxValue);
+
+  {
+    // Axis 1.
+    Tensor maxvalue1;
+    ASSERT_TRUE(maxvalue1.CopyFrom(maxvalue, {1, 1, kDimensionSize}));
+
+    Tensor cdf{DT_INT32, {1, 1, kDimensionSize, kMaxMaxValue + 2}};
+    BuildCdf(&gen, &data, &cdf, maxvalue1);
+    TestEncodeAndDecode(kPrecision, data, cdf);
+  }
+
+  {
+    // Axis 2.
+    Tensor maxvalue2;
+    ASSERT_TRUE(maxvalue2.CopyFrom(maxvalue, {1, kDimensionSize, 1}));
+
+    Tensor cdf{DT_INT32, {1, kDimensionSize, 1, kMaxMaxValue + 2}};
+    BuildCdf(&gen, &data, &cdf, maxvalue2);
+    TestEncodeAndDecode(kPrecision, data, cdf);
+  }
+}
+
+TEST_F(RangeCoderOpsTest, Broadcast2Axes) {
+  constexpr int kPrecision = 13;
+  constexpr int kDimensionSize1 = 1 << (kPrecision / 2);
+  constexpr int kDimensionSize2 = 1 << (kPrecision - kPrecision / 2);
+  constexpr int kMinMaxValue = 10;
+  constexpr int kMaxMaxValue = 64;
+
+  random::PhiloxRandom philox(random::New64(), random::New64());
+  random::SimplePhilox gen(&philox);
+  Tensor maxvalue{DT_INT16, {2, 1, 1, 7}};
+  PopulateMaxValues(&gen, &maxvalue, kMinMaxValue, kMaxMaxValue);
+
+  Tensor data{DT_INT16, {2, kDimensionSize1, kDimensionSize2, 7}};
+  Tensor cdf{DT_INT32, {2, 1, 1, 7, kMaxMaxValue + 2}};
+  BuildCdf(&gen, &data, &cdf, maxvalue);
+  TestEncodeAndDecode(kPrecision, data, cdf);
+}
+
+TEST_F(RangeCoderOpsTest, InvalidCdfShape) {
+  Tensor data{DT_INT16, {3, 3}};
+  Tensor cdf{DT_INT32, {3, 3}};
+
+  Tensor unused;
+  {
+    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.error_message().find("`cdf` should have one more axis"),
+              string::npos);
+  }
+
+  Tensor empty{DT_STRING, {}};
+  Tensor shape{DT_INT32, {2}};
+  shape.vec<int32>().setValues({3, 3});
+  {
+    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.error_message().find("`cdf` should have one more axis"),
+              string::npos);
+  }
+
+  cdf = Tensor{DT_INT32, {3, 3, 1}};
+  {
+    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(
+        status.error_message().find("last dimension of `cdf` should be > 1"),
+        string::npos);
+  }
+  {
+    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(
+        status.error_message().find("last dimension of `cdf` should be > 1"),
+        string::npos);
+  }
+}
+
+TEST_F(RangeCoderOpsTest, DecoderShapeFn) {
+  Tensor encoded_tensor{DT_STRING, {}};
+  Tensor shape_tensor{DT_INT32, {3}};
+  Tensor cdf_tensor{DT_INT32, {4, 6, 8, 2}};
+
+  shape_tensor.flat<int32>().setValues({4, 6, 8});
+
+  Graph g{OpRegistry::Global()};
+  Node* encoded = test::graph::Constant(&g, encoded_tensor);
+  Node* shape = test::graph::Constant(&g, shape_tensor);
+  Node* cdf = test::graph::Constant(&g, cdf_tensor);
+  Node* decode;
+  TF_ASSERT_OK(NodeBuilder("range_decode", "RangeDecode", g.op_registry())
+                   .Input(encoded)
+                   .Input(shape)
+                   .Input(cdf)
+                   .Attr("precision", 10)
+                   .Finalize(&g, &decode));
+
+  ShapeRefiner refiner{g.versions().producer(), g.op_registry()};
+  TF_ASSERT_OK(refiner.AddNode(encoded));
+  TF_ASSERT_OK(refiner.AddNode(shape));
+  TF_ASSERT_OK(refiner.AddNode(cdf));
+  TF_ASSERT_OK(refiner.AddNode(decode));
+
+  auto* context = refiner.GetContext(decode);
+  ASSERT_NE(context, nullptr);
+
+  ASSERT_EQ(context->num_outputs(), 1);
+  auto shape_handle = context->output(0);
+
+  ASSERT_EQ(context->Rank(shape_handle), 3);
+  EXPECT_EQ(context->Value(context->Dim(shape_handle, 0)), 4);
+  EXPECT_EQ(context->Value(context->Dim(shape_handle, 1)), 6);
+  EXPECT_EQ(context->Value(context->Dim(shape_handle, 2)), 8);
+}
+
+TEST_F(RangeCoderOpsTest, InvalidBroadcast) {
+  Tensor data{DT_INT16, {3, 3}};
+  Tensor cdf{DT_INT32, {3, 2, 2}};
+
+  Tensor unused;
+  {
+    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.error_message().find("Cannot broadcast shape"),
+              string::npos);
+  }
+
+  data = Tensor{DT_INT16, {3, 1}};
+  cdf = Tensor{DT_INT32, {3, 3, 2}};
+  Tensor empty{DT_STRING, {}};
+  Tensor shape{DT_INT32, {2}};
+  shape.vec<int32>().setValues({3, 1});
+  {
+    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.error_message().find("Cannot broadcast shape"),
+              string::npos);
+  }
+
+  std::vector<int64> shape_vector = {2, 2, 2, 2, 2, 2, 2, 2, 2};
+  data = Tensor{DT_INT16, TensorShape{shape_vector}};
+  cdf = Tensor{DT_INT32, {2, 1, 2, 1, 2, 1, 2, 1, 2, 2}};
+  {
+    const Status status = RunEncodeOp(10, {data, cdf}, &unused);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.error_message().find("Irregular broadcast"), string::npos);
+  }
+
+  shape = Tensor{DT_INT32, {static_cast<int64>(shape_vector.size())}};
+  for (int i = 0; i < shape_vector.size(); ++i) {
+    shape.flat<int32>()(i) = shape_vector[i];
+  }
+  {
+    const Status status = RunDecodeOp(10, {empty, shape, cdf}, &unused);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.error_message().find("Irregular broadcast"), string::npos);
+  }
+}
+
+// Benchmark -------------------------------------------------------------
+
+// This function creates RangeEncode graph with CDF built from a separate data
+// sample.
+Graph* CreateRangeEncodeFullBroadcastGraph(const TensorShape& shape,
+                                           int precision) {
+  CHECK_EQ(shape.dims(), 4);
+
+  constexpr int kAlphabetSize = 70;
+
+  Tensor histogram{DT_INT32, {kAlphabetSize + 1}};
+  TTypes<int32>::Vec h = histogram.vec<int32>();
+  h.setConstant(1);
+  h(0) = 0;
+
+  random::PhiloxRandom philox(random::New64(), random::New64());
+  random::SimplePhilox gen(&philox);
+  for (int i = 0; i < (1 << precision) - kAlphabetSize; ++i) {
+    const int value = LogUniform(&gen, kAlphabetSize - 1);
+    h(value + 1) += 1;
+  }
+
+  Tensor cdf{DT_INT32, {1, 1, 1, 1, kAlphabetSize + 1}};
+  cdf.flat<int32>() = h.cumsum(0);
+
+  Tensor data{DT_INT16, shape};
+  TTypes<int16>::Flat d = data.flat<int16>();
+  for (int64 i = 0; i < d.size(); ++i) {
+    d(i) = LogUniform(&gen, kAlphabetSize - 1);
+  }
+
+  Graph* g = new Graph(OpRegistry::Global());
+  TF_CHECK_OK(NodeBuilder("range_encode", "RangeEncode", g->op_registry())
+                  .Input(test::graph::Constant(g, data))
+                  .Input(test::graph::Constant(g, cdf))
+                  .Attr("precision", precision)
+                  .Finalize(g, nullptr));
+  return g;
+}
+
+// This function creates RangeDecode graph with CDF built from a separate data
+// sample.
+Graph* CreateRangeDecodeFullBroadcastGraph(const TensorShape& shape,
+                                           int precision) {
+  CHECK_EQ(shape.dims(), 4);
+
+  constexpr int kAlphabetSize = 200;
+  const int64 num_elements = shape.num_elements();
+
+  Tensor histogram{DT_INT32, {kAlphabetSize + 1}};
+  TTypes<int32>::Vec h = histogram.vec<int32>();
+  h.setConstant(1);
+  h(0) = 0;
+
+  random::PhiloxRandom philox(random::New64(), random::New64());
+  random::SimplePhilox gen(&philox);
+  for (int i = 0; i < (1 << precision) - kAlphabetSize; ++i) {
+    const int value = LogUniform(&gen, kAlphabetSize - 1);
+    h(value + 1) += 1;
+  }
+
+  Tensor cdf_tensor{DT_INT32, {1, 1, 1, 1, kAlphabetSize + 1}};
+  TTypes<int32>::Flat cdf = cdf_tensor.flat<int32>();
+  cdf = h.cumsum(0);
+
+  Tensor string_tensor{DT_STRING, TensorShape{}};
+  string& sink = string_tensor.scalar<string>()();
+
+  RangeEncoder encoder{precision};
+  for (int64 i = 0; i < num_elements; ++i) {
+    const int value = LogUniform(&gen, kAlphabetSize - 1);
+    encoder.Encode(cdf(value), cdf(value + 1), &sink);
+  }
+  encoder.Finalize(&sink);
+
+  Tensor shape_tensor{DT_INT32, {shape.dims()}};
+  for (int i = 0; i < shape.dims(); ++i) {
+    shape_tensor.flat<int32>()(i) = shape.dim_size(i);
+  }
+
+  Graph* g = new Graph(OpRegistry::Global());
+  TF_CHECK_OK(NodeBuilder("range_decode", "RangeDecode", g->op_registry())
+                  .Input(test::graph::Constant(g, string_tensor))
+                  .Input(test::graph::Constant(g, shape_tensor))
+                  .Input(test::graph::Constant(g, cdf_tensor))
+                  .Attr("precision", precision)
+                  .Finalize(g, nullptr));
+  return g;
+}
+
+void RunTensorFlowBenchmark(int iters, Graph* g, int64 num_elements) {
+  SessionOptions opts;
+  opts.config.set_intra_op_parallelism_threads(1);
+  opts.config.set_inter_op_parallelism_threads(1);
+
+  testing::UseRealTime();
+  test::Benchmark("cpu", g, &opts).Run(iters);
+
+  const int64 num_items = static_cast<int64>(iters) * num_elements;
+  testing::ItemsProcessed(num_items);
+}
+
+void BM_RangeEncodeFullBroadcast(int iters, int code_size) {
+  constexpr int kPrecision = 14;
+  const TensorShape shape = {1, code_size, code_size, 256};
+  Graph* g = CreateRangeEncodeFullBroadcastGraph(shape, kPrecision);
+  RunTensorFlowBenchmark(iters, g, shape.num_elements());
+}
+
+BENCHMARK(BM_RangeEncodeFullBroadcast)->Arg(32)->Arg(64);
+
+void BM_RangeDecodeFullBroadcast(int iters, int code_size) {
+  constexpr int kPrecision = 14;
+  const TensorShape shape = {1, code_size, code_size, 256};
+  Graph* g = CreateRangeDecodeFullBroadcastGraph(shape, kPrecision);
+  RunTensorFlowBenchmark(iters, g, shape.num_elements());
+}
+
+BENCHMARK(BM_RangeDecodeFullBroadcast)->Arg(32)->Arg(64);
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/coder/kernels/range_coder_ops_util.cc b/tensorflow/contrib/coder/kernels/range_coder_ops_util.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d66730cb4881ea92b5477047c500291fa9c0c290
--- /dev/null
+++ b/tensorflow/contrib/coder/kernels/range_coder_ops_util.cc
@@ -0,0 +1,85 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/coder/kernels/range_coder_ops_util.h"
+
+#include <vector>
+
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/types.h"
+
+using tensorflow::errors::InvalidArgument;
+
+namespace tensorflow {
+Status MergeAxes(const TensorShape& broadcast_shape,
+                 const TensorShape& storage_shape,
+                 std::vector<int64>* merged_broadcast_shape_pointer,
+                 std::vector<int64>* merged_storage_shape_pointer) {
+  CHECK_EQ(storage_shape.dims(), broadcast_shape.dims() + 1);
+
+  std::vector<int64>& merged_broadcast_shape = *merged_broadcast_shape_pointer;
+  std::vector<int64>& merged_storage_shape = *merged_storage_shape_pointer;
+
+  // The shapes are simplified so that the conversions between linear index
+  // and coordinates takes less CPU cycles. Two adjacent dimensions are
+  // merged if they both are broadcasting dimensions or if they both are
+  // non-broadcasting dimensions.
+  merged_broadcast_shape.resize(1);
+  merged_broadcast_shape[0] = 1;
+  merged_storage_shape.resize(1);
+  merged_storage_shape[0] = 1;
+
+  for (int i = 0, j = 0; j < broadcast_shape.dims(); ++j) {
+    if (TF_PREDICT_FALSE(
+            (broadcast_shape.dim_size(j) != storage_shape.dim_size(j)) &&
+            (storage_shape.dim_size(j) != 1))) {
+      return InvalidArgument("Cannot broadcast shape ",
+                             storage_shape.DebugString(), " to ",
+                             broadcast_shape.DebugString());
+    }
+
+    const bool was_broadcasting = (merged_storage_shape[i] == 1);
+    const bool is_broadcasting = (storage_shape.dim_size(j) == 1);
+
+    // Merge two adjacent axes if they both are broadcasting or both are
+    // non-broadcasting axes. The second and the third conditions in the if
+    // clause below are when the previously merged axis or the next j-th axis
+    // may be interpreted as either a broadcasting or a non-broadcasting axis.
+    const bool merge = (was_broadcasting == is_broadcasting) ||
+                       (broadcast_shape.dim_size(j) <= 1) ||
+                       (merged_broadcast_shape[i] <= 1);
+
+    if (merge) {
+      merged_broadcast_shape[i] *= broadcast_shape.dim_size(j);
+      merged_storage_shape[i] *= storage_shape.dim_size(j);
+    } else {
+      // Move to the next axis.
+      merged_broadcast_shape.push_back(broadcast_shape.dim_size(j));
+      merged_storage_shape.push_back(storage_shape.dim_size(j));
+      ++i;
+    }
+  }
+
+  int64 storage_stride = 1;
+  for (int i = broadcast_shape.dims(); i < storage_shape.dims(); ++i) {
+    storage_stride *= storage_shape.dim_size(i);
+  }
+  merged_storage_shape.push_back(storage_stride);
+
+  return Status::OK();
+}
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/coder/kernels/range_coder_ops_util.h b/tensorflow/contrib/coder/kernels/range_coder_ops_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..95241a8682891dc94780a9194d20aa9dc22e17c8
--- /dev/null
+++ b/tensorflow/contrib/coder/kernels/range_coder_ops_util.h
@@ -0,0 +1,33 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_OPS_UTIL_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_OPS_UTIL_H_
+
+#include <vector>
+
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+// The shapes are simplified to reduce indexing cost.
+Status MergeAxes(const TensorShape& broadcast_shape,
+                 const TensorShape& storage_shape,
+                 std::vector<int64>* merged_broadcast_shape_pointer,
+                 std::vector<int64>* merged_storage_shape_pointer);
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_CODER_KERNELS_RANGE_CODER_OPS_UTIL_H_
diff --git a/tensorflow/contrib/coder/kernels/range_coder_test.cc b/tensorflow/contrib/coder/kernels/range_coder_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..442994bf7c7566c1cbe1c439050a69e5b9a4208e
--- /dev/null
+++ b/tensorflow/contrib/coder/kernels/range_coder_test.cc
@@ -0,0 +1,116 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/coder/kernels/range_coder.h"
+
+#include <cmath>
+
+#include "tensorflow/core/lib/random/distribution_sampler.h"
+#include "tensorflow/core/lib/random/philox_random.h"
+#include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/lib/random/simple_philox.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+void RangeEncodeDecodeTest(int precision, random::SimplePhilox* gen) {
+  constexpr int kAlphabetSize = 256;
+
+  std::vector<float> distribution_weight;
+  distribution_weight.reserve(kAlphabetSize);
+  for (int i = 1; i <= kAlphabetSize; ++i) {
+    distribution_weight.push_back(std::pow(static_cast<float>(i), -2.0f));
+  }
+
+  random::DistributionSampler sampler(distribution_weight);
+
+  const int multiplier = (precision > 7) ? 32 : 1;
+  std::vector<int32> histogram(kAlphabetSize, multiplier - 1);
+
+  const int data_size =
+      (multiplier << precision) - histogram.size() * (multiplier - 1);
+  CHECK_GE(data_size, 0);
+  std::vector<uint8> data(data_size);
+  for (uint8& x : data) {
+    x = sampler.Sample(gen);
+    ++histogram[x];
+  }
+
+  std::vector<int32> cdf(histogram.size() + 1, 0);
+  int partial_sum = 0;
+  for (int i = 0; i < histogram.size(); ++i) {
+    partial_sum += histogram[i];
+    cdf[i + 1] = partial_sum / multiplier;
+  }
+
+  ASSERT_EQ(cdf.front(), 0);
+  ASSERT_EQ(cdf.back(), 1 << precision);
+
+  std::vector<double> ideal_code_length(histogram.size());
+  const double normalizer = static_cast<double>(1 << precision);
+  for (int i = 0; i < ideal_code_length.size(); ++i) {
+    ideal_code_length[i] = -std::log2((cdf[i + 1] - cdf[i]) / normalizer);
+  }
+
+  RangeEncoder encoder(precision);
+  string encoded;
+  double ideal_length = 0.0;
+  for (uint8 x : data) {
+    encoder.Encode(cdf[x], cdf[x + 1], &encoded);
+    ideal_length += ideal_code_length[x];
+  }
+  encoder.Finalize(&encoded);
+
+  LOG(INFO) << "Encoded string length (bits): " << 8 * encoded.size()
+            << ", whereas ideal " << ideal_length << " ("
+            << (8 * encoded.size()) / ideal_length << " of ideal) "
+            << " (ideal compression rate " << ideal_length / (8 * data.size())
+            << ")";
+
+  RangeDecoder decoder(encoded, precision);
+  for (int i = 0; i < data.size(); ++i) {
+    const int32 decoded = decoder.Decode(cdf);
+    ASSERT_EQ(decoded, static_cast<int32>(data[i])) << i;
+  }
+}
+
+TEST(RangeCoderTest, Precision1To11) {
+  random::PhiloxRandom gen(random::New64(), random::New64());
+  random::SimplePhilox rand(&gen);
+  const int precision = 1 + rand.Uniform(11);
+  RangeEncodeDecodeTest(precision, &rand);
+}
+
+TEST(RangeCoderTest, Precision12To16) {
+  random::PhiloxRandom gen(random::New64(), random::New64());
+  random::SimplePhilox rand(&gen);
+  for (int precision = 12; precision < 17; ++precision) {
+    RangeEncodeDecodeTest(precision, &rand);
+  }
+}
+
+TEST(RangeCoderTest, FinalizeState0) {
+  constexpr int kPrecision = 2;
+
+  string output;
+  RangeEncoder encoder(kPrecision);
+  encoder.Encode(0, 2, &output);
+  encoder.Finalize(&output);
+
+  RangeDecoder decoder(output, kPrecision);
+  EXPECT_EQ(decoder.Decode({0, 2, 4}), 0);
+}
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/coder/ops/coder_ops.cc b/tensorflow/contrib/coder/ops/coder_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9056d1a6963d7be92f499db31385fb6afe2dc515
--- /dev/null
+++ b/tensorflow/contrib/coder/ops/coder_ops.cc
@@ -0,0 +1,119 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+// clang-format off
+REGISTER_OP("RangeEncode")
+    .Input("data: int16")
+    .Input("cdf: int32")
+    .Output("encoded: string")
+    .Attr("precision: int >= 1")
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+Using the provided cumulative distribution functions (CDF) inside `cdf`, returns
+a range-code of `data`.
+
+The shape of `cdf` should have one more axis than the shape of `data`, and the
+prefix `cdf.shape[:-1]` should be broadcastable to `data.shape`. That is, for
+every `i = 0,...,rank(data) - 1`, the op requires that either
+`cdf.shape[i] == 1` or `cdf.shape[i] == data.shape[i]`. Note that this
+broadcasting is limited in the sense that the number of axes must match, and
+broadcasts only `cdf` but not `data`.
+
+`data` should have an upper bound `m > 0` such that each element is an integer
+in range `[0, m)`. Then the last dimension size of `cdf` must be `m + 1`. For
+each element of `data`, the innermost strip of `cdf` is a vector representing a
+CDF. For each k = 0,...,m, `cdf[..., k] / 2^precision` is the probability that
+an outcome is less than `k` (not less than or equal to).
+
+```
+   cdf[..., 0] / 2^precision = Pr(data[...] < 0)
+   cdf[..., 1] / 2^precision = Pr(data[...] < 1) = Pr(data[...] <= 0)
+   cdf[..., 2] / 2^precision = Pr(data[...] < 2) = Pr(data[...] <= 1)
+   ...
+   cdf[..., m] / 2^precision = Pr(data[...] < m) = 1
+```
+
+Therefore each element of `cdf` must be in `[0, 2^precision]`.
+
+Ideally `cdf[..., m]` should equal to `2^precision` but this is not a hard
+requirement as long as `cdf[..., m] <= 2^precision`.
+
+The encoded string neither contains the shape information of the encoded data
+nor a termination symbol. Therefore the shape of the encoded data must be
+explicitly provided to the decoder.
+
+Implementation notes:
+
+- Because of potential performance issues, the op does not check whether
+elements of `data` is in the correct range `[0, m)`, or if `cdf` satisfies
+monotonic increase property.
+
+- For the range coder to decode the encoded string correctly, the decoder should
+be able to reproduce the internal states of the encoder precisely. Otherwise,
+the decoding would fail and once an error occur, all subsequent decoded values
+are incorrect. For this reason, the range coder uses integer arithmetics and
+avoids using any floating point operations internally, and `cdf` should contain
+integers representing quantized probability mass rather than floating points. 
+
+data: An int32 tensor.
+cdf: An int32 tensor representing the CDF's of `data`. Each integer is divided
+  by `2^precision` to represent a fraction.
+encoded: A range-coded scalar string.
+precision: The number of bits for probability quantization. Must be <= 16.
+)doc");
+
+
+REGISTER_OP("RangeDecode")
+    .Input("encoded: string")
+    .Input("shape: int32")
+    .Input("cdf: int32")
+    .Output("decoded: int16")
+    .Attr("precision: int >= 1")
+    .SetShapeFn([] (InferenceContext* c) {
+      ShapeHandle out;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &out));
+      c->set_output(0, out);
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Decodes a range-coded `code` into an int32 tensor of shape `shape`.
+
+This is the reverse op of RangeEncode. The shape of the tensor that was encoded
+should be known by the caller.
+
+Implementation notes:
+
+- If wrong input was given (e.g., corrupt `encoded` string, or `cdf` or
+`precision` do not match encoder), the decode is unsuccessful. Because of
+potential performance issues, the decoder does not return error status.
+
+encoded: A scalar string tensor from RangeEncode.
+shape: An int32 1-D tensor representing the shape of the data encoded by
+  RangeEncode.
+decoded: An int32 tensor with shape equal to `shape`.
+precision: The number of bits for probability quantization. Must be <= 16, and
+  must match the precision used by RangeEncode that produced `encoded`.
+)doc");
+// clang-format on
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/coder/python/ops/coder_ops.py b/tensorflow/contrib/coder/python/ops/coder_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb262e338baf1d9c3c043f03a02c2d2851e22b49
--- /dev/null
+++ b/tensorflow/contrib/coder/python/ops/coder_ops.py
@@ -0,0 +1,30 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Range coder operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.contrib.coder.python.ops import gen_coder_ops
+from tensorflow.contrib.coder.python.ops.gen_coder_ops import *
+# pylint: enable=wildcard-import,unused-import
+from tensorflow.contrib.util import loader
+from tensorflow.python.platform import resource_loader
+
+
+_coder_ops = loader.load_op_library(
+    resource_loader.get_path_to_datafile("_coder_ops.so"))
diff --git a/tensorflow/contrib/coder/python/ops/coder_ops_test.py b/tensorflow/contrib/coder/python/ops/coder_ops_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5e14e7a641b5673e97882daf2b5a1796ee1bbef
--- /dev/null
+++ b/tensorflow/contrib/coder/python/ops/coder_ops_test.py
@@ -0,0 +1,53 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Coder operations tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.coder.python.ops import coder_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.platform import test
+
+
+class CoderOpsTest(test.TestCase):
+  """Coder ops test.
+
+  Coder ops have C++ tests. Python test just ensures that Python binding is not
+  broken.
+  """
+
+  def testReadmeExample(self):
+    data = random_ops.random_uniform((128, 128), 0, 10, dtype=dtypes.int32)
+    histogram = math_ops.bincount(data, minlength=10, maxlength=10)
+    cdf = math_ops.cumsum(histogram, exclusive=False)
+    cdf = array_ops.pad(cdf, [[1, 0]])
+    cdf = array_ops.reshape(cdf, [1, 1, -1])
+
+    data = math_ops.cast(data, dtypes.int16)
+    encoded = coder_ops.range_encode(data, cdf, precision=14)
+    decoded = coder_ops.range_decode(
+        encoded, array_ops.shape(data), cdf, precision=14)
+
+    with self.test_session() as sess:
+      self.assertAllEqual(*sess.run((data, decoded)))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/copy_graph/__init__.py b/tensorflow/contrib/copy_graph/__init__.py
index 30a0aac140b576c501595fd6c8767b7dddde8e58..61ee39e4be1f0471309bb2672476dd9100cbfd49 100644
--- a/tensorflow/contrib/copy_graph/__init__.py
+++ b/tensorflow/contrib/copy_graph/__init__.py
@@ -13,8 +13,6 @@
 # limitations under the License.
 # ==============================================================================
 """Functions to copy elements between graphs.
-
-See the @{$python/contrib.copy_graph} guide.
 """
 
 from __future__ import absolute_import
diff --git a/tensorflow/contrib/copy_graph/python/util/copy_elements.py b/tensorflow/contrib/copy_graph/python/util/copy_elements.py
index d060eda0a74010db10d9506b2a1c2345b2731709..bae66ffd4289308f2cbfc730ec50d057b13923fb 100644
--- a/tensorflow/contrib/copy_graph/python/util/copy_elements.py
+++ b/tensorflow/contrib/copy_graph/python/util/copy_elements.py
@@ -225,6 +225,7 @@ def copy_op_to_graph(org_instance, to_graph, variables,
                            new_original_op,
                            op_def)
     #Use Graph's hidden methods to add the op
+    to_graph._add_op(new_op)  # pylint: disable=protected-access
     to_graph._record_op_seen_by_control_dependencies(new_op)
     for device_function in reversed(to_graph._device_function_stack):
       new_op._set_device(device_function(new_op))
diff --git a/tensorflow/contrib/crf/__init__.py b/tensorflow/contrib/crf/__init__.py
index bc749339bd4d49c8372bc731da98732f8c19cbe1..046c509626bc2eb20a65c0b38495ff37c294e0e1 100644
--- a/tensorflow/contrib/crf/__init__.py
+++ b/tensorflow/contrib/crf/__init__.py
@@ -16,15 +16,15 @@
 
 See the @{$python/contrib.crf} guide.
 
-@@crf_sequence_score
-@@crf_log_norm
-@@crf_log_likelihood
-@@crf_unary_score
 @@crf_binary_score
 @@crf_decode
-@@CrfForwardRnnCell
-@@CrfDecodeForwardRnnCell
+@@crf_log_likelihood
+@@crf_log_norm
+@@crf_sequence_score
+@@crf_unary_score
 @@CrfDecodeBackwardRnnCell
+@@CrfDecodeForwardRnnCell
+@@CrfForwardRnnCell
 @@viterbi_decode
 """
 
@@ -32,16 +32,15 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.crf.python.ops.crf import _lengths_to_masks
 from tensorflow.contrib.crf.python.ops.crf import crf_binary_score
 from tensorflow.contrib.crf.python.ops.crf import crf_decode
 from tensorflow.contrib.crf.python.ops.crf import crf_log_likelihood
 from tensorflow.contrib.crf.python.ops.crf import crf_log_norm
 from tensorflow.contrib.crf.python.ops.crf import crf_sequence_score
 from tensorflow.contrib.crf.python.ops.crf import crf_unary_score
-from tensorflow.contrib.crf.python.ops.crf import CrfForwardRnnCell
-from tensorflow.contrib.crf.python.ops.crf import CrfDecodeForwardRnnCell
 from tensorflow.contrib.crf.python.ops.crf import CrfDecodeBackwardRnnCell
+from tensorflow.contrib.crf.python.ops.crf import CrfDecodeForwardRnnCell
+from tensorflow.contrib.crf.python.ops.crf import CrfForwardRnnCell
 from tensorflow.contrib.crf.python.ops.crf import viterbi_decode
 
 from tensorflow.python.util.all_util import remove_undocumented
diff --git a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py
index b47fb426a193e0fcc075deafae3eaab698f18ec9..721dc4d0801d1f0e116921888e3851a95e0b72b0 100644
--- a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py
+++ b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py
@@ -179,17 +179,6 @@ class CrfTest(test.TestCase):
       tf_total_log_likelihood = sess.run(total_log_likelihood)
       self.assertAllClose(tf_total_log_likelihood, 0.0)
 
-  def testLengthsToMasks(self):
-    with self.test_session() as sess:
-      sequence_lengths = [4, 1, 8, 2]
-      max_sequence_length = max(sequence_lengths)
-      mask = crf._lengths_to_masks(sequence_lengths, max_sequence_length)
-      tf_mask = sess.run(mask)
-      self.assertEqual(len(tf_mask), len(sequence_lengths))
-      for m, l in zip(tf_mask, sequence_lengths):
-        self.assertAllEqual(m[:l], [1] * l)
-        self.assertAllEqual(m[l:], [0] * (len(m) - l))
-
   def testViterbiDecode(self):
     inputs = np.array(
         [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py
index 7f5ae937b26f465076c6976429697c35924432e5..62708636c6181ca63cddf2b2e7c84d3da740282a 100644
--- a/tensorflow/contrib/crf/python/ops/crf.py
+++ b/tensorflow/contrib/crf/python/ops/crf.py
@@ -70,25 +70,6 @@ __all__ = [
 ]
 
 
-def _lengths_to_masks(lengths, max_length):
-  """Creates a binary matrix that can be used to mask away padding.
-
-  Args:
-    lengths: A vector of integers representing lengths.
-    max_length: An integer indicating the maximum length. All values in
-      lengths should be less than max_length.
-  Returns:
-    masks: Masks that can be used to get rid of padding.
-  """
-  tiled_ranges = array_ops.tile(
-      array_ops.expand_dims(math_ops.range(max_length), 0),
-      [array_ops.shape(lengths)[0], 1])
-  lengths = array_ops.expand_dims(lengths, 1)
-  masks = math_ops.to_float(
-      math_ops.to_int64(tiled_ranges) < math_ops.to_int64(lengths))
-  return masks
-
-
 def crf_sequence_score(inputs, tag_indices, sequence_lengths,
                        transition_params):
   """Computes the unnormalized score for a tag sequence.
@@ -234,7 +215,9 @@ def crf_unary_score(tag_indices, sequence_lengths, inputs):
       array_ops.gather(flattened_inputs, flattened_tag_indices),
       [batch_size, max_seq_len])
 
-  masks = _lengths_to_masks(sequence_lengths, array_ops.shape(tag_indices)[1])
+  masks = array_ops.sequence_mask(sequence_lengths,
+                                  maxlen=array_ops.shape(tag_indices)[1],
+                                  dtype=dtypes.float32)
 
   unary_scores = math_ops.reduce_sum(unary_scores * masks, 1)
   return unary_scores
@@ -268,7 +251,9 @@ def crf_binary_score(tag_indices, sequence_lengths, transition_params):
   binary_scores = array_ops.gather(flattened_transition_params,
                                    flattened_transition_indices)
 
-  masks = _lengths_to_masks(sequence_lengths, array_ops.shape(tag_indices)[1])
+  masks = array_ops.sequence_mask(sequence_lengths,
+                                  maxlen=array_ops.shape(tag_indices)[1],
+                                  dtype=dtypes.float32)
   truncated_masks = array_ops.slice(masks, [0, 1], [-1, -1])
   binary_scores = math_ops.reduce_sum(binary_scores * truncated_masks, 1)
   return binary_scores
diff --git a/tensorflow/contrib/cudnn_rnn/BUILD b/tensorflow/contrib/cudnn_rnn/BUILD
index fce2c03e69bc4b8b0ac46b8e081a33c43c9d41ab..fec358c4e1067dc8dc8173d1b9d05dc90b90ca05 100644
--- a/tensorflow/contrib/cudnn_rnn/BUILD
+++ b/tensorflow/contrib/cudnn_rnn/BUILD
@@ -25,6 +25,7 @@ tf_custom_op_library(
     ],
     deps = [
         "//tensorflow/core/kernels:bounds_check_lib",
+        "@farmhash_archive//:farmhash",
     ],
 )
 
@@ -39,6 +40,7 @@ tf_kernel_library(
         "//tensorflow/core:stream_executor",
         "//tensorflow/core/kernels:bounds_check_lib",
         "//third_party/eigen3",
+        "@farmhash_archive//:farmhash",
     ],
 )
 
@@ -146,10 +148,10 @@ cuda_py_test(
 
 cuda_py_test(
     name = "cudnn_rnn_ops_benchmark",
-    size = "large",
+    size = "small",
     srcs = ["python/kernel_tests/cudnn_rnn_ops_benchmark.py"],
     additional_deps = [
-        ":cudnn_rnn_ops_py",
+        ":cudnn_rnn_py",
         "//tensorflow/contrib/rnn:rnn_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client",
@@ -164,7 +166,6 @@ cuda_py_test(
         "//tensorflow/python:variables",
     ],
     tags = [
-        "manual",
         "noasan",  # http://b/62067814
         "nomsan",
         "notsan",
diff --git a/tensorflow/contrib/cudnn_rnn/kernels/cudnn_rnn_ops.cc b/tensorflow/contrib/cudnn_rnn/kernels/cudnn_rnn_ops.cc
index 5d5f593d016a3bb9f7b5ea8f5cd40c29268dc4f5..ba9686e94ee7072cc485c955decb2287bd4a56f3 100644
--- a/tensorflow/contrib/cudnn_rnn/kernels/cudnn_rnn_ops.cc
+++ b/tensorflow/contrib/cudnn_rnn/kernels/cudnn_rnn_ops.cc
@@ -39,6 +39,7 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/fingerprint.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/env_var.h"
@@ -369,6 +370,27 @@ struct CudnnModelShapes {
   }
 };
 
+// Utility class for using CudnnModelShapes as a hash table key.
+struct CudnnModelShapesHasher {
+  uint64 operator()(const CudnnModelShapes& to_hash) const {
+    uint64 hash = static_cast<uint64>(to_hash.num_layers);
+    hash = tensorflow::FingerprintCat64(
+        hash, static_cast<uint64>(to_hash.input_size));
+    hash = tensorflow::FingerprintCat64(hash,
+                                        static_cast<uint64>(to_hash.num_units));
+    return tensorflow::FingerprintCat64(hash,
+                                        static_cast<uint64>(to_hash.dir_count));
+  }
+};
+
+// Utility class for using CudnnModelShapes as a hash table key.
+struct CudnnModelShapesComparator {
+  bool operator()(const CudnnModelShapes& first,
+                  const CudnnModelShapes& second) const {
+    return first.IsCompatibleWith(second);
+  }
+};
+
 // Extract and checks the forward input tensors, parameters, and shapes from the
 // OpKernelContext.
 Status ExtractForwardInput(OpKernelContext* context,
@@ -627,7 +649,7 @@ class CudnnRNNParamsToCanonical<GPUDevice, T> : public CudnnRNNKernelCommon {
     }
     const int num_params_per_layer = num_params_ / num_layers / num_dirs;
     // Number of params applied on inputs. The rest are applied on recurrent
-    // hiddden states.
+    // hidden states.
     const int num_params_input_state = num_params_per_layer / 2;
     CHECK(num_params_ % (num_layers * num_dirs) == 0)
         << "Number of params is not a multiple of num_layers * num_dirs.";
@@ -764,6 +786,13 @@ TF_CALL_float(REGISTER_GPU);
 TF_CALL_double(REGISTER_GPU);
 #undef REGISTER_GPU
 
+// Pointers to RNN scratch space for a specific set of shape parameters (used as
+// a hash table value in CudnnRNNForwardOp and CudnnRNNBackwardOp).
+struct RnnScratchSpace {
+  std::unique_ptr<RnnDescriptor> rnn_desc;
+  std::unique_ptr<CudnnRNNPersistentSpaceAllocator> dropout_state_allocator;
+};
+
 // Run the forward operation of the RNN model.
 template <typename T>
 class CudnnRNNForwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
@@ -808,32 +837,7 @@ class CudnnRNNForwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
     OP_REQUIRES_OK(context,
                    ToRNNInputMode(rnn_input_mode(), model_shapes.num_units,
                                   model_shapes.input_size, &input_mode));
-    // TODO(zhengxq): cache the descriptor so we don't have to create them all
-    // the time.
     auto data_type = ToDataType<T>::value;
-    {
-      mutex_lock l(mu_);
-      if (model_shapes_ == nullptr) {
-        model_shapes_.reset(new CudnnModelShapes(model_shapes));
-      } else {
-        OP_REQUIRES(context, model_shapes_->IsCompatibleWith(model_shapes),
-                    errors::InvalidArgument(
-                        "Incompatible rnn model shapes inferred: expecting ",
-                        model_shapes_->RnnDescDebugString(), ", getting ",
-                        model_shapes.RnnDescDebugString(), "."));
-      }
-      if (rnn_desc_ == nullptr || ResetRndGenState()) {
-        dropout_state_allocator_.reset(
-            new CudnnRNNPersistentSpaceAllocator(context));
-        auto rnn_desc_s = executor->createRnnDescriptor(
-            model_shapes_->num_layers, model_shapes_->num_units,
-            model_shapes_->input_size, input_mode, rnn_direction_mode(),
-            rnn_mode(), data_type, dropout(), seed(),
-            dropout_state_allocator_.get());
-        OP_REQUIRES_OK(context, FromExecutorStatus(rnn_desc_s));
-        rnn_desc_ = std::move(rnn_desc_s.ConsumeValueOrDie());
-      }
-    }
 
     auto input_desc_s = executor->createRnnSequenceTensorDescriptor(
         input_shape.dim_size(0), input_shape.dim_size(1),
@@ -882,14 +886,27 @@ class CudnnRNNForwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
     bool launch_status = false;
     {
       mutex_lock l(mu_);
+      RnnScratchSpace& rnn_state = rnn_state_cache_[model_shapes];
+      if (rnn_state.rnn_desc == nullptr || ResetRndGenState()) {
+        CudnnRNNPersistentSpaceAllocator* dropout_state_allocator =
+            new CudnnRNNPersistentSpaceAllocator(context);
+        rnn_state.dropout_state_allocator.reset(dropout_state_allocator);
+        auto rnn_desc_s = executor->createRnnDescriptor(
+            model_shapes.num_layers, model_shapes.num_units,
+            model_shapes.input_size, input_mode, rnn_direction_mode(),
+            rnn_mode(), data_type, dropout(), seed(), dropout_state_allocator);
+        OP_REQUIRES_OK(context, FromExecutorStatus(rnn_desc_s));
+        rnn_state.rnn_desc = std::move(rnn_desc_s.ConsumeValueOrDie());
+      }
       launch_status =
           stream
-              ->ThenRnnForward(
-                  *rnn_desc_, *input_desc, input_data, *hidden_state_desc,
-                  input_h_data, *hidden_state_desc, input_c_data, params_data,
-                  *output_desc, &output_data, *hidden_state_desc,
-                  &output_h_data, *hidden_state_desc, &output_c_data,
-                  is_training_, &reserve_space_allocator, &workspace_allocator)
+              ->ThenRnnForward(*rnn_state.rnn_desc, *input_desc, input_data,
+                               *hidden_state_desc, input_h_data,
+                               *hidden_state_desc, input_c_data, params_data,
+                               *output_desc, &output_data, *hidden_state_desc,
+                               &output_h_data, *hidden_state_desc,
+                               &output_c_data, is_training_,
+                               &reserve_space_allocator, &workspace_allocator)
               .ok();
     }
     OP_REQUIRES(context, launch_status,
@@ -899,10 +916,9 @@ class CudnnRNNForwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
  private:
   mutex mu_;
   bool is_training_;
-  std::unique_ptr<CudnnModelShapes> model_shapes_ GUARDED_BY(mu_);
-  std::unique_ptr<RnnDescriptor> rnn_desc_ GUARDED_BY(mu_);
-  std::unique_ptr<CudnnRNNPersistentSpaceAllocator> dropout_state_allocator_
-      GUARDED_BY(mu_);
+  std::unordered_map<CudnnModelShapes, RnnScratchSpace, CudnnModelShapesHasher,
+                     CudnnModelShapesComparator>
+      rnn_state_cache_ GUARDED_BY(mu_);
 };
 
 #define REGISTER_GPU(T)                                           \
@@ -1022,32 +1038,6 @@ class CudnnRNNBackwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
     OP_REQUIRES_OK(context,
                    ToRNNInputMode(rnn_input_mode(), model_shapes.num_units,
                                   model_shapes.input_size, &input_mode));
-    // TODO(zhengxq): cache the descriptor so we don't have to create them all
-    // the time.
-    {
-      mutex_lock l(mu_);
-      if (model_shapes_ == nullptr) {
-        model_shapes_.reset(new CudnnModelShapes(model_shapes));
-      } else {
-        OP_REQUIRES(context, model_shapes_->IsCompatibleWith(model_shapes),
-                    errors::InvalidArgument(
-                        "Incompatible rnn model shapes inferred: expecting ",
-                        model_shapes_->RnnDescDebugString(), ", getting ",
-                        model_shapes.RnnDescDebugString(), "."));
-      }
-
-      if (rnn_desc_ == nullptr || ResetRndGenState()) {
-        dropout_state_allocator_.reset(
-            new CudnnRNNPersistentSpaceAllocator(context));
-        auto rnn_desc_s = executor->createRnnDescriptor(
-            model_shapes.num_layers, model_shapes.num_units,
-            model_shapes.input_size, input_mode, rnn_direction_mode(),
-            rnn_mode(), data_type, dropout(), seed(),
-            dropout_state_allocator_.get());
-        OP_REQUIRES_OK(context, FromExecutorStatus(rnn_desc_s));
-        rnn_desc_ = std::move(rnn_desc_s.ConsumeValueOrDie());
-      }
-    }
 
     auto input_desc_s = executor->createRnnSequenceTensorDescriptor(
         input_shape.dim_size(0), input_shape.dim_size(1),
@@ -1100,17 +1090,30 @@ class CudnnRNNBackwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
     bool launch_status = false;
     {
       mutex_lock l(mu_);
+      RnnScratchSpace& rnn_state = rnn_state_cache_[model_shapes];
+      if (rnn_state.rnn_desc == nullptr || ResetRndGenState()) {
+        CudnnRNNPersistentSpaceAllocator* dropout_state_allocator =
+            new CudnnRNNPersistentSpaceAllocator(context);
+        rnn_state.dropout_state_allocator.reset(dropout_state_allocator);
+        auto rnn_desc_s = executor->createRnnDescriptor(
+            model_shapes.num_layers, model_shapes.num_units,
+            model_shapes.input_size, input_mode, rnn_direction_mode(),
+            rnn_mode(), data_type, dropout(), seed(), dropout_state_allocator);
+        OP_REQUIRES_OK(context, FromExecutorStatus(rnn_desc_s));
+        rnn_state.rnn_desc = std::move(rnn_desc_s.ConsumeValueOrDie());
+      }
       launch_status =
           stream
-              ->ThenRnnBackward(
-                  *rnn_desc_, *input_desc, input_data, *hidden_state_desc,
-                  input_h_data, *hidden_state_desc, input_c_data, params_data,
-                  *output_desc, output_data, *hidden_state_desc, output_h_data,
-                  *hidden_state_desc, output_c_data, output_backprop_data,
-                  output_h_backprop_data, output_c_backprop_data,
-                  &input_backprop_data, &input_h_backprop_data,
-                  &input_c_backprop_data, &params_backprop_data,
-                  &reserve_space_uint8, &workspace_allocator)
+              ->ThenRnnBackward(*rnn_state.rnn_desc, *input_desc, input_data,
+                                *hidden_state_desc, input_h_data,
+                                *hidden_state_desc, input_c_data, params_data,
+                                *output_desc, output_data, *hidden_state_desc,
+                                output_h_data, *hidden_state_desc,
+                                output_c_data, output_backprop_data,
+                                output_h_backprop_data, output_c_backprop_data,
+                                &input_backprop_data, &input_h_backprop_data,
+                                &input_c_backprop_data, &params_backprop_data,
+                                &reserve_space_uint8, &workspace_allocator)
               .ok();
     }
     OP_REQUIRES(context, launch_status,
@@ -1119,10 +1122,9 @@ class CudnnRNNBackwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
 
  private:
   mutex mu_;
-  std::unique_ptr<CudnnModelShapes> model_shapes_ GUARDED_BY(mu_);
-  std::unique_ptr<RnnDescriptor> rnn_desc_ GUARDED_BY(mu_);
-  std::unique_ptr<CudnnRNNPersistentSpaceAllocator> dropout_state_allocator_
-      GUARDED_BY(mu_);
+  std::unordered_map<CudnnModelShapes, RnnScratchSpace, CudnnModelShapesHasher,
+                     CudnnModelShapesComparator>
+      rnn_state_cache_ GUARDED_BY(mu_);
 };
 
 #define REGISTER_GPU(T)                                                   \
diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py
index ff409ac71826f1f0f57e9133d768003f849abc09..4fc5ff1bd1887c4532e95fcf0e791d72b20471b0 100644
--- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py
+++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py
@@ -20,8 +20,8 @@ from __future__ import print_function
 
 import time
 
+from tensorflow.contrib import rnn as contrib_rnn
 from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
-from tensorflow.contrib.rnn.python.ops import core_rnn
 from tensorflow.contrib.rnn.python.ops import lstm_ops
 from tensorflow.python.client import session
 from tensorflow.python.framework import dtypes
@@ -29,8 +29,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import rnn_cell
+from tensorflow.python.ops import rnn
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
@@ -44,19 +43,19 @@ class CudnnRNNBenchmark(test.Benchmark):
         "large": {
             "num_layers": 4,
             "num_units": 1024,
-            "seq_length": 40,
+            "seq_length": 50,
             "batch_size": 64,
         },
         "medium": {
             "num_layers": 4,
             "num_units": 512,
-            "seq_length": 30,
+            "seq_length": 50,
             "batch_size": 64,
         },
         "small": {
             "num_layers": 4,
             "num_units": 128,
-            "seq_length": 20,
+            "seq_length": 50,
             "batch_size": 64,
         },
     }
@@ -71,7 +70,7 @@ class CudnnRNNBenchmark(test.Benchmark):
 
   def _BenchmarkOp(self, op, desc):
     burn_in_steps = 10
-    benchmark_steps = 40
+    benchmark_steps = 20
     with session.Session() as sess:
       sess.run(variables.global_variables_initializer())
       for i in xrange(burn_in_steps + benchmark_steps):
@@ -126,16 +125,12 @@ class CudnnRNNBenchmark(test.Benchmark):
       seq_length = config["seq_length"]
 
       with ops.Graph().as_default(), ops.device("/device:GPU:0"):
-        inputs = seq_length * [
-            array_ops.zeros([batch_size, num_units], dtypes.float32)
-        ]
-        initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=127)
-
-        cell = rnn_cell.LSTMCell(
-            num_units=num_units, initializer=initializer, state_is_tuple=True)
-        multi_cell = rnn_cell.MultiRNNCell(
-            [cell() for _ in range(num_layers)])
-        outputs, final_state = core_rnn.static_rnn(
+        inputs = array_ops.zeros([batch_size, seq_length, num_units],
+                                 dtypes.float32)
+
+        multi_cell = contrib_rnn.MultiRNNCell(
+            [contrib_rnn.BasicLSTMCell(num_units) for _ in range(num_layers)])
+        outputs, final_state = rnn.dynamic_rnn(
             multi_cell, inputs, dtype=dtypes.float32)
         trainable_variables = ops.get_collection(
             ops.GraphKeys.TRAINABLE_VARIABLES)
@@ -154,14 +149,12 @@ class CudnnRNNBenchmark(test.Benchmark):
       seq_length = config["seq_length"]
 
       with ops.Graph().as_default(), ops.device("/device:GPU:0"):
-        inputs = seq_length * [
-            array_ops.zeros([batch_size, num_units], dtypes.float32)
-        ]
-        cell = lambda: lstm_ops.LSTMBlockCell(num_units=num_units)  # pylint: disable=cell-var-from-loop
-
-        multi_cell = rnn_cell.MultiRNNCell(
-            [cell() for _ in range(num_layers)])
-        outputs, final_state = core_rnn.static_rnn(
+        inputs = array_ops.zeros([batch_size, seq_length, num_units],
+                                 dtypes.float32)
+
+        multi_cell = contrib_rnn.MultiRNNCell(
+            [lstm_ops.LSTMBlockCell(num_units) for _ in range(num_layers)])
+        outputs, final_state = rnn.dynamic_rnn(
             multi_cell, inputs, dtype=dtypes.float32)
         trainable_variables = ops.get_collection(
             ops.GraphKeys.TRAINABLE_VARIABLES)
diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py
index e65394cba07574ed49398981f1cbd8bcb402e24f..49d305cb0dd0387c34b7feb79ef631eac9e935cd 100644
--- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py
+++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py
@@ -29,6 +29,8 @@ import numpy as np
 from tensorflow.contrib.cudnn_rnn.python.layers import cudnn_rnn
 from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
 from tensorflow.contrib.rnn.python.ops import rnn as contrib_rnn_lib
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
@@ -314,6 +316,101 @@ class CudnnRNNTestBasic(TensorFlowTestCase):
       self.assertEqual(0, total_sum2_v)
       self.assertEqual(0, total_sum3_v)
 
+  def testSaveableGraphDeviceAssignment(self):
+    num_layers = 4
+    num_units = 2
+    batch_size = 8
+    direction = CUDNN_RNN_UNIDIRECTION
+    dir_count = 1
+
+    def DeviceFn(op):
+      if op.type in ("Variable", "VariableV2"):
+        return "/cpu:0"
+      else:
+        return "/gpu:0"
+
+    with ops.Graph().as_default() as g:
+      with ops.device(DeviceFn):
+        with vs.variable_scope("main"):
+          kernel_initializer = init_ops.constant_initializer(3.14)
+          bias_initializer = init_ops.constant_initializer(1.59)
+          inputs = random_ops.random_uniform(
+              [num_layers * dir_count, batch_size, num_units],
+              dtype=dtypes.float32)
+
+          lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units,
+                                     direction=direction,
+                                     kernel_initializer=kernel_initializer,
+                                     bias_initializer=bias_initializer,
+                                     name="awesome_lstm")
+          outputs = lstm(inputs)
+
+        # saver is created in the scope of DeviceFn.
+        saver = saver_lib.Saver()
+
+    with self.test_session(use_gpu=True, graph=g) as sess:
+      save_path = os.path.join(self.get_temp_dir(),
+                               "test-saveable-device-assignment")
+      sess.run(variables.global_variables_initializer())
+
+      saver.save(sess, save_path)
+      saver.restore(sess, save_path)
+      sess.run(outputs)
+
+  @unittest.skipUnless(test.is_built_with_cuda(),
+                       "Test only applicable when running on GPUs")
+  def testDifferentShapesEager(self):
+    # Checks that kernel caching does not cause sharing of temporary storage
+    # across different input shapes when executing eagerly.
+    with context.eager_mode():
+      with ops.device("gpu:0"):
+        first_output, _ = cudnn_rnn.CudnnGRU(1, 100)(
+            array_ops.zeros([28, 100, 28]))
+        second_output, _ = cudnn_rnn.CudnnGRU(1, 100)(
+            array_ops.zeros([28, 100, 100]))
+        self.assertAllEqual([28, 100, 100], first_output.shape)
+        self.assertAllEqual([28, 100, 100], second_output.shape)
+
+        def _LossFunc():
+          first_output, _ = cudnn_rnn.CudnnGRU(1, 100)(
+              array_ops.zeros([28, 100, 28]))
+          second_output, _ = cudnn_rnn.CudnnGRU(1, 100)(
+              array_ops.zeros([28, 100, 100]))
+          return (math_ops.reduce_sum(first_output) +
+                  math_ops.reduce_sum(second_output))
+
+        backprop.implicit_grad(_LossFunc)()
+
+  @unittest.skipUnless(test.is_built_with_cuda(),
+                       "Test only applicable when running on GPUs")
+  def testDifferentShapesGraph(self):
+    # Tests that a single kernel instance presented with multiple input shapes
+    # does not crash with graph execution.
+    with ops.device("gpu:0"):
+      layer = cudnn_rnn.CudnnGRU(1, 100)
+      layer(array_ops.zeros([28, 100, 100]))
+
+      def _Cond(index, accumulation):
+        del accumulation  # unused
+        return math_ops.less(index, 4)
+
+      def _Body(index, accumulation):
+        layer_input = accumulation[:, :, 10 * (1 + index % 2):]
+        output, _ = layer(layer_input)
+        return index + 1, accumulation + output
+
+      original_input = array_ops.zeros([28, 100, 100])
+      _, accumulation = control_flow_ops.while_loop(_Cond, _Body,
+                                                    [0, original_input])
+      grad, = gradients.gradients(
+          math_ops.reduce_sum(accumulation), (original_input,))
+    init_op = variables.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      accumulation_eval, grad_eval = sess.run((accumulation, grad))
+      self.assertAllEqual([28, 100, 100], accumulation_eval.shape)
+      self.assertAllEqual([28, 100, 100], grad_eval.shape)
+
 
 # TODO(jamesqin): Transform to parameterized test after it is included in the
 # TF open source codebase.
diff --git a/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py
index 37c61a71a3bdac4fadef58ba8c24b853fb3638ef..36fba917a8f56c26fd5b4c3468d1d980a8ba2ba5 100644
--- a/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py
+++ b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py
@@ -176,8 +176,9 @@ class _CudnnRNN(base_layer.Layer):
           otherwise, it implies 'linear_input'.
       direction: the direction model that the model operates. Can be either
           'unidirectional' or 'bidirectional'
-      dropout: dropout rate, a number between [0, 1]. Dropout is applied on
-          inputs of each layer. When set to 0, dropout is disabled.
+      dropout: dropout rate, a number between [0, 1]. Dropout is applied between
+          each layer (no dropout is applied for a model with a single layer).
+          When set to 0, dropout is disabled.
       seed: the op seed used for initializing dropout. See @{tf.set_random_seed}
           for behavior.
       dtype: tf.float16, tf.float32 or tf.float64
@@ -358,7 +359,7 @@ class _CudnnRNN(base_layer.Layer):
     # Create saveable in the outer scope of the cudnn subgraph, such that
     # alternative subgraph with platform-independent rnn cells can load the
     # checkpoints directly.
-    if not (self.built or vs.get_variable_scope().reuse):
+    if not (self.built or vs.get_variable_scope().reuse is True):
       self._create_saveable()
     self.built = True
 
@@ -450,17 +451,18 @@ class _CudnnRNN(base_layer.Layer):
       raise RuntimeError(
           "%s._canonical_to_opaque invoked before input shape is known" %
           type(self).__name__)
-    return cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
-        rnn_mode=self._rnn_mode,
-        num_layers=self._num_layers,
-        num_units=self._num_units,
-        input_size=self._input_size,
-        weights=cu_weights,
-        biases=cu_biases,
-        input_mode=self._input_mode,
-        seed=self._seed,
-        dropout=self._dropout,
-        direction=self._direction)
+    with ops.device("/gpu:0"):
+      return cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
+          rnn_mode=self._rnn_mode,
+          num_layers=self._num_layers,
+          num_units=self._num_units,
+          input_size=self._input_size,
+          weights=cu_weights,
+          biases=cu_biases,
+          input_mode=self._input_mode,
+          seed=self._seed,
+          dropout=self._dropout,
+          direction=self._direction)
 
   def _forward(self, inputs, h, c, opaque_params, training):
     output, output_h, output_c = cudnn_rnn_ops._cudnn_rnn(  # pylint:disable=protected-access
@@ -489,14 +491,14 @@ class _CudnnRNN(base_layer.Layer):
     if self._saveable is not None:
       raise RuntimeError("Cudnn saveable already created.")
     self._saveable = self._saveable_cls(  # pylint:disable=not-callable
-        self.trainable_variables[0],
-        self.num_layers,
-        self.num_units,
-        self.input_size,
-        self.input_mode,
-        self.direction,
+        opaque_params=self.trainable_variables[0],
+        num_layers=self.num_layers,
+        num_units=self.num_units,
+        input_size=self.input_size,
+        input_mode=self.input_mode,
+        direction=self.direction,
         scope=vs.get_variable_scope(),
-        name="%s_saveable" % self.trainable_variables[0].op.name)
+        name="%s_saveable" % self.trainable_variables[0].name.split(":")[0])
     ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, self._saveable)
 
 
diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
index dcd3d4732a27ae4bec579ac12ac568dc4a53baaa..e87162f0ee9cc4eed795555171f55a93639e83cf 100644
--- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
+++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
@@ -72,7 +72,7 @@ class CudnnCompatibleLSTMCell(lstm_ops.LSTMBlockCell):
   def __init__(self, num_units, reuse=None):
     super(CudnnCompatibleLSTMCell, self).__init__(
         num_units, forget_bias=0, cell_clip=None, use_peephole=False,
-        reuse=reuse)
+        reuse=reuse, name="cudnn_compatible_lstm_cell")
     self._names.update({"scope": "cudnn_compatible_lstm_cell"})
 
 
@@ -303,16 +303,17 @@ class CudnnOpaqueParamsSaveable(saver.BaseSaverBuilder.SaveableObject):
     Returns:
       2 list for weights and biases respectively.
     """
-    weights, biases = gen_cudnn_rnn_ops.cudnn_rnn_params_to_canonical(
-        num_layers=self._num_layers,
-        num_units=self._num_units,
-        input_size=self._input_size,
-        params=self._variables,
-        num_params=self._num_params,
-        rnn_mode=self._rnn_mode,
-        input_mode=self._input_mode,
-        direction=self._direction)
-    return (weights, biases)
+    with ops.device("/gpu:0"):
+      weights, biases = gen_cudnn_rnn_ops.cudnn_rnn_params_to_canonical(
+          num_layers=self._num_layers,
+          num_units=self._num_units,
+          input_size=self._input_size,
+          params=self._variables,
+          num_params=self._num_params,
+          rnn_mode=self._rnn_mode,
+          input_mode=self._input_mode,
+          direction=self._direction)
+      return (weights, biases)
 
   def _CanonicalToOpaqueParams(self, cu_weights, cu_biases):
     """Converts from Cudnn canonical format to opaque params.
@@ -323,15 +324,16 @@ class CudnnOpaqueParamsSaveable(saver.BaseSaverBuilder.SaveableObject):
     Returns:
       a single opaque tensor.
     """
-    return gen_cudnn_rnn_ops.cudnn_rnn_canonical_to_params(
-        num_layers=self._num_layers,
-        num_units=self._num_units,
-        input_size=self._input_size,
-        weights=cu_weights,
-        biases=cu_biases,
-        rnn_mode=self._rnn_mode,
-        input_mode=self._input_mode,
-        direction=self._direction)
+    with ops.device("/gpu:0"):
+      return gen_cudnn_rnn_ops.cudnn_rnn_canonical_to_params(
+          num_layers=self._num_layers,
+          num_units=self._num_units,
+          input_size=self._input_size,
+          weights=cu_weights,
+          biases=cu_biases,
+          rnn_mode=self._rnn_mode,
+          input_mode=self._input_mode,
+          direction=self._direction)
 
   def _TransformCanonical(self, cu_weights, cu_biases):
     r"""Transform from Cudnn canonical to tf canonical.
@@ -1352,7 +1354,7 @@ class _CudnnRNN(object):
       params: the parameter buffer created for this model.
       is_training: whether this operation will be used in training or inference.
     Returns:
-      output: the output sequuence.
+      output: the output sequence.
       output_h: the final state for h.
       output_c: the final state for c. This is only relevant for LSTM.
     """
@@ -1470,7 +1472,7 @@ class CudnnLSTM(_CudnnRNN):
       params: the parameter buffer created for this model.
       is_training: whether this operation will be used in training or inference.
     Returns:
-      output: the output sequuence.
+      output: the output sequence.
       output_h: the final state for h.
       output_c: the final state for c.
     """
@@ -1540,7 +1542,7 @@ class _CudnnRNNNoInputC(_CudnnRNN):
       params: the parameter buffer created for this model.
       is_training: whether this operation will be used in training or inference.
     Returns:
-      output: the output sequuence.
+      output: the output sequence.
       output_h: the final state for h.
     """
     return _cudnn_rnn_no_input_c(
diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD
index f7d8a084d9c12c05c411ae0751854d1823a818ec..8ecc003348d70379ee48d050e63e93d0dd38efaa 100644
--- a/tensorflow/contrib/data/BUILD
+++ b/tensorflow/contrib/data/BUILD
@@ -18,7 +18,9 @@ py_library(
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/contrib/data/python/ops:iterator_ops",
         "//tensorflow/contrib/data/python/ops:readers",
+        "//tensorflow/contrib/data/python/ops:shuffle_ops",
         "//tensorflow/contrib/data/python/ops:transformation_ops",
+        "//tensorflow/python:parsing_ops",
         "//tensorflow/python:util",
         "//tensorflow/python/data/ops:iterator_ops",
     ],
diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py
index 7c6244f22b0f41656369595d3e3e6c23b7088bcb..daeb6a610533404044d42033709d644deb481024 100644
--- a/tensorflow/contrib/data/__init__.py
+++ b/tensorflow/contrib/data/__init__.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""`tf.contrib.data.Dataset` API for input pipelines.
+"""`tf.contrib.data` API for input pipelines.
+
+This module contains the experimental (less stable) counterpart to the
+`tf.data` API. See @{tf.data.Dataset} and @{tf.data.Iterator} for the
+stable classes.
 
 See the @{$datasets$Importing Data} Programmer's Guide for an overview.
 
@@ -24,18 +28,20 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview.
 @@TextLineDataset
 
 @@batch_and_drop_remainder
-@@padded_batch_and_drop_remainder
 @@dense_to_sparse_batch
 @@enumerate_dataset
 @@group_by_window
 @@ignore_errors
 @@make_saveable_from_iterator
-@@read_batch_features
-@@unbatch
+@@map_and_batch
+@@padded_batch_and_drop_remainder
 @@parallel_interleave
+@@read_batch_features
 @@rejection_resample
 @@scan
+@@shuffle_and_repeat
 @@sloppy_interleave
+@@unbatch
 
 @@get_single_element
 """
@@ -48,6 +54,7 @@ from __future__ import print_function
 
 from tensorflow.contrib.data.python.ops.batching import batch_and_drop_remainder
 from tensorflow.contrib.data.python.ops.batching import dense_to_sparse_batch
+from tensorflow.contrib.data.python.ops.batching import map_and_batch
 from tensorflow.contrib.data.python.ops.batching import padded_batch_and_drop_remainder
 from tensorflow.contrib.data.python.ops.batching import unbatch
 from tensorflow.contrib.data.python.ops.counter import Counter
@@ -66,6 +73,7 @@ from tensorflow.contrib.data.python.ops.readers import TextLineDataset
 from tensorflow.contrib.data.python.ops.readers import TFRecordDataset
 from tensorflow.contrib.data.python.ops.resampling import rejection_resample
 from tensorflow.contrib.data.python.ops.scan_ops import scan
+from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat
 from tensorflow.python.data.ops.iterator_ops import Iterator
 # pylint: enable=unused-import
 
diff --git a/tensorflow/contrib/data/kernels/prefetching_kernels.cc b/tensorflow/contrib/data/kernels/prefetching_kernels.cc
index c9a3537c70c711290fb1111a1594e6dea3bc07a9..d3df14bdd03476e9ee4015b374512e5bb9893a63 100644
--- a/tensorflow/contrib/data/kernels/prefetching_kernels.cc
+++ b/tensorflow/contrib/data/kernels/prefetching_kernels.cc
@@ -83,11 +83,10 @@ class FunctionBufferingResource : public ResourceBase {
       return Status::OK();
     }
     AttrValueMap attr_values = func_.attr();
-    AttrValue v;
-    v.set_s(target_device_);
-    AddAttr("_target", v, &attr_values);
-
-    return lib_->Instantiate(func_.name(), AttrSlice(&attr_values), &handle_);
+    FunctionLibraryRuntime::InstantiateOptions opts;
+    opts.target = target_device_;
+    return lib_->Instantiate(func_.name(), AttrSlice(&attr_values), opts,
+                             &handle_);
   }
 
   // Returns true if we've got to the end of the sequence and exhausted the
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 1d4817fa2670317f4f4e9e63c724a79e18aa35bc..1fbf18f30a293de697826885d15bb95b40568daa 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -4,7 +4,7 @@ licenses(["notice"])  # Apache 2.0
 
 exports_files(["LICENSE"])
 
-load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "py_test", "tf_py_test")
 
 py_test(
     name = "batch_dataset_op_test",
@@ -36,6 +36,7 @@ py_test(
     srcs = ["bucketing_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/contrib/data/python/ops:transformation_ops",
         "//tensorflow/python:array_ops",
@@ -75,13 +76,11 @@ py_test(
     srcs = ["concatenate_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
         "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:training",
         "//tensorflow/python/data/util:nest",
         "//third_party/py/numpy",
     ],
@@ -89,7 +88,7 @@ py_test(
 
 py_test(
     name = "dataset_constructor_op_test",
-    size = "small",
+    size = "medium",
     srcs = ["dataset_constructor_op_test.py"],
     srcs_version = "PY2AND3",
     tags = [
@@ -118,7 +117,6 @@ py_test(
 
 py_library(
     name = "dataset_serialization_test",
-    testonly = 1,
     srcs = [
         "dataset_serialization_test_base.py",
     ],
@@ -157,14 +155,13 @@ py_test(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "flat_map_dataset_op_test",
-    size = "small",
+    size = "medium",
     srcs = ["flat_map_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
+    additional_deps = [
         ":dataset_serialization_test",
+        "//third_party/py/numpy",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -177,17 +174,19 @@ py_test(
         "//tensorflow/python:session",
         "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
-        "//third_party/py/numpy",
     ],
+    grpc_enabled = True,
+    tags = ["no_pip"],
 )
 
 py_test(
     name = "interleave_dataset_op_test",
-    size = "small",
+    size = "medium",
     srcs = ["interleave_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     tags = [
-        "manual",  # b/67958761
+        "no_oss",
+        "no_pip",
     ],
     deps = [
         ":dataset_serialization_test",
@@ -207,13 +206,11 @@ py_test(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "iterator_ops_cluster_test",
     size = "small",
     srcs = ["iterator_ops_cluster_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_windows"],
-    deps = [
+    additional_deps = [
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
@@ -227,14 +224,19 @@ py_test(
         "//tensorflow/python:session",
         "//tensorflow/python/data/ops:iterator_ops",
     ],
+    grpc_enabled = True,
+    tags = [
+        "no_windows",
+        "oss_serial",
+    ],
 )
 
-py_test(
+tf_py_test(
     name = "iterator_ops_test",
     size = "small",
     srcs = ["iterator_ops_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
+        "//third_party/py/numpy",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/contrib/data/python/ops:readers",
         "//tensorflow/core:protos_all_py",
@@ -256,8 +258,8 @@ py_test(
         "//tensorflow/python:session",
         "//tensorflow/python:training",
         "//tensorflow/python/data/ops:iterator_ops",
-        "//third_party/py/numpy",
     ],
+    grpc_enabled = True,
 )
 
 py_test(
@@ -277,7 +279,7 @@ py_test(
 
 py_test(
     name = "map_dataset_op_test",
-    size = "small",
+    size = "medium",
     srcs = ["map_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     tags = ["no_pip"],
@@ -304,6 +306,7 @@ py_test(
         "//tensorflow/python:string_ops",
         "//tensorflow/python:util",
         "//tensorflow/python:variable_scope",
+        "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
 )
@@ -327,8 +330,8 @@ py_test(
     srcs = ["range_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
         "//tensorflow/contrib/data/python/ops:transformation_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -339,11 +342,8 @@ py_test(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:io_ops",
         "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:platform",
         "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:training",
         "//tensorflow/python:variables",
-        "//tensorflow/python/data/ops:iterator_ops",
     ],
 )
 
@@ -389,8 +389,27 @@ py_test(
 )
 
 py_test(
-    name = "sequence_dataset_op_test",
+    name = "scan_dataset_op_test",
     size = "small",
+    srcs = ["scan_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":dataset_serialization_test",
+        "//tensorflow/contrib/data/python/ops:transformation_ops",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "sequence_dataset_op_test",
+    size = "medium",
     srcs = ["sequence_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     tags = ["no_pip"],
@@ -419,20 +438,20 @@ py_test(
 
 py_test(
     name = "shuffle_dataset_op_test",
-    size = "small",
+    size = "medium",
     srcs = ["shuffle_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
+        ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
+        "//tensorflow/contrib/data/python/ops:shuffle_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:training",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
         "//third_party/py/numpy",
@@ -450,6 +469,7 @@ py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "@org_sqlite//:python",
     ],
 )
 
@@ -458,11 +478,32 @@ py_test(
     size = "small",
     srcs = ["stats_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":dataset_serialization_test",
+        "//tensorflow/contrib/data/python/ops:dataset_ops",
+        "//tensorflow/contrib/data/python/ops:transformation_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+    ],
+)
+
+py_test(
+    name = "unique_dataset_op_test",
+    size = "small",
+    srcs = ["unique_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
+        ":dataset_serialization_test",
         "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/contrib/data/python/ops:transformation_ops",
+        "//tensorflow/contrib/stateless",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//third_party/py/numpy",
     ],
 )
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
index a939b3c841286a3b5786268dc3a9c82fd7359bfb..015f69c5673f185c53e61a5df2636333699ae203 100644
--- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
@@ -305,10 +305,10 @@ class BatchDatasetTest(test.TestCase):
     iterator = (
         dataset_ops.Dataset.from_tensor_slices(components)
         .map(lambda x: array_ops.fill([x], x)).apply(
-            batching.dense_to_sparse_batch(4,
-                                           [12])).make_initializable_iterator())
+            batching.dense_to_sparse_batch(4, [12]))
+        .make_initializable_iterator())
     init_op = iterator.initializer
-    get_next = sparse_tensor.SparseTensor(*iterator.get_next())
+    get_next = iterator.get_next()
 
     with self.test_session() as sess:
       sess.run(init_op)
@@ -334,9 +334,9 @@ class BatchDatasetTest(test.TestCase):
         dataset_ops.Dataset.from_tensor_slices(components)
         .map(lambda x: array_ops.fill([x, x], x)).apply(
             batching.dense_to_sparse_batch(
-                4, [5, -1])).make_initializable_iterator())
+                4, [5, None])).make_initializable_iterator())
     init_op = iterator.initializer
-    get_next = sparse_tensor.SparseTensor(*iterator.get_next())
+    get_next = iterator.get_next()
 
     with self.test_session() as sess:
       sess.run(init_op)
@@ -363,25 +363,18 @@ class BatchDatasetTest(test.TestCase):
 
   def testDenseToSparseBatchDatasetWithInvalidShape(self):
     input_tensor = array_ops.constant([[1]])
-    iterator = (
-        dataset_ops.Dataset.from_tensors(input_tensor).apply(
-            batching.dense_to_sparse_batch(4, [-2]))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-
-    with self.test_session() as sess:
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   "Dimension -2 must be >= -1"):
-        sess.run(init_op)
+    with self.assertRaisesRegexp(ValueError, "Dimension -2 must be >= 0"):
+      dataset_ops.Dataset.from_tensors(input_tensor).apply(
+          batching.dense_to_sparse_batch(4, [-2])).make_initializable_iterator()
 
   def testDenseToSparseBatchDatasetShapeErrors(self):
     input_tensor = array_ops.placeholder(dtypes.int32)
     iterator = (
         dataset_ops.Dataset.from_tensors(input_tensor).apply(
-            batching.dense_to_sparse_batch(4,
-                                           [12])).make_initializable_iterator())
+            batching.dense_to_sparse_batch(4, [12]))
+        .make_initializable_iterator())
     init_op = iterator.initializer
-    get_next = sparse_tensor.SparseTensor(*iterator.get_next())
+    get_next = iterator.get_next()
 
     with self.test_session() as sess:
       # Initialize with an input tensor of incompatible rank.
@@ -577,7 +570,7 @@ class BatchDatasetTest(test.TestCase):
     self.assertEqual([None], dataset.output_shapes[1][0].as_list())
     self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list())
 
-  def testBatchAndMapDataset(self):
+  def _testBatchAndMapDatasetHelper(self, num_parallel_batches=1):
     """Test a dataset that maps a TF function across its input elements."""
     # The pipeline is TensorSliceDataset ->
     # RepeatDataset(count) -> BatchAndMapDataset(square_3, batch_size).
@@ -593,7 +586,10 @@ class BatchDatasetTest(test.TestCase):
 
     iterator = (
         dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply(
-            batching.map_and_batch(_map_fn, batch_size))
+            batching.map_and_batch(
+                map_func=_map_fn,
+                batch_size=batch_size,
+                num_parallel_batches=num_parallel_batches))
         .make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
@@ -627,7 +623,11 @@ class BatchDatasetTest(test.TestCase):
           for j in range(8):
             self.assertAllEqual(component[(i * 8 + j) % 7]**2,
                                 result_component[j])
-      # The last batch should fail with `OutOfRange`.
+      result = sess.run(get_next)
+      for component, result_component in zip(components, result):
+        for j in range((14 * 7) % 8):
+          self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2,
+                              result_component[j])
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
@@ -640,6 +640,12 @@ class BatchDatasetTest(test.TestCase):
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(init_op, feed_dict={count: 14, batch_size: 0})
 
+  def testBatchAndMapDataset(self):
+    return self._testBatchAndMapDatasetHelper()
+
+  def testBatchAndMapDatasetWithParallelBatching(self):
+    return self._testBatchAndMapDatasetHelper(num_parallel_batches=10)
+
   def testMapAndBatchSparse(self):
 
     def _sparse(i):
@@ -722,6 +728,22 @@ class BatchDatasetSerializationTest(
         lambda: self.build_dataset(20.0, tensor_slice_len, batch_size),
         num_outputs)
 
+  def _build_dataset_dense_to_sparse(self, components):
+    return dataset_ops.Dataset.from_tensor_slices(components).map(
+        lambda x: array_ops.fill([x], x)).apply(
+            batching.dense_to_sparse_batch(4, [12]))
+
+  # TODO(b/70988345): Re-enable when sparse tensors are properly supported by
+  # the DatasetSerializationTestBase.
+  def _testDenseToSparseBatchDatasetCore(self):
+    components = np.random.randint(5, size=(40,)).astype(np.int32)
+    diff_comp = np.random.randint(2, size=(100,)).astype(np.int32)
+
+    num_outputs = len(components) // 4
+    self.run_core_tests(lambda: self._build_dataset_dense_to_sparse(components),
+                        lambda: self._build_dataset_dense_to_sparse(diff_comp),
+                        num_outputs)
+
 
 class PaddedBatchDatasetSerializationTest(
     dataset_serialization_test_base.DatasetSerializationTestBase):
diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
index 765ed53618958a8c49b26e416c57be28ea3bba73..4d984bb4d76e52c4200ae471550dcf48668c5f89 100644
--- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import dataset_ops
 from tensorflow.contrib.data.python.ops import grouping
 from tensorflow.python.framework import constant_op
@@ -160,6 +161,34 @@ class GroupByWindowTest(test.TestCase):
       self.assertEqual(len(components), sum(counts))
 
 
+class GroupByWindowSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_dataset(self, components):
+    return dataset_ops.Dataset.from_tensor_slices(components).repeat(-1).apply(
+        grouping.group_by_window(lambda x: x % 3, lambda _, xs: xs.batch(4), 4))
+
+  def testCoreGroupByWindow(self):
+    components = np.array(
+        [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2, 0, 0], dtype=np.int64)
+    self.verify_unused_iterator(
+        lambda: self._build_dataset(components), 12, verify_exhausted=False)
+    self.verify_init_before_restore(
+        lambda: self._build_dataset(components), 12, verify_exhausted=False)
+    self.verify_multiple_breaks(
+        lambda: self._build_dataset(components), 12, verify_exhausted=False)
+    self.verify_reset_restored_iterator(
+        lambda: self._build_dataset(components), 12, verify_exhausted=False)
+    self.verify_restore_in_empty_graph(
+        lambda: self._build_dataset(components), 12, verify_exhausted=False)
+    diff_components = np.array([0, 0, 0, 1, 1, 1], dtype=np.int64)
+    self.verify_restore_in_modified_graph(
+        lambda: self._build_dataset(components),
+        lambda: self._build_dataset(diff_components),
+        12,
+        verify_exhausted=False)
+
+
 # NOTE(mrry): These tests are based on the tests in bucket_ops_test.py.
 # Currently, they use a constant batch size, though should be made to use a
 # different batch size per key.
diff --git a/tensorflow/contrib/data/python/kernel_tests/concatenate_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/concatenate_dataset_op_test.py
index 870352209a08e6bc08bcca227ba455ad1851e8bf..063c71063601002af8168c4facf4057433061ab7 100644
--- a/tensorflow/contrib/data/python/kernel_tests/concatenate_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/concatenate_dataset_op_test.py
@@ -17,17 +17,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
 import numpy as np
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import dataset_ops
-from tensorflow.contrib.data.python.ops import iterator_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.platform import test
-from tensorflow.python.training import saver as saver_lib
 
 
 class ConcatenateDatasetTest(test.TestCase):
@@ -133,139 +130,26 @@ class ConcatenateDatasetTest(test.TestCase):
     with self.assertRaisesRegexp(TypeError, "have different types"):
       input_dataset.concatenate(dataset_to_concatenate)
 
-  def _iterator_checkpoint_prefix(self):
-    return os.path.join(self.get_temp_dir(), "iterator")
 
-  def _build_graph(self, input_components, to_concatenate_components):
-    input_dataset = dataset_ops.Dataset.from_tensor_slices(input_components)
-    dataset_to_concatenate = dataset_ops.Dataset.from_tensor_slices(
-        to_concatenate_components)
-    iterator = input_dataset.concatenate(
-        dataset_to_concatenate).make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    saveable = iterator_ops.make_saveable_from_iterator(iterator)
-    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-    # TODO(shivaniagrawal) : non-intuitive way, add support in mata_graph
-    for t in nest.flatten(get_next):
-      ops.add_to_collection("get_next", t)
-    return init_op, get_next
-
-  def _testSaveRestoreUtility(self, start, break_range, stop):
-    path = self._iterator_checkpoint_prefix()
-    step = 0
-    meta_filename = path + "-%d.meta" % step
-
-    input_components = (np.tile(np.array([[1], [2], [3], [4]]), 20), np.tile(
-        np.array([[12], [13], [14], [15]]), 4))
-    to_concatenate_components = (np.tile(
-        np.array([[5], [6], [7], [8], [9]]), 20), np.tile(
-            np.array([[16], [17], [18], [19], [20]]), 15))
-
-    with ops.Graph().as_default() as g:
-      init_op, get_next = self._build_graph(input_components,
-                                            to_concatenate_components)
-      saver = saver_lib.Saver()
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for i in range(start, break_range):
-          result = sess.run(get_next)
-          if i < 4:
-            for component, result_component in zip(input_components, result):
-              self.assertAllEqual(component[i], result_component)
-          else:
-            for component, result_component in zip(to_concatenate_components,
-                                                   result):
-              self.assertAllEqual(component[i - 4], result_component)
-        saver.save(sess, path, step)
-
-    with ops.Graph().as_default() as g:
-      saver = saver_lib.import_meta_graph(meta_filename)
-      with self.test_session(graph=g) as sess:
-        get_next = nest.pack_sequence_as(("a", "b"),
-                                         ops.get_collection("get_next"))
-        saver.restore(sess, saver_lib.latest_checkpoint(self.get_temp_dir()))
-        for i in range(break_range, stop):
-          result = sess.run(get_next)
-          if i < 4:
-            for component, result_component in zip(input_components, result):
-              self.assertAllEqual(component[i], result_component)
-          else:
-            for component, result_component in zip(to_concatenate_components,
-                                                   result):
-              self.assertAllEqual(component[i - 4], result_component)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+class ConcatenateDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
 
-  def testRestoreAtFirstDataset(self):
-    start = 0
-    stop = 9
-    break_range = 3
-    self._testSaveRestoreUtility(start, break_range, stop)
-
-  def testRestoreAtSecondDataset(self):
-    start = 0
-    stop = 9
-    break_range = 6
-    self._testSaveRestoreUtility(start, break_range, stop)
-
-  def testRestoreAtBetweenDatasets(self):
-    start = 0
-    stop = 9
-    break_range = 4
-    self._testSaveRestoreUtility(start, break_range, stop)
-
-  def testRestoreExhaustedIterator(self):
-    start = 0
-    stop = 9
-    break_range = 9
-    self._testSaveRestoreUtility(start, break_range, stop)
-
-  def testRestoreInModifiedGraph(self):
-    start = 0
-    stop = 9
-    break_range = 6
-    path = self._iterator_checkpoint_prefix()
-    step = 0
-
-    input_components = (np.tile(np.array([[1], [2], [3], [4]]), 20), np.tile(
-        np.array([[12], [13], [14], [15]]), 4))
+  def _build_concatenate_dataset(self, var_array):
+    input_components = (np.tile(np.array([[1], [2], [3], [4]]), 20),
+                        np.tile(np.array([[12], [13], [14], [15]]), 4))
     to_concatenate_components = (np.tile(
-        np.array([[5], [6], [7], [8], [9]]), 20), np.tile(
-            np.array([[16], [17], [18], [19], [20]]), 15))
-
-    with ops.Graph().as_default() as g:
-      init_op, get_next = self._build_graph(input_components,
-                                            to_concatenate_components)
-      saver = saver_lib.Saver(allow_empty=True)
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        for i in range(start, break_range):
-          result = sess.run(get_next)
-          if i < 4:
-            for component, result_component in zip(input_components, result):
-              self.assertAllEqual(component[i], result_component)
-          else:
-            for component, result_component in zip(to_concatenate_components,
-                                                   result):
-              self.assertAllEqual(component[i - 4], result_component)
-        saver.save(sess, path, step)
-
-    new_to_concatenate_components = (np.array([[5], [6], [7], [8], [9]]),
-                                     np.array([[16], [17], [18], [19], [20]]))
-    with ops.Graph().as_default() as g:
-      init_op, get_next = self._build_graph(input_components,
-                                            new_to_concatenate_components)
-      saver = saver_lib.Saver()
-      with self.test_session(graph=g) as sess:
-        saver.restore(sess, saver_lib.latest_checkpoint(self.get_temp_dir()))
-        for i in range(break_range, stop):
-          result = sess.run(get_next)
-          for component, result_component in zip(to_concatenate_components,
-                                                 result):
-            self.assertAllEqual(component[i - 4], result_component)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+        np.array([[5], [6], [7], [8], [9]]), 20), var_array)
+
+    return dataset_ops.Dataset.from_tensor_slices(input_components).concatenate(
+        dataset_ops.Dataset.from_tensor_slices(to_concatenate_components))
+
+  def testConcatenateCore(self):
+    num_outputs = 9
+    array = np.tile(np.array([[16], [17], [18], [19], [20]]), 15)
+    diff_array = np.array([[1], [2], [3], [4], [5]])
+    self.run_core_tests(lambda: self._build_concatenate_dataset(array),
+                        lambda: self._build_concatenate_dataset(diff_array),
+                        num_outputs)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
index 55a1d3b95b212466b262ad3c26f1efd7ed0e067e..a90ba30e60cef13156719bba24fb553c0acec391 100644
--- a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
@@ -717,11 +717,12 @@ class DatasetConstructorTest(test.TestCase):
       sess.run(var_1.initializer)
 
       iterator = dataset.make_initializable_iterator()
+      sess.run(iterator.initializer)
 
       with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          "Trying to access resource located in device"):
-        sess.run(iterator.initializer)
+          errors.FailedPreconditionError,
+          "Error while reading resource variable Variable"):
+        sess.run(iterator.get_next())
 
   def testRestructureDataset(self):
     components = (array_ops.placeholder(dtypes.int32),
diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py
index bf25cc60a1c0efc09bed6501fd2d6f4ccb07764b..7cde6e05b244773966fd7c1bd4ca1e95abf7fd5e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py
+++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py
@@ -40,6 +40,8 @@ class DatasetSerializationTestBase(test.TestCase):
   def tearDown(self):
     self._delete_ckpt()
 
+  # TODO(b/70988345): Support native `tf.SparseTensor` objects and get rid of
+  # `sparse_tensors` argument.
   def run_core_tests(self, ds_fn1, ds_fn2, num_outputs, sparse_tensors=False):
     """Runs the core tests.
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
index e66ed3f7aa2a512813ef353d2d0744ae67005884..b1937c08f347734d0d6871bd30ed209ff520623a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
@@ -41,6 +41,7 @@ from tensorflow.python.platform import test
 class InterleaveDatasetTest(test.TestCase):
 
   def _interleave(self, lists, cycle_length, block_length):
+    # TODO(b/69678297): Consolidate python interleave implementations.
     num_open = 0
 
     # `all_iterators` acts as a queue of iterators over each element of `lists`.
@@ -255,11 +256,15 @@ class InterleaveDatasetSeriazationTest(
 class ParallelInterleaveDatasetTest(test.TestCase):
 
   def setUp(self):
+
     self.input_values = array_ops.placeholder(dtypes.int64, shape=[None])
     self.cycle_length = array_ops.placeholder(dtypes.int64, shape=[])
     self.block_length = array_ops.placeholder(dtypes.int64, shape=[])
     self.sloppy = array_ops.placeholder(dtypes.bool, shape=[])
+    self.buffer_output_elements = array_ops.placeholder(dtypes.int64, shape=[])
+    self.prefetch_input_elements = array_ops.placeholder(dtypes.int64, shape=[])
 
+    self.error = None
     self.repeat_count = 2
 
     # Set up threading events used to sequence when items are produced that
@@ -276,6 +281,10 @@ class ParallelInterleaveDatasetTest(test.TestCase):
       self.write_coordination_events[x].wait()
       self.write_coordination_events[x].clear()
       self.read_coordination_events[x].release()
+      if self.error:
+        err = self.error
+        self.error = None
+        raise err  # pylint: disable=raising-bad-type
       return x * x
 
     def map_fn(x):
@@ -286,11 +295,13 @@ class ParallelInterleaveDatasetTest(test.TestCase):
       dataset = dataset.repeat(x)
       return dataset.map(map_fn)
 
-    self.dataset = (dataset_ops.Dataset.from_tensor_slices(self.input_values)
-                    .repeat(self.repeat_count).apply(
-                        interleave_ops.parallel_interleave(
-                            interleave_fn, self.cycle_length,
-                            self.block_length, self.sloppy)))
+    self.dataset = (
+        dataset_ops.Dataset.from_tensor_slices(self.input_values)
+        .repeat(self.repeat_count).apply(
+            interleave_ops.parallel_interleave(interleave_fn, self.cycle_length,
+                                               self.block_length, self.sloppy,
+                                               self.buffer_output_elements,
+                                               self.prefetch_input_elements)))
     self.iterator = self.dataset.make_initializable_iterator()
     self.init_op = self.iterator.initializer
     self.next_element = self.iterator.get_next()
@@ -380,7 +391,7 @@ class ParallelInterleaveDatasetTest(test.TestCase):
     for i in range(4, 7):
       self.write_coordination_events[i].set()
 
-  def _testSingleThreaded(self, sloppy=False):
+  def _testSingleThreaded(self, sloppy=False, prefetch_input_elements=0):
     # cycle_length=1,block_length=1 acts like `Dataset.interleave()` and
     # `Dataset.flat_map()` and is single-threaded. No synchronization required.
     with self.test_session() as sess:
@@ -391,7 +402,9 @@ class ParallelInterleaveDatasetTest(test.TestCase):
               self.input_values: [4, 5, 6],
               self.cycle_length: 1,
               self.block_length: 1,
-              self.sloppy: sloppy
+              self.sloppy: sloppy,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: prefetch_input_elements,
           })
 
       for expected_element in self._interleave(
@@ -408,6 +421,41 @@ class ParallelInterleaveDatasetTest(test.TestCase):
   def testSingleThreadedSloppy(self):
     self._testSingleThreaded(sloppy=True)
 
+  def testSingleThreadedPrefetch1Itr(self):
+    self._testSingleThreaded(prefetch_input_elements=1)
+
+  def testSingleThreadedPrefetch1ItrSloppy(self):
+    self._testSingleThreaded(prefetch_input_elements=1, sloppy=True)
+
+  def testSingleThreadedRagged(self):
+    # Tests a sequence with wildly different elements per iterator.
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [3, 7, 4],
+              self.cycle_length: 2,
+              self.block_length: 1,
+              self.sloppy: False,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 1,
+          })
+
+      # Add coordination values for 3 and 7
+      self.read_coordination_events[3] = threading.Semaphore(0)
+      self.write_coordination_events[3] = threading.Event()
+      self.read_coordination_events[7] = threading.Semaphore(0)
+      self.write_coordination_events[7] = threading.Event()
+
+      for expected_element in self._interleave(
+          [[3] * 3, [7] * 7, [4] * 4] * self.repeat_count, 2, 1):
+        self.write_coordination_events[expected_element].set()
+        output = sess.run(self.next_element)
+        self.assertEqual(expected_element * expected_element, output)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
   def _testTwoThreadsNoContention(self, sloppy=False):
     # num_threads > 1.
     # Explicit coordination should result in `Dataset.interleave()` behavior
@@ -420,7 +468,9 @@ class ParallelInterleaveDatasetTest(test.TestCase):
               self.input_values: [4, 5, 6],
               self.cycle_length: 2,
               self.block_length: 1,
-              self.sloppy: sloppy
+              self.sloppy: sloppy,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 1,
           })
       for i, expected_element in enumerate(
           self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
@@ -463,6 +513,8 @@ class ParallelInterleaveDatasetTest(test.TestCase):
               self.cycle_length: 2,
               self.block_length: 1,
               self.sloppy: sloppy,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 1,
           })
       for i, expected_element in enumerate(
           self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
@@ -472,7 +524,7 @@ class ParallelInterleaveDatasetTest(test.TestCase):
           self.read_coordination_events[expected_element].acquire()
         else:
           self.write_coordination_events[expected_element].set()
-        time.sleep(0.1)  # Sleep to consistently "avoid" the race condition.
+        time.sleep(0.5)  # Sleep to consistently "avoid" the race condition.
         actual_element = sess.run(self.next_element)
         if not done_first_event:
           done_first_event = True
@@ -502,7 +554,9 @@ class ParallelInterleaveDatasetTest(test.TestCase):
               self.input_values: [4, 5, 6],
               self.cycle_length: 2,
               self.block_length: 2,
-              self.sloppy: sloppy
+              self.sloppy: sloppy,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 1,
           })
       for i, expected_element in enumerate(
           self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
@@ -545,7 +599,9 @@ class ParallelInterleaveDatasetTest(test.TestCase):
               self.input_values: [4, 5, 6],
               self.cycle_length: 2,
               self.block_length: 2,
-              self.sloppy: sloppy
+              self.sloppy: sloppy,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 1,
           })
       for i, expected_element in enumerate(
           self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
@@ -555,7 +611,7 @@ class ParallelInterleaveDatasetTest(test.TestCase):
           self.read_coordination_events[expected_element].acquire()
         else:
           self.write_coordination_events[expected_element].set()
-        time.sleep(0.1)  # Sleep to consistently "avoid" the race condition.
+        time.sleep(0.5)  # Sleep to consistently "avoid" the race condition.
         actual_element = sess.run(self.next_element)
         if not done_first_event:
           done_first_event = True
@@ -583,7 +639,9 @@ class ParallelInterleaveDatasetTest(test.TestCase):
               self.input_values: [],
               self.cycle_length: 2,
               self.block_length: 3,
-              self.sloppy: sloppy
+              self.sloppy: sloppy,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 0,
           })
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(self.next_element)
@@ -604,7 +662,9 @@ class ParallelInterleaveDatasetTest(test.TestCase):
               self.input_values: [0, 0, 0],
               self.cycle_length: 2,
               self.block_length: 3,
-              self.sloppy: sloppy
+              self.sloppy: sloppy,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 0,
           })
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(self.next_element)
@@ -615,7 +675,8 @@ class ParallelInterleaveDatasetTest(test.TestCase):
   def testNonEmptyInputIntoEmptyOutputsSloppy(self):
     self._testNonEmptyInputIntoEmptyOutputs(sloppy=True)
 
-  def _testPartiallyEmptyOutputs(self, sloppy=False):
+  def _testPartiallyEmptyOutputs(self, sloppy=False, prefetch_input_elements=1):
+    race_indices = {2, 8, 14}  # Sequence points when sloppy mode has race conds
     # Mixture of non-empty and empty interleaved datasets.
     with self.test_session() as sess:
       self._clear_coordination_events()
@@ -627,27 +688,31 @@ class ParallelInterleaveDatasetTest(test.TestCase):
               self.cycle_length: 2,
               self.block_length: 1,
               self.sloppy: sloppy,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: prefetch_input_elements,
           })
       for i, expected_element in enumerate(
           self._interleave([[4] * 4, [], [6] * 6] * self.repeat_count, 2, 1)):
         self.write_coordination_events[expected_element].set()
-        if done_first_event:  # First event starts the worker threads
+        # First event starts the worker threads. Additionally, when running the
+        # sloppy case with prefetch_input_elements=0, we get stuck if we wait
+        # for the read coordination event for certain event orderings in the
+        # presence of finishing iterators.
+        if done_first_event and not (sloppy and (i in race_indices)):
           self.read_coordination_events[expected_element].acquire()
         actual_element = sess.run(self.next_element)
-        if not done_first_event:
+        if not done_first_event or (sloppy and (i in race_indices)):
           done_first_event = True
           self.read_coordination_events[expected_element].acquire()
         self.assertEqual(expected_element * expected_element, actual_element,
                          "At index %s: %s expected, got: %s" %
                          (i, expected_element, actual_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
 
   def testPartiallyEmptyOutputs(self):
     self._testPartiallyEmptyOutputs()
 
   def testPartiallyEmptyOutputsSloppy(self):
-    self._testPartiallyEmptyOutputs(sloppy=True)
+    self._testPartiallyEmptyOutputs(sloppy=True, prefetch_input_elements=0)
 
   def testDelayedOutputSloppy(self):
     # Explicitly control the sequence of events to ensure we correctly avoid
@@ -661,6 +726,8 @@ class ParallelInterleaveDatasetTest(test.TestCase):
               self.cycle_length: 2,
               self.block_length: 1,
               self.sloppy: True,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 0,
           })
 
       mis_ordering = [
@@ -683,8 +750,10 @@ class ParallelInterleaveDatasetTest(test.TestCase):
           feed_dict={
               self.input_values: [4, 5, 6],
               self.cycle_length: 2,
-              self.block_length: 3,
-              self.sloppy: True
+              self.block_length: 1,
+              self.sloppy: True,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 1,
           })
       # Test against a generating sequence that differs from the uncontended
       # case, in order to prove sloppy correctness.
@@ -692,7 +761,7 @@ class ParallelInterleaveDatasetTest(test.TestCase):
           self._interleave(
               [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count,
               cycle_length=2,
-              block_length=2)):
+              block_length=3)):
         self.write_coordination_events[expected_element].set()
         if done_first_event:  # First event starts the worker threads.
           self.read_coordination_events[expected_element].acquire()
@@ -716,7 +785,9 @@ class ParallelInterleaveDatasetTest(test.TestCase):
               self.input_values: [4, 5, 6],
               self.cycle_length: 3,
               self.block_length: 2,
-              self.sloppy: sloppy
+              self.sloppy: sloppy,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 0,
           })
       for i in range(4, 7):
         self.write_coordination_events[i].set()
@@ -790,6 +861,139 @@ class ParallelInterleaveDatasetTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  def testErrorsInOutputFn(self):
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 5, 6],
+              self.cycle_length: 2,
+              self.block_length: 1,
+              self.sloppy: False,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 0,
+          })
+
+      except_on_element_indices = set([3])
+
+      for i, expected_element in enumerate(
+          self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
+                           1)):
+        if i in except_on_element_indices:
+          self.error = ValueError()
+          self.write_coordination_events[expected_element].set()
+          with self.assertRaises(errors.InvalidArgumentError):
+            sess.run(self.next_element)
+        else:
+          self.write_coordination_events[expected_element].set()
+          actual_element = sess.run(self.next_element)
+          self.assertEqual(expected_element * expected_element, actual_element,
+                           "At index %s: %s expected, got: %s" %
+                           (i, expected_element, actual_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testErrorsInInputFn(self):
+
+    def map_py_fn(x):
+      if x == 5:
+        raise ValueError()
+      return x
+
+    def map_fn(x):
+      return script_ops.py_func(map_py_fn, [x], x.dtype)
+
+    def interleave_fn(x):
+      dataset = dataset_ops.Dataset.from_tensors(x)
+      dataset = dataset.repeat(x)
+      return dataset
+
+    self.dataset = (
+        dataset_ops.Dataset.from_tensor_slices(self.input_values).map(map_fn)
+        .repeat(self.repeat_count).apply(
+            interleave_ops.parallel_interleave(interleave_fn, self.cycle_length,
+                                               self.block_length, self.sloppy,
+                                               self.buffer_output_elements,
+                                               self.prefetch_input_elements)))
+
+    self.iterator = self.dataset.make_initializable_iterator()
+    self.init_op = self.iterator.initializer
+    self.next_element = self.iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 5, 6],
+              self.cycle_length: 2,
+              self.block_length: 1,
+              self.sloppy: False,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 0,
+          })
+      for i, expected_element in enumerate(
+          self._interleave([[4] * 4, [5], [6] * 6] * self.repeat_count, 2, 1)):
+        if expected_element == 5:
+          with self.assertRaises(errors.InvalidArgumentError):
+            sess.run(self.next_element)
+        else:
+          actual_element = sess.run(self.next_element)
+          self.assertEqual(expected_element, actual_element,
+                           "At index %s: %s expected, got: %s" %
+                           (i, expected_element, actual_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testErrorsInInterleaveFn(self):
+
+    def map_py_fn(x):
+      if x == 5:
+        raise ValueError()
+      return x
+
+    def interleave_fn(x):
+      dataset = dataset_ops.Dataset.from_tensors(x)
+      y = script_ops.py_func(map_py_fn, [x], x.dtype)
+      dataset = dataset.repeat(y)
+      return dataset
+
+    self.dataset = (
+        dataset_ops.Dataset.from_tensor_slices(self.input_values)
+        .repeat(self.repeat_count).apply(
+            interleave_ops.parallel_interleave(interleave_fn, self.cycle_length,
+                                               self.block_length, self.sloppy,
+                                               self.buffer_output_elements,
+                                               self.prefetch_input_elements)))
+
+    self.iterator = self.dataset.make_initializable_iterator()
+    self.init_op = self.iterator.initializer
+    self.next_element = self.iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 5, 6],
+              self.cycle_length: 2,
+              self.block_length: 1,
+              self.sloppy: False,
+              self.buffer_output_elements: 1,
+              self.prefetch_input_elements: 0,
+          })
+      for i, expected_element in enumerate(
+          self._interleave([[4] * 4, [5], [6] * 6] * self.repeat_count, 2, 1)):
+        if expected_element == 5:
+          with self.assertRaises(errors.InvalidArgumentError):
+            sess.run(self.next_element)
+        else:
+          actual_element = sess.run(self.next_element)
+          self.assertEqual(expected_element, actual_element,
+                           "At index %s: %s expected, got: %s" %
+                           (i, expected_element, actual_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
index e9a07da84a8c80c09ebd4dab0b1d69febe1c9790..69252612a8e6cb29c513003188946be21f3432c2 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
@@ -24,8 +24,9 @@ import threading
 import numpy as np
 
 from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import dataset_ops
+from tensorflow.contrib.data.python.ops import dataset_ops as contrib_dataset_ops
 from tensorflow.contrib.data.python.ops import error_ops
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -52,8 +53,10 @@ class MapDatasetTest(test.TestCase):
   def _buildMapDataset(self, components, count):
     def _map_fn(x, y, z):
       return math_ops.square(x), math_ops.square(y), math_ops.square(z)
-    return (dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn)
-            .repeat(count))
+
+    return (
+        contrib_dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn)
+        .repeat(count))
 
   def testMapDataset(self):
     """Test an dataset that maps a TF function across its input elements."""
@@ -113,7 +116,8 @@ class MapDatasetTest(test.TestCase):
                                output_buffer_size):
     def _map_fn(x, y, z):
       return math_ops.square(x), math_ops.square(y), math_ops.square(z)
-    return (dataset_ops.Dataset.from_tensor_slices(components).map(
+
+    return (contrib_dataset_ops.Dataset.from_tensor_slices(components).map(
         _map_fn, num_threads=num_threads, output_buffer_size=output_buffer_size)
             .repeat(count))
 
@@ -210,9 +214,9 @@ class MapDatasetTest(test.TestCase):
   def testParallelMapUnspecifiedOutputSize(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
 
-    dataset = (dataset_ops.Dataset.from_tensor_slices(components)
-               .map(lambda x: array_ops.check_numerics(x, "message"),
-                    num_threads=2))
+    dataset = (
+        contrib_dataset_ops.Dataset.from_tensor_slices(components).map(
+            lambda x: array_ops.check_numerics(x, "message"), num_threads=2))
     iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
@@ -225,9 +229,11 @@ class MapDatasetTest(test.TestCase):
   def testParallelMapError(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
 
-    dataset = (dataset_ops.Dataset.from_tensor_slices(components)
-               .map(lambda x: array_ops.check_numerics(x, "message"),
-                    num_threads=2, output_buffer_size=2))
+    dataset = (
+        contrib_dataset_ops.Dataset.from_tensor_slices(components).map(
+            lambda x: array_ops.check_numerics(x, "message"),
+            num_threads=2,
+            output_buffer_size=2))
     iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
@@ -246,9 +252,9 @@ class MapDatasetTest(test.TestCase):
   def testPrefetchError(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
 
-    dataset = (dataset_ops.Dataset.from_tensor_slices(components)
-               .map(lambda x: array_ops.check_numerics(x, "message"))
-               .prefetch(2))
+    dataset = (
+        contrib_dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: array_ops.check_numerics(x, "message")).prefetch(2))
     iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
@@ -267,9 +273,10 @@ class MapDatasetTest(test.TestCase):
   def testMapIgnoreError(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
 
-    dataset = (dataset_ops.Dataset.from_tensor_slices(components)
-               .map(lambda x: array_ops.check_numerics(x, "message")).apply(
-                   error_ops.ignore_errors()))
+    dataset = (
+        contrib_dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: array_ops.check_numerics(x, "message")).apply(
+            error_ops.ignore_errors()))
     iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
@@ -284,10 +291,11 @@ class MapDatasetTest(test.TestCase):
   def testParallelMapIgnoreError(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
 
-    dataset = (dataset_ops.Dataset.from_tensor_slices(components).map(
-        lambda x: array_ops.check_numerics(x, "message"),
-        num_threads=2,
-        output_buffer_size=2).apply(error_ops.ignore_errors()))
+    dataset = (
+        contrib_dataset_ops.Dataset.from_tensor_slices(components).map(
+            lambda x: array_ops.check_numerics(x, "message"),
+            num_threads=2,
+            output_buffer_size=2).apply(error_ops.ignore_errors()))
     iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
@@ -308,9 +316,10 @@ class MapDatasetTest(test.TestCase):
     for filename in filenames:
       write_string_to_file(filename, filename)
 
-    dataset = (dataset_ops.Dataset.from_tensor_slices(filenames).map(
-        io_ops.read_file, num_threads=2, output_buffer_size=2).apply(
-            error_ops.ignore_errors()))
+    dataset = (
+        contrib_dataset_ops.Dataset.from_tensor_slices(filenames).map(
+            io_ops.read_file, num_threads=2, output_buffer_size=2).apply(
+                error_ops.ignore_errors()))
     iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
@@ -344,7 +353,7 @@ class MapDatasetTest(test.TestCase):
     table = lookup_ops.HashTable(
         lookup_ops.KeyValueTensorInitializer(keys, values), default_val)
 
-    input_sentences = dataset_ops.Dataset.from_tensor_slices(
+    input_sentences = contrib_dataset_ops.Dataset.from_tensor_slices(
         ["brain brain tank salad surgery", "surgery brain"])
 
     iterator = (input_sentences
@@ -368,8 +377,9 @@ class MapDatasetTest(test.TestCase):
     queue = data_flow_ops.FIFOQueue(200, dtypes.int64, shapes=[])
     enqueue_op = queue.enqueue_many(elements)
     close_op = queue.close()
-    iterator = (dataset_ops.Dataset.from_tensors(0).repeat(-1)
-                .map(lambda _: queue.dequeue()).make_initializable_iterator())
+    iterator = (
+        contrib_dataset_ops.Dataset.from_tensors(0).repeat(-1)
+        .map(lambda _: queue.dequeue()).make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -392,9 +402,10 @@ class MapDatasetTest(test.TestCase):
     enqueue_op = queue.enqueue_many(elements)
     close_op = queue.close()
 
-    iterator = (dataset_ops.Dataset.from_tensors(0).repeat(-1)
-                .map(lambda _: (queue.dequeue(), queue_2.dequeue()))
-                .make_initializable_iterator())
+    iterator = (
+        contrib_dataset_ops.Dataset.from_tensors(0).repeat(-1)
+        .map(lambda _: (queue.dequeue(), queue_2.dequeue()))
+        .make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -411,9 +422,9 @@ class MapDatasetTest(test.TestCase):
   def testCaptureVariable(self):
     counter_var = variable_scope.get_variable(
         "counter", (), dtypes.int32, use_resource=True)
-    iterator = (dataset_ops.Dataset.from_tensors(0).repeat(10)
-                .map(lambda _: counter_var.assign_add(1))
-                .make_initializable_iterator())
+    iterator = (
+        contrib_dataset_ops.Dataset.from_tensors(0).repeat(10)
+        .map(lambda _: counter_var.assign_add(1)).make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -431,20 +442,22 @@ class MapDatasetTest(test.TestCase):
   def testCaptureUninitializedVariableError(self):
     counter_var = variable_scope.get_variable(
         "counter", (), dtypes.int32, use_resource=True)
-    iterator = (dataset_ops.Dataset.from_tensors(0).repeat(10)
-                .map(lambda _: counter_var.assign_add(1))
-                .make_initializable_iterator())
+    iterator = (
+        contrib_dataset_ops.Dataset.from_tensors(0).repeat(10)
+        .map(lambda _: counter_var.assign_add(1)).make_initializable_iterator())
     init_op = iterator.initializer
+    get_next = iterator.get_next()
 
     with self.test_session() as sess:
-      with self.assertRaisesRegexp(errors.FailedPreconditionError,
-                                   "Failed to capture resource"):
-        sess.run(init_op)
+      sess.run(init_op)
+      with self.assertRaises(errors.NotFoundError):
+        sess.run(get_next)
 
   def testSeededStatefulOperatorIsProperlyStateful(self):
-    iterator = (dataset_ops.Dataset.from_tensors(0).repeat(10)
-                .map(lambda _: random_ops.random_uniform((), seed=11)).batch(2)
-                .make_initializable_iterator())
+    iterator = (
+        contrib_dataset_ops.Dataset.from_tensors(0).repeat(10)
+        .map(lambda _: random_ops.random_uniform((), seed=11)).batch(2)
+        .make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -466,7 +479,7 @@ class MapDatasetTest(test.TestCase):
       self.assertAllClose(random_values, random_values_2)
 
   def testMapDict(self):
-    iterator = (dataset_ops.Dataset.range(10)
+    iterator = (contrib_dataset_ops.Dataset.range(10)
                 .map(lambda x: {"foo": x * 2, "bar": x ** 2})
                 .map(lambda d: d["foo"] + d["bar"])
                 .make_initializable_iterator())
@@ -482,9 +495,9 @@ class MapDatasetTest(test.TestCase):
 
   def testMapNamedtuple(self, count=10):
     # construct dataset of tuples
-    labels = dataset_ops.Dataset.range(count)
+    labels = contrib_dataset_ops.Dataset.range(count)
     images = labels.map(lambda l: -l)
-    dataset_tuple = dataset_ops.Dataset.zip((labels, images))
+    dataset_tuple = contrib_dataset_ops.Dataset.zip((labels, images))
 
     # convert dataset of tuples to dataset of namedtuples
     example = namedtuple("Example", ["label", "image"])
@@ -517,7 +530,7 @@ class MapDatasetTest(test.TestCase):
   def testUseStepContainerInMap(self):
     row = np.arange(6)
     iterator = (
-        dataset_ops.Dataset.from_tensors(row)
+        contrib_dataset_ops.Dataset.from_tensors(row)
         .map(lambda elems: functional_ops.map_fn(lambda x: x * x, elems))
         .make_initializable_iterator())
     init_op = iterator.initializer
@@ -547,10 +560,8 @@ class MapDatasetTest(test.TestCase):
 
     buffer_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
     iterator = (
-        dataset_ops.Dataset.range(100)
-        .map(_map_fn)
-        .prefetch(buffer_size_placeholder)
-        .make_initializable_iterator())
+        contrib_dataset_ops.Dataset.range(100).map(_map_fn)
+        .prefetch(buffer_size_placeholder).make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -586,9 +597,10 @@ class MapDatasetTest(test.TestCase):
           sess.run(get_next)
 
   def testReturnList(self):
-    iterator = (dataset_ops.Dataset.range(10)
-                .map(lambda x: [x, constant_op.constant(37.0)])
-                .make_initializable_iterator())
+    iterator = (
+        contrib_dataset_ops.Dataset.range(10)
+        .map(lambda x: [x, constant_op.constant(37.0)])
+        .make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -607,9 +619,9 @@ class MapDatasetTest(test.TestCase):
       return script_ops.py_func(
           _map_py_func, [x_tensor], [dtypes.int64, dtypes.float64])
 
-    iterator = (dataset_ops.Dataset.range(10)
-                .map(_map_fn)
-                .make_initializable_iterator())
+    iterator = (
+        contrib_dataset_ops.Dataset.range(10).map(_map_fn)
+        .make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -633,9 +645,9 @@ class MapDatasetTest(test.TestCase):
           values=(i * np.array([1])),
           dense_shape=np.array([1, 1]))
 
-    iterator = (dataset_ops.Dataset.range(10)
-                .map(_sparse)
-                .make_initializable_iterator())
+    iterator = (
+        contrib_dataset_ops.Dataset.range(10).map(_sparse)
+        .make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -661,7 +673,7 @@ class MapDatasetTest(test.TestCase):
       return sparse_ops.sparse_concat(0, [i, i])
 
     iterator = (
-        dataset_ops.Dataset.range(10).map(_sparse).map(_check)
+        contrib_dataset_ops.Dataset.range(10).map(_sparse).map(_check)
         .make_initializable_iterator())
     init_op = iterator.initializer
     get_next = iterator.get_next()
@@ -683,23 +695,26 @@ class MapDatasetTest(test.TestCase):
         get_next = iterator.get_next()
         return x * get_next
 
-      return dataset_ops.Dataset.range(10).map(_map_fn)
+      return contrib_dataset_ops.Dataset.range(10).map(_map_fn)
 
     def _build_graph():
-      captured_iterator = dataset_ops.Dataset.range(
+      captured_iterator = contrib_dataset_ops.Dataset.range(
           10).make_initializable_iterator()
       ds = _build_ds(captured_iterator)
       iterator = ds.make_initializable_iterator()
       init_op = iterator.initializer
-      return captured_iterator.initializer, init_op
+      get_next = iterator.get_next()
+      return captured_iterator.initializer, init_op, get_next
 
     with ops.Graph().as_default() as g:
-      captured_init_op, init_op = _build_graph()
+      captured_init_op, init_op, get_next = _build_graph()
       with self.test_session(graph=g) as sess:
         sess.run(captured_init_op)
-        with self.assertRaises(errors.UnimplementedError):
-          # CapturedFunction does not support capturing IteratorResource.
-          sess.run(init_op)
+        sess.run(init_op)
+        for i in range(10):
+          self.assertEquals(i * i, sess.run(get_next))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next)
 
 
 class MapDatasetSerializationTest(
@@ -718,8 +733,9 @@ class MapDatasetSerializationTest(
     def _map_fn(x, y, z):
       return math_ops.square(x), math_ops.square(y), math_ops.square(z)
 
-    return (dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn)
-            .repeat(self._num_epochs))
+    return (
+        contrib_dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn)
+        .repeat(self._num_epochs))
 
   def testSaveRestoreCore(self):
     self.run_core_tests(
@@ -735,7 +751,98 @@ class MapDatasetSerializationTest(
         return random_ops.random_uniform(
             (), 0, 10, dtype=dtypes.int32) * math_ops.to_int32(x)
 
-      return dataset_ops.Dataset.range(100).map(_map_fn)
+      return contrib_dataset_ops.Dataset.range(100).map(_map_fn)
+
+    self.verify_error_on_save(_build_ds, 15, errors.InvalidArgumentError)
+
+  def testCaptureVariableInMapFn(self):
+
+    def _build_ds():
+      counter_var = variable_scope.get_variable(
+          "counter", (), dtypes.int32, use_resource=True)
+      return (contrib_dataset_ops.Dataset.from_tensors(0).repeat(10).map(
+          lambda _: counter_var.assign_add(1)))
+
+    self.verify_error_on_save(_build_ds, 15, errors.InvalidArgumentError)
+
+  def testCaptureDefunInMapFn(self):
+    num_outputs = 100
+
+    def _build_ds():
+
+      @function.Defun(dtypes.int64)
+      def defun_fn(x):
+        return constant_op.constant(1000) + math_ops.to_int32(x)
+
+      return contrib_dataset_ops.Dataset.range(num_outputs).map(defun_fn)
+
+    self.run_core_tests(_build_ds, None, num_outputs)
+
+  def testBuildDefunInMapFn(self):
+    num_outputs = 100
+
+    def _build_ds():
+
+      @function.Defun(dtypes.int64)
+      def defun_fn(x):
+
+        @function.Defun(dtypes.int32)
+        def defun_fn_deep(x):
+          return constant_op.constant(1000) + math_ops.to_int32(x)
+
+        return constant_op.constant(11000) + defun_fn_deep(math_ops.to_int32(x))
+
+      return contrib_dataset_ops.Dataset.range(num_outputs).map(defun_fn)
+
+    self.run_core_tests(_build_ds, None, num_outputs)
+
+
+class ParallelMapDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def setUp(self):
+    self._tensor_slice_len = 7
+    self._num_epochs = 1
+    self._num_outputs = self._tensor_slice_len * self._num_epochs
+
+  def _build_ds(self, multiplier=37.0):
+    components = (np.arange(self._tensor_slice_len), np.array([[1, 2, 3]]) *
+                  np.arange(self._tensor_slice_len)[:, np.newaxis],
+                  np.array(multiplier) * np.arange(self._tensor_slice_len))
+
+    def _map_fn(x, y, z):
+      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
+
+    return (dataset_ops.Dataset.from_tensor_slices(components).map(
+        _map_fn, num_parallel_calls=3).repeat(self._num_epochs))
+
+  def _build_ds_with_prefetch(self, multiplier=37.0):
+    components = (np.arange(self._tensor_slice_len), np.array([[1, 2, 3]]) *
+                  np.arange(self._tensor_slice_len)[:, np.newaxis],
+                  np.array(multiplier) * np.arange(self._tensor_slice_len))
+
+    def _map_fn(x, y, z):
+      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
+
+    return (dataset_ops.Dataset.from_tensor_slices(components).map(
+        _map_fn, num_parallel_calls=3).repeat(self._num_epochs).prefetch(5))
+
+  def testSaveRestoreCore(self):
+    for ds_fn in [self._build_ds, self._build_ds_with_prefetch]:
+      self.run_core_tests(
+          ds_fn,
+          lambda: ds_fn(multiplier=15.0),
+          self._num_outputs)
+
+  def testSaveStatefulFunction(self):
+
+    def _build_ds():
+
+      def _map_fn(x):
+        return random_ops.random_uniform(
+            (), 0, 10, dtype=dtypes.int32) * math_ops.to_int32(x)
+
+      return contrib_dataset_ops.Dataset.range(100).map(_map_fn)
 
     self.verify_error_on_save(_build_ds, 15, errors.InvalidArgumentError)
 
@@ -744,7 +851,7 @@ class MapDatasetSerializationTest(
     def _build_ds():
       counter_var = variable_scope.get_variable(
           "counter", (), dtypes.int32, use_resource=True)
-      return (dataset_ops.Dataset.from_tensors(0).repeat(10).map(
+      return (contrib_dataset_ops.Dataset.from_tensors(0).repeat(10).map(
           lambda _: counter_var.assign_add(1)))
 
     self.verify_error_on_save(_build_ds, 15, errors.InvalidArgumentError)
@@ -758,7 +865,7 @@ class MapDatasetSerializationTest(
       def defun_fn(x):
         return constant_op.constant(1000) + math_ops.to_int32(x)
 
-      return dataset_ops.Dataset.range(num_outputs).map(defun_fn)
+      return contrib_dataset_ops.Dataset.range(num_outputs).map(defun_fn)
 
     self.run_core_tests(_build_ds, None, num_outputs)
 
@@ -776,7 +883,7 @@ class MapDatasetSerializationTest(
 
         return constant_op.constant(11000) + defun_fn_deep(math_ops.to_int32(x))
 
-      return dataset_ops.Dataset.range(num_outputs).map(defun_fn)
+      return contrib_dataset_ops.Dataset.range(num_outputs).map(defun_fn)
 
     self.run_core_tests(_build_ds, None, num_outputs)
 
@@ -785,7 +892,7 @@ class IgnoreErrorsSerializationTest(
     dataset_serialization_test_base.DatasetSerializationTestBase):
 
   def _build_ds(self, components):
-    return dataset_ops.Dataset.from_tensor_slices(components).map(
+    return contrib_dataset_ops.Dataset.from_tensor_slices(components).map(
         lambda x: array_ops.check_numerics(x, "message")).apply(
             error_ops.ignore_errors())
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
index 8e6ad061a11752ab7b1ffc13c90b4fa52f67d6aa..a431670829ed1d66f1719985af73eafa1fe45982 100644
--- a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
@@ -19,11 +19,10 @@ from __future__ import print_function
 
 import os
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import counter
 from tensorflow.contrib.data.python.ops import dataset_ops
 from tensorflow.contrib.data.python.ops import enumerate_ops
-from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
-from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -34,20 +33,11 @@ from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.ops import io_ops
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
-from tensorflow.python.training import saver as saver_lib
 
 
 class RangeDatasetTest(test.TestCase):
 
-  def tearDown(self):
-    # Remove all checkpoint files.
-    prefix = self._iterator_checkpoint_prefix()
-    pattern = prefix + "*"
-    files = gfile.Glob(pattern)
-    map(gfile.Remove, files)
-
   def testStop(self):
     stop = array_ops.placeholder(dtypes.int64, shape=[])
     iterator = dataset_ops.Dataset.range(stop).make_initializable_iterator()
@@ -216,20 +206,25 @@ class RangeDatasetTest(test.TestCase):
       self.assertEqual(-1, sess.run(negative_get_next))
       self.assertEqual(-2, sess.run(negative_get_next))
 
-  def _iterator_checkpoint_prefix(self):
+
+class RangeDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _iterator_checkpoint_prefix_local(self):
     return os.path.join(self.get_temp_dir(), "iterator")
 
   def _save_op(self, iterator_resource):
     iterator_state_variant = gen_dataset_ops.serialize_iterator(
         iterator_resource)
     save_op = io_ops.write_file(
-        self._iterator_checkpoint_prefix(),
+        self._iterator_checkpoint_prefix_local(),
         parsing_ops.serialize_tensor(iterator_state_variant))
     return save_op
 
   def _restore_op(self, iterator_resource):
     iterator_state_variant = parsing_ops.parse_tensor(
-        io_ops.read_file(self._iterator_checkpoint_prefix()), dtypes.variant)
+        io_ops.read_file(self._iterator_checkpoint_prefix_local()),
+        dtypes.variant)
     restore_op = gen_dataset_ops.deserialize_iterator(iterator_resource,
                                                       iterator_state_variant)
     return restore_op
@@ -283,382 +278,16 @@ class RangeDatasetTest(test.TestCase):
         with self.assertRaises(errors.OutOfRangeError):
           sess.run(get_next)
 
-  def testSaveRestoreUsingSaverFromMetaGraph(self):
-
-    def _build_graph(start, stop):
-      iterator = dataset_ops.Dataset.range(start,
-                                           stop).make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      ops.add_to_collection("iterator_ops", init_op)
-      ops.add_to_collection("iterator_ops", get_next)
-      saveable_obj = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-      # Add the SaveableObject to the `SAVEABLE_OBJECTS` collection
-      # so that it can be automatically picked up by the Saver.
-      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable_obj)
-      saver = saver_lib.Saver()
-      return init_op, get_next, saver
-
-    start = 2
-    stop = 10
-    break_point = 5
-    path = self._iterator_checkpoint_prefix()
-    meta_filename = path + ".meta"
-
-    # Execute input pipeline for a few steps and save iterator state.
-    with ops.Graph().as_default() as g:
-      init_op, get_next, saver = _build_graph(start, stop)
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        for i in range(start, break_point):
-          self.assertEqual(i, sess.run(get_next))
-        saver.save(sess, path)
-
-    # Build the saver from the MetaGraph using import_meta_graph and
-    # check that the iterator state is restored.
-    with ops.Graph().as_default() as g:
-      saver = saver_lib.import_meta_graph(meta_filename)
-      init_op, get_next = ops.get_collection("iterator_ops")
-      with self.test_session(graph=g) as sess:
-        saver.restore(sess, saver_lib.latest_checkpoint(self.get_temp_dir()))
-        for i in range(break_point, stop):
-          self.assertEqual(i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-  def testSaveRestoreUsingBuiltSaver(self):
-
-    def _build_graph(start, stop):
-      iterator = dataset_ops.Dataset.range(start,
-                                           stop).make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      ops.add_to_collection("iterator_ops", init_op)
-      ops.add_to_collection("iterator_ops", get_next)
-      # Add the SaveableObject to the `SAVEABLE_OBJECTS` collection
-      # so that it can be automatically picked up by the Saver.
-      saveable_obj = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable_obj)
-      saver = saver_lib.Saver()
-      return init_op, get_next, saver
-
-    start = 2
-    stop = 10
-    stop_new = 15
-    break_point = 5
-    path = self._iterator_checkpoint_prefix()
-
-    # Execute input pipeline for a few steps and save iterator state.
-    with ops.Graph().as_default() as g:
-      init_op, get_next, saver = _build_graph(start, stop)
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        for i in range(start, break_point):
-          self.assertEqual(i, sess.run(get_next))
-        saver.save(sess, path)
-
-    # Manually build a modified Graph and Saver instead of importing
-    # MetaGraph and verify that original iterator state gets restored.
-    with ops.Graph().as_default() as g:
-      init_op, get_next, saver = _build_graph(start, stop_new)
-      with self.test_session(graph=g) as sess:
-        saver.restore(sess, saver_lib.latest_checkpoint(self.get_temp_dir()))
-        for i in range(break_point, stop):
-          self.assertEqual(i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-  def testSaveRestoreUsingSaverThenInit(self):
-
-    def _build_graph(start, stop):
-      iterator = dataset_ops.Dataset.range(start,
-                                           stop).make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      ops.add_to_collection("iterator_ops", init_op)
-      ops.add_to_collection("iterator_ops", get_next)
-      # Add the SaveableObject to the `SAVEABLE_OBJECTS` collection
-      # so that it can be automatically picked up by the Saver.
-      saveable_obj = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable_obj)
-      saver = saver_lib.Saver()
-      return init_op, get_next, saver
+  def _build_range_dataset(self, start, stop):
+    return dataset_ops.Dataset.range(start, stop)
 
-    start = 2
-    stop = 10
-    stop_new = 15
-    break_point = 5
-    path = self._iterator_checkpoint_prefix()
-
-    # Execute input pipeline for a few steps and save iterator state.
-    with ops.Graph().as_default() as g:
-      init_op, get_next, saver = _build_graph(start, stop)
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        for i in range(start, break_point):
-          self.assertEqual(i, sess.run(get_next))
-        saver.save(sess, path)
-
-    # Restore iterator state call and then call init_op for the iterator and
-    # verify that the new iterator hides the restored iterator.
-    with ops.Graph().as_default() as g:
-      init_op, get_next, saver = _build_graph(start, stop_new)
-      with self.test_session(graph=g) as sess:
-        saver.restore(sess, saver_lib.latest_checkpoint(self.get_temp_dir()))
-        sess.run(init_op)
-        for i in range(start, stop_new):
-          self.assertEqual(i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-  def testRestoreWithoutBuildingDatasetGraph(self):
-
-    def _build_graph(start, stop, num_epochs):
-      dataset = dataset_ops.Dataset.range(start, stop).repeat(num_epochs)
-      iterator = dataset.make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      save_op = self._save_op(iterator._iterator_resource)
-      restore_op = self._restore_op(iterator._iterator_resource)
-      return init_op, get_next, save_op, restore_op
-
-    # Saving and restoring in different sessions.
-    start = 2
-    stop = 10
-    num_epochs = 5
-    break_point = 5
-    break_epoch = 3
-    with ops.Graph().as_default() as g:
-      init_op, get_next, save_op, _ = _build_graph(start, stop, num_epochs)
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        for _ in range(break_epoch):
-          for i in range(start, stop):
-            self.assertEqual(i, sess.run(get_next))
-        for i in range(start, break_point):
-          self.assertEqual(i, sess.run(get_next))
-        sess.run(save_op)
-
-    with ops.Graph().as_default() as g:
-      # Create an empty IteratorResource and restore the Iterator into it.
-      output_types = dtypes.int64
-      output_shapes = tensor_shape.scalar()
-      iterator = iterator_ops.Iterator.from_structure(output_types,
-                                                      output_shapes)
-      restore_op = self._restore_op(iterator._iterator_resource)
-      get_next = iterator.get_next()
-      with self.test_session(graph=g) as sess:
-        sess.run(restore_op)
-        for i in range(break_point, stop):
-          self.assertEqual(i, sess.run(get_next))
-        for _ in range(break_epoch + 1, num_epochs):
-          for i in range(start, stop):
-            self.assertEqual(i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-  def testRestoreInModifiedGraph(self):
-
-    def _build_graph(start, stop):
-      dataset = dataset_ops.Dataset.range(start, stop)
-      iterator = dataset.make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      save_op = self._save_op(iterator._iterator_resource)
-      restore_op = self._restore_op(iterator._iterator_resource)
-      return init_op, get_next, save_op, restore_op
-
-    # Saving and restoring in different sessions.
+  def testRangeCore(self):
     start = 2
     stop = 10
     stop_1 = 8
-    break_point = 5
-    with ops.Graph().as_default() as g:
-      init_op, get_next, save_op, _ = _build_graph(start, stop)
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        for i in range(start, break_point):
-          self.assertEqual(i, sess.run(get_next))
-        sess.run(save_op)
-
-    with ops.Graph().as_default() as g:
-      # Intentionally build a graph with a different value for stop to make sure
-      # the original dataset graph is actually getting loaded.
-      init_op, get_next, _, restore_op = _build_graph(start, stop_1)
-      with self.test_session(graph=g) as sess:
-        sess.run(restore_op)
-        for i in range(break_point, stop):
-          self.assertEqual(i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-  def testInitThenRestore(self):
-    # Note: Calling init_op before restore_op is redundant. This test just makes
-    # sure we do not fail if restore is called on an already initialized
-    # iterator resource.
-
-    def _build_graph(start, stop):
-      dataset = dataset_ops.Dataset.range(start, stop)
-      iterator = dataset.make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      save_op = self._save_op(iterator._iterator_resource)
-      restore_op = self._restore_op(iterator._iterator_resource)
-      return init_op, get_next, save_op, restore_op
-
-    # Saving and restoring in different sessions.
-    start = 2
-    stop = 10
-    break_point = 5
-    with ops.Graph().as_default() as g:
-      init_op, get_next, save_op, _ = _build_graph(start, stop)
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        for i in range(start, break_point):
-          self.assertEqual(i, sess.run(get_next))
-        sess.run(save_op)
-
-    with ops.Graph().as_default() as g:
-      init_op, get_next, _, restore_op = _build_graph(start, stop)
-      with self.test_session(graph=g) as sess:
-        sess.run(init_op)
-        sess.run(restore_op)
-        for i in range(break_point, stop):
-          self.assertEqual(i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-  def testMultipleSaves(self):
-
-    def _build_graph(start, stop):
-      iterator = dataset_ops.Dataset.range(start,
-                                           stop).make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      save_op = self._save_op(iterator._iterator_resource)
-      restore_op = self._restore_op(iterator._iterator_resource)
-      return init_op, get_next, save_op, restore_op
-
-    start = 2
-    stop = 10
-    break_point1 = 5
-    break_point2 = 7
-
-    with ops.Graph().as_default() as g:
-      init_op, get_next, save_op, _ = _build_graph(start, stop)
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        for i in range(start, break_point1):
-          self.assertEqual(i, sess.run(get_next))
-        sess.run(save_op)
-
-    with ops.Graph().as_default() as g:
-      init_op, get_next, save_op, restore_op = _build_graph(start, stop)
-      with self.test_session(graph=g) as sess:
-        sess.run(restore_op)
-        for i in range(break_point1, break_point2):
-          self.assertEqual(i, sess.run(get_next))
-        sess.run(save_op)
-
-    break_point2 = 7
-    with ops.Graph().as_default() as g:
-      init_op, get_next, save_op, restore_op = _build_graph(start, stop)
-      with self.test_session(graph=g) as sess:
-        sess.run(restore_op)
-        for i in range(break_point2, stop):
-          self.assertEqual(i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-  def testSaveRestoreWithRepeat(self):
-
-    def _build_graph(start, stop, num_epochs):
-      iterator = dataset_ops.Dataset.range(
-          start, stop).repeat(num_epochs).make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      save_op = self._save_op(iterator._iterator_resource)
-      restore_op = self._restore_op(iterator._iterator_resource)
-      return init_op, get_next, save_op, restore_op
-
-    start = 2
-    stop = 10
-    num_epochs = 5
-    break_range = 5
-    break_epoch = 3
-    with ops.Graph().as_default() as g:
-      init_op, get_next, save_op, restore_op = _build_graph(
-          start, stop, num_epochs)
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        # Note: There is no checkpoint saved currently so a NotFoundError is
-        # raised.
-        with self.assertRaises(errors.NotFoundError):
-          sess.run(restore_op)
-        for _ in range(break_epoch - 1):
-          for i in range(start, stop):
-            self.assertEqual(i, sess.run(get_next))
-        for i in range(start, break_range):
-          self.assertEqual(i, sess.run(get_next))
-        sess.run(save_op)
-
-    with ops.Graph().as_default() as g:
-      init_op, get_next, _, restore_op = _build_graph(start, stop, num_epochs)
-      with self.test_session(graph=g) as sess:
-        sess.run(restore_op)
-        for i in range(break_range, stop):
-          self.assertEqual(i, sess.run(get_next))
-        for _ in range(break_epoch, num_epochs):
-          for i in range(start, stop):
-            self.assertEqual(i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-  def testSaveRestoreExhaustedIterator(self):
-
-    def _build_graph(start, stop, num_epochs):
-      iterator = dataset_ops.Dataset.range(
-          start, stop).repeat(num_epochs).make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      save_op = self._save_op(iterator._iterator_resource)
-      restore_op = self._restore_op(iterator._iterator_resource)
-      return init_op, get_next, save_op, restore_op
-
-    start = 2
-    stop = 10
-    num_epochs = 5
-    with ops.Graph().as_default() as g:
-      init_op, get_next, save_op, restore_op = _build_graph(
-          start, stop, num_epochs)
-      with self.test_session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
-        # Note: There is no checkpoint saved currently so a NotFoundError is
-        # raised.
-        with self.assertRaises(errors.NotFoundError):
-          sess.run(restore_op)
-        for _ in range(num_epochs):
-          for i in range(start, stop):
-            self.assertEqual(i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-        sess.run(save_op)
-
-    with ops.Graph().as_default() as g:
-      init_op, get_next, _, restore_op = _build_graph(start, stop, num_epochs)
-      with self.test_session(graph=g) as sess:
-        sess.run(restore_op)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+    self.run_core_tests(lambda: self._build_range_dataset(start, stop),
+                        lambda: self._build_range_dataset(start, stop_1),
+                        stop - start)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py
index 5338ec56bf275e481a984964e39aa0c1ade3a752..e0494736b72ae52f586cb80d42a5c1e50ac17a61 100644
--- a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py
@@ -21,6 +21,7 @@ import itertools
 
 import numpy as np
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import scan_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -124,5 +125,18 @@ class ScanDatasetTest(test.TestCase):
           scan_ops.scan(constant_op.constant(1, dtype=dtypes.int32), _scan_fn))
 
 
+class ScanDatasetSerialzationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_dataset(self, num_elements):
+    return dataset_ops.Dataset.from_tensors(1).repeat(num_elements).apply(
+        scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1])))
+
+  def testScanCore(self):
+    num_output = 5
+    self.run_core_tests(lambda: self._build_dataset(num_output),
+                        lambda: self._build_dataset(2), num_output)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
index 6b5b53cc0f8f2d1df5622a5bc5e2f8ef04c6342a..45943d56ecb4bc18a6221157d0eeeae4efdf23cc 100644
--- a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
@@ -18,12 +18,12 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
-import os
 
 import numpy as np
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import dataset_ops as contrib_dataset_ops
-from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.contrib.data.python.ops import shuffle_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import constant_op
@@ -31,9 +31,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
-from tensorflow.python.training import saver as saver_lib
 
 
 class ShuffleDatasetTest(test.TestCase):
@@ -157,321 +155,135 @@ class ShuffleDatasetTest(test.TestCase):
       self.assertEqual(10, counts[i])
 
 
-class ShuffleDatasetSerializationTest(test.TestCase):
+class ShuffleDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
 
-  def tearDown(self):
-    # Remove all checkpoint files.
-    prefix = self._ckpt_path()
-    pattern = prefix + "*"
-    files = gfile.Glob(pattern)
-    map(gfile.Remove, files)
-
-  def _build_graph(self,
-                   range_limit=10,
-                   num_repeats=5,
-                   buffer_size=5,
-                   seed=None,
-                   reshuffle_each_iteration=None,
-                   build_saveable=True):
-    iterator = dataset_ops.Dataset.range(range_limit).shuffle(
+  def _build_shuffle_dataset(
+      self,
+      range_limit=10,
+      num_repeats=5,
+      buffer_size=5,
+      seed=None,
+      reshuffle_each_iteration=None,
+  ):
+    return dataset_ops.Dataset.range(range_limit).shuffle(
         buffer_size,
         seed=seed,
-        reshuffle_each_iteration=reshuffle_each_iteration).repeat(
-            num_repeats).make_initializable_iterator()
-    if build_saveable:
-      saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    ops.add_to_collection("iterator_ops", init_op)
-    ops.add_to_collection("iterator_ops", get_next)
-    saver = saver_lib.Saver(allow_empty=True)
-    return init_op, get_next, saver
-
-  def _ckpt_path(self):
-    return os.path.join(self.get_temp_dir(), "iterator")
-
-  def _latest_ckpt(self):
-    return saver_lib.latest_checkpoint(self.get_temp_dir())
-
-  def _save(self, sess, saver):
-    saver.save(sess, self._ckpt_path())
+        reshuffle_each_iteration=reshuffle_each_iteration).repeat(num_repeats)
 
-  def _restore(self, saver, sess):
-    saver.restore(sess, self._latest_ckpt())
+  def testShuffleCore(self):
 
-  def _import_meta_graph(self):
-    meta_file_path = self._ckpt_path() + ".meta"
-    return saver_lib.import_meta_graph(meta_file_path)
-
-  def _testReadWithBreaks(self, break_points, init_before_restore=False):
     seed = 55
     range_limit = 10
     num_repeats = 5
     num_outputs = range_limit * num_repeats
     buffer_sizes = [1, 3, 8, 10, 25, 50]
     reshuffle_each_iteration = False
+    # pylint: disable=cell-var-from-loop
+    # pylint: disable=g-long-lambda
     for buffer_size in buffer_sizes:
-      expected = []
-      actual = []
-      # Generate the ground truth.
-      with ops.Graph().as_default() as g:
-        g.seed = 10
-        init_op, get_next_op, _ = self._build_graph(
-            range_limit=range_limit,
-            num_repeats=num_repeats,
-            buffer_size=buffer_size,
-            seed=seed,
-            reshuffle_each_iteration=reshuffle_each_iteration)
-        with self.test_session(graph=g) as sess:
-          sess.run(init_op)
-          for _ in range(num_outputs):
-            expected.append(sess.run(get_next_op))
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-
-      # Run and checkpoint after first break_point.
-      with ops.Graph().as_default() as g:
-        g.seed = 10
-        init_op, get_next_op, saver = self._build_graph(
-            range_limit=range_limit,
-            num_repeats=num_repeats,
-            buffer_size=buffer_size,
-            seed=seed,
-            reshuffle_each_iteration=reshuffle_each_iteration)
-        with self.test_session(graph=g) as sess:
-          sess.run(init_op)
-          for _ in range(break_points[0]):
-            actual.append(sess.run(get_next_op))
-          self._save(sess, saver)
-
-      # Load from checkpoint and continue running while stopping at each
-      # subsequent checkpoint.
-      for i in range(len(break_points)):
-        with ops.Graph().as_default() as g:
-          saver = self._import_meta_graph()
-          init_op, get_next_op = ops.get_collection("iterator_ops")
-          with self.test_session(graph=g) as sess:
-            if init_before_restore:
-              sess.run(init_op)
-            self._restore(saver, sess)
-            start = break_points[i]
-            end = break_points[
-                i + 1] if i < len(break_points) - 1 else num_outputs
-            for _ in range(end - start):
-              actual.append(sess.run(get_next_op))
-            self._save(sess, saver)
-            if end == num_outputs:
-              with self.assertRaises(errors.OutOfRangeError):
-                sess.run(get_next_op)
-      self.assertEqual(expected, actual)
-
-  def testSaveRestore(self):
-    self._testReadWithBreaks([8])  # rng buffer_size: 0
-    self._testReadWithBreaks([13])  # rng buffer_size: 1
-    self._testReadWithBreaks([18])  # rng buffer_size: 2
-    self._testReadWithBreaks([23])  # rng buffer_size: 3
-
-  def testSaveUnusedIterator(self):
-    self._testReadWithBreaks([0])
-
-  def testSaveFullyUsedIterator(self):
-    self._testReadWithBreaks([50])
-
-  def testMultipleBreaks(self):
-    self._testReadWithBreaks([0, 5, 9, 15, 25, 32])
-
-  def testIdempotence(self):
-    # Attempt to save iterator immediately after restoring.
-    self._testReadWithBreaks([1, 1, 5, 5, 5, 25, 32])
-
-  def testInitThenRestore(self):
-    self._testReadWithBreaks([0, 5, 9, 15, 25, 32], init_before_restore=True)
-
-  def testRestoreExhaustedIterator(self):
-    seed = 55
-    range_limit = 10
-    num_repeats = 5
-    num_outputs = range_limit * num_repeats
-    buffer_sizes = [1, 3, 8, 10, 25, 50]
-    reshuffle_each_iteration = False
-    for buffer_size in buffer_sizes:
-      with ops.Graph().as_default() as g:
-        g.seed = 10
-        init_op, get_next_op, saver = self._build_graph(
-            range_limit=range_limit,
-            num_repeats=num_repeats,
-            buffer_size=buffer_size,
-            seed=seed,
-            reshuffle_each_iteration=reshuffle_each_iteration)
-        with self.test_session(graph=g) as sess:
-          sess.run(init_op)
-          for _ in range(num_outputs):
-            sess.run(get_next_op)
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-          self._save(sess, saver)
-
-        with ops.Graph().as_default() as g:
-          saver = self._import_meta_graph()
-          init_op, get_next_op = ops.get_collection("iterator_ops")
-          with self.test_session(graph=g) as sess:
-            self._restore(saver, sess)
-            with self.assertRaises(errors.OutOfRangeError):
-              sess.run(get_next_op)
-
-  def testResetRestoredIterator(self):
-    seed = 55
-    range_limit = 10
-    num_repeats = 5
-    num_outputs = range_limit * num_repeats
-    buffer_sizes = [1, 3, 8, 10, 25, 50]
-    reshuffle_each_iteration = False
-    for buffer_size in buffer_sizes:
-      with ops.Graph().as_default() as g:
-        g.seed = 10
-        init_op, get_next_op, saver = self._build_graph(
-            range_limit=range_limit,
-            num_repeats=num_repeats,
-            buffer_size=buffer_size,
-            seed=seed,
-            reshuffle_each_iteration=reshuffle_each_iteration)
-        with self.test_session(graph=g) as sess:
-          sess.run(init_op)
-          for _ in range(num_outputs // 2):
-            sess.run(get_next_op)
-          self._save(sess, saver)
-
-        outputs = []
-        with ops.Graph().as_default() as g:
-          saver = self._import_meta_graph()
-          init_op, get_next_op = ops.get_collection("iterator_ops")
-          with self.test_session(graph=g) as sess:
-            self._restore(saver, sess)
-            sess.run(init_op)
-            for _ in range(num_outputs):
-              outputs.append(sess.run(get_next_op))
-            with self.assertRaises(errors.OutOfRangeError):
-              sess.run(get_next_op)
-        expected_outputs_sorted = sorted(
-            np.array([range(range_limit)
-                      for _ in range(num_repeats)]).flatten())
-        self.assertEqual(expected_outputs_sorted, sorted(outputs))
-
-  def testRestoreInModifiedGraph(self):
-    seed = 55
-    break_point = 25
-    range_limit = 10
-    num_repeats = 5
-    num_outputs = range_limit * num_repeats
-    buffer_sizes = [3, 8, 10, 25, 50]
-    reshuffle_each_iteration = False
-    for buffer_size in buffer_sizes:
-      expected = []
-      actual_without_restore = []
-      actual = []
-      with ops.Graph().as_default() as g:
-        g.seed = 10
-        init_op, get_next_op, saver = self._build_graph(
-            range_limit=range_limit,
-            num_repeats=num_repeats,
-            buffer_size=buffer_size,
-            seed=seed,
-            reshuffle_each_iteration=reshuffle_each_iteration)
-        with self.test_session(graph=g) as sess:
-          sess.run(init_op)
-          for _ in range(break_point):
-            expected.append(sess.run(get_next_op))
-          actual.extend(expected)
-          self._save(sess, saver)
-          for _ in range(num_outputs - break_point):
-            expected.append(sess.run(get_next_op))
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-
-      with ops.Graph().as_default() as g:
-        g.seed = 20  # Different seed than previous graph for shuffle rngs.
-        init_op, get_next_op, saver = self._build_graph(
-            range_limit=range_limit,
-            num_repeats=num_repeats,
-            buffer_size=buffer_size,
-            seed=seed,
-            reshuffle_each_iteration=reshuffle_each_iteration)
-        with self.test_session(graph=g) as sess:
-          sess.run(init_op)
-          for _ in range(num_outputs):
-            actual_without_restore.append(sess.run(get_next_op))
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-
-      with ops.Graph().as_default() as g:
-        g.seed = 20  # Different seed than previous graph for shuffle rngs.
-        init_op, get_next_op, saver = self._build_graph(
-            range_limit=range_limit,
-            num_repeats=num_repeats,
-            buffer_size=buffer_size,
-            seed=seed,
-            reshuffle_each_iteration=reshuffle_each_iteration)
-        with self.test_session(graph=g) as sess:
-          self._restore(saver, sess)
-          for _ in range(num_outputs - break_point):
-            actual.append(sess.run(get_next_op))
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-
-      # Since the modified graph has a different random seed it produces a
-      # different order of examples.
-      self.assertNotEqual(expected, actual_without_restore)
-      self.assertEqual(sorted(expected), sorted(actual_without_restore))
-      self.assertEqual(expected, actual)
-
-  def testDoNotBuildSaveable(self):
-    seed = 55
-    break_point = 25
-    range_limit = 10
-    num_repeats = 5
-    num_outputs = range_limit * num_repeats
-    buffer_sizes = [3, 8, 10, 25, 50]
-    reshuffle_each_iteration = False
-    for buffer_size in buffer_sizes:
-      actual = []
-      with ops.Graph().as_default() as g:
-        g.seed = 10
-        init_op, get_next_op, saver = self._build_graph(
-            range_limit=range_limit,
-            num_repeats=num_repeats,
-            buffer_size=buffer_size,
-            seed=seed,
-            reshuffle_each_iteration=reshuffle_each_iteration)
-        with self.test_session(graph=g) as sess:
-          sess.run(init_op)
-          for _ in range(break_point):
-            sess.run(get_next_op)
-          self._save(sess, saver)
-
-      with ops.Graph().as_default() as g:
-        g.seed = 20  # Different seed than previous graph for shuffle rngs.
-        init_op, get_next_op, saver = self._build_graph(
-            range_limit=range_limit,
-            num_repeats=num_repeats,
-            buffer_size=buffer_size,
-            seed=seed,
-            reshuffle_each_iteration=reshuffle_each_iteration,
-            build_saveable=False)
-        with self.test_session(graph=g) as sess:
-          # Since the SaveableObject was not added to Saver's list
-          # of saveables, iterator state is not restored by saver.restore().
-          self._restore(saver, sess)
-          with self.assertRaises(errors.FailedPreconditionError):
-            sess.run(get_next_op)
-          sess.run(init_op)
-          for _ in range(num_outputs):
-            actual.append(sess.run(get_next_op))
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-      expected_outputs_sorted = sorted(
-          np.array([range(range_limit) for _ in range(num_repeats)]).flatten())
-      self.assertEqual(expected_outputs_sorted, sorted(actual))
+      self.run_core_tests(
+          lambda: self._build_shuffle_dataset(
+              range_limit=range_limit,
+              num_repeats=num_repeats,
+              buffer_size=buffer_size,
+              seed=seed,
+              reshuffle_each_iteration=reshuffle_each_iteration),
+          lambda: self._build_shuffle_dataset(
+              range_limit=range_limit,
+              num_repeats=num_repeats,
+              buffer_size=buffer_size,
+              seed=10,
+              reshuffle_each_iteration=reshuffle_each_iteration),
+          num_outputs)
+    # pylint: enable=cell-var-from-loop
+    # pylint: enable=g-long-lambda
+
+
+class ShuffleAndRepeatTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_ds(self, seed, count=5, num_elements=20):
+    return dataset_ops.Dataset.range(num_elements).apply(
+        shuffle_ops.shuffle_and_repeat(buffer_size=5, count=count, seed=seed))
+
+  def testCorrectOutput(self):
+    output = self.gen_outputs(lambda: self._build_ds(10), [], 100)
+    self.assertSequenceEqual(
+        sorted(output), sorted(
+            np.array([range(20) for _ in range(5)]).flatten()))
+    for i in range(5):
+      self.assertSequenceEqual(sorted(output[i * 20:(i + 1) * 20]), range(20))
+
+  def testReshuffling(self):
+    # Check that the output orders of different epochs are indeed different.
+    output = self.gen_outputs(lambda: self._build_ds(10), [], 100)
+    for i in range(4):
+      epoch1 = output[i * 20:(i + 1) * 20]
+      epoch2 = output[(i + 1) * 20:(i + 2) * 20]
+      self.assertNotEqual(epoch1, epoch2)
+
+  def testSameOrderForSameSeeds(self):
+    output1 = self.gen_outputs(lambda: self._build_ds(10), [], 100)
+    output2 = self.gen_outputs(lambda: self._build_ds(10), [], 100)
+    self.assertEqual(output1, output2)
+
+  def testDifferentOrderForDifferentSeeds(self):
+    output1 = self.gen_outputs(lambda: self._build_ds(10), [], 100)
+    output2 = self.gen_outputs(lambda: self._build_ds(20), [], 100)
+    self.assertNotEqual(output1, output2)
+    self.assertEqual(sorted(output1), sorted(output2))
+
+  def testCountNone(self):
+    output1 = self.gen_outputs(
+        lambda: self._build_ds(10, count=None), [], 100, verify_exhausted=False)
+    output2 = self.gen_outputs(
+        lambda: self._build_ds(20, count=None), [], 100, verify_exhausted=False)
+    self.assertNotEqual(output1, output2)
+    self.assertEqual(sorted(output1), sorted(output2))
+
+  def testCountMinusOne(self):
+    output1 = self.gen_outputs(
+        lambda: self._build_ds(10, count=-1), [], 100, verify_exhausted=False)
+    output2 = self.gen_outputs(
+        lambda: self._build_ds(20, count=-1), [], 100, verify_exhausted=False)
+    self.assertNotEqual(output1, output2)
+    self.assertEqual(sorted(output1), sorted(output2))
+
+  def testInfiniteOutputs(self):
+    # Asserting the iterator is exhausted after producing 100 items should fail.
+    with self.assertRaises(AssertionError):
+      self.gen_outputs(lambda: self._build_ds(10, count=None), [], 100)
+    with self.assertRaises(AssertionError):
+      self.gen_outputs(lambda: self._build_ds(10, count=-1), [], 100)
+
+  def testInfiniteEmpty(self):
+    with self.assertRaises(errors.OutOfRangeError):
+      self.gen_outputs(lambda: self._build_ds(10, count=None, num_elements=0),
+                       [], 100)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.gen_outputs(lambda: self._build_ds(10, count=-1, num_elements=0), [],
+                       100)
+
+  def testLargeBufferSize(self):
+    with ops.Graph().as_default() as g:
+      ds = dataset_ops.Dataset.range(20).apply(
+          shuffle_ops.shuffle_and_repeat(buffer_size=21))
+      get_next_op = ds.make_one_shot_iterator().get_next()
+      with self.test_session(graph=g) as sess:
+        sess.run(get_next_op)
+
+
+class ShuffleAndRepeatSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_ds(self, seed):
+    return dataset_ops.Dataset.range(20).apply(
+        shuffle_ops.shuffle_and_repeat(buffer_size=5, count=5, seed=seed))
+
+  def testCore(self):
+    self.run_core_tests(lambda: self._build_ds(10), lambda: self._build_ds(20),
+                        100)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
index 8f24d6b2f612cff662aa8a36085bc69a9ea1a290..07bdf920446e953c2a1abaf495d2e9e1256106fd 100644
--- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
 from tensorflow.contrib.data.python.ops import stats_ops
 from tensorflow.core.framework import summary_pb2
 from tensorflow.python.data.ops import dataset_ops
@@ -209,5 +210,48 @@ class StatsDatasetTest(test.TestCase):
         sess.run(stats_aggregator_1.subscribe(iterator))
 
 
+class StatsDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_dataset_bytes_stats(self, num_elements):
+    return dataset_ops.Dataset.range(num_elements).map(
+        lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
+            stats_ops.bytes_produced_stats("bytes_produced"))
+
+  def testBytesStatsDatasetSaveableCore(self):
+    num_outputs = 100
+    self.run_core_tests(
+        lambda: self._build_dataset_bytes_stats(num_outputs),
+        lambda: self._build_dataset_bytes_stats(num_outputs // 10), num_outputs)
+
+  def _build_dataset_latency_stats(self, num_elements, tag="record_latency"):
+    return dataset_ops.Dataset.range(num_elements).apply(
+        stats_ops.latency_stats(tag))
+
+  def _build_dataset_multiple_tags(self,
+                                   num_elements,
+                                   tag1="record_latency",
+                                   tag2="record_latency_2"):
+    return dataset_ops.Dataset.range(num_elements).apply(
+        stats_ops.latency_stats(tag1)).apply(stats_ops.latency_stats(tag2))
+
+  def testLatencyStatsDatasetSaveableCore(self):
+    num_outputs = 100
+
+    self.run_core_tests(
+        lambda: self._build_dataset_latency_stats(num_outputs),
+        lambda: self._build_dataset_latency_stats(num_outputs // 10),
+        num_outputs)
+
+    self.run_core_tests(lambda: self._build_dataset_multiple_tags(num_outputs),
+                        None, num_outputs)
+
+    tag1 = "record_latency"
+    tag2 = "record_latency"
+    self.run_core_tests(
+        lambda: self._build_dataset_multiple_tags(num_outputs, tag1, tag2),
+        None, num_outputs)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..55296d5710e7f66408bb7464cf790149d6df9fa1
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py
@@ -0,0 +1,96 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the experimental input pipeline ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base
+from tensorflow.contrib.data.python.ops import dataset_ops
+from tensorflow.contrib.data.python.ops import unique
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+
+
+class UniqueDatasetTest(test.TestCase):
+
+  def _testSimpleHelper(self, dtype, test_cases):
+    """Test the `unique()` transformation on a list of test cases.
+
+    Args:
+      dtype: The `dtype` of the elements in each test case.
+      test_cases: A list of pairs of lists. The first component is the test
+        input that will be passed to the transformation; the second component
+        is the expected sequence of outputs from the transformation.
+    """
+
+    # The `current_test_case` will be updated when we loop over `test_cases`
+    # below; declare it here so that the generator can capture it once.
+    current_test_case = []
+    dataset = dataset_ops.Dataset.from_generator(lambda: current_test_case,
+                                                 dtype).apply(unique.unique())
+    iterator = dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with self.test_session() as sess:
+      for test_case, expected in test_cases:
+        current_test_case = test_case
+        sess.run(iterator.initializer)
+        for element in expected:
+          if dtype == dtypes.string:
+            element = compat.as_bytes(element)
+          self.assertAllEqual(element, sess.run(next_element))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(next_element)
+
+  def testSimpleInt(self):
+    for dtype in [dtypes.int32, dtypes.int64]:
+      self._testSimpleHelper(dtype, [
+          ([], []),
+          ([1], [1]),
+          ([1, 1, 1, 1, 1, 1, 1], [1]),
+          ([1, 2, 3, 4], [1, 2, 3, 4]),
+          ([1, 2, 4, 3, 2, 1, 2, 3, 4], [1, 2, 4, 3]),
+          ([[1], [1, 1], [1, 1, 1]], [[1], [1, 1], [1, 1, 1]]),
+          ([[1, 1], [1, 1], [2, 2], [3, 3], [1, 1]], [[1, 1], [2, 2], [3, 3]]),
+      ])
+
+  def testSimpleString(self):
+    self._testSimpleHelper(dtypes.string, [
+        ([], []),
+        (["hello"], ["hello"]),
+        (["hello", "hello", "hello"], ["hello"]),
+        (["hello", "world"], ["hello", "world"]),
+        (["foo", "bar", "baz", "baz", "bar", "foo"], ["foo", "bar", "baz"]),
+    ])
+
+
+class UniqueSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def testUnique(self):
+
+    def build_dataset(num_elements, unique_elem_range):
+      return dataset_ops.Dataset.range(num_elements).map(
+          lambda x: x % unique_elem_range).apply(unique.unique())
+
+    self.run_core_tests(lambda: build_dataset(200, 100),
+                        lambda: build_dataset(40, 100), 100)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index 25ed58cdf5833cd041582046bc1a358625e321e0..4349085a10135b4dee842a29916aeb5febe9ddd4 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -40,6 +40,25 @@ py_library(
     ],
 )
 
+py_library(
+    name = "random_ops",
+    srcs = [
+        "random_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:random_seed",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
 py_library(
     name = "readers",
     srcs = [
@@ -62,6 +81,19 @@ py_library(
     ],
 )
 
+py_library(
+    name = "shuffle_ops",
+    srcs = [
+        "shuffle_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":random_ops",
+        ":transformation_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
 py_library(
     name = "transformation_ops",
     srcs = [
@@ -73,6 +105,7 @@ py_library(
         "resampling.py",
         "scan_ops.py",
         "stats_ops.py",
+        "unique.py",
     ],
     srcs_version = "PY2AND3",
     deps = [
@@ -89,6 +122,7 @@ py_library(
         "//tensorflow/python:tensor_util",
         "//tensorflow/python:util",
         "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:convert",
         "//tensorflow/python/data/util:nest",
         "//tensorflow/python/data/util:sparse",
         "//third_party/py/numpy",
diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py
index 63782d229e1535892686f202ca1f0833dee6ed80..76c07b2c999e1424e8efe4af515fddee73922c9c 100644
--- a/tensorflow/contrib/data/python/ops/batching.py
+++ b/tensorflow/contrib/data/python/ops/batching.py
@@ -22,6 +22,7 @@ from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import sparse
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
@@ -231,32 +232,29 @@ class DenseToSparseBatchDataset(dataset_ops.Dataset):
                       input_dataset.output_types)
     self._input_dataset = input_dataset
     self._batch_size = batch_size
-    # pylint: disable=protected-access
-    self._row_shape = dataset_ops._partial_shape_to_tensor(row_shape)
-    # pylint: enable=protected-access
+    self._row_shape = row_shape
 
   def _as_variant_tensor(self):
     return gen_dataset_ops.dense_to_sparse_batch_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         self._batch_size,
-        self._row_shape,
-        output_shapes=self.output_shapes,
-        output_types=self.output_types)
+        row_shape=dataset_ops._partial_shape_to_tensor(self._row_shape),  # pylint: disable=protected-access
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
+        output_types=nest.flatten(
+            sparse.as_dense_types(self.output_types, self.output_classes)))
 
   @property
   def output_classes(self):
-    return (ops.Tensor, ops.Tensor, ops.Tensor)
+    return sparse_tensor.SparseTensor
 
   @property
   def output_shapes(self):
-    num_elements = tensor_shape.Dimension(None)
-    return (tensor_shape.matrix(num_elements, self._row_shape.shape[0] + 1),
-            tensor_shape.vector(num_elements),
-            tensor_shape.vector(self._row_shape.shape[0] + 1))
+    return tensor_shape.vector(None).concatenate(self._row_shape)
 
   @property
   def output_types(self):
-    return (dtypes.int64, self._input_dataset.output_types, dtypes.int64)
+    return self._input_dataset.output_types
 
 
 class _RestructuredDataset(dataset_ops.Dataset):
@@ -390,17 +388,12 @@ def map_and_batch(map_func, batch_size, num_parallel_batches=1):
   """Fused implementation of `map` and `batch`.
 
   Maps `map_func` across `batch_size` consecutive elements of this dataset
-  and then combines them into a batch. Similarly to `batch_and_drop_remainder`,
-  if the batch size does not evenly divide the input dataset size, this
-  transformation will drop the final smaller element.
-
-
-  Functionally, it is equivalent to `map` followed by
-  `batch_and_drop_remainder`. However, by fusing the two transformations
-  together, the implementation can be more efficient. This transformation is a
-  stop gap solution for performance critical workloads. Once automatic input
-  pipeline optimization are implemented, the fusing of map and batch will not
-  need to be exposed at the API level and this method will be removed.
+  and then combines them into a batch. Functionally, it is equivalent to `map`
+  followed by `batch`. However, by fusing the two transformations together, the
+  implementation can be more efficient. Surfacing this transformation in the API
+  is temporary. Once automatic input pipeline optimization is implemented,
+  the fusing of `map` and `batch` will happen automatically and this API will be
+  deprecated.
 
   Args:
     map_func: A function mapping a nested structure of tensors to another
@@ -414,7 +407,7 @@ def map_and_batch(map_func, batch_size, num_parallel_batches=1):
 
   Returns:
     A `Dataset` transformation function, which can be passed to
-    @{tf.contrib.data.Dataset.apply}.
+    @{tf.data.Dataset.apply}.
   """
 
   def _apply_fn(dataset):
diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py
index 626a9e0edcea5928b1636c1a2a86e83657c966a5..fafd231061a9108b2585f4fc9256b6f069b7c37a 100644
--- a/tensorflow/contrib/data/python/ops/dataset_ops.py
+++ b/tensorflow/contrib/data/python/ops/dataset_ops.py
@@ -364,7 +364,7 @@ class Dataset(dataset_ops.Dataset):
     When reading a single input file, you can skip elements as follows:
 
     ```python
-    d = tf.contrib.data.TFRecordDataset(FLAGS.input_file)
+    d = tf.data.TFRecordDataset(FLAGS.input_file)
     d = d.shard(FLAGS.num_workers, FLAGS.worker_index)
     d = d.repeat(FLAGS.num_epochs)
     d = d.shuffle(FLAGS.shuffle_buffer_size)
@@ -382,12 +382,11 @@ class Dataset(dataset_ops.Dataset):
       sharding strategy within a complete pipeline:
 
     ```python
-    d = Dataset.list_files(FLAGS.pattern)
+    d = tf.data.Dataset.list_files(FLAGS.pattern)
     d = d.shard(FLAGS.num_workers, FLAGS.worker_index)
     d = d.repeat(FLAGS.num_epochs)
     d = d.shuffle(FLAGS.shuffle_buffer_size)
-    d = d.repeat()
-    d = d.interleave(tf.contrib.data.TFRecordDataset,
+    d = d.interleave(tf.data.TFRecordDataset,
                      cycle_length=FLAGS.num_readers, block_length=1)
     d = d.map(parser_fn, num_parallel_calls=FLAGS.num_map_threads)
     ```
@@ -549,7 +548,7 @@ class Dataset(dataset_ops.Dataset):
     elements are produced. `cycle_length` controls the number of input elements
     that are processed concurrently. If you set `cycle_length` to 1, this
     transformation will handle one input element at a time, and will produce
-    identical results = to @{tf.contrib.data.Dataset.flat_map}. In general,
+    identical results = to @{tf.data.Dataset.flat_map}. In general,
     this transformation will apply `map_func` to `cycle_length` input elements,
     open iterators on the returned `Dataset` objects, and cycle through them
     producing `block_length` consecutive elements from each iterator, and
diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py
index 53324e06e7f1dc249388410f0e14e42336630cd1..3124ca1d1540e12d949dded88ce1c66181be3595 100644
--- a/tensorflow/contrib/data/python/ops/interleave_ops.py
+++ b/tensorflow/contrib/data/python/ops/interleave_ops.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import convert
 from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import sparse
 from tensorflow.python.framework import dtypes
@@ -31,7 +32,7 @@ class ParallelInterleaveDataset(dataset_ops.Dataset):
   """A `Dataset` that maps a function over its input and flattens the result."""
 
   def __init__(self, input_dataset, map_func, cycle_length, block_length,
-               sloppy):
+               sloppy, buffer_output_elements, prefetch_input_elements):
     """See `tf.contrib.data.parallel_interleave()` for details."""
     super(ParallelInterleaveDataset, self).__init__()
     self._input_dataset = input_dataset
@@ -74,6 +75,14 @@ class ParallelInterleaveDataset(dataset_ops.Dataset):
         block_length, dtype=dtypes.int64, name="block_length")
     self._sloppy = ops.convert_to_tensor(
         sloppy, dtype=dtypes.bool, name="sloppy")
+    self._buffer_output_elements = convert.optional_param_to_tensor(
+        "buffer_output_elements",
+        buffer_output_elements,
+        argument_default=2 * block_length)
+    self._prefetch_input_elements = convert.optional_param_to_tensor(
+        "prefetch_input_elements",
+        prefetch_input_elements,
+        argument_default=2 * cycle_length)
 
   def _as_variant_tensor(self):
     return gen_dataset_ops.parallel_interleave_dataset(
@@ -82,6 +91,8 @@ class ParallelInterleaveDataset(dataset_ops.Dataset):
         self._cycle_length,
         self._block_length,
         self._sloppy,
+        self._buffer_output_elements,
+        self._prefetch_input_elements,
         f=self._map_func,
         output_types=nest.flatten(
             sparse.as_dense_types(self.output_types, self.output_classes)),
@@ -101,7 +112,12 @@ class ParallelInterleaveDataset(dataset_ops.Dataset):
     return self._output_types
 
 
-def parallel_interleave(map_func, cycle_length, block_length=1, sloppy=False):
+def parallel_interleave(map_func,
+                        cycle_length,
+                        block_length=1,
+                        sloppy=False,
+                        buffer_output_elements=None,
+                        prefetch_input_elements=None):
   """A parallel version of the `Dataset.interleave()` transformation.
 
   `parallel_interleave()` maps `map_func` across its input to produce nested
@@ -129,12 +145,17 @@ def parallel_interleave(map_func, cycle_length, block_length=1, sloppy=False):
 
   Args:
     map_func: A function mapping a nested structure of tensors to a `Dataset`.
-    cycle_length: The number of threads to interleave from in parallel.
-    block_length: The number of consecutive elements to pull from a thread
-      before advancing to the next thread.
+    cycle_length: The number of input `Dataset`s to interleave from in parallel.
+    block_length: The number of consecutive elements to pull from an input
+      `Dataset` before advancing to the next input `Dataset`.
     sloppy: If false, elements are produced in deterministic order. Otherwise,
       the implementation is allowed, for the sake of expediency, to produce
       elements in a non-deterministic order.
+    buffer_output_elements: The number of elements each iterator being
+      interleaved should buffer (similar to the `.prefetch()` transformation for
+      each interleaved iterator).
+    prefetch_input_elements: The number of input elements to transform to
+      iterators before they are needed for interleaving.
 
   Returns:
     A `Dataset` transformation function, which can be passed to
@@ -142,7 +163,9 @@ def parallel_interleave(map_func, cycle_length, block_length=1, sloppy=False):
   """
   def _apply_fn(dataset):
     return ParallelInterleaveDataset(
-        dataset, map_func, cycle_length, block_length, sloppy)
+        dataset, map_func, cycle_length, block_length, sloppy,
+        buffer_output_elements, prefetch_input_elements)
+
   return _apply_fn
 
 
@@ -187,11 +210,11 @@ def sloppy_interleave(map_func, cycle_length, block_length=1):
     map_func: A function mapping a nested structure of tensors (having shapes
       and types defined by `self.output_shapes` and `self.output_types`) to a
       `Dataset`.
-    cycle_length: The number of threads to interleave from in parallel.
-    block_length: The number of consecutive elements to pull from a thread
-      before advancing to the next thread. Note: sloppy_interleave will
-      skip the remainder of elements in the block_length in order to avoid
-      blocking.
+    cycle_length: The number of input `Dataset`s to interleave from in parallel.
+    block_length: The number of consecutive elements to pull from an input
+      `Dataset` before advancing to the next input `Dataset`. Note:
+      `sloppy_interleave` will skip the remainder of elements in the
+      `block_length` in order to avoid blocking.
 
   Returns:
     A `Dataset` transformation function, which can be passed to
@@ -199,5 +222,12 @@ def sloppy_interleave(map_func, cycle_length, block_length=1):
   """
   def _apply_fn(dataset):
     return ParallelInterleaveDataset(
-        dataset, map_func, cycle_length, block_length, sloppy=True)
+        dataset,
+        map_func,
+        cycle_length,
+        block_length,
+        sloppy=True,
+        buffer_output_elements=None,
+        prefetch_input_elements=None)
+
   return _apply_fn
diff --git a/tensorflow/contrib/data/python/ops/random_ops.py b/tensorflow/contrib/data/python/ops/random_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d727165feabb101549567f28a2dfa07083de244
--- /dev/null
+++ b/tensorflow/contrib/data/python/ops/random_ops.py
@@ -0,0 +1,67 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Datasets for random number generators."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import random_seed
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import gen_dataset_ops
+
+
+class RandomDataset(dataset_ops.Dataset):
+  """A `Dataset` of pseudorandom values."""
+
+  def __init__(self, seed=None):
+    """A `Dataset` of pseudorandom values."""
+    super(RandomDataset, self).__init__()
+    seed, seed2 = random_seed.get_seed(seed)
+    if seed is None:
+      self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed")
+    else:
+      self._seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed")
+    if seed2 is None:
+      self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2")
+    else:
+      self._seed2 = ops.convert_to_tensor(
+          seed2, dtype=dtypes.int64, name="seed2")
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.random_dataset(
+        seed=self._seed,
+        seed2=self._seed2,
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
+        output_types=nest.flatten(
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return ops.Tensor
+
+  @property
+  def output_shapes(self):
+    return tensor_shape.scalar()
+
+  @property
+  def output_types(self):
+    return dtypes.int64
diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py
index acb7a43211482f9cdeed66542abab5dbde78d60e..347e5edc7b0d479dfa260e8cec500ffaaba375be 100644
--- a/tensorflow/contrib/data/python/ops/readers.py
+++ b/tensorflow/contrib/data/python/ops/readers.py
@@ -179,6 +179,7 @@ def read_batch_features(file_pattern,
     dataset = dataset.shuffle(capacity)
   dataset = dataset.batch(batch_size)
   dataset = dataset.map(lambda x: parsing_ops.parse_example(x, features))
+  dataset = dataset.prefetch(1)
   iterator = dataset.make_one_shot_iterator()
   outputs = iterator.get_next()
   return outputs
diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py
index 2744786e9eec4c9268ba854df6ea761339bb0b4e..1c88366273f5d186509454188e02350d4ea9f66b 100644
--- a/tensorflow/contrib/data/python/ops/scan_ops.py
+++ b/tensorflow/contrib/data/python/ops/scan_ops.py
@@ -188,7 +188,7 @@ def scan(initial_state, scan_func):
 
   Returns:
     A `Dataset` transformation function, which can be passed to
-    @{tf.contrib.data.Dataset.apply}.
+    @{tf.data.Dataset.apply}.
   """
   def _apply_fn(dataset):
     return _ScanDataset(dataset, initial_state, scan_func)
diff --git a/tensorflow/contrib/data/python/ops/shuffle_ops.py b/tensorflow/contrib/data/python/ops/shuffle_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..99bb79bc06a421f811869ca9169aaa11deaca2f3
--- /dev/null
+++ b/tensorflow/contrib/data/python/ops/shuffle_ops.py
@@ -0,0 +1,120 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental shuffle ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import random_seed
+from tensorflow.python.ops import gen_dataset_ops
+
+
+class _ShuffleAndRepeatDataset(dataset_ops.Dataset):
+  """A `Dataset` that fuses `shuffle` and `repeat`."""
+
+  def __init__(self,
+               input_dataset,
+               buffer_size,
+               count=None,
+               seed=None):
+    """See `Dataset.map()` for details."""
+    super(_ShuffleAndRepeatDataset, self).__init__()
+    self._input_dataset = input_dataset
+    self._buffer_size = ops.convert_to_tensor(
+        buffer_size, dtype=dtypes.int64, name="buffer_size")
+    if count is None:
+      self._count = constant_op.constant(-1, dtype=dtypes.int64, name="count")
+    else:
+      self._count = ops.convert_to_tensor(
+          count, dtype=dtypes.int64, name="count")
+
+    seed, seed2 = random_seed.get_seed(seed)
+    if seed is None:
+      self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed")
+    else:
+      self._seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed")
+    if seed2 is None:
+      self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2")
+    else:
+      self._seed2 = ops.convert_to_tensor(
+          seed2, dtype=dtypes.int64, name="seed2")
+
+  def _as_variant_tensor(self):
+    # pylint: disable=protected-access
+    input_resource = self._input_dataset._as_variant_tensor()
+    return gen_dataset_ops.shuffle_and_repeat_dataset(
+        input_resource,
+        buffer_size=self._buffer_size,
+        count=self._count,
+        seed=self._seed,
+        seed2=self._seed2,
+        output_types=nest.flatten(
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)))
+    # pylint: enable=protected-access
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+def shuffle_and_repeat(buffer_size, count=None, seed=None):
+  """Shuffles and repeats a Dataset returning a new permutation for each epoch.
+
+  `dataset.apply(tf.contrib.data.shuffle_and_repeat(buffer_size, count))`
+
+  is equivalent to
+
+  `dataset.shuffle(buffer_size, reshuffle_each_iteration=True).repeat(count)`
+
+  The difference is that the latter dataset is not serializable. So,
+  if you need to checkpoint an input pipeline with reshuffling you must use
+  this implementation.
+
+  Args:
+    buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the
+      maximum number elements that will be buffered when prefetching.
+    count: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+      number of times the dataset should be repeated. The default behavior
+      (if `count` is `None` or `-1`) is for the dataset be repeated
+      indefinitely.
+    seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+      random seed that will be used to create the distribution. See
+      @{tf.set_random_seed} for behavior.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    @{tf.data.Dataset.apply}.
+  """
+
+  def _apply_fn(dataset):  # pylint: disable=missing-docstring
+    return _ShuffleAndRepeatDataset(dataset, buffer_size, count, seed)
+
+  return _apply_fn
diff --git a/tensorflow/contrib/data/python/ops/stats_ops.py b/tensorflow/contrib/data/python/ops/stats_ops.py
index b8875bd533ddc9e2c195646619dccf3aab5225e4..1dd0729513c0d46db25226178eb17b41efaae0ae 100644
--- a/tensorflow/contrib/data/python/ops/stats_ops.py
+++ b/tensorflow/contrib/data/python/ops/stats_ops.py
@@ -117,7 +117,7 @@ def bytes_produced_stats(tag):
 
   Returns:
     A `Dataset` transformation function, which can be passed to
-    @{tf.contrib.data.Dataset.apply}.
+    @{tf.data.Dataset.apply}.
   """
 
   def _apply_fn(dataset):
@@ -139,7 +139,7 @@ def latency_stats(tag):
 
   Returns:
     A `Dataset` transformation function, which can be passed to
-    @{tf.contrib.data.Dataset.apply}.
+    @{tf.data.Dataset.apply}.
   """
 
   def _apply_fn(dataset):
diff --git a/tensorflow/contrib/data/python/ops/unique.py b/tensorflow/contrib/data/python/ops/unique.py
new file mode 100644
index 0000000000000000000000000000000000000000..133e17d20d0fc4c8d52cef3c95c132374e927a0b
--- /dev/null
+++ b/tensorflow/contrib/data/python/ops/unique.py
@@ -0,0 +1,82 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Unique element dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import gen_dataset_ops
+
+
+def unique():
+  """Creates a `Dataset` from another `Dataset`, discarding duplicates.
+
+  Use this transformation to produce a dataset that contains one instance of
+  each unique element in the input. For example:
+
+  ```python
+  dataset = tf.data.Dataset.from_tensor_slices([1, 37, 2, 37, 2, 1])
+
+  # Using `unique()` will drop the duplicate elements.
+  dataset = dataset.apply(tf.contrib.data.unique())  # ==> { 1, 37, 2 }
+  ```
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    @{tf.data.Dataset.apply}.
+  """
+
+  def _apply_fn(dataset):
+    return UniqueDataset(dataset)
+
+  return _apply_fn
+
+
+class UniqueDataset(dataset_ops.Dataset):
+  """A `Dataset` contains the unique elements from its input."""
+
+  def __init__(self, input_dataset):
+    """See `unique()` for details."""
+    super(UniqueDataset, self).__init__()
+    self._input_dataset = input_dataset
+    if input_dataset.output_types not in (dtypes.int32, dtypes.int64,
+                                          dtypes.string):
+      raise TypeError(
+          "`tf.contrib.data.unique()` only supports inputs with a single "
+          "`tf.int32`, `tf.int64`, or `tf.string` component.")
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.unique_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        output_shapes=nest.flatten(
+            sparse.as_dense_shapes(self.output_shapes, self.output_classes)),
+        output_types=nest.flatten(
+            sparse.as_dense_types(self.output_types, self.output_classes)))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
diff --git a/tensorflow/contrib/decision_trees/proto/BUILD b/tensorflow/contrib/decision_trees/proto/BUILD
index 87c80740a8f0c0721394b5d832bc96e548e3a313..f6de5998d73a4869d2444cd90c9b64d1a2c889ac 100644
--- a/tensorflow/contrib/decision_trees/proto/BUILD
+++ b/tensorflow/contrib/decision_trees/proto/BUILD
@@ -7,7 +7,11 @@ exports_files([
     "generic_tree_model_proto.swig",
 ])
 
-load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library")
+load(
+    "//tensorflow/core:platform/default/build_config.bzl",
+    "tf_proto_library",
+    "tf_pyclif_proto_library",
+)
 
 filegroup(
     name = "all_files",
@@ -34,3 +38,10 @@ tf_proto_library(
     protodeps = [":generic_tree_model"],
     visibility = ["//visibility:public"],
 )
+
+tf_pyclif_proto_library(
+    name = "generic_tree_model_pyclif",
+    proto_lib = ":generic_tree_model",
+    proto_srcfile = "generic_tree_model.proto",
+    visibility = ["//visibility:public"],
+)
diff --git a/tensorflow/contrib/decision_trees/proto/generic_tree_model_proto.swig b/tensorflow/contrib/decision_trees/proto/generic_tree_model_proto.swig
index d3d201afd5761e7c5c136301c779222bedc68492..cafb9314caee1c4907786b8101e7c71bd7095306 100644
--- a/tensorflow/contrib/decision_trees/proto/generic_tree_model_proto.swig
+++ b/tensorflow/contrib/decision_trees/proto/generic_tree_model_proto.swig
@@ -2,7 +2,7 @@
 
 %include "net/proto/swig/protofunc.swig"
 
-#ifndef MUST_USE_RESULT
+#ifndef ABSL_MUST_USE_RESULT
 #error Use this file only as a %include or %import after google.swig.
 #endif
 
diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index b2c641f8ab3ea23c5135042e4b1223d487ae8cbc..95848af69950bdaa680c41daecd8cbd8f3174f8e 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -60,6 +60,7 @@ py_library(
         "//tensorflow/python:nn",
         "//tensorflow/python:nn_ops",
         "//tensorflow/python:random_ops",
+        "//tensorflow/python:spectral_ops",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:tensor_util",
         "//tensorflow/python:util",
@@ -437,6 +438,7 @@ cuda_py_test(
         "//tensorflow/python:framework",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:spectral_ops_test_util",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:nn_ops",
         "//tensorflow/python:platform_test",
diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 66827179e9fa1bea852f55246c263c4696cf3bdc..7b401e178f35fe56e4eb461936565f5c630ec4cf 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -159,6 +159,10 @@ _allowed_symbols = [
     'assign_log_moving_mean_exp',
     'moving_mean_variance',
     'estimator_head_distribution_regression',
+    'quadrature_scheme_softmaxnormal_gauss_hermite',
+    'quadrature_scheme_softmaxnormal_quantiles',
+    'quadrature_scheme_lognormal_gauss_hermite',
+    'quadrature_scheme_lognormal_quantiles',
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py
index 25a9b6f5fe2ed6d218d6b44650fce17fa89c0664..dcfb0eb05185d36d96947905c2eb91b2201aece1 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/masked_autoregressive_test.py
@@ -22,9 +22,9 @@ import numpy as np
 
 from tensorflow.contrib.distributions.python.ops import test_util
 from tensorflow.contrib.distributions.python.ops.bijectors.invert import Invert
+from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import _gen_mask
 from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import masked_autoregressive_default_template
 from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import MaskedAutoregressiveFlow
-from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive_impl import _gen_mask
 from tensorflow.python.framework import constant_op
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import variables
@@ -149,5 +149,17 @@ class MaskedAutoregressiveFlowShiftOnlyTest(MaskedAutoregressiveFlowTest):
     }
 
 
+class MaskedAutoregressiveFlowUnrollLoopTest(MaskedAutoregressiveFlowTest):
+
+  @property
+  def _autoregressive_flow_kwargs(self):
+    return {
+        "shift_and_log_scale_fn": masked_autoregressive_default_template(
+            hidden_layers=[2], shift_only=False),
+        "is_constant_jacobian": False,
+        "unroll_loop": True,
+    }
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py
index 38b3a23c2d684a6f89b7c4be4a763c649bf4de15..49451446b56d290f130c5db90c13b94974d92dc9 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/reshape_test.py
@@ -28,8 +28,19 @@ from tensorflow.python.ops.distributions.bijector_test_util import assert_biject
 from tensorflow.python.platform import test
 
 
-class ReshapeBijectorTest(test.TestCase):
-  """Tests correctness of the reshape transformation."""
+class _ReshapeBijectorTest(object):
+  """Base class for testing the reshape transformation.
+
+  Methods defined in this class call a method self.build_shapes() that
+  is implemented by subclasses defined below, returning respectively
+   ReshapeBijectorTestStatic: static shapes,
+   ReshapeBijectorTestDynamic: shape placeholders of known ndims, and
+   ReshapeBijectorTestDynamicNdims: shape placeholders of unspecified ndims,
+  so that each test in this base class is automatically run over all
+  three cases. The subclasses also implement assertRaisesError to test
+  for either Python exceptions (in the case of static shapes) or
+  TensorFlow op errors (dynamic shapes).
+  """
 
   def setUp(self):
     self._rng = np.random.RandomState(42)
@@ -40,9 +51,10 @@ class ReshapeBijectorTest(test.TestCase):
     expected_y = np.reshape(expected_x, [4, 6])
 
     with self.test_session() as sess:
+      shape_in, shape_out, feed_dict = self.build_shapes([3, 2], [6,])
       bijector = Reshape(
-          event_shape_out=[6,],
-          event_shape_in=[3, 2],
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
           validate_args=True)
       (x_,
        y_,
@@ -52,66 +64,23 @@ class ReshapeBijectorTest(test.TestCase):
            bijector.forward(expected_x),
            bijector.forward_log_det_jacobian(expected_x),
            bijector.inverse_log_det_jacobian(expected_y),
-       ))
+       ), feed_dict=feed_dict)
       self.assertEqual("reshape", bijector.name)
       self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0)
       self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0)
       self.assertAllClose(0., fldj_, rtol=1e-6, atol=0)
       self.assertAllClose(0., ildj_, rtol=1e-6, atol=0)
 
-  def testEventShapeDynamicNdims(self):
-    """Check forward/inverse shape methods with dynamic ndims."""
-
-    shape_in = tensor_shape.TensorShape([6,])
-    shape_in_ph = array_ops.placeholder(dtype=dtypes.int32)
-
-    shape_out = tensor_shape.TensorShape([2, 3])
-    shape_out_ph = array_ops.placeholder(dtype=dtypes.int32)
-
-    bijector = Reshape(
-        event_shape_out=shape_out_ph,
-        event_shape_in=shape_in_ph, validate_args=True)
-
-    # using the _tensor methods, we should always get a fully-specified
-    # result since these are evaluated at graph runtime.
-    with self.test_session() as sess:
-      (shape_out_,
-       shape_in_) = sess.run((
-           bijector.forward_event_shape_tensor(shape_in),
-           bijector.inverse_event_shape_tensor(shape_out),
-       ), feed_dict={
-           shape_in_ph: shape_in,
-           shape_out_ph: shape_out,
-       })
-      self.assertAllEqual(shape_out, shape_out_)
-      self.assertAllEqual(shape_in, shape_in_)
-
-  def testEventShapeDynamic(self):
-    """Check shape methods with static ndims but dynamic shape."""
-
-    shape_in = tensor_shape.TensorShape([6,])
-    shape_in_partial = tensor_shape.TensorShape([None,])
-    shape_in_ph = array_ops.placeholder(
-        shape=[1,], dtype=dtypes.int32)
-
-    shape_out = tensor_shape.TensorShape([2, 3])
-    shape_out_partial = tensor_shape.TensorShape([None, None])
-    shape_out_ph = array_ops.placeholder(
-        shape=[2,], dtype=dtypes.int32)
+  def testEventShapeTensor(self):
+    """Test event_shape_tensor methods when even ndims may be dynamic."""
 
+    shape_in_static = [2, 3]
+    shape_out_static = [6,]
+    shape_in, shape_out, feed_dict = self.build_shapes(shape_in_static,
+                                                       shape_out_static)
     bijector = Reshape(
-        event_shape_out=shape_out_ph,
-        event_shape_in=shape_in_ph,
-        validate_args=True)
-
-    # if event shapes are not statically available, should
-    # return partially-specified TensorShapes.
-    self.assertAllEqual(
-        bijector.forward_event_shape(shape_in).as_list(),
-        shape_out_partial.as_list())
-    self.assertAllEqual(
-        bijector.inverse_event_shape(shape_out).as_list(),
-        shape_in_partial.as_list())
+        event_shape_out=shape_out,
+        event_shape_in=shape_in, validate_args=True)
 
     # using the _tensor methods, we should always get a fully-specified
     # result since these are evaluated at graph runtime.
@@ -120,42 +89,9 @@ class ReshapeBijectorTest(test.TestCase):
        shape_in_) = sess.run((
            bijector.forward_event_shape_tensor(shape_in),
            bijector.inverse_event_shape_tensor(shape_out),
-       ), feed_dict={
-           shape_in_ph: shape_in,
-           shape_out_ph: shape_out,
-       })
-      self.assertAllEqual(shape_out, shape_out_)
-      self.assertAllEqual(shape_in, shape_in_)
-
-  def testEventShapeStatic(self):
-    """Check shape methods when shape is statically known."""
-
-    shape_in = tensor_shape.TensorShape([6,])
-    shape_out = tensor_shape.TensorShape([2, 3])
-
-    bijector_static = Reshape(
-        event_shape_out=shape_out,
-        event_shape_in=shape_in,
-        validate_args=True)
-
-    # test that forward_ and inverse_event_shape do sensible things
-    # when shapes are statically known.
-    self.assertEqual(
-        bijector_static.forward_event_shape(shape_in),
-        shape_out)
-    self.assertEqual(
-        bijector_static.inverse_event_shape(shape_out),
-        shape_in)
-
-    with self.test_session() as sess:
-      (shape_out_static_,
-       shape_in_static_,
-      ) = sess.run((
-          bijector_static.forward_event_shape_tensor(shape_in),
-          bijector_static.inverse_event_shape_tensor(shape_out),
-      ))
-      self.assertAllEqual(shape_out, shape_out_static_)
-      self.assertAllEqual(shape_in, shape_in_static_)
+       ), feed_dict=feed_dict)
+      self.assertAllEqual(shape_out_static, shape_out_)
+      self.assertAllEqual(shape_in_static, shape_in_)
 
   def testScalarReshape(self):
     """Test reshaping to and from a scalar shape ()."""
@@ -166,11 +102,11 @@ class ReshapeBijectorTest(test.TestCase):
     expected_x_scalar = np.random.randn(1,)
     expected_y_scalar = expected_x_scalar[0]
 
+    shape_in, shape_out, feed_dict = self.build_shapes([], [1,])
     with self.test_session() as sess:
       bijector = Reshape(
-          event_shape_out=[],
-          event_shape_in=[1,], validate_args=True)
-
+          event_shape_out=shape_in,
+          event_shape_in=shape_out, validate_args=True)
       (x_,
        y_,
        x_scalar_,
@@ -180,53 +116,178 @@ class ReshapeBijectorTest(test.TestCase):
           bijector.forward(expected_x),
           bijector.inverse(expected_y_scalar),
           bijector.forward(expected_x_scalar),
-      ))
+      ), feed_dict=feed_dict)
       self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0)
       self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0)
       self.assertAllClose(expected_y_scalar, y_scalar_, rtol=1e-6, atol=0)
       self.assertAllClose(expected_x_scalar, x_scalar_, rtol=1e-6, atol=0)
 
-  def testRaisesOpError(self):
-    x1 = np.random.randn(4, 2, 3)
-    x2 = np.random.randn(4, 3, 2)
-    x3 = np.random.randn(4, 5, 1, 1)
+  def testMultipleUnspecifiedDimensionsOpError(self):
 
     with self.test_session() as sess:
-      shape_in_ph = array_ops.placeholder(shape=[2,], dtype=dtypes.int32)
-      shape_out_ph = array_ops.placeholder(shape=[3,], dtype=dtypes.int32)
+      shape_in, shape_out, feed_dict = self.build_shapes([2, 3], [4, -1, -1,])
       bijector = Reshape(
-          event_shape_out=shape_out_ph,
-          event_shape_in=shape_in_ph,
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
           validate_args=True)
 
-      with self.assertRaisesOpError(
+      with self.assertRaisesError(
+          "elements must have at most one `-1`."):
+        sess.run(bijector.forward_event_shape_tensor(shape_in),
+                 feed_dict=feed_dict)
+
+  def testInvalidDimensionsOpError(self):
+
+    with self.test_session() as sess:
+
+      shape_in, shape_out, feed_dict = self.build_shapes([2, 3], [1, 2, -2,])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
+
+      with self.assertRaisesError(
+          "elements must be either positive integers or `-1`."):
+        sess.run(bijector.forward_event_shape_tensor(shape_in),
+                 feed_dict=feed_dict)
+
+  def testValidButNonMatchingInputOpError(self):
+    x = np.random.randn(4, 3, 2)
+
+    with self.test_session() as sess:
+      shape_in, shape_out, feed_dict = self.build_shapes([2, 3], [1, 6, 1,])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
+
+      # Here we pass in a tensor (x) whose shape is compatible with
+      # the output shape, so tf.reshape will throw no error, but
+      # doesn't match the expected input shape.
+      with self.assertRaisesError(
           "Input `event_shape` does not match `event_shape_in`."):
-        sess.run(bijector.forward(x2),
-                 feed_dict={shape_out_ph: [1, 6, 1],
-                            shape_in_ph: [2, 3]})
+        sess.run(bijector.forward(x),
+                 feed_dict=feed_dict)
 
-      with self.assertRaisesOpError(
-          "event_shape_out entries must be positive."):
-        sess.run(bijector.forward(x1),
-                 feed_dict={shape_out_ph: [-1, -1, 6],
-                            shape_in_ph: [2, 3]})
+  def testValidButNonMatchingInputPartiallySpecifiedOpError(self):
+    x = np.random.randn(4, 3, 2)
+
+    with self.test_session() as sess:
+      shape_in, shape_out, feed_dict = self.build_shapes([2, -1], [1, 6, 1,])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
+
+      with self.assertRaisesError(
+          "Input `event_shape` does not match `event_shape_in`."):
+        sess.run(bijector.forward(x),
+                 feed_dict=feed_dict)
+
+  def testInputOutputMismatchOpError(self):
+    x1 = np.random.randn(4, 2, 3)
+    x2 = np.random.randn(4, 1, 1, 5)
+
+    with self.test_session() as sess:
+      shape_in, shape_out, fd_mismatched = self.build_shapes([2, 3],
+                                                             [1, 1, 5])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
 
       # test that *all* methods check basic assertions
-      fd_mismatched = {shape_out_ph: [1, 1, 5], shape_in_ph: [2, 3]}
-      with self.assertRaisesOpError(
-          "Input/output `event_size`s do not match."):
+      with self.assertRaisesError(
+          "Input to reshape is a tensor with"):
         sess.run(bijector.forward(x1), feed_dict=fd_mismatched)
-      with self.assertRaisesOpError(
-          "Input/output `event_size`s do not match."):
-        sess.run(bijector.inverse(x3), feed_dict=fd_mismatched)
-      with self.assertRaisesOpError(
-          "Input/output `event_size`s do not match."):
-        sess.run(bijector.inverse_log_det_jacobian(x3),
-                 feed_dict=fd_mismatched)
-      with self.assertRaisesOpError(
-          "Input/output `event_size`s do not match."):
-        sess.run(bijector.forward_log_det_jacobian(x1),
-                 feed_dict=fd_mismatched)
+      with self.assertRaisesError(
+          "Input to reshape is a tensor with"):
+        sess.run(bijector.inverse(x2), feed_dict=fd_mismatched)
+
+  def testOneShapePartiallySpecified(self):
+    expected_x = np.random.randn(4, 6)
+    expected_y = np.reshape(expected_x, [4, 2, 3])
+
+    with self.test_session() as sess:
+      # one of input/output shapes is partially specified
+      shape_in, shape_out, feed_dict = self.build_shapes([-1,], [2, 3])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
+      (x_,
+       y_,
+      ) = sess.run((
+          bijector.inverse(expected_y),
+          bijector.forward(expected_x),
+      ), feed_dict=feed_dict)
+      self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0)
+      self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0)
+
+  def testBothShapesPartiallySpecified(self):
+    expected_x = np.random.randn(4, 2, 3)
+    expected_y = np.reshape(expected_x, [4, 3, 2])
+    with self.test_session() as sess:
+      shape_in, shape_out, feed_dict = self.build_shapes([-1, 3], [-1, 2])
+      bijector = Reshape(
+          event_shape_out=shape_out,
+          event_shape_in=shape_in,
+          validate_args=True)
+      (x_,
+       y_,
+      ) = sess.run((
+          bijector.inverse(expected_y),
+          bijector.forward(expected_x),
+      ), feed_dict=feed_dict)
+      self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0)
+      self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0)
+
+  def testDefaultVectorShape(self):
+    expected_x = np.random.randn(4, 4)
+    expected_y = np.reshape(expected_x, [4, 2, 2])
+    with self.test_session() as sess:
+      _, shape_out, feed_dict = self.build_shapes([-1,], [-1, 2])
+      bijector = Reshape(shape_out,
+                         validate_args=True)
+      (x_,
+       y_,
+      ) = sess.run((
+          bijector.inverse(expected_y),
+          bijector.forward(expected_x),
+      ), feed_dict=feed_dict)
+      self.assertAllClose(expected_y, y_, rtol=1e-6, atol=0)
+      self.assertAllClose(expected_x, x_, rtol=1e-6, atol=0)
+
+  def build_shapes(self, *args, **kwargs):
+    raise NotImplementedError("Subclass failed to implement `build_shapes`.")
+
+
+class ReshapeBijectorTestStatic(test.TestCase, _ReshapeBijectorTest):
+
+  def build_shapes(self, shape_in, shape_out):
+    shape_in_static = shape_in
+    shape_out_static = shape_out
+    feed_dict = {}
+    return shape_in_static, shape_out_static, feed_dict
+
+  def assertRaisesError(self, msg):
+    return self.assertRaisesRegexp(Exception, msg)
+
+  def testEventShape(self):
+    shape_in_static = tensor_shape.TensorShape([2, 3])
+    shape_out_static = tensor_shape.TensorShape([6,])
+    bijector = Reshape(
+        event_shape_out=shape_out_static,
+        event_shape_in=shape_in_static, validate_args=True)
+
+    # test that forward_ and inverse_event_shape do sensible things
+    # when shapes are statically known.
+    self.assertEqual(
+        bijector.forward_event_shape(shape_in_static),
+        shape_out_static)
+    self.assertEqual(
+        bijector.inverse_event_shape(shape_out_static),
+        shape_in_static)
 
   def testBijectiveAndFinite(self):
     x = np.random.randn(4, 2, 3)
@@ -238,5 +299,32 @@ class ReshapeBijectorTest(test.TestCase):
           validate_args=True)
       assert_bijective_and_finite(bijector, x, y, rtol=1e-6, atol=0)
 
+
+class ReshapeBijectorTestDynamic(test.TestCase, _ReshapeBijectorTest):
+
+  def build_shapes(self, shape_in, shape_out):
+    shape_in_ph = array_ops.placeholder(shape=(len(shape_in),),
+                                        dtype=dtypes.int32)
+    shape_out_ph = array_ops.placeholder(shape=(len(shape_out),),
+                                         dtype=dtypes.int32)
+    feed_dict = {shape_in_ph: shape_in, shape_out_ph: shape_out}
+    return shape_in_ph, shape_out_ph, feed_dict
+
+  def assertRaisesError(self, msg):
+    return self.assertRaisesOpError(msg)
+
+
+class ReshapeBijectorTestDynamicNdims(test.TestCase, _ReshapeBijectorTest):
+
+  def build_shapes(self, shape_in, shape_out):
+    shape_in_ph = array_ops.placeholder(shape=None, dtype=dtypes.int32)
+    shape_out_ph = array_ops.placeholder(shape=None, dtype=dtypes.int32)
+    feed_dict = {shape_in_ph: shape_in, shape_out_ph: shape_out}
+    return shape_in_ph, shape_out_ph, feed_dict
+
+  def assertRaisesError(self, msg):
+    return self.assertRaisesOpError(msg)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py b/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py
index 2d74aa1f320149d0f7ef9e9c52b8c7053c2f74d7..a255d4fc890e67180532e342332a8e3f63a869cd 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py
@@ -395,5 +395,110 @@ class MixtureStddevTest(test.TestCase):
     self.assertAllClose(actual_devs, expected_devs)
 
 
+class _PadTest(object):
+
+  def testNegAxisCorrectness(self):
+    x_ = np.float32([[1., 2, 3],
+                     [4, 5, 6]])
+    value_ = np.float32(0.25)
+    count_ = np.int32(2)
+    with self.test_session() as sess:
+      x = array_ops.placeholder_with_default(
+          x_, shape=x_.shape if self.is_static_shape else None)
+      value = (constant_op.constant(value_) if self.is_static_shape
+               else array_ops.placeholder_with_default(value_, shape=None))
+      count = (constant_op.constant(count_) if self.is_static_shape
+               else array_ops.placeholder_with_default(count_, shape=None))
+
+      x0_front = distribution_util.pad(
+          x, axis=-2, value=value, count=count, front=True)
+      x0_back = distribution_util.pad(
+          x, axis=-2, count=count, back=True)
+      x0_both = distribution_util.pad(
+          x, axis=-2, value=value, front=True, back=True)
+
+      if self.is_static_shape:
+        self.assertAllEqual([4, 3], x0_front.shape)
+        self.assertAllEqual([4, 3], x0_back.shape)
+        self.assertAllEqual([4, 3], x0_both.shape)
+
+      [x0_front_, x0_back_, x0_both_] = sess.run([
+          x0_front, x0_back, x0_both])
+
+      self.assertAllClose(
+          np.float32([[value_]*3,
+                      [value_]*3,
+                      [1, 2, 3],
+                      [4, 5, 6]]),
+          x0_front_, atol=0., rtol=1e-6)
+      self.assertAllClose(
+          np.float32([[1, 2, 3],
+                      [4, 5, 6],
+                      [0.]*3,
+                      [0.]*3]),
+          x0_back_, atol=0., rtol=1e-6)
+      self.assertAllClose(
+          np.float32([[value_]*3,
+                      [1, 2, 3],
+                      [4, 5, 6],
+                      [value_]*3]),
+          x0_both_, atol=0., rtol=1e-6)
+
+  def testPosAxisCorrectness(self):
+    x_ = np.float32([[1., 2, 3],
+                     [4, 5, 6]])
+    value_ = np.float32(0.25)
+    count_ = np.int32(2)
+    with self.test_session() as sess:
+      x = array_ops.placeholder_with_default(
+          x_, shape=x_.shape if self.is_static_shape else None)
+      value = (constant_op.constant(value_) if self.is_static_shape
+               else array_ops.placeholder_with_default(value_, shape=None))
+      count = (constant_op.constant(count_) if self.is_static_shape
+               else array_ops.placeholder_with_default(count_, shape=None))
+
+      x1_front = distribution_util.pad(
+          x, axis=1, value=value, count=count, front=True)
+      x1_back = distribution_util.pad(
+          x, axis=1, count=count, back=True)
+      x1_both = distribution_util.pad(
+          x, axis=1, value=value, front=True, back=True)
+
+      if self.is_static_shape:
+        self.assertAllEqual([2, 5], x1_front.shape)
+        self.assertAllEqual([2, 5], x1_back.shape)
+        self.assertAllEqual([2, 5], x1_both.shape)
+
+      [x1_front_, x1_back_, x1_both_] = sess.run([
+          x1_front, x1_back, x1_both])
+
+      self.assertAllClose(
+          np.float32([[value_]*2 + [1, 2, 3],
+                      [value_]*2 + [4, 5, 6]]),
+          x1_front_, atol=0., rtol=1e-6)
+      self.assertAllClose(
+          np.float32([[1, 2, 3] + [0.]*2,
+                      [4, 5, 6] + [0.]*2]),
+          x1_back_, atol=0., rtol=1e-6)
+      self.assertAllClose(
+          np.float32([[value_, 1, 2, 3, value_],
+                      [value_, 4, 5, 6, value_]]),
+          x1_both_, atol=0., rtol=1e-6)
+
+
+class PadStaticTest(_PadTest, test.TestCase):
+
+  @property
+  def is_static_shape(self):
+    return True
+
+
+class PadDynamicTest(_PadTest, test.TestCase):
+
+  @property
+  def is_static_shape(self):
+    return False
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py b/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py
index a7571806f295af4566e57ac4a785bc8774fd31ab..a4e75660083dc2edd1759a3a54e221d9e8a268c3 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/half_normal_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import importlib
 import numpy as np
 
+from tensorflow.contrib.distributions.python.ops import half_normal as hn_lib
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -28,7 +29,6 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import variables
-from tensorflow.contrib.distributions.python.ops import half_normal as hn_lib
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging
 
@@ -200,7 +200,7 @@ class HalfNormalTest(test.TestCase):
     with self.test_session():
       scale = np.array([[1.0, 2.0, 3.0]])
       halfnorm = hn_lib.HalfNormal(scale=scale)
-      
+
       # See https://en.wikipedia.org/wiki/Half-normal_distribution for the
       # entropy formula used here.
       expected_entropy = 0.5 * np.log(np.pi * scale ** 2.0 / 2.0) + 0.5
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py b/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py
index ece6bc077d9e21502fdfd01300a9d3e9f2c9c380..ff6092fc260660b512e8123823c63e98a023af6d 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py
@@ -45,6 +45,17 @@ class MixtureSameFamilyTest(test_util.VectorDistributionTestHelpers,
       self.assertEqual([4, 5], x.shape)
       self.assertEqual([4, 5], log_prob_x.shape)
 
+  def testSampleAndLogProbBatch(self):
+    with self.test_session():
+      gm = mixture_same_family_lib.MixtureSameFamily(
+          mixture_distribution=categorical_lib.Categorical(probs=[[0.3, 0.7]]),
+          components_distribution=normal_lib.Normal(
+              loc=[[-1., 1]], scale=[[0.1, 0.5]]))
+      x = gm.sample([4, 5], seed=42)
+      log_prob_x = gm.log_prob(x)
+      self.assertEqual([4, 5, 1], x.shape)
+      self.assertEqual([4, 5, 1], log_prob_x.shape)
+
   def testSampleAndLogProbShapesBroadcastMix(self):
     mix_probs = np.float32([.3, .7])
     bern_probs = np.float32([[.4, .6], [.25, .75]])
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/poisson_lognormal_test.py b/tensorflow/contrib/distributions/python/kernel_tests/poisson_lognormal_test.py
index 3c0147b8cf6e1b6a2791e85c0c0997992445fa7e..1035cb00f76d95c7c52c3e812e8bb2868d34b890 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/poisson_lognormal_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/poisson_lognormal_test.py
@@ -18,37 +18,40 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
 from tensorflow.contrib.distributions.python.ops import poisson_lognormal
 from tensorflow.contrib.distributions.python.ops import test_util
-from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class PoissonLogNormalQuadratureCompoundTest(
-    test_util.DiscreteScalarDistributionTestHelpers, test.TestCase):
+class _PoissonLogNormalQuadratureCompoundTest(
+    test_util.DiscreteScalarDistributionTestHelpers):
   """Tests the PoissonLogNormalQuadratureCompoundTest distribution."""
 
   def testSampleProbConsistent(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=-2.,
-          scale=1.1,
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              -2.,
+              shape=[] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              1.1,
+              shape=[] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_log_prob(
-          sess.run, pln, rtol=0.1)
+          sess.run, pln, batch_size=1, rtol=0.1)
 
   def testMeanVariance(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=0.,
-          scale=1.,
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              0.,
+              shape=[] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              1.,
+              shape=[] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_mean_variance(
           sess.run, pln, rtol=0.02)
@@ -56,21 +59,27 @@ class PoissonLogNormalQuadratureCompoundTest(
   def testSampleProbConsistentBroadcastScalar(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=[0., -0.5],
-          scale=1.,
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              [0., -0.5],
+              shape=[2] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              1.,
+              shape=[] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_log_prob(
-          sess.run, pln, rtol=0.1, atol=0.01)
+          sess.run, pln, batch_size=2, rtol=0.1, atol=0.01)
 
   def testMeanVarianceBroadcastScalar(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=[0., -0.5],
-          scale=1.,
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              [0., -0.5],
+              shape=[2] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              1.,
+              shape=[] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_mean_variance(
           sess.run, pln, rtol=0.1, atol=0.01)
@@ -78,38 +87,46 @@ class PoissonLogNormalQuadratureCompoundTest(
   def testSampleProbConsistentBroadcastBoth(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=[[0.], [-0.5]],
-          scale=[[1., 0.9]],
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              [[0.], [-0.5]],
+              shape=[2, 1] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              [[1., 0.9]],
+              shape=[1, 2] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_log_prob(
-          sess.run, pln, rtol=0.1, atol=0.08)
+          sess.run, pln, batch_size=4, rtol=0.1, atol=0.08)
 
   def testMeanVarianceBroadcastBoth(self):
     with self.test_session() as sess:
       pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=[[0.], [-0.5]],
-          scale=[[1., 0.9]],
-          quadrature_grid_and_probs=(
-              np.polynomial.hermite.hermgauss(deg=10)),
+          loc=array_ops.placeholder_with_default(
+              [[0.], [-0.5]],
+              shape=[2, 1] if self.static_shape else None),
+          scale=array_ops.placeholder_with_default(
+              [[1., 0.9]],
+              shape=[1, 2] if self.static_shape else None),
+          quadrature_size=10,
           validate_args=True)
       self.run_test_sample_consistent_mean_variance(
           sess.run, pln, rtol=0.1, atol=0.01)
 
-  def testSampleProbConsistentDynamicQuadrature(self):
-    with self.test_session() as sess:
-      qgrid = array_ops.placeholder(dtype=dtypes.float32)
-      qprobs = array_ops.placeholder(dtype=dtypes.float32)
-      g, p = np.polynomial.hermite.hermgauss(deg=10)
-      pln = poisson_lognormal.PoissonLogNormalQuadratureCompound(
-          loc=-2.,
-          scale=1.1,
-          quadrature_grid_and_probs=(g, p),
-          validate_args=True)
-      self.run_test_sample_consistent_log_prob(
-          lambda x: sess.run(x, feed_dict={qgrid: g, qprobs: p}),
-          pln, rtol=0.1)
+
+class PoissonLogNormalQuadratureCompoundStaticShapeTest(
+    _PoissonLogNormalQuadratureCompoundTest, test.TestCase):
+
+  @property
+  def static_shape(self):
+    return True
+
+
+class PoissonLogNormalQuadratureCompoundDynamicShapeTest(
+    _PoissonLogNormalQuadratureCompoundTest, test.TestCase):
+
+  @property
+  def static_shape(self):
+    return False
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/sample_stats_test.py b/tensorflow/contrib/distributions/python/kernel_tests/sample_stats_test.py
index 595d9f5df755d7defa63d385039bafe4f87aa6ec..4186cf129dbf31724c84133734da3f226817c71a 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/sample_stats_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/sample_stats_test.py
@@ -23,11 +23,244 @@ import numpy as np
 from tensorflow.contrib.distributions.python.ops import sample_stats
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import spectral_ops_test_util
 from tensorflow.python.platform import test
 
 rng = np.random.RandomState(0)
 
 
+class _AutoCorrelationTest(object):
+
+  @property
+  def use_static_shape(self):
+    raise NotImplementedError("Subclass failed to implement `use_static_shape`")
+
+  @property
+  def dtype(self):
+    raise NotImplementedError("Subclass failed to implement `dtype`.")
+
+  def test_constant_sequence_axis_0_max_lags_none_center_false(self):
+    x_ = np.array([[0., 0., 0.],
+                   [1., 1., 1.]]).astype(self.dtype)
+    x_ph = array_ops.placeholder_with_default(
+        input=x_,
+        shape=x_.shape if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session() as sess:
+        # Setting normalize = True means we divide by zero.
+        auto_corr = sample_stats.auto_correlation(
+            x_ph, axis=1, center=False, normalize=False)
+        if self.use_static_shape:
+          self.assertEqual((2, 3), auto_corr.shape)
+        auto_corr_ = sess.run(auto_corr)
+        self.assertAllClose(
+            [[0., 0., 0.],
+             [1., 1., 1.]], auto_corr_)
+
+  def test_constant_sequence_axis_0_max_lags_none_center_true(self):
+    x_ = np.array([[0., 0., 0.],
+                   [1., 1., 1.]]).astype(self.dtype)
+    x_ph = array_ops.placeholder_with_default(
+        input=x_,
+        shape=x_.shape if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session() as sess:
+        # Setting normalize = True means we divide by zero.
+        auto_corr = sample_stats.auto_correlation(
+            x_ph, axis=1, normalize=False, center=True)
+        if self.use_static_shape:
+          self.assertEqual((2, 3), auto_corr.shape)
+        auto_corr_ = sess.run(auto_corr)
+        self.assertAllClose(
+            [[0., 0., 0.],
+             [0., 0., 0.]], auto_corr_)
+
+  def check_results_versus_brute_force(
+      self, x, axis, max_lags, center, normalize):
+    """Compute auto-correlation by brute force, then compare to tf result."""
+    # Brute for auto-corr -- avoiding fft and transpositions.
+    axis_len = x.shape[axis]
+    if max_lags is None:
+      max_lags = axis_len - 1
+    else:
+      max_lags = min(axis_len - 1, max_lags)
+    auto_corr_at_lag = []
+    if center:
+      x -= x.mean(axis=axis, keepdims=True)
+    for m in range(max_lags + 1):
+      auto_corr_at_lag.append((
+          np.take(x, indices=range(0, axis_len - m), axis=axis) *
+          np.conj(np.take(x, indices=range(m, axis_len), axis=axis))
+      ).mean(axis=axis, keepdims=True))
+    rxx = np.concatenate(auto_corr_at_lag, axis=axis)
+    if normalize:
+      rxx /= np.take(rxx, [0], axis=axis)
+
+    x_ph = array_ops.placeholder_with_default(
+        x, shape=x.shape if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session():
+        auto_corr = sample_stats.auto_correlation(
+            x_ph, axis=axis, max_lags=max_lags, center=center,
+            normalize=normalize)
+        if self.use_static_shape:
+          output_shape = list(x.shape)
+          output_shape[axis] = max_lags + 1
+          self.assertAllEqual(output_shape, auto_corr.shape)
+        self.assertAllClose(rxx, auto_corr.eval(), rtol=1e-5, atol=1e-5)
+
+  def test_axis_n1_center_false_max_lags_none(self):
+    x = rng.randn(2, 3, 4).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(2, 3, 4).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=-1, max_lags=None, center=False, normalize=False)
+
+  def test_axis_n2_center_false_max_lags_none(self):
+    x = rng.randn(3, 4, 5).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(3, 4, 5).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=-2, max_lags=None, center=False, normalize=False)
+
+  def test_axis_n1_center_false_max_lags_none_normalize_true(self):
+    x = rng.randn(2, 3, 4).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(2, 3, 4).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=-1, max_lags=None, center=False, normalize=True)
+
+  def test_axis_n2_center_false_max_lags_none_normalize_true(self):
+    x = rng.randn(3, 4, 5).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(3, 4, 5).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=-2, max_lags=None, center=False, normalize=True)
+
+  def test_axis_0_center_true_max_lags_none(self):
+    x = rng.randn(3, 4, 5).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(3, 4, 5).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=0, max_lags=None, center=True, normalize=False)
+
+  def test_axis_2_center_true_max_lags_1(self):
+    x = rng.randn(3, 4, 5).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(3, 4, 5).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=2, max_lags=1, center=True, normalize=False)
+
+  def test_axis_2_center_true_max_lags_100(self):
+    # There are less than 100 elements in axis 2, so expect we get back an array
+    # the same size as x, despite having asked for 100 lags.
+    x = rng.randn(3, 4, 5).astype(self.dtype)
+    if self.dtype in [np.complex64]:
+      x = 1j * rng.randn(3, 4, 5).astype(self.dtype)
+    self.check_results_versus_brute_force(
+        x, axis=2, max_lags=100, center=True, normalize=False)
+
+  def test_long_orthonormal_sequence_has_corr_length_0(self):
+    l = 10000
+    x = rng.randn(l).astype(self.dtype)
+    x_ph = array_ops.placeholder_with_default(
+        x, shape=(l,) if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session():
+        rxx = sample_stats.auto_correlation(
+            x_ph, max_lags=l // 2, center=True, normalize=False)
+        if self.use_static_shape:
+          self.assertAllEqual((l // 2 + 1,), rxx.shape)
+        rxx_ = rxx.eval()
+        # OSS CPU FFT has some accuracy issues is not the most accurate.
+        # So this tolerance is a bit bad.
+        self.assertAllClose(1., rxx_[0], rtol=0.05)
+        # The maximal error in the rest of the sequence is not great.
+        self.assertAllClose(np.zeros(l // 2), rxx_[1:], atol=0.1)
+        # The mean error in the rest is ok, actually 0.008 when I tested it.
+        self.assertLess(np.abs(rxx_[1:]).mean(), 0.02)
+
+  def test_step_function_sequence(self):
+    # x jumps to new random value every 10 steps.  So correlation length = 10.
+    x = (rng.randint(-10, 10, size=(1000, 1))
+         * np.ones((1, 10))).ravel().astype(self.dtype)
+    x_ph = array_ops.placeholder_with_default(
+        x, shape=(1000 * 10,) if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session():
+        rxx = sample_stats.auto_correlation(
+            x_ph, max_lags=1000 * 10 // 2, center=True, normalize=False)
+        if self.use_static_shape:
+          self.assertAllEqual((1000 * 10 // 2 + 1,), rxx.shape)
+        rxx_ = rxx.eval()
+        rxx_ /= rxx_[0]
+        # Expect positive correlation for the first 10 lags, then significantly
+        # smaller negative.
+        self.assertGreater(rxx_[:10].min(), 0)
+        self.assertGreater(rxx_[9], 5 * rxx_[10:20].mean())
+        # RXX should be decreasing for the first 10 lags.
+        diff = np.diff(rxx_)
+        self.assertLess(diff[:10].max(), 0)
+
+  def test_normalization(self):
+    l = 10000
+    x = 3 * rng.randn(l).astype(self.dtype)
+    x_ph = array_ops.placeholder_with_default(
+        x, shape=(l,) if self.use_static_shape else None)
+    with spectral_ops_test_util.fft_kernel_label_map():
+      with self.test_session():
+        rxx = sample_stats.auto_correlation(
+            x_ph, max_lags=l // 2, center=True, normalize=True)
+        if self.use_static_shape:
+          self.assertAllEqual((l // 2 + 1,), rxx.shape)
+        rxx_ = rxx.eval()
+        # Note that RXX[0] = 1, despite the fact that E[X^2] = 9, and this is
+        # due to normalize=True.
+        # OSS CPU FFT has some accuracy issues is not the most accurate.
+        # So this tolerance is a bit bad.
+        self.assertAllClose(1., rxx_[0], rtol=0.05)
+        # The maximal error in the rest of the sequence is not great.
+        self.assertAllClose(np.zeros(l // 2), rxx_[1:], atol=0.1)
+        # The mean error in the rest is ok, actually 0.008 when I tested it.
+        self.assertLess(np.abs(rxx_[1:]).mean(), 0.02)
+
+
+class AutoCorrelationTestStaticShapeFloat32(test.TestCase,
+                                            _AutoCorrelationTest):
+
+  @property
+  def dtype(self):
+    return np.float32
+
+  @property
+  def use_static_shape(self):
+    return True
+
+
+class AutoCorrelationTestStaticShapeComplex64(test.TestCase,
+                                              _AutoCorrelationTest):
+
+  @property
+  def dtype(self):
+    return np.complex64
+
+  @property
+  def use_static_shape(self):
+    return True
+
+
+class AutoCorrelationTestDynamicShapeFloat32(test.TestCase,
+                                             _AutoCorrelationTest):
+
+  @property
+  def dtype(self):
+    return np.float32
+
+  @property
+  def use_static_shape(self):
+    return False
+
+
 class PercentileTestWithLowerInterpolation(test.TestCase):
 
   _interpolation = "lower"
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py
index 103d8e186221e879d1734a097114708429f725bd..cbaf74d3f66253ae5727e1ba579e2d49235b748e 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py
@@ -200,6 +200,27 @@ class TransformedDistributionTest(test.TestCase):
       self.assertAllEqual([2], multi_logit_normal.event_shape)
       self.assertAllEqual([2], multi_logit_normal.event_shape_tensor().eval())
 
+  def testCastLogDetJacobian(self):
+    """Test log_prob when Jacobian and log_prob dtypes do not match."""
+
+    with self.test_session():
+      # Create an identity bijector whose jacobians have dtype int32
+      int_identity = bs.Inline(
+          forward_fn=array_ops.identity,
+          inverse_fn=array_ops.identity,
+          inverse_log_det_jacobian_fn=lambda x: math_ops.cast(0, dtypes.int32),
+          forward_log_det_jacobian_fn=lambda x: math_ops.cast(0, dtypes.int32),
+          is_constant_jacobian=True)
+      normal = self._cls()(
+          distribution=ds.Normal(loc=0., scale=1.),
+          bijector=int_identity,
+          validate_args=True)
+
+      y = normal.sample()
+      normal.log_prob(y).eval()
+      normal.prob(y).eval()
+      normal.entropy().eval()
+
   def testEntropy(self):
     with self.test_session():
       shift = np.array([[-1, 0, 1], [-1, -2, -3]], dtype=np.float32)
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py b/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py
index de4a221f7badca8267a81d612a57137c676ff052..d292b04665e34196670ee4f1c1655f805e04e06a 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py
@@ -21,9 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.distributions.python.ops import test_util
-from tensorflow.contrib.distributions.python.ops import vector_diffeomixture as vector_diffeomixture_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
+from tensorflow.contrib.distributions.python.ops import vector_diffeomixture as vdm_lib
 from tensorflow.python.ops.distributions import normal as normal_lib
 from tensorflow.python.ops.linalg import linear_operator_diag as linop_diag_lib
 from tensorflow.python.ops.linalg import linear_operator_identity as linop_identity_lib
@@ -37,7 +35,7 @@ class VectorDiffeomixtureTest(
   def testSampleProbConsistentBroadcastMixNoBatch(self):
     with self.test_session() as sess:
       dims = 4
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [1.]],
           mix_scale=[1.],
           distribution=normal_lib.Normal(0., 1.),
@@ -54,18 +52,19 @@ class VectorDiffeomixtureTest(
                   diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       # Ball centered at component0's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=2., center=0., rtol=0.005)
+          sess.run, vdm, radius=2., center=0., rtol=0.015)
       # Larger ball centered at component1's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=4., center=2., rtol=0.005)
+          sess.run, vdm, radius=4., center=2., rtol=0.015)
 
   def testSampleProbConsistentBroadcastMixNonStandardBase(self):
     with self.test_session() as sess:
       dims = 4
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [1.]],
           mix_scale=[1.],
           distribution=normal_lib.Normal(1., 1.5),
@@ -82,18 +81,19 @@ class VectorDiffeomixtureTest(
                   diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       # Ball centered at component0's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=2., center=1., rtol=0.006)
+          sess.run, vdm, radius=2., center=1., rtol=0.015)
       # Larger ball centered at component1's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=4., center=3., rtol=0.009)
+          sess.run, vdm, radius=4., center=3., rtol=0.01)
 
   def testSampleProbConsistentBroadcastMixBatch(self):
     with self.test_session() as sess:
       dims = 4
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [1.]],
           mix_scale=[1.],
           distribution=normal_lib.Normal(0., 1.),
@@ -113,18 +113,19 @@ class VectorDiffeomixtureTest(
                   ]),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       # Ball centered at component0's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=2., center=0., rtol=0.005)
+          sess.run, vdm, radius=2., center=0., rtol=0.01)
       # Larger ball centered at component1's mean.
       self.run_test_sample_consistent_log_prob(
-          sess.run, vdm, radius=4., center=2., rtol=0.005)
+          sess.run, vdm, radius=4., center=2., rtol=0.01)
 
   def testMeanCovarianceNoBatch(self):
     with self.test_session() as sess:
       dims = 3
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [4.]],
           mix_scale=[10.],
           distribution=normal_lib.Normal(0., 1.),
@@ -141,14 +142,15 @@ class VectorDiffeomixtureTest(
                   diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       self.run_test_sample_consistent_mean_covariance(
-          sess.run, vdm, rtol=0.02, cov_rtol=0.06)
+          sess.run, vdm, rtol=0.02, cov_rtol=0.08)
 
   def testMeanCovarianceNoBatchUncenteredNonStandardBase(self):
     with self.test_session() as sess:
       dims = 3
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [4.]],
           mix_scale=[10.],
           distribution=normal_lib.Normal(-1., 1.5),
@@ -165,6 +167,7 @@ class VectorDiffeomixtureTest(
                   diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       self.run_test_sample_consistent_mean_covariance(
           sess.run, vdm, num_samples=int(1e6), rtol=0.01, cov_atol=0.025)
@@ -172,7 +175,7 @@ class VectorDiffeomixtureTest(
   def testMeanCovarianceBatch(self):
     with self.test_session() as sess:
       dims = 3
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
+      vdm = vdm_lib.VectorDiffeomixture(
           mix_loc=[[0.], [4.]],
           mix_scale=[10.],
           distribution=normal_lib.Normal(0., 1.),
@@ -192,18 +195,16 @@ class VectorDiffeomixtureTest(
                   ]),
                   is_positive_definite=True),
           ],
+          quadrature_size=8,
           validate_args=True)
       self.run_test_sample_consistent_mean_covariance(
-          sess.run, vdm, rtol=0.02, cov_rtol=0.06)
+          sess.run, vdm, rtol=0.02, cov_rtol=0.07)
 
-  def testSampleProbConsistentDynamicQuadrature(self):
+  def testSampleProbConsistentQuadrature(self):
     with self.test_session() as sess:
-      qgrid = array_ops.placeholder(dtype=dtypes.float32)
-      qprobs = array_ops.placeholder(dtype=dtypes.float32)
-      g, p = np.polynomial.hermite.hermgauss(deg=8)
       dims = 4
-      vdm = vector_diffeomixture_lib.VectorDiffeomixture(
-          mix_loc=[[0.], [1.]],
+      vdm = vdm_lib.VectorDiffeomixture(
+          mix_loc=[0.],
           mix_scale=[1.],
           distribution=normal_lib.Normal(0., 1.),
           loc=[
@@ -219,15 +220,14 @@ class VectorDiffeomixtureTest(
                   diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
                   is_positive_definite=True),
           ],
-          quadrature_grid_and_probs=(g, p),
+          quadrature_size=3,
           validate_args=True)
       # Ball centered at component0's mean.
-      sess_run_fn = lambda x: sess.run(x, feed_dict={qgrid: g, qprobs: p})
       self.run_test_sample_consistent_log_prob(
-          sess_run_fn, vdm, radius=2., center=0., rtol=0.005)
+          sess.run, vdm, radius=2., center=0., rtol=0.015)
       # Larger ball centered at component1's mean.
       self.run_test_sample_consistent_log_prob(
-          sess_run_fn, vdm, radius=4., center=2., rtol=0.005)
+          sess.run, vdm, radius=4., center=2., rtol=0.005)
 
   # TODO(jvdillon): We've tested that (i) .sample and .log_prob are consistent,
   # (ii) .mean, .stddev etc... and .sample are consistent. However, we haven't
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py
index 6049419818e18c54209f0be95d41fcecf6627b7e..0fe9f6aa78fbe845b99d0668f075b0162ec2a9f7 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value.py
@@ -18,12 +18,117 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.absolute_value_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["AbsoluteValue"]
+__all__ = [
+    "AbsoluteValue",
+]
 
-remove_undocumented(__name__, _allowed_symbols)
+
+class AbsoluteValue(bijector.Bijector):
+  """Computes `Y = g(X) = Abs(X)`, element-wise.
+
+  This non-injective bijector allows for transformations of scalar distributions
+  with the absolute value function, which maps `(-inf, inf)` to `[0, inf)`.
+
+  * For `y in (0, inf)`, `AbsoluteValue.inverse(y)` returns the set inverse
+    `{x in (-inf, inf) : |x| = y}` as a tuple, `-y, y`.
+  * `AbsoluteValue.inverse(0)` returns `0, 0`, which is not the set inverse
+    (the set inverse is the singleton `{0}`), but "works" in conjunction with
+    `TransformedDistribution` to produce a left semi-continuous pdf.
+  * For `y < 0`, `AbsoluteValue.inverse(y)` happily returns the
+    wrong thing, `-y, y`.  This is done for efficiency.  If
+    `validate_args == True`, `y < 0` will raise an exception.
+
+
+  ```python
+  tfd = tf.contrib.distributions
+
+  abs = tfd.bijectors.AbsoluteValue()
+
+  abs.forward([-1., 0., 1.])
+  ==> [1., 0.,  1.]
+
+  abs.inverse(1.)
+  ==> [-1., 1.]
+
+  # The |dX/dY| is constant, == 1.  So Log|dX/dY| == 0.
+  abs.inverse_log_det_jacobian(1.)
+  ==> [0., 0.]
+
+  # Special case handling of 0.
+  abs.inverse(0.)
+  ==> [0., 0.]
+
+  abs.inverse_log_det_jacobian(0.)
+  ==> [0., 0.]
+  ```
+
+  """
+
+  def __init__(self, event_ndims=0, validate_args=False, name="absolute_value"):
+    """Instantiates the `AbsoluteValue` bijector.
+
+    Args:
+      event_ndims: Python scalar indicating the number of dimensions associated
+        with a particular draw from the distribution.  Currently only zero is
+        supported.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness, in particular whether inputs to `inverse` and
+        `inverse_log_det_jacobian` are non-negative.
+      name: Python `str` name given to ops managed by this object.
+
+    Raises:
+      ValueError:  If `event_ndims` is not zero.
+    """
+    self._graph_parents = []
+    self._name = name
+
+    event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
+    event_ndims_const = tensor_util.constant_value(event_ndims)
+    if event_ndims_const is not None and event_ndims_const not in (0,):
+      raise ValueError("event_ndims(%s) was not 0" % event_ndims_const)
+    else:
+      if validate_args:
+        event_ndims = control_flow_ops.with_dependencies(
+            [check_ops.assert_equal(
+                event_ndims, 0, message="event_ndims was not 0")],
+            event_ndims)
+
+    with self._name_scope("init"):
+      super(AbsoluteValue, self).__init__(
+          event_ndims=event_ndims,
+          validate_args=validate_args,
+          name=name)
+
+  def _forward(self, x):
+    return math_ops.abs(x)
+
+  def _inverse(self, y):
+    if self.validate_args:
+      y = control_flow_ops.with_dependencies(
+          [check_ops.assert_non_negative(y, message="Argument y was negative")],
+          y)
+    return -y, y
+
+  def _inverse_log_det_jacobian(self, y):
+    # If event_ndims = 2,
+    # F^{-1}(y) = (-y, y), so DF^{-1}(y) = (-1, 1),
+    # so Log|DF^{-1}(y)| = Log[1, 1] = [0, 0].
+    batch_shape = array_ops.shape(y)[:array_ops.rank(y) - self.event_ndims]
+    zeros = array_ops.zeros(batch_shape, dtype=y.dtype)
+    if self.validate_args:
+      zeros = control_flow_ops.with_dependencies(
+          [check_ops.assert_non_negative(y, message="Argument y was negative")],
+          zeros)
+    return zeros, zeros
+
+  @property
+  def _is_injective(self):
+    return False
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py
deleted file mode 100644
index b84502003ab6c0c4ffdda21eea162f441509e1fa..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""AbsoluteValue bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-__all__ = [
-    "AbsoluteValue",
-]
-
-
-class AbsoluteValue(bijector.Bijector):
-  """Computes `Y = g(X) = Abs(X)`, element-wise.
-
-  This non-injective bijector allows for transformations of scalar distributions
-  with the absolute value function, which maps `(-inf, inf)` to `[0, inf)`.
-
-  * For `y in (0, inf)`, `AbsoluteValue.inverse(y)` returns the set inverse
-    `{x in (-inf, inf) : |x| = y}` as a tuple, `-y, y`.
-  * `AbsoluteValue.inverse(0)` returns `0, 0`, which is not the set inverse
-    (the set inverse is the singleton `{0}`), but "works" in conjunction with
-    `TransformedDistribution` to produce a left semi-continuous pdf.
-  * For `y < 0`, `AbsoluteValue.inverse(y)` happily returns the
-    wrong thing, `-y, y`.  This is done for efficiency.  If
-    `validate_args == True`, `y < 0` will raise an exception.
-
-
-  ```python
-  abs = ds.bijectors.AbsoluteValue()
-
-  abs.forward([-1., 0., 1.])
-  ==> [1., 0.,  1.]
-
-  abs.inverse(1.)
-  ==> [-1., 1.]
-
-  # The |dX/dY| is constant, == 1.  So Log|dX/dY| == 0.
-  abs.inverse_log_det_jacobian(1.)
-  ==> [0., 0.]
-
-  # Special case handling of 0.
-  abs.inverse(0.)
-  ==> [0., 0.]
-
-  abs.inverse_log_det_jacobian(0.)
-  ==> [0., 0.]
-  ```
-
-  """
-
-  def __init__(self, event_ndims=0, validate_args=False, name="absolute_value"):
-    """Instantiates the `AbsoluteValue` bijector.
-
-    Args:
-      event_ndims: Python scalar indicating the number of dimensions associated
-        with a particular draw from the distribution.  Currently only zero is
-        supported.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness, in particular whether inputs to `inverse` and
-        `inverse_log_det_jacobian` are non-negative.
-      name: Python `str` name given to ops managed by this object.
-
-    Raises:
-      ValueError:  If `event_ndims` is not zero.
-    """
-    self._graph_parents = []
-    self._name = name
-
-    event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
-    event_ndims_const = tensor_util.constant_value(event_ndims)
-    if event_ndims_const is not None and event_ndims_const not in (0,):
-      raise ValueError("event_ndims(%s) was not 0" % event_ndims_const)
-    else:
-      if validate_args:
-        event_ndims = control_flow_ops.with_dependencies(
-            [check_ops.assert_equal(
-                event_ndims, 0, message="event_ndims was not 0")],
-            event_ndims)
-
-    with self._name_scope("init"):
-      super(AbsoluteValue, self).__init__(
-          event_ndims=event_ndims,
-          validate_args=validate_args,
-          name=name)
-
-  def _forward(self, x):
-    return math_ops.abs(x)
-
-  def _inverse(self, y):
-    if self.validate_args:
-      y = control_flow_ops.with_dependencies(
-          [check_ops.assert_non_negative(y, message="Argument y was negative")],
-          y)
-    return -y, y
-
-  def _inverse_log_det_jacobian(self, y):
-    # If event_ndims = 2,
-    # F^{-1}(y) = (-y, y), so DF^{-1}(y) = (-1, 1),
-    # so Log|DF^{-1}(y)| = Log[1, 1] = [0, 0].
-    batch_shape = array_ops.shape(y)[:array_ops.rank(y) - self.event_ndims]
-    zeros = array_ops.zeros(batch_shape, dtype=y.dtype)
-    if self.validate_args:
-      zeros = control_flow_ops.with_dependencies(
-          [check_ops.assert_non_negative(y, message="Argument y was negative")],
-          zeros)
-    return zeros, zeros
-
-  @property
-  def _is_injective(self):
-    return False
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine.py
index 940cceff04e77cfc2f7caae5a798d135f7601b95..05bb9c2f9bdf35e222c94db3491157893da64ebd 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/affine.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine.py
@@ -18,12 +18,386 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.affine_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.contrib import linalg
+from tensorflow.contrib.distributions.python.ops import distribution_util
+from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["Affine"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Affine",
+]
+
+
+def _as_tensor(x, name):
+  """Convenience to convert to `Tensor` or leave as `None`."""
+  return None if x is None else ops.convert_to_tensor(x, name=name)
+
+
+class Affine(bijector.Bijector):
+  """Compute `Y = g(X; shift, scale) = scale @ X + shift`.
+
+  Here `scale = c * I + diag(D1) + tril(L) + V @ diag(D2) @ V.T`.
+
+  In TF parlance, the `scale` term is logically equivalent to:
+
+  ```python
+  scale = (
+    scale_identity_multiplier * tf.diag(tf.ones(d)) +
+    tf.diag(scale_diag) +
+    scale_tril +
+    scale_perturb_factor @ diag(scale_perturb_diag) @
+      tf.transpose([scale_perturb_factor])
+  )
+  ```
+
+  The `scale` term is applied without necessarily materializing constituent
+  matrices, i.e., the matmul is [matrix-free](
+  https://en.wikipedia.org/wiki/Matrix-free_methods) when possible.
+
+  Examples:
+
+  ```python
+  # Y = X
+  b = Affine()
+
+  # Y = X + shift
+  b = Affine(shift=[1., 2, 3])
+
+  # Y = 2 * I @ X.T + shift
+  b = Affine(shift=[1., 2, 3],
+             scale_identity_multiplier=2.)
+
+  # Y = tf.diag(d1) @ X.T + shift
+  b = Affine(shift=[1., 2, 3],
+             scale_diag=[-1., 2, 1])         # Implicitly 3x3.
+
+  # Y = (I + v * v.T) @ X.T + shift
+  b = Affine(shift=[1., 2, 3],
+             scale_perturb_factor=[[1., 0],
+                                   [0, 1],
+                                   [1, 1]])
+
+  # Y = (diag(d1) + v * diag(d2) * v.T) @ X.T + shift
+  b = Affine(shift=[1., 2, 3],
+             scale_diag=[1., 3, 3],          # Implicitly 3x3.
+             scale_perturb_diag=[2., 1],     # Implicitly 2x2.
+             scale_perturb_factor=[[1., 0],
+                                   [0, 1],
+                                   [1, 1]])
+
+  ```
+
+  """
+
+  def __init__(self,
+               shift=None,
+               scale_identity_multiplier=None,
+               scale_diag=None,
+               scale_tril=None,
+               scale_perturb_factor=None,
+               scale_perturb_diag=None,
+               event_ndims=1,
+               validate_args=False,
+               name="affine"):
+    """Instantiates the `Affine` bijector.
+
+    This `Bijector` is initialized with `shift` `Tensor` and `scale` arguments,
+    giving the forward operation:
+
+    ```none
+    Y = g(X) = scale @ X + shift
+    ```
+
+    where the `scale` term is logically equivalent to:
+
+    ```python
+    scale = (
+      scale_identity_multiplier * tf.diag(tf.ones(d)) +
+      tf.diag(scale_diag) +
+      scale_tril +
+      scale_perturb_factor @ diag(scale_perturb_diag) @
+        tf.transpose([scale_perturb_factor])
+    )
+    ```
+
+    If none of `scale_identity_multiplier`, `scale_diag`, or `scale_tril` are
+    specified then `scale += IdentityMatrix`. Otherwise specifying a
+    `scale` argument has the semantics of `scale += Expand(arg)`, i.e.,
+    `scale_diag != None` means `scale += tf.diag(scale_diag)`.
+
+    Args:
+      shift: Floating-point `Tensor`. If this is set to `None`, no shift is
+        applied.
+      scale_identity_multiplier: floating point rank 0 `Tensor` representing a
+        scaling done to the identity matrix.
+        When `scale_identity_multiplier = scale_diag = scale_tril = None` then
+        `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added
+        to `scale`.
+      scale_diag: Floating-point `Tensor` representing the diagonal matrix.
+        `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
+        diagonal matrix.
+        When `None` no diagonal term is added to `scale`.
+      scale_tril: Floating-point `Tensor` representing the diagonal matrix.
+        `scale_diag` has shape [N1, N2, ...  k, k], which represents a k x k
+        lower triangular matrix.
+        When `None` no `scale_tril` term is added to `scale`.
+        The upper triangular elements above the diagonal are ignored.
+      scale_perturb_factor: Floating-point `Tensor` representing factor matrix
+        with last two dimensions of shape `(k, r)`. When `None`, no rank-r
+        update is added to `scale`.
+      scale_perturb_diag: Floating-point `Tensor` representing the diagonal
+        matrix. `scale_perturb_diag` has shape [N1, N2, ...  r], which
+        represents an `r x r` diagonal matrix. When `None` low rank updates will
+        take the form `scale_perturb_factor * scale_perturb_factor.T`.
+      event_ndims: Scalar `int` `Tensor` indicating the number of dimensions
+        associated with a particular draw from the distribution. Must be 0 or 1.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+
+    Raises:
+      ValueError: if `perturb_diag` is specified but not `perturb_factor`.
+      TypeError: if `shift` has different `dtype` from `scale` arguments.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+
+    # Ambiguous definition of low rank update.
+    if scale_perturb_diag is not None and scale_perturb_factor is None:
+      raise ValueError("When scale_perturb_diag is specified, "
+                       "scale_perturb_factor must be specified.")
+
+    # Special case, only handling a scaled identity matrix. We don't know its
+    # dimensions, so this is special cased.
+    # We don't check identity_multiplier, since below we set it to 1. if all
+    # other scale args are None.
+    self._is_only_identity_multiplier = (scale_tril is None and
+                                         scale_diag is None and
+                                         scale_perturb_factor is None)
+
+    with self._name_scope("init", values=[
+        shift, scale_identity_multiplier, scale_diag, scale_tril,
+        scale_perturb_diag, scale_perturb_factor]):
+      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
+      event_ndims_const = tensor_util.constant_value(event_ndims)
+      if event_ndims_const is not None and event_ndims_const not in (0, 1):
+        raise ValueError("event_ndims(%s) was not 0 or 1" % event_ndims_const)
+      else:
+        if validate_args:
+          # Shape tool will catch if event_ndims is negative.
+          event_ndims = control_flow_ops.with_dependencies(
+              [check_ops.assert_less(
+                  event_ndims, 2, message="event_ndims must be 0 or 1")],
+              event_ndims)
+
+      if event_ndims_const == 0 and not self._is_only_identity_multiplier:
+        raise ValueError(
+            "If event_ndims == 0, the only scale argument you can pass is "
+            "scale_identity_multiplier.  All others operate on vectors.")
+
+      # In the absence of `loc` and `scale`, we'll assume `dtype` is `float32`.
+      dtype = dtypes.float32
+
+      if shift is not None:
+        shift = ops.convert_to_tensor(shift, name="shift")
+        dtype = shift.dtype.base_dtype
+      self._shift = shift
+
+      # When no args are specified, pretend the scale matrix is the identity
+      # matrix.
+      if (self._is_only_identity_multiplier and
+          scale_identity_multiplier is None):
+        scale_identity_multiplier = ops.convert_to_tensor(1., dtype=dtype)
+
+      # self._create_scale_operator returns a LinearOperator in all cases
+      # except if self._is_only_identity_multiplier; in which case it
+      # returns a scalar Tensor.
+      scale = self._create_scale_operator(
+          identity_multiplier=scale_identity_multiplier,
+          diag=scale_diag,
+          tril=scale_tril,
+          perturb_diag=scale_perturb_diag,
+          perturb_factor=scale_perturb_factor,
+          shift=shift,
+          validate_args=validate_args)
+
+      if scale.dtype is not None:
+        dtype = scale.dtype.base_dtype
+
+      if scale is not None and not self._is_only_identity_multiplier:
+        if (shift is not None and
+            shift.dtype.base_dtype != scale.dtype.base_dtype):
+          raise TypeError(
+              "shift.dtype({}) is incompatible with scale.dtype({}).".format(
+                  shift.dtype, scale.dtype))
+
+        if scale.tensor_rank is not None:
+          batch_ndims = scale.tensor_rank - 2
+        else:
+          batch_ndims = scale.tensor_rank_tensor() - 2
+      else:
+        # We won't need shape inference when scale is None or when scale is a
+        # scalar.
+        batch_ndims = 0
+      self._scale = scale
+      self._shaper = _DistributionShape(
+          batch_ndims=batch_ndims,
+          event_ndims=event_ndims,
+          validate_args=validate_args)
+      super(Affine, self).__init__(
+          event_ndims=event_ndims,
+          graph_parents=(
+              [event_ndims] +
+              [self._scale] if tensor_util.is_tensor(self._scale)
+              else self._scale.graph_parents +
+              [self._shift] if self._shift is not None else []),
+          is_constant_jacobian=True,
+          dtype=dtype,
+          validate_args=validate_args,
+          name=name)
+
+  def _create_scale_operator(self, identity_multiplier, diag, tril,
+                             perturb_diag, perturb_factor, shift,
+                             validate_args):
+    """Construct `scale` from various components.
+
+    Args:
+      identity_multiplier: floating point rank 0 `Tensor` representing a scaling
+        done to the identity matrix.
+      diag: Floating-point `Tensor` representing the diagonal matrix.
+        `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
+        diagonal matrix.
+      tril: Floating-point `Tensor` representing the diagonal matrix.
+        `scale_tril` has shape [N1, N2, ...  k], which represents a k x k lower
+        triangular matrix.
+      perturb_diag: Floating-point `Tensor` representing the diagonal matrix of
+        the low rank update.
+      perturb_factor: Floating-point `Tensor` representing factor matrix.
+      shift: Floating-point `Tensor` representing `shift in `scale @ X + shift`.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+
+    Returns:
+      scale. In the case of scaling by a constant, scale is a
+      floating point `Tensor`. Otherwise, scale is a `LinearOperator`.
+
+    Raises:
+      ValueError: if all of `tril`, `diag` and `identity_multiplier` are `None`.
+    """
+    identity_multiplier = _as_tensor(identity_multiplier, "identity_multiplier")
+    diag = _as_tensor(diag, "diag")
+    tril = _as_tensor(tril, "tril")
+    perturb_diag = _as_tensor(perturb_diag, "perturb_diag")
+    perturb_factor = _as_tensor(perturb_factor, "perturb_factor")
+
+    # If possible, use the low rank update to infer the shape of
+    # the identity matrix, when scale represents a scaled identity matrix
+    # with a low rank update.
+    shape_hint = None
+    if perturb_factor is not None:
+      shape_hint = distribution_util.dimension_size(perturb_factor, axis=-2)
+
+    if self._is_only_identity_multiplier:
+      if validate_args:
+        return control_flow_ops.with_dependencies(
+            [check_ops.assert_none_equal(
+                identity_multiplier,
+                array_ops.zeros([], identity_multiplier.dtype),
+                ["identity_multiplier should be non-zero."])],
+            identity_multiplier)
+      return identity_multiplier
+
+    scale = distribution_util.make_tril_scale(
+        loc=shift,
+        scale_tril=tril,
+        scale_diag=diag,
+        scale_identity_multiplier=identity_multiplier,
+        validate_args=validate_args,
+        assert_positive=False,
+        shape_hint=shape_hint)
+
+    if perturb_factor is not None:
+      return linalg.LinearOperatorLowRankUpdate(
+          scale,
+          u=perturb_factor,
+          diag_update=perturb_diag,
+          is_diag_update_positive=perturb_diag is None,
+          is_non_singular=True,  # Implied by is_positive_definite=True.
+          is_self_adjoint=True,
+          is_positive_definite=True,
+          is_square=True)
+
+    return scale
+
+  @property
+  def shift(self):
+    """The `shift` `Tensor` in `Y = scale @ X + shift`."""
+    return self._shift
+
+  @property
+  def scale(self):
+    """The `scale` `LinearOperator` in `Y = scale @ X + shift`."""
+    return self._scale
+
+  def _forward(self, x):
+    y = x
+    if self._is_only_identity_multiplier:
+      y *= self._scale
+      if self.shift is not None:
+        return y + self.shift
+      return y
+    y, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
+        y, expand_batch_dim=False)
+    with ops.control_dependencies(self._maybe_check_scale() if
+                                  self.validate_args else []):
+      y = self.scale.matmul(y)
+    y = self._shaper.undo_make_batch_of_event_sample_matrices(
+        y, sample_shape, expand_batch_dim=False)
+    if self.shift is not None:
+      y += self.shift
+    return y
+
+  def _inverse(self, y):
+    x = y
+    if self.shift is not None:
+      x -= self.shift
+    if self._is_only_identity_multiplier:
+      return x / self._scale
+
+    x, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
+        x, expand_batch_dim=False)
+    # Solve fails if the op is singular so we may safely skip this assertion.
+    x = self.scale.solve(x)
+    x = self._shaper.undo_make_batch_of_event_sample_matrices(
+        x, sample_shape, expand_batch_dim=False)
+    return x
+
+  def _inverse_log_det_jacobian(self, y):
+    return -self._forward_log_det_jacobian(y)
+
+  def _forward_log_det_jacobian(self, x):
+    if self._is_only_identity_multiplier:
+      # We don't pad in this case and instead let the fldj be applied
+      # via broadcast.
+      event_size = distribution_util.pick_vector(
+          math_ops.equal(self._shaper.event_ndims, 0),
+          [1], array_ops.shape(x))[-1]
+      event_size = math_ops.cast(event_size, dtype=self._scale.dtype)
+      return math_ops.log(math_ops.abs(self._scale)) * event_size
+    return self.scale.log_abs_determinant()
+
+  def _maybe_check_scale(self):
+    try:
+      return [self.scale.assert_non_singular()]
+    except NotImplementedError:
+      pass
+    return []
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py
deleted file mode 100644
index 05bb9c2f9bdf35e222c94db3491157893da64ebd..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py
+++ /dev/null
@@ -1,403 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Affine bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib import linalg
-from tensorflow.contrib.distributions.python.ops import distribution_util
-from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "Affine",
-]
-
-
-def _as_tensor(x, name):
-  """Convenience to convert to `Tensor` or leave as `None`."""
-  return None if x is None else ops.convert_to_tensor(x, name=name)
-
-
-class Affine(bijector.Bijector):
-  """Compute `Y = g(X; shift, scale) = scale @ X + shift`.
-
-  Here `scale = c * I + diag(D1) + tril(L) + V @ diag(D2) @ V.T`.
-
-  In TF parlance, the `scale` term is logically equivalent to:
-
-  ```python
-  scale = (
-    scale_identity_multiplier * tf.diag(tf.ones(d)) +
-    tf.diag(scale_diag) +
-    scale_tril +
-    scale_perturb_factor @ diag(scale_perturb_diag) @
-      tf.transpose([scale_perturb_factor])
-  )
-  ```
-
-  The `scale` term is applied without necessarily materializing constituent
-  matrices, i.e., the matmul is [matrix-free](
-  https://en.wikipedia.org/wiki/Matrix-free_methods) when possible.
-
-  Examples:
-
-  ```python
-  # Y = X
-  b = Affine()
-
-  # Y = X + shift
-  b = Affine(shift=[1., 2, 3])
-
-  # Y = 2 * I @ X.T + shift
-  b = Affine(shift=[1., 2, 3],
-             scale_identity_multiplier=2.)
-
-  # Y = tf.diag(d1) @ X.T + shift
-  b = Affine(shift=[1., 2, 3],
-             scale_diag=[-1., 2, 1])         # Implicitly 3x3.
-
-  # Y = (I + v * v.T) @ X.T + shift
-  b = Affine(shift=[1., 2, 3],
-             scale_perturb_factor=[[1., 0],
-                                   [0, 1],
-                                   [1, 1]])
-
-  # Y = (diag(d1) + v * diag(d2) * v.T) @ X.T + shift
-  b = Affine(shift=[1., 2, 3],
-             scale_diag=[1., 3, 3],          # Implicitly 3x3.
-             scale_perturb_diag=[2., 1],     # Implicitly 2x2.
-             scale_perturb_factor=[[1., 0],
-                                   [0, 1],
-                                   [1, 1]])
-
-  ```
-
-  """
-
-  def __init__(self,
-               shift=None,
-               scale_identity_multiplier=None,
-               scale_diag=None,
-               scale_tril=None,
-               scale_perturb_factor=None,
-               scale_perturb_diag=None,
-               event_ndims=1,
-               validate_args=False,
-               name="affine"):
-    """Instantiates the `Affine` bijector.
-
-    This `Bijector` is initialized with `shift` `Tensor` and `scale` arguments,
-    giving the forward operation:
-
-    ```none
-    Y = g(X) = scale @ X + shift
-    ```
-
-    where the `scale` term is logically equivalent to:
-
-    ```python
-    scale = (
-      scale_identity_multiplier * tf.diag(tf.ones(d)) +
-      tf.diag(scale_diag) +
-      scale_tril +
-      scale_perturb_factor @ diag(scale_perturb_diag) @
-        tf.transpose([scale_perturb_factor])
-    )
-    ```
-
-    If none of `scale_identity_multiplier`, `scale_diag`, or `scale_tril` are
-    specified then `scale += IdentityMatrix`. Otherwise specifying a
-    `scale` argument has the semantics of `scale += Expand(arg)`, i.e.,
-    `scale_diag != None` means `scale += tf.diag(scale_diag)`.
-
-    Args:
-      shift: Floating-point `Tensor`. If this is set to `None`, no shift is
-        applied.
-      scale_identity_multiplier: floating point rank 0 `Tensor` representing a
-        scaling done to the identity matrix.
-        When `scale_identity_multiplier = scale_diag = scale_tril = None` then
-        `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added
-        to `scale`.
-      scale_diag: Floating-point `Tensor` representing the diagonal matrix.
-        `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
-        diagonal matrix.
-        When `None` no diagonal term is added to `scale`.
-      scale_tril: Floating-point `Tensor` representing the diagonal matrix.
-        `scale_diag` has shape [N1, N2, ...  k, k], which represents a k x k
-        lower triangular matrix.
-        When `None` no `scale_tril` term is added to `scale`.
-        The upper triangular elements above the diagonal are ignored.
-      scale_perturb_factor: Floating-point `Tensor` representing factor matrix
-        with last two dimensions of shape `(k, r)`. When `None`, no rank-r
-        update is added to `scale`.
-      scale_perturb_diag: Floating-point `Tensor` representing the diagonal
-        matrix. `scale_perturb_diag` has shape [N1, N2, ...  r], which
-        represents an `r x r` diagonal matrix. When `None` low rank updates will
-        take the form `scale_perturb_factor * scale_perturb_factor.T`.
-      event_ndims: Scalar `int` `Tensor` indicating the number of dimensions
-        associated with a particular draw from the distribution. Must be 0 or 1.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-
-    Raises:
-      ValueError: if `perturb_diag` is specified but not `perturb_factor`.
-      TypeError: if `shift` has different `dtype` from `scale` arguments.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-
-    # Ambiguous definition of low rank update.
-    if scale_perturb_diag is not None and scale_perturb_factor is None:
-      raise ValueError("When scale_perturb_diag is specified, "
-                       "scale_perturb_factor must be specified.")
-
-    # Special case, only handling a scaled identity matrix. We don't know its
-    # dimensions, so this is special cased.
-    # We don't check identity_multiplier, since below we set it to 1. if all
-    # other scale args are None.
-    self._is_only_identity_multiplier = (scale_tril is None and
-                                         scale_diag is None and
-                                         scale_perturb_factor is None)
-
-    with self._name_scope("init", values=[
-        shift, scale_identity_multiplier, scale_diag, scale_tril,
-        scale_perturb_diag, scale_perturb_factor]):
-      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
-      event_ndims_const = tensor_util.constant_value(event_ndims)
-      if event_ndims_const is not None and event_ndims_const not in (0, 1):
-        raise ValueError("event_ndims(%s) was not 0 or 1" % event_ndims_const)
-      else:
-        if validate_args:
-          # Shape tool will catch if event_ndims is negative.
-          event_ndims = control_flow_ops.with_dependencies(
-              [check_ops.assert_less(
-                  event_ndims, 2, message="event_ndims must be 0 or 1")],
-              event_ndims)
-
-      if event_ndims_const == 0 and not self._is_only_identity_multiplier:
-        raise ValueError(
-            "If event_ndims == 0, the only scale argument you can pass is "
-            "scale_identity_multiplier.  All others operate on vectors.")
-
-      # In the absence of `loc` and `scale`, we'll assume `dtype` is `float32`.
-      dtype = dtypes.float32
-
-      if shift is not None:
-        shift = ops.convert_to_tensor(shift, name="shift")
-        dtype = shift.dtype.base_dtype
-      self._shift = shift
-
-      # When no args are specified, pretend the scale matrix is the identity
-      # matrix.
-      if (self._is_only_identity_multiplier and
-          scale_identity_multiplier is None):
-        scale_identity_multiplier = ops.convert_to_tensor(1., dtype=dtype)
-
-      # self._create_scale_operator returns a LinearOperator in all cases
-      # except if self._is_only_identity_multiplier; in which case it
-      # returns a scalar Tensor.
-      scale = self._create_scale_operator(
-          identity_multiplier=scale_identity_multiplier,
-          diag=scale_diag,
-          tril=scale_tril,
-          perturb_diag=scale_perturb_diag,
-          perturb_factor=scale_perturb_factor,
-          shift=shift,
-          validate_args=validate_args)
-
-      if scale.dtype is not None:
-        dtype = scale.dtype.base_dtype
-
-      if scale is not None and not self._is_only_identity_multiplier:
-        if (shift is not None and
-            shift.dtype.base_dtype != scale.dtype.base_dtype):
-          raise TypeError(
-              "shift.dtype({}) is incompatible with scale.dtype({}).".format(
-                  shift.dtype, scale.dtype))
-
-        if scale.tensor_rank is not None:
-          batch_ndims = scale.tensor_rank - 2
-        else:
-          batch_ndims = scale.tensor_rank_tensor() - 2
-      else:
-        # We won't need shape inference when scale is None or when scale is a
-        # scalar.
-        batch_ndims = 0
-      self._scale = scale
-      self._shaper = _DistributionShape(
-          batch_ndims=batch_ndims,
-          event_ndims=event_ndims,
-          validate_args=validate_args)
-      super(Affine, self).__init__(
-          event_ndims=event_ndims,
-          graph_parents=(
-              [event_ndims] +
-              [self._scale] if tensor_util.is_tensor(self._scale)
-              else self._scale.graph_parents +
-              [self._shift] if self._shift is not None else []),
-          is_constant_jacobian=True,
-          dtype=dtype,
-          validate_args=validate_args,
-          name=name)
-
-  def _create_scale_operator(self, identity_multiplier, diag, tril,
-                             perturb_diag, perturb_factor, shift,
-                             validate_args):
-    """Construct `scale` from various components.
-
-    Args:
-      identity_multiplier: floating point rank 0 `Tensor` representing a scaling
-        done to the identity matrix.
-      diag: Floating-point `Tensor` representing the diagonal matrix.
-        `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
-        diagonal matrix.
-      tril: Floating-point `Tensor` representing the diagonal matrix.
-        `scale_tril` has shape [N1, N2, ...  k], which represents a k x k lower
-        triangular matrix.
-      perturb_diag: Floating-point `Tensor` representing the diagonal matrix of
-        the low rank update.
-      perturb_factor: Floating-point `Tensor` representing factor matrix.
-      shift: Floating-point `Tensor` representing `shift in `scale @ X + shift`.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-
-    Returns:
-      scale. In the case of scaling by a constant, scale is a
-      floating point `Tensor`. Otherwise, scale is a `LinearOperator`.
-
-    Raises:
-      ValueError: if all of `tril`, `diag` and `identity_multiplier` are `None`.
-    """
-    identity_multiplier = _as_tensor(identity_multiplier, "identity_multiplier")
-    diag = _as_tensor(diag, "diag")
-    tril = _as_tensor(tril, "tril")
-    perturb_diag = _as_tensor(perturb_diag, "perturb_diag")
-    perturb_factor = _as_tensor(perturb_factor, "perturb_factor")
-
-    # If possible, use the low rank update to infer the shape of
-    # the identity matrix, when scale represents a scaled identity matrix
-    # with a low rank update.
-    shape_hint = None
-    if perturb_factor is not None:
-      shape_hint = distribution_util.dimension_size(perturb_factor, axis=-2)
-
-    if self._is_only_identity_multiplier:
-      if validate_args:
-        return control_flow_ops.with_dependencies(
-            [check_ops.assert_none_equal(
-                identity_multiplier,
-                array_ops.zeros([], identity_multiplier.dtype),
-                ["identity_multiplier should be non-zero."])],
-            identity_multiplier)
-      return identity_multiplier
-
-    scale = distribution_util.make_tril_scale(
-        loc=shift,
-        scale_tril=tril,
-        scale_diag=diag,
-        scale_identity_multiplier=identity_multiplier,
-        validate_args=validate_args,
-        assert_positive=False,
-        shape_hint=shape_hint)
-
-    if perturb_factor is not None:
-      return linalg.LinearOperatorLowRankUpdate(
-          scale,
-          u=perturb_factor,
-          diag_update=perturb_diag,
-          is_diag_update_positive=perturb_diag is None,
-          is_non_singular=True,  # Implied by is_positive_definite=True.
-          is_self_adjoint=True,
-          is_positive_definite=True,
-          is_square=True)
-
-    return scale
-
-  @property
-  def shift(self):
-    """The `shift` `Tensor` in `Y = scale @ X + shift`."""
-    return self._shift
-
-  @property
-  def scale(self):
-    """The `scale` `LinearOperator` in `Y = scale @ X + shift`."""
-    return self._scale
-
-  def _forward(self, x):
-    y = x
-    if self._is_only_identity_multiplier:
-      y *= self._scale
-      if self.shift is not None:
-        return y + self.shift
-      return y
-    y, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
-        y, expand_batch_dim=False)
-    with ops.control_dependencies(self._maybe_check_scale() if
-                                  self.validate_args else []):
-      y = self.scale.matmul(y)
-    y = self._shaper.undo_make_batch_of_event_sample_matrices(
-        y, sample_shape, expand_batch_dim=False)
-    if self.shift is not None:
-      y += self.shift
-    return y
-
-  def _inverse(self, y):
-    x = y
-    if self.shift is not None:
-      x -= self.shift
-    if self._is_only_identity_multiplier:
-      return x / self._scale
-
-    x, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
-        x, expand_batch_dim=False)
-    # Solve fails if the op is singular so we may safely skip this assertion.
-    x = self.scale.solve(x)
-    x = self._shaper.undo_make_batch_of_event_sample_matrices(
-        x, sample_shape, expand_batch_dim=False)
-    return x
-
-  def _inverse_log_det_jacobian(self, y):
-    return -self._forward_log_det_jacobian(y)
-
-  def _forward_log_det_jacobian(self, x):
-    if self._is_only_identity_multiplier:
-      # We don't pad in this case and instead let the fldj be applied
-      # via broadcast.
-      event_size = distribution_util.pick_vector(
-          math_ops.equal(self._shaper.event_ndims, 0),
-          [1], array_ops.shape(x))[-1]
-      event_size = math_ops.cast(event_size, dtype=self._scale.dtype)
-      return math_ops.log(math_ops.abs(self._scale)) * event_size
-    return self.scale.log_abs_determinant()
-
-  def _maybe_check_scale(self):
-    try:
-      return [self.scale.assert_non_singular()]
-    except NotImplementedError:
-      pass
-    return []
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py
index aca04a89df7c3ee09d5f7cc10f6779e33fa7aa66..89043b1410370074f11f2cfa59b6b6663fa62521 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator.py
@@ -18,12 +18,214 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.affine_linear_operator_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops.distributions import bijector
+from tensorflow.python.ops.linalg import linear_operator
 
-_allowed_symbols = ["AffineLinearOperator"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "AffineLinearOperator",
+]
+
+
+class AffineLinearOperator(bijector.Bijector):
+  """Compute `Y = g(X; shift, scale) = scale @ X + shift`.
+
+  `shift` is a numeric `Tensor` and `scale` is a `LinearOperator`.
+
+  If `X` is a scalar then the forward transformation is: `scale * X + shift`
+  where `*` denotes the scalar product.
+
+  Note: we don't always simply transpose `X` (but write it this way for
+  brevity). Actually the input `X` undergoes the following transformation
+  before being premultiplied by `scale`:
+
+  1. If there are no sample dims, we call `X = tf.expand_dims(X, 0)`, i.e.,
+     `new_sample_shape = [1]`. Otherwise do nothing.
+  2. The sample shape is flattened to have one dimension, i.e.,
+     `new_sample_shape = [n]` where `n = tf.reduce_prod(old_sample_shape)`.
+  3. The sample dim is cyclically rotated left by 1, i.e.,
+     `new_shape = [B1,...,Bb, k, n]` where `n` is as above, `k` is the
+     event_shape, and `B1,...,Bb` are the batch shapes for each of `b` batch
+     dimensions.
+
+  (For more details see `shape.make_batch_of_event_sample_matrices`.)
+
+  The result of the above transformation is that `X` can be regarded as a batch
+  of matrices where each column is a draw from the distribution. After
+  premultiplying by `scale`, we take the inverse of this procedure. The input
+  `Y` also undergoes the same transformation before/after premultiplying by
+  `inv(scale)`.
+
+  Example Use:
+
+  ```python
+  linalg = tf.linalg
+
+  x = [1., 2, 3]
+
+  shift = [-1., 0., 1]
+  diag = [1., 2, 3]
+  scale = linalg.LinearOperatorDiag(diag)
+  affine = AffineLinearOperator(shift, scale)
+  # In this case, `forward` is equivalent to:
+  # y = scale @ x + shift
+  y = affine.forward(x)  # [0., 4, 10]
+
+  shift = [2., 3, 1]
+  tril = [[1., 0, 0],
+          [2, 1, 0],
+          [3, 2, 1]]
+  scale = linalg.LinearOperatorLowerTriangular(tril)
+  affine = AffineLinearOperator(shift, scale)
+  # In this case, `forward` is equivalent to:
+  # np.squeeze(np.matmul(tril, np.expand_dims(x, -1)), -1) + shift
+  y = affine.forward(x)  # [3., 7, 11]
+  ```
+
+  """
+
+  def __init__(self,
+               shift=None,
+               scale=None,
+               event_ndims=1,
+               validate_args=False,
+               name="affine_linear_operator"):
+    """Instantiates the `AffineLinearOperator` bijector.
+
+    Args:
+      shift: Floating-point `Tensor`.
+      scale:  Subclass of `LinearOperator`. Represents the (batch) positive
+        definite matrix `M` in `R^{k x k}`.
+      event_ndims: Scalar `integer` `Tensor` indicating the number of dimensions
+        associated with a particular draw from the distribution. Must be 0 or 1.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+
+    Raises:
+      ValueError: if `event_ndims` is not 0 or 1.
+      TypeError: if `scale` is not a `LinearOperator`.
+      TypeError: if `shift.dtype` does not match `scale.dtype`.
+      ValueError: if not `scale.is_non_singular`.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+    graph_parents = []
+    with self._name_scope("init", values=[shift]):
+      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
+      if tensor_util.constant_value(event_ndims) is not None:
+        event_ndims = tensor_util.constant_value(event_ndims)
+        if event_ndims not in (0, 1):
+          raise ValueError("event_ndims({}) was not 0 or 1".format(event_ndims))
+      else:
+        if validate_args:
+          # Shape tool will catch if event_ndims is negative.
+          event_ndims = control_flow_ops.with_dependencies(
+              [check_ops.assert_less(
+                  event_ndims, 2, message="event_ndims must be 0 or 1")],
+              event_ndims)
+        graph_parents += [event_ndims]
+
+      # In the absence of `loc` and `scale`, we'll assume `dtype` is `float32`.
+      dtype = dtypes.float32
+
+      if shift is not None:
+        shift = ops.convert_to_tensor(shift, name="shift")
+        graph_parents += [shift]
+        dtype = shift.dtype.base_dtype
+      self._shift = shift
+
+      if scale is not None:
+        if (shift is not None and
+            shift.dtype.base_dtype != scale.dtype.base_dtype):
+          raise TypeError(
+              "shift.dtype({}) is incompatible with scale.dtype({}).".format(
+                  shift.dtype, scale.dtype))
+        if not isinstance(scale, linear_operator.LinearOperator):
+          raise TypeError("scale is not an instance of tf.LinearOperator")
+        if validate_args and not scale.is_non_singular:
+          raise ValueError("Scale matrix must be non-singular.")
+        graph_parents += scale.graph_parents
+        if scale.tensor_rank is not None:
+          batch_ndims = scale.tensor_rank - 2
+        else:
+          batch_ndims = scale.tensor_rank_tensor() - 2
+          graph_parents += [batch_ndims]
+        if scale.dtype is not None:
+          dtype = scale.dtype.base_dtype
+      else:
+        batch_ndims = 0  # We won't need shape inference when scale is None.
+      self._scale = scale
+      self._shaper = _DistributionShape(
+          batch_ndims=batch_ndims,
+          event_ndims=event_ndims,
+          validate_args=validate_args)
+      super(AffineLinearOperator, self).__init__(
+          event_ndims=event_ndims,
+          graph_parents=graph_parents,
+          is_constant_jacobian=True,
+          dtype=dtype,
+          validate_args=validate_args,
+          name=name)
+
+  @property
+  def shift(self):
+    """The `shift` `Tensor` in `Y = scale @ X + shift`."""
+    return self._shift
+
+  @property
+  def scale(self):
+    """The `scale` `LinearOperator` in `Y = scale @ X + shift`."""
+    return self._scale
+
+  def _forward(self, x):
+    y = x
+    if self.scale is not None:
+      y, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
+          y, expand_batch_dim=False)
+      with ops.control_dependencies(self._maybe_collect_assertions() if
+                                    self.validate_args else []):
+        y = self.scale.matmul(y)
+      y = self._shaper.undo_make_batch_of_event_sample_matrices(
+          y, sample_shape, expand_batch_dim=False)
+    if self.shift is not None:
+      y += self.shift
+    return y
+
+  def _inverse(self, y):
+    x = y
+    if self.shift is not None:
+      x -= self.shift
+    if self.scale is not None:
+      x, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
+          x, expand_batch_dim=False)
+      # Solve fails if the op is singular so we may safely skip this assertion.
+      x = self.scale.solve(x)
+      x = self._shaper.undo_make_batch_of_event_sample_matrices(
+          x, sample_shape, expand_batch_dim=False)
+    return x
+
+  def _inverse_log_det_jacobian(self, y):
+    return -self._forward_log_det_jacobian(y)
+
+  def _forward_log_det_jacobian(self, x):  # pylint: disable=unused-argument
+    if self.scale is None:
+      return constant_op.constant(0, dtype=x.dtype.base_dtype)
+    with ops.control_dependencies(self._maybe_collect_assertions() if
+                                  self.validate_args else []):
+      return self.scale.log_abs_determinant()
+
+  def _maybe_collect_assertions(self):
+    try:
+      return [self.scale.assert_non_singular()]
+    except NotImplementedError:
+      pass
+    return []
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator_impl.py
deleted file mode 100644
index 89043b1410370074f11f2cfa59b6b6663fa62521..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator_impl.py
+++ /dev/null
@@ -1,231 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""AffineLinearOperator bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops.distributions import bijector
-from tensorflow.python.ops.linalg import linear_operator
-
-
-__all__ = [
-    "AffineLinearOperator",
-]
-
-
-class AffineLinearOperator(bijector.Bijector):
-  """Compute `Y = g(X; shift, scale) = scale @ X + shift`.
-
-  `shift` is a numeric `Tensor` and `scale` is a `LinearOperator`.
-
-  If `X` is a scalar then the forward transformation is: `scale * X + shift`
-  where `*` denotes the scalar product.
-
-  Note: we don't always simply transpose `X` (but write it this way for
-  brevity). Actually the input `X` undergoes the following transformation
-  before being premultiplied by `scale`:
-
-  1. If there are no sample dims, we call `X = tf.expand_dims(X, 0)`, i.e.,
-     `new_sample_shape = [1]`. Otherwise do nothing.
-  2. The sample shape is flattened to have one dimension, i.e.,
-     `new_sample_shape = [n]` where `n = tf.reduce_prod(old_sample_shape)`.
-  3. The sample dim is cyclically rotated left by 1, i.e.,
-     `new_shape = [B1,...,Bb, k, n]` where `n` is as above, `k` is the
-     event_shape, and `B1,...,Bb` are the batch shapes for each of `b` batch
-     dimensions.
-
-  (For more details see `shape.make_batch_of_event_sample_matrices`.)
-
-  The result of the above transformation is that `X` can be regarded as a batch
-  of matrices where each column is a draw from the distribution. After
-  premultiplying by `scale`, we take the inverse of this procedure. The input
-  `Y` also undergoes the same transformation before/after premultiplying by
-  `inv(scale)`.
-
-  Example Use:
-
-  ```python
-  linalg = tf.linalg
-
-  x = [1., 2, 3]
-
-  shift = [-1., 0., 1]
-  diag = [1., 2, 3]
-  scale = linalg.LinearOperatorDiag(diag)
-  affine = AffineLinearOperator(shift, scale)
-  # In this case, `forward` is equivalent to:
-  # y = scale @ x + shift
-  y = affine.forward(x)  # [0., 4, 10]
-
-  shift = [2., 3, 1]
-  tril = [[1., 0, 0],
-          [2, 1, 0],
-          [3, 2, 1]]
-  scale = linalg.LinearOperatorLowerTriangular(tril)
-  affine = AffineLinearOperator(shift, scale)
-  # In this case, `forward` is equivalent to:
-  # np.squeeze(np.matmul(tril, np.expand_dims(x, -1)), -1) + shift
-  y = affine.forward(x)  # [3., 7, 11]
-  ```
-
-  """
-
-  def __init__(self,
-               shift=None,
-               scale=None,
-               event_ndims=1,
-               validate_args=False,
-               name="affine_linear_operator"):
-    """Instantiates the `AffineLinearOperator` bijector.
-
-    Args:
-      shift: Floating-point `Tensor`.
-      scale:  Subclass of `LinearOperator`. Represents the (batch) positive
-        definite matrix `M` in `R^{k x k}`.
-      event_ndims: Scalar `integer` `Tensor` indicating the number of dimensions
-        associated with a particular draw from the distribution. Must be 0 or 1.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-
-    Raises:
-      ValueError: if `event_ndims` is not 0 or 1.
-      TypeError: if `scale` is not a `LinearOperator`.
-      TypeError: if `shift.dtype` does not match `scale.dtype`.
-      ValueError: if not `scale.is_non_singular`.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-    graph_parents = []
-    with self._name_scope("init", values=[shift]):
-      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
-      if tensor_util.constant_value(event_ndims) is not None:
-        event_ndims = tensor_util.constant_value(event_ndims)
-        if event_ndims not in (0, 1):
-          raise ValueError("event_ndims({}) was not 0 or 1".format(event_ndims))
-      else:
-        if validate_args:
-          # Shape tool will catch if event_ndims is negative.
-          event_ndims = control_flow_ops.with_dependencies(
-              [check_ops.assert_less(
-                  event_ndims, 2, message="event_ndims must be 0 or 1")],
-              event_ndims)
-        graph_parents += [event_ndims]
-
-      # In the absence of `loc` and `scale`, we'll assume `dtype` is `float32`.
-      dtype = dtypes.float32
-
-      if shift is not None:
-        shift = ops.convert_to_tensor(shift, name="shift")
-        graph_parents += [shift]
-        dtype = shift.dtype.base_dtype
-      self._shift = shift
-
-      if scale is not None:
-        if (shift is not None and
-            shift.dtype.base_dtype != scale.dtype.base_dtype):
-          raise TypeError(
-              "shift.dtype({}) is incompatible with scale.dtype({}).".format(
-                  shift.dtype, scale.dtype))
-        if not isinstance(scale, linear_operator.LinearOperator):
-          raise TypeError("scale is not an instance of tf.LinearOperator")
-        if validate_args and not scale.is_non_singular:
-          raise ValueError("Scale matrix must be non-singular.")
-        graph_parents += scale.graph_parents
-        if scale.tensor_rank is not None:
-          batch_ndims = scale.tensor_rank - 2
-        else:
-          batch_ndims = scale.tensor_rank_tensor() - 2
-          graph_parents += [batch_ndims]
-        if scale.dtype is not None:
-          dtype = scale.dtype.base_dtype
-      else:
-        batch_ndims = 0  # We won't need shape inference when scale is None.
-      self._scale = scale
-      self._shaper = _DistributionShape(
-          batch_ndims=batch_ndims,
-          event_ndims=event_ndims,
-          validate_args=validate_args)
-      super(AffineLinearOperator, self).__init__(
-          event_ndims=event_ndims,
-          graph_parents=graph_parents,
-          is_constant_jacobian=True,
-          dtype=dtype,
-          validate_args=validate_args,
-          name=name)
-
-  @property
-  def shift(self):
-    """The `shift` `Tensor` in `Y = scale @ X + shift`."""
-    return self._shift
-
-  @property
-  def scale(self):
-    """The `scale` `LinearOperator` in `Y = scale @ X + shift`."""
-    return self._scale
-
-  def _forward(self, x):
-    y = x
-    if self.scale is not None:
-      y, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
-          y, expand_batch_dim=False)
-      with ops.control_dependencies(self._maybe_collect_assertions() if
-                                    self.validate_args else []):
-        y = self.scale.matmul(y)
-      y = self._shaper.undo_make_batch_of_event_sample_matrices(
-          y, sample_shape, expand_batch_dim=False)
-    if self.shift is not None:
-      y += self.shift
-    return y
-
-  def _inverse(self, y):
-    x = y
-    if self.shift is not None:
-      x -= self.shift
-    if self.scale is not None:
-      x, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
-          x, expand_batch_dim=False)
-      # Solve fails if the op is singular so we may safely skip this assertion.
-      x = self.scale.solve(x)
-      x = self._shaper.undo_make_batch_of_event_sample_matrices(
-          x, sample_shape, expand_batch_dim=False)
-    return x
-
-  def _inverse_log_det_jacobian(self, y):
-    return -self._forward_log_det_jacobian(y)
-
-  def _forward_log_det_jacobian(self, x):  # pylint: disable=unused-argument
-    if self.scale is None:
-      return constant_op.constant(0, dtype=x.dtype.base_dtype)
-    with ops.control_dependencies(self._maybe_collect_assertions() if
-                                  self.validate_args else []):
-      return self.scale.log_abs_determinant()
-
-  def _maybe_collect_assertions(self):
-    try:
-      return [self.scale.assert_non_singular()]
-    except NotImplementedError:
-      pass
-    return []
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/chain.py b/tensorflow/contrib/distributions/python/ops/bijectors/chain.py
index 0db10fb75c8483a8209f39370362b05a03d047ca..3ce7c26213034c7345a20faa803c94a1bfa8d579 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/chain.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/chain.py
@@ -18,12 +18,151 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.chain_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import itertools
 
-_allowed_symbols = ["Chain"]
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops.distributions import bijector
 
-remove_undocumented(__name__, _allowed_symbols)
+
+__all__ = [
+    "Chain",
+]
+
+
+class Chain(bijector.Bijector):
+  """Bijector which applies a sequence of bijectors.
+
+  Example Use:
+
+  ```python
+  chain = Chain([Exp(), Softplus()], name="one_plus_exp")
+  ```
+
+  Results in:
+
+  * Forward:
+
+   ```python
+   exp = Exp()
+   softplus = Softplus()
+   Chain([exp, softplus]).forward(x)
+   = exp.forward(softplus.forward(x))
+   = tf.exp(tf.log(1. + tf.exp(x)))
+   = 1. + tf.exp(x)
+   ```
+
+  * Inverse:
+
+   ```python
+   exp = Exp()
+   softplus = Softplus()
+   Chain([exp, softplus]).inverse(y)
+   = softplus.inverse(exp.inverse(y))
+   = tf.log(tf.exp(tf.log(y)) - 1.)
+   = tf.log(y - 1.)
+   ```
+
+  """
+
+  def __init__(self, bijectors=None, validate_args=False, name=None):
+    """Instantiates `Chain` bijector.
+
+    Args:
+      bijectors: Python `list` of bijector instances. An empty list makes this
+        bijector equivalent to the `Identity` bijector.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str`, name given to ops managed by this object. Default:
+        E.g., `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`.
+
+    Raises:
+      ValueError: if bijectors have different dtypes.
+    """
+    if bijectors is None:
+      bijectors = ()
+    self._bijectors = bijectors
+
+    for a_bijector in bijectors:
+      if not a_bijector._is_injective:  # pylint: disable=protected-access
+        raise NotImplementedError(
+            "Invert is not implemented for non-injective bijector ({})".format(
+                a_bijector.name))
+
+    dtype = list(set([b.dtype for b in bijectors]))
+    if len(dtype) > 2:
+      raise ValueError("incompatible dtypes: %s" % dtype)
+    elif len(dtype) == 2:
+      dtype = dtype[1] if dtype[0] is None else dtype[0]
+      event_ndims = bijectors[0].event_ndims
+    elif len(dtype) == 1:
+      dtype = dtype[0]
+      event_ndims = bijectors[0].event_ndims
+    else:
+      dtype = None
+      event_ndims = None
+
+    super(Chain, self).__init__(
+        graph_parents=list(itertools.chain.from_iterable(
+            b.graph_parents for b in bijectors)),
+        is_constant_jacobian=all(b.is_constant_jacobian for b in bijectors),
+        validate_args=validate_args,
+        dtype=dtype,
+        event_ndims=event_ndims,
+        name=name or ("identity" if not bijectors else
+                      "_of_".join(["chain"] + [b.name for b in bijectors])))
+
+  @property
+  def bijectors(self):
+    return self._bijectors
+
+  def _shape_helper(self, func_name, input_shape, reverse):
+    new_shape = input_shape
+    for b in reversed(self.bijectors) if reverse else self.bijectors:
+      func = getattr(b, func_name, None)
+      if func is None:
+        raise ValueError("unable to call %s on bijector %s (%s)" %
+                         (func_name, b.name, func))
+      new_shape = func(new_shape)
+    return new_shape
+
+  def _forward_event_shape(self, input_shape):
+    return self._shape_helper("forward_event_shape", input_shape,
+                              reverse=True)
+
+  def _forward_event_shape_tensor(self, input_shape):
+    return self._shape_helper(
+        "forward_event_shape_tensor", input_shape, reverse=True)
+
+  def _inverse_event_shape(self, output_shape):
+    return self._shape_helper("inverse_event_shape", output_shape,
+                              reverse=False)
+
+  def _inverse_event_shape_tensor(self, output_shape):
+    return self._shape_helper("inverse_event_shape_tensor", output_shape,
+                              reverse=False)
+
+  def _inverse(self, y, **kwargs):
+    for b in self.bijectors:
+      y = b.inverse(y, **kwargs.get(b.name, {}))
+    return y
+
+  def _inverse_log_det_jacobian(self, y, **kwargs):
+    ildj = constant_op.constant(0., dtype=y.dtype,
+                                name="inverse_log_det_jacobian")
+    for b in self.bijectors:
+      ildj += b.inverse_log_det_jacobian(y, **kwargs.get(b.name, {}))
+      y = b.inverse(y, **kwargs.get(b.name, {}))
+    return ildj
+
+  def _forward(self, x, **kwargs):
+    for b in reversed(self.bijectors):
+      x = b.forward(x, **kwargs.get(b.name, {}))
+    return x
+
+  def _forward_log_det_jacobian(self, x, **kwargs):
+    fldj = constant_op.constant(0., dtype=x.dtype,
+                                name="forward_log_det_jacobian")
+    for b in reversed(self.bijectors):
+      fldj += b.forward_log_det_jacobian(x, **kwargs.get(b.name, {}))
+      x = b.forward(x, **kwargs.get(b.name, {}))
+    return fldj
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/chain_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/chain_impl.py
deleted file mode 100644
index 3ce7c26213034c7345a20faa803c94a1bfa8d579..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/chain_impl.py
+++ /dev/null
@@ -1,168 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Chain bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import itertools
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "Chain",
-]
-
-
-class Chain(bijector.Bijector):
-  """Bijector which applies a sequence of bijectors.
-
-  Example Use:
-
-  ```python
-  chain = Chain([Exp(), Softplus()], name="one_plus_exp")
-  ```
-
-  Results in:
-
-  * Forward:
-
-   ```python
-   exp = Exp()
-   softplus = Softplus()
-   Chain([exp, softplus]).forward(x)
-   = exp.forward(softplus.forward(x))
-   = tf.exp(tf.log(1. + tf.exp(x)))
-   = 1. + tf.exp(x)
-   ```
-
-  * Inverse:
-
-   ```python
-   exp = Exp()
-   softplus = Softplus()
-   Chain([exp, softplus]).inverse(y)
-   = softplus.inverse(exp.inverse(y))
-   = tf.log(tf.exp(tf.log(y)) - 1.)
-   = tf.log(y - 1.)
-   ```
-
-  """
-
-  def __init__(self, bijectors=None, validate_args=False, name=None):
-    """Instantiates `Chain` bijector.
-
-    Args:
-      bijectors: Python `list` of bijector instances. An empty list makes this
-        bijector equivalent to the `Identity` bijector.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str`, name given to ops managed by this object. Default:
-        E.g., `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`.
-
-    Raises:
-      ValueError: if bijectors have different dtypes.
-    """
-    if bijectors is None:
-      bijectors = ()
-    self._bijectors = bijectors
-
-    for a_bijector in bijectors:
-      if not a_bijector._is_injective:  # pylint: disable=protected-access
-        raise NotImplementedError(
-            "Invert is not implemented for non-injective bijector ({})".format(
-                a_bijector.name))
-
-    dtype = list(set([b.dtype for b in bijectors]))
-    if len(dtype) > 2:
-      raise ValueError("incompatible dtypes: %s" % dtype)
-    elif len(dtype) == 2:
-      dtype = dtype[1] if dtype[0] is None else dtype[0]
-      event_ndims = bijectors[0].event_ndims
-    elif len(dtype) == 1:
-      dtype = dtype[0]
-      event_ndims = bijectors[0].event_ndims
-    else:
-      dtype = None
-      event_ndims = None
-
-    super(Chain, self).__init__(
-        graph_parents=list(itertools.chain.from_iterable(
-            b.graph_parents for b in bijectors)),
-        is_constant_jacobian=all(b.is_constant_jacobian for b in bijectors),
-        validate_args=validate_args,
-        dtype=dtype,
-        event_ndims=event_ndims,
-        name=name or ("identity" if not bijectors else
-                      "_of_".join(["chain"] + [b.name for b in bijectors])))
-
-  @property
-  def bijectors(self):
-    return self._bijectors
-
-  def _shape_helper(self, func_name, input_shape, reverse):
-    new_shape = input_shape
-    for b in reversed(self.bijectors) if reverse else self.bijectors:
-      func = getattr(b, func_name, None)
-      if func is None:
-        raise ValueError("unable to call %s on bijector %s (%s)" %
-                         (func_name, b.name, func))
-      new_shape = func(new_shape)
-    return new_shape
-
-  def _forward_event_shape(self, input_shape):
-    return self._shape_helper("forward_event_shape", input_shape,
-                              reverse=True)
-
-  def _forward_event_shape_tensor(self, input_shape):
-    return self._shape_helper(
-        "forward_event_shape_tensor", input_shape, reverse=True)
-
-  def _inverse_event_shape(self, output_shape):
-    return self._shape_helper("inverse_event_shape", output_shape,
-                              reverse=False)
-
-  def _inverse_event_shape_tensor(self, output_shape):
-    return self._shape_helper("inverse_event_shape_tensor", output_shape,
-                              reverse=False)
-
-  def _inverse(self, y, **kwargs):
-    for b in self.bijectors:
-      y = b.inverse(y, **kwargs.get(b.name, {}))
-    return y
-
-  def _inverse_log_det_jacobian(self, y, **kwargs):
-    ildj = constant_op.constant(0., dtype=y.dtype,
-                                name="inverse_log_det_jacobian")
-    for b in self.bijectors:
-      ildj += b.inverse_log_det_jacobian(y, **kwargs.get(b.name, {}))
-      y = b.inverse(y, **kwargs.get(b.name, {}))
-    return ildj
-
-  def _forward(self, x, **kwargs):
-    for b in reversed(self.bijectors):
-      x = b.forward(x, **kwargs.get(b.name, {}))
-    return x
-
-  def _forward_log_det_jacobian(self, x, **kwargs):
-    fldj = constant_op.constant(0., dtype=x.dtype,
-                                name="forward_log_det_jacobian")
-    for b in reversed(self.bijectors):
-      fldj += b.forward_log_det_jacobian(x, **kwargs.get(b.name, {}))
-      x = b.forward(x, **kwargs.get(b.name, {}))
-    return fldj
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py
index 4686af8bc42a3232cb3a34f2cfcce8323c5896dd..cbd60f92a60612c6cf791b2c7708a3310c6e2b6b 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py
@@ -18,12 +18,219 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.cholesky_outer_product_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = ["CholeskyOuterProduct"]
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import linalg_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
+from tensorflow.python.ops.distributions import util as distribution_util
 
-remove_undocumented(__name__, _allowed_symbols)
+
+__all__ = [
+    "CholeskyOuterProduct",
+]
+
+
+class CholeskyOuterProduct(bijector.Bijector):
+  """Compute `g(X) = X @ X.T`; X is lower-triangular, positive-diagonal matrix.
+
+  `event_ndims` must be 0 or 2, i.e., scalar or matrix.
+
+  Note: the upper-triangular part of X is ignored (whether or not its zero).
+
+  The surjectivity of g as a map from  the set of n x n positive-diagonal
+  lower-triangular matrices to the set of SPD matrices follows immediately from
+  executing the Cholesky factorization algorithm on an SPD matrix A to produce a
+  positive-diagonal lower-triangular matrix L such that `A = L @ L.T`.
+
+  To prove the injectivity of g, suppose that L_1 and L_2 are lower-triangular
+  with positive diagonals and satisfy `A = L_1 @ L_1.T = L_2 @ L_2.T`. Then
+    `inv(L_1) @ A @ inv(L_1).T = [inv(L_1) @ L_2] @ [inv(L_1) @ L_2].T = I`.
+  Setting `L_3 := inv(L_1) @ L_2`, that L_3 is a positive-diagonal
+  lower-triangular matrix follows from `inv(L_1)` being positive-diagonal
+  lower-triangular (which follows from the diagonal of a triangular matrix being
+  its spectrum), and that the product of two positive-diagonal lower-triangular
+  matrices is another positive-diagonal lower-triangular matrix.
+
+  A simple inductive argument (proceding one column of L_3 at a time) shows
+  that, if `I = L_3 @ L_3.T`, with L_3 being lower-triangular with positive-
+  diagonal, then `L_3 = I`. Thus, `L_1 = L_2`, proving injectivity of g.
+
+  Examples:
+
+  ```python
+  bijector.CholeskyOuterProduct(event_ndims=2).forward(x=[[1., 0], [2, 1]])
+  # Result: [[1., 2], [2, 5]], i.e., x @ x.T
+
+  bijector.CholeskyOuterProduct(event_ndims=2).inverse(y=[[1., 2], [2, 5]])
+  # Result: [[1., 0], [2, 1]], i.e., cholesky(y).
+  ```
+
+  """
+
+  def __init__(self, event_ndims=2, validate_args=False,
+               name="cholesky_outer_product"):
+    """Instantiates the `CholeskyOuterProduct` bijector.
+
+    Args:
+      event_ndims: `constant` `int32` scalar `Tensor` indicating the number of
+        dimensions associated with a particular draw from the distribution. Must
+        be 0 or 2.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+
+    Raises:
+      ValueError: if event_ndims is neither 0 or 2.
+    """
+    self._graph_parents = []
+    self._name = name
+    with self._name_scope("init", values=[event_ndims]):
+      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
+      event_ndims = tensor_util.constant_value(event_ndims)
+    if event_ndims is None or event_ndims not in [0, 2]:
+      raise ValueError("`event_ndims` must be a TF constant which is 0 or 2")
+    self._static_event_ndims = event_ndims
+    super(CholeskyOuterProduct, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
+
+  def _forward(self, x):
+    if self._static_event_ndims == 0:
+      return math_ops.square(x)
+    if self.validate_args:
+      is_matrix = check_ops.assert_rank_at_least(x, 2)
+      shape = array_ops.shape(x)
+      is_square = check_ops.assert_equal(shape[-2], shape[-1])
+      x = control_flow_ops.with_dependencies([is_matrix, is_square], x)
+    # For safety, explicitly zero-out the upper triangular part.
+    x = array_ops.matrix_band_part(x, -1, 0)
+    return math_ops.matmul(x, x, adjoint_b=True)
+
+  def _inverse(self, y):
+    return (math_ops.sqrt(y) if self._static_event_ndims == 0
+            else linalg_ops.cholesky(y))
+
+  def _inverse_log_det_jacobian(self, y):
+    return -self._forward_log_det_jacobian(x=self._inverse(y))
+
+  def _forward_log_det_jacobian(self, x):
+    # Let Y be a symmetric, positive definite matrix and write:
+    #   Y = X X.T
+    # where X is lower-triangular.
+    #
+    # Observe that,
+    #   dY[i,j]/dX[a,b]
+    #   = d/dX[a,b] { X[i,:] X[j,:] }
+    #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
+    #
+    # To compute the Jacobian dX/dY we must represent X,Y as vectors. Since Y is
+    # symmetric and X is lower-triangular, we need vectors of dimension:
+    #   d = p (p + 1) / 2
+    # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
+    #   k = { i (i + 1) / 2 + j   i>=j
+    #       { undef               i<j
+    # and assume zero-based indexes. When k is undef, the element is dropped.
+    # Example:
+    #           j      k
+    #        0 1 2 3  /
+    #    0 [ 0 . . . ]
+    # i  1 [ 1 2 . . ]
+    #    2 [ 3 4 5 . ]
+    #    3 [ 6 7 8 9 ]
+    # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
+    # slight abuse: k(i,j)=undef means the element is dropped.)
+    #
+    # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
+    # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
+    # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
+    # (1) j<=i<a thus i,j!=a.
+    # (2) i=a>j  thus i,j!=a.
+    #
+    # Since the Jacobian is lower-triangular, we need only compute the product
+    # of diagonal elements:
+    #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
+    #   = X[j,j] + I[i=j] X[i,j]
+    #   = 2 X[j,j].
+    # Since there is a 2 X[j,j] term for every lower-triangular element of X we
+    # conclude:
+    #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
+    if self._static_event_ndims == 0:
+      if self.validate_args:
+        is_positive = check_ops.assert_positive(
+            x, message="All elements must be positive.")
+        x = control_flow_ops.with_dependencies([is_positive], x)
+      return np.log(2.) + math_ops.log(x)
+
+    diag = array_ops.matrix_diag_part(x)
+
+    # We now ensure diag is columnar. Eg, if `diag = [1, 2, 3]` then the output
+    # is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then the
+    # output is unchanged.
+    diag = self._make_columnar(diag)
+
+    if self.validate_args:
+      is_matrix = check_ops.assert_rank_at_least(
+          x, 2, message="Input must be a (batch of) matrix.")
+      shape = array_ops.shape(x)
+      is_square = check_ops.assert_equal(
+          shape[-2], shape[-1],
+          message="Input must be a (batch of) square matrix.")
+      # Assuming lower-triangular means we only need check diag>0.
+      is_positive_definite = check_ops.assert_positive(
+          diag, message="Input must be positive definite.")
+      x = control_flow_ops.with_dependencies(
+          [is_matrix, is_square, is_positive_definite], x)
+
+    # Create a vector equal to: [p, p-1, ..., 2, 1].
+    if x.get_shape().ndims is None or x.get_shape()[-1].value is None:
+      p_int = array_ops.shape(x)[-1]
+      p_float = math_ops.cast(p_int, dtype=x.dtype)
+    else:
+      p_int = x.get_shape()[-1].value
+      p_float = np.array(p_int, dtype=x.dtype.as_numpy_dtype)
+    exponents = math_ops.linspace(p_float, 1., p_int)
+
+    sum_weighted_log_diag = array_ops.squeeze(
+        math_ops.matmul(math_ops.log(diag),
+                        exponents[..., array_ops.newaxis]),
+        squeeze_dims=-1)
+    fldj = p_float * np.log(2.) + sum_weighted_log_diag
+
+    return fldj
+
+  def _make_columnar(self, x):
+    """Ensures non-scalar input has at least one column.
+
+    Example:
+      If `x = [1, 2, 3]` then the output is `[[1], [2], [3]]`.
+
+      If `x = [[1, 2, 3], [4, 5, 6]]` then the output is unchanged.
+
+      If `x = 1` then the output is unchanged.
+
+    Args:
+      x: `Tensor`.
+
+    Returns:
+      columnar_x: `Tensor` with at least two dimensions.
+    """
+    if x.get_shape().ndims is not None:
+      if x.get_shape().ndims == 1:
+        x = x[array_ops.newaxis, :]
+      return x
+    shape = array_ops.shape(x)
+    maybe_expanded_shape = array_ops.concat([
+        shape[:-1],
+        distribution_util.pick_vector(
+            math_ops.equal(array_ops.rank(x), 1),
+            [1], np.array([], dtype=np.int32)),
+        shape[-1:],
+    ], 0)
+    return array_ops.reshape(x, maybe_expanded_shape)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product_impl.py
deleted file mode 100644
index cbd60f92a60612c6cf791b2c7708a3310c6e2b6b..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product_impl.py
+++ /dev/null
@@ -1,236 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""CholeskyOuterProduct bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import linalg_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-from tensorflow.python.ops.distributions import util as distribution_util
-
-
-__all__ = [
-    "CholeskyOuterProduct",
-]
-
-
-class CholeskyOuterProduct(bijector.Bijector):
-  """Compute `g(X) = X @ X.T`; X is lower-triangular, positive-diagonal matrix.
-
-  `event_ndims` must be 0 or 2, i.e., scalar or matrix.
-
-  Note: the upper-triangular part of X is ignored (whether or not its zero).
-
-  The surjectivity of g as a map from  the set of n x n positive-diagonal
-  lower-triangular matrices to the set of SPD matrices follows immediately from
-  executing the Cholesky factorization algorithm on an SPD matrix A to produce a
-  positive-diagonal lower-triangular matrix L such that `A = L @ L.T`.
-
-  To prove the injectivity of g, suppose that L_1 and L_2 are lower-triangular
-  with positive diagonals and satisfy `A = L_1 @ L_1.T = L_2 @ L_2.T`. Then
-    `inv(L_1) @ A @ inv(L_1).T = [inv(L_1) @ L_2] @ [inv(L_1) @ L_2].T = I`.
-  Setting `L_3 := inv(L_1) @ L_2`, that L_3 is a positive-diagonal
-  lower-triangular matrix follows from `inv(L_1)` being positive-diagonal
-  lower-triangular (which follows from the diagonal of a triangular matrix being
-  its spectrum), and that the product of two positive-diagonal lower-triangular
-  matrices is another positive-diagonal lower-triangular matrix.
-
-  A simple inductive argument (proceding one column of L_3 at a time) shows
-  that, if `I = L_3 @ L_3.T`, with L_3 being lower-triangular with positive-
-  diagonal, then `L_3 = I`. Thus, `L_1 = L_2`, proving injectivity of g.
-
-  Examples:
-
-  ```python
-  bijector.CholeskyOuterProduct(event_ndims=2).forward(x=[[1., 0], [2, 1]])
-  # Result: [[1., 2], [2, 5]], i.e., x @ x.T
-
-  bijector.CholeskyOuterProduct(event_ndims=2).inverse(y=[[1., 2], [2, 5]])
-  # Result: [[1., 0], [2, 1]], i.e., cholesky(y).
-  ```
-
-  """
-
-  def __init__(self, event_ndims=2, validate_args=False,
-               name="cholesky_outer_product"):
-    """Instantiates the `CholeskyOuterProduct` bijector.
-
-    Args:
-      event_ndims: `constant` `int32` scalar `Tensor` indicating the number of
-        dimensions associated with a particular draw from the distribution. Must
-        be 0 or 2.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-
-    Raises:
-      ValueError: if event_ndims is neither 0 or 2.
-    """
-    self._graph_parents = []
-    self._name = name
-    with self._name_scope("init", values=[event_ndims]):
-      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
-      event_ndims = tensor_util.constant_value(event_ndims)
-    if event_ndims is None or event_ndims not in [0, 2]:
-      raise ValueError("`event_ndims` must be a TF constant which is 0 or 2")
-    self._static_event_ndims = event_ndims
-    super(CholeskyOuterProduct, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
-
-  def _forward(self, x):
-    if self._static_event_ndims == 0:
-      return math_ops.square(x)
-    if self.validate_args:
-      is_matrix = check_ops.assert_rank_at_least(x, 2)
-      shape = array_ops.shape(x)
-      is_square = check_ops.assert_equal(shape[-2], shape[-1])
-      x = control_flow_ops.with_dependencies([is_matrix, is_square], x)
-    # For safety, explicitly zero-out the upper triangular part.
-    x = array_ops.matrix_band_part(x, -1, 0)
-    return math_ops.matmul(x, x, adjoint_b=True)
-
-  def _inverse(self, y):
-    return (math_ops.sqrt(y) if self._static_event_ndims == 0
-            else linalg_ops.cholesky(y))
-
-  def _inverse_log_det_jacobian(self, y):
-    return -self._forward_log_det_jacobian(x=self._inverse(y))
-
-  def _forward_log_det_jacobian(self, x):
-    # Let Y be a symmetric, positive definite matrix and write:
-    #   Y = X X.T
-    # where X is lower-triangular.
-    #
-    # Observe that,
-    #   dY[i,j]/dX[a,b]
-    #   = d/dX[a,b] { X[i,:] X[j,:] }
-    #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
-    #
-    # To compute the Jacobian dX/dY we must represent X,Y as vectors. Since Y is
-    # symmetric and X is lower-triangular, we need vectors of dimension:
-    #   d = p (p + 1) / 2
-    # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
-    #   k = { i (i + 1) / 2 + j   i>=j
-    #       { undef               i<j
-    # and assume zero-based indexes. When k is undef, the element is dropped.
-    # Example:
-    #           j      k
-    #        0 1 2 3  /
-    #    0 [ 0 . . . ]
-    # i  1 [ 1 2 . . ]
-    #    2 [ 3 4 5 . ]
-    #    3 [ 6 7 8 9 ]
-    # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
-    # slight abuse: k(i,j)=undef means the element is dropped.)
-    #
-    # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
-    # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
-    # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
-    # (1) j<=i<a thus i,j!=a.
-    # (2) i=a>j  thus i,j!=a.
-    #
-    # Since the Jacobian is lower-triangular, we need only compute the product
-    # of diagonal elements:
-    #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
-    #   = X[j,j] + I[i=j] X[i,j]
-    #   = 2 X[j,j].
-    # Since there is a 2 X[j,j] term for every lower-triangular element of X we
-    # conclude:
-    #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
-    if self._static_event_ndims == 0:
-      if self.validate_args:
-        is_positive = check_ops.assert_positive(
-            x, message="All elements must be positive.")
-        x = control_flow_ops.with_dependencies([is_positive], x)
-      return np.log(2.) + math_ops.log(x)
-
-    diag = array_ops.matrix_diag_part(x)
-
-    # We now ensure diag is columnar. Eg, if `diag = [1, 2, 3]` then the output
-    # is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then the
-    # output is unchanged.
-    diag = self._make_columnar(diag)
-
-    if self.validate_args:
-      is_matrix = check_ops.assert_rank_at_least(
-          x, 2, message="Input must be a (batch of) matrix.")
-      shape = array_ops.shape(x)
-      is_square = check_ops.assert_equal(
-          shape[-2], shape[-1],
-          message="Input must be a (batch of) square matrix.")
-      # Assuming lower-triangular means we only need check diag>0.
-      is_positive_definite = check_ops.assert_positive(
-          diag, message="Input must be positive definite.")
-      x = control_flow_ops.with_dependencies(
-          [is_matrix, is_square, is_positive_definite], x)
-
-    # Create a vector equal to: [p, p-1, ..., 2, 1].
-    if x.get_shape().ndims is None or x.get_shape()[-1].value is None:
-      p_int = array_ops.shape(x)[-1]
-      p_float = math_ops.cast(p_int, dtype=x.dtype)
-    else:
-      p_int = x.get_shape()[-1].value
-      p_float = np.array(p_int, dtype=x.dtype.as_numpy_dtype)
-    exponents = math_ops.linspace(p_float, 1., p_int)
-
-    sum_weighted_log_diag = array_ops.squeeze(
-        math_ops.matmul(math_ops.log(diag),
-                        exponents[..., array_ops.newaxis]),
-        squeeze_dims=-1)
-    fldj = p_float * np.log(2.) + sum_weighted_log_diag
-
-    return fldj
-
-  def _make_columnar(self, x):
-    """Ensures non-scalar input has at least one column.
-
-    Example:
-      If `x = [1, 2, 3]` then the output is `[[1], [2], [3]]`.
-
-      If `x = [[1, 2, 3], [4, 5, 6]]` then the output is unchanged.
-
-      If `x = 1` then the output is unchanged.
-
-    Args:
-      x: `Tensor`.
-
-    Returns:
-      columnar_x: `Tensor` with at least two dimensions.
-    """
-    if x.get_shape().ndims is not None:
-      if x.get_shape().ndims == 1:
-        x = x[array_ops.newaxis, :]
-      return x
-    shape = array_ops.shape(x)
-    maybe_expanded_shape = array_ops.concat([
-        shape[:-1],
-        distribution_util.pick_vector(
-            math_ops.equal(array_ops.rank(x), 1),
-            [1], np.array([], dtype=np.int32)),
-        shape[-1:],
-    ], 0)
-    return array_ops.reshape(x, maybe_expanded_shape)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py b/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py
index d254b635d28099a09a2054536f04ffee3a355b2f..ccb1f029277bc07011df7be047a075274f2b3a27 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector.py
@@ -18,12 +18,38 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.conditional_bijector_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.ops.distributions import bijector
+from tensorflow.python.ops.distributions import util as distribution_util
 
-_allowed_symbols = ["ConditionalBijector"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = ["ConditionalBijector"]
+
+
+class ConditionalBijector(bijector.Bijector):
+  """Conditional Bijector is a Bijector that allows intrinsic conditioning."""
+
+  @distribution_util.AppendDocstring(kwargs_dict={
+      "**condition_kwargs":
+      "Named arguments forwarded to subclass implementation."})
+  def forward(self, x, name="forward", **condition_kwargs):
+    return self._call_forward(x, name, **condition_kwargs)
+
+  @distribution_util.AppendDocstring(kwargs_dict={
+      "**condition_kwargs":
+      "Named arguments forwarded to subclass implementation."})
+  def inverse(self, y, name="inverse", **condition_kwargs):
+    return self._call_inverse(y, name, **condition_kwargs)
+
+  @distribution_util.AppendDocstring(kwargs_dict={
+      "**condition_kwargs":
+      "Named arguments forwarded to subclass implementation."})
+  def inverse_log_det_jacobian(
+      self, y, name="inverse_log_det_jacobian", **condition_kwargs):
+    return self._call_inverse_log_det_jacobian(y, name, **condition_kwargs)
+
+  @distribution_util.AppendDocstring(kwargs_dict={
+      "**condition_kwargs":
+      "Named arguments forwarded to subclass implementation."})
+  def forward_log_det_jacobian(
+      self, x, name="forward_log_det_jacobian", **condition_kwargs):
+    return self._call_forward_log_det_jacobian(x, name, **condition_kwargs)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector_impl.py
deleted file mode 100644
index ccb1f029277bc07011df7be047a075274f2b3a27..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/conditional_bijector_impl.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""ConditionalBijector base."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.ops.distributions import bijector
-from tensorflow.python.ops.distributions import util as distribution_util
-
-
-__all__ = ["ConditionalBijector"]
-
-
-class ConditionalBijector(bijector.Bijector):
-  """Conditional Bijector is a Bijector that allows intrinsic conditioning."""
-
-  @distribution_util.AppendDocstring(kwargs_dict={
-      "**condition_kwargs":
-      "Named arguments forwarded to subclass implementation."})
-  def forward(self, x, name="forward", **condition_kwargs):
-    return self._call_forward(x, name, **condition_kwargs)
-
-  @distribution_util.AppendDocstring(kwargs_dict={
-      "**condition_kwargs":
-      "Named arguments forwarded to subclass implementation."})
-  def inverse(self, y, name="inverse", **condition_kwargs):
-    return self._call_inverse(y, name, **condition_kwargs)
-
-  @distribution_util.AppendDocstring(kwargs_dict={
-      "**condition_kwargs":
-      "Named arguments forwarded to subclass implementation."})
-  def inverse_log_det_jacobian(
-      self, y, name="inverse_log_det_jacobian", **condition_kwargs):
-    return self._call_inverse_log_det_jacobian(y, name, **condition_kwargs)
-
-  @distribution_util.AppendDocstring(kwargs_dict={
-      "**condition_kwargs":
-      "Named arguments forwarded to subclass implementation."})
-  def forward_log_det_jacobian(
-      self, x, name="forward_log_det_jacobian", **condition_kwargs):
-    return self._call_forward_log_det_jacobian(x, name, **condition_kwargs)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/exp.py b/tensorflow/contrib/distributions/python/ops/bijectors/exp.py
index 399d713098eb7223601beb9518dc51dd6160ad64..b1ff840d62a73c941a4d67dec73b5c9f4d5353f9 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/exp.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/exp.py
@@ -18,12 +18,49 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.exp_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.contrib.distributions.python.ops.bijectors import power_transform
 
-_allowed_symbols = ["Exp"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Exp",
+]
+
+
+class Exp(power_transform.PowerTransform):
+  """Compute `Y = g(X) = exp(X)`.
+
+    Example Use:
+
+    ```python
+    # Create the Y=g(X)=exp(X) transform which works only on Tensors with 1
+    # batch ndim and 2 event ndims (i.e., vector of matrices).
+    exp = Exp(event_ndims=2)
+    x = [[[1., 2],
+           [3, 4]],
+          [[5, 6],
+           [7, 8]]]
+    exp(x) == exp.forward(x)
+    log(x) == exp.inverse(x)
+    ```
+
+    Note: the exp(.) is applied element-wise but the Jacobian is a reduction
+    over the event space.
+  """
+
+  def __init__(self,
+               event_ndims=0,
+               validate_args=False,
+               name="exp"):
+    """Instantiates the `Exp` bijector.
+
+    Args:
+      event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions
+        associated with a particular draw from the distribution.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+    """
+    super(Exp, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/exp_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/exp_impl.py
deleted file mode 100644
index b1ff840d62a73c941a4d67dec73b5c9f4d5353f9..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/exp_impl.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Exp bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.distributions.python.ops.bijectors import power_transform
-
-
-__all__ = [
-    "Exp",
-]
-
-
-class Exp(power_transform.PowerTransform):
-  """Compute `Y = g(X) = exp(X)`.
-
-    Example Use:
-
-    ```python
-    # Create the Y=g(X)=exp(X) transform which works only on Tensors with 1
-    # batch ndim and 2 event ndims (i.e., vector of matrices).
-    exp = Exp(event_ndims=2)
-    x = [[[1., 2],
-           [3, 4]],
-          [[5, 6],
-           [7, 8]]]
-    exp(x) == exp.forward(x)
-    log(x) == exp.inverse(x)
-    ```
-
-    Note: the exp(.) is applied element-wise but the Jacobian is a reduction
-    over the event space.
-  """
-
-  def __init__(self,
-               event_ndims=0,
-               validate_args=False,
-               name="exp"):
-    """Instantiates the `Exp` bijector.
-
-    Args:
-      event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions
-        associated with a particular draw from the distribution.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-    """
-    super(Exp, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py b/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py
index cf37aa51115ed98ab263bc03bcb297a03432a7ae..67f39785563255be0fe154aca3cbcf01c6a01e73 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/gumbel.py
@@ -18,12 +18,107 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.gumbel_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["Gumbel"]
+__all__ = [
+    "Gumbel",
+]
 
-remove_undocumented(__name__, _allowed_symbols)
+
+class Gumbel(bijector.Bijector):
+  """Compute `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
+
+  This bijector maps inputs from `[-inf, inf]` to [0, 1]`. The inverse of the
+  bijector applied to a uniform random variable `X ~ U(0, 1) gives back a
+  random variable with the
+  [Gumbel distribution](https://en.wikipedia.org/wiki/Gumbel_distribution):
+
+  ```none
+  Y ~ Gumbel(loc, scale)
+  pdf(y; loc, scale) = exp(
+    -( (y - loc) / scale + exp(- (y - loc) / scale) ) ) / scale
+  ```
+  """
+
+  def __init__(self,
+               loc=0.,
+               scale=1.,
+               event_ndims=0,
+               validate_args=False,
+               name="gumbel"):
+    """Instantiates the `Gumbel` bijector.
+
+    Args:
+      loc: Float-like `Tensor` that is the same dtype and is
+        broadcastable with `scale`.
+        This is `loc` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
+      scale: Positive Float-like `Tensor` that is the same dtype and is
+        broadcastable with `loc`.
+        This is `scale` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
+      event_ndims: Python scalar indicating the number of dimensions associated
+        with a particular draw from the distribution.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+    with self._name_scope("init", values=[loc, scale]):
+      self._loc = ops.convert_to_tensor(loc, name="loc")
+      self._scale = ops.convert_to_tensor(scale, name="scale")
+      check_ops.assert_same_float_dtype([self._loc, self._scale])
+      if validate_args:
+        self._scale = control_flow_ops.with_dependencies([
+            check_ops.assert_positive(
+                self._scale, message="Argument scale was not positive")
+        ], self._scale)
+
+    super(Gumbel, self).__init__(
+        event_ndims=event_ndims, validate_args=validate_args, name=name)
+
+  @property
+  def loc(self):
+    """The `loc` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`."""
+    return self._loc
+
+  @property
+  def scale(self):
+    """This is `scale` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`."""
+    return self._scale
+
+  def _forward(self, x):
+    z = (x - self.loc) / self.scale
+    return math_ops.exp(-math_ops.exp(-z))
+
+  def _inverse(self, y):
+    y = self._maybe_assert_valid_y(y)
+    return self.loc - self.scale * math_ops.log(-math_ops.log(y))
+
+  def _inverse_log_det_jacobian(self, y):
+    y = self._maybe_assert_valid_y(y)
+    event_dims = self._event_dims_tensor(y)
+    return math_ops.reduce_sum(
+        math_ops.log(self.scale / (-math_ops.log(y) * y)), axis=event_dims)
+
+  def _forward_log_det_jacobian(self, x):
+    event_dims = self._event_dims_tensor(x)
+    z = (x - self.loc) / self.scale
+    return math_ops.reduce_sum(
+        -z - math_ops.exp(-z) - math_ops.log(self.scale), axis=event_dims)
+
+  def _maybe_assert_valid_y(self, y):
+    if not self.validate_args:
+      return y
+    is_positive = check_ops.assert_non_negative(
+        y, message="Inverse transformation input must be greater than 0.")
+    less_than_one = check_ops.assert_less_equal(
+        y,
+        constant_op.constant(1., y.dtype),
+        message="Inverse transformation input must be less than or equal to 1.")
+    return control_flow_ops.with_dependencies([is_positive, less_than_one], y)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/gumbel_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/gumbel_impl.py
deleted file mode 100644
index 67f39785563255be0fe154aca3cbcf01c6a01e73..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/gumbel_impl.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Gumbel bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-__all__ = [
-    "Gumbel",
-]
-
-
-class Gumbel(bijector.Bijector):
-  """Compute `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
-
-  This bijector maps inputs from `[-inf, inf]` to [0, 1]`. The inverse of the
-  bijector applied to a uniform random variable `X ~ U(0, 1) gives back a
-  random variable with the
-  [Gumbel distribution](https://en.wikipedia.org/wiki/Gumbel_distribution):
-
-  ```none
-  Y ~ Gumbel(loc, scale)
-  pdf(y; loc, scale) = exp(
-    -( (y - loc) / scale + exp(- (y - loc) / scale) ) ) / scale
-  ```
-  """
-
-  def __init__(self,
-               loc=0.,
-               scale=1.,
-               event_ndims=0,
-               validate_args=False,
-               name="gumbel"):
-    """Instantiates the `Gumbel` bijector.
-
-    Args:
-      loc: Float-like `Tensor` that is the same dtype and is
-        broadcastable with `scale`.
-        This is `loc` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
-      scale: Positive Float-like `Tensor` that is the same dtype and is
-        broadcastable with `loc`.
-        This is `scale` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`.
-      event_ndims: Python scalar indicating the number of dimensions associated
-        with a particular draw from the distribution.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-    with self._name_scope("init", values=[loc, scale]):
-      self._loc = ops.convert_to_tensor(loc, name="loc")
-      self._scale = ops.convert_to_tensor(scale, name="scale")
-      check_ops.assert_same_float_dtype([self._loc, self._scale])
-      if validate_args:
-        self._scale = control_flow_ops.with_dependencies([
-            check_ops.assert_positive(
-                self._scale, message="Argument scale was not positive")
-        ], self._scale)
-
-    super(Gumbel, self).__init__(
-        event_ndims=event_ndims, validate_args=validate_args, name=name)
-
-  @property
-  def loc(self):
-    """The `loc` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`."""
-    return self._loc
-
-  @property
-  def scale(self):
-    """This is `scale` in `Y = g(X) = exp(-exp(-(X - loc) / scale))`."""
-    return self._scale
-
-  def _forward(self, x):
-    z = (x - self.loc) / self.scale
-    return math_ops.exp(-math_ops.exp(-z))
-
-  def _inverse(self, y):
-    y = self._maybe_assert_valid_y(y)
-    return self.loc - self.scale * math_ops.log(-math_ops.log(y))
-
-  def _inverse_log_det_jacobian(self, y):
-    y = self._maybe_assert_valid_y(y)
-    event_dims = self._event_dims_tensor(y)
-    return math_ops.reduce_sum(
-        math_ops.log(self.scale / (-math_ops.log(y) * y)), axis=event_dims)
-
-  def _forward_log_det_jacobian(self, x):
-    event_dims = self._event_dims_tensor(x)
-    z = (x - self.loc) / self.scale
-    return math_ops.reduce_sum(
-        -z - math_ops.exp(-z) - math_ops.log(self.scale), axis=event_dims)
-
-  def _maybe_assert_valid_y(self, y):
-    if not self.validate_args:
-      return y
-    is_positive = check_ops.assert_non_negative(
-        y, message="Inverse transformation input must be greater than 0.")
-    less_than_one = check_ops.assert_less_equal(
-        y,
-        constant_op.constant(1., y.dtype),
-        message="Inverse transformation input must be less than or equal to 1.")
-    return control_flow_ops.with_dependencies([is_positive, less_than_one], y)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/inline.py b/tensorflow/contrib/distributions/python/ops/bijectors/inline.py
index db10c3fc3a9135b4c408ada74622ba9b360f9ec1..fab1b22fbf92e7b92a5ec86ec62d66bec71a8c94 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/inline.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/inline.py
@@ -18,12 +18,124 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.inline_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["Inline"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Inline",
+]
+
+
+class Inline(bijector.Bijector):
+  """Bijector constructed from custom callables.
+
+  Example Use:
+
+  ```python
+  exp = Inline(
+    forward_fn=tf.exp,
+    inverse_fn=tf.log,
+    inverse_log_det_jacobian_fn=(
+      lambda y: -tf.reduce_sum(tf.log(y), axis=-1)),
+    name="exp")
+  ```
+
+  The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`.
+  """
+
+  def __init__(self,
+               forward_fn=None,
+               inverse_fn=None,
+               inverse_log_det_jacobian_fn=None,
+               forward_log_det_jacobian_fn=None,
+               forward_event_shape_fn=None,
+               forward_event_shape_tensor_fn=None,
+               inverse_event_shape_fn=None,
+               inverse_event_shape_tensor_fn=None,
+               is_constant_jacobian=False,
+               validate_args=False,
+               name="inline"):
+    """Creates a `Bijector` from callables.
+
+    Args:
+      forward_fn: Python callable implementing the forward transformation.
+      inverse_fn: Python callable implementing the inverse transformation.
+      inverse_log_det_jacobian_fn: Python callable implementing the
+        log o det o jacobian of the inverse transformation.
+      forward_log_det_jacobian_fn: Python callable implementing the
+        log o det o jacobian of the forward transformation.
+      forward_event_shape_fn: Python callable implementing non-identical
+        static event shape changes. Default: shape is assumed unchanged.
+      forward_event_shape_tensor_fn: Python callable implementing non-identical
+        event shape changes. Default: shape is assumed unchanged.
+      inverse_event_shape_fn: Python callable implementing non-identical
+        static event shape changes. Default: shape is assumed unchanged.
+      inverse_event_shape_tensor_fn: Python callable implementing non-identical
+        event shape changes. Default: shape is assumed unchanged.
+      is_constant_jacobian: Python `bool` indicating that the Jacobian is
+        constant for all input arguments.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str`, name given to ops managed by this object.
+    """
+    super(Inline, self).__init__(
+        event_ndims=0,
+        is_constant_jacobian=is_constant_jacobian,
+        validate_args=validate_args,
+        name=name)
+    self._forward_fn = forward_fn
+    self._inverse_fn = inverse_fn
+    self._inverse_log_det_jacobian_fn = inverse_log_det_jacobian_fn
+    self._forward_log_det_jacobian_fn = forward_log_det_jacobian_fn
+    self._forward_event_shape_fn = forward_event_shape_fn
+    self._forward_event_shape_tensor_fn = forward_event_shape_tensor_fn
+    self._inverse_event_shape_fn = inverse_event_shape_fn
+    self._inverse_event_shape_tensor_fn = inverse_event_shape_tensor_fn
+
+  def _forward_event_shape(self, input_shape):
+    if self._forward_event_shape_fn is None:
+      # By default assume shape doesn't change.
+      return input_shape
+    return self._forward_event_shape_fn(input_shape)
+
+  def _forward_event_shape_tensor(self, input_shape):
+    if self._forward_event_shape_tensor_fn is None:
+      # By default assume shape doesn't change.
+      return input_shape
+    return self._forward_event_shape_tensor_fn(input_shape)
+
+  def _inverse_event_shape(self, output_shape):
+    if self._inverse_event_shape_fn is None:
+      # By default assume shape doesn't change.
+      return output_shape
+    return self._inverse_event_shape_fn(output_shape)
+
+  def _inverse_event_shape_tensor(self, output_shape):
+    if self._inverse_event_shape_tensor_fn is None:
+      # By default assume shape doesn't change.
+      return output_shape
+    return self._inverse_event_shape_tensor_fn(output_shape)
+
+  def _forward(self, x, **kwargs):
+    if not callable(self._forward_fn):
+      raise NotImplementedError(
+          "forward_fn is not a callable function.")
+    return self._forward_fn(x, **kwargs)
+
+  def _inverse(self, y, **kwargs):
+    if not callable(self._inverse_fn):
+      raise NotImplementedError(
+          "inverse_fn is not a callable function.")
+    return self._inverse_fn(y, **kwargs)
+
+  def _inverse_log_det_jacobian(self, y, **kwargs):
+    if not callable(self._inverse_log_det_jacobian_fn):
+      raise NotImplementedError(
+          "inverse_log_det_jacobian_fn is not a callable function.")
+    return self._inverse_log_det_jacobian_fn(y, **kwargs)
+
+  def _forward_log_det_jacobian(self, y, **kwargs):
+    if not callable(self._forward_log_det_jacobian_fn):
+      raise NotImplementedError(
+          "forward_log_det_jacobian_fn is not a callable function.")
+    return self._forward_log_det_jacobian_fn(y, **kwargs)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/inline_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/inline_impl.py
deleted file mode 100644
index fab1b22fbf92e7b92a5ec86ec62d66bec71a8c94..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/inline_impl.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Inline bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "Inline",
-]
-
-
-class Inline(bijector.Bijector):
-  """Bijector constructed from custom callables.
-
-  Example Use:
-
-  ```python
-  exp = Inline(
-    forward_fn=tf.exp,
-    inverse_fn=tf.log,
-    inverse_log_det_jacobian_fn=(
-      lambda y: -tf.reduce_sum(tf.log(y), axis=-1)),
-    name="exp")
-  ```
-
-  The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`.
-  """
-
-  def __init__(self,
-               forward_fn=None,
-               inverse_fn=None,
-               inverse_log_det_jacobian_fn=None,
-               forward_log_det_jacobian_fn=None,
-               forward_event_shape_fn=None,
-               forward_event_shape_tensor_fn=None,
-               inverse_event_shape_fn=None,
-               inverse_event_shape_tensor_fn=None,
-               is_constant_jacobian=False,
-               validate_args=False,
-               name="inline"):
-    """Creates a `Bijector` from callables.
-
-    Args:
-      forward_fn: Python callable implementing the forward transformation.
-      inverse_fn: Python callable implementing the inverse transformation.
-      inverse_log_det_jacobian_fn: Python callable implementing the
-        log o det o jacobian of the inverse transformation.
-      forward_log_det_jacobian_fn: Python callable implementing the
-        log o det o jacobian of the forward transformation.
-      forward_event_shape_fn: Python callable implementing non-identical
-        static event shape changes. Default: shape is assumed unchanged.
-      forward_event_shape_tensor_fn: Python callable implementing non-identical
-        event shape changes. Default: shape is assumed unchanged.
-      inverse_event_shape_fn: Python callable implementing non-identical
-        static event shape changes. Default: shape is assumed unchanged.
-      inverse_event_shape_tensor_fn: Python callable implementing non-identical
-        event shape changes. Default: shape is assumed unchanged.
-      is_constant_jacobian: Python `bool` indicating that the Jacobian is
-        constant for all input arguments.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str`, name given to ops managed by this object.
-    """
-    super(Inline, self).__init__(
-        event_ndims=0,
-        is_constant_jacobian=is_constant_jacobian,
-        validate_args=validate_args,
-        name=name)
-    self._forward_fn = forward_fn
-    self._inverse_fn = inverse_fn
-    self._inverse_log_det_jacobian_fn = inverse_log_det_jacobian_fn
-    self._forward_log_det_jacobian_fn = forward_log_det_jacobian_fn
-    self._forward_event_shape_fn = forward_event_shape_fn
-    self._forward_event_shape_tensor_fn = forward_event_shape_tensor_fn
-    self._inverse_event_shape_fn = inverse_event_shape_fn
-    self._inverse_event_shape_tensor_fn = inverse_event_shape_tensor_fn
-
-  def _forward_event_shape(self, input_shape):
-    if self._forward_event_shape_fn is None:
-      # By default assume shape doesn't change.
-      return input_shape
-    return self._forward_event_shape_fn(input_shape)
-
-  def _forward_event_shape_tensor(self, input_shape):
-    if self._forward_event_shape_tensor_fn is None:
-      # By default assume shape doesn't change.
-      return input_shape
-    return self._forward_event_shape_tensor_fn(input_shape)
-
-  def _inverse_event_shape(self, output_shape):
-    if self._inverse_event_shape_fn is None:
-      # By default assume shape doesn't change.
-      return output_shape
-    return self._inverse_event_shape_fn(output_shape)
-
-  def _inverse_event_shape_tensor(self, output_shape):
-    if self._inverse_event_shape_tensor_fn is None:
-      # By default assume shape doesn't change.
-      return output_shape
-    return self._inverse_event_shape_tensor_fn(output_shape)
-
-  def _forward(self, x, **kwargs):
-    if not callable(self._forward_fn):
-      raise NotImplementedError(
-          "forward_fn is not a callable function.")
-    return self._forward_fn(x, **kwargs)
-
-  def _inverse(self, y, **kwargs):
-    if not callable(self._inverse_fn):
-      raise NotImplementedError(
-          "inverse_fn is not a callable function.")
-    return self._inverse_fn(y, **kwargs)
-
-  def _inverse_log_det_jacobian(self, y, **kwargs):
-    if not callable(self._inverse_log_det_jacobian_fn):
-      raise NotImplementedError(
-          "inverse_log_det_jacobian_fn is not a callable function.")
-    return self._inverse_log_det_jacobian_fn(y, **kwargs)
-
-  def _forward_log_det_jacobian(self, y, **kwargs):
-    if not callable(self._forward_log_det_jacobian_fn):
-      raise NotImplementedError(
-          "forward_log_det_jacobian_fn is not a callable function.")
-    return self._forward_log_det_jacobian_fn(y, **kwargs)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/invert.py b/tensorflow/contrib/distributions/python/ops/bijectors/invert.py
index c134e10109ce5065eb58de1d847e3c487258954c..2c603fe61f36dd27f4984fe6c13c11f2fb534321 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/invert.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/invert.py
@@ -18,12 +18,85 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.invert_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.ops.distributions import bijector as bijector_lib
 
-_allowed_symbols = ["Invert"]
+__all__ = [
+    "Invert",
+]
 
-remove_undocumented(__name__, _allowed_symbols)
+
+class Invert(bijector_lib.Bijector):
+  """Bijector which inverts another Bijector.
+
+  Example Use: [ExpGammaDistribution (see Background & Context)](
+  https://reference.wolfram.com/language/ref/ExpGammaDistribution.html)
+  models `Y=log(X)` where `X ~ Gamma`.
+
+  ```python
+  exp_gamma_distribution = TransformedDistribution(
+    distribution=Gamma(concentration=1., rate=2.),
+    bijector=bijector.Invert(bijector.Exp())
+  ```
+
+  """
+
+  def __init__(self, bijector, validate_args=False, name=None):
+    """Creates a `Bijector` which swaps the meaning of `inverse` and `forward`.
+
+    Note: An inverted bijector's `inverse_log_det_jacobian` is often more
+    efficient if the base bijector implements `_forward_log_det_jacobian`. If
+    `_forward_log_det_jacobian` is not implemented then the following code is
+    used:
+
+    ```python
+    y = self.inverse(x, **kwargs)
+    return -self.inverse_log_det_jacobian(y, **kwargs)
+    ```
+
+    Args:
+      bijector: Bijector instance.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str`, name given to ops managed by this object.
+    """
+
+    if not bijector._is_injective:  # pylint: disable=protected-access
+      raise NotImplementedError(
+          "Invert is not implemented for non-injective bijectors.")
+
+    self._bijector = bijector
+    super(Invert, self).__init__(
+        event_ndims=bijector.event_ndims,
+        graph_parents=bijector.graph_parents,
+        is_constant_jacobian=bijector.is_constant_jacobian,
+        validate_args=validate_args,
+        dtype=bijector.dtype,
+        name=name or "_".join(["invert", bijector.name]))
+
+  def _forward_event_shape(self, input_shape):
+    return self.bijector._inverse_event_shape(input_shape)  # pylint: disable=protected-access
+
+  def _forward_event_shape_tensor(self, input_shape):
+    return self.bijector._inverse_event_shape_tensor(input_shape)  # pylint: disable=protected-access
+
+  def _inverse_event_shape(self, output_shape):
+    return self.bijector._forward_event_shape(output_shape)  # pylint: disable=protected-access
+
+  def _inverse_event_shape_tensor(self, output_shape):
+    return self.bijector._forward_event_shape_tensor(output_shape)  # pylint: disable=protected-access
+
+  @property
+  def bijector(self):
+    return self._bijector
+
+  def _forward(self, x, **kwargs):
+    return self.bijector._inverse(x, **kwargs)  # pylint: disable=protected-access
+
+  def _inverse(self, y, **kwargs):
+    return self.bijector._forward(y, **kwargs)  # pylint: disable=protected-access
+
+  def _inverse_log_det_jacobian(self, y, **kwargs):
+    return self.bijector._forward_log_det_jacobian(y, **kwargs)  # pylint: disable=protected-access
+
+  def _forward_log_det_jacobian(self, x, **kwargs):
+    return self.bijector._inverse_log_det_jacobian(x, **kwargs)  # pylint: disable=protected-access
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/invert_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/invert_impl.py
deleted file mode 100644
index 2c603fe61f36dd27f4984fe6c13c11f2fb534321..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/invert_impl.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Invert bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.ops.distributions import bijector as bijector_lib
-
-__all__ = [
-    "Invert",
-]
-
-
-class Invert(bijector_lib.Bijector):
-  """Bijector which inverts another Bijector.
-
-  Example Use: [ExpGammaDistribution (see Background & Context)](
-  https://reference.wolfram.com/language/ref/ExpGammaDistribution.html)
-  models `Y=log(X)` where `X ~ Gamma`.
-
-  ```python
-  exp_gamma_distribution = TransformedDistribution(
-    distribution=Gamma(concentration=1., rate=2.),
-    bijector=bijector.Invert(bijector.Exp())
-  ```
-
-  """
-
-  def __init__(self, bijector, validate_args=False, name=None):
-    """Creates a `Bijector` which swaps the meaning of `inverse` and `forward`.
-
-    Note: An inverted bijector's `inverse_log_det_jacobian` is often more
-    efficient if the base bijector implements `_forward_log_det_jacobian`. If
-    `_forward_log_det_jacobian` is not implemented then the following code is
-    used:
-
-    ```python
-    y = self.inverse(x, **kwargs)
-    return -self.inverse_log_det_jacobian(y, **kwargs)
-    ```
-
-    Args:
-      bijector: Bijector instance.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str`, name given to ops managed by this object.
-    """
-
-    if not bijector._is_injective:  # pylint: disable=protected-access
-      raise NotImplementedError(
-          "Invert is not implemented for non-injective bijectors.")
-
-    self._bijector = bijector
-    super(Invert, self).__init__(
-        event_ndims=bijector.event_ndims,
-        graph_parents=bijector.graph_parents,
-        is_constant_jacobian=bijector.is_constant_jacobian,
-        validate_args=validate_args,
-        dtype=bijector.dtype,
-        name=name or "_".join(["invert", bijector.name]))
-
-  def _forward_event_shape(self, input_shape):
-    return self.bijector._inverse_event_shape(input_shape)  # pylint: disable=protected-access
-
-  def _forward_event_shape_tensor(self, input_shape):
-    return self.bijector._inverse_event_shape_tensor(input_shape)  # pylint: disable=protected-access
-
-  def _inverse_event_shape(self, output_shape):
-    return self.bijector._forward_event_shape(output_shape)  # pylint: disable=protected-access
-
-  def _inverse_event_shape_tensor(self, output_shape):
-    return self.bijector._forward_event_shape_tensor(output_shape)  # pylint: disable=protected-access
-
-  @property
-  def bijector(self):
-    return self._bijector
-
-  def _forward(self, x, **kwargs):
-    return self.bijector._inverse(x, **kwargs)  # pylint: disable=protected-access
-
-  def _inverse(self, y, **kwargs):
-    return self.bijector._forward(y, **kwargs)  # pylint: disable=protected-access
-
-  def _inverse_log_det_jacobian(self, y, **kwargs):
-    return self.bijector._forward_log_det_jacobian(y, **kwargs)  # pylint: disable=protected-access
-
-  def _forward_log_det_jacobian(self, x, **kwargs):
-    return self.bijector._inverse_log_det_jacobian(x, **kwargs)  # pylint: disable=protected-access
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py
index 132dc570f94719b6c71fb269866c943774481b7e..dc8ae1eed19eda772219287d8661f534ac242d10 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py
@@ -18,16 +18,484 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = [
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.layers import core as layers
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import template as template_ops
+from tensorflow.python.ops import variable_scope as variable_scope_lib
+from tensorflow.python.ops.distributions import bijector as bijector_lib
+
+
+__all__ = [
     "MaskedAutoregressiveFlow",
-    "masked_dense",
     "masked_autoregressive_default_template",
+    "masked_dense",
 ]
 
-remove_undocumented(__name__, _allowed_symbols)
+
+class MaskedAutoregressiveFlow(bijector_lib.Bijector):
+  """Affine MaskedAutoregressiveFlow bijector for vector-valued events.
+
+  The affine autoregressive flow [1] provides a relatively simple framework for
+  user-specified (deep) architectures to learn a distribution over vector-valued
+  events. Regarding terminology,
+
+    "Autoregressive models decompose the joint density as a product of
+    conditionals, and model each conditional in turn. Normalizing flows
+    transform a base density (e.g. a standard Gaussian) into the target density
+    by an invertible transformation with tractable Jacobian." [1]
+
+  In other words, the "autoregressive property" is equivalent to the
+  decomposition, `p(x) = prod{ p(x[i] | x[0:i]) : i=0, ..., d }`. The provided
+  `shift_and_log_scale_fn`, `masked_autoregressive_default_template`, achieves
+  this property by zeroing out weights in its `masked_dense` layers.
+
+  In the `tf.distributions` framework, a "normalizing flow" is implemented as a
+  `tf.distributions.bijectors.Bijector`. The `forward` "autoregression"
+  is implemented using a `tf.while_loop` and a deep neural network (DNN) with
+  masked weights such that the autoregressive property is automatically met in
+  the `inverse`.
+
+  A `TransformedDistribution` using `MaskedAutoregressiveFlow(...)` uses the
+  (expensive) forward-mode calculation to draw samples and the (cheap)
+  reverse-mode calculation to compute log-probabilities. Conversely, a
+  `TransformedDistribution` using `Invert(MaskedAutoregressiveFlow(...))` uses
+  the (expensive) forward-mode calculation to compute log-probabilities and the
+  (cheap) reverse-mode calculation to compute samples.  See "Example Use"
+  [below] for more details.
+
+  Given a `shift_and_log_scale_fn`, the forward and inverse transformations are
+  (a sequence of) affine transformations. A "valid" `shift_and_log_scale_fn`
+  must compute each `shift` (aka `loc` or "mu" [2]) and `log(scale)` (aka
+  "alpha" [2]) such that each are broadcastable with the arguments to `forward`
+  and `inverse`, i.e., such that the calculations in `forward`, `inverse`
+  [below] are possible.
+
+  For convenience, `masked_autoregressive_default_template` is offered as a
+  possible `shift_and_log_scale_fn` function. It implements the MADE
+  architecture [2]. MADE is a feed-forward network that computes a `shift` and
+  `log(scale)` using `masked_dense` layers in a deep neural network. Weights are
+  masked to ensure the autoregressive property. It is possible that this
+  architecture is suboptimal for your task. To build alternative networks,
+  either change the arguments to `masked_autoregressive_default_template`, use
+  the `masked_dense` function to roll-out your own, or use some other
+  architecture, e.g., using `tf.layers`.
+
+  Warning: no attempt is made to validate that the `shift_and_log_scale_fn`
+  enforces the "autoregressive property".
+
+  Assuming `shift_and_log_scale_fn` has valid shape and autoregressive
+  semantics, the forward transformation is,
+
+  ```python
+  def forward(x):
+    y = zeros_like(x)
+    event_size = x.shape[-1]
+    for _ in range(event_size):
+      shift, log_scale = shift_and_log_scale_fn(y)
+      y = x * math_ops.exp(log_scale) + shift
+    return y
+  ```
+
+  and the inverse transformation is,
+
+  ```python
+  def inverse(y):
+    shift, log_scale = shift_and_log_scale_fn(y)
+    return (y - shift) / math_ops.exp(log_scale)
+  ```
+
+  Notice that the `inverse` does not need a for-loop. This is because in the
+  forward pass each calculation of `shift` and `log_scale` is based on the `y`
+  calculated so far (not `x`). In the `inverse`, the `y` is fully known, thus is
+  equivalent to the scaling used in `forward` after `event_size` passes, i.e.,
+  the "last" `y` used to compute `shift`, `log_scale`. (Roughly speaking, this
+  also proves the transform is bijective.)
+
+  #### Example Use
+
+  ```python
+  tfd = tf.contrib.distributions
+  tfb = tfd.bijectors
+
+  dims = 5
+
+  # A common choice for a normalizing flow is to use a Gaussian for the base
+  # distribution. (However, any continuous distribution would work.) E.g.,
+  maf = tfd.TransformedDistribution(
+      distribution=tfd.Normal(loc=0., scale=1.),
+      bijector=tfb.MaskedAutoregressiveFlow(
+          shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
+              hidden_layers=[512, 512])),
+      event_shape=[dims])
+
+  x = maf.sample()  # Expensive; uses `tf.while_loop`, no Bijector caching.
+  maf.log_prob(x)   # Almost free; uses Bijector caching.
+  maf.log_prob(0.)  # Cheap; no `tf.while_loop` despite no Bijector caching.
+
+  # [1] also describes an "Inverse Autoregressive Flow", e.g.,
+  iaf = tfd.TransformedDistribution(
+      distribution=tfd.Normal(loc=0., scale=1.),
+      bijector=tfb.Invert(tfb.MaskedAutoregressiveFlow(
+          shift_and_log_scale_fn=tfb.masked_autoregressive_default_template(
+              hidden_layers=[512, 512]))),
+      event_shape=[dims])
+
+  x = iaf.sample()  # Cheap; no `tf.while_loop` despite no Bijector caching.
+  iaf.log_prob(x)   # Almost free; uses Bijector caching.
+  iaf.log_prob(0.)  # Expensive; uses `tf.while_loop`, no Bijector caching.
+
+  # In many (if not most) cases the default `shift_and_log_scale_fn` will be a
+  # poor choice. Here's an example of using a "shift only" version and with a
+  # different number/depth of hidden layers.
+  shift_only = True
+  maf_no_scale_hidden2 = tfd.TransformedDistribution(
+      distribution=tfd.Normal(loc=0., scale=1.),
+      bijector=tfb.MaskedAutoregressiveFlow(
+          tfb.masked_autoregressive_default_template(
+              hidden_layers=[32],
+              shift_only=shift_only),
+          is_constant_jacobian=shift_only),
+      event_shape=[dims])
+  ```
+
+  [1]: "Masked Autoregressive Flow for Density Estimation."
+       George Papamakarios, Theo Pavlakou, Iain Murray. Arxiv. 2017.
+       https://arxiv.org/abs/1705.07057
+
+  [2]: "MADE: Masked Autoencoder for Distribution Estimation."
+       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
+       https://arxiv.org/abs/1502.03509
+
+  """
+
+  def __init__(self,
+               shift_and_log_scale_fn,
+               is_constant_jacobian=False,
+               validate_args=False,
+               unroll_loop=False,
+               name=None):
+    """Creates the MaskedAutoregressiveFlow bijector.
+
+    Args:
+      shift_and_log_scale_fn: Python `callable` which computes `shift` and
+        `log_scale` from both the forward domain (`x`) and the inverse domain
+        (`y`). Calculation must respect the "autoregressive property" (see class
+        docstring). Suggested default
+        `masked_autoregressive_default_template(hidden_layers=...)`.
+        Typically the function contains `tf.Variables` and is wrapped using
+        `tf.make_template`. Returning `None` for either (both) `shift`,
+        `log_scale` is equivalent to (but more efficient than) returning zero.
+      is_constant_jacobian: Python `bool`. Default: `False`. When `True` the
+        implementation assumes `log_scale` does not depend on the forward domain
+        (`x`) or inverse domain (`y`) values. (No validation is made;
+        `is_constant_jacobian=False` is always safe but possibly computationally
+        inefficient.)
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      unroll_loop: Python `bool` indicating whether the `tf.while_loop` in
+        `_forward` should be replaced with a static for loop. Requires that
+        the final dimension of `x` be known at graph construction time. Defaults
+        to `False`.
+      name: Python `str`, name given to ops managed by this object.
+    """
+    name = name or "masked_autoregressive_flow"
+    self._shift_and_log_scale_fn = shift_and_log_scale_fn
+    self._unroll_loop = unroll_loop
+    super(MaskedAutoregressiveFlow, self).__init__(
+        is_constant_jacobian=is_constant_jacobian,
+        validate_args=validate_args,
+        name=name)
+
+  def _forward(self, x):
+    if self._unroll_loop:
+      event_size = x.shape.with_rank_at_least(1)[-1].value
+      if event_size is None:
+        raise ValueError(
+            "The final dimension of `x` must be known at graph construction "
+            "time if `unroll_loop=True`. `x.shape: %r`" % x.shape)
+      y = array_ops.zeros_like(x, name="y0")
+
+      for _ in range(event_size):
+        shift, log_scale = self._shift_and_log_scale_fn(y)
+        # next_y = scale * x + shift
+        next_y = x
+        if log_scale is not None:
+          next_y *= math_ops.exp(log_scale)
+        if shift is not None:
+          next_y += shift
+        y = next_y
+      return y
+
+    event_size = array_ops.shape(x)[-1]
+    y0 = array_ops.zeros_like(x, name="y0")
+    # call the template once to ensure creation
+    _ = self._shift_and_log_scale_fn(y0)
+    def _loop_body(index, y0):
+      """While-loop body for autoregression calculation."""
+      # Set caching device to avoid re-getting the tf.Variable for every while
+      # loop iteration.
+      with variable_scope_lib.variable_scope(
+          variable_scope_lib.get_variable_scope()) as vs:
+        if vs.caching_device is None:
+          vs.set_caching_device(lambda op: op.device)
+        shift, log_scale = self._shift_and_log_scale_fn(y0)
+      y = x
+      if log_scale is not None:
+        y *= math_ops.exp(log_scale)
+      if shift is not None:
+        y += shift
+      return index + 1, y
+    _, y = control_flow_ops.while_loop(
+        cond=lambda index, _: index < event_size,
+        body=_loop_body,
+        loop_vars=[0, y0])
+    return y
+
+  def _inverse(self, y):
+    shift, log_scale = self._shift_and_log_scale_fn(y)
+    x = y
+    if shift is not None:
+      x -= shift
+    if log_scale is not None:
+      x *= math_ops.exp(-log_scale)
+    return x
+
+  def _inverse_log_det_jacobian(self, y):
+    _, log_scale = self._shift_and_log_scale_fn(y)
+    if log_scale is None:
+      return constant_op.constant(0., dtype=y.dtype, name="ildj")
+    return -math_ops.reduce_sum(log_scale, axis=-1)
+
+
+MASK_INCLUSIVE = "inclusive"
+MASK_EXCLUSIVE = "exclusive"
+
+
+def _gen_slices(num_blocks, n_in, n_out, mask_type=MASK_EXCLUSIVE):
+  """Generate the slices for building an autoregressive mask."""
+  # TODO(b/67594795): Better support of dynamic shape.
+  slices = []
+  col = 0
+  d_in = n_in // num_blocks
+  d_out = n_out // num_blocks
+  row = d_out if mask_type == MASK_EXCLUSIVE else 0
+  for _ in range(num_blocks):
+    row_slice = slice(row, None)
+    col_slice = slice(col, col + d_in)
+    slices.append([row_slice, col_slice])
+    col += d_in
+    row += d_out
+  return slices
+
+
+def _gen_mask(num_blocks,
+              n_in,
+              n_out,
+              mask_type=MASK_EXCLUSIVE,
+              dtype=dtypes.float32):
+  """Generate the mask for building an autoregressive dense layer."""
+  # TODO(b/67594795): Better support of dynamic shape.
+  mask = np.zeros([n_out, n_in], dtype=dtype.as_numpy_dtype())
+  slices = _gen_slices(num_blocks, n_in, n_out, mask_type=mask_type)
+  for [row_slice, col_slice] in slices:
+    mask[row_slice, col_slice] = 1
+  return mask
+
+
+def masked_dense(inputs,
+                 units,
+                 num_blocks=None,
+                 exclusive=False,
+                 kernel_initializer=None,
+                 reuse=None,
+                 name=None,
+                 *args,
+                 **kwargs):
+  """A autoregressively masked dense layer. Analogous to `tf.layers.dense`.
+
+  See [1] for detailed explanation.
+
+  [1]: "MADE: Masked Autoencoder for Distribution Estimation."
+       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
+       https://arxiv.org/abs/1502.03509
+
+  Arguments:
+    inputs: Tensor input.
+    units: Python `int` scalar representing the dimensionality of the output
+      space.
+    num_blocks: Python `int` scalar representing the number of blocks for the
+      MADE masks.
+    exclusive: Python `bool` scalar representing whether to zero the diagonal of
+      the mask, used for the first layer of a MADE.
+    kernel_initializer: Initializer function for the weight matrix.
+      If `None` (default), weights are initialized using the
+      `tf.glorot_random_initializer`.
+    reuse: Python `bool` scalar representing whether to reuse the weights of a
+      previous layer by the same name.
+    name: Python `str` used to describe ops managed by this function.
+    *args: `tf.layers.dense` arguments.
+    **kwargs: `tf.layers.dense` keyword arguments.
+
+  Returns:
+    Output tensor.
+
+  Raises:
+    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
+      graph execution.
+  """
+  # TODO(b/67594795): Better support of dynamic shape.
+  input_depth = inputs.shape.with_rank_at_least(1)[-1].value
+  if input_depth is None:
+    raise NotImplementedError(
+        "Rightmost dimension must be known prior to graph execution.")
+
+  mask = _gen_mask(num_blocks, input_depth, units,
+                   MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T
+
+  if kernel_initializer is None:
+    kernel_initializer = init_ops.glorot_normal_initializer()
+
+  def masked_initializer(shape, dtype=None, partition_info=None):
+    return mask * kernel_initializer(shape, dtype, partition_info)
+
+  with ops.name_scope(name, "masked_dense", [inputs, units, num_blocks]):
+    layer = layers.Dense(
+        units,
+        kernel_initializer=masked_initializer,
+        kernel_constraint=lambda x: mask * x,
+        name=name,
+        dtype=inputs.dtype.base_dtype,
+        _scope=name,
+        _reuse=reuse,
+        *args,
+        **kwargs)
+    return layer.apply(inputs)
+
+
+def masked_autoregressive_default_template(
+    hidden_layers,
+    shift_only=False,
+    activation=nn_ops.relu,
+    log_scale_min_clip=-5.,
+    log_scale_max_clip=3.,
+    log_scale_clip_gradient=False,
+    name=None,
+    *args,
+    **kwargs):
+  """Build the MADE Model [1].
+
+  This will be wrapped in a make_template to ensure the variables are only
+  created once. It takes the input and returns the `loc` ("mu" [1]) and
+  `log_scale` ("alpha" [1]) from the MADE network.
+
+  Warning: This function uses `masked_dense` to create randomly initialized
+  `tf.Variables`. It is presumed that these will be fit, just as you would any
+  other neural architecture which uses `tf.layers.dense`.
+
+  #### About Hidden Layers:
+
+  Each element of `hidden_layers` should be greater than the `input_depth`
+  (i.e., `input_depth = tf.shape(input)[-1]` where `input` is the input to the
+  neural network). This is necessary to ensure the autoregressivity property.
+
+  #### About Clipping:
+
+  This function also optionally clips the `log_scale` (but possibly not its
+  gradient). This is useful because if `log_scale` is too small/large it might
+  underflow/overflow making it impossible for the `MaskedAutoregressiveFlow`
+  bijector to implement a bijection. Additionally, the `log_scale_clip_gradient`
+  `bool` indicates whether the gradient should also be clipped. The default does
+  not clip the gradient; this is useful because it still provides gradient
+  information (for fitting) yet solves the numerical stability problem. I.e.,
+  `log_scale_clip_gradient = False` means
+  `grad[exp(clip(x))] = grad[x] exp(clip(x))` rather than the usual
+  `grad[clip(x)] exp(clip(x))`.
+
+  [1]: "MADE: Masked Autoencoder for Distribution Estimation."
+       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
+       https://arxiv.org/abs/1502.03509
+
+  Arguments:
+    hidden_layers: Python `list`-like of non-negative integer, scalars
+      indicating the number of units in each hidden layer. Default: `[512, 512].
+    shift_only: Python `bool` indicating if only the `shift` term shall be
+      computed. Default: `False`.
+    activation: Activation function (callable). Explicitly setting to `None`
+      implies a linear activation.
+    log_scale_min_clip: `float`-like scalar `Tensor`, or a `Tensor` with the
+      same shape as `log_scale`. The minimum value to clip by. Default: -5.
+    log_scale_max_clip: `float`-like scalar `Tensor`, or a `Tensor` with the
+      same shape as `log_scale`. The maximum value to clip by. Default: 3.
+    log_scale_clip_gradient: Python `bool` indicating that the gradient of
+      `tf.clip_by_value` should be preserved. Default: `False`.
+    name: A name for ops managed by this function. Default:
+      "masked_autoregressive_default_template".
+    *args: `tf.layers.dense` arguments.
+    **kwargs: `tf.layers.dense` keyword arguments.
+
+  Returns:
+    shift: `Float`-like `Tensor` of shift terms (the "mu" in [2]).
+    log_scale: `Float`-like `Tensor` of log(scale) terms (the "alpha" in [2]).
+
+  Raises:
+    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
+      graph execution.
+  """
+
+  with ops.name_scope(name, "masked_autoregressive_default_template",
+                      values=[log_scale_min_clip, log_scale_max_clip]):
+    def _fn(x):
+      """MADE parameterized via `masked_autoregressive_default_template`."""
+      # TODO(b/67594795): Better support of dynamic shape.
+      input_depth = x.shape.with_rank_at_least(1)[-1].value
+      if input_depth is None:
+        raise NotImplementedError(
+            "Rightmost dimension must be known prior to graph execution.")
+      input_shape = (np.int32(x.shape.as_list()) if x.shape.is_fully_defined()
+                     else array_ops.shape(x))
+      for i, units in enumerate(hidden_layers):
+        x = masked_dense(
+            inputs=x,
+            units=units,
+            num_blocks=input_depth,
+            exclusive=True if i == 0 else False,
+            activation=activation,
+            *args,
+            **kwargs)
+      x = masked_dense(
+          inputs=x,
+          units=(1 if shift_only else 2) * input_depth,
+          num_blocks=input_depth,
+          activation=None,
+          *args,
+          **kwargs)
+      if shift_only:
+        x = array_ops.reshape(x, shape=input_shape)
+        return x, None
+      x = array_ops.reshape(
+          x, shape=array_ops.concat([input_shape, [2]], axis=0))
+      shift, log_scale = array_ops.unstack(x, num=2, axis=-1)
+      which_clip = (math_ops.clip_by_value if log_scale_clip_gradient
+                    else _clip_by_value_preserve_grad)
+      log_scale = which_clip(log_scale, log_scale_min_clip, log_scale_max_clip)
+      return shift, log_scale
+    return template_ops.make_template(
+        "masked_autoregressive_default_template", _fn)
+
+
+def _clip_by_value_preserve_grad(x, clip_value_min, clip_value_max, name=None):
+  """Clips input while leaving gradient unaltered."""
+  with ops.name_scope(name, "clip_by_value_preserve_grad",
+                      [x, clip_value_min, clip_value_max]):
+    clip_x = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
+    return x + array_ops.stop_gradient(clip_x - x)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py
deleted file mode 100644
index ae142883931274b594dbbafbe86bd71e75c621bc..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive_impl.py
+++ /dev/null
@@ -1,473 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""MaskedAutoregressiveFlow bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.layers import core as layers
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import clip_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import template as template_ops
-from tensorflow.python.ops import variable_scope as variable_scope_lib
-from tensorflow.python.ops.distributions import bijector as bijector_lib
-
-
-__all__ = [
-    "MaskedAutoregressiveFlow",
-    "masked_autoregressive_default_template",
-    "masked_dense",
-]
-
-
-class MaskedAutoregressiveFlow(bijector_lib.Bijector):
-  """Affine MaskedAutoregressiveFlow bijector for vector-valued events.
-
-  The affine autoregressive flow [1] provides a relatively simple framework for
-  user-specified (deep) architectures to learn a distribution over vector-valued
-  events. Regarding terminology,
-
-    "Autoregressive models decompose the joint density as a product of
-    conditionals, and model each conditional in turn. Normalizing flows
-    transform a base density (e.g. a standard Gaussian) into the target density
-    by an invertible transformation with tractable Jacobian." [1]
-
-  In other words, the "autoregressive property" is equivalent to the
-  decomposition, `p(x) = prod{ p(x[i] | x[0:i]) : i=0, ..., d }`. The provided
-  `shift_and_log_scale_fn`, `masked_autoregressive_default_template`, achieves
-  this property by zeroing out weights in its `masked_dense` layers.
-
-  In the `tf.distributions` framework, a "normalizing flow" is implemented as a
-  `tf.distributions.bijectors.Bijector`. The `forward` "autoregression"
-  is implemented using a `tf.while_loop` and a deep neural network (DNN) with
-  masked weights such that the autoregressive property is automatically met in
-  the `inverse`.
-
-  A `TransformedDistribution` using `MaskedAutoregressiveFlow(...)` uses the
-  (expensive) forward-mode calculation to draw samples and the (cheap)
-  reverse-mode calculation to compute log-probabilities. Conversely, a
-  `TransformedDistribution` using `Invert(MaskedAutoregressiveFlow(...))` uses
-  the (expensive) forward-mode calculation to compute log-probabilities and the
-  (cheap) reverse-mode calculation to compute samples.  See "Example Use"
-  [below] for more details.
-
-  Given a `shift_and_log_scale_fn`, the forward and inverse transformations are
-  (a sequence of) affine transformations. A "valid" `shift_and_log_scale_fn`
-  must compute each `shift` (aka `loc` or "mu" [2]) and `log(scale)` (aka
-  "alpha" [2]) such that each are broadcastable with the arguments to `forward`
-  and `inverse`, i.e., such that the calculations in `forward`, `inverse`
-  [below] are possible.
-
-  For convenience, `masked_autoregressive_default_template` is offered as a
-  possible `shift_and_log_scale_fn` function. It implements the MADE
-  architecture [2]. MADE is a feed-forward network that computes a `shift` and
-  `log(scale)` using `masked_dense` layers in a deep neural network. Weights are
-  masked to ensure the autoregressive property. It is possible that this
-  architecture is suboptimal for your task. To build alternative networks,
-  either change the arguments to `masked_autoregressive_default_template`, use
-  the `masked_dense` function to roll-out your own, or use some other
-  architecture, e.g., using `tf.layers`.
-
-  Warning: no attempt is made to validate that the `shift_and_log_scale_fn`
-  enforces the "autoregressive property".
-
-  Assuming `shift_and_log_scale_fn` has valid shape and autoregressive
-  semantics, the forward transformation is,
-
-  ```python
-  def forward(x):
-    y = zeros_like(x)
-    event_size = x.shape[-1]
-    for _ in range(event_size):
-      shift, log_scale = shift_and_log_scale_fn(y)
-      y = x * math_ops.exp(log_scale) + shift
-    return y
-  ```
-
-  and the inverse transformation is,
-
-  ```python
-  def inverse(y):
-    shift, log_scale = shift_and_log_scale_fn(y)
-    return (y - shift) / math_ops.exp(log_scale)
-  ```
-
-  Notice that the `inverse` does not need a for-loop. This is because in the
-  forward pass each calculation of `shift` and `log_scale` is based on the `y`
-  calculated so far (not `x`). In the `inverse`, the `y` is fully known, thus is
-  equivalent to the scaling used in `forward` after `event_size` passes, i.e.,
-  the "last" `y` used to compute `shift`, `log_scale`. (Roughly speaking, this
-  also proves the transform is bijective.)
-
-  #### Example Use
-
-  ```python
-  ds = tf.contrib.distributions
-  bs = tf.contrib.distributions.bijectors
-
-  dims = 5
-
-  # A common choice for a normalizing flow is to use a Gaussian for the base
-  # distribution. (However, any continuous distribution would work.) E.g.,
-  maf = ds.TransformedDistribution(
-      distribution=ds.Normal(loc=0., scale=1.),
-      bijector=bs.MaskedAutoregressiveFlow(
-          shift_and_log_scale_fn=bs.masked_autoregressive_default_template(
-              hidden_layers=[512, 512])),
-      event_shape=[dims])
-
-  x = maf.sample()  # Expensive; uses `tf.while_loop`, no Bijector caching.
-  maf.log_prob(x)   # Almost free; uses Bijector caching.
-  maf.log_prob(0.)  # Cheap; no `tf.while_loop` despite no Bijector caching.
-
-  # [1] also describes an "Inverse Autoregressive Flow", e.g.,
-  iaf = ds.TransformedDistribution(
-      distribution=ds.Normal(loc=0., scale=1.),
-      bijector=bs.Invert(bs.MaskedAutoregressiveFlow(
-          shift_and_log_scale_fn=bs.masked_autoregressive_default_template(
-              hidden_layers=[512, 512]))),
-      event_shape=[dims])
-
-  x = iaf.sample()  # Cheap; no `tf.while_loop` despite no Bijector caching.
-  iaf.log_prob(x)   # Almost free; uses Bijector caching.
-  iaf.log_prob(0.)  # Expensive; uses `tf.while_loop`, no Bijector caching.
-
-  # In many (if not most) cases the default `shift_and_log_scale_fn` will be a
-  # poor choice. Here's an example of using a "shift only" version and with a
-  # different number/depth of hidden layers.
-  shift_only = True
-  maf_no_scale_hidden2 = ds.TransformedDistribution(
-      distribution=ds.Normal(loc=0., scale=1.),
-      bijector=bs.MaskedAutoregressiveFlow(
-          bs.masked_autoregressive_default_template(
-              hidden_layers=[32],
-              shift_only=shift_only),
-          is_constant_jacobian=shift_only),
-      event_shape=[dims])
-  ```
-
-  [1]: "Masked Autoregressive Flow for Density Estimation."
-       George Papamakarios, Theo Pavlakou, Iain Murray. Arxiv. 2017.
-       https://arxiv.org/abs/1705.07057
-
-  [2]: "MADE: Masked Autoencoder for Distribution Estimation."
-       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
-       https://arxiv.org/abs/1502.03509
-
-  """
-
-  def __init__(self,
-               shift_and_log_scale_fn,
-               is_constant_jacobian=False,
-               validate_args=False,
-               name=None):
-    """Creates the MaskedAutoregressiveFlow bijector.
-
-    Args:
-      shift_and_log_scale_fn: Python `callable` which computes `shift` and
-        `log_scale` from both the forward domain (`x`) and the inverse domain
-        (`y`). Calculation must respect the "autoregressive property" (see class
-        docstring). Suggested default
-        `masked_autoregressive_default_template(hidden_layers=...)`.
-        Typically the function contains `tf.Variables` and is wrapped using
-        `tf.make_template`. Returning `None` for either (both) `shift`,
-        `log_scale` is equivalent to (but more efficient than) returning zero.
-      is_constant_jacobian: Python `bool`. Default: `False`. When `True` the
-        implementation assumes `log_scale` does not depend on the forward domain
-        (`x`) or inverse domain (`y`) values. (No validation is made;
-        `is_constant_jacobian=False` is always safe but possibly computationally
-        inefficient.)
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str`, name given to ops managed by this object.
-    """
-    name = name or "masked_autoregressive_flow"
-    self._shift_and_log_scale_fn = shift_and_log_scale_fn
-    super(MaskedAutoregressiveFlow, self).__init__(
-        is_constant_jacobian=is_constant_jacobian,
-        validate_args=validate_args,
-        name=name)
-
-  def _forward(self, x):
-    event_size = array_ops.shape(x)[-1]
-    def _loop_body(index, y0):
-      """While-loop body for autoregression calculation."""
-      # Set caching device to avoid re-getting the tf.Variable for every while
-      # loop iteration.
-      with variable_scope_lib.variable_scope(
-          variable_scope_lib.get_variable_scope()) as vs:
-        if vs.caching_device is None:
-          vs.set_caching_device(lambda op: op.device)
-        shift, log_scale = self._shift_and_log_scale_fn(y0)
-      y = x
-      if log_scale is not None:
-        y *= math_ops.exp(log_scale)
-      if shift is not None:
-        y += shift
-      return index + 1, y
-    _, y = control_flow_ops.while_loop(
-        cond=lambda index, _: index < event_size,
-        body=_loop_body,
-        loop_vars=[0, array_ops.zeros_like(x, name="y0")])
-    return y
-
-  def _inverse(self, y):
-    shift, log_scale = self._shift_and_log_scale_fn(y)
-    x = y
-    if shift is not None:
-      x -= shift
-    if log_scale is not None:
-      x *= math_ops.exp(-log_scale)
-    return x
-
-  def _inverse_log_det_jacobian(self, y):
-    _, log_scale = self._shift_and_log_scale_fn(y)
-    if log_scale is None:
-      return constant_op.constant(0., dtype=y.dtype, name="ildj")
-    return -math_ops.reduce_sum(log_scale, axis=-1)
-
-
-MASK_INCLUSIVE = "inclusive"
-MASK_EXCLUSIVE = "exclusive"
-
-
-def _gen_slices(num_blocks, n_in, n_out, mask_type=MASK_EXCLUSIVE):
-  """Generate the slices for building an autoregressive mask."""
-  # TODO(b/67594795): Better support of dynamic shape.
-  slices = []
-  col = 0
-  d_in = n_in // num_blocks
-  d_out = n_out // num_blocks
-  row = d_out if mask_type == MASK_EXCLUSIVE else 0
-  for _ in range(num_blocks):
-    row_slice = slice(row, None)
-    col_slice = slice(col, col + d_in)
-    slices.append([row_slice, col_slice])
-    col += d_in
-    row += d_out
-  return slices
-
-
-def _gen_mask(num_blocks,
-              n_in,
-              n_out,
-              mask_type=MASK_EXCLUSIVE,
-              dtype=dtypes.float32):
-  """Generate the mask for building an autoregressive dense layer."""
-  # TODO(b/67594795): Better support of dynamic shape.
-  mask = np.zeros([n_out, n_in], dtype=dtype.as_numpy_dtype())
-  slices = _gen_slices(num_blocks, n_in, n_out, mask_type=mask_type)
-  for [row_slice, col_slice] in slices:
-    mask[row_slice, col_slice] = 1
-  return mask
-
-
-def masked_dense(inputs,
-                 units,
-                 num_blocks=None,
-                 exclusive=False,
-                 kernel_initializer=None,
-                 reuse=None,
-                 name=None,
-                 *args,
-                 **kwargs):
-  """A autoregressively masked dense layer. Analogous to `tf.layers.dense`.
-
-  See [1] for detailed explanation.
-
-  [1]: "MADE: Masked Autoencoder for Distribution Estimation."
-       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
-       https://arxiv.org/abs/1502.03509
-
-  Arguments:
-    inputs: Tensor input.
-    units: Python `int` scalar representing the dimensionality of the output
-      space.
-    num_blocks: Python `int` scalar representing the number of blocks for the
-      MADE masks.
-    exclusive: Python `bool` scalar representing whether to zero the diagonal of
-      the mask, used for the first layer of a MADE.
-    kernel_initializer: Initializer function for the weight matrix.
-      If `None` (default), weights are initialized using the
-      `tf.glorot_random_initializer`.
-    reuse: Python `bool` scalar representing whether to reuse the weights of a
-      previous layer by the same name.
-    name: Python `str` used to describe ops managed by this function.
-    *args: `tf.layers.dense` arguments.
-    **kwargs: `tf.layers.dense` keyword arguments.
-
-  Returns:
-    Output tensor.
-
-  Raises:
-    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
-      graph execution.
-  """
-  # TODO(b/67594795): Better support of dynamic shape.
-  input_depth = inputs.shape.with_rank_at_least(1)[-1].value
-  if input_depth is None:
-    raise NotImplementedError(
-        "Rightmost dimension must be known prior to graph execution.")
-
-  mask = _gen_mask(num_blocks, input_depth, units,
-                   MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T
-
-  if kernel_initializer is None:
-    kernel_initializer = init_ops.glorot_normal_initializer()
-
-  def masked_initializer(shape, dtype=None, partition_info=None):
-    return mask * kernel_initializer(shape, dtype, partition_info)
-
-  with ops.name_scope(name, "masked_dense", [inputs, units, num_blocks]):
-    layer = layers.Dense(
-        units,
-        kernel_initializer=masked_initializer,
-        kernel_constraint=lambda x: mask * x,
-        name=name,
-        dtype=inputs.dtype.base_dtype,
-        _scope=name,
-        _reuse=reuse,
-        *args,
-        **kwargs)
-    return layer.apply(inputs)
-
-
-def masked_autoregressive_default_template(
-    hidden_layers,
-    shift_only=False,
-    activation=nn_ops.relu,
-    log_scale_min_clip=-5.,
-    log_scale_max_clip=3.,
-    log_scale_clip_gradient=False,
-    name=None,
-    *args,
-    **kwargs):
-  """Build the MADE Model [1].
-
-  This will be wrapped in a make_template to ensure the variables are only
-  created once. It takes the input and returns the `loc` ("mu" [1]) and
-  `log_scale` ("alpha" [1]) from the MADE network.
-
-  Warning: This function uses `masked_dense` to create randomly initialized
-  `tf.Variables`. It is presumed that these will be fit, just as you would any
-  other neural architecture which uses `tf.layers.dense`.
-
-  #### About Hidden Layers:
-
-  Each element of `hidden_layers` should be greater than the `input_depth`
-  (i.e., `input_depth = tf.shape(input)[-1]` where `input` is the input to the
-  neural network). This is necessary to ensure the autoregressivity property.
-
-  #### About Clipping:
-
-  This function also optionally clips the `log_scale` (but possibly not its
-  gradient). This is useful because if `log_scale` is too small/large it might
-  underflow/overflow making it impossible for the `MaskedAutoregressiveFlow`
-  bijector to implement a bijection. Additionally, the `log_scale_clip_gradient`
-  `bool` indicates whether the gradient should also be clipped. The default does
-  not clip the gradient; this is useful because it still provides gradient
-  information (for fitting) yet solves the numerical stability problem. I.e.,
-  `log_scale_clip_gradient = False` means
-  `grad[exp(clip(x))] = grad[x] exp(clip(x))` rather than the usual
-  `grad[clip(x)] exp(clip(x))`.
-
-  [1]: "MADE: Masked Autoencoder for Distribution Estimation."
-       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
-       https://arxiv.org/abs/1502.03509
-
-  Arguments:
-    hidden_layers: Python `list`-like of non-negative integer, scalars
-      indicating the number of units in each hidden layer. Default: `[512, 512].
-    shift_only: Python `bool` indicating if only the `shift` term shall be
-      computed. Default: `False`.
-    activation: Activation function (callable). Explicitly setting to `None`
-      implies a linear activation.
-    log_scale_min_clip: `float`-like scalar `Tensor`, or a `Tensor` with the
-      same shape as `log_scale`. The minimum value to clip by. Default: -5.
-    log_scale_max_clip: `float`-like scalar `Tensor`, or a `Tensor` with the
-      same shape as `log_scale`. The maximum value to clip by. Default: 3.
-    log_scale_clip_gradient: Python `bool` indicating that the gradient of
-      `tf.clip_by_value` should be preserved. Default: `False`.
-    name: A name for ops managed by this function. Default:
-      "masked_autoregressive_default_template".
-    *args: `tf.layers.dense` arguments.
-    **kwargs: `tf.layers.dense` keyword arguments.
-
-  Returns:
-    shift: `Float`-like `Tensor` of shift terms (the "mu" in [2]).
-    log_scale: `Float`-like `Tensor` of log(scale) terms (the "alpha" in [2]).
-
-  Raises:
-    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
-      graph execution.
-  """
-
-  with ops.name_scope(name, "masked_autoregressive_default_template",
-                      values=[log_scale_min_clip, log_scale_max_clip]):
-    def _fn(x):
-      """MADE parameterized via `masked_autoregressive_default_template`."""
-      # TODO(b/67594795): Better support of dynamic shape.
-      input_depth = x.shape.with_rank_at_least(1)[-1].value
-      if input_depth is None:
-        raise NotImplementedError(
-            "Rightmost dimension must be known prior to graph execution.")
-      input_shape = (np.int32(x.shape.as_list()) if x.shape.is_fully_defined()
-                     else array_ops.shape(x))
-      for i, units in enumerate(hidden_layers):
-        x = masked_dense(
-            inputs=x,
-            units=units,
-            num_blocks=input_depth,
-            exclusive=True if i == 0 else False,
-            activation=activation,
-            *args,
-            **kwargs)
-      x = masked_dense(
-          inputs=x,
-          units=(1 if shift_only else 2) * input_depth,
-          num_blocks=input_depth,
-          activation=None,
-          *args,
-          **kwargs)
-      if shift_only:
-        x = array_ops.reshape(x, shape=input_shape)
-        return x, None
-      x = array_ops.reshape(
-          x, shape=array_ops.concat([input_shape, [2]], axis=0))
-      shift, log_scale = array_ops.unstack(x, num=2, axis=-1)
-      which_clip = (math_ops.clip_by_value if log_scale_clip_gradient
-                    else _clip_by_value_preserve_grad)
-      log_scale = which_clip(log_scale, log_scale_min_clip, log_scale_max_clip)
-      return shift, log_scale
-    return template_ops.make_template(
-        "masked_autoregressive_default_template", _fn)
-
-
-def _clip_by_value_preserve_grad(x, clip_value_min, clip_value_max, name=None):
-  """Clips input while leaving gradient unaltered."""
-  with ops.name_scope(name, "clip_by_value_preserve_grad",
-                      [x, clip_value_min, clip_value_max]):
-    clip_x = clip_ops.clip_by_value(x, clip_value_min, clip_value_max)
-    return x + array_ops.stop_gradient(clip_x - x)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/permute.py b/tensorflow/contrib/distributions/python/ops/bijectors/permute.py
index a187ce22d686ee1203802ae2bfe64b0e1a3ea850..8654cc39d0c41ec4f1b85cd5fc4366ceaf4b224d 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/permute.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/permute.py
@@ -12,18 +12,127 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Permute bijector."""
+"""Permutation bijectors."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.permute_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = ["Permute"]
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops.distributions import bijector as bijector_lib
 
-remove_undocumented(__name__, _allowed_symbols)
+
+__all__ = [
+    "Permute",
+]
+
+
+class Permute(bijector_lib.Bijector):
+  """Permutes the rightmost dimension of a `Tensor`.
+
+  ```python
+  tfd = tf.contrib.distributions
+
+  reverse = tfd.bijectors.Permute(permutation=[2, 1, 0])
+
+  reverse.forward([-1., 0., 1.])
+  # ==> [1., 0., -1]
+
+  reverse.inverse([1., 0., -1])
+  # ==> [-1., 0., 1.]
+
+  reverse.forward_log_det_jacobian(any_value)
+  # ==> 0.
+
+  reverse.inverse_log_det_jacobian(any_value)
+  # ==> 0.
+  ```
+
+  Warning: `tf.estimator` may repeatedly build the graph thus
+  `Permute(np.random.permutation(event_size)).astype("int32"))` is not a
+  reliable parameterization (nor would it be even if using `tf.constant`). A
+  safe alternative is to use `tf.get_variable` to achieve "init once" behavior,
+  i.e.,
+
+  ```python
+  def init_once(x, name):
+    return tf.get_variable(name, initializer=x, trainable=False)
+
+  Permute(permutation=init_once(
+      np.random.permutation(event_size).astype("int32"),
+      name="permutation"))
+  ```
+
+  """
+
+  def __init__(self, permutation, validate_args=False, name=None):
+    """Creates the `Permute` bijector.
+
+    Args:
+      permutation: An `int`-like vector-shaped `Tensor` representing the
+        permutation to apply to the rightmost dimension of the transformed
+        `Tensor`.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str`, name given to ops managed by this object.
+
+    Raises:
+      TypeError: if `not permutation.dtype.is_integer`.
+      ValueError: if `permutation` does not contain exactly one of each of
+        `{0, 1, ..., d}`.
+    """
+    with ops.name_scope(name, "permute", values=[permutation]):
+      permutation = ops.convert_to_tensor(
+          permutation,
+          name="permutation")
+      if not permutation.dtype.is_integer:
+        raise TypeError("permutation.dtype ({}) should be `int`-like.".format(
+            permutation.dtype.name))
+      p = tensor_util.constant_value(permutation)
+      if p is not None:
+        if set(p) != set(np.arange(p.size)):
+          raise ValueError("Permutation over `d` must contain exactly one of "
+                           "each of `{0, 1, ..., d}`.")
+      elif validate_args:
+        p, _ = nn_ops.top_k(-permutation,
+                            k=array_ops.shape(permutation)[-1],
+                            sorted=True)
+        permutation = control_flow_ops.with_dependencies([
+            check_ops.assert_equal(
+                -p, math_ops.range(array_ops.size(p)),
+                message=("Permutation over `d` must contain exactly one of "
+                         "each of `{0, 1, ..., d}`.")),
+        ], permutation)
+      self._permutation = permutation
+      super(Permute, self).__init__(
+          is_constant_jacobian=True,
+          validate_args=validate_args,
+          name=name or "permute")
+
+  @property
+  def permutation(self):
+    return self._permutation
+
+  def _forward(self, x):
+    return array_ops.gather(x, self.permutation, axis=-1)
+
+  def _inverse(self, y):
+    return array_ops.gather(
+        y,
+        array_ops.invert_permutation(self.permutation),
+        axis=-1)
+
+  def _inverse_log_det_jacobian(self, y):
+    return constant_op.constant(0., dtype=y.dtype)
+
+  def _forward_log_det_jacobian(self, x):
+    return constant_op.constant(0., dtype=x.dtype)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py
deleted file mode 100644
index b1d8f2f41b28a88208a19824377f93882b767f03..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/permute_impl.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Permutation bijectors."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops.distributions import bijector as bijector_lib
-
-
-__all__ = [
-    "Permute",
-]
-
-
-class Permute(bijector_lib.Bijector):
-  """Permutes the rightmost dimension of a `Tensor`.
-
-  ```python
-  bs = tf.contrib.distributions.bijectors
-
-  reverse = bs.Permute(permutation=[2, 1, 0])
-
-  reverse.forward([-1., 0., 1.])
-  # ==> [1., 0., -1]
-
-  reverse.inverse([1., 0., -1])
-  # ==> [-1., 0., 1.]
-
-  reverse.forward_log_det_jacobian(any_value)
-  # ==> 0.
-
-  reverse.inverse_log_det_jacobian(any_value)
-  # ==> 0.
-  ```
-
-  Warning: `tf.estimator` may repeatedly build the graph thus
-  `Permute(np.random.permutation(event_size)).astype("int32"))` is not a
-  reliable parameterization (nor would it be even if using `tf.constant`). A
-  safe alternative is to use `tf.get_variable` to achieve "init once" behavior,
-  i.e.,
-
-  ```python
-  def init_once(x, name):
-    return tf.get_variable(name, initializer=x, trainable=False)
-
-  Permute(permutation=init_once(
-      np.random.permutation(event_size).astype("int32"),
-      name="permutation"))
-  ```
-
-  """
-
-  def __init__(self, permutation, validate_args=False, name=None):
-    """Creates the `Permute` bijector.
-
-    Args:
-      permutation: An `int`-like vector-shaped `Tensor` representing the
-        permutation to apply to the rightmost dimension of the transformed
-        `Tensor`.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str`, name given to ops managed by this object.
-
-    Raises:
-      TypeError: if `not permutation.dtype.is_integer`.
-      ValueError: if `permutation` does not contain exactly one of each of
-        `{0, 1, ..., d}`.
-    """
-    with ops.name_scope(name, "permute", values=[permutation]):
-      permutation = ops.convert_to_tensor(
-          permutation,
-          name="permutation")
-      if not permutation.dtype.is_integer:
-        raise TypeError("permutation.dtype ({}) should be `int`-like.".format(
-            permutation.dtype.name))
-      p = tensor_util.constant_value(permutation)
-      if p is not None:
-        if set(p) != set(np.arange(p.size)):
-          raise ValueError("Permutation over `d` must contain exactly one of "
-                           "each of `{0, 1, ..., d}`.")
-      elif validate_args:
-        p, _ = nn_ops.top_k(-permutation,
-                            k=array_ops.shape(permutation)[-1],
-                            sorted=True)
-        permutation = control_flow_ops.with_dependencies([
-            check_ops.assert_equal(
-                -p, math_ops.range(array_ops.size(p)),
-                message=("Permutation over `d` must contain exactly one of "
-                         "each of `{0, 1, ..., d}`.")),
-        ], permutation)
-      self._permutation = permutation
-      super(Permute, self).__init__(
-          is_constant_jacobian=True,
-          validate_args=validate_args,
-          name=name or "permute")
-
-  @property
-  def permutation(self):
-    return self._permutation
-
-  def _forward(self, x):
-    return array_ops.gather(x, self.permutation, axis=-1)
-
-  def _inverse(self, y):
-    return array_ops.gather(
-        y,
-        array_ops.invert_permutation(self.permutation),
-        axis=-1)
-
-  def _inverse_log_det_jacobian(self, y):
-    return constant_op.constant(0., dtype=y.dtype)
-
-  def _forward_log_det_jacobian(self, x):
-    return constant_op.constant(0., dtype=x.dtype)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py b/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py
index a83199549cd16101ab7b39b43d19a17bc66f03df..c37db61720d10949f294ff7b2e9778ba6efa57f0 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/power_transform.py
@@ -18,12 +18,110 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.power_transform_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["PowerTransform"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "PowerTransform",
+]
+
+
+class PowerTransform(bijector.Bijector):
+  """Compute `Y = g(X) = (1 + X * c)**(1 / c), X >= -1 / c`.
+
+  The [power transform](https://en.wikipedia.org/wiki/Power_transform) maps
+  inputs from `[0, inf]` to `[-1/c, inf]`; this is equivalent to the `inverse`
+  of this bijector.
+
+  This bijector is equivalent to the `Exp` bijector when `c=0`.
+  """
+
+  def __init__(self,
+               power=0.,
+               event_ndims=0,
+               validate_args=False,
+               name="power_transform"):
+    """Instantiates the `PowerTransform` bijector.
+
+    Args:
+      power: Python `float` scalar indicating the transform power, i.e.,
+        `Y = g(X) = (1 + X * c)**(1 / c)` where `c` is the `power`.
+      event_ndims: Python scalar indicating the number of dimensions associated
+        with a particular draw from the distribution.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+
+    Raises:
+      ValueError: if `power < 0` or is not known statically.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+    with self._name_scope("init", values=[power]):
+      power = tensor_util.constant_value(
+          ops.convert_to_tensor(power, name="power"))
+    if power is None or power < 0:
+      raise ValueError("`power` must be a non-negative TF constant.")
+    self._power = power
+    super(PowerTransform, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
+
+  @property
+  def power(self):
+    """The `c` in: `Y = g(X) = (1 + X * c)**(1 / c)`."""
+    return self._power
+
+  def _forward(self, x):
+    x = self._maybe_assert_valid_x(x)
+    if self.power == 0.:
+      return math_ops.exp(x)
+    # If large x accuracy is an issue, consider using:
+    # (1. + x * self.power)**(1. / self.power) when x >> 1.
+    return math_ops.exp(math_ops.log1p(x * self.power) / self.power)
+
+  def _inverse(self, y):
+    y = self._maybe_assert_valid_y(y)
+    if self.power == 0.:
+      return math_ops.log(y)
+    # If large y accuracy is an issue, consider using:
+    # (y**self.power - 1.) / self.power when y >> 1.
+    return math_ops.expm1(math_ops.log(y) * self.power) / self.power
+
+  def _inverse_log_det_jacobian(self, y):
+    y = self._maybe_assert_valid_y(y)
+    event_dims = self._event_dims_tensor(y)
+    return (self.power - 1.) * math_ops.reduce_sum(
+        math_ops.log(y), axis=event_dims)
+
+  def _forward_log_det_jacobian(self, x):
+    x = self._maybe_assert_valid_x(x)
+    event_dims = self._event_dims_tensor(x)
+    if self.power == 0.:
+      return math_ops.reduce_sum(x, axis=event_dims)
+    return (1. / self.power - 1.) * math_ops.reduce_sum(
+        math_ops.log1p(x * self.power),
+        axis=event_dims)
+
+  def _maybe_assert_valid_x(self, x):
+    if not self.validate_args or self.power == 0.:
+      return x
+    is_valid = check_ops.assert_non_negative(
+        1. + self.power * x,
+        message="Forward transformation input must be at least {}.".format(
+            -1. / self.power))
+    return control_flow_ops.with_dependencies([is_valid], x)
+
+  def _maybe_assert_valid_y(self, y):
+    if not self.validate_args:
+      return y
+    is_valid = check_ops.assert_positive(
+        y, message="Inverse transformation input must be greater than 0.")
+    return control_flow_ops.with_dependencies([is_valid], y)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/power_transform_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/power_transform_impl.py
deleted file mode 100644
index c37db61720d10949f294ff7b2e9778ba6efa57f0..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/power_transform_impl.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""PowerTransform bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "PowerTransform",
-]
-
-
-class PowerTransform(bijector.Bijector):
-  """Compute `Y = g(X) = (1 + X * c)**(1 / c), X >= -1 / c`.
-
-  The [power transform](https://en.wikipedia.org/wiki/Power_transform) maps
-  inputs from `[0, inf]` to `[-1/c, inf]`; this is equivalent to the `inverse`
-  of this bijector.
-
-  This bijector is equivalent to the `Exp` bijector when `c=0`.
-  """
-
-  def __init__(self,
-               power=0.,
-               event_ndims=0,
-               validate_args=False,
-               name="power_transform"):
-    """Instantiates the `PowerTransform` bijector.
-
-    Args:
-      power: Python `float` scalar indicating the transform power, i.e.,
-        `Y = g(X) = (1 + X * c)**(1 / c)` where `c` is the `power`.
-      event_ndims: Python scalar indicating the number of dimensions associated
-        with a particular draw from the distribution.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-
-    Raises:
-      ValueError: if `power < 0` or is not known statically.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-    with self._name_scope("init", values=[power]):
-      power = tensor_util.constant_value(
-          ops.convert_to_tensor(power, name="power"))
-    if power is None or power < 0:
-      raise ValueError("`power` must be a non-negative TF constant.")
-    self._power = power
-    super(PowerTransform, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
-
-  @property
-  def power(self):
-    """The `c` in: `Y = g(X) = (1 + X * c)**(1 / c)`."""
-    return self._power
-
-  def _forward(self, x):
-    x = self._maybe_assert_valid_x(x)
-    if self.power == 0.:
-      return math_ops.exp(x)
-    # If large x accuracy is an issue, consider using:
-    # (1. + x * self.power)**(1. / self.power) when x >> 1.
-    return math_ops.exp(math_ops.log1p(x * self.power) / self.power)
-
-  def _inverse(self, y):
-    y = self._maybe_assert_valid_y(y)
-    if self.power == 0.:
-      return math_ops.log(y)
-    # If large y accuracy is an issue, consider using:
-    # (y**self.power - 1.) / self.power when y >> 1.
-    return math_ops.expm1(math_ops.log(y) * self.power) / self.power
-
-  def _inverse_log_det_jacobian(self, y):
-    y = self._maybe_assert_valid_y(y)
-    event_dims = self._event_dims_tensor(y)
-    return (self.power - 1.) * math_ops.reduce_sum(
-        math_ops.log(y), axis=event_dims)
-
-  def _forward_log_det_jacobian(self, x):
-    x = self._maybe_assert_valid_x(x)
-    event_dims = self._event_dims_tensor(x)
-    if self.power == 0.:
-      return math_ops.reduce_sum(x, axis=event_dims)
-    return (1. / self.power - 1.) * math_ops.reduce_sum(
-        math_ops.log1p(x * self.power),
-        axis=event_dims)
-
-  def _maybe_assert_valid_x(self, x):
-    if not self.validate_args or self.power == 0.:
-      return x
-    is_valid = check_ops.assert_non_negative(
-        1. + self.power * x,
-        message="Forward transformation input must be at least {}.".format(
-            -1. / self.power))
-    return control_flow_ops.with_dependencies([is_valid], x)
-
-  def _maybe_assert_valid_y(self, y):
-    if not self.validate_args:
-      return y
-    is_valid = check_ops.assert_positive(
-        y, message="Inverse transformation input must be greater than 0.")
-    return control_flow_ops.with_dependencies([is_valid], y)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py b/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py
index 8997f7ab6929745275edb38712a5bbb0a9b25ddb..55eca063126797d577653f0d6bcdfddf8192bdb5 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/reshape.py
@@ -12,18 +12,303 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Reshape bijector."""
+"""Reshape bijectors."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.reshape_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = ["Reshape"]
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector as bijector_lib
 
-remove_undocumented(__name__, _allowed_symbols)
+
+__all__ = [
+    "Reshape",
+]
+
+
+def _static_ndims_from_shape(shape):
+  return shape.shape.with_rank_at_least(1)[0].value
+
+
+def _ndims_from_shape(shape):
+  return array_ops.shape(shape)[0]
+
+
+class Reshape(bijector_lib.Bijector):
+  """Reshapes the `event_shape` of a `Tensor`.
+
+  The semantics generally follow that of `tf.reshape()`, with
+  a few differences:
+
+  * The user must provide both the input and output shape, so that
+    the transformation can be inverted. If an input shape is not
+    specified, the default assumes a vector-shaped input, i.e.,
+    event_shape_in = (-1,).
+  * The `Reshape` bijector automatically broadcasts over the leftmost
+    dimensions of its input (`sample_shape` and `batch_shape`); only
+    the rightmost `event_ndims_in` dimensions are reshaped. The
+    number of dimensions to reshape is inferred from the provided
+    `event_shape_in` (`event_ndims_in = len(event_shape_in)`).
+
+  Example usage:
+  ```python
+
+  tfd = tf.contrib.distributions
+
+  r = tfd.bijectors.Reshape(event_shape_out=[1, -1])
+
+  r.forward([3., 4.])    # shape [2]
+  # ==> [[3., 4.]]       # shape [1, 2]
+
+  r.forward([[1., 2.], [3., 4.]])  # shape [2, 2]
+  # ==> [[[1., 2.]],
+  #      [[3., 4.]]]   # shape [2, 1, 2]
+
+  r.inverse([[3., 4.]])  # shape [1,2]
+  # ==> [3., 4.]         # shape [2]
+
+  r.forward_log_det_jacobian(any_value)
+  # ==> 0.
+
+  r.inverse_log_det_jacobian(any_value)
+  # ==> 0.
+  ```
+
+  """
+
+  def __init__(self, event_shape_out, event_shape_in=(-1,),
+               validate_args=False, name=None):
+    """Creates a `Reshape` bijector.
+
+    Args:
+      event_shape_out: An `int`-like vector-shaped `Tensor`
+        representing the event shape of the transformed output.
+      event_shape_in: An optional `int`-like vector-shape `Tensor`
+        representing the event shape of the input. This is required in
+        order to define inverse operations; the default of (-1,)
+        assumes a vector-shaped input.
+      validate_args: Python `bool` indicating whether arguments should
+        be checked for correctness.
+      name: Python `str`, name given to ops managed by this object.
+
+    Raises:
+      TypeError: if either `event_shape_in` or `event_shape_out` has
+        non-integer `dtype`.
+      ValueError: if either of `event_shape_in` or `event_shape_out`
+       has non-vector shape (`rank > 1`), or if their sizes do not
+       match.
+    """
+    with ops.name_scope(name, "reshape",
+                        values=[event_shape_out, event_shape_in]):
+
+      event_shape_out = ops.convert_to_tensor(event_shape_out,
+                                              name="event_shape_out",
+                                              preferred_dtype=dtypes.int32)
+      event_shape_in = ops.convert_to_tensor(event_shape_in,
+                                             name="event_shape_in",
+                                             preferred_dtype=dtypes.int32)
+
+      assertions = []
+      assertions.extend(self._maybe_check_valid_shape(
+          event_shape_out, validate_args))
+      assertions.extend(self._maybe_check_valid_shape(
+          event_shape_in, validate_args))
+
+      self._assertions = assertions
+      self._event_shape_in = event_shape_in
+      self._event_shape_out = event_shape_out
+
+      super(Reshape, self).__init__(is_constant_jacobian=True,
+                                    validate_args=validate_args,
+                                    name=name or "reshape")
+
+  def _maybe_check_valid_shape(self, shape, validate_args):
+    """Check that a shape Tensor is int-type and otherwise sane."""
+    if not shape.dtype.is_integer:
+      raise TypeError("{} dtype ({}) should be `int`-like.".format(
+          shape.op.name, shape.dtype.name))
+
+    assertions = []
+
+    ndims = array_ops.rank(shape)
+    ndims_ = tensor_util.constant_value(ndims)
+    if ndims_ is not None and ndims_ > 1:
+      raise ValueError("`{}` rank ({}) should be <= 1.".format(
+          shape.op.name, ndims_))
+    elif validate_args:
+      assertions.append(check_ops.assert_less_equal(
+          ndims, 1, message="`{}` rank should be <= 1.".format(shape.op.name)))
+
+    shape_ = tensor_util.constant_value_as_shape(shape)
+    if shape_.is_fully_defined():
+      es = np.int32(shape_.as_list())
+      if sum(es == -1) > 1:
+        raise ValueError(
+            "`{}` must have at most one `-1` (given {})"
+            .format(shape.op.name, es))
+      if np.any(es < -1):
+        raise ValueError(
+            "`{}` elements must be either positive integers or `-1`"
+            "(given {})."
+            .format(shape.op.name, es))
+    elif validate_args:
+      assertions.extend([
+          check_ops.assert_less_equal(
+              math_ops.reduce_sum(
+                  math_ops.cast(math_ops.equal(shape, -1), dtypes.int32)),
+              1,
+              message="`{}` elements must have at most one `-1`."
+              .format(shape.op.name)),
+          check_ops.assert_greater_equal(
+              shape, -1,
+              message="`{}` elements must be either positive integers or `-1`."
+              .format(shape.op.name)),
+      ])
+    return assertions
+
+  def _reshape_helper(self, x, event_shape_in, event_shape_out):
+    """Reshape only the event_shape of an input `Tensor`."""
+
+    event_ndims_in_ = _static_ndims_from_shape(event_shape_in)
+    event_ndims_in = _ndims_from_shape(event_shape_in)
+    x_ndims_, x_ndims = x.shape.ndims, array_ops.rank(x)
+
+    assertions = []
+
+    # Ensure x.event_shape is compatible with event_shape_in.
+    if (event_ndims_in_ is not None
+        and x_ndims_ is not None
+        and x.shape.with_rank_at_least(event_ndims_in_)[
+            x_ndims_-event_ndims_in_:].is_fully_defined()):
+      x_event_shape_, x_event_shape = [  # pylint: disable=unbalanced-tuple-unpacking
+          np.int32(x.shape[x_ndims_-event_ndims_in_:])]*2
+    else:
+      x_event_shape_, x_event_shape = (
+          None, array_ops.shape(x)[x_ndims-event_ndims_in:])
+
+    event_shape_in_ = tensor_util.constant_value(event_shape_in)
+
+    if x_event_shape_ is not None and event_shape_in_ is not None:
+      # Compare the shape dimensions that are fully specified in the
+      # input (i.e., for which event_shape_in is not -1). If x_event_shape
+      # matches along all of these dimensions, it is compatible with
+      # the desired input shape and any further mismatches (i.e.,
+      # imcompatibility with the desired *output* shape) will be
+      # caught inside of array_ops.reshape() below.
+      x_event_shape_specified_ = x_event_shape_[event_shape_in_ >= 0]
+      event_shape_in_specified_ = event_shape_in_[event_shape_in_ >= 0]
+      if not np.equal(x_event_shape_specified_,
+                      event_shape_in_specified_).all():
+        raise ValueError(
+            "Input `event_shape` does not match `event_shape_in` ({} vs {}).".
+            format(x_event_shape_, event_shape_in_))
+    elif self.validate_args:
+      # Similarly to the static case, we compare the shape dimensions
+      # that are fully specified in the input. We extract these
+      # dimensions using boolean_mask(), which requires that the mask
+      # have known ndims. We can assume that shape Tensors always have
+      # ndims==1 (this assumption is verified inside of
+      # _maybe_check_valid_shape), so the reshape operation is just a
+      # no-op that formally encodes this fact to make boolean_mask()
+      # happy.
+      event_shape_mask = array_ops.reshape(event_shape_in >= 0, [-1])
+      x_event_shape_specified = array_ops.boolean_mask(x_event_shape,
+                                                       event_shape_mask)
+      event_shape_in_specified = array_ops.boolean_mask(event_shape_in,
+                                                        event_shape_mask)
+      assertions.append(check_ops.assert_equal(
+          x_event_shape_specified, event_shape_in_specified,
+          message="Input `event_shape` does not match `event_shape_in`."))
+
+    if assertions:
+      x = control_flow_ops.with_dependencies(assertions, x)
+
+    # get the parts of shape(x) that will not change
+    sample_and_batch_shape = array_ops.shape(x)
+
+    ndims = (x.shape.ndims if x.shape.ndims is not None
+             else array_ops.rank(x))
+    sample_and_batch_shape = sample_and_batch_shape[
+        :(ndims - math_ops.abs(event_ndims_in))]
+
+    if (event_ndims_in_ is not None
+        and x_ndims_ is not None
+        and event_ndims_in_ == x_ndims_):
+      # Hack to allow forward/inverse_event_shape to do shape
+      # inference by calling this helper method with a dummy Tensor of
+      # shape event_shape_in. In this special case,
+      # sample_and_batch_shape will be empty so we can preserve static
+      # shape information by avoiding the concat operation below
+      # (which would be a no-op).
+      new_shape = event_shape_out
+    else:
+      new_shape = array_ops.concat(
+          [sample_and_batch_shape, event_shape_out], axis=0)
+
+    return array_ops.reshape(x, new_shape)
+
+  def _forward(self, x):
+    with ops.control_dependencies(self._assertions):
+      return self._reshape_helper(x,
+                                  self._event_shape_in,
+                                  self._event_shape_out)
+
+  def _inverse(self, y):
+    with ops.control_dependencies(self._assertions):
+      return self._reshape_helper(y,
+                                  self._event_shape_out,
+                                  self._event_shape_in)
+
+  def _inverse_log_det_jacobian(self, y):
+    with ops.control_dependencies(self._assertions):
+      return constant_op.constant(0., dtype=y.dtype)
+
+  def _forward_log_det_jacobian(self, x):
+    with ops.control_dependencies(self._assertions):
+      return constant_op.constant(0., dtype=x.dtype)
+
+  def _forward_event_shape(self, input_shape):
+    # NOTE: this method and the other *_event_shape* methods
+    # compute shape by explicit transformation of a dummy
+    # variable. This approach is not generally recommended because it
+    # bloats the graph and could in general trigger side effects.
+    #
+    # In this particular case of the Reshape bijector, the
+    # forward and inverse transforms have no side effects, and we
+    # believe the reduction in code complexity from delegating the
+    # heavy lifting to tf.reshape() is worth the added graph ops.
+    # However, you should think hard before implementing this approach
+    # in other Bijectors; it is strongly preferred to compute
+    # shapes explicitly whenever it's feasible to do so.
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=input_shape)
+      dummy_reshaped = self.forward(dummy)
+      return dummy_reshaped.shape
+
+  def _inverse_event_shape(self, output_shape):
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=output_shape)
+      dummy_reshaped = self.inverse(dummy)
+      return dummy_reshaped.shape
+
+  def _forward_event_shape_tensor(self, input_shape):
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=input_shape)
+      dummy_reshaped = self.forward(dummy)
+      return array_ops.shape(dummy_reshaped)
+
+  def _inverse_event_shape_tensor(self, output_shape):
+    with ops.control_dependencies(self._assertions):
+      dummy = array_ops.zeros(dtype=dtypes.float32, shape=output_shape)
+      dummy_reshaped = self.inverse(dummy)
+      return array_ops.shape(dummy_reshaped)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py
deleted file mode 100644
index 93682639aa3be3b8f59a369dedb6ee773c468130..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/reshape_impl.py
+++ /dev/null
@@ -1,297 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Reshape bijectors."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector as bijector_lib
-
-
-__all__ = [
-    "Reshape",
-]
-
-
-class Reshape(bijector_lib.Bijector):
-  """Reshapes the `event_shape` of a `Tensor`.
-
-  The semantics generally follow that of `tf.reshape()`, with
-  a few differences:
-   * The user must provide both the input and output shape, so that
-     the transformation can be inverted.
-   * The `Reshape` bijector automatically broadcasts over the leftmost
-     dimensions of its input (`sample_shape` and `batch_shape`); only
-     the rightmost `event_ndims_in` dimensions are reshaped. The
-     number of dimensions to reshape is inferred from the provided
-     `event_shape_in` (`event_ndims_in = len(event_shape_in)`).
-   * The `Reshape` bijector does not currently support
-     partially-specified shapes, i.e., those with a dimension
-     implicitly specified by `-1`.
-
-  Example usage:
-  ```python
-
-  bs = tf.contrib.distributions.bijectors
-
-  reverse = bs.Reshape(event_shape_out=[1,2],
-                       event_shape_in=[2,])
-
-  reverse.forward([1., 2.])    # shape [2,]
-  # ==> [[1., 2.]]             # shape [1,2]
-
-  reverse.forward([[1., 2.], [3., 4.]])  # shape [2, 2]
-  # ==> [[[1., 2.]], [[3., 4.]]]         # shape [2, 1, 2]
-
-  reverse.inverse([[1., 2.]])  # shape [1,2]
-  # ==> [1., 2.]               # shape [2,]
-
-  reverse.forward_log_det_jacobian(any_value)
-  # ==> 0.
-
-  reverse.inverse_log_det_jacobian(any_value)
-  # ==> 0.
-  ```
-
-  """
-
-  def __init__(self, event_shape_out, event_shape_in,
-               validate_args=False, name=None):
-    """Creates a `Reshape` bijector.
-
-    Args:
-      event_shape_out: An `int`-like vector-shaped `Tensor`
-        representing the fully specified (no -1's) event shape of the
-        transformed output.
-      event_shape_in: An `int`-like vector-shaped `Tensor`
-        representing the fully specified (no -1's) event shape of the
-        input.
-      validate_args: Python `bool` indicating whether arguments should
-        be checked for correctness.
-      name: Python `str`, name given to ops managed by this object.
-
-    Raises:
-      TypeError: if either `event_shape_in` or `event_shape_out` has
-       non-vector shape (`rank > 1`), or non-integer `dtype`.
-      ValueError: if either `event_shape_in` or `event_shape_out`
-       contains non-positive entries, or if their sizes do not match
-       (`prod(event_shape_in)` != `prod(event_shape_out)`), or if
-       their dimensionality(s) cannot be statically inferred.
-    """
-    with ops.name_scope(name, "reshape",
-                        values=[event_shape_out, event_shape_in]):
-
-      event_shape_out = ops.convert_to_tensor(event_shape_out,
-                                              name="event_shape_out",
-                                              preferred_dtype=dtypes.int32)
-      event_shape_in = ops.convert_to_tensor(event_shape_in,
-                                             name="event_shape_in",
-                                             preferred_dtype=dtypes.int32)
-
-      # check that input shapes are positive integers
-      assertions = []
-      assertions += self._maybe_check_valid_shape(
-          event_shape_out, "event_shape_out",
-          validate_args=validate_args)
-      assertions += self._maybe_check_valid_shape(
-          event_shape_in, "event_shape_in", validate_args=validate_args)
-
-      # check that prod(event_shape_in) = prod(event_shape_out)
-      assertions += self._maybe_check_matching_sizes(
-          event_shape_in, event_shape_out, validate_args=validate_args)
-
-      self._assertions = assertions
-      self._event_shape_in = event_shape_in
-      self._event_shape_out = event_shape_out
-      self._event_shape_in_static = tensor_util.constant_value_as_shape(
-          event_shape_in)
-      self._event_shape_out_static = tensor_util.constant_value_as_shape(
-          event_shape_out)
-
-      super(Reshape, self).__init__(is_constant_jacobian=True,
-                                    validate_args=validate_args,
-                                    name=name or "reshape")
-
-  def _maybe_check_valid_shape(self, shape_tensor, label,
-                               validate_args=False):
-    """Check that a shape Tensor is int-type and positive."""
-
-    assertions = []
-
-    if not shape_tensor.dtype.is_integer:
-      raise TypeError("{} dtype ({}) should be `int`-like.".format(
-          label, shape_tensor.dtype.name))
-
-    shape_rank = tensor_util.constant_value(array_ops.rank(shape_tensor))
-    if shape_rank is not None and shape_rank > 1:
-      raise ValueError("{} rank should be <= 1.".format(label))
-
-    s = tensor_util.constant_value(shape_tensor)
-    if s is not None:
-      if (s <= 0).any():
-        raise ValueError("{} entries must be positive, but found {}".format(
-            label, s))
-    elif validate_args:
-      assertions.append(check_ops.assert_positive(
-          shape_tensor, message="{} entries must be positive".format(label)))
-
-    return assertions
-
-  def _maybe_check_matching_sizes(self, event_shape_in, event_shape_out,
-                                  validate_args=False):
-    """Check that prod(event_shape_in)==prod(event_shape_out)."""
-
-    def _get_size_from_shape(shape):
-      """Computes size from a shape `Tensor`, statically if possible."""
-      s = tensor_util.constant_value(shape)
-      if s is not None:
-        return [np.int32(np.prod(s))]*2
-      return None, math_ops.reduce_prod(shape, name="size")
-
-    # Ensure `event_shape_in` is compatible with `event_shape_out`.
-    event_size_in_, event_size_in = _get_size_from_shape(  # pylint: disable=unbalanced-tuple-unpacking
-        event_shape_in)
-    event_size_out_, event_size_out = _get_size_from_shape(  # pylint: disable=unbalanced-tuple-unpacking
-        event_shape_out)
-
-    assertions = []
-    if event_size_in_ is not None and event_size_out_ is not None:
-      if event_size_in_ != event_size_out_:
-        raise ValueError(
-            "Input `event_size` ({}) does not match output `event_size` ({}).".
-            format(event_size_in, event_size_out_))
-    elif validate_args:
-      assertions.append(check_ops.assert_equal(
-          event_size_in, event_size_out,
-          message="Input/output `event_size`s do not match."))
-
-    return assertions
-
-  def _reshape_helper(self, x, event_shape_in, event_shape_out):
-    """Reshape only the event_shape of an input `Tensor`."""
-
-    def _get_rank_from_shape(shape):
-      """Computes rank from a shape `Tensor`, statically if possible."""
-      # Uses fact that rank is "shape of shape".
-      ndims = shape.shape.with_rank_at_least(1)[0].value
-      if ndims is not None:
-        return ndims, ndims
-      return None, array_ops.shape(shape)[0]
-
-    event_ndims_in_, event_ndims_in = _get_rank_from_shape(event_shape_in)
-
-    assertions = []
-    # Ensure x.event_shape is compatible with event_shape_in.
-    if x.shape.ndims is not None:
-      x_ndims_, x_ndims = [x.shape.ndims]*2
-    else:
-      x_ndims_, x_ndims = None, array_ops.rank(x)
-
-    if (event_ndims_in_ is not None
-        and x_ndims_ is not None
-        and x.shape.with_rank_at_least(event_ndims_in_)[
-            x_ndims_-event_ndims_in_:].is_fully_defined()):
-      x_event_shape_, x_event_shape = [  # pylint: disable=unbalanced-tuple-unpacking
-          np.int32(x.shape[x_ndims_-event_ndims_in_:])]*2
-    else:
-      x_event_shape_, x_event_shape = (
-          None, array_ops.shape(x)[x_ndims-event_ndims_in:])
-
-    event_shape_in_ = tensor_util.constant_value(event_shape_in)
-
-    if x_event_shape_ is not None and event_shape_in_ is not None:
-      if not np.equal(x_event_shape_, event_shape_in_).all():
-        raise ValueError(
-            "Input `event_shape` ({}) does not match `event_shape_in` ({}).".
-            format(x_event_shape_, event_shape_in_))
-    elif self.validate_args:
-      assertions.append(check_ops.assert_equal(
-          x_event_shape, event_shape_in,
-          message="Input `event_shape` does not match `event_shape_in`."))
-
-    if assertions:
-      x = control_flow_ops.with_dependencies(assertions, x)
-
-    # get the parts of shape(x) that will not change
-    sample_and_batch_shape = array_ops.shape(x)
-
-    ndims = (x.shape.ndims if x.shape.ndims is not None
-             else array_ops.rank(x))
-    sample_and_batch_shape = sample_and_batch_shape[
-        :(ndims - math_ops.abs(event_ndims_in))]
-
-    new_shape = array_ops.concat(
-        [sample_and_batch_shape, event_shape_out], axis=0)
-
-    return array_ops.reshape(x, new_shape)
-
-  def _forward(self, x):
-    with ops.control_dependencies(self._assertions):
-      return self._reshape_helper(x,
-                                  self._event_shape_in,
-                                  self._event_shape_out)
-
-  def _inverse(self, y):
-    with ops.control_dependencies(self._assertions):
-      return self._reshape_helper(y,
-                                  self._event_shape_out,
-                                  self._event_shape_in)
-
-  def _inverse_log_det_jacobian(self, y):
-    with ops.control_dependencies(self._assertions):
-      return constant_op.constant(0., dtype=y.dtype)
-
-  def _forward_log_det_jacobian(self, x):
-    with ops.control_dependencies(self._assertions):
-      return constant_op.constant(0., dtype=x.dtype)
-
-  def _forward_event_shape(self, input_shape):
-    self._event_shape_in_static.assert_is_compatible_with(input_shape)
-    return self._event_shape_out_static
-
-  def _inverse_event_shape(self, output_shape):
-    self._event_shape_out_static.assert_is_compatible_with(output_shape)
-    return self._event_shape_in_static
-
-  def _forward_event_shape_tensor(self, input_shape):
-    input_assertions = self._maybe_check_valid_shape(
-        input_shape, "input event shape", validate_args=self.validate_args)
-    input_assertions += self._maybe_check_matching_sizes(
-        input_shape, self._event_shape_out,
-        validate_args=self.validate_args)
-
-    return control_flow_ops.with_dependencies(
-        input_assertions + self._assertions, self._event_shape_out)
-
-  def _inverse_event_shape_tensor(self, output_shape):
-
-    output_assertions = self._maybe_check_valid_shape(
-        output_shape, "output event shape", validate_args=self.validate_args)
-    output_assertions += self._maybe_check_matching_sizes(
-        output_shape, self._event_shape_in, validate_args=self.validate_args)
-
-    return control_flow_ops.with_dependencies(
-        output_assertions + self._assertions, self._event_shape_in)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py
index c20e76c0b7367369865faf973377201c8b8b17e6..a640dfe7dfbcce96261589c7fc49107deaefdd54 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid.py
@@ -18,12 +18,31 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["Sigmoid"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Sigmoid",
+]
+
+
+class Sigmoid(bijector.Bijector):
+  """Bijector which computes `Y = g(X) = 1 / (1 + exp(-X))`."""
+
+  def __init__(self, validate_args=False, name="sigmoid"):
+    super(Sigmoid, self).__init__(
+        event_ndims=0, validate_args=validate_args, name=name)
+
+  def _forward(self, x):
+    return math_ops.sigmoid(x)
+
+  def _inverse(self, y):
+    return math_ops.log(y) - math_ops.log1p(-y)
+
+  def _inverse_log_det_jacobian(self, y):
+    return -math_ops.log(y) - math_ops.log1p(-y)
+
+  def _forward_log_det_jacobian(self, x):
+    return -nn_ops.softplus(-x) - nn_ops.softplus(x)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py
index 448125230d24066697624bce03fed71a2c2f00b1..223bc9d042c69be05b0e578835a31ed6e83c0c97 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py
@@ -18,12 +18,22 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid_centered_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.contrib.distributions.python.ops.bijectors import softmax_centered
 
-_allowed_symbols = ["SigmoidCentered"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "SigmoidCentered",
+]
+
+
+class SigmoidCentered(softmax_centered.SoftmaxCentered):
+  """Bijector which computes Y = g(X) = exp([X 0]) / (1 + exp(-X)).
+
+  Equivalent to: `bijector.SoftmaxCentered(event_ndims=0)`.
+
+  See `bijector.SoftmaxCentered` for more details.
+  """
+
+  def __init__(self, validate_args=False, name="sigmoid_centered"):
+    super(SigmoidCentered, self).__init__(
+        event_ndims=0, validate_args=validate_args, name=name)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py
index b3cf03c24612f5c618c71c0a8615f272acdf2d10..3a75e4ae9495793901b0da91a5aa3982aab35852 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh.py
@@ -18,12 +18,162 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.sinh_arcsinh_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = ["SinhArcsinh"]
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "SinhArcsinh",
+]
+
+
+def _sqrtx2p1(x):
+  """Implementation of `sqrt(1 + x**2)` which is stable despite large `x`."""
+  return array_ops.where(
+      math_ops.abs(x) * np.sqrt(np.finfo(x.dtype.as_numpy_dtype).eps) <= 1.,
+      math_ops.sqrt(x**2. + 1.),
+      # For large x, calculating x**2 can overflow. This can be alleviated by
+      # considering:
+      # sqrt(1 + x**2)
+      # = exp(0.5 log(1 + x**2))
+      # = exp(0.5 log(x**2 * (1 + x**-2)))
+      # = exp(log(x) + 0.5 * log(1 + x**-2))
+      # = |x| * exp(0.5 log(1 + x**-2))
+      # = |x| * sqrt(1 + x**-2)
+      # We omit the last term in this approximation.
+      # When |x| > 1 / sqrt(machineepsilon), the second term will be 1,
+      # due to sqrt(1 + x**-2) = 1. This is also true with the gradient term,
+      # and higher order gradients, since the first order derivative of
+      # sqrt(1 + x**-2) is -2 * x**-3 / (1 + x**-2) = -2 / (x**3 + x),
+      # and all nth-order derivatives will be O(x**-(n + 2)). This makes any
+      # gradient terms that contain any derivatives of sqrt(1 + x**-2) vanish.
+      math_ops.abs(x))
+
+
+class SinhArcsinh(bijector.Bijector):
+  """Compute `Y = g(X) = Sinh( (Arcsinh(X) + skewness) * tailweight )`.
+
+  For `skewness in (-inf, inf)` and `tailweight in (0, inf)`, this
+  transformation is a
+  diffeomorphism of the real line `(-inf, inf)`.  The inverse transform is
+  `X = g^{-1}(Y) = Sinh( ArcSinh(Y) / tailweight - skewness )`.
+
+  The `SinhArcsinh` transformation of the Normal is described in
+  [Sinh-arcsinh distributions](https://www.jstor.org/stable/27798865)
+  This Bijector allows a similar transformation of any distribution supported on
+  `(-inf, inf)`.
+
+  #### Meaning of the parameters
+
+  * If `skewness = 0` and `tailweight = 1`, this transform is the identity.
+  * Positive (negative) `skewness` leads to positive (negative) skew.
+    * positive skew means, for unimodal `X` centered at zero, the mode of `Y` is
+      "tilted" to the right.
+    * positive skew means positive values of `Y` become more likely, and
+      negative values become less likely.
+  * Larger (smaller) `tailweight` leads to fatter (thinner) tails.
+    * Fatter tails mean larger values of `|Y|` become more likely.
+    * If `X` is a unit Normal, `tailweight < 1` leads to a distribution that is
+      "flat" around `Y = 0`, and a very steep drop-off in the tails.
+    * If `X` is a unit Normal, `tailweight > 1` leads to a distribution more
+      peaked at the mode with heavier tails.
+
+  To see the argument about the tails, note that for `|X| >> 1` and
+  `|X| >> (|skewness| * tailweight)**tailweight`, we have
+  `Y approx 0.5 X**tailweight e**(sign(X) skewness * tailweight)`.
+  """
+
+  def __init__(self,
+               skewness=None,
+               tailweight=None,
+               event_ndims=0,
+               validate_args=False,
+               name="SinhArcsinh"):
+    """Instantiates the `SinhArcsinh` bijector.
+
+    Args:
+      skewness:  Skewness parameter.  Float-type `Tensor`.  Default is `0`
+        of type `float32`.
+      tailweight:  Tailweight parameter.  Positive `Tensor` of same `dtype` as
+        `skewness` and broadcastable `shape`.  Default is `1` of type `float32`.
+      event_ndims: Python scalar indicating the number of dimensions associated
+        with a particular draw from the distribution.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+    with self._name_scope("init", values=[skewness, tailweight]):
+      tailweight = 1. if tailweight is None else tailweight
+      skewness = 0. if skewness is None else skewness
+      self._skewness = ops.convert_to_tensor(
+          skewness, name="skewness")
+      self._tailweight = ops.convert_to_tensor(
+          tailweight, name="tailweight", dtype=self._skewness.dtype)
+      check_ops.assert_same_float_dtype([self._skewness, self._tailweight])
+      if validate_args:
+        self._tailweight = control_flow_ops.with_dependencies([
+            check_ops.assert_positive(
+                self._tailweight,
+                message="Argument tailweight was not positive")
+        ], self._tailweight)
+    super(SinhArcsinh, self).__init__(
+        event_ndims=event_ndims, validate_args=validate_args, name=name)
+
+  @property
+  def skewness(self):
+    """The `skewness` in: `Y  = Sinh((Arcsinh(X) + skewness) * tailweight)`."""
+    return self._skewness
+
+  @property
+  def tailweight(self):
+    """The `tailweight` in: `Y = Sinh((Arcsinh(X) + skewness) * tailweight)`."""
+    return self._tailweight
+
+  def _forward(self, x):
+    return math_ops.sinh((math_ops.asinh(x) + self.skewness) * self.tailweight)
+
+  def _inverse(self, y):
+    return math_ops.sinh(math_ops.asinh(y) / self.tailweight - self.skewness)
+
+  def _inverse_log_det_jacobian(self, y):
+    # x = sinh(arcsinh(y) / tailweight - skewness)
+    # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
+    # dx/dy
+    # = cosh(arcsinh(y) / tailweight - skewness)
+    #     / (tailweight * sqrt(y**2 + 1))
+    event_dims = self._event_dims_tensor(y)
+    return math_ops.reduce_sum(
+        # This is computed inside the log to avoid catastrophic cancellations
+        # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(x**2 + 1).
+        math_ops.log(math_ops.cosh(
+            math_ops.asinh(y) / self.tailweight - self.skewness)
+                     # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
+                     # where (arcsinh(x) / tailweight) - skewness ~= arcsinh(x).
+                     / _sqrtx2p1(y))
+        - math_ops.log(self.tailweight),
+        axis=event_dims)
+
+  def _forward_log_det_jacobian(self, x):
+    # y = sinh((arcsinh(x) + skewness) * tailweight)
+    # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
+    # dy/dx
+    # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)
+    event_dims = self._event_dims_tensor(x)
+    return math_ops.reduce_sum(
+        # This is computed inside the log to avoid catastrophic cancellations
+        # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1).
+        math_ops.log(math_ops.cosh(
+            (math_ops.asinh(x) + self.skewness) * self.tailweight)
+                     # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
+                     # where (arcsinh(x) + skewness) * tailweight ~= arcsinh(x).
+                     / _sqrtx2p1(x))
+        + math_ops.log(self.tailweight),
+        axis=event_dims)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh_impl.py
deleted file mode 100644
index 3a75e4ae9495793901b0da91a5aa3982aab35852..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh_impl.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""SinhArcsinh bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-__all__ = [
-    "SinhArcsinh",
-]
-
-
-def _sqrtx2p1(x):
-  """Implementation of `sqrt(1 + x**2)` which is stable despite large `x`."""
-  return array_ops.where(
-      math_ops.abs(x) * np.sqrt(np.finfo(x.dtype.as_numpy_dtype).eps) <= 1.,
-      math_ops.sqrt(x**2. + 1.),
-      # For large x, calculating x**2 can overflow. This can be alleviated by
-      # considering:
-      # sqrt(1 + x**2)
-      # = exp(0.5 log(1 + x**2))
-      # = exp(0.5 log(x**2 * (1 + x**-2)))
-      # = exp(log(x) + 0.5 * log(1 + x**-2))
-      # = |x| * exp(0.5 log(1 + x**-2))
-      # = |x| * sqrt(1 + x**-2)
-      # We omit the last term in this approximation.
-      # When |x| > 1 / sqrt(machineepsilon), the second term will be 1,
-      # due to sqrt(1 + x**-2) = 1. This is also true with the gradient term,
-      # and higher order gradients, since the first order derivative of
-      # sqrt(1 + x**-2) is -2 * x**-3 / (1 + x**-2) = -2 / (x**3 + x),
-      # and all nth-order derivatives will be O(x**-(n + 2)). This makes any
-      # gradient terms that contain any derivatives of sqrt(1 + x**-2) vanish.
-      math_ops.abs(x))
-
-
-class SinhArcsinh(bijector.Bijector):
-  """Compute `Y = g(X) = Sinh( (Arcsinh(X) + skewness) * tailweight )`.
-
-  For `skewness in (-inf, inf)` and `tailweight in (0, inf)`, this
-  transformation is a
-  diffeomorphism of the real line `(-inf, inf)`.  The inverse transform is
-  `X = g^{-1}(Y) = Sinh( ArcSinh(Y) / tailweight - skewness )`.
-
-  The `SinhArcsinh` transformation of the Normal is described in
-  [Sinh-arcsinh distributions](https://www.jstor.org/stable/27798865)
-  This Bijector allows a similar transformation of any distribution supported on
-  `(-inf, inf)`.
-
-  #### Meaning of the parameters
-
-  * If `skewness = 0` and `tailweight = 1`, this transform is the identity.
-  * Positive (negative) `skewness` leads to positive (negative) skew.
-    * positive skew means, for unimodal `X` centered at zero, the mode of `Y` is
-      "tilted" to the right.
-    * positive skew means positive values of `Y` become more likely, and
-      negative values become less likely.
-  * Larger (smaller) `tailweight` leads to fatter (thinner) tails.
-    * Fatter tails mean larger values of `|Y|` become more likely.
-    * If `X` is a unit Normal, `tailweight < 1` leads to a distribution that is
-      "flat" around `Y = 0`, and a very steep drop-off in the tails.
-    * If `X` is a unit Normal, `tailweight > 1` leads to a distribution more
-      peaked at the mode with heavier tails.
-
-  To see the argument about the tails, note that for `|X| >> 1` and
-  `|X| >> (|skewness| * tailweight)**tailweight`, we have
-  `Y approx 0.5 X**tailweight e**(sign(X) skewness * tailweight)`.
-  """
-
-  def __init__(self,
-               skewness=None,
-               tailweight=None,
-               event_ndims=0,
-               validate_args=False,
-               name="SinhArcsinh"):
-    """Instantiates the `SinhArcsinh` bijector.
-
-    Args:
-      skewness:  Skewness parameter.  Float-type `Tensor`.  Default is `0`
-        of type `float32`.
-      tailweight:  Tailweight parameter.  Positive `Tensor` of same `dtype` as
-        `skewness` and broadcastable `shape`.  Default is `1` of type `float32`.
-      event_ndims: Python scalar indicating the number of dimensions associated
-        with a particular draw from the distribution.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-    with self._name_scope("init", values=[skewness, tailweight]):
-      tailweight = 1. if tailweight is None else tailweight
-      skewness = 0. if skewness is None else skewness
-      self._skewness = ops.convert_to_tensor(
-          skewness, name="skewness")
-      self._tailweight = ops.convert_to_tensor(
-          tailweight, name="tailweight", dtype=self._skewness.dtype)
-      check_ops.assert_same_float_dtype([self._skewness, self._tailweight])
-      if validate_args:
-        self._tailweight = control_flow_ops.with_dependencies([
-            check_ops.assert_positive(
-                self._tailweight,
-                message="Argument tailweight was not positive")
-        ], self._tailweight)
-    super(SinhArcsinh, self).__init__(
-        event_ndims=event_ndims, validate_args=validate_args, name=name)
-
-  @property
-  def skewness(self):
-    """The `skewness` in: `Y  = Sinh((Arcsinh(X) + skewness) * tailweight)`."""
-    return self._skewness
-
-  @property
-  def tailweight(self):
-    """The `tailweight` in: `Y = Sinh((Arcsinh(X) + skewness) * tailweight)`."""
-    return self._tailweight
-
-  def _forward(self, x):
-    return math_ops.sinh((math_ops.asinh(x) + self.skewness) * self.tailweight)
-
-  def _inverse(self, y):
-    return math_ops.sinh(math_ops.asinh(y) / self.tailweight - self.skewness)
-
-  def _inverse_log_det_jacobian(self, y):
-    # x = sinh(arcsinh(y) / tailweight - skewness)
-    # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
-    # dx/dy
-    # = cosh(arcsinh(y) / tailweight - skewness)
-    #     / (tailweight * sqrt(y**2 + 1))
-    event_dims = self._event_dims_tensor(y)
-    return math_ops.reduce_sum(
-        # This is computed inside the log to avoid catastrophic cancellations
-        # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(x**2 + 1).
-        math_ops.log(math_ops.cosh(
-            math_ops.asinh(y) / self.tailweight - self.skewness)
-                     # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
-                     # where (arcsinh(x) / tailweight) - skewness ~= arcsinh(x).
-                     / _sqrtx2p1(y))
-        - math_ops.log(self.tailweight),
-        axis=event_dims)
-
-  def _forward_log_det_jacobian(self, x):
-    # y = sinh((arcsinh(x) + skewness) * tailweight)
-    # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
-    # dy/dx
-    # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)
-    event_dims = self._event_dims_tensor(x)
-    return math_ops.reduce_sum(
-        # This is computed inside the log to avoid catastrophic cancellations
-        # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1).
-        math_ops.log(math_ops.cosh(
-            (math_ops.asinh(x) + self.skewness) * self.tailweight)
-                     # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
-                     # where (arcsinh(x) + skewness) * tailweight ~= arcsinh(x).
-                     / _sqrtx2p1(x))
-        + math_ops.log(self.tailweight),
-        axis=event_dims)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py
index be6608f97880ae68e10b17c815bf2d8438293261..a9dcce6c526600f3b26c6bceb730417000917ce7 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py
@@ -18,12 +18,223 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+import numpy as np
 
-_allowed_symbols = ["SoftmaxCentered"]
+from tensorflow.contrib.distributions.python.ops import distribution_util
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops.distributions import bijector
 
-remove_undocumented(__name__, _allowed_symbols)
+
+__all__ = [
+    "SoftmaxCentered",
+]
+
+
+class SoftmaxCentered(bijector.Bijector):
+  """Bijector which computes `Y = g(X) = exp([X 0]) / sum(exp([X 0]))`.
+
+  To implement [softmax](https://en.wikipedia.org/wiki/Softmax_function) as a
+  bijection, the forward transformation appends a value to the input and the
+  inverse removes this coordinate. The appended coordinate represents a pivot,
+  e.g., `softmax(x) = exp(x-c) / sum(exp(x-c))` where `c` is the implicit last
+  coordinate.
+
+  Because we append a coordinate, this bijector only supports `event_ndim in [0,
+  1]`, i.e., scalars and vectors.
+
+  Example Use:
+
+  ```python
+  bijector.SoftmaxCentered(event_ndims=1).forward(tf.log([2, 3, 4]))
+  # Result: [0.2, 0.3, 0.4, 0.1]
+  # Extra result: 0.1
+
+  bijector.SoftmaxCentered(event_ndims=1).inverse([0.2, 0.3, 0.4, 0.1])
+  # Result: tf.log([2, 3, 4])
+  # Extra coordinate removed.
+  ```
+
+  At first blush it may seem like the [Invariance of domain](
+  https://en.wikipedia.org/wiki/Invariance_of_domain) theorem implies this
+  implementation is not a bijection. However, the appended dimension
+  makes the (forward) image non-open and the theorem does not directly apply.
+  """
+
+  def __init__(self,
+               event_ndims=0,
+               validate_args=False,
+               name="softmax_centered"):
+    self._graph_parents = []
+    self._name = name
+    with self._name_scope("init", values=[event_ndims]):
+      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
+      event_ndims = tensor_util.constant_value(event_ndims)
+      if event_ndims is None or event_ndims not in [0, 1]:
+        raise ValueError("`event_ndims` must be a TF constant which is 0 or 1")
+    self._static_event_ndims = event_ndims
+    super(SoftmaxCentered, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
+
+  def _forward_event_shape(self, input_shape):
+    if input_shape.ndims is None:
+      return input_shape
+    if input_shape.ndims != self._static_event_ndims:
+      raise ValueError("input_shape.dims = %d != %d" %
+                       (input_shape.ndims, self._static_event_ndims))
+    if input_shape.ndims == 0:
+      return tensor_shape.TensorShape([2])
+    if input_shape.ndims == 1:
+      return tensor_shape.TensorShape(input_shape[0] + 1)
+    # Unreachable code:
+    raise ValueError("event_ndims = %d must be 0 or 1" % input_shape.ndims)
+
+  def _forward_event_shape_tensor(self, input_shape):
+    ndims = array_ops.shape(input_shape)
+    if self.validate_args:
+      # It is not possible for a negative shape so we need only check <= 1.
+      is_zero_or_one = check_ops.assert_equal(
+          ndims, 0 if self._static_event_ndims == 0 else 1,
+          message="event_ndims must be 0 or 1")
+      ndims = control_flow_ops.with_dependencies([is_zero_or_one], ndims)
+    if self._static_event_ndims == 0:
+      return ops.convert_to_tensor(
+          [2], dtype=dtypes.int32, name="output_shape")
+    return input_shape + 1
+
+  def _inverse_event_shape(self, output_shape):
+    if output_shape.ndims is None:
+      return output_shape
+    if output_shape.ndims != 1:
+      raise ValueError("output_shape.ndims = %d != 1" % output_shape.ndims)
+    if self._static_event_ndims == 0:
+      return tensor_shape.TensorShape([])
+    return tensor_shape.TensorShape(output_shape[0] - 1)
+
+  def _inverse_event_shape_tensor(self, output_shape):
+    ndims = array_ops.shape(output_shape)[0]
+    if self.validate_args:
+      # It is not possible for a negative shape so we need only check <= 1.
+      is_one = check_ops.assert_equal(
+          ndims, 1, message="event_ndims must be 1")
+      ndims = control_flow_ops.with_dependencies([is_one], ndims)
+    if self._static_event_ndims == 0:
+      return ops.convert_to_tensor([], dtype=dtypes.int32, name="output_shape")
+    return array_ops.expand_dims(output_shape[0] - 1, dim=0)
+
+  def _forward(self, x):
+    # Pad the last dim with a zeros vector. We need this because it lets us
+    # infer the scale in the inverse function.
+    y = array_ops.expand_dims(x, dim=-1) if self._static_event_ndims == 0 else x
+    y = distribution_util.pad(y, axis=-1, back=True)
+
+    # Set shape hints.
+    if x.shape.ndims is not None:
+      shape = x.shape.as_list()
+      if self._static_event_ndims == 0:
+        shape += [2]
+      elif shape[-1] is not None:
+        shape[-1] += 1
+      shape = tensor_shape.TensorShape(shape)
+      y.shape.assert_is_compatible_with(shape)
+      y.set_shape(shape)
+
+    # Since we only support event_ndims in [0, 1] and we do padding, we always
+    # reduce over the last dimension, i.e., dim=-1 (which is the default).
+    return nn_ops.softmax(y)
+
+  def _inverse(self, y):
+    # To derive the inverse mapping note that:
+    #   y[i] = exp(x[i]) / normalization
+    # and
+    #   y[end] = 1 / normalization.
+    # Thus:
+    # x[i] = log(exp(x[i])) - log(y[end]) - log(normalization)
+    #      = log(exp(x[i])/normalization) - log(y[end])
+    #      = log(y[i]) - log(y[end])
+    shape = (np.asarray(y.shape.as_list(), dtype=np.int32)
+             if y.shape.is_fully_defined()
+             else array_ops.shape(y, name="shape"))
+    ndims = distribution_util.prefer_static_rank(y)
+
+    # Do this first to make sure CSE catches that it'll happen again in
+    # _inverse_log_det_jacobian.
+    x = math_ops.log(y)
+
+    # We now extract the last coordinate of the rightmost dimension.
+    # Our trick is to slice from [0,0,...,shape[-1]-1] to shape[:-1]+[1].
+    begin = array_ops.one_hot(indices=ndims-1,
+                              depth=ndims,
+                              on_value=shape[-1]-np.array(1, dtype=shape.dtype),
+                              dtype=shape.dtype)
+    size = array_ops.concat([shape[:-1], np.asarray([1], dtype=shape.dtype)], 0)
+    log_normalization = -array_ops.strided_slice(x, begin, begin + size)
+
+    # Here we slice out all but the last coordinate; see above for idea.
+    begin = array_ops.zeros_like(shape)
+    size = array_ops.concat([shape[:-1], [shape[-1] - 1]], 0)
+    x = array_ops.strided_slice(x, begin, begin + size)
+
+    x += log_normalization
+
+    if self._static_event_ndims == 0:
+      x = array_ops.squeeze(x, squeeze_dims=[ndims-1])
+
+    # Set shape hints.
+    if y.shape.ndims is not None:
+      shape = y.shape.as_list()
+      if self._static_event_ndims == 0:
+        shape = shape[:-1]
+      elif shape[-1] is not None:
+        shape[-1] -= 1
+      shape = tensor_shape.TensorShape(shape)
+      x.shape.assert_is_compatible_with(shape)
+      x.set_shape(shape)
+
+    return x
+
+  def _inverse_log_det_jacobian(self, y):
+    # WLOG, consider the vector case:
+    #   x = log(y[:-1]) - log(y[-1])
+    # where,
+    #   y[-1] = 1 - sum(y[:-1]).
+    # We have:
+    #   det{ dX/dY } = det{ diag(1 ./ y[:-1]) + 1 / y[-1] }
+    #                = det{ inv{ diag(y[:-1]) - y[:-1]' y[:-1] } }   (1)
+    #                = 1 / det{ diag(y[:-1]) - y[:-1]' y[:-1] }
+    #                = 1 / { (1 + y[:-1]' inv(diag(y[:-1])) y[:-1]) *
+    #                        det(diag(y[:-1])) }                     (2)
+    #                = 1 / { y[-1] prod(y[:-1]) }
+    #                = 1 / prod(y)
+    # (1) - https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
+    #       or by noting that det{ dX/dY } = 1 / det{ dY/dX } from Bijector
+    #       docstring "Tip".
+    # (2) - https://en.wikipedia.org/wiki/Matrix_determinant_lemma
+    return -math_ops.reduce_sum(math_ops.log(y), axis=-1)
+
+  def _forward_log_det_jacobian(self, x):
+    if self._static_event_ndims == 0:
+      return x - 2. * nn_ops.softplus(x)
+    else:
+      # This code is similar to nn_ops.log_softmax but different because we have
+      # an implicit zero column to handle. I.e., instead of:
+      #   reduce_sum(logits - reduce_sum(exp(logits), dim))
+      # we must do:
+      #   log_normalization = 1 + reduce_sum(exp(logits))
+      #   -log_normalization + reduce_sum(logits - log_normalization)
+      log_normalization = nn_ops.softplus(
+          math_ops.reduce_logsumexp(x, axis=-1, keep_dims=True))
+      fldj = (-log_normalization +
+              math_ops.reduce_sum(x - log_normalization,
+                                  axis=-1,
+                                  keep_dims=True))
+      return array_ops.squeeze(fldj, squeeze_dims=-1)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered_impl.py
deleted file mode 100644
index 8645cc1b6b04be75a419342591272f07a4a1711c..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered_impl.py
+++ /dev/null
@@ -1,245 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""SoftmaxCentered bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "SoftmaxCentered",
-]
-
-
-class SoftmaxCentered(bijector.Bijector):
-  """Bijector which computes `Y = g(X) = exp([X 0]) / sum(exp([X 0]))`.
-
-  To implement [softmax](https://en.wikipedia.org/wiki/Softmax_function) as a
-  bijection, the forward transformation appends a value to the input and the
-  inverse removes this coordinate. The appended coordinate represents a pivot,
-  e.g., `softmax(x) = exp(x-c) / sum(exp(x-c))` where `c` is the implicit last
-  coordinate.
-
-  Because we append a coordinate, this bijector only supports `event_ndim in [0,
-  1]`, i.e., scalars and vectors.
-
-  Example Use:
-
-  ```python
-  bijector.SoftmaxCentered(event_ndims=1).forward(tf.log([2, 3, 4]))
-  # Result: [0.2, 0.3, 0.4, 0.1]
-  # Extra result: 0.1
-
-  bijector.SoftmaxCentered(event_ndims=1).inverse([0.2, 0.3, 0.4, 0.1])
-  # Result: tf.log([2, 3, 4])
-  # Extra coordinate removed.
-  ```
-
-  At first blush it may seem like the [Invariance of domain](
-  https://en.wikipedia.org/wiki/Invariance_of_domain) theorem implies this
-  implementation is not a bijection. However, the appended dimension
-  makes the (forward) image non-open and the theorem does not directly apply.
-  """
-
-  def __init__(self,
-               event_ndims=0,
-               validate_args=False,
-               name="softmax_centered"):
-    self._graph_parents = []
-    self._name = name
-    with self._name_scope("init", values=[event_ndims]):
-      event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
-      event_ndims = tensor_util.constant_value(event_ndims)
-      if event_ndims is None or event_ndims not in [0, 1]:
-        raise ValueError("`event_ndims` must be a TF constant which is 0 or 1")
-    self._static_event_ndims = event_ndims
-    super(SoftmaxCentered, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
-
-  def _forward_event_shape(self, input_shape):
-    if input_shape.ndims is None:
-      return input_shape
-    if input_shape.ndims != self._static_event_ndims:
-      raise ValueError("input_shape.dims = %d != %d" %
-                       (input_shape.ndims, self._static_event_ndims))
-    if input_shape.ndims == 0:
-      return tensor_shape.TensorShape([2])
-    if input_shape.ndims == 1:
-      return tensor_shape.TensorShape(input_shape[0] + 1)
-    # Unreachable code:
-    raise ValueError("event_ndims = %d must be 0 or 1" % input_shape.ndims)
-
-  def _forward_event_shape_tensor(self, input_shape):
-    ndims = array_ops.shape(input_shape)
-    if self.validate_args:
-      # It is not possible for a negative shape so we need only check <= 1.
-      is_zero_or_one = check_ops.assert_equal(
-          ndims, 0 if self._static_event_ndims == 0 else 1,
-          message="event_ndims must be 0 or 1")
-      ndims = control_flow_ops.with_dependencies([is_zero_or_one], ndims)
-    if self._static_event_ndims == 0:
-      return ops.convert_to_tensor(
-          [2], dtype=dtypes.int32, name="output_shape")
-    return input_shape + 1
-
-  def _inverse_event_shape(self, output_shape):
-    if output_shape.ndims is None:
-      return output_shape
-    if output_shape.ndims != 1:
-      raise ValueError("output_shape.ndims = %d != 1" % output_shape.ndims)
-    if self._static_event_ndims == 0:
-      return tensor_shape.TensorShape([])
-    return tensor_shape.TensorShape(output_shape[0] - 1)
-
-  def _inverse_event_shape_tensor(self, output_shape):
-    ndims = array_ops.shape(output_shape)[0]
-    if self.validate_args:
-      # It is not possible for a negative shape so we need only check <= 1.
-      is_one = check_ops.assert_equal(
-          ndims, 1, message="event_ndims must be 1")
-      ndims = control_flow_ops.with_dependencies([is_one], ndims)
-    if self._static_event_ndims == 0:
-      return ops.convert_to_tensor([], dtype=dtypes.int32, name="output_shape")
-    return array_ops.expand_dims(output_shape[0] - 1, dim=0)
-
-  def _forward(self, x):
-    # Pad the last dim with a zeros vector. We need this because it lets us
-    # infer the scale in the inverse function.
-    y = array_ops.expand_dims(x, dim=-1) if self._static_event_ndims == 0 else x
-    ndims = (y.get_shape().ndims if y.get_shape().ndims is not None
-             else array_ops.rank(y))
-    y = array_ops.pad(y,
-                      paddings=array_ops.concat(
-                          (array_ops.zeros(
-                              (ndims - 1, 2), dtype=dtypes.int32), [[0, 1]]),
-                          0))
-
-    # Set shape hints.
-    if x.get_shape().ndims is not None:
-      shape = x.get_shape().as_list()
-      if self._static_event_ndims == 0:
-        shape += [2]
-      elif shape[-1] is not None:
-        shape[-1] += 1
-      shape = tensor_shape.TensorShape(shape)
-      y.get_shape().assert_is_compatible_with(shape)
-      y.set_shape(shape)
-
-    # Since we only support event_ndims in [0, 1] and we do padding, we always
-    # reduce over the last dimension, i.e., dim=-1 (which is the default).
-    return nn_ops.softmax(y)
-
-  def _inverse(self, y):
-    # To derive the inverse mapping note that:
-    #   y[i] = exp(x[i]) / normalization
-    # and
-    #   y[end] = 1 / normalization.
-    # Thus:
-    # x[i] = log(exp(x[i])) - log(y[end]) - log(normalization)
-    #      = log(exp(x[i])/normalization) - log(y[end])
-    #      = log(y[i]) - log(y[end])
-    shape = (np.asarray(y.get_shape().as_list(), dtype=np.int32)
-             if y.get_shape().is_fully_defined()
-             else array_ops.shape(y, name="shape"))
-    ndims = y.get_shape().ndims or math_ops.rank(y, name="ndims")
-
-    # Do this first to make sure CSE catches that it'll happen again in
-    # _inverse_log_det_jacobian.
-    x = math_ops.log(y)
-
-    # We now extract the last coordinate of the rightmost dimension.
-    # Our trick is to slice from [0,0,...,shape[-1]-1] to shape[:-1]+[1].
-    begin = array_ops.one_hot(indices=ndims-1,
-                              depth=ndims,
-                              on_value=shape[-1]-np.array(1, dtype=shape.dtype),
-                              dtype=shape.dtype)
-    size = array_ops.concat([shape[:-1], np.asarray([1], dtype=shape.dtype)], 0)
-    log_normalization = -array_ops.strided_slice(x, begin, begin + size)
-
-    # Here we slice out all but the last coordinate; see above for idea.
-    begin = array_ops.zeros_like(shape)
-    size = array_ops.concat([shape[:-1], [shape[-1] - 1]], 0)
-    x = array_ops.strided_slice(x, begin, begin + size)
-
-    x += log_normalization
-
-    if self._static_event_ndims == 0:
-      x = array_ops.squeeze(x, squeeze_dims=[ndims-1])
-
-    # Set shape hints.
-    if y.get_shape().ndims is not None:
-      shape = y.get_shape().as_list()
-      if self._static_event_ndims == 0:
-        shape = shape[:-1]
-      elif shape[-1] is not None:
-        shape[-1] -= 1
-      shape = tensor_shape.TensorShape(shape)
-      x.get_shape().assert_is_compatible_with(shape)
-      x.set_shape(shape)
-
-    return x
-
-  def _inverse_log_det_jacobian(self, y):
-    # WLOG, consider the vector case:
-    #   x = log(y[:-1]) - log(y[-1])
-    # where,
-    #   y[-1] = 1 - sum(y[:-1]).
-    # We have:
-    #   det{ dX/dY } = det{ diag(1 ./ y[:-1]) + 1 / y[-1] }
-    #                = det{ inv{ diag(y[:-1]) - y[:-1]' y[:-1] } }   (1)
-    #                = 1 / det{ diag(y[:-1]) - y[:-1]' y[:-1] }
-    #                = 1 / { (1 + y[:-1]' inv(diag(y[:-1])) y[:-1]) *
-    #                        det(diag(y[:-1])) }                     (2)
-    #                = 1 / { y[-1] prod(y[:-1]) }
-    #                = 1 / prod(y)
-    # (1) - https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
-    #       or by noting that det{ dX/dY } = 1 / det{ dY/dX } from Bijector
-    #       docstring "Tip".
-    # (2) - https://en.wikipedia.org/wiki/Matrix_determinant_lemma
-    return -math_ops.reduce_sum(math_ops.log(y), axis=-1)
-
-  def _forward_log_det_jacobian(self, x):
-    if self._static_event_ndims == 0:
-      return x - 2. * nn_ops.softplus(x)
-    else:
-      # This code is similar to nn_ops.log_softmax but different because we have
-      # an implicit zero column to handle. I.e., instead of:
-      #   reduce_sum(logits - reduce_sum(exp(logits), dim))
-      # we must do:
-      #   log_normalization = 1 + reduce_sum(exp(logits))
-      #   -log_normalization + reduce_sum(logits - log_normalization)
-      log_normalization = nn_ops.softplus(
-          math_ops.reduce_logsumexp(x, axis=-1, keep_dims=True))
-      fldj = (-log_normalization +
-              math_ops.reduce_sum(x - log_normalization,
-                                  axis=-1,
-                                  keep_dims=True))
-      return array_ops.squeeze(fldj, squeeze_dims=-1)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py b/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py
index 250a1144b53bb43271ff7ee494604d9bae6feda8..81957fcf78922fa15fd20a25d144071f431161ae 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/softplus.py
@@ -18,12 +18,127 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.softplus_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops.distributions import bijector
+from tensorflow.python.ops.distributions import util as distribution_util
 
-_allowed_symbols = ["Softplus"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Softplus",
+]
+
+
+class Softplus(bijector.Bijector):
+  """Bijector which computes `Y = g(X) = Log[1 + exp(X)]`.
+
+  The softplus `Bijector` has the following two useful properties:
+
+  * The domain is the positive real numbers
+  * `softplus(x) approx x`, for large `x`, so it does not overflow as easily as
+    the `Exp` `Bijector`.
+
+  The optional nonzero `hinge_softness` parameter changes the transition at
+  zero.  With `hinge_softness = c`, the bijector is:
+
+    ```f_c(x) := c * g(x / c) = c * Log[1 + exp(x / c)].```
+
+  For large `x >> 1`, `c * Log[1 + exp(x / c)] approx c * Log[exp(x / c)] = x`,
+  so the behavior for large `x` is the same as the standard softplus.
+
+  As `c > 0` approaches 0 from the right, `f_c(x)` becomes less and less soft,
+  approaching `max(0, x)`.
+
+  * `c = 1` is the default.
+  * `c > 0` but small means `f(x) approx ReLu(x) = max(0, x)`.
+  * `c < 0` flips sign and reflects around the `y-axis`: `f_{-c}(x) = -f_c(-x)`.
+  * `c = 0` results in a non-bijective transformation and triggers an exception.
+
+    Example Use:
+
+    ```python
+    # Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1
+    # batch ndim and 2 event ndims (i.e., vector of matrices).
+    softplus = Softplus(event_ndims=2)
+    x = [[[1., 2],
+          [3, 4]],
+         [[5, 6],
+          [7, 8]]]
+    log(1 + exp(x)) == softplus.forward(x)
+    log(exp(x) - 1) == softplus.inverse(x)
+    ```
+
+    Note: log(.) and exp(.) are applied element-wise but the Jacobian is a
+    reduction over the event space.
+  """
+
+  @distribution_util.AppendDocstring(
+      kwargs_dict={
+          "hinge_softness": (
+              "Nonzero floating point `Tensor`.  Controls the softness of what "
+              "would otherwise be a kink at the origin.  Default is 1.0")})
+  def __init__(self,
+               event_ndims=0,
+               hinge_softness=None,
+               validate_args=False,
+               name="softplus"):
+    with ops.name_scope(name, values=[hinge_softness]):
+      if hinge_softness is not None:
+        self._hinge_softness = ops.convert_to_tensor(
+            hinge_softness, name="hinge_softness")
+      else:
+        self._hinge_softness = None
+      if validate_args:
+        nonzero_check = check_ops.assert_none_equal(
+            ops.convert_to_tensor(
+                0, dtype=self.hinge_softness.dtype),
+            self.hinge_softness,
+            message="hinge_softness must be non-zero")
+        self._hinge_softness = control_flow_ops.with_dependencies(
+            [nonzero_check], self.hinge_softness)
+
+    super(Softplus, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
+
+  def _forward(self, x):
+    if self.hinge_softness is None:
+      return nn_ops.softplus(x)
+    hinge_softness = math_ops.cast(self.hinge_softness, x.dtype)
+    return hinge_softness * nn_ops.softplus(x / hinge_softness)
+
+  def _inverse(self, y):
+    if self.hinge_softness is None:
+      return distribution_util.softplus_inverse(y)
+    hinge_softness = math_ops.cast(self.hinge_softness, y.dtype)
+    return hinge_softness * distribution_util.softplus_inverse(
+        y / hinge_softness)
+
+  def _inverse_log_det_jacobian(self, y):
+    # Could also do:
+    #   ildj = math_ops.reduce_sum(y - distribution_util.softplus_inverse(y),
+    #                              axis=event_dims)
+    # but the following is more numerically stable. Ie,
+    # Y = Log[1 + exp{X}] ==> X = Log[exp{Y} - 1]
+    # ==> dX/dY = exp{Y} / (exp{Y} - 1)
+    #           = 1 / (1 - exp{-Y}),
+    # which is the most stable for large Y > 0. For small Y, we use
+    # 1 - exp{-Y} approx Y.
+    if self.hinge_softness is not None:
+      y /= math_ops.cast(self.hinge_softness, y.dtype)
+    return -math_ops.reduce_sum(math_ops.log(-math_ops.expm1(-y)),
+                                axis=self._event_dims_tensor(y))
+
+  def _forward_log_det_jacobian(self, x):
+    if self.hinge_softness is not None:
+      x /= math_ops.cast(self.hinge_softness, x.dtype)
+    return -math_ops.reduce_sum(nn_ops.softplus(-x),
+                                axis=self._event_dims_tensor(x))
+
+  @property
+  def hinge_softness(self):
+    return self._hinge_softness
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softplus_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/softplus_impl.py
deleted file mode 100644
index 81957fcf78922fa15fd20a25d144071f431161ae..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/softplus_impl.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Softplus bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops.distributions import bijector
-from tensorflow.python.ops.distributions import util as distribution_util
-
-
-__all__ = [
-    "Softplus",
-]
-
-
-class Softplus(bijector.Bijector):
-  """Bijector which computes `Y = g(X) = Log[1 + exp(X)]`.
-
-  The softplus `Bijector` has the following two useful properties:
-
-  * The domain is the positive real numbers
-  * `softplus(x) approx x`, for large `x`, so it does not overflow as easily as
-    the `Exp` `Bijector`.
-
-  The optional nonzero `hinge_softness` parameter changes the transition at
-  zero.  With `hinge_softness = c`, the bijector is:
-
-    ```f_c(x) := c * g(x / c) = c * Log[1 + exp(x / c)].```
-
-  For large `x >> 1`, `c * Log[1 + exp(x / c)] approx c * Log[exp(x / c)] = x`,
-  so the behavior for large `x` is the same as the standard softplus.
-
-  As `c > 0` approaches 0 from the right, `f_c(x)` becomes less and less soft,
-  approaching `max(0, x)`.
-
-  * `c = 1` is the default.
-  * `c > 0` but small means `f(x) approx ReLu(x) = max(0, x)`.
-  * `c < 0` flips sign and reflects around the `y-axis`: `f_{-c}(x) = -f_c(-x)`.
-  * `c = 0` results in a non-bijective transformation and triggers an exception.
-
-    Example Use:
-
-    ```python
-    # Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1
-    # batch ndim and 2 event ndims (i.e., vector of matrices).
-    softplus = Softplus(event_ndims=2)
-    x = [[[1., 2],
-          [3, 4]],
-         [[5, 6],
-          [7, 8]]]
-    log(1 + exp(x)) == softplus.forward(x)
-    log(exp(x) - 1) == softplus.inverse(x)
-    ```
-
-    Note: log(.) and exp(.) are applied element-wise but the Jacobian is a
-    reduction over the event space.
-  """
-
-  @distribution_util.AppendDocstring(
-      kwargs_dict={
-          "hinge_softness": (
-              "Nonzero floating point `Tensor`.  Controls the softness of what "
-              "would otherwise be a kink at the origin.  Default is 1.0")})
-  def __init__(self,
-               event_ndims=0,
-               hinge_softness=None,
-               validate_args=False,
-               name="softplus"):
-    with ops.name_scope(name, values=[hinge_softness]):
-      if hinge_softness is not None:
-        self._hinge_softness = ops.convert_to_tensor(
-            hinge_softness, name="hinge_softness")
-      else:
-        self._hinge_softness = None
-      if validate_args:
-        nonzero_check = check_ops.assert_none_equal(
-            ops.convert_to_tensor(
-                0, dtype=self.hinge_softness.dtype),
-            self.hinge_softness,
-            message="hinge_softness must be non-zero")
-        self._hinge_softness = control_flow_ops.with_dependencies(
-            [nonzero_check], self.hinge_softness)
-
-    super(Softplus, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
-
-  def _forward(self, x):
-    if self.hinge_softness is None:
-      return nn_ops.softplus(x)
-    hinge_softness = math_ops.cast(self.hinge_softness, x.dtype)
-    return hinge_softness * nn_ops.softplus(x / hinge_softness)
-
-  def _inverse(self, y):
-    if self.hinge_softness is None:
-      return distribution_util.softplus_inverse(y)
-    hinge_softness = math_ops.cast(self.hinge_softness, y.dtype)
-    return hinge_softness * distribution_util.softplus_inverse(
-        y / hinge_softness)
-
-  def _inverse_log_det_jacobian(self, y):
-    # Could also do:
-    #   ildj = math_ops.reduce_sum(y - distribution_util.softplus_inverse(y),
-    #                              axis=event_dims)
-    # but the following is more numerically stable. Ie,
-    # Y = Log[1 + exp{X}] ==> X = Log[exp{Y} - 1]
-    # ==> dX/dY = exp{Y} / (exp{Y} - 1)
-    #           = 1 / (1 - exp{-Y}),
-    # which is the most stable for large Y > 0. For small Y, we use
-    # 1 - exp{-Y} approx Y.
-    if self.hinge_softness is not None:
-      y /= math_ops.cast(self.hinge_softness, y.dtype)
-    return -math_ops.reduce_sum(math_ops.log(-math_ops.expm1(-y)),
-                                axis=self._event_dims_tensor(y))
-
-  def _forward_log_det_jacobian(self, x):
-    if self.hinge_softness is not None:
-      x /= math_ops.cast(self.hinge_softness, x.dtype)
-    return -math_ops.reduce_sum(nn_ops.softplus(-x),
-                                axis=self._event_dims_tensor(x))
-
-  @property
-  def hinge_softness(self):
-    return self._hinge_softness
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py b/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py
index d439f28884d8bd7f2b808317e10c5b5e44bfcfa2..00520bcda85e9527767e6342bf75f10667c264a8 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/weibull.py
@@ -18,12 +18,132 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.distributions.python.ops.bijectors.weibull_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import bijector
 
-_allowed_symbols = ["Weibull"]
 
-remove_undocumented(__name__, _allowed_symbols)
+__all__ = [
+    "Weibull",
+]
+
+
+class Weibull(bijector.Bijector):
+  """Compute `Y = g(X) = 1 - exp((-X / scale) ** concentration), X >= 0`.
+
+  This bijector maps inputs from `[0, inf]` to [0, 1]`. The inverse of the
+  bijector applied to a uniform random variable `X ~ U(0, 1) gives back a
+  random variable with the
+  [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution):
+
+  ```none
+  Y ~ Weibull(scale, concentration)
+  pdf(y; scale, concentration, y >= 0) = (scale / concentration) * (
+    scale / concentration) ** (concentration - 1) * exp(
+      -(y / scale) ** concentration)
+  ```
+  """
+
+  def __init__(self,
+               scale=1.,
+               concentration=1.,
+               event_ndims=0,
+               validate_args=False,
+               name="weibull"):
+    """Instantiates the `Weibull` bijector.
+
+    Args:
+      scale: Positive Float-type `Tensor` that is the same dtype and is
+        broadcastable with `concentration`.
+        This is `l` in `Y = g(X) = 1 - exp((-x / l) ** k)`.
+      concentration: Positive Float-type `Tensor` that is the same dtype and is
+        broadcastable with `scale`.
+        This is `k` in `Y = g(X) = 1 - exp((-x / l) ** k)`.
+      event_ndims: Python scalar indicating the number of dimensions associated
+        with a particular draw from the distribution.
+      validate_args: Python `bool` indicating whether arguments should be
+        checked for correctness.
+      name: Python `str` name given to ops managed by this object.
+    """
+    self._graph_parents = []
+    self._name = name
+    self._validate_args = validate_args
+    with self._name_scope("init", values=[scale, concentration]):
+      self._scale = ops.convert_to_tensor(scale, name="scale")
+      self._concentration = ops.convert_to_tensor(
+          concentration, name="concentration")
+      check_ops.assert_same_float_dtype([self._scale, self._concentration])
+      if validate_args:
+        self._scale = control_flow_ops.with_dependencies([
+            check_ops.assert_positive(
+                self._scale,
+                message="Argument scale was not positive")
+        ], self._scale)
+        self._concentration = control_flow_ops.with_dependencies([
+            check_ops.assert_positive(
+                self._concentration,
+                message="Argument concentration was not positive")
+        ], self._concentration)
+
+    super(Weibull, self).__init__(
+        event_ndims=event_ndims,
+        validate_args=validate_args,
+        name=name)
+
+  @property
+  def scale(self):
+    """The `l` in `Y = g(X) = 1 - exp((-x / l) ** k)`."""
+    return self._scale
+
+  @property
+  def concentration(self):
+    """The `k` in `Y = g(X) = 1 - exp((-x / l) ** k)`."""
+    return self._concentration
+
+  def _forward(self, x):
+    x = self._maybe_assert_valid_x(x)
+    return -math_ops.expm1(-((x / self.scale) ** self.concentration))
+
+  def _inverse(self, y):
+    y = self._maybe_assert_valid_y(y)
+    return self.scale * (-math_ops.log1p(-y)) ** (1 / self.concentration)
+
+  def _inverse_log_det_jacobian(self, y):
+    y = self._maybe_assert_valid_y(y)
+    event_dims = self._event_dims_tensor(y)
+    return math_ops.reduce_sum(
+        -math_ops.log1p(-y) +
+        (1 / self.concentration - 1) * math_ops.log(-math_ops.log1p(-y)) +
+        math_ops.log(self.scale / self.concentration),
+        axis=event_dims)
+
+  def _forward_log_det_jacobian(self, x):
+    x = self._maybe_assert_valid_x(x)
+    event_dims = self._event_dims_tensor(x)
+    return math_ops.reduce_sum(
+        -(x / self.scale) ** self.concentration +
+        (self.concentration - 1) * math_ops.log(x) +
+        math_ops.log(self.concentration) +
+        -self.concentration * math_ops.log(self.scale),
+        axis=event_dims)
+
+  def _maybe_assert_valid_x(self, x):
+    if not self.validate_args:
+      return x
+    is_valid = check_ops.assert_non_negative(
+        x,
+        message="Forward transformation input must be at least {}.".format(0))
+    return control_flow_ops.with_dependencies([is_valid], x)
+
+  def _maybe_assert_valid_y(self, y):
+    if not self.validate_args:
+      return y
+    is_positive = check_ops.assert_non_negative(
+        y, message="Inverse transformation input must be greater than 0.")
+    less_than_one = check_ops.assert_less_equal(
+        y, constant_op.constant(1., y.dtype),
+        message="Inverse transformation input must be less than or equal to 1.")
+    return control_flow_ops.with_dependencies([is_positive, less_than_one], y)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/weibull_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/weibull_impl.py
deleted file mode 100644
index 00520bcda85e9527767e6342bf75f10667c264a8..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/distributions/python/ops/bijectors/weibull_impl.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Weibull bijector."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "Weibull",
-]
-
-
-class Weibull(bijector.Bijector):
-  """Compute `Y = g(X) = 1 - exp((-X / scale) ** concentration), X >= 0`.
-
-  This bijector maps inputs from `[0, inf]` to [0, 1]`. The inverse of the
-  bijector applied to a uniform random variable `X ~ U(0, 1) gives back a
-  random variable with the
-  [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution):
-
-  ```none
-  Y ~ Weibull(scale, concentration)
-  pdf(y; scale, concentration, y >= 0) = (scale / concentration) * (
-    scale / concentration) ** (concentration - 1) * exp(
-      -(y / scale) ** concentration)
-  ```
-  """
-
-  def __init__(self,
-               scale=1.,
-               concentration=1.,
-               event_ndims=0,
-               validate_args=False,
-               name="weibull"):
-    """Instantiates the `Weibull` bijector.
-
-    Args:
-      scale: Positive Float-type `Tensor` that is the same dtype and is
-        broadcastable with `concentration`.
-        This is `l` in `Y = g(X) = 1 - exp((-x / l) ** k)`.
-      concentration: Positive Float-type `Tensor` that is the same dtype and is
-        broadcastable with `scale`.
-        This is `k` in `Y = g(X) = 1 - exp((-x / l) ** k)`.
-      event_ndims: Python scalar indicating the number of dimensions associated
-        with a particular draw from the distribution.
-      validate_args: Python `bool` indicating whether arguments should be
-        checked for correctness.
-      name: Python `str` name given to ops managed by this object.
-    """
-    self._graph_parents = []
-    self._name = name
-    self._validate_args = validate_args
-    with self._name_scope("init", values=[scale, concentration]):
-      self._scale = ops.convert_to_tensor(scale, name="scale")
-      self._concentration = ops.convert_to_tensor(
-          concentration, name="concentration")
-      check_ops.assert_same_float_dtype([self._scale, self._concentration])
-      if validate_args:
-        self._scale = control_flow_ops.with_dependencies([
-            check_ops.assert_positive(
-                self._scale,
-                message="Argument scale was not positive")
-        ], self._scale)
-        self._concentration = control_flow_ops.with_dependencies([
-            check_ops.assert_positive(
-                self._concentration,
-                message="Argument concentration was not positive")
-        ], self._concentration)
-
-    super(Weibull, self).__init__(
-        event_ndims=event_ndims,
-        validate_args=validate_args,
-        name=name)
-
-  @property
-  def scale(self):
-    """The `l` in `Y = g(X) = 1 - exp((-x / l) ** k)`."""
-    return self._scale
-
-  @property
-  def concentration(self):
-    """The `k` in `Y = g(X) = 1 - exp((-x / l) ** k)`."""
-    return self._concentration
-
-  def _forward(self, x):
-    x = self._maybe_assert_valid_x(x)
-    return -math_ops.expm1(-((x / self.scale) ** self.concentration))
-
-  def _inverse(self, y):
-    y = self._maybe_assert_valid_y(y)
-    return self.scale * (-math_ops.log1p(-y)) ** (1 / self.concentration)
-
-  def _inverse_log_det_jacobian(self, y):
-    y = self._maybe_assert_valid_y(y)
-    event_dims = self._event_dims_tensor(y)
-    return math_ops.reduce_sum(
-        -math_ops.log1p(-y) +
-        (1 / self.concentration - 1) * math_ops.log(-math_ops.log1p(-y)) +
-        math_ops.log(self.scale / self.concentration),
-        axis=event_dims)
-
-  def _forward_log_det_jacobian(self, x):
-    x = self._maybe_assert_valid_x(x)
-    event_dims = self._event_dims_tensor(x)
-    return math_ops.reduce_sum(
-        -(x / self.scale) ** self.concentration +
-        (self.concentration - 1) * math_ops.log(x) +
-        math_ops.log(self.concentration) +
-        -self.concentration * math_ops.log(self.scale),
-        axis=event_dims)
-
-  def _maybe_assert_valid_x(self, x):
-    if not self.validate_args:
-      return x
-    is_valid = check_ops.assert_non_negative(
-        x,
-        message="Forward transformation input must be at least {}.".format(0))
-    return control_flow_ops.with_dependencies([is_valid], x)
-
-  def _maybe_assert_valid_y(self, y):
-    if not self.validate_args:
-      return y
-    is_positive = check_ops.assert_non_negative(
-        y, message="Inverse transformation input must be greater than 0.")
-    less_than_one = check_ops.assert_less_equal(
-        y, constant_op.constant(1., y.dtype),
-        message="Inverse transformation input must be less than or equal to 1.")
-    return control_flow_ops.with_dependencies([is_positive, less_than_one], y)
diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py
index 8d59c1abfbc607c67b2bbca21f880743a43e5b2a..6f5d724a2a945ed8f9c159d8314327c6f994d1db 100644
--- a/tensorflow/contrib/distributions/python/ops/cauchy.py
+++ b/tensorflow/contrib/distributions/python/ops/cauchy.py
@@ -43,16 +43,17 @@ class Cauchy(distribution.Distribution):
   The probability density function (pdf) is,
 
   ```none
-  pdf(x; loc, scale) = 1 / (pi * scale * (1 + ((x - loc) / scale)**2))
+  pdf(x; loc, scale) = 1 / (pi scale (1 + z**2))
+  z = (x - loc) / scale
   ```
   where `loc` is the location, and `scale` is the scale.
 
   The Cauchy distribution is a member of the [location-scale family](
   https://en.wikipedia.org/wiki/Location-scale_family), i.e.
+  `Y ~ Cauchy(loc, scale)` is equivalent to,
 
   ```none
   X ~ Cauchy(loc=0, scale=1)
-  Y ~ Cauchy(loc=loc, scale=scale)
   Y = loc + scale * X
   ```
 
@@ -61,14 +62,16 @@ class Cauchy(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
+  tfd = tf.contrib.distributions
+
   # Define a single scalar Cauchy distribution.
-  dist = Cauchy(loc=0., scale=3.)
+  dist = tfd.Cauchy(loc=0., scale=3.)
 
   # Evaluate the cdf at 1, returning a scalar.
   dist.cdf(1.)
 
   # Define a batch of two scalar valued Cauchy distributions.
-  dist = Cauchy(loc=[1, 2.], scale=[11, 22.])
+  dist = tfd.Cauchy(loc=[1, 2.], scale=[11, 22.])
 
   # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
   # returning a length two tensor.
@@ -76,18 +79,17 @@ class Cauchy(distribution.Distribution):
 
   # Get 3 samples, returning a 3 x 2 tensor.
   dist.sample([3])
-  ```
-
-  Arguments are broadcast when possible.
 
-  ```python
+  # Arguments are broadcast when possible.
   # Define a batch of two scalar valued Cauchy distributions.
   # Both have median 1, but different scales.
-  dist = tf.contrib.distributions.Cauchy(loc=1., scale=[11, 22.])
+  dist = tfd.Cauchy(loc=1., scale=[11, 22.])
+
   # Evaluate the pdf of both distributions on the same point, 3.0,
   # returning a length 2 tensor.
-  dist.prob(3.0)
+  dist.prob(3.)
   ```
+
   """
 
   def __init__(self,
diff --git a/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py b/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py
index 599c855cda434d9249187d5d154d50a8a8c49a6c..1d4c5660d8d73b7b6a7e758fc834ccfddeb5c8ea 100644
--- a/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py
+++ b/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py
@@ -121,7 +121,7 @@ class ConditionalTransformedDistribution(
     log_prob = self.distribution.log_prob(x, **distribution_kwargs)
     if self._is_maybe_event_override:
       log_prob = math_ops.reduce_sum(log_prob, self._reduce_event_indices)
-    return ildj + log_prob
+    return math_ops.cast(ildj, log_prob.dtype) + log_prob
 
   @distribution_util.AppendDocstring(kwargs_dict=_condition_kwargs_dict)
   def _prob(self, y, bijector_kwargs=None, distribution_kwargs=None):
@@ -143,7 +143,7 @@ class ConditionalTransformedDistribution(
     prob = self.distribution.prob(x, **distribution_kwargs)
     if self._is_maybe_event_override:
       prob = math_ops.reduce_prod(prob, self._reduce_event_indices)
-    return math_ops.exp(ildj) * prob
+    return math_ops.exp(math_ops.cast(ildj, prob.dtype)) * prob
 
   @distribution_util.AppendDocstring(kwargs_dict=_condition_kwargs_dict)
   def _log_cdf(self, y, bijector_kwargs=None, distribution_kwargs=None):
diff --git a/tensorflow/contrib/distributions/python/ops/deterministic.py b/tensorflow/contrib/distributions/python/ops/deterministic.py
index 850d08d1bd69ebc7661557d648e2bffe77e6a908..8049522e9f5dc26b244b7e710a9ae8b981efd6b6 100644
--- a/tensorflow/contrib/distributions/python/ops/deterministic.py
+++ b/tensorflow/contrib/distributions/python/ops/deterministic.py
@@ -290,8 +290,10 @@ class VectorDeterministic(_BaseDeterministic):
   #### Examples
 
   ```python
+  tfd = tf.contrib.distributions
+
   # Initialize a single VectorDeterministic supported at [0., 2.] in R^2.
-  constant = tf.contrib.distributions.Deterministic([0., 2.])
+  constant = tfd.Deterministic([0., 2.])
   constant.prob([0., 2.])
   ==> 1.
   constant.prob([0., 3.])
@@ -299,7 +301,7 @@ class VectorDeterministic(_BaseDeterministic):
 
   # Initialize a [3] batch of constants on R^2.
   loc = [[0., 1.], [2., 3.], [4., 5.]]
-  constant = constant_lib.VectorDeterministic(loc)
+  constant = tfd.VectorDeterministic(loc)
   constant.prob([[0., 1.], [1.9, 3.], [3.99, 5.]])
   ==> [1., 0., 0.]
   ```
diff --git a/tensorflow/contrib/distributions/python/ops/distribution_util.py b/tensorflow/contrib/distributions/python/ops/distribution_util.py
index 869b5698e57d199755ce1686a74a1eafe3b73e7d..a4d249d41ec9733721a3583d3708e0da56db1733 100644
--- a/tensorflow/contrib/distributions/python/ops/distribution_util.py
+++ b/tensorflow/contrib/distributions/python/ops/distribution_util.py
@@ -19,9 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib import linalg
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
@@ -330,54 +328,14 @@ def shapes_from_loc_and_scale(loc, scale, name="shapes_from_loc_and_scale"):
       else:
         loc_batch_shape = ops.convert_to_tensor(loc_batch_shape,
                                                 name="loc_batch_shape")
+      # This is defined in the core util module.
+      # pylint: disable=undefined-variable
       batch_shape = prefer_static_broadcast_shape(batch_shape, loc_batch_shape)
+      # pylint: enable=undefined-variable
 
   return batch_shape, event_shape
 
 
-def prefer_static_broadcast_shape(
-    shape1, shape2, name="prefer_static_broadcast_shape"):
-  """Convenience function which statically broadcasts shape when possible.
-
-  Args:
-    shape1:  `1-D` integer `Tensor`.  Already converted to tensor!
-    shape2:  `1-D` integer `Tensor`.  Already converted to tensor!
-    name:  A string name to prepend to created ops.
-
-  Returns:
-    The broadcast shape, either as `TensorShape` (if broadcast can be done
-      statically), or as a `Tensor`.
-  """
-  with ops.name_scope(name, values=[shape1, shape2]):
-    def make_shape_tensor(x):
-      return ops.convert_to_tensor(x, name="shape", dtype=dtypes.int32)
-
-    def get_tensor_shape(s):
-      if isinstance(s, tensor_shape.TensorShape):
-        return s
-      s_ = tensor_util.constant_value(make_shape_tensor(s))
-      if s_ is not None:
-        return tensor_shape.TensorShape(s_)
-      return None
-
-    def get_shape_tensor(s):
-      if not isinstance(s, tensor_shape.TensorShape):
-        return make_shape_tensor(s)
-      if s.is_fully_defined():
-        return make_shape_tensor(s.as_list())
-      raise ValueError("Cannot broadcast from partially "
-                       "defined `TensorShape`.")
-
-    shape1_ = get_tensor_shape(shape1)
-    shape2_ = get_tensor_shape(shape2)
-    if shape1_ is not None and shape2_ is not None:
-      return array_ops.broadcast_static_shape(shape1_, shape2_)
-
-    shape1_ = get_shape_tensor(shape1)
-    shape2_ = get_shape_tensor(shape2)
-    return array_ops.broadcast_dynamic_shape(shape1_, shape2_)
-
-
 def get_broadcast_shape(*tensors):
   """Get broadcast shape as a Python list of integers (preferred) or `Tensor`.
 
diff --git a/tensorflow/contrib/distributions/python/ops/gumbel.py b/tensorflow/contrib/distributions/python/ops/gumbel.py
index ba8d3c639b397422f0f6210ba9f48650f0da1e3e..d0efaefb8e78ddf4436e9e5a112d2c1cdddaf3b5 100644
--- a/tensorflow/contrib/distributions/python/ops/gumbel.py
+++ b/tensorflow/contrib/distributions/python/ops/gumbel.py
@@ -62,15 +62,17 @@ class _Gumbel(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
+  tfd = tf.contrib.distributions
+
   # Define a single scalar Gumbel distribution.
-  dist = tf.contrib.distributions.Gumbel(loc=0., scale=3.)
+  dist = tfd.Gumbel(loc=0., scale=3.)
 
   # Evaluate the cdf at 1, returning a scalar.
   dist.cdf(1.)
 
   # Define a batch of two scalar valued Gumbels.
   # The first has mean 1 and scale 11, the second 2 and 22.
-  dist = tf.contrib.distributions.Gumbel(loc=[1, 2.], scale=[11, 22.])
+  dist = tfd.Gumbel(loc=[1, 2.], scale=[11, 22.])
 
   # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
   # returning a length two tensor.
@@ -85,7 +87,7 @@ class _Gumbel(distribution.Distribution):
   ```python
   # Define a batch of two scalar valued Logistics.
   # Both have mean 1, but different scales.
-  dist = tf.contrib.distributions.Gumbel(loc=1., scale=[11, 22.])
+  dist = tfd.Gumbel(loc=1., scale=[11, 22.])
 
   # Evaluate the pdf of both distributions on the same point, 3.0,
   # returning a length 2 tensor.
diff --git a/tensorflow/contrib/distributions/python/ops/half_normal.py b/tensorflow/contrib/distributions/python/ops/half_normal.py
index 12059b6a9e199dc3ae00ac47a62ece9c9a147000..fc0751a6e0b78cb3d79bd3478e740bb05cd26428 100644
--- a/tensorflow/contrib/distributions/python/ops/half_normal.py
+++ b/tensorflow/contrib/distributions/python/ops/half_normal.py
@@ -84,6 +84,7 @@ class HalfNormal(distribution.Distribution):
   ```
 
   """
+
   def __init__(self,
                scale,
                validate_args=False,
@@ -120,7 +121,7 @@ class HalfNormal(distribution.Distribution):
 
   @staticmethod
   def _param_shapes(sample_shape):
-    return {'scale': ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)}
+    return {"scale": ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)}
 
   @property
   def scale(self):
diff --git a/tensorflow/contrib/distributions/python/ops/independent.py b/tensorflow/contrib/distributions/python/ops/independent.py
index 6a74ca9a0ae1ad30081d21cc15a65be052a99e2a..cbce005013281ff3c58c94d525d5ce7a865d725a 100644
--- a/tensorflow/contrib/distributions/python/ops/independent.py
+++ b/tensorflow/contrib/distributions/python/ops/independent.py
@@ -68,11 +68,11 @@ class Independent(distribution_lib.Distribution):
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Make independent distribution from a 2-batch Normal.
-  ind = ds.Independent(
-      distribution=ds.Normal(loc=[-1., 1], scale=[0.1, 0.5]),
+  ind = tfd.Independent(
+      distribution=tfd.Normal(loc=[-1., 1], scale=[0.1, 0.5]),
       reinterpreted_batch_ndims=1)
 
   # All batch dims have been "absorbed" into event dims.
@@ -80,8 +80,8 @@ class Independent(distribution_lib.Distribution):
   ind.event_shape  # ==> [2]
 
   # Make independent distribution from a 2-batch bivariate Normal.
-  ind = ds.Independent(
-      distribution=ds.MultivariateNormalDiag(
+  ind = tfd.Independent(
+      distribution=tfd.MultivariateNormalDiag(
           loc=[[-1., 1], [1, -1]],
           scale_identity_multiplier=[1., 0.5]),
       reinterpreted_batch_ndims=1)
diff --git a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py
index 956dee38a378813434656a28a69c89b6ec1e8b72..ee4d86867d48b20e97757bcec57d452085814b80 100644
--- a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py
+++ b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py
@@ -88,8 +88,9 @@ class InverseGamma(distribution.Distribution):
   #### Examples
 
   ```python
-  dist = InverseGamma(concentration=3.0, rate=2.0)
-  dist2 = InverseGamma(concentration=[3.0, 4.0], rate=[2.0, 3.0])
+  tfd = tf.contrib.distributions
+  dist = tfd.InverseGamma(concentration=3.0, rate=2.0)
+  dist2 = tfd.InverseGamma(concentration=[3.0, 4.0], rate=[2.0, 3.0])
   ```
 
   """
diff --git a/tensorflow/contrib/distributions/python/ops/logistic.py b/tensorflow/contrib/distributions/python/ops/logistic.py
index 48794a48828fe796e233e968d8c755136ce166ad..473677f8d91b184e029f345bb05f5c5d63df7a40 100644
--- a/tensorflow/contrib/distributions/python/ops/logistic.py
+++ b/tensorflow/contrib/distributions/python/ops/logistic.py
@@ -60,15 +60,17 @@ class Logistic(distribution.Distribution):
   Examples of initialization of one or a batch of distributions.
 
   ```python
+  tfd = tf.contrib.distributions
+
   # Define a single scalar Logistic distribution.
-  dist = tf.contrib.distributions.Logistic(loc=0., scale=3.)
+  dist = tfd.Logistic(loc=0., scale=3.)
 
   # Evaluate the cdf at 1, returning a scalar.
   dist.cdf(1.)
 
   # Define a batch of two scalar valued Logistics.
   # The first has mean 1 and scale 11, the second 2 and 22.
-  dist = tf.contrib.distributions.Logistic(loc=[1, 2.], scale=[11, 22.])
+  dist = tfd.Logistic(loc=[1, 2.], scale=[11, 22.])
 
   # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
   # returning a length two tensor.
@@ -76,14 +78,11 @@ class Logistic(distribution.Distribution):
 
   # Get 3 samples, returning a 3 x 2 tensor.
   dist.sample([3])
-  ```
 
-  Arguments are broadcast when possible.
-
-  ```python
+  # Arguments are broadcast when possible.
   # Define a batch of two scalar valued Logistics.
   # Both have mean 1, but different scales.
-  dist = tf.contrib.distributions.Logistic(loc=1., scale=[11, 22.])
+  dist = tfd.Logistic(loc=1., scale=[11, 22.])
 
   # Evaluate the pdf of both distributions on the same point, 3.0,
   # returning a length 2 tensor.
diff --git a/tensorflow/contrib/distributions/python/ops/mixture.py b/tensorflow/contrib/distributions/python/ops/mixture.py
index e676931d9145e72907d990148ee2d180e0da0258..f2d492f5489a197157558ae727416b51db04793e 100644
--- a/tensorflow/contrib/distributions/python/ops/mixture.py
+++ b/tensorflow/contrib/distributions/python/ops/mixture.py
@@ -49,13 +49,13 @@ class Mixture(distribution.Distribution):
 
   ```python
   # Create a mixture of two Gaussians:
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
   mix = 0.3
-  bimix_gauss = ds.Mixture(
-    cat=ds.Categorical(probs=[mix, 1.-mix]),
+  bimix_gauss = tfd.Mixture(
+    cat=tfd.Categorical(probs=[mix, 1.-mix]),
     components=[
-      ds.Normal(loc=-1., scale=0.1),
-      ds.Normal(loc=+1., scale=0.5),
+      tfd.Normal(loc=-1., scale=0.1),
+      tfd.Normal(loc=+1., scale=0.5),
   ])
 
   # Plot the PDF.
diff --git a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
index 5558ef0f255db684b229d129666634e50c625887..49afbea7f05136674aa0c1441bd46548b7b55c8f 100644
--- a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
+++ b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py
@@ -43,15 +43,14 @@ class MixtureSameFamily(distribution.Distribution):
   #### Examples
 
   ```python
-  import matplotlib.pyplot as plt
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   ### Create a mixture of two scalar Gaussians:
 
-  gm = ds.MixtureSameFamily(
-      mixture_distribution=ds.Categorical(
+  gm = tfd.MixtureSameFamily(
+      mixture_distribution=tfd.Categorical(
           probs=[0.3, 0.7]),
-      components_distribution=ds.Normal(
+      components_distribution=tfd.Normal(
         loc=[-1., 1],       # One for each component.
         scale=[0.1, 0.5]))  # And same here.
 
@@ -63,14 +62,15 @@ class MixtureSameFamily(distribution.Distribution):
 
   # Plot PDF.
   x = np.linspace(-2., 3., int(1e4), dtype=np.float32)
+  import matplotlib.pyplot as plt
   plt.plot(x, gm.prob(x).eval());
 
   ### Create a mixture of two Bivariate Gaussians:
 
-  gm = ds.MixtureSameFamily(
-      mixture_distribution=ds.Categorical(
+  gm = tfd.MixtureSameFamily(
+      mixture_distribution=tfd.Categorical(
           probs=[0.3, 0.7]),
-      components_distribution=ds.MultivariateNormalDiag(
+      components_distribution=tfd.MultivariateNormalDiag(
           loc=[[-1., 1],  # component 1
                [1, -1]],  # component 2
           scale_identity_multiplier=[.3, .6]))
@@ -248,7 +248,7 @@ class MixtureSameFamily(distribution.Distribution):
       x = self._pad_sample_dims(x)
       log_prob_x = self.components_distribution.log_prob(x)  # [S, B, k]
       log_mix_prob = nn_ops.log_softmax(
-          self.mixture_distribution.logits, dim=-1)          # [B, k]
+          self.mixture_distribution.logits, axis=-1)         # [B, k]
       return math_ops.reduce_logsumexp(
           log_prob_x + log_mix_prob, axis=-1)                # [S, B]
 
@@ -264,7 +264,7 @@ class MixtureSameFamily(distribution.Distribution):
     x = self._pad_sample_dims(x)
     log_cdf_x = self.components_distribution.log_cdf(x)      # [S, B, k]
     log_mix_prob = nn_ops.log_softmax(
-        self.mixture_distribution.logits, dim=-1)            # [B, k]
+        self.mixture_distribution.logits, axis=-1)           # [B, k]
     return math_ops.reduce_logsumexp(
         log_cdf_x + log_mix_prob, axis=-1)                   # [S, B]
 
@@ -320,13 +320,14 @@ class MixtureSameFamily(distribution.Distribution):
         return array_ops.shape(d.batch_shape_tensor())[0]
       dist_batch_ndims = _get_ndims(self)
       cat_batch_ndims = _get_ndims(self.mixture_distribution)
-      bnd = distribution_util.pick_vector(
+      pad_ndims = array_ops.where(
           self.mixture_distribution.is_scalar_batch(),
-          [dist_batch_ndims], [cat_batch_ndims])[0]
+          dist_batch_ndims,
+          dist_batch_ndims - cat_batch_ndims)
       s = array_ops.shape(x)
       x = array_ops.reshape(x, shape=array_ops.concat([
           s[:-1],
-          array_ops.ones([bnd], dtype=dtypes.int32),
+          array_ops.ones([pad_ndims], dtype=dtypes.int32),
           s[-1:],
           array_ops.ones([self._event_ndims], dtype=dtypes.int32),
       ], axis=0))
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag.py b/tensorflow/contrib/distributions/python/ops/mvn_diag.py
index 163cf75d990d5fe7ec1e3aaf0040fc71f61774a7..e862552880f4073c8fa8e90134d0633e7484b0bf 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_diag.py
@@ -84,10 +84,10 @@ class MultivariateNormalDiag(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 2-variate Gaussian.
-  mvn = ds.MultivariateNormalDiag(
+  mvn = tfd.MultivariateNormalDiag(
       loc=[1., -1],
       scale_diag=[1, 2.])
 
@@ -101,7 +101,7 @@ class MultivariateNormalDiag(
   mvn.prob([-1., 0]).eval()  # shape: []
 
   # Initialize a 3-batch, 2-variate scaled-identity Gaussian.
-  mvn = ds.MultivariateNormalDiag(
+  mvn = tfd.MultivariateNormalDiag(
       loc=[1., -1],
       scale_identity_multiplier=[1, 2., 3])
 
@@ -119,7 +119,7 @@ class MultivariateNormalDiag(
   mvn.prob([-1., 0]).eval()  # shape: [3]
 
   # Initialize a 2-batch of 3-variate Gaussians.
-  mvn = ds.MultivariateNormalDiag(
+  mvn = tfd.MultivariateNormalDiag(
       loc=[[1., 2, 3],
            [11, 22, 33]]           # shape: [2, 3]
       scale_diag=[[1., 2, 3],
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py
index 040bc230722194316b8a74627344e315a2578281..413e88f03ae0286c294f3404549a73e1a47dcff7 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py
@@ -86,7 +86,7 @@ class MultivariateNormalDiagPlusLowRank(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate Gaussian with covariance `cov = S @ S.T`,
   # `S = diag(d) + U @ diag(m) @ U.T`. The perturbation, `U @ diag(m) @ U.T`, is
@@ -97,7 +97,7 @@ class MultivariateNormalDiagPlusLowRank(
        [-1, 1],
        [2, -0.5]]        # shape: [3, 2]
   m = [4., 5]            # shape: [2]
-  mvn = ds.MultivariateNormalDiagPlusLowRank(
+  mvn = tfd.MultivariateNormalDiagPlusLowRank(
       loc=mu
       scale_diag=d
       scale_perturb_factor=U,
@@ -118,7 +118,7 @@ class MultivariateNormalDiagPlusLowRank(
   m = [[0.1, 0.2],
        [0.4, 0.5]]         # shape: [b, r] = [2, 2]
 
-  mvn = ds.MultivariateNormalDiagPlusLowRank(
+  mvn = tfd.MultivariateNormalDiagPlusLowRank(
       loc=mu,
       scale_perturb_factor=U,
       scale_perturb_diag=m)
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
index f9952b2069d6dfd2593e6bd71ede0badf44cdf98..4bea99fbb75349f97fde473cb5716fe6c426ce90 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py
@@ -73,14 +73,14 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL):
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate Gaussian.
   mu = [1., 2, 3]
   cov = [[ 0.36,  0.12,  0.06],
          [ 0.12,  0.29, -0.13],
          [ 0.06, -0.13,  0.26]]
-  mvn = ds.MultivariateNormalFullCovariance(
+  mvn = tfd.MultivariateNormalFullCovariance(
       loc=mu,
       covariance_matrix=cov)
 
@@ -100,7 +100,7 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL):
   mu = [[1., 2, 3],
         [11, 22, 33]]              # shape: [2, 3]
   covariance_matrix = ...  # shape: [2, 3, 3], symmetric, positive definite.
-  mvn = ds.MultivariateNormalFullCovariance(
+  mvn = tfd.MultivariateNormalFullCovariance(
       loc=mu,
       covariance=covariance_matrix)
 
@@ -167,12 +167,11 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL):
           covariance_matrix = ops.convert_to_tensor(
               covariance_matrix, name="covariance_matrix")
           if validate_args:
-            assert_symmetric = check_ops.assert_equal(
-                covariance_matrix,
-                array_ops.matrix_transpose(covariance_matrix),
-                message="Matrix was not symmetric.")
-            covariance_matrix = control_flow_ops.with_dependencies(
-                [assert_symmetric], covariance_matrix)
+            covariance_matrix = control_flow_ops.with_dependencies([
+                check_ops.assert_near(
+                    covariance_matrix,
+                    array_ops.matrix_transpose(covariance_matrix),
+                    message="Matrix was not symmetric")], covariance_matrix)
           # No need to validate that covariance_matrix is non-singular.
           # LinearOperatorLowerTriangular has an assert_non_singular method that
           # is called by the Bijector.
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py
index 300bdd5f6064a1cc9c336689ac4fae04338edb30..a7399792892f4c179c05168184d76ec95c168b51 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py
@@ -90,8 +90,7 @@ class MultivariateNormalLinearOperator(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
-  la = tf.linalg
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate Gaussian.
   mu = [1., 2, 3]
@@ -103,9 +102,9 @@ class MultivariateNormalLinearOperator(
   #      [ 0.2,  0.5,  0. ],
   #      [ 0.1, -0.3,  0.4]])
 
-  mvn = ds.MultivariateNormalLinearOperator(
+  mvn = tfd.MultivariateNormalLinearOperator(
       loc=mu,
-      scale=la.LinearOperatorLowerTriangular(scale))
+      scale=tf.linalg.LinearOperatorLowerTriangular(scale))
 
   # Covariance agrees with cholesky(cov) parameterization.
   mvn.covariance().eval()
@@ -122,9 +121,9 @@ class MultivariateNormalLinearOperator(
   scale_diag = [[1., 2, 3],
                 [0.5, 1, 1.5]]     # shape: [2, 3]
 
-  mvn = ds.MultivariateNormalLinearOperator(
+  mvn = tfd.MultivariateNormalLinearOperator(
       loc=mu,
-      scale=la.LinearOperatorDiag(scale_diag))
+      scale=tf.linalg.LinearOperatorDiag(scale_diag))
 
   # Compute the pdf of two `R^3` observations; return a length-2 vector.
   x = [[-0.9, 0, 0.1],
diff --git a/tensorflow/contrib/distributions/python/ops/mvn_tril.py b/tensorflow/contrib/distributions/python/ops/mvn_tril.py
index 260dcc18f513d5440d3d39368539274c03faa72a..6c7dc4ca7aaf5b3a20b072e9360d15528ad10556 100644
--- a/tensorflow/contrib/distributions/python/ops/mvn_tril.py
+++ b/tensorflow/contrib/distributions/python/ops/mvn_tril.py
@@ -76,12 +76,13 @@ class MultivariateNormalTriL(
   ```
 
   Trainable (batch) lower-triangular matrices can be created with
-  `ds.matrix_diag_transform()` and/or `ds.fill_triangular()`
+  `tf.contrib.distributions.matrix_diag_transform()` and/or
+  `tf.contrib.distributions.fill_triangular()`
 
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate Gaussian.
   mu = [1., 2, 3]
@@ -92,7 +93,7 @@ class MultivariateNormalTriL(
   # ==> [[ 0.6,  0. ,  0. ],
   #      [ 0.2,  0.5,  0. ],
   #      [ 0.1, -0.3,  0.4]])
-  mvn = ds.MultivariateNormalTriL(
+  mvn = tfd.MultivariateNormalTriL(
       loc=mu,
       scale_tril=scale)
 
@@ -112,7 +113,7 @@ class MultivariateNormalTriL(
   mu = [[1., 2, 3],
         [11, 22, 33]]              # shape: [2, 3]
   tril = ...  # shape: [2, 3, 3], lower triangular, non-zero diagonal.
-  mvn = ds.MultivariateNormalTriL(
+  mvn = tfd.MultivariateNormalTriL(
       loc=mu,
       scale_tril=tril)
 
@@ -124,9 +125,9 @@ class MultivariateNormalTriL(
   # Instantiate a "learnable" MVN.
   dims = 4
   with tf.variable_scope("model"):
-    mvn = ds.MultivariateNormalTriL(
+    mvn = tfd.MultivariateNormalTriL(
         loc=tf.get_variable(shape=[dims], dtype=tf.float32, name="mu"),
-        scale_tril=ds.fill_triangular(
+        scale_tril=tfd.fill_triangular(
             tf.get_variable(shape=[dims * (dims + 1) / 2],
                             dtype=tf.float32, name="chol_Sigma")))
   ```
diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
index e1118ed4312ca2ed678a05a298110e2669d0a27e..92f2bba1828696248c9d9460566a08ba372c3358 100644
--- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
+++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py
@@ -22,21 +22,135 @@ import numpy as np
 
 from tensorflow.contrib.distributions.python.ops import distribution_util
 from tensorflow.contrib.distributions.python.ops import poisson as poisson_lib
+from tensorflow.contrib.distributions.python.ops.bijectors.exp import Exp
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import categorical as categorical_lib
 from tensorflow.python.ops.distributions import distribution as distribution_lib
+from tensorflow.python.ops.distributions import normal as normal_lib
+from tensorflow.python.ops.distributions import transformed_distribution as transformed_lib
 
 
 __all__ = [
     "PoissonLogNormalQuadratureCompound",
+    "quadrature_scheme_lognormal_gauss_hermite",
+    "quadrature_scheme_lognormal_quantiles",
 ]
 
 
+def quadrature_scheme_lognormal_gauss_hermite(
+    loc, scale, quadrature_size,
+    validate_args=False, name=None):  # pylint: disable=unused-argument
+  """Use Gauss-Hermite quadrature to form quadrature on positive-reals.
+
+  Note: for a given `quadrature_size`, this method is generally less accurate
+  than `quadrature_scheme_lognormal_quantiles`.
+
+  Args:
+    loc: `float`-like (batch of) scalar `Tensor`; the location parameter of
+      the LogNormal prior.
+    scale: `float`-like (batch of) scalar `Tensor`; the scale parameter of
+      the LogNormal prior.
+    quadrature_size: Python `int` scalar representing the number of quadrature
+      points.
+    validate_args: Python `bool`, default `False`. When `True` distribution
+      parameters are checked for validity despite possibly degrading runtime
+      performance. When `False` invalid inputs may silently render incorrect
+      outputs.
+    name: Python `str` name prefixed to Ops created by this class.
+
+  Returns:
+    grid: (Batch of) length-`quadrature_size` vectors representing the
+      `log_rate` parameters of a `Poisson`.
+    probs: (Batch of) length-`quadrature_size` vectors representing the
+      weight associate with each `grid` value.
+  """
+  with ops.name_scope(name, "vector_diffeomixture_quadrature_gauss_hermite",
+                      [loc, scale]):
+    grid, probs = np.polynomial.hermite.hermgauss(deg=quadrature_size)
+    grid = grid.astype(loc.dtype.as_numpy_dtype)
+    probs = probs.astype(loc.dtype.as_numpy_dtype)
+    probs /= np.linalg.norm(probs, ord=1, keepdims=True)
+    probs = ops.convert_to_tensor(probs, name="probs", dtype=loc.dtype)
+    # The following maps the broadcast of `loc` and `scale` to each grid
+    # point, i.e., we are creating several log-rates that correspond to the
+    # different Gauss-Hermite quadrature points and (possible) batches of
+    # `loc` and `scale`.
+    grid = (loc[..., array_ops.newaxis]
+            + np.sqrt(2.) * scale[..., array_ops.newaxis] * grid)
+    return grid, probs
+
+
+def quadrature_scheme_lognormal_quantiles(
+    loc, scale, quadrature_size,
+    validate_args=False, name=None):
+  """Use LogNormal quantiles to form quadrature on positive-reals.
+
+  Args:
+    loc: `float`-like (batch of) scalar `Tensor`; the location parameter of
+      the LogNormal prior.
+    scale: `float`-like (batch of) scalar `Tensor`; the scale parameter of
+      the LogNormal prior.
+    quadrature_size: Python `int` scalar representing the number of quadrature
+      points.
+    validate_args: Python `bool`, default `False`. When `True` distribution
+      parameters are checked for validity despite possibly degrading runtime
+      performance. When `False` invalid inputs may silently render incorrect
+      outputs.
+    name: Python `str` name prefixed to Ops created by this class.
+
+  Returns:
+    grid: (Batch of) length-`quadrature_size` vectors representing the
+      `log_rate` parameters of a `Poisson`.
+    probs: (Batch of) length-`quadrature_size` vectors representing the
+      weight associate with each `grid` value.
+  """
+  with ops.name_scope(name, "quadrature_scheme_lognormal_quantiles",
+                      [loc, scale]):
+    # Create a LogNormal distribution.
+    dist = transformed_lib.TransformedDistribution(
+        distribution=normal_lib.Normal(loc=loc, scale=scale),
+        bijector=Exp(event_ndims=0),
+        validate_args=validate_args)
+    batch_ndims = dist.batch_shape.ndims
+    if batch_ndims is None:
+      batch_ndims = array_ops.shape(dist.batch_shape_tensor())[0]
+
+    def _compute_quantiles():
+      """Helper to build quantiles."""
+      # Omit {0, 1} since they might lead to Inf/NaN.
+      zero = array_ops.zeros([], dtype=dist.dtype)
+      edges = math_ops.linspace(zero, 1., quadrature_size + 3)[1:-1]
+      # Expand edges so its broadcast across batch dims.
+      edges = array_ops.reshape(edges, shape=array_ops.concat([
+          [-1], array_ops.ones([batch_ndims], dtype=dtypes.int32)], axis=0))
+      quantiles = dist.quantile(edges)
+      # Cyclically permute left by one.
+      perm = array_ops.concat([
+          math_ops.range(1, 1 + batch_ndims), [0]], axis=0)
+      quantiles = array_ops.transpose(quantiles, perm)
+      return quantiles
+    quantiles = _compute_quantiles()
+
+    # Compute grid as quantile midpoints.
+    grid = (quantiles[..., :-1] + quantiles[..., 1:]) / 2.
+    # Set shape hints.
+    grid.set_shape(dist.batch_shape.concatenate([quadrature_size]))
+
+    # By construction probs is constant, i.e., `1 / quadrature_size`. This is
+    # important, because non-constant probs leads to non-reparameterizable
+    # samples.
+    probs = array_ops.fill(
+        dims=[quadrature_size],
+        value=1. / math_ops.cast(quadrature_size, dist.dtype))
+
+    return grid, probs
+
+
 class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   """`PoissonLogNormalQuadratureCompound` distribution.
 
@@ -47,30 +161,18 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   ```none
   p(k|loc, scale)
   = int_{R_+} dl LogNormal(l | loc, scale) Poisson(k | l)
-  = int_{R} dz ((lambda(z) sqrt(2) scale)
-                * exp(-z**2) / (lambda(z) sqrt(2 pi) sigma)
-                * Poisson(k | lambda(z)))
-  = int_{R} dz exp(-z**2) / sqrt(pi) Poisson(k | lambda(z))
   approx= sum{ prob[d] Poisson(k | lambda(grid[d])) : d=0, ..., deg-1 }
   ```
 
-  where `lambda(z) = exp(sqrt(2) scale z + loc)` and the `prob,grid` terms
-  are from [numerical quadrature](
-  https://en.wikipedia.org/wiki/Numerical_integration) (default:
-  [Gauss--Hermite quadrature](
-  https://en.wikipedia.org/wiki/Gauss%E2%80%93Hermite_quadrature)). Note that
-  the second line made the substitution:
-  `z(l) = (log(l) - loc) / (sqrt(2) scale)` which implies `lambda(z)` [above]
-  and `dl = sqrt(2) scale lambda(z) dz`
+  By default, the `grid` is chosen as quantiles of the `LogNormal` distribution
+  parameterized by `loc`, `scale` and the `prob` vector is
+  `[1. / quadrature_size]*quadrature_size`.
 
   In the non-approximation case, a draw from the LogNormal prior represents the
   Poisson rate parameter. Unfortunately, the non-approximate distribution lacks
   an analytical probability density function (pdf). Therefore the
   `PoissonLogNormalQuadratureCompound` class implements an approximation based
-  on [numerical quadrature](
-  https://en.wikipedia.org/wiki/Numerical_integration) (default:
-  [Gauss--Hermite quadrature](
-  https://en.wikipedia.org/wiki/Gauss%E2%80%93Hermite_quadrature)).
+  on [quadrature](https://en.wikipedia.org/wiki/Numerical_integration).
 
   Note: although the `PoissonLogNormalQuadratureCompound` is approximately the
   Poisson-LogNormal compound distribution, it is itself a valid distribution.
@@ -84,10 +186,8 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   https://en.wikipedia.org/wiki/Compound_probability_distribution). Using
   variable-substitution and [numerical quadrature](
   https://en.wikipedia.org/wiki/Numerical_integration) (default:
-  [Gauss--Hermite quadrature](
-  https://en.wikipedia.org/wiki/Gauss%E2%80%93Hermite_quadrature)) we can
-  redefine the distribution to be a parameter-less convex combination of `deg`
-  different Poisson samples.
+  based on `LogNormal` quantiles) we can redefine the distribution to be a
+  parameter-less convex combination of `deg` different Poisson samples.
 
   That is, defined over positive integers, this distribution is parameterized
   by a (batch of) `loc` and `scale` scalars.
@@ -96,46 +196,51 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
 
   ```none
   pdf(k | loc, scale, deg)
-    = sum{ prob[d] Poisson(k | lambda=exp(sqrt(2) scale grid[d] + loc))
+    = sum{ prob[d] Poisson(k | lambda=exp(grid[d]))
           : d=0, ..., deg-1 }
   ```
 
-  where, [e.g., `grid, w = numpy.polynomial.hermite.hermgauss(deg)`](
-  https://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.polynomial.hermite.hermgauss.html)
-  and `prob = w / sqrt(pi)`.
-
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
+
   # Create two batches of PoissonLogNormalQuadratureCompounds, one with
   # prior `loc = 0.` and another with `loc = 1.` In both cases `scale = 1.`
-  pln = ds.PoissonLogNormalQuadratureCompound(
+  pln = tfd.PoissonLogNormalQuadratureCompound(
       loc=[0., -0.5],
       scale=1.,
-      quadrature_grid_and_probs=(
-        np.polynomial.hermite.hermgauss(deg=10)),
+      quadrature_size=10,
       validate_args=True)
   """
 
   def __init__(self,
                loc,
                scale,
-               quadrature_grid_and_probs=None,
+               quadrature_size=8,
+               quadrature_fn=quadrature_scheme_lognormal_quantiles,
                validate_args=False,
                allow_nan_stats=True,
                name="PoissonLogNormalQuadratureCompound"):
-    """Constructs the PoissonLogNormalQuadratureCompound on `R**k`.
+    """Constructs the PoissonLogNormalQuadratureCompound`.
+
+    Note: `probs` returned by (optional) `quadrature_fn` are presumed to be
+    either a length-`quadrature_size` vector or a batch of vectors in 1-to-1
+    correspondence with the returned `grid`. (I.e., broadcasting is only
+    partially supported.)
 
     Args:
       loc: `float`-like (batch of) scalar `Tensor`; the location parameter of
         the LogNormal prior.
       scale: `float`-like (batch of) scalar `Tensor`; the scale parameter of
         the LogNormal prior.
-      quadrature_grid_and_probs: Python pair of `float`-like `Tensor`s
-        representing the sample points and the corresponding (possibly
-        normalized) weight.  When `None`, defaults to:
-        `np.polynomial.hermite.hermgauss(deg=8)`.
+      quadrature_size: Python `int` scalar representing the number of quadrature
+        points.
+      quadrature_fn: Python callable taking `loc`, `scale`,
+        `quadrature_size`, `validate_args` and returning `tuple(grid, probs)`
+        representing the LogNormal grid and corresponding normalized weight.
+        normalized) weight.
+        Default value: `quadrature_scheme_lognormal_quantiles`.
       validate_args: Python `bool`, default `False`. When `True` distribution
         parameters are checked for validity despite possibly degrading runtime
         performance. When `False` invalid inputs may silently render incorrect
@@ -147,47 +252,41 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
       name: Python `str` name prefixed to Ops created by this class.
 
     Raises:
-      TypeError: if `loc.dtype != scale[0].dtype`.
+      TypeError: if `quadrature_grid` and `quadrature_probs` have different base
+        `dtype`.
     """
     parameters = locals()
     with ops.name_scope(name, values=[loc, scale]):
-      loc = ops.convert_to_tensor(loc, name="loc")
-      self._loc = loc
+      if loc is not None:
+        loc = ops.convert_to_tensor(loc, name="loc")
+      if scale is not None:
+        scale = ops.convert_to_tensor(
+            scale, dtype=None if loc is None else loc.dtype, name="scale")
+      self._quadrature_grid, self._quadrature_probs = tuple(quadrature_fn(
+          loc, scale, quadrature_size, validate_args))
+
+      dt = self._quadrature_grid.dtype
+      if dt.base_dtype != self._quadrature_probs.dtype.base_dtype:
+        raise TypeError("Quadrature grid dtype ({}) does not match quadrature "
+                        "probs dtype ({}).".format(
+                            dt.name, self._quadrature_probs.dtype.name))
 
-      scale = ops.convert_to_tensor(scale, name="scale")
-      self._scale = scale
-
-      dtype = loc.dtype.base_dtype
-      if dtype != scale.dtype.base_dtype:
-        raise TypeError(
-            "loc.dtype(\"{}\") does not match scale.dtype(\"{}\")".format(
-                loc.dtype.name, scale.dtype.name))
-
-      grid, probs = distribution_util.process_quadrature_grid_and_probs(
-          quadrature_grid_and_probs, dtype, validate_args)
-      self._quadrature_grid = grid
-      self._quadrature_probs = probs
-      self._quadrature_size = distribution_util.dimension_size(probs, axis=0)
+      self._distribution = poisson_lib.Poisson(
+          log_rate=self._quadrature_grid,
+          validate_args=validate_args,
+          allow_nan_stats=allow_nan_stats)
 
       self._mixture_distribution = categorical_lib.Categorical(
           logits=math_ops.log(self._quadrature_probs),
           validate_args=validate_args,
           allow_nan_stats=allow_nan_stats)
 
-      # The following maps the broadcast of `loc` and `scale` to each grid
-      # point, i.e., we are creating several log-rates that correspond to the
-      # different Gauss-Hermite quadrature points and (possible) batches of
-      # `loc` and `scale`.
-      self._log_rate = (loc[..., array_ops.newaxis]
-                        + np.sqrt(2.) * scale[..., array_ops.newaxis] * grid)
-
-      self._distribution = poisson_lib.Poisson(
-          log_rate=self._log_rate,
-          validate_args=validate_args,
-          allow_nan_stats=allow_nan_stats)
+      self._loc = loc
+      self._scale = scale
+      self._quadrature_size = quadrature_size
 
       super(PoissonLogNormalQuadratureCompound, self).__init__(
-          dtype=dtype,
+          dtype=dt,
           reparameterization_type=distribution_lib.NOT_REPARAMETERIZED,
           validate_args=validate_args,
           allow_nan_stats=allow_nan_stats,
@@ -197,12 +296,12 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
 
   @property
   def mixture_distribution(self):
-    """Distribution which randomly selects a Poisson with Gauss-Hermite rate."""
+    """Distribution which randomly selects a Poisson with quadrature param."""
     return self._mixture_distribution
 
   @property
   def distribution(self):
-    """Base Poisson parameterized by a Gauss-Hermite grid of rates."""
+    """Base Poisson parameterized by a quadrature grid."""
     return self._distribution
 
   @property
@@ -216,24 +315,18 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
     return self._scale
 
   @property
-  def quadrature_grid(self):
-    """Quadrature grid points."""
-    return self._quadrature_grid
-
-  @property
-  def quadrature_probs(self):
-    """Quadrature normalized weights."""
-    return self._quadrature_probs
+  def quadrature_size(self):
+    return self._quadrature_size
 
   def _batch_shape_tensor(self):
     return array_ops.broadcast_dynamic_shape(
-        array_ops.shape(self.loc),
-        array_ops.shape(self.scale))
+        self.distribution.batch_shape_tensor(),
+        array_ops.shape(self.mixture_distribution.logits))[:-1]
 
   def _batch_shape(self):
     return array_ops.broadcast_static_shape(
-        self.loc.shape,
-        self.scale.shape)
+        self.distribution.batch_shape,
+        self.mixture_distribution.logits.shape)[:-1]
 
   def _event_shape(self):
     return tensor_shape.scalar()
@@ -241,18 +334,31 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   def _sample_n(self, n, seed=None):
     # Get ids as a [n, batch_size]-shaped matrix, unless batch_shape=[] then get
     # ids as a [n]-shaped vector.
-    batch_size = (np.prod(self.batch_shape.as_list(), dtype=np.int32)
-                  if self.batch_shape.is_fully_defined()
-                  else math_ops.reduce_prod(self.batch_shape_tensor()))
+    batch_size = self.batch_shape.num_elements()
+    if batch_size is None:
+      batch_size = math_ops.reduce_prod(self.batch_shape_tensor())
+    # We need to "sample extra" from the mixture distribution if it doesn't
+    # already specify a probs vector for each batch coordinate.
+    # We only support this kind of reduced broadcasting, i.e., there is exactly
+    # one probs vector for all batch dims or one for each.
     ids = self._mixture_distribution.sample(
         sample_shape=concat_vectors(
             [n],
             distribution_util.pick_vector(
-                self.is_scalar_batch(),
-                np.int32([]),
-                [batch_size])),
+                self.mixture_distribution.is_scalar_batch(),
+                [batch_size],
+                np.int32([]))),
         seed=distribution_util.gen_new_seed(
             seed, "poisson_lognormal_quadrature_compound"))
+    # We need to flatten batch dims in case mixture_distribution has its own
+    # batch dims.
+    ids = array_ops.reshape(ids, shape=concat_vectors(
+        [n],
+        distribution_util.pick_vector(
+            self.is_scalar_batch(),
+            np.int32([]),
+            np.int32([-1]))))
+
     # Stride `quadrature_size` for `batch_size` number of times.
     offset = math_ops.range(start=0,
                             limit=batch_size * self._quadrature_size,
@@ -275,7 +381,7 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
   def _mean(self):
     return math_ops.exp(
         math_ops.reduce_logsumexp(
-            self.mixture_distribution.logits + self._log_rate,
+            self.mixture_distribution.logits + self.distribution.log_rate,
             axis=-1))
 
   def _variance(self):
@@ -300,7 +406,7 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
     # Var[E[Z | V]] = sum{ prob[d] (Mean[d] - Mean)**2 : d=0, ..., deg-1 }
     v = array_ops.stack([
         # log(self.distribution.variance()) = log(Var[d]) = log(rate[d])
-        self._log_rate,
+        self.distribution.log_rate,
         # log((Mean[d] - Mean)**2)
         2. * math_ops.log(
             math_ops.abs(self.distribution.mean()
@@ -311,14 +417,9 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution):
         axis=[-2, -1])
 
 
-def static_value(x):
-  """Returns the static value of a `Tensor` or `None`."""
-  return tensor_util.constant_value(ops.convert_to_tensor(x))
-
-
 def concat_vectors(*args):
   """Concatenates input vectors, statically if possible."""
-  args_ = [static_value(x) for x in args]
+  args_ = [distribution_util.static_value(x) for x in args]
   if any(vec is None for vec in args_):
     return array_ops.concat(args, axis=0)
   return [val for vec in args_ for val in vec]
diff --git a/tensorflow/contrib/distributions/python/ops/sample_stats.py b/tensorflow/contrib/distributions/python/ops/sample_stats.py
index 2a4b92c72900f79785e7e34b77179d3decbace5b..dfc813361977c159d8d48f9d5b9ff03db5b4acdc 100644
--- a/tensorflow/contrib/distributions/python/ops/sample_stats.py
+++ b/tensorflow/contrib/distributions/python/ops/sample_stats.py
@@ -28,12 +28,190 @@ from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import spectral_ops
+from tensorflow.python.ops.distributions import util
 
 __all__ = [
+    "auto_correlation",
     "percentile",
 ]
 
 
+# TODO(langmore) Write separate versions of this for real/complex dtype, taking
+# advantage of optimized real-fft ops.
+def auto_correlation(
+    x,
+    axis=-1,
+    max_lags=None,
+    center=True,
+    normalize=True,
+    name="auto_correlation"):
+  """Auto correlation along one axis.
+
+  Given a `1-D` wide sense stationary (WSS) sequence `X`, the auto correlation
+  `RXX` may be defined as  (with `E` expectation and `Conj` complex conjugate)
+
+  ```
+  RXX[m] := E{ W[m] Conj(W[0]) } = E{ W[0] Conj(W[-m]) },
+  W[n]   := (X[n] - MU) / S,
+  MU     := E{ X[0] },
+  S**2   := E{ (X[0] - MU) Conj(X[0] - MU) }.
+  ```
+
+  This function takes the viewpoint that `x` is (along one axis) a finite
+  sub-sequence of a realization of (WSS) `X`, and then uses `x` to produce an
+  estimate of `RXX[m]` as follows:
+
+  After extending `x` from length `L` to `inf` by zero padding, the auto
+  correlation estimate `rxx[m]` is computed for `m = 0, 1, ..., max_lags` as
+
+  ```
+  rxx[m] := (L - m)**-1 sum_n w[n + m] Conj(w[n]),
+  w[n]   := (x[n] - mu) / s,
+  mu     := L**-1 sum_n x[n],
+  s**2   := L**-1 sum_n (x[n] - mu) Conj(x[n] - mu)
+  ```
+
+  The error in this estimate is proportional to `1 / sqrt(len(x) - m)`, so users
+  often set `max_lags` small enough so that the entire output is meaningful.
+
+  Note that since `mu` is an imperfect estimate of `E{ X[0] }`, and we divide by
+  `len(x) - m` rather than `len(x) - m - 1`, our estimate of auto correlation
+  contains a slight bias, which goes to zero as `len(x) - m --> infinity`.
+
+  Args:
+    x:  `float32` or `complex64` `Tensor`.
+    axis:  Python `int`. The axis number along which to compute correlation.
+      Other dimensions index different batch members.
+    max_lags:  Positive `int` tensor.  The maximum value of `m` to consider
+      (in equation above).  If `max_lags >= x.shape[axis]`, we effectively
+      re-set `max_lags` to `x.shape[axis] - 1`.
+    center:  Python `bool`.  If `False`, do not subtract the mean estimate `mu`
+      from `x[n]` when forming `w[n]`.
+    normalize:  Python `bool`.  If `False`, do not divide by the variance
+      estimate `s**2` when forming `w[n]`.
+    name:  `String` name to prepend to created ops.
+
+  Returns:
+    `rxx`: `Tensor` of same `dtype` as `x`.  `rxx.shape[i] = x.shape[i]` for
+      `i != axis`, and `rxx.shape[axis] = max_lags + 1`.
+
+  Raises:
+    TypeError:  If `x` is not a supported type.
+  """
+  # Implementation details:
+  # Extend length N / 2 1-D array x to length N by zero padding onto the end.
+  # Then, set
+  #   F[x]_k := sum_n x_n exp{-i 2 pi k n / N }.
+  # It is not hard to see that
+  #   F[x]_k Conj(F[x]_k) = F[R]_k, where
+  #   R_m := sum_n x_n Conj(x_{(n - m) mod N}).
+  # One can also check that R_m / (N / 2 - m) is an unbiased estimate of RXX[m].
+
+  # Since F[x] is the DFT of x, this leads us to a zero-padding and FFT/IFFT
+  # based version of estimating RXX.
+  # Note that this is a special case of the Wiener-Khinchin Theorem.
+  with ops.name_scope(name, values=[x]):
+    x = ops.convert_to_tensor(x, name="x")
+
+    # Rotate dimensions of x in order to put axis at the rightmost dim.
+    # FFT op requires this.
+    rank = util.prefer_static_rank(x)
+    if axis < 0:
+      axis = rank + axis
+    shift = rank - 1 - axis
+    # Suppose x.shape[axis] = T, so there are T "time" steps.
+    #   ==> x_rotated.shape = B + [T],
+    # where B is x_rotated's batch shape.
+    x_rotated = util.rotate_transpose(x, shift)
+
+    if center:
+      x_rotated -= math_ops.reduce_mean(x_rotated, axis=-1, keepdims=True)
+
+    # x_len = N / 2 from above explanation.  The length of x along axis.
+    # Get a value for x_len that works in all cases.
+    x_len = util.prefer_static_shape(x_rotated)[-1]
+
+    # TODO(langmore) Investigate whether this zero padding helps or hurts.  At
+    # the moment is is necessary so that all FFT implementations work.
+    # Zero pad to the next power of 2 greater than 2 * x_len, which equals
+    # 2**(ceil(Log_2(2 * x_len))).  Note: Log_2(X) = Log_e(X) / Log_e(2).
+    x_len_float64 = math_ops.cast(x_len, np.float64)
+    target_length = math_ops.pow(
+        np.float64(2.),
+        math_ops.ceil(math_ops.log(x_len_float64 * 2) / np.log(2.)))
+    pad_length = math_ops.cast(target_length - x_len_float64, np.int32)
+
+    # We should have:
+    # x_rotated_pad.shape = x_rotated.shape[:-1] + [T + pad_length]
+    #                     = B + [T + pad_length]
+    x_rotated_pad = util.pad(x_rotated, axis=-1, back=True, count=pad_length)
+
+    dtype = x.dtype
+    if not dtype.is_complex:
+      if not dtype.is_floating:
+        raise TypeError("Argument x must have either float or complex dtype"
+                        " found: {}".format(dtype))
+      x_rotated_pad = math_ops.complex(x_rotated_pad,
+                                       dtype.real_dtype.as_numpy_dtype(0.))
+
+    # Autocorrelation is IFFT of power-spectral density (up to some scaling).
+    fft_x_rotated_pad = spectral_ops.fft(x_rotated_pad)
+    spectral_density = fft_x_rotated_pad * math_ops.conj(fft_x_rotated_pad)
+    # shifted_product is R[m] from above detailed explanation.
+    # It is the inner product sum_n X[n] * Conj(X[n - m]).
+    shifted_product = spectral_ops.ifft(spectral_density)
+
+    # Cast back to real-valued if x was real to begin with.
+    shifted_product = math_ops.cast(shifted_product, dtype)
+
+    # Figure out if we can deduce the final static shape, and set max_lags.
+    # Use x_rotated as a reference, because it has the time dimension in the far
+    # right, and was created before we performed all sorts of crazy shape
+    # manipulations.
+    know_static_shape = True
+    if not x_rotated.shape.is_fully_defined():
+      know_static_shape = False
+    if max_lags is None:
+      max_lags = x_len - 1
+    else:
+      max_lags = ops.convert_to_tensor(max_lags, name="max_lags")
+      max_lags_ = tensor_util.constant_value(max_lags)
+      if max_lags_ is None or not know_static_shape:
+        know_static_shape = False
+        max_lags = math_ops.minimum(x_len - 1, max_lags)
+      else:
+        max_lags = min(x_len - 1, max_lags_)
+
+    # Chop off the padding.
+    # We allow users to provide a huge max_lags, but cut it off here.
+    # shifted_product_chopped.shape = x_rotated.shape[:-1] + [max_lags]
+    shifted_product_chopped = shifted_product[..., :max_lags + 1]
+
+    # If possible, set shape.
+    if know_static_shape:
+      chopped_shape = x_rotated.shape.as_list()
+      chopped_shape[-1] = min(x_len, max_lags + 1)
+      shifted_product_chopped.set_shape(chopped_shape)
+
+    # Recall R[m] is a sum of N / 2 - m nonzero terms x[n] Conj(x[n - m]).  The
+    # other terms were zeros arising only due to zero padding.
+    # `denominator = (N / 2 - m)` (defined below) is the proper term to
+    # divide by by to make this an unbiased estimate of the expectation
+    # E[X[n] Conj(X[n - m])].
+    x_len = math_ops.cast(x_len, dtype.real_dtype)
+    max_lags = math_ops.cast(max_lags, dtype.real_dtype)
+    denominator = x_len - math_ops.range(0., max_lags + 1.)
+    denominator = math_ops.cast(denominator, dtype)
+    shifted_product_rotated = shifted_product_chopped / denominator
+
+    if normalize:
+      shifted_product_rotated /= shifted_product_rotated[..., :1]
+
+    # Transpose dimensions back to those of x.
+    return util.rotate_transpose(shifted_product_rotated, -shift)
+
+
 # TODO(langmore) To make equivalent to numpy.percentile:
 #  Make work with a sequence of floats or single float for 'q'.
 #  Make work with "linear", "midpoint" interpolation. (linear should be default)
diff --git a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py
index b05f15771a3a94779ffddea8f16ad2fa4ea2fdd1..c4b8f055b7fbc3f0835b503eddd7617610326d8c 100644
--- a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py
+++ b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py
@@ -115,7 +115,7 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution):
       tailweight:  Tailweight parameter. Default is `1.0` (unchanged tailweight)
       distribution: `tf.Distribution`-like instance. Distribution that is
         transformed to produce this distribution.
-        Default is `ds.Normal(0., 1.)`.
+        Default is `tf.distributions.Normal(0., 1.)`.
         Must be a scalar-batch, scalar-event distribution.  Typically
         `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
         a function of non-trainable parameters. WARNING: If you backprop through
diff --git a/tensorflow/contrib/distributions/python/ops/test_util.py b/tensorflow/contrib/distributions/python/ops/test_util.py
index 77f2a39273dc365a4ac202d846dd2bc364655c86..15b0820cbdf560e04a304c40a47e541006523b6d 100644
--- a/tensorflow/contrib/distributions/python/ops/test_util.py
+++ b/tensorflow/contrib/distributions/python/ops/test_util.py
@@ -40,6 +40,7 @@ class DiscreteScalarDistributionTestHelpers(object):
   def run_test_sample_consistent_log_prob(
       self, sess_run_fn, dist,
       num_samples=int(1e5), num_threshold=int(1e3), seed=42,
+      batch_size=None,
       rtol=1e-2, atol=0.):
     """Tests that sample/log_prob are consistent with each other.
 
@@ -66,6 +67,8 @@ class DiscreteScalarDistributionTestHelpers(object):
       seed: Python `int` indicating the seed to use when sampling from `dist`.
         In general it is not recommended to use `None` during a test as this
         increases the likelihood of spurious test failure.
+      batch_size: Hint for unpacking result of samples. Default: `None` means
+        batch_size is inferred.
       rtol: Python `float`-type indicating the admissible relative error between
         analytical and sample statistics.
       atol: Python `float`-type indicating the admissible absolute error between
@@ -80,10 +83,11 @@ class DiscreteScalarDistributionTestHelpers(object):
     # Histogram only supports vectors so we call it once per batch coordinate.
     y = dist.sample(num_samples, seed=seed)
     y = array_ops.reshape(y, shape=[num_samples, -1])
-    batch_size = math_ops.reduce_prod(dist.batch_shape_tensor())
+    if batch_size is None:
+      batch_size = math_ops.reduce_prod(dist.batch_shape_tensor())
     batch_dims = array_ops.shape(dist.batch_shape_tensor())[0]
     edges_expanded_shape = 1 + array_ops.pad([-2], paddings=[[0, batch_dims]])
-    for b, x in enumerate(array_ops.unstack(y, axis=1)):
+    for b, x in enumerate(array_ops.unstack(y, num=batch_size, axis=1)):
       counts, edges = self.histogram(x)
       edges = array_ops.reshape(edges, edges_expanded_shape)
       probs = math_ops.exp(dist.log_prob(edges))
@@ -323,7 +327,7 @@ class VectorDistributionTestHelpers(object):
       num_samples=int(1e5),
       seed=24,
       rtol=1e-2,
-      atol=0.,
+      atol=0.1,
       cov_rtol=None,
       cov_atol=None):
     """Tests that sample/mean/covariance are consistent with each other.
diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
index 92043d6a08833888c36009261addca0d14949ea8..7ce8a83fd91e2dfaa0ccef633f803b3ae595e646 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py
@@ -22,30 +22,176 @@ import numpy as np
 
 from tensorflow.contrib.distributions.python.ops import distribution_util
 from tensorflow.contrib.distributions.python.ops.bijectors.affine_linear_operator import AffineLinearOperator
+from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import SoftmaxCentered
 from tensorflow.contrib.linalg.python.ops import linear_operator_addition as linop_add_lib
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops.distributions import categorical as categorical_lib
 from tensorflow.python.ops.distributions import distribution as distribution_lib
+from tensorflow.python.ops.distributions import normal as normal_lib
 from tensorflow.python.ops.linalg import linear_operator_diag as linop_diag_lib
 from tensorflow.python.ops.linalg import linear_operator_full_matrix as linop_full_lib
 from tensorflow.python.ops.linalg import linear_operator_identity as linop_identity_lib
 from tensorflow.python.ops.linalg import linear_operator_lower_triangular as linop_tril_lib
 
-static_value = distribution_util.static_value
-
 
 __all__ = [
     "VectorDiffeomixture",
+    "quadrature_scheme_softmaxnormal_gauss_hermite",
+    "quadrature_scheme_softmaxnormal_quantiles",
 ]
 
 
+def quadrature_scheme_softmaxnormal_gauss_hermite(
+    loc, scale, quadrature_size,
+    validate_args=False, name=None):
+  """Use Gauss-Hermite quadrature to form quadrature on `K - 1` simplex.
+
+  Note: for a given `quadrature_size`, this method is generally less accurate
+  than `quadrature_scheme_softmaxnormal_quantiles`.
+
+  Args:
+    loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`, B>=0.
+      Represents the `location` parameter of the SoftmaxNormal used for
+      selecting one of the `K` affine transformations.
+    scale: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`, B>=0.
+      Represents the `scale` parameter of the SoftmaxNormal used for
+      selecting one of the `K` affine transformations.
+    quadrature_size: Python `int` scalar representing the number of quadrature
+      points.
+    validate_args: Python `bool`, default `False`. When `True` distribution
+      parameters are checked for validity despite possibly degrading runtime
+      performance. When `False` invalid inputs may silently render incorrect
+      outputs.
+    name: Python `str` name prefixed to Ops created by this class.
+
+  Returns:
+    grid: Shape `[b1, ..., bB, K, quadrature_size]` `Tensor` representing the
+      convex combination of affine parameters for `K` components.
+      `grid[..., :, n]` is the `n`-th grid point, living in the `K - 1` simplex.
+    probs:  Shape `[b1, ..., bB, K, quadrature_size]` `Tensor` representing the
+      associated with each grid point.
+  """
+  with ops.name_scope(name, "quadrature_scheme_softmaxnormal_gauss_hermite",
+                      [loc, scale]):
+    loc = ops.convert_to_tensor(loc, name="loc")
+    dt = loc.dtype.base_dtype
+    scale = ops.convert_to_tensor(scale, dtype=dt, name="scale")
+
+    loc = maybe_check_quadrature_param(loc, "loc", validate_args)
+    scale = maybe_check_quadrature_param(scale, "scale", validate_args)
+
+    grid, probs = np.polynomial.hermite.hermgauss(deg=quadrature_size)
+    grid = grid.astype(loc.dtype.as_numpy_dtype)
+    probs = probs.astype(loc.dtype.as_numpy_dtype)
+    probs /= np.linalg.norm(probs, ord=1, keepdims=True)
+    probs = ops.convert_to_tensor(probs, name="probs", dtype=loc.dtype)
+
+    grid = softmax(
+        -distribution_util.pad(
+            (loc[..., array_ops.newaxis] +
+             np.sqrt(2.) * scale[..., array_ops.newaxis] * grid),
+            axis=-2,
+            front=True),
+        axis=-2)  # shape: [B, components, deg]
+
+    return grid, probs
+
+
+def quadrature_scheme_softmaxnormal_quantiles(
+    loc, scale, quadrature_size,
+    validate_args=False, name=None):
+  """Use SoftmaxNormal quantiles to form quadrature on `K - 1` simplex.
+
+  Args:
+    loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`, B>=0.
+      Represents the `location` parameter of the SoftmaxNormal used for
+      selecting one of the `K` affine transformations.
+    scale: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`, B>=0.
+      Represents the `scale` parameter of the SoftmaxNormal used for
+      selecting one of the `K` affine transformations.
+    quadrature_size: Python scalar `int` representing the number of quadrature
+      points.
+    validate_args: Python `bool`, default `False`. When `True` distribution
+      parameters are checked for validity despite possibly degrading runtime
+      performance. When `False` invalid inputs may silently render incorrect
+      outputs.
+    name: Python `str` name prefixed to Ops created by this class.
+
+  Returns:
+    grid: Shape `[b1, ..., bB, K, quadrature_size]` `Tensor` representing the
+      convex combination of affine parameters for `K` components.
+      `grid[..., :, n]` is the `n`-th grid point, living in the `K - 1` simplex.
+    probs:  Shape `[b1, ..., bB, K, quadrature_size]` `Tensor` representing the
+      associated with each grid point.
+  """
+  with ops.name_scope(name, "softmax_normal_grid_and_probs", [loc, scale]):
+    loc = ops.convert_to_tensor(loc, name="loc")
+    dt = loc.dtype.base_dtype
+    scale = ops.convert_to_tensor(scale, dtype=dt, name="scale")
+
+    loc = maybe_check_quadrature_param(loc, "loc", validate_args)
+    scale = maybe_check_quadrature_param(scale, "scale", validate_args)
+
+    dist = normal_lib.Normal(loc=loc, scale=scale)
+
+    def _get_batch_ndims():
+      """Helper to get dist.batch_shape.ndims, statically if possible."""
+      ndims = dist.batch_shape.ndims
+      if ndims is None:
+        ndims = array_ops.shape(dist.batch_shape_tensor())[0]
+      return ndims
+    batch_ndims = _get_batch_ndims()
+
+    def _get_final_shape(qs):
+      """Helper to build `TensorShape`."""
+      bs = dist.batch_shape.with_rank_at_least(1)
+      num_components = bs[-1].value
+      if num_components is not None:
+        num_components += 1
+      tail = tensor_shape.TensorShape([num_components, qs])
+      return bs[:-1].concatenate(tail)
+
+    def _compute_quantiles():
+      """Helper to build quantiles."""
+      # Omit {0, 1} since they might lead to Inf/NaN.
+      zero = array_ops.zeros([], dtype=dist.dtype)
+      edges = math_ops.linspace(zero, 1., quadrature_size + 3)[1:-1]
+      # Expand edges so its broadcast across batch dims.
+      edges = array_ops.reshape(edges, shape=array_ops.concat([
+          [-1], array_ops.ones([batch_ndims], dtype=dtypes.int32)], axis=0))
+      quantiles = dist.quantile(edges)
+      quantiles = SoftmaxCentered(event_ndims=1).forward(quantiles)
+      # Cyclically permute left by one.
+      perm = array_ops.concat([
+          math_ops.range(1, 1 + batch_ndims), [0]], axis=0)
+      quantiles = array_ops.transpose(quantiles, perm)
+      quantiles.set_shape(_get_final_shape(quadrature_size + 1))
+      return quantiles
+    quantiles = _compute_quantiles()
+
+    # Compute grid as quantile midpoints.
+    grid = (quantiles[..., :-1] + quantiles[..., 1:]) / 2.
+    # Set shape hints.
+    grid.set_shape(_get_final_shape(quadrature_size))
+
+    # By construction probs is constant, i.e., `1 / quadrature_size`. This is
+    # important, because non-constant probs leads to non-reparameterizable
+    # samples.
+    probs = array_ops.fill(
+        dims=[quadrature_size],
+        value=1. / math_ops.cast(quadrature_size, dist.dtype))
+
+    return grid, probs
+
+
 class VectorDiffeomixture(distribution_lib.Distribution):
   """VectorDiffeomixture distribution.
 
@@ -188,8 +334,7 @@ class VectorDiffeomixture(distribution_lib.Distribution):
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
-  la = tf.linalg
+  tfd = tf.contrib.distributions
 
   # Create two batches of VectorDiffeomixtures, one with mix_loc=[0.] and
   # another with mix_loc=[1]. In both cases, `K=2` and the affine
@@ -197,20 +342,20 @@ class VectorDiffeomixture(distribution_lib.Distribution):
   # k=0: loc=zeros(dims)  scale=LinearOperatorScaledIdentity
   # k=1: loc=[2.]*dims    scale=LinOpDiag
   dims = 5
-  vdm = ds.VectorDiffeomixture(
+  vdm = tfd.VectorDiffeomixture(
       mix_loc=[[0.], [1]],
       mix_scale=[1.],
-      distribution=ds.Normal(loc=0., scale=1.),
+      distribution=tfd.Normal(loc=0., scale=1.),
       loc=[
           None,  # Equivalent to `np.zeros(dims, dtype=np.float32)`.
           np.float32([2.]*dims),
       ],
       scale=[
-          la.LinearOperatorScaledIdentity(
+          tf.linalg.LinearOperatorScaledIdentity(
             num_rows=dims,
             multiplier=np.float32(1.1),
             is_positive_definite=True),
-          la.LinearOperatorDiag(
+          tf.linalg.LinearOperatorDiag(
             diag=np.linspace(2.5, 3.5, dims, dtype=np.float32),
             is_positive_definite=True),
       ],
@@ -223,17 +368,20 @@ class VectorDiffeomixture(distribution_lib.Distribution):
                distribution,
                loc=None,
                scale=None,
-               quadrature_grid_and_probs=None,
+               quadrature_size=8,
+               quadrature_fn=quadrature_scheme_softmaxnormal_quantiles,
                validate_args=False,
                allow_nan_stats=True,
                name="VectorDiffeomixture"):
-    """Constructs the VectorDiffeomixture on `R**k`.
+    """Constructs the VectorDiffeomixture on `R**d`.
 
     Args:
-      mix_loc: `float`-like `Tensor`. Represents the `location` parameter of the
-        SoftmaxNormal used for selecting one of the `K` affine transformations.
-      mix_scale: `float`-like `Tensor`. Represents the `scale` parameter of the
-        SoftmaxNormal used for selecting one of the `K` affine transformations.
+      mix_loc: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`. Represents
+        the `location` parameter of the SoftmaxNormal used for selecting one of
+        the `K` affine transformations.
+      mix_scale: `float`-like `Tensor` with shape `[b1, ..., bB, K-1]`.
+        Represents the `scale` parameter of the SoftmaxNormal used for selecting
+        one of the `K` affine transformations.
       distribution: `tf.Distribution`-like instance. Distribution from which `d`
         iid samples are used as input to the selected affine transformation.
         Must be a scalar-batch, scalar-event distribution.  Typically
@@ -252,10 +400,13 @@ class VectorDiffeomixture(distribution_lib.Distribution):
         `k`-th element represents the `scale` used for the `k`-th affine
         transformation. `LinearOperator`s must have shape `[B1, ..., Bb, d, d]`,
         `b >= 0`, i.e., characterizes `b`-batches of `d x d` matrices
-      quadrature_grid_and_probs: Python pair of `float`-like `Tensor`s
-        representing the sample points and the corresponding (possibly
-        normalized) weight.  When `None`, defaults to:
-        `np.polynomial.hermite.hermgauss(deg=8)`.
+      quadrature_size: Python `int` scalar representing number of
+        quadrature points.
+      quadrature_fn: Python callable taking `mix_loc`, `mix_scale`,
+        `quadrature_size`, `validate_args` and returning `tuple(grid, probs)`
+        representing the SoftmaxNormal grid and corresponding normalized weight.
+        normalized) weight.
+        Default value: `quadrature_scheme_softmaxnormal_quantiles`.
       validate_args: Python `bool`, default `False`. When `True` distribution
         parameters are checked for validity despite possibly degrading runtime
         performance. When `False` invalid inputs may silently render incorrect
@@ -322,11 +473,8 @@ class VectorDiffeomixture(distribution_lib.Distribution):
         raise NotImplementedError("Currently only bimixtures are supported; "
                                   "len(scale)={} is not 2.".format(len(scale)))
 
-      grid, probs = distribution_util.process_quadrature_grid_and_probs(
-          quadrature_grid_and_probs, dtype, validate_args)
-      self._quadrature_grid = grid
-      self._quadrature_probs = probs
-      self._quadrature_size = distribution_util.dimension_size(probs, axis=0)
+      self._grid, probs = tuple(quadrature_fn(
+          mix_loc, mix_scale, quadrature_size, validate_args))
 
       # Note: by creating the logits as `log(prob)` we ensure that
       # `self.mixture_distribution.logits` is equivalent to
@@ -336,22 +484,13 @@ class VectorDiffeomixture(distribution_lib.Distribution):
           validate_args=validate_args,
           allow_nan_stats=allow_nan_stats)
 
-      mix_loc = maybe_check_mix_param(
-          mix_loc, "mix_loc", dtype, validate_args)
-      mix_scale = maybe_check_mix_param(
-          mix_scale, "mix_scale", dtype, validate_args)
-
       asserts = distribution_util.maybe_check_scalar_distribution(
           distribution, dtype, validate_args)
       if asserts:
-        mix_loc = control_flow_ops.with_dependencies(asserts, mix_loc)
+        self._grid = control_flow_ops.with_dependencies(
+            asserts, self._grid)
       self._distribution = distribution
 
-      # shape: [B, deg]
-      self._interpolate_weight = math_ops.sigmoid(
-          mix_loc
-          + np.sqrt(2.) * mix_scale * grid)
-
       self._interpolated_affine = [
           AffineLinearOperator(shift=loc_,
                                scale=scale_,
@@ -359,15 +498,16 @@ class VectorDiffeomixture(distribution_lib.Distribution):
                                validate_args=validate_args,
                                name="interpolated_affine_{}".format(k))
           for k, (loc_, scale_) in enumerate(zip(
-              interpolate_loc(self._quadrature_size,
-                              self._interpolate_weight,
-                              loc),
-              interpolate_scale(self._quadrature_size,
-                                self._interpolate_weight,
-                                scale)))]
+              interpolate_loc(self._grid, loc),
+              interpolate_scale(self._grid, scale)))]
 
-      self._batch_shape_, self._event_shape_ = determine_batch_event_shapes(
-          mix_loc, mix_scale, self._endpoint_affine)
+      [
+          self._batch_shape_,
+          self._batch_shape_tensor_,
+          self._event_shape_,
+          self._event_shape_tensor_,
+      ] = determine_batch_event_shapes(self._grid,
+                                       self._endpoint_affine)
 
       super(VectorDiffeomixture, self).__init__(
           dtype=dtype,
@@ -386,8 +526,7 @@ class VectorDiffeomixture(distribution_lib.Distribution):
           allow_nan_stats=allow_nan_stats,
           parameters=parameters,
           graph_parents=(
-              [mix_loc, mix_scale]
-              + distribution._graph_parents  # pylint: disable=protected-access
+              distribution._graph_parents  # pylint: disable=protected-access
               + [loc_ for loc_ in loc if loc_ is not None]
               + [p for scale_ in scale for p in scale_.graph_parents]),
           name=name)
@@ -403,9 +542,9 @@ class VectorDiffeomixture(distribution_lib.Distribution):
     return self._distribution
 
   @property
-  def interpolate_weight(self):
+  def grid(self):
     """Grid of mixing probabilities, one for each grid point."""
-    return self._interpolate_weight
+    return self._grid
 
   @property
   def endpoint_affine(self):
@@ -417,27 +556,17 @@ class VectorDiffeomixture(distribution_lib.Distribution):
     """Affine transformation for each convex combination of `K` components."""
     return self._interpolated_affine
 
-  @property
-  def quadrature_grid(self):
-    """Quadrature grid points."""
-    return self._quadrature_grid
-
-  @property
-  def quadrature_probs(self):
-    """Quadrature normalized weights."""
-    return self._quadrature_probs
-
   def _batch_shape_tensor(self):
-    return self._batch_shape_
+    return self._batch_shape_tensor_
 
   def _batch_shape(self):
-    return tensor_shape.TensorShape(static_value(self._batch_shape_))
+    return self._batch_shape_
 
   def _event_shape_tensor(self):
-    return self._event_shape_
+    return self._event_shape_tensor_
 
   def _event_shape(self):
-    return tensor_shape.TensorShape(static_value(self._event_shape_))
+    return self._event_shape_
 
   def _sample_n(self, n, seed=None):
     x = self.distribution.sample(
@@ -450,25 +579,44 @@ class VectorDiffeomixture(distribution_lib.Distribution):
 
     # Get ids as a [n, batch_size]-shaped matrix, unless batch_shape=[] then get
     # ids as a [n]-shaped vector.
-    batch_size = reduce_prod(self.batch_shape_tensor())
-    ids = self._mixture_distribution.sample(
+    batch_size = self.batch_shape.num_elements()
+    if batch_size is None:
+      batch_size = array_ops.reduce_prod(self.batch_shape_tensor())
+    mix_batch_size = self.mixture_distribution.batch_shape.num_elements()
+    if mix_batch_size is None:
+      mix_batch_size = math_ops.reduce_prod(
+          self.mixture_distribution.batch_shape_tensor())
+    ids = self.mixture_distribution.sample(
         sample_shape=concat_vectors(
             [n],
             distribution_util.pick_vector(
                 self.is_scalar_batch(),
                 np.int32([]),
-                [batch_size])),
+                [batch_size // mix_batch_size])),
         seed=distribution_util.gen_new_seed(
             seed, "vector_diffeomixture"))
-
-    # Stride `quadrature_size` for `batch_size` number of times.
+    # We need to flatten batch dims in case mixture_distribution has its own
+    # batch dims.
+    ids = array_ops.reshape(ids, shape=concat_vectors(
+        [n],
+        distribution_util.pick_vector(
+            self.is_scalar_batch(),
+            np.int32([]),
+            np.int32([-1]))))
+
+    # Stride `components * quadrature_size` for `batch_size` number of times.
+    stride = self.grid.shape.with_rank_at_least(
+        2)[-2:].num_elements()
+    if stride is None:
+      stride = array_ops.reduce_prod(
+          array_ops.shape(self.grid)[-2:])
     offset = math_ops.range(start=0,
-                            limit=batch_size * self._quadrature_size,
-                            delta=self._quadrature_size,
+                            limit=batch_size * stride,
+                            delta=stride,
                             dtype=ids.dtype)
 
     weight = array_ops.gather(
-        array_ops.reshape(self.interpolate_weight, shape=[-1]),
+        array_ops.reshape(self.grid, shape=[-1]),
         ids + offset)
     weight = weight[..., array_ops.newaxis]
 
@@ -500,10 +648,7 @@ class VectorDiffeomixture(distribution_lib.Distribution):
         self.mixture_distribution.logits - fldj + log_prob, axis=-1)
 
   def _mean(self):
-    # Since we created logits to already be scaled, we can use exp which is
-    # slightly cheaper than `self.mixture_distribution.probs`.
-    p = math_ops.exp(self.mixture_distribution.logits)
-
+    p = self._expand_mix_distribution_probs()
     m = self._expand_base_distribution_mean()
     mean = None
     for k, aff in enumerate(self.interpolated_affine):
@@ -537,9 +682,7 @@ class VectorDiffeomixture(distribution_lib.Distribution):
         self._covariance_of_mean_given_quadrature_component(diag_only=True))
 
   def _mean_of_covariance_given_quadrature_component(self, diag_only):
-    # Since we created logits to already be scaled, we can use exp which is
-    # slightly cheaper than `self.mixture_distribution.probs`.
-    p = math_ops.exp(self.mixture_distribution.logits)
+    p = self.mixture_distribution.probs
 
     # To compute E[Cov(Z|V)], we'll add matrices within three categories:
     # scaled-identity, diagonal, and full. Then we'll combine these at the end.
@@ -611,10 +754,9 @@ class VectorDiffeomixture(distribution_lib.Distribution):
   def _covariance_of_mean_given_quadrature_component(self, diag_only):
     square = math_ops.square if diag_only else vec_osquare
 
-    # Since we created logits to already be scaled, we can use exp which is
-    # slightly cheaper than `self.mixture_distribution.probs`.
-    p = math_ops.exp(self.mixture_distribution.logits)
-
+    p = self._expand_mix_distribution_probs()
+    if not diag_only:
+      p = p[..., array_ops.newaxis, :]  # Assuming event.ndims=1.
     m = self._expand_base_distribution_mean()
 
     cov_e_z_given_v = None
@@ -638,17 +780,25 @@ class VectorDiffeomixture(distribution_lib.Distribution):
     m.set_shape(self.batch_shape.concatenate(self.event_shape))
     return m
 
-
-def maybe_check_mix_param(param, name, expected_base_dtype, validate_args):
-  """Helper which checks validity of `mix_loc` and `mix_scale` init args."""
+  def _expand_mix_distribution_probs(self):
+    p = self.mixture_distribution.probs  # [B, deg]
+    deg = p.shape.with_rank_at_least(1)[-1].value
+    if deg is None:
+      deg = array_ops.shape(p)[-1]
+    event_ndims = self.event_shape.ndims
+    if event_ndims is None:
+      event_ndims = array_ops.shape(self.event_shape_tensor())[0]
+    expand_shape = array_ops.concat([
+        self.mixture_distribution.batch_shape_tensor(),
+        array_ops.ones([event_ndims], dtype=dtypes.int32),
+        [deg],
+    ], axis=0)
+    return array_ops.reshape(p, shape=expand_shape)
+
+
+def maybe_check_quadrature_param(param, name, validate_args):
+  """Helper which checks validity of `loc` and `scale` init args."""
   with ops.name_scope(name="check_" + name, values=[param]):
-    param = ops.convert_to_tensor(param, dtype=expected_base_dtype, name=name)
-
-    if param.dtype.base_dtype != expected_base_dtype:
-      raise TypeError(
-          "dtype mismatch; {}.base_dtype=\"{}\" is not \"{}\".".format(
-              name, param.dtype.base_dtype.name, expected_base_dtype.name))
-
     assertions = []
     if param.shape.ndims is not None:
       if param.shape.ndims == 0:
@@ -679,79 +829,84 @@ def maybe_check_mix_param(param, name, expected_base_dtype, validate_args):
     return param
 
 
-def determine_batch_event_shapes(mix_loc, mix_scale, endpoint_affine):
+def determine_batch_event_shapes(grid, endpoint_affine):
   """Helper to infer batch_shape and event_shape."""
   with ops.name_scope(name="determine_batch_event_shapes"):
-    mix_batch_shape = distribution_util.prefer_static_broadcast_shape(
-        array_ops.shape(mix_loc, name="mix_loc_shape"),
-        array_ops.shape(mix_scale, name="mix_scale_shape"))
-    if isinstance(mix_batch_shape, tensor_shape.TensorShape):
-      mix_batch_shape = mix_batch_shape.with_rank_at_least(1)[:-1]
-    else:
-      s = static_value(mix_batch_shape)
-      if s is not None:
-        mix_batch_shape = ops.convert_to_tensor(
-            s[:-1], dtype=dtypes.int32, name="mix_batch_shape")
-      else:
-        mix_batch_shape = mix_batch_shape[:-1]
-
-    # We broadcast with a 1D constant to automatically make the result a
-    # TensorShape if possible.
-    batch_shape = distribution_util.prefer_static_broadcast_shape(
-        mix_batch_shape,
-        constant_op.constant([], dtype=dtypes.int32, name="batch_shape"))
-    event_shape = constant_op.constant(
-        [], dtype=dtypes.int32, name="event_shape")
+    # grid  # shape: [B, k, q]
+    # endpoint_affine     # len=k, shape: [B, d, d]
+    batch_shape = grid.shape[:-2]
+    batch_shape_tensor = array_ops.shape(grid)[:-2]
+    event_shape = None
+    event_shape_tensor = None
+
+    def _set_event_shape(shape, shape_tensor):
+      if event_shape is None:
+        return shape, shape_tensor
+      return (array_ops.broadcast_static_shape(event_shape, shape),
+              array_ops.broadcast_dynamic_shape(
+                  event_shape_tensor, shape_tensor))
+
     for aff in endpoint_affine:
-      b, e = distribution_util.shapes_from_loc_and_scale(aff.shift, aff.scale)
-      if batch_shape is None:
-        batch_shape = distribution_util.prefer_static_broadcast_shape(
-            mix_batch_shape, b)
-      else:
-        batch_shape = distribution_util.prefer_static_broadcast_shape(
-            batch_shape, b)
-      event_shape = distribution_util.prefer_static_broadcast_shape(
-          event_shape, e)
-    if isinstance(batch_shape, tensor_shape.TensorShape):
-      batch_shape = ops.convert_to_tensor(
-          batch_shape.as_list(), dtype=dtypes.int32, name="batch_shape")
-    if isinstance(event_shape, tensor_shape.TensorShape):
-      event_shape = ops.convert_to_tensor(
-          event_shape.as_list(), dtype=dtypes.int32, name="event_shape")
-    return batch_shape, event_shape
-
-
-def interpolate_loc(deg, interpolate_weight, loc):
+      if aff.shift is not None:
+        batch_shape = array_ops.broadcast_static_shape(
+            batch_shape, aff.shift.shape[:-1])
+        batch_shape_tensor = array_ops.broadcast_dynamic_shape(
+            batch_shape_tensor, array_ops.shape(aff.shift)[:-1])
+        event_shape, event_shape_tensor = _set_event_shape(
+            aff.shift.shape[-1:], array_ops.shape(aff.shift)[-1:])
+
+      if aff.scale is not None:
+        batch_shape = array_ops.broadcast_static_shape(
+            batch_shape, aff.scale.batch_shape)
+        batch_shape_tensor = array_ops.broadcast_dynamic_shape(
+            batch_shape_tensor, aff.scale.batch_shape_tensor())
+        event_shape, event_shape_tensor = _set_event_shape(
+            tensor_shape.TensorShape([aff.scale.range_dimension]),
+            aff.scale.range_dimension_tensor()[array_ops.newaxis])
+
+    return batch_shape, batch_shape_tensor, event_shape, event_shape_tensor
+
+
+def interpolate_loc(grid, loc):
   """Helper which interpolates between two locs."""
   if len(loc) != 2:
     raise NotImplementedError("Currently only bimixtures are supported; "
                               "len(scale)={} is not 2.".format(len(loc)))
-  with ops.name_scope("interpolate_loc", values=[interpolate_weight, loc]):
+  deg = grid.shape.with_rank_at_least(1)[-1].value
+  if deg is None:
+    raise ValueError("Num quadrature grid points must be known prior "
+                     "to graph execution.")
+  with ops.name_scope("interpolate_loc", values=[grid, loc]):
     if loc is None or loc[0] is None and loc[1] is None:
       return [None]*deg
-    w = interpolate_weight[..., array_ops.newaxis, :]  # shape: [B, 1, deg]
+    # shape: [B, 1, k, deg]
+    w = grid[..., array_ops.newaxis, :, :]
     loc = [x[..., array_ops.newaxis]                   # shape: [B, e, 1]
            if x is not None else None for x in loc]
     if loc[0] is None:
-      x = (1. - w) * loc[1]                            # shape: [B, e, deg]
+      x = w[..., 1, :] * loc[1]                        # shape: [B, e, deg]
     elif loc[1] is None:
-      x = w * loc[0]                                   # shape: [B, e, deg]
+      x = w[..., 0, :] * loc[0]                        # shape: [B, e, deg]
     else:
       delta = loc[0] - loc[1]
-      x = w * delta + loc[1]                           # shape: [B, e, deg]
+      x = w[..., 0, :] * delta + loc[1]                # shape: [B, e, deg]
     return [x[..., k] for k in range(deg)]             # list(shape:[B, e])
 
 
-def interpolate_scale(deg, interpolate_weight, scale):
+def interpolate_scale(grid, scale):
   """Helper which interpolates between two scales."""
   if len(scale) != 2:
     raise NotImplementedError("Currently only bimixtures are supported; "
                               "len(scale)={} is not 2.".format(len(scale)))
-  with ops.name_scope("interpolate_scale", values=[interpolate_weight]):
+  deg = grid.shape.with_rank_at_least(1)[-1].value
+  if deg is None:
+    raise ValueError("Num quadrature grid points must be known prior "
+                     "to graph execution.")
+  with ops.name_scope("interpolate_scale", values=[grid]):
     return [linop_add_lib.add_operators([
-        linop_scale(interpolate_weight[..., k], scale[0]),
-        linop_scale(1. - interpolate_weight[..., k], scale[1]),
-    ])[0] for k in range(deg)]
+        linop_scale(grid[..., k, q], s)
+        for k, s in enumerate(scale)
+    ])[0] for q in range(deg)]
 
 
 def linop_scale(w, op):
@@ -791,39 +946,12 @@ def linop_scale(w, op):
 
 def concat_vectors(*args):
   """Concatenates input vectors, statically if possible."""
-  args_ = [static_value(x) for x in args]
+  args_ = [distribution_util.static_value(x) for x in args]
   if any(vec is None for vec in args_):
     return array_ops.concat(args, axis=0)
   return [val for vec in args_ for val in vec]
 
 
-def reduce_prod(x):
-  """Same as `math_ops.reduce_prod` but statically if possible."""
-  x_ = static_value(x)
-  if x_ is not None:
-    return np.prod(x_, dtype=x.dtype.as_numpy_dtype)
-  return array_ops.reduce_prod(x)
-
-
-def ndims_from_shape(shape):
-  """Returns `Tensor`'s `rank` implied by a `Tensor` shape."""
-  if shape.shape.ndims not in (None, 1):
-    raise ValueError("input is not a valid shape: not 1D")
-  if not shape.dtype.is_integer:
-    raise TypeError("input is not a valid shape: wrong dtype")
-  if shape.shape.is_fully_defined():
-    return shape.shape.as_list()[0]
-  return array_ops.shape(shape)[0]
-
-
-def ndims(x):
-  """Returns rank, statically if possible."""
-  x = ops.convert_to_tensor(x)
-  if x.shape.ndims is not None:
-    return x.shape.ndims
-  return array_ops.rank(x)
-
-
 def add(x, y):
   """Adds inputs; interprets `None` as zero."""
   if x is None:
@@ -836,3 +964,18 @@ def add(x, y):
 def vec_osquare(x):
   """Computes the outer-product of a (batch of) vector, i.e., x.T x."""
   return x[..., :, array_ops.newaxis] * x[..., array_ops.newaxis, :]
+
+
+def softmax(x, axis, name=None):
+  """Equivalent to tf.nn.softmax but works around b/70297725."""
+  with ops.name_scope(name, "softmax", [x, axis]):
+    x = ops.convert_to_tensor(x, name="x")
+    ndims = (x.shape.ndims if x.shape.ndims is not None
+             else array_ops.rank(x, name="ndims"))
+    axis = ops.convert_to_tensor(axis, dtype=dtypes.int32, name="axis")
+    axis_ = tensor_util.constant_value(axis)
+    if axis_ is not None:
+      axis = np.int(ndims + axis_ if axis_ < 0 else axis_)
+    else:
+      axis = array_ops.where(axis < 0, ndims + axis, axis)
+  return nn_ops.softmax(x, axis=axis)
diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py
index 356d78b67a8107750f68f7f84d73d1231f5b2b03..526fe2d39aef9aed833b889de80e849c469435e7 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py
@@ -89,14 +89,13 @@ class VectorExponentialDiag(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
-  la = tf.linalg
+  tfd = tf.contrib.distributions
 
   # Initialize a single 2-variate VectorExponential, supported on
   # {(x, y) in R^2 : x > 0, y > 0}.
 
   # The first component has pdf exp{-x}, the second 0.5 exp{-x / 2}
-  vex = ds.VectorExponentialDiag(scale_diag=[1., 2.])
+  vex = tfd.VectorExponentialDiag(scale_diag=[1., 2.])
 
   # Compute the pdf of an`R^2` observation; return a scalar.
   vex.prob([3., 4.]).eval()  # shape: []
@@ -107,7 +106,7 @@ class VectorExponentialDiag(
   scale_diag = [[1., 2, 3],
                 [0.5, 1, 1.5]]     # shape: [2, 3]
 
-  vex = ds.VectorExponentialDiag(loc, scale_diag)
+  vex = tfd.VectorExponentialDiag(loc, scale_diag)
 
   # Compute the pdf of two `R^3` observations; return a length-2 vector.
   x = [[1.9, 2.2, 3.1],
diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py
index b313a851b381e5b3a057fd17e6c2ef4eb0fc34f1..9d5fd9ac4178a1ae29b1ce32f304b22fd3d234dc 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py
@@ -107,16 +107,15 @@ class VectorExponentialLinearOperator(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
-  la = tf.linalg
+  tfd = tf.contrib.distributions
 
   # Initialize a single 2-variate VectorExponential, supported on
   # {(x, y) in R^2 : x > 0, y > 0}.
   mat = [[1.0, 0.1],
          [0.1, 1.0]]
 
-  vex = ds.VectorExponentialLinearOperator(
-      scale=la.LinearOperatorFullMatrix(mat))
+  vex = tfd.VectorExponentialLinearOperator(
+      scale=tf.linalg.LinearOperatorFullMatrix(mat))
 
   # Compute the pdf of an`R^2` observation; return a scalar.
   vex.prob([1., 2.]).eval()  # shape: []
@@ -127,9 +126,9 @@ class VectorExponentialLinearOperator(
   scale_diag = [[1., 2, 3],
                 [0.5, 1, 1.5]]     # shape: [2, 3]
 
-  vex = ds.VectorExponentialLinearOperator(
+  vex = tfd.VectorExponentialLinearOperator(
       loc=mu,
-      scale=la.LinearOperatorDiag(scale_diag))
+      scale=tf.linalg.LinearOperatorDiag(scale_diag))
 
   # Compute the pdf of two `R^3` observations; return a length-2 vector.
   x = [[1.9, 2.2, 3.1],
diff --git a/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py b/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py
index 0e3867809a820f49cfa7f5282c47f786626481a6..8dd983b750d9b39775e570800006011f4968f7f3 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py
@@ -101,10 +101,10 @@ class VectorLaplaceDiag(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 2-variate VectorLaplace.
-  vla = ds.VectorLaplaceDiag(
+  vla = tfd.VectorLaplaceDiag(
       loc=[1., -1],
       scale_diag=[1, 2.])
 
@@ -118,7 +118,7 @@ class VectorLaplaceDiag(
   vla.prob([-1., 0]).eval()  # shape: []
 
   # Initialize a 3-batch, 2-variate scaled-identity VectorLaplace.
-  vla = ds.VectorLaplaceDiag(
+  vla = tfd.VectorLaplaceDiag(
       loc=[1., -1],
       scale_identity_multiplier=[1, 2., 3])
 
@@ -136,7 +136,7 @@ class VectorLaplaceDiag(
   vla.prob([-1., 0]).eval()  # shape: [3]
 
   # Initialize a 2-batch of 3-variate VectorLaplace's.
-  vla = ds.VectorLaplaceDiag(
+  vla = tfd.VectorLaplaceDiag(
       loc=[[1., 2, 3],
            [11, 22, 33]]           # shape: [2, 3]
       scale_diag=[[1., 2, 3],
diff --git a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py
index c7abdbb4caf9bee4cbd5991eb5d652f20dd0f8d1..ec485c95c15da2794b67d2699d2bdd9db97bb6c4 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py
@@ -109,8 +109,7 @@ class VectorLaplaceLinearOperator(
   #### Examples
 
   ```python
-  ds = tf.contrib.distributions
-  la = tf.linalg
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate VectorLaplace with some desired covariance.
   mu = [1., 2, 3]
@@ -124,9 +123,9 @@ class VectorLaplaceLinearOperator(
   #      [ 0.1, -0.3,  0.4]])
 
   # Divide scale by sqrt(2) so that the final covariance will be what we want.
-  vla = ds.VectorLaplaceLinearOperator(
+  vla = tfd.VectorLaplaceLinearOperator(
       loc=mu,
-      scale=la.LinearOperatorLowerTriangular(scale / tf.sqrt(2)))
+      scale=tf.linalg.LinearOperatorLowerTriangular(scale / tf.sqrt(2.)))
 
   # Covariance agrees with cholesky(cov) parameterization.
   vla.covariance().eval()
@@ -143,9 +142,9 @@ class VectorLaplaceLinearOperator(
   scale_diag = [[1., 2, 3],
                 [0.5, 1, 1.5]]     # shape: [2, 3]
 
-  vla = ds.VectorLaplaceLinearOperator(
+  vla = tfd.VectorLaplaceLinearOperator(
       loc=mu,
-      scale=la.LinearOperatorDiag(scale_diag))
+      scale=tf.linalg.LinearOperatorDiag(scale_diag))
 
   # Compute the pdf of two `R^3` observations; return a length-2 vector.
   x = [[-0.9, 0, 0.1],
diff --git a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py
index 544a8710709a0afb56c6ae6f36d35de892e8e420..e1ccf116457a97261b9ce3965552764771d3bdd2 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py
@@ -143,7 +143,7 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution):
         broadcastable with `event_shape`.
       distribution: `tf.Distribution`-like instance. Distribution from which `k`
         iid samples are used as input to transformation `F`.  Default is
-        `ds.Normal(0., 1.)`.
+        `tf.distributions.Normal(loc=0., scale=1.)`.
         Must be a scalar-batch, scalar-event distribution.  Typically
         `distribution.reparameterization_type = FULLY_REPARAMETERIZED` or it is
         a function of non-trainable parameters. WARNING: If you backprop through
diff --git a/tensorflow/contrib/distributions/python/ops/vector_student_t.py b/tensorflow/contrib/distributions/python/ops/vector_student_t.py
index 29d41ab81c62d621c3c3533e1449341e9a085645..8c67647a618d22a58428d78865c4ebf7d98bdf9e 100644
--- a/tensorflow/contrib/distributions/python/ops/vector_student_t.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_student_t.py
@@ -91,14 +91,14 @@ class _VectorStudentT(transformed_distribution.TransformedDistribution):
   Extra leading dimensions, if provided, allow for batches.
 
   ```python
-  ds = tf.contrib.distributions
+  tfd = tf.contrib.distributions
 
   # Initialize a single 3-variate vector Student's t-distribution.
   mu = [1., 2, 3]
   chol = [[1., 0, 0.],
           [1, 3, 0],
           [1, 2, 3]]
-  vt = ds.VectorStudentT(df=2, loc=mu, scale_tril=chol)
+  vt = tfd.VectorStudentT(df=2, loc=mu, scale_tril=chol)
 
   # Evaluate this on an observation in R^3, returning a scalar.
   vt.prob([-1., 0, 1])
@@ -107,7 +107,7 @@ class _VectorStudentT(transformed_distribution.TransformedDistribution):
   mu = [[1., 2, 3],
         [11, 22, 33]]
   chol = ...  # shape 2 x 3 x 3, lower triangular, positive diagonal.
-  vt = ds.VectorStudentT(loc=mu, scale_tril=chol)
+  vt = tfd.VectorStudentT(loc=mu, scale_tril=chol)
 
   # Evaluate this on a two observations, each in R^3, returning a length two
   # tensor.
diff --git a/tensorflow/contrib/eager/README.md b/tensorflow/contrib/eager/README.md
index dcc370cd00d5f93cd5b145a31fd58ef5041a86a8..09242ee47ddd044dfc99e22d5b7751a989c86485 100644
--- a/tensorflow/contrib/eager/README.md
+++ b/tensorflow/contrib/eager/README.md
@@ -76,3 +76,6 @@ For an introduction to eager execution in TensorFlow, see:
 ## Changelog
 
 - 2017/10/31: Initial preview release.
+- 2017/12/01: Example of dynamic neural network:
+  [SPINN: Stack-augmented Parser-Interpreter Neural Network](https://arxiv.org/abs/1603.06021).
+  See [README.md](python/examples/spinn/README.md) for details.
diff --git a/tensorflow/contrib/eager/proto/BUILD b/tensorflow/contrib/eager/proto/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..aedfec8924e7314addd22349c0576a84a58d9aa3
--- /dev/null
+++ b/tensorflow/contrib/eager/proto/BUILD
@@ -0,0 +1,24 @@
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library")
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+tf_proto_library(
+    name = "checkpointable_object_graph_proto",
+    srcs = [
+        "checkpointable_object_graph.proto",
+    ],
+    visibility = ["//tensorflow/contrib/eager/python:__subpackages__"],
+)
diff --git a/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto b/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto
new file mode 100644
index 0000000000000000000000000000000000000000..c962638aa11c06dcd5be6a794314e029ae84e572
--- /dev/null
+++ b/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto
@@ -0,0 +1,56 @@
+syntax = "proto3";
+
+option cc_enable_arenas = true;
+
+package tensorflow.contrib.eager;
+
+// Prototype for an addition to BundleHeaderProto which saves extra information
+// about the objects which own variables, allowing for more robust checkpoint
+// loading into modified programs.
+
+message CheckpointableObjectGraph {
+  message Object {
+    message ObjectReference {
+      // An index into `CheckpointableObjectGraph.nodes`, indicating the object
+      // being referenced.
+      int32 node_id = 1;
+      // A numeric identifier for this object within its parent.
+      int32 local_uid = 2;
+      // A user-provided name for the edge. May be blank/omitted, in which case
+      // there is no explicitly provided local name; fall back on local_uid.
+      string local_name = 3;
+    }
+
+    message VariableReference {
+      // A name for the variable which is unique within the object which owns
+      // it. Does not include a name_scope or variable_scope prefix.
+      string local_name = 1;
+      // The full name of the variable. Used to allow name-based loading of
+      // checkpoints which were saved using an object-based API.
+      string full_name = 2;
+    }
+
+    message SlotVariableReference {
+      // An index into `CheckpointableObjectGraph.nodes`, indicating the object
+      // which created the variable that this variable is slotting for.
+      int32 original_variable_node_id = 1;
+      // The local name of the variable being slotted for within the object that
+      // owns it.
+      string original_variable_local_name = 2;
+      // The name of the slot (e.g. "m"/"v").
+      string slot_name = 3;
+      // The full name of the slot variable. Used to allow name-based loading of
+      // checkpoints which were saved using an object-based API.
+      string full_name = 4;
+    }
+
+    // Objects which this object depends on.
+    repeated ObjectReference children = 1;
+    // Non-slot variables owned by this object.
+    repeated VariableReference variables = 2;
+    // Slot variables owned by this object.
+    repeated SlotVariableReference slot_variables = 3;
+  }
+
+  repeated Object nodes = 1;
+}
diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index bf2e883bc53c3281ef89d1200f5a089305ef3e72..e984c63af7ce2b32ab30121bf34bb2de4dfeb218 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -19,6 +19,8 @@ py_library(
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:numerics",
         "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:template",
         "//tensorflow/python:util",
         "//tensorflow/python:variable_scope",
         "//tensorflow/python/eager:backprop",
@@ -67,6 +69,7 @@ cuda_py_test(
     srcs = ["datasets_test.py"],
     additional_deps = [
         ":datasets",
+        "//tensorflow/contrib/lookup:lookup_py",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
@@ -103,37 +106,6 @@ cuda_py_test(
     ],
 )
 
-py_library(
-    name = "summary_writer",
-    srcs = ["summary_writer.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/summary:gen_summary_ops",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:summary_op_util",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python/eager:context",
-    ],
-)
-
-cuda_py_test(
-    name = "summary_writer_test",
-    srcs = ["summary_writer_test.py"],
-    additional_deps = [
-        ":summary_writer",
-        "//third_party/py/numpy",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/eager:test",
-    ],
-)
-
 py_library(
     name = "metrics",
     srcs = [
@@ -232,6 +204,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":network",
+        "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_test_lib",
@@ -246,6 +219,39 @@ py_test(
     ],
 )
 
+py_library(
+    name = "checkpointable",
+    srcs = ["checkpointable.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//tensorflow:internal"],
+    deps = [
+        "//tensorflow/contrib/eager/proto:checkpointable_object_graph_proto_py",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+    ],
+)
+
+py_test(
+    name = "checkpointable_test",
+    srcs = ["checkpointable_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":checkpointable",
+        ":network",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:layers",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:test",
+        "@six_archive//:six",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/eager/python/checkpointable.py b/tensorflow/contrib/eager/python/checkpointable.py
new file mode 100644
index 0000000000000000000000000000000000000000..b141ffb2bc03b8e38f8481bc044c3aae7e156c15
--- /dev/null
+++ b/tensorflow/contrib/eager/python/checkpointable.py
@@ -0,0 +1,392 @@
+"""An object-local variable management scheme."""
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import re
+
+from tensorflow.contrib.eager.proto import checkpointable_object_graph_pb2
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training import optimizer as optimizer_lib
+from tensorflow.python.training import saver as saver_lib
+
+_CheckpointableReference = collections.namedtuple(
+    "_CheckpointableReference",
+    [
+        "name",  # The local name if explicitly specified, else None.
+        "local_uid",  # 0 for the first dependency, 1 for the next, ... Used for
+        # routing checkpointed variables to their correct
+        # Checkpointables when "name" is not set (see docstring of
+        # `track_checkpointable`).
+        "ref"  # The Checkpointable object being referenced.
+    ])
+
+_OwnedVariable = collections.namedtuple(
+    "_OwnedVariable",
+    [
+        "name",  # The variable's (local) name.
+        "variable"  # The owned variable object.
+    ])
+
+# Validation regular expression for the local names of Checkpointable
+# objects. In particular, disallows "/" in names, and reserves
+# underscore-prefixed names.
+_VALID_LOCAL_NAME = re.compile(r"^[A-Za-z0-9.][A-Za-z0-9_.-]*$")
+
+# Keyword for identifying that the next bit of a checkpoint variable name is a
+# slot name. May not be the local name of a checkpointable. Checkpoint names for
+# slot variables look like:
+#
+#   <path to variable>/<_OPTIMIZER_SLOTS_NAME>/<path to optimizer>/<slot name>
+#
+# Where <path to variable> is a full path from the checkpoint root to the
+# variable being slotted for.
+_OPTIMIZER_SLOTS_NAME = "_OPTIMIZER_SLOT"
+
+
+class Checkpointable(object):
+  """Manages variables and dependencies on other objects.
+
+  To make reliable checkpoints, all `Checkpointable`s on which this object
+  depends must be registered in the constructor using `track_checkpointable` in
+  a deterministic order, and if possible they should be named. Variables may be
+  created using `add_variable` outside of the constructor and in any order, but
+  only these variables will be saved.
+  """
+
+  def __init__(self):
+    # Basically less useful OrderedDicts but without the reference cycles.
+    # TODO(allenl): Switch these to OrderedDict once TensorFlow supports only
+    # Python 3.6+.
+    self._checkpoint_dependencies = []  # A list of _CheckpointableReference
+    # objects.
+    self._dependency_names = set()
+    self._owned_variables = []  # A list of _OwnedVariable objects.
+    self._owned_variable_names = set()
+
+  def add_variable(self, name, shape, dtype=None, initializer=None, **kwargs):
+    """Create a new variable object to be saved with this `Checkpointable`.
+
+    If the user has requested that this object or another `Checkpointable` which
+    depends on this object be restored from a checkpoint (deferred loading
+    before variable object creation), `initializer` may be ignored and the value
+    from the checkpoint used instead.
+
+    Args:
+      name: A name for the variable. Must be unique within this object.
+      shape: The shape of the variable.
+      dtype: The data type of the variable.
+      initializer: The initializer to use. Ignored if deferred loading has been
+        requested.
+      **kwargs: Passed to get_variable.
+
+    Returns:
+      The new variable object.
+
+    Raises:
+      ValueError: If the variable name is not unique.
+    """
+    if name in self._owned_variable_names:
+      raise ValueError(
+          ("A variable named '%s' already exists in this Checkpointable, but "
+           "Checkpointable.add_variable called to create another with "
+           "that name. Variable names must be unique within a Checkpointable "
+           "object.") % (name,))
+    if "getter" in kwargs:
+      # Allow the getter to be overridden, typically because there is a need for
+      # compatibility with some other variable creation mechanism. This should
+      # be relatively uncommon in user code.
+      getter = kwargs.pop("getter")
+    else:
+      getter = variable_scope.get_variable
+    # TODO(allenl): handle deferred loading
+    new_variable = getter(
+        name=name, shape=shape, dtype=dtype, initializer=initializer, **kwargs)
+    self._owned_variables.append(
+        _OwnedVariable(name=name, variable=new_variable))
+    self._owned_variable_names.add(name)
+    return new_variable
+
+  def track_checkpointable(self, checkpointable, name=None):
+    """Declare a dependency on another `Checkpointable` object.
+
+    Indicates that checkpoints for this object should include variables from
+    `checkpointable`.
+
+    Variables in a checkpoint are mapped to `Checkpointable`s based on names if
+    provided when the checkpoint was written, but otherwise use the order those
+    `Checkpointable`s were declared as dependencies. Both `name` arguments and
+    the dependency declaration order should be deterministic.
+
+    There are two sufficient conditions to avoid breaking existing checkpoints
+    when modifying a class: (1) New dependencies must be declared after existing
+    dependencies, and (2) dependencies which were previously declared may never
+    be removed (a trivial placeholder with the same name may be used instead).
+
+    Args:
+      checkpointable: A `Checkpointable` which this object depends on.
+      name: A local name for `checkpointable`, used for loading checkpoints into
+        the correct objects. If provided, it must be unique within this
+        `Checkpointable`. If None, dependency declaration order is used instead.
+
+    Returns:
+      `checkpointable`, for convenience when declaring a dependency and
+      assigning to a member variable in one statement.
+
+    Raises:
+      RuntimeError: If __init__ was not called.
+      TypeError: If `checkpointable` does not inherit from `Checkpointable`.
+      ValueError: For invalid names.
+    """
+    if not hasattr(self, "_checkpoint_dependencies"):
+      raise RuntimeError("Need to call Checkpointable.__init__ before calling "
+                         "Checkpointable.track_checkpointable().")
+    if not isinstance(checkpointable, Checkpointable):
+      raise TypeError(
+          ("Checkpointable.track_checkpointable() passed type %s, not a "
+           "Checkpointable.") % (type(checkpointable),))
+    if name is not None:
+      if not _VALID_LOCAL_NAME.match(name):
+        raise ValueError(
+            ("Checkpointable names must match the regular expression '%s', but "
+             "got an invalid name '%s' instead.") % (_VALID_LOCAL_NAME.pattern,
+                                                     name))
+      if name in self._dependency_names:
+        raise ValueError(
+            ("Called Checkpointable.track_checkpointable() with name='%s', but "
+             "a Checkpointable with this name is already declared as a "
+             "dependency. If provided, names must be unique.") % (name,))
+      self._dependency_names.add(name)
+    self._checkpoint_dependencies.append(
+        _CheckpointableReference(
+            name=name,
+            ref=checkpointable,
+            # TODO(allenl): Should this be exposed to allow users to stop
+            # depending on things and still load checkpoints when not using
+            # names?
+            local_uid=len(self._checkpoint_dependencies)))
+    return checkpointable
+
+  @property
+  def checkpoint_dependencies(self):
+    """Other `Checkpointable` objects on which this object depends."""
+    return self._checkpoint_dependencies
+
+
+def _breadth_first_checkpointable_traversal(root_checkpointable):
+  """Find shortest paths to all variables owned by dependencies of root."""
+  bfs_sorted = []
+  root_checkpointable_reference = _CheckpointableReference(
+      name=None, local_uid=0, ref=root_checkpointable)
+  to_visit = collections.deque([root_checkpointable_reference])
+  path_to_root = {root_checkpointable_reference: ()}
+  while to_visit:
+    current_checkpointable = to_visit.popleft()
+    bfs_sorted.append(current_checkpointable)
+    for child_checkpointable in (
+        current_checkpointable.ref.checkpoint_dependencies):
+      if child_checkpointable not in path_to_root:
+        path_to_root[child_checkpointable] = (
+            path_to_root[current_checkpointable] + (child_checkpointable,))
+        to_visit.append(child_checkpointable)
+  return bfs_sorted, path_to_root
+
+
+def _object_prefix_from_path(path_to_root):
+  return "/".join((checkpointable.name if checkpointable.name else "_%d" % (
+      checkpointable.local_uid,)) for checkpointable in path_to_root)
+
+
+def _escape_variable_name(variable_name):
+  # We need to support slashes in variable names for compatibility, since this
+  # naming scheme is being patched in to things like Layer.add_variable where
+  # slashes were previously accepted. We also want to use slashes to indicate
+  # edges traversed to reach the variable, so we escape forward slashes in
+  # variable names.
+  return variable_name.replace("_S_", "_S_.").replace(r"/", r"_S__")
+
+
+def _variable_naming_for_object(path_to_root):
+  """Make a function for naming variables in an object."""
+  # Name non-slot variables:
+  #
+  #   <path to node>/<local variable name>
+  #
+  # <path to node> is not necessarily unique, but this is fine since we also
+  # save the graph of `Checkpointable`s with the checkpoint. Even if this path
+  # no longer exists because of a change in the Python program, we can look up
+  # the `Checkpointable` which owns the variable in the checkpoint's graph and
+  # use another path if one still exists.
+
+  object_prefix = _object_prefix_from_path(path_to_root)
+  if object_prefix:
+    object_prefix += "/"
+
+  def _name_single_variable(owned_variable):
+    """Names a variable within an object."""
+    return object_prefix + _escape_variable_name(owned_variable.name)
+
+  return _name_single_variable
+
+
+def _slot_variable_naming_for_optimizer(optimizer, path_to_root):
+  """Make a function for naming slot variables in an optimizer."""
+  # Name slot variables:
+  #
+  #   <variable name>/<_OPTIMIZER_SLOTS_NAME>/<optimizer path>/<slot name>
+  #
+  # where <variable name> is exactly the checkpoint name used for the original
+  # variable, including the path from the checkpoint root and the local name in
+  # the object which owns it. Note that we only save slot variables if the
+  # variable it's slotting for is also being saved.
+
+  optimizer_identifier = "/%s/%s/" % (_OPTIMIZER_SLOTS_NAME,
+                                      _object_prefix_from_path(path_to_root))
+
+  def _name_slot_variable(variable_path, slot_name):
+    """With an optimizer specified, name a slot variable."""
+
+    if not _VALID_LOCAL_NAME.match(slot_name):
+      # Slot variable names include the name of the slot. We need to
+      # validate that part of the name to be sure that the checkpoint name
+      # is a valid name scope name.
+      raise ValueError(
+          ("Could not save slot variables for optimizer %s, because its "
+           "slot name has invalid characters (got '%s', was expecting it "
+           "to match the regular expression '%s').") %
+          (optimizer, slot_name, _VALID_LOCAL_NAME.pattern))
+
+    return variable_path + optimizer_identifier + slot_name
+
+  return _name_slot_variable
+
+
+def _serialize_non_slot_variables(checkpointable_objects, path_to_root,
+                                  object_graph_proto):
+  """Name non-slot variables and add them to `object_graph_proto`."""
+  named_variables = {}
+  non_slot_variables = []
+  checkpoint_node_ids = {}
+
+  for checkpoint_id, checkpointable in enumerate(checkpointable_objects):
+    checkpoint_node_ids[checkpointable] = checkpoint_id
+
+  for checkpoint_id, checkpointable in enumerate(checkpointable_objects):
+    naming_scheme = _variable_naming_for_object(path_to_root[checkpointable])
+    object_proto = object_graph_proto.nodes.add()
+    for owned_variable in checkpointable.ref._owned_variables:  # pylint: disable=protected-access
+      variable_name = naming_scheme(owned_variable)
+      named_variables[variable_name] = owned_variable.variable
+      non_slot_variables.append((
+          variable_name,  # The variable's full checkpoint name
+          owned_variable,  # The variable's _OwnedVariable object
+          checkpoint_id))  # The checkpoint ID of the node which owns this
+      # variable.
+      variable_proto = object_proto.variables.add()
+      variable_proto.local_name = owned_variable.name
+      # Figure out the name-based Saver's name for this variable.
+      saver_dict = saver_lib.BaseSaverBuilder.OpListToDict(
+          [owned_variable.variable], convert_variable_to_tensor=False)
+      variable_full_name, = saver_dict.keys()
+      variable_proto.full_name = variable_full_name
+
+    for child in checkpointable.ref.checkpoint_dependencies:
+      child_proto = object_proto.children.add()
+      child_proto.node_id = checkpoint_node_ids[child]
+      child_proto.local_uid = child.local_uid
+      if child.name is not None:
+        child_proto.local_name = child.name
+  return named_variables, non_slot_variables
+
+
+def _serialize_slot_variables(checkpointable_objects, path_to_root,
+                              non_slot_variables, object_graph_proto):
+  """Name slot variables and add them to `object_graph_proto`."""
+  named_slot_variables = {}
+  for optimizer_checkpoint_id, checkpointable_ref in enumerate(
+      checkpointable_objects):
+    if isinstance(checkpointable_ref.ref, optimizer_lib.Optimizer):
+      optimizer_object_proto = object_graph_proto.nodes[optimizer_checkpoint_id]
+      naming_scheme = _slot_variable_naming_for_optimizer(
+          optimizer=checkpointable_ref.ref,
+          path_to_root=path_to_root[checkpointable_ref])
+      slot_names = checkpointable_ref.ref.get_slot_names()
+      for (variable_path, owned_variable,
+           original_node_checkpoint_id) in non_slot_variables:
+        for slot_name in slot_names:
+          slot_variable = checkpointable_ref.ref.get_slot(
+              owned_variable.variable, slot_name)
+          if slot_variable is not None:
+            checkpoint_name = naming_scheme(
+                variable_path=variable_path, slot_name=slot_name)
+            named_slot_variables[checkpoint_name] = slot_variable
+            slot_variable_proto = optimizer_object_proto.slot_variables.add()
+            slot_variable_proto.slot_name = slot_name
+            # Figure out the name-based Saver's name for this variable.
+            saver_dict = saver_lib.BaseSaverBuilder.OpListToDict(
+                [slot_variable], convert_variable_to_tensor=False)
+            slot_variable_full_name, = saver_dict.keys()
+            slot_variable_proto.full_name = slot_variable_full_name
+            slot_variable_proto.original_variable_local_name = (
+                owned_variable.name)
+            slot_variable_proto.original_variable_node_id = (
+                original_node_checkpoint_id)
+  return named_slot_variables
+
+
+# TODO(allenl): Convenience utility for saving multiple objects (i.e. construct
+# a root Checkpointable if passed a list of Checkpointables).
+def _serialize_object_graph(root_checkpointable):
+  """Determine checkpoint keys for variables and build a serialized graph.
+
+  Non-slot variables are keyed based on a shortest path from the root saveable
+  to the object which owns the variable (i.e. the one which called
+  `Checkpointable.add_variable` to create it).
+
+  Slot variables are keyed based on a shortest path to the variable being
+  slotted for, a shortest path to their optimizer, and the slot name.
+
+  Args:
+    root_checkpointable: A `Checkpointable` object whose variables (including
+      the variables of dependencies, recursively) should be saved.
+
+  Returns:
+    A tuple of (named_variables, object_graph_proto):
+      named_variables: A dictionary mapping names to variable objects.
+      object_graph_proto: A CheckpointableObjectGraph protocol buffer containing
+        the serialized object graph and variable references.
+
+  Raises:
+    ValueError: If there are invalid characters in an optimizer's slot names.
+  """
+  checkpointable_objects, path_to_root = (
+      _breadth_first_checkpointable_traversal(root_checkpointable))
+  object_graph_proto = (
+      checkpointable_object_graph_pb2.CheckpointableObjectGraph())
+
+  # Gather non-slot variables.
+  named_variables, non_slot_variables = _serialize_non_slot_variables(
+      checkpointable_objects, path_to_root, object_graph_proto)
+
+  # Gather slot variables which are associated with variables gathered above.
+  named_slot_variables = _serialize_slot_variables(
+      checkpointable_objects, path_to_root, non_slot_variables,
+      object_graph_proto)
+
+  named_variables.update(named_slot_variables)
+  return named_variables, object_graph_proto
diff --git a/tensorflow/contrib/eager/python/checkpointable_test.py b/tensorflow/contrib/eager/python/checkpointable_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..f820990bbe5fe6c9b4cdf890680aaad0847010c0
--- /dev/null
+++ b/tensorflow/contrib/eager/python/checkpointable_test.py
@@ -0,0 +1,277 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+import six
+
+from tensorflow.contrib.eager.python import checkpointable
+from tensorflow.contrib.eager.python import network as network_lib
+from tensorflow.python.eager import context
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.layers import core
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.training import adam
+from tensorflow.python.training import training_util
+
+
+class CheckpointableDenseLayer(core.Dense, checkpointable.Checkpointable):
+
+  def __init__(self, *args, **kwargs):
+    checkpointable.Checkpointable.__init__(self)
+    core.Dense.__init__(self, *args, **kwargs)
+
+  def add_variable(self, name, shape, **kwargs):
+    # Calls both Checkpointable.add_variable and Layer.add_variable. Eventually
+    # Layer.add_variable should inherit from Checkpointable and simply call
+    # super and then do post-processing.
+    return checkpointable.Checkpointable.add_variable(
+        self,
+        name=name,
+        shape=shape,
+        getter=functools.partial(core.Dense.add_variable, self),
+        **kwargs)
+
+
+# pylint: disable=not-callable
+class CheckpointableNetwork(network_lib.Network, checkpointable.Checkpointable):
+
+  def __init__(self):
+    network_lib.Network.__init__(self)
+    checkpointable.Checkpointable.__init__(self)
+
+  def track_layer(self, layer, name=None):
+    self.track_checkpointable(layer, name=name)
+    return super(CheckpointableNetwork, self).track_layer(layer)
+
+
+class CheckpointableAdam(adam.AdamOptimizer, checkpointable.Checkpointable):
+
+  def __init__(self, *args, **kwargs):
+    checkpointable.Checkpointable.__init__(self)
+    adam.AdamOptimizer.__init__(self, *args, **kwargs)
+
+  # NOTE: Copied from AdamOptimizer with modifications to use add_variable
+  # for non-slot variables. These contortions are necessary to maintain
+  # checkpoint compatibility with variable.name based saving.
+  def _create_slots(self, var_list):
+    # Create the beta1 and beta2 accumulators on the same device as the first
+    # variable. Sort the var_list to make sure this device is consistent across
+    # workers (these need to go on the same PS, otherwise some updates are
+    # silently ignored).
+    first_var = min(var_list, key=lambda x: x.name)
+
+    create_new = self._beta1_power is None
+    if not create_new and context.in_graph_mode():
+      create_new = (self._beta1_power.graph is not first_var.graph)
+
+    if create_new:
+      with ops.colocate_with(first_var):
+
+        def _variable_getter(name, shape, dtype, initializer):
+          del shape, dtype  # not used, but there for compatibility
+          return variable_scope.variable(
+              name=name, initial_value=initializer, trainable=False)
+
+        self._beta1_power = self.add_variable(
+            name="beta1_power",
+            shape=[],
+            initializer=self._beta1,
+            getter=_variable_getter)
+        self._beta2_power = self.add_variable(
+            name="beta2_power",
+            shape=[],
+            initializer=self._beta2,
+            getter=_variable_getter)
+    # Create slots for the first and second moments.
+    for v in var_list:
+      self._zeros_slot(v, "m", self._name)
+      self._zeros_slot(v, "v", self._name)
+
+  # TODO(allenl): Override slot variable creation (_get_or_make_slot,
+  # _get_or_make_slot_with_initializer, _zeros_slot) to allow deferred
+  # loading. Likely no need to run this through add_variable, since gathering
+  # slot variables is special cased anyway.
+
+
+class MyNetwork(CheckpointableNetwork):
+  """A concrete Network for testing."""
+
+  def __init__(self):
+    super(MyNetwork, self).__init__()
+    self._named = self.track_layer(
+        CheckpointableDenseLayer(1, use_bias=True), name="named_dense")
+    self._unnamed = self.track_layer(
+        CheckpointableDenseLayer(1, use_bias=False))
+
+  def call(self, values):
+    return self._unnamed(self._named(values))
+
+
+class Root(checkpointable.Checkpointable):
+  """A stand-in for a Trainer class."""
+
+  def __init__(self, optimizer, network):
+    super(Root, self).__init__()
+    self.track_checkpointable(optimizer, name="optimizer")
+    self.track_checkpointable(network, name="network")
+    self._global_step = None
+
+  @property
+  def global_step(self):
+    if self._global_step is None:
+      # Get the default create_global_step utility to actually call
+      # self.add_variable, by setting a custom getter.
+      def _owned_variable_as_custom_getter(getter, *args, **kwargs):
+        return self.add_variable(*args, getter=getter, **kwargs)
+
+      with variable_scope.variable_scope(
+          "", custom_getter=_owned_variable_as_custom_getter):
+        self._global_step = training_util.create_global_step()
+    return self._global_step
+
+
+class CheckpointNamingTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testNamingWithOptimizer(self):
+    input_value = constant_op.constant([[3.]])
+    network = MyNetwork()
+    # A nuisance Network using the same optimizer. Its slot variables should not
+    # go in the checkpoint, since it is never depended on.
+    other_network = MyNetwork()
+    optimizer = CheckpointableAdam(0.001)
+    root_checkpointable = Root(optimizer=optimizer, network=network)
+    if context.in_eager_mode():
+      optimizer.minimize(
+          lambda: network(input_value),
+          global_step=root_checkpointable.global_step)
+      optimizer.minimize(
+          lambda: other_network(input_value),
+          global_step=root_checkpointable.global_step)
+    else:
+      train_op = optimizer.minimize(
+          network(input_value), global_step=root_checkpointable.global_step)
+      optimizer.minimize(
+          other_network(input_value),
+          global_step=root_checkpointable.global_step)
+      self.evaluate(variables.global_variables_initializer())
+      self.evaluate(train_op)
+    named_variables, serialized_graph = checkpointable._serialize_object_graph(
+        root_checkpointable)
+    expected_checkpoint_names = (
+        # Created in the root node, so no prefix.
+        "global_step",
+        # No name provided to track_checkpointable(), so the position (1, after
+        # the named track_checkpointable() which is 0) is used instead.
+        "network/_1/kernel",
+        # track_checkpointable() with a name provided, so that's used
+        "network/named_dense/kernel",
+        "network/named_dense/bias",
+        # The optimizer creates two non-slot variables
+        "optimizer/beta1_power",
+        "optimizer/beta2_power",
+        # Slot variables
+        "network/_1/kernel/_OPTIMIZER_SLOT/optimizer/m",
+        "network/_1/kernel/_OPTIMIZER_SLOT/optimizer/v",
+        "network/named_dense/kernel/_OPTIMIZER_SLOT/optimizer/m",
+        "network/named_dense/kernel/_OPTIMIZER_SLOT/optimizer/v",
+        "network/named_dense/bias/_OPTIMIZER_SLOT/optimizer/m",
+        "network/named_dense/bias/_OPTIMIZER_SLOT/optimizer/v",
+    )
+    six.assertCountEqual(self, expected_checkpoint_names,
+                         named_variables.keys())
+    # Check that we've mapped to the right variable objects (not exhaustive)
+    self.assertEqual("global_step:0", named_variables["global_step"].name)
+    self.assertEqual("my_network/checkpointable_dense_layer_1/kernel:0",
+                     named_variables["network/_1/kernel"].name)
+    self.assertEqual("my_network/checkpointable_dense_layer/kernel:0",
+                     named_variables["network/named_dense/kernel"].name)
+    self.assertEqual("beta1_power:0",
+                     named_variables["optimizer/beta1_power"].name)
+    self.assertEqual("beta2_power:0",
+                     named_variables["optimizer/beta2_power"].name)
+    # Spot check the generated protocol buffers.
+    self.assertEqual(0, serialized_graph.nodes[0].children[0].local_uid)
+    self.assertEqual("optimizer",
+                     serialized_graph.nodes[0].children[0].local_name)
+    optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[
+        0].node_id]
+    self.assertEqual("beta1_power", optimizer_node.variables[0].local_name)
+    self.assertEqual("beta1_power", optimizer_node.variables[0].full_name)
+    self.assertEqual(
+        "kernel", optimizer_node.slot_variables[0].original_variable_local_name)
+    original_variable_owner = serialized_graph.nodes[
+        optimizer_node.slot_variables[0].original_variable_node_id]
+    self.assertEqual("kernel", original_variable_owner.variables[0].local_name)
+    self.assertEqual("m", optimizer_node.slot_variables[0].slot_name)
+    # We strip off the :0 suffix, as variable.name-based saving does.
+    self.assertEqual("my_network/checkpointable_dense_layer/kernel/Adam",
+                     optimizer_node.slot_variables[0].full_name)
+    self.assertEqual("my_network/checkpointable_dense_layer/kernel/Adam:0",
+                     optimizer.get_slot(
+                         var=named_variables["network/named_dense/kernel"],
+                         name="m").name)
+
+  def _get_checkpoint_name(self, name):
+    root = checkpointable.Checkpointable()
+    with variable_scope.variable_scope("get_checkpoint_name"):
+      # Create the variable in a variable scope so that we get more relaxed
+      # naming rules (variables outside a scope may not start with "_", "/" or
+      # "-"). Since we don't use the scope part of the name, these cases are
+      # somewhat annoying.
+      root.add_variable(name=name, shape=[1, 2], dtype=dtypes.float64)
+    named_variables, _ = checkpointable._serialize_object_graph(root)
+    checkpoint_name, = named_variables.keys()
+    with ops.name_scope("root/" + checkpoint_name):
+      pass  # Make sure we can use this as an op name if we prefix it.
+    return checkpoint_name
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testVariableNameEscaping(self):
+    self.assertEqual(r"a_S__b_S__c", self._get_checkpoint_name(r"a/b/c"))
+    self.assertEqual(r"", self._get_checkpoint_name(r""))
+    self.assertEqual(r"_S__", self._get_checkpoint_name(r"/"))
+    self.assertEqual(r"_S___S_._", self._get_checkpoint_name(r"/_S__"))
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testNumberedPath(self):
+    root = checkpointable.Checkpointable()
+    leaf = checkpointable.Checkpointable()
+    root.track_checkpointable(leaf)
+    leaf.add_variable(name="v", shape=[])
+    named_variables, _ = checkpointable._serialize_object_graph(root)
+    variable_name, = named_variables.keys()
+    self.assertEqual(r"_0/v", variable_name)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testLocalNameValidation(self):
+    root = checkpointable.Checkpointable()
+    leaf = checkpointable.Checkpointable()
+    with self.assertRaisesRegexp(ValueError, "invalid name"):
+      # Leading underscores are reserved, which avoids conflicts with
+      # un-named edges in paths and the optimizer slots identifier.
+      root.track_checkpointable(leaf, name="_12")
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py
index b559cce6b12a809d671ce7855680063f02a4ac22..a7f50c13bb992fd47669fb9956dde6b271e16ffd 100644
--- a/tensorflow/contrib/eager/python/datasets.py
+++ b/tensorflow/contrib/eager/python/datasets.py
@@ -23,6 +23,7 @@ import threading
 from tensorflow.contrib.data.python.ops import prefetching_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -41,7 +42,7 @@ def _generate_shared_name(prefix):
     global _uid_counter
     uid = _uid_counter
     _uid_counter += 1
-  return "{}_{}".format(prefix, uid)
+  return "{}{}".format(prefix, uid)
 
 
 class Iterator(object):
@@ -75,13 +76,16 @@ class Iterator(object):
           format(type(self)))
     with ops.device("/device:CPU:0"):
       ds_variant = dataset._as_variant_tensor()  # pylint: disable=protected-access
+      self._output_classes = dataset.output_classes
       self._output_types = dataset.output_types
       self._output_shapes = dataset.output_shapes
-      self._flat_output_types = nest.flatten(dataset.output_types)
-      self._flat_output_shapes = nest.flatten(dataset.output_shapes)
+      self._flat_output_types = nest.flatten(
+          sparse.as_dense_types(self._output_types, self._output_classes))
+      self._flat_output_shapes = nest.flatten(
+          sparse.as_dense_shapes(self._output_shapes, self._output_classes))
       self._resource = gen_dataset_ops.iterator(
-          container="",
-          shared_name=_generate_shared_name("eager_iterator"),
+          shared_name="",
+          container=_generate_shared_name("eageriterator"),
           output_types=self._flat_output_types,
           output_shapes=self._flat_output_shapes)
       gen_dataset_ops.make_iterator(ds_variant, self._resource)
@@ -125,22 +129,78 @@ class Iterator(object):
   def __next__(self):  # For Python 3 compatibility
     return self.next()
 
-  def next(self):
-    """Return the next tf.Tensor from the dataset."""
+  def _next_internal(self):
+    """Returns a nested structure of `tf.Tensor`s containing the next element.
+    """
     with ops.device(self._device):
-      try:
-        if self._buffer_resource_handle is not None:
-          ret = prefetching_ops.function_buffering_resource_get_next(
-              function_buffer_resource=self._buffer_resource_handle,
-              output_types=self._flat_output_types)
-        else:
-          # TODO(ashankar): Consider removing this ops.device() contextmanager
-          # and instead mimic ops placement in graphs: Operations on resource
-          # handles execute on the same device as where the resource is placed.
-          ret = gen_dataset_ops.iterator_get_next(
-              self._resource,
-              output_types=self._flat_output_types,
-              output_shapes=self._flat_output_shapes)
-      except errors.OutOfRangeError:
-        raise StopIteration
-      return nest.pack_sequence_as(self._output_types, ret)
+      if self._buffer_resource_handle is not None:
+        ret = prefetching_ops.function_buffering_resource_get_next(
+            function_buffer_resource=self._buffer_resource_handle,
+            output_types=self._flat_output_types)
+      else:
+        # TODO(ashankar): Consider removing this ops.device() contextmanager
+        # and instead mimic ops placement in graphs: Operations on resource
+        # handles execute on the same device as where the resource is placed.
+        ret = gen_dataset_ops.iterator_get_next(
+            self._resource,
+            output_types=self._flat_output_types,
+            output_shapes=self._flat_output_shapes)
+
+    return sparse.deserialize_sparse_tensors(
+        nest.pack_sequence_as(self._output_types, ret), self._output_types,
+        self._output_shapes, self._output_classes)
+
+  def next(self):
+    """Returns a nested structure of `tf.Tensor`s containing the next element.
+    """
+    try:
+      return self._next_internal()
+    except errors.OutOfRangeError:
+      raise StopIteration
+
+  @property
+  def output_classes(self):
+    """Returns the class of each component of an element of this iterator.
+
+    The expected values are `tf.Tensor` and `tf.SparseTensor`.
+
+    Returns:
+      A nested structure of Python `type` objects corresponding to each
+      component of an element of this dataset.
+    """
+    return self._output_classes
+
+  @property
+  def output_shapes(self):
+    """Returns the shape of each component of an element of this iterator.
+
+    Returns:
+      A nested structure of `tf.TensorShape` objects corresponding to each
+      component of an element of this dataset.
+    """
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    """Returns the type of each component of an element of this iterator.
+
+    Returns:
+      A nested structure of `tf.DType` objects corresponding to each component
+      of an element of this dataset.
+    """
+    return self._output_types
+
+  def get_next(self, name=None):
+    """Returns a nested structure of `tf.Tensor`s containing the next element.
+
+    Args:
+      name: (Optional.) A name for the created operation. Currently unused.
+
+    Returns:
+      A nested structure of `tf.Tensor` objects.
+
+    Raises:
+      `tf.errors.OutOfRangeError`: If the end of the dataset has been reached.
+    """
+    del name
+    return self._next_internal()
diff --git a/tensorflow/contrib/eager/python/datasets_test.py b/tensorflow/contrib/eager/python/datasets_test.py
index c924d81c9d85e638e4f35f260664c0ee7d03257e..a1611e92b113839c2dd2a3b2560b0ba90c0a7ef0 100644
--- a/tensorflow/contrib/eager/python/datasets_test.py
+++ b/tensorflow/contrib/eager/python/datasets_test.py
@@ -16,11 +16,19 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import time
+
+import numpy as np
+
+from tensorflow.contrib import lookup
 from tensorflow.contrib.eager.python import datasets
 from tensorflow.python.data import Dataset
 from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
 
@@ -33,6 +41,15 @@ class IteratorTest(test.TestCase):
       got.append(t.numpy())
     self.assertAllEqual([0, 1, 2, 3], got)
 
+  def testGetNext(self):
+    iterator = datasets.Iterator(Dataset.range(4))
+    self.assertEqual(0, iterator.get_next().numpy())
+    self.assertEqual(1, iterator.get_next().numpy())
+    self.assertEqual(2, iterator.get_next().numpy())
+    self.assertEqual(3, iterator.get_next().numpy())
+    with self.assertRaises(errors.OutOfRangeError):
+      iterator.get_next()
+
   def testMultipleIteratorsOnTheSameDataset(self):
     ds = Dataset.range(4)
     it1 = datasets.Iterator(ds)
@@ -64,6 +81,18 @@ class IteratorTest(test.TestCase):
     got = [x.numpy() for x in it]
     self.assertAllEqual([0, 4, 16, 36], got)
 
+  def testMapCaptureLookupTable(self):
+    default_val = -1
+    keys = constant_op.constant(['brain', 'salad', 'surgery'])
+    values = constant_op.constant([0, 1, 2], dtypes.int64)
+    table = lookup.HashTable(
+        lookup.KeyValueTensorInitializer(keys, values), default_val)
+    dataset = Dataset.from_tensor_slices(['brain', 'salad', 'surgery'])
+    dataset = dataset.map(table.lookup)
+    it = datasets.Iterator(dataset)
+    got = [x.numpy() for x in it]
+    self.assertAllEqual([0, 1, 2], got)
+
   def testMultipleIteratorsOnADatasetThatUsesFunctions(self):
     ds = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6]).map(math_ops.square)
 
@@ -72,6 +101,53 @@ class IteratorTest(test.TestCase):
     got2 = [x.numpy() for x in datasets.Iterator(ds)]
     self.assertAllEqual(got1, got2)
 
+  def assertSparseValuesEqual(self, a, b):
+    self.assertAllEqual(a.indices, b.indices)
+    self.assertAllEqual(a.values, b.values)
+    self.assertAllEqual(a.dense_shape, b.dense_shape)
+
+  def testSparseTensorElements(self):
+    components = (sparse_tensor.SparseTensorValue(
+        indices=np.array([[0, 0], [1, 0], [2, 0]]),
+        values=np.array([0, 0, 0]),
+        dense_shape=np.array([3, 1])),
+                  sparse_tensor.SparseTensorValue(
+                      indices=np.array([[0, 0], [1, 1], [2, 2]]),
+                      values=np.array([1, 2, 3]),
+                      dense_shape=np.array([3, 3])))
+
+    expected = [
+        (sparse_tensor.SparseTensorValue(
+            indices=np.array([[0]]),
+            values=np.array([0]),
+            dense_shape=np.array([1])),
+         sparse_tensor.SparseTensorValue(
+             indices=np.array([[0]]),
+             values=np.array([1]),
+             dense_shape=np.array([3]))),
+        (sparse_tensor.SparseTensorValue(
+            indices=np.array([[0]]),
+            values=np.array([0]),
+            dense_shape=np.array([1])),
+         sparse_tensor.SparseTensorValue(
+             indices=np.array([[1]]),
+             values=np.array([2]),
+             dense_shape=np.array([3]))),
+        (sparse_tensor.SparseTensorValue(
+            indices=np.array([[0]]),
+            values=np.array([0]),
+            dense_shape=np.array([1])),
+         sparse_tensor.SparseTensorValue(
+             indices=np.array([[2]]),
+             values=np.array([3]),
+             dense_shape=np.array([3]))),
+    ]
+
+    for i, result in enumerate(
+        datasets.Iterator(Dataset.from_tensor_slices(components))):
+      self.assertSparseValuesEqual(expected[i][0], result[0])
+      self.assertSparseValuesEqual(expected[i][1], result[1])
+
   def testPyFunc(self):
 
     def my_map(inp):
@@ -90,5 +166,64 @@ class IteratorTest(test.TestCase):
     self.assertAllEqual([0., 2.], x.numpy())
 
 
+class DatasetConstructorBenchmark(test.Benchmark):
+
+  def benchmarkSliceRepeatBatchEager(self):
+    input_size = 10000
+    batch_size = 100
+    num_epochs = 100
+
+    input_data = np.random.randn(input_size)
+
+    dataset = (
+        Dataset.from_tensor_slices(input_data).repeat(num_epochs)
+        .batch(batch_size))
+    iterator = datasets.Iterator(dataset)
+
+    ends = [time.time()]
+    for _ in iterator:
+      ends.append(time.time())
+
+    deltas = np.ediff1d(ends)
+    median_wall_time = np.median(deltas)
+    print(
+        'Slice/repeat/batch eager input size: %d batch size: %d Median wall '
+        'time per element: %f'
+        % (input_size, batch_size, median_wall_time))
+    self.report_benchmark(
+        iters=len(deltas),
+        wall_time=median_wall_time,
+        name='benchmark_slice_repeat_batch_eager_input_%d_batch_%d' %
+        (input_size, batch_size))
+
+  def benchmarkSliceBatchCacheRepeatCallable(self):
+    input_size = 10000
+    batch_size = 100
+    num_epochs = 100
+
+    input_data = np.random.randn(input_size)
+
+    dataset = (
+        Dataset.from_tensor_slices(input_data).batch(batch_size).cache()
+        .repeat(num_epochs))
+    iterator = datasets.Iterator(dataset)
+
+    ends = [time.time()]
+    for _ in iterator:
+      ends.append(time.time())
+
+    deltas = np.ediff1d(ends)
+    median_wall_time = np.median(deltas)
+    print(
+        'Slice/batch/cache/repeat eager input size: %d batch size: %d Median '
+        'wall time per element: %f'
+        % (input_size, batch_size, median_wall_time))
+    self.report_benchmark(
+        iters=len(deltas),
+        wall_time=median_wall_time,
+        name='benchmark_slice_batch_cache_repeat_eager_input_%d_batch_%d' %
+        (input_size, batch_size))
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/eager/python/evaluator.py b/tensorflow/contrib/eager/python/evaluator.py
index bd0ab02ecf7ae6025e08dde1c3ddc634db9255c1..3faaeef5903615ea122800a6690117dde682e830 100644
--- a/tensorflow/contrib/eager/python/evaluator.py
+++ b/tensorflow/contrib/eager/python/evaluator.py
@@ -110,7 +110,7 @@ class Evaluator(object):
         return self._all_metric_results()
     else:
       def f():
-        with summary_ops.create_summary_file_writer(
+        with summary_ops.create_file_writer(
             summary_logdir).as_default(), summary_ops.always_record_summaries():
           return self._all_metric_results()
       if context.in_eager_mode():
diff --git a/tensorflow/contrib/eager/python/examples/BUILD b/tensorflow/contrib/eager/python/examples/BUILD
index aa21a6ab994acf929890ecebc07a86cf7ebf97db..15a21885f66eface291a39fa0ee1ff28bc297548 100644
--- a/tensorflow/contrib/eager/python/examples/BUILD
+++ b/tensorflow/contrib/eager/python/examples/BUILD
@@ -6,10 +6,12 @@ package(default_visibility = ["//tensorflow:internal"])
 py_library(
     name = "examples_pip",
     deps = [
+        "//tensorflow/contrib/eager/python/examples/gan:mnist",
         "//tensorflow/contrib/eager/python/examples/linear_regression",
         "//tensorflow/contrib/eager/python/examples/mnist",
         "//tensorflow/contrib/eager/python/examples/resnet50",
         "//tensorflow/contrib/eager/python/examples/rnn_colorbot",
         "//tensorflow/contrib/eager/python/examples/rnn_ptb",
+        "//tensorflow/contrib/eager/python/examples/spinn:data",
     ],
 )
diff --git a/tensorflow/contrib/eager/python/examples/gan/BUILD b/tensorflow/contrib/eager/python/examples/gan/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..c61ec2dbae60a782c0e6589701554b045dcb92ae
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/gan/BUILD
@@ -0,0 +1,36 @@
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//tensorflow:internal"])
+
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+
+py_binary(
+    name = "mnist",
+    srcs = ["mnist.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/contrib/eager/python:tfe",
+        "//tensorflow/examples/tutorials/mnist:input_data",
+    ],
+)
+
+cuda_py_test(
+    name = "mnist_test",
+    srcs = ["mnist_test.py"],
+    additional_deps = [
+        ":mnist",
+        "//tensorflow/contrib/eager/python:tfe",
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
+cuda_py_test(
+    name = "mnist_graph_test",
+    srcs = ["mnist_graph_test.py"],
+    additional_deps = [
+        ":mnist",
+        "//third_party/py/numpy",
+        "//tensorflow:tensorflow_py",
+    ],
+)
diff --git a/tensorflow/contrib/eager/python/examples/gan/README.md b/tensorflow/contrib/eager/python/examples/gan/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e8c9db1a1e2eb5881b08a4d3866c82b24d64be12
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/gan/README.md
@@ -0,0 +1,38 @@
+# GAN with TensorFlow eager execution
+
+A simple Generative Adversarial Network (GAN) example using eager execution.
+The discriminator and generator networks each contain a few convolution and
+fully connected layers.
+
+Other eager execution examples can be found under the parent directory.
+
+##  Content
+
+- `mnist.py`: Model definitions and training routines.
+- `mnist_test.py`: Benchmarks for training and using the models using eager
+execution.
+- `mnist_graph_test.py`: Benchmarks for trainig and using the models using
+graph execution. The same model definitions and loss functions are used in
+all benchmarks.
+
+
+## To run
+
+- Make sure you have installed TensorFlow 1.5+ or the latest `tf-nightly`
+or `tf-nightly-gpu` pip package in order to access the eager execution feature.
+
+- Train model. E.g.,
+
+  ```bash
+  python mnist.py
+  ```
+  
+  Use `--output_dir=<DIR>` to direct the script to save TensorBoard summaries
+  during training. Disabled by default.
+  
+  Use `--checkpoint_dir=<DIR>` to direct the script to save checkpoints to
+  `<DIR>` during training. DIR defaults to /tmp/tensorflow/mnist/checkpoints/.
+  The script will load the   latest saved checkpoint from this directory if
+  one exists.
+  
+  Use `-h` for other options.
diff --git a/tensorflow/contrib/eager/python/examples/gan/mnist.py b/tensorflow/contrib/eager/python/examples/gan/mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9ac79f46c83bb709918e3b72830b90ddcfd71b4
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/gan/mnist.py
@@ -0,0 +1,368 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""A deep MNIST classifier using convolutional layers.
+
+Sample usage:
+  python mnist.py --help
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import os
+import sys
+import time
+
+import tensorflow as tf
+
+import tensorflow.contrib.eager as tfe
+from tensorflow.examples.tutorials.mnist import input_data
+
+FLAGS = None
+
+
+class Discriminator(tfe.Network):
+  """GAN Discriminator.
+
+  A network to differentiate between generated and real handwritten digits.
+  """
+
+  def __init__(self, data_format):
+    """Creates a model for discriminating between real and generated digits.
+
+    Args:
+      data_format: Either 'channels_first' or 'channels_last'.
+        'channels_first' is typically faster on GPUs while 'channels_last' is
+        typically faster on CPUs. See
+        https://www.tensorflow.org/performance/performance_guide#data_formats
+    """
+    super(Discriminator, self).__init__(name='')
+    if data_format == 'channels_first':
+      self._input_shape = [-1, 1, 28, 28]
+    else:
+      assert data_format == 'channels_last'
+      self._input_shape = [-1, 28, 28, 1]
+    self.conv1 = self.track_layer(tf.layers.Conv2D(64, 5, padding='SAME',
+                                                   data_format=data_format,
+                                                   activation=tf.tanh))
+    self.pool1 = self.track_layer(
+        tf.layers.AveragePooling2D(2, 2, data_format=data_format))
+    self.conv2 = self.track_layer(tf.layers.Conv2D(128, 5,
+                                                   data_format=data_format,
+                                                   activation=tf.tanh))
+    self.pool2 = self.track_layer(
+        tf.layers.AveragePooling2D(2, 2, data_format=data_format))
+    self.flatten = self.track_layer(tf.layers.Flatten())
+    self.fc1 = self.track_layer(tf.layers.Dense(1024, activation=tf.tanh))
+    self.fc2 = self.track_layer(tf.layers.Dense(1, activation=None))
+
+  def call(self, inputs):
+    """Return two logits per image estimating input authenticity.
+
+    Users should invoke __call__ to run the network, which delegates to this
+    method (and not call this method directly).
+
+    Args:
+      inputs: A batch of images as a Tensor with shape [batch_size, 28, 28, 1]
+        or [batch_size, 1, 28, 28]
+
+    Returns:
+      A Tensor with shape [batch_size] containing logits estimating
+      the probability that corresponding digit is real.
+    """
+    x = tf.reshape(inputs, self._input_shape)
+    x = self.conv1(x)
+    x = self.pool1(x)
+    x = self.conv2(x)
+    x = self.pool2(x)
+    x = self.flatten(x)
+    x = self.fc1(x)
+    x = self.fc2(x)
+    return x
+
+
+class Generator(tfe.Network):
+  """Generator of handwritten digits similar to the ones in the MNIST dataset.
+  """
+
+  def __init__(self, data_format):
+    """Creates a model for discriminating between real and generated digits.
+
+    Args:
+      data_format: Either 'channels_first' or 'channels_last'.
+        'channels_first' is typically faster on GPUs while 'channels_last' is
+        typically faster on CPUs. See
+        https://www.tensorflow.org/performance/performance_guide#data_formats
+    """
+    super(Generator, self).__init__(name='')
+    self.data_format = data_format
+    # We are using 128 6x6 channels as input to the first deconvolution layer
+    if data_format == 'channels_first':
+      self._pre_conv_shape = [-1, 128, 6, 6]
+    else:
+      assert data_format == 'channels_last'
+      self._pre_conv_shape = [-1, 6, 6, 128]
+    self.fc1 = self.track_layer(tf.layers.Dense(6 * 6 * 128,
+                                                activation=tf.tanh))
+
+    # In call(), we reshape the output of fc1 to _pre_conv_shape
+
+    # Deconvolution layer. Resulting image shape: (batch, 14, 14, 64)
+    self.conv1 = self.track_layer(tf.layers.Conv2DTranspose(
+        64, 4, strides=2, activation=None, data_format=data_format))
+
+    # Deconvolution layer. Resulting image shape: (batch, 28, 28, 1)
+    self.conv2 = self.track_layer(tf.layers.Conv2DTranspose(
+        1, 2, strides=2, activation=tf.nn.sigmoid, data_format=data_format))
+
+  def call(self, inputs):
+    """Return a batch of generated images.
+
+    Users should invoke __call__ to run the network, which delegates to this
+    method (and not call this method directly).
+
+    Args:
+      inputs: A batch of noise vectors as a Tensor with shape
+        [batch_size, length of noise vectors].
+
+    Returns:
+      A Tensor containing generated images. If data_format is 'channels_last',
+      the shape of returned images is [batch_size, 28, 28, 1], else
+      [batch_size, 1, 28, 28]
+    """
+
+    x = self.fc1(inputs)
+    x = tf.reshape(x, shape=self._pre_conv_shape)
+    x = self.conv1(x)
+    x = self.conv2(x)
+    return x
+
+
+def discriminator_loss(discriminator_real_outputs, discriminator_gen_outputs):
+  """Original discriminator loss for GANs, with label smoothing.
+
+  See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661) for more
+  details.
+
+  Args:
+    discriminator_real_outputs: Discriminator output on real data.
+    discriminator_gen_outputs: Discriminator output on generated data. Expected
+      to be in the range of (-inf, inf).
+
+  Returns:
+    A scalar loss Tensor.
+  """
+
+  loss_on_real = tf.losses.sigmoid_cross_entropy(
+      tf.ones_like(discriminator_real_outputs), discriminator_real_outputs,
+      label_smoothing=0.25)
+  loss_on_generated = tf.losses.sigmoid_cross_entropy(
+      tf.zeros_like(discriminator_gen_outputs), discriminator_gen_outputs)
+  loss = loss_on_real + loss_on_generated
+  tf.contrib.summary.scalar('discriminator_loss', loss)
+  return loss
+
+
+def generator_loss(discriminator_gen_outputs):
+  """Original generator loss for GANs.
+
+  L = -log(sigmoid(D(G(z))))
+
+  See `Generative Adversarial Nets` (https://arxiv.org/abs/1406.2661)
+  for more details.
+
+  Args:
+    discriminator_gen_outputs: Discriminator output on generated data. Expected
+      to be in the range of (-inf, inf).
+
+  Returns:
+    A scalar loss Tensor.
+  """
+  loss = tf.losses.sigmoid_cross_entropy(
+      tf.ones_like(discriminator_gen_outputs), discriminator_gen_outputs)
+  tf.contrib.summary.scalar('generator_loss', loss)
+  return loss
+
+
+def train_one_epoch(generator, discriminator,
+                    generator_optimizer, discriminator_optimizer,
+                    dataset, log_interval, noise_dim):
+  """Trains `generator` and `discriminator` models on `dataset`.
+
+  Args:
+    generator: Generator model.
+    discriminator: Discriminator model.
+    generator_optimizer: Optimizer to use for generator.
+    discriminator_optimizer: Optimizer to use for discriminator.
+    dataset: Dataset of images to train on.
+    log_interval: How many global steps to wait between logging and collecting
+      summaries.
+    noise_dim: Dimension of noise vector to use.
+  """
+
+  total_generator_loss = 0.0
+  total_discriminator_loss = 0.0
+  for (batch_index, images) in enumerate(tfe.Iterator(dataset)):
+    with tf.device('/cpu:0'):
+      tf.assign_add(tf.train.get_global_step(), 1)
+
+    with tf.contrib.summary.record_summaries_every_n_global_steps(log_interval):
+      current_batch_size = images.shape[0]
+      noise = tf.random_uniform(shape=[current_batch_size, noise_dim],
+                                minval=-1., maxval=1., seed=batch_index)
+
+      with tfe.GradientTape(persistent=True) as g:
+        generated_images = generator(noise)
+        tf.contrib.summary.image('generated_images',
+                                 tf.reshape(generated_images, [-1, 28, 28, 1]),
+                                 max_images=10)
+
+        discriminator_gen_outputs = discriminator(generated_images)
+        discriminator_real_outputs = discriminator(images)
+        discriminator_loss_val = discriminator_loss(discriminator_real_outputs,
+                                                    discriminator_gen_outputs)
+        total_discriminator_loss += discriminator_loss_val
+
+        generator_loss_val = generator_loss(discriminator_gen_outputs)
+        total_generator_loss += generator_loss_val
+
+      generator_grad = g.gradient(generator_loss_val, generator.variables)
+      discriminator_grad = g.gradient(discriminator_loss_val,
+                                      discriminator.variables)
+
+      with tf.variable_scope('generator'):
+        generator_optimizer.apply_gradients(zip(generator_grad,
+                                                generator.variables))
+      with tf.variable_scope('discriminator'):
+        discriminator_optimizer.apply_gradients(zip(discriminator_grad,
+                                                    discriminator.variables))
+
+      if log_interval and batch_index > 0 and batch_index % log_interval == 0:
+        print('Batch #%d\tAverage Generator Loss: %.6f\t'
+              'Average Discriminator Loss: %.6f' % (
+                  batch_index, total_generator_loss/batch_index,
+                  total_discriminator_loss/batch_index))
+
+
+def main(_):
+  (device, data_format) = ('/gpu:0', 'channels_first')
+  if FLAGS.no_gpu or tfe.num_gpus() <= 0:
+    (device, data_format) = ('/cpu:0', 'channels_last')
+  print('Using device %s, and data format %s.' % (device, data_format))
+
+  # Load the datasets
+  data = input_data.read_data_sets(FLAGS.data_dir)
+  dataset = (tf.data.Dataset
+             .from_tensor_slices(data.train.images)
+             .shuffle(60000)
+             .batch(FLAGS.batch_size))
+
+  # Create the models and optimizers
+  generator = Generator(data_format)
+  discriminator = Discriminator(data_format)
+  with tf.variable_scope('generator'):
+    generator_optimizer = tf.train.AdamOptimizer(FLAGS.lr)
+  with tf.variable_scope('discriminator'):
+    discriminator_optimizer = tf.train.AdamOptimizer(FLAGS.lr)
+
+  # Prepare summary writer and checkpoint info
+  summary_writer = tf.contrib.summary.create_summary_file_writer(
+      FLAGS.output_dir, flush_millis=1000)
+  checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
+  latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
+  if latest_cpkt:
+    print('Using latest checkpoint at ' + latest_cpkt)
+
+  with tf.device(device):
+    for epoch in range(1, 101):
+      with tfe.restore_variables_on_create(latest_cpkt):
+        global_step = tf.train.get_or_create_global_step()
+        start = time.time()
+        with summary_writer.as_default():
+          train_one_epoch(generator, discriminator, generator_optimizer,
+                          discriminator_optimizer,
+                          dataset, FLAGS.log_interval, FLAGS.noise)
+        end = time.time()
+        print('\nTrain time for epoch #%d (global step %d): %f' % (
+            epoch, global_step.numpy(), end - start))
+
+      all_variables = (
+          generator.variables
+          + discriminator.variables
+          + generator_optimizer.variables()
+          + discriminator_optimizer.variables()
+          + [global_step])
+      tfe.Saver(all_variables).save(
+          checkpoint_prefix, global_step=global_step)
+
+
+if __name__ == '__main__':
+  tfe.enable_eager_execution()
+
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--data-dir',
+      type=str,
+      default='/tmp/tensorflow/mnist/input_data',
+      help=('Directory for storing input data (default '
+            '/tmp/tensorflow/mnist/input_data)'))
+  parser.add_argument(
+      '--batch-size',
+      type=int,
+      default=128,
+      metavar='N',
+      help='input batch size for training (default: 128)')
+  parser.add_argument(
+      '--log-interval',
+      type=int,
+      default=100,
+      metavar='N',
+      help=('number of batches between logging and writing summaries '
+            '(default: 100)'))
+  parser.add_argument(
+      '--output_dir',
+      type=str,
+      default=None,
+      metavar='DIR',
+      help='Directory to write TensorBoard summaries (defaults to none)')
+  parser.add_argument(
+      '--checkpoint_dir',
+      type=str,
+      default='/tmp/tensorflow/mnist/checkpoints/',
+      metavar='DIR',
+      help=('Directory to save checkpoints in (once per epoch) (default '
+            '/tmp/tensorflow/mnist/checkpoints/)'))
+  parser.add_argument(
+      '--lr',
+      type=float,
+      default=0.001,
+      metavar='LR',
+      help='learning rate (default: 0.001)')
+  parser.add_argument(
+      '--noise',
+      type=int,
+      default=100,
+      metavar='N',
+      help='Length of noise vector for generator input (default: 100)')
+  parser.add_argument(
+      '--no-gpu',
+      action='store_true',
+      default=False,
+      help='disables GPU usage even if a GPU is available')
+
+  FLAGS, unparsed = parser.parse_known_args()
+  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/contrib/eager/python/examples/gan/mnist_graph_test.py b/tensorflow/contrib/eager/python/examples/gan/mnist_graph_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..12b39b0cde49d4c017acfa74572c725036c54eff
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/gan/mnist_graph_test.py
@@ -0,0 +1,151 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tempfile
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.contrib.eager.python.examples.gan import mnist
+
+NOISE_DIM = 100
+# Big enough so that summaries are never recorded.
+# Lower this value if would like to benchmark with some summaries.
+SUMMARY_INTERVAL = 10000
+SUMMARY_FLUSH_MS = 100  # Flush summaries every 100ms
+
+
+def data_format():
+  return 'channels_first' if tf.test.is_gpu_available() else 'channels_last'
+
+
+class MnistGraphGanBenchmark(tf.test.Benchmark):
+
+  def _create_graph(self, batch_size):
+    # Generate some random data.
+    images_data = np.random.randn(batch_size, 784).astype(np.float32)
+    dataset = tf.data.Dataset.from_tensors(images_data)
+    images = dataset.repeat().make_one_shot_iterator().get_next()
+
+    # Create the models and optimizers
+    generator = mnist.Generator(data_format())
+    discriminator = mnist.Discriminator(data_format())
+    with tf.variable_scope('generator'):
+      generator_optimizer = tf.train.AdamOptimizer(0.001)
+    with tf.variable_scope('discriminator'):
+      discriminator_optimizer = tf.train.AdamOptimizer(0.001)
+
+    # Run models and compute loss
+    noise_placeholder = tf.placeholder(tf.float32,
+                                       shape=[batch_size, NOISE_DIM])
+    generated_images = generator(noise_placeholder)
+    tf.contrib.summary.image('generated_images',
+                             tf.reshape(generated_images, [-1, 28, 28, 1]),
+                             max_images=10)
+    discriminator_gen_outputs = discriminator(generated_images)
+    discriminator_real_outputs = discriminator(images)
+    generator_loss = mnist.generator_loss(discriminator_gen_outputs)
+    discriminator_loss = mnist.discriminator_loss(discriminator_real_outputs,
+                                                  discriminator_gen_outputs)
+    # Get train ops
+    with tf.variable_scope('generator'):
+      generator_train = generator_optimizer.minimize(
+          generator_loss, var_list=generator.variables)
+    with tf.variable_scope('discriminator'):
+      discriminator_train = discriminator_optimizer.minimize(
+          discriminator_loss, var_list=discriminator.variables)
+
+    return (generator_train, discriminator_train, noise_placeholder)
+
+  def _report(self, test_name, start, num_iters, batch_size):
+    avg_time = (time.time() - start) / num_iters
+    dev = 'gpu' if tf.test.is_gpu_available() else 'cpu'
+    name = 'graph_%s_%s_batch_%d_%s' % (test_name, dev, batch_size,
+                                        data_format())
+    extras = {'examples_per_sec': batch_size / avg_time}
+    self.report_benchmark(
+        iters=num_iters, wall_time=avg_time, name=name, extras=extras)
+
+  def benchmark_train(self):
+    for batch_size in [64, 128, 256]:
+      with tf.Graph().as_default():
+        global_step = tf.train.get_or_create_global_step()
+        increment_global_step = tf.assign_add(global_step, 1)
+        with tf.contrib.summary.create_file_writer(
+            tempfile.mkdtemp(), flush_millis=SUMMARY_FLUSH_MS).as_default(), (
+                tf.contrib.summary.record_summaries_every_n_global_steps(
+                    SUMMARY_INTERVAL)):
+          (generator_train, discriminator_train, noise_placeholder
+          ) = self._create_graph(batch_size)
+
+          with tf.Session() as sess:
+            tf.contrib.summary.initialize(graph=tf.get_default_graph(),
+                                          session=sess)
+
+            sess.run(tf.global_variables_initializer())
+
+            num_burn, num_iters = (3, 100)
+            for _ in range(num_burn):
+              noise = np.random.uniform(-1.0, 1.0, size=[batch_size, NOISE_DIM])
+              # Increment global step before evaluating summary ops to avoid
+              # race condition.
+              sess.run(increment_global_step)
+              sess.run([generator_train, discriminator_train,
+                        tf.contrib.summary.all_summary_ops()],
+                       feed_dict={noise_placeholder: noise})
+
+            # Run and benchmark 2 epochs
+            start = time.time()
+            for _ in range(num_iters):
+              noise = np.random.uniform(-1.0, 1.0, size=[batch_size, NOISE_DIM])
+              sess.run(increment_global_step)
+              sess.run([generator_train, discriminator_train,
+                        tf.contrib.summary.all_summary_ops()],
+                       feed_dict={noise_placeholder: noise})
+            self._report('train', start, num_iters, batch_size)
+
+  def benchmark_generate(self):
+    for batch_size in [64, 128, 256]:
+      with tf.Graph().as_default():
+        # Using random weights. This will generate garbage.
+        generator = mnist.Generator(data_format())
+        noise_placeholder = tf.placeholder(tf.float32,
+                                           shape=[batch_size, NOISE_DIM])
+        generated_images = generator(noise_placeholder)
+
+        init = tf.global_variables_initializer()
+        with tf.Session() as sess:
+          sess.run(init)
+          noise = np.random.uniform(-1.0, 1.0, size=[batch_size, NOISE_DIM])
+          num_burn, num_iters = (30, 1000)
+          for _ in range(num_burn):
+            sess.run(generated_images, feed_dict={noise_placeholder: noise})
+
+          start = time.time()
+          for _ in range(num_iters):
+            # Comparison with the eager execution benchmark in mnist_test.py
+            # isn't entirely fair as the time here includes the cost of copying
+            # the feeds from CPU memory to GPU.
+            sess.run(generated_images, feed_dict={noise_placeholder: noise})
+          self._report('generate', start, num_iters, batch_size)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/tensorflow/contrib/eager/python/examples/gan/mnist_test.py b/tensorflow/contrib/eager/python/examples/gan/mnist_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a3ca8d82bc2619b05a734f6d2e58431c1a45995
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/gan/mnist_test.py
@@ -0,0 +1,113 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tempfile
+import time
+
+import tensorflow as tf
+
+import tensorflow.contrib.eager as tfe
+from tensorflow.contrib.eager.python.examples.gan import mnist
+
+NOISE_DIM = 100
+# Big enough so that summaries are never recorded.
+# Lower this value if would like to benchmark with some summaries.
+SUMMARY_INTERVAL = 10000
+SUMMARY_FLUSH_MS = 100  # Flush summaries every 100ms
+
+
+def data_format():
+  return 'channels_first' if tf.test.is_gpu_available() else 'channels_last'
+
+
+def device():
+  return '/gpu:0' if tfe.num_gpus() else '/cpu:0'
+
+
+class MnistEagerGanBenchmark(tf.test.Benchmark):
+
+  def _report(self, test_name, start, num_iters, batch_size):
+    avg_time = (time.time() - start) / num_iters
+    dev = 'gpu' if tfe.num_gpus() else 'cpu'
+    name = 'eager_%s_%s_batch_%d_%s' % (test_name, dev, batch_size,
+                                        data_format())
+    extras = {'examples_per_sec': batch_size / avg_time}
+    self.report_benchmark(
+        iters=num_iters, wall_time=avg_time, name=name, extras=extras)
+
+  def benchmark_train(self):
+    for batch_size in [64, 128, 256]:
+      # Generate some random data.
+      burn_batches, measure_batches = (3, 100)
+      burn_images = [tf.random_normal([batch_size, 784])
+                     for _ in range(burn_batches)]
+      burn_dataset = tf.data.Dataset.from_tensor_slices(burn_images)
+      measure_images = [tf.random_normal([batch_size, 784])
+                        for _ in range(measure_batches)]
+      measure_dataset = tf.data.Dataset.from_tensor_slices(measure_images)
+
+      tf.train.get_or_create_global_step()
+      with tf.device(device()):
+        # Create the models and optimizers
+        generator = mnist.Generator(data_format())
+        discriminator = mnist.Discriminator(data_format())
+        with tf.variable_scope('generator'):
+          generator_optimizer = tf.train.AdamOptimizer(0.001)
+        with tf.variable_scope('discriminator'):
+          discriminator_optimizer = tf.train.AdamOptimizer(0.001)
+
+        with tf.contrib.summary.create_file_writer(
+            tempfile.mkdtemp(), flush_millis=SUMMARY_FLUSH_MS).as_default():
+
+          # warm up
+          mnist.train_one_epoch(generator, discriminator, generator_optimizer,
+                                discriminator_optimizer,
+                                burn_dataset, log_interval=SUMMARY_INTERVAL,
+                                noise_dim=NOISE_DIM)
+          # measure
+          start = time.time()
+          mnist.train_one_epoch(generator, discriminator, generator_optimizer,
+                                discriminator_optimizer,
+                                measure_dataset, log_interval=SUMMARY_INTERVAL,
+                                noise_dim=NOISE_DIM)
+          self._report('train', start, measure_batches, batch_size)
+
+  def benchmark_generate(self):
+    for batch_size in [64, 128, 256]:
+      with tf.device(device()):
+        # Using random weights. This will generate garbage.
+        generator = mnist.Generator(data_format())
+
+        num_burn, num_iters = (30, 1000)
+        for _ in range(num_burn):
+          noise = tf.random_uniform(shape=[batch_size, NOISE_DIM],
+                                    minval=-1., maxval=1.)
+          generator(noise)
+
+        start = time.time()
+        for _ in range(num_iters):
+          noise = tf.random_uniform(shape=[batch_size, NOISE_DIM],
+                                    minval=-1., maxval=1.)
+          generator(noise)
+        self._report('generate', start, num_iters, batch_size)
+
+
+if __name__ == '__main__':
+  tfe.enable_eager_execution()
+  tf.test.main()
diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py
index d0130ebd118dbaff4f0161c8b2528764c6103e02..f4b7d67f940f5d752e1d22d643b763e2d97e987e 100644
--- a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py
+++ b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py
@@ -41,7 +41,7 @@ class LinearModel(tfe.Network):
   For those familiar with TensorFlow graphs, notice the absence of
   `tf.Session`. The `forward()` method here immediately executes and
   returns output values. The `loss()` method immediately compares the
-  output of `forward()` with the target adn returns the MSE loss value.
+  output of `forward()` with the target and returns the MSE loss value.
   The `fit()` performs gradient-descent training on the model's weights
   and bias.
   """
@@ -85,7 +85,7 @@ def fit(model, dataset, optimizer, verbose=False, logdir=None):
   if logdir:
     # Support for TensorBoard summaries. Once training has started, use:
     #   tensorboard --logdir=<logdir>
-    summary_writer = tf.contrib.summary.create_summary_file_writer(logdir)
+    summary_writer = tf.contrib.summary.create_file_writer(logdir)
 
   # Training loop.
   for i, (xs, ys) in enumerate(tfe.Iterator(dataset)):
diff --git a/tensorflow/contrib/eager/python/examples/mnist/mnist.py b/tensorflow/contrib/eager/python/examples/mnist/mnist.py
index bfb7d5a9002787f6544d383de58150661ac2bde3..82b3d3919cf0176961853d2bd85802e5dafa789e 100644
--- a/tensorflow/contrib/eager/python/examples/mnist/mnist.py
+++ b/tensorflow/contrib/eager/python/examples/mnist/mnist.py
@@ -40,7 +40,7 @@ class MNISTModel(tfe.Network):
   """MNIST Network.
 
   Network structure is equivalent to:
-  https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/examples/tutorials/mnist/mnist_deep.py
+  https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/examples/tutorials/mnist/mnist_deep.py
   and
   https://github.com/tensorflow/models/blob/master/tutorials/image/mnist/convolutional.py
 
@@ -190,9 +190,9 @@ def main(_):
   else:
     train_dir = None
     test_dir = None
-  summary_writer = tf.contrib.summary.create_summary_file_writer(
+  summary_writer = tf.contrib.summary.create_file_writer(
       train_dir, flush_millis=10000)
-  test_summary_writer = tf.contrib.summary.create_summary_file_writer(
+  test_summary_writer = tf.contrib.summary.create_file_writer(
       test_dir, flush_millis=10000, name='test')
   checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
 
diff --git a/tensorflow/contrib/eager/python/examples/mnist/mnist_test.py b/tensorflow/contrib/eager/python/examples/mnist/mnist_test.py
index 205709fe2edd3c260c30a84b624e322e120edf8e..136085eba21284a42282395e54f32c33bf63b5c3 100644
--- a/tensorflow/contrib/eager/python/examples/mnist/mnist_test.py
+++ b/tensorflow/contrib/eager/python/examples/mnist/mnist_test.py
@@ -39,22 +39,40 @@ def random_dataset():
   return tf.data.Dataset.from_tensors((images, labels))
 
 
+def train_one_epoch(defun=False):
+  model = mnist.MNISTModel(data_format())
+  if defun:
+    model.call = tfe.defun(model.call)
+  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
+  dataset = random_dataset()
+  with tf.device(device()):
+    tf.train.get_or_create_global_step()
+    mnist.train_one_epoch(model, optimizer, dataset)
+
+
+def evaluate(defun=False):
+  model = mnist.MNISTModel(data_format())
+  dataset = random_dataset()
+  if defun:
+    model.call = tfe.defun(model.call)
+  with tf.device(device()):
+    tf.train.get_or_create_global_step()
+    mnist.test(model, dataset)
+
+
 class MNISTTest(tf.test.TestCase):
 
   def testTrainOneEpoch(self):
-    model = mnist.MNISTModel(data_format())
-    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
-    dataset = random_dataset()
-    with tf.device(device()):
-      tf.train.get_or_create_global_step()
-      mnist.train_one_epoch(model, optimizer, dataset)
+    train_one_epoch(defun=False)
 
   def testTest(self):
-    model = mnist.MNISTModel(data_format())
-    dataset = random_dataset()
-    with tf.device(device()):
-      tf.train.get_or_create_global_step()
-      mnist.test(model, dataset)
+    evaluate(defun=False)
+
+  def testTrainOneEpochWithDefunCall(self):
+    train_one_epoch(defun=True)
+
+  def testTestWithDefunCall(self):
+    evaluate(defun=True)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
index 14c82c87a72457d414c4a1d3c53d4d1a68a400e6..23317886e712323f4b520000e0fd372734fc53a1 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
+++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
@@ -73,7 +73,7 @@ class ResNet50GraphTest(tf.test.TestCase):
       tf.train.get_or_create_global_step()
       logdir = tempfile.mkdtemp()
       with tf.contrib.summary.always_record_summaries():
-        with tf.contrib.summary.create_summary_file_writer(
+        with tf.contrib.summary.create_file_writer(
             logdir, max_queue=0,
             name='t0').as_default():
           model = resnet50.ResNet50(data_format())
diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
index 582f4837c6f3197081cb558063e963866d173f29..e2ae665a74fcf297b3174006783a7b8fed19ff03 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
+++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
@@ -64,14 +64,22 @@ def train_one_step(model, images, labels, optimizer):
 
 class ResNet50Test(tf.test.TestCase):
 
-  def test_apply(self):
+  def _apply(self, defun=False):
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
+    if defun:
+      model.call = tfe.defun(model.call)
     with tf.device(device):
       images, _ = random_batch(2)
       output = model(images)
     self.assertEqual((2, 1000), output.shape)
 
+  def test_apply(self):
+    self._apply(defun=False)
+
+  def test_apply_with_defun(self):
+    self._apply(defun=True)
+
   def test_apply_no_top(self):
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format, include_top=False)
@@ -95,7 +103,7 @@ class ResNet50Test(tf.test.TestCase):
     model = resnet50.ResNet50(data_format)
     tf.train.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with tf.contrib.summary.create_summary_file_writer(
+    with tf.contrib.summary.create_file_writer(
         logdir, max_queue=0,
         name='t0').as_default(), tf.contrib.summary.always_record_summaries():
       with tf.device(device):
@@ -175,9 +183,11 @@ class ResNet50Benchmarks(tf.test.Benchmark):
     # a sync. This is a roundabout way, yes.
     tf.constant(1.).cpu()
 
-  def benchmark_eager_apply(self):
+  def _benchmark_eager_apply(self, label, defun=False):
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
+    if defun:
+      model.call = tfe.defun(model.call)
     batch_size = 64
     num_burn = 5
     num_iters = 30
@@ -189,16 +199,23 @@ class ResNet50Benchmarks(tf.test.Benchmark):
       start = time.time()
       for _ in xrange(num_iters):
         model(images).cpu()
-      self._report('eager_apply', start, num_iters, device, batch_size,
-                   data_format)
+      self._report(label, start, num_iters, device, batch_size, data_format)
+
+  def benchmark_eager_apply(self):
+    self._benchmark_eager_apply('eager_apply', defun=False)
+
+  def benchmark_eager_apply_with_defun(self):
+    self._benchmark_eager_apply('eager_apply_with_defun', defun=True)
 
-  def _benchmark_eager_train(self, label, make_iterator):
+  def _benchmark_eager_train(self, label, make_iterator, defun=False):
     device, data_format = device_and_data_format()
     for batch_size in self._train_batch_sizes():
       (images, labels) = random_batch(batch_size)
       num_burn = 3
       num_iters = 10
       model = resnet50.ResNet50(data_format)
+      if defun:
+        model.call = tfe.defun(model.call)
       optimizer = tf.train.GradientDescentOptimizer(0.1)
 
       with tf.device(device):
@@ -217,7 +234,11 @@ class ResNet50Benchmarks(tf.test.Benchmark):
         self._report(label, start, num_iters, device, batch_size, data_format)
 
   def benchmark_eager_train(self):
-    self._benchmark_eager_train('eager_train', MockIterator)
+    self._benchmark_eager_train('eager_train', MockIterator, defun=False)
+
+  def benchmark_eager_train_with_defun(self):
+    self._benchmark_eager_train(
+        'eager_train_with_defun', MockIterator, defun=True)
 
   def benchmark_eager_train_datasets(self):
 
@@ -226,7 +247,18 @@ class ResNet50Benchmarks(tf.test.Benchmark):
         ds = tf.data.Dataset.from_tensors(tensors).repeat()
       return tfe.Iterator(ds)
 
-    self._benchmark_eager_train('eager_train_dataset', make_iterator)
+    self._benchmark_eager_train(
+        'eager_train_dataset', make_iterator, defun=False)
+
+  def benchmark_eager_train_datasets_with_defun(self):
+
+    def make_iterator(tensors):
+      with tf.device('/device:CPU:0'):
+        ds = tf.data.Dataset.from_tensors(tensors).repeat()
+      return tfe.Iterator(ds)
+
+    self._benchmark_eager_train(
+        'eager_train_dataset_with_defun', make_iterator, defun=True)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py
index 609cbd28772c3ae8da70648ca5b1b264a8a255e2..40919f2d4cf511eb35fac954719286366aef6c7c 100644
--- a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py
+++ b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py
@@ -247,9 +247,9 @@ def main(_):
 
   log_dir = os.path.join(FLAGS.dir, "summaries")
   tf.gfile.MakeDirs(log_dir)
-  train_summary_writer = tf.contrib.summary.create_summary_file_writer(
+  train_summary_writer = tf.contrib.summary.create_file_writer(
       os.path.join(log_dir, "train"), flush_millis=10000)
-  test_summary_writer = tf.contrib.summary.create_summary_file_writer(
+  test_summary_writer = tf.contrib.summary.create_file_writer(
       os.path.join(log_dir, "eval"), flush_millis=10000, name="eval")
 
   with tf.device(device):
diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py
index 30bb3c8ad33d38453bd96a76c7770071e24bb034..7b9637a9d58c87e93c7c0ea7173a6b88c885ee25 100644
--- a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py
+++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py
@@ -22,6 +22,11 @@ Usage: python ./rnn_ptb.py --data-path=<path_to_dataset>
 Penn Treebank (PTB) dataset from:
 http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
 """
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
 import argparse
 import os
 import sys
@@ -209,7 +214,7 @@ class Datasets(object):
     """Load the Penn Treebank dataset.
 
     Args:
-      path: Path to the data/ directory of the dataset from from Tomas Mikolov's
+      path: Path to the data/ directory of the dataset from Tomas Mikolov's
         webpage - http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
     """
 
diff --git a/tensorflow/contrib/eager/python/examples/spinn/BUILD b/tensorflow/contrib/eager/python/examples/spinn/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..a1f8a759e2a556bc219f0aa13942f293c4f34cfa
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/spinn/BUILD
@@ -0,0 +1,42 @@
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//tensorflow:internal"])
+
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+py_library(
+    name = "data",
+    srcs = ["data.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = ["//third_party/py/numpy"],
+)
+
+py_test(
+    name = "data_test",
+    size = "small",
+    srcs = ["data_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":data",
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
+cuda_py_test(
+    name = "spinn_test",
+    size = "medium",
+    srcs = ["spinn_test.py"],
+    additional_deps = [
+        ":data",
+        "//third_party/examples/eager/spinn",
+        "//third_party/py/numpy",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/contrib/summary:summary_test_util",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_test_lib",
+    ],
+    tags = ["no_pip"],  # because spinn.py is under third_party/.
+)
diff --git a/tensorflow/contrib/eager/python/examples/spinn/README.md b/tensorflow/contrib/eager/python/examples/spinn/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..eb0637df473e22e5d39ca1b0816464cb2b7c6435
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/spinn/README.md
@@ -0,0 +1,13 @@
+# SPINN: Dynamic neural network with TensorFlow eager execution
+
+This directory contains files supporting the
+[spinn.py model in third_party/examples/eager/spinn/](../../../../../../third_party/examples/eager/spinn/spinn.py),
+including
+
+- `data.py`: Utility library for loading and preprocessing the SNLI and GloVe
+  data.
+- `data_test.py` and `spinn_test.py`: Unit tests for the data and model modules.
+
+See the [README.md in third_party/examples/eager/spinn/](../../../../../../third_party/examples/eager/spinn/README.md)
+for detailed background, license and usage information regarding the SPINN code.
+
diff --git a/tensorflow/contrib/eager/python/examples/spinn/data.py b/tensorflow/contrib/eager/python/examples/spinn/data.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6e046320f78541bef4e091e97f08fd51857af83
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/spinn/data.py
@@ -0,0 +1,350 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities of SNLI data and GloVe word vectors for SPINN model.
+
+See more details about the SNLI data set at:
+  https://nlp.stanford.edu/projects/snli/
+
+See more details about the GloVe pretrained word embeddings at:
+  https://nlp.stanford.edu/projects/glove/
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import glob
+import math
+import os
+import random
+
+import numpy as np
+
+POSSIBLE_LABELS = ("entailment", "contradiction", "neutral")
+
+UNK_CODE = 0   # Code for unknown word tokens.
+PAD_CODE = 1   # Code for padding tokens.
+
+SHIFT_CODE = 3
+REDUCE_CODE = 2
+
+WORD_VECTOR_LEN = 300  # Embedding dimensions.
+
+LEFT_PAREN = "("
+RIGHT_PAREN = ")"
+PARENTHESES = (LEFT_PAREN, RIGHT_PAREN)
+
+
+def get_non_parenthesis_words(items):
+  """Get the non-parenthesis items from a SNLI parsed sentence.
+
+  Args:
+    items: Data items from a parsed SNLI setence, with parentheses. E.g.,
+      ["(", "Man", "(", "(", "(", "(", "(", "wearing", "pass", ")", ...
+
+  Returns:
+    A list of non-parenthis word items, all converted to lower case. E.g.,
+      ["man", "wearing", "pass", ...
+  """
+  return [x.lower() for x in items if x not in PARENTHESES and x]
+
+
+def get_shift_reduce(items):
+  """Obtain shift-reduce vector from a list of items from the SNLI data.
+
+  Args:
+    items: Data items as a list of str, e.g.,
+       ["(", "Man", "(", "(", "(", "(", "(", "wearing", "pass", ")", ...
+
+  Returns:
+    A list of shift-reduce transitions, encoded as `SHIFT_CODE` for shift and
+      `REDUCE_CODE` for reduce. See code above for the values of `SHIFT_CODE`
+      and `REDUCE_CODE`.
+  """
+  trans = []
+  for item in items:
+    if item == LEFT_PAREN:
+      continue
+    elif item == RIGHT_PAREN:
+      trans.append(REDUCE_CODE)
+    else:
+      trans.append(SHIFT_CODE)
+  return trans
+
+
+def pad_and_reverse_word_ids(sentences):
+  """Pad a list of sentences to the common maximum length + 1.
+
+  Args:
+    sentences: A list of sentences as a list of list of integers. Each integer
+      is a word ID. Each list of integer corresponds to one sentence.
+
+  Returns:
+    A numpy.ndarray of shape (num_sentences, max_length + 1), wherein max_length
+      is the maximum sentence length (in # of words). Each sentence is reversed
+      and then padded with an extra one at head, as required by the model.
+  """
+  max_len = max(len(sent) for sent in sentences)
+  for sent in sentences:
+    if len(sent) < max_len:
+      sent.extend([PAD_CODE] * (max_len - len(sent)))
+  # Reverse in time order and pad an extra one.
+  sentences = np.fliplr(np.array(sentences, dtype=np.int64))
+  sentences = np.concatenate(
+      [np.ones([sentences.shape[0], 1], dtype=np.int64), sentences], axis=1)
+  return sentences
+
+
+def pad_transitions(sentences_transitions):
+  """Pad a list of shift-reduce transitions to the maximum length."""
+  max_len = max(len(transitions) for transitions in sentences_transitions)
+  for transitions in sentences_transitions:
+    if len(transitions) < max_len:
+      transitions.extend([PAD_CODE] * (max_len - len(transitions)))
+  return np.array(sentences_transitions, dtype=np.int64)
+
+
+def load_vocabulary(data_root):
+  """Load vocabulary from SNLI data files.
+
+  Args:
+    data_root: Root directory of the data. It is assumed that the SNLI data
+      files have been downloaded and extracted to the "snli/snli_1.0"
+      subdirectory of it.
+
+  Returns:
+    Vocabulary as a set of strings.
+
+  Raises:
+    ValueError: If SNLI data files cannot be found.
+  """
+  snli_path = os.path.join(data_root, "snli")
+  snli_glob_pattern = os.path.join(snli_path, "snli_1.0/snli_1.0_*.txt")
+  file_names = glob.glob(snli_glob_pattern)
+  if not file_names:
+    raise ValueError(
+        "Cannot find SNLI data files at %s. "
+        "Please download and extract SNLI data first." % snli_glob_pattern)
+
+  print("Loading vocabulary...")
+  vocab = set()
+  for file_name in file_names:
+    with open(os.path.join(snli_path, file_name), "rt") as f:
+      for i, line in enumerate(f):
+        if i == 0:
+          continue
+        items = line.split("\t")
+        premise_words = get_non_parenthesis_words(items[1].split(" "))
+        hypothesis_words = get_non_parenthesis_words(items[2].split(" "))
+        vocab.update(premise_words)
+        vocab.update(hypothesis_words)
+  return vocab
+
+
+def load_word_vectors(data_root, vocab):
+  """Load GloVe word vectors for words present in the vocabulary.
+
+  Args:
+    data_root: Data root directory. It is assumed that the GloVe file
+     has been downloaded and extracted at the "glove/" subdirectory of it.
+    vocab: A `set` of words, representing the vocabulary.
+
+  Returns:
+    1. word2index: A dict from lower-case word to row index in the embedding
+       matrix, i.e, `embed` below.
+    2. embed: The embedding matrix as a float32 numpy array. Its shape is
+       [vocabulary_size, WORD_VECTOR_LEN]. vocabulary_size is len(vocab).
+       WORD_VECTOR_LEN is the embedding dimension (300).
+
+  Raises:
+    ValueError: If GloVe embedding file cannot be found.
+  """
+  glove_path = os.path.join(data_root, "glove/glove.42B.300d.txt")
+  if not os.path.isfile(glove_path):
+    raise ValueError(
+        "Cannot find GloVe embedding file at %s. "
+        "Please download and extract GloVe embeddings first." % glove_path)
+
+  print("Loading word vectors...")
+
+  word2index = dict()
+  embed = []
+
+  embed.append([0] * WORD_VECTOR_LEN)  # <unk>
+  embed.append([0] * WORD_VECTOR_LEN)  # <pad>
+  word2index["<unk>"] = UNK_CODE
+  word2index["<pad>"] = PAD_CODE
+
+  with open(glove_path, "rt") as f:
+    for line in f:
+      items = line.split(" ")
+      word = items[0]
+      if word in vocab and word not in word2index:
+        word2index[word] = len(embed)
+        vector = np.array([float(item) for item in items[1:]])
+        assert (WORD_VECTOR_LEN,) == vector.shape
+        embed.append(vector)
+  embed = np.array(embed, dtype=np.float32)
+  return word2index, embed
+
+
+def calculate_bins(length2count, min_bin_size):
+  """Cacluate bin boundaries given a histogram of lengths and mininum bin size.
+
+  Args:
+    length2count: A `dict` mapping length to sentence count.
+    min_bin_size: Minimum bin size in terms of total number of sentence pairs
+      in the bin.
+
+  Returns:
+    A `list` representing the right bin boundaries, starting from the inclusive
+    right boundary of the first bin. For example, if the output is
+      [10, 20, 35],
+    it means there are three bins: [1, 10], [11, 20] and [21, 35].
+  """
+  bounds = []
+  lengths = sorted(length2count.keys())
+  cum_count = 0
+  for length in lengths:
+    cum_count += length2count[length]
+    if cum_count >= min_bin_size:
+      bounds.append(length)
+      cum_count = 0
+  if bounds[-1] != lengths[-1]:
+    bounds.append(lengths[-1])
+  return bounds
+
+
+class SnliData(object):
+  """A split of SNLI data."""
+
+  def __init__(self, data_file, word2index, sentence_len_limit=-1):
+    """SnliData constructor.
+
+    Args:
+      data_file: Full path to the data file, e.g.,
+        "/tmp/spinn-data/snli/snli_1.0/snli_1.0.train.txt"
+      word2index: A dict from lower-case word to row index in the embedding
+        matrix (see `load_word_vectors()` for details).
+      sentence_len_limit: Maximum allowed sentence length (# of words).
+        A value of <= 0 means unlimited. Sentences longer than this limit
+        are currently discarded, not truncated.
+    """
+
+    self._labels = []
+    self._premises = []
+    self._premise_transitions = []
+    self._hypotheses = []
+    self._hypothesis_transitions = []
+
+    with open(data_file, "rt") as f:
+      for i, line in enumerate(f):
+        if i == 0:
+          # Skip header line.
+          continue
+        items = line.split("\t")
+        if items[0] not in POSSIBLE_LABELS:
+          continue
+
+        premise_items = items[1].split(" ")
+        hypothesis_items = items[2].split(" ")
+        premise_words = get_non_parenthesis_words(premise_items)
+        hypothesis_words = get_non_parenthesis_words(hypothesis_items)
+
+        if (sentence_len_limit > 0 and
+            (len(premise_words) > sentence_len_limit or
+             len(hypothesis_words) > sentence_len_limit)):
+          # TODO(cais): Maybe truncate; do not discard.
+          continue
+
+        premise_ids = [
+            word2index.get(word, UNK_CODE) for word in premise_words]
+        hypothesis_ids = [
+            word2index.get(word, UNK_CODE) for word in hypothesis_words]
+
+        self._premises.append(premise_ids)
+        self._hypotheses.append(hypothesis_ids)
+        self._premise_transitions.append(get_shift_reduce(premise_items))
+        self._hypothesis_transitions.append(get_shift_reduce(hypothesis_items))
+        assert (len(self._premise_transitions[-1]) ==
+                2 * len(premise_words) - 1)
+        assert (len(self._hypothesis_transitions[-1]) ==
+                2 * len(hypothesis_words) - 1)
+
+        self._labels.append(POSSIBLE_LABELS.index(items[0]) + 1)
+
+    assert len(self._labels) == len(self._premises)
+    assert len(self._labels) == len(self._hypotheses)
+    assert len(self._labels) == len(self._premise_transitions)
+    assert len(self._labels) == len(self._hypothesis_transitions)
+
+  def num_batches(self, batch_size):
+    """Calculate number of batches given batch size."""
+    return int(math.ceil(len(self._labels) / batch_size))
+
+  def get_generator(self, batch_size):
+    """Obtain a generator for batched data.
+
+    All examples of this SnliData object are randomly shuffled, sorted
+    according to the maximum sentence length of the premise and hypothesis
+    sentences in the pair, and batched.
+
+    Args:
+      batch_size: Desired batch size.
+
+    Returns:
+      A generator for data batches. The generator yields a 5-tuple:
+        label: An array of the shape (batch_size,).
+        premise: An array of the shape (max_premise_len, batch_size), wherein
+          max_premise_len is the maximum length of the (padded) premise
+          sentence in the batch.
+        premise_transitions: An array of the shape (2 * max_premise_len -3,
+          batch_size).
+        hypothesis: Same as `premise`, but for hypothesis sentences.
+        hypothesis_transitions: Same as `premise_transitions`, but for
+          hypothesis sentences.
+      All the elements of the 5-tuple have dtype `int64`.
+    """
+    # Randomly shuffle examples.
+    zipped = list(zip(
+        self._labels, self._premises, self._premise_transitions,
+        self._hypotheses, self._hypothesis_transitions))
+    random.shuffle(zipped)
+    # Then sort the examples by maximum of the premise and hypothesis sentence
+    # lengths in the pair. During training, the batches are expected to be
+    # shuffled. So it is okay to leave them sorted by max length here.
+    (labels, premises, premise_transitions, hypotheses,
+     hypothesis_transitions) = zip(
+         *sorted(zipped, key=lambda x: max(len(x[1]), len(x[3]))))
+
+    def _generator():
+      begin = 0
+      while begin < len(labels):
+        # The sorting above and the batching here makes sure that sentences of
+        # similar max lengths are batched together, minimizing the inefficiency
+        # due to uneven max lengths. The sentences are batched differently in
+        # each call to get_generator() due to the shuffling before sotring
+        # above. The pad_and_reverse_word_ids() and pad_transitions() functions
+        # take care of any remaning unevenness of the max sentence lengths.
+        end = min(begin + batch_size, len(labels))
+        # Transpose, because the SPINN model requires time-major, instead of
+        # batch-major.
+        yield (labels[begin:end],
+               pad_and_reverse_word_ids(premises[begin:end]).T,
+               pad_transitions(premise_transitions[begin:end]).T,
+               pad_and_reverse_word_ids(hypotheses[begin:end]).T,
+               pad_transitions(hypothesis_transitions[begin:end]).T)
+        begin = end
+    return _generator
diff --git a/tensorflow/contrib/eager/python/examples/spinn/data_test.py b/tensorflow/contrib/eager/python/examples/spinn/data_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4f0b37c5099e45b7e3b258b258c0a203c36b3b7
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/spinn/data_test.py
@@ -0,0 +1,243 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Unit tests for SPINN data module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import shutil
+import tempfile
+
+import tensorflow as tf
+
+from tensorflow.contrib.eager.python.examples.spinn import data
+
+
+class DataTest(tf.test.TestCase):
+
+  def setUp(self):
+    super(DataTest, self).setUp()
+    self._temp_data_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    shutil.rmtree(self._temp_data_dir)
+    super(DataTest, self).tearDown()
+
+  def testGenNonParenthesisWords(self):
+    seq_with_parse = (
+        "( Man ( ( ( ( ( wearing pass ) ( on ( a lanyard ) ) ) and "
+        ") ( standing ( in ( ( a crowd ) ( of people ) ) ) ) ) . ) )")
+    self.assertEqual(
+        ["man", "wearing", "pass", "on", "a", "lanyard", "and", "standing",
+         "in", "a", "crowd", "of", "people", "."],
+        data.get_non_parenthesis_words(seq_with_parse.split(" ")))
+
+  def testGetShiftReduce(self):
+    seq_with_parse = (
+        "( Man ( ( ( ( ( wearing pass ) ( on ( a lanyard ) ) ) and "
+        ") ( standing ( in ( ( a crowd ) ( of people ) ) ) ) ) . ) )")
+    self.assertEqual(
+        [3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 2, 3, 3, 3, 3, 2, 3, 3, 2, 2, 2, 2, 2,
+         3, 2, 2], data.get_shift_reduce(seq_with_parse.split(" ")))
+
+  def testPadAndReverseWordIds(self):
+    id_sequences = [[0, 2, 3, 4, 5],
+                    [6, 7, 8],
+                    [9, 10, 11, 12, 13, 14, 15, 16]]
+    self.assertAllClose(
+        [[1, 1, 1, 1, 5, 4, 3, 2, 0],
+         [1, 1, 1, 1, 1, 1, 8, 7, 6],
+         [1, 16, 15, 14, 13, 12, 11, 10, 9]],
+        data.pad_and_reverse_word_ids(id_sequences))
+
+  def testPadTransitions(self):
+    unpadded = [[3, 3, 3, 2, 2, 2, 2],
+                [3, 3, 2, 2, 2]]
+    self.assertAllClose(
+        [[3, 3, 3, 2, 2, 2, 2],
+         [3, 3, 2, 2, 2, 1, 1]],
+        data.pad_transitions(unpadded))
+
+  def testCalculateBins(self):
+    length2count = {
+        1: 10,
+        2: 15,
+        3: 25,
+        4: 40,
+        5: 35,
+        6: 10}
+    self.assertEqual([2, 3, 4, 5, 6],
+                     data.calculate_bins(length2count, 20))
+    self.assertEqual([3, 4, 6], data.calculate_bins(length2count, 40))
+    self.assertEqual([4, 6], data.calculate_bins(length2count, 60))
+
+  def testLoadVoacbulary(self):
+    snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0")
+    fake_train_file = os.path.join(snli_1_0_dir, "snli_1.0_train.txt")
+    fake_dev_file = os.path.join(snli_1_0_dir, "snli_1.0_dev.txt")
+    os.makedirs(snli_1_0_dir)
+
+    with open(fake_train_file, "wt") as f:
+      f.write("gold_label\tsentence1_binary_parse\tsentence2_binary_parse\t"
+              "sentence1_parse\tsentence2_parse\tsentence1\tsentence2\t"
+              "captionID\tpairID\tlabel1\tlabel2\tlabel3\tlabel4\tlabel5\n")
+      f.write("neutral\t( ( Foo bar ) . )\t( ( foo baz ) . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+    with open(fake_dev_file, "wt") as f:
+      f.write("gold_label\tsentence1_binary_parse\tsentence2_binary_parse\t"
+              "sentence1_parse\tsentence2_parse\tsentence1\tsentence2\t"
+              "captionID\tpairID\tlabel1\tlabel2\tlabel3\tlabel4\tlabel5\n")
+      f.write("neutral\t( ( Quux quuz ) ? )\t( ( Corge grault ) ! )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Quux quuz?\t.Corge grault!\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+
+    vocab = data.load_vocabulary(self._temp_data_dir)
+    self.assertSetEqual(
+        {".", "?", "!", "foo", "bar", "baz", "quux", "quuz", "corge", "grault"},
+        vocab)
+
+  def testLoadVoacbularyWithoutFileRaisesError(self):
+    with self.assertRaisesRegexp(ValueError, "Cannot find SNLI data files at"):
+      data.load_vocabulary(self._temp_data_dir)
+
+    os.makedirs(os.path.join(self._temp_data_dir, "snli"))
+    with self.assertRaisesRegexp(ValueError, "Cannot find SNLI data files at"):
+      data.load_vocabulary(self._temp_data_dir)
+
+    os.makedirs(os.path.join(self._temp_data_dir, "snli/snli_1.0"))
+    with self.assertRaisesRegexp(ValueError, "Cannot find SNLI data files at"):
+      data.load_vocabulary(self._temp_data_dir)
+
+  def testLoadWordVectors(self):
+    glove_dir = os.path.join(self._temp_data_dir, "glove")
+    os.makedirs(glove_dir)
+    glove_file = os.path.join(glove_dir, "glove.42B.300d.txt")
+
+    words = [".", ",", "foo", "bar", "baz"]
+    with open(glove_file, "wt") as f:
+      for i, word in enumerate(words):
+        f.write("%s " % word)
+        for j in range(data.WORD_VECTOR_LEN):
+          f.write("%.5f" % (i * 0.1))
+          if j < data.WORD_VECTOR_LEN - 1:
+            f.write(" ")
+          else:
+            f.write("\n")
+
+    vocab = {"foo", "bar", "baz", "qux", "."}
+    # Notice that "qux" is not present in `words`.
+    word2index, embed = data.load_word_vectors(self._temp_data_dir, vocab)
+
+    self.assertEqual(6, len(word2index))
+    self.assertEqual(0, word2index["<unk>"])
+    self.assertEqual(1, word2index["<pad>"])
+    self.assertEqual(2, word2index["."])
+    self.assertEqual(3, word2index["foo"])
+    self.assertEqual(4, word2index["bar"])
+    self.assertEqual(5, word2index["baz"])
+    self.assertEqual((6, data.WORD_VECTOR_LEN), embed.shape)
+    self.assertAllClose([0.0] * data.WORD_VECTOR_LEN, embed[0, :])
+    self.assertAllClose([0.0] * data.WORD_VECTOR_LEN, embed[1, :])
+    self.assertAllClose([0.0] * data.WORD_VECTOR_LEN, embed[2, :])
+    self.assertAllClose([0.2] * data.WORD_VECTOR_LEN, embed[3, :])
+    self.assertAllClose([0.3] * data.WORD_VECTOR_LEN, embed[4, :])
+    self.assertAllClose([0.4] * data.WORD_VECTOR_LEN, embed[5, :])
+
+  def testLoadWordVectorsWithoutFileRaisesError(self):
+    vocab = {"foo", "bar", "baz", "qux", "."}
+    with self.assertRaisesRegexp(
+        ValueError, "Cannot find GloVe embedding file at"):
+      data.load_word_vectors(self._temp_data_dir, vocab)
+
+    os.makedirs(os.path.join(self._temp_data_dir, "glove"))
+    with self.assertRaisesRegexp(
+        ValueError, "Cannot find GloVe embedding file at"):
+      data.load_word_vectors(self._temp_data_dir, vocab)
+
+  def testSnliData(self):
+    """Unit test for SnliData objects."""
+    snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0")
+    fake_train_file = os.path.join(snli_1_0_dir, "snli_1.0_train.txt")
+    os.makedirs(snli_1_0_dir)
+
+    # Four sentences in total.
+    with open(fake_train_file, "wt") as f:
+      f.write("gold_label\tsentence1_binary_parse\tsentence2_binary_parse\t"
+              "sentence1_parse\tsentence2_parse\tsentence1\tsentence2\t"
+              "captionID\tpairID\tlabel1\tlabel2\tlabel3\tlabel4\tlabel5\n")
+      f.write("neutral\t( ( Foo bar ) . )\t( ( foo . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("contradiction\t( ( Bar foo ) . )\t( ( baz . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("entailment\t( ( Quux quuz ) . )\t( ( grault . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("entailment\t( ( Quuz quux ) . )\t( ( garply . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+
+    glove_dir = os.path.join(self._temp_data_dir, "glove")
+    os.makedirs(glove_dir)
+    glove_file = os.path.join(glove_dir, "glove.42B.300d.txt")
+
+    words = [".", "foo", "bar", "baz", "quux", "quuz", "grault", "garply"]
+    with open(glove_file, "wt") as f:
+      for i, word in enumerate(words):
+        f.write("%s " % word)
+        for j in range(data.WORD_VECTOR_LEN):
+          f.write("%.5f" % (i * 0.1))
+          if j < data.WORD_VECTOR_LEN - 1:
+            f.write(" ")
+          else:
+            f.write("\n")
+
+    vocab = data.load_vocabulary(self._temp_data_dir)
+    word2index, _ = data.load_word_vectors(self._temp_data_dir, vocab)
+
+    train_data = data.SnliData(fake_train_file, word2index)
+    self.assertEqual(4, train_data.num_batches(1))
+    self.assertEqual(2, train_data.num_batches(2))
+    self.assertEqual(2, train_data.num_batches(3))
+    self.assertEqual(1, train_data.num_batches(4))
+
+    generator = train_data.get_generator(2)()
+    for i in range(2):
+      label, prem, prem_trans, hypo, hypo_trans = next(generator)
+      self.assertEqual(2, len(label))
+      self.assertEqual((4, 2), prem.shape)
+      self.assertEqual((5, 2), prem_trans.shape)
+      self.assertEqual((3, 2), hypo.shape)
+      self.assertEqual((3, 2), hypo_trans.shape)
+
+
+if __name__ == "__main__":
+  tf.test.main()
diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..84e25cf81a2223800c47994b26d000caddee6b01
--- /dev/null
+++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py
@@ -0,0 +1,409 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import gc
+import glob
+import os
+import shutil
+import tempfile
+import time
+
+import numpy as np
+import tensorflow as tf
+
+# pylint: disable=g-bad-import-order
+import tensorflow.contrib.eager as tfe
+from tensorflow.contrib.eager.python.examples.spinn import data
+from third_party.examples.eager.spinn import spinn
+from tensorflow.contrib.summary import summary_test_util
+from tensorflow.python.eager import test
+from tensorflow.python.framework import test_util
+# pylint: enable=g-bad-import-order
+
+
+def _generate_synthetic_snli_data_batch(sequence_length,
+                                        batch_size,
+                                        vocab_size):
+  """Generate a fake batch of SNLI data for testing."""
+  with tf.device("cpu:0"):
+    labels = tf.random_uniform([batch_size], minval=1, maxval=4, dtype=tf.int64)
+    prem = tf.random_uniform(
+        (sequence_length, batch_size), maxval=vocab_size, dtype=tf.int64)
+    prem_trans = tf.constant(np.array(
+        [[3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3,
+          2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2,
+          3, 2, 2]] * batch_size, dtype=np.int64).T)
+    hypo = tf.random_uniform(
+        (sequence_length, batch_size), maxval=vocab_size, dtype=tf.int64)
+    hypo_trans = tf.constant(np.array(
+        [[3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3,
+          2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2,
+          3, 2, 2]] * batch_size, dtype=np.int64).T)
+  if tfe.num_gpus():
+    labels = labels.gpu()
+    prem = prem.gpu()
+    prem_trans = prem_trans.gpu()
+    hypo = hypo.gpu()
+    hypo_trans = hypo_trans.gpu()
+  return labels, prem, prem_trans, hypo, hypo_trans
+
+
+def _test_spinn_config(d_embed, d_out, logdir=None):
+  config_tuple = collections.namedtuple(
+      "Config", ["d_hidden", "d_proj", "d_tracker", "predict",
+                 "embed_dropout", "mlp_dropout", "n_mlp_layers", "d_mlp",
+                 "d_out", "projection", "lr", "batch_size", "epochs",
+                 "force_cpu", "logdir", "log_every", "dev_every", "save_every",
+                 "lr_decay_every", "lr_decay_by"])
+  return config_tuple(
+      d_hidden=d_embed,
+      d_proj=d_embed * 2,
+      d_tracker=8,
+      predict=False,
+      embed_dropout=0.1,
+      mlp_dropout=0.1,
+      n_mlp_layers=2,
+      d_mlp=32,
+      d_out=d_out,
+      projection=True,
+      lr=2e-2,
+      batch_size=2,
+      epochs=10,
+      force_cpu=False,
+      logdir=logdir,
+      log_every=1,
+      dev_every=2,
+      save_every=2,
+      lr_decay_every=1,
+      lr_decay_by=0.75)
+
+
+class SpinnTest(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    super(SpinnTest, self).setUp()
+    self._test_device = "gpu:0" if tfe.num_gpus() else "cpu:0"
+    self._temp_data_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    shutil.rmtree(self._temp_data_dir)
+    super(SpinnTest, self).tearDown()
+
+  def testBundle(self):
+    with tf.device(self._test_device):
+      lstm_iter = [np.array([[0, 1], [2, 3]], dtype=np.float32),
+                   np.array([[0, -1], [-2, -3]], dtype=np.float32),
+                   np.array([[0, 2], [4, 6]], dtype=np.float32),
+                   np.array([[0, -2], [-4, -6]], dtype=np.float32)]
+      out = spinn._bundle(lstm_iter)
+
+      self.assertEqual(2, len(out))
+      self.assertEqual(tf.float32, out[0].dtype)
+      self.assertEqual(tf.float32, out[1].dtype)
+      self.assertAllEqual(np.array([[0, 2, 0, -2, 0, 4, 0, -4]]).T,
+                          out[0].numpy())
+      self.assertAllEqual(np.array([[1, 3, -1, -3, 2, 6, -2, -6]]).T,
+                          out[1].numpy())
+
+  def testUnbunbdle(self):
+    with tf.device(self._test_device):
+      state = [np.array([[0, 1, 2], [3, 4, 5]], dtype=np.float32),
+               np.array([[0, -1, -2], [-3, -4, -5]], dtype=np.float32)]
+      out = spinn._unbundle(state)
+
+      self.assertEqual(2, len(out))
+      self.assertEqual(tf.float32, out[0].dtype)
+      self.assertEqual(tf.float32, out[1].dtype)
+      self.assertAllEqual(np.array([[0, 1, 2, 0, -1, -2]]),
+                          out[0].numpy())
+      self.assertAllEqual(np.array([[3, 4, 5, -3, -4, -5]]),
+                          out[1].numpy())
+
+  def testReducer(self):
+    with tf.device(self._test_device):
+      batch_size = 3
+      size = 10
+      tracker_size = 8
+      reducer = spinn.Reducer(size, tracker_size=tracker_size)
+
+      left_in = []
+      right_in = []
+      tracking = []
+      for _ in range(batch_size):
+        left_in.append(tf.random_normal((1, size * 2)))
+        right_in.append(tf.random_normal((1, size * 2)))
+        tracking.append(tf.random_normal((1, tracker_size * 2)))
+
+      out = reducer(left_in, right_in, tracking=tracking)
+      self.assertEqual(batch_size, len(out))
+      self.assertEqual(tf.float32, out[0].dtype)
+      self.assertEqual((1, size * 2), out[0].shape)
+
+  def testReduceTreeLSTM(self):
+    with tf.device(self._test_device):
+      size = 10
+      tracker_size = 8
+      reducer = spinn.Reducer(size, tracker_size=tracker_size)
+
+      lstm_in = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+                          [0, -1, -2, -3, -4, -5, -6, -7, -8, -9]],
+                         dtype=np.float32)
+      c1 = np.array([[0, 1], [2, 3]], dtype=np.float32)
+      c2 = np.array([[0, -1], [-2, -3]], dtype=np.float32)
+
+      h, c = reducer._tree_lstm(c1, c2, lstm_in)
+      self.assertEqual(tf.float32, h.dtype)
+      self.assertEqual(tf.float32, c.dtype)
+      self.assertEqual((2, 2), h.shape)
+      self.assertEqual((2, 2), c.shape)
+
+  def testTracker(self):
+    with tf.device(self._test_device):
+      batch_size = 2
+      size = 10
+      tracker_size = 8
+      buffer_length = 18
+      stack_size = 3
+
+      tracker = spinn.Tracker(tracker_size, False)
+      tracker.reset_state()
+
+      # Create dummy inputs for testing.
+      bufs = []
+      buf = []
+      for _ in range(buffer_length):
+        buf.append(tf.random_normal((batch_size, size * 2)))
+      bufs.append(buf)
+      self.assertEqual(1, len(bufs))
+      self.assertEqual(buffer_length, len(bufs[0]))
+      self.assertEqual((batch_size, size * 2), bufs[0][0].shape)
+
+      stacks = []
+      stack = []
+      for _ in range(stack_size):
+        stack.append(tf.random_normal((batch_size, size * 2)))
+      stacks.append(stack)
+      self.assertEqual(1, len(stacks))
+      self.assertEqual(3, len(stacks[0]))
+      self.assertEqual((batch_size, size * 2), stacks[0][0].shape)
+
+      for _ in range(2):
+        out1, out2 = tracker(bufs, stacks)
+        self.assertIsNone(out2)
+        self.assertEqual(batch_size, len(out1))
+        self.assertEqual(tf.float32, out1[0].dtype)
+        self.assertEqual((1, tracker_size * 2), out1[0].shape)
+
+        self.assertEqual(tf.float32, tracker.state.c.dtype)
+        self.assertEqual((batch_size, tracker_size), tracker.state.c.shape)
+        self.assertEqual(tf.float32, tracker.state.h.dtype)
+        self.assertEqual((batch_size, tracker_size), tracker.state.h.shape)
+
+  def testSPINN(self):
+    with tf.device(self._test_device):
+      embedding_dims = 10
+      d_tracker = 8
+      sequence_length = 15
+      num_transitions = 27
+
+      config_tuple = collections.namedtuple(
+          "Config", ["d_hidden", "d_proj", "d_tracker", "predict"])
+      config = config_tuple(
+          embedding_dims, embedding_dims * 2, d_tracker, False)
+      s = spinn.SPINN(config)
+
+      # Create some fake data.
+      buffers = tf.random_normal((sequence_length, 1, config.d_proj))
+      transitions = tf.constant(
+          [[3], [3], [2], [3], [3], [3], [2], [2], [2], [3], [3], [3],
+           [2], [3], [3], [2], [2], [3], [3], [3], [2], [2], [2], [2],
+           [3], [2], [2]], dtype=tf.int64)
+      self.assertEqual(tf.int64, transitions.dtype)
+      self.assertEqual((num_transitions, 1), transitions.shape)
+
+      out = s(buffers, transitions, training=True)
+      self.assertEqual(tf.float32, out.dtype)
+      self.assertEqual((1, embedding_dims), out.shape)
+
+  def testSNLIClassifierAndTrainer(self):
+    with tf.device(self._test_device):
+      vocab_size = 40
+      batch_size = 2
+      d_embed = 10
+      sequence_length = 15
+      d_out = 4
+
+      config = _test_spinn_config(d_embed, d_out)
+
+      # Create fake embedding matrix.
+      embed = tf.random_normal((vocab_size, d_embed))
+
+      model = spinn.SNLIClassifier(config, embed)
+      trainer = spinn.SNLIClassifierTrainer(model, config.lr)
+
+      (labels, prem, prem_trans, hypo,
+       hypo_trans) = _generate_synthetic_snli_data_batch(sequence_length,
+                                                         batch_size,
+                                                         vocab_size)
+
+      # Invoke model under non-training mode.
+      logits = model(prem, prem_trans, hypo, hypo_trans, training=False)
+      self.assertEqual(tf.float32, logits.dtype)
+      self.assertEqual((batch_size, d_out), logits.shape)
+
+      # Invoke model under training model.
+      logits = model(prem, prem_trans, hypo, hypo_trans, training=True)
+      self.assertEqual(tf.float32, logits.dtype)
+      self.assertEqual((batch_size, d_out), logits.shape)
+
+      # Calculate loss.
+      loss1 = trainer.loss(labels, logits)
+      self.assertEqual(tf.float32, loss1.dtype)
+      self.assertEqual((), loss1.shape)
+
+      loss2, logits = trainer.train_batch(
+          labels, prem, prem_trans, hypo, hypo_trans)
+      self.assertEqual(tf.float32, loss2.dtype)
+      self.assertEqual((), loss2.shape)
+      self.assertEqual(tf.float32, logits.dtype)
+      self.assertEqual((batch_size, d_out), logits.shape)
+      # Training on the batch should have led to a change in the loss value.
+      self.assertNotEqual(loss1.numpy(), loss2.numpy())
+
+  def testTrainSpinn(self):
+    """Test with fake toy SNLI data and GloVe vectors."""
+
+    # 1. Create and load a fake SNLI data file and a fake GloVe embedding file.
+    snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0")
+    fake_train_file = os.path.join(snli_1_0_dir, "snli_1.0_train.txt")
+    os.makedirs(snli_1_0_dir)
+
+    # Four sentences in total.
+    with open(fake_train_file, "wt") as f:
+      f.write("gold_label\tsentence1_binary_parse\tsentence2_binary_parse\t"
+              "sentence1_parse\tsentence2_parse\tsentence1\tsentence2\t"
+              "captionID\tpairID\tlabel1\tlabel2\tlabel3\tlabel4\tlabel5\n")
+      f.write("neutral\t( ( Foo bar ) . )\t( ( foo . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("contradiction\t( ( Bar foo ) . )\t( ( baz . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("entailment\t( ( Quux quuz ) . )\t( ( grault . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+      f.write("entailment\t( ( Quuz quux ) . )\t( ( garply . )\t"
+              "DummySentence1Parse\tDummySentence2Parse\t"
+              "Foo bar.\tfoo baz.\t"
+              "4705552913.jpg#2\t4705552913.jpg#2r1n\t"
+              "neutral\tentailment\tneutral\tneutral\tneutral\n")
+
+    glove_dir = os.path.join(self._temp_data_dir, "glove")
+    os.makedirs(glove_dir)
+    glove_file = os.path.join(glove_dir, "glove.42B.300d.txt")
+
+    words = [".", "foo", "bar", "baz", "quux", "quuz", "grault", "garply"]
+    with open(glove_file, "wt") as f:
+      for i, word in enumerate(words):
+        f.write("%s " % word)
+        for j in range(data.WORD_VECTOR_LEN):
+          f.write("%.5f" % (i * 0.1))
+          if j < data.WORD_VECTOR_LEN - 1:
+            f.write(" ")
+          else:
+            f.write("\n")
+
+    vocab = data.load_vocabulary(self._temp_data_dir)
+    word2index, embed = data.load_word_vectors(self._temp_data_dir, vocab)
+
+    train_data = data.SnliData(fake_train_file, word2index)
+    dev_data = data.SnliData(fake_train_file, word2index)
+    test_data = data.SnliData(fake_train_file, word2index)
+    print(embed)
+
+    # 2. Create a fake config.
+    config = _test_spinn_config(
+        data.WORD_VECTOR_LEN, 4,
+        logdir=os.path.join(self._temp_data_dir, "logdir"))
+
+    # 3. Test training of a SPINN model.
+    spinn.train_spinn(embed, train_data, dev_data, test_data, config)
+
+    # 4. Load train loss values from the summary files and verify that they
+    #    decrease with training.
+    summary_file = glob.glob(os.path.join(config.logdir, "events.out.*"))[0]
+    events = summary_test_util.events_from_file(summary_file)
+    train_losses = [event.summary.value[0].simple_value for event in events
+                    if event.summary.value
+                    and event.summary.value[0].tag == "train/loss"]
+    self.assertEqual(config.epochs, len(train_losses))
+    self.assertLess(train_losses[-1], train_losses[0])
+
+
+class EagerSpinnSNLIClassifierBenchmark(test.Benchmark):
+
+  def benchmarkEagerSpinnSNLIClassifier(self):
+    test_device = "gpu:0" if tfe.num_gpus() else "cpu:0"
+    with tf.device(test_device):
+      burn_in_iterations = 2
+      benchmark_iterations = 10
+
+      vocab_size = 1000
+      batch_size = 128
+      sequence_length = 15
+      d_embed = 200
+      d_out = 4
+
+      embed = tf.random_normal((vocab_size, d_embed))
+
+      config = _test_spinn_config(d_embed, d_out)
+      model = spinn.SNLIClassifier(config, embed)
+      trainer = spinn.SNLIClassifierTrainer(model, config.lr)
+
+      (labels, prem, prem_trans, hypo,
+       hypo_trans) = _generate_synthetic_snli_data_batch(sequence_length,
+                                                         batch_size,
+                                                         vocab_size)
+
+      for _ in range(burn_in_iterations):
+        trainer.train_batch(labels, prem, prem_trans, hypo, hypo_trans)
+
+      gc.collect()
+      start_time = time.time()
+      for _ in xrange(benchmark_iterations):
+        trainer.train_batch(labels, prem, prem_trans, hypo, hypo_trans)
+      wall_time = time.time() - start_time
+      # Named "examples"_per_sec to conform with other benchmarks.
+      extras = {"examples_per_sec": benchmark_iterations / wall_time}
+      self.report_benchmark(
+          name="Eager_SPINN_SNLIClassifier_Benchmark",
+          iters=benchmark_iterations,
+          wall_time=wall_time,
+          extras=extras)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/eager/python/g3doc/guide.md b/tensorflow/contrib/eager/python/g3doc/guide.md
index 147b7047f42b7ccba5829b61370e82e217ce5838..0095ffa0db99d46d25654d73504d0d7d41c18b6f 100644
--- a/tensorflow/contrib/eager/python/g3doc/guide.md
+++ b/tensorflow/contrib/eager/python/g3doc/guide.md
@@ -757,7 +757,7 @@ For example, to record summaries once every 100 global steps, use:
 
 ```python
 tf.train.get_or_create_global_step()  # Ensuring the global step variable exists
-writer = tf.contrib.summary.create_summary_file_writer(logdir)
+writer = tf.contrib.summary.create_file_writer(logdir)
 
 for _ in range(iterations):
   with writer.as_default():
diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py
index 2f8016ede3caee6dbb6fd8f5226f1464b5c3976b..bf029ca5f9dddb152274da6a1cc96bea7981d8fd 100644
--- a/tensorflow/contrib/eager/python/metrics_impl.py
+++ b/tensorflow/contrib/eager/python/metrics_impl.py
@@ -49,6 +49,20 @@ class Metric(object):
 
   Example use with graph execution:
 
+  ```python
+  m = SomeMetric(...)
+  inputs = ... # Some tensors to compute the metric on.
+  m_update = m(inputs)
+  # Variables defined in first call, so get the initialization op afterwards.
+  m_init = m.init_variables()  # or tf.global_variables_initializer()
+  m_result = m.result()
+  with tf.Session() as sess:
+    sess.run(m_init)
+    for input in ...:
+      sess.run(m_update)
+    print(sess.run(m_result))
+  ```
+  Example use with graph execution with placeholders and feed_dict:
   ```python
   m = SomeMetric(...)
   m_placeholder = tf.placeholder(...)
@@ -107,6 +121,7 @@ class Metric(object):
     """Returns op to execute to update this metric for these inputs.
 
     Returns None if eager execution is enabled.
+    Returns a graph-mode function if graph execution is enabled.
 
     Args:
       *args:
@@ -183,6 +198,13 @@ class Metric(object):
     """Computes and returns a final value for the metric."""
     raise NotImplementedError("Metrics must define a result() member function")
 
+  def value(self):
+    """In graph mode returns the result Tensor while in eager the callable."""
+    if context.in_graph_mode():
+      return self.result()
+    else:
+      return self.result
+
   # We can support two different strategies of for doing data-parallel
   # distributed metric computations:
   # * Put metric variables on the first device and rely on small
diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py
index 96eb1b4f2a0e4c4af1f3310a2801b1b6aee285d6..9cf34fd9b2dcf1b123cacc6863af817419eda007 100644
--- a/tensorflow/contrib/eager/python/metrics_test.py
+++ b/tensorflow/contrib/eager/python/metrics_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.eager import context
 from tensorflow.python.eager import test
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.training import training_util
 
@@ -67,7 +68,7 @@ class MetricsTest(test.TestCase):
     m([1, 10, 100])
     training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logdir, max_queue=0,
         name="t0").as_default(), summary_ops.always_record_summaries():
       m.result()  # As a side-effect will write summaries.
@@ -137,7 +138,7 @@ class MetricsTest(test.TestCase):
     self.assertEqual(m1.name, "has space")
     self.assertEqual(m1.numer.name, "has_space/numer:0")
 
-  def testGraph(self):
+  def testGraphWithPlaceholder(self):
     with context.graph_mode(), self.test_session() as sess:
       m = metrics.Mean()
       p = array_ops.placeholder(dtypes.float32)
@@ -153,6 +154,22 @@ class MetricsTest(test.TestCase):
       sess.run(accumulate, feed_dict={p: 7})
       self.assertAllEqual(m.result().eval(), 7)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testGraphAndEagerTensor(self):
+    m = metrics.Mean()
+    inputs = ops.convert_to_tensor([1.0, 2.0])
+    accumulate = m(inputs)
+    result = m.result()
+    self.evaluate(m.init_variables())
+    self.evaluate(accumulate)
+    self.assertEqual(self.evaluate(result), 1.5)
+    # Second init resets all the variables.
+    self.evaluate(m.init_variables())
+    inputs = ops.convert_to_tensor([2.0, 3.0])
+    self.evaluate(m(inputs))
+    value = m.value()
+    self.assertEqual(self.evaluate(value), 2.5)
+
   def testTwoMeansGraph(self):
     # Verify two metrics with the same class and name don't
     # accidentally share state.
diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py
index 0388aaa8495f380595b2635529bc2e33e808b06f..e3c13cbd2e8ccd2ab79da74e0e97905c6ed5c02d 100644
--- a/tensorflow/contrib/eager/python/network.py
+++ b/tensorflow/contrib/eager/python/network.py
@@ -451,8 +451,30 @@ class Network(base.Layer):
         "at https://github.com/tensorflow/tensorflow/issues/new if this is "
         "important to you")
 
+  def add_loss(self, losses, inputs=None):
+    raise RuntimeError(
+        "add_loss is not supported in Network class yet. Please file an issue "
+        "at https://github.com/tensorflow/tensorflow/issues/new if this is "
+        "important to you")
+
+  @property
+  def losses(self):
+    """Gather losses from `Layer`s in the `Network`.
+
+    Note that when executing eagerly, `Layer.losses` evaluates
+    regularizers. When using graph execution, variable regularization ops have
+    already been created and are simply returned here.
+
+    Returns:
+      A list of tensors.
+    """
+    layer_losses = []
+    for layer in self.layers:
+      layer_losses.extend(layer.losses)
+    return layer_losses
+
   # TODO(allenl): Support other Layer methods needed for graph mode, such as for
-  # losses and updates
+  # updates
 
 
 class Sequential(Network):
diff --git a/tensorflow/contrib/eager/python/network_test.py b/tensorflow/contrib/eager/python/network_test.py
index e7835a63e6db926aa2d4b6c76c681c8a301757bd..8e6b947e5cb28910bcb4877aa66150992a8d6445 100644
--- a/tensorflow/contrib/eager/python/network_test.py
+++ b/tensorflow/contrib/eager/python/network_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 import gc
 
 from tensorflow.contrib.eager.python import network
+from tensorflow.contrib.layers.python.layers import regularizers
 from tensorflow.python.eager import context
 from tensorflow.python.eager import function
 from tensorflow.python.eager import test
@@ -45,6 +46,22 @@ class MyNetwork(network.Network):
     return self.l1(x)
 
 
+class RegularizedNetwork(network.Network):
+
+  def __init__(self):
+    super(RegularizedNetwork, self).__init__()
+    self.l1 = self.track_layer(core.Dense(
+        1,
+        bias_regularizer=regularizers.l1_regularizer(2.0),
+        kernel_regularizer=regularizers.l1_regularizer(2.0)))
+    self.l2 = self.track_layer(core.Dense(
+        1,
+        bias_regularizer=regularizers.l1_regularizer(2.0)))
+
+  def call(self, values):
+    return self.l2(self.l1(values))
+
+
 class NetworkTest(test.TestCase):
 
   def _save_modify_load_network_built(self, net, global_step=None):
@@ -88,15 +105,13 @@ class NetworkTest(test.TestCase):
     result = net(constant_op.constant([[2.0]]))
     self.assertEqual(34.0, self.evaluate(result))
 
-  # TODO(akshayka): This test should be changed once an API for compiling
-  # `call` into a defun is implemented.
   def testReplacingNetworkCallWithDefun(self):
     net = MyNetwork(name="abcd")
+    net.call = function.defun(net.call)
     x = constant_op.constant([[2.0]])
     net(x)  # Force variables to be created.
     self.evaluate(net.trainable_variables[0].assign([[17.0]]))
 
-    net.call = function.defun(net.call)
     result = net(x)  # Build and execute the TensorFlow function
     self.assertEqual(34.0, self.evaluate(result))
 
@@ -484,6 +499,18 @@ class NetworkTest(test.TestCase):
       _check_op_prefixes(expected_prefix="my_network_1/dense/",
                          checked_ops=checked_ops)
 
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testVariableRegularizers(self):
+    net = RegularizedNetwork()
+    net(constant_op.constant([[1.]]))
+    self.evaluate(net.variables[0].assign([[2.]]))
+    self.evaluate(net.variables[1].assign([3.]))
+    self.evaluate(net.variables[2].assign([[-2.]]))
+    self.evaluate(net.variables[3].assign([4.]))
+    self.assertAllEqual([4., 6., 8.], self.evaluate(net.losses))
+    self.evaluate(net.variables[3].assign([5.]))
+    self.assertAllEqual([4., 6., 10.], self.evaluate(net.losses))
+
   @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
   def testDuplicateNameError(self):
     one = constant_op.constant([[1.]])
diff --git a/tensorflow/contrib/eager/python/summary_writer.py b/tensorflow/contrib/eager/python/summary_writer.py
deleted file mode 100644
index 5d8c41b545b3c9fd03af85f302ba05a394f085a4..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/eager/python/summary_writer.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""TensorBoard Summary Writer for TensorFlow Eager Execution."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import uuid
-
-from tensorflow.contrib.summary import gen_summary_ops
-from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import summary_op_util
-from tensorflow.python.ops import variable_scope
-
-
-def _maybe_cpu(v):
-  if isinstance(v, (ops.EagerTensor, ops.Tensor)):
-    return v.cpu()
-  else:
-    return v
-
-
-def _summary_writer_function(name, tensor, function, family=None):
-  def record():
-    with summary_op_util.summary_scope(
-        name, family, values=[tensor]) as (tag, scope):
-      function(tag, scope)
-      return True
-  return record
-
-
-class SummaryWriter(object):
-  """Writes summaries for TensorBoard, compatible with eager execution.
-
-  This class is the supported way of writing TensorBoard summaries under
-  eager execution.
-  """
-
-  _CPU_DEVICE = "cpu:0"
-
-  def __init__(self,
-               logdir,
-               max_queue=10,
-               flush_secs=120,
-               filename_suffix=""):
-    """Summary writer for TensorBoard, compatible with eager execution.
-
-    If necessary, multiple instances of `SummaryWriter` can be created, with
-    distinct `logdir`s and `name`s. Each `SummaryWriter` instance will retain
-    its independent `global_step` counter and data writing destination.
-
-    Example:
-    ```python
-    writer = tfe.SummaryWriter("my_model")
-
-    # ... Code that sets up the model and data batches ...
-
-    for _ in xrange(train_iters):
-      loss = model.train_batch(batch)
-      writer.scalar("loss", loss)
-      writer.step()
-    ```
-
-    Args:
-      logdir: Directory in which summary files will be written.
-      max_queue: Number of summary items to buffer before flushing to
-        filesystem. If 0, summaries will be flushed immediately.
-      flush_secs: Number of secondsbetween forced commits to disk.
-      filename_suffix: Suffix of the event protobuf files in which the summary
-        data are stored.
-
-    Raises:
-      ValueError: If this constructor is called not under eager execution.
-    """
-    # TODO(apassos, ashankar): Make this class and the underlying
-    # contrib.summary_ops compatible with graph model and remove this check.
-    if not context.in_eager_mode():
-      raise ValueError(
-          "Use of SummaryWriter is currently supported only with eager "
-          "execution enabled. File an issue at "
-          "https://github.com/tensorflow/tensorflow/issues/new to express "
-          "interest in fixing this.")
-
-    # TODO(cais): Consider adding name keyword argument, which if None or empty,
-    # will register the global global_step that training_util.get_global_step()
-    # can find.
-    with context.device(self._CPU_DEVICE):
-      self._name = uuid.uuid4().hex
-      self._global_step = 0
-      self._global_step_tensor = variable_scope.get_variable(
-          "global_step/summary_writer/" + self._name,
-          shape=[], dtype=dtypes.int64,
-          initializer=init_ops.zeros_initializer())
-      self._global_step_dirty = False
-      self._resource = gen_summary_ops.summary_writer(shared_name=self._name)
-      gen_summary_ops.create_summary_file_writer(
-          self._resource, logdir, max_queue, flush_secs, filename_suffix)
-      # Delete the resource when this object is deleted
-      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
-          handle=self._resource, handle_device=self._CPU_DEVICE)
-
-  def step(self):
-    """Increment the global step counter of this SummaryWriter instance."""
-    self._global_step += 1
-    self._global_step_dirty = True
-
-  @property
-  def global_step(self):
-    """Obtain the current global_step value of this SummaryWriter instance.
-
-    Returns:
-      An `int` representing the current value of the global_step of this
-       `SummaryWriter` instance.
-    """
-    return self._global_step
-
-  def _update_global_step_tensor(self):
-    with context.device(self._CPU_DEVICE):
-      if self._global_step_dirty:
-        self._global_step_dirty = False
-        return state_ops.assign(self._global_step_tensor, self._global_step)
-      else:
-        return self._global_step_tensor
-
-  def generic(self, name, tensor, metadata, family=None):
-    """Write a generic-type summary.
-
-    Args:
-      name: A name for the generated node. Will also serve as the series name in
-        TensorBoard.
-      tensor: A `Tensor` or compatible value type containing the value of the
-        summary.
-      metadata: Metadata about the summary.
-      family: Optional; if provided, used as the prefix of the summary tag name,
-        which controls the tab name used for display on Tensorboard.
-    """
-    with context.device(self._CPU_DEVICE):
-      with summary_op_util.summary_scope(
-          name, family, values=[tensor]) as (tag, scope):
-        gen_summary_ops.write_summary(
-            self._resource,
-            self._update_global_step_tensor(),
-            _maybe_cpu(tensor),
-            tag,
-            _maybe_cpu(metadata),
-            name=scope)
-
-  def scalar(self, name, tensor, family=None):
-    """Write a scalar summary.
-
-    Args:
-      name: A name for the generated node. Will also serve as the series name in
-        TensorBoard.
-      tensor: A real numeric `Tensor` or compatible value type containing a
-        single value.
-      family: Optional; if provided, used as the prefix of the summary tag name,
-        which controls the tab name used for display on Tensorboard.
-
-    Returns:
-      A summary writer function for scalars.
-    """
-    with context.device(self._CPU_DEVICE):
-      with summary_op_util.summary_scope(
-          name, family, values=[tensor]) as (tag, scope):
-        gen_summary_ops.write_scalar_summary(
-            self._resource, self._update_global_step_tensor(),
-            tag, _maybe_cpu(tensor), name=scope)
-
-  def histogram(self, name, tensor, family=None):
-    """Write a histogram summary.
-
-    Args:
-      name: A name for the generated node. Will also serve as a series name in
-        TensorBoard.
-      tensor: A real numeric `Tensor` or compatible value type. Any shape.
-        Values to use to build the histogram.
-      family: Optional; if provided, used as the prefix of the summary tag name,
-        which controls the tab name used for display on Tensorboard.
-    """
-    with context.device(self._CPU_DEVICE):
-      with summary_op_util.summary_scope(
-          name, family, values=[tensor]) as (tag, scope):
-        gen_summary_ops.write_histogram_summary(
-            self._resource, self._update_global_step_tensor(),
-            tag, _maybe_cpu(tensor), name=scope)
-
-  def image(self, name, tensor, bad_color=None, max_images=3, family=None):
-    """Write an image summary."""
-    with context.device(self._CPU_DEVICE):
-      if bad_color is None:
-        bad_color_ = constant_op.constant([255, 0, 0, 255], dtype=dtypes.uint8)
-      with summary_op_util.summary_scope(
-          name, family, values=[tensor]) as (tag, scope):
-        gen_summary_ops.write_image_summary(
-            self._resource, self._update_global_step_tensor(),
-            tag, _maybe_cpu(tensor), bad_color_, max_images,
-            name=scope)
-
-  def audio(self, name, tensor, sample_rate, max_outputs, family=None):
-    """Write an audio summary.
-
-    Args:
-      name: A name for the generated node. Will also serve as a series name in
-        TensorBoard.
-      tensor: A 3-D `float32` `Tensor` of shape `[batch_size, frames, channels]`
-        or a 2-D `float32` `Tensor` of shape `[batch_size, frames]`, or
-        compatible value type.
-      sample_rate: A Scalar `float32` `Tensor` indicating the sample rate of the
-        signal in hertz.
-      max_outputs: Max number of batch elements to generate audio for.
-      family: Optional; if provided, used as the prefix of the summary tag name,
-        which controls the tab name used for display on Tensorboard.
-    """
-    with context.device(self._CPU_DEVICE):
-      with summary_op_util.summary_scope(
-          name, family, values=[tensor]) as (tag, scope):
-        gen_summary_ops.write_audio_summary(
-            self._resource, self._update_global_step_tensor(),
-            tag,
-            _maybe_cpu(tensor),
-            sample_rate=_maybe_cpu(sample_rate),
-            max_outputs=max_outputs,
-            name=scope)
diff --git a/tensorflow/contrib/eager/python/summary_writer_test.py b/tensorflow/contrib/eager/python/summary_writer_test.py
deleted file mode 100644
index 5ebb36d04fcba8f4558fa1c09716314af42f559f..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/eager/python/summary_writer_test.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Unit tests for eager execution SummaryWriter."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import shutil
-import tempfile
-
-import numpy as np
-
-from tensorflow.contrib.eager.python import summary_writer
-from tensorflow.core.util import event_pb2
-from tensorflow.python.eager import context
-from tensorflow.python.eager import test
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.lib.io import tf_record
-from tensorflow.python.platform import gfile
-
-
-class SummaryWriterTest(test.TestCase):
-
-  def setUp(self):
-    super(SummaryWriterTest, self).setUp()
-    self._test_device = "gpu:0" if context.num_gpus() else "cpu:0"
-    self._tmp_logdir = tempfile.mkdtemp()
-    with context.device(self._test_device):
-      # Use max_queue=0 so that summaries are immediately flushed to filesystem,
-      # making testing easier.
-      self._writer = summary_writer.SummaryWriter(self._tmp_logdir, max_queue=0)
-
-  def tearDown(self):
-    if os.path.isdir(self._tmp_logdir):
-      shutil.rmtree(self._tmp_logdir)
-    super(SummaryWriterTest, self).tearDown()
-
-  def _readLastEvent(self, logdir=None):
-    if not logdir:
-      logdir = self._tmp_logdir
-    files = [f for f in gfile.ListDirectory(logdir)
-             if not gfile.IsDirectory(os.path.join(logdir, f))]
-    file_path = os.path.join(logdir, files[0])
-    records = list(tf_record.tf_record_iterator(file_path))
-    event = event_pb2.Event()
-    event.ParseFromString(records[-1])
-    return event
-
-  def testGlobalStep(self):
-    with context.device(self._test_device):
-      orig_step = self._writer.global_step
-      self._writer.step()
-      self.assertEqual(orig_step + 1, self._writer.global_step)
-      self.assertEqual(orig_step + 1, self._writer.global_step)
-      self._writer.step()
-      self._writer.step()
-      self.assertEqual(orig_step + 3, self._writer.global_step)
-
-  def testGenericSummary(self):
-    with context.device(self._test_device):
-      x = constant_op.constant(1337.0)
-      with context.device("cpu:0"):
-        metadata = constant_op.constant("foo")
-      self._writer.generic("x", x, metadata)
-      event = self._readLastEvent()
-      self.assertEqual("x", event.summary.value[0].tag)
-
-  def testScalarSummary(self):
-    with context.device(self._test_device):
-      x = constant_op.constant(1337.0)
-      self._writer.scalar("x", x)
-      event = self._readLastEvent()
-      self.assertTrue("x", event.summary.value[0].tag)
-      self.assertEqual(1337.0, event.summary.value[0].simple_value)
-
-  def testHistogramSummary(self):
-    with context.device(self._test_device):
-      y = constant_op.constant([1.0, 3.0, 3.0, 7.0])
-      self._writer.histogram("y", y)
-      event = self._readLastEvent()
-      self.assertEqual("y", event.summary.value[0].tag)
-      self.assertTrue(event.summary.value[0].histo)
-
-  def testImageSummary(self):
-    with context.device(self._test_device):
-      a = constant_op.constant([[10.0, 20.0], [-20.0, -10.0]])
-      self._writer.histogram("image1", a)
-      event = self._readLastEvent()
-      self.assertEqual("image1", event.summary.value[0].tag)
-      self.assertTrue(event.summary.value[0].image)
-
-  def testAudioSummary(self):
-    with context.device(self._test_device):
-      w = constant_op.constant(np.random.rand(3, 10, 2), dtype=dtypes.float32)
-      fs = constant_op.constant(44100.0, dtype=dtypes.float32)
-      max_outputs = 1
-      self._writer.audio("audio1", w, fs, max_outputs)
-      event = self._readLastEvent()
-      self.assertTrue(event.summary.value[0].audio)
-
-  def testTwoSummaryWritersGlobalStepsWorkWithoutCrosstalk(self):
-    tmp_logdir2 = os.path.join(self._tmp_logdir, "_writer2_")
-    writer2 = summary_writer.SummaryWriter(tmp_logdir2, max_queue=0)
-
-    self.assertEqual(0, writer2.global_step)
-    self._writer.step()
-    self.assertEqual(0, writer2.global_step)
-    writer2.step()
-    writer2.step()
-    writer2.step()
-    self.assertEqual(3, writer2.global_step)
-
-    x = constant_op.constant(1337.0)
-    writer_orig_step = self._writer.global_step
-    self._writer.step()
-    self._writer.scalar("x", x)
-
-    event = self._readLastEvent()
-    self.assertEqual(writer_orig_step + 1, event.step)
-
-    writer2.scalar("x", x)
-    event = self._readLastEvent(tmp_logdir2)
-    self.assertEqual(3, event.step)
-
-    self._writer.step()
-    self._writer.scalar("x", x)
-
-    event = self._readLastEvent()
-    self.assertEqual(writer_orig_step + 2, event.step)
-
-
-# TODO(cais): Add performance benchmark for SummaryWriter.
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py
index 1697c879def8af5c05f3c9b11d318d570785d6de..712d1cb94d2f565bf6216f6c07a45d3d855efe9c 100644
--- a/tensorflow/contrib/eager/python/tfe.py
+++ b/tensorflow/contrib/eager/python/tfe.py
@@ -23,7 +23,9 @@ To use, at program startup, call `tfe.enable_eager_execution()`.
 @@list_devices
 @@num_gpus
 
+@@py_func
 @@defun
+@@make_template
 @@implicit_gradients
 @@implicit_value_and_gradients
 @@gradients_function
@@ -50,6 +52,7 @@ To use, at program startup, call `tfe.enable_eager_execution()`.
 @@EagerVariableStore
 
 @@Network
+@@Sequential
 @@save_network_checkpoint
 @@restore_network_checkpoint
 
@@ -74,6 +77,7 @@ from __future__ import print_function
 from tensorflow.contrib.eager.python import metrics
 from tensorflow.contrib.eager.python.datasets import Iterator
 from tensorflow.contrib.eager.python.network import Network
+from tensorflow.contrib.eager.python.network import Sequential
 from tensorflow.contrib.eager.python.network import save_network_checkpoint
 from tensorflow.contrib.eager.python.network import restore_network_checkpoint
 from tensorflow.contrib.eager.python.saver import get_optimizer_variables
@@ -101,9 +105,13 @@ from tensorflow.python.framework.test_util import IsolateTest
 from tensorflow.python.framework.test_util import run_in_graph_and_eager_modes as run_test_in_graph_and_eager_modes
 from tensorflow.python.ops.resource_variable_ops import ResourceVariable as Variable
 from tensorflow.python.ops.variable_scope import EagerVariableStore
+from tensorflow.python.ops import script_ops
+from tensorflow.python.ops import template
 from tensorflow.python.util.all_util import remove_undocumented
 
+py_func = script_ops.eager_py_func
 defun = function.defun
+make_template = template.make_template_internal
 implicit_gradients = backprop.implicit_grad
 implicit_value_and_gradients = backprop.implicit_val_and_grad
 gradients_function = backprop.gradients_function
diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index 8395e2db5ec0ce6f4adae5fa2467159549e70143..cdbe05e4d2d7117c5acb12d679f359a9db17c9cc 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -88,8 +88,9 @@ py_library(
 
 py_test(
     name = "dnn_linear_combined_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/estimator/dnn_linear_combined_test.py"],
+    shard_count = 3,
     srcs_version = "PY2AND3",
     tags = [
         "no_pip",
@@ -204,6 +205,7 @@ py_test(
         "//tensorflow/python/estimator:metric_keys",
         "//tensorflow/python/estimator:model_fn",
         "//tensorflow/python/estimator:prediction_keys",
+        "//tensorflow/python/ops/losses",
         "//tensorflow/python/saved_model:signature_constants",
         "//third_party/py/numpy",
         "@six_archive//:six",
@@ -330,23 +332,24 @@ py_library(
         "//tensorflow/python:device",
         "//tensorflow/python:device_lib",
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:gradients",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
         "//tensorflow/python/estimator:export_output",
         "//tensorflow/python/estimator:model_fn",
         "//tensorflow/python/estimator:util",
+        "//tensorflow/python/ops/losses",
         "@six_archive//:six",
     ],
 )
 
 cuda_py_test(
     name = "replicate_model_fn_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/estimator/replicate_model_fn_test.py"],
     additional_deps = [
         "//tensorflow/python/estimator",
@@ -374,5 +377,9 @@ cuda_py_test(
         "//tensorflow/python:variables",
         ":replicate_model_fn",
     ],
-    tags = ["multi_gpu"],
+    tags = [
+        "manual",
+        "multi_gpu",
+        "notap",
+    ],
 )
diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py
index 8191e06faed004df6927708ea04a67b90bd464de..0f75b77050b0ba4c752a6a74fdc7024170b6f318 100644
--- a/tensorflow/contrib/estimator/__init__.py
+++ b/tensorflow/contrib/estimator/__init__.py
@@ -26,6 +26,7 @@ from tensorflow.contrib.estimator.python.estimator.head import *
 from tensorflow.contrib.estimator.python.estimator.linear import *
 from tensorflow.contrib.estimator.python.estimator.logit_fns import *
 from tensorflow.contrib.estimator.python.estimator.multi_head import *
+from tensorflow.contrib.estimator.python.estimator.replicate_model_fn import *
 
 from tensorflow.python.util.all_util import remove_undocumented
 # pylint: enable=unused-import,line-too-long,wildcard-import
@@ -45,6 +46,8 @@ _allowed_symbols = [
     'call_logit_fn',
     'dnn_logit_fn_builder',
     'linear_logit_fn_builder',
+    'replicate_model_fn',
+    'TowerOptimizer',
 ]
 
 remove_undocumented(__name__, allowed_exception_list=_allowed_symbols)
diff --git a/tensorflow/contrib/estimator/python/estimator/extenders.py b/tensorflow/contrib/estimator/python/estimator/extenders.py
index 29c3c7358534f6e8ebbd31cbfcd7e34086d9b506..c99bf8badb35e6fffb7cae8761db9d402b8b3a8f 100644
--- a/tensorflow/contrib/estimator/python/estimator/extenders.py
+++ b/tensorflow/contrib/estimator/python/estimator/extenders.py
@@ -100,7 +100,7 @@ def add_metrics(estimator, metric_fn):
 
 
 def clip_gradients_by_norm(optimizer, clip_norm):
-  """Returns an optimizer which clips gradients before appliying them.
+  """Returns an optimizer which clips gradients before applying them.
 
   Example:
 
diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py
index a9311a20f127d92f02a95b8b48082fc90850635a..d6ca33e18923a5dd996431b0ff87c6ad3bccea92 100644
--- a/tensorflow/contrib/estimator/python/estimator/head.py
+++ b/tensorflow/contrib/estimator/python/estimator/head.py
@@ -44,6 +44,7 @@ _DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
 def multi_class_head(n_classes,
                      weight_column=None,
                      label_vocabulary=None,
+                     loss_reduction=losses.Reduction.SUM,
                      name=None):
   """Creates a `_Head` for multi class classification.
 
@@ -76,6 +77,8 @@ def multi_class_head(n_classes,
       integer within [0, n_classes). If given, labels must be of string type and
       have any value in `label_vocabulary`. Note that errors will be raised if
       `label_vocabulary` is not provided but labels are strings.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
     name: name of the head. If provided, summary and metrics keys will be
       suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
 
@@ -83,17 +86,20 @@ def multi_class_head(n_classes,
     An instance of `_Head` for multi class classification.
 
   Raises:
-    ValueError: if `n_classes`, `metric_class_ids` or `label_keys` is invalid.
+    ValueError: if `n_classes`, `label_vocabulary` or `loss_reduction` is
+      invalid.
   """
   return head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint:disable=protected-access
       n_classes=n_classes,
       weight_column=weight_column,
       label_vocabulary=label_vocabulary,
+      loss_reduction=loss_reduction,
       name=name)
 
 
 def binary_classification_head(
-    weight_column=None, thresholds=None, label_vocabulary=None, name=None):
+    weight_column=None, thresholds=None, label_vocabulary=None,
+    loss_reduction=losses.Reduction.SUM, name=None):
   """Creates a `_Head` for single label binary classification.
 
   This head uses `sigmoid_cross_entropy_with_logits` loss.
@@ -128,6 +134,8 @@ def binary_classification_head(
       [0, 1]. If given, labels must be string type and have any value in
       `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
       is not provided but labels are strings.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
     name: name of the head. If provided, summary and metrics keys will be
       suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
 
@@ -135,17 +143,20 @@ def binary_classification_head(
     An instance of `_Head` for binary classification.
 
   Raises:
-    ValueError: if `thresholds` contains a value outside of `(0, 1)`.
+    ValueError: If `thresholds` contains a value outside of `(0, 1)`.
+    ValueError: If `loss_reduction` is invalid.
   """
   return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint:disable=protected-access
       weight_column=weight_column,
       thresholds=thresholds,
       label_vocabulary=label_vocabulary,
+      loss_reduction=loss_reduction,
       name=name)
 
 
 def regression_head(weight_column=None,
                     label_dimension=1,
+                    loss_reduction=losses.Reduction.SUM,
                     name=None):
   """Creates a `_Head` for regression using the `mean_squared_error` loss.
 
@@ -172,15 +183,21 @@ def regression_head(weight_column=None,
     label_dimension: Number of regression labels per example. This is the size
       of the last dimension of the labels `Tensor` (typically, this has shape
       `[batch_size, label_dimension]`).
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
     name: name of the head. If provided, summary and metrics keys will be
       suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
 
   Returns:
     An instance of `_Head` for linear regression.
+
+  Raises:
+    ValueError: If `label_dimension` or `loss_reduction` is invalid.
   """
   return head_lib._regression_head_with_mean_squared_error_loss(  # pylint:disable=protected-access
       weight_column=weight_column,
       label_dimension=label_dimension,
+      loss_reduction=loss_reduction,
       name=name)
 
 
@@ -188,6 +205,7 @@ def multi_label_head(n_classes,
                      weight_column=None,
                      thresholds=None,
                      label_vocabulary=None,
+                     loss_reduction=losses.Reduction.SUM,
                      loss_fn=None,
                      name=None):
   """Creates a `_Head` for multi-label classification.
@@ -237,6 +255,8 @@ def multi_label_head(n_classes,
       [0, n_classes) or multi-hot Tensor. If given, labels must be SparseTensor
       string type and have any value in `label_vocabulary`. Also there will be
       errors if vocabulary is not provided and labels are string.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
     loss_fn: Optional loss function.
     name: name of the head. If provided, summary and metrics keys will be
       suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
@@ -245,7 +265,8 @@ def multi_label_head(n_classes,
     An instance of `_Head` for multi-label classification.
 
   Raises:
-    ValueError: if `n_classes`, `thresholds`, or `loss_fn` is invalid.
+    ValueError: if `n_classes`, `thresholds`, `loss_reduction` or `loss_fn` is
+    invalid.
   """
   thresholds = tuple(thresholds) if thresholds else tuple()
   if n_classes is None or n_classes < 2:
@@ -267,9 +288,13 @@ def multi_label_head(n_classes,
           'Given: {}'.format(n_classes, len(label_vocabulary)))
   if loss_fn:
     _validate_loss_fn_args(loss_fn)
+  if (loss_reduction not in losses.Reduction.all() or
+      loss_reduction == losses.Reduction.NONE):
+    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
   return _MultiLabelHead(
       n_classes=n_classes, weight_column=weight_column, thresholds=thresholds,
-      label_vocabulary=label_vocabulary, loss_fn=loss_fn, name=name)
+      label_vocabulary=label_vocabulary, loss_reduction=loss_reduction,
+      loss_fn=loss_fn, name=name)
 
 
 class _MultiLabelHead(head_lib._Head):  # pylint:disable=protected-access
@@ -280,12 +305,14 @@ class _MultiLabelHead(head_lib._Head):  # pylint:disable=protected-access
                weight_column=None,
                thresholds=None,
                label_vocabulary=None,
+               loss_reduction=losses.Reduction.SUM,
                loss_fn=None,
                name=None):
     self._n_classes = n_classes
     self._weight_column = weight_column
     self._thresholds = thresholds
     self._label_vocabulary = label_vocabulary
+    self._loss_reduction = loss_reduction
     self._loss_fn = loss_fn
     self._name = name
 
@@ -356,14 +383,12 @@ class _MultiLabelHead(head_lib._Head):  # pylint:disable=protected-access
           unweighted_loss, axis=-1, keep_dims=True)
     weights = head_lib._get_weights_and_check_match_logits(  # pylint:disable=protected-access,
         features=features, weight_column=self._weight_column, logits=logits)
-    weighted_sum_loss = losses.compute_weighted_loss(
-        unweighted_loss, weights=weights, reduction=losses.Reduction.SUM)
-    # _weights() can return 1.
-    example_weight_sum = math_ops.reduce_sum(
-        weights * array_ops.ones_like(unweighted_loss))
+    training_loss = losses.compute_weighted_loss(
+        unweighted_loss, weights=weights, reduction=self._loss_reduction)
     return head_lib.LossSpec(
-        weighted_sum_loss=weighted_sum_loss,
-        example_weight_sum=example_weight_sum,
+        training_loss=training_loss,
+        unreduced_loss=unweighted_loss,
+        weights=weights,
         processed_labels=processed_labels)
 
   def create_estimator_spec(
@@ -394,60 +419,60 @@ class _MultiLabelHead(head_lib._Head):  # pylint:disable=protected-access
                     export_output.PredictOutput(predictions))
             })
 
-      (weighted_sum_loss, example_weight_sum,
+      (training_loss, unreduced_loss, weights,
        processed_labels) = self.create_loss(
            features=features, mode=mode, logits=logits, labels=labels)
 
       # Eval.
       if mode == model_fn.ModeKeys.EVAL:
-        weights = head_lib._get_weights_and_check_match_logits(  # pylint:disable=protected-access,
-            features=features, weight_column=self._weight_column, logits=logits)
         return model_fn.EstimatorSpec(
             mode=model_fn.ModeKeys.EVAL,
             predictions=predictions,
-            loss=weighted_sum_loss,
+            loss=training_loss,
             eval_metric_ops=self._eval_metric_ops(
                 labels=processed_labels,
                 probabilities=probabilities,
                 weights=weights,
-                weighted_sum_loss=weighted_sum_loss,
-                example_weight_sum=example_weight_sum))
+                unreduced_loss=unreduced_loss))
 
       # Train.
       if train_op_fn is None:
         raise ValueError('train_op_fn can not be None.')
+      # Only summarize mean_loss for SUM reduction to preserve backwards
+      # compatibility. Otherwise skip it to avoid unnecessary computation.
+      if self._loss_reduction == losses.Reduction.SUM:
+        example_weight_sum = math_ops.reduce_sum(
+            weights * array_ops.ones_like(unreduced_loss))
+        mean_loss = training_loss / example_weight_sum
+      else:
+        mean_loss = None
     with ops.name_scope(''):
       summary.scalar(
           head_lib._summary_key(self._name, metric_keys.MetricKeys.LOSS),  # pylint:disable=protected-access
-          weighted_sum_loss)
-      summary.scalar(
-          head_lib._summary_key(  # pylint:disable=protected-access
-              self._name, metric_keys.MetricKeys.LOSS_MEAN),
-          weighted_sum_loss / example_weight_sum)
+          training_loss)
+      if mean_loss is not None:
+        summary.scalar(
+            head_lib._summary_key(  # pylint:disable=protected-access
+                self._name, metric_keys.MetricKeys.LOSS_MEAN),
+            mean_loss)
     return model_fn.EstimatorSpec(
         mode=model_fn.ModeKeys.TRAIN,
         predictions=predictions,
-        loss=weighted_sum_loss,
-        train_op=train_op_fn(weighted_sum_loss))
+        loss=training_loss,
+        train_op=train_op_fn(training_loss))
 
-  def _eval_metric_ops(self, labels, probabilities, weights, weighted_sum_loss,
-                       example_weight_sum):
+  def _eval_metric_ops(self, labels, probabilities, weights, unreduced_loss):
     """Returns a dict of metrics for eval_metric_ops."""
     with ops.name_scope(
         None, 'metrics',
-        [labels, probabilities, weights, weighted_sum_loss, example_weight_sum
-        ]):
+        [labels, probabilities, weights, unreduced_loss]):
       keys = metric_keys.MetricKeys
       metric_ops = {
           # Estimator already adds a metric for loss.
           head_lib._summary_key(self._name, keys.LOSS_MEAN):  # pylint:disable=protected-access
               metrics_lib.mean(
-                  # Both values and weights here are reduced, scalar Tensors.
-                  # values is the actual mean we want, but we pass the scalar
-                  # example_weight_sum in order to return the correct update_op
-                  # alongside the value_op for streaming metrics.
-                  values=(weighted_sum_loss / example_weight_sum),
-                  weights=example_weight_sum,
+                  values=unreduced_loss,
+                  weights=weights,
                   name=keys.LOSS_MEAN),
           head_lib._summary_key(self._name, keys.AUC):  # pylint:disable=protected-access
               metrics_lib.auc(labels=labels, predictions=probabilities,
diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py
index d1cf9090048470181818c573647923c9f5824dfa..e39e44541d2d30b1ecc9d4d41d0760decdc58168 100644
--- a/tensorflow/contrib/estimator/python/estimator/head_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/head_test.py
@@ -35,6 +35,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import string_ops
+from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import test
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.training import monitored_session
@@ -132,6 +133,16 @@ class MultiLabelHead(test.TestCase):
         r'Length of label_vocabulary must be n_classes \(3\). Given: 2'):
       head_lib.multi_label_head(n_classes=3, label_vocabulary=['foo', 'bar'])
 
+  def test_invalid_loss_reduction(self):
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
+      head_lib.multi_label_head(
+          n_classes=3, loss_reduction='invalid_loss_reduction')
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: none'):
+      head_lib.multi_label_head(
+          n_classes=3, loss_reduction=losses.Reduction.NONE)
+
   def test_loss_fn_arg_labels_missing(self):
     def _loss_fn(logits):
       del logits  # Unused
@@ -262,17 +273,17 @@ class MultiLabelHead(test.TestCase):
     labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
     # loss = labels * -log(sigmoid(logits)) +
     #        (1 - labels) * -log(1 - sigmoid(logits))
-    expected_weighted_sum_loss = np.sum(
+    expected_training_loss = np.sum(
         _sigmoid_cross_entropy(labels=labels, logits=logits))
-    actual_weighted_sum_loss = head.create_loss(
+    actual_training_loss = head.create_loss(
         features={'x': np.array(((42,),), dtype=np.int32)},
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
         labels=labels)[0]
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_weighted_sum_loss,
-                          actual_weighted_sum_loss.eval())
+      self.assertAllClose(expected_training_loss,
+                          actual_training_loss.eval())
 
   def test_eval_create_loss_large_logits(self):
     """Tests head.create_loss for eval mode and large logits."""
@@ -286,9 +297,9 @@ class MultiLabelHead(test.TestCase):
     # For large logits, this is approximated as:
     # loss = labels * (logits < 0) * (-logits) +
     #        (1 - labels) * (logits > 0) * logits
-    expected_weighted_sum_loss = np.sum(
+    expected_training_loss = np.sum(
         np.array([[(10. + 10.) / 2.], [(15. + 0.) / 2.]], dtype=np.float32))
-    actual_weighted_sum_loss = head.create_loss(
+    actual_training_loss = head.create_loss(
         features={'x': np.array(((42,),), dtype=np.int32)},
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
@@ -296,9 +307,7 @@ class MultiLabelHead(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          actual_weighted_sum_loss.eval(),
-          atol=1e-4)
+          expected_training_loss, actual_training_loss.eval(), atol=1e-4)
 
   def test_eval_create_loss_labels_wrong_shape(self):
     """Tests head.create_loss for eval mode when labels has the wrong shape."""
@@ -307,7 +316,7 @@ class MultiLabelHead(test.TestCase):
 
     logits = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32)
     labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
-    actual_weighted_sum_loss = head.create_loss(
+    actual_training_loss = head.create_loss(
         features={'x': np.array(((42,),), dtype=np.int32)},
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
@@ -317,14 +326,14 @@ class MultiLabelHead(test.TestCase):
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           r'\[expected_labels_shape: \] \[2 2\] \[labels_shape: \] \[2 1\]'):
-        actual_weighted_sum_loss.eval({
+        actual_training_loss.eval({
             labels_placeholder: np.array([[1], [1]], dtype=np.int64)
         })
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           r'labels shape must be \[D0, D1, ... DN, 2\]\..*'
           r'\[Received shape: \] \[2\]'):
-        actual_weighted_sum_loss.eval({
+        actual_training_loss.eval({
             labels_placeholder: np.array([1, 1], dtype=np.int64)
         })
 
@@ -344,14 +353,14 @@ class MultiLabelHead(test.TestCase):
         return constant_op.constant(loss)
     head = head_lib.multi_label_head(n_classes=2, loss_fn=_loss_fn)
 
-    actual_weighted_sum_loss = head.create_loss(
+    actual_training_loss = head.create_loss(
         features={'x': np.array(((42,),), dtype=np.int32)},
         mode=model_fn.ModeKeys.EVAL,
         logits=logits_input,
         labels=labels_input)[0]
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(np.sum(loss), actual_weighted_sum_loss.eval())
+      self.assertAllClose(np.sum(loss), actual_training_loss.eval())
 
   def test_eval_create_loss_loss_fn_wrong_shape(self):
     """Tests custom loss_fn that returns Tensor of unexpected shape."""
@@ -363,7 +372,7 @@ class MultiLabelHead(test.TestCase):
 
     logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
     labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    actual_weighted_sum_loss = head.create_loss(
+    actual_training_loss = head.create_loss(
         features={'x': np.array(((42,),), dtype=np.int32)},
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
@@ -374,7 +383,7 @@ class MultiLabelHead(test.TestCase):
           errors.InvalidArgumentError,
           r'loss_fn must return Tensor of shape \[batch_size, 1\]\. '
           r'Given: \] \[2\]'):
-        actual_weighted_sum_loss.eval()
+        actual_training_loss.eval()
 
   def test_eval_labels_none(self):
     """Tests that error is raised when labels is None."""
@@ -618,12 +627,44 @@ class MultiLabelHead(test.TestCase):
     # For large logits, this is approximated as:
     # loss = labels * (logits < 0) * (-logits) +
     #        (1 - labels) * (logits > 0) * logits
-    expected_weighted_sum_loss = np.sum(
-        np.array(
-            [[1. * (10. + 10.) / 2.], [2. * (15. + 0.) / 2.]],
-            dtype=np.float32))
-    expected_example_weight_sum = 1. + 2.
-    actual_weighted_sum_loss, actual_example_weight_sum, _ = head.create_loss(
+    expected_unreduced_loss = [[(10. + 10.) / 2.], [(15. + 0.) / 2.]]
+    expected_weights = [[1.], [2.]]
+    expected_training_loss = 1. * (10. + 10.) / 2. + 2. * (15. + 0.) / 2.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features={
+            'x': np.array(((42,),), dtype=np.int32),
+            'example_weights': weights
+        },
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    with self.test_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), atol=1e-4)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(), atol=1e-4)
+      self.assertAllClose(expected_weights, actual_weights.eval())
+
+  def test_train_create_loss_loss_reduction(self):
+    """Tests head.create_loss with loss_reduction."""
+    n_classes = 2
+    head = head_lib.multi_label_head(
+        n_classes, weight_column='example_weights',
+        loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
+
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    weights = np.array([[1.], [2.]], dtype=np.float32)
+    # loss = labels * -log(sigmoid(logits)) +
+    #        (1 - labels) * -log(1 - sigmoid(logits))
+    # For large logits, this is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits
+    expected_unreduced_loss = [[(10. + 10.) / 2.], [(15. + 0.) / 2.]]
+    expected_weights = [[1.], [2.]]
+    expected_training_loss = (1. * (10. + 10.) / 2. + 2. * (15. + 0.) / 2.) / 2.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
         features={
             'x': np.array(((42,),), dtype=np.int32),
             'example_weights': weights
@@ -634,13 +675,10 @@ class MultiLabelHead(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          actual_weighted_sum_loss.eval(),
-          atol=1e-4)
+          expected_training_loss, training_loss.eval(), atol=1e-4)
       self.assertAllClose(
-          expected_example_weight_sum,
-          actual_example_weight_sum.eval(),
-          atol=1e-4)
+          expected_unreduced_loss, unreduced_loss.eval(), atol=1e-4)
+      self.assertAllClose(expected_weights, actual_weights.eval())
 
   def test_train_labels_none(self):
     """Tests that error is raised when labels is None."""
@@ -851,12 +889,15 @@ class MultiLabelHead(test.TestCase):
     labels = np.array([[[1, 0, 0], [1, 0, 0]],
                        [[0, 1, 1], [0, 1, 1]]], dtype=np.int64)
     weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
-    # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3
-    #      = [[20/3, 10/3], [4, 8]]
+    # unreduced_loss =
+    #     [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3
+    #   = [[20/3, 10/3], [4, 8]]
+    expected_unreduced_loss = [[[20./3.], [10./3.]], [[4.], [8.]]]
+    # weights are reshaped to [2, 2, 1] to match logits.
+    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
     # weighted_sum_loss = 1*20/3 + 1.5*10/3 + 2*4 + 2.5*8 = 39.6667
-    expected_weighted_sum_loss = 39.6667
-    expected_example_weight_sum = np.sum(weights)
-    actual_weighted_sum_loss, actual_example_weight_sum, _ = head.create_loss(
+    expected_training_loss = 39.6667
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
         features={'weights': weights},
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
@@ -865,11 +906,10 @@ class MultiLabelHead(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss, actual_weighted_sum_loss.eval(),
-          atol=atol)
+          expected_training_loss, training_loss.eval(), atol=atol)
       self.assertAllClose(
-          expected_example_weight_sum, actual_example_weight_sum.eval(),
-          atol=atol)
+          expected_unreduced_loss, unreduced_loss.eval(), atol=atol)
+      self.assertAllClose(expected_weights, actual_weights.eval())
 
   def test_multi_dim_weighted_train(self):
     """Logits and labels of shape [2, 2, 3], weights [2, 2]."""
diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py
index f2a6eae03ec021e5c28d48b3887870d8a057e077..0346ddc24bffd61068177f4622bd03be4acd53d9 100644
--- a/tensorflow/contrib/estimator/python/estimator/multi_head.py
+++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py
@@ -186,40 +186,44 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
       logits_dict = logits
     else:
       logits_dict = self._split_logits(logits)
-    weighted_sum_losses = []
-    example_weight_sums = []
+    training_losses = []
     labels_by_head = {}
-    for head in self._heads:
-      (weighted_sum_loss,
-       example_weight_sum, processed_labels) = head.create_loss(
+    unreduced_losses_by_head = {}
+    example_weights_by_head = {}
+    for i, head in enumerate(self._heads):
+      (training_loss, unreduced_loss,
+       weights, processed_labels) = head.create_loss(
            features, mode, logits_dict[head.name], labels[head.name])
-      weighted_sum_losses.append(weighted_sum_loss)
-      example_weight_sums.append(example_weight_sum)
+      training_losses.append(training_loss)
       labels_by_head[head.name] = processed_labels
+      if self._head_weights:
+        head_weight = self._head_weights[i]
+        unreduced_losses_by_head[head.name] = math_ops.multiply(
+            unreduced_loss, head_weight)
+        example_weights_by_head[head.name] = math_ops.multiply(
+            weights, head_weight)
+      else:
+        unreduced_losses_by_head[head.name] = unreduced_loss
+        example_weights_by_head[head.name] = weights
 
-    weighted_sum_losses = tuple(weighted_sum_losses)
-    with ops.name_scope('merge_losses',
-                        values=weighted_sum_losses + (self._head_weights or
-                                                      tuple())):
+    training_losses = tuple(training_losses)
+    with ops.name_scope(
+        'merge_losses',
+        values=training_losses + (self._head_weights or tuple())):
       if self._head_weights:
-        head_weighted_losses = []
-        head_weighted_example_weight_sums = []
-        for loss, example_weight_sum, weight in zip(weighted_sum_losses,
-                                                    example_weight_sums,
-                                                    self._head_weights):
-          head_weighted_losses.append(math_ops.multiply(loss, weight))
-          head_weighted_example_weight_sums.append(math_ops.multiply(
-              example_weight_sum, weight))
-        merged_weighted_sum_loss = math_ops.add_n(head_weighted_losses)
-        merged_example_weight_sum = math_ops.add_n(
-            head_weighted_example_weight_sums)
+        head_weighted_training_losses = []
+        for training_loss, head_weight in zip(
+            training_losses, self._head_weights):
+          head_weighted_training_losses.append(
+              math_ops.multiply(training_loss, head_weight))
+        merged_training_loss = math_ops.add_n(head_weighted_training_losses)
       else:
-        merged_weighted_sum_loss = math_ops.add_n(weighted_sum_losses)
-        merged_example_weight_sum = math_ops.add_n(example_weight_sums)
+        merged_training_loss = math_ops.add_n(training_losses)
 
     return head_lib.LossSpec(
-        weighted_sum_loss=merged_weighted_sum_loss,
-        example_weight_sum=merged_example_weight_sum,
+        training_loss=merged_training_loss,
+        unreduced_loss=unreduced_losses_by_head,
+        weights=example_weights_by_head,
         processed_labels=labels_by_head)
 
   def create_estimator_spec(
diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
index 68f2d5d1cd53456f7dd82222e171b3619052321a..65ea89ba1b9236d0bf4d2de430fab168ef50bf97 100644
--- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
@@ -370,7 +370,7 @@ class MultiHeadTest(test.TestCase):
         'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
         'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
     }
-    weighted_sum_loss, example_weight_sum, _ = multi_head.create_loss(
+    training_loss, unreduced_losses, weights, _ = multi_head.create_loss(
         features={
             'x': np.array(((42,),), dtype=np.int32),
             'weights1': weights1,
@@ -383,14 +383,23 @@ class MultiHeadTest(test.TestCase):
     with self.test_session():
       # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]]
       # = [10, 7.5]
-      # weighted_sum_loss = 1 * 10 + 2 * 7.5 = 25
+      # training_loss = 1 * 10 + 2 * 7.5 = 25
+      # head-weighted unreduced_loss = 1 * [10, 7.5]
+      self.assertAllClose(
+          [[10.], [7.5]], unreduced_losses['head1'].eval(), rtol=tol, atol=tol)
       # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]]
       # = [20, 10]
-      # weighted_sum_loss = 2 * 20 + 3 * 10 = 70
-      # head-weighted merge = 1 * 25 + 2 * 70 = 165
-      self.assertAllClose(165, weighted_sum_loss.eval(), rtol=tol, atol=tol)
-      # example_weight_sum = 1 * (1 + 2) + 2 * (2 + 3) = 13
-      self.assertAllClose(13., example_weight_sum.eval(), rtol=tol, atol=tol)
+      # training_loss = 2 * 20 + 3 * 10 = 70
+      # head-weighted unreduced_loss = 2 * [20, 10]
+      self.assertAllClose(
+          [[40.], [20.]], unreduced_losses['head2'].eval(), rtol=tol, atol=tol)
+      # head-weighted training_loss = 1 * 25 + 2 * 70 = 165
+      self.assertAllClose(165, training_loss.eval(), rtol=tol, atol=tol)
+      # head-weighted example weights
+      self.assertAllClose(
+          [[1.], [2.]], weights['head1'].eval(), rtol=tol, atol=tol)
+      self.assertAllClose(
+          [[4.], [6.]], weights['head2'].eval(), rtol=tol, atol=tol)
 
   def test_train_create_loss_logits_tensor(self):
     """Tests create_loss with logits Tensor."""
@@ -409,7 +418,7 @@ class MultiHeadTest(test.TestCase):
         'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
         'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
     }
-    weighted_sum_loss, example_weight_sum, _ = multi_head.create_loss(
+    training_loss, unreduced_losses, weights, _ = multi_head.create_loss(
         features={
             'x': np.array(((42,),), dtype=np.int32),
             'weights1': weights1,
@@ -422,14 +431,23 @@ class MultiHeadTest(test.TestCase):
     with self.test_session():
       # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]]
       # = [10, 7.5]
-      # weighted_sum_loss = 1 * 10 + 2 * 7.5 = 25
+      # training_loss = 1 * 10 + 2 * 7.5 = 25
+      # head-weighted unreduced_loss = 1 * [10, 7.5]
+      self.assertAllClose(
+          [[10.], [7.5]], unreduced_losses['head1'].eval(), rtol=tol, atol=tol)
       # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]]
       # = [20, 10]
-      # weighted_sum_loss = 2 * 20 + 3 * 10 = 70
-      # head-weighted merge = 1 * 25 + 2 * 70 = 165
-      self.assertAllClose(165, weighted_sum_loss.eval(), rtol=tol, atol=tol)
-      # example_weight_sum = 1 * (1 + 2) + 2 * (2 + 3) = 13
-      self.assertAllClose(13., example_weight_sum.eval(), rtol=tol, atol=tol)
+      # training_loss = 2 * 20 + 3 * 10 = 70
+      # head-weighted unreduced_loss = 2 * [20, 10]
+      self.assertAllClose(
+          [[40.], [20.]], unreduced_losses['head2'].eval(), rtol=tol, atol=tol)
+      # head-weighted training_loss = 1 * 25 + 2 * 70 = 165
+      self.assertAllClose(165, training_loss.eval(), rtol=tol, atol=tol)
+      # head-weighted example weights
+      self.assertAllClose(
+          [[1.], [2.]], weights['head1'].eval(), rtol=tol, atol=tol)
+      self.assertAllClose(
+          [[4.], [6.]], weights['head2'].eval(), rtol=tol, atol=tol)
 
   def test_train_create_loss_logits_tensor_multi_dim(self):
     """Tests create_loss with multi-dimensional logits of shape [2, 2, 5]."""
@@ -455,20 +473,17 @@ class MultiHeadTest(test.TestCase):
     # loss2 = (0-2)^2 + (1+2)^2 + (0-2)^2 + (0-2)^2 + (1+2)^2 + (0-2)^2 +
     #         (2+2)^2 + (2-2)^2 + (0+2)^2 + (2+2)^2 + (2-2)^2 + (0+2)^2
     #       = 74
-    expected_weighted_sum_loss = 28. + 74.
+    expected_training_loss = 28. + 74.
 
-    weighted_sum_loss, example_weight_sum, _ = multi_head.create_loss(
+    training_loss = multi_head.create_loss(
         features={},
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
-        labels=labels)
+        labels=labels)[0]
     tol = 1e-3
     with self.test_session():
       self.assertAllClose(
-          expected_weighted_sum_loss, weighted_sum_loss.eval(),
-          rtol=tol, atol=tol)
-      self.assertAllClose(
-          2. * 2. * 5., example_weight_sum.eval(), rtol=tol, atol=tol)
+          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
 
   def test_train_one_head(self):
     head1 = head_lib.multi_label_head(n_classes=2, name='head1')
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
index d9c83aa86577aa129458c56887ff4668c103d0db..caa9dd83233b6b850385335fde96431271d85c3a 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
@@ -23,6 +23,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from collections import defaultdict
+from contextlib import contextmanager
 import copy
 
 import six
@@ -41,20 +43,24 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import tf_logging
-from tensorflow.python.training import training_util
+from tensorflow.python.training import device_setter as device_setter_lib
+from tensorflow.python.training import optimizer as optimizer_lib
 
 
-def replicate_model_fn(model_fn, optimizer_fn, devices=None):
-  """Replicate `Estimator.model_fn` over GPUs within a single host.
+def replicate_model_fn(model_fn,
+                       loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
+                       devices=None):
+  """Replicate `Estimator.model_fn` over GPUs.
 
   The given `model_fn` specifies a single forward pass of a model.  To replicate
   such a model over GPUs, each GPU gets its own instance of the forward pass
   (a.k.a. a tower).  The input features and labels get sharded into the chunks
-  that correspond to the number of GPUs.  Each tower computes its own loss based
+  that correspond to the number of GPUs.  Each tower computes a loss based
   on its input.  For each such loss, gradients are computed.  After that, the
-  available losses are summed to form aggregated loss.  The available
-  gradients are summed too.  Then, they update weights using the specified
+  available losses are aggregated to form aggregated loss.  Available
+  gradients are summed.  Then, they update weights using the specified
   optimizer.
 
   If `devices` are `None`, then all available GPUs are going to be used for
@@ -63,36 +69,38 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
 
   Two modes of local replication over available GPUs are supported:
     1)  If exactly 1 GPU is detected, then variables and operations are placed
-        onto GPU.
+        onto the GPU.
     2)  If more than 1 GPU is detected, then variables are going to be placed on
         the CPU.  Replicas of operations are placed on each individual GPU.
 
   Here is an example of how one might use their `model_fn` to run over GPUs:
     ```python
-       def optimizer_fn():
-         return tf.train.GradientDescentOptimizer(learning_rate=0.001)
        ...
        def model_fn(...):  # See `model_fn` in `Estimator`.
          loss = ...
+         optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
+         optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
          if mode == tf.estimator.ModeKeys.TRAIN:
            #  See the section below on `EstimatorSpec.train_op`.
-           return EstimatorSpec(mode=mode, loss=loss, train_op=tf.noop())
+           return EstimatorSpec(mode=mode, loss=loss,
+                                train_op=optimizer.minimize(loss))
 
          #  No change for `ModeKeys.EVAL` or `ModeKeys.PREDICT`.
          return EstimatorSpec(...)
        ...
        classifier = tf.estimator.Estimator(
-         model_fn=replicate_model_fn.replicate_model_fn(model_fn, optimizer_fn))
+         model_fn=tf.contrib.estimator.replicate_model_fn(model_fn))
     ```
 
+  Please see `DNNClassifierIntegrationTest` for an example with a canned
+  Estimator.
+
   On `EstimatorSpec.train_op`:
   `model_fn` returns `EstimatorSpec.train_op` for
   `tf.estimator.GraphKeys.TRAIN`. It is typically derived using an optimizer.
-  `replicate_model_fn` ignores the returned `EstimatorSpec.train_op`, so there
-  is no need to use an optimizer inside the user's `model_fn`.  The
-  `EstimatorSpec.loss` subgraph is going to be executed, while
-  `EstimatorSpec.train_op` isn't going to be executed. One could pass
-  `train_op=tf.noop()` to `EstimatorSpec`.
+  Towers are expected to populate it in the same way.  Gradients from all towers
+  are reduced and applied in the last tower.  To achieve that in the case of
+  multiple towers, `TowerOptimizer` needs to be used.  See `TowerOptimizer`.
 
   On sharding input features and labels:
   Input features and labels are split for consumption by each tower. They are
@@ -101,7 +109,7 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
   On reduction algorithms:
   Certain algorithms were chosen for aggregating results of computations on
   multiple towers:
-    - Losses from all towers are reduced using sum.
+    - Losses from all towers are reduced according to `loss_reduction`.
     - Gradients are reduced using sum for each trainable variable.
     - `eval_metrics_ops` are reduced per metric using `reduce_mean`.
     - `EstimatorSpec.predictions` and `EstimatorSpec.export_outputs` are
@@ -109,65 +117,332 @@ def replicate_model_fn(model_fn, optimizer_fn, devices=None):
     - For all other fields of `EstimatorSpec` the values of the first tower
       are taken.
 
-  On replication of variables:
+  On distribution of variables:
   Variables are not duplicated between towers.  Instead, they are placed on a
   single device as defined above and shared across towers.
 
-  Other current limitations:
-    - `predictions` are not supported for `ModeKeys.EVAL`.  That is required for
-      `tf.contrib.estimator.add_metrics`.
+  On overhead:
+  If only one device is specified, then aggregation of loss and gradients
+  doesn't happen. Replication consists of placing `model_fn` onto the
+  specified device.
+
+  On current limitations:
+    - `predictions` are not supported for `ModeKeys.EVAL`.  They are required
+       for `tf.contrib.estimator.add_metrics`.
 
   Args:
     model_fn: `model_fn` as defined in `Estimator`.  See the section above about
       the train_op argument of `EstimatorSpec`.
-    optimizer_fn: a function that returns an optimizer instance.  The function
-      may accept one `params` argument.  This is the `params` argument as
-      defined by `Estimator`.  See  the `Estimator` documentation for details.
+    loss_reduction: controls whether losses are summed or averaged.
     devices: Optional list of devices to replicate the model across.  This
       argument can be used to replice only on the subset of available GPUs.
       If `None`, then all available GPUs are going to be used for replication.
       If no GPUs are available, then the model is going to be placed on the CPU.
 
+  Raises:
+    ValueError: if there is no `loss_reduction` or if TowerOptimizer is
+      mis-used.
+
   Returns:
     A replicated version of the supplied `model_fn`. Returned function that
       conforms to the requirements of `Estimator`'s `model_fn` and can be used
       instead of the supplied `model_fn`.
   """
+  return _replicate_model_fn_with_mode(
+      model_fn,
+      loss_reduction,
+      devices,
+      # TODO(isaprykin): Query the system configuration to choose modes other
+      # than `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often
+      # appropriate.
+      mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER)
+
+
+class _VariableDistributionMode(object):
+  """Modes for variable distribution used for forcing a particular one.
+
+  Forcing a mode is meant for performance experimentation purposes rather than
+  for general use cases.
+  """
+
+  SHARED_LOCAL_PARAMETER_SERVER = 1
+  """Variables are placed on a single device and shared across all devices.
+
+  Two ways to achieve this distribution over available GPUs are supported:
+    1)  If exactly 1 GPU is detected, then variables and operations are placed
+        onto GPU.
+    2)  If more than 1 GPU is detected, then variables are going to be placed on
+        the CPU.  Replicas of operations are placed on each individual GPU.
+  """
+
+  SHARED_ROUND_ROBIN = 2
+  """Variables are placed on all devices in a round-robin fashion.
+
+  Every subsequent variable is placed on the next device.  There is only one
+  copy of each variable that is shared across all devices.
+  """
+
+
+def _replicate_model_fn_with_mode(
+    model_fn,
+    loss_reduction,
+    devices=None,
+    mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER):
+  """A version of `replicate_model_fn` that allows to specify a `mode`."""
+  if loss_reduction == losses.Reduction.NONE:
+    raise ValueError('Tower losses need to be reduced in some way, yet {} '
+                     'reduction is specified.'.format(loss_reduction))
   if not devices:
     devices = _get_local_devices('GPU') or _get_local_devices('CPU')
 
   is_a_single_gpu_case = len(devices) == 1 and 'GPU' in devices[0]
-  local_ps_device = '/{}:0'.format('GPU' if is_a_single_gpu_case else 'CPU')
+  consolidation_device = devices[0] if is_a_single_gpu_case else '/CPU:0'
 
-  tf_logging.info('Replicating the `model_fn` across {}.  Local parameter '
-                  'server device is going to be {}.'.format(
-                      devices, local_ps_device))
+  ps_devices = [consolidation_device]
+  if mode == _VariableDistributionMode.SHARED_ROUND_ROBIN:
+    ps_devices = devices
+
+  tf_logging.info('Replicating the `model_fn` across {}.  Variables are going '
+                  'to be placed on {}.  Consolidation device is going to be {}.'
+                  .format(devices, ps_devices, consolidation_device))
+
+  def single_device_model_fn(features, labels, mode, params=None, config=None):
+    """`model_fn` on a single device without reduction overhead."""
+    return _get_loss_towers(
+        model_fn=model_fn,
+        mode=mode,
+        features=[features],
+        labels=[labels],
+        params=params,
+        loss_reduction=loss_reduction,
+        config=config,
+        devices=devices,
+        local_ps_devices=ps_devices)[0]  # One device, so one spec is out.
 
   def replicated_model_fn(features, labels, mode, params=None, config=None):
     """Replicated version of `model_fn` to be used instead."""
     feature_shards, label_shards = _split_batch(
-        features, labels, len(devices), device=local_ps_device)
+        features, labels, len(devices), device=consolidation_device)
     tower_specs = _get_loss_towers(
         model_fn=model_fn,
         mode=mode,
         features=feature_shards,
         labels=label_shards,
         params=params,
+        loss_reduction=loss_reduction,
         config=config,
         devices=devices,
-        local_ps_device=local_ps_device)
+        local_ps_devices=ps_devices)
 
     if mode == model_fn_lib.ModeKeys.TRAIN:
-      train_op = _minimize_towers(tower_specs,
-                                  _call_optimizer_fn(optimizer_fn, params))
+      train_op = _minimize_towers(tower_specs)
       return _train_spec(
-          tower_specs, train_op, aggregation_device=local_ps_device)
+          tower_specs, train_op, aggregation_device=consolidation_device)
     elif mode == model_fn_lib.ModeKeys.EVAL:
-      return _eval_spec(tower_specs, aggregation_device=local_ps_device)
+      return _eval_spec(tower_specs, aggregation_device=consolidation_device)
     elif mode == model_fn_lib.ModeKeys.PREDICT:
-      return _predict_spec(tower_specs, aggregation_device=local_ps_device)
+      return _predict_spec(tower_specs, aggregation_device=consolidation_device)
+
+  if len(devices) == 1:
+    return single_device_model_fn
+  else:
+    return replicated_model_fn
+
+
+class TowerOptimizer(optimizer_lib.Optimizer):
+  """Gathers gradients from all towers and reduces them in the last one."""
+
+  COLLECTION_FOR_GRAPH_STATES = 'replicate_model_fn_graph_states'
 
-  return replicated_model_fn
+  def __init__(self, optimizer_or_optimizer_fn):
+    """Wrap an existing optimizer for gathering gradients across towers.
+
+    Each invocation of model_fn has to call the same optimizers in the same
+    order.
+
+    Multiple optimizers that use the same or different losses are supported.
+
+    If TowerOptimizer is used but `replicate_model_fn` isn't, then no
+    aggregation will happen.  All calls will simply be forwarded to the
+    underlying optimizer. The behavior is similar if there is only one tower.
+
+    If TowerOptimizer is used together with SyncReplicasOptimizer that wraps
+    the user's optimizer, then it's the SyncReplicasOptimizer that needs to be
+    wrapped with TowerOptimizer.
+
+    Args:
+      optimizer_or_optimizer_fn: an instance of optimizer to wrap.  That
+        instance is going to be used for optimizer-specific logic.  This can
+        also be a no-argument function that returns such an optimizer instance.
+    """
+    self._optimizer_or_optimizer_fn = optimizer_or_optimizer_fn
+
+  @staticmethod
+  def has_been_used():
+    return TowerOptimizer._graph_state().has_tower_optimizer_been_used
+
+  def get_slot(self, *args, **kwargs):
+    return self._get_optimizer().get_slot(*args, **kwargs)
+
+  def get_slot_names(self, *args, **kwargs):
+    return self._get_optimizer().get_slot_names(*args, **kwargs)
+
+  def get_name(self, *args, **kwargs):
+    return self._get_optimizer().get_name(*args, **kwargs)
+
+  def variables(self, *args, **kwargs):
+    return self._get_optimizer().variables(*args, **kwargs)
+
+  def compute_gradients(self, loss, *args, **kwargs):
+    """Compute gradients, but first, if needed, scale the loss."""
+    loss = _scale_loss(loss,
+                       self._graph_state().loss_reduction,
+                       self._graph_state().number_of_towers)
+    return self._get_optimizer().compute_gradients(loss, *args, **kwargs)
+
+  def apply_gradients(self, grads_and_vars, global_step=None, **kwargs):
+    """Collect gradients updates to apply them with the last tower."""
+    if self._graph_state().number_of_towers == 1:
+      # Avoid the overhead of reduction if there's only one tower.
+      #
+      # There assumed to be only one tower if aggregation-related methods were
+      # not called by `_get_loss_towers`, for example if the model_fn uses
+      # TowerEstimator, but `replicate_model_fn` isn't used.
+      return self._get_optimizer().apply_gradients(grads_and_vars, global_step,
+                                                   **kwargs)
+
+    self._graph_state().collect_gradients(grads_and_vars)
+
+    if not self._graph_state().is_the_last_tower:
+      with ops_lib.control_dependencies(_extract_tensors(grads_and_vars)):
+        return self._construct_no_op_train_op()
+    else:
+      # Gradients need to be gathered and applied in the scope of the first
+      # tower, so that the tensors are accessible via names without prefixes.
+      var_scope, name_scope = self._graph_state().scopes_of_the_first_tower
+      with variable_scope.variable_scope(var_scope):
+        with ops_lib.name_scope(name_scope):
+          return self._apply_gathered_gradients(global_step, **kwargs)
+
+  def _apply_gathered_gradients(self, global_step, **kwargs):
+    graph_state = self._graph_state()
+    optimizer = self._get_optimizer()
+
+    grad_lists = {}
+    for grad, var in graph_state.get_latest_gradients_from_all_towers():
+      if grad is not None:
+        grad_lists.setdefault(var, []).append(grad)
+
+    aggregated_grads = []
+    with ops_lib.name_scope('gradient_aggregating'):
+      for var, grads in six.iteritems(grad_lists):
+        grad = _compute_sum_on_device(grads, var.device)
+        aggregated_grads.append((grad, var))
+    return optimizer.apply_gradients(
+        aggregated_grads, global_step=global_step, **kwargs)
+
+  def _get_optimizer(self):
+    if callable(self._optimizer_or_optimizer_fn):
+      # If optimizer is given as a function then we need to wait till we are
+      # under the right graph context before constructing it.  That's why the
+      # optimizer is constructed in _get_optimizer() rather than __init__().
+      self._optimizer_or_optimizer_fn = self._optimizer_or_optimizer_fn()
+    self._graph_state().has_tower_optimizer_been_used = True
+    return self._optimizer_or_optimizer_fn
+
+  def _construct_no_op_train_op(self):
+    return control_flow_ops.no_op(name='train_op_placeholder')
+
+  @staticmethod
+  def _graph_state():
+    graph_states = ops_lib.get_default_graph().get_collection_ref(
+        TowerOptimizer.COLLECTION_FOR_GRAPH_STATES)
+    if not graph_states:
+      graph_states.append(TowerOptimizer._PerGraphState())
+    return graph_states[-1]
+
+  @staticmethod
+  def _did_towers_have_same_optimizer_calls():
+    graph_state = TowerOptimizer._graph_state()
+    return graph_state.did_towers_have_same_optimizer_calls()
+
+  @staticmethod
+  def _clear_graph_state():
+    # Clearing the Graph collection will prevent _PerGraphState from being
+    # serialized.
+    ops_lib.get_default_graph().clear_collection(
+        TowerOptimizer.COLLECTION_FOR_GRAPH_STATES)
+
+  class _PerGraphState(object):
+    """Gradient reduction related state of a Tensorflow graph."""
+
+    def __init__(self):
+      self._collected_grads_and_vars = defaultdict(list)
+      self._current_tower_index = 0
+      self._number_of_towers = 1
+      self._loss_reduction = None
+      # Scopes of the first tower that don't have a prefix:
+      self._variable_scope = None
+      self._name_scope = None
+      # If needed, alert that TowerOptimizer needs to be used with model_fn.
+      self._has_tower_optimizer_been_used = False
+
+    def collect_gradients(self, grads_and_vars):
+      self._collected_grads_and_vars[self._current_tower_index].append(
+          grads_and_vars)
+
+    def get_latest_gradients_from_all_towers(self):
+      """Get gradients across towers for the last called optimizer."""
+      grads_and_vars = []
+      index_of_last_gradients = len(
+          self._collected_grads_and_vars[self._current_tower_index]) - 1
+      for tower_id in range(self._current_tower_index + 1):
+        grads_and_vars.extend(
+            self._collected_grads_and_vars[tower_id][index_of_last_gradients])
+      return grads_and_vars
+
+    def set_reduction_across_towers(self, loss_reduction, number_of_towers):
+      self._loss_reduction = loss_reduction
+      self._number_of_towers = number_of_towers
+
+    @contextmanager
+    def tower(self, tower_id, var_scope, name_scope):
+      if tower_id == 0:
+        self._variable_scope = var_scope
+        self._name_scope = name_scope
+      self._current_tower_index = tower_id
+      yield
+
+    @property
+    def scopes_of_the_first_tower(self):
+      return self._variable_scope, self._name_scope
+
+    @property
+    def is_the_last_tower(self):
+      return self._current_tower_index == (self._number_of_towers - 1)
+
+    @property
+    def number_of_towers(self):
+      return self._number_of_towers
+
+    @property
+    def loss_reduction(self):
+      return self._loss_reduction
+
+    @property
+    def has_tower_optimizer_been_used(self):
+      return self._has_tower_optimizer_been_used
+
+    @has_tower_optimizer_been_used.setter
+    def has_tower_optimizer_been_used(self, value):
+      self._has_tower_optimizer_been_used = value
+
+    def did_towers_have_same_optimizer_calls(self):
+      total_number_of_grads = sum([
+          len(grads)
+          for _, grads in six.iteritems(self._collected_grads_and_vars)
+      ])
+      return total_number_of_grads % self._number_of_towers == 0
 
 
 def _get_local_devices(device_type):
@@ -222,7 +497,8 @@ def _get_loss_towers(model_fn,
                      params,
                      config,
                      devices,
-                     local_ps_device,
+                     local_ps_devices,
+                     loss_reduction,
                      name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
   """Replicate the loss computation across devices."""
   tower_specs = []
@@ -234,36 +510,64 @@ def _get_loss_towers(model_fn,
   if 'config' in model_fn_args:
     optional_params['config'] = copy.deepcopy(config)
 
+  # pylint: disable=protected-access
+  round_robin_strategy = device_setter_lib._RoundRobinStrategy(
+      num_tasks=len(local_ps_devices))
+  TowerOptimizer._graph_state().set_reduction_across_towers(
+      loss_reduction, len(devices))
+
   for i, device in enumerate(devices):
     is_the_first_tower = (i == 0)
 
     device_setter = _local_device_setter(
-        worker_device=device, ps_device=local_ps_device)
+        worker_device=device,
+        ps_devices=local_ps_devices,
+        ps_strategy=round_robin_strategy)
 
-    # We would like to preserve the names of the variables and ops that a user
-    # might be relying on. Names with prefix are going to resolve to variables
-    # and ops of the first tower.
+    # We would like to preserve the names of the variables and ops that the user
+    # might be relying on. Names without a prefix are going to resolve to
+    # variables and ops of the first tower.
     name_scope = name_scope_pattern
     if is_the_first_tower:
       name_scope = ''
 
-    with variable_scope.variable_scope('', reuse=not is_the_first_tower):
-      with ops_lib.name_scope(name_scope.format(i)):
-        with ops_lib.device(device_setter):
-          labels_shard = None
-          if labels:
-            labels_shard = labels[i]
-
-          tower_specs.append(
-              model_fn(
-                  mode=mode,
-                  features=features[i],
-                  labels=labels_shard,
-                  **optional_params))
+    with variable_scope.variable_scope(
+        '', reuse=not is_the_first_tower) as var_scope:
+      with ops_lib.name_scope(name_scope.format(i)) as name_scope:
+        with TowerOptimizer._graph_state().tower(
+            tower_id=i, var_scope=var_scope, name_scope=name_scope):
+          with ops_lib.device(device_setter):
+            labels_shard = None
+            if labels:
+              labels_shard = labels[i]
+
+            tower_spec = model_fn(
+                mode=mode,
+                features=features[i],
+                labels=labels_shard,
+                **optional_params)
+
+            if (tower_spec.train_op is not None and len(devices) > 1 and
+                not TowerOptimizer.has_been_used()):
+              raise ValueError('Please wrap optimizers with TowerOptimizer'
+                               ' in order to use replicate_model_fn with'
+                               ' multiple `devices`.')
+
+            # Scaling the loss here doesn't actually affect gradients.  Another
+            # instance of scaling happens inside the TowerOptimizer.
+            tower_spec = _scale_tower_loss(
+                tower_spec, loss_reduction, number_of_towers=len(devices))
+            tower_specs.append(tower_spec)
+
+  if not TowerOptimizer._did_towers_have_same_optimizer_calls():
+    raise ValueError('Each invocation of model_fn was supposed to make the same'
+                     ' optimizer calls.')
+  TowerOptimizer._clear_graph_state()
+  # pylint: enable=protected-access
   return tower_specs
 
 
-def _local_device_setter(ps_device, worker_device):
+def _local_device_setter(worker_device, ps_devices, ps_strategy):
   """A device setter that puts distributes Var/Ops to PS/workers."""
   ps_ops = ['Variable', 'VariableV2', 'VarHandleOp']
 
@@ -273,7 +577,7 @@ def _local_device_setter(ps_device, worker_device):
     node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def
     if node_def.op in ps_ops:
       ps_device_spec = framework_device.DeviceSpec.from_string(
-          '{}'.format(ps_device))
+          '{}'.format(ps_devices[ps_strategy(op)]))
 
       ps_device_spec.merge_from(current_device)
       return ps_device_spec.to_string()
@@ -286,33 +590,33 @@ def _local_device_setter(ps_device, worker_device):
   return local_device_chooser
 
 
-def _minimize_towers(tower_specs, optimizer):
-  """Aggregate and apply gradients for computed losses."""
-  grad_lists = {}
-  for tower_spec in tower_specs:
-    with ops_lib.device(tower_spec.loss.device):
-      for grad, var in optimizer.compute_gradients(tower_spec.loss):
-        if grad is not None:
-          grad_lists.setdefault(var, []).append(grad)
+def _scale_tower_loss(tower_spec, loss_reduction, number_of_towers):
+  """Produce an EstimatorSpec with approproriately scaled loss."""
+  if tower_spec.loss is None:
+    return tower_spec
+
+  estimator_spec = _asdict(tower_spec)
+  estimator_spec['loss'] = _scale_loss(tower_spec.loss, loss_reduction,
+                                       number_of_towers)
+  return model_fn_lib.EstimatorSpec(**estimator_spec)
 
-  aggregated_grads = []
-  with ops_lib.name_scope('gradient_aggregating'):
-    for var, grads in six.iteritems(grad_lists):
-      grad = _compute_sum_on_device(grads, var.device)
-      aggregated_grads.append((grad, var))
 
-  train_op = optimizer.apply_gradients(
-      aggregated_grads, global_step=training_util.get_global_step())
+def _scale_loss(loss, loss_reduction, number_of_towers):
+  """If needed, scale down the loss for averaging loss by summing."""
+  if loss is None:
+    return None
+  if number_of_towers == 1:
+    return loss
 
-  return train_op
+  if loss_reduction != losses.Reduction.SUM:
+    return math_ops.div(loss, 1.0 * number_of_towers, name='averaged_loss')
+  else:
+    return loss
 
 
-def _call_optimizer_fn(optimizer_fn, params):
-  arguments = {}
-  optimizer_fn_arguments = util.fn_args(optimizer_fn)
-  if 'params' in optimizer_fn_arguments:
-    arguments['params'] = params
-  return optimizer_fn(**arguments)
+def _minimize_towers(tower_specs):
+  """`train_op` of the last tower applies aggregated gradients."""
+  return tower_specs[-1].train_op
 
 
 def _compute_sum_on_device(values, device, name=None):
@@ -335,7 +639,12 @@ def _train_spec(tower_specs,
                 aggregation_device,
                 aggregated_loss_name='loss'):
   """Populate replicated EstimatorSpec for `GraphKeys.TRAIN`."""
-  estimator_spec = tower_specs[0]._asdict()
+  # Spec of the last tower is used as the template for the final spec, because
+  # some `EstimatorSpec.training_hooks` rely on calls made in model_fn.  For
+  # example, `SyncReplicasOptimizerHook` validates the
+  # `SyncReplicasOptimizer.apply_gradients` call. `TowerEstimator` makes that
+  # call only in the last tower.
+  estimator_spec = _asdict(tower_specs[-1])
   estimator_spec['mode'] = model_fn_lib.ModeKeys.TRAIN
   estimator_spec['train_op'] = train_op
   estimator_spec['loss'] = _compute_sum_on_device(
@@ -346,7 +655,7 @@ def _train_spec(tower_specs,
 
 def _eval_spec(tower_specs, aggregation_device, aggregated_loss_name='loss'):
   """Populate replicated EstimatorSpec for `GraphKeys.EVAL`."""
-  estimator_spec = tower_specs[0]._asdict()
+  estimator_spec = _asdict(tower_specs[0])
   estimator_spec['mode'] = model_fn_lib.ModeKeys.EVAL
   estimator_spec['loss'] = _compute_sum_on_device(
       [spec.loss for spec in tower_specs], aggregation_device,
@@ -370,7 +679,7 @@ def _eval_spec(tower_specs, aggregation_device, aggregated_loss_name='loss'):
 def _reduce_metric_variables(number_of_towers):
   """Aggregate local variables used in metrics into the first tower."""
   if number_of_towers == 1:
-    return control_flow_ops.no_op()
+    return control_flow_ops.no_op(name='no_eval_metric_reduction')
 
   metric_variables = ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)
   variables_per_tower = len(metric_variables) // number_of_towers
@@ -414,7 +723,7 @@ def _reduce_metric_variables(number_of_towers):
 
 def _predict_spec(tower_specs, aggregation_device):
   """Populate replicated EstimatorSpec for `GraphKeys.PREDICT`."""
-  estimator_spec = tower_specs[0]._asdict()
+  estimator_spec = _asdict(tower_specs[0])
   estimator_spec['mode'] = model_fn_lib.ModeKeys.PREDICT
 
   with ops_lib.device(aggregation_device):
@@ -465,6 +774,17 @@ def _concat_tensor_dicts(*tensor_dicts):
   }
 
 
+def _extract_tensors(tensors_and_vars):
+  tensors = []
+  for tensor_and_var in tensors_and_vars:
+    tensor, _ = tensor_and_var
+    if isinstance(tensor, ops_lib.IndexedSlices):
+      tensors.append(tensor.values)
+    else:
+      tensors.append(tensor)
+  return tensors
+
+
 def _dict_concat(*dicts):
   list_dict = {}
   for d in dicts:
@@ -474,3 +794,19 @@ def _dict_concat(*dicts):
     for k, v in six.iteritems(d):
       list_dict.setdefault(k, []).append(v)
   return list_dict
+
+
+def _asdict(namedtuple):
+  """Returns a namedtuple as a dictionary.
+
+  This is required because `_asdict()` in Python 3.x.x is broken in classes
+  that inherit from `collections.namedtuple`. See
+  https://bugs.python.org/issue24931 for more details.
+
+  Args:
+    namedtuple: An object that inherits from `collections.namedtuple`.
+
+  Returns:
+    A dictionary version of the tuple.
+  """
+  return {k: getattr(namedtuple, k) for k in namedtuple._fields}
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
index ffe69f89b4c4d48d329a1aef3aa3cad2b17b3fdf..03d31226af613960a19ce116b19b30153b1fdcee 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
@@ -40,6 +40,7 @@ from tensorflow.python.framework import ops as ops_lib
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import losses
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics as metrics_lib
 from tensorflow.python.ops import variable_scope
@@ -49,15 +50,32 @@ from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import adam
+from tensorflow.python.training import device_setter
 from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import training
 
 
+# TODO(isaprykin):  Parametrize all the tests on
+#   replicate_model_fn._VariableDistributionMode when it's supported.
 class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
     self._model_dir = tempfile.mkdtemp()
 
-  def test_complete_flow(self):
+  def test_complete_flow_with_public_version(self):
+    return self._complete_flow_with_mode(mode=None)
+
+  def test_complete_flow_with_mode_local_ps_server(self):
+    return self._complete_flow_with_mode(
+        replicate_model_fn._VariableDistributionMode.
+        SHARED_LOCAL_PARAMETER_SERVER)
+
+  def test_complete_flow_with_mode_round_robin(self):
+    return self._complete_flow_with_mode(
+        replicate_model_fn._VariableDistributionMode.SHARED_ROUND_ROBIN)
+
+  def _complete_flow_with_mode(self, mode):
     n_classes = 3
     input_dimension = 2
     batch_size = 12
@@ -96,20 +114,30 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase):
                     0., len(x_data), len(x_data), dtype=np.int64)), 1)
     ]
 
+    def optimizer_fn():
+      return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)
+
     estimator = dnn.DNNClassifier(
         hidden_units=(2, 2),
+        # Adagrad is configured with `get_optimizer_instance`, so the function
+        # form of `TowerOptimizer.__init__` is used.
+        optimizer=replicate_model_fn.TowerOptimizer(optimizer_fn),
         feature_columns=feature_columns,
         n_classes=n_classes,
         model_dir=self._model_dir)
 
-    def optimizer_fn():
-      return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)
+    if not mode:  # Use the public `replicate_model_fn`.
+      model_fn = replicate_model_fn.replicate_model_fn(
+          estimator.model_fn, devices=['/gpu:0', '/gpu:1', '/gpu:2'])
+    else:
+      model_fn = replicate_model_fn._replicate_model_fn_with_mode(
+          estimator.model_fn,
+          devices=['/gpu:0', '/gpu:1', '/gpu:2'],
+          loss_reduction=losses.Reduction.SUM,
+          mode=mode)
 
     estimator = estimator_lib.Estimator(
-        model_fn=replicate_model_fn.replicate_model_fn(
-            estimator.model_fn,
-            optimizer_fn,
-            devices=['/gpu:0', '/gpu:1', '/gpu:2']),
+        model_fn=model_fn,
         model_dir=estimator.model_dir,
         config=estimator.config,
         params=estimator.params)
@@ -134,6 +162,10 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase):
                                              serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
+    # Nothing should be left in the graph so that it doesn't get serialized.
+    self.assertFalse(ops_lib.get_default_graph().get_collection_ref(
+        replicate_model_fn.TowerOptimizer.COLLECTION_FOR_GRAPH_STATES))
+
   def _as_label(self, data_in_float):
     return np.rint(data_in_float).astype(np.int64)
 
@@ -153,28 +185,24 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
 
     predictions = math_ops.multiply(features, c)
 
-    loss = None
-    if mode is not model_fn_lib.ModeKeys.PREDICT:
-      loss = losses.absolute_difference(
-          labels=labels,
-          predictions=predictions,
-          reduction=losses.Reduction.SUM)
-      loss = math_ops.reduce_sum(loss)
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
 
     metrics = {
         'accuracy': metrics_lib.accuracy(labels, predictions),
         'auc': metrics_lib.auc(labels, predictions)
     }
 
+    optimizer = replicate_model_fn.TowerOptimizer(
+        gradient_descent.GradientDescentOptimizer(params['learning_rate']))
+
     return model_fn_lib.EstimatorSpec(
         mode=mode,
         loss=loss,
         eval_metric_ops=metrics,
         predictions={'probabilities': predictions},
-        train_op=control_flow_ops.no_op())  # This train_op isn't actually used.
-
-  def optimizer_fn(self, params):
-    return gradient_descent.GradientDescentOptimizer(params['learning_rate'])
+        train_op=optimizer.minimize(loss))
 
   @property
   def params(self):
@@ -188,7 +216,9 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
 
     with self.test_session() as session:
       replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1'])
+          self.model_fn,
+          loss_reduction=losses.Reduction.SUM,
+          devices=['/gpu:0', '/gpu:1'])
       estimator_spec = replicated_model_fn(
           features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
       session.run(variables.global_variables_initializer())
@@ -197,31 +227,71 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
       total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
       self.assertEqual(total_loss, session.run(estimator_spec.loss))
 
-      # loss' of c is 3.
+      # derivative of loss = (1*c - 1) + (2*c - 2) is 3.
       # new value of c = 10 - learning rate * 3 = 7.0.
       session.run(estimator_spec.train_op)
       with variable_scope.variable_scope('', reuse=True):
         c = variable_scope.get_variable('c', dtype=dtypes.float64)
         self.assertEqual(7.0, session.run(c))
 
-  def test_train_spec_with_optimizer_without_params(self):
-
-    def optimizer_fn_without_params():
-      return gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
-
+  def test_train_with_mean_reduction(self):
     features = np.array([[1.0], [2.0]])
     labels = np.array([[1.0], [2.0]])
 
-    with self.test_session() as session:  # pylint: disable=unused-variable
+    with self.test_session() as session:
       replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn,
-          optimizer_fn_without_params,
-          devices=['/gpu:0', '/gpu:1'])
-      # This call is going to fail if `replicated_model_fn` is still passing
-      # `params` inside `optimizer_fn`, even though the latter doesn't take any:
+          self.model_fn, losses.Reduction.MEAN, devices=['/gpu:0', '/gpu:1'])
       estimator_spec = replicated_model_fn(
           features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
-      del estimator_spec
+      session.run(variables.global_variables_initializer())
+
+      # loss = feature * c - label
+      total_loss = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)) / 2.0
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      # derivative of loss = (1*c - 1)/2 + (2*c - 2)/2 is 1.5.
+      # It's the same computation as without mean reduction, but the
+      # loss from every tower is scaled by 1/<number of towers>.
+      # new value of c = 10 - learning rate * 1.5 = 8.5
+      session.run(estimator_spec.train_op)
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(8.5, session.run(c))
+
+  def test_train_two_steps_collected_gradients_are_reset_between_steps(self):
+    with ops_lib.Graph().as_default():
+      features = array_ops.placeholder(dtypes.float64)
+      labels = array_ops.placeholder(dtypes.float64)
+
+      feature_inputs = np.array([[1.0], [2.0]]), np.array([[1.5], [2.5]])
+      label_inputs = np.array([[1.0], [2.0]]), np.array([[1.5], [2.5]])
+
+      # loss = feature * c - label
+      expected_losses = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0),
+                         (1.5 * 7.0 - 1.5) + (2.5 * 7.0 - 2.5))
+      # Derivative of the loss is 1.0 + 2.0 for the first step and 1.5 + 2.5
+      # for the second.
+      expected_c = 10.0 - 3.0, 7.0 - 4.0
+
+      with self.test_session() as session, variable_scope.variable_scope(
+          '', reuse=variable_scope.AUTO_REUSE):
+        replicated_model_fn = replicate_model_fn.replicate_model_fn(
+            self.model_fn,
+            loss_reduction=losses.Reduction.SUM,
+            devices=['/gpu:0', '/gpu:1'])
+        estimator_spec = replicated_model_fn(
+            features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+        session.run(variables.global_variables_initializer())
+
+        for feature_input, label_input, loss, weight in zip(
+            feature_inputs, label_inputs, expected_losses, expected_c):
+          feeds = {features: feature_input, labels: label_input}
+
+          self.assertEqual(loss, session.run(estimator_spec.loss, feeds))
+
+          session.run(estimator_spec.train_op, feeds)
+          c = variable_scope.get_variable('c', dtype=dtypes.float64)
+          self.assertEqual(weight, session.run(c, feeds))
 
   def test_eval(self):
     features = np.array([[0.01], [0.002]])
@@ -229,7 +299,9 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
 
     with self.test_session() as session:
       replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1'])
+          self.model_fn,
+          loss_reduction=losses.Reduction.SUM,
+          devices=['/gpu:0', '/gpu:1'])
       estimator_spec = replicated_model_fn(
           features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
       session.run(variables.local_variables_initializer())
@@ -252,13 +324,42 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
       self.assertEqual(0, auc)
       self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
 
+  def test_eval_with_mean_reduction(self):
+    features = np.array([[0.01], [0.002]])
+    labels = np.array([[0.01], [0.02]])
+
+    with self.test_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, losses.Reduction.MEAN, devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
+      session.run(variables.local_variables_initializer())
+      session.run(variables.global_variables_initializer())
+
+      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
+      auc, b = estimator_spec.eval_metric_ops['auc']
+
+      session.run([a, b])
+      accuracy = session.run(accuracy)
+      auc = session.run(auc)
+
+      # loss[i] = features[i] * 10 - labels[i].
+      # Accuracy is 0.0 (no match) in the first tower.
+      # Accuracy is 1.0 (match) in the second tower, since the feature
+      # times weight "c" happened to be equal to the label.
+      total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) / 2.0
+
+      self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01)
+      self.assertEqual(0, auc)
+      self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
+
   def test_predict(self):
     features = np.array([[0.01], [0.002]])
     labels = np.array([[0.01], [0.02]])
 
     with self.test_session() as session:
       replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1'])
+          self.model_fn, devices=['/gpu:0', '/gpu:1'])
       estimator_spec = replicated_model_fn(
           features, labels, model_fn_lib.ModeKeys.PREDICT, self.params)
       session.run(variables.global_variables_initializer())
@@ -273,7 +374,7 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
 
     with self.test_session() as session:
       replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, self.optimizer_fn)
+          self.model_fn, devices=['/gpu:0'])
       estimator_spec = replicated_model_fn(
           features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
       session.run(variables.global_variables_initializer())
@@ -295,7 +396,7 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
 
     with self.test_session() as session:
       replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, self.optimizer_fn, devices=['/gpu:0'])
+          self.model_fn, devices=['/gpu:0'])
       estimator_spec = replicated_model_fn(
           features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
       session.run(variables.local_variables_initializer())
@@ -323,7 +424,7 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
 
     with self.test_session() as session:
       replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, self.optimizer_fn, devices=['/gpu:0'])
+          self.model_fn, devices=['/gpu:0'])
       estimator_spec = replicated_model_fn(
           features, labels, model_fn_lib.ModeKeys.PREDICT, self.params)
       session.run(variables.global_variables_initializer())
@@ -332,6 +433,412 @@ class ReplicateModelTest(test_util.TensorFlowTestCase):
           'probabilities': np.array([[0.1], [0.02]])
       }, session.run(estimator_spec.predictions))
 
+  def test_unsupported_loss_reduction(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 '.+none.+reduction.+is.+specified.+'):
+      _ = replicate_model_fn.replicate_model_fn(self.model_fn,
+                                                losses.Reduction.NONE)
+
+
+class ReplicateAcrossASingleDeviceWithoutTowerOptimizer(
+    test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    optimizer = gradient_descent.GradientDescentOptimizer(
+        params['learning_rate'])
+
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=loss,
+        eval_metric_ops=metrics,
+        predictions={'probabilities': predictions},
+        train_op=optimizer.minimize(loss))
+
+  @property
+  def params(self):
+    params = {}
+    params['learning_rate'] = 1.0
+    return params
+
+  def test_train_single_tower(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.test_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, devices=['/gpu:0'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+      session.run(variables.global_variables_initializer())
+
+      # loss = feature * c - label
+      total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      # loss' of c is 3.
+      # new value of c = 10 - learning rate * 3 = 7.0.
+      session.run(estimator_spec.train_op)
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(7.0, session.run(c))
+
+
+class UseTowerEstimatorWithoutReplication(test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    features = features['features']
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    optimizer = replicate_model_fn.TowerOptimizer(
+        gradient_descent.GradientDescentOptimizer(params['learning_rate']))
+
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=loss,
+        eval_metric_ops=metrics,
+        predictions={'probabilities': predictions},
+        train_op=optimizer.minimize(loss))
+
+  @property
+  def params(self):
+    params = {}
+    params['learning_rate'] = 1.0
+    return params
+
+  def test_train_single_tower(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'features': features}, y=labels, batch_size=2, shuffle=False)
+
+    with self.test_session():
+      estimator = estimator_lib.Estimator(
+          model_fn=self.model_fn,
+          model_dir=tempfile.mkdtemp(),
+          params=self.params)
+      estimator.train(train_input_fn, steps=1)
+
+      self.assertEqual(7.0, estimator.get_variable_value('c'))
+
+
+class MakeSureSyncReplicasOptimizerWorks(test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    features = features['features']
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    optimizer = gradient_descent.GradientDescentOptimizer(
+        params['learning_rate'])
+    optimizer = training.SyncReplicasOptimizer(
+        optimizer, replicas_to_aggregate=1)
+    sync_hook = optimizer.make_session_run_hook(True)
+    optimizer = replicate_model_fn.TowerOptimizer(optimizer)
+
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=loss,
+        eval_metric_ops=metrics,
+        training_hooks=[sync_hook],
+        predictions={'probabilities': predictions},
+        train_op=optimizer.minimize(
+            loss, global_step=training.get_global_step()))
+
+  @property
+  def params(self):
+    params = {}
+    params['learning_rate'] = 1.0
+    return params
+
+  def test_train_multiple_towers(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'features': features}, y=labels, batch_size=2, shuffle=False)
+
+    model_fn = replicate_model_fn.replicate_model_fn(
+        self.model_fn,
+        loss_reduction=losses.Reduction.SUM,
+        devices=['/gpu:0', '/gpu:1'])
+
+    estimator = estimator_lib.Estimator(
+        model_fn=model_fn, model_dir=tempfile.mkdtemp(), params=self.params)
+    estimator.train(train_input_fn, steps=1)
+
+    self.assertEqual(7.0, estimator.get_variable_value('c'))
+
+
+class ReplicateWithTwoOptimizersTest(test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    side_effects = variable_scope.get_variable(
+        'side_effects',
+        initializer=constant_op.constant(0, dtype=dtypes.float64),
+        dtype=dtypes.float64,
+        trainable=False)
+
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    first_optimizer = replicate_model_fn.TowerOptimizer(
+        gradient_descent.GradientDescentOptimizer(1.0))
+    second_optimizer = replicate_model_fn.TowerOptimizer(
+        adam.AdamOptimizer(1.0))
+
+    with ops_lib.control_dependencies([side_effects.assign_add(1.0)]):
+      first_grads_and_vars = first_optimizer.compute_gradients(loss)
+
+    train_op = control_flow_ops.group(
+        [first_optimizer.apply_gradients(first_grads_and_vars),
+         second_optimizer.minimize(loss)])
+
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=loss,
+        eval_metric_ops=metrics,
+        predictions={'probabilities': predictions},
+        train_op=train_op)
+
+  def test_train(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.test_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn,
+          loss_reduction=losses.Reduction.SUM,
+          devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(features, labels,
+                                           model_fn_lib.ModeKeys.TRAIN, {})
+      session.run(variables.global_variables_initializer())
+
+      # loss = feature * c - label
+      total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      # loss' of c is 3.
+      # new value of c = 10 - learning rate * 3 = 7.0.
+      # Adam subtracts another ~1.
+      session.run(estimator_spec.train_op)
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertNear(6.0, session.run(c), 0.000001)
+
+        side_effects = variable_scope.get_variable(
+            'side_effects', dtype=dtypes.float64)
+        self.assertNear(2.0, session.run(side_effects), 0.000001)
+
+
+class ReplicateWithTwoLossesAndOneOptimizer(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    self._should_skip_optimizer = False
+    self._towers_left_before_skipping_optimizer = -1
+
+  def incorrectly_skip_optimizer_for_tower(self, tower_number):
+    self._should_skip_optimizer = True
+    self._towers_left_before_skipping_optimizer = tower_number
+
+  def should_skip_optimizer(self):
+    if not self._should_skip_optimizer:
+      return False
+    if self._towers_left_before_skipping_optimizer == 0:
+      return True
+    else:
+      self._towers_left_before_skipping_optimizer -= 1
+      return False
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+    d = variable_scope.get_variable(
+        'd',
+        initializer=constant_op.constant(2, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    another_predictions = math_ops.multiply(features, d)
+    another_loss = losses.absolute_difference(
+        labels=labels,
+        predictions=another_predictions,
+        reduction=losses.Reduction.SUM)
+    another_loss = math_ops.reduce_sum(another_loss)
+
+    total_loss = math_ops.add(loss, another_loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    train_ops = []
+
+    optimizer = replicate_model_fn.TowerOptimizer(
+        gradient_descent.GradientDescentOptimizer(1.0))
+    train_ops.append(optimizer.minimize(loss, var_list=[c]))
+    if not self.should_skip_optimizer():
+      another_optimizer = replicate_model_fn.TowerOptimizer(
+          gradient_descent.GradientDescentOptimizer(1.0))
+      train_ops.append(another_optimizer.minimize(another_loss, var_list=[d]))
+
+    train_op = control_flow_ops.group(train_ops)
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=total_loss,
+        eval_metric_ops=metrics,
+        predictions={'probabilities': predictions},
+        train_op=train_op)
+
+  def test_train(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.test_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn,
+          loss_reduction=losses.Reduction.SUM,
+          devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(features, labels,
+                                           model_fn_lib.ModeKeys.TRAIN, {})
+      session.run(variables.global_variables_initializer())
+
+      # For each tower, loss = (feature * c - label) + (feature * d - label).
+      total_loss = (1.0 * 10 - 1.0 + 1.0 * 2.0 - 1.0) + (
+          2.0 * 10 - 2.0 + 2.0 * 2.0 - 2.0)
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      session.run(estimator_spec.train_op)
+
+      # loss' of c or loss' of d is 3.
+      # new value of c = 10 - learning rate * 3 = 7.0.
+      # new value of d = 2  - learning rate * 3 = -1.0.
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertNear(7.0, session.run(c), 0.000001)
+        d = variable_scope.get_variable('d', dtype=dtypes.float64)
+        self.assertNear(-1.0, session.run(d), 0.000001)
+
+  def test_different_optimizer_calls_within_towers(self):
+    self.incorrectly_skip_optimizer_for_tower(1)
+
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.test_session(), ops_lib.Graph().as_default():
+      with self.assertRaisesRegexp(
+          ValueError, '.+was.+supposed.+to.+make.+same.+optimizer.+calls.+'):
+        replicated_model_fn = replicate_model_fn.replicate_model_fn(
+            self.model_fn, devices=['/gpu:0', '/gpu:1'])
+        _ = replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN,
+                                {})
+
+
+class FailToWrapOptimizerInTheModelFn(test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    optimizer = gradient_descent.GradientDescentOptimizer(1.0)
+    train_op = optimizer.minimize(loss)
+
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=loss,
+        eval_metric_ops=metrics,
+        predictions={'probabilities': predictions},
+        train_op=train_op)
+
+  def test_train(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.test_session():
+      with self.assertRaisesRegexp(ValueError,
+                                   'Please.+wrap.+with.+TowerOptimizer'):
+        replicated_model_fn = replicate_model_fn.replicate_model_fn(
+            self.model_fn, devices=['/gpu:0', '/gpu:1'])
+        _ = replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN,
+                                {})
+
 
 class GetLossTowersTest(test_util.TensorFlowTestCase):
 
@@ -358,8 +865,9 @@ class GetLossTowersTest(test_util.TensorFlowTestCase):
           labels=[[0.6], [0.6]],
           params=None,
           config=None,
+          loss_reduction=losses.Reduction.SUM,
           devices=['/gpu:0', '/gpu:1'],
-          local_ps_device='/gpu:0',
+          local_ps_devices=['/gpu:0'],
           name_scope_pattern='test_tower_{}')
       session.run(variables.global_variables_initializer())
 
@@ -382,6 +890,89 @@ class GetLossTowersTest(test_util.TensorFlowTestCase):
         c = variable_scope.get_variable('c', dtype=dtypes.float64)
         self.assertEqual(0.25, session.run(c))
 
+  def test_gradients_are_computed_with_mean_reduction(self):
+    with self.test_session() as session:
+      tower_specs = replicate_model_fn._get_loss_towers(
+          self.model_fn,
+          mode=model_fn_lib.ModeKeys.EVAL,
+          features=[[0.6], [1.6]],
+          labels=[[0.6], [0.6]],
+          params=None,
+          loss_reduction=losses.Reduction.MEAN,
+          config=None,
+          devices=['/gpu:0', '/gpu:1'],
+          local_ps_devices=['/gpu:0'],
+          name_scope_pattern='test_tower_{}')
+      session.run(variables.global_variables_initializer())
+
+      self.assertEqual(len(tower_specs), 2)
+
+      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
+      self.assertEqual('averaged_loss:0', tower_specs[0].loss.name)
+      self.assertEqual(0.5, session.run(tower_specs[0].loss))
+
+      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
+      self.assertEqual('test_tower_1/averaged_loss:0', tower_specs[1].loss.name)
+      # The input batch for the second tower had a loss that is 1.0
+      # bigger: 0.6 vs 1.6.
+      self.assertEqual(1.0, session.run(tower_specs[1].loss))
+
+      self.assertEqual(1, len(variables.global_variables()))
+      self.assertEqual(1, len(variables.trainable_variables()))
+
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(0.25, session.run(c))
+
+  def test_variables_are_round_robined_correctly(self):
+    """Test that creates multiple variables and tests round-robin placement."""
+
+    def model_fn(mode, features, labels, params):
+      del params
+      for variable_name in ['a', 'b', 'c', 'd']:
+        c = variable_scope.get_variable(
+            variable_name,
+            initializer=constant_op.constant(0.25, dtype=dtypes.float64),
+            dtype=dtypes.float64)
+
+      predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c)
+      labels = np.array([0.1, 0.2, 0.3, labels[0]])
+      loss = losses.absolute_difference(
+          labels=labels,
+          predictions=predictions,
+          reduction=losses.Reduction.SUM)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode, loss=math_ops.reduce_sum(loss))
+
+    with self.test_session() as session:
+      tower_specs = replicate_model_fn._get_loss_towers(
+          model_fn,
+          mode=None,
+          features=[[0.6], [1.6], [2.6]],
+          labels=[[0.6], [0.6], [2.6]],
+          params=None,
+          loss_reduction=losses.Reduction.SUM,
+          config=None,
+          devices=['/gpu:0', '/gpu:1', '/gpu:3'],
+          local_ps_devices=['/gpu:0', '/gpu:1', '/gpu:3'],
+          name_scope_pattern='test_tower_{}')
+      session.run(variables.global_variables_initializer())
+
+      self.assertEqual(len(tower_specs), 3)
+      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
+      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
+      self.assertEqual('/device:GPU:3', tower_specs[2].loss.device)
+
+      with variable_scope.variable_scope('', reuse=True):
+        a = variable_scope.get_variable('a', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:0', a.device)
+        b = variable_scope.get_variable('b', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:1', b.device)
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:3', c.device)
+        d = variable_scope.get_variable('d', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:0', d.device)
+
 
 class SplitBatchTest(test_util.TensorFlowTestCase):
 
@@ -600,11 +1191,12 @@ class PredictSpecTest(test_util.TensorFlowTestCase):
           self.model_fn,
           mode=None,
           features=[[0.1], [0.2]],
+          loss_reduction=losses.Reduction.SUM,
           labels=[[], []],
           params=None,
           config=None,
           devices=['/gpu:0', '/gpu:1'],
-          local_ps_device='/gpu:0',
+          local_ps_devices=['/gpu:0'],
       )
       session.run(variables.global_variables_initializer())
 
@@ -718,16 +1310,14 @@ class ReduceMetricVariablesTest(test_util.TensorFlowTestCase):
           variables.variables_initializer(
               ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)))
 
-      with self.assertRaisesRegexp(ValueError, ''):
+      with self.assertRaisesRegexp(
+          ValueError, '.+Expected.+local.+variables.+but.+got.+instead.+'):
         session.run(
             replicate_model_fn._reduce_metric_variables(number_of_towers=3))
 
 
 class MergeExportOutputsTest(test_util.TensorFlowTestCase):
 
-  def optimizer_fn(self):
-    return gradient_descent.GradientDescentOptimizer(1.0)
-
   def model_fn(self, mode, features, labels, params):
     c = variable_scope.get_variable(
         'c',
@@ -769,7 +1359,6 @@ class MergeExportOutputsTest(test_util.TensorFlowTestCase):
         loss=math_ops.reduce_sum(loss),
         eval_metric_ops=metrics,
         predictions=predictions,
-        train_op=loss,  # This train_op isn't actually used.
         export_outputs=export_outputs)
 
   def replicate_estimator_spec(self, session):
@@ -777,13 +1366,13 @@ class MergeExportOutputsTest(test_util.TensorFlowTestCase):
     labels = np.array([0.01, 0.02])
 
     replicated_model_fn = replicate_model_fn.replicate_model_fn(
-        self.model_fn, self.optimizer_fn, devices=['/gpu:0', '/gpu:1'])
+        self.model_fn, devices=['/gpu:0', '/gpu:1'])
     estimator_spec = replicated_model_fn(features, labels,
                                          model_fn_lib.ModeKeys.PREDICT, {})
     session.run(variables.global_variables_initializer())
     return estimator_spec
 
-  def test_merde_predict_output(self):
+  def test_merge_predict_output(self):
     with self.test_session() as session:
       estimator_spec = self.replicate_estimator_spec(session)
       self.assertAllClose(
@@ -850,25 +1439,66 @@ class GetLocalDevicesTest(test_util.TensorFlowTestCase):
 class LocalDeviceSetterTest(test_util.TensorFlowTestCase):
 
   def test_vars_are_on_ps_but_ops_are_on_workers(self):
+    ps_devices = ['/device:GPU:3']
+    round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices))
+
+    local_device_setter = replicate_model_fn._local_device_setter(
+        ps_devices=ps_devices,
+        ps_strategy=round_robin,
+        worker_device='/device:GPU:2')
+
+    with ops_lib.device(local_device_setter):
+      a = variables.Variable(0.01)
+      self.assertEqual('/device:GPU:3', a.device)
+
+      b = variables.Variable(0.02)
+      self.assertEqual('/device:GPU:3', b.device)
+
+      c = variables.Variable(0.03)
+      self.assertEqual('/device:GPU:3', c.device)
+
+      a_op = array_ops.concat(a, axis=0)
+      self.assertEqual('/device:GPU:2', a_op.device)
+
+      b_op = array_ops.concat(b, axis=0)
+      self.assertEqual('/device:GPU:2', b_op.device)
+
+  def test_round_robin_placement(self):
+    ps_devices = [
+        '/device:GPU:0', '/device:GPU:1', '/device:GPU:3', '/device:GPU:4'
+    ]
+    round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices))
+
     local_device_setter = replicate_model_fn._local_device_setter(
-        ps_device='/device:GPU:3', worker_device='/device:GPU:2')
+        ps_devices=ps_devices,
+        ps_strategy=round_robin,
+        worker_device='/device:GPU:2')
 
     with ops_lib.device(local_device_setter):
-      c = variables.Variable(0.01)
+      a = variables.Variable(0.01)
+      self.assertEqual('/device:GPU:0', a.device)
+
+      b = variables.Variable(0.02)
+      self.assertEqual('/device:GPU:1', b.device)
+
+      c = variables.Variable(0.03)
       self.assertEqual('/device:GPU:3', c.device)
 
-      cc = variables.Variable(0.02)
-      self.assertEqual('/device:GPU:3', cc.device)
+      a_op = array_ops.concat(a, axis=0)
+      self.assertEqual('/device:GPU:2', a_op.device)
 
-      ccc = variables.Variable(0.03)
-      self.assertEqual('/device:GPU:3', ccc.device)
+      b_op = array_ops.concat(b, axis=0)
+      self.assertEqual('/device:GPU:2', b_op.device)
+
+      c = variables.Variable(0.03)
+      self.assertEqual('/device:GPU:4', c.device)
+
+      d = variables.Variable(0.03)
+      self.assertEqual('/device:GPU:0', d.device)
 
       c_op = array_ops.concat(c, axis=0)
       self.assertEqual('/device:GPU:2', c_op.device)
 
-      cc_op = array_ops.concat(cc, axis=0)
-      self.assertEqual('/device:GPU:2', cc_op.device)
-
 
 class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase):
 
@@ -939,7 +1569,7 @@ class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase):
         dense_shape=constant_op.constant([2]))
     b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1])
 
-    with self.assertRaisesRegexp(ValueError, ''):
+    with self.assertRaisesRegexp(ValueError, '.+name.+not.+expected.+'):
       _ = replicate_model_fn._compute_sum_on_device(
           [a, b], device='/device:GPU:0', name='cant_name_indexslices')
 
diff --git a/tensorflow/contrib/factorization/examples/BUILD b/tensorflow/contrib/factorization/examples/BUILD
index 363baa121ab3854a802ca3606e35597d31b35a57..bbe842bd5ccc7357805adda1df42ba8799fcd8f2 100644
--- a/tensorflow/contrib/factorization/examples/BUILD
+++ b/tensorflow/contrib/factorization/examples/BUILD
@@ -21,3 +21,14 @@ tf_py_test(
     ],
     tags = ["notsan"],
 )
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+)
diff --git a/tensorflow/contrib/factorization/python/ops/clustering_ops.py b/tensorflow/contrib/factorization/python/ops/clustering_ops.py
index 96cc80ce241347ebca5b68140f1b1c8b9898ae72..6d3acb2750743318aad83991bc1e89d64c329423 100644
--- a/tensorflow/contrib/factorization/python/ops/clustering_ops.py
+++ b/tensorflow/contrib/factorization/python/ops/clustering_ops.py
@@ -261,8 +261,8 @@ class KMeans(object):
             inp, clusters, 1)
         if self._distance_metric == COSINE_DISTANCE:
           distances *= 0.5
-        output.append((score, array_ops.squeeze(distances),
-                       array_ops.squeeze(indices)))
+        output.append((score, array_ops.squeeze(distances, [-1]),
+                       array_ops.squeeze(indices, [-1])))
     return zip(*output)
 
   def _clusters_l2_normalized(self):
diff --git a/tensorflow/contrib/factorization/python/ops/gmm.py b/tensorflow/contrib/factorization/python/ops/gmm.py
index 0d67e09f8151b48c97094b6b48f26e63443707ef..f72280c4ecf19e33278ffe74061f44bbb7b21709 100644
--- a/tensorflow/contrib/factorization/python/ops/gmm.py
+++ b/tensorflow/contrib/factorization/python/ops/gmm.py
@@ -24,7 +24,7 @@ import numpy as np
 from tensorflow.contrib import framework
 from tensorflow.contrib.factorization.python.ops import gmm_ops
 from tensorflow.contrib.framework.python.framework import checkpoint_utils
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
 from tensorflow.python.framework import constant_op
@@ -167,7 +167,7 @@ class GMM(estimator.Estimator):
                                      self._num_clusters, self._random_seed,
                                      self._covariance_type,
                                      self._params)
-      incr_step = state_ops.assign_add(variables.get_global_step(), 1)
+      incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
       loss = math_ops.reduce_sum(losses)
       training_op = with_dependencies([training_op, incr_step], loss)
       training_hooks = [_InitializeClustersHook(
diff --git a/tensorflow/contrib/factorization/python/ops/kmeans_test.py b/tensorflow/contrib/factorization/python/ops/kmeans_test.py
index 4709d7942583f1406a3fa0ff3a078d0283872ea6..f9598bfc08c05ea3bba88b3135da0cf2e6bb0c95 100644
--- a/tensorflow/contrib/factorization/python/ops/kmeans_test.py
+++ b/tensorflow/contrib/factorization/python/ops/kmeans_test.py
@@ -194,15 +194,7 @@ class KMeansTest(KMeansTestBase):
     score = kmeans.score(input_fn=self.input_fn(batch_size=self.num_points))
     self.assertNear(self.true_score, score, self.true_score * 0.01)
 
-  def test_infer(self):
-    kmeans = self._kmeans()
-    # Make a call to fit to initialize the cluster centers.
-    max_steps = 1
-    kmeans.train(input_fn=self.input_fn(), max_steps=max_steps)
-    clusters = kmeans.cluster_centers()
-
-    # Make a small test set
-    num_points = 10
+  def _infer_helper(self, kmeans, clusters, num_points):
     points, true_assignments, true_offsets = make_random_points(
         clusters, num_points)
     input_fn = self.input_fn(batch_size=num_points, points=points, num_epochs=1)
@@ -223,6 +215,17 @@ class KMeansTest(KMeansTestBase):
             np.sum(np.square(clusters), axis=1, keepdims=True)))
     self.assertAllClose(transform, true_transform, rtol=0.05, atol=10)
 
+  def test_infer(self):
+    kmeans = self._kmeans()
+    # Make a call to fit to initialize the cluster centers.
+    max_steps = 1
+    kmeans.train(input_fn=self.input_fn(), max_steps=max_steps)
+    clusters = kmeans.cluster_centers()
+
+    # Run inference on small datasets.
+    self._infer_helper(kmeans, clusters, 10)
+    self._infer_helper(kmeans, clusters, 1)
+
 
 class KMeansTestMultiStageInit(KMeansTestBase):
 
diff --git a/tensorflow/contrib/ffmpeg/BUILD b/tensorflow/contrib/ffmpeg/BUILD
index dc5a04a0b15870babbc98cf104e109caf829901c..eccce99071dc1477cf4f3bb152f3304b3b0fc35a 100644
--- a/tensorflow/contrib/ffmpeg/BUILD
+++ b/tensorflow/contrib/ffmpeg/BUILD
@@ -155,7 +155,10 @@ tf_py_test(
     data = [
         ":test_data",
     ],
-    tags = ["manual"],
+    tags = [
+        "manual",
+        "notap",
+    ],
 )
 
 py_library(
diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py
index 871dff7bbe4912f0daf2bc184d6b0f12510abee7..daba965a98893b992abdc598ec713f13020d6e91 100644
--- a/tensorflow/contrib/ffmpeg/__init__.py
+++ b/tensorflow/contrib/ffmpeg/__init__.py
@@ -26,6 +26,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio
+from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio
 from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video
 
diff --git a/tensorflow/contrib/ffmpeg/decode_video_op_test.py b/tensorflow/contrib/ffmpeg/decode_video_op_test.py
index 4d1fac4ef8afbf44cd45bae065f8a95b0527079a..b43b6b8919223bd7731209d5423b142601396ea5 100644
--- a/tensorflow/contrib/ffmpeg/decode_video_op_test.py
+++ b/tensorflow/contrib/ffmpeg/decode_video_op_test.py
@@ -20,11 +20,9 @@ from __future__ import print_function
 
 import os.path
 
-import six
+import six  # pylint: disable=unused-import
 
 from tensorflow.contrib import ffmpeg
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import image_ops
 from tensorflow.python.platform import resource_loader
 from tensorflow.python.platform import test
@@ -32,7 +30,8 @@ from tensorflow.python.platform import test
 
 class DecodeVideoOpTest(test.TestCase):
 
-  def _loadFileAndTest(self, filename, width, height, frames, bmp_filename, index):
+  def _loadFileAndTest(self, filename, width, height, frames, bmp_filename,
+                       index):
     """Loads an video file and validates the output tensor.
 
     Args:
@@ -40,6 +39,8 @@ class DecodeVideoOpTest(test.TestCase):
       width: The width of the video.
       height: The height of the video.
       frames: The frames of the video.
+      bmp_filename: The filename for the bmp file.
+      index: Index location inside the video.
     """
     with self.test_session():
       path = os.path.join(resource_loader.get_data_files_path(), 'testdata',
@@ -48,7 +49,7 @@ class DecodeVideoOpTest(test.TestCase):
         contents = f.read()
 
       bmp_path = os.path.join(resource_loader.get_data_files_path(), 'testdata',
-                          bmp_filename)
+                              bmp_filename)
       with open(bmp_path, 'rb') as f:
         bmp_contents = f.read()
 
@@ -58,7 +59,7 @@ class DecodeVideoOpTest(test.TestCase):
       video_op = ffmpeg.decode_video(contents)
       video = video_op.eval()
       self.assertEqual(video.shape, (frames, height, width, 3))
-      self.assertAllEqual(video[index,:,:,:], image)
+      self.assertAllEqual(video[index, :, :, :], image)
 
   def testMp4(self):
     self._loadFileAndTest('small.mp4', 560, 320, 166, 'small_100.bmp', 99)
diff --git a/tensorflow/contrib/ffmpeg/default/BUILD b/tensorflow/contrib/ffmpeg/default/BUILD
index 949ae9ad9e4b045ee1b5cc82d49c0e7468c2005d..6b455567d766dbe6d380a498bd7f521db27e077b 100644
--- a/tensorflow/contrib/ffmpeg/default/BUILD
+++ b/tensorflow/contrib/ffmpeg/default/BUILD
@@ -19,6 +19,7 @@ cc_library(
     ],
     deps = [
         "//tensorflow/core:framework_headers_lib",
+        "//third_party/eigen3",
         "@protobuf_archive//:protobuf_headers",
     ],
 )
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
index 201774e1d011f35df9c3803f2ed8818cc9b1c1c2..1e8af1458cea13b2ddb89b7d93a4ffb8b974ecd2 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
@@ -49,7 +49,8 @@ std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
           "-nostdin",             // No interactive commands accepted.
           "-f", input_format_id,  // eg: "mp3"
           "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
-          "-loglevel", "info",  // Enable verbose logging to support debugging.
+          "-loglevel", "error",   // Print errors only.
+          "-hide_banner",         // Skip printing build options, version, etc.
           "-map_metadata", "-1",  // Copy global metadata from input to output.
           "-vn",                  // No video recording.
           "-ac:a:0", StrCat(channel_count), "-ar:a:0",
@@ -72,7 +73,8 @@ std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
           "-probesize",
           StrCat(kDefaultProbeSize),
           "-loglevel",
-          "info",  // Enable verbose logging to support debugging.
+          "error",  // Print errors only.
+          "-hide_banner",  // Skip printing build options, version, etc.
           "-vcodec",
           "rawvideo",
           "-pix_fmt",
@@ -220,7 +222,8 @@ string BuildWavFile(int32 samples_per_second, int32 channel_count,
 Status ReadInfoFile(const string& filename, uint32* width, uint32* height,
                     uint32* frames) {
   string data;
-  ReadFileToString(Env::Default(), filename, &data);
+  TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &data))
+      << "Could not read FFmpeg file: " << filename;
   bool in_output = false;
   bool in_mapping = false;
   uint32 frames_value = 0;
@@ -377,7 +380,7 @@ Status ReadVideoFile(const string& filename, std::vector<uint8>* output_data,
         open(stderr_filename.c_str(), O_RDWR | O_CREAT | O_APPEND, 0600);
     if (fd < 0) {
       const int error = errno;
-      LOG(ERROR) << "FFmpeg stderr file coule not be created: "
+      LOG(ERROR) << "FFmpeg stderr file could not be created: "
                  << strerror(error);
       ::_exit(error);
     }
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
index 39e7e90cccf1012eb42261bde55d0dc3b7f278ef..36fc71794b06e0f3cb86c40b325ce50e8999c667 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_utility_test.cc
@@ -23,6 +23,7 @@
 
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/test.h"
diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
index 78ead471d2cf9f0654a06dc022d7cc592d14c710..08b5a6ea48c2d4959af68a2ee9d27d21c6245457 100644
--- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
+++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py
+from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
 from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py
 from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
 from tensorflow.contrib.util import loader
diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD
index 5b659ddaa1386736eb8cc05a203ed1827ccd160e..9e5f54f0973eae899ca65e4098358107053cb7d4 100644
--- a/tensorflow/contrib/framework/BUILD
+++ b/tensorflow/contrib/framework/BUILD
@@ -11,11 +11,12 @@ package(default_visibility = [
 ])
 
 load("//tensorflow:tensorflow.bzl", "py_test")
-load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")
 load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
 load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs")
 load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 
 tf_custom_op_py_library(
     name = "framework_py",
@@ -31,8 +32,10 @@ tf_custom_op_py_library(
         "python/ops/arg_scope.py",
         "python/ops/audio_ops.py",
         "python/ops/checkpoint_ops.py",
+        "python/ops/critical_section_ops.py",
         "python/ops/ops.py",
         "python/ops/prettyprint_ops.py",
+        "python/ops/script_ops.py",
         "python/ops/sort_ops.py",
         "python/ops/variables.py",
     ],
@@ -60,6 +63,7 @@ tf_custom_op_py_library(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:pywrap_tensorflow",
+        "//tensorflow/python:script_ops",
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:state_ops_gen",
@@ -70,6 +74,7 @@ tf_custom_op_py_library(
         "//tensorflow/python:variable_scope",
         "//tensorflow/python:variables",
         "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:function",
         "//third_party/py/numpy",
         "@six_archive//:six",
     ],
@@ -173,6 +178,21 @@ py_test(
     ],
 )
 
+cuda_py_test(
+    name = "critical_section_test",
+    size = "medium",
+    srcs = ["python/ops/critical_section_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        ":framework_py",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:resource_variable_ops",
+    ],
+)
+
 py_test(
     name = "accumulate_n_v2_eager_test",
     size = "small",
diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py
index 4edc77f86ba786ca547b8d3842e2cf02833fbbac..673c51784229bd88011f8b33fb851a2885566220 100644
--- a/tensorflow/contrib/framework/__init__.py
+++ b/tensorflow/contrib/framework/__init__.py
@@ -81,7 +81,10 @@ See the @{$python/contrib.framework} guide.
 @@load_linear_multiclass_bias_initializer
 @@load_variable_slot_initializer
 
+@@py_func
 @@sort
+
+@@CriticalSection
 """
 
 from __future__ import absolute_import
diff --git a/tensorflow/contrib/framework/python/framework/graph_util.py b/tensorflow/contrib/framework/python/framework/graph_util.py
index 6d5cde5c9e118d372a6532bfc593bd08b9e18a7b..a18ff2320d99726bb355ff6179fc97a070c2fec7 100644
--- a/tensorflow/contrib/framework/python/framework/graph_util.py
+++ b/tensorflow/contrib/framework/python/framework/graph_util.py
@@ -150,5 +150,5 @@ def get_placeholders(graph):
   # The return value (a Tensor) of placeholder() is the
   # first output of this operation in fact.
   operations = graph.get_operations()
-  result = [i.outputs[0] for i in operations if i.type == 'Placeholder']
+  result = [i.outputs[0] for i in operations if i.type == "Placeholder"]
   return result
diff --git a/tensorflow/contrib/framework/python/framework/graph_util_test.py b/tensorflow/contrib/framework/python/framework/graph_util_test.py
index 0722fafc132c0db2ad621f6f9345185f34c643f5..b8a6d109e19211d271c2b15bac66ddacd38fe395 100644
--- a/tensorflow/contrib/framework/python/framework/graph_util_test.py
+++ b/tensorflow/contrib/framework/python/framework/graph_util_test.py
@@ -90,8 +90,9 @@ class GetPlaceholdersTest(test.TestCase):
     with ops.Graph().as_default() as g:
       placeholders = [array_ops.placeholder(dtypes.float32) for _ in range(5)]
       results = graph_util.get_placeholders(g)
-      self.assertEqual(sorted(placeholders, key=lambda x: x._id),  # pylint: disable=protected-access
-                       sorted(results, key=lambda x: x._id))  # pylint: disable=protected-access
+      self.assertEqual(
+          sorted(placeholders, key=lambda x: x._id),  # pylint: disable=protected-access
+          sorted(results, key=lambda x: x._id))  # pylint: disable=protected-access
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/contrib/framework/python/ops/__init__.py b/tensorflow/contrib/framework/python/ops/__init__.py
index 685bb94779762ce46ee342e7e0a182c54be64743..c4976497f5fa95d82e492153b117681f693eaa13 100644
--- a/tensorflow/contrib/framework/python/ops/__init__.py
+++ b/tensorflow/contrib/framework/python/ops/__init__.py
@@ -22,8 +22,10 @@ from __future__ import print_function
 # pylint: disable=wildcard-import
 from tensorflow.contrib.framework.python.ops.arg_scope import *
 from tensorflow.contrib.framework.python.ops.checkpoint_ops import *
+from tensorflow.contrib.framework.python.ops.critical_section_ops import *
 from tensorflow.contrib.framework.python.ops.ops import *
 from tensorflow.contrib.framework.python.ops.prettyprint_ops import *
+from tensorflow.contrib.framework.python.ops.script_ops import *
 from tensorflow.contrib.framework.python.ops.sort_ops import *
 from tensorflow.contrib.framework.python.ops.variables import *
 # pylint: enable=wildcard-import
diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..182fec924febb74a23b82b1664d137f033f3b1b4
--- /dev/null
+++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py
@@ -0,0 +1,324 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Critical Section object and execution logic."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+# TODO(ebrevdo): Re-enable once CriticalSection is in core.
+# from tensorflow.core.protobuf import critical_section_pb2
+
+from tensorflow.python.eager import context
+from tensorflow.python.eager import function
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import gen_resource_variable_ops
+from tensorflow.python.util import nest
+
+
+# Graph Keys
+CRITICAL_SECTIONS = "critical_sections"
+CRITICAL_SECTION_EXECUTIONS = "critical_section_executions"
+
+
+class _ExecutionSignature(
+    collections.namedtuple("_ExecutionSignature",
+                           ("op", "exclusive_resource_access"))):
+  """A class storing an `ExecuteInCriticalResource` op and associated attrs."""
+  pass
+
+
+class CriticalSection(object):
+  """Critical section.
+
+  A `CriticalSection` object is a resource in the graph which executes subgraphs
+  in **serial** order.  A common example of a subgraph one may wish to run
+  exclusively is the one given by the following function:
+
+  ```python
+  v = resource_variable_ops.ResourceVariable(0.0, name="v")
+
+  def count():
+    value = v.read_value()
+    with tf.control_dependencies([value]):
+      with tf.control_dependencies([v.assign_add(1)]):
+        return tf.identity(value)
+  ```
+
+  Here, a snapshot of `v` is captured in `value`; and then `v` is updated.
+  The snapshot value is returned.
+
+  If multiple workers or threads all execute `count` in parallel, there is no
+  guarantee that access to the variable `v` is atomic at any point within
+  any thread's calculation of `count`.  In fact, even implementing an atomic
+  counter that guarantees that the user will see each value `0, 1, ...,` is
+  currently impossible.
+
+  The solution is to ensure any access to the underlying resource `v` is
+  only processed through a critical section:
+
+  ```python
+  cs = CriticalSection()
+  f1 = cs.execute(count)
+  f2 = cs.execute(count)
+  output = f1 + f2
+  session.run(output)
+  ```
+  The functions `f1` and `f2` will be executed serially, and updates to `v`
+  will be atomic.
+
+  **NOTES**
+
+  All resource objects, including the critical section and any captured
+  variables of functions executed on that critical section, will be
+  colocated to the same device (host and cpu/gpu).
+
+  When using multiple critical sections on the same resources, there is no
+  guarantee of exclusive access to those resources.  This behavior is disallowed
+  by default (but see the kwarg `exclusive_resource_access`).
+
+  For example, running the same function in two separate critical sections
+  will not ensure serial execution:
+
+  ```python
+  v = tf.get_variable("v", initializer=0.0, use_resource=True)
+  def accumulate(up):
+    x = v.read_value()
+    with tf.control_dependencies([x]):
+      with tf.control_dependencies([v.assign_add(up)]):
+        return tf.identity(x)
+  ex1 = CriticalSection().execute(
+    accumulate, 1.0, exclusive_resource_access=False)
+  ex2 = CriticalSection().execute(
+    accumulate, 1.0, exclusive_resource_access=False)
+  bad_sum = ex1 + ex2
+  sess.run(v.initializer)
+  sess.run(bad_sum)  # May return 0.0
+  ```
+  """
+
+  def __init__(self, name=None, critical_section_def=None, import_scope=None):
+    """Creates a critical section."""
+    if critical_section_def and name is not None:
+      raise ValueError("critical_section_def and name are mutually exclusive.")
+    if critical_section_def:
+      self._init_from_proto(critical_section_def, import_scope=import_scope)
+    else:
+      self._init_from_args(name)
+
+  def _init_from_proto(self, critical_section_def, import_scope):
+    raise NotImplementedError("Not yet implemented")
+    # TODO(ebrevdo): Re-enable once CriticalSection is in core.
+    # assert isinstance(
+    #     critical_section_def, critical_section_pb2.CriticalSectionDef)
+    # # Create from critical_section_def.
+    # g = ops.get_default_graph()
+    # self._handle = g.as_graph_element(
+    #     ops.prepend_name_scope(
+    #         critical_section_def.critical_section_name,
+    #         import_scope=import_scope))
+
+  def _init_from_args(self, name):
+    """Initialize the CriticalSection from constructor arguments."""
+    with ops.name_scope(name, "CriticalSection", []) as name:
+      with ops.control_dependencies(None):
+        # pylint: disable=protected-access
+        handle_name = ops._name_from_scope_name(name)
+        container = ops.get_default_graph()._container
+        # pylint: enable=protected-access
+        if container is None:
+          container = ""
+        self._handle = gen_resource_variable_ops.critical_section_op(
+            shared_name=handle_name, name=name)
+    if context.in_graph_mode():
+      ops.add_to_collections(CRITICAL_SECTIONS, self)
+
+  @property
+  def name(self):
+    return self._handle.op.name
+
+  def execute(self, fn, *args, **kwargs):
+    """Execute function `fn(*args, **kwargs)` inside the CriticalSection.
+
+    Args:
+      fn: The function to execute.  Must return at least one tensor.
+      *args: Additional positional arguments to `fn`.
+      **kwargs: Additional keyword arguments to `fn`.
+        Several keywords are reserved for `execute`.  These are:
+
+        - name; The name to use when creating the execute operation.
+        - exclusive_resource_access; Whether the resources required by
+          `fn` should be exclusive to this `CriticalSection`.  Default: `True`.
+          You may want to set this to `False` if you will be accessing a
+          resource in read-only mode in two different CriticalSections.
+
+    Returns:
+      The tensors returned from `fn(*args, **kwargs)`.
+
+    Raises:
+      ValueError: If `fn` attempts to use this `CriticalSection` in any nested
+        way.
+      ValueError: If `exclusive_resource_access` is not provided (is `True`) and
+        another `CriticalSection` has an execution requesting the same
+        resources as in `*args`, `**kwargs`, and any additionaly captured
+        inputs in `fn`.  Note, even if `exclusive_resource_access` is `True`,
+        if another execution in another `CriticalSection` was created without
+        `exclusive_resource_access=True`, a `ValueError` will be raised.
+    """
+    name = kwargs.pop("name", None)
+    exclusive_resource_access = kwargs.pop("exclusive_resource_access", True)
+
+    args = nest.map_structure(ops.convert_to_tensor, args)
+    with ops.name_scope(name, "critical_section_execute", []):
+      fn_op = function.make_defun_op(fn, *args, **kwargs)
+      flat_dtypes = nest.flatten(fn_op.output_dtypes)
+      flat_shapes = nest.flatten(fn_op.output_shapes)
+      all_inputs = nest.flatten(args) + fn_op.captured_inputs
+      if self._handle in all_inputs:
+        raise ValueError("The function fn attempts to access the "
+                         "CriticalSection in which it would be running.  This "
+                         "is illegal and would cause deadlocks.  "
+                         "CriticalSection: %s." % self._handle)
+
+      if context.in_graph_mode():
+        # Collections and op introspection does not work in eager
+        # mode.  This is generally ok; since eager mode (as of
+        # writing) executes sequentially anyway.
+        all_input_resources = [
+            x for x in all_inputs if x.dtype == dtypes.resource]
+        for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS):
+          if sg.op.inputs[0].name == self._handle.name:
+            # Other executions in the same critical section are allowed.
+            continue
+          if not (exclusive_resource_access or sg.exclusive_resource_access):
+            # Neither execution requested exclusive access.
+            continue
+          sg_input_names = [y.name for y in sg.op.inputs[1:]]
+          for res in all_input_resources:
+            if res.name in sg_input_names:
+              raise ValueError(
+                  "This execution would access resource %s; but either this "
+                  "execution (CriticalSection: %s) or Execution '%s' "
+                  "(CriticalSection: %s) requested exclusive resource access "
+                  "of this resource for their critical section.  Did you mean "
+                  "to call execute with keyword argument "
+                  "exclusive_resource_access=False?"
+                  % (res.name,
+                     self.name,
+                     sg.op.name,
+                     sg.op.inputs[0].op.name))
+
+      flat_outputs = gen_resource_variable_ops.execute_in_critical_section(
+          critical_section=self._handle,
+          arguments=all_inputs,
+          f=fn_op,
+          output_types=flat_dtypes,
+          output_shapes=flat_shapes)
+
+      if context.in_graph_mode():
+        if isinstance(flat_outputs, ops.Operation):
+          flat_outputs = [flat_outputs]
+        op = (flat_outputs[0].op if isinstance(flat_outputs[0], ops.Tensor)
+              else flat_outputs[0])
+        signature = _ExecutionSignature(
+            op=op,
+            exclusive_resource_access=exclusive_resource_access)
+        ops.add_to_collections(
+            CRITICAL_SECTION_EXECUTIONS, signature)
+
+      return (flat_outputs[0]
+              if (len(flat_outputs) == 1
+                  and isinstance(flat_outputs[0], ops.Operation))
+              else nest.pack_sequence_as(fn_op.output_dtypes, flat_outputs))
+
+  # TODO(ebrevdo): Re-enable once CriticalSection is in core.
+
+  # def to_proto(self, export_scope=None):
+  #   """Converts a `CriticalSection` to a `CriticalSectoinDef` protocol buffer.
+
+  #   Args:
+  #     export_scope: Optional `string`. Name scope to remove.
+
+  #   Returns:
+  #     A `CriticalSectionDef` protocol buffer, or `None` if the
+  #     `CriticalSection` is not in the specified name scope.
+  #   """
+  #   if export_scope is None or self.handle.name.startswith(export_scope):
+  #     cs_def = critical_section_pb2.CriticalSectionDef()
+  #     cs_def.critical_section_name = ops.strip_name_scope(
+  #         self._handle.name, export_scope)
+  #     return cs_def
+  #   else:
+  #     return None
+
+  # @staticmethod
+  # def from_proto(critical_section_def, import_scope=None):
+  #   return CriticalSection(
+  #       critical_section_def=critical_section_def, import_scope=import_scope)
+
+
+# TODO(ebrevdo): Re-enable once CriticalSection is in core.
+
+# def _execution_to_proto_fn(execution_signature, export_scope=None):
+#   """Converts `_ExecutionSignature` to a `CriticalSectionExecutionDef`.
+
+#   Args:
+#     execution_signature: Instance of `_ExecutionSignature`.
+#     export_scope: The export scope, if any.
+
+#   Returns:
+#     An instance of `CriticalSectionExecutionDef`.
+#   """
+#   if (export_scope is None
+#       or execution_signature.op.name.startswith(export_scope)):
+#     op_def = critical_section_pb2.CriticalSectionExecutionDef()
+#     op_def.execute_in_critical_section_name = ops.strip_name_scope(
+#         execution_signature.op.name, export_scope)
+#     op_def.exclusive_resource_access = (
+#         execution_signature.exclusive_resource_access)
+#     return op_def
+#   else:
+#     return None
+
+
+# def _execution_from_proto_fn(op_def, import_scope=None):
+#   """Converts a `CriticalSectionExecutionDef` to a `_ExecutionSignature`."""
+#   assert isinstance(
+#       op_def, critical_section_pb2.CriticalSectionExecutionDef)
+
+#   # Create from op_def.
+#   g = ops.get_default_graph()
+#   execution_op = g.as_graph_element(
+#       ops.prepend_name_scope(
+#           op_def.execute_in_critical_section_name,
+#           import_scope=import_scope))
+#   return _ExecutionSignature(
+#       op=execution_op,
+#       exclusive_resource_access=op_def.exclusive_resource_access)
+
+# ops.register_proto_function(
+#     CRITICAL_SECTIONS,
+#     proto_type=critical_section_pb2.CriticalSectionDef,
+#     to_proto=CriticalSection.to_proto,
+#     from_proto=CriticalSection.from_proto)
+
+# ops.register_proto_function(
+#     CRITICAL_SECTION_EXECUTIONS,
+#     proto_type=critical_section_pb2.CriticalSectionExecutionDef,
+#     to_proto=_execution_to_proto_fn,
+#     from_proto=_execution_from_proto_fn)
diff --git a/tensorflow/contrib/framework/python/ops/critical_section_test.py b/tensorflow/contrib/framework/python/ops/critical_section_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..a416724d3ba1719471d70667e140f9cd2daf86c7
--- /dev/null
+++ b/tensorflow/contrib/framework/python/ops/critical_section_test.py
@@ -0,0 +1,178 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""critical section tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.framework.python.ops import critical_section_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import function
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.platform import test
+# TODO(ebrevdo): Re-enable once CriticalSection is in core.
+# from tensorflow.python.training import saver as saver_lib
+
+
+class CriticalSectionTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testCreateCriticalSection(self):
+    cs = critical_section_ops.CriticalSection(name="cs")
+    v = resource_variable_ops.ResourceVariable(0.0, name="v")
+
+    def fn(a, b):
+      c = v.read_value()
+      with ops.control_dependencies([c]):
+        nv = v.assign_add(a * b)
+        with ops.control_dependencies([nv]):
+          return array_ops.identity(c)
+
+    num_concurrent = 1000
+    r = [cs.execute(fn, 1.0, 2.0) for _ in range(num_concurrent)]
+    self.evaluate(v.initializer)
+    r_value = self.evaluate(r)
+    self.assertAllClose([2.0 * i for i in range(num_concurrent)],
+                        sorted(r_value))
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testCreateCriticalSectionFnReturnsOp(self):
+    cs = critical_section_ops.CriticalSection(name="cs")
+    v = resource_variable_ops.ResourceVariable(0.0, name="v")
+
+    def fn_return_op(a, b):
+      c = v.read_value()
+      with ops.control_dependencies([c]):
+        nv = v.assign_add(a * b)
+        with ops.control_dependencies([nv]):
+          return ()
+
+    num_concurrent = 100
+    r = [cs.execute(fn_return_op, 1.0, 2.0) for _ in range(num_concurrent)]
+    self.evaluate(v.initializer)
+    self.evaluate(r)
+    final_v = self.evaluate(v)
+    self.assertAllClose(2.0 * num_concurrent, final_v)
+
+  def testCreateCriticalSectionRaw(self):
+    cs = critical_section_ops.CriticalSection(name="cs")
+    v = resource_variable_ops.ResourceVariable(0.0, name="v")
+
+    @function.Defun(dtypes.float32, dtypes.float32)
+    def fn(a, b):
+      c = v.read_value()
+      with ops.control_dependencies([c]):
+        nv = v.assign_add(a * b)
+        with ops.control_dependencies([nv]):
+          return array_ops.identity(c)
+
+    def execute(fn, *args):
+      output_args = fn.definition.signature.output_arg
+      return resource_variable_ops.execute_in_critical_section(
+          critical_section=cs._handle,
+          arguments=list(args) + fn.captured_inputs,
+          f=fn,
+          output_types=[out.type for out in output_args],
+          output_shapes=[tensor_shape.TensorShape(None) for _ in output_args])
+
+    num_concurrent = 1000
+    r = [execute(fn, 1.0, 2.0)[0] for _ in range(num_concurrent)]
+    self.evaluate(v.initializer)
+    r_value = self.evaluate(r)
+    self.assertAllClose([2.0 * i for i in range(num_concurrent)],
+                        sorted(r_value))
+
+  def testCollection(self):
+    cs = critical_section_ops.CriticalSection(name="cs")
+    self.assertIn(
+        cs, ops.get_collection(critical_section_ops.CRITICAL_SECTIONS))
+    execute_op = cs.execute(lambda x: x + 1, 1.0).op
+    self.assertIn(
+        execute_op,
+        [signature.op for signature in
+         ops.get_collection(critical_section_ops.CRITICAL_SECTION_EXECUTIONS)])
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testRecursiveCriticalSectionAccessIsIllegal(self):
+    cs = critical_section_ops.CriticalSection(name="cs")
+    def fn(x):
+      return cs.execute(lambda x: x+1, x)
+    with self.assertRaisesRegexp(
+        ValueError,
+        r"attempts to access the CriticalSection in which it would be running"):
+      cs.execute(fn, 1.0)
+
+  def testMultipleCSExecutionsRequestSameResource(self):
+    cs0 = critical_section_ops.CriticalSection()
+    cs1 = critical_section_ops.CriticalSection()
+    v = resource_variable_ops.ResourceVariable(0.0, name="v")
+    cs0.execute(lambda: v + 1)
+    # It's OK for the same CriticalSection to access this resource.
+    cs0.execute(lambda: v - 1)
+    # It's *not* OK for a different CriticalSection to access it by
+    # default.
+    with self.assertRaisesRegexp(
+        ValueError, "requested exclusive resource access"):
+      cs1.execute(lambda: v + 1)
+    # It's not even OK if the second call doesn't request exclusive access.
+    with self.assertRaisesRegexp(
+        ValueError, "requested exclusive resource access"):
+      cs1.execute(lambda: v + 1, exclusive_resource_access=False)
+
+    v2 = resource_variable_ops.ResourceVariable(0.0, name="v2")
+    cs0.execute(lambda: v2 + 1, exclusive_resource_access=False)
+    # It's OK if neither requests exclusive resource access.
+    cs1.execute(lambda: v2 + 1, exclusive_resource_access=False)
+
+    # It's not OK if the second request requires exlusive resource
+    # access.
+    with self.assertRaisesRegexp(
+        ValueError, "requested exclusive resource access"):
+      cs1.execute(lambda: v2 + 1)
+
+  # TODO(ebrevdo): Re-enable once CriticalSection is in core.
+  #
+  # def testCriticalSectionAndExecuteOpSaverRoundTrip(self):
+  #   cs = critical_section_ops.CriticalSection()
+  #   r = cs.execute(lambda x: x + 1, 1.0)
+  #   graph = ops.get_default_graph()
+  #   meta_graph = saver_lib.export_meta_graph(
+  #       graph=graph, collection_list=graph.get_all_collection_keys())
+  #   graph_copy = ops.Graph()
+  #   with graph_copy.as_default():
+  #     _ = saver_lib.import_meta_graph(meta_graph, import_scope="imported")
+  #     restored_cs = ops.get_collection(critical_section_ops.CRITICAL_SECTIONS)
+  #     restored_exec = ops.get_collection(
+  #         critical_section_ops.CRITICAL_SECTION_EXECUTIONS)
+  #     self.assertEqual(1, len(restored_cs))
+  #     self.assertEqual(1, len(restored_exec))
+  #     self.assertEqual(restored_cs[0].name, "imported/%s" % cs.name)
+  #     self.assertEqual(restored_exec[0].op.name, "imported/%s" % r.op.name)
+
+  # def testToProto(self):
+  #   cs = critical_section_ops.CriticalSection(name="cs")
+  #   proto = cs.to_proto()
+  #   self.assertEqual(proto.critical_section_name, cs._handle.name)
+  #   cs_copy = critical_section_ops.CriticalSection.from_proto(proto)
+  #   self.assertEqual(cs_copy._handle, cs._handle)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/framework/python/ops/script_ops.py b/tensorflow/contrib/framework/python/ops/script_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d269fefdcfae7902b35e0f29f8cd12fcc58b882
--- /dev/null
+++ b/tensorflow/contrib/framework/python/ops/script_ops.py
@@ -0,0 +1,143 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Script Language Operators. See the @{$python/script_ops} guide.
+
+@@py_func
+"""
+
+# pylint: disable=g-bad-name
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops.script_ops import py_func as _py_func
+from tensorflow.python.util import nest
+
+__all__ = ['py_func']
+
+
+def py_func(func,
+            args=(),
+            kwargs=None,
+            output_types=None,
+            output_shapes=None,
+            stateful=True,
+            name=None):
+  """Wraps a python function and uses it as a TensorFlow op.
+
+  This function is a wrapper around `tf.py_func` and improve it with kwargs
+  and output_shapes. Further it changed some argument names.
+
+  Given a python function `func`, which takes numpy arrays as its
+  inputs and returns numpy arrays as its outputs, wrap this function as an
+  operation in a TensorFlow graph. The following snippet constructs a simple
+  TensorFlow graph that invokes the `np.sinh()` NumPy function as a operation
+  in the graph:
+
+  ```python
+  def my_func(x):
+    # x will be a numpy array with the contents of the placeholder below
+    return np.sinh(x)
+  inp = tf.placeholder(tf.float32)
+  y = tf.py_func(my_func, [inp], tf.float32)
+  ```
+
+
+  **N.B.** The `tf.py_func()` operation has the following known limitations:
+
+  * The body of the function (i.e. `func`) will not be serialized in a
+    `GraphDef`. Therefore, you should not use this function if you need to
+    serialize your model and restore it in a different environment.
+
+  * The operation must run in the same address space as the Python program
+    that calls `tf.py_func()`. If you are using distributed TensorFlow, you
+    must run a `tf.train.Server` in the same process as the program that calls
+    `tf.py_func()` and you must pin the created operation to a device in that
+    server (e.g. using `with tf.device():`).
+
+  Args:
+    func: A Python function, which accepts a list of NumPy `ndarray` objects
+      having element types that match the corresponding `tf.Tensor` objects
+      in `inp`, and returns a list of `ndarray` objects (or a single `ndarray`)
+      having element types that match the corresponding values in `Tout`.
+    args: A list of `Tensor` objects.
+    kwargs: A dict with `Tensor` objects as values.
+    output_types: A nested structure of tensorflow data types or a single
+      tensorflow data type if there is only one, indicating what `func` returns.
+    output_shapes: Same as output_types, except the types are replaces with
+      shapes (optional).
+    stateful: (Boolean.) If True, the function should be considered stateful.
+      If a function is stateless, when given the same input it will return the
+      same output and have no observable side effects. Optimizations such as
+      common subexpression elimination are only performed on stateless
+      operations.
+    name: A name for the operation (optional).
+
+  Returns:
+    Tensorflow op that wraps the input python function.
+  """
+
+  if kwargs is None:
+    kwargs = {}
+
+  if not isinstance(args, (list, tuple)):
+    raise TypeError('args must be list and not {}. args: {}'.format(
+        type(args), args))
+
+  if not isinstance(kwargs, dict):
+    raise TypeError('kwargs must be dict and not {}. args: {}'.format(
+        type(kwargs), kwargs))
+
+  # For dynamic type inference use callable output_types and output_shapes
+  if callable(output_types):
+    # If callable assume same signature and call with tensors and get the types
+    output_types = output_types(*args, **kwargs)
+  if callable(output_shapes):
+    # If callable assume same signature and call with tensors and get the shapes
+    output_shapes = output_shapes(*args, **kwargs)
+
+  flat_output_types = nest.flatten(output_types)
+  args = (args, kwargs)
+  flat_args = nest.flatten(args)
+
+  def python_function_wrapper(*py_args):
+    py_args, py_kwargs = nest.pack_sequence_as(args, py_args)
+
+    ret = func(*py_args, **py_kwargs)
+    # TODO(alextp): Catch Exceptions and improve msg, because tensorflow
+    # ist not able to preserve the traceback, i.e. the Exceptions does not
+    # contain any information where the Exception was raised.
+    nest.assert_shallow_structure(output_types, ret)
+    return nest.flatten(ret)
+
+  flat_values = _py_func(
+      python_function_wrapper,
+      flat_args,
+      flat_output_types,
+      stateful=stateful,
+      name=name)
+
+  if output_shapes is not None:
+    # I am not sure if this is nessesary
+    output_shapes = nest.map_structure_up_to(
+        output_types, tensor_shape.as_shape, output_shapes)
+
+    flattened_shapes = nest.flatten(output_shapes)
+    for ret_t, shape in zip(flat_values, flattened_shapes):
+      ret_t.set_shape(shape)
+
+  return nest.pack_sequence_as(output_types, flat_values)
diff --git a/tensorflow/contrib/framework/python/ops/variables.py b/tensorflow/contrib/framework/python/ops/variables.py
index 07b7857e7b2114d251ebb5c14eda9dff0d55bbef..3f1ece4510578b5ac39849c577fffbb2a3be45a7 100644
--- a/tensorflow/contrib/framework/python/ops/variables.py
+++ b/tensorflow/contrib/framework/python/ops/variables.py
@@ -441,7 +441,7 @@ def get_unique_variable(var_op_name):
   """
   candidates = get_variables(scope=var_op_name)
   if not candidates:
-    raise ValueError('Couldnt find variable %s' % var_op_name)
+    raise ValueError('Couldn\'t find variable %s' % var_op_name)
 
   for candidate in candidates:
     if candidate.op.name == var_op_name:
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
index 88306094ab9947c9c78b03c0013f6afc88316803..0e06575d96f9b9538f0245b12d48cfd7c0e8d981 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
@@ -34,6 +34,7 @@ limitations under the License.
 #include "tensorflow/core/util/use_cudnn.h"
 
 #if GOOGLE_CUDA
+#include "cuda/include/cudnn.h"
 #include "tensorflow/core/kernels/conv_ops_gpu.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/util/activation_mode.h"
@@ -278,6 +279,28 @@ Status TransformNHWCToNCHW(OpKernelContext* ctx, const Tensor& nhwc_tensor,
   return Status::OK();
 }
 
+// Adjusts padding so cudnn supports it. Sets `adjusted_padding` to be the
+// adjusted padding, and `extra_padding_before` and `extra_padding_after` to be
+// the extra padding that FusedConv needs to apply before calling cudnn.
+void AdjustPaddingForCudnn(int padding, bool is_int8x4, int filter_size,
+                           int* adjusted_padding, int* extra_padding_before,
+                           int* extra_padding_after) {
+#if CUDNN_VERSION < 7000
+  if (is_int8x4 && filter_size >= 6) {
+    // TODO(b/70795525): Remove after NVIDIA fixes this bug with int8 fused
+    // convolution. I don't know cuDNN7 still has the bug, so enable this
+    // workaround for cuDNN6 or older.
+    *adjusted_padding = 0;
+    *extra_padding_before = padding / 2;
+    *extra_padding_after = padding - *extra_padding_before;
+    return;
+  }
+#endif
+  *adjusted_padding = padding / 2 * 2;
+  *extra_padding_before = 0;
+  *extra_padding_after = padding % 2;
+}
+
 template <typename T, typename BiasType, typename ScaleType>
 void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
     launch(OpKernelContext* ctx, bool cudnn_use_autotune,
@@ -303,7 +326,7 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
     stream->parent()->GetDeviceDescription().cuda_compute_capability(&cc_major,
                                                                      &cc_minor);
     OP_REQUIRES(
-        ctx, cc_major >= 6 && cc_minor >= 1,
+        ctx, ((cc_major == 6 && cc_minor >= 1) || cc_major > 6),
         errors::Unimplemented(
             "FusedConv2DBiasActivation for int8 is only supported on GPUs with "
             "compute capability 6.1 or later."));
@@ -338,12 +361,21 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
         0, (output_rows - 1) * row_stride + filter_rows - conv_input_rows);
     padding_cols = std::max<int>(
         0, (output_cols - 1) * col_stride + filter_cols - conv_input_cols);
-    const int padding_rows_parity = padding_rows & 1;
-    const int padding_cols_parity = padding_cols & 1;
-    if ((padding_rows_parity | padding_cols_parity) != 0) {
+    int extra_top_padding = 0;
+    int extra_bottom_padding = 0;
+    int extra_left_padding = 0;
+    int extra_right_padding = 0;
+    AdjustPaddingForCudnn(padding_rows, is_int8x4, filter_rows, &padding_rows,
+                          &extra_top_padding, &extra_bottom_padding);
+    AdjustPaddingForCudnn(padding_cols, is_int8x4, filter_cols, &padding_cols,
+                          &extra_left_padding, &extra_right_padding);
+    if (extra_top_padding != 0 || extra_bottom_padding != 0 ||
+        extra_left_padding != 0 || extra_right_padding != 0) {
       Tensor transformed_input;
-      const int new_conv_input_rows = conv_input_rows + padding_rows_parity;
-      const int new_conv_input_cols = conv_input_cols + padding_cols_parity;
+      const int new_conv_input_rows =
+          conv_input_rows + extra_top_padding + extra_bottom_padding;
+      const int new_conv_input_cols =
+          conv_input_cols + extra_left_padding + extra_right_padding;
 
       using VectT = typename Int8x4ToInt32<typename RawType<T>::type>::type;
       auto pad_data_format = is_int8x4 ? FORMAT_NCHW : data_format;
@@ -361,8 +393,9 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
           maybe_padded_conv_input.reinterpret_last_dimension<VectT, 4>());
 
       functor::PadInput<GPUDevice, VectT, int, 4>()(
-          ctx->eigen_device<GPUDevice>(), conv_input_eigen_tensor, {{0, 0}},
-          {{padding_rows_parity, padding_cols_parity}},
+          ctx->eigen_device<GPUDevice>(), conv_input_eigen_tensor,
+          {{extra_top_padding, extra_left_padding}},
+          {{extra_bottom_padding, extra_right_padding}},
           padded_conv_input_eigen_tensor, pad_data_format);
 
       conv_input = &maybe_padded_conv_input;
@@ -439,6 +472,8 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
       .set_feature_map_count(output_depth)
       .set_layout(data_layout);
   dnn::ConvolutionDescriptor conv_desc;
+  CHECK_EQ(0, padding_rows % 2);
+  CHECK_EQ(0, padding_cols % 2);
   conv_desc.set_vertical_filter_stride(row_stride)
       .set_horizontal_filter_stride(col_stride)
       .set_zero_padding_height(padding_rows / 2)
@@ -493,6 +528,8 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
       {{conv_input_rows, conv_input_cols}},
       output_depth,
       {{filter_rows, filter_cols}},
+      // TODO(yangzihao): Add support for arbitrary dilations for fused conv.
+      {{1, 1}},  // dilation_rows, dilation_cols
       {{row_stride, col_stride}},
       {{padding_rows, padding_cols}},
       conv_input->dtype(),
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
index dc43af11580ce5fda74ee25da6c151a5b89c7aee..fa7a3c03aa35c756252b22a004be91fa24c10e41 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
@@ -30,11 +30,12 @@ class FusedConvParameters : public ConvParameters {
  public:
   FusedConvParameters(int64 batch, int64 in_depths, const SpatialArray& in,
                       int64 out_depths, const SpatialArray& filter,
-                      const SpatialArray& stride, const SpatialArray& padding,
-                      DataType dtype, int device_id, bool has_side_input,
+                      const SpatialArray& dilation, const SpatialArray& stride,
+                      const SpatialArray& padding, DataType dtype,
+                      int device_id, bool has_side_input,
                       ActivationMode activation_mode)
-      : ConvParameters(batch, in_depths, in, out_depths, filter, stride,
-                       padding, dtype, device_id),
+      : ConvParameters(batch, in_depths, in, out_depths, filter, dilation,
+                       stride, padding, dtype, device_id),
         activation_mode_(activation_mode),
         has_side_input_(has_side_input) {
     hash_code_ = Hash64Combine(hash_code_, has_side_input);
diff --git a/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
index 887ebc5a6c35379476fa1a643c866d38e2b25699..6a56237f67c844a3daa546eb02d64c9e2658f639 100644
--- a/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
@@ -52,6 +52,7 @@ REGISTER_OP("FusedConv2DBiasActivation")
     .Attr("data_format: {'NHWC', 'NCHW', 'NCHW_VECT_C'} = 'NHWC'")
     .Attr("filter_format: {'HWIO', 'OIHW', 'OIHW_VECT_I'} = 'HWIO'")
     .Attr("activation_mode: {'Relu'} = 'Relu'")
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](shape_inference::InferenceContext* c) {
       using shape_inference::ShapeHandle;
       using shape_inference::DimensionHandle;
@@ -151,6 +152,11 @@ REGISTER_OP("FusedConv2DBiasActivation")
                      kernel_height, kernel_width, input_channels % 4 ]`
     activation_mode: The activation applied to the output.
         Currently must be "Relu".
+    dilations: 1-D tensor of length 4.  The dilation factor for each dimension
+        of `input`. If set to k > 1, there will be k-1 skipped cells between
+        each filter element on that dimension. The dimension order is determined
+        by the value of `data_format`, see above for details. Dilations in the
+        batch and depth dimensions must be 1.
 )doc");
 
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
index 2a18f3eeecc7e0e69c54b219886a263136f01b2c..bb155aa2496cbafd9f0630d3dffb2ba69395186c 100644
--- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
@@ -658,6 +658,36 @@ def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
 
 class FusedConvInt8Tests(test.TestCase):
   _test_params = [
+      {
+          "batch_size": 1,
+          "input_channels": 4,
+          "output_channels": 4,
+          "input_height": 8,
+          "input_width": 8,
+          "filter_height": 6,
+          "filter_width": 6,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.0,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 1,
+          "input_channels": 4,
+          "output_channels": 4,
+          "input_height": 6,
+          "input_width": 6,
+          "filter_height": 6,
+          "filter_width": 6,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.0,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
       {
           "batch_size": 2,
           "input_channels": 8,
diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD
index abe4665caa9b23b5663df48487c6c77d33d15c59..b355a79b1a5d967eb82a30d41c073bbb52e0364c 100644
--- a/tensorflow/contrib/gan/BUILD
+++ b/tensorflow/contrib/gan/BUILD
@@ -56,6 +56,7 @@ py_test(
     srcs = ["python/train_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":features",
         ":namedtuples",
         ":train",
         "//tensorflow/contrib/framework:framework_py",
@@ -82,6 +83,7 @@ py_library(
     deps = [
         ":classifier_metrics",
         ":eval_utils",
+        ":sliced_wasserstein",
         ":summaries",
         "//tensorflow/python:util",
     ],
@@ -116,7 +118,7 @@ py_library(
     deps = [
         ":clip_weights",
         ":conditioning_utils",
-        ":tensor_pool",
+        ":random_tensor_pool",
         ":virtual_batchnorm",
         "//tensorflow/python:util",
     ],
@@ -221,10 +223,10 @@ py_test(
 )
 
 py_library(
-    name = "tensor_pool",
+    name = "random_tensor_pool",
     srcs = [
-        "python/features/python/tensor_pool.py",
-        "python/features/python/tensor_pool_impl.py",
+        "python/features/python/random_tensor_pool.py",
+        "python/features/python/random_tensor_pool_impl.py",
     ],
     srcs_version = "PY2AND3",
     deps = [
@@ -239,11 +241,11 @@ py_library(
 )
 
 py_test(
-    name = "tensor_pool_test",
-    srcs = ["python/features/python/tensor_pool_test.py"],
+    name = "random_tensor_pool_test",
+    srcs = ["python/features/python/random_tensor_pool_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":tensor_pool",
+        ":random_tensor_pool",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
@@ -502,6 +504,41 @@ py_test(
     ],
 )
 
+py_library(
+    name = "sliced_wasserstein",
+    srcs = [
+        "python/eval/python/sliced_wasserstein.py",
+        "python/eval/python/sliced_wasserstein_impl.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:linalg_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:nn",
+        "//tensorflow/python:nn_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:util",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "sliced_wasserstein_test",
+    srcs = ["python/eval/python/sliced_wasserstein_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":sliced_wasserstein",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:random_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/gan/README.md b/tensorflow/contrib/gan/README.md
index 4bca0a1d62a2b404c6783c7cfe3b5c67cfc58221..4ead66ca13e74bacc0e4679a8d5c4e0f23d04b69 100644
--- a/tensorflow/contrib/gan/README.md
+++ b/tensorflow/contrib/gan/README.md
@@ -99,8 +99,8 @@ gan_model = tfgan.gan_model(
 # Build the GAN loss.
 gan_loss = tfgan.gan_loss(
     gan_model,
-    generator_loss_fn=tfgan_losses.wasserstein_generator_loss,
-    discriminator_loss_fn=tfgan_losses.wasserstein_discriminator_loss)
+    generator_loss_fn=tfgan.losses.wasserstein_generator_loss,
+    discriminator_loss_fn=tfgan.losses.wasserstein_discriminator_loss)
 
 # Create the train ops, which calculate gradients and apply updates to weights.
 train_ops = tfgan.gan_train_ops(
@@ -161,8 +161,8 @@ gan_model = tfgan.gan_model(
 # Build the GAN loss and standard pixel loss.
 gan_loss = tfgan.gan_loss(
     gan_model,
-    generator_loss_fn=tfgan_losses.wasserstein_generator_loss,
-    discriminator_loss_fn=tfgan_losses.wasserstein_discriminator_loss,
+    generator_loss_fn=tfgan.losses.wasserstein_generator_loss,
+    discriminator_loss_fn=tfgan.losses.wasserstein_discriminator_loss,
     gradient_penalty=1.0)
 l1_pixel_loss = tf.norm(gan_model.real_data - gan_model.generated_data, ord=1)
 
@@ -193,8 +193,8 @@ gan_model = tfgan.gan_model(
 # Build the GAN loss and standard pixel loss.
 gan_loss = tfgan.gan_loss(
     gan_model,
-    generator_loss_fn=tfgan_losses.least_squares_generator_loss,
-    discriminator_loss_fn=tfgan_losses.least_squares_discriminator_loss)
+    generator_loss_fn=tfgan.losses.least_squares_generator_loss,
+    discriminator_loss_fn=tfgan.losses.least_squares_discriminator_loss)
 l1_pixel_loss = tf.norm(gan_model.real_data - gan_model.generated_data, ord=1)
 
 # Modify the loss tuple to include the pixel loss.
@@ -223,8 +223,8 @@ gan_model = tfgan.infogan_model(
 # Build the GAN loss with mutual information penalty.
 gan_loss = tfgan.gan_loss(
     gan_model,
-    generator_loss_fn=tfgan_losses.wasserstein_generator_loss,
-    discriminator_loss_fn=tfgan_losses.wasserstein_discriminator_loss,
+    generator_loss_fn=tfgan.losses.wasserstein_generator_loss,
+    discriminator_loss_fn=tfgan.losses.wasserstein_discriminator_loss,
     gradient_penalty=1.0,
     mutual_information_penalty_weight=1.0)
 
diff --git a/tensorflow/contrib/gan/__init__.py b/tensorflow/contrib/gan/__init__.py
index dff361fdc42708ea69999c2def4721f9d49fcf14..f1946c7f925660eae3aaa650c437e03da1f33d6c 100644
--- a/tensorflow/contrib/gan/__init__.py
+++ b/tensorflow/contrib/gan/__init__.py
@@ -12,7 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TFGAN grouped API. Please see README.md for details and usage."""
+"""TFGAN is a lightweight library for training and evaluating GANs.
+
+In addition to providing the infrastructure for easily training and evaluating
+GANS, this library contains modules for a TFGAN-backed Estimator,
+evaluation metrics, features (such as virtual batch normalization), and losses.
+Please see README.md for details and usage.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/gan/python/estimator/__init__.py b/tensorflow/contrib/gan/python/estimator/__init__.py
index 8c4a18228039cb4f2c06e0333f4b8408f1f631e9..c9f7bc61b25230e4159cf8cbc7c9cceead0aa706 100644
--- a/tensorflow/contrib/gan/python/estimator/__init__.py
+++ b/tensorflow/contrib/gan/python/estimator/__init__.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TFGAN grouped API. Please see README.md for details and usage."""
+"""TFGAN estimator module.
+
+GANEstimator provides all the infrastructure support of a TensorFlow Estimator
+with the feature support of TFGAN.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index 058dc1d1f8cc176dcdb81268da2c4704d7eddc99..0d51c282a8977871185fb4200082feb7868cdbae 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -96,7 +96,7 @@ class GANEstimator(estimator.Estimator):
       # Generate samples from generator.
       predictions = np.array([
           x for x in gan_estimator.predict(predict_input_fn)])
-    ```
+  ```
   """
 
   def __init__(self,
@@ -107,6 +107,7 @@ class GANEstimator(estimator.Estimator):
                discriminator_loss_fn=None,
                generator_optimizer=None,
                discriminator_optimizer=None,
+               get_hooks_fn=None,
                add_summaries=None,
                use_loss_summaries=True,
                config=None):
@@ -137,6 +138,10 @@ class GANEstimator(estimator.Estimator):
         work.
       discriminator_optimizer: Same as `generator_optimizer`, but for the
         discriminator updates.
+      get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a
+        list of hooks. These hooks are run on the generator and discriminator
+        train ops, and can be used to implement the GAN training scheme.
+        Defaults to `train.get_sequential_train_hooks()`.
       add_summaries: `None`, a single `SummaryType`, or a list of `SummaryType`.
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
@@ -151,7 +156,7 @@ class GANEstimator(estimator.Estimator):
               else discriminator_optimizer)
       gan_head = head_lib.gan_head(
           generator_loss_fn, discriminator_loss_fn, gopt, dopt,
-          use_loss_summaries)
+          use_loss_summaries, get_hooks_fn=get_hooks_fn)
       return _gan_model_fn(
           features, labels, mode, generator_fn, discriminator_fn, gan_head,
           add_summaries)
@@ -160,11 +165,6 @@ class GANEstimator(estimator.Estimator):
         model_fn=_model_fn, model_dir=model_dir, config=config)
 
 
-def _use_check_shapes(real_data):
-  """Determines whether TFGAN should check Tensor shapes."""
-  return isinstance(real_data, ops.Tensor)
-
-
 def _gan_model_fn(
     features,
     labels,
@@ -242,7 +242,7 @@ def _make_gan_model(generator_fn, discriminator_fn, real_data,
       real_data,
       generator_inputs,
       generator_scope=generator_scope,
-      check_shapes=_use_check_shapes(real_data))
+      check_shapes=False)
   if add_summaries:
     if not isinstance(add_summaries, (tuple, list)):
       add_summaries = [add_summaries]
diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
index 204c646e194319c0e63599da0b2a4909ef270ef3..a21358c50bbdb4a1a929b0c5bc322cec4c9923b5 100644
--- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
@@ -71,7 +71,7 @@ class GANHead(head._Head):  # pylint: disable=protected-access
   def __init__(self, generator_loss_fn, discriminator_loss_fn,
                generator_optimizer, discriminator_optimizer,
                use_loss_summaries=True,
-               get_hooks_fn=tfgan_train.get_sequential_train_hooks(),
+               get_hooks_fn=None,
                name=None):
     """`Head` for GAN training.
 
@@ -86,10 +86,12 @@ class GANHead(head._Head):  # pylint: disable=protected-access
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
       get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list
-        of hooks.
+        of hooks. Defaults to `train.get_sequential_train_hooks()`
       name: name of the head. If provided, summary and metrics keys will be
         suffixed by `"/" + name`.
     """
+    if get_hooks_fn is None:
+      get_hooks_fn = tfgan_train.get_sequential_train_hooks()
     # TODO(joelshor): Validate inputs.
 
     if use_loss_summaries in [True, False]:
diff --git a/tensorflow/contrib/gan/python/eval/__init__.py b/tensorflow/contrib/gan/python/eval/__init__.py
index bb8046187807d0cc584f7174eb9aac578855c110..f86b8513053a45f9830411f7df2c32d1f36a97b2 100644
--- a/tensorflow/contrib/gan/python/eval/__init__.py
+++ b/tensorflow/contrib/gan/python/eval/__init__.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TFGAN grouped API. Please see README.md for details and usage."""
+"""TFGAN evaluation module.
+
+This module supports techniques such as Inception Score, Frechet Inception
+distance, and Sliced Wasserstein distance.
+"""
 # pylint: disable=,wildcard-import,unused-import
 
 from __future__ import absolute_import
@@ -22,10 +26,12 @@ from __future__ import print_function
 # Collapse eval into a single namespace.
 from tensorflow.contrib.gan.python.eval.python import classifier_metrics
 from tensorflow.contrib.gan.python.eval.python import eval_utils
+from tensorflow.contrib.gan.python.eval.python import sliced_wasserstein
 from tensorflow.contrib.gan.python.eval.python import summaries
 
 from tensorflow.contrib.gan.python.eval.python.classifier_metrics import *
 from tensorflow.contrib.gan.python.eval.python.eval_utils import *
+from tensorflow.contrib.gan.python.eval.python.sliced_wasserstein import *
 from tensorflow.contrib.gan.python.eval.python.summaries import *
 # pylint: enable=wildcard-import,unused-import
 
@@ -33,7 +39,10 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
     'classifier_metrics',
+    'sliced_wasserstein_distance',
     'summaries',
     'eval_utils',
-] + classifier_metrics.__all__ + summaries.__all__ + eval_utils.__all__
+] + (
+    classifier_metrics.__all__ + sliced_wasserstein.__all__ +
+    summaries.__all__ + eval_utils.__all__)
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
index bb65f05b5a17e9a872e41d1dcb05aeb3cd6f6f40..986a5ff6dcbeb2ff996f49137adc6d34e14c979f 100644
--- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
+++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
@@ -57,8 +57,10 @@ __all__ = [
     'run_inception',
     'inception_score',
     'classifier_score',
+    'classifier_score_from_logits',
     'frechet_inception_distance',
     'frechet_classifier_distance',
+    'frechet_classifier_distance_from_activations',
     'INCEPTION_DEFAULT_IMAGE_SIZE',
 ]
 
@@ -222,13 +224,13 @@ def run_inception(images,
     image_size: Required image width and height. See unit tests for the default
       values.
     input_tensor: Name of input Tensor.
-    output_tensor: Name of output Tensor. This function will compute activations
-      at the specified layer. Examples include INCEPTION_V3_OUTPUT and
-      INCEPTION_V3_FINAL_POOL which would result in this function computing
+    output_tensor: Name or list of output Tensors. This function will compute
+      activations at the specified layer. Examples include INCEPTION_V3_OUTPUT
+      and INCEPTION_V3_FINAL_POOL which would result in this function computing
       the final logits or the penultimate pooling layer.
 
   Returns:
-    Logits.
+    Tensor or Tensors corresponding to computed `output_tensor`.
 
   Raises:
     ValueError: If images are not the correct size.
@@ -244,8 +246,14 @@ def run_inception(images,
 
   activations = run_image_classifier(images, graph_def, input_tensor,
                                      output_tensor)
-  if array_ops.rank(activations) != 2:
-    activations = layers.flatten(activations)
+  if isinstance(activations, list):
+    for i, activation in enumerate(activations):
+      if array_ops.rank(activation) != 2:
+        activations[i] = layers.flatten(activation)
+  else:
+    if array_ops.rank(activations) != 2:
+      activations = layers.flatten(activations)
+
   return activations
 
 
@@ -257,23 +265,26 @@ def run_image_classifier(tensor, graph_def, input_tensor,
     tensor: An Input tensor.
     graph_def: A GraphDef proto.
     input_tensor: Name of input tensor in graph def.
-    output_tensor: Name of output tensor in graph def.
+    output_tensor: A tensor name or list of tensor names in graph def.
     scope: Name scope for classifier.
 
   Returns:
-    Classifier output. Shape depends on the classifier used, but is often
-    [batch, classes].
+    Classifier output if `output_tensor` is a string, or a list of outputs if
+    `output_tensor` is a list.
 
   Raises:
-    ValueError: If `image_size` is not `None`, and `tensor` are not the correct
-      size.
+    ValueError: If `input_tensor` or `output_tensor` aren't in the graph_def.
   """
   input_map = {input_tensor: tensor}
-  return_elements = [output_tensor]
-  classifier_output = importer.import_graph_def(
-      graph_def, input_map, return_elements, name=scope)[0]
+  is_singleton = isinstance(output_tensor, str)
+  if is_singleton:
+    output_tensor = [output_tensor]
+  classifier_outputs = importer.import_graph_def(
+      graph_def, input_map, output_tensor, name=scope)
+  if is_singleton:
+    classifier_outputs = classifier_outputs[0]
 
-  return classifier_output
+  return classifier_outputs
 
 
 def classifier_score(images, classifier_fn, num_batches=1):
@@ -289,6 +300,11 @@ def classifier_score(images, classifier_fn, num_batches=1):
   which captures how different the network's classification prediction is from
   the prior distribution over classes.
 
+  NOTE: This function consumes images, computes their logits, and then
+  computes the classifier score. If you would like to precompute many logits for
+  large batches, use clasifier_score_from_logits(), which this method also
+  uses.
+
   Args:
     images: Images to calculate the classifier score for.
     classifier_fn: A function that takes images and produces logits based on a
@@ -312,6 +328,34 @@ def classifier_score(images, classifier_fn, num_batches=1):
       swap_memory=True,
       name='RunClassifier')
   logits = array_ops.concat(array_ops.unstack(logits), 0)
+
+  return classifier_score_from_logits(logits)
+
+
+def classifier_score_from_logits(logits):
+  """Classifier score for evaluating a generative model from logits.
+
+  This method computes the classifier score for a set of logits. This can be
+  used independently of the classifier_score() method, especially in the case
+  of using large batches during evaluation where we would like precompute all
+  of the logits before computing the classifier score.
+
+  This technique is described in detail in https://arxiv.org/abs/1606.03498. In
+  summary, this function calculates:
+
+  exp( E[ KL(p(y|x) || p(y)) ] )
+
+  which captures how different the network's classification prediction is from
+  the prior distribution over classes.
+
+  Args:
+    logits: Precomputed 2D tensor of logits that will be used to
+      compute the classifier score.
+
+  Returns:
+    The classifier score. A floating-point scalar of the same type as the output
+    of `logits`.
+  """
   logits.shape.assert_has_rank(2)
 
   # Use maximum precision for best results.
@@ -328,6 +372,7 @@ def classifier_score(images, classifier_fn, num_batches=1):
 
   if logits_dtype != dtypes.float64:
     final_score = math_ops.cast(final_score, logits_dtype)
+
   return final_score
 
 
@@ -406,6 +451,11 @@ def frechet_classifier_distance(real_images,
   sample size to compute frechet classifier distance when comparing two
   generative models.
 
+  NOTE: This function consumes images, computes their activations, and then
+  computes the classifier score. If you would like to precompute many
+  activations for real and generated images for large batches, please use
+  frechet_clasifier_distance_from_activations(), which this method also uses.
+
   Args:
     real_images: Real images to use to compute Frechet Inception distance.
     generated_images: Generated images to use to compute Frechet Inception
@@ -417,7 +467,7 @@ def frechet_classifier_distance(real_images,
 
   Returns:
     The Frechet Inception distance. A floating-point scalar of the same type
-    as the output of `classifier_fn`
+    as the output of `classifier_fn`.
   """
 
   real_images_list = array_ops.split(
@@ -436,31 +486,69 @@ def frechet_classifier_distance(real_images,
       swap_memory=True,
       name='RunClassifier')
 
-  activations_dtype = activations.dtype
   # Split the activations by the real and generated images.
   real_a, gen_a = array_ops.split(activations, [num_batches, num_batches], 0)
 
   # Ensure the activations have the right shapes.
   real_a = array_ops.concat(array_ops.unstack(real_a), 0)
   gen_a = array_ops.concat(array_ops.unstack(gen_a), 0)
-  if activations_dtype != dtypes.float64:
-    real_a = math_ops.to_double(real_a)
-    gen_a = math_ops.to_double(gen_a)
 
-  real_a.shape.assert_has_rank(2)
-  gen_a.shape.assert_has_rank(2)
+  return frechet_classifier_distance_from_activations(real_a, gen_a)
+
+
+def frechet_classifier_distance_from_activations(
+    real_activations, generated_activations):
+  """Classifier distance for evaluating a generative model from activations.
+
+  This methods computes the Frechet classifier distance from activations of
+  real images and generated images. This can be used independently of the
+  frechet_classifier_distance() method, especially in the case of using large
+  batches during evaluation where we would like precompute all of the
+  activations before computing the classifier distance.
+
+  This technique is described in detail in https://arxiv.org/abs/1706.08500.
+  Given two Gaussian distribution with means m and m_w and covariance matrices
+  C and C_w, this function calcuates
+
+  |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))
+
+  which captures how different the distributions of real images and generated
+  images (or more accurately, their visual features) are. Note that unlike the
+  Inception score, this is a true distance and utilizes information about real
+  world images.
+
+  Args:
+    real_activations: 2D Tensor containing activations of real data. Shape is
+      [batch_size, activation_size].
+    generated_activations: 2D Tensor containing activations of generated data.
+      Shape is [batch_size, activation_size].
+
+  Returns:
+   The Frechet Inception distance. A floating-point scalar of the same type
+   as the output of the activations.
+
+  """
+  real_activations.shape.assert_has_rank(2)
+  generated_activations.shape.assert_has_rank(2)
+
+  activations_dtype = real_activations.dtype
+  if activations_dtype != dtypes.float64:
+    real_activations = math_ops.to_double(real_activations)
+    generated_activations = math_ops.to_double(generated_activations)
 
   # Compute mean and covariance matrices of activations.
-  m = math_ops.reduce_mean(real_a, 0)
-  m_v = math_ops.reduce_mean(gen_a, 0)
-  num_examples = math_ops.to_double(array_ops.shape(real_a)[0])
+  m = math_ops.reduce_mean(real_activations, 0)
+  m_v = math_ops.reduce_mean(generated_activations, 0)
+  num_examples = math_ops.to_double(array_ops.shape(real_activations)[0])
 
   # sigma = (1 / (n - 1)) * (X - mu) (X - mu)^T
+  real_centered = real_activations - m
   sigma = math_ops.matmul(
-      real_a - m, real_a - m, transpose_a=True) / (num_examples - 1)
+      real_centered, real_centered, transpose_a=True) / (num_examples - 1)
 
+  gen_centered = generated_activations - m_v
   sigma_v = math_ops.matmul(
-      gen_a - m_v, gen_a - m_v, transpose_a=True) / (num_examples - 1)
+      gen_centered, gen_centered, transpose_a=True) / (num_examples - 1)
 
   # Find the Tr(sqrt(sigma sigma_v)) component of FID
   sqrt_trace_component = trace_sqrt_product(sigma, sigma_v)
diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
index 92e0a995748c1c4c2ddfff0daae59be5a6eaefb4..1e18c699ba93b5f524341c65d0a2db84556b65a2 100644
--- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
+++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
@@ -190,6 +190,23 @@ class ClassifierMetricsTest(test.TestCase):
     # Check that none of the model variables are trainable.
     self.assertListEqual([], variables.trainable_variables())
 
+  def test_run_inception_multiple_outputs(self):
+    """Test `run_inception` graph construction with multiple outputs."""
+    batch_size = 3
+    img = array_ops.ones([batch_size, 299, 299, 3])
+    logits, pool = _run_with_mock(
+        classifier_metrics.run_inception, img,
+        output_tensor=[classifier_metrics.INCEPTION_OUTPUT,
+                       classifier_metrics.INCEPTION_FINAL_POOL])
+
+    self.assertTrue(isinstance(logits, ops.Tensor))
+    self.assertTrue(isinstance(pool, ops.Tensor))
+    logits.shape.assert_is_compatible_with([batch_size, 1001])
+    pool.shape.assert_is_compatible_with([batch_size, 2048])
+
+    # Check that none of the model variables are trainable.
+    self.assertListEqual([], variables.trainable_variables())
+
   def test_inception_score_graph(self):
     """Test `inception_score` graph construction."""
     score = _run_with_mock(classifier_metrics.inception_score,
diff --git a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py
new file mode 100644
index 0000000000000000000000000000000000000000..523968bed91f1021ae629bf52c405cf5c2d7b917
--- /dev/null
+++ b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py
@@ -0,0 +1,28 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Model evaluation tools for TFGAN."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.gan.python.eval.python import sliced_wasserstein_impl
+# pylint: disable=wildcard-import
+from tensorflow.contrib.gan.python.eval.python.sliced_wasserstein_impl import *
+# pylint: enable=wildcard-import
+from tensorflow.python.util.all_util import remove_undocumented
+
+__all__ = sliced_wasserstein_impl.__all__
+remove_undocumented(__name__, __all__)
diff --git a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bebcacbe46d85fc4226c4275b71b3ecbde57a97
--- /dev/null
+++ b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py
@@ -0,0 +1,282 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implementation of Sliced Wasserstein Distance.
+
+Proposed in https://arxiv.org/abs/1710.10196 and the official Theano
+implementation that we used as reference can be found here:
+https://github.com/tkarras/progressive_growing_of_gans
+
+Note: this is not an exact distance but an approximation through random
+projections.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import linalg_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import script_ops
+
+__all__ = ['sliced_wasserstein_distance']
+_GAUSSIAN_FILTER = np.float32([[1, 4, 6, 4, 1], [4, 16, 24, 16, 4], [
+    6, 24, 36, 24, 6
+], [4, 16, 24, 16, 4], [1, 4, 6, 4, 1]]).reshape([5, 5, 1, 1]) / 256.0
+
+
+def _laplacian_pyramid(batch, num_levels):
+  """Compute a Laplacian pyramid.
+
+  Args:
+      batch: (tensor) The batch of images (batch, height, width, channels).
+      num_levels: (int) Desired number of hierarchical levels.
+  Returns:
+      List of tensors from the highest to lowest resolution.
+  """
+  gaussian_filter = constant_op.constant(_GAUSSIAN_FILTER)
+
+  def spatial_conv(batch, gain):
+    s = array_ops.shape(batch)
+    padded = array_ops.pad(batch, [[0, 0], [2, 2], [2, 2], [0, 0]], 'REFLECT')
+    xt = array_ops.transpose(padded, [0, 3, 1, 2])
+    xt = array_ops.reshape(xt, [s[0] * s[3], s[1] + 4, s[2] + 4, 1])
+    conv_out = nn_ops.conv2d(xt, gaussian_filter * gain, [1] * 4, 'VALID')
+    conv_xt = array_ops.reshape(conv_out, [s[0], s[3], s[1], s[2]])
+    conv_xt = array_ops.transpose(conv_xt, [0, 2, 3, 1])
+    return conv_xt
+
+  def pyr_down(batch):  # matches cv2.pyrDown()
+    return spatial_conv(batch, 1)[:, ::2, ::2]
+
+  def pyr_up(batch):  # matches cv2.pyrUp()
+    s = array_ops.shape(batch)
+    zeros = array_ops.zeros([3 * s[0], s[1], s[2], s[3]])
+    res = array_ops.concat([batch, zeros], 0)
+    res = array_ops.batch_to_space(res, crops=[[0, 0], [0, 0]], block_size=2)
+    res = spatial_conv(res, 4)
+    return res
+
+  pyramid = [math_ops.to_float(batch)]
+  for _ in range(1, num_levels):
+    pyramid.append(pyr_down(pyramid[-1]))
+    pyramid[-2] -= pyr_up(pyramid[-1])
+  return pyramid
+
+
+def _batch_to_patches(batch, patches_per_image, patch_size):
+  """Extract patches from a batch.
+
+  Args:
+      batch: (tensor) The batch of images (batch, height, width, channels).
+      patches_per_image: (int) Number of patches to extract per image.
+      patch_size: (int) Size of the patches (size, size, channels) to extract.
+  Returns:
+      Tensor (batch*patches_per_image, patch_size, patch_size, channels) of
+      patches.
+  """
+
+  def py_func_random_patches(batch):
+    """Numpy wrapper."""
+    batch_size, height, width, channels = batch.shape
+    patch_count = patches_per_image * batch_size
+    hs = patch_size // 2
+    # Randomly pick patches.
+    patch_id, y, x, chan = np.ogrid[0:patch_count, -hs:hs + 1, -hs:hs + 1, 0:3]
+    img_id = patch_id // patches_per_image
+    # pylint: disable=g-no-augmented-assignment
+    # Need explicit addition for broadcast to work properly.
+    y = y + np.random.randint(hs, height - hs, size=(patch_count, 1, 1, 1))
+    x = x + np.random.randint(hs, width - hs, size=(patch_count, 1, 1, 1))
+    # pylint: enable=g-no-augmented-assignment
+    idx = ((img_id * height + y) * width + x) * channels + chan
+    patches = batch.flat[idx]
+    return patches
+
+  patches = script_ops.py_func(
+      py_func_random_patches, [batch], batch.dtype, stateful=False)
+  return patches
+
+
+def _normalize_patches(patches):
+  """Normalize patches by their mean and standard deviation.
+
+  Args:
+      patches: (tensor) The batch of patches (batch, size, size, channels).
+  Returns:
+      Tensor (batch, size, size, channels) of the normalized patches.
+  """
+  patches = array_ops.concat(patches, 0)
+  mean, variance = nn.moments(patches, [1, 2, 3], keep_dims=True)
+  patches = (patches - mean) / math_ops.sqrt(variance)
+  return array_ops.reshape(patches, [array_ops.shape(patches)[0], -1])
+
+
+def _sort_rows(matrix, num_rows):
+  """Sort matrix rows by the last column.
+
+  Args:
+      matrix: a matrix of values (row,col).
+      num_rows: (int) number of sorted rows to return from the matrix.
+  Returns:
+      Tensor (num_rows, col) of the sorted matrix top K rows.
+  """
+  tmatrix = array_ops.transpose(matrix, [1, 0])
+  sorted_tmatrix = nn_ops.top_k(tmatrix, num_rows)[0]
+  return array_ops.transpose(sorted_tmatrix, [1, 0])
+
+
+def _sliced_wasserstein(a, b, random_sampling_count, random_projection_dim):
+  """Compute the approximate sliced Wasserstein distance.
+
+  Args:
+      a: (matrix) Distribution "a" of samples (row, col).
+      b: (matrix) Distribution "b" of samples (row, col).
+      random_sampling_count: (int) Number of random projections to average.
+      random_projection_dim: (int) Dimension of the random projection space.
+  Returns:
+      Float containing the approximate distance between "a" and "b".
+  """
+  s = array_ops.shape(a)
+  means = []
+  for _ in range(random_sampling_count):
+    # Random projection matrix.
+    proj = random_ops.random_normal(
+        [array_ops.shape(a)[1], random_projection_dim])
+    proj *= math_ops.rsqrt(
+        math_ops.reduce_sum(math_ops.square(proj), 0, keep_dims=True))
+    # Project both distributions and sort them.
+    proj_a = math_ops.matmul(a, proj)
+    proj_b = math_ops.matmul(b, proj)
+    proj_a = _sort_rows(proj_a, s[0])
+    proj_b = _sort_rows(proj_b, s[0])
+    # Pairwise Wasserstein distance.
+    wdist = math_ops.reduce_mean(math_ops.abs(proj_a - proj_b))
+    means.append(wdist)
+  return math_ops.reduce_mean(means)
+
+
+def _sliced_wasserstein_svd(a, b):
+  """Compute the approximate sliced Wasserstein distance using an SVD.
+
+  This is not part of the paper, it's a variant with possibly more accurate
+  measure.
+
+  Args:
+      a: (matrix) Distribution "a" of samples (row, col).
+      b: (matrix) Distribution "b" of samples (row, col).
+  Returns:
+      Float containing the approximate distance between "a" and "b".
+  """
+  s = array_ops.shape(a)
+  # Random projection matrix.
+  sig, u = linalg_ops.svd(array_ops.concat([a, b], 0))[:2]
+  proj_a, proj_b = array_ops.split(u * sig, 2, axis=0)
+  proj_a = _sort_rows(proj_a[:, ::-1], s[0])
+  proj_b = _sort_rows(proj_b[:, ::-1], s[0])
+  # Pairwise Wasserstein distance.
+  wdist = math_ops.reduce_mean(math_ops.abs(proj_a - proj_b))
+  return wdist
+
+
+def sliced_wasserstein_distance(real_images,
+                                fake_images,
+                                resolution_min=16,
+                                patches_per_image=64,
+                                patch_size=7,
+                                random_sampling_count=1,
+                                random_projection_dim=7 * 7 * 3,
+                                use_svd=False):
+  """Compute the Wasserstein distance between two distributions of images.
+
+  Note that measure vary with the number of images. Use 8192 images to get
+  numbers comparable to the ones in the original paper.
+
+  Args:
+      real_images: (tensor) Real images (batch, height, width, channels).
+      fake_images: (tensor) Fake images (batch, height, width, channels).
+      resolution_min: (int) Minimum resolution for the Laplacion pyramid.
+      patches_per_image: (int) Number of patches to extract per image per
+        Laplacian level.
+      patch_size: (int) Width of a square patch.
+      random_sampling_count: (int) Number of random projections to average.
+      random_projection_dim: (int) Dimension of the random projection space.
+      use_svd: experimental method to compute a more accurate distance.
+  Returns:
+      List of tuples (distance_real, distance_fake) for each level of the
+      Laplacian pyramid from the highest resoluion to the lowest.
+        distance_real is the Wasserstein distance between real images
+        distance_fake is the Wasserstein distance between real and fake images.
+  Raises:
+      ValueError: If the inputs shapes are incorrect. Input tensor dimensions
+      (batch, height, width, channels) are expected to be known at graph
+      construction time. In addition height and width must be the same and the
+      number of colors should be exactly 3. Real and fake images must have the
+      same size.
+  """
+  height = real_images.shape[1]
+  real_images.shape.assert_is_compatible_with([None, None, height, 3])
+  fake_images.shape.assert_is_compatible_with(real_images.shape)
+
+  # Select resolutions.
+  resolution_full = int(height)
+  resolution_min = min(resolution_min, resolution_full)
+  resolution_max = resolution_full
+  # Base loss of detail.
+  resolutions = [
+      2**i
+      for i in range(
+          int(np.log2(resolution_max)),
+          int(np.log2(resolution_min)) - 1, -1)
+  ]
+
+  # Gather patches for each level of the Laplacian pyramids.
+  patches_real, patches_fake, patches_test = (
+      [[] for _ in resolutions] for _ in range(3))
+  for lod, level in enumerate(
+      _laplacian_pyramid(real_images, len(resolutions))):
+    patches_real[lod].append(
+        _batch_to_patches(level, patches_per_image, patch_size))
+    patches_test[lod].append(
+        _batch_to_patches(level, patches_per_image, patch_size))
+
+  for lod, level in enumerate(
+      _laplacian_pyramid(fake_images, len(resolutions))):
+    patches_fake[lod].append(
+        _batch_to_patches(level, patches_per_image, patch_size))
+
+  for lod in range(len(resolutions)):
+    for patches in [patches_real, patches_test, patches_fake]:
+      patches[lod] = _normalize_patches(patches[lod])
+
+  # Evaluate scores.
+  scores = []
+  for lod in range(len(resolutions)):
+    if not use_svd:
+      scores.append(
+          (_sliced_wasserstein(patches_real[lod], patches_test[lod],
+                               random_sampling_count, random_projection_dim),
+           _sliced_wasserstein(patches_real[lod], patches_fake[lod],
+                               random_sampling_count, random_projection_dim)))
+    else:
+      scores.append(
+          (_sliced_wasserstein_svd(patches_real[lod], patches_test[lod]),
+           _sliced_wasserstein_svd(patches_real[lod], patches_fake[lod])))
+  return scores
diff --git a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_test.py b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..b960af28eaa969079b72c7aabcde2ad6cd1f5c68
--- /dev/null
+++ b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_test.py
@@ -0,0 +1,131 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Sliced Wasserstein Distance."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from scipy import ndimage
+from tensorflow.contrib.gan.python.eval.python import sliced_wasserstein_impl as swd
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.platform import test
+
+
+class ClassifierMetricsTest(test.TestCase):
+
+  def test_laplacian_pyramid(self):
+    # The numpy/scipy code for reference estimation comes from:
+    # https://github.com/tkarras/progressive_growing_of_gans
+    gaussian_filter = np.float32([[1, 4, 6, 4, 1], [4, 16, 24, 16, 4], [
+        6, 24, 36, 24, 6
+    ], [4, 16, 24, 16, 4], [1, 4, 6, 4, 1]]) / 256.0
+
+    def np_pyr_down(minibatch):  # matches cv2.pyrDown()
+      assert minibatch.ndim == 4
+      return ndimage.convolve(
+          minibatch,
+          gaussian_filter[np.newaxis, np.newaxis, :, :],
+          mode='mirror')[:, :, ::2, ::2]
+
+    def np_pyr_up(minibatch):  # matches cv2.pyrUp()
+      assert minibatch.ndim == 4
+      s = minibatch.shape
+      res = np.zeros((s[0], s[1], s[2] * 2, s[3] * 2), minibatch.dtype)
+      res[:, :, ::2, ::2] = minibatch
+      return ndimage.convolve(
+          res,
+          gaussian_filter[np.newaxis, np.newaxis, :, :] * 4.0,
+          mode='mirror')
+
+    def np_laplacian_pyramid(minibatch, num_levels):
+      # Note: there's a bug in the original SWD, fixed repeatability.
+      pyramid = [minibatch.astype('f').copy()]
+      for _ in range(1, num_levels):
+        pyramid.append(np_pyr_down(pyramid[-1]))
+        pyramid[-2] -= np_pyr_up(pyramid[-1])
+      return pyramid
+
+    data = np.random.normal(size=[256, 3, 32, 32]).astype('f')
+    pyramid = np_laplacian_pyramid(data, 3)
+    data_tf = array_ops.placeholder(dtypes.float32, [256, 32, 32, 3])
+    pyramid_tf = swd._laplacian_pyramid(data_tf, 3)
+    with self.test_session() as sess:
+      pyramid_tf = sess.run(
+          pyramid_tf, feed_dict={
+              data_tf: data.transpose(0, 2, 3, 1)
+          })
+    for x in range(3):
+      self.assertAllClose(
+          pyramid[x].transpose(0, 2, 3, 1), pyramid_tf[x], atol=1e-6)
+
+  def test_sliced_wasserstein_distance(self):
+    """Test the distance."""
+    d1 = random_ops.random_uniform([256, 32, 32, 3])
+    d2 = random_ops.random_normal([256, 32, 32, 3])
+    wfunc = swd.sliced_wasserstein_distance(d1, d2)
+    with self.test_session() as sess:
+      wscores = [sess.run(x) for x in wfunc]
+    self.assertAllClose(
+        np.array([0.014, 0.014], 'f'),
+        np.array([x[0] for x in wscores], 'f'),
+        rtol=0.1)
+    self.assertAllClose(
+        np.array([0.014, 0.020], 'f'),
+        np.array([x[1] for x in wscores], 'f'),
+        rtol=0.1)
+
+  def test_sliced_wasserstein_distance_svd(self):
+    """Test the distance."""
+    d1 = random_ops.random_uniform([256, 32, 32, 3])
+    d2 = random_ops.random_normal([256, 32, 32, 3])
+    wfunc = swd.sliced_wasserstein_distance(d1, d2, use_svd=True)
+    with self.test_session() as sess:
+      wscores = [sess.run(x) for x in wfunc]
+    self.assertAllClose(
+        np.array([0.013, 0.013], 'f'),
+        np.array([x[0] for x in wscores], 'f'),
+        rtol=0.15)
+    self.assertAllClose(
+        np.array([0.014, 0.019], 'f'),
+        np.array([x[1] for x in wscores], 'f'),
+        rtol=0.15)
+
+  def test_swd_mismatched(self):
+    """Test the inputs mismatched shapes are detected."""
+    d1 = random_ops.random_uniform([256, 32, 32, 3])
+    d2 = random_ops.random_normal([256, 32, 31, 3])
+    d3 = random_ops.random_normal([256, 31, 32, 3])
+    d4 = random_ops.random_normal([255, 32, 32, 3])
+    with self.assertRaises(ValueError):
+      swd.sliced_wasserstein_distance(d1, d2)
+    with self.assertRaises(ValueError):
+      swd.sliced_wasserstein_distance(d1, d3)
+    with self.assertRaises(ValueError):
+      swd.sliced_wasserstein_distance(d1, d4)
+
+  def test_swd_not_rgb(self):
+    """Test that only RGB is supported."""
+    d1 = random_ops.random_uniform([256, 32, 32, 1])
+    d2 = random_ops.random_normal([256, 32, 32, 1])
+    with self.assertRaises(ValueError):
+      swd.sliced_wasserstein_distance(d1, d2)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/gan/python/features/__init__.py b/tensorflow/contrib/gan/python/features/__init__.py
index 6d0972f8db418d6fcf517cc6f7e96093ae08a9e4..4816daf760143af9f1502873b123ffad8e5ec8ce 100644
--- a/tensorflow/contrib/gan/python/features/__init__.py
+++ b/tensorflow/contrib/gan/python/features/__init__.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TFGAN grouped API. Please see README.md for details and usage."""
+"""TFGAN features module.
+
+This module includes support for virtual batch normalization, buffer replay,
+conditioning, etc.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -22,10 +26,12 @@ from __future__ import print_function
 # pylint: disable=unused-import,wildcard-import
 from tensorflow.contrib.gan.python.features.python import clip_weights
 from tensorflow.contrib.gan.python.features.python import conditioning_utils
+from tensorflow.contrib.gan.python.features.python import random_tensor_pool
 from tensorflow.contrib.gan.python.features.python import virtual_batchnorm
 
 from tensorflow.contrib.gan.python.features.python.clip_weights import *
 from tensorflow.contrib.gan.python.features.python.conditioning_utils import *
+from tensorflow.contrib.gan.python.features.python.random_tensor_pool import *
 from tensorflow.contrib.gan.python.features.python.virtual_batchnorm import *
 # pylint: enable=unused-import,wildcard-import
 
@@ -33,5 +39,6 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = clip_weights.__all__
 _allowed_symbols += conditioning_utils.__all__
+_allowed_symbols += random_tensor_pool.__all__
 _allowed_symbols += virtual_batchnorm.__all__
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/gan/python/features/python/clip_weights_test.py b/tensorflow/contrib/gan/python/features/python/clip_weights_test.py
index 030e37ec679ec58e3b534fd3644ffe1d23173404..2b7bb5f14e7f3d1b3f913d3426efaaae19079ffb 100644
--- a/tensorflow/contrib/gan/python/features/python/clip_weights_test.py
+++ b/tensorflow/contrib/gan/python/features/python/clip_weights_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for tfgan.python.features.clip_weights."""
+"""Tests for features.clip_weights."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -31,17 +31,18 @@ class ClipWeightsTest(test.TestCase):
   """Tests for `discriminator_weight_clip`."""
 
   def setUp(self):
+    super(ClipWeightsTest, self).setUp()
     self.variables = [variables.Variable(2.0)]
     self.tuple = collections.namedtuple(
         'VarTuple', ['discriminator_variables'])(self.variables)
 
   def _test_weight_clipping_helper(self, use_tuple):
-    loss = self.variables[0] * 2.0
+    loss = self.variables[0]
     opt = training.GradientDescentOptimizer(1.0)
     if use_tuple:
-      opt_clip = clip_weights.weight_clip(opt, self.variables, 0.1)
+      opt_clip = clip_weights.clip_variables(opt, self.variables, 0.1)
     else:
-      opt_clip = clip_weights.discriminator_weight_clip(opt, self.tuple, 0.1)
+      opt_clip = clip_weights.clip_discriminator_weights(opt, self.tuple, 0.1)
 
     train_op1 = opt.minimize(loss, var_list=self.variables)
     train_op2 = opt_clip.minimize(loss, var_list=self.variables)
@@ -72,10 +73,14 @@ class ClipWeightsTest(test.TestCase):
         clip_weights.clip_discriminator_weights(opt, self.tuple, weight_clip=-1)
     else:
       with self.assertRaisesRegexp(ValueError, 'must be positive'):
-        clip_weights.clip_weights(opt, self.variables, weight_clip=-1)
+        clip_weights.clip_variables(opt, self.variables, weight_clip=-1)
 
   def test_incorrect_weight_clip_value_argsonly(self):
     self._test_incorrect_weight_clip_value_helper(False)
 
   def test_incorrect_weight_clip_value_tuple(self):
     self._test_incorrect_weight_clip_value_helper(True)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool.py b/tensorflow/contrib/gan/python/features/python/random_tensor_pool.py
similarity index 86%
rename from tensorflow/contrib/gan/python/features/python/tensor_pool.py
rename to tensorflow/contrib/gan/python/features/python/random_tensor_pool.py
index 0bd2fa3db9427315ed623bc4d47d74683777bb94..ca904971fa8cb0440d3e0c9060f13cc214c9eaad 100644
--- a/tensorflow/contrib/gan/python/features/python/tensor_pool.py
+++ b/tensorflow/contrib/gan/python/features/python/random_tensor_pool.py
@@ -25,11 +25,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.gan.python.features.python import tensor_pool_impl
+from tensorflow.contrib.gan.python.features.python import random_tensor_pool_impl
 # pylint: disable=wildcard-import
-from tensorflow.contrib.gan.python.features.python.tensor_pool_impl import *
+from tensorflow.contrib.gan.python.features.python.random_tensor_pool_impl import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util.all_util import remove_undocumented
 
-__all__ = tensor_pool_impl.__all__
+__all__ = random_tensor_pool_impl.__all__
 remove_undocumented(__name__, __all__)
diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py
similarity index 67%
rename from tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py
rename to tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py
index 79318a69d291f11b7978e898423f1dd3e757466f..4cfae0de4451880cf8229903b0eb74b1c6e2e04d 100644
--- a/tensorflow/contrib/gan/python/features/python/tensor_pool_impl.py
+++ b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py
@@ -42,8 +42,14 @@ __all__ = [
 ]
 
 
-def tensor_pool(input_value,
-                pool_size,
+def _to_tuple(x):
+  if isinstance(x, (list, tuple)):
+    return tuple(x)
+  return (x,)
+
+
+def tensor_pool(input_values,
+                pool_size=50,
                 pooling_probability=0.5,
                 name='tensor_pool'):
   """Queue storing input values and returning random previously stored ones.
@@ -57,15 +63,18 @@ def tensor_pool(input_value,
   `pool_size` = 0 or `pooling_probability` = 0.
 
   Args:
-    input_value: A `Tensor` from which to read values to be pooled.
-    pool_size: An integer specifying the maximum size of the pool.
+    input_values: A `Tensor`, or a list or tuple of `Tensor`s from which to read
+      values to be pooled.
+    pool_size: An integer specifying the maximum size of the pool. Defaults to
+      50.
     pooling_probability: A float `Tensor` specifying the probability of getting
       a value from the pool, as opposed to just the current input.
     name: A string prefix for the name scope for all tensorflow ops.
 
   Returns:
-    A `Tensor` which is with given probability either the `input_value` or a
-    randomly chosen sample that was previously inserted in the pool.
+    A `Tensor`, or a list or tuple of `Tensor`s (according to the type ofx
+    `input_values`) which is with given probability either the `input_values` or
+    a randomly chosen sample that was previously inserted in the pool.
 
   Raises:
     ValueError: If `pool_size` is negative.
@@ -74,45 +83,57 @@ def tensor_pool(input_value,
   if pool_size < 0:
     raise ValueError('`pool_size` is negative.')
   elif pool_size == 0:
-    return input_value
+    return input_values
 
-  with ops.name_scope('{}_pool_queue'.format(name),
-                      values=[input_value, pooling_probability]):
+  original_input_values = input_values
+  input_values = _to_tuple(input_values)
+
+  with ops.name_scope(
+      '{}_pool_queue'.format(name),
+      values=input_values + (pooling_probability,)):
     pool_queue = data_flow_ops.RandomShuffleQueue(
         capacity=pool_size,
         min_after_dequeue=0,
-        dtypes=[input_value.dtype],
+        dtypes=[v.dtype for v in input_values],
         shapes=None)
 
     # In pseudeo code this code does the following:
     # if not pool_full:
-    #   enqueue(input_value)
-    #   return input_value
+    #   enqueue(input_values)
+    #   return input_values
     # else
-    #   dequeue_value = dequeue_random_sample()
-    #   enqueue(input_value)
+    #   dequeue_values = dequeue_random_sample()
+    #   enqueue(input_values)
     #   if rand() < pooling_probability:
-    #     return dequeue_value
+    #     return dequeue_values
     #   else
-    #     return input_value
+    #     return input_values
 
     def _get_input_value_pooled():
-      enqueue_op = pool_queue.enqueue(input_value)
+      enqueue_op = pool_queue.enqueue(input_values)
       with ops.control_dependencies([enqueue_op]):
-        return array_ops.identity(input_value)
+        return tuple(array_ops.identity(v) for v in input_values)
 
     def _get_random_pool_value_and_enqueue_input():
-      dequeue_value = pool_queue.dequeue()
-      with ops.control_dependencies([dequeue_value]):
-        enqueue_op = pool_queue.enqueue(input_value)
+      dequeue_values = _to_tuple(pool_queue.dequeue())
+      with ops.control_dependencies(dequeue_values):
+        enqueue_op = pool_queue.enqueue(input_values)
         with ops.control_dependencies([enqueue_op]):
           prob = random_ops.random_uniform(
               (), dtype=dtypes.float32) < pooling_probability
-          return control_flow_ops.cond(prob, lambda: dequeue_value,
-                                       lambda: input_value)
+          return control_flow_ops.cond(prob, lambda: dequeue_values,
+                                       lambda: input_values)
 
-    output_value = control_flow_ops.cond(
+    output_values = _to_tuple(control_flow_ops.cond(
         pool_queue.size() < pool_size, _get_input_value_pooled,
-        _get_random_pool_value_and_enqueue_input)
+        _get_random_pool_value_and_enqueue_input))
+
+    # Make sure that the shape of `output_value` is set.
+    for input_value, output_value in zip(input_values, output_values):
+      output_value.set_shape(input_value.shape)
 
-  return output_value
+  if isinstance(original_input_values, list):
+    return list(output_values)
+  elif isinstance(original_input_values, tuple):
+    return output_values
+  return output_values[0]
diff --git a/tensorflow/contrib/gan/python/features/python/tensor_pool_test.py b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py
similarity index 70%
rename from tensorflow/contrib/gan/python/features/python/tensor_pool_test.py
rename to tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py
index 49b77bb3fc56b91cd419f76b6eea920df7efe4a7..d8cf549cf71838178c9da01df462d41d81595fe5 100644
--- a/tensorflow/contrib/gan/python/features/python/tensor_pool_test.py
+++ b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for tf.contrib.gan.python.features.tensor_pool."""
+"""Tests for tf.contrib.gan.python.features.random_tensor_pool."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -20,7 +20,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.gan.python.features.python import tensor_pool_impl as tensor_pool
+from tensorflow.contrib.gan.python.features.python.random_tensor_pool_impl import tensor_pool
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
@@ -32,7 +32,8 @@ class TensorPoolTest(test.TestCase):
     """Checks that `input_value` can have unknown shape."""
     input_value = array_ops.placeholder(
         dtype=dtypes.int32, shape=[None, None, 3])
-    output_value = tensor_pool.tensor_pool(input_value, pool_size=10)
+    output_value = tensor_pool(input_value, pool_size=10)
+    self.assertEqual(output_value.shape.as_list(), [None, None, 3])
 
     with self.test_session(use_gpu=True) as session:
       for i in range(10):
@@ -43,7 +44,8 @@ class TensorPoolTest(test.TestCase):
   def test_pool_sequence(self):
     """Checks that values are pooled and returned maximally twice."""
     input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[])
-    output_value = tensor_pool.tensor_pool(input_value, pool_size=10)
+    output_value = tensor_pool(input_value, pool_size=10)
+    self.assertEqual(output_value.shape.as_list(), [])
 
     with self.test_session(use_gpu=True) as session:
       outs = []
@@ -59,8 +61,9 @@ class TensorPoolTest(test.TestCase):
   def test_never_pool(self):
     """Checks that setting `pooling_probability` to zero works."""
     input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[])
-    output_value = tensor_pool.tensor_pool(
+    output_value = tensor_pool(
         input_value, pool_size=10, pooling_probability=0.0)
+    self.assertEqual(output_value.shape.as_list(), [])
 
     with self.test_session(use_gpu=True) as session:
       for i in range(50):
@@ -72,10 +75,11 @@ class TensorPoolTest(test.TestCase):
     input_value = array_ops.placeholder(dtype=dtypes.int32, shape=[])
     pool_size = 10
     pooling_probability = 0.2
-    output_value = tensor_pool.tensor_pool(
+    output_value = tensor_pool(
         input_value,
         pool_size=pool_size,
         pooling_probability=pooling_probability)
+    self.assertEqual(output_value.shape.as_list(), [])
 
     with self.test_session(use_gpu=True) as session:
       not_pooled = 0
@@ -89,6 +93,24 @@ class TensorPoolTest(test.TestCase):
           1 - pooling_probability,
           atol=0.03)
 
+  def test_input_values_tuple(self):
+    """Checks that `input_values` can be a tuple."""
+    input_values = (array_ops.placeholder(dtype=dtypes.int32, shape=[]),
+                    array_ops.placeholder(dtype=dtypes.int32, shape=[]))
+    output_values = tensor_pool(input_values, pool_size=3)
+    self.assertEqual(len(output_values), len(input_values))
+    for output_value in output_values:
+      self.assertEqual(output_value.shape.as_list(), [])
+
+    with self.test_session(use_gpu=True) as session:
+      for i in range(10):
+        outs = session.run(output_values, {
+            input_values[0]: i,
+            input_values[1]: i + 1
+        })
+        self.assertEqual(len(outs), len(input_values))
+        self.assertEqual(outs[1] - outs[0], 1)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/gan/python/losses/__init__.py b/tensorflow/contrib/gan/python/losses/__init__.py
index 290ff867a1e443f20a63e27fd97f53fed8a6cc11..d9bf8ebfdf65dfc76e4569dcaf26e0e51c7fc107 100644
--- a/tensorflow/contrib/gan/python/losses/__init__.py
+++ b/tensorflow/contrib/gan/python/losses/__init__.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TFGAN grouped API. Please see README.md for details and usage."""
+"""TFGAN losses and penalties.
+
+Losses can be used with individual arguments or with GANModel tuples.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/gan/python/namedtuples.py b/tensorflow/contrib/gan/python/namedtuples.py
index 48f5e8e47dbcd5d32c23806b967a0d1e7403d2f7..3d4e315ebd0bd52b3b5e3e4a8655df8bfe9cebe8 100644
--- a/tensorflow/contrib/gan/python/namedtuples.py
+++ b/tensorflow/contrib/gan/python/namedtuples.py
@@ -79,6 +79,7 @@ class InfoGANModel(
     collections.namedtuple('InfoGANModel', GANModel._fields + (
         'structured_generator_inputs',
         'predicted_distributions',
+        'discriminator_and_aux_fn',
     ))):
   """An InfoGANModel contains all the pieces needed for InfoGAN training.
 
@@ -91,6 +92,8 @@ class InfoGANModel(
     predicted_distributions: A list of tf.Distributions. Predicted by the
       recognizer, and used to evaluate the likelihood of the structured noise.
       List length should match `structured_generator_inputs`.
+    discriminator_and_aux_fn: The original discriminator function that returns
+      a tuple of (logits, `predicted_distributions`).
   """
 
 
diff --git a/tensorflow/contrib/gan/python/train.py b/tensorflow/contrib/gan/python/train.py
index e9443f766bdc59cf45513c93e14390cd6126c295..c429ec48314b1f036beceb564bcf6d1e2a6d3b2e 100644
--- a/tensorflow/contrib/gan/python/train.py
+++ b/tensorflow/contrib/gan/python/train.py
@@ -215,7 +215,8 @@ def infogan_model(
       disc_scope,
       lambda x, y: discriminator_fn(x, y)[0],  # conform to non-InfoGAN API
       structured_generator_inputs,
-      predicted_distributions)
+      predicted_distributions,
+      discriminator_fn)
 
 
 def acgan_model(
@@ -326,6 +327,56 @@ def _use_aux_loss(aux_loss_weight):
     return False
 
 
+def _tensor_pool_adjusted_model(model, tensor_pool_fn):
+  """Adjusts model using `tensor_pool_fn`.
+
+  Args:
+    model: A GANModel tuple.
+    tensor_pool_fn: A function that takes (generated_data, generator_inputs),
+      stores them in an internal pool and returns a previously stored
+      (generated_data, generator_inputs) with some probability. For example
+      tfgan.features.tensor_pool.
+
+  Returns:
+    A new GANModel tuple where discriminator outputs are adjusted by taking
+    pooled generator outputs as inputs. Returns the original model if
+    `tensor_pool_fn` is None.
+
+  Raises:
+    ValueError: If tensor pool does not support the `model`.
+  """
+  if tensor_pool_fn is None:
+    return model
+
+  pooled_generated_data, pooled_generator_inputs = tensor_pool_fn(
+      (model.generated_data, model.generator_inputs))
+
+  if isinstance(model, namedtuples.GANModel):
+    with variable_scope.variable_scope(model.discriminator_scope, reuse=True):
+      dis_gen_outputs = model.discriminator_fn(pooled_generated_data,
+                                               pooled_generator_inputs)
+    return model._replace(discriminator_gen_outputs=dis_gen_outputs)
+  elif isinstance(model, namedtuples.ACGANModel):
+    with variable_scope.variable_scope(model.discriminator_scope, reuse=True):
+      (dis_pooled_gen_outputs,
+       dis_pooled_gen_classification_logits) = model.discriminator_fn(
+           pooled_generated_data, pooled_generator_inputs)
+    return model._replace(
+        discriminator_gen_outputs=dis_pooled_gen_outputs,
+        discriminator_gen_classification_logits=
+        dis_pooled_gen_classification_logits)
+  elif isinstance(model, namedtuples.InfoGANModel):
+    with variable_scope.variable_scope(model.discriminator_scope, reuse=True):
+      (dis_pooled_gen_outputs,
+       pooled_predicted_distributions) = model.discriminator_and_aux_fn(
+           pooled_generated_data, pooled_generator_inputs)
+    return model._replace(
+        discriminator_gen_outputs=dis_pooled_gen_outputs,
+        predicted_distributions=pooled_predicted_distributions)
+  else:
+    raise ValueError('Tensor pool does not support `model`: %s.' % type(model))
+
+
 def gan_loss(
     # GANModel.
     model,
@@ -338,6 +389,7 @@ def gan_loss(
     mutual_information_penalty_weight=None,
     aux_cond_generator_weight=None,
     aux_cond_discriminator_weight=None,
+    tensor_pool_fn=None,
     # Options.
     add_summaries=True):
   """Returns losses necessary to train generator and discriminator.
@@ -363,6 +415,10 @@ def gan_loss(
       https://arxiv.org/abs/1610.09585
     aux_cond_discriminator_weight: If not None: add a classification loss as in
       https://arxiv.org/abs/1610.09585
+    tensor_pool_fn: A function that takes (generated_data, generator_inputs),
+      stores them in an internal pool and returns previous stored
+      (generated_data, generator_inputs). For example
+      `tf.gan.features.tensor_pool`. Defaults to None (not using tensor pool).
     add_summaries: Whether or not to add summaries for the losses.
 
   Returns:
@@ -402,7 +458,9 @@ def gan_loss(
 
   # Create standard losses.
   gen_loss = generator_loss_fn(model, add_summaries=add_summaries)
-  dis_loss = discriminator_loss_fn(model, add_summaries=add_summaries)
+  dis_loss = discriminator_loss_fn(
+      _tensor_pool_adjusted_model(model, tensor_pool_fn),
+      add_summaries=add_summaries)
 
   # Add optional extra losses.
   if _use_aux_loss(gradient_penalty_weight):
diff --git a/tensorflow/contrib/gan/python/train_test.py b/tensorflow/contrib/gan/python/train_test.py
index 6b27b6926102b6e5a7ff134ceed75c23459a6534..58704e68594e947041697ec6cb1d240e1f505aae 100644
--- a/tensorflow/contrib/gan/python/train_test.py
+++ b/tensorflow/contrib/gan/python/train_test.py
@@ -23,6 +23,7 @@ import numpy as np
 from tensorflow.contrib.framework.python.ops import variables as variables_lib
 from tensorflow.contrib.gan.python import namedtuples
 from tensorflow.contrib.gan.python import train
+from tensorflow.contrib.gan.python.features.python import random_tensor_pool
 from tensorflow.contrib.slim.python.slim import learning as slim_learning
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -145,14 +146,16 @@ def get_infogan_model():
   return namedtuples.InfoGANModel(
       *get_gan_model(),
       structured_generator_inputs=[constant_op.constant(0)],
-      predicted_distributions=[categorical.Categorical([1.0])])
+      predicted_distributions=[categorical.Categorical([1.0])],
+      discriminator_and_aux_fn=infogan_discriminator_model)
 
 
 def get_callable_infogan_model():
   return namedtuples.InfoGANModel(
       *get_callable_gan_model(),
       structured_generator_inputs=[constant_op.constant(0)],
-      predicted_distributions=[categorical.Categorical([1.0])])
+      predicted_distributions=[categorical.Categorical([1.0])],
+      discriminator_and_aux_fn=infogan_discriminator_model)
 
 
 def create_infogan_model():
@@ -213,6 +216,25 @@ def get_sync_optimizer():
       replicas_to_aggregate=1)
 
 
+def get_tensor_pool_fn(pool_size):
+
+  def tensor_pool_fn_impl(input_values):
+    return random_tensor_pool.tensor_pool(input_values, pool_size=pool_size)
+
+  return tensor_pool_fn_impl
+
+
+def get_tensor_pool_fn_for_infogan(pool_size):
+
+  def tensor_pool_fn_impl(input_values):
+    generated_data, generator_inputs = input_values
+    output_values = random_tensor_pool.tensor_pool(
+        [generated_data] + generator_inputs, pool_size=pool_size)
+    return output_values[0], output_values[1:]
+
+  return tensor_pool_fn_impl
+
+
 class GANModelTest(test.TestCase):
   """Tests for `gan_model`."""
 
@@ -409,6 +431,114 @@ class GANLossTest(test.TestCase):
   def test_callable_acgan(self):
     self._test_acgan_helper(create_callable_acgan_model)
 
+  def _check_tensor_pool_adjusted_model_outputs(self, tensor1, tensor2,
+                                                pool_size):
+    history_values = []
+    with self.test_session(use_gpu=True) as sess:
+      variables.global_variables_initializer().run()
+      for i in range(2 * pool_size):
+        t1, t2 = sess.run([tensor1, tensor2])
+        history_values.append(t1)
+        if i < pool_size:
+          # For [0, pool_size), the pool is not full, tensor1 should be equal
+          # to tensor2 as the pool.
+          self.assertAllEqual(t1, t2)
+        else:
+          # For [pool_size, ?), the pool is full, tensor2 must be equal to some
+          # historical values of tensor1 (which is previously stored in the
+          # pool).
+          self.assertTrue(any([(v == t2).all() for v in history_values]))
+
+  # Test `_tensor_pool_adjusted_model` for gan model.
+  def test_tensor_pool_adjusted_model_gan(self):
+    model = create_gan_model()
+
+    new_model = train._tensor_pool_adjusted_model(model, None)
+    # 'Generator/dummy_g:0' and 'Discriminator/dummy_d:0'
+    self.assertEqual(2, len(ops.get_collection(ops.GraphKeys.VARIABLES)))
+    self.assertIs(new_model.discriminator_gen_outputs,
+                  model.discriminator_gen_outputs)
+
+    pool_size = 5
+    new_model = train._tensor_pool_adjusted_model(
+        model, get_tensor_pool_fn(pool_size=pool_size))
+    self.assertIsNot(new_model.discriminator_gen_outputs,
+                     model.discriminator_gen_outputs)
+    # Check values.
+    self._check_tensor_pool_adjusted_model_outputs(
+        model.discriminator_gen_outputs, new_model.discriminator_gen_outputs,
+        pool_size)
+
+  # Test _tensor_pool_adjusted_model for infogan model.
+  def test_tensor_pool_adjusted_model_infogan(self):
+    model = create_infogan_model()
+
+    pool_size = 5
+    new_model = train._tensor_pool_adjusted_model(
+        model, get_tensor_pool_fn_for_infogan(pool_size=pool_size))
+    # 'Generator/dummy_g:0' and 'Discriminator/dummy_d:0'
+    self.assertEqual(2, len(ops.get_collection(ops.GraphKeys.VARIABLES)))
+    self.assertIsNot(new_model.discriminator_gen_outputs,
+                     model.discriminator_gen_outputs)
+    self.assertIsNot(new_model.predicted_distributions,
+                     model.predicted_distributions)
+    # Check values.
+    self._check_tensor_pool_adjusted_model_outputs(
+        model.discriminator_gen_outputs, new_model.discriminator_gen_outputs,
+        pool_size)
+
+  # Test _tensor_pool_adjusted_model for acgan model.
+  def test_tensor_pool_adjusted_model_acgan(self):
+    model = create_acgan_model()
+
+    pool_size = 5
+    new_model = train._tensor_pool_adjusted_model(
+        model, get_tensor_pool_fn(pool_size=pool_size))
+    # 'Generator/dummy_g:0' and 'Discriminator/dummy_d:0'
+    self.assertEqual(2, len(ops.get_collection(ops.GraphKeys.VARIABLES)))
+    self.assertIsNot(new_model.discriminator_gen_outputs,
+                     model.discriminator_gen_outputs)
+    self.assertIsNot(new_model.discriminator_gen_classification_logits,
+                     model.discriminator_gen_classification_logits)
+    # Check values.
+    self._check_tensor_pool_adjusted_model_outputs(
+        model.discriminator_gen_outputs, new_model.discriminator_gen_outputs,
+        pool_size)
+
+  # Test tensor pool.
+  def _test_tensor_pool_helper(self, create_gan_model_fn):
+    model = create_gan_model_fn()
+    if isinstance(model, namedtuples.InfoGANModel):
+      tensor_pool_fn = get_tensor_pool_fn_for_infogan(pool_size=5)
+    else:
+      tensor_pool_fn = get_tensor_pool_fn(pool_size=5)
+    loss = train.gan_loss(model, tensor_pool_fn=tensor_pool_fn)
+    self.assertTrue(isinstance(loss, namedtuples.GANLoss))
+
+    # Check values.
+    with self.test_session(use_gpu=True) as sess:
+      variables.global_variables_initializer().run()
+      for _ in range(10):
+        sess.run([loss.generator_loss, loss.discriminator_loss])
+
+  def test_tensor_pool_gan(self):
+    self._test_tensor_pool_helper(create_gan_model)
+
+  def test_tensor_pool_callable_gan(self):
+    self._test_tensor_pool_helper(create_callable_gan_model)
+
+  def test_tensor_pool_infogan(self):
+    self._test_tensor_pool_helper(create_infogan_model)
+
+  def test_tensor_pool_callable_infogan(self):
+    self._test_tensor_pool_helper(create_callable_infogan_model)
+
+  def test_tensor_pool_acgan(self):
+    self._test_tensor_pool_helper(create_acgan_model)
+
+  def test_tensor_pool_callable_acgan(self):
+    self._test_tensor_pool_helper(create_callable_acgan_model)
+
   def test_doesnt_crash_when_in_nested_scope(self):
     with variable_scope.variable_scope('outer_scope'):
       gan_model = train.gan_model(
diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py
index 2a97a79070ea3a0e634d76c5877e2307b6e2e577..14ac5296657d48c7f9e94d220c9e7e28af4d4353 100644
--- a/tensorflow/contrib/graph_editor/transform.py
+++ b/tensorflow/contrib/graph_editor/transform.py
@@ -173,6 +173,9 @@ def copy_op_handler(info, op, copy_shape=True):
   if op._original_op:
     op_._original_op = op._original_op
 
+  # Add op to the graph
+  info.graph_._add_op(op_)
+
   return op_, op_.outputs
 
 
diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD
index 157e97d237021d95c935a6be66aa57842b97125c..3ff02e085ee63fabf42b3cc4389f4605455f3800 100755
--- a/tensorflow/contrib/image/BUILD
+++ b/tensorflow/contrib/image/BUILD
@@ -9,10 +9,12 @@ package(default_visibility = ["//visibility:public"])
 
 load(
     "//tensorflow:tensorflow.bzl",
+    "tf_cc_test",
     "tf_custom_op_library",
     "tf_gen_op_libs",
     "tf_gen_op_wrapper_py",
     "tf_kernel_library",
+    "tf_py_test",
 )
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
@@ -23,6 +25,8 @@ tf_custom_op_library(
         "kernels/bipartite_match_op.cc",
         "kernels/image_ops.cc",
         "kernels/image_ops.h",
+        "kernels/segmentation_ops.cc",
+        "kernels/segmentation_ops.h",
         "ops/image_ops.cc",
     ],
     gpu_srcs = [
@@ -37,6 +41,8 @@ tf_kernel_library(
         "kernels/bipartite_match_op.cc",
         "kernels/image_ops.cc",
         "kernels/image_ops.h",
+        "kernels/segmentation_ops.cc",
+        "kernels/segmentation_ops.h",
     ],
     gpu_srcs = [
         "kernels/image_ops_gpu.cu.cc",
@@ -77,6 +83,7 @@ tf_custom_op_py_library(
         "//tensorflow/python:array_ops",
         "//tensorflow/python:common_shapes",
         "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:linalg_ops",
         "//tensorflow/python:math_ops",
@@ -106,10 +113,33 @@ tf_custom_op_library(
     name = "python/ops/_distort_image_ops.so",
     srcs = [
         "kernels/adjust_hsv_in_yiq_op.cc",
+        "kernels/adjust_hsv_in_yiq_op.h",
         "ops/distort_image_ops.cc",
     ],
+    gpu_srcs = [
+        "kernels/adjust_hsv_in_yiq_op_gpu.cu.cc",
+        "kernels/adjust_hsv_in_yiq_op.h",
+    ],
     deps = [
-        "@protobuf_archive//:protobuf",
+        "//tensorflow/core/kernels:gpu_util_hdrs",
+    ],
+)
+
+tf_cc_test(
+    name = "adjust_hsv_in_yiq_op_test",
+    size = "small",
+    srcs = [
+        "kernels/adjust_hsv_in_yiq_op.h",
+        "kernels/adjust_hsv_in_yiq_op_test.cc",
+    ],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels:ops_testutil",
+        "//tensorflow/core/kernels:ops_util",
+        "//third_party/eigen3",
     ],
 )
 
@@ -122,19 +152,6 @@ tf_gen_op_wrapper_py(
     deps = [":distort_image_ops_op_lib"],
 )
 
-cc_library(
-    name = "distort_image_ops_cc",
-    srcs = [
-        "kernels/adjust_hsv_in_yiq_op.cc",
-    ],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//third_party/eigen3",
-    ],
-    alwayslink = 1,
-)
-
 py_library(
     name = "distort_image_py",
     srcs = [
@@ -177,6 +194,21 @@ cuda_py_test(
     ],
 )
 
+tf_py_test(
+    name = "segmentation_test",
+    size = "medium",
+    srcs = ["python/kernel_tests/segmentation_test.py"],
+    additional_deps = [
+        ":distort_image_py",
+        ":image_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 tf_custom_op_library(
     name = "python/ops/_single_image_random_dot_stereograms.so",
     srcs = [
@@ -222,6 +254,23 @@ py_library(
     ],
 )
 
+cuda_py_test(
+    name = "single_image_random_dot_stereograms_ops_test",
+    size = "medium",
+    srcs = ["python/kernel_tests/single_image_random_dot_stereograms_ops_test.py"],
+    additional_deps = [
+        ":distort_image_py",
+        ":image_py",
+        ":single_image_random_dot_stereograms_py",
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/image/__init__.py b/tensorflow/contrib/image/__init__.py
index d030dffadeb9d67f7ffcbc197a2a3feb9b3b122d..cc8ed117ba2edcc7a53e609381166f17a2fbb45e 100755
--- a/tensorflow/contrib/image/__init__.py
+++ b/tensorflow/contrib/image/__init__.py
@@ -20,6 +20,8 @@ This module provides functions for image manipulation; currently, chrominance
 transformas (including changing saturation and hue) in YIQ space and
 projective transforms (including rotation) are supported.
 
+## Image Transformation `Ops`
+
 @@angles_to_projective_transforms
 @@compose_transforms
 @@adjust_yiq_hsv
@@ -28,19 +30,29 @@ projective transforms (including rotation) are supported.
 @@transform
 @@translate
 @@translations_to_projective_transforms
+
+## Image Segmentation `Ops`
+
+@@connected_components
+
+## Matching `Ops`
+
 @@bipartite_match
+
+## Random Dot Stereogram `Ops`
+
 @@single_image_random_dot_stereograms
 """
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# pylint: disable=line-too-long
 from tensorflow.contrib.image.python.ops.distort_image_ops import adjust_hsv_in_yiq
 from tensorflow.contrib.image.python.ops.distort_image_ops import random_hsv_in_yiq
 
 from tensorflow.contrib.image.python.ops.image_ops import angles_to_projective_transforms
 from tensorflow.contrib.image.python.ops.image_ops import compose_transforms
+from tensorflow.contrib.image.python.ops.image_ops import connected_components
 from tensorflow.contrib.image.python.ops.image_ops import rotate
 from tensorflow.contrib.image.python.ops.image_ops import transform
 from tensorflow.contrib.image.python.ops.image_ops import translate
diff --git a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.cc b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.cc
index f4962ed69dc68d4bad06ef29d7a167e0ba8ae044..478b716d88321101c971789f36c0ff8ecd3f418e 100644
--- a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.cc
+++ b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.cc
@@ -12,14 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include <cmath>
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif
+
+#include "tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h"
 #include <memory>
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/work_sharder.h"
@@ -36,10 +37,10 @@ class AdjustHsvInYiqOpBase : public OpKernel {
 
   struct ComputeOptions {
     const Tensor* input = nullptr;
+    Tensor* output = nullptr;
     const Tensor* delta_h = nullptr;
     const Tensor* scale_s = nullptr;
     const Tensor* scale_v = nullptr;
-    Tensor* output = nullptr;
     int64 channel_count = 0;
   };
 
@@ -65,7 +66,7 @@ class AdjustHsvInYiqOpBase : public OpKernel {
                                         scale_v.shape().DebugString()));
     auto channels = input.dim_size(input.dims() - 1);
     OP_REQUIRES(
-        context, channels == 3,
+        context, channels == kChannelSize,
         errors::InvalidArgument("input must have 3 channels but instead has ",
                                 channels, " channels."));
 
@@ -101,53 +102,21 @@ class AdjustHsvInYiqOp<CPUDevice> : public AdjustHsvInYiqOpBase {
     const Tensor* input = options.input;
     Tensor* output = options.output;
     const int64 channel_count = options.channel_count;
-    static const int kChannelSize = 3;
     auto input_data = input->shaped<float, 2>({channel_count, kChannelSize});
     const float delta_h = options.delta_h->scalar<float>()();
     const float scale_s = options.scale_s->scalar<float>()();
     const float scale_v = options.scale_v->scalar<float>()();
     auto output_data = output->shaped<float, 2>({channel_count, kChannelSize});
+    float tranformation_matrix[kChannelSize * kChannelSize] = {0};
+    internal::compute_tranformation_matrix<kChannelSize * kChannelSize>(
+        delta_h, scale_s, scale_v, tranformation_matrix);
     const int kCostPerChannel = 10;
     const DeviceBase::CpuWorkerThreads& worker_threads =
         *context->device()->tensorflow_cpu_worker_threads();
     Shard(worker_threads.num_threads, worker_threads.workers, channel_count,
           kCostPerChannel,
-          [channel_count, &input_data, &output_data, delta_h, scale_s, scale_v](
+          [channel_count, &input_data, &output_data, &tranformation_matrix](
               int64 start_channel, int64 end_channel) {
-            // Using approximate linear transfomation described in:
-            // https://beesbuzz.biz/code/hsv_color_transforms.php
-            /** Get the constants from sympy
-             from sympy import Matrix
-             from sympy.abc import u, w
-             # Projection matrix to YIQ. http://en.wikipedia.org/wiki/YIQ
-             tyiq = Matrix([[0.299, 0.587, 0.114],
-                            [0.596, -0.274, -0.322],
-                            [0.211, -0.523, 0.312]])
-             # Hue rotation matrix in YIQ space.
-             hue_proj = Matrix(3,3, [v, 0, 0, 0, vsu, -vsw, 0, vsw, vsu])
-             m = tyiq.inv() * hue_proj * tyiq
-             **/
-            // TODO(huangyp): directly compute the projection matrix from tyiq.
-            static const float t[kChannelSize][kChannelSize][kChannelSize] = {
-                {{.299, .701, .16862179492229},
-                 {.587, -.587, .329804745287403},
-                 {.114, -.114, -0.498426540209694}},
-                {{.299, -.299, -.327963394172371},
-                 {.587, .413, .0346106879248821},
-                 {.114, -.114, .293352706247489}},
-                {{.299, -.299, 1.24646136576682},
-                 {.587, -.587, -1.04322888291964},
-                 {.114, .886, -.203232482847173}}};
-            float m[kChannelSize][kChannelSize] = {{0.}};
-            float su = scale_s * std::cos(delta_h);
-            float sw = scale_s * std::sin(delta_h);
-            for (int q_index = 0; q_index < kChannelSize; q_index++) {
-              for (int p_index = 0; p_index < kChannelSize; p_index++) {
-                m[q_index][p_index] = scale_v * (t[q_index][p_index][0] +
-                                                 t[q_index][p_index][1] * su +
-                                                 t[q_index][p_index][2] * sw);
-              }
-            }
             // Applying projection matrix to input RGB vectors.
             const float* p = input_data.data() + start_channel * kChannelSize;
             float* q = output_data.data() + start_channel * kChannelSize;
@@ -155,7 +124,9 @@ class AdjustHsvInYiqOp<CPUDevice> : public AdjustHsvInYiqOpBase {
               for (int q_index = 0; q_index < kChannelSize; q_index++) {
                 q[q_index] = 0;
                 for (int p_index = 0; p_index < kChannelSize; p_index++) {
-                  q[q_index] += m[q_index][p_index] * p[p_index];
+                  q[q_index] +=
+                      p[p_index] *
+                      tranformation_matrix[q_index + kChannelSize * p_index];
                 }
               }
               p += kChannelSize;
@@ -165,8 +136,33 @@ class AdjustHsvInYiqOp<CPUDevice> : public AdjustHsvInYiqOpBase {
   }
 };
 
-REGISTER_KERNEL_BUILDER(Name("AdjustHsvInYiq").Device(DEVICE_CPU),
-                        AdjustHsvInYiqOp<CPUDevice>);
+REGISTER_KERNEL_BUILDER(
+    Name("AdjustHsvInYiq").Device(DEVICE_CPU).TypeConstraint<float>("T"),
+    AdjustHsvInYiqOp<CPUDevice>);
+
+#if GOOGLE_CUDA
+template <>
+class AdjustHsvInYiqOp<GPUDevice> : public AdjustHsvInYiqOpBase {
+ public:
+  explicit AdjustHsvInYiqOp(OpKernelConstruction* context)
+      : AdjustHsvInYiqOpBase(context) {}
+
+  void DoCompute(OpKernelContext* ctx, const ComputeOptions& options) override {
+    const int64 number_of_elements = options.input->NumElements();
+    if (number_of_elements <= 0) {
+      return;
+    }
+    const float* delta_h = options.delta_h->flat<float>().data();
+    const float* scale_s = options.scale_s->flat<float>().data();
+    const float* scale_v = options.scale_v->flat<float>().data();
+    functor::AdjustHsvInYiqGPU()(ctx, options.channel_count, options.input,
+                                 delta_h, scale_s, scale_v, options.output);
+  }
+};
+
+REGISTER_KERNEL_BUILDER(
+    Name("AdjustHsvInYiq").Device(DEVICE_GPU).TypeConstraint<float>("T"),
+    AdjustHsvInYiqOp<GPUDevice>);
+#endif
 
-// TODO(huangyp): add the GPU kernel
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..194ae2ba47456cac66c01989a78ab4ce607d1295
--- /dev/null
+++ b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h
@@ -0,0 +1,87 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_IMAGE_KERNELS_ADJUST_HSV_IN_YIQ_OP_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_IMAGE_KERNELS_ADJUST_HSV_IN_YIQ_OP_H_
+
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif  // GOOGLE_CUDA
+
+#include <cmath>
+#include "third_party/eigen3/Eigen/Core"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+
+static constexpr int kChannelSize = 3;
+
+namespace internal {
+
+template <int MATRIX_SIZE>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void compute_tranformation_matrix(
+    const float delta_h, const float scale_s, const float scale_v,
+    float* matrix) {
+  static_assert(MATRIX_SIZE == kChannelSize * kChannelSize,
+                "Size of matrix should be 9.");
+  // Projection matrix from RGB to YIQ. Numbers from wikipedia
+  // https://en.wikipedia.org/wiki/YIQ
+  Eigen::Matrix3f yiq;
+  /* clang-format off */
+  yiq << 0.299, 0.587, 0.114,
+         0.596, -0.274, -0.322,
+         0.211, -0.523, 0.312;
+  Eigen::Matrix3f yiq_inverse;
+  yiq_inverse << 1, 0.95617069, 0.62143257,
+                 1, -0.2726886, -0.64681324,
+                 1, -1.103744, 1.70062309;
+  /* clang-format on */
+  // Construct hsv linear transformation matrix in YIQ space.
+  // https://beesbuzz.biz/code/hsv_color_transforms.php
+  float vsu = scale_v * scale_s * std::cos(delta_h);
+  float vsw = scale_v * scale_s * std::sin(delta_h);
+  Eigen::Matrix3f hsv_transform;
+  /* clang-format off */
+  hsv_transform << scale_v, 0, 0,
+                   0, vsu, -vsw,
+                   0, vsw, vsu;
+  /* clang-format on */
+  // Compute final transformation matrix = inverse_yiq * hsv_transform * yiq
+  Eigen::Map<Eigen::Matrix<float, 3, 3, Eigen::ColMajor>> eigen_matrix(matrix);
+  eigen_matrix = yiq_inverse * hsv_transform * yiq;
+}
+}  // namespace internal
+
+#if GOOGLE_CUDA
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace functor {
+
+struct AdjustHsvInYiqGPU {
+  void operator()(OpKernelContext* ctx, int channel_count,
+                  const Tensor* const input, const float* const delta_h,
+                  const float* const scale_s, const float* const scale_v,
+                  Tensor* const output);
+};
+
+}  // namespace functor
+
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_IMAGE_KERNELS_ADJUST_HSV_IN_YIQ_OP_H_
diff --git a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b71ff9cd507faac66b3a33d3c02ec9b5901d814a
--- /dev/null
+++ b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_gpu.cu.cc
@@ -0,0 +1,84 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h"
+#include "tensorflow/core/kernels/gpu_utils.h"
+#include "tensorflow/core/platform/stream_executor.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+
+namespace internal {
+
+__global__ void compute_tranformation_matrix_cuda(const float* const delta_h,
+                                                  const float* const scale_s,
+                                                  const float* const scale_v,
+                                                  float* const matrix,
+                                                  const int matrix_size) {
+  if (matrix_size == kChannelSize * kChannelSize) {
+    compute_tranformation_matrix<kChannelSize * kChannelSize>(
+        *delta_h, *scale_s, *scale_v, matrix);
+  }
+}
+}  // namespace internal
+
+namespace functor {
+
+void AdjustHsvInYiqGPU::operator()(OpKernelContext* ctx, int channel_count,
+                                   const Tensor* const input,
+                                   const float* const delta_h,
+                                   const float* const scale_s,
+                                   const float* const scale_v,
+                                   Tensor* const output) {
+  const uint64 m = channel_count;
+  const uint64 k = kChannelSize;
+  const uint64 n = kChannelSize;
+  auto* cu_stream = ctx->eigen_device<GPUDevice>().stream();
+  OP_REQUIRES(ctx, cu_stream, errors::Internal("No GPU stream available."));
+  Tensor tranformation_matrix;
+  OP_REQUIRES_OK(ctx, ctx->allocate_temp(
+                          DT_FLOAT, TensorShape({kChannelSize * kChannelSize}),
+                          &tranformation_matrix));
+  // TODO(huangyp): It takes about 3.5 us to comute tranformation_matrix
+  // with one thread. Improve its performance if necessary.
+  internal::compute_tranformation_matrix_cuda<<<1, 1, 0, cu_stream>>>(
+      delta_h, scale_s, scale_v, tranformation_matrix.flat<float>().data(),
+      tranformation_matrix.flat<float>().size());
+  // Call cuBlas C = A * B directly.
+  auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose;
+  auto a_ptr =
+      AsDeviceMemory(input->flat<float>().data(), input->flat<float>().size());
+  auto b_ptr = AsDeviceMemory(tranformation_matrix.flat<float>().data(),
+                              tranformation_matrix.flat<float>().size());
+  auto c_ptr = AsDeviceMemory(output->flat<float>().data(),
+                              output->flat<float>().size());
+  auto* stream = ctx->op_device_context()->stream();
+  OP_REQUIRES(ctx, stream, errors::Internal("No GPU stream available."));
+  // TODO(huangyp): share/use autotune cublas algorithms in Matmul.op.
+  bool blas_launch_status =
+      stream
+          ->ThenBlasGemm(no_transpose, no_transpose, n, m, k, 1.0f, b_ptr, n,
+                         a_ptr, k, 0.0f, &c_ptr, n)
+          .ok();
+  if (!blas_launch_status) {
+    ctx->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m,
+                                    ", n=", n, ", k=", k));
+  }
+}
+}  // namespace functor
+}  // namespace tensorflow
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_test.cc b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4cbbd277840133c9419f9ce3d945b7d099679dc0
--- /dev/null
+++ b/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op_test.cc
@@ -0,0 +1,48 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+
+class AdjustHsvInYiqOpTest : public OpsTestBase {
+ protected:
+};
+
+TEST_F(AdjustHsvInYiqOpTest, IdentiyTransformMatrix) {
+  Tensor matrix(allocator(), DT_FLOAT, TensorShape({9}));
+  internal::compute_tranformation_matrix<9>(0.0, 1.0, 1.0,
+                                            matrix.flat<float>().data());
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({9}));
+  test::FillValues<float>(&expected, {1, 0, 0, 0, 1, 0, 0, 0, 1});
+  test::ExpectClose(matrix, expected);
+}
+
+TEST_F(AdjustHsvInYiqOpTest, ScaleValueTransformMatrix) {
+  float scale_v = 2.3;
+  Tensor matrix(allocator(), DT_FLOAT, TensorShape({9}));
+  internal::compute_tranformation_matrix<9>(0.0, 1.0, scale_v,
+                                            matrix.flat<float>().data());
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({9}));
+  test::FillValues<float>(&expected,
+                          {scale_v, 0, 0, 0, scale_v, 0, 0, 0, scale_v});
+  test::ExpectClose(matrix, expected);
+}
+
+}  // end namespace tensorflow
diff --git a/tensorflow/contrib/image/kernels/segmentation_ops.cc b/tensorflow/contrib/image/kernels/segmentation_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fe8bf6e21c7b7310527668324571774e8bc50893
--- /dev/null
+++ b/tensorflow/contrib/image/kernels/segmentation_ops.cc
@@ -0,0 +1,139 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs for ImageConnectedComponents in ../ops/image_ops.cc, and description
+// of the algorithm in segmentation_ops.h.
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/contrib/image/kernels/segmentation_ops.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+using tensorflow::functor::BlockedImageUnionFindFunctor;
+using tensorflow::functor::FindRootFunctor;
+using tensorflow::functor::ImageConnectedComponentsFunctor;
+using tensorflow::functor::TensorRangeFunctor;
+
+using OutputType = typename BlockedImageUnionFindFunctor<bool>::OutputType;
+
+// Computes connected components on batches of 2D images.
+template <typename Device, typename T>
+class ImageConnectedComponents : public OpKernel {
+ public:
+  explicit ImageConnectedComponents(OpKernelConstruction* ctx)
+      : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& images_t = ctx->input(0);
+    OP_REQUIRES(ctx, images_t.shape().dims() == 3,
+                errors::InvalidArgument("Input images must have rank 3"));
+    Tensor forest_t, rank_t;
+    OP_REQUIRES_OK(ctx, ctx->allocate_temp(tensorflow::DT_INT64,
+                                           images_t.shape(), &forest_t));
+    OP_REQUIRES_OK(ctx, ctx->allocate_temp(tensorflow::DT_INT64,
+                                           images_t.shape(), &rank_t));
+    Tensor* output_t;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, images_t.shape(), &output_t));
+
+    // Fill forest with values from 0 to n - 1, so that each node points to
+    // itself.
+    TensorRangeFunctor<Device>()(ctx->eigen_device<Device>(),
+                                 forest_t.flat<OutputType>());
+    auto rank = rank_t.tensor<OutputType, 3>();
+    rank.device(ctx->eigen_device<Device>()) = rank.constant(OutputType(0));
+
+    const auto images = images_t.tensor<T, 3>();
+    auto forest = forest_t.tensor<OutputType, 3>();
+    ImageConnectedComponentsFunctor<Device, T>()(
+        ctx, output_t->flat<OutputType>(), images, forest, rank);
+  }
+};
+
+using CPUDevice = Eigen::ThreadPoolDevice;
+
+namespace functor {
+
+// Connected components CPU implementation. See `segmentation_ops.h` for a
+// description of the algorithm.
+template <typename T>
+struct ImageConnectedComponentsFunctor<CPUDevice, T> {
+  void operator()(OpKernelContext* ctx,
+                  typename TTypes<OutputType>::Flat output,
+                  typename TTypes<T, 3>::ConstTensor images,
+                  typename TTypes<OutputType, 3>::Tensor forest,
+                  typename TTypes<OutputType, 3>::Tensor rank) {
+    const int64 num_images = images.dimension(0),
+                num_rows = images.dimension(1), num_cols = images.dimension(2),
+                num_elements = images.size();
+    // Bail out early for an empty image--no work to do.
+    if (num_elements == 0) {
+      return;
+    }
+    auto worker_threads = ctx->device()->tensorflow_cpu_worker_threads();
+    BlockedImageUnionFindFunctor<T> union_find(
+        images.data(), num_rows, num_cols, forest.data(), rank.data());
+    while (union_find.can_merge()) {
+      union_find.merge_blocks();
+      int64 num_blocks_vertically = union_find.num_blocks_vertically();
+      int64 num_blocks_horizontally = union_find.num_blocks_horizontally();
+      // Merging each block calls union_down for each pixel in a row of the
+      // block, and union_right for each pixel in a column of the block. Assume
+      // 20 instructions for each call to union_down or union_right. find() may
+      // loop more while searching for the root, but this should not be very
+      // significant.
+      int cost = (union_find.block_height() + union_find.block_width()) * 20;
+      Shard(worker_threads->num_threads, worker_threads->workers,
+            num_images * num_blocks_vertically * num_blocks_horizontally, cost,
+            [&union_find, num_images, num_blocks_vertically,
+             num_blocks_horizontally](int64 start_block, int64 limit_block) {
+              for (int64 i = start_block; i < limit_block; i++) {
+                int64 block_x = i % num_blocks_horizontally;
+                int64 block_y =
+                    (i / num_blocks_horizontally) % num_blocks_vertically;
+                int64 image =
+                    i / (num_blocks_horizontally * num_blocks_vertically);
+                union_find.merge_internal_block_edges(image, block_y, block_x);
+              }
+            });
+    }
+    FindRootFunctor<CPUDevice, T>()(ctx->eigen_device<CPUDevice>(), output,
+                                    images.data(), union_find);
+  }
+};
+
+}  // end namespace functor
+
+#define REGISTER_IMAGE_CONNECTED_COMPONENTS(TYPE)             \
+  REGISTER_KERNEL_BUILDER(Name("ImageConnectedComponents")    \
+                              .Device(DEVICE_CPU)             \
+                              .TypeConstraint<TYPE>("dtype"), \
+                          ImageConnectedComponents<CPUDevice, TYPE>)
+// Connected components (arguably) make sense for number, bool, and string types
+TF_CALL_NUMBER_TYPES(REGISTER_IMAGE_CONNECTED_COMPONENTS);
+TF_CALL_bool(REGISTER_IMAGE_CONNECTED_COMPONENTS);
+TF_CALL_string(REGISTER_IMAGE_CONNECTED_COMPONENTS);
+#undef REGISTER_IMAGE_CONNECTED_COMPONENTS
+
+// TODO(ringwalt): Implement on GPU. We probably want to stick to the original
+// algorithm by Stava and Benes there for efficiency (computing small blocks in
+// shared memory in CUDA thread blocks, instead of starting with single-pixel
+// blocks).
+
+}  // end namespace tensorflow
diff --git a/tensorflow/contrib/image/kernels/segmentation_ops.h b/tensorflow/contrib/image/kernels/segmentation_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..0957d5fd10f02daad3d8d51aadec9ce9da2660b5
--- /dev/null
+++ b/tensorflow/contrib/image/kernels/segmentation_ops.h
@@ -0,0 +1,303 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_
+#define TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_
+
+// Connected component analysis. The op is described in ../ops/image_ops.cc. A
+// description of the algorithm appears below.
+
+#define EIGEN_USE_THREADS
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/work_sharder.h"
+
+namespace tensorflow {
+
+namespace functor {
+
+template <typename T>
+bool is_nonzero(T value) {
+  return value != T(0);
+}
+
+template <>
+bool is_nonzero(string value) {
+  return value.size() != 0;
+}
+
+// Processes each pixel of an image for union-find, in parallel blocks. This is
+// loosely based on the algorithm in "GPU Computing Gems" by Ondrej Stava and
+// Bedrich Benes, available here:
+// http://hpcg.purdue.edu/bbenes/papers/Stava2011CCL.pdf
+// The bulk of the process uses blocks of each image, which have each been
+// processed separately. As long as there are multiple blocks in the image, we
+// double the height and width of the blocks, creating new blocks which each
+// consist of 2x2 previous sub-blocks. On each new block, we process adjacent
+// pixels from the previous sub-blocks serially. However, the new blocks are not
+// connected, so we can process each block in parallel.
+// The GPU algorithm first processes blocks of a fixed size in GPU shared
+// memory, with one image block per CUDA thread block. On the CPU, we just start
+// with a block size of a single pixel, and borrow the rest of the algorithm
+// unchanged.
+template <typename T>
+class BlockedImageUnionFindFunctor {
+ public:
+  using OutputType = int64;
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlockedImageUnionFindFunctor(
+      const T* images, const int64 num_rows, const int64 num_cols,
+      OutputType* forest, OutputType* rank)
+      : images_(images),
+        num_rows_(num_rows),
+        num_cols_(num_cols),
+        block_height_(1),
+        block_width_(1),
+        forest_(forest),
+        rank_(rank) {}
+
+  // Returns the root of the tree that the pixel at the given index belongs to.
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType
+  find(OutputType index) const {
+    while (forest_[index] != index) {
+      index = forest_[index];
+    }
+    return index;
+  }
+
+  // Returns the number of blocks along the y axis.
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks_vertically() const {
+    return (num_rows_ + block_height_ - 1) / block_height_;
+  }
+
+  // Returns the number of blocks along the x axis.
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks_horizontally() const {
+    return (num_cols_ + block_width_ - 1) / block_width_;
+  }
+
+  // Returns the total number of blocks in each image.
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks() const {
+    return num_blocks_vertically() * num_blocks_horizontally();
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 block_height() const {
+    return block_height_;
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 block_width() const {
+    return block_width_;
+  }
+
+  // Returns whether we may merge again (the image contains more than one
+  // block).
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool can_merge() const {
+    return block_height_ < num_rows_ || block_width_ < num_cols_;
+  }
+
+  // Doubles the block size. After this method, you must call
+  // `merge_internal_block_edges` for each image and each *new* block's xy
+  // coordinates (typically in parallel).
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void merge_blocks() {
+    block_height_ *= 2;
+    block_width_ *= 2;
+  }
+
+  // Processes pairs of pixels within the block which were adjacent in the four
+  // sub-blocks. This must be done at each stage so that the connected
+  // components in each block are joined correctly.
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void merge_internal_block_edges(
+      int64 image_index, int64 block_vertical_index,
+      int64 block_horizontal_index) const {
+    int64 block_start_y = block_vertical_index * block_height_;
+    int64 block_start_x = block_horizontal_index * block_width_;
+    // Merge the 4 sub-blocks horizontally (fixing the vertical seam).
+    int64 block_center_x = block_start_x + block_width_ / 2 - 1;
+    if (0 <= block_center_x && block_center_x + 1 < num_cols_) {
+      int64 merge_blocks_limit_y =
+          std::min(num_rows_, block_start_y + block_height_);
+      for (int64 y = block_start_y; y < merge_blocks_limit_y; y++) {
+        union_right(image_index, y, block_center_x);
+      }
+    }
+    // Merge the 4 sub-blocks vertically (fixing the horizontal seam).
+    int64 block_center_y = block_start_y + block_height_ / 2 - 1;
+    if (0 <= block_center_y && block_center_y + 1 < num_rows_) {
+      int64 merge_blocks_limit_x =
+          std::min(num_cols_, block_start_x + block_width_);
+      for (int64 x = block_start_x; x < merge_blocks_limit_x; x++) {
+        union_down(image_index, block_center_y, x);
+      }
+    }
+  }
+
+ private:
+  // The input image(s).
+  const T* const images_;
+  const int64 num_rows_;
+  const int64 num_cols_;
+  // Current height of each sub-block of the image.
+  int64 block_height_;
+  // Current width of each sub-block of the image.
+  int64 block_width_;
+  // Union-find forest. This has the same size as `images_`, and each entry
+  // holds the index of its parent in `images_` (roots hold their own index).
+  // Cycles should not occur.
+  OutputType* const forest_;
+  // Union-find rank of each pixel.
+  OutputType* const rank_;
+
+  // Unions the pixel with the pixel below it if applicable (both pixels are
+  // true, and the pixel is not in the last row).
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void union_down(OutputType batch,
+                                                        OutputType row,
+                                                        OutputType col) const {
+    T pixel = read_pixel(batch, row, col);
+    if (is_nonzero<T>(pixel)) {
+      const int64 index_a = col + num_cols_ * (row + num_rows_ * batch);
+      if (row + 1 < num_rows_ && read_pixel(batch, row + 1, col) == pixel) {
+        const int64 index_b = col + num_cols_ * (row + 1 + num_rows_ * batch);
+        do_union(index_a, index_b);
+      }
+    }
+  }
+
+  // Unions the pixel with the pixel to the right of it if applicable.
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void union_right(OutputType batch,
+                                                         OutputType row,
+                                                         OutputType col) const {
+    T pixel = read_pixel(batch, row, col);
+    if (is_nonzero<T>(pixel)) {
+      const int64 index_a = col + num_cols_ * (row + num_rows_ * batch);
+      if (col + 1 < num_cols_ && read_pixel(batch, row, col + 1) == pixel) {
+        const int64 index_b = col + 1 + num_cols_ * (row + num_rows_ * batch);
+        do_union(index_a, index_b);
+      }
+    }
+  }
+
+  // Reads a pixel value in the images.
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T
+  read_pixel(const OutputType batch, const OutputType row,
+             const OutputType col) const {
+    return images_[col + num_cols_ * (row + num_rows_ * batch)];
+  }
+
+  // Unions the trees that the two pixels belong to, using their index in the
+  // `images_` array.
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void do_union(
+      OutputType index_a, OutputType index_b) const {
+    // Find the roots of index_a and index_b in the forest, and make one the
+    // child of the other.
+    index_a = find(index_a);
+    index_b = find(index_b);
+    const OutputType rank_a = rank_[index_a];
+    const OutputType rank_b = rank_[index_b];
+    OutputType parent, child;
+    if (index_a == index_b) {
+      return;
+    } else if (rank_a < rank_b) {
+      parent = index_a;
+      child = index_b;
+    } else {
+      parent = index_b;
+      child = index_a;
+      rank_[parent]++;
+    }
+    forest_[child] = parent;
+  }
+};
+
+// Runs the ImageUnionFindFunctor on all pixels. Will require different CPU and
+// GPU implementations.
+template <typename Device, typename T>
+class ImageConnectedComponentsFunctor {
+ public:
+  using OutputType = typename BlockedImageUnionFindFunctor<T>::OutputType;
+
+  void operator()(OpKernelContext* ctx,
+                  typename TTypes<T, 3>::ConstTensor images,
+                  typename TTypes<OutputType, 3>::Tensor forest,
+                  typename TTypes<OutputType, 3>::Tensor rank);
+};
+
+// Fills a flat Tensor with indices from 0 to n - 1.
+template <typename Device>
+class TensorRangeFunctor {
+ public:
+  using OutputType = typename BlockedImageUnionFindFunctor<bool>::OutputType;
+
+  void operator()(const Device& device,
+                  typename TTypes<OutputType>::Flat tensor) {
+    tensor.device(device) = tensor.generate(TensorRangeGenerator());
+  }
+
+ private:
+  class TensorRangeGenerator {
+   public:
+    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType
+    operator()(const Eigen::array<Eigen::DenseIndex, 1>& coords) const {
+      return coords[0];
+    }
+  };
+};
+
+// Given the union-find forest, generates the root index for each node. This
+// gives us arbitrary, usually non-consecutive ids for each connected component.
+// The ids are massaged in Python to get deterministic, consecutive ids.
+template <typename Device, typename T>
+class FindRootFunctor {
+ public:
+  using OutputType = typename BlockedImageUnionFindFunctor<T>::OutputType;
+
+  void operator()(const Device& device,
+                  typename TTypes<OutputType>::Flat component_ids,
+                  const T* images,
+                  const BlockedImageUnionFindFunctor<T>& union_find) {
+    component_ids.device(device) =
+        component_ids.generate(FindRootGenerator(images, union_find));
+  }
+
+ private:
+  class FindRootGenerator {
+    const T* const images_;
+    const BlockedImageUnionFindFunctor<T> union_find_;
+
+   public:
+    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE FindRootGenerator(
+        const T* images, BlockedImageUnionFindFunctor<T> union_find)
+        : images_(images), union_find_(union_find) {}
+
+    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType
+    operator()(const Eigen::array<Eigen::DenseIndex, 1>& coords) const {
+      if (is_nonzero<T>(images_[coords[0]])) {
+        // True pixels have an arbitrary segment id > 0. The segment ids will be
+        // made contiguous later.
+        return union_find_.find(coords[0]) + 1;
+      } else {
+        // False pixels have a segment of 0.
+        return 0;
+      }
+    }
+  };
+};
+
+}  // end namespace functor
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_
diff --git a/tensorflow/contrib/image/ops/image_ops.cc b/tensorflow/contrib/image/ops/image_ops.cc
index 4527fdd87a8be3390fb0840410218ab74a27f0d2..68771b3d054a64ba94141c092e20df1ed6b2339b 100644
--- a/tensorflow/contrib/image/ops/image_ops.cc
+++ b/tensorflow/contrib/image/ops/image_ops.cc
@@ -98,4 +98,34 @@ col_to_row_match_indices: A vector of length num_columns, which is the number
   `col_to_row_match_indices[j]`.
 )doc");
 
+REGISTER_OP("ImageConnectedComponents")
+    .Input("image: dtype")
+    .Output("components: int64")
+    .Attr(
+        "dtype: {int64, int32, uint16, int16, uint8, int8, half, float, "
+        "double, bool, string}")
+    .SetShapeFn([](InferenceContext* c) {
+      return shape_inference::UnchangedShape(c);
+    })
+    .Doc(R"doc(
+Find the connected components of image(s).
+
+For each image (along the 0th axis), all connected components of adjacent pixels
+with the same non-zero value are detected and given unique ids.
+
+The returned `components` tensor has 0s for the zero pixels of `images`, and
+arbitrary nonzero ids for the connected components of nonzero values. Ids are
+unique across all of the images, and are in row-major order by the first pixel
+in the component.
+
+Uses union-find with union by rank but not path compression, giving a runtime of
+`O(n log n)`. See:
+    https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Time_Complexity
+
+image: Image(s) with shape (N, H, W).
+components: Component ids for each pixel in "image". Same shape as "image". Zero
+    pixels all have an output of 0, and all components of adjacent pixels with
+    the same value are given consecutive ids, starting from 1.
+)doc");
+
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc b/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc
index f8b56ab1c5400694b3aa8d4a0c19c7769aa8cbce..1f41f243f2ebc0d1e884728defa160bf6d6c34ce 100755
--- a/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc
+++ b/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc
@@ -19,6 +19,10 @@ limitations under the License.
 
 namespace tensorflow {
 
+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
 REGISTER_OP("SingleImageRandomDotStereograms")
     .Attr("T: {double,float,int64,int32}")
     .Input("depth_values: T")
@@ -37,6 +41,26 @@ REGISTER_OP("SingleImageRandomDotStereograms")
         "output_image_shape: shape = { dim {size:1024} dim {size: 768} dim "
         "{size: 1}}")
     .Attr("output_data_window: shape = { dim {size:1022} dim {size: 757}}")
+    .SetShapeFn([](InferenceContext* c) {
+      // Validate that the output_image_shape attr is correct.
+      // NOTE: The output_image_shape is [X, Y, C]
+      // while the output data is [Y, X, C] (or [H, W, C]).
+      // As a result, by default the output_image_shape has the value
+      // of [1024, 768, 1] but the output data will be [768, 1024, 1].
+      PartialTensorShape shape;
+      TF_RETURN_IF_ERROR(c->GetAttr("output_image_shape", &shape));
+      ShapeHandle output_image_shape;
+      TF_RETURN_IF_ERROR(
+          c->MakeShapeFromPartialTensorShape(shape, &output_image_shape));
+      DimensionHandle x_dim = c->Dim(output_image_shape, 0);
+      DimensionHandle y_dim = c->Dim(output_image_shape, 1);
+
+      int colors;
+      TF_RETURN_IF_ERROR(c->GetAttr("number_colors", &colors));
+
+      c->set_output(0, c->MakeShape({y_dim, x_dim, colors > 256? c->MakeDim(3) : c->MakeDim(1)}));
+      return Status::OK();
+    })
     .Doc(R"doc(
 Outputs a single image random dot stereogram for export via encode_PNG/JPG OP.
 
diff --git a/tensorflow/contrib/image/python/kernel_tests/distort_image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/distort_image_ops_test.py
index b85f19d29b79defa10493bdbaa4a1b237cb2a9ee..a495b58b7f6481d4cdedf73f23615d0390eb6a45 100644
--- a/tensorflow/contrib/image/python/kernel_tests/distort_image_ops_test.py
+++ b/tensorflow/contrib/image/python/kernel_tests/distort_image_ops_test.py
@@ -172,7 +172,7 @@ class AdjustValueInYiqTest(test_util.TensorFlowTestCase):
           raise AssertionError('Invalid test style: %s' % (test_style))
         y_np = self._adjust_value_in_yiq_np(x_np, scale)
         y_tf = self._adjust_value_in_yiq_tf(x_np, scale)
-        self.assertAllClose(y_tf, y_np, rtol=2e-5, atol=1e-5)
+        self.assertAllClose(y_tf, y_np, rtol=2e-4, atol=1e-4)
 
   def test_invalid_shapes(self):
     x_np = np.random.rand(2, 3) * 255.
@@ -237,7 +237,7 @@ class AdjustSaturationInYiqTest(test_util.TensorFlowTestCase):
             raise AssertionError('Invalid test style: %s' % (test_style))
           y_baseline = self._adjust_saturation_in_yiq_np(x_np, scale)
           y_tf = self._adjust_saturation_in_yiq_tf(x_np, scale)
-          self.assertAllClose(y_tf, y_baseline, rtol=2e-5, atol=1e-5)
+          self.assertAllClose(y_tf, y_baseline, rtol=2e-4, atol=1e-4)
 
   def test_invalid_shapes(self):
     x_np = np.random.rand(2, 3) * 255.
@@ -291,6 +291,9 @@ class AdjustHueInYiqBenchmark(test.Benchmark):
   def benchmark_adjust_hue_in_yiqCpuAll(self):
     self._benchmark_adjust_hue_in_yiq('/cpu:0', None)
 
+  def benchmark_adjust_hue_in_yiq_gpu_all(self):
+    self._benchmark_adjust_hue_in_yiq(test.gpu_device_name(), None)
+
 
 class AdjustSaturationInYiqBenchmark(test.Benchmark):
 
@@ -333,6 +336,9 @@ class AdjustSaturationInYiqBenchmark(test.Benchmark):
   def benchmark_adjust_saturation_in_yiq_cpu_all(self):
     self._benchmark_adjust_saturation_in_yiq('/cpu:0', None)
 
+  def benchmark_adjust_saturation_in_yiq_gpu_all(self):
+    self._benchmark_adjust_saturation_in_yiq(test.gpu_device_name(), None)
+
 
 if __name__ == '__main__':
   googletest.main()
diff --git a/tensorflow/contrib/image/python/kernel_tests/segmentation_test.py b/tensorflow/contrib/image/python/kernel_tests/segmentation_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..48066cbacefe6b229a1f485486f11e8b8af7704f
--- /dev/null
+++ b/tensorflow/contrib/image/python/kernel_tests/segmentation_test.py
@@ -0,0 +1,189 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for connected component analysis."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+
+import numpy as np
+
+from tensorflow.contrib.image.python.ops import image_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import googletest
+
+# Image for testing connected_components, with a single, winding component.
+SNAKE = np.asarray(
+    [[0, 0, 0, 0, 0, 0, 0, 0, 0],
+     [0, 1, 1, 1, 1, 0, 0, 0, 0],
+     [0, 0, 0, 0, 1, 1, 1, 1, 0],
+     [0, 0, 0, 0, 0, 0, 0, 1, 0],
+     [0, 1, 1, 1, 1, 1, 1, 1, 0],
+     [0, 1, 0, 0, 0, 0, 0, 0, 0],
+     [0, 1, 0, 1, 1, 1, 1, 1, 0],
+     [0, 1, 0, 0, 0, 0, 0, 1, 0],
+     [0, 1, 1, 1, 1, 1, 1, 1, 0],
+     [0, 0, 0, 0, 0, 0, 0, 0, 0]])  # pyformat: disable
+
+
+class SegmentationTest(test_util.TensorFlowTestCase):
+
+  def testDisconnected(self):
+    arr = math_ops.cast(
+        [[1, 0, 0, 1, 0, 0, 0, 0, 1],
+         [0, 1, 0, 0, 0, 1, 0, 1, 0],
+         [1, 0, 1, 0, 0, 0, 1, 0, 0],
+         [0, 0, 0, 0, 1, 0, 0, 0, 0],
+         [0, 0, 1, 0, 0, 0, 0, 0, 0]],
+        dtypes.bool)  # pyformat: disable
+    expected = (
+        [[1, 0, 0, 2, 0, 0, 0, 0, 3],
+         [0, 4, 0, 0, 0, 5, 0, 6, 0],
+         [7, 0, 8, 0, 0, 0, 9, 0, 0],
+         [0, 0, 0, 0, 10, 0, 0, 0, 0],
+         [0, 0, 11, 0, 0, 0, 0, 0, 0]])  # pyformat: disable
+    with self.test_session():
+      self.assertAllEqual(image_ops.connected_components(arr).eval(), expected)
+
+  def testSimple(self):
+    arr = [[0, 1, 0], [1, 1, 1], [0, 1, 0]]
+    with self.test_session():
+      # Single component with id 1.
+      self.assertAllEqual(
+          image_ops.connected_components(math_ops.cast(
+              arr, dtypes.bool)).eval(), arr)
+
+  def testSnake(self):
+    with self.test_session():
+      # Single component with id 1.
+      self.assertAllEqual(
+          image_ops.connected_components(math_ops.cast(
+              SNAKE, dtypes.bool)).eval(), SNAKE)
+
+  def testSnake_disconnected(self):
+    for i in range(SNAKE.shape[0]):
+      for j in range(SNAKE.shape[1]):
+        with self.test_session():
+          # If we disconnect any part of the snake except for the endpoints,
+          # there will be 2 components.
+          if SNAKE[i, j] and (i, j) not in [(1, 1), (6, 3)]:
+            disconnected_snake = SNAKE.copy()
+            disconnected_snake[i, j] = 0
+            components = image_ops.connected_components(
+                math_ops.cast(disconnected_snake, dtypes.bool)).eval()
+            self.assertEqual(components.max(), 2, 'disconnect (%d, %d)' % (i,
+                                                                           j))
+            bins = np.bincount(components.ravel())
+            # Nonzero number of pixels labeled 0, 1, or 2.
+            self.assertGreater(bins[0], 0)
+            self.assertGreater(bins[1], 0)
+            self.assertGreater(bins[2], 0)
+
+  def testMultipleImages(self):
+    images = [[[1, 1, 1, 1],
+               [1, 0, 0, 1],
+               [1, 0, 0, 1],
+               [1, 1, 1, 1]],
+              [[1, 0, 0, 1],
+               [0, 0, 0, 0],
+               [0, 0, 0, 0],
+               [1, 0, 0, 1]],
+              [[1, 1, 0, 1],
+               [0, 1, 1, 0],
+               [1, 0, 1, 0],
+               [0, 0, 1, 1]]]  # pyformat: disable
+    expected = [[[1, 1, 1, 1],
+                 [1, 0, 0, 1],
+                 [1, 0, 0, 1],
+                 [1, 1, 1, 1]],
+                [[2, 0, 0, 3],
+                 [0, 0, 0, 0],
+                 [0, 0, 0, 0],
+                 [4, 0, 0, 5]],
+                [[6, 6, 0, 7],
+                 [0, 6, 6, 0],
+                 [8, 0, 6, 0],
+                 [0, 0, 6, 6]]]  # pyformat: disable
+    with self.test_session():
+      self.assertAllEqual(
+          image_ops.connected_components(math_ops.cast(
+              images, dtypes.bool)).eval(), expected)
+
+  def testZeros(self):
+    with self.test_session():
+      self.assertAllEqual(
+          image_ops.connected_components(
+              array_ops.zeros((100, 20, 50), dtypes.bool)).eval(),
+          np.zeros((100, 20, 50)))
+
+  def testOnes(self):
+    with self.test_session():
+      self.assertAllEqual(
+          image_ops.connected_components(
+              array_ops.ones((100, 20, 50), dtypes.bool)).eval(),
+          np.tile(np.arange(100)[:, None, None] + 1, [1, 20, 50]))
+
+  def testOnes_small(self):
+    with self.test_session():
+      self.assertAllEqual(
+          image_ops.connected_components(array_ops.ones((3, 5),
+                                                        dtypes.bool)).eval(),
+          np.ones((3, 5)))
+
+  def testRandom_scipy(self):
+    np.random.seed(42)
+    images = np.random.randint(0, 2, size=(10, 100, 200)).astype(np.bool)
+    expected = connected_components_reference_implementation(images)
+    if expected is None:
+      return
+    with self.test_session():
+      self.assertAllEqual(
+          image_ops.connected_components(images).eval(), expected)
+
+
+def connected_components_reference_implementation(images):
+  try:
+    # pylint: disable=g-import-not-at-top
+    from scipy.ndimage import measurements
+  except ImportError:
+    logging.exception('Skipping test method because scipy could not be loaded')
+    return
+  image_or_images = np.asarray(images)
+  if len(image_or_images.shape) == 2:
+    images = image_or_images[None, :, :]
+  elif len(image_or_images.shape) == 3:
+    images = image_or_images
+  components = np.asarray([measurements.label(image)[0] for image in images])
+  # Get the count of nonzero ids for each image, and offset each image's nonzero
+  # ids using the cumulative sum.
+  num_ids_per_image = components.reshape(
+      [-1, components.shape[1] * components.shape[2]]).max(axis=-1)
+  positive_id_start_per_image = np.cumsum(num_ids_per_image)
+  for i in range(components.shape[0]):
+    new_id_start = positive_id_start_per_image[i - 1] if i > 0 else 0
+    components[i, components[i] > 0] += new_id_start
+  if len(image_or_images.shape) == 2:
+    return components[0, :, :]
+  else:
+    return components
+
+
+if __name__ == '__main__':
+  googletest.main()
diff --git a/tensorflow/contrib/image/python/kernel_tests/single_image_random_dot_stereograms_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/single_image_random_dot_stereograms_ops_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf0c97245fc5c70469350ec66023f4d1474930e2
--- /dev/null
+++ b/tensorflow/contrib/image/python/kernel_tests/single_image_random_dot_stereograms_ops_test.py
@@ -0,0 +1,87 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for python single_image_random_dot_stereograms_ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.contrib.image.python.ops.single_image_random_dot_stereograms \
+    import single_image_random_dot_stereograms
+from tensorflow.python.client import session
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import googletest
+
+class SingleImageRandomDotStereogramsTest(test_util.TensorFlowTestCase):
+
+  def test_shape_function_default(self):
+    """
+    NOTE: The output_image_shape is [X, Y, C]
+    while the output data is [Y, X, C] (or [H, W, C]).
+    As a result, by default the output_image_shape has the value
+    of [1024, 768, 1], but the output data will be [768, 1024, 1].
+    """
+    x_np = [[1, 2, 3, 3, 2, 1],
+            [1, 2, 3, 4, 5, 2],
+            [1, 2, 3, 4, 5, 3],
+            [1, 2, 3, 4, 5, 4],
+            [6, 5, 4, 4, 5, 5]]
+    x_tf = constant_op.constant(x_np)
+    # By default [1024, 768, 1] => [768, 1024, 1].
+    sirds_1 = single_image_random_dot_stereograms(
+        x_tf,
+        convergence_dots_size=8,
+        number_colors=256,
+        normalize=True)
+    shape_1 = sirds_1.get_shape().as_list()
+    self.assertEqual(shape_1, [768, 1024, 1])
+    with self.test_session():
+      r_tf_1 = sirds_1.eval()
+      self.assertAllEqual(shape_1, r_tf_1.shape)
+
+    # If color > 256 then [1024, 768, 3] => [768, 1024, 3].
+    sirds_2 = single_image_random_dot_stereograms(
+        x_tf,
+        convergence_dots_size=8,
+        number_colors=512,
+        normalize=True)
+    shape_2 = sirds_2.get_shape().as_list()
+    self.assertEqual(shape_2, [768, 1024, 3])
+    with self.test_session():
+      r_tf_2 = sirds_2.eval()
+      self.assertAllEqual(shape_2, r_tf_2.shape)
+
+    # If explicitly set output_image_shape to [1200, 800, 1],
+    # then the output data should be [800, 1200, 1].
+    sirds_3 = single_image_random_dot_stereograms(
+        x_tf,
+        convergence_dots_size=8,
+        number_colors=256,
+        normalize=True,
+        output_image_shape=[1200, 800, 1])
+    shape_3 = sirds_3.get_shape().as_list()
+    self.assertEqual(shape_3, [800, 1200, 1])
+    with self.test_session():
+      r_tf_3 = sirds_3.eval()
+      self.assertAllEqual(shape_3, r_tf_3.shape)
+
+
+if __name__ == '__main__':
+  googletest.main()
diff --git a/tensorflow/contrib/image/python/ops/image_ops.py b/tensorflow/contrib/image/python/ops/image_ops.py
index faedee6f87772016561671bacd87f88657eafffb..63377ae50310db51a3111c5a6e00df7d75dccc0b 100644
--- a/tensorflow/contrib/image/python/ops/image_ops.py
+++ b/tensorflow/contrib/image/python/ops/image_ops.py
@@ -24,6 +24,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import resource_loader
@@ -34,6 +35,7 @@ _image_ops_so = loader.load_op_library(
 _IMAGE_DTYPES = set(
     [dtypes.uint8, dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64])
 
+ops.RegisterShape("ImageConnectedComponents")(common_shapes.call_cpp_shape_fn)
 ops.RegisterShape("ImageProjectiveTransform")(common_shapes.call_cpp_shape_fn)
 
 
@@ -395,4 +397,72 @@ def bipartite_match(distance_mat,
   return result
 
 
+def connected_components(images):
+  """Labels the connected components in a batch of images.
+
+  A component is a set of pixels in a single input image, which are all adjacent
+  and all have the same non-zero value. The components using a squared
+  connectivity of one (all True entries are joined with their neighbors above,
+  below, left, and right). Components across all images have consecutive ids 1
+  through n. Components are labeled according to the first pixel of the
+  component appearing in row-major order (lexicographic order by
+  image_index_in_batch, row, col). Zero entries all have an output id of 0.
+
+  This op is equivalent with `scipy.ndimage.measurements.label` on a 2D array
+  with the default structuring element (which is the connectivity used here).
+
+  Args:
+    images: A 2D (H, W) or 3D (N, H, W) Tensor of boolean image(s).
+
+  Returns:
+    Components with the same shape as `images`. False entries in `images` have
+    value 0, and all True entries map to a component id > 0.
+
+  Raises:
+    TypeError: if `images` is not 2D or 3D.
+  """
+  with ops.name_scope("connected_components"):
+    image_or_images = ops.convert_to_tensor(images, name="images")
+    if len(image_or_images.get_shape()) == 2:
+      images = image_or_images[None, :, :]
+    elif len(image_or_images.get_shape()) == 3:
+      images = image_or_images
+    else:
+      raise TypeError(
+          "images should have rank 2 (HW) or 3 (NHW). Static shape is %s" %
+          image_or_images.get_shape())
+    components = gen_image_ops.image_connected_components(images)
+
+    # TODO(ringwalt): Component id renaming should be done in the op, to avoid
+    # constructing multiple additional large tensors.
+    components_flat = array_ops.reshape(components, [-1])
+    unique_ids, id_index = array_ops.unique(components_flat)
+    id_is_zero = array_ops.where(math_ops.equal(unique_ids, 0))[:, 0]
+    # Map each nonzero id to consecutive values.
+    nonzero_consecutive_ids = math_ops.range(
+        array_ops.shape(unique_ids)[0] - array_ops.shape(id_is_zero)[0]) + 1
+
+    def no_zero():
+      # No need to insert a zero into the ids.
+      return nonzero_consecutive_ids
+
+    def has_zero():
+      # Insert a zero in the consecutive ids where zero appears in unique_ids.
+      # id_is_zero has length 1.
+      zero_id_ind = math_ops.to_int32(id_is_zero[0])
+      ids_before = nonzero_consecutive_ids[:zero_id_ind]
+      ids_after = nonzero_consecutive_ids[zero_id_ind:]
+      return array_ops.concat([ids_before, [0], ids_after], axis=0)
+
+    new_ids = control_flow_ops.cond(
+        math_ops.equal(array_ops.shape(id_is_zero)[0], 0), no_zero, has_zero)
+    components = array_ops.reshape(
+        array_ops.gather(new_ids, id_index), array_ops.shape(components))
+    if len(image_or_images.get_shape()) == 2:
+      return components[0, :, :]
+    else:
+      return components
+
+
 ops.NotDifferentiable("BipartiteMatch")
+ops.NotDifferentiable("ImageConnectedComponents")
diff --git a/tensorflow/contrib/keras/api/__init__.py b/tensorflow/contrib/keras/api/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..52e83069cb0c68b510da46149248369dce376647 100644
--- a/tensorflow/contrib/keras/api/__init__.py
+++ b/tensorflow/contrib/keras/api/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/kernel_methods/BUILD b/tensorflow/contrib/kernel_methods/BUILD
index a2f320ab11291e4049c8367e1f133a4fbcb72a62..eff7dfeb4c1117e40f4faf43c5e92a52cffd6528 100644
--- a/tensorflow/contrib/kernel_methods/BUILD
+++ b/tensorflow/contrib/kernel_methods/BUILD
@@ -83,9 +83,11 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":kernel_methods",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_for_generated_wrappers",
+        "//third_party/py/numpy",
     ],
 )
 
diff --git a/tensorflow/contrib/kernel_methods/python/losses.py b/tensorflow/contrib/kernel_methods/python/losses.py
index 208b0e1c9dbe93fb99e17e7be5ed5b6e30f4e201..f182fef067b7f523bc5ca63227265be40528b171 100644
--- a/tensorflow/contrib/kernel_methods/python/losses.py
+++ b/tensorflow/contrib/kernel_methods/python/losses.py
@@ -73,13 +73,13 @@ def sparse_multiclass_hinge_loss(
                                                               labels)) as scope:
 
     # Check logits Tensor has valid rank.
-    logits_shape = logits.get_shape()
-    logits_rank = logits_shape.ndims
+    logits_rank = logits.get_shape().ndims
     if logits_rank != 2:
       raise ValueError(
           'logits should have rank 2 ([batch_size, num_classes]). Given rank is'
           ' {}'.format(logits_rank))
-    batch_size, num_classes = logits_shape[0].value, logits_shape[1].value
+    logits_shape = array_ops.shape(logits)
+    batch_size, num_classes = logits_shape[0], logits_shape[1]
     logits = math_ops.to_float(logits)
 
     # Check labels have valid type.
diff --git a/tensorflow/contrib/kernel_methods/python/losses_test.py b/tensorflow/contrib/kernel_methods/python/losses_test.py
index 8a1a5ffe56ba283bfae514738fa87e4055f8934e..d38d8041ce1216dfb5af6e93984b35e71008610a 100644
--- a/tensorflow/contrib/kernel_methods/python/losses_test.py
+++ b/tensorflow/contrib/kernel_methods/python/losses_test.py
@@ -18,10 +18,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.contrib.kernel_methods.python import losses
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
@@ -114,6 +117,26 @@ class SparseMulticlassHingeLossTest(test.TestCase):
       loss = losses.sparse_multiclass_hinge_loss(labels, logits)
       self.assertAlmostEqual(loss.eval(), 0.0, 3)
 
+  def testUnknownShape(self):
+    """Result keeps same with `testZeroLossInt32Labels`"""
+    logits_np = np.array([[1.2, -1.4, -1.0],
+                          [1.4, 1.8, 4.0],
+                          [0.5, 1.8, -1.0]])
+    labels_np = np.array([0, 2, 1], dtype=np.int32)
+
+    logits_shapes = [[3, 3],  # batch_size, num_classes
+                     [None, 3],
+                     [3, None],
+                     [None, None]]
+
+    for batch_size, num_classes in logits_shapes:
+      with self.test_session():
+        logits = array_ops.placeholder(dtypes.float32, shape=(batch_size, num_classes))
+        labels = array_ops.placeholder(dtypes.int32, shape=(batch_size,))
+        loss = losses.sparse_multiclass_hinge_loss(labels, logits)
+        result = loss.eval(feed_dict={logits: logits_np, labels: labels_np})
+        self.assertAlmostEqual(result, 0.0, 3)
+
   def testCorrectPredictionsSomeClassesInsideMargin(self):
     """Loss is > 0 even if true class logits are higher than other classes."""
     with self.test_session():
diff --git a/tensorflow/contrib/kfac/examples/convnet.py b/tensorflow/contrib/kfac/examples/convnet.py
index 558bc294bc8ac129b3055ed46623c78a0d5a33e3..39d80addaac1fe855a37255b32bf4412b99df46a 100644
--- a/tensorflow/contrib/kfac/examples/convnet.py
+++ b/tensorflow/contrib/kfac/examples/convnet.py
@@ -286,7 +286,7 @@ def minimize_loss_distributed(task_id, num_worker_tasks, num_ps_tasks, master,
         damping=0.001,
         layer_collection=layer_collection,
         momentum=0.9)
-    inv_update_queue = oq.OpQueue(optimizer.inv_updates_dict.values())
+    inv_update_queue = oq.OpQueue(optimizer.inv_update_ops)
     sync_optimizer = tf.train.SyncReplicasOptimizer(
         opt=optimizer,
         replicas_to_aggregate=_num_gradient_tasks(num_worker_tasks))
diff --git a/tensorflow/contrib/kfac/examples/mlp.py b/tensorflow/contrib/kfac/examples/mlp.py
index 4275ceadc210ff471109b596e1c9aa260ce31ab5..0f0dbb53f45dfefe69aaa9e25caf6ba0a3cf449e 100644
--- a/tensorflow/contrib/kfac/examples/mlp.py
+++ b/tensorflow/contrib/kfac/examples/mlp.py
@@ -239,3 +239,85 @@ def train_mnist_multitower(data_dir,
       })
   return minimize(
       loss, accuracy, layer_collection, session_config=session_config)
+
+
+def train_mnist_estimator(data_dir, num_epochs, use_fake_data=False):
+  """Train an MLP on MNIST using tf.estimator.
+
+  Args:
+    data_dir: string. Directory to read MNIST examples from.
+    num_epochs: int. Number of passes to make over the training set.
+    use_fake_data: bool. If True, generate a synthetic dataset.
+
+  Returns:
+    accuracy of model on the final minibatch of training data.
+  """
+
+  # Load a dataset.
+  def input_fn():
+    tf.logging.info("Loading MNIST into memory.")
+    return mnist.load_mnist(
+        data_dir,
+        num_epochs=num_epochs,
+        batch_size=64,
+        flatten_images=True,
+        use_fake_data=use_fake_data)
+
+  def model_fn(features, labels, mode, params):
+    """Model function for MLP trained with K-FAC.
+
+    Args:
+      features: Tensor of shape [batch_size, input_size]. Input features.
+      labels: Tensor of shape [batch_size]. Target labels for training.
+      mode: tf.estimator.ModeKey. Must be TRAIN.
+      params: ignored.
+
+    Returns:
+      EstimatorSpec for training.
+
+    Raises:
+      ValueError: If 'mode' is anything other than TRAIN.
+    """
+    del params
+
+    if mode != tf.estimator.ModeKeys.TRAIN:
+      raise ValueError("Only training is supposed with this API.")
+
+    # Build a ConvNet.
+    layer_collection = lc.LayerCollection()
+    loss, accuracy = build_model(
+        features, labels, num_labels=10, layer_collection=layer_collection)
+
+    # Train with K-FAC.
+    global_step = tf.train.get_or_create_global_step()
+    optimizer = opt.KfacOptimizer(
+        learning_rate=tf.train.exponential_decay(
+            0.00002, global_step, 10000, 0.5, staircase=True),
+        cov_ema_decay=0.95,
+        damping=0.0001,
+        layer_collection=layer_collection,
+        momentum=0.99)
+
+    # Run cov_update_op every step. Run 1 inv_update_ops per step.
+    cov_update_op = optimizer.cov_update_op
+    inv_update_op = tf.group(
+        tf.contrib.kfac.utils.batch_execute(
+            global_step, optimizer.inv_update_thunks, batch_size=1))
+    with tf.control_dependencies([cov_update_op, inv_update_op]):
+      train_op = optimizer.minimize(loss, global_step=global_step)
+
+    # Print metrics every 5 sec.
+    hooks = [
+        tf.train.LoggingTensorHook(
+            {
+                "loss": loss,
+                "accuracy": accuracy
+            }, every_n_secs=5),
+    ]
+    return tf.estimator.EstimatorSpec(
+        mode=mode, loss=loss, train_op=train_op, training_hooks=hooks)
+
+  # Train until input_fn() is empty with Estimator. This is a prerequisite for
+  # TPU compatibility.
+  estimator = tf.estimator.Estimator(model_fn=model_fn)
+  estimator.train(input_fn=input_fn)
diff --git a/tensorflow/contrib/kfac/examples/mlp_mnist_main.py b/tensorflow/contrib/kfac/examples/mlp_mnist_main.py
index b318c71a568be2d717745579df24134ceb3b6a0b..9c34ade1d2018135b3636fddb9dcc65839cd59de 100644
--- a/tensorflow/contrib/kfac/examples/mlp_mnist_main.py
+++ b/tensorflow/contrib/kfac/examples/mlp_mnist_main.py
@@ -33,7 +33,11 @@ FLAGS = None
 
 def main(argv):
   _ = argv
-  if FLAGS.num_towers > 1:
+  if FLAGS.use_estimator:
+    if FLAGS.num_towers != 1:
+      raise ValueError("Only 1 device supported in tf.estimator example.")
+    mlp.train_mnist_estimator(FLAGS.data_dir, num_epochs=200)
+  elif FLAGS.num_towers > 1:
     mlp.train_mnist_multitower(
         FLAGS.data_dir, num_epochs=200, num_towers=FLAGS.num_towers)
   else:
@@ -52,5 +56,9 @@ if __name__ == "__main__":
       type=int,
       default=1,
       help="Number of CPUs to split minibatch across.")
+  parser.add_argument(
+      "--use_estimator",
+      action="store_true",
+      help="Use tf.estimator API to train.")
   FLAGS, unparsed = parser.parse_known_args()
   tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/contrib/kfac/examples/mnist.py b/tensorflow/contrib/kfac/examples/mnist.py
index cf92c909f4b5201bc0ffda5703136f46c7058ec6..547c4ab25d589192f2a5b65987be3b05128fe298 100644
--- a/tensorflow/contrib/kfac/examples/mnist.py
+++ b/tensorflow/contrib/kfac/examples/mnist.py
@@ -63,7 +63,7 @@ def load_mnist(data_dir,
     images = mnist_data.train.images
     labels = mnist_data.train.labels
 
-  dataset = tf.contrib.data.Dataset.from_tensor_slices((np.asarray(
+  dataset = tf.data.Dataset.from_tensor_slices((np.asarray(
       images, dtype=np.float32), np.asarray(labels, dtype=np.int64)))
   return (dataset.repeat(num_epochs).shuffle(num_examples).batch(batch_size)
           .make_one_shot_iterator().get_next())
diff --git a/tensorflow/contrib/kfac/examples/tests/convnet_test.py b/tensorflow/contrib/kfac/examples/tests/convnet_test.py
index 3c98c54ef6cbd527aa0035e0b6f40be961c6308d..8d86c2bb5150cd4bc8a2b21ba050e904929e0fe9 100644
--- a/tensorflow/contrib/kfac/examples/tests/convnet_test.py
+++ b/tensorflow/contrib/kfac/examples/tests/convnet_test.py
@@ -96,7 +96,7 @@ class ConvNetTest(tf.test.TestCase):
     """
     x = np.asarray([[1.], [2.]]).astype(np.float32)
     y = np.asarray([1., 2.]).astype(np.float32)
-    x, y = (tf.contrib.data.Dataset.from_tensor_slices((x, y))
+    x, y = (tf.data.Dataset.from_tensor_slices((x, y))
             .repeat(100).batch(2).make_one_shot_iterator().get_next())
     w = tf.get_variable("w", shape=[1, 1], initializer=tf.zeros_initializer())
     y_hat = tf.matmul(x, w)
diff --git a/tensorflow/contrib/kfac/examples/tests/mlp_test.py b/tensorflow/contrib/kfac/examples/tests/mlp_test.py
index 34a942d27f64e2583c686c2ba3240bc636ed918b..22da6c29f1b364d94432315988d844db9b95ec28 100644
--- a/tensorflow/contrib/kfac/examples/tests/mlp_test.py
+++ b/tensorflow/contrib/kfac/examples/tests/mlp_test.py
@@ -53,6 +53,11 @@ class MlpTest(tf.test.TestCase):
       mlp.train_mnist_multitower(
           data_dir=None, num_epochs=1, num_towers=2, use_fake_data=True)
 
+  def testTrainMnistEstimator(self):
+    with tf.Graph().as_default():
+      # Ensure model training doesn't crash.
+      mlp.train_mnist_estimator(data_dir=None, num_epochs=1, use_fake_data=True)
+
 
 if __name__ == "__main__":
   tf.test.main()
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD
index 95fba59e3c96ae3c69e0b154740785b0d2bcb3c9..f4ed978174a9ddd8b54a88e60bfb48a67a2e76d2 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD
@@ -17,12 +17,17 @@ py_test(
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:init_ops",
+        "//tensorflow/python:linalg_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:random_ops",
+        "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "//third_party/py/numpy",
     ],
 )
 
@@ -110,12 +115,15 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/contrib/kfac/python/ops:utils",
+        "//tensorflow/contrib/tpu",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:linalg_ops",
         "//tensorflow/python:random_seed",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py b/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py
index 9b28c45c7263208d21b1514ae5f05b7e81e315a3..bfdb69ad02caaa57827e0ae6b3c9fc0d0ed03754 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.contrib.kfac.python.ops import estimator
 from tensorflow.contrib.kfac.python.ops import layer_collection as lc
 from tensorflow.contrib.kfac.python.ops import utils
@@ -25,11 +27,15 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
+from tensorflow.python.training import training_util
 
 _ALL_ESTIMATION_MODES = ["gradients", "empirical", "curvature_prop", "exact"]
 
@@ -119,6 +125,114 @@ class EstimatorTest(test.TestCase):
         estimator.FisherEstimator([self.weights], 0.1, 0.2,
                                   self.layer_collection, mode)
 
+  def test_cov_update_thunks(self):
+    """Ensures covariance update ops run once per global_step."""
+    with self._graph.as_default(), self.test_session() as sess:
+      fisher_estimator = estimator.FisherEstimator(
+          variables=[self.weights],
+          layer_collection=self.layer_collection,
+          cov_ema_decay=0.0,
+          damping=0.0)
+
+      # Construct an op that executes one covariance update per step.
+      global_step = training_util.get_or_create_global_step()
+      cov_matrices = [
+          fisher_factor.get_cov()
+          for fisher_factor in self.layer_collection.get_factors()
+      ]
+      cov_update_op_thunks = fisher_estimator.cov_update_thunks
+      cov_update_op = control_flow_ops.case(
+          [(math_ops.equal(global_step, i), thunk)
+           for i, thunk in enumerate(cov_update_op_thunks)])
+      increment_global_step = global_step.assign_add(1)
+
+      sess.run(variables.global_variables_initializer())
+      initial_cov_values = sess.run(cov_matrices)
+
+      # Ensure there's one update per covariance matrix.
+      self.assertEqual(len(cov_matrices), len(cov_update_op_thunks))
+
+      # Test is no-op if only 1 covariance matrix.
+      assert len(cov_matrices) > 1
+
+      for i in range(len(cov_matrices)):
+        # Compare new and old covariance values
+        new_cov_values = sess.run(cov_matrices)
+        is_cov_equal = [
+            np.allclose(initial_cov_value, new_cov_value)
+            for (initial_cov_value,
+                 new_cov_value) in zip(initial_cov_values, new_cov_values)
+        ]
+        num_cov_equal = sum(is_cov_equal)
+
+        # Ensure exactly one covariance matrix changes per step.
+        self.assertEqual(num_cov_equal, len(cov_matrices) - i)
+
+        # Run all covariance update ops.
+        sess.run(cov_update_op)
+        sess.run(increment_global_step)
+
+  def test_inv_update_thunks(self):
+    """Ensures inverse update ops run once per global_step."""
+    with self._graph.as_default(), self.test_session() as sess:
+      fisher_estimator = estimator.FisherEstimator(
+          variables=[self.weights],
+          layer_collection=self.layer_collection,
+          cov_ema_decay=0.0,
+          damping=0.0)
+
+      # Construct op that updates one inverse per global step.
+      global_step = training_util.get_or_create_global_step()
+      inv_matrices = [
+          matrix
+          for fisher_factor in self.layer_collection.get_factors()
+          for matrix in fisher_factor._inverses_by_damping.values()
+      ]
+      inv_update_op_thunks = fisher_estimator.inv_update_thunks
+      inv_update_op = control_flow_ops.case(
+          [(math_ops.equal(global_step, i), thunk)
+           for i, thunk in enumerate(inv_update_op_thunks)])
+      increment_global_step = global_step.assign_add(1)
+
+      sess.run(variables.global_variables_initializer())
+      initial_inv_values = sess.run(inv_matrices)
+
+      # Ensure there's one update per inverse matrix. This is true as long as
+      # there's no fan-in/fan-out or parameter re-use.
+      self.assertEqual(len(inv_matrices), len(inv_update_op_thunks))
+
+      # Test is no-op if only 1 invariance matrix.
+      assert len(inv_matrices) > 1
+
+      # Assign each covariance matrix a value other than the identity. This
+      # ensures that the inverse matrices are updated to something different as
+      # well.
+      cov_matrices = [
+          fisher_factor.get_cov()
+          for fisher_factor in self.layer_collection.get_factors()
+      ]
+      sess.run([
+          cov_matrix.assign(2 * linalg_ops.eye(int(cov_matrix.shape[0])))
+          for cov_matrix in cov_matrices
+      ])
+
+      for i in range(len(inv_matrices)):
+        # Compare new and old inverse values
+        new_inv_values = sess.run(inv_matrices)
+        is_inv_equal = [
+            np.allclose(initial_inv_value, new_inv_value)
+            for (initial_inv_value,
+                 new_inv_value) in zip(initial_inv_values, new_inv_values)
+        ]
+        num_inv_equal = sum(is_inv_equal)
+
+        # Ensure exactly one inverse matrix changes per step.
+        self.assertEqual(num_inv_equal, len(inv_matrices) - i)
+
+        # Run all inverse update ops.
+        sess.run(inv_update_op)
+        sess.run(increment_global_step)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py
index 5f2b5c6cace9cd18f4cc5590ff55a9b39680a381..82accd57f0c37d140238f1884fce956654d14227 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py
@@ -40,6 +40,21 @@ def _make_psd(dim):
   return array_ops.constant(mat)
 
 
+class UtilsTest(test.TestCase):
+
+  def testComputePiTracenorm(self):
+    with ops.Graph().as_default(), self.test_session() as sess:
+      random_seed.set_random_seed(200)
+      left_factor = array_ops.diag([1., 2., 0., 1.])
+      right_factor = array_ops.ones([2., 2.])
+
+      # pi is the sqrt of the left trace norm divided by the right trace norm
+      pi = fb.compute_pi_tracenorm(left_factor, right_factor)
+
+      pi_val = sess.run(pi)
+      self.assertEqual(1., pi_val)
+
+
 class FullFBTest(test.TestCase):
 
   def testFullFBInitSingleTensor(self):
@@ -301,8 +316,7 @@ class FullyConnectedDiagonalFB(test.TestCase):
     multiply_result_big, multiply_inverse_result_big = self.runFisherBlockOps(
         self.w, [self.inputs], [self.outputs], [self.output_grads])
     multiply_result_small, multiply_inverse_result_small = (
-        self.runFisherBlockOps(self.w,
-                               np.split(self.inputs, 2),
+        self.runFisherBlockOps(self.w, np.split(self.inputs, 2),
                                np.split(self.outputs, 2),
                                np.split(self.output_grads, 2)))
 
@@ -584,8 +598,7 @@ class ConvDiagonalFBTest(test.TestCase):
     multiply_result_big, multiply_inverse_result_big = self.runFisherBlockOps(
         self.w, [self.inputs], [self.outputs], [self.output_grads])
     multiply_result_small, multiply_inverse_result_small = (
-        self.runFisherBlockOps(self.w,
-                               np.split(self.inputs, 2),
+        self.runFisherBlockOps(self.w, np.split(self.inputs, 2),
                                np.split(self.outputs, 2),
                                np.split(self.output_grads, 2)))
 
@@ -608,8 +621,9 @@ class ConvDiagonalFBTest(test.TestCase):
         self.kernel_size, self.kernel_size, self.input_channels + 1,
         self.output_channels
     ])
-    expected_result = (expected_result[:, :, 0:-1, :], np.reshape(
-        expected_result[:, :, -1, :], [self.output_channels]))
+    expected_result = (expected_result[:, :, 0:-1, :],
+                       np.reshape(expected_result[:, :, -1, :],
+                                  [self.output_channels]))
 
     self.assertEqual(len(result), 2)
     self.assertAllClose(expected_result[0], result[0])
@@ -692,8 +706,8 @@ class ConvKFCBasicFBTest(test.TestCase):
       sess.run(block._input_factor.make_inverse_update_ops())
       sess.run(block._output_factor.make_inverse_update_ops())
 
-      vector = (np.arange(1, 15).reshape(7, 2).astype(np.float32), np.arange(
-          2, 4).reshape(2, 1).astype(np.float32))
+      vector = (np.arange(1, 15).reshape(7, 2).astype(np.float32),
+                np.arange(2, 4).reshape(2, 1).astype(np.float32))
       output = block.multiply_inverse((array_ops.constant(vector[0]),
                                        array_ops.constant(vector[1])))
 
@@ -776,11 +790,50 @@ class ConvKFCBasicFBTest(test.TestCase):
       self.assertAllClose(output_flat, explicit)
 
 
+class FullyConnectedSeriesFBTest(test.TestCase):
+
+  def testFullyConnectedSeriesFBInit(self):
+    with ops.Graph().as_default():
+      random_seed.set_random_seed(200)
+      inputs = array_ops.constant([1., 2.])
+      outputs = array_ops.constant([3., 4.])
+      block = fb.FullyConnectedSeriesFB(
+          lc.LayerCollection(), inputs=[inputs], outputs=[outputs])
+      self.assertAllEqual([outputs], block.tensors_to_compute_grads())
+
+  def testInstantiateFactorsHasBias(self):
+    with ops.Graph().as_default():
+      random_seed.set_random_seed(200)
+      inputs = array_ops.constant([[1., 2.], [3., 4.]])
+      outputs = array_ops.constant([[3., 4.], [5., 6.]])
+      block = fb.FullyConnectedSeriesFB(
+          lc.LayerCollection(),
+          inputs=[inputs],
+          outputs=[outputs],
+          has_bias=True)
+      grads = outputs**2
+      block.instantiate_factors(((grads,),), 0.5)
+
+  def testInstantiateFactorsNoBias(self):
+    with ops.Graph().as_default():
+      random_seed.set_random_seed(200)
+      inputs = array_ops.constant([[1., 2.], [3., 4.]])
+      outputs = array_ops.constant([[3., 4.], [5., 6.]])
+      block = fb.FullyConnectedSeriesFB(
+          lc.LayerCollection(),
+          inputs=[inputs],
+          outputs=[outputs],
+          has_bias=False)
+      grads = outputs**2
+      block.instantiate_factors(((grads,),), 0.5)
+
+
 def as_tensors(tensor_or_tuple):
   """Converts a potentially nested tuple of np.array to Tensors."""
   if isinstance(tensor_or_tuple, (tuple, list)):
     return tuple(as_tensors(t) for t in tensor_or_tuple)
   return ops.convert_to_tensor(tensor_or_tuple)
 
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
index 5e2ce5a3096f5b523fafad56be742154d79e4803..753378d9f4a0d8762bafbee2ec27d6c71783dda1 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py
@@ -35,18 +35,27 @@ from tensorflow.python.platform import test
 
 class MaybeColocateTest(test.TestCase):
 
+  def setUp(self):
+    self._colocate_cov_ops_with_inputs = ff.COLOCATE_COV_OPS_WITH_INPUTS
+
+  def tearDown(self):
+    ff.set_global_constants(
+        colocate_cov_ops_with_inputs=self._colocate_cov_ops_with_inputs)
+
   def testFalse(self):
+    ff.set_global_constants(colocate_cov_ops_with_inputs=False)
     with tf_ops.Graph().as_default():
       a = constant_op.constant([2.0], name='a')
-      with ff._maybe_colocate_with(a, False):
+      with ff.maybe_colocate_with(a):
         b = constant_op.constant(3.0, name='b')
       self.assertEqual([b'loc:@a'], a.op.colocation_groups())
       self.assertEqual([b'loc:@b'], b.op.colocation_groups())
 
   def testTrue(self):
+    ff.set_global_constants(colocate_cov_ops_with_inputs=True)
     with tf_ops.Graph().as_default():
       a = constant_op.constant([2.0], name='a')
-      with ff._maybe_colocate_with(a, True):
+      with ff.maybe_colocate_with(a):
         b = constant_op.constant(3.0, name='b')
       self.assertEqual([b'loc:@a'], a.op.colocation_groups())
       self.assertEqual([b'loc:@a'], b.op.colocation_groups())
@@ -67,12 +76,19 @@ class FisherFactorTestingDummy(ff.FisherFactor):
   def _num_sources(self):
     return 1
 
+  @property
+  def _dtype(self):
+    return dtypes.float32
+
   def _compute_new_cov(self):
     raise NotImplementedError
 
   def instantiate_covariance(self):
     pass
 
+  def make_inverse_update_ops(self):
+    return []
+
 
 class InverseProvidingFactorTestingDummy(ff.InverseProvidingFactor):
   """Dummy class to test the non-abstract methods on ff.InverseProvidingFactor.
@@ -94,6 +110,10 @@ class InverseProvidingFactorTestingDummy(ff.InverseProvidingFactor):
   def _num_sources(self):
     return 1
 
+  @property
+  def _dtype(self):
+    return dtypes.float32
+
   def _compute_new_cov(self):
     raise NotImplementedError
 
@@ -109,7 +129,7 @@ class NumericalUtilsTest(test.TestCase):
       random_seed.set_random_seed(200)
 
       x = npr.randn(100, 3)
-      cov = ff._compute_cov(array_ops.constant(x))
+      cov = ff.compute_cov(array_ops.constant(x))
       np_cov = np.dot(x.T, x) / x.shape[0]
 
       self.assertAllClose(sess.run(cov), np_cov)
@@ -121,7 +141,7 @@ class NumericalUtilsTest(test.TestCase):
 
       normalizer = 10.
       x = npr.randn(100, 3)
-      cov = ff._compute_cov(array_ops.constant(x), normalizer)
+      cov = ff.compute_cov(array_ops.constant(x), normalizer=normalizer)
       np_cov = np.dot(x.T, x) / normalizer
 
       self.assertAllClose(sess.run(cov), np_cov)
@@ -132,7 +152,7 @@ class NumericalUtilsTest(test.TestCase):
 
       m, n = 3, 4
       a = npr.randn(m, n)
-      a_homog = ff._append_homog(array_ops.constant(a))
+      a_homog = ff.append_homog(array_ops.constant(a))
       np_result = np.hstack([a, np.ones((m, 1))])
 
       self.assertAllClose(sess.run(a_homog), np_result)
@@ -267,13 +287,13 @@ class InverseProvidingFactorTest(test.TestCase):
       for i in range(1, ff.EIGENVALUE_DECOMPOSITION_THRESHOLD + 1):
         factor.register_damped_inverse(1. / i)
       ops = factor.make_inverse_update_ops()
-      self.assertEqual(ff.EIGENVALUE_DECOMPOSITION_THRESHOLD, len(ops))
+      self.assertEqual(1, len(ops))
 
       sess.run(tf_variables.global_variables_initializer())
       new_invs = []
+      sess.run(ops)
       for i in range(1, ff.EIGENVALUE_DECOMPOSITION_THRESHOLD + 1):
         # The inverse op will assign the damped inverse of cov to the inv var.
-        sess.run(ops[i - 1])
         new_invs.append(sess.run(factor._inverses_by_damping[1. / i]))
       # We want to see that the new invs are all different from each other.
       for i in range(len(new_invs)):
@@ -331,6 +351,16 @@ class FullFactorTest(test.TestCase):
       factor = ff.FullFactor((tensor,), 32)
       self.assertEqual([6, 6], factor.get_cov().get_shape().as_list())
 
+  def testFullFactorInitFloat64(self):
+    with tf_ops.Graph().as_default():
+      dtype = dtypes.float64_ref
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c')
+      factor = ff.FullFactor((tensor,), 32)
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual([6, 6], cov.get_shape().as_list())
+
   def testMakeCovarianceUpdateOp(self):
     with tf_ops.Graph().as_default(), self.test_session() as sess:
       random_seed.set_random_seed(200)
@@ -351,6 +381,16 @@ class NaiveDiagonalFactorTest(test.TestCase):
       factor = ff.NaiveDiagonalFactor((tensor,), 32)
       self.assertEqual([6, 1], factor.get_cov().get_shape().as_list())
 
+  def testNaiveDiagonalFactorInitFloat64(self):
+    with tf_ops.Graph().as_default():
+      dtype = dtypes.float64_ref
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c')
+      factor = ff.NaiveDiagonalFactor((tensor,), 32)
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual([6, 1], cov.get_shape().as_list())
+
   def testMakeCovarianceUpdateOp(self):
     with tf_ops.Graph().as_default(), self.test_session() as sess:
       random_seed.set_random_seed(200)
@@ -364,18 +404,25 @@ class NaiveDiagonalFactorTest(test.TestCase):
 
 class FullyConnectedKroneckerFactorTest(test.TestCase):
 
-  def _testFullyConnectedKroneckerFactorInit(self, has_bias, final_shape):
+  def _testFullyConnectedKroneckerFactorInit(self,
+                                             has_bias,
+                                             final_shape,
+                                             dtype=dtypes.float32_ref):
     with tf_ops.Graph().as_default():
       random_seed.set_random_seed(200)
-      tensor = array_ops.ones((2, 3), name='a/b/c')
+      tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c')
       factor = ff.FullyConnectedKroneckerFactor((tensor,), has_bias=has_bias)
-      self.assertEqual(final_shape, factor.get_cov().get_shape().as_list())
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual(final_shape, cov.get_shape().as_list())
 
   def testFullyConnectedKroneckerFactorInitNoBias(self):
-    self._testFullyConnectedKroneckerFactorInit(False, [3, 3])
+    for dtype in (dtypes.float32_ref, dtypes.float64_ref):
+      self._testFullyConnectedKroneckerFactorInit(False, [3, 3], dtype=dtype)
 
   def testFullyConnectedKroneckerFactorInitWithBias(self):
-    self._testFullyConnectedKroneckerFactorInit(True, [4, 4])
+    for dtype in (dtypes.float32_ref, dtypes.float64_ref):
+      self._testFullyConnectedKroneckerFactorInit(True, [4, 4], dtype=dtype)
 
   def testMakeCovarianceUpdateOpWithBias(self):
     with tf_ops.Graph().as_default(), self.test_session() as sess:
@@ -418,6 +465,18 @@ class ConvInputKroneckerFactorTest(test.TestCase):
       self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1],
                        factor.get_cov().get_shape().as_list())
 
+  def testConvInputKroneckerFactorInitFloat64(self):
+    with tf_ops.Graph().as_default():
+      dtype = dtypes.float64_ref
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c')
+      factor = ff.ConvInputKroneckerFactor(
+          tensor, (1, 2, 3, 4), 3, 2, has_bias=True)
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1],
+                       cov.get_shape().as_list())
+
   def testMakeCovarianceUpdateOpWithBias(self):
     with tf_ops.Graph().as_default(), self.test_session() as sess:
       random_seed.set_random_seed(200)
@@ -453,6 +512,16 @@ class ConvOutputKroneckerFactorTest(test.TestCase):
       factor = ff.ConvOutputKroneckerFactor((tensor,))
       self.assertEqual([5, 5], factor.get_cov().get_shape().as_list())
 
+  def testConvOutputKroneckerFactorInitFloat64(self):
+    with tf_ops.Graph().as_default():
+      dtype = dtypes.float64_ref
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3, 4, 5), dtype=dtype, name='a/b/c')
+      factor = ff.ConvOutputKroneckerFactor((tensor,))
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual([5, 5], cov.get_shape().as_list())
+
   def testConvOutputKroneckerFactorInitNotEnoughDims(self):
     with tf_ops.Graph().as_default():
       random_seed.set_random_seed(200)
@@ -471,5 +540,49 @@ class ConvOutputKroneckerFactorTest(test.TestCase):
       self.assertAllClose([[43, 46.5], [46.5, 51.5]], new_cov)
 
 
+class FullyConnectedMultiKFTest(test.TestCase):
+
+  def testFullyConnectedMultiKFInit(self):
+    with tf_ops.Graph().as_default():
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3), name='a/b/c')
+      tensor_list = [tensor]
+      factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=False)
+      self.assertEqual([3, 3], factor.get_cov().get_shape().as_list())
+
+  def testFullyConnectedMultiKFInitFloat64(self):
+    with tf_ops.Graph().as_default():
+      dtype = dtypes.float64_ref
+      random_seed.set_random_seed(200)
+      tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c')
+      tensor_list = [tensor]
+      factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=False)
+      cov = factor.get_cov()
+      self.assertEqual(cov.dtype, dtype)
+      self.assertEqual([3, 3], cov.get_shape().as_list())
+
+  def testMakeCovarianceUpdateOpWithBias(self):
+    with tf_ops.Graph().as_default(), self.test_session() as sess:
+      random_seed.set_random_seed(200)
+      tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c')
+      tensor_list = [tensor]
+      factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=True)
+
+      sess.run(tf_variables.global_variables_initializer())
+      new_cov = sess.run(factor.make_covariance_update_op(.5))
+      self.assertAllClose([[3, 3.5, 1], [3.5, 5.5, 1.5], [1, 1.5, 1]], new_cov)
+
+  def testMakeCovarianceUpdateOpNoBias(self):
+    with tf_ops.Graph().as_default(), self.test_session() as sess:
+      random_seed.set_random_seed(200)
+      tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c')
+      tensor_list = [tensor]
+      factor = ff.FullyConnectedMultiKF((tensor_list,))
+
+      sess.run(tf_variables.global_variables_initializer())
+      new_cov = sess.run(factor.make_covariance_update_op(.5))
+      self.assertAllClose([[3, 3.5], [3.5, 5.5]], new_cov)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py b/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py
index 39ce3e9337157c8206107bc40c489e44019743ab..63f45ea55b3d1f65a113e8c81a822a08613672df 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py
@@ -114,5 +114,76 @@ class CategoricalLogitsNegativeLogProbLossTest(test.TestCase):
       self.assertEqual(loss.num_registered_minibatches, num_towers)
 
 
+class OnehotCategoricalLogitsNegativeLogProbLossTest(test.TestCase):
+
+  def testSample(self):
+    """Ensure samples can be drawn."""
+    with ops.Graph().as_default(), self.test_session() as sess:
+      logits = np.asarray([
+          [0., 0., 0.],  #
+          [1., -1., 0.]
+      ]).astype(np.float32)
+      loss = loss_functions.OnehotCategoricalLogitsNegativeLogProbLoss(
+          array_ops.constant(logits))
+      sample = loss.sample(42)
+      sample = sess.run(sample)
+      self.assertEqual(sample.shape, (2, 3))
+
+  def testEvaluateOnTargets(self):
+    """Ensure log probability can be evaluated correctly."""
+    with ops.Graph().as_default(), self.test_session() as sess:
+      logits = np.asarray([
+          [0., 0., 0.],  #
+          [1., -1., 0.]
+      ]).astype(np.float32)
+      targets = np.asarray([2, 1]).astype(np.int32)
+      loss = loss_functions.OnehotCategoricalLogitsNegativeLogProbLoss(
+          array_ops.constant(logits), targets=array_ops.one_hot(targets, 3))
+      neg_log_prob = loss.evaluate()
+      neg_log_prob = sess.run(neg_log_prob)
+
+      # Calculate explicit log probability of targets.
+      probs = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
+      log_probs = np.log([
+          probs[0, targets[0]],  #
+          probs[1, targets[1]]
+      ])
+      expected_log_prob = np.sum(log_probs)
+
+      self.assertAllClose(neg_log_prob, -expected_log_prob)
+
+  def testEvaluateOnSample(self):
+    """Ensure log probability of a sample can be drawn."""
+    with ops.Graph().as_default(), self.test_session() as sess:
+      logits = np.asarray([
+          [0., 0., 0.],  #
+          [1., -1., 0.]
+      ]).astype(np.float32)
+      loss = loss_functions.OnehotCategoricalLogitsNegativeLogProbLoss(
+          array_ops.constant(logits))
+      neg_log_prob = loss.evaluate_on_sample(42)
+
+      # Simply ensure this doesn't crash. As the output is random, it's
+      # difficult to say if the output is correct or not...
+      neg_log_prob = sess.run(neg_log_prob)
+
+  def testMultiMinibatchRegistration(self):
+    """Ensure this loss function supports registering multiple minibatches."""
+    with ops.Graph().as_default():
+      tower_logits = []
+      loss = None
+      num_towers = 5
+      for _ in range(num_towers):
+        logits = random_ops.random_uniform(shape=[2, 3])
+        tower_logits.append(logits)
+        if loss is None:
+          loss = loss_functions.OnehotCategoricalLogitsNegativeLogProbLoss(
+              logits)
+        else:
+          loss.register_additional_minibatch(logits)
+      self.assertListEqual(loss.input_minibatches, tower_logits)
+      self.assertEqual(loss.num_registered_minibatches, num_towers)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py b/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py
index 55fe38e3e9aab2dbd70a45cdc8fa0c208b036db0..97a97adbf5577cd2694d3055acaa59258ad27964 100644
--- a/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py
+++ b/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py
@@ -22,11 +22,15 @@ import numpy as np
 import numpy.random as npr
 
 from tensorflow.contrib.kfac.python.ops import utils
+from tensorflow.contrib.tpu.python.tpu import tpu_function
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import linalg_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
@@ -95,6 +99,18 @@ class SubGraphTest(test.TestCase):
     filtered_list = sub_graph.filter_list(input_list)
     self.assertEqual(filtered_list, [b])
 
+  def testVariableUses(self):
+    with ops.Graph().as_default():
+      var = variable_scope.get_variable('var', shape=[10, 10])
+      resource_var = variable_scope.get_variable(
+          'resource_var', shape=[10, 10], use_resource=True)
+      x = array_ops.zeros([3, 10])
+      z0 = math_ops.matmul(x, var) + math_ops.matmul(x, var)
+      z1 = math_ops.matmul(x, resource_var)
+      sub_graph = utils.SubGraph((z0, z1))
+      self.assertEqual(2, sub_graph.variable_uses(var))
+      self.assertEqual(1, sub_graph.variable_uses(resource_var))
+
 
 class UtilsTest(test.TestCase):
 
@@ -222,18 +238,6 @@ class UtilsTest(test.TestCase):
       self.assertAllClose(b, np.array([4., 5.]))
       self.assertAllClose(c, np.array([[6.], [7.], [8.], [9.]]))
 
-  def testComputePi(self):
-    with ops.Graph().as_default(), self.test_session() as sess:
-      random_seed.set_random_seed(200)
-      left_factor = array_ops.diag([1., 2., 0., 1.])
-      right_factor = array_ops.ones([2., 2.])
-
-      # pi is the sqrt of the left trace norm divided by the right trace norm
-      pi = utils.compute_pi(left_factor, right_factor)
-
-      pi_val = sess.run(pi)
-      self.assertEqual(1., pi_val)
-
   def testPosDefInvCholesky(self):
     with ops.Graph().as_default(), self.test_session() as sess:
       random_seed.set_random_seed(200)
@@ -265,6 +269,62 @@ class UtilsTest(test.TestCase):
       np_inv = np.linalg.inv(x + damp * np.eye(size))
       self.assertAllClose(sess.run(tf_inv), np_inv)
 
+  def testCrossReplicaMean(self):
+    """Ensures that cross_replica_mean() executes only when num_shards > 1."""
+    with ops.Graph().as_default():
+      with tpu_function.tpu_shard_context(4):
+        tensor = array_ops.zeros([], dtype=dtypes.float32)
+        mean = utils.cross_replica_mean(tensor)
+      self.assertNotEqual(mean, tensor)
+
+    with ops.Graph().as_default():
+      with tpu_function.tpu_shard_context(1):
+        tensor = array_ops.zeros([], dtype=dtypes.float32)
+        mean = utils.cross_replica_mean(tensor)
+      self.assertEqual(mean, tensor)
+
+    with ops.Graph().as_default():
+      with self.assertRaises(ValueError):  # Outside of TPU context.
+        tensor = array_ops.zeros([], dtype=dtypes.float32)
+        mean = utils.cross_replica_mean(tensor)
+
+  def testBatchExecute(self):
+    """Ensure batch_execute runs in a round-robin fashion."""
+
+    def increment_var(var):
+      return lambda: var.assign_add(1)
+
+    with ops.Graph().as_default(), self.test_session() as sess:
+      i = variable_scope.get_variable('i', initializer=0)
+      accumulators = [
+          variable_scope.get_variable('var%d' % j, initializer=0)
+          for j in range(3)
+      ]
+      thunks = [increment_var(var) for var in accumulators]
+      increment_accumulators = utils.batch_execute(i, thunks, 2)
+      increment_i = i.assign_add(1)
+
+      sess.run(variables.global_variables_initializer())
+
+      # Ensure one op per thunk.
+      self.assertEqual(3, len(increment_accumulators))
+
+      # Ensure round-robin execution.
+      values = []
+      for _ in range(5):
+        sess.run(increment_accumulators)
+        sess.run(increment_i)
+        values.append(sess.run(accumulators))
+      self.assertAllClose(
+          [
+              [1, 1, 0],  #
+              [2, 1, 1],  #
+              [2, 2, 2],  #
+              [3, 3, 2],  #
+              [4, 3, 3]
+          ],
+          values)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/kfac/python/ops/BUILD b/tensorflow/contrib/kfac/python/ops/BUILD
index b2272a4cee09b35ff672514077b4b128b870b772..ee6549b109399766579b6ea18a987ae2c8275983 100644
--- a/tensorflow/contrib/kfac/python/ops/BUILD
+++ b/tensorflow/contrib/kfac/python/ops/BUILD
@@ -38,6 +38,7 @@ py_library(
         ":utils",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:init_ops",
         "//tensorflow/python:linalg_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:special_math_ops",
@@ -64,6 +65,7 @@ py_library(
     srcs = ["loss_functions.py"],
     srcs_version = "PY2AND3",
     deps = [
+        "//tensorflow/contrib/distributions:distributions_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:tensor_shape",
@@ -195,7 +197,9 @@ py_library(
     srcs = ["utils.py"],
     srcs_version = "PY2AND3",
     deps = [
+        "//tensorflow/contrib/tpu",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:gradients",
diff --git a/tensorflow/contrib/kfac/python/ops/estimator.py b/tensorflow/contrib/kfac/python/ops/estimator.py
index 27ff951f16112e09b82ac6885072d966de09983f..a7b1f9d35c931fc44408be804479e758f28f7110 100644
--- a/tensorflow/contrib/kfac/python/ops/estimator.py
+++ b/tensorflow/contrib/kfac/python/ops/estimator.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 
 import contextlib
 import itertools
-import math
 
 import numpy as np
 
@@ -67,7 +66,21 @@ class _DeviceContextGenerator(object):
 
 
 class FisherEstimator(object):
-  """Fisher estimator class supporting various approximations of the Fisher."""
+  """Fisher estimator class supporting various approximations of the Fisher.
+
+  Attributes:
+    cov_update_thunks: list of no-arg functions. Executing a function adds
+      covariance update ops for a single FisherFactor to the graph.
+    cov_update_ops: List of Ops. Running an op updates covariance matrices for a
+      single FisherFactor.
+    cov_update_op: Op. Running updates covariance matrices for all
+      FisherFactors.
+    inv_update_thunks: list of no-arg functions.  Executing a function adds
+      inverse update ops for a single FisherFactor to the graph.
+    inv_update_ops: List of Ops. Running an op updates inverse matrices for a
+      single FisherFactor.
+    inv_update_op: Op. Running updates inverse matrices for all FisherFactors.
+  """
 
   def __init__(self,
                variables,
@@ -75,7 +88,7 @@ class FisherEstimator(object):
                damping,
                layer_collection,
                estimation_mode="gradients",
-               colocate_gradients_with_ops=False,
+               colocate_gradients_with_ops=True,
                cov_devices=None,
                inv_devices=None):
     """Create a FisherEstimator object.
@@ -111,7 +124,7 @@ class FisherEstimator(object):
           is more expensive to compute than the other three options by a factor
           equal to the output dimension, roughly speaking.
       colocate_gradients_with_ops: Whether we should request gradients be
-          colocated with their respective ops.
+          colocated with their respective ops. (Default: True)
       cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance
           computations will be placed on these devices in a round-robin fashion.
           Can be None, which means that no devices are specified.
@@ -123,12 +136,13 @@ class FisherEstimator(object):
       ValueError: If no losses have been registered with layer_collection.
     """
 
+    self._cov_ema_decay = cov_ema_decay
     self._variables = variables
     self._damping = damping
     self._estimation_mode = estimation_mode
     self._layers = layer_collection
     self._layers.create_subgraph()
-    self._check_registration(variables)
+    self._layers.check_registration(variables)
     self._gradient_fns = {
         "gradients": self._get_grads_lists_gradients,
         "empirical": self._get_grads_lists_empirical,
@@ -136,13 +150,31 @@ class FisherEstimator(object):
         "exact": self._get_grads_lists_exact
     }
     self._colocate_gradients_with_ops = colocate_gradients_with_ops
+
+    # TODO(b/70674513): Factor device placement outside of this class.
     self._cov_device_context_generator = _DeviceContextGenerator(cov_devices)
     if inv_devices == cov_devices:
       self._inv_device_context_generator = self._cov_device_context_generator
     else:
       self._inv_device_context_generator = _DeviceContextGenerator(inv_devices)
-    setup = self._setup(cov_ema_decay)
-    self.cov_update_op, self.inv_update_op, self.inv_updates_dict = setup
+
+    self._instantiate_factors()
+
+    self.cov_update_thunks = [
+        self._create_cov_update_thunk(factor)
+        for factor in self._layers.get_factors()
+    ]
+    self.cov_update_ops = [thunk() for thunk in self.cov_update_thunks]
+    self.cov_update_op = control_flow_ops.group(
+        self.cov_update_ops, name="cov_update_op")
+
+    self.inv_update_thunks = [
+        self._create_inv_update_thunk(factor)
+        for factor in self._layers.get_factors()
+    ]
+    self.inv_update_ops = [thunk() for thunk in self.inv_update_thunks]
+    self.inv_update_op = control_flow_ops.group(
+        self.inv_update_ops, name="inv_update_op")
 
   @property
   def variables(self):
@@ -203,61 +235,8 @@ class FisherEstimator(object):
     return self._apply_transformation(vecs_and_vars,
                                       lambda fb, vec: fb.multiply(vec))
 
-  def _check_registration(self, variables):
-    """Checks that all variable uses have been registered properly.
-
-    Args:
-      variables: List of variables.
-
-    Raises:
-      ValueError: If any registered variables are not included in the list.
-      ValueError: If any variable in the list is not registered.
-      ValueError: If any variable in the list is registered with the wrong
-          number of "uses" in the subgraph recorded (vs the number of times that
-          variable is actually used in the subgraph).
-    """
-    # Note that overlapping parameters (i.e. those that share variables) will
-    # be caught by layer_collection.LayerParametersDict during registration.
-
-    reg_use_map = self._layers.get_use_count_map()
-
-    error_messages = []
-
-    for var in variables:
-      total_uses = self._layers.subgraph.variable_uses(var)
-      reg_uses = reg_use_map[var]
-
-      if reg_uses == 0:
-        error_messages.append("Variable {} not registered.".format(var))
-      elif (not math.isinf(reg_uses)) and reg_uses != total_uses:
-        error_messages.append(
-            "Variable {} registered with wrong number of uses ({} "
-            "registrations vs {} uses).".format(var, reg_uses, total_uses))
-
-    num_get_vars = len(reg_use_map)
-
-    if num_get_vars > len(variables):
-      error_messages.append("{} registered variables were not included in list."
-                            .format(num_get_vars - len(variables)))
-
-    if error_messages:
-      error_messages = [
-          "Found the following errors with variable registration:"
-      ] + error_messages
-      raise ValueError("\n\t".join(error_messages))
-
-  def _setup(self, cov_ema_decay):
-    """Sets up the various operations.
-
-    Args:
-      cov_ema_decay: The decay factor used when calculating the covariance
-          estimate moving averages.
-
-    Returns:
-      A triple (covs_update_op, invs_update_op, inv_updates_dict), where
-      covs_update_op is the grouped Op to update all the covariance estimates,
-      invs_update_op is the grouped Op to update all the inverses, and
-      inv_updates_dict is a dict mapping Op names to individual inverse updates.
+  def _instantiate_factors(self):
+    """Instantiates FisherFactors' variables.
 
     Raises:
       ValueError: If estimation_mode was improperly specified at construction.
@@ -282,20 +261,25 @@ class FisherEstimator(object):
       with self._cov_device_context_generator():
         fb.instantiate_factors(grads_list, self.damping)
 
-    cov_updates = [
-        factor.make_covariance_update_op(cov_ema_decay)
-        for factor in self._layers.get_factors()
-    ]
-    inv_updates = {op.name: op for op in self._get_all_inverse_update_ops()}
+  def _create_cov_update_thunk(self, factor):
+    """Constructs a covariance update thunk for a single FisherFactor."""
+
+    def thunk():
+      with tf_ops.name_scope(
+          "create_cov_update_thunk", values=[self._cov_ema_decay]):
+        return factor.make_covariance_update_op(self._cov_ema_decay)
+
+    return thunk
 
-    return control_flow_ops.group(*cov_updates), control_flow_ops.group(
-        *inv_updates.values()), inv_updates
+  def _create_inv_update_thunk(self, factor):
+    """Constructs an inverse update thunk for a single FisherFactor."""
 
-  def _get_all_inverse_update_ops(self):
-    for factor in self._layers.get_factors():
-      with self._inv_device_context_generator():
-        for op in factor.make_inverse_update_ops():
-          yield op
+    def thunk():
+      with tf_ops.name_scope("create_inv_update_thunk"):
+        with self._inv_device_context_generator():
+          return control_flow_ops.group(factor.make_inverse_update_ops())
+
+    return thunk
 
   def _get_grads_lists_gradients(self, tensors):
     grads_flat = gradients_impl.gradients(
@@ -333,11 +317,7 @@ class FisherEstimator(object):
     return tuple((grad,) for grad in grads_all)
 
   def _get_grads_lists_exact(self, tensors):
-    """Returns a list of all gradients, computing them exactly.
-
-    Args:
-      tensors: Tensors for which to compute gradients.
-    """
+    """No docstring required."""
     # Loop over all coordinates of all losses.
     grads_all = []
     for loss in self._layers.losses:
diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
index e822a1213a4132522be8031401609c78572cb1a6..9436caf9618bc3d3c0dd7b3842420016b119464f 100644
--- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
+++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py
@@ -38,6 +38,7 @@ from __future__ import division
 from __future__ import print_function
 
 import abc
+import enum  # pylint: disable=g-bad-import-order
 
 import six
 
@@ -52,14 +53,61 @@ from tensorflow.python.ops import math_ops
 #   damping /= num_replications ** NORMALIZE_DAMPING_POWER
 NORMALIZE_DAMPING_POWER = 1.0
 
+# Methods for adjusting damping for FisherBlocks. See
+# compute_pi_adjusted_damping() for details.
+PI_OFF_NAME = "off"
+PI_TRACENORM_NAME = "tracenorm"
+PI_TYPE = PI_TRACENORM_NAME
 
-def set_global_constants(normalize_damping_power=None):
+
+def set_global_constants(normalize_damping_power=None, pi_type=None):
   """Sets various global constants used by the classes in this module."""
   global NORMALIZE_DAMPING_POWER
+  global PI_TYPE
 
   if normalize_damping_power is not None:
     NORMALIZE_DAMPING_POWER = normalize_damping_power
 
+  if pi_type is not None:
+    PI_TYPE = pi_type
+
+
+def normalize_damping(damping, num_replications):
+  """Normalize damping after adjusting scale by NORMALIZE_DAMPING_POWER."""
+  if NORMALIZE_DAMPING_POWER:
+    return damping / (num_replications ** NORMALIZE_DAMPING_POWER)
+  return damping
+
+
+def compute_pi_tracenorm(left_cov, right_cov):
+  """Computes the scalar constant pi for Tikhonov regularization/damping.
+
+  pi = sqrt( (trace(A) / dim(A)) / (trace(B) / dim(B)) )
+  See section 6.3 of https://arxiv.org/pdf/1503.05671.pdf for details.
+
+  Args:
+    left_cov: The left Kronecker factor "covariance".
+    right_cov: The right Kronecker factor "covariance".
+
+  Returns:
+    The computed scalar constant pi for these Kronecker Factors (as a Tensor).
+  """
+  # Instead of dividing by the dim of the norm, we multiply by the dim of the
+  # other norm. This works out the same in the ratio.
+  left_norm = math_ops.trace(left_cov) * right_cov.shape.as_list()[0]
+  right_norm = math_ops.trace(right_cov) * left_cov.shape.as_list()[0]
+  return math_ops.sqrt(left_norm / right_norm)
+
+
+def compute_pi_adjusted_damping(left_cov, right_cov, damping):
+
+  if PI_TYPE == PI_TRACENORM_NAME:
+    pi = compute_pi_tracenorm(left_cov, right_cov)
+    return (damping * pi, damping / pi)
+
+  elif PI_TYPE == PI_OFF_NAME:
+    return (damping, damping)
+
 
 @six.add_metaclass(abc.ABCMeta)
 class FisherBlock(object):
@@ -153,7 +201,7 @@ class FullFB(FisherBlock):
     self._factor.register_damped_inverse(damping)
 
   def multiply_inverse(self, vector):
-    inverse = self._factor.get_inverse(self._damping)
+    inverse = self._factor.get_damped_inverse(self._damping)
     out_flat = math_ops.matmul(inverse, utils.tensors_to_column(vector))
     return utils.column_to_tensors(vector, out_flat)
 
@@ -409,10 +457,7 @@ class ConvDiagonalFB(FisherBlock):
     self._num_locations = (
         inputs_shape[1] * inputs_shape[2] //
         (self._strides[1] * self._strides[2]))
-
-    if NORMALIZE_DAMPING_POWER:
-      damping /= self._num_locations ** NORMALIZE_DAMPING_POWER
-    self._damping = damping
+    self._damping = normalize_damping(damping, self._num_locations)
 
     self._factor = self._layer_collection.make_or_get_factor(
         fisher_factors.ConvDiagonalFactor,
@@ -465,11 +510,10 @@ class KroneckerProductFB(FisherBlock):
     Args:
       damping: The base damping factor (float or Tensor) for the damped inverse.
     """
-    pi = utils.compute_pi(self._input_factor.get_cov(),
-                          self._output_factor.get_cov())
-
-    self._input_damping = (damping**0.5) * pi
-    self._output_damping = (damping**0.5) / pi
+    self._input_damping, self._output_damping = compute_pi_adjusted_damping(
+        self._input_factor.get_cov(),
+        self._output_factor.get_cov(),
+        damping**0.5)
 
     self._input_factor.register_damped_inverse(self._input_damping)
     self._output_factor.register_damped_inverse(self._output_damping)
@@ -487,8 +531,9 @@ class KroneckerProductFB(FisherBlock):
     return 1.0
 
   def multiply_inverse(self, vector):
-    left_factor_inv = self._input_factor.get_inverse(self._input_damping)
-    right_factor_inv = self._output_factor.get_inverse(self._output_damping)
+    left_factor_inv = self._input_factor.get_damped_inverse(self._input_damping)
+    right_factor_inv = self._output_factor.get_damped_inverse(
+        self._output_damping)
     reshaped_vector = utils.layer_params_to_mat2d(vector)
     reshaped_out = math_ops.matmul(left_factor_inv,
                                    math_ops.matmul(reshaped_vector,
@@ -650,8 +695,8 @@ class ConvKFCBasicFB(KroneckerProductFB):
     grads_list = tuple(_concat_along_batch_dim(grads) for grads in grads_list)
 
     # Infer number of locations upon which convolution is applied.
-    self._num_locations = _num_conv_locations(inputs.shape.as_list(),
-                                              self._strides)
+    self._num_locations = num_conv_locations(inputs.shape.as_list(),
+                                             self._strides)
 
     self._input_factor = self._layer_collection.make_or_get_factor(
         fisher_factors.ConvInputKroneckerFactor,
@@ -660,11 +705,9 @@ class ConvKFCBasicFB(KroneckerProductFB):
     self._output_factor = self._layer_collection.make_or_get_factor(
         fisher_factors.ConvOutputKroneckerFactor, (grads_list,))
 
-    if NORMALIZE_DAMPING_POWER:
-      damping /= self._num_locations**NORMALIZE_DAMPING_POWER
-    self._damping = damping
-
+    damping = normalize_damping(damping, self._num_locations)
     self._register_damped_input_and_output_inverses(damping)
+    self._damping = damping
 
   @property
   def _renorm_coeff(self):
@@ -717,6 +760,267 @@ def _concat_along_batch_dim(tensor_list):
     return array_ops.concat(tensor_list, axis=0)
 
 
-def _num_conv_locations(input_shape, strides):
-  """Returns the number of locations a Conv kernel is applied to."""
+def num_conv_locations(input_shape, strides):
+  """Returns the number of spatial locations a 2D Conv kernel is applied to.
+
+  Args:
+    input_shape: list representing shape of inputs to the Conv layer.
+    strides: list representing strides for the Conv kernel.
+
+  Returns:
+    A scalar |T| denoting the number of spatial locations for the Conv layer.
+  """
   return input_shape[1] * input_shape[2] // (strides[1] * strides[2])
+
+
+class FullyConnectedMultiIndepFB(KroneckerProductFB):
+  """FisherBlock for fully-connected layers that share parameters.
+  """
+
+  def __init__(self, layer_collection, inputs, outputs, has_bias=False):
+    """Creates a FullyConnectedMultiIndepFB block.
+
+    Args:
+      layer_collection: LayerCollection instance.
+      inputs: list or tuple of Tensors. Each Tensor has shape [batch_size,
+        inputs_size].
+      outputs: list or tuple of Tensors. Each Tensor has shape [batch_size,
+        outputs_size].
+      has_bias: bool. If True, estimates Fisher with respect to a bias
+        parameter as well as the layer's parameters.
+    """
+
+    assert len(inputs) == len(outputs)
+    # We need to make sure inputs and outputs are tuples and not lists so that
+    # they get hashed by layer_collection.make_or_get_factor properly.
+    self._inputs = tuple(inputs)
+    self._outputs = tuple(outputs)
+    self._has_bias = has_bias
+    self._num_uses = len(inputs)
+
+    super(FullyConnectedMultiIndepFB, self).__init__(layer_collection)
+
+  @property
+  def num_registered_minibatches(self):
+    # TODO(b/69411207): Add support for registering additional minibatches.
+    return 1
+
+  def instantiate_factors(self, grads_list, damping):
+
+    self._input_factor = self._layer_collection.make_or_get_factor(
+        fisher_factors.FullyConnectedMultiKF,
+        ((self._inputs,), self._has_bias))
+
+    self._output_factor = self._layer_collection.make_or_get_factor(
+        fisher_factors.FullyConnectedMultiKF, (grads_list,))
+
+    damping = normalize_damping(damping, self._num_uses)
+    self._register_damped_input_and_output_inverses(damping)
+
+  @property
+  def _renorm_coeff(self):
+    return self._num_uses
+
+  def tensors_to_compute_grads(self):
+    return self._outputs
+
+  def num_inputs(self):
+    return len(self._inputs)
+
+
+class SeriesFBApproximation(enum.IntEnum):
+  """See FullyConnectedSeriesFB.__init__ for description and usage."""
+  option1 = 1
+  option2 = 2
+
+
+class FullyConnectedSeriesFB(FisherBlock):
+  """FisherBlock for fully-connected layers that share parameters across time.
+
+  See the following preprint for details:
+    https://openreview.net/pdf?id=HyMTkQZAb
+
+  See the end of the appendix of the paper for a pseudo-code of the
+  algorithm being implemented by multiply_inverse here.  Note that we are
+  using pre-computed versions of certain matrix-matrix products to speed
+  things up.  This is explicitly explained wherever it is done.
+  """
+
+  def __init__(self,
+               layer_collection,
+               inputs,
+               outputs,
+               has_bias=False,
+               option=SeriesFBApproximation.option2):
+    """Constructs a new `FullyConnectedSeriesFB`.
+
+    Args:
+      layer_collection: The collection of all layers in the K-FAC approximate
+        Fisher information matrix to which this FisherBlock belongs.
+      inputs: List of tensors of shape [batch_size, input_size].
+        Inputs to the layer.
+      outputs: List of tensors of shape [batch_size, input_size].
+        Outputs of the layer (before activations).
+      has_bias: Whether the layer includes a bias parameter.
+      option: A `SeriesFBApproximation` specifying the simplifying assumption
+        to be used in this block. `option1` approximates the cross-covariance
+        over time as a symmetric matrix, while `option2` makes
+        the assumption that training sequences are infinitely long. See section
+        3.5 of the paper for more details.
+    """
+
+    assert len(inputs) == len(outputs)
+    # We need to make sure inputs and outputs are tuples and not lists so that
+    # they get hashed by layer_collection.make_or_get_factor properly.
+    self._inputs = tuple(inputs)
+    self._outputs = tuple(outputs)
+    self._has_bias = has_bias
+    self._num_timesteps = len(inputs)
+    self._option = option
+
+    super(FullyConnectedSeriesFB, self).__init__(layer_collection)
+
+  @property
+  def num_registered_minibatches(self):
+    # TODO(b/69411207): Add support for registering additional minibatches.
+    return 1
+
+  def instantiate_factors(self, grads_list, damping):
+
+    self._input_factor = self._layer_collection.make_or_get_factor(
+        fisher_factors.FullyConnectedMultiKF, ((self._inputs,), self._has_bias))
+
+    self._output_factor = self._layer_collection.make_or_get_factor(
+        fisher_factors.FullyConnectedMultiKF, (grads_list,))
+
+    damping = normalize_damping(damping, self._num_timesteps)
+    self._damping_input, self._damping_output = compute_pi_adjusted_damping(
+        self._input_factor.get_cov(),
+        self._output_factor.get_cov(),
+        damping**0.5)
+
+    if self._option == SeriesFBApproximation.option1:
+      self._input_factor.register_option1quants(self._damping_input)
+      self._output_factor.register_option1quants(self._damping_output)
+    elif self._option == SeriesFBApproximation.option2:
+      self._input_factor.register_option2quants(self._damping_input)
+      self._output_factor.register_option2quants(self._damping_output)
+    else:
+      raise ValueError(
+          "Unrecognized FullyConnectedSeriesFB approximation: {}".format(
+              self._option))
+
+  def multiply_inverse(self, vector):
+    # pylint: disable=invalid-name
+
+    Z = utils.layer_params_to_mat2d(vector)
+
+    # Derivations were done for "batch_dim==1" case so we need to convert to
+    # that orientation:
+    Z = array_ops.transpose(Z)
+
+    if self._option == SeriesFBApproximation.option1:
+
+      # Note that L_A = A0^(-1/2) * U_A and L_G = G0^(-1/2) * U_G.
+      L_A, psi_A = self._input_factor.get_option1quants(self._damping_input)
+      L_G, psi_G = self._output_factor.get_option1quants(self._damping_output)
+
+      def gamma(x):
+        # We are assuming that each case has the same number of time-steps.
+        # If this stops being the case one shouldn't simply replace this T
+        # with its average value.  Instead, one needs to go back to the
+        # definition of the gamma function from the paper.
+        T = self._num_timesteps
+        return (1 - x)**2 / (T * (1 - x**2) - 2 * x * (1 - x**T))
+
+      # Y = gamma( psi_G*psi_A^T ) (computed element-wise)
+      # Even though Y is Z-independent we are recomputing it from the psi's
+      # each since Y depends on both A and G quantities, and it is relatively
+      # cheap to compute.
+      Y = gamma(array_ops.reshape(psi_G, [int(psi_G.shape[0]), -1]) * psi_A)
+
+      # Z = L_G^T * Z * L_A
+      # This is equivalent to the following computation from the original
+      # pseudo-code:
+      # Z = G0^(-1/2) * Z * A0^(-1/2)
+      # Z = U_G^T * Z * U_A
+      Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A), transpose_a=True)
+
+      # Z = Z .* Y
+      Z *= Y
+
+      # Z = L_G * Z * L_A^T
+      # This is equivalent to the following computation from the original
+      # pseudo-code:
+      # Z = U_G * Z * U_A^T
+      # Z = G0^(-1/2) * Z * A0^(-1/2)
+      Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A, transpose_b=True))
+
+    elif self._option == SeriesFBApproximation.option2:
+
+      # Note that P_A = A_1^T * A_0^(-1) and P_G = G_1^T * G_0^(-1),
+      # and K_A = A_0^(-1/2) * E_A and K_G = G_0^(-1/2) * E_G.
+      P_A, K_A, mu_A = self._input_factor.get_option2quants(self._damping_input)
+      P_G, K_G, mu_G = self._output_factor.get_option2quants(
+          self._damping_output)
+
+      # Our approach differs superficially from the pseudo-code in the paper
+      # in order to reduce the total number of matrix-matrix multiplies.
+      # In particular, the first three computations in the pseudo code are
+      # Z = G0^(-1/2) * Z * A0^(-1/2)
+      # Z = Z - hPsi_G^T * Z * hPsi_A
+      # Z = E_G^T * Z * E_A
+      # Noting that hPsi = C0^(-1/2) * C1 * C0^(-1/2), so that
+      # C0^(-1/2) * hPsi = C0^(-1) * C1 * C0^(-1/2) = P^T * C0^(-1/2)
+      # the entire computation can be written as
+      # Z = E_G^T * (G0^(-1/2) * Z * A0^(-1/2)
+      #     - hPsi_G^T * G0^(-1/2) * Z * A0^(-1/2) * hPsi_A) * E_A
+      #   = E_G^T * (G0^(-1/2) * Z * A0^(-1/2)
+      #     - G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2)) * E_A
+      #   = E_G^T * G0^(-1/2) * Z * A0^(-1/2) * E_A
+      #     -  E_G^T* G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2) * E_A
+      #   = K_G^T * Z * K_A  -  K_G^T * P_G * Z * P_A^T * K_A
+      # This final expression is computed by the following two lines:
+      # Z = Z - P_G * Z * P_A^T
+      Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A, transpose_b=True))
+      # Z = K_G^T * Z * K_A
+      Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A), transpose_a=True)
+
+      # Z = Z ./ (1*1^T - mu_G*mu_A^T)
+      # Be careful with the outer product.  We don't want to accidentally
+      # make it an inner-product instead.
+      tmp = 1.0 - array_ops.reshape(mu_G, [int(mu_G.shape[0]), -1]) * mu_A
+      # Prevent some numerical issues by setting any 0.0 eigs to 1.0
+      tmp += 1.0 * math_ops.cast(math_ops.equal(tmp, 0.0), dtype=tmp.dtype)
+      Z /= tmp
+
+      # We now perform the transpose/reverse version of the operations
+      # derived above, whose derivation from the original pseudo-code is
+      # analgous.
+      # Z = K_G * Z * K_A^T
+      Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A, transpose_b=True))
+
+      # Z = Z - P_G^T * Z * P_A
+      Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A), transpose_a=True)
+
+      # Z = normalize (1/E[T]) * Z
+      # Note that this normalization is done because we compute the statistics
+      # by averaging, not summing, over time. (And the gradient is presumably
+      # summed over time, not averaged, and thus their scales are different.)
+      Z /= math_ops.cast(self._num_timesteps, Z.dtype)
+
+    # Convert back to the "batch_dim==0" orientation.
+    Z = array_ops.transpose(Z)
+
+    return utils.mat2d_to_layer_params(vector, Z)
+
+    # pylint: enable=invalid-name
+
+  def multiply(self, vector):
+    raise NotImplementedError
+
+  def tensors_to_compute_grads(self):
+    return self._outputs
+
+  def num_inputs(self):
+    return len(self._inputs)
diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py
index 59389f8d385c18f50914d690cfaa2825ef807ed3..ac396309206fe09af65c2b70840a513fb25b579b 100644
--- a/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py
+++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py
@@ -33,6 +33,10 @@ _allowed_symbols = [
     'ConvKFCBasicFB',
     'ConvDiagonalFB',
     'set_global_constants',
+    'compute_pi_tracenorm',
+    'compute_pi_adjusted_damping',
+    'num_conv_locations',
+    'normalize_damping'
 ]
 
 remove_undocumented(__name__, allowed_exception_list=_allowed_symbols)
diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py
index fbc192f1dcfa0b384e2cb31c43af3651436321ea..f59168cbc05fffd104ff5a44308eefd206beb9db 100644
--- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py
+++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py
@@ -27,6 +27,8 @@ import six
 from tensorflow.contrib.kfac.python.ops import utils
 from tensorflow.python.framework import ops as tf_ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import special_math_ops
@@ -50,11 +52,15 @@ EIGENVALUE_DECOMPOSITION_THRESHOLD = 2
 # matrix powers. Must be nonnegative.
 EIGENVALUE_CLIPPING_THRESHOLD = 0.0
 
+# Colocate the covariance ops and variables with the input tensors for each
+# factor.
+COLOCATE_COV_OPS_WITH_INPUTS = True
+
 
 @contextlib.contextmanager
-def _maybe_colocate_with(op, colocate_cov_ops_with_inputs):
-  """Context to colocate with `op` if `colocate_cov_ops_with_inputs`."""
-  if colocate_cov_ops_with_inputs:
+def maybe_colocate_with(op):
+  """Context to colocate with `op` if `COLOCATE_COV_OPS_WITH_INPUTS`."""
+  if COLOCATE_COV_OPS_WITH_INPUTS:
     if isinstance(op, (list, tuple)):
       with tf_ops.colocate_with(op[0]):
         yield
@@ -68,12 +74,14 @@ def _maybe_colocate_with(op, colocate_cov_ops_with_inputs):
 def set_global_constants(init_covariances_at_zero=None,
                          zero_debias=None,
                          eigenvalue_decomposition_threshold=None,
-                         eigenvalue_clipping_threshold=None):
+                         eigenvalue_clipping_threshold=None,
+                         colocate_cov_ops_with_inputs=None):
   """Sets various global constants used by the classes in this module."""
   global INIT_COVARIANCES_AT_ZERO
   global ZERO_DEBIAS
   global EIGENVALUE_DECOMPOSITION_THRESHOLD
   global EIGENVALUE_CLIPPING_THRESHOLD
+  global COLOCATE_COV_OPS_WITH_INPUTS
 
   if init_covariances_at_zero is not None:
     INIT_COVARIANCES_AT_ZERO = init_covariances_at_zero
@@ -83,6 +91,8 @@ def set_global_constants(init_covariances_at_zero=None,
     EIGENVALUE_DECOMPOSITION_THRESHOLD = eigenvalue_decomposition_threshold
   if eigenvalue_clipping_threshold is not None:
     EIGENVALUE_CLIPPING_THRESHOLD = eigenvalue_clipping_threshold
+  if colocate_cov_ops_with_inputs is not None:
+    COLOCATE_COV_OPS_WITH_INPUTS = colocate_cov_ops_with_inputs
 
 
 def inverse_initializer(shape, dtype, partition_info=None):  # pylint: disable=unused-argument
@@ -101,7 +111,7 @@ def diagonal_covariance_initializer(shape, dtype, partition_info):  # pylint: di
   return array_ops.ones(shape, dtype)
 
 
-def _compute_cov(tensor, normalizer=None):
+def compute_cov(tensor, tensor_right=None, normalizer=None):
   """Compute the empirical second moment of the rows of a 2D Tensor.
 
   This function is meant to be applied to random matrices for which the true row
@@ -109,6 +119,8 @@ def _compute_cov(tensor, normalizer=None):
 
   Args:
     tensor: A 2D Tensor.
+    tensor_right: An optional 2D Tensor. If provided, this function computes
+      the matrix product tensor^T * tensor_right instead of tensor^T * tensor.
     normalizer: optional scalar for the estimator (by default, the normalizer is
         the number of rows of tensor).
 
@@ -117,12 +129,17 @@ def _compute_cov(tensor, normalizer=None):
   """
   if normalizer is None:
     normalizer = array_ops.shape(tensor)[0]
-  cov = (math_ops.matmul(tensor, tensor, transpose_a=True) / math_ops.cast(
-      normalizer, tensor.dtype))
-  return (cov + array_ops.transpose(cov)) / math_ops.cast(2, cov.dtype)
+  if tensor_right is None:
+    cov = (
+        math_ops.matmul(tensor, tensor, transpose_a=True) / math_ops.cast(
+            normalizer, tensor.dtype))
+    return (cov + array_ops.transpose(cov)) / math_ops.cast(2.0, cov.dtype)
+  else:
+    return (math_ops.matmul(tensor, tensor_right, transpose_a=True) /
+            math_ops.cast(normalizer, tensor.dtype))
 
 
-def _append_homog(tensor):
+def append_homog(tensor):
   """Appends a homogeneous coordinate to the last dimension of a Tensor.
 
   Args:
@@ -135,7 +152,7 @@ def _append_homog(tensor):
   rank = len(tensor.shape.as_list())
   shape = array_ops.concat([array_ops.shape(tensor)[:-1], [1]], axis=0)
   ones = array_ops.ones(shape, dtype=tensor.dtype)
-  return array_ops.concat([tensor, ones], axis=rank-1)
+  return array_ops.concat([tensor, ones], axis=rank - 1)
 
 
 def scope_string_from_params(params):
@@ -173,8 +190,8 @@ def scope_string_from_params(params):
     elif isinstance(param, (tf_ops.Tensor, variables.Variable)):
       name_parts.append(scope_string_from_name(param))
     else:
-      raise ValueError(
-          "Encountered an unsupported param type {}".format(type(param)))
+      raise ValueError("Encountered an unsupported param type {}".format(
+          type(param)))
   return "_".join(name_parts)
 
 
@@ -225,6 +242,10 @@ class FisherFactor(object):
     """
     pass
 
+  @abc.abstractproperty
+  def _dtype(self):
+    pass
+
   @property
   def _cov_initializer(self):
     return covariance_initializer
@@ -236,7 +257,8 @@ class FisherFactor(object):
           "cov",
           initializer=self._cov_initializer,
           shape=self._cov_shape,
-          trainable=False)
+          trainable=False,
+          dtype=self._dtype)
 
   @abc.abstractmethod
   def _compute_new_cov(self, idx=0):
@@ -250,15 +272,27 @@ class FisherFactor(object):
     Returns:
       An Op for updating the covariance Variable referenced by _cov.
     """
-    new_cov = math_ops.add_n(
-        tuple(self._compute_new_cov(idx) for idx in range(self._num_sources)))
-
-    return moving_averages.assign_moving_average(
-        self._cov, new_cov, ema_decay, zero_debias=ZERO_DEBIAS)
+    new_cov_contribs = tuple(self._compute_new_cov(idx)
+                             for idx in range(self._num_sources))
+    # This gets the job done but we might want a better solution in the future.
+    # In particular, we could have a separate way of specifying where the
+    # the cov variables finally end up, independent of where their various
+    # contributions are computed.  Right now these are the same thing, but in
+    # the future we might want to perform the cov computations on each tower,
+    # so that each tower will be considered a "source" (allowing us to reuse
+    # the existing "source" code for this).
+    with maybe_colocate_with(new_cov_contribs[0]):
+      new_cov = math_ops.add_n(new_cov_contribs)
+      # Synchronize value across all TPU cores.
+      if utils.on_tpu():
+        new_cov = utils.cross_replica_mean(new_cov)
+      return moving_averages.assign_moving_average(
+          self._cov, new_cov, ema_decay, zero_debias=ZERO_DEBIAS)
 
+  @abc.abstractmethod
   def make_inverse_update_ops(self):
     """Create and return update ops corresponding to registered computations."""
-    return []
+    pass
 
   def get_cov(self):
     return self._cov
@@ -273,6 +307,13 @@ class InverseProvidingFactor(FisherFactor):
   _cov_shape properties.
   """
 
+  # TODO(b/69108481): This class (and its subclasses) should be refactored to
+  # serve the matrix quantities it computes as both (potentially stale)
+  # variables, updated by the inverse update ops, and fresh values stored in
+  # tensors that recomputed once every session.run() call.  Currently matpower
+  # and damp_inverse have the former behavior, while eigendecomposition has
+  # the latter.
+
   def __init__(self):
     self._inverses_by_damping = {}
     self._matpower_by_exp_and_damping = {}
@@ -283,6 +324,10 @@ class InverseProvidingFactor(FisherFactor):
   def register_damped_inverse(self, damping):
     """Registers a damped inverse needed by a FisherBlock.
 
+    This creates a variable and signals make_inverse_update_ops to make the
+    corresponding update op.  The variable can be read via the method
+    get_inverse.
+
     Args:
       damping: The damping value (float or Tensor) for this factor.
     """
@@ -293,12 +338,17 @@ class InverseProvidingFactor(FisherFactor):
             "inv_damp{}".format(damping_string),
             initializer=inverse_initializer,
             shape=self._cov_shape,
-            trainable=False)
+            trainable=False,
+            dtype=self._dtype)
       self._inverses_by_damping[damping] = inv
 
   def register_matpower(self, exp, damping):
     """Registers a matrix power needed by a FisherBlock.
 
+    This creates a variable and signals make_inverse_update_ops to make the
+    corresponding update op.  The variable can be read via the method
+    get_matpower.
+
     Args:
       exp: The exponent (float or Tensor) to raise the matrix to.
       damping: The damping value (float or Tensor).
@@ -311,59 +361,81 @@ class InverseProvidingFactor(FisherFactor):
             "matpower_exp{}_damp{}".format(exp_string, damping_string),
             initializer=inverse_initializer,
             shape=self._cov_shape,
-            trainable=False)
+            trainable=False,
+            dtype=self._dtype)
       self._matpower_by_exp_and_damping[(exp, damping)] = matpower
 
-  def register_eigendecomp(self):
-    """Registers that an eigendecomposition is needed by a FisherBlock."""
-    if not self._eigendecomp:
-      self._eigendecomp = linalg_ops.self_adjoint_eig(self._cov)
-
   def make_inverse_update_ops(self):
     """Create and return update ops corresponding to registered computations."""
-    ops = super(InverseProvidingFactor, self).make_inverse_update_ops()
+    ops = []
+
+    # We do this to ensure that we don't reuse the eigendecomp from old calls
+    # to make_inverse_update_ops that may be placed on different devices.  This
+    # can happen is the user has both a permanent and lazily constructed
+    # version of the inverse ops (and only uses one of them).
+    self.reset_eigendecomp()
 
     num_inverses = len(self._inverses_by_damping)
     matrix_power_registered = bool(self._matpower_by_exp_and_damping)
-    use_eig = (self._eigendecomp or matrix_power_registered or
-               num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD)
+    use_eig = (
+        self._eigendecomp or matrix_power_registered or
+        num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD)
 
     if use_eig:
-      self.register_eigendecomp()  # ensures self._eigendecomp is set
-      eigenvalues, eigenvectors = self._eigendecomp  # pylint: disable=unpacking-non-sequence
-
-      # The matrix self._cov is positive semidefinite by construction, but the
-      # numerical eigenvalues could be negative due to numerical errors, so here
-      # we clip them to be at least EIGENVALUE_CLIPPING_THRESHOLD.
-      clipped_eigenvalues = math_ops.maximum(eigenvalues,
-                                             EIGENVALUE_CLIPPING_THRESHOLD)
+      eigenvalues, eigenvectors = self.get_eigendecomp()  # pylint: disable=unpacking-non-sequence
 
       for damping, inv in self._inverses_by_damping.items():
         ops.append(
             inv.assign(
-                math_ops.matmul(eigenvectors / (clipped_eigenvalues + damping),
+                math_ops.matmul(eigenvectors / (eigenvalues + damping),
                                 array_ops.transpose(eigenvectors))))
 
       for (exp, damping), matpower in self._matpower_by_exp_and_damping.items():
         ops.append(
             matpower.assign(
-                math_ops.matmul(eigenvectors * (clipped_eigenvalues + damping)**
-                                exp, array_ops.transpose(eigenvectors))))
+                math_ops.matmul(eigenvectors *
+                                (eigenvalues + damping)**exp,
+                                array_ops.transpose(eigenvectors))))
+      # These ops share computation and should be run on a single device.
+      ops = [control_flow_ops.group(*ops)]
     else:
       for damping, inv in self._inverses_by_damping.items():
         ops.append(inv.assign(utils.posdef_inv(self._cov, damping)))
 
     return ops
 
-  def get_inverse(self, damping):
+  def get_damped_inverse(self, damping):
+    # Note that this function returns a variable which gets updated by the
+    # inverse ops.  It may be stale / inconsistent with the latest value of
+    # get_cov().
     return self._inverses_by_damping[damping]
 
   def get_matpower(self, exp, damping):
+    # Note that this function returns a variable which gets updated by the
+    # inverse ops.  It may be stale / inconsistent with the latest value of
+    # get_cov().
     return self._matpower_by_exp_and_damping[(exp, damping)]
 
   def get_eigendecomp(self):
+    """Creates or retrieves eigendecomposition of self._cov."""
+    # Unlike get_inverse and get_matpower this doesn't retrieve a stored
+    # variable, but instead always computes a fresh version from the current
+    # value of get_cov().
+    if not self._eigendecomp:
+      eigenvalues, eigenvectors = linalg_ops.self_adjoint_eig(self._cov)
+
+      # The matrix self._cov is positive semidefinite by construction, but the
+      # numerical eigenvalues could be negative due to numerical errors, so here
+      # we clip them to be at least FLAGS.eigenvalue_clipping_threshold
+      clipped_eigenvalues = math_ops.maximum(eigenvalues,
+                                             EIGENVALUE_CLIPPING_THRESHOLD)
+      self._eigendecomp = (clipped_eigenvalues, eigenvectors)
+
     return self._eigendecomp
 
+  def reset_eigendecomp(self):
+    self._eigendecomp = None
+
 
 class FullFactor(InverseProvidingFactor):
   """FisherFactor for a full matrix representation of the Fisher of a parameter.
@@ -374,41 +446,38 @@ class FullFactor(InverseProvidingFactor):
 
   def __init__(self,
                params_grads,
-               batch_size,
-               colocate_cov_ops_with_inputs=False):
+               batch_size):
     self._batch_size = batch_size
-    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
-    self._orig_params_grads_name = scope_string_from_params(
-        [params_grads, self._batch_size])
-    params_grads_flat = []
-    for params_grad in params_grads:
-      with _maybe_colocate_with(params_grad,
-                                self._colocate_cov_ops_with_inputs):
-        col = utils.tensors_to_column(params_grad)
-        params_grads_flat.append(col)
-    self._params_grads_flat = tuple(params_grads_flat)
+    self._params_grads = tuple(utils.ensure_sequence(params_grad)
+                               for params_grad in params_grads)
     super(FullFactor, self).__init__()
 
   @property
   def _var_scope(self):
-    return "ff_full/" + self._orig_params_grads_name
+    return "ff_full/" + scope_string_from_params(
+        [self._params_grads, self._batch_size])
 
   @property
   def _cov_shape(self):
-    size = self._params_grads_flat[0].shape[0]
-    return [size, size]
+    size = sum(param_grad.shape.num_elements()
+               for param_grad in self._params_grads[0])
+    return (size, size)
 
   @property
   def _num_sources(self):
-    return len(self._params_grads_flat)
+    return len(self._params_grads)
+
+  @property
+  def _dtype(self):
+    return self._params_grads[0][0].dtype
 
   def _compute_new_cov(self, idx=0):
     # This will be a very basic rank 1 estimate
-    with _maybe_colocate_with(self._params_grads_flat[idx],
-                              self._colocate_cov_ops_with_inputs):
-      return ((self._params_grads_flat[idx] * array_ops.transpose(
-          self._params_grads_flat[idx])) / math_ops.cast(
-              self._batch_size, self._params_grads_flat[idx].dtype))
+    with maybe_colocate_with(self._params_grads[idx]):
+      params_grads_flat = utils.tensors_to_column(self._params_grads[idx])
+      return ((params_grads_flat * array_ops.transpose(
+          params_grads_flat)) / math_ops.cast(self._batch_size,
+                                              params_grads_flat.dtype))
 
 
 class DiagonalFactor(FisherFactor):
@@ -421,6 +490,9 @@ class DiagonalFactor(FisherFactor):
   def _cov_initializer(self):
     return diagonal_covariance_initializer
 
+  def make_inverse_update_ops(self):
+    return []
+
 
 class NaiveDiagonalFactor(DiagonalFactor):
   """FisherFactor for a diagonal approximation of any type of param's Fisher.
@@ -431,38 +503,36 @@ class NaiveDiagonalFactor(DiagonalFactor):
 
   def __init__(self,
                params_grads,
-               batch_size,
-               colocate_cov_ops_with_inputs=False):
+               batch_size):
+    self._params_grads = tuple(utils.ensure_sequence(params_grad)
+                               for params_grad in params_grads)
     self._batch_size = batch_size
-    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
-    params_grads_flat = []
-    for params_grad in params_grads:
-      with _maybe_colocate_with(params_grad,
-                                self._colocate_cov_ops_with_inputs):
-        col = utils.tensors_to_column(params_grad)
-        params_grads_flat.append(col)
-    self._params_grads = tuple(params_grads_flat)
-    self._orig_params_grads_name = scope_string_from_params(
-        [self._params_grads, self._batch_size])
     super(NaiveDiagonalFactor, self).__init__()
 
   @property
   def _var_scope(self):
-    return "ff_naivediag/" + self._orig_params_grads_name
+    return "ff_naivediag/" + scope_string_from_params(
+        [self._params_grads, self._batch_size])
 
   @property
   def _cov_shape(self):
-    return self._params_grads[0].shape
+    size = sum(param_grad.shape.num_elements()
+               for param_grad in self._params_grads[0])
+    return (size, 1)
 
   @property
   def _num_sources(self):
     return len(self._params_grads)
 
+  @property
+  def _dtype(self):
+    return self._params_grads[0][0].dtype
+
   def _compute_new_cov(self, idx=0):
-    with _maybe_colocate_with(self._params_grads[idx],
-                              self._colocate_cov_ops_with_inputs):
-      return (math_ops.square(self._params_grads[idx]) / math_ops.cast(
-          self._batch_size, self._params_grads[idx].dtype))
+    with maybe_colocate_with(self._params_grads[idx]):
+      params_grads_flat = utils.tensors_to_column(self._params_grads[idx])
+      return (math_ops.square(params_grads_flat) / math_ops.cast(
+          self._batch_size, params_grads_flat.dtype))
 
 
 class FullyConnectedDiagonalFactor(DiagonalFactor):
@@ -471,18 +541,15 @@ class FullyConnectedDiagonalFactor(DiagonalFactor):
   Given in = [batch_size, input_size] and out_grad = [batch_size, output_size],
   approximates the covariance as,
 
-    Cov(in, out) = (1/batch_size) \sum_{i} outer(in[i], out_grad[i]) ** 2.0
+    Cov(in, out) = (1/batch_size) sum_{i} outer(in[i], out_grad[i]) ** 2.0
 
   where the square is taken element-wise.
   """
 
-  # TODO(jamesmartens): add units tests for this class
-
   def __init__(self,
                inputs,
                outputs_grads,
-               has_bias=False,
-               colocate_cov_ops_with_inputs=False):
+               has_bias=False):
     """Instantiate FullyConnectedDiagonalFactor.
 
     Args:
@@ -491,44 +558,46 @@ class FullyConnectedDiagonalFactor(DiagonalFactor):
       outputs_grads: List of Tensors of shape [batch_size, output_size].
         Gradient of loss with respect to layer's preactivations.
       has_bias: bool. If True, append '1' to each input.
-      colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
-          their inputs.
     """
+    self._inputs = inputs
+    self._has_bias = has_bias
     self._outputs_grads = outputs_grads
-    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
     self._batch_size = array_ops.shape(inputs)[0]
-    self._orig_tensors_name = scope_string_from_params((inputs,) +
-                                                       tuple(outputs_grads))
-
-    # Note that we precompute the required operations on the inputs since the
-    # inputs don't change with the 'idx' argument to _compute_new_cov.  (Only
-    # the target entry of _outputs_grads changes with idx.)
-    with _maybe_colocate_with(inputs, self._colocate_cov_ops_with_inputs):
-      if has_bias:
-        inputs = _append_homog(inputs)
-      self._squared_inputs = math_ops.square(inputs)
+    self._squared_inputs = None
 
     super(FullyConnectedDiagonalFactor, self).__init__()
 
   @property
   def _var_scope(self):
-    return "ff_diagfc/" + self._orig_tensors_name
+    return "ff_diagfc/" + scope_string_from_params(
+        (self._inputs,) + tuple(self._outputs_grads))
 
   @property
   def _cov_shape(self):
-    return [self._squared_inputs.shape[1], self._outputs_grads[0].shape[1]]
+    return [self._inputs.shape[1] + self._has_bias,
+            self._outputs_grads[0].shape[1]]
 
   @property
   def _num_sources(self):
     return len(self._outputs_grads)
 
+  @property
+  def _dtype(self):
+    return self._outputs_grads[0].dtype
+
   def _compute_new_cov(self, idx=0):
     # The well-known special formula that uses the fact that the entry-wise
     # square of an outer product is the outer-product of the entry-wise squares.
     # The gradient is the outer product of the input and the output gradients,
     # so we just square both and then take their outer-product.
-    with _maybe_colocate_with(self._squared_inputs,
-                              self._colocate_cov_ops_with_inputs):
+    with maybe_colocate_with(self._outputs_grads[idx]):
+      # We only need to compute squared_inputs once
+      if self._squared_inputs is None:
+        inputs = self._inputs
+        if self._has_bias:
+          inputs = append_homog(self._inputs)
+        self._squared_inputs = math_ops.square(inputs)
+
       new_cov = math_ops.matmul(
           self._squared_inputs,
           math_ops.square(self._outputs_grads[idx]),
@@ -540,16 +609,13 @@ class FullyConnectedDiagonalFactor(DiagonalFactor):
 class ConvDiagonalFactor(DiagonalFactor):
   """FisherFactor for a diagonal approx of a convolutional layer's Fisher."""
 
-  # TODO(jamesmartens): add units tests for this class
-
   def __init__(self,
                inputs,
                outputs_grads,
                filter_shape,
                strides,
                padding,
-               has_bias=False,
-               colocate_cov_ops_with_inputs=False):
+               has_bias=False):
     """Creates a ConvDiagonalFactor object.
 
     Args:
@@ -564,53 +630,64 @@ class ConvDiagonalFactor(DiagonalFactor):
       padding: The padding in this layer (1-D of Tensor length 4).
       has_bias: Python bool. If True, the layer is assumed to have a bias
         parameter in addition to its filter parameter.
-      colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
-          their inputs.
     """
+    self._inputs = inputs
     self._filter_shape = filter_shape
+    self._strides = strides
+    self._padding = padding
     self._has_bias = has_bias
     self._outputs_grads = outputs_grads
-    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
-
-    self._orig_tensors_name = scope_string_from_name((inputs,)
-                                                     + tuple(outputs_grads))
-
-    # Note that we precompute the required operations on the inputs since the
-    # inputs don't change with the 'idx' argument to _compute_new_cov.  (Only
-    # the target entry of _outputs_grads changes with idx.)
-    with _maybe_colocate_with(inputs, self._colocate_cov_ops_with_inputs):
-      filter_height, filter_width, _, _ = self._filter_shape
-      patches = array_ops.extract_image_patches(
-          inputs,
-          ksizes=[1, filter_height, filter_width, 1],
-          strides=strides,
-          rates=[1, 1, 1, 1],
-          padding=padding)
-
-      if has_bias:
-        patches = _append_homog(patches)
-
-      self._patches = patches
+    self._patches = None
 
     super(ConvDiagonalFactor, self).__init__()
 
   @property
   def _var_scope(self):
-    return "ff_convdiag/" + self._orig_tensors_name
+    return "ff_convdiag/" + scope_string_from_name(
+        (self._inputs,) + tuple(self._outputs_grads))
 
   @property
   def _cov_shape(self):
     filter_height, filter_width, in_channels, out_channels = self._filter_shape
-    return [filter_height * filter_width * in_channels + self._has_bias,
-            out_channels]
+    return [
+        filter_height * filter_width * in_channels + self._has_bias,
+        out_channels
+    ]
 
   @property
   def _num_sources(self):
     return len(self._outputs_grads)
 
+  @property
+  def _dtype(self):
+    return self._outputs_grads[0].dtype
+
+  def make_covariance_update_op(self, ema_decay):
+    with maybe_colocate_with(self._inputs):
+      filter_height, filter_width, _, _ = self._filter_shape
+
+      # TODO(b/64144716): there is potential here for a big savings in terms
+      # of memory use.
+      patches = array_ops.extract_image_patches(
+          self._inputs,
+          ksizes=[1, filter_height, filter_width, 1],
+          strides=self._strides,
+          rates=[1, 1, 1, 1],
+          padding=self._padding)
+
+      if self._has_bias:
+        patches = append_homog(patches)
+
+      self._patches = patches
+
+    op = super(ConvDiagonalFactor, self).make_covariance_update_op(ema_decay)
+
+    self._patches = None
+
+    return op
+
   def _compute_new_cov(self, idx=0):
-    with _maybe_colocate_with(self._outputs_grads[idx],
-                              self._colocate_cov_ops_with_inputs):
+    with maybe_colocate_with(self._outputs_grads[idx]):
       outputs_grad = self._outputs_grads[idx]
       batch_size = array_ops.shape(self._patches)[0]
 
@@ -634,23 +711,18 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor):
 
   def __init__(self,
                tensors,
-               has_bias=False,
-               colocate_cov_ops_with_inputs=False):
+               has_bias=False):
     """Instantiate FullyConnectedKroneckerFactor.
 
     Args:
       tensors: List of Tensors of shape [batch_size, n]. Represents either a
         layer's inputs or its output's gradients.
-      has_bias: bool. If True, assume this factor is for the layer's inputs and
-        append '1' to each row.
-      colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
-          their inputs.
+      has_bias: bool. If True, append '1' to each row.
     """
     # The tensor argument is either a tensor of input activations or a tensor of
     # output pre-activation gradients.
     self._has_bias = has_bias
     self._tensors = tensors
-    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
     super(FullyConnectedKroneckerFactor, self).__init__()
 
   @property
@@ -667,13 +739,16 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor):
   def _num_sources(self):
     return len(self._tensors)
 
+  @property
+  def _dtype(self):
+    return self._tensors[0].dtype
+
   def _compute_new_cov(self, idx=0):
-    with _maybe_colocate_with(self._tensors[idx],
-                              self._colocate_cov_ops_with_inputs):
+    with maybe_colocate_with(self._tensors[idx]):
       tensor = self._tensors[idx]
       if self._has_bias:
-        tensor = _append_homog(tensor)
-      return _compute_cov(tensor)
+        tensor = append_homog(tensor)
+      return compute_cov(tensor)
 
 
 class ConvInputKroneckerFactor(InverseProvidingFactor):
@@ -682,7 +757,7 @@ class ConvInputKroneckerFactor(InverseProvidingFactor):
   Estimates E[ a a^T ] where a is the inputs to a convolutional layer given
   example x. Expectation is taken over all examples and locations.
 
-  Equivalent to \Omega in https://arxiv.org/abs/1602.01407 for details. See
+  Equivalent to Omega in https://arxiv.org/abs/1602.01407 for details. See
   Section 3.1 Estimating the factors.
   """
 
@@ -691,8 +766,7 @@ class ConvInputKroneckerFactor(InverseProvidingFactor):
                filter_shape,
                strides,
                padding,
-               has_bias=False,
-               colocate_cov_ops_with_inputs=False):
+               has_bias=False):
     """Initializes ConvInputKroneckerFactor.
 
     Args:
@@ -704,15 +778,12 @@ class ConvInputKroneckerFactor(InverseProvidingFactor):
         width_stride, in_channel_stride].
       padding: str. Padding method for layer. "SAME" or "VALID".
       has_bias: bool. If True, append 1 to in_channel.
-      colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
-          their inputs.
     """
     self._filter_shape = filter_shape
     self._strides = strides
     self._padding = padding
     self._has_bias = has_bias
     self._inputs = inputs
-    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
     super(ConvInputKroneckerFactor, self).__init__()
 
   @property
@@ -732,13 +803,19 @@ class ConvInputKroneckerFactor(InverseProvidingFactor):
   def _num_sources(self):
     return 1
 
+  @property
+  def _dtype(self):
+    return self._inputs.dtype
+
   def _compute_new_cov(self, idx=0):
     if idx != 0:
       raise ValueError("ConvInputKroneckerFactor only supports idx = 0")
 
-    # TODO(jamesmartens): factor this patches stuff out into a utility function
-    with _maybe_colocate_with(self._inputs, self._colocate_cov_ops_with_inputs):
+    with maybe_colocate_with(self._inputs):
       filter_height, filter_width, in_channels, _ = self._filter_shape
+
+      # TODO(b/64144716): there is potential here for a big savings in terms of
+      # memory use.
       patches = array_ops.extract_image_patches(
           self._inputs,
           ksizes=[1, filter_height, filter_width, 1],
@@ -747,12 +824,24 @@ class ConvInputKroneckerFactor(InverseProvidingFactor):
           padding=self._padding)
 
       flatten_size = (filter_height * filter_width * in_channels)
+      # patches_flat below is the matrix [[A_l]] from the KFC paper (tilde
+      # omitted over A for clarity). It has shape M|T| x J|Delta| (eq. 14),
+      # where M = minibatch size, |T| = number of spatial locations,
+      # |Delta| = number of spatial offsets, and J = number of input maps
+      # for convolutional layer l.
       patches_flat = array_ops.reshape(patches, [-1, flatten_size])
-
+      # We append a homogenous coordinate to patches_flat if the layer has
+      # bias parameters. This gives us [[A_l]]_H from the paper.
       if self._has_bias:
-        patches_flat = _append_homog(patches_flat)
-
-      return _compute_cov(patches_flat)
+        patches_flat = append_homog(patches_flat)
+      # We call compute_cov without passing in a normalizer. compute_cov uses
+      # the first dimension of patches_flat i.e. M|T| as the normalizer by
+      # default. Hence we end up computing 1/M|T| * [[A_l]]^T [[A_l]], with
+      # shape J|Delta| x J|Delta|. This is related to hat{Omega}_l from
+      # the paper but has a different scale here for consistency with
+      # ConvOutputKroneckerFactor.
+      # (Tilde omitted over A for clarity.)
+      return compute_cov(patches_flat)
 
 
 class ConvOutputKroneckerFactor(InverseProvidingFactor):
@@ -762,22 +851,19 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor):
   given example x and ds = (d / d s) log(p(y|x, w)). Expectation is taken over
   all examples and locations.
 
-  Equivalent to \Gamma in https://arxiv.org/abs/1602.01407 for details. See
+  Equivalent to Gamma in https://arxiv.org/abs/1602.01407 for details. See
   Section 3.1 Estimating the factors.
   """
 
-  def __init__(self, outputs_grads, colocate_cov_ops_with_inputs=False):
+  def __init__(self, outputs_grads):
     """Initializes ConvOutputKroneckerFactor.
 
     Args:
       outputs_grads: list of Tensors. Each Tensor is of shape
           [batch_size, height, width, out_channels].
-      colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
-          their inputs.
     """
     self._out_channels = outputs_grads[0].shape.as_list()[3]
     self._outputs_grads = outputs_grads
-    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
     super(ConvOutputKroneckerFactor, self).__init__()
 
   @property
@@ -793,9 +879,292 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor):
   def _num_sources(self):
     return len(self._outputs_grads)
 
+  @property
+  def _dtype(self):
+    return self._outputs_grads[0].dtype
+
   def _compute_new_cov(self, idx=0):
-    with _maybe_colocate_with(self._outputs_grads[idx],
-                              self._colocate_cov_ops_with_inputs):
+    with maybe_colocate_with(self._outputs_grads[idx]):
+      # reshaped_tensor below is the matrix DS_l defined in the KFC paper
+      # (tilde omitted over S for clarity). It has shape M|T| x I, where
+      # M = minibatch size, |T| = number of spatial locations, and
+      # I = number of output maps for convolutional layer l.
       reshaped_tensor = array_ops.reshape(self._outputs_grads[idx],
                                           [-1, self._out_channels])
-      return _compute_cov(reshaped_tensor)
+      # Following the reasoning in ConvInputKroneckerFactor._compute_new_cov,
+      # compute_cov here returns 1/M|T| * DS_l^T DS_l = hat{Gamma}_l
+      # as defined in the paper, with shape I x I.
+      # (Tilde omitted over S for clarity.)
+      return compute_cov(reshaped_tensor)
+
+
+class FullyConnectedMultiKF(InverseProvidingFactor):
+  """Kronecker factor for a fully connected recurrent layer."""
+
+  def __init__(self,
+               tensor_lists,
+               has_bias=False):
+    """Constructs a new `FullyConnectedMultiKF`.
+
+    Args:
+      tensor_lists: List of lists of Tensors of shape [batch_size, n].
+      has_bias: bool. If True, '1' is appended to each row.
+    """
+
+    self._tensor_lists = tensor_lists
+    self._has_bias = has_bias
+    self._batch_size = array_ops.shape(tensor_lists[0][0])[0]
+    self._num_timesteps = len(tensor_lists[0])
+    self._tensors = [None] * len(tensor_lists)
+
+    self._cov_dt1 = None
+    self._option1quants_by_damping = {}
+    self._option2quants_by_damping = {}
+
+    super(FullyConnectedMultiKF, self).__init__()
+
+  @property
+  def _var_scope(self):
+    return "ff_fc_multi/" + scope_string_from_params(self._tensor_lists)
+
+  @property
+  def _num_sources(self):
+    return len(self._tensor_lists)
+
+  @property
+  def _dtype(self):
+    return self._tensor_lists[0][0].dtype
+
+  def make_covariance_update_op(self, ema_decay):
+
+    op = super(FullyConnectedMultiKF, self).make_covariance_update_op(ema_decay)
+
+    if self._cov_dt1 is not None:
+      new_cov_dt1_contribs = tuple(self._compute_new_cov_dt1(idx)
+                                   for idx in range(self._num_sources))
+
+      with maybe_colocate_with(new_cov_dt1_contribs[0]):
+        new_cov_dt1 = math_ops.add_n(new_cov_dt1_contribs)
+
+        op2 = moving_averages.assign_moving_average(
+            self._cov_dt1, new_cov_dt1, ema_decay, zero_debias=ZERO_DEBIAS)
+
+        # TODO(b/69112164):
+        # It's important that _cov and _cov_dt1 remain consistent with each
+        # other while the inverse ops are happening. How can we ensure this?
+        # We will need to add explicit synchronization for this to
+        # work with asynchronous training.
+        op = control_flow_ops.group(op, op2)
+
+    return op
+
+  def _compute_new_cov(self, idx=0):
+    with maybe_colocate_with(self._tensor_lists[idx]):
+      tensor = array_ops.concat(self._tensor_lists[idx], 0)
+      if self._has_bias:
+        tensor = append_homog(tensor)
+      # We save these so they can be used by _compute_new_cov_dt1
+      self._tensors[idx] = tensor
+      return compute_cov(tensor)
+
+  def _compute_new_cov_dt1(self, idx=0):
+    tensor = self._tensors[idx]
+    with maybe_colocate_with(tensor):
+      # Is there a more elegant way to do this computation?
+      tensor_present = tensor[:-self._batch_size, :]
+      tensor_future = tensor[self._batch_size:, :]
+      # We specify a normalizer for this computation to ensure a PSD Fisher
+      # block estimate.  This is equivalent to padding with zeros, as was done
+      # in Section B.2 of the appendix.
+      normalizer = self._num_timesteps * self._batch_size
+      return compute_cov(
+          tensor_future, tensor_right=tensor_present, normalizer=normalizer)
+
+  @property
+  def _cov_shape(self):
+    size = self._tensor_lists[0][0].shape[1] + self._has_bias
+    return [size, size]
+
+  @property
+  def _vec_shape(self):
+    size = self._tensor_lists[0][0].shape[1] + self._has_bias
+    return [size]
+
+  def get_option1quants(self, damping):
+    return self._option1quants_by_damping[damping]
+
+  def get_option2quants(self, damping):
+    return self._option2quants_by_damping[damping]
+
+  def get_cov_dt1(self):
+    assert self._cov_dt1 is not None
+    return self._cov_dt1
+
+  def register_cov_dt1(self):
+    """Create a variable representing temporal cross-covariance.
+
+    (This is technically the second moment, not covariance, since it's
+    not mean subtracted.)
+    """
+    if self._cov_dt1 is None:
+      with variable_scope.variable_scope(self._var_scope):
+        self._cov_dt1 = variable_scope.get_variable(
+            "cov_dt1",
+            initializer=init_ops.zeros_initializer,
+            shape=self._cov_shape,
+            trainable=False,
+            dtype=self._dtype)
+
+  def register_option1quants(self, damping):
+
+    self.register_cov_dt1()
+
+    if damping not in self._option1quants_by_damping:
+      # It's questionable as to whether we should initialize with stuff like
+      # this at all.  Ideally these values should never be used until they are
+      # updated at least once.
+      damping_string = scalar_or_tensor_to_string(damping)
+      with variable_scope.variable_scope(self._var_scope):
+        Lmat = variable_scope.get_variable(  # pylint: disable=invalid-name
+            "Lmat_damp{}".format(damping_string),
+            initializer=inverse_initializer,
+            shape=self._cov_shape,
+            trainable=False,
+            dtype=self._dtype)
+        psi = variable_scope.get_variable(
+            "psi_damp{}".format(damping_string),
+            initializer=init_ops.ones_initializer,
+            shape=self._vec_shape,
+            trainable=False,
+            dtype=self._dtype)
+
+      self._option1quants_by_damping[damping] = (Lmat, psi)
+
+  def register_option2quants(self, damping):
+
+    self.register_cov_dt1()
+
+    if damping not in self._option2quants_by_damping:
+      # It's questionable as to whether we should initialize with stuff like
+      # this at all.  Ideally these values should never be used until they are
+      # updated at least once.
+      damping_string = scalar_or_tensor_to_string(damping)
+      with variable_scope.variable_scope(self._var_scope):
+        Pmat = variable_scope.get_variable(  # pylint: disable=invalid-name
+            "Lmat_damp{}".format(damping_string),
+            initializer=inverse_initializer,
+            shape=self._cov_shape,
+            trainable=False,
+            dtype=self._dtype)
+        Kmat = variable_scope.get_variable(  # pylint: disable=invalid-name
+            "Kmat_damp{}".format(damping_string),
+            initializer=inverse_initializer,
+            shape=self._cov_shape,
+            trainable=False,
+            dtype=self._dtype)
+        mu = variable_scope.get_variable(
+            "mu_damp{}".format(damping_string),
+            initializer=init_ops.ones_initializer,
+            shape=self._vec_shape,
+            trainable=False,
+            dtype=self._dtype)
+
+      self._option2quants_by_damping[damping] = (Pmat, Kmat, mu)
+
+  def make_inverse_update_ops(self):
+    """Create and return update ops corresponding to registered computations."""
+    # TODO(b/69918258): Add correctness tests for this method.
+    # pylint: disable=invalid-name
+
+    ops = super(FullyConnectedMultiKF, self).make_inverse_update_ops()
+
+    if (len(self._option1quants_by_damping) +
+        len(self._option2quants_by_damping)):
+
+      # Note that C0 and C1 are stand-ins for A0 and A1, or G0 and G1, from
+      # the pseudo-code in the original paper.  Because the computations for
+      # the A and G case are essentially the same they can both be performed by
+      # the same class (this one).
+
+      C1 = self.get_cov_dt1()
+
+      # Get the eigendecomposition of C0  (= self.get_cov())
+      eigen_e, eigen_V = self.get_eigendecomp()
+
+      # TODO(b/69678661): Note, there is an implicit assumption here that C1
+      # and C0 (as represented here by its eigen-decomp) are consistent.  This
+      # could fail to be the case if self._cov and self._cov_dt1 are not updated
+      # consistently, or are somehow read between or during the cov updates.
+      # Can this possibly happen?  Is there a way to prevent it?
+
+      for damping, (Lmat_var,
+                    psi_var) in self._option1quants_by_damping.items():
+
+        invsqrtC0 = math_ops.matmul(
+            eigen_V * (eigen_e + damping)**(-0.5), eigen_V, transpose_b=True)
+
+        # Might need to enforce symmetry lost due to numerical issues.
+        invsqrtC0 = (invsqrtC0 + array_ops.transpose(invsqrtC0)) / 2.0
+
+        # The following line imposses the symmetry assumed by "Option 1" on C1.
+        # Stangely the code can work okay with this line commented out,
+        # depending on how psd_eig is defined.  I'm not sure why.
+        C1 = (C1 + array_ops.transpose(C1)) / 2.0
+
+        # hPsi = C0^(-1/2) * C1 * C0^(-1/2)  (hPsi means hat{Psi})
+        hPsi = math_ops.matmul(math_ops.matmul(invsqrtC0, C1), invsqrtC0)
+
+        # Compute the decomposition U*diag(psi)*U^T = hPsi
+        psi, U = utils.posdef_eig(hPsi)
+
+        # L = C0^(-1/2) * U
+        Lmat = math_ops.matmul(invsqrtC0, U)
+
+        ops.append(Lmat_var.assign(Lmat))
+        ops.append(psi_var.assign(psi))
+
+      for damping, (Pmat_var, Kmat_var,
+                    mu_var) in self._option2quants_by_damping.items():
+
+        # compute C0^(-1/2)
+        invsqrtC0 = math_ops.matmul(
+            eigen_V * (eigen_e + damping)**(-0.5), eigen_V, transpose_b=True)
+
+        # Might need to enforce symmetry lost due to numerical issues.
+        invsqrtC0 = (invsqrtC0 + array_ops.transpose(invsqrtC0)) / 2.0
+
+        # Compute the product C0^(-1/2) * C1
+        invsqrtC0C1 = math_ops.matmul(invsqrtC0, C1)
+
+        # hPsi = C0^(-1/2) * C1 * C0^(-1/2)  (hPsi means hat{Psi})
+        hPsi = math_ops.matmul(invsqrtC0C1, invsqrtC0)
+
+        # Compute the decomposition E*diag(mu)*E^T = hPsi^T * hPsi
+        # Note that we using the notation mu instead of "m" for the eigenvalues.
+        # Instead of computing the product hPsi^T * hPsi and then doing an
+        # eigen-decomposition of this we just compute the SVD of hPsi and then
+        # square the singular values to get the eigenvalues. For a justification
+        # of this approach, see:
+        # https://en.wikipedia.org/wiki/Singular-value_decomposition#Relation_to_eigenvalue_decomposition
+        sqrtmu, _, E = linalg_ops.svd(hPsi)
+        mu = math_ops.square(sqrtmu)
+
+        # Mathematically, the eigenvalues should not should not exceed 1.0, but
+        # due to numerical issues, or possible issues with inconsistent
+        # values of C1 and (the eigen-decomposition of) C0 they might. So
+        # we enforce this condition.
+        mu = math_ops.minimum(mu, 1.0)
+
+        # P = (C0^(-1/2) * C1)^T * C0^(-1/2) = C_1^T * C_0^(-1)
+        Pmat = math_ops.matmul(invsqrtC0C1, invsqrtC0, transpose_a=True)
+
+        # K = C_0^(-1/2) * E
+        Kmat = math_ops.matmul(invsqrtC0, E)
+
+        ops.append(Pmat_var.assign(Pmat))
+        ops.append(Kmat_var.assign(Kmat))
+        ops.append(mu_var.assign(mu))
+
+    return [control_flow_ops.group(*ops)]
+
+    # pylint: enable=invalid-name
diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py b/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py
index 23ee93cd405bbf719939df89d525c812ee061f8b..ad93919149c287b1932dd2b6bd772c0dab26192d 100644
--- a/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py
+++ b/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py
@@ -41,6 +41,9 @@ _allowed_symbols = [
     "ConvOutputKroneckerFactor",
     "ConvDiagonalFactor",
     "set_global_constants",
+    "maybe_colocate_with",
+    "compute_cov",
+    "append_homog"
 ]
 
 remove_undocumented(__name__, allowed_exception_list=_allowed_symbols)
diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py
index 3a005ee39dd9400c21ae6c41fad5351d7fff2aac..8d450f04f379701e46a18b2e34bbbd6fcfcce2bb 100644
--- a/tensorflow/contrib/kfac/python/ops/layer_collection.py
+++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py
@@ -26,7 +26,9 @@ from __future__ import print_function
 
 from collections import defaultdict
 from collections import OrderedDict
+from functools import partial
 
+import math
 import six
 
 from tensorflow.contrib.kfac.python.ops import fisher_blocks as fb
@@ -57,20 +59,22 @@ _CONV2D_APPROX_TO_BLOCK_TYPES = {
     APPROX_DIAGONAL_NAME: fb.ConvDiagonalFB,
 }
 
+APPROX_KRONECKER_INDEP_NAME = "kron_indep"
+APPROX_KRONECKER_SERIES_1_NAME = "kron_series_1"
+APPROX_KRONECKER_SERIES_2_NAME = "kron_series_2"
+
+_FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES = {
+    APPROX_KRONECKER_INDEP_NAME: fb.FullyConnectedMultiIndepFB,
+    APPROX_KRONECKER_SERIES_1_NAME: partial(fb.FullyConnectedSeriesFB,
+                                            option=1),
+    APPROX_KRONECKER_SERIES_2_NAME: partial(fb.FullyConnectedSeriesFB,
+                                            option=2)
+}
+
 # Possible value for 'reuse' keyword argument. Sets 'reuse' to
 # tf.get_variable_scope().reuse.
 VARIABLE_SCOPE = "VARIABLE_SCOPE"
 
-# TODO(jamesmartens): need to add find_canonical_output back into this somewhere
-
-
-def ensure_sequence(obj):
-  """If `obj` isn't a tuple or list, return a tuple containing `obj`."""
-  if isinstance(obj, (tuple, list)):
-    return obj
-  else:
-    return (obj,)
-
 
 class LayerParametersDict(OrderedDict):
   """An OrderedDict where keys are Tensors or tuples of Tensors.
@@ -130,7 +134,6 @@ class LayerCollection(object):
 
   def __init__(self,
                graph=None,
-               colocate_cov_ops_with_inputs=False,
                name="LayerCollection"):
     self.fisher_blocks = LayerParametersDict()
     self.fisher_factors = OrderedDict()
@@ -142,7 +145,8 @@ class LayerCollection(object):
     self._default_generic_approximation = APPROX_FULL_NAME
     self._default_fully_connected_approximation = APPROX_KRONECKER_NAME
     self._default_convolution_2d_approximation = APPROX_KRONECKER_NAME
-    self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
+    self._default_fully_connected_multi_approximation = (
+        APPROX_KRONECKER_SERIES_2_NAME)
 
     with variable_scope.variable_scope(None, default_name=name) as scope:
       self._var_scope = scope.name
@@ -152,19 +156,13 @@ class LayerCollection(object):
     """LossFunctions registered with this LayerCollection."""
     return list(self._loss_dict.values())
 
-  def is_variable_registered(self, variable):
-    """Checks whether the variable has already been registered.
-
-    Args:
-      variable: A single variable or tensor.
-    Returns:
-      True if the variable has been registered either by itself or as part of a
-      tuple.
-    """
-    return any([
-        variable in key if isinstance(key, (tuple, list)) else variable == key
-        for key in self.fisher_blocks.keys()
-    ])
+  @property
+  def registered_variables(self):
+    """A tuple of all of the variables currently registered."""
+    tuple_of_tuples = (utils.ensure_sequence(key) for key, block
+                       in six.iteritems(self.fisher_blocks))
+    flat_tuple = tuple(item for tuple_ in tuple_of_tuples for item in tuple_)
+    return flat_tuple
 
   @property
   def linked_parameters(self):
@@ -213,6 +211,16 @@ class LayerCollection(object):
               value))
     self._default_convolution_2d_approximation = value
 
+  @property
+  def default_fully_connected_multi_approximation(self):
+    return self._default_fully_connected_multi_approximation
+
+  def set_default_fully_connected_multi_approximation(self, value):
+    if value not in _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES:
+      raise ValueError("{} is not a valid approximation for a fully-connected "
+                       "multi layer.".format(value))
+    self._default_fully_connected_multi_approximation = value
+
   def register_block(self, layer_key, fisher_block, reuse=VARIABLE_SCOPE):
     """Validates and registers the layer_key associated with the fisher_block.
 
@@ -221,7 +229,7 @@ class LayerCollection(object):
           existing registrations and to register if valid.
       fisher_block: The associated `FisherBlock`.
       reuse: Method to use for inserting new `FisherBlock`s. One of True, False,
-        or VARIABLE_SCOPE.
+        or 'VARIABLE_SCOPE'.
 
     Raises:
       ValueError: If `layer_key` was already registered and reuse is `False`,
@@ -258,9 +266,9 @@ class LayerCollection(object):
     variable_to_block = {
         var: (params, block)
         for (params, block) in self.fisher_blocks.items()
-        for var in ensure_sequence(params)
+        for var in utils.ensure_sequence(params)
     }
-    for variable in ensure_sequence(layer_key):
+    for variable in utils.ensure_sequence(layer_key):
       if variable in variable_to_block:
         prev_key, prev_block = variable_to_block[variable]
         raise ValueError(
@@ -272,13 +280,65 @@ class LayerCollection(object):
 
   def get_use_count_map(self):
     """Returns a dict of variables to their number of registrations."""
+    # TODO(b/70283403): Reimplement this in the old way, where each
+    # registration function would be responsible for incrementing the count.
+    # Also, this version has a bug: it won't do the right thing for generic
+    # registration for parameters that are shared.  i.e. it won't set the use
+    # count to infinity.
     vars_to_uses = defaultdict(int)
     for key, block in six.iteritems(self.fisher_blocks):
-      key = key if isinstance(key, (tuple, list)) else (key,)
+      n = (
+          block.num_inputs()*block.num_registered_minibatches if isinstance(
+              block, (fb.FullyConnectedSeriesFB, fb.FullyConnectedMultiIndepFB))
+          else block.num_registered_minibatches)
+      key = utils.ensure_sequence(key)
       for k in key:
-        vars_to_uses[k] += block.num_registered_minibatches
+        vars_to_uses[k] += n
     return vars_to_uses
 
+  def check_registration(self, variables):
+    """Checks that all variable uses have been registered properly.
+
+    Args:
+      variables: List of variables.
+
+    Raises:
+      ValueError: If any registered variables are not included in the list.
+      ValueError: If any variable in the list is not registered.
+      ValueError: If any variable in the list is registered with the wrong
+          number of "uses" in the subgraph recorded (vs the number of times that
+          variable is actually used in the subgraph).
+    """
+    # Note that overlapping parameters (i.e. those that share variables) will
+    # be caught by layer_collection.LayerParametersDict during registration.
+
+    reg_use_map = self.get_use_count_map()
+
+    error_messages = []
+
+    for var in variables:
+      total_uses = self.subgraph.variable_uses(var)
+      reg_uses = reg_use_map[var]
+
+      if reg_uses == 0:
+        error_messages.append("Variable {} not registered.".format(var))
+      elif (not math.isinf(reg_uses)) and reg_uses != total_uses:
+        error_messages.append(
+            "Variable {} registered with wrong number of uses ({} "
+            "registrations vs {} uses).".format(var, reg_uses, total_uses))
+
+    num_get_vars = len(reg_use_map)
+
+    if num_get_vars > len(variables):
+      error_messages.append("{} registered variables were not included in list."
+                            .format(num_get_vars - len(variables)))
+
+    if error_messages:
+      error_messages = [
+          "Found the following errors with variable registration:"
+      ] + error_messages
+      raise ValueError("\n\t".join(error_messages))
+
   def get_blocks(self):
     return self.fisher_blocks.values()
 
@@ -312,12 +372,12 @@ class LayerCollection(object):
       ValueError: If the parameters were already registered in a layer or
         identified as part of an incompatible group.
     """
-    params = frozenset(ensure_sequence(params))
+    params = frozenset(utils.ensure_sequence(params))
 
     # Check if any of the variables in 'params' is already in
     # 'self.fisher_blocks.keys()'.
     for registered_params, fisher_block in self.fisher_blocks.items():
-      registered_params_set = set(ensure_sequence(registered_params))
+      registered_params_set = set(utils.ensure_sequence(registered_params))
       for variable in params:
         if (variable in registered_params_set and
             params != registered_params_set):
@@ -351,7 +411,7 @@ class LayerCollection(object):
 
   def _get_linked_approx(self, params):
     """If params were linked, return their specified approximation."""
-    params_set = frozenset(ensure_sequence(params))
+    params_set = frozenset(utils.ensure_sequence(params))
     if params_set in self.linked_parameters:
       return self.linked_parameters[params_set]
     else:
@@ -370,11 +430,11 @@ class LayerCollection(object):
         this layer. Weight matrix should have shape [input_size, output_size].
         Bias should have shape [output_size].
       inputs: Tensor of shape [batch_size, input_size]. Inputs to layer.
-      outputs: Tensor of shape [batch_size, output_size]. Preactivations
+      outputs: Tensor of shape [batch_size, output_size]. Outputs
         produced by layer.
-      approx: str. One of APPROX_KRONECKER_NAME or APPROX_DIAGONAL_NAME.
+      approx: str. One of "kron" or "diagonal".
       reuse: bool or str.  If True, reuse an existing FisherBlock. If False,
-        create a new FisherBlock.  If VARIABLE_SCOPE, use
+        create a new FisherBlock.  If "VARIABLE_SCOPE", use
         tf.get_variable_scope().reuse.
 
     Raises:
@@ -416,10 +476,10 @@ class LayerCollection(object):
       inputs: Tensor of shape [batch_size, height, width, in_channels]. Inputs
         to layer.
       outputs: Tensor of shape [batch_size, height, width, out_channels].
-        Preactivations produced by layer.
-      approx: str. One of APPROX_KRONECKER_NAME or APPROX_DIAGONAL_NAME.
+        Output produced by layer.
+      approx: str. One of "kron" or "diagonal".
       reuse: bool or str.  If True, reuse an existing FisherBlock. If False,
-        create a new FisherBlock.  If VARIABLE_SCOPE, use
+        create a new FisherBlock.  If "VARIABLE_SCOPE", use
         tf.get_variable_scope().reuse.
 
     Raises:
@@ -449,14 +509,11 @@ class LayerCollection(object):
     """Registers a generic layer.
 
     Args:
-      params: Tensor or 2-tuple of Tensors corresponding to weight and bias of
-        this layer. Weight matrix should have shape [kernel_height,
-        kernel_width, in_channels, out_channels].  Bias should have shape
-        [out_channels].
+      params: Tensor or tuple of Tensors corresponding to the parameters.
       batch_size: 0-D Tensor. Size of the minibatch.
-      approx: str. One of APPROX_KRONECKER_NAME or APPROX_DIAGONAL_NAME.
+      approx: str. One of "full" or "diagonal".
       reuse: bool or str.  If True, reuse an existing FisherBlock. If False,
-        create a new FisherBlock.  If VARIABLE_SCOPE, use
+        create a new FisherBlock.  If "VARIABLE_SCOPE", use
         tf.get_variable_scope().reuse.
 
     Raises:
@@ -477,6 +534,47 @@ class LayerCollection(object):
     block = self.register_block(params, block_type(self, params), reuse=reuse)
     block.register_additional_minibatch(batch_size)
 
+  def register_fully_connected_multi(self, params, inputs, outputs,
+                                     approx=None):
+    """Register fully connected layers with shared parameters.
+
+    This can handle general fully-connected layers with shared parameters, but
+    has specialized approximations to deal with the case where there is a
+    meaningful linear order to the share instances (such as in an RNN).
+
+    Args:
+      params: Tensor or 2-tuple of Tensors corresponding to weight and bias of
+        this layer. Weight matrix should have shape [input_size, output_size].
+        Bias should have shape [output_size].
+      inputs: A list of tensors, each of shape [batch_size, input_size]. Inputs
+        to layer. In the case of RNNs, one Tensor per time step.
+      outputs: A list of tensors, the same length as 'inputs', each of shape
+        [batch_size, output_size]. Outputs produced by layer. In the case of
+        RNNs, one Tensor per time step.
+      approx: str. One of "kron_indep", "kron_series_1", or "kron_series_2".
+
+    Raises:
+      ValueError: For improper value to 'approx'.
+    """
+    if approx is None:
+      approx = self._get_linked_approx(params)
+      if approx is None:
+        approx = self.default_fully_connected_multi_approximation
+    has_bias = isinstance(params, (tuple, list))
+
+    # TODO(b/70283649): something along the lines of find_canonical_output
+    # should be added back in here (and for the other block types, arguably).
+
+    if approx not in _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES:
+      raise ValueError("Bad value {} for approx.".format(approx))
+    block_type = _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES[approx]
+
+    # For now we don't support multiple minibatches for this type of layer, so
+    # we set reuse=False
+    self.register_block(params,
+                        block_type(self, inputs, outputs, has_bias=has_bias),
+                        reuse=False)
+
   def register_categorical_predictive_distribution(self,
                                                    logits,
                                                    seed=None,
@@ -619,7 +717,6 @@ class LayerCollection(object):
 
     key = cls, args
     if key not in self.fisher_factors:
-      colo = self._colocate_cov_ops_with_inputs
       with variable_scope.variable_scope(self._var_scope):
-        self.fisher_factors[key] = cls(*args, colocate_cov_ops_with_inputs=colo)
+        self.fisher_factors[key] = cls(*args)
     return self.fisher_factors[key]
diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection_lib.py b/tensorflow/contrib/kfac/python/ops/layer_collection_lib.py
index d6bf61a210203dd74d4e93b65005f660b1fab4ff..f8aa230d9ca1f542950f56b1e6cf1ab7ccd3d05f 100644
--- a/tensorflow/contrib/kfac/python/ops/layer_collection_lib.py
+++ b/tensorflow/contrib/kfac/python/ops/layer_collection_lib.py
@@ -36,6 +36,9 @@ _allowed_symbols = [
     "APPROX_DIAGONAL_NAME",
     "APPROX_FULL_NAME",
     "VARIABLE_SCOPE",
+    "APPROX_KRONECKER_INDEP_NAME",
+    "APPROX_KRONECKER_SERIES_1_NAME",
+    "APPROX_KRONECKER_SERIES_2_NAME"
 ]
 
 remove_undocumented(__name__, allowed_exception_list=_allowed_symbols)
diff --git a/tensorflow/contrib/kfac/python/ops/loss_functions.py b/tensorflow/contrib/kfac/python/ops/loss_functions.py
index e2e5bc3ffea3e52087c24802948bc8260e3b199a..2daead2a7180fe57b715bd896303cd4c3fbdaca8 100644
--- a/tensorflow/contrib/kfac/python/ops/loss_functions.py
+++ b/tensorflow/contrib/kfac/python/ops/loss_functions.py
@@ -22,6 +22,7 @@ import abc
 
 import six
 
+from tensorflow.contrib.distributions.python.ops import onehot_categorical
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
@@ -91,13 +92,13 @@ class LossFunction(object):
 
   @abc.abstractmethod
   def _evaluate(self, targets):
-    """Evaluates the log probability of the targets.
+    """Evaluates the negative log probability of the targets.
 
     Args:
       targets: Tensor that distribution can calculate log_prob() of.
 
     Returns:
-      log probability of each target, summed across all targets.
+      negative log probability of each target, summed across all targets.
     """
     pass
 
@@ -785,3 +786,16 @@ def insert_slice_in_zeros(slice_to_insert, dim, dim_size, position):
   after[dim] = dim_size - position - 1
 
   return array_ops.pad(slice_to_insert, list(zip(before, after)))
+
+
+class OnehotCategoricalLogitsNegativeLogProbLoss(
+    CategoricalLogitsNegativeLogProbLoss):
+  """Neg log prob loss for a categorical distribution with onehot targets.
+
+  Identical to CategoricalLogitsNegativeLogProbLoss except that the underlying
+  distribution is OneHotCategorical as opposed to Categorical.
+  """
+
+  @property
+  def dist(self):
+    return onehot_categorical.OneHotCategorical(logits=self._logits)
diff --git a/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py b/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py
index e9bb4f14e9e24128382832fcdaccdc9b24017046..705a871d482565897e7ac850327729a6186f1746 100644
--- a/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py
+++ b/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py
@@ -31,6 +31,7 @@ _allowed_symbols = [
     "NormalMeanNegativeLogProbLoss",
     "NormalMeanVarianceNegativeLogProbLoss",
     "CategoricalLogitsNegativeLogProbLoss",
+    "OnehotCategoricalLogitsNegativeLogProbLoss",
     "MultiBernoulliNegativeLogProbLoss",
     "MultiBernoulliNegativeLogProbLoss",
     "insert_slice_in_zeros",
diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py
index ecf7f3e4e5ab7d9c151f760fdab733bc3830e37b..1974b07acfc879dc4bc844db9af88fd1043d6698 100644
--- a/tensorflow/contrib/kfac/python/ops/optimizer.py
+++ b/tensorflow/contrib/kfac/python/ops/optimizer.py
@@ -41,12 +41,12 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
                damping,
                layer_collection,
                var_list=None,
-               momentum=0.,
+               momentum=0.9,
                momentum_type="regular",
                norm_constraint=None,
                name="KFAC",
                estimation_mode="gradients",
-               colocate_gradients_with_ops=False,
+               colocate_gradients_with_ops=True,
                cov_devices=None,
                inv_devices=None):
     """Initializes the KFAC optimizer with the given settings.
@@ -70,8 +70,8 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
       var_list: Optional list or tuple of variables to train. Defaults to the
           list of variables collected in the graph under the key
           `GraphKeys.TRAINABLE_VARIABLES`.
-      momentum: The momentum value for this optimizer. Only applies when
-          momentum_type is 'regular' or 'adam'. (Default: 0)
+      momentum: The momentum decay constant to use. Only applies when
+          momentum_type is 'regular' or 'adam'. (Default: 0.9)
       momentum_type: The type of momentum to use in this optimizer, one of
           'regular', 'adam', or 'qmodel'. (Default: 'regular')
       norm_constraint: float or Tensor. If specified, the update is scaled down
@@ -85,6 +85,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
           more a more detailed description of these options.
       colocate_gradients_with_ops: Whether we should request gradients we
           compute in the estimator be colocated with their respective ops.
+          (Default: True)
       cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance
           computations will be placed on these devices in a round-robin fashion.
           Can be None, which means that no devices are specified.
@@ -136,12 +137,32 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
     self._batch_size = array_ops.shape(layer_collection.losses[0].inputs)[0]
     self._losses = layer_collection.losses
 
-    self.cov_update_op = self._fisher_est.cov_update_op
-    self.inv_update_op = self._fisher_est.inv_update_op
-    self.inv_updates_dict = self._fisher_est.inv_updates_dict
-
     super(KfacOptimizer, self).__init__(learning_rate, name=name)
 
+  @property
+  def cov_update_thunks(self):
+    return self._fisher_est.cov_update_thunks
+
+  @property
+  def cov_update_ops(self):
+    return self._fisher_est.cov_update_ops
+
+  @property
+  def cov_update_op(self):
+    return self._fisher_est.cov_update_op
+
+  @property
+  def inv_update_thunks(self):
+    return self._fisher_est.inv_update_thunks
+
+  @property
+  def inv_update_ops(self):
+    return self._fisher_est.inv_update_ops
+
+  @property
+  def inv_update_op(self):
+    return self._fisher_est.inv_update_op
+
   @property
   def variables(self):
     return self._fisher_est.variables
diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py
index d5461c9f2ea0512ad7c4f2d393ac8e7f441d1b77..e89508fa46b6e2ce278e5373e6c9d17203ad1ef2 100644
--- a/tensorflow/contrib/kfac/python/ops/utils.py
+++ b/tensorflow/contrib/kfac/python/ops/utils.py
@@ -20,16 +20,22 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.contrib.tpu.python.ops import tpu_ops
+from tensorflow.contrib.tpu.python.tpu import tpu_function
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
 
 # Method used for inverting matrices.
 POSDEF_INV_METHOD = "cholesky"
+POSDEF_EIG_METHOD = "self_adjoint"
 
 
 def set_global_constants(posdef_inv_method=None):
@@ -161,33 +167,11 @@ def mat2d_to_layer_params(vector_template, mat2d):
     return array_ops.reshape(mat2d, vector_template.shape)
 
 
-def compute_pi(left_factor, right_factor):
-  """Computes the scalar constant pi for Tikhonov regularization/damping.
-
-  pi = sqrt( (trace(A) / dim(A)) / (trace(B) / dim(B)) )
-  See section 6.3 of https://arxiv.org/pdf/1503.05671.pdf for details.
-
-  Args:
-    left_factor: The left Kronecker factor Tensor.
-    right_factor: The right Kronecker factor Tensor.
-
-  Returns:
-    The computed scalar constant pi for these Kronecker Factors (as a Tensor).
-  """
-  # Instead of dividing by the dim of the norm, we multiply by the dim of the
-  # other norm. This works out the same in the ratio.
-  left_norm = math_ops.trace(left_factor) * right_factor.get_shape().as_list()[
-      0]
-  right_norm = math_ops.trace(right_factor) * left_factor.get_shape().as_list()[
-      0]
-  return math_ops.sqrt(left_norm / right_norm)
-
-
 def posdef_inv(tensor, damping):
   """Computes the inverse of tensor + damping * identity."""
   identity = linalg_ops.eye(tensor.shape.as_list()[0], dtype=tensor.dtype)
   damping = math_ops.cast(damping, dtype=tensor.dtype)
-  return posdef_inv_funcs[POSDEF_INV_METHOD](tensor, identity, damping)
+  return posdef_inv_functions[POSDEF_INV_METHOD](tensor, identity, damping)
 
 
 def posdef_inv_matrix_inverse(tensor, identity, damping):
@@ -209,23 +193,51 @@ def posdef_inv_eig(tensor, identity, damping):
       eigenvectors / eigenvalues, eigenvectors, transpose_b=True)
 
 
-posdef_inv_funcs = {
+posdef_inv_functions = {
     "matrix_inverse": posdef_inv_matrix_inverse,
     "cholesky": posdef_inv_cholesky,
     "eig": posdef_inv_eig,
 }
 
 
+def posdef_eig(mat):
+  """Computes the eigendecomposition of a positive semidefinite matrix."""
+  return posdef_eig_functions[POSDEF_EIG_METHOD](mat)
+
+
+def posdef_eig_svd(mat):
+  """Computes the singular values and left singular vectors of a matrix."""
+  evals, evecs, _ = linalg_ops.svd(mat)
+
+  return evals, evecs
+
+
+def posdef_eig_self_adjoint(mat):
+  """Computes eigendecomposition using self_adjoint_eig."""
+  evals, evecs = linalg_ops.self_adjoint_eig(mat)
+  evals = math_ops.abs(evals)  # Should be equivalent to svd approach.
+
+  return evals, evecs
+
+
+posdef_eig_functions = {
+    "self_adjoint": posdef_eig_self_adjoint,
+    "svd": posdef_eig_svd,
+}
+
+
 class SubGraph(object):
   """Defines a subgraph given by all the dependencies of a given set of outputs.
   """
 
   def __init__(self, outputs):
+    # Set of all ancestor Tensors, Ops to 'outputs'.
     self._members = set()
 
     self._recurse_add(outputs)
 
   def _recurse_add(self, nodes):
+    """Recursively adds all of nodes' ancestors."""
     for node in nodes:
       if node in self._members:
         continue
@@ -241,8 +253,25 @@ class SubGraph(object):
     return node in self._members
 
   def variable_uses(self, var):
-    """Computes number of times a variable is used."""
-    return len(self._members.intersection(set(var.value().consumers())))
+    """Computes number of times a variable is used.
+
+    Args:
+      var: Variable or ResourceVariable instance.
+
+    Returns:
+      Number of times a variable is used within this subgraph.
+
+    Raises:
+      ValueError: If 'var' is not a variable type.
+    """
+    if isinstance(var, resource_variable_ops.ResourceVariable):
+      var = var.handle
+    elif isinstance(var, variables.Variable):
+      var = var.value()
+    else:
+      raise ValueError("%s does not appear to be a variable." % str(var))
+
+    return len(self._members.intersection(set(var.consumers())))
 
   def filter_list(self, node_list):
     """Filters 'node_list' to nodes in this subgraph."""
@@ -287,5 +316,109 @@ def fwd_gradients(ys, xs, grad_xs=None, stop_gradients=None):
 
   return dysdx
 
+
+def on_tpu():
+  """Returns True when building a TPU computation."""
+  return tpu_function.get_tpu_context().number_of_shards is not None
+
+
+def cross_replica_mean(tensor, name=None):
+  """Takes mean value of a Tensor across all TPU cores.
+
+  Args:
+    tensor: Tensor to be synchronized.
+    name: None or string. Name of Op.
+
+  Returns:
+    Average of Tensor across all TPU cores.
+
+  Raises:
+    ValueError: If called outside of TPU context.
+  """
+  with ops.name_scope(name, "cross_replica_mean", [tensor]):
+    num_shards = tpu_function.get_tpu_context().number_of_shards
+    if num_shards is None:
+      raise ValueError(
+          "Cannot take cross_replica_mean() outside of TPU Context.")
+    if num_shards == 1:
+      return tensor
+    return tpu_ops.cross_replica_sum(tensor / num_shards)
+
+
+def ensure_sequence(obj):
+  """If `obj` isn't a tuple or list, return a tuple containing `obj`."""
+  if isinstance(obj, (tuple, list)):
+    return obj
+  else:
+    return (obj,)
+
+
+def batch_execute(global_step, thunks, batch_size, name=None):
+  """Executes a subset of ops per global step.
+
+  Given a list of thunks, each of which produces a single stateful op,
+  ensures that exactly 'batch_size' ops are run per global step. Ops are
+  scheduled in a round-robin fashion. For example, with 3 ops
+
+    global_step | op0 | op1 | op2
+    ------------+-----+-----+-----
+        0       |  x  |  x  |
+    ------------+-----+-----+-----
+        1       |  x  |     |  x
+    ------------+-----+-----+-----
+        2       |     |  x  |  x
+    ------------+-----+-----+-----
+        3       |  x  |  x  |
+    ------------+-----+-----+-----
+        4       |  x  |     |  x
+
+  Does not guarantee order of op execution within a single global step.
+
+  Args:
+    global_step: Tensor indicating time. Determines which ops run.
+    thunks: List of thunks. Each thunk encapsulates one op. Return values are
+      ignored.
+    batch_size: int. Number of ops to execute per global_step.
+    name: string or None. Name scope for newly added ops.
+
+  Returns:
+    List of ops. Exactly 'batch_size' ops are guaranteed to have an effect
+    every global step.
+  """
+
+  def true_fn(thunk):
+    """Ensures thunk is executed and returns an Op (not a Tensor)."""
+
+    def result():
+      with ops.control_dependencies([thunk()]):
+        return control_flow_ops.no_op()
+
+    return result
+
+  def false_fn(_):
+    """Executes a no-op."""
+
+    def result():
+      return control_flow_ops.no_op()
+
+    return result
+
+  with ops.name_scope(name, "batch_execute"):
+    true_fns = [true_fn(thunk) for thunk in thunks]
+    false_fns = [false_fn(thunk) for thunk in thunks]
+    num_thunks = len(thunks)
+    conditions = [
+        math_ops.less(
+            math_ops.mod(batch_size - 1 + global_step * batch_size - j,
+                         num_thunks), batch_size) for j in range(num_thunks)
+    ]
+    result = [
+        control_flow_ops.cond(condition, true_fn, false_fn)
+        for (condition, true_fn,
+             false_fn) in zip(conditions, true_fns, false_fns)
+    ]
+    return result
+
+
 # TODO(b/69623235): Add a function for finding tensors that share gradients
 # to eliminate redundant fisher factor computations.
diff --git a/tensorflow/contrib/kfac/python/ops/utils_lib.py b/tensorflow/contrib/kfac/python/ops/utils_lib.py
index 9df07d69aad5e61f9cfb994c9a63fdec04f025fe..cc48e3c69f24c2abd343e2e120d3589cd323fcdc 100644
--- a/tensorflow/contrib/kfac/python/ops/utils_lib.py
+++ b/tensorflow/contrib/kfac/python/ops/utils_lib.py
@@ -30,7 +30,6 @@ _allowed_symbols = [
     "kronecker_product",
     "layer_params_to_mat2d",
     "mat2d_to_layer_params",
-    "compute_pi",
     "posdef_inv",
     "posdef_inv_matrix_inverse",
     "posdef_inv_cholesky",
@@ -38,6 +37,8 @@ _allowed_symbols = [
     "SubGraph",
     "generate_random_signs",
     "fwd_gradients",
+    "ensure_sequence",
+    "batch_execute",
 ]
 
 remove_undocumented(__name__, allowed_exception_list=_allowed_symbols)
diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py
index 226d933d85d91600e36ffb84212703e10455bfbb..b7d34d6435789e54403926a342481971e854b449 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column.py
@@ -156,6 +156,10 @@ from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import deprecation
 
 
+# Imports the core `InputLayer` symbol in contrib during development.
+InputLayer = fc_core.InputLayer  # pylint: disable=invalid-name
+
+
 class _LinearEmbeddingLookupArguments(
     collections.namedtuple("_LinearEmbeddingLookupArguments",
                            ["input_tensor",
@@ -521,7 +525,7 @@ def sparse_column_with_integerized_feature(column_name,
 
   Args:
     column_name: A string defining sparse column name.
-    bucket_size: An int that is > 1. The number of buckets. It should be bigger
+    bucket_size: An int that is >= 1. The number of buckets. It should be bigger
       than maximum feature. In other words features in this column should be an
       int64 in range [0, bucket_size)
     combiner: A string specifying how to reduce if the sparse column is
@@ -539,7 +543,7 @@ def sparse_column_with_integerized_feature(column_name,
     An integerized _SparseColumn definition.
 
   Raises:
-    ValueError: bucket_size is not greater than 1.
+    ValueError: bucket_size is less than 1.
     ValueError: dtype is not integer.
   """
   return _SparseColumnIntegerized(
@@ -748,6 +752,10 @@ class _WeightedSparseColumn(
         {self.weight_column_name: parsing_ops.VarLenFeature(self.dtype)})
     return config
 
+  @property
+  def lookup_config(self):
+    return self.sparse_id_column.lookup_config
+
   @property
   def key(self):
     """Returns a string which will be used as a key when we do sorting."""
diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops.py b/tensorflow/contrib/layers/python/layers/feature_column_ops.py
index fa0047f05d893f6543ddb1680824a32469e13293..78affea44cbfb92523063968dbc1be98841854db 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column_ops.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column_ops.py
@@ -97,10 +97,13 @@ def _input_from_feature_columns(columns_to_tensors,
                                 trainable,
                                 scope,
                                 output_rank,
-                                default_name):
+                                default_name,
+                                cols_to_outs=None):
   """Implementation of `input_from(_sequence)_feature_columns`."""
   columns_to_tensors = columns_to_tensors.copy()
   check_feature_columns(feature_columns)
+  if cols_to_outs is not None and not isinstance(cols_to_outs, dict):
+    raise ValueError('cols_to_outs must be a dict unless None')
   with variable_scope.variable_scope(scope,
                                      default_name=default_name,
                                      values=columns_to_tensors.values()):
@@ -144,6 +147,8 @@ def _input_from_feature_columns(columns_to_tensors,
           except ValueError as e:
             raise ValueError('Error creating input layer for column: {}.\n'
                              '{}, {}'.format(column.name, e, ee))
+        if cols_to_outs is not None:
+          cols_to_outs[column] = output_tensors[-1]
     return array_ops.concat(output_tensors, output_rank - 1)
 
 
@@ -151,7 +156,8 @@ def input_from_feature_columns(columns_to_tensors,
                                feature_columns,
                                weight_collections=None,
                                trainable=True,
-                               scope=None):
+                               scope=None,
+                               cols_to_outs=None):
   """A tf.contrib.layers style input layer builder based on FeatureColumns.
 
   Generally a single example in training data is described with feature columns.
@@ -196,6 +202,8 @@ def input_from_feature_columns(columns_to_tensors,
     trainable: If `True` also add variables to the graph collection
       `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
     scope: Optional scope for variable_scope.
+    cols_to_outs: Optional dict from feature column to output tensor,
+      which is concatenated into the returned tensor.
 
   Returns:
     A Tensor which can be consumed by hidden layers in the neural network.
@@ -209,7 +217,8 @@ def input_from_feature_columns(columns_to_tensors,
                                      trainable,
                                      scope,
                                      output_rank=2,
-                                     default_name='input_from_feature_columns')
+                                     default_name='input_from_feature_columns',
+                                     cols_to_outs=cols_to_outs)
 
 
 @experimental
diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
index fbfa0e32de55edab3c90189ddfe05ab826ac9167..e6bbd86ab722c4e853a59f816bed8a8ac1fe9ede 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
@@ -607,6 +607,31 @@ class CreateInputLayersForDNNsTest(test.TestCase):
       # Verify cross compatibility: Core builder output should equal to contrib.
       self.assertAllEqual(output.eval().shape, output_core.eval().shape)
 
+  def testAllDNNColumnsWithColumnwiseOutputs(self):
+    sparse_column = feature_column.sparse_column_with_keys(
+        "ids", ["a", "b", "c", "unseen"])
+    real_valued_column = feature_column.real_valued_column("income", 2)
+    one_hot_column = feature_column.one_hot_column(sparse_column)
+    embedding_column = feature_column.embedding_column(sparse_column, 10)
+    features = {
+        "ids":
+            sparse_tensor.SparseTensor(
+                values=["c", "b", "a"],
+                indices=[[0, 0], [1, 0], [2, 0]],
+                dense_shape=[3, 1]),
+        "income":
+            constant_op.constant([[20.3, 10], [110.3, 0.4], [-3.0, 30.4]]),
+    }
+    columns = [one_hot_column, embedding_column, real_valued_column]
+    cols_to_outs = {}
+    feature_column_ops.input_from_feature_columns(
+        features, columns, cols_to_outs=cols_to_outs)
+    with self.test_session():
+      variables_lib.global_variables_initializer().run()
+      lookup_ops.tables_initializer().run()
+      for column in columns:
+        self.assertTrue(column in cols_to_outs)
+
   def testRealValuedColumn(self):
     real_valued = feature_column.real_valued_column("price")
     features = {"price": constant_op.constant([[20.], [110], [-3]])}
diff --git a/tensorflow/contrib/layers/python/layers/feature_column_test.py b/tensorflow/contrib/layers/python/layers/feature_column_test.py
index 5ae885b7202357326bd8494d382adb57fa636d20..2eaea231776bd2f5fb8bb4bd422074beacd61720 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column_test.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column_test.py
@@ -102,6 +102,16 @@ class FeatureColumnTest(test.TestCase):
     weighted_ids = fc.weighted_sparse_column(ids, "weights")
     self.assertEqual(weighted_ids.name, "ids_weighted_by_weights")
 
+  def testWeightedSparseColumnWithVocabularyFile(self):
+    ids = fc.sparse_column_with_vocabulary_file(
+        "ids", "a_file", num_oov_buckets=7, vocab_size=3)
+    weighted_ids = fc.weighted_sparse_column(ids, "weights")
+    self.assertEqual(weighted_ids.name, "ids_weighted_by_weights")
+    self.assertEqual(weighted_ids.lookup_config, ids.lookup_config)
+    self.assertEqual(weighted_ids.lookup_config.vocab_size, 3)
+    self.assertEqual(weighted_ids.lookup_config.num_oov_buckets, 7)
+    self.assertEqual(weighted_ids.lookup_config.vocabulary_file, "a_file")
+
   def testWeightedSparseColumnDeepCopy(self):
     ids = fc.sparse_column_with_keys("ids", ["marlo", "omar", "stringer"])
     weighted = fc.weighted_sparse_column(ids, "weights")
diff --git a/tensorflow/contrib/layers/python/layers/initializers.py b/tensorflow/contrib/layers/python/layers/initializers.py
index b12a882d9ae88f7cf4f920cfa5872e5de1c67290..51610f21b24f1d40f26630cc1e69ca723d130639 100644
--- a/tensorflow/contrib/layers/python/layers/initializers.py
+++ b/tensorflow/contrib/layers/python/layers/initializers.py
@@ -79,7 +79,8 @@ def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False,
   ```
 
   * To get [Delving Deep into Rectifiers](
-     http://arxiv.org/pdf/1502.01852v1.pdf), use (Default):<br/>
+     http://arxiv.org/pdf/1502.01852v1.pdf) (also know as the "MSRA 
+     initialization"), use (Default):<br/>
     `factor=2.0 mode='FAN_IN' uniform=False`
   * To get [Convolutional Architecture for Fast Feature Embedding](
      http://arxiv.org/abs/1408.5093), use:<br/>
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 6cd586a5f016c76cc52b340bfd0d32fa08f23748..f3229a1605c72c61d0d1cc638a9a21048ac60cbe 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -1896,7 +1896,7 @@ class GDN(base.Layer):
     outputs.set_shape(inputs.get_shape())
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     channel_axis = self._channel_axis()
     input_shape = tensor_shape.TensorShape(input_shape)
     if not 3 <= input_shape.ndim <= 5:
@@ -2561,7 +2561,10 @@ def separable_convolution2d(
           regularizer=weights_regularizer,
           trainable=trainable,
           collections=weights_collections)
-      strides = [1, 1, stride_h, stride_w] if data_format.startswith('NC') else [1, stride_h, stride_w, 1]
+      strides = [1, 1, stride_h,
+                 stride_w] if data_format.startswith('NC') else [
+                     1, stride_h, stride_w, 1
+                 ]
 
       outputs = nn.depthwise_conv2d(inputs, depthwise_weights, strides, padding,
                                     rate=utils.two_element_tuple(rate),
@@ -2651,7 +2654,7 @@ def spatial_softmax(features,
     ValueError: If unexpected data_format specified.
     ValueError: If num_channels dimension is unspecified.
   """
-  with variable_scope.variable_scope(name, 'spatial_softmax'):  
+  with variable_scope.variable_scope(name, 'spatial_softmax'):
     shape = array_ops.shape(features)
     static_shape = features.shape
     if data_format == DATA_FORMAT_NHWC:
@@ -2663,30 +2666,39 @@ def spatial_softmax(features,
     if num_channels.value is None:
       raise ValueError('The num_channels dimension of the inputs to '
                        '`spatial_softmax` should be defined. Found `None`.')
-  
-    with ops.name_scope('spatial_softmax_op', 'spatial_softmax_op', [features]):  
+
+    with ops.name_scope('spatial_softmax_op', 'spatial_softmax_op', [features]):
       # Create tensors for x and y coordinate values, scaled to range [-1, 1].
       pos_x, pos_y = array_ops.meshgrid(math_ops.lin_space(-1., 1., num=height),
                                         math_ops.lin_space(-1., 1., num=width),
                                         indexing='ij')
       pos_x = array_ops.reshape(pos_x, [height * width])
       pos_y = array_ops.reshape(pos_y, [height * width])
+      
       if temperature is None:
-        temperature_collections = utils.get_variable_collections(
-            variables_collections, 'temperature')
-        temperature = variables.model_variable(
-            'temperature',
-            shape=(),
-            dtype=dtypes.float32,
-            initializer=init_ops.ones_initializer(),
-            collections=temperature_collections,
-            trainable=trainable)
+        temp_initializer = init_ops.ones_initializer()
+      else:
+        temp_initializer = init_ops.constant_initializer(temperature)
+          
+      if not trainable:
+        temp_collections = None
+      else:
+        temp_collections = utils.get_variable_collections(
+              variables_collections, 'temperature')
+      
+      temperature = variables.model_variable(
+          'temperature',
+          shape=(),
+          dtype=dtypes.float32,
+          initializer=temp_initializer,
+          collections=temp_collections,
+          trainable=trainable)
       if data_format == 'NCHW':
         features = array_ops.reshape(features, [-1, height * width])
       else:
         features = array_ops.reshape(
             array_ops.transpose(features, [0, 3, 1, 2]), [-1, height * width])
-  
+
       softmax_attention = nn.softmax(features/temperature)
       expected_x = math_ops.reduce_sum(
           pos_x * softmax_attention, [1], keep_dims=True)
@@ -2699,8 +2711,6 @@ def spatial_softmax(features,
   return feature_keypoints
 
 
-
-
 def stack(inputs, layer, stack_args, **kwargs):
   """Builds a stack of layers by applying layer repeatedly using stack_args.
 
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index a05e464a26d8167707ce6d6455aca50b0416aa1f..a9bdbe01387653bada1f1e5e9948db7a737eb600 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -1747,6 +1747,12 @@ class BatchNormTest(test.TestCase):
     expected_var *= correction_factor
     return expected_var, correction_factor
 
+  def testBatchNormCenterFalse(self):
+    a = array_ops.placeholder(dtype=dtypes.float32, shape=(10, 10, 10, 10))
+    # Test that center=False builds a valid graph.
+    _layers.batch_norm(a, center=False, data_format='NCHW',
+                       zero_debias_moving_mean=True)
+
   def testUnknownShape(self):
     with ops.Graph().as_default() as g, self.test_session(g):
       inputs = array_ops.placeholder(dtype=dtypes.float32)
@@ -3231,7 +3237,11 @@ class SeparableConv2dTest(test.TestCase):
       images = random_ops.random_uniform((5, height, width, 3), seed=1)
       regularizer = regularizers.l2_regularizer(0.01)
       layers_lib.separable_conv2d(
-          images, 32, [3, 3], 2, weights_regularizer=regularizer)
+          images,
+          32, [3, 3],
+          2,
+          weights_regularizer=regularizer,
+          weights_initializer=init_ops.ones_initializer())
       self.assertEqual(
           len(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)), 2)
       weight_decay = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)[0]
@@ -3239,12 +3249,31 @@ class SeparableConv2dTest(test.TestCase):
           weight_decay.op.name,
           'SeparableConv2d/depthwise_kernel/Regularizer/l2_regularizer')
       sess.run(variables_lib.global_variables_initializer())
-      self.assertLessEqual(sess.run(weight_decay), 0.05)
+      depth_weight_one = sess.run(weight_decay)
       weight_decay = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)[1]
       self.assertEqual(
           weight_decay.op.name,
           'SeparableConv2d/pointwise_kernel/Regularizer/l2_regularizer')
-      self.assertLessEqual(sess.run(weight_decay), 0.05)
+      pointwise_weight_one = sess.run(weight_decay)
+
+      regularizer = regularizers.l2_regularizer(1.0)
+      layers_lib.separable_conv2d(
+          images,
+          32, [3, 3],
+          2,
+          weights_regularizer=regularizer,
+          weights_initializer=init_ops.ones_initializer())
+      self.assertEqual(
+          len(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)), 4)
+      weight_decay = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)[2]
+      sess.run(variables_lib.global_variables_initializer())
+      depth_weight_two = sess.run(weight_decay)
+      weight_decay = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)[3]
+      pointwise_weight_two = sess.run(weight_decay)
+
+      self.assertAllClose(
+          [100.0 * depth_weight_one, 100.0 * pointwise_weight_one],
+          [depth_weight_two, pointwise_weight_two])
 
   def testReuseConvWithWeightDecay(self):
     height, width = 3, 3
@@ -3332,11 +3361,18 @@ class SeparableConv2dTest(test.TestCase):
         batch, height, width = 4, 10, 12
         kernel_dim, stride = 3, 2
         images = random_ops.random_uniform((batch, 3, height, width), seed=1)
-        output = layers_lib.separable_conv2d(images, num_outputs=num_filters, kernel_size=[kernel_dim, kernel_dim],
-                                             depth_multiplier=2, stride=stride, padding='VALID', data_format='NCHW')
-        self.assertListEqual(
-            output.get_shape().as_list(), [batch, correct_output_filters,
-                                           (height - kernel_dim + 1) // stride, (width - kernel_dim + 1) // stride])
+        output = layers_lib.separable_conv2d(
+            images,
+            num_outputs=num_filters,
+            kernel_size=[kernel_dim, kernel_dim],
+            depth_multiplier=2,
+            stride=stride,
+            padding='VALID',
+            data_format='NCHW')
+        self.assertListEqual(output.get_shape().as_list(), [
+            batch, correct_output_filters, (height - kernel_dim + 1) // stride,
+            (width - kernel_dim + 1) // stride
+        ])
 
 
 class ScaleGradientTests(test.TestCase):
diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
index 31a1b38bd4832c5816136cab3297aa22e843b0f3..123275e1fde047cd3772528641b2e3b09742fbdc 100644
--- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py
+++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
@@ -34,12 +34,13 @@ from six.moves import xrange  # pylint: disable=redefined-builtin
 from tensorflow.contrib.framework.python import ops as contrib_framework_ops
 from tensorflow.python.framework import function
 from tensorflow.python.framework import ops as framework_ops
+from tensorflow.python.layers import base
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import template
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
 __all__ = ["rev_block", "RevBlock", "recompute_grad"]
@@ -137,7 +138,17 @@ def _rev_block_forward(x1,
   return y1, y2
 
 
-class RevBlock(object):
+def _scope_wrap(fn, scope):
+
+  @functools.wraps(fn)
+  def wrap(*args, **kwargs):
+    with variable_scope.variable_scope(scope):
+      return fn(*args, **kwargs)
+
+  return wrap
+
+
+class RevBlock(base.Layer):
   """Block of reversible layers. See rev_block."""
 
   def __init__(self,
@@ -146,7 +157,10 @@ class RevBlock(object):
                num_layers=1,
                f_side_input=None,
                g_side_input=None,
-               use_efficient_backprop=True):
+               use_efficient_backprop=True,
+               name="revblock",
+               **kwargs):
+    super(RevBlock, self).__init__(name=name, **kwargs)
 
     if isinstance(f, list):
       assert len(f) == num_layers
@@ -158,18 +172,8 @@ class RevBlock(object):
     else:
       g = [g] * num_layers
 
-    scope_prefix = "revblock/revlayer_%d/"
-    f_scope = scope_prefix + "f"
-    g_scope = scope_prefix + "g"
-
-    f = [
-        template.make_template(f_scope % i, fn, create_scope_now_=True)
-        for i, fn in enumerate(f)
-    ]
-    g = [
-        template.make_template(g_scope % i, fn, create_scope_now_=True)
-        for i, fn in enumerate(g)
-    ]
+    f = [_scope_wrap(fn, "revlayer_%d/f" % i) for i, fn in enumerate(f)]
+    g = [_scope_wrap(fn, "revlayer_%d/g" % i) for i, fn in enumerate(g)]
 
     self.f = f
     self.g = g
@@ -180,6 +184,39 @@ class RevBlock(object):
 
     self._use_efficient_backprop = use_efficient_backprop
 
+  def call(self, inputs, forward=True):
+    vs = variable_scope.get_variable_scope()
+    vars_before = vs.global_variables()
+
+    if forward:
+      x1, x2 = inputs
+      out = self._forward(x1, x2)
+    else:
+      y1, y2 = inputs
+      out = self._backward(y1, y2)
+
+    # Add any created variables to the Layer's variable stores
+    new_vars = vs.global_variables()[len(vars_before):]
+    train_vars = vs.trainable_variables()
+    for new_var in new_vars:
+      if new_var in train_vars:
+        self._trainable_weights.append(new_var)
+      else:
+        self._non_trainable_weights.append(new_var)
+
+    return out
+
+  def forward(self, x1, x2):
+    return self.apply([x1, x2])
+
+  def backward(self, y1, y2):
+    return self.apply([y1, y2], forward=False)
+
+  def build(self, _):
+    logging.warn("RevBlock constructs its variables on first call, not on "
+                 "build.")
+    self.built = True
+
   def _efficient_grad_fn(self, inputs, variables, ys, grad_ys):
     """Custom gradient fn for a block of reversible residual layers."""
     side_inputs = inputs[2:]
@@ -228,17 +265,18 @@ class RevBlock(object):
     f.reverse()
     g.reverse()
 
-    for i in xrange(self.num_layers):
-      ys, grad_ys, f_ret, g_ret = _rev_layer_backward(
-          ys, grad_ys, f[i], g[i], f_vars[i], self.f_side_input, g_vars[i],
-          self.g_side_input)
+    with variable_scope.variable_scope(self.scope_name, reuse=True):
+      for i in xrange(self.num_layers):
+        ys, grad_ys, f_ret, g_ret = _rev_layer_backward(
+            ys, grad_ys, f[i], g[i], f_vars[i], self.f_side_input, g_vars[i],
+            self.g_side_input)
 
-      grad_f_vars, grad_f_side = f_ret
-      grad_g_vars, grad_g_side = g_ret
-      f_var_grads.append(grad_f_vars)
-      g_var_grads.append(grad_g_vars)
-      f_side_grads.append(grad_f_side)
-      g_side_grads.append(grad_g_side)
+        grad_f_vars, grad_f_side = f_ret
+        grad_g_vars, grad_g_side = g_ret
+        f_var_grads.append(grad_f_vars)
+        g_var_grads.append(grad_g_vars)
+        f_side_grads.append(grad_f_side)
+        g_side_grads.append(grad_g_side)
 
     # Accumulate layer gradients for f_side_input and g_side_input
     acc_f_side_grads = _acc_grads(*f_side_grads)
@@ -265,7 +303,7 @@ class RevBlock(object):
     grad_x1, grad_x2 = grad_ys
     return [grad_x1, grad_x2] + side_input_grads, variable_grads
 
-  def forward(self, x1, x2):
+  def _forward(self, x1, x2):
     """Run forward through the reversible layers."""
 
     side_inputs = [self.f_side_input, self.g_side_input]
@@ -275,7 +313,7 @@ class RevBlock(object):
         self._efficient_grad_fn if self._use_efficient_backprop else None)
 
     @_fn_with_custom_grad(custom_grad_fn)
-    def _forward(x1_, x2_, *flat_side_inputs):
+    def _forward_wrap(x1_, x2_, *flat_side_inputs):
       f_side, g_side = nest.pack_sequence_as(side_inputs, flat_side_inputs)
       return _rev_block_forward(
           x1_,
@@ -287,9 +325,9 @@ class RevBlock(object):
           g_side_input=g_side,
           gate_outputs=self._use_efficient_backprop)
 
-    return _forward(x1, x2, *flat_side_inputs)
+    return _forward_wrap(x1, x2, *flat_side_inputs)
 
-  def backward(self, y1, y2):
+  def _backward(self, y1, y2):
     """Run backward through the reversible layers."""
 
     f = list(self.f)
@@ -356,7 +394,14 @@ def rev_block(x1,
   Returns:
     y1, y2: tuple of float Tensors.
   """
-  block = RevBlock(f, g, num_layers, f_side_input, g_side_input, is_training)
+  block = RevBlock(
+      f=f,
+      g=g,
+      num_layers=num_layers,
+      f_side_input=f_side_input,
+      g_side_input=g_side_input,
+      use_efficient_backprop=is_training,
+      _reuse=variable_scope.get_variable_scope().reuse)
   return block.forward(x1, x2)
 
 
diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py
index a420753fd5728e7eef4f135d4943d25e8e05d5c2..cbcbcd75114a522b95631e4e7e95c1641b0a9987 100644
--- a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py
+++ b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py
@@ -188,13 +188,46 @@ class RevBlockTest(test.TestCase):
 
     def f(x):
       x = convolutional.conv1d(x, self.CHANNELS // 2, 3, padding="same")
-      x = core_layers.batch_normalization(x, training=True)
+      x = layers.batch_norm(x, is_training=True)
       x = convolutional.conv1d(x, self.CHANNELS // 2, 3, padding="same")
-      x = core_layers.batch_normalization(x, training=True)
+      x = layers.batch_norm(x, is_training=True)
       return x
 
     self._testRevBlock(x=x, f=f)
 
+  def testReuse(self):
+
+    def f(x):
+      return core_layers.dense(x, self.CHANNELS // 2)
+
+    def g(x):
+      return core_layers.dense(x, self.CHANNELS // 2)
+
+    x = random_ops.random_uniform(
+        [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32)
+    x1, x2 = array_ops.split(x, 2, axis=-1)
+
+    with variable_scope.variable_scope("test"):
+      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)
+
+    num_vars_before = len(variables.global_variables())
+
+    with variable_scope.variable_scope("test", reuse=True):
+      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)
+
+    num_vars_after = len(variables.global_variables())
+    self.assertEqual(num_vars_before, num_vars_after)
+
+    loss = math_ops.reduce_mean(y1 + y2)
+    _ = gradients_impl.gradients(loss,
+                                 [x] + variables.trainable_variables())
+
+    with variable_scope.variable_scope("test", reuse=True):
+      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)
+
+    num_vars_after = len(variables.global_variables())
+    self.assertEqual(num_vars_before, num_vars_after)
+
 
 class RecomputeTest(test.TestCase):
 
diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD
index 94920db574e07529c28313a78e0128676fcc7970..ee3611ca9385e80d30e42f8405c8ac318e66771b 100644
--- a/tensorflow/contrib/learn/BUILD
+++ b/tensorflow/contrib/learn/BUILD
@@ -10,7 +10,7 @@ package(default_visibility = [
     "//tensorflow:internal",
 ])
 
-load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "py_test", "tf_py_test")
 
 py_library(
     name = "learn",
@@ -154,12 +154,11 @@ py_test(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "experiment_test",
     size = "medium",
     srcs = ["python/learn/experiment_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":learn",
         "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/core:protos_all_py",
@@ -173,6 +172,17 @@ py_test(
     ],
 )
 
+py_test(
+    name = "export_strategy_test",
+    size = "small",
+    srcs = ["python/learn/export_strategy_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":learn",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
 py_test(
     name = "graph_actions_test",
     size = "small",
@@ -346,6 +356,7 @@ py_test(
     srcs = ["python/learn/estimators/dnn_linear_combined_test.py"],
     shard_count = 4,
     srcs_version = "PY2AND3",
+    tags = ["no_oss"],  # flaky b/70524820
     deps = [
         ":learn",
         "//tensorflow/contrib/layers:layers_py",
@@ -461,6 +472,7 @@ py_test(
     size = "medium",
     srcs = ["python/learn/estimators/state_saving_rnn_estimator_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["noasan"],
     deps = [
         ":learn",
         "//tensorflow/contrib/layers:layers_py",
@@ -715,12 +727,11 @@ py_test(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "graph_io_test",
     size = "small",
     srcs = ["python/learn/learn_io/graph_io_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":learn",
         "//tensorflow/python:client",
         "//tensorflow/python:client_testlib",
@@ -736,20 +747,7 @@ py_test(
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
     ],
-)
-
-py_test(
-    name = "numpy_io_test",
-    size = "small",
-    srcs = ["python/learn/learn_io/numpy_io_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":learn",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:training",
-        "//third_party/py/numpy",
-    ],
+    grpc_enabled = True,
 )
 
 py_test(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
index 14750961efa30128708430fac038498de0a42118..ef5e620e8f08cffa7c2b945089aa5d150baefefc 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn.datasets import base
 from tensorflow.contrib.learn.python.learn.estimators import composable_model
@@ -55,7 +55,7 @@ def _base_model_fn(features, labels, mode, params):
     raise NotImplementedError
 
   def _train_op_fn(loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     assert global_step
     train_step = model.get_train_step(loss)
 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
index cb15ef23e95d27c737d8ae08065b804bafd39a07..c17b41c0f767e19d9c3635a8f60347a49b297cfb 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
@@ -23,7 +23,7 @@ import six
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import metric_spec
@@ -189,7 +189,7 @@ def _dnn_model_fn(features, labels, mode, params, config=None):
       """Returns the op to optimize the loss."""
       return optimizers.optimize_loss(
           loss=loss,
-          global_step=contrib_variables.get_global_step(),
+          global_step=training_util.get_global_step(),
           learning_rate=_LEARNING_RATE,
           optimizer=_get_optimizer(optimizer),
           gradient_multipliers=(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py
index 57e70e169ca9d6fb2adc4e50bf387cc7cf330aed..4e65c180d8bee9ab8fe9b1fbf32edc229c31af09 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py
@@ -1046,11 +1046,14 @@ class DNNLinearCombinedClassifierTest(test.TestCase):
 
     if global_step == 100:
       # Expected is 100, but because of the global step increment bug, is 50.
-      self.assertEqual(50, step_counter.steps)
+      # Occasionally, step increments one more time due to a race condition,
+      # reaching 51 steps.
+      self.assertIn(step_counter.steps, [50, 51])
     else:
-      # Occasionally, training stops when global_step == 101, due to a race
-      # condition.
-      self.assertEqual(51, step_counter.steps)
+      # Occasionally, training stops when global_step == 102, due to a race
+      # condition. In addition, occasionally step increments one more time due
+      # to a race condition reaching 52 steps.
+      self.assertIn(step_counter.steps, [51, 52])
 
   def testGlobalStepDNNLinearCombinedBugFixed(self):
     """Tests global step update for dnn-linear combined model."""
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index 788d2d0b1a58fad16712c968593b40de0d3979f0..50c74add86fcf62c738e81426bfaf842fbac2b4e 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -30,7 +30,6 @@ import six
 
 from google.protobuf import message
 from tensorflow.contrib import layers
-from tensorflow.contrib import metrics as metrics_lib
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_args
 from tensorflow.contrib.framework import list_variables
@@ -60,6 +59,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import metrics as metrics_lib
 from tensorflow.python.ops import resources
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
@@ -360,10 +360,23 @@ def _write_dict_to_summary(output_dir, dictionary, current_global_step):
         logging.warn('Skipping summary for %s, cannot parse string to Summary.',
                      key)
         continue
+    elif isinstance(dictionary[key], np.ndarray):
+      value = summary_proto.value.add()
+      value.tag = key
+      value.node_name = key
+      tensor_proto = tensor_util.make_tensor_proto(dictionary[key])
+      value.tensor.CopyFrom(tensor_proto)
+      logging.info(
+          'Summary for np.ndarray is not visible in Tensorboard by default. '
+          'Consider using a Tensorboard plugin for visualization (see '
+          'https://github.com/tensorflow/tensorboard-plugin-example/blob/master/README.md '  # pylint:disable=line-too-long
+          'for more information).'
+      )
     else:
       logging.warn(
           'Skipping summary for %s, must be a float, np.float32, np.int64, '
-          'np.int32 or int or a serialized string of Summary.', key)
+          'np.int32 or int or np.ndarray or a serialized string of Summary.',
+          key)
   summary_writer.add_summary(summary_proto, current_global_step)
   summary_writer.flush()
 
@@ -1230,7 +1243,7 @@ class Estimator(BaseEstimator):
 
     if metric_key.MetricKey.LOSS not in model_fn_ops.eval_metric_ops:
       model_fn_ops.eval_metric_ops[metric_key.MetricKey.LOSS] = (
-          metrics_lib.streaming_mean(model_fn_ops.loss))
+          metrics_lib.mean(model_fn_ops.loss))
     return model_fn_ops
 
   def _get_predict_ops(self, features):
@@ -1256,7 +1269,9 @@ class Estimator(BaseEstimator):
       assets_extra=None,
       as_text=False,
       checkpoint_path=None,
-      graph_rewrite_specs=(GraphRewriteSpec((tag_constants.SERVING,), ()),)):
+      graph_rewrite_specs=(GraphRewriteSpec((tag_constants.SERVING,), ()),),
+      strip_default_attrs=False):
+    # pylint: disable=line-too-long
     """Exports inference graph as a SavedModel into given dir.
 
     Args:
@@ -1280,6 +1295,9 @@ class Estimator(BaseEstimator):
         produce a separate MetaGraphDef within the exported SavedModel, tagged
         and rewritten as specified.  Defaults to a single entry using the
         default serving tag ("serve") and no rewriting.
+      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+        removed from the NodeDefs. For a detailed guide, see
+        [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
 
     Returns:
       The string path to the exported directory.
@@ -1287,6 +1305,7 @@ class Estimator(BaseEstimator):
     Raises:
       ValueError: if an unrecognized export_type is requested.
     """
+    # pylint: enable=line-too-long
     if serving_input_fn is None:
       raise ValueError('serving_input_fn must be defined.')
 
@@ -1366,7 +1385,8 @@ class Estimator(BaseEstimator):
             signature_def_map=signature_def_map,
             assets_collection=ops.get_collection(
                 ops.GraphKeys.ASSET_FILEPATHS),
-            legacy_init_op=init_op)
+            legacy_init_op=init_op,
+            strip_default_attrs=strip_default_attrs)
 
     # pylint: disable=protected-access
     base_meta_graph_def = builder._saved_model.meta_graphs[0]
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
index 248c6c733ffca351c848ba07110ba89928634a23..9d7c1a099aa4be64ca0296fa5b870597dabec7b4 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_input_test.py
@@ -23,7 +23,7 @@ import tempfile
 
 import numpy as np
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import metric_spec
 from tensorflow.contrib.learn.python.learn import models
@@ -114,7 +114,7 @@ def linear_model_params_fn(features, labels, mode, params):
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
       loss,
-      variables.get_global_step(),
+      training_util.get_global_step(),
       optimizer='Adagrad',
       learning_rate=params['learning_rate'])
   return prediction, loss, train_op
@@ -129,7 +129,7 @@ def linear_model_fn(features, labels, mode):
     (_, features), = features.items()
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return prediction, loss, train_op
 
 
@@ -139,7 +139,7 @@ def linear_model_fn_with_model_fn_ops(features, labels, mode):
                   model_fn.ModeKeys.INFER)
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return model_fn.ModelFnOps(
       mode=mode, predictions=prediction, loss=loss, train_op=train_op)
 
@@ -150,7 +150,7 @@ def logistic_model_no_mode_fn(features, labels):
   labels = array_ops.one_hot(labels, 3, 1, 0)
   prediction, loss = (models.logistic_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return {
       'class': math_ops.argmax(prediction, 1),
       'prob': prediction
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
index be2b0cb3ca959323b4de095ca072278f028be301..5f682838b7afadec7a54df782cb5b89ac6746659 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
@@ -32,7 +32,7 @@ from google.protobuf import text_format
 
 from tensorflow.contrib import learn
 from tensorflow.contrib import lookup
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import experiment
@@ -132,7 +132,7 @@ def linear_model_params_fn(features, labels, mode, params):
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
       loss,
-      variables.get_global_step(),
+      training_util.get_global_step(),
       optimizer='Adagrad',
       learning_rate=params['learning_rate'])
   return prediction, loss, train_op
@@ -147,7 +147,7 @@ def linear_model_fn(features, labels, mode):
     (_, features), = features.items()
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return prediction, loss, train_op
 
 
@@ -157,7 +157,7 @@ def linear_model_fn_with_model_fn_ops(features, labels, mode):
                   model_fn.ModeKeys.INFER)
   prediction, loss = (models.linear_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return model_fn.ModelFnOps(
       mode=mode, predictions=prediction, loss=loss, train_op=train_op)
 
@@ -168,7 +168,7 @@ def logistic_model_no_mode_fn(features, labels):
   labels = array_ops.one_hot(labels, 3, 1, 0)
   prediction, loss = (models.logistic_regression_zero_init(features, labels))
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return {
       'class': math_ops.argmax(prediction, 1),
       'prob': prediction
@@ -241,7 +241,7 @@ def _build_estimator_for_resource_export_test():
     const = constant_op.constant(-1, dtype=dtypes.int64)
     table = lookup.MutableHashTable(
         dtypes.string, dtypes.int64, const, name='LookupTableModel')
-    update_global_step = variables.get_global_step().assign_add(1)
+    update_global_step = training_util.get_global_step().assign_add(1)
     if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
       key = constant_op.constant(['key'])
       value = constant_op.constant([42], dtype=dtypes.int64)
@@ -306,7 +306,7 @@ def _model_fn_ops(
         mode=mode,
         predictions=constant_op.constant(0.),
         loss=constant_op.constant(0.),
-        train_op=variables.get_global_step().assign_add(1))
+        train_op=training_util.get_global_step().assign_add(1))
 
 
 def _make_input_fn(features, labels):
@@ -389,7 +389,7 @@ class EstimatorModelFnTest(test.TestCase):
       self.assertEqual(expected_param, params)
       self.assertEqual(model_dir, expected_model_dir)
       return (constant_op.constant(0.), constant_op.constant(0.),
-              variables.get_global_step().assign_add(1))
+              training_util.get_global_step().assign_add(1))
     est = estimator.Estimator(model_fn=_argument_checker,
                               params=expected_param,
                               model_dir=expected_model_dir)
@@ -400,7 +400,7 @@ class EstimatorModelFnTest(test.TestCase):
     def _invalid_model_fn(features, labels):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         loss = 100.0 - w
       return None, loss, None
@@ -415,7 +415,7 @@ class EstimatorModelFnTest(test.TestCase):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
       loss = 100.0 - w
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         train_op = w.assign_add(loss / 100.0)
       predictions = loss
@@ -434,7 +434,7 @@ class EstimatorModelFnTest(test.TestCase):
       # pylint: disable=unused-argument
       w = variables_lib.Variable(42.0, 'weight')
       loss = 100.0 - w
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       with ops.control_dependencies([update_global_step]):
         train_op = w.assign_add(loss / 100.0)
       return None, loss, train_op
@@ -464,7 +464,7 @@ class EstimatorModelFnTest(test.TestCase):
           mode=mode,
           predictions=constant_op.constant(0.),
           loss=constant_op.constant(0.),
-          train_op=variables.get_global_step().assign_add(1),
+          train_op=training_util.get_global_step().assign_add(1),
           scaffold=monitored_session.Scaffold(init_fn=_init_fn))
 
     est = estimator.Estimator(model_fn=_model_fn_scaffold)
@@ -483,7 +483,7 @@ class EstimatorModelFnTest(test.TestCase):
           mode=mode,
           predictions=constant_op.constant([[1.]]),
           loss=constant_op.constant(0.),
-          train_op=variables.get_global_step().assign_add(1),
+          train_op=training_util.get_global_step().assign_add(1),
           scaffold=monitored_session.Scaffold(saver=self.mock_saver))
 
     def input_fn():
@@ -884,6 +884,35 @@ class EstimatorTest(test.TestCase):
     self.assertTrue('MSE' in output_values)
     self.assertTrue(output_values['MSE'].HasField('histo'))
 
+  def testSummaryWritingWithTensor(self):
+
+    def _streaming_precition_mean_tensor(predictions,
+                                         weights=None,
+                                         metrics_collections=None,
+                                         updates_collections=None,
+                                         name=None):
+      return metric_ops.streaming_mean_tensor(
+          predictions,
+          weights=weights,
+          metrics_collections=metrics_collections,
+          updates_collections=updates_collections,
+          name=name)
+
+    est = estimator.Estimator(model_fn=linear_model_fn)
+    est.fit(input_fn=boston_input_fn, steps=200)
+    est.evaluate(
+        input_fn=boston_input_fn,
+        steps=200,
+        metrics={'PMT': _streaming_precition_mean_tensor})
+    events = util_test.latest_events(est.model_dir + '/eval')
+    output_values = {}
+    for e in events:
+      if e.HasField('summary'):
+        for v in e.summary.value:
+          output_values[v.tag] = v
+    self.assertTrue('PMT' in output_values)
+    self.assertTrue(output_values['PMT'].HasField('tensor'))
+
   def testLossInGraphCollection(self):
 
     class _LossCheckerHook(session_run_hook.SessionRunHook):
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
index 1d89dfb55b10b032cab7dcf434d396404d4eb83b..8131e0fde6fea5501cacc4714f53ed8d867ca70f 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py
@@ -22,7 +22,7 @@ import random
 
 import numpy as np
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python import learn
 from tensorflow.contrib.learn.python.learn import datasets
 from tensorflow.contrib.learn.python.learn import metric_spec
@@ -62,7 +62,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["transformed_x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator = estimator_lib.Estimator(
@@ -100,7 +100,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator = estimator_lib.Estimator(
@@ -139,7 +139,7 @@ class FeatureEngineeringFunctionTest(test.TestCase):
       _ = labels
       predictions = features["x"]
       loss = constant_op.constant([2.])
-      update_global_step = variables.get_global_step().assign_add(1)
+      update_global_step = training_util.get_global_step().assign_add(1)
       return predictions, loss, update_global_step
 
     estimator_with_fe_fn = estimator_lib.Estimator(
diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
index 992b804f59ecd88fedc2fba10d3079f93c4fe83d..8f9d6fc318a357853bdb8e3264f6691b410006b1 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
@@ -28,7 +28,7 @@ import time
 import numpy as np
 
 from tensorflow.contrib.factorization.python.ops import clustering_ops
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators.model_fn import ModelFnOps
 from tensorflow.python.framework import ops
@@ -128,7 +128,7 @@ def _kmeans_clustering_model_fn(features, labels, mode, params, config):
        random_seed=params.get('random_seed'),
        kmeans_plus_plus_num_retries=params.get(
            'kmeans_plus_plus_num_retries')).training_graph()
-  incr_step = state_ops.assign_add(variables.get_global_step(), 1)
+  incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
   loss = math_ops.reduce_sum(losses, name=KMeansClustering.LOSS_OP_NAME)
   summary.scalar('loss/raw', loss)
   training_op = with_dependencies([training_op, incr_step], loss)
diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py
index ce87b4723d436495e5fb149f0ab8f2eea44d82b8..b28835a809736a099ad2f08d127dc68d7977a3c1 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py
@@ -199,15 +199,7 @@ class KMeansTest(KMeansTestBase):
         input_fn=self.input_fn(batch_size=self.num_points), steps=1)
     self.assertNear(self.true_score, score, self.true_score * 0.01)
 
-  def test_infer(self):
-    kmeans = self._kmeans()
-    # Make a call to fit to initialize the cluster centers.
-    max_steps = 1
-    kmeans.fit(input_fn=self.input_fn(), max_steps=max_steps)
-    clusters = kmeans.clusters()
-
-    # Make a small test set
-    num_points = 10
+  def _infer_helper(self, kmeans, clusters, num_points):
     points, true_assignments, true_offsets = make_random_points(
         clusters, num_points)
     # Test predict
@@ -231,6 +223,17 @@ class KMeansTest(KMeansTestBase):
         np.transpose(np.sum(np.square(clusters), axis=1, keepdims=True)))
     self.assertAllClose(transform, true_transform, rtol=0.05, atol=10)
 
+  def test_infer(self):
+    kmeans = self._kmeans()
+    # Make a call to fit to initialize the cluster centers.
+    max_steps = 1
+    kmeans.fit(input_fn=self.input_fn(), max_steps=max_steps)
+    clusters = kmeans.clusters()
+
+    # Run inference on small datasets.
+    self._infer_helper(kmeans, clusters, num_points=10)
+    self._infer_helper(kmeans, clusters, num_points=1)
+
 
 class KMeansTestMultiStageInit(KMeansTestBase):
 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py
index f5445ad4e728dbd3904279573771de9454b5d17c..37aa8b339622415d082933cdf66d2472a4119b48 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/linear.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py
@@ -26,7 +26,7 @@ import six
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
@@ -170,7 +170,7 @@ def _linear_model_fn(features, labels, mode, params, config=None):
           weight_collections=[parent_scope])
 
     def _train_op_fn(loss):
-      global_step = contrib_variables.get_global_step()
+      global_step = training_util.get_global_step()
       my_vars = ops.get_collection(parent_scope)
       grads = gradients.gradients(loss, my_vars)
       if gradient_clip_norm:
@@ -252,7 +252,7 @@ def sdca_model_fn(features, labels, mode, params):
     _add_bias_column(feature_columns, features, bias, columns_to_variables)
 
   def _train_op_fn(unused_loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
                                                     weight_column_name,
                                                     loss_type, features,
diff --git a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
index 93c62f87e8495f299a8c456574c7b40534186304..656d68b76888d9319c0b9be481f9b0478ac4314c 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor_test.py
@@ -21,7 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib import layers
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn.datasets import base
 from tensorflow.contrib.learn.python.learn.estimators import logistic_regressor
@@ -57,7 +57,7 @@ def _logistic_regression_model_fn(features, labels, mode):
   predictions = math_ops.sigmoid(logits)
   loss = losses.sigmoid_cross_entropy(labels, logits)
   train_op = optimizers.optimize_loss(
-      loss, variables.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
+      loss, training_util.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
   return predictions, loss, train_op
 
 
diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py
index fc4bd1f461d7bfbfcfb78201d527959055342f0a..9576ff21c243022276bb0641882dfaf0decf05c0 100644
--- a/tensorflow/contrib/learn/python/learn/experiment.py
+++ b/tensorflow/contrib/learn/python/learn/experiment.py
@@ -35,6 +35,7 @@ from tensorflow.contrib.learn.python.learn import trainable
 from tensorflow.contrib.learn.python.learn.estimators import run_config
 from tensorflow.contrib.tpu.python.tpu import tpu_estimator
 from tensorflow.python.estimator import estimator as core_estimator
+from tensorflow.python.estimator import util as estimator_util
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import basic_session_run_hooks
@@ -46,6 +47,18 @@ from tensorflow.python.util import compat
 __all__ = ["Experiment"]
 
 
+def _get_standardized_predicate_fn(predicate_fn):
+  pred_fn_args = estimator_util.fn_args(predicate_fn)
+  if "checkpoint_path" not in pred_fn_args:
+    # pylint: disable=unused-argument
+    def _pred_fn_wrapper(eval_results, checkpoint_path):
+      return predicate_fn(eval_results)
+
+    return _pred_fn_wrapper
+  else:
+    return predicate_fn
+
+
 class _EvalAndExportListener(basic_session_run_hooks.CheckpointSaverListener):
   """Listener that evaluates and exports a model after creating a checkpoint.
 
@@ -446,22 +459,33 @@ class Experiment(object):
       evaluate_checkpoint_only_once: Whether to skip evaluation of checkpoints
         that have already been evaluated. Default is `True`.
       continuous_eval_predicate_fn: A predicate function determining whether to
-        continue eval after each iteration. `predicate_fn` takes the evaluation
-        results as arguments. At the beginning of evaluation, the passed eval
-        results will be None so it's expected that the predicate function
-        handles that gracefully. When `predicate_fn` is not specified,
-        continuous eval will run in an infinite loop (if `train_steps` is None)
-        or exit once global step reaches `train_steps`.
+        continue eval after each iteration. A `predicate_fn` has one of the
+        following signatures:
+          * (eval_results) -> boolean
+          * (eval_results, checkpoint_path) -> boolean
+        Where `eval_results` is the dictionary of metric evaluations and
+        checkpoint_path is the path to the checkpoint containing the parameters
+        on which that evaluation was based.
+        At the beginning of evaluation, the passed `eval_results` will be None
+        so it's expected that the predicate function handles that gracefully.
+        When `predicate_fn` is not specified, continuous eval will run in an
+        infinite loop (if `train_steps` is None). or exit once global step
+        reaches `train_steps`.
+
       export: Whether to export from this step. Default is 'True'.
 
     Raises:
       ValueError: if `continuous_eval_predicate_fn` is neither None nor
         callable.
     """
-    if (continuous_eval_predicate_fn is not None and
-        not callable(continuous_eval_predicate_fn)):
-      raise ValueError(
-          "`continuous_eval_predicate_fn` must be a callable, or None.")
+    if continuous_eval_predicate_fn is not None:
+      if not callable(continuous_eval_predicate_fn):
+        raise ValueError(
+            "`continuous_eval_predicate_fn` must be a callable, or None.")
+      predicate_fn = _get_standardized_predicate_fn(
+          continuous_eval_predicate_fn)
+    else:
+      predicate_fn = None
 
     if delay_secs is None:
       delay_secs = self._eval_delay_secs
@@ -475,8 +499,10 @@ class Experiment(object):
     previous_path = None
     eval_result = None
     last_warning_time = 0
-    while (not continuous_eval_predicate_fn or
-           continuous_eval_predicate_fn(eval_result)):
+    while (not predicate_fn or
+           predicate_fn(
+               eval_result,
+               checkpoint_path=previous_path if eval_result else None)):
       # Exit if we have already reached number of steps to train.
       if self._has_training_stopped(eval_result):
         logging.info("Exiting continuous eval, global_step=%s >= "
@@ -682,11 +708,19 @@ class Experiment(object):
 
     Args:
       continuous_eval_predicate_fn: A predicate function determining whether to
-        continue after each iteration. `predicate_fn` takes the evaluation
-        results as its arguments. At the beginning of evaluation, the passed
-        eval results will be None so it's expected that the predicate function
-        handles that gracefully. When `predicate_fn` is not specified, this will
-        run in an infinite loop or exit when global_step reaches `train_steps`.
+        continue eval after each iteration. A `predicate_fn` has one of the
+        following signatures:
+          * (eval_results) -> boolean
+          * (eval_results, checkpoint_path) -> boolean
+        Where `eval_results` is the dictionary of metric evaluations and
+        checkpoint_path is the path to the checkpoint containing the parameters
+        on which that evaluation was based.
+        At the beginning of evaluation, the passed `eval_results` and
+        `checkpoint_path` will be None so it's expected that the predicate
+        function handles that gracefully.
+        When `predicate_fn` is not specified, continuous eval will run in an
+        infinite loop (if `train_steps` is None). or exit once global step
+        reaches `train_steps`.
 
     Returns:
       A tuple of the result of the `evaluate` call to the `Estimator` and the
@@ -697,13 +731,18 @@ class Experiment(object):
         callable.
     """
 
-    if (continuous_eval_predicate_fn is not None and
-        not callable(continuous_eval_predicate_fn)):
-      raise ValueError(
-          "`continuous_eval_predicate_fn` must be a callable, or None.")
+    if continuous_eval_predicate_fn is not None:
+      if not callable(continuous_eval_predicate_fn):
+        raise ValueError(
+            "`continuous_eval_predicate_fn` must be a callable, or None.")
+      predicate_fn = _get_standardized_predicate_fn(
+          continuous_eval_predicate_fn)
+    else:
+      predicate_fn = None
 
-    eval_result = None
     export_results = None
+    latest_checkpoint = None
+    eval_result = None
 
     # Set the default value for train_steps_per_iteration, which will be
     # overridden by other settings.
@@ -713,8 +752,10 @@ class Experiment(object):
     elif self._train_steps is not None:
       train_steps_per_iteration = int(self._train_steps / 10)
 
-    while (not continuous_eval_predicate_fn or
-           continuous_eval_predicate_fn(eval_result)):
+    while (not predicate_fn or
+           predicate_fn(
+               eval_result,
+               checkpoint_path=latest_checkpoint if eval_result else None)):
 
       if self._has_training_stopped(eval_result):
         # Exits once max steps of training is satisfied.
@@ -729,11 +770,14 @@ class Experiment(object):
           saving_listeners=self._saving_listeners)
 
       logging.info("Evaluating model now.")
-      eval_result = self._call_evaluate(input_fn=self._eval_input_fn,
-                                        steps=self._eval_steps,
-                                        metrics=self._eval_metrics,
-                                        name="one_pass",
-                                        hooks=self._eval_hooks)
+      latest_checkpoint = saver.latest_checkpoint(self._estimator.model_dir)
+      eval_result = self._call_evaluate(
+          input_fn=self._eval_input_fn,
+          steps=self._eval_steps,
+          metrics=self._eval_metrics,
+          name="one_pass",
+          checkpoint_path=latest_checkpoint,
+          hooks=self._eval_hooks)
       export_results = self._maybe_export(eval_result)
 
     return eval_result, export_results
diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py
index c29c198d094090a59c8c7dd2949c3f069adf49d0..545d7d8924c0c10544e6113e2968b7ae3d2090fc 100644
--- a/tensorflow/contrib/learn/python/learn/experiment_test.py
+++ b/tensorflow/contrib/learn/python/learn/experiment_test.py
@@ -492,6 +492,33 @@ class ExperimentTest(test.TestCase):
       self.assertEqual(3, est.eval_count)
       self.assertEqual([noop_hook], est.eval_hooks)
 
+  def test_continuous_eval_predicate_fn_with_checkpoint(self):
+    for est in self._estimators_for_tests():
+      eval_metrics = 'eval_metrics' if not isinstance(
+          est, core_estimator.Estimator) else None
+      est.fake_checkpoint()
+      noop_hook = _NoopHook()
+
+      def _predicate_fn(eval_result, checkpoint_path):
+        self.assertEqual(not eval_result,
+                         checkpoint_path is None)
+        return est.eval_count < 3  # pylint: disable=cell-var-from-loop
+
+      ex = experiment.Experiment(
+          est,
+          train_input_fn='train_input',
+          eval_input_fn='eval_input',
+          eval_metrics=eval_metrics,
+          eval_hooks=[noop_hook],
+          eval_delay_secs=0,
+          continuous_eval_throttle_secs=0)
+      ex.continuous_eval(
+          evaluate_checkpoint_only_once=False,
+          continuous_eval_predicate_fn=_predicate_fn)
+      self.assertEqual(0, est.fit_count)
+      self.assertEqual(3, est.eval_count)
+      self.assertEqual([noop_hook], est.eval_hooks)
+
   def test_run_local(self):
     for est in self._estimators_for_tests():
       eval_metrics = 'eval_metrics' if not isinstance(
diff --git a/tensorflow/contrib/learn/python/learn/export_strategy.py b/tensorflow/contrib/learn/python/learn/export_strategy.py
index f276aab0e6beb011a21c20fa194dd5212db796d1..55a8b824312b89e0ac66513242191f4201ac212a 100644
--- a/tensorflow/contrib/learn/python/learn/export_strategy.py
+++ b/tensorflow/contrib/learn/python/learn/export_strategy.py
@@ -26,13 +26,14 @@ __all__ = ['ExportStrategy']
 
 
 class ExportStrategy(
-    collections.namedtuple('ExportStrategy', ['name', 'export_fn'])):
+    collections.namedtuple('ExportStrategy',
+                           ['name', 'export_fn', 'strip_default_attrs'])):
   """A class representing a type of model export.
 
   Typically constructed by a utility function specific to the exporter, such as
   `saved_model_export_utils.make_export_strategy()`.
 
-  The fields are:
+  Attributes:
     name: The directory name under the export base directory where exports of
       this type will be written.
     export_fn: A function that writes an export, given an estimator, a
@@ -45,11 +46,20 @@ class ExportStrategy(
 
     The signature of this function must be one of:
 
-    * `(estimator, export_path) -> export_path`
-    * `(estimator, export_path, checkpoint_path) -> export_path`
-    * `(estimator, export_path, checkpoint_path, eval_result) -> export_path`
+      * `(estimator, export_path) -> export_path`
+      * `(estimator, export_path, checkpoint_path) -> export_path`
+      * `(estimator, export_path, checkpoint_path, eval_result) -> export_path`
+      * `(estimator, export_path, checkpoint_path, eval_result,
+          strip_default_attrs) -> export_path`
+    strip_default_attrs: (Optional) Boolean. If set as True, default attrs in
+        the `GraphDef` will be stripped on write. This is recommended for better
+        forward compatibility of the resulting `SavedModel`.
   """
 
+  def __new__(cls, name, export_fn, strip_default_attrs=None):
+    return super(ExportStrategy, cls).__new__(
+        cls, name, export_fn, strip_default_attrs)
+
   def export(self,
              estimator,
              export_path,
@@ -83,5 +93,6 @@ class ExportStrategy(
         raise ValueError('An export_fn accepting eval_result must also accept '
                          'checkpoint_path.')
       kwargs['eval_result'] = eval_result
-
+    if 'strip_default_attrs' in export_fn_args:
+      kwargs['strip_default_attrs'] = self.strip_default_attrs
     return self.export_fn(estimator, export_path, **kwargs)
diff --git a/tensorflow/contrib/learn/python/learn/export_strategy_test.py b/tensorflow/contrib/learn/python/learn/export_strategy_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..43c3551cccc3b8e6b66bd2b36839a3dfc5fe8eea
--- /dev/null
+++ b/tensorflow/contrib/learn/python/learn/export_strategy_test.py
@@ -0,0 +1,89 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ExportStrategy."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.learn.python.learn import export_strategy
+from tensorflow.python.platform import test
+
+
+class ExportStrategyTest(test.TestCase):
+
+  def test_no_optional_args_export(self):
+    model_path = '/path/to/model'
+    def _export_fn(estimator, export_path):
+      self.assertTupleEqual((estimator, export_path), (None, None))
+      return model_path
+
+    strategy = export_strategy.ExportStrategy('foo', _export_fn)
+    self.assertTupleEqual(strategy, ('foo', _export_fn, None))
+    self.assertIs(strategy.export(None, None), model_path)
+
+  def test_checkpoint_export(self):
+    ckpt_model_path = '/path/to/checkpoint_model'
+    def _ckpt_export_fn(estimator, export_path, checkpoint_path):
+      self.assertTupleEqual((estimator, export_path), (None, None))
+      self.assertEqual(checkpoint_path, 'checkpoint')
+      return ckpt_model_path
+
+    strategy = export_strategy.ExportStrategy('foo', _ckpt_export_fn)
+    self.assertTupleEqual(strategy, ('foo', _ckpt_export_fn, None))
+    self.assertIs(strategy.export(None, None, 'checkpoint'), ckpt_model_path)
+
+  def test_checkpoint_eval_export(self):
+    ckpt_eval_model_path = '/path/to/checkpoint_eval_model'
+    def _ckpt_eval_export_fn(estimator, export_path, checkpoint_path,
+                             eval_result):
+      self.assertTupleEqual((estimator, export_path), (None, None))
+      self.assertEqual(checkpoint_path, 'checkpoint')
+      self.assertEqual(eval_result, 'eval')
+      return ckpt_eval_model_path
+
+    strategy = export_strategy.ExportStrategy('foo', _ckpt_eval_export_fn)
+    self.assertTupleEqual(strategy, ('foo', _ckpt_eval_export_fn, None))
+    self.assertIs(strategy.export(None, None, 'checkpoint', 'eval'),
+                  ckpt_eval_model_path)
+
+  def test_eval_only_export(self):
+    def _eval_export_fn(estimator, export_path, eval_result):
+      del estimator, export_path, eval_result
+
+    strategy = export_strategy.ExportStrategy('foo', _eval_export_fn)
+    self.assertTupleEqual(strategy, ('foo', _eval_export_fn, None))
+    with self.assertRaisesRegexp(ValueError, 'An export_fn accepting '
+                                 'eval_result must also accept '
+                                 'checkpoint_path'):
+      strategy.export(None, None, eval_result='eval')
+
+  def test_strip_default_attr_export(self):
+    strip_default_attrs_model_path = '/path/to/strip_default_attrs_model'
+    def _strip_default_attrs_export_fn(estimator, export_path,
+                                       strip_default_attrs):
+      self.assertTupleEqual((estimator, export_path), (None, None))
+      self.assertTrue(strip_default_attrs)
+      return strip_default_attrs_model_path
+
+    strategy = export_strategy.ExportStrategy('foo',
+                                              _strip_default_attrs_export_fn,
+                                              True)
+    self.assertTupleEqual(strategy,
+                          ('foo', _strip_default_attrs_export_fn, True))
+    self.assertIs(strategy.export(None, None), strip_default_attrs_model_path)
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
index 86fad4c5535a918d87e0741687cfebe3afaf9ddf..f36a778b529a83f158241ddb060959c4b33e2e95 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
@@ -857,8 +857,8 @@ class DaskDataFeeder(object):
     """Returns a function, that will sample data and provide it to placeholders.
 
     Args:
-      input_placeholder: tf.Placeholder for input features mini batch.
-      output_placeholder: tf.Placeholder for output labels.
+      input_placeholder: tf.placeholder for input features mini batch.
+      output_placeholder: tf.placeholder for output labels.
 
     Returns:
       A function that when called samples a random subset of batch size
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py
index 4b34fc62849766370979bb2002d42ee03ea7161a..3a46c239688017f9204d2c6182a6f81cd325a417 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py
@@ -24,6 +24,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.layers import utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import io_ops
@@ -280,14 +281,33 @@ def _get_file_names(file_pattern, randomize_input):
 
 def _get_examples(file_name_queue, reader, num_threads, read_batch_size,
                   filter_fn, parse_fn):
+  """Get example filenames matching.
+
+  Args:
+    file_name_queue: A queue implementation that dequeues elements in
+      first-in first-out order.
+    reader: A function or class that returns an object with
+      `read` method, (filename tensor) -> (example tensor).
+    num_threads: The number of threads enqueuing examples.
+    read_batch_size: An int or scalar `Tensor` specifying the number of
+      records to read at once.
+    filter_fn: Filtering function, takes both keys as well as an `Example`
+      Tensors and returns a boolean mask of the same shape as the input Tensors
+      to be applied for filtering. If `None`, no filtering is done.
+    parse_fn: Parsing function, takes `Example` Tensor returns parsed
+      representation. If `None`, no parsing is done.
+
+  Returns:
+    List of example file names matching `file_name_queue`.
+  """
   with ops.name_scope('read'):
     example_list = []
     for _ in range(num_threads):
-      if read_batch_size > 1:
-        keys, examples_proto = reader().read_up_to(file_name_queue,
-                                                   read_batch_size)
-      else:
-        keys, examples_proto = reader().read(file_name_queue)
+      keys, examples_proto = utils.smart_cond(
+          read_batch_size > 1,
+          lambda: reader().read_up_to(file_name_queue, read_batch_size),
+          lambda: reader().read(file_name_queue))
+
       if filter_fn:
         mask = filter_fn(keys, examples_proto)
         keys = array_ops.boolean_mask(keys, mask)
@@ -379,14 +399,15 @@ def _read_keyed_batch_examples_helper(file_pattern,
             capacity=1, dtypes=[dtypes.string], shapes=[[]])
         enqueue_op = file_name_queue.enqueue(
             input_pipeline_ops.seek_next(
-                file_names, shuffle=randomize_input, num_epochs=num_epochs,
+                file_names,
+                shuffle=randomize_input,
+                num_epochs=num_epochs,
                 seed=seed))
         queue_runner.add_queue_runner(
             queue_runner.QueueRunner(file_name_queue, [enqueue_op]))
       else:
         file_name_queue = input_ops.string_input_producer(
-            constant_op.constant(
-                file_names, name='input'),
+            constant_op.constant(file_names, name='input'),
             shuffle=randomize_input,
             num_epochs=num_epochs,
             name=file_name_queue_scope,
@@ -496,7 +517,8 @@ def read_keyed_batch_features(file_pattern,
   """
 
   with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope:
-    if read_batch_size is None: read_batch_size = batch_size
+    if read_batch_size is None:
+      read_batch_size = batch_size
     keys, examples = read_keyed_batch_examples(
         file_pattern,
         batch_size,
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
index 6f0fd9a2976d37d1c701a96f50c2b987562cb191..e11e8b698adc113486bbb45572c8129e964cc931 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
@@ -204,8 +204,7 @@ class GraphIOTest(test.TestCase):
     shape = (0,)
     features = {
         "feature":
-            parsing_ops.FixedLenFeature(
-                shape=shape, dtype=dtypes_lib.float32)
+            parsing_ops.FixedLenFeature(shape=shape, dtype=dtypes_lib.float32)
     }
 
     with ops.Graph().as_default() as g, self.test_session(graph=g) as sess:
@@ -255,8 +254,8 @@ class GraphIOTest(test.TestCase):
       self.assertAllEqual((None,), inputs.get_shape().as_list())
       self.assertEqual("%s:1" % name, inputs.name)
       file_name_queue_name = "%s/file_name_queue" % name
-      file_name_queue_limit_name = ("%s/limit_epochs/epochs" %
-                                    file_name_queue_name)
+      file_name_queue_limit_name = (
+          "%s/limit_epochs/epochs" % file_name_queue_name)
       file_names_name = "%s/input" % file_name_queue_name
       example_queue_name = "%s/random_shuffle_queue" % name
       op_nodes = test_util.assert_ops_in_graph({
@@ -354,8 +353,8 @@ class GraphIOTest(test.TestCase):
     json_lines = [
         "".join([
             '{"features": { "feature": { "sequence": {',
-            '"bytes_list": { "value": ["', base64.b64encode(l).decode("ascii"),
-            '"]}}}}}\n'
+            '"bytes_list": { "value": ["',
+            base64.b64encode(l).decode("ascii"), '"]}}}}}\n'
         ]) for l in lines
     ]
     return self._create_temp_file("".join(json_lines))
@@ -823,6 +822,31 @@ class GraphIOTest(test.TestCase):
       coord.request_stop()
       coord.join(threads)
 
+  def test_read_keyed_batch_features_shared_queue(self):
+    batch_size = 17
+    shape = (0,)
+    fixed_feature = parsing_ops.FixedLenFeature(
+        shape=shape, dtype=dtypes_lib.float32)
+    feature = {"feature": fixed_feature}
+    reader = io_ops.TFRecordReader
+
+    _, queued_feature = graph_io.read_keyed_batch_features_shared_queue(
+        _VALID_FILE_PATTERN, batch_size, feature, reader)
+
+    with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
+      features_result = graph_io.read_batch_features(
+          _VALID_FILE_PATTERN, batch_size, feature, reader)
+      session.run(variables.local_variables_initializer())
+
+    self.assertAllEqual(
+        queued_feature.get("feature").get_shape().as_list(),
+        features_result.get("feature").get_shape().as_list())
+
+  def test_get_file_names_errors(self):
+    # Raise bad file_pattern.
+    with self.assertRaises(ValueError):
+      graph_io._get_file_names([], True)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/numpy_io_test.py b/tensorflow/contrib/learn/python/learn/learn_io/numpy_io_test.py
deleted file mode 100644
index 6fe8de8705b8854e5861879d2a505fe03fddc7e5..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/learn/python/learn/learn_io/numpy_io_test.py
+++ /dev/null
@@ -1,280 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for numpy_io."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.learn.python.learn.learn_io import numpy_io
-from tensorflow.python.framework import errors
-from tensorflow.python.platform import test
-from tensorflow.python.training import coordinator
-from tensorflow.python.training import queue_runner_impl
-
-
-class NumpyIoTest(test.TestCase):
-
-  def testNumpyInputFn(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -28)
-
-    with self.test_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1])
-      self.assertAllEqual(res[0]['b'], [32, 33])
-      self.assertAllEqual(res[1], [-32, -31])
-
-      session.run([features, target])
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithVeryLargeBatchSizeAndMultipleEpochs(self):
-    a = np.arange(2) * 1.0
-    b = np.arange(32, 34)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -30)
-
-    with self.test_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=128, shuffle=False, num_epochs=2)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1, 0, 1])
-      self.assertAllEqual(res[0]['b'], [32, 33, 32, 33])
-      self.assertAllEqual(res[1], [-32, -31, -32, -31])
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithZeroEpochs(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -28)
-
-    with self.test_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=0)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithBatchSizeNotDividedByDataSize(self):
-    batch_size = 2
-    a = np.arange(5) * 1.0
-    b = np.arange(32, 37)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -27)
-
-    with self.test_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=batch_size, shuffle=False, num_epochs=1)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1])
-      self.assertAllEqual(res[0]['b'], [32, 33])
-      self.assertAllEqual(res[1], [-32, -31])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [2, 3])
-      self.assertAllEqual(res[0]['b'], [34, 35])
-      self.assertAllEqual(res[1], [-30, -29])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [4])
-      self.assertAllEqual(res[0]['b'], [36])
-      self.assertAllEqual(res[1], [-28])
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithBatchSizeNotDividedByDataSizeAndMultipleEpochs(self):
-    batch_size = 2
-    a = np.arange(3) * 1.0
-    b = np.arange(32, 35)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -29)
-
-    with self.test_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=batch_size, shuffle=False, num_epochs=3)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1])
-      self.assertAllEqual(res[0]['b'], [32, 33])
-      self.assertAllEqual(res[1], [-32, -31])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [2, 0])
-      self.assertAllEqual(res[0]['b'], [34, 32])
-      self.assertAllEqual(res[1], [-30, -32])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [1, 2])
-      self.assertAllEqual(res[0]['b'], [33, 34])
-      self.assertAllEqual(res[1], [-31, -30])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1])
-      self.assertAllEqual(res[0]['b'], [32, 33])
-      self.assertAllEqual(res[1], [-32, -31])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [2])
-      self.assertAllEqual(res[0]['b'], [34])
-      self.assertAllEqual(res[1], [-30])
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithBatchSizeLargerThanDataSize(self):
-    batch_size = 10
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -28)
-
-    with self.test_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=batch_size, shuffle=False, num_epochs=1)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1, 2, 3])
-      self.assertAllEqual(res[0]['b'], [32, 33, 34, 35])
-      self.assertAllEqual(res[1], [-32, -31, -30, -29])
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithDifferentDimensionsOfFeatures(self):
-    a = np.array([[1, 2], [3, 4]])
-    b = np.array([5, 6])
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -30)
-
-    with self.test_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [[1, 2], [3, 4]])
-      self.assertAllEqual(res[0]['b'], [5, 6])
-      self.assertAllEqual(res[1], [-32, -31])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithXAsNonDict(self):
-    x = np.arange(32, 36)
-    y = np.arange(4)
-    with self.test_session():
-      with self.assertRaisesRegexp(TypeError, 'x must be dict'):
-        failing_input_fn = numpy_io.numpy_input_fn(
-            x, y, batch_size=2, shuffle=False, num_epochs=1)
-        failing_input_fn()
-
-  def testNumpyInputFnWithTargetKeyAlreadyInX(self):
-    array = np.arange(32, 36)
-    x = {'__target_key__': array}
-    y = np.arange(4)
-
-    with self.test_session():
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-      input_fn()
-      self.assertAllEqual(x['__target_key__'], array)
-      self.assertItemsEqual(x.keys(), ['__target_key__'])
-
-  def testNumpyInputFnWithMismatchLengthOfInputs(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    x_mismatch_length = {'a': np.arange(1), 'b': b}
-    y_longer_length = np.arange(10)
-
-    with self.test_session():
-      with self.assertRaisesRegexp(
-          ValueError, 'Length of tensors in x and y is mismatched.'):
-        failing_input_fn = numpy_io.numpy_input_fn(
-            x, y_longer_length, batch_size=2, shuffle=False, num_epochs=1)
-        failing_input_fn()
-
-      with self.assertRaisesRegexp(
-          ValueError, 'Length of tensors in x and y is mismatched.'):
-        failing_input_fn = numpy_io.numpy_input_fn(
-            x=x_mismatch_length,
-            y=None,
-            batch_size=2,
-            shuffle=False,
-            num_epochs=1)
-        failing_input_fn()
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py
index ed6683abedbb8ae76ba364405158eb52cbb6d762..6440bc204b8e339ff51311dcc87b36f556b94092 100644
--- a/tensorflow/contrib/learn/python/learn/metric_spec.py
+++ b/tensorflow/contrib/learn/python/learn/metric_spec.py
@@ -42,10 +42,8 @@ def _args(fn):
   """
   if hasattr(fn, 'func') and hasattr(fn, 'keywords'):
     # Handle functools.partial and similar objects.
-    return tuple([
-        arg for arg in tf_inspect.getargspec(fn.func).args
-        if arg not in set(fn.keywords.keys())
-    ])
+    return tuple(
+        [arg for arg in _args(fn.func) if arg not in set(fn.keywords.keys())])
   # Handle function.
   return tuple(tf_inspect.getargspec(fn).args)
 
diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py
index 6af2287761299f6725f9547917101c18b0cc0164..cb34cb1d26b6812c7f3f39e9f965615de5a8ef07 100644
--- a/tensorflow/contrib/learn/python/learn/utils/export.py
+++ b/tensorflow/contrib/learn/python/learn/utils/export.py
@@ -20,7 +20,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.framework import deprecated
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.session_bundle import exporter
 from tensorflow.contrib.session_bundle import gc
 from tensorflow.python.client import session as tf_session
@@ -78,7 +78,7 @@ def _export_graph(graph, saver, checkpoint_path, export_dir,
           default_graph_signature=default_graph_signature,
           named_graph_signatures=named_graph_signatures,
           assets_collection=ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS))
-      return export.export(export_dir, contrib_variables.get_global_step(),
+      return export.export(export_dir, training_util.get_global_step(),
                            session, exports_to_keep=exports_to_keep)
 
 
@@ -295,7 +295,7 @@ def _export_estimator(estimator,
   checkpoint_path = (checkpoint_path or
                      tf_saver.latest_checkpoint(estimator._model_dir))
   with ops.Graph().as_default() as g:
-    contrib_variables.create_global_step(g)
+    training_util.create_global_step(g)
 
     if use_deprecated_input_fn:
       examples = array_ops.placeholder(dtype=dtypes.string,
diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
index 6ffd2a133995a6ff8b35540221fb5676bf5de19f..1593380007b2799fb1d17e92408ab19a7b47fe1e 100644
--- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
+++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
@@ -33,7 +33,6 @@ from __future__ import division
 from __future__ import print_function
 
 import os
-import tempfile
 import time
 
 from tensorflow.contrib.layers.python.layers import feature_column
@@ -51,6 +50,7 @@ from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.saved_model import signature_def_utils
+from tensorflow.python.summary import summary_iterator
 from tensorflow.python.training import saver
 
 from tensorflow.python.util import compat
@@ -391,7 +391,8 @@ def make_export_strategy(serving_input_fn,
                          default_output_alternative_key=None,
                          assets_extra=None,
                          as_text=False,
-                         exports_to_keep=5):
+                         exports_to_keep=5,
+                         strip_default_attrs=None):
   """Create an ExportStrategy for use with Experiment.
 
   Args:
@@ -412,12 +413,16 @@ def make_export_strategy(serving_input_fn,
     exports_to_keep: Number of exports to keep.  Older exports will be
       garbage-collected.  Defaults to 5.  Set to None to disable garbage
       collection.
+    strip_default_attrs: Boolean. If True, default attrs in the
+      `GraphDef` will be stripped on write. This is recommended for better
+      forward compatibility of the resulting `SavedModel`.
 
   Returns:
     An ExportStrategy that can be passed to the Experiment constructor.
   """
 
-  def export_fn(estimator, export_dir_base, checkpoint_path=None):
+  def export_fn(estimator, export_dir_base, checkpoint_path=None,
+                strip_default_attrs=False):
     """Exports the given Estimator as a SavedModel.
 
     Args:
@@ -426,6 +431,8 @@ def make_export_strategy(serving_input_fn,
         graph and checkpoints.
       checkpoint_path: The checkpoint path to export.  If None (the default),
         the most recent checkpoint found within the model directory is chosen.
+      strip_default_attrs: Boolean. If `True`, default-valued attributes will
+        be removed from the NodeDefs.
 
     Returns:
       The string path to the exported directory.
@@ -444,7 +451,8 @@ def make_export_strategy(serving_input_fn,
           serving_input_fn,
           assets_extra=assets_extra,
           as_text=as_text,
-          checkpoint_path=checkpoint_path)
+          checkpoint_path=checkpoint_path,
+          strip_default_attrs=strip_default_attrs)
     else:
       export_result = estimator.export_savedmodel(
           export_dir_base,
@@ -452,12 +460,13 @@ def make_export_strategy(serving_input_fn,
           default_output_alternative_key=default_output_alternative_key,
           assets_extra=assets_extra,
           as_text=as_text,
-          checkpoint_path=checkpoint_path)
+          checkpoint_path=checkpoint_path,
+          strip_default_attrs=strip_default_attrs)
 
     garbage_collect_exports(export_dir_base, exports_to_keep)
     return export_result
 
-  return export_strategy.ExportStrategy('Servo', export_fn)
+  return export_strategy.ExportStrategy('Servo', export_fn, strip_default_attrs)
 
 
 def make_parsing_export_strategy(feature_columns,
@@ -465,7 +474,8 @@ def make_parsing_export_strategy(feature_columns,
                                  assets_extra=None,
                                  as_text=False,
                                  exports_to_keep=5,
-                                 target_core=False):
+                                 target_core=False,
+                                 strip_default_attrs=None):
   """Create an ExportStrategy for use with Experiment, using `FeatureColumn`s.
 
   Creates a SavedModel export that expects to be fed with a single string
@@ -493,6 +503,9 @@ def make_parsing_export_strategy(feature_columns,
     target_core: If True, prepare an ExportStrategy for use with
       tensorflow.python.estimator.*.  If False (default), prepare an
       ExportStrategy for use with tensorflow.contrib.learn.python.learn.*.
+    strip_default_attrs: Boolean. If True, default attrs in the
+      `GraphDef` will be stripped on write. This is recommended for better
+      forward compatibility of the resulting `SavedModel`.
 
   Returns:
     An ExportStrategy that can be passed to the Experiment constructor.
@@ -509,7 +522,8 @@ def make_parsing_export_strategy(feature_columns,
       default_output_alternative_key=default_output_alternative_key,
       assets_extra=assets_extra,
       as_text=as_text,
-      exports_to_keep=exports_to_keep)
+      exports_to_keep=exports_to_keep,
+      strip_default_attrs=strip_default_attrs)
 
 
 def _default_compare_fn(curr_best_eval_result, cand_eval_result):
@@ -543,15 +557,16 @@ def _default_compare_fn(curr_best_eval_result, cand_eval_result):
 class BestModelSelector(object):
   """A helper that keeps track of export selection candidates."""
 
-  def __init__(self, compare_fn=None):
+  def __init__(self, event_file_pattern=None, compare_fn=None):
     """Constructor of this class.
 
     Args:
+      event_file_pattern: absolute event file name pattern.
       compare_fn: a function that returns true if the candidate is better than
         the current best model.
     """
-    self._best_eval_result = None
     self._compare_fn = compare_fn or _default_compare_fn
+    self._best_eval_result = self._get_best_eval_result(event_file_pattern)
 
   def update(self, checkpoint_path, eval_result):
     """Records a given checkpoint and exports if this is the best model.
@@ -581,11 +596,40 @@ class BestModelSelector(object):
     else:
       return '', None
 
+  def _get_best_eval_result(self, event_files):
+    """Get the best eval result from event files.
 
-def make_best_model_export_strategy(serving_input_fn,
-                                    exports_to_keep=1,
-                                    compare_fn=None,
-                                    default_output_alternative_key=None):
+    Args:
+      event_files: Absolute pattern of event files.
+
+    Returns:
+      The best eval result.
+    """
+    if not event_files:
+      return None
+
+    best_eval_result = None
+    for event_file in gfile.Glob(os.path.join(event_files)):
+      for event in summary_iterator.summary_iterator(event_file):
+        if event.HasField('summary'):
+          event_eval_result = {}
+          for value in event.summary.value:
+            if value.HasField('simple_value'):
+              event_eval_result[value.tag] = value.simple_value
+          if best_eval_result is None or self._compare_fn(
+              best_eval_result, event_eval_result):
+            best_eval_result = event_eval_result
+    return best_eval_result
+
+
+def make_best_model_export_strategy(
+    serving_input_fn,
+    exports_to_keep=1,
+    model_dir=None,
+    event_file_pattern=None,
+    compare_fn=None,
+    default_output_alternative_key=None,
+    strip_default_attrs=None):
   """Creates an custom ExportStrategy for use with tf.contrib.learn.Experiment.
 
   Args:
@@ -593,10 +637,24 @@ def make_best_model_export_strategy(serving_input_fn,
       `InputFnOps`.
     exports_to_keep: an integer indicating how many historical best models need
       to be preserved.
+    model_dir: Directory where model parameters, graph etc. are saved. This will
+        be used to load eval metrics from the directory when the export strategy
+        is created. So the best metrics would not be lost even if the export
+        strategy got preempted, which guarantees that only the best model would
+        be exported regardless of preemption. If None, however, the export
+        strategy would not be preemption-safe. To be preemption-safe, both
+        model_dir and event_file_pattern would be needed.
+    event_file_pattern: event file name pattern relative to model_dir, e.g.
+        "eval_continuous/*.tfevents.*". If None, however, the export strategy
+        would not be preemption-safe. To be preemption-safe, both
+        model_dir and event_file_pattern would be needed.
     compare_fn: a function that select the 'best' candidate from a dictionary
         of evaluation result keyed by corresponding checkpoint path.
     default_output_alternative_key: the key for default serving signature for
         multi-headed inference graphs.
+    strip_default_attrs: Boolean. If True, default attrs in the
+      `GraphDef` will be stripped on write. This is recommended for better
+      forward compatibility of the resulting `SavedModel`.
 
   Returns:
     An ExportStrategy that can be passed to the Experiment constructor.
@@ -604,9 +662,13 @@ def make_best_model_export_strategy(serving_input_fn,
   best_model_export_strategy = make_export_strategy(
       serving_input_fn,
       exports_to_keep=exports_to_keep,
-      default_output_alternative_key=default_output_alternative_key)
+      default_output_alternative_key=default_output_alternative_key,
+      strip_default_attrs=strip_default_attrs)
 
-  best_model_selector = BestModelSelector(compare_fn)
+  full_event_file_pattern = os.path.join(
+      model_dir,
+      event_file_pattern) if model_dir and event_file_pattern else None
+  best_model_selector = BestModelSelector(full_event_file_pattern, compare_fn)
 
   def export_fn(estimator, export_dir_base, checkpoint_path, eval_result=None):
     """Exports the given Estimator as a SavedModel.
@@ -682,22 +744,36 @@ def extend_export_strategy(base_export_strategy,
       ValueError: If `estimator` is a ${tf.estimator.Estimator} instance
         and `default_output_alternative_key` was specified or if post_export_fn
         does not return a valid directory.
+      RuntimeError: If unable to create temporary or final export directory.
     """
-    tmp_base_export_dir = tempfile.mkdtemp()
+    tmp_base_export_folder = 'temp-base-export-' + str(int(time.time()))
+    tmp_base_export_dir = os.path.join(export_dir_base, tmp_base_export_folder)
+    if gfile.Exists(tmp_base_export_dir):
+      raise RuntimeError('Failed to obtain base export directory')
+    gfile.MakeDirs(tmp_base_export_dir)
     tmp_base_export = base_export_strategy.export(
         estimator, tmp_base_export_dir, checkpoint_path)
-    tmp_post_export_dir = tempfile.mkdtemp()
+
+    tmp_post_export_folder = 'temp-post-export-' + str(int(time.time()))
+    tmp_post_export_dir = os.path.join(export_dir_base, tmp_post_export_folder)
+    if gfile.Exists(tmp_post_export_dir):
+      raise RuntimeError('Failed to obtain temp export directory')
+
+    gfile.MakeDirs(tmp_post_export_dir)
     tmp_post_export = post_export_fn(tmp_base_export, tmp_post_export_dir)
 
     if not tmp_post_export.startswith(tmp_post_export_dir):
       raise ValueError('post_export_fn must return a sub-directory of {}'
                        .format(tmp_post_export_dir))
-    export_relpath = os.path.relpath(tmp_post_export, tmp_post_export_dir)
-
-    gfile.Rename(
-        os.path.join(tmp_post_export_dir, export_relpath),
-        os.path.join(export_dir_base, export_relpath))
-    return os.path.join(export_dir_base, export_relpath)
+    post_export_relpath = os.path.relpath(tmp_post_export, tmp_post_export_dir)
+    post_export = os.path.join(export_dir_base, post_export_relpath)
+    if gfile.Exists(post_export):
+      raise RuntimeError('Failed to obtain final export directory')
+    gfile.Rename(tmp_post_export, post_export)
+
+    gfile.DeleteRecursively(tmp_base_export_dir)
+    gfile.DeleteRecursively(tmp_post_export_dir)
+    return post_export
 
   name = post_export_name if post_export_name else base_export_strategy.name
   return export_strategy.ExportStrategy(name, export_fn)
diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
index ec3a88003f01b3b62591c13472029601b11ba491..14bf1136e8e9ab1488c4850d458382028ec5583d 100644
--- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
+++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
@@ -24,13 +24,14 @@ import time
 from tensorflow.contrib.layers.python.layers import feature_column as fc
 from tensorflow.contrib.learn.python.learn import export_strategy as export_strategy_lib
 from tensorflow.contrib.learn.python.learn.estimators import constants
-from tensorflow.contrib.learn.python.learn.estimators import estimator as core_estimator
+from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import model_fn
 from tensorflow.contrib.learn.python.learn.utils import input_fn_utils
 from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils
 from tensorflow.core.framework import tensor_shape_pb2
 from tensorflow.core.framework import types_pb2
 from tensorflow.core.protobuf import meta_graph_pb2
+from tensorflow.python.estimator import estimator as core_estimator
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
@@ -41,7 +42,7 @@ from tensorflow.python.saved_model import signature_def_utils
 from tensorflow.python.util import compat
 
 
-class TestEstimator(core_estimator.Estimator):
+class TestEstimator(estimator.Estimator):
 
   def __init__(self, *args, **kwargs):
     super(TestEstimator, self).__init__(*args, **kwargs)
@@ -55,7 +56,8 @@ class TestEstimator(core_estimator.Estimator):
                         default_output_alternative_key=None,
                         assets_extra=None,
                         as_text=False,
-                        checkpoint_path=None):
+                        checkpoint_path=None,
+                        strip_default_attrs=False):
 
     if not os.path.exists(export_dir):
       os.makedirs(export_dir)
@@ -93,9 +95,9 @@ class SavedModelExportUtilsTest(test.TestCase):
             name="input-tensor-1:0", dtype=dtype_string, tensor_shape=shape))
     expected_signature_def.outputs[
         signature_constants.REGRESS_OUTPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(name="output-tensor-1:0",
-                                      dtype=dtype_float,
-                                      tensor_shape=shape))
+            meta_graph_pb2.TensorInfo(
+                name="output-tensor-1:0", dtype=dtype_float,
+                tensor_shape=shape))
 
     expected_signature_def.method_name = signature_constants.REGRESS_METHOD_NAME
     self.assertEqual(actual_signature_def, expected_signature_def)
@@ -506,7 +508,9 @@ class SavedModelExportUtilsTest(test.TestCase):
     input_example = constant_op.constant(["input string"])
     input_ops = input_fn_utils.InputFnOps({
         "features": input_features
-    }, None, {"default input": input_example})
+    }, None, {
+        "default input": input_example
+    })
     input_alternatives, _ = (
         saved_model_export_utils.get_input_alternatives(input_ops))
     output_1 = constant_op.constant([1.0])
@@ -527,8 +531,9 @@ class SavedModelExportUtilsTest(test.TestCase):
         model_fn.ModeKeys.INFER,
         predictions={"some_output": constant_op.constant(["4"])},
         output_alternatives=provided_output_alternatives)
-    output_alternatives, _ = (saved_model_export_utils.get_output_alternatives(
-        model_fn_ops, "head-1"))
+    output_alternatives, _ = (
+        saved_model_export_utils.get_output_alternatives(
+            model_fn_ops, "head-1"))
 
     signature_defs = saved_model_export_utils.build_all_signature_defs(
         input_alternatives, output_alternatives, "head-1")
@@ -546,7 +551,9 @@ class SavedModelExportUtilsTest(test.TestCase):
         "default_input_alternative:head-3":
             signature_def_utils.predict_signature_def({
                 "default input": input_example
-            }, {"some_output_3": output_3}),
+            }, {
+                "some_output_3": output_3
+            }),
         # "features_input_alternative:head-1":
         #     signature_def_utils.regression_signature_def(input_features,
         #                                                  output_1),
@@ -589,8 +596,9 @@ class SavedModelExportUtilsTest(test.TestCase):
         model_fn.ModeKeys.INFER,
         predictions={"some_output": constant_op.constant(["4"])},
         output_alternatives=provided_output_alternatives)
-    output_alternatives, _ = (saved_model_export_utils.get_output_alternatives(
-        model_fn_ops, "head-1"))
+    output_alternatives, _ = (
+        saved_model_export_utils.get_output_alternatives(
+            model_fn_ops, "head-1"))
 
     with self.assertRaisesRegexp(
         ValueError, "A default input_alternative must be provided"):
@@ -706,25 +714,72 @@ class SavedModelExportUtilsTest(test.TestCase):
 
     self.assertNotEqual("",
                         export_strategy.export(test_estimator, export_dir_base,
-                                               "fake_ckpt_0", {"loss": 100}))
+                                               "fake_ckpt_0", {
+                                                   "loss": 100
+                                               }))
     self.assertNotEqual("", test_estimator.last_exported_dir)
     self.assertNotEqual("", test_estimator.last_exported_checkpoint)
 
     self.assertEqual("",
                      export_strategy.export(test_estimator, export_dir_base,
-                                            "fake_ckpt_1", {"loss": 101}))
+                                            "fake_ckpt_1", {
+                                                "loss": 101
+                                            }))
     self.assertEqual(test_estimator.last_exported_dir,
                      os.path.join(export_dir_base, "fake_ckpt_0"))
 
     self.assertNotEqual("",
                         export_strategy.export(test_estimator, export_dir_base,
-                                               "fake_ckpt_2", {"loss": 10}))
+                                               "fake_ckpt_2", {
+                                                   "loss": 10
+                                               }))
+    self.assertEqual(test_estimator.last_exported_dir,
+                     os.path.join(export_dir_base, "fake_ckpt_2"))
+
+    self.assertEqual("",
+                     export_strategy.export(test_estimator, export_dir_base,
+                                            "fake_ckpt_3", {
+                                                "loss": 20
+                                            }))
+    self.assertEqual(test_estimator.last_exported_dir,
+                     os.path.join(export_dir_base, "fake_ckpt_2"))
+
+  def test_make_best_model_export_strategy_with_preemption(self):
+    model_dir = self.get_temp_dir()
+    eval_dir_base = os.path.join(model_dir, "eval_continuous")
+    core_estimator._write_dict_to_summary(eval_dir_base, {"loss": 50}, 1)
+    core_estimator._write_dict_to_summary(eval_dir_base, {"loss": 60}, 2)
+
+    test_estimator = TestEstimator()
+    export_strategy = saved_model_export_utils.make_best_model_export_strategy(
+        serving_input_fn=None,
+        exports_to_keep=3,
+        model_dir=model_dir,
+        event_file_pattern="eval_continuous/*.tfevents.*",
+        compare_fn=None)
+
+    export_dir_base = os.path.join(self.get_temp_dir(), "export")
+    self.assertEqual("",
+                     export_strategy.export(test_estimator, export_dir_base,
+                                            "fake_ckpt_0", {
+                                                "loss": 100
+                                            }))
+    self.assertEqual("", test_estimator.last_exported_dir)
+    self.assertEqual("", test_estimator.last_exported_checkpoint)
+
+    self.assertNotEqual("",
+                        export_strategy.export(test_estimator, export_dir_base,
+                                               "fake_ckpt_2", {
+                                                   "loss": 10
+                                               }))
     self.assertEqual(test_estimator.last_exported_dir,
                      os.path.join(export_dir_base, "fake_ckpt_2"))
 
     self.assertEqual("",
                      export_strategy.export(test_estimator, export_dir_base,
-                                            "fake_ckpt_3", {"loss": 20}))
+                                            "fake_ckpt_3", {
+                                                "loss": 20
+                                            }))
     self.assertEqual(test_estimator.last_exported_dir,
                      os.path.join(export_dir_base, "fake_ckpt_2"))
 
@@ -766,10 +821,11 @@ class SavedModelExportUtilsTest(test.TestCase):
 
     test_estimator = TestEstimator()
     tmpdir = tempfile.mkdtemp()
-    final_path = final_export_strategy.export(test_estimator, tmpdir,
-                                              os.path.join(
-                                                  tmpdir, "checkpoint"))
-    self.assertEqual(os.path.join(tmpdir, "rewrite"), final_path)
+    export_model_dir = os.path.join(tmpdir, "model")
+    checkpoint_path = os.path.join(tmpdir, "checkpoint")
+    final_path = final_export_strategy.export(test_estimator, export_model_dir,
+                                              checkpoint_path)
+    self.assertEqual(os.path.join(export_model_dir, "rewrite"), final_path)
 
   def test_extend_export_strategy_same_name(self):
 
@@ -795,10 +851,11 @@ class SavedModelExportUtilsTest(test.TestCase):
 
     test_estimator = TestEstimator()
     tmpdir = tempfile.mkdtemp()
-    final_path = final_export_strategy.export(test_estimator, tmpdir,
-                                              os.path.join(
-                                                  tmpdir, "checkpoint"))
-    self.assertEqual(os.path.join(tmpdir, "rewrite"), final_path)
+    export_model_dir = os.path.join(tmpdir, "model")
+    checkpoint_path = os.path.join(tmpdir, "checkpoint")
+    final_path = final_export_strategy.export(test_estimator, export_model_dir,
+                                              checkpoint_path)
+    self.assertEqual(os.path.join(export_model_dir, "rewrite"), final_path)
 
   def test_extend_export_strategy_raises_error(self):
 
diff --git a/tensorflow/contrib/legacy_seq2seq/python/__init__.py b/tensorflow/contrib/legacy_seq2seq/python/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..52e83069cb0c68b510da46149248369dce376647 100644
--- a/tensorflow/contrib/legacy_seq2seq/python/__init__.py
+++ b/tensorflow/contrib/legacy_seq2seq/python/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/legacy_seq2seq/python/kernel_tests/__init__.py b/tensorflow/contrib/legacy_seq2seq/python/kernel_tests/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..52e83069cb0c68b510da46149248369dce376647 100644
--- a/tensorflow/contrib/legacy_seq2seq/python/kernel_tests/__init__.py
+++ b/tensorflow/contrib/legacy_seq2seq/python/kernel_tests/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/libsvm/BUILD b/tensorflow/contrib/libsvm/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..df96402a4ffd51840f77d58d8066487030362340
--- /dev/null
+++ b/tensorflow/contrib/libsvm/BUILD
@@ -0,0 +1,102 @@
+package(
+    default_visibility = ["//visibility:private"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs")
+load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
+load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
+load("//tensorflow:tensorflow.bzl", "tf_py_test")
+
+tf_custom_op_library(
+    name = "python/ops/_libsvm_ops.so",
+    srcs = [
+        "kernels/decode_libsvm_op.cc",
+        "ops/libsvm_ops.cc",
+    ],
+    deps = [
+        "//tensorflow/core/kernels:bounds_check_lib",
+    ],
+)
+
+tf_kernel_library(
+    name = "libsvm_kernels",
+    srcs = ["kernels/decode_libsvm_op.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels:bounds_check_lib",
+    ],
+)
+
+tf_gen_op_libs(
+    op_lib_names = ["libsvm_ops"],
+    deps = [
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_gen_op_wrapper_py(
+    name = "libsvm_ops",
+    deps = [":libsvm_ops_op_lib"],
+)
+
+tf_custom_op_py_library(
+    name = "libsvm",
+    srcs = [
+        "__init__.py",
+        "python/ops/libsvm_ops.py",
+    ],
+    dso = [
+        ":python/ops/_libsvm_ops.so",
+    ],
+    kernels = [
+        ":libsvm_kernels",
+        ":libsvm_ops_op_lib",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":libsvm_ops",
+        "//tensorflow/contrib/util:util_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:training",
+    ],
+)
+
+tf_py_test(
+    name = "decode_libsvm_op_test",
+    srcs = ["python/kernel_tests/decode_libsvm_op_test.py"],
+    additional_deps = [
+        ":libsvm",
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered_impl.py b/tensorflow/contrib/libsvm/__init__.py
similarity index 58%
rename from tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered_impl.py
rename to tensorflow/contrib/libsvm/__init__.py
index 223bc9d042c69be05b0e578835a31ed6e83c0c97..a875863caab29eb59a1834ca9184a5e272cb6656 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered_impl.py
+++ b/tensorflow/contrib/libsvm/__init__.py
@@ -12,28 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""SigmoidCentered bijector."""
+"""Libsvm decoder.
+
+@@decode_libsvm
+"""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.distributions.python.ops.bijectors import softmax_centered
+from tensorflow.contrib.libsvm.python.ops.libsvm_ops import decode_libsvm
 
+from tensorflow.python.util.all_util import remove_undocumented
 
-__all__ = [
-    "SigmoidCentered",
+_allowed_symbols = [
+    "decode_libsvm",
 ]
 
-
-class SigmoidCentered(softmax_centered.SoftmaxCentered):
-  """Bijector which computes Y = g(X) = exp([X 0]) / (1 + exp(-X)).
-
-  Equivalent to: `bijector.SoftmaxCentered(event_ndims=0)`.
-
-  See `bijector.SoftmaxCentered` for more details.
-  """
-
-  def __init__(self, validate_args=False, name="sigmoid_centered"):
-    super(SigmoidCentered, self).__init__(
-        event_ndims=0, validate_args=validate_args, name=name)
+remove_undocumented(__name__)
diff --git a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..720c74e3de5907fa006227d1278c45fd2175fe5f
--- /dev/null
+++ b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
@@ -0,0 +1,168 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+
+namespace tensorflow {
+
+template <typename T, typename Tlabel>
+class DecodeLibsvmOp : public OpKernel {
+ public:
+  explicit DecodeLibsvmOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("num_features", &num_features_));
+    OP_REQUIRES(ctx, (num_features_ >= 1),
+                errors::InvalidArgument("Invalid number of features \"",
+                                        num_features_, "\""));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor* input_tensor;
+    OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor));
+    const auto& input_flat = input_tensor->flat<string>();
+
+    Tensor* label_tensor;
+    OP_REQUIRES_OK(
+        ctx, ctx->allocate_output(0, input_tensor->shape(), &label_tensor));
+    auto label = label_tensor->flat<Tlabel>();
+
+    std::vector<T> out_values;
+    std::vector<std::pair<int64, int64>> out_indices;
+    for (int i = 0; i < input_flat.size(); ++i) {
+      StringPiece line(input_flat(i));
+      str_util::RemoveWhitespaceContext(&line);
+
+      StringPiece piece;
+      OP_REQUIRES(ctx, str_util::ConsumeNonWhitespace(&line, &piece),
+                  errors::InvalidArgument("No label found for input[", i,
+                                          "]: \"", input_flat(i), "\""));
+
+      Tlabel label_value;
+      OP_REQUIRES(ctx,
+                  strings::SafeStringToNumeric<Tlabel>(piece, &label_value),
+                  errors::InvalidArgument("Label format incorrect: ", piece));
+
+      label(i) = label_value;
+
+      str_util::RemoveLeadingWhitespace(&line);
+      while (str_util::ConsumeNonWhitespace(&line, &piece)) {
+        size_t p = piece.find(':');
+        OP_REQUIRES(ctx, (p != StringPiece::npos),
+                    errors::InvalidArgument("Invalid feature \"", piece, "\""));
+
+        int64 feature_index;
+        OP_REQUIRES(
+            ctx, strings::safe_strto64(piece.substr(0, p), &feature_index),
+            errors::InvalidArgument("Feature format incorrect: ", piece));
+        OP_REQUIRES(ctx, (feature_index >= 0),
+                    errors::InvalidArgument(
+                        "Feature index should be >= 0, got ", feature_index));
+
+        T feature_value;
+        OP_REQUIRES(
+
+            ctx,
+            strings::SafeStringToNumeric<T>(piece.substr(p + 1),
+                                            &feature_value),
+            errors::InvalidArgument("Feature format incorrect: ", piece));
+
+        out_values.emplace_back(feature_value);
+        out_indices.emplace_back(std::pair<int64, int64>(i, feature_index));
+
+        str_util::RemoveLeadingWhitespace(&line);
+      }
+    }
+
+    Tensor* indices_tensor;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(
+                            1,
+                            TensorShape({static_cast<int64>(out_indices.size()),
+                                         input_tensor->shape().dims() + 1}),
+                            &indices_tensor));
+    auto indices = indices_tensor->matrix<int64>();
+    // Translate flat index to shaped index like np.unravel_index
+    // Calculate factors for each dimension
+    std::vector<int64> factors(input_tensor->shape().dims());
+    factors[input_tensor->shape().dims() - 1] = 1;
+    for (int j = input_tensor->shape().dims() - 2; j >= 0; j--) {
+      factors[j] = factors[j + 1] * input_tensor->shape().dim_size(j + 1);
+    }
+    for (int i = 0; i < out_indices.size(); i++) {
+      indices(i, 0) = out_indices[i].first;
+      int64 value = out_indices[i].first;
+      for (int j = 0; j < input_tensor->shape().dims(); j++) {
+        indices(i, j) = value / factors[j];
+        value = value % factors[j];
+      }
+      indices(i, input_tensor->shape().dims()) = out_indices[i].second;
+    }
+
+    Tensor* values_tensor;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output(
+                       2, TensorShape({static_cast<int64>(out_values.size())}),
+                       &values_tensor));
+    auto values = values_tensor->vec<T>();
+    std::copy_n(out_values.begin(), out_values.size(), &values(0));
+
+    Tensor* shape_tensor;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(
+                            3, TensorShape({input_tensor->shape().dims() + 1}),
+                            &shape_tensor));
+    auto shape = shape_tensor->flat<int64>();
+    for (int i = 0; i < input_tensor->shape().dims(); i++) {
+      shape(i) = input_tensor->shape().dim_size(i);
+    }
+    shape(input_tensor->shape().dims()) = num_features_;
+  }
+
+ private:
+  int64 num_features_;
+};
+
+#define REGISTER_KERNEL(type)                                         \
+  REGISTER_KERNEL_BUILDER(Name("DecodeLibsvm")                        \
+                              .Device(DEVICE_CPU)                     \
+                              .TypeConstraint<type>("dtype")          \
+                              .TypeConstraint<int32>("label_dtype"),  \
+                          DecodeLibsvmOp<type, int32>);               \
+  REGISTER_KERNEL_BUILDER(Name("DecodeLibsvm")                        \
+                              .Device(DEVICE_CPU)                     \
+                              .TypeConstraint<type>("dtype")          \
+                              .TypeConstraint<int64>("label_dtype"),  \
+                          DecodeLibsvmOp<type, int64>);               \
+  REGISTER_KERNEL_BUILDER(Name("DecodeLibsvm")                        \
+                              .Device(DEVICE_CPU)                     \
+                              .TypeConstraint<type>("dtype")          \
+                              .TypeConstraint<float>("label_dtype"),  \
+                          DecodeLibsvmOp<type, float>);               \
+  REGISTER_KERNEL_BUILDER(Name("DecodeLibsvm")                        \
+                              .Device(DEVICE_CPU)                     \
+                              .TypeConstraint<type>("dtype")          \
+                              .TypeConstraint<double>("label_dtype"), \
+                          DecodeLibsvmOp<type, double>);
+
+REGISTER_KERNEL(float);
+REGISTER_KERNEL(double);
+REGISTER_KERNEL(int32);
+REGISTER_KERNEL(int64);
+#undef REGISTER_KERNEL
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/libsvm/ops/libsvm_ops.cc b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..dec946189e3cd67e2557b83806c0db79a46e5f82
--- /dev/null
+++ b/tensorflow/contrib/libsvm/ops/libsvm_ops.cc
@@ -0,0 +1,58 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+
+REGISTER_OP("DecodeLibsvm")
+    .Input("input: string")
+    .Output("label: label_dtype")
+    .Output("feature_indices: int64")
+    .Output("feature_values: dtype")
+    .Output("feature_shape: int64")
+    .Attr("dtype: {float, double, int32, int64} = DT_FLOAT")
+    .Attr("label_dtype: {float, double, int32, int64} = DT_INT64")
+    .Attr("num_features: int >= 1")
+    .SetShapeFn([](InferenceContext* c) {
+      c->set_output(0, c->input(0));
+
+      c->set_output(1, c->Matrix(InferenceContext::kUnknownDim,
+                                 InferenceContext::kUnknownDim));
+      c->set_output(2, c->Vector(InferenceContext::kUnknownDim));
+      c->set_output(3, c->Vector(InferenceContext::kUnknownDim));
+
+      return Status::OK();
+    })
+
+    .Doc(R"doc(
+Convert LibSVM input to tensors. The output consists of
+a label and a feature tensor. The shape of the label tensor
+is the same as input and the shape of the feature tensor is
+`[input_shape, num_features]`.
+
+input: Each string is a record in the LibSVM.
+label: A tensor of the same shape as input.
+feature_indices: A 2-D int64 tensor of dense_shape [N, ndims].
+feature_values: A 1-D tensor of any type and dense_shape [N].
+feature_shape: A 1-D int64 tensor of dense_shape [ndims].
+num_features: The number of features.
+)doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..423dcce8de9b9c77fcfdc8c90c909e2918852905
--- /dev/null
+++ b/tensorflow/contrib/libsvm/python/kernel_tests/decode_libsvm_op_test.py
@@ -0,0 +1,71 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for DecodeLibsvm op."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.libsvm.python.ops import libsvm_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import sparse_ops
+from tensorflow.python.platform import test
+
+
+class DecodeLibsvmOpTest(test.TestCase):
+
+  def testBasic(self):
+    with self.test_session() as sess:
+      content = [
+          "1 1:3.4 2:0.5 4:0.231", "1 2:2.5 3:inf 5:0.503",
+          "2 3:2.5 2:nan 1:0.105"
+      ]
+      sparse_features, labels = libsvm_ops.decode_libsvm(
+          content, num_features=6)
+      features = sparse_ops.sparse_tensor_to_dense(
+          sparse_features, validate_indices=False)
+
+      self.assertAllEqual(labels.get_shape().as_list(), [3])
+
+      features, labels = sess.run([features, labels])
+      self.assertAllEqual(labels, [1, 1, 2])
+      self.assertAllClose(
+          features, [[0, 3.4, 0.5, 0, 0.231, 0], [0, 0, 2.5, np.inf, 0, 0.503],
+                     [0, 0.105, np.nan, 2.5, 0, 0]])
+
+  def testNDimension(self):
+    with self.test_session() as sess:
+      content = [["1 1:3.4 2:0.5 4:0.231", "1 1:3.4 2:0.5 4:0.231"],
+                 ["1 2:2.5 3:inf 5:0.503", "1 2:2.5 3:inf 5:0.503"],
+                 ["2 3:2.5 2:nan 1:0.105", "2 3:2.5 2:nan 1:0.105"]]
+      sparse_features, labels = libsvm_ops.decode_libsvm(
+          content, num_features=6, label_dtype=dtypes.float64)
+      features = sparse_ops.sparse_tensor_to_dense(
+          sparse_features, validate_indices=False)
+
+      self.assertAllEqual(labels.get_shape().as_list(), [3, 2])
+
+      features, labels = sess.run([features, labels])
+      self.assertAllEqual(labels, [[1, 1], [1, 1], [2, 2]])
+      self.assertAllClose(
+          features, [[[0, 3.4, 0.5, 0, 0.231, 0], [0, 3.4, 0.5, 0, 0.231, 0]], [
+              [0, 0, 2.5, np.inf, 0, 0.503], [0, 0, 2.5, np.inf, 0, 0.503]
+          ], [[0, 0.105, np.nan, 2.5, 0, 0], [0, 0.105, np.nan, 2.5, 0, 0]]])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py b/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3022505635bca81625cf7abd2be5628a4760970
--- /dev/null
+++ b/tensorflow/contrib/libsvm/python/ops/libsvm_ops.py
@@ -0,0 +1,50 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Libsvm decoder."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.libsvm.ops import gen_libsvm_ops
+from tensorflow.contrib.util import loader
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.platform import resource_loader
+
+
+_libsvm_ops_so = loader.load_op_library(
+    resource_loader.get_path_to_datafile("_libsvm_ops.so"))
+
+
+def decode_libsvm(content, num_features, dtype=None, label_dtype=None):
+  """Convert Libsvm records to a tensor of label and a tensor of feature.
+
+  Args:
+    content: A `Tensor` of type `string`. Each string is a record/row in
+      the Libsvm format.
+    num_features: The number of features.
+    dtype: The type of the output feature tensor. Default to tf.float32.
+    label_dtype: The type of the output label tensor. Default to tf.int64.
+
+  Returns:
+    features: A `SparseTensor` of the shape `[input_shape, num_features]`.
+    labels: A `Tensor` of the same shape as content.
+  """
+  labels, indices, values, shape = gen_libsvm_ops.decode_libsvm(
+      content, num_features, dtype=dtype, label_dtype=label_dtype)
+  return sparse_tensor.SparseTensor(indices, values, shape), labels
+
+
+ops.NotDifferentiable("DecodeLibSVM")
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
index 7e214905b13db6a7e2f54f15873f5a9aedb4f44f..ec726bbed41a86eb314e3591ecaedaa6bf0e5e9b 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
@@ -102,7 +102,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface):
                        keys.get_shape())
 
   def lookup(self, keys, name=None):
-    if keys.dtype != self._key_dtype:
+    if keys.dtype.base_dtype != self._key_dtype:
       raise TypeError('Signature mismatch. Keys must be dtype %s, got %s.' %
                       (self._key_dtype, keys.dtype))
     self._check_keys(keys)
diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
index 701fc1c0597d1de0b0189e86feafbd1c5bbdc818..05794a42c5f2d0eece6adab36fb5610078cece31 100644
--- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
+++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
@@ -19,7 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib import layers
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
 from tensorflow.contrib.learn.python.learn.estimators import prediction_key
@@ -154,7 +154,7 @@ def sdca_model_fn(features, labels, mode, params, config=None):
     _add_bias_column(feature_columns, features, bias, columns_to_variables)
 
   def _train_op_fn(unused_loss):
-    global_step = contrib_variables.get_global_step()
+    global_step = training_util.get_global_step()
     sdca_model, train_op = optimizer.get_train_step(
         columns_to_variables, weight_column_name, loss_type, features, labels,
         global_step)
diff --git a/tensorflow/contrib/lite/Android.bp b/tensorflow/contrib/lite/Android.bp
index be4fa7c390161beddadaa2bcf34b0cdff73b6511..2b91f1e8c900ab8ab1d99cb803944821aa038d84 100644
--- a/tensorflow/contrib/lite/Android.bp
+++ b/tensorflow/contrib/lite/Android.bp
@@ -37,6 +37,7 @@ cc_library_static {
     rtti: true,
     srcs: [
         "allocation.cc",
+        "arena_planner.cc",
         "error_reporter.cc",
         "interpreter.cc",
         "model.cc",
@@ -51,7 +52,9 @@ cc_library_static {
         "gemmlowp_headers",
     ],
     cflags: [
+        "-Wno-mismatched-tags",
         "-Wno-sign-compare",
+        "-Wno-unused-lambda-capture",
     ],
 }
 
@@ -73,4 +76,4 @@ build = [
     "tflite_static.bp",
 ]
 
-subdirs = ["kernels"]
\ No newline at end of file
+subdirs = ["kernels"]
diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD
index 52460123cc10ec9b2ee13043fd43f84508b05000..13350c5a438b75fe14e8753e5bb1bb77ec8f655b 100644
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -35,6 +35,28 @@ cc_library(
     hdrs = ["version.h"],
 )
 
+cc_library(
+    name = "arena_planner",
+    srcs = ["arena_planner.cc"],
+    hdrs = ["arena_planner.h"],
+    deps = [
+        ":context",
+        ":graph_info",
+        ":memory_planner",
+        ":simple_memory_arena",
+    ],
+)
+
+cc_test(
+    name = "arena_planner_test",
+    size = "small",
+    srcs = ["arena_planner_test.cc"],
+    deps = [
+        ":arena_planner",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 # Main library. No ops are included here.
 # TODO(aselle): Resolve problems preventing C99 usage.
 cc_library(
@@ -43,6 +65,25 @@ cc_library(
     hdrs = ["context.h"],
 )
 
+cc_library(
+    name = "graph_info",
+    hdrs = ["graph_info.h"],
+    deps = [":context"],
+)
+
+cc_library(
+    name = "memory_planner",
+    hdrs = ["memory_planner.h"],
+    deps = [":context"],
+)
+
+cc_library(
+    name = "simple_memory_arena",
+    srcs = ["simple_memory_arena.cc"],
+    hdrs = ["simple_memory_arena.h"],
+    deps = [":context"],
+)
+
 cc_library(
     name = "builtin_op_data",
     hdrs = [
@@ -70,7 +111,6 @@ cc_library(
         "model.cc",
         "nnapi_delegate.cc",
         "optional_debug_tools.cc",
-        "simple_memory_arena.cc",
     ],
     hdrs = [
         "allocation.h",
@@ -80,13 +120,16 @@ cc_library(
         "model.h",
         "nnapi_delegate.h",
         "optional_debug_tools.h",
-        "simple_memory_arena.h",
     ],
     copts = tflite_copts(),
     deps = [
+        ":arena_planner",
         ":builtin_op_data",
         ":context",
+        ":graph_info",
+        ":memory_planner",
         ":schema_fbs_version",
+        ":simple_memory_arena",
         "//tensorflow/contrib/lite/kernels:gemm_support",
         "//tensorflow/contrib/lite/nnapi:nnapi_lib",
         "//tensorflow/contrib/lite/schema:schema_fbs",
@@ -111,6 +154,7 @@ cc_test(
     deps = [
         ":framework",
         ":string_util",
+        "//tensorflow/contrib/lite/testing:util",
         "@com_google_googletest//:gtest",
     ],
 )
@@ -133,7 +177,8 @@ cc_test(
     size = "small",
     srcs = ["simple_memory_arena_test.cc"],
     deps = [
-        ":framework",
+        ":simple_memory_arena",
+        "//tensorflow/contrib/lite/testing:util",
         "@com_google_googletest//:gtest",
     ],
 )
@@ -152,6 +197,7 @@ cc_test(
     ],
     deps = [
         ":framework",
+        "//tensorflow/contrib/lite/testing:util",
         "@com_google_googletest//:gtest",
     ],
 )
@@ -163,6 +209,7 @@ cc_test(
     srcs = ["context_test.cc"],
     deps = [
         ":framework",
+        "//tensorflow/contrib/lite/testing:util",
         "@com_google_googletest//:gtest",
     ],
 )
diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile
index 78402727abdd2742ffff54bf59ca076d8b97b042..7f316292724ea0baaf034d4e914773ad97a957d4 100644
--- a/tensorflow/contrib/lite/Makefile
+++ b/tensorflow/contrib/lite/Makefile
@@ -56,7 +56,7 @@ LIBS := \
 -lz
 
 # If we're on Linux, also link in the dl library.
-ifeq ($(OS),LINUX)
+ifeq ($(HOST_OS),LINUX)
 	LIBS += -ldl -lpthread
 endif
 
diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md
index c7464bcc9d39b0e884e76f5a3ffa152e98bb0f47..55a524b207b258e794f97e68a96cf01dc60efb7f 100644
--- a/tensorflow/contrib/lite/README.md
+++ b/tensorflow/contrib/lite/README.md
@@ -4,7 +4,7 @@ TensorFlow Lite is TensorFlow's lightweight solution for mobile and embedded dev
 TensorFlow Lite uses many techniques for achieving low latency like optimizing the kernels for specific mobile apps, pre-fused activations, quantized kernels that allow smaller and faster (fixed-point math) models, and in the future, leverage specialized machine learning hardware to get the best possible performance for a particular model on a particular device.
 
 ![image](g3doc/TFLite-Architecture.jpg)
-# Getting Started with a Demo App
+# Getting Started with an Android Demo App
 
 This section contains an example application using TensorFlow Lite for Android devices. The demo is a sample camera app that classifies images continuously using a quantized Mobilenet model. A device running Android 5.0 ( API 21) or higher is required to run the demo.
 
@@ -17,7 +17,7 @@ There are 3 ways to get the demo app to your device
 In the demo app, inference is done using the TensorFlow Lite Java API. The demo app classifies frames in real-time, displaying the top most probable classifications. It also displays the time taken to detect the object.
 
 ## Downloading the pre-built binary
-The  fastest path to trying the demo, is to download the pre-built binary
+The fastest path to trying the demo, is to download the pre-built binary
 [TfLiteCameraDemo.apk](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk)
 
 Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera's field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified.
@@ -69,7 +69,7 @@ android_ndk_repository(
 
 Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md).
 
-### Build the  source code
+### Build the source code
 Run bazel with the following command to build the demo.
 
 Build the demo app:
@@ -86,6 +86,17 @@ environment (due to a Bazel bug).
 ### More about the demo
 The demo is resizing each camera image frame to (224 width * 224 height) to match the  quantized Mobilenet model being used. The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch 224 * 224 is the width and height of the image 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. The Mobilenet model has 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The Mobilenet quantized model is bundled within the assets directory of the app.
 
+# iOS Demo App
+
+Similar to the Android demo app, there's an iOS camera app that uses exactly the same model (224 * 224 quantized Mobilenet).
+
+This demo app requires a camera so it doesn't work with simulators. It need to be executed on a real iOS device. Follow the instructions to build and run the demo app:
+
+1.   Follow the Building section [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/ios.md#building) to build the universal iOS library for TensorFlow Lite.
+1.   Install [CocoaPods](https://cocoapods.org/) if it wasn't installed yet: `sudo gem install cocoapods`.
+1.   Run `pod install` in `tensorflow/contrib/lite/examples/ios/camera` to generate the workspace file.
+1.   Open the project by running `open tflite_camera_example.xcworkspace`, and build the app in XCode.
+
 # TensorFlow Lite Quick Start
 
 ## Step 1. Decide which GraphDef to use
@@ -156,6 +167,7 @@ graphviz, or [in tensorboard](https://codelabs.developers.google.com/codelabs/te
 This frozen Graphdef is now ready to be converted to flatbuffer format (.lite) for use on Android or iOS.  On Android users have the flexibility to use either the float or quantized versions of the frozen graphdef, if available, using the Tensorflow Optimizing Converter tool.
 
 Here is a sample command line to convert the frozen Graphdef to '.lite' format for  The Tensorflow Optimizing Converter supports both float and quantized models, however, different configuration parameters are needed depending on whether a FLOAT or QUANTIZED mode is being used.
+(Here is a link to the pb [file](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz)).
 
 ```
 bazel build tensorflow/contrib/lite/toco:toco
@@ -174,9 +186,9 @@ bazel-bin/tensorflow/contrib/lite/toco/toco -- \
 - Setting the input_array, output_array and input_shape arguments are a bit trickier. The easiest way to find these values is to explore the graph in tensorboard .  The user should reuse the arguments that were used for specifying the output nodes for inference in the `freeze_graph`step.
 
 Note, it is also possible to use the Tensorflow Optimizing Converter through protos either from Python or from the command line see the
-documentation [here](https://github.com/tensorflow/tensorflow/tree/mastertensorflow/contrib/lite/python:toco_from_protos target) A developer can then integrate the conversion step into their model design workflow to ensure that a model will be easily convertible to a mobile inference graph. For example,
+documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/python/toco_from_protos.py). A developer can then integrate the conversion step into their model design workflow to ensure that a model will be easily convertible to a mobile inference graph. For example,
 
-```
+```python
 import tensorflow as tf
 
 img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3))
@@ -191,6 +203,12 @@ For detailed instructions on how to use the Tensorflow Optimizing Converter, ple
 
 You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for troubleshooting help. If that doesn't help, please file an [issue](https://github.com/tensorflow/tensorflow/issues).
 
+If you would like to see a visual description of your TensorFlow Lite model after conversion, you can use tensorflow/contrib/lite/tools/visualize.py by running
+```sh
+bazel run tensorflow/contrib/lite/tools:visualize -- model.tflite model_viz.html
+```
+and then visualize the resulting HTML file in a browser.
+
 ## Step 3. Use the TensorFlow Lite model for inference in a mobile app
 
 After completion of Step 2 the developer should have a .lite model.
@@ -204,3 +222,7 @@ Note that you'd need to follow instructions for installing TensorFlow on Android
 
 ### For iOS
 Follow the documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app.
+
+## Core ML support
+
+Core ML is a machine learning framework used across Apple products. In addition to using Tensorflow Lite models directly in their applications, developers have the option to convert their trained Tensorflow models to the [CoreML](https://developer.apple.com/machine-learning/) format for use on Apple devices. For information on how to use the converter please refer to the [Tensorflow-CoreML converter documentation](https://github.com/tf-coreml/tf-coreml).
diff --git a/tensorflow/contrib/lite/arena_planner.cc b/tensorflow/contrib/lite/arena_planner.cc
new file mode 100644
index 0000000000000000000000000000000000000000..bf1bcdd1a7a7d3395c45ae95abd5980e9ffc0fc6
--- /dev/null
+++ b/tensorflow/contrib/lite/arena_planner.cc
@@ -0,0 +1,247 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/arena_planner.h"
+
+namespace tflite {
+
+namespace {
+
+// Memory allocation tuning
+constexpr const int kDefaultArenaAlignment = 64;
+constexpr const int kDefaultTensorAlignment = 4;
+
+}  // namespace
+
+struct AllocationInfo {
+  // The node index requesting this allocation.
+  int node;
+  // The tensor index to be allocated or deallocated.
+  int tensor;
+  // Whether to allocate or deallocate
+  enum { ALLOC, DEALLOC } type;
+};
+
+ArenaPlanner::ArenaPlanner(TfLiteContext* context,
+                           std::unique_ptr<GraphInfo> graph_info)
+    : context_(context),
+      graph_info_(std::move(graph_info)),
+      arena_(kDefaultArenaAlignment),
+      persistent_arena_(kDefaultArenaAlignment) {}
+
+ArenaPlanner::~ArenaPlanner() {}
+
+int64_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
+  if (type == kTfLiteArenaRwPersistent) {
+    return persistent_arena_.BasePointer();
+  }
+  if (type == kTfLiteArenaRw) {
+    return arena_.BasePointer();
+  }
+  return 0;
+}
+
+TfLiteStatus ArenaPlanner::ResetAllocations() {
+  TF_LITE_ENSURE_STATUS(arena_.Clear());
+  TF_LITE_ENSURE_STATUS(persistent_arena_.Clear());
+  allocs_.clear();
+  allocs_.resize(graph_info_->num_tensors());
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::PlanAllocations() {
+  // Invalidate any existing data.
+  TF_LITE_ENSURE_STATUS(ResetAllocations());
+
+  // Keeps track of references to each tensor.
+  std::vector<int> refcounts(graph_info_->num_tensors(), 0);
+
+  // There will be an entry in alloc_queue_ for the allocation of each tensor
+  // and another for their deallocation.
+  alloc_queue_.reserve(2 * graph_info_->num_tensors());
+
+  // We must make sure the output tensors are never overwritten. We do that by
+  // artificially adding one to their ref-counts so they are never selected
+  // for deallocation.
+  for (int tensor_index : graph_info_->outputs()) {
+    refcounts[tensor_index]++;
+  }
+
+  // Count references to node input tensors.
+  for (int i = 0; i < graph_info_->num_nodes(); ++i) {
+    const TfLiteNode& node = graph_info_->node(i);
+    TfLiteIntArray* node_inputs = node.inputs;
+    for (int j = 0; j < node_inputs->size; ++j) {
+      int tensor_index = node_inputs->data[j];
+      if (tensor_index != kOptionalTensor) {
+        refcounts[tensor_index]++;
+      }
+    }
+  }
+
+  // Queue all graph inputs for allocation.
+  for (int tensor_index : graph_info_->inputs()) {
+    if (tensor_index != kOptionalTensor) {
+      alloc_queue_.push_back({0, tensor_index, AllocationInfo::ALLOC});
+    }
+  }
+
+  // Go through the graph in execution order.
+  for (int i = 0; i < graph_info_->num_nodes(); ++i) {
+    const TfLiteNode& node = graph_info_->node(i);
+
+    // First queue output tensors for allocation.
+    TfLiteIntArray* node_outputs = node.outputs;
+    for (int j = 0; j < node_outputs->size; ++j) {
+      int tensor_index = node_outputs->data[j];
+      alloc_queue_.push_back({i, tensor_index, AllocationInfo::ALLOC});
+    }
+
+    // Then update the ref-counts of the node's inputs, and if necessary queue
+    // them for deallocation.
+    TfLiteIntArray* node_inputs = node.inputs;
+    for (int j = 0; j < node_inputs->size; ++j) {
+      int tensor_index = node_inputs->data[j];
+      if (tensor_index != kOptionalTensor) {
+        refcounts[tensor_index]--;
+        if (refcounts[tensor_index] == 0) {
+          alloc_queue_.push_back({i, tensor_index, AllocationInfo::DEALLOC});
+        }
+      }
+    }
+  }
+
+  // Note that graph outputs will never be scheduled for deallocation. We
+  // could do that here for completeness, but it won't have any effect.
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
+  TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
+  TF_LITE_ENSURE_STATUS(Commit());
+
+  for (int i = 0; i < graph_info_->num_tensors(); ++i) {
+    // TODO(ahentz): we could do this only for the tensors that were modified
+    // in CalculateAllocations(), instead of redoing it for tensors that
+    // already had proper pointers. However we must be very careful, because
+    // SimpleMemoryArena::Commit() could move the base pointer.
+    TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::Commit() {
+  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
+  TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_));
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
+  int active_node = first_node;
+  // When dynamic tensors are present this method is called multiple times.
+  // The items in the alloc_queue_ referring to nodes before first_node were
+  // processed previously and should be skipped. Entries after last_node are
+  // not yet ready to be handled.
+  for (const auto& alloc_info : alloc_queue_) {
+    if (alloc_info.node < first_node) continue;
+    if (alloc_info.node > last_node) break;
+    if (alloc_info.node == active_node) {
+      // This is the first allocation/deallocation for a given node.  It is
+      // time to deallocate the previous temporaries and allocate new ones.
+      if (active_node != first_node) {
+        TF_LITE_ENSURE_STATUS(
+            CalculateDeallocationOfInternalTensors(active_node - 1));
+      }
+      TF_LITE_ENSURE_STATUS(CalculateAllocationOfInternalTensors(active_node));
+      ++active_node;
+    }
+    // Handle the current item.
+    if (alloc_info.type == AllocationInfo::ALLOC) {
+      TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(alloc_info.tensor));
+    } else {
+      TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(alloc_info.tensor));
+    }
+  }
+
+  // Don't forget to deallocate temporaries of last node.
+  TF_LITE_ENSURE_STATUS(
+      CalculateDeallocationOfInternalTensors(active_node - 1));
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
+  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
+  if (tensor.allocation_type == kTfLiteArenaRw) {
+    TF_LITE_ENSURE_STATUS(
+        arena_.ResolveAlloc(context_, allocs_[tensor_index], &tensor.data.raw));
+  }
+  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
+    TF_LITE_ENSURE_STATUS(persistent_arena_.ResolveAlloc(
+        context_, allocs_[tensor_index], &tensor.data.raw));
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateTensorAllocation(int tensor_index) {
+  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
+  if (tensor.allocation_type == kTfLiteArenaRw) {
+    TF_LITE_ENSURE_STATUS(arena_.Allocate(context_, kDefaultTensorAlignment,
+                                          tensor.bytes,
+                                          &allocs_[tensor_index]));
+  }
+  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
+    TF_LITE_ENSURE_STATUS(
+        persistent_arena_.Allocate(context_, kDefaultTensorAlignment,
+                                   tensor.bytes, &allocs_[tensor_index]));
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateTensorDeallocation(int tensor_index) {
+  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
+  if (tensor.allocation_type == kTfLiteArenaRw) {
+    TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateAllocationOfInternalTensors(
+    int node_index) {
+  if (node_index < graph_info_->num_nodes()) {
+    const TfLiteNode& node = graph_info_->node(node_index);
+    TfLiteIntArray* node_temporaries = node.temporaries;
+    for (int i = 0; i < node_temporaries->size; ++i) {
+      int tensor_index = node_temporaries->data[i];
+      TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(tensor_index));
+    }
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateDeallocationOfInternalTensors(
+    int node_index) {
+  if (node_index < graph_info_->num_nodes()) {
+    const TfLiteNode& node = graph_info_->node(node_index);
+    TfLiteIntArray* node_temporaries = node.temporaries;
+    for (int i = 0; i < node_temporaries->size; ++i) {
+      int tensor_index = node_temporaries->data[i];
+      TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(tensor_index));
+    }
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/arena_planner.h b/tensorflow/contrib/lite/arena_planner.h
new file mode 100644
index 0000000000000000000000000000000000000000..bd87414ec3c8ac75b99e730fcac977a7afa08806
--- /dev/null
+++ b/tensorflow/contrib/lite/arena_planner.h
@@ -0,0 +1,107 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/graph_info.h"
+#include "tensorflow/contrib/lite/memory_planner.h"
+#include "tensorflow/contrib/lite/simple_memory_arena.h"
+
+namespace tflite {
+
+class AllocationInfo;
+
+// A memory planner that makes all the allocations using arenas.
+//
+// Before a model is executed by the interpreter, this class determines when
+// each tensor needs to be allocated and deallocated, and preallocates all the
+// necessary memory (the PlanAllocations phase). It then assigns portions of
+// this memory buffer to each tensor (the ExecuteAllocations phase). Tensors may
+// share some of the bufer if a tensor B is to be allocated after another tensor
+// A has been deallocated.
+//
+// If dynamic tensors are used the planning steps can be repeated during model
+// execution. Since dynamic tensors don't have sizes until after the
+// corresponding operation is executed, this class supports incremental
+// planning.
+class ArenaPlanner : public MemoryPlanner {
+ public:
+  // Ownership of 'context' is not taken and it must remain util the
+  // ArenaPlanner is destroyed.
+  ArenaPlanner(TfLiteContext* context, std::unique_ptr<GraphInfo> graph_info);
+  ~ArenaPlanner() override;
+  ArenaPlanner(const ArenaPlanner&) = delete;
+  ArenaPlanner& operator=(const ArenaPlanner&) = delete;
+
+  TfLiteStatus ResetAllocations() override;
+  TfLiteStatus PlanAllocations() override;
+  TfLiteStatus ExecuteAllocations(int first_node, int last_node) override;
+
+  // Returns the base arena location for a given allocation type.
+  int64_t BasePointer(TfLiteAllocationType type);
+
+ private:
+  // Make sure all the arenas have reserved enough memory to store all their
+  // tensors.
+  TfLiteStatus Commit();
+
+  // Traverse the allocation queue and reserve space in the appropriate arena
+  // for all tensors affected by ops in the interval [first_node, last_node].
+  TfLiteStatus CalculateAllocations(int first_node, int last_node);
+
+  // Assign absolute memory location to a tensor, based on its relative
+  // position inside the corresponding arena buffer.
+  TfLiteStatus ResolveTensorAllocation(int tensor_index);
+
+  // Register an allocation for the given tensor.
+  TfLiteStatus CalculateTensorAllocation(int tensor_index);
+
+  // Register a deallocation for the given tensor.
+  TfLiteStatus CalculateTensorDeallocation(int tensor_index);
+
+  // Register an allocation for all internal (temporary) tensors of
+  // 'node_index'.
+  TfLiteStatus CalculateAllocationOfInternalTensors(int node_index);
+
+  // Register a deallocation for all internal (temporary) tensors of
+  // 'node_index'.
+  TfLiteStatus CalculateDeallocationOfInternalTensors(int node_index);
+
+  TfLiteContext* context_;
+  std::unique_ptr<GraphInfo> graph_info_;
+
+  // Stores allocation data for all tensors.
+  std::vector<ArenaAlloc> allocs_;
+
+  // A chronological list of instructions to allocated and deallocate tensors,
+  // reflecting the way they are used in the graph.
+  std::vector<AllocationInfo> alloc_queue_;
+
+  // Raw memory buffer that is allocated for all temporary and graph outputs.
+  // that are declared kTfLiteArenaRw.
+  SimpleMemoryArena arena_;
+
+  // Raw memory buffer that is allocated for persistent tensors that are
+  // declared as kTfLiteArenaRwPersistent.
+  SimpleMemoryArena persistent_arena_;
+};
+
+}  // namespace tflite
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_
diff --git a/tensorflow/contrib/lite/arena_planner_test.cc b/tensorflow/contrib/lite/arena_planner_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c27c327abc63d7bd1e3912d368a1dacb62c50ca8
--- /dev/null
+++ b/tensorflow/contrib/lite/arena_planner_test.cc
@@ -0,0 +1,472 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/arena_planner.h"
+
+#include <cstdarg>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace tflite {
+namespace {
+
+// A simple op to be used in tests, as syntactic sugar.
+class TestOp {
+ public:
+  TestOp(std::initializer_list<int> inputs, std::initializer_list<int> outputs,
+         std::initializer_list<int> temporaries)
+      : inputs_(inputs), outputs_(outputs), temporaries_(temporaries) {}
+
+  const std::vector<int>& inputs() const { return inputs_; }
+  const std::vector<int>& outputs() const { return outputs_; }
+  const std::vector<int>& temporaries() const { return temporaries_; }
+
+ private:
+  std::vector<int> inputs_;
+  std::vector<int> outputs_;
+  std::vector<int> temporaries_;
+};
+
+// A test graph where inputs are processed by the given nodes to produce
+// outputs.
+class TestGraph {
+ public:
+  TestGraph(std::initializer_list<int> inputs,
+            std::initializer_list<TestOp> nodes,
+            std::initializer_list<int> outputs)
+      : inputs_(inputs), outputs_(outputs) {
+    int max_tensor_index = 0;
+
+    for (int t : inputs) {
+      max_tensor_index = std::max(max_tensor_index, t);
+    }
+    for (int t : outputs) {
+      max_tensor_index = std::max(max_tensor_index, t);
+    }
+    for (const auto& node : nodes) {
+      auto int_array = [](const std::vector<int>& x) {
+        TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size());
+        for (size_t i = 0; i < x.size(); i++) lite->data[i] = x[i];
+        return lite;
+      };
+
+      nodes_.push_back(TfLiteNode());
+      nodes_.back().inputs = int_array(node.inputs());
+      for (int t : node.inputs()) {
+        max_tensor_index = std::max(max_tensor_index, t);
+      }
+      nodes_.back().outputs = int_array(node.outputs());
+      for (int t : node.outputs()) {
+        max_tensor_index = std::max(max_tensor_index, t);
+      }
+      nodes_.back().temporaries = int_array(node.temporaries());
+      for (int t : node.temporaries()) {
+        max_tensor_index = std::max(max_tensor_index, t);
+      }
+    }
+
+    for (int i = 0; i <= max_tensor_index; ++i) {
+      tensors_.push_back(TfLiteTensor());
+      // Set some default values for allocation_type and bytes, which are the
+      // only fields used by the arena planner.
+      tensors_.back().allocation_type = kTfLiteArenaRw;
+      tensors_.back().bytes = (i + 1) * 3;
+    }
+  }
+
+  ~TestGraph() {
+    for (auto node : nodes_) {
+      TfLiteIntArrayFree(node.inputs);
+      TfLiteIntArrayFree(node.outputs);
+      TfLiteIntArrayFree(node.temporaries);
+    }
+  }
+
+  const std::vector<TfLiteNode>& nodes() { return nodes_; }
+  std::vector<TfLiteTensor>* tensors() { return &tensors_; }
+  const std::vector<int>& inputs() { return inputs_; }
+  const std::vector<int>& outputs() { return outputs_; }
+
+ private:
+  std::vector<TfLiteNode> nodes_;
+  std::vector<TfLiteTensor> tensors_;
+  std::vector<int> inputs_;
+  std::vector<int> outputs_;
+};
+
+// The GraphInfo for a TestGraph.
+class TestGraphInfo : public GraphInfo {
+ public:
+  explicit TestGraphInfo(TestGraph* graph) : graph_(graph) {}
+
+  size_t num_tensors() const override { return graph_->tensors()->size(); }
+  TfLiteTensor* tensor(size_t index) override {
+    return &graph_->tensors()->at(index);
+  }
+  size_t num_nodes() const override { return graph_->nodes().size(); }
+  const TfLiteNode& node(size_t index) const override {
+    return graph_->nodes()[index];
+  }
+  const std::vector<int>& inputs() const override { return graph_->inputs(); }
+  const std::vector<int>& outputs() const override { return graph_->outputs(); }
+
+ private:
+  TestGraph* graph_;
+};
+
+void ReportError(TfLiteContext* context, const char* format, ...) {
+  const size_t kBufferSize = 1024;
+  char temp_buffer[kBufferSize];
+
+  va_list args;
+  va_start(args, format);
+  vsnprintf(temp_buffer, kBufferSize, format, args);
+  va_end(args);
+
+  LOG(INFO) << temp_buffer;
+}
+
+class ArenaPlannerTest : public ::testing::Test {
+ protected:
+  void SetGraph(TestGraph* graph) {
+    graph_ = graph;
+    context_.ReportError = ReportError;
+    planner_.reset(new ArenaPlanner(
+        &context_, std::unique_ptr<GraphInfo>(new TestGraphInfo(graph))));
+    CHECK(planner_->ResetAllocations() == kTfLiteOk);
+    CHECK(planner_->PlanAllocations() == kTfLiteOk);
+  }
+
+  void Execute(int start, int end) {
+    CHECK(planner_->ExecuteAllocations(start, end) == kTfLiteOk);
+  }
+
+  // Returns the actual offset of a given tensor, relative to the start of its
+  // arena.
+  int64_t GetOffset(int tensor_index) {
+    const TfLiteTensor& tensor = (*graph_->tensors())[tensor_index];
+    return reinterpret_cast<int64_t>(tensor.data.raw) -
+           planner_->BasePointer(tensor.allocation_type);
+  }
+
+  // Returns the first aligned offset after a given tensor.
+  int64_t GetOffsetAfter(int tensor_index) {
+    const TfLiteTensor& tensor = (*graph_->tensors())[tensor_index];
+    int64_t offset = GetOffset(tensor_index) + tensor.bytes;
+    // We must make sure the offset is aligned to kDefaultArenaAlignment.
+    if (offset % 4 != 0) {
+      offset += 4 - offset % 4;
+    }
+    return offset;
+  };
+
+  TfLiteContext context_;
+  TestGraph* graph_;
+  std::unique_ptr<ArenaPlanner> planner_;
+};
+
+TEST_F(ArenaPlannerTest, EmptyGraph) {
+  TestGraph graph({}, {}, {});
+  SetGraph(&graph);
+  Execute(0, 10);
+}
+
+TEST_F(ArenaPlannerTest, GraphWithNoOps) {
+  TestGraph graph({0, 10}, {}, {5, 11});
+  SetGraph(&graph);
+  Execute(0, 10);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(10), GetOffsetAfter(0));
+  // The outputs are never allocated because they are not connected to any
+  // inputs.
+  EXPECT_EQ(GetOffset(5), 0);
+  EXPECT_EQ(GetOffset(11), 0);
+}
+
+TEST_F(ArenaPlannerTest, GraphWithOneOp) {
+  TestGraph graph({1}, {{{1}, {2}, {}}}, {2});
+  SetGraph(&graph);
+  Execute(0, 10);
+  EXPECT_EQ(GetOffset(1), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+}
+
+TEST_F(ArenaPlannerTest, ZeroSizedTensors) {
+  TestGraph graph({1}, {{{1}, {2}, {}}}, {2});
+  (*graph.tensors())[1].bytes = 0;
+  SetGraph(&graph);
+  // TODO(ahentz): this is currently broken because the arena finds two
+  // allocations with the same offset and returns an error.
+  ASSERT_FALSE(planner_->ExecuteAllocations(0, 10) == kTfLiteOk);
+  // EXPECT_EQ(GetOffset(1), 0);
+  // EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraph) {
+  TestGraph graph({0, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},     // First op
+                      {{2, 0}, {4, 5}, {}},  // Second op
+                      {{4, 5}, {3}, {}}      // Third op
+                  },
+                  {3});
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithTemporary) {
+  TestGraph graph({0, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},   // First op
+                      {{2, 0}, {4}, {5}},  // Second op, with temporary
+                      {{4}, {3}, {}}       // Third op
+                  },
+                  {3});
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithOptionals) {
+  TestGraph graph({0, -1, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},     // First op
+                      {{2, 0}, {4, 5}, {}},  // Second op
+                      {{4, -1, 5}, {3}, {}}  // Third op, with optional
+                  },
+                  {3});
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithLargeTensor) {
+  TestGraph graph({0, -1, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},   // First op
+                      {{2, 0}, {4}, {5}},  // Second op, with temporary
+                      {{4, -1}, {3}, {}}   // Third op, with optional
+                  },
+                  {3});
+
+  // Make #1 very large so its vacancy can be filled with #5 and #4.
+  (*graph.tensors())[1].bytes = 40;
+
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithPersistentTensor) {
+  TestGraph graph({0, -1, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},   // First op
+                      {{2, 0}, {4}, {5}},  // Second op, with temporary
+                      {{4, -1}, {3}, {}}   // Third op, with optional
+                  },
+                  {3});
+
+  // Make #1 persistent so it goes into its own arena.
+  (*graph.tensors())[1].allocation_type = kTfLiteArenaRwPersistent;
+
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Make sure #0 and #1 were given different memory locations (because they
+  // will both have offset=0, in different arenas.)
+  EXPECT_NE((*graph.tensors())[0].data.raw, (*graph.tensors())[1].data.raw);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithDynamicTensor) {
+  TestGraph graph({0, -1, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},   // First op
+                      {{2, 0}, {4}, {5}},  // Second op, with temporary
+                      {{4, -1}, {3}, {}}   // Third op, with optional
+                  },
+                  {3});
+
+  // Make #1 dynaic so it does not get allocated.
+  (*graph.tensors())[1].allocation_type = kTfLiteDynamic;
+
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  EXPECT_EQ((*graph.tensors())[1].data.raw, nullptr);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
+  TestGraph graph({0, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2, 3}, {}},
+                      {{2, 0}, {4, 5}, {6}},
+                      {{1, -1}, {7}, {}},
+                      {{7, 3}, {8}, {9}},
+                      {{4, 5, 8}, {10}, {}},
+                  },
+                  {10});
+  SetGraph(&graph);
+
+  auto is_unallocated = [&](int tensor_index) {
+    // TODO(ahentz): We'd to use nullptr to represent unallocated tensors, but
+    // the current code still points them all to the beginning fo the alloc
+    // (that is, zero offset).
+    // return (*graph.tensors())[tensor_index].data.raw == nullptr;
+    return GetOffset(tensor_index) == 0;
+  };
+
+  // The allocation plan is made at the beginning and is independent of
+  // the execution steps. Here's the allocation order:
+  //   Op0: +0 +1 +2 +3
+  //   Op1: +6 +4 +5 -6 -0 -2
+  //   Op2: +7 -1
+  //   Op3: +9 +8 -9 -3 -7
+  //   Op4: +10 -4 -5 -8
+
+  Execute(0, 0);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_TRUE(is_unallocated(6));
+  EXPECT_TRUE(is_unallocated(4));
+  EXPECT_TRUE(is_unallocated(5));
+  EXPECT_TRUE(is_unallocated(7));
+  EXPECT_TRUE(is_unallocated(9));
+  EXPECT_TRUE(is_unallocated(8));
+  EXPECT_TRUE(is_unallocated(10));
+
+  Execute(1, 1);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  EXPECT_TRUE(is_unallocated(7));
+  EXPECT_TRUE(is_unallocated(9));
+  EXPECT_TRUE(is_unallocated(8));
+  EXPECT_TRUE(is_unallocated(10));
+
+  Execute(2, 2);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  // Here's an interesting allocation. Even though #6 requires only 21 bytes,
+  // its deallocation freed up 24 bytes due to the alignment requirements in
+  // the arena. That means we can fit #7 in the same space!
+  EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
+  EXPECT_TRUE(is_unallocated(9));
+  EXPECT_TRUE(is_unallocated(8));
+  EXPECT_TRUE(is_unallocated(10));
+
+  Execute(3, 3);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
+  // The deallocation of #0, #1 and #2 freed up 24 bytes but that's not enough
+  // for #9, so it goes at the end.
+  EXPECT_EQ(GetOffset(9), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
+  EXPECT_TRUE(is_unallocated(10));
+
+  Execute(4, 4);
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(9), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
+  // There's just enough space at the beginning for #10 due to the
+  // deallocation of #0, #1, #2 and #3 (total 36 bytes, #10 needs
+  // only 33.)
+  EXPECT_EQ(GetOffset(10), 0);
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  // ::tflite::LogToStderr();
+  FLAGS_logtostderr = true;
+
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index e3c9cdd99beb93e356c148298dcbe6498fbe0306..0a097d5a69a8bc15aa03502f7a2131fc36e36091 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -89,11 +89,11 @@ def tflite_jni_linkopts():
   return tflite_jni_linkopts_unstripped() + select({
       "//tensorflow:android": [
           "-s",  # Omit symbol table.
+          "-latomic",  # Required for some uses of ISO C++11 <atomic> in x86.
       ],
       "//conditions:default": [],
   })
 
-
 def tflite_jni_binary(name,
                       copts=tflite_copts(),
                       linkopts=tflite_jni_linkopts(),
@@ -223,11 +223,12 @@ def gen_selected_ops(name, model):
   """
   out = name + "_registration.cc"
   tool = "//tensorflow/contrib/lite/tools:generate_op_registrations"
+  tflite_path = "//tensorflow/contrib/lite"
   native.genrule(
       name = name,
       srcs = [model],
       outs = [out],
-      cmd = ("$(location %s) --input_model=$(location %s) --output_registration=$(location %s)")
-      % (tool, model, out),
+      cmd = ("$(location %s) --input_model=$(location %s) --output_registration=$(location %s) --tflite_path=%s")
+      % (tool, model, out, tflite_path[2:]),
       tools = [tool],
   )
diff --git a/tensorflow/contrib/lite/build_ios_universal_lib.sh b/tensorflow/contrib/lite/build_ios_universal_lib.sh
index e0f2ef768bfed544ed8acd6c0e3a5823e61a1e8c..4a9023ff33de15dd384531d51e39de4ffeecdb8b 100755
--- a/tensorflow/contrib/lite/build_ios_universal_lib.sh
+++ b/tensorflow/contrib/lite/build_ios_universal_lib.sh
@@ -1,5 +1,24 @@
 #!/bin/bash -x
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
 set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR/../../.."
+
 make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=x86_64 -j 8
 make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=i386 -j 8
 make -f tensorflow/contrib/lite/Makefile TARGET=IOS IOS_ARCH=armv7 -j 8
diff --git a/tensorflow/contrib/lite/builtin_op_data.h b/tensorflow/contrib/lite/builtin_op_data.h
index 93072bf90bd8a18d9011a74c2eec95d86dbdce8a..3b43a1fd5d383b8b9eee1704b7a1b80b8d4059d4 100644
--- a/tensorflow/contrib/lite/builtin_op_data.h
+++ b/tensorflow/contrib/lite/builtin_op_data.h
@@ -83,6 +83,11 @@ typedef struct {
   TfLiteFusedActivation activation;
 } TfLiteRNNParams;
 
+typedef struct {
+  bool time_major;
+  TfLiteFusedActivation activation;
+} TfLiteSequenceRNNParams;
+
 typedef struct { TfLiteFusedActivation activation; } TfLiteFullyConnectedParams;
 
 typedef enum {
@@ -104,10 +109,40 @@ typedef struct {
   TfLiteFusedActivation activation;
 } TfLiteAddParams;
 
+typedef struct {
+  // Number of spatial dimensions.
+  // For now only NHWC is supported, and the value should always be 2.
+  int num_spatial_dimensions;
+  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
+  // For now we will fix the maximum possible number of dimensions.
+  int block_shape[2];
+  int before_paddings[2];
+  int after_paddings[2];
+} TfLiteSpaceToBatchNDParams;
+
+typedef struct {
+  // Number of spatial dimensions.
+  // For now only NHWC is supported, and the value should always be 2.
+  int num_spatial_dimensions;
+  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
+  // For now we will fix the maximum possible number of dimensions.
+  int block_shape[2];
+  int before_crops[2];
+  int after_crops[2];
+} TfLiteBatchToSpaceNDParams;
+
 typedef struct {
   TfLiteFusedActivation activation;
 } TfLiteMulParams;
 
+typedef struct {
+  TfLiteFusedActivation activation;
+} TfLiteSubParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+} TfLiteDivParams;
+
 typedef struct {
   TfLiteFusedActivation activation;
 } TfLiteL2NormParams;
@@ -130,6 +165,14 @@ typedef struct {
   int new_width;
 } TfLiteResizeBilinearParams;
 
+typedef struct {
+  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
+  // For now we will fix the maximum possible number of dimensions.
+  int before_padding[8];
+  int after_padding[8];
+  int num_dimensions;
+} TfLitePadParams;
+
 typedef struct {
   // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
   // For now we will fix the maximum possible number of dimensions.
@@ -157,6 +200,32 @@ typedef struct {
   TfLiteCombinerType combiner;
 } TfLiteEmbeddingLookupSparseParams;
 
+typedef struct {
+  int axis;
+} TfLiteGatherParams;
+
+typedef struct {
+  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
+  // For now we will fix the maximum possible number of dimensions.
+  int perm[8];
+  int num_dimensions;
+} TfLiteTransposeParams;
+
+typedef struct {
+  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
+  // For now we will fix the maximum possible number of dimensions.
+  int axis[8];
+  int num_axis_dimensions;
+  bool keep_dims;
+} TfLiteMeanParams;
+
+typedef struct {
+  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
+  // For now we will fix the maximum possible number of dimensions.
+  int squeeze_dims[8];
+  int num_squeeze_dims;
+} TfLiteSqueezeParams;
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h
index 41257a53b145cbe7e252c9d4de6ea7ef654431b5..fca71165034a46b39803f4500af8dc5c6f4e8829 100644
--- a/tensorflow/contrib/lite/context.h
+++ b/tensorflow/contrib/lite/context.h
@@ -141,6 +141,7 @@ typedef struct {
 // A union of points that points to memory for a given tensor.
 typedef union {
   int* i32;
+  int64_t* i64;
   float* f;
   char* raw;
   const char* raw_const;
diff --git a/tensorflow/contrib/lite/context_test.cc b/tensorflow/contrib/lite/context_test.cc
index d0a104f43d9b9d148d80ce26b8ecf732d51ef110..20d6f69a25e9f0bb4323cf5d067b8ebd37bb3c23 100644
--- a/tensorflow/contrib/lite/context_test.cc
+++ b/tensorflow/contrib/lite/context_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/contrib/lite/context.h"
 #include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
 
@@ -68,7 +69,7 @@ TEST(IntArray, TestIntArrayEqual) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh
index 571d857be7292998996a4fb8101f0070064aa6be..362e5bee25e95e87fa22bb77904056e732c4e140 100755
--- a/tensorflow/contrib/lite/download_dependencies.sh
+++ b/tensorflow/contrib/lite/download_dependencies.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,16 +16,12 @@
 
 set -e
 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR/../../.."
+
 DOWNLOADS_DIR=tensorflow/contrib/lite/downloads
 BZL_FILE_PATH=tensorflow/workspace.bzl
 
-# Ensure it is being run from repo root
-if [ ! -f $BZL_FILE_PATH ]; then
-  echo "Could not find ${BZL_FILE_PATH}":
-  echo "Likely you are not running this from the root directory of the repository.";
-  exit 1;
-fi
-
 EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
 GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
 GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
diff --git a/tensorflow/contrib/lite/error_reporter.cc b/tensorflow/contrib/lite/error_reporter.cc
index 6ba5384a94dbf9de03fb2e4e2f63074525eafa2d..03fcd5409ceab1895cea3b9e0e4fcb5a127e6a45 100644
--- a/tensorflow/contrib/lite/error_reporter.cc
+++ b/tensorflow/contrib/lite/error_reporter.cc
@@ -39,7 +39,9 @@ int ErrorReporter::ReportError(void*, const char* format, ...) {
 }
 
 int StderrReporter::Report(const char* format, va_list args) {
-  return vfprintf(stderr, format, args);
+  const int result = vfprintf(stderr, format, args);
+  fputc('\n', stderr);
+  return result;
 }
 
 ErrorReporter* DefaultErrorReporter() {
diff --git a/tensorflow/contrib/lite/error_reporter.h b/tensorflow/contrib/lite/error_reporter.h
index 637d456ce7a754c7da34e551869e49b4efd18e3b..d5715e4f90aead79a617fe4576bfe5100d5e121a 100644
--- a/tensorflow/contrib/lite/error_reporter.h
+++ b/tensorflow/contrib/lite/error_reporter.h
@@ -25,10 +25,10 @@ namespace tflite {
 //
 // Usage:
 //  ErrorReporter foo;
-//  foo.Report("test %d\n", 5);
+//  foo.Report("test %d", 5);
 // or
 //  va_list args;
-//  foo.Report("test %d\n", args); // where args is va_list
+//  foo.Report("test %d", args); // where args is va_list
 //
 // Sublclass ErrorReporter to provide another reporting destination.
 // For example, if you have a GUI program, you might redirect to a buffer
diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
index ea398ad14e8be4c5a0021befc7cc076549b47e23..10f31bb6f17242c9f7f70f0648ec643f99c5ac86 100644
--- a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
+++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm
@@ -123,7 +123,11 @@ static void GetTopN(const uint8_t* prediction, const int prediction_size, const
   AVCaptureDevice* device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
   AVCaptureDeviceInput* deviceInput =
       [AVCaptureDeviceInput deviceInputWithDevice:device error:&error];
-  assert(error == nil);
+
+  if (error != nil) {
+    NSLog(@"Failed to initialize AVCaptureDeviceInput. Note: This app doesn't work with simulator");
+    assert(NO);
+  }
 
   if ([session canAddInput:deviceInput]) [session addInput:deviceInput];
 
diff --git a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h
index 75b1f1da384b527e8332dfba08fec87c65eff8b1..94046d9728258901091f018fd0d081651145f400 100644
--- a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h
+++ b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.h
@@ -14,8 +14,8 @@
 
 #import <UIKit/UIKit.h>
 
-@interface AppDelegate : UIResponder <UIApplicationDelegate>
+@interface AppDelegate : UIResponder<UIApplicationDelegate>
 
-@property (strong, nonatomic) UIWindow *window;
+@property(strong, nonatomic) UIWindow *window;
 
 @end
diff --git a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm
index 1e808eb976ff3eeda4cf6f81b3c1794c6a037dc8..d1215fa0bffd978b4aaadbd8bc13b07723703c9a 100644
--- a/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm
+++ b/tensorflow/contrib/lite/examples/ios/simple/AppDelegate.mm
@@ -22,8 +22,7 @@
     didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
 
   UITabBarController *bar = [[UITabBarController alloc] init];
-  [bar setViewControllers:
-      @[[[RunModelViewController alloc] init]]];
+  [bar setViewControllers:@[ [[RunModelViewController alloc] init] ]];
   bar.selectedIndex = 0;
   self.window = [[UIWindow alloc] initWithFrame:[[UIScreen mainScreen] bounds]];
   self.window.rootViewController = bar;
@@ -31,14 +30,19 @@
   return YES;
 }
 
-- (void)applicationWillResignActive:(UIApplication *)application {}
+- (void)applicationWillResignActive:(UIApplication *)application {
+}
 
-- (void)applicationDidEnterBackground:(UIApplication *)application {}
+- (void)applicationDidEnterBackground:(UIApplication *)application {
+}
 
-- (void)applicationWillEnterForeground:(UIApplication *)application {}
+- (void)applicationWillEnterForeground:(UIApplication *)application {
+}
 
-- (void)applicationDidBecomeActive:(UIApplication *)application {}
+- (void)applicationDidBecomeActive:(UIApplication *)application {
+}
 
-- (void)applicationWillTerminate:(UIApplication *)application {}
+- (void)applicationWillTerminate:(UIApplication *)application {
+}
 
 @end
diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h
index 4e1a83ccf5a12c609baadab7359c55ec4f464ed8..a4b358b4eb7f6ba109638405091b798d30bd1768 100644
--- a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h
+++ b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.h
@@ -18,7 +18,7 @@
 
 - (IBAction)getUrl:(id)sender;
 
-@property (weak, nonatomic) IBOutlet UITextView *urlContentTextView;
-@property (weak, nonatomic) IBOutlet UITextField *urlTextField;
+@property(weak, nonatomic) IBOutlet UITextView *urlContentTextView;
+@property(weak, nonatomic) IBOutlet UITextField *urlTextField;
 
 @end
diff --git a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm
index 965d83010516c6db72c9e8b1c33079b3eda204de..a885a57b65c5c40ec13cc1c8893e02f4f75ed106 100644
--- a/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm
+++ b/tensorflow/contrib/lite/examples/ios/simple/RunModelViewController.mm
@@ -14,10 +14,10 @@
 
 #import "RunModelViewController.h"
 
-#include <fstream>
-#include <iostream>
 #include <pthread.h>
 #include <unistd.h>
+#include <fstream>
+#include <iostream>
 #include <queue>
 #include <sstream>
 #include <string>
@@ -30,7 +30,11 @@
 #include "ios_image_load.h"
 
 #define LOG(x) std::cerr
-#define CHECK(x) if (!(x)) { LOG(ERROR) << #x << "failed"; exit(1); }
+#define CHECK(x)                  \
+  if (!(x)) {                     \
+    LOG(ERROR) << #x << "failed"; \
+    exit(1);                      \
+  }
 
 NSString* RunInferenceOnImage();
 
@@ -49,15 +53,12 @@ NSString* RunInferenceOnImage();
 
 // Returns the top N confidence values over threshold in the provided vector,
 // sorted by confidence in descending order.
-static void GetTopN(
-    const float* prediction,
-    const int prediction_size,
-    const int num_results, const float threshold,
-    std::vector<std::pair<float, int> >* top_results) {
+static void GetTopN(const float* prediction, const int prediction_size, const int num_results,
+                    const float threshold, std::vector<std::pair<float, int> >* top_results) {
   // Will contain top N results in ascending order.
-  std::priority_queue<std::pair<float, int>,
-      std::vector<std::pair<float, int> >,
-      std::greater<std::pair<float, int> > > top_result_pq;
+  std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int> >,
+                      std::greater<std::pair<float, int> > >
+      top_result_pq;
 
   const long count = prediction_size;
   for (int i = 0; i < count; ++i) {
@@ -88,25 +89,26 @@ static void GetTopN(
 NSString* FilePathForResourceName(NSString* name, NSString* extension) {
   NSString* file_path = [[NSBundle mainBundle] pathForResource:name ofType:extension];
   if (file_path == NULL) {
-    LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "."
-	       << [extension UTF8String] << "' in bundle.";
+    LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "." << [extension UTF8String]
+               << "' in bundle.";
   }
   return file_path;
 }
 
 NSString* RunInferenceOnImage() {
-  std::string graph;
+  NSString* graph = @"mobilenet_v1_1.0_224";
   const int num_threads = 1;
   std::string input_layer_type = "float";
   std::vector<int> sizes = {1, 224, 224, 3};
 
-  NSString* graph_path = FilePathForResourceName(@"mobilenet_v1_1.0_224", @"tflite");
+  const NSString* graph_path = FilePathForResourceName(graph, @"tflite");
 
-  std::unique_ptr<tflite::FlatBufferModel> model(tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String]));
+  std::unique_ptr<tflite::FlatBufferModel> model(
+      tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String]));
   if (!model) {
-    LOG(FATAL) << "Failed to mmap model " << graph;
+    LOG(FATAL) << "Failed to mmap model " << [graph UTF8String];
   }
-  LOG(INFO) << "Loaded model " << graph;
+  LOG(INFO) << "Loaded model " << [graph UTF8String];
   model->error_reporter();
   LOG(INFO) << "resolved reporter";
 
@@ -143,7 +145,7 @@ NSString* RunInferenceOnImage() {
   std::ifstream t;
   t.open([labels_path UTF8String]);
   std::string line;
-  while(t){
+  while (t) {
     std::getline(t, line);
     label_strings.push_back(line);
   }
@@ -154,7 +156,8 @@ NSString* RunInferenceOnImage() {
   int image_width;
   int image_height;
   int image_channels;
-  std::vector<uint8_t> image_data = LoadImageFromFile([image_path UTF8String], &image_width, &image_height, &image_channels);
+  std::vector<uint8_t> image_data =
+      LoadImageFromFile([image_path UTF8String], &image_width, &image_height, &image_channels);
   const int wanted_width = 224;
   const int wanted_height = 224;
   const int wanted_channels = 3;
@@ -212,8 +215,7 @@ NSString* RunInferenceOnImage() {
 
   std::string predictions = ss.str();
   NSString* result = @"";
-  result = [NSString stringWithFormat: @"%@ - %s", result,
-            predictions.c_str()];
-  
+  result = [NSString stringWithFormat:@"%@ - %s", result, predictions.c_str()];
+
   return result;
 }
diff --git a/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.h b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.h
index 7287d0d63d5b4c0b9c9a528578b6341cdb9c9954..98934ce41d349b33d4fc010a39a956e52f3d5721 100644
--- a/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.h
+++ b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.h
@@ -17,9 +17,7 @@
 
 #include <vector>
 
-std::vector<uint8_t> LoadImageFromFile(const char* file_name,
-						 int* out_width,
-						 int* out_height,
-						 int* out_channels);
+std::vector<uint8_t> LoadImageFromFile(const char* file_name, int* out_width,
+                                       int* out_height, int* out_channels);
 
 #endif  // TENSORFLOW_EXAMPLES_IOS_IOS_IMAGE_LOAD_H_
diff --git a/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm
index 789522d2a9900b136f91f77c4ada682f1a316848..cb0fe1a7650c572d3745066431f2759daa94ffc9 100644
--- a/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm
+++ b/tensorflow/contrib/lite/examples/ios/simple/ios_image_load.mm
@@ -14,17 +14,16 @@
 
 #include "ios_image_load.h"
 
-#include <stdlib.h>
-#include <string.h>
 #include <assert.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
 #import <CoreImage/CoreImage.h>
 #import <ImageIO/ImageIO.h>
 
-std::vector<uint8_t> LoadImageFromFile(const char* file_name,
-				     int* out_width, int* out_height,
-				     int* out_channels) {
+std::vector<uint8_t> LoadImageFromFile(const char* file_name, int* out_width, int* out_height,
+                                       int* out_channels) {
   FILE* file_handle = fopen(file_name, "rb");
   fseek(file_handle, 0, SEEK_END);
   const size_t bytes_in_file = ftell(file_handle);
@@ -32,11 +31,10 @@ std::vector<uint8_t> LoadImageFromFile(const char* file_name,
   std::vector<uint8_t> file_data(bytes_in_file);
   fread(file_data.data(), 1, bytes_in_file, file_handle);
   fclose(file_handle);
-  CFDataRef file_data_ref = CFDataCreateWithBytesNoCopy(NULL, file_data.data(),
-						      bytes_in_file,
-						      kCFAllocatorNull);
-  CGDataProviderRef image_provider =
-    CGDataProviderCreateWithCFData(file_data_ref);
+
+  CFDataRef file_data_ref =
+      CFDataCreateWithBytesNoCopy(NULL, file_data.data(), bytes_in_file, kCFAllocatorNull);
+  CGDataProviderRef image_provider = CGDataProviderCreateWithCFData(file_data_ref);
 
   const char* suffix = strrchr(file_name, '.');
   if (!suffix || suffix == file_name) {
@@ -44,12 +42,10 @@ std::vector<uint8_t> LoadImageFromFile(const char* file_name,
   }
   CGImageRef image;
   if (strcasecmp(suffix, ".png") == 0) {
-    image = CGImageCreateWithPNGDataProvider(image_provider, NULL, true,
-					     kCGRenderingIntentDefault);
-  } else if ((strcasecmp(suffix, ".jpg") == 0) ||
-    (strcasecmp(suffix, ".jpeg") == 0)) {
-    image = CGImageCreateWithJPEGDataProvider(image_provider, NULL, true,
-					      kCGRenderingIntentDefault);
+    image = CGImageCreateWithPNGDataProvider(image_provider, NULL, true, kCGRenderingIntentDefault);
+  } else if ((strcasecmp(suffix, ".jpg") == 0) || (strcasecmp(suffix, ".jpeg") == 0)) {
+    image =
+        CGImageCreateWithJPEGDataProvider(image_provider, NULL, true, kCGRenderingIntentDefault);
   } else {
     CFRelease(image_provider);
     CFRelease(file_data_ref);
@@ -68,9 +64,10 @@ std::vector<uint8_t> LoadImageFromFile(const char* file_name,
   const int bytes_in_image = (bytes_per_row * height);
   std::vector<uint8_t> result(bytes_in_image);
   const int bits_per_component = 8;
-  CGContextRef context = CGBitmapContextCreate(result.data(), width, height,
-    bits_per_component, bytes_per_row, color_space,
-    kCGImageAlphaPremultipliedLast | kCGBitmapByteOrder32Big);
+
+  CGContextRef context =
+      CGBitmapContextCreate(result.data(), width, height, bits_per_component, bytes_per_row,
+                            color_space, kCGImageAlphaPremultipliedLast | kCGBitmapByteOrder32Big);
   CGColorSpaceRelease(color_space);
   CGContextDrawImage(context, CGRectMake(0, 0, width, height), image);
   CGContextRelease(context);
diff --git a/tensorflow/contrib/lite/examples/ios/simple/main.mm b/tensorflow/contrib/lite/examples/ios/simple/main.mm
index d70550a730720e5d6799a186c1beb3cfa04b0b9d..05cb55ddd7a230593863e64b351f6aac31a1b4d7 100644
--- a/tensorflow/contrib/lite/examples/ios/simple/main.mm
+++ b/tensorflow/contrib/lite/examples/ios/simple/main.mm
@@ -14,7 +14,7 @@
 
 #import <UIKit/UIKit.h>
 
-int main(int argc, char * argv[]) {
+int main(int argc, char *argv[]) {
   @autoreleasepool {
     NSString *delegateClassName = @"AppDelegate";
     return UIApplicationMain(argc, argv, nil, delegateClassName);
diff --git a/tensorflow/contrib/lite/examples/label_image/BUILD b/tensorflow/contrib/lite/examples/label_image/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..476d85c0314e331d6d3bad382c331a8458fd01a1
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/label_image/BUILD
@@ -0,0 +1,75 @@
+# Description:
+# TensorFlow Lite Example Label Image.
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "tf_cc_binary")
+load("//tensorflow/contrib/lite:build_def.bzl", "tflite_linkopts")
+
+exports_files(glob([
+    "testdata/*.bmp",
+]))
+
+tf_cc_binary(
+    name = "label_image",
+    srcs = [
+        "get_top_n.h",
+        "get_top_n_impl.h",
+        "label_image.cc",
+    ],
+    linkopts = tflite_linkopts() + select({
+        "//tensorflow:android": [
+            "-pie",  # Android 5.0 and later supports only PIE
+            "-lm",  # some builtin ops, e.g., tanh, need -lm
+        ],
+        "//conditions:default": [],
+    }),
+    deps = [
+        ":bitmap_helpers",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite:string_util",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+    ],
+)
+
+cc_library(
+    name = "bitmap_helpers",
+    srcs = ["bitmap_helpers.cc"],
+    hdrs = [
+        "bitmap_helpers.h",
+        "bitmap_helpers_impl.h",
+        "label_image.h",
+    ],
+    deps = ["//tensorflow/contrib/lite:string"],
+)
+
+# TODO(ahentz): Test disabled as it has a memory leek from read_bmp
+# cc_test(
+#     name = "label_image_test",
+#     srcs = [
+#         "get_top_n.h",
+#         "get_top_n_impl.h",
+#         "label_image_test.cc",
+#     ],
+#     data = [
+#         "testdata/grace_hopper.bmp",
+#     ],
+#     deps = [
+#         ":bitmap_helpers",
+#         "//testing/base/public:gunit",
+#     ],
+# )
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.cc b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0b38cd38c83927c65d251b9356301b6bef7521f2
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.cc
@@ -0,0 +1,120 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+
+#include <unistd.h>  // NOLINT(build/include_order)
+
+#include "tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h"
+
+#define LOG(x) std::cerr
+
+namespace tflite {
+namespace label_image {
+
+uint8_t* decode_bmp(const uint8_t* input, int row_size, uint8_t* const output,
+                    int width, int height, int channels, bool top_down) {
+  for (int i = 0; i < height; i++) {
+    int src_pos;
+    int dst_pos;
+
+    for (int j = 0; j < width; j++) {
+      if (!top_down) {
+        src_pos = ((height - 1 - i) * row_size) + j * channels;
+      } else {
+        src_pos = i * row_size + j * channels;
+      }
+
+      dst_pos = (i * width + j) * channels;
+
+      switch (channels) {
+        case 1:
+          output[dst_pos] = input[src_pos];
+          break;
+        case 3:
+          // BGR -> RGB
+          output[dst_pos] = input[src_pos + 2];
+          output[dst_pos + 1] = input[src_pos + 1];
+          output[dst_pos + 2] = input[src_pos];
+          break;
+        case 4:
+          // BGRA -> RGBA
+          output[dst_pos] = input[src_pos + 2];
+          output[dst_pos + 1] = input[src_pos + 1];
+          output[dst_pos + 2] = input[src_pos];
+          output[dst_pos + 3] = input[src_pos + 3];
+          break;
+        default:
+          LOG(FATAL) << "Unexpected number of channels: " << channels;
+          break;
+      }
+    }
+  }
+
+  return output;
+}
+
+uint8_t* read_bmp(const std::string& input_bmp_name, int* width, int* height,
+                  int* channels, Settings* s) {
+  int begin, end;
+
+  std::ifstream file(input_bmp_name, std::ios::in | std::ios::binary);
+  if (!file) {
+    LOG(FATAL) << "input file " << input_bmp_name << " not found\n";
+    exit(-1);
+  }
+
+  begin = file.tellg();
+  file.seekg(0, std::ios::end);
+  end = file.tellg();
+  size_t len = end - begin;
+
+  if (s->verbose) LOG(INFO) << "len: " << len << "\n";
+
+  const uint8_t* img_bytes = new uint8_t[len];
+  file.seekg(0, std::ios::beg);
+  file.read((char*)img_bytes, len);
+  const int32_t header_size =
+      *(reinterpret_cast<const int32_t*>(img_bytes + 10));
+  *width = *(reinterpret_cast<const int32_t*>(img_bytes + 18));
+  *height = *(reinterpret_cast<const int32_t*>(img_bytes + 22));
+  const int32_t bpp = *(reinterpret_cast<const int32_t*>(img_bytes + 28));
+  *channels = bpp / 8;
+
+  if (s->verbose)
+    LOG(INFO) << "width, height, channels: " << *width << ", " << *height
+              << ", " << *channels << "\n";
+
+  // there may be padding bytes when the width is not a multiple of 4 bytes
+  // 8 * channels == bits per pixel
+  const int row_size = (8 * *channels * *width + 31) / 32 * 4;
+
+  // if height is negative, data layout is top down
+  // otherwise, it's bottom up
+  bool top_down = (*height < 0);
+
+  // Decode image, allocating tensor once the image size is known
+  uint8_t* output = new uint8_t[abs(*height) * *width * *channels];
+  const uint8_t* bmp_pixels = &img_bytes[header_size];
+  return decode_bmp(bmp_pixels, row_size, output, *width, abs(*height),
+                    *channels, top_down);
+}
+
+}  // namespace label_image
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h
new file mode 100644
index 0000000000000000000000000000000000000000..860e27e5ba9cc9fe23d2a7f9f65dd53bbf76f7a3
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h
@@ -0,0 +1,42 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_H
+#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_H
+
+#include "tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h"
+#include "tensorflow/contrib/lite/examples/label_image/label_image.h"
+
+namespace tflite {
+namespace label_image {
+
+uint8_t* read_bmp(const std::string& input_bmp_name, int* width, int* height,
+                  int* channels, Settings* s);
+
+template <class T>
+void downsize(T* out, uint8_t* in, int image_height, int image_width,
+              int image_channels, int wanted_height, int wanted_width,
+              int wanted_channels, Settings* s);
+
+// explicit instantiation
+template void downsize<uint8_t>(uint8_t*, unsigned char*, int, int, int, int,
+                                int, int, Settings*);
+template void downsize<float>(float*, unsigned char*, int, int, int, int, int,
+                              int, Settings*);
+
+}  // namespace label_image
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_H
diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..64a931082b0cbb4632ec3a814ce654d4f9106bc1
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h
@@ -0,0 +1,49 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H
+#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H
+
+#include "tensorflow/contrib/lite/examples/label_image/label_image.h"
+
+namespace tflite {
+namespace label_image {
+
+template <class T>
+void downsize(T* out, uint8_t* in, int image_height, int image_width,
+              int image_channels, int wanted_height, int wanted_width,
+              int wanted_channels, Settings* s) {
+  for (int y = 0; y < wanted_height; ++y) {
+    const int in_y = (y * image_height) / wanted_height;
+    uint8_t* in_row = in + (in_y * image_width * image_channels);
+    T* out_row = out + (y * wanted_width * wanted_channels);
+    for (int x = 0; x < wanted_width; ++x) {
+      const int in_x = (x * image_width) / wanted_width;
+      uint8_t* in_pixel = in_row + (in_x * image_channels);
+      T* out_pixel = out_row + (x * wanted_channels);
+      for (int c = 0; c < wanted_channels; ++c) {
+        if (s->input_floating)
+          out_pixel[c] = (in_pixel[c] - s->input_mean) / s->input_std;
+        else
+          out_pixel[c] = in_pixel[c];
+      }
+    }
+  }
+}
+
+}  // namespace label_image
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_BITMAP_HELPERS_IMPL_H
diff --git a/tensorflow/contrib/lite/examples/label_image/get_top_n.h b/tensorflow/contrib/lite/examples/label_image/get_top_n.h
new file mode 100644
index 0000000000000000000000000000000000000000..70a7586fe6a008f0da20a7bac928ca676e5914ab
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/label_image/get_top_n.h
@@ -0,0 +1,38 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H
+#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H
+
+#include "tensorflow/contrib/lite/examples/label_image/get_top_n_impl.h"
+
+namespace tflite {
+namespace label_image {
+
+template <class T>
+void get_top_n(T* prediction, int prediction_size, size_t num_results,
+               float threshold, std::vector<std::pair<float, int>>* top_results,
+               bool input_floating);
+
+// explicit instantiation so that we can use them otherwhere
+template void get_top_n<uint8_t>(uint8_t*, int, size_t, float,
+                                 std::vector<std::pair<float, int>>*, bool);
+template void get_top_n<float>(float*, int, size_t, float,
+                               std::vector<std::pair<float, int>>*, bool);
+
+}  // namespace label_image
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H
diff --git a/tensorflow/contrib/lite/examples/label_image/get_top_n_impl.h b/tensorflow/contrib/lite/examples/label_image/get_top_n_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..e416fbd39b125ea65d1155b19ab0967a9062e71a
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/label_image/get_top_n_impl.h
@@ -0,0 +1,70 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H
+#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H
+
+#include <algorithm>
+#include <queue>
+
+namespace tflite {
+namespace label_image {
+
+extern bool input_floating;
+
+// Returns the top N confidence values over threshold in the provided vector,
+// sorted by confidence in descending order.
+template <class T>
+void get_top_n(T* prediction, int prediction_size, size_t num_results,
+               float threshold, std::vector<std::pair<float, int>>* top_results,
+               bool input_floating) {
+  // Will contain top N results in ascending order.
+  std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int>>,
+                      std::greater<std::pair<float, int>>>
+      top_result_pq;
+
+  const long count = prediction_size;  // NOLINT(runtime/int)
+  for (int i = 0; i < count; ++i) {
+    float value;
+    if (input_floating)
+      value = prediction[i];
+    else
+      value = prediction[i] / 255.0;
+    // Only add it if it beats the threshold and has a chance at being in
+    // the top N.
+    if (value < threshold) {
+      continue;
+    }
+
+    top_result_pq.push(std::pair<float, int>(value, i));
+
+    // If at capacity, kick the smallest value out.
+    if (top_result_pq.size() > num_results) {
+      top_result_pq.pop();
+    }
+  }
+
+  // Copy to output vector and reverse into descending order.
+  while (!top_result_pq.empty()) {
+    top_results->push_back(top_result_pq.top());
+    top_result_pq.pop();
+  }
+  std::reverse(top_results->begin(), top_results->end());
+}
+
+}  // namespace label_image
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H
diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.cc b/tensorflow/contrib/lite/examples/label_image/label_image.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4d2e1ce0bc751667393c4b38acc0517980c9f02a
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/label_image/label_image.cc
@@ -0,0 +1,300 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include <fcntl.h>      // NOLINT(build/include_order)
+#include <getopt.h>     // NOLINT(build/include_order)
+#include <sys/time.h>   // NOLINT(build/include_order)
+#include <sys/types.h>  // NOLINT(build/include_order)
+#include <sys/uio.h>    // NOLINT(build/include_order)
+#include <unistd.h>     // NOLINT(build/include_order)
+
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/optional_debug_tools.h"
+#include "tensorflow/contrib/lite/string_util.h"
+
+#include "tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h"
+#include "tensorflow/contrib/lite/examples/label_image/get_top_n.h"
+
+#define LOG(x) std::cerr
+
+namespace tflite {
+namespace label_image {
+
+double get_us(struct timeval t) { return (t.tv_sec * 1000000 + t.tv_usec); }
+
+// Takes a file name, and loads a list of labels from it, one per line, and
+// returns a vector of the strings. It pads with empty strings so the length
+// of the result is a multiple of 16, because our model expects that.
+TfLiteStatus ReadLabelsFile(const string& file_name,
+                            std::vector<string>* result,
+                            size_t* found_label_count) {
+  std::ifstream file(file_name);
+  if (!file) {
+    LOG(FATAL) << "Labels file " << file_name << " not found\n";
+    return kTfLiteError;
+  }
+  result->clear();
+  string line;
+  while (std::getline(file, line)) {
+    result->push_back(line);
+  }
+  *found_label_count = result->size();
+  const int padding = 16;
+  while (result->size() % padding) {
+    result->emplace_back();
+  }
+  return kTfLiteOk;
+}
+
+void RunInference(Settings* s) {
+  if (!s->model_name.c_str()) {
+    LOG(ERROR) << "no model file name\n";
+    exit(-1);
+  }
+
+  std::unique_ptr<tflite::FlatBufferModel> model;
+  std::unique_ptr<tflite::Interpreter> interpreter;
+  model = tflite::FlatBufferModel::BuildFromFile(s->model_name.c_str());
+  if (!model) {
+    LOG(FATAL) << "\nFailed to mmap model " << s->model_name << "\n";
+    exit(-1);
+  }
+  LOG(INFO) << "Loaded model " << s->model_name << "\n";
+  model->error_reporter();
+  LOG(INFO) << "resolved reporter\n";
+
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+
+  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
+  if (!interpreter) {
+    LOG(FATAL) << "Failed to construct interpreter\n";
+    exit(-1);
+  }
+
+  interpreter->UseNNAPI(s->accel);
+
+  if (s->verbose) {
+    LOG(INFO) << "tensors size: " << interpreter->tensors_size() << "\n";
+    LOG(INFO) << "nodes size: " << interpreter->nodes_size() << "\n";
+    LOG(INFO) << "inputs: " << interpreter->inputs().size() << "\n";
+    LOG(INFO) << "input(0) name: " << interpreter->GetInputName(0) << "\n";
+
+    int t_size = interpreter->tensors_size();
+    for (int i = 0; i < t_size; i++) {
+      if (interpreter->tensor(i)->name)
+        LOG(INFO) << i << ": " << interpreter->tensor(i)->name << ", "
+                  << interpreter->tensor(i)->bytes << ", "
+                  << interpreter->tensor(i)->type << ", "
+                  << interpreter->tensor(i)->params.scale << ", "
+                  << interpreter->tensor(i)->params.zero_point << "\n";
+    }
+  }
+
+  if (s->number_of_threads != -1) {
+    interpreter->SetNumThreads(s->number_of_threads);
+  }
+
+  int image_width = 224;
+  int image_height = 224;
+  int image_channels = 3;
+  uint8_t* in = read_bmp(s->input_bmp_name, &image_width, &image_height,
+                         &image_channels, s);
+
+  int input = interpreter->inputs()[0];
+  if (s->verbose) LOG(INFO) << "input: " << input << "\n";
+
+  const std::vector<int> inputs = interpreter->inputs();
+  const std::vector<int> outputs = interpreter->outputs();
+
+  if (s->verbose) {
+    LOG(INFO) << "number of inputs: " << inputs.size() << "\n";
+    LOG(INFO) << "number of outputs: " << outputs.size() << "\n";
+  }
+
+  if (interpreter->AllocateTensors() != kTfLiteOk) {
+    LOG(FATAL) << "Failed to allocate tensors!";
+  }
+
+  if (s->verbose) PrintInterpreterState(interpreter.get());
+
+  // get input dimension from the input tensor metadata
+  // assuming one input only
+  TfLiteIntArray* dims = interpreter->tensor(input)->dims;
+  int wanted_height = dims->data[1];
+  int wanted_width = dims->data[2];
+  int wanted_channels = dims->data[3];
+
+  if (s->input_floating) {
+    downsize<float>(interpreter->typed_tensor<float>(input), in, image_height,
+                    image_width, image_channels, wanted_height, wanted_width,
+                    wanted_channels, s);
+  } else {
+    downsize<uint8_t>(interpreter->typed_tensor<uint8_t>(input), in,
+                      image_height, image_width, image_channels, wanted_height,
+                      wanted_width, wanted_channels, s);
+  }
+
+  struct timeval start_time, stop_time;
+  gettimeofday(&start_time, NULL);
+  for (int i = 0; i < s->loop_count; i++) {
+    if (interpreter->Invoke() != kTfLiteOk) {
+      LOG(FATAL) << "Failed to invoke tflite!\n";
+    }
+  }
+  gettimeofday(&stop_time, NULL);
+  LOG(INFO) << "invoked \n";
+  LOG(INFO) << "average time: "
+            << (get_us(stop_time) - get_us(start_time)) / (s->loop_count * 1000)
+            << " ms \n";
+
+  const int output_size = 1000;
+  const size_t num_results = 5;
+  const float threshold = 0.001f;
+
+  std::vector<std::pair<float, int>> top_results;
+
+  if (s->input_floating) {
+    get_top_n<float>(interpreter->typed_output_tensor<float>(0), output_size,
+                     num_results, threshold, &top_results, s->input_floating);
+  } else {
+    get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0),
+                       output_size, num_results, threshold, &top_results,
+                       s->input_floating);
+  }
+
+  std::vector<string> labels;
+  size_t label_count;
+
+  if (ReadLabelsFile(s->labels_file_name, &labels, &label_count) != kTfLiteOk)
+    exit(-1);
+
+  for (const auto& result : top_results) {
+    const float confidence = result.first;
+    const int index = result.second;
+    LOG(INFO) << confidence << ": " << index << " " << labels[index] << "\n";
+  }
+}
+
+void display_usage() {
+  LOG(INFO) << "label_image\n"
+            << "--accelerated, -a: [0|1], use Android NNAPI or note\n"
+            << "--count, -c: loop interpreter->Invoke() for certain times\n"
+            << "--input_floating, -f: [0|1] type of input layer is floating "
+               "point numbers\n"
+            << "--input_mean, -b: input mean\n"
+            << "--input_std, -s: input standard deviation\n"
+            << "--image, -i: image_name.bmp\n"
+            << "--labels, -l: labels for the model\n"
+            << "--tflite_mode, -m: model_name.tflite\n"
+            << "--threads, -t: number of threads\n"
+            << "--verbose, -v: [0|1] print more information\n"
+            << "\n";
+}
+
+int Main(int argc, char** argv) {
+  Settings s;
+
+  int c;
+  while (1) {
+    static struct option long_options[] = {
+        {"accelerated", required_argument, 0, 'a'},
+        {"count", required_argument, 0, 'c'},
+        {"input_floating", required_argument, 0, 'f'},
+        {"verbose", required_argument, 0, 'v'},
+        {"image", required_argument, 0, 'i'},
+        {"labels", required_argument, 0, 'l'},
+        {"tflite_model", required_argument, 0, 'm'},
+        {"threads", required_argument, 0, 't'},
+        {"input_mean", required_argument, 0, 'b'},
+        {"input_std", required_argument, 0, 's'},
+        {0, 0, 0, 0}};
+
+    /* getopt_long stores the option index here. */
+    int option_index = 0;
+
+    c = getopt_long(argc, argv, "a:b:c:f:i:l:m:s:t:v:", long_options,
+                    &option_index);
+
+    /* Detect the end of the options. */
+    if (c == -1) break;
+
+    switch (c) {
+      case 'a':
+        s.accel = strtol(  // NOLINT(runtime/deprecated_fn)
+            optarg, (char**)NULL, 10);
+        break;
+      case 'b':
+        s.input_mean = strtod(optarg, NULL);
+        break;
+      case 'c':
+        s.loop_count = strtol(  // NOLINT(runtime/deprecated_fn)
+            optarg, (char**)NULL, 10);
+        break;
+      case 'f':
+        s.input_floating = strtol(  // NOLINT(runtime/deprecated_fn)
+            optarg, (char**)NULL, 10);
+        s.input_layer_type = "float";
+        break;
+      case 'i':
+        s.input_bmp_name = optarg;
+        break;
+      case 'l':
+        s.labels_file_name = optarg;
+        break;
+      case 'm':
+        s.model_name = optarg;
+        break;
+      case 's':
+        s.input_std = strtod(optarg, NULL);
+        break;
+      case 't':
+        s.number_of_threads = strtol(  // NOLINT(runtime/deprecated_fn)
+            optarg, (char**)NULL, 10);
+        break;
+      case 'v':
+        s.verbose = strtol(  // NOLINT(runtime/deprecated_fn)
+            optarg, (char**)NULL, 10);
+        break;
+      case 'h':
+      case '?':
+        /* getopt_long already printed an error message. */
+        display_usage();
+        exit(-1);
+      default:
+        exit(-1);
+    }
+  }
+  RunInference(&s);
+  return 0;
+}
+
+}  // namespace label_image
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  return tflite::label_image::Main(argc, argv);
+}
diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.h b/tensorflow/contrib/lite/examples/label_image/label_image.h
new file mode 100644
index 0000000000000000000000000000000000000000..ce98e06fc162a9588707eae701e2fcb8d648a4e4
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/label_image/label_image.h
@@ -0,0 +1,36 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_LABEL_IMAGE_H
+#define TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_LABEL_IMAGE_H
+
+#include <string>
+#include "tensorflow/contrib/lite/string.h"
+
+struct Settings {
+  bool verbose = false;
+  bool accel = false;
+  bool input_floating = false;
+  int loop_count = 1;
+  float input_mean = 127.5f;
+  float input_std = 127.5f;
+  string model_name = "./mobilenet_quant_v1_224.tflite";
+  string input_bmp_name = "./grace_hopper.bmp";
+  string labels_file_name = "./labels.txt";
+  string input_layer_type = "uint8_t";
+  int number_of_threads = 4;
+};
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXAMPLES_LABEL_IMAGE_LABEL_IMAGE_H
diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.md b/tensorflow/contrib/lite/examples/label_image/label_image.md
new file mode 100644
index 0000000000000000000000000000000000000000..d6019d673f1b15429e69b57e8dc9eeaad2825bc3
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/label_image/label_image.md
@@ -0,0 +1,74 @@
+label_image for TensorFlow Lite inspired by TensorFlow's label_image.
+ 
+To build it for android ARMv8:
+```
+> bazel build --cxxopt=-std=c++11 \
+  --crosstool_top=//external:android/crosstool \
+  --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
+  --cpu=arm64-v8a \
+  //tensorflow/contrib/lite/examples/label_image:label_image
+```
+or
+```
+> bazel build --config android_arm64 --cxxopt=-std=c++11 \
+  //tensorflow/contrib/lite/examples/label_image:label_image
+```
+
+To build it for android arm-v7a:
+```
+> bazel build --cxxopt=-std=c++11 \
+  --crosstool_top=//external:android/crosstool \
+  --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
+  --cpu=armeabi-v7a \
+  //tensorflow/contrib/lite/examples/label_image:label_image
+```
+or
+```
+> bazel build --config android_arm --cxxopt=-std=c++11 \
+  //tensorflow/contrib/lite/examples/label_image:label_image
+```
+
+Build it for desktop machines (tested on Ubuntu and OS X)
+```
+> bazel build --config opt --cxxopt=-std=c++11 //tensorflow/contrib/lite/examples/label_image:label_image
+```
+To run it. Prepare `./mobilenet_quant_v1_224.tflite`, `./grace_hopper.bmp`, and `./labels.txt`.
+
+Run it:
+```
+> ./label_image                                        
+Loaded model ./mobilenet_quant_v1_224.tflite
+resolved reporter
+invoked
+average time: 100.986 ms 
+0.439216: 653 military uniform
+0.372549: 458 bow tie
+0.0705882: 466 bulletproof vest
+0.0235294: 514 cornet
+0.0196078: 835 suit
+```
+Run `interpreter->Invoker()` 100 times:
+```
+> ./label_image   -c 100                               
+Loaded model ./mobilenet_quant_v1_224.tflite
+resolved reporter
+invoked
+average time: 33.4694 ms
+...
+```
+
+Run a floating point (`mobilenet_v1_1.0_224.tflite`) model,
+```
+> ./label_image -f 1 -m mobilenet_v1_1.0_224.tflite
+Loaded model mobilenet_v1_1.0_224.tflite
+resolved reporter
+invoked
+average time: 263.493 ms 
+0.88615: 653 military uniform
+0.0422316: 440 bearskin
+0.0109948: 466 bulletproof vest
+0.0105327: 401 academic gown
+0.00947104: 723 ping-pong bal
+```
+
+See the source code for other command line options.
diff --git a/tensorflow/contrib/lite/examples/label_image/label_image_test.cc b/tensorflow/contrib/lite/examples/label_image/label_image_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ce35483f76e8f40ced79e1ee30774c62d0eba94e
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/label_image/label_image_test.cc
@@ -0,0 +1,61 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h"
+#include "tensorflow/contrib/lite/examples/label_image/get_top_n.h"
+#include "tensorflow/contrib/lite/examples/label_image/label_image.h"
+
+using ::testing::ElementsAreArray;
+
+namespace tflite {
+namespace label_image {
+
+TEST(LabelImageTest, GraceHopper) {
+  std::string lena_file =
+      "tensorflow/contrib/lite/examples/label_image/testdata/grace_hopper.bmp";
+  int height, width, channels;
+  Settings s;
+  uint8_t *data;
+
+  data = read_bmp(lena_file, &width, &height, &channels, &s);
+  ASSERT_EQ(height, 606);
+  ASSERT_EQ(width, 517);
+  ASSERT_EQ(channels, 3);
+
+  uint8_t *out = new uint8_t[606 * 517 * 3];
+  downsize<uint8_t>(out, data, 606, 517, 3, 214, 214, 3, &s);
+  ASSERT_EQ(out[0], 0x15);
+  ASSERT_EQ(out[214 * 214 * 3 - 1], 0x12);
+}
+
+TEST(LabelImageTest, GetTopN) {
+  uint8_t in[] = {1, 1, 2, 2, 4, 4, 16, 32, 128, 64};
+
+  std::vector<std::pair<float, int>> top_results;
+  get_top_n<uint8_t>(in, 10, 5, 0.025, &top_results, false);
+  ASSERT_EQ(top_results.size(), 4);
+  ASSERT_EQ(top_results[0].second, 8);
+}
+
+}  // namespace label_image
+}  // namespace tflite
+
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/examples/label_image/testdata/grace_hopper.bmp b/tensorflow/contrib/lite/examples/label_image/testdata/grace_hopper.bmp
new file mode 100644
index 0000000000000000000000000000000000000000..0d94cd3e930a138b7c20308f5ba375576484d48b
Binary files /dev/null and b/tensorflow/contrib/lite/examples/label_image/testdata/grace_hopper.bmp differ
diff --git a/tensorflow/contrib/lite/g3doc/ios.md b/tensorflow/contrib/lite/g3doc/ios.md
index ce8b37fbf9b0db5dee60784e85a3cbf0326fddb6..a359b8d4b481dbc15cc86db14eabda5433722b8b 100644
--- a/tensorflow/contrib/lite/g3doc/ios.md
+++ b/tensorflow/contrib/lite/g3doc/ios.md
@@ -45,6 +45,10 @@ into a universal file containing armv7, armv7s, arm64, i386, and x86_64
 architectures. The resulting library is in
 `tensorflow/contrib/lite/gen/lib/libtensorflow-lite.a`.
 
+If you get an error such as `no such file or directory: 'x86_64'` when running 
+`build_ios_universal_lib.sh`: open Xcode > Preferences > Locations, and ensure 
+a value is selected in the "Command Line Tools" dropdown.
+
 ## Using in your own application
 
 You'll need to update various settings in your app to link against TensorFlow
diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
index 9ade04eb8c696d7e0e39a8104e02b6e5feec95eb..8e5e694a5cbe7f908572114db33c8257db6151f0 100644
--- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
+++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md
@@ -329,18 +329,18 @@ Inputs {
   0: a tensor
 }
 Outputs {
-  0: a tensor equivalent to max(0, min(input, 1)
+  0: a tensor equivalent to max(0, input)
 }
 ```
 
-**RELU1**
+**RELU_N1_TO_1**
 
 ```
 Inputs {
   0: a tensor
 }
 Outputs {
-  0: a tensor equivalent to max(-1, min(input, 6)
+  0: a tensor equivalent to max(-1, min(input, 1)
 }
 ```
 
diff --git a/tensorflow/contrib/lite/graph_info.h b/tensorflow/contrib/lite/graph_info.h
new file mode 100644
index 0000000000000000000000000000000000000000..5481aede605453958adb2c2e661c73130046d9f9
--- /dev/null
+++ b/tensorflow/contrib/lite/graph_info.h
@@ -0,0 +1,53 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_
+
+#include <vector>
+
+#include "tensorflow/contrib/lite/context.h"
+
+namespace tflite {
+
+// Basic information about an inference graph, where execution nodes
+// are connected via tensors.
+class GraphInfo {
+ public:
+  virtual ~GraphInfo() {}
+
+  // Total number of tensors in the graph.
+  virtual size_t num_tensors() const = 0;
+
+  // Returns a tensor given its index which is expected to be between 0 and
+  // num_tensors().
+  virtual TfLiteTensor* tensor(size_t index) = 0;
+
+  // Total number of nodes in the graph.
+  virtual size_t num_nodes() const = 0;
+
+  // Returns a node given its index which is expected to be between 0 and
+  // num_nodes().
+  virtual const TfLiteNode& node(size_t index) const = 0;
+
+  // Returns the indices of the input tensors.
+  virtual const std::vector<int>& inputs() const = 0;
+
+  // Returns the indices of the output tensors.
+  virtual const std::vector<int>& outputs() const = 0;
+};
+
+}  // namespace tflite
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_
diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc
index 954e236ac8f0c8c59a9d20d62e66b3aa1164ecc1..5f5981e45a20a2c79ea1a2ba08345e831ce194da 100644
--- a/tensorflow/contrib/lite/interpreter.cc
+++ b/tensorflow/contrib/lite/interpreter.cc
@@ -18,16 +18,16 @@ limitations under the License.
 #include <cstdarg>
 #include <cstdint>
 #include <cstring>
+#include "tensorflow/contrib/lite/arena_planner.h"
 #include "tensorflow/contrib/lite/context.h"
 #include "tensorflow/contrib/lite/error_reporter.h"
+#include "tensorflow/contrib/lite/graph_info.h"
 #include "tensorflow/contrib/lite/kernels/gemm_support.h"
+#include "tensorflow/contrib/lite/memory_planner.h"
 #include "tensorflow/contrib/lite/nnapi_delegate.h"
 
 namespace {
 
-// Memory allocation tuning
-constexpr const int kDefaultArenaAlignment = 64;
-constexpr const int kDefaultTensorAlignment = 4;
 // std::vector preallocation tuning.
 constexpr const int kSlotsToReserve = 128;
 
@@ -35,10 +35,33 @@ constexpr const int kSlotsToReserve = 128;
 
 namespace tflite {
 
+// A trivial implementation of GraphInfo around the Interpreter.
+class InterpreterInfo : public GraphInfo {
+ public:
+  explicit InterpreterInfo(Interpreter* interpreter)
+      : interpreter_(interpreter) {}
+
+  size_t num_tensors() const override { return interpreter_->tensors_size(); }
+  TfLiteTensor* tensor(size_t index) override {
+    return interpreter_->tensor(index);
+  }
+  size_t num_nodes() const override { return interpreter_->nodes_size(); }
+  const TfLiteNode& node(size_t index) const override {
+    return interpreter_->node_and_registration(index)->first;
+  }
+  const std::vector<int>& inputs() const override {
+    return interpreter_->inputs();
+  }
+  const std::vector<int>& outputs() const override {
+    return interpreter_->outputs();
+  }
+
+ public:
+  Interpreter* interpreter_;
+};
+
 Interpreter::Interpreter(ErrorReporter* error_reporter)
-    : arena_(kDefaultArenaAlignment),
-      persistent_arena_(kDefaultArenaAlignment),
-      error_reporter_(error_reporter ? error_reporter
+    : error_reporter_(error_reporter ? error_reporter
                                      : DefaultErrorReporter()) {
   context_.impl_ = static_cast<void*>(this);
   context_.ResizeTensor = ResizeTensor;
@@ -50,7 +73,7 @@ Interpreter::Interpreter(ErrorReporter* error_reporter)
   // Reserve some space for the tensors to avoid excessive resizing.
   tensors_.reserve(kSlotsToReserve);
   nodes_and_registration_.reserve(kSlotsToReserve);
-  next_allocate_node_id_ = 0;
+  next_node_to_prepare_ = 0;
   UseNNAPI(false);
 }
 
@@ -128,181 +151,6 @@ TfLiteStatus Interpreter::BytesRequired(TfLiteType type, const int* dims,
   return kTfLiteOk;
 }
 
-TfLiteStatus Interpreter::AllocateTensorsWhoseSizesAreKnown() {
-  if (!consistent_) {
-    ReportError(&context_, "AllocateTensors() called on inconsistent model.");
-    return kTfLiteError;
-  }
-  if (next_allocate_node_id_ == nodes_and_registration_.size() && invokable_) {
-    return kTfLiteOk;
-  }
-  allocs_and_refcounts_.resize(context_.tensors_size);
-
-  int new_next_allocate_node_id = next_allocate_node_id_;
-  invokable_ = false;
-
-  // Allocate graph input nodes.
-  if (next_allocate_node_id_ == 0) {
-    for (int i = 0; i < inputs_.size(); ++i) {
-      int tensor_index = inputs_[i];
-      if (tensor_index == kOptionalTensor) {
-        continue;
-      }
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-      if (tensor.allocation_type == kTfLiteArenaRw) {
-        TF_LITE_ENSURE_OK(
-            &context_,
-            arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes,
-                            &allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-    // Add 1 to output tensors, so they will not get overwritten.
-    for (int i = 0; i < outputs_.size(); ++i) {
-      allocs_and_refcounts_[outputs_[i]].count++;
-    }
-  }
-
-  // Count references to node input tensors, and resize node-referenced tensors
-  // until we encounter a node that has a dynamic output tensor.
-  for (int k = next_allocate_node_id_; k < nodes_and_registration_.size();
-       k++) {
-    new_next_allocate_node_id++;
-    TfLiteNode& node = nodes_and_registration_[k].first;
-    const TfLiteRegistration& registration = nodes_and_registration_[k].second;
-    if (OpPrepare(registration, &node) == kTfLiteError) {
-      return kTfLiteError;
-    }
-
-    TfLiteIntArray* node_inputs = node.inputs;
-    for (int i = 0; i < node_inputs->size; ++i) {
-      int tensor_index = node_inputs->data[i];
-      if (tensor_index != kOptionalTensor) {
-        allocs_and_refcounts_[node_inputs->data[i]].count++;
-      }
-    }
-
-    // Discontinue if the node has dynamic outputs.
-    bool has_unallocated_dynamic_tensor = false;
-    TfLiteIntArray* node_outputs = node.outputs;
-    for (int i = 0; i < node_outputs->size; ++i) {
-      TfLiteTensor& tensor = context_.tensors[node_outputs->data[i]];
-      if (tensor.allocation_type == kTfLiteDynamic) {
-        has_unallocated_dynamic_tensor = true;
-        break;
-      }
-    }
-    if (has_unallocated_dynamic_tensor) {
-      break;
-    }
-  }
-
-  // Allocate graph persistent outputs, e.g. RNN cell states, etc.
-  for (int k = next_allocate_node_id_; k < new_next_allocate_node_id; k++) {
-    TfLiteNode& node = nodes_and_registration_[k].first;
-
-    // Go through output tensors and allocate the persistent ones first.
-    TfLiteIntArray* node_outputs = node.outputs;
-    for (int i = 0; i < node_outputs->size; ++i) {
-      int tensor_index = node_outputs->data[i];
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-      if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
-        TF_LITE_ENSURE_OK(&context_,
-                          persistent_arena_.Allocate(
-                              &context_, kDefaultTensorAlignment, tensor.bytes,
-                              &allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-  }
-
-  // Go through the graph in execution order.
-  for (int k = next_allocate_node_id_; k < new_next_allocate_node_id; k++) {
-    TfLiteNode& node = nodes_and_registration_[k].first;
-
-    // First allocate output tensors.
-    TfLiteIntArray* node_outputs = node.outputs;
-    for (int i = 0; i < node_outputs->size; ++i) {
-      int tensor_index = node_outputs->data[i];
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-      if (tensor.allocation_type == kTfLiteArenaRw) {
-        TF_LITE_ENSURE_OK(
-            &context_,
-            arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes,
-                            &allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-    // Then the temporaries, in two passes. First allocate them all, them
-    // deallocate them.
-    TfLiteIntArray* node_temporaries = node.temporaries;
-    for (int i = 0; i < node_temporaries->size; ++i) {
-      int tensor_index = node_temporaries->data[i];
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-      if (tensor.allocation_type == kTfLiteArenaRw) {
-        TF_LITE_ENSURE_OK(
-            &context_,
-            arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes,
-                            &allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-    for (int i = 0; i < node_temporaries->size; ++i) {
-      int tensor_index = node_temporaries->data[i];
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-      allocs_and_refcounts_[tensor_index].count--;
-      if (tensor.allocation_type == kTfLiteArenaRw &&
-          allocs_and_refcounts_[tensor_index].count == 0) {
-        TF_LITE_ENSURE_OK(
-            &context_,
-            arena_.Deallocate(&context_,
-                              allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-
-    // Then process the node's inputs.
-    TfLiteIntArray* node_inputs = node.inputs;
-    for (int i = 0; i < node_inputs->size; ++i) {
-      int tensor_index = node_inputs->data[i];
-      if (tensor_index == kOptionalTensor) {
-        continue;
-      }
-      TfLiteTensor& tensor = context_.tensors[tensor_index];
-
-      // Decrease reference count and deallocate if not needed anymore.
-      allocs_and_refcounts_[tensor_index].count--;
-      if (tensor.allocation_type == kTfLiteArenaRw &&
-          allocs_and_refcounts_[tensor_index].count == 0) {
-        TF_LITE_ENSURE_OK(
-            &context_,
-            arena_.Deallocate(&context_,
-                              allocs_and_refcounts_[tensor_index].alloc));
-      }
-    }
-  }
-
-  // Resize the buffer and commit the arena.
-  TF_LITE_ENSURE_OK(&context_, arena_.Commit(&context_));
-  TF_LITE_ENSURE_OK(&context_, persistent_arena_.Commit(&context_));
-
-  // Rewire the tensors to use the underlying arena buffer.
-  for (int i = 0; i < context_.tensors_size; ++i) {
-    TfLiteTensor& tensor = context_.tensors[i];
-    if (tensor.allocation_type == kTfLiteArenaRw) {
-      TF_LITE_ENSURE_OK(
-          &context_,
-          arena_.ResolveAlloc(&context_, allocs_and_refcounts_[i].alloc,
-                              &tensor.data.raw));
-    }
-    if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
-      TF_LITE_ENSURE_OK(
-          &context_,
-          persistent_arena_.ResolveAlloc(
-              &context_, allocs_and_refcounts_[i].alloc, &tensor.data.raw));
-    }
-  }
-
-  invokable_ = true;
-  next_allocate_node_id_ = new_next_allocate_node_id;
-  return kTfLiteOk;
-}
-
 namespace {
 TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector<int>& x) {
   TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size());
@@ -312,11 +160,19 @@ TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector<int>& x) {
 }  // namespace
 
 TfLiteStatus Interpreter::AllocateTensors() {
-  next_allocate_node_id_ = 0;
-  TF_LITE_ENSURE_OK(&context_, arena_.Clear());
-  TF_LITE_ENSURE_OK(&context_, persistent_arena_.Clear());
-  allocs_and_refcounts_.clear();
-  return AllocateTensorsWhoseSizesAreKnown();
+  next_node_to_prepare_ = 0;
+  if (memory_planner_) {
+    TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
+  }
+
+  if (!consistent_) {
+    ReportError(&context_, "AllocateTensors() called on inconsistent model.");
+    return kTfLiteError;
+  }
+
+  TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
+  invokable_ = true;
+  return kTfLiteOk;
 }
 
 TfLiteStatus Interpreter::AddNodeWithParameters(
@@ -372,6 +228,57 @@ TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index,
   return ResizeTensorImpl(&context_.tensors[tensor_index], dims_lite);
 }
 
+// Returns true if at least one tensor in the given list is kTfLiteDynamic.
+bool HasDynamicTensor(const TfLiteContext& context,
+                      const TfLiteIntArray* tensors) {
+  for (int i = 0; i < tensors->size; ++i) {
+    const TfLiteTensor& tensor = context.tensors[tensors->data[i]];
+    if (tensor.allocation_type == kTfLiteDynamic) {
+      return true;
+    }
+  }
+  return false;
+}
+
+TfLiteStatus Interpreter::PrepareOpsStartingAt(int first_node,
+                                               int* last_node_prepared) {
+  for (int i = first_node; i < nodes_and_registration_.size(); i++) {
+    TfLiteNode& node = nodes_and_registration_[i].first;
+    const TfLiteRegistration& registration = nodes_and_registration_[i].second;
+    if (OpPrepare(registration, &node) == kTfLiteError) {
+      return kTfLiteError;
+    }
+
+    *last_node_prepared = i;
+
+    // Discontinue if the node has dynamic outputs. Note that we don't
+    // stop for dynamic temporary tensors since they won't affect the
+    // sizes of other tensors in the graph.
+    if (HasDynamicTensor(context_, node.outputs)) {
+      break;
+    }
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus Interpreter::PrepareOpsAndTensors() {
+  if (!memory_planner_) {
+    memory_planner_.reset(new ArenaPlanner(
+        &context_, std::unique_ptr<GraphInfo>(new InterpreterInfo(this))));
+    memory_planner_->PlanAllocations();
+  }
+
+  int last_node_prepared = 0;
+
+  TF_LITE_ENSURE_STATUS(
+      PrepareOpsStartingAt(next_node_to_prepare_, &last_node_prepared));
+  TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
+      next_node_to_prepare_, last_node_prepared));
+
+  next_node_to_prepare_ = last_node_prepared + 1;
+  return kTfLiteOk;
+}
+
 TfLiteStatus Interpreter::Invoke() {
   if (!consistent_) {
     ReportError(&context_, "Invoke called on model that is not consistent.");
@@ -384,10 +291,8 @@ TfLiteStatus Interpreter::Invoke() {
 
   TfLiteStatus status = kTfLiteOk;
   if (nnapi_delegate_) {
-    if (AllocateTensorsWhoseSizesAreKnown() == kTfLiteError) {
-      return kTfLiteError;
-    }
-    if (next_allocate_node_id_ == nodes_and_registration_.size()) {
+    TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
+    if (next_node_to_prepare_ == nodes_and_registration_.size()) {
       TF_LITE_ENSURE_OK(&context_, nnapi_delegate_->Invoke(this));
       return kTfLiteOk;
     } else {
@@ -400,14 +305,17 @@ TfLiteStatus Interpreter::Invoke() {
     }
   }
 
+  // Invocations are always done in node order.
+  // Note that calling Invoke repeatedly will cause the original memory plan to
+  // be reused, unless either ResizeInputTensor() or AllocateTensors() has been
+  // called.
+  // TODO(b/71913981): we should force recalculation in the presence of dynamic
+  // tensors, because they may have new value which in turn may affect shapes
+  // and allocations.
   for (int i = 0; i < nodes_and_registration_.size(); i++) {
-    // Ensure we have allocated up to this node. The point of this is to
-    // allocate as much as possible before running any evaluation, but
-    // dynamic shapes can prevent this from being possible.
-    if (i >= next_allocate_node_id_) {
-      if (AllocateTensorsWhoseSizesAreKnown() == kTfLiteError) {
-        return kTfLiteError;
-      }
+    if (i == next_node_to_prepare_) {
+      TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
+      TF_LITE_ENSURE(&context_, next_node_to_prepare_ >= i);
     }
     TfLiteNode& node = nodes_and_registration_[i].first;
     const TfLiteRegistration& registration = nodes_and_registration_[i].second;
diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h
index 65c61e44bee48535f884a3afaddc691972f5e04b..38dd402e8a971fd0aab51e98610ad12131441862 100644
--- a/tensorflow/contrib/lite/interpreter.h
+++ b/tensorflow/contrib/lite/interpreter.h
@@ -23,7 +23,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/allocation.h"
 #include "tensorflow/contrib/lite/context.h"
 #include "tensorflow/contrib/lite/error_reporter.h"
-#include "tensorflow/contrib/lite/simple_memory_arena.h"
+#include "tensorflow/contrib/lite/memory_planner.h"
 
 namespace tflite {
 
@@ -49,13 +49,6 @@ constexpr TfLiteType typeToTfLiteType<unsigned char>() {
   return kTfLiteUInt8;
 }
 
-struct ArenaAllocRefCount {
-  ArenaAllocRefCount() : alloc(), count(0) {}
-
-  ArenaAlloc alloc;
-  int count;
-};
-
 // Forward declare since NNAPIDelegate uses Interpreter.
 class NNAPIDelegate;
 
@@ -276,9 +269,17 @@ class Interpreter {
     return op_reg.invoke(&context_, node);
   }
 
-  // Allocate tensors whose sizes are known in order of nodes. Discontinue when
-  // we encounter a node that has a dynamic output tensor.
-  TfLiteStatus AllocateTensorsWhoseSizesAreKnown();
+  // Call OpPrepare() for as many ops as possible, allocating memory for their
+  // tensors. If an op containing dynamic tensors is found, preparation will be
+  // postponed until this function is called again. This allows the interpreter
+  // to wait until Invoke() to resolve the sizes of dynamic tensors.
+  TfLiteStatus PrepareOpsAndTensors();
+
+  // Call OpPrepare() for all ops starting at 'first_node'. Stop when a
+  // dynamic tensors is found or all ops have been prepared. Fill
+  // 'last_node_prepared' with the id of the op containing dynamic tensors, or
+  // the last in the graph.
+  TfLiteStatus PrepareOpsStartingAt(int first_node, int* last_node_prepared);
 
   // Tensors needed by the interpreter. Use `AddTensors` to add more blank
   // tensor entries. Note, `tensors_.data()` needs to be synchronized to the
@@ -325,17 +326,6 @@ class Interpreter {
   std::vector<std::pair<TfLiteNode, TfLiteRegistration>>
       nodes_and_registration_;
 
-  // Raw memory buffer that is allocated for all temporary and graph outputs.
-  // that are declared kTfLiteArenaRw.
-  SimpleMemoryArena arena_;
-
-  // Raw memory buffer that is allocated for persistent tensors that are
-  // declared as kTfLiteArenaRwPersistent.
-  SimpleMemoryArena persistent_arena_;
-
-  // Stores allocation and reference counts of all tensors.
-  std::vector<ArenaAllocRefCount> allocs_and_refcounts_;
-
   // Whether the model is consistent. That is to say if the inputs and outputs
   // of every node and the global inputs and outputs are valid indexes into
   // the tensor array.
@@ -356,7 +346,7 @@ class Interpreter {
   // The error reporter delegate that tflite will forward queries errors to.
   ErrorReporter* error_reporter_;
 
-  // Next node to allocate output tensors.
+  // Index of the next node to prepare.
   // During Invoke(), Interpreter will allocate input tensors first, which are
   // known to be fixed size. Then it will allocate outputs from nodes as many
   // as possible. When there is a node that produces dynamic sized tensor.
@@ -364,10 +354,12 @@ class Interpreter {
   // node id, and execute the node to generate the output tensor before continue
   // to allocate successors. This process repeats until all nodes are executed.
   // NOTE: this relies on the order of nodes that is in topological order.
-  int next_allocate_node_id_;
+  int next_node_to_prepare_;
 
   // Whether to delegate to NN API
   std::unique_ptr<NNAPIDelegate> nnapi_delegate_;
+
+  std::unique_ptr<MemoryPlanner> memory_planner_;
 };
 
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/ios_makefile.inc b/tensorflow/contrib/lite/ios_makefile.inc
index bcff7ed9889e95c13294b6cf0d0f4788991a04df..26cfe6c3e286ed603c2183986c697562e846889c 100644
--- a/tensorflow/contrib/lite/ios_makefile.inc
+++ b/tensorflow/contrib/lite/ios_makefile.inc
@@ -30,6 +30,9 @@ ifeq ($(TARGET), IOS)
 		${IPHONEOS_SYSROOT} \
 		-arch $(IOS_ARCH) \
 		-O3
+	ifeq ($(IOS_ARCH), x86_64)
+		CXXFLAGS += -msse4.1
+	endif
 	CCFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
 		-fembed-bitcode \
 		-mno-thumb \
diff --git a/tensorflow/contrib/lite/java/AndroidManifest.xml b/tensorflow/contrib/lite/java/AndroidManifest.xml
new file mode 100644
index 0000000000000000000000000000000000000000..f705feacbec38ab5152ce52b701320d8f1cd8d3d
--- /dev/null
+++ b/tensorflow/contrib/lite/java/AndroidManifest.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+          package="org.tensorflow.lite">
+    <application>
+    </application>
+</manifest>
+
diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD
index 1de28eb52ddb458df0be0a8f9ef453f7caf68654..9a1a888b93ff981b1d14faa7e847e80be1f167f2 100644
--- a/tensorflow/contrib/lite/java/BUILD
+++ b/tensorflow/contrib/lite/java/BUILD
@@ -7,6 +7,16 @@ licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow/java:build_defs.bzl", "JAVACOPTS")
 load("//tensorflow/contrib/lite:build_def.bzl", "tflite_jni_binary")
+load("//tensorflow/contrib/lite/java:aar_with_jni.bzl", "aar_with_jni")
+
+# Building tensorflow-lite.aar including 4 variants of .so
+# To build an aar for release, run below command:
+# bazel build --cxxopt='--std=c++11' -c opt --fat_apk_cpu=x86,x86_64,arm64-v8a,armeabi-v7a \
+# tensorflow/contrib/lite/java:tensorflow-lite
+aar_with_jni(
+    name = "tensorflow-lite",
+    android_library = ":tensorflowlite",
+)
 
 android_library(
     name = "tensorflowlite",
@@ -15,6 +25,7 @@ android_library(
             "src/main/java/org/tensorflow/lite/*.java",
         ],
     ),
+    manifest = "AndroidManifest.xml",
     visibility = ["//visibility:public"],
     deps = [
         ":tflite_runtime",
diff --git a/tensorflow/contrib/lite/java/aar_with_jni.bzl b/tensorflow/contrib/lite/java/aar_with_jni.bzl
new file mode 100644
index 0000000000000000000000000000000000000000..4450bc9085555b3416f51bac07ea94a1240e919c
--- /dev/null
+++ b/tensorflow/contrib/lite/java/aar_with_jni.bzl
@@ -0,0 +1,47 @@
+"""Generate zipped aar file including different variants of .so in jni folder."""
+
+def aar_with_jni(name, android_library):
+  # Generate dummy AndroidManifest.xml for dummy apk usage
+  # (dummy apk is generated by <name>_dummy_app_for_so target below)
+  native.genrule(
+      name = name + "_binary_manifest_generator",
+      outs = [name + "_generated_AndroidManifest.xml"],
+      cmd = """
+cat > $(OUTS) <<EOF
+<manifest
+  xmlns:android="http://schemas.android.com/apk/res/android"
+  package="dummy.package.for.so">
+  <uses-sdk android:minSdkVersion="999"/>
+</manifest>
+EOF
+""",
+  )
+
+  # Generate dummy apk including .so files and later we extract out
+  # .so files and throw away the apk.
+  native.android_binary(
+      name = name + "_dummy_app_for_so",
+      manifest = name + "_generated_AndroidManifest.xml",
+      custom_package = "dummy.package.for.so",
+      deps = [android_library],
+      # In some platforms we don't have an Android SDK/NDK and this target
+      # can't be built. We need to prevent the build system from trying to
+      # use the target in that case.
+      tags = ["manual"],
+  )
+
+  native.genrule(
+      name = name,
+      srcs = [android_library + ".aar", name + "_dummy_app_for_so_unsigned.apk"],
+      outs = [name + ".aar"],
+      tags = ["manual"],
+      cmd = """
+cp $(location {}.aar) $(location :{}.aar)
+chmod +w $(location :{}.aar)
+origdir=$$PWD
+cd $$(mktemp -d)
+unzip $$origdir/$(location :{}_dummy_app_for_so_unsigned.apk) "lib/*"
+cp -r lib jni
+zip -r $$origdir/$(location :{}.aar) jni/*/*.so
+""".format(android_library, name, name, name, name),
+  )
diff --git a/tensorflow/contrib/lite/java/build_aar_for_release.sh b/tensorflow/contrib/lite/java/build_aar_for_release.sh
new file mode 100755
index 0000000000000000000000000000000000000000..fbcb1e7db9a3f9b885505e989b7ff7224f2d2b15
--- /dev/null
+++ b/tensorflow/contrib/lite/java/build_aar_for_release.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+set -e
+set -x
+
+TMPDIR=`mktemp -d`
+trap "rm -rf $TMPDIR" EXIT
+
+VERSION=1.0
+
+BUILDER=bazel
+BASEDIR=tensorflow/contrib/lite
+CROSSTOOL="//external:android/crosstool"
+HOST_CROSSTOOL="@bazel_tools//tools/cpp:toolchain"
+
+BUILD_OPTS="--cxxopt=--std=c++11 -c opt"
+CROSSTOOL_OPTS="--crosstool_top=$CROSSTOOL --host_crosstool_top=$HOST_CROSSTOOL"
+
+test -d $BASEDIR || (echo "Aborting: not at top-level build directory"; exit 1)
+
+function build_basic_aar() {
+  local OUTDIR=$1
+  $BUILDER build $BUILD_OPTS $BASEDIR/java:tensorflowlite.aar
+  unzip -d $OUTDIR $BUILDER-bin/$BASEDIR/java/tensorflowlite.aar
+  # targetSdkVersion is here to prevent the app from requesting spurious
+  # permissions, such as permission to make phone calls. It worked for v1.0,
+  # but minSdkVersion might be the preferred way to handle this.
+  sed -i -e 's/<application>/<uses-sdk android:targetSdkVersion="25"\/><application>/' $OUTDIR/AndroidManifest.xml
+}
+
+function build_arch() {
+  local ARCH=$1
+  local CONFIG=$2
+  local OUTDIR=$3
+  mkdir -p $OUTDIR/jni/$ARCH/
+  $BUILDER build $BUILD_OPTS $CROSSTOOL_OPTS --cpu=$CONFIG \
+    $BASEDIR/java:libtensorflowlite_jni.so
+  cp $BUILDER-bin/$BASEDIR/java/libtensorflowlite_jni.so $OUTDIR/jni/$ARCH/
+}
+
+rm -rf $TMPDIR
+mkdir -p $TMPDIR/jni
+
+build_basic_aar $TMPDIR
+build_arch arm64-v8a arm64-v8a $TMPDIR
+build_arch armeabi-v7a armeabi-v7a $TMPDIR
+build_arch x86 x86 $TMPDIR
+build_arch x86_64 x86_64 $TMPDIR
+
+AAR_FILE=`realpath tflite-${VERSION}.aar`
+(cd $TMPDIR && zip $AAR_FILE -r *)
+echo "New AAR file is $AAR_FILE"
+
diff --git a/tensorflow/contrib/lite/java/demo/README.md b/tensorflow/contrib/lite/java/demo/README.md
index 71b633c5774d93684f651821adad13c378a8243c..2e818f728ef208d30b0eeb27ffd7e3fa0c7c1a2d 100644
--- a/tensorflow/contrib/lite/java/demo/README.md
+++ b/tensorflow/contrib/lite/java/demo/README.md
@@ -8,7 +8,12 @@
      It's easiest with Android Studio.
 
       - You'll need at least SDK version 23.
+      - Make sure to install the latest version of Bazel. Some distributions
+        ship with Bazel 0.5.4, which is too old.
       - Bazel requires Android Build Tools `26.0.1` or higher.
+      - **Bazel is incompatible with NDK revisions 15 and above,** with revision
+        16 being a compile-breaking change. [Download an older version manually
+        instead of using the SDK Manager.](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#install-bazel-and-android-prerequisites)
       - You also need to install the Android Support Repository, available
         through Android Studio under `Android SDK Manager -> SDK Tools ->
         Android Support Repository`.
@@ -16,10 +21,15 @@
   2. [Edit your `WORKSPACE`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#edit-workspace)
      to add SDK and NDK targets.
 
+     NOTE: As long as you have the SDK and NDK installed, the `./configure`
+     script will create these rules for you. Answer "Yes" when the script asks
+     to automatically configure the `./WORKSPACE`.
+
       - Make sure the `api_level` in `WORKSPACE` is set to an SDK version that
         you have installed.
       - By default, Android Studio will install the SDK to `~/Android/Sdk` and
-        the NDK to `~/Android/Sdk/ndk-bundle`.
+        the NDK to `~/Android/Sdk/ndk-bundle` (but the NDK should be a manual
+        download until Bazel supports NDK 16. See bullet points under (1)).
 
 2. Build the app with Bazel. The demo needs C++11:
 
diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
index e7bad4637041d003c1e507d81c0c30404c587653..e44c5ae6b48eda187079dd3a0a1bc563276d816e 100644
--- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
+++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
@@ -73,6 +73,11 @@ public class ImageClassifier {
 
   /** An array to hold inference results, to be feed into Tensorflow Lite as outputs. */
   private byte[][] labelProbArray = null;
+  /** multi-stage low pass filter * */
+  private float[][] filterLabelProbArray = null;
+
+  private static final int FILTER_STAGES = 3;
+  private static final float FILTER_FACTOR = 0.4f;
 
   private PriorityQueue<Map.Entry<String, Float>> sortedLabels =
       new PriorityQueue<>(
@@ -93,6 +98,7 @@ public class ImageClassifier {
             DIM_BATCH_SIZE * DIM_IMG_SIZE_X * DIM_IMG_SIZE_Y * DIM_PIXEL_SIZE);
     imgData.order(ByteOrder.nativeOrder());
     labelProbArray = new byte[1][labelList.size()];
+    filterLabelProbArray = new float[FILTER_STAGES][labelList.size()];
     Log.d(TAG, "Created a Tensorflow Lite Image Classifier.");
   }
 
@@ -108,11 +114,38 @@ public class ImageClassifier {
     tflite.run(imgData, labelProbArray);
     long endTime = SystemClock.uptimeMillis();
     Log.d(TAG, "Timecost to run model inference: " + Long.toString(endTime - startTime));
+
+    // Smooth the results across frames.
+    applyFilter();
+
+    // Print the results.
     String textToShow = printTopKLabels();
     textToShow = Long.toString(endTime - startTime) + "ms" + textToShow;
     return textToShow;
   }
 
+  void applyFilter() {
+    int numLabels = labelList.size();
+
+    // Low pass filter `labelProbArray` into the first stage of the filter.
+    for (int j = 0; j < numLabels; ++j) {
+      filterLabelProbArray[0][j] +=
+          FILTER_FACTOR * (labelProbArray[0][j] - filterLabelProbArray[0][j]);
+    }
+    // Low pass filter each stage into the next.
+    for (int i = 1; i < FILTER_STAGES; ++i) {
+      for (int j = 0; j < numLabels; ++j) {
+        filterLabelProbArray[i][j] +=
+            FILTER_FACTOR * (filterLabelProbArray[i - 1][j] - filterLabelProbArray[i][j]);
+      }
+    }
+
+    // Copy the last stage filter output back to `labelProbArray`.
+    for (int j = 0; j < numLabels; ++j) {
+      labelProbArray[0][j] = (byte)filterLabelProbArray[FILTER_STAGES - 1][j];
+    }
+  }
+
   /** Closes tflite to release resources. */
   public void close() {
     tflite.close();
@@ -177,7 +210,7 @@ public class ImageClassifier {
     final int size = sortedLabels.size();
     for (int i = 0; i < size; ++i) {
       Map.Entry<String, Float> label = sortedLabels.poll();
-      textToShow = "\n" + label.getKey() + ":" + Float.toString(label.getValue()) + textToShow;
+      textToShow = String.format("\n%s: %4.2f", label.getKey(), label.getValue()) + textToShow;
     }
     return textToShow;
   }
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
index 1939a078ad8031b99620773c9b91335c4e8f7b22..5ee594dec492ad2fee22e603a6de311b3fed4cac 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
@@ -34,7 +34,7 @@ final class NativeInterpreterWrapper implements AutoCloseable {
   NativeInterpreterWrapper(String modelPath) {
     errorHandle = createErrorReporter(ERROR_BUFFER_SIZE);
     modelHandle = createModel(modelPath, errorHandle);
-    interpreterHandle = createInterpreter(modelHandle);
+    interpreterHandle = createInterpreter(modelHandle, errorHandle);
   }
 
   /**
@@ -46,7 +46,7 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     modelByteBuffer = mappedByteBuffer;
     errorHandle = createErrorReporter(ERROR_BUFFER_SIZE);
     modelHandle = createModelWithBuffer(modelByteBuffer, errorHandle);
-    interpreterHandle = createInterpreter(modelHandle);
+    interpreterHandle = createInterpreter(modelHandle, errorHandle);
   }
 
   /** Releases resources associated with this {@code NativeInterpreterWrapper}. */
@@ -103,11 +103,22 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     return outputs;
   }
 
+  private static native long[] run(
+      long interpreterHandle,
+      long errorHandle,
+      Object[] sizes,
+      int[] dtypes,
+      int[] numsOfBytes,
+      Object[] values);
+
   /** Resizes dimensions of a specific input. */
   void resizeInput(int idx, int[] dims) {
     resizeInput(interpreterHandle, errorHandle, idx, dims);
   }
 
+  private static native void resizeInput(
+      long interpreterHandle, long errorHandle, int inputIdx, int[] dims);
+
   void setUseNNAPI(boolean useNNAPI) {
     useNNAPI(interpreterHandle, useNNAPI);
   }
@@ -245,9 +256,6 @@ final class NativeInterpreterWrapper implements AutoCloseable {
 
   private static native String[] getOutputNames(long interpreterHandle);
 
-  private static native void resizeInput(
-      long interpreterHandle, long errorHandle, int inputIdx, int[] dims);
-
   private static native void useNNAPI(long interpreterHandle, boolean state);
 
   private static native long createErrorReporter(int size);
@@ -256,15 +264,7 @@ final class NativeInterpreterWrapper implements AutoCloseable {
 
   private static native long createModelWithBuffer(MappedByteBuffer modelBuffer, long errorHandle);
 
-  private static native long createInterpreter(long modelHandle);
-
-  private static native long[] run(
-      long interpreterHandle,
-      long errorHandle,
-      Object[] sizes,
-      int[] dtypes,
-      int[] numsOfBytes,
-      Object[] values);
+  private static native long createInterpreter(long modelHandle, long errorHandle);
 
   private static native void delete(long errorHandle, long modelHandle, long interpreterHandle);
 
diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
index bc6462eb5466e14769f94c5103984f5201b4b8dc..f3f51b668f068ffcd02862a79b72dbae31d31c02 100644
--- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
+++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
@@ -307,12 +307,21 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createModelWithBuffer(
 
 JNIEXPORT jlong JNICALL
 Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter(
-    JNIEnv* env, jclass clazz, jlong model_handle) {
+    JNIEnv* env, jclass clazz, jlong model_handle, jlong error_handle) {
   tflite::FlatBufferModel* model = convertLongToModel(env, model_handle);
   if (model == nullptr) return 0;
+  BufferErrorReporter* error_reporter =
+      convertLongToErrorReporter(env, error_handle);
+  if (error_reporter == nullptr) return 0;
   auto resolver = ::tflite::CreateOpResolver();
   std::unique_ptr<tflite::Interpreter> interpreter;
-  tflite::InterpreterBuilder(*model, *(resolver.get()))(&interpreter);
+  TfLiteStatus status =
+      tflite::InterpreterBuilder(*model, *(resolver.get()))(&interpreter);
+  if (status != kTfLiteOk) {
+    throwException(env, kIllegalArgumentException,
+                   "Cannot create interpreter: %s",
+                   error_reporter->CachedErrorMessage());
+  }
   return reinterpret_cast<jlong>(interpreter.release());
 }
 
diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
index 430886b7cc04a356d1826843acc1bbebf4189bf7..c52a7e4e439936344be26d5761fb5747db64794a 100644
--- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
+++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
@@ -95,11 +95,11 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createModelWithBuffer(
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
  *  Method:
- *  Signature: (J)J
+ *  Signature: (JJ)J
  */
 JNIEXPORT jlong JNICALL
 Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter(
-    JNIEnv* env, jclass clazz, jlong model_handle);
+    JNIEnv* env, jclass clazz, jlong model_handle, jlong error_handle);
 
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
index 9a6894f49c0b7278511717d2671648c6d1763e00..473f73816fd3c0a414a2c2e232dec299579fcbb6 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java
@@ -25,6 +25,7 @@ import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 
 /** Unit tests for {@link org.tensorflow.lite.NativeInterpreterWrapper}. */
+// TODO(b/71818425): Generates model files dynamically.
 @RunWith(JUnit4.class)
 public final class NativeInterpreterWrapperTest {
 
@@ -43,6 +44,9 @@ public final class NativeInterpreterWrapperTest {
   private static final String INVALID_MODEL_PATH =
       "tensorflow/contrib/lite/java/src/testdata/invalid_model.bin";
 
+  private static final String MODEL_WITH_CUSTOM_OP_PATH =
+      "tensorflow/contrib/lite/java/src/testdata/with_custom_op.lite";
+
   @Test
   public void testConstructor() {
     NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH);
@@ -62,6 +66,18 @@ public final class NativeInterpreterWrapperTest {
     }
   }
 
+  @Test
+  public void testConstructorWithUnresolableCustomOp() {
+    try {
+      NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(MODEL_WITH_CUSTOM_OP_PATH);
+      fail();
+    } catch (IllegalArgumentException e) {
+      assertThat(e)
+          .hasMessageThat()
+          .contains("Cannot create interpreter: Didn't find custom op for name 'Assign'");
+    }
+  }
+
   @Test
   public void testRunWithFloat() {
     NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH);
diff --git a/tensorflow/contrib/lite/java/src/testdata/with_custom_op.lite b/tensorflow/contrib/lite/java/src/testdata/with_custom_op.lite
new file mode 100644
index 0000000000000000000000000000000000000000..e775d56d88854ecdf70233262ff5884d224f4373
Binary files /dev/null and b/tensorflow/contrib/lite/java/src/testdata/with_custom_op.lite differ
diff --git a/tensorflow/contrib/lite/kernels/Android.bp b/tensorflow/contrib/lite/kernels/Android.bp
index f077bcfbed9b310491206d0c1b5b56fdddfbe403..de53078c8af2783cc876636ad350d0adb48fb6a9 100644
--- a/tensorflow/contrib/lite/kernels/Android.bp
+++ b/tensorflow/contrib/lite/kernels/Android.bp
@@ -32,26 +32,36 @@ cc_library_static {
         "activations.cc",
         "add.cc",
         "basic_rnn.cc",
+        "batch_to_space_nd.cc",
         "concatenation.cc",
         "conv.cc",
         "depthwise_conv.cc",
+        "div.cc",
         "embedding_lookup.cc",
         "embedding_lookup_sparse.cc",
         "fully_connected.cc",
+        "gather.cc",
         "hashtable_lookup.cc",
         "kernel_util.cc",
         "l2norm.cc",
         "local_response_norm.cc",
         "lsh_projection.cc",
         "lstm.cc",
+        "mean.cc",
         "mul.cc",
+        "pad.cc",
         "pooling.cc",
         "register.cc",
         "reshape.cc",
         "resize_bilinear.cc",
         "skip_gram.cc",
+        "space_to_batch_nd.cc",
         "space_to_depth.cc",
+        "squeeze.cc",
+        "sub.cc",
         "svdf.cc",
+        "transpose.cc",
+        "unidirectional_sequence_rnn.cc",
         "internal/tensor_utils.cc",
         "internal/quantization_util.cc",
         "internal/reference/portable_tensor_utils.cc",
diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index bbbfa3e7415bfd7a34dfc7d764da55cac22e7d42..7e9644f36c71ff7e03a04dd01743be811632f077 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -32,6 +32,7 @@ cc_library(
         "//tensorflow/contrib/lite:framework",
         "//tensorflow/contrib/lite:schema_fbs_version",
         "//tensorflow/contrib/lite:string_util",
+        "//tensorflow/contrib/lite/testing:util",
         "//tensorflow/core:lib",
         "@com_google_googletest//:gtest",
     ],
@@ -76,26 +77,36 @@ cc_library(
         "activations.cc",
         "add.cc",
         "basic_rnn.cc",
+        "batch_to_space_nd.cc",
         "concatenation.cc",
         "conv.cc",
         "depthwise_conv.cc",
+        "div.cc",
         "embedding_lookup.cc",
         "embedding_lookup_sparse.cc",
         "fully_connected.cc",
+        "gather.cc",
         "hashtable_lookup.cc",
         "kernel_util.cc",
         "l2norm.cc",
         "local_response_norm.cc",
         "lsh_projection.cc",
         "lstm.cc",
+        "mean.cc",
         "mul.cc",
+        "pad.cc",
         "pooling.cc",
         "register.cc",
         "reshape.cc",
         "resize_bilinear.cc",
         "skip_gram.cc",
+        "space_to_batch_nd.cc",
         "space_to_depth.cc",
+        "squeeze.cc",
+        "sub.cc",
         "svdf.cc",
+        "transpose.cc",
+        "unidirectional_sequence_rnn.cc",
     ],
     hdrs = [
         "kernel_util.h",
@@ -152,6 +163,44 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "transpose_test",
+    size = "small",
+    srcs = ["transpose_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "//tensorflow/contrib/lite/kernels/internal:reference",
+        "//tensorflow/contrib/lite/kernels/internal:reference_base",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+tf_cc_test(
+    name = "space_to_batch_nd_test",
+    size = "small",
+    srcs = ["space_to_batch_nd_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+tf_cc_test(
+    name = "batch_to_space_nd_test",
+    size = "small",
+    srcs = ["batch_to_space_nd_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 tf_cc_test(
     name = "concatenation_test",
     size = "small",
@@ -200,6 +249,18 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "unidirectional_sequence_rnn_test",
+    size = "small",
+    srcs = ["unidirectional_sequence_rnn_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 tf_cc_test(
     name = "l2norm_test",
     size = "small",
@@ -212,6 +273,18 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "mean_test",
+    size = "small",
+    srcs = ["mean_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 tf_cc_test(
     name = "mul_test",
     size = "small",
@@ -224,6 +297,18 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "pad_test",
+    size = "small",
+    srcs = ["pad_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 tf_cc_test(
     name = "reshape_test",
     size = "small",
@@ -236,6 +321,19 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "gather_test",
+    size = "small",
+    srcs = ["gather_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:builtin_op_data",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 tf_cc_test(
     name = "resize_bilinear_test",
     size = "small",
@@ -395,6 +493,18 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "squeeze_test",
+    size = "small",
+    srcs = ["squeeze_test.cc"],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/lite/kernels/activations.cc b/tensorflow/contrib/lite/kernels/activations.cc
index 7ab60a33e5e2ff61bae5f4c6db85ab9c47a391bc..8ac93bc8c8dcfc66d3822e01b6f9b29a3e49c446 100644
--- a/tensorflow/contrib/lite/kernels/activations.cc
+++ b/tensorflow/contrib/lite/kernels/activations.cc
@@ -349,7 +349,7 @@ TfLiteRegistration* Register_RELU() {
   return &r;
 }
 
-TfLiteRegistration* Register_RELU1() {
+TfLiteRegistration* Register_RELU_N1_TO_1() {
   static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
                                  activations::GenericPrepare,
                                  activations::Relu1Eval};
diff --git a/tensorflow/contrib/lite/kernels/activations_test.cc b/tensorflow/contrib/lite/kernels/activations_test.cc
index f10aee70170d4a94ed54376fa410b22a60f109af..68d49944e51b043b6b82aa1589d22f6ebed37574 100644
--- a/tensorflow/contrib/lite/kernels/activations_test.cc
+++ b/tensorflow/contrib/lite/kernels/activations_test.cc
@@ -102,7 +102,7 @@ TEST(FloatActivationsOpTest, Relu) {
 }
 
 TEST(FloatActivationsOpTest, Relu1) {
-  FloatActivationsOpModel m(BuiltinOperator_RELU1,
+  FloatActivationsOpModel m(BuiltinOperator_RELU_N1_TO_1,
                             /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}});
   m.SetInput({
       0.0, -0.6, 0.2, -0.4,  //
@@ -317,7 +317,7 @@ TEST(QuantizedActivationsOpTest, Softmax2D) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/add_test.cc b/tensorflow/contrib/lite/kernels/add_test.cc
index 8e12a837c4954832ff37a6d1ab377bee9e8d5763..306dfc3e803d3df34061767ba9ced032299bfa26 100644
--- a/tensorflow/contrib/lite/kernels/add_test.cc
+++ b/tensorflow/contrib/lite/kernels/add_test.cc
@@ -77,9 +77,10 @@ TEST(FloatAddOpModel, NoActivation) {
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3}));
 }
 
-TEST(FloatAddOpModel, ActivationRELU1) {
+TEST(FloatAddOpModel, ActivationRELU_N1_TO_1) {
   FloatAddOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}},
-                    {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU1);
+                    {TensorType_FLOAT32, {}},
+                    ActivationFunctionType_RELU_N1_TO_1);
   m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.7, 0.8});
   m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5});
   m.Invoke();
@@ -122,7 +123,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivation) {
   }
 }
 
-TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU1) {
+TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
   std::vector<std::initializer_list<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
                                                        {-0.8, 0.2, 0.7, 0.3}};
@@ -133,7 +134,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU1) {
   for (int i = 0; i < inputs1.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
                           {TensorType_UINT8, {}, -1.0, 1.0},
-                          ActivationFunctionType_RELU1);
+                          ActivationFunctionType_RELU_N1_TO_1);
     m.QuantizeAndPopulate<uint8_t>(m.input1(), inputs1[i]);
     m.QuantizeAndPopulate<uint8_t>(m.input2(), inputs2[i]);
     m.Invoke();
@@ -164,8 +165,7 @@ TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) {
 }  // namespace
 }  // namespace tflite
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/basic_rnn_test.cc b/tensorflow/contrib/lite/kernels/basic_rnn_test.cc
index dfa75655bcfe7762c6cc4c9a98a71d529028c03a..5ecccb985e91238f1183c8f94a2b5f468758ce55 100644
--- a/tensorflow/contrib/lite/kernels/basic_rnn_test.cc
+++ b/tensorflow/contrib/lite/kernels/basic_rnn_test.cc
@@ -261,7 +261,7 @@ TEST(FullyConnectedOpTest, BlackBoxTest) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/batch_to_space_nd.cc b/tensorflow/contrib/lite/kernels/batch_to_space_nd.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0eed680fdcc2afc4bc72be55a5e7722310fa4538
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/batch_to_space_nd.cc
@@ -0,0 +1,161 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <string.h>
+#include <vector>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace batch_to_space_nd {
+
+// This file has two implementations of BatchToSpaceND.
+enum KernelType {
+  kReference,
+  kGenericOptimized,
+};
+
+struct BatchToSpaceNDContext {
+  BatchToSpaceNDContext(TfLiteContext* context, TfLiteNode* node) {
+    params = reinterpret_cast<TfLiteBatchToSpaceNDParams*>(node->builtin_data);
+    input = GetInput(context, node, 0);
+    output = GetOutput(context, node, 0);
+  }
+  TfLiteBatchToSpaceNDParams* params;
+  TfLiteTensor* input;
+  TfLiteTensor* output;
+};
+
+// Currently, only 4D NHWC input/output op_context are supported.
+// The 4D array need to have exactly 2 spatial dimensions.
+// TODO(ycling): Support arbitrary dimension in BatchToSpaceND.
+const int kInputDimensionNum = 4;
+const int kOutputDimensionNum = 4;
+const int kSpatialDimensionNum = 2;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  // The 2nd tensor (block_shape) and the 3rd tensor (crops) are ignored now.
+  TF_LITE_ENSURE(context, NumInputs(node) >= 1 && NumInputs(node) <= 3);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  BatchToSpaceNDContext op_context(context, node);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(op_context.input),
+                    kInputDimensionNum);
+  TF_LITE_ENSURE_EQ(context, op_context.params->num_spatial_dimensions,
+                    kSpatialDimensionNum);
+  TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
+
+  const TfLiteIntArray* input_size = op_context.input->dims;
+  const int* block_shape = op_context.params->block_shape;
+
+  // Number of batch must be multiple of (block_shape[0] * block_shape[1]).
+  TF_LITE_ENSURE_EQ(context,
+                    input_size->data[0] % (block_shape[0] * block_shape[1]), 0);
+
+  const int output_batch_size =
+      input_size->data[0] / (block_shape[0] * block_shape[1]);
+  const int output_height = input_size->data[1] * block_shape[0];
+  const int output_width = input_size->data[2] * block_shape[1];
+  const int output_channel_size = input_size->data[3];
+
+  TfLiteIntArray* output_size = TfLiteIntArrayCreate(kOutputDimensionNum);
+  output_size->data[0] = output_batch_size;
+  output_size->data[1] = output_height;
+  output_size->data[2] = output_width;
+  output_size->data[3] = output_channel_size;
+
+  return context->ResizeTensor(context, op_context.output, output_size);
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  BatchToSpaceNDContext op_context(context, node);
+
+  int block_shape_dims_array[1] = {kSpatialDimensionNum};
+  Dims<4> block_shape_dims = GetTensorDims(block_shape_dims_array, 1);
+
+#define TF_LITE_BATCH_TO_SPACE_ND(type, scalar)                          \
+  type::BatchToSpaceND(GetTensorData<scalar>(op_context.input),          \
+                       GetTensorDims(op_context.input),                  \
+                       op_context.params->block_shape, block_shape_dims, \
+                       GetTensorData<scalar>(op_context.output),         \
+                       GetTensorDims(op_context.output))
+  switch (op_context.input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      if (kernel_type == kReference) {
+        TF_LITE_BATCH_TO_SPACE_ND(reference_ops, float);
+      } else {
+        TF_LITE_BATCH_TO_SPACE_ND(optimized_ops, float);
+      }
+      break;
+    case kTfLiteUInt8:
+      if (kernel_type == kReference) {
+        TF_LITE_BATCH_TO_SPACE_ND(reference_ops, uint8_t);
+      } else {
+        TF_LITE_BATCH_TO_SPACE_ND(optimized_ops, uint8_t);
+      }
+      break;
+    case kTfLiteInt32:
+      if (kernel_type == kReference) {
+        TF_LITE_BATCH_TO_SPACE_ND(reference_ops, int32_t);
+      } else {
+        TF_LITE_BATCH_TO_SPACE_ND(optimized_ops, int32_t);
+      }
+      break;
+    case kTfLiteInt64:
+      if (kernel_type == kReference) {
+        TF_LITE_BATCH_TO_SPACE_ND(reference_ops, int64_t);
+      } else {
+        TF_LITE_BATCH_TO_SPACE_ND(optimized_ops, int64_t);
+      }
+      break;
+    default:
+      context->ReportError(context,
+                           "Type is currently not supported by BatchToSpace.");
+      return kTfLiteError;
+  }
+#undef TF_LITE_BATCH_TO_SPACE_ND
+  return kTfLiteOk;
+}
+
+}  // namespace batch_to_space_nd
+
+TfLiteRegistration* Register_BATCH_TO_SPACE_ND_REF() {
+  static TfLiteRegistration r = {
+      nullptr, nullptr, batch_to_space_nd::Prepare,
+      batch_to_space_nd::Eval<batch_to_space_nd::kReference>};
+  return &r;
+}
+
+TfLiteRegistration* Register_BATCH_TO_SPACE_ND_GENERIC_OPT() {
+  static TfLiteRegistration r = {
+      nullptr, nullptr, batch_to_space_nd::Prepare,
+      batch_to_space_nd::Eval<batch_to_space_nd::kGenericOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_BATCH_TO_SPACE_ND() {
+  return Register_BATCH_TO_SPACE_ND_GENERIC_OPT();
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/batch_to_space_nd_test.cc b/tensorflow/contrib/lite/kernels/batch_to_space_nd_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3ec4efbebcef9d55d0042d93007018c9f6ee3b58
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/batch_to_space_nd_test.cc
@@ -0,0 +1,78 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class BatchToSpaceNDOpModel : public SingleOpModel {
+ public:
+  BatchToSpaceNDOpModel(std::initializer_list<int> input_shape,
+                        std::initializer_list<int> block_shape,
+                        std::initializer_list<int> before_crops,
+                        std::initializer_list<int> after_crops) {
+    input_ = AddInput(TensorType_FLOAT32);
+    output_ = AddOutput(TensorType_FLOAT32);
+    SetBuiltinOp(BuiltinOperator_BATCH_TO_SPACE_ND,
+                 BuiltinOptions_BatchToSpaceNDOptions,
+                 CreateBatchToSpaceNDOptions(
+                     builder_, builder_.CreateVector<int>(block_shape),
+                     builder_.CreateVector<int>(before_crops),
+                     builder_.CreateVector<int>(after_crops))
+                     .Union());
+    BuildInterpreter({input_shape});
+  }
+
+  void SetInput(std::initializer_list<float> data) {
+    PopulateTensor<float>(input_, data);
+  }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+ private:
+  int input_;
+  int output_;
+};
+
+TEST(BatchToSpaceNDOpTest, SimpleTest) {
+  BatchToSpaceNDOpModel m({4, 2, 2, 1}, {2, 2}, {0, 0}, {0, 0});
+  m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 5, 2, 6, 9, 13, 10, 14, 3, 7,
+                                               4, 8, 11, 15, 12, 16}));
+}
+
+TEST(BatchToSpaceNDOpTest, InvalidShapeTest) {
+  EXPECT_DEATH(BatchToSpaceNDOpModel({3, 2, 2, 1}, {2, 2}, {0, 0}, {0, 0}),
+               "Cannot allocate tensors");
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/kernels/concatenation_test.cc b/tensorflow/contrib/lite/kernels/concatenation_test.cc
index 94e5b2acdcabeedb4652baa1a008b22bf6bc8433..499856a93cbbfbf9aa1a326912e52ce32bbbdf83 100644
--- a/tensorflow/contrib/lite/kernels/concatenation_test.cc
+++ b/tensorflow/contrib/lite/kernels/concatenation_test.cc
@@ -156,7 +156,7 @@ TEST(ConcatenationOpTest, FourInputsQuantized) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/conv_test.cc b/tensorflow/contrib/lite/kernels/conv_test.cc
index 18d7a31d594efb6a05fe7292a0194ea17599a65b..1d0a81c3135625c07a3566f5f9a8e5401f0d4db7 100644
--- a/tensorflow/contrib/lite/kernels/conv_test.cc
+++ b/tensorflow/contrib/lite/kernels/conv_test.cc
@@ -434,7 +434,7 @@ TEST(ConvolutionOpTest, SimpleTestQuantizedWithAnisotropicStrides) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
index 39227b2811e2be719a0be77f89793bcf9366d513..1439c8bce14ad127ed68dc54991aed8b8bb39383 100644
--- a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
@@ -180,7 +180,7 @@ TEST(QuantizedDepthwiseConvolutionOpTest, SimpleTestQuantized) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/div.cc b/tensorflow/contrib/lite/kernels/div.cc
new file mode 100644
index 0000000000000000000000000000000000000000..44bd0dc85d50c98ec6b6888e05064a8f2e2731c0
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/div.cc
@@ -0,0 +1,129 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace div {
+
+// This file has three implementation of Div.
+enum KernelType {
+  kReference,
+  kGenericOptimized,  // Neon-free
+  kNeonOptimized,
+};
+
+constexpr int kInputTensor1 = 0;
+constexpr int kInputTensor2 = 1;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2));
+  for (int i = 0; i < NumDimensions(input1); ++i) {
+    TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i),
+                      SizeOfDimension(input2, i));
+  }
+
+  TF_LITE_ENSURE_EQ(context, input1->type, output->type);
+  TF_LITE_ENSURE_EQ(context, input2->type, output->type);
+
+  TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims);
+  return context->ResizeTensor(context, output, output_size);
+}
+
+template <KernelType kernel_type>
+void EvalDivFloat(TfLiteContext* context, TfLiteNode* node,
+                  TfLiteDivParams* params, TfLiteTensor* input1,
+                  TfLiteTensor* input2, TfLiteTensor* output) {
+  float output_activation_min, output_activation_max;
+  CalculateActivationRangeFloat(params->activation, &output_activation_min,
+                                &output_activation_max);
+#define TF_LITE_DIV(type)                                        \
+  type::Div(GetTensorData<float>(input1), GetTensorDims(input1), \
+            GetTensorData<float>(input2), GetTensorDims(input2), \
+            output_activation_min, output_activation_max,        \
+            GetTensorData<float>(output), GetTensorDims(output))
+  if (kernel_type == kReference) {
+    TF_LITE_DIV(reference_ops);
+  } else {
+    TF_LITE_DIV(optimized_ops);
+  }
+#undef TF_LITE_DIV
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data);
+
+  TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  if (output->type == kTfLiteFloat32) {
+    EvalDivFloat<kernel_type>(context, node, params, input1, input2, output);
+  } else {
+    context->ReportError(context, "Inputs and outputs not all float types.");
+    return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace div
+
+TfLiteRegistration* Register_DIV_REF() {
+  static TfLiteRegistration r = {nullptr, nullptr, div::Prepare,
+                                 div::Eval<div::kReference>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DIV_GENERIC_OPT() {
+  static TfLiteRegistration r = {nullptr, nullptr, div::Prepare,
+                                 div::Eval<div::kGenericOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DIV_NEON_OPT() {
+  static TfLiteRegistration r = {nullptr, nullptr, div::Prepare,
+                                 div::Eval<div::kNeonOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DIV() {
+#ifdef USE_NEON
+  return Register_DIV_NEON_OPT();
+#else
+  return Register_DIV_GENERIC_OPT();
+#endif
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc
index 69d9c5cc7dec13a65f1c5050f2f1c56812ad5aa1..dcdc5fffad9ceac1a9d23a4e91637a9ff92a8dda 100644
--- a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc
+++ b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse_test.cc
@@ -158,9 +158,7 @@ TEST(EmbeddingLookupOpTest, Indices3DTest) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-#ifdef OS_LINUX
-  tflite::LogToStderr();
-#endif
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup_test.cc b/tensorflow/contrib/lite/kernels/embedding_lookup_test.cc
index 8c030b06772ac0c6af34a45897f03ebc4637d4de..9b501878f196216a61568bfa36e6615f4dd07478 100644
--- a/tensorflow/contrib/lite/kernels/embedding_lookup_test.cc
+++ b/tensorflow/contrib/lite/kernels/embedding_lookup_test.cc
@@ -88,7 +88,7 @@ TEST(EmbeddingLookupOpTest, SimpleTest) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/fully_connected_test.cc b/tensorflow/contrib/lite/kernels/fully_connected_test.cc
index 112e3f1ba01a428023eea5ee8410fb76c1d67de6..a0f766c4f4580d7679275c0b63aa200410fcb5ad 100644
--- a/tensorflow/contrib/lite/kernels/fully_connected_test.cc
+++ b/tensorflow/contrib/lite/kernels/fully_connected_test.cc
@@ -370,8 +370,7 @@ TEST(FullyConnectedOpTest, BlackBoxTest) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/gather.cc b/tensorflow/contrib/lite/kernels/gather.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f8df797daf7338e33b16508c21fc61cd9836db1e
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/gather.cc
@@ -0,0 +1,130 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <string.h>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+#include "tensorflow/contrib/lite/string_util.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace gather {
+constexpr int kInputTensor = 0;
+constexpr int kInputPositions = 1;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  const auto* params =
+      reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
+  TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* positions = GetInput(context, node, kInputPositions);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  // Only INT32 positions are supported.
+  TF_LITE_ENSURE_EQ(context, positions->type, kTfLiteInt32);
+  // Check that input and output types match.
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+  // TODO(mgubin): only 1D positions are currently supported.
+  TF_LITE_ENSURE_EQ(context, NumDimensions(positions), 1);
+  // TODO(mgubin): Only default axis == 0 is supported.
+  // Check conditions for different types.
+  switch (input->type) {
+    case kTfLiteFloat32:
+    case kTfLiteUInt8:
+    case kTfLiteInt32: {
+      // Fully supported by reference_ops::Gather.
+    } break;
+
+    case kTfLiteString: {
+      // Only 1D input is supported.
+      TF_LITE_ENSURE_EQ(context, NumDimensions(input), 1);
+    } break;
+    default:
+      context->ReportError(context,
+                           "Only float32 and string types are supported");
+      return kTfLiteError;
+  }
+  const int num_dimensions =
+      NumDimensions(input) + NumDimensions(positions) - 1;
+  TF_LITE_ENSURE(context, params->axis < num_dimensions);
+  TfLiteIntArray* output_shape = TfLiteIntArrayCreate(num_dimensions);
+  int output_index = 0;
+  for (int i = 0; i < params->axis; ++i) {
+    output_shape->data[output_index++] = input->dims->data[i];
+  }
+  for (int i = 0; i < positions->dims->size; ++i) {
+    output_shape->data[output_index++] = positions->dims->data[i];
+  }
+  for (int i = params->axis + 1; i < input->dims->size; ++i) {
+    output_shape->data[output_index++] = input->dims->data[i];
+  }
+  return context->ResizeTensor(context, output, output_shape);
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* positions = GetInput(context, node, kInputPositions);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const int input_rank = NumDimensions(input);
+#define TF_LITE_GATHER(data_type, index_type)                            \
+  optimized_ops::Gather(                                                 \
+      GetTensorData<data_type>(input), GetTensorDims(input), input_rank, \
+      GetTensorData<index_type>(positions), GetTensorDims(positions),    \
+      GetTensorData<data_type>(output), GetTensorDims(output));
+  switch (input->type) {
+    case kTfLiteFloat32:
+      TF_LITE_GATHER(float, int32_t);
+      break;
+    case kTfLiteUInt8:
+      TF_LITE_GATHER(uint8_t, int32_t);
+      break;
+    case kTfLiteInt32:
+      TF_LITE_GATHER(int32_t, int32_t);
+      break;
+    case kTfLiteString: {
+      DynamicBuffer buffer;
+      const int32* indexes = positions->data.i32;
+      const int num_strings = GetStringCount(input);
+      for (int i = 0; i < positions->dims->data[0]; ++i) {
+        const int pos = indexes[i];
+        TF_LITE_ENSURE(context, pos < num_strings);
+        const auto string_ref = GetString(input, pos);
+        buffer.AddString(string_ref.str, string_ref.len);
+      }
+      buffer.WriteToTensor(output);
+    } break;
+    default:
+      return kTfLiteError;
+  }
+#undef TF_LITE_GATHER
+  return kTfLiteOk;
+}
+}  // namespace gather
+
+TfLiteRegistration* Register_GATHER() {
+  static TfLiteRegistration r = {nullptr, nullptr, gather::Prepare,
+                                 gather::Eval};
+  return &r;
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/gather_test.cc b/tensorflow/contrib/lite/kernels/gather_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..6343d3b4ef20ae3e030396ec1b6adbcf83a3e45f
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/gather_test.cc
@@ -0,0 +1,121 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class GatherOpModel : public SingleOpModel {
+ public:
+  GatherOpModel(std::initializer_list<int> input_shape, TensorType input_type,
+                std::initializer_list<int> positions_shape) {
+    input_ = AddInput(input_type);
+    positions_ = AddInput(TensorType_INT32);
+    output_ = AddOutput(input_type);
+    SetBuiltinOp(BuiltinOperator_GATHER, BuiltinOptions_GatherOptions,
+                 CreateGatherOptions(builder_, 0).Union());
+    BuildInterpreter({input_shape, positions_shape});
+  }
+
+  void SetInputFloat(std::initializer_list<float> data) {
+    PopulateTensor<float>(input_, data);
+  }
+
+  void SetInputUint8(std::initializer_list<uint8_t> data) {
+    PopulateTensor<uint8_t>(input_, data);
+  }
+
+  void SetInput(std::initializer_list<string> data) {
+    PopulateStringTensor(input_, data);
+  }
+
+  void SetPositions(std::initializer_list<int32> data) {
+    PopulateTensor<int32>(positions_, data);
+  }
+
+  std::vector<float> GetOutputFloat() { return ExtractVector<float>(output_); }
+  std::vector<uint8_t> GetOutputUint8() {
+    return ExtractVector<uint8_t>(output_);
+  }
+  std::vector<string> GetOutputString() {
+    return ExtractVector<string>(output_);
+  }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+ protected:
+  int input_;
+  int positions_;
+  int output_;
+};
+
+TEST(GatherOpTest, Shuffle) {
+  GatherOpModel m({2, 2}, TensorType_FLOAT32, {2});
+  m.SetInputFloat({-2.0, 0.2, 0.7, 0.8});
+  m.SetPositions({1, 0});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputFloat(),
+              ElementsAreArray(ArrayFloatNear({0.7, 0.8, -2, 0.2})));
+}
+
+TEST(FloatGatherOpTest, Duplicate) {
+  GatherOpModel m({1, 2, 2}, TensorType_FLOAT32, {2});
+  m.SetInputFloat({-2.0, 0.2, 0.7, 0.8});
+  m.SetPositions({0, 0});
+  m.Invoke();
+  EXPECT_THAT(
+      m.GetOutputFloat(),
+      ElementsAreArray(ArrayFloatNear({-2, 0.2, 0.7, 0.8, -2, 0.2, 0.7, 0.8})));
+}
+
+TEST(FloatGatherOpTest, Slice) {
+  GatherOpModel m({4, 1}, TensorType_FLOAT32, {2});
+  m.SetInputFloat({-2.0, 0.2, 0.7, 0.8});
+  m.SetPositions({1, 3});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputFloat(), ElementsAreArray(ArrayFloatNear({0.2, 0.8})));
+}
+
+TEST(Uint8tGatherOpTest, Shuffle) {
+  GatherOpModel m({2, 2}, TensorType_UINT8, {2});
+  m.SetInputUint8({133, 134, 14, 15});
+  m.SetPositions({1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutputUint8(), ElementsAreArray({14, 15, 133, 134}));
+}
+
+TEST(GatherOpTest, SimpleString) {
+  GatherOpModel m({3}, TensorType_STRING, {2});
+  m.SetInput({"A", "B", "C"});
+  m.SetPositions({0, 2});
+  m.Invoke();
+  ASSERT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
+  EXPECT_THAT(m.GetOutputString(), ElementsAreArray({"A", "C"}));
+}
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc b/tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc
index 916a23225e2ad3c5645a7809169677a7a8880535..cb6038f9009a3865661e7b4f075c3033166d0f91 100644
--- a/tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc
+++ b/tensorflow/contrib/lite/kernels/hashtable_lookup_test.cc
@@ -170,7 +170,7 @@ TEST(HashtableLookupOpTest, TestString) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD
index 288534099b9e090ce0c223a401b4152ca6ffb61f..a3ecb2ebf6a889729954d1e447997c510e8ff6d4 100644
--- a/tensorflow/contrib/lite/kernels/internal/BUILD
+++ b/tensorflow/contrib/lite/kernels/internal/BUILD
@@ -124,6 +124,13 @@ config_setting(
     },
 )
 
+config_setting(
+    name = "freebsd",
+    values = {
+        "cpu": "freebsd",
+    },
+)
+
 cc_library(
     name = "optimized_base",
     srcs = [],
@@ -147,6 +154,7 @@ cc_library(
         ":x86": tflite_deps_intel,
         ":x86_64": tflite_deps_intel,
         ":darwin": tflite_deps_intel,
+        ":freebsd": tflite_deps_intel,
         "//conditions:default": [],
     }),
 )
@@ -224,6 +232,7 @@ cc_library(
         ":x86": tflite_deps_intel,
         ":x86_64": tflite_deps_intel,
         ":darwin": tflite_deps_intel,
+        ":freebsd": tflite_deps_intel,
         "//conditions:default": [],
     }),
 )
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
index 974611f52ac74cec275f978c5af5bd561688db78..da34c8aef94b1c69e661bd33fcb518e73034c4bd 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -311,6 +311,9 @@ struct FloatDepthwiseConvKernel<true, 0, 8> {
   }
 };
 
+// Note this implementation is very slow for input_depths < 8
+// (e.g. comparable to reference implementation) see, specializations for
+// input_depth=3 below.
 template <>
 struct FloatDepthwiseConvKernel<true, 0, 2> {
   static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
@@ -417,6 +420,74 @@ struct FloatDepthwiseConvKernel<true, 0, 2> {
   }
 };
 
+template <>
+struct FloatDepthwiseConvKernel<true, 3, 2> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    // Load the filters
+    float32x2_t filter[3];
+    for (int i = 0; i < 3; i++) {
+      filter[i] = vld1_f32(filter_ptr + 2 * i);
+    }
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++) {
+      const float32x2_t input01 = vld1_f32(input_ptr);
+      const float32x2_t input2 = vld1_dup_f32(input_ptr + 2);
+      // Load the accumulators from acc_buffer
+      float32x2_t acc[3];
+      for (int i = 0; i < 3; i++) {
+        acc[i] = vld1_f32(acc_buffer_ptr + 2 * i);
+      }
+      // Multiply-accumulate for each input channel there 2 outputs
+      acc[0] = vmla_lane_f32(acc[0], filter[0], input01, 0);
+      acc[1] = vmla_lane_f32(acc[1], filter[1], input01, 1);
+      acc[2] = vmla_lane_f32(acc[2], filter[2], input2, 0);
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 3; i++) {
+        vst1_f32(acc_buffer_ptr + 2 * i, acc[i]);
+      }
+      acc_buffer_ptr += 6;
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
+template <>
+struct FloatDepthwiseConvKernel<true, 3, 4> {
+  static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+                  const float* input_ptr, int input_ptr_increment,
+                  const float* filter_ptr, float* acc_buffer_ptr) {
+    // Load the filters
+    float32x4_t filter[3];
+    for (int i = 0; i < 3; i++) {
+      filter[i] = vld1q_f32(filter_ptr + 4 * i);
+    }
+    // Handle one output pixel at a time.
+    for (int outp = 0; outp < num_output_pixels; outp++) {
+      // NOTE: we only want 3 values, so we read it as two ops where
+      // the second op just duplicates the lane
+      const float32x2_t input01 = vld1_f32(input_ptr);
+      const float32x2_t input2 = vld1_dup_f32(input_ptr + 2);
+      // Load the accumulators from acc_buffer
+      float32x4_t acc[3];
+      for (int i = 0; i < 3; i++) {
+        acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+      }
+      // Multiply-accumulate all outputs.
+      acc[0] = vmlaq_lane_f32(acc[0], filter[0], input01, 0);
+      acc[1] = vmlaq_lane_f32(acc[1], filter[1], input01, 1);
+      acc[2] = vmlaq_lane_f32(acc[2], filter[2], input2, 0);
+      // Store the accumulators back to acc_buffer
+      for (int i = 0; i < 3; i++) {
+        vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+      }
+      acc_buffer_ptr += 12;
+      input_ptr += input_ptr_increment;
+    }
+  }
+};
+
 template <>
 struct FloatDepthwiseConvKernel<true, 1, 8> {
   static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
@@ -857,6 +928,8 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
   TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8)
   TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32)
   TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1)
+  TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 2)
+  TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 4)
   TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1)
 
   // Finally, the kernels allowing a variable input depth,
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 1cd6442c83db77affa17c3a494475c61a9717105..ded5ae8ff50cfc5337a5ea5f6e4880b701246aa6 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -1868,6 +1868,61 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims,
                output_data, output_dims);
 }
 
+// TODO(aselle): This is not actually optimized yet.
+inline void Div(const float* input1_data, const Dims<4>& input1_dims,
+                const float* input2_data, const Dims<4>& input2_dims,
+                float output_activation_min, float output_activation_max,
+                float* output_data, const Dims<4>& output_dims) {
+  const int batches =
+      MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3);
+  const int height =
+      MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2);
+  const int width =
+      MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1);
+  const int depth =
+      MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0);
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        for (int c = 0; c < depth; ++c) {
+          output_data[Offset(output_dims, c, x, y, b)] =
+              ActivationFunctionWithMinMax(
+                  input1_data[Offset(input1_dims, c, x, y, b)] /
+                      input2_data[Offset(input2_dims, c, x, y, b)],
+                  output_activation_min, output_activation_max);
+        }
+      }
+    }
+  }
+}
+
+// TODO(aselle): This is not actually optimized yet.
+inline void Sub(const float* input1_data, const Dims<4>& input1_dims,
+                const float* input2_data, const Dims<4>& input2_dims,
+                float output_activation_min, float output_activation_max,
+                float* output_data, const Dims<4>& output_dims) {
+  const int batches =
+      MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3);
+  const int height =
+      MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2);
+  const int width =
+      MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1);
+  const int depth =
+      MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0);
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        for (int c = 0; c < depth; ++c) {
+          output_data[Offset(output_dims, c, x, y, b)] =
+              ActivationFunctionWithMinMax(
+                  input1_data[Offset(input1_dims, c, x, y, b)] -
+                      input2_data[Offset(input2_dims, c, x, y, b)],
+                  output_activation_min, output_activation_max);
+        }
+      }
+    }
+  }
+}
 template <FusedActivationFunctionType Ac, typename Scalar>
 void Concatenation(int concat_dim, const Scalar* const* input_data,
                    const Dims<4>* const* input_dims, int inputs_count,
@@ -3381,10 +3436,11 @@ inline void SpaceToBatchND(const T* input_data, const Dims<4>& input_dims,
     for (int out_h = 0; out_h < output_height; ++out_h) {
       for (int out_w = 0; out_w < output_width; ++out_w) {
         T* out = output_data + Offset(output_dims, 0, out_w, out_h, out_b);
-        if (out_h * block_shape_height < padding_top ||
-            out_h * block_shape_height >= padding_top + input_height ||
-            out_w * block_shape_width < padding_left ||
-            out_w * block_shape_width >= padding_left + input_width) {
+        if (out_h * block_shape_height + shift_h < padding_top ||
+            out_h * block_shape_height + shift_h >=
+                padding_top + input_height ||
+            out_w * block_shape_width + shift_w < padding_left ||
+            out_w * block_shape_width + shift_w >= padding_left + input_width) {
           memset(out, 0, depth * sizeof(T));
         } else {
           const T* in =
@@ -3704,6 +3760,43 @@ void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims,
   auto max_value = input2_data[0];
   output_map.array() = input1_map.array().max(max_value);
 }
+
+template <typename T1, typename T2, typename T3>
+void ArgMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims,
+            T2* output_data, const Dims<4>& output_dims) {
+  gemmlowp::ScopedProfilingLabel label("ArgMax");
+
+  // The current ArgMax implemention can only determine the index of the maximum
+  // value in the last dimension. So the axis argument is ignored.
+  TFLITE_DCHECK_EQ(axis[0], 3);
+
+  // For ArgMax, the number of output dimensions = (number of input dimensions -
+  // 1). For the sake of simplicity, the output dimensions are equal to the
+  // input dimensions here. We enforce the constraint that the last dimension
+  // must always be 1.
+  TFLITE_DCHECK_EQ(ArraySize(output_dims, 0), 1);
+  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+  const int height = MatchingArraySize(input_dims, 2, output_dims, 2);
+  const int width = MatchingArraySize(input_dims, 1, output_dims, 1);
+  const int depth = ArraySize(input_dims, 0);
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        auto max_value = input_data[Offset(input_dims, 0, x, y, b)];
+        int max_index = 0;
+        for (int d = 1; d < depth; ++d) {
+          const auto& curr_value = input_data[Offset(input_dims, d, x, y, b)];
+          if (curr_value > max_value) {
+            max_value = curr_value;
+            max_index = d;
+          }
+        }
+        output_data[Offset(output_dims, 0, x, y, b)] = max_index;
+      }
+    }
+  }
+}
+
 }  // namespace optimized_ops
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h
index c2ab78000b81485f037c507933cd024e70f39850..7f90d731b8454a020ab273e6b5591ed90aab14c7 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h
@@ -22,7 +22,7 @@ limitations under the License.
 namespace tflite {
 namespace tensor_utils {
 
-// Limit a float input f betweeen +abs_limit and -abs_limit.
+// Limit a float input f between +abs_limit and -abs_limit.
 float PortableClip(float f, float abs_limit);
 
 // Multiply a matrix by a batch vector, and store results in a batch-size
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index f5c43f1fd98f130507f6b3f216c4a83593d26a13..7f1f3143e8e2fa1e4a7c2a1902920e9e86ad7f68 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -1149,6 +1149,60 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims,
                output_data, output_dims);
 }
 
+inline void Div(const float* input1_data, const Dims<4>& input1_dims,
+                const float* input2_data, const Dims<4>& input2_dims,
+                float output_activation_min, float output_activation_max,
+                float* output_data, const Dims<4>& output_dims) {
+  const int batches =
+      MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3);
+  const int height =
+      MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2);
+  const int width =
+      MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1);
+  const int depth =
+      MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0);
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        for (int c = 0; c < depth; ++c) {
+          output_data[Offset(output_dims, c, x, y, b)] =
+              ActivationFunctionWithMinMax(
+                  input1_data[Offset(input1_dims, c, x, y, b)] /
+                      input2_data[Offset(input2_dims, c, x, y, b)],
+                  output_activation_min, output_activation_max);
+        }
+      }
+    }
+  }
+}
+
+inline void Sub(const float* input1_data, const Dims<4>& input1_dims,
+                const float* input2_data, const Dims<4>& input2_dims,
+                float output_activation_min, float output_activation_max,
+                float* output_data, const Dims<4>& output_dims) {
+  const int batches =
+      MatchingArraySize(input1_dims, 3, input2_dims, 3, output_dims, 3);
+  const int height =
+      MatchingArraySize(input1_dims, 2, input2_dims, 2, output_dims, 2);
+  const int width =
+      MatchingArraySize(input1_dims, 1, input2_dims, 1, output_dims, 1);
+  const int depth =
+      MatchingArraySize(input1_dims, 0, input2_dims, 0, output_dims, 0);
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        for (int c = 0; c < depth; ++c) {
+          output_data[Offset(output_dims, c, x, y, b)] =
+              ActivationFunctionWithMinMax(
+                  input1_data[Offset(input1_dims, c, x, y, b)] -
+                      input2_data[Offset(input2_dims, c, x, y, b)],
+                  output_activation_min, output_activation_max);
+        }
+      }
+    }
+  }
+}
+
 template <FusedActivationFunctionType Ac, typename Scalar>
 void Concatenation(int concat_dim, const Scalar* const* input_data,
                    const Dims<4>* const* input_dims, int inputs_count,
@@ -2183,10 +2237,11 @@ inline void SpaceToBatchND(const T* input_data, const Dims<4>& input_dims,
     for (int out_h = 0; out_h < output_height; ++out_h) {
       for (int out_w = 0; out_w < output_width; ++out_w) {
         T* out = output_data + Offset(output_dims, 0, out_w, out_h, out_b);
-        if (out_h * block_shape_height < padding_top ||
-            out_h * block_shape_height >= padding_top + input_height ||
-            out_w * block_shape_width < padding_left ||
-            out_w * block_shape_width >= padding_left + input_width) {
+        if (out_h * block_shape_height + shift_h < padding_top ||
+            out_h * block_shape_height + shift_h >=
+                padding_top + input_height ||
+            out_w * block_shape_width + shift_w < padding_left ||
+            out_w * block_shape_width + shift_w >= padding_left + input_width) {
           memset(out, 0, depth * sizeof(T));
         } else {
           const T* in =
@@ -2335,6 +2390,64 @@ inline void Slice(const T* input_data, const Dims<4>& input_dims,
   }
 }
 
+template <typename T>
+inline void Mean(T* input_data, const int* input_dims, const int input_num_dims,
+                 T* output_data, const int* output_dims,
+                 const int output_num_dims, const int* axis,
+                 const int num_axis_dimensions, bool keep_dims, int* temp_index,
+                 int* resolved_axis) {
+  // resets output data.
+  size_t num_outputs = 1;
+  for (int idx = 0; idx < output_num_dims; ++idx) {
+    num_outputs *= static_cast<size_t>(output_dims[idx]);
+  }
+  for (size_t idx = 0; idx < num_outputs; ++idx) {
+    output_data[idx] = 0;
+  }
+  // resets temp index.
+  for (int idx = 0; idx < input_num_dims; ++idx) {
+    temp_index[idx] = 0;
+  }
+  // resolves axis.
+  int num_resolved_axis = 0;
+  for (int idx = 0; idx < num_axis_dimensions; ++idx) {
+    int current = axis[idx];
+    TFLITE_DCHECK(current < input_num_dims && current + input_num_dims >= 0);
+    if (current < 0) {
+      current += input_num_dims;
+    }
+    bool is_dup = false;
+    for (int j = 0; j < num_resolved_axis; ++j) {
+      if (resolved_axis[j] == current) {
+        is_dup = true;
+        break;
+      }
+    }
+    if (!is_dup) {
+      resolved_axis[num_resolved_axis++] = current;
+    }
+  }
+  // iterates through input_data.
+  for (bool has_next = true; has_next;
+       has_next = NextIndex(input_num_dims, input_dims, temp_index)) {
+    size_t input_offset =
+        ReducedOutputOffset(input_num_dims, input_dims, temp_index, 0, nullptr);
+    size_t output_offset =
+        ReducedOutputOffset(input_num_dims, input_dims, temp_index,
+                            num_resolved_axis, resolved_axis);
+    output_data[output_offset] += input_data[input_offset];
+  }
+  // takes average by num of elements added to get mean.
+  size_t num_elements_in_axis = 1;
+  for (int idx = 0; idx < num_resolved_axis; ++idx) {
+    num_elements_in_axis *= static_cast<size_t>(input_dims[resolved_axis[idx]]);
+  }
+  for (size_t idx = 0; idx < num_outputs; ++idx) {
+    output_data[idx] = static_cast<T>(static_cast<float>(output_data[idx]) /
+                                      num_elements_in_axis);
+  }
+}
+
 template <typename T>
 inline void Mean(const T* input_data, const Dims<4>& input_dims,
                  const std::vector<int>& reduction_indices, T* output_data,
@@ -2449,6 +2562,69 @@ void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims,
   }
 }
 
+template <typename T1, typename T2, typename T3>
+void ArgMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims,
+            T2* output_data, const Dims<4>& output_dims) {
+  // The current ArgMax implemention can only determine the index of the maximum
+  // value in the last dimension. So the axis argument is ignored.
+  TFLITE_DCHECK_EQ(axis[0], 3);
+
+  // For ArgMax, the number of output dimensions = (number of input dimensions -
+  // 1). For the sake of simplicity, the output dimensions are equal to the
+  // input dimensions here. We enforce the constraint that the last dimension
+  // must always be 1.
+  TFLITE_DCHECK_EQ(ArraySize(output_dims, 0), 1);
+  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
+  const int height = MatchingArraySize(input_dims, 2, output_dims, 2);
+  const int width = MatchingArraySize(input_dims, 1, output_dims, 1);
+  const int depth = ArraySize(input_dims, 0);
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        auto max_value = input_data[Offset(input_dims, 0, x, y, b)];
+        int max_index = 0;
+        for (int d = 1; d < depth; ++d) {
+          const auto& curr_value = input_data[Offset(input_dims, d, x, y, b)];
+          if (curr_value > max_value) {
+            max_value = curr_value;
+            max_index = d;
+          }
+        }
+        output_data[Offset(output_dims, 0, x, y, b)] = max_index;
+      }
+    }
+  }
+}
+
+template <typename T>
+void Transpose(const T* input, const Dims<4>& input_dims, T* output,
+               const Dims<4>& output_dims, int* permuted_axes) {
+  int out_sizes[4];
+  // Compute the inverse permutation array so we can do an output centered
+  // transpose. Also, check to make sure output_dims is matching input_dims.
+  for (int k = 0; k < 4; k++) {
+    out_sizes[k] =
+        MatchingArraySize(input_dims, permuted_axes[k], output_dims, k);
+  }
+
+  // Naive transpose loop (iterate on output index and compute input index).
+  int o[4];  // loop index (on output).
+  int i[4];
+  for (o[3] = 0; o[3] < out_sizes[3]; o[3]++) {
+    i[permuted_axes[3]] = o[3];
+    for (o[2] = 0; o[2] < out_sizes[2]; o[2]++) {
+      i[permuted_axes[2]] = o[2];
+      for (o[1] = 0; o[1] < out_sizes[1]; o[1]++) {
+        i[permuted_axes[1]] = o[1];
+        for (o[0] = 0; o[0] < out_sizes[0]; o[0]++) {
+          i[permuted_axes[0]] = o[0];
+          output[Offset(output_dims, o)] = input[Offset(input_dims, i)];
+        }
+      }
+    }
+  }
+}
+
 }  // namespace reference_ops
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/tensor.h b/tensorflow/contrib/lite/kernels/internal/tensor.h
index ee4111e0416560d94d513c528971bdf3bf819662..1961e1a2d5ecd4fd20c6f442b79dc88ed28062fe 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor.h
+++ b/tensorflow/contrib/lite/kernels/internal/tensor.h
@@ -41,8 +41,7 @@ inline int32_t* GetTensorData(TfLiteTensor* tensor) {
 
 template <>
 inline int64_t* GetTensorData(TfLiteTensor* tensor) {
-  return tensor != nullptr ? reinterpret_cast<int64_t*>(tensor->data.raw)
-                           : nullptr;
+  return tensor != nullptr ? tensor->data.i64 : nullptr;
 }
 
 inline int RemapDim(int max_dimensions, int d) {
diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_utils.h b/tensorflow/contrib/lite/kernels/internal/tensor_utils.h
index 0e69ef5982f01e364d865684652d1dfecab6fee3..e7e2994397650004c7ba442fa1803290e6b12302 100644
--- a/tensorflow/contrib/lite/kernels/internal/tensor_utils.h
+++ b/tensorflow/contrib/lite/kernels/internal/tensor_utils.h
@@ -20,7 +20,7 @@ limitations under the License.
 namespace tflite {
 namespace tensor_utils {
 
-// Limit a float input f betweeen +abs_limit and -abs_limit.
+// Limit a float input f between +abs_limit and -abs_limit.
 float Clip(float f, float abs_limit);
 
 // Multiply a matrix by a batch vector, and store results in a batch-size
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index 07f1cb40045fff3ae47ed4efa6ec43b0cb88a0a7..5989ac8fcdec101c14dd7b04d89fe8c7bfce0a10 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -27,6 +27,58 @@ struct Dims {
   int strides[N];
 };
 
+// Gets next index to iterate through a multidimensional array.
+inline bool NextIndex(const int num_dims, const int* dims, int* current) {
+  TFLITE_DCHECK_GT(num_dims, 0);
+  TFLITE_DCHECK(dims != nullptr);
+  TFLITE_DCHECK(current != nullptr);
+  int carry = 1;
+  for (int idx = num_dims - 1; idx >= 0; --idx) {
+    int current_val = current[idx] + carry;
+    TFLITE_DCHECK_GE(dims[idx], current_val);
+    if (dims[idx] == current_val) {
+      current[idx] = 0;
+    } else {
+      current[idx] = current_val;
+      carry = 0;
+      break;
+    }
+  }
+  return (carry == 0);
+}
+
+// Gets offset of index if reducing on axis. When reducing, the flattened offset
+// will not change, if the input index changes on the given axis. For example,
+// if you have a 3D tensor and you are reducing to 2D by eliminating axis 0,
+// then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened
+// offset.
+// TODO(kanlig): uses Dims to represent dimensions.
+inline size_t ReducedOutputOffset(const int num_dims, const int* dims,
+                                  const int* index, const int num_axis,
+                                  const int* axis) {
+  TFLITE_DCHECK_GT(num_dims, 0);
+  TFLITE_DCHECK(dims != nullptr);
+  TFLITE_DCHECK(index != nullptr);
+  size_t offset = 0;
+  for (int idx = 0; idx < num_dims; ++idx) {
+    // if we need to skip this axis
+    bool is_axis = false;
+    if (axis != nullptr) {
+      for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
+        if (idx == axis[axis_idx]) {
+          is_axis = true;
+          break;
+        }
+      }
+    }
+    if (!is_axis) {
+      offset = offset * static_cast<size_t>(dims[idx]) +
+               static_cast<size_t>(index[idx]);
+    }
+  }
+  return offset;
+}
+
 inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) {
   TFLITE_DCHECK(i0 >= 0 && i0 < dims.sizes[0]);
   TFLITE_DCHECK(i1 >= 0 && i1 < dims.sizes[1]);
@@ -36,6 +88,10 @@ inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) {
          i3 * dims.strides[3];
 }
 
+inline int Offset(const Dims<4>& dims, int* index) {
+  return Offset(dims, index[0], index[1], index[2], index[3]);
+}
+
 // Get array size, DCHECKing that the dim index is in range.
 template <int N>
 int ArraySize(const Dims<N>& array, int index) {
diff --git a/tensorflow/contrib/lite/kernels/l2norm.cc b/tensorflow/contrib/lite/kernels/l2norm.cc
index f43aa372b6398a38e57dd38f3d7c7db2bd3aefc1..ee8bfe56d95e9f383ef49b40b8f58b63d61da3e1 100644
--- a/tensorflow/contrib/lite/kernels/l2norm.cc
+++ b/tensorflow/contrib/lite/kernels/l2norm.cc
@@ -43,8 +43,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TfLiteTensor* input = GetInput(context, node, kInputTensor);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
-  // TODO(ahentz): Our current implementations rely on the inputs being 4D.
-  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
+  TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
 
   // TODO(ahentz): Our current implementations only support float32.
   TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32);
@@ -54,12 +53,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // activations.
   TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
 
-  TfLiteIntArray* output_size = TfLiteIntArrayCreate(4);
-  output_size->data[0] = input->dims->data[0];
-  output_size->data[1] = input->dims->data[1];
-  output_size->data[2] = input->dims->data[2];
-  output_size->data[3] = input->dims->data[3];
-
+  TfLiteIntArray* output_size = TfLiteIntArrayCopy(input->dims);
   return context->ResizeTensor(context, output, output_size);
 }
 
diff --git a/tensorflow/contrib/lite/kernels/l2norm_test.cc b/tensorflow/contrib/lite/kernels/l2norm_test.cc
index b1db89b8bd3474ac868d7215e4a0de12088c48ef..30e103f3303484c339ef98e6a68e0438291c102f 100644
--- a/tensorflow/contrib/lite/kernels/l2norm_test.cc
+++ b/tensorflow/contrib/lite/kernels/l2norm_test.cc
@@ -57,7 +57,7 @@ TEST(L2NormOpTest, SimpleTest) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/local_response_norm_test.cc b/tensorflow/contrib/lite/kernels/local_response_norm_test.cc
index 63a8b0a3d0186def7da2c9f31481721f1a55281c..d75ce258a04c820d8f82735988c01d0154ef36f2 100644
--- a/tensorflow/contrib/lite/kernels/local_response_norm_test.cc
+++ b/tensorflow/contrib/lite/kernels/local_response_norm_test.cc
@@ -95,7 +95,7 @@ TEST(LocalResponseNormOpTest, SmallRadius) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/lsh_projection_test.cc b/tensorflow/contrib/lite/kernels/lsh_projection_test.cc
index 1011927848d586c8541fb694914b5eee123cb8dc..414d728dfc153058ec878d3c766f58e86815cd3f 100644
--- a/tensorflow/contrib/lite/kernels/lsh_projection_test.cc
+++ b/tensorflow/contrib/lite/kernels/lsh_projection_test.cc
@@ -117,7 +117,7 @@ TEST(LSHProjectionOpTest2, Sparse3DInputs) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/lstm_test.cc b/tensorflow/contrib/lite/kernels/lstm_test.cc
index be4c7ddbf88fc902368cda13aff72f5aecb9dac4..c068286b0d84bcb51ebb0e239350a42863de6523 100644
--- a/tensorflow/contrib/lite/kernels/lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/lstm_test.cc
@@ -1081,8 +1081,7 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/mean.cc b/tensorflow/contrib/lite/kernels/mean.cc
new file mode 100644
index 0000000000000000000000000000000000000000..540e5a364dd60a42c316199d0ebe878ae07e6756
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/mean.cc
@@ -0,0 +1,200 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <string.h>
+#include <vector>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace mean {
+
+// This file has reference implementation of Mean.
+enum KernelType {
+  kReference,
+};
+
+struct MeanContext {
+  MeanContext(TfLiteContext* context, TfLiteNode* node) {
+    params = reinterpret_cast<TfLiteMeanParams*>(node->builtin_data);
+    input = GetInput(context, node, 0);
+    output = GetOutput(context, node, 0);
+  }
+  TfLiteMeanParams* params;
+  TfLiteTensor* input;
+  TfLiteTensor* output;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  // Creates two temp tensors to store index and axis for internal
+  // implementation only.
+  auto* scratch_tensor_index = new int;
+  context->AddTensors(context, 2, scratch_tensor_index);
+  return scratch_tensor_index;
+}
+
+void Free(TfLiteContext* context, void* buffer) {
+  delete reinterpret_cast<int*>(buffer);
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  MeanContext op_context(context, node);
+  int input_num_dims = NumDimensions(op_context.input);
+  int axis_num_dims = op_context.params->num_axis_dimensions;
+
+  // Creates a temp index to iterate through input data.
+  int* scratch_tensor_index = reinterpret_cast<int*>(node->user_data);
+  TfLiteIntArrayFree(node->temporaries);
+  node->temporaries = TfLiteIntArrayCreate(2);
+  node->temporaries->data[0] = *scratch_tensor_index;
+  TfLiteTensor* scratch_tensor = &context->tensors[node->temporaries->data[0]];
+  scratch_tensor->type = kTfLiteInt32;
+  scratch_tensor->allocation_type = kTfLiteArenaRw;
+  TfLiteIntArray* index_size = TfLiteIntArrayCreate(1);
+  index_size->data[0] = input_num_dims;
+  TF_LITE_ENSURE_OK(context,
+                    context->ResizeTensor(context, scratch_tensor, index_size));
+
+  // Creates a temp tensor to store resolved axis given input data.
+  node->temporaries->data[1] = *scratch_tensor_index + 1;
+  TfLiteTensor* axis_tensor = &context->tensors[node->temporaries->data[1]];
+  axis_tensor->type = kTfLiteInt32;
+  axis_tensor->allocation_type = kTfLiteArenaRw;
+  TfLiteIntArray* axis_size = TfLiteIntArrayCreate(1);
+  axis_size->data[0] = op_context.params->num_axis_dimensions;
+  TF_LITE_ENSURE_OK(context,
+                    context->ResizeTensor(context, axis_tensor, axis_size));
+
+  // Determines size of output tensor.
+  const TfLiteIntArray* input_dims = op_context.input->dims;
+  const int* axis = op_context.params->axis;
+  if (op_context.params->keep_dims) {
+    TfLiteIntArray* output_dims = TfLiteIntArrayCreate(input_num_dims);
+    for (int idx = 0; idx < input_num_dims; ++idx) {
+      bool is_axis = false;
+      for (int axis_idx = 0; axis_idx < axis_num_dims; ++axis_idx) {
+        if (axis[axis_idx] == idx || axis[axis_idx] + input_num_dims == idx) {
+          is_axis = true;
+          break;
+        }
+      }
+      if (is_axis) {
+        output_dims->data[idx] = 1;
+      } else {
+        output_dims->data[idx] = input_dims->data[idx];
+      }
+    }
+    return context->ResizeTensor(context, op_context.output, output_dims);
+  } else {
+    // Calculates size of reducing axis.
+    int num_reduce_axis = axis_num_dims;
+    for (int i = 0; i < axis_num_dims; ++i) {
+      int current = axis[i];
+      if (current < 0) {
+        current += input_num_dims;
+      }
+      TF_LITE_ENSURE(context, current >= 0 && current < input_num_dims);
+      for (int j = 0; j < i; ++j) {
+        int previous = axis[j];
+        if (previous < 0) {
+          previous += input_num_dims;
+        }
+        if (current == previous) {
+          --num_reduce_axis;
+          break;
+        }
+      }
+    }
+    // Determines output dimensions.
+    TfLiteIntArray* output_dims =
+        TfLiteIntArrayCreate(input_num_dims - num_reduce_axis);
+    int num_skip_axis = 0;
+    for (int idx = 0; idx < input_num_dims; ++idx) {
+      bool is_axis = false;
+      for (int axis_idx = 0; axis_idx < axis_num_dims; ++axis_idx) {
+        if (axis[axis_idx] == idx || axis[axis_idx] + input_num_dims == idx) {
+          ++num_skip_axis;
+          is_axis = true;
+          break;
+        }
+      }
+      if (!is_axis) {
+        output_dims->data[idx - num_skip_axis] = input_dims->data[idx];
+      }
+    }
+    return context->ResizeTensor(context, op_context.output, output_dims);
+  }
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  MeanContext op_context(context, node);
+  TfLiteTensor* temp_index = &context->tensors[node->temporaries->data[0]];
+  TfLiteTensor* resolved_axis = &context->tensors[node->temporaries->data[1]];
+
+#define TF_LITE_MEAN(kernel_type, data_type)                           \
+  kernel_type::Mean<>(                                                 \
+      GetTensorData<data_type>(op_context.input),                      \
+      op_context.input->dims->data, op_context.input->dims->size,      \
+      GetTensorData<data_type>(op_context.output),                     \
+      op_context.output->dims->data, op_context.output->dims->size,    \
+      op_context.params->axis, op_context.params->num_axis_dimensions, \
+      op_context.params->keep_dims, GetTensorData<int>(temp_index),    \
+      GetTensorData<int>(resolved_axis))
+
+  if (kernel_type == kReference) {
+    switch (op_context.input->type) {
+      case kTfLiteFloat32:
+        TF_LITE_MEAN(reference_ops, float);
+        break;
+      case kTfLiteInt32:
+        TF_LITE_MEAN(reference_ops, int);
+        break;
+      case kTfLiteUInt8:
+        TF_LITE_MEAN(reference_ops, uint8_t);
+        break;
+      case kTfLiteInt64:
+        TF_LITE_MEAN(reference_ops, int64_t);
+        break;
+      default:
+        return kTfLiteError;
+    }
+  }
+#undef TF_LITE_MEAN
+  return kTfLiteOk;
+}
+
+}  // namespace mean
+
+TfLiteRegistration* Register_MEAN_REF() {
+  static TfLiteRegistration r = {mean::Init, mean::Free, mean::Prepare,
+                                 mean::Eval<mean::kReference>};
+  return &r;
+}
+
+// TODO(kanlig): add optimized implementation of Mean.
+TfLiteRegistration* Register_MEAN() { return Register_MEAN_REF(); }
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/mean_test.cc b/tensorflow/contrib/lite/kernels/mean_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4305c0632f5a52b858a056109187ad4a0cc2e46e
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/mean_test.cc
@@ -0,0 +1,90 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class BaseMeanOpModel : public SingleOpModel {
+ public:
+  BaseMeanOpModel(const TensorData& input, const TensorData& output,
+                  std::initializer_list<int> axis, bool keep_dims) {
+    input_ = AddInput(input);
+    output_ = AddOutput(output);
+    SetBuiltinOp(
+        BuiltinOperator_MEAN, BuiltinOptions_MeanOptions,
+        CreateMeanOptions(builder_, builder_.CreateVector<int>(axis), keep_dims)
+            .Union());
+    BuildInterpreter({GetShape(input_)});
+  }
+
+  int input() { return input_; }
+
+ protected:
+  int input_;
+  int output_;
+};
+
+class FloatMeanOpModel : public BaseMeanOpModel {
+ public:
+  using BaseMeanOpModel::BaseMeanOpModel;
+
+  void SetInput(std::initializer_list<float> data) {
+    PopulateTensor(input_, data);
+  }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+};
+
+TEST(FloatMeanOpTest, NotKeepDims) {
+  std::initializer_list<float> data = {
+      1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,  9.0,  10.0, 11.0, 12.0,
+      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+  FloatMeanOpModel m({TensorType_FLOAT32, {4, 3, 2}}, {TensorType_FLOAT32, {2}},
+                     {1, 0, -3, -3}, false);
+  m.SetInput(data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({12, 13})));
+}
+
+TEST(FloatMeanOpTest, KeepDims) {
+  std::initializer_list<float> data = {
+      1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,  9.0,  10.0, 11.0, 12.0,
+      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+  FloatMeanOpModel m({TensorType_FLOAT32, {4, 3, 2}}, {TensorType_FLOAT32, {3}},
+                     {0, 2}, true);
+  m.SetInput(data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 3, 1}));
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray(ArrayFloatNear({10.5, 12.5, 14.5})));
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/kernels/mul_test.cc b/tensorflow/contrib/lite/kernels/mul_test.cc
index 4b858e1f396252e7f7bdc231bc1e00f47277f08a..8838b300c0af167bf2ffcf944fc7c31d6173f462 100644
--- a/tensorflow/contrib/lite/kernels/mul_test.cc
+++ b/tensorflow/contrib/lite/kernels/mul_test.cc
@@ -78,9 +78,10 @@ TEST(FloatMulOpTest, NoActivation) {
               ElementsAreArray(ArrayFloatNear({-0.2, 0.04, 0.21, 0.4})));
 }
 
-TEST(FloatMulOpTest, ActivationRELU1) {
+TEST(FloatMulOpTest, ActivationRELU_N1_TO_1) {
   FloatMulOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}},
-                    {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU1);
+                    {TensorType_FLOAT32, {}},
+                    ActivationFunctionType_RELU_N1_TO_1);
   m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.7, 0.8});
   m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 5});
   m.Invoke();
@@ -120,8 +121,7 @@ TEST(QuantizedMulOpTest, NoActivation) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/op_macros.h b/tensorflow/contrib/lite/kernels/op_macros.h
index 7535afaf8ea52d855e2e4773e56ce2118a16447c..63670efcb1e6349317aa5c75756707fb7a7fa2aa 100644
--- a/tensorflow/contrib/lite/kernels/op_macros.h
+++ b/tensorflow/contrib/lite/kernels/op_macros.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_OP_UTIL_H_
 #define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_OP_UTIL_H_
 
+#include <cstdio>
+
 #define TF_LITE_FATAL(msg)          \
   do {                              \
     fprintf(stderr, "%s\n", (msg)); \
diff --git a/tensorflow/contrib/lite/kernels/optional_tensor_test.cc b/tensorflow/contrib/lite/kernels/optional_tensor_test.cc
index 8e9cc07656c8bea83f7cb78ca0b6cc5de7ad1b73..17166715ca30ff3d8ba3d384110e403f8910e39d 100644
--- a/tensorflow/contrib/lite/kernels/optional_tensor_test.cc
+++ b/tensorflow/contrib/lite/kernels/optional_tensor_test.cc
@@ -334,8 +334,7 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/pad.cc b/tensorflow/contrib/lite/kernels/pad.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1a0d9d1505d41fb7948863f9da9e2a4f1b61e4f9
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/pad.cc
@@ -0,0 +1,159 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <string.h>
+#include <vector>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace pad {
+
+// This file has two implementations of Pad.
+enum KernelType {
+  kReference,
+  kGenericOptimized,
+};
+
+// TODO(nupurgarg): Padding represented as a tensor is ignored. Only use the
+// `left_padding` and `right_padding` specified in `params`.
+struct PadContext {
+  PadContext(TfLiteContext* context, TfLiteNode* node) {
+    params = reinterpret_cast<TfLitePadParams*>(node->builtin_data);
+    input = GetInput(context, node, 0);
+    output = GetOutput(context, node, 0);
+  }
+  TfLitePadParams* params;
+  TfLiteTensor* input;
+  TfLiteTensor* output;
+};
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  // Determines size of output tensor.
+  PadContext op_context(context, node);
+  int dims = NumDimensions(op_context.input);
+  TF_LITE_ENSURE_EQ(context, dims, op_context.params->num_dimensions);
+  TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
+
+  // TODO(nupurgarg): Our current implementations rely on the inputs being 4D.
+  TF_LITE_ENSURE_EQ(context, dims, 4);
+
+  const TfLiteIntArray* input_size = op_context.input->dims;
+  TfLiteIntArray* output_size = TfLiteIntArrayCreate(dims);
+  for (int idx = 0; idx < dims; ++idx) {
+    TF_LITE_ENSURE_MSG(context,
+                       (op_context.params->before_padding[idx] >= 0 &&
+                        op_context.params->after_padding[idx] >= 0),
+                       "Pad value has to be greater than equal to 0.");
+    output_size->data[idx] =
+        (input_size->data[idx] + op_context.params->before_padding[idx] +
+         op_context.params->after_padding[idx]);
+  }
+
+  return context->ResizeTensor(context, op_context.output, output_size);
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  PadContext op_context(context, node);
+
+  std::vector<int> before_padding(
+      op_context.params->before_padding,
+      op_context.params->before_padding + op_context.params->num_dimensions);
+  std::vector<int> after_padding(
+      op_context.params->after_padding,
+      op_context.params->after_padding + op_context.params->num_dimensions);
+
+  // TODO(nupurgarg): Change TOCO's implementation to use padding arrays
+  // in forward order (depth, width, height, batch).
+  // Converts from int[] = {depth, width, height, batch} to int[] = {batch,
+  // height, width, depth} to match TOCO's implementation of pad in
+  // referenced_ops.h and optimized_ops.h.
+  std::reverse(before_padding.begin(), before_padding.end());
+  std::reverse(after_padding.begin(), after_padding.end());
+
+#define TF_LITE_PAD(type, scalar)                                           \
+  type::Pad(GetTensorData<scalar>(op_context.input),                        \
+            GetTensorDims(op_context.input), before_padding, after_padding, \
+            GetTensorData<scalar>(op_context.output),                       \
+            GetTensorDims(op_context.output))
+
+  switch (op_context.input->type) {
+    case kTfLiteFloat32:
+      if (kernel_type == kReference) {
+        TF_LITE_PAD(reference_ops, float);
+      } else if (kernel_type == kGenericOptimized) {
+        TF_LITE_PAD(optimized_ops, float);
+      }
+      break;
+    case kTfLiteUInt8:
+      if (kernel_type == kReference) {
+        TF_LITE_PAD(reference_ops, uint8_t);
+      } else if (kernel_type == kGenericOptimized) {
+        TF_LITE_PAD(optimized_ops, uint8_t);
+      }
+      break;
+    case kTfLiteInt32:
+      if (kernel_type == kReference) {
+        TF_LITE_PAD(reference_ops, int32_t);
+      } else if (kernel_type == kGenericOptimized) {
+        TF_LITE_PAD(optimized_ops, int32_t);
+      }
+      break;
+    case kTfLiteInt64:
+      if (kernel_type == kReference) {
+        TF_LITE_PAD(reference_ops, int64_t);
+      } else if (kernel_type == kGenericOptimized) {
+        TF_LITE_PAD(optimized_ops, int64_t);
+      }
+      break;
+    default:
+      context->ReportError(context, "Type is currently not supported by Pad.");
+      return kTfLiteError;
+  }
+#undef TF_LITE_PAD
+  return kTfLiteOk;
+}
+
+}  // namespace pad
+
+TfLiteRegistration* Register_PAD_REF() {
+  static TfLiteRegistration r = {nullptr, nullptr, pad::Prepare,
+                                 pad::Eval<pad::kReference>};
+  return &r;
+}
+
+TfLiteRegistration* Register_PAD_GENERIC_OPT() {
+  static TfLiteRegistration r = {nullptr, nullptr, pad::Prepare,
+                                 pad::Eval<pad::kGenericOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_PAD() {
+  return Register_PAD_GENERIC_OPT();
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/pad_test.cc b/tensorflow/contrib/lite/kernels/pad_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f3ea9417df0e61dcff7a877726ab91c9b22691ba
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/pad_test.cc
@@ -0,0 +1,99 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class PadOpModel : public SingleOpModel {
+ public:
+  PadOpModel(std::initializer_list<int> input_shape,
+             std::initializer_list<int> before_padding,
+             std::initializer_list<int> after_padding) {
+    input_ = AddInput(TensorType_FLOAT32);
+    output_ = AddOutput(TensorType_FLOAT32);
+    SetBuiltinOp(
+        BuiltinOperator_PAD, BuiltinOptions_PadOptions,
+        CreatePadOptions(builder_, builder_.CreateVector<int>(before_padding),
+                         builder_.CreateVector<int>(after_padding))
+            .Union());
+    BuildInterpreter({input_shape});
+  }
+
+  void SetInput(std::initializer_list<float> data) {
+    PopulateTensor<float>(input_, data);
+  }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+ private:
+  int input_;
+  int output_;
+};
+
+TEST(PadOpTest, TooManyDimensions) {
+  EXPECT_DEATH(
+      PadOpModel({1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 3, 4, 5, 6, 7, 8, 9},
+                 {1, 2, 3, 4, 5, 6, 7, 8, 9}),
+      "dims != 4");
+}
+
+// TODO(nupurgarg): Test case where before padding and after padding arrays
+// don't contain the same number of dimensions.
+TEST(PadOpTest, UnequalDimensions) {
+  EXPECT_DEATH(PadOpModel({1, 1, 2, 1}, {1, 2, 3}, {1, 2, 3}),
+               "dims != op_context.params->num_dimensions");
+}
+
+TEST(PadOpTest, InvalidPadValue) {
+  EXPECT_DEATH(PadOpModel({1, 1, 2, 1}, {0, 1, 2, 0}, {0, -1, -1, 0}),
+               "Pad value has to be greater than equal to 0.");
+}
+
+TEST(PadOpTest, SimpleTest) {
+  PadOpModel m({1, 2, 2, 1}, {0, 1, 1, 0}, {0, 1, 1, 0});
+  m.SetInput({1, 2, 3, 4});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4,
+                                               0, 0, 0, 0, 0}));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
+}
+
+TEST(PadOpTest, AdvancedTest) {
+  // The padding is input in the order of batch, height, width, depth.
+  PadOpModel m({1, 2, 3, 1}, {0, 0, 1, 0}, {0, 2, 3, 0});
+  m.SetInput({1, 2, 3, 4, 5, 6});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray({0, 1, 2, 3, 0, 0, 0, 0, 4, 5, 6, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 7, 1}));
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/kernels/pooling_test.cc b/tensorflow/contrib/lite/kernels/pooling_test.cc
index e1b51ec7d5141bf2a41e7ede3e90ff20ec523819..01c91b2ba905e249c36af19f175c68a7e7f17f6d 100644
--- a/tensorflow/contrib/lite/kernels/pooling_test.cc
+++ b/tensorflow/contrib/lite/kernels/pooling_test.cc
@@ -155,7 +155,7 @@ TEST(FloatPoolingOpTest, L2Pool) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc
index ca7a0dd1949a3a31d26be770a7df781cc5fe7533..45ad5f18903927ff8f2743e96c167cfcb11bdcca 100644
--- a/tensorflow/contrib/lite/kernels/register.cc
+++ b/tensorflow/contrib/lite/kernels/register.cc
@@ -20,7 +20,7 @@ namespace ops {
 namespace builtin {
 
 TfLiteRegistration* Register_RELU();
-TfLiteRegistration* Register_RELU1();
+TfLiteRegistration* Register_RELU_N1_TO_1();
 TfLiteRegistration* Register_RELU6();
 TfLiteRegistration* Register_TANH();
 TfLiteRegistration* Register_LOGISTIC();
@@ -31,6 +31,7 @@ TfLiteRegistration* Register_CONV_2D();
 TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
 TfLiteRegistration* Register_SVDF();
 TfLiteRegistration* Register_RNN();
+TfLiteRegistration* Register_UNIDIRECTIONAL_SEQUENCE_RNN();
 TfLiteRegistration* Register_EMBEDDING_LOOKUP();
 TfLiteRegistration* Register_EMBEDDING_LOOKUP_SPARSE();
 TfLiteRegistration* Register_FULLY_CONNECTED();
@@ -39,18 +40,27 @@ TfLiteRegistration* Register_HASHTABLE_LOOKUP();
 TfLiteRegistration* Register_SOFTMAX();
 TfLiteRegistration* Register_CONCATENATION();
 TfLiteRegistration* Register_ADD();
+TfLiteRegistration* Register_SPACE_TO_BATCH_ND();
+TfLiteRegistration* Register_DIV();
+TfLiteRegistration* Register_SUB();
+TfLiteRegistration* Register_BATCH_TO_SPACE_ND();
 TfLiteRegistration* Register_MUL();
 TfLiteRegistration* Register_L2_NORMALIZATION();
 TfLiteRegistration* Register_LOCAL_RESPONSE_NORMALIZATION();
 TfLiteRegistration* Register_LSTM();
+TfLiteRegistration* Register_PAD();
 TfLiteRegistration* Register_RESHAPE();
 TfLiteRegistration* Register_RESIZE_BILINEAR();
 TfLiteRegistration* Register_SKIP_GRAM();
 TfLiteRegistration* Register_SPACE_TO_DEPTH();
+TfLiteRegistration* Register_GATHER();
+TfLiteRegistration* Register_TRANSPOSE();
+TfLiteRegistration* Register_MEAN();
+TfLiteRegistration* Register_SQUEEZE();
 
 BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_RELU, Register_RELU());
-  AddBuiltin(BuiltinOperator_RELU1, Register_RELU1());
+  AddBuiltin(BuiltinOperator_RELU_N1_TO_1, Register_RELU_N1_TO_1());
   AddBuiltin(BuiltinOperator_RELU6, Register_RELU6());
   AddBuiltin(BuiltinOperator_TANH, Register_TANH());
   AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC());
@@ -61,6 +71,8 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D());
   AddBuiltin(BuiltinOperator_SVDF, Register_SVDF());
   AddBuiltin(BuiltinOperator_RNN, Register_RNN());
+  AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
+             Register_UNIDIRECTIONAL_SEQUENCE_RNN());
   AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP, Register_EMBEDDING_LOOKUP());
   AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
              Register_EMBEDDING_LOOKUP_SPARSE());
@@ -70,15 +82,24 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX());
   AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION());
   AddBuiltin(BuiltinOperator_ADD, Register_ADD());
+  AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND());
+  AddBuiltin(BuiltinOperator_BATCH_TO_SPACE_ND, Register_BATCH_TO_SPACE_ND());
   AddBuiltin(BuiltinOperator_MUL, Register_MUL());
   AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION());
   AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
              Register_LOCAL_RESPONSE_NORMALIZATION());
   AddBuiltin(BuiltinOperator_LSTM, Register_LSTM());
+  AddBuiltin(BuiltinOperator_PAD, Register_PAD());
   AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
   AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, Register_RESIZE_BILINEAR());
   AddBuiltin(BuiltinOperator_SKIP_GRAM, Register_SKIP_GRAM());
   AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH());
+  AddBuiltin(BuiltinOperator_GATHER, Register_GATHER());
+  AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE());
+  AddBuiltin(BuiltinOperator_MEAN, Register_MEAN());
+  AddBuiltin(BuiltinOperator_DIV, Register_DIV());
+  AddBuiltin(BuiltinOperator_SUB, Register_SUB());
+  AddBuiltin(BuiltinOperator_SQUEEZE, Register_SQUEEZE());
 }
 
 TfLiteRegistration* BuiltinOpResolver::FindOp(
diff --git a/tensorflow/contrib/lite/kernels/reshape_test.cc b/tensorflow/contrib/lite/kernels/reshape_test.cc
index 59ce7d5648c04f78123b16a195d3a4928d28394b..0fbcf6e6aa311d2cac491336ee54ccf58bbda8fd 100644
--- a/tensorflow/contrib/lite/kernels/reshape_test.cc
+++ b/tensorflow/contrib/lite/kernels/reshape_test.cc
@@ -83,8 +83,7 @@ TEST(ReshapeOpTest, WithStretchDimension) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/resize_bilinear_test.cc b/tensorflow/contrib/lite/kernels/resize_bilinear_test.cc
index 0257c0b557feb352413bcc33cb4e2ecdb32c5111..314a71e210d9b5ea75bb137ef228273ef48f28b5 100644
--- a/tensorflow/contrib/lite/kernels/resize_bilinear_test.cc
+++ b/tensorflow/contrib/lite/kernels/resize_bilinear_test.cc
@@ -111,7 +111,7 @@ TEST(ResizeBilinearOpTest, ThreeDimensionalResize) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/skip_gram_test.cc b/tensorflow/contrib/lite/kernels/skip_gram_test.cc
index e7f6bc904be5e4c23a88f5b4ae7e199346c78ab2..185b64cb44969b57588ea5d0b40f55b6ddf8e11f 100644
--- a/tensorflow/contrib/lite/kernels/skip_gram_test.cc
+++ b/tensorflow/contrib/lite/kernels/skip_gram_test.cc
@@ -251,7 +251,7 @@ TEST(SkipGramTest, TestInputWithExtraSpace) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/softmax_test.cc b/tensorflow/contrib/lite/kernels/softmax_test.cc
index ec8ec03b0d0279cad8543352b1dbaf34c88a7957..6c5338ff0fd26337c9adc8e0b94a0a88edfde37f 100644
--- a/tensorflow/contrib/lite/kernels/softmax_test.cc
+++ b/tensorflow/contrib/lite/kernels/softmax_test.cc
@@ -136,8 +136,7 @@ TEST(SoftmaxOpTest, CompareWithTFminiBetaNotEq1) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/space_to_batch_nd.cc b/tensorflow/contrib/lite/kernels/space_to_batch_nd.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2e22d0db56a233bf554c57cf86275832ce941a18
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/space_to_batch_nd.cc
@@ -0,0 +1,182 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <string.h>
+#include <vector>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace space_to_batch_nd {
+
+// This file has two implementations of SpaceToBatchND.
+enum KernelType {
+  kReference,
+  kGenericOptimized,
+};
+
+// Inputs specified in the 2nd tensor (block_shape) and 3rd tensor (paddings)
+// are ignored. Only use the `block_shape` and `paddings` specified in params.
+// TODO(nupurgarg): Support inputs as tensors in SpaceToBatchND.
+struct SpaceToBatchNDContext {
+  SpaceToBatchNDContext(TfLiteContext* context, TfLiteNode* node) {
+    params = reinterpret_cast<TfLiteSpaceToBatchNDParams*>(node->builtin_data);
+    input = GetInput(context, node, 0);
+    output = GetOutput(context, node, 0);
+  }
+  TfLiteSpaceToBatchNDParams* params;
+  TfLiteTensor* input;
+  TfLiteTensor* output;
+};
+
+// Currently, only 4D NHWC input/output op_context are supported.
+// The 4D array need to have exactly 2 spatial dimensions.
+// TODO(nupurgarg): Support arbitrary dimension in SpaceToBatchND.
+const int kInputDimensionNum = 4;
+const int kOutputDimensionNum = 4;
+const int kSpatialDimensionNum = 2;
+const int kPaddingDimensionNum = 4;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE(context, NumInputs(node) >= 1 && NumInputs(node) <= 3);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  SpaceToBatchNDContext op_context(context, node);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(op_context.input),
+                    kInputDimensionNum);
+  TF_LITE_ENSURE_EQ(context, op_context.params->num_spatial_dimensions,
+                    kSpatialDimensionNum);
+  TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
+
+  const TfLiteIntArray* input_size = op_context.input->dims;
+  const int* block_shape = op_context.params->block_shape;
+
+  TfLiteIntArray* output_size = TfLiteIntArrayCreate(kOutputDimensionNum);
+
+  // Ensures the input height and width (with padding) is a multiple of block
+  // shape height and width.
+  for (int dim = 0; dim < kSpatialDimensionNum; ++dim) {
+    int final_dim_size =
+        (input_size->data[dim + 1] + op_context.params->before_paddings[dim] +
+         op_context.params->after_paddings[dim]);
+    TF_LITE_ENSURE_EQ(context, final_dim_size % block_shape[dim], 0);
+    output_size->data[dim + 1] = final_dim_size / block_shape[dim];
+  }
+
+  const int output_batch_size =
+      input_size->data[0] * block_shape[0] * block_shape[1];
+  const int output_channel_size = input_size->data[3];
+
+  output_size->data[0] = output_batch_size;
+  output_size->data[3] = output_channel_size;
+
+  return context->ResizeTensor(context, op_context.output, output_size);
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  SpaceToBatchNDContext op_context(context, node);
+
+  int block_shape_dims_array[1] = {kSpatialDimensionNum};
+  Dims<4> block_shape_dims = GetTensorDims(block_shape_dims_array, 1);
+
+  // Initialize padding array in the format accepted by the kernel code.
+  // TODO(nupurgarg): Make kernel code accept padding array format that is
+  // consistent with Pad operation (i.e. before_paddings and after_paddings).
+  TfLiteIntArray* padding_data = TfLiteIntArrayCreate(kPaddingDimensionNum);
+  padding_data->data[0] = op_context.params->before_paddings[0];
+  padding_data->data[1] = op_context.params->after_paddings[0];
+  padding_data->data[2] = op_context.params->before_paddings[1];
+  padding_data->data[3] = op_context.params->after_paddings[1];
+  int padding_dims_array[1] = {kPaddingDimensionNum};
+  Dims<4> padding_dims = GetTensorDims(padding_dims_array, 1);
+
+#define TF_LITE_SPACE_TO_BATCH_ND(type, scalar)                          \
+  type::SpaceToBatchND(GetTensorData<scalar>(op_context.input),          \
+                       GetTensorDims(op_context.input),                  \
+                       op_context.params->block_shape, block_shape_dims, \
+                       padding_data->data, padding_dims,                 \
+                       GetTensorData<scalar>(op_context.output),         \
+                       GetTensorDims(op_context.output))
+  switch (op_context.input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      if (kernel_type == kReference) {
+        TF_LITE_SPACE_TO_BATCH_ND(reference_ops, float);
+      } else {
+        TF_LITE_SPACE_TO_BATCH_ND(optimized_ops, float);
+      }
+      break;
+    case kTfLiteUInt8:
+      if (kernel_type == kReference) {
+        TF_LITE_SPACE_TO_BATCH_ND(reference_ops, uint8_t);
+      } else {
+        TF_LITE_SPACE_TO_BATCH_ND(optimized_ops, uint8_t);
+      }
+      break;
+    case kTfLiteInt32:
+      if (kernel_type == kReference) {
+        TF_LITE_SPACE_TO_BATCH_ND(reference_ops, int32_t);
+      } else {
+        TF_LITE_SPACE_TO_BATCH_ND(optimized_ops, int32_t);
+      }
+      break;
+    case kTfLiteInt64:
+      if (kernel_type == kReference) {
+        TF_LITE_SPACE_TO_BATCH_ND(reference_ops, int64_t);
+      } else {
+        TF_LITE_SPACE_TO_BATCH_ND(optimized_ops, int64_t);
+      }
+      break;
+    default:
+      context->ReportError(context,
+                           "Type is currently not supported by SpaceToBatch.");
+      return kTfLiteError;
+  }
+#undef TF_LITE_SPACE_TO_BATCH_ND
+
+  TfLiteIntArrayFree(padding_data);
+  return kTfLiteOk;
+}
+
+}  // namespace space_to_batch_nd
+
+TfLiteRegistration* Register_SPACE_TO_BATCH_ND_REF() {
+  static TfLiteRegistration r = {
+      nullptr, nullptr, space_to_batch_nd::Prepare,
+      space_to_batch_nd::Eval<space_to_batch_nd::kReference>};
+  return &r;
+}
+
+TfLiteRegistration* Register_SPACE_TO_BATCH_ND_GENERIC_OPT() {
+  static TfLiteRegistration r = {
+      nullptr, nullptr, space_to_batch_nd::Prepare,
+      space_to_batch_nd::Eval<space_to_batch_nd::kGenericOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_SPACE_TO_BATCH_ND() {
+  // return Register_SPACE_TO_BATCH_ND_REF();
+  return Register_SPACE_TO_BATCH_ND_GENERIC_OPT();
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/space_to_batch_nd_test.cc b/tensorflow/contrib/lite/kernels/space_to_batch_nd_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..45a6aef73d05b57a7f9a7fc6f58c3971c6e03118
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/space_to_batch_nd_test.cc
@@ -0,0 +1,110 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class SpaceToBatchNDOpModel : public SingleOpModel {
+ public:
+  SpaceToBatchNDOpModel(std::initializer_list<int> input_shape,
+                        std::initializer_list<int> block_shape,
+                        std::initializer_list<int> before_paddings,
+                        std::initializer_list<int> after_paddings) {
+    input_ = AddInput(TensorType_FLOAT32);
+    output_ = AddOutput(TensorType_FLOAT32);
+    SetBuiltinOp(BuiltinOperator_SPACE_TO_BATCH_ND,
+                 BuiltinOptions_SpaceToBatchNDOptions,
+                 CreateSpaceToBatchNDOptions(
+                     builder_, builder_.CreateVector<int>(block_shape),
+                     builder_.CreateVector<int>(before_paddings),
+                     builder_.CreateVector<int>(after_paddings))
+                     .Union());
+    BuildInterpreter({input_shape});
+  }
+
+  void SetInput(std::initializer_list<float> data) {
+    PopulateTensor<float>(input_, data);
+  }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+ private:
+  int input_;
+  int output_;
+};
+
+TEST(SpaceToBatchNDOpTest, InvalidShapeTest) {
+  EXPECT_DEATH(SpaceToBatchNDOpModel({1, 3, 3, 1}, {2, 2}, {0, 0}, {0, 0}),
+               "Cannot allocate tensors");
+}
+
+TEST(SpaceToBatchNDOpTest, SimpleTest) {
+  SpaceToBatchNDOpModel m({1, 4, 4, 1}, {2, 2}, {0, 0}, {0, 0});
+  m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4, 2, 2, 1}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 3, 9, 11, 2, 4, 10, 12, 5, 7,
+                                               13, 15, 6, 8, 14, 16}));
+}
+
+TEST(SpaceToBatchNDOpTest, MultipleInputBatches) {
+  SpaceToBatchNDOpModel m({2, 2, 4, 1}, {2, 2}, {0, 0}, {0, 0});
+  m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({8, 1, 2, 1}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 3, 9, 11, 2, 4, 10, 12, 5, 7,
+                                               13, 15, 6, 8, 14, 16}));
+}
+
+TEST(SpaceToBatchNDOpTest, SimplePadding) {
+  SpaceToBatchNDOpModel m({1, 5, 2, 1}, {3, 2}, {1, 2}, {0, 0});
+  m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({6, 2, 2, 1}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({
+                                 0, 0, 0, 5, 0, 0, 0, 6, 0, 1, 0, 7,
+                                 0, 2, 0, 8, 0, 3, 0, 9, 0, 4, 0, 10,
+                             }));
+}
+
+TEST(SpaceToBatchNDOpTest, ComplexPadding) {
+  SpaceToBatchNDOpModel m({1, 4, 2, 1}, {3, 2}, {1, 2}, {1, 4});
+  m.SetInput({1, 2, 3, 4, 5, 6, 7, 8});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({6, 2, 4, 1}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({
+                                 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0,
+                                 0, 1, 0, 0, 0, 7, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0,
+                                 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0,
+                             }));
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/kernels/space_to_depth_test.cc b/tensorflow/contrib/lite/kernels/space_to_depth_test.cc
index 911f08a92ccd6a97bee414c87bd79091808f0ed1..997f354861a235fb511235e4d64544dc8c3ddb34 100644
--- a/tensorflow/contrib/lite/kernels/space_to_depth_test.cc
+++ b/tensorflow/contrib/lite/kernels/space_to_depth_test.cc
@@ -95,8 +95,7 @@ TEST(SpaceToDepthOpModel, Int64) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
-  tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/squeeze.cc b/tensorflow/contrib/lite/kernels/squeeze.cc
new file mode 100644
index 0000000000000000000000000000000000000000..29447ab021c7b68ff51070d35262402e08dc7ab9
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/squeeze.cc
@@ -0,0 +1,99 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <string.h>
+#include <vector>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace squeeze {
+
+struct SqueezeContext {
+  SqueezeContext(TfLiteContext* context, TfLiteNode* node) {
+    params = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data);
+    input = GetInput(context, node, 0);
+    output = GetOutput(context, node, 0);
+  }
+  TfLiteSqueezeParams* params;
+  TfLiteTensor* input;
+  TfLiteTensor* output;
+};
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  SqueezeContext op_context(context, node);
+  int input_num_dims = NumDimensions(op_context.input);
+  int num_squeeze_dims = op_context.params->num_squeeze_dims;
+
+  // Determines number of dimensions of output tensor after squeeze.
+  const TfLiteIntArray* input_dims = op_context.input->dims;
+  const int* squeeze_dims = op_context.params->squeeze_dims;
+  TF_LITE_ENSURE(context, input_num_dims <= 8);
+  bool should_squeeze[8] = {false};
+  int num_squeezed_dims = 0;
+  if (num_squeeze_dims == 0) {
+    for (int idx = 0; idx < input_num_dims; ++idx) {
+      if (input_dims->data[idx] == 1) {
+        should_squeeze[idx] = true;
+        ++num_squeezed_dims;
+      }
+    }
+  } else {
+    for (int idx = 0; idx < num_squeeze_dims; ++idx) {
+      int current = squeeze_dims[idx] < 0 ? squeeze_dims[idx] + input_num_dims
+                                          : squeeze_dims[idx];
+      TF_LITE_ENSURE(context, current >= 0 && current < input_num_dims &&
+                                  input_dims->data[current] == 1);
+      if (!should_squeeze[current]) ++num_squeezed_dims;
+      should_squeeze[current] = true;
+    }
+  }
+  // Sets output dimensions.
+  TfLiteIntArray* output_dims =
+      TfLiteIntArrayCreate(input_num_dims - num_squeezed_dims);
+  for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx) {
+    if (!should_squeeze[in_idx]) {
+      output_dims->data[out_idx++] = input_dims->data[in_idx];
+    }
+  }
+  return context->ResizeTensor(context, op_context.output, output_dims);
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  SqueezeContext op_context(context, node);
+  TF_LITE_ENSURE_EQ(context, op_context.input->bytes, op_context.output->bytes);
+  memcpy(op_context.output->data.raw, op_context.input->data.raw,
+         op_context.input->bytes);
+  return kTfLiteOk;
+}
+
+}  // namespace squeeze
+
+TfLiteRegistration* Register_SQUEEZE() {
+  static TfLiteRegistration r = {nullptr, nullptr, squeeze::Prepare,
+                                 squeeze::Eval};
+  return &r;
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/squeeze_test.cc b/tensorflow/contrib/lite/kernels/squeeze_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..409227b626afdc8cbed66a27e300b320b59023f2
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/squeeze_test.cc
@@ -0,0 +1,113 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class BaseSqueezeOpModel : public SingleOpModel {
+ public:
+  BaseSqueezeOpModel(const TensorData& input, const TensorData& output,
+                     std::initializer_list<int> axis) {
+    input_ = AddInput(input);
+    output_ = AddOutput(output);
+    SetBuiltinOp(
+        BuiltinOperator_SQUEEZE, BuiltinOptions_SqueezeOptions,
+        CreateSqueezeOptions(builder_, builder_.CreateVector<int>(axis))
+            .Union());
+    BuildInterpreter({GetShape(input_)});
+  }
+
+  int input() { return input_; }
+
+ protected:
+  int input_;
+  int output_;
+};
+
+class FloatSqueezeOpModel : public BaseSqueezeOpModel {
+ public:
+  using BaseSqueezeOpModel::BaseSqueezeOpModel;
+
+  void SetInput(std::initializer_list<float> data) {
+    PopulateTensor(input_, data);
+  }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+};
+
+TEST(FloatSqueezeOpTest, SqueezeAll) {
+  std::initializer_list<float> data = {
+      1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,  9.0,  10.0, 11.0, 12.0,
+      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+  FloatSqueezeOpModel m({TensorType_FLOAT32, {1, 24, 1}},
+                        {TensorType_FLOAT32, {24}}, {});
+  m.SetInput(data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({24}));
+  EXPECT_THAT(
+      m.GetOutput(),
+      ElementsAreArray({1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
+                        9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+                        17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}));
+}
+
+TEST(FloatSqueezeOpTest, SqueezeSelectedAxis) {
+  std::initializer_list<float> data = {
+      1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,  9.0,  10.0, 11.0, 12.0,
+      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+  FloatSqueezeOpModel m({TensorType_FLOAT32, {1, 24, 1}},
+                        {TensorType_FLOAT32, {24}}, {2});
+  m.SetInput(data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 24}));
+  EXPECT_THAT(
+      m.GetOutput(),
+      ElementsAreArray({1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
+                        9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+                        17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}));
+}
+
+TEST(FloatSqueezeOpTest, SqueezeNegativeAxis) {
+  std::initializer_list<float> data = {
+      1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,  9.0,  10.0, 11.0, 12.0,
+      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+  FloatSqueezeOpModel m({TensorType_FLOAT32, {1, 24, 1}},
+                        {TensorType_FLOAT32, {24}}, {-1, 0});
+  m.SetInput(data);
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({24}));
+  EXPECT_THAT(
+      m.GetOutput(),
+      ElementsAreArray({1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
+                        9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+                        17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}));
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/kernels/sub.cc b/tensorflow/contrib/lite/kernels/sub.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ddaf498d5bac0109429224e7cf66cb3debcabc22
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/sub.cc
@@ -0,0 +1,129 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace sub {
+
+// This file has three implementation of Div.
+enum KernelType {
+  kReference,
+  kGenericOptimized,  // Neon-free
+  kNeonOptimized,
+};
+
+constexpr int kInputTensor1 = 0;
+constexpr int kInputTensor2 = 1;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2));
+  for (int i = 0; i < NumDimensions(input1); ++i) {
+    TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i),
+                      SizeOfDimension(input2, i));
+  }
+
+  TF_LITE_ENSURE_EQ(context, input1->type, output->type);
+  TF_LITE_ENSURE_EQ(context, input2->type, output->type);
+
+  TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims);
+  return context->ResizeTensor(context, output, output_size);
+}
+
+template <KernelType kernel_type>
+void EvalSubFloat(TfLiteContext* context, TfLiteNode* node,
+                  TfLiteSubParams* params, TfLiteTensor* input1,
+                  TfLiteTensor* input2, TfLiteTensor* output) {
+  float output_activation_min, output_activation_max;
+  CalculateActivationRangeFloat(params->activation, &output_activation_min,
+                                &output_activation_max);
+#define TF_LITE_Sub(type)                                        \
+  type::Sub(GetTensorData<float>(input1), GetTensorDims(input1), \
+            GetTensorData<float>(input2), GetTensorDims(input2), \
+            output_activation_min, output_activation_max,        \
+            GetTensorData<float>(output), GetTensorDims(output))
+  if (kernel_type == kReference) {
+    TF_LITE_Sub(reference_ops);
+  } else {
+    TF_LITE_Sub(optimized_ops);
+  }
+#undef TF_LITE_Sub
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
+
+  TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  if (output->type == kTfLiteFloat32) {
+    EvalSubFloat<kernel_type>(context, node, params, input1, input2, output);
+  } else {
+    context->ReportError(context, "Inputs and outputs not all float types.");
+    return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace sub
+
+TfLiteRegistration* Register_SUB_REF() {
+  static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare,
+                                 sub::Eval<sub::kReference>};
+  return &r;
+}
+
+TfLiteRegistration* Register_SUB_GENERIC_OPT() {
+  static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare,
+                                 sub::Eval<sub::kGenericOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_SUB_NEON_OPT() {
+  static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare,
+                                 sub::Eval<sub::kNeonOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_SUB() {
+#ifdef USE_NEON
+  return Register_SUB_NEON_OPT();
+#else
+  return Register_SUB_GENERIC_OPT();
+#endif
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/svdf_test.cc b/tensorflow/contrib/lite/kernels/svdf_test.cc
index d956025e9dfc9b6c03e55657023fb042c8ac485d..4de2ceaf053df31a4bc857fb250db416c071e80f 100644
--- a/tensorflow/contrib/lite/kernels/svdf_test.cc
+++ b/tensorflow/contrib/lite/kernels/svdf_test.cc
@@ -306,7 +306,7 @@ TEST(SVDFOpTest, BlackBoxTestRank2) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc
index f716ba8741fd469e7ee405ac300924b53c5c48e5..b69f2b3e4bc66c94fdfc7ed4c244151be63a1711 100644
--- a/tensorflow/contrib/lite/kernels/test_util.cc
+++ b/tensorflow/contrib/lite/kernels/test_util.cc
@@ -180,4 +180,17 @@ int32_t SingleOpModel::GetTensorSize(int index) const {
   return total_size;
 }
 
+template <>
+std::vector<string> SingleOpModel::ExtractVector(int index) {
+  TfLiteTensor* tensor_ptr = interpreter_->tensor(index);
+  CHECK(tensor_ptr != nullptr);
+  const int num_strings = GetStringCount(tensor_ptr);
+  std::vector<string> result;
+  result.reserve(num_strings);
+  for (int i = 0; i < num_strings; ++i) {
+    const auto str = GetString(tensor_ptr, i);
+    result.emplace_back(str.str, str.len);
+  }
+  return result;
+}
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h
index e68e49466119c50ec123edb84f1b1b6390a15a60..531c1366a87e20e140e779b767e29b1fd1111f97 100644
--- a/tensorflow/contrib/lite/kernels/test_util.h
+++ b/tensorflow/contrib/lite/kernels/test_util.h
@@ -24,16 +24,11 @@ limitations under the License.
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/string_util.h"
+#include "tensorflow/contrib/lite/testing/util.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tflite {
 
-inline void LogToStderr() {
-#ifdef PLATFORM_GOOGLE
-  FLAGS_logtostderr = true;
-#endif
-}
-
 // A gmock matcher that check that elements of a float vector match to a given
 // tolerance.
 std::vector<::testing::Matcher<float>> ArrayFloatNear(
@@ -197,6 +192,9 @@ class SingleOpModel {
   std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;
 };
 
+// Strings have a special implementation that is in test_util.cc
+template <>
+std::vector<string> SingleOpModel::ExtractVector(int index);
 }  // namespace tflite
 
 #endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_KERNELS_TEST_UTIL_H_
diff --git a/tensorflow/contrib/lite/kernels/transpose.cc b/tensorflow/contrib/lite/kernels/transpose.cc
new file mode 100644
index 0000000000000000000000000000000000000000..75d8136b6a26efd805d9fc8e9db26dce2cfcfcb1
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/transpose.cc
@@ -0,0 +1,142 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <string.h>
+#include <vector>
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace transpose {
+
+// This file has two implementations of Transpose.
+enum KernelType {
+  kReference,
+};
+
+// TODO(nupurgarg): Permutation arrays represented as a tensor are ignored. Only
+// use the `perm` specified in `params`.
+struct TransposeContext {
+  TransposeContext(TfLiteContext* context, TfLiteNode* node) {
+    params = reinterpret_cast<TfLiteTransposeParams*>(node->builtin_data);
+    input = GetInput(context, node, 0);
+    output = GetOutput(context, node, 0);
+  }
+  TfLiteTransposeParams* params;
+  TfLiteTensor* input;
+  TfLiteTensor* output;
+};
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TransposeContext op_context(context, node);
+  int dims = NumDimensions(op_context.input);
+
+  // Ensure validity of input tensor and permutation array.
+  TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
+  TF_LITE_ENSURE_EQ(context, dims, op_context.params->num_dimensions);
+  TF_LITE_ENSURE_MSG(context, dims <= 4,
+                     "Transpose op only supports 1D-4D input arrays.");
+  for (int idx = 0; idx < dims; ++idx) {
+    TF_LITE_ENSURE_MSG(context,
+                       op_context.params->perm[idx] >= 0 &&
+                           op_context.params->perm[idx] < dims,
+                       "Transpose op permutations array is out of bounds.");
+  }
+
+  // Determine size of output tensor.
+  const TfLiteIntArray* input_size = op_context.input->dims;
+  TfLiteIntArray* output_size = TfLiteIntArrayCreate(dims);
+  for (int idx = 0; idx < dims; ++idx) {
+    output_size->data[idx] = input_size->data[op_context.params->perm[idx]];
+  }
+
+  return context->ResizeTensor(context, op_context.output, output_size);
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TransposeContext op_context(context, node);
+
+  // Reverse the permuted axes and convert to 4D due to the way Dims are
+  // constructed in GetTensorDims.
+  const int kOutputDimensionNum = 4;
+  int reversed_perm[kOutputDimensionNum];
+  int size = op_context.params->num_dimensions;
+  for (int output_k = 0, input_k = size - 1; output_k < size;
+       ++output_k, --input_k) {
+    reversed_perm[output_k] = size - op_context.params->perm[input_k] - 1;
+  }
+  for (int k = size; k < kOutputDimensionNum; ++k) {
+    reversed_perm[k] = k;
+  }
+
+#define TF_LITE_TRANSPOSE(type, scalar)                     \
+  type::Transpose(GetTensorData<scalar>(op_context.input),  \
+                  GetTensorDims(op_context.input),          \
+                  GetTensorData<scalar>(op_context.output), \
+                  GetTensorDims(op_context.output), reversed_perm)
+
+  switch (op_context.input->type) {
+    case kTfLiteFloat32:
+      if (kernel_type == kReference) {
+        TF_LITE_TRANSPOSE(reference_ops, float);
+      }
+      break;
+    case kTfLiteUInt8:
+      if (kernel_type == kReference) {
+        TF_LITE_TRANSPOSE(reference_ops, uint8_t);
+      }
+      break;
+    case kTfLiteInt32:
+      if (kernel_type == kReference) {
+        TF_LITE_TRANSPOSE(reference_ops, int32_t);
+      }
+      break;
+    case kTfLiteInt64:
+      if (kernel_type == kReference) {
+        TF_LITE_TRANSPOSE(reference_ops, int64_t);
+      }
+      break;
+    default:
+      context->ReportError(context,
+                           "Type is currently not supported by Transpose.");
+      return kTfLiteError;
+  }
+#undef TF_LITE_TRANSPOSE
+
+  return kTfLiteOk;
+}
+
+}  // namespace transpose
+
+TfLiteRegistration* Register_TRANSPOSE_REF() {
+  static TfLiteRegistration r = {nullptr, nullptr, transpose::Prepare,
+                                 transpose::Eval<transpose::kReference>};
+  return &r;
+}
+
+TfLiteRegistration* Register_TRANSPOSE() { return Register_TRANSPOSE_REF(); }
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/transpose_test.cc b/tensorflow/contrib/lite/kernels/transpose_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7f5832cd5fa3d502b52bf5554111b45136b588ae
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/transpose_test.cc
@@ -0,0 +1,247 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+void RunTestPermutation(const std::vector<int>& shape,
+                        const std::vector<int>& perms,
+                        std::vector<float>* input_transposed) {
+  // Count elements and allocate output.
+  int count = 1;
+  for (auto factor : shape) count *= factor;
+  input_transposed->resize(count);
+
+  // Create the dummy data
+  std::vector<float> input(count);
+  for (int i = 0; i < input.size(); i++) {
+    input[i] = i;
+  }
+
+  // Create reversed and padded perms.
+  int reversed_perms[4];
+  for (int output_k = 0, input_k = shape.size() - 1; output_k < shape.size();
+       output_k++, input_k--) {
+    reversed_perms[output_k] = shape.size() - perms[input_k] - 1;
+  }
+  // Unused dimensions should not be permuted so pad with identity transform
+  // subset.
+  for (int k = shape.size(); k < 4; k++) {
+    reversed_perms[k] = k;
+  }
+
+  // Make input and output dims (i.e. reversed shape and dest_shape).
+  Dims<4> input_dims = GetTensorDims(shape);
+  Dims<4> output_dims;
+  for (int i = 0; i < 4; i++) {
+    output_dims.sizes[i] = input_dims.sizes[reversed_perms[i]];
+  }
+  output_dims.strides[0] = 1;
+  for (int k = 1; k < 4; k++) {
+    output_dims.strides[k] =
+        output_dims.strides[k - 1] * output_dims.sizes[k - 1];
+  }
+
+  reference_ops::Transpose<float>(input.data(), input_dims,
+                                  input_transposed->data(), output_dims,
+                                  reversed_perms);
+}
+
+TEST(TransposeTest, TestRefOps1D) {
+  // Basic 1D identity.
+  std::vector<float> out;
+  RunTestPermutation({3}, {0}, &out);
+  ASSERT_EQ(out, std::vector<float>({0, 1, 2}));
+}
+
+TEST(TransposeTest, TestRefOps2D) {
+  std::vector<float> out;
+  // Basic 2D.
+  RunTestPermutation({3, 2}, {1, 0}, &out);
+  ASSERT_EQ(out, std::vector<float>({0, 2, 4, 1, 3, 5}));
+  // Identity.
+  RunTestPermutation({3, 2}, {0, 1}, &out);
+  ASSERT_EQ(out, std::vector<float>({0, 1, 2, 3, 4, 5}));
+}
+
+TEST(TransposeTest, TestRefOps3D) {
+  std::vector<float> out;
+  // Test 3 dimensional
+  {
+    std::vector<float> ref({0, 4, 8,  12, 16, 20, 1, 5, 9,  13, 17, 21,
+                            2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23});
+    RunTestPermutation({2, 3, 4}, {2, 0, 1}, &out);
+    ASSERT_EQ(out, ref);
+  }
+  // Test 3 dimensional identity transform
+  {
+    RunTestPermutation({2, 3, 4}, {0, 1, 2}, &out);
+    std::vector<float> ref(out.size());
+    for (int k = 0; k < ref.size(); k++) ref[k] = k;
+    ASSERT_EQ(out, ref);
+  }
+}
+
+TEST(TransposeTest, TestRefOps4D) {
+  std::vector<float> out;
+  // Basic 4d.
+  RunTestPermutation({2, 3, 4, 5}, {2, 0, 1, 3}, &out);
+  ASSERT_EQ(
+      out,
+      std::vector<float>(
+          {0,  1,  2,  3,  4,  20, 21, 22, 23, 24, 40,  41,  42,  43,  44,
+           60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
+           5,  6,  7,  8,  9,  25, 26, 27, 28, 29, 45,  46,  47,  48,  49,
+           65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+           10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50,  51,  52,  53,  54,
+           70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
+           15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55,  56,  57,  58,  59,
+           75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}));
+  RunTestPermutation({2, 3, 4, 5}, {0, 1, 2, 3}, &out);
+  // Basic identity.
+  std::vector<float> ref(out.size());
+  for (int k = 0; k < ref.size(); k++) ref[k] = k;
+  ASSERT_EQ(out, ref);
+}
+
+class TransposeOpModel : public SingleOpModel {
+ public:
+  TransposeOpModel(std::initializer_list<int> input_shape,
+                   std::initializer_list<int> perm) {
+    input_ = AddInput(TensorType_FLOAT32);
+    output_ = AddOutput(TensorType_FLOAT32);
+    SetBuiltinOp(
+        BuiltinOperator_TRANSPOSE, BuiltinOptions_TransposeOptions,
+        CreateTransposeOptions(builder_, builder_.CreateVector<int>(perm))
+            .Union());
+    BuildInterpreter({input_shape});
+  }
+
+  void SetInput(std::initializer_list<float> data) {
+    PopulateTensor<float>(input_, data);
+  }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+ private:
+  int input_;
+  int output_;
+};
+
+TEST(TransposeTest, TestUnequalPermSize) {
+  EXPECT_DEATH(TransposeOpModel({1, 3, 3, 1}, {2, 2}),
+               "dims != op_context.params->num_dimensions");
+}
+
+TEST(TransposeTest, TestPermOutOfBounds) {
+  EXPECT_DEATH(TransposeOpModel({1, 3, 3, 1}, {0, -1, -2, -3}),
+               "Transpose op permutations array is out of bounds.");
+  EXPECT_DEATH(TransposeOpModel({1, 3, 3, 1}, {0, 1, 2, 4}),
+               "Transpose op permutations array is out of bounds.");
+}
+
+TEST(TransposeTest, Test1DInputTensor) {
+  TransposeOpModel m({3}, {0});
+  m.SetInput({1, 2, 3});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3}));
+}
+
+TEST(TransposeTest, Test2DInputTensor) {
+  TransposeOpModel m({3, 2}, {1, 0});
+  m.SetInput({0, 1, 2, 3, 4, 5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 3}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 2, 4, 1, 3, 5}));
+}
+
+TEST(TransposeTest, Test3DInputTensor) {
+  TransposeOpModel m({2, 3, 4}, {2, 0, 1});
+  m.SetInput({0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
+              12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4, 2, 3}));
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray({0, 4, 8,  12, 16, 20, 1, 5, 9,  13, 17, 21,
+                                2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23}));
+}
+
+TEST(TransposeTest, Test5DInputTensor) {
+  EXPECT_DEATH(TransposeOpModel({1, 2, 3, 4, 5}, {0, 1, 2, 3, 4}),
+               "Transpose op only supports 1D-4D input arrays.");
+}
+
+TEST(TransposeTest, SimpleTestNoReorder) {
+  TransposeOpModel m({1, 2, 3, 1}, {0, 1, 2, 3});
+  m.SetInput({1, 2, 3, 4, 5, 6});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2, 3, 1}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6}));
+}
+
+TEST(TransposeTest, SimpleTestWithReorder) {
+  TransposeOpModel m({1, 2, 3, 1}, {2, 1, 3, 0});
+  m.SetInput({1, 2, 3, 4, 5, 6});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 2, 1, 1}));
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 4, 2, 5, 3, 6}));
+}
+
+TEST(TransposeTest, ComplexTestWithReorder) {
+  TransposeOpModel m({2, 3, 4, 5}, {2, 0, 1, 3});
+  m.SetInput({0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,
+              12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,
+              24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
+              36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
+              48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+              60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,
+              72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
+              84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
+              96,  97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107,
+              108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4, 2, 3, 5}));
+  auto result = ElementsAreArray(
+      {0,  1,  2,  3,  4,  20, 21, 22, 23, 24, 40,  41,  42,  43,  44,
+       60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
+       5,  6,  7,  8,  9,  25, 26, 27, 28, 29, 45,  46,  47,  48,  49,
+       65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+       10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50,  51,  52,  53,  54,
+       70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
+       15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55,  56,  57,  58,  59,
+       75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119});
+  EXPECT_THAT(m.GetOutput(), result);
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f5f1ec2cf3f45ae730b849b18e2b85fac50159c7
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
@@ -0,0 +1,208 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <unistd.h>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+#include <limits>
+
+#include "tensorflow/contrib/lite/builtin_op_data.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/activation_functor.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace unidirectional_sequence_rnn {
+
+constexpr int kInputTensor = 0;
+constexpr int kWeightsTensor = 1;
+constexpr int kRecurrentWeightsTensor = 2;
+constexpr int kBiasTensor = 3;
+constexpr int kHiddenStateTensor = 0;
+constexpr int kOutputTensor = 1;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  // Check we have all the inputs and outputs we need.
+  TF_LITE_ENSURE_EQ(context, node->inputs->size, 4);
+  TF_LITE_ENSURE_EQ(context, node->outputs->size, 2);
+
+  TfLiteTensor* input = &context->tensors[node->inputs->data[kInputTensor]];
+  TfLiteTensor* input_weights =
+      &context->tensors[node->inputs->data[kWeightsTensor]];
+  TfLiteTensor* recurrent_weights =
+      &context->tensors[node->inputs->data[kRecurrentWeightsTensor]];
+  TfLiteTensor* bias = &context->tensors[node->inputs->data[kBiasTensor]];
+
+  // Check all the parameters of tensor match within themselves and match the
+  // input configuration.
+  auto* params = reinterpret_cast<TfLiteSequenceRNNParams*>(node->builtin_data);
+  const bool time_major = params->time_major;
+  const int batch_size =
+      (time_major) ? input->dims->data[1] : input->dims->data[0];
+  const int max_time =
+      (time_major) ? input->dims->data[0] : input->dims->data[1];
+  const int num_units = input_weights->dims->data[0];
+  TF_LITE_ASSERT_EQ(input->dims->data[2], input_weights->dims->data[1]);
+  TF_LITE_ASSERT_EQ(input_weights->dims->data[0], bias->dims->data[0]);
+  TF_LITE_ASSERT_EQ(recurrent_weights->dims->data[0], bias->dims->data[0]);
+  TF_LITE_ASSERT_EQ(recurrent_weights->dims->data[1], bias->dims->data[0]);
+
+  TfLiteTensor* hidden_state =
+      &context->tensors[node->outputs->data[kHiddenStateTensor]];
+  TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputTensor]];
+
+  // Resize state.
+  TfLiteIntArray* hidden_state_size_array = TfLiteIntArrayCreate(2);
+  hidden_state_size_array->data[0] = batch_size;
+  hidden_state_size_array->data[1] = num_units;
+  TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, hidden_state,
+                                                   hidden_state_size_array));
+
+  // Mark hidden state as a persistent tensor.
+  hidden_state->allocation_type = kTfLiteArenaRwPersistent;
+
+  // Resize output.
+  TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(3);
+  output_size_array->data[0] = (time_major) ? max_time : batch_size;
+  output_size_array->data[1] = (time_major) ? batch_size : max_time;
+  output_size_array->data[2] = num_units;
+  TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, output,
+                                                   output_size_array));
+
+  return kTfLiteOk;
+}
+
+namespace {
+void RnnStep(const float* input_ptr_batch, const float* input_weights_ptr,
+             const float* recurrent_weights_ptr, const float* bias_ptr,
+             int input_size, int num_units, int input_weights_stride,
+             int recurrent_weights_stride, TfLiteFusedActivation activation,
+             float* hidden_state_ptr_batch, float* output_ptr_batch) {
+  // Output = bias
+  for (int o = 0; o < num_units; o++) {
+    output_ptr_batch[o] = bias_ptr[o];
+  }
+
+  // Output += input * input_weights
+  for (int o = 0; o < num_units; o++) {
+    for (int i = 0; i < input_size; i++) {
+      output_ptr_batch[o] += input_ptr_batch[i] * input_weights_ptr[i];
+    }
+    input_weights_ptr += input_weights_stride;
+  }
+
+  // Output += recurrent_weights * hidden_state
+  for (int o = 0; o < num_units; o++) {
+    for (int h = 0; h < num_units; h++) {
+      output_ptr_batch[o] +=
+          hidden_state_ptr_batch[h] * recurrent_weights_ptr[h];
+    }
+    recurrent_weights_ptr += recurrent_weights_stride;
+  }
+
+  // Output = activation(Output) and update hidden_state
+  for (int o = 0; o < num_units; o++) {
+    output_ptr_batch[o] = (ActivationFunctor(activation))(output_ptr_batch[o]);
+    hidden_state_ptr_batch[o] = output_ptr_batch[o];
+  }
+}
+}  // namespace
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteSequenceRNNParams*>(node->builtin_data);
+
+  TfLiteTensor* input = &context->tensors[node->inputs->data[kInputTensor]];
+  TfLiteTensor* input_weights =
+      &context->tensors[node->inputs->data[kWeightsTensor]];
+  TfLiteTensor* recurrent_weights =
+      &context->tensors[node->inputs->data[kRecurrentWeightsTensor]];
+  TfLiteTensor* bias = &context->tensors[node->inputs->data[kBiasTensor]];
+  TfLiteTensor* hidden_state =
+      &context->tensors[node->outputs->data[kHiddenStateTensor]];
+  TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputTensor]];
+
+  // Initialize the pointer bias.
+  const float* bias_ptr = bias->data.f;
+
+  const bool time_major = params->time_major;
+  const int batch_size =
+      (time_major) ? input->dims->data[1] : input->dims->data[0];
+  const int max_time =
+      (time_major) ? input->dims->data[0] : input->dims->data[1];
+  const int num_units = input_weights->dims->data[0];
+  const int input_size = input->dims->data[2];
+  const int input_weights_stride = input_weights->dims->data[1];
+  const int recurrent_weights_stride = recurrent_weights->dims->data[1];
+
+  // Initialize input_weights and recurrent_weights.
+  const float* input_weights_ptr = input_weights->data.f;
+  const float* recurrent_weights_ptr = recurrent_weights->data.f;
+
+  if (time_major) {
+    // Unroll the sequence
+    for (int s = 0; s < max_time; s++) {
+      for (int b = 0; b < batch_size; b++) {
+        // Initialize the pointer to hidden state.
+        float* hidden_state_ptr_batch = hidden_state->data.f + b * num_units;
+        // Initialize the pointer to input and output.
+        const float* input_ptr_batch =
+            input->data.f + s * input_size * batch_size + b * input_size;
+        float* output_ptr_batch =
+            output->data.f + s * num_units * batch_size + b * num_units;
+
+        RnnStep(input_ptr_batch, input_weights_ptr, recurrent_weights_ptr,
+                bias_ptr, input_size, num_units, input_weights_stride,
+                recurrent_weights_stride, params->activation,
+                hidden_state_ptr_batch, output_ptr_batch);
+      }
+    }
+  } else {
+    // For each batch
+    for (int b = 0; b < batch_size; b++) {
+      // Initialize the pointer to hidden state.
+      float* hidden_state_ptr_batch = hidden_state->data.f + b * num_units;
+      for (int s = 0; s < max_time; s++) {
+        // Initialize the pointer to input and output.
+        const float* input_ptr_batch =
+            input->data.f + b * input_size * max_time + s * input_size;
+        float* output_ptr_batch =
+            output->data.f + b * num_units * max_time + s * num_units;
+
+        RnnStep(input_ptr_batch, input_weights_ptr, recurrent_weights_ptr,
+                bias_ptr, input_size, num_units, input_weights_stride,
+                recurrent_weights_stride, params->activation,
+                hidden_state_ptr_batch, output_ptr_batch);
+      }
+    }
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace unidirectional_sequence_rnn
+
+TfLiteRegistration* Register_UNIDIRECTIONAL_SEQUENCE_RNN() {
+  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
+                                 unidirectional_sequence_rnn::Prepare,
+                                 unidirectional_sequence_rnn::Eval};
+  return &r;
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn_test.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..82c680ec3d8656004d721c8498292677cb061b6b
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn_test.cc
@@ -0,0 +1,352 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// Unit test for TFLite Sequential RNN op.
+
+#include <vector>
+#include <iomanip>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+static float rnn_input[] = {
+    0.23689353,   0.285385,     0.037029743, -0.19858193,  -0.27569133,
+    0.43773448,   0.60379338,   0.35562468,  -0.69424844,  -0.93421471,
+    -0.87287879,  0.37144363,   -0.62476718, 0.23791671,   0.40060222,
+    0.1356622,    -0.99774903,  -0.98858172, -0.38952237,  -0.47685933,
+    0.31073618,   0.71511042,   -0.63767755, -0.31729108,  0.33468103,
+    0.75801885,   0.30660987,   -0.37354088, 0.77002847,   -0.62747043,
+    -0.68572164,  0.0069220066, 0.65791464,  0.35130811,   0.80834007,
+    -0.61777675,  -0.21095741,  0.41213346,  0.73784804,   0.094794154,
+    0.47791874,   0.86496925,   -0.53376222, 0.85315156,   0.10288584,
+    0.86684,      -0.011186242, 0.10513687,  0.87825835,   0.59929144,
+    0.62827742,   0.18899453,   0.31440187,  0.99059987,   0.87170351,
+    -0.35091716,  0.74861872,   0.17831337,  0.2755419,    0.51864719,
+    0.55084288,   0.58982027,   -0.47443086, 0.20875752,   -0.058871567,
+    -0.66609079,  0.59098077,   0.73017097,  0.74604273,   0.32882881,
+    -0.17503482,  0.22396147,   0.19379807,  0.29120302,   0.077113032,
+    -0.70331609,  0.15804303,   -0.93407321, 0.40182066,   0.036301374,
+    0.66521823,   0.0300982,    -0.7747041,  -0.02038002,  0.020698071,
+    -0.90300065,  0.62870288,   -0.23068321, 0.27531278,   -0.095755219,
+    -0.712036,    -0.17384434,  -0.50593495, -0.18646687,  -0.96508682,
+    0.43519354,   0.14744234,   0.62589407,  0.1653645,    -0.10651493,
+    -0.045277178, 0.99032974,   -0.88255352, -0.85147917,  0.28153265,
+    0.19455957,   -0.55479527,  -0.56042433, 0.26048636,   0.84702539,
+    0.47587705,   -0.074295521, -0.12287641, 0.70117295,   0.90532446,
+    0.89782166,   0.79817224,   0.53402734,  -0.33286154,  0.073485017,
+    -0.56172788,  -0.044897556, 0.89964068,  -0.067662835, 0.76863563,
+    0.93455386,   -0.6324693,   -0.083922029};
+
+static float rnn_golden_output[] = {
+    0.496726,   0,          0.965996,  0,         0.0584254, 0,
+    0,          0.12315,    0,         0,         0.612266,  0.456601,
+    0,          0.52286,    1.16099,   0.0291232,
+
+    0,          0,          0.524901,  0,         0,         0,
+    0,          1.02116,    0,         1.35762,   0,         0.356909,
+    0.436415,   0.0355727,  0,         0,
+
+    0,          0,          0,         0.262335,  0,         0,
+    0,          1.33992,    0,         2.9739,    0,         0,
+    1.31914,    2.66147,    0,         0,
+
+    0.942568,   0,          0,         0,         0.025507,  0,
+    0,          0,          0.321429,  0.569141,  1.25274,   1.57719,
+    0.8158,     1.21805,    0.586239,  0.25427,
+
+    1.04436,    0,          0.630725,  0,         0.133801,  0.210693,
+    0.363026,   0,          0.533426,  0,         1.25926,   0.722707,
+    0,          1.22031,    1.30117,   0.495867,
+
+    0.222187,   0,          0.72725,   0,         0.767003,  0,
+    0,          0.147835,   0,         0,         0,         0.608758,
+    0.469394,   0.00720298, 0.927537,  0,
+
+    0.856974,   0.424257,   0,         0,         0.937329,  0,
+    0,          0,          0.476425,  0,         0.566017,  0.418462,
+    0.141911,   0.996214,   1.13063,   0,
+
+    0.967899,   0,          0,         0,         0.0831304, 0,
+    0,          1.00378,    0,         0,         0,         1.44818,
+    1.01768,    0.943891,   0.502745,  0,
+
+    0.940135,   0,          0,         0,         0,         0,
+    0,          2.13243,    0,         0.71208,   0.123918,  1.53907,
+    1.30225,    1.59644,    0.70222,   0,
+
+    0.804329,   0,          0.430576,  0,         0.505872,  0.509603,
+    0.343448,   0,          0.107756,  0.614544,  1.44549,   1.52311,
+    0.0454298,  0.300267,   0.562784,  0.395095,
+
+    0.228154,   0,          0.675323,  0,         1.70536,   0.766217,
+    0,          0,          0,         0.735363,  0.0759267, 1.91017,
+    0.941888,   0,          0,         0,
+
+    0,          0,          1.5909,    0,         0,         0,
+    0,          0.5755,     0,         0.184687,  0,         1.56296,
+    0.625285,   0,          0,         0,
+
+    0,          0,          0.0857888, 0,         0,         0,
+    0,          0.488383,   0.252786,  0,         0,         0,
+    1.02817,    1.85665,    0,         0,
+
+    0.00981836, 0,          1.06371,   0,         0,         0,
+    0,          0,          0,         0.290445,  0.316406,  0,
+    0.304161,   1.25079,    0.0707152, 0,
+
+    0.986264,   0.309201,   0,         0,         0,         0,
+    0,          1.64896,    0.346248,  0,         0.918175,  0.78884,
+    0.524981,   1.92076,    2.07013,   0.333244,
+
+    0.415153,   0.210318,   0,         0,         0,         0,
+    0,          2.02616,    0,         0.728256,  0.84183,   0.0907453,
+    0.628881,   3.58099,    1.49974,   0
+};
+
+class UnidirectionalRNNOpModel : public SingleOpModel {
+ public:
+  UnidirectionalRNNOpModel(int batches, int sequence_len, int units, int size,
+                           bool time_major)
+      : batches_(batches),
+        sequence_len_(sequence_len),
+        units_(units),
+        input_size_(size) {
+    input_ = AddInput(TensorType_FLOAT32);
+    weights_ = AddInput(TensorType_FLOAT32);
+    recurrent_weights_ = AddInput(TensorType_FLOAT32);
+    bias_ = AddInput(TensorType_FLOAT32);
+    hidden_state_ = AddOutput(TensorType_FLOAT32);
+    output_ = AddOutput(TensorType_FLOAT32);
+    SetBuiltinOp(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
+                 BuiltinOptions_SequenceRNNOptions,
+                 CreateSequenceRNNOptions(builder_, time_major,
+                                          ActivationFunctionType_RELU)
+                     .Union());
+    if (time_major) {
+      BuildInterpreter({{sequence_len_, batches_, input_size_},
+                        {units_, input_size_},
+                        {units_, units_},
+                        {units_}});
+    } else {
+      BuildInterpreter({{batches_, sequence_len_, input_size_},
+                        {units_, input_size_},
+                        {units_, units_},
+                        {units_}});
+    }
+  }
+
+  void SetBias(std::initializer_list<float> f) { PopulateTensor(bias_, f); }
+
+  void SetWeights(std::initializer_list<float> f) {
+    PopulateTensor(weights_, f);
+  }
+
+  void SetRecurrentWeights(std::initializer_list<float> f) {
+    PopulateTensor(recurrent_weights_, f);
+  }
+
+  void SetInput(std::initializer_list<float> data) {
+    PopulateTensor(input_, data);
+  }
+
+  void SetInput(int offset, float* begin, float* end) {
+    PopulateTensor(input_, offset, begin, end);
+  }
+
+  void ResetHiddenState() {
+    const int zero_buffer_size = units_ * batches_;
+    std::unique_ptr<float[]> zero_buffer(new float[zero_buffer_size]);
+    memset(zero_buffer.get(), 0, zero_buffer_size * sizeof(float));
+    PopulateTensor(hidden_state_, 0, zero_buffer.get(),
+                   zero_buffer.get() + zero_buffer_size);
+  }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+
+  int input_size() { return input_size_; }
+  int num_units() { return units_; }
+  int num_batches() { return batches_; }
+  int sequence_len() { return sequence_len_; }
+
+ private:
+  int input_;
+  int weights_;
+  int recurrent_weights_;
+  int bias_;
+  int hidden_state_;
+  int output_;
+
+  int batches_;
+  int sequence_len_;
+  int units_;
+  int input_size_;
+};
+
+// TODO(mirkov): add another test which directly compares to TF once TOCO
+// supports the conversion from dynamic_rnn with BasicRNNCell.
+TEST(FullyConnectedOpTest, BlackBoxTest) {
+  UnidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
+                               /*units=*/16, /*size=*/8, /*time_major=*/false);
+  rnn.SetWeights(
+      {0.461459,    0.153381,   0.529743,    -0.00371218, 0.676267,   -0.211346,
+       0.317493,    0.969689,   -0.343251,   0.186423,    0.398151,   0.152399,
+       0.448504,    0.317662,   0.523556,    -0.323514,   0.480877,   0.333113,
+       -0.757714,   -0.674487,  -0.643585,   0.217766,    -0.0251462, 0.79512,
+       -0.595574,   -0.422444,  0.371572,    -0.452178,   -0.556069,  -0.482188,
+       -0.685456,   -0.727851,  0.841829,    0.551535,    -0.232336,  0.729158,
+       -0.00294906, -0.69754,   0.766073,    -0.178424,   0.369513,   -0.423241,
+       0.548547,    -0.0152023, -0.757482,   -0.85491,    0.251331,   -0.989183,
+       0.306261,    -0.340716,  0.886103,    -0.0726757,  -0.723523,  -0.784303,
+       0.0354295,   0.566564,   -0.485469,   -0.620498,   0.832546,   0.697884,
+       -0.279115,   0.294415,   -0.584313,   0.548772,    0.0648819,  0.968726,
+       0.723834,    -0.0080452, -0.350386,   -0.272803,   0.115121,   -0.412644,
+       -0.824713,   -0.992843,  -0.592904,   -0.417893,   0.863791,   -0.423461,
+       -0.147601,   -0.770664,  -0.479006,   0.654782,    0.587314,   -0.639158,
+       0.816969,    -0.337228,  0.659878,    0.73107,     0.754768,   -0.337042,
+       0.0960841,   0.368357,   0.244191,    -0.817703,   -0.211223,  0.442012,
+       0.37225,     -0.623598,  -0.405423,   0.455101,    0.673656,   -0.145345,
+       -0.511346,   -0.901675,  -0.81252,    -0.127006,   0.809865,   -0.721884,
+       0.636255,    0.868989,   -0.347973,   -0.10179,    -0.777449,  0.917274,
+       0.819286,    0.206218,   -0.00785118, 0.167141,    0.45872,    0.972934,
+       -0.276798,   0.837861,   0.747958,    -0.0151566,  -0.330057,  -0.469077,
+       0.277308,    0.415818});
+
+  rnn.SetBias({0.065691948, -0.69055247, 0.1107955, -0.97084129, -0.23957068,
+               -0.23566568, -0.389184, 0.47481549, -0.4791103, 0.29931796,
+               0.10463274, 0.83918178, 0.37197268, 0.61957061, 0.3956964,
+               -0.37609905});
+
+  rnn.SetRecurrentWeights({0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1});
+
+  rnn.ResetHiddenState();
+  const int input_sequence_size = rnn.input_size() * rnn.sequence_len();
+  float* batch_start = rnn_input;
+  float* batch_end = batch_start + input_sequence_size;
+  rnn.SetInput(0, batch_start, batch_end);
+  rnn.SetInput(input_sequence_size, batch_start, batch_end);
+
+  rnn.Invoke();
+
+  float* golden_start = rnn_golden_output;
+  float* golden_end = golden_start + rnn.num_units() * rnn.sequence_len();
+  std::vector<float> expected;
+  expected.insert(expected.end(), golden_start, golden_end);
+  expected.insert(expected.end(), golden_start, golden_end);
+
+  EXPECT_THAT(rnn.GetOutput(), ElementsAreArray(ArrayFloatNear(expected)));
+}
+
+TEST(FullyConnectedOpTest, TimeMajorBlackBoxTest) {
+  UnidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
+                               /*units=*/16, /*size=*/8, /*time_major=*/true);
+  rnn.SetWeights(
+      {0.461459,    0.153381,   0.529743,    -0.00371218, 0.676267,   -0.211346,
+       0.317493,    0.969689,   -0.343251,   0.186423,    0.398151,   0.152399,
+       0.448504,    0.317662,   0.523556,    -0.323514,   0.480877,   0.333113,
+       -0.757714,   -0.674487,  -0.643585,   0.217766,    -0.0251462, 0.79512,
+       -0.595574,   -0.422444,  0.371572,    -0.452178,   -0.556069,  -0.482188,
+       -0.685456,   -0.727851,  0.841829,    0.551535,    -0.232336,  0.729158,
+       -0.00294906, -0.69754,   0.766073,    -0.178424,   0.369513,   -0.423241,
+       0.548547,    -0.0152023, -0.757482,   -0.85491,    0.251331,   -0.989183,
+       0.306261,    -0.340716,  0.886103,    -0.0726757,  -0.723523,  -0.784303,
+       0.0354295,   0.566564,   -0.485469,   -0.620498,   0.832546,   0.697884,
+       -0.279115,   0.294415,   -0.584313,   0.548772,    0.0648819,  0.968726,
+       0.723834,    -0.0080452, -0.350386,   -0.272803,   0.115121,   -0.412644,
+       -0.824713,   -0.992843,  -0.592904,   -0.417893,   0.863791,   -0.423461,
+       -0.147601,   -0.770664,  -0.479006,   0.654782,    0.587314,   -0.639158,
+       0.816969,    -0.337228,  0.659878,    0.73107,     0.754768,   -0.337042,
+       0.0960841,   0.368357,   0.244191,    -0.817703,   -0.211223,  0.442012,
+       0.37225,     -0.623598,  -0.405423,   0.455101,    0.673656,   -0.145345,
+       -0.511346,   -0.901675,  -0.81252,    -0.127006,   0.809865,   -0.721884,
+       0.636255,    0.868989,   -0.347973,   -0.10179,    -0.777449,  0.917274,
+       0.819286,    0.206218,   -0.00785118, 0.167141,    0.45872,    0.972934,
+       -0.276798,   0.837861,   0.747958,    -0.0151566,  -0.330057,  -0.469077,
+       0.277308,    0.415818});
+
+  rnn.SetBias({0.065691948, -0.69055247, 0.1107955, -0.97084129, -0.23957068,
+               -0.23566568, -0.389184, 0.47481549, -0.4791103, 0.29931796,
+               0.10463274, 0.83918178, 0.37197268, 0.61957061, 0.3956964,
+               -0.37609905});
+
+  rnn.SetRecurrentWeights({0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                           0.1});
+
+  rnn.ResetHiddenState();
+  for (int i = 0; i < rnn.sequence_len(); i++) {
+    float* batch_start = rnn_input + i * rnn.input_size();
+    float* batch_end = batch_start + rnn.input_size();
+    // The two batches are identical.
+    rnn.SetInput(2 * i * rnn.input_size(), batch_start, batch_end);
+    rnn.SetInput((2 * i + 1) * rnn.input_size(), batch_start, batch_end);
+  }
+
+  rnn.Invoke();
+
+  std::vector<float> expected;
+  for (int i = 0; i < rnn.sequence_len(); i++) {
+    float* golden_batch_start = rnn_golden_output + i * rnn.num_units();
+    float* golden_batch_end = golden_batch_start + rnn.num_units();
+    expected.insert(expected.end(), golden_batch_start, golden_batch_end);
+    expected.insert(expected.end(), golden_batch_start, golden_batch_end);
+  }
+
+  EXPECT_THAT(rnn.GetOutput(), ElementsAreArray(ArrayFloatNear(expected)));
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  // On Linux, add: tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/lib_package/BUILD b/tensorflow/contrib/lite/lib_package/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..3c1b8d3d45f2bb382bbe6b789ec6ac7ec89ebc66
--- /dev/null
+++ b/tensorflow/contrib/lite/lib_package/BUILD
@@ -0,0 +1,16 @@
+package(default_visibility = ["//visibility:private"])
+
+# Create the LICENSE file for libraries that are used by TensorFlow Lite
+# C library.
+genrule(
+    name = "clicenses_generate",
+    srcs = [
+        "//third_party/eigen3:LICENSE",
+        "@arm_neon_2_x86_sse//:LICENSE",
+        "@farmhash_archive//:COPYING",
+        "@gemmlowp//:LICENSE",
+    ],
+    outs = ["LICENSE"],
+    cmd = "$(location :concat_licenses.sh) $(SRCS) >$@",
+    tools = [":concat_licenses.sh"],
+)
diff --git a/tensorflow/contrib/lite/lib_package/concat_licenses.sh b/tensorflow/contrib/lite/lib_package/concat_licenses.sh
new file mode 100755
index 0000000000000000000000000000000000000000..2070f64e9fa4384234361556da0ed6f5089319b3
--- /dev/null
+++ b/tensorflow/contrib/lite/lib_package/concat_licenses.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Script aimed to combining multiple license files into a single one.
+
+for f in $@
+do
+  echo "--------------------------------------------------------------------------------"
+  echo "BEGIN LICENSE FOR $f"
+  echo "--------------------------------------------------------------------------------"
+  cat $f
+  echo "--------------------------------------------------------------------------------"
+  echo "END LICENSE FOR $f"
+  echo "--------------------------------------------------------------------------------"
+done
diff --git a/tensorflow/contrib/lite/memory_planner.h b/tensorflow/contrib/lite/memory_planner.h
new file mode 100644
index 0000000000000000000000000000000000000000..b11d86c375ca6bd8693f2271df63ecb3c87657de
--- /dev/null
+++ b/tensorflow/contrib/lite/memory_planner.h
@@ -0,0 +1,45 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_
+
+#include "tensorflow/contrib/lite/context.h"
+
+namespace tflite {
+
+// A MemoryPlanner is responsible for planning and executing a number of
+// memory-related operations that are necessary in TF Lite.
+class MemoryPlanner {
+ public:
+  virtual ~MemoryPlanner() {}
+
+  // Plans the necessary memory allocations. This is the MemoryPlanner's
+  // pre-processing step and is called when the graph structure is known but
+  // actual size of the tensors is not.
+  virtual TfLiteStatus PlanAllocations() = 0;
+
+  // Allocates the necessary memory to execute all nodes in the interval
+  // [first_node, last_node].
+  virtual TfLiteStatus ExecuteAllocations(int first_node, int last_node) = 0;
+
+  // Invalidates allocations made earliers. This is called when tensors sizes
+  // have change. All planned allocations remain, but can't be used until
+  // ExecuteAllocations() is called.
+  virtual TfLiteStatus ResetAllocations() = 0;
+};
+
+}  // namespace tflite
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index e2f3560e61baae88a4afaafaa202cde784063efc..4b0c853f77c102efa7574ff97c254d92504730a3 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -60,6 +60,14 @@ std::unique_ptr<FlatBufferModel> FlatBufferModel::BuildFromBuffer(
   return model;
 }
 
+std::unique_ptr<FlatBufferModel> FlatBufferModel::BuildFromModel(
+    const tflite::Model* model_spec, ErrorReporter* error_reporter) {
+  std::unique_ptr<FlatBufferModel> model;
+  model.reset(new FlatBufferModel(model_spec, error_reporter));
+  if (!model->initialized()) model.reset();
+  return model;
+}
+
 FlatBufferModel::FlatBufferModel(const char* filename, bool mmap_file,
                                  ErrorReporter* error_reporter, bool use_nnapi)
     : error_reporter_(error_reporter ? error_reporter
@@ -72,8 +80,7 @@ FlatBufferModel::FlatBufferModel(const char* filename, bool mmap_file,
   } else {
     allocation_ = new FileCopyAllocation(filename, error_reporter);
   }
-  if (!allocation_->valid()) return;
-  if (!CheckModelIdentifier()) return;
+  if (!allocation_->valid() || !CheckModelIdentifier()) return;
 
   model_ = VerifyAndGetModel(allocation_->base(), allocation_->bytes());
 }
@@ -99,6 +106,13 @@ FlatBufferModel::FlatBufferModel(const char* ptr, size_t num_bytes,
   model_ = VerifyAndGetModel(allocation_->base(), allocation_->bytes());
 }
 
+FlatBufferModel::FlatBufferModel(const Model* model,
+                                 ErrorReporter* error_reporter)
+    : error_reporter_(error_reporter ? error_reporter
+                                     : DefaultErrorReporter()) {
+  model_ = model;
+}
+
 FlatBufferModel::~FlatBufferModel() { delete allocation_; }
 
 InterpreterBuilder::InterpreterBuilder(const FlatBufferModel& model,
@@ -160,6 +174,27 @@ std::vector<int> FlatBufferIntArrayToVector(T* flat_array) {
   return ret;
 }
 
+// Copies the contents from the flatbuffer int vector `flatbuffer` into the
+// int array `buffer`. `flat_vector` and `buffer` represent the same
+// configuration operation for a given operation.
+void FlatBufferIntVectorToArray(int max_size_of_buffer,
+                                const flatbuffers::Vector<int32_t>* flat_vector,
+                                int* buffer, ErrorReporter* error_reporter) {
+  if (!flat_vector) {
+    error_reporter->Report("Input array not provided for operation.\n");
+  } else {
+    int num_dimensions = flat_vector->Length();
+    if (num_dimensions > max_size_of_buffer / sizeof(int)) {
+      error_reporter->Report(
+          "Found too many dimensions in the operation's input array.\n");
+    } else {
+      for (int i = 0; i < num_dimensions; ++i) {
+        buffer[i] = flat_vector->Get(i);
+      }
+    }
+  }
+}
+
 // Allocate a structure using C malloc, but make sure the structure is a
 // POD structure that doesn't require constructors to run. The reason we do
 // this, is that Interpreter's C extension part will take ownership and wants
@@ -175,6 +210,9 @@ T* MallocPOD() {
 // This handles builtin data explicitly as there are flatbuffer schemas.
 //
 // Returns memory that must be feed.
+//
+// TODO(nupurgarg): Pass in void ** and return TfLiteStatus to ensure program
+// crashes if error reporter is called.
 void* ParseOpData(const Operator* op, BuiltinOperator op_type,
                   ErrorReporter* error_reporter) {
   auto parse_padding = [](Padding padding) {
@@ -192,7 +230,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type,
         return kTfLiteActNone;
       case ActivationFunctionType_RELU:
         return kTfLiteActRelu;
-      case ActivationFunctionType_RELU1:
+      case ActivationFunctionType_RELU_N1_TO_1:
         return kTfLiteActRelu1;
       case ActivationFunctionType_RELU6:
         return kTfLiteActRelu6;
@@ -248,7 +286,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type,
     case BuiltinOperator_TANH:
     case BuiltinOperator_LOGISTIC:
     case BuiltinOperator_RELU:
-    case BuiltinOperator_RELU1:
+    case BuiltinOperator_RELU_N1_TO_1:
     case BuiltinOperator_RELU6:
     case BuiltinOperator_CONCAT_EMBEDDINGS:
       break;
@@ -301,6 +339,17 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type,
       builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: {
+      TfLiteSequenceRNNParams* params = MallocPOD<TfLiteSequenceRNNParams>();
+      if (auto* sequence_rnn_params =
+              op->builtin_options_as_SequenceRNNOptions()) {
+        params->activation =
+            parse_activation(sequence_rnn_params->fused_activation_function());
+        params->time_major = sequence_rnn_params->time_major();
+      }
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
     case BuiltinOperator_RNN: {
       TfLiteRNNParams* params = MallocPOD<TfLiteRNNParams>();
       if (auto* rnn_params = op->builtin_options_as_RNNOptions()) {
@@ -375,6 +424,24 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type,
       builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_DIV: {
+      auto* params = MallocPOD<TfLiteDivParams>();
+      if (auto* schema_params = op->builtin_options_as_DivOptions()) {
+        params->activation =
+            parse_activation(schema_params->fused_activation_function());
+      }
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
+    case BuiltinOperator_SUB: {
+      auto* params = MallocPOD<TfLiteSubParams>();
+      if (auto* schema_params = op->builtin_options_as_SubOptions()) {
+        params->activation =
+            parse_activation(schema_params->fused_activation_function());
+      }
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
     case BuiltinOperator_L2_NORMALIZATION: {
       auto* params = MallocPOD<TfLiteL2NormParams>();
       if (auto* schema_params = op->builtin_options_as_L2NormOptions()) {
@@ -417,23 +484,35 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type,
       builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_PAD: {
+      auto* params = MallocPOD<TfLitePadParams>();
+      if (auto* schema_params = op->builtin_options_as_PadOptions()) {
+        auto* before_padding = schema_params->before_padding();
+        FlatBufferIntVectorToArray(sizeof(params->before_padding),
+                                   before_padding, params->before_padding,
+                                   error_reporter);
+
+        auto* after_padding = schema_params->after_padding();
+        FlatBufferIntVectorToArray(sizeof(params->after_padding), after_padding,
+                                   params->after_padding, error_reporter);
+
+        if (before_padding->Length() != after_padding->Length()) {
+          error_reporter->Report(
+              "Before padding and after padding arrays need to contain the "
+              "same number of dimensions.\n");
+        }
+        params->num_dimensions = after_padding->Length();
+      }
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
     case BuiltinOperator_RESHAPE: {
       auto* params = MallocPOD<TfLiteReshapeParams>();
       if (auto* schema_params = op->builtin_options_as_ReshapeOptions()) {
         auto* new_shape = schema_params->new_shape();
-        if (!new_shape) {
-          error_reporter->Report("No new_shape provided for Reshape\n");
-        } else {
-          params->num_dimensions = new_shape->Length();
-          if (params->num_dimensions > sizeof(params->shape) / sizeof(int)) {
-            error_reporter->Report(
-                "Found too many dimensions in Reshape's new_shape\n");
-          } else {
-            for (int i = 0; i < params->num_dimensions; ++i) {
-              params->shape[i] = new_shape->Get(i);
-            }
-          }
-        }
+        FlatBufferIntVectorToArray(sizeof(params->shape), new_shape,
+                                   params->shape, error_reporter);
+        params->num_dimensions = new_shape->Length();
       }
       builtin_data = reinterpret_cast<void*>(params);
       break;
@@ -456,6 +535,88 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type,
       builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_GATHER: {
+      TfLiteGatherParams* params = MallocPOD<TfLiteGatherParams>();
+      params->axis = 0;
+      if (auto* gather_params = op->builtin_options_as_GatherOptions()) {
+        params->axis = gather_params->axis();
+      }
+
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
+    case BuiltinOperator_SPACE_TO_BATCH_ND: {
+      auto* params = MallocPOD<TfLiteSpaceToBatchNDParams>();
+      if (auto* schema_params =
+              op->builtin_options_as_SpaceToBatchNDOptions()) {
+        const auto& block_shape = schema_params->block_shape();
+        FlatBufferIntVectorToArray(sizeof(params->block_shape), block_shape,
+                                   params->block_shape, error_reporter);
+        const auto& before_paddings = schema_params->before_paddings();
+        FlatBufferIntVectorToArray(sizeof(params->before_paddings),
+                                   before_paddings, params->before_paddings,
+                                   error_reporter);
+        const auto& after_paddings = schema_params->after_paddings();
+        FlatBufferIntVectorToArray(sizeof(params->after_paddings),
+                                   after_paddings, params->after_paddings,
+                                   error_reporter);
+        params->num_spatial_dimensions = block_shape->Length();
+      }
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
+    case BuiltinOperator_BATCH_TO_SPACE_ND: {
+      auto* params = MallocPOD<TfLiteBatchToSpaceNDParams>();
+      if (auto* schema_params =
+              op->builtin_options_as_BatchToSpaceNDOptions()) {
+        const auto& block_shape = schema_params->block_shape();
+        FlatBufferIntVectorToArray(sizeof(params->block_shape), block_shape,
+                                   params->block_shape, error_reporter);
+        const auto& before_crops = schema_params->before_crops();
+        FlatBufferIntVectorToArray(sizeof(params->before_crops), before_crops,
+                                   params->before_crops, error_reporter);
+        const auto& after_crops = schema_params->after_crops();
+        FlatBufferIntVectorToArray(sizeof(params->after_crops), after_crops,
+                                   params->after_crops, error_reporter);
+        params->num_spatial_dimensions = block_shape->Length();
+      }
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
+    case BuiltinOperator_TRANSPOSE: {
+      auto* params = MallocPOD<TfLiteTransposeParams>();
+      if (auto* schema_params = op->builtin_options_as_TransposeOptions()) {
+        const auto& perm = schema_params->perm();
+        FlatBufferIntVectorToArray(sizeof(params->perm), perm, params->perm,
+                                   error_reporter);
+        params->num_dimensions = perm->Length();
+      }
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
+    case BuiltinOperator_MEAN: {
+      auto* params = MallocPOD<TfLiteMeanParams>();
+      if (auto* schema_params = op->builtin_options_as_MeanOptions()) {
+        const auto& axis = schema_params->axis();
+        FlatBufferIntVectorToArray(sizeof(params->axis), axis, params->axis,
+                                   error_reporter);
+        params->keep_dims = schema_params->keep_dims();
+        params->num_axis_dimensions = axis->Length();
+      }
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
+    case BuiltinOperator_SQUEEZE: {
+      auto* params = MallocPOD<TfLiteSqueezeParams>();
+      if (auto* schema_params = op->builtin_options_as_SqueezeOptions()) {
+        const auto& squeeze_dims = schema_params->squeeze_dims();
+        FlatBufferIntVectorToArray(sizeof(params->squeeze_dims), squeeze_dims,
+                                   params->squeeze_dims, error_reporter);
+        params->num_squeeze_dims = squeeze_dims->Length();
+      }
+      builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
   }
   return builtin_data;
 }
diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h
index 15659d33f37dfb2f119480ed88d2e1b81f34c145..e0c96f7f0480cd3146f95a22957477809cf0096d 100644
--- a/tensorflow/contrib/lite/model.h
+++ b/tensorflow/contrib/lite/model.h
@@ -45,18 +45,25 @@ namespace tflite {
 // or mmapped. This uses flatbuffers as the serialization format.
 class FlatBufferModel {
  public:
-  // Build a model based on a file. Return a nullptr in case of failure.
+  // Builds a model based on a file. Returns a nullptr in case of failure.
   static std::unique_ptr<FlatBufferModel> BuildFromFile(
       const char* filename,
       ErrorReporter* error_reporter = DefaultErrorReporter());
 
-  // Build a model based on a pre-loaded flatbuffer. The caller retains
+  // Builds a model based on a pre-loaded flatbuffer. The caller retains
   // ownership of the buffer and should keep it alive until the returned object
-  // is destroyed. Return a nullptr in case of failure.
+  // is destroyed. Returns a nullptr in case of failure.
   static std::unique_ptr<FlatBufferModel> BuildFromBuffer(
       const char* buffer, size_t buffer_size,
       ErrorReporter* error_reporter = DefaultErrorReporter());
 
+  // Builds a model directly from a flatbuffer pointer. The caller retains
+  // ownership of the buffer and should keep it alive until the returned object
+  // is destroyed. Returns a nullptr in case of failure.
+  static std::unique_ptr<FlatBufferModel> BuildFromModel(
+      const tflite::Model* model_spec,
+      ErrorReporter* error_reporter = DefaultErrorReporter());
+
   // Releases memory or unmaps mmaped meory.
   ~FlatBufferModel();
 
@@ -75,7 +82,7 @@ class FlatBufferModel {
   bool CheckModelIdentifier() const;
 
  private:
-  // Load a model from `filename`. If `mmap_file` is true then use mmap,
+  // Loads a model from `filename`. If `mmap_file` is true then use mmap,
   // otherwise make a copy of the model in a buffer.
   //
   // Note, if `error_reporter` is null, then a DefaultErrorReporter() will be
@@ -85,8 +92,8 @@ class FlatBufferModel {
       ErrorReporter* error_reporter = DefaultErrorReporter(),
       bool use_nnapi = false);
 
-  // Load a model from `ptr` and `num_bytes` of the model file. The `ptr` has to
-  // remain alive and unchanged until the end of this flatbuffermodel's
+  // Loads a model from `ptr` and `num_bytes` of the model file. The `ptr` has
+  // to remain alive and unchanged until the end of this flatbuffermodel's
   // lifetime.
   //
   // Note, if `error_reporter` is null, then a DefaultErrorReporter() will be
@@ -94,6 +101,10 @@ class FlatBufferModel {
   FlatBufferModel(const char* ptr, size_t num_bytes,
                   ErrorReporter* error_reporter = DefaultErrorReporter());
 
+  // Loads a model from Model flatbuffer. The `model` has to remain alive and
+  // unchanged until the end of this flatbuffermodel's lifetime.
+  FlatBufferModel(const Model* model, ErrorReporter* error_reporter);
+
   // Flatbuffer traverser pointer. (Model* is a pointer that is within the
   // allocated memory of the data allocated by allocation's internals.
   const tflite::Model* model_ = nullptr;
@@ -106,9 +117,9 @@ class FlatBufferModel {
 // model are mapped to executable function pointers (TfLiteRegistrations).
 class OpResolver {
  public:
-  // Find the op registration for a builtin operator by enum code.
+  // Finds the op registration for a builtin operator by enum code.
   virtual TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const = 0;
-  // Find the op registration of a custom operator by op name.
+  // Finds the op registration of a custom operator by op name.
   virtual TfLiteRegistration* FindOp(const char* op) const = 0;
   virtual ~OpResolver() {}
 };
@@ -131,7 +142,7 @@ class InterpreterBuilder {
  public:
   InterpreterBuilder(const FlatBufferModel& model,
                      const OpResolver& op_resolver);
-  // Build an interpreter given only the raw flatbuffer Model object (instead
+  // Builds an interpreter given only the raw flatbuffer Model object (instead
   // of a FlatBufferModel). Mostly used for testing.
   // If `error_reporter` is null, then DefaultErrorReporter() is used.
   InterpreterBuilder(const ::tflite::Model* model,
diff --git a/tensorflow/contrib/lite/model_test.cc b/tensorflow/contrib/lite/model_test.cc
index 61043866420752b552281e353be9a2b41a6aadc8..5330c8f594593655b2a8776cf6b399c0d16cdc19 100644
--- a/tensorflow/contrib/lite/model_test.cc
+++ b/tensorflow/contrib/lite/model_test.cc
@@ -26,6 +26,7 @@ limitations under the License.
 
 #include <gtest/gtest.h>
 #include "tensorflow/contrib/lite/error_reporter.h"
+#include "tensorflow/contrib/lite/testing/util.h"
 
 // Comparison for TfLiteRegistration. Since TfLiteRegistration is a C object,
 // we must declare this in global namespace, so argument-dependent operator
@@ -254,6 +255,28 @@ TEST(BasicFlatBufferModel, TestBuildModelFromCorruptedData) {
   ASSERT_FALSE(model);
 }
 
+// Test that loading model directly from a Model flatbuffer works.
+TEST(BasicFlatBufferModel, TestBuildFromModel) {
+  TestErrorReporter reporter;
+  FileCopyAllocation model_allocation(
+      "tensorflow/contrib/lite/testdata/test_model.bin", &reporter);
+  ASSERT_TRUE(model_allocation.valid());
+  ::flatbuffers::Verifier verifier(
+      reinterpret_cast<const uint8_t*>(model_allocation.base()),
+      model_allocation.bytes());
+  ASSERT_TRUE(VerifyModelBuffer(verifier));
+  const Model* model_fb = ::tflite::GetModel(model_allocation.base());
+
+  auto model = FlatBufferModel::BuildFromModel(model_fb);
+  ASSERT_TRUE(model);
+
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(
+      InterpreterBuilder(*model, TrivialResolver(&dummy_reg))(&interpreter),
+      kTfLiteOk);
+  ASSERT_NE(interpreter, nullptr);
+}
+
 // TODO(aselle): Add tests for serialization of builtin op data types.
 // These tests will occur with the evaluation tests of individual operators,
 // not here.
@@ -261,7 +284,7 @@ TEST(BasicFlatBufferModel, TestBuildModelFromCorruptedData) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/models/smartreply/BUILD b/tensorflow/contrib/lite/models/smartreply/BUILD
index fbdf19f2054cf01aec44e3fcb13d0d0a2ff6f914..733c3f4c7fa0605f24a1e6b4c458e34310c079c4 100644
--- a/tensorflow/contrib/lite/models/smartreply/BUILD
+++ b/tensorflow/contrib/lite/models/smartreply/BUILD
@@ -1,7 +1,92 @@
 package(default_visibility = ["//visibility:public"])
 
+load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops")
+
 licenses(["notice"])  # Apache 2.0
 
+gen_selected_ops(
+    name = "smartreply_ops",
+    model = "@tflite_smartreply//:smartreply.tflite",
+)
+
+cc_library(
+    name = "custom_ops",
+    srcs = [
+        "ops/extract_feature.cc",
+        "ops/normalize.cc",
+        "ops/predict.cc",
+        ":smartreply_ops",
+    ],
+    copts = tflite_copts(),
+    deps = [
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite:string_util",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/tools:mutable_op_resolver",
+        "@com_google_absl//absl/strings",
+        "@com_googlesource_code_re2//:re2",
+        "@farmhash_archive//:farmhash",
+    ],
+)
+
+cc_library(
+    name = "predictor_lib",
+    srcs = ["predictor.cc"],
+    hdrs = ["predictor.h"],
+    copts = tflite_copts(),
+    deps = [
+        ":custom_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite:string_util",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/tools:mutable_op_resolver",
+        "@com_google_absl//absl/strings",
+        "@com_googlesource_code_re2//:re2",
+    ],
+)
+
+cc_test(
+    name = "extract_feature_op_test",
+    size = "small",
+    srcs = ["ops/extract_feature_test.cc"],
+    deps = [
+        ":custom_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+        "@farmhash_archive//:farmhash",
+    ],
+)
+
+cc_test(
+    name = "normalize_op_test",
+    size = "small",
+    srcs = ["ops/normalize_test.cc"],
+    deps = [
+        ":custom_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite:string_util",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+cc_test(
+    name = "predict_op_test",
+    size = "small",
+    srcs = ["ops/predict_test.cc"],
+    deps = [
+        ":custom_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite:string_util",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/AndroidManifest.xml b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/AndroidManifest.xml
new file mode 100644
index 0000000000000000000000000000000000000000..75ed9432c8fcdfd77a64d3c659e6336c977cdda2
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/AndroidManifest.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright 2017 The Android Open Source Project
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+  package="com.example.android.smartreply" >
+
+  <uses-sdk
+      android:minSdkVersion="15"
+      android:targetSdkVersion="24" />
+
+  <application android:label="TfLite SmartReply Demo">
+    <activity
+        android:name="com.example.android.smartreply.MainActivity"
+        android:configChanges="orientation|keyboardHidden|screenSize"
+        android:windowSoftInputMode="stateUnchanged|adjustPan"
+        android:label="TfLite SmartReply Demo"
+        android:screenOrientation="portrait" >
+      <intent-filter>
+        <action android:name="android.intent.action.MAIN" />
+        <category android:name="android.intent.category.LAUNCHER" />
+      </intent-filter>
+    </activity>
+  </application>
+
+</manifest>
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..f8767b443a2aa64b666c3b6bfb7db30cc0be62ea
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD
@@ -0,0 +1,65 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow/contrib/lite:build_def.bzl",
+    "tflite_copts",
+    "tflite_jni_binary",
+)
+
+filegroup(
+    name = "assets",
+    srcs = [
+        "@tflite_smartreply//:model_files",
+    ],
+)
+
+android_binary(
+    name = "SmartReplyDemo",
+    srcs = glob(["java/**/*.java"]),
+    assets = [":assets"],
+    assets_dir = "",
+    custom_package = "com.example.android.smartreply",
+    manifest = "AndroidManifest.xml",
+    nocompress_extensions = [
+        ".tflite",
+    ],
+    resource_files = glob(["res/**"]),
+    tags = ["manual"],
+    deps = [
+        ":smartreply_runtime",
+        "@androidsdk//com.android.support:support-v13-25.2.0",
+        "@androidsdk//com.android.support:support-v4-25.2.0",
+    ],
+)
+
+cc_library(
+    name = "smartreply_runtime",
+    srcs = ["libsmartreply_jni.so"],
+    visibility = ["//visibility:public"],
+)
+
+tflite_jni_binary(
+    name = "libsmartreply_jni.so",
+    deps = [
+        ":smartreply_jni_lib",
+    ],
+)
+
+cc_library(
+    name = "smartreply_jni_lib",
+    srcs = [
+        "smartreply_jni.cc",
+    ],
+    copts = tflite_copts(),
+    linkopts = [
+        "-lm",
+        "-ldl",
+    ],
+    deps = [
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/models/smartreply:predictor_lib",
+    ],
+    alwayslink = 1,
+)
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/BUILD b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..3c882ffc43fde577801428151a43b592e8faaed1
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/BUILD
@@ -0,0 +1,15 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(glob(["*"]))
+
+filegroup(
+    name = "assets_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "BUILD",
+        ],
+    ),
+)
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/backoff_response.txt b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/backoff_response.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a0a5b46b5f8d5fd6a0297c8056bb2fb9b6ad9ada
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/assets/backoff_response.txt
@@ -0,0 +1,16 @@
+Ok
+Yes
+No
+👍
+☺
+😟
+❤️
+Lol
+Thanks
+Got it
+Done
+Nice
+I don't know
+What?
+Why?
+What's up?
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/MainActivity.java b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/MainActivity.java
new file mode 100644
index 0000000000000000000000000000000000000000..02fec9ae5e971ad756ae6c2b0149a6aacfa27cad
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/MainActivity.java
@@ -0,0 +1,99 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package com.example.android.smartreply;
+
+import android.app.Activity;
+import android.os.Bundle;
+import android.os.Handler;
+import android.util.Log;
+import android.view.View;
+import android.widget.Button;
+import android.widget.EditText;
+import android.widget.TextView;
+
+/**
+ * The main (and only) activity of this demo app. Displays a text box which updates as messages are
+ * received.
+ */
+public class MainActivity extends Activity {
+  private static final String TAG = "SmartReplyDemo";
+  private SmartReplyClient client;
+
+  private Button sendButton;
+  private TextView messageTextView;
+  private EditText messageInput;
+
+  private Handler handler;
+
+  @Override
+  protected void onCreate(Bundle savedInstanceState) {
+    super.onCreate(savedInstanceState);
+    Log.v(TAG, "onCreate");
+    setContentView(R.layout.main_activity);
+
+    client = new SmartReplyClient(getApplicationContext());
+    handler = new Handler();
+
+    sendButton = (Button) findViewById(R.id.send_button);
+    sendButton.setOnClickListener(
+        (View v) -> {
+          send(messageInput.getText().toString());
+        });
+
+    messageTextView = (TextView) findViewById(R.id.message_text);
+    messageInput = (EditText) findViewById(R.id.message_input);
+  }
+
+  @Override
+  protected void onStart() {
+    super.onStart();
+    Log.v(TAG, "onStart");
+    handler.post(
+        () -> {
+          client.loadModel();
+        });
+  }
+
+  @Override
+  protected void onStop() {
+    super.onStop();
+    Log.v(TAG, "onStop");
+    handler.post(
+        () -> {
+          client.unloadModel();
+        });
+  }
+
+  private void send(final String message) {
+    handler.post(
+        () -> {
+          messageTextView.append("Input: " + message + "\n");
+
+          SmartReply[] ans = client.predict(new String[] {message});
+          for (SmartReply reply : ans) {
+            appendMessage("Reply: " + reply.getText());
+          }
+          appendMessage("------");
+        });
+  }
+
+  private void appendMessage(final String message) {
+    handler.post(
+        () -> {
+          messageTextView.append(message + "\n");
+        });
+  }
+}
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReply.java b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReply.java
new file mode 100644
index 0000000000000000000000000000000000000000..3357fd17c11f870d1b0998bb26ffa9abf149686b
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReply.java
@@ -0,0 +1,44 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package com.example.android.smartreply;
+
+import android.support.annotation.Keep;
+
+/**
+ * SmartReply contains predicted message, and confidence.
+ *
+ * <p>NOTE: this class used by JNI, class name and constructor should not be obfuscated.
+ */
+@Keep
+public class SmartReply {
+
+  private final String text;
+  private final float score;
+
+  @Keep
+  public SmartReply(String text, float score) {
+    this.text = text;
+    this.score = score;
+  }
+
+  public String getText() {
+    return text;
+  }
+
+  public float getScore() {
+    return score;
+  }
+}
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReplyClient.java b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReplyClient.java
new file mode 100644
index 0000000000000000000000000000000000000000..d5b1ac0ffbc47283aa0c1bf68c0a85ad6228cdcc
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/java/com/example/android/smartreply/SmartReplyClient.java
@@ -0,0 +1,129 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package com.example.android.smartreply;
+
+import android.content.Context;
+import android.content.res.AssetFileDescriptor;
+import android.support.annotation.Keep;
+import android.support.annotation.WorkerThread;
+import android.util.Log;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Interface to load TfLite model and provide predictions. */
+public class SmartReplyClient implements AutoCloseable {
+  private static final String TAG = "SmartReplyDemo";
+  private static final String MODEL_PATH = "smartreply.tflite";
+  private static final String BACKOFF_PATH = "backoff_response.txt";
+  private static final String JNI_LIB = "smartreply_jni";
+
+  private final Context context;
+  private long storage;
+  private MappedByteBuffer model;
+
+  private volatile boolean isLibraryLoaded;
+
+  public SmartReplyClient(Context context) {
+    this.context = context;
+  }
+
+  public boolean isLoaded() {
+    return storage != 0;
+  }
+
+  @WorkerThread
+  public synchronized void loadModel() {
+    if (!isLibraryLoaded) {
+      System.loadLibrary(JNI_LIB);
+      isLibraryLoaded = true;
+    }
+
+    try {
+      model = loadModelFile();
+      String[] backoff = loadBackoffList();
+      storage = loadJNI(model, backoff);
+    } catch (IOException e) {
+      Log.e(TAG, "Fail to load model", e);
+      return;
+    }
+  }
+
+  @WorkerThread
+  public synchronized SmartReply[] predict(String[] input) {
+    if (storage != 0) {
+      return predictJNI(storage, input);
+    } else {
+      return new SmartReply[] {};
+    }
+  }
+
+  @WorkerThread
+  public synchronized void unloadModel() {
+    close();
+  }
+
+  @Override
+  public synchronized void close() {
+    if (storage != 0) {
+      unloadJNI(storage);
+      storage = 0;
+    }
+  }
+
+  private MappedByteBuffer loadModelFile() throws IOException {
+    AssetFileDescriptor fileDescriptor = context.getAssets().openFd(MODEL_PATH);
+    FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor());
+    try {
+      FileChannel fileChannel = inputStream.getChannel();
+      long startOffset = fileDescriptor.getStartOffset();
+      long declaredLength = fileDescriptor.getDeclaredLength();
+      return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength);
+    } finally {
+      inputStream.close();
+    }
+  }
+
+  private String[] loadBackoffList() throws IOException {
+    List<String> labelList = new ArrayList<String>();
+    BufferedReader reader =
+        new BufferedReader(new InputStreamReader(context.getAssets().open(BACKOFF_PATH)));
+    String line;
+    while ((line = reader.readLine()) != null) {
+      if (!line.isEmpty()) {
+        labelList.add(line);
+      }
+    }
+    reader.close();
+    String[] ans = new String[labelList.size()];
+    labelList.toArray(ans);
+    return ans;
+  }
+
+  @Keep
+  private native long loadJNI(MappedByteBuffer buffer, String[] backoff);
+
+  @Keep
+  private native SmartReply[] predictJNI(long storage, String[] text);
+
+  @Keep
+  private native void unloadJNI(long storage);
+}
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/res/layout/main_activity.xml b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/res/layout/main_activity.xml
new file mode 100644
index 0000000000000000000000000000000000000000..23b4cadc007a4457d33b8c8fecf9b1e7b7436320
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/res/layout/main_activity.xml
@@ -0,0 +1,44 @@
+<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
+    xmlns:tools="http://schemas.android.com/tools"
+    android:layout_width="match_parent"
+    android:layout_height="match_parent"
+    android:orientation="vertical">
+
+    <LinearLayout
+        android:layout_width="fill_parent"
+        android:layout_height="0dp"
+        android:padding="5dip"
+        android:layout_weight="3">
+
+        <TextView
+            android:id="@+id/message_text"
+            android:layout_width="fill_parent"
+            android:layout_height="fill_parent"
+            android:scrollbars="vertical"
+            android:gravity="bottom"/>
+    </LinearLayout>
+
+    <LinearLayout
+        android:layout_width="fill_parent"
+        android:layout_height="0dp"
+        android:padding="5dip"
+        android:layout_weight="1">
+
+        <EditText
+            android:id="@+id/message_input"
+            android:layout_width="0dp"
+            android:layout_height="fill_parent"
+            android:layout_weight="6"
+            android:scrollbars="vertical"
+            android:hint="Enter Text"
+            android:gravity="top"
+            android:inputType="text"/>
+        <Button
+            android:id="@+id/send_button"
+            android:layout_width="0dp"
+            android:layout_height="fill_parent"
+            android:layout_weight="2"
+            android:text="Send" />
+    </LinearLayout>
+
+</LinearLayout>
diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/smartreply_jni.cc b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/smartreply_jni.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f158cc511a9bee0710aee13cd04f77b6f95fb868
--- /dev/null
+++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/smartreply_jni.cc
@@ -0,0 +1,129 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <jni.h>
+#include <utility>
+#include <vector>
+
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/models/smartreply/predictor.h"
+
+const char kIllegalStateException[] = "java/lang/IllegalStateException";
+
+using tflite::custom::smartreply::GetSegmentPredictions;
+using tflite::custom::smartreply::PredictorResponse;
+
+template <typename T>
+T CheckNotNull(JNIEnv* env, T&& t) {
+  if (t == nullptr) {
+    env->ThrowNew(env->FindClass(kIllegalStateException), "");
+    return nullptr;
+  }
+  return std::forward<T>(t);
+}
+
+std::vector<std::string> jniStringArrayToVector(JNIEnv* env,
+                                                jobjectArray string_array) {
+  int count = env->GetArrayLength(string_array);
+  std::vector<std::string> result;
+  for (int i = 0; i < count; i++) {
+    auto jstr =
+        reinterpret_cast<jstring>(env->GetObjectArrayElement(string_array, i));
+    const char* raw_str = env->GetStringUTFChars(jstr, JNI_FALSE);
+    result.emplace_back(std::string(raw_str));
+    env->ReleaseStringUTFChars(jstr, raw_str);
+  }
+  return result;
+}
+
+struct JNIStorage {
+  std::vector<std::string> backoff_list;
+  std::unique_ptr<::tflite::FlatBufferModel> model;
+};
+
+extern "C" JNIEXPORT jlong JNICALL
+Java_com_example_android_smartreply_SmartReplyClient_loadJNI(
+    JNIEnv* env, jobject thiz, jobject model_buffer,
+    jobjectArray backoff_list) {
+  const char* buf =
+      static_cast<char*>(env->GetDirectBufferAddress(model_buffer));
+  jlong capacity = env->GetDirectBufferCapacity(model_buffer);
+
+  JNIStorage* storage = new JNIStorage;
+  storage->model = tflite::FlatBufferModel::BuildFromBuffer(
+      buf, static_cast<size_t>(capacity));
+  storage->backoff_list = jniStringArrayToVector(env, backoff_list);
+
+  if (!storage->model) {
+    delete storage;
+    env->ThrowNew(env->FindClass(kIllegalStateException), "");
+    return 0;
+  }
+  return reinterpret_cast<jlong>(storage);
+}
+
+extern "C" JNIEXPORT jobjectArray JNICALL
+Java_com_example_android_smartreply_SmartReplyClient_predictJNI(
+    JNIEnv* env, jobject /*thiz*/, jlong storage_ptr, jobjectArray input_text) {
+  // Predict
+  if (storage_ptr == 0) {
+    return nullptr;
+  }
+  JNIStorage* storage = reinterpret_cast<JNIStorage*>(storage_ptr);
+  if (storage == nullptr) {
+    return nullptr;
+  }
+  std::vector<PredictorResponse> responses;
+  GetSegmentPredictions(jniStringArrayToVector(env, input_text),
+                        *storage->model, {storage->backoff_list}, &responses);
+
+  // Create a SmartReply[] to return back to Java
+  jclass smart_reply_class = CheckNotNull(
+      env, env->FindClass("com/example/android/smartreply/SmartReply"));
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+  jmethodID smart_reply_ctor = CheckNotNull(
+      env,
+      env->GetMethodID(smart_reply_class, "<init>", "(Ljava/lang/String;F)V"));
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+  jobjectArray array = CheckNotNull(
+      env, env->NewObjectArray(responses.size(), smart_reply_class, nullptr));
+  if (env->ExceptionCheck()) {
+    return nullptr;
+  }
+  for (int i = 0; i < responses.size(); i++) {
+    jstring text =
+        CheckNotNull(env, env->NewStringUTF(responses[i].GetText().data()));
+    if (env->ExceptionCheck()) {
+      return nullptr;
+    }
+    jobject reply = env->NewObject(smart_reply_class, smart_reply_ctor, text,
+                                   responses[i].GetScore());
+    env->SetObjectArrayElement(array, i, reply);
+  }
+  return array;
+}
+
+extern "C" JNIEXPORT void JNICALL
+Java_com_example_android_smartreply_SmartReplyClient_unloadJNI(
+    JNIEnv* env, jobject thiz, jlong storage_ptr) {
+  if (storage_ptr != 0) {
+    JNIStorage* storage = reinterpret_cast<JNIStorage*>(storage_ptr);
+    delete storage;
+  }
+}
diff --git a/tensorflow/contrib/lite/models/smartreply/g3doc/README.md b/tensorflow/contrib/lite/models/smartreply/g3doc/README.md
index cab5dcca43a31ec3cf824f00d6794ea9e66d9bf8..a6d75648b3f3da98afd85daad6c2234e73a802e8 100644
--- a/tensorflow/contrib/lite/models/smartreply/g3doc/README.md
+++ b/tensorflow/contrib/lite/models/smartreply/g3doc/README.md
@@ -137,8 +137,8 @@ Following are the ops supported for using On-Device Smart Reply model:
 
 *   **HASHTABLE_LOOKUP**
 
-    This is a custom op that uses label id from predict op and looks up the
-    response text from the given label id.
+    This is an op inside TensorFlow Lite that uses label id from predict op and
+    looks up the response text from the given label id.
 
 ## Further Information
 
diff --git a/tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc b/tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc
index 1c422b659abc0871a346b8cffc260df4b22a4f9d..f97a6486d6c11cf0184622f515fe5b1e096c6257 100644
--- a/tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc
+++ b/tensorflow/contrib/lite/models/smartreply/ops/extract_feature.cc
@@ -23,7 +23,7 @@ limitations under the License.
 
 #include <algorithm>
 #include <map>
-#include "re2/re2.h"
+
 #include "tensorflow/contrib/lite/context.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
 #include "tensorflow/contrib/lite/string_util.h"
@@ -81,7 +81,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TfLiteTensor* label = GetOutput(context, node, 0);
   TfLiteTensor* weight = GetOutput(context, node, 1);
 
-  std::map<int64, int> feature_id_counts;
+  std::map<int64_t, int> feature_id_counts;
   for (int i = 0; i < num_strings; i++) {
     // Use fingerprint of feature name as id.
     auto strref = tflite::GetString(input, i);
@@ -91,10 +91,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       continue;
     }
 
-    int64 feature_id =
+    int64_t feature_id =
         ::util::Fingerprint64(strref.str, strref.len) % kMaxDimension;
-
-    label->data.i32[i] = static_cast<int32>(feature_id);
+    label->data.i32[i] = static_cast<int32_t>(feature_id);
     weight->data.f[i] =
         std::count(strref.str, strref.str + strref.len, ' ') + 1;
   }
diff --git a/tensorflow/contrib/lite/models/smartreply/ops/normalize.cc b/tensorflow/contrib/lite/models/smartreply/ops/normalize.cc
index d0dc2a35a7cc527bef0b24508f207da8eec17fc0..c55ac9f52f7293a8ba5baf17f2052e11a7422074 100644
--- a/tensorflow/contrib/lite/models/smartreply/ops/normalize.cc
+++ b/tensorflow/contrib/lite/models/smartreply/ops/normalize.cc
@@ -21,7 +21,10 @@ limitations under the License.
 // Output:
 //     Output[0]: Normalized sentence. string[1]
 //
-#include "absl/strings/ascii.h"
+
+#include <algorithm>
+#include <string>
+
 #include "absl/strings/str_cat.h"
 #include "absl/strings/strip.h"
 #include "re2/re2.h"
@@ -50,7 +53,7 @@ const std::map<string, string>* kRegexTransforms =
 
 static const char kStartToken[] = "<S>";
 static const char kEndToken[] = "<E>";
-static const int32 kMaxInputChars = 300;
+static const int32_t kMaxInputChars = 300;
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   tflite::StringRef input = tflite::GetString(GetInput(context, node, 0), 0);
diff --git a/tensorflow/contrib/lite/models/smartreply/predictor.cc b/tensorflow/contrib/lite/models/smartreply/predictor.cc
index a28222213ea8c66a1e9288ba9ae06aea7653f108..6da5cc8eecc0920850f666b0992c4d9598c55b6c 100644
--- a/tensorflow/contrib/lite/models/smartreply/predictor.cc
+++ b/tensorflow/contrib/lite/models/smartreply/predictor.cc
@@ -30,7 +30,7 @@ namespace custom {
 namespace smartreply {
 
 // Split sentence into segments (using punctuation).
-std::vector<string> SplitSentence(const string& input) {
+std::vector<std::string> SplitSentence(const std::string& input) {
   string result(input);
 
   RE2::GlobalReplace(&result, "([?.!,])+", " \\1");
@@ -38,12 +38,13 @@ std::vector<string> SplitSentence(const string& input) {
   RE2::GlobalReplace(&result, "[ ]+", " ");
   RE2::GlobalReplace(&result, "\t+$", "");
 
-  return strings::Split(result, '\t');
+  return absl::StrSplit(result, '\t');
 }
 
 // Predict with TfLite model.
-void ExecuteTfLite(const string& sentence, ::tflite::Interpreter* interpreter,
-                   std::map<string, float>* response_map) {
+void ExecuteTfLite(const std::string& sentence,
+                   ::tflite::Interpreter* interpreter,
+                   std::map<std::string, float>* response_map) {
   {
     TfLiteTensor* input = interpreter->tensor(interpreter->inputs()[0]);
     tflite::DynamicBuffer buf;
@@ -67,8 +68,8 @@ void ExecuteTfLite(const string& sentence, ::tflite::Interpreter* interpreter,
 }
 
 void GetSegmentPredictions(
-    const std::vector<string>& input, const ::tflite::FlatBufferModel& model,
-    const SmartReplyConfig& config,
+    const std::vector<std::string>& input,
+    const ::tflite::FlatBufferModel& model, const SmartReplyConfig& config,
     std::vector<PredictorResponse>* predictor_responses) {
   // Initialize interpreter
   std::unique_ptr<::tflite::Interpreter> interpreter;
@@ -82,10 +83,10 @@ void GetSegmentPredictions(
   }
 
   // Execute Tflite Model
-  std::map<string, float> response_map;
-  std::vector<string> sentences;
-  for (const string& str : input) {
-    std::vector<string> splitted_str = SplitSentence(str);
+  std::map<std::string, float> response_map;
+  std::vector<std::string> sentences;
+  for (const std::string& str : input) {
+    std::vector<std::string> splitted_str = SplitSentence(str);
     sentences.insert(sentences.end(), splitted_str.begin(), splitted_str.end());
   }
   for (const auto& sentence : sentences) {
diff --git a/tensorflow/contrib/lite/models/smartreply/predictor.h b/tensorflow/contrib/lite/models/smartreply/predictor.h
index 3b9a2b32e17f93f7ebbf35e77ec1e238fe14b020..d17323a3f9a0ea80ad5e215b0a4700e625d0c590 100644
--- a/tensorflow/contrib/lite/models/smartreply/predictor.h
+++ b/tensorflow/contrib/lite/models/smartreply/predictor.h
@@ -34,7 +34,7 @@ struct SmartReplyConfig;
 // With a given string as input, predict the response with a Tflite model.
 // When config.backoff_response is not empty, predictor_responses will be filled
 // with messagees from backoff response.
-void GetSegmentPredictions(const std::vector<string>& input,
+void GetSegmentPredictions(const std::vector<std::string>& input,
                            const ::tflite::FlatBufferModel& model,
                            const SmartReplyConfig& config,
                            std::vector<PredictorResponse>* predictor_responses);
@@ -43,17 +43,17 @@ void GetSegmentPredictions(const std::vector<string>& input,
 // It includes messages, and confidence.
 class PredictorResponse {
  public:
-  PredictorResponse(const string& response_text, float score) {
+  PredictorResponse(const std::string& response_text, float score) {
     response_text_ = response_text;
     prediction_score_ = score;
   }
 
   // Accessor methods.
-  const string& GetText() const { return response_text_; }
+  const std::string& GetText() const { return response_text_; }
   float GetScore() const { return prediction_score_; }
 
  private:
-  string response_text_ = "";
+  std::string response_text_ = "";
   float prediction_score_ = 0.0;
 };
 
@@ -65,9 +65,9 @@ struct SmartReplyConfig {
   float backoff_confidence;
   // Backoff responses are used when predicted responses cannot fulfill the
   // list.
-  const std::vector<string>& backoff_responses;
+  const std::vector<std::string>& backoff_responses;
 
-  SmartReplyConfig(std::vector<string> backoff_responses)
+  SmartReplyConfig(std::vector<std::string> backoff_responses)
       : num_response(kDefaultNumResponse),
         backoff_confidence(kDefaultBackoffConfidence),
         backoff_responses(backoff_responses) {}
diff --git a/tensorflow/contrib/lite/models/smartreply/predictor_test.cc b/tensorflow/contrib/lite/models/smartreply/predictor_test.cc
index 2fa9923bc93d7e559884b6880187637b78f4b217..97d3c650e21c3cb4bef1db09df93f4bf24f38ba5 100644
--- a/tensorflow/contrib/lite/models/smartreply/predictor_test.cc
+++ b/tensorflow/contrib/lite/models/smartreply/predictor_test.cc
@@ -18,12 +18,12 @@ limitations under the License.
 #include <fstream>
 #include <unordered_set>
 
-#include "base/logging.h"
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
 #include "tensorflow/contrib/lite/models/test_utils.h"
+#include "tensorflow/contrib/lite/string_util.h"
 
 namespace tflite {
 namespace custom {
@@ -65,7 +65,6 @@ TEST_F(PredictorTest, GetSegmentPredictions) {
 
   float max = 0;
   for (const auto &item : predictions) {
-    LOG(INFO) << "Response: " << item.GetText();
     if (item.GetScore() > max) {
       max = item.GetScore();
     }
@@ -86,7 +85,6 @@ TEST_F(PredictorTest, TestTwoSentences) {
 
   float max = 0;
   for (const auto &item : predictions) {
-    LOG(INFO) << "Response: " << item.GetText();
     if (item.GetScore() > max) {
       max = item.GetScore();
     }
@@ -119,7 +117,7 @@ TEST_F(PredictorTest, BatchTest) {
   string line;
   std::ifstream fin(StrCat(TestDataPath(), "/", kSamples));
   while (std::getline(fin, line)) {
-    const std::vector<string> &fields = strings::Split(line, '\t');
+    const std::vector<string> fields = absl::StrSplit(line, '\t');
     if (fields.empty()) {
       continue;
     }
@@ -139,9 +137,8 @@ TEST_F(PredictorTest, BatchTest) {
                                   fields.begin() + 1, fields.end())));
   }
 
-  LOG(INFO) << "Responses: " << total_responses << " / " << total_items;
-  LOG(INFO) << "Triggers: " << total_triggers << " / " << total_items;
   EXPECT_EQ(total_triggers, total_items);
+  EXPECT_GE(total_responses, total_triggers);
 }
 
 }  // namespace
diff --git a/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc b/tensorflow/contrib/lite/models/speech_asr_am_model_test.cc
similarity index 93%
rename from tensorflow/contrib/lite/models/speech_terse_am_model_test.cc
rename to tensorflow/contrib/lite/models/speech_asr_am_model_test.cc
index 30d89a135403db2ef6e4533ddcc321206bf8bd5e..bf95b313f31c2f76046727353a9a7b0658dbf067 100644
--- a/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc
+++ b/tensorflow/contrib/lite/models/speech_asr_am_model_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-// Unit test for speech TERSE AM model using TFLite Ops.
+// Unit test for speech ASR AM model using TFLite Ops.
 
 #include <string.h>
 
@@ -45,10 +45,10 @@ constexpr int kLstmLayer5OutputStateTensor = 103;
 constexpr int kLstmLayer5CellStateTensor = 104;
 constexpr int kModelOutputTensor = 109;
 
-TEST(SpeechTerseAm, RandomIOTest) {
+TEST(SpeechAsrAm, RandomIOTest) {
   // Read the model.
   string tflite_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_am_model.tflite");
+      file::JoinPath(TestDataPath(), "speech_asr_am_model.tflite");
   auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str());
   CHECK(model) << "Failed to mmap model " << tflite_file_path;
 
@@ -62,13 +62,13 @@ TEST(SpeechTerseAm, RandomIOTest) {
   // Load the input frames.
   Frames input_frames;
   const string input_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_am_model_in.csv");
+      file::JoinPath(TestDataPath(), "speech_asr_am_model_in.csv");
   ReadFrames(input_file_path, &input_frames);
 
   // Load the golden output results.
   Frames output_frames;
   const string output_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_am_model_out.csv");
+      file::JoinPath(TestDataPath(), "speech_asr_am_model_out.csv");
   ReadFrames(output_file_path, &output_frames);
 
   const int speech_batch_size =
diff --git a/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc b/tensorflow/contrib/lite/models/speech_asr_lm_model_test.cc
similarity index 94%
rename from tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc
rename to tensorflow/contrib/lite/models/speech_asr_lm_model_test.cc
index 04c54ffb2201acaac069e01707e10194f78789fd..53f2b66da492f8fe56fa9e234f0951cf61c35037 100644
--- a/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc
+++ b/tensorflow/contrib/lite/models/speech_asr_lm_model_test.cc
@@ -59,10 +59,10 @@ static void ClearLstmStates(Interpreter* interpreter) {
          interpreter->tensor(kLstmLayer3CellStateTensor)->bytes);
 }
 
-TEST(SpeechTerseLm, EndToEndTest) {
+TEST(SpeechAsrLm, EndToEndTest) {
   // Read the model.
   string tflite_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_lm_model.tflite");
+      file::JoinPath(TestDataPath(), "speech_asr_lm_model.tflite");
   auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str());
   CHECK(model) << "Failed to mmap model " << tflite_file_path;
 
@@ -76,13 +76,13 @@ TEST(SpeechTerseLm, EndToEndTest) {
   // Load the input frames.
   Frames input_frames;
   const string input_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_lm_model_in.csv");
+      file::JoinPath(TestDataPath(), "speech_asr_lm_model_in.csv");
   ReadFrames(input_file_path, &input_frames);
 
   // Load the golden output results.
   Frames output_frames;
   const string output_file_path =
-      file::JoinPath(TestDataPath(), "speech_terse_lm_model_out.csv");
+      file::JoinPath(TestDataPath(), "speech_asr_lm_model_out.csv");
   ReadFrames(output_file_path, &output_frames);
 
   CHECK_EQ(interpreter->tensor(kModelInput1Tensor)->dims->size, 1);
diff --git a/tensorflow/contrib/lite/models/speech_endpointer_model_test.cc b/tensorflow/contrib/lite/models/speech_endpointer_model_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f7e136113aa056fdc87378f8c902f53c811cd39c
--- /dev/null
+++ b/tensorflow/contrib/lite/models/speech_endpointer_model_test.cc
@@ -0,0 +1,104 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// Unit test for speech EndPointer model using TFLite Ops.
+
+#include <string.h>
+
+#include <memory>
+#include <string>
+
+#include "base/logging.h"
+#include "testing/base/public/googletest.h"
+#include <gtest/gtest.h>
+#include "absl/strings/str_cat.h"
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/models/test_utils.h"
+
+namespace tflite {
+namespace models {
+
+constexpr int kModelInputTensor = 0;
+constexpr int kLstmLayer1OutputStateTensor = 28;
+constexpr int kLstmLayer1CellStateTensor = 29;
+constexpr int kLstmLayer2OutputStateTensor = 49;
+constexpr int kLstmLayer2CellStateTensor = 50;
+constexpr int kModelOutputTensor = 58;
+
+TEST(SpeechEndpointer, EndpointerTest) {
+  // Read the model.
+  string tflite_file_path =
+      StrCat(TestDataPath(), "/", "speech_endpointer_model.tflite");
+  auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str());
+  CHECK(model) << "Failed to read model from file " << tflite_file_path;
+
+  // Initialize the interpreter.
+  ops::builtin::BuiltinOpResolver builtins;
+  std::unique_ptr<Interpreter> interpreter;
+  InterpreterBuilder(*model, builtins)(&interpreter);
+  CHECK(interpreter != nullptr);
+  interpreter->AllocateTensors();
+
+  // Load the input frames.
+  Frames input_frames;
+  const string input_file_path =
+      StrCat(TestDataPath(), "/", "speech_endpointer_model_in.csv");
+  ReadFrames(input_file_path, &input_frames);
+
+  // Load the golden output results.
+  Frames output_frames;
+  const string output_file_path =
+      StrCat(TestDataPath(), "/", "speech_endpointer_model_out.csv");
+  ReadFrames(output_file_path, &output_frames);
+
+  const int speech_batch_size =
+      interpreter->tensor(kModelInputTensor)->dims->data[0];
+  const int speech_input_size =
+      interpreter->tensor(kModelInputTensor)->dims->data[1];
+  const int speech_output_size =
+      interpreter->tensor(kModelOutputTensor)->dims->data[1];
+
+  float* input_ptr = interpreter->tensor(kModelInputTensor)->data.f;
+  float* output_ptr = interpreter->tensor(kModelOutputTensor)->data.f;
+
+  // Clear the LSTM state for layers.
+  memset(interpreter->tensor(kLstmLayer1OutputStateTensor)->data.raw, 0,
+         interpreter->tensor(kLstmLayer1OutputStateTensor)->bytes);
+  memset(interpreter->tensor(kLstmLayer1CellStateTensor)->data.raw, 0,
+         interpreter->tensor(kLstmLayer1CellStateTensor)->bytes);
+  memset(interpreter->tensor(kLstmLayer2OutputStateTensor)->data.raw, 0,
+         interpreter->tensor(kLstmLayer2OutputStateTensor)->bytes);
+  memset(interpreter->tensor(kLstmLayer2CellStateTensor)->data.raw, 0,
+         interpreter->tensor(kLstmLayer2CellStateTensor)->bytes);
+
+  for (int i = 0; i < input_frames.size(); i++) {
+    // Feed the input to model.
+    int frame_ptr = 0;
+    for (int k = 0; k < speech_input_size * speech_batch_size; k++) {
+      input_ptr[k] = input_frames[i][frame_ptr++];
+    }
+    // Run the model.
+    interpreter->Invoke();
+    // Validate the output.
+    for (int k = 0; k < speech_output_size; k++) {
+      ASSERT_NEAR(output_ptr[k], output_frames[i][k], 1e-5);
+    }
+  }
+}
+
+}  // namespace models
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/models/speech_hotword_model_test.cc b/tensorflow/contrib/lite/models/speech_hotword_model_test.cc
index 0b8266447adf758184fe3b1ad6a77f1ac6045193..f69cae8d2cb08678f9eec8c9b9d653cfce55bd2e 100644
--- a/tensorflow/contrib/lite/models/speech_hotword_model_test.cc
+++ b/tensorflow/contrib/lite/models/speech_hotword_model_test.cc
@@ -73,8 +73,8 @@ void RunTest(int model_input_tensor, int svdf_layer_state_tensor,
   float* output_ptr = interpreter->tensor(model_output_tensor)->data.f;
 
   // The first layer (SVDF) input size is 40 (speech_input_size). Each speech
-  // input frames for this model is 1280 floats, which can be fed to input in a
-  // sequence of size 32 (input_sequence_size).
+  // input frames for this model is 1600 floats, which can be fed to input in a
+  // sequence of size 40 (input_sequence_size).
   for (int i = 0; i < TestInputSize(input_frames); i++) {
     int frame_ptr = 0;
     for (int s = 0; s < input_sequence_size; s++) {
diff --git a/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc b/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc
index 9da0fb1fc62360dcf584c4a08f99b0cef9964a0d..e208fac8dfcb1b84e9884d303ac9b8a67d4fa47f 100644
--- a/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc
+++ b/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc
@@ -43,7 +43,7 @@ constexpr int kLstmLayer3OutputStateTensor = 61;
 constexpr int kLstmLayer3CellStateTensor = 62;
 constexpr int kModelOutputTensor = 66;
 
-TEST(SpeechSpeakerId, OkGoogleTest) {
+void SpeakerIdTest(bool useNNAPI) {
   // Read the model.
   string tflite_file_path =
       StrCat(TestDataPath(), "/", "speech_speakerid_model.tflite");
@@ -56,6 +56,9 @@ TEST(SpeechSpeakerId, OkGoogleTest) {
   std::unique_ptr<Interpreter> interpreter;
   InterpreterBuilder(*model, resolver)(&interpreter);
   CHECK(interpreter != nullptr);
+
+  interpreter->UseNNAPI(useNNAPI);
+
   interpreter->AllocateTensors();
 
   // Load the input frames.
@@ -110,5 +113,9 @@ TEST(SpeechSpeakerId, OkGoogleTest) {
   }
 }
 
+TEST(SpeechSpeakerId, OkGoogleTest) { SpeakerIdTest(false); }
+
+TEST(SpeechSpeakerId, OkGoogleTestUsingNNAPI) { SpeakerIdTest(true); }
+
 }  // namespace models
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/README.md b/tensorflow/contrib/lite/models/testdata/g3doc/README.md
index c9630c00db56a0d40979f9fe9704cf0c9583a015..667a58838329145a9500576749c5aa497641d61c 100644
--- a/tensorflow/contrib/lite/models/testdata/g3doc/README.md
+++ b/tensorflow/contrib/lite/models/testdata/g3doc/README.md
@@ -75,6 +75,20 @@ The corresponding parameters as shown in the figure.
 
 ![asr_lm_model](asr_lm.svg "ASR LM model")
 
+### Endpointer Model
+
+The endpointer model is the neural network model for predicting end of speech
+in an utterance. More precisely, it generates posterior probabilities of various
+events that allow detection of speech start and end events.
+It has an input size of 40 (float) which are speech frontend features
+(log-mel filterbanks), and an output size of four corresponding to:
+speech, intermediate non-speech, initial non-speech, and final non-speech.
+The model consists of a convolutional layer, followed by a fully-connected
+layer, two LSTM layers, and two additional fully-connected layers.
+The corresponding parameters as shown in the figure.
+![endpointer_model](endpointer.svg "Endpointer model")
+
+
 ## Speech models test input/output generation
 
 As mentioned above the input to models are generated from a pre-processing
@@ -86,25 +100,40 @@ same input.
 
 ### Models:
 
-[Speech hotword model (Svdf rank=1)](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_hotword_model_rank1_2017_11_14.tflite)
+[Speech hotword model (Svdf
+rank=1)](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_hotword_model_rank1_2017_11_14.tflite)
 
-[Speech hotword model (Svdf rank=2)](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_hotword_model_rank2_2017_11_14.tflite)
+[Speech hotword model (Svdf
+rank=2)](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_hotword_model_rank2_2017_11_14.tflite)
 
-[Speaker-id model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_speakerid_model_2017_11_14.tflite)
+[Speaker-id
+model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_speakerid_model_2017_11_14.tflite)
 
-[TTS model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_tts_model_2017_11_14.tflite)
+[TTS
+model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_tts_model_2017_11_14.tflite)
 
-[ASR AM model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_terse_am_model_2017_11_14.tflite)
+[ASR AM
+model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/speech_terse_am_model_2017_11_14.tflite)
 
 ### Test benches
 
-[Speech hotword model test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_hotword_model_test.cc)
+[Speech hotword model
+test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_hotword_model_test.cc)
+
+[Speaker-id model
+test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc)
+
+[TTS model
+test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_tts_model_test.cc)
 
-[Speaker-id model test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_speakerid_model_test.cc)
+[ASR AM model
+test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_asr_am_model_test.cc)
 
-[TTS model test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_tts_model_test.cc)
+[ASR LM model
+test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_asr_lm_model_test.cc)
 
-[ASR AM model test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_terse_am_model_test.cc)
+[Endpointer model
+test](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/models/speech_endpointer_model_test.cc)
 
 ## Android Support
 The models have been tested on Android phones, using the following tests:
diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/endpointer.svg b/tensorflow/contrib/lite/models/testdata/g3doc/endpointer.svg
new file mode 100644
index 0000000000000000000000000000000000000000..6033bdc529e18355131965a26c49b6f17d671f27
--- /dev/null
+++ b/tensorflow/contrib/lite/models/testdata/g3doc/endpointer.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" standalone="yes"?>
+
+<svg version="1.1" viewBox="0.0 0.0 681.8005249343832 883.6010498687664" fill="none" stroke="none" stroke-linecap="square" stroke-miterlimit="10" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><clipPath id="p.0"><path d="m0 0l681.80054 0l0 883.6011l-681.80054 0l0 -883.6011z" clip-rule="nonzero"></path></clipPath><g clip-path="url(#p.0)"><path fill="#000000" fill-opacity="0.0" d="m0 0l681.80054 0l0 883.6011l-681.80054 0z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m261.15503 14.700843l166.01575 0l0 42.110233l-166.01575 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m261.15503 14.700843l166.01575 0l0 42.110233l-166.01575 0z" fill-rule="evenodd"></path><path fill="#000000" d="m278.78244 41.620842l0 -13.593752l1.8125 0l0 13.593752l-1.8125 0zm4.6676636 0l0 -9.859377l1.5 0l0 1.4062519q1.09375 -1.6250019 3.140625 -1.6250019q0.890625 0 1.640625 0.328125q0.75 0.3125019 1.109375 0.8437519q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375732 3.78125l0 -13.640627l1.53125 0l0 1.2812519q0.53125 -0.75 1.203125 -1.1250019q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.6562519q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313202 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109377l1.671875 0l0 5.468752q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.281252l1.671875 0l0 9.859377l-1.5 0zm7.5788574 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125019l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125019l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.897858 5.5q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.343752 2.578125 -4.671877l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671877q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.2812519 1.0625 -0.4843769q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.3281269 1.28125 0.9062519q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.750002l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.687502l0 -9.859377l1.671875 0l0 9.859377l-1.671875 0zm3.254181 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.3593769l8.046875 0l0 1.1093769l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.3750019 3.328125 -1.3750019q1.984375 0 3.234375 1.3437519q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm12.187653 3.875l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.812502l1.359375 0l0 8.812502l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140627l-4.25 6.140627l4.25 0zm5.016327 -1.921875q0 -2.421875 0.5 -3.890627q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875019 0.3125 3.218752q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359377 -0.78125 -4.468752q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.593752zm10.219482 10.703125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.3906269 -0.890625 -2.671877q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671877q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m228.15503 78.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m228.15503 78.02362l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m308.4097 104.94362l0 -13.59375l6.03125 0q1.8125 0 2.75 0.359375q0.953125 0.359375 1.515625 1.296875q0.5625 0.921875 0.5625 2.046875q0 1.453125 -0.9375 2.453125q-0.921875 0.984375 -2.890625 1.25q0.71875 0.34375 1.09375 0.671875q0.78125 0.734375 1.484375 1.8125l2.375 3.703125l-2.265625 0l-1.796875 -2.828125q-0.796875 -1.21875 -1.3125 -1.875q-0.5 -0.65625 -0.90625 -0.90625q-0.40625 -0.265625 -0.8125 -0.359375q-0.3125 -0.078125 -1.015625 -0.078125l-2.078125 0l0 6.046875l-1.796875 0zm1.796875 -7.59375l3.859375 0q1.234375 0 1.921875 -0.25q0.703125 -0.265625 1.0625 -0.828125q0.375 -0.5625 0.375 -1.21875q0 -0.96875 -0.703125 -1.578125q-0.703125 -0.625 -2.21875 -0.625l-4.296875 0l0 4.5zm18.176056 4.421875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm8.438232 2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.0 2.9375l0 -13.59375l1.671875 0l0 4.875q1.171875 -1.359375 2.953125 -1.359375q1.09375 0 1.890625 0.4375q0.8125 0.421875 1.15625 1.1875q0.359375 0.765625 0.359375 2.203125l0 6.25l-1.671875 0l0 -6.25q0 -1.25 -0.546875 -1.8125q-0.546875 -0.578125 -1.53125 -0.578125q-0.75 0 -1.40625 0.390625q-0.640625 0.375 -0.921875 1.046875q-0.28125 0.65625 -0.28125 1.8125l0 5.390625l-1.671875 0zm16.813202 -1.21875q-0.9375 0.796875 -1.796875 1.125q-0.859375 0.3125 -1.84375 0.3125q-1.609375 0 -2.484375 -0.78125q-0.875 -0.796875 -0.875 -2.03125q0 -0.734375 0.328125 -1.328125q0.328125 -0.59375 0.859375 -0.953125q0.53125 -0.359375 1.203125 -0.546875q0.5 -0.140625 1.484375 -0.25q2.03125 -0.25 2.984375 -0.578125q0 -0.34375 0 -0.4375q0 -1.015625 -0.46875 -1.4375q-0.640625 -0.5625 -1.90625 -0.5625q-1.171875 0 -1.734375 0.40625q-0.5625 0.40625 -0.828125 1.46875l-1.640625 -0.234375q0.234375 -1.046875 0.734375 -1.6875q0.515625 -0.640625 1.46875 -0.984375q0.96875 -0.359375 2.25 -0.359375q1.265625 0 2.046875 0.296875q0.78125 0.296875 1.15625 0.75q0.375 0.453125 0.515625 1.140625q0.09375 0.421875 0.09375 1.53125l0 2.234375q0 2.328125 0.09375 2.953125q0.109375 0.609375 0.4375 1.171875l-1.75 0q-0.265625 -0.515625 -0.328125 -1.21875zm-0.140625 -3.71875q-0.90625 0.359375 -2.734375 0.625q-1.03125 0.140625 -1.453125 0.328125q-0.421875 0.1875 -0.65625 0.546875q-0.234375 0.359375 -0.234375 0.796875q0 0.671875 0.5 1.125q0.515625 0.4375 1.484375 0.4375q0.96875 0 1.71875 -0.421875q0.75 -0.4375 1.109375 -1.15625q0.265625 -0.578125 0.265625 -1.671875l0 -0.609375zm4.0788574 8.71875l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.610077 1.703125l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875z" fill-rule="nonzero"></path><path fill="#000000" d="m268.58267 130.94362q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.254181 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm7.3439026 7.65625l0 -17.375l3.671875 0l0 1.375l-2.015625 0l0 14.609375l2.015625 0l0 1.390625l-3.671875 0zm10.964539 -3.78125l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm4.9851074 0l0 -1.90625l1.90625 0l0 1.90625q0 1.046875 -0.375 1.6875q-0.375 0.65625 -1.171875 1.0l-0.46875 -0.71875q0.53125 -0.21875 0.78125 -0.671875q0.25 -0.453125 0.28125 -1.296875l-0.953125 0zm14.819733 0l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm5.016327 -1.921875q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm9.563232 6.703125l0 -1.90625l1.90625 0l0 1.90625q0 1.046875 -0.375 1.6875q-0.375 0.65625 -1.171875 1.0l-0.46875 -0.71875q0.53125 -0.21875 0.78125 -0.671875q0.25 -0.453125 0.28125 -1.296875l-0.953125 0zm15.757233 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm4.985077 0l0 -1.90625l1.90625 0l0 1.90625q0 1.046875 -0.375 1.6875q-0.375 0.65625 -1.171875 1.0l-0.46875 -0.71875q0.53125 -0.21875 0.78125 -0.671875q0.25 -0.453125 0.28125 -1.296875l-0.953125 0zm15.757233 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm7.3444824 3.78125l-3.6875 0l0 -1.390625l2.015625 0l0 -14.609375l-2.015625 0l0 -1.375l3.6875 0l0 17.375zm3.4801636 0.21875l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m254.95555 833.01575l180.00002 0l0 42.11023l-180.00002 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m254.95555 833.01575l180.00002 0l0 42.11023l-180.00002 0z" fill-rule="evenodd"></path><path fill="#000000" d="m276.66348 853.3107q0 -3.390625 1.8125 -5.296875q1.828125 -1.921875 4.703125 -1.921875q1.875 0 3.390625 0.90625q1.515625 0.890625 2.296875 2.5q0.796875 1.609375 0.796875 3.65625q0 2.0625 -0.84375 3.703125q-0.828125 1.625 -2.359375 2.46875q-1.53125 0.84375 -3.296875 0.84375q-1.921875 0 -3.4375 -0.921875q-1.5 -0.9375 -2.28125 -2.53125q-0.78125 -1.609375 -0.78125 -3.40625zm1.859375 0.03125q0 2.453125 1.3125 3.875q1.328125 1.40625 3.3125 1.40625q2.03125 0 3.34375 -1.421875q1.3125 -1.4375 1.3125 -4.0625q0 -1.65625 -0.5625 -2.890625q-0.546875 -1.234375 -1.640625 -1.921875q-1.078125 -0.6875 -2.421875 -0.6875q-1.90625 0 -3.28125 1.3125q-1.375 1.3125 -1.375 4.390625zm19.433289 6.59375l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.5788574 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5270386 5.28125l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.313232 4.875l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm7.578827 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm9.897858 5.5q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.2542114 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm12.187622 3.875l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm6.5788574 8.78125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m344.2495 137.01575l0 24.724411" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m344.2495 137.01575l0 18.724411" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m342.59778 155.74016l1.6517334 4.538101l1.6517334 -4.538101z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m344.2495 220.72906l0 25.291336" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m344.2495 220.72906l0 19.291336" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m342.59778 240.0204l1.6517334 4.538101l1.6517334 -4.538101z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m344.1629 56.811077l0.09448242 21.19685" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m344.1629 56.81108l0.06774902 15.196915" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m342.57892 72.01535l1.671936 4.530693l1.6315002 -4.545418z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m228.15503 694.4199l232.18896 0l0 42.11029l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m228.15503 694.4199l232.18896 0l0 42.11029l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m251.96599 721.33997l0 -13.59375l9.17186 0l0 1.59375l-7.3749847 0l0 4.21875l6.3749847 0l0 1.609375l-6.3749847 0l0 6.171875l-1.796875 0zm17.536606 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.891327 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.1448364 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.097931 3.796875l-0.171875 -1.5625q0.546875 0.140625 0.953125 0.140625q0.546875 0 0.875 -0.1875q0.34375 -0.1875 0.5625 -0.515625q0.15625 -0.25 0.5 -1.25q0.046875 -0.140625 0.15625 -0.40625l-3.734375 -9.875l1.796875 0l2.046875 5.71875q0.40625 1.078125 0.71875 2.28125q0.28125 -1.15625 0.6875 -2.25l2.09375 -5.75l1.671875 0l-3.75 10.03125q-0.59375 1.625 -0.9375 2.234375q-0.4375 0.828125 -1.015625 1.203125q-0.578125 0.390625 -1.375 0.390625q-0.484375 0 -1.078125 -0.203125zm19.328125 -8.5625l1.796875 0.453125q-0.5625 2.21875 -2.03125 3.390625q-1.46875 1.15625 -3.59375 1.15625q-2.203125 0 -3.578125 -0.890625q-1.375 -0.90625 -2.09375 -2.59375q-0.71875 -1.703125 -0.71875 -3.65625q0 -2.125 0.796875 -3.703125q0.8125 -1.578125 2.3125 -2.390625q1.5 -0.828125 3.296875 -0.828125q2.046875 0 3.4375 1.046875q1.390625 1.03125 1.9375 2.90625l-1.765625 0.421875q-0.46875 -1.484375 -1.375 -2.15625q-0.90625 -0.6875 -2.265625 -0.6875q-1.5625 0 -2.625 0.75q-1.046875 0.75 -1.484375 2.03125q-0.421875 1.265625 -0.421875 2.609375q0 1.734375 0.5 3.03125q0.515625 1.28125 1.578125 1.921875q1.078125 0.640625 2.3125 0.640625q1.515625 0 2.5625 -0.859375q1.046875 -0.875 1.421875 -2.59375zm2.9260864 -0.15625q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281952 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375732 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm17.125702 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547607 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm8.277039 -1.671875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500732 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm17.637146 8.921875q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm11.228302 -14.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm13.875702 4.40625l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm4.3757324 4.78125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm15.328125 0l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm6.578827 8.78125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m344.95538 503.2441l0 37.88974" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m344.95538 503.24408l0 31.88977" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m343.30365 535.13385l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m344.2495 284.66928l0 25.35434" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m344.2495 284.66928l0 19.35434" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m342.59778 304.02362l1.6517334 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m344.2495 664.5302l0 29.88971" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m344.2495 664.5302l0 23.88971" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m342.59778 688.4199l1.6517334 4.538147l1.6517334 -4.538147z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m228.15503 161.73694l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m228.15503 161.73694l232.18896 0l0 58.992126l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m321.72083 183.89131l1.796875 0.453125q-0.5625 2.21875 -2.03125 3.390625q-1.46875 1.15625 -3.59375 1.15625q-2.203125 0 -3.578125 -0.890625q-1.375 -0.90625 -2.09375 -2.59375q-0.71875 -1.703125 -0.71875 -3.65625q0 -2.125 0.796875 -3.703125q0.8125 -1.578125 2.3125 -2.390625q1.5 -0.828125 3.296875 -0.828125q2.046875 0 3.4375 1.046875q1.390625 1.03125 1.9375 2.90625l-1.765625 0.421875q-0.46875 -1.484375 -1.375 -2.15625q-0.90625 -0.6875 -2.265625 -0.6875q-1.5625 0 -2.625 0.75q-1.046875 0.75 -1.484375 2.03125q-0.421875 1.265625 -0.421875 2.609375q0 1.734375 0.5 3.03125q0.515625 1.28125 1.578125 1.921875q1.078125 0.640625 2.3125 0.640625q1.515625 0 2.5625 -0.859375q1.046875 -0.875 1.421875 -2.59375zm2.926056 -0.15625q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm13.110077 0l-3.75 -9.859375l1.765625 0l2.125 5.90625q0.34375 0.953125 0.625 1.984375q0.21875 -0.78125 0.625 -1.875l2.1875 -6.015625l1.71875 0l-3.734375 9.859375l-1.5625 0zm14.90625 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm2.2819824 1.609375l0 -13.59375l4.6875 0q1.578125 0 2.421875 0.1875q1.15625 0.265625 1.984375 0.96875q1.078125 0.921875 1.609375 2.34375q0.53125 1.40625 0.53125 3.21875q0 1.546875 -0.359375 2.75q-0.359375 1.1875 -0.921875 1.984375q-0.5625 0.78125 -1.234375 1.234375q-0.671875 0.4375 -1.625 0.671875q-0.953125 0.234375 -2.1875 0.234375l-4.90625 0zm1.796875 -1.609375l2.90625 0q1.34375 0 2.109375 -0.25q0.765625 -0.25 1.21875 -0.703125q0.640625 -0.640625 1.0 -1.71875q0.359375 -1.078125 0.359375 -2.625q0 -2.125 -0.703125 -3.265625q-0.703125 -1.15625 -1.703125 -1.546875q-0.71875 -0.28125 -2.328125 -0.28125l-2.859375 0l0 10.390625z" fill-rule="nonzero"></path><path fill="#000000" d="m268.58267 214.65694q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.254181 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm7.3439026 7.65625l0 -17.375l3.671875 0l0 1.375l-2.015625 0l0 14.609375l2.015625 0l0 1.390625l-3.671875 0zm13.339539 -14.046875l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm13.875732 4.40625l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm5.922577 4.78125l0 -1.90625l1.90625 0l0 1.90625q0 1.046875 -0.375 1.6875q-0.375 0.65625 -1.171875 1.0l-0.46875 -0.71875q0.53125 -0.21875 0.78125 -0.671875q0.25 -0.453125 0.28125 -1.296875l-0.953125 0zm12.038483 -7.375q-1.046875 -0.375 -1.546875 -1.078125q-0.5 -0.71875 -0.5 -1.703125q0 -1.484375 1.0625 -2.484375q1.078125 -1.015625 2.84375 -1.015625q1.78125 0 2.859375 1.03125q1.09375 1.03125 1.09375 2.515625q0 0.953125 -0.5 1.65625q-0.484375 0.703125 -1.5 1.078125q1.25 0.40625 1.90625 1.3125q0.65625 0.90625 0.65625 2.171875q0 1.75 -1.234375 2.9375q-1.234375 1.1875 -3.25 1.1875q-2.015625 0 -3.25 -1.1875q-1.234375 -1.203125 -1.234375 -2.984375q0 -1.328125 0.671875 -2.21875q0.671875 -0.890625 1.921875 -1.21875zm-0.328125 -2.828125q0 0.96875 0.609375 1.578125q0.625 0.609375 1.625 0.609375q0.953125 0 1.5625 -0.609375q0.625 -0.609375 0.625 -1.484375q0 -0.921875 -0.640625 -1.546875q-0.625 -0.625 -1.578125 -0.625q-0.953125 0 -1.578125 0.609375q-0.625 0.609375 -0.625 1.46875zm-0.546875 6.28125q0 0.71875 0.328125 1.390625q0.34375 0.65625 1.015625 1.03125q0.671875 0.359375 1.4375 0.359375q1.203125 0 1.984375 -0.765625q0.78125 -0.78125 0.78125 -1.96875q0 -1.203125 -0.8125 -1.984375q-0.796875 -0.796875 -2.0 -0.796875q-1.1875 0 -1.96875 0.78125q-0.765625 0.78125 -0.765625 1.953125zm9.578857 3.921875l0 -1.90625l1.90625 0l0 1.90625q0 1.046875 -0.375 1.6875q-0.375 0.65625 -1.171875 1.0l-0.46875 -0.71875q0.53125 -0.21875 0.78125 -0.671875q0.25 -0.453125 0.28125 -1.296875l-0.953125 0zm15.757233 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm4.985077 0l0 -1.90625l1.90625 0l0 1.90625q0 1.046875 -0.375 1.6875q-0.375 0.65625 -1.171875 1.0l-0.46875 -0.71875q0.53125 -0.21875 0.78125 -0.671875q0.25 -0.453125 0.28125 -1.296875l-0.953125 0zm15.757233 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm7.3444824 3.78125l-3.6875 0l0 -1.390625l2.015625 0l0 -14.609375l-2.015625 0l0 -1.375l3.6875 0l0 17.375zm3.4801636 0.21875l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m228.15503 246.02362l232.18896 0l0 38.64566l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m228.15503 246.02362l232.18896 0l0 38.64566l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m255.45354 272.94363l0 -13.59375l2.7187347 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.7343597 0zm21.822037 -1.21875q-0.9375 0.796875 -1.796875 1.125q-0.859375 0.3125 -1.84375 0.3125q-1.609375 0 -2.484375 -0.78125q-0.875 -0.796875 -0.875 -2.03125q0 -0.734375 0.328125 -1.328125q0.328125 -0.59375 0.859375 -0.953125q0.53125 -0.359375 1.203125 -0.546875q0.5 -0.140625 1.484375 -0.25q2.03125 -0.25 2.984375 -0.578125q0 -0.34375 0 -0.4375q0 -1.015625 -0.46875 -1.4375q-0.640625 -0.5625 -1.90625 -0.5625q-1.171875 0 -1.734375 0.40625q-0.5625 0.40625 -0.828125 1.46875l-1.640625 -0.234375q0.234375 -1.046875 0.734375 -1.6875q0.515625 -0.640625 1.46875 -0.984375q0.96875 -0.359375 2.25 -0.359375q1.265625 0 2.046875 0.296875q0.78125 0.296875 1.15625 0.75q0.375 0.453125 0.515625 1.140625q0.09375 0.421875 0.09375 1.53125l0 2.234375q0 2.328125 0.09375 2.953125q0.109375 0.609375 0.4375 1.171875l-1.75 0q-0.265625 -0.515625 -0.328125 -1.21875zm-0.140625 -3.71875q-0.90625 0.359375 -2.734375 0.625q-1.03125 0.140625 -1.453125 0.328125q-0.421875 0.1875 -0.65625 0.546875q-0.234375 0.359375 -0.234375 0.796875q0 0.671875 0.5 1.125q0.515625 0.4375 1.484375 0.4375q0.96875 0 1.71875 -0.421875q0.75 -0.4375 1.109375 -1.15625q0.265625 -0.578125 0.265625 -1.671875l0 -0.609375zm2.9694824 4.9375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm10.65625 0l0 -13.59375l5.125 0q1.359375 0 2.078125 0.125q1.0 0.171875 1.671875 0.640625q0.671875 0.46875 1.078125 1.3125q0.421875 0.84375 0.421875 1.84375q0 1.734375 -1.109375 2.9375q-1.09375 1.203125 -3.984375 1.203125l-3.484375 0l0 5.53125l-1.796875 0zm1.796875 -7.140625l3.515625 0q1.75 0 2.46875 -0.640625q0.734375 -0.65625 0.734375 -1.828125q0 -0.859375 -0.4375 -1.46875q-0.421875 -0.609375 -1.125 -0.796875q-0.453125 -0.125 -1.671875 -0.125l-3.484375 0l0 4.859375zm9.802948 2.21875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm8.656952 0q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.250732 4.921875l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm12.488556 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm2.2819824 1.609375l0 -13.59375l4.6875 0q1.578125 0 2.421875 0.1875q1.15625 0.265625 1.984375 0.96875q1.078125 0.921875 1.609375 2.34375q0.53125 1.40625 0.53125 3.21875q0 1.546875 -0.359375 2.75q-0.359375 1.1875 -0.921875 1.984375q-0.5625 0.78125 -1.234375 1.234375q-0.671875 0.4375 -1.625 0.671875q-0.953125 0.234375 -2.1875 0.234375l-4.90625 0zm1.796875 -1.609375l2.90625 0q1.34375 0 2.109375 -0.25q0.765625 -0.25 1.21875 -0.703125q0.640625 -0.640625 1.0 -1.71875q0.359375 -1.078125 0.359375 -2.625q0 -2.125 -0.703125 -3.265625q-0.703125 -1.15625 -1.703125 -1.546875q-0.71875 -0.28125 -2.328125 -0.28125l-2.859375 0l0 10.390625zm19.828125 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm13.65625 1.4375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5114136 1.5l0 -9.859375l1.5 0l0 1.5q0.578125 -1.046875 1.0625 -1.375q0.484375 -0.34375 1.078125 -0.34375q0.84375 0 1.71875 0.546875l-0.578125 1.546875q-0.609375 -0.359375 -1.234375 -0.359375q-0.546875 0 -0.984375 0.328125q-0.421875 0.328125 -0.609375 0.90625q-0.28125 0.890625 -0.28125 1.953125l0 5.15625l-1.671875 0zm6.243927 -11.6875l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm10.519836 0l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm16.016327 1.75l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm9.578857 -2.078125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm0 7.953125l0 -1.90625l1.90625 0l0 1.90625l-1.90625 0zm9.444733 -3.59375l1.671875 -0.21875q0.28125 1.421875 0.96875 2.046875q0.703125 0.625 1.6875 0.625q1.1875 0 2.0 -0.8125q0.8125 -0.828125 0.8125 -2.03125q0 -1.140625 -0.765625 -1.890625q-0.75 -0.75 -1.90625 -0.75q-0.46875 0 -1.171875 0.1875l0.1875 -1.46875q0.15625 0.015625 0.265625 0.015625q1.0625 0 1.90625 -0.546875q0.859375 -0.5625 0.859375 -1.71875q0 -0.921875 -0.625 -1.515625q-0.609375 -0.609375 -1.59375 -0.609375q-0.96875 0 -1.625 0.609375q-0.640625 0.609375 -0.828125 1.84375l-1.671875 -0.296875q0.296875 -1.6875 1.375 -2.609375q1.09375 -0.921875 2.71875 -0.921875q1.109375 0 2.046875 0.484375q0.9375 0.46875 1.421875 1.296875q0.5 0.828125 0.5 1.75q0 0.890625 -0.46875 1.609375q-0.46875 0.71875 -1.40625 1.15625q1.21875 0.265625 1.875 1.15625q0.671875 0.875 0.671875 2.1875q0 1.78125 -1.296875 3.015625q-1.296875 1.234375 -3.28125 1.234375q-1.796875 0 -2.984375 -1.0625q-1.171875 -1.0625 -1.34375 -2.765625zm11.922577 7.59375l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m228.86089 461.13388l232.18898 0l0 42.11023l-232.18898 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m228.86089 461.13388l232.18898 0l0 42.11023l-232.18898 0z" fill-rule="evenodd"></path><path fill="#000000" d="m282.06027 488.05386l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm21.212677 0l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm12.918396 4.0q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm11.2283325 -14.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.4062805 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.8750305 -0.453125 1.8281555 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.0937805 0 -3.4062805 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.4219055 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125305 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.6562805 0.40625 1.3750305 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.1250305 0 -1.9219055 0.8125q-0.78125 0.8125 -0.78125 2.125zm13.875732 4.40625l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm4.3757324 4.78125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm18.640625 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm13.875702 4.40625l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm6.5788574 8.78125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m274.61697 770.54596l140.06299 0l0 42.11023l-140.06299 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m274.61697 770.54596l140.06299 0l0 42.11023l-140.06299 0z" fill-rule="evenodd"></path><path fill="#000000" d="m311.29257 793.09094l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm12.209198 -0.546875q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.688232 4.921875l0 -8.546875l-1.484375 0l0 -1.3125l1.484375 0l0 -1.046875q0 -0.984375 0.171875 -1.46875q0.234375 -0.65625 0.84375 -1.046875q0.609375 -0.40625 1.703125 -0.40625q0.703125 0 1.5625 0.15625l-0.25 1.46875q-0.515625 -0.09375 -0.984375 -0.09375q-0.765625 0 -1.078125 0.328125q-0.3125 0.3125 -0.3125 1.203125l0 0.90625l1.921875 0l0 1.3125l-1.921875 0l0 8.546875l-1.65625 0zm8.433289 -1.5l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm1.5270691 1.5l0 -9.859375l1.5 0l0 1.390625q0.453125 -0.71875 1.21875 -1.15625q0.78125 -0.453125 1.765625 -0.453125q1.09375 0 1.796875 0.453125q0.703125 0.453125 0.984375 1.28125q1.171875 -1.734375 3.046875 -1.734375q1.46875 0 2.25 0.8125q0.796875 0.8125 0.796875 2.5l0 6.765625l-1.671875 0l0 -6.203125q0 -1.0 -0.15625 -1.4375q-0.15625 -0.453125 -0.59375 -0.71875q-0.421875 -0.265625 -1.0 -0.265625q-1.03125 0 -1.71875 0.6875q-0.6875 0.6875 -0.6875 2.21875l0 5.71875l-1.671875 0l0 -6.40625q0 -1.109375 -0.40625 -1.65625q-0.40625 -0.5625 -1.34375 -0.5625q-0.703125 0 -1.3125 0.375q-0.59375 0.359375 -0.859375 1.078125q-0.265625 0.71875 -0.265625 2.0625l0 5.109375l-1.671875 0zm21.978302 -1.21875q-0.9375 0.796875 -1.796875 1.125q-0.859375 0.3125 -1.84375 0.3125q-1.609375 0 -2.484375 -0.78125q-0.875 -0.796875 -0.875 -2.03125q0 -0.734375 0.328125 -1.328125q0.328125 -0.59375 0.859375 -0.953125q0.53125 -0.359375 1.203125 -0.546875q0.5 -0.140625 1.484375 -0.25q2.03125 -0.25 2.984375 -0.578125q0 -0.34375 0 -0.4375q0 -1.015625 -0.46875 -1.4375q-0.640625 -0.5625 -1.90625 -0.5625q-1.171875 0 -1.734375 0.40625q-0.5625 0.40625 -0.828125 1.46875l-1.640625 -0.234375q0.234375 -1.046875 0.734375 -1.6875q0.515625 -0.640625 1.46875 -0.984375q0.96875 -0.359375 2.25 -0.359375q1.265625 0 2.046875 0.296875q0.78125 0.296875 1.15625 0.75q0.375 0.453125 0.515625 1.140625q0.09375 0.421875 0.09375 1.53125l0 2.234375q0 2.328125 0.09375 2.953125q0.109375 0.609375 0.4375 1.171875l-1.75 0q-0.265625 -0.515625 -0.328125 -1.21875zm-0.140625 -3.71875q-0.90625 0.359375 -2.734375 0.625q-1.03125 0.140625 -1.453125 0.328125q-0.421875 0.1875 -0.65625 0.546875q-0.234375 0.359375 -0.234375 0.796875q0 0.671875 0.5 1.125q0.515625 0.4375 1.484375 0.4375q0.96875 0 1.71875 -0.421875q0.75 -0.4375 1.109375 -1.15625q0.265625 -0.578125 0.265625 -1.671875l0 -0.609375zm2.969452 4.9375l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m344.2495 736.5302l0.40945435 34.015747" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m344.2495 736.5302l0.33721924 28.016113" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m342.93512 764.5662l1.7062378 4.5178833l1.5969849 -4.557617z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m344.64847 812.6562l0.31497192 20.346436" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m344.64847 812.6562l0.22210693 14.347168" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m343.21902 827.02893l1.7217712 4.511963l1.5812988 -4.5631104z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m228.15503 622.4199l232.18896 0l0 42.11029l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m228.15503 622.4199l232.18896 0l0 42.11029l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m246.77812 649.33997l0 -13.59375l9.171875 0l0 1.59375l-7.375 0l0 4.21875l6.375 0l0 1.609375l-6.375 0l0 6.171875l-1.796875 0zm17.536606 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.859375 0 -1.625 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.265625 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.8913574 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.144806 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.0979614 3.796875l-0.171875 -1.5625q0.546875 0.140625 0.953125 0.140625q0.546875 0 0.875 -0.1875q0.34375 -0.1875 0.5625 -0.515625q0.15625 -0.25 0.5 -1.25q0.046875 -0.140625 0.15625 -0.40625l-3.734375 -9.875l1.796875 0l2.046875 5.71875q0.40625 1.078125 0.71875 2.28125q0.28125 -1.15625 0.6875 -2.25l2.09375 -5.75l1.671875 0l-3.75 10.03125q-0.59375 1.625 -0.9375 2.234375q-0.4375 0.828125 -1.015625 1.203125q-0.578125 0.390625 -1.375 0.390625q-0.484375 0 -1.078125 -0.203125zm19.328125 -8.5625l1.796875 0.453125q-0.5625 2.21875 -2.03125 3.390625q-1.46875 1.15625 -3.59375 1.15625q-2.203125 0 -3.578125 -0.890625q-1.375 -0.90625 -2.09375 -2.59375q-0.71875 -1.703125 -0.71875 -3.65625q0 -2.125 0.796875 -3.703125q0.8125 -1.578125 2.3125 -2.390625q1.5 -0.828125 3.296875 -0.828125q2.046875 0 3.4375 1.046875q1.390625 1.03125 1.9375 2.90625l-1.765625 0.421875q-0.46875 -1.484375 -1.375 -2.15625q-0.90625 -0.6875 -2.265625 -0.6875q-1.5625 0 -2.625 0.75q-1.046875 0.75 -1.484375 2.03125q-0.421875 1.265625 -0.421875 2.609375q0 1.734375 0.5 3.03125q0.515625 1.28125 1.578125 1.921875q1.078125 0.640625 2.3125 0.640625q1.515625 0 2.5625 -0.859375q1.046875 -0.875 1.421875 -2.59375zm2.926056 -0.15625q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375702 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm17.125732 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547577 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm8.277069 -1.671875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500702 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm17.637146 8.921875q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm11.228302 -14.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm13.875732 4.40625l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm4.375702 4.78125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm18.640625 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm13.875732 4.40625l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm6.578827 8.78125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m228.15503 390.41995l232.18896 0l0 42.11023l-232.18896 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m228.15503 390.41995l232.18896 0l0 42.11023l-232.18896 0z" fill-rule="evenodd"></path><path fill="#000000" d="m241.59027 417.33994l0 -13.59375l9.171875 0l0 1.59375l-7.375 0l0 4.21875l6.375 0l0 1.609375l-6.375 0l0 6.171875l-1.796875 0zm17.53659 0l0 -1.453125q-1.140625 1.671875 -3.125 1.671875q-0.85935974 0 -1.6249847 -0.328125q-0.75 -0.34375 -1.125 -0.84375q-0.359375 -0.5 -0.515625 -1.234375q-0.09375 -0.5 -0.09375 -1.5625l0 -6.109375l1.671875 0l0 5.46875q0 1.3125 0.09375 1.765625q0.15625 0.65625 0.671875 1.03125q0.515625 0.375 1.2656097 0.375q0.75 0 1.40625 -0.375q0.65625 -0.390625 0.921875 -1.046875q0.28125 -0.671875 0.28125 -1.9375l0 -5.28125l1.671875 0l0 9.859375l-1.5 0zm3.8913574 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.144806 0l0 -13.59375l1.671875 0l0 13.59375l-1.671875 0zm4.0979614 3.796875l-0.171875 -1.5625q0.546875 0.140625 0.953125 0.140625q0.546875 0 0.875 -0.1875q0.34375 -0.1875 0.5625 -0.515625q0.15625 -0.25 0.5 -1.25q0.046875 -0.140625 0.15625 -0.40625l-3.734375 -9.875l1.796875 0l2.046875 5.71875q0.40625 1.078125 0.71875 2.28125q0.28125 -1.15625 0.6875 -2.25l2.09375 -5.75l1.671875 0l-3.75 10.03125q-0.59375 1.625 -0.9375 2.234375q-0.4375 0.828125 -1.015625 1.203125q-0.578125 0.390625 -1.375 0.390625q-0.484375 0 -1.078125 -0.203125zm19.328125 -8.5625l1.796875 0.453125q-0.5625 2.21875 -2.03125 3.390625q-1.46875 1.15625 -3.59375 1.15625q-2.203125 0 -3.578125 -0.890625q-1.375 -0.90625 -2.09375 -2.59375q-0.71875 -1.703125 -0.71875 -3.65625q0 -2.125 0.796875 -3.703125q0.8125 -1.578125 2.3125 -2.390625q1.5 -0.828125 3.296875 -0.828125q2.046875 0 3.4375 1.046875q1.390625 1.03125 1.9375 2.90625l-1.765625 0.421875q-0.46875 -1.484375 -1.375 -2.15625q-0.90625 -0.6875 -2.265625 -0.6875q-1.5625 0 -2.625 0.75q-1.046875 0.75 -1.484375 2.03125q-0.421875 1.265625 -0.421875 2.609375q0 1.734375 0.5 3.03125q0.515625 1.28125 1.578125 1.921875q1.078125 0.640625 2.3125 0.640625q1.515625 0 2.5625 -0.859375q1.046875 -0.875 1.421875 -2.59375zm2.926056 -0.15625q0 -2.734375 1.53125 -4.0625q1.265625 -1.09375 3.09375 -1.09375q2.03125 0 3.3125 1.34375q1.296875 1.328125 1.296875 3.671875q0 1.90625 -0.578125 3.0q-0.5625 1.078125 -1.65625 1.6875q-1.078125 0.59375 -2.375 0.59375q-2.0625 0 -3.34375 -1.328125q-1.28125 -1.328125 -1.28125 -3.8125zm1.71875 0q0 1.890625 0.828125 2.828125q0.828125 0.9375 2.078125 0.9375q1.25 0 2.0625 -0.9375q0.828125 -0.953125 0.828125 -2.890625q0 -1.828125 -0.828125 -2.765625q-0.828125 -0.9375 -2.0625 -0.9375q-1.25 0 -2.078125 0.9375q-0.828125 0.9375 -0.828125 2.828125zm9.281982 4.921875l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm10.375702 0l0 -9.859375l1.5 0l0 1.40625q1.09375 -1.625 3.140625 -1.625q0.890625 0 1.640625 0.328125q0.75 0.3125 1.109375 0.84375q0.375 0.515625 0.53125 1.21875q0.09375 0.46875 0.09375 1.625l0 6.0625l-1.671875 0l0 -6.0q0 -1.015625 -0.203125 -1.515625q-0.1875 -0.515625 -0.6875 -0.8125q-0.5 -0.296875 -1.171875 -0.296875q-1.0625 0 -1.84375 0.671875q-0.765625 0.671875 -0.765625 2.578125l0 5.375l-1.671875 0zm17.125732 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.547577 2.265625l1.640625 0.21875q-0.265625 1.6875 -1.375 2.65625q-1.109375 0.953125 -2.734375 0.953125q-2.015625 0 -3.25 -1.3125q-1.21875 -1.328125 -1.21875 -3.796875q0 -1.59375 0.515625 -2.78125q0.53125 -1.203125 1.609375 -1.796875q1.09375 -0.609375 2.359375 -0.609375q1.609375 0 2.625 0.8125q1.015625 0.8125 1.3125 2.3125l-1.625 0.25q-0.234375 -1.0 -0.828125 -1.5q-0.59375 -0.5 -1.421875 -0.5q-1.265625 0 -2.0625 0.90625q-0.78125 0.90625 -0.78125 2.859375q0 1.984375 0.765625 2.890625q0.765625 0.890625 1.984375 0.890625q0.984375 0 1.640625 -0.59375q0.65625 -0.609375 0.84375 -1.859375zm6.546875 2.109375l0.234375 1.484375q-0.703125 0.140625 -1.265625 0.140625q-0.90625 0 -1.40625 -0.28125q-0.5 -0.296875 -0.703125 -0.75q-0.203125 -0.46875 -0.203125 -1.984375l0 -5.65625l-1.234375 0l0 -1.3125l1.234375 0l0 -2.4375l1.65625 -1.0l0 3.4375l1.6875 0l0 1.3125l-1.6875 0l0 5.75q0 0.71875 0.078125 0.921875q0.09375 0.203125 0.296875 0.328125q0.203125 0.125 0.578125 0.125q0.265625 0 0.734375 -0.078125zm8.277069 -1.671875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm15.500702 5.875l0 -1.25q-0.9375 1.46875 -2.75 1.46875q-1.171875 0 -2.171875 -0.640625q-0.984375 -0.65625 -1.53125 -1.8125q-0.53125 -1.171875 -0.53125 -2.6875q0 -1.46875 0.484375 -2.671875q0.5 -1.203125 1.46875 -1.84375q0.984375 -0.640625 2.203125 -0.640625q0.890625 0 1.578125 0.375q0.703125 0.375 1.140625 0.984375l0 -4.875l1.65625 0l0 13.59375l-1.546875 0zm-5.28125 -4.921875q0 1.890625 0.796875 2.828125q0.8125 0.9375 1.890625 0.9375q1.09375 0 1.859375 -0.890625q0.765625 -0.890625 0.765625 -2.734375q0 -2.015625 -0.78125 -2.953125q-0.78125 -0.953125 -1.921875 -0.953125q-1.109375 0 -1.859375 0.90625q-0.75 0.90625 -0.75 2.859375zm17.637146 8.921875q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.681427 -15.8125l0 -1.609375l8.796875 0l0 1.296875q-1.296875 1.375 -2.578125 3.671875q-1.265625 2.296875 -1.96875 4.71875q-0.5 1.703125 -0.640625 3.734375l-1.71875 0q0.03125 -1.609375 0.625 -3.875q0.609375 -2.28125 1.734375 -4.390625q1.140625 -2.109375 2.40625 -3.546875l-6.65625 0zm10.250732 5.109375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm14.016327 6.703125l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm4.3757324 4.78125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm18.640625 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm13.875702 4.40625l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm6.5788574 8.78125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m228.86089 541.1336l232.18898 0l0 42.11023l-232.18898 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m228.86089 541.1336l232.18898 0l0 42.11023l-232.18898 0z" fill-rule="evenodd"></path><path fill="#000000" d="m282.06027 568.0536l0 -13.59375l1.796875 0l0 11.984375l6.703125 0l0 1.609375l-8.5 0zm9.844482 -4.375l1.6875 -0.140625q0.125 1.015625 0.5625 1.671875q0.4375 0.65625 1.359375 1.0625q0.9375 0.40625 2.09375 0.40625q1.03125 0 1.8125 -0.3125q0.796875 -0.3125 1.1875 -0.84375q0.390625 -0.53125 0.390625 -1.15625q0 -0.640625 -0.375 -1.109375q-0.375 -0.484375 -1.234375 -0.8125q-0.546875 -0.21875 -2.421875 -0.65625q-1.875 -0.453125 -2.625 -0.859375q-0.96875 -0.515625 -1.453125 -1.265625q-0.46875 -0.75 -0.46875 -1.6875q0 -1.03125 0.578125 -1.921875q0.59375 -0.90625 1.703125 -1.359375q1.125 -0.46875 2.5 -0.46875q1.515625 0 2.671875 0.484375q1.15625 0.484375 1.765625 1.4375q0.625 0.9375 0.671875 2.140625l-1.71875 0.125q-0.140625 -1.28125 -0.953125 -1.9375q-0.796875 -0.671875 -2.359375 -0.671875q-1.625 0 -2.375 0.609375q-0.75 0.59375 -0.75 1.4375q0 0.734375 0.53125 1.203125q0.515625 0.46875 2.703125 0.96875q2.203125 0.5 3.015625 0.875q1.1875 0.546875 1.75 1.390625q0.578125 0.828125 0.578125 1.921875q0 1.09375 -0.625 2.0625q-0.625 0.953125 -1.796875 1.484375q-1.15625 0.53125 -2.609375 0.53125q-1.84375 0 -3.09375 -0.53125q-1.25 -0.546875 -1.96875 -1.625q-0.703125 -1.078125 -0.734375 -2.453125zm16.506073 4.375l0 -12.0l-4.46875 0l0 -1.59375l10.765625 0l0 1.59375l-4.5 0l0 12.0l-1.796875 0zm7.8803406 0l0 -13.59375l2.71875 0l3.21875 9.625q0.4375 1.34375 0.640625 2.015625q0.234375 -0.75 0.734375 -2.1875l3.25 -9.453125l2.421875 0l0 13.59375l-1.734375 0l0 -11.390625l-3.953125 11.390625l-1.625 0l-3.9375 -11.578125l0 11.578125l-1.734375 0zm23.697052 -1.609375l0 1.609375l-8.984375 0q-0.015625 -0.609375 0.1875 -1.15625q0.34375 -0.921875 1.09375 -1.8125q0.765625 -0.890625 2.1875 -2.0625q2.21875 -1.8125 3.0 -2.875q0.78125 -1.0625 0.78125 -2.015625q0 -0.984375 -0.71875 -1.671875q-0.703125 -0.6875 -1.84375 -0.6875q-1.203125 0 -1.9375 0.734375q-0.71875 0.71875 -0.71875 2.0l-1.71875 -0.171875q0.171875 -1.921875 1.328125 -2.921875q1.15625 -1.015625 3.09375 -1.015625q1.953125 0 3.09375 1.09375q1.140625 1.078125 1.140625 2.6875q0 0.8125 -0.34375 1.609375q-0.328125 0.78125 -1.109375 1.65625q-0.765625 0.859375 -2.5625 2.390625q-1.5 1.265625 -1.9375 1.71875q-0.421875 0.4375 -0.703125 0.890625l6.671875 0zm10.434021 5.609375q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm11.2283325 -14.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.4062805 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.8750305 -0.453125 1.8281555 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.0937805 0 -3.4062805 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.4219055 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125305 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.6562805 0.40625 1.3750305 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.1250305 0 -1.9219055 0.8125q-0.78125 0.8125 -0.78125 2.125zm13.875732 4.40625l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm4.3757324 4.78125l3.59375 -5.125l-3.328125 -4.734375l2.09375 0l1.515625 2.3125q0.421875 0.65625 0.671875 1.109375q0.421875 -0.609375 0.765625 -1.09375l1.65625 -2.328125l1.984375 0l-3.390625 4.640625l3.65625 5.21875l-2.046875 0l-2.03125 -3.0625l-0.53125 -0.828125l-2.59375 3.890625l-2.015625 0zm18.640625 -10.265625l-1.65625 0.125q-0.21875 -0.984375 -0.640625 -1.421875q-0.671875 -0.71875 -1.65625 -0.71875q-0.8125 0 -1.40625 0.4375q-0.796875 0.578125 -1.25 1.6875q-0.453125 1.09375 -0.46875 3.140625q0.609375 -0.921875 1.46875 -1.359375q0.875 -0.453125 1.828125 -0.453125q1.671875 0 2.84375 1.234375q1.171875 1.234375 1.171875 3.171875q0 1.28125 -0.546875 2.390625q-0.546875 1.09375 -1.515625 1.6875q-0.96875 0.578125 -2.1875 0.578125q-2.09375 0 -3.40625 -1.53125q-1.3125 -1.546875 -1.3125 -5.0625q0 -3.953125 1.453125 -5.734375q1.265625 -1.5625 3.421875 -1.5625q1.609375 0 2.625 0.90625q1.03125 0.890625 1.234375 2.484375zm-6.8125 5.859375q0 0.859375 0.359375 1.65625q0.375 0.78125 1.03125 1.203125q0.65625 0.40625 1.375 0.40625q1.0625 0 1.8125 -0.84375q0.765625 -0.859375 0.765625 -2.328125q0 -1.40625 -0.75 -2.21875q-0.75 -0.8125 -1.890625 -0.8125q-1.125 0 -1.921875 0.8125q-0.78125 0.8125 -0.78125 2.125zm13.875702 4.40625l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm6.5788574 8.78125l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m344.95538 583.24384l-0.6929016 39.18109" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m344.95538 583.24384l-0.5868225 33.182068" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m342.71707 616.39667l1.5712585 4.5665894l1.7316895 -4.5081787z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m344.2495 432.53018l0.6929321 28.59842" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m344.2495 432.53018l0.5475769 22.60019" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m343.14584 455.17038l1.7611694 4.496765l1.5413208 -4.576782z" fill-rule="evenodd"></path><path fill="#000000" fill-opacity="0.0" d="m228.15486 310.02362l232.18898 0l0 58.992126l-232.18898 0z" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="2.0" stroke-linejoin="round" stroke-linecap="butt" d="m228.15486 310.02362l232.18898 0l0 58.992126l-232.18898 0z" fill-rule="evenodd"></path><path fill="#000000" d="m308.40952 336.94363l0 -13.59375l6.03125 0q1.8125 0 2.75 0.359375q0.953125 0.359375 1.515625 1.296875q0.5625 0.921875 0.5625 2.046875q0 1.453125 -0.9375 2.453125q-0.921875 0.984375 -2.890625 1.25q0.71875 0.34375 1.09375 0.671875q0.78125 0.734375 1.484375 1.8125l2.375 3.703125l-2.265625 0l-1.796875 -2.828125q-0.796875 -1.21875 -1.3125 -1.875q-0.5 -0.65625 -0.90625 -0.90625q-0.40625 -0.265625 -0.8125 -0.359375q-0.3125 -0.078125 -1.015625 -0.078125l-2.078125 0l0 6.046875l-1.796875 0zm1.796875 -7.59375l3.859375 0q1.234375 0 1.921875 -0.25q0.703125 -0.265625 1.0625 -0.828125q0.375 -0.5625 0.375 -1.21875q0 -0.96875 -0.703125 -1.578125q-0.703125 -0.625 -2.21875 -0.625l-4.296875 0l0 4.5zm18.176086 4.421875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm8.438202 2.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.0 2.9375l0 -13.59375l1.671875 0l0 4.875q1.171875 -1.359375 2.953125 -1.359375q1.09375 0 1.890625 0.4375q0.8125 0.421875 1.15625 1.1875q0.359375 0.765625 0.359375 2.203125l0 6.25l-1.671875 0l0 -6.25q0 -1.25 -0.546875 -1.8125q-0.546875 -0.578125 -1.53125 -0.578125q-0.75 0 -1.40625 0.390625q-0.640625 0.375 -0.921875 1.046875q-0.28125 0.65625 -0.28125 1.8125l0 5.390625l-1.671875 0zm16.813202 -1.21875q-0.9375 0.796875 -1.796875 1.125q-0.8593445 0.3125 -1.8437195 0.3125q-1.609375 0 -2.484375 -0.78125q-0.875 -0.796875 -0.875 -2.03125q0 -0.734375 0.328125 -1.328125q0.328125 -0.59375 0.859375 -0.953125q0.53125 -0.359375 1.203125 -0.546875q0.5 -0.140625 1.484375 -0.25q2.0312195 -0.25 2.9843445 -0.578125q0 -0.34375 0 -0.4375q0 -1.015625 -0.46875 -1.4375q-0.640625 -0.5625 -1.9062195 -0.5625q-1.171875 0 -1.734375 0.40625q-0.5625 0.40625 -0.828125 1.46875l-1.640625 -0.234375q0.234375 -1.046875 0.734375 -1.6875q0.515625 -0.640625 1.46875 -0.984375q0.96875 -0.359375 2.25 -0.359375q1.2655945 0 2.0468445 0.296875q0.78125 0.296875 1.15625 0.75q0.375 0.453125 0.515625 1.140625q0.09375 0.421875 0.09375 1.53125l0 2.234375q0 2.328125 0.09375 2.953125q0.109375 0.609375 0.4375 1.171875l-1.75 0q-0.265625 -0.515625 -0.328125 -1.21875zm-0.140625 -3.71875q-0.90625 0.359375 -2.7343445 0.625q-1.03125 0.140625 -1.453125 0.328125q-0.421875 0.1875 -0.65625 0.546875q-0.234375 0.359375 -0.234375 0.796875q0 0.671875 0.5 1.125q0.515625 0.4375 1.484375 0.4375q0.96875 0 1.7187195 -0.421875q0.75 -0.4375 1.109375 -1.15625q0.265625 -0.578125 0.265625 -1.671875l0 -0.609375zm4.0788574 8.71875l0 -13.640625l1.53125 0l0 1.28125q0.53125 -0.75 1.203125 -1.125q0.6875 -0.375 1.640625 -0.375q1.265625 0 2.234375 0.65625q0.96875 0.640625 1.453125 1.828125q0.5 1.1875 0.5 2.59375q0 1.515625 -0.546875 2.734375q-0.546875 1.203125 -1.578125 1.84375q-1.03125 0.640625 -2.171875 0.640625q-0.84375 0 -1.515625 -0.34375q-0.65625 -0.359375 -1.078125 -0.890625l0 4.796875l-1.671875 0zm1.515625 -8.65625q0 1.90625 0.765625 2.8125q0.78125 0.90625 1.875 0.90625q1.109375 0 1.890625 -0.9375q0.796875 -0.9375 0.796875 -2.921875q0 -1.875 -0.78125 -2.8125q-0.765625 -0.9375 -1.84375 -0.9375q-1.0625 0 -1.890625 1.0q-0.8125 1.0 -0.8125 2.890625zm15.610077 1.703125l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875z" fill-rule="nonzero"></path><path fill="#000000" d="m284.13696 362.94363q-1.375 -1.75 -2.328125 -4.078125q-0.953125 -2.34375 -0.953125 -4.84375q0 -2.21875 0.703125 -4.234375q0.84375 -2.34375 2.578125 -4.671875l1.203125 0q-1.125 1.921875 -1.484375 2.75q-0.5625 1.28125 -0.890625 2.671875q-0.40625 1.734375 -0.40625 3.484375q0 4.46875 2.78125 8.921875l-1.203125 0zm2.353302 -6.9375l1.65625 -0.265625q0.140625 1.0 0.765625 1.53125q0.640625 0.515625 1.78125 0.515625q1.15625 0 1.703125 -0.46875q0.5625 -0.46875 0.5625 -1.09375q0 -0.5625 -0.484375 -0.890625q-0.34375 -0.21875 -1.703125 -0.5625q-1.84375 -0.46875 -2.5625 -0.796875q-0.703125 -0.34375 -1.078125 -0.9375q-0.359375 -0.609375 -0.359375 -1.328125q0 -0.65625 0.296875 -1.21875q0.3125 -0.5625 0.828125 -0.9375q0.390625 -0.28125 1.0625 -0.484375q0.671875 -0.203125 1.4375 -0.203125q1.171875 0 2.046875 0.34375q0.875 0.328125 1.28125 0.90625q0.421875 0.5625 0.578125 1.515625l-1.625 0.21875q-0.109375 -0.75 -0.65625 -1.171875q-0.53125 -0.4375 -1.5 -0.4375q-1.15625 0 -1.640625 0.390625q-0.484375 0.375 -0.484375 0.875q0 0.328125 0.203125 0.59375q0.203125 0.265625 0.640625 0.4375q0.25 0.09375 1.46875 0.4375q1.765625 0.46875 2.46875 0.765625q0.703125 0.296875 1.09375 0.875q0.40625 0.578125 0.40625 1.4375q0 0.828125 -0.484375 1.578125q-0.484375 0.734375 -1.40625 1.140625q-0.921875 0.390625 -2.078125 0.390625q-1.921875 0 -2.9375 -0.796875q-1.0 -0.796875 -1.28125 -2.359375zm10.015625 -8.75l0 -1.90625l1.671875 0l0 1.90625l-1.671875 0zm0 11.6875l0 -9.859375l1.671875 0l0 9.859375l-1.671875 0zm3.254181 0l0 -1.359375l6.265625 -7.1875q-1.0625 0.046875 -1.875 0.046875l-4.015625 0l0 -1.359375l8.046875 0l0 1.109375l-5.34375 6.25l-1.015625 1.140625q1.109375 -0.078125 2.09375 -0.078125l4.5625 0l0 1.4375l-8.71875 0zm16.953125 -3.171875l1.71875 0.21875q-0.40625 1.5 -1.515625 2.34375q-1.09375 0.828125 -2.8125 0.828125q-2.15625 0 -3.421875 -1.328125q-1.265625 -1.328125 -1.265625 -3.734375q0 -2.484375 1.265625 -3.859375q1.28125 -1.375 3.328125 -1.375q1.984375 0 3.234375 1.34375q1.25 1.34375 1.25 3.796875q0 0.140625 -0.015625 0.4375l-7.34375 0q0.09375 1.625 0.921875 2.484375q0.828125 0.859375 2.0625 0.859375q0.90625 0 1.546875 -0.46875q0.65625 -0.484375 1.046875 -1.546875zm-5.484375 -2.703125l5.5 0q-0.109375 -1.234375 -0.625 -1.859375q-0.796875 -0.96875 -2.078125 -0.96875q-1.140625 0 -1.9375 0.78125q-0.78125 0.765625 -0.859375 2.046875zm23.074646 -2.125l-8.96875 0l0 -1.5625l8.96875 0l0 1.5625zm0 4.125l-8.96875 0l0 -1.546875l8.96875 0l0 1.546875zm7.3439026 7.65625l0 -17.375l3.671875 0l0 1.375l-2.015625 0l0 14.609375l2.015625 0l0 1.390625l-3.671875 0zm10.964539 -3.78125l-1.671875 0l0 -10.640625q-0.59375 0.578125 -1.578125 1.15625q-0.984375 0.5625 -1.765625 0.859375l0 -1.625q1.40625 -0.65625 2.453125 -1.59375q1.046875 -0.9375 1.484375 -1.8125l1.078125 0l0 13.65625zm4.985077 0l0 -1.90625l1.90625 0l0 1.90625q0 1.046875 -0.375 1.6875q-0.375 0.65625 -1.171875 1.0l-0.46875 -0.71875q0.53125 -0.21875 0.78125 -0.671875q0.25 -0.453125 0.28125 -1.296875l-0.953125 0zm9.585358 -11.8125l0 -1.609375l8.796875 0l0 1.296875q-1.296875 1.375 -2.578125 3.671875q-1.265625 2.296875 -1.96875 4.71875q-0.5 1.703125 -0.640625 3.734375l-1.71875 0q0.03125 -1.609375 0.625 -3.875q0.609375 -2.28125 1.734375 -4.390625q1.140625 -2.109375 2.40625 -3.546875l-6.65625 0zm10.250732 5.109375q0 -2.421875 0.5 -3.890625q0.5 -1.46875 1.46875 -2.265625q0.984375 -0.796875 2.46875 -0.796875q1.09375 0 1.921875 0.4375q0.828125 0.4375 1.359375 1.28125q0.546875 0.828125 0.84375 2.015625q0.3125 1.1875 0.3125 3.21875q0 2.390625 -0.5 3.859375q-0.484375 1.46875 -1.46875 2.28125q-0.96875 0.796875 -2.46875 0.796875q-1.96875 0 -3.078125 -1.40625q-1.359375 -1.703125 -1.359375 -5.53125zm1.71875 0q0 3.34375 0.78125 4.453125q0.796875 1.109375 1.9375 1.109375q1.15625 0 1.9375 -1.109375q0.78125 -1.125 0.78125 -4.453125q0 -3.359375 -0.78125 -4.46875q-0.78125 -1.109375 -1.953125 -1.109375q-1.15625 0 -1.828125 0.984375q-0.875 1.234375 -0.875 4.59375zm14.016327 6.703125l0 -3.25l-5.90625 0l0 -1.53125l6.21875 -8.8125l1.359375 0l0 8.8125l1.84375 0l0 1.53125l-1.84375 0l0 3.25l-1.671875 0zm0 -4.78125l0 -6.140625l-4.25 6.140625l4.25 0zm8.281982 8.5625l-3.6875 0l0 -1.390625l2.015625 0l0 -14.609375l-2.015625 0l0 -1.375l3.6875 0l0 17.375zm3.4801636 0.21875l-1.1875 0q2.765625 -4.453125 2.765625 -8.921875q0 -1.734375 -0.390625 -3.453125q-0.328125 -1.390625 -0.890625 -2.671875q-0.359375 -0.84375 -1.484375 -2.78125l1.1875 0q1.75 2.328125 2.578125 4.671875q0.71875 2.015625 0.71875 4.234375q0 2.5 -0.96875 4.84375q-0.953125 2.328125 -2.328125 4.078125z" fill-rule="nonzero"></path><path fill="#000000" fill-opacity="0.0" d="m344.24933 369.01575l0 21.417328" fill-rule="evenodd"></path><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m344.24933 369.01575l0 15.417328" fill-rule="evenodd"></path><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m342.59763 384.43307l1.6517029 4.538086l1.6517334 -4.538086z" fill-rule="evenodd"></path></g></svg>
+
diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index b78e958e7f3a99993ab5e2cf487cfa73de8a74e8..3cda4bccccd0c30bb0ccfb82e1c80f7c6a7b9d84 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -108,7 +108,7 @@ enum {
  * The type of operations that can be added to a model.
  */
 enum {
-  /** Adds two tensors, elment-wise.
+  /** Adds two tensors, element-wise.
    *
    * Takes two input tensors of identical type and compatible dimensions. The
    * output is the sum of both input tensors, optionally modified by an
@@ -743,7 +743,7 @@ enum {
    */
   ANEURALNETWORKS_MAX_POOL_2D = 17,
 
-  /** Multiplies two tensors, elment-wise.
+  /** Multiplies two tensors, element-wise.
    *
    * Takes two input tensors of identical type and compatible dimensions. The
    * output is the product of both input tensors, optionally modified by an
@@ -1454,9 +1454,9 @@ inline int ANeuralNetworksModel_finish(ANeuralNetworksModel* model) {
  * {@link ANeuralNetworksExecution_setOutputFromMemory} and
  * {@link ANeuralNetworksExecution_setOperandValue}.
  *
- * To build a model that can accommodate inputs of various sizes, as you may want
- * to do for a CNN, set the size of the dimensions that will vary at run time to
- * 0. If you do so, provide the full dimensions when calling
+ * To build a model that can accommodate inputs of various sizes, as you may
+ * want to do for a CNN, set the size of the dimensions that will vary at run
+ * time to 0. If you do so, provide the full dimensions when calling
  * {@link ANeuralNetworksExecution_setInput} or {@link
  * ANeuralNetworksExecution_setInputFromMemory}.
  *
@@ -1774,7 +1774,7 @@ inline int ANeuralNetworksExecution_setInput(
  *             model. If the type is the same as specified when the model
  *             was built, NULL can be passed.
  * @param memory The memory containing the data.
- * @param offset This specifies the location of the data whithin the memory.
+ * @param offset This specifies the location of the data within the memory.
  *               The offset is in bytes from the start of memory.
  * @param length The size in bytes of the data value.
  *
@@ -1841,7 +1841,7 @@ inline int ANeuralNetworksExecution_setOutput(
  *             model. If the type is the same as specified when the model
  *             was built, NULL can be passed.
  * @param memory The memory where the data is to be stored.
- * @param offset This specifies the location of the data whithin the memory.
+ * @param offset This specifies the location of the data within the memory.
  *               The offset is in bytes from the start of memory.
  * @param length The length in bytes of the data value.
  *
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 6a199cc8406c73f822b813603e55b0ba1994a235..b3602f799e7d05bcd837135ca60cb410ac1a4fe4 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -161,6 +161,14 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       augmented_inputs.push_back(next_id++);
     };
 
+    auto duplicate_state_tensor_float32 =
+        [interpreter, &nn_model, &augmented_inputs, &next_id](int tensor_id) {
+          const TfLiteTensor* tensor = interpreter->tensor(tensor_id);
+          CHECK_NN(ANeuralNetworksModel_setOperandValue(
+              nn_model, tensor_id, tensor->data.raw, tensor->bytes));
+          augmented_inputs.push_back(tensor_id);
+        };
+
     auto add_add_params = [&add_scalar_int32]() { add_scalar_int32(0); };
 
     auto add_pooling_params = [&add_scalar_int32](void* data) {
@@ -208,6 +216,19 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
       add_scalar_float32(builtin->beta);
     };
 
+    auto add_space_to_depth_params = [&add_scalar_int32](void* data) {
+      auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(data);
+      add_scalar_int32(builtin->block_size);
+    };
+
+    auto add_lstm_params = [&add_scalar_int32,
+                            &add_scalar_float32](void* data) {
+      auto builtin = reinterpret_cast<TfLiteLSTMParams*>(data);
+      add_scalar_int32(builtin->activation);
+      add_scalar_float32(builtin->cell_clip);
+      add_scalar_float32(builtin->proj_clip);
+    };
+
 #if 0
     auto add_reshape_params = [&](void* data) {
       auto builtin = reinterpret_cast<TfLiteReshapeParams*>(data);
@@ -280,22 +301,43 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
         nn_op_type = ANEURALNETWORKS_RESHAPE;
         // add_reshape_params(node.builtin_data);
         break;
+      case tflite::BuiltinOperator_SPACE_TO_DEPTH:
+        add_space_to_depth_params(node.builtin_data);
+        nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
+        break;
+      case tflite::BuiltinOperator_LSTM: {
+        duplicate_state_tensor_float32(
+            node.outputs->data[/*kOutputStateTensor*/ 1]);
+        duplicate_state_tensor_float32(
+            node.outputs->data[/*kCellStateTensor*/ 2]);
+        add_lstm_params(node.builtin_data);
+        nn_op_type = ANEURALNETWORKS_LSTM;
+        break;
+      }
       case tflite::BuiltinOperator_CONCAT_EMBEDDINGS:
       case tflite::BuiltinOperator_LSH_PROJECTION:
       case tflite::BuiltinOperator_SVDF:
       case tflite::BuiltinOperator_HASHTABLE_LOOKUP:
       case tflite::BuiltinOperator_RNN:
+      case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN:
       case tflite::BuiltinOperator_EMBEDDING_LOOKUP:
       case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE:
-      case tflite::BuiltinOperator_LSTM:
       case tflite::BuiltinOperator_L2_NORMALIZATION:
       case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION:
       case tflite::BuiltinOperator_MUL:
+      case tflite::BuiltinOperator_PAD:
       case tflite::BuiltinOperator_RESIZE_BILINEAR:
       case tflite::BuiltinOperator_CALL:
       case tflite::BuiltinOperator_SKIP_GRAM:
-      case tflite::BuiltinOperator_RELU1:
-      case tflite::BuiltinOperator_SPACE_TO_DEPTH:
+      case tflite::BuiltinOperator_RELU_N1_TO_1:
+      case tflite::BuiltinOperator_GATHER:
+      case tflite::BuiltinOperator_SPACE_TO_BATCH_ND:
+      case tflite::BuiltinOperator_BATCH_TO_SPACE_ND:
+      case tflite::BuiltinOperator_TRANSPOSE:
+      case tflite::BuiltinOperator_MEAN:
+      case tflite::BuiltinOperator_DIV:
+      case tflite::BuiltinOperator_SUB:
+      case tflite::BuiltinOperator_SQUEEZE:
         FATAL("Op code %d is currently not delegated to NNAPI", builtin);
         nn_op_type = -1;  // set to invalid
         break;
diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index 89e8693490dcec79e7a117073696e57a9060e68f..3d6a3ec0fd4c673f601254b19452bbf8b9454e27 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -24,6 +24,7 @@ py_test(
     name = "lite_test",
     srcs = ["lite_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_oss"],
     deps = [
         ":lite",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py
index 0fd70f842b9db0e6ef48480e79cc6bb59840761c..4d87a5907b1335794e57689f144e03747cec9e70 100644
--- a/tensorflow/contrib/lite/python/lite.py
+++ b/tensorflow/contrib/lite/python/lite.py
@@ -50,7 +50,7 @@ GRAPHVIZ_DOT = _toco_flags_pb2.GRAPHVIZ_DOT
 # to protect against crashes. However, it breaks some dependent targets because
 # it forces us to depend on an external py_binary. The experimental API doesn't
 # have that drawback.
-EXPERIMENTAL_USE_TOCO_API_DIRECTLY = True
+EXPERIMENTAL_USE_TOCO_API_DIRECTLY = False
 
 # Find the toco_from_protos binary using the resource loader if using from
 # bazel, otherwise we are in a pip where console_scripts already has
@@ -184,10 +184,10 @@ def toco_convert(input_data,
     if inference_type == QUANTIZED_UINT8:
       if tflite_input_type == FLOAT:
         tflite_input_type = QUANTIZED_UINT8
-      input_array.mean, input_array.std = quantized_input_stats[idx]
+      input_array.mean_value, input_array.std_value = quantized_input_stats[idx]
 
     input_array.name = _tensor_name(input_tensor)
-    input_array.shape.extend(map(int, input_tensor.get_shape()))
+    input_array.shape.dims.extend(map(int, input_tensor.get_shape()))
     toco.inference_input_type = tflite_input_type
 
   for output_tensor in output_tensors:
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index da360aeb344ab9c4eb183d84e9b5f60ba715c6e8..7d55f3fe6fe41a5d9e4e57c7a8e664bba6887fc7 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -40,6 +40,16 @@ class LiteTest(test_util.TensorFlowTestCase):
     # with self.assertRaisesRegexp(RuntimeError, "!model->operators.empty()"):
     #   result = lite.toco_convert(sess.graph_def, [in_tensor], [in_tensor])
 
+  def testQuantization(self):
+    in_tensor = array_ops.placeholder(shape=[1, 16, 16, 3],
+                                      dtype=dtypes.float32)
+    out_tensor = array_ops.fake_quant_with_min_max_args(in_tensor + in_tensor,
+                                                        min=0., max=1.)
+    sess = session.Session()
+    result = lite.toco_convert(sess.graph_def, [in_tensor], [out_tensor],
+                               inference_type=lite.QUANTIZED_UINT8,
+                               quantized_input_stats=[(0., 1.)])
+    self.assertTrue(result)
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index ddb2ab792c520eb245445532f534ebce8a9f1280..260a87c93bf2886de5f951af9f3fd20d4c33bb83 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -47,8 +47,8 @@ table QuantizationParameters {
 
 table Tensor {
   // The tensor shape. The meaning of each entry is operator-specific but
-  // builtin ops use: [batch size, number of channels, height, width] (That's
-  // Tensorflow's NCHW).
+  // builtin ops use: [batch size, height, width, number of channels] (That's
+  // Tensorflow's NHWC).
   shape:[int];
   type:TensorType;
   // An index that refers to the buffers table at the root of the model. Or,
@@ -89,7 +89,10 @@ enum BuiltinOperator : byte {
   MAX_POOL_2D = 17,
   MUL = 18,
   RELU = 19,
-  RELU1 = 20,
+  // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+  // since different model developers use RELU1 in different ways. Never
+  // create another op called RELU1.
+  RELU_N1_TO_1 = 20,
   RELU6 = 21,
   RESHAPE = 22,
   RESIZE_BILINEAR = 23,
@@ -104,6 +107,16 @@ enum BuiltinOperator : byte {
   CALL = 31,
   CUSTOM = 32,
   EMBEDDING_LOOKUP_SPARSE = 33,
+  PAD = 34,
+  UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+  GATHER = 36,
+  BATCH_TO_SPACE_ND = 37,
+  SPACE_TO_BATCH_ND = 38,
+  TRANSPOSE = 39,
+  MEAN = 40,
+  SUB = 41,
+  DIV = 42,
+  SQUEEZE = 43,
 }
 
 // Options for the builtin operators.
@@ -129,6 +142,16 @@ union BuiltinOptions {
   SpaceToDepthOptions,
   EmbeddingLookupSparseOptions,
   MulOptions,
+  PadOptions,
+  GatherOptions,
+  BatchToSpaceNDOptions,
+  SpaceToBatchNDOptions,
+  TransposeOptions,
+  MeanOptions,
+  SubOptions,
+  DivOptions,
+  SqueezeOptions,
+  SequenceRNNOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -136,7 +159,7 @@ enum Padding : byte { SAME, VALID }
 enum ActivationFunctionType : byte {
   NONE = 0,
   RELU = 1,
-  RELU1 = 2,
+  RELU_N1_TO_1 = 2,
   RELU6 = 3,
   TANH = 4,
   SIGN_BIT = 5,
@@ -192,6 +215,12 @@ table RNNOptions {
   fused_activation_function:ActivationFunctionType;
 }
 
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+  time_major:bool;
+  fused_activation_function:ActivationFunctionType;
+}
+
 // An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
 table FullyConnectedOptions {
   fused_activation_function:ActivationFunctionType;
@@ -244,10 +273,27 @@ table CallOptions {
   subgraph:uint;
 }
 
+table PadOptions {
+  before_padding:[int];
+  after_padding:[int];
+}
+
 table ReshapeOptions {
   new_shape:[int];
 }
 
+table SpaceToBatchNDOptions {
+  block_shape:[int];
+  before_paddings:[int];
+  after_paddings:[int];
+}
+
+table BatchToSpaceNDOptions {
+  block_shape:[int];
+  before_crops:[int];
+  after_crops:[int];
+}
+
 table SkipGramOptions {
   ngram_size: int;
   max_skip_size: int;
@@ -258,6 +304,14 @@ table SpaceToDepthOptions {
   block_size: int;
 }
 
+table SubOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
 enum CombinerType : byte {
   SUM = 0,
   MEAN = 1,
@@ -268,6 +322,23 @@ table EmbeddingLookupSparseOptions {
   combiner:CombinerType;
 }
 
+table GatherOptions {
+  axis: int;
+}
+
+table TransposeOptions {
+  perm:[int];
+}
+
+table MeanOptions {
+  axis:[int];
+  keep_dims: bool;
+}
+
+table SqueezeOptions {
+  squeeze_dims:[int];
+}
+
 // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
 // builtin, or a string if the operator is custom.
 table OperatorCode {
@@ -343,4 +414,3 @@ table Model {
 }
 
 root_type Model;
-
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index df460ab9a32f1d80c0788649e799778db8050b7f..fd98be8f70ee06024142cb8c2099fc07ffebcb87 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -1,5 +1,18 @@
-// automatically generated by the FlatBuffers compiler, do not modify
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
 
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// automatically generated by the FlatBuffers compiler, do not modify
 
 #ifndef FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_
 #define FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_
@@ -35,6 +48,9 @@ struct SVDFOptionsT;
 struct RNNOptions;
 struct RNNOptionsT;
 
+struct SequenceRNNOptions;
+struct SequenceRNNOptionsT;
+
 struct FullyConnectedOptions;
 struct FullyConnectedOptionsT;
 
@@ -65,18 +81,45 @@ struct ResizeBilinearOptionsT;
 struct CallOptions;
 struct CallOptionsT;
 
+struct PadOptions;
+struct PadOptionsT;
+
 struct ReshapeOptions;
 struct ReshapeOptionsT;
 
+struct SpaceToBatchNDOptions;
+struct SpaceToBatchNDOptionsT;
+
+struct BatchToSpaceNDOptions;
+struct BatchToSpaceNDOptionsT;
+
 struct SkipGramOptions;
 struct SkipGramOptionsT;
 
 struct SpaceToDepthOptions;
 struct SpaceToDepthOptionsT;
 
+struct SubOptions;
+struct SubOptionsT;
+
+struct DivOptions;
+struct DivOptionsT;
+
 struct EmbeddingLookupSparseOptions;
 struct EmbeddingLookupSparseOptionsT;
 
+struct GatherOptions;
+struct GatherOptionsT;
+
+struct TransposeOptions;
+struct TransposeOptionsT;
+
+struct MeanOptions;
+struct MeanOptionsT;
+
+struct SqueezeOptions;
+struct SqueezeOptionsT;
+
 struct OperatorCode;
 struct OperatorCodeT;
 
@@ -104,27 +147,15 @@ enum TensorType {
 };
 
 inline TensorType (&EnumValuesTensorType())[6] {
-  static TensorType values[] = {
-    TensorType_FLOAT32,
-    TensorType_FLOAT16,
-    TensorType_INT32,
-    TensorType_UINT8,
-    TensorType_INT64,
-    TensorType_STRING
-  };
+  static TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16,
+                                TensorType_INT32,   TensorType_UINT8,
+                                TensorType_INT64,   TensorType_STRING};
   return values;
 }
 
 inline const char **EnumNamesTensorType() {
-  static const char *names[] = {
-    "FLOAT32",
-    "FLOAT16",
-    "INT32",
-    "UINT8",
-    "INT64",
-    "STRING",
-    nullptr
-  };
+  static const char *names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8",
+                                "INT64",   "STRING",  nullptr};
   return names;
 }
 
@@ -151,7 +182,7 @@ enum BuiltinOperator {
   BuiltinOperator_MAX_POOL_2D = 17,
   BuiltinOperator_MUL = 18,
   BuiltinOperator_RELU = 19,
-  BuiltinOperator_RELU1 = 20,
+  BuiltinOperator_RELU_N1_TO_1 = 20,
   BuiltinOperator_RELU6 = 21,
   BuiltinOperator_RESHAPE = 22,
   BuiltinOperator_RESIZE_BILINEAR = 23,
@@ -165,85 +196,112 @@ enum BuiltinOperator {
   BuiltinOperator_CALL = 31,
   BuiltinOperator_CUSTOM = 32,
   BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33,
+  BuiltinOperator_PAD = 34,
+  BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+  BuiltinOperator_GATHER = 36,
+  BuiltinOperator_BATCH_TO_SPACE_ND = 37,
+  BuiltinOperator_SPACE_TO_BATCH_ND = 38,
+  BuiltinOperator_TRANSPOSE = 39,
+  BuiltinOperator_MEAN = 40,
+  BuiltinOperator_SUB = 41,
+  BuiltinOperator_DIV = 42,
+  BuiltinOperator_SQUEEZE = 43,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_EMBEDDING_LOOKUP_SPARSE
+  BuiltinOperator_MAX = BuiltinOperator_SQUEEZE
 };
 
-inline BuiltinOperator (&EnumValuesBuiltinOperator())[31] {
+inline BuiltinOperator (&EnumValuesBuiltinOperator())[41] {
   static BuiltinOperator values[] = {
-    BuiltinOperator_ADD,
-    BuiltinOperator_AVERAGE_POOL_2D,
-    BuiltinOperator_CONCATENATION,
-    BuiltinOperator_CONV_2D,
-    BuiltinOperator_DEPTHWISE_CONV_2D,
-    BuiltinOperator_EMBEDDING_LOOKUP,
-    BuiltinOperator_FULLY_CONNECTED,
-    BuiltinOperator_HASHTABLE_LOOKUP,
-    BuiltinOperator_L2_NORMALIZATION,
-    BuiltinOperator_L2_POOL_2D,
-    BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
-    BuiltinOperator_LOGISTIC,
-    BuiltinOperator_LSH_PROJECTION,
-    BuiltinOperator_LSTM,
-    BuiltinOperator_MAX_POOL_2D,
-    BuiltinOperator_MUL,
-    BuiltinOperator_RELU,
-    BuiltinOperator_RELU1,
-    BuiltinOperator_RELU6,
-    BuiltinOperator_RESHAPE,
-    BuiltinOperator_RESIZE_BILINEAR,
-    BuiltinOperator_RNN,
-    BuiltinOperator_SOFTMAX,
-    BuiltinOperator_SPACE_TO_DEPTH,
-    BuiltinOperator_SVDF,
-    BuiltinOperator_TANH,
-    BuiltinOperator_CONCAT_EMBEDDINGS,
-    BuiltinOperator_SKIP_GRAM,
-    BuiltinOperator_CALL,
-    BuiltinOperator_CUSTOM,
-    BuiltinOperator_EMBEDDING_LOOKUP_SPARSE
-  };
+      BuiltinOperator_ADD,
+      BuiltinOperator_AVERAGE_POOL_2D,
+      BuiltinOperator_CONCATENATION,
+      BuiltinOperator_CONV_2D,
+      BuiltinOperator_DEPTHWISE_CONV_2D,
+      BuiltinOperator_EMBEDDING_LOOKUP,
+      BuiltinOperator_FULLY_CONNECTED,
+      BuiltinOperator_HASHTABLE_LOOKUP,
+      BuiltinOperator_L2_NORMALIZATION,
+      BuiltinOperator_L2_POOL_2D,
+      BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
+      BuiltinOperator_LOGISTIC,
+      BuiltinOperator_LSH_PROJECTION,
+      BuiltinOperator_LSTM,
+      BuiltinOperator_MAX_POOL_2D,
+      BuiltinOperator_MUL,
+      BuiltinOperator_RELU,
+      BuiltinOperator_RELU_N1_TO_1,
+      BuiltinOperator_RELU6,
+      BuiltinOperator_RESHAPE,
+      BuiltinOperator_RESIZE_BILINEAR,
+      BuiltinOperator_RNN,
+      BuiltinOperator_SOFTMAX,
+      BuiltinOperator_SPACE_TO_DEPTH,
+      BuiltinOperator_SVDF,
+      BuiltinOperator_TANH,
+      BuiltinOperator_CONCAT_EMBEDDINGS,
+      BuiltinOperator_SKIP_GRAM,
+      BuiltinOperator_CALL,
+      BuiltinOperator_CUSTOM,
+      BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
+      BuiltinOperator_PAD,
+      BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
+      BuiltinOperator_GATHER,
+      BuiltinOperator_BATCH_TO_SPACE_ND,
+      BuiltinOperator_SPACE_TO_BATCH_ND,
+      BuiltinOperator_TRANSPOSE,
+      BuiltinOperator_MEAN,
+      BuiltinOperator_SUB,
+      BuiltinOperator_DIV,
+      BuiltinOperator_SQUEEZE};
   return values;
 }
 
 inline const char **EnumNamesBuiltinOperator() {
-  static const char *names[] = {
-    "ADD",
-    "AVERAGE_POOL_2D",
-    "CONCATENATION",
-    "CONV_2D",
-    "DEPTHWISE_CONV_2D",
-    "",
-    "",
-    "EMBEDDING_LOOKUP",
-    "",
-    "FULLY_CONNECTED",
-    "HASHTABLE_LOOKUP",
-    "L2_NORMALIZATION",
-    "L2_POOL_2D",
-    "LOCAL_RESPONSE_NORMALIZATION",
-    "LOGISTIC",
-    "LSH_PROJECTION",
-    "LSTM",
-    "MAX_POOL_2D",
-    "MUL",
-    "RELU",
-    "RELU1",
-    "RELU6",
-    "RESHAPE",
-    "RESIZE_BILINEAR",
-    "RNN",
-    "SOFTMAX",
-    "SPACE_TO_DEPTH",
-    "SVDF",
-    "TANH",
-    "CONCAT_EMBEDDINGS",
-    "SKIP_GRAM",
-    "CALL",
-    "CUSTOM",
-    "EMBEDDING_LOOKUP_SPARSE",
-    nullptr
-  };
+  static const char *names[] = {"ADD",
+                                "AVERAGE_POOL_2D",
+                                "CONCATENATION",
+                                "CONV_2D",
+                                "DEPTHWISE_CONV_2D",
+                                "",
+                                "",
+                                "EMBEDDING_LOOKUP",
+                                "",
+                                "FULLY_CONNECTED",
+                                "HASHTABLE_LOOKUP",
+                                "L2_NORMALIZATION",
+                                "L2_POOL_2D",
+                                "LOCAL_RESPONSE_NORMALIZATION",
+                                "LOGISTIC",
+                                "LSH_PROJECTION",
+                                "LSTM",
+                                "MAX_POOL_2D",
+                                "MUL",
+                                "RELU",
+                                "RELU_N1_TO_1",
+                                "RELU6",
+                                "RESHAPE",
+                                "RESIZE_BILINEAR",
+                                "RNN",
+                                "SOFTMAX",
+                                "SPACE_TO_DEPTH",
+                                "SVDF",
+                                "TANH",
+                                "CONCAT_EMBEDDINGS",
+                                "SKIP_GRAM",
+                                "CALL",
+                                "CUSTOM",
+                                "EMBEDDING_LOOKUP_SPARSE",
+                                "PAD",
+                                "UNIDIRECTIONAL_SEQUENCE_RNN",
+                                "GATHER",
+                                "BATCH_TO_SPACE_ND",
+                                "SPACE_TO_BATCH_ND",
+                                "TRANSPOSE",
+                                "MEAN",
+                                "SUB",
+                                "DIV",
+                                "SQUEEZE",
+                                nullptr};
   return names;
 }
 
@@ -275,64 +333,91 @@ enum BuiltinOptions {
   BuiltinOptions_SpaceToDepthOptions = 19,
   BuiltinOptions_EmbeddingLookupSparseOptions = 20,
   BuiltinOptions_MulOptions = 21,
+  BuiltinOptions_PadOptions = 22,
+  BuiltinOptions_GatherOptions = 23,
+  BuiltinOptions_BatchToSpaceNDOptions = 24,
+  BuiltinOptions_SpaceToBatchNDOptions = 25,
+  BuiltinOptions_TransposeOptions = 26,
+  BuiltinOptions_MeanOptions = 27,
+  BuiltinOptions_SubOptions = 28,
+  BuiltinOptions_DivOptions = 29,
+  BuiltinOptions_SqueezeOptions = 30,
+  BuiltinOptions_SequenceRNNOptions = 31,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_MulOptions
+  BuiltinOptions_MAX = BuiltinOptions_SequenceRNNOptions
 };
 
-inline BuiltinOptions (&EnumValuesBuiltinOptions())[22] {
+inline BuiltinOptions (&EnumValuesBuiltinOptions())[32] {
   static BuiltinOptions values[] = {
-    BuiltinOptions_NONE,
-    BuiltinOptions_Conv2DOptions,
-    BuiltinOptions_DepthwiseConv2DOptions,
-    BuiltinOptions_ConcatEmbeddingsOptions,
-    BuiltinOptions_LSHProjectionOptions,
-    BuiltinOptions_Pool2DOptions,
-    BuiltinOptions_SVDFOptions,
-    BuiltinOptions_RNNOptions,
-    BuiltinOptions_FullyConnectedOptions,
-    BuiltinOptions_SoftmaxOptions,
-    BuiltinOptions_ConcatenationOptions,
-    BuiltinOptions_AddOptions,
-    BuiltinOptions_L2NormOptions,
-    BuiltinOptions_LocalResponseNormalizationOptions,
-    BuiltinOptions_LSTMOptions,
-    BuiltinOptions_ResizeBilinearOptions,
-    BuiltinOptions_CallOptions,
-    BuiltinOptions_ReshapeOptions,
-    BuiltinOptions_SkipGramOptions,
-    BuiltinOptions_SpaceToDepthOptions,
-    BuiltinOptions_EmbeddingLookupSparseOptions,
-    BuiltinOptions_MulOptions
-  };
+      BuiltinOptions_NONE,
+      BuiltinOptions_Conv2DOptions,
+      BuiltinOptions_DepthwiseConv2DOptions,
+      BuiltinOptions_ConcatEmbeddingsOptions,
+      BuiltinOptions_LSHProjectionOptions,
+      BuiltinOptions_Pool2DOptions,
+      BuiltinOptions_SVDFOptions,
+      BuiltinOptions_RNNOptions,
+      BuiltinOptions_FullyConnectedOptions,
+      BuiltinOptions_SoftmaxOptions,
+      BuiltinOptions_ConcatenationOptions,
+      BuiltinOptions_AddOptions,
+      BuiltinOptions_L2NormOptions,
+      BuiltinOptions_LocalResponseNormalizationOptions,
+      BuiltinOptions_LSTMOptions,
+      BuiltinOptions_ResizeBilinearOptions,
+      BuiltinOptions_CallOptions,
+      BuiltinOptions_ReshapeOptions,
+      BuiltinOptions_SkipGramOptions,
+      BuiltinOptions_SpaceToDepthOptions,
+      BuiltinOptions_EmbeddingLookupSparseOptions,
+      BuiltinOptions_MulOptions,
+      BuiltinOptions_PadOptions,
+      BuiltinOptions_GatherOptions,
+      BuiltinOptions_BatchToSpaceNDOptions,
+      BuiltinOptions_SpaceToBatchNDOptions,
+      BuiltinOptions_TransposeOptions,
+      BuiltinOptions_MeanOptions,
+      BuiltinOptions_SubOptions,
+      BuiltinOptions_DivOptions,
+      BuiltinOptions_SqueezeOptions,
+      BuiltinOptions_SequenceRNNOptions};
   return values;
 }
 
 inline const char **EnumNamesBuiltinOptions() {
-  static const char *names[] = {
-    "NONE",
-    "Conv2DOptions",
-    "DepthwiseConv2DOptions",
-    "ConcatEmbeddingsOptions",
-    "LSHProjectionOptions",
-    "Pool2DOptions",
-    "SVDFOptions",
-    "RNNOptions",
-    "FullyConnectedOptions",
-    "SoftmaxOptions",
-    "ConcatenationOptions",
-    "AddOptions",
-    "L2NormOptions",
-    "LocalResponseNormalizationOptions",
-    "LSTMOptions",
-    "ResizeBilinearOptions",
-    "CallOptions",
-    "ReshapeOptions",
-    "SkipGramOptions",
-    "SpaceToDepthOptions",
-    "EmbeddingLookupSparseOptions",
-    "MulOptions",
-    nullptr
-  };
+  static const char *names[] = {"NONE",
+                                "Conv2DOptions",
+                                "DepthwiseConv2DOptions",
+                                "ConcatEmbeddingsOptions",
+                                "LSHProjectionOptions",
+                                "Pool2DOptions",
+                                "SVDFOptions",
+                                "RNNOptions",
+                                "FullyConnectedOptions",
+                                "SoftmaxOptions",
+                                "ConcatenationOptions",
+                                "AddOptions",
+                                "L2NormOptions",
+                                "LocalResponseNormalizationOptions",
+                                "LSTMOptions",
+                                "ResizeBilinearOptions",
+                                "CallOptions",
+                                "ReshapeOptions",
+                                "SkipGramOptions",
+                                "SpaceToDepthOptions",
+                                "EmbeddingLookupSparseOptions",
+                                "MulOptions",
+                                "PadOptions",
+                                "GatherOptions",
+                                "BatchToSpaceNDOptions",
+                                "SpaceToBatchNDOptions",
+                                "TransposeOptions",
+                                "MeanOptions",
+                                "SubOptions",
+                                "DivOptions",
+                                "SqueezeOptions",
+                                "SequenceRNNOptions",
+                                nullptr};
   return names;
 }
 
@@ -341,114 +426,201 @@ inline const char *EnumNameBuiltinOptions(BuiltinOptions e) {
   return EnumNamesBuiltinOptions()[index];
 }
 
-template<typename T> struct BuiltinOptionsTraits {
+template <typename T>
+struct BuiltinOptionsTraits {
   static const BuiltinOptions enum_value = BuiltinOptions_NONE;
 };
 
-template<> struct BuiltinOptionsTraits<Conv2DOptions> {
+template <>
+struct BuiltinOptionsTraits<Conv2DOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
 };
 
-template<> struct BuiltinOptionsTraits<DepthwiseConv2DOptions> {
-  static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions;
+template <>
+struct BuiltinOptionsTraits<DepthwiseConv2DOptions> {
+  static const BuiltinOptions enum_value =
+      BuiltinOptions_DepthwiseConv2DOptions;
 };
 
-template<> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions> {
-  static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions;
+template <>
+struct BuiltinOptionsTraits<ConcatEmbeddingsOptions> {
+  static const BuiltinOptions enum_value =
+      BuiltinOptions_ConcatEmbeddingsOptions;
 };
 
-template<> struct BuiltinOptionsTraits<LSHProjectionOptions> {
+template <>
+struct BuiltinOptionsTraits<LSHProjectionOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
 };
 
-template<> struct BuiltinOptionsTraits<Pool2DOptions> {
+template <>
+struct BuiltinOptionsTraits<Pool2DOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
 };
 
-template<> struct BuiltinOptionsTraits<SVDFOptions> {
+template <>
+struct BuiltinOptionsTraits<SVDFOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
 };
 
-template<> struct BuiltinOptionsTraits<RNNOptions> {
+template <>
+struct BuiltinOptionsTraits<RNNOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
 };
 
-template<> struct BuiltinOptionsTraits<FullyConnectedOptions> {
+template <>
+struct BuiltinOptionsTraits<FullyConnectedOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
 };
 
-template<> struct BuiltinOptionsTraits<SoftmaxOptions> {
+template <>
+struct BuiltinOptionsTraits<SoftmaxOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
 };
 
-template<> struct BuiltinOptionsTraits<ConcatenationOptions> {
+template <>
+struct BuiltinOptionsTraits<ConcatenationOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
 };
 
-template<> struct BuiltinOptionsTraits<AddOptions> {
+template <>
+struct BuiltinOptionsTraits<AddOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
 };
 
-template<> struct BuiltinOptionsTraits<L2NormOptions> {
+template <>
+struct BuiltinOptionsTraits<L2NormOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
 };
 
-template<> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions> {
-  static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions;
+template <>
+struct BuiltinOptionsTraits<LocalResponseNormalizationOptions> {
+  static const BuiltinOptions enum_value =
+      BuiltinOptions_LocalResponseNormalizationOptions;
 };
 
-template<> struct BuiltinOptionsTraits<LSTMOptions> {
+template <>
+struct BuiltinOptionsTraits<LSTMOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
 };
 
-template<> struct BuiltinOptionsTraits<ResizeBilinearOptions> {
+template <>
+struct BuiltinOptionsTraits<ResizeBilinearOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
 };
 
-template<> struct BuiltinOptionsTraits<CallOptions> {
+template <>
+struct BuiltinOptionsTraits<CallOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
 };
 
-template<> struct BuiltinOptionsTraits<ReshapeOptions> {
+template <>
+struct BuiltinOptionsTraits<ReshapeOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
 };
 
-template<> struct BuiltinOptionsTraits<SkipGramOptions> {
+template <>
+struct BuiltinOptionsTraits<SkipGramOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
 };
 
-template<> struct BuiltinOptionsTraits<SpaceToDepthOptions> {
+template <>
+struct BuiltinOptionsTraits<SpaceToDepthOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
 };
 
-template<> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions> {
-  static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions;
+template <>
+struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions> {
+  static const BuiltinOptions enum_value =
+      BuiltinOptions_EmbeddingLookupSparseOptions;
 };
 
-template<> struct BuiltinOptionsTraits<MulOptions> {
+template <>
+struct BuiltinOptionsTraits<MulOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
 };
 
+template <>
+struct BuiltinOptionsTraits<PadOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<GatherOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<BatchToSpaceNDOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<SpaceToBatchNDOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<TransposeOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<MeanOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_MeanOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<SubOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SubOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<DivOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_DivOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<SqueezeOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions;
+};
+
+template <>
+struct BuiltinOptionsTraits<SequenceRNNOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
 
   BuiltinOptionsUnion() : type(BuiltinOptions_NONE), value(nullptr) {}
-  BuiltinOptionsUnion(BuiltinOptionsUnion&& u) FLATBUFFERS_NOEXCEPT :
-    type(BuiltinOptions_NONE), value(nullptr)
-    { std::swap(type, u.type); std::swap(value, u.value); }
+  BuiltinOptionsUnion(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT
+      : type(BuiltinOptions_NONE),
+        value(nullptr) {
+    std::swap(type, u.type);
+    std::swap(value, u.value);
+  }
   BuiltinOptionsUnion(const BuiltinOptionsUnion &) FLATBUFFERS_NOEXCEPT;
-  BuiltinOptionsUnion &operator=(const BuiltinOptionsUnion &u) FLATBUFFERS_NOEXCEPT
-    { BuiltinOptionsUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; }
-  BuiltinOptionsUnion &operator=(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT
-    { std::swap(type, u.type); std::swap(value, u.value); return *this; }
+  BuiltinOptionsUnion &operator=(const BuiltinOptionsUnion &u)
+      FLATBUFFERS_NOEXCEPT {
+    BuiltinOptionsUnion t(u);
+    std::swap(type, t.type);
+    std::swap(value, t.value);
+    return *this;
+  }
+  BuiltinOptionsUnion &operator=(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT {
+    std::swap(type, u.type);
+    std::swap(value, u.value);
+    return *this;
+  }
   ~BuiltinOptionsUnion() { Reset(); }
 
   void Reset();
 
 #ifndef FLATBUFFERS_CPP98_STL
   template <typename T>
-  void Set(T&& val) {
+  void Set(T &&val) {
     Reset();
     type = BuiltinOptionsTraits<typename T::TableType>::enum_value;
     if (type != BuiltinOptions_NONE) {
@@ -457,181 +629,332 @@ struct BuiltinOptionsUnion {
   }
 #endif  // FLATBUFFERS_CPP98_STL
 
-  static void *UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver);
-  flatbuffers::Offset<void> Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;
+  static void *UnPack(const void *obj, BuiltinOptions type,
+                      const flatbuffers::resolver_function_t *resolver);
+  flatbuffers::Offset<void> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;
 
   Conv2DOptionsT *AsConv2DOptions() {
-    return type == BuiltinOptions_Conv2DOptions ?
-      reinterpret_cast<Conv2DOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_Conv2DOptions
+               ? reinterpret_cast<Conv2DOptionsT *>(value)
+               : nullptr;
   }
   const Conv2DOptionsT *AsConv2DOptions() const {
-    return type == BuiltinOptions_Conv2DOptions ?
-      reinterpret_cast<const Conv2DOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_Conv2DOptions
+               ? reinterpret_cast<const Conv2DOptionsT *>(value)
+               : nullptr;
   }
   DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() {
-    return type == BuiltinOptions_DepthwiseConv2DOptions ?
-      reinterpret_cast<DepthwiseConv2DOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_DepthwiseConv2DOptions
+               ? reinterpret_cast<DepthwiseConv2DOptionsT *>(value)
+               : nullptr;
   }
   const DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() const {
-    return type == BuiltinOptions_DepthwiseConv2DOptions ?
-      reinterpret_cast<const DepthwiseConv2DOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_DepthwiseConv2DOptions
+               ? reinterpret_cast<const DepthwiseConv2DOptionsT *>(value)
+               : nullptr;
   }
   ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() {
-    return type == BuiltinOptions_ConcatEmbeddingsOptions ?
-      reinterpret_cast<ConcatEmbeddingsOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_ConcatEmbeddingsOptions
+               ? reinterpret_cast<ConcatEmbeddingsOptionsT *>(value)
+               : nullptr;
   }
   const ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() const {
-    return type == BuiltinOptions_ConcatEmbeddingsOptions ?
-      reinterpret_cast<const ConcatEmbeddingsOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_ConcatEmbeddingsOptions
+               ? reinterpret_cast<const ConcatEmbeddingsOptionsT *>(value)
+               : nullptr;
   }
   LSHProjectionOptionsT *AsLSHProjectionOptions() {
-    return type == BuiltinOptions_LSHProjectionOptions ?
-      reinterpret_cast<LSHProjectionOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_LSHProjectionOptions
+               ? reinterpret_cast<LSHProjectionOptionsT *>(value)
+               : nullptr;
   }
   const LSHProjectionOptionsT *AsLSHProjectionOptions() const {
-    return type == BuiltinOptions_LSHProjectionOptions ?
-      reinterpret_cast<const LSHProjectionOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_LSHProjectionOptions
+               ? reinterpret_cast<const LSHProjectionOptionsT *>(value)
+               : nullptr;
   }
   Pool2DOptionsT *AsPool2DOptions() {
-    return type == BuiltinOptions_Pool2DOptions ?
-      reinterpret_cast<Pool2DOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_Pool2DOptions
+               ? reinterpret_cast<Pool2DOptionsT *>(value)
+               : nullptr;
   }
   const Pool2DOptionsT *AsPool2DOptions() const {
-    return type == BuiltinOptions_Pool2DOptions ?
-      reinterpret_cast<const Pool2DOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_Pool2DOptions
+               ? reinterpret_cast<const Pool2DOptionsT *>(value)
+               : nullptr;
   }
   SVDFOptionsT *AsSVDFOptions() {
-    return type == BuiltinOptions_SVDFOptions ?
-      reinterpret_cast<SVDFOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_SVDFOptions
+               ? reinterpret_cast<SVDFOptionsT *>(value)
+               : nullptr;
   }
   const SVDFOptionsT *AsSVDFOptions() const {
-    return type == BuiltinOptions_SVDFOptions ?
-      reinterpret_cast<const SVDFOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_SVDFOptions
+               ? reinterpret_cast<const SVDFOptionsT *>(value)
+               : nullptr;
   }
   RNNOptionsT *AsRNNOptions() {
-    return type == BuiltinOptions_RNNOptions ?
-      reinterpret_cast<RNNOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_RNNOptions
+               ? reinterpret_cast<RNNOptionsT *>(value)
+               : nullptr;
   }
   const RNNOptionsT *AsRNNOptions() const {
-    return type == BuiltinOptions_RNNOptions ?
-      reinterpret_cast<const RNNOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_RNNOptions
+               ? reinterpret_cast<const RNNOptionsT *>(value)
+               : nullptr;
   }
   FullyConnectedOptionsT *AsFullyConnectedOptions() {
-    return type == BuiltinOptions_FullyConnectedOptions ?
-      reinterpret_cast<FullyConnectedOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_FullyConnectedOptions
+               ? reinterpret_cast<FullyConnectedOptionsT *>(value)
+               : nullptr;
   }
   const FullyConnectedOptionsT *AsFullyConnectedOptions() const {
-    return type == BuiltinOptions_FullyConnectedOptions ?
-      reinterpret_cast<const FullyConnectedOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_FullyConnectedOptions
+               ? reinterpret_cast<const FullyConnectedOptionsT *>(value)
+               : nullptr;
   }
   SoftmaxOptionsT *AsSoftmaxOptions() {
-    return type == BuiltinOptions_SoftmaxOptions ?
-      reinterpret_cast<SoftmaxOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_SoftmaxOptions
+               ? reinterpret_cast<SoftmaxOptionsT *>(value)
+               : nullptr;
   }
   const SoftmaxOptionsT *AsSoftmaxOptions() const {
-    return type == BuiltinOptions_SoftmaxOptions ?
-      reinterpret_cast<const SoftmaxOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_SoftmaxOptions
+               ? reinterpret_cast<const SoftmaxOptionsT *>(value)
+               : nullptr;
   }
   ConcatenationOptionsT *AsConcatenationOptions() {
-    return type == BuiltinOptions_ConcatenationOptions ?
-      reinterpret_cast<ConcatenationOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_ConcatenationOptions
+               ? reinterpret_cast<ConcatenationOptionsT *>(value)
+               : nullptr;
   }
   const ConcatenationOptionsT *AsConcatenationOptions() const {
-    return type == BuiltinOptions_ConcatenationOptions ?
-      reinterpret_cast<const ConcatenationOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_ConcatenationOptions
+               ? reinterpret_cast<const ConcatenationOptionsT *>(value)
+               : nullptr;
   }
   AddOptionsT *AsAddOptions() {
-    return type == BuiltinOptions_AddOptions ?
-      reinterpret_cast<AddOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_AddOptions
+               ? reinterpret_cast<AddOptionsT *>(value)
+               : nullptr;
   }
   const AddOptionsT *AsAddOptions() const {
-    return type == BuiltinOptions_AddOptions ?
-      reinterpret_cast<const AddOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_AddOptions
+               ? reinterpret_cast<const AddOptionsT *>(value)
+               : nullptr;
   }
   L2NormOptionsT *AsL2NormOptions() {
-    return type == BuiltinOptions_L2NormOptions ?
-      reinterpret_cast<L2NormOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_L2NormOptions
+               ? reinterpret_cast<L2NormOptionsT *>(value)
+               : nullptr;
   }
   const L2NormOptionsT *AsL2NormOptions() const {
-    return type == BuiltinOptions_L2NormOptions ?
-      reinterpret_cast<const L2NormOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_L2NormOptions
+               ? reinterpret_cast<const L2NormOptionsT *>(value)
+               : nullptr;
   }
   LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() {
-    return type == BuiltinOptions_LocalResponseNormalizationOptions ?
-      reinterpret_cast<LocalResponseNormalizationOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_LocalResponseNormalizationOptions
+               ? reinterpret_cast<LocalResponseNormalizationOptionsT *>(value)
+               : nullptr;
   }
-  const LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() const {
-    return type == BuiltinOptions_LocalResponseNormalizationOptions ?
-      reinterpret_cast<const LocalResponseNormalizationOptionsT *>(value) : nullptr;
+  const LocalResponseNormalizationOptionsT *
+  AsLocalResponseNormalizationOptions() const {
+    return type == BuiltinOptions_LocalResponseNormalizationOptions
+               ? reinterpret_cast<const LocalResponseNormalizationOptionsT *>(
+                     value)
+               : nullptr;
   }
   LSTMOptionsT *AsLSTMOptions() {
-    return type == BuiltinOptions_LSTMOptions ?
-      reinterpret_cast<LSTMOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_LSTMOptions
+               ? reinterpret_cast<LSTMOptionsT *>(value)
+               : nullptr;
   }
   const LSTMOptionsT *AsLSTMOptions() const {
-    return type == BuiltinOptions_LSTMOptions ?
-      reinterpret_cast<const LSTMOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_LSTMOptions
+               ? reinterpret_cast<const LSTMOptionsT *>(value)
+               : nullptr;
   }
   ResizeBilinearOptionsT *AsResizeBilinearOptions() {
-    return type == BuiltinOptions_ResizeBilinearOptions ?
-      reinterpret_cast<ResizeBilinearOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_ResizeBilinearOptions
+               ? reinterpret_cast<ResizeBilinearOptionsT *>(value)
+               : nullptr;
   }
   const ResizeBilinearOptionsT *AsResizeBilinearOptions() const {
-    return type == BuiltinOptions_ResizeBilinearOptions ?
-      reinterpret_cast<const ResizeBilinearOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_ResizeBilinearOptions
+               ? reinterpret_cast<const ResizeBilinearOptionsT *>(value)
+               : nullptr;
   }
   CallOptionsT *AsCallOptions() {
-    return type == BuiltinOptions_CallOptions ?
-      reinterpret_cast<CallOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_CallOptions
+               ? reinterpret_cast<CallOptionsT *>(value)
+               : nullptr;
   }
   const CallOptionsT *AsCallOptions() const {
-    return type == BuiltinOptions_CallOptions ?
-      reinterpret_cast<const CallOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_CallOptions
+               ? reinterpret_cast<const CallOptionsT *>(value)
+               : nullptr;
   }
   ReshapeOptionsT *AsReshapeOptions() {
-    return type == BuiltinOptions_ReshapeOptions ?
-      reinterpret_cast<ReshapeOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_ReshapeOptions
+               ? reinterpret_cast<ReshapeOptionsT *>(value)
+               : nullptr;
   }
   const ReshapeOptionsT *AsReshapeOptions() const {
-    return type == BuiltinOptions_ReshapeOptions ?
-      reinterpret_cast<const ReshapeOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_ReshapeOptions
+               ? reinterpret_cast<const ReshapeOptionsT *>(value)
+               : nullptr;
   }
   SkipGramOptionsT *AsSkipGramOptions() {
-    return type == BuiltinOptions_SkipGramOptions ?
-      reinterpret_cast<SkipGramOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_SkipGramOptions
+               ? reinterpret_cast<SkipGramOptionsT *>(value)
+               : nullptr;
   }
   const SkipGramOptionsT *AsSkipGramOptions() const {
-    return type == BuiltinOptions_SkipGramOptions ?
-      reinterpret_cast<const SkipGramOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_SkipGramOptions
+               ? reinterpret_cast<const SkipGramOptionsT *>(value)
+               : nullptr;
   }
   SpaceToDepthOptionsT *AsSpaceToDepthOptions() {
-    return type == BuiltinOptions_SpaceToDepthOptions ?
-      reinterpret_cast<SpaceToDepthOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_SpaceToDepthOptions
+               ? reinterpret_cast<SpaceToDepthOptionsT *>(value)
+               : nullptr;
   }
   const SpaceToDepthOptionsT *AsSpaceToDepthOptions() const {
-    return type == BuiltinOptions_SpaceToDepthOptions ?
-      reinterpret_cast<const SpaceToDepthOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_SpaceToDepthOptions
+               ? reinterpret_cast<const SpaceToDepthOptionsT *>(value)
+               : nullptr;
   }
   EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() {
-    return type == BuiltinOptions_EmbeddingLookupSparseOptions ?
-      reinterpret_cast<EmbeddingLookupSparseOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_EmbeddingLookupSparseOptions
+               ? reinterpret_cast<EmbeddingLookupSparseOptionsT *>(value)
+               : nullptr;
   }
   const EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() const {
-    return type == BuiltinOptions_EmbeddingLookupSparseOptions ?
-      reinterpret_cast<const EmbeddingLookupSparseOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_EmbeddingLookupSparseOptions
+               ? reinterpret_cast<const EmbeddingLookupSparseOptionsT *>(value)
+               : nullptr;
   }
   MulOptionsT *AsMulOptions() {
-    return type == BuiltinOptions_MulOptions ?
-      reinterpret_cast<MulOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_MulOptions
+               ? reinterpret_cast<MulOptionsT *>(value)
+               : nullptr;
   }
   const MulOptionsT *AsMulOptions() const {
-    return type == BuiltinOptions_MulOptions ?
-      reinterpret_cast<const MulOptionsT *>(value) : nullptr;
+    return type == BuiltinOptions_MulOptions
+               ? reinterpret_cast<const MulOptionsT *>(value)
+               : nullptr;
+  }
+  PadOptionsT *AsPadOptions() {
+    return type == BuiltinOptions_PadOptions
+               ? reinterpret_cast<PadOptionsT *>(value)
+               : nullptr;
+  }
+  const PadOptionsT *AsPadOptions() const {
+    return type == BuiltinOptions_PadOptions
+               ? reinterpret_cast<const PadOptionsT *>(value)
+               : nullptr;
+  }
+  GatherOptionsT *AsGatherOptions() {
+    return type == BuiltinOptions_GatherOptions
+               ? reinterpret_cast<GatherOptionsT *>(value)
+               : nullptr;
+  }
+  const GatherOptionsT *AsGatherOptions() const {
+    return type == BuiltinOptions_GatherOptions
+               ? reinterpret_cast<const GatherOptionsT *>(value)
+               : nullptr;
+  }
+  BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() {
+    return type == BuiltinOptions_BatchToSpaceNDOptions
+               ? reinterpret_cast<BatchToSpaceNDOptionsT *>(value)
+               : nullptr;
+  }
+  const BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() const {
+    return type == BuiltinOptions_BatchToSpaceNDOptions
+               ? reinterpret_cast<const BatchToSpaceNDOptionsT *>(value)
+               : nullptr;
+  }
+  SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions() {
+    return type == BuiltinOptions_SpaceToBatchNDOptions
+               ? reinterpret_cast<SpaceToBatchNDOptionsT *>(value)
+               : nullptr;
+  }
+  const SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions() const {
+    return type == BuiltinOptions_SpaceToBatchNDOptions
+               ? reinterpret_cast<const SpaceToBatchNDOptionsT *>(value)
+               : nullptr;
+  }
+  TransposeOptionsT *AsTransposeOptions() {
+    return type == BuiltinOptions_TransposeOptions
+               ? reinterpret_cast<TransposeOptionsT *>(value)
+               : nullptr;
+  }
+  const TransposeOptionsT *AsTransposeOptions() const {
+    return type == BuiltinOptions_TransposeOptions
+               ? reinterpret_cast<const TransposeOptionsT *>(value)
+               : nullptr;
+  }
+  MeanOptionsT *AsMeanOptions() {
+    return type == BuiltinOptions_MeanOptions
+               ? reinterpret_cast<MeanOptionsT *>(value)
+               : nullptr;
+  }
+  const MeanOptionsT *AsMeanOptions() const {
+    return type == BuiltinOptions_MeanOptions
+               ? reinterpret_cast<const MeanOptionsT *>(value)
+               : nullptr;
+  }
+  SubOptionsT *AsSubOptions() {
+    return type == BuiltinOptions_SubOptions
+               ? reinterpret_cast<SubOptionsT *>(value)
+               : nullptr;
+  }
+  const SubOptionsT *AsSubOptions() const {
+    return type == BuiltinOptions_SubOptions
+               ? reinterpret_cast<const SubOptionsT *>(value)
+               : nullptr;
+  }
+  DivOptionsT *AsDivOptions() {
+    return type == BuiltinOptions_DivOptions
+               ? reinterpret_cast<DivOptionsT *>(value)
+               : nullptr;
+  }
+  const DivOptionsT *AsDivOptions() const {
+    return type == BuiltinOptions_DivOptions
+               ? reinterpret_cast<const DivOptionsT *>(value)
+               : nullptr;
+  }
+  SqueezeOptionsT *AsSqueezeOptions() {
+    return type == BuiltinOptions_SqueezeOptions
+               ? reinterpret_cast<SqueezeOptionsT *>(value)
+               : nullptr;
+  }
+  const SqueezeOptionsT *AsSqueezeOptions() const {
+    return type == BuiltinOptions_SqueezeOptions
+               ? reinterpret_cast<const SqueezeOptionsT *>(value)
+               : nullptr;
+  }
+  SequenceRNNOptionsT *AsSequenceRNNOptions() {
+    return type == BuiltinOptions_SequenceRNNOptions
+               ? reinterpret_cast<SequenceRNNOptionsT *>(value)
+               : nullptr;
+  }
+  const SequenceRNNOptionsT *AsSequenceRNNOptions() const {
+    return type == BuiltinOptions_SequenceRNNOptions
+               ? reinterpret_cast<const SequenceRNNOptionsT *>(value)
+               : nullptr;
   }
 };
 
-bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
-bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
+bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj,
+                          BuiltinOptions type);
+bool VerifyBuiltinOptionsVector(
+    flatbuffers::Verifier &verifier,
+    const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+    const flatbuffers::Vector<uint8_t> *types);
 
 enum Padding {
   Padding_SAME = 0,
@@ -641,19 +964,12 @@ enum Padding {
 };
 
 inline Padding (&EnumValuesPadding())[2] {
-  static Padding values[] = {
-    Padding_SAME,
-    Padding_VALID
-  };
+  static Padding values[] = {Padding_SAME, Padding_VALID};
   return values;
 }
 
 inline const char **EnumNamesPadding() {
-  static const char *names[] = {
-    "SAME",
-    "VALID",
-    nullptr
-  };
+  static const char *names[] = {"SAME", "VALID", nullptr};
   return names;
 }
 
@@ -665,7 +981,7 @@ inline const char *EnumNamePadding(Padding e) {
 enum ActivationFunctionType {
   ActivationFunctionType_NONE = 0,
   ActivationFunctionType_RELU = 1,
-  ActivationFunctionType_RELU1 = 2,
+  ActivationFunctionType_RELU_N1_TO_1 = 2,
   ActivationFunctionType_RELU6 = 3,
   ActivationFunctionType_TANH = 4,
   ActivationFunctionType_SIGN_BIT = 5,
@@ -675,26 +991,15 @@ enum ActivationFunctionType {
 
 inline ActivationFunctionType (&EnumValuesActivationFunctionType())[6] {
   static ActivationFunctionType values[] = {
-    ActivationFunctionType_NONE,
-    ActivationFunctionType_RELU,
-    ActivationFunctionType_RELU1,
-    ActivationFunctionType_RELU6,
-    ActivationFunctionType_TANH,
-    ActivationFunctionType_SIGN_BIT
-  };
+      ActivationFunctionType_NONE,         ActivationFunctionType_RELU,
+      ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6,
+      ActivationFunctionType_TANH,         ActivationFunctionType_SIGN_BIT};
   return values;
 }
 
 inline const char **EnumNamesActivationFunctionType() {
-  static const char *names[] = {
-    "NONE",
-    "RELU",
-    "RELU1",
-    "RELU6",
-    "TANH",
-    "SIGN_BIT",
-    nullptr
-  };
+  static const char *names[] = {"NONE", "RELU",     "RELU_N1_TO_1", "RELU6",
+                                "TANH", "SIGN_BIT", nullptr};
   return names;
 }
 
@@ -712,21 +1017,14 @@ enum LSHProjectionType {
 };
 
 inline LSHProjectionType (&EnumValuesLSHProjectionType())[3] {
-  static LSHProjectionType values[] = {
-    LSHProjectionType_UNKNOWN,
-    LSHProjectionType_SPARSE,
-    LSHProjectionType_DENSE
-  };
+  static LSHProjectionType values[] = {LSHProjectionType_UNKNOWN,
+                                       LSHProjectionType_SPARSE,
+                                       LSHProjectionType_DENSE};
   return values;
 }
 
 inline const char **EnumNamesLSHProjectionType() {
-  static const char *names[] = {
-    "UNKNOWN",
-    "SPARSE",
-    "DENSE",
-    nullptr
-  };
+  static const char *names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
   return names;
 }
 
@@ -744,21 +1042,13 @@ enum CombinerType {
 };
 
 inline CombinerType (&EnumValuesCombinerType())[3] {
-  static CombinerType values[] = {
-    CombinerType_SUM,
-    CombinerType_MEAN,
-    CombinerType_SQRTN
-  };
+  static CombinerType values[] = {CombinerType_SUM, CombinerType_MEAN,
+                                  CombinerType_SQRTN};
   return values;
 }
 
 inline const char **EnumNamesCombinerType() {
-  static const char *names[] = {
-    "SUM",
-    "MEAN",
-    "SQRTN",
-    nullptr
-  };
+  static const char *names[] = {"SUM", "MEAN", "SQRTN", nullptr};
   return names;
 }
 
@@ -774,17 +1064,12 @@ enum CustomOptionsFormat {
 };
 
 inline CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] {
-  static CustomOptionsFormat values[] = {
-    CustomOptionsFormat_FLEXBUFFERS
-  };
+  static CustomOptionsFormat values[] = {CustomOptionsFormat_FLEXBUFFERS};
   return values;
 }
 
 inline const char **EnumNamesCustomOptionsFormat() {
-  static const char *names[] = {
-    "FLEXBUFFERS",
-    nullptr
-  };
+  static const char *names[] = {"FLEXBUFFERS", nullptr};
   return names;
 }
 
@@ -799,18 +1084,13 @@ struct QuantizationParametersT : public flatbuffers::NativeTable {
   std::vector<float> max;
   std::vector<float> scale;
   std::vector<int64_t> zero_point;
-  QuantizationParametersT() {
-  }
+  QuantizationParametersT() {}
 };
 
-struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+struct QuantizationParameters FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
   typedef QuantizationParametersT NativeTableType;
-  enum {
-    VT_MIN = 4,
-    VT_MAX = 6,
-    VT_SCALE = 8,
-    VT_ZERO_POINT = 10
-  };
+  enum { VT_MIN = 4, VT_MAX = 6, VT_SCALE = 8, VT_ZERO_POINT = 10 };
   const flatbuffers::Vector<float> *min() const {
     return GetPointer<const flatbuffers::Vector<float> *>(VT_MIN);
   }
@@ -824,20 +1104,20 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
     return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_MIN) &&
-           verifier.Verify(min()) &&
-           VerifyOffset(verifier, VT_MAX) &&
-           verifier.Verify(max()) &&
-           VerifyOffset(verifier, VT_SCALE) &&
-           verifier.Verify(scale()) &&
-           VerifyOffset(verifier, VT_ZERO_POINT) &&
-           verifier.Verify(zero_point()) &&
-           verifier.EndTable();
-  }
-  QuantizationParametersT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<QuantizationParameters> Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_MIN) &&
+           verifier.Verify(min()) && VerifyOffset(verifier, VT_MAX) &&
+           verifier.Verify(max()) && VerifyOffset(verifier, VT_SCALE) &&
+           verifier.Verify(scale()) && VerifyOffset(verifier, VT_ZERO_POINT) &&
+           verifier.Verify(zero_point()) && verifier.EndTable();
+  }
+  QuantizationParametersT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      QuantizationParametersT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<QuantizationParameters> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct QuantizationParametersBuilder {
@@ -852,14 +1132,16 @@ struct QuantizationParametersBuilder {
   void add_scale(flatbuffers::Offset<flatbuffers::Vector<float>> scale) {
     fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale);
   }
-  void add_zero_point(flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point) {
+  void add_zero_point(
+      flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point) {
     fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
   }
   explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
-  QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
+  QuantizationParametersBuilder &operator=(
+      const QuantizationParametersBuilder &);
   flatbuffers::Offset<QuantizationParameters> Finish() {
     const auto end = fbb_.EndTable(start_);
     auto o = flatbuffers::Offset<QuantizationParameters>(end);
@@ -881,21 +1163,23 @@ inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
   return builder_.Finish();
 }
 
-inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
+inline flatbuffers::Offset<QuantizationParameters>
+CreateQuantizationParametersDirect(
     flatbuffers::FlatBufferBuilder &_fbb,
     const std::vector<float> *min = nullptr,
     const std::vector<float> *max = nullptr,
     const std::vector<float> *scale = nullptr,
     const std::vector<int64_t> *zero_point = nullptr) {
   return tflite::CreateQuantizationParameters(
-      _fbb,
-      min ? _fbb.CreateVector<float>(*min) : 0,
+      _fbb, min ? _fbb.CreateVector<float>(*min) : 0,
       max ? _fbb.CreateVector<float>(*max) : 0,
       scale ? _fbb.CreateVector<float>(*scale) : 0,
       zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0);
 }
 
-flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
+    flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct TensorT : public flatbuffers::NativeTable {
   typedef Tensor TableType;
@@ -904,10 +1188,7 @@ struct TensorT : public flatbuffers::NativeTable {
   uint32_t buffer;
   std::string name;
   std::unique_ptr<QuantizationParametersT> quantization;
-  TensorT()
-      : type(TensorType_FLOAT32),
-        buffer(0) {
-  }
+  TensorT() : type(TensorType_FLOAT32), buffer(0) {}
 };
 
 struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
@@ -925,9 +1206,7 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   TensorType type() const {
     return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0));
   }
-  uint32_t buffer() const {
-    return GetField<uint32_t>(VT_BUFFER, 0);
-  }
+  uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
   const flatbuffers::String *name() const {
     return GetPointer<const flatbuffers::String *>(VT_NAME);
   }
@@ -935,20 +1214,20 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
     return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION);
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_SHAPE) &&
-           verifier.Verify(shape()) &&
-           VerifyField<int8_t>(verifier, VT_TYPE) &&
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) &&
+           verifier.Verify(shape()) && VerifyField<int8_t>(verifier, VT_TYPE) &&
            VerifyField<uint32_t>(verifier, VT_BUFFER) &&
-           VerifyOffset(verifier, VT_NAME) &&
-           verifier.Verify(name()) &&
+           VerifyOffset(verifier, VT_NAME) && verifier.Verify(name()) &&
            VerifyOffset(verifier, VT_QUANTIZATION) &&
-           verifier.VerifyTable(quantization()) &&
-           verifier.EndTable();
-  }
-  TensorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Tensor> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           verifier.VerifyTable(quantization()) && verifier.EndTable();
+  }
+  TensorT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver =
+                                 nullptr) const;
+  static flatbuffers::Offset<Tensor> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct TensorBuilder {
@@ -966,11 +1245,11 @@ struct TensorBuilder {
   void add_name(flatbuffers::Offset<flatbuffers::String> name) {
     fbb_.AddOffset(Tensor::VT_NAME, name);
   }
-  void add_quantization(flatbuffers::Offset<QuantizationParameters> quantization) {
+  void add_quantization(
+      flatbuffers::Offset<QuantizationParameters> quantization) {
     fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
   }
-  explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+  explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   TensorBuilder &operator=(const TensorBuilder &);
@@ -984,8 +1263,7 @@ struct TensorBuilder {
 inline flatbuffers::Offset<Tensor> CreateTensor(
     flatbuffers::FlatBufferBuilder &_fbb,
     flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
-    TensorType type = TensorType_FLOAT32,
-    uint32_t buffer = 0,
+    TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
     flatbuffers::Offset<flatbuffers::String> name = 0,
     flatbuffers::Offset<QuantizationParameters> quantization = 0) {
   TensorBuilder builder_(_fbb);
@@ -1000,20 +1278,17 @@ inline flatbuffers::Offset<Tensor> CreateTensor(
 inline flatbuffers::Offset<Tensor> CreateTensorDirect(
     flatbuffers::FlatBufferBuilder &_fbb,
     const std::vector<int32_t> *shape = nullptr,
-    TensorType type = TensorType_FLOAT32,
-    uint32_t buffer = 0,
+    TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
     const char *name = nullptr,
     flatbuffers::Offset<QuantizationParameters> quantization = 0) {
   return tflite::CreateTensor(
-      _fbb,
-      shape ? _fbb.CreateVector<int32_t>(*shape) : 0,
-      type,
-      buffer,
-      name ? _fbb.CreateString(name) : 0,
-      quantization);
+      _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
+      name ? _fbb.CreateString(name) : 0, quantization);
 }
 
-flatbuffers::Offset<Tensor> CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<Tensor> CreateTensor(
+    flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct Conv2DOptionsT : public flatbuffers::NativeTable {
   typedef Conv2DOptions TableType;
@@ -1025,8 +1300,7 @@ struct Conv2DOptionsT : public flatbuffers::NativeTable {
       : padding(Padding_SAME),
         stride_w(0),
         stride_h(0),
-        fused_activation_function(ActivationFunctionType_NONE) {
-  }
+        fused_activation_function(ActivationFunctionType_NONE) {}
 };
 
 struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
@@ -1040,14 +1314,11 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   Padding padding() const {
     return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0));
   }
-  int32_t stride_w() const {
-    return GetField<int32_t>(VT_STRIDE_W, 0);
-  }
-  int32_t stride_h() const {
-    return GetField<int32_t>(VT_STRIDE_H, 0);
-  }
+  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
   ActivationFunctionType fused_activation_function() const {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
@@ -1057,16 +1328,22 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            verifier.EndTable();
   }
-  Conv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Conv2DOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  Conv2DOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      Conv2DOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<Conv2DOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct Conv2DOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_padding(Padding padding) {
-    fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+    fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING,
+                            static_cast<int8_t>(padding), 0);
   }
   void add_stride_w(int32_t stride_w) {
     fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_W, stride_w, 0);
@@ -1074,11 +1351,13 @@ struct Conv2DOptionsBuilder {
   void add_stride_h(int32_t stride_h) {
     fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
   }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
   }
   explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
@@ -1090,11 +1369,10 @@ struct Conv2DOptionsBuilder {
 };
 
 inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    Padding padding = Padding_SAME,
-    int32_t stride_w = 0,
-    int32_t stride_h = 0,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+    int32_t stride_w = 0, int32_t stride_h = 0,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
   Conv2DOptionsBuilder builder_(_fbb);
   builder_.add_stride_h(stride_h);
   builder_.add_stride_w(stride_w);
@@ -1103,7 +1381,9 @@ inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
   return builder_.Finish();
 }
 
-flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct Pool2DOptionsT : public flatbuffers::NativeTable {
   typedef Pool2DOptions TableType;
@@ -1119,8 +1399,7 @@ struct Pool2DOptionsT : public flatbuffers::NativeTable {
         stride_h(0),
         filter_width(0),
         filter_height(0),
-        fused_activation_function(ActivationFunctionType_NONE) {
-  }
+        fused_activation_function(ActivationFunctionType_NONE) {}
 };
 
 struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
@@ -1136,20 +1415,15 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   Padding padding() const {
     return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0));
   }
-  int32_t stride_w() const {
-    return GetField<int32_t>(VT_STRIDE_W, 0);
-  }
-  int32_t stride_h() const {
-    return GetField<int32_t>(VT_STRIDE_H, 0);
-  }
-  int32_t filter_width() const {
-    return GetField<int32_t>(VT_FILTER_WIDTH, 0);
-  }
+  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+  int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
   int32_t filter_height() const {
     return GetField<int32_t>(VT_FILTER_HEIGHT, 0);
   }
   ActivationFunctionType fused_activation_function() const {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
@@ -1161,16 +1435,22 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            verifier.EndTable();
   }
-  Pool2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Pool2DOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  Pool2DOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      Pool2DOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<Pool2DOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct Pool2DOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_padding(Padding padding) {
-    fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+    fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING,
+                            static_cast<int8_t>(padding), 0);
   }
   void add_stride_w(int32_t stride_w) {
     fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0);
@@ -1184,11 +1464,13 @@ struct Pool2DOptionsBuilder {
   void add_filter_height(int32_t filter_height) {
     fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
   }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
   }
   explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
@@ -1200,13 +1482,11 @@ struct Pool2DOptionsBuilder {
 };
 
 inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    Padding padding = Padding_SAME,
-    int32_t stride_w = 0,
-    int32_t stride_h = 0,
-    int32_t filter_width = 0,
+    flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+    int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0,
     int32_t filter_height = 0,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
   Pool2DOptionsBuilder builder_(_fbb);
   builder_.add_filter_height(filter_height);
   builder_.add_filter_width(filter_width);
@@ -1217,7 +1497,9 @@ inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
   return builder_.Finish();
 }
 
-flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable {
   typedef DepthwiseConv2DOptions TableType;
@@ -1231,11 +1513,11 @@ struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable {
         stride_w(0),
         stride_h(0),
         depth_multiplier(0),
-        fused_activation_function(ActivationFunctionType_NONE) {
-  }
+        fused_activation_function(ActivationFunctionType_NONE) {}
 };
 
-struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
   typedef DepthwiseConv2DOptionsT NativeTableType;
   enum {
     VT_PADDING = 4,
@@ -1247,17 +1529,14 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
   Padding padding() const {
     return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0));
   }
-  int32_t stride_w() const {
-    return GetField<int32_t>(VT_STRIDE_W, 0);
-  }
-  int32_t stride_h() const {
-    return GetField<int32_t>(VT_STRIDE_H, 0);
-  }
+  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
   int32_t depth_multiplier() const {
     return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0);
   }
   ActivationFunctionType fused_activation_function() const {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
@@ -1268,16 +1547,22 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            verifier.EndTable();
   }
-  DepthwiseConv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<DepthwiseConv2DOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  DepthwiseConv2DOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      DepthwiseConv2DOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<DepthwiseConv2DOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct DepthwiseConv2DOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_padding(Padding padding) {
-    fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+    fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING,
+                            static_cast<int8_t>(padding), 0);
   }
   void add_stride_w(int32_t stride_w) {
     fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0);
@@ -1286,16 +1571,21 @@ struct DepthwiseConv2DOptionsBuilder {
     fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0);
   }
   void add_depth_multiplier(int32_t depth_multiplier) {
-    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER,
+                             depth_multiplier, 0);
   }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(
+        DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+        static_cast<int8_t>(fused_activation_function), 0);
   }
   explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
-  DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
+  DepthwiseConv2DOptionsBuilder &operator=(
+      const DepthwiseConv2DOptionsBuilder &);
   flatbuffers::Offset<DepthwiseConv2DOptions> Finish() {
     const auto end = fbb_.EndTable(start_);
     auto o = flatbuffers::Offset<DepthwiseConv2DOptions>(end);
@@ -1304,12 +1594,10 @@ struct DepthwiseConv2DOptionsBuilder {
 };
 
 inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    Padding padding = Padding_SAME,
-    int32_t stride_w = 0,
-    int32_t stride_h = 0,
-    int32_t depth_multiplier = 0,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+    int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
   DepthwiseConv2DOptionsBuilder builder_(_fbb);
   builder_.add_depth_multiplier(depth_multiplier);
   builder_.add_stride_h(stride_h);
@@ -1319,33 +1607,34 @@ inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
   return builder_.Finish();
 }
 
-flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct ConcatEmbeddingsOptionsT : public flatbuffers::NativeTable {
   typedef ConcatEmbeddingsOptions TableType;
   int32_t num_channels;
   std::vector<int32_t> num_columns_per_channel;
   std::vector<int32_t> embedding_dim_per_channel;
-  ConcatEmbeddingsOptionsT()
-      : num_channels(0) {
-  }
+  ConcatEmbeddingsOptionsT() : num_channels(0) {}
 };
 
-struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
   typedef ConcatEmbeddingsOptionsT NativeTableType;
   enum {
     VT_NUM_CHANNELS = 4,
     VT_NUM_COLUMNS_PER_CHANNEL = 6,
     VT_EMBEDDING_DIM_PER_CHANNEL = 8
   };
-  int32_t num_channels() const {
-    return GetField<int32_t>(VT_NUM_CHANNELS, 0);
-  }
+  int32_t num_channels() const { return GetField<int32_t>(VT_NUM_CHANNELS, 0); }
   const flatbuffers::Vector<int32_t> *num_columns_per_channel() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NUM_COLUMNS_PER_CHANNEL);
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(
+        VT_NUM_COLUMNS_PER_CHANNEL);
   }
   const flatbuffers::Vector<int32_t> *embedding_dim_per_channel() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_EMBEDDING_DIM_PER_CHANNEL);
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(
+        VT_EMBEDDING_DIM_PER_CHANNEL);
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
@@ -1353,31 +1642,43 @@ struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Ta
            VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) &&
            verifier.Verify(num_columns_per_channel()) &&
            VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) &&
-           verifier.Verify(embedding_dim_per_channel()) &&
-           verifier.EndTable();
-  }
-  ConcatEmbeddingsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ConcatEmbeddingsOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           verifier.Verify(embedding_dim_per_channel()) && verifier.EndTable();
+  }
+  ConcatEmbeddingsOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      ConcatEmbeddingsOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ConcatEmbeddingsOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct ConcatEmbeddingsOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_num_channels(int32_t num_channels) {
-    fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
+    fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS,
+                             num_channels, 0);
   }
-  void add_num_columns_per_channel(flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) {
-    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
+  void add_num_columns_per_channel(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>>
+          num_columns_per_channel) {
+    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL,
+                   num_columns_per_channel);
   }
-  void add_embedding_dim_per_channel(flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) {
-    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel);
+  void add_embedding_dim_per_channel(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>>
+          embedding_dim_per_channel) {
+    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
+                   embedding_dim_per_channel);
   }
   explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
-  ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
+  ConcatEmbeddingsOptionsBuilder &operator=(
+      const ConcatEmbeddingsOptionsBuilder &);
   flatbuffers::Offset<ConcatEmbeddingsOptions> Finish() {
     const auto end = fbb_.EndTable(start_);
     auto o = flatbuffers::Offset<ConcatEmbeddingsOptions>(end);
@@ -1385,11 +1686,13 @@ struct ConcatEmbeddingsOptionsBuilder {
   }
 };
 
-inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t num_channels = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) {
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                              int32_t num_channels = 0,
+                              flatbuffers::Offset<flatbuffers::Vector<int32_t>>
+                                  num_columns_per_channel = 0,
+                              flatbuffers::Offset<flatbuffers::Vector<int32_t>>
+                                  embedding_dim_per_channel = 0) {
   ConcatEmbeddingsOptionsBuilder builder_(_fbb);
   builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
   builder_.add_num_columns_per_channel(num_columns_per_channel);
@@ -1397,54 +1700,61 @@ inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOption
   return builder_.Finish();
 }
 
-inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptionsDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t num_channels = 0,
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+CreateConcatEmbeddingsOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
     const std::vector<int32_t> *num_columns_per_channel = nullptr,
     const std::vector<int32_t> *embedding_dim_per_channel = nullptr) {
   return tflite::CreateConcatEmbeddingsOptions(
-      _fbb,
-      num_channels,
-      num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
-      embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
+      _fbb, num_channels,
+      num_columns_per_channel
+          ? _fbb.CreateVector<int32_t>(*num_columns_per_channel)
+          : 0,
+      embedding_dim_per_channel
+          ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel)
+          : 0);
 }
 
-flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct LSHProjectionOptionsT : public flatbuffers::NativeTable {
   typedef LSHProjectionOptions TableType;
   LSHProjectionType type;
-  LSHProjectionOptionsT()
-      : type(LSHProjectionType_UNKNOWN) {
-  }
+  LSHProjectionOptionsT() : type(LSHProjectionType_UNKNOWN) {}
 };
 
-struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
   typedef LSHProjectionOptionsT NativeTableType;
-  enum {
-    VT_TYPE = 4
-  };
+  enum { VT_TYPE = 4 };
   LSHProjectionType type() const {
     return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_TYPE) &&
-           verifier.EndTable();
-  }
-  LSHProjectionOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LSHProjectionOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           VerifyField<int8_t>(verifier, VT_TYPE) && verifier.EndTable();
+  }
+  LSHProjectionOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      LSHProjectionOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LSHProjectionOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct LSHProjectionOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_type(LSHProjectionType type) {
-    fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
+    fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE,
+                            static_cast<int8_t>(type), 0);
   }
   explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
@@ -1463,29 +1773,25 @@ inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(
   return builder_.Finish();
 }
 
-flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct SVDFOptionsT : public flatbuffers::NativeTable {
   typedef SVDFOptions TableType;
   int32_t rank;
   ActivationFunctionType fused_activation_function;
   SVDFOptionsT()
-      : rank(0),
-        fused_activation_function(ActivationFunctionType_NONE) {
-  }
+      : rank(0), fused_activation_function(ActivationFunctionType_NONE) {}
 };
 
 struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef SVDFOptionsT NativeTableType;
-  enum {
-    VT_RANK = 4,
-    VT_FUSED_ACTIVATION_FUNCTION = 6
-  };
-  int32_t rank() const {
-    return GetField<int32_t>(VT_RANK, 0);
-  }
+  enum { VT_RANK = 4, VT_FUSED_ACTIVATION_FUNCTION = 6 };
+  int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); }
   ActivationFunctionType fused_activation_function() const {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
@@ -1493,9 +1799,14 @@ struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            verifier.EndTable();
   }
-  SVDFOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SVDFOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  SVDFOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SVDFOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SVDFOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct SVDFOptionsBuilder {
@@ -1504,11 +1815,13 @@ struct SVDFOptionsBuilder {
   void add_rank(int32_t rank) {
     fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0);
   }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
   }
   explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
@@ -1520,51 +1833,57 @@ struct SVDFOptionsBuilder {
 };
 
 inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t rank = 0,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
   SVDFOptionsBuilder builder_(_fbb);
   builder_.add_rank(rank);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct RNNOptionsT : public flatbuffers::NativeTable {
   typedef RNNOptions TableType;
   ActivationFunctionType fused_activation_function;
-  RNNOptionsT()
-      : fused_activation_function(ActivationFunctionType_NONE) {
-  }
+  RNNOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {}
 };
 
 struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef RNNOptionsT NativeTableType;
-  enum {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
-  };
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
   ActivationFunctionType fused_activation_function() const {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            verifier.EndTable();
   }
-  RNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<RNNOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  RNNOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      RNNOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<RNNOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct RNNOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
   }
   explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
@@ -1577,48 +1896,128 @@ struct RNNOptionsBuilder {
 
 inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(
     flatbuffers::FlatBufferBuilder &_fbb,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
   RNNOptionsBuilder builder_(_fbb);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<RNNOptions> CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<RNNOptions> CreateRNNOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SequenceRNNOptionsT : public flatbuffers::NativeTable {
+  typedef SequenceRNNOptions TableType;
+  bool time_major;
+  ActivationFunctionType fused_activation_function;
+  SequenceRNNOptionsT()
+      : time_major(false),
+        fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef SequenceRNNOptionsT NativeTableType;
+  enum { VT_TIME_MAJOR = 4, VT_FUSED_ACTIVATION_FUNCTION = 6 };
+  bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  SequenceRNNOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SequenceRNNOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SequenceRNNOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SequenceRNNOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_time_major(bool time_major) {
+    fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR,
+                             static_cast<uint8_t>(time_major), 0);
+  }
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
+  flatbuffers::Offset<SequenceRNNOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SequenceRNNOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  SequenceRNNOptionsBuilder builder_(_fbb);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_time_major(time_major);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct FullyConnectedOptionsT : public flatbuffers::NativeTable {
   typedef FullyConnectedOptions TableType;
   ActivationFunctionType fused_activation_function;
   FullyConnectedOptionsT()
-      : fused_activation_function(ActivationFunctionType_NONE) {
-  }
+      : fused_activation_function(ActivationFunctionType_NONE) {}
 };
 
-struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
   typedef FullyConnectedOptionsT NativeTableType;
-  enum {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
-  };
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
   ActivationFunctionType fused_activation_function() const {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            verifier.EndTable();
   }
-  FullyConnectedOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<FullyConnectedOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  FullyConnectedOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      FullyConnectedOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<FullyConnectedOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct FullyConnectedOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
   }
   explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
@@ -1631,38 +2030,39 @@ struct FullyConnectedOptionsBuilder {
 
 inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
     flatbuffers::FlatBufferBuilder &_fbb,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
   FullyConnectedOptionsBuilder builder_(_fbb);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct SoftmaxOptionsT : public flatbuffers::NativeTable {
   typedef SoftmaxOptions TableType;
   float beta;
-  SoftmaxOptionsT()
-      : beta(0.0f) {
-  }
+  SoftmaxOptionsT() : beta(0.0f) {}
 };
 
 struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef SoftmaxOptionsT NativeTableType;
-  enum {
-    VT_BETA = 4
-  };
-  float beta() const {
-    return GetField<float>(VT_BETA, 0.0f);
-  }
+  enum { VT_BETA = 4 };
+  float beta() const { return GetField<float>(VT_BETA, 0.0f); }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
-           VerifyField<float>(verifier, VT_BETA) &&
-           verifier.EndTable();
-  }
-  SoftmaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SoftmaxOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           VerifyField<float>(verifier, VT_BETA) && verifier.EndTable();
+  }
+  SoftmaxOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SoftmaxOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SoftmaxOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct SoftmaxOptionsBuilder {
@@ -1672,7 +2072,7 @@ struct SoftmaxOptionsBuilder {
     fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f);
   }
   explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
@@ -1684,36 +2084,32 @@ struct SoftmaxOptionsBuilder {
 };
 
 inline flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    float beta = 0.0f) {
+    flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f) {
   SoftmaxOptionsBuilder builder_(_fbb);
   builder_.add_beta(beta);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct ConcatenationOptionsT : public flatbuffers::NativeTable {
   typedef ConcatenationOptions TableType;
   int32_t axis;
   ActivationFunctionType fused_activation_function;
   ConcatenationOptionsT()
-      : axis(0),
-        fused_activation_function(ActivationFunctionType_NONE) {
-  }
+      : axis(0), fused_activation_function(ActivationFunctionType_NONE) {}
 };
 
-struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
   typedef ConcatenationOptionsT NativeTableType;
-  enum {
-    VT_AXIS = 4,
-    VT_FUSED_ACTIVATION_FUNCTION = 6
-  };
-  int32_t axis() const {
-    return GetField<int32_t>(VT_AXIS, 0);
-  }
+  enum { VT_AXIS = 4, VT_FUSED_ACTIVATION_FUNCTION = 6 };
+  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
   ActivationFunctionType fused_activation_function() const {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
@@ -1721,9 +2117,14 @@ struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            verifier.EndTable();
   }
-  ConcatenationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ConcatenationOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  ConcatenationOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      ConcatenationOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ConcatenationOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct ConcatenationOptionsBuilder {
@@ -1732,11 +2133,13 @@ struct ConcatenationOptionsBuilder {
   void add_axis(int32_t axis) {
     fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0);
   }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
   }
   explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
@@ -1748,51 +2151,57 @@ struct ConcatenationOptionsBuilder {
 };
 
 inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t axis = 0,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
   ConcatenationOptionsBuilder builder_(_fbb);
   builder_.add_axis(axis);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct AddOptionsT : public flatbuffers::NativeTable {
   typedef AddOptions TableType;
   ActivationFunctionType fused_activation_function;
-  AddOptionsT()
-      : fused_activation_function(ActivationFunctionType_NONE) {
-  }
+  AddOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {}
 };
 
 struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef AddOptionsT NativeTableType;
-  enum {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
-  };
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
   ActivationFunctionType fused_activation_function() const {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            verifier.EndTable();
   }
-  AddOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<AddOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  AddOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      AddOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<AddOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct AddOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
   }
   explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   AddOptionsBuilder &operator=(const AddOptionsBuilder &);
@@ -1805,48 +2214,55 @@ struct AddOptionsBuilder {
 
 inline flatbuffers::Offset<AddOptions> CreateAddOptions(
     flatbuffers::FlatBufferBuilder &_fbb,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
   AddOptionsBuilder builder_(_fbb);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<AddOptions> CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<AddOptions> CreateAddOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct MulOptionsT : public flatbuffers::NativeTable {
   typedef MulOptions TableType;
   ActivationFunctionType fused_activation_function;
-  MulOptionsT()
-      : fused_activation_function(ActivationFunctionType_NONE) {
-  }
+  MulOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {}
 };
 
 struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef MulOptionsT NativeTableType;
-  enum {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
-  };
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
   ActivationFunctionType fused_activation_function() const {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            verifier.EndTable();
   }
-  MulOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<MulOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  MulOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      MulOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<MulOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct MulOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
   }
   explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   MulOptionsBuilder &operator=(const MulOptionsBuilder &);
@@ -1859,48 +2275,55 @@ struct MulOptionsBuilder {
 
 inline flatbuffers::Offset<MulOptions> CreateMulOptions(
     flatbuffers::FlatBufferBuilder &_fbb,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
   MulOptionsBuilder builder_(_fbb);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<MulOptions> CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<MulOptions> CreateMulOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct L2NormOptionsT : public flatbuffers::NativeTable {
   typedef L2NormOptions TableType;
   ActivationFunctionType fused_activation_function;
-  L2NormOptionsT()
-      : fused_activation_function(ActivationFunctionType_NONE) {
-  }
+  L2NormOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {}
 };
 
 struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef L2NormOptionsT NativeTableType;
-  enum {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
-  };
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
   ActivationFunctionType fused_activation_function() const {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            verifier.EndTable();
   }
-  L2NormOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<L2NormOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  L2NormOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      L2NormOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<L2NormOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct L2NormOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
   }
   explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
@@ -1913,13 +2336,16 @@ struct L2NormOptionsBuilder {
 
 inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(
     flatbuffers::FlatBufferBuilder &_fbb,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
   L2NormOptionsBuilder builder_(_fbb);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct LocalResponseNormalizationOptionsT : public flatbuffers::NativeTable {
   typedef LocalResponseNormalizationOptions TableType;
@@ -1928,66 +2354,61 @@ struct LocalResponseNormalizationOptionsT : public flatbuffers::NativeTable {
   float alpha;
   float beta;
   LocalResponseNormalizationOptionsT()
-      : radius(0),
-        bias(0.0f),
-        alpha(0.0f),
-        beta(0.0f) {
-  }
+      : radius(0), bias(0.0f), alpha(0.0f), beta(0.0f) {}
 };
 
-struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
   typedef LocalResponseNormalizationOptionsT NativeTableType;
-  enum {
-    VT_RADIUS = 4,
-    VT_BIAS = 6,
-    VT_ALPHA = 8,
-    VT_BETA = 10
-  };
-  int32_t radius() const {
-    return GetField<int32_t>(VT_RADIUS, 0);
-  }
-  float bias() const {
-    return GetField<float>(VT_BIAS, 0.0f);
-  }
-  float alpha() const {
-    return GetField<float>(VT_ALPHA, 0.0f);
-  }
-  float beta() const {
-    return GetField<float>(VT_BETA, 0.0f);
-  }
+  enum { VT_RADIUS = 4, VT_BIAS = 6, VT_ALPHA = 8, VT_BETA = 10 };
+  int32_t radius() const { return GetField<int32_t>(VT_RADIUS, 0); }
+  float bias() const { return GetField<float>(VT_BIAS, 0.0f); }
+  float alpha() const { return GetField<float>(VT_ALPHA, 0.0f); }
+  float beta() const { return GetField<float>(VT_BETA, 0.0f); }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<int32_t>(verifier, VT_RADIUS) &&
            VerifyField<float>(verifier, VT_BIAS) &&
            VerifyField<float>(verifier, VT_ALPHA) &&
-           VerifyField<float>(verifier, VT_BETA) &&
-           verifier.EndTable();
-  }
-  LocalResponseNormalizationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LocalResponseNormalizationOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           VerifyField<float>(verifier, VT_BETA) && verifier.EndTable();
+  }
+  LocalResponseNormalizationOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      LocalResponseNormalizationOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LocalResponseNormalizationOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb,
+      const LocalResponseNormalizationOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct LocalResponseNormalizationOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_radius(int32_t radius) {
-    fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0);
+    fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS,
+                             radius, 0);
   }
   void add_bias(float bias) {
-    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f);
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias,
+                           0.0f);
   }
   void add_alpha(float alpha) {
-    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f);
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha,
+                           0.0f);
   }
   void add_beta(float beta) {
-    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta,
+                           0.0f);
   }
-  explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+  explicit LocalResponseNormalizationOptionsBuilder(
+      flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
-  LocalResponseNormalizationOptionsBuilder &operator=(const LocalResponseNormalizationOptionsBuilder &);
+  LocalResponseNormalizationOptionsBuilder &operator=(
+      const LocalResponseNormalizationOptionsBuilder &);
   flatbuffers::Offset<LocalResponseNormalizationOptions> Finish() {
     const auto end = fbb_.EndTable(start_);
     auto o = flatbuffers::Offset<LocalResponseNormalizationOptions>(end);
@@ -1995,12 +2416,10 @@ struct LocalResponseNormalizationOptionsBuilder {
   }
 };
 
-inline flatbuffers::Offset<LocalResponseNormalizationOptions> CreateLocalResponseNormalizationOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t radius = 0,
-    float bias = 0.0f,
-    float alpha = 0.0f,
-    float beta = 0.0f) {
+inline flatbuffers::Offset<LocalResponseNormalizationOptions>
+CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                        int32_t radius = 0, float bias = 0.0f,
+                                        float alpha = 0.0f, float beta = 0.0f) {
   LocalResponseNormalizationOptionsBuilder builder_(_fbb);
   builder_.add_beta(beta);
   builder_.add_alpha(alpha);
@@ -2009,7 +2428,11 @@ inline flatbuffers::Offset<LocalResponseNormalizationOptions> CreateLocalRespons
   return builder_.Finish();
 }
 
-flatbuffers::Offset<LocalResponseNormalizationOptions> CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<LocalResponseNormalizationOptions>
+CreateLocalResponseNormalizationOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const LocalResponseNormalizationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct LSTMOptionsT : public flatbuffers::NativeTable {
   typedef LSTMOptions TableType;
@@ -2019,43 +2442,41 @@ struct LSTMOptionsT : public flatbuffers::NativeTable {
   LSTMOptionsT()
       : fused_activation_function(ActivationFunctionType_NONE),
         cell_clip(0.0f),
-        proj_clip(0.0f) {
-  }
+        proj_clip(0.0f) {}
 };
 
 struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef LSTMOptionsT NativeTableType;
-  enum {
-    VT_FUSED_ACTIVATION_FUNCTION = 4,
-    VT_CELL_CLIP = 6,
-    VT_PROJ_CLIP = 8
-  };
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4, VT_CELL_CLIP = 6, VT_PROJ_CLIP = 8 };
   ActivationFunctionType fused_activation_function() const {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  float cell_clip() const {
-    return GetField<float>(VT_CELL_CLIP, 0.0f);
-  }
-  float proj_clip() const {
-    return GetField<float>(VT_PROJ_CLIP, 0.0f);
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
+  float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
+  float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            VerifyField<float>(verifier, VT_CELL_CLIP) &&
-           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
-           verifier.EndTable();
-  }
-  LSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           VerifyField<float>(verifier, VT_PROJ_CLIP) && verifier.EndTable();
+  }
+  LSTMOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      LSTMOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LSTMOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct LSTMOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
   }
   void add_cell_clip(float cell_clip) {
     fbb_.AddElement<float>(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
@@ -2064,7 +2485,7 @@ struct LSTMOptionsBuilder {
     fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
   }
   explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
@@ -2077,9 +2498,9 @@ struct LSTMOptionsBuilder {
 
 inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
     flatbuffers::FlatBufferBuilder &_fbb,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
-    float cell_clip = 0.0f,
-    float proj_clip = 0.0f) {
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE,
+    float cell_clip = 0.0f, float proj_clip = 0.0f) {
   LSTMOptionsBuilder builder_(_fbb);
   builder_.add_proj_clip(proj_clip);
   builder_.add_cell_clip(cell_clip);
@@ -2087,52 +2508,50 @@ inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
   return builder_.Finish();
 }
 
-flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct ResizeBilinearOptionsT : public flatbuffers::NativeTable {
   typedef ResizeBilinearOptions TableType;
   int32_t new_height;
   int32_t new_width;
-  ResizeBilinearOptionsT()
-      : new_height(0),
-        new_width(0) {
-  }
+  ResizeBilinearOptionsT() : new_height(0), new_width(0) {}
 };
 
-struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
   typedef ResizeBilinearOptionsT NativeTableType;
-  enum {
-    VT_NEW_HEIGHT = 4,
-    VT_NEW_WIDTH = 6
-  };
-  int32_t new_height() const {
-    return GetField<int32_t>(VT_NEW_HEIGHT, 0);
-  }
-  int32_t new_width() const {
-    return GetField<int32_t>(VT_NEW_WIDTH, 0);
-  }
+  enum { VT_NEW_HEIGHT = 4, VT_NEW_WIDTH = 6 };
+  int32_t new_height() const { return GetField<int32_t>(VT_NEW_HEIGHT, 0); }
+  int32_t new_width() const { return GetField<int32_t>(VT_NEW_WIDTH, 0); }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<int32_t>(verifier, VT_NEW_HEIGHT) &&
-           VerifyField<int32_t>(verifier, VT_NEW_WIDTH) &&
-           verifier.EndTable();
-  }
-  ResizeBilinearOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ResizeBilinearOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           VerifyField<int32_t>(verifier, VT_NEW_WIDTH) && verifier.EndTable();
+  }
+  ResizeBilinearOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      ResizeBilinearOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ResizeBilinearOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct ResizeBilinearOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_new_height(int32_t new_height) {
-    fbb_.AddElement<int32_t>(ResizeBilinearOptions::VT_NEW_HEIGHT, new_height, 0);
+    fbb_.AddElement<int32_t>(ResizeBilinearOptions::VT_NEW_HEIGHT, new_height,
+                             0);
   }
   void add_new_width(int32_t new_width) {
     fbb_.AddElement<int32_t>(ResizeBilinearOptions::VT_NEW_WIDTH, new_width, 0);
   }
   explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
@@ -2144,8 +2563,7 @@ struct ResizeBilinearOptionsBuilder {
 };
 
 inline flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t new_height = 0,
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t new_height = 0,
     int32_t new_width = 0) {
   ResizeBilinearOptionsBuilder builder_(_fbb);
   builder_.add_new_width(new_width);
@@ -2153,32 +2571,32 @@ inline flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(
   return builder_.Finish();
 }
 
-flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct CallOptionsT : public flatbuffers::NativeTable {
   typedef CallOptions TableType;
   uint32_t subgraph;
-  CallOptionsT()
-      : subgraph(0) {
-  }
+  CallOptionsT() : subgraph(0) {}
 };
 
 struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef CallOptionsT NativeTableType;
-  enum {
-    VT_SUBGRAPH = 4
-  };
-  uint32_t subgraph() const {
-    return GetField<uint32_t>(VT_SUBGRAPH, 0);
-  }
+  enum { VT_SUBGRAPH = 4 };
+  uint32_t subgraph() const { return GetField<uint32_t>(VT_SUBGRAPH, 0); }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
-           VerifyField<uint32_t>(verifier, VT_SUBGRAPH) &&
-           verifier.EndTable();
-  }
-  CallOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<CallOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           VerifyField<uint32_t>(verifier, VT_SUBGRAPH) && verifier.EndTable();
+  }
+  CallOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      CallOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<CallOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct CallOptionsBuilder {
@@ -2188,7 +2606,7 @@ struct CallOptionsBuilder {
     fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0);
   }
   explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   CallOptionsBuilder &operator=(const CallOptionsBuilder &);
@@ -2200,49 +2618,130 @@ struct CallOptionsBuilder {
 };
 
 inline flatbuffers::Offset<CallOptions> CreateCallOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    uint32_t subgraph = 0) {
+    flatbuffers::FlatBufferBuilder &_fbb, uint32_t subgraph = 0) {
   CallOptionsBuilder builder_(_fbb);
   builder_.add_subgraph(subgraph);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<CallOptions> CreateCallOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct PadOptionsT : public flatbuffers::NativeTable {
+  typedef PadOptions TableType;
+  std::vector<int32_t> before_padding;
+  std::vector<int32_t> after_padding;
+  PadOptionsT() {}
+};
+
+struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef PadOptionsT NativeTableType;
+  enum { VT_BEFORE_PADDING = 4, VT_AFTER_PADDING = 6 };
+  const flatbuffers::Vector<int32_t> *before_padding() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BEFORE_PADDING);
+  }
+  const flatbuffers::Vector<int32_t> *after_padding() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_AFTER_PADDING);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_BEFORE_PADDING) &&
+           verifier.Verify(before_padding()) &&
+           VerifyOffset(verifier, VT_AFTER_PADDING) &&
+           verifier.Verify(after_padding()) && verifier.EndTable();
+  }
+  PadOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      PadOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<PadOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct PadOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_before_padding(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> before_padding) {
+    fbb_.AddOffset(PadOptions::VT_BEFORE_PADDING, before_padding);
+  }
+  void add_after_padding(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> after_padding) {
+    fbb_.AddOffset(PadOptions::VT_AFTER_PADDING, after_padding);
+  }
+  explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  PadOptionsBuilder &operator=(const PadOptionsBuilder &);
+  flatbuffers::Offset<PadOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<PadOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<PadOptions> CreatePadOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> before_padding = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> after_padding = 0) {
+  PadOptionsBuilder builder_(_fbb);
+  builder_.add_after_padding(after_padding);
+  builder_.add_before_padding(before_padding);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<PadOptions> CreatePadOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *before_padding = nullptr,
+    const std::vector<int32_t> *after_padding = nullptr) {
+  return tflite::CreatePadOptions(
+      _fbb, before_padding ? _fbb.CreateVector<int32_t>(*before_padding) : 0,
+      after_padding ? _fbb.CreateVector<int32_t>(*after_padding) : 0);
+}
+
+flatbuffers::Offset<PadOptions> CreatePadOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct ReshapeOptionsT : public flatbuffers::NativeTable {
   typedef ReshapeOptions TableType;
   std::vector<int32_t> new_shape;
-  ReshapeOptionsT() {
-  }
+  ReshapeOptionsT() {}
 };
 
 struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef ReshapeOptionsT NativeTableType;
-  enum {
-    VT_NEW_SHAPE = 4
-  };
+  enum { VT_NEW_SHAPE = 4 };
   const flatbuffers::Vector<int32_t> *new_shape() const {
     return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NEW_SHAPE);
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_NEW_SHAPE) &&
-           verifier.Verify(new_shape()) &&
-           verifier.EndTable();
-  }
-  ReshapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ReshapeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NEW_SHAPE) &&
+           verifier.Verify(new_shape()) && verifier.EndTable();
+  }
+  ReshapeOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      ReshapeOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ReshapeOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct ReshapeOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape) {
+  void add_new_shape(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape) {
     fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape);
   }
   explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
@@ -2265,114 +2764,302 @@ inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptionsDirect(
     flatbuffers::FlatBufferBuilder &_fbb,
     const std::vector<int32_t> *new_shape = nullptr) {
   return tflite::CreateReshapeOptions(
-      _fbb,
-      new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
+      _fbb, new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
 }
 
-flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
-struct SkipGramOptionsT : public flatbuffers::NativeTable {
-  typedef SkipGramOptions TableType;
-  int32_t ngram_size;
-  int32_t max_skip_size;
-  bool include_all_ngrams;
-  SkipGramOptionsT()
-      : ngram_size(0),
-        max_skip_size(0),
-        include_all_ngrams(false) {
-  }
+struct SpaceToBatchNDOptionsT : public flatbuffers::NativeTable {
+  typedef SpaceToBatchNDOptions TableType;
+  std::vector<int32_t> block_shape;
+  std::vector<int32_t> before_paddings;
+  std::vector<int32_t> after_paddings;
+  SpaceToBatchNDOptionsT() {}
 };
 
-struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SkipGramOptionsT NativeTableType;
-  enum {
-    VT_NGRAM_SIZE = 4,
-    VT_MAX_SKIP_SIZE = 6,
-    VT_INCLUDE_ALL_NGRAMS = 8
-  };
-  int32_t ngram_size() const {
-    return GetField<int32_t>(VT_NGRAM_SIZE, 0);
+struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef SpaceToBatchNDOptionsT NativeTableType;
+  enum { VT_BLOCK_SHAPE = 4, VT_BEFORE_PADDINGS = 6, VT_AFTER_PADDINGS = 8 };
+  const flatbuffers::Vector<int32_t> *block_shape() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_SHAPE);
   }
-  int32_t max_skip_size() const {
-    return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0);
+  const flatbuffers::Vector<int32_t> *before_paddings() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BEFORE_PADDINGS);
   }
-  bool include_all_ngrams() const {
-    return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0;
+  const flatbuffers::Vector<int32_t> *after_paddings() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_AFTER_PADDINGS);
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_NGRAM_SIZE) &&
-           VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE) &&
-           VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS) &&
-           verifier.EndTable();
-  }
-  SkipGramOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SkipGramOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           VerifyOffset(verifier, VT_BLOCK_SHAPE) &&
+           verifier.Verify(block_shape()) &&
+           VerifyOffset(verifier, VT_BEFORE_PADDINGS) &&
+           verifier.Verify(before_paddings()) &&
+           VerifyOffset(verifier, VT_AFTER_PADDINGS) &&
+           verifier.Verify(after_paddings()) && verifier.EndTable();
+  }
+  SpaceToBatchNDOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SpaceToBatchNDOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SpaceToBatchNDOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
-struct SkipGramOptionsBuilder {
+struct SpaceToBatchNDOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_ngram_size(int32_t ngram_size) {
-    fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0);
+  void add_block_shape(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_shape) {
+    fbb_.AddOffset(SpaceToBatchNDOptions::VT_BLOCK_SHAPE, block_shape);
   }
-  void add_max_skip_size(int32_t max_skip_size) {
-    fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0);
+  void add_before_paddings(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> before_paddings) {
+    fbb_.AddOffset(SpaceToBatchNDOptions::VT_BEFORE_PADDINGS, before_paddings);
   }
-  void add_include_all_ngrams(bool include_all_ngrams) {
-    fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS, static_cast<uint8_t>(include_all_ngrams), 0);
+  void add_after_paddings(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> after_paddings) {
+    fbb_.AddOffset(SpaceToBatchNDOptions::VT_AFTER_PADDINGS, after_paddings);
   }
-  explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+  explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
-  SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
-  flatbuffers::Offset<SkipGramOptions> Finish() {
+  SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
+  flatbuffers::Offset<SpaceToBatchNDOptions> Finish() {
     const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SkipGramOptions>(end);
+    auto o = flatbuffers::Offset<SpaceToBatchNDOptions>(end);
     return o;
   }
 };
 
-inline flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(
+inline flatbuffers::Offset<SpaceToBatchNDOptions> CreateSpaceToBatchNDOptions(
     flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t ngram_size = 0,
-    int32_t max_skip_size = 0,
-    bool include_all_ngrams = false) {
-  SkipGramOptionsBuilder builder_(_fbb);
-  builder_.add_max_skip_size(max_skip_size);
-  builder_.add_ngram_size(ngram_size);
-  builder_.add_include_all_ngrams(include_all_ngrams);
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_shape = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> before_paddings = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> after_paddings = 0) {
+  SpaceToBatchNDOptionsBuilder builder_(_fbb);
+  builder_.add_after_paddings(after_paddings);
+  builder_.add_before_paddings(before_paddings);
+  builder_.add_block_shape(block_shape);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+inline flatbuffers::Offset<SpaceToBatchNDOptions>
+CreateSpaceToBatchNDOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *block_shape = nullptr,
+    const std::vector<int32_t> *before_paddings = nullptr,
+    const std::vector<int32_t> *after_paddings = nullptr) {
+  return tflite::CreateSpaceToBatchNDOptions(
+      _fbb, block_shape ? _fbb.CreateVector<int32_t>(*block_shape) : 0,
+      before_paddings ? _fbb.CreateVector<int32_t>(*before_paddings) : 0,
+      after_paddings ? _fbb.CreateVector<int32_t>(*after_paddings) : 0);
+}
+
+flatbuffers::Offset<SpaceToBatchNDOptions> CreateSpaceToBatchNDOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct BatchToSpaceNDOptionsT : public flatbuffers::NativeTable {
+  typedef BatchToSpaceNDOptions TableType;
+  std::vector<int32_t> block_shape;
+  std::vector<int32_t> before_crops;
+  std::vector<int32_t> after_crops;
+  BatchToSpaceNDOptionsT() {}
+};
 
-struct SpaceToDepthOptionsT : public flatbuffers::NativeTable {
-  typedef SpaceToDepthOptions TableType;
-  int32_t block_size;
-  SpaceToDepthOptionsT()
-      : block_size(0) {
+struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef BatchToSpaceNDOptionsT NativeTableType;
+  enum { VT_BLOCK_SHAPE = 4, VT_BEFORE_CROPS = 6, VT_AFTER_CROPS = 8 };
+  const flatbuffers::Vector<int32_t> *block_shape() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_SHAPE);
   }
+  const flatbuffers::Vector<int32_t> *before_crops() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BEFORE_CROPS);
+  }
+  const flatbuffers::Vector<int32_t> *after_crops() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_AFTER_CROPS);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_BLOCK_SHAPE) &&
+           verifier.Verify(block_shape()) &&
+           VerifyOffset(verifier, VT_BEFORE_CROPS) &&
+           verifier.Verify(before_crops()) &&
+           VerifyOffset(verifier, VT_AFTER_CROPS) &&
+           verifier.Verify(after_crops()) && verifier.EndTable();
+  }
+  BatchToSpaceNDOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      BatchToSpaceNDOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<BatchToSpaceNDOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
-struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SpaceToDepthOptionsT NativeTableType;
-  enum {
-    VT_BLOCK_SIZE = 4
-  };
-  int32_t block_size() const {
-    return GetField<int32_t>(VT_BLOCK_SIZE, 0);
+struct BatchToSpaceNDOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_block_shape(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_shape) {
+    fbb_.AddOffset(BatchToSpaceNDOptions::VT_BLOCK_SHAPE, block_shape);
+  }
+  void add_before_crops(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> before_crops) {
+    fbb_.AddOffset(BatchToSpaceNDOptions::VT_BEFORE_CROPS, before_crops);
+  }
+  void add_after_crops(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> after_crops) {
+    fbb_.AddOffset(BatchToSpaceNDOptions::VT_AFTER_CROPS, after_crops);
+  }
+  explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
+  flatbuffers::Offset<BatchToSpaceNDOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BatchToSpaceNDOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_shape = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> before_crops = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> after_crops = 0) {
+  BatchToSpaceNDOptionsBuilder builder_(_fbb);
+  builder_.add_after_crops(after_crops);
+  builder_.add_before_crops(before_crops);
+  builder_.add_block_shape(block_shape);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions>
+CreateBatchToSpaceNDOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *block_shape = nullptr,
+    const std::vector<int32_t> *before_crops = nullptr,
+    const std::vector<int32_t> *after_crops = nullptr) {
+  return tflite::CreateBatchToSpaceNDOptions(
+      _fbb, block_shape ? _fbb.CreateVector<int32_t>(*block_shape) : 0,
+      before_crops ? _fbb.CreateVector<int32_t>(*before_crops) : 0,
+      after_crops ? _fbb.CreateVector<int32_t>(*after_crops) : 0);
+}
+
+flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SkipGramOptionsT : public flatbuffers::NativeTable {
+  typedef SkipGramOptions TableType;
+  int32_t ngram_size;
+  int32_t max_skip_size;
+  bool include_all_ngrams;
+  SkipGramOptionsT()
+      : ngram_size(0), max_skip_size(0), include_all_ngrams(false) {}
+};
+
+struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef SkipGramOptionsT NativeTableType;
+  enum { VT_NGRAM_SIZE = 4, VT_MAX_SKIP_SIZE = 6, VT_INCLUDE_ALL_NGRAMS = 8 };
+  int32_t ngram_size() const { return GetField<int32_t>(VT_NGRAM_SIZE, 0); }
+  int32_t max_skip_size() const {
+    return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0);
+  }
+  bool include_all_ngrams() const {
+    return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0;
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
+           VerifyField<int32_t>(verifier, VT_NGRAM_SIZE) &&
+           VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE) &&
+           VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS) &&
            verifier.EndTable();
   }
-  SpaceToDepthOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SpaceToDepthOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  SkipGramOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SkipGramOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SkipGramOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SkipGramOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_ngram_size(int32_t ngram_size) {
+    fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0);
+  }
+  void add_max_skip_size(int32_t max_skip_size) {
+    fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size,
+                             0);
+  }
+  void add_include_all_ngrams(bool include_all_ngrams) {
+    fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS,
+                             static_cast<uint8_t>(include_all_ngrams), 0);
+  }
+  explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
+  flatbuffers::Offset<SkipGramOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SkipGramOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size = 0,
+    int32_t max_skip_size = 0, bool include_all_ngrams = false) {
+  SkipGramOptionsBuilder builder_(_fbb);
+  builder_.add_max_skip_size(max_skip_size);
+  builder_.add_ngram_size(ngram_size);
+  builder_.add_include_all_ngrams(include_all_ngrams);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SpaceToDepthOptionsT : public flatbuffers::NativeTable {
+  typedef SpaceToDepthOptions TableType;
+  int32_t block_size;
+  SpaceToDepthOptionsT() : block_size(0) {}
+};
+
+struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
+  typedef SpaceToDepthOptionsT NativeTableType;
+  enum { VT_BLOCK_SIZE = 4 };
+  int32_t block_size() const { return GetField<int32_t>(VT_BLOCK_SIZE, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) && verifier.EndTable();
+  }
+  SpaceToDepthOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SpaceToDepthOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SpaceToDepthOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct SpaceToDepthOptionsBuilder {
@@ -2382,7 +3069,7 @@ struct SpaceToDepthOptionsBuilder {
     fbb_.AddElement<int32_t>(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0);
   }
   explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
@@ -2394,52 +3081,180 @@ struct SpaceToDepthOptionsBuilder {
 };
 
 inline flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t block_size = 0) {
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0) {
   SpaceToDepthOptionsBuilder builder_(_fbb);
   builder_.add_block_size(block_size);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SubOptionsT : public flatbuffers::NativeTable {
+  typedef SubOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  SubOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef SubOptionsT NativeTableType;
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  SubOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SubOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SubOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SubOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  SubOptionsBuilder &operator=(const SubOptionsBuilder &);
+  flatbuffers::Offset<SubOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SubOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SubOptions> CreateSubOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  SubOptionsBuilder builder_(_fbb);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<SubOptions> CreateSubOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct DivOptionsT : public flatbuffers::NativeTable {
+  typedef DivOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  DivOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {}
+};
+
+struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef DivOptionsT NativeTableType;
+  enum { VT_FUSED_ACTIVATION_FUNCTION = 4 };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(
+        GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           verifier.EndTable();
+  }
+  DivOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      DivOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<DivOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct DivOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(
+      ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  DivOptionsBuilder &operator=(const DivOptionsBuilder &);
+  flatbuffers::Offset<DivOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<DivOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<DivOptions> CreateDivOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function =
+        ActivationFunctionType_NONE) {
+  DivOptionsBuilder builder_(_fbb);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<DivOptions> CreateDivOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct EmbeddingLookupSparseOptionsT : public flatbuffers::NativeTable {
   typedef EmbeddingLookupSparseOptions TableType;
   CombinerType combiner;
-  EmbeddingLookupSparseOptionsT()
-      : combiner(CombinerType_SUM) {
-  }
+  EmbeddingLookupSparseOptionsT() : combiner(CombinerType_SUM) {}
 };
 
-struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS
+    : private flatbuffers::Table {
   typedef EmbeddingLookupSparseOptionsT NativeTableType;
-  enum {
-    VT_COMBINER = 4
-  };
+  enum { VT_COMBINER = 4 };
   CombinerType combiner() const {
     return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_COMBINER) &&
-           verifier.EndTable();
-  }
-  EmbeddingLookupSparseOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<EmbeddingLookupSparseOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           VerifyField<int8_t>(verifier, VT_COMBINER) && verifier.EndTable();
+  }
+  EmbeddingLookupSparseOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      EmbeddingLookupSparseOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<EmbeddingLookupSparseOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb,
+      const EmbeddingLookupSparseOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct EmbeddingLookupSparseOptionsBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_combiner(CombinerType combiner) {
-    fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER, static_cast<int8_t>(combiner), 0);
+    fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER,
+                            static_cast<int8_t>(combiner), 0);
   }
-  explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+  explicit EmbeddingLookupSparseOptionsBuilder(
+      flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
-  EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
+  EmbeddingLookupSparseOptionsBuilder &operator=(
+      const EmbeddingLookupSparseOptionsBuilder &);
   flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish() {
     const auto end = fbb_.EndTable(start_);
     auto o = flatbuffers::Offset<EmbeddingLookupSparseOptions>(end);
@@ -2447,31 +3262,283 @@ struct EmbeddingLookupSparseOptionsBuilder {
   }
 };
 
-inline flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    CombinerType combiner = CombinerType_SUM) {
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
+CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                   CombinerType combiner = CombinerType_SUM) {
   EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
   builder_.add_combiner(combiner);
   return builder_.Finish();
 }
 
-flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<EmbeddingLookupSparseOptions>
+CreateEmbeddingLookupSparseOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const EmbeddingLookupSparseOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct GatherOptionsT : public flatbuffers::NativeTable {
+  typedef GatherOptions TableType;
+  int32_t axis;
+  GatherOptionsT() : axis(0) {}
+};
+
+struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef GatherOptionsT NativeTableType;
+  enum { VT_AXIS = 4 };
+  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+  }
+  GatherOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      GatherOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<GatherOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct GatherOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_axis(int32_t axis) {
+    fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0);
+  }
+  explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
+  flatbuffers::Offset<GatherOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<GatherOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0) {
+  GatherOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<GatherOptions> CreateGatherOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct TransposeOptionsT : public flatbuffers::NativeTable {
+  typedef TransposeOptions TableType;
+  std::vector<int32_t> perm;
+  TransposeOptionsT() {}
+};
+
+struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef TransposeOptionsT NativeTableType;
+  enum { VT_PERM = 4 };
+  const flatbuffers::Vector<int32_t> *perm() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_PERM);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_PERM) &&
+           verifier.Verify(perm()) && verifier.EndTable();
+  }
+  TransposeOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      TransposeOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<TransposeOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct TransposeOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_perm(flatbuffers::Offset<flatbuffers::Vector<int32_t>> perm) {
+    fbb_.AddOffset(TransposeOptions::VT_PERM, perm);
+  }
+  explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
+  flatbuffers::Offset<TransposeOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<TransposeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<TransposeOptions> CreateTransposeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> perm = 0) {
+  TransposeOptionsBuilder builder_(_fbb);
+  builder_.add_perm(perm);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TransposeOptions> CreateTransposeOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *perm = nullptr) {
+  return tflite::CreateTransposeOptions(
+      _fbb, perm ? _fbb.CreateVector<int32_t>(*perm) : 0);
+}
+
+flatbuffers::Offset<TransposeOptions> CreateTransposeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct MeanOptionsT : public flatbuffers::NativeTable {
+  typedef MeanOptions TableType;
+  std::vector<int32_t> axis;
+  bool keep_dims;
+  MeanOptionsT() : keep_dims(false) {}
+};
+
+struct MeanOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef MeanOptionsT NativeTableType;
+  enum { VT_AXIS = 4, VT_KEEP_DIMS = 6 };
+  const flatbuffers::Vector<int32_t> *axis() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_AXIS);
+  }
+  bool keep_dims() const { return GetField<uint8_t>(VT_KEEP_DIMS, 0) != 0; }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_AXIS) &&
+           verifier.Verify(axis()) &&
+           VerifyField<uint8_t>(verifier, VT_KEEP_DIMS) && verifier.EndTable();
+  }
+  MeanOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      MeanOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<MeanOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct MeanOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_axis(flatbuffers::Offset<flatbuffers::Vector<int32_t>> axis) {
+    fbb_.AddOffset(MeanOptions::VT_AXIS, axis);
+  }
+  void add_keep_dims(bool keep_dims) {
+    fbb_.AddElement<uint8_t>(MeanOptions::VT_KEEP_DIMS,
+                             static_cast<uint8_t>(keep_dims), 0);
+  }
+  explicit MeanOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  MeanOptionsBuilder &operator=(const MeanOptionsBuilder &);
+  flatbuffers::Offset<MeanOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<MeanOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<MeanOptions> CreateMeanOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> axis = 0,
+    bool keep_dims = false) {
+  MeanOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  builder_.add_keep_dims(keep_dims);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<MeanOptions> CreateMeanOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *axis = nullptr, bool keep_dims = false) {
+  return tflite::CreateMeanOptions(
+      _fbb, axis ? _fbb.CreateVector<int32_t>(*axis) : 0, keep_dims);
+}
+
+flatbuffers::Offset<MeanOptions> CreateMeanOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SqueezeOptionsT : public flatbuffers::NativeTable {
+  typedef SqueezeOptions TableType;
+  std::vector<int32_t> squeeze_dims;
+  SqueezeOptionsT() {}
+};
+
+struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef SqueezeOptionsT NativeTableType;
+  enum { VT_SQUEEZE_DIMS = 4 };
+  const flatbuffers::Vector<int32_t> *squeeze_dims() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SQUEEZE_DIMS);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_SQUEEZE_DIMS) &&
+           verifier.Verify(squeeze_dims()) && verifier.EndTable();
+  }
+  SqueezeOptionsT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SqueezeOptionsT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SqueezeOptions> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SqueezeOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_squeeze_dims(
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims) {
+    fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims);
+  }
+  explicit SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
+  flatbuffers::Offset<SqueezeOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SqueezeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims = 0) {
+  SqueezeOptionsBuilder builder_(_fbb);
+  builder_.add_squeeze_dims(squeeze_dims);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *squeeze_dims = nullptr) {
+  return tflite::CreateSqueezeOptions(
+      _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
+}
+
+flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct OperatorCodeT : public flatbuffers::NativeTable {
   typedef OperatorCode TableType;
   BuiltinOperator builtin_code;
   std::string custom_code;
-  OperatorCodeT()
-      : builtin_code(BuiltinOperator_ADD) {
-  }
+  OperatorCodeT() : builtin_code(BuiltinOperator_ADD) {}
 };
 
 struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef OperatorCodeT NativeTableType;
-  enum {
-    VT_BUILTIN_CODE = 4,
-    VT_CUSTOM_CODE = 6
-  };
+  enum { VT_BUILTIN_CODE = 4, VT_CUSTOM_CODE = 6 };
   BuiltinOperator builtin_code() const {
     return static_cast<BuiltinOperator>(GetField<int8_t>(VT_BUILTIN_CODE, 0));
   }
@@ -2482,25 +3549,30 @@ struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
     return VerifyTableStart(verifier) &&
            VerifyField<int8_t>(verifier, VT_BUILTIN_CODE) &&
            VerifyOffset(verifier, VT_CUSTOM_CODE) &&
-           verifier.Verify(custom_code()) &&
-           verifier.EndTable();
-  }
-  OperatorCodeT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<OperatorCode> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           verifier.Verify(custom_code()) && verifier.EndTable();
+  }
+  OperatorCodeT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      OperatorCodeT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<OperatorCode> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct OperatorCodeBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_builtin_code(BuiltinOperator builtin_code) {
-    fbb_.AddElement<int8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int8_t>(builtin_code), 0);
+    fbb_.AddElement<int8_t>(OperatorCode::VT_BUILTIN_CODE,
+                            static_cast<int8_t>(builtin_code), 0);
   }
   void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code) {
     fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
   }
   explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+      : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
@@ -2526,12 +3598,12 @@ inline flatbuffers::Offset<OperatorCode> CreateOperatorCodeDirect(
     BuiltinOperator builtin_code = BuiltinOperator_ADD,
     const char *custom_code = nullptr) {
   return tflite::CreateOperatorCode(
-      _fbb,
-      builtin_code,
-      custom_code ? _fbb.CreateString(custom_code) : 0);
+      _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0);
 }
 
-flatbuffers::Offset<OperatorCode> CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<OperatorCode> CreateOperatorCode(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct OperatorT : public flatbuffers::NativeTable {
   typedef Operator TableType;
@@ -2543,8 +3615,7 @@ struct OperatorT : public flatbuffers::NativeTable {
   CustomOptionsFormat custom_options_format;
   OperatorT()
       : opcode_index(0),
-        custom_options_format(CustomOptionsFormat_FLEXBUFFERS) {
-  }
+        custom_options_format(CustomOptionsFormat_FLEXBUFFERS) {}
 };
 
 struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
@@ -2568,185 +3639,387 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
     return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
   }
   BuiltinOptions builtin_options_type() const {
-    return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
+    return static_cast<BuiltinOptions>(
+        GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
   }
   const void *builtin_options() const {
     return GetPointer<const void *>(VT_BUILTIN_OPTIONS);
   }
-  template<typename T> const T *builtin_options_as() const;
+  template <typename T>
+  const T *builtin_options_as() const;
   const Conv2DOptions *builtin_options_as_Conv2DOptions() const {
-    return builtin_options_type() == BuiltinOptions_Conv2DOptions ? static_cast<const Conv2DOptions *>(builtin_options()) : nullptr;
-  }
-  const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const {
-    return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions ? static_cast<const DepthwiseConv2DOptions *>(builtin_options()) : nullptr;
-  }
-  const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const {
-    return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_Conv2DOptions
+               ? static_cast<const Conv2DOptions *>(builtin_options())
+               : nullptr;
+  }
+  const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions()
+      const {
+    return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
+               ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
+               : nullptr;
+  }
+  const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions()
+      const {
+    return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
+               ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
+               : nullptr;
   }
   const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const {
-    return builtin_options_type() == BuiltinOptions_LSHProjectionOptions ? static_cast<const LSHProjectionOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
+               ? static_cast<const LSHProjectionOptions *>(builtin_options())
+               : nullptr;
   }
   const Pool2DOptions *builtin_options_as_Pool2DOptions() const {
-    return builtin_options_type() == BuiltinOptions_Pool2DOptions ? static_cast<const Pool2DOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_Pool2DOptions
+               ? static_cast<const Pool2DOptions *>(builtin_options())
+               : nullptr;
   }
   const SVDFOptions *builtin_options_as_SVDFOptions() const {
-    return builtin_options_type() == BuiltinOptions_SVDFOptions ? static_cast<const SVDFOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_SVDFOptions
+               ? static_cast<const SVDFOptions *>(builtin_options())
+               : nullptr;
   }
   const RNNOptions *builtin_options_as_RNNOptions() const {
-    return builtin_options_type() == BuiltinOptions_RNNOptions ? static_cast<const RNNOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_RNNOptions
+               ? static_cast<const RNNOptions *>(builtin_options())
+               : nullptr;
   }
-  const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const {
-    return builtin_options_type() == BuiltinOptions_FullyConnectedOptions ? static_cast<const FullyConnectedOptions *>(builtin_options()) : nullptr;
+  const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions()
+      const {
+    return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
+               ? static_cast<const FullyConnectedOptions *>(builtin_options())
+               : nullptr;
   }
   const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const {
-    return builtin_options_type() == BuiltinOptions_SoftmaxOptions ? static_cast<const SoftmaxOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_SoftmaxOptions
+               ? static_cast<const SoftmaxOptions *>(builtin_options())
+               : nullptr;
   }
   const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const {
-    return builtin_options_type() == BuiltinOptions_ConcatenationOptions ? static_cast<const ConcatenationOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_ConcatenationOptions
+               ? static_cast<const ConcatenationOptions *>(builtin_options())
+               : nullptr;
   }
   const AddOptions *builtin_options_as_AddOptions() const {
-    return builtin_options_type() == BuiltinOptions_AddOptions ? static_cast<const AddOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_AddOptions
+               ? static_cast<const AddOptions *>(builtin_options())
+               : nullptr;
   }
   const L2NormOptions *builtin_options_as_L2NormOptions() const {
-    return builtin_options_type() == BuiltinOptions_L2NormOptions ? static_cast<const L2NormOptions *>(builtin_options()) : nullptr;
-  }
-  const LocalResponseNormalizationOptions *builtin_options_as_LocalResponseNormalizationOptions() const {
-    return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_L2NormOptions
+               ? static_cast<const L2NormOptions *>(builtin_options())
+               : nullptr;
+  }
+  const LocalResponseNormalizationOptions *
+  builtin_options_as_LocalResponseNormalizationOptions() const {
+    return builtin_options_type() ==
+                   BuiltinOptions_LocalResponseNormalizationOptions
+               ? static_cast<const LocalResponseNormalizationOptions *>(
+                     builtin_options())
+               : nullptr;
   }
   const LSTMOptions *builtin_options_as_LSTMOptions() const {
-    return builtin_options_type() == BuiltinOptions_LSTMOptions ? static_cast<const LSTMOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_LSTMOptions
+               ? static_cast<const LSTMOptions *>(builtin_options())
+               : nullptr;
   }
-  const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const {
-    return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions ? static_cast<const ResizeBilinearOptions *>(builtin_options()) : nullptr;
+  const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions()
+      const {
+    return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
+               ? static_cast<const ResizeBilinearOptions *>(builtin_options())
+               : nullptr;
   }
   const CallOptions *builtin_options_as_CallOptions() const {
-    return builtin_options_type() == BuiltinOptions_CallOptions ? static_cast<const CallOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_CallOptions
+               ? static_cast<const CallOptions *>(builtin_options())
+               : nullptr;
   }
   const ReshapeOptions *builtin_options_as_ReshapeOptions() const {
-    return builtin_options_type() == BuiltinOptions_ReshapeOptions ? static_cast<const ReshapeOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_ReshapeOptions
+               ? static_cast<const ReshapeOptions *>(builtin_options())
+               : nullptr;
   }
   const SkipGramOptions *builtin_options_as_SkipGramOptions() const {
-    return builtin_options_type() == BuiltinOptions_SkipGramOptions ? static_cast<const SkipGramOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_SkipGramOptions
+               ? static_cast<const SkipGramOptions *>(builtin_options())
+               : nullptr;
   }
   const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const {
-    return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions ? static_cast<const SpaceToDepthOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
+               ? static_cast<const SpaceToDepthOptions *>(builtin_options())
+               : nullptr;
   }
-  const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const {
-    return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options()) : nullptr;
+  const EmbeddingLookupSparseOptions *
+  builtin_options_as_EmbeddingLookupSparseOptions() const {
+    return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
+               ? static_cast<const EmbeddingLookupSparseOptions *>(
+                     builtin_options())
+               : nullptr;
   }
   const MulOptions *builtin_options_as_MulOptions() const {
-    return builtin_options_type() == BuiltinOptions_MulOptions ? static_cast<const MulOptions *>(builtin_options()) : nullptr;
+    return builtin_options_type() == BuiltinOptions_MulOptions
+               ? static_cast<const MulOptions *>(builtin_options())
+               : nullptr;
+  }
+  const PadOptions *builtin_options_as_PadOptions() const {
+    return builtin_options_type() == BuiltinOptions_PadOptions
+               ? static_cast<const PadOptions *>(builtin_options())
+               : nullptr;
+  }
+  const GatherOptions *builtin_options_as_GatherOptions() const {
+    return builtin_options_type() == BuiltinOptions_GatherOptions
+               ? static_cast<const GatherOptions *>(builtin_options())
+               : nullptr;
+  }
+  const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions()
+      const {
+    return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
+               ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
+               : nullptr;
+  }
+  const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions()
+      const {
+    return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions
+               ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
+               : nullptr;
+  }
+  const TransposeOptions *builtin_options_as_TransposeOptions() const {
+    return builtin_options_type() == BuiltinOptions_TransposeOptions
+               ? static_cast<const TransposeOptions *>(builtin_options())
+               : nullptr;
+  }
+  const MeanOptions *builtin_options_as_MeanOptions() const {
+    return builtin_options_type() == BuiltinOptions_MeanOptions
+               ? static_cast<const MeanOptions *>(builtin_options())
+               : nullptr;
+  }
+  const SubOptions *builtin_options_as_SubOptions() const {
+    return builtin_options_type() == BuiltinOptions_SubOptions
+               ? static_cast<const SubOptions *>(builtin_options())
+               : nullptr;
+  }
+  const DivOptions *builtin_options_as_DivOptions() const {
+    return builtin_options_type() == BuiltinOptions_DivOptions
+               ? static_cast<const DivOptions *>(builtin_options())
+               : nullptr;
+  }
+  const SqueezeOptions *builtin_options_as_SqueezeOptions() const {
+    return builtin_options_type() == BuiltinOptions_SqueezeOptions
+               ? static_cast<const SqueezeOptions *>(builtin_options())
+               : nullptr;
+  }
+  const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const {
+    return builtin_options_type() == BuiltinOptions_SequenceRNNOptions
+               ? static_cast<const SequenceRNNOptions *>(builtin_options())
+               : nullptr;
   }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
   CustomOptionsFormat custom_options_format() const {
-    return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
+    return static_cast<CustomOptionsFormat>(
+        GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX) &&
-           VerifyOffset(verifier, VT_INPUTS) &&
-           verifier.Verify(inputs()) &&
-           VerifyOffset(verifier, VT_OUTPUTS) &&
-           verifier.Verify(outputs()) &&
+           VerifyOffset(verifier, VT_INPUTS) && verifier.Verify(inputs()) &&
+           VerifyOffset(verifier, VT_OUTPUTS) && verifier.Verify(outputs()) &&
            VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE) &&
            VerifyOffset(verifier, VT_BUILTIN_OPTIONS) &&
-           VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) &&
+           VerifyBuiltinOptions(verifier, builtin_options(),
+                                builtin_options_type()) &&
            VerifyOffset(verifier, VT_CUSTOM_OPTIONS) &&
            verifier.Verify(custom_options()) &&
            VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT) &&
            verifier.EndTable();
   }
-  OperatorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Operator> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  OperatorT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      OperatorT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<Operator> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
-template<> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const {
+template <>
+inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>()
+    const {
   return builtin_options_as_Conv2DOptions();
 }
 
-template<> inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const {
+template <>
+inline const DepthwiseConv2DOptions *
+Operator::builtin_options_as<DepthwiseConv2DOptions>() const {
   return builtin_options_as_DepthwiseConv2DOptions();
 }
 
-template<> inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const {
+template <>
+inline const ConcatEmbeddingsOptions *
+Operator::builtin_options_as<ConcatEmbeddingsOptions>() const {
   return builtin_options_as_ConcatEmbeddingsOptions();
 }
 
-template<> inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const {
+template <>
+inline const LSHProjectionOptions *
+Operator::builtin_options_as<LSHProjectionOptions>() const {
   return builtin_options_as_LSHProjectionOptions();
 }
 
-template<> inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const {
+template <>
+inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>()
+    const {
   return builtin_options_as_Pool2DOptions();
 }
 
-template<> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const {
+template <>
+inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const {
   return builtin_options_as_SVDFOptions();
 }
 
-template<> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const {
+template <>
+inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const {
   return builtin_options_as_RNNOptions();
 }
 
-template<> inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const {
+template <>
+inline const FullyConnectedOptions *
+Operator::builtin_options_as<FullyConnectedOptions>() const {
   return builtin_options_as_FullyConnectedOptions();
 }
 
-template<> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const {
+template <>
+inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>()
+    const {
   return builtin_options_as_SoftmaxOptions();
 }
 
-template<> inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const {
+template <>
+inline const ConcatenationOptions *
+Operator::builtin_options_as<ConcatenationOptions>() const {
   return builtin_options_as_ConcatenationOptions();
 }
 
-template<> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const {
+template <>
+inline const AddOptions *Operator::builtin_options_as<AddOptions>() const {
   return builtin_options_as_AddOptions();
 }
 
-template<> inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>() const {
+template <>
+inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>()
+    const {
   return builtin_options_as_L2NormOptions();
 }
 
-template<> inline const LocalResponseNormalizationOptions *Operator::builtin_options_as<LocalResponseNormalizationOptions>() const {
+template <>
+inline const LocalResponseNormalizationOptions *
+Operator::builtin_options_as<LocalResponseNormalizationOptions>() const {
   return builtin_options_as_LocalResponseNormalizationOptions();
 }
 
-template<> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const {
+template <>
+inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const {
   return builtin_options_as_LSTMOptions();
 }
 
-template<> inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const {
+template <>
+inline const ResizeBilinearOptions *
+Operator::builtin_options_as<ResizeBilinearOptions>() const {
   return builtin_options_as_ResizeBilinearOptions();
 }
 
-template<> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const {
+template <>
+inline const CallOptions *Operator::builtin_options_as<CallOptions>() const {
   return builtin_options_as_CallOptions();
 }
 
-template<> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const {
+template <>
+inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>()
+    const {
   return builtin_options_as_ReshapeOptions();
 }
 
-template<> inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const {
+template <>
+inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>()
+    const {
   return builtin_options_as_SkipGramOptions();
 }
 
-template<> inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const {
+template <>
+inline const SpaceToDepthOptions *
+Operator::builtin_options_as<SpaceToDepthOptions>() const {
   return builtin_options_as_SpaceToDepthOptions();
 }
 
-template<> inline const EmbeddingLookupSparseOptions *Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const {
+template <>
+inline const EmbeddingLookupSparseOptions *
+Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const {
   return builtin_options_as_EmbeddingLookupSparseOptions();
 }
 
-template<> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const {
+template <>
+inline const MulOptions *Operator::builtin_options_as<MulOptions>() const {
   return builtin_options_as_MulOptions();
 }
 
+template <>
+inline const PadOptions *Operator::builtin_options_as<PadOptions>() const {
+  return builtin_options_as_PadOptions();
+}
+
+template <>
+inline const GatherOptions *Operator::builtin_options_as<GatherOptions>()
+    const {
+  return builtin_options_as_GatherOptions();
+}
+
+template <>
+inline const BatchToSpaceNDOptions *
+Operator::builtin_options_as<BatchToSpaceNDOptions>() const {
+  return builtin_options_as_BatchToSpaceNDOptions();
+}
+
+template <>
+inline const SpaceToBatchNDOptions *
+Operator::builtin_options_as<SpaceToBatchNDOptions>() const {
+  return builtin_options_as_SpaceToBatchNDOptions();
+}
+
+template <>
+inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>()
+    const {
+  return builtin_options_as_TransposeOptions();
+}
+
+template <>
+inline const MeanOptions *Operator::builtin_options_as<MeanOptions>() const {
+  return builtin_options_as_MeanOptions();
+}
+
+template <>
+inline const SubOptions *Operator::builtin_options_as<SubOptions>() const {
+  return builtin_options_as_SubOptions();
+}
+
+template <>
+inline const DivOptions *Operator::builtin_options_as<DivOptions>() const {
+  return builtin_options_as_DivOptions();
+}
+
+template <>
+inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>()
+    const {
+  return builtin_options_as_SqueezeOptions();
+}
+
+template <>
+inline const SequenceRNNOptions *
+Operator::builtin_options_as<SequenceRNNOptions>() const {
+  return builtin_options_as_SequenceRNNOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -2760,19 +4033,21 @@ struct OperatorBuilder {
     fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
   }
   void add_builtin_options_type(BuiltinOptions builtin_options_type) {
-    fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE, static_cast<uint8_t>(builtin_options_type), 0);
+    fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE,
+                             static_cast<uint8_t>(builtin_options_type), 0);
   }
   void add_builtin_options(flatbuffers::Offset<void> builtin_options) {
     fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options);
   }
-  void add_custom_options(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options) {
+  void add_custom_options(
+      flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options) {
     fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
   }
   void add_custom_options_format(CustomOptionsFormat custom_options_format) {
-    fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT, static_cast<int8_t>(custom_options_format), 0);
+    fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT,
+                            static_cast<int8_t>(custom_options_format), 0);
   }
-  explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+  explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   OperatorBuilder &operator=(const OperatorBuilder &);
@@ -2784,14 +4059,14 @@ struct OperatorBuilder {
 };
 
 inline flatbuffers::Offset<Operator> CreateOperator(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    uint32_t opcode_index = 0,
+    flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
     flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
     flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
     BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
     flatbuffers::Offset<void> builtin_options = 0,
     flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
-    CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS) {
+    CustomOptionsFormat custom_options_format =
+        CustomOptionsFormat_FLEXBUFFERS) {
   OperatorBuilder builder_(_fbb);
   builder_.add_custom_options(custom_options);
   builder_.add_builtin_options(builtin_options);
@@ -2804,26 +4079,25 @@ inline flatbuffers::Offset<Operator> CreateOperator(
 }
 
 inline flatbuffers::Offset<Operator> CreateOperatorDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    uint32_t opcode_index = 0,
+    flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
     const std::vector<int32_t> *inputs = nullptr,
     const std::vector<int32_t> *outputs = nullptr,
     BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
     flatbuffers::Offset<void> builtin_options = 0,
     const std::vector<uint8_t> *custom_options = nullptr,
-    CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS) {
+    CustomOptionsFormat custom_options_format =
+        CustomOptionsFormat_FLEXBUFFERS) {
   return tflite::CreateOperator(
-      _fbb,
-      opcode_index,
-      inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
-      outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
-      builtin_options_type,
+      _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
+      outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type,
       builtin_options,
       custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0,
       custom_options_format);
 }
 
-flatbuffers::Offset<Operator> CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<Operator> CreateOperator(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct SubGraphT : public flatbuffers::NativeTable {
   typedef SubGraph TableType;
@@ -2832,8 +4106,7 @@ struct SubGraphT : public flatbuffers::NativeTable {
   std::vector<int32_t> outputs;
   std::vector<std::unique_ptr<OperatorT>> operators;
   std::string name;
-  SubGraphT() {
-  }
+  SubGraphT() {}
 };
 
 struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
@@ -2846,7 +4119,8 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
     VT_NAME = 12
   };
   const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS);
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(
+        VT_TENSORS);
   }
   const flatbuffers::Vector<int32_t> *inputs() const {
     return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
@@ -2855,36 +4129,41 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
     return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
   }
   const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS);
+    return GetPointer<
+        const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(
+        VT_OPERATORS);
   }
   const flatbuffers::String *name() const {
     return GetPointer<const flatbuffers::String *>(VT_NAME);
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_TENSORS) &&
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TENSORS) &&
            verifier.Verify(tensors()) &&
            verifier.VerifyVectorOfTables(tensors()) &&
-           VerifyOffset(verifier, VT_INPUTS) &&
-           verifier.Verify(inputs()) &&
-           VerifyOffset(verifier, VT_OUTPUTS) &&
-           verifier.Verify(outputs()) &&
+           VerifyOffset(verifier, VT_INPUTS) && verifier.Verify(inputs()) &&
+           VerifyOffset(verifier, VT_OUTPUTS) && verifier.Verify(outputs()) &&
            VerifyOffset(verifier, VT_OPERATORS) &&
            verifier.Verify(operators()) &&
            verifier.VerifyVectorOfTables(operators()) &&
-           VerifyOffset(verifier, VT_NAME) &&
-           verifier.Verify(name()) &&
+           VerifyOffset(verifier, VT_NAME) && verifier.Verify(name()) &&
            verifier.EndTable();
   }
-  SubGraphT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SubGraph> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+  SubGraphT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(
+      SubGraphT *_o,
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SubGraph> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct SubGraphBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors) {
+  void add_tensors(
+      flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>>
+          tensors) {
     fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
   }
   void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs) {
@@ -2893,14 +4172,15 @@ struct SubGraphBuilder {
   void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs) {
     fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
   }
-  void add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators) {
+  void add_operators(
+      flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>>
+          operators) {
     fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
   }
   void add_name(flatbuffers::Offset<flatbuffers::String> name) {
     fbb_.AddOffset(SubGraph::VT_NAME, name);
   }
-  explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+  explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   SubGraphBuilder &operator=(const SubGraphBuilder &);
@@ -2913,10 +4193,12 @@ struct SubGraphBuilder {
 
 inline flatbuffers::Offset<SubGraph> CreateSubGraph(
     flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>>
+        tensors = 0,
     flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
     flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>>
+        operators = 0,
     flatbuffers::Offset<flatbuffers::String> name = 0) {
   SubGraphBuilder builder_(_fbb);
   builder_.add_name(name);
@@ -2939,36 +4221,38 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
       tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
       inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
       outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
-      operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
+      operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators)
+                : 0,
       name ? _fbb.CreateString(name) : 0);
 }
 
-flatbuffers::Offset<SubGraph> CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<SubGraph> CreateSubGraph(
+    flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct BufferT : public flatbuffers::NativeTable {
   typedef Buffer TableType;
   std::vector<uint8_t> data;
-  BufferT() {
-  }
+  BufferT() {}
 };
 
 struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef BufferT NativeTableType;
-  enum {
-    VT_DATA = 4
-  };
+  enum { VT_DATA = 4 };
   const flatbuffers::Vector<uint8_t> *data() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DATA);
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_DATA) &&
-           verifier.Verify(data()) &&
-           verifier.EndTable();
-  }
-  BufferT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Buffer> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA) &&
+           verifier.Verify(data()) && verifier.EndTable();
+  }
+  BufferT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver =
+                                 nullptr) const;
+  static flatbuffers::Offset<Buffer> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct BufferBuilder {
@@ -2977,8 +4261,7 @@ struct BufferBuilder {
   void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data) {
     fbb_.AddOffset(Buffer::VT_DATA, data);
   }
-  explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+  explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   BufferBuilder &operator=(const BufferBuilder &);
@@ -3000,12 +4283,13 @@ inline flatbuffers::Offset<Buffer> CreateBuffer(
 inline flatbuffers::Offset<Buffer> CreateBufferDirect(
     flatbuffers::FlatBufferBuilder &_fbb,
     const std::vector<uint8_t> *data = nullptr) {
-  return tflite::CreateBuffer(
-      _fbb,
-      data ? _fbb.CreateVector<uint8_t>(*data) : 0);
+  return tflite::CreateBuffer(_fbb,
+                              data ? _fbb.CreateVector<uint8_t>(*data) : 0);
 }
 
-flatbuffers::Offset<Buffer> CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<Buffer> CreateBuffer(
+    flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
 struct ModelT : public flatbuffers::NativeTable {
   typedef Model TableType;
@@ -3014,9 +4298,7 @@ struct ModelT : public flatbuffers::NativeTable {
   std::vector<std::unique_ptr<SubGraphT>> subgraphs;
   std::string description;
   std::vector<std::unique_ptr<BufferT>> buffers;
-  ModelT()
-      : version(0) {
-  }
+  ModelT() : version(0) {}
 };
 
 struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
@@ -3028,20 +4310,24 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
     VT_DESCRIPTION = 10,
     VT_BUFFERS = 12
   };
-  uint32_t version() const {
-    return GetField<uint32_t>(VT_VERSION, 0);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(VT_OPERATOR_CODES);
+  uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
+  const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes()
+      const {
+    return GetPointer<
+        const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
+        VT_OPERATOR_CODES);
   }
   const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS);
+    return GetPointer<
+        const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(
+        VT_SUBGRAPHS);
   }
   const flatbuffers::String *description() const {
     return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
   }
   const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS);
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(
+        VT_BUFFERS);
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
@@ -3054,14 +4340,16 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
            verifier.VerifyVectorOfTables(subgraphs()) &&
            VerifyOffset(verifier, VT_DESCRIPTION) &&
            verifier.Verify(description()) &&
-           VerifyOffset(verifier, VT_BUFFERS) &&
-           verifier.Verify(buffers()) &&
-           verifier.VerifyVectorOfTables(buffers()) &&
-           verifier.EndTable();
-  }
-  ModelT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Model> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+           VerifyOffset(verifier, VT_BUFFERS) && verifier.Verify(buffers()) &&
+           verifier.VerifyVectorOfTables(buffers()) && verifier.EndTable();
+  }
+  ModelT *UnPack(
+      const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver =
+                                nullptr) const;
+  static flatbuffers::Offset<Model> Pack(
+      flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o,
+      const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 };
 
 struct ModelBuilder {
@@ -3070,20 +4358,26 @@ struct ModelBuilder {
   void add_version(uint32_t version) {
     fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0);
   }
-  void add_operator_codes(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes) {
+  void add_operator_codes(
+      flatbuffers::Offset<
+          flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>>
+          operator_codes) {
     fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
   }
-  void add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs) {
+  void add_subgraphs(
+      flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>>
+          subgraphs) {
     fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
   }
   void add_description(flatbuffers::Offset<flatbuffers::String> description) {
     fbb_.AddOffset(Model::VT_DESCRIPTION, description);
   }
-  void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers) {
+  void add_buffers(
+      flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>>
+          buffers) {
     fbb_.AddOffset(Model::VT_BUFFERS, buffers);
   }
-  explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
+  explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) {
     start_ = fbb_.StartTable();
   }
   ModelBuilder &operator=(const ModelBuilder &);
@@ -3095,12 +4389,14 @@ struct ModelBuilder {
 };
 
 inline flatbuffers::Offset<Model> CreateModel(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    uint32_t version = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
+    flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>>
+        operator_codes = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>>
+        subgraphs = 0,
     flatbuffers::Offset<flatbuffers::String> description = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0) {
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>>
+        buffers = 0) {
   ModelBuilder builder_(_fbb);
   builder_.add_buffers(buffers);
   builder_.add_description(description);
@@ -3111,890 +4407,2019 @@ inline flatbuffers::Offset<Model> CreateModel(
 }
 
 inline flatbuffers::Offset<Model> CreateModelDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    uint32_t version = 0,
-    const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr,
+    flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+    const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes =
+        nullptr,
     const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr,
     const char *description = nullptr,
     const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr) {
   return tflite::CreateModel(
-      _fbb,
-      version,
-      operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
-      subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
+      _fbb, version,
+      operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(
+                           *operator_codes)
+                     : 0,
+      subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs)
+                : 0,
       description ? _fbb.CreateString(description) : 0,
       buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0);
 }
 
-flatbuffers::Offset<Model> CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+flatbuffers::Offset<Model> CreateModel(
+    flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
-inline QuantizationParametersT *QuantizationParameters::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline QuantizationParametersT *QuantizationParameters::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new QuantizationParametersT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void QuantizationParameters::UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void QuantizationParameters::UnPackTo(
+    QuantizationParametersT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = min(); if (_e) { _o->min.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->min[_i] = _e->Get(_i); } } };
-  { auto _e = max(); if (_e) { _o->max.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->max[_i] = _e->Get(_i); } } };
-  { auto _e = scale(); if (_e) { _o->scale.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->scale[_i] = _e->Get(_i); } } };
-  { auto _e = zero_point(); if (_e) { _o->zero_point.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->zero_point[_i] = _e->Get(_i); } } };
+  {
+    auto _e = min();
+    if (_e) {
+      _o->min.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->min[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = max();
+    if (_e) {
+      _o->max.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->max[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = scale();
+    if (_e) {
+      _o->scale.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->scale[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = zero_point();
+    if (_e) {
+      _o->zero_point.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->zero_point[_i] = _e->Get(_i);
+      }
+    }
+  };
 }
 
-inline flatbuffers::Offset<QuantizationParameters> QuantizationParameters::Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<QuantizationParameters> QuantizationParameters::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateQuantizationParameters(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
+    flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const QuantizationParametersT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const QuantizationParametersT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _min = _o->min.size() ? _fbb.CreateVector(_o->min) : 0;
   auto _max = _o->max.size() ? _fbb.CreateVector(_o->max) : 0;
   auto _scale = _o->scale.size() ? _fbb.CreateVector(_o->scale) : 0;
-  auto _zero_point = _o->zero_point.size() ? _fbb.CreateVector(_o->zero_point) : 0;
-  return tflite::CreateQuantizationParameters(
-      _fbb,
-      _min,
-      _max,
-      _scale,
-      _zero_point);
+  auto _zero_point =
+      _o->zero_point.size() ? _fbb.CreateVector(_o->zero_point) : 0;
+  return tflite::CreateQuantizationParameters(_fbb, _min, _max, _scale,
+                                              _zero_point);
 }
 
-inline TensorT *Tensor::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline TensorT *Tensor::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new TensorT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void Tensor::UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void Tensor::UnPackTo(
+    TensorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = shape(); if (_e) { _o->shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->shape[_i] = _e->Get(_i); } } };
-  { auto _e = type(); _o->type = _e; };
-  { auto _e = buffer(); _o->buffer = _e; };
-  { auto _e = name(); if (_e) _o->name = _e->str(); };
-  { auto _e = quantization(); if (_e) _o->quantization = std::unique_ptr<QuantizationParametersT>(_e->UnPack(_resolver)); };
+  {
+    auto _e = shape();
+    if (_e) {
+      _o->shape.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->shape[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = type();
+    _o->type = _e;
+  };
+  {
+    auto _e = buffer();
+    _o->buffer = _e;
+  };
+  {
+    auto _e = name();
+    if (_e) _o->name = _e->str();
+  };
+  {
+    auto _e = quantization();
+    if (_e)
+      _o->quantization =
+          std::unique_ptr<QuantizationParametersT>(_e->UnPack(_resolver));
+  };
 }
 
-inline flatbuffers::Offset<Tensor> Tensor::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Tensor> Tensor::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateTensor(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<Tensor> CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Tensor> CreateTensor(
+    flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TensorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const TensorT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _shape = _o->shape.size() ? _fbb.CreateVector(_o->shape) : 0;
   auto _type = _o->type;
   auto _buffer = _o->buffer;
   auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
-  auto _quantization = _o->quantization ? CreateQuantizationParameters(_fbb, _o->quantization.get(), _rehasher) : 0;
-  return tflite::CreateTensor(
-      _fbb,
-      _shape,
-      _type,
-      _buffer,
-      _name,
-      _quantization);
+  auto _quantization = _o->quantization
+                           ? CreateQuantizationParameters(
+                                 _fbb, _o->quantization.get(), _rehasher)
+                           : 0;
+  return tflite::CreateTensor(_fbb, _shape, _type, _buffer, _name,
+                              _quantization);
 }
 
-inline Conv2DOptionsT *Conv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline Conv2DOptionsT *Conv2DOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new Conv2DOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void Conv2DOptions::UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void Conv2DOptions::UnPackTo(
+    Conv2DOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = padding(); _o->padding = _e; };
-  { auto _e = stride_w(); _o->stride_w = _e; };
-  { auto _e = stride_h(); _o->stride_h = _e; };
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  {
+    auto _e = padding();
+    _o->padding = _e;
+  };
+  {
+    auto _e = stride_w();
+    _o->stride_w = _e;
+  };
+  {
+    auto _e = stride_h();
+    _o->stride_h = _e;
+  };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
 }
 
-inline flatbuffers::Offset<Conv2DOptions> Conv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Conv2DOptions> Conv2DOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateConv2DOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Conv2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const Conv2DOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _padding = _o->padding;
   auto _stride_w = _o->stride_w;
   auto _stride_h = _o->stride_h;
   auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateConv2DOptions(
-      _fbb,
-      _padding,
-      _stride_w,
-      _stride_h,
-      _fused_activation_function);
+  return tflite::CreateConv2DOptions(_fbb, _padding, _stride_w, _stride_h,
+                                     _fused_activation_function);
 }
 
-inline Pool2DOptionsT *Pool2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline Pool2DOptionsT *Pool2DOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new Pool2DOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void Pool2DOptions::UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void Pool2DOptions::UnPackTo(
+    Pool2DOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = padding(); _o->padding = _e; };
-  { auto _e = stride_w(); _o->stride_w = _e; };
-  { auto _e = stride_h(); _o->stride_h = _e; };
-  { auto _e = filter_width(); _o->filter_width = _e; };
-  { auto _e = filter_height(); _o->filter_height = _e; };
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  {
+    auto _e = padding();
+    _o->padding = _e;
+  };
+  {
+    auto _e = stride_w();
+    _o->stride_w = _e;
+  };
+  {
+    auto _e = stride_h();
+    _o->stride_h = _e;
+  };
+  {
+    auto _e = filter_width();
+    _o->filter_width = _e;
+  };
+  {
+    auto _e = filter_height();
+    _o->filter_height = _e;
+  };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
 }
 
-inline flatbuffers::Offset<Pool2DOptions> Pool2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Pool2DOptions> Pool2DOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreatePool2DOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Pool2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const Pool2DOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _padding = _o->padding;
   auto _stride_w = _o->stride_w;
   auto _stride_h = _o->stride_h;
   auto _filter_width = _o->filter_width;
   auto _filter_height = _o->filter_height;
   auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreatePool2DOptions(
-      _fbb,
-      _padding,
-      _stride_w,
-      _stride_h,
-      _filter_width,
-      _filter_height,
-      _fused_activation_function);
+  return tflite::CreatePool2DOptions(_fbb, _padding, _stride_w, _stride_h,
+                                     _filter_width, _filter_height,
+                                     _fused_activation_function);
 }
 
-inline DepthwiseConv2DOptionsT *DepthwiseConv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline DepthwiseConv2DOptionsT *DepthwiseConv2DOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new DepthwiseConv2DOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void DepthwiseConv2DOptions::UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void DepthwiseConv2DOptions::UnPackTo(
+    DepthwiseConv2DOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = padding(); _o->padding = _e; };
-  { auto _e = stride_w(); _o->stride_w = _e; };
-  { auto _e = stride_h(); _o->stride_h = _e; };
-  { auto _e = depth_multiplier(); _o->depth_multiplier = _e; };
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  {
+    auto _e = padding();
+    _o->padding = _e;
+  };
+  {
+    auto _e = stride_w();
+    _o->stride_w = _e;
+  };
+  {
+    auto _e = stride_h();
+    _o->stride_h = _e;
+  };
+  {
+    auto _e = depth_multiplier();
+    _o->depth_multiplier = _e;
+  };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
 }
 
-inline flatbuffers::Offset<DepthwiseConv2DOptions> DepthwiseConv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<DepthwiseConv2DOptions> DepthwiseConv2DOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateDepthwiseConv2DOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DepthwiseConv2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const DepthwiseConv2DOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _padding = _o->padding;
   auto _stride_w = _o->stride_w;
   auto _stride_h = _o->stride_h;
   auto _depth_multiplier = _o->depth_multiplier;
   auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateDepthwiseConv2DOptions(
-      _fbb,
-      _padding,
-      _stride_w,
-      _stride_h,
-      _depth_multiplier,
-      _fused_activation_function);
+  return tflite::CreateDepthwiseConv2DOptions(_fbb, _padding, _stride_w,
+                                              _stride_h, _depth_multiplier,
+                                              _fused_activation_function);
 }
 
-inline ConcatEmbeddingsOptionsT *ConcatEmbeddingsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline ConcatEmbeddingsOptionsT *ConcatEmbeddingsOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new ConcatEmbeddingsOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void ConcatEmbeddingsOptions::UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void ConcatEmbeddingsOptions::UnPackTo(
+    ConcatEmbeddingsOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = num_channels(); _o->num_channels = _e; };
-  { auto _e = num_columns_per_channel(); if (_e) { _o->num_columns_per_channel.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->num_columns_per_channel[_i] = _e->Get(_i); } } };
-  { auto _e = embedding_dim_per_channel(); if (_e) { _o->embedding_dim_per_channel.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->embedding_dim_per_channel[_i] = _e->Get(_i); } } };
+  {
+    auto _e = num_channels();
+    _o->num_channels = _e;
+  };
+  {
+    auto _e = num_columns_per_channel();
+    if (_e) {
+      _o->num_columns_per_channel.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->num_columns_per_channel[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = embedding_dim_per_channel();
+    if (_e) {
+      _o->embedding_dim_per_channel.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->embedding_dim_per_channel[_i] = _e->Get(_i);
+      }
+    }
+  };
 }
 
-inline flatbuffers::Offset<ConcatEmbeddingsOptions> ConcatEmbeddingsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+ConcatEmbeddingsOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateConcatEmbeddingsOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<ConcatEmbeddingsOptions>
+CreateConcatEmbeddingsOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ConcatEmbeddingsOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const ConcatEmbeddingsOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _num_channels = _o->num_channels;
-  auto _num_columns_per_channel = _o->num_columns_per_channel.size() ? _fbb.CreateVector(_o->num_columns_per_channel) : 0;
-  auto _embedding_dim_per_channel = _o->embedding_dim_per_channel.size() ? _fbb.CreateVector(_o->embedding_dim_per_channel) : 0;
-  return tflite::CreateConcatEmbeddingsOptions(
-      _fbb,
-      _num_channels,
-      _num_columns_per_channel,
-      _embedding_dim_per_channel);
-}
-
-inline LSHProjectionOptionsT *LSHProjectionOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _num_columns_per_channel =
+      _o->num_columns_per_channel.size()
+          ? _fbb.CreateVector(_o->num_columns_per_channel)
+          : 0;
+  auto _embedding_dim_per_channel =
+      _o->embedding_dim_per_channel.size()
+          ? _fbb.CreateVector(_o->embedding_dim_per_channel)
+          : 0;
+  return tflite::CreateConcatEmbeddingsOptions(_fbb, _num_channels,
+                                               _num_columns_per_channel,
+                                               _embedding_dim_per_channel);
+}
+
+inline LSHProjectionOptionsT *LSHProjectionOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new LSHProjectionOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void LSHProjectionOptions::UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void LSHProjectionOptions::UnPackTo(
+    LSHProjectionOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = type(); _o->type = _e; };
+  {
+    auto _e = type();
+    _o->type = _e;
+  };
 }
 
-inline flatbuffers::Offset<LSHProjectionOptions> LSHProjectionOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<LSHProjectionOptions> LSHProjectionOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateLSHProjectionOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LSHProjectionOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const LSHProjectionOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _type = _o->type;
-  return tflite::CreateLSHProjectionOptions(
-      _fbb,
-      _type);
+  return tflite::CreateLSHProjectionOptions(_fbb, _type);
 }
 
-inline SVDFOptionsT *SVDFOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline SVDFOptionsT *SVDFOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new SVDFOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void SVDFOptions::UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void SVDFOptions::UnPackTo(
+    SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = rank(); _o->rank = _e; };
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  {
+    auto _e = rank();
+    _o->rank = _e;
+  };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
 }
 
-inline flatbuffers::Offset<SVDFOptions> SVDFOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<SVDFOptions> SVDFOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateSVDFOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SVDFOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SVDFOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _rank = _o->rank;
   auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateSVDFOptions(
-      _fbb,
-      _rank,
-      _fused_activation_function);
+  return tflite::CreateSVDFOptions(_fbb, _rank, _fused_activation_function);
 }
 
-inline RNNOptionsT *RNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline RNNOptionsT *RNNOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new RNNOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void RNNOptions::UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void RNNOptions::UnPackTo(
+    RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
 }
 
-inline flatbuffers::Offset<RNNOptions> RNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<RNNOptions> RNNOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateRNNOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const RNNOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateRNNOptions(
-      _fbb,
-      _fused_activation_function);
+  return tflite::CreateRNNOptions(_fbb, _fused_activation_function);
+}
+
+inline SequenceRNNOptionsT *SequenceRNNOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new SequenceRNNOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void SequenceRNNOptions::UnPackTo(
+    SequenceRNNOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = time_major();
+    _o->time_major = _e;
+  }
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  }
+}
+
+inline flatbuffers::Offset<SequenceRNNOptions> SequenceRNNOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSequenceRNNOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SequenceRNNOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _time_major = _o->time_major;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateSequenceRNNOptions(_fbb, _time_major,
+                                          _fused_activation_function);
 }
 
-inline FullyConnectedOptionsT *FullyConnectedOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline FullyConnectedOptionsT *FullyConnectedOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new FullyConnectedOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void FullyConnectedOptions::UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void FullyConnectedOptions::UnPackTo(
+    FullyConnectedOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
 }
 
-inline flatbuffers::Offset<FullyConnectedOptions> FullyConnectedOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<FullyConnectedOptions> FullyConnectedOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateFullyConnectedOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FullyConnectedOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const FullyConnectedOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateFullyConnectedOptions(
-      _fbb,
-      _fused_activation_function);
+  return tflite::CreateFullyConnectedOptions(_fbb, _fused_activation_function);
 }
 
-inline SoftmaxOptionsT *SoftmaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline SoftmaxOptionsT *SoftmaxOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new SoftmaxOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void SoftmaxOptions::UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void SoftmaxOptions::UnPackTo(
+    SoftmaxOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = beta(); _o->beta = _e; };
+  {
+    auto _e = beta();
+    _o->beta = _e;
+  };
 }
 
-inline flatbuffers::Offset<SoftmaxOptions> SoftmaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<SoftmaxOptions> SoftmaxOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateSoftmaxOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SoftmaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SoftmaxOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _beta = _o->beta;
-  return tflite::CreateSoftmaxOptions(
-      _fbb,
-      _beta);
+  return tflite::CreateSoftmaxOptions(_fbb, _beta);
 }
 
-inline ConcatenationOptionsT *ConcatenationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline ConcatenationOptionsT *ConcatenationOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new ConcatenationOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void ConcatenationOptions::UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void ConcatenationOptions::UnPackTo(
+    ConcatenationOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = axis(); _o->axis = _e; };
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  {
+    auto _e = axis();
+    _o->axis = _e;
+  };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
 }
 
-inline flatbuffers::Offset<ConcatenationOptions> ConcatenationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<ConcatenationOptions> ConcatenationOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateConcatenationOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ConcatenationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const ConcatenationOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _axis = _o->axis;
   auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateConcatenationOptions(
-      _fbb,
-      _axis,
-      _fused_activation_function);
+  return tflite::CreateConcatenationOptions(_fbb, _axis,
+                                            _fused_activation_function);
 }
 
-inline AddOptionsT *AddOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline AddOptionsT *AddOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new AddOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void AddOptions::UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void AddOptions::UnPackTo(
+    AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<AddOptions> AddOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateAddOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<AddOptions> CreateAddOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const AddOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateAddOptions(_fbb, _fused_activation_function);
+}
+
+inline MulOptionsT *MulOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new MulOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void MulOptions::UnPackTo(
+    MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<MulOptions> MulOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateMulOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<MulOptions> CreateMulOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const MulOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateMulOptions(_fbb, _fused_activation_function);
+}
+
+inline L2NormOptionsT *L2NormOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new L2NormOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void L2NormOptions::UnPackTo(
+    L2NormOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+}
+
+inline flatbuffers::Offset<L2NormOptions> L2NormOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateL2NormOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const L2NormOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateL2NormOptions(_fbb, _fused_activation_function);
+}
+
+inline LocalResponseNormalizationOptionsT *
+LocalResponseNormalizationOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new LocalResponseNormalizationOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void LocalResponseNormalizationOptions::UnPackTo(
+    LocalResponseNormalizationOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = radius();
+    _o->radius = _e;
+  };
+  {
+    auto _e = bias();
+    _o->bias = _e;
+  };
+  {
+    auto _e = alpha();
+    _o->alpha = _e;
+  };
+  {
+    auto _e = beta();
+    _o->beta = _e;
+  };
+}
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions>
+LocalResponseNormalizationOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const LocalResponseNormalizationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateLocalResponseNormalizationOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions>
+CreateLocalResponseNormalizationOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const LocalResponseNormalizationOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const LocalResponseNormalizationOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _radius = _o->radius;
+  auto _bias = _o->bias;
+  auto _alpha = _o->alpha;
+  auto _beta = _o->beta;
+  return tflite::CreateLocalResponseNormalizationOptions(_fbb, _radius, _bias,
+                                                         _alpha, _beta);
+}
+
+inline LSTMOptionsT *LSTMOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new LSTMOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void LSTMOptions::UnPackTo(
+    LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
+  {
+    auto _e = cell_clip();
+    _o->cell_clip = _e;
+  };
+  {
+    auto _e = proj_clip();
+    _o->proj_clip = _e;
+  };
+}
+
+inline flatbuffers::Offset<LSTMOptions> LSTMOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const LSTMOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _cell_clip = _o->cell_clip;
+  auto _proj_clip = _o->proj_clip;
+  return tflite::CreateLSTMOptions(_fbb, _fused_activation_function, _cell_clip,
+                                   _proj_clip);
+}
+
+inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new ResizeBilinearOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void ResizeBilinearOptions::UnPackTo(
+    ResizeBilinearOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = new_height();
+    _o->new_height = _e;
+  };
+  {
+    auto _e = new_width();
+    _o->new_width = _e;
+  };
+}
+
+inline flatbuffers::Offset<ResizeBilinearOptions> ResizeBilinearOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateResizeBilinearOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const ResizeBilinearOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _new_height = _o->new_height;
+  auto _new_width = _o->new_width;
+  return tflite::CreateResizeBilinearOptions(_fbb, _new_height, _new_width);
+}
+
+inline CallOptionsT *CallOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new CallOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void CallOptions::UnPackTo(
+    CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = subgraph();
+    _o->subgraph = _e;
+  };
+}
+
+inline flatbuffers::Offset<CallOptions> CallOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateCallOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<CallOptions> CreateCallOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const CallOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _subgraph = _o->subgraph;
+  return tflite::CreateCallOptions(_fbb, _subgraph);
+}
+
+inline PadOptionsT *PadOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new PadOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void PadOptions::UnPackTo(
+    PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = before_padding();
+    if (_e) {
+      _o->before_padding.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->before_padding[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = after_padding();
+    if (_e) {
+      _o->after_padding.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->after_padding[_i] = _e->Get(_i);
+      }
+    }
+  };
+}
+
+inline flatbuffers::Offset<PadOptions> PadOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreatePadOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<PadOptions> CreatePadOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const PadOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _before_padding =
+      _o->before_padding.size() ? _fbb.CreateVector(_o->before_padding) : 0;
+  auto _after_padding =
+      _o->after_padding.size() ? _fbb.CreateVector(_o->after_padding) : 0;
+  return tflite::CreatePadOptions(_fbb, _before_padding, _after_padding);
+}
+
+inline ReshapeOptionsT *ReshapeOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new ReshapeOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void ReshapeOptions::UnPackTo(
+    ReshapeOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  {
+    auto _e = new_shape();
+    if (_e) {
+      _o->new_shape.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->new_shape[_i] = _e->Get(_i);
+      }
+    }
+  };
+}
+
+inline flatbuffers::Offset<ReshapeOptions> ReshapeOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateReshapeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const ReshapeOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _new_shape = _o->new_shape.size() ? _fbb.CreateVector(_o->new_shape) : 0;
+  return tflite::CreateReshapeOptions(_fbb, _new_shape);
+}
+
+inline SpaceToBatchNDOptionsT *SpaceToBatchNDOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new SpaceToBatchNDOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void SpaceToBatchNDOptions::UnPackTo(
+    SpaceToBatchNDOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  {
+    auto _e = block_shape();
+    if (_e) {
+      _o->block_shape.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->block_shape[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = before_paddings();
+    if (_e) {
+      _o->before_paddings.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->before_paddings[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = after_paddings();
+    if (_e) {
+      _o->after_paddings.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->after_paddings[_i] = _e->Get(_i);
+      }
+    }
+  };
 }
 
-inline flatbuffers::Offset<AddOptions> AddOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateAddOptions(_fbb, _o, _rehasher);
+inline flatbuffers::Offset<SpaceToBatchNDOptions> SpaceToBatchNDOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSpaceToBatchNDOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<AddOptions> CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<SpaceToBatchNDOptions> CreateSpaceToBatchNDOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AddOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateAddOptions(
-      _fbb,
-      _fused_activation_function);
-}
-
-inline MulOptionsT *MulOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new MulOptionsT();
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SpaceToBatchNDOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _block_shape =
+      _o->block_shape.size() ? _fbb.CreateVector(_o->block_shape) : 0;
+  auto _before_paddings =
+      _o->before_paddings.size() ? _fbb.CreateVector(_o->before_paddings) : 0;
+  auto _after_paddings =
+      _o->after_paddings.size() ? _fbb.CreateVector(_o->after_paddings) : 0;
+  return tflite::CreateSpaceToBatchNDOptions(_fbb, _block_shape,
+                                             _before_paddings, _after_paddings);
+}
+
+inline BatchToSpaceNDOptionsT *BatchToSpaceNDOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new BatchToSpaceNDOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void MulOptions::UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void BatchToSpaceNDOptions::UnPackTo(
+    BatchToSpaceNDOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  {
+    auto _e = block_shape();
+    if (_e) {
+      _o->block_shape.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->block_shape[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = before_crops();
+    if (_e) {
+      _o->before_crops.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->before_crops[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = after_crops();
+    if (_e) {
+      _o->after_crops.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->after_crops[_i] = _e->Get(_i);
+      }
+    }
+  };
 }
 
-inline flatbuffers::Offset<MulOptions> MulOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateMulOptions(_fbb, _o, _rehasher);
+inline flatbuffers::Offset<BatchToSpaceNDOptions> BatchToSpaceNDOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateBatchToSpaceNDOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<MulOptions> CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MulOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateMulOptions(
-      _fbb,
-      _fused_activation_function);
-}
-
-inline L2NormOptionsT *L2NormOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new L2NormOptionsT();
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const BatchToSpaceNDOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _block_shape =
+      _o->block_shape.size() ? _fbb.CreateVector(_o->block_shape) : 0;
+  auto _before_crops =
+      _o->before_crops.size() ? _fbb.CreateVector(_o->before_crops) : 0;
+  auto _after_crops =
+      _o->after_crops.size() ? _fbb.CreateVector(_o->after_crops) : 0;
+  return tflite::CreateBatchToSpaceNDOptions(_fbb, _block_shape, _before_crops,
+                                             _after_crops);
+}
+
+inline SkipGramOptionsT *SkipGramOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new SkipGramOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void L2NormOptions::UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void SkipGramOptions::UnPackTo(
+    SkipGramOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  {
+    auto _e = ngram_size();
+    _o->ngram_size = _e;
+  };
+  {
+    auto _e = max_skip_size();
+    _o->max_skip_size = _e;
+  };
+  {
+    auto _e = include_all_ngrams();
+    _o->include_all_ngrams = _e;
+  };
 }
 
-inline flatbuffers::Offset<L2NormOptions> L2NormOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateL2NormOptions(_fbb, _o, _rehasher);
+inline flatbuffers::Offset<SkipGramOptions> SkipGramOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSkipGramOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const L2NormOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateL2NormOptions(
-      _fbb,
-      _fused_activation_function);
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SkipGramOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _ngram_size = _o->ngram_size;
+  auto _max_skip_size = _o->max_skip_size;
+  auto _include_all_ngrams = _o->include_all_ngrams;
+  return tflite::CreateSkipGramOptions(_fbb, _ngram_size, _max_skip_size,
+                                       _include_all_ngrams);
 }
 
-inline LocalResponseNormalizationOptionsT *LocalResponseNormalizationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LocalResponseNormalizationOptionsT();
+inline SpaceToDepthOptionsT *SpaceToDepthOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new SpaceToDepthOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void LocalResponseNormalizationOptions::UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void SpaceToDepthOptions::UnPackTo(
+    SpaceToDepthOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = radius(); _o->radius = _e; };
-  { auto _e = bias(); _o->bias = _e; };
-  { auto _e = alpha(); _o->alpha = _e; };
-  { auto _e = beta(); _o->beta = _e; };
+  {
+    auto _e = block_size();
+    _o->block_size = _e;
+  };
 }
 
-inline flatbuffers::Offset<LocalResponseNormalizationOptions> LocalResponseNormalizationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLocalResponseNormalizationOptions(_fbb, _o, _rehasher);
+inline flatbuffers::Offset<SpaceToDepthOptions> SpaceToDepthOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSpaceToDepthOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<LocalResponseNormalizationOptions> CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LocalResponseNormalizationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _radius = _o->radius;
-  auto _bias = _o->bias;
-  auto _alpha = _o->alpha;
-  auto _beta = _o->beta;
-  return tflite::CreateLocalResponseNormalizationOptions(
-      _fbb,
-      _radius,
-      _bias,
-      _alpha,
-      _beta);
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SpaceToDepthOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _block_size = _o->block_size;
+  return tflite::CreateSpaceToDepthOptions(_fbb, _block_size);
 }
 
-inline LSTMOptionsT *LSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LSTMOptionsT();
+inline SubOptionsT *SubOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new SubOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void LSTMOptions::UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void SubOptions::UnPackTo(
+    SubOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
-  { auto _e = cell_clip(); _o->cell_clip = _e; };
-  { auto _e = proj_clip(); _o->proj_clip = _e; };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
 }
 
-inline flatbuffers::Offset<LSTMOptions> LSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLSTMOptions(_fbb, _o, _rehasher);
+inline flatbuffers::Offset<SubOptions> SubOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSubOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<SubOptions> CreateSubOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SubOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _fused_activation_function = _o->fused_activation_function;
-  auto _cell_clip = _o->cell_clip;
-  auto _proj_clip = _o->proj_clip;
-  return tflite::CreateLSTMOptions(
-      _fbb,
-      _fused_activation_function,
-      _cell_clip,
-      _proj_clip);
+  return tflite::CreateSubOptions(_fbb, _fused_activation_function);
 }
 
-inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ResizeBilinearOptionsT();
+inline DivOptionsT *DivOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new DivOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void ResizeBilinearOptions::UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void DivOptions::UnPackTo(
+    DivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = new_height(); _o->new_height = _e; };
-  { auto _e = new_width(); _o->new_width = _e; };
+  {
+    auto _e = fused_activation_function();
+    _o->fused_activation_function = _e;
+  };
 }
 
-inline flatbuffers::Offset<ResizeBilinearOptions> ResizeBilinearOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateResizeBilinearOptions(_fbb, _o, _rehasher);
+inline flatbuffers::Offset<DivOptions> DivOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateDivOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<DivOptions> CreateDivOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ResizeBilinearOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _new_height = _o->new_height;
-  auto _new_width = _o->new_width;
-  return tflite::CreateResizeBilinearOptions(
-      _fbb,
-      _new_height,
-      _new_width);
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const DivOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateDivOptions(_fbb, _fused_activation_function);
 }
 
-inline CallOptionsT *CallOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new CallOptionsT();
+inline EmbeddingLookupSparseOptionsT *EmbeddingLookupSparseOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new EmbeddingLookupSparseOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void CallOptions::UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void EmbeddingLookupSparseOptions::UnPackTo(
+    EmbeddingLookupSparseOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = subgraph(); _o->subgraph = _e; };
+  {
+    auto _e = combiner();
+    _o->combiner = _e;
+  };
 }
 
-inline flatbuffers::Offset<CallOptions> CallOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateCallOptions(_fbb, _o, _rehasher);
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
+EmbeddingLookupSparseOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const EmbeddingLookupSparseOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateEmbeddingLookupSparseOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
+CreateEmbeddingLookupSparseOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const EmbeddingLookupSparseOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CallOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _subgraph = _o->subgraph;
-  return tflite::CreateCallOptions(
-      _fbb,
-      _subgraph);
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const EmbeddingLookupSparseOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _combiner = _o->combiner;
+  return tflite::CreateEmbeddingLookupSparseOptions(_fbb, _combiner);
 }
 
-inline ReshapeOptionsT *ReshapeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ReshapeOptionsT();
+inline GatherOptionsT *GatherOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new GatherOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void ReshapeOptions::UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void GatherOptions::UnPackTo(
+    GatherOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = new_shape(); if (_e) { _o->new_shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->new_shape[_i] = _e->Get(_i); } } };
+  {
+    auto _e = axis();
+    _o->axis = _e;
+  };
 }
 
-inline flatbuffers::Offset<ReshapeOptions> ReshapeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateReshapeOptions(_fbb, _o, _rehasher);
+inline flatbuffers::Offset<GatherOptions> GatherOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateGatherOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReshapeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _new_shape = _o->new_shape.size() ? _fbb.CreateVector(_o->new_shape) : 0;
-  return tflite::CreateReshapeOptions(
-      _fbb,
-      _new_shape);
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const GatherOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _axis = _o->axis;
+  return tflite::CreateGatherOptions(_fbb, _axis);
 }
 
-inline SkipGramOptionsT *SkipGramOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SkipGramOptionsT();
+inline TransposeOptionsT *TransposeOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new TransposeOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void SkipGramOptions::UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void TransposeOptions::UnPackTo(
+    TransposeOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = ngram_size(); _o->ngram_size = _e; };
-  { auto _e = max_skip_size(); _o->max_skip_size = _e; };
-  { auto _e = include_all_ngrams(); _o->include_all_ngrams = _e; };
+  {
+    auto _e = perm();
+    if (_e) {
+      _o->perm.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->perm[_i] = _e->Get(_i);
+      }
+    }
+  };
 }
 
-inline flatbuffers::Offset<SkipGramOptions> SkipGramOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSkipGramOptions(_fbb, _o, _rehasher);
+inline flatbuffers::Offset<TransposeOptions> TransposeOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateTransposeOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<TransposeOptions> CreateTransposeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SkipGramOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _ngram_size = _o->ngram_size;
-  auto _max_skip_size = _o->max_skip_size;
-  auto _include_all_ngrams = _o->include_all_ngrams;
-  return tflite::CreateSkipGramOptions(
-      _fbb,
-      _ngram_size,
-      _max_skip_size,
-      _include_all_ngrams);
-}
-
-inline SpaceToDepthOptionsT *SpaceToDepthOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SpaceToDepthOptionsT();
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const TransposeOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _perm = _o->perm.size() ? _fbb.CreateVector(_o->perm) : 0;
+  return tflite::CreateTransposeOptions(_fbb, _perm);
+}
+
+inline MeanOptionsT *MeanOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new MeanOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void SpaceToDepthOptions::UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void MeanOptions::UnPackTo(
+    MeanOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = block_size(); _o->block_size = _e; };
+  {
+    auto _e = axis();
+    if (_e) {
+      _o->axis.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->axis[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = keep_dims();
+    _o->keep_dims = _e;
+  };
 }
 
-inline flatbuffers::Offset<SpaceToDepthOptions> SpaceToDepthOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSpaceToDepthOptions(_fbb, _o, _rehasher);
+inline flatbuffers::Offset<MeanOptions> MeanOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateMeanOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<MeanOptions> CreateMeanOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SpaceToDepthOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _block_size = _o->block_size;
-  return tflite::CreateSpaceToDepthOptions(
-      _fbb,
-      _block_size);
-}
-
-inline EmbeddingLookupSparseOptionsT *EmbeddingLookupSparseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new EmbeddingLookupSparseOptionsT();
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const MeanOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _axis = _o->axis.size() ? _fbb.CreateVector(_o->axis) : 0;
+  auto _keep_dims = _o->keep_dims;
+  return tflite::CreateMeanOptions(_fbb, _axis, _keep_dims);
+}
+
+inline SqueezeOptionsT *SqueezeOptions::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new SqueezeOptionsT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void EmbeddingLookupSparseOptions::UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void SqueezeOptions::UnPackTo(
+    SqueezeOptionsT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = combiner(); _o->combiner = _e; };
+  {
+    auto _e = squeeze_dims();
+    if (_e) {
+      _o->squeeze_dims.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->squeeze_dims[_i] = _e->Get(_i);
+      }
+    }
+  };
 }
 
-inline flatbuffers::Offset<EmbeddingLookupSparseOptions> EmbeddingLookupSparseOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateEmbeddingLookupSparseOptions(_fbb, _o, _rehasher);
+inline flatbuffers::Offset<SqueezeOptions> SqueezeOptions::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSqueezeOptions(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const EmbeddingLookupSparseOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _combiner = _o->combiner;
-  return tflite::CreateEmbeddingLookupSparseOptions(
-      _fbb,
-      _combiner);
-}
-
-inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SqueezeOptionsT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _squeeze_dims =
+      _o->squeeze_dims.size() ? _fbb.CreateVector(_o->squeeze_dims) : 0;
+  return tflite::CreateSqueezeOptions(_fbb, _squeeze_dims);
+}
+
+inline OperatorCodeT *OperatorCode::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new OperatorCodeT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void OperatorCode::UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void OperatorCode::UnPackTo(
+    OperatorCodeT *_o,
+    const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = builtin_code(); _o->builtin_code = _e; };
-  { auto _e = custom_code(); if (_e) _o->custom_code = _e->str(); };
+  {
+    auto _e = builtin_code();
+    _o->builtin_code = _e;
+  };
+  {
+    auto _e = custom_code();
+    if (_e) _o->custom_code = _e->str();
+  };
 }
 
-inline flatbuffers::Offset<OperatorCode> OperatorCode::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<OperatorCode> OperatorCode::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateOperatorCode(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<OperatorCode> CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<OperatorCode> CreateOperatorCode(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OperatorCodeT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const OperatorCodeT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _builtin_code = _o->builtin_code;
-  auto _custom_code = _o->custom_code.empty() ? 0 : _fbb.CreateString(_o->custom_code);
-  return tflite::CreateOperatorCode(
-      _fbb,
-      _builtin_code,
-      _custom_code);
+  auto _custom_code =
+      _o->custom_code.empty() ? 0 : _fbb.CreateString(_o->custom_code);
+  return tflite::CreateOperatorCode(_fbb, _builtin_code, _custom_code);
 }
 
-inline OperatorT *Operator::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline OperatorT *Operator::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new OperatorT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void Operator::UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void Operator::UnPackTo(
+    OperatorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = opcode_index(); _o->opcode_index = _e; };
-  { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->inputs[_i] = _e->Get(_i); } } };
-  { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outputs[_i] = _e->Get(_i); } } };
-  { auto _e = builtin_options_type(); _o->builtin_options.type = _e; };
-  { auto _e = builtin_options(); if (_e) _o->builtin_options.value = BuiltinOptionsUnion::UnPack(_e, builtin_options_type(), _resolver); };
-  { auto _e = custom_options(); if (_e) { _o->custom_options.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->custom_options[_i] = _e->Get(_i); } } };
-  { auto _e = custom_options_format(); _o->custom_options_format = _e; };
+  {
+    auto _e = opcode_index();
+    _o->opcode_index = _e;
+  };
+  {
+    auto _e = inputs();
+    if (_e) {
+      _o->inputs.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->inputs[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = outputs();
+    if (_e) {
+      _o->outputs.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->outputs[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = builtin_options_type();
+    _o->builtin_options.type = _e;
+  };
+  {
+    auto _e = builtin_options();
+    if (_e)
+      _o->builtin_options.value =
+          BuiltinOptionsUnion::UnPack(_e, builtin_options_type(), _resolver);
+  };
+  {
+    auto _e = custom_options();
+    if (_e) {
+      _o->custom_options.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->custom_options[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = custom_options_format();
+    _o->custom_options_format = _e;
+  };
 }
 
-inline flatbuffers::Offset<Operator> Operator::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Operator> Operator::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateOperator(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<Operator> CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Operator> CreateOperator(
+    flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OperatorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const OperatorT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _opcode_index = _o->opcode_index;
   auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0;
   auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0;
   auto _builtin_options_type = _o->builtin_options.type;
   auto _builtin_options = _o->builtin_options.Pack(_fbb);
-  auto _custom_options = _o->custom_options.size() ? _fbb.CreateVector(_o->custom_options) : 0;
+  auto _custom_options =
+      _o->custom_options.size() ? _fbb.CreateVector(_o->custom_options) : 0;
   auto _custom_options_format = _o->custom_options_format;
-  return tflite::CreateOperator(
-      _fbb,
-      _opcode_index,
-      _inputs,
-      _outputs,
-      _builtin_options_type,
-      _builtin_options,
-      _custom_options,
-      _custom_options_format);
+  return tflite::CreateOperator(_fbb, _opcode_index, _inputs, _outputs,
+                                _builtin_options_type, _builtin_options,
+                                _custom_options, _custom_options_format);
 }
 
-inline SubGraphT *SubGraph::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline SubGraphT *SubGraph::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new SubGraphT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void SubGraph::UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void SubGraph::UnPackTo(
+    SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = tensors(); if (_e) { _o->tensors.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->tensors[_i] = std::unique_ptr<TensorT>(_e->Get(_i)->UnPack(_resolver)); } } };
-  { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->inputs[_i] = _e->Get(_i); } } };
-  { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outputs[_i] = _e->Get(_i); } } };
-  { auto _e = operators(); if (_e) { _o->operators.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->operators[_i] = std::unique_ptr<OperatorT>(_e->Get(_i)->UnPack(_resolver)); } } };
-  { auto _e = name(); if (_e) _o->name = _e->str(); };
+  {
+    auto _e = tensors();
+    if (_e) {
+      _o->tensors.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->tensors[_i] =
+            std::unique_ptr<TensorT>(_e->Get(_i)->UnPack(_resolver));
+      }
+    }
+  };
+  {
+    auto _e = inputs();
+    if (_e) {
+      _o->inputs.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->inputs[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = outputs();
+    if (_e) {
+      _o->outputs.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->outputs[_i] = _e->Get(_i);
+      }
+    }
+  };
+  {
+    auto _e = operators();
+    if (_e) {
+      _o->operators.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->operators[_i] =
+            std::unique_ptr<OperatorT>(_e->Get(_i)->UnPack(_resolver));
+      }
+    }
+  };
+  {
+    auto _e = name();
+    if (_e) _o->name = _e->str();
+  };
 }
 
-inline flatbuffers::Offset<SubGraph> SubGraph::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<SubGraph> SubGraph::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateSubGraph(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<SubGraph> CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<SubGraph> CreateSubGraph(
+    flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SubGraphT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _tensors = _o->tensors.size() ? _fbb.CreateVector<flatbuffers::Offset<Tensor>> (_o->tensors.size(), [](size_t i, _VectorArgs *__va) { return CreateTensor(*__va->__fbb, __va->__o->tensors[i].get(), __va->__rehasher); }, &_va ) : 0;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const SubGraphT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
+  auto _tensors =
+      _o->tensors.size()
+          ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(
+                _o->tensors.size(),
+                [](size_t i, _VectorArgs *__va) {
+                  return CreateTensor(*__va->__fbb, __va->__o->tensors[i].get(),
+                                      __va->__rehasher);
+                },
+                &_va)
+          : 0;
   auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0;
   auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0;
-  auto _operators = _o->operators.size() ? _fbb.CreateVector<flatbuffers::Offset<Operator>> (_o->operators.size(), [](size_t i, _VectorArgs *__va) { return CreateOperator(*__va->__fbb, __va->__o->operators[i].get(), __va->__rehasher); }, &_va ) : 0;
+  auto _operators = _o->operators.size()
+                        ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(
+                              _o->operators.size(),
+                              [](size_t i, _VectorArgs *__va) {
+                                return CreateOperator(
+                                    *__va->__fbb, __va->__o->operators[i].get(),
+                                    __va->__rehasher);
+                              },
+                              &_va)
+                        : 0;
   auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
-  return tflite::CreateSubGraph(
-      _fbb,
-      _tensors,
-      _inputs,
-      _outputs,
-      _operators,
-      _name);
+  return tflite::CreateSubGraph(_fbb, _tensors, _inputs, _outputs, _operators,
+                                _name);
 }
 
-inline BufferT *Buffer::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline BufferT *Buffer::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new BufferT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void Buffer::UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void Buffer::UnPackTo(
+    BufferT *_o, const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = data(); if (_e) { _o->data.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->data[_i] = _e->Get(_i); } } };
+  {
+    auto _e = data();
+    if (_e) {
+      _o->data.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->data[_i] = _e->Get(_i);
+      }
+    }
+  };
 }
 
-inline flatbuffers::Offset<Buffer> Buffer::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Buffer> Buffer::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateBuffer(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<Buffer> CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Buffer> CreateBuffer(
+    flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BufferT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const BufferT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _data = _o->data.size() ? _fbb.CreateVector(_o->data) : 0;
-  return tflite::CreateBuffer(
-      _fbb,
-      _data);
+  return tflite::CreateBuffer(_fbb, _data);
 }
 
-inline ModelT *Model::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+inline ModelT *Model::UnPack(
+    const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new ModelT();
   UnPackTo(_o, _resolver);
   return _o;
 }
 
-inline void Model::UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+inline void Model::UnPackTo(
+    ModelT *_o, const flatbuffers::resolver_function_t *_resolver) const {
   (void)_o;
   (void)_resolver;
-  { auto _e = version(); _o->version = _e; };
-  { auto _e = operator_codes(); if (_e) { _o->operator_codes.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->operator_codes[_i] = std::unique_ptr<OperatorCodeT>(_e->Get(_i)->UnPack(_resolver)); } } };
-  { auto _e = subgraphs(); if (_e) { _o->subgraphs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->subgraphs[_i] = std::unique_ptr<SubGraphT>(_e->Get(_i)->UnPack(_resolver)); } } };
-  { auto _e = description(); if (_e) _o->description = _e->str(); };
-  { auto _e = buffers(); if (_e) { _o->buffers.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->buffers[_i] = std::unique_ptr<BufferT>(_e->Get(_i)->UnPack(_resolver)); } } };
+  {
+    auto _e = version();
+    _o->version = _e;
+  };
+  {
+    auto _e = operator_codes();
+    if (_e) {
+      _o->operator_codes.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->operator_codes[_i] =
+            std::unique_ptr<OperatorCodeT>(_e->Get(_i)->UnPack(_resolver));
+      }
+    }
+  };
+  {
+    auto _e = subgraphs();
+    if (_e) {
+      _o->subgraphs.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->subgraphs[_i] =
+            std::unique_ptr<SubGraphT>(_e->Get(_i)->UnPack(_resolver));
+      }
+    }
+  };
+  {
+    auto _e = description();
+    if (_e) _o->description = _e->str();
+  };
+  {
+    auto _e = buffers();
+    if (_e) {
+      _o->buffers.resize(_e->size());
+      for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) {
+        _o->buffers[_i] =
+            std::unique_ptr<BufferT>(_e->Get(_i)->UnPack(_resolver));
+      }
+    }
+  };
 }
 
-inline flatbuffers::Offset<Model> Model::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Model> Model::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   return CreateModel(_fbb, _o, _rehasher);
 }
 
-inline flatbuffers::Offset<Model> CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+inline flatbuffers::Offset<Model> CreateModel(
+    flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o,
+    const flatbuffers::rehasher_function_t *_rehasher) {
   (void)_rehasher;
   (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ModelT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  struct _VectorArgs {
+    flatbuffers::FlatBufferBuilder *__fbb;
+    const ModelT *__o;
+    const flatbuffers::rehasher_function_t *__rehasher;
+  } _va = {&_fbb, _o, _rehasher};
+  (void)_va;
   auto _version = _o->version;
-  auto _operator_codes = _o->operator_codes.size() ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>> (_o->operator_codes.size(), [](size_t i, _VectorArgs *__va) { return CreateOperatorCode(*__va->__fbb, __va->__o->operator_codes[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _subgraphs = _o->subgraphs.size() ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>> (_o->subgraphs.size(), [](size_t i, _VectorArgs *__va) { return CreateSubGraph(*__va->__fbb, __va->__o->subgraphs[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _description = _o->description.empty() ? 0 : _fbb.CreateString(_o->description);
-  auto _buffers = _o->buffers.size() ? _fbb.CreateVector<flatbuffers::Offset<Buffer>> (_o->buffers.size(), [](size_t i, _VectorArgs *__va) { return CreateBuffer(*__va->__fbb, __va->__o->buffers[i].get(), __va->__rehasher); }, &_va ) : 0;
-  return tflite::CreateModel(
-      _fbb,
-      _version,
-      _operator_codes,
-      _subgraphs,
-      _description,
-      _buffers);
-}
-
-inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type) {
+  auto _operator_codes =
+      _o->operator_codes.size()
+          ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(
+                _o->operator_codes.size(),
+                [](size_t i, _VectorArgs *__va) {
+                  return CreateOperatorCode(*__va->__fbb,
+                                            __va->__o->operator_codes[i].get(),
+                                            __va->__rehasher);
+                },
+                &_va)
+          : 0;
+  auto _subgraphs = _o->subgraphs.size()
+                        ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(
+                              _o->subgraphs.size(),
+                              [](size_t i, _VectorArgs *__va) {
+                                return CreateSubGraph(
+                                    *__va->__fbb, __va->__o->subgraphs[i].get(),
+                                    __va->__rehasher);
+                              },
+                              &_va)
+                        : 0;
+  auto _description =
+      _o->description.empty() ? 0 : _fbb.CreateString(_o->description);
+  auto _buffers =
+      _o->buffers.size()
+          ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(
+                _o->buffers.size(),
+                [](size_t i, _VectorArgs *__va) {
+                  return CreateBuffer(*__va->__fbb, __va->__o->buffers[i].get(),
+                                      __va->__rehasher);
+                },
+                &_va)
+          : 0;
+  return tflite::CreateModel(_fbb, _version, _operator_codes, _subgraphs,
+                             _description, _buffers);
+}
+
+inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier,
+                                 const void *obj, BuiltinOptions type) {
   switch (type) {
     case BuiltinOptions_NONE: {
       return true;
@@ -4048,7 +6473,8 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LocalResponseNormalizationOptions: {
-      auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
+      auto ptr =
+          reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LSTMOptions: {
@@ -4083,22 +6509,68 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const MulOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
-    default: return false;
+    case BuiltinOptions_PadOptions: {
+      auto ptr = reinterpret_cast<const PadOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_GatherOptions: {
+      auto ptr = reinterpret_cast<const GatherOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SpaceToBatchNDOptions: {
+      auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_TransposeOptions: {
+      auto ptr = reinterpret_cast<const TransposeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_MeanOptions: {
+      auto ptr = reinterpret_cast<const MeanOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SubOptions: {
+      auto ptr = reinterpret_cast<const SubOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_DivOptions: {
+      auto ptr = reinterpret_cast<const DivOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SqueezeOptions: {
+      auto ptr = reinterpret_cast<const SqueezeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    default:
+      return false;
   }
 }
 
-inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
+inline bool VerifyBuiltinOptionsVector(
+    flatbuffers::Verifier &verifier,
+    const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
+    const flatbuffers::Vector<uint8_t> *types) {
   if (values->size() != types->size()) return false;
   for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
-    if (!VerifyBuiltinOptions(
-        verifier,  values->Get(i), types->GetEnum<BuiltinOptions>(i))) {
+    if (!VerifyBuiltinOptions(verifier, values->Get(i),
+                              types->GetEnum<BuiltinOptions>(i))) {
       return false;
     }
   }
   return true;
 }
 
-inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver) {
+inline void *BuiltinOptionsUnion::UnPack(
+    const void *obj, BuiltinOptions type,
+    const flatbuffers::resolver_function_t *resolver) {
   switch (type) {
     case BuiltinOptions_Conv2DOptions: {
       auto ptr = reinterpret_cast<const Conv2DOptions *>(obj);
@@ -4149,7 +6621,8 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       return ptr->UnPack(resolver);
     }
     case BuiltinOptions_LocalResponseNormalizationOptions: {
-      auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
+      auto ptr =
+          reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
       return ptr->UnPack(resolver);
     }
     case BuiltinOptions_LSTMOptions: {
@@ -4184,11 +6657,54 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       auto ptr = reinterpret_cast<const MulOptions *>(obj);
       return ptr->UnPack(resolver);
     }
-    default: return nullptr;
+    case BuiltinOptions_PadOptions: {
+      auto ptr = reinterpret_cast<const PadOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_GatherOptions: {
+      auto ptr = reinterpret_cast<const GatherOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_SpaceToBatchNDOptions: {
+      auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_TransposeOptions: {
+      auto ptr = reinterpret_cast<const TransposeOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_MeanOptions: {
+      auto ptr = reinterpret_cast<const MeanOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_SubOptions: {
+      auto ptr = reinterpret_cast<const SubOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_DivOptions: {
+      auto ptr = reinterpret_cast<const DivOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_SqueezeOptions: {
+      auto ptr = reinterpret_cast<const SqueezeOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_SequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    default:
+      return nullptr;
   }
 }
 
-inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const {
+inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const flatbuffers::rehasher_function_t *_rehasher) const {
   switch (type) {
     case BuiltinOptions_Conv2DOptions: {
       auto ptr = reinterpret_cast<const Conv2DOptionsT *>(value);
@@ -4239,8 +6755,10 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       return CreateL2NormOptions(_fbb, ptr, _rehasher).Union();
     }
     case BuiltinOptions_LocalResponseNormalizationOptions: {
-      auto ptr = reinterpret_cast<const LocalResponseNormalizationOptionsT *>(value);
-      return CreateLocalResponseNormalizationOptions(_fbb, ptr, _rehasher).Union();
+      auto ptr =
+          reinterpret_cast<const LocalResponseNormalizationOptionsT *>(value);
+      return CreateLocalResponseNormalizationOptions(_fbb, ptr, _rehasher)
+          .Union();
     }
     case BuiltinOptions_LSTMOptions: {
       auto ptr = reinterpret_cast<const LSTMOptionsT *>(value);
@@ -4274,26 +6792,72 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       auto ptr = reinterpret_cast<const MulOptionsT *>(value);
       return CreateMulOptions(_fbb, ptr, _rehasher).Union();
     }
-    default: return 0;
+    case BuiltinOptions_PadOptions: {
+      auto ptr = reinterpret_cast<const PadOptionsT *>(value);
+      return CreatePadOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_GatherOptions: {
+      auto ptr = reinterpret_cast<const GatherOptionsT *>(value);
+      return CreateGatherOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      auto ptr = reinterpret_cast<const BatchToSpaceNDOptionsT *>(value);
+      return CreateBatchToSpaceNDOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SpaceToBatchNDOptions: {
+      auto ptr = reinterpret_cast<const SpaceToBatchNDOptionsT *>(value);
+      return CreateSpaceToBatchNDOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_TransposeOptions: {
+      auto ptr = reinterpret_cast<const TransposeOptionsT *>(value);
+      return CreateTransposeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_MeanOptions: {
+      auto ptr = reinterpret_cast<const MeanOptionsT *>(value);
+      return CreateMeanOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SubOptions: {
+      auto ptr = reinterpret_cast<const SubOptionsT *>(value);
+      return CreateSubOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_DivOptions: {
+      auto ptr = reinterpret_cast<const DivOptionsT *>(value);
+      return CreateDivOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SqueezeOptions: {
+      auto ptr = reinterpret_cast<const SqueezeOptionsT *>(value);
+      return CreateSqueezeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const SequenceRNNOptionsT *>(value);
+      return CreateSequenceRNNOptions(_fbb, ptr, _rehasher).Union();
+    }
+    default:
+      return 0;
   }
 }
 
-inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FLATBUFFERS_NOEXCEPT : type(u.type), value(nullptr) {
+inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u)
+    FLATBUFFERS_NOEXCEPT : type(u.type),
+                           value(nullptr) {
   switch (type) {
     case BuiltinOptions_Conv2DOptions: {
       value = new Conv2DOptionsT(*reinterpret_cast<Conv2DOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_DepthwiseConv2DOptions: {
-      value = new DepthwiseConv2DOptionsT(*reinterpret_cast<DepthwiseConv2DOptionsT *>(u.value));
+      value = new DepthwiseConv2DOptionsT(
+          *reinterpret_cast<DepthwiseConv2DOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_ConcatEmbeddingsOptions: {
-      value = new ConcatEmbeddingsOptionsT(*reinterpret_cast<ConcatEmbeddingsOptionsT *>(u.value));
+      value = new ConcatEmbeddingsOptionsT(
+          *reinterpret_cast<ConcatEmbeddingsOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_LSHProjectionOptions: {
-      value = new LSHProjectionOptionsT(*reinterpret_cast<LSHProjectionOptionsT *>(u.value));
+      value = new LSHProjectionOptionsT(
+          *reinterpret_cast<LSHProjectionOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_Pool2DOptions: {
@@ -4309,15 +6873,18 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       break;
     }
     case BuiltinOptions_FullyConnectedOptions: {
-      value = new FullyConnectedOptionsT(*reinterpret_cast<FullyConnectedOptionsT *>(u.value));
+      value = new FullyConnectedOptionsT(
+          *reinterpret_cast<FullyConnectedOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_SoftmaxOptions: {
-      value = new SoftmaxOptionsT(*reinterpret_cast<SoftmaxOptionsT *>(u.value));
+      value =
+          new SoftmaxOptionsT(*reinterpret_cast<SoftmaxOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_ConcatenationOptions: {
-      value = new ConcatenationOptionsT(*reinterpret_cast<ConcatenationOptionsT *>(u.value));
+      value = new ConcatenationOptionsT(
+          *reinterpret_cast<ConcatenationOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_AddOptions: {
@@ -4329,7 +6896,8 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       break;
     }
     case BuiltinOptions_LocalResponseNormalizationOptions: {
-      value = new LocalResponseNormalizationOptionsT(*reinterpret_cast<LocalResponseNormalizationOptionsT *>(u.value));
+      value = new LocalResponseNormalizationOptionsT(
+          *reinterpret_cast<LocalResponseNormalizationOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_LSTMOptions: {
@@ -4337,7 +6905,8 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       break;
     }
     case BuiltinOptions_ResizeBilinearOptions: {
-      value = new ResizeBilinearOptionsT(*reinterpret_cast<ResizeBilinearOptionsT *>(u.value));
+      value = new ResizeBilinearOptionsT(
+          *reinterpret_cast<ResizeBilinearOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_CallOptions: {
@@ -4345,25 +6914,74 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       break;
     }
     case BuiltinOptions_ReshapeOptions: {
-      value = new ReshapeOptionsT(*reinterpret_cast<ReshapeOptionsT *>(u.value));
+      value =
+          new ReshapeOptionsT(*reinterpret_cast<ReshapeOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_SkipGramOptions: {
-      value = new SkipGramOptionsT(*reinterpret_cast<SkipGramOptionsT *>(u.value));
+      value =
+          new SkipGramOptionsT(*reinterpret_cast<SkipGramOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_SpaceToDepthOptions: {
-      value = new SpaceToDepthOptionsT(*reinterpret_cast<SpaceToDepthOptionsT *>(u.value));
+      value = new SpaceToDepthOptionsT(
+          *reinterpret_cast<SpaceToDepthOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_EmbeddingLookupSparseOptions: {
-      value = new EmbeddingLookupSparseOptionsT(*reinterpret_cast<EmbeddingLookupSparseOptionsT *>(u.value));
+      value = new EmbeddingLookupSparseOptionsT(
+          *reinterpret_cast<EmbeddingLookupSparseOptionsT *>(u.value));
       break;
     }
     case BuiltinOptions_MulOptions: {
       value = new MulOptionsT(*reinterpret_cast<MulOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_PadOptions: {
+      value = new PadOptionsT(*reinterpret_cast<PadOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_GatherOptions: {
+      value = new GatherOptionsT(*reinterpret_cast<GatherOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      value = new BatchToSpaceNDOptionsT(
+          *reinterpret_cast<BatchToSpaceNDOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SpaceToBatchNDOptions: {
+      value = new SpaceToBatchNDOptionsT(
+          *reinterpret_cast<SpaceToBatchNDOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_TransposeOptions: {
+      value = new TransposeOptionsT(
+          *reinterpret_cast<TransposeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_MeanOptions: {
+      value = new MeanOptionsT(*reinterpret_cast<MeanOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SubOptions: {
+      value = new SubOptionsT(*reinterpret_cast<SubOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_DivOptions: {
+      value = new DivOptionsT(*reinterpret_cast<DivOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SqueezeOptions: {
+      value =
+          new SqueezeOptionsT(*reinterpret_cast<SqueezeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SequenceRNNOptions: {
+      value = new SequenceRNNOptionsT(
+          *reinterpret_cast<SequenceRNNOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -4476,7 +7094,58 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
-    default: break;
+    case BuiltinOptions_PadOptions: {
+      auto ptr = reinterpret_cast<PadOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_GatherOptions: {
+      auto ptr = reinterpret_cast<GatherOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      auto ptr = reinterpret_cast<BatchToSpaceNDOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SpaceToBatchNDOptions: {
+      auto ptr = reinterpret_cast<SpaceToBatchNDOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_TransposeOptions: {
+      auto ptr = reinterpret_cast<TransposeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_MeanOptions: {
+      auto ptr = reinterpret_cast<MeanOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SubOptions: {
+      auto ptr = reinterpret_cast<SubOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_DivOptions: {
+      auto ptr = reinterpret_cast<DivOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SqueezeOptions: {
+      auto ptr = reinterpret_cast<SqueezeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SequenceRNNOptions: {
+      auto ptr = reinterpret_cast<SequenceRNNOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    default:
+      break;
   }
   value = nullptr;
   type = BuiltinOptions_NONE;
@@ -4486,33 +7155,25 @@ inline const tflite::Model *GetModel(const void *buf) {
   return flatbuffers::GetRoot<tflite::Model>(buf);
 }
 
-inline const char *ModelIdentifier() {
-  return "TFL3";
-}
+inline const char *ModelIdentifier() { return "TFL3"; }
 
 inline bool ModelBufferHasIdentifier(const void *buf) {
-  return flatbuffers::BufferHasIdentifier(
-      buf, ModelIdentifier());
+  return flatbuffers::BufferHasIdentifier(buf, ModelIdentifier());
 }
 
-inline bool VerifyModelBuffer(
-    flatbuffers::Verifier &verifier) {
+inline bool VerifyModelBuffer(flatbuffers::Verifier &verifier) {
   return verifier.VerifyBuffer<tflite::Model>(ModelIdentifier());
 }
 
-inline const char *ModelExtension() {
-  return "tflite";
-}
+inline const char *ModelExtension() { return "tflite"; }
 
-inline void FinishModelBuffer(
-    flatbuffers::FlatBufferBuilder &fbb,
-    flatbuffers::Offset<tflite::Model> root) {
+inline void FinishModelBuffer(flatbuffers::FlatBufferBuilder &fbb,
+                              flatbuffers::Offset<tflite::Model> root) {
   fbb.Finish(root, ModelIdentifier());
 }
 
 inline std::unique_ptr<ModelT> UnPackModel(
-    const void *buf,
-    const flatbuffers::resolver_function_t *res = nullptr) {
+    const void *buf, const flatbuffers::resolver_function_t *res = nullptr) {
   return std::unique_ptr<ModelT>(GetModel(buf)->UnPack(res));
 }
 
diff --git a/tensorflow/contrib/lite/simple_memory_arena.h b/tensorflow/contrib/lite/simple_memory_arena.h
index 0d0b7f9ff79bf9fd8a60dbc057d63f44eeaa6396..07a38c42436655d307c89a987ebba4db38eba442 100644
--- a/tensorflow/contrib/lite/simple_memory_arena.h
+++ b/tensorflow/contrib/lite/simple_memory_arena.h
@@ -68,6 +68,10 @@ class SimpleMemoryArena {
 
   TfLiteStatus Clear();
 
+  int64_t BasePointer() const {
+    return reinterpret_cast<int64_t>(underlying_buffer_aligned_ptr_);
+  }
+
  private:
   bool commited_;
   size_t arena_alignment_;
diff --git a/tensorflow/contrib/lite/simple_memory_arena_test.cc b/tensorflow/contrib/lite/simple_memory_arena_test.cc
index ac676092c6d5d8982b65cd35c2b9770d10ea37b2..4444f642eb75c563c57762d095e454ac63d836c6 100644
--- a/tensorflow/contrib/lite/simple_memory_arena_test.cc
+++ b/tensorflow/contrib/lite/simple_memory_arena_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
 namespace {
@@ -85,7 +86,7 @@ TEST(SimpleMemoryArenaTest, TestAfterClear) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/string_util_test.cc b/tensorflow/contrib/lite/string_util_test.cc
index 5c351638dc2fad0e64fda6d3a9cb14dfc45375af..d53fec7512f902fb277524100640f4a6a2aaf130 100644
--- a/tensorflow/contrib/lite/string_util_test.cc
+++ b/tensorflow/contrib/lite/string_util_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <gtest/gtest.h>
 #include "tensorflow/contrib/lite/context.h"
 #include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/testing/util.h"
 
 namespace tflite {
 
@@ -111,7 +112,7 @@ TEST(StringUtil, TestEmptyList) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: tflite::LogToStderr();
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index ecddb4b807bf1dddec10adfcbab6db6cca85247a..933da11353a04d4b1538c9b8d777365a875e62fc 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -18,19 +18,24 @@ gen_zipped_test_files(
     files = [
         "add.zip",
         "avg_pool.zip",
+        "batch_to_space_nd.zip",
         "concat.zip",
         "constant.zip",
         "control_dep.zip",
         "conv.zip",
         "depthwiseconv.zip",
+        "div.zip",
         "fully_connected.zip",
         "fused_batch_norm.zip",
+        "gather.zip",
         "global_batch_norm.zip",
         "l2_pool.zip",
         "l2norm.zip",
         "local_response_norm.zip",
         "max_pool.zip",
+        "mean.zip",
         "mul.zip",
+        "pad.zip",
         "relu.zip",
         "relu1.zip",
         "relu6.zip",
@@ -38,7 +43,11 @@ gen_zipped_test_files(
         "resize_bilinear.zip",
         "sigmoid.zip",
         "softmax.zip",
+        "space_to_batch_nd.zip",
         "space_to_depth.zip",
+        "squeeze.zip",
+        "sub.zip",
+        "transpose.zip",
     ],
 )
 
@@ -160,6 +169,12 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "util",
+    testonly = 1,
+    hdrs = ["util.h"],
+)
+
 cc_test(
     name = "test_runner_test",
     srcs = ["test_runner_test.cc"],
@@ -174,9 +189,7 @@ cc_binary(
     srcs = ["nnapi_example.cc"],
     deps = [
         ":parse_testdata_lib",
-        "//tensorflow/contrib/lite:builtin_op_data",
-        "//tensorflow/contrib/lite:framework",
-        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        ":tflite_driver",
         "//tensorflow/contrib/lite/nnapi:nnapi_lib",
     ],
 )
@@ -185,20 +198,35 @@ tf_cc_test(
     name = "generated_examples_zip_test",
     size = "medium",
     srcs = ["generated_examples_zip_test.cc"],
+    args = [
+        "--zip_files_dir=tensorflow/contrib/lite/testing/optest",
+        # TODO(angerson) We may be able to add an external unzip binary instead
+        # of relying on an existing one for OSS builds.
+        "--unzip_binary_path=/usr/bin/unzip",
+    ],
     data = [":optest"],
     shard_count = 10,
     tags = ["no_oss"],
     deps = [
         ":parse_testdata_lib",
+        ":tflite_driver",
+        ":util",
+        "@com_google_googletest//:gtest",
+        "@com_googlesource_code_re2//:re2",
         "//tensorflow/contrib/lite:builtin_op_data",
         "//tensorflow/contrib/lite:framework",
         "//tensorflow/contrib/lite/kernels:builtin_ops",
-        "//tensorflow/core:framework_internal",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:test",
-        "@com_google_googletest//:gtest",
-        "@com_googlesource_code_re2//:re2",
-    ],
+    ] + select({
+        "//conditions:default": [
+            "//tensorflow/core:framework_internal",
+            "//tensorflow/core:lib",
+            "//tensorflow/core:test",
+        ],
+        "//tensorflow:android": [
+            "//tensorflow/core:android_tensorflow_lib",
+            "//tensorflow/core:android_tensorflow_test_lib",
+        ],
+    }),
 )
 
 filegroup(
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 5bca82ded038ded702effd46c0f4247e45a36524..6c3d31fc9a278e14864c3de12be9e8d0f835c522 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -94,6 +94,12 @@ KNOWN_BUGS = {
     r"softmax.*input_shape=\[1,3,4,3\]": "67749831",
     # SpaceToDepth only supports float32.
     r"space_to_depth.*(float16|int32|uint8|int64)": "68018134",
+    # BatchToSpaceND doesn't support cropping.
+    r"batch_to_space_nd.*crops=\[\[1,1\],\[1,1\]\]": "70594634",
+    # BatchToSpaceND only supports 4D tensors.
+    r"batch_to_space_nd.*input_shape=\[8,2,2,2,1,1\]": "70594733",
+    # Div will use floordiv
+    r"div.*int32": "72051395"
 }
 
 
@@ -120,7 +126,7 @@ def toco_options(data_types,
   # to change
   if data_types[0] == "QUANTIZED_UINT8":
     inference_type = "QUANTIZED_UINT8"
-  s = (" --input_types=%s" % ",".join(data_types) +
+  s = (" --input_data_types=%s" % ",".join(data_types) +
        " --inference_type=%s" % inference_type +
        " --input_format=TENSORFLOW_GRAPHDEF" + " --output_format=TFLITE" +
        " --input_arrays=%s" % ",".join(input_arrays) +
@@ -626,7 +632,7 @@ def make_constant_tests(zip_path):
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
-def make_add_tests(zip_path):
+def make_binary_op_tests(zip_path, binary_operator):
   """Make a set of tests to do add with and without broadcast."""
 
   # These parameters are split because we don't support broadcasting.
@@ -634,25 +640,36 @@ def make_add_tests(zip_path):
       "dtype": [tf.float32, tf.int32],
       "input_shape_1": [[1, 3, 4, 3]],
       "input_shape_2": [[1, 3, 4, 3]],
+      "activation": [True]
   }, {
       "dtype": [tf.float32],
       "input_shape_1": [[5]],
       "input_shape_2": [[5]],
+      "activation": [False, True]
   }, {
       "dtype": [tf.float32],
       "input_shape_1": [[1, 3, 4, 3]],
       "input_shape_2": [[3]],
+      "activation": [True]
   }]
 
   def build_graph(parameters):
-    input1 = tf.placeholder(dtype=parameters["dtype"], name="input1",
-                            shape=parameters["input_shape_1"])
-    input2 = tf.placeholder(dtype=parameters["dtype"], name="input2",
-                            shape=parameters["input_shape_2"])
-    out = tf.add(input1, input2)
+    """Builds the graph given the current parameters."""
+    input1 = tf.placeholder(
+        dtype=parameters["dtype"],
+        name="input1",
+        shape=parameters["input_shape_1"])
+    input2 = tf.placeholder(
+        dtype=parameters["dtype"],
+        name="input2",
+        shape=parameters["input_shape_2"])
+    out = binary_operator(input1, input2)
+    if parameters["activation"]:
+      out = tf.nn.relu(out)
     return [input1, input2], [out]
 
   def build_inputs(parameters, sess, inputs, outputs):
+    """Builds operand inputs for op."""
     input1 = create_tensor_data(parameters["dtype"],
                                 parameters["input_shape_1"])
     input2 = create_tensor_data(parameters["dtype"],
@@ -666,40 +683,92 @@ def make_add_tests(zip_path):
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
-def make_mul_tests(zip_path):
-  """Make a set of tests to do mul with and without broadcast."""
+def make_mean_tests(zip_path):
+  """Make a set of tests to do mean."""
 
-  # These parameters are split because we don't support broadcasting.
   test_parameters = [{
-      "dtype": [tf.float32, tf.int32],
-      "input_shape_1": [[1, 3, 4, 3]],
-      "input_shape_2": [[1, 3, 4, 3]],
+      "input_dtype": [tf.float32, tf.int32, tf.int64],
+      "input_shape": [[3, 2, 4]],
+      "axis": [
+          None, 0, 1, 2, [0, 1], [0, 2], [1, 2], [0, 1, 2], [1, 0], [2, 0],
+          [2, 1], [2, 1, 0], [2, 0, 1], -1, -2, -3, [1, -1], [0, -1], [-1, 0],
+          [-1, -2, -3], [0, 0, 0], [2, 2, 0], [1, 0, -3, -3]
+      ],
+      "keep_dims": [True, False],
   }, {
-      "dtype": [tf.float32],
-      "input_shape_1": [[5]],
-      "input_shape_2": [[5]],
-  }, {
-      "dtype": [tf.float32],
-      "input_shape_1": [[1, 3, 4, 3]],
-      "input_shape_2": [[3]],
+      "input_dtype": [tf.float32, tf.int32, tf.int64],
+      "input_shape": [[1, 224, 224, 3]],
+      "axis": [
+          None, 0, 1, 2, 3, [1, 2], [0, 3], [1, 2, 3], [0, 1, 2, 3],
+          [3, 2, 1, 0], [3, 1, 0, 2], [2, 0], [3, 0], [3, 1], [1, 0], -1, -2,
+          -3, -4, [0, -2], [2, 3, -1, 0], [3, 1, 2, -3], [3, -4], [2, 2, 2],
+          [2, 2, 3], [-3, -3, -4], [-3, 2, 1]
+      ],
+      "keep_dims": [True, False],
   }]
 
   def build_graph(parameters):
-    input1 = tf.placeholder(dtype=parameters["dtype"], name="input1",
-                            shape=parameters["input_shape_1"])
-    input2 = tf.placeholder(dtype=parameters["dtype"], name="input2",
-                            shape=parameters["input_shape_2"])
-    out = tf.multiply(input1, input2)
-    return [input1, input2], [out]
+    """Build the mean op testing graph."""
+    input_tensor = tf.placeholder(
+        dtype=parameters["input_dtype"],
+        name="input",
+        shape=parameters["input_shape"])
+    out = tf.reduce_mean(
+        input_tensor,
+        axis=parameters["axis"],
+        keep_dims=parameters["keep_dims"])
+    return [input_tensor], [out]
 
   def build_inputs(parameters, sess, inputs, outputs):
-    input1 = create_tensor_data(parameters["dtype"],
-                                parameters["input_shape_1"])
-    input2 = create_tensor_data(parameters["dtype"],
-                                parameters["input_shape_2"])
-    return [input1, input2], sess.run(
-        outputs, feed_dict={inputs[0]: input1,
-                            inputs[1]: input2})
+    input_values = create_tensor_data(parameters["input_dtype"],
+                                      parameters["input_shape"])
+    return [input_values], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [input_values])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
+def make_binary_op_tests_func(binary_operator):
+  """Return a function that does a test on a binary operator."""
+  return lambda zip_path: make_binary_op_tests(zip_path, binary_operator)
+
+
+def make_gather_tests(zip_path):
+  """Make a set of tests to do gather."""
+
+  test_parameters = [{
+      # TODO(mgubin): add string tests when they are supported by Toco.
+      # TODO(mgubin): add tests for Nd indices when they are supported by
+      # TfLite.
+      # TODO(mgubin): add tests for axis != 0 when it is supported by TfLite.
+      "params_dtype": [tf.float32, tf.int32],
+      "params_shape": [[10], [1, 2, 20]],
+      "indices_dtype": [tf.int32],
+      "indices_shape": [[3], [5]],
+      "axis": [0],  # axis!=0 is GatherV2
+  }]
+
+  def build_graph(parameters):
+    """Build the gather op testing graph."""
+    params = tf.placeholder(
+        dtype=parameters["params_dtype"],
+        name="params",
+        shape=parameters["params_shape"])
+    indices = tf.placeholder(
+        dtype=parameters["indices_dtype"],
+        name="indices",
+        shape=parameters["indices_shape"])
+    out = tf.gather(params, indices, axis=parameters["axis"])
+    return [params, indices], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    params = create_tensor_data(parameters["params_dtype"],
+                                parameters["params_shape"])
+    indices = create_tensor_data(parameters["indices_dtype"],
+                                 parameters["indices_shape"], 0,
+                                 parameters["params_shape"][0] - 1)
+    return [params, indices], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [params, indices])))
 
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
@@ -999,10 +1068,46 @@ def make_local_response_norm_tests(zip_path):
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
+def make_pad_tests(zip_path):
+  """Make a set of tests to do pad."""
+
+  # TODO(nupurgarg): Add test for tf.uint8.
+  test_parameters = [
+      {
+          "dtype": [tf.int32, tf.int64, tf.float32],
+          "input_shape": [[1, 1, 2, 1], [2, 1, 1, 1]],
+          "paddings": [[[0, 0], [0, 1], [2, 3], [0, 0]], [[0, 1], [0, 0],
+                                                          [0, 0], [2, 3]]],
+      },
+      # Non-4D use case.
+      {
+          "dtype": [tf.int32, tf.int64, tf.float32],
+          "input_shape": [[1, 2], [0, 1, 2]],
+          "paddings": [[[0, 1], [2, 3]]],
+      },
+  ]
+
+  def build_graph(parameters):
+    input_tensor = tf.placeholder(
+        dtype=parameters["dtype"],
+        name="input",
+        shape=parameters["input_shape"])
+    out = tf.pad(input_tensor, paddings=parameters["paddings"])
+    return [input_tensor], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    input_values = create_tensor_data(parameters["dtype"],
+                                      parameters["input_shape"])
+    return [input_values], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [input_values])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
 def make_reshape_tests(zip_path):
   """Make a set of tests to do reshape."""
 
-  # Alll shapes below are suitable for tensors with 420 elements.
+  # All shapes below are suitable for tensors with 420 elements.
   test_parameters = [{
       "dtype": [tf.float32, tf.int32],
       "input_shape": [[3, 4, 5, 7], [4, 105], [21, 5, 2, 2], [420]],
@@ -1125,6 +1230,156 @@ def make_space_to_depth_tests(zip_path):
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
+def make_space_to_batch_nd_tests(zip_path):
+  """Make a set of tests to do space_to_batch_nd."""
+
+  # TODO(nupurgarg): Add test for uint8.
+  test_parameters = [
+      {
+          "dtype": [tf.int32, tf.int64, tf.float32],
+          "input_shape": [[1, 2, 2, 3], [2, 2, 4, 1]],
+          "block_shape": [[1, 3], [2, 2]],
+          "paddings": [[[0, 0], [0, 0]], [[0, 0], [2, 0]], [[1, 1], [1, 1]]],
+      },
+      {
+          "dtype": [tf.float32],
+          "input_shape": [[2, 3, 7, 3]],
+          "block_shape": [[1, 3], [2, 2]],
+          "paddings": [[[0, 0], [2, 0]], [[1, 0], [1, 0]]],
+      },
+      # Non-4D use case: 1 bath dimension, 3 spatial dimensions, 2 others.
+      {
+          "dtype": [tf.float32],
+          "input_shape": [[1, 4, 4, 4, 1, 1]],
+          "block_shape": [[2, 2, 2]],
+          "paddings": [[[0, 0], [0, 0], [0, 0]]],
+      },
+  ]
+
+  def build_graph(parameters):
+    input_tensor = tf.placeholder(
+        dtype=parameters["dtype"],
+        name="input",
+        shape=parameters["input_shape"])
+    out = tf.space_to_batch_nd(input_tensor, parameters["block_shape"],
+                               parameters["paddings"])
+    return [input_tensor], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    input_values = create_tensor_data(parameters["dtype"],
+                                      parameters["input_shape"])
+    return [input_values], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [input_values])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
+def make_batch_to_space_nd_tests(zip_path):
+  """Make a set of tests to do batch_to_space_nd."""
+
+  test_parameters = [
+      {
+          "dtype": [tf.float32, tf.int64, tf.int32],
+          "input_shape": [[12, 2, 2, 1]],
+          "block_shape": [[1, 4], [2, 2], [3, 4]],
+          "crops": [[[0, 0], [0, 0]], [[1, 1], [1, 1]]],
+      },
+      # Non-4D use case: 1 bath dimension, 3 spatial dimensions, 2 others.
+      {
+          "dtype": [tf.float32],
+          "input_shape": [[8, 2, 2, 2, 1, 1]],
+          "block_shape": [[2, 2, 2]],
+          "crops": [[[0, 0], [0, 0], [0, 0]]],
+      },
+  ]
+
+  def build_graph(parameters):
+    input_tensor = tf.placeholder(
+        dtype=parameters["dtype"],
+        name="input",
+        shape=parameters["input_shape"])
+    out = tf.batch_to_space_nd(input_tensor, parameters["block_shape"],
+                               parameters["crops"])
+    return [input_tensor], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    input_values = create_tensor_data(parameters["dtype"],
+                                      parameters["input_shape"])
+    return [input_values], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [input_values])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
+def make_transpose_tests(zip_path):
+  """Make a set of tests to do transpose."""
+
+  # TODO(nupurgarg): Add test for uint8.
+  test_parameters = [{
+      "dtype": [tf.int32, tf.int64, tf.float32],
+      "input_shape": [[2, 2, 3]],
+      "perm": [[0, 1, 2], [0, 2, 1]],
+  }, {
+      "dtype": [tf.float32],
+      "input_shape": [[1, 2, 3, 4]],
+      "perm": [[0, 1, 2, 3], [3, 0, 1, 2]],
+  }, {
+      "dtype": [tf.float32],
+      "input_shape": [[1, 2, 3, 4, 5]],
+      "perm": [[0, 1, 2, 3, 4]],
+  }]
+
+  def build_graph(parameters):
+    input_tensor = tf.placeholder(
+        dtype=parameters["dtype"],
+        name="input",
+        shape=parameters["input_shape"])
+    out = tf.transpose(input_tensor, perm=parameters["perm"])
+    return [input_tensor], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    input_values = create_tensor_data(parameters["dtype"],
+                                      parameters["input_shape"])
+    return [input_values], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [input_values])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
+def make_squeeze_tests(zip_path):
+  """Make a set of tests to do squeeze."""
+
+  test_parameters = [{
+      "dtype": [tf.int32, tf.float32, tf.int64],
+      "input_shape": [[1, 2, 1, 3, 1, 4, 1, 1]],
+      "axis": [
+          None, [], [0, 2], [4, 7], [-1, 0, 2, 0, 7, -6], [1], [2, 3, 2],
+          [-1, -2, -4, -6, -8], [0, 2, 4, 6, 7], [7, 6, 4, 2, 0], [6, 6],
+          [0, 1, 2, 3, 4, 5, 6, 7], [-2, -3, 1, 0, 7, -5]
+      ],
+  }, {
+      "dtype": [tf.int32, tf.float32, tf.int64],
+      "input_shape": [[1]],
+      "axis": [None, [], [0], [-1]],
+  }]
+
+  def build_graph(parameters):
+    input_tensor = tf.placeholder(
+        dtype=parameters["dtype"],
+        name="input",
+        shape=parameters["input_shape"])
+    out = tf.squeeze(input_tensor, axis=parameters["axis"])
+    return [input_tensor], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    input_values = create_tensor_data(parameters["dtype"],
+                                      parameters["input_shape"])
+    return [input_values], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [input_values])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
 def make_l2_pool(input_tensor, ksize, strides, padding, data_format):
   """Given an input perform a sequence of TensorFlow ops to produce l2pool."""
   return tf.sqrt(tf.nn.avg_pool(
@@ -1152,28 +1407,37 @@ def main(unused_args):
 
     dispatch = {
         "control_dep.zip": make_control_dep_tests,
-        "add.zip": make_add_tests,
+        "add.zip": make_binary_op_tests_func(tf.add),
+        "space_to_batch_nd.zip": make_space_to_batch_nd_tests,
+        "div.zip": make_binary_op_tests_func(tf.div),
+        "sub.zip": make_binary_op_tests_func(tf.subtract),
+        "batch_to_space_nd.zip": make_batch_to_space_nd_tests,
         "conv.zip": make_conv_tests,
         "constant.zip": make_constant_tests,
         "depthwiseconv.zip": make_depthwiseconv_tests,
         "concat.zip": make_concatenation_tests,
         "fully_connected.zip": make_fully_connected_tests,
         "global_batch_norm.zip": make_global_batch_norm_tests,
+        "gather.zip": make_gather_tests,
         "fused_batch_norm.zip": make_fused_batch_norm_tests,
         "l2norm.zip": make_l2norm_tests,
         "local_response_norm.zip": make_local_response_norm_tests,
-        "mul.zip": make_mul_tests,
+        "mul.zip": make_binary_op_tests_func(tf.multiply),
         "relu.zip": make_relu_tests,
         "relu1.zip": make_relu1_tests,
         "relu6.zip": make_relu6_tests,
         "l2_pool.zip": make_pool_tests(make_l2_pool),
         "avg_pool.zip": make_pool_tests(tf.nn.avg_pool),
         "max_pool.zip": make_pool_tests(tf.nn.max_pool),
+        "pad.zip": make_pad_tests,
         "reshape.zip": make_reshape_tests,
         "resize_bilinear.zip": make_resize_bilinear_tests,
         "sigmoid.zip": make_sigmoid_tests,
         "softmax.zip": make_softmax_tests,
         "space_to_depth.zip": make_space_to_depth_tests,
+        "transpose.zip": make_transpose_tests,
+        "mean.zip": make_mean_tests,
+        "squeeze.zip": make_squeeze_tests,
     }
     out = FLAGS.zip_to_output
     bin_path = FLAGS.toco
diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
index e7df97ee54cc631c29a3a6f63a85894236f08157..c8a6e07abd02633f90ea768ad6f65d2a7d9d716a 100644
--- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
+++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
@@ -20,24 +20,25 @@ limitations under the License.
 #include <sstream>
 #include <gtest/gtest.h>
 #include "re2/re2.h"
-#include "tensorflow/contrib/lite/builtin_op_data.h"
-#include "tensorflow/contrib/lite/interpreter.h"
-#include "tensorflow/contrib/lite/kernels/register.h"
-#include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/testing/parse_testdata.h"
+#include "tensorflow/contrib/lite/testing/tflite_driver.h"
+#include "tensorflow/contrib/lite/testing/util.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/subprocess.h"
-#include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/util/command_line_flags.h"
 
+namespace tflite {
+namespace testing {
+
 namespace {
 bool FLAGS_ignore_known_bugs = true;
+// TODO(b/71769302) zip_files_dir should have a more accurate default, if
+// possible
+string* FLAGS_zip_files_dir = new string("./");
+string* FLAGS_unzip_binary_path = new string("/usr/bin/unzip");
 }  // namespace
 
-namespace tflite {
-namespace testing {
-
 // TensorFlow system environment for file system called.
 tensorflow::Env* env = tensorflow::Env::Default();
 
@@ -47,23 +48,31 @@ tensorflow::Env* env = tensorflow::Env::Default();
 // TODO(ahentz): make sure we clean this list up frequently.
 std::map<string, string> kBrokenTests = {
     // Add doesn't support broadcasting.
-    {R"(addd.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"},
-    {R"(muld.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"},
+    {R"(adda.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"},
+    {R"(mula.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"},
+    {R"(diva.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"},
+    {R"(suba.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"},
 
     // Add only supports float32. (and "constant" tests use Add)
-    {R"(addd.*int32)", "68808744"},
+    {R"(adda.*int32)", "68808744"},
     {R"(constant.*int32)", "68808744"},
     {R"(mul.*int32)", "68808744"},
+    {R"(div.*int32)", "68808744"},
+    {R"(sub.*int32)", "68808744"},
 
-    // Toco or TFLite has a bug to deal with some constant functions with
-    // more than 1 element.
-    {R"(constant.*input_shape=\[(2|2,2,2,2)\])", "68721522"},
+    // Pad only supports 4D tensors.
+    {R"(paddtype=.*,input_shape=\[.,.\],paddings=\[\[.,.\],\[.,.\]\])",
+     "70527055"},
 
-    // L2Norm only supports 4D tensors.
-    {R"(l2normdim=.*,epsilon=.*,input_shape=\[.,.\])", "67963684"},
+    // L2Norm only supports tensors with 4D or fewer.
     {R"(l2normdim=.*,epsilon=.*,input_shape=\[.,.,.,.,.*\])", "67963684"},
 
+    // SpaceToBatch only supports 4D tensors.
+    {R"(space_to_batch_nd.*input_shape=\[1,4,4,4,1,1\])", "70848787"},
+
     // L2Norm only works for dim=-1.
+    {R"(l2normdim=-2,epsilon=.*,input_shape=\[.,.\])", "67963812"},
+    {R"(l2normdim=0,epsilon=.*,input_shape=\[.,.\])", "67963812"},
     {R"(l2normdim=-2,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"},
     {R"(l2normdim=-2,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"},
     {R"(l2normdim=2,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"},
@@ -77,6 +86,9 @@ std::map<string, string> kBrokenTests = {
 
     // ResizeBilinear looks completely incompatible with Tensorflow
     {R"(resize_bilinear)", "67964336"},
+
+    // Transpose only supports 1D-4D input tensors.
+    {R"(transposedtype=.*,input_shape=\[.,.,.,.,.\],perm=.*)", "71545879"},
 };
 
 // Allows test data to be unzipped into a temporary directory and makes
@@ -96,13 +108,14 @@ class ZipEnvironment : public ::testing::Environment {
   }
 
   // Unzip `zip` file into a new temporary directory  `out_dir`.
-  tensorflow::Status UnZip(const std::string& zip, std::string* out_dir) {
+  tensorflow::Status UnZip(const string& zip, string* out_dir) {
     string dir;
     TF_CHECK_OK(MakeTemporaryDirectory(&dir));
     tensorflow::SubProcess proc;
-    std::string unzip_binary =
-        "/usr/bin/unzip";
-    proc.SetProgram(unzip_binary, {"unzip", "-d", dir, zip.c_str()});
+    string unzip_binary = *FLAGS_unzip_binary_path;
+    TF_CHECK_OK(env->FileExists(unzip_binary));
+    TF_CHECK_OK(env->FileExists(zip));
+    proc.SetProgram(unzip_binary, {"unzip", "-d", dir, zip});
     proc.SetChannelAction(tensorflow::CHAN_STDOUT, tensorflow::ACTION_PIPE);
     proc.SetChannelAction(tensorflow::CHAN_STDERR, tensorflow::ACTION_PIPE);
     if (!proc.Start())
@@ -144,85 +157,68 @@ ZipEnvironment* zip_environment() {
 // the temporary directory where the zip file has been unarchived and
 // `test_paths` is the list of test prefixes that were in the manifest.
 // Note, it is an error for a manifest to contain no tests.
-tensorflow::Status ReadManifest(const std::string& original_file,
-                                const std::string& dir,
-                                std::vector<std::string>* test_paths) {
+tensorflow::Status ReadManifest(const string& original_file, const string& dir,
+                                std::vector<string>* test_paths) {
   // Read the newline delimited list of entries in the manifest.
   std::ifstream manifest_fp(dir + "/manifest.txt");
-  std::string manifest((std::istreambuf_iterator<char>(manifest_fp)),
-                       std::istreambuf_iterator<char>());
+  string manifest((std::istreambuf_iterator<char>(manifest_fp)),
+                  std::istreambuf_iterator<char>());
   size_t pos = 0;
   int added = 0;
   while (true) {
     size_t end_pos = manifest.find("\n", pos);
-    if (end_pos == std::string::npos) break;
-    std::string filename = manifest.substr(pos, end_pos - pos);
+    if (end_pos == string::npos) break;
+    string filename = manifest.substr(pos, end_pos - pos);
     test_paths->push_back(dir + "/" + filename);
     pos = end_pos + 1;
     added += 1;
   }
   if (!added) {
-    std::string message = "Test had no examples: " + original_file;
+    string message = "Test had no examples: " + original_file;
     return tensorflow::Status(tensorflow::error::UNKNOWN, message.c_str());
   }
   return tensorflow::Status::OK();
 }
 
 // Get a list of tests from a zip file `zip_file_name`.
-std::vector<std::string> UnarchiveZipAndFindTestNames(
-    const std::string& zip_file_name) {
-  std::string zip_file = ::tensorflow::testing::TensorFlowSrcRoot() +
-                         "/contrib/lite/testing/optest/" + zip_file_name;
-  std::string decompress_tmp_dir;
+std::vector<string> UnarchiveZipAndFindTestNames(const string& zip_file_name) {
+  string zip_file = *FLAGS_zip_files_dir + "/" + zip_file_name;
+  string decompress_tmp_dir;
   TF_CHECK_OK(zip_environment()->UnZip(zip_file, &decompress_tmp_dir));
-  std::vector<std::string> stuff;
+  std::vector<string> stuff;
   TF_CHECK_OK(ReadManifest(zip_file, decompress_tmp_dir, &stuff));
   return stuff;
 }
 
-class OpsTest : public ::testing::TestWithParam<std::string> {};
+class OpsTest : public ::testing::TestWithParam<string> {};
 
 TEST_P(OpsTest, RunStuff) {
-  std::string test_path = GetParam();
-  std::string tflite_file = test_path + ".bin";
-  std::string tflite_examples = test_path + ".inputs";
-  auto model = tflite::FlatBufferModel::BuildFromFile(tflite_file.c_str());
-  std::unique_ptr<tflite::Interpreter> interpreter;
-
-  tflite::ops::builtin::BuiltinOpResolver builtins;
-  ASSERT_EQ(tflite::InterpreterBuilder(*model, builtins)(&interpreter),
-            kTfLiteOk);
+  string test_path = GetParam();
+  string tflite_test_case = test_path + "_tests.txt";
+  string tflite_dir = test_path.substr(0, test_path.find_last_of("/"));
+  string test_name = test_path.substr(test_path.find_last_of('/'));
 
-  std::vector<tflite::testing::Example> examples;
-  ASSERT_EQ(tflite::testing::ParseExamples(tflite_examples.c_str(), &examples),
-            kTfLiteOk);
+  std::ifstream tflite_stream(tflite_test_case);
+  ASSERT_TRUE(tflite_stream.is_open()) << tflite_test_case;
+  tflite::testing::TfLiteDriver test_driver(/*use_nnapi=*/true);
+  test_driver.SetModelBaseDir(tflite_dir);
 
   string bug_number;
   for (const auto& p : kBrokenTests) {
-    if (RE2::PartialMatch(test_path, p.first)) {
+    if (RE2::PartialMatch(test_name, p.first)) {
       bug_number = p.second;
     }
   }
 
-  for (const auto& example : examples) {
-    ASSERT_EQ(interpreter->inputs().size(), example.inputs.size());
-    auto result = [&]() {
-      TF_LITE_ENSURE_STATUS(FeedExample(interpreter.get(), example));
-      TF_LITE_ENSURE_STATUS(interpreter->Invoke());
-      TF_LITE_ENSURE_STATUS(CheckOutputs(interpreter.get(), example));
-      return kTfLiteOk;
-    }();
-
-    if (bug_number.empty()) {
-      ASSERT_EQ(result, kTfLiteOk);
+  bool result = tflite::testing::ParseAndRunTests(&tflite_stream, &test_driver);
+  if (bug_number.empty()) {
+    EXPECT_TRUE(result) << test_driver.GetErrorMessage();
+  } else {
+    if (FLAGS_ignore_known_bugs) {
+      EXPECT_FALSE(result);
     } else {
-      if (FLAGS_ignore_known_bugs) {
-        ASSERT_EQ(result, kTfLiteError)
-            << "Not failing as expected dut to http://b/" << bug_number;
-      } else {
-        ASSERT_EQ(result, kTfLiteOk)
-            << "Possibly due to http://b/" << bug_number;
-      }
+      EXPECT_TRUE(result) << test_driver.GetErrorMessage()
+                          << ": Possibly due to http://b/" << bug_number;
     }
   }
 }
@@ -236,19 +232,24 @@ TEST_P(OpsTest, RunStuff) {
 
 INSTANTIATE_TESTS(add)
 INSTANTIATE_TESTS(avg_pool)
+INSTANTIATE_TESTS(space_to_batch_nd)
+INSTANTIATE_TESTS(batch_to_space_nd)
 INSTANTIATE_TESTS(concat)
-INSTANTIATE_TESTS(constant)
+// TODO(b/71642435) re-enable this test
+// INSTANTIATE_TESTS(constant)
 INSTANTIATE_TESTS(control_dep)
 INSTANTIATE_TESTS(conv)
 INSTANTIATE_TESTS(depthwiseconv)
 INSTANTIATE_TESTS(fully_connected)
 INSTANTIATE_TESTS(fused_batch_norm)
+INSTANTIATE_TESTS(gather)
 INSTANTIATE_TESTS(global_batch_norm)
 INSTANTIATE_TESTS(l2norm)
 INSTANTIATE_TESTS(l2_pool)
 INSTANTIATE_TESTS(local_response_norm)
 INSTANTIATE_TESTS(max_pool)
 INSTANTIATE_TESTS(mul)
+INSTANTIATE_TESTS(pad)
 INSTANTIATE_TESTS(relu)
 INSTANTIATE_TESTS(relu1)
 INSTANTIATE_TESTS(relu6)
@@ -257,6 +258,11 @@ INSTANTIATE_TESTS(resize_bilinear)
 INSTANTIATE_TESTS(sigmoid)
 INSTANTIATE_TESTS(softmax)
 INSTANTIATE_TESTS(space_to_depth)
+INSTANTIATE_TESTS(sub)
+INSTANTIATE_TESTS(div)
+INSTANTIATE_TESTS(transpose)
+INSTANTIATE_TESTS(mean)
+INSTANTIATE_TESTS(squeeze)
 
 }  // namespace testing
 }  // namespace tflite
@@ -264,16 +270,23 @@ INSTANTIATE_TESTS(space_to_depth)
 int main(int argc, char** argv) {
   ::testing::AddGlobalTestEnvironment(tflite::testing::zip_environment());
 
-  std::vector<tensorflow::Flag> flags = {tensorflow::Flag(
-      "ignore_known_bugs", &FLAGS_ignore_known_bugs,
-      "If a particular model is affected by a known bug, the "
-      "corresponding test should expect the outputs to not match.")};
+  std::vector<tensorflow::Flag> flags = {
+      tensorflow::Flag(
+          "ignore_known_bugs", &tflite::testing::FLAGS_ignore_known_bugs,
+          "If a particular model is affected by a known bug, the "
+          "corresponding test should expect the outputs to not match."),
+      tensorflow::Flag("zip_files_dir", tflite::testing::FLAGS_zip_files_dir,
+                       "Required: Location of the test zips."),
+      tensorflow::Flag("unzip_binary_path",
+                       tflite::testing::FLAGS_unzip_binary_path,
+                       "Required: Location of a suitable unzip binary.")};
   bool success = tensorflow::Flags::Parse(&argc, argv, flags);
   if (!success || (argc == 2 && !strcmp(argv[1], "--helpfull"))) {
     fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str());
     return 1;
   }
 
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/tensorflow/contrib/lite/testing/nnapi_example.cc b/tensorflow/contrib/lite/testing/nnapi_example.cc
index 74f6cfc3de5d209671c38595434a43128966bb0e..5870782b69217f292fe60821ea8ce4ea1174c495 100644
--- a/tensorflow/contrib/lite/testing/nnapi_example.cc
+++ b/tensorflow/contrib/lite/testing/nnapi_example.cc
@@ -19,80 +19,35 @@ limitations under the License.
 // Usage: bazel run -c opt \
 // tensorflow/contrib/lite/nnapi:nnapi_example -- <filename>
 //
+#include <dirent.h>
 #include <cstdarg>
 #include <cstdio>
-#include "tensorflow/contrib/lite/builtin_op_data.h"
-#include "tensorflow/contrib/lite/interpreter.h"
-#include "tensorflow/contrib/lite/kernels/register.h"
-#include "tensorflow/contrib/lite/model.h"
+#include <fstream>
+#include <iostream>
+#include <sstream>
 #include "tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h"
 #include "tensorflow/contrib/lite/testing/parse_testdata.h"
+#include "tensorflow/contrib/lite/testing/tflite_driver.h"
 
-// TODO(aselle): FATAL leaves resources hanging.
-void FATAL(const char* format, ...) {
-  va_list args;
-  va_start(args, format);
-  vfprintf(stderr, format, args);
-  va_end(args);
-  fflush(stderr);
-  exit(1);
-}
+string dirname(const string& s) { return s.substr(0, s.find_last_of("/")); }
 
-#define CHECK_TFLITE_SUCCESS(x)                       \
-  if (x != kTfLiteOk) {                               \
-    FATAL("Aborting since tflite returned failure."); \
+bool Interpret(const char* examples_filename, bool use_nnapi) {
+  std::ifstream tflite_stream(examples_filename);
+  if (!tflite_stream.is_open()) {
+    fprintf(stderr, "Can't open input file.");
+    return false;
   }
 
-void Interpret(const char* filename, const char* examples_filename,
-               bool use_nnapi) {
-  // TODO(aselle): Resize of input image should go here
-  // ...
-  // For now I am allocating all tensors. This means I am fixed size.
-  // So I am not using the variable size ability yet.
-  fprintf(stderr, "example file %s\n", examples_filename);
-  std::vector<tflite::testing::Example> examples;
-  CHECK_TFLITE_SUCCESS(
-      tflite::testing::ParseExamples(examples_filename, &examples));
-
-  for (const tflite::testing::Example& example : examples) {
-    auto model = tflite::FlatBufferModel::BuildFromFile(filename);
-    if (!model) FATAL("Cannot read file %s\n", filename);
-    std::unique_ptr<tflite::Interpreter> interpreter;
-    tflite::ops::builtin::BuiltinOpResolver builtins;
-
-    CHECK_TFLITE_SUCCESS(
-        tflite::InterpreterBuilder(*model, builtins)(&interpreter));
+  printf("Use nnapi is set to: %d\n", use_nnapi);
+  tflite::testing::TfLiteDriver test_driver(use_nnapi);
 
-    printf("Use nnapi is set to: %d\n", use_nnapi);
-    interpreter->UseNNAPI(use_nnapi);
-    CHECK_TFLITE_SUCCESS(
-        tflite::testing::FeedExample(interpreter.get(), example));
-
-    {
-      TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]);
-      if (float* data =
-              interpreter->typed_tensor<float>(interpreter->outputs()[0])) {
-        size_t num = tensor->bytes / sizeof(float);
-        for (float* p = data; p < data + num; p++) {
-          *p = 0;
-        }
-      }
-    }
-    interpreter->Invoke();
-
-    CHECK_TFLITE_SUCCESS(
-        tflite::testing::CheckOutputs(interpreter.get(), example));
-
-    printf("Result:\n");
-    TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]);
-    if (float* data =
-            interpreter->typed_tensor<float>(interpreter->outputs()[0])) {
-      size_t num = tensor->bytes / sizeof(float);
-      for (float* p = data; p < data + num; p++) {
-        printf(" %f", *p);
-      }
-    }
+  test_driver.SetModelBaseDir(dirname(examples_filename));
+  if (!tflite::testing::ParseAndRunTests(&tflite_stream, &test_driver)) {
+    fprintf(stderr, "Results from tflite don't match.");
+    return false;
   }
+
+  return true;
 }
 
 int main(int argc, char* argv[]) {
@@ -109,6 +64,25 @@ int main(int argc, char* argv[]) {
             argv[0]);
     return 1;
   }
-  Interpret(argv[1], argv[2], use_nnapi);
+
+  string base_dir = dirname(argv[1]);
+  DIR* dir = opendir(base_dir.c_str());
+  if (dir == nullptr) {
+    fprintf(stderr, "Can't open dir %s\n", base_dir.c_str());
+    return 1;
+  }
+  while (struct dirent* ent = readdir(dir)) {
+    string name = ent->d_name;
+    if (name.rfind(".txt") == name.length() - 4) {
+      printf("%s: ", name.c_str());
+      if (Interpret((base_dir + "/" + name).c_str(), use_nnapi)) {
+        printf(" %s\n", "OK");
+      } else {
+        printf(" %s\n", "FAIL");
+      }
+    }
+  }
+  closedir(dir);
+
   return 0;
 }
diff --git a/tensorflow/contrib/lite/testing/parse_testdata.cc b/tensorflow/contrib/lite/testing/parse_testdata.cc
index d745ed27158cdad55bdcd97162cb3dfa9e32c112..7c371f2bd445e10bc6d4b20793582c34300316b3 100644
--- a/tensorflow/contrib/lite/testing/parse_testdata.cc
+++ b/tensorflow/contrib/lite/testing/parse_testdata.cc
@@ -169,6 +169,11 @@ TfLiteStatus FeedExample(tflite::Interpreter* interpreter,
       for (size_t idx = 0; idx < example.inputs[i].flat_data.size(); idx++) {
         data[idx] = example.inputs[i].flat_data[idx];
       }
+    } else if (int64_t* data =
+                   interpreter->typed_tensor<int64_t>(input_index)) {
+      for (size_t idx = 0; idx < example.inputs[i].flat_data.size(); idx++) {
+        data[idx] = example.inputs[i].flat_data[idx];
+      }
     } else {
       fprintf(stderr, "input[%zu] was not float or int data\n", i);
       return kTfLiteError;
@@ -219,6 +224,19 @@ TfLiteStatus CheckOutputs(tflite::Interpreter* interpreter,
         }
       }
       fprintf(stderr, "\n");
+    } else if (const int64_t* data =
+                   interpreter->typed_tensor<int64_t>(output_index)) {
+      for (size_t idx = 0; idx < example.outputs[i].flat_data.size(); idx++) {
+        int64_t computed = data[idx];
+        int64_t reference = example.outputs[0].flat_data[idx];
+        if (std::abs(computed - reference) > 0) {
+          fprintf(stderr,
+                  "output[%zu][%zu] did not match %ld vs reference %f\n", i,
+                  idx, data[idx], example.outputs[0].flat_data[idx]);
+          return kTfLiteError;
+        }
+      }
+      fprintf(stderr, "\n");
     } else {
       fprintf(stderr, "output[%zu] was not float or int data\n", i);
       return kTfLiteError;
diff --git a/tensorflow/contrib/lite/testing/split.h b/tensorflow/contrib/lite/testing/split.h
index 24071442e8929f37443df1b98d22711b3024b87c..cfc1e929e9e66a6641fc3a9c47cbe511f692b748 100644
--- a/tensorflow/contrib/lite/testing/split.h
+++ b/tensorflow/contrib/lite/testing/split.h
@@ -53,6 +53,15 @@ inline std::vector<int> Split(const string& s, const string& delimiter) {
   return fields;
 }
 
+template <>
+inline std::vector<int64_t> Split(const string& s, const string& delimiter) {
+  std::vector<int64_t> fields;
+  for (const auto& p : SplitToPos(s, delimiter)) {
+    fields.push_back(strtoll(s.data() + p.first, nullptr, 10));
+  }
+  return fields;
+}
+
 template <>
 inline std::vector<float> Split(const string& s, const string& delimiter) {
   std::vector<float> fields;
diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc
index cf9df2ec264bcff7f836a70db37afe8a5ce01c28..bae639ea95318a16c963269de5e55afcb681d4c5 100644
--- a/tensorflow/contrib/lite/testing/tflite_driver.cc
+++ b/tensorflow/contrib/lite/testing/tflite_driver.cc
@@ -31,6 +31,14 @@ float Value(const TfLitePtrUnion& data, int index) {
   return data.f[index];
 }
 template <>
+int32_t Value(const TfLitePtrUnion& data, int index) {
+  return data.i32[index];
+}
+template <>
+int64_t Value(const TfLitePtrUnion& data, int index) {
+  return data.i64[index];
+}
+template <>
 uint8_t Value(const TfLitePtrUnion& data, int index) {
   return data.uint8[index];
 }
@@ -61,9 +69,14 @@ class TfLiteDriver::Expectation {
     switch (tensor.type) {
       case kTfLiteFloat32:
         return TypedCheck<float>(verbose, tensor);
+      case kTfLiteInt32:
+        return TypedCheck<int32_t>(verbose, tensor);
+      case kTfLiteInt64:
+        return TypedCheck<int64_t>(verbose, tensor);
       case kTfLiteUInt8:
         return TypedCheck<uint8_t>(verbose, tensor);
       default:
+        fprintf(stderr, "Unsupported type %d in Check\n", tensor.type);
         return false;
     }
   }
@@ -71,15 +84,30 @@ class TfLiteDriver::Expectation {
  private:
   template <typename T>
   bool TypedCheck(bool verbose, const TfLiteTensor& tensor) {
+    // TODO(ahentz): must find a way to configure the tolerance.
+    constexpr double kRelativeThreshold = 1e-2f;
+    constexpr double kAbsoluteThreshold = 1e-4f;
+
     int tensor_size = tensor.bytes / sizeof(T);
 
     bool good_output = true;
     for (int i = 0; i < tensor_size; ++i) {
-      if (std::abs(Value<T>(data_, i) - Value<T>(tensor.data, i)) > 1e-5) {
+      float computed = Value<T>(tensor.data, i);
+      float reference = Value<T>(data_, i);
+      float diff = std::abs(computed - reference);
+      bool error_is_large = false;
+      // For very small numbers, try absolute error, otherwise go with
+      // relative.
+      if (std::abs(reference) < kRelativeThreshold) {
+        error_is_large = (diff > kAbsoluteThreshold);
+      } else {
+        error_is_large = (diff > kRelativeThreshold * std::abs(reference));
+      }
+      if (error_is_large) {
         good_output = false;
         if (verbose) {
-          std::cerr << "  index " << i << ": " << Value<T>(data_, i)
-                    << " != " << Value<T>(tensor.data, i) << std::endl;
+          std::cerr << "  index " << i << ": " << reference
+                    << " != " << computed << std::endl;
         }
       }
     }
@@ -95,8 +123,8 @@ TfLiteDriver::~TfLiteDriver() {}
 void TfLiteDriver::AllocateTensors() {
   if (must_allocate_tensors_) {
     if (interpreter_->AllocateTensors() != kTfLiteOk) {
-      std::cerr << "Failed to allocate tensors" << std::endl;
-      abort();
+      Invalidate("Failed to allocate tensors");
+      return;
     }
     must_allocate_tensors_ = false;
   }
@@ -147,6 +175,18 @@ void TfLiteDriver::SetInput(int id, const string& csv_values) {
       SetTensorData(values, &tensor->data);
       break;
     }
+    case kTfLiteInt32: {
+      const auto& values = testing::Split<int32_t>(csv_values, ",");
+      if (!CheckSizes<int32_t>(tensor->bytes, values.size())) return;
+      SetTensorData(values, &tensor->data);
+      break;
+    }
+    case kTfLiteInt64: {
+      const auto& values = testing::Split<int64_t>(csv_values, ",");
+      if (!CheckSizes<int64_t>(tensor->bytes, values.size())) return;
+      SetTensorData(values, &tensor->data);
+      break;
+    }
     case kTfLiteUInt8: {
       const auto& values = testing::Split<uint8_t>(csv_values, ",");
       if (!CheckSizes<uint8_t>(tensor->bytes, values.size())) return;
@@ -154,6 +194,7 @@ void TfLiteDriver::SetInput(int id, const string& csv_values) {
       break;
     }
     default:
+      fprintf(stderr, "Unsupported type %d in SetInput\n", tensor->type);
       Invalidate("Unsupported tensor data type");
       return;
   }
@@ -167,10 +208,17 @@ void TfLiteDriver::SetExpectation(int id, const string& csv_values) {
     case kTfLiteFloat32:
       expected_output_[id]->SetData<float>(csv_values);
       break;
+    case kTfLiteInt32:
+      expected_output_[id]->SetData<int32_t>(csv_values);
+      break;
+    case kTfLiteInt64:
+      expected_output_[id]->SetData<int64_t>(csv_values);
+      break;
     case kTfLiteUInt8:
       expected_output_[id]->SetData<uint8_t>(csv_values);
       break;
     default:
+      fprintf(stderr, "Unsupported type %d in SetExpectation\n", tensor->type);
       Invalidate("Unsupported tensor data type");
       return;
   }
diff --git a/tensorflow/contrib/lite/testing/util.h b/tensorflow/contrib/lite/testing/util.h
new file mode 100644
index 0000000000000000000000000000000000000000..4d4304f022187027950f58050ececae73dedffb6
--- /dev/null
+++ b/tensorflow/contrib/lite/testing/util.h
@@ -0,0 +1,28 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_UTIL_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_UTIL_H_
+
+namespace tflite {
+
+inline void LogToStderr() {
+#ifdef PLATFORM_GOOGLE
+  FLAGS_logtostderr = true;
+#endif
+}
+
+}  // namespace tflite
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TESTING_UTIL_H_
diff --git a/tensorflow/contrib/lite/tflite_static.bp b/tensorflow/contrib/lite/tflite_static.bp
index 3d3c7ec67ada73d58bce432c4b3635eec0037c4e..771771fd5f6e013bdf5e863e9d216f9b50e86d97 100644
--- a/tensorflow/contrib/lite/tflite_static.bp
+++ b/tensorflow/contrib/lite/tflite_static.bp
@@ -19,6 +19,7 @@ cc_library_static {
     rtti: true,
     srcs: [
         "allocation.cc",
+        "arena_planner.cc",
         "context.c",
         "error_reporter.cc",
         "interpreter.cc",
@@ -30,12 +31,15 @@ cc_library_static {
         "kernels/activations.cc",
         "kernels/add.cc",
         "kernels/basic_rnn.cc",
+        "kernels/batch_to_space_nd.cc",
         "kernels/concatenation.cc",
         "kernels/conv.cc",
         "kernels/depthwise_conv.cc",
+        "kernels/div.cc",
         "kernels/embedding_lookup.cc",
         "kernels/embedding_lookup_sparse.cc",
         "kernels/fully_connected.cc",
+        "kernels/gather.cc",
         "kernels/gemm_support.cc",
         "kernels/hashtable_lookup.cc",
         "kernels/kernel_util.cc",
@@ -43,14 +47,21 @@ cc_library_static {
         "kernels/local_response_norm.cc",
         "kernels/lsh_projection.cc",
         "kernels/lstm.cc",
+        "kernels/mean.cc",
         "kernels/mul.cc",
+        "kernels/pad.cc",
         "kernels/pooling.cc",
         "kernels/register.cc",
         "kernels/reshape.cc",
         "kernels/resize_bilinear.cc",
         "kernels/skip_gram.cc",
+        "kernels/space_to_batch_nd.cc",
         "kernels/space_to_depth.cc",
+        "kernels/squeeze.cc",
+        "kernels/sub.cc",
         "kernels/svdf.cc",
+        "kernels/transpose.cc",
+        "kernels/unidirectional_sequence_rnn.cc",
         "kernels/internal/tensor_utils.cc",
         "kernels/internal/quantization_util.cc",
         "kernels/internal/reference/portable_tensor_utils.cc",
@@ -73,8 +84,10 @@ cc_library_static {
         "-Wextra",
         "-Wno-array-bounds",
         "-Wno-invalid-partial-specialization",
+        "-Wno-mismatched-tags",
         "-Wno-missing-field-initializers",
         "-Wno-sign-compare",
+        "-Wno-unused-lambda-capture",
         "-Wno-unused-parameter",
         "-Wno-unused-variable",
     ],
diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD
index 0bf8d067a3f21a01fc1b384bba2a1703f9367733..967e304742fb27ba05591a3b1614de14cd9f5262 100644
--- a/tensorflow/contrib/lite/toco/BUILD
+++ b/tensorflow/contrib/lite/toco/BUILD
@@ -159,22 +159,19 @@ cc_library(
         "toco_types.h",
     ],
     deps = [
+        # Placeholder for internal file dependency.
         "//tensorflow/core:framework_lite",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-    ] + select({
-        "//tensorflow:android": [],
-        "//tensorflow:darwin": [],
-        "//tensorflow:ios": [],
-        "//conditions:default": [],
-        "//tensorflow:dummy_disabled_internal": [],
-    }),
+    ],
 )
 
 cc_library(
     name = "graph_transformations",
     srcs = [
+        "graph_transformations/convert_expanddims_to_reshape.cc",
         "graph_transformations/convert_pure_conv_to_depthwise.cc",
+        "graph_transformations/convert_trivial_transpose_to_reshape.cc",
         "graph_transformations/create_im2col_arrays.cc",
         "graph_transformations/dequantize.cc",
         "graph_transformations/drop_fake_quant.cc",
@@ -206,23 +203,30 @@ cc_library(
         "graph_transformations/remove_trivial_reshape.cc",
         "graph_transformations/remove_unused_op.cc",
         "graph_transformations/resolve_batch_normalization.cc",
+        "graph_transformations/resolve_batch_to_space_nd_attributes.cc",
         "graph_transformations/resolve_constant_binary.cc",
         "graph_transformations/resolve_constant_concatenation.cc",
         "graph_transformations/resolve_constant_fake_quant.cc",
-        "graph_transformations/resolve_constant_tensorflow_shape.cc",
+        "graph_transformations/resolve_constant_fill.cc",
+        "graph_transformations/resolve_constant_range.cc",
+        "graph_transformations/resolve_constant_shape_or_rank.cc",
+        "graph_transformations/resolve_constant_stack.cc",
+        "graph_transformations/resolve_constant_strided_slice.cc",
         "graph_transformations/resolve_constant_unary.cc",
         "graph_transformations/resolve_mean_attributes.cc",
         "graph_transformations/resolve_pad_attributes.cc",
         "graph_transformations/resolve_reorder_axes.cc",
         "graph_transformations/resolve_reshape_attributes.cc",
         "graph_transformations/resolve_slice_attributes.cc",
+        "graph_transformations/resolve_space_to_batch_nd_attributes.cc",
+        "graph_transformations/resolve_squeeze_attributes.cc",
         "graph_transformations/resolve_strided_slice_attributes.cc",
         "graph_transformations/resolve_tensorflow_concat.cc",
         "graph_transformations/resolve_tensorflow_matmul.cc",
         "graph_transformations/resolve_tensorflow_merge.cc",
-        "graph_transformations/resolve_tensorflow_squeeze.cc",
         "graph_transformations/resolve_tensorflow_switch.cc",
         "graph_transformations/resolve_tensorflow_tile.cc",
+        "graph_transformations/resolve_transpose_attributes.cc",
         "graph_transformations/unfuse_activation_functions.cc",
     ],
     hdrs = [
diff --git a/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc
index 2f4454d7c849c49c853e1379cbdd8241062ba348..d4da8f5dfe13a38e8b6886656c5c7e0c8fbb1316 100644
--- a/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc
+++ b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc
@@ -218,7 +218,8 @@ void AllocateTransientArrays(Model* model,
   // just guard this assumption with a CHECK:
   bool batchless_input_shapes = true;
   for (const auto& input_array : model->flags.input_arrays()) {
-    if (input_array.shape().empty() || input_array.shape(0) != 1) {
+    if (!input_array.has_shape() || input_array.shape().dims().empty() ||
+        input_array.shape().dims(0) != 1) {
       batchless_input_shapes = false;
       break;
     }
@@ -238,8 +239,8 @@ void AllocateTransientArrays(Model* model,
   // is a misnormer, should read 'workspace'.
   for (const auto& array_pair : ordered_arrays_map) {
     const string& array_name = array_pair.first;
-    const auto& array_lifespan = array_lifespans.find(array_name)->second;
-    if (array_lifespan.persistent) {
+    auto it = array_lifespans.find(array_name);
+    if (it != array_lifespans.end() && it->second.persistent) {
       AllocateTransientArray(*model, array_name, &allocator,
                              transient_data_alignment);
     }
@@ -281,8 +282,8 @@ void AllocateTransientArrays(Model* model,
   std::size_t persistent_alloc_size = 0;
   for (const auto& array_pair : ordered_arrays_map) {
     const string& array_name = array_pair.first;
-    const auto& array_lifespan = array_lifespans.find(array_name)->second;
-    if (array_lifespan.persistent) {
+    auto it = array_lifespans.find(array_name);
+    if (it != array_lifespans.end() && it->second.persistent) {
       persistent_alloc_size +=
           TransientArraySize(*model, array_name, transient_data_alignment);
     }
diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h
index 5268902346f720be7ecd4980c696d4df8c3da173..eb2d7ba916e49cd5ec838eb945d478f008f149ca 100644
--- a/tensorflow/contrib/lite/toco/args.h
+++ b/tensorflow/contrib/lite/toco/args.h
@@ -21,6 +21,9 @@ limitations under the License.
 #include <functional>
 #include <unordered_map>
 #include <vector>
+#if defined(PLATFORM_GOOGLE)
+#include "strings/split.h"
+#endif
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_split.h"
 #include "tensorflow/contrib/lite/toco/toco_port.h"
@@ -203,6 +206,8 @@ struct ParsedModelFlags {
   Arg<string> graphviz_last_array;
   Arg<string> dump_graphviz;
   Arg<bool> dump_graphviz_video = Arg<bool>(false);
+  Arg<bool> allow_nonexistent_arrays = Arg<bool>(false);
+  Arg<bool> allow_nonascii_arrays = Arg<bool>(false);
 };
 
 // Flags that describe the operation you would like to do (what conversion
diff --git a/tensorflow/contrib/lite/toco/dump_graphviz.cc b/tensorflow/contrib/lite/toco/dump_graphviz.cc
index f5e2868dc05306d9f08d585e54900a3f873e6079..39809216c77bdadfd44aafbddc8e0979fde66a49 100644
--- a/tensorflow/contrib/lite/toco/dump_graphviz.cc
+++ b/tensorflow/contrib/lite/toco/dump_graphviz.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/strings/str_replace.h"
+#include "absl/strings/strip.h"
 #include "tensorflow/contrib/lite/toco/model_flags.pb.h"
 #include "tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h"
 #include "tensorflow/contrib/lite/toco/toco_port.h"
@@ -105,6 +106,34 @@ Color GetColorForArray(const Model& model, const string& array_name) {
   return Color(0xF5, 0xF5, 0xF5);
 }
 
+void AppendArrayVal(string* string, Array const& array, int index) {
+  if (array.buffer->type == ArrayDataType::kFloat) {
+    const auto& data = array.GetBuffer<ArrayDataType::kFloat>().data;
+    if (index >= data.size()) {
+      return;
+    }
+    AppendF(string, "%.3f", data[index]);
+  } else if (array.buffer->type == ArrayDataType::kUint8) {
+    const auto& data = array.GetBuffer<ArrayDataType::kUint8>().data;
+    if (index >= data.size()) {
+      return;
+    }
+    AppendF(string, "%d", data[index]);
+  } else if (array.buffer->type == ArrayDataType::kInt32) {
+    const auto& data = array.GetBuffer<ArrayDataType::kInt32>().data;
+    if (index >= data.size()) {
+      return;
+    }
+    AppendF(string, "%d", data[index]);
+  } else if (array.buffer->type == ArrayDataType::kInt64) {
+    const auto& data = array.GetBuffer<ArrayDataType::kInt64>().data;
+    if (index >= data.size()) {
+      return;
+    }
+    AppendF(string, "%d", data[index]);
+  }
+}
+
 NodeProperties GetPropertiesForArray(const Model& model,
                                      const string& array_name) {
   NodeProperties node_properties;
@@ -129,10 +158,44 @@ NodeProperties GetPropertiesForArray(const Model& model,
       if (id == 0) {
         AppendF(&node_properties.label, "%d", array_shape.dims(id));
       } else {
-        AppendF(&node_properties.label, "x%d", array_shape.dims(id));
+        // 0x00D7 is the unicode multiplication symbol
+        AppendF(&node_properties.label, "\u00D7%d", array_shape.dims(id));
       }
     }
     node_properties.label += "]";
+
+    if (array.buffer) {
+      const auto& array = model.GetArray(array_name);
+      int buffer_size = RequiredBufferSizeForShape(array.shape());
+      if (buffer_size <= 4) {
+        AppendF(&node_properties.label, " = ");
+        if (array.shape().dimensions_count() > 0) {
+          AppendF(&node_properties.label, "{");
+        }
+        for (int i = 0; i < buffer_size; i++) {
+          AppendArrayVal(&node_properties.label, array, i);
+          if (i + 1 < buffer_size) {
+            AppendF(&node_properties.label, ", ");
+          }
+        }
+      } else {
+        AppendF(&node_properties.label, "\\n = ");
+        if (array.shape().dimensions_count() > 0) {
+          AppendF(&node_properties.label, "{");
+        }
+        AppendArrayVal(&node_properties.label, array, 0);
+        AppendF(&node_properties.label, ", ");
+        AppendArrayVal(&node_properties.label, array, 1);
+        // 0x2026 is the unicode ellipsis symbol
+        AppendF(&node_properties.label, " \u2026 ");
+        AppendArrayVal(&node_properties.label, array, buffer_size - 2);
+        AppendF(&node_properties.label, ", ");
+        AppendArrayVal(&node_properties.label, array, buffer_size - 1);
+      }
+      if (array.shape().dimensions_count() > 0) {
+        AppendF(&node_properties.label, "}");
+      }
+    }
   }
 
   if (array.minmax) {
@@ -160,7 +223,21 @@ NodeProperties GetPropertiesForOperator(const Operator& op) {
     node_properties.label =
         static_cast<const TensorFlowUnsupportedOperator&>(op).tensorflow_op;
   } else {
-    node_properties.label = OperatorTypeName(op.type);
+    node_properties.label =
+        string(absl::StripPrefix(OperatorTypeName(op.type), "TensorFlow"));
+  }
+  switch (op.fused_activation_function) {
+    case FusedActivationFunctionType::kRelu:
+      AppendF(&node_properties.label, "\\nReLU");
+      break;
+    case FusedActivationFunctionType::kRelu6:
+      AppendF(&node_properties.label, "\\nReLU6");
+      break;
+    case FusedActivationFunctionType::kRelu1:
+      AppendF(&node_properties.label, "\\nReLU1");
+      break;
+    default:
+      break;
   }
   // Additional information for some of the operators.
   switch (op.type) {
@@ -259,6 +336,10 @@ void DumpGraphviz(const Model& model, string* output_file_contents) {
             op_properties.color.TextColorString().c_str());
     // Add nodes and edges for all inputs of the operator.
     for (const auto& input : op.inputs) {
+      if (model.arrays.count(input) == 0) {
+        // Arrays should _always_ exist. Except, perhaps, during development.
+        continue;
+      }
       auto array_properties = GetPropertiesForArray(model, input);
       if (!already_added_arrays.count(input)) {
         AppendF(output_file_contents, kNodeFormat, input,
@@ -271,6 +352,10 @@ void DumpGraphviz(const Model& model, string* output_file_contents) {
     }
     // Add nodes and edges for all outputs of the operator.
     for (const auto& output : op.outputs) {
+      if (model.arrays.count(output) == 0) {
+        // Arrays should _always_ exist. Except, perhaps, during development.
+        continue;
+      }
       auto array_properties = GetPropertiesForArray(model, output);
       if (!already_added_arrays.count(output)) {
         AppendF(output_file_contents, kNodeFormat, output,
diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc
index e18cf46c69badf4b7584f723a4ba39f2e0d8dd1d..90fa442746cdee975b0103ce60817a95f9b31086 100644
--- a/tensorflow/contrib/lite/toco/export_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc
@@ -780,13 +780,12 @@ void ConvertConcatenationOperator(const Model& model,
   auto* dc_op = tensorflow_graph->add_node();
   dc_op->set_op("ConcatV2");
   dc_op->set_name(src_op.outputs[0]);
-  const string dummy_concat_dim = src_op.outputs[0] + "/concat_dim";
-  CreateDummyConcatDimTensorConst(dummy_concat_dim, src_op.concat_dim,
-                                  tensorflow_graph);
+  const string dummy_axis = src_op.outputs[0] + "/axis";
+  CreateDummyConcatDimTensorConst(dummy_axis, src_op.axis, tensorflow_graph);
   for (const auto& input : src_op.inputs) {
     *dc_op->add_input() = input;
   }
-  *dc_op->add_input() = dummy_concat_dim;
+  *dc_op->add_input() = dummy_axis;
   (*dc_op->mutable_attr())["T"].set_type(DT_FLOAT);
   (*dc_op->mutable_attr())["Tidx"].set_type(DT_INT32);
   (*dc_op->mutable_attr())["N"].set_i(src_op.inputs.size());
@@ -803,8 +802,10 @@ void ConvertTensorFlowReshapeOperator(const Model& model,
   *reshape_op->add_input() = src_op.inputs[1];
   (*reshape_op->mutable_attr())["T"].set_type(DT_FLOAT);
   const auto& shape_array = model.GetArray(src_op.inputs[1]);
-  CHECK(shape_array.data_type == ArrayDataType::kInt32);
-  CHECK(shape_array.buffer != nullptr);
+  QCHECK(shape_array.data_type == ArrayDataType::kInt32)
+      << "Only int32 shape is supported.";
+  QCHECK(shape_array.buffer != nullptr)
+      << "Shape inferred at runtime is not supported.";
   const auto& shape_data = shape_array.GetBuffer<ArrayDataType::kInt32>().data;
   CreateReshapeShapeTensorConst(src_op.inputs[1], shape_data, tensorflow_graph);
 }
@@ -900,13 +901,15 @@ tensorflow::DataType GetTensorFlowDataType(const Model& model,
                                            const string& array_name) {
   auto& dtype = model.GetArray(array_name).data_type;
   CHECK(dtype == ArrayDataType::kFloat || dtype == ArrayDataType::kInt32 ||
-        dtype == ArrayDataType::kUint8);
+        dtype == ArrayDataType::kUint8 || dtype == ArrayDataType::kInt64);
   if (dtype == ArrayDataType::kFloat) {
     return tensorflow::DT_FLOAT;
   } else if (dtype == ArrayDataType::kInt32) {
     return tensorflow::DT_INT32;
   } else if (dtype == ArrayDataType::kUint8) {
     return tensorflow::DT_UINT8;
+  } else if (dtype == ArrayDataType::kInt64) {
+    return tensorflow::DT_INT64;
   } else {
     LOG(FATAL) << "Wrong data type";
   }
@@ -950,6 +953,22 @@ void ConvertGatherOperator(const Model& model, const GatherOperator& src_op,
   (*gather_op->mutable_attr())["Tparams"].set_type(params_type);
 }
 
+void ConvertArgMaxOperator(const Model& model, const ArgMaxOperator& src_op,
+                           GraphDef* tensorflow_graph) {
+  auto* argmax_op = tensorflow_graph->add_node();
+  argmax_op->set_op("ArgMax");
+  argmax_op->set_name(src_op.outputs[0]);
+  CHECK_EQ(src_op.inputs.size(), 2);
+  *argmax_op->add_input() = src_op.inputs[0];
+  *argmax_op->add_input() = src_op.inputs[1];
+  (*argmax_op->mutable_attr())["T"].set_type(
+      GetTensorFlowDataType(model, src_op.inputs[0]));
+  (*argmax_op->mutable_attr())["Tidx"].set_type(
+      GetTensorFlowDataType(model, src_op.inputs[1]));
+  (*argmax_op->mutable_attr())["output_type"].set_type(
+      GetTensorFlowDataType(model, src_op.outputs[0]));
+}
+
 void ConvertResizeBilinearOperator(const Model& model,
                                    const ResizeBilinearOperator& src_op,
                                    GraphDef* tensorflow_graph) {
@@ -993,22 +1012,21 @@ void ConvertLstmCellOperator(const Model& model, const LstmCellOperator& src_op,
   const string concat_output = base + "basic_lstm_cell/concat";
   // Op names have been chosen to match the tf.slim LSTM naming
   // as closely as possible.
-  const int concat_dim =
+  const int axis =
       model.arrays.at(src_op.inputs[LstmCellOperator::PREV_ACTIV_INPUT])
           ->shape()
           .dimensions_count() -
       1;
   // Note that DATA_INPUT may have extra size 1 dimensions, but TF concat
   // works the same since the tensor has the same underlying data layout.
-  const string concat_dim_output = concat_output + "/concat_dim";
-  CreateDummyConcatDimTensorConst(concat_dim_output, concat_dim,
-                                  tensorflow_graph);
+  const string axis_output = concat_output + "/axis";
+  CreateDummyConcatDimTensorConst(axis_output, axis, tensorflow_graph);
   auto* concat_op = tensorflow_graph->add_node();
   concat_op->set_op("ConcatV2");
   concat_op->set_name(concat_output);
   *concat_op->add_input() = src_op.inputs[LstmCellOperator::DATA_INPUT];
   *concat_op->add_input() = src_op.inputs[LstmCellOperator::PREV_ACTIV_INPUT];
-  *concat_op->add_input() = concat_dim_output;
+  *concat_op->add_input() = axis_output;
   (*concat_op->mutable_attr())["T"].set_type(DT_FLOAT);
   (*concat_op->mutable_attr())["Tidx"].set_type(DT_INT32);
   (*concat_op->mutable_attr())["N"].set_i(2);  // Number of inputs
@@ -1069,8 +1087,7 @@ void ConvertLstmCellOperator(const Model& model, const LstmCellOperator& src_op,
   // Split
   string split_dim_output = base + "split/split_dim";
   // The dimension is the same as the concatenation dimension
-  CreateDummyConcatDimTensorConst(split_dim_output, concat_dim,
-                                  tensorflow_graph);
+  CreateDummyConcatDimTensorConst(split_dim_output, axis, tensorflow_graph);
   string split_output = base + "split";
   auto* split_op = tensorflow_graph->add_node();
   split_op->set_op("Split");
@@ -1298,11 +1315,11 @@ void ConvertMeanOperator(const Model& model, const MeanOperator& src_op,
   auto* tensor = (*params_op->mutable_attr())["value"].mutable_tensor();
   tensor->set_dtype(DT_INT32);
 
-  for (int i = 0; i < src_op.reduction_indices.size(); ++i) {
-    tensor->add_int_val(src_op.reduction_indices[i]);
+  for (int i = 0; i < src_op.axis.size(); ++i) {
+    tensor->add_int_val(src_op.axis[i]);
   }
   auto* shape = tensor->mutable_tensor_shape();
-  shape->add_dim()->set_size(src_op.reduction_indices.size());
+  shape->add_dim()->set_size(src_op.axis.size());
 }
 
 void ConvertSqueezeOperator(const Model& model, const SqueezeOperator& src_op,
@@ -1498,6 +1515,9 @@ void ConvertOperator(const Model& model, const Operator& src_op,
   } else if (src_op.type == OperatorType::kSlice) {
     ConvertSliceOperator(model, static_cast<const SliceOperator&>(src_op),
                          tensorflow_graph);
+  } else if (src_op.type == OperatorType::kArgMax) {
+    ConvertArgMaxOperator(model, static_cast<const ArgMaxOperator&>(src_op),
+                          tensorflow_graph);
   } else {
     LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(src_op.type);
   }
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3bde9b0169ddfb7fc37657122e2e8eb65ccbdf6d
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc
@@ -0,0 +1,101 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) {
+  auto expand_it = model->operators.begin() + op_index;
+  if (expand_it->get()->type != OperatorType::kExpandDims) {
+    return false;
+  }
+  ExpandDimsOperator* expand_op =
+      static_cast<ExpandDimsOperator*>(expand_it->get());
+  CHECK_EQ(expand_op->inputs.size(), 2);
+  CHECK_EQ(expand_op->outputs.size(), 1);
+
+  const auto& input_array = *model->arrays[expand_op->inputs[0]];
+  if (!input_array.has_shape()) {
+    // Yield until input dims have been resolved.
+    return false;
+  }
+  if (input_array.shape().dimensions_count() == 0) {
+    // Input array cannot be 0-D.
+    // (Unsure if this is TF behavior, but was required to get a test to pass.)
+    return false;
+  }
+
+  const auto& axis_array = *model->arrays[expand_op->inputs[1]];
+  if (!axis_array.has_shape()) {
+    // Yield until input axis array shape has been resolved.
+    return false;
+  }
+  CHECK_EQ(RequiredBufferSizeForShape(axis_array.shape()), 1);
+  if (!axis_array.buffer) {
+    // Yield until the input axis array is constant
+    return false;
+  }
+  int axis = axis_array.GetBuffer<ArrayDataType::kInt32>().data[0];
+  std::vector<int> reshape_dims(input_array.shape().dims());
+  if (axis < 0) {
+    axis = reshape_dims.size();
+  }
+  reshape_dims.insert(reshape_dims.begin() + axis, 1);
+
+  // The input tensor has shape, and the axis input is constant. We can now
+  // replace ExpandDims with a Reshape.
+  auto* reshape_op = new TensorFlowReshapeOperator;
+
+  // Copy inputs
+  reshape_op->inputs.push_back(expand_op->inputs[0]);
+  reshape_op->outputs = expand_op->outputs;
+
+  // Create a new input array
+  string axis_array_name = expand_op->inputs[1];
+  string shape_array_name = toco::AvailableArrayName(*model, axis_array_name);
+  Array& shape_array = model->GetOrCreateArray(shape_array_name);
+  *(shape_array.mutable_shape()->mutable_dims()) = {
+      1, static_cast<int>(reshape_dims.size())};
+  reshape_op->inputs.push_back(shape_array_name);
+  shape_array.data_type = ArrayDataType::kInt32;
+  auto& shape_buffer = shape_array.GetMutableBuffer<ArrayDataType::kInt32>();
+  shape_buffer.data = reshape_dims;
+
+  // Delete axis array if unused
+  if (IsDiscardableArray(*model, axis_array_name) &&
+      CountOpsWithInput(*model, axis_array_name) == 1 &&
+      !GetOpWithOutput(*model, axis_array_name)) {
+    model->arrays.erase(axis_array_name);
+  }
+
+  // Replace the operator in the graph.
+  const auto reshape_it = model->operators.emplace(expand_it, reshape_op);
+  expand_it = reshape_it + 1;
+  CHECK_EQ(expand_it->get(), expand_op);
+  model->operators.erase(expand_it);
+
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a234c209240ecb9eeba1d2e416a294be53d221ee
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc
@@ -0,0 +1,85 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <vector>
+
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) {
+  auto transpose_it = model->operators.begin() + op_index;
+  if (transpose_it->get()->type != OperatorType::kTranspose) {
+    return false;
+  }
+  TransposeOperator* transpose_op =
+      static_cast<TransposeOperator*>(transpose_it->get());
+
+  const auto& output_array = *model->arrays[transpose_op->outputs[0]];
+  if (!output_array.has_shape()) {
+    // Yield until PropagateFixedSizes has been run on this op.
+    return false;
+  }
+  // Note: We can assume we have error checked inputs in PropagateFixedSizes.
+
+  // This transpose is trivial if we only have one non-unitary dimension.
+  std::vector<int> const& dims = output_array.shape().dims();
+  unsigned non_unitary_axis_count = 0;
+  for (int i = 0; i < dims.size(); i++) {
+    if (dims[i] != 1) {
+      non_unitary_axis_count++;
+    }
+  }
+  if (non_unitary_axis_count > 1) {
+    // Transpose is not trivial
+    return false;
+  }
+
+  // This transpose is trivial. Replace it with a Reshape op.
+  auto* reshape_op = new TensorFlowReshapeOperator;
+
+  // Copy input and output
+  reshape_op->inputs.push_back(transpose_op->inputs[0]);
+  reshape_op->outputs = transpose_op->outputs;
+
+  // Create a new input array for the shape input
+  string perm_array_name = transpose_op->inputs[1];
+  string shape_array_name = toco::AvailableArrayName(*model, perm_array_name);
+  Array& shape_array = model->GetOrCreateArray(shape_array_name);
+  *(shape_array.mutable_shape()->mutable_dims()) = {
+      1, static_cast<int>(dims.size())};
+  reshape_op->inputs.push_back(shape_array_name);
+  shape_array.data_type = ArrayDataType::kInt32;
+  auto& shape_buffer = shape_array.GetMutableBuffer<ArrayDataType::kInt32>();
+  shape_buffer.data = dims;
+
+  // Delete perm array if unused
+  if (IsDiscardableArray(*model, perm_array_name) &&
+      CountOpsWithInput(*model, perm_array_name) == 1) {
+    model->arrays.erase(perm_array_name);
+  }
+
+  // Replace the operator in the graph.
+  const auto reshape_it = model->operators.emplace(transpose_it, reshape_op);
+  transpose_it = reshape_it + 1;
+  CHECK_EQ(transpose_it->get(), transpose_op);
+  model->operators.erase(transpose_it);
+
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
index d129b5ecf2615434b8ff8387a04af9561fe617a4..ad4a6f9b78b06fd738da40c2054c07e8f272ee17 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
@@ -68,10 +68,11 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
     return false;
   }
 
-  // TODO(dkalenichenko): Great many ops don't support activation function
-  // fusing. Switch to the whilelist approach instead.
+  // TODO(b/72172404): Great many ops don't support activation function
+  // fusing. Switch to a categorizing function instead.
   if (op->type == OperatorType::kConcatenation ||
       op->type == OperatorType::kSlice ||
+      op->type == OperatorType::kTensorFlowReshape ||
       op->type == OperatorType::kTensorFlowSplit) {
     AddMessageF(
         "Not fusing activation function because the %s op doesn't support it",
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
index 323fec6cf864a798a02aecdbbbf7c2e7bb904d2b..f861c4147a04fe31b7236bfa22ed4627f7742d09 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
@@ -41,11 +41,104 @@ void PrintModelStats(const string& label, const Model& model) {
             << " quantized)";
 }
 
+// Some graphs have RNN back-edges that are discardable, having been
+// created typically by TensorFlow import rather than specified by the user.
+// Such graphs might have cycles (closed by RNN back-edges) that may be pruned.
+// Local graph transformations can't identify such global features,
+// so this function performs this global transformation.
+//
+// The other (and related) thing that is peculiar about RNN back-edges
+// is that they do not prevent the arrays that they touch, from being
+// pruned. Thus, they may refer to array names which no longer exist.
+// The intent is for that to result in the eventual pruning of such
+// 'dangling' RNN back-edges. We perform this pruning at the end of this
+// function, as the pruning of connected components done here may leave
+// more RNN back-edges dangling.
+void DiscardUselessConnectedComponentsAndRNNBackEdges(Model* model) {
+  // Identify the set of arrays that are in 'useful' connected components
+  // of the graph, which means connected to output arrays.
+  std::unordered_set<string> useful_arrays;
+  for (const string& output_array : model->flags.output_arrays()) {
+    useful_arrays.insert(output_array);
+  }
+  bool found_new_useful_arrays;
+  do {
+    found_new_useful_arrays = false;
+    for (const auto& op : model->operators) {
+      bool op_touches_useful_arrays = false;
+      for (const string& output : op->outputs) {
+        op_touches_useful_arrays |= useful_arrays.count(output);
+      }
+      if (op_touches_useful_arrays) {
+        for (const string& input : op->inputs) {
+          found_new_useful_arrays |= !useful_arrays.count(input);
+          useful_arrays.insert(input);
+        }
+        for (const string& output : op->outputs) {
+          found_new_useful_arrays |= !useful_arrays.count(output);
+          useful_arrays.insert(output);
+        }
+      }
+    }
+    for (const auto& rnn_state : model->flags.rnn_states()) {
+      bool rnn_back_edge_touches_useful_arrays =
+          useful_arrays.count(rnn_state.state_array());
+      if (rnn_back_edge_touches_useful_arrays) {
+        found_new_useful_arrays |=
+            !useful_arrays.count(rnn_state.back_edge_source_array());
+        useful_arrays.insert(rnn_state.back_edge_source_array());
+      }
+    }
+  } while (found_new_useful_arrays);
+  // Erase arrays that aren't useful, and that are discardable.
+  for (auto it = model->arrays.begin(); it != model->arrays.end();) {
+    if (useful_arrays.count(it->first) ||
+        !IsDiscardableArray(*model, it->first)) {
+      ++it;
+    } else {
+      it = model->arrays.erase(it);
+    }
+  }
+  // Erase operators that do not produce a useful output array.
+  for (auto it = model->operators.begin(); it != model->operators.end();) {
+    // Only need to test the first output, as we simultaneously added all of
+    // an operator's outputs to the list of output arrays.
+    if (useful_arrays.count((*it)->outputs[0])) {
+      ++it;
+    } else {
+      for (const string& output : (*it)->outputs) {
+        CHECK(!useful_arrays.count(output));
+      }
+      it = model->operators.erase(it);
+    }
+  }
+  // Erase RNN back-edges that are 'dangling' i.e. that touch an array
+  // that no longer exists. This should only happen for discardable RNN
+  // back-edges.
+  std::vector<RnnState> rnn_states_to_keep;
+  for (const auto& rnn_state : model->flags.rnn_states()) {
+    const bool dangling =
+        !model->arrays.count(rnn_state.back_edge_source_array()) ||
+        !model->arrays.count(rnn_state.state_array());
+    if (dangling) {
+      CHECK(rnn_state.discardable());
+    } else {
+      rnn_states_to_keep.push_back(rnn_state);
+    }
+  }
+  model->flags.clear_rnn_states();
+  for (const auto& rnn_state : rnn_states_to_keep) {
+    *model->flags.add_rnn_states() = rnn_state;
+  }
+}
+
 bool GraphTransformationsPass(int increment, Model* model,
                               const GraphTransformationsSet& transformations) {
   CHECK(increment == 1 || increment == -1);
   bool changed = false;
-  CHECK(!model->operators.empty());
+  if (model->operators.empty()) {
+    return false;
+  }
   int op_index = increment == 1 ? 0 : model->operators.size() - 1;
   while (true) {
     bool changed_now = false;
@@ -54,23 +147,28 @@ bool GraphTransformationsPass(int increment, Model* model,
       CHECK(!changed_now);
       CHECK(transformation->Messages().empty());
       changed_now = transformation->Run(model, op_index);
-      if (changed_now) {
-        DumpGraphvizVideoFrame(*model);
-        CHECK(!model->operators.empty());
-        op_index = std::min<int>(op_index, model->operators.size() - 1);
-        // Uncomment for debugging
-        // CheckInvariants(*model);
-      }
       const char* made_a_change_msg =
           changed_now ? "made a change" : "did NOT make a change";
       const int log_level =
           changed_now ? kLogLevelModelChanged : kLogLevelModelUnchanged;
+      if (transformation->Messages().empty()) {
+        VLOG(log_level) << transformation->Name() << " " << made_a_change_msg
+                        << " at op_index=" << op_index << "/"
+                        << model->operators.size() - 1;
+      }
       for (const string& message : transformation->Messages()) {
         VLOG(log_level) << transformation->Name() << " " << made_a_change_msg
                         << " at op_index=" << op_index << "/"
                         << model->operators.size() - 1 << ": " << message;
       }
       transformation->ClearMessages();
+      if (changed_now) {
+        DumpGraphvizVideoFrame(*model);
+        if (model->operators.empty()) return true;
+        op_index = std::min<int>(op_index, model->operators.size() - 1);
+        // Uncomment for debugging
+        // CheckInvariants(*model);
+      }
       if (changed_now) {
         break;
       }
@@ -86,6 +184,7 @@ bool GraphTransformationsPass(int increment, Model* model,
       op_index += increment;
     }
   }
+  DiscardUselessConnectedComponentsAndRNNBackEdges(model);
   return changed;
 }
 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
index 9ad1b9622fd4374d10bd83fdded2fcd7795ca47d..9ec9f92c90fb93962994d084d9354c11ba367e95 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
@@ -112,7 +112,9 @@ void RunGraphTransformations(Model* model, const string& message,
   };
 
 // List of all graph transformations
+DECLARE_GRAPH_TRANSFORMATION(ConvertExpandDimsToReshape)
 DECLARE_GRAPH_TRANSFORMATION(ConvertPureConvToDepthwise)
+DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialTransposeToReshape)
 DECLARE_GRAPH_TRANSFORMATION(EnsureBiasVectors)
 DECLARE_GRAPH_TRANSFORMATION(FuseActivationFunctions)
 DECLARE_GRAPH_TRANSFORMATION(FuseBinaryIntoFollowingAffine)
@@ -144,18 +146,25 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveReorderAxes)
 DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowConcat)
 DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowMatMul)
 DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowMerge)
-DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowSqueeze)
+DECLARE_GRAPH_TRANSFORMATION(ResolveSqueezeAttributes)
 DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowSwitch)
 DECLARE_GRAPH_TRANSFORMATION(ResolveTensorFlowTile)
 DECLARE_GRAPH_TRANSFORMATION(ResolveConstantFakeQuant)
 DECLARE_GRAPH_TRANSFORMATION(ResolveConstantConcatenation)
 DECLARE_GRAPH_TRANSFORMATION(DropFakeQuant)
 DECLARE_GRAPH_TRANSFORMATION(UnfuseActivationFunctions)
+DECLARE_GRAPH_TRANSFORMATION(ResolveSpaceToBatchNDAttributes)
+DECLARE_GRAPH_TRANSFORMATION(ResolveBatchToSpaceNDAttributes)
 DECLARE_GRAPH_TRANSFORMATION(ResolvePadAttributes)
 DECLARE_GRAPH_TRANSFORMATION(ResolveStridedSliceAttributes)
 DECLARE_GRAPH_TRANSFORMATION(ResolveSliceAttributes)
 DECLARE_GRAPH_TRANSFORMATION(ResolveMeanAttributes)
-DECLARE_GRAPH_TRANSFORMATION(ResolveConstantTensorFlowShape)
+DECLARE_GRAPH_TRANSFORMATION(ResolveTransposeAttributes)
+DECLARE_GRAPH_TRANSFORMATION(ResolveConstantRange)
+DECLARE_GRAPH_TRANSFORMATION(ResolveConstantShapeOrRank)
+DECLARE_GRAPH_TRANSFORMATION(ResolveConstantStack)
+DECLARE_GRAPH_TRANSFORMATION(ResolveConstantStridedSlice)
+DECLARE_GRAPH_TRANSFORMATION(ResolveConstantFill)
 DECLARE_GRAPH_TRANSFORMATION(Dequantize)
 
 class ResolveReshapeAttributes : public GraphTransformation {
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
index 9cb26c8752c0d27a3d1138b9ad32e60f34177520..9689b205cd137904504d87906cb691d0ed8235bf 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
@@ -143,7 +143,7 @@ bool HardcodeMinMaxForAverageOrMaxPool(Model* model, Operator* op) {
   return true;
 }
 
-bool HardcodeMinMaxForReshapeOrSqueeze(Model* model, Operator* op) {
+bool HardcodeMinMaxFromFirstInput(Model* model, Operator* op) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.minmax) {
     return false;
@@ -203,7 +203,8 @@ bool HardcodeMinMax::Run(Model* model, std::size_t op_index) {
 
     case OperatorType::kSqueeze:
     case OperatorType::kTensorFlowReshape:
-      changed = HardcodeMinMaxForReshapeOrSqueeze(model, op);
+    case OperatorType::kPad:
+      changed = HardcodeMinMaxFromFirstInput(model, op);
       break;
 
     case OperatorType::kLogistic:
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
index 1ff4e827aa043cbbb0515e10a6ae9bd33e6d819c..c6f17cf31967d4b5dfa004b5e76120482e92392d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
@@ -24,19 +24,6 @@ limitations under the License.
 namespace toco {
 
 namespace {
-
-ArrayDataType CommonDataTypeOfAllInputs(const Model& model,
-                                        const Operator& op) {
-  CHECK_GT(op.inputs.size(), 0);
-  const ArrayDataType data_type = model.GetArray(op.inputs[0]).data_type;
-  for (const auto& input : op.inputs) {
-    const auto& array = model.GetArray(input);
-    CHECK(array.data_type == data_type)
-        << " Unexpected: this operator has inputs with different data types.";
-  }
-  return data_type;
-}
-
 void SetDataTypeForAllOutputs(Model* model, Operator* op,
                               ArrayDataType data_type) {
   for (const auto& output : op->outputs) {
@@ -72,41 +59,15 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
              op->type == OperatorType::kTensorFlowGreaterEqual) {
     // These operators unconditionally produce bool outputs
     SetDataTypeForAllOutputs(model, op, ArrayDataType::kBool);
-  } else if (op->type == OperatorType::kTensorFlowShape) {
-    // These operators are assumed to produce int32 outputs.
+  } else if (op->type == OperatorType::kRank ||
+             op->type == OperatorType::kTensorFlowShape) {
+    // These operators only produce int32 outputs.
     SetDataTypeForAllOutputs(model, op, ArrayDataType::kInt32);
-  } else if (op->type == OperatorType::kAveragePool ||
-             op->type == OperatorType::kMaxPool ||
-             op->type == OperatorType::kL2Pool ||
-             op->type == OperatorType::kConv ||
-             op->type == OperatorType::kDepthwiseConv ||
-             op->type == OperatorType::kFullyConnected ||
-             op->type == OperatorType::kTensorFlowMax ||
-             op->type == OperatorType::kTensorFlowMin ||
-             op->type == OperatorType::kPad ||
-             op->type == OperatorType::kStridedSlice ||
-             op->type == OperatorType::kTensorFlowReshape ||
-             op->type == OperatorType::kSlice ||
-             op->type == OperatorType::kSqueeze ||
-             op->type == OperatorType::kTensorFlowSum ||
-             op->type == OperatorType::kTensorFlowSwitch ||
-             op->type == OperatorType::kTensorFlowTile ||
-             op->type == OperatorType::kTensorFlowAll ||
-             op->type == OperatorType::kReorderAxes ||
-             op->type == OperatorType::kTensorFlowConcatV2 ||
-             op->type == OperatorType::kFloor ||
-             op->type == OperatorType::kGather ||
-             op->type == OperatorType::kSpaceToBatchND ||
-             op->type == OperatorType::kBatchToSpaceND ||
-             op->type == OperatorType::kMean) {
-    // These operators produce outputs with the same type as their 1st input
-    CHECK_GT(op->inputs.size(), 0);
-    const ArrayDataType data_type = model->arrays[op->inputs[0]]->data_type;
-    SetDataTypeForAllOutputs(model, op, data_type);
   } else if (op->type == OperatorType::kTensorFlowSplit ||
-             op->type == OperatorType::kTensorFlowConcat) {
+             op->type == OperatorType::kTensorFlowConcat ||
+             op->type == OperatorType::kFill) {
     // These operators produce an output with the same type as their 2nd input
-    CHECK_GT(op->inputs.size(), 1);
+    CHECK_GE(op->inputs.size(), 2);
     const ArrayDataType data_type = model->arrays[op->inputs[1]]->data_type;
     SetDataTypeForAllOutputs(model, op, data_type);
   } else if (op->type == OperatorType::kCast) {
@@ -114,6 +75,25 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
     CHECK_EQ(op->outputs.size(), 1);
     auto* cast_op = static_cast<CastOperator*>(op);
     model->arrays[op->outputs[0]]->data_type = cast_op->dst_data_type;
+  } else if (op->type == OperatorType::kArgMax) {
+    // Data type of the ArgMax op is specified.
+    CHECK_EQ(op->outputs.size(), 1);
+    auto* argmax_op = static_cast<ArgMaxOperator*>(op);
+    model->arrays[op->outputs[0]]->data_type = argmax_op->output_data_type;
+  } else if (op->type == OperatorType::kRange) {
+    auto* range_op = static_cast<RangeOperator*>(op);
+    // Output type of the Range op can be set via an attribute
+    ArrayDataType data_type;
+    if (range_op->dtype != ArrayDataType::kNone) {
+      // Use the type if specified
+      data_type = range_op->dtype;
+    } else {
+      // Otherwise use the first input
+      CHECK_GE(op->inputs.size(), 1);
+      data_type = model->arrays[op->inputs[0]]->data_type;
+    }
+    CHECK_EQ(op->outputs.size(), 1);
+    SetDataTypeForAllOutputs(model, op, data_type);
   } else if (op->type == OperatorType::kTensorFlowUnsupported) {
     auto* unsupported_op = static_cast<TensorFlowUnsupportedOperator*>(op);
     if (unsupported_op->output_data_types.size() != op->outputs.size()) {
@@ -124,10 +104,13 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
       auto data_type = unsupported_op->output_data_types[i];
       model->arrays[output]->data_type = data_type;
     }
+  } else if (op->type == OperatorType::kExpandDims) {
+    // Yield on ExpandDim until it is converted to Reshape
+    return false;
   } else {
-    // These operators produce an output with the same type as any of their
-    // inputs, which must always have the same type.
-    const ArrayDataType data_type = CommonDataTypeOfAllInputs(*model, *op);
+    // These operators produce outputs with the same type as their 1st input
+    CHECK_GT(op->inputs.size(), 0);
+    const ArrayDataType data_type = model->arrays[op->inputs[0]]->data_type;
     SetDataTypeForAllOutputs(model, op, data_type);
   }
   // Return true if any output data type changed, false if none changed.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index f6daad9020be52b7616b074ba966f4e9b079ebeb..a939efb4dbbc6ec0af2e44270d7c028eff882b70 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/strings/str_join.h"
 #include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
 #include "tensorflow/contrib/lite/toco/model.h"
 #include "tensorflow/contrib/lite/toco/tooling_util.h"
@@ -254,6 +255,33 @@ void ProcessSpaceToDepthOperator(Model* model, SpaceToDepthOperator* op) {
                          depth * block_size * block_size}));
 }
 
+void ProcessFillOperator(Model* model, FillOperator* op) {
+  CHECK_EQ(op->inputs.size(), 2);
+  CHECK_EQ(op->outputs.size(), 1);
+  auto& output_array = *model->arrays[op->outputs[0]];
+  if (output_array.has_shape()) {
+    // We have already run
+    return;
+  }
+
+  auto& dims_array = model->GetArray(op->inputs[0]);
+  if (!dims_array.has_shape()) {
+    // Yield until dims shape been resolved.
+    return;
+  }
+  if (!dims_array.buffer) {
+    // Yield until the dims are constant
+    return;
+  }
+  CHECK(dims_array.data_type == ArrayDataType::kInt32) << "dims must be int32";
+  CHECK_LE(RequiredBufferSizeForShape(dims_array.shape()), 4)
+      << "dims vector can be no larger than 4 values";
+
+  std::vector<int32> const& dims =
+      dims_array.GetBuffer<ArrayDataType::kInt32>().data;
+  *(output_array.mutable_shape()->mutable_dims()) = dims;
+}
+
 void ProcessFullyConnectedOperator(Model* model, FullyConnectedOperator* op) {
   if (!EnsureBiasVectorShape(model, op)) {
     return;
@@ -288,25 +316,30 @@ void ProcessFullyConnectedOperator(Model* model, FullyConnectedOperator* op) {
 void ProcessTensorFlowReshapeOperator(Model* model,
                                       TensorFlowReshapeOperator* op) {
   auto& output_array = *model->arrays[op->outputs[0]];
-  // Bail if we already have output dims
   if (output_array.has_shape()) {
+    // We have already run
     return;
   }
 
   const auto& input_array = *model->arrays[op->inputs[0]];
-  // Yield until input dims have been resolved.
   if (!input_array.has_shape()) {
+    // Yield until input dims have been resolved.
     return;
   }
   const auto& input_shape = input_array.shape();
 
-  const string& shape_name = op->inputs[1];
-  auto& shape_array = model->GetArray(shape_name);
-  // Yield until the shape is resolved as a constant array
+  auto& shape_array = model->GetArray(op->inputs[1]);
+  if (!shape_array.has_shape()) {
+    // Yield until target_shape shape been resolved.
+    return;
+  }
   if (!shape_array.buffer) {
+    // Yield until the target_shape is constant
     return;
   }
-  CHECK(shape_array.data_type == ArrayDataType::kInt32);
+  CHECK(shape_array.data_type == ArrayDataType::kInt32)
+      << "Reshape dims must be int32";
+
   // shape_data is the raw array of ints describing the shape
   // in the TensorFlow node. We intentionally make a copy here, rather than
   // modify wildcards in-place below, because in some graphs, the same shape
@@ -329,12 +362,18 @@ void ProcessTensorFlowReshapeOperator(Model* model,
   }
   const int input_flat_size = RequiredBufferSizeForShape(input_shape);
   if (has_wildcard) {
+    CHECK_GE(input_flat_size, product_non_wildcard_dims)
+        << "Array not large enough to fill the requested dimensions for "
+           "Reshape op with output \""
+        << op->outputs[0] << "\". Are your input shapes correct?";
     shape_data[wildcard_index] = input_flat_size / product_non_wildcard_dims;
   }
   auto& output_shape = *output_array.mutable_shape();
   *output_shape.mutable_dims() = shape_data;
-  const int output_flat_size = RequiredBufferSizeForShape(output_shape);
-  CHECK_EQ(output_flat_size, input_flat_size);
+  CHECK_EQ(input_flat_size, RequiredBufferSizeForShape(output_shape))
+      << "Input cannot be reshaped to requested dimensions for Reshape op with "
+         "output \""
+      << op->outputs[0] << "\". Are your input shapes correct?";
 }
 
 void ProcessSimpleOperator(Model* model, Operator* op) {
@@ -497,14 +536,64 @@ void ProcessConcatenationOperator(Model* model, ConcatenationOperator* op) {
     CHECK_EQ(input_array.shape().dimensions_count(),
              output_array.shape().dimensions_count());
     const std::vector<int>& input_dims = input_array.shape().dims();
-    CHECK_LT(op->concat_dim, input_dims.size());
-    concat_size += input_dims[op->concat_dim];
+    CHECK_LT(op->axis, input_dims.size());
+    concat_size += input_dims[op->axis];
   }
   // Write out the concat_size on the output array shape.
   auto& output_shape = *output_array.mutable_shape();
   auto& output_dims = *output_shape.mutable_dims();
-  CHECK_LT(op->concat_dim, output_shape.dimensions_count());
-  output_dims[op->concat_dim] = concat_size;
+  CHECK_LT(op->axis, output_shape.dimensions_count());
+  output_dims[op->axis] = concat_size;
+}
+
+void ProcessRangeOperator(Model* model, RangeOperator* op) {
+  CHECK_EQ(op->inputs.size(), 3);
+  const auto& start_array = *model->arrays[op->inputs[0]];
+  if (!start_array.has_shape()) {
+    // Yield until input dims have been resolved.
+    return;
+  }
+  const auto& limit_array = *model->arrays[op->inputs[1]];
+  if (!limit_array.has_shape()) {
+    return;
+  }
+  const auto& delta_array = *model->arrays[op->inputs[2]];
+  if (!delta_array.has_shape()) {
+    return;
+  }
+
+  if (!IsConstantParameterArray(*model, op->inputs[0])) {
+    // Yield until inputs are constant.
+    return;
+  }
+  if (!IsConstantParameterArray(*model, op->inputs[1])) {
+    return;
+  }
+  if (!IsConstantParameterArray(*model, op->inputs[2])) {
+    return;
+  }
+
+  CHECK(start_array.data_type == ArrayDataType::kInt32)
+      << "Range op inputs must be int32.";
+  CHECK(limit_array.data_type == ArrayDataType::kInt32)
+      << "Range op inputs must be int32.";
+  CHECK(delta_array.data_type == ArrayDataType::kInt32)
+      << "Range op inputs must be int32.";
+  CHECK_EQ(RequiredBufferSizeForShape(start_array.shape()), 1)
+      << "Range op inputs must be scalar.";
+  CHECK_EQ(RequiredBufferSizeForShape(limit_array.shape()), 1)
+      << "Range op inputs must be scalar.";
+  CHECK_EQ(RequiredBufferSizeForShape(delta_array.shape()), 1)
+      << "Range op inputs must be scalar.";
+  int size = floor((limit_array.GetBuffer<ArrayDataType::kInt32>().data[0] -
+                    start_array.GetBuffer<ArrayDataType::kInt32>().data[0]) /
+                   delta_array.GetBuffer<ArrayDataType::kInt32>().data[0]);
+
+  // Only set the output shape. Contents are set by ResolveConstantRange.
+  CHECK_EQ(op->outputs.size(), 1);
+  auto& output_array = model->GetArray(op->outputs[0]);
+  Shape* output_shape = output_array.mutable_shape();
+  output_shape->ReplaceDims({size});
 }
 
 void ProcessTensorFlowSplitOperator(Model* model, TensorFlowSplitOperator* op) {
@@ -603,6 +692,9 @@ void ProcessResizeBilinearOperator(Model* model, ResizeBilinearOperator* op) {
   const auto& output_size_shape = output_size_array.shape();
   CHECK_EQ(output_size_shape.dimensions_count(), 1);
   CHECK_EQ(output_size_shape.dims(0), 2);
+  if (!output_size_array.buffer) {
+    return;
+  }
   std::vector<int32> output_shape =
       output_size_array.GetBuffer<ArrayDataType::kInt32>().data;
   model->arrays[op->outputs[0]]->copy_shape(
@@ -698,7 +790,10 @@ void ProcessSpaceToBatchNDOperator(Model* model, SpaceToBatchNDOperator* op) {
     return;
   }
   const auto& input_shape = input_array.shape();
-  CHECK_EQ(input_shape.dimensions_count(), 4);
+  // This method only handles input dimensions of 4.
+  if (input_shape.dimensions_count() != 4) {
+    return;
+  }
   const auto input_height = input_shape.dims(1);
   const auto input_width = input_shape.dims(2);
 
@@ -817,6 +912,7 @@ void ProcessGatherOperator(Model* model, GatherOperator* op) {
 
   // Copy the input dimensions to the output except for dimension 0,
   // where the dimension of indices_shape is used.
+  // TODO(mgubin): if axis != 0 this is not true, change when it's supported.
   auto output_dims = output_array.mutable_shape()->mutable_dims();
   output_dims->push_back(indices_shape.dims(0));
   for (int dim = 1; dim < input_shape.dimensions_count(); dim++) {
@@ -850,35 +946,166 @@ void ProcessPadOperator(Model* model, PadOperator* op) {
   output_array.copy_shape(output_shape);
 }
 
-void ProcessStridedSliceOperator(Model* model, StridedSliceOperator* op) {
-  CHECK_EQ(op->inputs.size(), 4);
+void ProcessRankOperator(Model* model, RankOperator* op) {
+  CHECK_GE(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
+  auto& output_array = *model->arrays[op->outputs[0]];
+  if (output_array.has_shape()) {
+    // Shape already propagated
+    return;
+  }
 
   const auto& input_array = *model->arrays[op->inputs[0]];
+  if (!input_array.has_shape()) {
+    // Yield until input dims have been resolved.
+    return;
+  }
 
-  // Yield until input dims have been resolved.
-  if (!input_array.has_shape()) return;
+  // Only set the output shape. Array contents are set by
+  // ResolveConstantShapeOrRank.
+  Shape* output_shape = output_array.mutable_shape();
+  output_shape->ReplaceDims({});
+}
+
+void ProcessShapeOperator(Model* model, TensorFlowShapeOperator* op) {
+  CHECK_GE(op->inputs.size(), 1);
+  CHECK_EQ(op->outputs.size(), 1);
+  auto& output_array = *model->arrays[op->outputs[0]];
+  if (output_array.has_shape()) {
+    // Shape already propagated
+    return;
+  }
+
+  const auto& input_array = *model->arrays[op->inputs[0]];
+  if (!input_array.has_shape()) {
+    // Yield until input dims have been resolved.
+    return;
+  }
 
-  if (op->start_indices.empty()) return;
-  CHECK_EQ(op->start_indices.size(), op->stop_indices.size());
-  CHECK_EQ(op->start_indices.size(), op->strides.size());
+  // Only set the output shape. Array contents are set by
+  // ResolveConstantShapeOrRank.
+  Shape* output_shape = output_array.mutable_shape();
+  output_shape->ReplaceDims({input_array.shape().dimensions_count()});
+}
 
+void ProcessStackOperator(Model* model, StackOperator* op) {
+  CHECK_GE(op->inputs.size(), 1);
+  CHECK_EQ(op->outputs.size(), 1);
   auto& output_array = *model->arrays[op->outputs[0]];
-  if (output_array.has_shape()) return;
+  if (output_array.has_shape()) {
+    // Shape already propagated
+    return;
+  }
 
-  Shape output_shape = input_array.shape();
-  std::vector<int>& dims = *output_shape.mutable_dims();
-  CHECK_EQ(op->start_indices.size(), dims.size());
+  std::unique_ptr<Shape> stacked_shape;
+  for (const auto& input : op->inputs) {
+    const auto& input_array = model->GetArray(input);
+    if (!input_array.has_shape()) {
+      // Yield until all input dims have been resolved.
+      return;
+    }
 
-  for (int i = 0; i < op->start_indices.size(); ++i) {
-    const int mask = 1 << i;
-    const int start = (op->begin_mask & mask) ? 0 : op->start_indices[i];
-    const int stop = (op->end_mask & mask) ? input_array.shape().dims()[i]
-                                           : op->stop_indices[i];
-    dims[i] = (stop - start) / op->strides[i];
+    Shape shape = input_array.shape();
+    if (shape.dimensions_count() == 0) {
+      // Convert 0D scalars to 1D scalars of shape {1}.
+      shape.mutable_dims()->push_back(1);
+    }
+    if (!stacked_shape) {
+      stacked_shape.reset(new Shape(shape));
+    } else {
+      CHECK(*stacked_shape == shape) << "All input arrays to Stack operators "
+                                        "must have the same shape. Input \""
+                                     << input << "\" is different.";
+    }
   }
 
-  output_array.copy_shape(output_shape);
+  int axis = op->axis;
+  if (axis < 0) {
+    // Handle negative axis
+    axis += stacked_shape->dims().size() + 1;
+  }
+  stacked_shape->mutable_dims()->insert(
+      stacked_shape->mutable_dims()->begin() + axis, op->inputs.size());
+  output_array.copy_shape(*stacked_shape);
+}
+
+void ProcessStridedSliceOperator(Model* model, StridedSliceOperator* op) {
+  CHECK_GE(op->inputs.size(), 1);
+  CHECK_EQ(op->outputs.size(), 1);
+  auto& output_array = *model->arrays[op->outputs[0]];
+  if (output_array.has_shape()) {
+    // Shape already propagated
+    return;
+  }
+
+  if (op->start_indices.empty() || op->stop_indices.empty() ||
+      op->strides.empty()) {
+    // ResolveStridedSliceAttributes has not run yet.
+    return;
+  }
+
+  const auto& input_array = model->GetArray(op->inputs[0]);
+  if (!input_array.has_shape()) {
+    // Yield until input dims have been resolved.
+    return;
+  }
+
+  if (op->ellipsis_mask != 0) {
+    // Something like LOG_FIRST_N(WARNING, 10) would be prefferable to reduce
+    // log noise. However, the TensorFlow logging library does not appear to
+    // support this.
+    LOG(WARNING) << "Skipping StridedSlice op with output \"" << op->outputs[0]
+                 << "\". ellipsis_mask is not supported (mask="
+                 << op->ellipsis_mask << ")";
+    return;
+  }
+  if (op->new_axis_mask != 0) {
+    LOG(WARNING) << "Skipping StridedSlice op with output \"" << op->outputs[0]
+                 << "\". new_axis_mask is not supported (mask="
+                 << op->new_axis_mask << ")";
+    return;
+  }
+
+  int dim_count = input_array.shape().dimensions_count();
+  CHECK(op->start_indices.size() == dim_count)
+      << ": Incorrect number of start indices supplied to StridedSlice op with "
+         "output \""
+      << op->outputs[0] << "\". Op requires " << dim_count << " start indices";
+  CHECK(op->stop_indices.size() == dim_count)
+      << ": Incorrect number of stop indices supplied to StridedSlice op with "
+         "output \""
+      << op->outputs[0] << "\". Op requires " << dim_count << " stop indices";
+  CHECK(op->strides.size() == dim_count)
+      << ": Incorrect number of strides supplied to StridedSlice op with "
+         " output \""
+      << op->outputs[0] << "\". Op requires " << dim_count << " strides";
+
+  // Create output shape
+  std::vector<int>* dims = output_array.mutable_shape()->mutable_dims();
+
+  // Compute output shape
+  for (int i = 0; i < dim_count; ++i) {
+    const int mask = 1 << i;
+    int start = (op->begin_mask & mask) ? 0 : op->start_indices[i];
+    if (start < 0) {
+      // handle negative indices
+      start += input_array.shape().dims(i);
+    }
+    int stop = (op->end_mask & mask) ? input_array.shape().dims(i)
+                                     : op->stop_indices[i];
+    if (stop < 0) {
+      // handle negative indices
+      stop += input_array.shape().dims(i);
+    }
+
+    int dim_size = (stop - start) / op->strides[i];
+    if (op->shrink_axis_mask & mask) {
+      CHECK_EQ(dim_size, 1) << "Output size for an axis must compute to 1 when "
+                               "shrinking that axis";
+    } else {
+      dims->push_back(dim_size);
+    }
+  }
 }
 
 void ProcessSqueezeOperator(Model* model, SqueezeOperator* op) {
@@ -935,6 +1162,73 @@ void ProcessSvdfOperator(Model* model, SvdfOperator* op) {
   auto& output_array = model->GetArray(op->outputs[1]);
   output_array.mutable_shape()->ReplaceDims({batch_size, num_units});
 }
+
+void ProcessTransposeOperator(Model* model, TransposeOperator* op) {
+  auto& output_array = *model->arrays[op->outputs[0]];
+  if (output_array.has_shape()) {
+    // We have already run
+    return;
+  }
+
+  const auto& input_array = *model->arrays[op->inputs[0]];
+  if (!input_array.has_shape()) {
+    // Yield until input dims have been resolved.
+    return;
+  }
+  const auto& input_shape = input_array.shape();
+
+  auto& perm_array = model->GetArray(op->inputs[1]);
+  if (!perm_array.has_shape()) {
+    // Yield until permutation shape been resolved.
+    return;
+  }
+  if (!perm_array.buffer) {
+    // Yield until the permutation is constant
+    return;
+  }
+  CHECK(perm_array.data_type == ArrayDataType::kInt32)
+      << "Transpose permutation input must be int32";
+
+  std::vector<int32> const& perm =
+      perm_array.GetBuffer<ArrayDataType::kInt32>().data;
+  CHECK_EQ(perm.size(), input_shape.dimensions_count())
+      << "Transpose permutation input must be same length as input dimensions";
+  std::vector<int>* output_dims = output_array.mutable_shape()->mutable_dims();
+  for (int i = 0; i < perm.size(); i++) {
+    int axis = perm[i];
+    CHECK_GE(axis, 0);
+    CHECK_LT(axis, input_shape.dimensions_count());
+    output_dims->push_back(input_shape.dims(axis));
+  }
+}
+
+void ProcessArgMaxOperator(Model* model, ArgMaxOperator* op) {
+  CHECK_EQ(op->inputs.size(), 2);
+  const auto& input_array = *model->arrays[op->inputs[0]];
+  // Yield until input dims have been resolved.
+  if (!input_array.has_shape()) {
+    return;
+  }
+
+  // The current ArgMax implementation only supports 4-dimensional inputs with
+  // the last dimension as the axis to perform ArgMax for.
+  const std::vector<int>& input_dims = input_array.shape().dims();
+  CHECK_EQ(input_dims.size(), 4);
+  std::vector<int> output_dims;
+
+  output_dims.reserve(input_dims.size() - 1);
+  for (int i = 0; i < input_dims.size() - 1; ++i) {
+    output_dims.push_back(input_dims[i]);
+  }
+  output_dims.push_back(1);
+  const string& output_name = op->outputs[0];
+  auto& output_array = *model->arrays[output_name];
+  if (output_array.has_shape()) {
+    return;
+  }
+  *output_array.mutable_shape()->mutable_dims() = output_dims;
+}
+
 }  // namespace
 
 bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
@@ -960,6 +1254,7 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
     case OperatorType::kLocalResponseNormalization:
     case OperatorType::kTensorFlowIdentity:
     case OperatorType::kFakeQuant:
+    case OperatorType::kNeg:
     case OperatorType::kTensorFlowRsqrt:
     case OperatorType::kTensorFlowSqrt:
     case OperatorType::kTensorFlowSquare:
@@ -977,6 +1272,8 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
     case OperatorType::kSub:
     case OperatorType::kMul:
     case OperatorType::kDiv:
+    case OperatorType::kFloorDiv:
+    case OperatorType::kFloorMod:
     case OperatorType::kTensorFlowLess:
     case OperatorType::kTensorFlowLessEqual:
     case OperatorType::kTensorFlowGreater:
@@ -988,6 +1285,10 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
     case OperatorType::kConv:
       ProcessConvOperator(model, static_cast<ConvOperator*>(op));
       break;
+    case OperatorType::kTransposeConv:
+      // Unimplemented, hopefully another graph transformation will drop it or
+      // rewrite it.
+      break;
     case OperatorType::kDepthwiseConv:
       ProcessDepthwiseConvOperator(model,
                                    static_cast<DepthwiseConvOperator*>(op));
@@ -1000,6 +1301,9 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
       ProcessSpaceToDepthOperator(model,
                                   static_cast<SpaceToDepthOperator*>(op));
       break;
+    case OperatorType::kFill:
+      ProcessFillOperator(model, static_cast<FillOperator*>(op));
+      break;
     case OperatorType::kFullyConnected:
       ProcessFullyConnectedOperator(model,
                                     static_cast<FullyConnectedOperator*>(op));
@@ -1062,9 +1366,20 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
       // a more general non-depth concatenation that will hopefully be dropped,
       // or else at the moment we will abort.
       break;
+    case OperatorType::kExpandDims:
+      // Yield until ExpandDims is converted to Reshape
+      break;
+    case OperatorType::kRange:
+      ProcessRangeOperator(model, static_cast<RangeOperator*>(op));
+      break;
+    case OperatorType::kRank:
+      ProcessRankOperator(model, static_cast<RankOperator*>(op));
+      break;
     case OperatorType::kTensorFlowShape:
-      // Unimplemented, hopefully another graph transformation will drop it or
-      // rewrite it.
+      ProcessShapeOperator(model, static_cast<TensorFlowShapeOperator*>(op));
+      break;
+    case OperatorType::kStack:
+      ProcessStackOperator(model, static_cast<StackOperator*>(op));
       break;
     case OperatorType::kReorderAxes:
       ProcessReorderAxesOperator(model, static_cast<ReorderAxesOperator*>(op));
@@ -1099,11 +1414,17 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
       ProcessStridedSliceOperator(model,
                                   static_cast<StridedSliceOperator*>(op));
       break;
+    case OperatorType::kArgMax:
+      ProcessArgMaxOperator(model, static_cast<ArgMaxOperator*>(op));
+      break;
     case OperatorType::kTensorFlowUnsupported:
       break;
     case OperatorType::kSvdf:
       ProcessSvdfOperator(model, static_cast<SvdfOperator*>(op));
       break;
+    case OperatorType::kTranspose:
+      ProcessTransposeOperator(model, static_cast<TransposeOperator*>(op));
+      break;
     default:
       // Unimplemented, another graph transformation should drop it.
       LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(op->type);
@@ -1114,6 +1435,8 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
   for (const auto& output : op->outputs) {
     if (model->arrays[output]->has_shape() &&
         (old_output_dims[output] != model->arrays[output]->shape().dims())) {
+      AddMessageF("Set shape of %s to [%s]", output,
+                  absl::StrJoin(model->arrays[output]->shape().dims(), ","));
       return true;
     }
   }
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
index d33597d38144278dfca66edbdd9b3da68fbaa32c..56082b965a7cbd9d61cca2e26f7d76764c0e54aa 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
@@ -42,7 +42,7 @@ bool SupportsQuantization(const Operator& op) {
          type == OperatorType::kL2Normalization || type == OperatorType::kAdd ||
          type == OperatorType::kAveragePool || type == OperatorType::kMaxPool ||
          type == OperatorType::kLogistic || type == OperatorType::kSoftmax ||
-         type == OperatorType::kSqueeze ||
+         type == OperatorType::kSqueeze || type == OperatorType::kPad ||
          type == OperatorType::kTensorFlowReshape ||
          type == OperatorType::kMul || type == OperatorType::kSpaceToDepth ||
          type == OperatorType::kDepthToSpace;
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
index b6037357047fc699ffb15cb40d539be148a0b637..23a5c857e8b19f7edbb48f2c004d03e21008833d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
@@ -57,7 +57,8 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) {
 
   // Drop trivial inputs.
   for (const string& input : trivial_inputs) {
-    if (CountOpsWithInput(*model, input) == 1) {
+    if (IsDiscardableArray(*model, input) &&
+        CountOpsWithInput(*model, input) == 1) {
       model->arrays.erase(input);
     }
   }
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
index 0ab301552ff61405cd9c2ae42ddd11805eb707e3..e6cca8acf36745d989fb731aa948f257375d7e90 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
@@ -47,10 +47,7 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
     bool found_output_as_rnn_state_array = false;
     for (const auto& rnn_state : model->flags.rnn_states()) {
       if (output == rnn_state.state_array()) {
-        CHECK(op->type == OperatorType::kTensorFlowUnsupported);
-        CHECK_EQ(static_cast<const TensorFlowUnsupportedOperator*>(op)
-                     ->tensorflow_op,
-                 "Fill");
+        CHECK(op->type == OperatorType::kFill);
         found_output_as_rnn_state_array = true;
         break;
       }
@@ -65,7 +62,12 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
     }
     for (const auto& rnn_state : model->flags.rnn_states()) {
       if (output == rnn_state.back_edge_source_array()) {
-        return false;
+        // The output is consumed by a RNN back-edge..
+        if (!IsDiscardableArray(*model, rnn_state.back_edge_source_array()) ||
+            !IsDiscardableArray(*model, rnn_state.state_array()) ||
+            CountOpsWithInput(*model, rnn_state.state_array())) {
+          return false;
+        }
       }
     }
     if (CountOpsWithInput(*model, output)) {
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7777d4f54359071c775806999ecf1418a8762d60
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc
@@ -0,0 +1,74 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) {
+  const auto op_it = model->operators.begin() + op_index;
+  if (op_it->get()->type != OperatorType::kBatchToSpaceND) return false;
+
+  auto* op = static_cast<BatchToSpaceNDOperator*>(op_it->get());
+
+  // The attributes are resolved only when the 3 attributes (block_shape,
+  // before_crops, after_crops) are all constant.
+  if (!op->block_shape.empty()) {
+    return false;
+  }
+
+  CHECK_EQ(op->inputs.size(), 3);
+  if (!IsConstantParameterArray(*model, op->inputs[1]) ||
+      !IsConstantParameterArray(*model, op->inputs[2]))
+    return false;
+
+  // Handle crops
+  const auto& crops_array = *model->arrays[op->inputs[2]];
+  if (!crops_array.has_shape()) return false;
+  const std::vector<int>& crops_dims = crops_array.shape().dims();
+  if (crops_dims.size() != 2) {
+    // Code only handles crops of 2 dimensions. Perhaps another transformation
+    // will delete this op.
+    return false;
+  }
+  std::vector<int> crops_buffer =
+      crops_array.GetBuffer<ArrayDataType::kInt32>().data;
+  for (int i = 0; i < crops_dims[0]; ++i) {
+    op->before_crops.push_back(crops_buffer[i * 2]);
+    op->after_crops.push_back(crops_buffer[i * 2 + 1]);
+  }
+
+  // Handle block_shape
+  const auto& block_shape_array = *model->arrays[op->inputs[1]];
+  if (!block_shape_array.has_shape()) return false;
+  const std::vector<int>& block_shape_dims = block_shape_array.shape().dims();
+  CHECK_EQ(block_shape_dims.size(), 1);
+  std::vector<int> block_shape_buffer =
+      block_shape_array.GetBuffer<ArrayDataType::kInt32>().data;
+  for (int i = 0; i < block_shape_dims[0]; ++i) {
+    op->block_shape.push_back(block_shape_buffer[i]);
+  }
+
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
index 53e1be7a05807cde305eca2a7a8901f652f986f6..fd51df4058dbda4732686983f9b9dab3781ec4d1 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
@@ -141,6 +141,10 @@ void EvaluateBinaryOperatorOnConstantInputs(Model* model,
       outval = val0 - val1;
     } else if (binary_op->type == OperatorType::kDiv) {
       outval = val0 / val1;
+    } else if (binary_op->type == OperatorType::kFloorDiv) {
+      outval = floor(val0 / val1);
+    } else if (binary_op->type == OperatorType::kFloorMod) {
+      outval = val0 - (floor(val0 / val1) * val1);
     } else if (binary_op->type == OperatorType::kTensorFlowMinimum) {
       outval = std::min(val0, val1);
     } else if (binary_op->type == OperatorType::kTensorFlowMaximum) {
@@ -191,6 +195,8 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
       binary_op->type != OperatorType::kMul &&
       binary_op->type != OperatorType::kSub &&
       binary_op->type != OperatorType::kDiv &&
+      binary_op->type != OperatorType::kFloorDiv &&
+      binary_op->type != OperatorType::kFloorMod &&
       binary_op->type != OperatorType::kTensorFlowMinimum &&
       binary_op->type != OperatorType::kTensorFlowMaximum &&
       binary_op->type != OperatorType::kTensorFlowLess &&
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
index 0983c438498fed28903f8facf8db239ec1a7c2c4..9835f86398a37f118d3ebd5b568ffddbcd56c38b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
@@ -151,7 +151,7 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
     if (!IsDiscardableArray(*model, input_name)) return false;
   }
 
-  const int concatenation_axis = concat_op->concat_dim;
+  const int concatenation_axis = concat_op->axis;
 
   CHECK_EQ(concat_op->outputs.size(), 1);
   string concatenated_array_name = concat_op->outputs[0];
@@ -179,6 +179,10 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
       ConcatenateTensorBuffers<ArrayDataType::kInt64>(
           input_arrays, concatenation_axis, &concatenated_array);
       break;
+    case ArrayDataType::kString:
+      ConcatenateTensorBuffers<ArrayDataType::kString>(
+          input_arrays, concatenation_axis, &concatenated_array);
+      break;
     default:
       LOG(FATAL) << "ArrayDataType not supported";
   }
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9da51d9147a98a935d00db04827aa7ebb12998b9
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
@@ -0,0 +1,120 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <vector>
+
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+template <ArrayDataType Type>
+bool ComputeFillArray(Model* model, FillOperator* op) {
+  const auto& val_array = model->GetArray(op->inputs[1]);
+  auto& output_array = model->GetArray(op->outputs[0]);
+
+  CHECK(val_array.data_type == Type);
+  CHECK(output_array.data_type == Type);
+
+  // Compute the array data
+  std::vector<DataType<Type>>& data =
+      output_array.GetMutableBuffer<Type>().data;
+  data.resize(RequiredBufferSizeForShape(output_array.shape()));
+  DataType<Type> fill_val = val_array.GetBuffer<Type>().data[0];
+  for (size_t i = 0; i < data.size(); i++) {
+    data[i] = fill_val;
+  }
+
+  return true;
+}
+
+bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
+  const auto fill_it = model->operators.begin() + op_index;
+  auto* base_op = fill_it->get();
+  if (base_op->type != OperatorType::kFill) {
+    return false;
+  }
+  auto* op = static_cast<FillOperator*>(base_op);
+
+  CHECK_EQ(op->inputs.size(), 2);
+  CHECK_EQ(op->outputs.size(), 1);
+
+  auto& output_array = model->GetArray(op->outputs[0]);
+  if (output_array.data_type == ArrayDataType::kNone) {
+    // Yield until the output type has been set by PropagateArrayDataTypes
+    return false;
+  }
+
+  if (!output_array.has_shape()) {
+    // Yield until the output shape has been set by PropagateFixedShapes
+    return false;
+  }
+
+  const auto& val_array = model->GetArray(op->inputs[1]);
+  if (!val_array.has_shape()) {
+    // Yield until the value shape has been resolved.
+    return false;
+  }
+  if (!IsConstantParameterArray(*model, op->inputs[1])) {
+    // Yield until the value is constant.
+    return false;
+  }
+  CHECK_EQ(RequiredBufferSizeForShape(val_array.shape()), 1);
+
+  switch (output_array.data_type) {
+    case ArrayDataType::kFloat:
+      if (!ComputeFillArray<ArrayDataType::kFloat>(model, op)) {
+        return false;
+      }
+      break;
+    case ArrayDataType::kUint8:
+      if (!ComputeFillArray<ArrayDataType::kUint8>(model, op)) {
+        return false;
+      }
+      break;
+    case ArrayDataType::kInt32:
+      if (!ComputeFillArray<ArrayDataType::kInt32>(model, op)) {
+        return false;
+      }
+      break;
+    case ArrayDataType::kInt64:
+      if (!ComputeFillArray<ArrayDataType::kInt64>(model, op)) {
+        return false;
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported data type given to Fill op with output \""
+                 << op->outputs[0] << "\"";
+      break;
+  }
+
+  // Erase input arrays if no longer used
+  if (IsDiscardableArray(*model, op->inputs[0]) &&
+      CountOpsWithInput(*model, op->inputs[0]) == 1) {
+    model->arrays.erase(op->inputs[0]);
+  }
+  if (IsDiscardableArray(*model, op->inputs[1]) &&
+      CountOpsWithInput(*model, op->inputs[1]) == 1) {
+    model->arrays.erase(op->inputs[1]);
+  }
+
+  // Erase the operator
+  model->operators.erase(fill_it);
+
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
new file mode 100644
index 0000000000000000000000000000000000000000..383d54aa5a7fa4933a9eb9ffac014bab4497d40d
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
@@ -0,0 +1,107 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
+  const auto it = model->operators.begin() + op_index;
+  auto* base_op = it->get();
+  if (base_op->type != OperatorType::kRange) {
+    return false;
+  }
+  auto* op = static_cast<RangeOperator*>(base_op);
+
+  CHECK_EQ(op->inputs.size(), 3);
+  const auto& start_array = *model->arrays[op->inputs[0]];
+  if (!start_array.has_shape()) {
+    // Yield until all input dims have been resolved.
+    return false;
+  }
+  const auto& limit_array = *model->arrays[op->inputs[1]];
+  if (!limit_array.has_shape()) {
+    // Yield until all input dims have been resolved.
+    return false;
+  }
+  const auto& delta_array = *model->arrays[op->inputs[2]];
+  if (!delta_array.has_shape()) {
+    // Yield until all input dims have been resolved.
+    return false;
+  }
+
+  for (const auto& input : op->inputs) {
+    if (!IsConstantParameterArray(*model, input)) {
+      // yield if any input is mutable
+      return false;
+    }
+  }
+
+  CHECK_EQ(op->outputs.size(), 1);
+  auto& output_array = *model->arrays[op->outputs[0]];
+  if (output_array.data_type == ArrayDataType::kNone) {
+    // Yield until the output type has been set by PropagateArrayDataTypes
+    return false;
+  }
+
+  CHECK_EQ(RequiredBufferSizeForShape(start_array.shape()), 1)
+      << "Range op inputs must be scalar.";
+  CHECK_EQ(RequiredBufferSizeForShape(limit_array.shape()), 1)
+      << "Range op inputs must be scalar.";
+  CHECK_EQ(RequiredBufferSizeForShape(delta_array.shape()), 1)
+      << "Range op inputs must be scalar.";
+
+  CHECK(start_array.data_type == ArrayDataType::kInt32)
+      << "Range op inputs must be int32.";
+  CHECK(limit_array.data_type == ArrayDataType::kInt32)
+      << "Range op inputs must be int32.";
+  CHECK(delta_array.data_type == ArrayDataType::kInt32)
+      << "Range op inputs must be int32.";
+
+  // Compute buffer contents
+  int start = start_array.GetBuffer<ArrayDataType::kInt32>().data[0];
+  int limit = limit_array.GetBuffer<ArrayDataType::kInt32>().data[0];
+  int delta = delta_array.GetBuffer<ArrayDataType::kInt32>().data[0];
+  auto& buffer = output_array.GetMutableBuffer<ArrayDataType::kInt32>();
+  buffer.data.clear();
+  for (int32 val = start; val < limit; val += delta) {
+    buffer.data.push_back(val);
+  }
+  CHECK_EQ(floor((limit - start) / delta), buffer.data.size());
+  CHECK_EQ(buffer.data.size(), output_array.shape().dims()[0]);
+
+  // Delete the input array if no longer used
+  if (IsDiscardableArray(*model, op->inputs[0]) &&
+      CountOpsWithInput(*model, op->inputs[0]) == 1) {
+    model->arrays.erase(op->inputs[0]);
+  }
+  if (IsDiscardableArray(*model, op->inputs[1]) &&
+      CountOpsWithInput(*model, op->inputs[1]) == 1) {
+    model->arrays.erase(op->inputs[1]);
+  }
+  if (IsDiscardableArray(*model, op->inputs[2]) &&
+      CountOpsWithInput(*model, op->inputs[2]) == 1) {
+    model->arrays.erase(op->inputs[2]);
+  }
+
+  // Delete the operator
+  model->operators.erase(it);
+
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
new file mode 100644
index 0000000000000000000000000000000000000000..35b81dd5506cfb0048ab1347bfefd07b128bc92b
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
@@ -0,0 +1,72 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) {
+  const auto it = model->operators.begin() + op_index;
+  const auto* op = it->get();
+  if (!(op->type == OperatorType::kTensorFlowShape ||
+        op->type == OperatorType::kRank)) {
+    return false;
+  }
+
+  CHECK_EQ(op->outputs.size(), 1);
+  auto& output_array = model->GetArray(op->outputs[0]);
+  if (output_array.data_type == ArrayDataType::kNone) {
+    // Yield until the output type has been resolved
+    return false;
+  }
+
+  const auto& input_array = model->GetArray(op->inputs[0]);
+  if (!input_array.has_shape()) {
+    // Yield until the input array's shape has been resolved.
+    return false;
+  }
+
+  if (!output_array.has_shape()) {
+    // Yield until the output shape has been resolved.
+    return false;
+  }
+
+  // Compute the output
+  CHECK(!output_array.buffer);
+  auto& output_buffer = output_array.GetMutableBuffer<ArrayDataType::kInt32>();
+  if (op->type == OperatorType::kTensorFlowShape) {
+    // Copy the input shape into the output buffer.
+    output_buffer.data = input_array.shape().dims();
+  } else if (op->type == OperatorType::kRank) {
+    // Copy the dimension count into the output buffer.
+    output_buffer.data.resize(1);
+    output_buffer.data[0] = input_array.shape().dimensions_count();
+  }
+  output_array.mutable_shape()->ReplaceDims(
+      {static_cast<int>(output_buffer.data.size())});
+
+  // Delete the input array if no longer used
+  if (IsDiscardableArray(*model, op->inputs[0]) &&
+      CountOpsWithInput(*model, op->inputs[0]) == 1) {
+    model->arrays.erase(op->inputs[0]);
+  }
+
+  model->operators.erase(it);
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_stack.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_stack.cc
new file mode 100644
index 0000000000000000000000000000000000000000..86c76141a4705de841c8e70790cce7be28fb59c9
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_stack.cc
@@ -0,0 +1,113 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <vector>
+
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+namespace {
+
+template <ArrayDataType Type>
+void Stack(Model* model, StackOperator const& op) {
+  auto& output_array = model->GetArray(op.outputs[0]);
+  CHECK(output_array.data_type == Type);
+
+  // Create a buffer for the output array
+  std::vector<DataType<Type>>& output_data =
+      output_array.GetMutableBuffer<Type>().data;
+  output_data.resize(RequiredBufferSizeForShape(output_array.shape()));
+
+  // Stack inputs into buffer
+  CHECK_EQ(op.axis, 0) << "Stacking only supported along first axis";
+  int dst_offset = 0;
+  for (int i = 0; i < op.inputs.size(); i++) {
+    // Append array data to output for each input array
+    const auto& input_array = model->GetArray(op.inputs[i]);
+    int input_size = RequiredBufferSizeForShape(input_array.shape());
+    memcpy(&output_data[dst_offset], &input_array.GetBuffer<Type>().data[0],
+           input_size * sizeof(Type));
+    dst_offset += input_size;
+  }
+  CHECK_EQ(dst_offset, output_data.size());
+}
+
+}  // namespace
+
+bool ResolveConstantStack::Run(Model* model, std::size_t op_index) {
+  auto it = model->operators.begin() + op_index;
+  const auto* base_op = it->get();
+  if (base_op->type != OperatorType::kStack) {
+    return false;
+  }
+  const auto* op = static_cast<const StackOperator*>(base_op);
+
+  CHECK_GE(op->inputs.size(), 1);
+  CHECK_EQ(op->outputs.size(), 1);
+  auto& output_array = model->GetArray(op->outputs[0]);
+  if (output_array.data_type == ArrayDataType::kNone) {
+    // Yield until the output type has been set by PropagateArrayDataTypes
+    return false;
+  }
+
+  if (!output_array.has_shape()) {
+    // Yield until the output shape has been set by PropagateFixedShapes
+    return false;
+  }
+
+  for (const auto& input : op->inputs) {
+    if (!IsConstantParameterArray(*model, input)) {
+      // Yield if any input is mutable
+      return false;
+    }
+  }
+
+  CHECK(!output_array.buffer);
+  switch (output_array.data_type) {
+    case ArrayDataType::kFloat:
+      Stack<ArrayDataType::kFloat>(model, *op);
+      break;
+    case ArrayDataType::kUint8:
+      Stack<ArrayDataType::kUint8>(model, *op);
+      break;
+    case ArrayDataType::kInt32:
+      Stack<ArrayDataType::kInt32>(model, *op);
+      break;
+    case ArrayDataType::kInt64:
+      Stack<ArrayDataType::kInt64>(model, *op);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported data type given to Stack op with output \""
+                 << op->outputs[0] << "\"";
+      break;
+  }
+
+  // Erase input arrays if no longer used
+  for (const auto& input : op->inputs) {
+    if (IsDiscardableArray(*model, input) &&
+        CountOpsWithInput(*model, input) == 1) {
+      model->arrays.erase(input);
+    }
+  }
+
+  // Erase the operator
+  model->operators.erase(it);
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3976d9cbb492138c0c45801045833e08411acbd4
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
@@ -0,0 +1,198 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <vector>
+
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+namespace {
+
+int StartForAxis(StridedSliceOperator const& op, Shape const& input_shape,
+                 int axis) {
+  int start;
+  if (op.begin_mask & 1 << axis) {
+    // If begin mask bit is set, use the first element
+    start = 0;
+  } else {
+    // Otherwise, use the specified element
+    start = op.start_indices[axis];
+    if (start < 0) {
+      // Handle negative indices
+      start += input_shape.dims(axis);
+    }
+  }
+  return start;
+}
+
+int StopForAxis(StridedSliceOperator const& op, Shape const& input_shape,
+                int axis) {
+  int stop;
+  if (op.end_mask & (1 << axis)) {
+    // If end mask bit set, use the last element
+    stop = input_shape.dims(axis);
+  } else {
+    // Otherwise, use the specified element
+    stop = op.stop_indices[axis];
+    if (stop < 0) {
+      // Handle negative indices
+      stop += input_shape.dims(axis);
+    }
+  }
+  return stop;
+}
+
+template <ArrayDataType Type>
+void StridedSlice(StridedSliceOperator const& op, Array const& input_array,
+                  Array* output_array) {
+  // The TensorFlow documentation for StridedSlice is a bit ambiguous in places
+  // (https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/strided-slice).
+  // Use the source code at /third_party/tensorflow/core/util/strided_op.cc as
+  // "master documentation".
+
+  CHECK(input_array.data_type == Type);
+  CHECK(output_array->data_type == Type);
+  CHECK_EQ(op.ellipsis_mask, 0);
+  CHECK_EQ(op.new_axis_mask, 0);
+
+  int num_input_axes = op.start_indices.size();
+  CHECK_EQ(num_input_axes, op.stop_indices.size());
+  CHECK_EQ(num_input_axes, op.strides.size());
+  for (int i = 0; i < op.strides.size(); i++) {
+    CHECK_GE(op.strides[i], 0) << "Negative strides usupported";
+  }
+
+  // Create a buffer for the output array
+  std::vector<DataType<Type>>& output_data =
+      output_array->GetMutableBuffer<Type>().data;
+  output_data.resize(RequiredBufferSizeForShape(output_array->shape()));
+
+  // Initialize source coordinate
+  Shape const& input_shape = input_array.shape();
+  Buffer<Type> const& input_buffer = input_array.GetBuffer<Type>();
+  std::vector<int> src_coord(op.start_indices.size());
+  for (int axis = 0; axis < num_input_axes; axis++) {
+    src_coord[axis] = StartForAxis(op, input_shape, axis);
+  }
+
+  // In order to handle any number (N) of dimensions, we copy elements one by
+  // one and treat the source coordinate as an N digit number (src_coord here).
+  // Each "digit" is incremented individually (by the stride). When it overflows
+  // (becomes greater than the stop), that digit is reset and a carry flag is
+  // used to increment the next digit.
+  int dst_offset = 0;
+  do {
+    // Copy element.
+    output_data[dst_offset] = input_buffer.data[Offset(input_shape, src_coord)];
+
+    // Compute next source input coordinates.
+    bool carry = true;
+    for (int axis = 0; axis < num_input_axes; axis++) {
+      // Increment this axis if we carried from the previous one
+      if (carry) {
+        src_coord[axis] += op.strides[axis];
+      }
+
+      // Check if we've overflowed.
+      if (src_coord[axis] >= StopForAxis(op, input_shape, axis)) {
+        // Reset axis and set carry
+        src_coord[axis] = StartForAxis(op, input_shape, axis);
+        carry = true;
+      } else {
+        carry = false;
+      }
+    }
+    // increment destination buffer offset
+    dst_offset++;
+  } while (dst_offset < output_data.size());
+}
+
+}  // anonymous namespace
+
+bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
+  const auto it = model->operators.begin() + op_index;
+  const auto* base_op = it->get();
+  if (base_op->type != OperatorType::kStridedSlice) {
+    return false;
+  }
+
+  const StridedSliceOperator* op =
+      static_cast<const StridedSliceOperator*>(base_op);
+
+  CHECK_EQ(op->outputs.size(), 1);
+  auto& output_array = model->GetArray(op->outputs[0]);
+  if (output_array.data_type == ArrayDataType::kNone) {
+    // Yield until the output type has been set by PropagateArrayDataTypes
+    return false;
+  }
+
+  if (!output_array.has_shape()) {
+    // Yield until the output shape has been set by PropagateFixedShapes
+    return false;
+  }
+
+  if (op->start_indices.empty() || op->stop_indices.empty() ||
+      op->strides.empty()) {
+    // Attributes have not resolved yet.
+    return false;
+  }
+
+  const auto& input_array = model->GetArray(op->inputs[0]);
+  if (!input_array.has_shape()) {
+    // Yield until the value shape has been resolved.
+    return false;
+  }
+  if (!IsConstantParameterArray(*model, op->inputs[0])) {
+    // Yield until the value is constant.
+    return false;
+  }
+
+  CHECK(!output_array.buffer);
+  switch (output_array.data_type) {
+    case ArrayDataType::kFloat:
+      StridedSlice<ArrayDataType::kFloat>(*op, input_array, &output_array);
+      break;
+    case ArrayDataType::kUint8:
+      StridedSlice<ArrayDataType::kUint8>(*op, input_array, &output_array);
+      break;
+    case ArrayDataType::kInt32:
+      StridedSlice<ArrayDataType::kInt32>(*op, input_array, &output_array);
+      break;
+    case ArrayDataType::kInt64:
+      StridedSlice<ArrayDataType::kInt64>(*op, input_array, &output_array);
+      break;
+    default:
+      LOG(FATAL)
+          << "Unsupported data type input to StridedSlice op with output \""
+          << op->outputs[0] << "\"";
+      break;
+  }
+
+  // Erase input array if no longer used
+  if (IsDiscardableArray(*model, op->inputs[0]) &&
+      CountOpsWithInput(*model, op->inputs[0]) == 1) {
+    model->arrays.erase(op->inputs[0]);
+  }
+
+  // Erase the operator
+  model->operators.erase(it);
+
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tensorflow_shape.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tensorflow_shape.cc
deleted file mode 100644
index 8cc6db161987bbd834212fdfed7e1f82cac958ce..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tensorflow_shape.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include <cstddef>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
-#include "tensorflow/contrib/lite/toco/model.h"
-#include "tensorflow/contrib/lite/toco/tooling_util.h"
-#include "tensorflow/core/platform/logging.h"
-
-namespace toco {
-
-bool ResolveConstantTensorFlowShape::Run(Model* model, std::size_t op_index) {
-  const auto tfshape_it = model->operators.begin() + op_index;
-  const auto* tfshape_base_op = tfshape_it->get();
-  if (tfshape_base_op->type != OperatorType::kTensorFlowShape) {
-    return false;
-  }
-
-  const auto* tfshape_op =
-      static_cast<const TensorFlowShapeOperator*>(tfshape_base_op);
-
-  const auto& input_array = model->GetArray(tfshape_op->inputs[0]);
-  auto& output_array = model->GetArray(tfshape_op->outputs[0]);
-
-  // Yield until the input array's shape has been resolved.
-  if (!input_array.has_shape()) {
-    return false;
-  }
-
-  // Create a buffer for the output array, making it a constant array, and
-  // copy the input shape into the output buffer.
-  CHECK(!output_array.buffer);
-  auto& output_buffer = output_array.GetMutableBuffer<ArrayDataType::kInt32>();
-  output_buffer.data = input_array.shape().dims();
-
-  // Erase the input array if no longer used
-  if (IsDiscardableArray(*model, tfshape_op->inputs[0]) &&
-      CountOpsWithInput(*model, tfshape_op->inputs[0]) == 1) {
-    model->arrays.erase(tfshape_op->inputs[0]);
-  }
-  model->operators.erase(tfshape_it);
-
-  return true;
-}
-
-}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
index bb9bda3c82cc9e9d3526efdabbb2c478fb172d80..26ff9d887b40651559ad030cd41a824679d6dd15 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
@@ -32,7 +32,9 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
   const auto unary_it = model->operators.begin() + op_index;
   const auto* unary_op = unary_it->get();
   // Test for unary ops of types that we know how to resolve
-  if (unary_op->type != OperatorType::kTensorFlowRsqrt &&
+  if (unary_op->type != OperatorType::kCast &&
+      unary_op->type != OperatorType::kNeg &&
+      unary_op->type != OperatorType::kTensorFlowRsqrt &&
       unary_op->type != OperatorType::kTensorFlowSqrt &&
       unary_op->type != OperatorType::kTensorFlowSquare &&
       unary_op->type != OperatorType::kTensorFlowSum &&
@@ -56,6 +58,12 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     }
   }
 
+  auto& output_array = model->GetArray(unary_op->outputs[0]);
+  if (!output_array.has_shape()) {
+    // Yield until the output array dims have been resolved.
+    return false;
+  }
+
   // At the moment we don't want to care about fused activation functions.
   // The idea is that we should do the present constants-propagation before
   // activation functions get fused.
@@ -67,48 +75,76 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
         LogName(*unary_op));
     return false;
   }
+
   const auto& input_array = model->GetArray(unary_op->inputs[0]);
   // We have already tested above for existence of buffers (synonymous to being
   // a constant param).
   CHECK(input_array.buffer);
-  // At the moment we only support float buffers.
-  if (input_array.buffer->type != ArrayDataType::kFloat) {
-    return false;
-  }
-  const auto& input_float_data =
-      input_array.GetBuffer<ArrayDataType::kFloat>().data;
-  // Create the float buffer on the output array, effectively turning it into
-  // a constant parameter
-  const auto& output_name = unary_op->outputs[0];
-  auto& output_array = model->GetArray(output_name);
-  // Yield until the output array dims have been resolved.
-  if (!output_array.has_shape()) {
-    return false;
+  std::vector<DataType<ArrayDataType::kFloat>> const* input_float_data;
+  if (unary_op->type == OperatorType::kCast) {
+    CastOperator const* cast_op = static_cast<CastOperator const*>(unary_op);
+    if (cast_op->dst_data_type != ArrayDataType::kFloat) {
+      AddMessageF(
+          "Not resolving constant %s because we currently only support casting "
+          "to float",
+          LogName(*unary_op));
+      return false;
+    }
+    if (cast_op->src_data_type != input_array.buffer->type) {
+      AddMessageF(
+          "Not resolving constant %s because cast op source type does not "
+          "match input type",
+          LogName(*unary_op));
+    }
+  } else {
+    if (input_array.buffer->type != ArrayDataType::kFloat) {
+      return false;
+    }
+    input_float_data = &(input_array.GetBuffer<ArrayDataType::kFloat>().data);
   }
 
-  int input_buffer_size = RequiredBufferSizeForShape(input_array.shape());
-  int output_buffer_size = RequiredBufferSizeForShape(output_array.shape());
-  const Shape& input_shape = input_array.shape();
+  // Create a float buffer on the output array, which are always constant.
   const Shape& output_shape = output_array.shape();
-
+  const int output_dims_count = output_shape.dimensions_count();
+  const int output_buffer_size = RequiredBufferSizeForShape(output_shape);
   auto& output_float_data =
       output_array.GetMutableBuffer<ArrayDataType::kFloat>().data;
   output_float_data.resize(output_buffer_size);
 
-  const int output_dims_count = output_shape.dimensions_count();
-  if (unary_op->type == OperatorType::kTensorFlowReshape) {
+  const Shape& input_shape = input_array.shape();
+  const int input_buffer_size = RequiredBufferSizeForShape(input_shape);
+  if (unary_op->type == OperatorType::kCast) {
+    for (int i = 0; i < output_buffer_size; i++) {
+      float outval = 0.0f;
+      if (input_array.buffer->type == ArrayDataType::kFloat) {
+        outval = static_cast<float>(
+            input_array.GetBuffer<ArrayDataType::kFloat>().data[i]);
+      } else if (input_array.buffer->type == ArrayDataType::kUint8) {
+        outval = static_cast<float>(
+            input_array.GetBuffer<ArrayDataType::kUint8>().data[i]);
+      } else if (input_array.buffer->type == ArrayDataType::kInt32) {
+        outval = static_cast<float>(
+            input_array.GetBuffer<ArrayDataType::kInt32>().data[i]);
+      } else if (input_array.buffer->type == ArrayDataType::kInt64) {
+        outval = static_cast<float>(
+            input_array.GetBuffer<ArrayDataType::kInt64>().data[i]);
+      } else {
+        LOG(FATAL) << "Unsupported cast op input type";
+      }
+      output_float_data[i] = outval;
+    }
+  } else if (unary_op->type == OperatorType::kTensorFlowReshape) {
     CHECK(input_buffer_size == output_buffer_size);
-    memcpy(output_float_data.data(), input_float_data.data(),
-           input_buffer_size * sizeof(input_float_data[0]));
+    memcpy(output_float_data.data(), (*input_float_data).data(),
+           output_buffer_size * sizeof(output_float_data[0]));
   } else if (unary_op->type == OperatorType::kTensorFlowSum) {
     // At the moment only full reduction across all dimensions is supported.
     for (int i = 0; i < output_dims_count; i++) {
       CHECK_EQ(output_shape.dims(i), 1);
     }
     float sum = 0.f;
-    const int input_size = RequiredBufferSizeForShape(input_shape);
-    for (int i = 0; i < input_size; i++) {
-      sum += input_float_data[i];
+    for (int i = 0; i < input_buffer_size; i++) {
+      sum += (*input_float_data)[i];
     }
     output_float_data[0] = sum;
   } else if (unary_op->type == OperatorType::kTensorFlowMin) {
@@ -117,10 +153,9 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     for (int i = 0; i < output_dims_count; i++) {
       CHECK_EQ(output_shape.dims(i), 1);
     }
-    float min = input_float_data[0];
-    const int input_size = RequiredBufferSizeForShape(input_shape);
-    for (int i = 0; i < input_size; i++) {
-      min = std::min(min, input_float_data[i]);
+    float min = (*input_float_data)[0];
+    for (int i = 0; i < input_buffer_size; i++) {
+      min = std::min(min, (*input_float_data)[i]);
     }
     output_float_data[0] = min;
   } else if (unary_op->type == OperatorType::kTensorFlowMax) {
@@ -129,25 +164,26 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     for (int i = 0; i < output_dims_count; i++) {
       CHECK_EQ(output_shape.dims(i), 1);
     }
-    float max = input_float_data[0];
-    const int input_size = RequiredBufferSizeForShape(input_shape);
-    for (int i = 0; i < input_size; i++) {
-      max = std::max(max, input_float_data[i]);
+    float max = (*input_float_data)[0];
+    for (int i = 0; i < input_buffer_size; i++) {
+      max = std::max(max, (*input_float_data)[i]);
     }
     output_float_data[0] = max;
-  } else if (unary_op->type == OperatorType::kTensorFlowRsqrt ||
+  } else if (unary_op->type == OperatorType::kNeg ||
+             unary_op->type == OperatorType::kTensorFlowRsqrt ||
              unary_op->type == OperatorType::kTensorFlowSqrt ||
              unary_op->type == OperatorType::kTensorFlowSquare) {
     // Element-wise ops. Should have perfectly matching sizes here.
-    const int input_size = RequiredBufferSizeForShape(input_shape);
     for (int i = 0; i < output_dims_count; i++) {
       CHECK_EQ(output_shape.dims(i), input_shape.dims(i));
     }
 
-    for (int i = 0; i < input_size; i++) {
-      const float val = input_float_data[i];
+    for (int i = 0; i < output_buffer_size; i++) {
+      const float val = (*input_float_data)[i];
       float outval = 0.f;
-      if (unary_op->type == OperatorType::kTensorFlowRsqrt) {
+      if (unary_op->type == OperatorType::kNeg) {
+        outval = -val;
+      } else if (unary_op->type == OperatorType::kTensorFlowRsqrt) {
         outval = 1.0f / std::sqrt(val);
       } else if (unary_op->type == OperatorType::kTensorFlowSqrt) {
         outval = std::sqrt(val);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc
index d25c773f195cea407251bf046f0b1f1924e01968..b77be3f5c0d04b028391c1ce9de39afd7632eb36 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_mean_attributes.cc
@@ -29,22 +29,16 @@ bool ResolveMeanAttributes::Run(Model* model, std::size_t op_index) {
   if (mean_op->type != OperatorType::kMean) return false;
   auto* op = static_cast<MeanOperator*>(mean_op);
 
-  if (!op->reduction_indices.empty()) return false;
+  if (!op->axis.empty()) {
+    // Attributes already resolved
+    return false;
+  }
   if (op->inputs.size() != 2) return false;
   if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
 
   const auto& indices_array = *model->arrays[op->inputs[1]];
   if (!indices_array.has_shape()) return false;
-
-  op->reduction_indices = indices_array.GetBuffer<ArrayDataType::kInt32>().data;
-
-  // At the moment, we only support simultaneous reduction over width and
-  // height. This is mainly limited by the fact that currently, the runtime
-  // arrays are always 4-dimensional.
-  CHECK_EQ(op->reduction_indices.size(), 2);
-  CHECK((op->reduction_indices[0] == 1 && op->reduction_indices[1] == 2) ||
-        (op->reduction_indices[0] == 2 && op->reduction_indices[1] == 1));
-
+  op->axis = indices_array.GetBuffer<ArrayDataType::kInt32>().data;
   return true;
 }
 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
index 8fa7b83bedc0da99c3a5a60f38586f712eeb3c4e..b5093bc4c7c33b3e555ca14151c2489cddc6dbd3 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
@@ -25,6 +25,31 @@ limitations under the License.
 
 namespace toco {
 
+// Reorder the elements of an input_array according to the input_axes_order and
+// output_axes_order. Then adjust the shapes of the input and output arrays
+// accordingly. Note that input_array must have a buffer (that is, it is a
+// constant array).
+template <typename T, ArrayDataType DataType>
+void ReorderAxes(AxesOrder input_axes_order, AxesOrder output_axes_order,
+                 Array* input_array, Array* output_array) {
+  CHECK(input_array->buffer->type == DataType);
+  CHECK(!output_array->buffer);
+  auto& input_data = input_array->GetMutableBuffer<DataType>().data;
+  std::vector<T> reordered_data;
+  reordered_data.resize(RequiredBufferSizeForShape(output_array->shape()));
+  // TODO(b/62904716) Shapes should be used directly.
+  Shape input_shape = input_array->shape();
+  Shape output_shape = output_array->shape();
+  if (AxesCount(input_axes_order) == 2) {
+    UnextendShape(&input_shape, 2);
+    UnextendShape(&output_shape, 2);
+  }
+  ShuffleArray(input_shape, input_axes_order, output_axes_order, output_shape,
+               input_data.data(), reordered_data.data());
+  input_data = reordered_data;
+  input_array->copy_shape(output_array->shape());
+}
+
 bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) {
   auto reorder_it = model->operators.begin() + op_index;
   auto* reorder_op = static_cast<ReorderAxesOperator*>(reorder_it->get());
@@ -52,26 +77,19 @@ bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) {
     return false;
   }
   // Reorder the input array dims and buffer data
-  CHECK(constant_input_array.buffer->type == ArrayDataType::kFloat);
-  CHECK(!output_array.buffer);
-  auto& input_data =
-      constant_input_array.GetMutableBuffer<ArrayDataType::kFloat>().data;
-  std::vector<float> reordered_data;
-  reordered_data.resize(RequiredBufferSizeForShape(output_array.shape()));
-  const auto input_axes_order = reorder_op->input_axes_order;
-  const auto output_axes_order = reorder_op->output_axes_order;
-  // TODO(b/62904716) Shapes should be used directly.
-  Shape input_shape = constant_input_array.shape();
-  Shape output_shape = output_array.shape();
-  if (AxesCount(input_axes_order) == 2) {
-    UnextendShape(&input_shape, 2);
-    UnextendShape(&output_shape, 2);
+  if (constant_input_array.buffer->type == ArrayDataType::kFloat) {
+    ReorderAxes<float, ArrayDataType::kFloat>(
+        reorder_op->input_axes_order, reorder_op->output_axes_order,
+        &constant_input_array, &output_array);
+  } else if (constant_input_array.buffer->type == ArrayDataType::kInt32) {
+    ReorderAxes<uint8, ArrayDataType::kUint8>(
+        reorder_op->input_axes_order, reorder_op->output_axes_order,
+        &constant_input_array, &output_array);
+  } else {
+    LOG(FATAL) << "Cannot ReorderAxes unless input buffer is float or uint8.";
   }
-  ShuffleArray(input_shape, input_axes_order, output_axes_order, output_shape,
-               input_data.data(), reordered_data.data());
-  input_data = reordered_data;
+
   input_array.copy_shape(output_array.shape());
-  constant_input_array.copy_shape(output_array.shape());
 
   // Update the edges of the graph to point to the input array
   for (const auto& other_op : model->operators) {
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a73f16735cb232753e8f64caae31f5c945b6bffd
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc
@@ -0,0 +1,77 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) {
+  const auto op_it = model->operators.begin() + op_index;
+  if (op_it->get()->type != OperatorType::kSpaceToBatchND) return false;
+
+  auto* op = static_cast<SpaceToBatchNDOperator*>(op_it->get());
+
+  // The attributes are resolved only when the 3 attributes (block_shape,
+  // before_paddings, after_paddings) are all constant.
+  if (!op->block_shape.empty()) {
+    return false;
+  }
+
+  const int block_shape_index = 1;
+  const int paddings_index = 2;
+
+  CHECK_EQ(op->inputs.size(), 3);
+  if (!IsConstantParameterArray(*model, op->inputs[block_shape_index]) ||
+      !IsConstantParameterArray(*model, op->inputs[paddings_index]))
+    return false;
+
+  // Handle paddings.
+  const auto& paddings_array = *model->arrays[op->inputs[paddings_index]];
+  if (!paddings_array.has_shape()) return false;
+  const std::vector<int>& paddings_dims = paddings_array.shape().dims();
+  if (paddings_dims.size() != 2) {
+    // Code only handles padding of 2 dimensions. Perhaps another transformation
+    // will delete this op.
+    return false;
+  }
+  std::vector<int> paddings_buffer =
+      paddings_array.GetBuffer<ArrayDataType::kInt32>().data;
+  for (int i = 0; i < paddings_dims[0]; ++i) {
+    op->before_paddings.push_back(paddings_buffer[i * 2]);
+    op->after_paddings.push_back(paddings_buffer[i * 2 + 1]);
+  }
+
+  // Handle block_shape.
+  const auto& block_shape_array = *model->arrays[op->inputs[block_shape_index]];
+  if (!block_shape_array.has_shape()) return false;
+  const std::vector<int>& block_shape_dims = block_shape_array.shape().dims();
+  CHECK_EQ(block_shape_dims.size(), 1);
+  std::vector<int> block_shape_buffer =
+      block_shape_array.GetBuffer<ArrayDataType::kInt32>().data;
+  for (int i = 0; i < block_shape_dims[0]; ++i) {
+    op->block_shape.push_back(block_shape_buffer[i]);
+  }
+
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_squeeze.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc
similarity index 86%
rename from tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_squeeze.cc
rename to tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc
index 1d3f42b5ec4cab29189c12043d12ea687d684832..dd3e73635ae0215510f0a8d1aee487da5af35700 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_squeeze.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc
@@ -25,15 +25,13 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveTensorFlowSqueeze::Run(Model* model, std::size_t op_index) {
-  const auto squeeze_it = model->operators.begin() + op_index;
-  const auto* squeeze_op = squeeze_it->get();
+bool ResolveSqueezeAttributes::Run(Model* model, std::size_t op_index) {
+  auto* squeeze_op = model->operators[op_index].get();
   if (squeeze_op->type != OperatorType::kSqueeze) {
     return false;
   }
-
-  CHECK_EQ(squeeze_op->inputs.size(), 1);
-  CHECK_EQ(squeeze_op->outputs.size(), 1);
+  DCHECK_EQ(squeeze_op->inputs.size(), 1);
+  DCHECK_EQ(squeeze_op->outputs.size(), 1);
 
   // If the output is consumed by a reshape op, it's a trivial squeeze.
   if (CountOpsWithInput(*model, squeeze_op->outputs[0]) == 1) {
@@ -47,7 +45,6 @@ bool ResolveTensorFlowSqueeze::Run(Model* model, std::size_t op_index) {
       return RemoveTrivialPassthroughOp(this, model, op_index);
     }
   }
-
   return false;
 }
 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
index 5fc3b25bc12b0644ce2fcd3f7ee5e793791d54d5..dbe69adcbd34bb0544239ebb096fb8bfc4bfcb49 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
@@ -12,11 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
 #include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
 #include "tensorflow/contrib/lite/toco/model.h"
 #include "tensorflow/contrib/lite/toco/tooling_util.h"
@@ -30,13 +25,12 @@ bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) {
   if (slice_op->type != OperatorType::kStridedSlice) return false;
 
   auto* op = static_cast<StridedSliceOperator*>(slice_op);
-  if (!op->start_indices.empty()) return false;
+  if (!op->start_indices.empty()) {
+    // We have already resolved these attributes
+    return false;
+  }
 
   CHECK_EQ(op->inputs.size(), 4);
-  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
-  if (!IsConstantParameterArray(*model, op->inputs[2])) return false;
-  if (!IsConstantParameterArray(*model, op->inputs[3])) return false;
-
   const auto& start_array = *model->arrays[op->inputs[1]];
   if (!start_array.has_shape()) return false;
 
@@ -46,17 +40,24 @@ bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) {
   const auto& stride_array = *model->arrays[op->inputs[3]];
   if (!stride_array.has_shape()) return false;
 
+  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[2])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[3])) return false;
+
   op->start_indices = start_array.GetBuffer<ArrayDataType::kInt32>().data;
   op->stop_indices = stop_array.GetBuffer<ArrayDataType::kInt32>().data;
   op->strides = stride_array.GetBuffer<ArrayDataType::kInt32>().data;
 
-  // Only 4D arrays are supported for now.
-  CHECK_EQ(op->start_indices.size(), 4);
-  CHECK_EQ(op->stop_indices.size(), 4);
-  CHECK_EQ(op->strides.size(), 4);
-
-  // TODO(dkalenichenko): Delete the extra inputs?
+  CHECK_GE(op->start_indices.size(), 1);
+  CHECK_LE(op->start_indices.size(), 4);
+  CHECK_EQ(op->stop_indices.size(), op->start_indices.size());
+  CHECK_EQ(op->strides.size(), op->stop_indices.size());
 
+  // Ideally, we would remove the input arrays after they have been resolved.
+  // However, we must then reconstitute these input arrays for all supported
+  // export formats. For now, leave the arrays so we don't have to modify our
+  // exporters. Ideally, we wouldn't have op attributes, and would work directly
+  // with the input arrays.
   return true;
 }
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
index b482f5cf51f7bde67e76792439203487402b75ce..c6723a880ed0e51cc5828f77742a6c8eb70fa864 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
@@ -35,37 +35,36 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(tf_concat_op->inputs.size(), 2);
   // TensorFlow Concat and ConcatV2 nodes only differ by the ordering
-  // of inputs: in Concat, the concat_dim is the first input, while in
+  // of inputs: in Concat,the axis is the first input, while in
   // ConcatV2, it is the last input.
-  std::size_t concat_dim_pos = 0;
+  std::size_t axis_pos = 0;
   if (tf_concat_op->type == OperatorType::kTensorFlowConcatV2) {
-    concat_dim_pos = tf_concat_op->inputs.size() - 1;
+    axis_pos = tf_concat_op->inputs.size() - 1;
   }
-  const string concat_dim_name = tf_concat_op->inputs[concat_dim_pos];
+  const string axis_name = tf_concat_op->inputs[axis_pos];
   std::vector<string> concat_input_names;
   for (std::size_t i = 0; i < tf_concat_op->inputs.size(); i++) {
-    if (i != concat_dim_pos) {
+    if (i != axis_pos) {
       concat_input_names.push_back(tf_concat_op->inputs[i]);
     }
   }
-  // If the concat_dim array hasn't been resolved to a constant yet,
+  // If the axis array hasn't been resolved to a constant yet,
   // we need to yield.
-  const auto& concat_dim_array = model->GetArray(concat_dim_name);
-  if (!concat_dim_array.buffer) {
-    AddMessageF("Waiting for the concat_dim of %s to be resolved to a constant",
+  const auto& axis_array = model->GetArray(axis_name);
+  if (!axis_array.buffer) {
+    AddMessageF("Waiting for the axis of %s to be resolved to a constant",
                 LogName(*tf_concat_op));
     return false;
   }
 
-  CHECK(concat_dim_array.data_type == ArrayDataType::kInt32);
-  const auto& concat_dim_data =
-      concat_dim_array.GetBuffer<ArrayDataType::kInt32>().data;
-  CHECK_EQ(concat_dim_data.size(), 1);
-  const int concat_dim = concat_dim_data[0];
+  CHECK(axis_array.data_type == ArrayDataType::kInt32);
+  const auto& axis_data = axis_array.GetBuffer<ArrayDataType::kInt32>().data;
+  CHECK_EQ(axis_data.size(), 1);
+  const int axis = axis_data[0];
 
   // Create the Concatenation op replacing the TensorFlowConcat op.
   auto* concatenation_op = new ConcatenationOperator;
-  concatenation_op->concat_dim = concat_dim;
+  concatenation_op->axis = axis;
   concatenation_op->inputs = concat_input_names;
   concatenation_op->outputs = {tf_concat_op->outputs[0]};
   auto depth_concat_it = model->operators.emplace(concat_it, concatenation_op);
@@ -74,9 +73,9 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) {
   concat_it = depth_concat_it + 1;
   CHECK_EQ(concat_it->get(), tf_concat_op);
 
-  // Remove the concat_dim array if it is not used by anything else.
-  if (CountOpsWithInput(*model, concat_dim_name) == 1) {
-    model->arrays.erase(concat_dim_name);
+  // Remove the axis array if it is not used by anything else.
+  if (CountOpsWithInput(*model, axis_name) == 1) {
+    model->arrays.erase(axis_name);
   }
   // Remove the TensorFlowConcat op
   model->operators.erase(concat_it);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
index 55adfca03739deb35cbeb50c67222768f8a02164..150cf53da3099227c5c637ee58c44512d5a41d4f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
@@ -59,7 +59,7 @@ bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) {
   // From the TensorFlow docs on .switch() in
   // third_party/tensorflow/python/ops/control_flow_ops.py
   //
-  //    If `pred` is false, the `data` input is forwared to the first output.
+  //    If `pred` is false, the `data` input is forwarded to the first output.
   //    Otherwise, the data goes to the second output.
   //
   // Note that this comment used to say the opposite and was recently fixed:
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc
new file mode 100644
index 0000000000000000000000000000000000000000..12d966b26104fd491f914fbdb39e0a62fdda19bc
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc
@@ -0,0 +1,53 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace toco {
+
+bool ResolveTransposeAttributes::Run(Model* model, std::size_t op_index) {
+  const auto op_it = model->operators.begin() + op_index;
+  if (op_it->get()->type != OperatorType::kTranspose) return false;
+
+  auto* op = static_cast<TransposeOperator*>(op_it->get());
+  if (!op->perm.empty()) return false;
+
+  CHECK_EQ(op->inputs.size(), 2);
+  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
+
+  // Handling perm.
+  const auto& perm_array = *model->arrays[op->inputs[1]];
+  if (!perm_array.has_shape()) return false;
+
+  const std::vector<int>& perm_dims = perm_array.shape().dims();
+  CHECK_EQ(perm_dims.size(), 1);
+
+  std::vector<int> perm_buffer =
+      perm_array.GetBuffer<ArrayDataType::kInt32>().data;
+  for (int i = 0; i < perm_dims[0]; ++i) {
+    op->perm.push_back(perm_buffer[i]);
+  }
+
+  return true;
+}
+
+}  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
index c6705ad305ac85f7098f40469ebc54fc6fa1b3ab..a14016e8e2705a66c392118899335eb3997fa1de 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
@@ -109,7 +109,7 @@ class ResolveConstantConcatenationTest : public ::testing::Test {
   // Prepare a hypothetical TOCO model with one Concatenation operator in it
   // together with 4 arrays as its inputs.
   // It receives the dimension of concatenation as input.
-  void PrepareModel(Model* model, int concat_dim) {
+  void PrepareModel(Model* model, int axis) {
     std::vector<string> concat_input_names = {"array0", "array1", "array2",
                                               "array3"};
 
@@ -142,7 +142,7 @@ class ResolveConstantConcatenationTest : public ::testing::Test {
       cnt++;
     }
     auto* concatenation_op = new ConcatenationOperator;
-    concatenation_op->concat_dim = concat_dim;
+    concatenation_op->axis = axis;
     concatenation_op->inputs = concat_input_names;
     concatenation_op->outputs = {"concat_op_outputs"};
     Array& out_array = model->GetOrCreateArray(concatenation_op->outputs[0]);
@@ -151,7 +151,7 @@ class ResolveConstantConcatenationTest : public ::testing::Test {
     std::vector<int>* out_array_shape_dim = out_array_shape->mutable_dims();
     out_array_shape_dim->resize(kDim);
     for (int i = 0; i < kDim; i++) {
-      if (i == concat_dim) {
+      if (i == axis) {
         (*out_array_shape_dim)[i] = kNumArrays * kElementPerDim;
       } else {
         (*out_array_shape_dim)[i] = kElementPerDim;
@@ -163,8 +163,8 @@ class ResolveConstantConcatenationTest : public ::testing::Test {
 
 TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis0) {
   Model model;
-  const int concat_dim = 0;
-  PrepareModel(&model, concat_dim);
+  const int axis = 0;
+  PrepareModel(&model, axis);
 
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
@@ -182,8 +182,8 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis0) {
 
 TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis1) {
   Model model;
-  const int concat_dim = 1;
-  PrepareModel(&model, concat_dim);
+  const int axis = 1;
+  PrepareModel(&model, axis);
 
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
@@ -201,8 +201,8 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis1) {
 
 TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis2) {
   Model model;
-  const int concat_dim = 2;
-  PrepareModel(&model, concat_dim);
+  const int axis = 2;
+  PrepareModel(&model, axis);
 
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index cde5a936afd0f12dbd3f5adb333c0c7d73cde25f..995e9d67ca3ae34471595d2d629d2fe993c21ab5 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -25,7 +25,6 @@ limitations under the License.
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
-//#include "absl/strings/string_view_utils.h"
 #include "absl/strings/strip.h"
 #include "tensorflow/contrib/lite/toco/model.h"
 #include "tensorflow/contrib/lite/toco/model_flags.pb.h"
@@ -53,6 +52,8 @@ using tensorflow::DT_BOOL;
 using tensorflow::DT_FLOAT;
 using tensorflow::DT_INT32;
 using tensorflow::DT_INT64;
+using tensorflow::DT_QUINT8;
+using tensorflow::DT_STRING;
 using tensorflow::DT_UINT8;
 using tensorflow::GraphDef;
 using tensorflow::NodeDef;
@@ -136,6 +137,8 @@ ArrayDataType ConvertDataType(tensorflow::DataType dtype) {
     return ArrayDataType::kInt32;
   else if (dtype == DT_INT64)
     return ArrayDataType::kInt64;
+  else if (dtype == DT_STRING)
+    return ArrayDataType::kString;
   else
     LOG(INFO) << "Unsupported data type in placehoder op: " << dtype;
   return ArrayDataType::kNone;
@@ -171,7 +174,11 @@ void ImportFloatArray(const TensorProto& input_tensor, Array* output_array) {
   auto& output_float_data =
       output_array->GetMutableBuffer<ArrayDataType::kFloat>().data;
   output_float_data.resize(input_flat_size);
-  if (input_tensor.float_val_size()) {
+  if (input_tensor.float_val_size() == 1) {
+    for (int i = 0; i < input_flat_size; i++) {
+      output_float_data[i] = input_tensor.float_val(0);
+    }
+  } else if (input_tensor.float_val_size() == input_flat_size) {
     for (int i = 0; i < input_tensor.float_val_size(); i++) {
       output_float_data[i] = input_tensor.float_val(i);
     }
@@ -185,6 +192,32 @@ void ImportFloatArray(const TensorProto& input_tensor, Array* output_array) {
   }
 }
 
+void ImportQuint8Array(const TensorProto& input_tensor, Array* output_array) {
+  CHECK_EQ(input_tensor.dtype(), DT_QUINT8);
+  const auto& input_shape = input_tensor.tensor_shape();
+  CHECK_LE(input_shape.dim_size(), 4);
+  ImportShape(input_shape.dim(), output_array->mutable_shape());
+  int input_flat_size = 1;
+  for (int k = 0; k < input_shape.dim_size(); k++) {
+    input_flat_size *= input_shape.dim(k).size();
+  }
+  auto& output_int_data =
+      output_array->GetMutableBuffer<ArrayDataType::kUint8>().data;
+  output_int_data.resize(input_flat_size);
+  if (input_tensor.int_val_size()) {
+    for (int i = 0; i < input_tensor.int_val_size(); i++) {
+      output_int_data[i] = input_tensor.int_val(i);
+    }
+  } else if (input_tensor.tensor_content().size() ==
+             input_flat_size * sizeof(uint8_t)) {
+    toco::port::CopyToBuffer(input_tensor.tensor_content(),
+                             reinterpret_cast<char*>(output_int_data.data()));
+  } else {
+    LOG(FATAL) << "Neither input_content nor int_val have the right "
+                  "dimensions for this uint8 tensor.";
+  }
+}
+
 void ImportInt32Array(const TensorProto& input_tensor, Array* output_array) {
   CHECK_EQ(input_tensor.dtype(), DT_INT32);
   const auto& input_shape = input_tensor.tensor_shape();
@@ -237,6 +270,27 @@ void ImportInt64Array(const TensorProto& input_tensor, Array* output_array) {
   }
 }
 
+void ImportStringArray(const TensorProto& input_tensor, Array* output_array) {
+  CHECK_EQ(input_tensor.dtype(), DT_STRING);
+  const auto& input_shape = input_tensor.tensor_shape();
+  CHECK_LE(input_shape.dim_size(), 4);
+  ImportShape(input_shape.dim(), output_array->mutable_shape());
+  int input_flat_size = 1;
+  for (int k = 0; k < input_shape.dim_size(); k++) {
+    input_flat_size *= input_shape.dim(k).size();
+  }
+  auto& output_string_data =
+      output_array->GetMutableBuffer<ArrayDataType::kString>().data;
+  output_string_data.resize(input_flat_size);
+  if (input_flat_size != input_tensor.string_val_size()) {
+    LOG(FATAL) << "Input_content string_val doesn't have the right "
+                  "dimensions for this string tensor.";
+  }
+  for (int i = 0; i < input_flat_size; ++i) {
+    output_string_data[i] = input_tensor.string_val(i);
+  }
+}
+
 // Count the number of inputs of a given node. If
 // `tf_import_flags.drop_control_dependency` is true, count the number of
 // non-control-dependency inputs.
@@ -254,6 +308,14 @@ int GetInputsCount(const NodeDef& node,
   }
 }
 
+void CheckInputsCount(const NodeDef& node,
+                      const TensorFlowImportFlags& tf_import_flags,
+                      int expected_input_count) {
+  QCHECK_EQ(GetInputsCount(node, tf_import_flags), expected_input_count)
+      << node.op() << " node expects " << expected_input_count
+      << " input(s) other than control dependencies: " << node.DebugString();
+}
+
 void ConvertConstOperator(const NodeDef& node,
                           const TensorFlowImportFlags& tf_import_flags,
                           Model* model) {
@@ -262,23 +324,34 @@ void ConvertConstOperator(const NodeDef& node,
   const auto dtype = GetDataTypeAttr(node, "dtype");
 
   auto& array = model->GetOrCreateArray(node.name());
-  array.data_type = dtype == DT_FLOAT
-                        ? ArrayDataType::kFloat
-                        : dtype == DT_INT32
-                              ? ArrayDataType::kInt32
-                              : dtype == DT_INT64 ? ArrayDataType::kInt64
-                                                  : ArrayDataType::kNone;
-  if (dtype == DT_FLOAT) {
-    ImportFloatArray(tensor, &array);
-  } else if (dtype == DT_INT32) {
-    ImportInt32Array(tensor, &array);
-  } else if (dtype == DT_INT64) {
-    ImportInt64Array(tensor, &array);
-  } else {
-    // do nothing, silently ignore the Const data. For example, there are consts
-    // of string type. We just make a dummy buffer to indicate that this array
-    // does not rely on external input.
-    array.GetMutableBuffer<ArrayDataType::kNone>();
+  switch (dtype) {
+    case DT_FLOAT:
+      array.data_type = ArrayDataType::kFloat;
+      ImportFloatArray(tensor, &array);
+      break;
+    case DT_INT32:
+      array.data_type = ArrayDataType::kInt32;
+      ImportInt32Array(tensor, &array);
+      break;
+    case DT_QUINT8:
+      array.data_type = ArrayDataType::kUint8;
+      ImportQuint8Array(tensor, &array);
+      break;
+    case DT_INT64:
+      array.data_type = ArrayDataType::kInt64;
+      ImportInt64Array(tensor, &array);
+      break;
+    case DT_STRING:
+      array.data_type = ArrayDataType::kString;
+      ImportStringArray(tensor, &array);
+      break;
+    default:
+      array.data_type = ArrayDataType::kNone;
+      // do nothing, silently ignore the Const data.
+      // We just make a dummy buffer to indicate that
+      // this array does not rely on external input.
+      array.GetMutableBuffer<ArrayDataType::kNone>();
+      break;
   }
 }
 
@@ -286,7 +359,7 @@ void ConvertConvOperator(const NodeDef& node,
                          const TensorFlowImportFlags& tf_import_flags,
                          Model* model) {
   CHECK_EQ(node.op(), "Conv2D");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
 
   // We only support NHWC, which is the default data_format.
   // So if data_format is not defined, we're all good.
@@ -339,7 +412,7 @@ void ConvertDepthwiseConvOperator(const NodeDef& node,
                                   const TensorFlowImportFlags& tf_import_flags,
                                   Model* model) {
   CHECK_EQ(node.op(), "DepthwiseConv2dNative");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
 
   // We only support NHWC, which is the default data_format.
   // So if data_format is not defined, we're all good.
@@ -392,7 +465,8 @@ void ConvertDepthToSpaceOperator(const NodeDef& node,
                                  const TensorFlowImportFlags& tf_import_flags,
                                  Model* model) {
   CHECK_EQ(node.op(), "DepthToSpace");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
+
   CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT);
   auto* op = new DepthToSpaceOperator;
   op->inputs.push_back(node.input(0));
@@ -406,7 +480,8 @@ void ConvertSpaceToDepthOperator(const NodeDef& node,
                                  const TensorFlowImportFlags& tf_import_flags,
                                  Model* model) {
   CHECK_EQ(node.op(), "SpaceToDepth");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
+
   CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT);
   auto* op = new SpaceToDepthOperator;
   op->inputs.push_back(node.input(0));
@@ -420,7 +495,8 @@ void ConvertBiasAddOperator(const NodeDef& node,
                             const TensorFlowImportFlags& tf_import_flags,
                             Model* model) {
   CHECK_EQ(node.op(), "BiasAdd");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
+
   const auto& input_name = node.input(0);
   const auto& bias_name = node.input(1);
   CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT);
@@ -435,7 +511,7 @@ void ConvertReluOperator(const NodeDef& node,
                          const TensorFlowImportFlags& tf_import_flags,
                          Model* model) {
   CHECK_EQ(node.op(), "Relu");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   const auto& input_name = node.input(0);
   auto* relu = new ReluOperator;
   relu->inputs.push_back(input_name);
@@ -447,7 +523,8 @@ void ConvertRelu6Operator(const NodeDef& node,
                           const TensorFlowImportFlags& tf_import_flags,
                           Model* model) {
   CHECK_EQ(node.op(), "Relu6");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
+
   const auto& input_name = node.input(0);
   auto* op = new Relu6Operator;
   op->inputs.push_back(input_name);
@@ -459,7 +536,8 @@ void ConvertLogisticOperator(const NodeDef& node,
                              const TensorFlowImportFlags& tf_import_flags,
                              Model* model) {
   CHECK_EQ(node.op(), "Sigmoid");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
+
   const auto& input_name = node.input(0);
   auto* op = new LogisticOperator;
   op->inputs.push_back(input_name);
@@ -471,7 +549,8 @@ void ConvertTanhOperator(const NodeDef& node,
                          const TensorFlowImportFlags& tf_import_flags,
                          Model* model) {
   CHECK_EQ(node.op(), "Tanh");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
+
   const auto& input_name = node.input(0);
   auto* op = new TanhOperator;
   op->inputs.push_back(input_name);
@@ -483,7 +562,7 @@ void ConvertDivOperator(const NodeDef& node,
                         const TensorFlowImportFlags& tf_import_flags,
                         Model* model) {
   CHECK(node.op() == "Div" || node.op() == "RealDiv");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new DivOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -495,14 +574,17 @@ void ConvertIdentityOperator(const NodeDef& node,
                              const TensorFlowImportFlags& tf_import_flags,
                              Model* model) {
   CHECK(node.op() == "Identity" || node.op() == "CheckNumerics" ||
-        node.op() == "PlaceholderWithDefault");
+        node.op() == "PlaceholderWithDefault" || node.op() == "StopGradient");
   auto* op = new TensorFlowIdentityOperator;
   // Amazingly, some TensorFlow graphs (at least rajeev_lstm.pb) have
   // identity nodes with multiple inputs, but the other inputs seem
   // to be gratuitous (in the case of rajeev_lstm.pb, these are
   // enumerating the LSTM state arrays). We will just ignore extra
   // inputs beyond the first input.
-  CHECK_GE(node.input_size(), 1);
+  QCHECK_GE(node.input_size(), 1)
+      << node.op()
+      << " node expects at least 1 input other than control dependencies: "
+      << node.DebugString();
   const auto& input_name = node.input(0);
   op->inputs.push_back(input_name);
   op->outputs.push_back(node.name());
@@ -513,7 +595,7 @@ void ConvertFakeQuantWithMinMaxArgs(
     const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
     Model* model) {
   CHECK_EQ(node.op(), "FakeQuantWithMinMaxArgs");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   auto* op = new FakeQuantOperator;
   op->inputs.push_back(node.input(0));
   op->minmax.reset(new MinMax);
@@ -529,7 +611,10 @@ void ConvertFakeQuantWithMinMaxVars(
     Model* model) {
   CHECK_EQ(node.op(), "FakeQuantWithMinMaxVars");
   const int num_inputs = GetInputsCount(node, tf_import_flags);
-  CHECK(num_inputs == 3 || num_inputs == 4);
+  QCHECK(num_inputs == 3 || num_inputs == 4)
+      << "FakeQuantWithMinMaxVars node expects 3 or 4 inputs other than "
+         "control dependencies: "
+      << node.DebugString();
   auto* op = new FakeQuantOperator;
   for (int i = 0; i < 3; i++) {
     op->inputs.push_back(node.input(i));
@@ -538,11 +623,22 @@ void ConvertFakeQuantWithMinMaxVars(
   model->operators.emplace_back(op);
 }
 
+void ConvertNegOperator(const NodeDef& node,
+                        const TensorFlowImportFlags& tf_import_flags,
+                        Model* model) {
+  CHECK_EQ(node.op(), "Neg");
+  CheckInputsCount(node, tf_import_flags, 1);
+  auto* op = new NegOperator;
+  op->inputs.push_back(node.input(0));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
 void ConvertRsqrtOperator(const NodeDef& node,
                           const TensorFlowImportFlags& tf_import_flags,
                           Model* model) {
   CHECK_EQ(node.op(), "Rsqrt");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   auto* op = new TensorFlowRsqrtOperator;
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
@@ -553,7 +649,7 @@ void ConvertSqrtOperator(const NodeDef& node,
                          const TensorFlowImportFlags& tf_import_flags,
                          Model* model) {
   CHECK_EQ(node.op(), "Sqrt");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   auto* op = new TensorFlowSqrtOperator;
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
@@ -564,7 +660,7 @@ void ConvertSqueezeOperator(const NodeDef& node,
                             const TensorFlowImportFlags& tf_import_flags,
                             Model* model) {
   CHECK_EQ(node.op(), "Squeeze");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   auto* op = new SqueezeOperator;
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
@@ -581,7 +677,7 @@ void ConvertSquareOperator(const NodeDef& node,
                            const TensorFlowImportFlags& tf_import_flags,
                            Model* model) {
   CHECK_EQ(node.op(), "Square");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   auto* op = new TensorFlowSquareOperator;
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
@@ -592,7 +688,7 @@ void ConvertAddOperator(const NodeDef& node,
                         const TensorFlowImportFlags& tf_import_flags,
                         Model* model) {
   CHECK_EQ(node.op(), "Add");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new AddOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -604,7 +700,7 @@ void ConvertMulOperator(const NodeDef& node,
                         const TensorFlowImportFlags& tf_import_flags,
                         Model* model) {
   CHECK_EQ(node.op(), "Mul");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new MulOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -616,7 +712,7 @@ void ConvertSubOperator(const NodeDef& node,
                         const TensorFlowImportFlags& tf_import_flags,
                         Model* model) {
   CHECK_EQ(node.op(), "Sub");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new SubOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -628,7 +724,7 @@ void ConvertSumOperator(const NodeDef& node,
                         const TensorFlowImportFlags& tf_import_flags,
                         Model* model) {
   CHECK_EQ(node.op(), "Sum");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new TensorFlowSumOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -643,7 +739,7 @@ void ConvertTileOperator(const NodeDef& node,
                          const TensorFlowImportFlags& tf_import_flags,
                          Model* model) {
   CHECK_EQ(node.op(), "Tile");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new TensorFlowTileOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -655,7 +751,7 @@ void ConvertSliceOperator(const NodeDef& node,
                           const TensorFlowImportFlags& tf_import_flags,
                           Model* model) {
   CHECK_EQ(node.op(), "Slice");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 3);
+  CheckInputsCount(node, tf_import_flags, 3);
   auto* op = new SliceOperator;
   for (int i = 0; i < 3; ++i) {
     op->inputs.push_back(node.input(i));
@@ -668,7 +764,7 @@ void ConvertPadOperator(const NodeDef& node,
                         const TensorFlowImportFlags& tf_import_flags,
                         Model* model) {
   CHECK_EQ(node.op(), "Pad");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new PadOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -680,7 +776,7 @@ void ConvertShapeOperator(const NodeDef& node,
                           const TensorFlowImportFlags& tf_import_flags,
                           Model* model) {
   CHECK_EQ(node.op(), "Shape");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   auto* op = new TensorFlowShapeOperator;
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
@@ -691,7 +787,7 @@ void ConvertSplitOperator(const NodeDef& node,
                           const TensorFlowImportFlags& tf_import_flags,
                           Model* model) {
   CHECK_EQ(node.op(), "Split");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new TensorFlowSplitOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -708,7 +804,7 @@ void ConvertMergeOperator(const NodeDef& node,
                           const TensorFlowImportFlags& tf_import_flags,
                           Model* model) {
   CHECK_EQ(node.op(), "Merge");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new TensorFlowMergeOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -720,7 +816,7 @@ void ConvertSwitchOperator(const NodeDef& node,
                            const TensorFlowImportFlags& tf_import_flags,
                            Model* model) {
   CHECK_EQ(node.op(), "Switch");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new TensorFlowSwitchOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -733,7 +829,7 @@ void ConvertSoftmaxOperator(const NodeDef& node,
                             const TensorFlowImportFlags& tf_import_flags,
                             Model* model) {
   CHECK_EQ(node.op(), "Softmax");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   const auto& input_name = node.input(0);
   auto* softmax = new SoftmaxOperator;
   softmax->inputs.push_back(input_name);
@@ -748,7 +844,7 @@ void ConvertLRNOperator(const NodeDef& node,
                         const TensorFlowImportFlags& tf_import_flags,
                         Model* model) {
   CHECK_EQ(node.op(), "LRN");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   const auto& input_name = node.input(0);
   auto* lrn = new LocalResponseNormalizationOperator;
   lrn->inputs.push_back(input_name);
@@ -764,7 +860,7 @@ void ConvertMaxPoolOperator(const NodeDef& node,
                             const TensorFlowImportFlags& tf_import_flags,
                             Model* model) {
   CHECK_EQ(node.op(), "MaxPool");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   const auto& input_name = node.input(0);
   // We only support NHWC, which is the default data_format.
   // So if data_format is not defined, we're all good.
@@ -806,7 +902,7 @@ void ConvertAvgPoolOperator(const NodeDef& node,
                             const TensorFlowImportFlags& tf_import_flags,
                             Model* model) {
   CHECK_EQ(node.op(), "AvgPool");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   const auto& input_name = node.input(0);
   // We only support NHWC, which is the default data_format.
   // So if data_format is not defined, we're all good.
@@ -844,7 +940,7 @@ void ConvertReshapeOperator(const NodeDef& node,
                             const TensorFlowImportFlags& tf_import_flags,
                             Model* model) {
   CHECK_EQ(node.op(), "Reshape");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new TensorFlowReshapeOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -855,12 +951,24 @@ void ConvertReshapeOperator(const NodeDef& node,
 void ConvertMatMulOperator(const NodeDef& node,
                            const TensorFlowImportFlags& tf_import_flags,
                            Model* model) {
-  CHECK_EQ(node.op(), "MatMul");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
-  // Transpose flags should be easy to support, but we don't have a
-  // GraphDef with them to test on at the moment.
-  CHECK_EQ(GetBoolAttr(node, "transpose_a"), false);
-  CHECK_EQ(GetBoolAttr(node, "transpose_b"), false);
+  CheckInputsCount(node, tf_import_flags, 2);
+  if (node.op() == "MatMul") {
+    // Transpose flags should be easy to support, but we don't have a
+    // GraphDef with them to test on at the moment.
+    CHECK_EQ(GetBoolAttr(node, "transpose_a"), false);
+    CHECK_EQ(GetBoolAttr(node, "transpose_b"), false);
+    CHECK(!HasAttr(node, "adjoint_a") ||
+          (GetBoolAttr(node, "adjoint_a") == false));
+    CHECK(!HasAttr(node, "adjoint_b") ||
+          (GetBoolAttr(node, "adjoint_b") == false));
+  } else if (node.op() == "BatchMatMul") {
+    // https://www.tensorflow.org/versions/r0.12/api_docs/python/math_ops/matrix_math_functions
+    CHECK(!HasAttr(node, "adj_a") || (GetBoolAttr(node, "adj_a") == false));
+    CHECK(!HasAttr(node, "adj_b") || (GetBoolAttr(node, "adj_b") == false));
+  } else {
+    LOG(FATAL) << "op must be 'MatMul' or 'BatchMatMul'";
+  }
+
   const auto& input_name = node.input(0);
   const auto& weights_name = node.input(1);
   const auto& reordered_weights_name = weights_name + "_reordered";
@@ -899,7 +1007,10 @@ void ConvertConcatOperator(const NodeDef& node,
     LOG(FATAL) << "Expected Concat or ConcatV2";
   }
   const int num_inputs = GetInputsCount(node, tf_import_flags);
-  CHECK_GE(num_inputs, 2);
+  QCHECK_GE(num_inputs, 2)
+      << node.op()
+      << " node expects at least 2 inputs other than control dependencies: "
+      << node.DebugString();
   CHECK_EQ(num_inputs, 1 + GetIntAttr(node, "N"));
   for (int i = 0; i < num_inputs; ++i) {
     op->inputs.push_back(node.input(i));
@@ -990,7 +1101,7 @@ void ConvertMaxOperator(const NodeDef& node,
                         const TensorFlowImportFlags& tf_import_flags,
                         Model* model) {
   CHECK_EQ(node.op(), "Max");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new TensorFlowMaxOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -1005,7 +1116,7 @@ void ConvertMinOperator(const NodeDef& node,
                         const TensorFlowImportFlags& tf_import_flags,
                         Model* model) {
   CHECK_EQ(node.op(), "Min");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new TensorFlowMinOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -1020,7 +1131,7 @@ void ConvertMaximumOperator(const NodeDef& node,
                             const TensorFlowImportFlags& tf_import_flags,
                             Model* model) {
   CHECK_EQ(node.op(), "Maximum");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new TensorFlowMaximumOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -1032,7 +1143,7 @@ void ConvertMinimumOperator(const NodeDef& node,
                             const TensorFlowImportFlags& tf_import_flags,
                             Model* model) {
   CHECK_EQ(node.op(), "Minimum");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new TensorFlowMinimumOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -1068,22 +1179,7 @@ void ConvertStridedSliceOperator(const NodeDef& node,
                                  const TensorFlowImportFlags& tf_import_flags,
                                  Model* model) {
   CHECK_EQ(node.op(), "StridedSlice");
-  CHECK_EQ(node.input_size(), 4);
-
-  // Only a subset of the full TF op functionality is supported now.
-  if (  // No 64-bit indices.
-      GetDataTypeAttr(node, "Index") != DT_INT32 ||
-      // No dimensionality changes.
-      GetIntAttr(node, "new_axis_mask") != 0 ||
-      GetIntAttr(node, "shrink_axis_mask") != 0 ||
-      // No sparse indices.
-      GetIntAttr(node, "ellipsis_mask") != 0 ||
-      // Only 4D tensors are supported.
-      GetIntAttr(node, "begin_mask") > 15 ||
-      GetIntAttr(node, "end_mask") > 15) {
-    ConvertUnsupportedOperator(node, tf_import_flags, model);
-    return;
-  }
+  CheckInputsCount(node, tf_import_flags, 4);
 
   auto* op = new StridedSliceOperator;
   for (const auto& input : node.input()) {
@@ -1104,7 +1200,7 @@ void ConvertPlaceholderOperator(const NodeDef& node,
                                 Model* model) {
   CHECK(node.op() == "Placeholder" || node.op() == "LegacyFedInput");
   if (node.op() == "Placeholder") {
-    CHECK_EQ(GetInputsCount(node, tf_import_flags), 0);
+    CheckInputsCount(node, tf_import_flags, 0);
   }
   auto& array = model->GetOrCreateArray(node.name());
   if (node.attr().count("dtype")) {
@@ -1135,34 +1231,16 @@ void ConvertNoOpOperator(const NodeDef& node,
                          const TensorFlowImportFlags& tf_import_flags,
                          Model* model) {}
 
-ArrayDataType GetArrayDataType(tensorflow::DataType tf_data_type) {
-  if (tf_data_type == DT_UINT8) {
-    return ArrayDataType::kUint8;
-  } else if (tf_data_type == DT_INT32) {
-    return ArrayDataType::kInt32;
-  } else if (tf_data_type == DT_FLOAT) {
-    return ArrayDataType::kFloat;
-  } else {
-    return ArrayDataType::kNone;
-  }
-}
-
 void ConvertCastOperator(const NodeDef& node,
                          const TensorFlowImportFlags& tf_import_flags,
                          Model* model) {
   CHECK_EQ(node.op(), "Cast");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   const auto tf_src_dtype = GetDataTypeAttr(node, "SrcT");
   const auto tf_dst_dtype = GetDataTypeAttr(node, "DstT");
-  CHECK(tf_src_dtype == DT_UINT8 || tf_src_dtype == DT_INT32 ||
-        tf_src_dtype == DT_FLOAT);
-  CHECK(tf_dst_dtype == DT_UINT8 || tf_dst_dtype == DT_INT32 ||
-        tf_dst_dtype == DT_FLOAT);
-  CHECK_NE(tf_src_dtype, tf_dst_dtype)
-      << "Same input and output data type. No need to cast.";
   auto* op = new CastOperator;
-  op->src_data_type = GetArrayDataType(tf_src_dtype);
-  op->dst_data_type = GetArrayDataType(tf_dst_dtype);
+  op->src_data_type = ConvertDataType(tf_src_dtype);
+  op->dst_data_type = ConvertDataType(tf_dst_dtype);
   op->inputs.push_back(node.input(0));
   op->outputs.push_back(node.name());
   model->operators.emplace_back(op);
@@ -1172,7 +1250,7 @@ void ConvertFloorOperator(const NodeDef& node,
                           const TensorFlowImportFlags& tf_import_flags,
                           Model* model) {
   CHECK_EQ(node.op(), "Floor");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 1);
+  CheckInputsCount(node, tf_import_flags, 1);
   const auto data_type = GetDataTypeAttr(node, "T");
   CHECK(data_type == DT_FLOAT);
   auto* op = new FloorOperator;
@@ -1185,9 +1263,9 @@ void ConvertGatherOperator(const NodeDef& node,
                            const TensorFlowImportFlags& tf_import_flags,
                            Model* model) {
   CHECK_EQ(node.op(), "Gather");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   const auto indices_data_type = GetDataTypeAttr(node, "Tindices");
-  CHECK(indices_data_type == DT_INT32);
+  CHECK(indices_data_type == DT_INT32 || indices_data_type == DT_INT64);
   auto* op = new GatherOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -1195,11 +1273,28 @@ void ConvertGatherOperator(const NodeDef& node,
   model->operators.emplace_back(op);
 }
 
+void ConvertArgMaxOperator(const NodeDef& node,
+                           const TensorFlowImportFlags& tf_import_flags,
+                           Model* model) {
+  CHECK_EQ(node.op(), "ArgMax");
+  CheckInputsCount(node, tf_import_flags, 2);
+  const auto axis_data_type = GetDataTypeAttr(node, "Tidx");
+  const auto output_type = GetDataTypeAttr(node, "output_type");
+  CHECK(axis_data_type == DT_INT64 || axis_data_type == DT_INT32);
+  CHECK(output_type == DT_INT64 || output_type == DT_INT32);
+  auto* op = new ArgMaxOperator;
+  op->output_data_type = ConvertDataType(output_type);
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
 void ConvertResizeBilinearOperator(const NodeDef& node,
                                    const TensorFlowImportFlags& tf_import_flags,
                                    Model* model) {
   CHECK_EQ(node.op(), "ResizeBilinear");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new ResizeBilinearOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -1211,7 +1306,7 @@ void ConvertBatchNormWithGlobalNormalizationOperator(
     const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
     Model* model) {
   CHECK_EQ(node.op(), "BatchNormWithGlobalNormalization");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 5);
+  CheckInputsCount(node, tf_import_flags, 5);
 
   // TODO(ahentz): to really match tensorflow we need to add variance_epsilon
   // to the input, before feeding it into TensorFlowRsqrtOperator.
@@ -1260,7 +1355,7 @@ void ConvertFusedBatchNormOperator(const NodeDef& node,
                                    const TensorFlowImportFlags& tf_import_flags,
                                    Model* model) {
   CHECK_EQ(node.op(), "FusedBatchNorm");
-  CHECK_EQ(node.input_size(), 5);
+  CheckInputsCount(node, tf_import_flags, 5);
 
   // Declare shortcuts for the inputs.
   const string& gamma_input = node.input(1);
@@ -1316,7 +1411,7 @@ void ConvertSpaceToBatchNDOperator(const NodeDef& node,
                                    const TensorFlowImportFlags& tf_import_flags,
                                    Model* model) {
   CHECK_EQ(node.op(), "SpaceToBatchND");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 3);
+  CheckInputsCount(node, tf_import_flags, 3);
   CHECK_EQ(GetDataTypeAttr(node, "Tblock_shape"), DT_INT32);
   CHECK_EQ(GetDataTypeAttr(node, "Tpaddings"), DT_INT32);
   auto* op = new SpaceToBatchNDOperator;
@@ -1331,7 +1426,7 @@ void ConvertBatchToSpaceNDOperator(const NodeDef& node,
                                    const TensorFlowImportFlags& tf_import_flags,
                                    Model* model) {
   CHECK_EQ(node.op(), "BatchToSpaceND");
-  CHECK_EQ(GetInputsCount(node, tf_import_flags), 3);
+  CheckInputsCount(node, tf_import_flags, 3);
   CHECK_EQ(GetDataTypeAttr(node, "Tblock_shape"), DT_INT32);
   CHECK_EQ(GetDataTypeAttr(node, "Tcrops"), DT_INT32);
   auto* op = new BatchToSpaceNDOperator;
@@ -1346,7 +1441,7 @@ void ConvertMeanOperator(const NodeDef& node,
                          const TensorFlowImportFlags& tf_import_flags,
                          Model* model) {
   CHECK_EQ(node.op(), "Mean");
-  CHECK_EQ(node.input_size(), 2);
+  CheckInputsCount(node, tf_import_flags, 2);
   auto* op = new MeanOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -1361,7 +1456,11 @@ void ConvertSvdfOperator(const NodeDef& node,
                          const TensorFlowImportFlags& tf_import_flags,
                          Model* model) {
   CHECK_EQ(node.op(), "Svdf");
-  bool has_bias = (node.input_size() == 4);
+  const int input_size = GetInputsCount(node, tf_import_flags);
+  QCHECK(input_size == 3 || input_size == 4)
+      << "Svdf node expects 3 or 4 inputs other than control dependencies: "
+      << node.DebugString();
+  bool has_bias = (input_size == 4);
   auto* op = new SvdfOperator;
   op->inputs.push_back(node.input(0));
   op->inputs.push_back(node.input(1));
@@ -1380,6 +1479,170 @@ void ConvertSvdfOperator(const NodeDef& node,
   model->operators.emplace_back(op);
 }
 
+// This is just bare bones support to get the shapes to propagate.
+void ConvertTransposeConvOperator(const NodeDef& node,
+                                  const TensorFlowImportFlags& tf_import_flags,
+                                  Model* model) {
+  CHECK_EQ(node.op(), "Conv2DBackpropInput");
+  CheckInputsCount(node, tf_import_flags, 3);
+  auto* op = new TransposeConvOperator;
+  op->inputs.push_back(node.input(2));
+  op->inputs.push_back(node.input(1));
+  op->inputs.push_back(node.input(0));
+  op->outputs.push_back(node.name());
+  const auto& strides = GetListAttr(node, "strides");
+  CHECK_EQ(strides.i_size(), 4);
+  CHECK_EQ(strides.i(0), 1);
+  op->stride_height = strides.i(1);
+  op->stride_width = strides.i(2);
+  CHECK_EQ(strides.i(3), 1);
+  auto const& padding = GetStringAttr(node, "padding");
+  if (padding == "SAME") {
+    op->padding.type = PaddingType::kSame;
+  } else if (padding == "VALID") {
+    op->padding.type = PaddingType::kValid;
+  } else {
+    LOG(FATAL) << "Only SAME and VALID padding supported on "
+                  "Conv2DBackpropInput nodes.";
+  }
+  model->operators.emplace_back(op);
+}
+
+void ConvertExpandDimsOperator(const NodeDef& node,
+                               const TensorFlowImportFlags& tf_import_flags,
+                               Model* model) {
+  CHECK_EQ(node.op(), "ExpandDims");
+  CheckInputsCount(node, tf_import_flags, 2);
+  auto* op = new ExpandDimsOperator;
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertFillOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
+  CHECK_EQ(node.op(), "Fill");
+  CheckInputsCount(node, tf_import_flags, 2);
+  auto* op = new FillOperator;
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertFloorDivOperator(const NodeDef& node,
+                             const TensorFlowImportFlags& tf_import_flags,
+                             Model* model) {
+  CHECK_EQ(node.op(), "FloorDiv");
+  CheckInputsCount(node, tf_import_flags, 2);
+  auto* op = new FloorDivOperator;
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertFloorModOperator(const NodeDef& node,
+                             const TensorFlowImportFlags& tf_import_flags,
+                             Model* model) {
+  CHECK(node.op() == "FloorMod");
+  CheckInputsCount(node, tf_import_flags, 2);
+  auto* op = new FloorModOperator;
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertRangeOperator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
+  CHECK_EQ(node.op(), "Range");
+  CheckInputsCount(node, tf_import_flags, 3);
+  auto* op = new RangeOperator;
+  if (HasAttr(node, "Tidx")) {
+    const auto dtype = toco::GetDataTypeAttr(node, "Tidx");
+    CHECK(dtype == DT_UINT8 || dtype == DT_INT32 || dtype == DT_INT64 ||
+          dtype == DT_FLOAT);
+    op->dtype = ConvertDataType(dtype);
+  }
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->inputs.push_back(node.input(2));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertRankOperator(const NodeDef& node,
+                         const TensorFlowImportFlags& tf_import_flags,
+                         Model* model) {
+  CHECK_EQ(node.op(), "Rank");
+  CheckInputsCount(node, tf_import_flags, 1);
+  auto* op = new RankOperator;
+  op->inputs.push_back(node.input(0));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertStackOperator(const NodeDef& node,
+                          const TensorFlowImportFlags& tf_import_flags,
+                          Model* model) {
+  CHECK((node.op() == "Stack") || (node.op() == "Pack"));
+  auto* op = new StackOperator;
+  const int num_inputs = GetInputsCount(node, tf_import_flags);
+  QCHECK_GE(num_inputs, 1)
+      << node.op()
+      << " node expects at least 1 input other than control dependencies: "
+      << node.DebugString();
+  CHECK_EQ(num_inputs, GetIntAttr(node, "N"));
+  for (int i = 0; i < num_inputs; ++i) {
+    op->inputs.push_back(node.input(i));
+  }
+  // Both "Stack" and "Pack" have the "axis" attribute.
+  op->axis = GetIntAttr(node, "axis");
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+void ConvertTransposeOperator(const NodeDef& node,
+                              const TensorFlowImportFlags& tf_import_flags,
+                              Model* model) {
+  CHECK_EQ(node.op(), "Transpose");
+  CheckInputsCount(node, tf_import_flags, 2);
+  auto* op = new TransposeOperator;
+  op->inputs.push_back(node.input(0));
+  op->inputs.push_back(node.input(1));
+  op->outputs.push_back(node.name());
+  model->operators.emplace_back(op);
+}
+
+// Some TensorFlow ops only occur in graph cycles, representing
+// control flow. We do not currently support control flow, so we wouldn't
+// be able to fully support such graphs, including performing inference,
+// anyway. However, rather than erroring out early on graphs being cyclic,
+// it helps to at least support these just enough to allow getting a
+// graph visualization. This is not trivial, as we require graphs to be
+// acyclic aside from RNN back-edges. The solution is to special-case
+// such ops as RNN back-edges, which is technically incorrect (does not
+// allow representing the op's semantics) but good enough to get a
+// graph visualization.
+void ConvertOperatorSpecialCasedAsRNNBackEdge(
+    const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
+    Model* model) {
+  // At the moment, the only type of operator special-cased in this way is
+  // NextIteration, occurring only in control-flow cycles.
+  CHECK_EQ(node.op(), "NextIteration");
+  CHECK_EQ(node.input_size(), 1);
+  auto* rnn_state = model->flags.add_rnn_states();
+  // This RNN state is not explicitly created by the user, so it's
+  // OK for some later graph transformation to discard it.
+  rnn_state->set_discardable(true);
+  rnn_state->set_state_array(node.name());
+  rnn_state->set_back_edge_source_array(node.input(0));
+}
+
 void StripCaretFromArrayNames(Model* model) {
   for (auto& op : model->operators) {
     for (auto& input : op->inputs) {
@@ -1402,26 +1665,61 @@ void StripZeroOutputIndexFromInputs(NodeDef* node) {
   }
 }
 
-void AddExtraOutputsFedIntoOtherOps(Model* model) {
+// In TensorFlow GraphDef, when a node has multiple outputs, they are named
+// name:0, name:1, ...
+// where 'name' is the node's name(). Just 'name' is an equivalent shorthand
+// form for name:0.
+// A TensorFlow GraphDef does not explicitly list all the outputs of each node
+// (unlike inputs), it being implied by the node's name and operator type
+// (the latter implies the number of outputs).
+// This makes it non-trivial for us to reconstruct the list of all arrays
+// present in the graph and, for each operator, the list of its outputs.
+// We do that by taking advantage of the fact that
+// at least each node lists explicitly its inputs, so after we've loaded
+// all nodes, we can use that information.
+void AddExtraOutputs(Model* model) {
+  // Construct the list of all arrays consumed by anything in the graph.
+  std::vector<string> consumed_arrays;
+  // Add arrays consumed by an op.
   for (const auto& consumer_op : model->operators) {
     for (const string& input : consumer_op->inputs) {
-      const std::vector<string>& split = absl::StrSplit(input, ':');
-      if (split.size() != 2) {
-        continue;
-      }
-      int output_index = 0;
-      if (!absl::SimpleAtoi(split[1], &output_index)) {
-        continue;
-      }
-      auto* producer_op = GetOpWithOutput(*model, split[0]);
-      if (!producer_op) {
-        continue;
-      }
-      while (producer_op->outputs.size() <= output_index) {
-        using toco::port::StringF;
-        producer_op->outputs.push_back(
-            StringF("%s:%d", split[0], producer_op->outputs.size()));
-      }
+      consumed_arrays.push_back(input);
+    }
+  }
+  // Add global outputs of the model.
+  for (const string& output_array : model->flags.output_arrays()) {
+    consumed_arrays.push_back(output_array);
+  }
+  // Add arrays consumed by a RNN back-edge.
+  for (const auto& rnn_state : model->flags.rnn_states()) {
+    consumed_arrays.push_back(rnn_state.back_edge_source_array());
+  }
+  // Now add operator outputs so that all arrays that are consumed,
+  // are produced.
+  for (const string& consumed_array : consumed_arrays) {
+    // Split the consumed array name into the form name:output_index.
+    const std::vector<string>& split = absl::StrSplit(consumed_array, ':');
+    // If not of the form name:output_index, then this is not an additional
+    // output of a node with multiple outputs, so nothing to do here.
+    if (split.size() != 2) {
+      continue;
+    }
+    int output_index = 0;
+    if (!absl::SimpleAtoi(split[1], &output_index)) {
+      continue;
+    }
+    // Each op is initially recorded as producing at least the array that
+    // has its name. We use that to identify the producer node.
+    auto* producer_op = GetOpWithOutput(*model, split[0]);
+    if (!producer_op) {
+      continue;
+    }
+    // Add extra outputs to that producer node, all the way to the
+    // output_index.
+    while (producer_op->outputs.size() <= output_index) {
+      using toco::port::StringF;
+      producer_op->outputs.push_back(
+          StringF("%s:%d", split[0], producer_op->outputs.size()));
     }
   }
 }
@@ -1461,11 +1759,12 @@ bool InlineAllFunctions(GraphDef* graphdef) {
   flr = pflr.GetFLR("/job:localhost/replica:0/task:0/cpu:0");
 
   tensorflow::Graph graph(fld);
-  tensorflow::GraphConstructorOptions gc_opts;
-  const auto& tf_convert_status =
-      tensorflow::ConvertGraphDefToGraph(gc_opts, graphdef_copy, &graph);
+  tensorflow::ImportGraphDefOptions gc_opts;
+  gc_opts.validate_shape = false;
+  const auto& tf_convert_status = tensorflow::ImportGraphDef(
+      gc_opts, graphdef_copy, &graph, nullptr, nullptr);
   if (!tf_convert_status.ok()) {
-    LOG(ERROR) << "tensorflow::ConvertGraphDefToGraph failed with status: "
+    LOG(ERROR) << "tensorflow::ImportGraphDef failed with status: "
                << tf_convert_status.ToString();
     return false;
   }
@@ -1514,6 +1813,8 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
       ConvertConstOperator(node, tf_import_flags, model);
     } else if (node.op() == "Conv2D") {
       ConvertConvOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Conv2DBackpropInput") {
+      ConvertTransposeConvOperator(node, tf_import_flags, model);
     } else if (node.op() == "DepthwiseConv2dNative") {
       ConvertDepthwiseConvOperator(node, tf_import_flags, model);
     } else if (node.op() == "DepthToSpace") {
@@ -1536,16 +1837,19 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
       ConvertAvgPoolOperator(node, tf_import_flags, model);
     } else if (node.op() == "Reshape") {
       ConvertReshapeOperator(node, tf_import_flags, model);
-    } else if (node.op() == "MatMul") {
+    } else if (node.op() == "MatMul" || node.op() == "BatchMatMul") {
       ConvertMatMulOperator(node, tf_import_flags, model);
     } else if (node.op() == "Div" || node.op() == "RealDiv") {
       ConvertDivOperator(node, tf_import_flags, model);
-    } else if (node.op() == "Identity" || node.op() == "CheckNumerics") {
+    } else if (node.op() == "Identity" || node.op() == "CheckNumerics" ||
+               node.op() == "StopGradient") {
       ConvertIdentityOperator(node, tf_import_flags, model);
     } else if (node.op() == "FakeQuantWithMinMaxVars") {
       ConvertFakeQuantWithMinMaxVars(node, tf_import_flags, model);
     } else if (node.op() == "FakeQuantWithMinMaxArgs") {
       ConvertFakeQuantWithMinMaxArgs(node, tf_import_flags, model);
+    } else if (node.op() == "Neg") {
+      ConvertNegOperator(node, tf_import_flags, model);
     } else if (node.op() == "Rsqrt") {
       ConvertRsqrtOperator(node, tf_import_flags, model);
     } else if (node.op() == "Squeeze") {
@@ -1633,6 +1937,26 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
       ConvertMeanOperator(node, tf_import_flags, model);
     } else if (node.op() == "Svdf") {
       ConvertSvdfOperator(node, tf_import_flags, model);
+    } else if (node.op() == "NextIteration") {
+      ConvertOperatorSpecialCasedAsRNNBackEdge(node, tf_import_flags, model);
+    } else if (node.op() == "ExpandDims") {
+      ConvertExpandDimsOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Fill") {
+      ConvertFillOperator(node, tf_import_flags, model);
+    } else if (node.op() == "FloorDiv") {
+      ConvertFloorDivOperator(node, tf_import_flags, model);
+    } else if (node.op() == "FloorMod") {
+      ConvertFloorModOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Range") {
+      ConvertRangeOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Rank") {
+      ConvertRankOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Stack" || node.op() == "Pack") {
+      ConvertStackOperator(node, tf_import_flags, model);
+    } else if (node.op() == "Transpose") {
+      ConvertTransposeOperator(node, tf_import_flags, model);
+    } else if (node.op() == "ArgMax") {
+      ConvertArgMaxOperator(node, tf_import_flags, model);
     } else {
       ConvertUnsupportedOperator(node, tf_import_flags, model);
     }
@@ -1641,7 +1965,7 @@ std::unique_ptr<Model> ImportTensorFlowGraphDef(
   ResolveModelFlags(model_flags, model);
 
   StripCaretFromArrayNames(model);
-  AddExtraOutputsFedIntoOtherOps(model);
+  AddExtraOutputs(model);
   FixNoMissingArray(model);
   FixNoOrphanedArray(model);
   FixOperatorOrdering(model);
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 04b081352340b2ba14754fd2a4fea8894d7ad4fb..7b2235e2751e1bb359195a3d69f91725a5463434 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -41,6 +41,10 @@ enum class OperatorType {
   kSpaceToDepth,
   kDequantize,
   kDiv,
+  kExpandDims,
+  kFill,
+  kFloorDiv,
+  kFloorMod,
   kFullyConnected,
   kL2Normalization,
   kL2Pool,
@@ -50,23 +54,28 @@ enum class OperatorType {
   kMaxPool,
   kFakeQuant,
   kMul,
+  kRange,
+  kRank,
   kRelu,
   kRelu1,
   kRelu6,
   kSoftmax,
   kSub,
   kTanh,
+  kTransposeConv,
   kCast,
   kFloor,
   kGather,
   kResizeBilinear,
   kSpaceToBatchND,
+  kStack,
   kBatchToSpaceND,
   kPad,
   kStridedSlice,
   kSlice,
   kSqueeze,
   kMean,
+  kArgMax,
   // The SVDF Op is a decomposition of a densely connected Op into
   // low rank filters. For details:
   // https://research.google.com/pubs/pub43813.html
@@ -89,6 +98,7 @@ enum class OperatorType {
   kTensorFlowMinimum,
   kTensorFlowMatMul,
   kTensorFlowMerge,
+  kNeg,
   kTensorFlowReshape,
   kTensorFlowRsqrt,
   kTensorFlowShape,
@@ -98,6 +108,7 @@ enum class OperatorType {
   kTensorFlowSum,
   kTensorFlowSwitch,
   kTensorFlowTile,
+  kTranspose,
   // An unsupported TF operation. It's only needed to be able to represent TF
   // graph internally and is expected to be dropped by graph transformations.
   kTensorFlowUnsupported,
@@ -142,7 +153,15 @@ enum class AxesOrder {
 // because we'll be dropping the array anyway (e.g. some exotic array types
 // may be involved only in debug-only subgraphs that we may not be interested
 // in actually supporting).
-enum class ArrayDataType { kNone, kBool, kFloat, kUint8, kInt32, kInt64 };
+enum class ArrayDataType {
+  kNone,
+  kBool,
+  kFloat,
+  kUint8,
+  kInt32,
+  kInt64,
+  kString
+};
 
 // Compile-time logic to map ArrayDataType to the corresponding C++ scalar type
 template <ArrayDataType A>
@@ -171,6 +190,10 @@ template <>
 struct DataTypeImpl<ArrayDataType::kInt64> {
   typedef int64 Type;
 };
+template <>
+struct DataTypeImpl<ArrayDataType::kString> {
+  typedef string Type;
+};
 
 template <ArrayDataType A>
 using DataType = typename DataTypeImpl<A>::Type;
@@ -302,6 +325,10 @@ struct ConvOperator : Operator {
   Padding padding;
   int stride_width = 0;
   int stride_height = 0;
+  // A dilation_rate of 0 is invalid and this field is an optional attribute.
+  // Thus initializing it to 1 to allow default conv behavior when the
+  // attribute is not present.
+  int dilation_rate = 1;
 };
 
 // Depthwise-separable convolution operator.
@@ -533,7 +560,7 @@ struct AddOperator : Operator {
 };
 
 // Concatenation operator: concatenates its inputs
-// along the concat_dim dimension.
+// along the axis.
 //
 // Inputs: this operator accepts any number >= 1 of inputs.
 //   inputs[i]: the i-th array to concatenate.
@@ -541,7 +568,7 @@ struct AddOperator : Operator {
 // TensorFlow equivalent: Concat.
 struct ConcatenationOperator : Operator {
   ConcatenationOperator() : Operator(OperatorType::kConcatenation) {}
-  int concat_dim = 0;
+  int axis = 0;
 };
 
 // Reordering dimensions. Used only during tooling to transform graphs from
@@ -754,6 +781,112 @@ struct SqueezeOperator : Operator {
   std::vector<int> squeeze_dims;
 };
 
+// Inputs:
+//   inputs[0]: required: the input activations array
+//   inputs[1]: required: the Conv weights
+//   channel.
+//
+// Outputs:
+//   outputs[0]: required: the output activations array
+//
+// TensorFlow equivalent: Conv2DBackpropInput
+struct TransposeConvOperator : Operator {
+  TransposeConvOperator() : Operator(OperatorType::kTransposeConv) {}
+  Padding padding;
+  int stride_width = 0;
+  int stride_height = 0;
+};
+
+// Given a tensor input, this operation inserts a dimension of 1 at the
+// dimension index axis of input's shape. The dimension index axis starts at
+// zero; if you specify a negative number for axis it is counted backward from
+// the end.
+//
+// Inputs:
+//   inputs[0]: required: input tensor
+//   inputs[1]: required: 0-D (scalar). Specifies the dimension index at which
+//   to expand the shape of input
+//
+// TensorFlow equivalent: ExpandDims
+struct ExpandDimsOperator : Operator {
+  ExpandDimsOperator() : Operator(OperatorType::kExpandDims) {}
+};
+
+// Ceates a tensor of shape dims and fills it with the given scalar value.
+// Output type will be the same as the given scalar value.
+//
+// Inputs:
+//   inputs[0]: required: 1-D (int32) - the shape of the output tensor
+//   inputs[1]: required: 0-D (scalar) - value to fill the tensor with
+//
+// TensorFlow equivalent: Fill
+struct FillOperator : Operator {
+  FillOperator() : Operator(OperatorType::kFill) {}
+};
+
+// Element-wise floor division operator.
+//
+// Inputs:
+//   inputs[0]: required: the left-hand side array
+//   inputs[1]: required: the right-hand side array
+//
+// TensorFlow equivalent: FloorDiv
+struct FloorDivOperator : Operator {
+  FloorDivOperator() : Operator(OperatorType::kFloorDiv) {}
+};
+
+// Element-wise floor mod operator.
+//
+// Inputs:
+//   inputs[0]: required: the left-hand side array
+//   inputs[1]: required: the right-hand side array
+//
+// TensorFlow equivalent: FloorMod
+struct FloorModOperator : Operator {
+  FloorModOperator() : Operator(OperatorType::kFloorMod) {}
+};
+
+// Creates a sequence of numbers that begins at start and extends by increments
+// of delta up to but not including limit.
+//
+// The dtype of the resulting tensor is inferred from the inputs unless it is
+// provided explicitly.
+//
+// Inputs:
+//   inputs[0]: required: the start
+//   inputs[1]: required: the limit
+//   inputs[2]: required: the delta
+//
+// TensorFlow equivalent: Range
+struct RangeOperator : Operator {
+  RangeOperator() : Operator(OperatorType::kRange) {}
+  ArrayDataType dtype = ArrayDataType::kNone;
+};
+
+// Rank operator. Extracts the rank of the tensor.
+//
+// Inputs:
+//   inputs[0]: required: the input array
+//
+// This operation outputs a 0-D integer tensor representing the rank of
+// the input.
+//
+// TensorFlow equivalent: Rank.  We currently assume that the output is int32
+// and not int64.  The output type could be stored herein.
+struct RankOperator : Operator {
+  RankOperator() : Operator(OperatorType::kRank) {}
+};
+
+// Element-wise negation (-x) operator.
+//
+// Inputs:
+//   inputs[0]: required: the input array
+//
+// TensorFlow equivalent: Neg
+struct NegOperator : Operator {
+  NegOperator() : Operator(OperatorType::kNeg) {}
+};
+
 // Element-wise reciprocal-square-root (x^-0.5) operator.
 //
 // Inputs:
@@ -764,6 +897,21 @@ struct TensorFlowRsqrtOperator : Operator {
   TensorFlowRsqrtOperator() : Operator(OperatorType::kTensorFlowRsqrt) {}
 };
 
+// Stacks a list of rank-R tensors into one rank-(R+1) tensor.
+//
+// Packs the list of tensors in values into a tensor with rank one higher than
+// each tensor in values, by packing them along the axis dimension. Given a list
+// of length N of tensors of shape (A, B, C);.
+//
+// Inputs: this operator accepts any number >= 1 of inputs.
+//   inputs[i]: the i-th array to merge.
+//
+// TensorFlow equivalent: Stack or Pack
+struct StackOperator : Operator {
+  StackOperator() : Operator(OperatorType::kStack) {}
+  int axis = 0;
+};
+
 // Shape operator. Extracts the shape of the tensor.
 //
 // Inputs:
@@ -798,6 +946,20 @@ struct TensorFlowSquareOperator : Operator {
   TensorFlowSquareOperator() : Operator(OperatorType::kTensorFlowSquare) {}
 };
 
+// Transposes a tensor.
+//
+// By default, this operation performs a regular matrix transpose on 2-D input
+// tensors.
+//
+// Inputs:
+//   inputs[0]: required: the input array
+//
+// TensorFlow equivalent: Transpose
+struct TransposeOperator : Operator {
+  TransposeOperator() : Operator(OperatorType::kTranspose) {}
+  std::vector<int> perm;
+};
+
 // Element-wise subtraction operator.
 //
 // Inputs:
@@ -1071,7 +1233,19 @@ struct FloorOperator : Operator {
 // TensorFlow equivalent: Gather
 struct GatherOperator : Operator {
   GatherOperator() : Operator(OperatorType::kGather) {}
-  int input_rank;
+  int axis = 0;
+  int input_rank = 0;
+};
+
+// ArgMax operator. It returns the index of the maximum value along axis.
+//
+// Inputs:
+//   inputs[0]: required: the input tensor
+//
+// TensorFlow equivalent: ArgMax
+struct ArgMaxOperator : Operator {
+  ArgMaxOperator() : Operator(OperatorType::kArgMax) {}
+  ArrayDataType output_data_type = ArrayDataType::kInt64;
 };
 
 // ResizeBilinear operator. It resizes input images with bilinear interpolation.
@@ -1098,6 +1272,10 @@ struct ResizeBilinearOperator : Operator {
 // TensorFlow equivalent: SpaceToBatchND
 struct SpaceToBatchNDOperator : Operator {
   SpaceToBatchNDOperator() : Operator(OperatorType::kSpaceToBatchND) {}
+
+  std::vector<int> block_shape;
+  std::vector<int> before_paddings;
+  std::vector<int> after_paddings;
 };
 
 // BatchToSpaceND operator. Rearranges data from batch into blocks of
@@ -1112,6 +1290,10 @@ struct SpaceToBatchNDOperator : Operator {
 // TensorFlow equivalent: BatchToSpaceND
 struct BatchToSpaceNDOperator : Operator {
   BatchToSpaceNDOperator() : Operator(OperatorType::kBatchToSpaceND) {}
+
+  std::vector<int> block_shape;
+  std::vector<int> before_crops;
+  std::vector<int> after_crops;
 };
 
 // Mean operator.
@@ -1123,7 +1305,7 @@ struct BatchToSpaceNDOperator : Operator {
 struct MeanOperator : Operator {
   MeanOperator() : Operator(OperatorType::kMean) {}
 
-  std::vector<int> reduction_indices;
+  std::vector<int> axis;
   bool keep_dims = false;
 };
 
diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
index dde602e1868dc865ae7b37e7fa11985f013450de..790b3443cef1c577e19bafc5e087ca42e6fce60a 100644
--- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
@@ -17,7 +17,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/ascii.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_join.h"
 #include "absl/strings/str_split.h"
@@ -28,6 +27,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/toco/toco_port.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/command_line_flags.h"
+
 // "batch" flag only exists internally
 #ifdef PLATFORM_GOOGLE
 #include "base/commandlineflags.h"
@@ -134,6 +134,20 @@ bool ParseModelFlagsFromCommandLineFlags(
            parsed_flags.dump_graphviz_video.default_value(),
            "If true, will dump graphviz at each "
            "graph transformation, which may be used to generate a video."),
+      Flag("allow_nonexistent_arrays",
+           parsed_flags.allow_nonexistent_arrays.bind(),
+           parsed_flags.allow_nonexistent_arrays.default_value(),
+           "If true, will allow passing inexistent arrays in --input_arrays "
+           "and --output_arrays. This makes little sense, is only useful to "
+           "more easily get graph visualizations."),
+      Flag("allow_nonascii_arrays", parsed_flags.allow_nonascii_arrays.bind(),
+           parsed_flags.allow_nonascii_arrays.default_value(),
+           "If true, will allow passing non-ascii-printable characters in "
+           "--input_arrays and --output_arrays. By default (if false), only "
+           "ascii printable characters are allowed, i.e. character codes "
+           "ranging from 32 to 127. This is disallowed by default so as to "
+           "catch common copy-and-paste issues where invisible unicode "
+           "characters are unwittingly added to these strings."),
   };
   bool asked_for_help =
       *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help"));
@@ -265,10 +279,10 @@ void ReadModelFlagsFromCommandLineFlags(
       model_flags->add_input_arrays();
     }
     auto* shape = model_flags->mutable_input_arrays(0)->mutable_shape();
-    shape->Clear();
+    shape->clear_dims();
     const IntList& list = parsed_model_flags.input_shape.value();
     for (auto& dim : list.elements) {
-      shape->Add(dim);
+      shape->add_dims(dim);
     }
   }
   if (parsed_model_flags.input_shapes.specified()) {
@@ -278,25 +292,12 @@ void ReadModelFlagsFromCommandLineFlags(
     QCHECK(input_shapes.size() == model_flags->input_arrays_size());
     for (int i = 0; i < input_shapes.size(); ++i) {
       auto* shape = model_flags->mutable_input_arrays(i)->mutable_shape();
-      shape->Clear();
-      if (input_shapes[i].empty()) {
-        // empty i.e. 0-dimensional input shape.
-        // Unfortunately, the current toco::InputArray
-        // proto does not allow to distinguish between a known 0-D shape,
-        // and an unknown shape. Indeed, shape is currently a plain array,
-        // and it being empty means unknown shape. So here, we import a
-        // 0-D shape as a 1-D shape of size.
-        // TODO(benoitjacob): fix toco::InputArray to allow 0-D shape,
-        // probably by making shape an optional message,
-        // encapsulating the array.
-        shape->Add(1);
-      } else {
-        for (const auto& dim_str : absl::StrSplit(input_shapes[i], ',')) {
-          int size;
-          CHECK(absl::SimpleAtoi(dim_str, &size))
-              << "Failed to parse input_shape: " << input_shapes[i];
-          shape->Add(size);
-        }
+      shape->clear_dims();
+      for (const auto& dim_str : absl::StrSplit(input_shapes[i], ',')) {
+        int size;
+        CHECK(absl::SimpleAtoi(dim_str, &size))
+            << "Failed to parse input_shape: " << input_shapes[i];
+        shape->add_dims(size);
       }
     }
   }
@@ -362,6 +363,11 @@ void ReadModelFlagsFromCommandLineFlags(
       }
     }
   }
+
+  model_flags->set_allow_nonascii_arrays(
+      parsed_model_flags.allow_nonascii_arrays.value());
+  model_flags->set_allow_nonexistent_arrays(
+      parsed_model_flags.allow_nonexistent_arrays.value());
 }
 
 ParsedModelFlags* UncheckedGlobalParsedModelFlags(bool must_already_exist) {
diff --git a/tensorflow/contrib/lite/toco/model_flags.proto b/tensorflow/contrib/lite/toco/model_flags.proto
index 5b30904696b5cd71d3acfdeaee3af901c6bee884..13fea29a07ed9ea75ebe1b9b046f2a68d814c649 100644
--- a/tensorflow/contrib/lite/toco/model_flags.proto
+++ b/tensorflow/contrib/lite/toco/model_flags.proto
@@ -16,7 +16,11 @@ import "tensorflow/contrib/lite/toco/types.proto";
 
 package toco;
 
-// Next ID to USE: 6.
+message InputArrayShape {
+  repeated int32 dims = 2;
+}
+
+// Next ID to USE: 7.
 message InputArray {
   // Name of the input arrays, i.e. the arrays from which input activations
   // will be read.
@@ -28,7 +32,7 @@ message InputArray {
   //
   // The last dimension is typically called 'depth' or 'channels'. For example,
   // for an image model taking RGB images as input, this would have the value 3.
-  repeated int32 shape = 2;
+  optional InputArrayShape shape = 6;
 
   // mean_value and std_value parameters control the interpretation of raw input
   // activation values (elements of the input array) as real numbers. The
@@ -73,6 +77,25 @@ message InputArray {
   optional IODataType data_type = 5;
 }
 
+message RnnState {
+  optional string state_array = 1;
+  optional string back_edge_source_array = 2;
+  optional bool discardable = 5;
+  // TODO(benoitjacob): drop the 'size' field. Should be redundant with
+  // --input_shapes and shapes propagation.
+  optional int32 size = 3;
+  // TODO(benoitjacob): manually_create is a temporary hack:
+  // due to discrepancies between the current toco dims tracking and
+  // TensorFlow shapes, for some models we need to manually create RNN state
+  // arrays with a specified shape.
+  // Maybe we should actually implement back-edges as operators of their own,
+  // which would remove the need for much special-casing, including here,
+  // we could probably consistently let PropagateFixedSizes handle state
+  // arrays.
+  // TODO(benoitjacob): should really drop manually_create now.
+  optional bool manually_create = 4;
+}
+
 // ModelFlags encodes properties of a model that, depending on the file
 // format, may or may not be recorded in the model file. The purpose of
 // representing these properties in ModelFlags is to allow passing them
@@ -94,7 +117,7 @@ message InputArray {
 //   optional int32 input_dims = 11 [ default = 4];
 //   repeated int32 input_shape = 13;
 //
-// Next ID to USE: 16.
+// Next ID to USE: 18.
 message ModelFlags {
   // Information about the input arrays, i.e. the arrays from which input
   // activations will be read.
@@ -108,20 +131,6 @@ message ModelFlags {
   // the 'batch' field: at most one of these two fields can be set.
   optional bool variable_batch = 10;
 
-  message RnnState {
-    optional string state_array = 1;
-    optional string back_edge_source_array = 2;
-    optional int32 size = 3;
-    // TODO(benoitjacob): manually_create is a temporary hack:
-    // due to discrepancies between the current toco dims tracking and
-    // TensorFlow shapes, for some models we need to manually create RNN state
-    // arrays with a specified shape.
-    // Maybe we should actually implement back-edges as operators of their own,
-    // which would remove the need for much special-casing, including here,
-    // we could probably consistently let PropagateFixedSizes handle state
-    // arrays.
-    optional bool manually_create = 4;
-  }
   repeated RnnState rnn_states = 12;
 
   // Checks applied to the model, typically after toco's comprehensive
@@ -138,4 +147,17 @@ message ModelFlags {
     optional int32 count_max = 3 [default = -1];
   }
   repeated ModelCheck model_checks = 14;
+
+  // If true, will allow passing inexistent arrays in --input_arrays
+  // and --output_arrays. This makes little sense, is only useful to
+  // more easily get graph visualizations.
+  optional bool allow_nonexistent_arrays = 16;
+
+  // If true, will allow passing non-ascii-printable characters in
+  // --input_arrays and --output_arrays. By default (if false), only
+  // ascii printable characters are allowed, i.e. character codes
+  // ranging from 32 to 127. This is disallowed by default so as to
+  // catch common copy-and-paste issues where invisible unicode
+  // characters are unwittingly added to these strings.
+  optional bool allow_nonascii_arrays = 17;
 }
diff --git a/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py
index 28d52067a9a19ae240582f578e04776340a0cb2d..c35b6f99259b762aa83d92d21512169a7ab50b70 100644
--- a/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py
+++ b/tensorflow/contrib/lite/toco/python/toco_from_protos_test.py
@@ -53,7 +53,7 @@ class TocoFromProtosTest(googletest.TestCase):
     model_flags = model_flags_pb2.ModelFlags()
     input_array = model_flags.input_arrays.add()
     input_array.name = TensorName(in_tensor)
-    input_array.shape.extend(map(int, in_tensor.get_shape()))
+    input_array.shape.dims.extend(map(int, in_tensor.get_shape()))
     model_flags.output_arrays.append(TensorName(out_tensor))
     # Shell out to run toco (in case it crashes)
     with tempfile.NamedTemporaryFile() as fp_toco, \
diff --git a/tensorflow/contrib/lite/toco/tflite/BUILD b/tensorflow/contrib/lite/toco/tflite/BUILD
index e910e3957f77fcf28ab379026bae4cc33ed00bc5..332253a092aff812fb18601862c66bc0423599c2 100644
--- a/tensorflow/contrib/lite/toco/tflite/BUILD
+++ b/tensorflow/contrib/lite/toco/tflite/BUILD
@@ -1,3 +1,8 @@
+package(
+    # To suppress build cleaner error about inclusion of schema_generate.h.
+    features = ["-layering_check"],
+)
+
 licenses(["notice"])  # Apache 2.0
 
 load(
@@ -93,6 +98,7 @@ tf_cc_test(
     ],
     deps = [
         ":export",
+        "//tensorflow/contrib/lite/schema:schema_fbs",
         "@com_google_googletest//:gtest_main",
     ],
 )
diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index beda710614fd607a2e373582620d24dc3656fcf4..bec694a23377c7c70684000069e9c08ee446b6c0 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -188,19 +188,26 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
     const details::OperatorKey operator_key = GetOperatorKey(*op);
     int op_index = operators_map.at(operator_key);
 
-    if (ops_by_type.count(op->type) == 0) {
-      LOG(FATAL) << "Unsupported operator: " << HelpfulOperatorTypeName(*op);
+    string name = HelpfulOperatorTypeName(*op);
+    bool is_builtin = false;
+    if (ops_by_type.count(op->type) != 0) {
+      name = ops_by_type.at(op->type)->name();
+      is_builtin = (builtin_ops.count(name) > 0);
     }
 
-    string name = ops_by_type.at(op->type)->name();
-    if (builtin_ops.count(name) > 0) {
+    if (is_builtin) {
       ordered_opcodes[op_index] =
           CreateOperatorCode(*builder, builtin_ops[name], 0);
     } else {
-      // If use the custom operation code if it's available in the OperatorKey.
+      // This could be a kTensorFlowUnsupported, in which case we should be
+      // able to retrieve the original Tensorflow name from the OperatorKey, or
+      // this could be a proper TOCO operator that is completely unknown to TF
+      // Lite.
       if (!operator_key.custom_code.empty()) {
         name = operator_key.custom_code;
       }
+      // Either way, this is an operator that is not supported by TF Lite,
+      // so we output it as a custom op and add it to the error summary.
       if (error_summary) {
         error_summary->insert(name);
       }
@@ -226,11 +233,6 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
   // The operators are in execution order, so we just follow tf.mini order.
   std::vector<Offset<Operator>> op_vector;
   for (const auto& op : model.operators) {
-    if (ops_by_type.count(op->type) == 0) {
-      LOG(FATAL) << "Op type '" << OperatorTypeName(op->type)
-                 << "' not supported";
-    }
-
     std::vector<int32_t> inputs;
     for (const string& input : op->inputs) {
       inputs.push_back(tensors_map.at(input));
@@ -241,8 +243,15 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
       outputs.push_back(tensors_map.at(output));
     }
 
-    auto options = ops_by_type.at(op->type)->Serialize(*op, builder);
     int op_index = operators_map.at(GetOperatorKey(*op));
+
+    // This is a custom op unless we can find it in ops_by_type, and even then
+    // it could be a custom op (such as kTensorFlowUnsupported).
+
+    auto options = Options::Custom(0);
+    if (ops_by_type.count(op->type) != 0) {
+      options = ops_by_type.at(op->type)->Serialize(*op, builder);
+    }
     // The only supported CustomOptionFormat is FLEXBUFFERS now.
     op_vector.push_back(CreateOperator(
         *builder, op_index, builder->CreateVector(inputs),
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index e395645383144f663fa108f05ca9930a56cf26a6..6754372330797ae30230af26a3b478c24ad44005 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -16,12 +16,14 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/schema/schema_generated.h"
 
 namespace toco {
-
 namespace tflite {
 namespace {
 
+using ::testing::ElementsAre;
+
 class ExportTest : public ::testing::Test {
  protected:
   // This is a very simplistic model. We are not interested in testing all the
@@ -31,11 +33,20 @@ class ExportTest : public ::testing::Test {
   void BuildTestModel() {
     input_model_.GetOrCreateArray("tensor_one");
     input_model_.GetOrCreateArray("tensor_two");
-    input_model_.operators.emplace_back(new ConvOperator);
+    {
+      auto* op = new ConvOperator;
+      op->padding.type = PaddingType::kSame;
+      input_model_.operators.emplace_back(op);
+    }
     input_model_.operators.emplace_back(new AddOperator);
-    auto unsupported_operator = new TensorFlowUnsupportedOperator;
-    unsupported_operator->tensorflow_op = "MyCrazyOp";
-    input_model_.operators.emplace_back(unsupported_operator);
+    {
+      auto* op = new TensorFlowUnsupportedOperator;
+      op->tensorflow_op = "MyCrazyOp";
+      input_model_.operators.emplace_back(op);
+    }
+    // Note that Sub is not know to TF Lite, so it gets exported as a custom
+    // op (and no options).
+    input_model_.operators.emplace_back(new SubOperator);
   }
 
   Model input_model_;
@@ -57,13 +68,44 @@ TEST_F(ExportTest, LoadOperatorsMap) {
   details::LoadOperatorsMap(input_model_, &operators);
   EXPECT_EQ(0, operators[details::OperatorKey(OperatorType::kAdd, "")]);
   EXPECT_EQ(1, operators[details::OperatorKey(OperatorType::kConv, "")]);
-  EXPECT_EQ(2, operators[details::OperatorKey(
+  EXPECT_EQ(2, operators[details::OperatorKey(OperatorType::kSub, "")]);
+  EXPECT_EQ(3, operators[details::OperatorKey(
                    OperatorType::kTensorFlowUnsupported, "MyCrazyOp")]);
 }
 
+TEST_F(ExportTest, Export) {
+  BuildTestModel();
+
+  string result;
+  Export(input_model_, true, &result);
+
+  auto* model = ::tflite::GetModel(result.data());
+
+  std::vector<string> names;
+  for (const ::tflite::OperatorCode* opcode : *model->operator_codes()) {
+    if (opcode->builtin_code() != ::tflite::BuiltinOperator_CUSTOM) {
+      names.push_back(string("builtin:") + ::tflite::EnumNameBuiltinOperator(
+                                               opcode->builtin_code()));
+    } else {
+      names.push_back(string("custom:") + opcode->custom_code()->c_str());
+    }
+  }
+
+  EXPECT_THAT(names, ElementsAre("builtin:ADD", "builtin:CONV_2D",
+                                 "builtin:SUB", "custom:MyCrazyOp"));
+
+  std::vector<uint32_t> indices;
+  auto operators = (*model->subgraphs())[0]->operators();
+  EXPECT_EQ(operators->Length(), 4);
+  for (const auto* op : *operators) {
+    indices.push_back(op->opcode_index());
+  }
+
+  EXPECT_THAT(indices, ElementsAre(1, 0, 3, 2));
+}
+
 // TODO(ahentz): tests for tensors, inputs, outpus, opcodes and operators.
 
 }  // namespace
 }  // namespace tflite
-
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 8a33500ddcda67d97e68158ce40d8d7e086a27cc..0111e1ed92f479cd35f03971ff74ab08c4ccf55a 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -130,6 +130,108 @@ class Add : public BuiltinOperator<AddOperator, ::tflite::AddOptions,
   }
 };
 
+class SpaceToBatchND
+    : public BuiltinOperator<SpaceToBatchNDOperator,
+                             ::tflite::SpaceToBatchNDOptions,
+                             ::tflite::BuiltinOptions_SpaceToBatchNDOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    auto block_shape = builder->CreateVector(op.block_shape);
+    auto before_paddings = builder->CreateVector(op.before_paddings);
+    auto after_paddings = builder->CreateVector(op.after_paddings);
+    return ::tflite::CreateSpaceToBatchNDOptions(
+        *builder, block_shape, before_paddings, after_paddings);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->block_shape.insert(op->block_shape.end(),
+                           options.block_shape()->begin(),
+                           options.block_shape()->end());
+    op->before_paddings.insert(op->before_paddings.end(),
+                               options.before_paddings()->begin(),
+                               options.before_paddings()->end());
+    op->after_paddings.insert(op->after_paddings.end(),
+                              options.after_paddings()->begin(),
+                              options.after_paddings()->end());
+  }
+};
+
+class Sub : public BuiltinOperator<SubOperator, ::tflite::SubOptions,
+                                   ::tflite::BuiltinOptions_SubOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    auto activation_function =
+        ActivationFunction::Serialize(op.fused_activation_function);
+    return ::tflite::CreateSubOptions(*builder, activation_function);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->fused_activation_function =
+        ActivationFunction::Deserialize(options.fused_activation_function());
+  }
+};
+
+class Div : public BuiltinOperator<DivOperator, ::tflite::DivOptions,
+                                   ::tflite::BuiltinOptions_DivOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    auto activation_function =
+        ActivationFunction::Serialize(op.fused_activation_function);
+    return ::tflite::CreateDivOptions(*builder, activation_function);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->fused_activation_function =
+        ActivationFunction::Deserialize(options.fused_activation_function());
+  }
+};
+
+class BatchToSpaceND
+    : public BuiltinOperator<BatchToSpaceNDOperator,
+                             ::tflite::BatchToSpaceNDOptions,
+                             ::tflite::BuiltinOptions_BatchToSpaceNDOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    auto block_shape = builder->CreateVector(op.block_shape);
+    auto before_crops = builder->CreateVector(op.before_crops);
+    auto after_crops = builder->CreateVector(op.after_crops);
+    return ::tflite::CreateBatchToSpaceNDOptions(*builder, block_shape,
+                                                 before_crops, after_crops);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->block_shape.insert(op->block_shape.end(),
+                           options.block_shape()->begin(),
+                           options.block_shape()->end());
+    op->before_crops.insert(op->before_crops.end(),
+                            options.before_crops()->begin(),
+                            options.before_crops()->end());
+    op->after_crops.insert(op->after_crops.end(),
+                           options.after_crops()->begin(),
+                           options.after_crops()->end());
+  }
+};
+
 class Cast : public CustomOperator<CastOperator> {
  public:
   using CustomOperator::CustomOperator;
@@ -153,12 +255,12 @@ class Concatenation
   flatbuffers::Offset<TfLiteOptions> WriteOptions(
       const TocoOperator& op,
       flatbuffers::FlatBufferBuilder* builder) const override {
-    return ::tflite::CreateConcatenationOptions(*builder, op.concat_dim);
+    return ::tflite::CreateConcatenationOptions(*builder, op.axis);
   }
 
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {
-    op->concat_dim = options.axis();
+    op->axis = options.axis();
   }
 };
 
@@ -211,6 +313,22 @@ class FullyConnected
   }
 };
 
+class Gather : public BuiltinOperator<GatherOperator, ::tflite::GatherOptions,
+                                      ::tflite::BuiltinOptions_GatherOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    return ::tflite::CreateGatherOptions(*builder, op.axis);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->axis = options.axis();
+  }
+};
+
 class Svdf : public BuiltinOperator<SvdfOperator, ::tflite::SVDFOptions,
                                     ::tflite::BuiltinOptions_SVDFOptions> {
  public:
@@ -348,6 +466,30 @@ class Mul : public BuiltinOperator<MulOperator, ::tflite::MulOptions,
   }
 };
 
+class Pad : public BuiltinOperator<PadOperator, ::tflite::PadOptions,
+                                   ::tflite::BuiltinOptions_PadOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    auto before_padding = builder->CreateVector(op.left_padding);
+    auto after_padding = builder->CreateVector(op.right_padding);
+    return ::tflite::CreatePadOptions(*builder, before_padding, after_padding);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->left_padding.insert(op->left_padding.end(),
+                            options.before_padding()->begin(),
+                            options.before_padding()->end());
+    op->right_padding.insert(op->right_padding.end(),
+                             options.after_padding()->begin(),
+                             options.after_padding()->end());
+  }
+};
+
 class Reshape
     : public BuiltinOperator<TensorFlowReshapeOperator,
                              ::tflite::ReshapeOptions,
@@ -404,6 +546,65 @@ class SpaceToDepth
   }
 };
 
+class Transpose
+    : public BuiltinOperator<TransposeOperator, ::tflite::TransposeOptions,
+                             ::tflite::BuiltinOptions_TransposeOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    return ::tflite::CreateTransposeOptions(*builder,
+                                            builder->CreateVector(op.perm));
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->perm.insert(op->perm.end(), options.perm()->begin(),
+                    options.perm()->end());
+  }
+};
+
+class Mean : public BuiltinOperator<MeanOperator, ::tflite::MeanOptions,
+                                    ::tflite::BuiltinOptions_MeanOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    auto axis = builder->CreateVector(op.axis);
+    return ::tflite::CreateMeanOptions(*builder, axis, op.keep_dims);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->axis.insert(op->axis.end(), options.axis()->begin(),
+                    options.axis()->end());
+    op->keep_dims = options.keep_dims();
+  }
+};
+
+class Squeeze
+    : public BuiltinOperator<SqueezeOperator, ::tflite::SqueezeOptions,
+                             ::tflite::BuiltinOptions_SqueezeOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    auto squeeze_dims = builder->CreateVector(op.squeeze_dims);
+    return ::tflite::CreateSqueezeOptions(*builder, squeeze_dims);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    op->squeeze_dims.insert(op->squeeze_dims.end(),
+                            options.squeeze_dims()->begin(),
+                            options.squeeze_dims()->end());
+  }
+};
+
 class Split : public CustomOperator<TensorFlowSplitOperator> {
  public:
   using CustomOperator::CustomOperator;
@@ -529,8 +730,16 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList() {
 
   // Builtin Operators.
   ops.emplace_back(new Add(::tflite::BuiltinOperator_ADD, OperatorType::kAdd));
+  ops.emplace_back(new Div(::tflite::BuiltinOperator_DIV, OperatorType::kDiv));
+  ops.emplace_back(new Sub(::tflite::BuiltinOperator_SUB, OperatorType::kSub));
   ops.emplace_back(new AveragePool(::tflite::BuiltinOperator_AVERAGE_POOL_2D,
                                    OperatorType::kAveragePool));
+  ops.emplace_back(
+      new SpaceToBatchND(::tflite::BuiltinOperator_SPACE_TO_BATCH_ND,
+                         OperatorType::kSpaceToBatchND));
+  ops.emplace_back(
+      new BatchToSpaceND(::tflite::BuiltinOperator_BATCH_TO_SPACE_ND,
+                         OperatorType::kBatchToSpaceND));
   ops.emplace_back(new Concatenation(::tflite::BuiltinOperator_CONCATENATION,
                                      OperatorType::kConcatenation));
   ops.emplace_back(
@@ -540,6 +749,8 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList() {
                                OperatorType::kDepthwiseConv));
   ops.emplace_back(new FullyConnected(::tflite::BuiltinOperator_FULLY_CONNECTED,
                                       OperatorType::kFullyConnected));
+  ops.emplace_back(
+      new Gather(::tflite::BuiltinOperator_GATHER, OperatorType::kGather));
   ops.emplace_back(
       new L2Normalization(::tflite::BuiltinOperator_L2_NORMALIZATION,
                           OperatorType::kL2Normalization));
@@ -551,6 +762,7 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList() {
   ops.emplace_back(new MaxPool(::tflite::BuiltinOperator_MAX_POOL_2D,
                                OperatorType::kMaxPool));
   ops.emplace_back(new Mul(::tflite::BuiltinOperator_MUL, OperatorType::kMul));
+  ops.emplace_back(new Pad(::tflite::BuiltinOperator_PAD, OperatorType::kPad));
   ops.emplace_back(new Reshape(::tflite::BuiltinOperator_RESHAPE,
                                OperatorType::kTensorFlowReshape));
   ops.emplace_back(
@@ -559,6 +771,12 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList() {
                                     OperatorType::kSpaceToDepth));
   ops.emplace_back(
       new Svdf(::tflite::BuiltinOperator_SVDF, OperatorType::kSvdf));
+  ops.emplace_back(new Transpose(::tflite::BuiltinOperator_TRANSPOSE,
+                                 OperatorType::kTranspose));
+  ops.emplace_back(
+      new Mean(::tflite::BuiltinOperator_MEAN, OperatorType::kMean));
+  ops.emplace_back(
+      new Squeeze(::tflite::BuiltinOperator_SQUEEZE, OperatorType::kSqueeze));
 
   // Custom Operators.
   ops.emplace_back(new Cast("CAST", OperatorType::kCast));
@@ -571,22 +789,18 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList() {
 
   // There operators are supported by Toco, but not by TF Lite, and has no
   // attributes.
+  ops.emplace_back(new SimpleOperator<NegOperator>("NEG", OperatorType::kNeg));
   ops.emplace_back(new SimpleOperator<TensorFlowRsqrtOperator>(
       "RSQRT", OperatorType::kTensorFlowRsqrt));
-  ops.emplace_back(
-      new SimpleOperator<TensorFlowRsqrtOperator>("DIV", OperatorType::kDiv));
-
   // Simple Operators.
   ops.emplace_back(new SimpleOperator<DequantizeOperator>(
       "DEQUANTIZE", OperatorType::kDequantize));
   ops.emplace_back(
       new SimpleOperator<FloorOperator>("FLOOR", OperatorType::kFloor));
-  ops.emplace_back(
-      new SimpleOperator<GatherOperator>("GATHER", OperatorType::kGather));
   ops.emplace_back(
       new SimpleOperator<ReluOperator>("RELU", OperatorType::kRelu));
   ops.emplace_back(
-      new SimpleOperator<Relu1Operator>("RELU1", OperatorType::kRelu1));
+      new SimpleOperator<Relu1Operator>("RELU_N1_TO_1", OperatorType::kRelu1));
   ops.emplace_back(
       new SimpleOperator<Relu6Operator>("RELU6", OperatorType::kRelu6));
   ops.emplace_back(new SimpleOperator<ResizeBilinearOperator>(
diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
index 8e77c56d8aaa88d5c801ae246e1ee63e40b6f955..77c70847d1e94fc5c7eeac6480a5286ba6557fab 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
@@ -101,9 +101,8 @@ TEST_F(OperatorTest, SimpleOperators) {
   CheckSimpleOperator<DequantizeOperator>("DEQUANTIZE",
                                           OperatorType::kDequantize);
   CheckSimpleOperator<FloorOperator>("FLOOR", OperatorType::kFloor);
-  CheckSimpleOperator<GatherOperator>("GATHER", OperatorType::kGather);
   CheckSimpleOperator<ReluOperator>("RELU", OperatorType::kRelu);
-  CheckSimpleOperator<Relu1Operator>("RELU1", OperatorType::kRelu1);
+  CheckSimpleOperator<Relu1Operator>("RELU_N1_TO_1", OperatorType::kRelu1);
   CheckSimpleOperator<Relu6Operator>("RELU6", OperatorType::kRelu6);
   CheckSimpleOperator<ResizeBilinearOperator>("RESIZE_BILINEAR",
                                               OperatorType::kResizeBilinear);
@@ -120,6 +119,43 @@ TEST_F(OperatorTest, BuiltinAdd) {
             output_toco_op->fused_activation_function);
 }
 
+TEST_F(OperatorTest, BuiltinSpaceToBatchND) {
+  SpaceToBatchNDOperator op;
+  op.block_shape = {2, 2};
+  op.before_paddings = {1, 2};
+  op.after_paddings = {3, 4};
+
+  auto output_toco_op = SerializeAndDeserialize(
+      GetOperator("SPACE_TO_BATCH_ND", OperatorType::kSpaceToBatchND), op);
+  EXPECT_EQ(op.block_shape, output_toco_op->block_shape);
+  EXPECT_EQ(op.before_paddings, output_toco_op->before_paddings);
+  EXPECT_EQ(op.after_paddings, output_toco_op->after_paddings);
+}
+
+TEST_F(OperatorTest, BuiltinBatchToSpaceND) {
+  BatchToSpaceNDOperator op;
+  op.block_shape = {2, 2};
+  op.before_crops = {1, 2};
+  op.after_crops = {3, 4};
+
+  auto output_toco_op = SerializeAndDeserialize(
+      GetOperator("BATCH_TO_SPACE_ND", OperatorType::kBatchToSpaceND), op);
+  EXPECT_EQ(op.block_shape, output_toco_op->block_shape);
+  EXPECT_EQ(op.before_crops, output_toco_op->before_crops);
+  EXPECT_EQ(op.after_crops, output_toco_op->after_crops);
+}
+
+TEST_F(OperatorTest, BuiltinMean) {
+  MeanOperator op;
+  op.axis = {1, 2};
+  op.keep_dims = false;
+
+  auto output_toco_op =
+      SerializeAndDeserialize(GetOperator("MEAN", OperatorType::kMean), op);
+  EXPECT_EQ(op.axis, output_toco_op->axis);
+  EXPECT_EQ(op.keep_dims, output_toco_op->keep_dims);
+}
+
 TEST_F(OperatorTest, CustomCast) {
   CastOperator op;
   op.src_data_type = ArrayDataType::kFloat;
@@ -132,10 +168,10 @@ TEST_F(OperatorTest, CustomCast) {
 
 TEST_F(OperatorTest, CustomConcatenation) {
   ConcatenationOperator op;
-  op.concat_dim = 123;
+  op.axis = 123;
   auto output_toco_op = SerializeAndDeserialize(
       GetOperator("CONCATENATION", OperatorType::kConcatenation), op);
-  EXPECT_EQ(op.concat_dim, output_toco_op->concat_dim);
+  EXPECT_EQ(op.axis, output_toco_op->axis);
 }
 
 TEST_F(OperatorTest, CustomDepthToSpace) {
@@ -167,6 +203,13 @@ TEST_F(OperatorTest, CustomFullyConnected) {
             output_toco_op->fused_activation_function);
 }
 
+TEST_F(OperatorTest, BuiltinGather) {
+  GatherOperator op;
+  auto output_toco_op =
+      SerializeAndDeserialize(GetOperator("GATHER", OperatorType::kGather), op);
+  ASSERT_NE(nullptr, output_toco_op.get());
+}
+
 TEST_F(OperatorTest, BuiltinL2Pool) {
   L2PoolOperator op;
   op.stride_width = 123;
@@ -215,6 +258,16 @@ TEST_F(OperatorTest, BuiltinMaxPool) {
   EXPECT_EQ(op.kheight, output_toco_op->kheight);
 }
 
+TEST_F(OperatorTest, BuiltinPad) {
+  PadOperator op;
+  op.left_padding = {1, 2, 3};
+  op.right_padding = {1, 2, 3};
+  auto output_toco_op =
+      SerializeAndDeserialize(GetOperator("PAD", OperatorType::kPad), op);
+  EXPECT_EQ(op.left_padding, output_toco_op->left_padding);
+  EXPECT_EQ(op.right_padding, output_toco_op->right_padding);
+}
+
 TEST_F(OperatorTest, BuiltinReshape) {
   TensorFlowReshapeOperator op;
   op.shape = {1, 2, 4, 5, 8};
@@ -327,6 +380,24 @@ TEST_F(OperatorTest, Svdf) {
   EXPECT_EQ(op.rank, output_toco_op->rank);
 }
 
+TEST_F(OperatorTest, Transpose) {
+  TransposeOperator op;
+  op.perm = {0, 1, 2, 3};
+
+  auto output_toco_op = SerializeAndDeserialize(
+      GetOperator("TRANSPOSE", OperatorType::kTranspose), op);
+  EXPECT_EQ(op.perm, output_toco_op->perm);
+}
+
+TEST_F(OperatorTest, Squeeze) {
+  SqueezeOperator op;
+  op.squeeze_dims = {-2, -3, 4, 1, 4};
+
+  auto output_toco_op = SerializeAndDeserialize(
+      GetOperator("SQUEEZE", OperatorType::kSqueeze), op);
+  EXPECT_EQ(op.squeeze_dims, output_toco_op->squeeze_dims);
+}
+
 TEST_F(OperatorTest, TensorFlowUnsupported) {
   TensorFlowUnsupportedOperator op;
   op.tensorflow_op = "MyCustomUnsupportedOp";
diff --git a/tensorflow/contrib/lite/toco/tflite/types.cc b/tensorflow/contrib/lite/toco/tflite/types.cc
index 5b4dbfae2477d629624a70bf7c6e93606c937605..b4c2851502a40a1ca36965d4ddd2c8a15b8fe60f 100644
--- a/tensorflow/contrib/lite/toco/tflite/types.cc
+++ b/tensorflow/contrib/lite/toco/tflite/types.cc
@@ -51,8 +51,12 @@ void CopyBuffer(const ::tflite::Buffer& buffer, Array* array) {
       return ::tflite::TensorType_FLOAT32;
     case ArrayDataType::kInt32:
       return ::tflite::TensorType_INT32;
+    case ArrayDataType::kInt64:
+      return ::tflite::TensorType_INT64;
     case ArrayDataType::kUint8:
       return ::tflite::TensorType_UINT8;
+    case ArrayDataType::kString:
+      return ::tflite::TensorType_STRING;
     default:
       // FLOAT32 is filled for unknown data types.
       // TODO(ycling): Implement type inference in TF Lite interpreter.
@@ -66,6 +70,10 @@ ArrayDataType DataType::Deserialize(int tensor_type) {
       return ArrayDataType::kFloat;
     case ::tflite::TensorType_INT32:
       return ArrayDataType::kInt32;
+    case ::tflite::TensorType_INT64:
+      return ArrayDataType::kInt64;
+    case ::tflite::TensorType_STRING:
+      return ArrayDataType::kString;
     case ::tflite::TensorType_UINT8:
       return ArrayDataType::kUint8;
     default:
@@ -82,6 +90,8 @@ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> DataBuffer::Serialize(
       return CopyBuffer<ArrayDataType::kFloat>(array, builder);
     case ArrayDataType::kInt32:
       return CopyBuffer<ArrayDataType::kInt32>(array, builder);
+    case ArrayDataType::kString:
+      return CopyBuffer<ArrayDataType::kString>(array, builder);
     case ArrayDataType::kUint8:
       return CopyBuffer<ArrayDataType::kUint8>(array, builder);
     default:
@@ -99,6 +109,10 @@ void DataBuffer::Deserialize(const ::tflite::Tensor& tensor,
       return CopyBuffer<ArrayDataType::kFloat>(buffer, array);
     case ::tflite::TensorType_INT32:
       return CopyBuffer<ArrayDataType::kInt32>(buffer, array);
+    case ::tflite::TensorType_INT64:
+      return CopyBuffer<ArrayDataType::kInt64>(buffer, array);
+    case ::tflite::TensorType_STRING:
+      return CopyBuffer<ArrayDataType::kString>(buffer, array);
     case ::tflite::TensorType_UINT8:
       return CopyBuffer<ArrayDataType::kUint8>(buffer, array);
     default:
@@ -138,7 +152,7 @@ PaddingType Padding::Deserialize(int padding) {
     case FusedActivationFunctionType::kRelu6:
       return ::tflite::ActivationFunctionType_RELU6;
     case FusedActivationFunctionType::kRelu1:
-      return ::tflite::ActivationFunctionType_RELU1;
+      return ::tflite::ActivationFunctionType_RELU_N1_TO_1;
     default:
       LOG(FATAL) << "Unhandled fused activation function type.";
   }
@@ -153,7 +167,7 @@ FusedActivationFunctionType ActivationFunction::Deserialize(
       return FusedActivationFunctionType::kRelu;
     case ::tflite::ActivationFunctionType_RELU6:
       return FusedActivationFunctionType::kRelu6;
-    case ::tflite::ActivationFunctionType_RELU1:
+    case ::tflite::ActivationFunctionType_RELU_N1_TO_1:
       return FusedActivationFunctionType::kRelu1;
     default:
       LOG(FATAL) << "Unhandled fused activation function type.";
diff --git a/tensorflow/contrib/lite/toco/tflite/types_test.cc b/tensorflow/contrib/lite/toco/tflite/types_test.cc
index 174b78f3e632fde8dc6ea0ed83ed7a67fa12c16a..a040fe135841b92a6e668f32cc5e36cf812ab15b 100644
--- a/tensorflow/contrib/lite/toco/tflite/types_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/types_test.cc
@@ -28,8 +28,8 @@ using flatbuffers::Vector;
 
 // These are types that exist in TF Mini but don't have a correspondence
 // in TF Lite.
-static const ArrayDataType kUnsupportedTocoTypes[] = {
-    ArrayDataType::kNone, ArrayDataType::kBool, ArrayDataType::kInt64};
+static const ArrayDataType kUnsupportedTocoTypes[] = {ArrayDataType::kNone,
+                                                      ArrayDataType::kBool};
 
 // These are TF Lite types for which there is no correspondence in TF Mini.
 static const ::tflite::TensorType kUnsupportedTfLiteTypes[] = {
@@ -70,6 +70,7 @@ TEST(DataType, SupportedTypes) {
   std::vector<std::pair<ArrayDataType, ::tflite::TensorType>> testdata = {
       {ArrayDataType::kUint8, ::tflite::TensorType_UINT8},
       {ArrayDataType::kInt32, ::tflite::TensorType_INT32},
+      {ArrayDataType::kInt64, ::tflite::TensorType_INT64},
       {ArrayDataType::kFloat, ::tflite::TensorType_FLOAT32}};
   for (auto x : testdata) {
     EXPECT_EQ(x.second, DataType::Serialize(x.first));
@@ -172,7 +173,7 @@ TEST(ActivationFunction, All) {
                   {FusedActivationFunctionType::kRelu6,
                    ::tflite::ActivationFunctionType_RELU6},
                   {FusedActivationFunctionType::kRelu1,
-                   ::tflite::ActivationFunctionType_RELU1}};
+                   ::tflite::ActivationFunctionType_RELU_N1_TO_1}};
   for (auto x : testdata) {
     EXPECT_EQ(x.second, ActivationFunction::Serialize(x.first));
     EXPECT_EQ(x.first, ActivationFunction::Deserialize(x.second));
diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
index 83947d6b28010e6b75ff377648f51a0364a4d580..f8281f3a5725283d472e5e1a36e4d904b4dc1c49 100644
--- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc
@@ -65,10 +65,12 @@ bool ParseTocoFlagsFromCommandLineFlags(
            "is used."),
       Flag("input_type", parsed_flags.input_type.bind(),
            parsed_flags.input_type.default_value(),
-           "Deprecated old name of inference_input_type."),
+           "Deprecated ambiguous flag that set both --input_data_types and "
+           "--inference_input_type."),
       Flag("input_types", parsed_flags.input_types.bind(),
            parsed_flags.input_types.default_value(),
-           "Deprecated old name of inference_input_type. Was meant to be a "
+           "Deprecated ambiguous flag that set both --input_data_types and "
+           "--inference_input_type. Was meant to be a "
            "comma-separated list, but this was deprecated before "
            "multiple-input-types was ever properly supported."),
 
@@ -140,7 +142,6 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags,
           << #name;                                                          \
     }                                                                        \
   } while (false)
-
 #define READ_TOCO_FLAG(name, requirement)                     \
   ENFORCE_FLAG_REQUIREMENT(name, requirement);                \
   do {                                                        \
@@ -174,14 +175,26 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags,
 
   // Deprecated flag handling.
   if (parsed_toco_flags.input_type.specified()) {
-    LOG(WARNING) << "--input_type is deprecated. Use --inference_input_type.";
+    LOG(WARNING)
+        << "--input_type is deprecated. It was an ambiguous flag that set both "
+           "--input_data_types and --inference_input_type. If you are trying "
+           "to complement the input file with information about the type of "
+           "input arrays, use --input_data_type. If you are trying to control "
+           "the quantization/dequantization of real-numbers input arrays in "
+           "the output file, use --inference_input_type.";
     toco::IODataType input_type;
     QCHECK(toco::IODataType_Parse(parsed_toco_flags.input_type.value(),
                                   &input_type));
     toco_flags->set_inference_input_type(input_type);
   }
   if (parsed_toco_flags.input_types.specified()) {
-    LOG(WARNING) << "--input_types is deprecated. Use --inference_input_type.";
+    LOG(WARNING)
+        << "--input_types is deprecated. It was an ambiguous flag that set "
+           "both --input_data_types and --inference_input_type. If you are "
+           "trying to complement the input file with information about the "
+           "type of input arrays, use --input_data_type. If you are trying to "
+           "control the quantization/dequantization of real-numbers input "
+           "arrays in the output file, use --inference_input_type.";
     std::vector<string> input_types =
         absl::StrSplit(parsed_toco_flags.input_types.value(), ',');
     QCHECK(!input_types.empty());
diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc
index ca092b2d72d5c708a4db57cbb8810ec978446fab..94b4d146968d4bf92bd8f662763eecdc92a66663 100644
--- a/tensorflow/contrib/lite/toco/toco_tooling.cc
+++ b/tensorflow/contrib/lite/toco/toco_tooling.cc
@@ -51,6 +51,8 @@ void CheckUnsupportedOperations(const Model& model) {
 void MakeGeneralGraphTransformationsSet(
     GraphTransformationsSet* transformations) {
   CHECK(transformations->empty());
+  transformations->Add(new ConvertExpandDimsToReshape);
+  transformations->Add(new ConvertTrivialTransposeToReshape);
   transformations->Add(new ResolveReshapeAttributes);
   transformations->Add(new PropagateArrayDataTypes);
   transformations->Add(new PropagateFixedSizes);
@@ -66,9 +68,13 @@ void MakeGeneralGraphTransformationsSet(
   transformations->Add(new FuseBinaryIntoFollowingAffine);
   transformations->Add(new ResolveBatchNormalization);
   transformations->Add(new ResolveConstantBinaryOperator);
+  transformations->Add(new ResolveConstantFill);
+  transformations->Add(new ResolveConstantRange);
+  transformations->Add(new ResolveConstantStack);
+  transformations->Add(new ResolveConstantStridedSlice);
   transformations->Add(new ResolveConstantUnaryOperator);
   transformations->Add(new ResolveTensorFlowMerge);
-  transformations->Add(new ResolveTensorFlowSqueeze);
+  transformations->Add(new ResolveSqueezeAttributes);
   transformations->Add(new ResolveTensorFlowSwitch);
   transformations->Add(new ResolveTensorFlowTile);
   transformations->Add(new ResolveTensorFlowConcat);
@@ -77,11 +83,14 @@ void MakeGeneralGraphTransformationsSet(
   transformations->Add(new IdentifyRelu1);
   transformations->Add(new RemoveTrivialBinaryOperator);
   transformations->Add(new ReadFakeQuantMinMax);
+  transformations->Add(new ResolveSpaceToBatchNDAttributes);
+  transformations->Add(new ResolveBatchToSpaceNDAttributes);
   transformations->Add(new ResolvePadAttributes);
   transformations->Add(new ResolveStridedSliceAttributes);
   transformations->Add(new ResolveSliceAttributes);
   transformations->Add(new ResolveMeanAttributes);
-  transformations->Add(new ResolveConstantTensorFlowShape);
+  transformations->Add(new ResolveTransposeAttributes);
+  transformations->Add(new ResolveConstantShapeOrRank);
   transformations->Add(new MakeInitialDequantizeOperator);
 }
 
@@ -99,7 +108,7 @@ bool SupportsLstmCell(FileFormat format) {
 }
 
 bool SupportsPreallocatedWorkspace(FileFormat format) {
-  return (format == GRAPHVIZ_DOT || format == TFLITE);
+  return (format == TFLITE);
 }
 
 bool IsRealValued(toco::ArrayDataType type) {
@@ -184,6 +193,13 @@ void Transform(const TocoFlags& toco_flags, Model* model) {
 
   SetFinalDataTypeOnInputs(toco_flags, model);
 
+  // Remove unused ops before performing any other optimizations. This is to
+  // stop optimizations from crossing the input/output boundaries. For example
+  // this will stop BatchNorm fusing if the output node is in between a conv
+  // and BatchNorm layers.
+  RunGraphTransformations(model, "Removing unused ops",
+                          {new toco::RemoveUnusedOp});
+
   GraphTransformationsSet transformations;
   MakeGeneralGraphTransformationsSet(&transformations);
   auto* remove_trivial_reshape = new RemoveTrivialReshape;
@@ -202,11 +218,7 @@ void Transform(const TocoFlags& toco_flags, Model* model) {
     // See the doc for --reorder_across_fake_quant: that flag is needed to
     // support some existing models, e.g. WordLens, that have FakeQuant
     // nodes in the wrong places.
-    // We currently unconditionally enable that behavior when the output
-    // format is DarwiNN because the DarwiNN test code does not make it
-    // easy to pass a new toco flag. Once that is resolved on the DarwiNN
-    // tests side, the special-casing of DarwiNN here can go away.
-    // TODO(benoitjacob): so drop it when we can.
+    // TODO(benoitjacob): drop special casing when we can.
     if ((quantize_output && toco_flags.reorder_across_fake_quant())) {
       transformations.Add(new DropFakeQuant);
     }
@@ -229,6 +241,10 @@ void Transform(const TocoFlags& toco_flags, Model* model) {
         toco_flags.has_default_ranges_max()) {
       UseDefaultMinMaxRangeValues(model, toco_flags.default_ranges_min(),
                                   toco_flags.default_ranges_max());
+      // The new MinMax info may need to be propagated a bit.
+      RunGraphTransformations(
+          model, "default min-max range propagation graph transformations",
+          {new HardcodeMinMax});
     }
     CheckIsReadyForQuantization(*model);
     RunGraphTransformations(
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 3f289817e061afb87e621ff23bb312ff8fe73ae7..e09a469d55bae7d2abc6bfa5a3e78ce41ae7a4f5 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <unordered_set>
 #include <utility>
 
+#include "absl/strings/ascii.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "absl/strings/str_replace.h"
@@ -30,7 +31,6 @@ limitations under the License.
 #include "tensorflow/contrib/lite/toco/toco_port.h"
 #include "tensorflow/core/platform/logging.h"
 
-
 namespace toco {
 
 string LogName(const Operator& op) {
@@ -223,6 +223,10 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(Tanh)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowAll)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowAssert)
+    HANDLE_OPERATORTYPENAME_CASE(ExpandDims)
+    HANDLE_OPERATORTYPENAME_CASE(Fill)
+    HANDLE_OPERATORTYPENAME_CASE(FloorMod)
+    HANDLE_OPERATORTYPENAME_CASE(FloorDiv)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowGreater)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowGreaterEqual)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowIdentity)
@@ -234,8 +238,12 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowMerge)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowMin)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowMinimum)
+    HANDLE_OPERATORTYPENAME_CASE(Neg)
     HANDLE_OPERATORTYPENAME_CASE(Pad)
     HANDLE_OPERATORTYPENAME_CASE(StridedSlice)
+    HANDLE_OPERATORTYPENAME_CASE(Stack)
+    HANDLE_OPERATORTYPENAME_CASE(Range)
+    HANDLE_OPERATORTYPENAME_CASE(Rank)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowReshape)
     HANDLE_OPERATORTYPENAME_CASE(Squeeze)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowRsqrt)
@@ -248,6 +256,8 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(Sub)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowSum)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowTile)
+    HANDLE_OPERATORTYPENAME_CASE(Transpose)
+    HANDLE_OPERATORTYPENAME_CASE(TransposeConv)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowConcat)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowConcatV2)
     HANDLE_OPERATORTYPENAME_CASE(Cast)
@@ -258,6 +268,7 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(BatchToSpaceND)
     HANDLE_OPERATORTYPENAME_CASE(Mean)
     HANDLE_OPERATORTYPENAME_CASE(Svdf)
+    HANDLE_OPERATORTYPENAME_CASE(ArgMax)
     HANDLE_OPERATORTYPENAME_CASE(TensorFlowUnsupported)
     default:
       LOG(FATAL) << "Unhandled op type";
@@ -276,7 +287,7 @@ string HelpfulOperatorTypeName(const Operator& op) {
 
 void LogSummary(int log_level, const Model& model) {
   VLOG(log_level) << "Operators summary (" << model.operators.size()
-                  << " operators): ";
+                  << " operators):";
   std::unordered_multiset<OperatorType> ops_by_type;
   for (const auto& op : model.operators) {
     ops_by_type.insert(op->type);
@@ -305,6 +316,9 @@ void LogArray(int log_level, const Model& model, const string& name) {
     case ArrayDataType::kUint8:
       VLOG(log_level) << "  Data type: kUint8";
       break;
+    case ArrayDataType::kString:
+      VLOG(log_level) << "  Data type: kString";
+      break;
     default:
       VLOG(log_level) << "  Data type: other (numerical value: "
                       << static_cast<int>(array.data_type) << ")";
@@ -323,6 +337,9 @@ void LogArray(int log_level, const Model& model, const string& name) {
     case ArrayDataType::kUint8:
       VLOG(log_level) << "  Final type: kUint8";
       break;
+    case ArrayDataType::kString:
+      VLOG(log_level) << "  Final type: kString";
+      break;
     default:
       VLOG(log_level) << "  Final type: other (numerical value: "
                       << static_cast<int>(array.data_type) << ")";
@@ -387,6 +404,7 @@ void DumpGraphvizVideoFrame(const Model& model) {
   DumpGraphviz(model, &graphviz_dump);
   std::size_t hash = std::hash<string>{}(graphviz_dump);
   if (!dump_hashes.count(hash)) {
+    LOG(INFO) << "DUMPING GRAPHVIZ VIDEO FRAME: " << dump_id;
     dump_hashes.insert(hash);
     CHECK(port::file::SetContents(
               port::file::JoinPath(
@@ -430,7 +448,7 @@ void LogDump(int log_level, const string& message, const Model& model) {
         LogArray(log_level, model, input);
       }
     }
-    VLOG(log_level) << HelpfulOperatorTypeName(*op) << " : ";
+    VLOG(log_level) << HelpfulOperatorTypeName(*op) << " :";
     VLOG(log_level) << "  " << FormatArraysList(model, op->inputs) << " -> "
                     << FormatArraysList(model, op->outputs);
     if (op->fused_activation_function != FusedActivationFunctionType::kNone) {
@@ -555,15 +573,65 @@ bool IsConstantParameterArray(const Model& model, const string& name) {
   return !!model.arrays.at(name)->buffer;
 }
 
-void CheckNoMissingArray(const Model& model) {
-  for (const auto& op : model.operators) {
-    for (const auto& input : op->inputs) {
-      CHECK(model.arrays.count(input));
+namespace {
+void CheckInputArraysAreNotOutputArrays(const ModelFlags& model_flags) {
+  for (const auto& input_array : model_flags.input_arrays()) {
+    for (const string& output_array : model_flags.output_arrays()) {
+      QCHECK_NE(input_array.name(), output_array)
+          << "The array " << output_array
+          << " is listed in both --input_arrays and --output_arrays.";
     }
-    for (const auto& output : op->outputs) {
-      CHECK(model.arrays.count(output));
+  }
+}
+
+bool IsAsciiPrintable(const string& name) {
+  for (char c : name) {
+    if (!absl::ascii_isprint(c)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+string DumpAscii(const string& name) {
+  string result;
+  port::AppendF(&result, "ASCII | Hex\n");
+  port::AppendF(&result, "------+----\n");
+  for (char c : name) {
+    if (absl::ascii_isprint(c)) {
+      port::AppendF(&result, "%c     | %x\n", c, c);
+    } else {
+      port::AppendF(&result, "      | %x   Not ASCII printable!\n", c);
     }
   }
+  return result;
+}
+
+void CheckNonAsciiIOArrays(const ModelFlags& model_flags) {
+  if (model_flags.allow_nonascii_arrays()) {
+    return;
+  }
+  for (const auto& input_array : model_flags.input_arrays()) {
+    QCHECK(IsAsciiPrintable(input_array.name()))
+        << "Non-ASCII-printable character found in --input_arrays: "
+        << input_array.name()
+        << ". Pass --allow_nonascii_arrays to allow that. "
+        << "Here is a dump of the string:\n\n"
+        << DumpAscii(input_array.name());
+  }
+  for (const string& output_array : model_flags.output_arrays()) {
+    QCHECK(IsAsciiPrintable(output_array))
+        << "Non-ASCII-printable character found in --output_arrays: "
+        << output_array << ". Pass --allow_nonascii_arrays to allow that. "
+        << "Here is a dump of the string:\n\n"
+        << DumpAscii(output_array);
+  }
+}
+
+void CheckNonExistentIOArrays(const Model& model) {
+  if (model.flags.allow_nonexistent_arrays()) {
+    return;
+  }
   for (const auto& input_array : model.flags.input_arrays()) {
     CHECK(model.arrays.count(input_array.name()))
         << "Input array not found: " << input_array.name();
@@ -573,9 +641,24 @@ void CheckNoMissingArray(const Model& model) {
         << "Output array not found: " << output_array;
   }
   for (const auto& rnn_state : model.flags.rnn_states()) {
-    CHECK(model.arrays.count(rnn_state.state_array()));
-    CHECK(model.arrays.count(rnn_state.back_edge_source_array()));
+    if (!rnn_state.discardable()) {
+      CHECK(model.arrays.count(rnn_state.state_array()));
+      CHECK(model.arrays.count(rnn_state.back_edge_source_array()));
+    }
+  }
+}
+}  // namespace
+
+void CheckNoMissingArray(const Model& model) {
+  for (const auto& op : model.operators) {
+    for (const auto& input : op->inputs) {
+      CHECK(model.arrays.count(input));
+    }
+    for (const auto& output : op->outputs) {
+      CHECK(model.arrays.count(output));
+    }
   }
+  CheckNonExistentIOArrays(model);
 }
 
 void FixNoMissingArray(Model* model) {
@@ -591,17 +674,23 @@ void FixNoMissingArray(Model* model) {
       }
     }
   }
-  for (const string& output_array : model->flags.output_arrays()) {
-    if (!model->arrays.count(output_array)) {
+  if (model->flags.allow_nonexistent_arrays()) {
+    for (const string& output_array : model->flags.output_arrays()) {
       model->GetOrCreateArray(output_array);
     }
+    for (const auto& rnn_state : model->flags.rnn_states()) {
+      model->GetOrCreateArray(rnn_state.state_array());
+      model->GetOrCreateArray(rnn_state.back_edge_source_array());
+    }
   }
 }
 
 void CheckNoOrphanedArray(const Model& model) {
   std::unordered_set<string> arrays_without_known_use;
   for (const auto& array : model.arrays) {
-    arrays_without_known_use.insert(array.first);
+    if (IsDiscardableArray(model, array.first)) {
+      arrays_without_known_use.insert(array.first);
+    }
   }
   for (const auto& op : model.operators) {
     for (const auto& input : op->inputs) {
@@ -611,6 +700,10 @@ void CheckNoOrphanedArray(const Model& model) {
       arrays_without_known_use.erase(output);
     }
   }
+  for (const auto& rnn_state : model.flags.rnn_states()) {
+    arrays_without_known_use.erase(rnn_state.state_array());
+    arrays_without_known_use.erase(rnn_state.back_edge_source_array());
+  }
   if (!arrays_without_known_use.empty()) {
     for (const auto& array : arrays_without_known_use) {
       LOG(INFO) << "Error: Orphaned array: " << array;
@@ -632,8 +725,14 @@ void FixNoOrphanedArray(Model* model) {
       arrays_without_known_use.erase(output);
     }
   }
+  for (const auto& rnn_state : model->flags.rnn_states()) {
+    arrays_without_known_use.erase(rnn_state.state_array());
+    arrays_without_known_use.erase(rnn_state.back_edge_source_array());
+  }
   for (const auto& array : arrays_without_known_use) {
-    model->arrays.erase(array);
+    if (IsDiscardableArray(*model, array)) {
+      model->arrays.erase(array);
+    }
   }
 }
 
@@ -791,52 +890,13 @@ void FixOperatorOrdering(Model* model) {
       << "the above code should have generated a FATAL error already!";
 }
 
-// Checks that the --input_arrays of the Model are actually used by at least
-// one of the --output_arrays or --rnn_states i.e. that the graph contains a
-// path from each one of the inputs to at least one of the outputs or RNN
-// states. This catches cases where the user passed the wrong --input_arrays or
-// --output_arrays or --rnn_states, which otherwise may result in cryptic error
-// messages.
-void CheckInputsActuallyUsed(const Model& model) {
-  std::set<string> used_arrays;
-  for (const string& output : model.flags.output_arrays()) {
-    used_arrays.insert(output);
-  }
-  for (const auto& rnn_state : model.flags.rnn_states()) {
-    used_arrays.insert(rnn_state.back_edge_source_array());
-  }
-  for (int i = model.operators.size() - 1; i >= 0; i--) {
-    bool is_op_used = false;
-    for (const string& op_output : model.operators[i]->outputs) {
-      if (used_arrays.count(op_output)) {
-        is_op_used = true;
-        break;
-      }
-    }
-    if (!is_op_used) {
-      continue;
-    }
-    for (const string& op_input : model.operators[i]->inputs) {
-      used_arrays.insert(op_input);
-    }
-  }
-  for (const auto& input_array : model.flags.input_arrays()) {
-    QCHECK(used_arrays.count(input_array.name()))
-        << "The graph does not connect the input (" << input_array.name()
-        << ") specified by --input_arrays to any of the specified "
-        << "--output_arrays ("
-        << absl::StrJoin(model.flags.output_arrays(), ", ")
-        << "). Did you pass the wrong flags for this model, "
-        << "or is that model's graph actually incomplete?";
-  }
-}
-
 void CheckInvariants(const Model& model) {
+  CheckInputArraysAreNotOutputArrays(model.flags);
+  CheckNonAsciiIOArrays(model.flags);
   CheckNoMissingArray(model);
   CheckNoOrphanedArray(model);
   CheckArrayFieldsConsistent(model);
   CheckOperatorOrdering(model);
-  CheckInputsActuallyUsed(model);
 }
 
 void CheckCountInRange(const ::toco::ModelFlags::ModelCheck& model_check,
@@ -914,9 +974,9 @@ void CreateOrCheckRnnStateArray(const string& name, int size, Model* model) {
     // Pick 'num_dims' and 'batch' from the first input_arrays, unless we find
     // a better match by name.
     if (input_array.name() == name || num_dims == -1) {
-      num_dims = input_array.shape_size();
-      if (num_dims != 0) {
-        batch = input_array.shape(0);
+      num_dims = input_array.shape().dims_size();
+      if (num_dims > 0) {
+        batch = input_array.shape().dims(0);
       }
     }
   }
@@ -985,33 +1045,32 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
     RESOLVE_MODEL_FLAG(mean_value);
 #undef RESOLVE_MODEL_FLAG
 
-    if (!specified_input_array.shape().empty()) {
-      if (!dst_input_array->shape().empty()) {
-        QCHECK_EQ(specified_input_array.shape().size(),
-                  dst_input_array->shape().size())
+    if (specified_input_array.has_shape()) {
+      if (dst_input_array->has_shape()) {
+        QCHECK_EQ(specified_input_array.shape().dims_size(),
+                  dst_input_array->shape().dims_size())
             << "For input array '" << specified_input_array.name() << "', "
             << "size of specified input shape flag with size: "
-            << specified_input_array.shape().size()
+            << specified_input_array.shape().dims_size()
             << " does not agree with already defined input shape"
                " of this model, with size: "
-            << dst_input_array->shape().size();
+            << dst_input_array->shape().dims_size();
         // We treat the first dimension as a special case, since it is often
         // a batch size and the input_shape flag is effectively overriding
         // the model.
-        for (int i = 1; i < specified_input_array.shape().size(); i++) {
-          QCHECK_EQ(specified_input_array.shape().Get(i),
-                    dst_input_array->shape().Get(i))
+        for (int i = 1; i < specified_input_array.shape().dims_size(); i++) {
+          QCHECK_EQ(specified_input_array.shape().dims(i),
+                    dst_input_array->shape().dims(i))
               << "At dimension number " << i << " of input array "
               << specified_input_array.name() << ", the specified shape's "
               << "dimension flag with dimension: "
-              << specified_input_array.shape().Get(i)
+              << specified_input_array.shape().dims(i)
               << " does not agree with already defined shape"
               << " of this model, with dimension: "
-              << dst_input_array->shape().Get(i);
+              << dst_input_array->shape().dims(i);
         }
       } else {
-        dst_input_array->mutable_shape()->CopyFrom(
-            specified_input_array.shape());
+        *dst_input_array->mutable_shape() = specified_input_array.shape();
       }
     }
 
@@ -1042,25 +1101,14 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
 
 #undef RESOLVE_MODEL_FLAG
 
-  if (model->flags.rnn_states_size() == 0) {
+  if (!model_flags.rnn_states().empty()) {
     model->flags.mutable_rnn_states()->CopyFrom(model_flags.rnn_states());
-  } else {
-    CHECK_EQ(model->flags.rnn_states_size(), model_flags.rnn_states_size());
-    for (int i = 0; i < model->flags.rnn_states_size(); i++) {
-      CHECK_EQ(model->flags.rnn_states(i).state_array(),
-               model_flags.rnn_states(i).state_array());
-      CHECK_EQ(model->flags.rnn_states(i).back_edge_source_array(),
-               model_flags.rnn_states(i).back_edge_source_array());
-    }
   }
 
   if (model->flags.model_checks_size() == 0) {
     model->flags.mutable_model_checks()->CopyFrom(model_flags.model_checks());
   }
 
-  QCHECK_GT(model->flags.input_arrays_size(), 0)
-      << "This model does not define input arrays, so a "
-         "--input_arrays flag must be given on the command-line.";
   QCHECK_GT(model->flags.output_arrays_size(), 0)
       << "This model does not define output arrays, so a "
          "--output_arrays flag must be given on the command-line.";
@@ -1088,24 +1136,27 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
       input_array.data_type = ArrayDataType::kFloat;
     }
 
-    if (!input_array.has_shape()) {
-      QCHECK(!input_array_proto.shape().empty())
-          << "This model does not have shape defined for input array "
-          << input_array_proto.name();
-    }
-
     // Compare/merge the model->flags describing the input_shape with
     // the actual input array's shape.
-    auto& input_array_dims = *input_array.mutable_shape()->mutable_dims();
-    if (input_array_dims.empty()) {
-      for (auto dim : input_array_proto.shape()) {
-        CHECK_GE(dim, 1);
-        input_array_dims.push_back(dim);
+    if (!input_array.has_shape()) {
+      if (input_array_proto.has_shape()) {
+        auto& input_array_dims = *input_array.mutable_shape()->mutable_dims();
+        for (auto dim : input_array_proto.shape().dims()) {
+          CHECK_GE(dim, 1);
+          input_array_dims.push_back(dim);
+        }
       }
     } else {
-      CHECK_EQ(input_array_dims.size(), input_array_proto.shape_size());
-      for (int i = 0; i < input_array_dims.size(); i++) {
-        CHECK_EQ(input_array_dims[i], input_array_proto.shape(i));
+      if (input_array_proto.has_shape()) {
+        // If an input shape was specified on the flags ensure that it matches
+        // the actual shape in the model.
+        const auto& input_array_dims =
+            *input_array.mutable_shape()->mutable_dims();
+        CHECK_EQ(input_array_dims.size(),
+                 input_array_proto.shape().dims_size());
+        for (int i = 0; i < input_array_dims.size(); i++) {
+          CHECK_EQ(input_array_dims[i], input_array_proto.shape().dims(i));
+        }
       }
     }
 
@@ -1134,6 +1185,16 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
     CreateOrCheckRnnStateArray(rnn_state.state_array(), rnn_state.size(),
                                model);
   }
+
+  for (const auto& input_array : model->flags.input_arrays()) {
+    if (input_array.has_shape()) {
+      CHECK(input_array.shape().dims_size());
+    }
+  }
+
+  model->flags.set_allow_nonascii_arrays(model_flags.allow_nonascii_arrays());
+  model->flags.set_allow_nonexistent_arrays(
+      model_flags.allow_nonexistent_arrays());
 }
 
 void CheckIsReadyForQuantization(const Model& model) {
@@ -1197,6 +1258,13 @@ int ElementSize(ArrayDataType data_type) {
       return 4;
     case ArrayDataType::kUint8:
       return 1;
+    case ArrayDataType::kInt64:
+      return 8;
+    // Usually not critical limitation because strings are only input and/or
+    // output.
+    case ArrayDataType::kString:
+      LOG(FATAL) << "Transient arrays with strings are not supported yet";
+      return 0;
     default:
       LOG(FATAL) << "Should not get here.";
       return 0;
@@ -1461,9 +1529,11 @@ void ShuffleDims(const Shape& input_shape, AxesOrder input_axes_order,
   }
 }
 
-void ShuffleArray(const Shape& input_shape, AxesOrder input_axes_order,
-                  AxesOrder output_axes_order, const Shape& output_shape,
-                  const float* input_data, float* output_data) {
+template <typename T>
+void ShuffleArrayTemplate(const Shape& input_shape, AxesOrder input_axes_order,
+                          AxesOrder output_axes_order,
+                          const Shape& output_shape, const T* input_data,
+                          T* output_data) {
   if (input_axes_order == AxesOrder::kHWIM &&
       output_axes_order == AxesOrder::k1HWO) {
     // This special case isn't just a permutation, the IM pair of dims get
@@ -1515,16 +1585,15 @@ void ShuffleArray(const Shape& input_shape, AxesOrder input_axes_order,
   const int output_stride_3 = output_stride_2 * output_size_2;
 
   for (int i3 = 0; i3 < output_size_3; i3++) {
-    const float* const input_ptr_3 = input_data + i3 * input_stride_3;
-    float* const output_ptr_3 = output_data + i3 * output_stride_3;
+    const T* const input_ptr_3 = input_data + i3 * input_stride_3;
+    T* const output_ptr_3 = output_data + i3 * output_stride_3;
     for (int i2 = 0; i2 < output_size_2; i2++) {
-      const float* const input_ptr_2 = input_ptr_3 + i2 * input_stride_2;
-      float* const output_ptr_2 = output_ptr_3 + i2 * output_stride_2;
+      const T* const input_ptr_2 = input_ptr_3 + i2 * input_stride_2;
+      T* const output_ptr_2 = output_ptr_3 + i2 * output_stride_2;
       for (int i1 = 0; i1 < output_size_1; i1++) {
-        const float* input_ptr = input_ptr_2 + i1 * input_stride_1;
-        float* output_ptr = output_ptr_2 + i1 * output_stride_1;
-        float* const output_ptr_end =
-            output_ptr + output_size_0 * output_stride_0;
+        const T* input_ptr = input_ptr_2 + i1 * input_stride_1;
+        T* output_ptr = output_ptr_2 + i1 * output_stride_1;
+        T* const output_ptr_end = output_ptr + output_size_0 * output_stride_0;
         while (output_ptr != output_ptr_end) {
           *output_ptr = *input_ptr;
           input_ptr += input_stride_0;
@@ -1535,6 +1604,20 @@ void ShuffleArray(const Shape& input_shape, AxesOrder input_axes_order,
   }
 }
 
+void ShuffleArray(const Shape& input_shape, AxesOrder input_axes_order,
+                  AxesOrder output_axes_order, const Shape& output_shape,
+                  const uint8* input_data, uint8* output_data) {
+  ShuffleArrayTemplate<uint8>(input_shape, input_axes_order, output_axes_order,
+                              output_shape, input_data, output_data);
+}
+
+void ShuffleArray(const Shape& input_shape, AxesOrder input_axes_order,
+                  AxesOrder output_axes_order, const Shape& output_shape,
+                  const float* input_data, float* output_data) {
+  ShuffleArrayTemplate<float>(input_shape, input_axes_order, output_axes_order,
+                              output_shape, input_data, output_data);
+}
+
 int AxesCount(AxesOrder axes_order) {
   switch (axes_order) {
     case AxesOrder::kOneAxis:
@@ -1571,11 +1654,13 @@ bool IsDiscardableArray(const Model& model, const string& array_name) {
     }
   }
   for (const auto& rnn_state : model.flags.rnn_states()) {
-    if (array_name == rnn_state.state_array()) {
-      return false;
-    }
-    if (array_name == rnn_state.back_edge_source_array()) {
-      return false;
+    if (!rnn_state.discardable()) {
+      if (array_name == rnn_state.state_array()) {
+        return false;
+      }
+      if (array_name == rnn_state.back_edge_source_array()) {
+        return false;
+      }
     }
   }
   return true;
diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h
index d820d619d0de425407e88076082a3e0f8d4783a9..c81e77874e36d78ca3ee23f84f55596627e9c73d 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.h
+++ b/tensorflow/contrib/lite/toco/tooling_util.h
@@ -279,6 +279,9 @@ void ShuffleDims(const Shape& input_shape, AxesOrder input_axes_order,
 void ShuffleArray(const Shape& input_shape, AxesOrder input_axes_order,
                   AxesOrder output_axes_order, const Shape& output_shape,
                   const float* input_data, float* output_data);
+void ShuffleArray(const Shape& input_shape, AxesOrder input_axes_order,
+                  AxesOrder output_axes_order, const Shape& output_shape,
+                  const uint8* input_data, uint8* output_data);
 
 // Returns true if it may be OK for any graph transformation to ever discard
 // that array. The idea is that we can't ever discard arrays that are either
diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD
index 21b32d8434204ca625ba0c5d3f371ee8061b77d7..389ef2323a376f33c0f539ef27a29c92b3d8be6e 100644
--- a/tensorflow/contrib/lite/tools/BUILD
+++ b/tensorflow/contrib/lite/tools/BUILD
@@ -6,6 +6,16 @@ licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow:tensorflow.bzl", "tf_cc_binary")
 
+py_binary(
+    name = "visualize",
+    srcs = ["visualize.py"],
+    data = [
+        "//tensorflow/contrib/lite/schema:schema.fbs",
+        "@flatbuffers//:flatc",
+    ],
+    srcs_version = "PY2AND3",
+)
+
 tf_cc_binary(
     name = "generate_op_registrations",
     srcs = ["gen_op_registration_main.cc"],
@@ -13,6 +23,26 @@ tf_cc_binary(
         "//tensorflow/contrib/lite/tools:gen_op_registration",
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+tf_cc_binary(
+    name = "benchmark_model",
+    srcs = ["benchmark_model.cc"],
+    linkopts = select({
+        "//tensorflow:android": [
+            "-pie",
+            "-landroid",
+            "-lm",
+            "-z defs",
+            "-Wl,--exclude-libs,ALL",  # Exclude syms in all libs from auto export
+        ],
+        "//conditions:default": [],
+    }),
+    deps = [
+        ":mutable_op_resolver",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
     ],
 )
 
diff --git a/tensorflow/contrib/lite/tools/benchmark_model.cc b/tensorflow/contrib/lite/tools/benchmark_model.cc
index f80949b23e417d074e070a28608688d8863765b5..6ae3ab57294a92162b15f326630ac202a9ba2a82 100644
--- a/tensorflow/contrib/lite/tools/benchmark_model.cc
+++ b/tensorflow/contrib/lite/tools/benchmark_model.cc
@@ -31,7 +31,12 @@ void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
 #endif
 
 #define LOG(x) std::cerr
-#define CHECK(x) if (!(x)) { LOG(ERROR) << #x << "failed"; exit(1); }
+
+#define CHECK(x)                  \
+  if (!(x)) {                     \
+    LOG(ERROR) << #x << "failed"; \
+    exit(1);                      \
+  }
 
 namespace tensorflow {
 namespace benchmark_tflite_model {
diff --git a/tensorflow/contrib/lite/tools/gen_op_registration_main.cc b/tensorflow/contrib/lite/tools/gen_op_registration_main.cc
index 1b28b8bcd97125a67bdf8eecb2c61a999a72425d..17b514c9169817479e18eecf5799ea4371f3b051 100644
--- a/tensorflow/contrib/lite/tools/gen_op_registration_main.cc
+++ b/tensorflow/contrib/lite/tools/gen_op_registration_main.cc
@@ -13,30 +13,50 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <cassert>
 #include <fstream>
+#include <map>
 #include <sstream>
 #include <string>
 #include <vector>
 
+#include "absl/strings/strip.h"
 #include "tensorflow/contrib/lite/tools/gen_op_registration.h"
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/util/command_line_flags.h"
 
+const char kInputModelFlag[] = "input_model";
+const char kOutputRegistrationFlag[] = "output_registration";
+const char kTfLitePathFlag[] = "tflite_path";
+
 using tensorflow::Flag;
 using tensorflow::Flags;
 using tensorflow::string;
 
+void ParseFlagAndInit(int argc, char** argv, string* input_model,
+                      string* output_registration, string* tflite_path) {
+  std::vector<tensorflow::Flag> flag_list = {
+      Flag(kInputModelFlag, input_model, "path to the tflite model"),
+      Flag(kOutputRegistrationFlag, output_registration,
+           "filename for generated registration code"),
+      Flag(kTfLitePathFlag, tflite_path, "Path to tensorflow lite dir"),
+  };
+
+  Flags::Parse(&argc, argv, flag_list);
+  tensorflow::port::InitMain(argv[0], &argc, &argv);
+}
+
 namespace {
 
-void GenerateFileContent(const string& filename,
+void GenerateFileContent(const std::string& tflite_path,
+                         const std::string& filename,
                          const std::vector<string>& builtin_ops,
                          const std::vector<string>& custom_ops) {
   std::ofstream fout(filename);
 
-  fout << "#include "
-          "\"third_party/tensorflow/contrib/lite/model.h\"\n";
-  fout << "#include "
-          "\"third_party/tensorflow/contrib/lite/tools/mutable_op_resolver.h\"\n";
+  fout << "#include \"" << tflite_path << "/model.h\"\n";
+  fout << "#include \"" << tflite_path << "/tools/mutable_op_resolver.h\"\n";
+
   fout << "namespace tflite {\n";
   fout << "namespace ops {\n";
   if (!builtin_ops.empty()) {
@@ -78,22 +98,20 @@ void GenerateFileContent(const string& filename,
 int main(int argc, char** argv) {
   string input_model;
   string output_registration;
-  std::vector<tensorflow::Flag> flag_list = {
-      Flag("input_model", &input_model, "path to the tflite model"),
-      Flag("output_registration", &output_registration,
-           "filename for generated registration code"),
-  };
-  Flags::Parse(&argc, argv, flag_list);
+  string tflite_path;
+  ParseFlagAndInit(argc, argv, &input_model, &output_registration,
+                   &tflite_path);
 
-  tensorflow::port::InitMain(argv[0], &argc, &argv);
   std::vector<string> builtin_ops;
   std::vector<string> custom_ops;
-
   std::ifstream fin(input_model);
   std::stringstream content;
   content << fin.rdbuf();
-  const ::tflite::Model* model = ::tflite::GetModel(content.str().data());
+  // Need to store content data first, otherwise, it won't work in bazel.
+  string content_str = content.str();
+  const ::tflite::Model* model = ::tflite::GetModel(content_str.data());
   ::tflite::ReadOpsFromModel(model, &builtin_ops, &custom_ops);
-  GenerateFileContent(output_registration, builtin_ops, custom_ops);
+  GenerateFileContent(tflite_path, output_registration, builtin_ops,
+                      custom_ops);
   return 0;
 }
diff --git a/tensorflow/contrib/lite/tools/mutable_op_resolver.h b/tensorflow/contrib/lite/tools/mutable_op_resolver.h
index 8206a5481d7c43a9c8fb8445d056dbc7f022cfcc..906553da570720a0c4b90bbd2eebb6d8bdea6bb8 100644
--- a/tensorflow/contrib/lite/tools/mutable_op_resolver.h
+++ b/tensorflow/contrib/lite/tools/mutable_op_resolver.h
@@ -20,15 +20,14 @@ limitations under the License.
 #include "tensorflow/contrib/lite/model.h"
 
 // Needed to resolve unordered_set hash on older compilers.
-namespace std
-{
-template<>
-  struct hash<tflite::BuiltinOperator> {
-    size_t operator()(const tflite::BuiltinOperator &op) const {
-      return std::hash<int>()(op);
-    }
-  };
-}
+namespace std {
+template <>
+struct hash<tflite::BuiltinOperator> {
+  size_t operator()(const tflite::BuiltinOperator& op) const {
+    return std::hash<int>()(op);
+  }
+};
+}  // namespace std
 
 namespace tflite {
 
@@ -47,7 +46,7 @@ class MutableOpResolver : public OpResolver {
   void AddCustom(const char* name, TfLiteRegistration* registration);
 
  private:
-  std::map<tflite::BuiltinOperator, TfLiteRegistration*> builtins_;
+  std::map<int, TfLiteRegistration*> builtins_;
   std::map<std::string, TfLiteRegistration*> custom_ops_;
 };
 
diff --git a/tensorflow/contrib/lite/tools/visualize.py b/tensorflow/contrib/lite/tools/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0d78e3afab7d89f216bb8ceb42e4429ca4f1759
--- /dev/null
+++ b/tensorflow/contrib/lite/tools/visualize.py
@@ -0,0 +1,379 @@
+#!/usr/bin/env python
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""This tool creates an html visualization of a TensorFlow Lite graph.
+
+Example usage:
+
+python visualize.py foo.tflite foo.html
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import os
+import sys
+
+# Schema to use for flatbuffers
+_SCHEMA = "third_party/tensorflow/contrib/lite/schema/schema.fbs"
+
+# Where the binary will be once built in for the flatc converter
+_BINARY = "third_party/flatbuffers/flatc"
+
+# A CSS description for making the visualizer
+_CSS = """
+<html>
+<head>
+<style>
+body {font-family: sans-serif; background-color: #ffaa00;}
+table {background-color: #eeccaa;}
+th {background-color: black; color: white;}
+h1 {
+  background-color: ffaa00;
+  padding:5px;
+  color: black;
+}
+
+div {
+  border-radius: 5px;
+  background-color: #ffeecc;
+  padding:5px;
+  margin:5px;
+}
+
+.tooltip {color: blue;}
+.tooltip .tooltipcontent  {
+    visibility: hidden;
+    color: black;
+    background-color: yellow;
+    padding: 5px;
+    border-radius: 4px;
+    position: absolute;
+    z-index: 1;
+}
+.tooltip:hover .tooltipcontent {
+    visibility: visible;
+}
+
+.edges line {
+  stroke: #333333;
+}
+
+.nodes text {
+  color: black;
+  pointer-events: none;
+  font-family: sans-serif;
+  font-size: 11px;
+}
+</style>
+
+<script src="https://d3js.org/d3.v4.min.js"></script>
+
+</head>
+<body>
+"""
+
+_D3_HTML_TEMPLATE = """
+  <script>
+    // Build graph data
+    var graph = %s;
+
+    var svg = d3.select("#subgraph%d");
+    var width = svg.attr("width");
+    var height = svg.attr("height");
+    var color = d3.scaleOrdinal(d3.schemeCategory20);
+
+    var simulation = d3.forceSimulation()
+        .force("link", d3.forceLink().id(function(d) {return d.id;}))
+        .force("charge", d3.forceManyBody())
+        .force("center", d3.forceCenter(0.5 * width, 0.5 * height));
+
+
+    function buildGraph() {
+      var edge = svg.append("g").attr("class", "edges").selectAll("line")
+        .data(graph.edges).enter().append("line")
+      // Make the node group
+      var node = svg.selectAll(".nodes")
+        .data(graph.nodes)
+        .enter().append("g")
+        .attr("class", "nodes")
+          .call(d3.drag()
+              .on("start", function(d) {
+                if(!d3.event.active) simulation.alphaTarget(1.0).restart();
+                d.fx = d.x;d.fy = d.y;
+              })
+              .on("drag", function(d) {
+                d.fx = d3.event.x; d.fy = d3.event.y;
+              })
+              .on("end", function(d) {
+                if (!d3.event.active) simulation.alphaTarget(0);
+                d.fx = d.fy = null;
+              }));
+      // Within the group, draw a circle for the node position and text
+      // on the side.
+      node.append("circle")
+          .attr("r", "5px")
+          .attr("fill", function(d) { return color(d.group); })
+      node.append("text")
+          .attr("dx", 8).attr("dy", 5).text(function(d) { return d.name; });
+      // Setup force parameters and update position callback
+      simulation.nodes(graph.nodes).on("tick", forceSimulationUpdated);
+      simulation.force("link").links(graph.edges);
+
+      function forceSimulationUpdated() {
+        // Update edges.
+        edge.attr("x1", function(d) {return d.source.x;})
+            .attr("y1", function(d) {return d.source.y;})
+            .attr("x2", function(d) {return d.target.x;})
+            .attr("y2", function(d) {return d.target.y;});
+        // Update node positions
+        node.attr("transform", function(d) { return "translate(" + d.x + "," + d.y + ")"; });
+      }
+    }
+  buildGraph()
+</script>
+"""
+
+
+class OpCodeMapper(object):
+  """Maps an opcode index to an op name."""
+
+  def __init__(self, data):
+    self.code_to_name = {}
+    for idx, d in enumerate(data["operator_codes"]):
+      self.code_to_name[idx] = d["builtin_code"]
+
+  def __call__(self, x):
+    if x not in self.code_to_name:
+      s = "<UNKNOWN>"
+    else:
+      s = self.code_to_name[x]
+    return "%s (opcode=%d)" % (s, x)
+
+
+class DataSizeMapper(object):
+  """For buffers, report the number of bytes."""
+
+  def __call__(self, x):
+    if x is not None:
+      return "%d bytes" % len(x)
+    else:
+      return "--"
+
+
+class TensorMapper(object):
+  """Maps a list of tensor indices to a tooltip hoverable indicator of more."""
+
+  def __init__(self, subgraph_data):
+    self.data = subgraph_data
+
+  def __call__(self, x):
+    html = ""
+    html += "<span class='tooltip'><span class='tooltipcontent'>"
+    for i in x:
+      tensor = self.data["tensors"][i]
+      html += str(i) + " "
+      html += tensor["name"] + " "
+      html += str(tensor["type"]) + " "
+      html += repr(tensor["shape"]) + "<br>"
+    html += "</span>"
+    html += repr(x)
+    html += "</span>"
+    return html
+
+
+def GenerateGraph(subgraph_idx, g, opcode_mapper):
+  """Produces the HTML required to have a d3 visualization of the dag."""
+  def TensorName(idx):
+    return "t%d"%idx
+  def OpName(idx):
+    return "o%d"%idx
+  edges = []
+  nodes = []
+  first = {}
+  pixel_mult = 50  # TODO(aselle): multiplier for initial placement
+  for op_index, op in enumerate(g["operators"]):
+    for tensor_input_position, tensor_index in enumerate(op["inputs"]):
+      if tensor_index not in first:
+        first[tensor_index] = (
+            op_index*pixel_mult,
+            tensor_input_position*pixel_mult - pixel_mult/2)
+      edges.append(
+          {"source": TensorName(tensor_index), "target": OpName(op_index)})
+    for tensor_index in op["outputs"]:
+      edges.append(
+          {"target": TensorName(tensor_index), "source": OpName(op_index)})
+    nodes.append({"id": OpName(op_index),
+                  "name": opcode_mapper(op["opcode_index"]),
+                  "group": 2,
+                  "x": pixel_mult,
+                  "y": op_index * pixel_mult})
+  for tensor_index, tensor in enumerate(g["tensors"]):
+    initial_y = (first[tensor_index] if tensor_index in first
+                 else len(g["operators"]))
+
+    nodes.append({"id": TensorName(tensor_index),
+                  "name": "%s (%d)" % (tensor["name"], tensor_index),
+                  "group": 1,
+                  "x": 2,
+                  "y": initial_y})
+  graph_str = json.dumps({"nodes": nodes, "edges": edges})
+
+  html = _D3_HTML_TEMPLATE % (graph_str, subgraph_idx)
+  return html
+
+
+def GenerateTableHtml(items, keys_to_print, display_index=True):
+  """Given a list of object values and keys to print, make an HTML table.
+
+  Args:
+    items: Items to print an array of dicts.
+    keys_to_print: (key, display_fn). `key` is a key in the object. i.e.
+      items[0][key] should exist. display_fn is the mapping function on display.
+      i.e. the displayed html cell will have the string returned by
+      `mapping_fn(items[0][key])`.
+    display_index: add a column which is the index of each row in `items`.
+  Returns:
+    An html table.
+  """
+  html = ""
+  # Print the list of  items
+  html += "<table><tr>\n"
+  html += "<tr>\n"
+  if display_index:
+    html += "<th>index</th>"
+  for h, mapper in keys_to_print:
+    html += "<th>%s</th>" % h
+  html += "</tr>\n"
+  for idx, tensor in enumerate(items):
+    html += "<tr>\n"
+    if display_index:
+      html += "<td>%d</td>" % idx
+    # print tensor.keys()
+    for h, mapper in keys_to_print:
+      val = tensor[h] if h in tensor else None
+      val = val if mapper is None else mapper(val)
+      html += "<td>%s</td>\n"%val
+
+    html += "</tr>\n"
+  html += "</table>\n"
+  return html
+
+
+def CreateHtmlFile(tflite_input, html_output):
+  """Given a tflite model in `tflite_input` file, produce html description."""
+
+  # Convert the model into a JSON flatbuffer using flatc (build if doesn't
+  # exist.
+  if  not os.path.exists(tflite_input):
+    raise RuntimeError("Invalid filename %r" % tflite_input)
+  if tflite_input.endswith(".tflite") or tflite_input.endswith(".bin"):
+
+    # Run convert
+    cmd = (_BINARY + " -t "
+           "--strict-json --defaults-json -o /tmp {schema} -- {input}".format(
+               input=tflite_input, schema=_SCHEMA))
+    print(cmd)
+    os.system(cmd)
+    real_output = ("/tmp/"+ os.path.splitext(os.path.split(tflite_input)[-1])[0]
+                   + ".json")
+
+    data = json.load(open(real_output))
+  elif tflite_input.endswith(".json"):
+    data = json.load(open(tflite_input))
+  else:
+    raise RuntimeError("Input file was not .tflite or .json")
+  html = ""
+  html += _CSS
+  html += "<h1>TensorFlow Lite Model</h2>"
+
+  data["filename"] = tflite_input  # Avoid special case
+  toplevel_stuff = [("filename", None), ("version", None),
+                    ("description", None)]
+
+  html += "<table>\n"
+  for key, mapping in toplevel_stuff:
+    if not mapping: mapping = lambda x: x
+    html += "<tr><th>%s</th><td>%s</td></tr>\n" % (key, mapping(data[key]))
+  html += "</table>\n"
+
+  # Spec on what keys to display
+  buffer_keys_to_display = [("data", DataSizeMapper())]
+  operator_keys_to_display = [("builtin_code", None)]
+
+  for subgraph_idx, g in enumerate(data["subgraphs"]):
+    # Subgraph local specs on what to display
+    html += "<div class='subgraph'>"
+    tensor_mapper = TensorMapper(g)
+    opcode_mapper = OpCodeMapper(data)
+    op_keys_to_display = [
+        ("inputs", tensor_mapper), ("outputs", tensor_mapper),
+        ("builtin_options", None), ("opcode_index", opcode_mapper)]
+    tensor_keys_to_display = [
+        ("name", None), ("type", None), ("shape", None), ("buffer", None),
+        ("quantization", None)]
+
+    html += "<h2>Subgraph %d</h2>\n" % subgraph_idx
+
+    # Inputs and outputs.
+    html += "<h3>Inputs/Outputs</h3>\n"
+    html += GenerateTableHtml([{"inputs": g["inputs"],
+                                "outputs": g["outputs"]}],
+                              [("inputs", tensor_mapper),
+                               ("outputs", tensor_mapper)],
+                              display_index=False)
+
+    # Print the tensors.
+    html += "<h3>Tensors</h3>\n"
+    html += GenerateTableHtml(g["tensors"], tensor_keys_to_display)
+
+    # Print the ops.
+    html += "<h3>Ops</h3>\n"
+    html += GenerateTableHtml(g["operators"], op_keys_to_display)
+
+    # Visual graph.
+    html += "<svg id='subgraph%d' width='960' height='1600'></svg>\n" % (
+        subgraph_idx,)
+    html += GenerateGraph(subgraph_idx, g, opcode_mapper)
+    html += "</div>"
+
+  # Buffers have no data, but maybe in the future they will
+  html += "<h2>Buffers</h2>\n"
+  html += GenerateTableHtml(data["buffers"], buffer_keys_to_display)
+
+  # Operator codes
+  html += "<h2>Operator Codes</h2>\n"
+  html += GenerateTableHtml(data["operator_codes"],
+                            operator_keys_to_display)
+
+  html += "</body></html>\n"
+
+  open(html_output, "w").write(html)
+
+
+def main(argv):
+  try:
+    tflite_input = argv[1]
+    html_output = argv[2]
+  except IndexError:
+    print ("Usage: %s <input tflite> <output html>" % (argv[0]))
+  else:
+    CreateHtmlFile(tflite_input, html_output)
+
+if __name__ == "__main__":
+  main(sys.argv)
+
diff --git a/tensorflow/contrib/lookup/BUILD b/tensorflow/contrib/lookup/BUILD
index b7b5418fe91e496f021b44fc32a33d2a549782e5..8ca03f4193f260ce32f942ccaf76a8260b282156 100644
--- a/tensorflow/contrib/lookup/BUILD
+++ b/tensorflow/contrib/lookup/BUILD
@@ -7,7 +7,7 @@ exports_files(["LICENSE"])
 
 package(default_visibility = ["//tensorflow:internal"])
 
-load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "tf_py_test")
 
 # TODO(yleon): Refactor after one we switching to the V2 kernels.
 py_library(
@@ -26,13 +26,14 @@ py_library(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "lookup_ops_test",
     size = "small",
     srcs = ["lookup_ops_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":lookup_py",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -43,9 +44,8 @@ py_test(
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
     ],
+    grpc_enabled = True,
 )
 
 filegroup(
diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py
index 66caa6a2e5d17f74706965b7ca3f7928d63ae130..a430dac4ec43ce31f0b5aaae5e7b0b51d25c9632 100644
--- a/tensorflow/contrib/lookup/lookup_ops.py
+++ b/tensorflow/contrib/lookup/lookup_ops.py
@@ -399,7 +399,7 @@ class MutableHashTable(LookupInterface):
     Raises:
       TypeError: when `keys` do not match the table data types.
     """
-    if keys.dtype != self._key_dtype:
+    if keys.dtype.base_dtype != self._key_dtype:
       raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." %
                       (self._key_dtype, keys.dtype))
 
@@ -600,7 +600,7 @@ class MutableDenseHashTable(LookupInterface):
     Raises:
       TypeError: when `keys` do not match the table data types.
     """
-    if keys.dtype != self._key_dtype:
+    if keys.dtype.base_dtype != self._key_dtype:
       raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." %
                       (self._key_dtype, keys.dtype))
 
diff --git a/tensorflow/contrib/lookup/lookup_ops_test.py b/tensorflow/contrib/lookup/lookup_ops_test.py
index f0499010d476f68da541ee67d085b12e48faeaf5..f681b7b132750ef80aa56f25143418fbc4eaa1bb 100644
--- a/tensorflow/contrib/lookup/lookup_ops_test.py
+++ b/tensorflow/contrib/lookup/lookup_ops_test.py
@@ -187,6 +187,11 @@ class HashTableOpTest(test.TestCase):
           lookup.KeyValueTensorInitializer(keys, values), default_val)
       table.init.run()
 
+      # Ref types do not produce a lookup signature mismatch.
+      input_string_ref = variables.Variable("brain")
+      variables.global_variables_initializer().run()
+      self.assertEqual(0, table.lookup(input_string_ref).eval())
+
       input_string = constant_op.constant([1, 2, 3], dtypes.int64)
       with self.assertRaises(TypeError):
         table.lookup(input_string)
@@ -629,6 +634,17 @@ class MutableHashTableOpTest(test.TestCase):
       table.insert(keys, values).run()
       self.assertAllEqual(3, table.size().eval())
 
+      input_string_ref = variables.Variable("brain")
+      input_int64_ref = variables.Variable(-1, dtype=dtypes.int64)
+      variables.global_variables_initializer().run()
+
+      # Ref types do not produce an insert signature mismatch.
+      table.insert(input_string_ref, input_int64_ref).run()
+      self.assertAllEqual(3, table.size().eval())
+
+      # Ref types do not produce a lookup signature mismatch.
+      self.assertEqual(-1, table.lookup(input_string_ref).eval())
+
       # lookup with keys of the wrong type
       input_string = constant_op.constant([1, 2, 3], dtypes.int64)
       with self.assertRaises(TypeError):
@@ -1640,23 +1656,22 @@ class InitializeTableFromFileOpTest(test.TestCase):
       f.write("\n".join(values) + "\n")
     return vocabulary_file
 
+  @test_util.run_in_graph_and_eager_modes()
   def testInitializeStringTable(self):
     vocabulary_file = self._createVocabFile("one_column_1.txt")
+    default_value = -1
+    table = lookup.HashTable(
+        lookup.TextFileInitializer(vocabulary_file, dtypes.string,
+                                   lookup.TextFileIndex.WHOLE_LINE,
+                                   dtypes.int64,
+                                   lookup.TextFileIndex.LINE_NUMBER),
+        default_value)
+    self.evaluate(table.init)
 
-    with self.test_session():
-      default_value = -1
-      table = lookup.HashTable(
-          lookup.TextFileInitializer(vocabulary_file, dtypes.string,
-                                     lookup.TextFileIndex.WHOLE_LINE,
-                                     dtypes.int64,
-                                     lookup.TextFileIndex.LINE_NUMBER),
-          default_value)
-      table.init.run()
-
-      output = table.lookup(constant_op.constant(["brain", "salad", "tank"]))
+    output = table.lookup(constant_op.constant(["brain", "salad", "tank"]))
 
-      result = output.eval()
-      self.assertAllEqual([0, 1, -1], result)
+    result = self.evaluate(output)
+    self.assertAllEqual([0, 1, -1], result)
 
   def testInitializeInt64Table(self):
     vocabulary_file = self._createVocabFile(
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index 617ef25fa4b9da64bdb155c3f30874dc97784166..dd5770dc996b3efab8647a5e3ee4a069593c679b 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -89,7 +89,6 @@ HOST_INCLUDES := \
 -I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/downloads/nsync/public \
 -I$(MAKEFILE_DIR)/downloads/fft2d \
--I$(MAKEFILE_DIR)/downloads/double_conversion \
 -I$(HOST_GENDIR)
 ifeq ($(HAS_GEN_HOST_PROTOC),true)
 	HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
@@ -126,9 +125,7 @@ PROTO_TEXT := $(HOST_BINDIR)proto_text
 # The list of dependencies is derived from the Bazel build file by running
 # the gen_file_lists.sh script on a system with a working Bazel setup.
 PROTO_TEXT_CC_FILES := $(shell cat $(MAKEFILE_DIR)/proto_text_cc_files.txt)
-PROTO_TEXT_PB_CC_LIST := \
-	$(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt) \
-	$(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc)
+PROTO_TEXT_PB_CC_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_cc_files.txt)
 PROTO_TEXT_PB_H_LIST := $(shell cat $(MAKEFILE_DIR)/proto_text_pb_h_files.txt)
 
 # Locations of the intermediate files proto_text generates.
@@ -174,7 +171,6 @@ INCLUDES := \
 -I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/downloads/nsync/public \
 -I$(MAKEFILE_DIR)/downloads/fft2d \
--I$(MAKEFILE_DIR)/downloads/double_conversion \
 -I$(PROTOGENDIR) \
 -I$(PBTGENDIR)
 ifeq ($(HAS_GEN_HOST_PROTOC),true)
@@ -304,7 +300,7 @@ ifeq ($(TARGET),ANDROID)
 	ifeq ($(ANDROID_ARCH),x86_64)
 		TOOLCHAIN := x86_64-4.9
 		SYSROOT_ARCH := x86_64
-		BIN_PREFIX := x86-64-linux-android
+		BIN_PREFIX := x86_64-linux-android
 		MARCH_OPTION :=
 	endif
     
@@ -330,8 +326,6 @@ $(MARCH_OPTION) \
 -I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/downloads/nsync/public \
 -I$(MAKEFILE_DIR)/downloads/fft2d \
--I$(MAKEFILE_DIR)/downloads/double_conversion \
--I$(MAKEFILE_DIR)/gen/protobuf/include \
 -I$(MAKEFILE_DIR)/gen/protobuf_android/$(ANDROID_ARCH)/include \
 -I$(PROTOGENDIR) \
 -I$(PBTGENDIR)
@@ -380,12 +374,72 @@ $(MARCH_OPTION) \
 	ifdef ENABLE_EXPERIMENTAL_HEXNN_OPS
 		CXXFLAGS += -DENABLE_EXPERIMENTAL_HEXNN_OPS
 	endif
-	
-	OBJDIR := $(OBJDIR)android_$(ANDROID_ARCH)/
-	LIBDIR := $(LIBDIR)android_$(ANDROID_ARCH)/
-	BINDIR := $(BINDIR)android_$(ANDROID_ARCH)/
-	DEPDIR := $(DEPDIR)android_$(ANDROID_ARCH)/
 
+	ifeq ($(BUILD_FOR_TEGRA),1)
+		NVCC := $(JETPACK)/cuda/bin/nvcc
+		NVCCFLAGS := -x=cu -D__CUDACC__ -DNVCC -DNVIDIA_TEGRA -ccbin $(NDK_ROOT)/toolchains/$(TOOLCHAIN)/prebuilt/$(ANDROID_HOST_OS_ARCH)/bin/$(BIN_PREFIX)-g++ --std c++11 --expt-relaxed-constexpr -m64 -gencode arch=compute_53,\"code=sm_53\" -gencode arch=compute_62,\"code=sm_62\" -DEIGEN_AVOID_STL_ARRAY -DTENSORFLOW_USE_EIGEN_THREADPOOL -DLANG_CXX11 -DEIGEN_HAS_C99_MATH -DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=5.3
+		CXXFLAGS4NVCC =\
+-DIS_SLIM_BUILD \
+-DNVIDIA_TEGRA \
+-fno-exceptions \
+-DNDEBUG $(OPTFLAGS) \
+-march=armv8-a \
+-fPIE \
+-D__ANDROID_TYPES_FULL__ \
+--sysroot $(NDK_ROOT)/platforms/android-21/arch-arm64
+
+		CXXFLAGS +=\
+-DGOOGLE_CUDA=1 \
+-D__ANDROID_TYPES_FULL__ \
+-DNVIDIA_TEGRA \
+-DEIGEN_AVOID_STL_ARRAY \
+-DEIGEN_HAS_C99_MATH \
+-DLANG_CXX11 -DTENSORFLOW_USE_EIGEN_THREADPOOL -DTF_EXTRA_CUDA_CAPABILITIES=5.3
+
+		INCLUDES += \
+-Itensorflow/core/kernels \
+-I$(MAKEFILE_DIR)/downloads/cub \
+-I$(MAKEFILE_DIR)/downloads/cub/cub_archive/cub/device \
+-Ithird_party/toolchains/gpus/cuda \
+-I$(JETPACK)/cuda/include \
+-I$(JETPACK) \
+-I$(JETPACK)/cuDNN/aarch64 \
+-I$(JETPACK)/cuda/extras/CUPTI/include
+
+
+		LIBS += \
+-ltfcuda \
+-lcudart_static \
+-lcudnn \
+-lcublas_static \
+-lcufftw_static \
+-lcusolver_static \
+-lcusparse_static \
+-lcufft \
+-lcuda \
+-lculibos \
+-lcurand_static
+
+		OBJDIR := $(OBJDIR)Tegra/
+		LIBDIR := $(LIBDIR)Tegra/
+		BINDIR := $(BINDIR)Tegra/
+		DEPDIR := $(DEPDIR)Tegra/
+
+		TEGRA_LIBS := \
+-L$(JETPACK)/cuda/targets/aarch64-linux-androideabi/lib \
+-L$(JETPACK)/cuda/targets/aarch64-linux-androideabi/lib/stubs \
+-L$(JETPACK)/cuda/targets/aarch64-linux-androideabi/lib64 \
+-L$(JETPACK)/cuda/targets/aarch64-linux-androideabi/lib64/stubs \
+-L$(JETPACK)/cuDNN/aarch64/cuda/lib64 \
+-L$(LIBDIR)
+
+		CUDA_LIB_DEPS := $(LIBDIR)libtfcuda.a
+	else
+		OBJDIR := $(OBJDIR)android_$(ANDROID_ARCH)/
+		LIBDIR := $(LIBDIR)android_$(ANDROID_ARCH)/
+		BINDIR := $(BINDIR)android_$(ANDROID_ARCH)/
+		DEPDIR := $(DEPDIR)android_$(ANDROID_ARCH)/
+	endif # ifeq ($(BUILD_FOR_TEGRA),1)
 endif  # ANDROID
 # LINT.ThenChange(//tensorflow/contrib/android/cmake/CMakeLists.txt)
 
@@ -549,7 +603,6 @@ $(wildcard tensorflow/core/platform/*/*.cc) \
 $(wildcard tensorflow/core/platform/*/*/*.cc) \
 $(wildcard tensorflow/core/util/*.cc) \
 $(wildcard tensorflow/core/util/*/*.cc) \
-$(wildcard tensorflow/contrib/makefile/downloads/double_conversion/double-conversion/*.cc) \
 tensorflow/core/util/version_info.cc
 # Remove duplicates (for version_info.cc)
 CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS))
@@ -592,6 +645,65 @@ $(wildcard tensorflow/core/common_runtime/gpu_device_factory.*) \
 $(wildcard tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.*) \
 $(wildcard tensorflow/core/grappler/inputs/file_input_yielder.*) \
 $(wildcard tensorflow/core/grappler/clusters/single_machine.*)
+
+ifeq ($(BUILD_FOR_TEGRA),1)
+CORE_CC_ALL_SRCS := \
+$(wildcard tensorflow/core/*.cc) \
+$(wildcard tensorflow/core/common_runtime/*.cc) \
+$(wildcard tensorflow/core/common_runtime/gpu/*.cc) \
+$(wildcard tensorflow/core/framework/*.cc) \
+$(wildcard tensorflow/core/graph/*.cc) \
+$(wildcard tensorflow/core/platform/*.cc) \
+$(wildcard tensorflow/core/platform/*/*.cc) \
+$(wildcard tensorflow/core/platform/*/*/*.cc) \
+$(wildcard tensorflow/core/util/*.cc) \
+$(wildcard tensorflow/core/util/*/*.cc) \
+$(wildcard tensorflow/cc/training/*.cc) \
+$(wildcard tensorflow/stream_executor/*.cc) \
+$(wildcard tensorflow/stream_executor/*/*.cc) \
+$(wildcard tensorflow/core/grappler/optimizers/*.cc) \
+$(wildcard tensorflow/core/grappler/*.cc) \
+$(wildcard tensorflow/core/grappler/costs/*.cc) \
+$(wildcard tensorflow/core/grappler/clusters/*.cc) \
+$(wildcard tensorflow/core/grappler/utils/*.cc) \
+$(wildcard tensorflow/core/lib/core/*.cc) \
+$(wildcard tensorflow/core/lib/*/*.cc) \
+tensorflow/core/grappler/inputs/utils.cc \
+tensorflow/core/kernels/concat_lib_gpu.cc \
+tensorflow/core/kernels/cuda_solvers.cc \
+tensorflow/core/kernels/cudnn_pooling_gpu.cc \
+tensorflow/core/kernels/dense_update_functor.cc \
+tensorflow/core/kernels/fractional_avg_pool_op.cc \
+tensorflow/core/kernels/fractional_max_pool_op.cc \
+tensorflow/core/kernels/fractional_pool_common.cc \
+tensorflow/core/kernels/pooling_ops_3d.cc \
+tensorflow/core/kernels/sparse_fill_empty_rows_op.cc
+
+CORE_CC_EXCLUDE_SRCS := \
+$(wildcard tensorflow/core/*/*test.cc) \
+$(wildcard tensorflow/core/*/*testutil*) \
+$(wildcard tensorflow/core/*/*testlib*) \
+$(wildcard tensorflow/core/*/*/*test.cc) \
+$(wildcard tensorflow/core/*/*/*testutil*) \
+$(wildcard tensorflow/core/framework/op_gen_lib.cc) \
+$(wildcard tensorflow/core/lib/gif/*) \
+$(wildcard tensorflow/core/lib/jpeg/*) \
+$(wildcard tensorflow/core/lib/png/*) \
+$(wildcard tensorflow/core/lib/db/*) \
+$(wildcard tensorflow/core/platform/jpeg.*) \
+$(wildcard tensorflow/core/platform/png.*) \
+$(wildcard tensorflow/core/platform/cloud/*) \
+$(wildcard tensorflow/core/platform/s3/*) \
+$(wildcard tensorflow/core/platform/windows/*) \
+$(wildcard tensorflow/core/*/*/*testlib*) \
+$(wildcard tensorflow/cc/training/*test.cc) \
+tensorflow/core/lib/io/record_reader.cc \
+tensorflow/core/util/cuda_kernel_helper_test.cu.cc
+
+CUDA_CC_SRCS := $(wildcard tensorflow/core/kernels/*.cu.cc)
+CUDA_CC_OBJS := $(addprefix $(OBJDIR), $(CUDA_CC_SRCS:.cc=.o))
+endif  # TEGRA
+
 # Filter out all the excluded files.
 TF_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS))
 # Add in any extra files that don't fit the patterns easily
@@ -644,11 +756,23 @@ $(LIB_PATH): $(LIB_OBJS)
 	@mkdir -p $(dir $@)
 	$(AR) $(ARFLAGS) $(LIB_PATH) $(LIB_OBJS)
 
-$(BENCHMARK_NAME): $(BENCHMARK_OBJS) $(LIB_PATH)
+$(BENCHMARK_NAME): $(BENCHMARK_OBJS) $(LIB_PATH) $(CUDA_LIB_DEPS)
 	@mkdir -p $(dir $@)
 	$(CXX) $(CXXFLAGS) $(INCLUDES) \
 	-o $(BENCHMARK_NAME) $(BENCHMARK_OBJS) \
-	$(LIBFLAGS) $(LIB_PATH) $(LDFLAGS) $(LIBS)
+	$(LIBFLAGS) $(TEGRA_LIBS) $(LIB_PATH) $(LDFLAGS) $(LIBS)
+
+# NVCC compilation rules for Tegra
+ifeq ($(BUILD_FOR_TEGRA),1)
+$(OBJDIR)%.cu.o: %.cu.cc
+	@mkdir -p $(dir $@)
+	@mkdir -p $(dir $(DEPDIR)$*)
+	$(NVCC) $(NVCCFLAGS) -Xcompiler "$(CXXFLAGS4NVCC) $(DEPFLAGS)" $(INCLUDES) -c $< -o $@
+
+$(LIBDIR)libtfcuda.a: $(CUDA_CC_OBJS)
+	@mkdir -p $(dir $@)
+	$(AR) $(ARFLAGS) $@ $(CUDA_CC_OBJS)
+endif
 
 # Matches on the normal hand-written TensorFlow C++ source files.
 $(OBJDIR)%.o: %.cc | $(PBT_GEN_FILES)
@@ -737,6 +861,7 @@ clean_except_protobuf_libs:
 cleantarget:
 	rm -rf $(OBJDIR)
 	rm -rf $(BINDIR)
+	rm -rf $(LIBDIR)
 
 $(DEPDIR)/%.d: ;
 .PRECIOUS: $(DEPDIR)/%.d
diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md
index 9345303ff11462a447ed6299b0ac3cba558ea68b..0613de2cabe2065f1e4a816f2295d41b69159c10 100644
--- a/tensorflow/contrib/makefile/README.md
+++ b/tensorflow/contrib/makefile/README.md
@@ -262,6 +262,14 @@ to register ops and kernels.
 
 #### Optimization
 
+The `build_all_ios.sh` script can take optional command-line arguments to
+selectively register only for the operators used in your graph.
+
+```bash
+tensorflow/contrib/makefile/build_all_ios.sh -a arm64 -g $HOME/graphs/inception/tensorflow_inception_graph.pb
+```
+Please note this is an aggresive optimization of the operators and the resulting library may not work with other graphs but will reduce the size of the final library.
+
 The `compile_ios_tensorflow.sh` script can take optional command-line arguments.
 The first argument will be passed as a C++ optimization flag and defaults to
 debug mode. If you are concerned about performance or are working on a release
diff --git a/tensorflow/contrib/makefile/build_all_android.sh b/tensorflow/contrib/makefile/build_all_android.sh
index 81cb17a311fd94aa397eb7a766cd8c668268759a..980a44a5952a098da8a00e666d37a6d1642f4095 100755
--- a/tensorflow/contrib/makefile/build_all_android.sh
+++ b/tensorflow/contrib/makefile/build_all_android.sh
@@ -26,7 +26,7 @@ usage() {
   echo "-x [hexagon library path] copy and hexagon libraries in the specified path"
   echo "-a [architecture] Architecture of target android [default=armeabi-v7a] \
 (supported architecture list: \
-arm64-v8a armeabi armeabi-v7a mips mips64 x86 x86_64)"
+arm64-v8a armeabi armeabi-v7a mips mips64 x86 x86_64 tegra)"
   exit 1
 }
 
@@ -50,6 +50,26 @@ while getopts "Es:t:Tx:a:" opt_name; do
 done
 shift $((OPTIND - 1))
 
+if [ "$ARCH" == "tegra" ]; then
+    if [[ -z "${JETPACK}" ]]; then
+        export JETPACK="$HOME/JetPack_Android_3.0"
+    fi
+    if [ ! -d ${JETPACK} ]; then
+        echo "Can't find Jetpack at ${JETPACK}"
+        echo "Set JETPACK=<path to Jetpack Android> to specify a non-default Jetpack path"
+        exit -1
+    fi
+    if [ ! -d ${JETPACK}/cuda ]; then
+        ln -s $(ls -d ${JETPACK}/cuda-*/|sort -r|head -n1) ${JETPACK}/cuda
+    fi
+    if [ ! -d ${JETPACK}/cuda ]; then
+        ln -s $(ls -d ${JETPACK}/cuda-*/|sort -r|head -n1) ${JETPACK}/cuda
+    fi
+
+    export BUILD_FOR_TEGRA=1
+    ARCH="arm64-v8a"
+fi
+
 # Make sure we're in the correct directory, at the root of the source tree.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
 cd "${SCRIPT_DIR}"/../../../
diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh
index 988e12b48287300004cc23c31cb4a20e63f72a27..a18df256f976c3c0ac4cefe1c884d951e63ef823 100755
--- a/tensorflow/contrib/makefile/build_all_ios.sh
+++ b/tensorflow/contrib/makefile/build_all_ios.sh
@@ -26,13 +26,16 @@ fi
 usage() {
   echo "Usage: $(basename "$0") [-a:T]"
   echo "-a [build_arch] build only for specified arch x86_64 [default=all]"
+  echo "-g [graph] optimize and selectively register ops only for this graph"
   echo "-T only build tensorflow (dont download other deps etc)"
   exit 1
 }
 
-while getopts "a:T" opt_name; do
+DEFAULT_ARCH="i386 x86_64 armv7 armv7s arm64"
+while getopts "a:g:T" opt_name; do
   case "$opt_name" in
     a) BUILD_ARCH="${OPTARG}";;
+    g) OPTIMIZE_FOR_GRAPH="${OPTARG}";;
     T) ONLY_MAKE_TENSORFLOW="true";;
     *) usage;;
   esac
@@ -42,7 +45,8 @@ shift $((OPTIND - 1))
 
 # Make sure we're in the correct directory, at the root of the source tree.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-cd ${SCRIPT_DIR}/../../../
+TOP_SRCDIR="${SCRIPT_DIR}/../../../"
+cd ${TOP_SRCDIR}
 
 source "${SCRIPT_DIR}/build_helper.subr"
 JOB_COUNT="${JOB_COUNT:-$(get_job_count)}"
@@ -56,6 +60,32 @@ if [[ -n MACOSX_DEPLOYMENT_TARGET ]]; then
     export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion)
 fi
 
+PRNT_SLCTV_BIN="${TOP_SRCDIR}bazel-bin/tensorflow/python/tools/print_selective_registration_header"
+
+if [[ ! -z "${OPTIMIZE_FOR_GRAPH}" ]]; then
+    echo "Request to optimize for graph: ${OPTIMIZE_FOR_GRAPH}"
+    #Request to trim the OPs by selectively registering
+    if [ ! -f ${PRNT_SLCTV_BIN} ]; then
+        #Build bazel build tensorflow/python/tools:print_selective_registration_header
+        echo "${PRNT_SLCTV_BIN} not found. Trying to build it"
+        cd ${TOP_SRCDIR}
+        bazel build --copt="-DUSE_GEMM_FOR_CONV" tensorflow/python/tools:print_selective_registration_header
+         if [ ! -f ${PRNT_SLCTV_BIN} ]; then
+            echo "Building print_selective_registration_header failed"
+            echo "You may want to build TensorFlow with: "
+            echo "./configure"
+            echo "bazel build --copt="-DUSE_GEMM_FOR_CONV" tensorflow/python/tools:print_selective_registration_header"
+            echo "and then run this script again"
+            exit 1
+        fi
+    else
+        echo "${PRNT_SLCTV_BIN} found. Using it"
+        ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h
+
+    fi
+
+fi
+
 if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then
     # Remove any old files first.
     make -f tensorflow/contrib/makefile/Makefile clean
@@ -64,8 +94,13 @@ if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then
     # Pull down the required versions of the frameworks we need.
     tensorflow/contrib/makefile/download_dependencies.sh
 
-    # Compile protobuf for the target iOS device architectures.
-    tensorflow/contrib/makefile/compile_ios_protobuf.sh
+    if [[ -z "${BUILD_ARCH}" ]]; then
+        # Compile protobuf for the target iOS device architectures.
+        tensorflow/contrib/makefile/compile_ios_protobuf.sh -a ${DEFAULT_ARCH}
+    else
+        # Compile protobuf for the target iOS device architectures.
+        tensorflow/contrib/makefile/compile_ios_protobuf.sh -a ${BUILD_ARCH}
+    fi
 fi
 
 # Compile nsync for the target iOS device architectures.
@@ -80,13 +115,24 @@ else
 fi
 export HOST_NSYNC_LIB TARGET_NSYNC_LIB
 
-if [[ -z "${BUILD_ARCH}" ]]; then
-    # build the ios tensorflow libraries.
-    tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB
-else
+TF_CC_FLAGS="-O3"
+TF_SCRIPT_FLAGS="-h ${HOST_NSYNC_LIB} -n ${TARGET_NSYNC_LIB}"
+
+if [[ ! -z "${OPTIMIZE_FOR_GRAPH}" ]]; then
+    # arch specified so build just that
+    TF_CC_FLAGS="${TF_CC_FLAGS} -DANDROID_TYPES=__ANDROID_TYPES_FULL__ -DSELECTIVE_REGISTRATION -DSUPPORT_SELECTIVE_REGISTRATION"
+    # The Makefile checks the env var to decide which ANDROID_TYPES to build
+    export ANDROID_TYPES="-D__ANDROID_TYPES_FULL__"
+fi
+
+if [[ ! -z "${BUILD_ARCH}" ]]; then
     # arch specified so build just that
-    tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -a "${BUILD_ARCH}" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB
+    TF_SCRIPT_FLAGS="${TF_SCRIPT_FLAGS} -a ${BUILD_ARCH}"
 fi
 
+# build the ios tensorflow libraries.
+echo "Building TensorFlow with flags: ${TF_SCRIPT_FLAGS} -f ${TF_CC_FLAGS}"
+tensorflow/contrib/makefile/compile_ios_tensorflow.sh ${TF_SCRIPT_FLAGS} -f "${TF_CC_FLAGS}"
+
 # Creates a static universal library in
 # tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a
diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index 675ab2428930469ff1ce89415f4cb560b8ce65f0..4ae18b2cef28335a90bbc967529c0cf76b0a5da2 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -33,8 +33,8 @@ NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.
 PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
 FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
-DOUBLE_CONVERSION_URL="$(grep -o "https.*google/double-conversion.*\.tar.gz" "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)"
 ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)"
+CUB_URL="$(grep -o 'https.*cub/archive.*zip' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
 
 # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64,
 #                   so work around it by patching the source.
@@ -63,12 +63,17 @@ download_and_extract() {
   elif [[ "${url}" == *zip ]]; then
     tempdir=$(mktemp -d)
     tempdir2=$(mktemp -d)
-    wget -P ${tempdir} ${url}
-    unzip ${tempdir}/* -d ${tempdir2}
+    if [[ "$OSTYPE" == "darwin"* ]]; then
+      # macOS (AKA darwin) doesn't have wget.
+      (cd "${tempdir}"; curl --remote-name --silent --location "${url}")
+    else
+      wget -P "${tempdir}" "${url}"
+    fi
+    unzip "${tempdir}"/* -d "${tempdir2}"
     # unzip has no strip components, so unzip to a temp dir, and move the files
     # we want from the tempdir to destination.
-    cp -R ${tempdir2}/*/* ${dir}/
-    rm -rf ${tempdir2} ${tempdir}
+    cp -R "${tempdir2}"/*/* "${dir}"/
+    rm -rf "${tempdir2}" "${tempdir}"
   fi
 
   # Delete any potential BUILD files, which would interfere with Bazel builds.
@@ -82,8 +87,8 @@ download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync"
 download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf"
 download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2"
 download_and_extract "${FFT2D_URL}" "${DOWNLOADS_DIR}/fft2d"
-download_and_extract "${DOUBLE_CONVERSION_URL}" "${DOWNLOADS_DIR}/double_conversion"
 download_and_extract "${ABSL_URL}" "${DOWNLOADS_DIR}/absl"
+download_and_extract "${CUB_URL}" "${DOWNLOADS_DIR}/cub/external/cub_archive"
 
 replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \
   "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h"
diff --git a/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in b/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in
index 26c1ad4947363e98d9bb8e400f40290fb87b2e4e..d9277ed60cb456208572ca1ad8df530648faef82 100644
--- a/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in
+++ b/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in
@@ -48,10 +48,10 @@ INFERENCE_OBJS := $(addprefix $(OBJDIR), $(INFERENCE_SRCS:.cc=.o))
 INFERENCE_SO_NAME := libtensorflow_inference.so
 INFERENCE_SO_PATH := $(LIBDIR)$(INFERENCE_SO_NAME)
 
-$(INFERENCE_SO_PATH): $(LIB_OBJS) $(INFERENCE_OBJS)
+$(INFERENCE_SO_PATH): $(LIB_OBJS) $(INFERENCE_OBJS) $(CUDA_LIB_DEPS)
 	@mkdir -p $(dir $@)
 	$(CXX) $(CXXFLAGS) $(INCLUDES) \
-	-o $@ $(INFERENCE_OBJS) $(LIB_OBJS) \
+	-o $@ $(INFERENCE_OBJS) $(LIB_OBJS) $(TEGRA_LIBS) \
 	$(LIBFLAGS) $(LDFLAGS) \
 	-shared -Wl,-soname,$(INFERENCE_SO_NAME) \
 	$(LIBS)
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index ff612f1fdf8c526322c4dfb997f32f78e2ae5609..5f275663986f9d480659880ab601eeb5c41037be 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -68,6 +68,8 @@ tensorflow/core/kernels/scatter_nd_op_cpu_impl_2.cc
 tensorflow/core/kernels/scatter_nd_op_cpu_impl_3.cc
 tensorflow/core/kernels/scatter_nd_op_cpu_impl_4.cc
 tensorflow/core/kernels/scatter_nd_op_cpu_impl_5.cc
+tensorflow/core/kernels/scatter_nd_op_cpu_impl_6.cc
+tensorflow/core/kernels/scatter_nd_op_cpu_impl_7.cc
 tensorflow/core/kernels/scatter_nd_op.cc
 tensorflow/core/kernels/save_restore_tensor.cc
 tensorflow/core/kernels/save_restore_v2_ops.cc
@@ -132,6 +134,8 @@ tensorflow/core/kernels/gather_nd_op_cpu_impl_2.cc
 tensorflow/core/kernels/gather_nd_op_cpu_impl_3.cc
 tensorflow/core/kernels/gather_nd_op_cpu_impl_4.cc
 tensorflow/core/kernels/gather_nd_op_cpu_impl_5.cc
+tensorflow/core/kernels/gather_nd_op_cpu_impl_6.cc
+tensorflow/core/kernels/gather_nd_op_cpu_impl_7.cc
 tensorflow/core/kernels/fused_batch_norm_op.cc
 tensorflow/core/kernels/function_ops.cc
 tensorflow/core/kernels/fill_functor.cc
@@ -144,6 +148,7 @@ tensorflow/core/kernels/dynamic_stitch_op.cc
 tensorflow/core/kernels/dynamic_partition_op.cc
 tensorflow/core/kernels/decode_bmp_op.cc
 tensorflow/core/kernels/depthtospace_op.cc
+tensorflow/core/kernels/data_format_ops.cc
 tensorflow/core/kernels/spacetodepth_op.cc
 tensorflow/core/kernels/dense_update_ops.cc
 tensorflow/core/kernels/deep_conv2d.cc
diff --git a/tensorflow/contrib/memory_stats/__init__.py b/tensorflow/contrib/memory_stats/__init__.py
index a32302c854b68ed1b211a221f3026e8d5b6091ac..2ce849ca660076aa5d25db4f16b8d24051e315ae 100644
--- a/tensorflow/contrib/memory_stats/__init__.py
+++ b/tensorflow/contrib/memory_stats/__init__.py
@@ -19,6 +19,10 @@
 @@MaxBytesInUse
 """
 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
 from tensorflow.contrib.memory_stats.python.ops.memory_stats_ops import BytesInUse
 from tensorflow.contrib.memory_stats.python.ops.memory_stats_ops import BytesLimit
 from tensorflow.contrib.memory_stats.python.ops.memory_stats_ops import MaxBytesInUse
diff --git a/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc b/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc
index 7e2e96e160167ae68d3bdabacbbbeb45df61778f..39c0d5af45b4a81fa4dde0b5deac14a3af372cbb 100644
--- a/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc
+++ b/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc
@@ -59,7 +59,7 @@ REGISTER_KERNEL_BUILDER(Name("BytesInUse").Device(DEVICE_GPU).HostMemory("out"),
 
 #ifdef TENSORFLOW_USE_SYCL
 REGISTER_KERNEL_BUILDER(
-    Name("BytesInUse").Device(DEVICE_SYCL).HostMemory("out"), MaxBytesInUseOp);
+    Name("BytesInUse").Device(DEVICE_SYCL).HostMemory("out"), BytesInUseOp);
 #endif  // TENSORFLOW_USE_SYCL
 
 // Op that measures the total memory (in bytes) of a device.
diff --git a/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py b/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py
index d1b430b8039fcf7e10bcb842c3f34b960b9026b3..02c2ac06fb7dc0c930deaaa4c21a6971d96f19a1 100644
--- a/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py
+++ b/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py
@@ -77,8 +77,9 @@ class MemoryStatsOpsTest(test_util.TensorFlowTestCase):
         bytes_in_use_op = memory_stats_ops.BytesInUse()
       with ops.control_dependencies([bytes_in_use_op]):
         b = random_ops.random_uniform(matrix_shape, dtype=dtype)
+        c = math_ops.matmul(a, b)
 
-      _, bytes_in_use, max_bytes_in_use = sess.run([a, bytes_in_use_op,
+      _, bytes_in_use, max_bytes_in_use = sess.run([c, bytes_in_use_op,
                                                     max_bytes_in_use_op])
 
       # intermediate result allocates 1 matrix, max usage is at least 2
diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py
index 27dad5379a2e56b91960a1f2274610e4f2568dbc..d3dce46bfb6e9c77cc7ae107b323a9bc7074c47e 100644
--- a/tensorflow/contrib/metrics/__init__.py
+++ b/tensorflow/contrib/metrics/__init__.py
@@ -66,6 +66,7 @@ See the @{$python/contrib.metrics} guide.
 @@set_intersection
 @@set_size
 @@set_union
+@@cohen_kappa
 @@count
 @@precision_recall_at_equal_thresholds
 @@recall_at_precision
@@ -82,6 +83,7 @@ from tensorflow.contrib.metrics.python.ops.confusion_matrix_ops import confusion
 from tensorflow.contrib.metrics.python.ops.histogram_ops import auc_using_histogram
 from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metric_map
 from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metrics
+from tensorflow.contrib.metrics.python.ops.metric_ops import cohen_kappa
 from tensorflow.contrib.metrics.python.ops.metric_ops import count
 from tensorflow.contrib.metrics.python.ops.metric_ops import precision_recall_at_equal_thresholds
 from tensorflow.contrib.metrics.python.ops.metric_ops import recall_at_precision
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index 6b08b749f86bc098ac511d142770362952b491d8..c3de1c4c62f04c7ef3d85f36662805c0c0ec4b4c 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -24,10 +24,12 @@ from __future__ import print_function
 
 import collections as collections_lib
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import confusion_matrix
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics
@@ -2268,7 +2270,7 @@ def recall_at_precision(labels,
     thresholds = [0.0 - _EPSILON] + thresholds + [1.0 + _EPSILON]
 
     values, update_ops = _streaming_confusion_matrix_at_thresholds(
-        labels, predictions, thresholds, weights)
+        predictions, labels, thresholds, weights)
 
     recall = _compute_recall_at_precision(values['tp'], values['fp'],
                                           values['fn'], precision, 'value')
@@ -3297,9 +3299,131 @@ def count(values,
     return count_, update_op
 
 
+def cohen_kappa(labels, predictions_idx, num_classes, weights=None,
+                metrics_collections=None, updates_collections=None, name=None):
+  """Calculates Cohen's kappa.
+
+  [Cohen's kappa](https://en.wikipedia.org/wiki/Cohen's_kappa) is a statistic
+  that measures inter-annotator agreement.
+
+  The `cohen_kappa` function calculates the confusion matrix, and creates three
+  local variables to compute the Cohen's kappa: `po`, `pe_row`, and `pe_col`,
+  which refer to the diagonal part, rows and columns totals of the confusion
+  matrix, respectively. This value is ultimately returned as `kappa`, an
+  idempotent operation that is calculated by
+
+      pe = (pe_row * pe_col) / N
+      k = (sum(po) - sum(pe)) / (N - sum(pe))
+
+  For estimation of the metric over a stream of data, the function creates an
+  `update_op` operation that updates these variables and returns the
+  `kappa`. `update_op` weights each prediction by the corresponding value in
+  `weights`.
+
+  Class labels are expected to start at 0. E.g., if `num_classes`
+  was three, then the possible labels would be [0, 1, 2].
+
+  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+  NOTE: Equivalent to `sklearn.metrics.cohen_kappa_score`, but the method
+  doesn't support weighted matrix yet.
+
+  Args:
+    labels: 1-D `Tensor` of real labels for the classification task. Must be
+      one of the following types: int16, int32, int64.
+    predictions_idx: 1-D `Tensor` of predicted class indices for a given
+      classification. Must have the same type as `labels`.
+    num_classes: The possible number of labels.
+    weights: Optional `Tensor` whose shape matches `predictions`.
+    metrics_collections: An optional list of collections that `kappa` should
+      be added to.
+    updates_collections: An optional list of collections that `update_op` should
+      be added to.
+    name: An optional variable_scope name.
+
+  Returns:
+    kappa: Scalar float `Tensor` representing the current Cohen's kappa.
+    update_op: `Operation` that increments `po`, `pe_row` and `pe_col`
+      variables appropriately and whose value matches `kappa`.
+
+  Raises:
+    ValueError: If `num_classes` is less than 2, or `predictions` and `labels`
+      have mismatched shapes, or if `weights` is not `None` and its shape
+      doesn't match `predictions`, or if either `metrics_collections` or
+      `updates_collections` are not a list or tuple.
+    RuntimeError: If eager execution is enabled.
+  """
+  if context.in_eager_mode():
+    raise RuntimeError('tf.contrib.metrics.cohen_kappa is not supported'
+                       'when eager execution is enabled.')
+  if num_classes < 2:
+    raise ValueError('`num_classes` must be >= 2.'
+                     'Found: {}'.format(num_classes))
+  with variable_scope.variable_scope(name, 'cohen_kappa',
+                                     (labels, predictions_idx, weights)):
+    # Convert 2-dim (num, 1) to 1-dim (num,)
+    labels.get_shape().with_rank_at_most(2)
+    if labels.get_shape().ndims == 2:
+      labels = array_ops.squeeze(labels, axis=[-1])
+    predictions_idx, labels, weights = (
+        metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
+            predictions=predictions_idx, labels=labels, weights=weights))
+    predictions_idx.get_shape().assert_is_compatible_with(labels.get_shape())
+
+    stat_dtype = (dtypes.int64
+                  if weights is None or weights.dtype.is_integer
+                  else dtypes.float32)
+    po = metrics_impl.metric_variable(
+        (num_classes,), stat_dtype, name='po')
+    pe_row = metrics_impl.metric_variable(
+        (num_classes,), stat_dtype, name='pe_row')
+    pe_col = metrics_impl.metric_variable(
+        (num_classes,), stat_dtype, name='pe_col')
+
+    # Table of the counts of agreement:
+    counts_in_table = confusion_matrix.confusion_matrix(
+      labels, predictions_idx,
+      num_classes=num_classes, weights=weights,
+      dtype=stat_dtype, name="counts_in_table")
+
+    po_t = array_ops.diag_part(counts_in_table)
+    pe_row_t = math_ops.reduce_sum(counts_in_table, axis=0)
+    pe_col_t = math_ops.reduce_sum(counts_in_table, axis=1)
+    update_po = state_ops.assign_add(po, po_t)
+    update_pe_row = state_ops.assign_add(pe_row, pe_row_t)
+    update_pe_col = state_ops.assign_add(pe_col, pe_col_t)
+
+    def _calculate_k(po, pe_row, pe_col, name):
+      po_sum = math_ops.reduce_sum(po)
+      total = math_ops.reduce_sum(pe_row)
+      pe_sum = math_ops.reduce_sum(
+          metrics_impl._safe_div(  # pylint: disable=protected-access
+              pe_row * pe_col, total, None))
+      po_sum, pe_sum, total = (math_ops.to_double(po_sum),
+                               math_ops.to_double(pe_sum),
+                               math_ops.to_double(total))
+      # kappa = (po - pe) / (N - pe)
+      k = metrics_impl._safe_scalar_div(  # pylint: disable=protected-access
+          po_sum - pe_sum, total - pe_sum, name=name)
+      return k
+
+    kappa = _calculate_k(po, pe_row, pe_col, name='value')
+    update_op = _calculate_k(update_po, update_pe_row, update_pe_col,
+                             name='update_op')
+
+    if metrics_collections:
+      ops.add_to_collections(metrics_collections, kappa)
+
+    if updates_collections:
+      ops.add_to_collections(updates_collections, update_op)
+
+    return kappa, update_op
+
+
 __all__ = [
     'aggregate_metric_map',
     'aggregate_metrics',
+    'cohen_kappa',
     'count',
     'precision_recall_at_equal_thresholds',
     'recall_at_precision',
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
index 7db06609de4e73fe5c18f81cef225829e9f54123..89aa29f711e3b0114a5d776b258f77214cb349bc 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
@@ -3162,7 +3162,7 @@ class RecallAtPrecisionTest(test.TestCase):
     labels = random_ops.random_uniform(
         (10, 3), maxval=2, dtype=dtypes_lib.int64, seed=2)
     recall, update_op = metrics.recall_at_precision(
-        predictions, labels, precision=0.7)
+        labels, predictions, precision=0.7)
 
     with self.test_session() as sess:
       sess.run(variables.local_variables_initializer())
@@ -3182,7 +3182,7 @@ class RecallAtPrecisionTest(test.TestCase):
     predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32)
     labels = constant_op.constant(inputs)
     recall, update_op = metrics.recall_at_precision(
-        predictions, labels, precision=1.0)
+        labels, predictions, precision=1.0)
 
     with self.test_session() as sess:
       sess.run(variables.local_variables_initializer())
@@ -3197,7 +3197,7 @@ class RecallAtPrecisionTest(test.TestCase):
         predictions_values, dtype=dtypes_lib.float32)
     labels = constant_op.constant(labels_values)
     recall, update_op = metrics.recall_at_precision(
-        predictions, labels, precision=0.8)
+        labels, predictions, precision=0.8)
 
     with self.test_session() as sess:
       sess.run(variables.local_variables_initializer())
@@ -3212,7 +3212,7 @@ class RecallAtPrecisionTest(test.TestCase):
         predictions_values, dtype=dtypes_lib.float32)
     labels = constant_op.constant(labels_values)
     recall, update_op = metrics.recall_at_precision(
-        predictions, labels, precision=0.4)
+        labels, predictions, precision=0.4)
 
     with self.test_session() as sess:
       sess.run(variables.local_variables_initializer())
@@ -3230,7 +3230,7 @@ class RecallAtPrecisionTest(test.TestCase):
     labels = constant_op.constant(labels_values)
     weights = constant_op.constant(weights_values)
     recall, update_op = metrics.recall_at_precision(
-        predictions, labels, weights=weights, precision=0.4)
+        labels, predictions, weights=weights, precision=0.4)
 
     with self.test_session() as sess:
       sess.run(variables.local_variables_initializer())
@@ -6660,5 +6660,213 @@ class CountTest(test.TestCase):
       self.assertAlmostEqual(4.1, result.eval(), 5)
 
 
+class CohenKappaTest(test.TestCase):
+
+  def _confusion_matrix_to_samples(self, confusion_matrix):
+    x, y = confusion_matrix.shape
+    pairs = []
+    for label in range(x):
+      for feature in range(y):
+        pairs += [label, feature] * confusion_matrix[label, feature]
+    pairs = np.array(pairs).reshape((-1, 2))
+    return pairs[:, 0], pairs[:, 1]
+
+  def setUp(self):
+    np.random.seed(1)
+    ops.reset_default_graph()
+
+  def testVars(self):
+    metrics.cohen_kappa(
+        predictions_idx=array_ops.ones((10, 1)),
+        labels=array_ops.ones((10, 1)),
+        num_classes=2)
+    _assert_metric_variables(self, (
+        'cohen_kappa/po:0',
+        'cohen_kappa/pe_row:0',
+        'cohen_kappa/pe_col:0',))
+
+  def testMetricsCollection(self):
+    my_collection_name = '__metrics__'
+    kappa, _ = metrics.cohen_kappa(
+        predictions_idx=array_ops.ones((10, 1)),
+        labels=array_ops.ones((10, 1)),
+        num_classes=2,
+        metrics_collections=[my_collection_name])
+    self.assertListEqual(ops.get_collection(my_collection_name), [kappa])
+
+  def testUpdatesCollection(self):
+    my_collection_name = '__updates__'
+    _, update_op = metrics.cohen_kappa(
+        predictions_idx=array_ops.ones((10, 1)),
+        labels=array_ops.ones((10, 1)),
+        num_classes=2,
+        updates_collections=[my_collection_name])
+    self.assertListEqual(ops.get_collection(my_collection_name), [update_op])
+
+  def testValueTensorIsIdempotent(self):
+    predictions = random_ops.random_uniform(
+      (10, 1), maxval=3, dtype=dtypes_lib.int64, seed=1)
+    labels = random_ops.random_uniform(
+      (10, 1), maxval=3, dtype=dtypes_lib.int64, seed=2)
+    kappa, update_op = metrics.cohen_kappa(labels, predictions, 3)
+
+    with self.test_session() as sess:
+      sess.run(variables.local_variables_initializer())
+
+      # Run several updates.
+      for _ in range(10):
+        sess.run(update_op)
+
+      # Then verify idempotency.
+      initial_kappa = kappa.eval()
+      for _ in range(10):
+        self.assertAlmostEqual(initial_kappa, kappa.eval(), 5)
+
+  def testBasic(self):
+    confusion_matrix = np.array([
+      [9, 3, 1],
+      [4, 8, 2],
+      [2, 1, 6]])
+    # overall total = 36
+    # po = [9, 8, 6], sum(po) = 23
+    # pe_row = [15, 12, 9], pe_col = [13, 14, 9], so pe = [5.42, 4.67, 2.25]
+    # finally, kappa = (sum(po) - sum(pe)) / (N - sum(pe))
+    #                = (23 - 12.34) / (36 - 12.34)
+    #                = 0.45
+    # see: http://psych.unl.edu/psycrs/handcomp/hckappa.PDF
+    expect = 0.45
+    labels, predictions = self._confusion_matrix_to_samples(confusion_matrix)
+
+    dtypes = [dtypes_lib.int16, dtypes_lib.int32, dtypes_lib.int64]
+    shapes = [(len(labels,)),  # 1-dim
+              (len(labels), 1)]  # 2-dim
+    weights = [None, np.ones_like(labels)]
+
+    for dtype in dtypes:
+      for shape in shapes:
+        for weight in weights:
+          with self.test_session() as sess:
+            predictions_tensor = constant_op.constant(
+                np.reshape(predictions, shape), dtype=dtype)
+            labels_tensor = constant_op.constant(
+                np.reshape(labels, shape), dtype=dtype)
+            kappa, update_op = metrics.cohen_kappa(
+                labels_tensor, predictions_tensor, 3, weights=weight)
+
+            sess.run(variables.local_variables_initializer())
+            self.assertAlmostEqual(expect, sess.run(update_op), 2)
+            self.assertAlmostEqual(expect, kappa.eval(), 2)
+
+  def testAllCorrect(self):
+    inputs = np.arange(0, 100) % 4
+    # confusion matrix
+    # [[25, 0, 0],
+    #  [0, 25, 0],
+    #  [0, 0, 25]]
+    # Calculated by v0.19: sklearn.metrics.cohen_kappa_score(inputs, inputs)
+    expect = 1.0
+
+    with self.test_session() as sess:
+      predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32)
+      labels = constant_op.constant(inputs)
+      kappa, update_op = metrics.cohen_kappa(labels, predictions, 4)
+
+      sess.run(variables.local_variables_initializer())
+      self.assertAlmostEqual(expect, sess.run(update_op), 5)
+      self.assertAlmostEqual(expect, kappa.eval(), 5)
+
+  def testAllIncorrect(self):
+    labels = np.arange(0, 100) % 4
+    predictions = (labels + 1) % 4
+    # confusion matrix
+    # [[0, 25, 0],
+    #  [0, 0, 25],
+    #  [25, 0, 0]]
+    # Calculated by v0.19: sklearn.metrics.cohen_kappa_score(labels, predictions)
+    expect = -0.333333333333
+
+    with self.test_session() as sess:
+      predictions = constant_op.constant(predictions, dtype=dtypes_lib.float32)
+      labels = constant_op.constant(labels)
+      kappa, update_op = metrics.cohen_kappa(labels, predictions, 4)
+
+      sess.run(variables.local_variables_initializer())
+      self.assertAlmostEqual(expect, sess.run(update_op), 5)
+      self.assertAlmostEqual(expect, kappa.eval(), 5)
+
+  def testWeighted(self):
+    confusion_matrix = np.array([
+      [9, 3, 1],
+      [4, 8, 2],
+      [2, 1, 6]])
+    labels, predictions = self._confusion_matrix_to_samples(confusion_matrix)
+    num_samples = np.sum(confusion_matrix, dtype=np.int32)
+    weights = (np.arange(0, num_samples) % 5) / 5.0
+    # Calculated by v0.19: sklearn.metrics.cohen_kappa_score(
+    #                          labels, predictions, sample_weight=weights)
+    expect = 0.453466583385
+
+    with self.test_session() as sess:
+      predictions = constant_op.constant(predictions, dtype=dtypes_lib.float32)
+      labels = constant_op.constant(labels)
+      kappa, update_op = metrics.cohen_kappa(labels, predictions, 4,
+                                             weights=weights)
+
+      sess.run(variables.local_variables_initializer())
+      self.assertAlmostEqual(expect, sess.run(update_op), 5)
+      self.assertAlmostEqual(expect, kappa.eval(), 5)
+
+  def testWithMultipleUpdates(self):
+    confusion_matrix = np.array([
+      [90, 30, 10, 20],
+      [40, 80, 20, 30],
+      [20, 10, 60, 35],
+      [15, 25, 30, 25]])
+    labels, predictions = self._confusion_matrix_to_samples(confusion_matrix)
+    num_samples = np.sum(confusion_matrix, dtype=np.int32)
+    weights = (np.arange(0, num_samples) % 5) / 5.0
+    num_classes = confusion_matrix.shape[0]
+
+    batch_size = num_samples // 10
+    predictions_t = array_ops.placeholder(dtypes_lib.float32,
+                                          shape=(batch_size,))
+    labels_t = array_ops.placeholder(dtypes_lib.int32,
+                                     shape=(batch_size,))
+    weights_t = array_ops.placeholder(dtypes_lib.float32,
+                                      shape=(batch_size,))
+    kappa, update_op = metrics.cohen_kappa(
+        labels_t, predictions_t, num_classes, weights=weights_t)
+    with self.test_session() as sess:
+      sess.run(variables.local_variables_initializer())
+
+      for idx in range(0, num_samples, batch_size):
+        batch_start, batch_end = idx, idx + batch_size
+        sess.run(update_op,
+                 feed_dict={labels_t: labels[batch_start:batch_end],
+                            predictions_t: predictions[batch_start:batch_end],
+                            weights_t: weights[batch_start:batch_end]})
+      # Calculated by v0.19: sklearn.metrics.cohen_kappa_score(
+      #                          labels_np, predictions_np, sample_weight=weights_np)
+      expect = 0.289965397924
+      self.assertAlmostEqual(expect, kappa.eval(), 5)
+
+  def testInvalidNumClasses(self):
+    predictions = array_ops.placeholder(dtypes_lib.float32, shape=(4, 1))
+    labels = array_ops.placeholder(dtypes_lib.int32, shape=(4, 1))
+    with self.assertRaisesRegexp(ValueError, 'num_classes'):
+      metrics.cohen_kappa(labels, predictions, 1)
+
+  def testInvalidDimension(self):
+    predictions = array_ops.placeholder(dtypes_lib.float32, shape=(4, 1))
+    invalid_labels = array_ops.placeholder(dtypes_lib.int32, shape=(4, 2))
+    with self.assertRaises(ValueError):
+      metrics.cohen_kappa(invalid_labels, predictions, 3)
+
+    invalid_predictions = array_ops.placeholder(dtypes_lib.float32, shape=(4, 2))
+    labels = array_ops.placeholder(dtypes_lib.int32, shape=(4, 1))
+    with self.assertRaises(ValueError):
+      metrics.cohen_kappa(labels, invalid_predictions, 3)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/model_pruning/README.md b/tensorflow/contrib/model_pruning/README.md
index 764e126e0d64d5e6c6caf0a9f0d43a87995447eb..d286750c257e9a78a82c95c1fc872b3ca6972203 100644
--- a/tensorflow/contrib/model_pruning/README.md
+++ b/tensorflow/contrib/model_pruning/README.md
@@ -42,10 +42,13 @@ The pruning library allows for specification of the following hyper parameters:
 | name | string | model_pruning | Name of the pruning specification. Used for adding summaries and ops under a common tensorflow name_scope |
 | begin_pruning_step | integer | 0 | The global step at which to begin pruning |
 | end_pruning_step   | integer | -1 | The global step at which to terminate pruning. Defaults to -1 implying that pruning continues till  the training stops |
-| do_not_prune | list of strings | [""] | list of layers strings that are not pruned |
+| do_not_prune | list of strings | [""] | list of layers names that are not pruned |
 | threshold_decay | float | 0.9 | The decay factor to use for exponential decay of the thresholds |
 | pruning_frequency | integer | 10 | How often should the masks be updated? (in # of global_steps) |
 | nbins | integer | 255 | Number of bins to use for histogram computation |
+| block_height|integer | 1 | Number of rows in a block for block sparse matrices|
+| block_width |integer | 1 | Number of cols in a block for block sparse matrices|
+| block_pooling_function| string | AVG | The function to use to pool weight values in a block: average (AVG) or max (MAX)|
 | initial_sparsity | float | 0.0 | Initial sparsity value |
 | target_sparsity | float | 0.5 | Target sparsity value |
 | sparsity_function_begin_step | integer | 0 | The global step at this which the gradual sparsity function begins to take effect |
@@ -128,3 +131,12 @@ Eval:
 ```shell
 $ bazel-bin/$examples_dir/cifar10/cifar10_eval --run_once
 ```
+
+### Block Sparsity
+
+For some hardware architectures, it may be beneficial to induce spatially correlated sparsity. To train models in which the weight tensors have block sparse structure, set *block_height* and *block_width* hyperparameters to the desired block configuration (2x2, 4x4, 4x1, 1x8, etc). Currently, block sparsity is supported for weight tensors with rank 2 only. The matrix is partitioned into non-overlapping blocks of size *[block_height, block_dim]* and the either the average or max absolute value in this block is taken as a proxy for the entire block (set by *block_pooling_function* hyperparameter).
+The convolution layer tensors are always pruned used block dimensions of [1,1].
+
+## References
+
+Michael Zhu and Suyog Gupta, “To prune, or not to prune: exploring the efficacy of pruning for model compression”, *2017 NIPS Workshop on Machine Learning of Phones and other Consumer Devices* (https://arxiv.org/pdf/1710.01878.pdf)
diff --git a/tensorflow/contrib/model_pruning/python/layers/core_layers.py b/tensorflow/contrib/model_pruning/python/layers/core_layers.py
index 95dfd8f4213a8729f5954eb0626f28ecc9265bbb..764ab620bc2227ff5e8e3f473d689e0e133e83d4 100644
--- a/tensorflow/contrib/model_pruning/python/layers/core_layers.py
+++ b/tensorflow/contrib/model_pruning/python/layers/core_layers.py
@@ -210,7 +210,7 @@ class _MaskedConv(base.Layer):
       return self.activation(outputs)
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_last':
       space = input_shape[1:-1]
@@ -467,7 +467,7 @@ class MaskedFullyConnected(base.Layer):
       return self.activation(outputs)  # pylint: disable=not-callable
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape)
     input_shape = input_shape.with_rank_at_least(2)
     if input_shape[-1].value is None:
diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py
index 42d91a71fde41d8681d7a0c439d6c49325730418..d16af9da19816211ee22f6ea48a347f0b9a4e612 100644
--- a/tensorflow/contrib/model_pruning/python/pruning.py
+++ b/tensorflow/contrib/model_pruning/python/pruning.py
@@ -72,8 +72,10 @@ from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_impl
+from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import summary
 from tensorflow.python.training import training_util
@@ -129,6 +131,23 @@ def _weight_threshold_variable(var, scope):
     return threshold
 
 
+def _kronecker_product(mat1, mat2):
+  """Computes the Kronecker product of two matrices mat1 and mat2.
+
+  Args:
+    mat1: A matrix of size m x n
+    mat2: A matrix of size p x q
+  Returns:
+    Kronecker product of matrices mat1 and mat2 of size mp x nq
+  """
+
+  m1, n1 = mat1.get_shape().as_list()
+  mat1_rsh = array_ops.reshape(mat1, [m1, 1, n1, 1])
+  m2, n2 = mat2.get_shape().as_list()
+  mat2_rsh = array_ops.reshape(mat2, [1, m2, 1, n2])
+  return array_ops.reshape(mat1_rsh * mat2_rsh, [m1 * m2, n1 * n2])
+
+
 def _histogram(values, value_range, nbins=100, dtype=np.int32, name=None):
   """Return histogram of values.
 
@@ -297,6 +316,13 @@ def get_pruning_hparams():
       How often should the masks be updated? (in # of global_steps)
     nbins: integer
       number of bins to use for histogram computation
+    block_height: integer
+      number of rows in a block (defaults to 1)
+    block_width: integer
+      number of cols in a block (defaults to 1)
+    block_pooling_function: string
+      Whether to perform average (AVG) or max (MAX) pooling in the block
+      (default: AVG)
     initial_sparsity: float
       initial sparsity value
     target_sparsity: float
@@ -332,6 +358,9 @@ def get_pruning_hparams():
       threshold_decay=0.9,
       pruning_frequency=10,
       nbins=255,
+      block_height=1,
+      block_width=1,
+      block_pooling_function='AVG',
       initial_sparsity=0,
       target_sparsity=0.5,
       sparsity_function_begin_step=0,
@@ -341,11 +370,7 @@ def get_pruning_hparams():
 
 class Pruning(object):
 
-  def __init__(self,
-               spec=None,
-               global_step=None,
-               sparsity=None,
-               partitioner=None):
+  def __init__(self, spec=None, global_step=None, sparsity=None):
     """Set up the specification for model pruning.
 
     If a spec is provided, the sparsity is set up based on the sparsity_function
@@ -358,8 +383,6 @@ class Pruning(object):
       global_step: A tensorflow variable that is used while setting up the
         sparsity function
       sparsity: A tensorflow scalar variable storing the sparsity
-      partitioner: The tensorflow partitioner function used to distribute
-        parameters across shards
     """
     # Pruning specification
     self._spec = spec if spec else get_pruning_hparams()
@@ -373,9 +396,6 @@ class Pruning(object):
     # Built using self._setup_sparsity() or provided externally
     self._sparsity = sparsity if sparsity else self._setup_sparsity()
 
-    # Stores the partitioner function uses to partition variables across tasks/
-    self._partitioner = partitioner
-
     # List of tensorflow assignments ops for new masks and thresholds
     self._assign_ops = []
 
@@ -383,6 +403,12 @@ class Pruning(object):
     # were updated
     self._last_update_step = self._setup_last_update_step()
 
+    # Block dimensions
+    self._block_dim = [self._spec.block_height, self._spec.block_width]
+
+    # Block pooling function
+    self._block_pooling_function = self._spec.block_pooling_function
+
   def _setup_global_step(self, global_step):
     graph_global_step = global_step
     if graph_global_step is None:
@@ -457,9 +483,10 @@ class Pruning(object):
 
     Returns:
       new_threshold: The new value of the threshold based on weights, and
-        desired_sparsity
-      new_mask: A n-D numpy array containing 0 or 1 to indicate which of the
-        values in weights falls below the threshold
+        sparsity at the current global_step
+      new_mask: A numpy array of the same size and shape as weights containing
+        0 or 1 to indicate which of the values in weights falls below
+        the threshold
 
     Raises:
       ValueError: if sparsity is not defined
@@ -492,6 +519,63 @@ class Pruning(object):
           math_ops.greater(abs_weights, smoothed_threshold), np.float32)
     return smoothed_threshold, new_mask
 
+  def _maybe_update_block_mask(self, weights, threshold):
+    """Performs block-granular masking of the weights.
+
+    Block pruning occurs only if the block_height or block_width is > 1 and
+    if the weight tensor has ndims = 2. Otherwise, elementwise pruning occurs.
+    Args:
+      weights: The weight tensor that needs to be masked.
+      threshold: The current threshold value. The function will compute a new
+        threshold and return the exponential moving average using the current
+        value of threshold
+
+    Returns:
+      new_threshold: The new value of the threshold based on weights, and
+        sparsity at the current global_step
+      new_mask: A numpy array of the same size and shape as weights containing
+        0 or 1 to indicate which of the values in weights falls below
+        the threshold
+
+    Raises:
+      ValueError: if block pooling function is not AVG or MAX
+    """
+    if weights.get_shape().ndims != 2 or self._block_dim == [1, 1]:
+      return self._update_mask(weights, threshold)
+
+    if self._block_pooling_function not in ['AVG', 'MAX']:
+      raise ValueError('Unknown pooling function for block sparsity: %s' %
+                       self._block_pooling_function)
+
+    with ops.name_scope(weights.op.name + '_pruning_ops'):
+      abs_weights = math_ops.abs(
+          array_ops.reshape(
+              weights, [1, weights.get_shape()[0],
+                        weights.get_shape()[1], 1]))
+      pool_window = [self._block_dim[0], self._block_dim[1]]
+      pooled_weights = nn_ops.pool(
+          abs_weights,
+          window_shape=pool_window,
+          pooling_type=self._block_pooling_function,
+          strides=pool_window,
+          padding='SAME',
+          name=weights.op.name + '_pooled')
+
+      smoothed_threshold, new_mask = self._update_mask(pooled_weights,
+                                                       threshold)
+
+      reshaped_mask = array_ops.reshape(
+          new_mask,
+          [pooled_weights.get_shape()[1],
+           pooled_weights.get_shape()[2]])
+      updated_mask = _kronecker_product(reshaped_mask,
+                                        array_ops.ones(self._block_dim))
+      sliced_mask = array_ops.slice(
+          updated_mask, [0, 0],
+          [weights.get_shape()[0],
+           weights.get_shape()[1]])
+    return smoothed_threshold, sliced_mask
+
   def _get_mask_assign_ops(self):
     # Make sure the assignment ops have not already been added to the list
     if self._assign_ops:
@@ -509,18 +593,21 @@ class Pruning(object):
 
     for index, mask in enumerate(masks):
       threshold = thresholds[index]
-      weight = weights[index] if self._partitioner is None else weights[
-          index].as_tensor()
+      weight = weights[index]
+      is_partitioned = isinstance(weight, variables.PartitionedVariable)
+      if is_partitioned:
+        weight = weight.as_tensor()
 
       if self._spec.do_not_prune:
         if self._exists_in_do_not_prune_list(mask.name):
           continue
 
-      new_threshold, new_mask = self._update_mask(weight, threshold)
+      new_threshold, new_mask = self._maybe_update_block_mask(weight, threshold)
       self._assign_ops.append(_variable_assign(threshold, new_threshold))
+
       self._assign_ops.append(
-          _variable_assign(mask, new_mask) if self._partitioner is None else
-          _partitioned_variable_assign(mask, new_mask))
+          _partitioned_variable_assign(mask, new_mask)
+          if is_partitioned else _variable_assign(mask, new_mask))
 
   def mask_update_op(self):
     with ops.name_scope(self._spec.name):
diff --git a/tensorflow/contrib/model_pruning/python/pruning_test.py b/tensorflow/contrib/model_pruning/python/pruning_test.py
index c23fd649ce1fc72a2e8d516bfa3750b7ced1b111..1767b4bb94a9bb56bc6a4933423ad27d8cf3ed35 100644
--- a/tensorflow/contrib/model_pruning/python/pruning_test.py
+++ b/tensorflow/contrib/model_pruning/python/pruning_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.model_pruning.python import pruning
+from tensorflow.python.framework import constant_op
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import random_ops
@@ -111,6 +112,39 @@ class PruningTest(test.TestCase):
       masked_weights_val = masked_weights.eval()
       self.assertAllEqual(np.count_nonzero(masked_weights_val), 51)
 
+  def _blockMasking(self, hparams, weights, expected_mask):
+
+    threshold = variables.Variable(0.0, name="threshold")
+    sparsity = variables.Variable(0.51, name="sparsity")
+    test_spec = ",".join(hparams)
+    pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
+
+    # Set up pruning
+    p = pruning.Pruning(pruning_hparams, sparsity=sparsity)
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      _, new_mask = p._maybe_update_block_mask(weights, threshold)
+      # Check if the mask is the same size as the weights
+      self.assertAllEqual(new_mask.get_shape(), weights.get_shape())
+      mask_val = new_mask.eval()
+      self.assertAllEqual(mask_val, expected_mask)
+
+  def testBlockMasking(self):
+    param_list = ["block_height=2", "block_width=2", "threshold_decay=0"]
+
+    weights_avg = constant_op.constant(
+        [[0.1, 0.1, 0.2, 0.2], [0.1, 0.1, 0.2, 0.2], [0.3, 0.3, 0.4, 0.4],
+         [0.3, 0.3, 0.4, 0.4]])
+    weights_max = constant_op.constant(
+        [[0.1, 0.0, 0.2, 0.0], [0.0, -0.1, 0.0, -0.2], [0.3, 0.0, 0.4, 0.0],
+         [0.0, -0.3, 0.0, -0.4]])
+    expected_mask = [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]]
+
+    self._blockMasking(param_list + ["block_pooling_function=MAX"], weights_max,
+                       expected_mask)
+    self._blockMasking(param_list + ["block_pooling_function=AVG"],
+                       weights_avg, expected_mask)
+
   def testPartitionedVariableMasking(self):
     partitioner = partitioned_variables.variable_axis_size_partitioner(40)
     with self.test_session() as session:
@@ -120,7 +154,7 @@ class PruningTest(test.TestCase):
             "weights", initializer=math_ops.linspace(1.0, 100.0, 100))
         masked_weights = pruning.apply_mask(
             weights, scope=variable_scope.get_variable_scope())
-      p = pruning.Pruning(sparsity=sparsity, partitioner=partitioner)
+      p = pruning.Pruning(sparsity=sparsity)
       p._spec.threshold_decay = 0.0
       mask_update_op = p.mask_update_op()
       variables.global_variables_initializer().run()
diff --git a/tensorflow/contrib/mpi_collectives/BUILD b/tensorflow/contrib/mpi_collectives/BUILD
index 11c5d6e776d6adbf7c439012027752e2235883ab..9f9802b8fe12356c0da82ebb2b48b565cf3f7319 100644
--- a/tensorflow/contrib/mpi_collectives/BUILD
+++ b/tensorflow/contrib/mpi_collectives/BUILD
@@ -6,20 +6,9 @@ package(default_visibility = [
 
 licenses(["notice"])  # Apache 2.0
 
-filegroup(
-    name = "all_files",
-    srcs = glob(
-        ["**/*"],
-        exclude = [
-            "**/METADATA",
-            "**/OWNERS",
-        ],
-    ),
-    visibility = ["//tensorflow:__subpackages__"],
-)
-
 load(
     "//tensorflow/core:platform/default/build_config.bzl",
+    "tf_additional_mpi_lib_defines",
     "tf_proto_library_cc",
 )
 
@@ -33,26 +22,98 @@ tf_proto_library_cc(
     ],
 )
 
-load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")
-load("//tensorflow:tensorflow.bzl", "tf_py_test")
+cc_library(
+    name = "mpi_defines",
+    defines = tf_additional_mpi_lib_defines(),
+)
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_custom_op_py_library",
+    "tf_custom_op_library",
+    "tf_gen_op_wrapper_py",
+    "tf_gen_op_libs",
+    "tf_kernel_library",
+    "tf_py_test",
+)
 
 tf_custom_op_library(
-    name = "mpi_collectives.so",
+    name = "python/ops/_mpi_ops.so",
     srcs = [
-        "mpi_ops.cc",
-        "ring.cc",
-        "ring.h",
+        "kernels/mpi_ops.cc",
+        "kernels/ring.cc",
+        "kernels/ring.h",
+        "ops/mpi_ops.cc",
     ],
     gpu_srcs = [
-        "ring.cu.cc",
-        "ring.h",
+        "kernels/ring.cu.cc",
+        "kernels/ring.h",
     ],
     deps = [
+        ":mpi_defines",
         ":mpi_message_proto_cc",
         "//third_party/mpi",
     ],
 )
 
+tf_kernel_library(
+    name = "mpi_ops_kernels",
+    srcs = [
+        "kernels/mpi_ops.cc",
+        "kernels/ring.cc",
+    ],
+    hdrs = [
+        "kernels/ring.h",
+    ],
+    gpu_srcs = [
+        "kernels/ring.cu.cc",
+    ],
+    deps = [
+        ":mpi_defines",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:gpu_headers_lib",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:proto_text",
+        "//tensorflow/core:stream_executor",
+    ],
+    # TODO: Include?    alwayslink = 1,
+)
+
+tf_gen_op_libs(
+    op_lib_names = ["mpi_ops"],
+)
+
+tf_gen_op_wrapper_py(
+    name = "mpi_ops",
+    deps = [":mpi_ops_op_lib"],
+)
+
+tf_custom_op_py_library(
+    name = "mpi_collectives_py",
+    srcs = [
+        "__init__.py",
+        "python/ops/mpi_ops.py",
+    ],
+    dso = [
+        ":python/ops/_mpi_ops.so",
+    ],
+    kernels = [
+        ":mpi_ops_kernels",
+        ":mpi_ops_op_lib",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":mpi_ops",
+        "//tensorflow/contrib/util:util_py",
+        "//tensorflow/python:device",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:util",
+    ],
+)
+
 tf_py_test(
     name = "mpi_ops_test",
     srcs = ["mpi_ops_test.py"],
@@ -61,20 +122,19 @@ tf_py_test(
         "//tensorflow/python:platform",
     ],
     data = [
-        ":mpi_collectives.so",
+        ":python/ops/_mpi_ops.so",
     ],
     tags = ["manual"],
 )
 
-py_library(
-    name = "mpi_ops_py",
-    srcs = [
-        "__init__.py",
-        "mpi_ops.py",
-    ],
-    data = [
-        ":mpi_collectives.so",
-    ],
-    srcs_version = "PY2AND3",
-    visibility = ["//visibility:public"],
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
 )
diff --git a/tensorflow/contrib/mpi_collectives/__init__.py b/tensorflow/contrib/mpi_collectives/__init__.py
index 9ed16a6f078a506b60fd14f4356ff65a0a692203..52029cbc36a3bb77ea38d3973a75bfd37e93bfa4 100644
--- a/tensorflow/contrib/mpi_collectives/__init__.py
+++ b/tensorflow/contrib/mpi_collectives/__init__.py
@@ -37,7 +37,7 @@ for detecting the running MPI configuration.
 Example:
 
 ```python
-from tensorflow.contrib import mpi
+import tensorflow.contrib.mpi_collectives as mpi
 
 # Use `mpi.Session` instead of `tf.Session`
 with mpi.Session() as session:
@@ -48,8 +48,10 @@ with mpi.Session() as session:
         print("MPI Size:", session.run(mpi.size()))
 ```
 
-@@rank
+@@init
 @@size
+@@rank
+@@local_rank
 
 ### Ring Allreduce and Allgather
 
@@ -123,12 +125,12 @@ from __future__ import print_function
 
 import tensorflow as tf
 
-from tensorflow.contrib.mpi_collectives.mpi_ops import size
-from tensorflow.contrib.mpi_collectives.mpi_ops import rank
-from tensorflow.contrib.mpi_collectives.mpi_ops import local_rank
-from tensorflow.contrib.mpi_collectives.mpi_ops import allgather
-from tensorflow.contrib.mpi_collectives.mpi_ops import _allreduce
-from tensorflow.contrib.mpi_collectives.mpi_ops import init
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import init
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import size
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import rank
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import local_rank
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import allgather
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import _allreduce
 
 
 def allreduce(tensor, average=True):
diff --git a/tensorflow/contrib/mpi_collectives/mpi_ops.cc b/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc
similarity index 93%
rename from tensorflow/contrib/mpi_collectives/mpi_ops.cc
rename to tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc
index a051ab0004626d034071112bb37671137ca5a3f0..2d5b98022c3aafb627e986a2764ee60184014945 100644
--- a/tensorflow/contrib/mpi_collectives/mpi_ops.cc
+++ b/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc
@@ -21,7 +21,6 @@ limitations under the License.
 
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/platform/mutex.h"
 
@@ -37,7 +36,7 @@ limitations under the License.
 #define OMPI_SKIP_MPICXX
 #include "third_party/mpi/mpi.h"
 #include "tensorflow/contrib/mpi_collectives/mpi_message.pb.h"
-#include "tensorflow/contrib/mpi_collectives/ring.h"
+#include "tensorflow/contrib/mpi_collectives/kernels/ring.h"
 
 /*
  * MPI Allreduce and Allgather Ops for TensorFlow.
@@ -81,7 +80,7 @@ using GPUDevice = Eigen::GpuDevice;
 
 namespace tensorflow {
 namespace contrib {
-namespace mpi {
+namespace mpi_collectives {
 
 // Make sure template specializations are generated in the ring.cu.cc and the
 // ring.cc file, not in this file.
@@ -877,14 +876,6 @@ REGISTER_KERNEL_BUILDER(Name("MPIInit").Device(DEVICE_GPU),
                         MPIInitOp<GPUDevice>);
 #endif
 
-REGISTER_OP("MPIInit").Doc(R"doc(
-Initialize MPI for the current process.
-
-If this is run on a GPU, then that GPU must be used for all future MPI
-operations. If it is run on CPU, then all future MPI operations must also
-run on CPU.
-)doc");
-
 // Op to get the current MPI Size.
 template <typename Device>
 class MPISizeOp : public OpKernel {
@@ -911,21 +902,6 @@ REGISTER_KERNEL_BUILDER(Name("MPISize").Device(DEVICE_GPU).HostMemory("size"),
                         MPISizeOp<GPUDevice>);
 #endif
 
-REGISTER_OP("MPISize")
-    .Output("size: int32")
-    .SetShapeFn([](shape_inference::InferenceContext* c) {
-      c->set_output(0, c->Scalar());
-      return Status::OK();
-    })
-    .Doc(R"doc(
-Returns the number of running MPI processes.
-
-More precisely, returns the number of MPI processes in the group associated
-with the MPI_COMM_WORLD communicator.
-
-size:   Size of the MPI group.
-)doc");
-
 // Op to get the current MPI Rank.
 template <typename Device>
 class MPIRankOp : public OpKernel {
@@ -952,21 +928,6 @@ REGISTER_KERNEL_BUILDER(Name("MPIRank").Device(DEVICE_GPU).HostMemory("rank"),
                         MPIRankOp<GPUDevice>);
 #endif
 
-REGISTER_OP("MPIRank")
-    .Output("rank: int32")
-    .SetShapeFn([](shape_inference::InferenceContext* c) {
-      c->set_output(0, c->Scalar());
-      return Status::OK();
-    })
-    .Doc(R"doc(
-Returns the index of the current process in the MPI group.
-
-More precisely, returns the rank of the calling process in the MPI_COMM_WORLD
-communicator.
-
-rank:   Rank of the calling process.
-)doc");
-
 // Op to get the current local MPI Rank.
 template <typename Device>
 class MPILocalRankOp : public OpKernel {
@@ -994,21 +955,6 @@ REGISTER_KERNEL_BUILDER(
     MPILocalRankOp<GPUDevice>);
 #endif
 
-REGISTER_OP("MPILocalRank")
-    .Output("rank: int32")
-    .SetShapeFn([](shape_inference::InferenceContext* c) {
-      c->set_output(0, c->Scalar());
-      return Status::OK();
-    })
-    .Doc(R"doc(
-Returns the index of the current process in the node it is on.
-
-More precisely, returns the rank of the calling process in communicator that
-only spans the MPI processes running on that node.
-
-rank:   Rank of the calling process on the node it is on.
-)doc");
-
 template <typename Device>
 class MPIAllreduceOp : public AsyncOpKernel {
  public:
@@ -1083,28 +1029,6 @@ REGISTER_KERNEL_BUILDER(Name("MPIAllreduce").Device(DEVICE_GPU),
                         MPIAllreduceOp<GPUDevice>);
 #endif
 
-REGISTER_OP("MPIAllreduce")
-    .Attr("T: {int32, int64, float32}")
-    .Input("tensor: T")
-    .Output("sum: T")
-    .SetShapeFn([](shape_inference::InferenceContext* c) {
-      c->set_output(0, c->input(0));
-      return Status::OK();
-    })
-    .Doc(R"doc(
-Perform an MPI Allreduce on a tensor. All other processes that do a reduction
-on a tensor with the same name must have the same dimension for that tensor.
-Tensors are reduced with other tensors that have the same node name for the
-allreduce.
-
-Arguments
-    tensor:     A tensor to reduce.
-
-Output
-    sum:        A tensor with the same shape as `tensor`, summed across all
-                MPI processes.
-)doc");
-
 template <typename Device>
 class MPIAllgatherOp : public AsyncOpKernel {
  public:
@@ -1192,34 +1116,6 @@ class MPIAllgatherOp : public AsyncOpKernel {
   }
 };
 
-REGISTER_OP("MPIAllgather")
-    .Attr("T: {int32, int64, float32}")
-    .Attr("S: {int64}")
-    .Input("tensor: T")
-    .Input("sizes: S")
-    .Output("gathered: T")
-    .SetShapeFn([](shape_inference::InferenceContext* c) {
-      shape_inference::ShapeHandle output;
-      TF_RETURN_IF_ERROR(
-          c->ReplaceDim(c->input(0), 0, c->UnknownDim(), &output));
-      c->set_output(0, output);
-      return Status::OK();
-    })
-    .Doc(R"doc(
-Perform an MPI Allgather on a tensor. All other processes that do a gather on a
-tensor with the same name must have the same rank for that tensor, and have the
-same dimension on all but the first dimension.
-
-Arguments
-    tensor:     A tensor to gather.
-    sizes:      A tensor containing the first-dimension sizes of tensors to be
-                gathered from other ranks
-
-Output
-    gathered:   A tensor with the same shape as `tensor` except for the first
-                dimension, which is the sum of dimensions in `sizes`.
-)doc");
-
 REGISTER_KERNEL_BUILDER(
     Name("MPIAllgather").Device(DEVICE_CPU).HostMemory("sizes"),
     MPIAllgatherOp<CPUDevice>);
@@ -1229,7 +1125,7 @@ REGISTER_KERNEL_BUILDER(
     MPIAllgatherOp<GPUDevice>);
 #endif
 
-}  // namespace mpi
+}  // namespace mpi_collectives
 }  // namespace contrib
 }  // namespace tensorflow
 
diff --git a/tensorflow/contrib/mpi_collectives/ring.cc b/tensorflow/contrib/mpi_collectives/kernels/ring.cc
similarity index 96%
rename from tensorflow/contrib/mpi_collectives/ring.cc
rename to tensorflow/contrib/mpi_collectives/kernels/ring.cc
index d93233eb210b80df10fd9c2c7975ce77112d18a2..8970ceb1a206ff2f9d6e18f7d19e313b8a036042 100644
--- a/tensorflow/contrib/mpi_collectives/ring.cc
+++ b/tensorflow/contrib/mpi_collectives/kernels/ring.cc
@@ -17,11 +17,11 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/contrib/mpi_collectives/ring.h"
+#include "tensorflow/contrib/mpi_collectives/kernels/ring.h"
 
 namespace tensorflow {
 namespace contrib {
-namespace mpi {
+namespace mpi_collectives {
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
@@ -73,7 +73,7 @@ GENERATE_ACCUMULATE(long long);
 GENERATE_ACCUMULATE(float);
 #undef GENERATE_ACCUMULATE
 
-}  // namespace mpi
+}  // namespace mpi_collectives
 }  // namespace contrib
 }  // namespace tensorflow
 
diff --git a/tensorflow/contrib/mpi_collectives/ring.cu.cc b/tensorflow/contrib/mpi_collectives/kernels/ring.cu.cc
similarity index 97%
rename from tensorflow/contrib/mpi_collectives/ring.cu.cc
rename to tensorflow/contrib/mpi_collectives/kernels/ring.cu.cc
index 2f3eef366a9a3c10e59cd5298fc1626e1094dff8..b04abde4694199d827a1738850bded9bf696d56c 100644
--- a/tensorflow/contrib/mpi_collectives/ring.cu.cc
+++ b/tensorflow/contrib/mpi_collectives/kernels/ring.cu.cc
@@ -19,11 +19,11 @@ limitations under the License.
 
 #define EIGEN_USE_GPU
 
-#include "tensorflow/contrib/mpi_collectives/ring.h"
+#include "tensorflow/contrib/mpi_collectives/kernels/ring.h"
 
 namespace tensorflow {
 namespace contrib {
-namespace mpi {
+namespace mpi_collectives {
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
@@ -109,7 +109,7 @@ GENERATE_ACCUMULATE(long long);
 GENERATE_ACCUMULATE(float);
 #undef GENERATE_ACCUMULATE
 
-}  // namespace mpi
+}  // namespace mpi_collectives
 }  // namespace contrib
 }  // namespace tensorflow
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/mpi_collectives/ring.h b/tensorflow/contrib/mpi_collectives/kernels/ring.h
similarity index 99%
rename from tensorflow/contrib/mpi_collectives/ring.h
rename to tensorflow/contrib/mpi_collectives/kernels/ring.h
index cae57ce60eb09509af69f8ccab9eacedea361548..1d56d588bc49eda542303ae6ebb19602352ae01d 100644
--- a/tensorflow/contrib/mpi_collectives/ring.h
+++ b/tensorflow/contrib/mpi_collectives/kernels/ring.h
@@ -37,7 +37,7 @@ limitations under the License.
 
 namespace tensorflow {
 namespace contrib {
-namespace mpi {
+namespace mpi_collectives {
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 using GPUDevice = Eigen::GpuDevice;
@@ -317,7 +317,7 @@ Status RingAllgather(OpKernelContext* context, const Tensor* input,
   return Status::OK();
 }
 
-}  // namespace mpi
+}  // namespace mpi_collectives
 }  // namespace contrib
 }  // namespace tensorflow
 
diff --git a/tensorflow/contrib/mpi_collectives/mpi_message.proto b/tensorflow/contrib/mpi_collectives/mpi_message.proto
index 7fa5e203010465766b8ab9562cac010de51a7bbc..afbce981ae1bdd5ae143ba5c45a4d9790a52fafc 100644
--- a/tensorflow/contrib/mpi_collectives/mpi_message.proto
+++ b/tensorflow/contrib/mpi_collectives/mpi_message.proto
@@ -15,7 +15,7 @@ limitations under the License.
 
 syntax = "proto3";
 
-package tensorflow.contrib.mpi;
+package tensorflow.contrib.mpi_collectives;
 
 import "tensorflow/core/framework/tensor_shape.proto";
 import "tensorflow/core/framework/types.proto";
diff --git a/tensorflow/contrib/mpi_collectives/ops/mpi_ops.cc b/tensorflow/contrib/mpi_collectives/ops/mpi_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..18e6bb61cffc6471412cb4c5141655839d7ddb3a
--- /dev/null
+++ b/tensorflow/contrib/mpi_collectives/ops/mpi_ops.cc
@@ -0,0 +1,132 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef TENSORFLOW_USE_MPI
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+namespace contrib {
+namespace mpi_collectives {
+
+REGISTER_OP("MPIInit").Doc(R"doc(
+Initialize MPI for the current process.
+
+If this is run on a GPU, then that GPU must be used for all future MPI
+operations. If it is run on CPU, then all future MPI operations must also
+run on CPU.
+)doc");
+
+REGISTER_OP("MPISize")
+    .Output("size: int32")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Returns the number of running MPI processes.
+
+More precisely, returns the number of MPI processes in the group associated
+with the MPI_COMM_WORLD communicator.
+
+size:   Size of the MPI group.
+)doc");
+
+REGISTER_OP("MPIRank")
+    .Output("rank: int32")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Returns the index of the current process in the MPI group.
+
+More precisely, returns the rank of the calling process in the MPI_COMM_WORLD
+communicator.
+
+rank:   Rank of the calling process.
+)doc");
+
+REGISTER_OP("MPILocalRank")
+    .Output("rank: int32")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Returns the index of the current process in the node it is on.
+
+More precisely, returns the rank of the calling process in communicator that
+only spans the MPI processes running on that node.
+
+rank:   Rank of the calling process on the node it is on.
+)doc");
+
+REGISTER_OP("MPIAllreduce")
+    .Attr("T: {int32, int64, float32}")
+    .Input("tensor: T")
+    .Output("sum: T")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->input(0));
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Perform an MPI Allreduce on a tensor. All other processes that do a reduction
+on a tensor with the same name must have the same dimension for that tensor.
+Tensors are reduced with other tensors that have the same node name for the
+allreduce.
+
+Arguments
+    tensor:     A tensor to reduce.
+
+Output
+    sum:        A tensor with the same shape as `tensor`, summed across all
+                MPI processes.
+)doc");
+
+REGISTER_OP("MPIAllgather")
+    .Attr("T: {int32, int64, float32}")
+    .Attr("S: {int64}")
+    .Input("tensor: T")
+    .Input("sizes: S")
+    .Output("gathered: T")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      shape_inference::ShapeHandle output;
+      TF_RETURN_IF_ERROR(
+          c->ReplaceDim(c->input(0), 0, c->UnknownDim(), &output));
+      c->set_output(0, output);
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Perform an MPI Allgather on a tensor. All other processes that do a gather on a
+tensor with the same name must have the same rank for that tensor, and have the
+same dimension on all but the first dimension.
+
+Arguments
+    tensor:     A tensor to gather.
+    sizes:      A tensor containing the first-dimension sizes of tensors to be
+                gathered from other ranks
+
+Output
+    gathered:   A tensor with the same shape as `tensor` except for the first
+                dimension, which is the sum of dimensions in `sizes`.
+)doc");
+
+}  // namespace mpi_collectives
+}  // namespace contrib
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_USE_MPI
diff --git a/tensorflow/contrib/mpi_collectives/mpi_ops.py b/tensorflow/contrib/mpi_collectives/python/ops/mpi_ops.py
similarity index 71%
rename from tensorflow/contrib/mpi_collectives/mpi_ops.py
rename to tensorflow/contrib/mpi_collectives/python/ops/mpi_ops.py
index 81567cc688ac8666c3755d5f84162a6dff869107..f0a116239d6f4f7271c2a8f68806ff1ccaae80ae 100644
--- a/tensorflow/contrib/mpi_collectives/mpi_ops.py
+++ b/tensorflow/contrib/mpi_collectives/python/ops/mpi_ops.py
@@ -20,44 +20,13 @@ from __future__ import print_function
 
 import tensorflow as tf
 
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import load_library
+from tensorflow.contrib.mpi_collectives.ops import gen_mpi_ops
+from tensorflow.contrib.util import loader
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import resource_loader
-from tensorflow.python.platform import tf_logging as logging
-
-
-def _load_library(name, op_list=None):
-  """Loads a .so file containing the specified operators.
-
-  Args:
-    name: The name of the .so file to load.
-    op_list: A list of names of operators that the library should have. If None
-        then the .so file's contents will not be verified.
-
-  Raises:
-    NameError if one of the required ops is missing.
-  """
-  try:
-    filename = resource_loader.get_path_to_datafile(name)
-    library = load_library.load_op_library(filename)
-    for expected_op in (op_list or []):
-      for lib_op in library.OP_LIST.op:
-        if lib_op.name == expected_op:
-          break
-      else:
-        raise NameError(
-          'Could not find operator %s in dynamic library %s' %
-          (expected_op, name))
-    return library
-  except errors.NotFoundError:
-    logging.warning('%s file could not be loaded.', name)
-
-
-MPI_LIB = _load_library('mpi_collectives.so', ['MPISize', 'MPIRank',
-                                               'MPILocalRank', 'MPIAllgather',
-                                               'MPIAllreduce'])
 
+_mpi_ops_so = loader.load_op_library(
+    resource_loader.get_path_to_datafile("_mpi_ops.so"))
 
 def size(name=None):
   """An op which returns the number of MPI processes.
@@ -68,7 +37,7 @@ def size(name=None):
   Returns:
     An integer scalar containing the number of MPI processes.
   """
-  return MPI_LIB.mpi_size(name=name)
+  return gen_mpi_ops.mpi_size(name=name)
 
 
 ops.NotDifferentiable('MPISize')
@@ -83,7 +52,7 @@ def rank(name=None):
   Returns:
     An integer scalar with the MPI rank of the calling process.
   """
-  return MPI_LIB.mpi_rank(name=name)
+  return gen_mpi_ops.mpi_rank(name=name)
 
 
 ops.NotDifferentiable('MPIRank')
@@ -95,7 +64,7 @@ def init(name=None):
   All future MPI ops must be run on the same device that the `init` op was run
   on.
   """
-  return MPI_LIB.mpi_init(name=name)
+  return gen_mpi_ops.mpi_init(name=name)
 
 
 ops.NotDifferentiable('MPIInit')
@@ -112,7 +81,7 @@ def local_rank(name=None):
   Returns:
     An integer scalar with the local MPI rank of the calling process.
   """
-  return MPI_LIB.mpi_local_rank(name=name)
+  return gen_mpi_ops.mpi_local_rank(name=name)
 
 
 ops.NotDifferentiable('MPILocalRank')
@@ -129,7 +98,7 @@ def _allreduce(tensor, name=None):
     A tensor of the same shape and type as `tensor`, summed across all
     processes.
   """
-  return MPI_LIB.mpi_allreduce(tensor, name=name)
+  return gen_mpi_ops.mpi_allreduce(tensor, name=name)
 
 
 ops.NotDifferentiable('MPIAllreduce')
@@ -156,8 +125,8 @@ def allgather(tensor, name=None):
   if name is None:
     name = "allgather"
   sizing_name = "{}_sizing".format(name)
-  sizes = MPI_LIB.mpi_allgather(my_size, sizes_flag, name=sizing_name)
-  return MPI_LIB.mpi_allgather(tensor, sizes, name=name)
+  sizes = gen_mpi_ops.mpi_allgather(my_size, sizes_flag, name=sizing_name)
+  return gen_mpi_ops.mpi_allgather(tensor, sizes, name=name)
 
 
 ops.NotDifferentiable('MPIAllgather')
diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD
index df9dbb457ace32ab804f7fc736a23f5b08bd077a..5ac96007df7ee08b1e32aacd28f83768859810a9 100644
--- a/tensorflow/contrib/nccl/BUILD
+++ b/tensorflow/contrib/nccl/BUILD
@@ -23,15 +23,17 @@ load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 tf_custom_op_library(
     name = "python/ops/_nccl_ops.so",
     srcs = [
+        "ops/nccl_ops.cc",
+    ],
+    gpu_srcs = [
         "kernels/nccl_manager.cc",
         "kernels/nccl_manager.h",
         "kernels/nccl_ops.cc",
-        "ops/nccl_ops.cc",
     ],
-    deps = [
-        "//tensorflow/core:gpu_headers_lib",
+    deps = if_cuda([
         "@nccl_archive//:nccl",
-    ],
+        "//tensorflow/core:gpu_headers_lib",
+    ]),
 )
 
 tf_cuda_cc_test(
@@ -52,17 +54,14 @@ tf_cuda_cc_test(
         "no_oss",
         "notap",
     ],
-    deps = if_cuda(
+    deps =
         [
-            "@nccl_archive//:nccl",
             "//tensorflow/core:cuda",
+            "//tensorflow/core:test",
+            "//tensorflow/core:test_main",
+            "//tensorflow/core:testlib",
+            "@nccl_archive//:nccl",
         ],
-        [],
-    ) + [
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "//tensorflow/core:testlib",
-    ],
 )
 
 tf_kernel_library(
@@ -103,11 +102,8 @@ tf_custom_op_py_library(
         "__init__.py",
         "python/ops/nccl_ops.py",
     ],
-    dso = [
-        ":python/ops/_nccl_ops.so",
-    ],
-    kernels = [
-        ":nccl_kernels",
+    dso = [":python/ops/_nccl_ops.so"],
+    kernels = if_cuda([":nccl_kernels"]) + [
         ":nccl_ops_op_lib",
     ],
     srcs_version = "PY2AND3",
diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.cc b/tensorflow/contrib/nccl/kernels/nccl_manager.cc
index 31a35b0d53309bc2930b8a6f1b9d6a817b4a911e..913935b38246f1c5c0f7da4c1ea1f986bc00891b 100644
--- a/tensorflow/contrib/nccl/kernels/nccl_manager.cc
+++ b/tensorflow/contrib/nccl/kernels/nccl_manager.cc
@@ -258,9 +258,37 @@ NcclManager::Communicator* NcclManager::GetCommunicator(
     devices[i] = collective->participants[i]->gpu_device_id;
   }
 
+  int device_count = num_devices;
+#if NCCL_MAJOR >= 2
+  // NCCL2 prevents InitAll for more communicators than devices (but doesn't
+  // check that device ids are unique). Work around it by initializing each
+  // rank individually.
+  cudaGetDeviceCount(&device_count);
+#endif
   std::vector<ncclComm_t> nccl_comms(num_devices);
-  auto result = ncclCommInitAll(nccl_comms.data(), num_devices, devices.data());
-  CHECK_EQ(result, ncclSuccess) << ncclGetErrorString(result);
+  if (num_devices <= device_count) {
+    auto result =
+        ncclCommInitAll(nccl_comms.data(), num_devices, devices.data());
+    CHECK_EQ(result, ncclSuccess) << ncclGetErrorString(result);
+  } else {
+    int savedDevice = 0;
+    CHECK_EQ(cudaGetDevice(&savedDevice), cudaSuccess);
+    ncclUniqueId commId;
+    ncclGetUniqueId(&commId);
+#if NCCL_MAJOR >= 2
+    CHECK_EQ(ncclGroupStart(), ncclSuccess);
+#endif
+    for (int rank = 0; rank < num_devices; ++rank) {
+      cudaSetDevice(devices[rank]);
+      auto result =
+          ncclCommInitRank(nccl_comms.data() + rank, num_devices, commId, rank);
+      CHECK_EQ(result, ncclSuccess) << ncclGetErrorString(result);
+    }
+#if NCCL_MAJOR >= 2
+    CHECK_EQ(ncclGroupEnd(), ncclSuccess);
+#endif
+    cudaSetDevice(savedDevice);
+  }
   for (int rank = 0; rank < num_devices; ++rank) {
     members[rank].nccl_comm = nccl_comms[rank];
   }
diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
index 505c4b0d71028c64b5075cff7ea010597b4263b3..985b2bae2566c38dfb2c71a899e4b03bbb8fa55d 100644
--- a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
+++ b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #ifdef GOOGLE_CUDA
 
 #include <algorithm>
+#include <random>
 #include <vector>
 
 #include "tensorflow/contrib/nccl/kernels/nccl_manager.h"
@@ -30,6 +31,8 @@ namespace tensorflow {
 static std::vector<BaseGPUDevice*> GetGPUDevices() {
   std::vector<Device*> devices;
   SessionOptions session_options;
+  session_options.config.mutable_gpu_options()
+      ->set_per_process_gpu_memory_fraction(0.1);
   session_options.env = Env::Default();
   Status s = DeviceFactory::GetFactory(DEVICE_GPU)
                  ->AddDevices(session_options, "", &devices);
@@ -173,7 +176,7 @@ class NcclManagerTest : public ::testing::Test {
       auto out_gpu_mem = AsDeviceMemory(out_gpu.flat<float>().data());
       stream->ThenMemcpy(out_cpu.flat<float>().data(), out_gpu_mem,
                          out_cpu.TotalBytes());
-      stream->BlockHostUntilDone();
+      SE_ASSERT_OK(stream->BlockHostUntilDone());
       test::ExpectTensorEqual<float>(test_case->expected, out_cpu);
     }
   }
@@ -234,10 +237,11 @@ TEST_F(NcclManagerTest, MultipleCallers) {
     for (int i = 0; i < num_ranks; ++i) {
       auto* device = devices->at(i % devices->size());
       auto* stream = device->tensorflow_gpu_device_info()->stream;
-      stream->BlockHostUntilDone();
+      SE_ASSERT_OK(stream->BlockHostUntilDone());
     }
 
-    std::random_shuffle(case_and_device_num.begin(), case_and_device_num.end());
+    std::shuffle(case_and_device_num.begin(), case_and_device_num.end(),
+                 std::mt19937(std::random_device()()));
 
     mutex mu;  // guards case_and_device_num.
     std::unique_ptr<thread::ThreadPool> pool(
diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
index bad0abd44cc507c6ebbe4481f80b8cafd8480322..98fe394c5b38294700617591992d3207b0a4706b 100644
--- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
+++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
@@ -77,10 +77,6 @@ class NcclTestCase(test.TestCase):
       # same communicator across multiple sessions.
       with self.test_session(use_gpu=True) as sess:
 
-        # Check GPU availability *after* creating test session, see b/68975239.
-        if not test.is_gpu_available():
-          return  # Test requires access to a GPU
-
         for devices in device_sets:
           shape = (3, 4)
           random = (np.random.random_sample(shape) - .5) * 1024
@@ -100,6 +96,11 @@ class NcclTestCase(test.TestCase):
 
           result_tensors = [array_ops.identity(t) for t in reduce_tensors]
 
+          # Check GPU availability *after* creating session, see b/68975239.
+          if not test.is_gpu_available():
+            # If no GPU is available, only test graph construction.
+            continue
+
           # Test execution and results.
           for t in sess.run(result_tensors):
             self.assertAllClose(t, np_ans)
@@ -114,6 +115,7 @@ class NcclTestCase(test.TestCase):
       numpy_fn: A function taking two tensors and returning the gradient of the
           reduction of the two.
     """
+
     def _Gradient(tensors, devices):
       inputs = [array_ops.placeholder(t.dtype, t.shape) for t in tensors]
       reduce_tensors = nccl_reduce(inputs, devices)
@@ -164,12 +166,17 @@ class BroadcastTest(NcclTestCase):
                (['/device:GPU:0', '/device:GPU:0'],))
 
   def testBroadcastToCpuError(self):
-    # Broadcasts to CPU is not supported.
-    with self.assertRaisesRegexp(
-        errors.NotFoundError,
-        "No registered '_NcclBroadcastRecv' OpKernel for CPU devices"):
+    try:
+      # Broadcasts to CPU is not supported.
       self._Test(_NcclBroadcast, lambda x, y: x,
                  (['/device:GPU:0', '/device:CPU:0'],))
+    except errors.NotFoundError as e:
+      self.assertRegexpMatches(
+          str(e), "No registered '_NcclBroadcastRecv' OpKernel for CPU devices")
+    else:
+      # Session isn't executed when no GPU is available.
+      if test.is_gpu_available():
+        self.fail("Didn't raise NotFoundError trying to broadcast to CPU")
 
 
 class CombinedTest(NcclTestCase):
diff --git a/tensorflow/contrib/ndlstm/__init__.py b/tensorflow/contrib/ndlstm/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..52e83069cb0c68b510da46149248369dce376647 100644
--- a/tensorflow/contrib/ndlstm/__init__.py
+++ b/tensorflow/contrib/ndlstm/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc b/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc
index 62ee6630ac613c80a56d4e854cf7af4ae19f6faa..2b412fac9a621f01bd21c6b4391da3c462dd78b3 100644
--- a/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc
+++ b/tensorflow/contrib/nearest_neighbor/kernels/hyperplane_lsh_probes.cc
@@ -45,16 +45,16 @@ class HyperplaneLSHProbesOp : public OpKernel {
     const Tensor& products_tensor = context->input(0);
     OP_REQUIRES(context, products_tensor.dims() == 2,
                 InvalidArgument("Need a two-dimensional products tensor, got ",
-                                products_tensor.dims(), " dimensions."))
+                                products_tensor.dims(), " dimensions."));
 
     const Tensor& num_tables_tensor = context->input(1);
     OP_REQUIRES(context, num_tables_tensor.dims() == 0,
                 InvalidArgument("Need a scalar num_tables tensor, got ",
-                                num_tables_tensor.dims(), " dimensions."))
+                                num_tables_tensor.dims(), " dimensions."));
     int num_tables = num_tables_tensor.scalar<int32>()();
     OP_REQUIRES(context, num_tables >= 1,
                 InvalidArgument("num_tables must be at least 1 but got ",
-                                num_tables, "."))
+                                num_tables, "."));
     OP_REQUIRES(context, num_tables <= 1000,
                 InvalidArgument("Need num_tables <= 1000, got ", num_tables,
                                 ". This is mostly to protect against incorrect "
@@ -66,12 +66,13 @@ class HyperplaneLSHProbesOp : public OpKernel {
                 InvalidArgument("Need a scalar num_hyperplanes_per_table "
                                 "tensor, got ",
                                 num_hyperplanes_per_table_tensor.dims(),
-                                " dimensions."))
+                                " dimensions."));
     int num_hyperplanes_per_table =
         num_hyperplanes_per_table_tensor.scalar<int32>()();
     OP_REQUIRES(context, num_hyperplanes_per_table >= 1,
                 InvalidArgument("num_hyperplanes_per_table must be at least 1 "
-                                "but got ", num_hyperplanes_per_table, "."))
+                                "but got ",
+                                num_hyperplanes_per_table, "."));
     OP_REQUIRES(context, num_hyperplanes_per_table <= 30,
                 InvalidArgument("Need num_hyperplanes_per_table <= 30, got ",
                                 num_hyperplanes_per_table, ". "
@@ -81,10 +82,10 @@ class HyperplaneLSHProbesOp : public OpKernel {
     const Tensor& num_probes_tensor = context->input(3);
     OP_REQUIRES(context, num_probes_tensor.dims() == 0,
                 InvalidArgument("Need a scalar num_probes tensor, got ",
-                                num_probes_tensor.dims(), " dimensions."))
+                                num_probes_tensor.dims(), " dimensions."));
     int num_probes = num_probes_tensor.scalar<int32>()();
     OP_REQUIRES(context, num_probes >= 1,
-                InvalidArgument("num_probes must be at least 1."))
+                InvalidArgument("num_probes must be at least 1."));
 
     int expected_num_hyperplanes = num_tables * num_hyperplanes_per_table;
     OP_REQUIRES(
diff --git a/tensorflow/contrib/nn/BUILD b/tensorflow/contrib/nn/BUILD
index 56a24ac77f0b9a87b6e4db48cddacdf35f4855d0..5543eb6c6e3785978e9c878f309b9bd0863b0b0a 100644
--- a/tensorflow/contrib/nn/BUILD
+++ b/tensorflow/contrib/nn/BUILD
@@ -17,6 +17,7 @@ py_library(
         "python/ops/__init__.py",
         "python/ops/alpha_dropout.py",
         "python/ops/cross_entropy.py",
+        "python/ops/fwd_gradients.py",
         "python/ops/sampling_ops.py",
         "python/ops/scaled_softplus.py",
     ],
@@ -28,6 +29,7 @@ py_library(
         "//tensorflow/python:embedding_ops",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:function",
+        "//tensorflow/python:gradients",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:nn",
         "//tensorflow/python:nn_ops",
@@ -55,6 +57,19 @@ py_test(
     ],
 )
 
+py_test(
+    name = "fwd_gradients_test",
+    size = "small",
+    srcs = ["python/ops/fwd_gradients_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":nn_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:math_ops",
+    ],
+)
+
 py_test(
     name = "sampling_ops_test",
     size = "small",
diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py
index 0bc133a00e619930f1d5fe4c7a8996556b833ddf..96d60e149809aff6fcb7eff77edc23737db177e8 100644
--- a/tensorflow/contrib/nn/__init__.py
+++ b/tensorflow/contrib/nn/__init__.py
@@ -21,6 +21,7 @@
 @@deprecated_flipped_sigmoid_cross_entropy_with_logits
 @@nth_element
 @@rank_sampled_softmax_loss
+@@sampled_sparse_softmax_loss
 @@scaled_softplus
 """
 
diff --git a/tensorflow/contrib/nn/python/ops/fwd_gradients.py b/tensorflow/contrib/nn/python/ops/fwd_gradients.py
new file mode 100644
index 0000000000000000000000000000000000000000..922497779b1d6ce426df9d7bb8fb343eea48502b
--- /dev/null
+++ b/tensorflow/contrib/nn/python/ops/fwd_gradients.py
@@ -0,0 +1,76 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Forward-mode derivatives."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops.gradients_impl import gradients
+
+
+def fwd_gradients(ys, xs, grad_xs=None, assert_unused=False):
+  """Computes forward-mode derivatives.
+
+  This is accomplished in pure-python using tensorflow's existing (reverse-mode)
+  gradients. There is additional overhead on graph construction, but runtime
+  performance should be equal to a manual implementation [citation needed].
+
+  See https://j-towns.github.io/2017/06/12/A-new-trick.html and
+  https://github.com/HIPS/autograd/pull/175 for the original discussion of this
+  method, and https://github.com/renmengye/tensorflow-forward-ad for a "direct"
+  implementation.
+
+  Args:
+    ys: A list of tensors.
+    xs: A list of tensors.
+    grad_xs: An optional list of tensors. If provided, must have the same length
+      and shapes compatible with xs.
+    assert_unused: Add assertions that intermediate values are not computed.
+  Returns:
+    A list of tensors of the same shapes as ys. The directional derivatives of
+    ys with respect to xs in the direction grad_xs. Leaving grad_xs unspecified
+    is equivalent to passing in 1s for each x in xs.
+  """
+  # This version of forward-mode autodiff is based on code by Tim Cooijmans
+  # and handles list arguments and certain special cases such as when the
+  # ys doesn't depend on one or more of the xs, and when tf.IndexedSlices are
+  # generated by the first tf.gradients call.
+
+  us = [array_ops.zeros_like(y) + float('nan') for y in ys]
+
+  dydxs = gradients(ys, xs, grad_ys=us)
+
+  # deal with strange types that tf.gradients returns but can't deal with
+  dydxs = [ops.convert_to_tensor(dydx) if isinstance(dydx, ops.IndexedSlices)
+           else dydx for dydx in dydxs]
+
+  if assert_unused:
+    with ops.control_dependencies(dydxs):
+      assert_unused = control_flow_ops.Assert(False, [1], name='fwd_gradients')
+    with ops.control_dependencies([assert_unused]):
+      dydxs = array_ops.identity_n(dydxs)
+
+  dydxs = [array_ops.zeros_like(x) if dydx is None else dydx
+           for x, dydx in zip(xs, dydxs)]
+  for x, dydx in zip(xs, dydxs):
+    dydx.set_shape(x.shape)
+
+  dysdx = gradients(dydxs, us, grad_ys=grad_xs)
+
+  return dysdx
diff --git a/tensorflow/contrib/nn/python/ops/fwd_gradients_test.py b/tensorflow/contrib/nn/python/ops/fwd_gradients_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..56062c3cab32d727dd22a78d1f60c823a2f86a79
--- /dev/null
+++ b/tensorflow/contrib/nn/python/ops/fwd_gradients_test.py
@@ -0,0 +1,52 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for forward_ad.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.nn.python.ops import fwd_gradients
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class ForwardAdTest(test.TestCase):
+
+  def testSquare(self):
+    x = constant_op.constant(1.)
+    y = math_ops.square(x)
+    grad_x = 3.
+
+    dydx_tf = fwd_gradients.fwd_gradients([y], [x], [grad_x])[0]
+    dydx_py = 2. * grad_x
+
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(dydx_tf), dydx_py, 1e-6)
+
+  def testGather(self):
+    x = constant_op.constant([1., 2., 3.])
+    y = array_ops.gather(x, [0, 1])
+    y.set_shape([2])
+    dydx = fwd_gradients.fwd_gradients([y], [x], assert_unused=True)
+
+    with self.test_session() as sess:
+      sess.run(dydx)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py
index 98749cff7ee896436cdc40471929d9a3a8618dba..63fc487dca69a4777821595a0366d0ae0b393ce2 100644
--- a/tensorflow/contrib/nn/python/ops/sampling_ops.py
+++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py
@@ -24,6 +24,8 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_impl
+from tensorflow.python.ops import nn_ops
 
 
 def _rank_resample(weights, biases, inputs, sampled_values, num_resampled,
@@ -240,3 +242,101 @@ def rank_sampled_softmax_loss(weights,
         remove_accidental_hits=remove_accidental_hits,
         partition_strategy=partition_strategy,
         name=name)
+
+
+def sampled_sparse_softmax_loss(weights,
+                                biases,
+                                labels,
+                                inputs,
+                                num_sampled,
+                                num_classes,
+                                sampled_values=None,
+                                remove_accidental_hits=True,
+                                partition_strategy="mod",
+                                name="sampled_sparse_softmax_loss"):
+  """Computes and returns the sampled sparse softmax training loss.
+
+  This is a faster way to train a softmax classifier over a huge number of
+  classes.
+
+  This operation is for training only.  It is generally an underestimate of
+  the full softmax loss.
+
+  A common use case is to use this method for training, and calculate the full
+  softmax loss for evaluation or inference. In this case, you must set
+  `partition_strategy="div"` for the two losses to be consistent, as in the
+  following example:
+
+  ```python
+  if mode == "train":
+    loss = tf.nn.sampled_sparse_softmax_loss(
+        weights=weights,
+        biases=biases,
+        labels=labels,
+        inputs=inputs,
+        ...,
+        partition_strategy="div")
+  elif mode == "eval":
+    logits = tf.matmul(inputs, tf.transpose(weights))
+    logits = tf.nn.bias_add(logits, biases)
+    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
+        labels=tf.squeeze(labels),
+        logits=logits)
+  ```
+
+  See our [Candidate Sampling Algorithms Reference]
+  (https://www.tensorflow.org/extras/candidate_sampling.pdf)
+
+  Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
+  ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
+
+  Args:
+    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
+        objects whose concatenation along dimension 0 has shape
+        [num_classes, dim].  The (possibly-sharded) class embeddings.
+    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
+    labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`.
+        The index of the single target class for each row of logits.  Note that
+        this format differs from the `labels` argument of
+        `nn.sparse_softmax_cross_entropy_with_logits`.
+    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
+        activations of the input network.
+    num_sampled: An `int`.  The number of classes to randomly sample per batch.
+    num_classes: An `int`. The number of possible classes.
+    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
+        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
+        (if None, we default to `log_uniform_candidate_sampler`)
+    remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
+        where a sampled class equals one of the target classes.  Default is
+        True.
+    partition_strategy: A string specifying the partitioning strategy, relevant
+        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
+        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
+    name: A name for the operation (optional).
+
+  Returns:
+    A `batch_size` 1-D tensor of per-example sampled softmax losses.
+
+  """
+  logits, _ = nn_impl._compute_sampled_logits(
+      weights=weights,
+      biases=biases,
+      labels=labels,
+      inputs=inputs,
+      num_sampled=num_sampled,
+      num_classes=num_classes,
+      num_true=1,
+      sampled_values=sampled_values,
+      subtract_log_q=True,
+      remove_accidental_hits=remove_accidental_hits,
+      partition_strategy=partition_strategy,
+      name=name)
+
+  # There is only one true label. _compute_sampled_logits puts the true logit
+  # at index 0.
+  labels = array_ops.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64)
+
+  sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
+      labels=array_ops.squeeze(labels), logits=logits)
+  # sampled_losses is a [batch_size] tensor.
+  return sampled_losses
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index a9a63cbce0de807059b4756c4f9057081721b15a..9c961f2b9c828f7406516860b7e3fd3dc343d993 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -16,6 +16,7 @@ py_library(
         "__init__.py",
         "python/training/addsign.py",
         "python/training/drop_stale_gradient_optimizer.py",
+        "python/training/elastic_average_optimizer.py",
         "python/training/external_optimizer.py",
         "python/training/lazy_adam_optimizer.py",
         "python/training/moving_average_optimizer.py",
@@ -80,22 +81,22 @@ py_test(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "variable_clipping_optimizer_test",
     srcs = ["python/training/variable_clipping_optimizer_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "manual",  # Flaky: b/29892493
-        "notap",  # data race due to b/62910646
-    ],
-    deps = [
+    additional_deps = [
         ":opt_py",
+        "//third_party/py/numpy",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:session",
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
-        "//third_party/py/numpy",
+    ],
+    grpc_enabled = True,
+    tags = [
+        "manual",  # Flaky: b/29892493
+        "notap",  # data race due to b/62910646
     ],
 )
 
@@ -168,11 +169,30 @@ tf_py_test(
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
     ],
+    grpc_enabled = True,
     tags = [
         "no_oss",  # Flaky due to port collisions
     ],
 )
 
+tf_py_test(
+    name = "elastic_average_optimizer_test",
+    srcs = ["python/training/elastic_average_optimizer_test.py"],
+    additional_deps = [
+        ":opt_py",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:variables",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:training",
+        "//tensorflow/python:ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "sign_decay_test",
     srcs = ["python/training/sign_decay_test.py"],
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 3275ad8239aa91660f88b7bc149fb915b4fad9fe..90d2f924629800ccf26c160edd22c13b817f4584 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -28,6 +28,7 @@ from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import *
 from tensorflow.contrib.opt.python.training.nadam_optimizer import *
 from tensorflow.contrib.opt.python.training.powersign import *
 from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import *
+from tensorflow.contrib.opt.python.training.elastic_average_optimizer import *
 # pylint: enable=wildcard-import
 
 from tensorflow.python.util.all_util import remove_undocumented
@@ -35,7 +36,7 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
     'PowerSignOptimizer',
-    'AddSignOptimizer'
+    'AddSignOptimizer',
     'DelayCompensatedGradientDescentOptimizer',
     'DropStaleGradientOptimizer',
     'ExternalOptimizerInterface',
@@ -46,6 +47,8 @@ _allowed_symbols = [
     'VariableClippingOptimizer',
     'MultitaskOptimizerWrapper',
     'clip_gradients_by_global_norm',
+    'ElasticAverageOptimizer',
+    'ElasticAverageCustomGetter'
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/opt/python/training/addsign.py b/tensorflow/contrib/opt/python/training/addsign.py
index 729e59cb0aab97e6cd657571647fc45a44ae0ab1..22da4453e205c9111056d6afd1ddb08e093653aa 100644
--- a/tensorflow/contrib/opt/python/training/addsign.py
+++ b/tensorflow/contrib/opt/python/training/addsign.py
@@ -30,8 +30,8 @@ from tensorflow.python.training import training_ops
 class AddSignOptimizer(optimizer.Optimizer):
   """Optimizer that implements the AddSign update.
 
-  See  Neural Optimizer Search with Reinforcement Learning
-  [Bello et al., ICML2017].
+  See [Bello et al., ICML2017],
+  [Neural Optimizer Search with RL](https://arxiv.org/abs/1709.07417).
   """
 
   def __init__(self,
@@ -45,6 +45,7 @@ class AddSignOptimizer(optimizer.Optimizer):
 
     Initialization:
 
+    ```
     m_0 <- 0 (Initialize initial 1st moment vector)
     t <- 0 (Initialize timestep)
     ```
@@ -54,7 +55,7 @@ class AddSignOptimizer(optimizer.Optimizer):
     ```
     t <- t + 1
     m_t <- beta1 * m_{t-1} + (1 - beta1) * g
-    sign_decay <- sign_decay(t)
+    sign_decay <- sign_decay_fn(t)
     update <- (alpha + sign_decay * sign(g) *sign(m)) * g
     variable <- variable - lr_t * update
     ```
@@ -70,11 +71,9 @@ class AddSignOptimizer(optimizer.Optimizer):
       learning_rate: learning_rate used when taking a step.
       alpha: alpha used in optimizer.
       beta: decay used for computing the moving average m.
-      sign_decay_fn: decay function applied to the sign(g*m) quantity.
-          Takes global_step as an argument and returns the quantity to multiply
-          the sign(g*m) by.
-        compute (1.0 + alpha * decay * sign(g) * sign(m)) * m.
-      use_locking: If True use locks for update operations.
+      sign_decay_fn: decay function applied to the sign(g) sign(m) quantity.
+          Takes global_step as an argument. See sign_decay.py for some examples.
+      use_locking: If True, use locks for update operations.
       name: Optional name for the operations created when applying gradients.
         Defaults to "AddSignOptimizer".
     """
diff --git a/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py b/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py
index f20c172ee376d0a808a21fe96bec80367bf2e9f4..4a905b1b2a0c3b7c4002451f37102eb2abdc5a2b 100644
--- a/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py
@@ -78,10 +78,11 @@ class DropStaleGradientOptimizer(optimizer.Optimizer):
   def apply_gradients(self, grads_and_vars, global_step=None, name=None):
     gradients = []
     # Number of stale gradients.
-    stale_counter = variable_scope.get_variable(
-        "stale_counter", [],
-        initializer=init_ops.zeros_initializer(),
-        trainable=False)
+    with ops.colocate_with(global_step):
+      stale_counter = variable_scope.get_variable(
+          "stale_counter", [],
+          initializer=init_ops.zeros_initializer(),
+          trainable=False)
 
     def _AcceptGradientOp():
       with ops.control_dependencies(
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..6132cba1f5aecbafd8ca820ecda39355dd768847
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
@@ -0,0 +1,344 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Wrapper optimizer for Elastic Average SGD """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import math_ops
+
+from tensorflow.python.ops import gen_nn_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import constant_op
+
+LOCAL_VARIABLE_NAME = 'local_center_variable'
+GLOBAL_VARIABLE_NAME = 'global_center_variable'
+
+
+class ElasticAverageCustomGetter(object):
+  """Custom_getter class is used to do:
+  1. Change trainable variables to local collection and place them at worker
+    device
+  2. Generate global variables(global center variables)
+  3. Generate local variables(local center variables) which record the global
+    variables and place them at worker device
+    Notice that the class should be used with tf.replica_device_setter,
+    so that the global center variables and global step variable can be placed
+    at ps device. Besides, use 'tf.get_variable' instead of 'tf.Variable' to
+    use this custom getter.
+
+  For example,
+  ea_custom_getter = ElasticAverageCustomGetter(worker_device)
+  with tf.device(
+    tf.train.replica_device_setter(
+      worker_device=worker_device,
+      ps_device="/job:ps/cpu:0",
+      cluster=cluster)),
+    tf.variable_scope('',custom_getter=ea_custom_getter):
+    hid_w = tf.get_variable(
+      initializer=tf.truncated_normal(
+          [IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units],
+          stddev=1.0 / IMAGE_PIXELS),
+      name="hid_w")
+    hid_b = tf.get_variable(initializer=tf.zeros([FLAGS.hidden_units]),
+                            name="hid_b")
+  """
+
+  def __init__(self, worker_device):
+    """Create a new `ElasticAverageCustomGetter`.
+
+    Args:
+      worker_device: String.  Name of the `worker` job.
+    """
+    self._worker_device = worker_device
+    self._local_map = {}
+    self._global_map = {}
+
+  def __call__(self, getter, name, trainable, collections, *args, **kwargs):
+    if trainable:
+      with ops.device(self._worker_device):
+        local_var = getter(name, trainable=True,
+                           collections=[ops.GraphKeys.LOCAL_VARIABLES],
+                           *args, **kwargs)
+      global_center_variable = variable_scope.variable(
+        name='%s/%s' %
+             (GLOBAL_VARIABLE_NAME,
+              name),
+        initial_value=local_var.initialized_value(),
+        trainable=False,
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
+
+      with ops.device(self._worker_device):
+        local_center_variable = variable_scope.variable(
+          name='%s/%s' % (LOCAL_VARIABLE_NAME, name),
+          initial_value=local_var.initialized_value(),
+          trainable=False,
+          collections=[ops.GraphKeys.LOCAL_VARIABLES])
+
+      self._local_map[local_var] = local_center_variable
+      self._global_map[local_var] = global_center_variable
+      return local_var
+    else:
+      return getter(name, trainable, collections, *args, **kwargs)
+
+
+class ElasticAverageOptimizer(optimizer.Optimizer):
+  """Wrapper optimizer that implements the Elastic Average SGD algorithm.
+  This is an async optimizer. During the training, Each worker will update
+  the local variables and maintains its own local_step, which starts from 0
+  and is incremented by 1 after each update of local variables. Whenever
+  the communication period divides the local step, the worker requests
+  the current global center variables and then computed the elastic difference
+  between global center variables and local variables. The elastic difference
+  then be used to update both local variables and global variables.
+  """
+
+  # Default value as paper described
+  BETA = 0.9
+
+  def __init__(
+      self,
+      opt,
+      num_worker,
+      ea_custom_getter,
+      communication_period=10,
+      moving_rate=None,
+      rho=None,
+      use_locking=True,
+      name="ElasticAverageOptimizer"):
+    """Construct a new gradient descent optimizer.
+
+    Args:
+      opt: The actual optimizer that will be used to update local variables.
+        Must be one of the Optimizer classes.
+      num_worker: The number of workers
+      ea_custom_getter: The ElasticAverageCustomGetter
+      communication_period: An int point value to controls the frequency
+        of the communication between every worker and the ps.
+      moving_rate: A floating point value to control the elastic difference.
+      rho: the amount of exploration we allow ine the model. The default
+        value is moving_rate/learning_rate
+      use_locking: If True use locks for update operations.
+      name: Optional name prefix for the operations created when applying
+        gradients. Defaults to "ElasticAverageOptimizer".
+    """
+    super(ElasticAverageOptimizer, self).__init__(use_locking, name)
+    self._opt = opt
+    self._num_worker = num_worker
+    self._period = communication_period
+    self._local_map = ea_custom_getter._local_map
+    self._global_map = ea_custom_getter._global_map
+
+    if moving_rate is None:
+      self._moving_rate = BETA / communication_period / num_worker
+    else:
+      self._moving_rate = moving_rate
+    if rho is None:
+      self._rho = self._moving_rate / self._opt._learning_rate
+    else:
+      self._rho = rho
+
+    self._local_step = variable_scope.get_variable(
+      initializer=0,
+      trainable=False,
+      collections=[ops.GraphKeys.LOCAL_VARIABLES],
+      name="local_step")
+    self._opt._prepare()
+
+  def compute_gradients(self, loss, var_list=None,
+                        gate_gradients=optimizer.Optimizer.GATE_OP,
+                        aggregation_method=None,
+                        colocate_gradients_with_ops=False,
+                        grad_loss=None):
+    """Compute gradients of `loss` for the variables in `var_list`.
+
+    Add rho*elastic_difference to loss to control the exploration
+    This is the first part of `minimize()`.  It returns a list
+    of (gradient, variable) pairs where "gradient" is the gradient
+    for "variable".  Note that "gradient" can be a `Tensor`, an
+    `IndexedSlices`, or `None` if there is no gradient for the
+    given variable.
+
+    Args:
+      loss: A Tensor containing the value to minimize.
+      var_list: Optional list or tuple of `tf.Variable` to update to minimize
+        `loss`.  Defaults to the list of variables collected in the graph
+        under the key `GraphKey.TRAINABLE_VARIABLES`.
+      gate_gradients: How to gate the computation of gradients.  Can be
+        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
+      aggregation_method: Specifies the method used to combine gradient terms.
+        Valid values are defined in the class `AggregationMethod`.
+      colocate_gradients_with_ops: If True, try colocating gradients with
+        the corresponding op.
+      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
+
+    Returns:
+      A list of (gradient, variable) pairs. Variable is always present, but
+      gradient can be `None`.
+
+    Raises:
+      TypeError: If `var_list` contains anything else than `Variable` objects.
+      ValueError: If some arguments are invalid.
+    """
+    if not var_list:
+      var_list = variables.trainable_variables()
+
+    elastic_difference = [math_ops.subtract(v, lv) for v, lv in zip(
+      variables.trainable_variables(),
+      [self._local_map[var] for var in var_list])]
+
+    distance_loss = self._rho * math_ops.add_n(
+                      [gen_nn_ops.l2_loss(ed) for ed in elastic_difference])
+
+    total_loss = loss + distance_loss
+    return self._opt.compute_gradients(total_loss, var_list,
+                                       gate_gradients, aggregation_method,
+                                       colocate_gradients_with_ops, grad_loss)
+
+  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+    """Apply gradients to global variables.
+
+    This is the second part of `minimize()`. It returns an `Operation` that
+    applies gradients.
+
+    Args:
+      grads_and_vars: List of (gradient, variable) pairs as returned by
+        `compute_gradients()`.
+      global_step: Optional `Variable` to increment by one after the
+        variables have been updated.
+      name: Optional name for the returned operation.  Default to the
+        name passed to the `Optimizer` constructor.
+
+    Returns:
+      An `Operation` that applies the specified gradients. If `global_step`
+      was not None, that operation also increments `global_step`.
+
+    Raises:
+      TypeError: If `grads_and_vars` is malformed.
+      ValueError: If none of the variables have gradients.
+    """
+    apply_updates = self._opt.apply_gradients(grads_and_vars)
+    with ops.control_dependencies([apply_updates]):
+      local_update = state_ops.assign_add(
+        self._local_step, 1, name='local_step_update').op
+
+    # update global variables.
+    def _Update_global_variables():
+      local_vars = [v for g, v in grads_and_vars if g is not None]
+      global_center_vars = [self._global_map[var] for var in local_vars]
+      local_center_vars = [self._local_map[var] for var in local_vars]
+      local_center_vars_update = []
+      for lvar, var in zip(local_center_vars, global_center_vars):
+        local_center_vars_update.append(lvar.assign(var))
+      update_ops = []
+      differences = []
+      with ops.control_dependencies(local_center_vars_update):
+        for v, lv in zip(local_vars, local_center_vars):
+          with ops.device(v.device):
+            differences.append(math_ops.subtract(v, lv))
+        for lvar, diff in zip(local_vars, differences):
+          with ops.device(lvar.device):
+            update_ops.append(state_ops.assign_sub(lvar, math_ops.multiply(
+              self._moving_rate, diff)))
+        for var, diff in zip(global_center_vars, differences):
+          with ops.device(var.device):
+            update_ops.append(state_ops.assign_add(var, math_ops.multiply(
+              self._moving_rate, diff)))
+        if global_step:
+          with ops.colocate_with(global_step):
+            update_ops.append(state_ops.assign_add(global_step, 1))
+      variable_update = control_flow_ops.group(*(update_ops))
+      return variable_update
+
+    with ops.control_dependencies([local_update]):
+      condition = math_ops.equal(math_ops.mod(
+        self._local_step, self._period), 0)
+      conditional_update = control_flow_ops.cond(
+        condition, _Update_global_variables, control_flow_ops.no_op)
+    return conditional_update
+
+  def get_init_op(self, task_index):
+    """Returns the op to let all the local variables and local center
+    variables equal to the global center variables before the training begins"""
+
+    def _Add_sync_queues_and_barrier(enqueue_after_list):
+      """Adds ops to enqueu on all worker queues"""
+      sync_queues = [
+        data_flow_ops.FIFOQueue(self._num_worker, [dtypes.bool], shapes=[[]],
+                                shared_name='%s%s' % (
+                                  'variable_init_sync_queue', i)) for i in
+        range(self._num_worker)]
+      queue_ops = []
+      # For each other worker, add an entry in a queue
+      token = constant_op.constant(False)
+      with ops.control_dependencies(enqueue_after_list):
+        for i, q in enumerate(sync_queues):
+          if i == task_index:
+            queue_ops.append(control_flow_ops.no_op())
+          else:
+            queue_ops.append(q.enqueue(token))
+      queue_ops.append(
+        sync_queues[task_index].dequeue_many(len(sync_queues) - 1))
+      return control_flow_ops.group(*queue_ops)
+
+    init_ops = []
+    local_vars = variables.trainable_variables()
+    global_center_vars = [self._global_map[var] for var in local_vars]
+    local_center_vars = [self._local_map[var] for var in local_vars]
+    if not (local_vars and global_center_vars and local_center_vars):
+      raise ValueError(
+        'The lists of local_variables, global_center_variables, '
+        'local_center_variables should not be empty  ')
+    for lvar, gc_var, lc_var in zip(
+        local_vars, global_center_vars, local_center_vars):
+      init_ops.append(state_ops.assign(lvar, gc_var))
+      init_ops.append(state_ops.assign(lc_var, gc_var))
+
+    init_op = control_flow_ops.group(*(init_ops))
+    sync_queue_op = _Add_sync_queues_and_barrier([init_op])
+    return sync_queue_op
+
+  def make_session_run_hook(self, is_chief, task_index):
+    """Creates a hook to handle ElasticAverageOptimizerHook ops such as initialization."""
+    return _ElasticAverageOptimizerHook(self, is_chief, task_index)
+
+
+class _ElasticAverageOptimizerHook(session_run_hook.SessionRunHook):
+  def __init__(self, ea_optimizer, is_chief, task_index):
+    """Creates hook to handle ElasticAverageOptimizer initialization ops.
+
+    Args:
+      ea_optimizer: `ElasticAverageOptimizer` which this hook will initialize.
+      is_chief: `Bool`, whether is this a chief replica or not.
+    """
+    self._ea_optimizer = ea_optimizer
+    self._is_chief = is_chief
+    self._task_index = task_index
+
+  def begin(self):
+    self._local_init_op = variables.local_variables_initializer()
+    self._global_init_op = None
+    if self._is_chief:
+      self._global_init_op = variables.global_variables_initializer()
+    self._variable_init_op = self._ea_optimizer.get_init_op(self._task_index)
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..446e91018d477d75116f6b78a2443ed79ed3b3ef
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
@@ -0,0 +1,225 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ElasticAverageOptimizer."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import portpicker
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import server_lib
+from tensorflow.python.training import training
+from tensorflow.python.training import training_util
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training import device_setter
+
+from tensorflow.contrib.opt.python.training.elastic_average_optimizer import \
+  ElasticAverageOptimizer, ElasticAverageCustomGetter, GLOBAL_VARIABLE_NAME
+
+
+def create_local_cluster(num_workers, num_ps, protocol="grpc"):
+  """Create local GRPC servers and return them."""
+  worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
+  ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
+  cluster_dict = {
+    "worker": ["localhost:%s" % port for port in worker_ports],
+    "ps": ["localhost:%s" % port for port in ps_ports]
+  }
+  cs = server_lib.ClusterSpec(cluster_dict)
+
+  workers = [
+    server_lib.Server(
+      cs, job_name="worker", protocol=protocol, task_index=ix, start=True)
+    for ix in range(num_workers)
+  ]
+  ps_servers = [
+    server_lib.Server(
+      cs, job_name="ps", protocol=protocol, task_index=ix, start=True)
+    for ix in range(num_ps)
+  ]
+
+  return cluster_dict, workers, ps_servers
+
+
+# Creates the workers and return their sessions, graphs, train_ops.
+# Cheif worker will update at last
+def _get_workers(num_workers, period, workers, moving_rate):
+  sessions = []
+  graphs = []
+  train_ops = []
+  for worker_id in range(num_workers):
+    graph = ops.Graph()
+    is_chief = (worker_id == 0)
+    with graph.as_default():
+      worker_device = "/job:worker/task:%d/cpu:0" % (worker_id)
+      ea_coustom = ElasticAverageCustomGetter(
+        worker_device=worker_device)
+      with variable_scope.variable_scope('',
+                                         custom_getter=ea_coustom), ops.device(
+        device_setter.replica_device_setter(worker_device=worker_device,
+                                            ps_device="/job:ps/task:0/cpu:0",
+                                            ps_tasks=1)):
+        global_step = variables.Variable(0, name='global_step',
+                                         trainable=False)
+        var_0 = variable_scope.get_variable(initializer=0.0, name="v0")
+        var_1 = variable_scope.get_variable(initializer=1.0, name="v1")
+
+      with ops.device("/job:worker/task:" + str(worker_id)):
+        grads_0 = constant_op.constant(-1.0)
+        grads_1 = constant_op.constant(-1.0)
+
+        sgd_opt = gradient_descent.GradientDescentOptimizer(1.0)
+        opt = ElasticAverageOptimizer(
+          opt=sgd_opt,
+          num_worker=num_workers,
+          moving_rate=moving_rate,
+          communication_period=period,
+          ea_custom_getter=ea_coustom
+        )
+        train_op = [
+          opt.apply_gradients(
+            ([grads_0, var_0],
+             [grads_1, var_1]), global_step)
+        ]
+        easgd_hook = opt.make_session_run_hook(is_chief, worker_id)
+      # Creates MonitoredSession
+      sess = training.MonitoredTrainingSession(workers[worker_id].target,
+                                               hooks=[easgd_hook])
+
+    sessions.append(sess)
+    graphs.append(graph)
+    train_ops.append(train_op)
+
+  return sessions, graphs, train_ops
+
+
+class ElasticAverageOptimizerTest(test.TestCase):
+  def _run(self, train_op, sess):
+    sess.run(train_op)
+
+  def test1Workers2Period(self):
+    num_workers = 1
+    communication_period = 2
+    num_ps = 1
+    cluster, workers, _ = create_local_cluster(num_workers=num_workers,
+                                               num_ps=num_ps)
+
+    sessions, graphs, train_ops = _get_workers(num_workers,
+                                               communication_period,
+                                               workers, 1.0)
+
+    var_0 = graphs[0].get_tensor_by_name('v0:0')
+    var_1 = graphs[0].get_tensor_by_name('v1:0')
+    global_step = training_util.get_global_step(graphs[0])
+    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
+    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
+    # Verify the initialized value.
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(0, sessions[0].run(global_step))
+
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(1.0, sessions[0].run(var_0))
+    self.assertAllEqual(2.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(0, sessions[0].run(global_step))
+
+    # iteration 2, global variable update
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(2.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(3.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(1, sessions[0].run(global_step))
+
+    # iteration 3
+    sessions[0].run(train_ops[0])
+
+    self.assertAllEqual(1.0, sessions[0].run(var_0))
+    self.assertAllEqual(2.0, sessions[0].run(var_1))
+    self.assertAllEqual(2.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(3.0, sessions[0].run(var_1_g))
+    self.assertAllEqual(1, sessions[0].run(global_step))
+
+  def test2Worker1Period(self):
+    num_workers = 2
+    communication_period = 1
+    num_ps = 2
+    cluster, workers, _ = create_local_cluster(num_workers=num_workers,
+                                               num_ps=num_ps)
+
+    sessions, graphs, train_ops = _get_workers(num_workers,
+                                               communication_period,
+                                               workers, 0.5)
+
+    var_0 = graphs[0].get_tensor_by_name('v0:0')
+    var_1 = graphs[0].get_tensor_by_name('v1:0')
+
+    var_0_1 = graphs[1].get_tensor_by_name('v0:0')
+    var_1_1 = graphs[1].get_tensor_by_name('v1:0')
+
+    var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0")
+    var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0")
+    # Verify the initialized value.
+    self.assertAllEqual(0.0, sessions[0].run(var_0))
+    self.assertAllEqual(1.0, sessions[0].run(var_1))
+    self.assertAllEqual(0.0, sessions[1].run(var_0_1))
+    self.assertAllEqual(1.0, sessions[1].run(var_1_1))
+    self.assertAllEqual(0.0, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.0, sessions[0].run(var_1_g))
+
+    sessions[0].run(train_ops[0])
+    sessions[1].run(train_ops[1])
+
+    self.assertAllEqual(0.5, sessions[0].run(var_0))
+    self.assertAllEqual(1.5, sessions[0].run(var_1))
+    self.assertAllEqual(0.75, sessions[0].run(var_0_g))
+    self.assertAllEqual(1.75, sessions[0].run(var_1_g))
+    self.assertAllEqual(0.75, sessions[1].run(var_0_1))
+    self.assertAllEqual(1.75, sessions[1].run(var_1_1))
+
+  def testPS2TasksWithClusterSpecClass(self):
+    cluster_spec = server_lib.ClusterSpec({
+      "ps": ["ps0:2222", "ps1:2222"],
+      "worker": ["worker0:2222", "worker1:2222", "worker2:2222"]
+    })
+    ea_coustom = ElasticAverageCustomGetter(
+      worker_device="/job:worker/task:0")
+    from tensorflow.python.training import device_setter
+    with ops.device(
+        device_setter.replica_device_setter(cluster=cluster_spec,
+                                            worker_device="/job:worker/task:0",
+                                            ps_device="/job:ps")), \
+         variable_scope.variable_scope('', custom_getter=ea_coustom):
+      v = variable_scope.get_variable(initializer=[1, 2], name="v")
+      w = variable_scope.get_variable(initializer=[2, 1], name='w')
+      v_g, w_g = ea_coustom._global_map[v],ea_coustom._global_map[w]
+      self.assertDeviceEqual("/job:worker/task:0", v.device)
+      self.assertDeviceEqual("job:ps/task:0", v_g.device)
+      self.assertDeviceEqual("/job:worker/task:0", w.device)
+      self.assertDeviceEqual("job:ps/task:1", w_g.device)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
index c48494585eb66c40e69a87439265b9cd08d51712..d68ad23d65500cc2348459cdc53030c2ea08373a 100644
--- a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
@@ -86,6 +86,9 @@ class MovingAverageOptimizer(optimizer.Optimizer):
     self._variable_map = None
     self._sequential_update = sequential_update
 
+  def compute_gradients(self, *args, **kwargs):
+    return self._optimizer.compute_gradients(*args, **kwargs)
+
   def apply_gradients(self, grads_and_vars, global_step=None, name=None):
     train_op = self._optimizer.apply_gradients(
         grads_and_vars, global_step=global_step, name=name)
diff --git a/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py
index a4ffbfe1c6bf8a63b10593e6c783047c99cad523..60929add198f2e69b5acc2eb5516dafc82b1f3ba 100644
--- a/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py
@@ -116,6 +116,37 @@ class MovingAverageOptimizerTest(test.TestCase):
       with self.assertRaises(RuntimeError):
         _ = opt.swapping_saver([var])
 
+  def testCorrectOverride(self):
+
+    class WrapperOptimizer(gradient_descent.GradientDescentOptimizer):
+
+      def compute_gradients(self, *args, **kwargs):
+        self.compute_gradients_called = True
+        return super(WrapperOptimizer, self).compute_gradients(
+            *args, **kwargs)
+
+      def apply_gradients(self, *args, **kwargs):
+        self.apply_gradients_called = True
+        return super(WrapperOptimizer, self).apply_gradients(*args, **kwargs)
+
+    with self.test_session() as sess:
+      var = variables.Variable([1.2], name='var', dtype=dtypes.float32)
+      loss = var ** 2
+      wrapper_opt = WrapperOptimizer(learning_rate=2.0)
+      opt = moving_average_optimizer.MovingAverageOptimizer(wrapper_opt)
+      train_op = opt.minimize(loss)
+
+      # Check that both methods are called on the underlying optimizer.
+      self.assertTrue(wrapper_opt.compute_gradients_called)
+      self.assertTrue(wrapper_opt.apply_gradients_called)
+
+      # Run train_op once, and verify that we've updated the variable.
+      variables.global_variables_initializer().run()
+      sess.run(train_op)
+      var_value = sess.run(var)
+      # Started at 1.2, gradient is 2*1.2=2.4, lr=2, so should now be -3.6.
+      self.assertNear(-3.6, var_value, 1e-6)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/opt/python/training/powersign.py b/tensorflow/contrib/opt/python/training/powersign.py
index 7f7521581fd685c7a65119e2bd2b4af64aafcd69..828f3c51c9868c70d881fabb33995fb4e90c64e3 100644
--- a/tensorflow/contrib/opt/python/training/powersign.py
+++ b/tensorflow/contrib/opt/python/training/powersign.py
@@ -32,8 +32,8 @@ from tensorflow.python.training import training_ops
 class PowerSignOptimizer(optimizer.Optimizer):
   """Optimizer that implements the PowerSign update.
 
-  See  Neural Optimizer Search with Reinforcement Learning
-  [Bello et al., ICML2017].
+  See [Bello et al., ICML2017],
+  [Neural Optimizer Search with RL](https://arxiv.org/abs/1709.07417).
   """
 
   def __init__(self,
@@ -57,7 +57,7 @@ class PowerSignOptimizer(optimizer.Optimizer):
     ```
     t <- t + 1
     m_t <- beta1 * m_{t-1} + (1 - beta1) * g
-    sign_decay <- sign_decay(t)
+    sign_decay <- sign_decay_fn(t)
     update <- base ** (sign_decay * sign(g) * sign(m)) * g
     variable <- variable - lr_t * update
     ```
@@ -73,10 +73,9 @@ class PowerSignOptimizer(optimizer.Optimizer):
       learning_rate: learning_rate used when taking a step.
       base: base used in optimizer.
       beta: decay used for computing the moving average m.
-      sign_decay_fn: decay function applied to the sign(g*m) quantity.
-          Takes global_step as an argument and returns the quantity to multiply
-          the sign(g*m) by.
-      use_locking: If True use locks for update operations.
+      sign_decay_fn: decay function applied to the sign(g) sign(m) quantity.
+          Takes global_step as an argument. See sign_decay.py for some examples.
+      use_locking: If True, use locks for update operations.
       name: Optional name for the operations created iwhen applying gradients.
         Defaults to "PowerSignOptimizer".
     """
diff --git a/tensorflow/contrib/periodic_resample/BUILD b/tensorflow/contrib/periodic_resample/BUILD
index 25d700f1209692a467c05ba56cee041456718378..71582f9c9a01eb221666e2c71c4a2edb18e7cb98 100644
--- a/tensorflow/contrib/periodic_resample/BUILD
+++ b/tensorflow/contrib/periodic_resample/BUILD
@@ -1,13 +1,12 @@
+package(default_visibility = ["//visibility:public"])
+
 licenses(["notice"])  # Apache 2.0
 
 exports_files(["LICENSE"])
 
-package(default_visibility = ["//visibility:public"])
-
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_gen_op_libs",
-    "tf_py_test",
     "tf_custom_op_library",
     "tf_custom_op_py_library",
     "tf_gen_op_wrapper_py",
diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
index 72e355deb74808dc0a02d145c950850c6de85fb2..bef21f7a5c8a27011f95eb7fae8451ca944d3cde 100644
--- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
+++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h
@@ -20,192 +20,198 @@
 #include <cmath>
 #include <type_traits>
 #include <vector>
-#include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/shape_inference.h"
 #include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/core/status.h"
 
 namespace {
 
-  template <class IndexVecT, class IndexT>
-  IndexT compute_input_index(
-      IndexVecT* target_dimensions, const IndexT& output_index,
-      const IndexVecT& original_dimensions, const int& adjustable_dimension,
-      const std::vector<tensorflow::int64>& dimension_ceiling,
-      const std::vector<tensorflow::int64>& cumulative_dimensions,
-      IndexT* result,
-      std::vector<IndexT>* output_indices,
-      const int& rank) {
-
-    *result = 0;
-    output_indices->clear();
-
-    // un-rasterize the output index
-    auto last_reduced_i = output_index;
-    for (auto r = rank - 1; r >= 0; --r) {
-      (*output_indices)[r] = last_reduced_i % (*target_dimensions)[r];
-      last_reduced_i = (last_reduced_i - (*output_indices)[r]) / (*target_dimensions)[r];
-    }
-
-    // rasterize the input index
-    IndexT last_index_factor = 1;
-    for (auto r = rank - 1; r >= 0; --r) {
-      IndexT index = 0;
-      if (r != adjustable_dimension)
-        index = (*output_indices)[r] / dimension_ceiling[r];
-      else {
-        for (int qi = 0; qi < rank; ++qi) {
-          if (qi == adjustable_dimension) continue;
-          index += cumulative_dimensions[qi] * ((*output_indices)[qi] % dimension_ceiling[qi]);
-        }
-        index *= (*target_dimensions)[adjustable_dimension];
-        index += (*output_indices)[r];
-      }
-      *result += last_index_factor * index;
-      last_index_factor *= original_dimensions[r];
-    }
-
-    return *result;
+template <class IndexVecT, class IndexT>
+IndexT compute_input_index(
+    IndexVecT* target_dimensions, const IndexT& output_index,
+    const IndexVecT& original_dimensions, const int& adjustable_dimension,
+    const std::vector<tensorflow::int64>& dimension_ceiling,
+    const std::vector<tensorflow::int64>& cumulative_dimensions, IndexT* result,
+    std::vector<IndexT>* output_indices, const int& rank) {
+  *result = 0;
+  output_indices->clear();
+
+  // un-rasterize the output index
+  auto last_reduced_i = output_index;
+  for (auto r = rank - 1; r >= 0; --r) {
+    (*output_indices)[r] = last_reduced_i % (*target_dimensions)[r];
+    last_reduced_i =
+        (last_reduced_i - (*output_indices)[r]) / (*target_dimensions)[r];
   }
 
-  template <class InputDataT, class IndexVecT> // both types are needed here b/c IndexVecT and InputDataT are not related
-  void fill_periodic_tensor(tensorflow::OpKernelContext* context,
-                            const IndexVecT& desired_shape,
-                            const tensorflow::Tensor& input_tensor) {
-    // input is a strided array (last index is fastest, C-ordered)
-    auto input = input_tensor.flat<InputDataT>();
-    const int rank = input_tensor.dims();
-    const auto original_size = input.size();
-    // original and target dimensions
-    std::vector<tensorflow::int64> original_dimensions(rank),
-                                   target_dimensions(rank);
-    tensorflow::int64 total_size(input_tensor.NumElements()),
-                      new_sliced_size(1);
-    // factors by which original_dimensions increases/decreases w.r.t. target_dimensions
-    std::vector<tensorflow::int64> dimension_ceiling(rank),
-                                   cumulative_dimensions(rank);
-    // index of adjustable dimension
-    int adjustable_dimension;
-    tensorflow::TensorShape output_shape;
-
-    // requires that the rank of the input tensor and length of the desired shape
-    // are equal
-    OP_REQUIRES(context, rank == desired_shape.size(),
-                tensorflow::errors::InvalidArgument(
-                    "periodic_resample expects the rank of the input tensor, ",
-                    rank, ", to be the same as the length of the desired shape, ",
-                    desired_shape.size(), "."));
-
-    bool found = false;
-    for (int i = 0; i < rank; ++i) {
-      // if (desired_shape(i) < 1) {
-      if (desired_shape[i] < 1) {
-        // only one index can be adjustable
-        OP_REQUIRES(context, !found,
-                    tensorflow::errors::InvalidArgument(
-                        "periodic_resample expects only "
-                        "one index to be marked as adjustable."));
-        adjustable_dimension = i;
-        found = true;
-      } else {
-        // target_dimensions[i] = desired_shape(i);
-        target_dimensions[i] = desired_shape[i];
-        new_sliced_size *= target_dimensions[i];
+  // rasterize the input index
+  IndexT last_index_factor = 1;
+  for (auto r = rank - 1; r >= 0; --r) {
+    IndexT index = 0;
+    if (r != adjustable_dimension)
+      index = (*output_indices)[r] / dimension_ceiling[r];
+    else {
+      for (int qi = 0; qi < rank; ++qi) {
+        if (qi == adjustable_dimension) continue;
+        index += cumulative_dimensions[qi] *
+                 ((*output_indices)[qi] % dimension_ceiling[qi]);
       }
+      index *= (*target_dimensions)[adjustable_dimension];
+      index += (*output_indices)[r];
     }
-    // at least one index needs to be adjustable
-    OP_REQUIRES(context, found, tensorflow::errors::InvalidArgument(
-                                    "periodic_resample expects at least "
-                                    "one index to be marked as adjustable."));
-
-    int count = 0;
-    for (const auto dim_info : input_tensor.shape()) {
-      original_dimensions[count] = dim_info.size;
-      ++count;
-    }
+    *result += last_index_factor * index;
+    last_index_factor *= original_dimensions[r];
+  }
 
-    target_dimensions[adjustable_dimension] = total_size / new_sliced_size;
-
-    count = 0;
-    for (const auto dim_info : input_tensor.shape()) {
-      dimension_ceiling[count] = tensorflow::int64(
-        std::ceil(float(target_dimensions[count]) / float(original_dimensions[count]))
-      );
-      if (count == 0)
-        cumulative_dimensions[count] = 1;
-      else
-        cumulative_dimensions[count] = cumulative_dimensions[count - 1] * dimension_ceiling[count - 1];
-      ++count;
+  return *result;
+}
+
+template <class InputDataT,
+          class IndexVecT>  // both types are needed here b/c IndexVecT and
+                            // InputDataT are not related
+                            void
+                            fill_periodic_tensor(
+                                tensorflow::OpKernelContext* context,
+                                const IndexVecT& desired_shape,
+                                const tensorflow::Tensor& input_tensor) {
+  // input is a strided array (last index is fastest, C-ordered)
+  auto input = input_tensor.flat<InputDataT>();
+  const int rank = input_tensor.dims();
+  // original and target dimensions
+  std::vector<tensorflow::int64> original_dimensions(rank),
+      target_dimensions(rank);
+  tensorflow::int64 total_size(input_tensor.NumElements()), new_sliced_size(1);
+  // factors by which original_dimensions increases/decreases w.r.t.
+  // target_dimensions
+  std::vector<tensorflow::int64> dimension_ceiling(rank),
+      cumulative_dimensions(rank);
+  // index of adjustable dimension
+  int adjustable_dimension;
+  tensorflow::TensorShape output_shape;
+
+  // requires that the rank of the input tensor and length of the desired shape
+  // are equal
+  OP_REQUIRES(context, rank == desired_shape.size(),
+              tensorflow::errors::InvalidArgument(
+                  "periodic_resample expects the rank of the input tensor, ",
+                  rank, ", to be the same as the length of the desired shape, ",
+                  desired_shape.size(), "."));
+
+  bool found = false;
+  for (int i = 0; i < rank; ++i) {
+    // if (desired_shape(i) < 1) {
+    if (desired_shape[i] < 1) {
+      // only one index can be adjustable
+      OP_REQUIRES(context, !found,
+                  tensorflow::errors::InvalidArgument(
+                      "periodic_resample expects only "
+                      "one index to be marked as adjustable."));
+      adjustable_dimension = i;
+      found = true;
+    } else {
+      // target_dimensions[i] = desired_shape(i);
+      target_dimensions[i] = desired_shape[i];
+      new_sliced_size *= target_dimensions[i];
     }
+  }
+  // at least one index needs to be adjustable
+  OP_REQUIRES(context, found,
+              tensorflow::errors::InvalidArgument(
+                  "periodic_resample expects at least "
+                  "one index to be marked as adjustable."));
+
+  int count = 0;
+  for (const auto dim_info : input_tensor.shape()) {
+    original_dimensions[count] = dim_info.size;
+    ++count;
+  }
 
-    // ensure that the new dimension is greater than zero
-    OP_REQUIRES(context, target_dimensions[adjustable_dimension] > 0,
-                tensorflow::errors::InvalidArgument(
-                    "periodic_resample found that the "
-                    "adjustable dimension, ",
-                    adjustable_dimension,
-                    ", isn't greater than zero, ",
-                    target_dimensions[adjustable_dimension], "."));
-    for (int i = 0; i < rank; ++i) {
-      output_shape.AddDim(target_dimensions[i]);
-    }
-    const auto new_size = new_sliced_size * target_dimensions[adjustable_dimension];
-
-    // Create an output tensor and attach it to the current context
-    tensorflow::Tensor* output_tensor = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(0, output_shape, &output_tensor));
-    auto output = output_tensor->flat<InputDataT>();
-
-    // memory is allocated for these variables outside the inner loop for
-    // efficiency (although, I could create a separate class scope for
-    // this purpose instead)
-    tensorflow::int64 result = 0;
-    std::vector<tensorflow::int64> output_indices(target_dimensions.size());
-
-    // Fill output tensor with periodically resampled input tensor values
-    for (tensorflow::int64 output_index = 0;
-         output_index < new_size; ++output_index) {
-      output(output_index) = input(
-          compute_input_index(&target_dimensions, output_index,
-                              original_dimensions, adjustable_dimension,
-                              dimension_ceiling, cumulative_dimensions,
-                              &result, &output_indices, rank));
-    }
+  target_dimensions[adjustable_dimension] = total_size / new_sliced_size;
+
+  count = 0;
+  for (int i = 0; i < input_tensor.shape().dims(); ++i) {
+    dimension_ceiling[count] = tensorflow::int64(std::ceil(
+        float(target_dimensions[count]) / float(original_dimensions[count])));
+    if (count == 0)
+      cumulative_dimensions[count] = 1;
+    else
+      cumulative_dimensions[count] =
+          cumulative_dimensions[count - 1] * dimension_ceiling[count - 1];
+    ++count;
   }
 
-  void create_output_tensor(tensorflow::OpKernelContext* context,
-                            const tensorflow::Tensor& input_tensor,
-                            const tensorflow::DataType& input_tensor_type,
-                            const tensorflow::PartialTensorShape& desired_shape_tensor) {
-    auto desired_shape = desired_shape_tensor.dim_sizes();
-
-    // obligatory type switch
-    switch (input_tensor_type) {
-      case tensorflow::DataTypeToEnum<float>::value:
-        fill_periodic_tensor<float>(context, desired_shape, input_tensor);
-      case tensorflow::DataTypeToEnum<double>::value:
-        fill_periodic_tensor<double>(context, desired_shape, input_tensor);
-      case tensorflow::DataTypeToEnum<tensorflow::int32>::value:
-        fill_periodic_tensor<tensorflow::int32>(context, desired_shape, input_tensor);
-      case tensorflow::DataTypeToEnum<tensorflow::int64>::value:
-        fill_periodic_tensor<tensorflow::int64>(context, desired_shape, input_tensor);
-      default:
-        ;
-    }
+  // ensure that the new dimension is greater than zero
+  OP_REQUIRES(context, target_dimensions[adjustable_dimension] > 0,
+              tensorflow::errors::InvalidArgument(
+                  "periodic_resample found that the "
+                  "adjustable dimension, ",
+                  adjustable_dimension, ", isn't greater than zero, ",
+                  target_dimensions[adjustable_dimension], "."));
+  for (int i = 0; i < rank; ++i) {
+    output_shape.AddDim(target_dimensions[i]);
+  }
+  const auto new_size =
+      new_sliced_size * target_dimensions[adjustable_dimension];
+
+  // Create an output tensor and attach it to the current context
+  tensorflow::Tensor* output_tensor = nullptr;
+  OP_REQUIRES_OK(context,
+                 context->allocate_output(0, output_shape, &output_tensor));
+  auto output = output_tensor->flat<InputDataT>();
+
+  // memory is allocated for these variables outside the inner loop for
+  // efficiency (although, I could create a separate class scope for
+  // this purpose instead)
+  tensorflow::int64 result = 0;
+  std::vector<tensorflow::int64> output_indices(target_dimensions.size());
+
+  // Fill output tensor with periodically resampled input tensor values
+  for (tensorflow::int64 output_index = 0; output_index < new_size;
+       ++output_index) {
+    output(output_index) = input(compute_input_index(
+        &target_dimensions, output_index, original_dimensions,
+        adjustable_dimension, dimension_ceiling, cumulative_dimensions, &result,
+        &output_indices, rank));
   }
+}
+
+void create_output_tensor(
+    tensorflow::OpKernelContext* context,
+    const tensorflow::Tensor& input_tensor,
+    const tensorflow::DataType& input_tensor_type,
+    const tensorflow::PartialTensorShape& desired_shape_tensor) {
+  auto desired_shape = desired_shape_tensor.dim_sizes();
+
+  // obligatory type switch
+  switch (input_tensor_type) {
+    case tensorflow::DataTypeToEnum<float>::value:
+      fill_periodic_tensor<float>(context, desired_shape, input_tensor);
+      break;
+    case tensorflow::DataTypeToEnum<double>::value:
+      fill_periodic_tensor<double>(context, desired_shape, input_tensor);
+      break;
+    case tensorflow::DataTypeToEnum<tensorflow::int32>::value:
+      fill_periodic_tensor<tensorflow::int32>(context, desired_shape,
+                                              input_tensor);
+      break;
+    case tensorflow::DataTypeToEnum<tensorflow::int64>::value:
+      fill_periodic_tensor<tensorflow::int64>(context, desired_shape,
+                                              input_tensor);
+      break;
+    default:;
+  }
+}
 
 }  // namespace
 
-
 class PeriodicResampleOp : public tensorflow::OpKernel {
  public:
   explicit PeriodicResampleOp(tensorflow::OpKernelConstruction* context)
       : tensorflow::OpKernel(context) {
     // Get the desired shape
-    OP_REQUIRES_OK(context,
-                   context->GetAttr("shape", &desired_shape));
+    OP_REQUIRES_OK(context, context->GetAttr("shape", &desired_shape));
   }
 
   void Compute(tensorflow::OpKernelContext* context) override {
@@ -213,10 +219,11 @@ class PeriodicResampleOp : public tensorflow::OpKernel {
     const tensorflow::Tensor& input_tensor = context->input(0);
     const tensorflow::DataType input_tensor_type = context->input_dtype(0);
 
-    create_output_tensor(context, input_tensor, input_tensor_type, desired_shape);
+    create_output_tensor(context, input_tensor, input_tensor_type,
+                         desired_shape);
   }
 
-private:
+ private:
   tensorflow::PartialTensorShape desired_shape;
 };
 
diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops.cc b/tensorflow/contrib/periodic_resample/ops/array_ops.cc
index 498799764fd9e13202f6304a23bae089ddcaa456..c90fc06c7fb9d79e8fd7a937e786a34947d8c1cb 100644
--- a/tensorflow/contrib/periodic_resample/ops/array_ops.cc
+++ b/tensorflow/contrib/periodic_resample/ops/array_ops.cc
@@ -14,13 +14,12 @@
 // limitations under the License.
 // =============================================================================
 
+#include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/shape_inference.h"
-#include "tensorflow/core/framework/common_shape_fns.h"
 
-
-using namespace tensorflow;
+namespace tensorflow {
 
 REGISTER_OP("PeriodicResample")
     .Attr("T: numbertype")
@@ -80,10 +79,12 @@ values: The tensor of rank `R` to periodic_resample
 shape: A 1-D tensor representing the desired shape of the output tensor.
   Exactly one element of this tensor must have the value `None` which represents
   that this dimension of `values` can be adjusted downward in order to
-  accomodate increases in other dimensions. The specified sizes of the
+  accommodate increases in other dimensions. The specified sizes of the
   non-adjustable dimensions must by at least as large as in the `values` tensor.
 output: Periodically resampled tensor that has dimensions specified as in
   `shape` except that the dimension specified as `None` will be minimally
   decreased as necessary.
 
 )doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/periodic_resample/python/__init__.py b/tensorflow/contrib/periodic_resample/python/__init__.py
index 36aeeb8da26dcf75cf830b6924fc23d2c14392f0..a8b6ead0f594ad23e73901254857313635fbd1c5 100644
--- a/tensorflow/contrib/periodic_resample/python/__init__.py
+++ b/tensorflow/contrib/periodic_resample/python/__init__.py
@@ -1,4 +1,3 @@
-
 # =============================================================================
 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
@@ -14,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
+"""Public API of periodic_resample."""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/predictor/BUILD b/tensorflow/contrib/predictor/BUILD
index 82cd7b4c8aeb64cf461d9244c5aaf32a91691a5a..a80f060b91df3b6d5e2ca9ff63c721382f0cbb0a 100644
--- a/tensorflow/contrib/predictor/BUILD
+++ b/tensorflow/contrib/predictor/BUILD
@@ -136,6 +136,18 @@ py_test(
     ],
 )
 
+py_test(
+    name = "predictor_factories_test",
+    srcs = ["predictor_factories_test.py"],
+    data = [":test_export_dir"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":predictor_factories",
+        ":testing_common",
+    ],
+)
+
 py_test(
     name = "core_estimator_predictor_test",
     srcs = ["core_estimator_predictor_test.py"],
diff --git a/tensorflow/contrib/predictor/core_estimator_predictor.py b/tensorflow/contrib/predictor/core_estimator_predictor.py
index bd5174aef81a84488c896d259de83d0714745fee..d78d94c2699b14c80e7decee2181d190a6d91f99 100644
--- a/tensorflow/contrib/predictor/core_estimator_predictor.py
+++ b/tensorflow/contrib/predictor/core_estimator_predictor.py
@@ -68,10 +68,10 @@ class CoreEstimatorPredictor(predictor.Predictor):
       serving_input_receiver = serving_input_receiver_fn()
       signature_def = _get_signature_def(
           serving_input_receiver, estimator, output_key)
-      checkpoint_path = estimator.model_dir
+      checkpoint_dir = estimator.model_dir
       self._session = monitored_session.MonitoredSession(
           session_creator=monitored_session.ChiefSessionCreator(
-              checkpoint_filename_with_path=checkpoint_path))
+              checkpoint_dir=checkpoint_dir))
 
     feed_tensor_info = signature_def.inputs
     self._feed_tensors = {k: self._graph.get_tensor_by_name(v.name)
diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py
index e3f30d917d637d2e2d821a727e12b8d0b54942df..04b5d5bdf158dc6a478d7a24b538c75d1dca8d45 100644
--- a/tensorflow/contrib/predictor/predictor_factories.py
+++ b/tensorflow/contrib/predictor/predictor_factories.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """Factory functions for `Predictor`s."""
 
 from __future__ import absolute_import
@@ -22,6 +21,8 @@ from __future__ import print_function
 from tensorflow.contrib.predictor import contrib_estimator_predictor
 from tensorflow.contrib.predictor import core_estimator_predictor
 from tensorflow.contrib.predictor import saved_model_predictor
+
+from tensorflow.contrib.learn.python.learn.estimators import estimator as contrib_estimator
 from tensorflow.python.estimator import estimator as core_estimator
 
 
@@ -59,9 +60,9 @@ def from_contrib_estimator(estimator,
   return contrib_estimator_predictor.ContribEstimatorPredictor(
       estimator,
       prediction_input_fn,
-      input_alternative_key,
-      output_alternative_key,
-      graph)
+      input_alternative_key=input_alternative_key,
+      output_alternative_key=output_alternative_key,
+      graph=graph)
 
 
 def from_estimator(estimator,
@@ -86,16 +87,13 @@ def from_estimator(estimator,
     TypeError: if `estimator` is a contrib `Estimator` instead of a core
       `Estimator`.
   """
-  if isinstance(estimator, estimator.Estimator):
+  if isinstance(estimator, contrib_estimator.Estimator):
     raise TypeError('Espected estimator to be of type '
                     'tf.python.estimator.Estimator, but got type '
                     'tf.contrib.learn.Estimator. You likely want to call '
                     'from_contrib_estimator.')
   return core_estimator_predictor.CoreEstimatorPredictor(
-      estimator,
-      serving_input_receiver_fn,
-      output_key,
-      graph)
+      estimator, serving_input_receiver_fn, output_key=output_key, graph=graph)
 
 
 def from_saved_model(export_dir,
@@ -125,8 +123,9 @@ def from_saved_model(export_dir,
     ValueError: More than one of `signature_def_key` and `signature_def` is
       specified.
   """
-  return saved_model_predictor.SavedModelPredictor(export_dir,
-                                                   signature_def_key,
-                                                   signature_def,
-                                                   tags,
-                                                   graph)
+  return saved_model_predictor.SavedModelPredictor(
+      export_dir,
+      signature_def_key=signature_def_key,
+      signature_def=signature_def,
+      tags=tags,
+      graph=graph)
diff --git a/tensorflow/contrib/predictor/predictor_factories_test.py b/tensorflow/contrib/predictor/predictor_factories_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8443e718d1e81a88b752eb639dcee9c89aa56dc
--- /dev/null
+++ b/tensorflow/contrib/predictor/predictor_factories_test.py
@@ -0,0 +1,75 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for predictor.predictor_factories."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.predictor import predictor_factories
+from tensorflow.contrib.predictor import testing_common
+from tensorflow.python.platform import test
+
+MODEL_DIR_NAME = 'contrib/predictor/test_export_dir'
+
+
+class PredictorFactoriesTest(test.TestCase):
+
+  @classmethod
+  def setUpClass(cls):
+    # Load a saved model exported from the arithmetic `Estimator`.
+    # See `testing_common.py`.
+    cls._export_dir = test.test_src_dir_path(MODEL_DIR_NAME)
+
+  def testFromSavedModel(self):
+    """Test loading from_saved_model."""
+    predictor_factories.from_saved_model(self._export_dir)
+
+  def testFromSavedModelWithTags(self):
+    """Test loading from_saved_model with tags."""
+    predictor_factories.from_saved_model(self._export_dir, tags='serve')
+
+  def testFromSavedModelWithBadTags(self):
+    """Test that loading fails for bad tags."""
+    bad_tags_regex = ('.*? could not be found in SavedModel')
+    with self.assertRaisesRegexp(RuntimeError, bad_tags_regex):
+      predictor_factories.from_saved_model(self._export_dir, tags='bad_tag')
+
+  def testFromContribEstimator(self):
+    estimator = testing_common.get_arithmetic_estimator(core=False)
+    input_fn = testing_common.get_arithmetic_input_fn(core=False)
+    predictor_factories.from_contrib_estimator(estimator, input_fn,
+        output_alternative_key='sum')
+
+  def testFromContribEstimatorWithCoreEstimatorRaises(self):
+    estimator = testing_common.get_arithmetic_estimator(core=True)
+    input_fn = testing_common.get_arithmetic_input_fn(core=True)
+    with self.assertRaises(TypeError):
+      predictor_factories.from_contrib_estimator(estimator, input_fn)
+
+  def testFromCoreEstimator(self):
+    estimator = testing_common.get_arithmetic_estimator(core=True)
+    input_fn = testing_common.get_arithmetic_input_fn(core=True)
+    predictor_factories.from_estimator(estimator, input_fn)
+
+  def testFromCoreEstimatorWithContribEstimatorRaises(self):
+    estimator = testing_common.get_arithmetic_estimator(core=False)
+    input_fn = testing_common.get_arithmetic_input_fn(core=False)
+    with self.assertRaises(TypeError):
+      predictor_factories.from_estimator(estimator, input_fn)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/BUILD b/tensorflow/contrib/py2tf/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..7358822ef5ca7dba87cc1046001aa7f07f45f845
--- /dev/null
+++ b/tensorflow/contrib/py2tf/BUILD
@@ -0,0 +1,83 @@
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+py_library(
+    name = "py2tf",
+    srcs = [
+        "__init__.py",
+        "api.py",
+        "config.py",
+        "conversion.py",
+        "naming.py",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/contrib/py2tf/convert",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis",
+        "@gast_archive//:gast",
+        "@six_archive//:six",
+    ],
+)
+
+# Separate target that allows access to internal symbols for testing.
+py_library(
+    name = "py2tf_internal",
+    srcs = [
+        "api.py",
+        "config.py",
+        "conversion.py",
+        "naming.py",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = ["//tensorflow:__subpackages__"],
+    deps = [
+        "//tensorflow/contrib/py2tf/convert",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis",
+        "@gast_archive//:gast",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "api_test",
+    srcs = ["api_test.py"],
+    deps = [
+        ":py2tf_internal",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "conversion_test",
+    srcs = ["conversion_test.py"],
+    deps = [
+        ":py2tf_internal",
+        "//tensorflow/python:client_testlib",
+        "@gast_archive//:gast",
+    ],
+)
+
+py_test(
+    name = "naming_test",
+    srcs = ["naming_test.py"],
+    deps = [
+        ":py2tf_internal",
+        "//tensorflow/python:client_testlib",
+    ],
+)
diff --git a/tensorflow/contrib/py2tf/README.md b/tensorflow/contrib/py2tf/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..cd50675ad57316b9c749c137e6acd30b91c10073
--- /dev/null
+++ b/tensorflow/contrib/py2tf/README.md
@@ -0,0 +1,4 @@
+# Py2TF
+
+A compiler for generating TensorFlow numeric and control flow ops from Python
+code.
diff --git a/tensorflow/contrib/py2tf/__init__.py b/tensorflow/contrib/py2tf/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d187da99e065cb2d31ae4e45a9570378f9d1bf27
--- /dev/null
+++ b/tensorflow/contrib/py2tf/__init__.py
@@ -0,0 +1,31 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Py2TF compiles Python code into equivalent TensorFlow code.
+
+Equivalent here means that they have the same effect when executed.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.api import to_code
+from tensorflow.contrib.py2tf.api import to_graph
+from tensorflow.python.util.all_util import remove_undocumented
+
+
+_allowed_symbols = ['to_graph', 'to_code']
+
+remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/py2tf/api.py b/tensorflow/contrib/py2tf/api.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a367209694d3210913e515ece62ad1f9e3fc3ed
--- /dev/null
+++ b/tensorflow/contrib/py2tf/api.py
@@ -0,0 +1,95 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Public API."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+import six
+
+from tensorflow.contrib.py2tf import config
+from tensorflow.contrib.py2tf import conversion
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.python.util import tf_inspect
+
+# TODO(mdan): Properly document the type hints.
+# TODO(mdan): Reduce the type hint information to (module, type).
+# (currently we require (module + class name, type))
+
+
+def to_graph(o, arg_value_hints=None):
+  """Compile a Python entity into equivalent TensorFlow code.
+
+  Currently supported entities:
+    * functions
+    * classes
+
+  Classes are handled by converting all their methods into a new class.
+
+  Args:
+    o: A Python function or class.
+    arg_value_hints: A dict mapping parameter names to objects that can hint
+        at the type of those parameters.
+
+  Returns:
+    A function with a signature identical to `o`, but which when executed it
+  creates TF a graph that has the same functionality as the original entity.
+  """
+  conversion_map = conversion.ConversionMap()
+  _, name = conversion.object_to_graph(o, conversion_map, arg_value_hints)
+
+  module = gast.Module([])
+  for import_line in config.COMPILED_IMPORT_STATEMENTS:
+    module.body.append(parser.parse_str(import_line))
+  for dep in conversion_map.dependency_cache.values():
+    module.body.append(dep)
+  compiled_node = compiler.ast_to_object(module)
+
+  # The compiled code should see everything the entry function saw.
+  # TODO(mdan): This might not work well if the call tree spans modules?
+  if tf_inspect.isfunction(o):
+    compiled_node.__dict__.update(six.get_function_globals(o))
+
+  compiled_fn = getattr(compiled_node, name)
+  return compiled_fn
+
+
+def to_code(o, arg_value_hints=None, indentation='  '):
+  """Return the equivalent of an entity in TensorFlow code.
+
+  See `to_graph` for more details.
+
+  Args:
+    o: A Python function or class.
+    arg_value_hints: A dict mapping parameter names to objects that can hint
+        at the type of those parameters.
+    indentation: String, when to use for each level of indentation.
+
+  Returns:
+    String.
+  """
+  conversion_map = conversion.ConversionMap()
+  conversion.object_to_graph(o, conversion_map, arg_value_hints)
+
+  imports = '\n'.join(config.COMPILED_IMPORT_STATEMENTS)
+  code = '\n'.join(
+      compiler.ast_to_source(dep, indentation)
+      for dep in reversed(tuple(
+          six.itervalues(conversion_map.dependency_cache))))
+
+  return imports + '\n\n' + code
diff --git a/tensorflow/contrib/py2tf/api_test.py b/tensorflow/contrib/py2tf/api_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..225b6d305fa5fe5a89cf0a639df84c2e29cda527
--- /dev/null
+++ b/tensorflow/contrib/py2tf/api_test.py
@@ -0,0 +1,63 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for api module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf import api
+from tensorflow.contrib.py2tf import config
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class ApiTest(test.TestCase):
+
+  def test_to_graph_basic(self):
+    def test_fn(x, s):
+      while math_ops.reduce_sum(x) > s:
+        x //= 2
+      return x
+
+    config.DEFAULT_UNCOMPILED_MODULES.add((math_ops.__name__,))
+    config.COMPILED_IMPORT_STATEMENTS = (
+        'from tensorflow.python.ops '
+        'import control_flow_ops as tf',
+    )
+    compiled_fn = api.to_graph(test_fn)
+
+    with self.test_session() as sess:
+      x = compiled_fn(constant_op.constant([4, 8]), 4)
+      self.assertListEqual([1, 2], sess.run(x).tolist())
+
+  def test_to_code_basic(self):
+    def test_fn(x, s):
+      while math_ops.reduce_sum(x) > s:
+        x /= 2
+      return x
+
+    config.DEFAULT_UNCOMPILED_MODULES.add((math_ops.__name__,))
+    compiled_code = api.to_code(test_fn)
+
+    # Just check for some key words and that it is parseable Python code.
+    self.assertRegexpMatches(compiled_code, 'tf\\.while_loop')
+    self.assertIsNotNone(parser.parse_str(compiled_code))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/config.py b/tensorflow/contrib/py2tf/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a9d52136eab494907992db0b6ad0cebcc1985ac
--- /dev/null
+++ b/tensorflow/contrib/py2tf/config.py
@@ -0,0 +1,37 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Global configuration."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+PYTHON_LITERALS = {
+    'None': None,
+    'False': False,
+    'True': True,
+}
+
+DEFAULT_UNCOMPILED_MODULES = set((
+    ('tensorflow',),
+))
+
+NO_SIDE_EFFECT_CONSTRUCTORS = set(('tensorflow',))
+
+# TODO(mdan): Also allow controlling the generated names (for testability).
+COMPILED_IMPORT_STATEMENTS = (
+    'from contextlib import contextmanager',
+    'import tensorflow as tf',
+)
diff --git a/tensorflow/contrib/py2tf/conversion.py b/tensorflow/contrib/py2tf/conversion.py
new file mode 100644
index 0000000000000000000000000000000000000000..43bccae9538c4c68867764a9e433cac81bb98e78
--- /dev/null
+++ b/tensorflow/contrib/py2tf/conversion.py
@@ -0,0 +1,238 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""High level conversion support."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+import six
+
+from tensorflow.contrib.py2tf import config
+from tensorflow.contrib.py2tf import naming
+from tensorflow.contrib.py2tf.convert import break_canonicalization
+from tensorflow.contrib.py2tf.convert import builtin_functions
+from tensorflow.contrib.py2tf.convert import call_trees
+from tensorflow.contrib.py2tf.convert import continue_canonicalization
+from tensorflow.contrib.py2tf.convert import control_flow
+from tensorflow.contrib.py2tf.convert import for_canonicalization
+from tensorflow.contrib.py2tf.convert import logical_expressions
+from tensorflow.contrib.py2tf.convert import print_functions
+from tensorflow.contrib.py2tf.convert import side_effect_guards
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.contrib.py2tf.pyct.static_analysis import live_values
+from tensorflow.contrib.py2tf.pyct.static_analysis import type_info
+from tensorflow.python.util import tf_inspect
+
+
+class ConversionMap(object):
+  """ConversionMaps keep track of converting function hierarchies.
+
+  Attributes:
+    dependency_cache: dict[object]: ast; maps original objects to their
+        converted AST
+    name_map: dict[string]: string; maps original objects to the name of
+        their converted counterparts
+  """
+
+  def __init__(self):
+    self.dependency_cache = {}
+    self.name_map = {}
+
+  def new_namer(self, global_symbols):
+    return naming.Namer(global_symbols, self.name_map)
+
+  def update_name_map(self, namer):
+    for o, name in namer.renamed_calls.items():
+      if o in self.name_map:
+        if self.name_map[o] != name:
+          raise ValueError(
+              'Calls to %s were converted using multiple names (%s). This is '
+              'possible when an object with one of these names already '
+              'existed. To fix, avoid using any of these names.')
+      else:
+        self.name_map[o] = name
+
+  def add_to_cache(self, original_object, converted_ast):
+    self.dependency_cache[original_object] = converted_ast
+
+
+def object_to_graph(o, conversion_map, value_hints):
+  """Compile a Python object into equivalent TensorFlow.
+
+  The function will also recursively compile all the objects that `o`
+  references, updating `dependency_cache`.
+
+  This function is reentrant, and relies on dependency_cache to avoid
+  generating duplicate code.
+
+  Args:
+    o: A Python object.
+    conversion_map: A ConversionMap object.
+    value_hints: A dict containing value hints for symbols like function
+        parameters.
+
+  Returns:
+    A tuple (ast, new_name):
+        * ast: An AST representing an object with interface equivalent to `o`,
+            but which when executed it creates TF a graph.
+        * new_name: The symbol name under which the new object can be found.
+
+  Raises:
+    ValueError: if the object is not supported.
+  """
+  if value_hints is None:
+    value_hints = {}
+
+  if tf_inspect.isclass(o):
+    node, new_name = class_to_graph(o, conversion_map, value_hints)
+  elif tf_inspect.isfunction(o):
+    node, new_name = function_to_graph(o, conversion_map, value_hints)
+  else:
+    raise ValueError(
+        'Unsupported object type %s. Only functions and classes are supported'
+        ' for now.')
+
+  conversion_map.add_to_cache(o, node)
+  # Recursively convert remaining dependencies.
+  for obj in conversion_map.name_map.keys():
+    if obj not in conversion_map.dependency_cache:
+      if hasattr(obj, 'im_class'):
+        # Class members are converted with their objects.
+        continue
+      object_to_graph(obj, conversion_map, None)
+
+  return node, new_name
+
+
+def class_to_graph(c, conversion_map, param_value_hints):
+  """Specialization of `object_to_graph` for classes."""
+  converted_members = {}
+  members = tf_inspect.getmembers(c, predicate=tf_inspect.ismethod)
+  if not members:
+    raise ValueError('Cannot convert %s: it has no member methods.')
+
+  if 'self' in param_value_hints:
+    raise ValueError('Hints may not be provided for reserved name "self".')
+  param_value_hints['self'] = (c.__name__, c)
+
+  class_globals = None
+  for _, m in members:
+    node, _ = function_to_graph(m, conversion_map, param_value_hints, c)
+    # TODO(mdan): Do not assume all members have the same view of globals.
+    if class_globals is None:
+      class_globals = six.get_function_globals(m)
+    converted_members[m] = node
+  namer = conversion_map.new_namer(class_globals)
+  class_name = namer.compiled_class_name(c.__name__, c)
+  node = gast.ClassDef(
+      class_name,
+      bases=[],
+      keywords=[],
+      body=converted_members.values(),
+      decorator_list=[])
+
+  return node, class_name
+
+
+def function_to_graph(f, conversion_map, param_value_hints, owner_type=None):
+  """Specialization of `object_to_graph` for callable functions."""
+  node = parser.parse_object(f).body[0]
+  node_globals = six.get_function_globals(f)
+
+  # This is needed for non-global functions.
+  closure = six.get_function_closure(f)
+  if closure:
+    for e in closure:
+      if callable(e.cell_contents):
+        fn = e.cell_contents
+        node_globals[fn.__name__] = fn
+
+  namer = conversion_map.new_namer(node_globals)
+  node = node_to_graph(node, namer, node_globals, param_value_hints)
+
+  # Simulate a rename to ensure the top level is in the name map. This is needed
+  # for top level functions, and it also helps the consistency verification made
+  # by update_name_map.
+  if owner_type is not None:
+    new_name = namer.compiled_function_name(f.__name__, f, owner_type)
+  else:
+    new_name = namer.compiled_function_name(f.__name__, f)
+  node.name = new_name
+  conversion_map.update_name_map(namer)
+  return node, conversion_map.name_map[f]
+
+
+def _static_analysis_pass(node, namespace, value_hints):
+  node = access.resolve(node)
+  node = live_values.resolve(node, namespace, config.PYTHON_LITERALS)
+  node = type_info.resolve(node, value_hints)
+  return node
+
+
+def node_to_graph(node, namer, namespace, value_hints):
+  """Convert Python code to equivalent TF graph mode code.
+
+  Args:
+    node: A Python AST node representing the code to convert.
+    namer: A naming.Namer object.
+    namespace: Dict mapping symbol names to their corresponding live objects.
+    value_hints: A dict containing value hints for symbols like function
+        parameters.
+
+  Returns:
+    A tuple (node, deps):
+        * node: A Python ast node, representing the converted code.
+        * deps: A set of strings, the fully qualified names of object
+            dependencies that this node has.
+  """
+  # TODO(mdan): Factor out common elements.
+  # These include:
+  #   * keeping track of symbols that have been created
+  #   * marking nodes (e.g. py_func wrappers) to suppress further processing
+  #   * code move between blocks
+  #   * insertion of new global references
+  #   * visiting blocks in transformers
+
+  # Certain steps, especially canonicalization, insert new symbols into the
+  # tree, which must be accounted. Although less efficient, it is most robust
+  # to re-run the analysis.
+
+  node = _static_analysis_pass(node, namespace, value_hints)
+  node = break_canonicalization.transform(node, namer)
+
+  # Note: sequencing continue canonicalization before for loop one avoids
+  # dealing with the extra loop increment operation that the for
+  # canonicalization creates.
+  node = continue_canonicalization.transform(node, namer)
+  namespace['len'] = len
+
+  node = _static_analysis_pass(node, namespace, value_hints)
+  node = for_canonicalization.transform(node, namer)
+  # for_canonicalization may insert new global references.
+  node = builtin_functions.transform(node)
+  # builtin_functions may insert new global references.
+  namespace['print'] = print
+
+  node = _static_analysis_pass(node, namespace, value_hints)
+  node = print_functions.transform(node)
+  node = call_trees.transform(node, namer, config.DEFAULT_UNCOMPILED_MODULES)
+  node = control_flow.transform(node, namer)
+  node = logical_expressions.transform(node)
+  node = side_effect_guards.transform(node, namer)
+
+  return node
diff --git a/tensorflow/contrib/py2tf/conversion_test.py b/tensorflow/contrib/py2tf/conversion_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..d76f14180951217810a3f5ddbca6423d8be63ce3
--- /dev/null
+++ b/tensorflow/contrib/py2tf/conversion_test.py
@@ -0,0 +1,61 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for conversion module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf import conversion
+from tensorflow.python.platform import test
+
+
+class ConversionTest(test.TestCase):
+
+  def test_object_to_graph_unsupported_types(self):
+    with self.assertRaises(ValueError):
+      conversion.object_to_graph('dummy', {}, {})
+
+  def test_object_to_graph_callable(self):
+    def f(a):
+      return a
+
+    conversion_map = conversion.ConversionMap()
+    ast, new_name = conversion.object_to_graph(f, conversion_map, {})
+    self.assertTrue(isinstance(ast, gast.FunctionDef), ast)
+    self.assertEqual('tf__f', new_name)
+
+  def test_object_to_graph_call_tree(self):
+    def g(a):
+      return a
+
+    def f(a):
+      return g(a)
+
+    conversion_map = conversion.ConversionMap()
+    conversion.object_to_graph(f, conversion_map, {})
+
+    self.assertTrue(f in conversion_map.dependency_cache)
+    self.assertTrue(g in conversion_map.dependency_cache)
+    self.assertEqual('tf__f', conversion_map.dependency_cache[f].name)
+    self.assertEqual(
+        'tf__g', conversion_map.dependency_cache[f].body[0].value.func.id)
+    self.assertEqual('tf__g', conversion_map.dependency_cache[g].name)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/convert/BUILD b/tensorflow/contrib/py2tf/convert/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..0eb7998dc4c6acdc7760024b8e4359360b60c23e
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/BUILD
@@ -0,0 +1,135 @@
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+py_library(
+    name = "convert",
+    srcs = [
+        "break_canonicalization.py",
+        "builtin_functions.py",
+        "call_trees.py",
+        "continue_canonicalization.py",
+        "control_flow.py",
+        "for_canonicalization.py",
+        "logical_expressions.py",
+        "print_functions.py",
+        "side_effect_guards.py",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = ["//tensorflow:__subpackages__"],
+    deps = [
+        "@gast_archive//:gast",
+    ],
+)
+
+py_test(
+    name = "break_canonicalization_test",
+    srcs = ["break_canonicalization_test.py"],
+    deps = [
+        ":convert",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "call_trees_test",
+    srcs = ["call_trees_test.py"],
+    deps = [
+        ":convert",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "continue_canonicalization_test",
+    srcs = ["continue_canonicalization_test.py"],
+    deps = [
+        ":convert",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "control_flow_test",
+    srcs = ["control_flow_test.py"],
+    deps = [
+        ":convert",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "builtin_functions_test",
+    srcs = ["builtin_functions_test.py"],
+    deps = [
+        ":convert",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "for_canonicalization_test",
+    srcs = ["for_canonicalization_test.py"],
+    deps = [
+        ":convert",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "logical_expressions_test",
+    srcs = ["logical_expressions_test.py"],
+    deps = [
+        ":convert",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "print_functions_test",
+    srcs = ["print_functions_test.py"],
+    deps = [
+        ":convert",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis",
+        "//tensorflow/python:client_testlib",
+        "@gast_archive//:gast",
+    ],
+)
+
+py_test(
+    name = "side_effect_guards_test",
+    srcs = ["side_effect_guards_test.py"],
+    deps = [
+        ":convert",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/contrib/py2tf/pyct/static_analysis",
+        "//tensorflow/python:client_testlib",
+    ],
+)
diff --git a/tensorflow/contrib/py2tf/convert/__init__.py b/tensorflow/contrib/py2tf/convert/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca10896ee5c6c23d9b20ff23add9945de68e5bf9
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/__init__.py
@@ -0,0 +1,22 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Code converters used by Py2TF."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# TODO(mdan): Define a base transformer class that can recognize skip_processing
+# TODO(mdan): All converters are incomplete, especially those that change blocks
diff --git a/tensorflow/contrib/py2tf/convert/break_canonicalization.py b/tensorflow/contrib/py2tf/convert/break_canonicalization.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef585734454db1aa1ffdb798d93978fb09752f05
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/break_canonicalization.py
@@ -0,0 +1,124 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Canonicalizes break statements by de-sugaring into a control boolean."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import anno
+from tensorflow.contrib.py2tf.pyct import templates
+
+
+class BreakCanonicalizationTransformer(gast.NodeTransformer):
+  """Canonicalizes continue statements into additional conditionals."""
+
+  def __init__(self, namer):
+    self.namer = namer
+    # This is a stack structure, to correctly process nested loops.
+    self.break_uses = []
+
+  def _create_break_check(self):
+
+    def template(var_name):
+      (not var_name)  # pylint:disable=pointless-statement
+
+    expr, = templates.replace(
+        template, var_name=gast.Name(self.break_uses[-1][1], None, None))
+    return expr.value
+
+  def _create_break_trigger(self):
+
+    def template(var_name):  # pylint:disable=unused-argument
+      var_name = True
+
+    block = templates.replace(
+        template, var_name=gast.Name(self.break_uses[-1][1], None, None))
+    block.append(gast.Continue())
+    return block
+
+  def _create_break_init(self):
+
+    def template(var_name):  # pylint:disable=unused-argument
+      var_name = False
+
+    assign, = templates.replace(
+        template, var_name=gast.Name(self.break_uses[-1][1], None, None))
+    return assign
+
+  # TODO(mdan): Surely the transformer supports this better?
+  def _manual_visit_list(self, block):
+    new_block = []
+    for n in block:
+      new_n = self.visit(n)
+      if isinstance(new_n, list):
+        new_block.extend(new_n)
+      else:
+        new_block.append(new_n)
+    return new_block
+
+  def visit_While(self, node):
+    self.generic_visit(node.test)
+    scope = anno.getanno(node, 'body_scope')
+
+    break_var = self.namer.new_symbol('break_requested', scope.referenced)
+    self.break_uses.append([False, break_var])
+    node.body = self._manual_visit_list(node.body)
+    if self.break_uses[-1][0]:
+      node.test = gast.BoolOp(gast.And(), [
+          node.test,
+          gast.UnaryOp(gast.Not(), gast.Name(break_var, gast.Load(), None))
+      ])
+      final_nodes = [self._create_break_init(), node]
+    else:
+      final_nodes = node
+    self.break_uses.pop()
+
+    for n in node.orelse:
+      self.generic_visit(n)
+    return final_nodes
+
+  def visit_For(self, node):
+    self.generic_visit(node.target)
+    self.generic_visit(node.iter)
+    scope = anno.getanno(node, 'body_scope')
+
+    break_var = self.namer.new_symbol('break_requested', scope.referenced)
+    self.break_uses.append([False, break_var])
+    node.body = self._manual_visit_list(node.body)
+    if self.break_uses[-1][0]:
+      anno.setanno(node, 'extra_cond',
+                   gast.UnaryOp(gast.Not(),
+                                gast.Name(break_var, gast.Load(), None)))
+      final_nodes = [self._create_break_init(), node]
+    else:
+      final_nodes = node
+    self.break_uses.pop()
+
+    for n in node.orelse:
+      self.generic_visit(n)
+    return final_nodes
+
+  def visit_Break(self, node):
+    self.break_uses[-1][0] = True
+    return self._create_break_trigger()
+
+
+def transform(node, namer):
+  transformer = BreakCanonicalizationTransformer(namer)
+  node = transformer.visit(node)
+  return node
diff --git a/tensorflow/contrib/py2tf/convert/break_canonicalization_test.py b/tensorflow/contrib/py2tf/convert/break_canonicalization_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..23c4c4d3e23e3e8eaafbafe9166d8c9618701fa5
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/break_canonicalization_test.py
@@ -0,0 +1,125 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for break_canonicalization module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.convert import break_canonicalization
+from tensorflow.contrib.py2tf.convert import control_flow
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.python.platform import test
+
+
+class TestNamer(control_flow.SymbolNamer):
+
+  def new_symbol(self, name_root, _):
+    return name_root
+
+
+class BreakCanonicalizationTest(test.TestCase):
+
+  def _parse_and_analyze(self, test_fn, namespace):
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    return node
+
+  def test_basic_break(self):
+
+    def test_fn(x):
+      v = []
+      while x > 0:
+        x -= 1
+        if x % 2 == 0:
+          break
+        v.append(x)
+      return v
+
+    node = self._parse_and_analyze(test_fn, {})
+    node = break_canonicalization.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+
+    self.assertEqual(test_fn(0), result.test_fn(0))
+    self.assertEqual(test_fn(1), result.test_fn(1))
+    self.assertEqual(test_fn(2), result.test_fn(2))
+    self.assertEqual(test_fn(3), result.test_fn(3))
+    self.assertEqual(test_fn(4), result.test_fn(4))
+
+  def test_basic_break_for_loop(self):
+
+    def test_fn(a):
+      v = []
+      for x in a:
+        x -= 1
+        if x % 2 == 0:
+          break
+        v.append(x)
+      return v
+
+    # The break is incompletely canonicalized for for loops. Everything is
+    # in place except for the condition verification.
+    def test_equiv_fn(a):
+      v = []
+      for x in a:
+        x -= 1
+        if x % 2 == 0:
+          continue
+        v.append(x)
+      return v
+
+    node = self._parse_and_analyze(test_fn, {})
+    node = break_canonicalization.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+
+    # The break is incompletely canonicalized. Everything is in place, but
+    # the loop does not break.
+    self.assertEqual(test_equiv_fn([]), result.test_fn([]))
+    self.assertEqual(test_equiv_fn([1]), result.test_fn([1]))
+    self.assertEqual(test_equiv_fn([2]), result.test_fn([2]))
+    self.assertEqual(test_equiv_fn([1, 2, 3, 4]), result.test_fn([1, 2, 3, 4]))
+
+  def test_continue_deeply_nested(self):
+
+    def test_fn(x):
+      v = []
+      u = []
+      w = []
+      while x > 0:
+        x -= 1
+        if x % 2 == 0:
+          if x % 3 != 0:
+            u.append(x)
+          else:
+            w.append(x)
+            continue
+        v.append(x)
+      return v, u, w
+
+    node = self._parse_and_analyze(test_fn, {})
+    node = break_canonicalization.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+
+    self.assertEqual(test_fn(0), result.test_fn(0))
+    self.assertEqual(test_fn(1), result.test_fn(1))
+    self.assertEqual(test_fn(2), result.test_fn(2))
+    self.assertEqual(test_fn(3), result.test_fn(3))
+    self.assertEqual(test_fn(4), result.test_fn(4))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/convert/builtin_functions.py b/tensorflow/contrib/py2tf/convert/builtin_functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..b80c96c97ac0c55f449a83bd43f2b65cdbdba390
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/builtin_functions.py
@@ -0,0 +1,54 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Handles builtins and other special functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import templates
+
+
+class BuiltinFunctionTransformer(gast.NodeTransformer):
+  """Transforms Print nodes to Call so they can be handled as functions."""
+
+  # TODO(mdan): Bring print_functions in here.
+
+  def _convert_len(self, node):
+
+    def template(args):
+      tf.shape(args)[0]  # pylint:disable=undefined-variable,expression-not-assigned
+
+    new_call = templates.replace(template, args=node.args)[0].value
+    return new_call
+
+  # pylint:disable=invalid-name
+
+  def visit_Call(self, node):
+    self.generic_visit(node)
+    # TODO(mdan): This won't work if the function was hidden.
+    if isinstance(node.func, gast.Name) and node.func.id == 'len':
+      return self._convert_len(node)
+    return node
+
+  # pylint:enable=invalid-name
+
+
+def transform(node):
+  transformer = BuiltinFunctionTransformer()
+  node = transformer.visit(node)
+  return node
diff --git a/tensorflow/contrib/py2tf/convert/builtin_functions_test.py b/tensorflow/contrib/py2tf/convert/builtin_functions_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..633602f4d49792c45826afd8646593e280e35d12
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/builtin_functions_test.py
@@ -0,0 +1,58 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for builtin_functions module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.convert import builtin_functions
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.contrib.py2tf.pyct.static_analysis import live_values
+from tensorflow.contrib.py2tf.pyct.static_analysis import type_info
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class BuiltinFunctionsTest(test.TestCase):
+
+  def _parse_and_analyze(self, test_fn, namespace):
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, namespace, {})
+    node = type_info.resolve(node, {})
+    return node
+
+  def test_len(self):
+
+    def test_fn(a):
+      return len(a)
+
+    node = self._parse_and_analyze(test_fn, {'len': len})
+    node = builtin_functions.transform(node)
+    result = compiler.ast_to_object(node)
+    setattr(result, 'tf', array_ops)
+
+    with self.test_session() as sess:
+      self.assertEqual(3,
+                       sess.run(
+                           result.test_fn(constant_op.constant([0, 0, 0]))))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/convert/call_trees.py b/tensorflow/contrib/py2tf/convert/call_trees.py
new file mode 100644
index 0000000000000000000000000000000000000000..92c3439101ed9d3fe54147346be3cd6a1c0f9d8c
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/call_trees.py
@@ -0,0 +1,199 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Handles function calls, by generating compiled function names and calls.
+
+Note: this transformer does not rename the top level object being converted;
+that is the caller's responsibility.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import types
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import anno
+from tensorflow.contrib.py2tf.pyct import templates
+
+
+class FunctionNamer(object):
+  """Describes the interface for CallTreeTransformer's namer."""
+
+  def compiled_function_name(self,
+                             original_name,
+                             live_object=None,
+                             owner_type=None):
+    """Generate the name corresponding to the compiled version of a function.
+
+    Args:
+      original_name: String
+      live_object: Callable, the actual target function, if known.
+      owner_type: Optional object. If present, it indicates that the function is
+          a member of the given type.
+    Returns:
+      String.
+    """
+    raise NotImplementedError()
+
+  def compiled_class_name(self, original_name, live_object=None):
+    """Generate the name corresponding to the compiled version of a class.
+
+    Args:
+      original_name: String
+      live_object: The actual target class, if known.
+    Returns:
+      String.
+    """
+    raise NotImplementedError()
+
+
+class CallTreeTransformer(gast.NodeTransformer):
+  """Transforms the call tree by renaming transformed symbols."""
+
+  def __init__(self, namer, uncompiled_modules):
+    self.namer = namer
+    self.uncompiled_modules = uncompiled_modules
+
+  # pylint:disable=invalid-name
+
+  def _should_compile(self, fqn):
+    for i in range(1, len(fqn)):
+      if fqn[:i] in self.uncompiled_modules:
+        return False
+    return True
+
+  def _rename_compilable_function(self, node):
+    assert anno.hasanno(node.func, 'live_val')
+    assert anno.hasanno(node.func, 'fqn')
+    target_obj = anno.getanno(node.func, 'live_val')
+    target_fqn = anno.getanno(node.func, 'fqn')
+
+    if not self._should_compile(target_fqn):
+      return node
+
+    if anno.hasanno(node, 'is_constructor'):
+      new_name = self.namer.compiled_class_name(
+          '.'.join(target_fqn), live_object=target_obj)
+    else:
+      new_name = self.namer.compiled_function_name(
+          '.'.join(target_fqn), live_object=target_obj)
+    node.func = gast.Name(id=new_name, ctx=gast.Load(), annotation=None)
+    return node
+
+  def _rename_member_function_of_known_type(self, node):
+    assert isinstance(node.func, gast.Attribute)
+
+    type_fqn = anno.getanno(node.func, 'type_fqn')
+    assert anno.hasanno(node.func, 'type')
+    target_type = anno.getanno(node.func, 'type')
+
+    if not self._should_compile(type_fqn):
+      return node
+
+    # TODO(mdan): We should not assume that the namer only needs the
+    # member function name.
+    new_name = self.namer.compiled_function_name(
+        node.func.attr, live_object=None, owner_type=target_type)
+    node.func.attr = new_name
+
+    return node
+
+  def _wrap_to_py_func_no_return(self, node):
+    args_scope = anno.getanno(node, 'args_scope')
+    # TODO(mdan): Properly handle varargs, kwargs, etc.
+    args = tuple(gast.Name(n, gast.Load(), None) for n in args_scope.used)
+
+    # pylint:disable=undefined-variable,unused-argument,function-redefined
+
+    def template(call, wrapper, args):
+
+      def wrapper(args):
+        call(args)
+        return 1
+
+      tf.py_func(wrapper, [args], [tf.int64])
+
+    # pylint:enable=undefined-variable,unused-argument,function-redefined
+
+    wrapper_name = self.namer.compiled_function_name(node.func.id)
+    wrapper_def, call_expr = templates.replace(
+        template,
+        call=node.func,
+        wrapper=gast.Name(wrapper_name, gast.Load(), None),
+        args=args)
+    anno.setanno(call_expr.value, 'args_scope', args_scope)
+    anno.setanno(wrapper_def, 'skip_processing', True)
+
+    return (wrapper_def, call_expr)
+
+  def _function_is_compilable(self, target_obj):
+    # TODO(mdan): This is just a placeholder. Implement.
+    return not isinstance(target_obj, types.BuiltinFunctionType)
+
+  def visit_Expr(self, node):
+    if isinstance(node.value, gast.Call):
+      if anno.hasanno(node.value.func, 'live_val'):
+        target_obj = anno.getanno(node.value.func, 'live_val')
+        if not self._function_is_compilable(target_obj):
+          if anno.hasanno(node.value.func, 'fqn'):
+            target_fqn = anno.getanno(node.value.func, 'fqn')
+            if not self._should_compile(target_fqn):
+              return node
+            node = self._wrap_to_py_func_no_return(node.value)
+            return node
+      # Only the case of py_func with no return value is special.
+      # Everything else is processed by visit_Call.
+      self.visit(node.value)
+    else:
+      self.generic_visit(node)
+    return node
+
+  def visit_Call(self, node):
+    self.generic_visit(node)
+    if anno.hasanno(node.func, 'live_val'):
+      target_obj = anno.getanno(node.func, 'live_val')
+      if self._function_is_compilable(target_obj):
+        node = self._rename_compilable_function(node)
+      else:
+        raise NotImplementedError('py_func with return values')
+    elif anno.hasanno(node.func, 'type_fqn'):
+      node = self._rename_member_function_of_known_type(node)
+    else:
+      raise NotImplementedError(
+          'Member function call (of unknown type): %s.' % node.func.id)
+    return node
+
+  # pylint:enable=invalid-name
+
+
+def transform(node, namer, uncompiled_modules):
+  """Transform function call to the compiled counterparts.
+
+  Args:
+    node: AST to transform.
+    namer: FunctionNamer-like.
+    uncompiled_modules: set of string tuples, each tuple represents the fully
+        qualified name of a package containing functions that will not be
+        compiled.
+  Returns:
+    A tuple (node, new_names):
+        node: The transformed AST
+        new_names: set(string), containing any newly-generated names
+  """
+  transformer = CallTreeTransformer(namer, uncompiled_modules)
+  node = transformer.visit(node)
+  return node
diff --git a/tensorflow/contrib/py2tf/convert/call_trees_test.py b/tensorflow/contrib/py2tf/convert/call_trees_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..38c701eaadee8ad4df006a950192d51d78c799fe
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/call_trees_test.py
@@ -0,0 +1,94 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for call_trees module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.convert import call_trees
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.contrib.py2tf.pyct.static_analysis import live_values
+from tensorflow.contrib.py2tf.pyct.static_analysis import type_info
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class TestNamer(call_trees.FunctionNamer):
+
+  def compiled_function_name(self, original_name, live_object=None):
+    return 'renamed_%s' % original_name
+
+
+class CallTreesTest(test.TestCase):
+
+  def _parse_and_analyze(self, test_fn, namespace):
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, namespace, {})
+    node = type_info.resolve(node, {})
+    return node
+
+  def test_basic(self):
+
+    def test_fn_1(_):
+      raise ValueError('This should not be called in the compiled verison.')
+
+    def renamed_test_fn_1(a):
+      return a + 1
+
+    def test_fn_2(a):
+      return test_fn_1(a) + 1
+
+    node = self._parse_and_analyze(test_fn_2, {'test_fn_1': test_fn_1})
+    node = call_trees.transform(node, TestNamer(), set())
+    result = compiler.ast_to_object(node)
+    # Only test_fn_2 is transformed, so we'll insert renamed_test_fn_1 manually.
+    setattr(result, 'renamed_test_fn_1', renamed_test_fn_1)
+
+    self.assertEquals(3, result.test_fn_2(1))
+
+  def test_uncompiled_modules(self):
+
+    def test_fn(a):
+      a = math_ops.multiply(a, constant_op.constant(2))
+      a = math_ops.add(a, constant_op.constant(1))
+      return a
+
+    node = self._parse_and_analyze(test_fn, {
+        'math_ops': math_ops,
+        'constant_op': constant_op
+    })
+    node = call_trees.transform(node, TestNamer(),
+                                set(((math_ops.__name__,),
+                                     (constant_op.__name__,))))
+    result = compiler.ast_to_object(node)
+    setattr(result, 'math_ops', math_ops)
+    setattr(result, 'constant_op', constant_op)
+
+    with self.test_session() as sess:
+      # Not renamed, because the converter doesn't rename the definition itself.
+      # (the caller is responsible for that).
+      result_tensor = result.test_fn(constant_op.constant(1))
+      result_val = sess.run(result_tensor)
+
+    self.assertEquals(3, result_val)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/convert/continue_canonicalization.py b/tensorflow/contrib/py2tf/convert/continue_canonicalization.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f8ace77a830ebcc4d49fcf2190e4bac920b1cde
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/continue_canonicalization.py
@@ -0,0 +1,131 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Canonicalizes continue statements by de-sugaring into a control boolean."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import anno
+from tensorflow.contrib.py2tf.pyct import templates
+
+
+class ContinueCanonicalizationTransformer(gast.NodeTransformer):
+  """Canonicalizes continue statements into additional conditionals."""
+
+  def __init__(self, namer):
+    self.namer = namer
+    # This is a stack structure, to correctly process nested loops.
+    self.continuation_uses = []
+
+  def _create_continuation_check(self):
+
+    def template(var_name):
+      if not var_name:
+        pass
+
+    cond, = templates.replace(
+        template, var_name=gast.Name(self.continuation_uses[-1][1], None, None))
+    cond.body = []
+    return cond
+
+  def _create_continuation_trigger(self):
+
+    def template(var_name):  # pylint:disable=unused-argument
+      var_name = True
+
+    assign, = templates.replace(
+        template, var_name=gast.Name(self.continuation_uses[-1][1], None, None))
+    return assign
+
+  def _create_continuation_init(self):
+
+    def template(var_name):  # pylint:disable=unused-argument
+      var_name = False
+
+    assign, = templates.replace(
+        template, var_name=gast.Name(self.continuation_uses[-1][1], None, None))
+    return assign
+
+  def _visit_and_reindent_if_necessary(self, nodes):
+    reorganized_nodes = []
+    current_dest = reorganized_nodes
+    continue_used_in_block = False
+    for i, n in enumerate(nodes):
+      # TODO(mdan): This could be optimized if control structures are simple.
+      self.continuation_uses[-1][0] = False
+      n = self.visit(n)
+      current_dest.append(n)
+      if self.continuation_uses[-1][0]:
+        continue_used_in_block = True
+        if i < len(nodes) - 1:  # Last statement in block needs no protection.
+          cond = self._create_continuation_check()
+          current_dest.append(cond)
+          current_dest = cond.body
+    self.continuation_uses[-1][0] = continue_used_in_block
+    return reorganized_nodes
+
+  def _process_loop_block(self, block, scope):
+    cont_var = self.namer.new_symbol('cont_requested', scope.referenced)
+    self.continuation_uses.append([False, cont_var])
+    block = self._visit_and_reindent_if_necessary(block)
+    if self.continuation_uses[-1][0]:
+      block.insert(0, self._create_continuation_init())
+    self.continuation_uses.pop()
+    return block
+
+  def visit_While(self, node):
+    self.generic_visit(node.test)
+    node.body = self._process_loop_block(node.body,
+                                         anno.getanno(node, 'body_scope'))
+    for n in node.orelse:
+      self.generic_visit(n)
+    return node
+
+  def visit_For(self, node):
+    self.generic_visit(node.target)
+    self.generic_visit(node.iter)
+    node.body = self._process_loop_block(node.body,
+                                         anno.getanno(node, 'body_scope'))
+    for n in node.orelse:
+      self.generic_visit(n)
+    return node
+
+  def visit_If(self, node):
+    if self.continuation_uses:
+      self.generic_visit(node.test)
+      node.body = self._visit_and_reindent_if_necessary(node.body)
+      continue_used_in_body = self.continuation_uses[-1][0]
+      node.orelse = self._visit_and_reindent_if_necessary(node.orelse)
+      self.continuation_uses[-1][0] = (
+          continue_used_in_body or self.continuation_uses[-1][0])
+    else:
+      node = self.generic_visit(node)
+    return node
+
+  def visit_Continue(self, node):
+    self.continuation_uses[-1][0] = True
+    return self._create_continuation_trigger()
+
+  def visit_Break(self, node):
+    assert False, 'break statement should be desugared at this point'
+
+
+def transform(node, namer):
+  transformer = ContinueCanonicalizationTransformer(namer)
+  node = transformer.visit(node)
+  return node
diff --git a/tensorflow/contrib/py2tf/convert/continue_canonicalization_test.py b/tensorflow/contrib/py2tf/convert/continue_canonicalization_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..a041ff4641fef6c6d5cd7c502d1196dde26c55e0
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/continue_canonicalization_test.py
@@ -0,0 +1,112 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for continue_canonicalization module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.convert import continue_canonicalization
+from tensorflow.contrib.py2tf.convert import control_flow
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.python.platform import test
+
+
+class TestNamer(control_flow.SymbolNamer):
+
+  def new_symbol(self, name_root, _):
+    return name_root
+
+
+class ContinueCanonicalizationTest(test.TestCase):
+
+  def _parse_and_analyze(self, test_fn, namespace):
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    return node
+
+  def test_basic_continue(self):
+
+    def test_fn(x):
+      v = []
+      while x > 0:
+        x -= 1
+        if x % 2 == 0:
+          continue
+        v.append(x)
+      return v
+
+    node = self._parse_and_analyze(test_fn, {})
+    node = continue_canonicalization.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+
+    self.assertEqual(test_fn(0), result.test_fn(0))
+    self.assertEqual(test_fn(1), result.test_fn(1))
+    self.assertEqual(test_fn(2), result.test_fn(2))
+    self.assertEqual(test_fn(3), result.test_fn(3))
+    self.assertEqual(test_fn(4), result.test_fn(4))
+
+  def test_basic_continue_for_loop(self):
+
+    def test_fn(a):
+      v = []
+      for x in a:
+        x -= 1
+        if x % 2 == 0:
+          continue
+        v.append(x)
+      return v
+
+    node = self._parse_and_analyze(test_fn, {})
+    node = continue_canonicalization.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+
+    self.assertEqual(test_fn([]), result.test_fn([]))
+    self.assertEqual(test_fn([1]), result.test_fn([1]))
+    self.assertEqual(test_fn([2]), result.test_fn([2]))
+    self.assertEqual(test_fn([1, 2, 3]), result.test_fn([1, 2, 3]))
+
+  def test_continue_deeply_nested(self):
+
+    def test_fn(x):
+      v = []
+      u = []
+      w = []
+      while x > 0:
+        x -= 1
+        if x % 2 == 0:
+          if x % 3 != 0:
+            u.append(x)
+          else:
+            w.append(x)
+            continue
+        v.append(x)
+      return v, u, w
+
+    node = self._parse_and_analyze(test_fn, {})
+    node = continue_canonicalization.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+
+    self.assertEqual(test_fn(0), result.test_fn(0))
+    self.assertEqual(test_fn(1), result.test_fn(1))
+    self.assertEqual(test_fn(2), result.test_fn(2))
+    self.assertEqual(test_fn(3), result.test_fn(3))
+    self.assertEqual(test_fn(4), result.test_fn(4))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/convert/control_flow.py b/tensorflow/contrib/py2tf/convert/control_flow.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ebd9ad93dbc17814d1d7f53c3eac2e078030141
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/control_flow.py
@@ -0,0 +1,189 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Handles control flow statements: while, if."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import anno
+from tensorflow.contrib.py2tf.pyct import templates
+
+
+class SymbolNamer(object):
+  """Describes the interface for ControlFlowTransformer's namer."""
+
+  def new_symbol(self, name_root, reserved_locals):
+    """Generate a new unique symbol.
+
+    Args:
+      name_root: String, used as stem in the new name.
+      reserved_locals: Set(string), additional local symbols that are reserved
+          and which should not be used.
+    Returns:
+      String.
+    """
+    raise NotImplementedError()
+
+
+class SymbolRenamer(gast.NodeTransformer):
+
+  def __init__(self, name_map):
+    self.name_map = name_map
+
+  def visit_Name(self, node):
+    if node.id in self.name_map:
+      node.id = self.name_map[node.id]
+    return node
+
+
+class ControlFlowTransformer(gast.NodeTransformer):
+  """Transforms control flow structures like loops an conditionals."""
+
+  def __init__(self, namer):
+    self.namer = namer
+
+  # pylint:disable=invalid-name
+
+  def visit_For(self, node):
+    assert False, 'for statement should have been canonicalized at this point'
+
+  def visit_If(self, node):
+    self.generic_visit(node)
+
+    body_scope = anno.getanno(node, 'body_scope')
+    orelse_scope = anno.getanno(node, 'orelse_scope')
+
+    if body_scope.created - orelse_scope.created:
+      raise ValueError(
+          'The if branch creates new symbols that the else branch does not.')
+    if orelse_scope.created - body_scope.created:
+      raise ValueError(
+          'The else branch creates new symbols that the if branch does not.')
+
+    def template(  # pylint:disable=missing-docstring
+        test,
+        body_name,
+        body,
+        orelse_name,
+        orelse,
+        aliased,
+        aliases,  # pylint:disable=unused-argument
+        aliased_results,
+        results):  # pylint:disable=unused-argument
+
+      def body_name():  # pylint:disable=function-redefined
+        aliases, = aliased,  # pylint:disable=unused-variable
+        body  # pylint:disable=pointless-statement
+        return (aliased_results,)
+
+      def orelse_name():  # pylint:disable=function-redefined
+        aliases, = aliased,  # pylint:disable=unused-variable
+        orelse  # pylint:disable=pointless-statement
+        return (aliased_results,)
+
+      results = tf.cond(test, body_name, orelse_name)  # pylint:disable=undefined-variable
+
+    all_modified = tuple(body_scope.modified | orelse_scope.modified)
+    all_referenced = body_scope.referenced | orelse_scope.referenced
+
+    # Alias the closure variables inside the conditional functions
+    # to avoid errors caused by the local variables created in the branch
+    # functions.
+    need_alias = (
+        (body_scope.modified | orelse_scope.modified) -
+        (body_scope.created | orelse_scope.created))
+    aliased = tuple(need_alias)
+    aliases = tuple(
+        self.namer.new_symbol(s, all_referenced) for s in aliased)
+    alias_map = dict(zip(aliased, aliases))
+    node_body = node.body
+    node_body = [SymbolRenamer(alias_map).visit(n) for n in node_body]
+    node_orelse = node.orelse
+    node_orelse = [SymbolRenamer(alias_map).visit(n) for n in node_orelse]
+
+    if len(all_modified) == 1:
+      results = gast.Name(all_modified[0], None, None)
+    else:
+      results = gast.Tuple(
+          tuple(gast.Name(s, None, None) for s in all_modified), None)
+
+    return templates.replace(
+        template,
+        test=node.test,
+        body_name=gast.Name(
+            self.namer.new_symbol('if_true', all_referenced), None, None),
+        body=node_body,
+        orelse_name=gast.Name(
+            self.namer.new_symbol('if_false', all_referenced), None, None),
+        orelse=node_orelse,
+        aliased=tuple(gast.Name(s, None, None) for s in aliased),
+        aliases=tuple(gast.Name(s, None, None) for s in aliases),
+        aliased_results=tuple(
+            gast.Name(alias_map[s] if s in aliased else s, None, None)
+            for s in all_modified),
+        results=results)
+
+  def visit_While(self, node):
+    self.generic_visit(node)
+
+    body_scope = anno.getanno(node, 'body_scope')
+    body_closure = tuple(body_scope.modified - body_scope.created)
+
+    def template(
+        state,  # pylint:disable=unused-argument
+        state_ast_tuple,  # pylint:disable=unused-argument
+        test_name,
+        test,  # pylint:disable=unused-argument
+        body_name,
+        body):
+
+      def test_name(state):  # pylint:disable=function-redefined,unused-argument
+        return test
+
+      def body_name(state):  # pylint:disable=function-redefined,unused-argument
+        body  # pylint:disable=pointless-statement
+        return state,
+
+      state_ast_tuple = tf.while_loop(test_name, body_name, [state])  # pylint:disable=undefined-variable
+
+    test_name = self.namer.new_symbol('loop_test', body_scope.referenced)
+    body_name = self.namer.new_symbol('loop_body', body_scope.referenced)
+    if len(body_closure) == 1:
+      state = gast.Name(body_closure[0], None, None)
+      state_ast_tuple = state
+    else:
+      state = tuple(gast.Name(n, None, None) for n in body_closure)
+      state_ast_tuple = gast.Tuple(state, None)
+    node = templates.replace(
+        template,
+        state=state,
+        state_ast_tuple=state_ast_tuple,
+        test_name=gast.Name(test_name, gast.Load(), None),
+        test=node.test,
+        body_name=gast.Name(body_name, gast.Load(), None),
+        body=node.body)
+
+    return node
+
+  # pylint:enable=invalid-name
+
+
+def transform(node, namer):
+  transformer = ControlFlowTransformer(namer)
+  node = transformer.visit(node)
+  return node
diff --git a/tensorflow/contrib/py2tf/convert/control_flow_test.py b/tensorflow/contrib/py2tf/convert/control_flow_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..121af4ee949152cb6df7496a4a0c64f13f65a5eb
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/control_flow_test.py
@@ -0,0 +1,125 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for control_flow module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.convert import control_flow
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.contrib.py2tf.pyct.static_analysis import live_values
+from tensorflow.contrib.py2tf.pyct.static_analysis import type_info
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.platform import test
+
+
+class TestNamer(control_flow.SymbolNamer):
+
+  def new_symbol(self, name_root, used):
+    i = 0
+    while True:
+      name = '%s%d' % (name_root, i)
+      if name not in used:
+        return name
+      i += 1
+
+
+class ControlFlowTest(test.TestCase):
+
+  def _parse_and_analyze(self, test_fn, namespace):
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, namespace, {})
+    node = type_info.resolve(node, {})
+    return node
+
+  def test_simple_while(self):
+
+    def test_fn(n):
+      i = 0
+      s = 0
+      while i < n:
+        s += i
+        i += 1
+      return s, i, n
+
+    node = self._parse_and_analyze(test_fn, {})
+    node = control_flow.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+    setattr(result, 'tf', control_flow_ops)
+
+    with self.test_session() as sess:
+      self.assertEqual((10, 5, 5),
+                       sess.run(result.test_fn(constant_op.constant(5))))
+
+  def test_while_single_var(self):
+
+    def test_fn(n):
+      while n > 0:
+        n -= 1
+      return n
+
+    node = self._parse_and_analyze(test_fn, {})
+    node = control_flow.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+    setattr(result, 'tf', control_flow_ops)
+
+    with self.test_session() as sess:
+      self.assertEqual(0, sess.run(result.test_fn(constant_op.constant(5))))
+
+  def test_simple_if(self):
+
+    def test_fn(n):
+      a = 0
+      b = 0
+      if n > 0:
+        a = -n
+      else:
+        b = 2 * n
+      return a, b
+
+    node = self._parse_and_analyze(test_fn, {})
+    node = control_flow.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+    setattr(result, 'tf', control_flow_ops)
+
+    with self.test_session() as sess:
+      self.assertEqual((-1, 0), sess.run(
+          result.test_fn(constant_op.constant(1))))
+      self.assertEqual((0, -2),
+                       sess.run(result.test_fn(constant_op.constant(-1))))
+
+  def test_if_single_var(self):
+
+    def test_fn(n):
+      if n > 0:
+        n = -n
+      return n
+
+    node = self._parse_and_analyze(test_fn, {})
+    node = control_flow.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+    setattr(result, 'tf', control_flow_ops)
+
+    with self.test_session() as sess:
+      self.assertEqual(-1, sess.run(result.test_fn(constant_op.constant(1))))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/convert/for_canonicalization.py b/tensorflow/contrib/py2tf/convert/for_canonicalization.py
new file mode 100644
index 0000000000000000000000000000000000000000..52360789cdc25528d925092e3e269c9968f2022f
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/for_canonicalization.py
@@ -0,0 +1,96 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Canonicalizes for loops into while loops.
+
+This canonicalizer uses the len function on its argument. That should be
+converted to a tf.shape separately.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import anno
+from tensorflow.contrib.py2tf.pyct import templates
+
+
+class ForLoopCanonicalizationTransformer(gast.NodeTransformer):
+  """Canonicalizes for loops (e.g. into while loops)."""
+
+  def __init__(self, namer):
+    self.namer = namer
+
+  def visit_For(self, node):
+    self.generic_visit(node)
+    body_scope = anno.getanno(node, 'body_scope')
+
+    # TODO(mdan): Distinguish between `for i in n` and `for i in range(n)`
+    # Or maybe we should replace range with tf.range?
+
+    if anno.hasanno(node, 'extra_cond'):
+
+      def template(loop_iter, target, body, i, n, extra_cond):  # pylint:disable=unused-argument
+        i = 0
+        n = len(loop_iter)  # pylint:disable=undefined-variable
+        while i < n and extra_cond:
+          # TODO(mdan): Use TensorListFromTensor(loop_iter) here.
+          target = loop_iter[i]
+          body  # pylint:disable=pointless-statement
+          i += 1
+
+      return templates.replace(
+          template,
+          loop_iter=node.iter,
+          target=node.target,
+          body=node.body,
+          i=gast.Name(
+              self.namer.new_symbol('i', body_scope.referenced), None, None),
+          n=gast.Name(
+              self.namer.new_symbol('n', body_scope.referenced), None, None),
+          extra_cond=anno.getanno(node, 'extra_cond'))
+    else:
+
+      def template(loop_iter, target, body, i, n):  # pylint:disable=unused-argument
+        i = 0
+        n = len(loop_iter)  # pylint:disable=undefined-variable
+        while i < n:
+          # TODO(mdan): Use TensorListFromTensor(loop_iter) here.
+          target = loop_iter[i]
+          body  # pylint:disable=pointless-statement
+          i += 1
+
+      return templates.replace(
+          template,
+          loop_iter=node.iter,
+          target=node.target,
+          body=node.body,
+          i=gast.Name(
+              self.namer.new_symbol('i', body_scope.referenced), None, None),
+          n=gast.Name(
+              self.namer.new_symbol('n', body_scope.referenced), None, None))
+
+  def visit_Continue(self, node):
+    assert False, 'continue statement should be desugared at this point'
+
+  def visit_Break(self, node):
+    assert False, 'break statement should be desugared at this point'
+
+
+def transform(node, namer):
+  transformer = ForLoopCanonicalizationTransformer(namer)
+  node = transformer.visit(node)
+  return node
diff --git a/tensorflow/contrib/py2tf/convert/for_canonicalization_test.py b/tensorflow/contrib/py2tf/convert/for_canonicalization_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..8de2d1a0f82cbb2f995a83fcdc1521ebf172e1ce
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/for_canonicalization_test.py
@@ -0,0 +1,61 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for for_canonicalization module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.convert import control_flow
+from tensorflow.contrib.py2tf.convert import for_canonicalization
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.python.platform import test
+
+
+class TestNamer(control_flow.SymbolNamer):
+
+  def new_symbol(self, name_root, _):
+    return name_root
+
+
+class ControlFlowTest(test.TestCase):
+
+  def _parse_and_analyze(self, test_fn, namespace):
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    return node
+
+  def test_basic_for(self):
+
+    def test_fn(l):
+      s = 0
+      for e in l:
+        s += e
+      return s
+
+    node = self._parse_and_analyze(test_fn, {})
+    node = for_canonicalization.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+
+    l = [1, 2, 3]
+    self.assertEqual(test_fn(l), result.test_fn(l))
+    l = []
+    self.assertEqual(test_fn(l), result.test_fn(l))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/convert/logical_expressions.py b/tensorflow/contrib/py2tf/convert/logical_expressions.py
new file mode 100644
index 0000000000000000000000000000000000000000..df980d41c9c57e325bee9a1fa870d9c95f46ea41
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/logical_expressions.py
@@ -0,0 +1,74 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Converter for logical expressions.
+
+e.g. `a and b -> tf.logical_and(a, b)`. This is not done automatically in TF.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import parser
+
+
+class LogicalExpressionTransformer(gast.NodeTransformer):
+  """Converts logical expressions to corresponding TF calls."""
+
+  def __init__(self):
+    # TODO(mdan): Look into replacing with bitwise operators instead.
+    self.op_mapping = {
+        gast.And: 'tf.logical_and',
+        gast.Or: 'tf.logical_or',
+        gast.Not: 'tf.logical_not',
+        gast.Eq: 'tf.equal',
+    }
+
+  def visit_Compare(self, node):
+    node = self.generic_visit(node)
+    if len(node.ops) > 1:
+      raise NotImplementedError()
+    cmp_type = type(node.ops[0])
+    if cmp_type in self.op_mapping:
+      tf_function = parser.parse_str(self.op_mapping[cmp_type]).body[0].value
+      return gast.Call(
+          func=tf_function, args=[node.left, node.comparators[0]], keywords=[])
+    return node
+
+  def visit_UnaryOp(self, node):
+    node = self.generic_visit(node)
+    if isinstance(node.op, gast.Not):
+      tf_function = parser.parse_str(self.op_mapping[type(
+          node.op)]).body[0].value
+      node = gast.Call(func=tf_function, args=[node.operand], keywords=[])
+    return node
+
+  def visit_BoolOp(self, node):
+    # TODO(mdan): A normalizer may be useful here. Use ANF?
+    node = self.generic_visit(node)
+    tf_function = parser.parse_str(self.op_mapping[type(node.op)]).body[0].value
+    left = node.values[0]
+    for i in range(1, len(node.values)):
+      left = gast.Call(
+          func=tf_function, args=[left, node.values[i]], keywords=[])
+    return left
+
+
+def transform(node):
+  transformer = LogicalExpressionTransformer()
+  node = transformer.visit(node)
+  return node
diff --git a/tensorflow/contrib/py2tf/convert/logical_expressions_test.py b/tensorflow/contrib/py2tf/convert/logical_expressions_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..f07fa017b9dacd7a998f04fa7f6fdd83fccb1811
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/logical_expressions_test.py
@@ -0,0 +1,59 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for logical_expressions module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.convert import logical_expressions
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class GradientsFunctionTest(test.TestCase):
+
+  def test_equals(self):
+
+    def test_fn(a, b):
+      return a == b
+
+    node = parser.parse_object(test_fn)
+    node = logical_expressions.transform(node)
+    result = compiler.ast_to_object(node)
+    setattr(result, 'tf', math_ops)
+
+    with self.test_session() as sess:
+      self.assertTrue(sess.run(result.test_fn(1, 1)))
+      self.assertFalse(sess.run(result.test_fn(1, 2)))
+
+  def test_bool_ops(self):
+
+    def test_fn(a, b, c):
+      return (a or b) and (a or b or c)
+
+    node = parser.parse_object(test_fn)
+    node = logical_expressions.transform(node)
+    result = compiler.ast_to_object(node)
+    setattr(result, 'tf', math_ops)
+
+    with self.test_session() as sess:
+      self.assertTrue(sess.run(result.test_fn(True, False, True)))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/convert/print_functions.py b/tensorflow/contrib/py2tf/convert/print_functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..5da738c4954fb628212562b73641e1fc27032168
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/print_functions.py
@@ -0,0 +1,51 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Compatibility support. Converts Print nodes to function calls."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import anno
+
+
+class PrintFunctionTransformer(gast.NodeTransformer):
+  """Transforms Print nodes to Call so they can be handled as functions."""
+
+  # pylint:disable=invalid-name
+
+  def visit_Print(self, node):
+    self.generic_visit(node)
+    for n in node.values:
+      n.ctx = gast.Param()
+    call_node = gast.Call(
+        func=gast.Name('print', gast.Load(), None),
+        args=node.values,
+        keywords=[])
+    anno.setanno(call_node.func, 'live_val', print)
+    anno.setanno(call_node.func, 'fqn', 'print')
+    anno.setanno(call_node, 'args_scope', anno.getanno(node, 'args_scope'))
+    node = gast.Expr(call_node)
+    return node
+
+  # pylint:enable=invalid-name
+
+
+def transform(node):
+  transformer = PrintFunctionTransformer()
+  node = transformer.visit(node)
+  return node
diff --git a/tensorflow/contrib/py2tf/convert/print_functions_test.py b/tensorflow/contrib/py2tf/convert/print_functions_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..65e592b66e9d0c08c7d2127ff40be8a0dc28ec6c
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/print_functions_test.py
@@ -0,0 +1,55 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for print_functions module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.convert import print_functions
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.contrib.py2tf.pyct.static_analysis import live_values
+from tensorflow.contrib.py2tf.pyct.static_analysis import type_info
+from tensorflow.python.platform import test
+
+
+class PrintFunctionsTest(test.TestCase):
+
+  def _parse_and_analyze(self, test_fn, namespace):
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, namespace, {})
+    node = type_info.resolve(node, {})
+    return node
+
+  def test_transform(self):
+
+    def test_fn(a):
+      print(a)
+
+    node = self._parse_and_analyze(test_fn, {'print': print})
+    node = print_functions.transform(node)
+    result = compiler.ast_to_object(node)
+
+    result.test_fn('a')
+    self.assertTrue(isinstance(node.body[0].body[0].value, gast.Call))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/convert/side_effect_guards.py b/tensorflow/contrib/py2tf/convert/side_effect_guards.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f25303fbac1184d016a63d629ba2ecf17d7e426
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/side_effect_guards.py
@@ -0,0 +1,159 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Adds guards against function calls with side effects.
+
+Only standalone calls are guarded.
+
+WARNING: This mechanism is incomplete. Particularly, it only guards the
+arguments passed to functions, and does not account for indirectly modified
+state.
+
+Example:
+  y = tf.layers.dense(x)       # Creates TF variable 'foo'
+  loss = loss(y)
+  opt.minimize(loss)           # indirectly affects 'foo'
+  z = tf.get_variable('foo')   # Indirectly affects `loss` and 'foo'
+  # Here, `loss` can be guarded. But `z` cannot.
+
+# TODO(mdan): We should probably define a safe mode where we guard everything.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import anno
+from tensorflow.contrib.py2tf.pyct import templates
+
+
+class SymbolNamer(object):
+  """Describes the interface for SideEffectGuardTransformer's namer."""
+
+  def new_symbol(self, name_root, reserved_locals):
+    """Generate a new unique function_name.
+
+    Args:
+      name_root: String, used as stem in the new name.
+      reserved_locals: Set(string), additional local symbols that are reserved.
+    Returns:
+      String.
+    """
+    raise NotImplementedError()
+
+
+class SideEffectGuardTransformer(gast.NodeTransformer):
+  """Adds control dependencies to functions with side effects."""
+
+  def __init__(self, namer):
+    self.namer = namer
+    self.indent_next = False
+    self.next_indent_owner = None
+
+  # pylint:disable=invalid-name
+
+  def _visit_and_reindent(self, nodes):
+    new_nodes = []
+    current_dest = new_nodes
+    for n in nodes:
+      n = self.visit(n)
+      if isinstance(n, (list, tuple)):
+        current_dest.extend(n)
+      else:
+        current_dest.append(n)
+      if self.indent_next:
+        assert self.next_indent_owner is not None
+        current_dest.append(self.next_indent_owner)
+        current_dest = self.next_indent_owner.body
+        self.next_indent_owner = None
+        self.indent_next = False
+    if not current_dest:
+      # TODO(mdan): There may still be something that could be done.
+      raise ValueError('Unable to insert statement into the computation flow: '
+                       'it is not followed by any computation that can we can '
+                       'condition on the statement.')
+    return new_nodes
+
+  def visit_FunctionDef(self, node):
+    if anno.hasanno(node, 'skip_processing'):
+      return node
+    node.body = self._visit_and_reindent(node.body)
+    return node
+
+  def _gate_symbols(self, guard_statement, guarded_args):
+
+    def template(args):  # pylint:disable=unused-argument
+      (args,) = (tf.identity(a) for a in (args,))  # pylint:disable=undefined-variable
+
+    guards = templates.replace(
+        template, args=tuple(gast.Name(a, None, None) for a in guarded_args))
+    guard_statement.body.extend(guards)
+    return guard_statement
+
+  def visit_Expr(self, node):
+    self.generic_visit(node)
+    if isinstance(node.value, gast.Call):
+      # Patterns of single function calls, like:
+      #   opt.minimize(loss)
+      # or:
+      #   tf.py_func(...)
+
+      args_scope = anno.getanno(node.value, 'args_scope')
+      temp_name = self.namer.new_symbol('temp', args_scope.parent.referenced)
+      # TODO(mdan): Unsafe reference modification!
+      args_scope.mark_write(temp_name)
+
+      def template(call, temp_result):
+        temp_result = call
+        if temp_result is not None:
+          if not isinstance(temp_result, (list, tuple)):
+            temp_result = (temp_result,)
+          ctx = tf.control_dependencies(temp_result)  # pylint:disable=undefined-variable
+        else:
+          ctx = contextmanager(lambda: (yield))()  # pylint:disable=undefined-variable
+        with ctx:
+          # TODO(mdan): Also insert ops to re-fetch if variables are involved.
+          pass  # Will be removed below.
+
+      # TODO(mdan): This is brittle. Reorganize this mechanism.
+      statements = templates.replace(
+          template,
+          call=node.value,
+          temp_result=gast.Name(temp_name, None, None))
+      control_deps_guard = statements[-1]
+      control_deps_guard.body = []
+
+      # First, attempt to gate future evaluation of args. If that's not
+      # possible, gate all remaining statements (and that may fail too, see
+      # _visit_and_reindent.
+      guarded_args = tuple(
+          n for n in args_scope.used if n in args_scope.parent.modified)
+      if guarded_args:
+        node = tuple(statements[:-1]) + (
+            self._gate_symbols(control_deps_guard, guarded_args),)
+      else:
+        node = tuple(statements[:-1])
+        # The mechanism will insert the guard statement later.
+        self.indent_next = True
+        self.next_indent_owner = control_deps_guard
+    return node
+
+  # pylint:enable=invalid-name
+
+
+def transform(node, namer):
+  transformer = SideEffectGuardTransformer(namer)
+  return transformer.visit(node)
diff --git a/tensorflow/contrib/py2tf/convert/side_effect_guards_test.py b/tensorflow/contrib/py2tf/convert/side_effect_guards_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..d932840186034c073512cbd1e253fc7676aa83e7
--- /dev/null
+++ b/tensorflow/contrib/py2tf/convert/side_effect_guards_test.py
@@ -0,0 +1,71 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for side_effect_guards module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.convert import side_effect_guards
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.contrib.py2tf.pyct.static_analysis import live_values
+from tensorflow.contrib.py2tf.pyct.static_analysis import type_info
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class TestNamer(side_effect_guards.SymbolNamer):
+
+  def new_symbol(self, name_root, _):
+    return name_root
+
+
+class SideEffectGuardsTest(test.TestCase):
+
+  def _parse_and_analyze(self, test_fn, namespace):
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, namespace, {})
+    node = type_info.resolve(node, {})
+    return node
+
+  def test_transform(self):
+
+    def test_fn(a):
+      state_ops.assign(a, a + 1)
+      return a
+
+    node = self._parse_and_analyze(test_fn, {'state_ops': state_ops})
+    node = side_effect_guards.transform(node, TestNamer())
+    result = compiler.ast_to_object(node)
+    setattr(result, 'state_ops', state_ops)
+
+    # TODO(mdan): Configure the namespaces instead of doing these hacks.
+    ops.identity = array_ops.identity
+    setattr(result, 'tf', ops)
+
+    with self.test_session() as sess:
+      v = variables.Variable(2)
+      sess.run(v.initializer)
+      self.assertEqual(3, sess.run(result.test_fn(v)))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/naming.py b/tensorflow/contrib/py2tf/naming.py
new file mode 100644
index 0000000000000000000000000000000000000000..61772ec07b41d366769307982bf0376de9bb495e
--- /dev/null
+++ b/tensorflow/contrib/py2tf/naming.py
@@ -0,0 +1,102 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Symbol naming utilities."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.util import tf_inspect
+
+
+class Namer(object):
+  """Implementation of the namer interfaces required by various converters.
+
+  This implementation performs additional tasks like keeping track of the
+  function calls that have been encountered and replaced with calls to their
+  corresponding compiled counterparts.
+
+  Interfaces currently implemented:
+    * call_trees.FunctionNamer
+    * control_flow.SymbolNamer
+    * side_effect_guards.SymbolNamer
+  """
+
+  def __init__(self, global_namespace, name_map=None):
+    self.global_namespace = global_namespace
+
+    self.renamed_calls = {}
+    if name_map is not None:
+      self.renamed_calls.update(name_map)
+
+    self.generated_names = set()
+
+  def compiled_class_name(self, original_name, live_object=None):
+    """See call_trees.FunctionNamer.compiled_class_name."""
+    if live_object is not None and live_object in self.renamed_calls:
+      return self.renamed_calls[live_object]
+
+    new_name_root = 'Tf%s' % original_name
+    new_name = new_name_root
+    n = 0
+    while new_name in self.global_namespace:
+      n += 1
+      new_name = '%s_%d' % (new_name_root, n)
+    if live_object is not None:
+      self.renamed_calls[live_object] = new_name
+    self.generated_names.add(new_name)
+    return new_name
+
+  def compiled_function_name(self,
+                             original_name,
+                             live_object=None,
+                             owner_type=None):
+    """See call_trees.FunctionNamer.compiled_function_name."""
+    if live_object is not None and live_object in self.renamed_calls:
+      return self.renamed_calls[live_object]
+
+    if owner_type is None:
+      # Top level functions: rename
+      new_name_root = 'tf__%s' % original_name
+      new_name = new_name_root
+      n = 0
+      while new_name in self.global_namespace:
+        n += 1
+        new_name = '%s_%d' % (new_name_root, n)
+    else:
+      if tf_inspect.isclass(owner_type):
+        # Class members: do not rename (the entire class will be renamed)
+        new_name = original_name
+      else:
+        raise NotImplementedError('Member function "%s" of non-class type: %s' %
+                                  (original_name, owner_type))
+
+    if live_object is not None:
+      self.renamed_calls[live_object] = new_name
+    self.generated_names.add(new_name)
+    return new_name
+
+  def new_symbol(self, name_root, reserved_locals):
+    """See control_flow.SymbolNamer.new_symbol."""
+    new_name = name_root
+    n = 0
+    while (new_name in self.global_namespace
+           or new_name in reserved_locals
+           or new_name in self.generated_names):
+      n += 1
+      new_name = '%s_%d' % (name_root, n)
+
+    self.generated_names.add(new_name)
+    return new_name
diff --git a/tensorflow/contrib/py2tf/naming_test.py b/tensorflow/contrib/py2tf/naming_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..9403d9ae1f68d49ac19503b24fb86486cf197200
--- /dev/null
+++ b/tensorflow/contrib/py2tf/naming_test.py
@@ -0,0 +1,73 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for naming module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf import naming
+from tensorflow.python.platform import test
+
+
+class NamerTest(test.TestCase):
+
+  def test_compiled_function_name_tracks_names(self):
+    def bar():
+      pass
+
+    namer = naming.Namer(set())
+    self.assertEqual('tf__foo', namer.compiled_function_name('foo'))
+    self.assertEqual('tf__bar', namer.compiled_function_name('bar', bar))
+    self.assertEqual({bar: 'tf__bar'}, namer.renamed_calls)
+    self.assertItemsEqual(('tf__bar', 'tf__foo'), namer.generated_names)
+
+  def test_compiled_function_name_consistent(self):
+    def foo():
+      pass
+
+    namer = naming.Namer(set())
+    self.assertEqual('tf__foo', namer.compiled_function_name('foo', foo))
+    self.assertEqual('tf__foo', namer.compiled_function_name('foo', foo))
+
+  def test_compiled_function_name_avoids_global_conflicts(self):
+    def foo():
+      pass
+
+    namer = naming.Namer(set(('tf__foo',)))
+    self.assertEqual('tf__foo_1', namer.compiled_function_name('foo', foo))
+
+  def test_new_symbol_tracks_names(self):
+    namer = naming.Namer(set())
+    self.assertEqual('temp', namer.new_symbol('temp', set()))
+    self.assertItemsEqual(('temp',), namer.generated_names)
+
+  def test_new_symbol_avoids_duplicates(self):
+    namer = naming.Namer(set())
+    self.assertEqual('temp', namer.new_symbol('temp', set()))
+    self.assertEqual('temp_1', namer.new_symbol('temp', set()))
+    self.assertItemsEqual(('temp', 'temp_1'), namer.generated_names)
+
+  def test_new_symbol_avoids_conflicts(self):
+    namer = naming.Namer(set(('temp',)))
+    # temp is reserved in the global namespace
+    self.assertEqual('temp_1', namer.new_symbol('temp', set()))
+    # temp_2 is reserved in the local namespace
+    self.assertEqual('temp_3', namer.new_symbol('temp', set(('temp_2',))))
+    self.assertItemsEqual(('temp_1', 'temp_3'), namer.generated_names)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/pyct/BUILD b/tensorflow/contrib/py2tf/pyct/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..b60ed918f5185e963de0877b13c3747d2b86f1e4
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/BUILD
@@ -0,0 +1,81 @@
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+py_library(
+    name = "pyct",
+    srcs = [
+        "__init__.py",
+        "anno.py",
+        "compiler.py",
+        "parser.py",
+        "pretty_printer.py",
+        "templates.py",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        "@astor_archive//:astor",
+        "@gast_archive//:gast",
+        "@termcolor_archive//:termcolor",
+    ],
+)
+
+py_test(
+    name = "anno_test",
+    srcs = ["anno_test.py"],
+    deps = [
+        ":pyct",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "compiler_test",
+    srcs = ["compiler_test.py"],
+    deps = [
+        ":pyct",
+        "//tensorflow/python:client_testlib",
+        "@gast_archive//:gast",
+    ],
+)
+
+py_test(
+    name = "parser_test",
+    srcs = ["parser_test.py"],
+    deps = [
+        ":pyct",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "pretty_printer_test",
+    srcs = ["pretty_printer_test.py"],
+    deps = [
+        ":pyct",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "templates_test",
+    srcs = ["templates_test.py"],
+    deps = [
+        ":pyct",
+        "//tensorflow/python:client_testlib",
+        "@gast_archive//:gast",
+    ],
+)
diff --git a/tensorflow/contrib/py2tf/pyct/__init__.py b/tensorflow/contrib/py2tf/pyct/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d787e56bbecbd1d891fdf41207256c4c5096224f
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/__init__.py
@@ -0,0 +1,19 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python source code transformation library."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_impl.py b/tensorflow/contrib/py2tf/pyct/anno.py
similarity index 51%
rename from tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_impl.py
rename to tensorflow/contrib/py2tf/pyct/anno.py
index a640dfe7dfbcce96261589c7fc49107deaefdd54..889e4ba4ffaed887faffb8736e4a59502da99e81 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_impl.py
+++ b/tensorflow/contrib/py2tf/pyct/anno.py
@@ -12,37 +12,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Sigmoid bijector."""
+"""Handling annotations on AST nodes.
+
+Adapted from Tangent.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops.distributions import bijector
-
-
-__all__ = [
-    "Sigmoid",
-]
-
 
-class Sigmoid(bijector.Bijector):
-  """Bijector which computes `Y = g(X) = 1 / (1 + exp(-X))`."""
+def getanno(node, key, field_name='___pyct_anno'):
+  return getattr(node, field_name)[key]
 
-  def __init__(self, validate_args=False, name="sigmoid"):
-    super(Sigmoid, self).__init__(
-        event_ndims=0, validate_args=validate_args, name=name)
 
-  def _forward(self, x):
-    return math_ops.sigmoid(x)
+def hasanno(node, key, field_name='___pyct_anno'):
+  return hasattr(node, field_name) and key in getattr(node, field_name)
 
-  def _inverse(self, y):
-    return math_ops.log(y) - math_ops.log1p(-y)
 
-  def _inverse_log_det_jacobian(self, y):
-    return -math_ops.log(y) - math_ops.log1p(-y)
+def setanno(node, key, value, field_name='___pyct_anno'):
+  annotations = getattr(node, field_name, {})
+  setattr(node, field_name, annotations)
+  annotations[key] = value
 
-  def _forward_log_det_jacobian(self, x):
-    return -nn_ops.softplus(-x) - nn_ops.softplus(x)
+  # So that the annotations survive gast_to_ast() and ast_to_gast()
+  if field_name not in node._fields:
+    node._fields += (field_name,)
diff --git a/tensorflow/contrib/py2tf/pyct/anno_test.py b/tensorflow/contrib/py2tf/pyct/anno_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..19e3b4576210c3715620fc7002c91c5130b46ed0
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/anno_test.py
@@ -0,0 +1,42 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for anno module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import ast
+
+from tensorflow.contrib.py2tf.pyct import anno
+from tensorflow.python.platform import test
+
+
+class AnnoTest(test.TestCase):
+
+  def test_basic(self):
+    node = ast.Name()
+
+    self.assertFalse(anno.hasanno(node, 'foo'))
+    with self.assertRaises(AttributeError):
+      anno.getanno(node, 'foo')
+
+    anno.setanno(node, 'foo', 3)
+    self.assertTrue(anno.hasanno(node, 'foo'))
+    self.assertEqual(3, anno.getanno(node, 'foo'))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/pyct/compiler.py b/tensorflow/contrib/py2tf/pyct/compiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..b09353cc72bd5f9d02a8973ebe880b92d39ac304
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/compiler.py
@@ -0,0 +1,62 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Converting AST to code.
+
+Adapted from Tangent.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# TODO(mdan): Use six for compatibility here.
+import imp
+import os
+import tempfile
+
+import astor
+import gast
+
+
+def ast_to_source(node, indentation):
+  """Return the source code of given AST."""
+  if isinstance(node, gast.AST):
+    node = gast.gast_to_ast(node)
+  generator = astor.codegen.SourceGenerator(indentation, False,
+                                            astor.string_repr.pretty_string)
+  generator.visit(node)
+  generator.result.append('\n')
+  return astor.source_repr.pretty_source(generator.result).lstrip()
+
+
+def ast_to_object(node, indentation='  '):
+  """Return the Python objects represented by given AST.
+
+  Compiling the AST code this way ensures that the source code is readable by
+  e.g. `pdb` or `inspect`.
+
+  Args:
+    node: The code to compile, as an AST object.
+    indentation: The string to use for indentation.
+
+  Returns:
+    A module object containing the compiled source code.
+  """
+  source = ast_to_source(node, indentation)
+
+  with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
+    module_name = os.path.basename(f.name[:-3])
+    f.write(source)
+  return imp.load_source(module_name, f.name)
diff --git a/tensorflow/contrib/py2tf/pyct/compiler_test.py b/tensorflow/contrib/py2tf/pyct/compiler_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0cde43566310b99bac5035285154fde906fa127
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/compiler_test.py
@@ -0,0 +1,86 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for compiler module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import textwrap
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.python.platform import test
+
+
+class CompilerTest(test.TestCase):
+
+  def test_ast_to_source(self):
+    node = gast.If(
+        test=gast.Num(1),
+        body=[
+            gast.Assign(
+                targets=[gast.Name('a', gast.Store(), None)],
+                value=gast.Name('b', gast.Load(), None))
+        ],
+        orelse=[
+            gast.Assign(
+                targets=[gast.Name('a', gast.Store(), None)],
+                value=gast.Str('c'))
+        ])
+    self.assertEqual(
+        textwrap.dedent("""
+            if 1:
+              a = b
+            else:
+              a = 'c'
+        """).strip(),
+        compiler.ast_to_source(node, indentation='  ').strip())
+
+  def test_ast_to_object(self):
+    node = gast.FunctionDef(
+        name='f',
+        args=gast.arguments(
+            args=[gast.Name('a', gast.Param(), None)],
+            vararg=None,
+            kwonlyargs=[],
+            kwarg=None,
+            defaults=[],
+            kw_defaults=[]),
+        body=[
+            gast.Return(
+                gast.BinOp(
+                    op=gast.Add(),
+                    left=gast.Name('a', gast.Load(), None),
+                    right=gast.Num(1)))
+        ],
+        decorator_list=[],
+        returns=None)
+
+    mod = compiler.ast_to_object(node)
+
+    self.assertEqual(2, mod.f(1))
+    with open(mod.__file__, 'r') as temp_output:
+      self.assertEqual(
+          textwrap.dedent("""
+              def f(a):
+                return a + 1
+          """).strip(),
+          temp_output.read().strip())
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/pyct/parser.py b/tensorflow/contrib/py2tf/pyct/parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..3daa69b9ceff714c94c61134f6fb81f9927ea258
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/parser.py
@@ -0,0 +1,38 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Converting code to AST.
+
+Adapted from Tangent.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import textwrap
+
+import gast
+
+from tensorflow.python.util import tf_inspect
+
+
+def parse_object(obj):
+  """Return the AST of given object."""
+  return parse_str(tf_inspect.getsource(obj))
+
+
+def parse_str(src):
+  """Return the AST of given piece of code."""
+  return gast.parse(textwrap.dedent(src))
diff --git a/tensorflow/contrib/py2tf/pyct/parser_test.py b/tensorflow/contrib/py2tf/pyct/parser_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..46f9aa82071efa98518810851b76761ff42751e5
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/parser_test.py
@@ -0,0 +1,44 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for parser module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.python.platform import test
+
+
+def f(x):
+  return x + 1
+
+
+class ParserTest(test.TestCase):
+
+  def test_parse_object(self):
+    mod = parser.parse_object(f)
+    self.assertEqual('f', mod.body[0].name)
+
+  def test_parse_str(self):
+    mod = parser.parse_str("""
+        def f(x):
+          return x + 1
+    """)
+    self.assertEqual('f', mod.body[0].name)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/pyct/pretty_printer.py b/tensorflow/contrib/py2tf/pyct/pretty_printer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e70c0ed833c10012e6a5b4cb26e9e4198162693
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/pretty_printer.py
@@ -0,0 +1,97 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Print an AST tree in a form more readable than ast.dump."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+import termcolor
+
+
+class PrettyPrinter(gast.NodeVisitor):
+  """Print AST nodes."""
+
+  def __init__(self):
+    self.indent_lvl = 0
+    self.result = ''
+
+  def _type(self, node):
+    return termcolor.colored(node.__class__.__name__, None, attrs=['bold'])
+
+  def _field(self, name):
+    return termcolor.colored(name, 'blue')
+
+  def _value(self, name):
+    return termcolor.colored(name, 'magenta')
+
+  def _warning(self, name):
+    return termcolor.colored(name, 'red')
+
+  def _indent(self):
+    return termcolor.colored('| ' * self.indent_lvl, None, attrs=['dark'])
+
+  def _print(self, s):
+    self.result += s
+    self.result += '\n'
+
+  def generic_visit(self, node, name=None):
+    if node._fields:
+      cont = ':'
+    else:
+      cont = '()'
+
+    if name:
+      self._print('%s%s=%s%s' % (self._indent(), self._field(name),
+                                 self._type(node), cont))
+    else:
+      self._print('%s%s%s' % (self._indent(), self._type(node), cont))
+
+    self.indent_lvl += 1
+    for f in node._fields:
+      if not hasattr(node, f):
+        self._print('%s%s' % (self._indent(), self._warning('%s=<unset>' % f)))
+        continue
+      v = getattr(node, f)
+      if isinstance(v, list):
+        if v:
+          self._print('%s%s=[' % (self._indent(), self._field(f)))
+          self.indent_lvl += 1
+          for n in v:
+            self.generic_visit(n)
+          self.indent_lvl -= 1
+          self._print('%s]' % (self._indent()))
+        else:
+          self._print('%s%s=[]' % (self._indent(), self._field(f)))
+      elif isinstance(v, gast.AST):
+        self.generic_visit(v, f)
+      elif isinstance(v, str):
+        self._print('%s%s=%s' % (self._indent(), self._field(f),
+                                 self._value('"%s"' % v)))
+      else:
+        self._print('%s%s=%s' % (self._indent(), self._field(f),
+                                 self._value(v)))
+    self.indent_lvl -= 1
+
+
+def fmt(node):
+  printer = PrettyPrinter()
+  if isinstance(node, (list, tuple)):
+    for n in node:
+      printer.visit(n)
+  else:
+    printer.visit(node)
+  return printer.result
diff --git a/tensorflow/contrib/py2tf/pyct/pretty_printer_test.py b/tensorflow/contrib/py2tf/pyct/pretty_printer_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..65e5b1d9191749a0caeeda48df37690564a8fc1e
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/pretty_printer_test.py
@@ -0,0 +1,56 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for pretty_printer module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import ast
+
+from tensorflow.contrib.py2tf.pyct import pretty_printer
+from tensorflow.python.platform import test
+
+
+def f(x):
+  return x + 1
+
+
+class PrettyPrinterTest(test.TestCase):
+
+  def test_format(self):
+    node = ast.FunctionDef(
+        name='f',
+        args=ast.arguments(
+            args=[ast.Name(id='a', ctx=ast.Param())],
+            vararg=None,
+            kwarg=None,
+            defaults=[]),
+        body=[
+            ast.Return(
+                ast.BinOp(
+                    op=ast.Add(),
+                    left=ast.Name(id='a', ctx=ast.Load()),
+                    right=ast.Num(1)))
+        ],
+        decorator_list=[],
+        returns=None)
+    # Just checking for functionality, the color control characters make it
+    # difficult to inspect the result.
+    self.assertIsNotNone(pretty_printer.fmt(node))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/BUILD b/tensorflow/contrib/py2tf/pyct/static_analysis/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..abaf9536781efadea61b0da684020baeeed0597d
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/static_analysis/BUILD
@@ -0,0 +1,61 @@
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+py_library(
+    name = "static_analysis",
+    srcs = [
+        "access.py",
+        "live_values.py",
+        "type_info.py",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/contrib/py2tf/pyct",
+        "@gast_archive//:gast",
+    ],
+)
+
+py_test(
+    name = "access_test",
+    srcs = ["access_test.py"],
+    deps = [
+        ":static_analysis",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/python:client_testlib",
+        "@gast_archive//:gast",
+    ],
+)
+
+py_test(
+    name = "live_values_test",
+    srcs = ["live_values_test.py"],
+    deps = [
+        ":static_analysis",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
+py_test(
+    name = "type_info_test",
+    srcs = ["type_info_test.py"],
+    deps = [
+        ":static_analysis",
+        "//tensorflow/contrib/py2tf/pyct",
+        "//tensorflow/python:client_testlib",
+    ],
+)
diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/__init__.py b/tensorflow/contrib/py2tf/pyct/static_analysis/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c325e19f28376da3be6db4b00b9f664eac047af2
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/static_analysis/__init__.py
@@ -0,0 +1,29 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Static information resolution.
+
+This module contains utilities to help annotate AST nodes with as much runtime
+information as can be possibly extracted without actually executing the code,
+under that assumption that the context in which the code will run is known.
+
+Note: It's a fair bet that this analysis cannot be reused across contexts
+without re-running it. In most cases, the context usually means referenced
+modules, which should be static enough to allow reuse, but that is not being
+reliably verified.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/access.py b/tensorflow/contrib/py2tf/pyct/static_analysis/access.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f3ac48b68c05256fbac4c4d8d86381755c8027c
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/static_analysis/access.py
@@ -0,0 +1,205 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Access information (reads, writes) resolution."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import anno
+
+# TODO(mdan): Add support for PY3 (e.g. Param vs arg).
+
+
+class Scope(object):
+  """Encloses local symbol definition and usage information.
+
+  This can track for instance whether a symbol is modified in the current scope.
+  Note that scopes do not necessarily align with Python's scopes. For example,
+  the body of an if statement may be considered a separate scope.
+
+  Attributes:
+    modified: identifiers modified in this scope
+    created: identifiers created in this scope
+    used: identifiers referenced in this scope
+  """
+
+  def __init__(self, parent, isolated=True):
+    """Create a new scope.
+
+    Args:
+      parent: A Scope or None.
+      isolated: Whether the scope is isolated, that is, whether variables
+          created in this scope should be visible to the parent scope.
+    """
+    self.isolated = isolated
+    self.parent = parent
+    self.modified = set()
+    self.created = set()
+    self.used = set()
+
+  # TODO(mdan): Rename to `locals`
+  @property
+  def referenced(self):
+    if not self.isolated and self.parent is not None:
+      return self.used | self.parent.referenced
+    return self.used
+
+  def __repr__(self):
+    return 'Scope{r=%s, c=%s, w=%s}' % (tuple(self.used), tuple(self.created),
+                                        tuple(self.modified))
+
+  def copy_from(self, other):
+    self.modified = copy.copy(other.modified)
+    self.created = copy.copy(other.created)
+    self.used = copy.copy(other.used)
+
+  def merge_from(self, other):
+    self.modified |= other.modified
+    self.created |= other.created
+    self.used |= other.used
+
+  def has(self, name):
+    if name in self.modified:
+      return True
+    elif self.parent is not None:
+      return self.parent.has(name)
+    return False
+
+  def mark_read(self, name):
+    self.used.add(name)
+    if self.parent is not None and name not in self.created:
+      self.parent.mark_read(name)
+
+  def mark_write(self, name):
+    self.modified.add(name)
+    if self.isolated:
+      self.created.add(name)
+    else:
+      if self.parent is None:
+        self.created.add(name)
+      else:
+        if not self.parent.has(name):
+          self.created.add(name)
+        self.parent.mark_write(name)
+
+
+class AccessResolver(gast.NodeTransformer):
+  """Annotates nodes with local scope information. See Scope."""
+
+  def __init__(self):
+    self.scope = Scope(None)
+
+  def visit_Name(self, node):
+    # TODO(mdan): This is insufficient for object fields, e.g. hp.learning_rate.
+    self.generic_visit(node)
+    if isinstance(node.ctx, gast.Store):
+      self.scope.mark_write(node.id)
+    elif isinstance(node.ctx, gast.Load):
+      anno.setanno(node, 'is_local', self.scope.has(node.id))
+      self.scope.mark_read(node.id)
+    elif isinstance(node.ctx, gast.Param):
+      # Param contexts appear in function defs, so they have the meaning of
+      # defining a variable.
+      # TODO(mdan): This bay be incorrect with nested functions.
+      # For nested functions, we'll have to add the notion of hiding args from
+      # the parent scope, not writing to them.
+      self.scope.mark_write(node.id)
+    else:
+      raise ValueError('Unknown context %s for node %s.' % (type(node.ctx),
+                                                            node.id))
+    return node
+
+  def visit_Print(self, node):
+    current_scope = self.scope
+    args_scope = Scope(current_scope)
+    self.scope = args_scope
+    for n in node.values:
+      self.visit(n)
+    anno.setanno(node, 'args_scope', args_scope)
+    self.scope = current_scope
+    return node
+
+  def visit_Call(self, node):
+    current_scope = self.scope
+    args_scope = Scope(current_scope)
+    self.scope = args_scope
+    for n in node.args:
+      self.visit(n)
+    # TODO(mdan): Account starargs, kwargs
+    for n in node.keywords:
+      self.visit(n)
+    anno.setanno(node, 'args_scope', args_scope)
+    self.scope = current_scope
+    self.visit(node.func)
+    return node
+
+  def _process_block_node(self, node, block, scope_name):
+    current_scope = self.scope
+    block_scope = Scope(current_scope, isolated=False)
+    self.scope = block_scope
+    for n in block:
+      self.visit(n)
+    anno.setanno(node, '%s_scope' % scope_name, block_scope)
+    self.scope = current_scope
+    return node
+
+  def _process_parallel_blocks(self, parent, children):
+    # Because the scopes are not isolated, processing any child block
+    # modifies the parent state causing the other child blocks to be
+    # processed incorrectly. So we need to checkpoint the parent scope so that
+    # each child sees the same context.
+    before_parent = Scope(None)
+    before_parent.copy_from(self.scope)
+    after_children = []
+    for child, name in children:
+      self.scope.copy_from(before_parent)
+      parent = self._process_block_node(parent, child, name)
+      after_child = Scope(None)
+      after_child.copy_from(self.scope)
+      after_children.append(after_child)
+    for after_child in after_children:
+      self.scope.merge_from(after_child)
+    for child, name in children:
+      # TODO(mdan): We don't need this - we have the parent link from scope.
+      anno.setanno(parent, '%s_parent_scope' % name, self.scope)
+    return parent
+
+  def visit_If(self, node):
+    self.visit(node.test)
+    node = self._process_parallel_blocks(
+        node, ((node.body, 'body'), (node.orelse, 'orelse')))
+    return node
+
+  def visit_For(self, node):
+    self.visit(node.target)
+    self.visit(node.iter)
+    node = self._process_parallel_blocks(
+        node, ((node.body, 'body'), (node.orelse, 'orelse')))
+    return node
+
+  def visit_While(self, node):
+    self.visit(node.test)
+    node = self._process_parallel_blocks(
+        node, ((node.body, 'body'), (node.orelse, 'orelse')))
+    return node
+
+
+def resolve(node):
+  return AccessResolver().visit(node)
diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/access_test.py b/tensorflow/contrib/py2tf/pyct/static_analysis/access_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..0912ebb4c355c2ae2563e13e36926a4b8e3599a1
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/static_analysis/access_test.py
@@ -0,0 +1,234 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for access module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import anno
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.python.platform import test
+
+
+class ScopeTest(test.TestCase):
+
+  def test_basic(self):
+    scope = access.Scope(None)
+    self.assertFalse(scope.has('foo'))
+
+    scope.mark_read('foo')
+    self.assertFalse(scope.has('foo'))
+
+    scope.mark_write('foo')
+    self.assertTrue(scope.has('foo'))
+
+    scope.mark_read('bar')
+    self.assertFalse(scope.has('bar'))
+
+  def test_copy(self):
+    scope = access.Scope(None)
+    scope.mark_write('foo')
+
+    other = access.Scope(None)
+    other.copy_from(scope)
+
+    self.assertTrue('foo' in other.created)
+
+    scope.mark_write('bar')
+    scope.copy_from(other)
+
+    self.assertFalse('bar' in scope.created)
+
+    scope.mark_write('bar')
+    scope.merge_from(other)
+
+    self.assertTrue('bar' in scope.created)
+    self.assertFalse('bar' in other.created)
+
+  def test_nesting(self):
+    scope = access.Scope(None)
+    scope.mark_write('foo')
+    scope.mark_read('bar')
+
+    child = access.Scope(scope)
+    self.assertTrue(child.has('foo'))
+    self.assertTrue(scope.has('foo'))
+
+    child.mark_write('bar')
+    self.assertTrue(child.has('bar'))
+    self.assertFalse(scope.has('bar'))
+
+  def test_referenced(self):
+    scope = access.Scope(None)
+    scope.mark_read('a')
+
+    child = access.Scope(scope)
+    child.mark_read('b')
+
+    child2 = access.Scope(child, isolated=False)
+    child2.mark_read('c')
+
+    self.assertTrue('c' in child2.referenced)
+    self.assertTrue('b' in child2.referenced)
+    self.assertFalse('a' in child2.referenced)
+
+    self.assertTrue('c' in child.referenced)
+    self.assertTrue('b' in child.referenced)
+    self.assertFalse('a' in child.referenced)
+
+
+class AccessResolverTest(test.TestCase):
+
+  def test_local_markers(self):
+
+    def test_fn(a):  # pylint:disable=unused-argument
+      b = c  # pylint:disable=undefined-variable
+      while b > 0:
+        b -= 1
+      return b
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+
+    self.assertFalse(anno.getanno(node.body[0].body[0].value,
+                                  'is_local'))  # c in b = c
+    self.assertTrue(anno.getanno(node.body[0].body[1].test.left,
+                                 'is_local'))  # b in b > 0
+    self.assertTrue(anno.getanno(node.body[0].body[2].value,
+                                 'is_local'))  # b in return b
+
+  def assertScopeIs(self, scope, used, modified, created):
+    self.assertItemsEqual(used, scope.used)
+    self.assertItemsEqual(modified, scope.modified)
+    self.assertItemsEqual(created, scope.created)
+
+  def test_print_statement(self):
+
+    def test_fn(a):
+      b = 0
+      c = 1
+      print(a, b)
+      return c
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+
+    print_node = node.body[0].body[2]
+    if isinstance(print_node, gast.Print):
+      # Python 2
+      print_args_scope = anno.getanno(print_node, 'args_scope')
+    else:
+      # Python 3
+      assert isinstance(print_node, gast.Expr)
+      # The call node should be the one being annotated.
+      print_node = print_node.value
+      print_args_scope = anno.getanno(print_node, 'args_scope')
+    # We basically need to detect which variables are captured by the call
+    # arguments.
+    self.assertScopeIs(print_args_scope, ('a', 'b'), (), ())
+
+  def test_call(self):
+
+    def test_fn(a):
+      b = 0
+      c = 1
+      foo(a, b)  # pylint:disable=undefined-variable
+      return c
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+
+    call_node = node.body[0].body[2].value
+    # We basically need to detect which variables are captured by the call
+    # arguments.
+    self.assertScopeIs(
+        anno.getanno(call_node, 'args_scope'), ('a', 'b'), (), ())
+
+  def test_while(self):
+
+    def test_fn(a):
+      b = a
+      while b > 0:
+        c = b
+        b -= 1
+      return b, c
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+
+    while_node = node.body[0].body[1]
+    self.assertScopeIs(
+        anno.getanno(while_node, 'body_scope'), ('b',), ('b', 'c'), ('c',))
+    self.assertScopeIs(
+        anno.getanno(while_node, 'body_parent_scope'), ('a', 'b', 'c'),
+        ('a', 'b', 'c'), ('a', 'b', 'c'))
+
+  def test_for(self):
+
+    def test_fn(a):
+      b = a
+      for _ in a:
+        c = b
+        b -= 1
+      return b, c
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+
+    for_node = node.body[0].body[1]
+    self.assertScopeIs(
+        anno.getanno(for_node, 'body_scope'), ('b',), ('b', 'c'), ('c',))
+    self.assertScopeIs(
+        anno.getanno(for_node, 'body_parent_scope'), ('a', 'b', 'c'),
+        ('a', 'b', 'c', '_'), ('a', 'b', 'c', '_'))
+
+  def test_if(self):
+
+    def test_fn(x):
+      if x > 0:
+        x = -x
+        y = 2 * x
+        z = -y
+      else:
+        x = 2 * x
+        y = -x
+        u = -y
+      return z, u
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+
+    if_node = node.body[0].body[0]
+    self.assertScopeIs(
+        anno.getanno(if_node, 'body_scope'), ('x', 'y'), ('x', 'y', 'z'),
+        ('y', 'z'))
+    # TODO(mdan): Double check: is it ok to not mark a local symbol as not read?
+    self.assertScopeIs(
+        anno.getanno(if_node, 'body_parent_scope'), ('x', 'z', 'u'),
+        ('x', 'y', 'z', 'u'), ('x', 'y', 'z', 'u'))
+    self.assertScopeIs(
+        anno.getanno(if_node, 'orelse_scope'), ('x', 'y'), ('x', 'y', 'u'),
+        ('y', 'u'))
+    self.assertScopeIs(
+        anno.getanno(if_node, 'body_parent_scope'), ('x', 'z', 'u'),
+        ('x', 'y', 'z', 'u'), ('x', 'y', 'z', 'u'))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py
new file mode 100644
index 0000000000000000000000000000000000000000..242e544b5286c683ee4aa97bc586751932c73815
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py
@@ -0,0 +1,93 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Live value resolution.
+
+Live values are extracted from the known execution context.
+
+Requires annotations generated by AccessResolver.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import anno
+
+
+class LiveValueResolver(gast.NodeTransformer):
+  """Annotates nodes with live values."""
+
+  def __init__(self, namespace, literals):
+    """Create a new resolver.
+
+    Args:
+      namespace: A dict representing the namespace visible to the AST in the
+          intended execution context.
+      literals: A dict mapping literal lymbol names to their value. An example
+          literal is "None".
+    """
+    self.namespace = namespace
+    self.literals = literals
+
+  def visit_ClassDef(self, node):
+    self.generic_visit(node)
+    anno.setanno(node, 'live_val', self.namespace[node.name])
+    return node
+
+  def visit_Name(self, node):
+    self.generic_visit(node)
+    if isinstance(node.ctx, gast.Load):
+      assert anno.hasanno(node, 'is_local'), node
+      symbol_is_local = anno.getanno(node, 'is_local')
+      if not symbol_is_local:
+        if node.id in self.literals:
+          anno.setanno(node, 'live_val', self.literals[node.id])
+          # TODO(mdan): Could live values have FQNs? i.e. 'a'.join()
+        elif node.id in self.namespace:
+          obj = self.namespace[node.id]
+          anno.setanno(node, 'live_val', obj)
+          anno.setanno(node, 'fqn', (obj.__name__,))
+        else:
+          raise ValueError('Could not find global symbol %s.' % node.id)
+      else:
+        pass
+        # TODO(mdan): Attempt to trace its value through the local chain.
+        # TODO(mdan): Use type annotations as fallback.
+    return node
+
+  def visit_Attribute(self, node):
+    self.generic_visit(node)
+    if anno.hasanno(node.value, 'live_val'):
+      assert anno.hasanno(node.value, 'fqn')
+      parent_object = anno.getanno(node.value, 'live_val')
+      if not hasattr(parent_object, node.attr):
+        raise AttributeError('%s has no attribute %s' % (parent_object,
+                                                         node.attr))
+      anno.setanno(node, 'live_val', getattr(parent_object, node.attr))
+      anno.setanno(node, 'fqn', anno.getanno(node.value, 'fqn') + (node.attr,))
+    elif isinstance(node.value, gast.Name):
+      stem_name = node.value
+      # All nonlocal symbols should be fully resolved.
+      assert anno.hasanno(stem_name, 'is_local'), stem_name
+      assert anno.getanno(stem_name, 'is_local'), stem_name
+      # TODO(mdan): Figure out what to do when calling attribute on local object
+      # Maybe just leave as-is?
+    return node
+
+
+def resolve(node, namespace, literals):
+  return LiveValueResolver(namespace, literals).visit(node)
diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..e77497654a0b3096422deef9a3f008eeb6c6be05
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py
@@ -0,0 +1,75 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for live_values module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.pyct import anno
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.contrib.py2tf.pyct.static_analysis import live_values
+from tensorflow.python.framework import constant_op
+from tensorflow.python.platform import test
+
+
+class LiveValuesResolverTest(test.TestCase):
+
+  def test_literals(self):
+
+    def test_fn():
+      return Foo  # pylint: disable=undefined-variable
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, {}, {'Foo': 'bar'})
+
+    retval_node = node.body[0].body[0].value
+    self.assertEquals('bar', anno.getanno(retval_node, 'live_val'))
+
+  def test_namespace(self):
+
+    def foo():
+      return 'bar'
+
+    def test_fn():
+      return foo()
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, {'foo': foo}, {})
+
+    func_node = node.body[0].body[0].value.func
+    self.assertEquals(foo, anno.getanno(func_node, 'live_val'))
+    self.assertEquals(('foo',), anno.getanno(func_node, 'fqn'))
+
+  def test_attribute_names(self):
+
+    def test_fn():
+      return constant_op.constant(0)
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, {'constant_op': constant_op}, {})
+
+    func_node = node.body[0].body[0].value.func
+    self.assertEquals(constant_op.constant, anno.getanno(func_node, 'live_val'))
+    self.assertEquals((constant_op.__name__, 'constant'),
+                      anno.getanno(func_node, 'fqn'))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e545903261a41cac4dc9ac0e23f857e0be41f96
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py
@@ -0,0 +1,211 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Type resolution.
+
+Requires annotations generated by LiveValuesResolver.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import anno
+from tensorflow.python.util import tf_inspect
+
+
+class Scope(object):
+  """Encloses symbol value references.
+
+  Attributes:
+    values: A dict mapping string to gast.Node, containing the value that was
+        most recently assigned to the symbol.
+  """
+
+  # TODO(mdan): Should rather use a CFG here?
+
+  def __init__(self, parent):
+    """Create a new scope.
+
+    Args:
+      parent: A Scope or None.
+    """
+    self.parent = parent
+    self.values = {}
+
+  def __repr__(self):
+    return 'Scope[%s]' % self.values.keys()
+
+  def copy(self):
+    s = Scope(self.parent)
+    s.values = self.values.copy()
+    return s
+
+  def setval(self, name, value):
+    self.values[name] = value
+
+  def hasval(self, name):
+    return (name in self.values or
+            (self.parent is not None and self.parent.hasval(name)))
+
+  def getval(self, name):
+    if name in self.values:
+      return self.values[name]
+    if self.parent is not None:
+      return self.parent.getval(name)
+    raise KeyError(name)
+
+
+class TypeInfoResolver(gast.NodeTransformer):
+  """Annotates symbols with type information where possible.
+
+  Nodes currently annotated:
+    * Call (helps detect class constructors)
+    * Attribute (helps resolve object methods)
+  """
+
+  def __init__(self, value_hints):
+    self.scope = Scope(None)
+    self.value_hints = value_hints
+    self.function_level = 0
+
+  def visit_FunctionDef(self, node):
+    self.scope = Scope(self.scope)
+    self.function_level += 1
+    self.generic_visit(node)
+    self.function_level -= 1
+    self.scope = self.scope.parent
+    return node
+
+  def _visit_block(self, block):
+    self.scope = Scope(self.scope)
+    for i, n in enumerate(block):
+      block[i] = self.generic_visit(n)
+    self.scope = self.scope.parent
+    return block
+
+  def visit_For(self, node):
+    self.generic_visit(node.target)
+    self.generic_visit(node.iter)
+    node.body = self._visit_block(node.body)
+    node.orelse = self._visit_block(node.orelse)
+    return node
+
+  def visit_While(self, node):
+    self.generic_visit(node.test)
+    node.body = self._visit_block(node.body)
+    node.orelse = self._visit_block(node.orelse)
+    return node
+
+  def visit_If(self, node):
+    self.generic_visit(node.test)
+    node.body = self._visit_block(node.body)
+    node.orelse = self._visit_block(node.orelse)
+    return node
+
+  def visit_Name(self, node):
+    self.generic_visit(node)
+    if isinstance(node.ctx, gast.Param):
+      self.scope.setval(node.id, gast.Name(node.id, gast.Load(), None))
+      # TODO(mdan): Member functions should not need type hints.
+      # We could attemp to extract im_class from the live_val annotation.
+      if self.function_level == 1 and node.id in self.value_hints:
+        # Forge a node to hold the type information, so that method calls on
+        # it can resolve the type.
+        type_holder = gast.Name(node.id, gast.Load(), None)
+        type_string, type_obj = self.value_hints[node.id]
+        anno.setanno(type_holder, 'type', type_obj)
+        anno.setanno(type_holder, 'type_fqn', tuple(type_string.split('.')))
+        self.scope.setval(node.id, type_holder)
+    return node
+
+  def _process_variable_assignment(self, source, targets):
+    if isinstance(source, gast.Call):
+      func = source.func
+      if anno.hasanno(func, 'live_val'):
+        func_obj = anno.getanno(func, 'live_val')
+        if tf_inspect.isclass(func_obj):
+          anno.setanno(source, 'is_constructor', True)
+          anno.setanno(source, 'type', func_obj)
+          anno.setanno(source, 'type_fqn', anno.getanno(func, 'fqn'))
+          # TODO(mdan): Raise an error if constructor has side effects.
+          # We can have a whitelist of no-side-effects constructors.
+          # We can also step inside the constructor and further analyze.
+
+    for t in targets:
+      if isinstance(t, gast.Tuple):
+        for i, e in enumerate(t.elts):
+          self.scope.setval(e.id,
+                            gast.Subscript(
+                                source, gast.Index(i), ctx=gast.Store()))
+      elif isinstance(t, gast.Name):
+        self.scope.setval(t.id, source)
+      elif isinstance(t, gast.Attribute):
+        if not (isinstance(t.value, gast.Name) and t.value.id == 'self'):
+          raise ValueError(
+              'Dont know how to handle assignment to attributes of objects'
+              ' other than "self": [%s].%s' % (t.value, t.attr))
+      else:
+        raise ValueError('Dont know how to handle assignment to %s' % t)
+
+  def visit_With(self, node):
+    for wi in node.items:
+      if wi.optional_vars is not None:
+        self._process_variable_assignment(wi.context_expr, (wi.optional_vars,))
+    self.generic_visit(node)
+    return node
+
+  def visit_Assign(self, node):
+    self.generic_visit(node)
+    self._process_variable_assignment(node.value, node.targets)
+    return node
+
+  def visit_Call(self, node):
+    target = node.func
+    if not anno.hasanno(target, 'live_val'):
+      if not isinstance(target, gast.Attribute):
+        # Suspecting this pattern would reach here:
+        #   foo = bar
+        #   foo()
+        raise ValueError('Dont know how to handle dynamic functions.')
+      if not isinstance(target.value, gast.Name):
+        # Possible example of this kind:
+        #   foo = module.Foo()
+        #   foo.bar.baz()
+        # TODO(mdan): This should be doable by using the FQN.
+        raise ValueError('Dont know how to handle object properties yet.')
+      # In the example below, object_source is 'tr.train.Optimizer()':
+      #   opt = tf.train.Optimizer()
+      #   opt.foo()
+      if self.scope.hasval(target.value.id):
+        object_source = self.scope.getval(target.value.id)
+        if not anno.hasanno(object_source, 'type'):
+          raise ValueError('Could not determine type of "%s". Is it dynamic?' %
+                           (target.value.id))
+        anno.setanno(target, 'type', anno.getanno(object_source, 'type'))
+        anno.setanno(target, 'type_fqn', anno.getanno(object_source,
+                                                      'type_fqn'))
+      else:
+        # TODO(mdan): Figure out what could the user do to get past this.
+        raise ValueError('No info on "%s". Is it dynamically built?' %
+                         (target.value.id))
+    self.generic_visit(node)
+    return node
+
+
+def resolve(node, value_hints):
+  assert value_hints is not None
+  return TypeInfoResolver(value_hints).visit(node)
diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..8526f42413b9cca077da45195249615b55c45bc9
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py
@@ -0,0 +1,183 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for type_info module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.py2tf.pyct import anno
+from tensorflow.contrib.py2tf.pyct import parser
+from tensorflow.contrib.py2tf.pyct.static_analysis import access
+from tensorflow.contrib.py2tf.pyct.static_analysis import live_values
+from tensorflow.contrib.py2tf.pyct.static_analysis import type_info
+from tensorflow.python.client import session
+from tensorflow.python.platform import test
+from tensorflow.python.training import training
+
+
+class ScopeTest(test.TestCase):
+
+  def test_basic(self):
+    scope = type_info.Scope(None)
+    self.assertFalse(scope.hasval('foo'))
+
+    scope.setval('foo', 'bar')
+    self.assertTrue(scope.hasval('foo'))
+
+    self.assertFalse(scope.hasval('baz'))
+
+  def test_nesting(self):
+    scope = type_info.Scope(None)
+    scope.setval('foo', '')
+
+    child = type_info.Scope(scope)
+    self.assertTrue(child.hasval('foo'))
+    self.assertTrue(scope.hasval('foo'))
+
+    child.setval('bar', '')
+    self.assertTrue(child.hasval('bar'))
+    self.assertFalse(scope.hasval('bar'))
+
+
+class TypeInfoResolverTest(test.TestCase):
+
+  def test_constructor_detection(self):
+
+    def test_fn():
+      opt = training.GradientDescentOptimizer(0.1)
+      return opt
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, {'training': training}, {})
+    node = type_info.resolve(node, {})
+
+    call_node = node.body[0].body[0].value
+    self.assertEquals(training.GradientDescentOptimizer,
+                      anno.getanno(call_node, 'type'))
+    self.assertEquals((training.__name__, 'GradientDescentOptimizer'),
+                      anno.getanno(call_node, 'type_fqn'))
+
+  def test_class_members(self):
+
+    def test_fn():
+      opt = training.GradientDescentOptimizer(0.1)
+      opt.minimize(0)
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, {'training': training}, {})
+    node = type_info.resolve(node, {})
+
+    attr_call_node = node.body[0].body[1].value.func
+    self.assertEquals((training.__name__, 'GradientDescentOptimizer'),
+                      anno.getanno(attr_call_node, 'type_fqn'))
+
+  def test_class_members_in_with_stmt(self):
+
+    def test_fn(x):
+      with session.Session() as sess:
+        sess.run(x)
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, {'session': session}, {})
+    node = type_info.resolve(node, {})
+
+    constructor_call = node.body[0].body[0].items[0].context_expr
+    self.assertEquals(session.Session, anno.getanno(constructor_call, 'type'))
+    self.assertEquals((session.__name__, 'Session'),
+                      anno.getanno(constructor_call, 'type_fqn'))
+
+    member_call = node.body[0].body[0].body[0].value.func
+    self.assertEquals((session.__name__, 'Session'),
+                      anno.getanno(member_call, 'type_fqn'))
+
+  def test_constructor_deta_dependent(self):
+
+    def test_fn(x):
+      if x > 0:
+        opt = training.GradientDescentOptimizer(0.1)
+      else:
+        opt = training.GradientDescentOptimizer(0.01)
+      opt.minimize(0)
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, {'training': training}, {})
+    with self.assertRaises(ValueError):
+      node = type_info.resolve(node, {})
+
+  def test_parameter_class_members(self):
+
+    def test_fn(opt):
+      opt.minimize(0)
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, {'training': training}, {})
+    with self.assertRaises(ValueError):
+      node = type_info.resolve(node, {})
+
+  def test_parameter_class_members_with_value_hints(self):
+
+    def test_fn(opt):
+      opt.minimize(0)
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, {'training': training}, {})
+    node = type_info.resolve(
+        node, {
+            'opt': (('%s.GradientDescentOptimizer' % training.__name__),
+                    training.GradientDescentOptimizer(0.1))
+        })
+
+    attr_call_node = node.body[0].body[0].value.func
+    self.assertEquals(
+        tuple(training.__name__.split('.')) + ('GradientDescentOptimizer',),
+        anno.getanno(attr_call_node, 'type_fqn'))
+
+  def test_function_variables(self):
+
+    def bar():
+      pass
+
+    def test_fn():
+      foo = bar
+      foo()
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, {'bar': bar}, {})
+    with self.assertRaises(ValueError):
+      node = type_info.resolve(node, {})
+
+  def test_nested_members(self):
+
+    def test_fn():
+      foo = training.GradientDescentOptimizer(0.1)
+      foo.bar.baz()
+
+    node = parser.parse_object(test_fn)
+    node = access.resolve(node)
+    node = live_values.resolve(node, {'training': training}, {})
+    with self.assertRaises(ValueError):
+      node = type_info.resolve(node, {})
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/py2tf/pyct/templates.py b/tensorflow/contrib/py2tf/pyct/templates.py
new file mode 100644
index 0000000000000000000000000000000000000000..4fadc793e6d1dfa8ddabea1d607de68ac6ad9c85
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/templates.py
@@ -0,0 +1,116 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""AST conversion templates.
+
+Adapted from Tangent.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import ast
+import copy
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import parser
+
+
+class ReplaceTransformer(gast.NodeTransformer):
+  """Replace AST nodes."""
+
+  def __init__(self, replacements):
+    """Create a new ReplaceTransformer.
+
+    Args:
+      replacements: A mapping from placeholder names to (lists of) AST nodes
+          that these placeholders will be replaced by.
+    """
+    self.replacements = replacements
+
+  # TODO(mdan): Make a more detailed pass and clean up if needed.
+
+  def visit_Expr(self, node):
+    if (isinstance(node.value, gast.Name) and
+        node.value.id in self.replacements):
+      return self.visit(node.value)
+    self.generic_visit(node)
+    return node
+
+  def visit_FunctionDef(self, node):
+    node = self.generic_visit(node)
+    if node.name in self.replacements:
+      repl = self.replacements[node.name]
+      if not isinstance(repl, (gast.Name, ast.Name)):
+        raise ValueError(
+            'A function name can only be replaced by a Name node. Found: %s',
+            repl)
+      node.name = repl.id
+    return node
+
+  def visit_Name(self, node):
+    if node.id in self.replacements:
+      # TODO(mdan): Sanitize the nodes by erasing scope-dependent annotations.
+      new_nodes = copy.copy(self.replacements[node.id])
+      if isinstance(new_nodes, gast.AST):
+        new_nodes = [new_nodes]
+      # Preserve the target context.
+      for n in new_nodes:
+        if isinstance(n, gast.Tuple):
+          for e in n.elts:
+            e.ctx = node.ctx
+        n.ctx = node.ctx
+      if len(new_nodes) == 1:
+        new_nodes, = new_nodes
+      return new_nodes
+    else:
+      return node
+
+
+def replace(template, **replacements):
+  """Replace placeholders in a Python template.
+
+  Args:
+    template: A function to be used as a template. Any placeholder is expected
+        to also be a function argument.
+    **replacements: A mapping from placeholder names to (lists of) AST nodes
+        that these placeholders will be replaced by.
+
+  Returns:
+    body: An AST node or list of AST nodes with the replacements made. If the
+        template was a function, a list will be returned. If the template was a
+        node, the same node will be returned. If the template was a string, an
+        AST node will be returned (a `Module` node in the case of a multi-line
+        string, an `Expr` node otherwise).
+
+  Raises:
+    ValueError: If a function is used as a template and an incorrect set of
+        replacements was passed.
+  """
+  tree = parser.parse_object(template).body[0]
+  placeholders = set(arg.id for arg in tree.args.args)
+  tree.args.args = []
+  if tree.args.vararg:
+    placeholders.add(tree.args.vararg)
+    tree.args.vararg = None
+  if set(replacements.keys()) != placeholders:
+    raise ValueError(
+        'too many or few replacements. replacements: %s; placeholders: %s' %
+        (replacements.keys(), placeholders))
+
+  # Perform the replacement, stripping the function into which the template was
+  # wrapped.
+  return ReplaceTransformer(replacements).visit(tree).body
diff --git a/tensorflow/contrib/py2tf/pyct/templates_test.py b/tensorflow/contrib/py2tf/pyct/templates_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ad8b9317b67c7ae18a16efac745138e14101e6a
--- /dev/null
+++ b/tensorflow/contrib/py2tf/pyct/templates_test.py
@@ -0,0 +1,77 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for templates module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gast
+
+from tensorflow.contrib.py2tf.pyct import compiler
+from tensorflow.contrib.py2tf.pyct import templates
+from tensorflow.python.platform import test
+
+
+class TemplatesTest(test.TestCase):
+
+  def test_replace_variable(self):
+    def template(a):  # pylint:disable=unused-argument
+      def test_fn(a):  # pylint:disable=unused-variable
+        a += 1
+        a = 2 * a + 1
+        return b  # pylint:disable=undefined-variable
+
+    node = templates.replace(
+        template, a=gast.Name('b', gast.Load(), None))[0]
+    result = compiler.ast_to_object(node)
+    self.assertEquals(7, result.test_fn(2))
+
+  def test_replace_function_name(self):
+    def template(fname):  # pylint:disable=unused-argument
+      def fname(a):  # pylint:disable=function-redefined
+        a += 1
+        a = 2 * a + 1
+        return a
+
+    node = templates.replace(
+        template, fname=gast.Name('test_fn', gast.Load(), None))[0]
+    result = compiler.ast_to_object(node)
+    self.assertEquals(7, result.test_fn(2))
+
+  def test_code_block(self):
+    def template(block):  # pylint:disable=unused-argument
+      def test_fn(a):  # pylint:disable=unused-variable
+        block  # pylint:disable=pointless-statement
+        return a
+
+    node = templates.replace(
+        template,
+        block=[
+            gast.Assign(
+                [
+                    gast.Name('a', gast.Store(), None)
+                ],
+                gast.BinOp(
+                    gast.Name('a', gast.Load(), None),
+                    gast.Add(),
+                    gast.Num(1))),
+        ] * 2)[0]
+    result = compiler.ast_to_object(node)
+    self.assertEquals(3, result.test_fn(1))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/quantize/BUILD b/tensorflow/contrib/quantize/BUILD
index 389e26cca3eb04fe43abbee62a1efde7ae0d204d..3c5b34a0a6adb2f4e340a8e378c1eb51a2e2b534 100644
--- a/tensorflow/contrib/quantize/BUILD
+++ b/tensorflow/contrib/quantize/BUILD
@@ -88,16 +88,22 @@ py_test(
     srcs = ["python/fold_batch_norms_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":copy_graph",
         ":fold_batch_norms",
         "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:gradients",
         "//tensorflow/python:init_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:nn_ops",
         "//tensorflow/python:platform_test",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:random_seed",
+        "//tensorflow/python:session",
+        "//tensorflow/python:variables",
     ],
 )
 
diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py
index 647d4044001f7be701037d07dc46db86c0aa3a0e..aa605e6caadf4d1e69a4a331b1e580797e4fdef8 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py
@@ -23,11 +23,14 @@ from tensorflow.contrib import graph_editor
 from tensorflow.contrib.quantize.python import common
 from tensorflow.contrib.quantize.python import graph_matcher
 from tensorflow.contrib.quantize.python import input_to_ops
+from tensorflow.core.framework import attr_value_pb2
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import nn_ops
+from tensorflow.python.util import compat
 
 
 def FoldBatchNorms(graph):
@@ -66,23 +69,26 @@ def _FoldFusedBatchNorms(graph):
     # `scope`.
     with graph.as_default(), graph.name_scope(scope + sep), ops.device(
         match.bn_op.device):
-      # new weights = old weights * gamma / sqrt(variance + epsilon)
-      # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
-      multiplier_tensor = match.gamma_tensor * math_ops.rsqrt(
-          match.variance_tensor + match.bn_op.get_attr('epsilon'))
-      bias_tensor = math_ops.subtract(
-          match.beta_tensor, match.mean_tensor * multiplier_tensor, name='bias')
-
-      # The shape of depthwise weights is different, so we need to reshape the
-      # multiplier_tensor to ensure that the scaled_weight_tensor has the
-      # expected shape.
-      if match.layer_op.type == 'DepthwiseConv2dNative':
-        new_shape = [
-            match.weight_tensor.get_shape().as_list()[2],
-            match.weight_tensor.get_shape().as_list()[3]
-        ]
-        multiplier_tensor = array_ops.reshape(
-            multiplier_tensor, new_shape, name='scale_reshape')
+      with graph.name_scope(scope + sep + 'BatchNorm_Fold' + sep):
+        # new weights = old weights * gamma / sqrt(variance + epsilon)
+        # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
+        multiplier_tensor = match.gamma_tensor * math_ops.rsqrt(
+            match.variance_tensor + match.bn_op.get_attr('epsilon'))
+        bias_tensor = math_ops.subtract(
+            match.beta_tensor,
+            match.mean_tensor * multiplier_tensor,
+            name='bias')
+
+        # The shape of depthwise weights is different, so we need to reshape the
+        # multiplier_tensor to ensure that the scaled_weight_tensor has the
+        # expected shape.
+        if match.layer_op.type == 'DepthwiseConv2dNative':
+          new_shape = [
+              match.weight_tensor.get_shape().as_list()[2],
+              match.weight_tensor.get_shape().as_list()[3]
+          ]
+          multiplier_tensor = array_ops.reshape(
+              multiplier_tensor, new_shape, name='scale_reshape')
 
       # TODO(suharshs): This naming of the following ops needs to carefully
       # follow the naming expected by quantize.py. Generalize the quantize code
@@ -133,6 +139,16 @@ def _CloneWithNewOperands(layer_op, input_tensor, weight_tensor):
     raise ValueError('Cannot handle operation of type: %s' % layer_op.type)
 
 
+@ops.RegisterGradient('FoldFusedBatchNormGrad')
+def _FoldFusedBatchNormGrad(op, unused_grad_y, grad_mean, grad_var, unused_1,
+                            unused_2):
+  x = op.inputs[0]
+  n = x.get_shape().num_elements() / grad_mean.get_shape().num_elements()
+  dmean_dx = grad_mean / n
+  dvar_dx = 2 * grad_var * (x - op.outputs[1]) / (n - 1)
+  return (dmean_dx + dvar_dx), None, None, None, None
+
+
 def _FindFusedBatchNorms(graph):
   """Finds all ops and tensors related to found FusedBatchNorms.
 
@@ -178,7 +194,7 @@ def _FindFusedBatchNorms(graph):
   conv_matcher = graph_matcher.GraphMatcher(conv_batch_norm_pattern)
   matmul_matcher = graph_matcher.GraphMatcher(matmul_bn_output_reshape_pattern)
 
-  def _GetCommonTensors(match_result):
+  def _GetCommonTensors(match_result, bn_op, bn_input_tensor):
     """Gets tensors needed for FusedBatchNormMatch from match_result."""
     input_tensor = match_result.get_tensor(input_pattern)
     weight_tensor = match_result.get_tensor(weight_pattern)
@@ -191,8 +207,25 @@ def _FindFusedBatchNorms(graph):
     # respectively; when is_training is false, they point to bn_op's inputs.
     is_training = bn_op.get_attr('is_training')
     if is_training:
+      # FusedBatchNormGrad doesn't compute gradients of the batch_mean and
+      # batch_variance outputs, so we need to substitute our own custom
+      # gradient.
+      # TODO(suharshs, raghuramank): Find a way to avoid needing this hack.
+      # pylint: disable=protected-access
+      bn_op._set_attr(
+          '_gradient_op_type',
+          attr_value_pb2.AttrValue(s=compat.as_bytes('FoldFusedBatchNormGrad')))
+      # pylint: enable=protected-access
       mean_tensor = bn_op.outputs[1]
-      variance_tensor = bn_op.outputs[2]
+      # The batch variance used during forward and backward prop is biased,
+      # i.e it is calculated as: V=sum(x(k)-mu)^2/N. For the moving average
+      # calculation, the variance is corrected by the term N/N-1 (Bessel's
+      # correction). The variance tensor read from FuseBatchNorm has bessel's
+      # correction applied, so we undo it here.
+      n = math_ops.cast(
+          array_ops.size(bn_input_tensor) / array_ops.size(mean_tensor),
+          dtypes.float32)
+      variance_tensor = bn_op.outputs[2] * (n - 1) / n
     else:
       mean_tensor = match_result.get_tensor(mean_pattern)
       variance_tensor = match_result.get_tensor(variance_pattern)
@@ -201,12 +234,13 @@ def _FindFusedBatchNorms(graph):
 
   for match_result in conv_matcher.match_graph(graph):
     layer_op = match_result.get_op(conv_pattern)
+    layer_tensor = match_result.get_tensor(conv_pattern)
     bn_op = match_result.get_op(conv_batch_norm_pattern)
     # In the case of convolution the output_tensor is the output of bn_op.
     output_tensor = bn_op.outputs[0]
 
     (input_tensor, weight_tensor, gamma_tensor, beta_tensor, mean_tensor,
-     variance_tensor) = _GetCommonTensors(match_result)
+     variance_tensor) = _GetCommonTensors(match_result, bn_op, layer_tensor)
     yield _FusedBatchNormMatch(
         layer_op=layer_op,
         bn_op=bn_op,
@@ -220,6 +254,7 @@ def _FindFusedBatchNorms(graph):
 
   for match_result in matmul_matcher.match_graph(graph):
     layer_op = match_result.get_op(matmul_pattern)
+    layer_tensor = match_result.get_tensor(matmul_pattern)
     bn_op = match_result.get_op(matmul_batch_norm_pattern)
     # In the MatMul case, the output of batch norm is reshaped back into a
     # 2D tensor, so the output_tensor is the output of the Reshape op.
@@ -227,7 +262,7 @@ def _FindFusedBatchNorms(graph):
     output_tensor = output_reshape_op.outputs[0]
 
     (input_tensor, weight_tensor, gamma_tensor, beta_tensor, mean_tensor,
-     variance_tensor) = _GetCommonTensors(match_result)
+     variance_tensor) = _GetCommonTensors(match_result, bn_op, layer_tensor)
     yield _FusedBatchNormMatch(
         layer_op=layer_op,
         bn_op=bn_op,
diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
index 2cecf6851467f82675bd67bf1fb108e9a39df1b0..ecf321ff573181c7a2e325770a8dde223bf0c021 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
@@ -19,14 +19,20 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.layers.python.layers import layers
+from tensorflow.contrib.quantize.python import copy_graph
 from tensorflow.contrib.quantize.python import fold_batch_norms
+from tensorflow.python.client import session
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 
 batch_norm = layers.batch_norm
@@ -284,16 +290,20 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
 
     folded_mul = g.get_operation_by_name(scope + '/mul_fold')
     self.assertEqual(folded_mul.type, 'Mul')
+    if fused_batch_norm:
+      scale_reshape_op_name = scope + '/BatchNorm_Fold/scale_reshape'
+    else:
+      scale_reshape_op_name = scope + '/scale_reshape'
     self._AssertInputOpsAre(folded_mul,
                             [scope + '/depthwise_weights/read',
-                             scope + '/scale_reshape'])
+                             scale_reshape_op_name])
     self._AssertOutputGoesToOps(folded_mul, g, [scope + '/depthwise_Fold'])
 
-    scale_reshape = g.get_operation_by_name(scope + '/scale_reshape')
+    scale_reshape = g.get_operation_by_name(scale_reshape_op_name)
     self.assertEqual(scale_reshape.type, 'Reshape')
     self._AssertInputOpsAre(scale_reshape, [
         self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm),
-        scope + '/scale_reshape/shape'
+        scale_reshape_op_name + '/shape'
     ])
     self._AssertOutputGoesToOps(scale_reshape, g, [scope + '/mul_fold'])
 
@@ -315,6 +325,68 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
   def testFoldDepthwiseConv2d(self):
     self._RunTestOverParameters(self._TestFoldDepthwiseConv2d)
 
+  def _TestCompareFoldAndUnfolded(self, relu, relu_op_name, with_bypass,
+                                  has_scaling, fused_batch_norm):
+    """Tests that running folded and unfolded BN returns the same results.
+
+    Args:
+      relu: Callable that returns an Operation, a factory method for the Relu*.
+      relu_op_name: String, name of the Relu* operation.
+      with_bypass: Bool, when true there is an extra connection added from
+        inputs to just before Relu*.
+      has_scaling: Bool, when true the batch norm has scaling.
+      fused_batch_norm: Bool, when true the batch norm is fused.
+    """
+    random_seed.set_random_seed(1234)
+    unfolded_g = ops.Graph()
+    with unfolded_g.as_default():
+      batch_size, height, width = 5, 128, 128
+      inputs = random_ops.random_uniform(
+          (batch_size, height, width, 3), dtype=dtypes.float32, seed=1234)
+      out_depth = 3 if with_bypass else 32
+      stride = 1 if with_bypass else 2
+      activation_fn = None if with_bypass else relu
+      scope = 'test/test2' if with_bypass else 'test'
+      node = conv2d(
+          inputs,
+          out_depth, [5, 5],
+          stride=stride,
+          padding='SAME',
+          weights_initializer=self._WeightInit(0.09),
+          activation_fn=activation_fn,
+          normalizer_fn=batch_norm,
+          normalizer_params=self._BatchNormParams(
+              scale=has_scaling, fused=fused_batch_norm),
+          scope=scope)
+      if with_bypass:
+        node = math_ops.add(inputs, node, name='test/Add')
+      relu_node = relu(node, name='test/' + relu_op_name)
+
+    folded_g = copy_graph.CopyGraph(unfolded_g)
+    with folded_g.as_default():
+      fold_batch_norms.FoldBatchNorms(folded_g)
+
+    with session.Session(graph=unfolded_g) as sess:
+      sess.run(variables.global_variables_initializer())
+      grad_node = gradients.gradients(relu_node, inputs)
+      results = sess.run([relu_node, grad_node])
+      unfolded_forward, unfolded_backward = results[0], results[1]
+
+    with session.Session(graph=folded_g) as sess:
+      sess.run(variables.global_variables_initializer())
+      relu_node = folded_g.get_tensor_by_name(relu_node.name)
+      inputs = folded_g.get_tensor_by_name(inputs.name)
+      grad_node = gradients.gradients(relu_node, inputs)
+      results = sess.run([relu_node, grad_node])
+      folded_forward, folded_backward = results[0], results[1]
+
+    # Check that the folded and unfolded results match.
+    self.assertAllClose(unfolded_forward, folded_forward, atol=1e-3)
+    self.assertAllClose(unfolded_backward, folded_backward, atol=1e-3)
+
+  def testCompareFoldAndUnfolded(self):
+    self._RunTestOverParameters(self._TestCompareFoldAndUnfolded)
+
   def _BatchNormParams(self, scale=True, fused=False):
     return {
         'center': True,
@@ -326,13 +398,13 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
   def _BatchNormMultiplierName(self, scope, has_scaling, fused):
     if has_scaling:
       if fused:
-        return scope + '/mul'
+        return scope + '/BatchNorm_Fold/mul'
       return scope + '/BatchNorm/batchnorm/mul'
     return scope + '/BatchNorm/batchnorm/Rsqrt'
 
   def _BathNormBiasName(self, scope, fused):
     if fused:
-      return scope + '/bias'
+      return scope + '/BatchNorm_Fold/bias'
     return scope + '/BatchNorm/batchnorm/sub'
 
   def _WeightInit(self, stddev):
@@ -346,7 +418,7 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
     Returns:
       An initializer that initializes with a truncated normal variable.
     """
-    return init_ops.truncated_normal_initializer(stddev=stddev)
+    return init_ops.truncated_normal_initializer(stddev=stddev, seed=1234)
 
   def _AssertInputOpsAre(self, op, in_op_names):
     """Asserts that all inputs to op come from in_op_names (disregarding order).
diff --git a/tensorflow/contrib/receptive_field/BUILD b/tensorflow/contrib/receptive_field/BUILD
index d16b2908a0285e04ef5d3ede2050bf24c508228d..e975aeaea7ee78f8e912be8ab1be61b9acc7b418 100644
--- a/tensorflow/contrib/receptive_field/BUILD
+++ b/tensorflow/contrib/receptive_field/BUILD
@@ -15,7 +15,6 @@ load("//tensorflow:tensorflow.bzl", "py_test")
 py_library(
     name = "receptive_field_pip",
     deps = [
-        ":graph_compute_order_py",
         ":receptive_field_py",
     ],
 )
@@ -23,28 +22,75 @@ py_library(
 py_library(
     name = "graph_compute_order_py",
     srcs = [
-        "__init__.py",
         "python/util/graph_compute_order.py",
     ],
     srcs_version = "PY2AND3",
+    deps = [
+        ":parse_layer_parameters_py",
+        "//tensorflow/python:platform",
+    ],
+)
+
+py_library(
+    name = "parse_layer_parameters_py",
+    srcs = [
+        "python/util/parse_layer_parameters.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/contrib/util:util_py",
+        "//tensorflow/python:platform",
+    ],
 )
 
 py_library(
     name = "receptive_field_py",
     srcs = [
-        "__init__.py",
+        "python/util/parse_layer_parameters.py",
         "python/util/receptive_field.py",
+        "receptive_field_api.py",
     ],
     srcs_version = "PY2AND3",
     deps = [
         ":graph_compute_order_py",
-        "//tensorflow/contrib/util:util_py",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:platform",
         "//third_party/py/numpy",
     ],
 )
 
+py_test(
+    name = "graph_compute_order_test",
+    srcs = ["python/util/graph_compute_order_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":graph_compute_order_py",
+        ":receptive_field_py",
+        "//tensorflow/contrib/slim",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:nn",
+    ],
+)
+
+py_test(
+    name = "parse_layer_parameters_test",
+    srcs = ["python/util/parse_layer_parameters_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":graph_compute_order_py",
+        ":parse_layer_parameters_py",
+        "//tensorflow/contrib/slim",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:nn",
+    ],
+)
+
 py_test(
     name = "receptive_field_test",
     srcs = ["python/util/receptive_field_test.py"],
diff --git a/tensorflow/contrib/receptive_field/README.md b/tensorflow/contrib/receptive_field/README.md
index dfe53cdf14299b055fa4cdc13008d88234e93aa8..3ff85faf611afad71b6e6203453bbe97c56f9242 100644
--- a/tensorflow/contrib/receptive_field/README.md
+++ b/tensorflow/contrib/receptive_field/README.md
@@ -17,7 +17,6 @@ For example, if your model is constructed using the function
 
 ```python
 import tensorflow as tf
-from tensorflow.contrib import receptive_field
 
 # Construct graph.
 g = tf.Graph()
@@ -27,7 +26,7 @@ with g.as_default():
 
 # Compute receptive field parameters.
 rf_x, rf_y, eff_stride_x, eff_stride_y, eff_pad_x, eff_pad_y = \
-  receptive_field.compute_receptive_field_from_graph_def( \
+  tf.contrib.receptive_field.compute_receptive_field_from_graph_def( \
     g.as_graph_def(), 'input_image', 'my_output_endpoint')
 ```
 
@@ -47,7 +46,6 @@ You can then compute the receptive field parameters for Inception-Resnet-v2 as:
 ```python
 from nets import inception
 import tensorflow as tf
-from tensorflow.contrib import receptive_field
 
 # Construct graph.
 g = tf.Graph()
@@ -57,7 +55,7 @@ with g.as_default():
 
 # Compute receptive field parameters.
 rf_x, rf_y, eff_stride_x, eff_stride_y, eff_pad_x, eff_pad_y = \
-  receptive_field.compute_receptive_field_from_graph_def( \
+  tf.contrib.receptive_field.compute_receptive_field_from_graph_def( \
     g.as_graph_def(), 'input_image', 'InceptionResnetV2/Conv2d_7b_1x1/Relu')
 ```
 
diff --git a/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py b/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py
index 1cf978b90a3661a075130790d82a499da4d8a0cc..d6fdd12bbe37fb0e0cb12f1d0adc3fce29b19e8a 100644
--- a/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py
+++ b/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py
@@ -26,7 +26,7 @@ import sys
 
 from google.protobuf import text_format
 
-from tensorflow.contrib import receptive_field
+from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field
 from tensorflow.core.framework import graph_pb2
 from tensorflow.python.platform import app
 from tensorflow.python.platform import gfile
diff --git a/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py b/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py
index 94228dfa61b1de617f131611173fda7c3917d250..a298b4d49038468299b58140758c69675368e855 100644
--- a/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py
+++ b/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py
@@ -28,19 +28,19 @@ import argparse
 import csv
 import sys
 
-from nets import alexnet
-from nets import inception
-from nets import mobilenet_v1
-from nets import resnet_v1
-from nets import resnet_v2
-from nets import vgg
 from tensorflow.contrib import framework
-from tensorflow.contrib import receptive_field
 from tensorflow.contrib import slim
+from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import app
+from nets import alexnet
+from nets import inception
+from nets import mobilenet_v1
+from nets import resnet_v1
+from nets import resnet_v2
+from nets import vgg
 
 cmd_args = None
 
@@ -245,7 +245,8 @@ def _model_rf(graphdef,
               end_points,
               desired_end_point_keys,
               model_type='resnet_v1_50',
-              csv_writer=None):
+              csv_writer=None,
+              input_resolution=None):
   """Computes receptive field information for a given CNN model.
 
   The information will be printed to stdout. If the RF parameters are the same
@@ -261,45 +262,93 @@ def _model_rf(graphdef,
       information will be computed.
     model_type: Type of model to be used, used only for printing purposes.
     csv_writer: A CSV writer for RF parameters, which is used if it is not None.
+    input_resolution: Input resolution to use when computing RF
+      parameters. This is important for the case where padding can only be
+      defined if the input resolution is known, which may happen if using SAME
+      padding. This is assumed the resolution for both height and width. If
+      None, we consider the resolution is unknown.
   """
   for desired_end_point_key in desired_end_point_keys:
     print('- %s:' % desired_end_point_key)
     output_node_with_colon = end_points[desired_end_point_key].name
     pos = output_node_with_colon.rfind(':')
     output_node = output_node_with_colon[:pos]
-    (receptive_field_x, receptive_field_y, effective_stride_x,
-     effective_stride_y, effective_padding_x, effective_padding_y
-    ) = receptive_field.compute_receptive_field_from_graph_def(
-        graphdef, _INPUT_NODE, output_node)
-    # If values are the same in horizontal/vertical directions, just report one
-    # of them. Otherwise, report both.
-    if (receptive_field_x == receptive_field_y) and (
-        effective_stride_x == effective_stride_y) and (
-            effective_padding_x == effective_padding_y):
-      print('Receptive field size = %5s, effective stride = %5s, effective '
-            'padding = %5s' % (str(receptive_field_x), str(effective_stride_x),
-                               str(effective_padding_x)))
-    else:
-      print('Receptive field size: horizontal = %5s, vertical = %5s. '
-            'Effective stride: horizontal = %5s, vertical = %5s. Effective '
-            'padding: horizontal = %5s, vertical = %5s' %
-            (str(receptive_field_x), str(receptive_field_y),
-             str(effective_stride_x), str(effective_stride_y),
-             str(effective_padding_x), str(effective_padding_y)))
-    if csv_writer is not None:
-      csv_writer.writerow({
-          'CNN': model_type,
-          'end_point': desired_end_point_key,
-          'RF size hor': str(receptive_field_x),
-          'RF size ver': str(receptive_field_y),
-          'effective stride hor': str(effective_stride_x),
-          'effective stride ver': str(effective_stride_y),
-          'effective padding hor': str(effective_padding_x),
-          'effective padding ver': str(effective_padding_y)
-      })
-
-
-def _process_model_rf(model_type='resnet_v1_50', csv_writer=None, arg_sc=None):
+    try:
+      (receptive_field_x, receptive_field_y, effective_stride_x,
+       effective_stride_y, effective_padding_x, effective_padding_y
+      ) = receptive_field.compute_receptive_field_from_graph_def(
+          graphdef, _INPUT_NODE, output_node, input_resolution=input_resolution)
+      # If values are the same in horizontal/vertical directions, just report
+      # one of them. Otherwise, report both.
+      if (receptive_field_x == receptive_field_y) and (
+          effective_stride_x == effective_stride_y) and (
+              effective_padding_x == effective_padding_y):
+        print('Receptive field size = %5s, effective stride = %5s, effective '
+              'padding = %5s' % (str(receptive_field_x),
+                                 str(effective_stride_x),
+                                 str(effective_padding_x)))
+      else:
+        print('Receptive field size: horizontal = %5s, vertical = %5s. '
+              'Effective stride: horizontal = %5s, vertical = %5s. Effective '
+              'padding: horizontal = %5s, vertical = %5s' %
+              (str(receptive_field_x), str(receptive_field_y),
+               str(effective_stride_x), str(effective_stride_y),
+               str(effective_padding_x), str(effective_padding_y)))
+      if csv_writer is not None:
+        csv_writer.writerow({
+            'CNN':
+                model_type,
+            'input resolution':
+                str(input_resolution[0])
+                if input_resolution is not None else 'None',
+            'end_point':
+                desired_end_point_key,
+            'RF size hor':
+                str(receptive_field_x),
+            'RF size ver':
+                str(receptive_field_y),
+            'effective stride hor':
+                str(effective_stride_x),
+            'effective stride ver':
+                str(effective_stride_y),
+            'effective padding hor':
+                str(effective_padding_x),
+            'effective padding ver':
+                str(effective_padding_y)
+        })
+    except ValueError as e:
+      print('---->ERROR: Computing RF parameters for model %s with final end '
+            'point %s and input resolution %s did not work' %
+            (model_type, desired_end_point_key, input_resolution))
+      print('---->The returned error is: %s' % e)
+      if csv_writer is not None:
+        csv_writer.writerow({
+            'CNN':
+                model_type,
+            'input resolution':
+                str(input_resolution[0])
+                if input_resolution is not None else 'None',
+            'end_point':
+                desired_end_point_key,
+            'RF size hor':
+                'None',
+            'RF size ver':
+                'None',
+            'effective stride hor':
+                'None',
+            'effective stride ver':
+                'None',
+            'effective padding hor':
+                'None',
+            'effective padding ver':
+                'None'
+        })
+
+
+def _process_model_rf(model_type='resnet_v1_50',
+                      csv_writer=None,
+                      arg_sc=None,
+                      input_resolutions=None):
   """Contructs model graph and desired end-points, and compute RF.
 
   The computed RF parameters are printed to stdout by the _model_rf function.
@@ -308,13 +357,30 @@ def _process_model_rf(model_type='resnet_v1_50', csv_writer=None, arg_sc=None):
     model_type: Type of model to be used.
     csv_writer: A CSV writer for RF parameters, which is used if it is not None.
     arg_sc: Optional arg scope to use in constructing the graph.
+    input_resolutions: List of 1D input resolutions to use when computing RF
+      parameters. This is important for the case where padding can only be
+      defined if the input resolution is known, which may happen if using SAME
+      padding. The entries in the list are assumed the resolution for both
+      height and width. If one of the elements in the list is None, we consider
+      it to mean that the resolution is unknown. If the list itself is None,
+      we use the default list [None, 224, 321].
 
   """
-  print('********************%s' % model_type)
-  graphdef, end_points = _model_graph_def(model_type, arg_sc)
-  desired_end_point_keys = _get_desired_end_point_keys(model_type)
-  _model_rf(graphdef, end_points, desired_end_point_keys, model_type,
-            csv_writer)
+  # Process default value for this list.
+  if input_resolutions is None:
+    input_resolutions = [None, 224, 321]
+
+  for n in input_resolutions:
+    print('********************%s, input resolution = %s' % (model_type, n))
+    graphdef, end_points = _model_graph_def(model_type, arg_sc)
+    desired_end_point_keys = _get_desired_end_point_keys(model_type)
+    _model_rf(
+        graphdef,
+        end_points,
+        desired_end_point_keys,
+        model_type,
+        csv_writer,
+        input_resolution=[n, n] if n is not None else None)
 
 
 def _resnet_rf(csv_writer=None):
@@ -421,7 +487,7 @@ def main(unused_argv):
   if cmd_args.csv_path:
     csv_file = open(cmd_args.csv_path, 'w')
     field_names = [
-        'CNN', 'end_point', 'RF size hor', 'RF size ver',
+        'CNN', 'input resolution', 'end_point', 'RF size hor', 'RF size ver',
         'effective stride hor', 'effective stride ver', 'effective padding hor',
         'effective padding ver'
     ]
diff --git a/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py b/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py
index 793ae163d807fdda62c2025cb8176b96832cb61a..a494883396614bcee04886b5c05d0393df958580 100644
--- a/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py
+++ b/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py
@@ -22,12 +22,12 @@ from __future__ import print_function
 import argparse
 import sys
 
-from nets import inception
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_io
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import app
+from nets import inception
 
 cmd_args = None
 
diff --git a/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py b/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py
index 8af4be16d6c17286287713a1fb6f5017355e3b32..b2360fec6ca2afd23233041cdd0d3fcadb4a460b 100644
--- a/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py
+++ b/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py
@@ -20,69 +20,173 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import math
+from tensorflow.contrib.receptive_field.python.util import parse_layer_parameters
+from tensorflow.python.platform import tf_logging as logging
 
 
-class GraphDefHelper(object):
-  """Helper class to collect node names and definitions.
+def parse_graph_nodes(graph_def):
+  """Helper function to parse GraphDef's nodes.
 
-  Example:
-    b = GraphDefHelper(graph_def)
-    # Prints node that produces given output.
-    print b.output_of['conv/foo/bar']
+  It returns a dict mapping from node name to NodeDef.
+
+  Args:
+    graph_def: A GraphDef object.
+
+  Returns:
+    name_to_node: Dict keyed by node name, each entry containing the node's
+      NodeDef.
   """
+  name_to_node = {}
+  for node_def in graph_def.node:
+    name_to_node[node_def.name] = node_def
+  return name_to_node
 
-  def __init__(self, gd):
-    self.output_of = {}
-    for each in gd.node:
-      self.output_of[each.name] = each
 
+# Named tuple used to collect information from each node in a computation graph.
+_node_info = collections.namedtuple(
+    'NodeInfo', field_names=['order', 'node', 'input_size', 'output_size'])
 
-# pylint: disable=invalid-name
-_NodeEntry = collections.namedtuple('NodeEntry', field_names=['order', 'node'])
 
+def _compute_output_resolution(input_spatial_resolution, kernel_size, stride,
+                               total_padding):
+  """Computes output resolution, given input resolution and layer parameters.
 
-def _get_computed_nodes(g, output, seen):
-  """Traverses the graph in topological order.
+  Note that this computation is done only over one dimension (eg, x or y).
+  If any of the inputs is None, returns None.
+
+  Args:
+    input_spatial_resolution: Input spatial resolution (int).
+    kernel_size: Kernel size (int).
+    stride: Stride (int).
+    total_padding: Total padding to be applied (int).
+  Returns:
+    output_resolution: Ouput dimension (int) or None.
+  """
+  if (input_spatial_resolution is None) or (kernel_size is None) or (
+      stride is None) or (total_padding is None):
+    return None
+  return int(
+      math.ceil((
+          input_spatial_resolution + total_padding - kernel_size + 1) / stride))
+
+
+def _get_computed_nodes(name_to_node,
+                        current,
+                        node_info,
+                        input_node_name='',
+                        input_node_size=None):
+  """Traverses the graph recursively to compute its topological order.
+
+  Optionally, the function may also compute the input and output feature map
+  resolutions at each node. In this case, input_node_name and input_node_size
+  must be set. Note that if a node's op type is unknown, the input and output
+  resolutions are ignored and set to None.
 
   Args:
-    g: GraphDefHelper object.
-    output: current node.
-    seen: map of nodes we've already traversed.
+    name_to_node: Dict keyed by node name, each entry containing the node's
+      NodeDef.
+    current: Current node name.
+    node_info: Map of nodes we've already traversed, containing their _node_info
+      information.
+    input_node_name: Name of node with fixed input resolution (optional).
+    input_node_size: Fixed input resolution to use (optional).
   Returns:
-    order in topological sort for 'output'.
+    order: Order in topological sort for 'current'.
+    input_size: Tensor spatial resolution at input of current node.
+    output_size: Tensor spatial resolution at output of current node.
   """
-  if output in seen:
-    return seen[output].order
-  node_def = g.output_of.get(output, None)
-  if node_def is None:
-    seen[output] = _NodeEntry(0, None)
-    return 0
-
-  r = 0
+  if current in node_info:
+    return (node_info[current].order, node_info[current].input_size,
+            node_info[current].output_size)
+
+  node_def = name_to_node[current]
+
+  if current == input_node_name:
+    order = 0
+    input_size = None
+    output_size = input_node_size
+    node_info[current] = _node_info(order, node_def, input_size, output_size)
+    return (order, input_size, output_size)
+
+  input_size = None
+  output_size = None
+
+  order = 0
+  number_inputs = 0
   for each in node_def.input:
     # Parses name of input node.
     if each.startswith('^'):
-      each = each[1:]
+      # The character '^' denotes a control dependency, so this input node can
+      # be safely ignored.
+      continue
     each = each.split(':')[0]
     # Recursively computes ordering.
-    new_v = _get_computed_nodes(g, each, seen)
-    r = max(r, new_v + 1)
-
-  seen[output] = _NodeEntry(r, node_def)
-
-  return seen[output].order
-
-
-def get_compute_order(graph_def):
-  """Computes order of computation for a given graph.
+    (parent_order, _, parent_output_size) = _get_computed_nodes(
+        name_to_node, each, node_info, input_node_name, input_node_size)
+    order = max(order, parent_order + 1)
+    if number_inputs == 0:
+      # For all the types of nodes we consider, the first input corresponds to
+      # the feature map.
+      input_size = parent_output_size
+    number_inputs += 1
+
+  # Figure out output size for this layer.
+  logging.vlog(3, 'input_size = %s', input_size)
+  if input_size is None:
+    output_size = None
+  else:
+    (kernel_size_x, kernel_size_y, stride_x, stride_y, _, _, total_padding_x,
+     total_padding_y) = (
+         parse_layer_parameters.get_layer_params(
+             node_def, name_to_node, input_size, force=True))
+    logging.vlog(3, 'kernel_size_x = %s, kernel_size_y = %s, '
+                 'stride_x = %s, stride_y = %s, '
+                 'total_padding_x = %s, total_padding_y = %s' %
+                 (kernel_size_x, kernel_size_y, stride_x, stride_y,
+                  total_padding_x, total_padding_y))
+    output_size = [None] * 2
+    output_size[0] = _compute_output_resolution(input_size[0], kernel_size_x,
+                                                stride_x, total_padding_x)
+    output_size[1] = _compute_output_resolution(input_size[1], kernel_size_y,
+                                                stride_y, total_padding_y)
+
+  logging.vlog(3, 'output_size = %s', output_size)
+  node_info[current] = _node_info(order, node_def, input_size, output_size)
+
+  return order, input_size, output_size
+
+
+def get_compute_order(graph_def, input_node_name='', input_node_size=None):
+  """Computes order of computation for a given CNN graph.
+
+  Optionally, the function may also compute the input and output feature map
+  resolutions at each node. In this case, input_node_name and input_node_size
+  must be set. Note that if a node's op type is unknown, the input and output
+  resolutions are ignored and set to None.
 
   Args:
     graph_def: GraphDef object.
+    input_node_name: Name of node with fixed input resolution (optional). This
+      is usually the node name for the input image in a CNN.
+    input_node_size: 2D list of integers, fixed input resolution to use
+      (optional). This is usually the input resolution used for the input image
+      in a CNN (common examples are: [224, 224], [299, 299], [321, 321]).
   Returns:
-    map: name -> {order, node}
+    node_info: Default dict keyed by node name, mapping to a named tuple with
+      the following fields:
+      - order: Integer denoting topological order;
+      - node: NodeDef for the given node;
+      - input_size: 2D list of integers, denoting the input spatial resolution
+        to the node;
+      - output_size: 2D list of integers, denoting the output spatial resolution
+        of the node.
+    name_to_node: Dict keyed by node name, each entry containing the node's
+      NodeDef.
   """
-  helper = GraphDefHelper(graph_def)
-  seen = collections.defaultdict(_NodeEntry)
+  name_to_node = parse_graph_nodes(graph_def)
+  node_info = collections.defaultdict(_node_info)
   for each in graph_def.node:
-    _get_computed_nodes(helper, each.name, seen)
-  return seen
+    _get_computed_nodes(name_to_node, each.name, node_info, input_node_name,
+                        input_node_size)
+  return node_info, name_to_node
diff --git a/tensorflow/contrib/receptive_field/python/util/graph_compute_order_test.py b/tensorflow/contrib/receptive_field/python/util/graph_compute_order_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..94c992ad2155d9c80cceee260a90c478a54371fb
--- /dev/null
+++ b/tensorflow/contrib/receptive_field/python/util/graph_compute_order_test.py
@@ -0,0 +1,152 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for graph_compute_order module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib import slim
+from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.platform import test
+
+
+def create_test_network():
+  """Convolutional neural network for test.
+
+  Returns:
+    g: Tensorflow graph object (Graph proto).
+  """
+  g = ops.Graph()
+  with g.as_default():
+    # An input test image with unknown spatial resolution.
+    x = array_ops.placeholder(
+        dtypes.float32, (None, None, None, 1), name='input_image')
+    # Left branch before first addition.
+    l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
+    # Right branch before first addition.
+    l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]], name='L2_pad')
+    l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID')
+    l3 = slim.max_pool2d(l2, [3, 3], stride=2, scope='L3', padding='SAME')
+    # First addition.
+    l4 = nn.relu(l1 + l3, name='L4_relu')
+    # Left branch after first addition.
+    l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='SAME')
+    # Right branch after first addition.
+    l6 = slim.conv2d(l4, 1, [3, 3], stride=2, scope='L6', padding='SAME')
+    # Final addition.
+    gen_math_ops.add(l5, l6, name='L7_add')
+
+  return g
+
+
+class GraphComputeOrderTest(test.TestCase):
+
+  def check_topological_sort_and_sizes(self,
+                                       node_info,
+                                       expected_input_sizes=None,
+                                       expected_output_sizes=None):
+    """Helper function to check topological sorting and sizes are correct.
+
+    The arguments expected_input_sizes and expected_output_sizes are used to
+    check that the sizes are correct, if they are given.
+
+    Args:
+      node_info: Default dict keyed by node name, mapping to a named tuple with
+        the following keys: {order, node, input_size, output_size}.
+      expected_input_sizes: Dict mapping node names to expected input sizes
+        (optional).
+      expected_output_sizes: Dict mapping node names to expected output sizes
+        (optional).
+    """
+    # Loop over nodes in sorted order, collecting those that were already seen.
+    # These will be used to make sure that the graph is topologically sorted.
+    # At the same time, we construct dicts from node name to input/output size,
+    # which will be used to check those.
+    already_seen_nodes = []
+    input_sizes = {}
+    output_sizes = {}
+    for _, (_, node, input_size, output_size) in sorted(
+        node_info.items(), key=lambda x: x[1].order):
+      for inp_name in node.input:
+        # Since the graph is topologically sorted, the inputs to the current
+        # node must have been seen beforehand.
+        self.assertIn(inp_name, already_seen_nodes)
+      input_sizes[node.name] = input_size
+      output_sizes[node.name] = output_size
+      already_seen_nodes.append(node.name)
+
+    # Check input sizes, if desired.
+    if expected_input_sizes is not None:
+      for k, v in expected_input_sizes.items():
+        self.assertIn(k, input_sizes)
+        self.assertEqual(input_sizes[k], v)
+
+    # Check output sizes, if desired.
+    if expected_output_sizes is not None:
+      for k, v in expected_output_sizes.items():
+        self.assertIn(k, output_sizes)
+        self.assertEqual(output_sizes[k], v)
+
+  def testGraphOrderIsCorrect(self):
+    """Tests that the order and sizes of create_test_network() are correct."""
+
+    graph_def = create_test_network().as_graph_def()
+
+    # Case 1: Input node name/size are not given.
+    node_info, _ = receptive_field.get_compute_order(graph_def)
+    self.check_topological_sort_and_sizes(node_info)
+
+    # Case 2: Input node name is given, but not size.
+    node_info, _ = receptive_field.get_compute_order(
+        graph_def, input_node_name='input_image')
+    self.check_topological_sort_and_sizes(node_info)
+
+    # Case 3: Input node name and size (224) are given.
+    node_info, _ = receptive_field.get_compute_order(
+        graph_def, input_node_name='input_image', input_node_size=[224, 224])
+    expected_input_sizes = {
+        'input_image': None,
+        'L1/Conv2D': [224, 224],
+        'L2_pad': [224, 224],
+        'L2/Conv2D': [225, 225],
+        'L3/MaxPool': [112, 112],
+        'L4_relu': [56, 56],
+        'L5/Conv2D': [56, 56],
+        'L6/Conv2D': [56, 56],
+        'L7_add': [28, 28],
+    }
+    expected_output_sizes = {
+        'input_image': [224, 224],
+        'L1/Conv2D': [56, 56],
+        'L2_pad': [225, 225],
+        'L2/Conv2D': [112, 112],
+        'L3/MaxPool': [56, 56],
+        'L4_relu': [56, 56],
+        'L5/Conv2D': [28, 28],
+        'L6/Conv2D': [28, 28],
+        'L7_add': [28, 28],
+    }
+    self.check_topological_sort_and_sizes(node_info, expected_input_sizes,
+                                          expected_output_sizes)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/receptive_field/python/util/parse_layer_parameters.py b/tensorflow/contrib/receptive_field/python/util/parse_layer_parameters.py
new file mode 100644
index 0000000000000000000000000000000000000000..44998b3b6591221fde55d8d2d406d5141b1647f2
--- /dev/null
+++ b/tensorflow/contrib/receptive_field/python/util/parse_layer_parameters.py
@@ -0,0 +1,297 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functions to parse RF-related parameters from TF layers."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+from tensorflow.contrib.util import make_ndarray
+from tensorflow.python.platform import tf_logging as logging
+
+# White-listed layer operations, which do not affect the receptive field
+# computation.
+_UNCHANGED_RF_LAYER_OPS = [
+    "Add", "BiasAdd", "Cast", "Ceil", "ConcatV2", "Const", "Floor",
+    "FusedBatchNorm", "Identity", "Log", "Mul", "Pow", "RealDiv", "Relu",
+    "Relu6", "Round", "Rsqrt", "Softplus", "Sub", "VariableV2"
+]
+
+# Different ways in which padding modes may be spelled.
+_VALID_PADDING = ["VALID", b"VALID"]
+_SAME_PADDING = ["SAME", b"SAME"]
+
+
+def _stride_size(node):
+  """Computes stride size given a TF node.
+
+  Args:
+    node: Tensorflow node (NodeDef proto).
+
+  Returns:
+    stride_x: Stride size for horizontal direction (integer).
+    stride_y: Stride size for vertical direction (integer).
+  """
+  strides_attr = node.attr["strides"]
+  logging.vlog(4, "strides_attr = %s", strides_attr)
+  stride_y = strides_attr.list.i[1]
+  stride_x = strides_attr.list.i[2]
+  return stride_x, stride_y
+
+
+def _conv_kernel_size(node, name_to_node):
+  """Computes kernel size given a TF convolution or pooling node.
+
+  Args:
+    node: Tensorflow node (NodeDef proto).
+    name_to_node: Dict keyed by node name, each entry containing the node's
+      NodeDef.
+
+  Returns:
+    kernel_size_x: Kernel size for horizontal direction (integer).
+    kernel_size_y: Kernel size for vertical direction (integer).
+
+  Raises:
+    ValueError: If the weight layer node is invalid.
+  """
+  weights_layer_read_name = node.input[1]
+  if not weights_layer_read_name.endswith("/read"):
+    raise ValueError(
+        "Weight layer's name input to conv layer does not end with '/read'")
+  weights_layer_param_name = weights_layer_read_name[:-5]
+  weights_node = name_to_node[weights_layer_param_name]
+  if weights_node.op != "VariableV2":
+    raise ValueError("Weight layer is not of type VariableV2")
+  shape = weights_node.attr["shape"]
+  logging.vlog(4, "weight shape = %s", shape)
+  kernel_size_y = shape.shape.dim[0].size
+  kernel_size_x = shape.shape.dim[1].size
+  return kernel_size_x, kernel_size_y
+
+
+def _padding_size_conv_pool(node, kernel_size, stride, input_resolution=None):
+  """Computes padding size given a TF convolution or pooling node.
+
+  Args:
+    node: Tensorflow node (NodeDef proto).
+    kernel_size: Kernel size of node (integer).
+    stride: Stride size of node (integer).
+    input_resolution: Input resolution to assume, if not None (integer).
+
+  Returns:
+    total_padding: Total padding size (integer).
+    padding: Padding size, applied to the left or top (integer).
+
+  Raises:
+    ValueError: If padding is invalid.
+  """
+  # In this case, we need to carefully consider the different TF padding modes.
+  # The padding depends on kernel size, and may depend on input size. If it
+  # depends on input size and input_resolution is None, we raise an exception.
+  padding_attr = node.attr["padding"]
+  logging.vlog(4, "padding_attr = %s", padding_attr)
+  if padding_attr.s in _VALID_PADDING:
+    total_padding = 0
+    padding = 0
+  elif padding_attr.s in _SAME_PADDING:
+    if input_resolution is None:
+      # In this case, we do not know the input resolution, so we can only know
+      # the padding in some special cases.
+      if kernel_size == 1:
+        total_padding = 0
+        padding = 0
+      elif stride == 1:
+        total_padding = kernel_size - 1
+        padding = int(math.floor(float(total_padding) / 2))
+      elif stride == 2 and kernel_size % 2 == 0:
+        # In this case, we can be sure of the left/top padding, but not of the
+        # total padding.
+        total_padding = None
+        padding = int(math.floor((float(kernel_size) - 1) / 2))
+      else:
+        total_padding = None
+        padding = None
+        logging.warning(
+            "Padding depends on input size, which means that the effective "
+            "padding may be different depending on the input image "
+            "dimensionality. In this case, alignment check will be skipped. If"
+            " you know the input resolution, please set it.")
+    else:
+      # First, compute total_padding based on documentation.
+      if input_resolution % stride == 0:
+        total_padding = int(max(float(kernel_size - stride), 0.0))
+      else:
+        total_padding = int(
+            max(float(kernel_size - (input_resolution % stride)), 0.0))
+      # Then, compute left/top padding.
+      padding = int(math.floor(float(total_padding) / 2))
+
+  else:
+    raise ValueError("Invalid padding operation %s" % padding_attr.s)
+  return total_padding, padding
+
+
+def _pool_kernel_size(node):
+  """Computes kernel size given a TF pooling node.
+
+  Args:
+    node: Tensorflow node (NodeDef proto).
+
+  Returns:
+    kernel_size_x: Kernel size for horizontal direction (integer).
+    kernel_size_y: Kernel size for vertical direction (integer).
+
+  Raises:
+    ValueError: If pooling is invalid.
+  """
+  ksize = node.attr["ksize"]
+  kernel_size_y = ksize.list.i[1]
+  kernel_size_x = ksize.list.i[2]
+  if ksize.list.i[0] != 1:
+    raise ValueError("pool ksize for first dim is not 1")
+  if ksize.list.i[3] != 1:
+    raise ValueError("pool ksize for last dim is not 1")
+  return kernel_size_x, kernel_size_y
+
+
+def _padding_size_pad_layer(node, name_to_node):
+  """Computes padding size given a TF padding node.
+
+  Args:
+    node: Tensorflow node (NodeDef proto).
+    name_to_node: Dict keyed by node name, each entry containing the node's
+      NodeDef.
+
+  Returns:
+    total_padding_x: Total padding size for horizontal direction (integer).
+    padding_x: Padding size for horizontal direction, left side (integer).
+    total_padding_y: Total padding size for vertical direction (integer).
+    padding_y: Padding size for vertical direction, top side (integer).
+
+  Raises:
+    ValueError: If padding layer is invalid.
+  """
+  paddings_layer_name = node.input[1]
+  if not paddings_layer_name.endswith("/paddings"):
+    raise ValueError("Padding layer name does not end with '/paddings'")
+  paddings_node = name_to_node[paddings_layer_name]
+  if paddings_node.op != "Const":
+    raise ValueError("Padding op is not Const")
+  value = paddings_node.attr["value"]
+  t = make_ndarray(value.tensor)
+  padding_y = t[1][0]
+  padding_x = t[2][0]
+  total_padding_y = padding_y + t[1][1]
+  total_padding_x = padding_x + t[2][1]
+  if (t[0][0] != 0) or (t[0][1] != 0):
+    raise ValueError("padding is not zero for first tensor dim")
+  if (t[3][0] != 0) or (t[3][1] != 0):
+    raise ValueError("padding is not zero for last tensor dim")
+  return total_padding_x, padding_x, total_padding_y, padding_y
+
+
+def get_layer_params(node, name_to_node, input_resolution=None, force=False):
+  """Gets layer parameters relevant for RF computation.
+
+  Currently, only these nodes are supported:
+  - Conv2D
+  - DepthwiseConv2dNative
+  - Pad
+  - MaxPool
+  - AvgPool
+  - all nodes listed in _UNCHANGED_RF_LAYER_OPS
+
+  Args:
+    node: Tensorflow node (NodeDef proto).
+    name_to_node: Dict keyed by node name, each entry containing the node's
+      NodeDef.
+    input_resolution: List with 2 dimensions, denoting the height/width of the
+      input feature map to this layer. If set to None, then the padding may be
+      undefined (in tensorflow, SAME padding depends on input spatial
+      resolution).
+    force: If True, the function does not raise a ValueError if the layer op is
+      unknown. Instead, in this case it sets each of the returned parameters to
+      None.
+
+  Returns:
+    kernel_size_x: Kernel size for horizontal direction (integer).
+    kernel_size_y: Kernel size for vertical direction (integer).
+    stride_x: Stride size for horizontal direction (integer).
+    stride_y: Stride size for vertical direction (integer).
+    padding_x: Padding size for horizontal direction, left side (integer).
+    padding_y: Padding size for vertical direction, top side (integer).
+    total_padding_x: Total padding size for horizontal direction (integer).
+    total_padding_y: Total padding size for vertical direction (integer).
+
+  Raises:
+    ValueError: If layer op is unknown and force is False.
+  """
+  logging.vlog(3, "node.name = %s", node.name)
+  logging.vlog(3, "node.op = %s", node.op)
+  logging.vlog(4, "node = %s", node)
+  if node.op == "Conv2D" or node.op == "DepthwiseConv2dNative":
+    stride_x, stride_y = _stride_size(node)
+    kernel_size_x, kernel_size_y = _conv_kernel_size(node, name_to_node)
+    # Compute the padding for this node separately for each direction.
+    total_padding_x, padding_x = _padding_size_conv_pool(
+        node, kernel_size_x, stride_x, input_resolution[1]
+        if input_resolution is not None else None)
+    total_padding_y, padding_y = _padding_size_conv_pool(
+        node, kernel_size_y, stride_y, input_resolution[0]
+        if input_resolution is not None else None)
+  elif node.op == "Pad":
+    # Kernel and stride are simply 1 in this case.
+    kernel_size_x = 1
+    kernel_size_y = 1
+    stride_x = 1
+    stride_y = 1
+    total_padding_x, padding_x, total_padding_y, padding_y = (
+        _padding_size_pad_layer(node, name_to_node))
+  elif node.op == "MaxPool" or node.op == "AvgPool":
+    stride_x, stride_y = _stride_size(node)
+    kernel_size_x, kernel_size_y = _pool_kernel_size(node)
+    # Compute the padding for this node separately for each direction.
+    total_padding_x, padding_x = _padding_size_conv_pool(
+        node, kernel_size_x, stride_x, input_resolution[1]
+        if input_resolution is not None else None)
+    total_padding_y, padding_y = _padding_size_conv_pool(
+        node, kernel_size_y, stride_y, input_resolution[0]
+        if input_resolution is not None else None)
+  elif node.op in _UNCHANGED_RF_LAYER_OPS:
+    # These nodes do not modify the RF parameters.
+    kernel_size_x = 1
+    kernel_size_y = 1
+    stride_x = 1
+    stride_y = 1
+    total_padding_x = 0
+    padding_x = 0
+    total_padding_y = 0
+    padding_y = 0
+  else:
+    if force:
+      kernel_size_x = None
+      kernel_size_y = None
+      stride_x = None
+      stride_y = None
+      total_padding_x = None
+      padding_x = None
+      total_padding_y = None
+      padding_y = None
+    else:
+      raise ValueError("Unknown layer for operation '%s': %s" % (node.name,
+                                                                 node.op))
+  return (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x,
+          padding_y, total_padding_x, total_padding_y)
diff --git a/tensorflow/contrib/receptive_field/python/util/parse_layer_parameters_test.py b/tensorflow/contrib/receptive_field/python/util/parse_layer_parameters_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..369758a28402f7c9e55cca9c6f9ffa9182c91140
--- /dev/null
+++ b/tensorflow/contrib/receptive_field/python/util/parse_layer_parameters_test.py
@@ -0,0 +1,149 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for parse_layer_parameters module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib import slim
+from tensorflow.contrib.receptive_field.python.util import graph_compute_order
+from tensorflow.contrib.receptive_field.python.util import parse_layer_parameters
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.platform import test
+
+
+def create_test_network():
+  """Convolutional neural network for test.
+
+  Returns:
+    name_to_node: Dict keyed by node name, each entry containing the node's
+      NodeDef.
+  """
+  g = ops.Graph()
+  with g.as_default():
+    # An input test image with unknown spatial resolution.
+    x = array_ops.placeholder(
+        dtypes.float32, (None, None, None, 1), name='input_image')
+    # Left branch before first addition.
+    l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
+    # Right branch before first addition.
+    l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]], name='L2_pad')
+    l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID')
+    l3 = slim.max_pool2d(l2, [3, 3], stride=2, scope='L3', padding='SAME')
+    # First addition.
+    l4 = nn.relu(l1 + l3, name='L4_relu')
+    # Left branch after first addition.
+    l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='SAME')
+    # Right branch after first addition.
+    l6 = slim.conv2d(l4, 1, [3, 3], stride=2, scope='L6', padding='SAME')
+    # Final addition.
+    gen_math_ops.add(l5, l6, name='L7_add')
+
+  name_to_node = graph_compute_order.parse_graph_nodes(g.as_graph_def())
+  return name_to_node
+
+
+class ParseLayerParametersTest(test.TestCase):
+
+  def testParametersAreParsedCorrectly(self):
+    """Checks parameters from create_test_network() are parsed correctly."""
+    name_to_node = create_test_network()
+
+    # L1.
+    l1_node_name = 'L1/Conv2D'
+    l1_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l1_node_name], name_to_node)
+    expected_l1_params = (1, 1, 4, 4, 0, 0, 0, 0)
+    self.assertEqual(l1_params, expected_l1_params)
+
+    # L2 padding.
+    l2_pad_name = 'L2_pad'
+    l2_pad_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l2_pad_name], name_to_node)
+    expected_l2_pad_params = (1, 1, 1, 1, 1, 1, 1, 1)
+    self.assertEqual(l2_pad_params, expected_l2_pad_params)
+
+    # L2.
+    l2_node_name = 'L2/Conv2D'
+    l2_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l2_node_name], name_to_node)
+    expected_l2_params = (3, 3, 2, 2, 0, 0, 0, 0)
+    self.assertEqual(l2_params, expected_l2_params)
+
+    # L3.
+    l3_node_name = 'L3/MaxPool'
+    # - Without knowing input size.
+    l3_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l3_node_name], name_to_node)
+    expected_l3_params = (3, 3, 2, 2, None, None, None, None)
+    self.assertEqual(l3_params, expected_l3_params)
+    # - Input size is even.
+    l3_even_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l3_node_name], name_to_node, input_resolution=[4, 4])
+    expected_l3_even_params = (3, 3, 2, 2, 0, 0, 1, 1)
+    self.assertEqual(l3_even_params, expected_l3_even_params)
+    # - Input size is odd.
+    l3_odd_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l3_node_name], name_to_node, input_resolution=[5, 5])
+    expected_l3_odd_params = (3, 3, 2, 2, 1, 1, 2, 2)
+    self.assertEqual(l3_odd_params, expected_l3_odd_params)
+
+    # L4.
+    l4_node_name = 'L4_relu'
+    l4_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l4_node_name], name_to_node)
+    expected_l4_params = (1, 1, 1, 1, 0, 0, 0, 0)
+    self.assertEqual(l4_params, expected_l4_params)
+
+    # L5.
+    l5_node_name = 'L5/Conv2D'
+    l5_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l5_node_name], name_to_node)
+    expected_l5_params = (1, 1, 2, 2, 0, 0, 0, 0)
+    self.assertEqual(l5_params, expected_l5_params)
+
+    # L6.
+    l6_node_name = 'L6/Conv2D'
+    # - Without knowing input size.
+    l6_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l6_node_name], name_to_node)
+    expected_l6_params = (3, 3, 2, 2, None, None, None, None)
+    self.assertEqual(l6_params, expected_l6_params)
+    # - Input size is even.
+    l6_even_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l6_node_name], name_to_node, input_resolution=[4, 4])
+    expected_l6_even_params = (3, 3, 2, 2, 0, 0, 1, 1)
+    self.assertEqual(l6_even_params, expected_l6_even_params)
+    # - Input size is odd.
+    l6_odd_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l6_node_name], name_to_node, input_resolution=[5, 5])
+    expected_l6_odd_params = (3, 3, 2, 2, 1, 1, 2, 2)
+    self.assertEqual(l6_odd_params, expected_l6_odd_params)
+
+    # L7.
+    l7_node_name = 'L7_add'
+    l7_params = parse_layer_parameters.get_layer_params(
+        name_to_node[l7_node_name], name_to_node)
+    expected_l7_params = (1, 1, 1, 1, 0, 0, 0, 0)
+    self.assertEqual(l7_params, expected_l7_params)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field.py b/tensorflow/contrib/receptive_field/python/util/receptive_field.py
index 8b34465d21d14508c24056b588f2533d8fea6a1d..b9bd2f09761ab10a62d37e8e2580b93b9b8a4453 100644
--- a/tensorflow/contrib/receptive_field/python/util/receptive_field.py
+++ b/tensorflow/contrib/receptive_field/python/util/receptive_field.py
@@ -23,242 +23,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
+import numpy as np
 from tensorflow.contrib.receptive_field.python.util import graph_compute_order
-from tensorflow.contrib.util import make_ndarray
-from tensorflow.python.platform import tf_logging as logging
+from tensorflow.contrib.receptive_field.python.util import parse_layer_parameters
 from tensorflow.python.framework import ops as framework_ops
-import numpy as np
-
-# White-listed layer operations, which do not affect the receptive field
-# computation.
-_UNCHANGED_RF_LAYER_OPS = [
-  'Add', 'BiasAdd', 'Ceil', 'ConcatV2', 'Const', 'Floor', 'Identity', 'Log',
-  'Mul', 'Pow', 'RealDiv', 'Relu', 'Round', 'Rsqrt', 'Softplus', 'Sub',
-  'VariableV2']
-
-# Different ways in which padding modes may be spelled.
-_VALID_PADDING = ["VALID", b"VALID"]
-_SAME_PADDING = ["SAME", b"SAME"]
-
-
-def _stride_size(node):
-  """Computes stride size given a TF node.
-
-  Args:
-    node: Tensorflow node (NodeDef proto).
-
-  Returns:
-    stride_x: Stride size for horizontal direction (integer).
-    stride_y: Stride size for vertical direction (integer).
-  """
-  strides_attr = node.attr["strides"]
-  logging.vlog(4, "strides_attr = %s", strides_attr)
-  stride_y = strides_attr.list.i[1]
-  stride_x = strides_attr.list.i[2]
-  return stride_x, stride_y
-
-
-def _conv_kernel_size(node, name_to_order_node):
-  """Computes kernel size given a TF convolution or pooling node.
-
-  Args:
-    node: Tensorflow node (NodeDef proto).
-    name_to_order_node: Map from name to {order, node}. Output of
-      graph_compute_order.get_compute_order().
-
-  Returns:
-    kernel_size_x: Kernel size for horizontal direction (integer).
-    kernel_size_y: Kernel size for vertical direction (integer).
-
-  Raises:
-    ValueError: If the weight layer node is invalid.
-  """
-  weights_layer_read_name = node.input[1]
-  if not weights_layer_read_name.endswith("/read"):
-    raise ValueError(
-        "Weight layer's name input to conv layer does not end with '/read'")
-  weights_layer_param_name = weights_layer_read_name[:-5]
-  weights_node = name_to_order_node[weights_layer_param_name].node
-  if weights_node.op != "VariableV2":
-    raise ValueError("Weight layer is not of type VariableV2")
-  shape = weights_node.attr["shape"]
-  logging.vlog(4, "weight shape = %s", shape)
-  kernel_size_y = shape.shape.dim[0].size
-  kernel_size_x = shape.shape.dim[1].size
-  return kernel_size_x, kernel_size_y
-
-
-def _padding_size_conv_pool(node, kernel_size, stride):
-  """Computes padding size given a TF convolution or pooling node.
-
-  Args:
-    node: Tensorflow node (NodeDef proto).
-    kernel_size: Kernel size of node (integer).
-    stride: Stride size of node (integer).
-
-  Returns:
-    padding: Padding size (integer).
-
-  Raises:
-    ValueError: If padding is invalid.
-  """
-  # In this case, we need to carefully consider the different TF padding modes.
-  # The padding depends on kernel size, and may depend on input size. If it
-  # depends on input size, we raise an exception.
-  padding_attr = node.attr["padding"]
-  logging.vlog(4, "padding_attr = %s", padding_attr)
-  if padding_attr.s in _VALID_PADDING:
-    padding = 0
-  elif padding_attr.s in _SAME_PADDING:
-    if kernel_size == 1:
-      padding = 0
-    elif stride == 1:
-      padding = int(math.floor((float(kernel_size) - 1) / 2))
-    elif stride == 2 and kernel_size % 2 == 0:
-      padding = int(math.floor((float(kernel_size) - 1) / 2))
-    else:
-      padding = None
-      logging.warning(
-          "Padding depends on input size, which means that the effective "
-          "padding may be different depending on the input image "
-          "dimensionality. In this case, alignment check will be skipped.")
-  else:
-    raise ValueError("Invalid padding operation %s" % padding_attr.s)
-  return padding
-
-
-def _pool_kernel_size(node):
-  """Computes kernel size given a TF pooling node.
-
-  Args:
-    node: Tensorflow node (NodeDef proto).
-
-  Returns:
-    kernel_size_x: Kernel size for horizontal direction (integer).
-    kernel_size_y: Kernel size for vertical direction (integer).
-
-  Raises:
-    ValueError: If pooling is invalid.
-  """
-  ksize = node.attr["ksize"]
-  kernel_size_y = ksize.list.i[1]
-  kernel_size_x = ksize.list.i[2]
-  if ksize.list.i[0] != 1:
-    raise ValueError("pool ksize for first dim is not 1")
-  if ksize.list.i[3] != 1:
-    raise ValueError("pool ksize for last dim is not 1")
-  return kernel_size_x, kernel_size_y
-
-
-def _padding_size_pad_layer(node, name_to_order_node):
-  """Computes padding size given a TF padding node.
-
-  Args:
-    node: Tensorflow node (NodeDef proto).
-    name_to_order_node: Map from name to {order, node}. Output of
-      graph_compute_order.get_compute_order().
-
-  Returns:
-    padding_x: Padding size for horizontal direction (integer).
-    padding_y: Padding size for vertical direction (integer).
-
-  Raises:
-    ValueError: If padding layer is invalid.
-  """
-  paddings_layer_name = node.input[1]
-  if not paddings_layer_name.endswith("/paddings"):
-    raise ValueError("Padding layer name does not end with '/paddings'")
-  paddings_node = name_to_order_node[paddings_layer_name].node
-  if paddings_node.op != "Const":
-    raise ValueError("Padding op is not Const")
-  value = paddings_node.attr["value"]
-  t = make_ndarray(value.tensor)
-  padding_y = t[1][0]
-  padding_x = t[2][0]
-  if t[0][0] != 0:
-    raise ValueError("padding is not zero for first tensor dim")
-  if t[3][0] != 0:
-    raise ValueError("padding is not zero for last tensor dim")
-  return padding_x, padding_y
-
-
-def _get_layer_params(node, name_to_order_node):
-  """Gets layer parameters relevant for RF computation.
-
-  Currently, only these nodes are supported:
-  - Conv2D
-  - DepthwiseConv2dNative
-  - Pad
-  - MaxPool
-  - AvgPool
-  - all nodes listed in _UNCHANGED_RF_LAYER_OPS
-
-  Args:
-    node: Tensorflow node (NodeDef proto).
-    name_to_order_node: Map from name to {order, node}. Output of
-      graph_compute_order.get_compute_order().
-
-  Returns:
-    kernel_size_x: Kernel size for horizontal direction (integer).
-    kernel_size_y: Kernel size for vertical direction (integer).
-    stride_x: Stride size for horizontal direction (integer).
-    stride_y: Stride size for vertical direction (integer).
-    padding_x: Padding size for horizontal direction (integer).
-    padding_y: Padding size for vertical direction (integer).
-
-  Raises:
-    ValueError: If layer op is unknown.
-  """
-  logging.vlog(3, "node.op = %s", node.op)
-  logging.vlog(4, "node = %s", node)
-  if node.op == "Conv2D" or node.op == "DepthwiseConv2dNative":
-    stride_x, stride_y = _stride_size(node)
-    kernel_size_x, kernel_size_y = _conv_kernel_size(node, name_to_order_node)
-    # Compute the padding for this node separately for each direction.
-    padding_x = _padding_size_conv_pool(node, kernel_size_x, stride_x)
-    padding_y = _padding_size_conv_pool(node, kernel_size_y, stride_y)
-  elif node.op == "Pad":
-    # Kernel and stride are simply 1 in this case.
-    kernel_size_x = 1
-    kernel_size_y = 1
-    stride_x = 1
-    stride_y = 1
-    padding_x, padding_y = _padding_size_pad_layer(node, name_to_order_node)
-  elif node.op == "MaxPool" or node.op == "AvgPool":
-    stride_x, stride_y = _stride_size(node)
-    kernel_size_x, kernel_size_y = _pool_kernel_size(node)
-    # Compute the padding for this node separately for each direction.
-    padding_x = _padding_size_conv_pool(node, kernel_size_x, stride_x)
-    padding_y = _padding_size_conv_pool(node, kernel_size_y, stride_y)
-  elif node.op in _UNCHANGED_RF_LAYER_OPS:
-    # These nodes do not modify the RF parameters.
-    kernel_size_x = 1
-    kernel_size_y = 1
-    stride_x = 1
-    stride_y = 1
-    padding_x = 0
-    padding_y = 0
-  else:
-    raise ValueError("Unknown layer for operation '%s': %s" %
-                     (node.name, node.op))
-  return kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, padding_y
-
-
-def _reverse_sort_by_order(name_to_order_node):
-  """Sorts map of name_to_order_node nodes in reverse order.
-
-  The output is such that the nodes in name_to_order_node are sorted in
-  descending order of the "order" field.
-
-  Args:
-    name_to_order_node: Map from name to {order, node}. Output of
-      graph_compute_order.get_compute_order().
-
-  Returns:
-    sorted_name_to_order_node: Sorted version of the input, in descending order.
-  """
-  return sorted(name_to_order_node.items(), key=lambda x: -x[1].order)
+from tensorflow.python.platform import tf_logging as logging
 
 
 def _get_rf_size_node_input(stride, kernel_size, rf_size_output):
@@ -307,23 +76,22 @@ def _get_effective_padding_node_input(stride, padding,
   return stride * effective_padding_output + padding
 
 
-class ReceptiveField:
-  """
-  Receptive field of a convolutional neural network.
+class ReceptiveField(object):
+  """Receptive field of a convolutional neural network.
 
   Args:
     size: Receptive field size.
     stride: Effective stride.
     padding: Effective padding.
   """
+
   def __init__(self, size, stride, padding):
     self.size = np.asarray(size)
     self.stride = np.asarray(stride)
     self.padding = np.asarray(padding)
 
   def compute_input_center_coordinates(self, y, axis=None):
-    """
-    Computes the center of the receptive field that generated a feature.
+    """Computes the center of the receptive field that generated a feature.
 
     Args:
       y: An array of feature coordinates with shape `(..., d)`, where `d` is the
@@ -350,12 +118,11 @@ class ReceptiveField:
       raise ValueError("Dimensionality of the feature coordinates `y` (%d) "
                        "does not match dimensionality of `axis` (%d)" %
                        (y.shape[-1], len(axis)))
-    return - self.padding[axis] + y * self.stride[axis] + \
-      (self.size[axis] - 1) / 2
+    return -self.padding[axis] + y * self.stride[axis] + (
+        self.size[axis] - 1) / 2
 
   def compute_feature_coordinates(self, x, axis=None):
-    """
-    Computes the position of a feature given the center of a receptive field.
+    """Computes the position of a feature given the center of a receptive field.
 
     Args:
       x: An array of input center coordinates with shape `(..., d)`, where `d`
@@ -381,15 +148,18 @@ class ReceptiveField:
       raise ValueError("Dimensionality of the input center coordinates `x` "
                        "(%d) does not match dimensionality of `axis` (%d)" %
                        (x.shape[-1], len(axis)))
-    return (x + self.padding[axis] + (1 - self.size[axis]) / 2) / \
-      self.stride[axis]
+    return (x + self.padding[axis] +
+            (1 - self.size[axis]) / 2) / self.stride[axis]
 
   def __iter__(self):
     return iter(np.concatenate([self.size, self.stride, self.padding]))
 
 
-def compute_receptive_field_from_graph_def(graph_def, input_node, output_node,
-                                           stop_propagation=None):
+def compute_receptive_field_from_graph_def(graph_def,
+                                           input_node,
+                                           output_node,
+                                           stop_propagation=None,
+                                           input_resolution=None):
   """Computes receptive field (RF) parameters from a Graph or GraphDef object.
 
   The algorithm stops the calculation of the receptive field whenever it
@@ -402,8 +172,14 @@ def compute_receptive_field_from_graph_def(graph_def, input_node, output_node,
     graph_def: Graph or GraphDef object.
     input_node: Name of the input node or Tensor object from graph.
     output_node: Name of the output node or Tensor object from graph.
-    stop_propagation: List of operation or scope names for which to stop the
+    stop_propagation: List of operations or scope names for which to stop the
       propagation of the receptive field.
+    input_resolution: 2D list. If the input resolution to the model is fixed and
+      known, this may be set. This is helpful for cases where the RF parameters
+      vary depending on the input resolution (this happens since SAME padding in
+      tensorflow depends on input resolution in general). If this is None, it is
+      assumed that the input resolution is unknown, so some RF parameters may be
+      unknown (depending on the model architecture).
 
   Returns:
     rf_size_x: Receptive field size of network in the horizontal direction, with
@@ -437,11 +213,13 @@ def compute_receptive_field_from_graph_def(graph_def, input_node, output_node,
   stop_propagation = stop_propagation or []
 
   # Computes order of computation for a given graph.
-  name_to_order_node = graph_compute_order.get_compute_order(
-      graph_def=graph_def)
+  node_info, name_to_node = graph_compute_order.get_compute_order(
+      graph_def=graph_def,
+      input_node_name=input_node,
+      input_node_size=input_resolution)
 
   # Sort in reverse topological order.
-  order = _reverse_sort_by_order(name_to_order_node)
+  ordered_node_info = sorted(node_info.items(), key=lambda x: -x[1].order)
 
   # Dictionaries to keep track of receptive field, effective stride and
   # effective padding of different nodes.
@@ -470,7 +248,7 @@ def compute_receptive_field_from_graph_def(graph_def, input_node, output_node,
   # alignment checks are skipped, and the effective padding is None.
   undefined_padding = False
 
-  for _, (o, node) in order:
+  for _, (o, node, _, _) in ordered_node_info:
     if node:
       logging.vlog(3, "%10d %-100s %-20s" % (o, node.name[:90], node.op))
     else:
@@ -496,13 +274,14 @@ def compute_receptive_field_from_graph_def(graph_def, input_node, output_node,
         continue
 
       # Get params for this layer.
-      kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, padding_y = (
-          _get_layer_params(node, name_to_order_node))
+      (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x,
+       padding_y, _, _) = parse_layer_parameters.get_layer_params(
+           node, name_to_node, node_info[node.name].input_size)
       logging.vlog(3, "kernel_size_x = %s, kernel_size_y = %s, "
                    "stride_x = %s, stride_y = %s, "
-                   "padding_x = %s, padding_y = %s" %
+                   "padding_x = %s, padding_y = %s, input size = %s" %
                    (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x,
-                    padding_y))
+                    padding_y, node_info[node.name].input_size))
       if padding_x is None or padding_y is None:
         undefined_padding = True
 
@@ -524,72 +303,93 @@ def compute_receptive_field_from_graph_def(graph_def, input_node, output_node,
       else:
         effective_padding_input_x = None
         effective_padding_input_y = None
+      logging.vlog(
+          4, "rf_size_input_x = %s, rf_size_input_y = %s, "
+          "effective_stride_input_x = %s, effective_stride_input_y = %s, "
+          "effective_padding_input_x = %s, effective_padding_input_y = %s" %
+          (rf_size_input_x, rf_size_input_y, effective_stride_input_x,
+           effective_stride_input_y, effective_padding_input_x,
+           effective_padding_input_y))
 
       # Loop over this node's inputs and potentially propagate information down.
       for inp_name in node.input:
         # Stop the propagation of the receptive field.
         if any(inp_name.startswith(stop) for stop in stop_propagation):
-          logging.vlog(3, "Skipping explicitly ignored node %s.", node.name)
+          logging.vlog(3, "Skipping explicitly ignored node %s.", inp_name)
           continue
+
         logging.vlog(4, "inp_name = %s", inp_name)
-        inp_node = name_to_order_node[inp_name].node
+        if inp_name.startswith("^"):
+          # The character "^" denotes a control dependency, so this input node
+          # can be safely ignored.
+          continue
+
+        inp_node = name_to_node[inp_name]
         logging.vlog(4, "inp_node = \n%s", inp_node)
-        if inp_node.name in rf_sizes_x:
-          assert inp_node.name in rf_sizes_y, (
-              "Node %s is in rf_sizes_x, but "
-              "not in rf_sizes_y" % inp_node.name)
+        if inp_name in rf_sizes_x:
+          assert inp_name in rf_sizes_y, ("Node %s is in rf_sizes_x, but "
+                                          "not in rf_sizes_y" % inp_name)
+          logging.vlog(
+              4, "rf_sizes_x[inp_name] = %s,"
+              " rf_sizes_y[inp_name] = %s, "
+              "effective_strides_x[inp_name] = %s,"
+              " effective_strides_y[inp_name] = %s, "
+              "effective_paddings_x[inp_name] = %s,"
+              " effective_paddings_y[inp_name] = %s" %
+              (rf_sizes_x[inp_name], rf_sizes_y[inp_name],
+               effective_strides_x[inp_name], effective_strides_y[inp_name],
+               effective_paddings_x[inp_name], effective_paddings_y[inp_name]))
           # This node was already discovered through a previous path, so we need
           # to make sure that graph is aligned. This alignment check is skipped
           # if the padding is not defined, since in this case alignment cannot
           # be checked.
           if not undefined_padding:
-            if effective_strides_x[inp_node.name] != effective_stride_input_x:
+            if effective_strides_x[inp_name] != effective_stride_input_x:
               raise ValueError(
                   "Graph is not aligned since effective stride from different "
                   "paths is different in horizontal direction")
-            if effective_strides_y[inp_node.name] != effective_stride_input_y:
+            if effective_strides_y[inp_name] != effective_stride_input_y:
               raise ValueError(
                   "Graph is not aligned since effective stride from different "
                   "paths is different in vertical direction")
-            if (rf_sizes_x[inp_node.name] - 1
-               ) / 2 - effective_paddings_x[inp_node.name] != (
+            if (rf_sizes_x[inp_name] - 1
+               ) / 2 - effective_paddings_x[inp_name] != (
                    rf_size_input_x - 1) / 2 - effective_padding_input_x:
               raise ValueError(
                   "Graph is not aligned since center shift from different "
                   "paths is different in horizontal direction")
-            if (rf_sizes_y[inp_node.name] - 1
-               ) / 2 - effective_paddings_y[inp_node.name] != (
+            if (rf_sizes_y[inp_name] - 1
+               ) / 2 - effective_paddings_y[inp_name] != (
                    rf_size_input_y - 1) / 2 - effective_padding_input_y:
               raise ValueError(
                   "Graph is not aligned since center shift from different "
                   "paths is different in vertical direction")
           # Keep track of path with largest RF, for both directions.
-          if rf_sizes_x[inp_node.name] < rf_size_input_x:
-            rf_sizes_x[inp_node.name] = rf_size_input_x
-            effective_strides_x[inp_node.name] = effective_stride_input_x
-            effective_paddings_x[inp_node.name] = effective_padding_input_x
-          if rf_sizes_y[inp_node.name] < rf_size_input_y:
-            rf_sizes_y[inp_node.name] = rf_size_input_y
-            effective_strides_y[inp_node.name] = effective_stride_input_y
-            effective_paddings_y[inp_node.name] = effective_padding_input_y
+          if rf_sizes_x[inp_name] < rf_size_input_x:
+            rf_sizes_x[inp_name] = rf_size_input_x
+            effective_strides_x[inp_name] = effective_stride_input_x
+            effective_paddings_x[inp_name] = effective_padding_input_x
+          if rf_sizes_y[inp_name] < rf_size_input_y:
+            rf_sizes_y[inp_name] = rf_size_input_y
+            effective_strides_y[inp_name] = effective_stride_input_y
+            effective_paddings_y[inp_name] = effective_padding_input_y
         else:
-          assert inp_node.name not in rf_sizes_y, (
-              "Node %s is in rf_sizes_y, but "
-              "not in rf_sizes_x" % inp_node.name)
+          assert inp_name not in rf_sizes_y, ("Node %s is in rf_sizes_y, but "
+                                              "not in rf_sizes_x" % inp_name)
           # In this case, it is the first time we encounter this node. So we
           # propagate the RF parameters.
-          rf_sizes_x[inp_node.name] = rf_size_input_x
-          rf_sizes_y[inp_node.name] = rf_size_input_y
-          effective_strides_x[inp_node.name] = effective_stride_input_x
-          effective_strides_y[inp_node.name] = effective_stride_input_y
-          effective_paddings_x[inp_node.name] = effective_padding_input_x
-          effective_paddings_y[inp_node.name] = effective_padding_input_y
+          rf_sizes_x[inp_name] = rf_size_input_x
+          rf_sizes_y[inp_name] = rf_size_input_y
+          effective_strides_x[inp_name] = effective_stride_input_x
+          effective_strides_y[inp_name] = effective_stride_input_y
+          effective_paddings_x[inp_name] = effective_padding_input_x
+          effective_paddings_y[inp_name] = effective_padding_input_y
 
   if not found_output_node:
     raise ValueError("Output node was not found")
   if input_node not in rf_sizes_x:
     raise ValueError("Input node was not found")
   return ReceptiveField(
-    (rf_sizes_x[input_node], rf_sizes_y[input_node]),
-    (effective_strides_x[input_node], effective_strides_y[input_node]),
-    (effective_paddings_x[input_node], effective_paddings_y[input_node]))
+      (rf_sizes_x[input_node], rf_sizes_y[input_node]),
+      (effective_strides_x[input_node], effective_strides_y[input_node]),
+      (effective_paddings_x[input_node], effective_paddings_y[input_node]))
diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py
index 8d7d5440f630a3a78749e04a5eb058d637c258fc..cf55da27236d17c709cbde689831ad68da9a8a7b 100644
--- a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py
+++ b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py
@@ -18,16 +18,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.contrib import slim
-from tensorflow.contrib.receptive_field.python.util import receptive_field
+from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.platform import test
-import numpy as np
 
 
+# TODO(andrearaujo): Rename the create_test_network_* functions in order to have
+# more descriptive names.
 def create_test_network_1():
   """Aligned network for test.
 
@@ -39,8 +44,9 @@ def create_test_network_1():
   """
   g = ops.Graph()
   with g.as_default():
-    # An 8x8 test image.
-    x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image')
+    # An input test image with unknown spatial resolution.
+    x = array_ops.placeholder(
+        dtypes.float32, (None, None, None, 1), name='input_image')
     # Left branch.
     l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
     # Right branch.
@@ -66,8 +72,9 @@ def create_test_network_2():
   """
   g = ops.Graph()
   with g.as_default():
-    # An 8x8 test image.
-    x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image')
+    # An input test image with unknown spatial resolution.
+    x = array_ops.placeholder(
+        dtypes.float32, (None, None, None, 1), name='input_image')
     # Left branch.
     l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
     # Right branch.
@@ -90,8 +97,9 @@ def create_test_network_3():
   """
   g = ops.Graph()
   with g.as_default():
-    # An 8x8 test image.
-    x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image')
+    # An input test image with unknown spatial resolution.
+    x = array_ops.placeholder(
+        dtypes.float32, (None, None, None, 1), name='input_image')
     # Left branch.
     l1_pad = array_ops.pad(x, [[0, 0], [2, 1], [2, 1], [0, 0]])
     l1 = slim.conv2d(l1_pad, 1, [5, 5], stride=2, scope='L1', padding='VALID')
@@ -117,8 +125,9 @@ def create_test_network_4():
   """
   g = ops.Graph()
   with g.as_default():
-    # An 8x8 test image.
-    x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image')
+    # An input test image with unknown spatial resolution.
+    x = array_ops.placeholder(
+        dtypes.float32, (None, None, None, 1), name='input_image')
     # Left branch.
     l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
     # Right branch.
@@ -141,8 +150,9 @@ def create_test_network_5():
   """
   g = ops.Graph()
   with g.as_default():
-    # An 8x8 test image.
-    x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image')
+    # An input test image with unknown spatial resolution.
+    x = array_ops.placeholder(
+        dtypes.float32, (None, None, None, 1), name='input_image')
     # Two convolutional layers, where the first one has non-square kernel.
     l1 = slim.conv2d(x, 1, [3, 5], stride=2, scope='L1', padding='VALID')
     l2 = slim.conv2d(l1, 1, [3, 1], stride=2, scope='L2', padding='VALID')
@@ -162,8 +172,9 @@ def create_test_network_6():
   """
   g = ops.Graph()
   with g.as_default():
-    # An 8x8 test image.
-    x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image')
+    # An input test image with unknown spatial resolution.
+    x = array_ops.placeholder(
+        dtypes.float32, (None, None, None, 1), name='input_image')
     # Left branch.
     l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
     # Right branch.
@@ -176,7 +187,102 @@ def create_test_network_6():
   return g
 
 
-class RfUtilsTest(test.TestCase):
+def create_test_network_7():
+  """Aligned network for test, with a control dependency.
+
+  The graph is similar to create_test_network_1(), except that it includes an
+  assert operation on the left branch.
+
+  Returns:
+    g: Tensorflow graph object (Graph proto).
+  """
+  g = ops.Graph()
+  with g.as_default():
+    # An 8x8 test image.
+    x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image')
+    # Left branch.
+    l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
+    l1_shape = array_ops.shape(l1)
+    assert_op = control_flow_ops.Assert(
+        gen_math_ops.equal(l1_shape[1], 2), [l1_shape], summarize=4)
+    # Right branch.
+    l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]])
+    l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID')
+    l3 = slim.conv2d(l2, 1, [1, 1], stride=2, scope='L3', padding='VALID')
+    # Addition.
+    with ops.control_dependencies([assert_op]):
+      nn.relu(l1 + l3, name='output')
+  return g
+
+
+def create_test_network_8():
+  """Aligned network for test, including an intermediate addition.
+
+  The graph is similar to create_test_network_1(), except that it includes a few
+  more layers on top. The added layers compose two different branches whose
+  receptive fields are different. This makes this test case more challenging; in
+  particular, this test fails if a naive DFS-like algorithm is used for RF
+  computation.
+
+  Returns:
+    g: Tensorflow graph object (Graph proto).
+  """
+  g = ops.Graph()
+  with g.as_default():
+    # An input test image with unknown spatial resolution.
+    x = array_ops.placeholder(
+        dtypes.float32, (None, None, None, 1), name='input_image')
+    # Left branch before first addition.
+    l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
+    # Right branch before first addition.
+    l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]])
+    l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID')
+    l3 = slim.conv2d(l2, 1, [1, 1], stride=2, scope='L3', padding='VALID')
+    # First addition.
+    l4 = nn.relu(l1 + l3)
+    # Left branch after first addition.
+    l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='VALID')
+    # Right branch after first addition.
+    l6_pad = array_ops.pad(l4, [[0, 0], [1, 0], [1, 0], [0, 0]])
+    l6 = slim.conv2d(l6_pad, 1, [3, 3], stride=2, scope='L6', padding='VALID')
+    # Final addition.
+    nn.relu(l5 + l6, name='output')
+
+  return g
+
+
+def create_test_network_9():
+  """Aligned network for test, including an intermediate addition.
+
+  The graph is the same as create_test_network_8(), except that VALID padding is
+  changed to SAME.
+
+  Returns:
+    g: Tensorflow graph object (Graph proto).
+  """
+  g = ops.Graph()
+  with g.as_default():
+    # An input test image with unknown spatial resolution.
+    x = array_ops.placeholder(
+        dtypes.float32, (None, None, None, 1), name='input_image')
+    # Left branch before first addition.
+    l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='SAME')
+    # Right branch before first addition.
+    l2 = slim.conv2d(x, 1, [3, 3], stride=2, scope='L2', padding='SAME')
+    l3 = slim.conv2d(l2, 1, [1, 1], stride=2, scope='L3', padding='SAME')
+    # First addition.
+    l4 = nn.relu(l1 + l3)
+    # Left branch after first addition.
+    l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='SAME')
+    # Right branch after first addition.
+    l6 = slim.conv2d(l4, 1, [3, 3], stride=2, scope='L6', padding='SAME')
+    # Final addition.
+    nn.relu(l5 + l6, name='output')
+
+  return g
+
+
+class ReceptiveFieldTest(test.TestCase):
 
   def testComputeRFFromGraphDefAligned(self):
     graph_def = create_test_network_1().as_graph_def()
@@ -216,7 +322,7 @@ class RfUtilsTest(test.TestCase):
       receptive_field.compute_receptive_field_from_graph_def(
           graph_def, input_node, output_node)
 
-  def testComputeRFFromGraphDefUnaligned2(self):
+  def testComputeRFFromGraphDefUndefinedPadding(self):
     graph_def = create_test_network_4().as_graph_def()
     input_node = 'input_image'
     output_node = 'output'
@@ -231,6 +337,29 @@ class RfUtilsTest(test.TestCase):
     self.assertEqual(effective_padding_x, None)
     self.assertEqual(effective_padding_y, None)
 
+  def testComputeRFFromGraphDefFixedInputDim(self):
+    graph_def = create_test_network_4().as_graph_def()
+    input_node = 'input_image'
+    output_node = 'output'
+    (receptive_field_x, receptive_field_y, effective_stride_x,
+     effective_stride_y, effective_padding_x, effective_padding_y) = (
+         receptive_field.compute_receptive_field_from_graph_def(
+             graph_def, input_node, output_node, input_resolution=[9, 9]))
+    self.assertEqual(receptive_field_x, 3)
+    self.assertEqual(receptive_field_y, 3)
+    self.assertEqual(effective_stride_x, 4)
+    self.assertEqual(effective_stride_y, 4)
+    self.assertEqual(effective_padding_x, 1)
+    self.assertEqual(effective_padding_y, 1)
+
+  def testComputeRFFromGraphDefUnalignedFixedInputDim(self):
+    graph_def = create_test_network_4().as_graph_def()
+    input_node = 'input_image'
+    output_node = 'output'
+    with self.assertRaises(ValueError):
+      receptive_field.compute_receptive_field_from_graph_def(
+          graph_def, input_node, output_node, input_resolution=[8, 8])
+
   def testComputeRFFromGraphDefNonSquareRF(self):
     graph_def = create_test_network_5().as_graph_def()
     input_node = 'input_image'
@@ -269,7 +398,7 @@ class RfUtilsTest(test.TestCase):
     input_node = 'input_image'
     output_node = 'output'
     rf = receptive_field.compute_receptive_field_from_graph_def(
-      graph_def, input_node, output_node)
+        graph_def, input_node, output_node)
 
     x = np.random.randint(0, 100, (50, 2))
     y = rf.compute_feature_coordinates(x)
@@ -277,5 +406,52 @@ class RfUtilsTest(test.TestCase):
 
     self.assertAllEqual(x, x2)
 
+  def testComputeRFFromGraphDefAlignedWithControlDependencies(self):
+    graph_def = create_test_network_7().as_graph_def()
+    input_node = 'input_image'
+    output_node = 'output'
+    (receptive_field_x, receptive_field_y, effective_stride_x,
+     effective_stride_y, effective_padding_x, effective_padding_y) = (
+         receptive_field.compute_receptive_field_from_graph_def(
+             graph_def, input_node, output_node))
+    self.assertEqual(receptive_field_x, 3)
+    self.assertEqual(receptive_field_y, 3)
+    self.assertEqual(effective_stride_x, 4)
+    self.assertEqual(effective_stride_y, 4)
+    self.assertEqual(effective_padding_x, 1)
+    self.assertEqual(effective_padding_y, 1)
+
+  def testComputeRFFromGraphDefWithIntermediateAddNode(self):
+    graph_def = create_test_network_8().as_graph_def()
+    input_node = 'input_image'
+    output_node = 'output'
+    (receptive_field_x, receptive_field_y, effective_stride_x,
+     effective_stride_y, effective_padding_x, effective_padding_y) = (
+         receptive_field.compute_receptive_field_from_graph_def(
+             graph_def, input_node, output_node))
+    self.assertEqual(receptive_field_x, 11)
+    self.assertEqual(receptive_field_y, 11)
+    self.assertEqual(effective_stride_x, 8)
+    self.assertEqual(effective_stride_y, 8)
+    self.assertEqual(effective_padding_x, 5)
+    self.assertEqual(effective_padding_y, 5)
+
+  def testComputeRFFromGraphDefWithIntermediateAddNodeSamePaddingFixedInputDim(
+      self):
+    graph_def = create_test_network_9().as_graph_def()
+    input_node = 'input_image'
+    output_node = 'output'
+    (receptive_field_x, receptive_field_y, effective_stride_x,
+     effective_stride_y, effective_padding_x, effective_padding_y) = (
+         receptive_field.compute_receptive_field_from_graph_def(
+             graph_def, input_node, output_node, input_resolution=[17, 17]))
+    self.assertEqual(receptive_field_x, 11)
+    self.assertEqual(receptive_field_y, 11)
+    self.assertEqual(effective_stride_x, 8)
+    self.assertEqual(effective_stride_y, 8)
+    self.assertEqual(effective_padding_x, 5)
+    self.assertEqual(effective_padding_y, 5)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/receptive_field/__init__.py b/tensorflow/contrib/receptive_field/receptive_field_api.py
similarity index 89%
rename from tensorflow/contrib/receptive_field/__init__.py
rename to tensorflow/contrib/receptive_field/receptive_field_api.py
index 10745a6a53d5b3ef9521b2313ddc28799ee8b886..4d81b4292df5f696b761b2977fec078abc28569f 100644
--- a/tensorflow/contrib/receptive_field/__init__.py
+++ b/tensorflow/contrib/receptive_field/receptive_field_api.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Module to compute receptive field parameters for CNN tensorflow models."""
+"""Module that declares the functions in tf.contrib.receptive_field's API."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -21,3 +21,7 @@ from __future__ import print_function
 from tensorflow.contrib.receptive_field.python.util.graph_compute_order import get_compute_order
 from tensorflow.contrib.receptive_field.python.util.receptive_field import compute_receptive_field_from_graph_def
 # pylint: enable=unused-import
+
+del absolute_import
+del division
+del print_function
diff --git a/tensorflow/contrib/resampler/kernels/resampler_ops.cc b/tensorflow/contrib/resampler/kernels/resampler_ops.cc
index 7d9ef14cefc578e9401d95db9a625428cc0e2605..e02c1b6a2bd9daf9e1f81059f7c1f92106cebc8f 100644
--- a/tensorflow/contrib/resampler/kernels/resampler_ops.cc
+++ b/tensorflow/contrib/resampler/kernels/resampler_ops.cc
@@ -406,10 +406,10 @@ class ResamplerGradOp : public ::tensorflow::OpKernel {
                                    data_channels);
     OP_REQUIRES(ctx, grad_output_shape == resampler_output_shape,
                 ::tensorflow::errors::InvalidArgument(
-                   "grad_output shape is not consistent with data and warp "
-                   "shapes; it should be ",
-                   resampler_output_shape.DebugString(), " but is ",
-                   grad_output_shape.DebugString()))
+                    "grad_output shape is not consistent with data and warp "
+                    "shapes; it should be ",
+                    resampler_output_shape.DebugString(), " but is ",
+                    grad_output_shape.DebugString()));
     const int num_sampling_points = warp.NumElements() / batch_size / 2;
     ::tensorflow::Tensor* grad_data = nullptr;
     ::tensorflow::Tensor* grad_warp = nullptr;
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index cfecab00440ed72f385de8b9cd41fa689ae7b5eb..b5d81b7caac5186b34548a06c67ba48afab0a1a5 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -43,7 +43,6 @@ from tensorflow.python.framework import test_util
 from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
 
 
-
 # pylint: enable=protected-access
 Linear = core_rnn_cell._Linear  # pylint: disable=invalid-name
 
@@ -141,6 +140,20 @@ class RNNCellTest(test.TestCase):
         # Smoke test
         self.assertAllClose(res[0], [[0.156736, 0.156736]])
 
+  def testSRUCell(self):
+    with self.test_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        g, _ = contrib_rnn_cell.SRUCell(2)(x, m)
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run(
+            [g], {x.name: np.array([[1., 1.]]),
+                  m.name: np.array([[0.1, 0.1]])})
+        # Smoke test
+        self.assertAllClose(res[0], [[0.509682,  0.509682]])
+
   def testBasicLSTMCell(self):
     for dtype in [dtypes.float16, dtypes.float32]:
       np_dtype = dtype.as_numpy_dtype
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
index 9cea2ec79a982e4fb362ec564eb72b3894917842..0258d7202df20a536ae4240a532249b6b5e7e641 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
@@ -2175,9 +2175,9 @@ class DeviceWrapperCell(rnn_cell.RNNCell):
   def __call__(self, input_, state, scope=None):
     if self._device is not None:
       with ops_lib.device(self._device):
-        return self._cell(input_, state, scope)
+        return self._cell(input_, state, scope=scope)
     else:
-      return self._cell(input_, state, scope)
+      return self._cell(input_, state, scope=scope)
 
 
 class TensorArrayOnCorrectDeviceTest(test.TestCase):
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
index a288072ae5da0751f1999128029f38bea933490e..7957edf68cc8a1461fccfc2de93ad5250dc9fdb5 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
@@ -49,6 +49,7 @@ def blocks_match(sess, use_peephole):
     inp = ops.convert_to_tensor(
         np.random.randn(batch_size, input_size), dtype=dtypes.float32)
     inputs.append(inp)
+  stacked_inputs = array_ops.stack(inputs)
 
   initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=19890212)
 
@@ -72,23 +73,6 @@ def blocks_match(sess, use_peephole):
         dtype=dtypes.float32,
         initializer=init_ops.zeros_initializer())
 
-    if use_peephole:
-      wci_block = variable_scope.get_variable(
-          "rnn/lstm_cell/lstm_block_wrapper/w_i_diag",
-          initializer=wci.initialized_value())
-      wcf_block = variable_scope.get_variable(
-          "rnn/lstm_cell/lstm_block_wrapper/w_f_diag",
-          initializer=wcf.initialized_value())
-      wco_block = variable_scope.get_variable(
-          "rnn/lstm_cell/lstm_block_wrapper/w_o_diag",
-          initializer=wco.initialized_value())
-    w_block = variable_scope.get_variable(
-        "rnn/lstm_cell/lstm_block_wrapper/kernel",
-        initializer=w.initialized_value())
-    b_block = variable_scope.get_variable(
-        "rnn/lstm_cell/lstm_block_wrapper/bias",
-        initializer=b.initialized_value())
-
     basic_cell = rnn_cell.LSTMCell(
         cell_size, use_peepholes=use_peephole, state_is_tuple=True, reuse=True)
     basic_outputs_op, basic_state_op = rnn.static_rnn(
@@ -113,11 +97,11 @@ def blocks_match(sess, use_peephole):
           b,
           cell_clip=0)
 
-    with variable_scope.variable_scope("rnn/lstm_cell", reuse=True):
-      fused_cell = lstm_ops.LSTMBlockFusedCell(
-          cell_size, cell_clip=0, use_peephole=use_peephole)
-      fused_outputs_op, fused_state_op = fused_cell(
-          inputs, dtype=dtypes.float32)
+    fused_cell = lstm_ops.LSTMBlockFusedCell(
+        cell_size, cell_clip=0, use_peephole=use_peephole, reuse=True,
+        name="rnn/lstm_cell")
+    fused_outputs_op, fused_state_op = fused_cell(
+        stacked_inputs, dtype=dtypes.float32)
 
     sess.run([variables.global_variables_initializer()])
     basic_outputs, basic_state = sess.run([basic_outputs_op, basic_state_op[0]])
@@ -131,9 +115,9 @@ def blocks_match(sess, use_peephole):
     block_grads = sess.run(gradients_impl.gradients(block_outputs_op, inputs))
     block_wgrads = sess.run(gradients_impl.gradients(block_outputs_op, xs))
 
-    xs = [w_block, b_block]
+    xs = [w, b]
     if use_peephole:
-      xs += [wci_block, wcf_block, wco_block]
+      xs += [wci, wcf, wco]
     fused_outputs, fused_state = sess.run([fused_outputs_op, fused_state_op[0]])
     fused_grads = sess.run(gradients_impl.gradients(fused_outputs_op, inputs))
     fused_wgrads = sess.run(gradients_impl.gradients(fused_outputs_op, xs))
@@ -216,7 +200,7 @@ class LSTMBlockCellTest(test.TestCase):
     with self.test_session(use_gpu=True, graph=ops.Graph()):
       cell = lstm_ops.LSTMBlockFusedCell(10)
       pcell = lstm_ops.LSTMBlockFusedCell(10, use_peephole=True)
-      inputs = [array_ops.zeros([4, 5])] * 6
+      inputs = array_ops.stack([array_ops.zeros([4, 5])] * 6)
       cell(inputs, dtype=dtypes.float32, scope="basic/lstm_cell")
       pcell(inputs, dtype=dtypes.float32, scope="peephole/lstm_cell")
       fused_names = {
@@ -380,13 +364,13 @@ class LSTMBlockCellTest(test.TestCase):
             np.random.randn(batch_size, input_size), dtype=dtypes.float32)
         inputs.append(inp)
       seq_lengths = constant_op.constant([3, 4, 5])
+      cell_inputs = array_ops.stack(inputs)
 
       initializer = init_ops.random_uniform_initializer(
           -0.01, 0.01, seed=19890213)
 
-      with variable_scope.variable_scope(
-          "lstm_block_wrapper", initializer=initializer):
-        # magic naming so that the cells pick up these variables and resuse them
+      with variable_scope.variable_scope("lstm_cell", initializer=initializer):
+        # magic naming so that the cells pick up these variables and reuse them
         variable_scope.get_variable(
             "kernel",
             shape=[input_size + cell_size, cell_size * 4],
@@ -398,13 +382,12 @@ class LSTMBlockCellTest(test.TestCase):
             dtype=dtypes.float32,
             initializer=init_ops.zeros_initializer())
 
-      with variable_scope.variable_scope(
-          variable_scope.get_variable_scope(), reuse=True):
-        cell = lstm_ops.LSTMBlockFusedCell(
-            cell_size, cell_clip=0, use_peephole=False)
+      cell = lstm_ops.LSTMBlockFusedCell(
+          cell_size, cell_clip=0, use_peephole=False, reuse=True,
+          name="lstm_cell")
 
-        fused_outputs_op, fused_state_op = cell(
-            inputs, dtype=dtypes.float32, sequence_length=seq_lengths)
+      fused_outputs_op, fused_state_op = cell(
+          cell_inputs, dtype=dtypes.float32, sequence_length=seq_lengths)
 
       cell_vars = [
           v for v in variables.trainable_variables()
@@ -420,7 +403,7 @@ class LSTMBlockCellTest(test.TestCase):
         for i, inp in enumerate(inputs):
           lengths = [int(i < l) for l in seq_lengths.eval()]
           output, state = cell(
-              [inp],
+              array_ops.expand_dims(inp, 0),
               initial_state=state,
               dtype=dtypes.float32,
               sequence_length=lengths)
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index 46823fa3643c5b4a3d857fa38d1a70792d97ca40..73789206f3120c34b686a8af98f37d7683bc88ae 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -845,12 +845,14 @@ class RNNCellTest(test.TestCase):
       batch_size = 3
       input_size = 4
       expected_state_c = np.array(
-          [[0.00072015, 0.00036633], [0.00083481, 0.00047266],
-           [0.00085111, 0.00053054]],
+          [[6.450831e-04, 4.697885e-04],
+           [9.862894e-05, 7.212213e-04],
+           [4.401947e-04, 9.143004e-04]],
           dtype=np.float32)
       expected_state_h = np.array(
-          [[0.0005159, 0.00026243], [0.00062958, 0.00035646],
-           [0.00064732, 0.00040351]],
+          [[4.621217e-04, 3.365449e-04],
+           [7.438179e-05, 5.439147e-04],
+           [3.347936e-04, 6.953785e-04]],
           dtype=np.float32)
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
@@ -1328,7 +1330,7 @@ class LayerNormBasicLSTMCellTest(test.TestCase):
     h_low = 0.761552567265
     h_high = 0.995008519604
     num_units = 5
-    allowed_low = [2, 3]
+    allowed_low = [1, 2, 3]
 
     with self.test_session() as sess:
       with variable_scope.variable_scope(
diff --git a/tensorflow/contrib/rnn/python/ops/gru_ops.py b/tensorflow/contrib/rnn/python/ops/gru_ops.py
index 75536e3f5f8cbe44231f19d4d455537e654f7a08..4c964ec201f153d6c8293d3bf93bc231ff8f751d 100644
--- a/tensorflow/contrib/rnn/python/ops/gru_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/gru_ops.py
@@ -20,18 +20,20 @@ from __future__ import print_function
 from tensorflow.contrib.rnn.ops import gen_gru_ops
 from tensorflow.contrib.util import loader
 from tensorflow.python.framework import ops
+from tensorflow.python.layers import base as base_layer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import rnn_cell_impl
-from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.platform import resource_loader
 from tensorflow.python.util.deprecation import deprecated_args
 
 _gru_ops_so = loader.load_op_library(
     resource_loader.get_path_to_datafile("_gru_ops.so"))
 
+LayerRNNCell = rnn_cell_impl._LayerRNNCell  # pylint: disable=invalid-name,protected-access
+
 
 @ops.RegisterGradient("GRUBlockCell")
 def _GRUBlockCellGrad(op, *grad):
@@ -95,7 +97,7 @@ def _GRUBlockCellGrad(op, *grad):
   return d_x, d_h_prev, d_w_ru, d_w_c, d_b_ru, d_b_c
 
 
-class GRUBlockCell(rnn_cell_impl.RNNCell):
+class GRUBlockCell(LayerRNNCell):
   r"""Block GRU cell implementation.
 
   Deprecated: use GRUBlockCellV2 instead.
@@ -132,22 +134,37 @@ class GRUBlockCell(rnn_cell_impl.RNNCell):
 
   @deprecated_args(None, "cell_size is deprecated, use num_units instead",
                    "cell_size")
-  def __init__(self, num_units=None, cell_size=None):
+  def __init__(self,
+               num_units=None,
+               cell_size=None,
+               reuse=None,
+               name="gru_cell"):
     """Initialize the Block GRU cell.
 
     Args:
       num_units: int, The number of units in the GRU cell.
       cell_size: int, The old (deprecated) name for `num_units`.
+      reuse: (optional) boolean describing whether to reuse variables in an
+        existing scope.  If not `True`, and the existing scope already has the
+        given variables, an error is raised.
+      name: String, the name of the layer. Layers with the same name will
+        share weights, but to avoid mistakes we require reuse=True in such
+        cases.  By default this is "lstm_cell", for variable-name compatibility
+        with `tf.nn.rnn_cell.GRUCell`.
 
     Raises:
       ValueError: if both cell_size and num_units are not None;
         or both are None.
     """
+    super(GRUBlockCell, self).__init__(_reuse=reuse, name=name)
     if (cell_size is None) == (num_units is None):
-      raise ValueError("Exactly one of num_units or cell_size must be provided.")
+      raise ValueError(
+          "Exactly one of num_units or cell_size must be provided.")
     if num_units is None:
       num_units = cell_size
     self._cell_size = num_units
+    # Inputs must be 2-dimensional.
+    self.input_spec = base_layer.InputSpec(ndim=2)
 
   @property
   def state_size(self):
@@ -157,40 +174,43 @@ class GRUBlockCell(rnn_cell_impl.RNNCell):
   def output_size(self):
     return self._cell_size
 
-  def __call__(self, x, h_prev, scope=None):
+  def build(self, input_shape):
+    # Check if the input size exist.
+    input_size = input_shape[1].value
+    if input_size is None:
+      raise ValueError("Expecting input_size to be set.")
+
+    self._gate_kernel = self.add_variable(
+        "w_ru", [input_size + self._cell_size, self._cell_size * 2])
+    self._gate_bias = self.add_variable(
+        "b_ru", [self._cell_size * 2],
+        initializer=init_ops.constant_initializer(1.0))
+    self._candidate_kernel = self.add_variable(
+        "w_c", [input_size + self._cell_size, self._cell_size])
+    self._candidate_bias = self.add_variable(
+        "b_c", [self._cell_size],
+        initializer=init_ops.constant_initializer(0.0))
+
+    self.built = True
+
+  def call(self, inputs, h_prev):
     """GRU cell."""
-    with vs.variable_scope(scope or type(self).__name__):
-      input_size = x.get_shape().with_rank(2)[1]
-
-      # Check if the input size exist.
-      if input_size is None:
-        raise ValueError("Expecting input_size to be set.")
-
-      # Check cell_size == state_size from h_prev.
-      cell_size = h_prev.get_shape().with_rank(2)[1]
-      if cell_size != self._cell_size:
-        raise ValueError("Shape of h_prev[1] incorrect: cell_size %i vs %s" %
-                         (self._cell_size, cell_size))
-
-      if cell_size is None:
-        raise ValueError("cell_size from `h_prev` should not be None.")
-
-      w_ru = vs.get_variable("w_ru", [input_size + self._cell_size,
-                                      self._cell_size * 2])
-      b_ru = vs.get_variable(
-          "b_ru", [self._cell_size * 2],
-          initializer=init_ops.constant_initializer(1.0))
-      w_c = vs.get_variable("w_c",
-                            [input_size + self._cell_size, self._cell_size])
-      b_c = vs.get_variable(
-          "b_c", [self._cell_size],
-          initializer=init_ops.constant_initializer(0.0))
+    # Check cell_size == state_size from h_prev.
+    cell_size = h_prev.get_shape().with_rank(2)[1]
+    if cell_size != self._cell_size:
+      raise ValueError("Shape of h_prev[1] incorrect: cell_size %i vs %s" %
+                       (self._cell_size, cell_size))
 
-      _gru_block_cell = gen_gru_ops.gru_block_cell  # pylint: disable=invalid-name
-      _, _, _, new_h = _gru_block_cell(
-          x=x, h_prev=h_prev, w_ru=w_ru, w_c=w_c, b_ru=b_ru, b_c=b_c)
+    _gru_block_cell = gen_gru_ops.gru_block_cell  # pylint: disable=invalid-name
+    _, _, _, new_h = _gru_block_cell(
+        x=inputs,
+        h_prev=h_prev,
+        w_ru=self._gate_kernel,
+        w_c=self._candidate_kernel,
+        b_ru=self._gate_bias,
+        b_c=self._candidate_bias)
 
-      return new_h, new_h
+    return new_h, new_h
 
 
 class GRUBlockCellV2(GRUBlockCell):
@@ -199,39 +219,21 @@ class GRUBlockCellV2(GRUBlockCell):
   Only differs from GRUBlockCell by variable names.
   """
 
-  def __call__(self, x, h_prev, scope=None):
+  def build(self, input_shape):
     """GRU cell."""
-    with vs.variable_scope(scope or type(self).__name__):
-      input_size = x.get_shape().with_rank(2)[1]
-
-      # Check if the input size exist.
-      if input_size is None:
-        raise ValueError("Expecting input_size to be set.")
-
-      # Check cell_size == state_size from h_prev.
-      cell_size = h_prev.get_shape().with_rank(2)[1]
-      if cell_size != self._cell_size:
-        raise ValueError("Shape of h_prev[1] incorrect: cell_size %i vs %s" %
-                         (self._cell_size, cell_size))
-
-      if cell_size is None:
-        raise ValueError("cell_size from `h_prev` should not be None.")
-
-      with vs.variable_scope("gates"):
-        w_ru = vs.get_variable("kernel", [input_size + self._cell_size,
-                                          self._cell_size * 2])
-        b_ru = vs.get_variable(
-            "bias", [self._cell_size * 2],
-            initializer=init_ops.constant_initializer(1.0))
-      with vs.variable_scope("candidate"):
-        w_c = vs.get_variable("kernel",
-                              [input_size + self._cell_size, self._cell_size])
-        b_c = vs.get_variable(
-            "bias", [self._cell_size],
-            initializer=init_ops.constant_initializer(0.0))
-
-      _gru_block_cell = gen_gru_ops.gru_block_cell  # pylint: disable=invalid-name
-      _, _, _, new_h = _gru_block_cell(
-          x=x, h_prev=h_prev, w_ru=w_ru, w_c=w_c, b_ru=b_ru, b_c=b_c)
-
-      return new_h, new_h
+    input_size = input_shape[1].value
+    if input_size is None:
+      raise ValueError("Expecting input_size to be set.")
+
+    self._gate_kernel = self.add_variable(
+        "gates/kernel", [input_size + self._cell_size, self._cell_size * 2])
+    self._gate_bias = self.add_variable(
+        "gates/bias", [self._cell_size * 2],
+        initializer=init_ops.constant_initializer(1.0))
+    self._candidate_kernel = self.add_variable(
+        "candidate/kernel", [input_size + self._cell_size, self._cell_size])
+    self._candidate_bias = self.add_variable(
+        "candidate/bias", [self._cell_size],
+        initializer=init_ops.constant_initializer(0.0))
+
+    self.built = True
diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
index df910a3423083972bdee42bec10733e37b8e5f96..04f342cd18271425068b2b02c2937236c900c5e2 100644
--- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
@@ -20,21 +20,22 @@ from __future__ import print_function
 import abc
 
 from tensorflow.contrib.rnn.ops import gen_lstm_ops
-from tensorflow.contrib.rnn.python.ops import fused_rnn_cell
 from tensorflow.contrib.util import loader
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.layers import base as base_layer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import rnn_cell_impl
-from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.platform import resource_loader
 
 _lstm_ops_so = loader.load_op_library(
     resource_loader.get_path_to_datafile("_lstm_ops.so"))
 
+LayerRNNCell = rnn_cell_impl._LayerRNNCell  # pylint: disable=invalid-name,protected-access
+
 
 # pylint: disable=invalid-name
 def _lstm_block_cell(x,
@@ -327,7 +328,7 @@ def _BlockLSTMGrad(op, *grad):
   ]
 
 
-class LSTMBlockCell(rnn_cell_impl.RNNCell):
+class LSTMBlockCell(LayerRNNCell):
   """Basic LSTM recurrent network cell.
 
   The implementation is based on: http://arxiv.org/abs/1409.2329.
@@ -345,7 +346,8 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell):
                forget_bias=1.0,
                cell_clip=None,
                use_peephole=False,
-               reuse=None):
+               reuse=None,
+               name="lstm_cell"):
     """Initialize the basic LSTM cell.
 
     Args:
@@ -356,11 +358,15 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell):
       reuse: (optional) boolean describing whether to reuse variables in an
         existing scope.  If not `True`, and the existing scope already has the
         given variables, an error is raised.
+      name: String, the name of the layer. Layers with the same name will
+        share weights, but to avoid mistakes we require reuse=True in such
+        cases.  By default this is "lstm_cell", for variable-name compatibility
+        with `tf.nn.rnn_cell.LSTMCell`.
 
       When restoring from CudnnLSTM-trained checkpoints, must use
       CudnnCompatibleLSTMBlockCell instead.
     """
-    super(LSTMBlockCell, self).__init__(_reuse=reuse)
+    super(LSTMBlockCell, self).__init__(_reuse=reuse, name=name)
     self._num_units = num_units
     self._forget_bias = forget_bias
     self._use_peephole = use_peephole
@@ -373,6 +379,8 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell):
         "wco": "w_o_diag",
         "scope": "lstm_cell"
     }
+    # Inputs must be 2-dimensional.
+    self.input_spec = base_layer.InputSpec(ndim=2)
 
   @property
   def state_size(self):
@@ -382,45 +390,54 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell):
   def output_size(self):
     return self._num_units
 
-  def __call__(self, x, states_prev, scope=None):
+  def build(self, inputs_shape):
+    if not inputs_shape[1].value:
+      raise ValueError(
+          "Expecting inputs_shape[1] to be set: %s" % str(inputs_shape))
+    input_size = inputs_shape[1].value
+    self._kernel = self.add_variable(
+        self._names["W"], [input_size + self._num_units, self._num_units * 4])
+    self._bias = self.add_variable(
+        self._names["b"], [self._num_units * 4],
+        initializer=init_ops.constant_initializer(0.0))
+    if self._use_peephole:
+      self._w_i_diag = self.add_variable(self._names["wci"], [self._num_units])
+      self._w_f_diag = self.add_variable(self._names["wcf"], [self._num_units])
+      self._w_o_diag = self.add_variable(self._names["wco"], [self._num_units])
+
+    self.built = True
+
+  def call(self, inputs, state):
     """Long short-term memory cell (LSTM)."""
-    with vs.variable_scope(scope or self._names["scope"]):
-      x_shape = x.get_shape().with_rank(2)
-      if not x_shape[1].value:
-        raise ValueError("Expecting x_shape[1] to be set: %s" % str(x_shape))
-      if len(states_prev) != 2:
-        raise ValueError("Expecting states_prev to be a tuple with length 2.")
-      input_size = x_shape[1].value
-      w = vs.get_variable(self._names["W"], [input_size + self._num_units,
-                                             self._num_units * 4])
-      b = vs.get_variable(
-          self._names["b"], [w.get_shape().with_rank(2)[1].value],
-          initializer=init_ops.constant_initializer(0.0))
-      if self._use_peephole:
-        wci = vs.get_variable(self._names["wci"], [self._num_units])
-        wcf = vs.get_variable(self._names["wcf"], [self._num_units])
-        wco = vs.get_variable(self._names["wco"], [self._num_units])
-      else:
-        wci = wcf = wco = array_ops.zeros([self._num_units])
-      (cs_prev, h_prev) = states_prev
-      (_, cs, _, _, _, _, h) = _lstm_block_cell(
-          x,
-          cs_prev,
-          h_prev,
-          w,
-          b,
-          wci=wci,
-          wcf=wcf,
-          wco=wco,
-          forget_bias=self._forget_bias,
-          cell_clip=self._cell_clip,
-          use_peephole=self._use_peephole)
-
-      new_state = rnn_cell_impl.LSTMStateTuple(cs, h)
-      return h, new_state
-
-
-class LSTMBlockWrapper(fused_rnn_cell.FusedRNNCell):
+    if len(state) != 2:
+      raise ValueError("Expecting state to be a tuple with length 2.")
+
+    if self._use_peephole:
+      wci = self._w_i_diag
+      wcf = self._w_f_diag
+      wco = self._w_o_diag
+    else:
+      wci = wcf = wco = array_ops.zeros([self._num_units])
+
+    (cs_prev, h_prev) = state
+    (_, cs, _, _, _, _, h) = _lstm_block_cell(
+        inputs,
+        cs_prev,
+        h_prev,
+        self._kernel,
+        self._bias,
+        wci=wci,
+        wcf=wcf,
+        wco=wco,
+        forget_bias=self._forget_bias,
+        cell_clip=self._cell_clip,
+        use_peephole=self._use_peephole)
+
+    new_state = rnn_cell_impl.LSTMStateTuple(cs, h)
+    return h, new_state
+
+
+class LSTMBlockWrapper(base_layer.Layer):
   """This is a helper class that provides housekeeping for LSTM cells.
 
   This may be useful for alternative LSTM and similar type of cells.
@@ -459,12 +476,7 @@ class LSTMBlockWrapper(fused_rnn_cell.FusedRNNCell):
     """
     pass
 
-  def __call__(self,
-               inputs,
-               initial_state=None,
-               dtype=None,
-               sequence_length=None,
-               scope=None):
+  def call(self, inputs, initial_state=None, dtype=None, sequence_length=None):
     """Run this LSTM on inputs, starting from the given state.
 
     Args:
@@ -480,7 +492,6 @@ class LSTMBlockWrapper(fused_rnn_cell.FusedRNNCell):
         `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
         time_len).`
         Defaults to `time_len` for each element.
-      scope: `VariableScope` for the created subgraph; defaults to class name.
 
     Returns:
       A pair containing:
@@ -493,75 +504,71 @@ class LSTMBlockWrapper(fused_rnn_cell.FusedRNNCell):
     Raises:
       ValueError: in case of shape mismatches
     """
-    with vs.variable_scope(scope or "lstm_block_wrapper"):
-      is_list = isinstance(inputs, list)
-      if is_list:
-        inputs = array_ops.stack(inputs)
-      inputs_shape = inputs.get_shape().with_rank(3)
-      if not inputs_shape[2]:
-        raise ValueError("Expecting inputs_shape[2] to be set: %s" %
-                         inputs_shape)
-      batch_size = inputs_shape[1].value
-      if batch_size is None:
-        batch_size = array_ops.shape(inputs)[1]
-      time_len = inputs_shape[0].value
-      if time_len is None:
-        time_len = array_ops.shape(inputs)[0]
-
-      # Provide default values for initial_state and dtype
-      if initial_state is None:
-        if dtype is None:
-          raise ValueError(
-              "Either initial_state or dtype needs to be specified")
-        z = array_ops.zeros(
-            array_ops.stack([batch_size, self.num_units]), dtype=dtype)
-        initial_state = z, z
-      else:
-        if len(initial_state) != 2:
-          raise ValueError(
-              "Expecting initial_state to be a tuple with length 2 or None")
-        if dtype is None:
-          dtype = initial_state[0].dtype
-
-      # create the actual cell
-      if sequence_length is not None:
-        sequence_length = ops.convert_to_tensor(sequence_length)
-      initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
-      cell_states, outputs = self._call_cell(inputs, initial_cell_state,
-                                             initial_output, dtype,
-                                             sequence_length)
-
-      if sequence_length is not None:
-        # Mask out the part beyond sequence_length
-        mask = array_ops.transpose(
-            array_ops.sequence_mask(
-                sequence_length, time_len, dtype=dtype), [1, 0])
-        mask = array_ops.tile(
-            array_ops.expand_dims(mask, [-1]), [1, 1, self.num_units])
-        outputs *= mask
-        # Prepend initial states to cell_states and outputs for indexing to work
-        # correctly,since we want to access the last valid state at
-        # sequence_length - 1, which can even be -1, corresponding to the
-        # initial state.
-        mod_cell_states = array_ops.concat(
-            [array_ops.expand_dims(initial_cell_state, [0]), cell_states], 0)
-        mod_outputs = array_ops.concat(
-            [array_ops.expand_dims(initial_output, [0]), outputs], 0)
-        final_cell_state = self._gather_states(mod_cell_states, sequence_length,
-                                               batch_size)
-        final_output = self._gather_states(mod_outputs, sequence_length,
-                                           batch_size)
-      else:
-        # No sequence_lengths used: final state is the last state
-        final_cell_state = cell_states[-1]
-        final_output = outputs[-1]
-
-      if is_list:
-        # Input was a list, so return a list
-        outputs = array_ops.unstack(outputs)
-
-      final_state = rnn_cell_impl.LSTMStateTuple(final_cell_state, final_output)
-      return outputs, final_state
+    is_list = isinstance(inputs, list)
+    if is_list:
+      inputs = array_ops.stack(inputs)
+    inputs_shape = inputs.get_shape().with_rank(3)
+    if not inputs_shape[2]:
+      raise ValueError("Expecting inputs_shape[2] to be set: %s" % inputs_shape)
+    batch_size = inputs_shape[1].value
+    if batch_size is None:
+      batch_size = array_ops.shape(inputs)[1]
+    time_len = inputs_shape[0].value
+    if time_len is None:
+      time_len = array_ops.shape(inputs)[0]
+
+    # Provide default values for initial_state and dtype
+    if initial_state is None:
+      if dtype is None:
+        raise ValueError("Either initial_state or dtype needs to be specified")
+      z = array_ops.zeros(
+          array_ops.stack([batch_size, self.num_units]), dtype=dtype)
+      initial_state = z, z
+    else:
+      if len(initial_state) != 2:
+        raise ValueError(
+            "Expecting initial_state to be a tuple with length 2 or None")
+      if dtype is None:
+        dtype = initial_state[0].dtype
+
+    # create the actual cell
+    if sequence_length is not None:
+      sequence_length = ops.convert_to_tensor(sequence_length)
+    initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
+    cell_states, outputs = self._call_cell(
+        inputs, initial_cell_state, initial_output, dtype, sequence_length)
+
+    if sequence_length is not None:
+      # Mask out the part beyond sequence_length
+      mask = array_ops.transpose(
+          array_ops.sequence_mask(sequence_length, time_len, dtype=dtype),
+          [1, 0])
+      mask = array_ops.tile(
+          array_ops.expand_dims(mask, [-1]), [1, 1, self.num_units])
+      outputs *= mask
+      # Prepend initial states to cell_states and outputs for indexing to work
+      # correctly,since we want to access the last valid state at
+      # sequence_length - 1, which can even be -1, corresponding to the
+      # initial state.
+      mod_cell_states = array_ops.concat(
+          [array_ops.expand_dims(initial_cell_state, [0]), cell_states], 0)
+      mod_outputs = array_ops.concat(
+          [array_ops.expand_dims(initial_output, [0]), outputs], 0)
+      final_cell_state = self._gather_states(mod_cell_states, sequence_length,
+                                             batch_size)
+      final_output = self._gather_states(mod_outputs, sequence_length,
+                                         batch_size)
+    else:
+      # No sequence_lengths used: final state is the last state
+      final_cell_state = cell_states[-1]
+      final_output = outputs[-1]
+
+    if is_list:
+      # Input was a list, so return a list
+      outputs = array_ops.unstack(outputs)
+
+    final_state = rnn_cell_impl.LSTMStateTuple(final_cell_state, final_output)
+    return outputs, final_state
 
   def _gather_states(self, data, indices, batch_size):
     """Produce `out`, s.t. out(i, j) = data(indices(i), i, j)."""
@@ -589,7 +596,9 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
                num_units,
                forget_bias=1.0,
                cell_clip=None,
-               use_peephole=False):
+               use_peephole=False,
+               reuse=None,
+               name="lstm_fused_cell"):
     """Initialize the LSTM cell.
 
     Args:
@@ -597,19 +606,48 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
       forget_bias: float, The bias added to forget gates (see above).
       cell_clip: clip the cell to this value. Default is no cell clipping.
       use_peephole: Whether to use peephole connections or not.
+      reuse: (optional) boolean describing whether to reuse variables in an
+        existing scope.  If not `True`, and the existing scope already has the
+        given variables, an error is raised.
+      name: String, the name of the layer. Layers with the same name will
+        share weights, but to avoid mistakes we require reuse=True in such
+        cases.  By default this is "lstm_cell", for variable-name compatibility
+        with `tf.nn.rnn_cell.LSTMCell`.
     """
+    super(LSTMBlockFusedCell, self).__init__(_reuse=reuse, name=name)
     self._num_units = num_units
     self._forget_bias = forget_bias
     self._cell_clip = cell_clip if cell_clip is not None else -1
     self._use_peephole = use_peephole
 
+    # Inputs must be 3-dimensional.
+    self.input_spec = base_layer.InputSpec(ndim=3)
+
   @property
   def num_units(self):
     """Number of units in this cell (output dimension)."""
     return self._num_units
 
-  def _call_cell(self, inputs, initial_cell_state, initial_output, dtype,
-                 sequence_length):
+  def build(self, input_shape):
+    input_size = input_shape[2].value
+    self._kernel = self.add_variable(
+        "kernel", [input_size + self._num_units, self._num_units * 4])
+    self._bias = self.add_variable(
+        "bias", [self._num_units * 4],
+        initializer=init_ops.constant_initializer(0.0))
+    if self._use_peephole:
+      self._w_i_diag = self.add_variable("w_i_diag", [self._num_units])
+      self._w_f_diag = self.add_variable("w_f_diag", [self._num_units])
+      self._w_o_diag = self.add_variable("w_o_diag", [self._num_units])
+
+    self.built = True
+
+  def _call_cell(self,
+                 inputs,
+                 initial_cell_state=None,
+                 initial_output=None,
+                 dtype=None,
+                 sequence_length=None):
     """Run this LSTM on inputs, starting from the given state.
 
     Args:
@@ -636,18 +674,11 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
     time_len = inputs_shape[0].value
     if time_len is None:
       time_len = array_ops.shape(inputs)[0]
-    input_size = inputs_shape[2].value
-    w = vs.get_variable(
-        "kernel",
-        [input_size + self._num_units, self._num_units * 4], dtype=dtype)
-    b = vs.get_variable(
-        "bias", [w.get_shape().with_rank(2)[1]],
-        initializer=init_ops.constant_initializer(0.0),
-        dtype=dtype)
+
     if self._use_peephole:
-      wci = vs.get_variable("w_i_diag", [self._num_units], dtype=dtype)
-      wcf = vs.get_variable("w_f_diag", [self._num_units], dtype=dtype)
-      wco = vs.get_variable("w_o_diag", [self._num_units], dtype=dtype)
+      wci = self._w_i_diag
+      wco = self._w_o_diag
+      wcf = self._w_f_diag
     else:
       wci = wcf = wco = array_ops.zeros([self._num_units], dtype=dtype)
 
@@ -661,11 +692,11 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
         x=inputs,
         cs_prev=initial_cell_state,
         h_prev=initial_output,
-        w=w,
+        w=self._kernel,
         wci=wci,
         wcf=wcf,
         wco=wco,
-        b=b,
+        b=self._bias,
         forget_bias=self._forget_bias,
         cell_clip=self._cell_clip,
         use_peephole=self._use_peephole)
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 91cb04daedf07ed60ff0a2c722c108ffb783a41b..e4667828cdaad627143efcb823eee39aec24fab7 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -28,6 +28,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import op_def_registry
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.layers import base as base_layer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import init_ops
@@ -1822,7 +1823,7 @@ class CompiledWrapper(rnn_cell_impl.RNNCell):
         return not _REGISTERED_OPS[node_def.op].is_stateful
 
     with jit.experimental_jit_scope(compile_ops=compile_ops):
-      return self._cell(inputs, state, scope)
+      return self._cell(inputs, state, scope=scope)
 
 
 def _random_exp_initializer(minval,
@@ -2630,3 +2631,95 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
 
     new_state = (rnn_cell_impl.LSTMStateTuple(c, m))
     return m, new_state
+
+
+class SRUCell(rnn_cell_impl._LayerRNNCell):
+  """SRU, Simple Recurrent Unit
+     Implementation based on
+     Training RNNs as Fast as CNNs (cf. https://arxiv.org/abs/1709.02755).
+
+     This variation of RNN cell is characterized by the simplified data dependence
+     between hidden states of two consecutive time steps. Traditionally, hidden
+     states from a cell at time step t-1 needs to be multiplied with a matrix
+     W_hh before being fed into the ensuing cell at time step t.
+     This flavor of RNN replaces the matrix multiplication between h_{t-1}
+     and W_hh with a pointwise multiplication, resulting in performance
+     gain.
+
+  Args:
+    num_units: int, The number of units in the SRU cell.
+    activation: Nonlinearity to use.  Default: `tanh`.
+    reuse: (optional) Python boolean describing whether to reuse variables
+      in an existing scope.  If not `True`, and the existing scope already has
+      the given variables, an error is raised.
+    name: (optional) String, the name of the layer. Layers with the same name
+      will share weights, but to avoid mistakes we require reuse=True in such
+      cases.
+  """
+  def __init__(self, num_units,
+               activation=None, reuse=None, name=None):
+    super(SRUCell, self).__init__(_reuse=reuse, name=name)
+    self._num_units = num_units
+    self._activation = activation or math_ops.tanh
+
+    # Restrict inputs to be 2-dimensional matrices
+    self.input_spec = base_layer.InputSpec(ndim=2)
+
+  @property
+  def state_size(self):
+    return self._num_units
+
+  @property
+  def output_size(self):
+    return self._num_units
+
+  def build(self, inputs_shape):
+    if inputs_shape[1].value is None:
+      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
+                       % inputs_shape)
+
+    input_depth = inputs_shape[1].value
+
+    # Here the contributor believes that the following constraints
+    # are implied. The reasoning is explained here with reference to
+    # the paper https://arxiv.org/pdf/1709.02755.pdf upon which this
+    # implementation is based.
+    # In section 2.1 Equation 5, specifically:
+    # h_t = r_t \odot g(c_t) + (1 - r_t) \odot x_t
+    # the pointwise operation between r_t and x_t means they have
+    # the same shape (since we are implementing an RNN cell, braodcasting
+    # does not happen to input of a single timestep); by the same
+    # reasons, x_t has the same shape as h_t, essentially mandating that
+    # input_depth = unit_num.
+    if input_depth != self._num_units:
+      raise ValueError("SRU requires input_depth == num_units, got "
+                       "input_depth = %s, num_units = %s" % (input_depth,
+                                                             self._num_units))
+
+    self._kernel = self.add_variable(
+        rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+        shape=[input_depth, 3 * self._num_units])
+
+    self._bias = self.add_variable(
+        rnn_cell_impl._BIAS_VARIABLE_NAME,
+        shape=[2 * self._num_units],
+        initializer=init_ops.constant_initializer(0.0, dtype=self.dtype))
+
+    self._built = True
+
+  def call(self, inputs, state):
+    """Simple recurrent unit (SRU) with num_units cells."""
+
+    U = math_ops.matmul(inputs, self._kernel)
+    x_bar, f_intermediate, r_intermediate = array_ops.split(value=U,
+                                                            num_or_size_splits=3,
+                                                            axis=1)
+
+    f_r = math_ops.sigmoid(nn_ops.bias_add(array_ops.concat(
+        [f_intermediate, r_intermediate], 1), self._bias))
+    f, r = array_ops.split(value=f_r, num_or_size_splits=2, axis=1)
+
+    c = f * state + (1.0 - f) * x_bar
+    h = r * self._activation(c) + (1.0 - r) * inputs
+
+    return h, c
diff --git a/tensorflow/contrib/rnn/python/tools/checkpoint_convert.py b/tensorflow/contrib/rnn/python/tools/checkpoint_convert.py
index 5536a01328676e5fe01251fefdaaecb0f9569918..460e172a6d949804319b8833e34b6590f5fcf93b 100644
--- a/tensorflow/contrib/rnn/python/tools/checkpoint_convert.py
+++ b/tensorflow/contrib/rnn/python/tools/checkpoint_convert.py
@@ -128,10 +128,8 @@ RNN_NAME_REPLACEMENTS = collections.OrderedDict([
      'attention_cell_wrapper/attention/bias'),
     ############################################################################
     # contrib/legacy_seq2seq/python/ops/seq2seq.py
-    ('attention_decoder/weights',
-     'attention_decoder/kernel'),
-    ('attention_decoder/biases',
-     'attention_decoder/bias'),
+    ('attention_decoder/weights', 'attention_decoder/kernel'),
+    ('attention_decoder/biases', 'attention_decoder/bias'),
     ('attention_decoder/Attention_0/weights',
      'attention_decoder/Attention_0/kernel'),
     ('attention_decoder/Attention_0/biases',
@@ -140,6 +138,19 @@ RNN_NAME_REPLACEMENTS = collections.OrderedDict([
      'attention_decoder/AttnOutputProjection/kernel'),
     ('attention_decoder/AttnOutputProjection/biases',
      'attention_decoder/AttnOutputProjection/bias'),
+    # contrib/legacy_seq2seq/python/ops/seq2seq.py before cl/140060366
+    ('attention_decoder/Attention_0/Linear/Bias',
+     'attention_decoder/Attention_0/bias'),
+    ('attention_decoder/Attention_0/Linear/Matrix',
+     'attention_decoder/Attention_0/kernel'),
+    ('attention_decoder/AttnOutputProjection/Linear/Bias',
+     'attention_decoder/AttnOutputProjection/bias'),
+    ('attention_decoder/AttnOutputProjection/Linear/Matrix',
+     'attention_decoder/AttnOutputProjection/kernel'),
+    ('attention_decoder/LSTMCell/B', 'attention_decoder/lstm_cell/bias'),
+    ('attention_decoder/LSTMCell/W_0', 'attention_decoder/lstm_cell/kernel'),
+    ('attention_decoder/Linear/Bias', 'attention_decoder/bias'),
+    ('attention_decoder/Linear/Matrix', 'attention_decoder/kernel')
 ])
 
 _RNN_SHARDED_NAME_REPLACEMENTS = collections.OrderedDict([
diff --git a/tensorflow/contrib/rnn/python/tools/checkpoint_convert_test.py b/tensorflow/contrib/rnn/python/tools/checkpoint_convert_test.py
index a9e79494639418c22b7380b5b78092052fbf305d..b4785ee395a2452d9595d81c3bdb88711a8fe66a 100644
--- a/tensorflow/contrib/rnn/python/tools/checkpoint_convert_test.py
+++ b/tensorflow/contrib/rnn/python/tools/checkpoint_convert_test.py
@@ -67,7 +67,7 @@ class CheckpointConvertTest(test.TestCase):
         self._old_ckpt_path, self._new_ckpt_path)
     self.assertTrue(glob.glob(self._new_ckpt_path + "*"))
     self.assertItemsEqual(
-        ["a"] + list(checkpoint_convert.RNN_NAME_REPLACEMENTS.values()),
+        set(checkpoint_convert.RNN_NAME_REPLACEMENTS.values()).union(["a"]),
         new_var_map.keys())
     self.assertEqual(checkpoint_convert.RNN_NAME_REPLACEMENTS, conversion_map)
 
diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD
index 20be819e07d0e47a0b24b5cc2548727322093e50..245fe07f2bcdaddb2bc47c0e1234dc1f19bd85e3 100644
--- a/tensorflow/contrib/saved_model/BUILD
+++ b/tensorflow/contrib/saved_model/BUILD
@@ -82,22 +82,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "utils_test",
-    size = "small",
-    srcs = ["python/saved_model/utils_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":saved_model_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/saved_model:loader",
-        "//tensorflow/python/saved_model:signature_constants",
-        "//tensorflow/python/saved_model:tag_constants",
-    ],
-)
-
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/__init__.py b/tensorflow/contrib/seq2seq/python/kernel_tests/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..52e83069cb0c68b510da46149248369dce376647 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/__init__.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
index 01a5540121ae9ebf22de0493daadff6c7710d29a..b427dff88b2d586ccf8c512bb498cdaf879ac781 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
@@ -69,7 +69,7 @@ class AttentionWrapperTest(test.TestCase):
   def assertAllCloseOrEqual(self, x, y, **kwargs):
     if isinstance(x, np.ndarray) or isinstance(x, float):
       return super(AttentionWrapperTest, self).assertAllClose(
-          x, y, atol=1e-4, **kwargs)
+          x, y, atol=1e-3, **kwargs)
     else:
       self.assertAllEqual(x, y, **kwargs)
 
@@ -80,6 +80,28 @@ class AttentionWrapperTest(test.TestCase):
     self.assertEqual(state.time, None)
     self.assertEqual(new_state.time, 1)
 
+  def testAttentionWrapperStateShapePropgation(self):
+    batch_size = 5
+    max_time = 5
+    num_units = 5
+
+    memory = random_ops.random_uniform(
+        [batch_size, max_time, num_units], seed=1)
+    mechanism = wrapper.LuongAttention(num_units, memory)
+    cell = wrapper.AttentionWrapper(rnn_cell.LSTMCell(num_units), mechanism)
+
+    # Create zero state with static batch size.
+    static_state = cell.zero_state(batch_size, dtypes.float32)
+    # Create zero state without static batch size.
+    state = cell.zero_state(array_ops.shape(memory)[0], dtypes.float32)
+
+    state = static_state.clone(
+        cell_state=state.cell_state, attention=state.attention)
+
+    self.assertEqual(state.cell_state.c.shape, static_state.cell_state.c.shape)
+    self.assertEqual(state.cell_state.h.shape, static_state.cell_state.h.shape)
+    self.assertEqual(state.attention.shape, static_state.attention.shape)
+
   def _testWithAttention(self,
                          create_attention_mechanism,
                          expected_final_output,
@@ -254,6 +276,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
         alignment_history=())
     expected_final_alignment_history = ResultSummary(
         shape=(3, 5, 8), dtype=dtype('float32'), mean=0.12500001)
@@ -274,7 +298,7 @@ class AttentionWrapperTest(test.TestCase):
         rnn_output=ResultSummary(
             shape=(5, 3, 6), dtype=dtype('float32'), mean=-0.00597103),
         sample_id=ResultSummary(
-            shape=(5, 3), dtype=dtype('int32'), mean=1.4))
+            shape=(5, 3), dtype=dtype('int32'), mean=1.6))
     expected_final_state = AttentionWrapperState(
         cell_state=LSTMStateTuple(
             c=ResultSummary(
@@ -286,6 +310,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
         alignment_history=())
 
     self._testWithAttention(
@@ -301,7 +327,7 @@ class AttentionWrapperTest(test.TestCase):
         rnn_output=ResultSummary(
             shape=(5, 3, 6), dtype=dtype('float32'), mean=-0.0052615386),
         sample_id=ResultSummary(
-            shape=(5, 3), dtype=dtype('int32'), mean=1.4666666666666666))
+            shape=(5, 3), dtype=dtype('int32'), mean=1.3333333333))
     expected_final_state = AttentionWrapperState(
         cell_state=LSTMStateTuple(
             c=ResultSummary(
@@ -313,6 +339,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
         alignment_history=())
 
     self._testWithAttention(
@@ -330,7 +358,7 @@ class AttentionWrapperTest(test.TestCase):
         rnn_output=ResultSummary(
             shape=(5, 3, 6), dtype=dtype('float32'), mean=-0.0052615386),
         sample_id=ResultSummary(
-            shape=(5, 3), dtype=dtype('int32'), mean=1.4666666666666666))
+            shape=(5, 3), dtype=dtype('int32'), mean=1.3333333333333333))
     expected_final_state = AttentionWrapperState(
         cell_state=LSTMStateTuple(
             c=ResultSummary(
@@ -342,6 +370,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
         alignment_history=())
 
     self._testWithAttention(
@@ -370,6 +400,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.125),
         alignment_history=())
 
     self._testWithAttention(
@@ -545,6 +577,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.032228071),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.032228071),
         alignment_history=())
     expected_final_alignment_history = ResultSummary(
         shape=(3, 5, 8), dtype=dtype('float32'), mean=0.050430927)
@@ -566,7 +600,7 @@ class AttentionWrapperTest(test.TestCase):
         rnn_output=ResultSummary(
             shape=(5, 3, 6), dtype=dtype('float32'), mean=-0.0025896581),
         sample_id=ResultSummary(
-            shape=(5, 3), dtype=dtype('int32'), mean=1.8666666666666667))
+            shape=(5, 3), dtype=dtype('int32'), mean=1.6))
     expected_final_state = AttentionWrapperState(
         cell_state=LSTMStateTuple(
             c=ResultSummary(
@@ -578,9 +612,11 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.028698336),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.028698336),
         alignment_history=())
     expected_final_alignment_history = ResultSummary(
-        shape=(3, 5, 8), dtype=dtype('float32'), mean=0.046009291)
+        shape=(3, 5, 8), dtype=dtype('float32'), mean=0.04865776002407074)
 
     self._testWithAttention(
         create_attention_mechanism,
@@ -599,7 +635,8 @@ class AttentionWrapperTest(test.TestCase):
           random_ops.random_normal((b, t, u)),
           mode='hard')
       # Just feed previous attention as [1, 0, 0, ...]
-      attn = a(random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t))
+      attn, unused_state = a(
+          random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t))
       sess.run(variables.global_variables_initializer())
       attn_out = attn.eval()
       # All values should be 0 or 1
@@ -629,6 +666,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.032198936),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.032198936),
         alignment_history=())
     expected_final_alignment_history = ResultSummary(
         shape=(3, 5, 8), dtype=dtype('float32'), mean=0.050387777)
@@ -663,6 +702,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=ResultSummary(
             shape=(5, 8), dtype=dtype('float32'), mean=0.032198936),
+        attention_state=ResultSummary(
+            shape=(5, 8), dtype=dtype('float32'), mean=0.032198936),
         alignment_history=())
     expected_final_alignment_history = ResultSummary(
         shape=(3, 5, 8), dtype=dtype('float32'), mean=0.050387777)
@@ -697,6 +738,9 @@ class AttentionWrapperTest(test.TestCase):
         alignments=(
             ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),
             ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)),
+        attention_state=(
+            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)),
         alignment_history=())
 
     expected_final_alignment_history = (
@@ -723,7 +767,8 @@ class AttentionWrapperTest(test.TestCase):
           random_ops.random_normal((b, t, u)),
           mode='hard')
       # Just feed previous attention as [1, 0, 0, ...]
-      attn = a(random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t))
+      attn, unused_state = a(
+          random_ops.random_normal((b, d)), array_ops.one_hot([0]*b, t))
       sess.run(variables.global_variables_initializer())
       attn_out = attn.eval()
       # All values should be 0 or 1
@@ -738,9 +783,9 @@ class AttentionWrapperTest(test.TestCase):
 
     expected_final_output = BasicDecoderOutput(
         rnn_output=ResultSummary(
-            shape=(5, 3, 20), dtype=dtype('float32'), mean=0.11691988),
+            shape=(5, 3, 20), dtype=dtype('float32'), mean=0.11798714846372604),
         sample_id=ResultSummary(
-            shape=(5, 3), dtype=dtype('int32'), mean=7.2666666666666666))
+            shape=(5, 3), dtype=dtype('int32'), mean=7.933333333333334))
     expected_final_state = AttentionWrapperState(
         cell_state=LSTMStateTuple(
             c=ResultSummary(
@@ -748,11 +793,14 @@ class AttentionWrapperTest(test.TestCase):
             h=ResultSummary(
                 shape=(5, 9), dtype=dtype('float32'), mean=-0.0018835809)),
         attention=ResultSummary(
-            shape=(5, 20), dtype=dtype('float32'), mean=0.11680689),
+            shape=(5, 20), dtype=dtype('float32'), mean=0.11798714846372604),
         time=3,
         alignments=(
             ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),
             ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)),
+        attention_state=(
+            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),
+            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)),
         alignment_history=())
     expected_final_alignment_history = (
         ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125),
@@ -787,6 +835,8 @@ class AttentionWrapperTest(test.TestCase):
         time=3,
         alignments=(
             ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),),
+        attention_state=(
+            ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125),),
         alignment_history=())
 
     expected_final_alignment_history = (
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py
index d2beac5f31460ec1c0d978a9f6fcd0e0f09cb9b4..f498b2bb5709ea28faca1c5cfa21ad30aac14ab7 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py
@@ -225,6 +225,94 @@ class TestBeamStep(test.TestCase):
     self.assertAllEqual(next_state_.log_probs, expected_log_probs)
 
 
+class TestLargeBeamStep(test.TestCase):
+  """
+  Tests a single step of beam search in such
+  case that beam size is larger than vocabulary size.
+  """
+
+  def setUp(self):
+    super(TestLargeBeamStep, self).setUp()
+    self.batch_size = 2
+    self.beam_width = 8
+    self.vocab_size = 5
+    self.end_token = 0
+    self.length_penalty_weight = 0.6
+
+
+  def test_step(self):
+    def get_probs():
+      """this simulates the initialize method in BeamSearchDecoder"""
+      log_prob_mask = array_ops.one_hot(array_ops.zeros([self.batch_size],
+                                                        dtype=dtypes.int32),
+                                        depth=self.beam_width, on_value=True,
+                                        off_value=False, dtype=dtypes.bool)
+
+      log_prob_zeros = array_ops.zeros([self.batch_size, self.beam_width],
+                                       dtype=dtypes.float32)
+      log_prob_neg_inf = array_ops.ones([self.batch_size, self.beam_width],
+                                        dtype=dtypes.float32) * -np.Inf
+
+      log_probs = array_ops.where(log_prob_mask, log_prob_zeros,
+                                  log_prob_neg_inf)
+      return log_probs
+
+    log_probs = get_probs()
+    dummy_cell_state = array_ops.zeros([self.batch_size, self.beam_width])
+
+    _finished = array_ops.one_hot(
+        array_ops.zeros([self.batch_size], dtype=dtypes.int32),
+        depth=self.beam_width, on_value=False,
+        off_value=True, dtype=dtypes.bool)
+    _lengths = np.zeros([self.batch_size, self.beam_width], dtype=np.int64)
+    _lengths[:, 0]=2
+    _lengths = constant_op.constant(_lengths, dtype=dtypes.int64)
+
+    beam_state = beam_search_decoder.BeamSearchDecoderState(
+        cell_state=dummy_cell_state,
+        log_probs=log_probs,
+        lengths=_lengths,
+        finished=_finished)
+
+    logits_ = np.full([self.batch_size, self.beam_width, self.vocab_size],
+                      0.0001)
+    logits_[0, 0, 2] = 1.9
+    logits_[0, 0, 3] = 2.1
+    logits_[0, 1, 3] = 3.1
+    logits_[0, 1, 4] = 0.9
+    logits_[1, 0, 1] = 0.5
+    logits_[1, 1, 2] = 2.7
+    logits_[1, 2, 2] = 10.0
+    logits_[1, 2, 3] = 0.2
+    logits = constant_op.constant(logits_, dtype=dtypes.float32)
+    log_probs = nn_ops.log_softmax(logits)
+
+    outputs, next_beam_state = beam_search_decoder._beam_search_step(
+        time=2,
+        logits=logits,
+        next_cell_state=dummy_cell_state,
+        beam_state=beam_state,
+        batch_size=ops.convert_to_tensor(self.batch_size),
+        beam_width=self.beam_width,
+        end_token=self.end_token,
+        length_penalty_weight=self.length_penalty_weight)
+
+    with self.test_session() as sess:
+      outputs_, next_state_, state_, log_probs_ = sess.run(
+          [outputs, next_beam_state, beam_state, log_probs])
+
+    self.assertEqual(outputs_.predicted_ids[0, 0], 3)
+    self.assertEqual(outputs_.predicted_ids[0, 1], 2)
+    self.assertEqual(outputs_.predicted_ids[1, 0], 1)
+    neg_inf = -np.Inf
+    self.assertAllEqual(next_state_.log_probs[:, -3:],
+                        [[neg_inf, neg_inf, neg_inf],
+                         [neg_inf, neg_inf, neg_inf]])
+    self.assertEqual((next_state_.log_probs[:, :-3] > neg_inf).all(), True)
+    self.assertEqual((next_state_.lengths[:, :-3] > 0).all(), True)
+    self.assertAllEqual(next_state_.lengths[:, -3:], [[0, 0, 0],
+                                                      [0, 0, 0]])
+
 class BeamSearchDecoderTest(test.TestCase):
 
   def _testDynamicDecodeRNN(self, time_major, has_attention):
diff --git a/tensorflow/contrib/seq2seq/python/ops/__init__.py b/tensorflow/contrib/seq2seq/python/ops/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..52e83069cb0c68b510da46149248369dce376647 100644
--- a/tensorflow/contrib/seq2seq/python/ops/__init__.py
+++ b/tensorflow/contrib/seq2seq/python/ops/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index e87ef413880e37e553c604ec8cfbaef307569682..95dea312f3a4e77176a4bc4af290ad48c078deda 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -24,6 +24,7 @@ import math
 
 import numpy as np
 
+from tensorflow.contrib.framework.python.framework import tensor_util
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
@@ -61,7 +62,14 @@ _zero_state_tensors = rnn_cell_impl._zero_state_tensors  # pylint: disable=prote
 
 
 class AttentionMechanism(object):
-  pass
+
+  @property
+  def alignments_size(self):
+    raise NotImplementedError
+
+  @property
+  def state_size(self):
+    raise NotImplementedError
 
 
 def _prepare_memory(memory, memory_sequence_length, check_inner_dims_defined):
@@ -161,7 +169,7 @@ class _BaseAttentionMechanism(AttentionMechanism):
         tensor should be shaped `[batch_size, max_time, ...]`.
       probability_fn: A `callable`.  Converts the score and previous alignments
         to probabilities. Its signature should be:
-        `probabilities = probability_fn(score, previous_alignments)`.
+        `probabilities = probability_fn(score, state)`.
       memory_sequence_length (optional): Sequence lengths for the batch entries
         in memory.  If provided, the memory tensor rows are masked with zeros
         for values past the respective sequence lengths.
@@ -235,6 +243,10 @@ class _BaseAttentionMechanism(AttentionMechanism):
   def alignments_size(self):
     return self._alignments_size
 
+  @property
+  def state_size(self):
+    return self._alignments_size
+
   def initial_alignments(self, batch_size, dtype):
     """Creates the initial alignment values for the `AttentionWrapper` class.
 
@@ -254,6 +266,23 @@ class _BaseAttentionMechanism(AttentionMechanism):
     max_time = self._alignments_size
     return _zero_state_tensors(max_time, batch_size, dtype)
 
+  def initial_state(self, batch_size, dtype):
+    """Creates the initial state values for the `AttentionWrapper` class.
+
+    This is important for AttentionMechanisms that use the previous alignment
+    to calculate the alignment at the next time step (e.g. monotonic attention).
+
+    The default behavior is to return the same output as initial_alignments.
+
+    Args:
+      batch_size: `int32` scalar, the batch_size.
+      dtype: The `dtype`.
+
+    Returns:
+      A structure of all-zero tensors with shapes as described by `state_size`.
+    """
+    return self.initial_alignments(batch_size, dtype)
+
 
 def _luong_score(query, keys, scale):
   """Implements Luong-style (multiplicative) scoring function.
@@ -381,13 +410,13 @@ class LuongAttention(_BaseAttentionMechanism):
     self._scale = scale
     self._name = name
 
-  def __call__(self, query, previous_alignments):
+  def __call__(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
       query: Tensor of dtype matching `self.values` and shape
         `[batch_size, query_depth]`.
-      previous_alignments: Tensor of dtype matching `self.values` and shape
+      state: Tensor of dtype matching `self.values` and shape
         `[batch_size, alignments_size]`
         (`alignments_size` is memory's `max_time`).
 
@@ -398,8 +427,9 @@ class LuongAttention(_BaseAttentionMechanism):
     """
     with variable_scope.variable_scope(None, "luong_attention", [query]):
       score = _luong_score(query, self._keys, self._scale)
-    alignments = self._probability_fn(score, previous_alignments)
-    return alignments
+    alignments = self._probability_fn(score, state)
+    next_state = alignments
+    return alignments, next_state
 
 
 def _bahdanau_score(processed_query, keys, normalize):
@@ -526,13 +556,13 @@ class BahdanauAttention(_BaseAttentionMechanism):
     self._normalize = normalize
     self._name = name
 
-  def __call__(self, query, previous_alignments):
+  def __call__(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
       query: Tensor of dtype matching `self.values` and shape
         `[batch_size, query_depth]`.
-      previous_alignments: Tensor of dtype matching `self.values` and shape
+      state: Tensor of dtype matching `self.values` and shape
         `[batch_size, alignments_size]`
         (`alignments_size` is memory's `max_time`).
 
@@ -544,8 +574,9 @@ class BahdanauAttention(_BaseAttentionMechanism):
     with variable_scope.variable_scope(None, "bahdanau_attention", [query]):
       processed_query = self.query_layer(query) if self.query_layer else query
       score = _bahdanau_score(processed_query, self._keys, self._normalize)
-    alignments = self._probability_fn(score, previous_alignments)
-    return alignments
+    alignments = self._probability_fn(score, state)
+    next_state = alignments
+    return alignments, next_state
 
 
 def safe_cumprod(x, *args, **kwargs):
@@ -805,13 +836,13 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
     self._name = name
     self._score_bias_init = score_bias_init
 
-  def __call__(self, query, previous_alignments):
+  def __call__(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
       query: Tensor of dtype matching `self.values` and shape
         `[batch_size, query_depth]`.
-      previous_alignments: Tensor of dtype matching `self.values` and shape
+      state: Tensor of dtype matching `self.values` and shape
         `[batch_size, alignments_size]`
         (`alignments_size` is memory's `max_time`).
 
@@ -828,8 +859,9 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism):
           "attention_score_bias", dtype=processed_query.dtype,
           initializer=self._score_bias_init)
       score += score_bias
-    alignments = self._probability_fn(score, previous_alignments)
-    return alignments
+    alignments = self._probability_fn(score, state)
+    next_state = alignments
+    return alignments, next_state
 
 
 class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
@@ -906,13 +938,13 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
     self._score_bias_init = score_bias_init
     self._name = name
 
-  def __call__(self, query, previous_alignments):
+  def __call__(self, query, state):
     """Score the query based on the keys and values.
 
     Args:
       query: Tensor of dtype matching `self.values` and shape
         `[batch_size, query_depth]`.
-      previous_alignments: Tensor of dtype matching `self.values` and shape
+      state: Tensor of dtype matching `self.values` and shape
         `[batch_size, alignments_size]`
         (`alignments_size` is memory's `max_time`).
 
@@ -928,14 +960,15 @@ class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism):
           "attention_score_bias", dtype=query.dtype,
           initializer=self._score_bias_init)
       score += score_bias
-    alignments = self._probability_fn(score, previous_alignments)
-    return alignments
+    alignments = self._probability_fn(score, state)
+    next_state = alignments
+    return alignments, next_state
 
 
 class AttentionWrapperState(
     collections.namedtuple("AttentionWrapperState",
                            ("cell_state", "attention", "time", "alignments",
-                            "alignment_history"))):
+                            "alignment_history", "attention_state"))):
   """`namedtuple` storing the state of a `AttentionWrapper`.
 
   Contains:
@@ -949,11 +982,18 @@ class AttentionWrapperState(
     - `alignment_history`: (if enabled) a single or tuple of `TensorArray`(s)
        containing alignment matrices from all time steps for each attention
        mechanism. Call `stack()` on each to convert to a `Tensor`.
+    - `attention_state`: A single or tuple of nested objects
+       containing attention mechanism state for each attention mechanism.
+       The objects may contain Tensors or TensorArrays.
   """
 
   def clone(self, **kwargs):
     """Clone this object, overriding components provided by kwargs.
 
+    The new state fields' shape must match original state fields' shape. This
+    will be validated, and original fields' shape will be propagated to new
+    fields.
+
     Example:
 
     ```python
@@ -969,7 +1009,16 @@ class AttentionWrapperState(
       A new `AttentionWrapperState` whose properties are the same as
       this one, except any overridden properties as provided in `kwargs`.
     """
-    return super(AttentionWrapperState, self)._replace(**kwargs)
+    def with_same_shape(old, new):
+      """Check and set new tensor's shape."""
+      if isinstance(old, ops.Tensor) and isinstance(new, ops.Tensor):
+        return tensor_util.with_same_shape(old, new)
+      return new
+
+    return nest.map_structure(
+        with_same_shape,
+        self,
+        super(AttentionWrapperState, self)._replace(**kwargs))
 
 
 def hardmax(logits, name=None):
@@ -993,11 +1042,11 @@ def hardmax(logits, name=None):
         math_ops.argmax(logits, -1), depth, dtype=logits.dtype)
 
 
-def _compute_attention(attention_mechanism, cell_output, previous_alignments,
+def _compute_attention(attention_mechanism, cell_output, attention_state,
                        attention_layer):
   """Computes the attention and alignments for a given attention_mechanism."""
-  alignments = attention_mechanism(
-      cell_output, previous_alignments=previous_alignments)
+  alignments, next_attention_state = attention_mechanism(
+      cell_output, state=attention_state)
 
   # Reshape from [batch_size, memory_time] to [batch_size, 1, memory_time]
   expanded_alignments = array_ops.expand_dims(alignments, 1)
@@ -1018,7 +1067,7 @@ def _compute_attention(attention_mechanism, cell_output, previous_alignments,
   else:
     attention = context
 
-  return attention, alignments
+  return attention, alignments, next_attention_state
 
 
 class AttentionWrapper(rnn_cell_impl.RNNCell):
@@ -1229,6 +1278,8 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
         attention=self._attention_layer_size,
         alignments=self._item_or_tuple(
             a.alignments_size for a in self._attention_mechanisms),
+        attention_state=self._item_or_tuple(
+            a.state_size for a in self._attention_mechanisms),
         alignment_history=self._item_or_tuple(
             () for _ in self._attention_mechanisms))  # sometimes a TensorArray
 
@@ -1278,6 +1329,9 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
           alignments=self._item_or_tuple(
               attention_mechanism.initial_alignments(batch_size, dtype)
               for attention_mechanism in self._attention_mechanisms),
+          attention_state=self._item_or_tuple(
+              attention_mechanism.initial_state(batch_size, dtype)
+              for attention_mechanism in self._attention_mechanisms),
           alignment_history=self._item_or_tuple(
               tensor_array_ops.TensorArray(dtype=dtype, size=0,
                                            dynamic_size=True)
@@ -1339,33 +1393,36 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
           cell_output, name="checked_cell_output")
 
     if self._is_multi:
-      previous_alignments = state.alignments
+      previous_attention_state = state.attention_state
       previous_alignment_history = state.alignment_history
     else:
-      previous_alignments = [state.alignments]
+      previous_attention_state = [state.attention_state]
       previous_alignment_history = [state.alignment_history]
 
     all_alignments = []
     all_attentions = []
-    all_histories = []
+    all_attention_states = []
+    maybe_all_histories = []
     for i, attention_mechanism in enumerate(self._attention_mechanisms):
-      attention, alignments = _compute_attention(
-          attention_mechanism, cell_output, previous_alignments[i],
+      attention, alignments, next_attention_state = _compute_attention(
+          attention_mechanism, cell_output, previous_attention_state[i],
           self._attention_layers[i] if self._attention_layers else None)
       alignment_history = previous_alignment_history[i].write(
           state.time, alignments) if self._alignment_history else ()
 
+      all_attention_states.append(next_attention_state)
       all_alignments.append(alignments)
-      all_histories.append(alignment_history)
       all_attentions.append(attention)
+      maybe_all_histories.append(alignment_history)
 
     attention = array_ops.concat(all_attentions, 1)
     next_state = AttentionWrapperState(
         time=state.time + 1,
         cell_state=next_cell_state,
         attention=attention,
+        attention_state=self._item_or_tuple(all_attention_states),
         alignments=self._item_or_tuple(all_alignments),
-        alignment_history=self._item_or_tuple(all_histories))
+        alignment_history=self._item_or_tuple(maybe_all_histories))
 
     if self._output_attention:
       return attention, next_state
diff --git a/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py b/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py
index c7c4182f0d9a17dacebc1cda693cda6eaaf8451f..ed226239b860e2250072a28a5538b816642ec54b 100644
--- a/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py
+++ b/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py
@@ -90,7 +90,7 @@ class BasicDecoder(decoder.Decoder):
       output_shape_with_unknown_batch = nest.map_structure(
           lambda s: tensor_shape.TensorShape([None]).concatenate(s),
           size)
-      layer_output_shape = self._output_layer._compute_output_shape(  # pylint: disable=protected-access
+      layer_output_shape = self._output_layer.compute_output_shape(
           output_shape_with_unknown_batch)
       return nest.map_structure(lambda s: s[1:], layer_output_shape)
 
diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
index 5be0c92243da10af438be97fab982515266be1de..a5f7169c3106d12cd22e822dca96c6adf43a45fe 100644
--- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
+++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
@@ -19,7 +19,6 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
-
 import numpy as np
 
 from tensorflow.contrib.seq2seq.python.ops import beam_search_ops
@@ -67,7 +66,8 @@ class FinalBeamSearchDecoderOutput(
 
   Args:
     predicted_ids: The final prediction. A tensor of shape
-      `[T, batch_size, beam_width]`.
+      `[batch_size, T, beam_width]` (or `[T, batch_size, beam_width]` if
+      `output_time_major` is True). Beams are ordered from best to worst.
     beam_search_decoder_output: An instance of `BeamSearchDecoderOutput` that
       describes the state of the beam search.
   """
@@ -228,8 +228,11 @@ class BeamSearchDecoder(decoder.Decoder):
     self._start_tokens = array_ops.tile(
         array_ops.expand_dims(self._start_tokens, 1), [1, self._beam_width])
     self._start_inputs = self._embedding_fn(self._start_tokens)
-    self._finished = array_ops.zeros(
-        [self._batch_size, self._beam_width], dtype=dtypes.bool)
+    
+    self._finished = array_ops.one_hot(
+        array_ops.zeros([self._batch_size], dtype=dtypes.int32),
+        depth=self._beam_width, on_value=False,
+        off_value=True, dtype=dtypes.bool)
 
   @property
   def batch_size(self):
@@ -249,7 +252,7 @@ class BeamSearchDecoder(decoder.Decoder):
       output_shape_with_unknown_batch = nest.map_structure(
           lambda s: tensor_shape.TensorShape([None]).concatenate(s),
           size)
-      layer_output_shape = self._output_layer._compute_output_shape(  # pylint: disable=protected-access
+      layer_output_shape = self._output_layer.compute_output_shape(
           output_shape_with_unknown_batch)
       return nest.map_structure(lambda s: s[1:], layer_output_shape)
 
@@ -297,11 +300,15 @@ class BeamSearchDecoder(decoder.Decoder):
     """
     finished, start_inputs = self._finished, self._start_inputs
 
+    log_probs = array_ops.one_hot(  # shape(batch_sz, beam_sz)
+        array_ops.zeros([self._batch_size], dtype=dtypes.int32),
+        depth=self._beam_width, on_value=0.0, off_value=-np.Inf,
+        dtype=nest.flatten(self._initial_cell_state)[0].dtype)
+
+
     initial_state = BeamSearchDecoderState(
         cell_state=self._initial_cell_state,
-        log_probs=array_ops.zeros(
-            [self._batch_size, self._beam_width],
-            dtype=nest.flatten(self._initial_cell_state)[0].dtype),
+        log_probs=log_probs,
         finished=finished,
         lengths=array_ops.zeros(
             [self._batch_size, self._beam_width], dtype=dtypes.int64))
@@ -562,18 +569,11 @@ def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
   time = ops.convert_to_tensor(time, name="time")
   # During the first time step we only consider the initial beam
   scores_shape = array_ops.shape(scores)
-  scores_flat = control_flow_ops.cond(
-      time > 0,
-      lambda: array_ops.reshape(scores, [batch_size, -1]),
-      lambda: scores[:, 0])
-  num_available_beam = control_flow_ops.cond(
-      time > 0, lambda: math_ops.reduce_prod(scores_shape[1:]),
-      lambda: math_ops.reduce_prod(scores_shape[2:]))
+  scores_flat = array_ops.reshape(scores, [batch_size, -1])
 
   # Pick the next beams according to the specified successors function
-  next_beam_size = math_ops.minimum(
-      ops.convert_to_tensor(beam_width, dtype=dtypes.int32, name="beam_width"),
-      num_available_beam)
+  next_beam_size = ops.convert_to_tensor(beam_width, dtype=dtypes.int32,
+                                         name="beam_width")
   next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=next_beam_size)
 
   next_beam_scores.set_shape([static_batch_size, beam_width])
diff --git a/tensorflow/contrib/seq2seq/python/ops/helper.py b/tensorflow/contrib/seq2seq/python/ops/helper.py
index b55d90cbabcc0bb63aaff86ba74c9fa2c6c917cf..ef3722ee41bb0b49e5f81d4d6514e2f40d2ad9f1 100644
--- a/tensorflow/contrib/seq2seq/python/ops/helper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/helper.py
@@ -540,8 +540,7 @@ class GreedyEmbeddingHelper(Helper):
     if not isinstance(outputs, ops.Tensor):
       raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                       type(outputs))
-    sample_ids = math_ops.cast(
-        math_ops.argmax(outputs, axis=-1), dtypes.int32)
+    sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
     return sample_ids
 
   def next_inputs(self, time, outputs, state, sample_ids, name=None):
diff --git a/tensorflow/contrib/session_bundle/bundle_shim.cc b/tensorflow/contrib/session_bundle/bundle_shim.cc
index a367ea059c9a2017e94c1541e42d6296665cc466..4fc36d85edf4ac2d48769d209f0b78d6d29d9a62 100644
--- a/tensorflow/contrib/session_bundle/bundle_shim.cc
+++ b/tensorflow/contrib/session_bundle/bundle_shim.cc
@@ -371,9 +371,15 @@ Status LoadSessionBundleOrSavedModelBundle(
     return LoadSavedModelFromLegacySessionBundlePath(
         session_options, run_options, export_dir, saved_model_bundle);
   }
-  return Status(error::Code::NOT_FOUND,
-                "Session bundle or SavedModel bundle not found at specified "
-                "export location");
+  return Status(
+      error::Code::NOT_FOUND,
+      strings::StrCat(
+          "Specified file path does not appear to contain a:\n"
+          "- Session bundle (should have a file called `export.meta`)\n"
+          "- or, SavedModel bundle (should have a file called "
+          "`saved_model.pb`)\n"
+          "Specified file path: ",
+          export_dir));
 }
 
 }  // namespace serving
diff --git a/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py b/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py
index b861476b67fc360f383465145ccd1cc620de5a99..35c4b5bec172858b39dd4628a37e164efe87bdbf 100644
--- a/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py
+++ b/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py
@@ -158,6 +158,9 @@ class LinearToMelTest(test.TestCase):
     with self.assertRaises(ValueError):
       mel_ops.linear_to_mel_weight_matrix(lower_edge_hertz=100,
                                           upper_edge_hertz=10)
+    with self.assertRaises(ValueError):
+      mel_ops.linear_to_mel_weight_matrix(upper_edge_hertz=1000,
+                                          sample_rate=800)
     with self.assertRaises(ValueError):
       mel_ops.linear_to_mel_weight_matrix(dtype=dtypes.int32)
 
diff --git a/tensorflow/contrib/signal/python/ops/mel_ops.py b/tensorflow/contrib/signal/python/ops/mel_ops.py
index 2ad07027aa73f1ae7f44fd684a18fc67400a4f90..d1a36548d95cf44d2bf7e6108141aeb00853db04 100644
--- a/tensorflow/contrib/signal/python/ops/mel_ops.py
+++ b/tensorflow/contrib/signal/python/ops/mel_ops.py
@@ -80,6 +80,10 @@ def _validate_arguments(num_mel_bins, num_spectrogram_bins, sample_rate,
   if lower_edge_hertz >= upper_edge_hertz:
     raise ValueError('lower_edge_hertz %.1f >= upper_edge_hertz %.1f' %
                      (lower_edge_hertz, upper_edge_hertz))
+  if upper_edge_hertz > sample_rate / 2:
+    raise ValueError('upper_edge_hertz must not be larger than the Nyquist '
+                     'frequency (sample_rate / 2). Got: %s for sample_rate: %s'
+                     % (upper_edge_hertz, sample_rate))
   if not dtype.is_floating:
     raise ValueError('dtype must be a floating point type. Got: %s' % dtype)
 
@@ -138,8 +142,8 @@ def linear_to_mel_weight_matrix(num_mel_bins=20,
 
   Raises:
     ValueError: If num_mel_bins/num_spectrogram_bins/sample_rate are not
-      positive, lower_edge_hertz is negative, or frequency edges are incorrectly
-      ordered.
+      positive, lower_edge_hertz is negative, frequency edges are incorrectly
+      ordered, or upper_edge_hertz is larger than the Nyquist frequency.
 
   [mel]: https://en.wikipedia.org/wiki/Mel_scale
   """
diff --git a/tensorflow/contrib/signal/python/ops/mfcc_ops.py b/tensorflow/contrib/signal/python/ops/mfcc_ops.py
index 7bc7b57cd4f1033a8bda0845ccd8e777e0213d6b..6cef95f742515709f0f41632358c2d8663daed2c 100644
--- a/tensorflow/contrib/signal/python/ops/mfcc_ops.py
+++ b/tensorflow/contrib/signal/python/ops/mfcc_ops.py
@@ -50,7 +50,7 @@ def mfccs_from_log_mel_spectrograms(log_mel_spectrograms, name=None):
   # A 1024-point STFT with frames of 64 ms and 75% overlap.
   stfts = tf.contrib.signal.stft(pcm, frame_length=1024, frame_step=256,
                                  fft_length=1024)
-  spectrograms = tf.abs(stft)
+  spectrograms = tf.abs(stfts)
 
   # Warp the linear scale spectrograms into the mel-scale.
   num_spectrogram_bins = stfts.shape[-1].value
diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md
index dc92ae0c859394f44ba83d814adbef7d324a9ada..c7a54cb9a2e9535efbdc179f1463cef379ebb1f9 100644
--- a/tensorflow/contrib/slim/README.md
+++ b/tensorflow/contrib/slim/README.md
@@ -676,7 +676,7 @@ file were implicitly obtained from each provided variable's `var.op.name`.
 
 This works well when the variable names in the checkpoint file match those in
 the graph. However, sometimes, we want to restore a model from a checkpoint
-whose variables have different names those in the current graph. In this case,
+whose variables have different names to those in the current graph. In this case,
 we must provide the `Saver` a dictionary that maps from each checkpoint variable
 name to each graph variable. Consider the following example where the checkpoint
 variables names are obtained via a simple function:
diff --git a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
index 82c6b5a619662ba5cbaba1b3a238045a8d9a2cd2..c42c7b3391db40fd0aad89c45f449487f484f371 100644
--- a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
+++ b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py
@@ -62,7 +62,9 @@ class DatasetDataProvider(data_provider.DataProvider):
                seed=None,
                scope=None):
     """Creates a DatasetDataProvider.
-
+    Note: if `num_epochs` is not `None`,  local counter `epochs` will be created
+    by relevant function. Use `local_variables_initializer()` to initialize
+    local variables.
     Args:
       dataset: An instance of the Dataset class.
       num_readers: The number of parallel readers to use.
@@ -96,12 +98,12 @@ class DatasetDataProvider(data_provider.DataProvider):
     items = dataset.decoder.list_items()
     tensors = dataset.decoder.decode(data, items)
 
-    if record_key in items:
+    items_to_tensors = dict(zip(items, tensors))
+    if record_key in items_to_tensors:
       raise ValueError('The item name used for `record_key` cannot also be '
                        'used for a dataset item: %s', record_key)
-    items.append(record_key)
-    tensors.append(key)
+    items_to_tensors[record_key] = key
 
     super(DatasetDataProvider, self).__init__(
-        items_to_tensors=dict(zip(items, tensors)),
+        items_to_tensors=items_to_tensors,
         num_samples=dataset.num_samples)
diff --git a/tensorflow/contrib/slim/python/slim/learning.py b/tensorflow/contrib/slim/python/slim/learning.py
index def00b76184ba4e1fc630cd83d8e055448100562..54362c87b561595697ee64b9d5e565fdc3f0bbe0 100644
--- a/tensorflow/contrib/slim/python/slim/learning.py
+++ b/tensorflow/contrib/slim/python/slim/learning.py
@@ -753,9 +753,10 @@ def train(train_op,
           if logdir:
             sv.start_standard_services(sess)
         elif startup_delay_steps > 0:
+           # (use sys.maxsize because sys.maxint doesn't exist in Python 3)
           _wait_for_step(sess, global_step,
                          min(startup_delay_steps, number_of_steps or
-                             sys.maxint))
+                             sys.maxsize))
         threads = sv.start_queue_runners(sess)
         logging.info('Starting Queues.')
         if is_chief and sync_optimizer is not None:
diff --git a/tensorflow/contrib/slim/python/slim/nets/inception_v3.py b/tensorflow/contrib/slim/python/slim/nets/inception_v3.py
index e3c0c036d90c95a5f371bef2ca9f960926d82166..afe261e43a9f144992318086f958e21d50286d11 100644
--- a/tensorflow/contrib/slim/python/slim/nets/inception_v3.py
+++ b/tensorflow/contrib/slim/python/slim/nets/inception_v3.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 from tensorflow.contrib import layers
 from tensorflow.contrib.framework.python.ops import arg_scope
+from tensorflow.contrib.layers.python.layers import initializers
 from tensorflow.contrib.layers.python.layers import layers as layers_lib
 from tensorflow.contrib.layers.python.layers import regularizers
 from tensorflow.python.framework import ops
@@ -547,7 +548,10 @@ def inception_v3(inputs,
       parameters or computation cost of the model.
     prediction_fn: a function to get predictions out of logits.
     spatial_squeeze: if True, logits is of shape is [B, C], if false logits is
-        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
+      of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
+      To use this parameter, the input images must be smaller
+      than 300x300 pixels, in which case the output logit layer
+      does not contain spatial information and can be removed.
     reuse: whether or not the network and its variables should be reused. To be
       able to reuse 'scope' must be given.
     scope: Optional variable_scope.
@@ -675,16 +679,20 @@ def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
 
 
 def inception_v3_arg_scope(weight_decay=0.00004,
-                           stddev=0.1,
                            batch_norm_var_collection='moving_vars',
+                           batch_norm_decay=0.9997,
+                           batch_norm_epsilon=0.001,
+                           updates_collections=ops.GraphKeys.UPDATE_OPS,
                            use_fused_batchnorm=True):
   """Defines the default InceptionV3 arg scope.
 
   Args:
     weight_decay: The weight decay to use for regularizing the model.
-    stddev: The standard deviation of the trunctated normal weight initializer.
     batch_norm_var_collection: The name of the collection for the batch norm
       variables.
+    batch_norm_decay: Decay for batch norm moving average
+    batch_norm_epsilon: Small float added to variance to avoid division by zero
+    updates_collections: Collections for the update ops of the layer
     use_fused_batchnorm: Enable fused batchnorm.
 
   Returns:
@@ -692,11 +700,11 @@ def inception_v3_arg_scope(weight_decay=0.00004,
   """
   batch_norm_params = {
       # Decay for the moving averages.
-      'decay': 0.9997,
+      'decay': batch_norm_decay,
       # epsilon to prevent 0s in variance.
-      'epsilon': 0.001,
+      'epsilon': batch_norm_epsilon,
       # collection containing update_ops.
-      'updates_collections': ops.GraphKeys.UPDATE_OPS,
+      'updates_collections': updates_collections,
       # Use fused batch norm if possible.
       'fused': use_fused_batchnorm,
       # collection containing the moving mean and moving variance.
@@ -714,8 +722,7 @@ def inception_v3_arg_scope(weight_decay=0.00004,
       weights_regularizer=regularizers.l2_regularizer(weight_decay)):
     with arg_scope(
         [layers.conv2d],
-        weights_initializer=init_ops.truncated_normal_initializer(
-            stddev=stddev),
+        weights_initializer=initializers.variance_scaling_initializer(),
         activation_fn=nn_ops.relu,
         normalizer_fn=layers_lib.batch_norm,
         normalizer_params=batch_norm_params) as sc:
diff --git a/tensorflow/contrib/specs/__init__.py b/tensorflow/contrib/specs/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..52e83069cb0c68b510da46149248369dce376647 100644
--- a/tensorflow/contrib/specs/__init__.py
+++ b/tensorflow/contrib/specs/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD
index f34291c203d468603abfaebe4a00f1847e71934d..b58c83fdaf574fb349fac57c922f1178b7d13b66 100644
--- a/tensorflow/contrib/summary/BUILD
+++ b/tensorflow/contrib/summary/BUILD
@@ -13,10 +13,7 @@ load(
 tf_gen_op_wrapper_py(
     name = "gen_summary_ops",
     out = "gen_summary_ops.py",
-    visibility = ["//tensorflow:internal"],
-    deps = [
-        "//tensorflow/core:summary_ops_op_lib",
-    ],
+    deps = ["//tensorflow/core:summary_ops_op_lib"],
 )
 
 py_test(
@@ -115,5 +112,6 @@ py_library(
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:lib",
         "//tensorflow/python:platform",
+        "@org_sqlite//:python",
     ],
 )
diff --git a/tensorflow/contrib/summary/summary.py b/tensorflow/contrib/summary/summary.py
index f783179f61495f33c80b897d00aecb46743fddd9..7d3b8b7437a9ff5aaa0834db79bca8883cd679c8 100644
--- a/tensorflow/contrib/summary/summary.py
+++ b/tensorflow/contrib/summary/summary.py
@@ -28,9 +28,11 @@ from __future__ import print_function
 from tensorflow.contrib.summary.summary_ops import all_summary_ops
 from tensorflow.contrib.summary.summary_ops import always_record_summaries
 from tensorflow.contrib.summary.summary_ops import audio
-from tensorflow.contrib.summary.summary_ops import create_summary_db_writer
+from tensorflow.contrib.summary.summary_ops import create_db_writer
+from tensorflow.contrib.summary.summary_ops import create_file_writer
 from tensorflow.contrib.summary.summary_ops import create_summary_file_writer
 from tensorflow.contrib.summary.summary_ops import eval_dir
+from tensorflow.contrib.summary.summary_ops import flush
 from tensorflow.contrib.summary.summary_ops import generic
 from tensorflow.contrib.summary.summary_ops import graph
 from tensorflow.contrib.summary.summary_ops import histogram
diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py
index 8e37987cb71c570d4c3bcea0be2a06e182290815..ee661dfdc11451bb72bc2741b0b54ebf5c1e6543 100644
--- a/tensorflow/contrib/summary/summary_ops.py
+++ b/tensorflow/contrib/summary/summary_ops.py
@@ -38,9 +38,11 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import summary_op_util
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import training_util
 from tensorflow.python.util import tf_contextlib
 
+
 # Name for a collection which is expected to have at most a single boolean
 # Tensor. If this tensor is True the summary ops will record summaries.
 _SHOULD_RECORD_SUMMARIES_NAME = "ShouldRecordSummaries"
@@ -69,7 +71,7 @@ def should_record_summaries():
 def record_summaries_every_n_global_steps(n, global_step=None):
   """Sets the should_record_summaries Tensor to true if global_step % n == 0."""
   if global_step is None:
-    global_step = training_util.get_global_step()
+    global_step = training_util.get_or_create_global_step()
   collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME)
   old = collection_ref[:]
   with ops.device("cpu:0"):
@@ -102,8 +104,8 @@ class SummaryWriter(object):
   """Encapsulates a stateful summary writer resource.
 
   See also:
-  - @{tf.contrib.summary.create_summary_file_writer}
-  - @{tf.contrib.summary.create_summary_db_writer}
+  - @{tf.contrib.summary.create_file_writer}
+  - @{tf.contrib.summary.create_db_writer}
   """
 
   def  __init__(self, resource):
@@ -169,11 +171,11 @@ def initialize(
     session.run(_graph(x, 0), feed_dict={x: data})
 
 
-def create_summary_file_writer(logdir,
-                               max_queue=None,
-                               flush_millis=None,
-                               filename_suffix=None,
-                               name=None):
+def create_file_writer(logdir,
+                       max_queue=None,
+                       flush_millis=None,
+                       filename_suffix=None,
+                       name=None):
   """Creates a summary file writer in the current context.
 
   Args:
@@ -210,11 +212,11 @@ def create_summary_file_writer(logdir,
         filename_suffix=filename_suffix)
 
 
-def create_summary_db_writer(db_uri,
-                             experiment_name=None,
-                             run_name=None,
-                             user_name=None,
-                             name=None):
+def create_db_writer(db_uri,
+                     experiment_name=None,
+                     run_name=None,
+                     user_name=None,
+                     name=None):
   """Creates a summary database writer in the current context.
 
   This can be used to write tensors from the execution graph directly
@@ -498,7 +500,7 @@ _graph = graph  # for functions with a graph parameter
 def import_event(tensor, name=None):
   """Writes a @{tf.Event} binary proto.
 
-  When using create_summary_db_writer(), this can be used alongside
+  When using create_db_writer(), this can be used alongside
   @{tf.TFRecordReader} to load event logs into the database. Please
   note that this is lower level than the other summary functions and
   will ignore any conditions set by methods like
@@ -516,11 +518,39 @@ def import_event(tensor, name=None):
       context.context().summary_writer_resource, tensor, name=name)
 
 
+def flush(writer=None, name=None):
+  """Forces summary writer to send any buffered data to storage.
+
+  This operation blocks until that finishes.
+
+  Args:
+    writer: The @{tf.contrib.summary.SummaryWriter} resource to flush.
+      The thread default will be used if this parameter is None.
+      Otherwise a @{tf.no_op} is returned.
+    name: A name for the operation (optional).
+
+  Returns:
+    The created @{tf.Operation}.
+  """
+  if writer is None:
+    writer = context.context().summary_writer_resource
+    if writer is None:
+      return control_flow_ops.no_op()
+  return gen_summary_ops.flush_summary_writer(writer, name=name)
+
+
 def eval_dir(model_dir, name=None):
   """Construct a logdir for an eval summary writer."""
   return os.path.join(model_dir, "eval" if not name else "eval_" + name)
 
 
+def create_summary_file_writer(*args, **kwargs):
+  """Please use @{tf.contrib.summary.create_file_writer}."""
+  logging.warning("Deprecation Warning: create_summary_file_writer was renamed "
+                  "to create_file_writer")
+  return create_file_writer(*args, **kwargs)
+
+
 def _serialize_graph(arbitrary_graph):
   if isinstance(arbitrary_graph, ops.Graph):
     return arbitrary_graph.as_graph_def(add_shapes=True).SerializeToString()
@@ -530,7 +560,7 @@ def _serialize_graph(arbitrary_graph):
 
 def _choose_step(step):
   if step is None:
-    return training_util.get_global_step()
+    return training_util.get_or_create_global_step()
   if not isinstance(step, ops.Tensor):
     return ops.convert_to_tensor(step, dtypes.int64)
   return step
diff --git a/tensorflow/contrib/summary/summary_ops_graph_test.py b/tensorflow/contrib/summary/summary_ops_graph_test.py
index 703adb7b46c47ee505e24ce2434e293b9c19729f..2b7806f80d020e0064b0f5cf32fd765a9ee993d1 100644
--- a/tensorflow/contrib/summary/summary_ops_graph_test.py
+++ b/tensorflow/contrib/summary/summary_ops_graph_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import test
 from tensorflow.python.training import training_util
 
@@ -48,16 +49,47 @@ class DbTest(summary_test_util.SummaryDbTest):
     name = 'hi'
     graph = graph_pb2.GraphDef(node=(node_def_pb2.NodeDef(name=name),))
     with self.test_session():
-      with self.create_summary_db_writer().as_default():
+      with self.create_db_writer().as_default():
         summary_ops.initialize(graph=graph)
     six.assertCountEqual(self, [name],
                          get_all(self.db, 'SELECT node_name FROM Nodes'))
 
+  def testScalarSummary(self):
+    """Test record_summaries_every_n_global_steps and all_summaries()."""
+    with ops.Graph().as_default(), self.test_session() as sess:
+      global_step = training_util.get_or_create_global_step()
+      global_step.initializer.run()
+      with ops.device('/cpu:0'):
+        step_increment = state_ops.assign_add(global_step, 1)
+      sess.run(step_increment)  # Increment global step from 0 to 1
+
+      logdir = tempfile.mkdtemp()
+      with summary_ops.create_file_writer(logdir, max_queue=0,
+                                          name='t2').as_default():
+        with summary_ops.record_summaries_every_n_global_steps(2):
+          summary_ops.initialize()
+          summary_op = summary_ops.scalar('my_scalar', 2.0)
+
+          # Neither of these should produce a summary because
+          # global_step is 1 and "1 % 2 != 0"
+          sess.run(summary_ops.all_summary_ops())
+          sess.run(summary_op)
+          events = summary_test_util.events_from_logdir(logdir)
+          self.assertEqual(len(events), 1)
+
+          # Increment global step from 1 to 2 and check that the summary
+          # is now written
+          sess.run(step_increment)
+          sess.run(summary_ops.all_summary_ops())
+          events = summary_test_util.events_from_logdir(logdir)
+          self.assertEqual(len(events), 2)
+          self.assertEqual(events[1].summary.value[0].tag, 'my_scalar')
+
   def testSummaryGraphModeCond(self):
     with ops.Graph().as_default(), self.test_session():
       training_util.get_or_create_global_step()
       logdir = tempfile.mkdtemp()
-      with summary_ops.create_summary_file_writer(
+      with summary_ops.create_file_writer(
           logdir, max_queue=0,
           name='t2').as_default(), summary_ops.always_record_summaries():
         summary_ops.initialize()
@@ -78,7 +110,7 @@ class DbTest(summary_test_util.SummaryDbTest):
     with ops.Graph().as_default(), self.test_session():
       training_util.get_or_create_global_step()
       logdir = tempfile.mkdtemp()
-      with summary_ops.create_summary_file_writer(
+      with summary_ops.create_file_writer(
           logdir, max_queue=0,
           name='t2').as_default(), summary_ops.always_record_summaries():
         summary_ops.initialize()
diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py
index d20300c858f2b925bbc813e07b798b2d45bdab8f..dfaa4182bb867cc03480320eaf1804da36206655 100644
--- a/tensorflow/contrib/summary/summary_ops_test.py
+++ b/tensorflow/contrib/summary/summary_ops_test.py
@@ -18,12 +18,14 @@ from __future__ import print_function
 
 import tempfile
 
+import numpy as np
 import six
 
 from tensorflow.contrib.summary import summary_ops
 from tensorflow.contrib.summary import summary_test_util
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
+from tensorflow.core.framework import types_pb2
 from tensorflow.python.eager import function
 from tensorflow.python.eager import test
 from tensorflow.python.framework import dtypes
@@ -37,6 +39,23 @@ from tensorflow.python.training import training_util
 get_all = summary_test_util.get_all
 get_one = summary_test_util.get_one
 
+_NUMPY_NUMERIC_TYPES = {
+    types_pb2.DT_HALF: np.float16,
+    types_pb2.DT_FLOAT: np.float32,
+    types_pb2.DT_DOUBLE: np.float64,
+    types_pb2.DT_INT8: np.int8,
+    types_pb2.DT_INT16: np.int16,
+    types_pb2.DT_INT32: np.int32,
+    types_pb2.DT_INT64: np.int64,
+    types_pb2.DT_UINT8: np.uint8,
+    types_pb2.DT_UINT16: np.uint16,
+    types_pb2.DT_UINT32: np.uint32,
+    types_pb2.DT_UINT64: np.uint64,
+    types_pb2.DT_COMPLEX64: np.complex64,
+    types_pb2.DT_COMPLEX128: np.complex128,
+    types_pb2.DT_BOOL: np.bool_,
+}
+
 
 class TargetTest(test_util.TensorFlowTestCase):
 
@@ -44,7 +63,7 @@ class TargetTest(test_util.TensorFlowTestCase):
     logdir = '/tmp/apath/that/doesnt/exist'
     self.assertFalse(gfile.Exists(logdir))
     with self.assertRaises(errors.NotFoundError):
-      summary_ops.create_summary_file_writer(logdir, max_queue=0, name='t0')
+      summary_ops.create_file_writer(logdir, max_queue=0, name='t0')
 
   def testShouldRecordSummary(self):
     self.assertFalse(summary_ops.should_record_summaries())
@@ -54,7 +73,7 @@ class TargetTest(test_util.TensorFlowTestCase):
   def testSummaryOps(self):
     training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logdir, max_queue=0,
         name='t0').as_default(), summary_ops.always_record_summaries():
       summary_ops.generic('tensor', 1, '')
@@ -69,7 +88,7 @@ class TargetTest(test_util.TensorFlowTestCase):
   def testDefunSummarys(self):
     training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logdir, max_queue=0,
         name='t1').as_default(), summary_ops.always_record_summaries():
 
@@ -85,7 +104,7 @@ class TargetTest(test_util.TensorFlowTestCase):
   def testSummaryName(self):
     training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logdir, max_queue=0,
         name='t2').as_default(), summary_ops.always_record_summaries():
 
@@ -98,7 +117,7 @@ class TargetTest(test_util.TensorFlowTestCase):
   def testSummaryGlobalStep(self):
     step = training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    with summary_ops.create_summary_file_writer(
+    with summary_ops.create_file_writer(
         logdir, max_queue=0,
         name='t2').as_default(), summary_ops.always_record_summaries():
 
@@ -108,11 +127,39 @@ class TargetTest(test_util.TensorFlowTestCase):
       self.assertEqual(len(events), 2)
       self.assertEqual(events[1].summary.value[0].tag, 'scalar')
 
+  def testMaxQueue(self):
+    logs = tempfile.mkdtemp()
+    with summary_ops.create_file_writer(
+        logs, max_queue=2, flush_millis=999999,
+        name='lol').as_default(), summary_ops.always_record_summaries():
+      get_total = lambda: len(summary_test_util.events_from_logdir(logs))
+      # Note: First tf.Event is always file_version.
+      self.assertEqual(1, get_total())
+      summary_ops.scalar('scalar', 2.0, step=1)
+      self.assertEqual(1, get_total())
+      summary_ops.scalar('scalar', 2.0, step=2)
+      self.assertEqual(3, get_total())
+
+  def testFlush(self):
+    logs = tempfile.mkdtemp()
+    with summary_ops.create_file_writer(
+        logs, max_queue=999999, flush_millis=999999,
+        name='lol').as_default(), summary_ops.always_record_summaries():
+      get_total = lambda: len(summary_test_util.events_from_logdir(logs))
+      # Note: First tf.Event is always file_version.
+      self.assertEqual(1, get_total())
+      summary_ops.scalar('scalar', 2.0, step=1)
+      summary_ops.scalar('scalar', 2.0, step=2)
+      self.assertEqual(1, get_total())
+      summary_ops.flush()
+      self.assertEqual(3, get_total())
+
 
 class DbTest(summary_test_util.SummaryDbTest):
 
   def testIntegerSummaries(self):
     step = training_util.create_global_step()
+    writer = self.create_db_writer()
 
     def adder(x, y):
       state_ops.assign_add(step, 1)
@@ -123,11 +170,12 @@ class DbTest(summary_test_util.SummaryDbTest):
       return sum_
 
     with summary_ops.always_record_summaries():
-      with self.create_summary_db_writer().as_default():
+      with writer.as_default():
         self.assertEqual(5, adder(int64(2), int64(3)).numpy())
 
-    six.assertCountEqual(self, [1, 1, 1],
-                         get_all(self.db, 'SELECT step FROM Tensors'))
+    six.assertCountEqual(
+        self, [1, 1, 1],
+        get_all(self.db, 'SELECT step FROM Tensors WHERE dtype IS NOT NULL'))
     six.assertCountEqual(self, ['x', 'y', 'sum'],
                          get_all(self.db, 'SELECT tag_name FROM Tags'))
     x_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "x"')
@@ -135,11 +183,12 @@ class DbTest(summary_test_util.SummaryDbTest):
     sum_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "sum"')
 
     with summary_ops.always_record_summaries():
-      with self.create_summary_db_writer().as_default():
+      with writer.as_default():
         self.assertEqual(9, adder(int64(4), int64(5)).numpy())
 
-    six.assertCountEqual(self, [1, 1, 1, 2, 2, 2],
-                         get_all(self.db, 'SELECT step FROM Tensors'))
+    six.assertCountEqual(
+        self, [1, 1, 1, 2, 2, 2],
+        get_all(self.db, 'SELECT step FROM Tensors WHERE dtype IS NOT NULL'))
     six.assertCountEqual(self, [x_id, y_id, sum_id],
                          get_all(self.db, 'SELECT tag_id FROM Tags'))
     self.assertEqual(2, get_tensor(self.db, x_id, 1))
@@ -158,35 +207,41 @@ class DbTest(summary_test_util.SummaryDbTest):
 
   def testBadExperimentName(self):
     with self.assertRaises(ValueError):
-      self.create_summary_db_writer(experiment_name='\0')
+      self.create_db_writer(experiment_name='\0')
 
   def testBadRunName(self):
     with self.assertRaises(ValueError):
-      self.create_summary_db_writer(run_name='\0')
+      self.create_db_writer(run_name='\0')
 
   def testBadUserName(self):
     with self.assertRaises(ValueError):
-      self.create_summary_db_writer(user_name='-hi')
+      self.create_db_writer(user_name='-hi')
     with self.assertRaises(ValueError):
-      self.create_summary_db_writer(user_name='hi-')
+      self.create_db_writer(user_name='hi-')
     with self.assertRaises(ValueError):
-      self.create_summary_db_writer(user_name='@')
+      self.create_db_writer(user_name='@')
 
   def testGraphSummary(self):
     training_util.get_or_create_global_step()
     name = 'hi'
     graph = graph_pb2.GraphDef(node=(node_def_pb2.NodeDef(name=name),))
     with summary_ops.always_record_summaries():
-      with self.create_summary_db_writer().as_default():
+      with self.create_db_writer().as_default():
         summary_ops.graph(graph)
     six.assertCountEqual(self, [name],
                          get_all(self.db, 'SELECT node_name FROM Nodes'))
 
 
 def get_tensor(db, tag_id, step):
-  return get_one(
-      db, 'SELECT tensor FROM Tensors WHERE tag_id = ? AND step = ?', tag_id,
-      step)
+  cursor = db.execute(
+      'SELECT dtype, shape, data FROM Tensors WHERE series = ? AND step = ?',
+      (tag_id, step))
+  dtype, shape, data = cursor.fetchone()
+  assert dtype in _NUMPY_NUMERIC_TYPES
+  buf = np.frombuffer(data, dtype=_NUMPY_NUMERIC_TYPES[dtype])
+  if not shape:
+    return buf[0]
+  return buf.reshape([int(i) for i in shape.split(',')])
 
 
 def int64(x):
diff --git a/tensorflow/contrib/summary/summary_test_util.py b/tensorflow/contrib/summary/summary_test_util.py
index 94767c8df25023cfe6dd050df6d34153834df70a..bda57e6a0ca8e1ddb979a80de276911c7738f0aa 100644
--- a/tensorflow/contrib/summary/summary_test_util.py
+++ b/tensorflow/contrib/summary/summary_test_util.py
@@ -39,8 +39,8 @@ class SummaryDbTest(test_util.TensorFlowTestCase):
     if os.path.exists(self.db_path):
       os.unlink(self.db_path)
     self.db = sqlite3.connect(self.db_path)
-    self.create_summary_db_writer = functools.partial(
-        summary_ops.create_summary_db_writer,
+    self.create_db_writer = functools.partial(
+        summary_ops.create_db_writer,
         db_uri=self.db_path,
         experiment_name='experiment',
         run_name='run',
@@ -83,7 +83,7 @@ def events_from_logdir(logdir):
   """
   assert gfile.Exists(logdir)
   files = gfile.ListDirectory(logdir)
-  assert len(files) == 1, "Found not exactly one file in logdir: %s" % files
+  assert len(files) == 1, 'Found not exactly one file in logdir: %s' % files
   return events_from_file(os.path.join(logdir, files[0]))
 
 
diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD
index f54daa71255f2a49edf30f73e16dfc211dc92e39..58a7fa095d8356229fdb5879bea99d316113c828 100644
--- a/tensorflow/contrib/tensor_forest/BUILD
+++ b/tensorflow/contrib/tensor_forest/BUILD
@@ -530,7 +530,6 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":client_lib",
-        "//tensorflow/contrib/framework:framework_py",
         "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/contrib/learn",
         "//tensorflow/python:array_ops",
@@ -555,6 +554,7 @@ py_test(
     tags = [
         "no_windows",
         "nomac",  # b/63258195
+        "notsan",
     ],
     deps = [
         ":random_forest",
diff --git a/tensorflow/contrib/tensor_forest/client/random_forest.py b/tensorflow/contrib/tensor_forest/client/random_forest.py
index 807c8398439b9d225c974d2a1cdc9b845df4d26e..a998ac1e111090a3702c0499a54ef1a5c1b3ac90 100644
--- a/tensorflow/contrib/tensor_forest/client/random_forest.py
+++ b/tensorflow/contrib/tensor_forest/client/random_forest.py
@@ -17,7 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib import framework as contrib_framework
 from tensorflow.contrib import layers
 
 from tensorflow.contrib.learn.python.learn.estimators import estimator
@@ -190,7 +189,7 @@ def get_model_fn(params,
                 features, labels, input_weights=weights,
                 num_trainers=num_trainers,
                 trainer_id=trainer_id),
-            state_ops.assign_add(contrib_framework.get_global_step(), 1))
+            state_ops.assign_add(training_util.get_global_step(), 1))
 
     # Put weights back in
     if weights is not None:
@@ -238,8 +237,7 @@ def get_model_fn(params,
     if params.inference_tree_paths:
       model_ops.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths
 
-    if params.regression:
-      model_ops.predictions[VARIANCE_PREDICTION_KEY] = regression_variance
+    model_ops.predictions[VARIANCE_PREDICTION_KEY] = regression_variance
 
     return model_ops
 
diff --git a/tensorflow/contrib/tensor_forest/hybrid/core/ops/unpack_path_op.cc b/tensorflow/contrib/tensor_forest/hybrid/core/ops/unpack_path_op.cc
index 9d5e1400a58cce75c03dfe3e0b5c973c11b89199..cacad03e274c3279eb3706e71e1bcdf8433ca1ef 100644
--- a/tensorflow/contrib/tensor_forest/hybrid/core/ops/unpack_path_op.cc
+++ b/tensorflow/contrib/tensor_forest/hybrid/core/ops/unpack_path_op.cc
@@ -13,16 +13,6 @@
 // limitations under the License.
 // =============================================================================
 
-#include <stdlib.h>
-#include <time.h>
-#include <algorithm>
-#include <cmath>
-#include <memory>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
 #include "tensorflow/contrib/tensor_forest/hybrid/core/ops/utils.h"
 #include "tensorflow/contrib/tensor_forest/kernels/tree_utils.h"
 #include "tensorflow/core/framework/op.h"
@@ -30,6 +20,7 @@
 #include "tensorflow/core/framework/shape_inference.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/gtl/top_n.h"
+#include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/work_sharder.h"
 
@@ -85,7 +76,7 @@ class UnpackPath : public OpKernel {
     const int32 tree_depth = static_cast<int32>(
         path_tensor.shape().dim_size(1));
 
-    const int32 num_nodes = pow(2, tree_depth) - 1;
+    const int32 num_nodes = MathUtil::IPow(2, tree_depth) - 1;
 
     VLOG(1) << "num_data: " << num_data;
     VLOG(1) << "tree_depth: " << tree_depth;
@@ -118,4 +109,5 @@ class UnpackPath : public OpKernel {
 
 REGISTER_KERNEL_BUILDER(Name("UnpackPath").Device(DEVICE_CPU),
                         UnpackPath);
+
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tensor_forest/kernels/v4/BUILD b/tensorflow/contrib/tensor_forest/kernels/v4/BUILD
index b7876e1df6c77d1fa3fa31abb37fc16d06540f15..794b76d8583c3608d540d34a5aaf1d1a799f35e3 100644
--- a/tensorflow/contrib/tensor_forest/kernels/v4/BUILD
+++ b/tensorflow/contrib/tensor_forest/kernels/v4/BUILD
@@ -302,6 +302,7 @@ cc_library(
             "//tensorflow/contrib/tensor_forest/proto:fertile_stats_proto_cc",
         ],
         [
+            "//third_party/eigen3",
             "//tensorflow/contrib/decision_trees/proto:generic_tree_model_cc_headers_only",
             "//tensorflow/contrib/tensor_forest/proto:fertile_stats_proto_cc_headers_only",
         ],
@@ -322,6 +323,7 @@ cc_library(
     srcs = ["params.cc"],
     hdrs = ["params.h"],
     deps = [
+        "//third_party/eigen3",
         "//tensorflow/core:framework_headers_lib",
     ] + if_static(
         [
diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py
index eb938763f12efd9281bec4321384acd4617cdfcf..3650b5d52fe8a1b87a239d41ecfa3de677fffc72 100644
--- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py
+++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py
@@ -478,8 +478,7 @@ class RandomForestGraphs(object):
       **inference_args: Keyword arguments to pass through to each tree.
 
     Returns:
-      A tuple of (probabilities, tree_paths, variance), where variance
-      is the variance over all the trees for regression problems only.
+      A tuple of (probabilities, tree_paths, variance).
 
     Raises:
       NotImplementedError: If trying to use feature bagging with sparse
@@ -513,13 +512,12 @@ class RandomForestGraphs(object):
           self.params.num_trees,
           name='probabilities')
       tree_paths = array_ops.stack(paths, axis=1)
-      regression_variance = None
-      if self.params.regression:
-        expected_squares = math_ops.div(
-            math_ops.reduce_sum(all_predict * all_predict, 1),
-            self.params.num_trees)
-        regression_variance = math_ops.maximum(
-            0., expected_squares - average_values * average_values)
+
+      expected_squares = math_ops.div(
+          math_ops.reduce_sum(all_predict * all_predict, 1),
+          self.params.num_trees)
+      regression_variance = math_ops.maximum(
+          0., expected_squares - average_values * average_values)
       return average_values, tree_paths, regression_variance
 
   def average_size(self):
diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py
index 113dfb85d3bf671e0a9448e0cb0fbfd7f3ea04e7..bbe627b15773fafe83a0700da696f429876c0968 100644
--- a/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py
+++ b/tensorflow/contrib/tensor_forest/python/tensor_forest_test.py
@@ -108,7 +108,7 @@ class TensorForestTest(test_util.TensorFlowTestCase):
     probs, paths, var = graph_builder.inference_graph(input_data)
     self.assertTrue(isinstance(probs, ops.Tensor))
     self.assertTrue(isinstance(paths, ops.Tensor))
-    self.assertIsNone(var)
+    self.assertTrue(isinstance(var, ops.Tensor))
 
   def testTrainingConstructionClassificationSparse(self):
     input_data = sparse_tensor.SparseTensor(
diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD
index 9d3d60c24d72e28cf449cd196e34e53d5450d85f..6ff5a9e2b18ead9ea9f77f796b91b05d9b895489 100644
--- a/tensorflow/contrib/tensorboard/db/BUILD
+++ b/tensorflow/contrib/tensorboard/db/BUILD
@@ -5,12 +5,18 @@ package(default_visibility = ["//tensorflow:internal"])
 
 licenses(["notice"])  # Apache 2.0
 
-load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_cc_binary",
+    "tf_cc_test",
+    "tf_copts",
+)
 
 cc_library(
     name = "schema",
     srcs = ["schema.cc"],
     hdrs = ["schema.h"],
+    copts = tf_copts(),
     deps = [
         "//tensorflow/core:lib",
         "//tensorflow/core/lib/db:sqlite",
@@ -19,6 +25,7 @@ cc_library(
 
 tf_cc_test(
     name = "schema_test",
+    size = "small",
     srcs = ["schema_test.cc"],
     deps = [
         ":schema",
@@ -31,8 +38,11 @@ cc_library(
     name = "summary_db_writer",
     srcs = ["summary_db_writer.cc"],
     hdrs = ["summary_db_writer.h"],
+    copts = tf_copts(),
     deps = [
         ":schema",
+        ":summary_converter",
+        "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
@@ -46,6 +56,7 @@ tf_cc_test(
     size = "small",
     srcs = ["summary_db_writer_test.cc"],
     deps = [
+        ":schema",
         ":summary_db_writer",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
@@ -55,6 +66,77 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "summary_file_writer",
+    srcs = ["summary_file_writer.cc"],
+    hdrs = ["summary_file_writer.h"],
+    copts = tf_copts(),
+    deps = [
+        ":summary_converter",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:ptr_util",
+        "//tensorflow/core/kernels:summary_interface",
+    ],
+)
+
+tf_cc_test(
+    name = "summary_file_writer_test",
+    size = "medium",  # file i/o
+    timeout = "short",
+    srcs = ["summary_file_writer_test.cc"],
+    deps = [
+        ":summary_file_writer",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
+cc_library(
+    name = "summary_converter",
+    srcs = ["summary_converter.cc"],
+    hdrs = ["summary_converter.h"],
+    copts = tf_copts(),
+    visibility = ["//visibility:private"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+tf_cc_binary(
+    name = "loader",
+    srcs = ["loader.cc"],
+    linkstatic = 1,
+    deps = [
+        ":schema",
+        ":summary_db_writer",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/lib/db:sqlite",
+    ],
+)
+
+tf_cc_binary(
+    name = "vacuum",
+    srcs = ["vacuum.cc"],
+    linkstatic = 1,
+    deps = [
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/lib/db:sqlite",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(["*"]),
diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4d7337a53d025f29ae5f85151b7f60d2cca6f771
--- /dev/null
+++ b/tensorflow/contrib/tensorboard/db/loader.cc
@@ -0,0 +1,124 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <iostream>
+#include <vector>
+
+#include "tensorflow/contrib/tensorboard/db/schema.h"
+#include "tensorflow/contrib/tensorboard/db/summary_db_writer.h"
+#include "tensorflow/core/lib/db/sqlite.h"
+#include "tensorflow/core/lib/io/record_reader.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/util/command_line_flags.h"
+#include "tensorflow/core/util/event.pb.h"
+
+namespace tensorflow {
+namespace {
+
+template <typename T>
+string AddCommas(T n) {
+  static_assert(std::is_integral<T>::value, "is_integral");
+  string s = strings::StrCat(n);
+  if (s.size() > 3) {
+    int extra = s.size() / 3 - (s.size() % 3 == 0 ? 1 : 0);
+    s.append(extra, 'X');
+    int c = 0;
+    for (int i = s.size() - 1; i > 0; --i) {
+      s[i] = s[i - extra];
+      if (++c % 3 == 0) {
+        s[--i] = ',';
+        --extra;
+      }
+    }
+  }
+  return s;
+}
+
+int main(int argc, char* argv[]) {
+  string path;
+  string events;
+  string experiment_name;
+  string run_name;
+  string user_name;
+  std::vector<Flag> flag_list = {
+      Flag("db", &path, "Path of SQLite DB file"),
+      Flag("events", &events, "TensorFlow record proto event log file"),
+      Flag("experiment_name", &experiment_name, "The DB experiment_name value"),
+      Flag("run_name", &run_name, "The DB run_name value"),
+      Flag("user_name", &user_name, "The DB user_name value"),
+  };
+  string usage = Flags::Usage(argv[0], flag_list);
+  bool parse_result = Flags::Parse(&argc, argv, flag_list);
+  if (!parse_result || path.empty()) {
+    std::cerr << "The loader tool imports tf.Event record files, created by\n"
+              << "SummaryFileWriter, into the sorts of SQLite database files\n"
+              << "created by SummaryDbWriter.\n\n"
+              << "In addition to the flags below, the environment variables\n"
+              << "defined by core/lib/db/sqlite.cc can also be set.\n\n"
+              << usage;
+    return -1;
+  }
+  port::InitMain(argv[0], &argc, &argv);
+  Env* env = Env::Default();
+
+  LOG(INFO) << "Opening SQLite file: " << path;
+  Sqlite* db;
+  TF_CHECK_OK(Sqlite::Open(
+      path, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_NOMUTEX,
+      &db));
+  core::ScopedUnref unref_db(db);
+
+  LOG(INFO) << "Initializing TensorBoard schema";
+  TF_CHECK_OK(SetupTensorboardSqliteDb(db));
+
+  LOG(INFO) << "Creating SummaryDbWriter";
+  SummaryWriterInterface* db_writer;
+  TF_CHECK_OK(CreateSummaryDbWriter(db, experiment_name, run_name, user_name,
+                                    env, &db_writer));
+  core::ScopedUnref unref(db_writer);
+
+  LOG(INFO) << "Loading TF event log: " << events;
+  std::unique_ptr<RandomAccessFile> file;
+  TF_CHECK_OK(env->NewRandomAccessFile(events, &file));
+  io::RecordReader reader(file.get());
+
+  uint64 start = env->NowMicros();
+  uint64 records = 0;
+  uint64 offset = 0;
+  string record;
+  while (true) {
+    std::unique_ptr<Event> event = std::unique_ptr<Event>(new Event);
+    Status s = reader.ReadRecord(&offset, &record);
+    if (s.code() == error::OUT_OF_RANGE) break;
+    TF_CHECK_OK(s);
+    if (!ParseProtoUnlimited(event.get(), record)) {
+      LOG(FATAL) << "Corrupt tf.Event record"
+                 << " offset=" << (offset - record.size())
+                 << " size=" << static_cast<int>(record.size());
+    }
+    TF_CHECK_OK(db_writer->WriteEvent(std::move(event)));
+    ++records;
+  }
+  uint64 elapsed = env->NowMicros() - start;
+  LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with "
+            << AddCommas(records) << " records at "
+            << AddCommas(offset / (elapsed / 1000000)) << " bps";
+
+  return 0;
+}
+
+}  // namespace
+}  // namespace tensorflow
+
+int main(int argc, char* argv[]) { return tensorflow::main(argc, argv); }
diff --git a/tensorflow/contrib/tensorboard/db/schema.cc b/tensorflow/contrib/tensorboard/db/schema.cc
index d63b2c6cc23248c2dc5bdd4433047d3fa58c1d14..3c7bc87e4a2dbeadef2b9589d58c845204049123 100644
--- a/tensorflow/contrib/tensorboard/db/schema.cc
+++ b/tensorflow/contrib/tensorboard/db/schema.cc
@@ -14,437 +14,430 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/contrib/tensorboard/db/schema.h"
 
+#include "tensorflow/core/lib/core/errors.h"
+
 namespace tensorflow {
 namespace {
 
-class SqliteSchema {
- public:
-  explicit SqliteSchema(std::shared_ptr<Sqlite> db) : db_(std::move(db)) {}
-
-  /// \brief Creates Tensors table.
-  ///
-  /// Fields:
-  ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   tag_id: ID of associated Tag.
-  ///   computed_time: Float UNIX timestamp with microsecond precision.
-  ///     In the old summaries system that uses FileWriter, this is the
-  ///     wall time around when tf.Session.run finished. In the new
-  ///     summaries system, it is the wall time of when the tensor was
-  ///     computed. On systems with monotonic clocks, it is calculated
-  ///     by adding the monotonic run duration to Run.started_time.
-  ///     This field is not indexed because, in practice, it should be
-  ///     ordered the same or nearly the same as TensorIndex, so local
-  ///     insertion sort might be more suitable.
-  ///   step: User-supplied number, ordering this tensor in Tag.
-  ///     If NULL then the Tag must have only one Tensor.
-  ///   tensor: Can be an INTEGER (DT_INT64), FLOAT (DT_DOUBLE), or
-  ///     BLOB. The structure of a BLOB is currently undefined, but in
-  ///     essence it is a Snappy tf.TensorProto that spills over into
-  ///     TensorChunks.
-  Status CreateTensorsTable() {
-    return Run(R"sql(
-      CREATE TABLE IF NOT EXISTS Tensors (
-        rowid INTEGER PRIMARY KEY,
-        tag_id INTEGER NOT NULL,
-        computed_time REAL,
-        step INTEGER,
-        tensor BLOB
-      )
-    )sql");
-  }
-
-  /// \brief Creates TensorChunks table.
-  ///
-  /// This table can be used to split up a tensor across many rows,
-  /// which has the advantage of not slowing down table scans on the
-  /// main table, allowing asynchronous fetching, minimizing copying,
-  /// and preventing large buffers from being allocated.
-  ///
-  /// Fields:
-  ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   tag_id: ID of associated Tag.
-  ///   step: Same as corresponding Tensors.step.
-  ///   sequence: 1-indexed sequence number for ordering chunks. Please
-  ///     note that the 0th index is Tensors.tensor.
-  ///   chunk: Bytes of next chunk in tensor.
-  Status CreateTensorChunksTable() {
-    return Run(R"sql(
-      CREATE TABLE IF NOT EXISTS TensorChunks (
-        rowid INTEGER PRIMARY KEY,
-        tag_id INTEGER NOT NULL,
-        step INTEGER,
-        sequence INTEGER,
-        chunk BLOB
-      )
-    )sql");
-  }
-
-  /// \brief Creates Tags table.
-  ///
-  /// Fields:
-  ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   tag_id: Permanent >0 unique ID.
-  ///   run_id: Optional ID of associated Run.
-  ///   tag_name: The tag field in summary.proto, unique across Run.
-  ///   inserted_time: Float UNIX timestamp with µs precision. This is
-  ///     always the wall time of when the row was inserted into the
-  ///     DB. It may be used as a hint for an archival job.
-  ///   metadata: Optional BLOB of SummaryMetadata proto.
-  ///   display_name: Optional for GUI and defaults to tag_name.
-  ///   summary_description: Optional markdown information.
-  Status CreateTagsTable() {
-    return Run(R"sql(
-      CREATE TABLE IF NOT EXISTS Tags (
-        rowid INTEGER PRIMARY KEY,
-        run_id INTEGER,
-        tag_id INTEGER NOT NULL,
-        tag_name TEXT,
-        inserted_time DOUBLE,
-        metadata BLOB,
-        display_name TEXT,
-        description TEXT
-      )
-    )sql");
-  }
-
-  /// \brief Creates Runs table.
-  ///
-  /// This table stores information about runs. Each row usually
-  /// represents a single attempt at training or testing a TensorFlow
-  /// model, with a given set of hyper-parameters, whose summaries are
-  /// written out to a single event logs directory with a monotonic step
-  /// counter.
-  ///
-  /// When a run is deleted from this table, TensorBoard should treat all
-  /// information associated with it as deleted, even if those rows in
-  /// different tables still exist.
-  ///
-  /// Fields:
-  ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   run_id: Permanent >0 unique ID.
-  ///   experiment_id: Optional ID of associated Experiment.
-  ///   run_name: User-supplied string, unique across Experiment.
-  ///   inserted_time: Float UNIX timestamp with µs precision. This is
-  ///     always the time the row was inserted into the database. It
-  ///     does not change.
-  ///   started_time: Float UNIX timestamp with µs precision. In the
-  ///     old summaries system that uses FileWriter, this is
-  ///     approximated as the first tf.Event.wall_time. In the new
-  ///     summaries system, it is the wall time of when summary writing
-  ///     started, from the perspective of whichever machine talks to
-  ///     the database. This field will be mutated if the run is
-  ///     restarted.
-  ///   description: Optional markdown information.
-  ///   graph_id: ID of associated Graphs row.
-  Status CreateRunsTable() {
-    return Run(R"sql(
-      CREATE TABLE IF NOT EXISTS Runs (
-        rowid INTEGER PRIMARY KEY,
-        experiment_id INTEGER,
-        run_id INTEGER NOT NULL,
-        run_name TEXT,
-        inserted_time REAL,
-        started_time REAL,
-        description TEXT,
-        graph_id INTEGER
-      )
-    )sql");
-  }
-
-  /// \brief Creates Experiments table.
-  ///
-  /// This table stores information about experiments, which are sets of
-  /// runs.
-  ///
-  /// Fields:
-  ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   user_id: Optional ID of associated User.
-  ///   experiment_id: Permanent >0 unique ID.
-  ///   experiment_name: User-supplied string, unique across User.
-  ///   inserted_time: Float UNIX timestamp with µs precision. This is
-  ///     always the time the row was inserted into the database. It
-  ///     does not change.
-  ///   started_time: Float UNIX timestamp with µs precision. This is
-  ///     the MIN(experiment.started_time, run.started_time) of each
-  ///     Run added to the database.
-  ///   description: Optional markdown information.
-  Status CreateExperimentsTable() {
-    return Run(R"sql(
-      CREATE TABLE IF NOT EXISTS Experiments (
-        rowid INTEGER PRIMARY KEY,
-        user_id INTEGER,
-        experiment_id INTEGER NOT NULL,
-        experiment_name TEXT,
-        inserted_time REAL,
-        started_time REAL,
-        description TEXT
-      )
-    )sql");
-  }
-
-  /// \brief Creates Users table.
-  ///
-  /// Fields:
-  ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   user_id: Permanent >0 unique ID.
-  ///   user_name: Unique user name.
-  ///   email: Optional unique email address.
-  ///   inserted_time: Float UNIX timestamp with µs precision. This is
-  ///     always the time the row was inserted into the database. It
-  ///     does not change.
-  Status CreateUsersTable() {
-    return Run(R"sql(
-      CREATE TABLE IF NOT EXISTS Users (
-        rowid INTEGER PRIMARY KEY,
-        user_id INTEGER NOT NULL,
-        user_name TEXT,
-        email TEXT,
-        inserted_time REAL
-      )
-    )sql");
-  }
-
-  /// \brief Creates Graphs table.
-  ///
-  /// Fields:
-  ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   graph_id: Permanent >0 unique ID.
-  ///   inserted_time: Float UNIX timestamp with µs precision. This is
-  ///     always the wall time of when the row was inserted into the
-  ///     DB. It may be used as a hint for an archival job.
-  ///   node_def: Contains Snappy tf.GraphDef proto. All fields will be
-  ///     cleared except those not expressed in SQL.
-  Status CreateGraphsTable() {
-    return Run(R"sql(
-      CREATE TABLE IF NOT EXISTS Graphs (
-        rowid INTEGER PRIMARY KEY,
-        graph_id INTEGER NOT NULL,
-        inserted_time REAL,
-        graph_def BLOB
-      )
-    )sql");
-  }
-
-  /// \brief Creates Nodes table.
-  ///
-  /// Fields:
-  ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   graph_id: Permanent >0 unique ID.
-  ///   node_id: ID for this node. This is more like a 0-index within
-  ///     the Graph. Please note indexes are allowed to be removed.
-  ///   node_name: Unique name for this Node within Graph. This is
-  ///     copied from the proto so it can be indexed. This is allowed
-  ///     to be NULL to save space on the index, in which case the
-  ///     node_def.name proto field must not be cleared.
-  ///   op: Copied from tf.NodeDef proto.
-  ///   device: Copied from tf.NodeDef proto.
-  ///   node_def: Contains Snappy tf.NodeDef proto. All fields will be
-  ///     cleared except those not expressed in SQL.
-  Status CreateNodesTable() {
-    return Run(R"sql(
-      CREATE TABLE IF NOT EXISTS Nodes (
-        rowid INTEGER PRIMARY KEY,
-        graph_id INTEGER NOT NULL,
-        node_id INTEGER NOT NULL,
-        node_name TEXT,
-        op TEXT,
-        device TEXT,
-        node_def BLOB
-      )
-    )sql");
-  }
-
-  /// \brief Creates NodeInputs table.
-  ///
-  /// Fields:
-  ///   rowid: Ephemeral b-tree ID dictating locality.
-  ///   graph_id: Permanent >0 unique ID.
-  ///   node_id: Index of Node in question. This can be considered the
-  ///     'to' vertex.
-  ///   idx: Used for ordering inputs on a given Node.
-  ///   input_node_id: Nodes.node_id of the corresponding input node.
-  ///     This can be considered the 'from' vertex.
-  ///   is_control: If non-zero, indicates this input is a controlled
-  ///     dependency, which means this isn't an edge through which
-  ///     tensors flow. NULL means 0.
-  Status CreateNodeInputsTable() {
-    return Run(R"sql(
-      CREATE TABLE IF NOT EXISTS NodeInputs (
-        rowid INTEGER PRIMARY KEY,
-        graph_id INTEGER NOT NULL,
-        node_id INTEGER NOT NULL,
-        idx INTEGER NOT NULL,
-        input_node_id INTEGER NOT NULL,
-        is_control INTEGER
-      )
-    )sql");
-  }
-
-  /// \brief Uniquely indexes (tag_id, step) on Tensors table.
-  Status CreateTensorIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS TensorIndex
-      ON Tensors (tag_id, step)
-    )sql");
-  }
-
-  /// \brief Uniquely indexes (tag_id, step, sequence) on TensorChunks table.
-  Status CreateTensorChunkIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS TensorChunkIndex
-      ON TensorChunks (tag_id, step, sequence)
-    )sql");
-  }
-
-  /// \brief Uniquely indexes tag_id on Tags table.
-  Status CreateTagIdIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS TagIdIndex
-      ON Tags (tag_id)
-    )sql");
-  }
-
-  /// \brief Uniquely indexes run_id on Runs table.
-  Status CreateRunIdIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS RunIdIndex
-      ON Runs (run_id)
-    )sql");
-  }
-
-  /// \brief Uniquely indexes experiment_id on Experiments table.
-  Status CreateExperimentIdIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS ExperimentIdIndex
-      ON Experiments (experiment_id)
-    )sql");
-  }
-
-  /// \brief Uniquely indexes user_id on Users table.
-  Status CreateUserIdIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS UserIdIndex
-      ON Users (user_id)
-    )sql");
-  }
-
-  /// \brief Uniquely indexes graph_id on Graphs table.
-  Status CreateGraphIdIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS GraphIdIndex
-      ON Graphs (graph_id)
-    )sql");
-  }
-
-  /// \brief Uniquely indexes (graph_id, node_id) on Nodes table.
-  Status CreateNodeIdIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS NodeIdIndex
-      ON Nodes (graph_id, node_id)
-    )sql");
-  }
-
-  /// \brief Uniquely indexes (graph_id, node_id, idx) on NodeInputs table.
-  Status CreateNodeInputsIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS NodeInputsIndex
-      ON NodeInputs (graph_id, node_id, idx)
-    )sql");
-  }
-
-  /// \brief Uniquely indexes (run_id, tag_name) on Tags table.
-  Status CreateTagNameIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS TagNameIndex
-      ON Tags (run_id, tag_name)
-      WHERE tag_name IS NOT NULL
-    )sql");
-  }
-
-  /// \brief Uniquely indexes (experiment_id, run_name) on Runs table.
-  Status CreateRunNameIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS RunNameIndex
-      ON Runs (experiment_id, run_name)
-      WHERE run_name IS NOT NULL
-    )sql");
-  }
-
-  /// \brief Uniquely indexes (user_id, experiment_name) on Experiments table.
-  Status CreateExperimentNameIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS ExperimentNameIndex
-      ON Experiments (user_id, experiment_name)
-      WHERE experiment_name IS NOT NULL
-    )sql");
-  }
-
-  /// \brief Uniquely indexes user_name on Users table.
-  Status CreateUserNameIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS UserNameIndex
-      ON Users (user_name)
-      WHERE user_name IS NOT NULL
-    )sql");
-  }
-
-  /// \brief Uniquely indexes email on Users table.
-  Status CreateUserEmailIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS UserEmailIndex
-      ON Users (email)
-      WHERE email IS NOT NULL
-    )sql");
-  }
-
-  /// \brief Uniquely indexes (graph_id, node_name) on Nodes table.
-  Status CreateNodeNameIndex() {
-    return Run(R"sql(
-      CREATE UNIQUE INDEX IF NOT EXISTS NodeNameIndex
-      ON Nodes (graph_id, node_name)
-      WHERE node_name IS NOT NULL
-    )sql");
-  }
-
-  Status Run(const char* sql) {
-    auto stmt = db_->Prepare(sql);
-    TF_RETURN_WITH_CONTEXT_IF_ERROR(stmt.StepAndReset(), sql);
-    return Status::OK();
-  }
-
- private:
-  std::shared_ptr<Sqlite> db_;
-};
+Status Run(Sqlite* db, const char* sql) {
+  SqliteStatement stmt;
+  TF_RETURN_IF_ERROR(db->Prepare(sql, &stmt));
+  return stmt.StepAndReset();
+}
 
 }  // namespace
 
-Status SetupTensorboardSqliteDb(std::shared_ptr<Sqlite> db) {
-  SqliteSchema s(std::move(db));
-  TF_RETURN_IF_ERROR(s.CreateTensorsTable());
-  TF_RETURN_IF_ERROR(s.CreateTensorChunksTable());
-  TF_RETURN_IF_ERROR(s.CreateTagsTable());
-  TF_RETURN_IF_ERROR(s.CreateRunsTable());
-  TF_RETURN_IF_ERROR(s.CreateExperimentsTable());
-  TF_RETURN_IF_ERROR(s.CreateUsersTable());
-  TF_RETURN_IF_ERROR(s.CreateGraphsTable());
-  TF_RETURN_IF_ERROR(s.CreateNodeInputsTable());
-  TF_RETURN_IF_ERROR(s.CreateNodesTable());
-  TF_RETURN_IF_ERROR(s.CreateTensorIndex());
-  TF_RETURN_IF_ERROR(s.CreateTensorChunkIndex());
-  TF_RETURN_IF_ERROR(s.CreateTagIdIndex());
-  TF_RETURN_IF_ERROR(s.CreateRunIdIndex());
-  TF_RETURN_IF_ERROR(s.CreateExperimentIdIndex());
-  TF_RETURN_IF_ERROR(s.CreateUserIdIndex());
-  TF_RETURN_IF_ERROR(s.CreateGraphIdIndex());
-  TF_RETURN_IF_ERROR(s.CreateNodeIdIndex());
-  TF_RETURN_IF_ERROR(s.CreateNodeInputsIndex());
-  TF_RETURN_IF_ERROR(s.CreateTagNameIndex());
-  TF_RETURN_IF_ERROR(s.CreateRunNameIndex());
-  TF_RETURN_IF_ERROR(s.CreateExperimentNameIndex());
-  TF_RETURN_IF_ERROR(s.CreateUserNameIndex());
-  TF_RETURN_IF_ERROR(s.CreateUserEmailIndex());
-  TF_RETURN_IF_ERROR(s.CreateNodeNameIndex());
-  return Status::OK();
+Status SetupTensorboardSqliteDb(Sqlite* db) {
+  // Note: GCC raw strings macros are broken.
+  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55971
+  TF_RETURN_IF_ERROR(
+      db->PrepareOrDie(strings::StrCat("PRAGMA application_id=",
+                                       kTensorboardSqliteApplicationId))
+          .StepAndReset());
+  db->PrepareOrDie("PRAGMA user_version=0").StepAndResetOrDie();
+  Status s;
+
+  // Ids identify resources.
+  //
+  // This table can be used to efficiently generate Permanent IDs in
+  // conjunction with a random number generator. Unlike rowids these
+  // IDs safe to use in URLs and unique across tables.
+  //
+  // Within any given system, there can't be any foo_id == bar_id for
+  // all rows of any two (Foos, Bars) tables. A row should only be
+  // deleted from this table if there's a very high level of confidence
+  // it exists nowhere else in the system.
+  //
+  // Fields:
+  //   id: The system-wide ID. This must be in the range [1,2**47). 0
+  //     is assigned the same meaning as NULL and shouldn't be stored
+  //     and all other int64 values are reserved for future use. Please
+  //     note that id is also the rowid.
+  s.Update(Run(db, R"sql(
+    CREATE TABLE IF NOT EXISTS Ids (
+      id INTEGER PRIMARY KEY
+    )
+  )sql"));
+
+  // Descriptions are Markdown text that can be associated with any
+  // resource that has a Permanent ID.
+  //
+  // Fields:
+  //   id: The foo_id of the associated row in Foos.
+  //   description: Arbitrary NUL-terminated Markdown text.
+  s.Update(Run(db, R"sql(
+    CREATE TABLE IF NOT EXISTS Descriptions (
+      id INTEGER PRIMARY KEY,
+      description TEXT
+    )
+  )sql"));
+
+  // Tensors are 0..n-dimensional numbers or strings.
+  //
+  // Fields:
+  //   rowid: Ephemeral b-tree ID.
+  //   series: The Permanent ID of a different resource, e.g. tag_id. A
+  //     tensor will be vacuumed if no series == foo_id exists for all
+  //     rows of all Foos. When series is NULL this tensor may serve
+  //     undefined purposes. This field should be set on placeholders.
+  //   step: Arbitrary number to uniquely order tensors within series.
+  //     The meaning of step is undefined when series is NULL. This may
+  //     be set on placeholders to prepopulate index pages.
+  //   computed_time: Float UNIX timestamp with microsecond precision.
+  //     In the old summaries system that uses FileWriter, this is the
+  //     wall time around when tf.Session.run finished. In the new
+  //     summaries system, it is the wall time of when the tensor was
+  //     computed. On systems with monotonic clocks, it is calculated
+  //     by adding the monotonic run duration to Run.started_time.
+  //   dtype: The tensorflow::DataType ID. For example, DT_INT64 is 9.
+  //     When NULL or 0 this must be treated as a placeholder row that
+  //     does not officially exist.
+  //   shape: A comma-delimited list of int64 >=0 values representing
+  //     length of each dimension in the tensor. This must be a valid
+  //     shape. That means no -1 values and, in the case of numeric
+  //     tensors, length(data) == product(shape) * sizeof(dtype). Empty
+  //     means this is a scalar a.k.a. 0-dimensional tensor.
+  //   data: Little-endian raw tensor memory. If dtype is DT_STRING and
+  //     shape is empty, the nullness of this field indicates whether or
+  //     not it contains the tensor contents; otherwise TensorStrings
+  //     must be queried. If dtype is NULL then ZEROBLOB can be used on
+  //     this field to reserve row space to be updated later.
+  s.Update(Run(db, R"sql(
+    CREATE TABLE IF NOT EXISTS Tensors (
+      rowid INTEGER PRIMARY KEY,
+      series INTEGER,
+      step INTEGER,
+      dtype INTEGER,
+      computed_time REAL,
+      shape TEXT,
+      data BLOB
+    )
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS
+      TensorSeriesStepIndex
+    ON
+      Tensors (series, step)
+    WHERE
+      series IS NOT NULL
+      AND step IS NOT NULL
+  )sql"));
+
+  // TensorStrings are the flat contents of 1..n dimensional DT_STRING
+  // Tensors.
+  //
+  // The number of rows associated with a Tensor must be equal to the
+  // product of its Tensors.shape.
+  //
+  // Fields:
+  //   rowid: Ephemeral b-tree ID.
+  //   tensor_rowid: References Tensors.rowid.
+  //   idx: Index in flattened tensor, starting at 0.
+  //   data: The string value at a particular index. NUL characters are
+  //     permitted.
+  s.Update(Run(db, R"sql(
+    CREATE TABLE IF NOT EXISTS TensorStrings (
+      rowid INTEGER PRIMARY KEY,
+      tensor_rowid INTEGER NOT NULL,
+      idx INTEGER NOT NULL,
+      data BLOB
+    )
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS TensorStringIndex
+    ON TensorStrings (tensor_rowid, idx)
+  )sql"));
+
+  // Tags are series of Tensors.
+  //
+  // Fields:
+  //   rowid: Ephemeral b-tree ID.
+  //   tag_id: The Permanent ID of the Tag.
+  //   run_id: Optional ID of associated Run.
+  //   inserted_time: Float UNIX timestamp with µs precision. This is
+  //     always the wall time of when the row was inserted into the
+  //     DB. It may be used as a hint for an archival job.
+  //   tag_name: The tag field in summary.proto, unique across Run.
+  //   display_name: Optional for GUI and defaults to tag_name.
+  //   plugin_name: Arbitrary TensorBoard plugin name for dispatch.
+  //   plugin_data: Arbitrary data that plugin wants.
+  //
+  // TODO(jart): Maybe there should be a Plugins table?
+  s.Update(Run(db, R"sql(
+    CREATE TABLE IF NOT EXISTS Tags (
+      rowid INTEGER PRIMARY KEY,
+      run_id INTEGER,
+      tag_id INTEGER NOT NULL,
+      inserted_time DOUBLE,
+      tag_name TEXT,
+      display_name TEXT,
+      plugin_name TEXT,
+      plugin_data BLOB
+    )
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS TagIdIndex
+    ON Tags (tag_id)
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS
+      TagRunNameIndex
+    ON
+      Tags (run_id, tag_name)
+    WHERE
+      run_id IS NOT NULL
+      AND tag_name IS NOT NULL
+  )sql"));
+
+  // Runs are groups of Tags.
+  //
+  // Each Run usually represents a single attempt at training or testing
+  // a TensorFlow model, with a given set of hyper-parameters, whose
+  // summaries are written out to a single event logs directory with a
+  // monotonic step counter.
+  //
+  // Fields:
+  //   rowid: Ephemeral b-tree ID.
+  //   run_id: The Permanent ID of the Run. This has a 1:1 mapping
+  //     with a SummaryWriter instance. If two writers spawn for a
+  //     given (user_name, run_name, run_name) then each should
+  //     allocate its own run_id and whichever writer puts it in the
+  //     database last wins. The Tags / Tensors associated with the
+  //     previous invocations will then enter limbo, where they may be
+  //     accessible for certain operations, but should be garbage
+  //     collected eventually.
+  //   run_name: User-supplied string, unique across Experiment.
+  //   experiment_id: Optional ID of associated Experiment.
+  //   inserted_time: Float UNIX timestamp with µs precision. This is
+  //     always the time the row was inserted into the database. It
+  //     does not change.
+  //   started_time: Float UNIX timestamp with µs precision. In the
+  //     old summaries system that uses FileWriter, this is
+  //     approximated as the first tf.Event.wall_time. In the new
+  //     summaries system, it is the wall time of when summary writing
+  //     started, from the perspective of whichever machine talks to
+  //     the database. This field will be mutated if the run is
+  //     restarted.
+  //   finished_time: Float UNIX timestamp with µs precision of when
+  //     SummaryWriter resource that created this run was destroyed.
+  //     Once this value becomes non-NULL a Run and its Tags and
+  //     Tensors should be regarded as immutable.
+  s.Update(Run(db, R"sql(
+    CREATE TABLE IF NOT EXISTS Runs (
+      rowid INTEGER PRIMARY KEY,
+      experiment_id INTEGER,
+      run_id INTEGER NOT NULL,
+      inserted_time REAL,
+      started_time REAL,
+      finished_time REAL,
+      run_name TEXT
+    )
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS RunIdIndex
+    ON Runs (run_id)
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS RunNameIndex
+    ON Runs (experiment_id, run_name)
+    WHERE run_name IS NOT NULL
+  )sql"));
+
+  // Experiments are groups of Runs.
+  //
+  // Fields:
+  //   rowid: Ephemeral b-tree ID.
+  //   user_id: Optional ID of associated User.
+  //   experiment_id: The Permanent ID of the Experiment.
+  //   experiment_name: User-supplied string, unique across User.
+  //   inserted_time: Float UNIX timestamp with µs precision. This is
+  //     always the time the row was inserted into the database. It
+  //     does not change.
+  //   started_time: Float UNIX timestamp with µs precision. This is
+  //     the MIN(experiment.started_time, run.started_time) of each
+  //     Run added to the database, including Runs which have since
+  //     been overwritten.
+  //   is_watching: A boolean indicating if someone is actively
+  //     looking at this Experiment in the TensorBoard GUI. Tensor
+  //     writers that do reservoir sampling can query this value to
+  //     decide if they want the "keep last" behavior. This improves
+  //     the performance of long running training while allowing low
+  //     latency feedback in TensorBoard.
+  s.Update(Run(db, R"sql(
+    CREATE TABLE IF NOT EXISTS Experiments (
+      rowid INTEGER PRIMARY KEY,
+      user_id INTEGER,
+      experiment_id INTEGER NOT NULL,
+      inserted_time REAL,
+      started_time REAL,
+      is_watching INTEGER,
+      experiment_name TEXT
+    )
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS ExperimentIdIndex
+    ON Experiments (experiment_id)
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS ExperimentNameIndex
+    ON Experiments (user_id, experiment_name)
+    WHERE experiment_name IS NOT NULL
+  )sql"));
+
+  // Users are people who love TensorBoard.
+  //
+  // Fields:
+  //   rowid: Ephemeral b-tree ID.
+  //   user_id: The Permanent ID of the User.
+  //   user_name: Unique user name.
+  //   email: Optional unique email address.
+  //   inserted_time: Float UNIX timestamp with µs precision. This is
+  //     always the time the row was inserted into the database. It
+  //     does not change.
+  s.Update(Run(db, R"sql(
+    CREATE TABLE IF NOT EXISTS Users (
+      rowid INTEGER PRIMARY KEY,
+      user_id INTEGER NOT NULL,
+      inserted_time REAL,
+      user_name TEXT,
+      email TEXT
+    )
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS UserIdIndex
+    ON Users (user_id)
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS UserNameIndex
+    ON Users (user_name)
+    WHERE user_name IS NOT NULL
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS UserEmailIndex
+    ON Users (email)
+    WHERE email IS NOT NULL
+  )sql"));
+
+  // Graphs define how Tensors flowed in Runs.
+  //
+  // Fields:
+  //   rowid: Ephemeral b-tree ID.
+  //   run_id: The Permanent ID of the associated Run. Only one Graph
+  //     can be associated with a Run.
+  //   graph_id: The Permanent ID of the Graph.
+  //   inserted_time: Float UNIX timestamp with µs precision. This is
+  //     always the wall time of when the row was inserted into the
+  //     DB. It may be used as a hint for an archival job.
+  //   graph_def: Contains the tf.GraphDef proto parts leftover which
+  //     haven't been defined in SQL yet.
+  s.Update(Run(db, R"sql(
+    CREATE TABLE IF NOT EXISTS Graphs (
+      rowid INTEGER PRIMARY KEY,
+      run_id INTEGER,
+      graph_id INTEGER NOT NULL,
+      inserted_time REAL,
+      graph_def BLOB
+    )
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS GraphIdIndex
+    ON Graphs (graph_id)
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS GraphRunIndex
+    ON Graphs (run_id)
+    WHERE run_id IS NOT NULL
+  )sql"));
+
+  // Nodes are the vertices in Graphs.
+  //
+  // Fields:
+  //   rowid: Ephemeral b-tree ID.
+  //   graph_id: The Permanent ID of the associated Graph.
+  //   node_id: ID for this node. This is more like a 0-index within
+  //     the Graph. Please note indexes are allowed to be removed.
+  //   node_name: Unique name for this Node within Graph. This is
+  //     copied from the proto so it can be indexed. This is allowed
+  //     to be NULL to save space on the index, in which case the
+  //     node_def.name proto field must not be cleared.
+  //   op: Copied from tf.NodeDef proto.
+  //   device: Copied from tf.NodeDef proto.
+  //   node_def: Contains the tf.NodeDef proto parts leftover which
+  //     haven't been defined in SQL yet.
+  //
+  // TODO(jart): Make separate tables for op and device strings.
+  s.Update(Run(db, R"sql(
+    CREATE TABLE IF NOT EXISTS Nodes (
+      rowid INTEGER PRIMARY KEY,
+      graph_id INTEGER NOT NULL,
+      node_id INTEGER NOT NULL,
+      node_name TEXT,
+      op TEXT,
+      device TEXT,
+      node_def BLOB
+    )
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS NodeIdIndex
+    ON Nodes (graph_id, node_id)
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS NodeNameIndex
+    ON Nodes (graph_id, node_name)
+    WHERE node_name IS NOT NULL
+  )sql"));
+
+  // NodeInputs are directed edges between Nodes in Graphs.
+  //
+  // Fields:
+  //   rowid: Ephemeral b-tree ID.
+  //   graph_id: The Permanent ID of the associated Graph.
+  //   node_id: Index of Node in question. This can be considered the
+  //     'to' vertex.
+  //   idx: Used for ordering inputs on a given Node.
+  //   input_node_id: Nodes.node_id of the corresponding input node.
+  //     This can be considered the 'from' vertex.
+  //   input_node_idx: Since a Node can output multiple Tensors, this
+  //     is the integer index of which of those outputs is our input.
+  //     NULL is treated as 0.
+  //   is_control: If non-zero, indicates this input is a controlled
+  //     dependency, which means this isn't an edge through which
+  //     tensors flow. NULL means 0.
+  //
+  // TODO(jart): Rename to NodeEdges.
+  s.Update(Run(db, R"sql(
+    CREATE TABLE IF NOT EXISTS NodeInputs (
+      rowid INTEGER PRIMARY KEY,
+      graph_id INTEGER NOT NULL,
+      node_id INTEGER NOT NULL,
+      idx INTEGER NOT NULL,
+      input_node_id INTEGER NOT NULL,
+      input_node_idx INTEGER,
+      is_control INTEGER
+    )
+  )sql"));
+
+  s.Update(Run(db, R"sql(
+    CREATE UNIQUE INDEX IF NOT EXISTS NodeInputsIndex
+    ON NodeInputs (graph_id, node_id, idx)
+  )sql"));
+
+  return s;
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tensorboard/db/schema.h b/tensorflow/contrib/tensorboard/db/schema.h
index 900c10298ce0a69b92f7528db9742517243c3c51..3da450422523dbe4304446869a38d43981d76eb5 100644
--- a/tensorflow/contrib/tensorboard/db/schema.h
+++ b/tensorflow/contrib/tensorboard/db/schema.h
@@ -15,18 +15,18 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_TENSORBOARD_DB_SCHEMA_H_
 #define TENSORFLOW_CONTRIB_TENSORBOARD_DB_SCHEMA_H_
 
-#include <memory>
-
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/db/sqlite.h"
 
 namespace tensorflow {
 
+constexpr uint32 kTensorboardSqliteApplicationId = 0xfeedabee;
+
 /// \brief Creates TensorBoard SQLite tables and indexes.
 ///
 /// If they are already created, this has no effect. If schema
 /// migrations are necessary, they will be performed with logging.
-Status SetupTensorboardSqliteDb(std::shared_ptr<Sqlite> db);
+Status SetupTensorboardSqliteDb(Sqlite* db);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/contrib/tensorboard/db/schema_test.cc b/tensorflow/contrib/tensorboard/db/schema_test.cc
index 463c4e59e7e76e6460b7ddfbd92262ac249aa9ed..4d3f2880bd02682ad00a90760f2a4478f1e6b2a2 100644
--- a/tensorflow/contrib/tensorboard/db/schema_test.cc
+++ b/tensorflow/contrib/tensorboard/db/schema_test.cc
@@ -23,7 +23,9 @@ namespace tensorflow {
 namespace {
 
 TEST(SchemaTest, SmokeTestTensorboardSchema) {
-  auto db = Sqlite::Open(":memory:").ValueOrDie();
+  Sqlite* db;
+  TF_ASSERT_OK(Sqlite::Open(":memory:", SQLITE_OPEN_READWRITE, &db));
+  core::ScopedUnref unref_db(db);
   TF_ASSERT_OK(SetupTensorboardSqliteDb(db));
 }
 
diff --git a/tensorflow/contrib/tensorboard/db/summary_converter.cc b/tensorflow/contrib/tensorboard/db/summary_converter.cc
new file mode 100644
index 0000000000000000000000000000000000000000..93c1183072b4d791843e740f970234ba52857463
--- /dev/null
+++ b/tensorflow/contrib/tensorboard/db/summary_converter.cc
@@ -0,0 +1,322 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/tensorboard/db/summary_converter.h"
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/summary.pb.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/lib/histogram/histogram.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/png/png_io.h"
+#include "tensorflow/core/lib/wav/wav_io.h"
+
+namespace tensorflow {
+namespace {
+
+template <typename T>
+Status TensorValueAt(Tensor t, int64 i, T* out) {
+#define CASE(I)                            \
+  case DataTypeToEnum<I>::value:           \
+    *out = static_cast<T>(t.flat<I>()(i)); \
+    break;
+#define COMPLEX_CASE(I)                           \
+  case DataTypeToEnum<I>::value:                  \
+    *out = static_cast<T>(t.flat<I>()(i).real()); \
+    break;
+  // clang-format off
+  switch (t.dtype()) {
+    TF_CALL_half(CASE)
+    TF_CALL_float(CASE)
+    TF_CALL_double(CASE)
+    TF_CALL_int8(CASE)
+    TF_CALL_int16(CASE)
+    TF_CALL_int32(CASE)
+    TF_CALL_int64(CASE)
+    TF_CALL_uint8(CASE)
+    TF_CALL_uint16(CASE)
+    TF_CALL_uint32(CASE)
+    TF_CALL_uint64(CASE)
+    TF_CALL_complex64(COMPLEX_CASE)
+    TF_CALL_complex128(COMPLEX_CASE)
+    default:
+        return errors::Unimplemented("SummaryFileWriter ",
+                                     DataTypeString(t.dtype()),
+                                     " not supported.");
+  }
+  // clang-format on
+  return Status::OK();
+#undef CASE
+#undef COMPLEX_CASE
+}
+
+typedef Eigen::Tensor<uint8, 2, Eigen::RowMajor> Uint8Image;
+
+// Add the sequence of images specified by ith_image to the summary.
+//
+// Factoring this loop out into a helper function lets ith_image behave
+// differently in the float and uint8 cases: the float case needs a temporary
+// buffer which can be shared across calls to ith_image, but the uint8 case
+// does not.
+Status AddImages(const string& tag, int max_images, int batch_size, int w,
+                 int h, int depth,
+                 const std::function<Uint8Image(int)>& ith_image, Summary* s) {
+  const int N = std::min<int>(max_images, batch_size);
+  for (int i = 0; i < N; ++i) {
+    Summary::Value* v = s->add_value();
+    // The tag depends on the number of requested images (not the number
+    // produced.)
+    //
+    // Note that later on avisu uses "/" to figure out a consistent naming
+    // convention for display, so we append "/image" to guarantee that the
+    // image(s) won't be displayed in the global scope with no name.
+    if (max_images > 1) {
+      v->set_tag(strings::StrCat(tag, "/image/", i));
+    } else {
+      v->set_tag(strings::StrCat(tag, "/image"));
+    }
+
+    const auto image = ith_image(i);
+    Summary::Image* si = v->mutable_image();
+    si->set_height(h);
+    si->set_width(w);
+    si->set_colorspace(depth);
+    const int channel_bits = 8;
+    const int compression = -1;  // Use zlib default
+    if (!png::WriteImageToBuffer(image.data(), w, h, w * depth, depth,
+                                 channel_bits, compression,
+                                 si->mutable_encoded_image_string(), nullptr)) {
+      return errors::Internal("PNG encoding failed");
+    }
+  }
+  return Status::OK();
+}
+
+template <class T>
+void NormalizeFloatImage(int hw, int depth,
+                         typename TTypes<T>::ConstMatrix values,
+                         typename TTypes<uint8>::ConstVec bad_color,
+                         Uint8Image* image) {
+  if (!image->size()) return;  // Nothing to do for empty images
+
+  // Rescale the image to uint8 range.
+  //
+  // We are trying to generate an RGB image from a float/half tensor.  We do
+  // not have any info about the expected range of values in the tensor
+  // but the generated image needs to have all RGB values within [0, 255].
+  //
+  // We use two different algorithms to generate these values.  If the
+  // tensor has only positive values we scale them all by 255/max(values).
+  // If the tensor has both negative and positive values we scale them by
+  // the max of their absolute values and center them around 127.
+  //
+  // This works for most cases, but does not respect the relative dynamic
+  // range across different instances of the tensor.
+
+  // Compute min and max ignoring nonfinite pixels
+  float image_min = std::numeric_limits<float>::infinity();
+  float image_max = -image_min;
+  for (int i = 0; i < hw; i++) {
+    bool finite = true;
+    for (int j = 0; j < depth; j++) {
+      if (!Eigen::numext::isfinite(values(i, j))) {
+        finite = false;
+        break;
+      }
+    }
+    if (finite) {
+      for (int j = 0; j < depth; j++) {
+        float value(values(i, j));
+        image_min = std::min(image_min, value);
+        image_max = std::max(image_max, value);
+      }
+    }
+  }
+
+  // Pick an affine transform into uint8
+  const float kZeroThreshold = 1e-6;
+  T scale, offset;
+  if (image_min < 0) {
+    const float max_val = std::max(std::abs(image_min), std::abs(image_max));
+    scale = T(max_val < kZeroThreshold ? 0.0f : 127.0f / max_val);
+    offset = T(128.0f);
+  } else {
+    scale = T(image_max < kZeroThreshold ? 0.0f : 255.0f / image_max);
+    offset = T(0.0f);
+  }
+
+  // Transform image, turning nonfinite values to bad_color
+  for (int i = 0; i < hw; i++) {
+    bool finite = true;
+    for (int j = 0; j < depth; j++) {
+      if (!Eigen::numext::isfinite(values(i, j))) {
+        finite = false;
+        break;
+      }
+    }
+    if (finite) {
+      image->chip<0>(i) =
+          (values.template chip<0>(i) * scale + offset).template cast<uint8>();
+    } else {
+      image->chip<0>(i) = bad_color;
+    }
+  }
+}
+
+template <class T>
+Status NormalizeAndAddImages(const Tensor& tensor, int max_images, int h, int w,
+                             int hw, int depth, int batch_size,
+                             const string& base_tag, Tensor bad_color_tensor,
+                             Summary* s) {
+  // For float and half images, nans and infs are replaced with bad_color.
+  if (bad_color_tensor.dim_size(0) < depth) {
+    return errors::InvalidArgument(
+        "expected depth <= bad_color.size, got depth = ", depth,
+        ", bad_color.size = ", bad_color_tensor.dim_size(0));
+  }
+  auto bad_color_full = bad_color_tensor.vec<uint8>();
+  typename TTypes<uint8>::ConstVec bad_color(bad_color_full.data(), depth);
+
+  // Float images must be scaled and translated.
+  Uint8Image image(hw, depth);
+  auto ith_image = [&tensor, &image, bad_color, batch_size, hw, depth](int i) {
+    auto tensor_eigen = tensor.template shaped<T, 3>({batch_size, hw, depth});
+    typename TTypes<T>::ConstMatrix values(
+        &tensor_eigen(i, 0, 0), Eigen::DSizes<Eigen::DenseIndex, 2>(hw, depth));
+    NormalizeFloatImage<T>(hw, depth, values, bad_color, &image);
+    return image;
+  };
+  return AddImages(base_tag, max_images, batch_size, w, h, depth, ith_image, s);
+}
+
+}  // namespace
+
+Status AddTensorAsScalarToSummary(const Tensor& t, const string& tag,
+                                  Summary* s) {
+  Summary::Value* v = s->add_value();
+  v->set_tag(tag);
+  float value;
+  TF_RETURN_IF_ERROR(TensorValueAt<float>(t, 0, &value));
+  v->set_simple_value(value);
+  return Status::OK();
+}
+
+Status AddTensorAsHistogramToSummary(const Tensor& t, const string& tag,
+                                     Summary* s) {
+  Summary::Value* v = s->add_value();
+  v->set_tag(tag);
+  histogram::Histogram histo;
+  for (int64 i = 0; i < t.NumElements(); i++) {
+    double double_val;
+    TF_RETURN_IF_ERROR(TensorValueAt<double>(t, i, &double_val));
+    if (Eigen::numext::isnan(double_val)) {
+      return errors::InvalidArgument("Nan in summary histogram for: ", tag);
+    } else if (Eigen::numext::isinf(double_val)) {
+      return errors::InvalidArgument("Infinity in summary histogram for: ",
+                                     tag);
+    }
+    histo.Add(double_val);
+  }
+  histo.EncodeToProto(v->mutable_histo(), false /* Drop zero buckets */);
+  return Status::OK();
+}
+
+Status AddTensorAsImageToSummary(const Tensor& tensor, const string& tag,
+                                 int max_images, const Tensor& bad_color,
+                                 Summary* s) {
+  if (!(tensor.dims() == 4 &&
+        (tensor.dim_size(3) == 1 || tensor.dim_size(3) == 3 ||
+         tensor.dim_size(3) == 4))) {
+    return errors::InvalidArgument(
+        "Tensor must be 4-D with last dim 1, 3, or 4, not ",
+        tensor.shape().DebugString());
+  }
+  if (!(tensor.dim_size(0) < (1LL << 31) && tensor.dim_size(1) < (1LL << 31) &&
+        tensor.dim_size(2) < (1LL << 31) &&
+        (tensor.dim_size(1) * tensor.dim_size(2)) < (1LL << 29))) {
+    return errors::InvalidArgument("Tensor too large for summary ",
+                                   tensor.shape().DebugString());
+  }
+  // The casts and h * w cannot overflow because of the limits above.
+  const int batch_size = static_cast<int>(tensor.dim_size(0));
+  const int h = static_cast<int>(tensor.dim_size(1));
+  const int w = static_cast<int>(tensor.dim_size(2));
+  const int hw = h * w;  // Compact these two dims for simplicity
+  const int depth = static_cast<int>(tensor.dim_size(3));
+  if (tensor.dtype() == DT_UINT8) {
+    // For uint8 input, no normalization is necessary
+    auto ith_image = [&tensor, batch_size, hw, depth](int i) {
+      auto values = tensor.shaped<uint8, 3>({batch_size, hw, depth});
+      return typename TTypes<uint8>::ConstMatrix(
+          &values(i, 0, 0), Eigen::DSizes<Eigen::DenseIndex, 2>(hw, depth));
+    };
+    TF_RETURN_IF_ERROR(
+        AddImages(tag, max_images, batch_size, w, h, depth, ith_image, s));
+  } else if (tensor.dtype() == DT_HALF) {
+    TF_RETURN_IF_ERROR(NormalizeAndAddImages<Eigen::half>(
+        tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, s));
+  } else if (tensor.dtype() == DT_FLOAT) {
+    TF_RETURN_IF_ERROR(NormalizeAndAddImages<float>(
+        tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, s));
+  } else {
+    return errors::InvalidArgument(
+        "Only DT_INT8, DT_HALF, and DT_FLOAT images are supported. Got ",
+        DataTypeString(tensor.dtype()));
+  }
+  return Status::OK();
+}
+
+Status AddTensorAsAudioToSummary(const Tensor& tensor, const string& tag,
+                                 int max_outputs, float sample_rate,
+                                 Summary* s) {
+  if (sample_rate <= 0.0f) {
+    return errors::InvalidArgument("sample_rate must be > 0");
+  }
+  const int batch_size = tensor.dim_size(0);
+  const int64 length_frames = tensor.dim_size(1);
+  const int64 num_channels =
+      tensor.dims() == 2 ? 1 : tensor.dim_size(tensor.dims() - 1);
+  const int N = std::min<int>(max_outputs, batch_size);
+  for (int i = 0; i < N; ++i) {
+    Summary::Value* v = s->add_value();
+    if (max_outputs > 1) {
+      v->set_tag(strings::StrCat(tag, "/audio/", i));
+    } else {
+      v->set_tag(strings::StrCat(tag, "/audio"));
+    }
+
+    Summary::Audio* sa = v->mutable_audio();
+    sa->set_sample_rate(sample_rate);
+    sa->set_num_channels(num_channels);
+    sa->set_length_frames(length_frames);
+    sa->set_content_type("audio/wav");
+
+    auto values =
+        tensor.shaped<float, 3>({batch_size, length_frames, num_channels});
+    auto channels_by_frames = typename TTypes<float>::ConstMatrix(
+        &values(i, 0, 0),
+        Eigen::DSizes<Eigen::DenseIndex, 2>(length_frames, num_channels));
+    size_t sample_rate_truncated = lrintf(sample_rate);
+    if (sample_rate_truncated == 0) {
+      sample_rate_truncated = 1;
+    }
+    TF_RETURN_IF_ERROR(wav::EncodeAudioAsS16LEWav(
+        channels_by_frames.data(), sample_rate_truncated, num_channels,
+        length_frames, sa->mutable_encoded_audio_string()));
+  }
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tensorboard/db/summary_converter.h b/tensorflow/contrib/tensorboard/db/summary_converter.h
new file mode 100644
index 0000000000000000000000000000000000000000..329c7f9f2f9fe25cdff8d5ac2e52c25362f624c2
--- /dev/null
+++ b/tensorflow/contrib/tensorboard/db/summary_converter.h
@@ -0,0 +1,38 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_TENSORBOARD_DB_SUMMARY_CONVERTER_H_
+#define TENSORFLOW_CONTRIB_TENSORBOARD_DB_SUMMARY_CONVERTER_H_
+
+#include "tensorflow/core/framework/summary.pb.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+// TODO(jart): Delete these methods in favor of new Python implementation.
+Status AddTensorAsScalarToSummary(const Tensor& t, const string& tag,
+                                  Summary* s);
+Status AddTensorAsHistogramToSummary(const Tensor& t, const string& tag,
+                                     Summary* s);
+Status AddTensorAsImageToSummary(const Tensor& tensor, const string& tag,
+                                 int max_images, const Tensor& bad_color,
+                                 Summary* s);
+Status AddTensorAsAudioToSummary(const Tensor& tensor, const string& tag,
+                                 int max_outputs, float sample_rate,
+                                 Summary* s);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_TENSORBOARD_DB_SUMMARY_CONVERTER_H_
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
index 37a32acb1eac7c43df00e33486c4f7676728c796..6590d6f7df4f35cad78db1fa9c4407bfb1270a2f 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
@@ -14,180 +14,242 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h"
 
-#include "tensorflow/contrib/tensorboard/db/schema.h"
+#include "tensorflow/contrib/tensorboard/db/summary_converter.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/summary.pb.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/db/sqlite.h"
 #include "tensorflow/core/lib/random/random.h"
-#include "tensorflow/core/lib/strings/stringprintf.h"
-#include "tensorflow/core/platform/fingerprint.h"
-#include "tensorflow/core/platform/snappy.h"
 #include "tensorflow/core/util/event.pb.h"
 
+// TODO(jart): Break this up into multiple files with excellent unit tests.
+// TODO(jart): Make decision to write in separate op.
+// TODO(jart): Add really good busy handling.
+
+// clang-format off
+#define CALL_SUPPORTED_TYPES(m) \
+  TF_CALL_string(m)             \
+  TF_CALL_half(m)               \
+  TF_CALL_float(m)              \
+  TF_CALL_double(m)             \
+  TF_CALL_complex64(m)          \
+  TF_CALL_complex128(m)         \
+  TF_CALL_int8(m)               \
+  TF_CALL_int16(m)              \
+  TF_CALL_int32(m)              \
+  TF_CALL_int64(m)              \
+  TF_CALL_uint8(m)              \
+  TF_CALL_uint16(m)             \
+  TF_CALL_uint32(m)             \
+  TF_CALL_uint64(m)
+// clang-format on
+
 namespace tensorflow {
 namespace {
 
-double GetWallTime(Env* env) {
+// https://www.sqlite.org/fileformat.html#record_format
+const uint64 kIdTiers[] = {
+    0x7fffffULL,        // 23-bit (3 bytes on disk)
+    0x7fffffffULL,      // 31-bit (4 bytes on disk)
+    0x7fffffffffffULL,  // 47-bit (5 bytes on disk)
+                        // remaining bits for future use
+};
+const int kMaxIdTier = sizeof(kIdTiers) / sizeof(uint64);
+const int kIdCollisionDelayMicros = 10;
+const int kMaxIdCollisions = 21;  // sum(2**i*10µs for i in range(21))~=21s
+const int64 kAbsent = 0LL;
+
+const char* kScalarPluginName = "scalars";
+const char* kImagePluginName = "images";
+const char* kAudioPluginName = "audio";
+const char* kHistogramPluginName = "histograms";
+
+const int kScalarSlots = 10000;
+const int kImageSlots = 10;
+const int kAudioSlots = 10;
+const int kHistogramSlots = 1;
+const int kTensorSlots = 10;
+
+const int64 kReserveMinBytes = 32;
+const double kReserveMultiplier = 1.5;
+
+// Flush is a misnomer because what we're actually doing is having lots
+// of commits inside any SqliteTransaction that writes potentially
+// hundreds of megs but doesn't need the transaction to maintain its
+// invariants. This ensures the WAL read penalty is small and might
+// allow writers in other processes a chance to schedule.
+const uint64 kFlushBytes = 1024 * 1024;
+
+double DoubleTime(uint64 micros) {
   // TODO(@jart): Follow precise definitions for time laid out in schema.
   // TODO(@jart): Use monotonic clock from gRPC codebase.
-  return static_cast<double>(env->NowMicros()) / 1.0e6;
+  return static_cast<double>(micros) / 1.0e6;
 }
 
-int64 MakeRandomId() {
-  // TODO(@jart): Try generating ID in 2^24 space, falling back to 2^63
-  //              https://sqlite.org/src4/doc/trunk/www/varint.wiki
-  int64 id = static_cast<int64>(random::New64() & ((1ULL << 63) - 1));
-  if (id == 0) {
-    ++id;
+string StringifyShape(const TensorShape& shape) {
+  string result;
+  bool first = true;
+  for (const auto& dim : shape) {
+    if (first) {
+      first = false;
+    } else {
+      strings::StrAppend(&result, ",");
+    }
+    strings::StrAppend(&result, dim.size);
   }
-  return id;
+  return result;
 }
 
-Status Serialize(const protobuf::MessageLite& proto, string* output) {
-  output->clear();
-  if (!proto.SerializeToString(output)) {
-    return errors::DataLoss("SerializeToString failed");
+Status CheckSupportedType(const Tensor& t) {
+#define CASE(T)                  \
+  case DataTypeToEnum<T>::value: \
+    break;
+  switch (t.dtype()) {
+    CALL_SUPPORTED_TYPES(CASE)
+    default:
+      return errors::Unimplemented(DataTypeString(t.dtype()),
+                                   " tensors unsupported on platform");
   }
   return Status::OK();
+#undef CASE
 }
 
-Status Compress(const string& data, string* output) {
-  output->clear();
-  if (!port::Snappy_Compress(data.data(), data.size(), output)) {
-    return errors::FailedPrecondition("TensorBase needs Snappy");
+Tensor AsScalar(const Tensor& t) {
+  Tensor t2{t.dtype(), {}};
+#define CASE(T)                        \
+  case DataTypeToEnum<T>::value:       \
+    t2.scalar<T>()() = t.flat<T>()(0); \
+    break;
+  switch (t.dtype()) {
+    CALL_SUPPORTED_TYPES(CASE)
+    default:
+      t2 = {DT_FLOAT, {}};
+      t2.scalar<float>()() = NAN;
+      break;
   }
-  return Status::OK();
+  return t2;
+#undef CASE
 }
 
-Status BindProto(SqliteStatement* stmt, int parameter,
-                 const protobuf::MessageLite& proto) {
-  string serialized;
-  TF_RETURN_IF_ERROR(Serialize(proto, &serialized));
-  string compressed;
-  TF_RETURN_IF_ERROR(Compress(serialized, &compressed));
-  stmt->BindBlob(parameter, compressed);
-  return Status::OK();
+void PatchPluginName(SummaryMetadata* metadata, const char* name) {
+  if (metadata->plugin_data().plugin_name().empty()) {
+    metadata->mutable_plugin_data()->set_plugin_name(name);
+  }
 }
 
-Status BindTensor(SqliteStatement* stmt, int parameter, const Tensor& t) {
-  // TODO(@jart): Make portable between little and big endian systems.
-  // TODO(@jart): Use TensorChunks with minimal copying for big tensors.
-  // TODO(@jart): Add field to indicate encoding.
-  // TODO(@jart): Allow crunch tool to re-compress with zlib instead.
-  TensorProto p;
-  t.AsProtoTensorContent(&p);
-  return BindProto(stmt, parameter, p);
+int GetSlots(const Tensor& t, const SummaryMetadata& metadata) {
+  if (metadata.plugin_data().plugin_name() == kScalarPluginName) {
+    return kScalarSlots;
+  } else if (metadata.plugin_data().plugin_name() == kImagePluginName) {
+    return kImageSlots;
+  } else if (metadata.plugin_data().plugin_name() == kAudioPluginName) {
+    return kAudioSlots;
+  } else if (metadata.plugin_data().plugin_name() == kHistogramPluginName) {
+    return kHistogramSlots;
+  } else if (t.dims() == 0 && t.dtype() != DT_STRING) {
+    return kScalarSlots;
+  } else {
+    return kTensorSlots;
+  }
 }
 
-// Tries to fudge shape and dtype to something with smaller storage.
-Status CoerceScalar(const Tensor& t, Tensor* out) {
-  switch (t.dtype()) {
-    case DT_DOUBLE:
-      *out = t;
-      break;
-    case DT_INT64:
-      *out = t;
-      break;
-    case DT_FLOAT:
-      *out = {DT_DOUBLE, {}};
-      out->scalar<double>()() = t.scalar<float>()();
-      break;
-    case DT_HALF:
-      *out = {DT_DOUBLE, {}};
-      out->scalar<double>()() = static_cast<double>(t.scalar<Eigen::half>()());
-      break;
-    case DT_INT32:
-      *out = {DT_INT64, {}};
-      out->scalar<int64>()() = t.scalar<int32>()();
-      break;
-    case DT_INT16:
-      *out = {DT_INT64, {}};
-      out->scalar<int64>()() = t.scalar<int16>()();
-      break;
-    case DT_INT8:
-      *out = {DT_INT64, {}};
-      out->scalar<int64>()() = t.scalar<int8>()();
-      break;
-    case DT_UINT32:
-      *out = {DT_INT64, {}};
-      out->scalar<int64>()() = t.scalar<uint32>()();
-      break;
-    case DT_UINT16:
-      *out = {DT_INT64, {}};
-      out->scalar<int64>()() = t.scalar<uint16>()();
-      break;
-    case DT_UINT8:
-      *out = {DT_INT64, {}};
-      out->scalar<int64>()() = t.scalar<uint8>()();
-      break;
-    default:
-      return errors::Unimplemented("Scalar summary for dtype ",
-                                   DataTypeString(t.dtype()),
-                                   " is not supported.");
-  }
-  return Status::OK();
+Status SetDescription(Sqlite* db, int64 id, const StringPiece& markdown) {
+  const char* sql = R"sql(
+    INSERT OR REPLACE INTO Descriptions (id, description) VALUES (?, ?)
+  )sql";
+  SqliteStatement insert_desc;
+  TF_RETURN_IF_ERROR(db->Prepare(sql, &insert_desc));
+  insert_desc.BindInt(1, id);
+  insert_desc.BindText(2, markdown);
+  return insert_desc.StepAndReset();
 }
 
-class Transactor {
+/// \brief Generates unique IDs randomly in the [1,2**63-1] range.
+///
+/// This class starts off generating IDs in the [1,2**23-1] range,
+/// because it's human friendly and occupies 4 bytes max on disk with
+/// SQLite's zigzag varint encoding. Then, each time a collision
+/// happens, the random space is increased by 8 bits.
+///
+/// This class uses exponential back-off so writes gradually slow down
+/// as IDs become exhausted but reads are still possible.
+///
+/// This class is thread safe.
+class IdAllocator {
  public:
-  explicit Transactor(std::shared_ptr<Sqlite> db)
-      : db_(std::move(db)),
-        begin_(db_->Prepare("BEGIN TRANSACTION")),
-        commit_(db_->Prepare("COMMIT TRANSACTION")),
-        rollback_(db_->Prepare("ROLLBACK TRANSACTION")) {}
-
-  template <typename T, typename... Args>
-  Status Transact(T callback, Args&&... args) {
-    TF_RETURN_IF_ERROR(begin_.StepAndReset());
-    Status s = callback(std::forward<Args>(args)...);
-    if (s.ok()) {
-      TF_RETURN_IF_ERROR(commit_.StepAndReset());
-    } else {
-      TF_RETURN_WITH_CONTEXT_IF_ERROR(rollback_.StepAndReset(), s.ToString());
+  IdAllocator(Env* env, Sqlite* db) : env_{env}, db_{db} {
+    DCHECK(env_ != nullptr);
+    DCHECK(db_ != nullptr);
+  }
+
+  Status CreateNewId(int64* id) LOCKS_EXCLUDED(mu_) {
+    mutex_lock lock(mu_);
+    Status s;
+    SqliteStatement stmt;
+    TF_RETURN_IF_ERROR(db_->Prepare("INSERT INTO Ids (id) VALUES (?)", &stmt));
+    for (int i = 0; i < kMaxIdCollisions; ++i) {
+      int64 tid = MakeRandomId();
+      stmt.BindInt(1, tid);
+      s = stmt.StepAndReset();
+      if (s.ok()) {
+        *id = tid;
+        break;
+      }
+      // SQLITE_CONSTRAINT maps to INVALID_ARGUMENT in sqlite.cc
+      if (s.code() != error::INVALID_ARGUMENT) break;
+      if (tier_ < kMaxIdTier) {
+        LOG(INFO) << "IdAllocator collision at tier " << tier_ << " (of "
+                  << kMaxIdTier << ") so auto-adjusting to a higher tier";
+        ++tier_;
+      } else {
+        LOG(WARNING) << "IdAllocator (attempt #" << i << ") "
+                     << "resulted in a collision at the highest tier; this "
+                        "is problematic if it happens often; you can try "
+                        "pruning the Ids table; you can also file a bug "
+                        "asking for the ID space to be increased; otherwise "
+                        "writes will gradually slow down over time until they "
+                        "become impossible";
+      }
+      env_->SleepForMicroseconds((1 << i) * kIdCollisionDelayMicros);
     }
     return s;
   }
 
  private:
-  std::shared_ptr<Sqlite> db_;
-  SqliteStatement begin_;
-  SqliteStatement commit_;
-  SqliteStatement rollback_;
+  int64 MakeRandomId() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    int64 id = static_cast<int64>(random::New64() & kIdTiers[tier_]);
+    if (id == kAbsent) ++id;
+    return id;
+  }
+
+  mutex mu_;
+  Env* const env_;
+  Sqlite* const db_;
+  int tier_ GUARDED_BY(mu_) = 0;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(IdAllocator);
 };
 
-class GraphSaver {
+class GraphWriter {
  public:
-  static Status SaveToRun(Env* env, Sqlite* db, GraphDef* graph, int64 run_id) {
-    auto get = db->Prepare("SELECT graph_id FROM Runs WHERE run_id = ?");
-    get.BindInt(1, run_id);
-    bool is_done;
-    TF_RETURN_IF_ERROR(get.Step(&is_done));
-    int64 graph_id = is_done ? 0 : get.ColumnInt(0);
-    if (graph_id == 0) {
-      graph_id = MakeRandomId();
-      // TODO(@jart): Check for ID collision.
-      auto set = db->Prepare("UPDATE Runs SET graph_id = ? WHERE run_id = ?");
-      set.BindInt(1, graph_id);
-      set.BindInt(2, run_id);
-      TF_RETURN_IF_ERROR(set.StepAndReset());
-    }
-    return Save(env, db, graph, graph_id);
-  }
-
-  static Status Save(Env* env, Sqlite* db, GraphDef* graph, int64 graph_id) {
-    GraphSaver saver{env, db, graph, graph_id};
+  static Status Save(Sqlite* db, SqliteTransaction* txn, IdAllocator* ids,
+                     GraphDef* graph, uint64 now, int64 run_id, int64* graph_id)
+      SQLITE_EXCLUSIVE_TRANSACTIONS_REQUIRED(*db) {
+    TF_RETURN_IF_ERROR(ids->CreateNewId(graph_id));
+    GraphWriter saver{db, txn, graph, now, *graph_id};
     saver.MapNameToNodeId();
-    TF_RETURN_IF_ERROR(saver.SaveNodeInputs());
-    TF_RETURN_IF_ERROR(saver.SaveNodes());
-    TF_RETURN_IF_ERROR(saver.SaveGraph());
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(saver.SaveNodeInputs(), "SaveNodeInputs");
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(saver.SaveNodes(), "SaveNodes");
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(saver.SaveGraph(run_id), "SaveGraph");
     return Status::OK();
   }
 
  private:
-  GraphSaver(Env* env, Sqlite* db, GraphDef* graph, int64 graph_id)
-      : env_(env), db_(db), graph_(graph), graph_id_(graph_id) {}
+  GraphWriter(Sqlite* db, SqliteTransaction* txn, GraphDef* graph, uint64 now,
+              int64 graph_id)
+      : db_(db), txn_(txn), graph_(graph), now_(now), graph_id_(graph_id) {}
 
   void MapNameToNodeId() {
     size_t toto = static_cast<size_t>(graph_->node_size());
@@ -202,344 +264,983 @@ class GraphSaver {
   }
 
   Status SaveNodeInputs() {
-    auto purge = db_->Prepare("DELETE FROM NodeInputs WHERE graph_id = ?");
-    purge.BindInt(1, graph_id_);
-    TF_RETURN_IF_ERROR(purge.StepAndReset());
-    auto insert = db_->Prepare(R"sql(
-      INSERT INTO NodeInputs (graph_id, node_id, idx, input_node_id, is_control)
-      VALUES (?, ?, ?, ?, ?)
-    )sql");
+    const char* sql = R"sql(
+      INSERT INTO NodeInputs (
+        graph_id,
+        node_id,
+        idx,
+        input_node_id,
+        input_node_idx,
+        is_control
+      ) VALUES (?, ?, ?, ?, ?, ?)
+    )sql";
+    SqliteStatement insert;
+    TF_RETURN_IF_ERROR(db_->Prepare(sql, &insert));
     for (int node_id = 0; node_id < graph_->node_size(); ++node_id) {
       const NodeDef& node = graph_->node(node_id);
       for (int idx = 0; idx < node.input_size(); ++idx) {
         StringPiece name = node.input(idx);
-        insert.BindInt(1, graph_id_);
-        insert.BindInt(2, node_id);
-        insert.BindInt(3, idx);
+        int64 input_node_id;
+        int64 input_node_idx = 0;
+        int64 is_control = 0;
+        size_t i = name.rfind(':');
+        if (i != StringPiece::npos) {
+          if (!strings::safe_strto64(name.substr(i + 1, name.size() - i - 1),
+                                     &input_node_idx)) {
+            return errors::DataLoss("Bad NodeDef.input: ", name);
+          }
+          name.remove_suffix(name.size() - i);
+        }
         if (!name.empty() && name[0] == '^') {
           name.remove_prefix(1);
-          insert.BindInt(5, 1);
+          is_control = 1;
         }
         auto e = name_to_node_id_.find(name);
         if (e == name_to_node_id_.end()) {
           return errors::DataLoss("Could not find node: ", name);
         }
-        insert.BindInt(4, e->second);
+        input_node_id = e->second;
+        insert.BindInt(1, graph_id_);
+        insert.BindInt(2, node_id);
+        insert.BindInt(3, idx);
+        insert.BindInt(4, input_node_id);
+        insert.BindInt(5, input_node_idx);
+        insert.BindInt(6, is_control);
+        unflushed_bytes_ += insert.size();
         TF_RETURN_WITH_CONTEXT_IF_ERROR(insert.StepAndReset(), node.name(),
                                         " -> ", name);
+        TF_RETURN_IF_ERROR(MaybeFlush());
       }
     }
     return Status::OK();
   }
 
   Status SaveNodes() {
-    auto purge = db_->Prepare("DELETE FROM Nodes WHERE graph_id = ?");
-    purge.BindInt(1, graph_id_);
-    TF_RETURN_IF_ERROR(purge.StepAndReset());
-    auto insert = db_->Prepare(R"sql(
-      INSERT INTO Nodes (graph_id, node_id, node_name, op, device, node_def)
+    const char* sql = R"sql(
+      INSERT INTO Nodes (
+        graph_id,
+        node_id,
+        node_name,
+        op,
+        device,
+        node_def)
       VALUES (?, ?, ?, ?, ?, ?)
-    )sql");
+    )sql";
+    SqliteStatement insert;
+    TF_RETURN_IF_ERROR(db_->Prepare(sql, &insert));
     for (int node_id = 0; node_id < graph_->node_size(); ++node_id) {
       NodeDef* node = graph_->mutable_node(node_id);
       insert.BindInt(1, graph_id_);
       insert.BindInt(2, node_id);
       insert.BindText(3, node->name());
+      insert.BindText(4, node->op());
+      insert.BindText(5, node->device());
       node->clear_name();
-      if (!node->op().empty()) {
-        insert.BindText(4, node->op());
-        node->clear_op();
-      }
-      if (!node->device().empty()) {
-        insert.BindText(5, node->device());
-        node->clear_device();
-      }
+      node->clear_op();
+      node->clear_device();
       node->clear_input();
-      TF_RETURN_IF_ERROR(BindProto(&insert, 6, *node));
+      string node_def;
+      if (node->SerializeToString(&node_def)) {
+        insert.BindBlobUnsafe(6, node_def);
+      }
+      unflushed_bytes_ += insert.size();
       TF_RETURN_WITH_CONTEXT_IF_ERROR(insert.StepAndReset(), node->name());
+      TF_RETURN_IF_ERROR(MaybeFlush());
     }
     return Status::OK();
   }
 
-  Status SaveGraph() {
-    auto insert = db_->Prepare(R"sql(
-      INSERT OR REPLACE INTO Graphs (graph_id, inserted_time, graph_def)
-      VALUES (?, ?, ?)
-    )sql");
-    insert.BindInt(1, graph_id_);
-    insert.BindDouble(2, GetWallTime(env_));
+  Status SaveGraph(int64 run_id) {
+    const char* sql = R"sql(
+      INSERT OR REPLACE INTO Graphs (
+        run_id,
+        graph_id,
+        inserted_time,
+        graph_def
+      ) VALUES (?, ?, ?, ?)
+    )sql";
+    SqliteStatement insert;
+    TF_RETURN_IF_ERROR(db_->Prepare(sql, &insert));
+    if (run_id != kAbsent) insert.BindInt(1, run_id);
+    insert.BindInt(2, graph_id_);
+    insert.BindDouble(3, DoubleTime(now_));
     graph_->clear_node();
-    TF_RETURN_IF_ERROR(BindProto(&insert, 3, *graph_));
+    string graph_def;
+    if (graph_->SerializeToString(&graph_def)) {
+      insert.BindBlobUnsafe(4, graph_def);
+    }
     return insert.StepAndReset();
   }
 
-  Env* env_;
-  Sqlite* db_;
-  GraphDef* graph_;
-  int64 graph_id_;
+  Status MaybeFlush() {
+    if (unflushed_bytes_ >= kFlushBytes) {
+      TF_RETURN_WITH_CONTEXT_IF_ERROR(txn_->Commit(), "flushing ",
+                                      unflushed_bytes_, " bytes");
+      unflushed_bytes_ = 0;
+    }
+    return Status::OK();
+  }
+
+  Sqlite* const db_;
+  SqliteTransaction* const txn_;
+  uint64 unflushed_bytes_ = 0;
+  GraphDef* const graph_;
+  const uint64 now_;
+  const int64 graph_id_;
   std::vector<string> name_copies_;
   std::unordered_map<StringPiece, int64, StringPieceHasher> name_to_node_id_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(GraphWriter);
+};
+
+/// \brief Run metadata manager.
+///
+/// This class gives us Tag IDs we can pass to SeriesWriter. In order
+/// to do that, rows are created in the Ids, Tags, Runs, Experiments,
+/// and Users tables.
+///
+/// This class is thread safe.
+class RunMetadata {
+ public:
+  RunMetadata(IdAllocator* ids, const string& experiment_name,
+              const string& run_name, const string& user_name)
+      : ids_{ids},
+        experiment_name_{experiment_name},
+        run_name_{run_name},
+        user_name_{user_name} {
+    DCHECK(ids_ != nullptr);
+  }
+
+  const string& experiment_name() { return experiment_name_; }
+  const string& run_name() { return run_name_; }
+  const string& user_name() { return user_name_; }
+
+  int64 run_id() LOCKS_EXCLUDED(mu_) {
+    mutex_lock lock(mu_);
+    return run_id_;
+  }
+
+  Status SetGraph(Sqlite* db, uint64 now, double computed_time,
+                  std::unique_ptr<GraphDef> g) SQLITE_TRANSACTIONS_EXCLUDED(*db)
+      LOCKS_EXCLUDED(mu_) {
+    int64 run_id;
+    {
+      mutex_lock lock(mu_);
+      TF_RETURN_IF_ERROR(InitializeRun(db, now, computed_time));
+      run_id = run_id_;
+    }
+    int64 graph_id;
+    SqliteTransaction txn(*db);  // only to increase performance
+    TF_RETURN_IF_ERROR(
+        GraphWriter::Save(db, &txn, ids_, g.get(), now, run_id, &graph_id));
+    return txn.Commit();
+  }
+
+  Status GetTagId(Sqlite* db, uint64 now, double computed_time,
+                  const string& tag_name, int64* tag_id,
+                  const SummaryMetadata& metadata) LOCKS_EXCLUDED(mu_) {
+    mutex_lock lock(mu_);
+    TF_RETURN_IF_ERROR(InitializeRun(db, now, computed_time));
+    auto e = tag_ids_.find(tag_name);
+    if (e != tag_ids_.end()) {
+      *tag_id = e->second;
+      return Status::OK();
+    }
+    TF_RETURN_IF_ERROR(ids_->CreateNewId(tag_id));
+    tag_ids_[tag_name] = *tag_id;
+    TF_RETURN_IF_ERROR(
+        SetDescription(db, *tag_id, metadata.summary_description()));
+    const char* sql = R"sql(
+      INSERT INTO Tags (
+        run_id,
+        tag_id,
+        tag_name,
+        inserted_time,
+        display_name,
+        plugin_name,
+        plugin_data
+      ) VALUES (
+        :run_id,
+        :tag_id,
+        :tag_name,
+        :inserted_time,
+        :display_name,
+        :plugin_name,
+        :plugin_data
+      )
+    )sql";
+    SqliteStatement insert;
+    TF_RETURN_IF_ERROR(db->Prepare(sql, &insert));
+    if (run_id_ != kAbsent) insert.BindInt(":run_id", run_id_);
+    insert.BindInt(":tag_id", *tag_id);
+    insert.BindTextUnsafe(":tag_name", tag_name);
+    insert.BindDouble(":inserted_time", DoubleTime(now));
+    insert.BindTextUnsafe(":display_name", metadata.display_name());
+    insert.BindTextUnsafe(":plugin_name", metadata.plugin_data().plugin_name());
+    insert.BindBlobUnsafe(":plugin_data", metadata.plugin_data().content());
+    return insert.StepAndReset();
+  }
+
+  Status GetIsWatching(Sqlite* db, bool* is_watching)
+      SQLITE_TRANSACTIONS_EXCLUDED(*db) LOCKS_EXCLUDED(mu_) {
+    mutex_lock lock(mu_);
+    if (experiment_id_ == kAbsent) {
+      *is_watching = true;
+      return Status::OK();
+    }
+    const char* sql = R"sql(
+      SELECT is_watching FROM Experiments WHERE experiment_id = ?
+    )sql";
+    SqliteStatement stmt;
+    TF_RETURN_IF_ERROR(db->Prepare(sql, &stmt));
+    stmt.BindInt(1, experiment_id_);
+    TF_RETURN_IF_ERROR(stmt.StepOnce());
+    *is_watching = stmt.ColumnInt(0) != 0;
+    return Status::OK();
+  }
+
+ private:
+  Status InitializeUser(Sqlite* db, uint64 now) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    if (user_id_ != kAbsent || user_name_.empty()) return Status::OK();
+    const char* get_sql = R"sql(
+      SELECT user_id FROM Users WHERE user_name = ?
+    )sql";
+    SqliteStatement get;
+    TF_RETURN_IF_ERROR(db->Prepare(get_sql, &get));
+    get.BindText(1, user_name_);
+    bool is_done;
+    TF_RETURN_IF_ERROR(get.Step(&is_done));
+    if (!is_done) {
+      user_id_ = get.ColumnInt(0);
+      return Status::OK();
+    }
+    TF_RETURN_IF_ERROR(ids_->CreateNewId(&user_id_));
+    const char* insert_sql = R"sql(
+      INSERT INTO Users (
+        user_id,
+        user_name,
+        inserted_time
+      ) VALUES (?, ?, ?)
+    )sql";
+    SqliteStatement insert;
+    TF_RETURN_IF_ERROR(db->Prepare(insert_sql, &insert));
+    insert.BindInt(1, user_id_);
+    insert.BindText(2, user_name_);
+    insert.BindDouble(3, DoubleTime(now));
+    TF_RETURN_IF_ERROR(insert.StepAndReset());
+    return Status::OK();
+  }
+
+  Status InitializeExperiment(Sqlite* db, uint64 now, double computed_time)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    if (experiment_name_.empty()) return Status::OK();
+    if (experiment_id_ == kAbsent) {
+      TF_RETURN_IF_ERROR(InitializeUser(db, now));
+      const char* get_sql = R"sql(
+        SELECT
+          experiment_id,
+          started_time
+        FROM
+          Experiments
+        WHERE
+          user_id IS ?
+          AND experiment_name = ?
+      )sql";
+      SqliteStatement get;
+      TF_RETURN_IF_ERROR(db->Prepare(get_sql, &get));
+      if (user_id_ != kAbsent) get.BindInt(1, user_id_);
+      get.BindText(2, experiment_name_);
+      bool is_done;
+      TF_RETURN_IF_ERROR(get.Step(&is_done));
+      if (!is_done) {
+        experiment_id_ = get.ColumnInt(0);
+        experiment_started_time_ = get.ColumnInt(1);
+      } else {
+        TF_RETURN_IF_ERROR(ids_->CreateNewId(&experiment_id_));
+        experiment_started_time_ = computed_time;
+        const char* insert_sql = R"sql(
+          INSERT INTO Experiments (
+            user_id,
+            experiment_id,
+            experiment_name,
+            inserted_time,
+            started_time,
+            is_watching
+          ) VALUES (?, ?, ?, ?, ?, ?)
+        )sql";
+        SqliteStatement insert;
+        TF_RETURN_IF_ERROR(db->Prepare(insert_sql, &insert));
+        if (user_id_ != kAbsent) insert.BindInt(1, user_id_);
+        insert.BindInt(2, experiment_id_);
+        insert.BindText(3, experiment_name_);
+        insert.BindDouble(4, DoubleTime(now));
+        insert.BindDouble(5, computed_time);
+        insert.BindInt(6, 0);
+        TF_RETURN_IF_ERROR(insert.StepAndReset());
+      }
+    }
+    if (computed_time < experiment_started_time_) {
+      experiment_started_time_ = computed_time;
+      const char* update_sql = R"sql(
+        UPDATE
+          Experiments
+        SET
+          started_time = ?
+        WHERE
+          experiment_id = ?
+      )sql";
+      SqliteStatement update;
+      TF_RETURN_IF_ERROR(db->Prepare(update_sql, &update));
+      update.BindDouble(1, computed_time);
+      update.BindInt(2, experiment_id_);
+      TF_RETURN_IF_ERROR(update.StepAndReset());
+    }
+    return Status::OK();
+  }
+
+  Status InitializeRun(Sqlite* db, uint64 now, double computed_time)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    if (run_name_.empty()) return Status::OK();
+    TF_RETURN_IF_ERROR(InitializeExperiment(db, now, computed_time));
+    if (run_id_ == kAbsent) {
+      TF_RETURN_IF_ERROR(ids_->CreateNewId(&run_id_));
+      run_started_time_ = computed_time;
+      const char* insert_sql = R"sql(
+        INSERT OR REPLACE INTO Runs (
+          experiment_id,
+          run_id,
+          run_name,
+          inserted_time,
+          started_time
+        ) VALUES (?, ?, ?, ?, ?)
+      )sql";
+      SqliteStatement insert;
+      TF_RETURN_IF_ERROR(db->Prepare(insert_sql, &insert));
+      if (experiment_id_ != kAbsent) insert.BindInt(1, experiment_id_);
+      insert.BindInt(2, run_id_);
+      insert.BindText(3, run_name_);
+      insert.BindDouble(4, DoubleTime(now));
+      insert.BindDouble(5, computed_time);
+      TF_RETURN_IF_ERROR(insert.StepAndReset());
+    }
+    if (computed_time < run_started_time_) {
+      run_started_time_ = computed_time;
+      const char* update_sql = R"sql(
+        UPDATE
+          Runs
+        SET
+          started_time = ?
+        WHERE
+          run_id = ?
+      )sql";
+      SqliteStatement update;
+      TF_RETURN_IF_ERROR(db->Prepare(update_sql, &update));
+      update.BindDouble(1, computed_time);
+      update.BindInt(2, run_id_);
+      TF_RETURN_IF_ERROR(update.StepAndReset());
+    }
+    return Status::OK();
+  }
+
+  mutex mu_;
+  IdAllocator* const ids_;
+  const string experiment_name_;
+  const string run_name_;
+  const string user_name_;
+  int64 experiment_id_ GUARDED_BY(mu_) = kAbsent;
+  int64 run_id_ GUARDED_BY(mu_) = kAbsent;
+  int64 user_id_ GUARDED_BY(mu_) = kAbsent;
+  double experiment_started_time_ GUARDED_BY(mu_) = 0.0;
+  double run_started_time_ GUARDED_BY(mu_) = 0.0;
+  std::unordered_map<string, int64> tag_ids_ GUARDED_BY(mu_);
+
+  TF_DISALLOW_COPY_AND_ASSIGN(RunMetadata);
+};
+
+/// \brief Tensor writer for a single series, e.g. Tag.
+///
+/// This class can be used to write an infinite stream of Tensors to the
+/// database in a fixed block of contiguous disk space. This is
+/// accomplished using Algorithm R reservoir sampling.
+///
+/// The reservoir consists of a fixed number of rows, which are inserted
+/// using ZEROBLOB upon receiving the first sample, which is used to
+/// predict how big the other ones are likely to be. This is done
+/// transactionally in a way that tries to be mindful of other processes
+/// that might be trying to access the same DB.
+///
+/// Once the reservoir fills up, rows are replaced at random, and writes
+/// gradually become no-ops. This allows long training to go fast
+/// without configuration. The exception is when someone is actually
+/// looking at TensorBoard. When that happens, the "keep last" behavior
+/// is turned on and Append() will always result in a write.
+///
+/// If no one is watching training, this class still holds on to the
+/// most recent "dangling" Tensor, so if Finish() is called, the most
+/// recent training state can be written to disk.
+///
+/// The randomly selected sampling points should be consistent across
+/// multiple instances.
+///
+/// This class is thread safe.
+class SeriesWriter {
+ public:
+  SeriesWriter(int64 series, int slots, RunMetadata* meta)
+      : series_{series},
+        slots_{slots},
+        meta_{meta},
+        rng_{std::mt19937_64::default_seed} {
+    DCHECK(series_ > 0);
+    DCHECK(slots_ > 0);
+  }
+
+  Status Append(Sqlite* db, int64 step, uint64 now, double computed_time,
+                Tensor t) SQLITE_TRANSACTIONS_EXCLUDED(*db)
+      LOCKS_EXCLUDED(mu_) {
+    mutex_lock lock(mu_);
+    if (rowids_.empty()) {
+      Status s = Reserve(db, t);
+      if (!s.ok()) {
+        rowids_.clear();
+        return s;
+      }
+    }
+    DCHECK(rowids_.size() == slots_);
+    int64 rowid;
+    size_t i = count_;
+    if (i < slots_) {
+      rowid = last_rowid_ = rowids_[i];
+    } else {
+      i = rng_() % (i + 1);
+      if (i < slots_) {
+        rowid = last_rowid_ = rowids_[i];
+      } else {
+        bool keep_last;
+        TF_RETURN_IF_ERROR(meta_->GetIsWatching(db, &keep_last));
+        if (!keep_last) {
+          ++count_;
+          dangling_tensor_.reset(new Tensor(std::move(t)));
+          dangling_step_ = step;
+          dangling_computed_time_ = computed_time;
+          return Status::OK();
+        }
+        rowid = last_rowid_;
+      }
+    }
+    Status s = Write(db, rowid, step, computed_time, t);
+    if (s.ok()) {
+      ++count_;
+      dangling_tensor_.reset();
+    }
+    return s;
+  }
+
+  Status Finish(Sqlite* db) SQLITE_TRANSACTIONS_EXCLUDED(*db)
+      LOCKS_EXCLUDED(mu_) {
+    mutex_lock lock(mu_);
+    // Short runs: Delete unused pre-allocated Tensors.
+    if (count_ < rowids_.size()) {
+      SqliteTransaction txn(*db);
+      const char* sql = R"sql(
+        DELETE FROM Tensors WHERE rowid = ?
+      )sql";
+      SqliteStatement deleter;
+      TF_RETURN_IF_ERROR(db->Prepare(sql, &deleter));
+      for (size_t i = count_; i < rowids_.size(); ++i) {
+        deleter.BindInt(1, rowids_[i]);
+        TF_RETURN_IF_ERROR(deleter.StepAndReset());
+      }
+      TF_RETURN_IF_ERROR(txn.Commit());
+      rowids_.clear();
+    }
+    // Long runs: Make last sample be the very most recent one.
+    if (dangling_tensor_) {
+      DCHECK(last_rowid_ != kAbsent);
+      TF_RETURN_IF_ERROR(Write(db, last_rowid_, dangling_step_,
+                               dangling_computed_time_, *dangling_tensor_));
+      dangling_tensor_.reset();
+    }
+    return Status::OK();
+  }
+
+ private:
+  Status Write(Sqlite* db, int64 rowid, int64 step, double computed_time,
+               const Tensor& t) SQLITE_TRANSACTIONS_EXCLUDED(*db) {
+    if (t.dtype() == DT_STRING) {
+      if (t.dims() == 0) {
+        return Update(db, step, computed_time, t, t.scalar<string>()(), rowid);
+      } else {
+        SqliteTransaction txn(*db);
+        TF_RETURN_IF_ERROR(
+            Update(db, step, computed_time, t, StringPiece(), rowid));
+        TF_RETURN_IF_ERROR(UpdateNdString(db, t, rowid));
+        return txn.Commit();
+      }
+    } else {
+      return Update(db, step, computed_time, t, t.tensor_data(), rowid);
+    }
+  }
+
+  Status Update(Sqlite* db, int64 step, double computed_time, const Tensor& t,
+                const StringPiece& data, int64 rowid) {
+    // TODO(jart): How can we ensure reservoir fills on replace?
+    const char* sql = R"sql(
+      UPDATE OR REPLACE
+        Tensors
+      SET
+        step = ?,
+        computed_time = ?,
+        dtype = ?,
+        shape = ?,
+        data = ?
+      WHERE
+        rowid = ?
+    )sql";
+    SqliteStatement stmt;
+    TF_RETURN_IF_ERROR(db->Prepare(sql, &stmt));
+    stmt.BindInt(1, step);
+    stmt.BindDouble(2, computed_time);
+    stmt.BindInt(3, t.dtype());
+    stmt.BindText(4, StringifyShape(t.shape()));
+    stmt.BindBlobUnsafe(5, data);
+    stmt.BindInt(6, rowid);
+    TF_RETURN_IF_ERROR(stmt.StepAndReset());
+    return Status::OK();
+  }
+
+  Status UpdateNdString(Sqlite* db, const Tensor& t, int64 tensor_rowid)
+      SQLITE_EXCLUSIVE_TRANSACTIONS_REQUIRED(*db) {
+    DCHECK_EQ(t.dtype(), DT_STRING);
+    DCHECK_GT(t.dims(), 0);
+    const char* deleter_sql = R"sql(
+      DELETE FROM TensorStrings WHERE tensor_rowid = ?
+    )sql";
+    SqliteStatement deleter;
+    TF_RETURN_IF_ERROR(db->Prepare(deleter_sql, &deleter));
+    deleter.BindInt(1, tensor_rowid);
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(deleter.StepAndReset(), tensor_rowid);
+    const char* inserter_sql = R"sql(
+      INSERT INTO TensorStrings (
+        tensor_rowid,
+        idx,
+        data
+      ) VALUES (?, ?, ?)
+    )sql";
+    SqliteStatement inserter;
+    TF_RETURN_IF_ERROR(db->Prepare(inserter_sql, &inserter));
+    auto flat = t.flat<string>();
+    for (int64 i = 0; i < flat.size(); ++i) {
+      inserter.BindInt(1, tensor_rowid);
+      inserter.BindInt(2, i);
+      inserter.BindBlobUnsafe(3, flat(i));
+      TF_RETURN_WITH_CONTEXT_IF_ERROR(inserter.StepAndReset(), "i=", i);
+    }
+    return Status::OK();
+  }
+
+  Status Reserve(Sqlite* db, const Tensor& t) SQLITE_TRANSACTIONS_EXCLUDED(*db)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    SqliteTransaction txn(*db);  // only for performance
+    unflushed_bytes_ = 0;
+    if (t.dtype() == DT_STRING) {
+      if (t.dims() == 0) {
+        TF_RETURN_IF_ERROR(ReserveData(db, &txn, t.scalar<string>()().size()));
+      } else {
+        TF_RETURN_IF_ERROR(ReserveTensors(db, &txn, kReserveMinBytes));
+      }
+    } else {
+      TF_RETURN_IF_ERROR(ReserveData(db, &txn, t.tensor_data().size()));
+    }
+    return txn.Commit();
+  }
+
+  Status ReserveData(Sqlite* db, SqliteTransaction* txn, size_t size)
+      SQLITE_EXCLUSIVE_TRANSACTIONS_REQUIRED(*db)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    int64 space =
+        static_cast<int64>(static_cast<double>(size) * kReserveMultiplier);
+    if (space < kReserveMinBytes) space = kReserveMinBytes;
+    return ReserveTensors(db, txn, space);
+  }
+
+  Status ReserveTensors(Sqlite* db, SqliteTransaction* txn,
+                        int64 reserved_bytes)
+      SQLITE_EXCLUSIVE_TRANSACTIONS_REQUIRED(*db)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    const char* sql = R"sql(
+      INSERT INTO Tensors (
+        series,
+        data
+      ) VALUES (?, ZEROBLOB(?))
+    )sql";
+    SqliteStatement insert;
+    TF_RETURN_IF_ERROR(db->Prepare(sql, &insert));
+    // TODO(jart): Maybe preallocate index pages by setting step. This
+    //             is tricky because UPDATE OR REPLACE can have a side
+    //             effect of deleting preallocated rows.
+    for (int64 i = 0; i < slots_; ++i) {
+      insert.BindInt(1, series_);
+      insert.BindInt(2, reserved_bytes);
+      TF_RETURN_WITH_CONTEXT_IF_ERROR(insert.StepAndReset(), "i=", i);
+      rowids_.push_back(db->last_insert_rowid());
+      unflushed_bytes_ += reserved_bytes;
+      TF_RETURN_IF_ERROR(MaybeFlush(db, txn));
+    }
+    return Status::OK();
+  }
+
+  Status MaybeFlush(Sqlite* db, SqliteTransaction* txn)
+      SQLITE_EXCLUSIVE_TRANSACTIONS_REQUIRED(*db)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    if (unflushed_bytes_ >= kFlushBytes) {
+      TF_RETURN_WITH_CONTEXT_IF_ERROR(txn->Commit(), "flushing ",
+                                      unflushed_bytes_, " bytes");
+      unflushed_bytes_ = 0;
+    }
+    return Status::OK();
+  }
+
+  mutex mu_;
+  const int64 series_;
+  const int slots_;
+  RunMetadata* const meta_;
+  std::mt19937_64 rng_ GUARDED_BY(mu_);
+  uint64 count_ GUARDED_BY(mu_) = 0;
+  int64 last_rowid_ GUARDED_BY(mu_) = kAbsent;
+  std::vector<int64> rowids_ GUARDED_BY(mu_);
+  uint64 unflushed_bytes_ GUARDED_BY(mu_) = 0;
+  std::unique_ptr<Tensor> dangling_tensor_ GUARDED_BY(mu_);
+  int64 dangling_step_ GUARDED_BY(mu_) = 0;
+  double dangling_computed_time_ GUARDED_BY(mu_) = 0.0;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(SeriesWriter);
+};
+
+/// \brief Tensor writer for a single Run.
+///
+/// This class farms out tensors to SeriesWriter instances. It also
+/// keeps track of whether or not someone is watching the TensorBoard
+/// GUI, so it can avoid writes when possible.
+///
+/// This class is thread safe.
+class RunWriter {
+ public:
+  explicit RunWriter(RunMetadata* meta) : meta_{meta} {}
+
+  Status Append(Sqlite* db, int64 tag_id, int64 step, uint64 now,
+                double computed_time, Tensor t, int slots)
+      SQLITE_TRANSACTIONS_EXCLUDED(*db) LOCKS_EXCLUDED(mu_) {
+    SeriesWriter* writer = GetSeriesWriter(tag_id, slots);
+    return writer->Append(db, step, now, computed_time, std::move(t));
+  }
+
+  Status Finish(Sqlite* db) SQLITE_TRANSACTIONS_EXCLUDED(*db)
+      LOCKS_EXCLUDED(mu_) {
+    mutex_lock lock(mu_);
+    if (series_writers_.empty()) return Status::OK();
+    for (auto i = series_writers_.begin(); i != series_writers_.end(); ++i) {
+      if (!i->second) continue;
+      TF_RETURN_WITH_CONTEXT_IF_ERROR(i->second->Finish(db),
+                                      "finish tag_id=", i->first);
+      i->second.reset();
+    }
+    return Status::OK();
+  }
+
+ private:
+  SeriesWriter* GetSeriesWriter(int64 tag_id, int slots) LOCKS_EXCLUDED(mu_) {
+    mutex_lock sl(mu_);
+    auto spot = series_writers_.find(tag_id);
+    if (spot == series_writers_.end()) {
+      SeriesWriter* writer = new SeriesWriter(tag_id, slots, meta_);
+      series_writers_[tag_id].reset(writer);
+      return writer;
+    } else {
+      return spot->second.get();
+    }
+  }
+
+  mutex mu_;
+  RunMetadata* const meta_;
+  std::unordered_map<int64, std::unique_ptr<SeriesWriter>> series_writers_
+      GUARDED_BY(mu_);
+
+  TF_DISALLOW_COPY_AND_ASSIGN(RunWriter);
 };
 
+/// \brief SQLite implementation of SummaryWriterInterface.
+///
+/// This class is thread safe.
 class SummaryDbWriter : public SummaryWriterInterface {
  public:
-  SummaryDbWriter(Env* env, std::shared_ptr<Sqlite> db)
+  SummaryDbWriter(Env* env, Sqlite* db, const string& experiment_name,
+                  const string& run_name, const string& user_name)
       : SummaryWriterInterface(),
-        env_(env),
-        db_(std::move(db)),
-        txn_(db_),
-        run_id_{0LL} {}
-  ~SummaryDbWriter() override {}
-
-  Status Initialize(const string& experiment_name, const string& run_name,
-                    const string& user_name) {
-    mutex_lock ml(mu_);
-    insert_tensor_ = db_->Prepare(R"sql(
-      INSERT OR REPLACE INTO Tensors (tag_id, step, computed_time, tensor)
-      VALUES (?, ?, ?, ?)
-    )sql");
-    update_metadata_ = db_->Prepare(R"sql(
-      UPDATE Tags SET metadata = ? WHERE tag_id = ?
-    )sql");
-    experiment_name_ = experiment_name;
-    run_name_ = run_name;
-    user_name_ = user_name;
-    return Status::OK();
+        env_{env},
+        db_{db},
+        ids_{env_, db_},
+        meta_{&ids_, experiment_name, run_name, user_name},
+        run_{&meta_} {
+    DCHECK(env_ != nullptr);
+    db_->Ref();
+  }
+
+  ~SummaryDbWriter() override {
+    core::ScopedUnref unref(db_);
+    Status s = run_.Finish(db_);
+    if (!s.ok()) {
+      // TODO(jart): Retry on transient errors here.
+      LOG(ERROR) << s.ToString();
+    }
+    int64 run_id = meta_.run_id();
+    if (run_id == kAbsent) return;
+    const char* sql = R"sql(
+      UPDATE Runs SET finished_time = ? WHERE run_id = ?
+    )sql";
+    SqliteStatement update;
+    s = db_->Prepare(sql, &update);
+    if (s.ok()) {
+      update.BindDouble(1, DoubleTime(env_->NowMicros()));
+      update.BindInt(2, run_id);
+      s = update.StepAndReset();
+    }
+    if (!s.ok()) {
+      LOG(ERROR) << "Failed to set Runs[" << run_id
+                 << "].finish_time: " << s.ToString();
+    }
   }
 
-  // TODO(@jart): Use transactions that COMMIT on Flush()
-  // TODO(@jart): Retry Commit() on SQLITE_BUSY with exponential back-off.
   Status Flush() override { return Status::OK(); }
 
   Status WriteTensor(int64 global_step, Tensor t, const string& tag,
                      const string& serialized_metadata) override {
-    mutex_lock ml(mu_);
-    TF_RETURN_IF_ERROR(InitializeParents());
-    // TODO(@jart): Memoize tag_id.
-    int64 tag_id;
-    TF_RETURN_IF_ERROR(GetTagId(run_id_, tag, &tag_id));
-    if (!serialized_metadata.empty()) {
-      // TODO(@jart): Only update metadata for first tensor.
-      update_metadata_.BindBlobUnsafe(1, serialized_metadata);
-      update_metadata_.BindInt(2, tag_id);
-      TF_RETURN_IF_ERROR(update_metadata_.StepAndReset());
-    }
-    // TODO(@jart): Lease blocks of rowids and *_ids to minimize fragmentation.
-    // TODO(@jart): Check for random ID collisions without needing txn retry.
-    insert_tensor_.BindInt(1, tag_id);
-    insert_tensor_.BindInt(2, global_step);
-    insert_tensor_.BindDouble(3, GetWallTime(env_));
-    if (t.shape().dims() == 0 && t.dtype() == DT_INT64) {
-      insert_tensor_.BindInt(4, t.scalar<int64>()());
-    } else if (t.shape().dims() == 0 && t.dtype() == DT_DOUBLE) {
-      insert_tensor_.BindDouble(4, t.scalar<double>()());
-    } else {
-      TF_RETURN_IF_ERROR(BindTensor(&insert_tensor_, 4, t));
+    TF_RETURN_IF_ERROR(CheckSupportedType(t));
+    SummaryMetadata metadata;
+    if (!metadata.ParseFromString(serialized_metadata)) {
+      return errors::InvalidArgument("Bad serialized_metadata");
     }
-    return insert_tensor_.StepAndReset();
+    return Write(global_step, t, tag, metadata);
   }
 
   Status WriteScalar(int64 global_step, Tensor t, const string& tag) override {
-    Tensor t2;
-    TF_RETURN_IF_ERROR(CoerceScalar(t, &t2));
-    // TODO(jart): Generate scalars plugin metadata on this value.
-    return WriteTensor(global_step, std::move(t2), tag, "");
+    TF_RETURN_IF_ERROR(CheckSupportedType(t));
+    SummaryMetadata metadata;
+    PatchPluginName(&metadata, kScalarPluginName);
+    return Write(global_step, AsScalar(t), tag, metadata);
   }
 
   Status WriteGraph(int64 global_step, std::unique_ptr<GraphDef> g) override {
-    mutex_lock ml(mu_);
-    TF_RETURN_IF_ERROR(InitializeParents());
-    return txn_.Transact(GraphSaver::SaveToRun, env_, db_.get(), g.get(),
-                         run_id_);
+    uint64 now = env_->NowMicros();
+    return meta_.SetGraph(db_, now, DoubleTime(now), std::move(g));
   }
 
   Status WriteEvent(std::unique_ptr<Event> e) override {
-    switch (e->what_case()) {
-      case Event::WhatCase::kSummary: {
-        mutex_lock ml(mu_);
-        TF_RETURN_IF_ERROR(InitializeParents());
-        const Summary& summary = e->summary();
-        for (int i = 0; i < summary.value_size(); ++i) {
-          TF_RETURN_IF_ERROR(WriteSummary(e.get(), summary.value(i)));
-        }
-        return Status::OK();
-      }
-      case Event::WhatCase::kGraphDef: {
-        std::unique_ptr<GraphDef> graph{new GraphDef};
-        if (!ParseProtoUnlimited(graph.get(), e->graph_def())) {
-          return errors::DataLoss("parse event.graph_def failed");
-        }
-        return WriteGraph(e->step(), std::move(graph));
-      }
-      default:
-        // TODO(@jart): Handle other stuff.
-        return Status::OK();
-    }
+    return MigrateEvent(std::move(e));
   }
 
   Status WriteHistogram(int64 global_step, Tensor t,
                         const string& tag) override {
-    return errors::Unimplemented(
-        "SummaryDbWriter::WriteHistogram not supported. Please use ",
-        "tensorboard.summary.histogram() instead.");
+    uint64 now = env_->NowMicros();
+    std::unique_ptr<Event> e{new Event};
+    e->set_step(global_step);
+    e->set_wall_time(DoubleTime(now));
+    TF_RETURN_IF_ERROR(
+        AddTensorAsHistogramToSummary(t, tag, e->mutable_summary()));
+    return MigrateEvent(std::move(e));
   }
 
-  Status WriteImage(int64 global_step, Tensor tensor, const string& tag,
+  Status WriteImage(int64 global_step, Tensor t, const string& tag,
                     int max_images, Tensor bad_color) override {
-    return errors::Unimplemented(
-        "SummaryDbWriter::WriteImage not supported. Please use ",
-        "tensorboard.summary.image() instead.");
+    uint64 now = env_->NowMicros();
+    std::unique_ptr<Event> e{new Event};
+    e->set_step(global_step);
+    e->set_wall_time(DoubleTime(now));
+    TF_RETURN_IF_ERROR(AddTensorAsImageToSummary(t, tag, max_images, bad_color,
+                                                 e->mutable_summary()));
+    return MigrateEvent(std::move(e));
   }
 
-  Status WriteAudio(int64 global_step, Tensor tensor, const string& tag,
+  Status WriteAudio(int64 global_step, Tensor t, const string& tag,
                     int max_outputs, float sample_rate) override {
-    return errors::Unimplemented(
-        "SummaryDbWriter::WriteAudio not supported. Please use ",
-        "tensorboard.summary.audio() instead.");
+    uint64 now = env_->NowMicros();
+    std::unique_ptr<Event> e{new Event};
+    e->set_step(global_step);
+    e->set_wall_time(DoubleTime(now));
+    TF_RETURN_IF_ERROR(AddTensorAsAudioToSummary(
+        t, tag, max_outputs, sample_rate, e->mutable_summary()));
+    return MigrateEvent(std::move(e));
   }
 
   string DebugString() override { return "SummaryDbWriter"; }
 
  private:
-  Status InitializeParents() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    if (run_id_ > 0) {
-      return Status::OK();
-    }
-    int64 user_id;
-    TF_RETURN_IF_ERROR(GetUserId(user_name_, &user_id));
-    int64 experiment_id;
+  Status Write(int64 step, const Tensor& t, const string& tag,
+               const SummaryMetadata& metadata) {
+    uint64 now = env_->NowMicros();
+    double computed_time = DoubleTime(now);
+    int64 tag_id;
     TF_RETURN_IF_ERROR(
-        GetExperimentId(user_id, experiment_name_, &experiment_id));
-    TF_RETURN_IF_ERROR(GetRunId(experiment_id, run_name_, &run_id_));
+        meta_.GetTagId(db_, now, computed_time, tag, &tag_id, metadata));
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(
+        run_.Append(db_, tag_id, step, now, computed_time, t,
+                    GetSlots(t, metadata)),
+        meta_.user_name(), "/", meta_.experiment_name(), "/", meta_.run_name(),
+        "/", tag, "@", step);
     return Status::OK();
   }
 
-  Status GetUserId(const string& user_name, int64* user_id)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    if (user_name.empty()) {
-      *user_id = 0LL;
-      return Status::OK();
-    }
-    SqliteStatement get_user_id = db_->Prepare(R"sql(
-      SELECT user_id FROM Users WHERE user_name = ?
-    )sql");
-    get_user_id.BindText(1, user_name);
-    bool is_done;
-    TF_RETURN_IF_ERROR(get_user_id.Step(&is_done));
-    if (!is_done) {
-      *user_id = get_user_id.ColumnInt(0);
-    } else {
-      *user_id = MakeRandomId();
-      SqliteStatement insert_user = db_->Prepare(R"sql(
-        INSERT INTO Users (user_id, user_name, inserted_time) VALUES (?, ?, ?)
-      )sql");
-      insert_user.BindInt(1, *user_id);
-      insert_user.BindText(2, user_name);
-      insert_user.BindDouble(3, GetWallTime(env_));
-      TF_RETURN_IF_ERROR(insert_user.StepAndReset());
+  Status MigrateEvent(std::unique_ptr<Event> e) {
+    switch (e->what_case()) {
+      case Event::WhatCase::kSummary: {
+        uint64 now = env_->NowMicros();
+        auto summaries = e->mutable_summary();
+        for (int i = 0; i < summaries->value_size(); ++i) {
+          Summary::Value* value = summaries->mutable_value(i);
+          TF_RETURN_WITH_CONTEXT_IF_ERROR(
+              MigrateSummary(e.get(), value, now), meta_.user_name(), "/",
+              meta_.experiment_name(), "/", meta_.run_name(), "/", value->tag(),
+              "@", e->step());
+        }
+        break;
+      }
+      case Event::WhatCase::kGraphDef:
+        TF_RETURN_WITH_CONTEXT_IF_ERROR(
+            MigrateGraph(e.get(), e->graph_def()), meta_.user_name(), "/",
+            meta_.experiment_name(), "/", meta_.run_name(), "/__graph__@",
+            e->step());
+        break;
+      default:
+        // TODO(@jart): Handle other stuff.
+        break;
     }
     return Status::OK();
   }
 
-  Status GetExperimentId(int64 user_id, const string& experiment_name,
-                         int64* experiment_id) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    // TODO(@jart): Compute started_time.
-    return GetId("Experiments", "user_id", user_id, "experiment_name",
-                 experiment_name, "experiment_id", experiment_id);
+  Status MigrateGraph(const Event* e, const string& graph_def) {
+    uint64 now = env_->NowMicros();
+    std::unique_ptr<GraphDef> graph{new GraphDef};
+    if (!ParseProtoUnlimited(graph.get(), graph_def)) {
+      return errors::InvalidArgument("bad proto");
+    }
+    return meta_.SetGraph(db_, now, e->wall_time(), std::move(graph));
   }
 
-  Status GetRunId(int64 experiment_id, const string& run_name, int64* run_id)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    // TODO(@jart): Compute started_time.
-    return GetId("Runs", "experiment_id", experiment_id, "run_name", run_name,
-                 "run_id", run_id);
+  Status MigrateSummary(const Event* e, Summary::Value* s, uint64 now) {
+    switch (s->value_case()) {
+      case Summary::Value::ValueCase::kTensor:
+        TF_RETURN_WITH_CONTEXT_IF_ERROR(MigrateTensor(e, s, now), "tensor");
+        break;
+      case Summary::Value::ValueCase::kSimpleValue:
+        TF_RETURN_WITH_CONTEXT_IF_ERROR(MigrateScalar(e, s, now), "scalar");
+        break;
+      case Summary::Value::ValueCase::kHisto:
+        TF_RETURN_WITH_CONTEXT_IF_ERROR(MigrateHistogram(e, s, now), "histo");
+        break;
+      case Summary::Value::ValueCase::kImage:
+        TF_RETURN_WITH_CONTEXT_IF_ERROR(MigrateImage(e, s, now), "image");
+        break;
+      case Summary::Value::ValueCase::kAudio:
+        TF_RETURN_WITH_CONTEXT_IF_ERROR(MigrateAudio(e, s, now), "audio");
+        break;
+      default:
+        break;
+    }
+    return Status::OK();
   }
 
-  Status GetTagId(int64 run_id, const string& tag_name, int64* tag_id)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    return GetId("Tags", "run_id", run_id, "tag_name", tag_name, "tag_id",
-                 tag_id);
+  Status MigrateTensor(const Event* e, Summary::Value* s, uint64 now) {
+    Tensor t;
+    if (!t.FromProto(s->tensor())) return errors::InvalidArgument("bad proto");
+    TF_RETURN_IF_ERROR(CheckSupportedType(t));
+    int64 tag_id;
+    TF_RETURN_IF_ERROR(meta_.GetTagId(db_, now, e->wall_time(), s->tag(),
+                                      &tag_id, s->metadata()));
+    return run_.Append(db_, tag_id, e->step(), now, e->wall_time(), t,
+                       GetSlots(t, s->metadata()));
   }
 
-  Status GetId(const char* table, const char* parent_id_field, int64 parent_id,
-               const char* name_field, const string& name, const char* id_field,
-               int64* id) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    if (name.empty()) {
-      *id = 0LL;
-      return Status::OK();
-    }
-    SqliteStatement select = db_->Prepare(
-        strings::Printf("SELECT %s FROM %s WHERE %s = ? AND %s = ?", id_field,
-                        table, parent_id_field, name_field));
-    if (parent_id > 0) {
-      select.BindInt(1, parent_id);
+  // TODO(jart): Refactor Summary -> Tensor logic into separate file.
+
+  Status MigrateScalar(const Event* e, Summary::Value* s, uint64 now) {
+    // See tensorboard/plugins/scalar/summary.py and data_compat.py
+    Tensor t{DT_FLOAT, {}};
+    t.scalar<float>()() = s->simple_value();
+    int64 tag_id;
+    PatchPluginName(s->mutable_metadata(), kScalarPluginName);
+    TF_RETURN_IF_ERROR(meta_.GetTagId(db_, now, e->wall_time(), s->tag(),
+                                      &tag_id, s->metadata()));
+    return run_.Append(db_, tag_id, e->step(), now, e->wall_time(),
+                       std::move(t), kScalarSlots);
+  }
+
+  Status MigrateHistogram(const Event* e, Summary::Value* s, uint64 now) {
+    const HistogramProto& histo = s->histo();
+    int k = histo.bucket_size();
+    if (k != histo.bucket_limit_size()) {
+      return errors::InvalidArgument("size mismatch");
     }
-    select.BindText(2, name);
-    bool is_done;
-    TF_RETURN_IF_ERROR(select.Step(&is_done));
-    if (!is_done) {
-      *id = select.ColumnInt(0);
-    } else {
-      *id = MakeRandomId();
-      SqliteStatement insert = db_->Prepare(strings::Printf(
-          "INSERT INTO %s (%s, %s, %s, inserted_time) VALUES (?, ?, ?, ?)",
-          table, parent_id_field, id_field, name_field));
-      if (parent_id > 0) {
-        insert.BindInt(1, parent_id);
-      }
-      insert.BindInt(2, *id);
-      insert.BindText(3, name);
-      insert.BindDouble(4, GetWallTime(env_));
-      TF_RETURN_IF_ERROR(insert.StepAndReset());
+    // See tensorboard/plugins/histogram/summary.py and data_compat.py
+    Tensor t{DT_DOUBLE, {k, 3}};
+    auto data = t.flat<double>();
+    for (int i = 0; i < k; ++i) {
+      double left_edge = ((i - 1 >= 0) ? histo.bucket_limit(i - 1)
+                                       : std::numeric_limits<double>::min());
+      double right_edge = ((i + 1 < k) ? histo.bucket_limit(i + 1)
+                                       : std::numeric_limits<double>::max());
+      data(i + 0) = left_edge;
+      data(i + 1) = right_edge;
+      data(i + 2) = histo.bucket(i);
     }
-    return Status::OK();
+    int64 tag_id;
+    PatchPluginName(s->mutable_metadata(), kHistogramPluginName);
+    TF_RETURN_IF_ERROR(meta_.GetTagId(db_, now, e->wall_time(), s->tag(),
+                                      &tag_id, s->metadata()));
+    return run_.Append(db_, tag_id, e->step(), now, e->wall_time(),
+                       std::move(t), kHistogramSlots);
   }
 
-  Status WriteSummary(const Event* e, const Summary::Value& summary)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+  Status MigrateImage(const Event* e, Summary::Value* s, uint64 now) {
+    // See tensorboard/plugins/image/summary.py and data_compat.py
+    Tensor t{DT_STRING, {3}};
+    auto img = s->mutable_image();
+    t.flat<string>()(0) = strings::StrCat(img->width());
+    t.flat<string>()(1) = strings::StrCat(img->height());
+    t.flat<string>()(2) = std::move(*img->mutable_encoded_image_string());
     int64 tag_id;
-    TF_RETURN_IF_ERROR(GetTagId(run_id_, summary.tag(), &tag_id));
-    insert_tensor_.BindInt(1, tag_id);
-    insert_tensor_.BindInt(2, e->step());
-    insert_tensor_.BindDouble(3, e->wall_time());
-    switch (summary.value_case()) {
-      case Summary::Value::ValueCase::kSimpleValue:
-        insert_tensor_.BindDouble(4, summary.simple_value());
-        break;
-      default:
-        // TODO(@jart): Handle the rest.
-        return Status::OK();
-    }
-    return insert_tensor_.StepAndReset();
+    PatchPluginName(s->mutable_metadata(), kImagePluginName);
+    TF_RETURN_IF_ERROR(meta_.GetTagId(db_, now, e->wall_time(), s->tag(),
+                                      &tag_id, s->metadata()));
+    return run_.Append(db_, tag_id, e->step(), now, e->wall_time(),
+                       std::move(t), kImageSlots);
   }
 
-  mutex mu_;
-  Env* env_;
-  std::shared_ptr<Sqlite> db_ GUARDED_BY(mu_);
-  Transactor txn_ GUARDED_BY(mu_);
-  SqliteStatement insert_tensor_ GUARDED_BY(mu_);
-  SqliteStatement update_metadata_ GUARDED_BY(mu_);
-  string user_name_ GUARDED_BY(mu_);
-  string experiment_name_ GUARDED_BY(mu_);
-  string run_name_ GUARDED_BY(mu_);
-  int64 run_id_ GUARDED_BY(mu_);
+  Status MigrateAudio(const Event* e, Summary::Value* s, uint64 now) {
+    // See tensorboard/plugins/audio/summary.py and data_compat.py
+    Tensor t{DT_STRING, {1, 2}};
+    auto wav = s->mutable_audio();
+    t.flat<string>()(0) = std::move(*wav->mutable_encoded_audio_string());
+    t.flat<string>()(1) = "";
+    int64 tag_id;
+    PatchPluginName(s->mutable_metadata(), kAudioPluginName);
+    TF_RETURN_IF_ERROR(meta_.GetTagId(db_, now, e->wall_time(), s->tag(),
+                                      &tag_id, s->metadata()));
+    return run_.Append(db_, tag_id, e->step(), now, e->wall_time(),
+                       std::move(t), kAudioSlots);
+  }
+
+  Env* const env_;
+  Sqlite* const db_;
+  IdAllocator ids_;
+  RunMetadata meta_;
+  RunWriter run_;
 };
 
 }  // namespace
 
-Status CreateSummaryDbWriter(std::shared_ptr<Sqlite> db,
-                             const string& experiment_name,
+Status CreateSummaryDbWriter(Sqlite* db, const string& experiment_name,
                              const string& run_name, const string& user_name,
                              Env* env, SummaryWriterInterface** result) {
-  TF_RETURN_IF_ERROR(SetupTensorboardSqliteDb(db));
-  SummaryDbWriter* w = new SummaryDbWriter(env, std::move(db));
-  const Status s = w->Initialize(experiment_name, run_name, user_name);
-  if (!s.ok()) {
-    w->Unref();
-    *result = nullptr;
-    return s;
-  }
-  *result = w;
+  *result = new SummaryDbWriter(env, db, experiment_name, run_name, user_name);
   return Status::OK();
 }
 
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.h b/tensorflow/contrib/tensorboard/db/summary_db_writer.h
index 74f61e50b7cdf4b4151162a2e1e5e0af0d468be2..746da1533b157bf7b2be5c85ada8b61ba224cc3e 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer.h
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.h
@@ -19,21 +19,21 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/db/sqlite.h"
 #include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
 /// \brief Creates SQLite SummaryWriterInterface.
 ///
 /// This can be used to write tensors from the execution graph directly
-/// to a database. The schema will be created automatically, but only
-/// if necessary. Entries in the Users, Experiments, and Runs tables
-/// will be created automatically if they don't already exist.
+/// to a database. The schema must be created beforehand. Entries in
+/// Users, Experiments, and Runs tables will be created automatically
+/// if they don't already exist.
 ///
 /// Please note that the type signature of this function may change in
 /// the future if support for other DBs is added to core.
-Status CreateSummaryDbWriter(std::shared_ptr<Sqlite> db,
-                             const string& experiment_name,
+///
+/// The result holds a new reference to db.
+Status CreateSummaryDbWriter(Sqlite* db, const string& experiment_name,
                              const string& run_name, const string& user_name,
                              Env* env, SummaryWriterInterface** result);
 
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
index 625861fa6b137c6880c2072d7522f11c22720774..29b8063218de72aac1a73bbfb440e75fcdd5013f 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
@@ -14,6 +14,8 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h"
 
+#include "tensorflow/contrib/tensorboard/db/schema.h"
+#include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/summary.pb.h"
@@ -27,8 +29,6 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-const float kTolerance = 1e-5;
-
 Tensor MakeScalarInt64(int64 x) {
   Tensor t(DT_INT64, TensorShape({}));
   t.scalar<int64>()() = x;
@@ -48,17 +48,22 @@ class FakeClockEnv : public EnvWrapper {
 
 class SummaryDbWriterTest : public ::testing::Test {
  protected:
-  void SetUp() override { db_ = Sqlite::Open(":memory:").ValueOrDie(); }
+  void SetUp() override {
+    TF_ASSERT_OK(Sqlite::Open(":memory:", SQLITE_OPEN_READWRITE, &db_));
+    TF_ASSERT_OK(SetupTensorboardSqliteDb(db_));
+  }
 
   void TearDown() override {
     if (writer_ != nullptr) {
       writer_->Unref();
       writer_ = nullptr;
     }
+    db_->Unref();
+    db_ = nullptr;
   }
 
   int64 QueryInt(const string& sql) {
-    SqliteStatement stmt = db_->Prepare(sql);
+    SqliteStatement stmt = db_->PrepareOrDie(sql);
     bool is_done;
     Status s = stmt.Step(&is_done);
     if (!s.ok() || is_done) {
@@ -69,7 +74,7 @@ class SummaryDbWriterTest : public ::testing::Test {
   }
 
   double QueryDouble(const string& sql) {
-    SqliteStatement stmt = db_->Prepare(sql);
+    SqliteStatement stmt = db_->PrepareOrDie(sql);
     bool is_done;
     Status s = stmt.Step(&is_done);
     if (!s.ok() || is_done) {
@@ -80,7 +85,7 @@ class SummaryDbWriterTest : public ::testing::Test {
   }
 
   string QueryString(const string& sql) {
-    SqliteStatement stmt = db_->Prepare(sql);
+    SqliteStatement stmt = db_->PrepareOrDie(sql);
     bool is_done;
     Status s = stmt.Step(&is_done);
     if (!s.ok() || is_done) {
@@ -91,7 +96,7 @@ class SummaryDbWriterTest : public ::testing::Test {
   }
 
   FakeClockEnv env_;
-  std::shared_ptr<Sqlite> db_;
+  Sqlite* db_ = nullptr;
   SummaryWriterInterface* writer_ = nullptr;
 };
 
@@ -101,6 +106,7 @@ TEST_F(SummaryDbWriterTest, NothingWritten_NoRowsCreated) {
   TF_ASSERT_OK(writer_->Flush());
   writer_->Unref();
   writer_ = nullptr;
+  EXPECT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Ids"));
   EXPECT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Users"));
   EXPECT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Experiments"));
   EXPECT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Runs"));
@@ -109,20 +115,31 @@ TEST_F(SummaryDbWriterTest, NothingWritten_NoRowsCreated) {
 }
 
 TEST_F(SummaryDbWriterTest, TensorsWritten_RowsGetInitialized) {
+  SummaryMetadata metadata;
+  metadata.set_display_name("display_name");
+  metadata.set_summary_description("description");
+  metadata.mutable_plugin_data()->set_plugin_name("plugin_name");
+  metadata.mutable_plugin_data()->set_content("plugin_data");
+  SummaryMetadata metadata_nope;
+  metadata_nope.set_display_name("nope");
+  metadata_nope.set_summary_description("nope");
+  metadata_nope.mutable_plugin_data()->set_plugin_name("nope");
+  metadata_nope.mutable_plugin_data()->set_content("nope");
   TF_ASSERT_OK(CreateSummaryDbWriter(db_, "mad-science", "train", "jart", &env_,
                                      &writer_));
   env_.AdvanceByMillis(23);
   TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy",
-                                    "this-is-metaaa"));
+                                    metadata.SerializeAsString()));
   env_.AdvanceByMillis(23);
-  TF_ASSERT_OK(writer_->WriteTensor(2, MakeScalarInt64(314LL), "taggy", ""));
+  TF_ASSERT_OK(writer_->WriteTensor(2, MakeScalarInt64(314LL), "taggy",
+                                    metadata_nope.SerializeAsString()));
   TF_ASSERT_OK(writer_->Flush());
 
   ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Users"));
   ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Experiments"));
   ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Runs"));
   ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Tags"));
-  ASSERT_EQ(2LL, QueryInt("SELECT COUNT(*) FROM Tensors"));
+  ASSERT_EQ(10000LL, QueryInt("SELECT COUNT(*) FROM Tensors"));
 
   int64 user_id = QueryInt("SELECT user_id FROM Users");
   int64 experiment_id = QueryInt("SELECT experiment_id FROM Experiments");
@@ -148,33 +165,30 @@ TEST_F(SummaryDbWriterTest, TensorsWritten_RowsGetInitialized) {
   EXPECT_EQ(run_id, QueryInt("SELECT run_id FROM Tags"));
   EXPECT_EQ("taggy", QueryString("SELECT tag_name FROM Tags"));
   EXPECT_EQ(0.023, QueryDouble("SELECT inserted_time FROM Tags"));
-  EXPECT_EQ("this-is-metaaa", QueryString("SELECT metadata FROM Tags"));
 
-  EXPECT_EQ(tag_id, QueryInt("SELECT tag_id FROM Tensors WHERE step = 1"));
+  EXPECT_EQ("display_name", QueryString("SELECT display_name FROM Tags"));
+  EXPECT_EQ("plugin_name", QueryString("SELECT plugin_name FROM Tags"));
+  EXPECT_EQ("plugin_data", QueryString("SELECT plugin_data FROM Tags"));
+  EXPECT_EQ("description", QueryString("SELECT description FROM Descriptions"));
+
+  EXPECT_EQ(tag_id, QueryInt("SELECT series FROM Tensors WHERE step = 1"));
   EXPECT_EQ(0.023,
             QueryDouble("SELECT computed_time FROM Tensors WHERE step = 1"));
-  EXPECT_EQ("this-is-metaaa", QueryString("SELECT metadata FROM Tags"));
-  EXPECT_FALSE(
-      QueryString("SELECT tensor FROM Tensors WHERE step = 1").empty());
 
-  EXPECT_EQ(tag_id, QueryInt("SELECT tag_id FROM Tensors WHERE step = 2"));
+  EXPECT_EQ(tag_id, QueryInt("SELECT series FROM Tensors WHERE step = 2"));
   EXPECT_EQ(0.046,
             QueryDouble("SELECT computed_time FROM Tensors WHERE step = 2"));
-  EXPECT_EQ("this-is-metaaa", QueryString("SELECT metadata FROM Tags"));
-  EXPECT_FALSE(
-      QueryString("SELECT tensor FROM Tensors WHERE step = 2").empty());
 }
 
 TEST_F(SummaryDbWriterTest, EmptyParentNames_NoParentsCreated) {
   TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_));
-  TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy",
-                                    "this-is-metaaa"));
+  TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy", ""));
   TF_ASSERT_OK(writer_->Flush());
   ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Users"));
   ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Experiments"));
   ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Runs"));
   ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Tags"));
-  ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Tensors"));
+  ASSERT_EQ(10000LL, QueryInt("SELECT COUNT(*) FROM Tensors"));
 }
 
 TEST_F(SummaryDbWriterTest, WriteEvent_Scalar) {
@@ -191,33 +205,24 @@ TEST_F(SummaryDbWriterTest, WriteEvent_Scalar) {
   TF_ASSERT_OK(writer_->WriteEvent(std::move(e)));
   TF_ASSERT_OK(writer_->Flush());
   ASSERT_EQ(2LL, QueryInt("SELECT COUNT(*) FROM Tags"));
-  ASSERT_EQ(2LL, QueryInt("SELECT COUNT(*) FROM Tensors"));
+  ASSERT_EQ(20000LL, QueryInt("SELECT COUNT(*) FROM Tensors"));
   int64 tag1_id = QueryInt("SELECT tag_id FROM Tags WHERE tag_name = 'π'");
   int64 tag2_id = QueryInt("SELECT tag_id FROM Tags WHERE tag_name = 'φ'");
   EXPECT_GT(tag1_id, 0LL);
   EXPECT_GT(tag2_id, 0LL);
   EXPECT_EQ(123.456, QueryDouble(strings::StrCat(
-                         "SELECT computed_time FROM Tensors WHERE tag_id = ",
+                         "SELECT computed_time FROM Tensors WHERE series = ",
                          tag1_id, " AND step = 7")));
   EXPECT_EQ(123.456, QueryDouble(strings::StrCat(
-                         "SELECT computed_time FROM Tensors WHERE tag_id = ",
+                         "SELECT computed_time FROM Tensors WHERE series = ",
                          tag2_id, " AND step = 7")));
-  EXPECT_NEAR(3.14,
-              QueryDouble(strings::StrCat(
-                  "SELECT tensor FROM Tensors WHERE tag_id = ", tag1_id,
-                  " AND step = 7")),
-              kTolerance);  // Summary::simple_value is float
-  EXPECT_NEAR(1.61,
-              QueryDouble(strings::StrCat(
-                  "SELECT tensor FROM Tensors WHERE tag_id = ", tag2_id,
-                  " AND step = 7")),
-              kTolerance);
 }
 
 TEST_F(SummaryDbWriterTest, WriteGraph) {
   TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "R", "", &env_, &writer_));
   env_.AdvanceByMillis(23);
   GraphDef graph;
+  graph.mutable_library()->add_gradient()->set_function_name("funk");
   NodeDef* node = graph.add_node();
   node->set_name("x");
   node->set_op("Placeholder");
@@ -243,11 +248,17 @@ TEST_F(SummaryDbWriterTest, WriteGraph) {
   ASSERT_EQ(4LL, QueryInt("SELECT COUNT(*) FROM Nodes"));
   ASSERT_EQ(3LL, QueryInt("SELECT COUNT(*) FROM NodeInputs"));
 
+  ASSERT_EQ(QueryInt("SELECT run_id FROM Runs"),
+            QueryInt("SELECT run_id FROM Graphs"));
+
   int64 graph_id = QueryInt("SELECT graph_id FROM Graphs");
   EXPECT_GT(graph_id, 0LL);
-  EXPECT_EQ(graph_id, QueryInt("SELECT graph_id FROM Runs"));
   EXPECT_EQ(0.023, QueryDouble("SELECT inserted_time FROM Graphs"));
-  EXPECT_FALSE(QueryString("SELECT graph_def FROM Graphs").empty());
+
+  GraphDef graph2;
+  graph2.ParseFromString(QueryString("SELECT graph_def FROM Graphs"));
+  EXPECT_EQ(0, graph2.node_size());
+  EXPECT_EQ("funk", graph2.library().gradient(0).function_name());
 
   EXPECT_EQ("x", QueryString("SELECT node_name FROM Nodes WHERE node_id = 0"));
   EXPECT_EQ("y", QueryString("SELECT node_name FROM Nodes WHERE node_id = 1"));
@@ -290,31 +301,38 @@ TEST_F(SummaryDbWriterTest, WriteGraph) {
   EXPECT_EQ(1LL, QueryInt("SELECT is_control FROM NodeInputs WHERE idx = 2"));
 }
 
-TEST_F(SummaryDbWriterTest, WriteScalarInt32_CoercesToInt64) {
-  TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_));
-  Tensor t(DT_INT32, {});
-  t.scalar<int32>()() = -17;
-  TF_ASSERT_OK(writer_->WriteScalar(1, t, "t"));
-  TF_ASSERT_OK(writer_->Flush());
-  ASSERT_EQ(-17LL, QueryInt("SELECT tensor FROM Tensors"));
-}
-
-TEST_F(SummaryDbWriterTest, WriteScalarInt8_CoercesToInt64) {
-  TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_));
-  Tensor t(DT_INT8, {});
-  t.scalar<int8>()() = static_cast<int8>(-17);
-  TF_ASSERT_OK(writer_->WriteScalar(1, t, "t"));
+TEST_F(SummaryDbWriterTest, UsesIdsTable) {
+  SummaryMetadata metadata;
+  TF_ASSERT_OK(CreateSummaryDbWriter(db_, "mad-science", "train", "jart", &env_,
+                                     &writer_));
+  env_.AdvanceByMillis(23);
+  TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy",
+                                    metadata.SerializeAsString()));
   TF_ASSERT_OK(writer_->Flush());
-  ASSERT_EQ(-17LL, QueryInt("SELECT tensor FROM Tensors"));
+  ASSERT_EQ(4LL, QueryInt("SELECT COUNT(*) FROM Ids"));
+  EXPECT_EQ(4LL, QueryInt(strings::StrCat(
+                     "SELECT COUNT(*) FROM Ids WHERE id IN (",
+                     QueryInt("SELECT user_id FROM Users"), ", ",
+                     QueryInt("SELECT experiment_id FROM Experiments"), ", ",
+                     QueryInt("SELECT run_id FROM Runs"), ", ",
+                     QueryInt("SELECT tag_id FROM Tags"), ")")));
 }
 
-TEST_F(SummaryDbWriterTest, WriteScalarUint8_CoercesToInt64) {
-  TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_));
-  Tensor t(DT_UINT8, {});
-  t.scalar<uint8>()() = static_cast<uint8>(254);
-  TF_ASSERT_OK(writer_->WriteScalar(1, t, "t"));
+TEST_F(SummaryDbWriterTest, SetsRunFinishedTime) {
+  SummaryMetadata metadata;
+  TF_ASSERT_OK(CreateSummaryDbWriter(db_, "mad-science", "train", "jart", &env_,
+                                     &writer_));
+  env_.AdvanceByMillis(23);
+  TF_ASSERT_OK(writer_->WriteTensor(1, MakeScalarInt64(123LL), "taggy",
+                                    metadata.SerializeAsString()));
   TF_ASSERT_OK(writer_->Flush());
-  ASSERT_EQ(254LL, QueryInt("SELECT tensor FROM Tensors"));
+  ASSERT_EQ(0.023, QueryDouble("SELECT started_time FROM Runs"));
+  ASSERT_EQ(0.0, QueryDouble("SELECT finished_time FROM Runs"));
+  env_.AdvanceByMillis(23);
+  writer_->Unref();
+  writer_ = nullptr;
+  ASSERT_EQ(0.023, QueryDouble("SELECT started_time FROM Runs"));
+  ASSERT_EQ(0.046, QueryDouble("SELECT finished_time FROM Runs"));
 }
 
 }  // namespace
diff --git a/tensorflow/contrib/tensorboard/db/summary_file_writer.cc b/tensorflow/contrib/tensorboard/db/summary_file_writer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d891e86e53f4d760bfaea0e67601cfda037a4564
--- /dev/null
+++ b/tensorflow/contrib/tensorboard/db/summary_file_writer.cc
@@ -0,0 +1,191 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/tensorboard/db/summary_file_writer.h"
+
+#include "tensorflow/contrib/tensorboard/db/summary_converter.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/framework/summary.pb.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/util/events_writer.h"
+#include "tensorflow/core/util/ptr_util.h"
+
+namespace tensorflow {
+namespace {
+
+class SummaryFileWriter : public SummaryWriterInterface {
+ public:
+  SummaryFileWriter(int max_queue, int flush_millis, Env* env)
+      : SummaryWriterInterface(),
+        is_initialized_(false),
+        max_queue_(max_queue),
+        flush_millis_(flush_millis),
+        env_(env) {}
+
+  Status Initialize(const string& logdir, const string& filename_suffix) {
+    const Status is_dir = env_->IsDirectory(logdir);
+    if (!is_dir.ok()) {
+      if (is_dir.code() != tensorflow::error::NOT_FOUND) {
+        return is_dir;
+      }
+      TF_RETURN_IF_ERROR(env_->CreateDir(logdir));
+    }
+    mutex_lock ml(mu_);
+    events_writer_ =
+        tensorflow::MakeUnique<EventsWriter>(io::JoinPath(logdir, "events"));
+    if (!events_writer_->InitWithSuffix(filename_suffix)) {
+      return errors::Unknown("Could not initialize events writer.");
+    }
+    last_flush_ = env_->NowMicros();
+    is_initialized_ = true;
+    return Status::OK();
+  }
+
+  Status Flush() override {
+    mutex_lock ml(mu_);
+    if (!is_initialized_) {
+      return errors::FailedPrecondition("Class was not properly initialized.");
+    }
+    return InternalFlush();
+  }
+
+  ~SummaryFileWriter() override {
+    (void)Flush();  // Ignore errors.
+  }
+
+  Status WriteTensor(int64 global_step, Tensor t, const string& tag,
+                     const string& serialized_metadata) override {
+    std::unique_ptr<Event> e{new Event};
+    e->set_step(global_step);
+    e->set_wall_time(GetWallTime());
+    Summary::Value* v = e->mutable_summary()->add_value();
+    t.AsProtoTensorContent(v->mutable_tensor());
+    v->set_tag(tag);
+    if (!serialized_metadata.empty()) {
+      v->mutable_metadata()->ParseFromString(serialized_metadata);
+    }
+    return WriteEvent(std::move(e));
+  }
+
+  Status WriteScalar(int64 global_step, Tensor t, const string& tag) override {
+    std::unique_ptr<Event> e{new Event};
+    e->set_step(global_step);
+    e->set_wall_time(GetWallTime());
+    TF_RETURN_IF_ERROR(
+        AddTensorAsScalarToSummary(t, tag, e->mutable_summary()));
+    return WriteEvent(std::move(e));
+  }
+
+  Status WriteHistogram(int64 global_step, Tensor t,
+                        const string& tag) override {
+    std::unique_ptr<Event> e{new Event};
+    e->set_step(global_step);
+    e->set_wall_time(GetWallTime());
+    TF_RETURN_IF_ERROR(
+        AddTensorAsHistogramToSummary(t, tag, e->mutable_summary()));
+    return WriteEvent(std::move(e));
+  }
+
+  Status WriteImage(int64 global_step, Tensor t, const string& tag,
+                    int max_images, Tensor bad_color) override {
+    std::unique_ptr<Event> e{new Event};
+    e->set_step(global_step);
+    e->set_wall_time(GetWallTime());
+    TF_RETURN_IF_ERROR(AddTensorAsImageToSummary(t, tag, max_images, bad_color,
+                                                 e->mutable_summary()));
+    return WriteEvent(std::move(e));
+  }
+
+  Status WriteAudio(int64 global_step, Tensor t, const string& tag,
+                    int max_outputs, float sample_rate) override {
+    std::unique_ptr<Event> e{new Event};
+    e->set_step(global_step);
+    e->set_wall_time(GetWallTime());
+    TF_RETURN_IF_ERROR(AddTensorAsAudioToSummary(
+        t, tag, max_outputs, sample_rate, e->mutable_summary()));
+    return WriteEvent(std::move(e));
+  }
+
+  Status WriteGraph(int64 global_step,
+                    std::unique_ptr<GraphDef> graph) override {
+    std::unique_ptr<Event> e{new Event};
+    e->set_step(global_step);
+    e->set_wall_time(GetWallTime());
+    graph->SerializeToString(e->mutable_graph_def());
+    return WriteEvent(std::move(e));
+  }
+
+  Status WriteEvent(std::unique_ptr<Event> event) override {
+    mutex_lock ml(mu_);
+    queue_.emplace_back(std::move(event));
+    if (queue_.size() >= max_queue_ ||
+        env_->NowMicros() - last_flush_ > 1000 * flush_millis_) {
+      return InternalFlush();
+    }
+    return Status::OK();
+  }
+
+  string DebugString() override { return "SummaryFileWriter"; }
+
+ private:
+  double GetWallTime() {
+    return static_cast<double>(env_->NowMicros()) / 1.0e6;
+  }
+
+  Status InternalFlush() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    for (const std::unique_ptr<Event>& e : queue_) {
+      events_writer_->WriteEvent(*e);
+    }
+    queue_.clear();
+    if (!events_writer_->Flush()) {
+      return errors::InvalidArgument("Could not flush events file.");
+    }
+    last_flush_ = env_->NowMicros();
+    return Status::OK();
+  }
+
+  bool is_initialized_;
+  const int max_queue_;
+  const int flush_millis_;
+  uint64 last_flush_;
+  Env* env_;
+  mutex mu_;
+  std::vector<std::unique_ptr<Event>> queue_ GUARDED_BY(mu_);
+  // A pointer to allow deferred construction.
+  std::unique_ptr<EventsWriter> events_writer_ GUARDED_BY(mu_);
+  std::vector<std::pair<string, SummaryMetadata>> registered_summaries_
+      GUARDED_BY(mu_);
+};
+
+}  // namespace
+
+Status CreateSummaryFileWriter(int max_queue, int flush_millis,
+                               const string& logdir,
+                               const string& filename_suffix, Env* env,
+                               SummaryWriterInterface** result) {
+  SummaryFileWriter* w = new SummaryFileWriter(max_queue, flush_millis, env);
+  const Status s = w->Initialize(logdir, filename_suffix);
+  if (!s.ok()) {
+    w->Unref();
+    *result = nullptr;
+    return s;
+  }
+  *result = w;
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tensorboard/db/summary_file_writer.h b/tensorflow/contrib/tensorboard/db/summary_file_writer.h
new file mode 100644
index 0000000000000000000000000000000000000000..73b0a5542beabdc460c32156dd44aacc5f08610a
--- /dev/null
+++ b/tensorflow/contrib/tensorboard/db/summary_file_writer.h
@@ -0,0 +1,43 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_TENSORBOARD_DB_SUMMARY_FILE_WRITER_H_
+#define TENSORFLOW_CONTRIB_TENSORBOARD_DB_SUMMARY_FILE_WRITER_H_
+
+#include "tensorflow/core/kernels/summary_interface.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+/// \brief Creates SummaryWriterInterface which writes to a file.
+///
+/// The file is an append-only records file of tf.Event protos. That
+/// makes this summary writer suitable for file systems like GCS.
+///
+/// It will enqueue up to max_queue summaries, and flush at least every
+/// flush_millis milliseconds. The summaries will be written to the
+/// directory specified by logdir and with the filename suffixed by
+/// filename_suffix. The caller owns a reference to result if the
+/// returned status is ok. The Env object must not be destroyed until
+/// after the returned writer.
+Status CreateSummaryFileWriter(int max_queue, int flush_millis,
+                               const string& logdir,
+                               const string& filename_suffix, Env* env,
+                               SummaryWriterInterface** result);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_TENSORBOARD_DB_SUMMARY_FILE_WRITER_H_
diff --git a/tensorflow/core/kernels/summary_interface_test.cc b/tensorflow/contrib/tensorboard/db/summary_file_writer_test.cc
similarity index 93%
rename from tensorflow/core/kernels/summary_interface_test.cc
rename to tensorflow/contrib/tensorboard/db/summary_file_writer_test.cc
index 58e021a0b3e889ce1efe1bb5c73bcc74e16db139..c61b4655961664a6c9c22a5f6d6f26a55c34bfcd 100644
--- a/tensorflow/core/kernels/summary_interface_test.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_file_writer_test.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/summary_interface.h"
+#include "tensorflow/contrib/tensorboard/db/summary_file_writer.h"
 
 #include "tensorflow/core/framework/summary.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -37,7 +37,7 @@ class FakeClockEnv : public EnvWrapper {
   uint64 current_millis_;
 };
 
-class SummaryInterfaceTest : public ::testing::Test {
+class SummaryFileWriterTest : public ::testing::Test {
  protected:
   Status SummaryTestHelper(
       const string& test_name,
@@ -47,8 +47,8 @@ class SummaryInterfaceTest : public ::testing::Test {
     CHECK(tests->insert(test_name).second) << ": " << test_name;
 
     SummaryWriterInterface* writer;
-    TF_CHECK_OK(CreateSummaryWriter(1, 1, testing::TmpDir(), test_name, &env_,
-                                    &writer));
+    TF_CHECK_OK(CreateSummaryFileWriter(1, 1, testing::TmpDir(), test_name,
+                                        &env_, &writer));
     core::ScopedUnref deleter(writer);
 
     TF_CHECK_OK(writer_fn(writer));
@@ -87,7 +87,7 @@ class SummaryInterfaceTest : public ::testing::Test {
   FakeClockEnv env_;
 };
 
-TEST_F(SummaryInterfaceTest, WriteTensor) {
+TEST_F(SummaryFileWriterTest, WriteTensor) {
   TF_CHECK_OK(SummaryTestHelper("tensor_test",
                                 [](SummaryWriterInterface* writer) {
                                   Tensor one(DT_FLOAT, TensorShape({}));
@@ -105,7 +105,7 @@ TEST_F(SummaryInterfaceTest, WriteTensor) {
                                 }));
 }
 
-TEST_F(SummaryInterfaceTest, WriteScalar) {
+TEST_F(SummaryFileWriterTest, WriteScalar) {
   TF_CHECK_OK(SummaryTestHelper(
       "scalar_test",
       [](SummaryWriterInterface* writer) {
@@ -123,7 +123,7 @@ TEST_F(SummaryInterfaceTest, WriteScalar) {
       }));
 }
 
-TEST_F(SummaryInterfaceTest, WriteHistogram) {
+TEST_F(SummaryFileWriterTest, WriteHistogram) {
   TF_CHECK_OK(SummaryTestHelper("hist_test",
                                 [](SummaryWriterInterface* writer) {
                                   Tensor one(DT_FLOAT, TensorShape({}));
@@ -141,7 +141,7 @@ TEST_F(SummaryInterfaceTest, WriteHistogram) {
                                 }));
 }
 
-TEST_F(SummaryInterfaceTest, WriteImage) {
+TEST_F(SummaryFileWriterTest, WriteImage) {
   TF_CHECK_OK(SummaryTestHelper(
       "image_test",
       [](SummaryWriterInterface* writer) {
@@ -162,7 +162,7 @@ TEST_F(SummaryInterfaceTest, WriteImage) {
       }));
 }
 
-TEST_F(SummaryInterfaceTest, WriteAudio) {
+TEST_F(SummaryFileWriterTest, WriteAudio) {
   TF_CHECK_OK(SummaryTestHelper(
       "audio_test",
       [](SummaryWriterInterface* writer) {
@@ -180,7 +180,7 @@ TEST_F(SummaryInterfaceTest, WriteAudio) {
       }));
 }
 
-TEST_F(SummaryInterfaceTest, WriteEvent) {
+TEST_F(SummaryFileWriterTest, WriteEvent) {
   TF_CHECK_OK(
       SummaryTestHelper("event_test",
                         [](SummaryWriterInterface* writer) {
@@ -198,7 +198,7 @@ TEST_F(SummaryInterfaceTest, WriteEvent) {
                         }));
 }
 
-TEST_F(SummaryInterfaceTest, WallTime) {
+TEST_F(SummaryFileWriterTest, WallTime) {
   env_.AdvanceByMillis(7023);
   TF_CHECK_OK(SummaryTestHelper(
       "wall_time_test",
diff --git a/tensorflow/contrib/tensorboard/db/vacuum.cc b/tensorflow/contrib/tensorboard/db/vacuum.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5febe63f0612046f96b89053811952e67d4c449b
--- /dev/null
+++ b/tensorflow/contrib/tensorboard/db/vacuum.cc
@@ -0,0 +1,137 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <iostream>
+
+#include "tensorflow/core/lib/db/sqlite.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/util/command_line_flags.h"
+
+namespace tensorflow {
+namespace {
+
+void Vacuum(const char* path) {
+  LOG(INFO) << "Opening SQLite DB: " << path;
+  Sqlite* db;
+  TF_CHECK_OK(Sqlite::Open(path, SQLITE_OPEN_READWRITE, &db));
+  core::ScopedUnref db_unref(db);
+
+  // TODO(jart): Maybe defragment rowids on Tensors.
+  // TODO(jart): Maybe LIMIT deletes and incremental VACUUM.
+
+  // clang-format off
+
+  LOG(INFO) << "Deleting orphaned Experiments";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Experiments
+    WHERE
+      user_id IS NOT NULL
+      AND user_id NOT IN (SELECT user_id FROM Users)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned Runs";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Runs
+    WHERE
+      experiment_id IS NOT NULL
+      AND experiment_id NOT IN (SELECT experiment_id FROM Experiments)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned Tags";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Tags
+    WHERE
+      run_id IS NOT NULL
+      AND run_id NOT IN (SELECT run_id FROM Runs)
+  )sql").StepAndResetOrDie();
+
+  // TODO(jart): What should we do if plugins define non-tag tensor series?
+  LOG(INFO) << "Deleting orphaned Tensors";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Tensors
+    WHERE
+      series IS NOT NULL
+      AND series NOT IN (SELECT tag_id FROM Tags)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned TensorStrings";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      TensorStrings
+    WHERE
+      tensor_rowid NOT IN (SELECT rowid FROM Tensors)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned Graphs";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Graphs
+    WHERE
+      run_id IS NOT NULL
+      AND run_id NOT IN (SELECT run_id FROM Runs)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned Nodes";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Nodes
+    WHERE
+      graph_id NOT IN (SELECT graph_id FROM Graphs)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned NodeInputs";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      NodeInputs
+    WHERE
+      graph_id NOT IN (SELECT graph_id FROM Graphs)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Running VACUUM";
+  db->PrepareOrDie("VACUUM").StepAndResetOrDie();
+
+  // clang-format on
+}
+
+int main(int argc, char* argv[]) {
+  string usage = Flags::Usage(argv[0], {});
+  bool parse_result = Flags::Parse(&argc, argv, {});
+  if (!parse_result) {
+    std::cerr << "The vacuum tool rebuilds SQLite database files created by\n"
+              << "SummaryDbWriter, which makes them smaller.\n\n"
+              << "This means deleting orphaned rows and rebuilding b-tree\n"
+              << "pages so empty space from deleted rows is cleared. Any\n"
+              << "superfluous padding of Tensor BLOBs is also removed.\n\n"
+              << usage;
+    return -1;
+  }
+  port::InitMain(argv[0], &argc, &argv);
+  if (argc < 2 || argv[1][0] == '-') {
+    std::cerr << "Need at least one SQLite DB path.\n";
+    return -1;
+  }
+  for (int i = 1; i < argc; ++i) {
+    Vacuum(argv[i]);
+  }
+  return 0;
+}
+
+}  // namespace
+}  // namespace tensorflow
+
+int main(int argc, char* argv[]) { return tensorflow::main(argc, argv); }
diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD
index 755b0657e9fb29c167911407cee340ac7e3e9b7a..bb86ecb2209f9bed3ad6c37f4b23bc7b361e1bd6 100644
--- a/tensorflow/contrib/timeseries/examples/BUILD
+++ b/tensorflow/contrib/timeseries/examples/BUILD
@@ -103,6 +103,7 @@ py_test(
     deps = [
         ":lstm",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/estimator:estimator_py",
     ],
 )
 
diff --git a/tensorflow/contrib/timeseries/examples/__init__.py b/tensorflow/contrib/timeseries/examples/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..52e83069cb0c68b510da46149248369dce376647 100644
--- a/tensorflow/contrib/timeseries/examples/__init__.py
+++ b/tensorflow/contrib/timeseries/examples/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/timeseries/examples/lstm.py b/tensorflow/contrib/timeseries/examples/lstm.py
index 3ba823f638da8f750981bc910d960706ff652fb7..c7193cef6915f9d0caf5b52fc084129cbc736994 100644
--- a/tensorflow/contrib/timeseries/examples/lstm.py
+++ b/tensorflow/contrib/timeseries/examples/lstm.py
@@ -165,12 +165,13 @@ class _LSTMModel(ts_model.SequentialTimeSeriesModel):
         "Exogenous inputs are not implemented for this example.")
 
 
-def train_and_predict(csv_file_name=_DATA_FILE, training_steps=200):
+def train_and_predict(
+    csv_file_name=_DATA_FILE, training_steps=200, estimator_config=None):
   """Train and predict using a custom time series model."""
   # Construct an Estimator from our LSTM model.
   estimator = ts_estimators.TimeSeriesRegressor(
       model=_LSTMModel(num_features=5, num_units=128),
-      optimizer=tf.train.AdamOptimizer(0.001))
+      optimizer=tf.train.AdamOptimizer(0.001), config=estimator_config)
   reader = tf.contrib.timeseries.CSVReader(
       csv_file_name,
       column_names=((tf.contrib.timeseries.TrainEvalFeatures.TIMES,)
diff --git a/tensorflow/contrib/timeseries/examples/lstm_test.py b/tensorflow/contrib/timeseries/examples/lstm_test.py
index 56daa1e10d9d1e7e96d71f33afc72671512dbaf8..3cace567266d497b12d836f44a335bbe5d916949 100644
--- a/tensorflow/contrib/timeseries/examples/lstm_test.py
+++ b/tensorflow/contrib/timeseries/examples/lstm_test.py
@@ -20,14 +20,23 @@ from __future__ import print_function
 
 from tensorflow.contrib.timeseries.examples import lstm
 
+from tensorflow.python.estimator import estimator_lib
 from tensorflow.python.platform import test
 
 
+class _SeedRunConfig(estimator_lib.RunConfig):
+
+  @property
+  def tf_random_seed(self):
+    return 3
+
+
 class LSTMExampleTest(test.TestCase):
 
   def test_periodicity_learned(self):
     (observed_times, observed_values,
-     all_times, predicted_values) = lstm.train_and_predict(training_steps=100)
+     all_times, predicted_values) = lstm.train_and_predict(
+         training_steps=100, estimator_config=_SeedRunConfig())
     self.assertAllEqual([100], observed_times.shape)
     self.assertAllEqual([100, 5], observed_values.shape)
     self.assertAllEqual([200], all_times.shape)
diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD
index 5f04eb2f5a4af031ad19662b05a8a2396299925d..fff972c1f3277ad5d83673a202a50d1e6f7df210 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/BUILD
+++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD
@@ -296,6 +296,8 @@ py_test(
     ],
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",  # b/63709811
+        "no_pip",  # b/63709811
         "no_pip_gpu",  # b/63391119
     ],
     deps = [
diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py
index 5896fc2a206bc747688b5b012e0f87465592dd8a..f0330bfbbd6e8067e5d085376acdf2e6bcaccb6a 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/head.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/head.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import re
 
-from tensorflow.contrib.framework.python.ops import variables
+from tensorflow.python.training import training_util
 from tensorflow.contrib.layers.python.layers import optimizers
 
 from tensorflow.contrib.timeseries.python.timeseries import feature_keys
@@ -79,7 +79,7 @@ class _TimeSeriesRegressionHead(head_lib._Head):  # pylint:disable=protected-acc
 
     train_op = optimizers.optimize_loss(
         model_outputs.loss,
-        global_step=variables.get_global_step(),
+        global_step=training_util.get_global_step(),
         optimizer=self.optimizer,
         # Learning rate is set in the Optimizer object
         learning_rate=None)
diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/__init__.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..52e83069cb0c68b510da46149248369dce376647 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/__init__.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model_test.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model_test.py
index ca57715e2b2e6bbadd276d641703c0a3b842652e..5980fc5d5deccc151b01c72fa19b734a7c485bdc 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model_test.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model_test.py
@@ -627,9 +627,8 @@ class UnknownShapeModel(TimeDependentStateSpaceModel):
 
   def get_observation_model(self, times):
     parent_model = super(UnknownShapeModel, self).get_observation_model(times)
-    parent_model._shape = tensor_shape.unknown_shape()
-    assert parent_model.get_shape().ndims is None
-    return parent_model
+    return array_ops.placeholder_with_default(
+        input=parent_model, shape=tensor_shape.unknown_shape())
 
 
 class TimeDependentTests(test.TestCase):
diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index f542d9413944052bd5ad3c351793185c63e8ae19..0199313bc8d0214a547498b97e9a1d83ee37b708 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -28,21 +28,7 @@ cc_library(
         ":outfeed_ops_op_lib",
         ":replication_ops_op_lib",
         ":tpu_configuration_ops_op_lib",
-    ],
-)
-
-py_library(
-    name = "tpu_test_util",
-    srcs = ["python/tpu/test_util.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":tpu_lib",
-        ":tpu_py",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:session",
-        "//tensorflow/python:variables",
+        ":tpu_embedding_ops_op_lib",
     ],
 )
 
@@ -84,9 +70,11 @@ tf_gen_op_libs(
         "outfeed_ops",
         "replication_ops",
         "tpu_configuration_ops",
+        "tpu_embedding_ops",
     ],
     deps = [
-        "//tensorflow/core:lib",
+        "//tensorflow/contrib/tpu/proto:tpu_embedding_config_proto_cc",
+        "//tensorflow/core:lib_proto_parsing",
     ],
 )
 
@@ -98,6 +86,11 @@ tf_custom_op_library(
         "ops/outfeed_ops.cc",
         "ops/replication_ops.cc",
         "ops/tpu_configuration_ops.cc",
+        "ops/tpu_embedding_ops.cc",
+    ],
+    deps = [
+        "//tensorflow/contrib/tpu/proto:tpu_embedding_config_proto_cc",
+        "//tensorflow/core:lib_proto_parsing",
     ],
 )
 
@@ -109,6 +102,7 @@ tf_gen_op_wrapper_py(
         ":outfeed_ops_op_lib",
         ":replication_ops_op_lib",
         ":tpu_configuration_ops_op_lib",
+        ":tpu_embedding_ops_op_lib",
     ],
 )
 
@@ -172,6 +166,7 @@ py_library(
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:control_flow_util",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework",
         "//tensorflow/python:framework_ops",
@@ -183,6 +178,19 @@ py_library(
     ],
 )
 
+tf_py_test(
+    name = "tpu_test",
+    size = "small",
+    srcs = ["python/tpu/tpu_test.py"],
+    additional_deps = [
+        ":tpu",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:layers",
+    ],
+)
+
 tf_py_test(
     name = "tpu_sharding_test",
     size = "small",
@@ -216,6 +224,17 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "tpu_config_test",
+    size = "small",
+    srcs = ["python/tpu/tpu_config_test.py"],
+    additional_deps = [
+        ":tpu_estimator",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/tpu/ops/cross_replica_ops.cc b/tensorflow/contrib/tpu/ops/cross_replica_ops.cc
index cbbd19800eb2e336fc343671fb82bb3ed631c129..d389050e67f9a9e48b91583e5088058ec4e2832f 100644
--- a/tensorflow/contrib/tpu/ops/cross_replica_ops.cc
+++ b/tensorflow/contrib/tpu/ops/cross_replica_ops.cc
@@ -22,7 +22,7 @@ namespace tensorflow {
 REGISTER_OP("CrossReplicaSum")
     .Input("input: T")
     .Output("output: T")
-    .Attr("T: {float}")
+    .Attr("T: {bfloat16, float}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 An Op to sum inputs across replicated TPU instances. Each
diff --git a/tensorflow/contrib/tpu/ops/replication_ops.cc b/tensorflow/contrib/tpu/ops/replication_ops.cc
index 36e865bf3c461878e12cca5f46f24411ceb61a78..cba71c6b98e1079de6c6c4c32fa2ffc44a9ce71e 100644
--- a/tensorflow/contrib/tpu/ops/replication_ops.cc
+++ b/tensorflow/contrib/tpu/ops/replication_ops.cc
@@ -72,10 +72,12 @@ REGISTER_OP("TPUReplicate")
     .Attr("Tinputs: list(type) >= 0")
     .Attr("Tbroadcast_inputs: list(type) >= 0")
     .Attr("NumVariables: int >= 0")
+    .Attr("Tguaranteed_constants: list(type) >= 0")
     .Attr("output_types: list(type) >= 0")
     .Input("inputs: Tinputs")
     .Input("broadcast_inputs: Tbroadcast_inputs")
     .Input("variables: NumVariables * resource")
+    .Input("guaranteed_constants: Tguaranteed_constants")
     .Output("outputs: output_types")
     .SetShapeFn(shape_inference::UnknownShape)
     .Doc(R"doc(
@@ -95,9 +97,13 @@ Tinputs: the types of the arguments to 'computation'.
 inputs: the inputs to 'computation', flattened, in replica-major order.
 Tbroadcast_inputs: the types of the additional arguments to broadcast to all
   replicas.
+Tguaranteed_constants: the types of the arguments to 'guaranteed_constants'.
 broadcast_inputs: additional arguments to broadcast to all replicas. The
   broadcast inputs are appended to the per-replica inputs when calling
   computation.
+guaranteed_constants: arguments which have been guaranteed to not
+change their values during the session lifetime. These contain tensors marked as
+constant using the GuaranteeConstOp.
 output_types: the types of the outputs of 'computation'.
 outputs: the outputs of 'computation'.
 )doc");
diff --git a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc b/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc
index 8c4fe5538d832f390845fe2d31aa6a08342b280b..28417b89e0d4e0c5b2ca4f4794d29ab8a31049d7 100644
--- a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc
@@ -26,29 +26,25 @@ using shape_inference::ShapeHandle;
 // Configuring a distributed TPU system is achieved by running
 // the following Ops:
 //
-// 1 Run _DisconnectHostFromDistributedTPUSystem on the CPU of each
-// host. This is needed in case the system had previously been
-// configured. It returns, for each host, the number of TPU chips on
-// the host.
+// 1 Run _DisconnectHostFromDistributedTPUSystem on the TPU_SYSTEM of each
+// host. This is needed in case the system had previously been configured. It
+// returns, for each host, the number of TPU chips on the host.
 //
-// 2 Run _ConfigureDistributedTPU on TPU_SYSTEM. Takes as input the
-// number of chips on each host. Validates that all hosts have the
-// same number of chips, and that the chips are consistent with the
-// topology set by flags. Has a single output which is a proto
-// describing the requested system configuration, which is sent to all
-// hosts.
+// 2 Run _ConfigureDistributedTPU on TPU_SYSTEM of worker 0. Takes as input the
+// number of chips on each host. Validates that all hosts have the same number
+// of chips, and that the chips are consistent with the topology set by
+// flags. Has a single output which is a proto describing the requested system
+// configuration, which is sent to all hosts.
 //
-// 3 Run _InitializeHostForDistributedTPU on the CPU of each host,
-// taking as input the output from ConfigureDistributedTPU. Has a
-// single Tensor output which is a vector of int32 indicating, for
-// each TPU on the host, what its global TPU system id is.
+// 3 Run _InitializeHostForDistributedTPU on the TPU_SYSTEM of each host, taking
+// as input the output from ConfigureDistributedTPU. Has a single Tensor output
+// which is a vector of int32 indicating, for each TPU on the host, what its
+// global TPU system id is.
 //
 // 4 Run _WaitForDistributedTPU on TPU_SYSTEM, taking as input the
 // outputs from all the _InitializeHostForDistributedTPU
-// Ops. _WaitForDistributedTPU has an attr host_specs which is a
-// vector<string> giving the partial device spec for each host. These
-// partial specs are combined in the Op with the outputs from the host
-// initialization Ops to construct a mapping from full TPU device
+// Ops. _These partial specs are combined in the Op with the outputs from
+// the host initialization Ops to construct a mapping from full TPU device
 // specs to global TPU ids. Has a single Tensor output which is a
 // matrix of int32 indicating, for each host (outer dimension) and for
 // each TPU on the host (inner dimension) what that TPU's global id
@@ -56,29 +52,28 @@ using shape_inference::ShapeHandle;
 // system to initialize fully, which may take several minutes for a
 // large system.
 //
-// 5 Run _SetGlobalTPUArray on the CPU of each host, taking as input
-// the output from _WaitForDistributedTPU. This Op tells each host the
-// global Id of every TPU on every host.
+// 5 Run _SetGlobalTPUArray on the TPU_SYSTEM of each host, taking as input the
+// output from _WaitForDistributedTPU. This Op tells each host the global Id of
+// every TPU on every host.
 //
-// Most user code works by placing the ConfigureDistributedTPU Op on
-// the desired TPU_SYSTEM device, and a graph rewrite replaces it by
-// the subgraph described above.
+// Most user code works by placing the ConfigureDistributedTPU Op on the desired
+// TPU_SYSTEM device, and a graph rewrite replaces it by the subgraph described
+// above.
 //
 //
-// A distributed TPU system can be cleanly shut down by running
-// the following Ops:
+// A distributed TPU system can be cleanly shut down by running the following
+// Ops:
 //
-// 1 Run _DisconnectHostFromDistributedTPUSystem on the CPU of each
-// host.
+// 1 Run _DisconnectHostFromDistributedTPUSystem on the TPU_SYSTEM of each host.
 //
 // 2 Run _ShutdownDistributedTPU on the TPU_SYSTEM where
-// _ConfigureDistributedTPU was run. The Op will return an error if no
-// system is configured.
+// _ConfigureDistributedTPU was run. The Op will return an error if no system is
+// configured.
 //
 //
-// Most user code works by placing the ShutdownDistributedTPU Op on
-// the desired TPU_SYSTEM device, and a graph rewrite replaces it by
-// the subgraph described above.
+// Most user code works by placing the ShutdownDistributedTPU Op on the desired
+// TPU_SYSTEM device, and a graph rewrite replaces it by the subgraph described
+// above.
 
 REGISTER_OP("_ConfigureDistributedTPU")
     .Input("inputs: N * int32")
@@ -108,7 +103,6 @@ in a host.
 REGISTER_OP("_WaitForDistributedTPU")
     .Input("inputs: N * int32")
     .Output("topology: string")
-    .Attr("host_specs: list(string)")
     .Attr("startup_timeout_sec: int = 20")
     .Attr("N: int")
     .SetIsStateful()
@@ -196,6 +190,7 @@ chips on the host.
 REGISTER_OP("ConfigureDistributedTPU")
     .Output("topology: string")
     .Attr("embedding_config: string = ''")
+    .Attr("tpu_embedding_config: string = ''")
     .SetIsStateful()
     .SetShapeFn(shape_inference::UnknownShape)
     .Doc(R"doc(
@@ -204,6 +199,9 @@ system.
 
 topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU
 topology.
+tpu_embedding_config: Serialized tensorflow.tpu.TPUEmbeddingConfiguration that
+describes the embedding lookups of the program.
+embedding_config: Reserved. Do not use.
 )doc");
 
 REGISTER_OP("ShutdownDistributedTPU")
diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..cc32a265286951a1e4d59228da6b3ac83a75c5e9
--- /dev/null
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -0,0 +1,328 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tpu/proto/tpu_embedding_config.pb.h"
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+// TPUs use a specialized mechanism for performing embedding lookups,
+// necessitating differences in TF Graphs that use embeddings on TPUs relative
+// to CPUs. Embedding lookups on TPU systems are achieved by including the
+// following in the TF Graph.
+//
+// 0. Construct a TPUEmbeddingConfiguration, specifying the embedding tables
+//    in the model, the size of the TPU system to be used, and the optimizer to
+//    be used for each table. Some of this information is redundant with other
+//    pieces of the TF Graph.
+// 1. Pass this TPUEmbeddingConfiguration to tpu.initialize_system() as the
+//    tpu_embedding_config parameter.
+// 2. Use the TPUEmbeddingLoad Op to initialize the embedding tables in TPU
+//    memories, sharded across the memories attached to each Host.
+// 3. Use TPUEmbeddingEnqueueSparseBatch to provide the TPU with embedding
+//    indices and aggregation weights.
+// 4. TPUEmbeddingReceiveActivations returns a list of Tensors, containing the
+//    activations from each table specified in the configuration.
+// 5. TPUEmbeddingActivations, when used with appropriate Python libraries,
+//    enables the automatic differentiation of models that use embeddings.
+// 6. TPUEmbeddingSendGradients takes a list of Tensors (of the same shapes
+//    as those returned by TPUEmbeddingReceivActivations) containing gradients
+//    to use in updating the embedding tables.
+// 7. Before saving a checkpoint, use the TPUEmbeddingRetrieve Op to update
+//    the Graph's embedding table Variables from the updated tables in the
+//    TPU memories.
+//
+// TPU Embeddings use dedicated ops to enforce Host/TPU consistency in the
+// state of embedding table variables. Before beginning training or inference,
+// the model must Load the optimizer parameters into the TPU memories. Before
+// saving a checkpoint, the model must Retreieve the parameters back into the
+// host CPU memory.
+
+REGISTER_OP("TPUEmbeddingLoadGradientDescentParameters")
+    .Input("parameters: float32")
+    .Attr("tpu_embedding_config: string")
+    .Attr("table_id: int >= 0")
+    .Attr("num_hosts: int >= 1")
+    .Attr("host_id: int >= 0")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+Load an embedding table shard into TPU memory for use with GradientDescent.
+
+TPU embeddings use dedicated per-optimizer Ops for loading and retrieving 
+trainable variables and optimizer state from TPU memory. This op enables
+functionality equivalent to GradientDescentOptimizer.
+
+parameters: The shard of the embedding table resident on the host executing this
+    op. For single-TPU models, this is the entire embedding table.
+tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
+table_id: The id of the table specified in the tpu_embedding_config.
+num_hosts: The number of CPU hosts in the distributed training job.
+host_id: Which CPU host in the distributed training job will execute this op.
+)doc");
+
+namespace tpu_embedding_config_util {
+
+Status GradientDescentShapes(shape_inference::InferenceContext *c) {
+  string config_string;
+  TF_RETURN_IF_ERROR(c->GetAttr("tpu_embedding_config", &config_string));
+  tpu::TPUEmbeddingConfiguration config;
+  if (!config.ParseFromString(config_string)) {
+    return errors::InvalidArgument("Malformed tpu_embedding_config.");
+  }
+
+  int table_id;
+  TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
+  int64 num_tables = config.table_config_size();
+  if (table_id >= num_tables) {
+    return errors::InvalidArgument("Table id >= num_tables");
+  }
+  int64 width = config.table_config(table_id).width();
+  int64 num_rows = config.table_config(table_id).num_rows();
+
+  TF_RETURN_IF_ERROR(c->set_output("parameters", {c->Matrix(num_rows, width)}));
+  return Status::OK();
+}
+
+}  // namespace tpu_embedding_config_util
+
+REGISTER_OP("TPUEmbeddingRetrieveGradientDescentParameters")
+    .Output("parameters: float32")
+    .Attr("tpu_embedding_config: string")
+    .Attr("table_id: int")
+    .Attr("num_hosts: int")
+    .Attr("host_id: int")
+    .SetIsStateful()
+    .SetShapeFn(tpu_embedding_config_util::GradientDescentShapes)
+    .Doc(R"doc(
+Retrieve an embedding table shard from TPU memory.
+
+TPU embeddings use dedicated per-optimizer Ops for loading and retrieving 
+trainable variables and optimizer state from TPU memory. This op enables
+functionality equivalent to GradientDescentOptimizer.
+
+tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
+table_id: The id of the table specified in tpu_embedding_config.
+num_hosts: The number of CPU hosts in the distributed training job.
+host_id: Which CPU host in the distributed training job will execute this op.
+)doc");
+
+REGISTER_OP("TPUEmbeddingLoadAdagradParameters")
+    .Input("parameters: float32")
+    .Input("accumulators: float32")
+    .Attr("tpu_embedding_config: string")
+    .Attr("table_id: int >= 0")
+    .Attr("num_hosts: int >= 1")
+    .Attr("host_id: int >= 0")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+Load an embedding table shard into TensorNode memories for use with Adagrad.
+
+TPU embeddings use dedicated per-optimizer Ops for loading and retrieving
+trainable variables and optimizer state from TPU memory. This op enables
+functionality equivalent to AdagradOptimizer.
+
+parameters: The shard of the embedding table resident on the host executing this
+    op. For single-TPU models, this is the entire embedding table.
+accumulators: Shard of the Adagrad accumulators resident on the host executing
+    this op.
+tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
+table_id: The id of the table specified in the embedding_config.
+num_hosts: The number of CPU hosts in the distributed training job.
+host_id: Which CPU host in the distributed training job will execute this op.
+)doc");
+
+namespace tpu_embedding_config_util {
+
+Status AdagradShapes(shape_inference::InferenceContext *c) {
+  string config_string;
+  TF_RETURN_IF_ERROR(c->GetAttr("tpu_embedding_config", &config_string));
+  tpu::TPUEmbeddingConfiguration config;
+  if (!config.ParseFromString(config_string)) {
+    return errors::InvalidArgument("Malformed tpu_embedding_config.");
+  }
+
+  int table_id;
+  TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
+  int64 num_tables = config.table_config_size();
+  if (table_id >= num_tables) {
+    return errors::InvalidArgument("Table id >= num_tables");
+  }
+  int64 width = config.table_config(table_id).width();
+  int64 num_rows = config.table_config(table_id).num_rows();
+
+  TF_RETURN_IF_ERROR(c->set_output("parameters", {c->Matrix(num_rows, width)}));
+  TF_RETURN_IF_ERROR(
+      c->set_output("accumulators", {c->Matrix(num_rows, width)}));
+  return Status::OK();
+}
+
+}  // namespace tpu_embedding_config_util
+
+REGISTER_OP("TPUEmbeddingRetrieveAdagradParameters")
+    .Output("parameters: float32")
+    .Output("accumulators: float32")
+    .Attr("tpu_embedding_config: string")
+    .Attr("table_id: int >= 0")
+    .Attr("num_hosts: int >= 1")
+    .Attr("host_id: int >= 0")
+    .SetIsStateful()
+    .SetShapeFn(tpu_embedding_config_util::AdagradShapes)
+    .Doc(R"doc(
+Retrieve an embedding table shard from TPU memory.
+
+TPU embeddings use dedicated per-optimizer Ops for loading and retrieving 
+trainable variables and optimizer state from TPU memory. This op enables
+functionality equivalent to AdagradOptimizer.
+
+tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
+table_id: The id of the table specified in the embedding_config_json.
+num_hosts: The number of CPU hosts in the distributed training job.
+host_id: Which CPU host in the distributed training job will execute this op.
+)doc");
+
+REGISTER_OP("TPUEmbeddingEnqueueSparseBatch")
+    .Input("sample_indices: num_tables * int32")
+    .Input("embedding_indices: num_tables * int32")
+    .Input("aggregation_weights: num_tables * float32")
+    .Attr("num_tables: int")
+    .Attr("device_ordinal: int = -1")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+An op that feeds a batch of embedding indices and weights to the TPU.
+
+Embedding lookups are equivalent to sparse-dense matrix multiplications: the
+sparse matrix contains nonzeros in column j in order to retrieve row j from the
+embedding table.
+
+The three Tensor list arguments (sample_indices, embedding_indices, and
+aggregation_weights) represent these sparse matrices in COO format. The Tensor
+lists each have one entry for each embedding table specified in the model.
+For the kth embedding table, the three Tensors at position k in the list
+specify a COO-format sparse matrix. For the kth table, the row indices,
+column indices, and nonzero values of the COO sparse matrix are specified by
+sample_indices[k], embedding_indices[k], and aggregation_weights[k],
+respectively. Entries must be sorted by row index, then by column index.
+
+There should be at most one TPUEmbeddingEnqueueSparseBatch op in a signle
+training step per TPU shard.
+
+sample_indices: A list of rank 1 Tensors specifying row indices of the COO
+    sparse matrix representing the embedding lookups for each table.
+embedding_indices: A list of rank 1 Tensors  specifying column indices of the
+    COO sparse matrix representing the embedding lookups for each table.
+aggregation_weights: A list of rank 1 Tensors specifying the nonzero values
+    of the COO sparse matrix representing the embedding lookups for each table.
+device_ordinal: The TPU device to use. This should be -1 when the Op
+    is running on a TPU device, and >= 0 when the Op is running on the CPU
+    device.
+)doc");
+
+namespace tpu_embedding_config_util {
+
+Status ActivationShapes(shape_inference::InferenceContext *c) {
+  string config_string;
+  TF_RETURN_IF_ERROR(c->GetAttr("tpu_embedding_config", &config_string));
+  tpu::TPUEmbeddingConfiguration config;
+  if (!config.ParseFromString(config_string)) {
+    return errors::InvalidArgument("Malformed tpu_embedding_config.");
+  }
+  int64 batch_size = config.batch_size();
+  int64 num_tables = config.table_config_size();
+  for (int table_id = 0; table_id < num_tables; ++table_id) {
+    int64 width = config.table_config(table_id).width();
+    int64 num_features = config.table_config(table_id).num_features();
+    c->set_output(table_id, c->Matrix(batch_size * num_features, width));
+  }
+  return Status::OK();
+}
+
+}  // namespace tpu_embedding_config_util
+
+REGISTER_OP("TPUEmbeddingReceiveActivations")
+    .Output("outputs: num_tables * float")
+    .Attr("num_tables: int >= 1")
+    .Attr("tpu_embedding_config: string")
+    .SetIsStateful()
+    .SetShapeFn(tpu_embedding_config_util::ActivationShapes)
+    .Doc(R"doc(
+An op that receives embeddng activations on the TPU.
+
+The TPU system performs the embedding lookups and aggregations specified by
+the arguments to TPUEmbeddingEnqueueSparseBatch. The results of these
+aggregations are visible to the Tensorflow Graph as the outputs of a
+TPUEmbeddingDequeueActivations Op. This op returns a list containing one
+Tensor of activations per table specified in the model. There can be at most
+one ReceieveActivations op in the TPU graph.
+
+outputs: A TensorList of embedding activations containing one Tensor per
+    embedding table in the model.
+num_tables: The number of output activation tensors, equal to the number of
+    embedding tables in the model.
+tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
+)doc");
+
+REGISTER_OP("TPUEmbeddingActivations")
+    .Input("embedding_variable: float32")
+    .Input("sliced_activations: float32")
+    .Output("output: float32")
+    .Attr("table_id: int >= 0")
+    .Attr("lookup_id: int >= 0")
+    .SetShapeFn([](shape_inference::InferenceContext *c) {
+      c->set_output(0, c->input(1));
+      return Status::OK();
+    })
+    .Doc(R"doc(
+An op enabling differentiation of TPU Embeddings.
+
+This op simply returns its first input, which is assumed to have been sliced
+from the Tensors returnd by TPUEmbeddingDequeueActivations. The presence of this
+op, and its first argument being a trainable Variable, enables automatic
+differentiation of graphs containing embeddings via the TPU Embedding Python
+libraries.
+
+embedding_variable: A trainable variable, enabling optimizers to find this op.
+sliced_activations: The embedding activations Tensor to return.
+table_id: The id of the table in the embedding layer configuration from which
+    these activations were computed.
+lookup_id: Identifier of the set of embedding indices which produced these
+    activations.
+)doc");
+
+REGISTER_OP("TPUEmbeddingSendGradients")
+    .Input("gradients: num_tables * float32")
+    .Attr("num_tables: int >= 1")
+    .Attr("tpu_embedding_config: string")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+An op that performs gradient updates of embedding tables.
+
+The TensorList argument has the same length and shapes as the return value of
+TPUEmbeddingReceiveActivations, but contains gradients of the model's loss
+with respect to the embedding activations. The embedding tables are updated
+from these gradients via the optimizer specified in the configuration given
+to tpu.initialize_system.
+
+gradients: A TensorList of gradients with which to update embedding tables.
+tpu_embedding_config: Serialized TPUEmbeddingConfiguration proto.
+)doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD
index 0e1fca3d3c8b6f3a19b3e989dbee1863475796c5..346c03067d51350b9939123d6afa69d8127bdf01 100644
--- a/tensorflow/contrib/tpu/profiler/BUILD
+++ b/tensorflow/contrib/tpu/profiler/BUILD
@@ -47,7 +47,7 @@ cc_library(
 tf_cc_binary(
     name = "capture_tpu_profile",
     srcs = ["capture_tpu_profile.cc"],
-    visibility = ["//tensorflow/contrib/tpu/profiler:__subpackages__"],
+    visibility = ["//visibility:public"],
     deps = [
         ":dump_tpu_profile",
         ":tpu_profiler_proto_cc",
diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
index bff23a447f841339d9bf5bd3bf125d705bf1fee7..b67f2f47a7b753fd4629d7ad4db0b4c67933ce0b 100644
--- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
@@ -51,6 +51,7 @@ ProfileResponse Profile(const string& service_addr, int duration_ms) {
   request.set_duration_ms(duration_ms);
   request.set_max_events(kMaxEvents);
   request.add_tools("input_pipeline");
+  request.add_tools("overview_page");
   std::cout << "Limiting the number of trace events to " << kMaxEvents
             << std::endl;
   ::grpc::ClientContext context;
diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
index 120a38b6c2353deaf0b86d330cda999ba6be7dbf..0ed5b2fad333eaa8a9820da334e953cbc282371f 100644
--- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
@@ -149,8 +149,10 @@ Status WriteTensorboardTPUProfile(const string& logdir, const string& run,
   // Dumps profile data to <logdir>/plugins/profile/<run>/.
   string profile_run_dir = JoinPath(logdir, kProfilePluginDirectory, run);
   TF_RETURN_IF_ERROR(Env::Default()->RecursivelyCreateDir(profile_run_dir));
+
   // Ignore computation_graph for now.
-  if (response.encoded_trace().empty()) {
+  const bool empty_trace = response.encoded_trace().empty();
+  if (empty_trace) {
     *os << "No trace event is collected." << std::endl;
   } else {
     LOG(INFO) << "Converting trace events to TraceViewer JSON.";
@@ -163,7 +165,7 @@ Status WriteTensorboardTPUProfile(const string& logdir, const string& run,
     TF_RETURN_IF_ERROR(DumpOpProfileToLogDirectory(profile_run_dir,
                                                    response.op_profile(), os));
   }
-  if (!response.tool_data().empty()) {
+  if (!empty_trace && !response.tool_data().empty()) {
     for (const auto& tool_data : response.tool_data()) {
       TF_RETURN_IF_ERROR(
           DumpToolDataToLogDirectory(profile_run_dir, tool_data, os));
diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
index ee6950699e740139b75f3f061ca0ca455fe2a1af..179d29602b9f970fb450bc057332fa092066255c 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
@@ -70,7 +70,7 @@ setup(
         'Topic :: Scientific/Engineering :: Mathematics',
         'Topic :: Scientific/Engineering :: Artificial Intelligence',
         'Topic :: Software Development',
-        'Topic :: Software Development :: Libraries',  
+        'Topic :: Software Development :: Libraries',
         'Topic :: Software Development :: Libraries :: Python Modules',
     ],
     license='Apache 2.0',
diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
index 6943ff5f471d7cb0c5302261ec9aa7273ef5ae35..5440bbbfdd75207bd209c19d5cc42dc69504d39b 100644
--- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
+++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
@@ -114,6 +114,26 @@ message HloExtraInfoMapResult {
   map<string, HloExtraInfoResult> hlo_extrainfo_map = 1;
 }
 
+// Result proto for host-independent job information.
+message HostIndependentJobInfoResult {
+  // The change-list number of this build.
+  optional int64 change_list = 1;
+  // The time of this build.
+  optional int64 build_time = 2;
+  // The target of this build.
+  optional string build_target = 3;
+}
+
+// Result proto for host-dependent job information.
+message HostDependentJobInfoResult {
+  // This ID of the host where the job was run on.
+  optional string host_id = 1;
+  // The command line used to run the job.
+  optional string command_line = 2;
+  // The start time of the job on this host.
+  optional int64 start_time = 3;
+}
+
 // Result proto for RunEnvironment (the run environment of a profiling session).
 message RunEnvironmentResult {
   // Number of hosts used.
@@ -124,8 +144,10 @@ message RunEnvironmentResult {
   optional int32 tpu_core_count = 3;
   // The per-TPU-core batch size.
   optional int32 per_core_batch_size = 4;
-  // Job information including build target and command line.
-  optional string job_info = 5;
+  // Host-independent job information.
+  optional HostIndependentJobInfoResult host_independent_job_info = 5;
+  // Host-dependent job information.
+  repeated HostDependentJobInfoResult host_dependent_job_info = 6;
 }
 
 // Result proto for TfStatsHelper.
diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto
index 9c3fd45fd1ec9736b638b45907e585165d4d9057..bf30d2ce091302eaf361a0018464d3b7de94ea6d 100644
--- a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto
+++ b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto
@@ -40,7 +40,7 @@ message ProfileToolData {
 }
 
 message ProfileResponse {
-  uint64 xprof_response_size = 1;  // Placeholder: return something meaningful.
+  reserved 1;  // was uint64 placeholder for returning something meaningful.
   // Graphs of programs executed on TPUs during the profiling period.
   repeated GraphDef computation_graph = 2;
 
diff --git a/tensorflow/contrib/tpu/proto/BUILD b/tensorflow/contrib/tpu/proto/BUILD
index 79a79efb6b62d3e98127558e951ceefd276b580c..e1660985676e8c2efe3b01e32b48b211391885b7 100644
--- a/tensorflow/contrib/tpu/proto/BUILD
+++ b/tensorflow/contrib/tpu/proto/BUILD
@@ -15,6 +15,15 @@ filegroup(
     visibility = ["//tensorflow:__subpackages__"],
 )
 
+tf_proto_library(
+    name = "tpu_embedding_config_proto",
+    srcs = [
+        "tpu_embedding_config.proto",
+    ],
+    cc_api_version = 2,
+    visibility = ["//visibility:public"],
+)
+
 tf_proto_library(
     name = "topology_proto",
     srcs = [
diff --git a/tensorflow/contrib/tpu/proto/tpu_embedding_config.proto b/tensorflow/contrib/tpu/proto/tpu_embedding_config.proto
new file mode 100644
index 0000000000000000000000000000000000000000..b0ec968d3a401f1b80ed1bf6fd7a83a69c068fe2
--- /dev/null
+++ b/tensorflow/contrib/tpu/proto/tpu_embedding_config.proto
@@ -0,0 +1,76 @@
+syntax = "proto3";
+
+package tensorflow.tpu;
+
+// The TPUEmbeddingConfiguration contains specification of TPU Embedding lookups
+// and gradient updates separate from the TF Graph.
+message TPUEmbeddingConfiguration {
+  // model_mode specifies whether the model is to be run in training or
+  // inference. In inference mode, gradient updates to embedding tables are not
+  // performed.
+  enum ModelMode {
+    INVALID = 0;
+    TRAINING = 1;
+    INFERENCE = 2;
+  }
+
+  ModelMode model_mode = 1;
+
+  // num_hosts is the number of host CPU systems in the training/inference job.
+  // Each embedding table must be sharded into num_hosts separate Variables,
+  // placed separately on the num_hosts CPU devices in the cluster. Sharding
+  // will be performed equivalently to the 'div' sharding_strategy option of
+  // embedding_lookup() and embedding_lookup_sparse().
+  int32 num_hosts = 2;
+
+  // The total number of TensorNodes. This is equal to num_hosts times the
+  // number of TensorNodes attached to each host.
+  int32 num_tensornodes = 3;
+
+  // The number of training examples per TensorNode.
+  int32 batch_size = 4;
+
+  message GradientDescentOptimizer {
+    float learning_rate = 1;
+  }
+
+  message AdagradOptimizer {
+    float learning_rate = 1;
+    float initial_accumulator = 2;
+  }
+
+  // Each Embedding
+  message TPUEmbeddingTable {
+    // Name of the embedding table. This will be used to name Variables in the
+    // Tensorflow Graph.
+    string name = 1;
+
+    // Number of rows of the embedding table. The Variable created to hold the
+    // learned embedding table values will have shape (num_rows, width).
+    int32 num_rows = 3;
+
+    // Width of the embedding table. The Variable created to hold the
+    // learned embedding table values will have shape (num_rows, width).
+    int32 width = 4;
+
+    // Number of distinct embedding activation vectors per training example
+    // produced by lookups into this table during model evaluation. For each
+    // table, the Graph will receive an activations Tensor of shape
+    //   (batch_size * table.num_features, table.width).
+    // For example, num_features = 1 produces equivalent behavior to a single
+    // tf.nn.embedding_lookup() call. In the case of 'multivalent' embeddings,
+    // (i.e. tf.nn.embedding_lookup_sparse()) which compute weighted averages of
+    // embedding table rows, num_features is the number of vectors produced
+    // after averaging. In sequence models num_features is typically equal
+    // to the sequence length, since each sequence element must be represented
+    // separately to the convolutional or recurrent network.
+    int32 num_features = 5;
+
+    oneof optimizer {
+      GradientDescentOptimizer gradient_descent = 6;
+      AdagradOptimizer adagrad = 7;
+    }
+  }
+
+  repeated TPUEmbeddingTable table_config = 5;
+}
diff --git a/tensorflow/contrib/tpu/python/tpu/test_util.py b/tensorflow/contrib/tpu/python/tpu/test_util.py
deleted file mode 100644
index a5d4ff972277cda0bd6f5b3ecdb4bef59a2f8d0e..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/tpu/python/tpu/test_util.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ===================================================================
-"""Utilities to ease testing on TPU devices."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os.path
-import pickle
-import tempfile
-
-import numpy as np
-
-from tensorflow.contrib.tpu.python.tpu import tpu
-from tensorflow.contrib.tpu.python.tpu import tpu_config
-from tensorflow.contrib.tpu.python.tpu import tpu_estimator
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.python.client import session as tf_session
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
-from tensorflow.python.framework import test_util
-from tensorflow.python.ops import gen_array_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import saver as tf_saver
-
-
-def has_tpu():
-  """Check if a TPU device is available.
-
-  Device enumeration via `device_lib` currently fails for TPU systems.
-  (http://b/68333779).  To work around this, we determine the existence of a
-  TPU by a successful call to `initialize_system`.
-
-  Returns:
-    boolean, True if a TPU device is available, otherwise False.
-  """
-
-  def _check():
-    with tf_session.Session() as sess:
-      sess.run(tpu.initialize_system())
-      sess.run(tpu.shutdown_system())
-
-  try:
-    _check()
-    return True
-  except errors.OpError as _:
-    return False
-
-
-def _available_devices():
-  devices = ["cpu"]
-  if not test_util.gpu_device_name():
-    devices.append("gpu")
-
-  if has_tpu():
-    devices.append("tpu")
-
-  return tuple(devices)
-
-
-def copy_dir(src, tgt):
-  """Copy src to tgt."""
-  gfile.MakeDirs(tgt)
-  seen_dirs = set()
-  for dirname, _, files in gfile.Walk(src):
-    for f in files:
-      src_f = os.path.join(dirname, f)
-      tgt_f = src_f.replace(src, tgt)
-      tgt_d = os.path.dirname(tgt_f)
-      if tgt_d not in seen_dirs:
-        gfile.MkDir(tgt_d)
-        seen_dirs.add(tgt_d)
-      gfile.Copy(src_f, tgt_f, overwrite=True)
-
-
-def compare_model(model_fn,
-                  input_fn,
-                  params,
-                  master="local",
-                  temp_dir=None,
-                  num_shards=2,
-                  tolerance=1e-4):
-  """Compare the results of running `model_fn` on the TPU and CPU."""
-  if not temp_dir:
-    temp_dir = tempfile.mkdtemp()
-
-  cpu_model_dir = "%s/cpu-model" % temp_dir
-  tpu_model_dir = "%s/tpu-model" % temp_dir
-  initial_model_dir = "%s/initial-model" % temp_dir
-
-  logging.info("Checkpoints and weights will be written to %s", temp_dir)
-
-  num_steps = 1
-
-  def _model_adapter(features, labels, mode, params):
-    """Run users model function with random seeds fixed to known values."""
-    random_seed.set_random_seed(0)
-    np.random.seed(0)
-    return model_fn(features, labels, mode, params)
-
-  def _input_adapter(params):
-    random_seed.set_random_seed(0)
-    np.random.seed(0)
-    return input_fn(params)
-
-  def _make_run_config(model_dir):
-    return tpu_config.RunConfig(
-        master=master,
-        model_dir=model_dir,
-        save_checkpoints_secs=10000,
-        session_config=config_pb2.ConfigProto(
-            allow_soft_placement=True, log_device_placement=False),
-        tpu_config=tpu_config.TPUConfig(
-            iterations_per_loop=num_steps,
-            num_shards=num_shards,
-        ),
-    )
-
-  def _make_estimator(use_tpu, model_dir):
-    return tpu_estimator.TPUEstimator(
-        model_fn=_model_adapter,
-        use_tpu=use_tpu,
-        config=_make_run_config(model_dir),
-        train_batch_size=num_shards,
-        params=dict(params, use_tpu=use_tpu),
-    )
-
-  def _extract_weights(checkpoint):
-    """Extract model weights from the given checkpoint file."""
-    weights = {}
-    graph = ops.Graph()
-    with graph.as_default():
-      features, labels = _input_adapter(dict(params, batch_size=num_shards))
-      model_fn(
-          features, labels,
-          params=dict(params, use_tpu=False),
-          mode=model_fn_lib.ModeKeys.TRAIN)
-      saver = tf_saver.Saver()
-      with tf_session.Session(graph=graph) as sess:
-        saver.restore(sess, checkpoint)
-        all_vars = []
-        all_vars.extend(graph.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
-        all_vars.extend(graph.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
-        all_vars.extend(graph.get_collection(ops.GraphKeys.MODEL_VARIABLES))
-
-        for var in all_vars:
-          weights[var.name] = sess.run(var)
-    return weights
-
-  def _run_step(use_tpu, model_dir):
-    """Create an estimator and run a single step on the given device."""
-    tf_session.Session.reset(target=master)
-
-    logging.info("Running step.  TPU=%d.  model_dir=%s", use_tpu, model_dir)
-    est = _make_estimator(use_tpu=use_tpu, model_dir=model_dir)
-    est.train(input_fn=_input_adapter, steps=num_steps)
-    weights = _extract_weights(est.latest_checkpoint())
-    with gfile.Open(os.path.join(temp_dir, "tpu-%d.weights" % use_tpu),
-                    "wb") as f:
-      f.write(pickle.dumps(weights))
-    return weights
-
-  # initialize models to the same weights by running a single step on the CPU
-  _run_step(use_tpu=False, model_dir=initial_model_dir)
-
-  copy_dir(initial_model_dir, cpu_model_dir)
-  copy_dir(initial_model_dir, tpu_model_dir)
-
-  cpu_weights = _run_step(use_tpu=False, model_dir=cpu_model_dir)
-  tpu_weights = _run_step(use_tpu=True, model_dir=tpu_model_dir)
-
-  bad_weights = False
-  for k in cpu_weights:
-    if k not in tpu_weights:
-      raise KeyError("Missing weight %s from TPU checkpoint.", k)
-
-    if not np.allclose(
-        cpu_weights[k], tpu_weights[k], rtol=tolerance, atol=tolerance):
-      bad_weights = True
-      logging.error("Weights for layer %s have diverged.", k)
-
-  if bad_weights:
-    raise ValueError("Some weights have diverged.  Output pickle files have "
-                     "been written to %s for inspection." % temp_dir)
-
-
-class TPUTestCase(test_util.TensorFlowTestCase):
-  """Adds helpers for testing on TPU devices to `TensorFlowTestCase`.
-
-  Example usage:
-
-  ```
-  def model_fn(features):
-    return tf.reduce_sum(features * 2)
-
-  class ModelTests(test_util.TPUTestCase):
-    def test_sum(self):
-      v = np.random.randn(10, 10).astype("float32")
-      self.assert_device_output(model_fn, [v], (v*2).sum(),
-                                devices=("cpu", "tpu"))
-  ```
-  """
-
-  def __init__(self, methodName="runTest"):  # pylint: disable=invalid-name
-    super(TPUTestCase, self).__init__(methodName)
-    self._available_devices = _available_devices()
-
-  def run_on_device(self, model_fn, model_inputs, device):
-    """Runs `model_fn` on the given device.
-
-    Raises an exception if no such device is available.  `model_fn` should
-    return one or more tensors as a list or tuple.
-
-    Args:
-      model_fn: Function returning one or more tensors.
-      model_inputs: An iterable of Numpy arrays or scalars.
-                    These will be passed as arguments to `model_fn`.
-      device: Device to run on.  One of ("tpu", "gpu", "cpu").
-
-    Returns:
-      Output from the model function.
-    """
-
-    def _make_placeholders():
-      return dict([(gen_array_ops.placeholder_with_default(v, v.shape), v)
-                   for v in model_inputs])
-
-    if device == "tpu":
-      with self.test_session(graph=ops.Graph()) as sess:
-        placeholders = _make_placeholders()
-        tpu_computation = tpu.rewrite(model_fn, placeholders.keys())
-        sess.run(tpu.initialize_system())
-        sess.run(variables.global_variables_initializer())
-        result = sess.run(tpu_computation, placeholders)
-        sess.run(tpu.shutdown_system())
-        # TODO(b/36891278): supports non-flat returns lists in tpu.rewrite().
-        if len(result) == 1:
-          return result[0]
-        return result
-    elif device == "gpu":
-      with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
-        placeholders = _make_placeholders()
-        sess.run(variables.global_variables_initializer())
-        return sess.run(model_fn(placeholders.keys()), placeholders)
-    elif device == "cpu":
-      # TODO(power) -- will this interact poorly with cached GPU sessions?
-      with self.test_session(graph=ops.Graph(), use_gpu=False) as sess:
-        placeholders = _make_placeholders()
-        sess.run(variables.global_variables_initializer())
-        return sess.run(model_fn(placeholders.keys()), placeholders)
-
-  def _compare_values(self, actual_outputs, expected_outputs):
-    if isinstance(expected_outputs, (list, tuple)):
-      for a, b in zip(actual_outputs, expected_outputs):
-        self.assertAllCloseAccordingToType(a, b)
-    else:
-      self.assertAllCloseAccordingToType(actual_outputs, expected_outputs)
-
-  def assert_device_output(self,
-                           model_fn,
-                           model_inputs,
-                           expected_outputs,
-                           devices=("cpu", "gpu", "tpu")):
-    """Run `model_fn` on the given devices.
-
-    Results are compared via `assertAllCloseAccordingToType`.
-
-    Args:
-      model_fn: Function returning one or more tensors
-      model_inputs: Numpy arrays or scalars passed as arguments to model_fn
-      expected_outputs: Numpy arrays or scalars to compare against.
-      devices: Set of devices to run on.  If a device is not available, tests
-               will be skipped for that device.
-    """
-    devices = set(devices).intersection(self._available_devices)
-
-    for device in devices:
-      device_out = self.run_on_device(model_fn, model_inputs, device=device)
-      self._compare_values(device_out, expected_outputs)
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index 7fb8a33698fdd2b37f42464e934331de65904bfe..8fec379aad8a90d06cd05f4858d25656384a12b2 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.util import compat
 
 
 # Operations that indicate some error in the users graph, e.g. a placeholder
@@ -52,6 +53,10 @@ _NOT_IMPLEMENTED_OPS = set([
     "TensorSummaryV2",
     ])
 
+_MAX_WARNING_LINES = 5
+
+_TPU_REPLICATE_ATTR = "_tpu_replicate"
+
 
 def _tpu_system_device_name(job):
   """Returns the device name for the TPU_SYSTEM device of `job`."""
@@ -101,7 +106,7 @@ def core(num):
   return "device:TPU_REPLICATED_CORE:{}".format(num)
 
 
-class TPUReplicateContext(control_flow_ops.ControlFlowContext):
+class TPUReplicateContext(control_flow_ops.XLAControlFlowContext):
   """A `ControlFlowContext` for nodes inside a TPU computation.
 
   The primary role of `TPUReplicateContext` is to mark operators inside a
@@ -117,8 +122,19 @@ class TPUReplicateContext(control_flow_ops.ControlFlowContext):
   """
 
   def __init__(self, name):
-    control_flow_ops.ControlFlowContext.__init__(self)
+    super(TPUReplicateContext, self).__init__()
     self._name = name
+    self._unsupported_ops = []
+
+  def report_unsupported_operations(self):
+    if self._unsupported_ops:
+      op_str = "\n".join(["  %s (%s)" % (op.type, op.name)
+                          for op in self._unsupported_ops[:_MAX_WARNING_LINES]])
+      logging.warning("%d unsupported operations found: \n%s",
+                      len(self._unsupported_ops), op_str)
+      if len(self._unsupported_ops) > _MAX_WARNING_LINES:
+        logging.warning("... and %d more" %
+                        (len(self._unsupported_ops) - _MAX_WARNING_LINES))
 
   def AddOp(self, op):
     self._AddOpInternal(op)
@@ -126,21 +142,21 @@ class TPUReplicateContext(control_flow_ops.ControlFlowContext):
   def _AddOpInternal(self, op):
     # pylint: disable=protected-access
     if op.type in _BLACKLISTED_OPS:
-      raise ValueError("Operation of type %s (%s) is not supported on the TPU" %
-                       (op.type, op.name))
+      logging.error("Operation of type %s (%s) is not supported on the TPU. "
+                    "Execution will fail if this op is used in the graph. " %
+                    (op.type, op.name))
 
     if op.type in _NOT_IMPLEMENTED_OPS:
-      logging.warning(
-          "Operation %s (%s) is not currently supported", op.type, op.name)
+      self._unsupported_ops.append(op)
 
     if any(x.dtype._is_ref_dtype for x in op.inputs):
       raise NotImplementedError(
           "Non-resource Variables are not supported inside TPU computations "
           "(operator name: %s)" % op.name)
     # pylint: enable=protected-access
-    if "_tpu_replicate" in op.node_def.attr:
+    if _TPU_REPLICATE_ATTR in op.node_def.attr:
       raise ValueError("TPU computations cannot be nested")
-    op.node_def.attr["_tpu_replicate"].s = self._name
+    op.node_def.attr[_TPU_REPLICATE_ATTR].s = compat.as_bytes(self._name)
     op.graph.prevent_feeding(op)
     op.graph.prevent_fetching(op)
 
@@ -344,6 +360,7 @@ def replicate(computation,
           new_output_tensors.append(array_ops.identity(t))
       output_tensors = new_output_tensors
     finally:
+      context.report_unsupported_operations()
       context.Exit()
 
     # Fan-out: Builds a TPUReplicatedOutput node for each output.
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py
index 916b9b3082fc197694933bdd6042706891be115c..0c2580211ab7674d841ca1953c9327df9488bb8e 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py
@@ -20,9 +20,19 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import json
+import os
 
 from tensorflow.contrib.tpu.python.tpu import util as util_lib
 from tensorflow.python.estimator import run_config as run_config_lib
+from tensorflow.python.platform import tf_logging as logging
+
+# pylint: disable=protected-access
+_TF_CONFIG_ENV = run_config_lib._TF_CONFIG_ENV
+_SERVICE_KEY = run_config_lib._SERVICE_KEY
+_TPU_WORKER_JOB_NAME = 'tpu_worker_job_name'
+
+# pylint: enable=protected-access
 
 
 class TPUConfig(
@@ -31,6 +41,7 @@ class TPUConfig(
         'num_shards',
         'per_host_input_for_training',
         'tpu_job_name',
+        'initial_infeed_sleep_secs',
     ])):
   """TPU related configuration required by `TPUEstimator`.
 
@@ -50,13 +61,17 @@ class TPUConfig(
       within TPUEstimator, however when using ClusterSpec propagation in more
       esoteric cluster configurations, you may need to specify the job name as a
       string.
+    initial_infeed_sleep_secs: The number of seconds the infeed thread should
+      wait before enqueueing the first batch. This helps avoid timeouts for
+      models that require a long compilation time.
   """
 
   def __new__(cls,
               iterations_per_loop=2,
               num_shards=2,
               per_host_input_for_training=True,
-              tpu_job_name=None):
+              tpu_job_name=None,
+              initial_infeed_sleep_secs=None):
 
     # Check iterations_per_loop.
     util_lib.check_positive_integer(iterations_per_loop,
@@ -64,18 +79,30 @@ class TPUConfig(
 
     # Check num_shards.
     util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards')
+
+    # Check initial_infeed_sleep_secs.
+    if initial_infeed_sleep_secs:
+      util_lib.check_positive_integer(initial_infeed_sleep_secs,
+                                      'TPUConfig initial_infeed_sleep_secs')
+
+    tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config()
+
     return super(TPUConfig, cls).__new__(
         cls,
         iterations_per_loop=iterations_per_loop,
         num_shards=num_shards,
         per_host_input_for_training=per_host_input_for_training,
-        tpu_job_name=tpu_job_name)
+        tpu_job_name=tpu_job_name,
+        initial_infeed_sleep_secs=initial_infeed_sleep_secs)
 
 
 class RunConfig(run_config_lib.RunConfig):
   """RunConfig with TPU support."""
 
-  def __init__(self, tpu_config=None, evaluation_master=None, master='',
+  def __init__(self,
+               tpu_config=None,
+               evaluation_master=None,
+               master=None,
                **kwargs):
     """Constructs a RunConfig.
 
@@ -89,11 +116,23 @@ class RunConfig(run_config_lib.RunConfig):
     """
     super(RunConfig, self).__init__(**kwargs)
     self._tpu_config = tpu_config or TPUConfig()
-    if evaluation_master is None:
-      self._evaluation_master = master
-    else:
+
+    # If user sets master and/or evaluation_master explicilty, including empty
+    # string '', take it. Otherwise, take the values set by parent class.
+    if master is not None:
+      self._master = master
+
+    if evaluation_master is not None:
       self._evaluation_master = evaluation_master
-    self._master = master
+    elif (not self._evaluation_master and
+          self.task_type != run_config_lib.TaskType.EVALUATOR):
+      # If the task type is EVALUATOR, it means some cluster manager sets the
+      # TF_CONFIG. In that case, we respect the configuration in TF_CONFIG.
+      #
+      # Otherwise, it means user executes the code without external cluster
+      # manager. For that, we optimize the user experience by setting
+      # evaluation_master to master, unless user overwrites it.
+      self._evaluation_master = self._master
 
   @property
   def evaluation_master(self):
@@ -115,3 +154,14 @@ class RunConfig(run_config_lib.RunConfig):
     new_instance = super(RunConfig, self).replace(**kwargs)
     new_instance._tpu_config = tpu_config  # pylint: disable=protected-access
     return new_instance
+
+
+def _get_tpu_job_name_from_tf_config():
+  """Extracts the TPU job name from TF_CONFIG env variable."""
+  # TODO(xiejw): Extends this to support both TF_CONFIG env variable and cluster
+  # spec propagation.
+  tf_config = json.loads(os.environ.get(_TF_CONFIG_ENV, '{}'))
+  tpu_job_name = tf_config.get(_SERVICE_KEY, {}).get(_TPU_WORKER_JOB_NAME)
+  if tpu_job_name:
+    logging.info('Load TPU job name from TF_CONFIG: %s', tpu_job_name)
+  return tpu_job_name
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_config_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..60884aa32f932413b49ea2193a145828489ea04c
--- /dev/null
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_config_test.py
@@ -0,0 +1,134 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TPU RunConfig tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+
+from tensorflow.contrib.tpu.python.tpu import tpu_config as tpu_config_lib
+from tensorflow.python.estimator import run_config as run_config_lib
+from tensorflow.python.platform import test
+
+
+def _set_tf_config_env_variable(tf_config):
+  return test.mock.patch.dict('os.environ', {
+      'TF_CONFIG': json.dumps(tf_config)
+  })
+
+
+class TPURunConfigTest(test.TestCase):
+
+  def test_fail_with_invalid_num_shards(self):
+    with self.assertRaisesRegexp(ValueError, 'must be positive'):
+      tpu_config_lib.RunConfig(
+          tpu_config=tpu_config_lib.TPUConfig(num_shards=0))
+
+  def test_fail_with_iterations_per_loop(self):
+    with self.assertRaisesRegexp(ValueError, 'must be positive'):
+      tpu_config_lib.RunConfig(
+          tpu_config=tpu_config_lib.TPUConfig(iterations_per_loop=0))
+
+
+class TPURunConfigMasterTest(test.TestCase):
+
+  def test_default_values(self):
+    run_config = tpu_config_lib.RunConfig()
+    self.assertEqual('', run_config.master)
+    self.assertEqual('', run_config.evaluation_master)
+
+  def test_user_provided_master_and_evaluation_master(self):
+    run_config = tpu_config_lib.RunConfig(
+        master='_master_123', evaluation_master='_eval_master_123')
+    self.assertEqual('_master_123', run_config.master)
+    self.assertEqual('_eval_master_123', run_config.evaluation_master)
+
+  def test_evaluation_master_defaults_to_master(self):
+    run_config = tpu_config_lib.RunConfig(master='_master_123')
+    self.assertEqual('_master_123', run_config.master)
+    self.assertEqual('_master_123', run_config.evaluation_master)
+
+  def test_tf_config(self):
+    tf_config = {
+        'session_master': '_master_123',
+        'eval_session_master': '_eval_master_123'
+    }
+    with _set_tf_config_env_variable(tf_config):
+      run_config = tpu_config_lib.RunConfig()
+      self.assertEqual('_master_123', run_config.master)
+      self.assertEqual('_eval_master_123', run_config.evaluation_master)
+
+  def test_evaluation_master_defaults_to_master_in_tf_config(self):
+    tf_config = {
+        'session_master': '_master_123',
+    }
+    with _set_tf_config_env_variable(tf_config):
+      run_config = tpu_config_lib.RunConfig()
+      self.assertEqual('_master_123', run_config.master)
+      self.assertEqual('_master_123', run_config.evaluation_master)
+
+  def test_respect_evaluation_master_in_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.EVALUATOR,
+            'index': 0
+        },
+    }
+    with _set_tf_config_env_variable(tf_config):
+      run_config = tpu_config_lib.RunConfig(master='_something')
+      self.assertEqual('', run_config.evaluation_master)
+
+  def test_user_overwrites_tf_config(self):
+    tf_config = {
+        'session_master': '_master_123',
+        'eval_session_master': '_eval_master_123'
+    }
+    with _set_tf_config_env_variable(tf_config):
+      run_config = tpu_config_lib.RunConfig(
+          master='_new_master_123', evaluation_master='_new_eval_master_123')
+      self.assertEqual('_new_master_123', run_config.master)
+      self.assertEqual('_new_eval_master_123', run_config.evaluation_master)
+
+  def test_user_overwrites_master_in_tf_config(self):
+    tf_config = {
+        'session_master': '_master_123',
+        'eval_session_master': '_eval_master_123'
+    }
+    with _set_tf_config_env_variable(tf_config):
+      run_config = tpu_config_lib.RunConfig(master='_new_master_123')
+      self.assertEqual('_new_master_123', run_config.master)
+      self.assertEqual('_eval_master_123', run_config.evaluation_master)
+
+
+class TPUJobNameTest(test.TestCase):
+
+  def test_default_name(self):
+    config = tpu_config_lib.RunConfig()
+    self.assertIsNone(config.tpu_config.tpu_job_name)
+
+  def test_with_tf_config(self):
+    tf_config = {'service': {'tpu_worker_job_name': '_my_new_name',}}
+    with _set_tf_config_env_variable(tf_config):
+      config = tpu_config_lib.RunConfig()
+      self.assertEqual('_my_new_name', config.tpu_config.tpu_job_name)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index fe17664d7f4afd033a795f22ebc1bc5819b7d108..bb35f4ece6ea7ebfd0db0332c6e8f2d2e2eb9f81 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -27,15 +27,16 @@ import time
 
 import six
 from six.moves import queue as Queue  # pylint: disable=redefined-builtin
+from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.contrib.tpu.python.ops import tpu_ops
 from tensorflow.contrib.tpu.python.tpu import tpu
 from tensorflow.contrib.tpu.python.tpu import tpu_config
 from tensorflow.contrib.tpu.python.tpu import tpu_feed
-from tensorflow.contrib.tpu.python.tpu import tpu_function
 from tensorflow.contrib.tpu.python.tpu import training_loop
 from tensorflow.contrib.tpu.python.tpu import util as util_lib
 
+from tensorflow.core.framework.summary_pb2 import Summary
 from tensorflow.core.protobuf import config_pb2
 
 from tensorflow.python.estimator import estimator as estimator_lib
@@ -53,6 +54,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import summary
+from tensorflow.python.training import basic_session_run_hooks
 from tensorflow.python.training import evaluation
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.training import training
@@ -216,6 +218,15 @@ class _TPUContext(object):
             (mode == model_fn_lib.ModeKeys.EVAL and
              self._eval_batch_size is None))
 
+  @property
+  def global_batch_size(self):
+    mode = self._assert_mode()
+    if mode == model_fn_lib.ModeKeys.EVAL and self._eval_batch_size is None:
+      raise RuntimeError('Internal error, EVAL on TPU is not enabled, but '
+                         '`global_batch_size` is called.')
+    return (self._train_batch_size
+            if mode == model_fn_lib.ModeKeys.TRAIN else self._eval_batch_size)
+
   @property
   def batch_size_for_input_fn(self):
     """Returns the shard batch size for `input_fn`."""
@@ -365,13 +376,17 @@ class TPUEstimatorSpec(collections.namedtuple('TPUEstimatorSpec', [
     'loss',
     'train_op',
     'eval_metrics',
-    'export_outputs'])):
+    'export_outputs',
+    'scaffold_fn'])):
   """Ops and objects returned from a `model_fn` and passed to `TPUEstimator`.
 
   See `EstimatorSpec` for `mode`, 'predictions, 'loss', 'train_op', and
   'export_outputs`.
 
-  TPU evaluation expects a slightly different signature from the
+  For evaluation, `eval_metrics `is a tuple of `metric_fn` and `tensors`, where
+  `metric_fn` runs on CPU to generate metrics and `tensors` represents the
+  `Tensor`s transferred from TPU system to CPU host and passed to `metric_fn`.
+  To be precise, TPU evaluation expects a slightly different signature from the
   ${tf.estimator.Estimator}. While `EstimatorSpec.eval_metric_ops` expects a
   dict, `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`.
   The `tensors` could be a list of `Tensor`s or dict of names to `Tensor`s. The
@@ -382,9 +397,11 @@ class TPUEstimatorSpec(collections.namedtuple('TPUEstimatorSpec', [
   to the `metric_fn` if `tensors` is list or keyword arguments if `tensors` is
   dict. `metric_fn` takes the `tensors` and returns a dict from metric string
   name to the result of calling a metric function, namely a `(metric_tensor,
-  update_op)` tuple.
+  update_op)` tuple. See `TPUEstimator` for MNIST example how to specify the
+  `eval_metrics`.
 
-  See `TPUEstimator` for MNIST example how to specify the `eval_metrics`.
+  `scaffold_fn` is a function running on CPU to generate the `Scaffold`. This
+  function should not capture any Tensors in `model_fn`.
   """
 
   def __new__(cls,
@@ -393,7 +410,8 @@ class TPUEstimatorSpec(collections.namedtuple('TPUEstimatorSpec', [
               loss=None,
               train_op=None,
               eval_metrics=None,
-              export_outputs=None):
+              export_outputs=None,
+              scaffold_fn=None):
     """Creates a validated `TPUEstimatorSpec` instance."""
     if eval_metrics is not None:
       _EvalMetrics.validate(eval_metrics)
@@ -403,18 +421,21 @@ class TPUEstimatorSpec(collections.namedtuple('TPUEstimatorSpec', [
                                                 loss=loss,
                                                 train_op=train_op,
                                                 eval_metrics=eval_metrics,
-                                                export_outputs=export_outputs)
+                                                export_outputs=export_outputs,
+                                                scaffold_fn=scaffold_fn)
 
   def as_estimator_spec(self):
     """Creates an equivalent `EstimatorSpec` used by CPU train/eval."""
     eval_metric_ops = _EvalMetrics.to_metric_metric_ops_for_cpu(
         self.eval_metrics)
+    scaffold = self.scaffold_fn() if self.scaffold_fn else None
     return model_fn_lib.EstimatorSpec(mode=self.mode,
                                       predictions=self.predictions,
                                       loss=self.loss,
                                       train_op=self.train_op,
                                       eval_metric_ops=eval_metric_ops,
-                                      export_outputs=self.export_outputs)
+                                      export_outputs=self.export_outputs,
+                                      scaffold=scaffold)
 
 
 class _InfeedOutfeedThreadBaseController(object):
@@ -467,13 +488,20 @@ class _OutfeedThreadController(_InfeedOutfeedThreadBaseController):
 class _InfeedThreadController(_InfeedOutfeedThreadBaseController):
   """This wraps the infeed thread and stops when Estimator finishes."""
 
-  def __init__(self, session, enqueue_ops):
+  def __init__(self, session, enqueue_ops, initial_infeed_sleep_secs):
     super(_InfeedThreadController, self).__init__(
-        threading.Thread(target=self._input_thread_fn_for_loading,
-                         args=(session, enqueue_ops)))
+        threading.Thread(
+            target=self._input_thread_fn_for_loading,
+            args=(session, enqueue_ops, initial_infeed_sleep_secs)))
 
-  def _input_thread_fn_for_loading(self, session, enqueue_ops):
+  def _input_thread_fn_for_loading(self, session, enqueue_ops,
+                                   initial_infeed_sleep_secs):
     count = 0
+    if initial_infeed_sleep_secs:
+      logging.info('Infeed thread sleeping for %d seconds.',
+                   initial_infeed_sleep_secs)
+      time.sleep(initial_infeed_sleep_secs)
+      logging.info('Infeed thread starting after sleep')
     try:
       while True:
         signal = self._signal_queue.get()
@@ -514,6 +542,7 @@ class _InfeedThreadController(_InfeedOutfeedThreadBaseController):
           exc_info=1
       )
       time.sleep(120)
+      logging.error('Closing the failed session.')
       session.close()
 
   def join(self):
@@ -534,6 +563,8 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
     self._master_job = ctx.master_job
     self._enqueue_ops = enqueue_ops
     self._dequeue_ops = dequeue_ops
+    self._initial_infeed_sleep_secs = (
+        ctx.config.tpu_config.initial_infeed_sleep_secs)
 
   def begin(self):
     logging.info('TPU job name %s', self._master_job)
@@ -548,7 +579,7 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
 
     logging.info('Start infeed thread controller')
     self._infeed_thd_controller = _InfeedThreadController(
-        session, self._enqueue_ops)
+        session, self._enqueue_ops, self._initial_infeed_sleep_secs)
 
     if self._dequeue_ops is not None:
       logging.info('Start outfeed thread controller')
@@ -669,7 +700,7 @@ class _SetEvalIterationsHook(session_run_hook.SessionRunHook):
 def generate_per_core_enqueue_ops_fn_for_host(
     ctx, input_fn, inputs_structure_recorder):
   """Generates infeed enqueue ops for per-core input_fn on a single host."""
-  infeed_queue_holder = {'instance': None}
+  captured_infeed_queue = _CapturedObject()
 
   def enqueue_ops_fn():
     """A fn returns enqueue_ops."""
@@ -692,7 +723,7 @@ def generate_per_core_enqueue_ops_fn_for_host(
 
     infeed_queue = tpu_feed.InfeedQueue(
         number_of_tuple_elements=len(per_host_sharded_inputs[0]))
-    infeed_queue_holder['instance'] = infeed_queue
+    captured_infeed_queue.capture(infeed_queue)
     infeed_queue.set_configuration_from_sharded_input_tensors(
         per_host_sharded_inputs)
 
@@ -700,13 +731,13 @@ def generate_per_core_enqueue_ops_fn_for_host(
         per_host_sharded_inputs,
         tpu_ordinal_function=ctx.tpu_ordinal_function)
     return per_host_enqueue_ops
-  return enqueue_ops_fn, (lambda: infeed_queue_holder['instance'])
+  return enqueue_ops_fn, captured_infeed_queue
 
 
 def generate_per_host_enqueue_ops_fn_for_host(
     ctx, input_fn, inputs_structure_recorder, batch_axis, device):
   """Generates infeed enqueue ops for per-host input_fn on a single host."""
-  infeed_queue_holder = {'instance': None}
+  captured_infeed_queue = _CapturedObject()
 
   def enqueue_ops_fn():
     with ops.device(device):
@@ -726,7 +757,7 @@ def generate_per_host_enqueue_ops_fn_for_host(
           tuple_types=[t.dtype for t in unsharded_tensor_list],
           tuple_shapes=[t.shape for t in unsharded_tensor_list],
           shard_dimensions=batch_axis)
-      infeed_queue_holder['instance'] = infeed_queue
+      captured_infeed_queue.capture(infeed_queue)
       infeed_queue.set_number_of_shards(num_cores_per_host)
 
       per_host_enqueue_ops = (
@@ -734,7 +765,7 @@ def generate_per_host_enqueue_ops_fn_for_host(
               unsharded_tensor_list,
               placement_function=lambda x: device))
       return per_host_enqueue_ops
-  return enqueue_ops_fn, (lambda: infeed_queue_holder['instance'])
+  return enqueue_ops_fn, captured_infeed_queue
 
 
 class _InputPipeline(object):
@@ -924,7 +955,7 @@ class _InputPipeline(object):
         host_device = tpu_host_placement_fn(host_id=host_id)
         with ops.device(host_device):
           with ops.name_scope('input_pipeline_task%d' % (host_id)):
-            enqueue_ops_fn, infeed_queue_getter = (
+            enqueue_ops_fn, captured_infeed_queue = (
                 generate_per_core_enqueue_ops_fn_for_host(
                     self._ctx, self._input_fn, self._inputs_structure_recorder))
 
@@ -934,14 +965,14 @@ class _InputPipeline(object):
             else:
               enqueue_ops.append(enqueue_ops_fn())
             # Infeed_queue_getter must be called after enqueue_ops_fn is called.
-            infeed_queues.append(infeed_queue_getter())
+            infeed_queues.append(captured_infeed_queue.get())
 
     else:
       for host_id in range(num_hosts):
         host_device = tpu_host_placement_fn(host_id=host_id)
         with ops.device(host_device):
           with ops.name_scope('input_pipeline_task%d' % (host_id)):
-            enqueue_ops_fn, infeed_queue_getter = (
+            enqueue_ops_fn, captured_infeed_queue = (
                 generate_per_host_enqueue_ops_fn_for_host(
                     self._ctx, self._input_fn, self._inputs_structure_recorder,
                     self._batch_axis, host_device))
@@ -951,7 +982,7 @@ class _InputPipeline(object):
                   device=host_device, op_fn=enqueue_ops_fn))
             else:
               enqueue_ops.append(enqueue_ops_fn())
-            infeed_queues.append(infeed_queue_getter())
+            infeed_queues.append(captured_infeed_queue.get())
     # infeed_queue is used to generate dequeue ops. The only thing it uses for
     # dequeue is dtypes and types. So, any one can be used. Here, grab the
     # first one.
@@ -992,10 +1023,7 @@ class _ModelFnWrapper(object):
     self._ctx = ctx
 
   def call_without_tpu(self, features, labels):
-    # Let CrossShardOptimizer be called without TPU in model_fn, since it's
-    # common to set the train_op even when running evaluate() or predict().
-    with tpu_function.tpu_shard_context(1):
-      return self._call_model_fn(features, labels)
+    return self._call_model_fn(features, labels)
 
   def convert_to_single_tpu_train_step(self, dequeue_fn):
     """Converts user provided model_fn` as a single train step on TPU.
@@ -1019,6 +1047,8 @@ class _ModelFnWrapper(object):
       A Fn representing the train step for TPU.
     """
 
+    captured_scaffold_fn = _CapturedObject()
+
     def train_step(loss):
       """Training step function for use inside a while loop."""
       del loss  # unused; required in function signature.
@@ -1027,9 +1057,15 @@ class _ModelFnWrapper(object):
       estimator_spec = self._verify_estimator_spec(
           self._call_model_fn(features, labels))
       loss, train_op = estimator_spec.loss, estimator_spec.train_op
+
+      if isinstance(estimator_spec, TPUEstimatorSpec):
+        captured_scaffold_fn.capture(estimator_spec.scaffold_fn)
+      else:
+        captured_scaffold_fn.capture(None)
+
       with ops.control_dependencies([train_op]):
         return array_ops.identity(loss)
-    return train_step
+    return train_step, captured_scaffold_fn
 
   def convert_to_single_tpu_eval_step(self, dequeue_fn):
     """Converts user provided model_fn` as a single eval step on TPU.
@@ -1058,6 +1094,7 @@ class _ModelFnWrapper(object):
       step for TPU. and eval_metrics is an `_EvalMetrics` instance.
     """
     eval_metrics = _EvalMetrics(self._ctx)
+    captured_scaffold_fn = _CapturedObject()
 
     def eval_step(total_loss):
       """Evaluation step function for use inside a while loop."""
@@ -1070,12 +1107,13 @@ class _ModelFnWrapper(object):
             '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec)))
 
       loss = tpu_estimator_spec.loss
+      captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn)
       eval_metrics.record(tpu_estimator_spec)
       outfeed_ops = tpu_ops.outfeed_enqueue_tuple(eval_metrics.outfeed_tensors)
 
       with ops.control_dependencies([outfeed_ops]):
         return math_ops.add(total_loss, loss)
-    return eval_step, eval_metrics
+    return eval_step, eval_metrics, captured_scaffold_fn
 
   def _call_model_fn(self, features, labels):
     """Calls the model_fn with required parameters."""
@@ -1129,6 +1167,10 @@ class _ModelFnWrapper(object):
       raise ValueError(err_msg.format('training_hooks'))
     if estimator_spec.evaluation_hooks:
       raise ValueError(err_msg.format('evaluation_hooks'))
+
+    if estimator_spec.scaffold:
+      logging.warning('EstimatorSpec.Scaffold is ignored by TPU train/eval. '
+                      'Please use TPUEstimatorSpec.')
     return estimator_spec
 
 
@@ -1286,6 +1328,31 @@ class _EvalMetrics(object):
     return eval_metric_ops, eval_update_ops
 
 
+class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook):
+  """Count examples during runtime."""
+
+  def __init__(self,
+               batch_size,
+               every_n_steps=100,
+               every_n_secs=None,
+               output_dir=None,
+               summary_writer=None):
+    self._batch_size = batch_size
+    super(ExamplesPerSecondHook, self).__init__(
+        every_n_steps=every_n_steps,
+        every_n_secs=every_n_secs,
+        output_dir=output_dir,
+        summary_writer=summary_writer)
+
+  def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
+    examples_per_sec = self._batch_size * elapsed_steps / elapsed_time
+    if self._summary_writer is not None:
+      example_summary = Summary(value=[Summary.Value(
+          tag='examples_sec', simple_value=examples_per_sec)])
+      self._summary_writer.add_summary(example_summary, global_step)
+    logging.info('examples/sec: %g', examples_per_sec)
+
+
 class TPUEstimator(estimator_lib.Estimator):
   """Estimator with TPU support.
 
@@ -1503,8 +1570,8 @@ class TPUEstimator(estimator_lib.Estimator):
     if max_steps is not None:
       util_lib.check_positive_integer(max_steps, 'Train max_steps')
 
-    return [_TPUStopAtStepHook(self._iterations_per_training_loop,
-                               steps, max_steps)]
+    return [_TPUStopAtStepHook(self._iterations_per_training_loop, steps,
+                               max_steps)]
 
   def _convert_eval_steps_to_hooks(self, steps):
     with self._ctx.with_mode(model_fn_lib.ModeKeys.EVAL) as ctx:
@@ -1516,11 +1583,11 @@ class TPUEstimator(estimator_lib.Estimator):
 
     util_lib.check_positive_integer(steps, 'Eval steps')
 
-    hooks = []
-    hooks.append(evaluation._StopAfterNEvalsHook(  # pylint: disable=protected-access
-        num_evals=steps))
-    hooks.append(_SetEvalIterationsHook(steps))
-    return hooks
+    return [
+        evaluation._StopAfterNEvalsHook(  # pylint: disable=protected-access
+            num_evals=steps),
+        _SetEvalIterationsHook(steps)
+    ]
 
   def _call_input_fn(self, input_fn, mode):
     """Calls the input function.
@@ -1597,12 +1664,16 @@ class TPUEstimator(estimator_lib.Estimator):
             input_holders.generate_infeed_enqueue_ops_and_dequeue_fn())
 
         if mode == model_fn_lib.ModeKeys.TRAIN:
-          loss = _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn)
+          loss, scaffold = (
+              _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
           hooks = [
               TPUInfeedOutfeedSessionHook(ctx, enqueue_ops),
+              ExamplesPerSecondHook(ctx.global_batch_size),
               training.LoggingTensorHook(
-                  {'loss': array_ops.identity(loss),
-                   'step': training.get_global_step()},
+                  {
+                      'loss': array_ops.identity(loss),
+                      'step': training.get_global_step()
+                  },
                   every_n_secs=30)
           ]
           summary.scalar(model_fn_lib.LOSS_METRIC_KEY, loss)
@@ -1616,10 +1687,11 @@ class TPUEstimator(estimator_lib.Estimator):
               mode,
               loss=loss,
               training_hooks=hooks,
-              train_op=control_flow_ops.group(*update_ops))
+              train_op=control_flow_ops.group(*update_ops),
+              scaffold=scaffold)
 
         # Now eval.
-        total_loss, eval_metric_ops = _eval_on_tpu_system(
+        total_loss, eval_metric_ops, scaffold = _eval_on_tpu_system(
             ctx, model_fn_wrapper, dequeue_fn)
         iterations_per_loop_var = _create_or_get_iterations_per_loop()
         mean_loss = math_ops.div(
@@ -1650,7 +1722,8 @@ class TPUEstimator(estimator_lib.Estimator):
             mode,
             loss=mean_loss,
             evaluation_hooks=hooks,
-            eval_metric_ops=eval_metric_ops)
+            eval_metric_ops=eval_metric_ops,
+            scaffold=scaffold)
     return _model_fn
 
 
@@ -1659,7 +1732,7 @@ def _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
   num_cores = ctx.num_cores
   iterations_per_loop_var = _create_or_get_iterations_per_loop()
 
-  single_tpu_eval_step, eval_metric_ops = (
+  single_tpu_eval_step, eval_metric_ops, captured_scaffold_fn = (
       model_fn_wrapper.convert_to_single_tpu_eval_step(dequeue_fn))
 
   def multi_tpu_eval_steps_on_single_shard():
@@ -1672,7 +1745,9 @@ def _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
                       inputs=[],
                       num_shards=num_cores,
                       outputs_from_all_shards=False)
-  return loss, eval_metric_ops
+
+  scaffold = _get_scaffold(captured_scaffold_fn)
+  return loss, eval_metric_ops, scaffold
 
 
 def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
@@ -1680,8 +1755,8 @@ def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
   num_cores = ctx.num_cores
   iterations_per_loop_var = _create_or_get_iterations_per_loop()
 
-  single_tpu_train_step = model_fn_wrapper.convert_to_single_tpu_train_step(
-      dequeue_fn)
+  single_tpu_train_step, captured_scaffold_fn = (
+      model_fn_wrapper.convert_to_single_tpu_train_step(dequeue_fn))
 
   def multi_tpu_train_steps_on_single_shard():
     return training_loop.repeat(
@@ -1694,7 +1769,9 @@ def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
                       inputs=[],
                       num_shards=num_cores,
                       outputs_from_all_shards=False)
-  return loss
+
+  scaffold = _get_scaffold(captured_scaffold_fn)
+  return loss, scaffold
 
 
 def _wrap_computation_in_while_loop(device, op_fn):
@@ -1730,3 +1807,74 @@ def _validate_tpu_training_graph():
         'CrossShardOptimizer must be used for model training on TPUs.')
 
 
+class _CapturedObject(object):
+  """A placeholder to capture an object.
+
+  This is useful when we need to capture a Python object in the Tensorflow
+  control flow body function and use it outside the control flow.
+  """
+
+  def __init__(self):
+    self._object = None
+    self._captured = False
+
+  def capture(self, o):
+    if self._captured:
+      raise RuntimeError(
+          'InternalError: Object can be captured only. Please file bug .')
+
+    self._captured = True
+    self._object = o
+
+  def get(self):
+    if not self._captured:
+      raise RuntimeError(
+          'InternalError: Object is not captured properly before `get`. '
+          'Please file bug .')
+    return self._object
+
+
+def _get_scaffold(captured_scaffold_fn):
+  """Retrieves the Scaffold from `captured_scaffold_fn`."""
+  with _CapturingContext(message='Inside scaffold_fn'):
+    scaffold_fn = captured_scaffold_fn.get()
+    if scaffold_fn:
+      scaffold = scaffold_fn()
+      if scaffold is None:
+        raise ValueError(
+            'TPUEstimatorSpec.scaffold_fn returns None, which is not allowed')
+    else:
+      scaffold = None
+
+  if scaffold:
+    wrapped_finalize = scaffold.finalize
+    def _finalize():
+      with _CapturingContext('Inside Scaffold.finalize'):
+        wrapped_finalize()
+    scaffold.finalize = _finalize
+  return scaffold
+
+
+class _CapturingContext(control_flow_ops.ControlFlowContext):
+  """Tracks references to Tensors defined in TPU replication."""
+
+  def __init__(self, message):
+    control_flow_ops.ControlFlowContext.__init__(self)
+    self._message = message
+
+  def AddOp(self, op):  # pylint: disable=invalid-name
+    for c in op.inputs:
+      if tpu._TPU_REPLICATE_ATTR in c.op.node_def.attr:  # pylint: disable=protected-access
+        raise ValueError(
+            '{}: Op {} depends on TPU computation {}, '
+            'which is not allowed.'.format(self._message, op, c))
+
+  def __enter__(self):
+    # pylint: disable=protected-access
+    self._g = ops.get_default_graph()
+    self._old = self._g._get_control_flow_context()
+    self._g._set_control_flow_context(self)
+    # pylint: enable=protected-access
+
+  def __exit__(self, _, __, ___):  # pylint: disable=invalid-name
+    self._g._set_control_flow_context(self._old)  # pylint: disable=protected-access
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py b/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py
index a00fd1d0869ab4403d879d2fc08f2bba0a13a7a8..e76cf83e4ddcd86ab3971bcecefe2e2dc979bf63 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_optimizer.py
@@ -22,6 +22,7 @@ from __future__ import print_function
 from tensorflow.contrib.tpu.python.ops import tpu_ops
 from tensorflow.contrib.tpu.python.tpu import tpu_function
 from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import optimizer
 
 
@@ -74,8 +75,10 @@ class CrossShardOptimizer(optimizer.Optimizer):
     """
     num_shards = tpu_function.get_tpu_context().number_of_shards
     if num_shards is None:
-      raise ValueError("CrossShardOptimizer must be used within a "
-                       "tpu_shard_context.")
+      logging.warning(
+          "CrossShardOptimizer should be used within a tpu_shard_context, but "
+          "got unset number_of_shards. Assuming 1.")
+      num_shards = 1
     if num_shards > 1 and self._reduction == losses.Reduction.MEAN:
       scale = 1.0 / num_shards
       loss *= scale
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..336d8260c3c8a5c30efa603e3faeabcc0944b8d0
--- /dev/null
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_test.py
@@ -0,0 +1,80 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Tests for tpu_function helpers."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.tpu.python.tpu import tpu
+from tensorflow.contrib.tpu.python.tpu import tpu_feed
+from tensorflow.contrib.tpu.python.tpu import training_loop
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.layers import convolutional
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import math_ops
+
+from tensorflow.python.platform import test
+
+
+class TPUContextTest(test.TestCase):
+
+  def testIsInContext(self):
+    """Test that control_flow_util can check that we're in a TPU context."""
+    z1 = array_ops.identity(1)
+    context = tpu.TPUReplicateContext(b"context")
+    context.Enter()
+    z2 = array_ops.identity(1)
+    context.Exit()
+    self.assertFalse(control_flow_util.IsInXLAContext(z1.op))
+    self.assertTrue(control_flow_util.IsInXLAContext(z2.op))
+
+
+class TPULayerRewriteTest(test.TestCase):
+
+  def testUsingInfeedQueueWithRegularizer(self):
+    """Test that Layer regularizers can reference data created in loops."""
+
+    def make_regularizer(scale):
+      return lambda inputs: scale * math_ops.reduce_sum(math_ops.square(inputs))
+
+    def training_step(inputs, scale):
+      outputs = convolutional.conv2d(
+          inputs,
+          filters=16,
+          kernel_size=(3, 3),
+          data_format="channels_first",
+          kernel_regularizer=make_regularizer(scale))
+      loss = math_ops.reduce_mean(math_ops.square(outputs))
+      return loss.op
+
+    inputs = array_ops.zeros(shape=(128, 32, 32, 16))
+    scale = array_ops.ones(shape=())
+    infeed = tpu_feed.InfeedQueue(
+        tuple_types=[dtypes.float32, dtypes.float32],
+        tuple_shapes=[inputs.shape, scale.shape])
+
+    def loop():
+      return training_loop.repeat(5, training_step, infeed_queue=infeed)
+
+    # This should not throw an error.
+    tpu.rewrite(loop)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/training/BUILD b/tensorflow/contrib/training/BUILD
index 6139c1d5838c24414549b4e2bc4722175f2d1925..cccaa2b833ee764921508a5b6d6affe0b8822ede 100644
--- a/tensorflow/contrib/training/BUILD
+++ b/tensorflow/contrib/training/BUILD
@@ -26,7 +26,6 @@ py_library(
         "python/training/resample.py",
         "python/training/sampling_ops.py",
         "python/training/sequence_queueing_state_saver.py",
-        "python/training/sgdr_learning_rate_decay.py",
         "python/training/training.py",
         "python/training/tuner.py",
     ],
diff --git a/tensorflow/contrib/training/python/__init__.py b/tensorflow/contrib/training/python/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..52e83069cb0c68b510da46149248369dce376647 100644
--- a/tensorflow/contrib/training/python/__init__.py
+++ b/tensorflow/contrib/training/python/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/training/python/training/__init__.py b/tensorflow/contrib/training/python/training/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..52e83069cb0c68b510da46149248369dce376647 100644
--- a/tensorflow/contrib/training/python/training/__init__.py
+++ b/tensorflow/contrib/training/python/training/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
diff --git a/tensorflow/contrib/training/python/training/bucket_ops.py b/tensorflow/contrib/training/python/training/bucket_ops.py
index 95fbc50cba73b25b748c31ecd443eb19c0b6fc8a..e7f23edc901eacfa3a753792c2dbf738bb5a9421 100644
--- a/tensorflow/contrib/training/python/training/bucket_ops.py
+++ b/tensorflow/contrib/training/python/training/bucket_ops.py
@@ -265,16 +265,22 @@ def bucket(tensors,
         for i, (q, bs) in enumerate(zip(bucket_queues, batch_size))
     ]
 
-    for i, q in enumerate(bucket_queues):
-      queue_runner.add_queue_runner(
-          queue_runner.QueueRunner(
-              q, [enqueues_to_top[i]],
-              queue_closed_exception_types=(errors.OutOfRangeError,
-                                            errors.CancelledError)))
+    queue_runner.add_queue_runner(
+        queue_runner.QueueRunner(
+            bucket_queues[0], enqueues_to_top,
+            close_op=top_queue.close(),
+            cancel_op=top_queue.close(cancel_pending_enqueues=True),
+            queue_closed_exception_types=(errors.OutOfRangeError,
+                                          errors.CancelledError)))
     queue_runner.add_queue_runner(
         queue_runner.QueueRunner(
             top_queue,
             bucket_enqueue_ops,
+            close_op=control_flow_ops.group(
+                *[q.close() for q in bucket_queues]),
+            cancel_op=control_flow_ops.group(
+                *[q.close(cancel_pending_enqueues=True)
+                  for q in bucket_queues]),
             queue_closed_exception_types=(errors.OutOfRangeError,
                                           errors.CancelledError)))
 
diff --git a/tensorflow/contrib/training/python/training/bucket_ops_test.py b/tensorflow/contrib/training/python/training/bucket_ops_test.py
index 330bee8a3fb13cd703fb260952d33e58623ca09c..504f1fcd417f99a8aaa72504f1852e523da1a4c9 100644
--- a/tensorflow/contrib/training/python/training/bucket_ops_test.py
+++ b/tensorflow/contrib/training/python/training/bucket_ops_test.py
@@ -23,6 +23,7 @@ import numpy as np
 from tensorflow.contrib.training.python.training import bucket_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes as dtypes_lib
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import array_ops
@@ -321,7 +322,8 @@ class BucketBySequenceLengthTest(test.TestCase):
 
   def _testBucketBySequenceLength(self,
                                   allow_small_batch,
-                                  bucket_capacities=None):
+                                  bucket_capacities=None,
+                                  drain_entire_queue=True):
     ops.reset_default_graph()
 
     # All inputs must be identical lengths across tuple index.
@@ -339,6 +341,7 @@ class BucketBySequenceLengthTest(test.TestCase):
 
     batch_size = 8
     bucket_boundaries = [3, 4, 5, 10]
+    num_pairs_to_enqueue = 50 * batch_size + 100
 
     # Make capacity very large so we can feed all the inputs in the
     # main thread without blocking
@@ -366,34 +369,47 @@ class BucketBySequenceLengthTest(test.TestCase):
                      [expected_batch_size, labels_len])
 
     def _read_test(sess):
-      for _ in range(50):
-        (out_lengths, (data, labels)) = sess.run(
-            (out_lengths_t, data_and_labels_t))
+      num_pairs_dequeued = 0
+      try:
+        while drain_entire_queue or num_pairs_dequeued < 40 * batch_size:
+          (out_lengths, (data, labels)) = sess.run(
+              (out_lengths_t, data_and_labels_t))
+          num_pairs_dequeued += out_lengths.shape[0]
+          if allow_small_batch:
+            self.assertEqual(data_len, data.shape[1])
+            self.assertEqual(labels_len, labels.shape[1])
+            self.assertGreaterEqual(batch_size, out_lengths.shape[0])
+            self.assertGreaterEqual(batch_size, data.shape[0])
+            self.assertGreaterEqual(batch_size, labels.shape[0])
+          else:
+            self.assertEqual((batch_size, data_len), data.shape)
+            self.assertEqual((batch_size, labels_len), labels.shape)
+            self.assertEqual((batch_size,), out_lengths.shape)
+          for (lr, dr, tr) in zip(out_lengths, data, labels):
+            # Make sure length matches data (here it's the same value).
+            self.assertEqual(dr[0], lr)
+            # Make sure data & labels match.
+            self.assertEqual(dr[0], int(tr[0].decode("ascii")))
+            # Make sure for each row, data came from the same bucket.
+            self.assertEqual(
+                _which_bucket(bucket_boundaries, dr[0]),
+                _which_bucket(bucket_boundaries, dr[1]))
+      except errors.OutOfRangeError:
         if allow_small_batch:
-          self.assertEqual(data_len, data.shape[1])
-          self.assertEqual(labels_len, labels.shape[1])
-          self.assertGreaterEqual(batch_size, out_lengths.shape[0])
-          self.assertGreaterEqual(batch_size, data.shape[0])
-          self.assertGreaterEqual(batch_size, labels.shape[0])
+          self.assertEqual(num_pairs_to_enqueue, num_pairs_dequeued)
         else:
-          self.assertEqual((batch_size, data_len), data.shape)
-          self.assertEqual((batch_size, labels_len), labels.shape)
-          self.assertEqual((batch_size,), out_lengths.shape)
-        for (lr, dr, tr) in zip(out_lengths, data, labels):
-          # Make sure length matches data (here it's the same value).
-          self.assertEqual(dr[0], lr)
-          # Make sure data & labels match.
-          self.assertEqual(dr[0], int(tr[0].decode("ascii")))
-          # Make sure for each row, data came from the same bucket.
-          self.assertEqual(
-              _which_bucket(bucket_boundaries, dr[0]),
-              _which_bucket(bucket_boundaries, dr[1]))
+          # Maximum left over in the queues should be at most one less than the
+          # batch_size, for every bucket.
+          num_buckets = len(bucket_boundaries) + 2
+          self.assertLessEqual(
+              num_pairs_to_enqueue - (batch_size - 1) * num_buckets,
+              num_pairs_dequeued)
 
     with self.test_session() as sess:
       coord = coordinator.Coordinator()
 
       # Feed the inputs, then close the input thread.
-      for _ in range(50 * batch_size + 100):
+      for _ in range(num_pairs_to_enqueue):
         which = random.randint(0, len(input_pairs) - 1)
         length, pair = input_pairs[which]
         sess.run(input_enqueue_op,
@@ -425,6 +441,10 @@ class BucketBySequenceLengthTest(test.TestCase):
     self._testBucketBySequenceLength(allow_small_batch=True,
                                      bucket_capacities=capacities)
 
+  def testBucketBySequenceLengthShutdown(self):
+    self._testBucketBySequenceLength(allow_small_batch=True,
+                                     drain_entire_queue=False)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py b/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py
deleted file mode 100644
index ed0f398e30a7f3c0b1b9378f8fc5d5bfbea1536a..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py
+++ /dev/null
@@ -1,187 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""SGDR learning rate decay function."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import math_ops, control_flow_ops
-
-
-def sgdr_decay(learning_rate, global_step, initial_period_steps,
-               t_mul=2.0, m_mul=1.0, name=None):
-  """Implements Stochastic Gradient Descent with Warm Restarts (SGDR).
-
-  As described in "SGDR: Stochastic Gradient Descent
-  with Warm Restarts" by Ilya Loshchilov & Frank Hutter, Proceedings of
-  ICLR'2017, available at https://arxiv.org/pdf/1608.03983.pdf
-
-  The learning rate decreases according to cosine annealing:
-
-  ```python
-  learning_rate * 0.5 * (1 + cos(x_val * pi)) # for x_val defined in [0, 1]
-  ```
-
-  Thus, at the beginning (when the restart index i = 0),
-  the learning rate decreases for `initial_period_steps` steps from the initial
-  learning rate `learning_rate` (when `x_val=0`, we get `cos(0)=1`) to
-  0 (when `x_val=1`, we get `cos(pi)=-1`).
-
-  The decrease within the i-th period takes `t_i` steps,
-  where `t_0` = `initial_period_steps` is the user-defined number of batch
-  iterations (not epochs as in the paper) to be performed before the first
-  restart is launched.
-
-  Then, we perform the first restart (i=1) by setting the learning rate to
-  `learning_rate*(m_mul^i)`, where `m_mul in [0,1]` (set to 1 by default).
-  The i-th restart runs for `t_i=t_0*(t_mul^i)` steps, i.e., every new
-  restart runs `t_mul` times longer than the previous one.
-
-  Importantly, when one has no access to a validation set, SGDR suggests
-  to report the best expected / recommended solution in the following way:
-  When we are within our initial run (i=0), every new solution represents
-  SGDR's recommended solution. Instead, when i>0, the recommended solution is
-  the one obtained at the end of each restart.
-
-  Note that the minimum learning rate is set to 0 for simplicity,
-  you can adjust the code to deal with any positive minimum learning rate
-  as defined in the paper.
-
-  `initial_period_steps` is the duration of the first period measured in terms
-  of number of minibatch updates. If one wants to use epochs, one should compute
-  the number of updates required for an epoch.
-
-  For example, assume the following parameters and intention:
-      Minibatch size: 100
-      Training dataset size: 10000
-      If the user wants the first decay period to span across 5 epochs, then
-      `initial_period_steps` = 5 * 10000/100 = 500
-
-      Train for 10000 batch iterations with the initial learning rate set to
-      0.1, then restart to run 2 times longer, i.e, for 20000 batch iterations
-      and with the initial learning rate 0.05, then restart again and again,
-      doubling the runtime of each new period and with two times smaller
-      initial learning rate.
-
-  To accomplish the above, one would write:
-
-  ```python
-  ...
-  global_step = tf.Variable(0, trainable=False)
-  starter_learning_rate = 0.1
-  learning_rate = sgdr_decay(starter_learning_rate, global_step,
-                             initial_period_steps=10000, t_mul=2, m_mul=0.5)
-  # Passing global_step to minimize() will increment it at each step.
-  learning_step = (
-      tf.train.GradientDescentOptimizer(learning_rate)
-      .minimize(...my loss..., global_step=global_step)
-  )
-
-  # Step  | 0   | 1000  | 5000 | 9000  | 9999 | 10000 | 11000  |
-  # LR    | 0.1 | 0.097 | 0.05 | 0.002 | 0.00 | 0.05  | 0.0496 |
-
-  # Step  | 20000 | 29000  | 29999 | 30000 |
-  # LR    | 0.025 | 0.0003 | 0.00  | 0.025 |
-  ```
-
-  Args:
-    learning_rate: A scalar `float32` or `float64` `Tensor` or a
-      Python number.  The initial learning rate.
-    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
-      Global step to use for the decay computation.  Must not be negative.
-    initial_period_steps: Duration of the first period measured as the number
-      of minibatch updates, if one wants to use epochs, one should compute
-      the number of updates required for an epoch.
-    t_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
-      Must be positive.
-      Used to derive the number of iterations in the i-th period:
-      `initial_period_steps * (t_mul^i)`. Defaults to 2.0.
-    m_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
-      Must be positive.
-      Used to derive the initial learning rate of the i-th period:
-      `learning_rate * (m_mul^i)`. Defaults to 1.0
-
-  Returns:
-    A scalar `Tensor` of the same type as `learning_rate`.
-    The learning rate for a provided global_step.
-  Raises:
-    ValueError: if `global_step` is not supplied.
-  """
-
-  if global_step is None:
-    raise ValueError("global_step is required for sgdr_decay.")
-  with ops.name_scope(name, "SGDRDecay",
-                      [learning_rate, global_step,
-                       initial_period_steps, t_mul, m_mul]) as name:
-    learning_rate = ops.convert_to_tensor(learning_rate,
-                                          name="initial_learning_rate")
-    dtype = learning_rate.dtype
-    global_step = math_ops.cast(global_step, dtype)
-    t_0 = math_ops.cast(initial_period_steps, dtype)
-    t_mul = math_ops.cast(t_mul, dtype)
-    m_mul = math_ops.cast(m_mul, dtype)
-
-    c_one = math_ops.cast(constant_op.constant(1.0), dtype)
-    c_half = math_ops.cast(constant_op.constant(0.5), dtype)
-    c_pi = math_ops.cast(constant_op.constant(math.pi), dtype)
-
-    # Find normalized value of the current step
-    x_val = math_ops.div(global_step, t_0)
-
-    def compute_step(x_val, geometric=False):
-      if geometric:
-        # Consider geometric series where t_mul != 1
-        # 1 + t_mul + t_mul^2 ... = (1 - t_mul^i_restart) / (1 - t_mul)
-
-        # First find how many restarts were performed for a given x_val
-        # Find maximal integer i_restart value for which this equation holds
-        # x_val >= (1 - t_mul^i_restart) / (1 - t_mul)
-        # x_val * (1 - t_mul) <= (1 - t_mul^i_restart)
-        # t_mul^i_restart <= (1 - x_val * (1 - t_mul))
-
-        # tensorflow allows only log with base e
-        # i_restart <= log(1 - x_val * (1 - t_mul) / log(t_mul)
-        # Find how many restarts were performed
-
-        i_restart = math_ops.floor(
-            math_ops.log(c_one - x_val * (c_one - t_mul)) / math_ops.log(t_mul))
-        # Compute the sum of all restarts before the current one
-        sum_r = (c_one - t_mul ** i_restart) / (c_one - t_mul)
-        # Compute our position within the current restart
-        x_val = (x_val - sum_r) / t_mul ** i_restart
-
-      else:
-        # Find how many restarts were performed
-        i_restart = math_ops.floor(x_val)
-        # Compute our position within the current restart
-        x_val = x_val - i_restart
-      return i_restart, x_val
-
-    i_restart, x_val = control_flow_ops.cond(
-        math_ops.equal(t_mul, c_one),
-        lambda: compute_step(x_val, geometric=False),
-        lambda: compute_step(x_val, geometric=True))
-
-    # If m_mul < 1, then the initial learning rate of every new restart will be
-    # smaller, i.e., by a factor of m_mul ** i_restart at i_restart-th restart
-    m_fac = learning_rate * (m_mul ** i_restart)
-
-  return math_ops.multiply(c_half * m_fac,
-                           (math_ops.cos(x_val * c_pi) + c_one), name=name)
diff --git a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay_test.py b/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay_test.py
deleted file mode 100644
index 4a46e9a49ef203384e36698f81d6cbe3a3881ef8..0000000000000000000000000000000000000000
--- a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay_test.py
+++ /dev/null
@@ -1,145 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Functional test for sgdr learning rate decay."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-from sgdr_learning_rate_decay import sgdr_decay
-from tensorflow.python.platform import googletest
-from tensorflow.python.framework import test_util
-from tensorflow.python.framework import dtypes
-from tensorflow import placeholder
-
-
-class SGDRDecayTest(test_util.TensorFlowTestCase):
-  """Unit tests for SGDR learning rate decay."""
-
-  def get_original_values(self, lr, t_e, mult_factor, iter_per_epoch, epochs):
-    """Get an array with learning rate values from the consecutive steps using
-    the original implementation
-    (https://github.com/loshchil/SGDR/blob/master/SGDR_WRNs.py)."""
-    t0 = math.pi / 2.0
-    tt = 0
-    te_next = t_e
-
-    lr_values = []
-    sh_lr = lr
-    for epoch in range(epochs):
-      for _ in range(iter_per_epoch):
-        # In the original approach training function is executed here
-        lr_values.append(sh_lr)
-        dt = 2.0 * math.pi / float(2.0 * t_e)
-        tt = tt + float(dt) / iter_per_epoch
-        if tt >= math.pi:
-          tt = tt - math.pi
-        cur_t = t0 + tt
-        new_lr = lr * (1.0 + math.sin(cur_t)) / 2.0  # lr_min = 0, lr_max = lr
-        sh_lr = new_lr
-      if (epoch + 1) == te_next:  # time to restart
-        sh_lr = lr
-        tt = 0                # by setting to 0 we set lr to lr_max, see above
-        t_e = t_e * mult_factor  # change the period of restarts
-        te_next = te_next + t_e  # note the next restart's epoch
-
-    return lr_values
-
-  def get_sgdr_values(self, lr, initial_period_steps, t_mul, iters):
-    """Get an array with learning rate values from the consecutive steps
-    using current tensorflow implementation."""
-    with self.test_session():
-      step = placeholder(dtypes.int32)
-
-      decay = sgdr_decay(lr, step, initial_period_steps, t_mul)
-      lr_values = []
-      for i in range(iters):
-        lr_values.append(decay.eval(feed_dict={step: i}))
-
-      return lr_values
-
-  def testCompareToOriginal(self):
-    """Compare values generated by tensorflow implementation to the values
-    generated by the original implementation
-    (https://github.com/loshchil/SGDR/blob/master/SGDR_WRNs.py)."""
-    with self.test_session():
-      lr = 10.0
-      init_steps = 2
-      t_mul = 3
-      iters = 10
-      epochs = 50
-
-      org_lr = self.get_original_values(lr, init_steps, t_mul, iters, epochs)
-      sgdr_lr = self.get_sgdr_values(lr, init_steps*iters, t_mul, iters*epochs)
-
-      for org, sgdr in zip(org_lr, sgdr_lr):
-        self.assertAllClose(org, sgdr)
-
-  def testMDecay(self):
-    """Test m_mul argument. Check values for learning rate at the beginning
-    of the first, second, third and fourth period. """
-    with self.test_session():
-      step = placeholder(dtypes.int32)
-
-      lr = 0.1
-      t_e = 10
-      t_mul = 3
-      m_mul = 0.9
-
-      decay = sgdr_decay(lr, step, t_e, t_mul, m_mul)
-
-      test_step = 0
-      self.assertAllClose(decay.eval(feed_dict={step: test_step}),
-                          lr)
-
-      test_step = t_e
-      self.assertAllClose(decay.eval(feed_dict={step: test_step}),
-                          lr * m_mul)
-
-      test_step = t_e + t_e*t_mul
-      self.assertAllClose(decay.eval(feed_dict={step: test_step}),
-                          lr * m_mul**2)
-
-      test_step = t_e + t_e*t_mul + t_e * (t_mul**2)
-      self.assertAllClose(decay.eval(feed_dict={step: test_step}),
-                          lr * (m_mul**3))
-
-  def testCos(self):
-    """Check learning rate values at the beginning, in the middle
-    and at the end of the period."""
-    with self.test_session():
-      step = placeholder(dtypes.int32)
-      lr = 0.2
-      t_e = 1000
-      t_mul = 1
-
-      decay = sgdr_decay(lr, step, t_e, t_mul)
-
-      test_step = 0
-      self.assertAllClose(decay.eval(feed_dict={step: test_step}), lr)
-
-      test_step = t_e//2
-      self.assertAllClose(decay.eval(feed_dict={step: test_step}), lr/2)
-
-      test_step = t_e
-      self.assertAllClose(decay.eval(feed_dict={step: test_step}), lr)
-
-      test_step = t_e*3//2
-      self.assertAllClose(decay.eval(feed_dict={step: test_step}), lr/2)
-
-if __name__ == "__main__":
-  googletest.main()
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index bd7617fa9641ce47f93bd2104029f20798fd2815..579174efa31bf62580feff3e46ae2290826b2add 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -79,6 +79,7 @@ load(
     "if_linux_x86_64",
     "if_mobile",
     "if_not_mobile",
+    "if_windows",
     "if_not_windows",
     "tf_copts",
     "tf_cc_test",
@@ -202,6 +203,8 @@ CORE_PROTO_SRCS = [
 ADDITIONAL_CORE_PROTO_SRCS = [
     "example/example_parser_configuration.proto",
     "protobuf/control_flow.proto",
+    # TODO(ebrevdo): Re-enable once CriticalSection is in core.
+    # "protobuf/critical_section.proto",
     "protobuf/meta_graph.proto",
     "protobuf/named_tensor.proto",
     "protobuf/saved_model.proto",
@@ -274,11 +277,11 @@ cc_library(
         "platform/platform.h",
         "platform/protobuf.h",
         "platform/types.h",
-    ] + glob(tf_additional_proto_hdrs()) + glob(tf_env_time_hdrs()),
+        "platform/windows/cpu_info.h",
+        "lib/bfloat16/bfloat16.h",
+    ] + tf_additional_proto_hdrs() + glob(tf_env_time_hdrs()),
     copts = tf_copts(),
-    deps = tf_lib_proto_parsing_deps() + [
-        "@double_conversion//:double-conversion",
-    ],
+    deps = tf_lib_proto_parsing_deps(),
 )
 
 # This build rule (along with :lib_internal, :framework, and
@@ -288,6 +291,7 @@ cc_library(
 cc_library(
     name = "lib",
     hdrs = [
+        "lib/bfloat16/bfloat16.h",
         "lib/core/arena.h",
         "lib/core/bitmap.h",
         "lib/core/bits.h",
@@ -364,6 +368,23 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "abi",
+    srcs = ["platform/abi.cc"],
+    hdrs = ["platform/abi.h"],
+)
+
+cc_library(
+    name = "stacktrace_handler",
+    srcs = ["platform/stacktrace_handler.cc"],
+    hdrs = ["platform/stacktrace_handler.h"],
+    deps = [
+        ":abi",
+        ":lib",
+        ":lib_platform",
+    ],
+)
+
 # Test support library needed for all tests
 # This is currently public, but may be made internal in the
 # future.  Try to avoid depending on it.
@@ -513,14 +534,6 @@ cc_library(
     ],
 )
 
-tf_proto_library_cc(
-    name = "op_gen_overrides_proto",
-    srcs = ["framework/op_gen_overrides.proto"],
-    cc_api_version = 2,
-    protodeps = tf_additional_all_protos(),
-    visibility = ["//visibility:public"],
-)
-
 cc_library(
     name = "op_gen_lib",
     srcs = ["framework/op_gen_lib.cc"],
@@ -529,7 +542,6 @@ cc_library(
     deps = [
         ":lib",
         ":lib_internal",
-        ":op_gen_overrides_proto_cc",
         ":protos_all_cc",
     ],
 )
@@ -551,6 +563,7 @@ cc_library(
         "framework/numeric_types.h",
         "framework/tensor_types.h",
         "framework/type_traits.h",
+        "lib/bfloat16/bfloat16.h",
         "platform/default/dynamic_annotations.h",
         "platform/default/integral_types.h",
         "platform/default/logging.h",
@@ -564,7 +577,7 @@ cc_library(
         "platform/prefetch.h",
         "platform/thread_annotations.h",
         "platform/types.h",
-    ],
+    ] + if_windows(["platform/windows/integral_types.h"]),
     visibility = ["//visibility:public"],
     deps =
         [
@@ -577,6 +590,7 @@ cc_library(
 
 # Generates library per group of ops.
 tf_gen_op_libs(
+    is_external = False,
     op_lib_names = [
         "bitwise_ops",
         "candidate_sampling_ops",
@@ -590,6 +604,7 @@ tf_gen_op_libs(
         "image_ops",
         "io_ops",
         "linalg_ops",
+        "list_ops",
         "lookup_ops",
         "logging_ops",
         "math_ops",
@@ -670,6 +685,7 @@ cc_library(
         ":image_ops_op_lib",
         ":io_ops_op_lib",
         ":linalg_ops_op_lib",
+        ":list_ops_op_lib",
         ":logging_ops_op_lib",
         ":lookup_ops_op_lib",
         ":math_ops_op_lib",
@@ -805,6 +821,7 @@ cc_library(
         "//tensorflow/core/kernels:image",
         "//tensorflow/core/kernels:io",
         "//tensorflow/core/kernels:linalg",
+        "//tensorflow/core/kernels:list_kernels",
         "//tensorflow/core/kernels:lookup",
         "//tensorflow/core/kernels:logging",
         "//tensorflow/core/kernels:math",
@@ -843,6 +860,7 @@ cc_library(
         "//tensorflow/core/kernels:mkl_pooling_ops",
         "//tensorflow/core/kernels:mkl_relu_op",
         "//tensorflow/core/kernels:mkl_reshape_op",
+        "//tensorflow/core/kernels:mkl_softmax_op",
         "//tensorflow/core/kernels:mkl_tfconv_op",
         "//tensorflow/core/kernels:mkl_aggregate_ops",
     ]),
@@ -1016,7 +1034,7 @@ filegroup(
 cc_library(
     name = "android_tensorflow_lib_lite",
     srcs = if_android(["//tensorflow/core:android_srcs"]),
-    copts = tf_copts() + if_not_android_mips_and_mips64(["-Os"]),
+    copts = tf_copts(android_optimization_level_override = None),
     linkopts = ["-lz"],
     tags = [
         "manual",
@@ -1026,7 +1044,6 @@ cc_library(
     deps = [
         ":protos_all_cc_impl",
         "//third_party/eigen3",
-        "@double_conversion//:double-conversion",
         "@nsync//:nsync_cpp",
         "@protobuf_archive//:protobuf",
     ],
@@ -1041,19 +1058,31 @@ cc_library(
     name = "ios_tensorflow_lib",
     srcs = if_ios([
         ":android_op_registrations_and_gradients",
-        "//tensorflow/core:android_srcs",
         "//tensorflow/core/kernels:android_core_ops",
         "//tensorflow/core/kernels:android_extended_ops",
     ]),
     copts = tf_copts() + ["-Os"] + ["-std=c++11"],
     visibility = ["//visibility:public"],
     deps = [
+        ":ios_tensorflow_lib_lite",
         ":protos_all_cc_impl",
         "//third_party/eigen3",
         "//third_party/fft2d:fft2d_headers",
-        "@double_conversion//:double-conversion",
         "@fft2d//:fft2d",
         "@gemmlowp//:gemmlowp",
+        "@protobuf_archive//:protobuf",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "ios_tensorflow_lib_lite",
+    srcs = if_ios(["//tensorflow/core:android_srcs"]),
+    copts = tf_copts() + ["-Os"] + ["-std=c++11"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":protos_all_cc_impl",
+        "//third_party/eigen3",
         "@nsync//:nsync_cpp",
         "@protobuf_archive//:protobuf",
     ],
@@ -1106,8 +1135,7 @@ cc_library(
 cc_library(
     name = "android_tensorflow_lib_selective_registration",
     srcs = if_android(["//tensorflow/core:android_srcs"]),
-    copts = tf_copts() + [
-        "-Os",
+    copts = tf_copts(android_optimization_level_override = None) + [
         "-DSUPPORT_SELECTIVE_REGISTRATION",
     ],
     tags = [
@@ -1118,7 +1146,6 @@ cc_library(
     deps = [
         ":protos_all_cc_impl",
         "//third_party/eigen3",
-        "@double_conversion//:double-conversion",
         "@protobuf_archive//:protobuf",
     ],
     alwayslink = 1,
@@ -1129,8 +1156,7 @@ cc_library(
 cc_library(
     name = "android_tensorflow_lib_selective_registration_nortti",
     srcs = if_android(["//tensorflow/core:android_srcs"]),
-    copts = tf_copts() + tf_opts_nortti_if_android() + [
-        "-Os",
+    copts = tf_copts(android_optimization_level_override = None) + tf_opts_nortti_if_android() + [
         "-DSUPPORT_SELECTIVE_REGISTRATION",
     ],
     tags = [
@@ -1141,7 +1167,6 @@ cc_library(
     deps = [
         ":protos_all_cc_impl",
         "//third_party/eigen3",
-        "@double_conversion//:double-conversion",
         "@nsync//:nsync_cpp",
         "@protobuf_archive//:protobuf",
     ],
@@ -1210,7 +1235,7 @@ cc_library(
         "framework/tensor_testutil.h",
         "util/reporter.h",
     ],
-    copts = tf_copts() + ["-Os"],
+    copts = tf_copts(android_optimization_level_override = None),
     tags = [
         "manual",
         "notap",
@@ -1503,7 +1528,6 @@ cc_library(
         "//tensorflow/core/platform/default/build_config:platformlib",
         "@snappy",
         "@zlib_archive//:zlib",
-        "@double_conversion//:double-conversion",
         "@protobuf_archive//:protobuf",
     ] + tf_protos_all_impl(),
 )
@@ -1570,6 +1594,7 @@ cc_library(
         "platform/jpeg.h",
     ]),
     hdrs = [
+        "lib/bfloat16/bfloat16.h",
         "lib/core/stringpiece.h",
         "lib/jpeg/jpeg_handle.h",
         "lib/jpeg/jpeg_mem.h",
@@ -1597,6 +1622,7 @@ cc_library(
         "platform/gif.h",
     ]),
     hdrs = [
+        "lib/bfloat16/bfloat16.h",
         "lib/core/stringpiece.h",
         "lib/gif/gif_io.h",
         "lib/gtl/cleanup.h",
@@ -1624,6 +1650,7 @@ cc_library(
         "platform/png.h",
     ]),
     hdrs = [
+        "lib/bfloat16/bfloat16.h",
         "lib/core/casts.h",
         "lib/core/stringpiece.h",
         "lib/png/png_io.h",
@@ -1862,11 +1889,13 @@ cc_library(
     deps = ["//tensorflow/core/platform/default/build_config:protos_cc"],
 )
 
-CORE_CPU_BASE_HDRS = [
-    "common_runtime/device.h",
-    "common_runtime/graph_runner.h",
-    "common_runtime/shape_refiner.h",
-    "framework/versions.h",
+# Library containing all of the graph construction code that is
+# independent of the runtime.
+#
+# TODO(mrry): Refactor graph_constructor.cc so that it does not depend on code
+# in "common_runtime/", and then the entire "graph/" directory can be included
+# in this library.
+GRAPH_HDRS = [
     "graph/algorithm.h",
     "graph/colors.h",
     "graph/control_flow.h",
@@ -1874,7 +1903,7 @@ CORE_CPU_BASE_HDRS = [
     "graph/default_device.h",
     "graph/edgeset.h",
     "graph/graph.h",
-    "graph/graph_constructor.h",
+    "graph/graph_constructor.h",  # NOTE(mrry): Don't include the .cc since it depends on common_runtime.
     "graph/graph_def_builder.h",
     "graph/graph_partition.h",
     "graph/mkl_layout_pass.h",
@@ -1890,16 +1919,12 @@ CORE_CPU_BASE_HDRS = [
 ]
 
 tf_cuda_library(
-    name = "core_cpu_base",
+    name = "graph",
     srcs = [
-        "common_runtime/shape_refiner.cc",
-        "common_runtime/shape_refiner.h",
-        "framework/versions.h",
         "graph/algorithm.cc",
         "graph/colors.cc",
         "graph/control_flow.cc",
         "graph/costmodel.cc",
-        "graph/graph_constructor.cc",
         "graph/graph_def_builder.cc",
         "graph/graph_partition.cc",
         "graph/node_builder.cc",
@@ -1907,6 +1932,33 @@ tf_cuda_library(
         "graph/subgraph.cc",
         "graph/tensor_id.cc",
         "graph/validate.cc",
+    ],
+    hdrs = GRAPH_HDRS,
+    deps = [
+        ":framework",
+        ":framework_internal",
+        ":lib",
+        ":lib_internal",
+        ":proto_text",
+        ":protos_all_cc",
+        "//third_party/eigen3",
+    ],
+)
+
+CORE_CPU_BASE_HDRS = GRAPH_HDRS + [
+    "common_runtime/device.h",
+    "common_runtime/graph_runner.h",
+    "common_runtime/shape_refiner.h",
+    "framework/versions.h",
+]
+
+tf_cuda_library(
+    name = "core_cpu_base",
+    srcs = [
+        "common_runtime/shape_refiner.cc",
+        "common_runtime/shape_refiner.h",
+        "framework/versions.h",
+        "graph/graph_constructor.cc",  # Depends on common_runtime.
         "public/session.h",
         "public/session_options.h",
         "public/version.h",
@@ -1914,6 +1966,7 @@ tf_cuda_library(
     hdrs = CORE_CPU_BASE_HDRS,
     copts = tf_copts(),
     deps = [
+        ":graph",
         ":framework",
         ":framework_internal",
         ":lib",
@@ -2017,6 +2070,7 @@ tf_cuda_library(
     hdrs = CORE_CPU_LIB_HEADERS,
     copts = tf_copts(),
     deps = [
+        ":graph",
         ":framework",
         ":framework_internal",
         ":lib",
@@ -2058,6 +2112,7 @@ tf_cuda_library(
         ":function_ops_op_lib",
         ":functional_grad",
         ":functional_ops_op_lib",
+        ":graph",
         ":lib",
         ":lib_internal",
         ":proto_text",
@@ -2103,6 +2158,7 @@ tf_cuda_library(
         ":core_cpu_internal",
         ":device_tracer",
         ":framework",
+        ":graph",
         ":lib",
         ":lib_internal",
         ":proto_text",
@@ -2152,12 +2208,15 @@ GPU_RUNTIME_HEADERS = [
     "common_runtime/gpu/gpu_cudamalloc_allocator.h",
     "common_runtime/gpu/gpu_debug_allocator.h",
     "common_runtime/gpu/gpu_device.h",
+    "common_runtime/gpu/gpu_id.h",
+    "common_runtime/gpu/gpu_id_utils.h",
     "common_runtime/gpu/gpu_init.h",
     "common_runtime/gpu/gpu_managed_allocator.h",
     "common_runtime/gpu/gpu_stream_util.h",
     "common_runtime/gpu/gpu_util.h",
     "common_runtime/gpu/pool_allocator.h",
     "common_runtime/gpu/process_state.h",
+    "common_runtime/gpu_device_context.h",
 ]
 
 tf_cuda_library(
@@ -2168,13 +2227,13 @@ tf_cuda_library(
         "common_runtime/gpu/gpu_debug_allocator.cc",
         "common_runtime/gpu/gpu_device.cc",
         "common_runtime/gpu/gpu_device_factory.cc",
+        "common_runtime/gpu/gpu_id_utils.cc",
         "common_runtime/gpu/gpu_managed_allocator.cc",
         "common_runtime/gpu/gpu_stream_util.cc",
         "common_runtime/gpu/gpu_util.cc",
         "common_runtime/gpu/gpu_util_platform_specific.cc",
         "common_runtime/gpu/pool_allocator.cc",
         "common_runtime/gpu/process_state.cc",
-        "common_runtime/gpu_device_context.h",
     ],
     hdrs = GPU_RUNTIME_HEADERS,
     copts = tf_copts(),
@@ -2185,6 +2244,7 @@ tf_cuda_library(
         ":framework_internal",
         ":gpu_init_impl",
         ":gpu_lib",
+        ":graph",
         ":lib",
         ":lib_internal",
         ":protos_all_cc",
@@ -2333,6 +2393,7 @@ cc_library(
     deps = [
         ":lib",
         ":lib_internal",
+        ":stacktrace_handler",
         ":test",  # buildcleaner: keep
         "//tensorflow/core/platform/default/build_config:test_main",
     ],
@@ -2403,6 +2464,7 @@ tf_cc_tests(
         "platform/net_test.cc",
         "platform/port_test.cc",
         "platform/profile_utils/cpu_utils_test.cc",
+        "platform/stacktrace_handler_test.cc",
         "platform/subprocess_test.cc",
     ],
     deps = [
@@ -2758,15 +2820,30 @@ tf_cc_test_mkl(
         "//tensorflow/core/kernels:mkl_pooling_ops",
         "//tensorflow/core/kernels:mkl_relu_op",
         "//tensorflow/core/kernels:mkl_reshape_op",
+        "//tensorflow/core/kernels:mkl_softmax_op",
         "//tensorflow/core/kernels:mkl_tfconv_op",
     ]),
 )
 
+tf_cc_tests_gpu(
+    name = "gpu_device_on_non_gpu_machine_test",
+    size = "small",
+    srcs = ["common_runtime/gpu/gpu_device_on_non_gpu_machine_test.cc"],
+    linkstatic = tf_kernel_tests_linkstatic(),
+    deps = [
+        ":gpu_headers_lib",
+        ":gpu_runtime",
+        ":test",
+    ],
+)
+
 tf_cc_tests_gpu(
     name = "gpu_related_tests",
     size = "small",
     srcs = glob(["user_ops/**/*_test.cc"]) + [
         "common_runtime/gpu/gpu_bfc_allocator_test.cc",
+        "common_runtime/gpu/gpu_device_test.cc",
+        "common_runtime/gpu/gpu_id_utils_test.cc",
         "common_runtime/gpu/gpu_event_mgr_test.cc",
         "common_runtime/gpu/pool_allocator_test.cc",
     ],
@@ -3131,6 +3208,7 @@ tf_cc_test(
         "//tensorflow/core/kernels:cwise_op",
         "//tensorflow/core/kernels:function_ops",
         "//tensorflow/core/kernels:matmul_op",
+        "//tensorflow/core/kernels:random_ops",
         "//tensorflow/core/kernels:shape_ops",
         "//third_party/eigen3",
     ],
@@ -3381,37 +3459,6 @@ tf_cc_test(
     ],
 )
 
-filegroup(
-    name = "base_api_def",
-    srcs = glob(["api_def/base_api/*"]),
-)
-
-filegroup(
-    name = "python_api_def",
-    data = glob(["api_def/python_api/*"]),
-)
-
-tf_cc_test(
-    name = "api_test",
-    srcs = ["api_def/api_test.cc"],
-    data = [
-        ":base_api_def",
-        "//tensorflow/cc:ops/op_gen_overrides.pbtxt",
-    ],
-    deps = [
-        ":framework",
-        ":framework_internal",
-        ":lib",
-        ":lib_internal",
-        ":lib_test_internal",
-        ":op_gen_lib",
-        ":op_gen_overrides_proto_cc",
-        ":ops",
-        ":protos_all_cc",
-        ":test",
-    ],
-)
-
 tf_cc_test_gpu(
     name = "device_tracer_test",
     size = "small",
diff --git a/tensorflow/core/api_def/BUILD b/tensorflow/core/api_def/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..81187ff6b772633105e0962d9da8f87d6cfd9558
--- /dev/null
+++ b/tensorflow/core/api_def/BUILD
@@ -0,0 +1,113 @@
+# Description:
+#   Provides ApiDef access and ApiDef validation for TensorFlow.
+#
+# The following targets can be used to access ApiDefs:
+#   :base_api_def
+#   :python_api_def
+
+package(
+    default_visibility = ["//visibility:private"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_cc_binary",
+    "tf_cc_test",
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+filegroup(
+    name = "base_api_def",
+    srcs = glob(["base_api/*"]),
+    visibility = ["//tensorflow:internal"],
+)
+
+filegroup(
+    name = "python_api_def",
+    srcs = glob(["python_api/*"]),
+    visibility = ["//tensorflow:internal"],
+)
+
+cc_library(
+    name = "excluded_ops_lib",
+    srcs = ["excluded_ops.cc"],
+    hdrs = ["excluded_ops.h"],
+)
+
+cc_library(
+    name = "update_api_def_lib",
+    srcs = ["update_api_def.cc"],
+    hdrs = ["update_api_def.h"],
+    deps = [
+        ":excluded_ops_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:op_gen_lib",
+        "//tensorflow/core:ops",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+tf_cc_test(
+    name = "update_api_def_test",
+    srcs = ["update_api_def_test.cc"],
+    deps = [
+        ":update_api_def_lib",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
+tf_cc_binary(
+    name = "update_api_def",
+    srcs = [
+        "update_api_def_main.cc",
+    ],
+    data = [
+        ":base_api_def",
+    ],
+    deps = [
+        ":update_api_def_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "api_test",
+    srcs = ["api_test.cc"],
+    data = [
+        ":base_api_def",
+    ],
+    deps = [
+        ":excluded_ops_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:lib_test_internal",
+        "//tensorflow/core:op_gen_lib",
+        "//tensorflow/core:ops",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
diff --git a/tensorflow/core/api_def/api_test.cc b/tensorflow/core/api_def/api_test.cc
index 2cdc14843f61a2585b61e214527e0a0b5bdea446..112c55ccc3ba1262b48c1b6c0890b3ae22744383 100644
--- a/tensorflow/core/api_def/api_test.cc
+++ b/tensorflow/core/api_def/api_test.cc
@@ -13,9 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-// Test that verifies tensorflow/core/api_def/base_api/api_def*.pbtxt files
-// are correct. If api_def*.pbtxt do not match expected contents, run
-// tensorflow/core/api_def/base_api/update_api_def.sh script to update them.
+// Test that validates tensorflow/core/api_def/base_api/api_def*.pbtxt files.
 
 #include <ctype.h>
 #include <algorithm>
@@ -23,12 +21,11 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "tensorflow/core/api_def/excluded_ops.h"
 #include "tensorflow/core/framework/api_def.pb.h"
-#include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
-#include "tensorflow/core/framework/op_gen_overrides.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
@@ -44,309 +41,164 @@ namespace tensorflow {
 namespace {
 constexpr char kDefaultApiDefDir[] =
     "tensorflow/core/api_def/base_api";
-constexpr char kOverridesFilePath[] =
-    "tensorflow/cc/ops/op_gen_overrides.pbtxt";
-constexpr char kApiDefFileFormat[] = "api_def_%s.pbtxt";
 constexpr char kApiDefFilePattern[] = "api_def_*.pbtxt";
+}  // namespace
 
-void FillBaseApiDef(ApiDef* api_def, const OpDef& op) {
-  api_def->set_graph_op_name(op.name());
-  // Add arg docs
-  for (auto& input_arg : op.input_arg()) {
-    if (!input_arg.description().empty()) {
-      auto* api_def_in_arg = api_def->add_in_arg();
-      api_def_in_arg->set_name(input_arg.name());
-      api_def_in_arg->set_description(input_arg.description());
-    }
-  }
-  for (auto& output_arg : op.output_arg()) {
-    if (!output_arg.description().empty()) {
-      auto* api_def_out_arg = api_def->add_out_arg();
-      api_def_out_arg->set_name(output_arg.name());
-      api_def_out_arg->set_description(output_arg.description());
-    }
-  }
-  // Add attr docs
-  for (auto& attr : op.attr()) {
-    if (!attr.description().empty()) {
-      auto* api_def_attr = api_def->add_attr();
-      api_def_attr->set_name(attr.name());
-      api_def_attr->set_description(attr.description());
-    }
-  }
-  // Add docs
-  api_def->set_summary(op.summary());
-  api_def->set_description(op.description());
-}
+// Reads golden ApiDef files and returns a map from file name to ApiDef file
+// contents.
+void GetGoldenApiDefs(Env* env, const string& api_files_dir,
+                      std::unordered_map<string, ApiDef>* name_to_api_def) {
+  std::vector<string> matching_paths;
+  TF_CHECK_OK(env->GetMatchingPaths(
+      io::JoinPath(api_files_dir, kApiDefFilePattern), &matching_paths));
 
-// Checks if arg1 should be before arg2 according to ordering in args.
-bool CheckArgBefore(const ApiDef::Arg* arg1, const ApiDef::Arg* arg2,
-                    const protobuf::RepeatedPtrField<OpDef::ArgDef>& args) {
-  for (auto& arg : args) {
-    if (arg.name() == arg2->name()) {
-      return false;
-    } else if (arg.name() == arg1->name()) {
-      return true;
-    }
-  }
-  return false;
-}
+  for (auto& file_path : matching_paths) {
+    string file_contents;
+    TF_CHECK_OK(ReadFileToString(env, file_path, &file_contents));
+    file_contents = PBTxtFromMultiline(file_contents);
 
-// Checks if attr1 should be before attr2 according to ordering in op_def.
-bool CheckAttrBefore(const ApiDef::Attr* attr1, const ApiDef::Attr* attr2,
-                     const OpDef& op_def) {
-  for (auto& attr : op_def.attr()) {
-    if (attr.name() == attr2->name()) {
-      return false;
-    } else if (attr.name() == attr1->name()) {
-      return true;
-    }
+    ApiDefs api_defs;
+    CHECK(tensorflow::protobuf::TextFormat::ParseFromString(file_contents,
+                                                            &api_defs))
+        << "Failed to load " << file_path;
+    CHECK_EQ(api_defs.op_size(), 1);
+    (*name_to_api_def)[api_defs.op(0).graph_op_name()] = api_defs.op(0);
   }
-  return false;
 }
 
-// Applies renames to args.
-void ApplyArgOverrides(
-    protobuf::RepeatedPtrField<ApiDef::Arg>* args,
-    const protobuf::RepeatedPtrField<OpGenOverride::Rename>& renames,
-    const protobuf::RepeatedPtrField<OpDef::ArgDef>& op_args,
-    const string& op_name) {
-  for (auto& rename : renames) {
-    // First check if rename is valid.
-    bool valid = false;
-    for (const auto& op_arg : op_args) {
-      if (op_arg.name() == rename.from()) {
-        valid = true;
-      }
-    }
-    QCHECK(valid) << rename.from() << " is not a valid argument for "
-                  << op_name;
-    bool found_arg = false;
-    // If Arg is already in ApiDef, just update it.
-    for (int i = 0; i < args->size(); ++i) {
-      auto* arg = args->Mutable(i);
-      if (arg->name() == rename.from()) {
-        arg->set_rename_to(rename.to());
-        found_arg = true;
-        break;
-      }
-    }
-    if (!found_arg) {  // not in ApiDef, add a new arg.
-      auto* new_arg = args->Add();
-      new_arg->set_name(rename.from());
-      new_arg->set_rename_to(rename.to());
-    }
+class ApiTest : public ::testing::Test {
+ protected:
+  ApiTest() {
+    OpRegistry::Global()->Export(false, &ops_);
+    const std::vector<string> multi_line_fields = {"description"};
+
+    Env* env = Env::Default();
+    GetGoldenApiDefs(env, kDefaultApiDefDir, &api_defs_map_);
   }
-  // We don't really need a specific order here right now.
-  // However, it is clearer if order follows OpDef.
-  std::sort(args->pointer_begin(), args->pointer_end(),
-            [&](ApiDef::Arg* arg1, ApiDef::Arg* arg2) {
-              return CheckArgBefore(arg1, arg2, op_args);
-            });
-}
+  OpList ops_;
+  std::unordered_map<string, ApiDef> api_defs_map_;
+};
 
-// Returns existing attribute with the given name if such
-// attribute exists. Otherwise, adds a new attribute and returns it.
-ApiDef::Attr* FindOrAddAttr(ApiDef* api_def, const string attr_name) {
-  // If Attr is already in ApiDef, just update it.
-  for (int i = 0; i < api_def->attr_size(); ++i) {
-    auto* attr = api_def->mutable_attr(i);
-    if (attr->name() == attr_name) {
-      return attr;
+// Check that all ops have an ApiDef.
+TEST_F(ApiTest, AllOpsAreInApiDef) {
+  auto* excluded_ops = GetExcludedOps();
+  for (const auto& op : ops_.op()) {
+    if (excluded_ops->find(op.name()) != excluded_ops->end()) {
+      continue;
     }
+    ASSERT_TRUE(api_defs_map_.find(op.name()) != api_defs_map_.end())
+        << op.name() << " op does not have api_def_*.pbtxt file. "
+        << "Please add api_def_" << op.name() << ".pbtxt file "
+        << "under tensorflow/core/api_def/base_api/ directory.";
   }
-  // Add a new Attr.
-  auto* new_attr = api_def->add_attr();
-  new_attr->set_name(attr_name);
-  return new_attr;
 }
 
-// Applies renames and default values to attributes.
-void ApplyAttrOverrides(ApiDef* api_def, const OpGenOverride& op_override,
-                        const OpDef& op_def) {
-  for (auto& attr_rename : op_override.attr_rename()) {
-    auto* attr = FindOrAddAttr(api_def, attr_rename.from());
-    attr->set_rename_to(attr_rename.to());
+// Check that ApiDefs have a corresponding op.
+TEST_F(ApiTest, AllApiDefsHaveCorrespondingOp) {
+  std::unordered_set<string> op_names;
+  for (const auto& op : ops_.op()) {
+    op_names.insert(op.name());
   }
-
-  for (auto& attr_default : op_override.attr_default()) {
-    auto* attr = FindOrAddAttr(api_def, attr_default.name());
-    *(attr->mutable_default_value()) = attr_default.value();
+  for (const auto& name_and_api_def : api_defs_map_) {
+    ASSERT_TRUE(op_names.find(name_and_api_def.first) != op_names.end())
+        << name_and_api_def.first << " op has ApiDef but missing from ops. "
+        << "Does api_def_" << name_and_api_def.first << " need to be deleted?";
   }
-  // We don't really need a specific order here right now.
-  // However, it is clearer if order follows OpDef.
-  std::sort(api_def->mutable_attr()->pointer_begin(),
-            api_def->mutable_attr()->pointer_end(),
-            [&](ApiDef::Attr* attr1, ApiDef::Attr* attr2) {
-              return CheckAttrBefore(attr1, attr2, op_def);
-            });
 }
 
-void ApplyOverridesToApiDef(ApiDef* api_def, const OpDef& op,
-                            const OpGenOverride& op_override) {
-  // Fill ApiDef with data based on op and op_override.
-  // Set visibility
-  if (op_override.skip()) {
-    api_def->set_visibility(ApiDef_Visibility_SKIP);
-  } else if (op_override.hide()) {
-    api_def->set_visibility(ApiDef_Visibility_HIDDEN);
-  }
-  // Add endpoints
-  if (!op_override.rename_to().empty()) {
-    api_def->add_endpoint()->set_name(op_override.rename_to());
-  } else if (!op_override.alias().empty()) {
-    api_def->add_endpoint()->set_name(op.name());
-  }
-
-  for (auto& alias : op_override.alias()) {
-    auto* endpoint = api_def->add_endpoint();
-    endpoint->set_name(alias);
-  }
-
-  ApplyArgOverrides(api_def->mutable_in_arg(), op_override.input_rename(),
-                    op.input_arg(), api_def->graph_op_name());
-  ApplyArgOverrides(api_def->mutable_out_arg(), op_override.output_rename(),
-                    op.output_arg(), api_def->graph_op_name());
-  ApplyAttrOverrides(api_def, op_override, op);
+string GetOpDefHasDocStringError(const string& op_name) {
+  return strings::Printf(
+      "OpDef for %s has a doc string. "
+      "Doc strings must be defined in ApiDef instead of OpDef. "
+      "Please, add summary and descriptions in api_def_%s"
+      ".pbtxt file instead",
+      op_name.c_str(), op_name.c_str());
 }
 
-// Get map from ApiDef file path to corresponding ApiDefs proto.
-std::unordered_map<string, ApiDefs> GenerateApiDef(
-    const string& api_def_dir, const OpList& ops,
-    const OpGenOverrides& overrides) {
-  std::unordered_map<string, OpGenOverride> name_to_override;
-  for (const auto& op_override : overrides.op()) {
-    name_to_override[op_override.name()] = op_override;
-  }
-
-  std::unordered_map<string, ApiDefs> api_defs_map;
-
-  // These ops are included in OpList only if TF_NEED_GCP
-  // is set to true. So, we skip them for now so that this test passes
-  // whether TF_NEED_GCP is set or not.
-  const std::unordered_set<string> ops_to_exclude = {
-      "BigQueryReader", "GenerateBigQueryReaderPartitions"};
-  for (const auto& op : ops.op()) {
-    CHECK(!op.name().empty())
-        << "Encountered empty op name: %s" << op.DebugString();
-    if (ops_to_exclude.find(op.name()) != ops_to_exclude.end()) {
-      LOG(INFO) << "Skipping " << op.name();
+// Check that OpDef's do not have descriptions and summaries.
+// Descriptions and summaries must be in corresponding ApiDefs.
+TEST_F(ApiTest, OpDefsShouldNotHaveDocs) {
+  auto* excluded_ops = GetExcludedOps();
+  for (const auto& op : ops_.op()) {
+    if (excluded_ops->find(op.name()) != excluded_ops->end()) {
       continue;
     }
-    string file_path = io::JoinPath(api_def_dir, kApiDefFileFormat);
-    file_path = strings::Printf(file_path.c_str(), op.name().c_str());
-    ApiDef* api_def = api_defs_map[file_path].add_op();
-    FillBaseApiDef(api_def, op);
-
-    if (name_to_override.find(op.name()) != name_to_override.end()) {
-      ApplyOverridesToApiDef(api_def, op, name_to_override[op.name()]);
+    ASSERT_TRUE(op.summary().empty()) << GetOpDefHasDocStringError(op.name());
+    ASSERT_TRUE(op.description().empty())
+        << GetOpDefHasDocStringError(op.name());
+    for (const auto& arg : op.input_arg()) {
+      ASSERT_TRUE(arg.description().empty())
+          << GetOpDefHasDocStringError(op.name());
+    }
+    for (const auto& arg : op.output_arg()) {
+      ASSERT_TRUE(arg.description().empty())
+          << GetOpDefHasDocStringError(op.name());
+    }
+    for (const auto& attr : op.attr()) {
+      ASSERT_TRUE(attr.description().empty())
+          << GetOpDefHasDocStringError(op.name());
     }
   }
-  return api_defs_map;
 }
 
-// Reads golden ApiDef files and returns a map from file name to ApiDef file
-// contents.
-std::unordered_map<string, string> GetGoldenApiDefs(
-    Env* env, const string& api_files_dir) {
-  std::vector<string> matching_paths;
-  TF_CHECK_OK(env->GetMatchingPaths(
-      io::JoinPath(api_files_dir, kApiDefFilePattern), &matching_paths));
-
-  std::unordered_map<string, string> file_path_to_api_def;
-  for (auto& file_path : matching_paths) {
-    string file_contents;
-    TF_CHECK_OK(ReadFileToString(env, file_path, &file_contents));
-    file_path_to_api_def[file_path] = file_contents;
+// Checks that input arg names in an ApiDef match input
+// arg names in corresponding OpDef.
+TEST_F(ApiTest, AllApiDefInputArgsAreValid) {
+  for (const auto& op : ops_.op()) {
+    const auto& api_def = api_defs_map_[op.name()];
+    for (const auto& api_def_arg : api_def.in_arg()) {
+      bool found_arg = false;
+      for (const auto& op_arg : op.input_arg()) {
+        if (api_def_arg.name() == op_arg.name()) {
+          found_arg = true;
+          break;
+        }
+      }
+      ASSERT_TRUE(found_arg)
+          << "Input argument " << api_def_arg.name()
+          << " (overwritten in api_def_" << op.name()
+          << ".pbtxt) is not defined in OpDef for " << op.name();
+    }
   }
-  return file_path_to_api_def;
 }
 
-void RunApiTest(bool update_api_def, const string& api_files_dir) {
-  // Read C++ overrides file
-  OpGenOverrides overrides;
-  Env* env = Env::Default();
-  TF_EXPECT_OK(ReadTextProto(env, kOverridesFilePath, &overrides));
-
-  // Read all ops
-  OpList ops;
-  OpRegistry::Global()->Export(false, &ops);
-  const std::vector<string> multi_line_fields = {"description"};
-
-  // Get expected ApiDefs
-  const auto new_api_defs_map = GenerateApiDef(api_files_dir, ops, overrides);
-
-  bool updated_at_least_one_file = false;
-  const auto golden_api_defs_map = GetGoldenApiDefs(env, api_files_dir);
-
-  for (auto new_api_entry : new_api_defs_map) {
-    const auto& file_path = new_api_entry.first;
-    std::string golden_api_defs_str = "";
-    if (golden_api_defs_map.find(file_path) != golden_api_defs_map.end()) {
-      golden_api_defs_str = golden_api_defs_map.at(file_path);
-    }
-    string new_api_defs_str = new_api_entry.second.DebugString();
-    new_api_defs_str = PBTxtToMultiline(new_api_defs_str, multi_line_fields);
-    if (golden_api_defs_str == new_api_defs_str) {
-      continue;
-    }
-    if (update_api_def) {
-      std::cout << "Updating " << file_path << "..." << std::endl;
-      TF_EXPECT_OK(WriteStringToFile(env, file_path, new_api_defs_str));
-      updated_at_least_one_file = true;
-    } else {
-      EXPECT_EQ(golden_api_defs_str, new_api_defs_str)
-          << "To update golden API files, run "
-          << "tensorflow/core/api_def/update_api_def.sh.";
+// Checks that output arg names in an ApiDef match output
+// arg names in corresponding OpDef.
+TEST_F(ApiTest, AllApiDefOutputArgsAreValid) {
+  for (const auto& op : ops_.op()) {
+    const auto& api_def = api_defs_map_[op.name()];
+    for (const auto& api_def_arg : api_def.out_arg()) {
+      bool found_arg = false;
+      for (const auto& op_arg : op.output_arg()) {
+        if (api_def_arg.name() == op_arg.name()) {
+          found_arg = true;
+          break;
+        }
+      }
+      ASSERT_TRUE(found_arg)
+          << "Output argument " << api_def_arg.name()
+          << " (overwritten in api_def_" << op.name()
+          << ".pbtxt) is not defined in OpDef for " << op.name();
     }
   }
+}
 
-  for (const auto& golden_api_entry : golden_api_defs_map) {
-    const auto& file_path = golden_api_entry.first;
-    if (new_api_defs_map.find(file_path) == new_api_defs_map.end()) {
-      if (update_api_def) {
-        std::cout << "Deleting " << file_path << "..." << std::endl;
-        TF_EXPECT_OK(env->DeleteFile(file_path));
-        updated_at_least_one_file = true;
-      } else {
-        EXPECT_EQ("", golden_api_entry.second)
-            << "To update golden API files, run "
-            << "tensorflow/core/api_def/update_api_def.sh.";
+// Checks that attribute names in an ApiDef match attribute
+// names in corresponding OpDef.
+TEST_F(ApiTest, AllApiDefAttributeNamesAreValid) {
+  for (const auto& op : ops_.op()) {
+    const auto& api_def = api_defs_map_[op.name()];
+    for (const auto& api_def_attr : api_def.attr()) {
+      bool found_attr = false;
+      for (const auto& op_attr : op.attr()) {
+        if (api_def_attr.name() == op_attr.name()) {
+          found_attr = true;
+        }
       }
+      ASSERT_TRUE(found_attr)
+          << "Attribute " << api_def_attr.name() << " (overwritten in api_def_"
+          << op.name() << ".pbtxt) is not defined in OpDef for " << op.name();
     }
   }
-
-  if (update_api_def && !updated_at_least_one_file) {
-    std::cout << "Api def files are already up to date." << std::endl;
-  }
 }
-
-TEST(ApiTest, GenerateBaseAPIDef) { RunApiTest(false, kDefaultApiDefDir); }
-}  // namespace
 }  // namespace tensorflow
-
-int main(int argc, char** argv) {
-  bool update_api_def = false;
-  tensorflow::string api_files_dir = tensorflow::kDefaultApiDefDir;
-  std::vector<tensorflow::Flag> flag_list = {
-      tensorflow::Flag(
-          "update_api_def", &update_api_def,
-          "Whether to update tensorflow/core/api_def/base_api/api_def*.pbtxt "
-          "files if they differ from expected API."),
-      tensorflow::Flag("api_def_dir", &api_files_dir,
-                       "Base directory of api_def*.pbtxt files.")};
-  std::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
-  bool parsed_values_ok = tensorflow::Flags::Parse(&argc, argv, flag_list);
-  if (!parsed_values_ok) {
-    std::cerr << usage << std::endl;
-    return 2;
-  }
-  if (update_api_def) {
-    tensorflow::port::InitMain(argv[0], &argc, &argv);
-    tensorflow::RunApiTest(update_api_def, api_files_dir);
-    return 0;
-  }
-  testing::InitGoogleTest(&argc, argv);
-  // Run tests
-  return RUN_ALL_TESTS();
-}
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt
index 6522ce976f2b507c4c66d4d3709427b5fa8222e9..070d6adb978e4a62e7209f299dba08515aa21e83 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv2D.pbtxt
@@ -26,7 +26,7 @@ END
     description: <<END
 1-D tensor of length 4.  The stride of the sliding window for each
 dimension of `input`. The dimension order is determined by the value of
-  `data_format`, see below for details.
+`data_format`, see below for details.
 END
   }
   attr {
@@ -43,6 +43,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, height, width, channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, channels, height, width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each
+filter element on that dimension. The dimension order is determined by the
+value of `data_format`, see above for details. Dilations in the batch and
+depth dimensions must be 1.
 END
   }
   summary: "Computes a 2-D convolution given 4-D `input` and `filter` tensors."
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt
index 4ea3374dbbc8c690143a3a7a5fb9e67aca5bf1b0..ff2d9d71db646a27a88763f79bb6beb6b5ede44b 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropFilter.pbtxt
@@ -51,6 +51,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, in_height, in_width, in_channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, in_channels, in_height, in_width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each filter
+element on that dimension. The dimension order is determined by the value of
+`data_format`, see above for details. Dilations in the batch and depth
+dimensions must be 1.
 END
   }
   summary: "Computes the gradients of convolution with respect to the filter."
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt
index 4420073e384c1c24d3109b8c6c4cadb59e9ed9d0..2de38b4263a380b5d0aec45270b9b67347c7021d 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv2DBackpropInput.pbtxt
@@ -50,6 +50,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, in_height, in_width, in_channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, in_channels, in_height, in_width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each filter
+element on that dimension. The dimension order is determined by the value of
+`data_format`, see above for details. Dilations in the batch and depth
+dimensions must be 1.
 END
   }
   summary: "Computes the gradients of convolution with respect to the input."
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt
index 8f3cd4493c7af152c7a4eab78d1f96e02e325bbc..d26564097e976013fbb7f026c6a403cf6bd808e0 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv3D.pbtxt
@@ -34,6 +34,16 @@ default format "NDHWC", the data is stored in the order of:
     [batch, in_depth, in_height, in_width, in_channels].
 Alternatively, the format could be "NCDHW", the data storage order is:
     [batch, in_channels, in_depth, in_height, in_width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 5.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each
+filter element on that dimension. The dimension order is determined by the
+value of `data_format`, see above for details. Dilations in the batch and
+depth dimensions must be 1.
 END
   }
   summary: "Computes a 3-D convolution given 5-D `input` and `filter` tensors."
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt
index 6f9b917237b5748ac91c0a3bfbe35a21954dfd9d..937c9c8eadaaeceaadc180ad44f35a12ba9a2dfb 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropFilterV2.pbtxt
@@ -43,6 +43,16 @@ default format "NDHWC", the data is stored in the order of:
     [batch, in_depth, in_height, in_width, in_channels].
 Alternatively, the format could be "NCDHW", the data storage order is:
     [batch, in_channels, in_depth, in_height, in_width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 5.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each
+filter element on that dimension. The dimension order is determined by the
+value of `data_format`, see above for details. Dilations in the batch and
+depth dimensions must be 1.
 END
   }
   summary: "Computes the gradients of 3-D convolution with respect to the filter."
diff --git a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt
index 19aba156d5907eb79d1438c16f866dfbd99ed548..414e418dc5a91e55f22dc5eec93d16fabad3d8fb 100644
--- a/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Conv3DBackpropInputV2.pbtxt
@@ -43,6 +43,16 @@ default format "NDHWC", the data is stored in the order of:
     [batch, in_depth, in_height, in_width, in_channels].
 Alternatively, the format could be "NCDHW", the data storage order is:
     [batch, in_channels, in_depth, in_height, in_width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 5.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each
+filter element on that dimension. The dimension order is determined by the
+value of `data_format`, see above for details. Dilations in the batch and
+depth dimensions must be 1.
 END
   }
   summary: "Computes the gradients of 3-D convolution with respect to the input."
diff --git a/tensorflow/core/api_def/base_api/api_def_CriticalSectionOp.pbtxt b/tensorflow/core/api_def/base_api/api_def_CriticalSectionOp.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..5027fa861e7d8914b1e8ae06cd1ffa2ed06b6ad2
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_CriticalSectionOp.pbtxt
@@ -0,0 +1,16 @@
+op {
+  graph_op_name: "CriticalSectionOp"
+  attr {
+    name: "container"
+    description: <<END
+the container this critical section is placed in.
+END
+  }
+  attr {
+    name: "shared_name"
+    description: <<END
+the name by which this critical section is referred to.
+END
+  }
+  summary: "Creates a handle to a CriticalSection resource."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_DataFormatDimMap.pbtxt b/tensorflow/core/api_def/base_api/api_def_DataFormatDimMap.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..994d3b8ddb6e44804b8d64a76bef7c9136f943f4
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_DataFormatDimMap.pbtxt
@@ -0,0 +1,32 @@
+op {
+  graph_op_name: "DataFormatDimMap"
+  in_arg {
+    name: "x"
+    description: <<END
+A Tensor with each element as a dimension index in source data format.
+Must be in the range [-4, 4).
+END
+  }
+  out_arg {
+    name: "y"
+    description: <<END
+A Tensor with each element as a dimension index in destination data format.
+END
+  }
+  attr {
+    name: "src_format"
+    description: <<END
+source data format.
+END
+  }
+  attr {
+    name: "dst_format"
+    description: <<END
+destination data format.
+END
+  }
+  summary: "Returns the dimension index in the destination data format given the one in"
+  description: <<END
+the source data format.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt b/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..d87c088899e26bdd8a86f41c07681fa5aa49a07a
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
@@ -0,0 +1,31 @@
+op {
+  graph_op_name: "DataFormatVecPermute"
+  in_arg {
+    name: "x"
+    description: <<END
+Vector of size 4 or Tensor of shape (4, 2) in source data format.
+END
+  }
+  out_arg {
+    name: "y"
+    description: <<END
+Vector of size 4 or Tensor of shape (4, 2) in destination data format.
+END
+  }
+  attr {
+    name: "src_format"
+    description: <<END
+source data format.
+END
+  }
+  attr {
+    name: "dst_format"
+    description: <<END
+destination data format.
+END
+  }
+  summary: "Returns the permuted vector/tensor in the destination data format given the"
+  description: <<END
+one in the source data format.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_DebugGradientIdentity.pbtxt b/tensorflow/core/api_def/base_api/api_def_DebugGradientIdentity.pbtxt
index 38fd6877e9d26e7ab86a4e7f95352a4a39efb7c2..6f932eb80cd969d345bd22514491643f28a92536 100644
--- a/tensorflow/core/api_def/base_api/api_def_DebugGradientIdentity.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DebugGradientIdentity.pbtxt
@@ -4,5 +4,6 @@ op {
   description: <<END
 This op is hidden from public in Python. It is used by TensorFlow Debugger to
 register gradient tensors for gradient debugging.
+This op operates on non-reference-type tensors.
 END
 }
diff --git a/tensorflow/core/api_def/base_api/api_def_DebugGradientRefIdentity.pbtxt b/tensorflow/core/api_def/base_api/api_def_DebugGradientRefIdentity.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..e4d23fe6506f6df7881f41c858b0b6b40f049201
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_DebugGradientRefIdentity.pbtxt
@@ -0,0 +1,9 @@
+op {
+  graph_op_name: "DebugGradientRefIdentity"
+  summary: "Identity op for gradient debugging."
+  description: <<END
+This op is hidden from public in Python. It is used by TensorFlow Debugger to
+register gradient tensors for gradient debugging.
+This op operates on reference-type tensors.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_DecodeCompressed.pbtxt b/tensorflow/core/api_def/base_api/api_def_DecodeCompressed.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..9babd822938dce8609a91816bcfb3988dd6a06d4
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_DecodeCompressed.pbtxt
@@ -0,0 +1,32 @@
+op {
+  graph_op_name: "DecodeCompressed"
+  in_arg {
+    name: "bytes"
+    description: <<END
+A Tensor of string which is compressed.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+A Tensor with the same shape as input `bytes`, uncompressed
+from bytes.
+END
+  }
+  attr {
+    name: "compression_type"
+    description: <<END
+A scalar containing either (i) the empty string (no
+compression), (ii) "ZLIB", or (iii) "GZIP".
+END
+  }
+  summary: "Decompress strings."
+  description: <<END
+This op decompresses each element of the `bytes` input `Tensor`, which
+is assumed to be compressed using the given `compression_type`.
+
+The `output` is a string `Tensor` of the same shape as `bytes`,
+each element containing the decompressed data from the corresponding
+element in `bytes`.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_DenseToSparseBatchDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_DenseToSparseBatchDataset.pbtxt
index f2f5594c7c16b20ef934539b96bc78d324c1542d..e275cfdd3de5de36979967b1d85d1ae9cd0582a8 100644
--- a/tensorflow/core/api_def/base_api/api_def_DenseToSparseBatchDataset.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DenseToSparseBatchDataset.pbtxt
@@ -21,5 +21,5 @@ SparseTensor. The shape may be partially specified, using `-1` to indicate
 that a particular dimension should use the maximum size of all batch elements.
 END
   }
-  summary: "Creates a dataset that yields a SparseTensor for each element of the input."
+  summary: "Creates a dataset that batches input elements into a SparseTensor."
 }
diff --git a/tensorflow/core/api_def/base_api/api_def_DepthToSpace.pbtxt b/tensorflow/core/api_def/base_api/api_def_DepthToSpace.pbtxt
index e7a18cd6b474d34bcc839f51fd13218c76c61294..d20b47a3ed50f9a8bb65f0cd6c332d03172e6bd0 100644
--- a/tensorflow/core/api_def/base_api/api_def_DepthToSpace.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DepthToSpace.pbtxt
@@ -28,7 +28,7 @@ with the following options:
   "NHWC": `[ batch, height, width, channels ]`
   "NCHW": `[ batch, channels, height, width ]`
   "NCHW_VECT_C":
-      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
+      `qint8 [ batch, channels / 4, height, width, 4 ]`
 
 It is useful to consider the operation as transforming a 6-D Tensor.
 e.g. for data_format = NHWC,
diff --git a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt
index cc10ebe923870426bc9076ca6c96f0497bce1d51..3c313f7be6b38317ab7721a0d494fec42bdb52f4 100644
--- a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNative.pbtxt
@@ -21,6 +21,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, height, width, channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, channels, height, width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each filter
+element on that dimension. The dimension order is determined by the value of
+`data_format`, see above for details. Dilations in the batch and depth
+dimensions must be 1.
 END
   }
   summary: "Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors."
diff --git a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt
index 9126be2afa9bafb3372cfe38fe43f73239e86c72..e66aa3b70707c2216ff5195b9d2dda407c50ec74 100644
--- a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt
@@ -54,6 +54,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, height, width, channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, channels, height, width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each filter
+element on that dimension. The dimension order is determined by the value of
+`data_format`, see above for details. Dilations in the batch and depth
+dimensions must be 1.
 END
   }
   summary: "Computes the gradients of depthwise convolution with respect to the filter."
diff --git a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt
index f1d16858dbf17e2974f6f1487857b63a40c99b91..f501ad21b35b6ad8d3ee16650919b1ff897cdccb 100644
--- a/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt
@@ -54,6 +54,16 @@ default format "NHWC", the data is stored in the order of:
     [batch, height, width, channels].
 Alternatively, the format could be "NCHW", the data storage order of:
     [batch, channels, height, width].
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each filter
+element on that dimension. The dimension order is determined by the value of
+`data_format`, see above for details. Dilations in the batch and depth
+dimensions must be 1.
 END
   }
   summary: "Computes the gradients of depthwise convolution with respect to the input."
diff --git a/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
index 00e96c8a15b3529b13cb6eecfecd5e1551f390f2..dfaa531cbcc8adf46e5c6c57164fa7f674cda18d 100644
--- a/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt
@@ -14,4 +14,47 @@ The `dtype` of the serialized `SparseTensor` objects.
 END
   }
   summary: "Deserialize `SparseTensor` objects."
+  description: <<END
+The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where
+the last dimension stores serialized `SparseTensor` objects and the other N
+dimensions (N >= 0) correspond to a batch. The ranks of the original
+`SparseTensor` objects must all match. When the final `SparseTensor` is
+created, its rank is the rank of the incoming `SparseTensor` objects plus N;
+the sparse tensors have been concatenated along new dimensions, one for each
+batch.
+
+The output `SparseTensor` object's shape values for the original dimensions
+are the max across the input `SparseTensor` objects' shape values for the
+corresponding dimensions. The new dimensions match the size of the batch.
+
+The input `SparseTensor` objects' indices are assumed ordered in
+standard lexicographic order.  If this is not the case, after this
+step run `SparseReorder` to restore index ordering.
+
+For example, if the serialized input is a `[2 x 3]` matrix representing two
+original `SparseTensor` objects:
+
+    index = [ 0]
+            [10]
+            [20]
+    values = [1, 2, 3]
+    shape = [50]
+
+and
+
+    index = [ 2]
+            [10]
+    values = [4, 5]
+    shape = [30]
+
+then the final deserialized `SparseTensor` will be:
+
+    index = [0  0]
+            [0 10]
+            [0 20]
+            [1  2]
+            [1 10]
+    values = [1, 2, 3, 4, 5]
+    shape = [2 50]
+END
 }
diff --git a/tensorflow/core/api_def/base_api/api_def_EagerPyFunc.pbtxt b/tensorflow/core/api_def/base_api/api_def_EagerPyFunc.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..9231368e1654d6bb710a128e076e93005f31116d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_EagerPyFunc.pbtxt
@@ -0,0 +1,8 @@
+op {
+  graph_op_name: "EagerPyFunc"
+  summary: "Eagerly executes a python function to compute func(input)->output. The"
+  description: <<END
+semantics of the input, output, and attributes are the same as those for
+PyFunc.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_EmptyTensorList.pbtxt b/tensorflow/core/api_def/base_api/api_def_EmptyTensorList.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..2c2ad003d01770c3fe823a40555ef3548ce318e8
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_EmptyTensorList.pbtxt
@@ -0,0 +1,12 @@
+op {
+  graph_op_name: "EmptyTensorList"
+  summary: "Creates and returns an empty tensor list."
+  description: <<END
+All list elements must be tensors of dtype element_dtype and shape compatible
+with element_shape.
+
+handle: an empty tensor list.
+element_dtype: the type of elements in the list.
+element_shape: a shape compatible with that of elements in the list.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ExecuteInCriticalSection.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExecuteInCriticalSection.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..cd5fc84a74faa209262da0402c546bcc3b4256fe
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExecuteInCriticalSection.pbtxt
@@ -0,0 +1,49 @@
+op {
+  graph_op_name: "ExecuteInCriticalSection"
+  in_arg {
+    name: "critical_section"
+    description: <<END
+The handle of the `critical_section`.
+END
+  }
+  in_arg {
+    name: "arguments"
+    description: <<END
+Arguments for `f`, including any captured inputs appended at the end.
+END
+  }
+  out_arg {
+    name: "outputs"
+    description: <<END
+The outputs of `f`.
+END
+  }
+  attr {
+    name: "f"
+    description: <<END
+The `Function` to execute.
+END
+  }
+  summary: "Executes function `f` within critical section `critical_section`."
+  description: <<END
+While `f` is running in `critical_section`, no other functions which wish to
+use this critical section may run.
+
+Often the use case is that two executions of the same graph, in parallel,
+wish to run `f`; and we wish to ensure that only one of them executes
+at a time.  This is especially important if `f` modifies one or more
+variables at a time.
+
+It is also useful if two separate functions must share a resource, but we
+wish to ensure the usage is exclusive.
+
+The signature of `f` is expected to be:
+
+```
+  outputs <- F(arguments)
+```
+Typically, but this is not required, `arguments` contain resources.  The
+primary purpose of this op is to limit access to these resources to one
+execution of `F` at a time.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt
index c7f8b6c21ba9fd85ee20c259425b04a8d4aade75..6cd76ff340efeb970e95aefe6544a1e52a9931a0 100644
--- a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt
@@ -43,6 +43,10 @@ of `params`.  The output tensor has shape
 
     indices.shape[:-1] + params.shape[indices.shape[-1]:]
 
+Note that on CPU, if an out of bound index is found, an error is returned.
+On GPU, if an out of bound index is found, a 0 is stored in the
+corresponding output value.
+
 Some examples below.
 
 Simple indexing into a matrix:
diff --git a/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt
index c020176a3b41b257b54601aecab0d47d36849c81..162ef2b033ef9e789251d4e1a04844bae6aeac46 100644
--- a/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt
@@ -50,5 +50,9 @@ params.shape[axis + 1:]` where:
 <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
 <img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt>
 </div>
+
+Note that on CPU, if an out of bound index is found, an error is returned.
+On GPU, if an out of bound index is found, a 0 is stored in the
+corresponding output value.
 END
 }
diff --git a/tensorflow/core/api_def/base_api/api_def_GuaranteeConst.pbtxt b/tensorflow/core/api_def/base_api/api_def_GuaranteeConst.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..b2a2e1aaef84f8c978f8c9312cc52b9bdcd35ca8
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_GuaranteeConst.pbtxt
@@ -0,0 +1,12 @@
+op {
+  graph_op_name: "GuaranteeConst"
+  summary: "Gives a guarantee to the TF runtime that the input tensor is a constant."
+  description: <<END
+The runtime is then free to make optimizations based on this.
+
+Only accepts value typed tensors as inputs and rejects resource variable handles
+as input.
+
+Returns the input tensor without modification.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..a6c4d0d4008f368cd07bfcaafd0b3266a1f6207b
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt
@@ -0,0 +1,38 @@
+op {
+  graph_op_name: "MatrixLogarithm"
+  visibility: HIDDEN
+  in_arg {
+    name: "input"
+    description: <<END
+Shape is `[..., M, M]`.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Shape is `[..., M, M]`.
+
+@compatibility(scipy)
+Equivalent to scipy.linalg.logm
+@end_compatibility
+END
+  }
+  summary: "Computes the matrix logarithm of one or more square matrices:"
+  description: <<END
+
+log(exp(A)) = A
+
+This op is only defined for complex matrices. If A is positive-definite and
+real, then casting to a complex matrix, taking the logarithm and casting back
+to a real matrix will give the correct result.
+
+This function computes the matrix logarithm using the Schur-Parlett algorithm.
+Details of the algorithm can be found in Section 11.6.2 of:
+Nicholas J. Higham, Functions of Matrices: Theory and Computation, SIAM 2008.
+ISBN 978-0-898716-46-7.
+
+The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
+form square matrices. The output is a tensor of the same shape as the input
+containing the exponential for all input submatrices `[..., :, :]`.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ParseSingleExample.pbtxt b/tensorflow/core/api_def/base_api/api_def_ParseSingleExample.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..476c01d0add5df174ead50c9ebfc7b86cfc6aed9
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ParseSingleExample.pbtxt
@@ -0,0 +1,78 @@
+op {
+  graph_op_name: "ParseSingleExample"
+  in_arg {
+    name: "serialized"
+    description: <<END
+A vector containing a batch of binary serialized Example protos.
+END
+  }
+  in_arg {
+    name: "dense_defaults"
+    description: <<END
+A list of Tensors (some may be empty), whose length matches
+the length of `dense_keys`. dense_defaults[j] provides default values
+when the example's feature_map lacks dense_key[j].  If an empty Tensor is
+provided for dense_defaults[j], then the Feature dense_keys[j] is required.
+The input type is inferred from dense_defaults[j], even when it's empty.
+If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined,
+then the shape of dense_defaults[j] must match that of dense_shapes[j].
+If dense_shapes[j] has an undefined major dimension (variable strides dense
+feature), dense_defaults[j] must contain a single element:
+the padding element.
+END
+  }
+  attr {
+    name: "num_sparse"
+    description: <<END
+The number of sparse features to be parsed from the example. This
+must match the lengths of `sparse_keys` and `sparse_types`.
+END
+  }
+  attr {
+    name: "sparse_keys"
+    description: <<END
+A list of `num_sparse` strings.
+The keys expected in the Examples' features associated with sparse values.
+END
+  }
+  attr {
+    name: "dense_keys"
+    description: <<END
+The keys expected in the Examples' features associated with dense
+values.
+END
+  }
+  attr {
+    name: "sparse_types"
+    description: <<END
+A list of `num_sparse` types; the data types of data in each
+Feature given in sparse_keys.
+Currently the ParseSingleExample op supports DT_FLOAT (FloatList),
+DT_INT64 (Int64List), and DT_STRING (BytesList).
+END
+  }
+  attr {
+    name: "Tdense"
+    description: <<END
+The data types of data in each Feature given in dense_keys.
+The length of this list must match the length of `dense_keys`.
+Currently the ParseSingleExample op supports DT_FLOAT (FloatList),
+DT_INT64 (Int64List), and DT_STRING (BytesList).
+END
+  }
+  attr {
+    name: "dense_shapes"
+    description: <<END
+The shapes of data in each Feature given in dense_keys.
+The length of this list must match the length of `dense_keys`.  The
+number of elements in the Feature corresponding to dense_key[j] must
+always equal dense_shapes[j].NumEntries().  If dense_shapes[j] ==
+(D0, D1, ..., DN) then the shape of output Tensor dense_values[j]
+will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1,
+..., DN), the shape of the output Tensor dense_values[j] will be (M,
+D1, .., DN), where M is the number of blocks of elements of length
+D1 * .... * DN, in the input.
+END
+  }
+  summary: "Transforms a tf.Example proto (as a string) into typed tensors."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt
index b19bbeab12db322064dcbf31779ce01adffadeb9..d18bafdce9b3aaccfae6eff0c489e133b492f26d 100644
--- a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2D.pbtxt
@@ -53,6 +53,16 @@ END
     name: "padding"
     description: <<END
 The type of padding algorithm to use.
+END
+  }
+  attr {
+    name: "dilations"
+    description: <<END
+1-D tensor of length 4.  The dilation factor for each dimension of
+`input`. If set to k > 1, there will be k-1 skipped cells between each
+filter element on that dimension. The dimension order is determined by the
+value of `data_format`, see above for details. Dilations in the batch and
+depth dimensions must be 1.
 END
   }
   summary: "Computes a 2D convolution given quantized 4D input and filter tensors."
diff --git a/tensorflow/core/api_def/base_api/api_def_RandomDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_RandomDataset.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..0466b40f85eb118c94404e2f0d7670392bc7afdf
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RandomDataset.pbtxt
@@ -0,0 +1,18 @@
+op {
+  graph_op_name: "RandomDataset"
+  in_arg {
+    name: "seed"
+    description: <<END
+A scalar seed for the random number generator. If either seed or
+seed2 is set to be non-zero, the random number generator is seeded
+by the given seed.  Otherwise, a random seed is used.
+END
+  }
+  in_arg {
+    name: "seed2"
+    description: <<END
+A second scalar seed to avoid seed collision.
+END
+  }
+  summary: "Creates a Dataset that returns pseudorandom numbers."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_RecordInput.pbtxt b/tensorflow/core/api_def/base_api/api_def_RecordInput.pbtxt
index 7efc8cd8334e80be3b1cc8ba5b50c2259931b1b6..333144d76e3f78204a8e35cbbf195871bbed3aef 100644
--- a/tensorflow/core/api_def/base_api/api_def_RecordInput.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_RecordInput.pbtxt
@@ -41,6 +41,13 @@ END
     name: "batch_size"
     description: <<END
 The batch size.
+END
+  }
+  attr {
+    name: "compression_type"
+    description: <<END
+The type of compression for the file. Currently ZLIB and
+GZIP are supported. Defaults to none.
 END
   }
   summary: "Emits randomized records."
diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceScatterNdUpdate.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceScatterNdUpdate.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..b07ee9fda94851b7bc64a02dbf748b74eb63cdee
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ResourceScatterNdUpdate.pbtxt
@@ -0,0 +1,69 @@
+op {
+  graph_op_name: "ResourceScatterNdUpdate"
+  in_arg {
+    name: "ref"
+    description: <<END
+A resource handle. Must be from a VarHandleOp.
+END
+  }
+  in_arg {
+    name: "indices"
+    description: <<END
+A Tensor. Must be one of the following types: int32, int64.
+A tensor of indices into ref.
+END
+  }
+  in_arg {
+    name: "updates"
+    description: <<END
+A Tensor. Must have the same type as ref. A tensor of updated
+values to add to ref.
+END
+  }
+  attr {
+    name: "use_locking"
+    description: <<END
+An optional bool. Defaults to True. If True, the assignment will
+be protected by a lock; otherwise the behavior is undefined,
+but may exhibit less contention.
+END
+  }
+  summary: "Applies sparse `updates` to individual values or slices within a given"
+  description: <<END
+variable according to `indices`.
+
+`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+
+`indices` must be integer tensor, containing indices into `ref`.
+It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+
+The innermost dimension of `indices` (with length `K`) corresponds to
+indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+dimension of `ref`.
+
+`updates` is `Tensor` of rank `Q-1+P-K` with shape:
+
+```
+[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+```
+
+For example, say we want to update 4 scattered elements to a rank-1 tensor to
+8 elements. In Python, that update would look like this:
+
+```python
+    ref = tfe.Variable([1, 2, 3, 4, 5, 6, 7, 8])
+    indices = tf.constant([[4], [3], [1] ,[7]])
+    updates = tf.constant([9, 10, 11, 12])
+    update = tf.scatter_nd_update(ref, indices, updates)
+    with tf.Session() as sess:
+      print sess.run(update)
+```
+
+The resulting update to ref would look like this:
+
+    [1, 11, 3, 10, 9, 6, 7, 12]
+
+See @{tf.scatter_nd} for more details about how to make updates to
+slices.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBox.pbtxt b/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBox.pbtxt
index 0716b2611403b54d894007fad801380f30e70acc..6f1121dd37d4b01a0b6dab8a650f1c7a3f01fb60 100644
--- a/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBox.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBox.pbtxt
@@ -117,7 +117,7 @@ For example,
     # Draw the bounding box in an image summary.
     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                   bbox_for_draw)
-    tf.image_summary('images_with_box', image_with_box)
+    tf.summary.image('images_with_box', image_with_box)
 
     # Employ the bounding box to distort the image.
     distorted_image = tf.slice(image, begin, size)
diff --git a/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBoxV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBoxV2.pbtxt
index e9912609729fbadf7a3dd706903ecc4d915d72eb..473aec50aa214e6d285f20407d4274ce3ccd9a1f 100644
--- a/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBoxV2.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SampleDistortedBoundingBoxV2.pbtxt
@@ -117,7 +117,7 @@ For example,
     # Draw the bounding box in an image summary.
     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                   bbox_for_draw)
-    tf.image_summary('images_with_box', image_with_box)
+    tf.summary.image('images_with_box', image_with_box)
 
     # Employ the bounding box to distort the image.
     distorted_image = tf.slice(image, begin, size)
diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt
index 23732546edaf120eb1a1a9b45219014ba55c6d81..4cb8c064fce615ace8e971505518e85f303d4c12 100644
--- a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt
@@ -98,5 +98,8 @@ The resulting tensor would look like this:
      [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
      [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
      [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]
+
+Note that on CPU, if an out of bound index is found, an error is returned.
+On GPU, if an out of bound index is found, the index is ignored.
 END
 }
diff --git a/tensorflow/core/api_def/base_api/api_def_SerializeManySparse.pbtxt b/tensorflow/core/api_def/base_api/api_def_SerializeManySparse.pbtxt
index 0010bca0b017bcaac5552f7aa9462b0c56d4c01a..d46b4b20eeb58ef1cc261372d69acfe5a70668fe 100644
--- a/tensorflow/core/api_def/base_api/api_def_SerializeManySparse.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SerializeManySparse.pbtxt
@@ -18,7 +18,14 @@ END
 1-D.  The `shape` of the minibatch `SparseTensor`.
 END
   }
-  summary: "Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` string `Tensor`."
+  attr {
+    name: "out_type"
+    description: <<END
+The `dtype` to use for serialization; the supported types are `string`
+(default) and `variant`.
+END
+  }
+  summary: "Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object."
   description: <<END
 The `SparseTensor` must have rank `R` greater than 1, and the first dimension
 is treated as the minibatch dimension.  Elements of the `SparseTensor`
diff --git a/tensorflow/core/api_def/base_api/api_def_SerializeSparse.pbtxt b/tensorflow/core/api_def/base_api/api_def_SerializeSparse.pbtxt
index bb4a352d489c597b6e953bc79e307b0d74042e14..491f69fda088edb8a051b81e65d581094823ca5a 100644
--- a/tensorflow/core/api_def/base_api/api_def_SerializeSparse.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SerializeSparse.pbtxt
@@ -18,5 +18,12 @@ END
 1-D.  The `shape` of the `SparseTensor`.
 END
   }
-  summary: "Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object."
+  attr {
+    name: "out_type"
+    description: <<END
+The `dtype` to use for serialization; the supported types are `string`
+(default) and `variant`.
+END
+  }
+  summary: "Serialize a `SparseTensor` into a `[3]` `Tensor` object."
 }
diff --git a/tensorflow/core/api_def/base_api/api_def_ShuffleAndRepeatDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ShuffleAndRepeatDataset.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..fb425b24a4134366df1129df63dc0361537dd746
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ShuffleAndRepeatDataset.pbtxt
@@ -0,0 +1,36 @@
+op {
+  graph_op_name: "ShuffleAndRepeatDataset"
+  in_arg {
+    name: "buffer_size"
+    description: <<END
+The number of output elements to buffer in an iterator over
+this dataset. Compare with the `min_after_dequeue` attr when creating a
+`RandomShuffleQueue`.
+END
+  }
+  in_arg {
+    name: "seed"
+    description: <<END
+A scalar seed for the random number generator. If either `seed` or
+`seed2` is set to be non-zero, the random number generator is seeded
+by the given seed.  Otherwise, a random seed is used.
+END
+  }
+  in_arg {
+    name: "seed2"
+    description: <<END
+A second scalar seed to avoid seed collision.
+END
+  }
+  in_arg {
+    name: "count"
+    description: <<END
+A scalar representing the number of times the underlying dataset
+should be repeated. The default is `-1`, which results in infinite repetition.
+END
+  }
+  summary: "Creates a dataset that shuffles and repeats elements from `input_dataset`"
+  description: <<END
+pseudorandomly.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ShuffleDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ShuffleDataset.pbtxt
index b12d3af9d74411fb46fb50d7dba57b7e60bbe933..ea5c52c0ee3826076b855ca243f03cb940b8e0b2 100644
--- a/tensorflow/core/api_def/base_api/api_def_ShuffleDataset.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_ShuffleDataset.pbtxt
@@ -11,8 +11,8 @@ END
   in_arg {
     name: "seed"
     description: <<END
-A scalar seed for the random number generator. If either seed or
-seed2 is set to be non-zero, the random number generator is seeded
+A scalar seed for the random number generator. If either `seed` or
+`seed2` is set to be non-zero, the random number generator is seeded
 by the given seed.  Otherwise, a random seed is used.
 END
   }
diff --git a/tensorflow/core/api_def/base_api/api_def_Snapshot.pbtxt b/tensorflow/core/api_def/base_api/api_def_Snapshot.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..49b7f5798cd58d7c96c9b0a582a6d79df4dab5a6
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_Snapshot.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "Snapshot"
+  summary: "Returns a copy of the input tensor."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_SpaceToDepth.pbtxt b/tensorflow/core/api_def/base_api/api_def_SpaceToDepth.pbtxt
index 8fd3966f7038a507ea3402e300f9362bd4f3d54b..b808ff5f9cf9072bdb95e779589668160d909b8f 100644
--- a/tensorflow/core/api_def/base_api/api_def_SpaceToDepth.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_SpaceToDepth.pbtxt
@@ -25,7 +25,7 @@ with the following options:
   "NHWC": `[ batch, height, width, channels ]`
   "NCHW": `[ batch, channels, height, width ]`
   "NCHW_VECT_C":
-      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
+      `qint8 [ batch, channels / 4, height, width, 4 ]`
 
 It is useful to consider the operation as transforming a 6-D Tensor.
 e.g. for data_format = NHWC,
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..d6e105400307b178720a3b1e04955aaad61c9931
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt
@@ -0,0 +1,36 @@
+op {
+  graph_op_name: "SparseSegmentMeanWithNumSegments"
+  in_arg {
+    name: "indices"
+    description: <<END
+A 1-D tensor. Has same rank as `segment_ids`.
+END
+  }
+  in_arg {
+    name: "segment_ids"
+    description: <<END
+A 1-D tensor. Values should be sorted and can be repeated.
+END
+  }
+  in_arg {
+    name: "num_segments"
+    description: <<END
+Should equal the number of distinct segment IDs.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Has same shape as data, except for dimension 0 which has size
+`num_segments`.
+END
+  }
+  summary: "Computes the mean along sparse segments of a tensor."
+  description: <<END
+Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is
+misisng, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..9ba98b81911cc85d942d91a0f689cb075fc987e9
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt
@@ -0,0 +1,38 @@
+op {
+  graph_op_name: "SparseSegmentSqrtNWithNumSegments"
+  in_arg {
+    name: "indices"
+    description: <<END
+A 1-D tensor. Has same rank as `segment_ids`.
+END
+  }
+  in_arg {
+    name: "segment_ids"
+    description: <<END
+A 1-D tensor. Values should be sorted and can be repeated.
+END
+  }
+  in_arg {
+    name: "num_segments"
+    description: <<END
+Should equal the number of distinct segment IDs.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Has same shape as data, except for dimension 0 which
+has size `k`, the number of segments.
+END
+  }
+  summary: "Computes the sum along sparse segments of a tensor divided by the sqrt of N."
+  description: <<END
+N is the size of the segment being reduced.
+
+Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is
+misisng, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_SparseSegmentSumWithNumSegments.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseSegmentSumWithNumSegments.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..3aeaba38e9447d175e33eae4cf6168679129bc8d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_SparseSegmentSumWithNumSegments.pbtxt
@@ -0,0 +1,57 @@
+op {
+  graph_op_name: "SparseSegmentSumWithNumSegments"
+  in_arg {
+    name: "indices"
+    description: <<END
+A 1-D tensor. Has same rank as `segment_ids`.
+END
+  }
+  in_arg {
+    name: "segment_ids"
+    description: <<END
+A 1-D tensor. Values should be sorted and can be repeated.
+END
+  }
+  in_arg {
+    name: "num_segments"
+    description: <<END
+Should equal the number of distinct segment IDs.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Has same shape as data, except for dimension 0 which
+has size `num_segments`.
+END
+  }
+  summary: "Computes the sum along sparse segments of a tensor."
+  description: <<END
+Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
+misisng, the `output` tensor at that position will be zeroed.
+
+Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+segments.
+
+For example:
+
+```python
+c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+
+tf.sparse_segment_sum_with_num_segments(
+    c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
+# => [[0 0 0 0]
+#     [0 0 0 0]
+#     [0 0 0 0]]
+
+tf.sparse_segment_sum_with_num_segments(c,
+                                        tf.constant([0, 1]),
+                                        tf.constant([0, 2],
+                                        num_segments=4))
+# => [[ 1  2  3  4]
+#     [ 0  0  0  0]
+#     [-1 -2 -3 -4]
+#     [ 0  0  0  0]]
+```
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TensorListFromTensor.pbtxt b/tensorflow/core/api_def/base_api/api_def_TensorListFromTensor.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..633b7c18019ba8429e154d9f98835e82815ca528
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TensorListFromTensor.pbtxt
@@ -0,0 +1,10 @@
+op {
+  graph_op_name: "TensorListFromTensor"
+  summary: "Creates a TensorList which, when stacked, has the value of `tensor`."
+  description: <<END
+Each tensor in the result list corresponds to one row of the input tensor.
+
+tensor: The input tensor.
+output_handle: The list.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TensorListLength.pbtxt b/tensorflow/core/api_def/base_api/api_def_TensorListLength.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..f450c20f86b3442567f0eeee0bf4c1d45c2e2987
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TensorListLength.pbtxt
@@ -0,0 +1,8 @@
+op {
+  graph_op_name: "TensorListLength"
+  summary: "Returns the number of tensors in the input tensor list."
+  description: <<END
+input_handle: the input list
+length: the number of tensors in the list
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TensorListPopBack.pbtxt b/tensorflow/core/api_def/base_api/api_def_TensorListPopBack.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..0f752f9cf45d96e8d90fb2826f7cb9eb349a8cad
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TensorListPopBack.pbtxt
@@ -0,0 +1,12 @@
+op {
+  graph_op_name: "TensorListPopBack"
+  summary: "Returns the last element of the input list as well as a list with all but that element."
+  description: <<END
+Fails if the list is empty.
+
+input_handle: the input list
+tensor: the withdrawn last element of the list
+element_dtype: the type of elements in the list
+element_shape: the shape of the output tensor
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TensorListPushBack.pbtxt b/tensorflow/core/api_def/base_api/api_def_TensorListPushBack.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..73297c03003d91e16f288802c5223730af7c766c
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TensorListPushBack.pbtxt
@@ -0,0 +1,11 @@
+op {
+  graph_op_name: "TensorListPushBack"
+  summary: "Returns a list list which has the passed-in `Tensor` as last element and the other elements of the given list in `input_handle`."
+  description: <<END
+tensor: The tensor to put on the list.
+input_handle: The old list.
+output_handle: A list with the elements of the old list followed by tensor.
+element_dtype: the type of elements in the list.
+element_shape: a shape compatible with that of elements in the list.
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_TensorListStack.pbtxt b/tensorflow/core/api_def/base_api/api_def_TensorListStack.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..2402875951848cf922a3252d1a5d8e53312fb4e1
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_TensorListStack.pbtxt
@@ -0,0 +1,12 @@
+op {
+  graph_op_name: "TensorListStack"
+  summary: "Stacks all tensors in the list."
+  description: <<END
+Requires that all tensors have the same shape.
+
+input_handle: the input list
+tensor: the gathered result
+num_elements: optional. If not -1, the number of elements in the list.
+
+END
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueDataset.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..009256916908c412fdebd0775387a7f7f4d30a25
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_UniqueDataset.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "UniqueDataset"
+  summary: "Creates a dataset that contains the unique elements of `input_dataset`."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
index cd7ec6e5518c5a7788bb4fff88a38b74295e9df4..2fb5bd5b88652f5124761bc27f68ffb9859b76c5 100644
--- a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
@@ -9,7 +9,7 @@ END
   in_arg {
     name: "axis"
     description: <<END
-A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
+A `Tensor` of type `int32` (default: None). The axis of the Tensor to
 find the unique elements.
 END
   }
@@ -26,12 +26,15 @@ A 1-D Tensor. Has the same type as x that contains the index of each
 value of x in the output y.
 END
   }
-  summary: "Finds unique elements in a 1-D tensor."
+  summary: "Finds unique elements along an axis of a tensor."
   description: <<END
-This operation returns a tensor `y` containing all of the unique elements of `x`
-sorted in the same order that they occur in `x`. This operation also returns a
-tensor `idx` the same size as `x` that contains the index of each value of `x`
-in the unique output `y`. In other words:
+This operation either returns a tensor `y` containing unique elements
+along the `axis` of a tensor. The returned unique elements is sorted
+in the same order as they occur along `axis` in `x`.
+This operation also returns a tensor `idx` that is the same size as
+the number of the elements in `x` along the `axis` dimension. It
+contains the index in the unique output `y`.
+In other words, for an `1-D` tensor `x` with `axis = None:
 
 `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
 
@@ -43,5 +46,30 @@ y, idx = unique(x)
 y ==> [1, 2, 4, 7, 8]
 idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
 ```
+
+For an `2-D` tensor `x` with `axis = 0`:
+
+```
+# tensor 'x' is [[1, 0, 0],
+#                [1, 0, 0],
+#                [2, 0, 0]]
+y, idx = unique(x, axis=0)
+y ==> [[1, 0, 0],
+       [2, 0, 0]]
+idx ==> [0, 0, 1]
+```
+
+For an `2-D` tensor `x` with `axis = 1`:
+
+```
+# tensor 'x' is [[1, 0, 0],
+#                [1, 0, 0],
+#                [2, 0, 0]]
+y, idx = unique(x, axis=1)
+y ==> [[1, 0],
+       [1, 0],
+       [2, 0]]
+idx ==> [0, 1, 1]
+```
 END
 }
diff --git a/tensorflow/core/api_def/excluded_ops.cc b/tensorflow/core/api_def/excluded_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..07ac974ff9aa7e66d9bb3c4e536f91d1249abb90
--- /dev/null
+++ b/tensorflow/core/api_def/excluded_ops.cc
@@ -0,0 +1,26 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/api_def/excluded_ops.h"
+
+namespace tensorflow {
+
+const std::unordered_set<std::string>* GetExcludedOps() {
+  static std::unordered_set<std::string>* excluded_ops =
+      new std::unordered_set<std::string>(
+          {"BigQueryReader", "GenerateBigQueryReaderPartitions"});
+  return excluded_ops;
+}
+}  // namespace tensorflow
diff --git a/tensorflow/core/api_def/excluded_ops.h b/tensorflow/core/api_def/excluded_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..409e5d32a7c0a9b3d724ee3e36e98e4f5dfddd85
--- /dev/null
+++ b/tensorflow/core/api_def/excluded_ops.h
@@ -0,0 +1,28 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CORE_API_DEF_EXCLUDED_OPS_H_
+#define TENSORFLOW_CORE_API_DEF_EXCLUDED_OPS_H_
+
+#include <string>
+#include <unordered_set>
+
+namespace tensorflow {
+
+// Returns a list of ops excluded from ApiDef.
+// TODO(annarev): figure out if we should keep ApiDefs for these ops as well
+const std::unordered_set<std::string>* GetExcludedOps();
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_API_DEF_EXCLUDED_OPS_H_
diff --git a/tensorflow/core/api_def/python_api/api_def_DebugGradientRefIdentity.pbtxt b/tensorflow/core/api_def/python_api/api_def_DebugGradientRefIdentity.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..5e14e5fffd6e3683eec6eca65f587b5f0ab0016b
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_DebugGradientRefIdentity.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "DebugGradientRefIdentity"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_EagerPyFunc.pbtxt b/tensorflow/core/api_def/python_api/api_def_EagerPyFunc.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..ee0f95dacbc09702039da97fccd98a2d8bb83b1b
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_EagerPyFunc.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "EagerPyFunc"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_Snapshot.pbtxt b/tensorflow/core/api_def/python_api/api_def_Snapshot.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..ea9ccee39765b659cc27e04a48cffc1caf97d5af
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_Snapshot.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "Snapshot"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_Unique.pbtxt b/tensorflow/core/api_def/python_api/api_def_Unique.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..e763d66e9a42c809eda574ecd8419ee452cbc829
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_Unique.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "Unique"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_UniqueV2.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..c0d5046858b0fb38b88d0965bd1f0f28890a0b26
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_UniqueV2.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "UniqueV2"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/update_api_def.cc b/tensorflow/core/api_def/update_api_def.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1a6d15ec68b23a57a1f9b6982b4dc295bba6626b
--- /dev/null
+++ b/tensorflow/core/api_def/update_api_def.cc
@@ -0,0 +1,272 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/api_def/update_api_def.h"
+
+#include <ctype.h>
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/api_def/excluded_ops.h"
+#include "tensorflow/core/framework/api_def.pb.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_def_builder.h"
+#include "tensorflow/core/framework/op_gen_lib.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/env.h"
+
+namespace tensorflow {
+
+namespace {
+constexpr char kApiDefFileFormat[] = "api_def_%s.pbtxt";
+// TODO(annarev): look into supporting other prefixes, not just 'doc'.
+constexpr char kDocStart[] = ".Doc(R\"doc(";
+constexpr char kDocEnd[] = ")doc\")";
+
+// Updates api_def based on the given op.
+void FillBaseApiDef(ApiDef* api_def, const OpDef& op) {
+  api_def->set_graph_op_name(op.name());
+  // Add arg docs
+  for (auto& input_arg : op.input_arg()) {
+    if (!input_arg.description().empty()) {
+      auto* api_def_in_arg = api_def->add_in_arg();
+      api_def_in_arg->set_name(input_arg.name());
+      api_def_in_arg->set_description(input_arg.description());
+    }
+  }
+  for (auto& output_arg : op.output_arg()) {
+    if (!output_arg.description().empty()) {
+      auto* api_def_out_arg = api_def->add_out_arg();
+      api_def_out_arg->set_name(output_arg.name());
+      api_def_out_arg->set_description(output_arg.description());
+    }
+  }
+  // Add attr docs
+  for (auto& attr : op.attr()) {
+    if (!attr.description().empty()) {
+      auto* api_def_attr = api_def->add_attr();
+      api_def_attr->set_name(attr.name());
+      api_def_attr->set_description(attr.description());
+    }
+  }
+  // Add docs
+  api_def->set_summary(op.summary());
+  api_def->set_description(op.description());
+}
+
+// Returns true if op has any description or summary.
+bool OpHasDocs(const OpDef& op) {
+  if (!op.summary().empty() || !op.description().empty()) {
+    return true;
+  }
+  for (const auto& arg : op.input_arg()) {
+    if (!arg.description().empty()) {
+      return true;
+    }
+  }
+  for (const auto& arg : op.output_arg()) {
+    if (!arg.description().empty()) {
+      return true;
+    }
+  }
+  for (const auto& attr : op.attr()) {
+    if (!attr.description().empty()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns true if summary and all descriptions are the same in op1
+// and op2.
+bool CheckDocsMatch(const OpDef& op1, const OpDef& op2) {
+  if (op1.summary() != op2.summary() ||
+      op1.description() != op2.description() ||
+      op1.input_arg_size() != op2.input_arg_size() ||
+      op1.output_arg_size() != op2.output_arg_size() ||
+      op1.attr_size() != op2.attr_size()) {
+    return false;
+  }
+  // Iterate over args and attrs to compare their docs.
+  for (int i = 0; i < op1.input_arg_size(); ++i) {
+    if (op1.input_arg(i).description() != op2.input_arg(i).description()) {
+      return false;
+    }
+  }
+  for (int i = 0; i < op1.output_arg_size(); ++i) {
+    if (op1.output_arg(i).description() != op2.output_arg(i).description()) {
+      return false;
+    }
+  }
+  for (int i = 0; i < op1.attr_size(); ++i) {
+    if (op1.attr(i).description() != op2.attr(i).description()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Returns true if descriptions and summaries in op match a
+// given single doc-string.
+bool ValidateOpDocs(const OpDef& op, const string& doc) {
+  OpDefBuilder b(op.name());
+  // We don't really care about type we use for arguments and
+  // attributes. We just want to make sure attribute and argument names
+  // are added so that descriptions can be assigned to them when parsing
+  // documentation.
+  for (const auto& arg : op.input_arg()) {
+    b.Input(arg.name() + ":string");
+  }
+  for (const auto& arg : op.output_arg()) {
+    b.Output(arg.name() + ":string");
+  }
+  for (const auto& attr : op.attr()) {
+    b.Attr(attr.name() + ":string");
+  }
+  b.Doc(doc);
+  OpRegistrationData op_reg_data;
+  TF_CHECK_OK(b.Finalize(&op_reg_data));
+  return CheckDocsMatch(op, op_reg_data.op_def);
+}
+}  // namespace
+
+string RemoveDoc(const OpDef& op, const string& file_contents,
+                 size_t start_location) {
+  // Look for a line starting with .Doc( after the REGISTER_OP.
+  const auto doc_start_location = file_contents.find(kDocStart, start_location);
+  const string format_error = strings::Printf(
+      "Could not find %s doc for removal. Make sure the doc is defined with "
+      "'%s' prefix and '%s' suffix or remove the doc manually.",
+      op.name().c_str(), kDocStart, kDocEnd);
+  if (doc_start_location == string::npos) {
+    std::cerr << format_error << std::endl;
+    LOG(ERROR) << "Didn't find doc start";
+    return file_contents;
+  }
+  const auto doc_end_location = file_contents.find(kDocEnd, doc_start_location);
+  if (doc_end_location == string::npos) {
+    LOG(ERROR) << "Didn't find doc start";
+    std::cerr << format_error << std::endl;
+    return file_contents;
+  }
+
+  const auto doc_start_size = sizeof(kDocStart) - 1;
+  string doc_text = file_contents.substr(
+      doc_start_location + doc_start_size,
+      doc_end_location - doc_start_location - doc_start_size);
+
+  // Make sure the doc text we found actually matches OpDef docs to
+  // avoid removing incorrect text.
+  if (!ValidateOpDocs(op, doc_text)) {
+    LOG(ERROR) << "Invalid doc: " << doc_text;
+    std::cerr << format_error << std::endl;
+    return file_contents;
+  }
+  // Remove .Doc call.
+  auto before_doc = file_contents.substr(0, doc_start_location);
+  str_util::StripTrailingWhitespace(&before_doc);
+  return before_doc +
+         file_contents.substr(doc_end_location + sizeof(kDocEnd) - 1);
+}
+
+namespace {
+// Remove .Doc calls that follow REGISTER_OP calls for the given ops.
+// We search for REGISTER_OP calls in the given op_files list.
+void RemoveDocs(const std::vector<const OpDef*>& ops,
+                const std::vector<string>& op_files) {
+  // Set of ops that we already found REGISTER_OP calls for.
+  std::set<string> processed_ops;
+
+  for (const auto& file : op_files) {
+    string file_contents;
+    bool file_contents_updated = false;
+    TF_CHECK_OK(ReadFileToString(Env::Default(), file, &file_contents));
+
+    for (auto op : ops) {
+      if (processed_ops.find(op->name()) != processed_ops.end()) {
+        // We already found REGISTER_OP call for this op in another file.
+        continue;
+      }
+      string register_call =
+          strings::Printf("REGISTER_OP(\"%s\")", op->name().c_str());
+      const auto register_call_location = file_contents.find(register_call);
+      // Find REGISTER_OP(OpName) call.
+      if (register_call_location == string::npos) {
+        continue;
+      }
+      std::cout << "Removing .Doc call for " << op->name() << " from " << file
+                << "." << std::endl;
+      file_contents = RemoveDoc(*op, file_contents, register_call_location);
+      file_contents_updated = true;
+
+      processed_ops.insert(op->name());
+    }
+    if (file_contents_updated) {
+      TF_CHECK_OK(WriteStringToFile(Env::Default(), file, file_contents))
+          << "Could not remove .Doc calls in " << file
+          << ". Make sure the file is writable.";
+    }
+  }
+}
+}  // namespace
+
+// Returns ApiDef text representation in multi-line format
+// constructed based on the given op.
+string CreateApiDef(const OpDef& op) {
+  ApiDef api_def;
+  FillBaseApiDef(&api_def, op);
+
+  const std::vector<string> multi_line_fields = {"description"};
+  string new_api_defs_str = api_def.DebugString();
+  return PBTxtToMultiline(new_api_defs_str, multi_line_fields);
+}
+
+// Creates ApiDef files for any new ops.
+// If op_file_pattern is not empty, then also removes .Doc calls from
+// new op registrations in these files.
+void CreateApiDefs(const OpList& ops, const string& api_def_dir,
+                   const string& op_file_pattern) {
+  auto* excluded_ops = GetExcludedOps();
+  std::vector<const OpDef*> new_ops_with_docs;
+
+  for (const auto& op : ops.op()) {
+    if (excluded_ops->find(op.name()) != excluded_ops->end()) {
+      continue;
+    }
+    // Form the expected ApiDef path.
+    string file_path =
+        io::JoinPath(tensorflow::string(api_def_dir), kApiDefFileFormat);
+    file_path = strings::Printf(file_path.c_str(), op.name().c_str());
+
+    // Create ApiDef if it doesn't exist.
+    if (!Env::Default()->FileExists(file_path).ok()) {
+      std::cout << "Creating ApiDef file " << file_path << std::endl;
+      const auto& api_def_text = CreateApiDef(op);
+      TF_CHECK_OK(WriteStringToFile(Env::Default(), file_path, api_def_text));
+
+      if (OpHasDocs(op)) {
+        new_ops_with_docs.push_back(&op);
+      }
+    }
+  }
+  if (!op_file_pattern.empty()) {
+    std::vector<string> op_files;
+    TF_CHECK_OK(Env::Default()->GetMatchingPaths(op_file_pattern, &op_files));
+    RemoveDocs(new_ops_with_docs, op_files);
+  }
+}
+}  // namespace tensorflow
diff --git a/tensorflow/core/api_def/update_api_def.h b/tensorflow/core/api_def/update_api_def.h
new file mode 100644
index 0000000000000000000000000000000000000000..5eae7e528efae43d533d76f2ca96d6a016a63961
--- /dev/null
+++ b/tensorflow/core/api_def/update_api_def.h
@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_API_DEF_UPDATE_API_DEF_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_API_DEF_UPDATE_API_DEF_H_
+// Functions for updating ApiDef when new ops are added.
+
+#include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+// Returns ApiDef text representation in multi-line format
+// constructed based on the given op.
+string CreateApiDef(const OpDef& op);
+
+// Removes .Doc call for the given op.
+// If unsuccessful, returns original file_contents and prints an error.
+// start_location - We search for .Doc call starting at this location
+//   in file_contents.
+string RemoveDoc(const OpDef& op, const string& file_contents,
+                 size_t start_location);
+
+// Creates api_def_*.pbtxt files for any new ops (i.e. ops that don't have an
+// api_def_*.pbtxt file yet).
+// If op_file_pattern is non-empty, then this method will also
+// look for a REGISTER_OP call for the new ops and removes corresponding
+// .Doc() calls since the newly generated api_def_*.pbtxt files will
+// store the doc strings.
+void CreateApiDefs(const OpList& ops, const string& api_def_dir,
+                   const string& op_file_pattern);
+
+}  // namespace tensorflow
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_API_DEF_UPDATE_API_DEF_H_
diff --git a/tensorflow/core/api_def/update_api_def.sh b/tensorflow/core/api_def/update_api_def.sh
index 07c76e65620956a2cb7a44093314d89303d2d921..21d0aa3c34c3679e93afcc4b3d4b99b16ea33277 100755
--- a/tensorflow/core/api_def/update_api_def.sh
+++ b/tensorflow/core/api_def/update_api_def.sh
@@ -14,15 +14,15 @@
 # limitations under the License.
 # ==============================================================================
 
-# Script to update tensorflow/core/api_def/base_api/api_def*.pbtxt files.
+# Script to create tensorflow/core/api_def/base_api/api_def*.pbtxt
+# files for new ops.
 
 set -e
 
 current_file="$(readlink -f "$0")"
 current_dir="$(dirname "$current_file")"
 
-bazel build //tensorflow/core:api_test
-bazel-bin/tensorflow/core/api_test \
-  --update_api_def \
-  --api_def_dir="${current_dir}/base_api"
-
+bazel build //tensorflow/core/api_def:update_api_def
+bazel-bin/tensorflow/core/api_def/update_api_def \
+  --api_def_dir="${current_dir}/base_api" \
+  --op_file_pattern="${current_dir}/../ops/*_ops.cc"
diff --git a/tensorflow/core/api_def/update_api_def_main.cc b/tensorflow/core/api_def/update_api_def_main.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3fd975ce178b5ff779b90305cb0ce9f8e8116494
--- /dev/null
+++ b/tensorflow/core/api_def/update_api_def_main.cc
@@ -0,0 +1,56 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// This program can be used to automatically create an api_def_*.pbtxt
+// file based on op definition.
+//
+// To run, use the following script:
+// tensorflow/core/api_def/update_api_def.sh
+//
+// There are 2 ways to use this script:
+//   1. Define a REGISTER_OP call without a .Doc() call. Then, run
+//      this script and add summaries and descriptions in the generated
+//      api_def_*.pbtxt file manually.
+//   2. Add .Doc() call to a REGISTER_OP call. Then run this script
+//      to remove that .Doc() call and instead add corresponding summaries
+//      and descriptions in api_def_*.pbtxt file automatically.
+//      Note that .Doc() call must have the following format for this to work:
+//      .Doc(R"doc(<doc goes here>)doc").
+#include "tensorflow/core/api_def/update_api_def.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/util/command_line_flags.h"
+
+int main(int argc, char** argv) {
+  tensorflow::string api_files_dir;
+  tensorflow::string op_file_pattern;
+  std::vector<tensorflow::Flag> flag_list = {
+      tensorflow::Flag("api_def_dir", &api_files_dir,
+                       "Base directory of api_def*.pbtxt files."),
+      tensorflow::Flag("op_file_pattern", &op_file_pattern,
+                       "Pattern that matches C++ files containing REGISTER_OP "
+                       "calls. If specified, we will try to remove .Doc() "
+                       "calls for new ops defined in these files.")};
+  std::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
+  bool parsed_values_ok = tensorflow::Flags::Parse(&argc, argv, flag_list);
+  if (!parsed_values_ok) {
+    std::cerr << usage << std::endl;
+    return 2;
+  }
+  tensorflow::port::InitMain(argv[0], &argc, &argv);
+
+  tensorflow::OpList ops;
+  tensorflow::OpRegistry::Global()->Export(false, &ops);
+  tensorflow::CreateApiDefs(ops, api_files_dir, op_file_pattern);
+}
diff --git a/tensorflow/core/api_def/update_api_def_test.cc b/tensorflow/core/api_def/update_api_def_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..8948f2c1d5b9f03d418bc11d6481b2b98cb37693
--- /dev/null
+++ b/tensorflow/core/api_def/update_api_def_test.cc
@@ -0,0 +1,205 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/api_def/update_api_def.h"
+
+#include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+TEST(UpdateApiDefTest, TestRemoveDocSingleOp) {
+  const string op_def_text = R"opdef(
+REGISTER_OP("Op1")
+    .Input("a: T")
+    .Output("output: T")
+    .Attr("b: type")
+    .SetShapeFn(shape_inference::UnchangedShape);
+)opdef";
+
+  const string op_def_text_with_doc = R"opdef(
+REGISTER_OP("Op1")
+    .Input("a: T")
+    .Output("output: T")
+    .Attr("b: type")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Summary for Op1.
+
+Description
+for Op1.
+
+b :   Description for b.
+a: Description for a.
+output: Description for output.
+)doc");
+)opdef";
+
+  const string op_text = R"(
+name: "Op1"
+input_arg {
+  name: "a"
+  description: "Description for a."
+}
+output_arg {
+  name: "output"
+  description: "Description for output."
+}
+attr {
+  name: "b"
+  description: "Description for b."
+}
+summary: "Summary for Op1."
+description: "Description\nfor Op1."
+)";
+  OpDef op;
+  protobuf::TextFormat::ParseFromString(op_text, &op);  // NOLINT
+
+  EXPECT_EQ(op_def_text,
+            RemoveDoc(op, op_def_text_with_doc, 0 /* start_location */));
+}
+
+TEST(UpdateApiDefTest, TestRemoveDocMultipleOps) {
+  const string op_def_text = R"opdef(
+REGISTER_OP("Op1")
+    .Input("a: T")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
+REGISTER_OP("Op2")
+    .Input("a: T")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
+REGISTER_OP("Op3")
+    .Input("c: T")
+    .SetShapeFn(shape_inference::UnchangedShape);
+)opdef";
+
+  const string op_def_text_with_doc = R"opdef(
+REGISTER_OP("Op1")
+    .Input("a: T")
+    .Doc(R"doc(
+Summary for Op1.
+)doc")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
+REGISTER_OP("Op2")
+    .Input("a: T")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Summary for Op2.
+)doc");
+
+REGISTER_OP("Op3")
+    .Input("c: T")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Summary for Op3.
+)doc");
+)opdef";
+
+  const string op1_text = R"(
+name: "Op1"
+input_arg {
+  name: "a"
+}
+summary: "Summary for Op1."
+)";
+  const string op2_text = R"(
+name: "Op2"
+input_arg {
+  name: "a"
+}
+summary: "Summary for Op2."
+)";
+  const string op3_text = R"(
+name: "Op3"
+input_arg {
+  name: "c"
+}
+summary: "Summary for Op3."
+)";
+  OpDef op1, op2, op3;
+  protobuf::TextFormat::ParseFromString(op1_text, &op1);  // NOLINT
+  protobuf::TextFormat::ParseFromString(op2_text, &op2);  // NOLINT
+  protobuf::TextFormat::ParseFromString(op3_text, &op3);  // NOLINT
+
+  string updated_text =
+      RemoveDoc(op2, op_def_text_with_doc,
+                op_def_text_with_doc.find("Op2") /* start_location */);
+  EXPECT_EQ(string::npos, updated_text.find("Summary for Op2"));
+  EXPECT_NE(string::npos, updated_text.find("Summary for Op1"));
+  EXPECT_NE(string::npos, updated_text.find("Summary for Op3"));
+
+  updated_text = RemoveDoc(op3, updated_text,
+                           updated_text.find("Op3") /* start_location */);
+  updated_text = RemoveDoc(op1, updated_text,
+                           updated_text.find("Op1") /* start_location */);
+  EXPECT_EQ(op_def_text, updated_text);
+}
+
+TEST(UpdateApiDefTest, TestCreateApiDef) {
+  const string op_text = R"(
+name: "Op1"
+input_arg {
+  name: "a"
+  description: "Description for a."
+}
+output_arg {
+  name: "output"
+  description: "Description for output."
+}
+attr {
+  name: "b"
+  description: "Description for b."
+}
+summary: "Summary for Op1."
+description: "Description\nfor Op1."
+)";
+  OpDef op;
+  protobuf::TextFormat::ParseFromString(op_text, &op);  // NOLINT
+
+  const string expected_api_def = R"(graph_op_name: "Op1"
+in_arg {
+  name: "a"
+  description: <<END
+Description for a.
+END
+}
+out_arg {
+  name: "output"
+  description: <<END
+Description for output.
+END
+}
+attr {
+  name: "b"
+  description: <<END
+Description for b.
+END
+}
+summary: "Summary for Op1."
+description: <<END
+Description
+for Op1.
+END
+)";
+  EXPECT_EQ(expected_api_def, CreateApiDef(op));
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc
index 6399b8cf55b98f330a93ae28b516c59bee5c9d79..63594e83faaf457207ea3d90a4f48fa565291906 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/bfc_allocator.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <atomic>
+
 #include "tensorflow/core/common_runtime/bfc_allocator.h"
 
 #include "tensorflow/core/common_runtime/allocator_retry.h"
@@ -206,20 +208,20 @@ void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes,
   if (allocation_attr.no_retry_on_failure) {
     // Return immediately upon the first failure if this is for allocating an
     // optional scratch space.
-    void* result = AllocateRawInternal(unused_alignment, num_bytes, false);
+    bool dump_log_on_failure = VLOG_IS_ON(2);
+    void* result =
+        AllocateRawInternal(unused_alignment, num_bytes, dump_log_on_failure);
     if (result == nullptr) {
-      // The counter incrementing is not thread-safe. But we don't really care.
-      // TODO(zhengxq): we should implement a LOG_FIRST_N and LOG_EVERY_N for
-      // more general usage.
-      static int log_counter = 0;
-      if (log_counter < 10) {
-        log_counter++;
+      static std::atomic<int32> log_counter{0};
+      int32 counter_value = log_counter.load(std::memory_order_relaxed);
+      if (counter_value < 10) {
+        log_counter.store(counter_value + 1, std::memory_order_relaxed);
         LOG(WARNING)
             << "Allocator (" << Name() << ") ran out of memory trying "
             << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
             << ". The caller indicates that this is not a failure, but"
             << " may mean that there could be performance gains if more"
-            << " memory is available.";
+            << " memory were available.";
       }
     }
     return result;
@@ -659,17 +661,9 @@ void BFCAllocator::DumpMemoryLog(size_t num_bytes) {
       const Chunk* c = ChunkFromHandle(h);
       if (c->in_use()) {
         in_use_by_size[c->size]++;
-        LOG(INFO) << "Chunk at " << c->ptr << " of size " << c->size;
-      }
-      h = c->next;
-    }
-
-    h = region_manager_.get_handle(region.ptr());
-    while (h != kInvalidChunkHandle) {
-      const Chunk* c = ChunkFromHandle(h);
-      if (!c->in_use()) {
-        LOG(INFO) << "Free at " << c->ptr << " of size " << c->size;
       }
+      LOG(INFO) << (c->in_use() ? "Chunk" : "Free ") << " at " << c->ptr
+                << " of size " << c->size;
       h = c->next;
     }
   }
@@ -691,6 +685,13 @@ void BFCAllocator::GetStats(AllocatorStats* stats) {
   *stats = stats_;
 }
 
+void BFCAllocator::ClearStats() {
+  mutex_lock l(lock_);
+  stats_.num_allocs = 0;
+  stats_.max_bytes_in_use = stats_.bytes_in_use;
+  stats_.max_alloc_size = 0;
+}
+
 std::array<BFCAllocator::BinDebugInfo, BFCAllocator::kNumBins>
 BFCAllocator::get_bin_debug_info() {
   std::array<BinDebugInfo, kNumBins> bin_infos;
diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h
index 20fa05f0d228c754ca0093ca7f360592cdaa23f2..3dd011a58e4724a8db34703ec68055c3a3a26fa3 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.h
+++ b/tensorflow/core/common_runtime/bfc_allocator.h
@@ -70,6 +70,8 @@ class BFCAllocator : public VisitableAllocator {
 
   void GetStats(AllocatorStats* stats) override;
 
+  void ClearStats() override;
+
  private:
   struct Bin;
 
@@ -418,11 +420,13 @@ class BFCAllocator : public VisitableAllocator {
   mutable mutex lock_;
   RegionManager region_manager_ GUARDED_BY(lock_);
 
-  std::vector<Chunk> chunks_;
-  ChunkHandle free_chunks_list_;  // Ptr to head of linked list of free Chunks
+  std::vector<Chunk> chunks_ GUARDED_BY(lock_);
+
+  // Pointer to head of linked list of free Chunks
+  ChunkHandle free_chunks_list_ GUARDED_BY(lock_);
 
   // Called once on each region, ASAP.
-  std::vector<Visitor> region_visitors_;
+  std::vector<Visitor> region_visitors_ GUARDED_BY(lock_);
 
   // Counter containing the next unique identifier to assign to a
   // newly-created chunk.
diff --git a/tensorflow/core/common_runtime/device.h b/tensorflow/core/common_runtime/device.h
index d5a452a796d67400d56ca08c675e0386348dea13..5918cd9bbf35a7e277ec8d7e17f9008400e1eea3 100644
--- a/tensorflow/core/common_runtime/device.h
+++ b/tensorflow/core/common_runtime/device.h
@@ -148,6 +148,9 @@ class Device : public DeviceBase {
     return BuildDeviceAttributes(name, device, memory_limit, locality, "");
   }
 
+  // Clears the resource manager associated with this device.
+  void ClearResourceMgr() { rmgr_->Clear(); }
+
  protected:
   void DeleteResourceMgr() {
     delete rmgr_;
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 2d4f2a2d902a6a1457aa7a90b172dd9c9d5f8f5c..e9bdd922bae74cab225551d9c57febe158b0f0cd 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -259,9 +259,10 @@ DirectSession::DirectSession(const SessionOptions& options,
       factory_(factory),
       cancellation_manager_(new CancellationManager()),
       operation_timeout_in_ms_(options_.config.operation_timeout_in_ms()) {
-  if (options_.config.session_inter_op_thread_pool_size() > 0) {
-    for (int i = 0; i < options_.config.session_inter_op_thread_pool_size();
-         ++i) {
+  const int thread_pool_size =
+      options_.config.session_inter_op_thread_pool_size();
+  if (thread_pool_size > 0) {
+    for (int i = 0; i < thread_pool_size; ++i) {
       thread::ThreadPool* pool = nullptr;
       bool owned = false;
       init_error_.Update(NewThreadPoolFromThreadPoolOptions(
@@ -321,6 +322,10 @@ DirectSession::~DirectSession() {
   for (auto d : device_mgr_->ListDevices()) {
     d->op_segment()->RemoveHold(session_handle_);
   }
+  for (auto d : device_mgr_->ListDevices()) {
+    d->ClearResourceMgr();
+  }
+  functions_.clear();
   delete cancellation_manager_;
   for (const auto& p_and_owned : thread_pools_) {
     if (p_and_owned.second) delete p_and_owned.first;
@@ -521,9 +526,7 @@ Status DirectSession::Run(const RunOptions& run_options,
 
   args.rendezvous = run_state.rendez;
   args.cancellation_manager = &step_cancellation_manager;
-  args.runner = [this, pool](Executor::Args::Closure c) {
-    SchedClosure(pool, std::move(c));
-  };
+
   args.session_state = &session_state_;
   args.tensor_store = &run_state.tensor_store;
   args.step_container = &run_state.step_container;
@@ -584,7 +587,23 @@ Status DirectSession::Run(const RunOptions& run_options,
     return errors::Cancelled("Run call was cancelled");
   }
 
+  Executor::Args::Runner default_runner = [this,
+                                           pool](Executor::Args::Closure c) {
+    SchedClosure(pool, std::move(c));
+  };
   for (const auto& item : executors_and_keys->items) {
+    // TODO(zhengxq): support partial run.
+    // TODO(zhengxq): if the device picks its own threadpool, we need to assign
+    //     less threads to the main compute pool by default.
+    thread::ThreadPool* device_thread_pool =
+        item.device->tensorflow_device_thread_pool();
+    if (!device_thread_pool) {
+      args.runner = default_runner;
+    } else {
+      args.runner = [this, device_thread_pool](Executor::Args::Closure c) {
+        SchedClosure(device_thread_pool, std::move(c));
+      };
+    }
     item.executor->RunAsync(args, barrier->Get());
   }
 
@@ -1125,11 +1144,12 @@ Status DirectSession::GetOrCreateExecutors(
   }
 
   std::shared_ptr<ExecutorsAndKeys> ek(new ExecutorsAndKeys);
+  std::unique_ptr<FunctionInfo> func_info(new FunctionInfo);
 
   // The executor_lock_ is intentionally released while executor is
   // being created.
   std::unordered_map<string, std::unique_ptr<Graph>> graphs;
-  TF_RETURN_IF_ERROR(CreateGraphs(options, &graphs, &ek->flib_def,
+  TF_RETURN_IF_ERROR(CreateGraphs(options, &graphs, &func_info->flib_def,
                                   run_state_args, &ek->input_types,
                                   &ek->output_types));
 
@@ -1160,9 +1180,9 @@ Status DirectSession::GetOrCreateExecutors(
     graph_def_version =
         execution_state_->original_graph_def().versions().producer();
   }
-  ek->proc_flr.reset(new ProcessFunctionLibraryRuntime(
-      device_mgr_.get(), options_.env, graph_def_version, ek->flib_def.get(),
-      optimizer_opts));
+  func_info->proc_flr.reset(new ProcessFunctionLibraryRuntime(
+      device_mgr_.get(), options_.env, graph_def_version,
+      func_info->flib_def.get(), optimizer_opts));
 
   GraphOptimizer optimizer(optimizer_opts);
   for (auto iter = graphs.begin(); iter != graphs.end(); ++iter) {
@@ -1174,7 +1194,7 @@ Status DirectSession::GetOrCreateExecutors(
 
     ek->items.resize(ek->items.size() + 1);
     auto* item = &(ek->items.back());
-    auto lib = ek->proc_flr->GetFLR(partition_name);
+    auto lib = func_info->proc_flr->GetFLR(partition_name);
     if (lib == nullptr) {
       return errors::Internal("Could not find device: ", partition_name);
     }
@@ -1186,8 +1206,14 @@ Status DirectSession::GetOrCreateExecutors(
     auto opseg = device->op_segment();
     params.create_kernel = [this, lib, opseg](const NodeDef& ndef,
                                               OpKernel** kernel) {
-      // Caches the kernel only if the node is stateful.
-      if (!lib->IsStateful(ndef.op())) {
+      // We do not share the kernel via the OpSegment if the node is
+      // stateless, or a function.
+      // NOTE(mrry): We must not share function kernels (implemented
+      // using `CallOp`) between subgraphs, because `CallOp::handle_`
+      // is tied to a particular subgraph. Even if the function itself
+      // is stateful, the `CallOp` that invokes it is not.
+      if (!lib->IsStateful(ndef.op()) ||
+          lib->GetFunctionLibraryDefinition()->Find(ndef.op()) != nullptr) {
         return lib->CreateKernel(ndef, kernel);
       }
       auto create_fn = [lib, &ndef](OpKernel** kernel) {
@@ -1222,6 +1248,7 @@ Status DirectSession::GetOrCreateExecutors(
     // NewLocalExecutor takes ownership of partition_graph.
     item->graph = partition_graph.get();
     item->executor = nullptr;
+    item->device = device;
     Executor* executor;
     TF_RETURN_IF_ERROR(
         NewLocalExecutor(params, partition_graph.release(), &executor));
@@ -1263,6 +1290,7 @@ Status DirectSession::GetOrCreateExecutors(
 
   // Reacquire the lock, try to insert into the map.
   mutex_lock l(executor_lock_);
+  functions_.push_back(std::move(func_info));
 
   // Another thread may have created the entry before us, in which case we will
   // reuse the already created one.
diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index 780d0b46a8c2c7440a5a739c27b368af8aad5bc2..45d765f8498e5e12eef3a47cd4a7ff0ad22aa495 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -112,6 +112,7 @@ class DirectSession : public Session {
   // every partition.
   struct PerPartitionExecutorsAndLib {
     Graph* graph = nullptr;                  // not owned.
+    Device* device = nullptr;                // not owned.
     FunctionLibraryRuntime* flib = nullptr;  // not owned.
     std::unique_ptr<Executor> executor;
   };
@@ -124,20 +125,12 @@ class DirectSession : public Session {
   // a partition of the graph bundled with its dependent library runtime.
   // 'input_keys' are the rendezvous keys for the feeds and 'output_keys'
   // are rendezvous keys for the fetches.
-  // 'flib_def' is the function library used by graphs in 'items'.
-  // 'proc_flr' is the collection of FunctionLibraryRuntime objects, one per
-  // device.
-  // TODO(phawkins): currently partitions always share the same function
-  // library. Consider giving each partition its own function library to enable
-  // per-partition rewrites.
   struct ExecutorsAndKeys {
     ExecutorsAndKeys() : step_count(0) {}
 
     std::atomic_int_fast64_t step_count;
     std::unique_ptr<Graph> graph;
     NameNodeMap name_to_node;
-    std::unique_ptr<FunctionLibraryDefinition> flib_def;
-    std::unique_ptr<ProcessFunctionLibraryRuntime> proc_flr;
     std::vector<PerPartitionExecutorsAndLib> items;
     std::unordered_map<string, size_t> input_name_to_index;
     std::unordered_map<string, string> input_name_to_rendezvous_key;
@@ -148,6 +141,22 @@ class DirectSession : public Session {
     DataTypeVector output_types;
   };
 
+  // A FunctionInfo object is created for every unique set of feeds/fetches.
+  // This info could be folded into the ExecutorsAndKeys object but we would
+  // like to maintain a deletion order in which the OpKernels (owned by the
+  // executor) should be destroyed first, followed by the resources in the
+  // device and then followed by the function stuff.
+  // TODO(rohanj): Consolidate function library definitions so that we can
+  // instantiate only one ProcFLR and lib_def and make this just a member
+  // variable and not a vector.
+  // 'flib_def' is the function library used.
+  // 'proc_flr' is the collection of FunctionLibraryRuntime objects, one per
+  // device.
+  struct FunctionInfo {
+    std::unique_ptr<FunctionLibraryDefinition> flib_def;
+    std::unique_ptr<ProcessFunctionLibraryRuntime> proc_flr;
+  };
+
   // For each live partial execution, the session maintains a RunState.
   // 'status' is the current status of this partial execution. 'executor_done'
   // is "notified" when all executors are done. 'pending_inputs' are the set
@@ -282,6 +291,9 @@ class DirectSession : public Session {
   // Schedules 'c' for execution on pool.
   void SchedClosure(thread::ThreadPool* pool, std::function<void()> c);
 
+  std::vector<std::unique_ptr<FunctionInfo>> functions_
+      GUARDED_BY(executor_lock_);
+
   mutex executor_lock_;  // protects executors_
   // Holds mappings from signature to the executors that process
   // it. The reason for a level of indirection around mapped_type is
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index 15edce6a68200dc45532d2b69779f48601a47c25..99b33e2ef0d532aca08dfb538857d347d22a7351 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -1265,7 +1265,7 @@ TEST(DirectSessionTest, LocalDeviceManager) {
 
 // A simple benchmark for the overhead of `DirectSession::Run()` calls
 // with varying numbers of feeds/fetches.
-void FeedFetchBenchmarkHelper(int num_feeds, int iters) {
+void FeedFetchBenchmarkHelper(int iters, int num_feeds) {
   testing::StopTiming();
 
   Tensor value(DT_FLOAT, TensorShape());
diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
index 14f5fdc5d304e1d64dca313b2aca673f691288e1..df9cf0c91f1b7e5521061b6915fc1b7ed609e003 100644
--- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
@@ -142,7 +142,7 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelWarmup) {
   DirectSession* ds = static_cast<DirectSession*>(session.get());
   CostModelManager::CostModelMap cost_models;
   ds->ExportCostModels(&cost_models);
-  CHECK_EQ(cost_models.size(), 1);
+  CHECK_GE(cost_models.size(), 1);
   const CostModel* cm = (*cost_models.begin()).second;
   EXPECT_EQ(measure_steps, cm->GetUpdateTimes());
 }
diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 1896baaf668864fc1b29ac3ea6c9b1ab6eaaaeaa..9d03caff1e1e89c4c667f94853352580545e70e5 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -172,17 +172,11 @@ void SetMemory(NodeExecStatsWrapper* stats, OpKernelContext* ctx) {
     stats->AddAllocation(allocator_pair.first, allocator_pair.second);
   }
   auto* ms = stats->stats()->mutable_memory_stats();
-  ms->set_host_temp_memory_size(ctx->host_temp_memory_size());
-  ms->set_device_temp_memory_size(ctx->device_temp_memory_size());
-  for (const auto& alloc_id : ctx->host_persistent_alloc_ids()) {
-    ms->mutable_host_persistent_tensor_alloc_ids()->Add(alloc_id);
+  ms->set_temp_memory_size(ctx->temp_memory_size());
+  for (const auto& alloc_id : ctx->persistent_alloc_ids()) {
+    ms->mutable_persistent_tensor_alloc_ids()->Add(alloc_id);
   }
-  for (const auto& alloc_id : ctx->device_persistent_alloc_ids()) {
-    ms->mutable_device_persistent_tensor_alloc_ids()->Add(alloc_id);
-  }
-  ms->set_host_persistent_memory_size(ctx->host_persistent_memory_allocated());
-  ms->set_device_persistent_memory_size(
-      ctx->device_persistent_memory_allocated());
+  ms->set_persistent_memory_size(ctx->persistent_memory_allocated());
 }
 
 void SetReferencedTensors(NodeExecStatsWrapper* stats,
@@ -1188,7 +1182,7 @@ class ExecutorState {
   // QUESTION: Make it a checkpoint::TensorSliceReaderCacheWrapper
   // instead of a pointer?  (avoids having to delete).
   checkpoint::TensorSliceReaderCacheWrapper* slice_reader_cache_;
-  FunctionCallFrame* call_frame_;
+  CallFrameInterface* call_frame_;
   const ExecutorImpl* impl_;
   CancellationManager* cancellation_manager_;
   Executor::Args::Runner runner_;
diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h
index e09dc4e34630fc0ab22615b7204bd0ec2d117d35..3fd932da5b6c44833ba940351dad6cf373ffa05c 100644
--- a/tensorflow/core/common_runtime/executor.h
+++ b/tensorflow/core/common_runtime/executor.h
@@ -84,7 +84,7 @@ class Executor {
     int64 step_id = 0;
     Rendezvous* rendezvous = nullptr;
     StepStatsCollector* stats_collector = nullptr;
-    FunctionCallFrame* call_frame = nullptr;
+    CallFrameInterface* call_frame = nullptr;
     CancellationManager* cancellation_manager = nullptr;
     SessionState* session_state = nullptr;
     TensorStore* tensor_store = nullptr;
@@ -202,11 +202,12 @@ class ExecutorBarrier {
       // below.
       if (--pending_ == 0) {
         CHECK(done_cb_ != nullptr);
-        done = done_cb_;
-        done_cb_ = nullptr;
+        std::swap(done, done_cb_);
       }
 
-      status = status_;
+      if (!status_.ok()) {
+        status = status_;
+      }
     }
 
     if (error) {
diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index 23d0f331c5d096cfb944da48e9b5ce58e04daf65..e9c4328f29e2c941afd8e14142beb0db224110d8 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -30,6 +30,7 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
+#include "tensorflow/core/graph/control_flow.h"
 #include "tensorflow/core/graph/gradients.h"
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/graph/optimizer_cse.h"
@@ -151,20 +152,29 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
   ~FunctionLibraryRuntimeImpl() override;
 
   Status Instantiate(const string& function_name, AttrSlice attrs,
+                     const InstantiateOptions& options,
                      Handle* handle) override;
 
+  Status ReleaseHandle(Handle handle) override;
+
   const FunctionBody* GetFunctionBody(Handle handle) override;
 
   Status CreateKernel(const NodeDef& ndef, OpKernel** kernel) override;
 
   void Run(const Options& opts, Handle handle, gtl::ArraySlice<Tensor> args,
            std::vector<Tensor>* rets, DoneCallback done) override;
+  // NOTE(mrry): This overload is currently only implemented for local function
+  // execution.
+  // TODO(b/70346412): Implement support for remote function execution when
+  // passing a call frame.
+  void Run(const Options& opts, Handle handle, CallFrameInterface* frame,
+           DoneCallback done) override;
 
   bool IsStateful(const string& function) override;
 
   const FunctionLibraryDefinition* GetFunctionLibraryDefinition()
       const override {
-    return lib_def_;
+    return base_lib_def_;
   }
 
   Device* device() override { return device_; }
@@ -180,7 +190,7 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
   Device* const device_;
   Env* const env_;
   const int graph_def_version_;
-  const FunctionLibraryDefinition* const lib_def_;
+  const FunctionLibraryDefinition* const base_lib_def_;
   GraphOptimizer optimizer_;
   const CustomKernelCreator custom_kernel_creator_;
   const string device_name_;
@@ -190,28 +200,37 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
 
   mutable mutex mu_;
 
-  // func_graphs_ never shrinks or reorders its members.
-  std::vector<FunctionBody*> func_graphs_ GUARDED_BY(mu_);
+  int next_handle_ GUARDED_BY(mu_);
 
   // The instantiated and transformed function is encoded as a Graph
   // object, and an executor is created for the graph.
   struct Item : public core::RefCounted {
     const Graph* graph = nullptr;  // Owned by exec.
+    const FunctionLibraryDefinition* overlay_lib = nullptr;  // Not owned.
+    FunctionBody* func_graph = nullptr;
     Executor* exec = nullptr;
 
-    ~Item() override { delete this->exec; }
+    ~Item() override {
+      delete this->func_graph;
+      delete this->exec;
+    }
   };
-  std::vector<Item*> items_;
+  std::unordered_map<Handle, Item*> items_ GUARDED_BY(mu_);
 
   ProcessFunctionLibraryRuntime* parent_ = nullptr;  // not owned.
 
+  Status CreateKernel(const NodeDef& ndef,
+                      const FunctionLibraryDefinition* lib_def,
+                      OpKernel** kernel);
   Status FunctionDefToBody(const FunctionDef& fdef, AttrSlice attrs,
+                           const FunctionLibraryDefinition* lib_def,
                            FunctionBody** fbody);
   Status CreateItem(Handle handle, Item** item);
   Status GetOrCreateItem(Handle handle, Item** item);
   Status InstantiateSymbolicGradient(const NameAttrList& func,
+                                     const FunctionLibraryDefinition* lib_def,
                                      FunctionBody** g_body);
-  bool IsLocalTarget(const AttrSlice& attrs);
+  bool IsLocalTarget(const InstantiateOptions& options);
   AttrValueMap FixAttrs(const AttrSlice& attrs);
   void RunRemote(const Options& opts, Handle handle,
                  gtl::ArraySlice<Tensor> args, std::vector<Tensor>* rets,
@@ -230,15 +249,16 @@ FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl(
       device_(device),
       env_(env),
       graph_def_version_(graph_def_version),
-      lib_def_(lib_def),
+      base_lib_def_(lib_def),
       optimizer_(optimizer_options),
       custom_kernel_creator_(std::move(custom_kernel_creator)),
       device_name_(device_ == nullptr
                        ? ProcessFunctionLibraryRuntime::kDefaultFLRDevice
                        : device_->name()),
+      next_handle_(0),
       parent_(parent) {
   get_func_sig_ = [this](const string& op, const OpDef** sig) {
-    return lib_def_->LookUpOpDef(op, sig);
+    return base_lib_def_->LookUpOpDef(op, sig);
   };
   create_kernel_ = [this](const NodeDef& ndef, OpKernel** kernel) {
     return CreateKernel(ndef, kernel);
@@ -246,9 +266,15 @@ FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl(
 }
 
 FunctionLibraryRuntimeImpl::~FunctionLibraryRuntimeImpl() {
-  for (FunctionBody* p : func_graphs_) delete p;
-  for (Item* item : items_)
-    if (item) item->Unref();
+  // The most common patterns of FLR usage don't require the caller to
+  // explicitly release handles. As a result, we try to unref each item until
+  // it's erased.
+  for (auto item : items_) {
+    if (item.second) {
+      while (!item.second->Unref()) {
+      }
+    }
+  }
 }
 
 // An asynchronous op kernel which executes an instantiated function
@@ -309,13 +335,18 @@ const FunctionBody* FunctionLibraryRuntimeImpl::GetFunctionBody(Handle h) {
   }
 
   mutex_lock l(mu_);
-  CHECK_LE(0, local_handle);
-  CHECK_LT(local_handle, func_graphs_.size());
-  return func_graphs_[local_handle];
+  CHECK_EQ(1, items_.count(local_handle));
+  return items_[local_handle]->func_graph;
 }
 
 Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
                                                 OpKernel** kernel) {
+  return CreateKernel(ndef, base_lib_def_, kernel);
+}
+
+Status FunctionLibraryRuntimeImpl::CreateKernel(
+    const NodeDef& ndef, const FunctionLibraryDefinition* lib_def,
+    OpKernel** kernel) {
   // If a custom kernel creator is given, try that.
   Status s;
   if (custom_kernel_creator_) {
@@ -331,16 +362,21 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
     }
   }
 
-  if (lib_def_->Find(ndef.op()) == nullptr) {
+  if (lib_def->Find(ndef.op()) == nullptr) {
     // A primitive operation. Creates the registered kernel.
     return CreateNonCachedKernel(device_, this, ndef, graph_def_version_,
                                  kernel);
   }
 
-  // Try to instantiate this function for the func/attr. Maybe its
+  // Try to instantiate this function for the func/attr. Maybe it's
   // cached already.
+  InstantiateOptions options;
+  if (lib_def != base_lib_def_) {
+    options.overlay_lib = lib_def;
+  }
   Handle handle;
-  TF_RETURN_IF_ERROR(Instantiate(ndef.op(), AttrSlice(&ndef.attr()), &handle));
+  TF_RETURN_IF_ERROR(
+      Instantiate(ndef.op(), AttrSlice(&ndef.attr()), options, &handle));
 
   const FunctionBody* fbody = GetFunctionBody(handle);
   CHECK_NOTNULL(fbody);
@@ -372,15 +408,23 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef,
   return s;
 }
 
-Status FunctionLibraryRuntimeImpl::FunctionDefToBody(const FunctionDef& fdef,
-                                                     AttrSlice attrs,
-                                                     FunctionBody** fbody) {
-  return FunctionDefToBodyHelper(fdef, attrs, lib_def_, get_func_sig_, fbody);
+Status FunctionLibraryRuntimeImpl::FunctionDefToBody(
+    const FunctionDef& fdef, AttrSlice attrs,
+    const FunctionLibraryDefinition* lib_def, FunctionBody** fbody) {
+  if (lib_def == base_lib_def_) {
+    return FunctionDefToBodyHelper(fdef, attrs, lib_def, get_func_sig_, fbody);
+  } else {
+    auto get_func_sig = [lib_def](const string& op, const OpDef** sig) {
+      return lib_def->LookUpOpDef(op, sig);
+    };
+    return FunctionDefToBodyHelper(fdef, attrs, lib_def, get_func_sig, fbody);
+  }
 }
 
 Status FunctionLibraryRuntimeImpl::InstantiateSymbolicGradient(
-    const NameAttrList& func, FunctionBody** g_body) {
-  const FunctionDef* fdef = lib_def_->Find(func.name());
+    const NameAttrList& func, const FunctionLibraryDefinition* lib_def,
+    FunctionBody** g_body) {
+  const FunctionDef* fdef = lib_def->Find(func.name());
   if (fdef == nullptr) {
     // f is a primitive op.
     gradient::Creator creator;
@@ -394,12 +438,16 @@ Status FunctionLibraryRuntimeImpl::InstantiateSymbolicGradient(
     // by the gradient function.
     TF_RETURN_IF_ERROR(creator(AttrSlice(&func.attr()), &grad_fdef));
     TF_RETURN_IF_ERROR(
-        FunctionDefToBody(grad_fdef, AttrSlice(&func.attr()), g_body));
+        FunctionDefToBody(grad_fdef, AttrSlice(&func.attr()), lib_def, g_body));
   } else {
     // f is a user-defined function.
+    InstantiateOptions options;
+    if (lib_def != base_lib_def_) {
+      options.overlay_lib = lib_def;
+    }
     Handle f_handle;
     TF_RETURN_IF_ERROR(
-        Instantiate(func.name(), AttrSlice(&func.attr()), &f_handle));
+        Instantiate(func.name(), AttrSlice(&func.attr()), options, &f_handle));
     const FunctionBody* f_body = GetFunctionBody(f_handle);
     CHECK_NOTNULL(f_body);
     *g_body = SymbolicGradient(*f_body);
@@ -407,51 +455,42 @@ Status FunctionLibraryRuntimeImpl::InstantiateSymbolicGradient(
   return Status::OK();
 }
 
-bool FunctionLibraryRuntimeImpl::IsLocalTarget(const AttrSlice& attrs) {
+bool FunctionLibraryRuntimeImpl::IsLocalTarget(
+    const InstantiateOptions& options) {
   if (device_ == nullptr) return true;
-  string target = ProcessFunctionLibraryRuntime::ObtainFunctionTarget(attrs);
-  if (target.empty()) return true;
+  if (options.target.empty()) return true;
   Device* target_device;
-  if (!device_mgr_->LookupDevice(target, &target_device).ok()) {
+  if (!device_mgr_->LookupDevice(options.target, &target_device).ok()) {
     return false;
   }
   return target_device == device_;
 }
 
-AttrValueMap FunctionLibraryRuntimeImpl::FixAttrs(const AttrSlice& attrs) {
-  AttrValueMap value_map;
-  for (auto it : attrs) {
-    value_map[it.first] = it.second;
-  }
-  if (attrs.Find("_target") != nullptr) {
-    return value_map;
-  }
-  AttrValue v;
-  v.set_s(device_name_);
-  AddAttr("_target", v, &value_map);
-  return value_map;
-}
-
-Status FunctionLibraryRuntimeImpl::Instantiate(const string& function_name,
-                                               AttrSlice attrs,
-                                               Handle* handle) {
-  AttrValueMap value_map = FixAttrs(attrs);
-  AttrSlice new_attrs(&value_map);
-
-  if (!IsLocalTarget(new_attrs)) {
-    return parent_->Instantiate(function_name, new_attrs, handle);
+Status FunctionLibraryRuntimeImpl::Instantiate(
+    const string& function_name, AttrSlice attrs,
+    const InstantiateOptions& options, Handle* handle) {
+  if (!IsLocalTarget(options)) {
+    return parent_->Instantiate(function_name, attrs, options, handle);
   }
 
-  const string key = Canonicalize(function_name, new_attrs);
+  // Since this is a local target, ensure that the local `device_name_` appears
+  // in the canonical key.
+  InstantiateOptions options_copy(options);
+  options_copy.target = device_name_;
+  const string key = Canonicalize(function_name, attrs, options_copy);
   *handle = parent_->GetHandle(key);
   if (*handle != kInvalidHandle) {
+    mutex_lock l(mu_);
+    items_[parent_->GetHandleOnDevice(device_name_, *handle)]->Ref();
     return Status::OK();
   }
 
   Status s;
+  const FunctionLibraryDefinition* lib_def =
+      options.overlay_lib ? options.overlay_lib : base_lib_def_;
   FunctionBody* fbody = nullptr;
   if (function_name == kGradientOp) {
-    const AttrValue* f = new_attrs.Find(kFuncAttr);
+    const AttrValue* f = attrs.Find(kFuncAttr);
     if (f == nullptr) {
       return errors::InvalidArgument("SymbolicGradient is missing attr: f");
     }
@@ -459,17 +498,17 @@ Status FunctionLibraryRuntimeImpl::Instantiate(const string& function_name,
     if (func.name() == kGradientOp) {
       return errors::InvalidArgument("Can't take gradient of SymbolicGradient");
     }
-    const string grad = lib_def_->FindGradient(func.name());
+    const string grad = lib_def->FindGradient(func.name());
     if (!grad.empty()) {
-      return Instantiate(grad, AttrSlice(&func.attr()), handle);
+      return Instantiate(grad, AttrSlice(&func.attr()), options, handle);
     }
-    TF_RETURN_IF_ERROR(InstantiateSymbolicGradient(func, &fbody));
+    TF_RETURN_IF_ERROR(InstantiateSymbolicGradient(func, lib_def, &fbody));
   } else {
-    const FunctionDef* fdef = lib_def_->Find(function_name);
+    const FunctionDef* fdef = lib_def->Find(function_name);
     if (fdef == nullptr) {
       return errors::NotFound("Function ", function_name, " is not defined.");
     }
-    TF_RETURN_IF_ERROR(FunctionDefToBody(*fdef, new_attrs, &fbody));
+    TF_RETURN_IF_ERROR(FunctionDefToBody(*fdef, attrs, lib_def, &fbody));
   }
 
   {
@@ -477,15 +516,35 @@ Status FunctionLibraryRuntimeImpl::Instantiate(const string& function_name,
     *handle = parent_->GetHandle(key);
     if (*handle != kInvalidHandle) {
       delete fbody;
+      items_[parent_->GetHandleOnDevice(device_name_, *handle)]->Ref();
     } else {
-      *handle = parent_->AddHandle(key, device_name_, func_graphs_.size());
-      func_graphs_.push_back(fbody);
-      items_.resize(func_graphs_.size());
+      *handle = parent_->AddHandle(key, device_name_, next_handle_);
+      Item* item = new Item;
+      item->func_graph = fbody;
+      item->overlay_lib = options.overlay_lib;
+      items_.insert({next_handle_, item});
+      next_handle_++;
     }
   }
   return Status::OK();
 }
 
+Status FunctionLibraryRuntimeImpl::ReleaseHandle(Handle handle) {
+  if (!parent_->IsInstantiatedOnDevice(device_name_, handle)) {
+    return parent_->ReleaseHandle(handle);
+  }
+
+  LocalHandle h = parent_->GetHandleOnDevice(device_name_, handle);
+  mutex_lock l(mu_);
+  CHECK_EQ(1, items_.count(h));
+  Item* item = items_[h];
+  if (item->Unref()) {
+    items_.erase(h);
+    TF_RETURN_IF_ERROR(parent_->RemoveHandle(handle));
+  }
+  return Status::OK();
+}
+
 void DumpGraph(StringPiece label, const Graph* g) {
   // TODO(zhifengc): Change Graph to record #nodes.
   VLOG(1) << "Graph " << label << " #nodes " << g->num_nodes() << " #edges "
@@ -506,12 +565,47 @@ void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr<Graph>* g) {
   optimizer.Optimize(lib, lib->env(), lib->device(), g, /*shape_map=*/nullptr);
 }
 
+namespace {
+// Removes all stateless nodes that do not contribute to a return
+// value from the function body.  Unlike `RemoveDeadNodes()`, which is
+// triggered by `OptimizerOptions.do_function_inlining`, this pass
+// ignores the SINK node, from which (by definition) all nodes are
+// reverse reachable.
+void PruneFunctionBody(Graph* g) {
+  VLOG(2) << "Pruning function body";
+  std::unordered_set<const Node*> nodes;
+  for (auto n : g->nodes()) {
+    // NOTE(mrry): "_Retval" nodes are stateful, and so will be added
+    // to the seed set of `nodes`.
+    // TODO(mrry): Investigate whether the `n->IsControlFlow()` test is
+    // still needed. It would be preferable to prune entire loops and/or
+    // conditionals if they are not used in the graph.
+    if (n->IsControlFlow() || n->op_def().is_stateful()) {
+      nodes.insert(n);
+    }
+  }
+  bool changed = PruneForReverseReachability(g, std::move(nodes));
+  if (changed) {
+    FixupSourceAndSinkEdges(g);
+  }
+}
+}  // namespace
+
 Status FunctionLibraryRuntimeImpl::CreateItem(Handle handle, Item** item) {
-  const FunctionBody* fbody = GetFunctionBody(handle);
-  CHECK_NOTNULL(fbody);
-  std::unique_ptr<Graph> g(new Graph(lib_def_));
+  const FunctionBody* fbody;
+  const FunctionLibraryDefinition* lib_def;
+  {
+    mutex_lock l(mu_);
+    fbody = (*item)->func_graph;
+    lib_def = (*item)->overlay_lib;
+  }
+  if (!lib_def) {
+    lib_def = base_lib_def_;
+  }
+  std::unique_ptr<Graph> g(new Graph(lib_def));
   CopyGraph(*fbody->graph, g.get());
 
+  PruneFunctionBody(g.get());
   optimizer_.Optimize(this, env(), device(), &g, /*shape_map=*/nullptr);
   TF_RETURN_IF_ERROR(EnsureMemoryTypes(DeviceType(device()->device_type()),
                                        device()->name(), g.get()));
@@ -521,7 +615,14 @@ Status FunctionLibraryRuntimeImpl::CreateItem(Handle handle, Item** item) {
   LocalExecutorParams params;
   params.device = device_;
   params.function_library = this;
-  params.create_kernel = create_kernel_;
+  if (lib_def == base_lib_def_) {
+    params.create_kernel = create_kernel_;
+  } else {
+    params.create_kernel = [this, lib_def](const NodeDef& ndef,
+                                           OpKernel** kernel) {
+      return CreateKernel(ndef, lib_def, kernel);
+    };
+  }
   params.delete_kernel = [](OpKernel* kernel) {
     DeleteNonCachedKernel(kernel);
   };
@@ -529,9 +630,16 @@ Status FunctionLibraryRuntimeImpl::CreateItem(Handle handle, Item** item) {
   Executor* exec;
   TF_RETURN_IF_ERROR(NewLocalExecutor(params, g.release(), &exec));
 
-  *item = new Item;
-  (*item)->graph = graph;
-  (*item)->exec = exec;
+  {
+    // Guard item since it is already inserted in items_.
+    mutex_lock l(mu_);
+    if ((*item)->exec) {
+      delete exec;
+    } else {
+      (*item)->graph = graph;
+      (*item)->exec = exec;
+    }
+  }
   return Status::OK();
 }
 
@@ -539,29 +647,18 @@ Status FunctionLibraryRuntimeImpl::GetOrCreateItem(Handle handle, Item** item) {
   LocalHandle local_handle = parent_->GetHandleOnDevice(device_name_, handle);
   {
     mutex_lock l(mu_);
-    if (local_handle >= items_.size()) {
+    if (items_.count(local_handle) == 0) {
       return errors::NotFound("Function handle ", handle,
                               " is not valid. Likely an internal error.");
     }
     *item = items_[local_handle];
-    if (*item != nullptr) {
-      (*item)->Ref();
+    if ((*item)->exec != nullptr) {
       return Status::OK();
     }
   }
   // NOTE: We need to call CreateItem out of mu_ because creating an
   // executor needs to call CreateKernel.
-  TF_RETURN_IF_ERROR(CreateItem(handle, item));
-
-  {
-    mutex_lock l(mu_);
-    if (items_[local_handle] == nullptr) {
-      // Install *item in items_.
-      items_[local_handle] = *item;
-      (*item)->Ref();
-    }
-  }
-  return Status::OK();
+  return CreateItem(handle, item);
 }
 
 void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
@@ -569,14 +666,13 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
                                            std::vector<Tensor>* rets,
                                            Executor::Args* exec_args,
                                            Item* item, DoneCallback done) {
-  FunctionCallFrame* frame = exec_args->call_frame;
+  DCHECK(exec_args->call_frame == nullptr);
   string target_device = parent_->GetDeviceName(handle);
   string source_device = opts.source_device;
   Rendezvous* rendezvous = opts.rendezvous;
   DeviceContext* device_context;
   Status s = parent_->GetDeviceContext(target_device, &device_context);
   if (!s.ok()) {
-    delete frame;
     delete exec_args;
     done(s);
     return;
@@ -584,6 +680,16 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
   int64 src_incarnation, target_incarnation;
   s = parent_->GetDeviceIncarnation(source_device, &src_incarnation);
   s.Update(parent_->GetDeviceIncarnation(target_device, &target_incarnation));
+  if (!s.ok()) {
+    delete exec_args;
+    done(s);
+    return;
+  }
+
+  const FunctionBody* fbody = GetFunctionBody(handle);
+  FunctionCallFrame* frame =
+      new FunctionCallFrame(fbody->arg_types, fbody->ret_types);
+  exec_args->call_frame = frame;
   if (!s.ok()) {
     delete frame;
     delete exec_args;
@@ -617,7 +723,6 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
             *exec_args, [item, frame, rets, done, source_device, target_device,
                          target_incarnation, rendezvous, device_context,
                          remote_args, exec_args](const Status& status) {
-              item->Unref();
               Status s = status;
               if (s.ok()) {
                 s = frame->ConsumeRetvals(rets);
@@ -661,17 +766,7 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
     parent_->Run(run_opts, handle, args, rets, done);
     return;
   }
-  const FunctionBody* fbody = GetFunctionBody(handle);
-  FunctionCallFrame* frame =
-      new FunctionCallFrame(fbody->arg_types, fbody->ret_types);
 
-  Item* item = nullptr;
-  Status s = GetOrCreateItem(handle, &item);
-  if (!s.ok()) {
-    delete frame;
-    done(s);
-    return;
-  }
   DCHECK(run_opts.runner != nullptr);
 
   Executor::Args* exec_args = new Executor::Args;
@@ -679,16 +774,28 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
   exec_args->step_id = run_opts.step_id;
   exec_args->rendezvous = run_opts.rendezvous;
   exec_args->stats_collector = run_opts.stats_collector;
-  exec_args->call_frame = frame;
   exec_args->cancellation_manager = run_opts.cancellation_manager;
   exec_args->step_container = run_opts.step_container;
   exec_args->runner = *run_opts.runner;
 
+  Item* item = nullptr;
+  Status s = GetOrCreateItem(handle, &item);
+  if (!s.ok()) {
+    delete exec_args;
+    done(s);
+    return;
+  }
+
   if (run_opts.remote_execution) {
+    // NOTE(mrry): `RunRemote()` will set `exec_args->call_frame` for us.
     RunRemote(run_opts, handle, args, rets, exec_args, item, done);
     return;
   }
 
+  const FunctionBody* fbody = GetFunctionBody(handle);
+  FunctionCallFrame* frame =
+      new FunctionCallFrame(fbody->arg_types, fbody->ret_types);
+  exec_args->call_frame = frame;
   s = frame->SetArgs(args);
   if (!s.ok()) {
     delete frame;
@@ -696,12 +803,12 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
     done(s);
     return;
   }
+
   item->exec->RunAsync(
       // Executor args
       *exec_args,
       // Done callback.
       [item, frame, rets, done, exec_args](const Status& status) {
-        item->Unref();
         Status s = status;
         if (s.ok()) {
           s = frame->ConsumeRetvals(rets);
@@ -712,9 +819,69 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
       });
 }
 
+void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
+                                     CallFrameInterface* frame,
+                                     DoneCallback done) {
+  if (opts.cancellation_manager && opts.cancellation_manager->IsCancelled()) {
+    done(errors::Cancelled(""));
+    return;
+  }
+  if (!parent_->IsInstantiatedOnDevice(device_name_, handle) ||
+      opts.remote_execution) {
+    done(errors::Unimplemented("Remote calling with CallFrameInterface"));
+    return;
+  }
+
+  Options run_opts = opts;
+  if (opts.create_rendezvous) {
+    Rendezvous* rendezvous = new IntraProcessRendezvous(device_mgr_);
+    run_opts.rendezvous = rendezvous;
+    run_opts.create_rendezvous = false;
+    done = std::bind(
+        [rendezvous](DoneCallback done,
+                     // Begin unbound arguments.
+                     const Status& status) {
+          rendezvous->Unref();
+          done(status);
+        },
+        std::move(done), std::placeholders::_1);
+  }
+
+  Item* item = nullptr;
+  Status s = GetOrCreateItem(handle, &item);
+  if (!s.ok()) {
+    done(s);
+    return;
+  }
+  DCHECK(run_opts.runner != nullptr);
+
+  Executor::Args* exec_args = new Executor::Args;
+  // Inherit the step_id from the caller.
+  exec_args->step_id = run_opts.step_id;
+  exec_args->rendezvous = run_opts.rendezvous;
+  exec_args->stats_collector = run_opts.stats_collector;
+  exec_args->cancellation_manager = run_opts.cancellation_manager;
+  exec_args->step_container = run_opts.step_container;
+  exec_args->runner = *run_opts.runner;
+  exec_args->call_frame = frame;
+
+  item->exec->RunAsync(
+      // Executor args
+      *exec_args,
+      // Done callback.
+      std::bind(
+          [item, frame, exec_args](DoneCallback done,
+                                   // Start unbound arguments.
+                                   const Status& status) {
+            delete exec_args;
+            done(status);
+          },
+          std::move(done), std::placeholders::_1));
+}
+
 bool FunctionLibraryRuntimeImpl::IsStateful(const string& func) {
   const OpDef* op_def;
-  const Status s = lib_def_->LookUpOpDef(func, &op_def);
+  const Status s = base_lib_def_->LookUpOpDef(func, &op_def);
   return s.ok() && op_def->is_stateful();
 }
 
@@ -1387,17 +1554,23 @@ Status FunctionDefToBodyHelper(
   InstantiationResult result;
   TF_RETURN_IF_ERROR(InstantiateFunction(fdef, attrs, get_func_sig, &result));
 
-  Graph* graph = new Graph(lib_def);
+  std::unique_ptr<Graph> graph(new Graph(lib_def));
   GraphConstructorOptions opts;
   opts.allow_internal_ops = true;
   opts.expect_device_spec = false;
-  Status s = ConvertNodeDefsToGraph(opts, result.nodes, graph);
-  if (!s.ok()) {
-    delete graph;
-  } else {
-    *fbody = new FunctionBody(fdef, result.arg_types, result.ret_types, graph);
-  }
-  return s;
+  TF_RETURN_IF_ERROR(ConvertNodeDefsToGraph(opts, result.nodes, graph.get()));
+
+  // Call BuildControlFlowInfo to validate that this function body has
+  // well-formed control flow.
+  // NOTE(skyewm): this is usually done in Partition(), but we don't partition
+  // function bodies. This should be removed if function bodies ever go through
+  // the Partition() path.
+  std::vector<ControlFlowInfo> dummy;
+  TF_RETURN_IF_ERROR(BuildControlFlowInfo(graph.get(), &dummy));
+
+  *fbody = new FunctionBody(fdef, result.arg_types, result.ret_types,
+                            graph.release());
+  return Status::OK();
 }
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc
index d183bf7c978f1a39882b6f2f0a94386e25e5f0cd..cad3b3801e74a00a9f6fb6b236842f5caeaf72bc 100644
--- a/tensorflow/core/common_runtime/function_test.cc
+++ b/tensorflow/core/common_runtime/function_test.cc
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/executor.h"
 #include "tensorflow/core/common_runtime/function_testlib.h"
 #include "tensorflow/core/common_runtime/rendezvous_mgr.h"
+#include "tensorflow/core/common_runtime/step_stats_collector.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/op.h"
@@ -190,24 +191,113 @@ class FunctionLibraryRuntimeTest : public ::testing::Test {
   Status Instantiate(FunctionLibraryRuntime* flr, const string& name,
                      test::function::Attrs attrs,
                      FunctionLibraryRuntime::Handle* handle) {
-    Status status = flr->Instantiate(name, attrs, handle);
-    if (!status.ok()) {
-      return status;
-    }
-    return Status::OK();
+    return flr->Instantiate(name, attrs, handle);
+  }
+
+  Status Instantiate(FunctionLibraryRuntime* flr, const string& name,
+                     test::function::Attrs attrs,
+                     const FunctionLibraryRuntime::InstantiateOptions& options,
+                     FunctionLibraryRuntime::Handle* handle) {
+    return flr->Instantiate(name, attrs, options, handle);
   }
 
   Status InstantiateAndRun(FunctionLibraryRuntime* flr, const string& name,
                            test::function::Attrs attrs,
                            const std::vector<Tensor>& args,
                            std::vector<Tensor*> rets) {
+    return InstantiateAndRun(flr, name, attrs,
+                             FunctionLibraryRuntime::InstantiateOptions(), args,
+                             std::move(rets));
+  }
+
+  Status InstantiateAndRun(
+      FunctionLibraryRuntime* flr, const string& name,
+      test::function::Attrs attrs,
+      const FunctionLibraryRuntime::InstantiateOptions& options,
+      const std::vector<Tensor>& args, std::vector<Tensor*> rets) {
+    FunctionLibraryRuntime::Handle handle;
+    Status status = flr->Instantiate(name, attrs, options, &handle);
+    if (!status.ok()) {
+      return status;
+    }
+    FunctionLibraryRuntime::Options opts;
+    status = Run(flr, handle, opts, args, rets);
+    if (!status.ok()) return status;
+
+    // Release the handle and try running again. It should not succeed.
+    status = flr->ReleaseHandle(handle);
+    if (!status.ok()) return status;
+
+    Status status2 = Run(flr, handle, opts, args, std::move(rets));
+    EXPECT_TRUE(errors::IsInvalidArgument(status2));
+    EXPECT_TRUE(
+        StringPiece(status2.error_message()).contains("remote execution."));
+
+    return status;
+  }
+
+  Status Run(FunctionLibraryRuntime* flr, FunctionLibraryRuntime::Handle handle,
+             FunctionLibraryRuntime::Options opts, CallFrameInterface* frame) {
+    std::atomic<int32> call_count(0);
+    std::function<void(std::function<void()>)> runner =
+        [&call_count](std::function<void()> fn) {
+          ++call_count;
+          test::function::FunctionTestSchedClosure(fn);
+        };
+
+    Notification done;
+    opts.runner = &runner;
+    std::vector<Tensor> out;
+    Status status;
+    flr->Run(opts, handle, frame, [&status, &done](const Status& s) {
+      status = s;
+      done.Notify();
+    });
+    done.WaitForNotification();
+    if (!status.ok()) {
+      return status;
+    }
+
+    EXPECT_GE(call_count, 1);  // Test runner is used.
+
+    return Status::OK();
+  }
+
+  Status InstantiateAndRunViaCallFrameInterface(FunctionLibraryRuntime* flr,
+                                                const string& name,
+                                                test::function::Attrs attrs,
+                                                const std::vector<Tensor>& args,
+                                                std::vector<Tensor*> rets) {
     FunctionLibraryRuntime::Handle handle;
     Status status = flr->Instantiate(name, attrs, &handle);
     if (!status.ok()) {
       return status;
     }
+    const FunctionBody* fbody = flr->GetFunctionBody(handle);
+    FunctionCallFrame frame(fbody->arg_types, fbody->ret_types);
+    TF_RETURN_IF_ERROR(frame.SetArgs(args));
+
     FunctionLibraryRuntime::Options opts;
-    return Run(flr, handle, opts, args, std::move(rets));
+    status = Run(flr, handle, opts, &frame);
+    if (!status.ok()) return status;
+
+    std::vector<Tensor> retvals;
+    TF_RETURN_IF_ERROR(frame.GetRetvals(&retvals));
+    CHECK_EQ(rets.size(), retvals.size());
+    for (size_t i = 0; i < rets.size(); ++i) {
+      *rets[i] = retvals[i];
+    }
+
+    // Release the handle and try running again. It should not succeed.
+    status = flr->ReleaseHandle(handle);
+    if (!status.ok()) return status;
+
+    Status status2 = Run(flr, handle, opts, args, std::move(rets));
+    EXPECT_TRUE(errors::IsInvalidArgument(status2));
+    EXPECT_TRUE(
+        StringPiece(status2.error_message()).contains("remote execution."));
+
+    return status;
   }
 
   std::unique_ptr<Graph> GetFuncBody(FunctionLibraryRuntime* flr,
@@ -268,6 +358,9 @@ TEST_F(FunctionLibraryRuntimeTest, XTimesTwo) {
   TF_CHECK_OK(
       InstantiateAndRun(flr0_, "XTimesTwo", {{"T", DT_FLOAT}}, {x}, {&y}));
   test::ExpectTensorEqual<float>(y, test::AsTensor<float>({2, 4, 6, 8}));
+  TF_CHECK_OK(InstantiateAndRunViaCallFrameInterface(
+      flr0_, "XTimesTwo", {{"T", DT_FLOAT}}, {x}, {&y}));
+  test::ExpectTensorEqual<float>(y, test::AsTensor<float>({2, 4, 6, 8}));
 }
 
 TEST_F(FunctionLibraryRuntimeTest, XTimesN) {
@@ -286,6 +379,142 @@ TEST_F(FunctionLibraryRuntimeTest, XTimesN) {
   test::ExpectTensorEqual<float>(y, test::AsTensor<float>({16, 32, 48, 64}));
 }
 
+TEST_F(FunctionLibraryRuntimeTest, XTimesNInOverlayLib) {
+  Init({});
+  FunctionDefLibrary proto;
+  *proto.add_function() = test::function::XTimesTwo();
+  *proto.add_function() = test::function::XTimesFour();
+  *proto.add_function() = test::function::XTimes16();
+  std::unique_ptr<FunctionLibraryDefinition> overlay_lib(
+      new FunctionLibraryDefinition(OpRegistry::Global(), proto));
+
+  FunctionLibraryRuntime::InstantiateOptions options;
+  options.overlay_lib = overlay_lib.get();
+
+  auto x = test::AsTensor<float>({1, 2, 3, 4});
+  Tensor y;
+
+  // Ensure that the function is not installed in the base library.
+  HasError(InstantiateAndRun(flr0_, "XTimesTwo", {{"T", DT_FLOAT}},
+                             {} /* options */, {x}, {&y}),
+           "Not found: Function XTimesTwo is not defined.");
+
+  TF_CHECK_OK(InstantiateAndRun(flr0_, "XTimesTwo", {{"T", DT_FLOAT}}, options,
+                                {x}, {&y}));
+  test::ExpectTensorEqual<float>(y, test::AsTensor<float>({2, 4, 6, 8}));
+  TF_CHECK_OK(InstantiateAndRun(flr0_, "XTimesFour", {{"T", DT_FLOAT}}, options,
+                                {x}, {&y}));
+  test::ExpectTensorEqual<float>(y, test::AsTensor<float>({4, 8, 12, 16}));
+  TF_CHECK_OK(InstantiateAndRun(flr0_, "XTimes16", {{"T", DT_FLOAT}}, options,
+                                {x}, {&y}));
+  test::ExpectTensorEqual<float>(y, test::AsTensor<float>({16, 32, 48, 64}));
+
+  // Ensure that the use of the overlay has not leaked into the base library.
+  HasError(InstantiateAndRun(flr0_, "XTimesTwo", {{"T", DT_FLOAT}},
+                             {} /* options */, {x}, {&y}),
+           "Not found: Function XTimesTwo is not defined.");
+}
+
+TEST_F(FunctionLibraryRuntimeTest, StateHandle) {
+  auto T = DT_INT32;
+
+  // The expected sequence of outputs from this function is [6, 4, 0, 1, ...].
+  FunctionDef stateful_func = FDH::Define(
+      // Name
+      "RandomUniformWrapper",
+      // Args
+      {},
+      // Return values
+      {"y: int32"},
+      // Attrs
+      {},
+      // Nodes
+      {FDH::Const<int32>("shape", gtl::ArraySlice<int32>({1})),
+       FDH::Const<int32>("minval", 0),
+       FDH::Const<int32>("maxval", 10),
+       // A stateful node.
+       {{"y"},
+        "RandomUniformInt",
+        {"shape", "minval", "maxval"},
+        {{"seed", 37}, {"seed2", 48}, {"Tout", T}, {"T", T}}}});
+  Init({stateful_func});
+
+  FunctionLibraryRuntime::Handle handle;
+  TF_CHECK_OK(Instantiate(flr0_, "RandomUniformWrapper", {}, &handle));
+
+  FunctionLibraryRuntime::Options opts;
+  Tensor y;
+  {
+    // Simple case: instantiating with no state_handle.
+    for (int32 expected : {6, 4}) {
+      TF_CHECK_OK(Run(flr0_, handle, opts, {}, {&y}));
+      test::ExpectTensorEqual<int>(y, test::AsTensor<int32>({expected}));
+    }
+  }
+
+  {
+    // Instantiating again with no state_handle should yield the same handle and
+    // the continuation of the same sequence.
+    FunctionLibraryRuntime::Handle handle_non_isolated;
+    TF_CHECK_OK(
+        Instantiate(flr0_, "RandomUniformWrapper", {}, &handle_non_isolated));
+    EXPECT_EQ(handle, handle_non_isolated);
+    for (int32 expected : {0, 1}) {
+      TF_CHECK_OK(Run(flr0_, handle_non_isolated, opts, {}, {&y}));
+      test::ExpectTensorEqual<int>(y, test::AsTensor<int32>({expected}));
+    }
+  }
+
+  {
+    // Instantiating with a given state handle will create new state and yield
+    // the original sequence.
+    FunctionLibraryRuntime::InstantiateOptions options;
+    FunctionLibraryRuntime::Handle handle_isolated;
+    options.state_handle = "handle_1";
+    TF_CHECK_OK(Instantiate(flr0_, "RandomUniformWrapper", {}, options,
+                            &handle_isolated));
+    EXPECT_NE(handle, handle_isolated);
+    for (int32 expected : {6, 4, 0, 1}) {
+      TF_CHECK_OK(Run(flr0_, handle_isolated, opts, {}, {&y}));
+      test::ExpectTensorEqual<int>(y, test::AsTensor<int32>({expected}));
+    }
+  }
+
+  {
+    // Instantiating with a different given state handle will create new state
+    // and yield the original sequence.
+    FunctionLibraryRuntime::InstantiateOptions options;
+    FunctionLibraryRuntime::Handle handle_isolated;
+    options.state_handle = "handle_2";
+    TF_CHECK_OK(Instantiate(flr0_, "RandomUniformWrapper", {}, options,
+                            &handle_isolated));
+    EXPECT_NE(handle, handle_isolated);
+    for (int32 expected : {6, 4, 0, 1}) {
+      TF_CHECK_OK(Run(flr0_, handle_isolated, opts, {}, {&y}));
+      test::ExpectTensorEqual<int>(y, test::AsTensor<int32>({expected}));
+    }
+  }
+
+  {
+    // Reinstantiating after releasing a handle will yield the original sequence
+    // multiple times.
+    FunctionLibraryRuntime::InstantiateOptions options;
+    FunctionLibraryRuntime::Handle handle_isolated;
+    options.state_handle = "handle_3";
+
+    for (int i = 0; i < 2; ++i) {
+      TF_CHECK_OK(Instantiate(flr0_, "RandomUniformWrapper", {}, options,
+                              &handle_isolated));
+      EXPECT_NE(handle, handle_isolated);
+      for (int32 expected : {6, 4, 0, 1}) {
+        TF_CHECK_OK(Run(flr0_, handle_isolated, opts, {}, {&y}));
+        test::ExpectTensorEqual<int>(y, test::AsTensor<int32>({expected}));
+      }
+      TF_CHECK_OK(flr0_->ReleaseHandle(handle_isolated));
+    }
+  }
+}
+
 TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctions) {
   Init({test::function::XTimesTwo(), test::function::XTimesFour(),
         test::function::XTimes16()});
@@ -487,6 +716,66 @@ TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctionsWithControlDeps) {
   }
 }
 
+TEST_F(FunctionLibraryRuntimeTest, PruneBody) {
+  auto T = DT_INT32;
+  FunctionDef stateful_func = FDH::Define(
+      // Name
+      "SquareAndAddOneWithStatefulNodes",
+      // Args
+      {"x: int32"},
+      // Return values
+      {"y: int32"},
+      // Attrs
+      {},
+      // Nodes
+      {// a = Square<T>(x)
+       {{"a"}, "Square", {"x"}, {{"T", T}}},
+       // 1
+       FDH::Const("o", 1),
+       // A bunch of extra arithmetic that y doesn't depend on
+       {{"x1"}, "Add", {"o", "o"}, {{"T", T}}},
+       {{"x2"}, "Mul", {"a", "x1"}, {{"T", T}}},
+       {{"x3"}, "Mul", {"x1", "x2"}, {{"T", T}}},
+       FDH::Const<int32>("shape", {1, 2}),
+       // A stateful node.
+       {{"keep_me"},
+        "RandomUniform",
+        {"shape"},
+        {{"T", T}, {"dtype", DT_FLOAT}}},
+       // y = Add<T>(a, o)
+       {{"y"}, "Add", {"a", "o"}, {{"T", T}}}});
+  Init({stateful_func});
+
+  auto x = test::AsTensor<int32>({1, 2, 3, 4});
+  Tensor y;
+
+  FunctionLibraryRuntime::Handle handle;
+  TF_CHECK_OK(
+      Instantiate(flr0_, "SquareAndAddOneWithStatefulNodes", {}, &handle));
+
+  StepStats stats;
+  StepStatsCollector stats_collector(&stats);
+  FunctionLibraryRuntime::Options opts;
+  opts.stats_collector = &stats_collector;
+  TF_CHECK_OK(Run(flr0_, handle, opts, {x}, {&y}));
+  TF_CHECK_OK(flr0_->ReleaseHandle(handle));
+
+  TF_CHECK_OK(InstantiateAndRun(flr0_, "SquareAndAddOneWithStatefulNodes", {},
+                                {x}, {&y}));
+  test::ExpectTensorEqual<int>(y, test::AsTensor<int32>({2, 5, 10, 17}));
+
+  stats_collector.FinalizeAndSwap(&stats);
+
+  // Note that we do not expect the nodes named "x1", "x2", or "x3" to execute.
+  std::set<string> expected_node_names(
+      {"_SOURCE", "shape", "x", "o", "a", "keep_me", "y", "y_RetVal"});
+  std::set<string> executed_node_names;
+  for (const auto& node_stats : stats.dev_stats()[0].node_stats()) {
+    executed_node_names.insert(node_stats.node_name());
+  }
+  EXPECT_EQ(expected_node_names, executed_node_names);
+}
+
 TEST_F(FunctionLibraryRuntimeTest, OptimizeGraph) {
   Init({test::function::XTimesTwo(), test::function::XTimesFour(),
         test::function::XTimes16()});
@@ -498,7 +787,7 @@ TEST_F(FunctionLibraryRuntimeTest, OptimizeGraph) {
     Scope s = Scope::NewRootScope();
     auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0);
     auto x4_x2_scale = ops::Const<float>(
-        s.WithOpName("x4/x2/scale/_12__cf__2")
+        s.WithOpName("x4/x2/scale/_15__cf__9")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         2.0f);
     auto x4_x2_y = ops::Mul(s.WithOpName("x4/x2/y"), x, x4_x2_scale);
@@ -644,6 +933,16 @@ TEST_F(FunctionLibraryRuntimeTest, Error_InstantiaionError) {
            "type attr not found");
 }
 
+TEST_F(FunctionLibraryRuntimeTest, Error_BadControlFlow) {
+  Init({test::function::InvalidControlFlow()});
+  auto x = test::AsTensor<int32>({0});
+  DCHECK_EQ(x.dtype(), DT_INT32);
+  Tensor y;
+  HasError(InstantiateAndRun(flr0_, "InvalidControlFlow", {}, {x}, {&y}),
+           "The node 'add' has inputs from different frames. The input 'enter' "
+           "is in frame 'while'. The input 'i' is in frame ''.");
+}
+
 TEST_F(FunctionLibraryRuntimeTest, Gradient_XTimesTwo) {
   Init({test::function::XTimesTwo(), test::function::XTimesFour(),
         test::function::XTimes16()});
@@ -694,13 +993,13 @@ TEST_F(FunctionLibraryRuntimeTest, Gradient_XTimesTwo) {
     auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0);
     auto func0 = ops::_Arg(s.WithOpName("Func/_0"), DT_FLOAT, 1);
     auto scale = ops::Const(
-        s.WithOpName("scale/_5__cf__6")
+        s.WithOpName("scale/_5__cf__10")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         2.0f);
     auto func1_gx = ops::Mul(s.WithOpName("Func/_1/gx"), func0, scale);
     auto func1_sx = ops::Shape(s.WithOpName("Func/_1/sx"), x);
     auto const0 = ops::Const(
-        s.WithOpName("Func/_1/sy/_6__cf__7")
+        s.WithOpName("Func/_1/sy/_6__cf__11")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         0, {0});
     auto func1_rx = ops::internal::BroadcastGradientArgs(
@@ -938,9 +1237,10 @@ TEST_F(FunctionLibraryRuntimeTest, Gradient_AddSum) {
 
 TEST_F(FunctionLibraryRuntimeTest, CrossDevice) {
   Init({test::function::FindDevice()});
+  FunctionLibraryRuntime::InstantiateOptions instantiate_opts;
+  instantiate_opts.target = "/device:CPU:1";
   FunctionLibraryRuntime::Handle handle;
-  TF_CHECK_OK(Instantiate(flr0_, "FindDevice", {{"_target", "/device:CPU:1"}},
-                          &handle));
+  TF_CHECK_OK(Instantiate(flr0_, "FindDevice", {}, instantiate_opts, &handle));
 
   Tensor y;
   FunctionLibraryRuntime::Options opts;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index 646cd88a3a340a7ce3d85f19cb55fea27d9dc1b2..2f7fbbbec2a285976701b94c426bc3f870c65cf5 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -15,20 +15,23 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 
 namespace tensorflow {
 
-GPUBFCAllocator::GPUBFCAllocator(int device_id, size_t total_memory)
-    : GPUBFCAllocator(device_id, total_memory, GPUOptions()) {}
+GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+                                 const string& name)
+    : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {}
 
-GPUBFCAllocator::GPUBFCAllocator(int device_id, size_t total_memory,
-                                 const GPUOptions& gpu_options)
+GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+                                 const GPUOptions& gpu_options,
+                                 const string& name)
     : BFCAllocator(
           new GPUMemAllocator(
-              GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie()),
-          total_memory, gpu_options.allow_growth(),
-          strings::StrCat("GPU_", device_id, "_bfc")) {}
+              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie()),
+          total_memory, gpu_options.allow_growth(), name) {}
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index 2c23340b6d3fb26a38304fd2d0544dcdfcdeeb5e..c2c0b020c7409e7be168d42e83579a2ff3c29a60 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -23,6 +23,7 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/allocator_retry.h"
 #include "tensorflow/core/common_runtime/bfc_allocator.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
@@ -36,11 +37,12 @@ namespace tensorflow {
 // algorithm.
 class GPUBFCAllocator : public BFCAllocator {
  public:
-  // 'device_id' refers to the StreamExecutor ID of the device within
+  // 'cuda_gpu_id' refers to the ID of the GPU device within
   // the process and must reference a valid ID in the process.
-  GPUBFCAllocator(int device_id, size_t total_memory);
-  GPUBFCAllocator(int device_id, size_t total_memory,
-                  const GPUOptions& gpu_options);
+  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+                  const string& name);
+  GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory,
+                  const GPUOptions& gpu_options, const string& name);
   virtual ~GPUBFCAllocator() {}
 
   TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator);
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
index 00ef130d34bbbe06ad9dabae124ff3fa0d38450a..9e4b617d2bd5b070f5b8bdeedabb15b94d212743 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -45,7 +46,7 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use,
 }
 
 TEST(GPUBFCAllocatorTest, NoDups) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   // Allocate a lot of raw pointers
@@ -74,7 +75,7 @@ TEST(GPUBFCAllocatorTest, NoDups) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   // Allocate 256 raw pointers of sizes between 100 bytes and about
   // a meg
   random::PhiloxRandom philox(123, 17);
@@ -132,7 +133,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) {
 }
 
 TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
 
   float* first_ptr = a.Allocate<float>(1024);
@@ -166,18 +167,18 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
 }
 
 TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   float* ptr = a.Allocate<float>(0);
   EXPECT_EQ(nullptr, ptr);
 }
 
 TEST(GPUBFCAllocatorTest, TracksSizes) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   float* t1 = a.Allocate<float>(1);
   EXPECT_EQ(4, a.RequestedSize(t1));
   EXPECT_EQ(256, a.AllocatedSize(t1));
@@ -186,7 +187,7 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
 
 TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) {
   // Configure a 1MiB byte limit
-  GPUBFCAllocator a(0, 1 << 20);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc");
 
   float* first_ptr = a.Allocate<float>(1 << 6);
   float* second_ptr = a.Allocate<float>(1 << 20);
@@ -201,7 +202,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
   options.set_allow_growth(true);
 
   // Max of 2GiB, but starts out small.
-  GPUBFCAllocator a(0, 1LL << 31, options);
+  GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc");
 
   // Allocate 10 raw pointers of sizes between 100 bytes and about
   // 64 megs.
@@ -262,8 +263,8 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) {
 }
 
 TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
-  GPUBFCAllocator a(0, 1UL << 60);
-  GPUBFCAllocator b(0, 1UL << 60);
+  GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
+  GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc");
   void* amem = a.AllocateRaw(1, 1);
   void* bmem = b.AllocateRaw(1, 1 << 30);
   a.DeallocateRaw(amem);
@@ -271,7 +272,7 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) {
 }
 
 static void BM_Allocation(int iters) {
-  GPUBFCAllocator a(0, 1uLL << 33);
+  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<size_t> sizes = {256,        4096,      16384,    524288,
                                512,        1048576,   10485760, 104857600,
@@ -287,7 +288,7 @@ static void BM_Allocation(int iters) {
 BENCHMARK(BM_Allocation);
 
 static void BM_AllocationThreaded(int iters, int num_threads) {
-  GPUBFCAllocator a(0, 1uLL << 33);
+  GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc");
   thread::ThreadPool pool(Env::Default(), "test", num_threads);
   std::atomic_int_fast32_t count(iters);
   mutex done_lock;
@@ -323,7 +324,7 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16);
 // A more complex benchmark that defers deallocation of an object for
 // "delay" allocations.
 static void BM_AllocationDelayed(int iters, int delay) {
-  GPUBFCAllocator a(0, 1 << 30);
+  GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
   // Exercise a few different allocation sizes
   std::vector<int> sizes = {256, 4096, 16384, 4096, 512, 1024, 1024};
   int size_index = 0;
@@ -361,7 +362,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   // only methods inside this class can access private members of BFCAllocator.
 
   void TestBinDebugInfo() {
-    GPUBFCAllocator a(0, 1 << 30);
+    GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc");
 
     std::vector<void*> initial_ptrs;
     std::vector<size_t> initial_ptrs_allocated_sizes;
@@ -439,7 +440,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
   }
 
   void TestLog2FloorNonZeroSlow() {
-    GPUBFCAllocator a(0 /* device_id */, 1 /* total_memory */);
+    GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc");
     EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0));
     EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1));
     EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2));
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
index 70c2d96763e72909bd1d58ae637d8393f1368197..08961fc1055b67902e85887f69158ece5dd68e76 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
@@ -20,17 +20,17 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h"
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/platform/stream_executor.h"
 
-namespace gpu = ::perftools::gputools;
-
 namespace tensorflow {
 
 GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator,
-                                               int device_id)
+                                               CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
-  stream_exec_ = GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+  stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 }
 
 GPUcudaMallocAllocator::~GPUcudaMallocAllocator() { delete base_allocator_; }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
index 23552b809a8a735aaeb8ac9643eccd0b0542f03b..208697361d2dfc4f3b8290ea511d15c9bd86857b 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <memory>
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/visitable_allocator.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor.h"
@@ -30,7 +31,8 @@ namespace tensorflow {
 // allocated memory.
 class GPUcudaMallocAllocator : public VisitableAllocator {
  public:
-  explicit GPUcudaMallocAllocator(VisitableAllocator* allocator, int device_id);
+  explicit GPUcudaMallocAllocator(VisitableAllocator* allocator,
+                                  CudaGpuId cuda_gpu_id);
   ~GPUcudaMallocAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
index 6480f0b256b2fe05db5ac5bc8037b4fa216682ac..cd29a5c50b6d4d6e9b36ad627fe72d855bde1372 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
@@ -15,20 +15,21 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"
 
+#include <cstddef>
 #include <vector>
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/platform/stream_executor.h"
 
-namespace gpu = ::perftools::gputools;
-
-namespace tensorflow {
-
 #define MASK_WORDS 2
 #define MASK_BYTES (MASK_WORDS * sizeof(int64))
 
+namespace tensorflow {
 namespace {
 
-static int64* NewMask(int64 word) {
+int64* NewMask(int64 word) {
   int64* m = new int64[MASK_WORDS];
   for (int i = 0; i < MASK_WORDS; ++i) {
     m[i] = word;
@@ -36,8 +37,8 @@ static int64* NewMask(int64 word) {
   return m;
 }
 
-static int64* before_mask = NewMask(0xabababababababab);
-static int64* after_mask = NewMask(0xcdcdcdcdcdcdcdcd);
+int64* before_mask = NewMask(0xabababababababab);
+int64* after_mask = NewMask(0xcdcdcdcdcdcdcdcd);
 
 bool CheckMask(perftools::gputools::StreamExecutor* exec, void* ptr,
                int64* mask) {
@@ -75,9 +76,9 @@ void InitMask(perftools::gputools::StreamExecutor* exec, void* ptr,
 // GPUDebugAllocator
 // -----------------------------------------------------------------------------
 GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator,
-                                     int device_id)
+                                     CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
-  stream_exec_ = GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+  stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 }
 
 GPUDebugAllocator::~GPUDebugAllocator() { delete base_allocator_; }
@@ -138,6 +139,8 @@ void GPUDebugAllocator::GetStats(AllocatorStats* stats) {
   base_allocator_->GetStats(stats);
 }
 
+void GPUDebugAllocator::ClearStats() { base_allocator_->ClearStats(); }
+
 bool GPUDebugAllocator::CheckHeader(void* ptr) {
   return CheckMask(stream_exec_, static_cast<char*>(ptr) - MASK_BYTES,
                    before_mask);
@@ -154,9 +157,9 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) {
 // GPUNanResetAllocator
 // -----------------------------------------------------------------------------
 GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator,
-                                           int device_id)
+                                           CudaGpuId cuda_gpu_id)
     : base_allocator_(allocator) {
-  stream_exec_ = GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+  stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 }
 
 GPUNanResetAllocator::~GPUNanResetAllocator() { delete base_allocator_; }
@@ -210,4 +213,6 @@ void GPUNanResetAllocator::GetStats(AllocatorStats* stats) {
   base_allocator_->GetStats(stats);
 }
 
+void GPUNanResetAllocator::ClearStats() { base_allocator_->ClearStats(); }
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
index 9fbaf64f8a296fe012511cc91d845566f52f13d5..139fa2847e5e4e9b114e5289572da68419d002c7 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <unordered_map>
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/visitable_allocator.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor.h"
@@ -32,7 +33,8 @@ namespace tensorflow {
 // allocated memory.
 class GPUDebugAllocator : public VisitableAllocator {
  public:
-  explicit GPUDebugAllocator(VisitableAllocator* allocator, int device_id);
+  explicit GPUDebugAllocator(VisitableAllocator* allocator,
+                             CudaGpuId cuda_gpu_id);
   ~GPUDebugAllocator() override;
   string Name() override { return "gpu_debug"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
@@ -44,6 +46,7 @@ class GPUDebugAllocator : public VisitableAllocator {
   size_t AllocatedSize(void* ptr) override;
   int64 AllocationId(void* ptr) override;
   void GetStats(AllocatorStats* stats) override;
+  void ClearStats() override;
 
   // For testing.
   bool CheckHeader(void* ptr);
@@ -62,7 +65,8 @@ class GPUDebugAllocator : public VisitableAllocator {
 // user forgets to initialize the memory.
 class GPUNanResetAllocator : public VisitableAllocator {
  public:
-  explicit GPUNanResetAllocator(VisitableAllocator* allocator, int device_id);
+  explicit GPUNanResetAllocator(VisitableAllocator* allocator,
+                                CudaGpuId cuda_gpu_id);
   ~GPUNanResetAllocator() override;
   string Name() override { return "gpu_nan_reset"; }
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
@@ -72,6 +76,7 @@ class GPUNanResetAllocator : public VisitableAllocator {
   size_t RequestedSize(void* ptr) override;
   size_t AllocatedSize(void* ptr) override;
   void GetStats(AllocatorStats* stats) override;
+  void ClearStats() override;
 
  private:
   VisitableAllocator* base_allocator_ = nullptr;  // owned
diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
index 14d8591731f0d544976a661c591920fb937f0cbd..d34f0cb3c28af5d2720c61cc7c5016622b1c0876 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
@@ -21,6 +21,8 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/logging.h"
@@ -28,15 +30,14 @@ limitations under the License.
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
 
-namespace gpu = ::perftools::gputools;
-
 namespace tensorflow {
+namespace {
 
 TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
-  const int device_id = 0;
-  GPUDebugAllocator a(new GPUBFCAllocator(device_id, 1 << 30), device_id);
-  auto stream_exec =
-      GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+  const CudaGpuId cuda_gpu_id(0);
+  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                      cuda_gpu_id);
+  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
   for (int s : {8}) {
     std::vector<int64> cpu_array(s);
@@ -57,11 +58,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
   for (int s : {8, 211}) {
     EXPECT_DEATH(
         {
-          const int device_id = 0;
-          GPUDebugAllocator a(new GPUBFCAllocator(device_id, 1 << 30),
-                              device_id);
+          const CudaGpuId cuda_gpu_id(0);
+          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                              cuda_gpu_id);
           auto stream_exec =
-              GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
           std::vector<int64> cpu_array(s);
           memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
@@ -90,11 +91,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
   for (int s : {8, 22}) {
     EXPECT_DEATH(
         {
-          const int device_id = 0;
-          GPUDebugAllocator a(new GPUBFCAllocator(device_id, 1 << 30),
-                              device_id);
+          const CudaGpuId cuda_gpu_id(0);
+          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                              cuda_gpu_id);
           auto stream_exec =
-              GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
           std::vector<int64> cpu_array(s);
           memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
@@ -120,10 +121,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
 }
 
 TEST(GPUDebugAllocatorTest, ResetToNan) {
-  const int device_id = 0;
-  GPUNanResetAllocator a(new GPUBFCAllocator(device_id, 1 << 30), device_id);
-  auto stream_exec =
-      GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+  const CudaGpuId cuda_gpu_id(0);
+  GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                         cuda_gpu_id);
+  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
   std::vector<float> cpu_array(1024);
   std::vector<float> cpu_array_result(1024);
@@ -160,13 +161,13 @@ TEST(GPUDebugAllocatorTest, ResetToNan) {
 }
 
 TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
-  const int device_id = 0;
+  const CudaGpuId cuda_gpu_id(0);
   // NaN reset must be the outer-most allocator.
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(device_id, 1 << 30), device_id),
-      device_id);
-  auto stream_exec =
-      GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                            cuda_gpu_id),
+      cuda_gpu_id);
+  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
 
   std::vector<float> cpu_array(1024);
   std::vector<float> cpu_array_result(1024);
@@ -203,19 +204,25 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
 }
 
 TEST(GPUDebugAllocatorTest, TracksSizes) {
-  GPUDebugAllocator a(new GPUBFCAllocator(0, 1 << 30), 0);
+  const CudaGpuId cuda_gpu_id(0);
+  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                      cuda_gpu_id);
   EXPECT_EQ(true, a.TracksAllocationSizes());
 }
 
 TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
+  const CudaGpuId cuda_gpu_id(0);
   GPUNanResetAllocator a(
-      new GPUDebugAllocator(new GPUBFCAllocator(0, 1 << 30), 0), 0);
+      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
+                            cuda_gpu_id),
+      cuda_gpu_id);
   float* t1 = a.Allocate<float>(1);
   EXPECT_EQ(4, a.RequestedSize(t1));
   EXPECT_EQ(256, a.AllocatedSize(t1));
   a.DeallocateRaw(t1);
 }
 
+}  // namespace
 }  // namespace tensorflow
 
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index eff169640f6eef4f82a3b0ef205990bef4237a74..0e5b6b7ef87f67bcb0b46d6e0acec82f8612b80f 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -32,6 +32,8 @@ limitations under the License.
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_stream_util.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
@@ -60,6 +62,7 @@ limitations under the License.
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/public/session_options.h"
 #include "tensorflow/core/util/device_name_utils.h"
+#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/stream_executor_util.h"
 
 namespace tensorflow {
@@ -84,7 +87,8 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
   }
   ~EigenCudaStreamDevice() override {}
   void Reinitialize(OpKernelContext* context, const cudaStream_t* cuda_stream,
-                    int gpu_id, ::tensorflow::Allocator* alloc, char* scratch) {
+                    TfGpuId tf_gpu_id, ::tensorflow::Allocator* alloc,
+                    char* scratch) {
     if (LogMemory::IsEnabled()) {
       operation_ = context->op_kernel().name() + "/EigenAllocator";
       step_id_ = context->step_id();
@@ -95,7 +99,8 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
         reinterpret_cast<unsigned int*>(scratch + Eigen::kCudaScratchSize);
     stream_ = cuda_stream;
     allocator_ = alloc;
-    device_prop_ = &Eigen::m_deviceProperties[gpu_id];
+    const int cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id).value();
+    device_prop_ = &Eigen::m_deviceProperties[cuda_gpu_id];
   }
 
   const cudaStream_t& stream() const override { return *stream_; }
@@ -185,13 +190,15 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
 class BaseGPUDevice::StreamGroupFactory {
  public:
   // Returns the unique stream group for use with the stream defined by
-  // {gpu_id, stream_group_within_gpu}, creating it if it does not yet exist.
+  // {tf_gpu_id, stream_group_within_gpu}, creating it if it does not yet
+  // exist.
   // This function is thread safe.
-  BaseGPUDevice::StreamGroup* GetOrCreate(int gpu_id,
+  BaseGPUDevice::StreamGroup* GetOrCreate(TfGpuId tf_gpu_id,
                                           int stream_group_within_gpu,
                                           gpu::StreamExecutor* executor) {
     mutex_lock guard(lock_);
-    StreamGroup* group = &streams_[key_type(gpu_id, stream_group_within_gpu)];
+    StreamGroup* group =
+        &streams_[key_type(tf_gpu_id.value(), stream_group_within_gpu)];
     if (!group->compute) {
       group->compute = new gpu::Stream(executor);
       group->compute->Init();
@@ -236,7 +243,8 @@ class BaseGPUDevice::StreamGroupFactory {
 
 BaseGPUDevice::BaseGPUDevice(const SessionOptions& options, const string& name,
                              Bytes memory_limit, const DeviceLocality& locality,
-                             int gpu_id, const string& physical_device_desc,
+                             TfGpuId tf_gpu_id,
+                             const string& physical_device_desc,
                              Allocator* gpu_allocator, Allocator* cpu_allocator,
                              bool sync_every_op, int32 max_streams)
     : LocalDevice(options, Device::BuildDeviceAttributes(name, DEVICE_GPU,
@@ -244,7 +252,7 @@ BaseGPUDevice::BaseGPUDevice(const SessionOptions& options, const string& name,
                                                          physical_device_desc)),
       gpu_allocator_(gpu_allocator),
       cpu_allocator_(cpu_allocator),
-      gpu_id_(gpu_id),
+      tf_gpu_id_(tf_gpu_id),
       sync_every_op_(sync_every_op),
       max_streams_(max_streams) {
   ProcessState::singleton()->EnableGPUDevice();
@@ -256,10 +264,10 @@ BaseGPUDevice::~BaseGPUDevice() {
 }
 
 Status BaseGPUDevice::Init(const SessionOptions& options) {
-  auto executor_status = GPUMachineManager()->ExecutorForDevice(gpu_id_);
+  auto executor_status = GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id_);
   if (!executor_status.status().ok()) {
     return errors::Internal("Failed to get StreamExecutor for device ",
-                            gpu_id_);
+                            tf_gpu_id_.value());
   }
 
   executor_ = executor_status.ValueOrDie();
@@ -272,14 +280,14 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
   // Create the specified number of GPU streams
   for (int i = 0; i < max_streams_; i++) {
     streams_.push_back(
-        StreamGroupFactory::Global().GetOrCreate(gpu_id_, i, executor_));
+        StreamGroupFactory::Global().GetOrCreate(tf_gpu_id_, i, executor_));
 
     size_t scratch_buffer_size = Eigen::kCudaScratchSize + sizeof(unsigned int);
     void* scratch_buffer = gpu_allocator_->AllocateRaw(
         Allocator::kAllocatorAlignment, scratch_buffer_size);
     if (scratch_buffer == nullptr) {
       return errors::FailedPrecondition(
-          "Failed to allocate scratch buffer for device ", gpu_id_);
+          "Failed to allocate scratch buffer for device ", tf_gpu_id_.value());
     }
     scratch_.push_back(static_cast<char*>(scratch_buffer));
 
@@ -291,7 +299,8 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
         &mem, Eigen::kCudaScratchSize + sizeof(unsigned int));
     if (!ok) {
       return errors::FailedPrecondition(
-          "Failed to memcopy into scratch buffer for device ", gpu_id_);
+          "Failed to memcopy into scratch buffer for device ",
+          tf_gpu_id_.value());
     }
 
     device_contexts_.push_back(new GPUDeviceContext(
@@ -302,9 +311,49 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
   gpu_device_info_->stream = streams_[0]->compute;
   gpu_device_info_->default_context = device_contexts_[0];
   gpu_device_info_->event_mgr = em_.get();
-  gpu_device_info_->gpu_id = gpu_id_;
+  gpu_device_info_->gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id_).value();
   set_tensorflow_gpu_device_info(gpu_device_info_);
 
+  // Whether and how the GPU device uses its own threadpool.
+  // This option is experimental. Once we confirm the best setting, we
+  // may change the default behavior and completely remove this flag.
+  // Default values might change in future releases.
+  // Possible values:
+  //   * global: GPU uses threads shared with CPU in the main compute
+  //          thread-pool. This is currently the default.
+  //   * gpu_private: GPU uses threads dedicated to this device.
+  //   * gpu_shared: All GPUs share a dedicated thread pool.
+  string gpu_thread_mode;
+  TF_RETURN_IF_ERROR(
+      ReadStringFromEnvVar("TF_GPU_THREAD_MODE", "global", &gpu_thread_mode));
+  gpu_thread_mode = str_util::Lowercase(gpu_thread_mode);
+  if (gpu_thread_mode != "global") {
+    int64 gpu_thread_count = -1;
+    // Default to two threads. One for device compute and another for memory
+    // copies.
+    TF_RETURN_IF_ERROR(
+        ReadInt64FromEnvVar("TF_GPU_THREAD_COUNT", 2, &gpu_thread_count));
+    if (gpu_thread_mode == "gpu_private") {
+      // TODO(zhengxq): since these threads only serve a single GPU device,
+      //   we should set the device context once for each thread, and avoid
+      //   setting them for each kernel.
+      // TODO(zhengxq): pin the thread to the same socket of the target GPU.
+      thread_pool_.reset(new thread::ThreadPool(
+          options.env, strings::StrCat("gpu_private_", tf_gpu_id_.value()),
+          static_cast<int32>(gpu_thread_count)));
+      set_tensorflow_device_thread_pool(thread_pool_.get());
+    } else if (gpu_thread_mode == "gpu_shared") {
+      static thread::ThreadPool* thread_pool = new thread::ThreadPool(
+          options.env, "gpu_shared", static_cast<int32>(gpu_thread_count));
+      set_tensorflow_device_thread_pool(thread_pool);
+    } else {
+      string error_message =
+          strings::StrCat("Invalid gpu_thread_mode: ", gpu_thread_mode);
+      LOG(WARNING) << error_message;
+      return errors::InvalidArgument(error_message);
+    }
+  }
+
   return Status::OK();
 }
 
@@ -394,7 +443,7 @@ void BaseGPUDevice::ComputeHelper(OpKernel* op_kernel,
 
   if (vlog_1) {
     VLOG(1) << "GpuDevice::Compute " << op_kernel->name() << " op "
-            << op_kernel->type_string() << " on GPU" << gpu_id_ << " stream["
+            << op_kernel->type_string() << " on GPU" << tf_gpu_id_ << " stream["
             << stream_id << "]";
   }
 
@@ -469,7 +518,7 @@ void BaseGPUDevice::ComputeAsync(AsyncOpKernel* op_kernel,
   const auto stream_id = gpu_device_context->stream_id();
 
   VLOG(1) << "GpuDevice::ComputeAsync " << op_kernel->name() << " op "
-          << op_kernel->type_string() << " on GPU" << gpu_id_ << " stream["
+          << op_kernel->type_string() << " on GPU" << tf_gpu_id_ << " stream["
           << stream_id << "]";
 
   // When TraceMe profiling is off (which is the default), the
@@ -594,8 +643,9 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
   ConcretePerOpGpuDevice() : device_(&stream_device_) {}
 
   void Reinitialize(OpKernelContext* context, const cudaStream_t* cuda_stream,
-                    int gpu_id, Allocator* base_allocator, char* scratch) {
-    stream_device_.Reinitialize(context, cuda_stream, gpu_id, base_allocator,
+                    TfGpuId tf_gpu_id, Allocator* base_allocator,
+                    char* scratch) {
+    stream_device_.Reinitialize(context, cuda_stream, tf_gpu_id, base_allocator,
                                 scratch);
   }
 
@@ -605,6 +655,150 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
   EigenCudaStreamDevice stream_device_;
   Eigen::GpuDevice device_;
 };
+
+// Parse 'visible_device_list' into a list of CUDA GPU ids.
+Status ParseVisibleDeviceList(const string& visible_device_list,
+                              std::vector<CudaGpuId>* visible_gpu_order) {
+  visible_gpu_order->clear();
+  gpu::Platform* gpu_manager = GPUMachineManager();
+
+  // If the user wants to remap the visible to virtual GPU mapping,
+  // check for that here.
+  if (visible_device_list.empty()) {
+    visible_gpu_order->resize(gpu_manager->VisibleDeviceCount());
+    // By default, visible to virtual mapping is unchanged.
+    int deviceNo = 0;
+    std::generate(visible_gpu_order->begin(), visible_gpu_order->end(),
+                  [&deviceNo] { return deviceNo++; });
+  } else {
+    const std::vector<string> order_str =
+        str_util::Split(visible_device_list, ',');
+    for (const string& cuda_gpu_id_str : order_str) {
+      int32 cuda_gpu_id;
+      if (!strings::safe_strto32(cuda_gpu_id_str, &cuda_gpu_id)) {
+        return errors::InvalidArgument(
+            "Could not parse entry in 'visible_device_list': '",
+            cuda_gpu_id_str, "'. visible_device_list = ", visible_device_list);
+      }
+      if (cuda_gpu_id < 0 || cuda_gpu_id >= gpu_manager->VisibleDeviceCount()) {
+        return errors::InvalidArgument(
+            "'visible_device_list' listed an invalid GPU id '", cuda_gpu_id,
+            "' but visible device count is ",
+            gpu_manager->VisibleDeviceCount());
+      }
+      visible_gpu_order->push_back(CudaGpuId(cuda_gpu_id));
+    }
+  }
+
+  // Validate no repeats.
+  std::set<CudaGpuId> visible_device_set(visible_gpu_order->begin(),
+                                         visible_gpu_order->end());
+  if (visible_device_set.size() != visible_gpu_order->size()) {
+    return errors::InvalidArgument(
+        "visible_device_list contained a duplicate entry: ",
+        visible_device_list);
+  }
+  return Status::OK();
+}
+
+Status VerifyVirtualDeviceSettings(
+    const size_t num_gpus_to_use, const GPUOptions& gpu_options,
+    const std::vector<CudaGpuId>& visible_gpu_order,
+    const std::vector<CudaGpuId>& valid_cuda_gpu_ids) {
+  const auto& virtual_devices = gpu_options.experimental().virtual_devices();
+  CHECK(!virtual_devices.empty());
+  if (gpu_options.per_process_gpu_memory_fraction() > 0) {
+    return errors::InvalidArgument(
+        "It's invalid to set per_process_gpu_memory_fraction when "
+        "virtual_devices is set.");
+  }
+  if (num_gpus_to_use < virtual_devices.size()) {
+    return errors::Unknown(
+        "Not enough GPUs to create virtual devices."
+        " num_gpus_to_use: ",
+        num_gpus_to_use, " #virtual_devices: ", virtual_devices.size());
+  }
+  if (!gpu_options.visible_device_list().empty() &&
+      visible_gpu_order.size() != virtual_devices.size()) {
+    return errors::InvalidArgument(
+        "The number of GPUs in visible_device_list doesn't match the number "
+        "of elements in the virtual_devices list.",
+        " #GPUs in visible_device_list: ", visible_gpu_order.size(),
+        " virtual_devices.size(): ", virtual_devices.size());
+  }
+  if (valid_cuda_gpu_ids.size() != virtual_devices.size()) {
+    return errors::Unknown(
+        "The number of valid GPUs doesn't match the number of elements in "
+        "the virtual_devices list.",
+        " #valid GPUs: ", valid_cuda_gpu_ids.size(),
+        " virtual_devices.size(): ", virtual_devices.size());
+  }
+  return Status::OK();
+}
+
+int64 MinSystemMemory(int64 available_memory) {
+  // We use the following heuristic for now:
+  //
+  // If the available_memory is < 2GiB, we allocate 225MiB to system memory.
+  // Otherwise, allocate max(300MiB, 0.05 * available_memory) to system memory.
+  //
+  // In the future we could be more sophisticated by using a table of devices.
+  int64 min_system_memory;
+  if (available_memory < (1LL << 31)) {
+    // 225MiB
+    min_system_memory = 225 * 1024 * 1024;
+  } else {
+    // max(300 MiB, 0.05 * available_memory)
+    min_system_memory =
+        std::max(314572800LL, static_cast<int64>(available_memory * 0.05));
+  }
+#if defined(__GNUC__) && defined(__OPTIMIZE__)
+// Do nothing
+#elif !defined(__GNUC__) && defined(NDEBUG)
+// Do nothing
+#else
+  // Double the amount of available GPU memory in non-opt builds (debug
+  // builds in windows); because in non-opt builds more system memory
+  // is necessary.
+  min_system_memory *= 2;
+#endif
+#if defined(NVIDIA_TEGRA)
+  // 1GB system mem for NVIDIA Tegra devices since they use the same mem for RAM and Video RAM
+  min_system_memory = 1<<30;
+#endif
+  return min_system_memory;
+}
+
+// Get the memory limit for the virtual device being created on GPU with
+// 'cuda_gpu_id', when that virtual device is the only virtual device being
+// created on that GPU.
+Status SingleVirtualDeviceMemoryLimit(const GPUOptions& gpu_options,
+                                      CudaGpuId cuda_gpu_id,
+                                      int64* memory_limit) {
+  int64 total_memory = 0;
+  int64 available_memory = 0;
+  gpu::StreamExecutor* se =
+      GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
+  if (!se->DeviceMemoryUsage(&available_memory, &total_memory)) {
+    return errors::Unknown("Failed to query available memory for GPU ",
+                           cuda_gpu_id.value());
+  }
+
+  int64 allocated_memory = 0;
+  const double per_process_gpu_memory_fraction =
+      gpu_options.per_process_gpu_memory_fraction();
+  if (per_process_gpu_memory_fraction == 0) {
+    allocated_memory = available_memory;
+    const int64 min_system_memory = MinSystemMemory(available_memory);
+    if (min_system_memory < allocated_memory) {
+      allocated_memory -= min_system_memory;
+    }
+  } else {
+    allocated_memory = total_memory * per_process_gpu_memory_fraction;
+  }
+  *memory_limit = allocated_memory;
+  return Status::OK();
+}
 }  // namespace
 
 void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context,
@@ -615,7 +809,7 @@ void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context,
   DCHECK(concrete_device);
   const cudaStream_t* cuda_stream = reinterpret_cast<const cudaStream_t*>(
       streams_[stream_id]->compute->implementation()->CudaStreamMemberHack());
-  concrete_device->Reinitialize(context, cuda_stream, gpu_id_, allocator,
+  concrete_device->Reinitialize(context, cuda_stream, tf_gpu_id_, allocator,
                                 scratch_[stream_id]);
 }
 
@@ -642,18 +836,32 @@ void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context,
 Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
                                            const string& name_prefix,
                                            std::vector<Device*>* devices) {
-  size_t n = INT_MAX;
+  TF_RETURN_IF_ERROR(ValidateGPUMachineManager());
+  gpu::Platform* gpu_manager = GPUMachineManager();
+  if (gpu_manager == nullptr) {
+    return Status::OK();
+  }
+  // If there are no GPUs visible, do nothing.
+  if (gpu_manager->VisibleDeviceCount() <= 0) {
+    return Status::OK();
+  }
+
+  size_t num_gpus_to_use = INT_MAX;
   auto iter = options.config.device_count().find("GPU");
   if (iter != options.config.device_count().end()) {
-    n = iter->second;
+    num_gpus_to_use = iter->second;
   }
-  std::vector<int> valid_gpu_ids;
-  TF_RETURN_IF_ERROR(GetValidDeviceIds(
-      options.config.gpu_options().visible_device_list(), &valid_gpu_ids));
-  if (static_cast<size_t>(n) > valid_gpu_ids.size()) {
-    n = valid_gpu_ids.size();
+  const auto& gpu_options = options.config.gpu_options();
+  std::vector<CudaGpuId> visible_gpu_order;
+  TF_RETURN_IF_ERROR(ParseVisibleDeviceList(gpu_options.visible_device_list(),
+                                            &visible_gpu_order));
+
+  std::vector<CudaGpuId> valid_cuda_gpu_ids;
+  TF_RETURN_IF_ERROR(GetValidDeviceIds(visible_gpu_order, &valid_cuda_gpu_ids));
+  if (num_gpus_to_use > valid_cuda_gpu_ids.size()) {
+    num_gpus_to_use = valid_cuda_gpu_ids.size();
   }
-  if (!valid_gpu_ids.empty()) {
+  if (!valid_cuda_gpu_ids.empty()) {
     // Save the original device.
     int original_device = 0;
     cudaError_t err = cudaGetDevice(&original_device);
@@ -663,16 +871,16 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
     }
     // Force to implicitly initialize CUDA runtime on each valid GPU before
     // CreateGPUDevice().
-    for (int gpu_id : valid_gpu_ids) {
-      err = cudaSetDevice(gpu_id);
+    for (CudaGpuId cuda_gpu_id : valid_cuda_gpu_ids) {
+      err = cudaSetDevice(cuda_gpu_id.value());
       if (err != cudaSuccess) {
-        return errors::Internal("cudaSetDevice() on GPU:", gpu_id,
+        return errors::Internal("cudaSetDevice() on GPU:", cuda_gpu_id.value(),
                                 " failed. Status: ", cudaGetErrorString(err));
       }
       err = cudaFree(nullptr);
       if (err != cudaSuccess) {
         return errors::Internal(
-            "CUDA runtime implicit initialization on GPU:", gpu_id,
+            "CUDA runtime implicit initialization on GPU:", cuda_gpu_id.value(),
             " failed. Status: ", cudaGetErrorString(err));
       }
     }
@@ -683,51 +891,45 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
                               " failed. Status: ", cudaGetErrorString(err));
     }
   }
-  for (int i = 0; i < n; i++) {
-    BaseGPUDevice* gpu_device;
-    TF_RETURN_IF_ERROR(CreateGPUDevice(
-        options, strings::StrCat(name_prefix, "/device:GPU:", i),
-        valid_gpu_ids[i], &gpu_device));
-    TF_RETURN_IF_ERROR(gpu_device->Init(options));
-    devices->push_back(gpu_device);
-  }
-
-  return Status::OK();
-}
 
-namespace {
-int64 MinSystemMemory(int64 available_memory) {
-  // We use the following heuristic for now:
-  //
-  // If the available_memory is < 2GiB, we allocate 225MiB to system memory.
-  // Otherwise, allocate max(300MiB, 0.05 * available_memory) to system memory.
-  //
-  // In the future we could be more sophisticated by using a table of devices.
-  int64 min_system_memory;
-  if (available_memory < (1LL << 31)) {
-    // 225MiB
-    min_system_memory = 225 * 1024 * 1024;
-  } else {
-    // max(300 MiB, 0.05 * available_memory)
-    min_system_memory =
-        std::max(314572800LL, static_cast<int64>(available_memory * 0.05));
+  const auto& virtual_devices = gpu_options.experimental().virtual_devices();
+  if (!virtual_devices.empty()) {
+    TF_RETURN_IF_ERROR(VerifyVirtualDeviceSettings(
+        num_gpus_to_use, gpu_options, visible_gpu_order, valid_cuda_gpu_ids));
+    // We've verified that num_gpus_to_use >= virtual_devices.size().
+    num_gpus_to_use = virtual_devices.size();
+    CHECK(gpu_options.visible_device_list().empty() ||
+          valid_cuda_gpu_ids == visible_gpu_order);
   }
-#if defined(__GNUC__) && defined(__OPTIMIZE__)
-// Do nothing
-#elif !defined(__GNUC__) && defined(NDEBUG)
-// Do nothing
-#else
-  // Double the amount of available GPU memory in non-opt builds (debug
-  // builds in windows); because in non-opt builds more system memory
-  // is necessary.
-  min_system_memory *= 2;
-#endif
-  return min_system_memory;
+  int next_tf_gpu_id = 0;
+  for (int i = 0; i < num_gpus_to_use; ++i) {
+    const CudaGpuId cuda_gpu_id = valid_cuda_gpu_ids[i];
+    std::vector<int64> memory_limit_bytes;
+    if (virtual_devices.empty() ||
+        virtual_devices.Get(i).memory_limit_mb_size() == 0) {
+      int64 single_virtual_device_memory_limit = 0;
+      TF_RETURN_IF_ERROR(SingleVirtualDeviceMemoryLimit(
+          gpu_options, cuda_gpu_id, &single_virtual_device_memory_limit));
+      memory_limit_bytes.push_back(single_virtual_device_memory_limit);
+    } else {
+      const auto& memory_limit_mb = virtual_devices.Get(i).memory_limit_mb();
+      std::transform(memory_limit_mb.begin(), memory_limit_mb.end(),
+                     std::back_inserter(memory_limit_bytes), [](float mb) {
+                       return static_cast<int64>(mb) * (1ll << 20);
+                     });
+    }
+    for (int64 bytes : memory_limit_bytes) {
+      TfGpuId tf_gpu_id(next_tf_gpu_id);
+      ++next_tf_gpu_id;
+      GpuIdUtil::InsertTfCudaGpuIdPair(tf_gpu_id, cuda_gpu_id);
+      TF_RETURN_IF_ERROR(
+          CreateGPUDevice(options, name_prefix, tf_gpu_id, bytes, devices));
+    }
+  }
+  return Status::OK();
 }
 
-}  // namespace
-
-static string GetShortDeviceDescription(int device_id,
+static string GetShortDeviceDescription(CudaGpuId cuda_gpu_id,
                                         const gpu::DeviceDescription& desc) {
   int cc_major;
   int cc_minor;
@@ -736,22 +938,26 @@ static string GetShortDeviceDescription(int device_id,
     cc_minor = 0;
   }
   // LINT.IfChange
-  return strings::StrCat("device: ", device_id, ", name: ", desc.name(),
+  return strings::StrCat("device: ", cuda_gpu_id.value(),
+                         ", name: ", desc.name(),
                          ", pci bus id: ", desc.pci_bus_id(),
                          ", compute capability: ", cc_major, ".", cc_minor);
   // LINT.ThenChange(//tensorflow/python/platform/test.py)
 }
 
 Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options,
-                                             const string& name, int gpu_id,
-                                             BaseGPUDevice** out_device) {
-  CHECK_GE(gpu_id, 0);
+                                             const string& name_prefix,
+                                             TfGpuId tf_gpu_id,
+                                             int64 memory_limit,
+                                             std::vector<Device*>* devices) {
+  CHECK_GE(tf_gpu_id.value(), 0);
+  const string device_name =
+      strings::StrCat(name_prefix, "/device:GPU:", tf_gpu_id.value());
 
   // Look up the device, to see its attributes.
-  gpu::Platform* gpu_platform = GPUMachineManager();
-  CHECK_LT(gpu_id, gpu_platform->VisibleDeviceCount());
+  GpuIdUtil::CheckValidTfGpuId(tf_gpu_id);
   gpu::StreamExecutor* se =
-      gpu_platform->ExecutorForDevice(gpu_id).ValueOrDie();
+      GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie();
   const gpu::DeviceDescription& desc = se->GetDeviceDescription();
   int numa_node = desc.numa_node();
   if (numa_node < 0) {
@@ -761,60 +967,49 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options,
     // may run into trouble later with data transfer operations.  The
     // trouble may manifest as slower than expected performance, or
     // outright failures.
-    LOG(INFO) << "Could not identify NUMA node of " << name
+    LOG(INFO) << "Could not identify NUMA node of " << device_name
               << ", defaulting to 0.  Your kernel may not have been built "
               << "with NUMA support.";
     numa_node = 0;
   }
-
-  int64 total_memory, available_memory;
-  if (!se->DeviceMemoryUsage(&available_memory, &total_memory)) {
-    return errors::Unknown(
-        strings::StrCat("Failed to query available memory for GPU ", gpu_id));
-  }
-
-  int64 allocated_memory;
-  double config_memory_fraction =
-      options.config.gpu_options().per_process_gpu_memory_fraction();
-  if (config_memory_fraction == 0) {
-    allocated_memory = available_memory;
-    const int64 min_system_memory = MinSystemMemory(available_memory);
-    if (min_system_memory < allocated_memory) {
-      allocated_memory -= min_system_memory;
-    }
-  } else {
-    allocated_memory = total_memory * config_memory_fraction;
-  }
-
-  Bytes allocated_bytes = static_cast<Bytes>(allocated_memory);
+  Bytes allocated_bytes = static_cast<Bytes>(memory_limit);
 
   // Get GPU bus_id from its reported NUMA affinity.  Because GPUs are
   // virtualized in some environments, we can't just use the GPU id.
   // NUMA locales are indexed from 0, buses are indexed from 1.
   DeviceLocality dev_locality;
   dev_locality.set_bus_id(numa_node + 1);
-  VLOG(1) << "GPUDevice id " << gpu_id << " on bus " << dev_locality.bus_id()
-          << " numa: " << numa_node << " pci: " << desc.pci_bus_id();
-
+  const CudaGpuId cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id);
+  VLOG(1) << "GPUDevice id " << cuda_gpu_id << " on bus "
+          << dev_locality.bus_id() << " numa: " << numa_node
+          << " pci: " << desc.pci_bus_id();
+
+  LOG(INFO) << "Creating TensorFlow device (" << device_name << " with "
+            << (memory_limit >> 20) << " MB memory) -> physical GPU ("
+            << GetShortDeviceDescription(cuda_gpu_id, desc) << ")";
   ProcessState* process_state = ProcessState::singleton();
-  *out_device = CreateGPUDevice(
-      options, name, allocated_bytes, dev_locality, gpu_id,
-      GetShortDeviceDescription(gpu_id, desc),
-      process_state->GetGPUAllocator(options.config.gpu_options(), gpu_id,
-                                     allocated_memory),
+  BaseGPUDevice* gpu_device = CreateGPUDevice(
+      options, device_name, allocated_bytes, dev_locality, tf_gpu_id,
+      GetShortDeviceDescription(cuda_gpu_id, desc),
+      process_state->GetGPUAllocator(options.config.gpu_options(), tf_gpu_id,
+                                     memory_limit),
       process_state->GetCPUAllocator(numa_node));
+  TF_RETURN_IF_ERROR(gpu_device->Init(options));
+  devices->push_back(gpu_device);
 
   return Status::OK();
 }
 
 static int GetDefaultMinGPUMultiprocessorCount(
-    gpu::Platform* gpu_manager, const std::vector<int>& visible_gpu_order) {
+    gpu::Platform* gpu_manager,
+    const std::vector<CudaGpuId>& visible_gpu_order) {
   static const int kDefaultMinGPUMultiprocessorCount = 8;
 
   // Find the highest multi-processor count across all visible GPUs.
   int max_count = -1;
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    auto exec_status = gpu_manager->ExecutorForDevice(visible_gpu_order[i]);
+    auto exec_status =
+        GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_order[i]);
     if (!exec_status.ok()) {
       continue;
     }
@@ -832,7 +1027,8 @@ static int GetDefaultMinGPUMultiprocessorCount(
 }
 
 static int GetMinGPUMultiprocessorCount(
-    gpu::Platform* gpu_manager, const std::vector<int>& visible_gpu_order) {
+    gpu::Platform* gpu_manager,
+    const std::vector<CudaGpuId>& visible_gpu_order) {
   const char* tf_min_gpu_core_count = getenv("TF_MIN_GPU_MULTIPROCESSOR_COUNT");
 
   if (tf_min_gpu_core_count == nullptr ||
@@ -910,17 +1106,17 @@ std::vector<CudaVersion> GetSupportedCudaComputeCapabilities() {
 }
 
 std::unique_ptr<std::map<std::pair<int, int>, bool>> GetPeerAccessMap(
-    gpu::Platform* platform, const std::vector<int>& visible_gpu_order) {
+    gpu::Platform* platform, const std::vector<CudaGpuId>& visible_gpu_order) {
   std::unique_ptr<std::map<std::pair<int, int>, bool>> map(
       new std::map<std::pair<int, int>, bool>);
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    const int i_gpu_id = visible_gpu_order[i];
+    const CudaGpuId i_gpu_id = visible_gpu_order[i];
     for (int j = 0; j < visible_gpu_order.size(); ++j) {
-      const int j_gpu_id = visible_gpu_order[j];
+      const CudaGpuId j_gpu_id = visible_gpu_order[j];
       gpu::StreamExecutor* from =
-          platform->ExecutorForDevice(i_gpu_id).ValueOrDie();
+          GpuIdUtil::ExecutorForCudaGpuId(platform, i_gpu_id).ValueOrDie();
       gpu::StreamExecutor* to =
-          platform->ExecutorForDevice(j_gpu_id).ValueOrDie();
+          GpuIdUtil::ExecutorForCudaGpuId(platform, j_gpu_id).ValueOrDie();
       (*map)[{i, j}] = from->CanEnablePeerAccessTo(to);
     }
   }
@@ -929,19 +1125,18 @@ std::unique_ptr<std::map<std::pair<int, int>, bool>> GetPeerAccessMap(
 }
 
 Status EnablePeerAccess(gpu::Platform* platform,
-                        const std::vector<int>& visible_gpu_order) {
+                        const std::vector<CudaGpuId>& visible_gpu_order) {
   int possible_peer_count = 0;
   int enabled_peer_count = 0;
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    const int i_gpu_id = visible_gpu_order[i];
+    const CudaGpuId i_gpu_id = visible_gpu_order[i];
     for (int j = 0; j < visible_gpu_order.size(); ++j) {
-      const int j_gpu_id = visible_gpu_order[j];
-      // We have already validated that ExecutorForDevice() calls
-      // return OK.
+      const CudaGpuId j_gpu_id = visible_gpu_order[j];
+      // We have already validated that ExecutorForDevice() calls return OK.
       gpu::StreamExecutor* from =
-          platform->ExecutorForDevice(i_gpu_id).ValueOrDie();
+          GpuIdUtil::ExecutorForCudaGpuId(platform, i_gpu_id).ValueOrDie();
       gpu::StreamExecutor* to =
-          platform->ExecutorForDevice(j_gpu_id).ValueOrDie();
+          GpuIdUtil::ExecutorForCudaGpuId(platform, j_gpu_id).ValueOrDie();
 
       if (from->CanEnablePeerAccessTo(to)) {
         ++possible_peer_count;
@@ -949,7 +1144,7 @@ Status EnablePeerAccess(gpu::Platform* platform,
         if (!status.ok()) {
           LOG(WARNING)
               << "Unable to enable peer access between device ordinals "
-              << i_gpu_id << " and " << j_gpu_id;
+              << i_gpu_id << " and " << j_gpu_id << ", status: " << status;
         } else {
           ++enabled_peer_count;
         }
@@ -972,73 +1167,22 @@ Status EnablePeerAccess(gpu::Platform* platform,
 }  // namespace
 
 Status BaseGPUDeviceFactory::GetValidDeviceIds(
-    const string& visible_device_list, std::vector<int>* ids) {
-  TF_RETURN_IF_ERROR(ValidateGPUMachineManager());
-
+    const std::vector<CudaGpuId>& visible_gpu_order,
+    std::vector<CudaGpuId>* ids) {
   gpu::Platform* gpu_manager = GPUMachineManager();
-  if (gpu_manager == nullptr) {
-    return Status::OK();
-  }
-
-  // If there are no GPUs visible, do nothing.
-  if (gpu_manager->VisibleDeviceCount() <= 0) {
-    return Status::OK();
-  }
-
-  // If the user wants to remap the visible to virtual GPU mapping,
-  // check for that here.
-  std::vector<int> visible_gpu_order;
-  if (visible_device_list.empty()) {
-    visible_gpu_order.resize(gpu_manager->VisibleDeviceCount());
-    // By default, visible to virtual mapping is unchanged.
-    int deviceNo = 0;
-    std::generate(visible_gpu_order.begin(), visible_gpu_order.end(),
-                  [&deviceNo] { return deviceNo++; });
-  } else {
-    std::vector<string> order_str = str_util::Split(visible_device_list, ',');
-    for (int i = 0; i < order_str.size(); ++i) {
-      const string& gpu_id_str = order_str[i];
-      int32 gpu_id;
-      if (!strings::safe_strto32(gpu_id_str, &gpu_id)) {
-        return errors::InvalidArgument(
-            "Could not parse entry in 'visible_device_list': '", gpu_id_str,
-            "'.  visible_device_list = ", visible_device_list);
-      }
-
-      if (gpu_id < 0 || gpu_id >= gpu_manager->VisibleDeviceCount()) {
-        return errors::InvalidArgument(
-            "'visible_device_list' listed an invalid GPU id '", gpu_id,
-            "' but visible device count is ",
-            gpu_manager->VisibleDeviceCount());
-      }
-
-      visible_gpu_order.push_back(gpu_id);
-    }
-  }
-
-  // Validate no repeats.
-  std::set<int> visible_device_set(visible_gpu_order.begin(),
-                                   visible_gpu_order.end());
-  if (visible_device_set.size() != visible_gpu_order.size()) {
-    return errors::InvalidArgument(
-        "visible_device_list contained "
-        "a duplicate entry: ",
-        visible_device_list);
-  }
-
   bool new_gpu_found = false;
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    int gpu_id = visible_gpu_order[i];
+    const CudaGpuId cuda_gpu_id = visible_gpu_order[i];
 
-    // Only perform this once per visible gpu id.
-    if (visible_gpu_initialized_[gpu_id]) {
+    // Only perform this once per visible cuda gpu id.
+    if (visible_gpu_initialized_[cuda_gpu_id.value()]) {
       continue;
     }
 
-    visible_gpu_initialized_[gpu_id] = true;
+    visible_gpu_initialized_[cuda_gpu_id.value()] = true;
     new_gpu_found = true;
 
-    auto executor = gpu_manager->ExecutorForDevice(gpu_id);
+    auto executor = GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, cuda_gpu_id);
     if (!executor.ok()) {
       return StreamExecutorUtil::ConvertStatus(executor.status());
     }
@@ -1078,11 +1222,11 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
     auto access_map = GetPeerAccessMap(gpu_manager, visible_gpu_order);
     string line_buf = "DMA: ";
     for (int i = 0; i < visible_gpu_order.size(); ++i) {
-      strings::StrAppend(&line_buf, visible_gpu_order[i], " ");
+      strings::StrAppend(&line_buf, visible_gpu_order[i].value(), " ");
     }
     LOG(INFO) << line_buf;
     for (int i = 0; i < visible_gpu_order.size(); ++i) {
-      line_buf = strings::StrCat(visible_gpu_order[i], ":   ");
+      line_buf = strings::StrCat(visible_gpu_order[i].value(), ":   ");
       for (int j = 0; j < visible_gpu_order.size(); ++j) {
         if ((*access_map)[{i, j}]) {
           line_buf.append("Y ");
@@ -1107,9 +1251,13 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
 
   // Filter out devices that don't have the right capability or power.
   for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    const int32 visible_gpu_id = visible_gpu_order[i];
-    auto exec_status = gpu_manager->ExecutorForDevice(visible_gpu_id);
+    const CudaGpuId visible_gpu_id = visible_gpu_order[i];
+    auto exec_status =
+        GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_id);
     if (!exec_status.ok()) {
+      LOG(INFO) << "Ignoring visible gpu device " << visible_gpu_id
+                << " whose executor is in invalid state: "
+                << exec_status.status().ToString();
       continue;
     }
     gpu::StreamExecutor* se = exec_status.ValueOrDie();
@@ -1117,6 +1265,10 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
     CudaVersion device_capability;
     if (!desc.cuda_compute_capability(&device_capability.major_part,
                                       &device_capability.minor_part)) {
+      LOG(INFO) << "Ignoring visible gpu device "
+                << "(" << GetShortDeviceDescription(visible_gpu_id, desc)
+                << ") "
+                << "whose CUDA compute capability is not available.";
       continue;
     }
     // Only GPUs with no less than the minimum supported compute capability is
@@ -1136,7 +1288,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
     // multiprocessors. If the TF_MIN_GPU_MULTIPROCESSOR_COUNT environment
     // variable is set, its value will be used to filter out GPUs.
     if (desc.core_count() < min_gpu_core_count) {
-      LOG(INFO) << "Ignoring gpu device "
+      LOG(INFO) << "Ignoring visible gpu device "
                 << "(" << GetShortDeviceDescription(visible_gpu_id, desc)
                 << ") "
                 << "with Cuda multiprocessor count: " << desc.core_count()
@@ -1145,12 +1297,14 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
                    "TF_MIN_GPU_MULTIPROCESSOR_COUNT.";
       continue;
     }
-
-    size_t new_id = ids->size();
     ids->push_back(visible_gpu_id);
-
-    LOG(INFO) << "Creating TensorFlow device (/device:GPU:" << new_id << ") -> "
-              << "(" << GetShortDeviceDescription(visible_gpu_id, desc) << ")";
+  }
+  if (!ids->empty()) {
+    std::vector<int> raw_ids(ids->size());
+    std::transform(ids->begin(), ids->end(), raw_ids.begin(),
+                   [](CudaGpuId id) -> int { return id.value(); });
+    LOG(INFO) << "Adding visible gpu devices: "
+              << str_util::Join(raw_ids, ", ");
   }
 
   return Status::OK();
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index 442496437af5f4796f6d216f7c688d31f2f457d7..41e60b4884673673f2e791cbbafa4ef0091bdf8f 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -28,6 +28,8 @@ limitations under the License.
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu_device_context.h"
 #include "tensorflow/core/common_runtime/local_device.h"
 #include "tensorflow/core/framework/allocator.h"
@@ -45,10 +47,10 @@ namespace tensorflow {
 class BaseGPUDevice : public LocalDevice {
  public:
   BaseGPUDevice(const SessionOptions& options, const string& name,
-                Bytes memory_limit, const DeviceLocality& locality, int gpu_id,
-                const string& physical_device_desc, Allocator* gpu_allocator,
-                Allocator* cpu_allocator, bool sync_every_op,
-                int32 max_streams);
+                Bytes memory_limit, const DeviceLocality& locality,
+                TfGpuId tf_gpu_id, const string& physical_device_desc,
+                Allocator* gpu_allocator, Allocator* cpu_allocator,
+                bool sync_every_op, int32 max_streams);
 
   ~BaseGPUDevice() override;
 
@@ -84,9 +86,9 @@ class BaseGPUDevice : public LocalDevice {
   void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device,
                              DeviceContext* dc, Allocator* allocator) override;
 
-  // Returns the id of this device within the native driver system; e.g., for
-  // CUDA this is the ordinal of the GPU within the system.
-  int gpu_id() const { return gpu_id_; }
+  // Returns the CUDA GPU id of this device within the native driver system;
+  // e.g., for CUDA this is the ordinal of the GPU within the system.
+  int gpu_id() const { return GpuIdUtil::TfToCudaGpuId(tf_gpu_id_).value(); }
 
   // The executor that provides control for the device; e.g., for CUDA this
   // corresponds to the cuda context.
@@ -112,10 +114,11 @@ class BaseGPUDevice : public LocalDevice {
   std::vector<GPUDeviceContext*> device_contexts_;
   GpuDeviceInfo* gpu_device_info_ = nullptr;
   mutex trace_mu_;
-  int gpu_id_ = -1;
+  TfGpuId tf_gpu_id_;
   const bool sync_every_op_ = false;
   const int32 max_streams_;
   std::unique_ptr<EventMgr> em_;
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
 
   void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device,
                           int stream_id, Allocator* allocator);
@@ -138,25 +141,30 @@ class BaseGPUDeviceFactory : public DeviceFactory {
                        std::vector<Device*>* devices) override;
 
  private:
-  Status CreateGPUDevice(const SessionOptions& options, const string& name,
-                         int gpu_id, BaseGPUDevice** out_device);
+  // Creates a BaseGPUDevice associated with 'tf_gpu_id', allocates (strictly)
+  // 'memory_limit' bytes of GPU memory to it, and adds it to the 'devices'
+  // vector.
+  Status CreateGPUDevice(const SessionOptions& options,
+                         const string& name_prefix, TfGpuId tf_gpu_id,
+                         int64 memory_limit, std::vector<Device*>* devices);
 
   virtual BaseGPUDevice* CreateGPUDevice(const SessionOptions& options,
                                          const string& name, Bytes memory_limit,
                                          const DeviceLocality& locality,
-                                         int gpu_id,
+                                         TfGpuId tf_gpu_id,
                                          const string& physical_device_desc,
                                          Allocator* gpu_allocator,
                                          Allocator* cpu_allocator) = 0;
 
-  // Returns into 'ids' the list of valid GPU ids, in the order that
-  // they should map to logical gpu ids "/device:GPU:0", "/device:GPU:1", etc, based
-  // upon 'visible_device_list', a comma-separated list of 'visible
-  // gpu ids'.
-  Status GetValidDeviceIds(const string& visible_device_list,
-                           std::vector<int>* ids);
+  // Returns into 'ids' the list of valid CUDA GPU ids, in the order that
+  // they should map to TF GPU ids "/device:GPU:0", "/device:GPU:1", etc,
+  // based upon 'visible_gpu_order' which was generated by parsing
+  // GPUOptions::visible_device_list which is a comma-separated list of CUDA GPU
+  // ids.
+  Status GetValidDeviceIds(const std::vector<CudaGpuId>& visible_gpu_order,
+                           std::vector<CudaGpuId>* ids);
 
-  // visible_gpu_initialized_[gpu_id] is true if visible GPU gpu_id
+  // visible_gpu_initialized_[cuda_gpu_id] is true if visible GPU cuda_gpu_id
   // has been initialized by the process.
   std::unordered_map<int, bool> visible_gpu_initialized_;
 };
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
index 63ac3daba142b0076407110509034a512b00ff37..9a000749c6e677743ea700eb941f4147646ddc55 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #define EIGEN_USE_GPU
 
 #include "tensorflow/core/common_runtime/gpu/gpu_device.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/gpu/process_state.h"
 #include "tensorflow/core/common_runtime/threadpool_device.h"
 
@@ -26,10 +27,10 @@ namespace tensorflow {
 class GPUDevice : public BaseGPUDevice {
  public:
   GPUDevice(const SessionOptions& options, const string& name,
-            Bytes memory_limit, const DeviceLocality& locality, int gpu_id,
-            const string& physical_device_desc, Allocator* gpu_allocator,
-            Allocator* cpu_allocator)
-      : BaseGPUDevice(options, name, memory_limit, locality, gpu_id,
+            Bytes memory_limit, const DeviceLocality& locality,
+            TfGpuId tf_gpu_id, const string& physical_device_desc,
+            Allocator* gpu_allocator, Allocator* cpu_allocator)
+      : BaseGPUDevice(options, name, memory_limit, locality, tf_gpu_id,
                       physical_device_desc, gpu_allocator, cpu_allocator,
                       false /* sync every op */, 1 /* max_streams */) {
     if (options.config.has_gpu_options()) {
@@ -59,11 +60,12 @@ class GPUDeviceFactory : public BaseGPUDeviceFactory {
  private:
   BaseGPUDevice* CreateGPUDevice(const SessionOptions& options,
                                  const string& name, Bytes memory_limit,
-                                 const DeviceLocality& locality, int gpu_id,
+                                 const DeviceLocality& locality,
+                                 TfGpuId tf_gpu_id,
                                  const string& physical_device_desc,
                                  Allocator* gpu_allocator,
                                  Allocator* cpu_allocator) override {
-    return new GPUDevice(options, name, memory_limit, locality, gpu_id,
+    return new GPUDevice(options, name, memory_limit, locality, tf_gpu_id,
                          physical_device_desc, gpu_allocator, cpu_allocator);
   }
 };
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_on_non_gpu_machine_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_on_non_gpu_machine_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..75be6d60b86af101fb9de7497490e72c523d632b
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_on_non_gpu_machine_test.cc
@@ -0,0 +1,54 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/test.h"
+
+#if GOOGLE_CUDA
+
+#include "tensorflow/core/common_runtime/gpu/gpu_device.h"
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/platform.h"
+#include "tensorflow/core/public/session_options.h"
+
+namespace tensorflow {
+namespace {
+
+TEST(GPUDeviceOnNonGPUMachineTest, CreateGPUDevicesOnNonGPUMachine) {
+  SessionOptions opts;
+  std::vector<tensorflow::Device*> devices;
+  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, "/job:localhost/replica:0/task:0", &devices));
+  EXPECT_TRUE(devices.empty());
+}
+
+}  // namespace
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
+
+int main(int argc, char** argv) {
+#if GOOGLE_CUDA
+  // Sets CUDA_VISIBLE_DEVICES to empty string to simulate non-gpu environment.
+  setenv("CUDA_VISIBLE_DEVICES", "", 1);
+#endif  // GOOGLE_CUDA
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ff46be9c015ac3d0ad59e302f53d52c4bd3e25ea
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
@@ -0,0 +1,189 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#include "tensorflow/core/common_runtime/gpu/gpu_device.h"
+
+#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+const char* kDeviceNamePrefix = "/job:localhost/replica:0/task:0";
+
+static SessionOptions MakeSessionOptions(
+    const string& visible_device_list = "",
+    double per_process_gpu_memory_fraction = 0, int gpu_device_count = 1,
+    const std::vector<std::vector<float>>& memory_limit_mb = {}) {
+  SessionOptions options;
+  ConfigProto* config = &options.config;
+  (*config->mutable_device_count())["GPU"] = gpu_device_count;
+  GPUOptions* gpu_options = config->mutable_gpu_options();
+  gpu_options->set_visible_device_list(visible_device_list);
+  gpu_options->set_per_process_gpu_memory_fraction(
+      per_process_gpu_memory_fraction);
+  for (const auto& v : memory_limit_mb) {
+    auto virtual_devices =
+        gpu_options->mutable_experimental()->add_virtual_devices();
+    for (float mb : v) {
+      virtual_devices->add_memory_limit_mb(mb);
+    }
+  }
+  return options;
+}
+
+static bool StartsWith(const string& lhs, const string& rhs) {
+  if (rhs.length() > lhs.length()) return false;
+  return lhs.substr(0, rhs.length()) == rhs;
+}
+
+TEST(GPUDeviceTest, FailedToParseVisibleDeviceList) {
+  SessionOptions opts = MakeSessionOptions("0,abc");
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+  EXPECT_TRUE(StartsWith(status.error_message(), "Could not parse entry"))
+      << status;
+}
+
+TEST(GPUDeviceTest, InvalidGpuId) {
+  SessionOptions opts = MakeSessionOptions("100");
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "'visible_device_list' listed an invalid GPU id"))
+      << status;
+}
+
+TEST(GPUDeviceTest, DuplicateEntryInVisibleDeviceList) {
+  SessionOptions opts = MakeSessionOptions("0,0");
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "visible_device_list contained a duplicate entry"))
+      << status;
+}
+
+TEST(GPUDeviceTest, VirtualDeviceConfigConflictsWithMemoryFractionSettings) {
+  SessionOptions opts = MakeSessionOptions("0", 0.1, 1, {{}});
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "It's invalid to set per_process_gpu_memory_fraction"))
+      << status;
+}
+
+TEST(GPUDeviceTest, GpuDeviceCountTooSmall) {
+  // device_count is 0, but with one entry in visible_device_list and one
+  // (empty) VirtualDevices messages.
+  SessionOptions opts = MakeSessionOptions("0", 0, 0, {{}});
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::UNKNOWN);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "Not enough GPUs to create virtual devices."))
+      << status;
+}
+
+TEST(GPUDeviceTest, NotEnoughGpuInVisibleDeviceList) {
+  // Single entry in visible_device_list with two (empty) VirtualDevices
+  // messages.
+  SessionOptions opts = MakeSessionOptions("0", 0, 8, {{}, {}});
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::UNKNOWN);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "Not enough GPUs to create virtual devices."))
+      << status;
+}
+
+TEST(GPUDeviceTest, VirtualDeviceConfigConflictsWithVisibleDeviceList) {
+  // This test requires at least two visible GPU hardware.
+  if (GPUMachineManager()->VisibleDeviceCount() < 2) return;
+  // Three entries in visible_device_list with two (empty) VirtualDevices
+  // messages.
+  SessionOptions opts = MakeSessionOptions("0,1", 0, 8, {{}});
+  std::vector<tensorflow::Device*> devices;
+  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices);
+  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+  EXPECT_TRUE(StartsWith(status.error_message(),
+                         "The number of GPUs in visible_device_list doesn't "
+                         "match the number of elements in the virtual_devices "
+                         "list."))
+      << status;
+}
+
+TEST(GPUDeviceTest, EmptyVirtualDeviceConfig) {
+  // It'll create single virtual device when the virtual device config is empty.
+  SessionOptions opts = MakeSessionOptions("0");
+  std::vector<tensorflow::Device*> devices;
+  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices));
+  EXPECT_EQ(1, devices.size());
+  EXPECT_GE(devices[0]->attributes().memory_limit(), 0);
+  for (auto d : devices) delete d;
+}
+
+TEST(GPUDeviceTest, SingleVirtualDeviceWithNoMemoryLimit) {
+  // It'll create single virtual device for the gpu in question when
+  // memory_limit_mb is unset.
+  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{}});
+  std::vector<tensorflow::Device*> devices;
+  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices));
+  EXPECT_EQ(1, devices.size());
+  EXPECT_GE(devices[0]->attributes().memory_limit(), 0);
+  for (auto d : devices) delete d;
+}
+
+TEST(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimit) {
+  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123}});
+  std::vector<tensorflow::Device*> devices;
+  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices));
+  EXPECT_EQ(1, devices.size());
+  EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
+  for (auto d : devices) delete d;
+}
+
+TEST(GPUDeviceTest, MultipleVirtualDevices) {
+  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}});
+  std::vector<tensorflow::Device*> devices;
+  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
+      opts, kDeviceNamePrefix, &devices));
+  EXPECT_EQ(2, devices.size());
+  EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
+  EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
+  for (auto d : devices) delete d;
+}
+
+}  // namespace
+}  // namespace tensorflow
+
+#endif
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id.h b/tensorflow/core/common_runtime/gpu/gpu_id.h
new file mode 100644
index 0000000000000000000000000000000000000000..ff81ccd4325e0ad22636cd78ba99e0bff6a03347
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_id.h
@@ -0,0 +1,88 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_H_
+
+#include "tensorflow/core/lib/gtl/int_type.h"
+
+namespace tensorflow {
+
+// There are three types of GPU ids:
+// - *physical* GPU id: this is the integer index of a GPU hardware in the
+//   physical machine, it can be filtered by CUDA environment variable
+//   CUDA_VISIBLE_DEVICES. Note that this id is not visible to Tensorflow, but
+//   result after filtering by CUDA_VISIBLE_DEVICES is visible to TF and is
+//   called CUDA GPU id as below. See
+//   http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
+//   for more details.
+// - CUDA GPU id (also called *visible* GPU id in
+//   third_party/tensorflow/core/protobuf/config.proto): this is the id that is
+//   visible to Tensorflow after filtering by CUDA_VISIBLE_DEVICES, and is
+//   generated by the CUDA GPU driver. It starts from 0 and is used for CUDA API
+//   calls like cuDeviceGet().
+// - TF GPU id (also called *virtual* GPU id in
+//   third_party/tensorflow/core/protobuf/config.proto): this is the id that
+//   Tensorflow generates and exposes to its users. It is the id in the <id>
+//   field of the device name "/device:GPU:<id>", and is also the identifier of
+//   a BaseGPUDevice. Note that the configuration allows us to create multiple
+//   BaseGPUDevice per GPU hardware in order to use multi CUDA streams on the
+//   hardware, so the mapping between TF GPU id and CUDA GPU id is not a 1:1
+//   mappping, see the example below.
+//
+// For example, assuming that in the machine we have GPU device with index 0, 1,
+// 2 and 3 (physical GPU id). Setting "CUDA_VISIBLE_DEVICES=1,2,3" will create
+// the following mapping between CUDA GPU id and physical GPU id:
+//
+//        CUDA GPU id ->  physical GPU id
+//                 0  ->  1
+//                 1  ->  2
+//                 2  ->  3
+//
+// Note that physical GPU id 0 is invisible to TF so there is no mapping entry
+// for it.
+//
+// Assuming we configure the Session to create one BaseGPUDevice per GPU
+// hardware, then setting GPUOptions::visible_device_list to "2,0" will create
+// the following mappting between TF GPU id and CUDA GPU id:
+//
+//                  TF GPU id  ->  CUDA GPU ID
+//      0 (i.e. /device:GPU:0) ->  2
+//      1 (i.e. /device:GPU:1) ->  0
+//
+// Note that CUDA GPU id 1 is filtered out by GPUOptions::visible_device_list,
+// so it won't be used by the TF process.
+//
+// On the other hand, if we configure it to create 2 BaseGPUDevice per GPU
+// hardware, then setting GPUOptions::visible_device_list to "2,0" will create
+// the following mappting between TF GPU id and CUDA GPU id:
+//
+//                  TF GPU id  ->  CUDA GPU ID
+//      0 (i.e. /device:GPU:0) ->  2
+//      1 (i.e. /device:GPU:1) ->  2
+//      2 (i.e. /device:GPU:2) ->  0
+//      3 (i.e. /device:GPU:3) ->  0
+//
+// We create strong-typed integer classes for both TF GPU id and CUDA GPU id to
+// minimize programming errors and improve code readability. Except for the
+// StreamExecutor interface (as we don't change its API), whenever we need a
+// TF GPU id (or CUDA GPU id) we should use TfGpuId (or CudaGpuId) instead of a
+// raw integer.
+TF_LIB_GTL_DEFINE_INT_TYPE(TfGpuId, int32);
+TF_LIB_GTL_DEFINE_INT_TYPE(CudaGpuId, int32);
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_H_
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils.cc b/tensorflow/core/common_runtime/gpu/gpu_id_utils.cc
new file mode 100644
index 0000000000000000000000000000000000000000..92cd19453f14c886c0d105a5c1809b7fdbcafc9b
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_utils.cc
@@ -0,0 +1,74 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
+
+#include <unordered_map>
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/mutex.h"
+
+namespace tensorflow {
+namespace {
+// Manages the map between TfGpuId and CUDA GPU id.
+class GpuIdManager {
+ public:
+  static GpuIdManager* singleton() {
+    static auto* manager = new GpuIdManager;
+    return manager;
+  }
+
+  void InsertOrDie(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id)
+      LOCKS_EXCLUDED(mu_) {
+    std::pair<IdMapType::iterator, bool> result;
+    {
+      mutex_lock lock(mu_);
+      result = id_map_.insert({tf_gpu_id.value(), cuda_gpu_id.value()});
+    }
+    if (!result.second) {
+      CHECK_EQ(cuda_gpu_id.value(), result.first->second)
+          << "Mapping the same TfGpuId to a different CUDA GPU id."
+          << " TfGpuId: " << tf_gpu_id
+          << " Existing mapped CUDA GPU id: " << result.first->second
+          << " CUDA GPU id being tried to map to: " << cuda_gpu_id;
+    }
+  }
+
+  int32 FindOrDie(TfGpuId tf_gpu_id) const LOCKS_EXCLUDED(mu_) {
+    mutex_lock lock(mu_);
+    auto result = id_map_.find(tf_gpu_id.value());
+    CHECK(result != id_map_.end())
+        << "Could not find the mapping for TfGpuId: " << tf_gpu_id;
+    return result->second;
+  }
+
+ private:
+  using IdMapType = std::unordered_map<int32, int32>;
+  mutable mutex mu_;
+  IdMapType id_map_ GUARDED_BY(mu_);
+};
+}  // namespace
+
+void GpuIdUtil::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id,
+                                      CudaGpuId cuda_gpu_id) {
+  GpuIdManager::singleton()->InsertOrDie(tf_gpu_id, cuda_gpu_id);
+}
+
+CudaGpuId GpuIdUtil::TfToCudaGpuId(TfGpuId tf_gpu_id) {
+  return CudaGpuId(GpuIdManager::singleton()->FindOrDie(tf_gpu_id));
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..78e51c84c146693dfc02ce445bda030797de6c07
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h
@@ -0,0 +1,61 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_UTILS_H_
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
+#include "tensorflow/core/lib/gtl/int_type.h"
+#include "tensorflow/core/platform/stream_executor.h"
+
+namespace tensorflow {
+namespace gpu = ::perftools::gputools;
+
+// Utility methods for translation between Tensorflow GPU ids and CUDA GPU ids.
+class GpuIdUtil {
+ public:
+  static void InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id);
+  static CudaGpuId TfToCudaGpuId(TfGpuId tf_gpu_id);
+
+  // Convenient methods for getting the associated executor given a TfGpuId or
+  // CudaGpuId.
+  static gpu::port::StatusOr<gpu::StreamExecutor*> ExecutorForCudaGpuId(
+      gpu::Platform* gpu_manager, CudaGpuId cuda_gpu_id) {
+    return gpu_manager->ExecutorForDevice(cuda_gpu_id.value());
+  }
+  static gpu::port::StatusOr<gpu::StreamExecutor*> ExecutorForCudaGpuId(
+      CudaGpuId cuda_gpu_id) {
+    return ExecutorForCudaGpuId(GPUMachineManager(), cuda_gpu_id);
+  }
+  static gpu::port::StatusOr<gpu::StreamExecutor*> ExecutorForTfGpuId(
+      TfGpuId tf_gpu_id) {
+    return ExecutorForCudaGpuId(GpuIdUtil::TfToCudaGpuId(tf_gpu_id));
+  }
+
+  // Verify that the cuda_gpu_id associated with a TfGpuId is legitimate.
+  static void CheckValidTfGpuId(TfGpuId tf_gpu_id) {
+    const CudaGpuId cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id);
+    const int visible_device_count = GPUMachineManager()->VisibleDeviceCount();
+    CHECK_LT(cuda_gpu_id.value(), visible_device_count)
+        << "cuda_gpu_id is outside discovered device range."
+        << " TF GPU id: " << tf_gpu_id << " CUDA GPU id: " << cuda_gpu_id
+        << " visible device count: " << visible_device_count;
+  }
+};
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_UTILS_H_
diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils_test.cc b/tensorflow/core/common_runtime/gpu/gpu_id_utils_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..bebe00a4317becdba1fc6146b4eb188b93933fff
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_utils_test.cc
@@ -0,0 +1,55 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace test {
+
+TEST(GpuIdTest, Basics) {
+  TfGpuId key_0(0);
+  CudaGpuId value_0(0);
+  GpuIdUtil::InsertTfCudaGpuIdPair(key_0, value_0);
+  EXPECT_EQ(value_0, GpuIdUtil::TfToCudaGpuId(key_0));
+
+  // Multiple calls to map the same value is ok.
+  GpuIdUtil::InsertTfCudaGpuIdPair(key_0, value_0);
+  EXPECT_EQ(value_0, GpuIdUtil::TfToCudaGpuId(key_0));
+
+  // Map a different TfGpuId to a different value.
+  TfGpuId key_1(3);
+  CudaGpuId value_1(2);
+  GpuIdUtil::InsertTfCudaGpuIdPair(key_1, value_1);
+  EXPECT_EQ(value_1, GpuIdUtil::TfToCudaGpuId(key_1));
+
+  // Mapping a different TfGpuId to the same value is ok.
+  TfGpuId key_2(10);
+  GpuIdUtil::InsertTfCudaGpuIdPair(key_2, value_1);
+  EXPECT_EQ(value_1, GpuIdUtil::TfToCudaGpuId(key_2));
+
+  // Mapping the same TfGpuId to a different value will crash the program.
+  ASSERT_DEATH(GpuIdUtil::InsertTfCudaGpuIdPair(key_2, value_0),
+               "Mapping the same TfGpuId to a different CUDA GPU id");
+
+  // Getting an nonexistent mapping will crash the program.
+  ASSERT_DEATH(GpuIdUtil::TfToCudaGpuId(TfGpuId(100)),
+               "Could not find the mapping for TfGpuId");
+}
+
+}  // namespace test
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_util.cc b/tensorflow/core/common_runtime/gpu/gpu_util.cc
index 657bdf0601bcc721c36209060654a19c3b6afb8a..a0f5877d62f0c889c2a598b8e03771e4bb49e0a9 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_util.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_util.cc
@@ -352,11 +352,7 @@ Status GPUUtil::Sync(Device* gpu_device) {
   if (!dev_info) {
     return errors::Internal("Failed to find dest device GPUDeviceInfo");
   }
-  dev_info->stream->BlockHostUntilDone();
-  if (!dev_info->stream->ok()) {
-    return errors::Internal("GPU sync failed");
-  }
-  return Status::OK();
+  return dev_info->stream->BlockHostUntilDone();
 }
 
 Status GPUUtil::SyncAll(Device* gpu_device) {
diff --git a/tensorflow/core/common_runtime/gpu/process_state.cc b/tensorflow/core/common_runtime/gpu/process_state.cc
index 0675dbf3fcdc772f4d45025d296eaddbf4397271..995fd1253fb9f352742410199174b8567e92351b 100644
--- a/tensorflow/core/common_runtime/gpu/process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/process_state.cc
@@ -15,11 +15,14 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/gpu/process_state.h"
 
+#include <cstring>
 #include <vector>
 
 #include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/common_runtime/gpu/pool_allocator.h"
 #include "tensorflow/core/framework/allocator.h"
@@ -45,27 +48,19 @@ const bool FLAGS_brain_mem_reg_cuda_dma = true;
 // performance issues.
 const bool FLAGS_brain_gpu_record_mem_types = false;
 
-namespace gpu = ::perftools::gputools;
-
 namespace tensorflow {
-
 namespace {
+
 bool useCudaMallocAllocator() {
   const char* debug_allocator_str = std::getenv("TF_GPU_ALLOCATOR");
-  if (debug_allocator_str != nullptr &&
-      strcmp(debug_allocator_str, "cuda_malloc") == 0)
-    return true;
-  else
-    return false;
+  return debug_allocator_str != nullptr &&
+         std::strcmp(debug_allocator_str, "cuda_malloc") == 0;
 }
 
 bool useCudaMemoryGuardAllocator() {
   const char* debug_allocator_str = std::getenv("TF_GPU_ALLOCATOR");
-  if (debug_allocator_str != nullptr &&
-      strcmp(debug_allocator_str, "memory_guard") == 0)
-    return true;
-  else
-    return false;
+  return debug_allocator_str != nullptr &&
+         std::strcmp(debug_allocator_str, "memory_guard") == 0;
 }
 
 }  // namespace
@@ -107,23 +102,20 @@ ProcessState::MemDesc ProcessState::PtrType(const void* ptr) {
   return MemDesc();
 }
 
-Allocator* ProcessState::GetGPUAllocator(const GPUOptions& options, int gpu_id,
+Allocator* ProcessState::GetGPUAllocator(const GPUOptions& options,
+                                         TfGpuId tf_gpu_id,
                                          size_t total_bytes) {
 #if GOOGLE_CUDA
   const string& allocator_type = options.allocator_type();
   mutex_lock lock(mu_);
-  gpu::Platform* gpu_platform = GPUMachineManager();
+  GpuIdUtil::CheckValidTfGpuId(tf_gpu_id);
 
-  // Verify that gpu_id is legitimate.
-  CHECK_LT(gpu_id, gpu_platform->VisibleDeviceCount())
-      << "gpu_id is outside discovered device range";
-
-  if (gpu_id >= static_cast<int64>(gpu_allocators_.size())) {
-    gpu_allocators_.resize(gpu_id + 1);
-    if (FLAGS_brain_gpu_record_mem_types) gpu_al_.resize(gpu_id + 1);
+  if (tf_gpu_id.value() >= static_cast<int64>(gpu_allocators_.size())) {
+    gpu_allocators_.resize(tf_gpu_id.value() + 1);
+    if (FLAGS_brain_gpu_record_mem_types) gpu_al_.resize(tf_gpu_id.value() + 1);
   }
 
-  if (gpu_allocators_[gpu_id] == nullptr) {
+  if (gpu_allocators_[tf_gpu_id.value()] == nullptr) {
     VisitableAllocator* gpu_allocator;
 
     // Validate allocator types.
@@ -132,45 +124,49 @@ Allocator* ProcessState::GetGPUAllocator(const GPUOptions& options, int gpu_id,
       return nullptr;
     }
 
-    gpu_allocator = new GPUBFCAllocator(gpu_id, total_bytes, options);
+    const CudaGpuId cuda_gpu_id = GpuIdUtil::TfToCudaGpuId(tf_gpu_id);
+    gpu_allocator =
+        new GPUBFCAllocator(cuda_gpu_id, total_bytes, options,
+                            strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc"));
 
     // If true, checks for memory overwrites by writing
     // distinctive patterns on both ends of allocated memory.
     if (useCudaMemoryGuardAllocator()) {
-      gpu_allocator = new GPUDebugAllocator(gpu_allocator, gpu_id);
-      gpu_allocator = new GPUNanResetAllocator(gpu_allocator, gpu_id);
+      gpu_allocator = new GPUDebugAllocator(gpu_allocator, cuda_gpu_id);
+      gpu_allocator = new GPUNanResetAllocator(gpu_allocator, cuda_gpu_id);
     } else if (useCudaMallocAllocator()) {
       // If true, passes all allocation requests through to cudaMalloc
       // useful for doing memory debugging with tools like cuda-memcheck
       // **WARNING** probably will not work in a multi-gpu scenario
-      gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, gpu_id);
+      gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id);
     }
-    gpu_allocators_[gpu_id] = gpu_allocator;
+    gpu_allocators_[tf_gpu_id.value()] = gpu_allocator;
 
     // If there are any pending AllocVisitors for this bus, add
     // them now.
     gpu::StreamExecutor* se =
-        gpu_platform->ExecutorForDevice(gpu_id).ValueOrDie();
+        GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie();
     int bus_id = se->GetDeviceDescription().numa_node();
     if (bus_id >= 0 && bus_id < static_cast<int64>(gpu_visitors_.size())) {
       for (const auto& v : gpu_visitors_[bus_id]) {
-        gpu_allocators_[gpu_id]->AddAllocVisitor(v);
+        gpu_allocator->AddAllocVisitor(v);
       }
     }
     if (FLAGS_brain_gpu_record_mem_types) {
       MemDesc md;
       md.loc = MemDesc::GPU;
-      md.dev_index = gpu_id;
+      md.dev_index = cuda_gpu_id.value();
       md.gpu_registered = false;
       md.nic_registered = true;
-      if (static_cast<int64>(gpu_al_.size()) <= gpu_id)
-        gpu_al_.resize(gpu_id + 1);
-      gpu_al_[gpu_id] = new internal::RecordingAllocator(
-          &mem_desc_map_, gpu_allocators_[gpu_id], md, &mu_);
+      if (static_cast<int64>(gpu_al_.size()) <= tf_gpu_id.value()) {
+        gpu_al_.resize(tf_gpu_id.value() + 1);
+      }
+      gpu_al_[tf_gpu_id.value()] = new internal::RecordingAllocator(
+          &mem_desc_map_, gpu_allocator, md, &mu_);
     }
   }
-  if (FLAGS_brain_gpu_record_mem_types) return gpu_al_[gpu_id];
-  return gpu_allocators_[gpu_id];
+  if (FLAGS_brain_gpu_record_mem_types) return gpu_al_[tf_gpu_id.value()];
+  return gpu_allocators_[tf_gpu_id.value()];
 #else
   LOG(FATAL) << "GPUAllocator unavailable. Not compiled with --config=cuda.";
   return nullptr;
@@ -246,7 +242,7 @@ Allocator* ProcessState::GetCUDAHostAllocator(int numa_node) {
   gpu::StreamExecutor* se = nullptr;
   for (int i = 0; i < static_cast<int>(gpu_allocators_.size()); ++i) {
     if (gpu_allocators_[i] != nullptr) {
-      se = GPUMachineManager()->ExecutorForDevice(i).ValueOrDie();
+      se = GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie();
       break;
     }
   }
@@ -290,14 +286,12 @@ Allocator* ProcessState::GetCUDAHostAllocator(int numa_node) {
 void ProcessState::AddGPUAllocVisitor(int bus_id, AllocVisitor visitor) {
 #if GOOGLE_CUDA
   mutex_lock lock(mu_);
-  gpu::Platform* gpu_platform = GPUMachineManager();
-  for (int gpu_id = 0; gpu_id < static_cast<int64>(gpu_allocators_.size());
-       ++gpu_id) {
+  for (int i = 0; i < static_cast<int64>(gpu_allocators_.size()); ++i) {
     gpu::StreamExecutor* se =
-        gpu_platform->ExecutorForDevice(gpu_id).ValueOrDie();
-    if (gpu_allocators_[gpu_id] &&
+        GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie();
+    if (gpu_allocators_[i] &&
         (se->GetDeviceDescription().numa_node() + 1) == bus_id) {
-      gpu_allocators_[gpu_id]->AddAllocVisitor(visitor);
+      gpu_allocators_[i]->AddAllocVisitor(visitor);
     }
   }
   while (bus_id >= static_cast<int64>(gpu_visitors_.size())) {
diff --git a/tensorflow/core/common_runtime/gpu/process_state.h b/tensorflow/core/common_runtime/gpu/process_state.h
index 319c508b92f539cdac04ff5acfa4740b0697bcd5..abe458f685b5425d3dc4c469a33251c2b531fb80 100644
--- a/tensorflow/core/common_runtime/gpu/process_state.h
+++ b/tensorflow/core/common_runtime/gpu/process_state.h
@@ -17,9 +17,11 @@ limitations under the License.
 #define TENSORFLOW_COMMON_RUNTIME_GPU_PROCESS_STATE_H_
 
 #include <functional>
+#include <map>
 #include <unordered_map>
 #include <vector>
 
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/thread_annotations.h"
@@ -80,17 +82,17 @@ class ProcessState {
   //
   // 'total_bytes' is the total number of bytes that should be made
   // available to the allocator.  The first call to this function for
-  // a given gpu_id creates the allocator, so only the total_bytes
+  // a given tf_gpu_id creates the allocator, so only the total_bytes
   // used on that first call is used.
   //
   // "Allocator type" describes the type of algorithm to use for the
   // underlying allocator.  REQUIRES: Must be a valid type (see
   // config.proto for the list of supported strings.).
   //
-  // REQUIRES: gpu_id must be a valid ordinal for a GPU available in the
+  // REQUIRES: tf_gpu_id must be a valid id for a BaseGPUDevice available in the
   // current system environment.  Otherwise returns nullptr.
-  virtual Allocator* GetGPUAllocator(const GPUOptions& options, int gpu_id,
-                                     size_t total_bytes);
+  virtual Allocator* GetGPUAllocator(const GPUOptions& options,
+                                     TfGpuId tf_gpu_id, size_t total_bytes);
 
   virtual Allocator* GetCUDAHostAllocator(int numa_node);
 
@@ -155,7 +157,8 @@ class RecordingAllocator : public Allocator {
   bool TracksAllocationSizes() override { return a_->TracksAllocationSizes(); }
   size_t RequestedSize(void* p) override { return a_->RequestedSize(p); }
   size_t AllocatedSize(void* p) override { return a_->AllocatedSize(p); }
-  void GetStats(AllocatorStats* stats) override { return a_->GetStats(stats); }
+  void GetStats(AllocatorStats* stats) override { a_->GetStats(stats); }
+  void ClearStats() override { a_->ClearStats(); }
   ProcessState::MDMap* mm_;  // not owned
   Allocator* a_;             // not owned
   ProcessState::MemDesc md_;
diff --git a/tensorflow/core/common_runtime/graph_optimizer.cc b/tensorflow/core/common_runtime/graph_optimizer.cc
index def185e52280bf004bf67cb1daef675c2f6ccff5..96ecfb41d4cae37112f73e6e60ece013a2a14bc0 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.cc
+++ b/tensorflow/core/common_runtime/graph_optimizer.cc
@@ -37,7 +37,8 @@ void GraphOptimizer::Optimize(
     FunctionLibraryRuntime* runtime, Env* env, Device* device,
     std::unique_ptr<Graph>* graph,
     const std::unordered_map<string, std::vector<PartialTensorShape>>*
-        shape_map) {
+        shape_map,
+    const std::function<bool(const Node*)>& cse_consider_fn) {
   Graph* g = graph->get();
   DumpGraph("Initial", g);
 
@@ -80,7 +81,7 @@ void GraphOptimizer::Optimize(
       changed = true;
     }
     if (opts_.do_common_subexpression_elimination() &&
-        OptimizeCSE(g, nullptr)) {
+        OptimizeCSE(g, cse_consider_fn)) {
       DumpGraph("OptimizeCSE", g);
       changed = true;
     }
diff --git a/tensorflow/core/common_runtime/graph_optimizer.h b/tensorflow/core/common_runtime/graph_optimizer.h
index f5be1f870db711e0e0d64371c6595009cfef1092..8f3a0821346f7485bc82e0f7a29076abdce7d4e9 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.h
+++ b/tensorflow/core/common_runtime/graph_optimizer.h
@@ -42,11 +42,15 @@ class GraphOptimizer {
   // pass may replace a node with a different node of the same name that has a
   // different number of outputs, or outputs with different known shapes.
   // TODO(b/65453533) introduce a unique way to name nodes in a graph.
+  //
+  // If cse_consider_fn is not null then only nodes for which cse_consider_fn
+  // returns true will be considered for CSE.
   void Optimize(
       FunctionLibraryRuntime* runtime, Env* env, Device* device,
       std::unique_ptr<Graph>* graph,
       const std::unordered_map<string, std::vector<PartialTensorShape>>*
-          shape_map);
+          shape_map,
+      const std::function<bool(const Node*)>& cse_consider_fn = nullptr);
 
  private:
   OptimizerOptions opts_;
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 63b74e8dbf1ac6482579e96fba32c952e0fe561e..c7a2b616c7b4ebeecf7e8f00b45a30263b36dd40 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -115,7 +115,9 @@ class MklCPUAllocator : public Allocator {
     allocator_->DeallocateRaw(ptr);
   }
 
-  void GetStats(AllocatorStats* stats) { return allocator_->GetStats(stats); }
+  void GetStats(AllocatorStats* stats) override { allocator_->GetStats(stats); }
+
+  void ClearStats() override { allocator_->ClearStats(); }
 
  private:
   // Hooks provided by this allocator for memory allocation routines from MKL
diff --git a/tensorflow/core/common_runtime/pending_counts.h b/tensorflow/core/common_runtime/pending_counts.h
index 9e39b6b7b93a8e35ad3b47c1c637f7d906649823..5707f5259228c0e54d6d858652a8c50986c0c49b 100644
--- a/tensorflow/core/common_runtime/pending_counts.h
+++ b/tensorflow/core/common_runtime/pending_counts.h
@@ -44,7 +44,7 @@ namespace tensorflow {
 
 //    PendingCounts counts(layout);
 //    ...
-//    counts.decrement_panding(h[id], 1);
+//    counts.decrement_pending(h[id], 1);
 class PendingCounts {
  public:
   // The state machine for a node's execution.
diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc
index 54f082e823d463301fc5f437781d01ce96741568..a913f2075181a3896015579d79093395d67101ff 100644
--- a/tensorflow/core/common_runtime/placer.cc
+++ b/tensorflow/core/common_runtime/placer.cc
@@ -369,7 +369,8 @@ class ColocationGraph {
                 "Could not satisfy explicit device specification '",
                 node->requested_device(), "' because no supported kernel for ",
                 specified_device_name.type, " devices is available.",
-                debug_info);
+                debug_info, "\nRegistered kernels:\n",
+                KernelsRegisteredForOp(node->type_string()));
           } else {
             return errors::InvalidArgument(
                 "Could not satisfy explicit device specification '",
diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc
index 142ff2339b90d56381e211c4c7b73009c8134949..12947e284a36fef171caf6af0c46d59ca89efb61 100644
--- a/tensorflow/core/common_runtime/process_function_library_runtime.cc
+++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc
@@ -30,7 +30,10 @@ ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime(
     const FunctionLibraryDefinition* lib_def,
     const OptimizerOptions& optimizer_options,
     DistributedFunctionLibraryRuntime* parent)
-    : device_mgr_(device_mgr), lib_def_(lib_def), parent_(parent) {
+    : device_mgr_(device_mgr),
+      lib_def_(lib_def),
+      next_handle_(0),
+      parent_(parent) {
   if (device_mgr == nullptr) {
     flr_map_[nullptr] =
         NewFunctionLibraryRuntime(nullptr, env, nullptr, graph_def_version,
@@ -50,7 +53,10 @@ ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime(
     const OptimizerOptions& optimizer_options,
     CustomKernelCreator custom_kernel_creator,
     DistributedFunctionLibraryRuntime* parent)
-    : device_mgr_(device_mgr), lib_def_(lib_def), parent_(parent) {
+    : device_mgr_(device_mgr),
+      lib_def_(lib_def),
+      next_handle_(0),
+      parent_(parent) {
   if (device_mgr == nullptr) {
     flr_map_[nullptr] = NewFunctionLibraryRuntime(
         nullptr, env, nullptr, graph_def_version, lib_def, optimizer_options,
@@ -81,16 +87,6 @@ ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime(
           device_mgr, env, graph_def_version, lib_def, optimizer_options,
           std::move(custom_kernel_creator), nullptr /* cluster_flr */) {}
 
-/* static */
-string ProcessFunctionLibraryRuntime::ObtainFunctionTarget(
-    const AttrSlice& attrs) {
-  const AttrValue* value;
-  if (!attrs.Find("_target", &value).ok()) {
-    return "";
-  }
-  return DeviceNameUtils::CanonicalizeDeviceName(value->s());
-}
-
 /* static */
 Status ProcessFunctionLibraryRuntime::SendTensors(
     const string& source_device, const string& target_device,
@@ -185,30 +181,38 @@ FunctionLibraryRuntime::Handle ProcessFunctionLibraryRuntime::AddHandle(
   FunctionLibraryRuntime::Handle h =
       gtl::FindWithDefault(table_, function_key, kInvalidHandle);
   if (h != kInvalidHandle) {
-    return h;
+    if (function_data_.count(h) != 0) return h;
   }
-  h = function_data_.size();
-  function_data_.emplace_back(device_name, local_handle);
+  h = next_handle_;
+  function_data_.insert({h, FunctionData(device_name, local_handle)});
   table_[function_key] = h;
+  next_handle_++;
   return h;
 }
 
 FunctionLibraryRuntime::Handle ProcessFunctionLibraryRuntime::GetHandle(
     const string& function_key) const {
   mutex_lock l(mu_);
-  return gtl::FindWithDefault(table_, function_key, kInvalidHandle);
+  FunctionLibraryRuntime::Handle h =
+      gtl::FindWithDefault(table_, function_key, kInvalidHandle);
+  if (h != kInvalidHandle) {
+    if (function_data_.count(h) == 0) return kInvalidHandle;
+  }
+  return h;
 }
 
 bool ProcessFunctionLibraryRuntime::IsInstantiatedOnDevice(
     const string& device_name, FunctionLibraryRuntime::Handle handle) {
-  return GetHandleOnDevice(device_name, handle) != -1;
+  return GetHandleOnDevice(device_name, handle) != kInvalidHandle;
 }
 
 FunctionLibraryRuntime::LocalHandle
 ProcessFunctionLibraryRuntime::GetHandleOnDevice(
     const string& device_name, FunctionLibraryRuntime::Handle handle) {
   mutex_lock l(mu_);
-  CHECK_LE(handle, function_data_.size());
+  if (function_data_.count(handle) == 0) {
+    return kInvalidLocalHandle;
+  }
   const FunctionData& function_data = function_data_[handle];
   if (function_data.target_device != device_name) {
     return kInvalidLocalHandle;
@@ -219,32 +223,56 @@ ProcessFunctionLibraryRuntime::GetHandleOnDevice(
 string ProcessFunctionLibraryRuntime::GetDeviceName(
     FunctionLibraryRuntime::Handle handle) {
   mutex_lock l(mu_);
-  CHECK_LE(handle, function_data_.size());
+  CHECK_EQ(1, function_data_.count(handle));
   const FunctionData& function_data = function_data_[handle];
   return function_data.target_device;
 }
 
 Status ProcessFunctionLibraryRuntime::Instantiate(
     const string& function_name, AttrSlice attrs,
+    const FunctionLibraryRuntime::InstantiateOptions& options,
     FunctionLibraryRuntime::Handle* handle) {
   *handle = kInvalidHandle;
-  string target = ObtainFunctionTarget(attrs);
-  FunctionLibraryRuntime* flr = GetFLR(target);
+  FunctionLibraryRuntime* flr = GetFLR(options.target);
   if (flr != nullptr) {
-    return flr->Instantiate(function_name, attrs, handle);
+    return flr->Instantiate(function_name, attrs, options, handle);
   }
   if (parent_ == nullptr) {
     return errors::Internal(
-        "Currently don't support instantiating functions on device: ", target);
+        "Currently don't support instantiating functions on device: ",
+        options.target);
   }
   FunctionLibraryRuntime::Handle cluster_handle;
-  TF_RETURN_IF_ERROR(
-      parent_->Instantiate(function_name, *lib_def_, attrs, &cluster_handle));
+  TF_RETURN_IF_ERROR(parent_->Instantiate(function_name, *lib_def_, attrs,
+                                          options, &cluster_handle));
   string function_key = Canonicalize(function_name, attrs);
-  *handle = AddHandle(function_key, target, cluster_handle);
+  *handle = AddHandle(function_key, options.target, cluster_handle);
+  return Status::OK();
+}
+
+Status ProcessFunctionLibraryRuntime::RemoveHandle(
+    FunctionLibraryRuntime::Handle handle) {
+  mutex_lock l(mu_);
+  function_data_.erase(handle);
   return Status::OK();
 }
 
+Status ProcessFunctionLibraryRuntime::ReleaseHandle(
+    FunctionLibraryRuntime::Handle handle) {
+  FunctionLibraryRuntime* flr = nullptr;
+  string target_device;
+  {
+    mutex_lock l(mu_);
+    CHECK_EQ(1, function_data_.count(handle));
+    target_device = function_data_[handle].target_device;
+  }
+  flr = GetFLR(target_device);
+  if (flr != nullptr) {
+    return flr->ReleaseHandle(handle);
+  }
+  return errors::InvalidArgument("Handle not found: ", handle);
+}
+
 void ProcessFunctionLibraryRuntime::Run(
     const FunctionLibraryRuntime::Options& opts,
     FunctionLibraryRuntime::Handle handle, gtl::ArraySlice<Tensor> args,
@@ -261,7 +289,10 @@ void ProcessFunctionLibraryRuntime::Run(
   FunctionLibraryRuntime::LocalHandle local_handle;
   {
     mutex_lock l(mu_);
-    CHECK_LE(handle, function_data_.size());
+    if (function_data_.count(handle) == 0) {
+      done(errors::NotFound("Handle: ", handle, " not found."));
+      return;
+    }
     target_device = function_data_[handle].target_device;
     local_handle = function_data_[handle].local_handle;
   }
diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.h b/tensorflow/core/common_runtime/process_function_library_runtime.h
index a267bc3601f990206f7fb5202f6186543e42eb19..38003b772630221f3681866309a1a83a526eb95c 100644
--- a/tensorflow/core/common_runtime/process_function_library_runtime.h
+++ b/tensorflow/core/common_runtime/process_function_library_runtime.h
@@ -53,11 +53,6 @@ class ProcessFunctionLibraryRuntime {
                                 const OptimizerOptions& optimizer_options,
                                 CustomKernelCreator custom_kernel_creator);
 
-  // Given a list of attrs on a function, extracts the "_target" attribute which
-  // indicates which device to run the function on. If it can't find the _target
-  // attribute, returns "". Canonicalizes the device name.
-  static string ObtainFunctionTarget(const AttrSlice& attrs);
-
   // Sends `tensors_to_send` from `source_device` to `target_device` using
   // `rendezvous`. `key_prefix` is used as a prefix for the keys sent to the
   // Rendezvous. `device_context` should be the DeviceContext of the device
@@ -121,8 +116,15 @@ class ProcessFunctionLibraryRuntime {
   // Allows for function_name to be instantiated on different devices
   // as specified in attrs.
   Status Instantiate(const string& function_name, AttrSlice attrs,
+                     const FunctionLibraryRuntime::InstantiateOptions& options,
                      FunctionLibraryRuntime::Handle* handle);
 
+  // Delegates to the local FLR that owns state corresponding to `handle` and
+  // tells it to release it. If the `handle` isnt' needed at all, the local FLR
+  // might call RemoveHandle on this to get rid of the state owned by the Proc
+  // FLR.
+  Status ReleaseHandle(FunctionLibraryRuntime::Handle handle);
+
   // Runs the function with given `handle`. Function could have been
   // instantiated on any device. More details in framework/function.h
   void Run(const FunctionLibraryRuntime::Options& opts,
@@ -140,6 +142,9 @@ class ProcessFunctionLibraryRuntime {
   // of the device where the function is registered.
   string GetDeviceName(FunctionLibraryRuntime::Handle handle);
 
+  // Removes handle from the state owned by this object.
+  Status RemoveHandle(FunctionLibraryRuntime::Handle handle);
+
   friend class FunctionLibraryRuntimeImpl;
 
   mutable mutex mu_;
@@ -151,6 +156,7 @@ class ProcessFunctionLibraryRuntime {
     FunctionData(const string& target_device,
                  FunctionLibraryRuntime::LocalHandle local_handle)
         : target_device(target_device), local_handle(local_handle) {}
+    FunctionData() : FunctionData("", -1) {}
   };
 
   const DeviceMgr* const device_mgr_;
@@ -158,8 +164,10 @@ class ProcessFunctionLibraryRuntime {
   // Holds all the function invocations here.
   std::unordered_map<string, FunctionLibraryRuntime::Handle> table_
       GUARDED_BY(mu_);
-  std::vector<FunctionData> function_data_ GUARDED_BY(mu_);
+  std::unordered_map<FunctionLibraryRuntime::Handle, FunctionData>
+      function_data_ GUARDED_BY(mu_);
   std::unordered_map<Device*, std::unique_ptr<FunctionLibraryRuntime>> flr_map_;
+  int next_handle_ GUARDED_BY(mu_);
   DistributedFunctionLibraryRuntime* const parent_;
 };
 
diff --git a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc
index 6bc8f980c7ab508f80a7c85a8e557880b8a4ab58..439ba1ce965ebe4addb525cd3d17d794feaecd1f 100644
--- a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc
+++ b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc
@@ -49,10 +49,12 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test {
   }
 
   Status Run(const string& name, FunctionLibraryRuntime::Options opts,
-             test::function::Attrs attrs, const std::vector<Tensor>& args,
-             std::vector<Tensor*> rets) {
+             test::function::Attrs attrs,
+             const FunctionLibraryRuntime::InstantiateOptions& instantiate_opts,
+             const std::vector<Tensor>& args, std::vector<Tensor*> rets) {
     FunctionLibraryRuntime::Handle handle;
-    Status status = proc_flr_->Instantiate(name, attrs, &handle);
+    Status status =
+        proc_flr_->Instantiate(name, attrs, instantiate_opts, &handle);
     if (!status.ok()) {
       return status;
     }
@@ -82,6 +84,22 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test {
 
     EXPECT_GE(call_count, 1);  // Test runner is used.
 
+    // Release the handle and then try running the function. It shouldn't
+    // succeed.
+    status = proc_flr_->ReleaseHandle(handle);
+    if (!status.ok()) {
+      return status;
+    }
+    Notification done2;
+    proc_flr_->Run(opts, handle, args, &out,
+                   [&status, &done2](const Status& s) {
+                     status = s;
+                     done2.Notify();
+                   });
+    done2.WaitForNotification();
+    EXPECT_TRUE(errors::IsNotFound(status));
+    EXPECT_TRUE(StringPiece(status.error_message()).contains("not found."));
+
     return Status::OK();
   }
 
@@ -126,21 +144,6 @@ TEST_F(ProcessFunctionLibraryRuntimeTest, Basic) {
   rendezvous_->Unref();
 }
 
-TEST_F(ProcessFunctionLibraryRuntimeTest, ObtainFunctionTarget) {
-  AttrSlice empty_attrs;
-  string target =
-      ProcessFunctionLibraryRuntime::ObtainFunctionTarget(empty_attrs);
-  EXPECT_EQ("", target);
-
-  AttrValueMap attr_values;
-  AttrValue v;
-  v.set_s("/job:a/replica:0/task:0/cpu:1");
-  AddAttr("_target", v, &attr_values);
-  AttrSlice attrs(&attr_values);
-  target = ProcessFunctionLibraryRuntime::ObtainFunctionTarget(attrs);
-  EXPECT_EQ("/job:a/replica:0/task:0/device:CPU:1", target);
-}
-
 TEST_F(ProcessFunctionLibraryRuntimeTest, GetDeviceIncarnation) {
   Init({});
   int64 incarnation;
@@ -160,12 +163,12 @@ TEST_F(ProcessFunctionLibraryRuntimeTest, SingleCall) {
   opts.source_device = "/job:a/replica:0/task:0/cpu:0";
   opts.rendezvous = rendezvous_;
   opts.remote_execution = true;
+  FunctionLibraryRuntime::InstantiateOptions instantiate_opts;
+  instantiate_opts.target = "/job:a/replica:0/task:0/cpu:0";
   auto x = test::AsTensor<float>({1, 2, 3, 4});
   Tensor y;
   TF_CHECK_OK(
-      Run("XTimesTwo", opts,
-          {{"T", DT_FLOAT}, {"_target", "/job:a/replica:0/task:0/cpu:0"}}, {x},
-          {&y}));
+      Run("XTimesTwo", opts, {{"T", DT_FLOAT}}, instantiate_opts, {x}, {&y}));
   test::ExpectTensorEqual<float>(y, test::AsTensor<float>({2, 4, 6, 8}));
   rendezvous_->Unref();
 }
@@ -176,9 +179,10 @@ TEST_F(ProcessFunctionLibraryRuntimeTest, SingleCallFindDevice) {
   opts.source_device = "/job:a/replica:0/task:0/cpu:0";
   opts.rendezvous = rendezvous_;
   opts.remote_execution = true;
+  FunctionLibraryRuntime::InstantiateOptions instantiate_opts;
+  instantiate_opts.target = "/job:a/replica:0/task:0/cpu:0";
   Tensor y;
-  TF_CHECK_OK(Run("FindDevice", opts,
-                  {{"_target", "/job:a/replica:0/task:0/cpu:0"}}, {}, {&y}));
+  TF_CHECK_OK(Run("FindDevice", opts, {}, instantiate_opts, {}, {&y}));
   test::ExpectTensorEqual<string>(
       y, test::AsTensor<string>({"/job:a/replica:0/task:0/device:CPU:0"},
                                 TensorShape({})));
@@ -192,16 +196,14 @@ TEST_F(ProcessFunctionLibraryRuntimeTest, MultipleCallsSameDeviceXTimes) {
   opts.source_device = "/job:a/replica:0/task:0/cpu:0";
   opts.rendezvous = rendezvous_;
   opts.remote_execution = true;
+  FunctionLibraryRuntime::InstantiateOptions instantiate_opts;
+  instantiate_opts.target = "/job:a/replica:0/task:0/cpu:0";
   Tensor y;
   TF_CHECK_OK(
-      Run("XTimesTwo", opts,
-          {{"T", DT_FLOAT}, {"_target", "/job:a/replica:0/task:0/cpu:0"}}, {x},
-          {&y}));
+      Run("XTimesTwo", opts, {{"T", DT_FLOAT}}, instantiate_opts, {x}, {&y}));
   test::ExpectTensorEqual<float>(y, test::AsTensor<float>({2, 4, 6, 8}));
   TF_CHECK_OK(
-      Run("XTimesFour", opts,
-          {{"T", DT_FLOAT}, {"_target", "/job:a/replica:0/task:0/cpu:0"}}, {x},
-          {&y}));
+      Run("XTimesFour", opts, {{"T", DT_FLOAT}}, instantiate_opts, {x}, {&y}));
   test::ExpectTensorEqual<float>(y, test::AsTensor<float>({4, 8, 12, 16}));
   rendezvous_->Unref();
 }
@@ -212,14 +214,14 @@ TEST_F(ProcessFunctionLibraryRuntimeTest, MultipleCallsSameDeviceFindDevice) {
   opts.source_device = "/job:a/replica:0/task:0/cpu:0";
   opts.rendezvous = rendezvous_;
   opts.remote_execution = true;
+  FunctionLibraryRuntime::InstantiateOptions instantiate_opts;
+  instantiate_opts.target = "/job:a/replica:0/task:0/cpu:1";
   Tensor y;
-  TF_CHECK_OK(Run("FindDevice", opts,
-                  {{"_target", "/job:a/replica:0/task:0/cpu:1"}}, {}, {&y}));
+  TF_CHECK_OK(Run("FindDevice", opts, {}, instantiate_opts, {}, {&y}));
   test::ExpectTensorEqual<string>(
       y, test::AsTensor<string>({"/job:a/replica:0/task:0/device:CPU:1"},
                                 TensorShape({})));
-  TF_CHECK_OK(Run("FindDevice", opts,
-                  {{"_target", "/job:a/replica:0/task:0/cpu:1"}}, {}, {&y}));
+  TF_CHECK_OK(Run("FindDevice", opts, {}, instantiate_opts, {}, {&y}));
   test::ExpectTensorEqual<string>(
       y, test::AsTensor<string>({"/job:a/replica:0/task:0/device:CPU:1"},
                                 TensorShape({})));
@@ -233,11 +235,15 @@ TEST_F(ProcessFunctionLibraryRuntimeTest, MultipleCallsDiffDeviceFindDevice) {
   opts.rendezvous = rendezvous_;
   opts.remote_execution = true;
   Tensor y;
-  TF_CHECK_OK(Run("FindDevice", opts, {{"_target", "/cpu:0"}}, {}, {&y}));
+  FunctionLibraryRuntime::InstantiateOptions instantiate_opts_0;
+  instantiate_opts_0.target = "/job:a/replica:0/task:0/device:CPU:0";
+  TF_CHECK_OK(Run("FindDevice", opts, {}, instantiate_opts_0, {}, {&y}));
   test::ExpectTensorEqual<string>(
       y, test::AsTensor<string>({"/job:a/replica:0/task:0/device:CPU:0"},
                                 TensorShape({})));
-  TF_CHECK_OK(Run("FindDevice", opts, {{"_target", "/cpu:1"}}, {}, {&y}));
+  FunctionLibraryRuntime::InstantiateOptions instantiate_opts_1;
+  instantiate_opts_1.target = "/job:a/replica:0/task:0/device:CPU:1";
+  TF_CHECK_OK(Run("FindDevice", opts, {}, instantiate_opts_1, {}, {&y}));
   test::ExpectTensorEqual<string>(
       y, test::AsTensor<string>({"/job:a/replica:0/task:0/device:CPU:1"},
                                 TensorShape({})));
diff --git a/tensorflow/core/common_runtime/rendezvous_util.cc b/tensorflow/core/common_runtime/rendezvous_util.cc
index a1e31016c2bc93aeae76175320255e0d43602265..92dc03812e9941e07500a9dc26baa7c1227430dc 100644
--- a/tensorflow/core/common_runtime/rendezvous_util.cc
+++ b/tensorflow/core/common_runtime/rendezvous_util.cc
@@ -32,6 +32,10 @@ Status SendTensorsToRendezvous(
         "; alloc_attrs.size() = ", alloc_attrs.size());
   }
 
+  if (!rendezvous) {
+    return errors::InvalidArgument("Rendezvous is null.");
+  }
+
   Rendezvous::ParsedKey parsed;
   for (int i = 0; i < keys.size(); ++i) {
     Rendezvous::Args rendez_args;
diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc
index c82d57694a72f1f66381f957e8654ad1b13db21c..45cdab98e0642a3fbfee3dfa415696b98251600a 100644
--- a/tensorflow/core/common_runtime/shape_refiner.cc
+++ b/tensorflow/core/common_runtime/shape_refiner.cc
@@ -127,7 +127,7 @@ Status InferShapesForFunctionSubNode(const Node* node, ShapeRefiner* refiner,
 //
 // NOTE: Recursive user-defined functions are not supported.
 // Maybe we won't support recursive functions at all in TF, because of
-// other maintanabilty issues.
+// other maintainability issues.
 Status ShapeRefiner::InferShapesForFunction(
     const tensorflow::FunctionDef* function_def, bool keep_nested_shapes,
     ExtendedInferenceContext* outer_context) {
@@ -558,6 +558,13 @@ Status ShapeRefiner::ExtractConstantSubgraph(
     return Status::OK();
   }
 
+  if (target_node->type_string() == "PlaceholderWithDefault") {
+    return Status::OK();
+  }
+
+  // TODO(skyewm): more of the filtering applied in input nodes below should be
+  // applied to target_node here
+
   struct NodeAndRecursed {
     Node* new_node = nullptr;
     bool recursed = false;
@@ -608,6 +615,14 @@ Status ShapeRefiner::ExtractConstantSubgraph(
       return Status::OK();
     }
 
+    // Placeholders should never be constant folded because their outputs are
+    // fed by the user. Note that "Placeholder" nodes have no inputs so are
+    // handled below.
+    if (current_node->type_string() == "PlaceholderWithDefault") {
+      *is_constant_graph = false;
+      return Status::OK();
+    }
+
     // If there is nothing more to recurse down, see if
     // the generator node is a constant.
     if (current_node->num_inputs() == 0) {
diff --git a/tensorflow/core/common_runtime/shape_refiner_test.cc b/tensorflow/core/common_runtime/shape_refiner_test.cc
index e4eef1dbe28bc79d2838b90ba6595a04ad1e4e2e..adf5a9afff2ebc6848db8811506ebd4a031df2bb 100644
--- a/tensorflow/core/common_runtime/shape_refiner_test.cc
+++ b/tensorflow/core/common_runtime/shape_refiner_test.cc
@@ -724,6 +724,25 @@ TEST_F(ShapeRefinerTest, PropagateRange) {
   EXPECT_EQ("[1,4,7,10]", ctx->DebugString(ctx->output(0)));
 }
 
+// Make sure PlaceholderWithDefaults aren't treated as constants.
+TEST_F(ShapeRefinerTest, NoPropagatePlaceholderWithDefault) {
+  Scope root = Scope::NewRootScope();
+  auto constant = ops::Const<int>(root, 2);
+  auto placeholder =
+      ops::PlaceholderWithDefault(root, constant, PartialTensorShape());
+  Node* shape_data;
+  TF_ASSERT_OK(NodeBuilder("Test", "ShapeData")
+                   .Input(placeholder.node())
+                   .Finalize(root.graph(), &shape_data));
+
+  ShapeRefiner m(TF_GRAPH_DEF_VERSION, OpRegistry::Global());
+  TF_ASSERT_OK(m.AddNode(constant.node()));
+  TF_ASSERT_OK(m.AddNode(placeholder.node()));
+  TF_ASSERT_OK(m.AddNode(shape_data));
+  shape_inference::InferenceContext* ic = m.GetContext(shape_data);
+  EXPECT_EQ(ic->DebugString(ic->output(0)), "?");
+}
+
 TEST_F(ShapeRefinerTest, ConstantValueTwoInputsToSameNode) {
   Scope root = Scope::NewRootScope();
   // This node is used as two inputs to 'range'.
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
index 65b0db5bf610989f2b6187cb85f8c5e6ecae14d5..9094824ee734a9398db5aca2a507af4acd07c26b 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
@@ -71,6 +71,13 @@ void SYCLAllocator::GetStats(AllocatorStats* stats) {
   *stats = stats_;
 }
 
+void SYCLAllocator::ClearStats() override {
+  mutex_lock l(mu_);
+  stats_.num_allocs = 0;
+  stats_.max_bytes_in_use = stats_.bytes_in_use;
+  stats_.max_alloc_size = 0;
+}
+
 size_t SYCLAllocator::RequestedSize(void* ptr) {
   mutex_lock lock(mu_);
   if(!sycl_device_) {
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.h b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
index 3066e0e4426cbe6688307cf2ee9aae6b8c2e7d34..cca9f92c62e2a4f4d57c8a6111b53dccee505f93 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_allocator.h
+++ b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
@@ -44,6 +44,8 @@ class SYCLAllocator : public Allocator {
   }
   bool Ok() { return sycl_device_ && sycl_device_->ok(); }
   void GetStats(AllocatorStats* stats) override;
+  void ClearStats() override;
+
   // The SYCL buffers keep track of their size, so we already have tracking.
   bool TracksAllocationSizes() override { return true; }
   // Get the size of the corresponding SYCL buffer.
diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD
index 6d796768de0a33f0f16e6954474ac95b92ec29e4..a32badef6dfdb8b62662da880c99842b1cafd13c 100644
--- a/tensorflow/core/debug/BUILD
+++ b/tensorflow/core/debug/BUILD
@@ -56,6 +56,7 @@ tf_proto_library(
     cc_grpc_version = 1,
     protodeps = [
         ":debugger_event_metadata_proto",
+        "//tensorflow/core/profiler:protos_all",
     ] + tf_additional_all_protos(),
     visibility = ["//tensorflow:__subpackages__"],
 )
@@ -123,6 +124,7 @@ tf_cuda_library(
     deps = [
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:proto_text",
@@ -144,6 +146,7 @@ tf_cuda_library(
         ":debugger_event_metadata_proto_cc",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:proto_text",
diff --git a/tensorflow/core/debug/debug_service.proto b/tensorflow/core/debug/debug_service.proto
index 547c0576f08769f9e373a98231caf172a9312937..4bef74dfc5706b0033ff91b5e6cf09bb119d657d 100644
--- a/tensorflow/core/debug/debug_service.proto
+++ b/tensorflow/core/debug/debug_service.proto
@@ -18,6 +18,8 @@ syntax = "proto3";
 package tensorflow;
 
 import "tensorflow/core/framework/tensor.proto";
+import "tensorflow/core/profiler/tfprof_log.proto";
+import "tensorflow/core/protobuf/debug.proto";
 import "tensorflow/core/util/event.proto";
 
 // Reply message from EventListener to the client, i.e., to the source of the
@@ -46,6 +48,38 @@ message EventReply {
   // during debugging.
 }
 
+// Data on the traceback of a debugged call, e.g., a Session.run() call, or the
+// execution of an eager operation.
+message CallTraceback {
+  enum CallType {
+    UNSPECIFIED = 0;
+    GRAPH_EXECUTION = 1;
+    EAGER_EXECUTION = 2;
+  }
+
+  CallType call_type = 1;
+
+  // A key for the call. For example, for graph execution, this is a key
+  // consisting of the names of the fed and fetched tensors.
+  string call_key = 2;
+
+  // Traceback stack for the origin of the call event.
+  // For graph execution, this is the stack of the Session.run() call.
+  // For eager execution, this is the stack of the Python line that invokes
+  // the execution of the eager op.
+  tfprof.CodeDef origin_stack = 3;
+
+  // Keeps track of the mapping from integer IDs in `origin_stack` to actual
+  // string values (e.g., file paths, function names).
+  map<int64, string> origin_id_to_string = 4;
+
+  // Traceback for the graph (if any) involved in the call.
+  tfprof.OpLogProto graph_traceback = 5;
+
+  // Version of the graph in `graph_traceback` (if any).
+  int64 graph_version = 6;
+}
+
 // EventListener: Receives Event protos, e.g., from debugged TensorFlow
 // runtime(s).
 service EventListener {
@@ -57,4 +91,10 @@ service EventListener {
   //      ops that get executed immediately after the beginning of the graph
   //      execution.
   rpc SendEvents(stream Event) returns (stream EventReply);
+
+  // Send the tracebacks of a TensorFlow execution call.
+  rpc SendTracebacks(CallTraceback) returns (EventReply);
+
+  // Send a collection of source code files being debugged.
+  rpc SendSourceFiles(DebuggedSourceFiles) returns (EventReply);
 }
diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD
index 29164bbffe0fa586d0ca7ac72ee94c4fbb91aa3e..2db7ebd7952c9e1edf374267ee33f697eb846885 100644
--- a/tensorflow/core/distributed_runtime/BUILD
+++ b/tensorflow/core/distributed_runtime/BUILD
@@ -269,7 +269,6 @@ cc_library(
     hdrs = ["worker_cache_wrapper.h"],
     deps = [
         ":worker_cache",
-        ":worker_interface",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
     ],
@@ -335,6 +334,7 @@ cc_library(
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:master_proto_cc",
@@ -372,6 +372,7 @@ cc_library(
     deps = [
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:tensorflow_opensource",
     ],
 )
@@ -415,6 +416,7 @@ cc_library(
         ":worker_env",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc
index 593fe0e363edc543a74572ed51128777e048a47d..3a8d5912369525253904bd700dfdc6e3eb26e0ae 100644
--- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc
+++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc
@@ -26,10 +26,10 @@ namespace tensorflow {
 
 /* static */
 Status ClusterFunctionLibraryRuntime::ConstructFunctionGraph(
-    const OpDef& sig, AttrSlice attrs, GraphDef* g,
+    const OpDef& sig, AttrSlice attrs,
+    const FunctionLibraryRuntime::InstantiateOptions& options, GraphDef* g,
     std::vector<string>* send_keys, std::vector<string>* recv_keys) {
-  const string& target =
-      ProcessFunctionLibraryRuntime::ObtainFunctionTarget(attrs);
+  const string& target = options.target;
   // Construct recv nodes for each input argument.
   int i = 0;
   for (const auto& in : sig.input_arg()) {
@@ -105,6 +105,7 @@ Status ClusterFunctionLibraryRuntime::ConstructFunctionGraph(
         Rendezvous::CreateKey(target, 1 /* src_incarnation */, target,
                               out.name(), FrameAndIter(0, 0));
     recv_keys->push_back(key);
+    ++i;
   }
   return Status::OK();
 }
@@ -118,14 +119,17 @@ ClusterFunctionLibraryRuntime::~ClusterFunctionLibraryRuntime() {
 
 Status ClusterFunctionLibraryRuntime::Instantiate(
     const string& function_name, const FunctionLibraryDefinition& lib_def,
-    AttrSlice attrs, FunctionLibraryRuntime::LocalHandle* handle) {
-  const string& target =
-      ProcessFunctionLibraryRuntime::ObtainFunctionTarget(attrs);
-  WorkerInterface* wi = worker_session_->worker_cache->CreateWorker(target);
+    AttrSlice attrs, const FunctionLibraryRuntime::InstantiateOptions& options,
+    FunctionLibraryRuntime::LocalHandle* handle) {
+  WorkerInterface* wi =
+      worker_session_->worker_cache->CreateWorker(options.target);
 
   if (wi == nullptr) {
-    return errors::InvalidArgument("Could not find worker with target: ",
-                                   target);
+    std::vector<string> workers;
+    worker_session_->worker_cache->ListWorkers(&workers);
+    return errors::InvalidArgument(
+        "Could not find worker with target: ", options.target,
+        " Available workers: ", str_util::Join(workers, ", "));
   }
 
   // Make RPC and obtain a graph handle.
@@ -133,8 +137,8 @@ Status ClusterFunctionLibraryRuntime::Instantiate(
   const OpDef& sig = fdef->signature();
   GraphDef gdef;
   std::vector<string> send_keys, recv_keys;
-  TF_RETURN_IF_ERROR(
-      ConstructFunctionGraph(sig, attrs, &gdef, &send_keys, &recv_keys));
+  TF_RETURN_IF_ERROR(ConstructFunctionGraph(sig, attrs, options, &gdef,
+                                            &send_keys, &recv_keys));
   *gdef.mutable_library() = lib_def.ToProto();
 
   RegisterGraphRequest req;
@@ -148,8 +152,8 @@ Status ClusterFunctionLibraryRuntime::Instantiate(
 
   mutex_lock l(mu_);
   *handle = function_data_.size();
-  function_data_.push_back(
-      FunctionData(resp.graph_handle(), target, wi, send_keys, recv_keys));
+  function_data_.push_back(FunctionData(resp.graph_handle(), options.target, wi,
+                                        send_keys, recv_keys));
   return Status::OK();
 }
 
diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h
index dd4ea68f57121d491f6352cbf13ed47b7ecf6bd4..3deb80dff79e7f54684b39d4bd17a63b99836eab 100644
--- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h
+++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h
@@ -34,6 +34,7 @@ class ClusterFunctionLibraryRuntime : public DistributedFunctionLibraryRuntime {
 
   Status Instantiate(const string& function_name,
                      const FunctionLibraryDefinition& lib_def, AttrSlice attrs,
+                     const FunctionLibraryRuntime::InstantiateOptions& options,
                      FunctionLibraryRuntime::LocalHandle* handle) override;
 
   void Run(const FunctionLibraryRuntime::Options& opts,
@@ -42,10 +43,10 @@ class ClusterFunctionLibraryRuntime : public DistributedFunctionLibraryRuntime {
            FunctionLibraryRuntime::DoneCallback done) override;
 
  private:
-  static Status ConstructFunctionGraph(const OpDef& sig, AttrSlice attrs,
-                                       GraphDef* g,
-                                       std::vector<string>* send_keys,
-                                       std::vector<string>* recv_keys);
+  static Status ConstructFunctionGraph(
+      const OpDef& sig, AttrSlice attrs,
+      const FunctionLibraryRuntime::InstantiateOptions& options, GraphDef* g,
+      std::vector<string>* send_keys, std::vector<string>* recv_keys);
   friend class ClusterFunctionLibraryRuntimeTest;
 
   mutable mutex mu_;
diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc b/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc
index 04587dd8ca8638d031d840b0b53b5168bdab63c2..1810996ab8c2a8e4901c007517ae276829b4fc2a 100644
--- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc
+++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc
@@ -47,30 +47,31 @@ class ClusterFunctionLibraryRuntimeTest : public ::testing::Test {
         new ClusterFunctionLibraryRuntime(worker_session_.get()));
   }
 
-  Status ConstructFunctionGraphHelper(const OpDef& sig,
-                                      test::function::Attrs attrs, GraphDef* g,
-                                      std::vector<string>* send_keys,
-                                      std::vector<string>* recv_keys) {
+  Status ConstructFunctionGraphHelper(
+      const OpDef& sig, test::function::Attrs attrs,
+      const FunctionLibraryRuntime::InstantiateOptions& options, GraphDef* g,
+      std::vector<string>* send_keys, std::vector<string>* recv_keys) {
     return ClusterFunctionLibraryRuntime::ConstructFunctionGraph(
-        sig, attrs, g, send_keys, recv_keys);
+        sig, attrs, options, g, send_keys, recv_keys);
   }
 
   Status Instantiate(const string& function_name,
                      const FunctionLibraryDefinition& lib_def,
                      test::function::Attrs attrs,
+                     const FunctionLibraryRuntime::InstantiateOptions& options,
                      FunctionLibraryRuntime::LocalHandle* local_handle) {
-    return cluster_flr_->Instantiate(function_name, lib_def, attrs,
+    return cluster_flr_->Instantiate(function_name, lib_def, attrs, options,
                                      local_handle);
   }
 
-  Status InstantiateAndRun(const string& function_name,
-                           const FunctionLibraryDefinition& lib_def,
-                           test::function::Attrs attrs,
-                           const std::vector<Tensor>& args,
-                           std::vector<Tensor*> rets) {
+  Status InstantiateAndRun(
+      const string& function_name, const FunctionLibraryDefinition& lib_def,
+      test::function::Attrs attrs,
+      const FunctionLibraryRuntime::InstantiateOptions& options,
+      const std::vector<Tensor>& args, std::vector<Tensor*> rets) {
     FunctionLibraryRuntime::LocalHandle handle;
-    TF_RETURN_IF_ERROR(
-        cluster_flr_->Instantiate(function_name, lib_def, attrs, &handle));
+    TF_RETURN_IF_ERROR(cluster_flr_->Instantiate(function_name, lib_def, attrs,
+                                                 options, &handle));
 
     Notification done;
     FunctionLibraryRuntime::Options opts;
@@ -102,15 +103,15 @@ class ClusterFunctionLibraryRuntimeTest : public ::testing::Test {
 TEST_F(ClusterFunctionLibraryRuntimeTest, ConstructFunctionGraph) {
   GraphDef actual;
   std::vector<string> send_keys, recv_keys;
-  TF_CHECK_OK(ConstructFunctionGraphHelper(
-      test::function::XTimesTwo().signature(),
-      {{"T", DT_FLOAT}, {"_target", "/job:a/replica:0/task:0/cpu:0"}}, &actual,
-      &send_keys, &recv_keys));
-
+  FunctionLibraryRuntime::InstantiateOptions instantiate_opts;
+  instantiate_opts.target = "/job:a/replica:0/task:0/device:CPU:0";
+  TF_CHECK_OK(ConstructFunctionGraphHelper(test::function::Swap().signature(),
+                                           {{"T", DT_FLOAT}}, instantiate_opts,
+                                           &actual, &send_keys, &recv_keys));
   GraphDef expected;
   protobuf::TextFormat::ParseFromString(R"(
 node {
-  name: "_recv_x_0"
+  name: "_recv_i0_0"
   op: "_Recv"
   device: "/job:a/replica:0/task:0/device:CPU:0"
   attr {
@@ -140,7 +141,7 @@ node {
   attr {
     key: "tensor_name"
     value {
-      s: "x"
+      s: "i0"
     }
   }
   attr {
@@ -151,9 +152,51 @@ node {
   }
 }
 node {
-  name: "XTimesTwo"
-  op: "XTimesTwo"
-  input: "_recv_x_0"
+  name: "_recv_i1_1"
+  op: "_Recv"
+  device: "/job:a/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "client_terminated"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "recv_device"
+    value {
+      s: "/job:a/replica:0/task:0/device:CPU:0"
+    }
+  }
+  attr {
+    key: "send_device"
+    value {
+      s: "/job:a/replica:0/task:0/device:CPU:0"
+    }
+  }
+  attr {
+    key: "send_device_incarnation"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "tensor_name"
+    value {
+      s: "i1"
+    }
+  }
+  attr {
+    key: "tensor_type"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "Swap"
+  op: "Swap"
+  input: "_recv_i0_0"
+  input: "_recv_i1_1"
   device: "/job:a/replica:0/task:0/device:CPU:0"
   attr {
     key: "T"
@@ -169,9 +212,51 @@ node {
   }
 }
 node {
-  name: "_send_y_0"
+  name: "_send_o0_0"
+  op: "_Send"
+  input: "Swap"
+  device: "/job:a/replica:0/task:0/device:CPU:0"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "client_terminated"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "recv_device"
+    value {
+      s: "/job:a/replica:0/task:0/device:CPU:0"
+    }
+  }
+  attr {
+    key: "send_device"
+    value {
+      s: "/job:a/replica:0/task:0/device:CPU:0"
+    }
+  }
+  attr {
+    key: "send_device_incarnation"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "tensor_name"
+    value {
+      s: "o0"
+    }
+  }
+}
+node {
+  name: "_send_o1_1"
   op: "_Send"
-  input: "XTimesTwo"
+  input: "Swap:1"
   device: "/job:a/replica:0/task:0/device:CPU:0"
   attr {
     key: "T"
@@ -206,10 +291,11 @@ node {
   attr {
     key: "tensor_name"
     value {
-      s: "y"
+      s: "o1"
     }
   }
-})",
+}
+)",
                                         &expected);
   TF_EXPECT_GRAPH_EQ(expected, actual);
 }
@@ -222,28 +308,30 @@ TEST_F(ClusterFunctionLibraryRuntimeTest, DISABLED_InstantiateAndRun) {
   FunctionDefLibrary proto;
   *(proto.add_function()) = test::function::XTimesTwoInt32();
   FunctionLibraryDefinition lib_def(OpRegistry::Global(), proto);
+  FunctionLibraryRuntime::InstantiateOptions instantiate_opts;
+  instantiate_opts.target = "/job:localhost/replica:0/task:1/cpu:0";
 
   Tensor y;
   auto x = test::AsTensor<int32>({1, 2, 3, 4});
-  TF_EXPECT_OK(InstantiateAndRun(
-      "XTimesTwoInt32", lib_def,
-      {{"_target", "/job:localhost/replica:0/task:1/cpu:0"}}, {x}, {&y}));
+  TF_EXPECT_OK(InstantiateAndRun("XTimesTwoInt32", lib_def, {},
+                                 instantiate_opts, {x}, {&y}));
   test::ExpectTensorEqual<int32>(y, test::AsTensor<int32>({2, 4, 6, 8}));
 }
 
 TEST_F(ClusterFunctionLibraryRuntimeTest,
        DISABLED_InstantiateAndRunAttrSubstitution) {
   FunctionDefLibrary proto;
-  *(proto.add_function()) = test::function::XTimesTwo();
+  *(proto.add_function()) = test::function::Swap();
   FunctionLibraryDefinition lib_def(OpRegistry::Global(), proto);
-
-  Tensor y;
-  auto x = test::AsTensor<float>({1, 2, 3, 4});
-  TF_EXPECT_OK(InstantiateAndRun(
-      "XTimesTwo", lib_def,
-      {{"T", DT_FLOAT}, {"_target", "/job:localhost/replica:0/task:1/cpu:0"}},
-      {x}, {&y}));
-  test::ExpectTensorEqual<float>(y, test::AsTensor<float>({2, 4, 6, 8}));
+  FunctionLibraryRuntime::InstantiateOptions instantiate_opts;
+  instantiate_opts.target = "/job:localhost/replica:0/task:1/cpu:0";
+  Tensor y1, y2;
+  auto x1 = test::AsTensor<float>({1, 2, 3, 4});
+  auto x2 = test::AsTensor<float>({4, 3, 2, 1});
+  TF_EXPECT_OK(InstantiateAndRun("Swap", lib_def, {{"T", DT_FLOAT}},
+                                 instantiate_opts, {x1, x2}, {&y1, &y2}));
+  test::ExpectTensorEqual<float>(y1, test::AsTensor<float>({4, 3, 2, 1}));
+  test::ExpectTensorEqual<float>(y2, test::AsTensor<float>({1, 2, 3, 4}));
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc
index 60d58af61dad56fbb09df041fb5ca1429fd451ad..0120f612ac8bee32999304b1a6f63fff3802606a 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.cc
+++ b/tensorflow/core/distributed_runtime/graph_mgr.cc
@@ -228,8 +228,14 @@ Status GraphMgr::InitItem(const string& session, const GraphDef& gdef,
     params.function_library = lib;
     params.create_kernel = [session, lib, opseg](const NodeDef& ndef,
                                                  OpKernel** kernel) {
-      // Caches the kernel only if the node is stateful.
-      if (!lib->IsStateful(ndef.op())) {
+      // We do not share the kernel via the OpSegment if the node is
+      // stateless, or a function.
+      // NOTE(mrry): We must not share function kernels (implemented
+      // using `CallOp`) between subgraphs, because `CallOp::handle_`
+      // is tied to a particular subgraph. Even if the function itself
+      // is stateful, the `CallOp` that invokes it is not.
+      if (!lib->IsStateful(ndef.op()) ||
+          lib->GetFunctionLibraryDefinition()->Find(ndef.op()) != nullptr) {
         return lib->CreateKernel(ndef, kernel);
       }
       auto create_fn = [lib, &ndef](OpKernel** kernel) {
@@ -475,8 +481,18 @@ void GraphMgr::StartParallelExecutors(const string& handle, int64 step_id,
   using std::placeholders::_1;
   // Line below is equivalent to this code, but does one less indirect call:
   //  args.runner = [pool](std::function<void()> fn) { pool->Schedule(fn); };
-  args.runner = std::bind(&thread::ThreadPool::Schedule, pool, _1);
+  auto default_runner = std::bind(&thread::ThreadPool::Schedule, pool, _1);
   for (const auto& unit : item->units) {
+    // TODO(zhengxq): if the device picks its own threadpool, we need to assign
+    //     less threads to the main compute pool by default.
+    thread::ThreadPool* device_thread_pool =
+        unit.device->tensorflow_device_thread_pool();
+    if (!device_thread_pool) {
+      args.runner = default_runner;
+    } else {
+      args.runner =
+          std::bind(&thread::ThreadPool::Schedule, device_thread_pool, _1);
+    }
     unit.root->RunAsync(args, barrier->Get());
   }
 }
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 03b65d8cba9112e272f52518ca6050ce5f16eb5d..dcc25e4426df386da2543f76239a1468af4bc3d2 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -446,7 +446,13 @@ class RunManyGraphs {
   // When the index-th call is done, updates the overall status.
   void WhenDone(int index, const Status& s) {
     TRACEPRINTF("Partition %d %s", index, s.ToString().c_str());
-    if (!s.ok()) {
+    auto resp = get(index)->resp.get();
+    if (resp->status_code() != error::Code::OK) {
+      // resp->status_code will only be non-OK if s.ok().
+      mutex_lock l(mu_);
+      UpdateStatusLocked(
+          Status(resp->status_code(), resp->status_error_message()));
+    } else if (!s.ok()) {
       mutex_lock l(mu_);
       UpdateStatusLocked(s);
     }
@@ -539,6 +545,7 @@ Status MasterSession::ReffedClientGraph::RunPartitions(
     c->req->set_graph_handle(part.graph_handle);
     c->req->set_step_id(step_id);
     *c->req->mutable_exec_opts() = exec_opts;
+    c->req->set_store_errors_in_response_body(true);
     // If any feeds are provided, send the feed values together
     // in the RunGraph request.
     // In the partial case, we only want to include feeds provided in the req.
diff --git a/tensorflow/core/distributed_runtime/message_wrappers.cc b/tensorflow/core/distributed_runtime/message_wrappers.cc
index a4a88e6e3b9ec734c0720b715dc9b3e30850c0ae..66ebb3080af7cd01021362b5ea0c0b54458aebfc 100644
--- a/tensorflow/core/distributed_runtime/message_wrappers.cc
+++ b/tensorflow/core/distributed_runtime/message_wrappers.cc
@@ -93,6 +93,15 @@ const RunOptions& InMemoryRunStepRequest::options() const { return options_; }
 
 RunOptions* InMemoryRunStepRequest::mutable_options() { return &options_; }
 
+bool InMemoryRunStepRequest::store_errors_in_response_body() const {
+  return store_errors_in_response_body_;
+}
+
+void InMemoryRunStepRequest::set_store_errors_in_response_body(
+    bool store_errors) {
+  store_errors_in_response_body_ = store_errors;
+}
+
 string InMemoryRunStepRequest::DebugString() const {
   return ToProto().DebugString();
 }
@@ -192,6 +201,15 @@ RunOptions* MutableProtoRunStepRequest::mutable_options() {
   return request_.mutable_options();
 }
 
+bool MutableProtoRunStepRequest::store_errors_in_response_body() const {
+  return request_.store_errors_in_response_body();
+}
+
+void MutableProtoRunStepRequest::set_store_errors_in_response_body(
+    bool store_errors) {
+  request_.set_store_errors_in_response_body(store_errors);
+}
+
 string MutableProtoRunStepRequest::DebugString() const {
   return request_.DebugString();
 }
@@ -250,6 +268,10 @@ const RunOptions& ProtoRunStepRequest::options() const {
   return request_->options();
 }
 
+bool ProtoRunStepRequest::store_errors_in_response_body() const {
+  return request_->store_errors_in_response_body();
+}
+
 string ProtoRunStepRequest::DebugString() const {
   return request_->DebugString();
 }
@@ -329,6 +351,15 @@ void InMemoryRunGraphRequest::set_is_last_partial_run(
   is_last_partial_run_ = is_last_partial_run;
 }
 
+bool InMemoryRunGraphRequest::store_errors_in_response_body() const {
+  return store_errors_in_response_body_;
+}
+
+void InMemoryRunGraphRequest::set_store_errors_in_response_body(
+    bool store_errors) {
+  store_errors_in_response_body_ = store_errors;
+}
+
 const RunGraphRequest& InMemoryRunGraphRequest::ToProto() const {
   if (!proto_version_) {
     proto_version_.reset(new RunGraphRequest);
@@ -437,6 +468,15 @@ void MutableProtoRunGraphRequest::set_is_last_partial_run(
   request_.set_is_last_partial_run(is_last_partial_run);
 }
 
+bool MutableProtoRunGraphRequest::store_errors_in_response_body() const {
+  return request_.store_errors_in_response_body();
+}
+
+void MutableProtoRunGraphRequest::set_store_errors_in_response_body(
+    bool store_errors) {
+  request_.set_store_errors_in_response_body(store_errors);
+}
+
 const RunGraphRequest& MutableProtoRunGraphRequest::ToProto() const {
   return request_;
 }
@@ -486,6 +526,10 @@ bool ProtoRunGraphRequest::is_last_partial_run() const {
   return request_->is_last_partial_run();
 }
 
+bool ProtoRunGraphRequest::store_errors_in_response_body() const {
+  return request_->store_errors_in_response_body();
+}
+
 const RunGraphRequest& ProtoRunGraphRequest::ToProto() const {
   return *request_;
 }
@@ -518,6 +562,18 @@ CostGraphDef* InMemoryRunGraphResponse::mutable_cost_graph() {
   return &cost_graph_;
 }
 
+errors::Code InMemoryRunGraphResponse::status_code() const {
+  return status_.code();
+}
+
+const string& InMemoryRunGraphResponse::status_error_message() const {
+  return status_.error_message();
+}
+
+void InMemoryRunGraphResponse::set_status(const Status& status) {
+  status_ = status;
+}
+
 RunGraphResponse* InMemoryRunGraphResponse::get_proto() {
   LOG(FATAL) << "Cannot get a mutable protobuf for an InMemoryRunGraphResponse";
   return nullptr;
@@ -574,6 +630,19 @@ CostGraphDef* OwnedProtoRunGraphResponse::mutable_cost_graph() {
   return response_.mutable_cost_graph();
 }
 
+errors::Code OwnedProtoRunGraphResponse::status_code() const {
+  return response_.status_code();
+}
+
+const string& OwnedProtoRunGraphResponse::status_error_message() const {
+  return response_.status_error_message();
+}
+
+void OwnedProtoRunGraphResponse::set_status(const Status& status) {
+  response_.set_status_code(status.code());
+  response_.set_status_error_message(status.error_message());
+}
+
 RunGraphResponse* OwnedProtoRunGraphResponse::get_proto() { return &response_; }
 
 size_t OwnedProtoRunGraphResponse::num_partition_graphs() const {
@@ -632,6 +701,19 @@ CostGraphDef* NonOwnedProtoRunGraphResponse::mutable_cost_graph() {
   return response_->mutable_cost_graph();
 }
 
+errors::Code NonOwnedProtoRunGraphResponse::status_code() const {
+  return response_->status_code();
+}
+
+const string& NonOwnedProtoRunGraphResponse::status_error_message() const {
+  return response_->status_error_message();
+}
+
+void NonOwnedProtoRunGraphResponse::set_status(const Status& status) {
+  response_->set_status_code(status.code());
+  response_->set_status_error_message(status.error_message());
+}
+
 RunGraphResponse* NonOwnedProtoRunGraphResponse::get_proto() {
   return response_;
 }
@@ -678,6 +760,18 @@ Status InMemoryRunStepResponse::AddTensorFromRunGraphResponse(
 
 RunMetadata* InMemoryRunStepResponse::mutable_metadata() { return &metadata_; }
 
+errors::Code InMemoryRunStepResponse::status_code() const {
+  return status_.code();
+}
+
+const string& InMemoryRunStepResponse::status_error_message() const {
+  return status_.error_message();
+}
+
+void InMemoryRunStepResponse::set_status(const Status& status) {
+  status_ = status;
+}
+
 RunStepResponse* InMemoryRunStepResponse::get_proto() {
   LOG(FATAL) << "Cannot get a mutable protobuf for an InMemoryRunStepResponse";
   return nullptr;
@@ -716,6 +810,19 @@ RunMetadata* OwnedProtoRunStepResponse::mutable_metadata() {
   return response_.mutable_metadata();
 }
 
+errors::Code OwnedProtoRunStepResponse::status_code() const {
+  return response_.status_code();
+}
+
+const string& OwnedProtoRunStepResponse::status_error_message() const {
+  return response_.status_error_message();
+}
+
+void OwnedProtoRunStepResponse::set_status(const Status& status) {
+  response_.set_status_code(status.code());
+  response_.set_status_error_message(status.error_message());
+}
+
 RunStepResponse* OwnedProtoRunStepResponse::get_proto() { return &response_; }
 
 NonOwnedProtoRunStepResponse::NonOwnedProtoRunStepResponse(
@@ -755,6 +862,19 @@ RunMetadata* NonOwnedProtoRunStepResponse::mutable_metadata() {
   return response_->mutable_metadata();
 }
 
+errors::Code NonOwnedProtoRunStepResponse::status_code() const {
+  return response_->status_code();
+}
+
+const string& NonOwnedProtoRunStepResponse::status_error_message() const {
+  return response_->status_error_message();
+}
+
+void NonOwnedProtoRunStepResponse::set_status(const Status& status) {
+  response_->set_status_code(status.code());
+  response_->set_status_error_message(status.error_message());
+}
+
 RunStepResponse* NonOwnedProtoRunStepResponse::get_proto() { return response_; }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/distributed_runtime/message_wrappers.h b/tensorflow/core/distributed_runtime/message_wrappers.h
index 0e3f5b98cb58bb76f599ca67938a420c9b3ffdce..7113d73dd77c6141c904388b3fb9a28c7561daf2 100644
--- a/tensorflow/core/distributed_runtime/message_wrappers.h
+++ b/tensorflow/core/distributed_runtime/message_wrappers.h
@@ -80,6 +80,13 @@ class RunStepRequestWrapper {
   // Options for the run call.
   virtual const RunOptions& options() const = 0;
 
+  // If true then some errors, e.g., execution errors that have long
+  // error messages, may return an OK RunStepResponse with the actual
+  // error saved in the status_code/status_error_message fields of the
+  // response body. This is a workaround since the RPC subsystem may
+  // truncate long metadata messages.
+  virtual bool store_errors_in_response_body() const = 0;
+
   // Returns a human-readable representation of this message for debugging.
   virtual string DebugString() const = 0;
 
@@ -98,6 +105,7 @@ class MutableRunStepRequestWrapper : public RunStepRequestWrapper {
   virtual void add_fetch(const string& name) = 0;
   virtual void add_target(const string& name) = 0;
   virtual RunOptions* mutable_options() = 0;
+  virtual void set_store_errors_in_response_body(bool store_errors) = 0;
 };
 
 // Specialized (and mutable) wrapper for RunStep requests between a client and
@@ -118,6 +126,7 @@ class InMemoryRunStepRequest : public MutableRunStepRequestWrapper {
   const RunOptions& options() const override;
   string DebugString() const override;
   const RunStepRequest& ToProto() const override;
+  bool store_errors_in_response_body() const override;
 
   // MutableRunStepRequestWrapper methods.
   void set_session_handle(const string& handle) override;
@@ -126,6 +135,7 @@ class InMemoryRunStepRequest : public MutableRunStepRequestWrapper {
   void add_fetch(const string& name) override;
   void add_target(const string& name) override;
   RunOptions* mutable_options() override;
+  void set_store_errors_in_response_body(bool store_errors) override;
 
  private:
   string session_handle_;
@@ -134,6 +144,7 @@ class InMemoryRunStepRequest : public MutableRunStepRequestWrapper {
   gtl::InlinedVector<string, 4> fetches_;
   gtl::InlinedVector<string, 4> targets_;
   RunOptions options_;
+  bool store_errors_in_response_body_ = false;
 
   // Holds a cached and owned representation of the proto
   // representation of this request, if needed, so that `ToProto()`
@@ -165,6 +176,7 @@ class MutableProtoRunStepRequest : public MutableRunStepRequestWrapper {
   const RunOptions& options() const override;
   string DebugString() const override;
   const RunStepRequest& ToProto() const override;
+  bool store_errors_in_response_body() const override;
 
   // MutableRunStepRequestWrapper methods.
   void set_session_handle(const string& handle) override;
@@ -173,6 +185,7 @@ class MutableProtoRunStepRequest : public MutableRunStepRequestWrapper {
   void add_fetch(const string& name) override;
   void add_target(const string& name) override;
   RunOptions* mutable_options() override;
+  void set_store_errors_in_response_body(bool store_errors) override;
 
  private:
   RunStepRequest request_;
@@ -202,6 +215,7 @@ class ProtoRunStepRequest : public RunStepRequestWrapper {
   const RunOptions& options() const override;
   string DebugString() const override;
   const RunStepRequest& ToProto() const override;
+  bool store_errors_in_response_body() const override;
 
  private:
   const RunStepRequest* const request_;  // Not owned.
@@ -262,6 +276,13 @@ class RunGraphRequestWrapper {
   // True if this is the last partial run request in a sequence of requests.
   virtual bool is_last_partial_run() const = 0;
 
+  // If true then some errors, e.g., execution errors that have long
+  // error messages, may return an OK RunStepResponse with the actual
+  // error saved in the status_code/status_error_message fields of the
+  // response body. This is a workaround since the RPC subsystem may
+  // truncate long metadata messages.
+  virtual bool store_errors_in_response_body() const = 0;
+
   // Returns the wrapped data as a protocol buffer message.
   virtual const RunGraphRequest& ToProto() const = 0;
 };
@@ -285,6 +306,7 @@ class MutableRunGraphRequestWrapper : public RunGraphRequestWrapper {
   virtual void add_recv_key(const string& recv_key) = 0;
   virtual void set_is_partial(bool is_partial) = 0;
   virtual void set_is_last_partial_run(bool is_last_partial_run) = 0;
+  virtual void set_store_errors_in_response_body(bool store_errors) = 0;
 };
 
 class InMemoryRunGraphRequest : public MutableRunGraphRequestWrapper {
@@ -302,6 +324,7 @@ class InMemoryRunGraphRequest : public MutableRunGraphRequestWrapper {
   bool is_partial() const override;
   bool is_last_partial_run() const override;
   const RunGraphRequest& ToProto() const override;
+  bool store_errors_in_response_body() const override;
 
   // MutableRunGraphRequestWrapper methods.
   void set_session_handle(const string& handle) override;
@@ -314,6 +337,7 @@ class InMemoryRunGraphRequest : public MutableRunGraphRequestWrapper {
   void add_recv_key(const string& recv_key) override;
   void set_is_partial(bool is_partial) override;
   void set_is_last_partial_run(bool is_last_partial_run) override;
+  void set_store_errors_in_response_body(bool store_errors) override;
 
  private:
   string session_handle_;
@@ -324,6 +348,7 @@ class InMemoryRunGraphRequest : public MutableRunGraphRequestWrapper {
   gtl::InlinedVector<string, 4> recvs_;
   bool is_partial_ = false;
   bool is_last_partial_run_ = false;
+  bool store_errors_in_response_body_ = false;
 
   // Holds a cached and owned representation of the proto
   // representation of this request, if needed, so that `ToProto()`
@@ -349,6 +374,7 @@ class MutableProtoRunGraphRequest : public MutableRunGraphRequestWrapper {
   const string& recv_key(size_t i) const override;
   bool is_partial() const override;
   bool is_last_partial_run() const override;
+  bool store_errors_in_response_body() const override;
   const RunGraphRequest& ToProto() const override;
 
   // MutableRunGraphRequestWrapper methods.
@@ -362,6 +388,7 @@ class MutableProtoRunGraphRequest : public MutableRunGraphRequestWrapper {
   void add_recv_key(const string& recv_key) override;
   void set_is_partial(bool is_partial) override;
   void set_is_last_partial_run(bool is_last_partial_run) override;
+  void set_store_errors_in_response_body(bool store_errors) override;
 
  private:
   RunGraphRequest request_;
@@ -383,6 +410,7 @@ class ProtoRunGraphRequest : public RunGraphRequestWrapper {
   const string& recv_key(size_t i) const override;
   bool is_partial() const override;
   bool is_last_partial_run() const override;
+  bool store_errors_in_response_body() const override;
   const RunGraphRequest& ToProto() const override;
 
  private:
@@ -429,6 +457,11 @@ class MutableRunGraphResponseWrapper {
   virtual GraphDef* mutable_partition_graph(size_t i) = 0;
   virtual void AddPartitionGraph(const GraphDef& partition_graph) = 0;
 
+  // Returned status if requested.
+  virtual errors::Code status_code() const = 0;
+  virtual const string& status_error_message() const = 0;
+  virtual void set_status(const Status& status) = 0;
+
  protected:
   // Returns a mutable protobuf message that represents the contents of
   // this wrapper, for passing to an RPC subsystem that will populate
@@ -458,6 +491,9 @@ class InMemoryRunGraphResponse : public MutableRunGraphResponseWrapper {
   size_t num_partition_graphs() const override;
   GraphDef* mutable_partition_graph(size_t i) override;
   void AddPartitionGraph(const GraphDef& partition_graph) override;
+  errors::Code status_code() const override;
+  const string& status_error_message() const override;
+  void set_status(const Status& status) override;
 
  protected:
   // NOTE: This method is not implemented. See
@@ -469,6 +505,9 @@ class InMemoryRunGraphResponse : public MutableRunGraphResponseWrapper {
   StepStats step_stats_;
   CostGraphDef cost_graph_;
   std::vector<GraphDef> partition_graphs_;
+  // Store the code and message separately so that they can be updated
+  // independently by setters.
+  Status status_;
 };
 
 // Proto-based message wrapper for use on the client side of the RunGraph RPC.
@@ -485,6 +524,9 @@ class OwnedProtoRunGraphResponse : public MutableRunGraphResponseWrapper {
   size_t num_partition_graphs() const override;
   GraphDef* mutable_partition_graph(size_t i) override;
   void AddPartitionGraph(const GraphDef& partition_graph) override;
+  errors::Code status_code() const override;
+  const string& status_error_message() const override;
+  void set_status(const Status& status) override;
 
  protected:
   RunGraphResponse* get_proto() override;
@@ -509,6 +551,9 @@ class NonOwnedProtoRunGraphResponse : public MutableRunGraphResponseWrapper {
   size_t num_partition_graphs() const override;
   GraphDef* mutable_partition_graph(size_t i) override;
   void AddPartitionGraph(const GraphDef& partition_graph) override;
+  errors::Code status_code() const override;
+  const string& status_error_message() const override;
+  void set_status(const Status& status) override;
 
  protected:
   RunGraphResponse* get_proto() override;
@@ -558,6 +603,11 @@ class MutableRunStepResponseWrapper {
   virtual const RunMetadata& metadata() const = 0;
   virtual RunMetadata* mutable_metadata() = 0;
 
+  // Returned status if requested.
+  virtual errors::Code status_code() const = 0;
+  virtual const string& status_error_message() const = 0;
+  virtual void set_status(const Status& status) = 0;
+
  protected:
   // Returns a mutable protobuf message that represents the contents of
   // this wrapper, for passing to an RPC subsystem that will populate
@@ -585,6 +635,9 @@ class InMemoryRunStepResponse : public MutableRunStepResponseWrapper {
       size_t i) override;
   const RunMetadata& metadata() const override;
   RunMetadata* mutable_metadata() override;
+  errors::Code status_code() const override;
+  const string& status_error_message() const override;
+  void set_status(const Status& status) override;
 
  protected:
   // NOTE: This method is not implemented. See
@@ -594,6 +647,9 @@ class InMemoryRunStepResponse : public MutableRunStepResponseWrapper {
  private:
   gtl::InlinedVector<std::pair<string, Tensor>, 4> tensors_;
   RunMetadata metadata_;
+  // Store the code and message separately so that they can be updated
+  // independently by setters.
+  Status status_;
 };
 
 // Proto-based message wrapper for use on the client side of the RunStep RPC.
@@ -608,6 +664,9 @@ class OwnedProtoRunStepResponse : public MutableRunStepResponseWrapper {
       size_t i) override;
   const RunMetadata& metadata() const override;
   RunMetadata* mutable_metadata() override;
+  errors::Code status_code() const override;
+  const string& status_error_message() const override;
+  void set_status(const Status& status) override;
 
  protected:
   RunStepResponse* get_proto() override;
@@ -630,6 +689,9 @@ class NonOwnedProtoRunStepResponse : public MutableRunStepResponseWrapper {
       size_t i) override;
   const RunMetadata& metadata() const override;
   RunMetadata* mutable_metadata() override;
+  errors::Code status_code() const override;
+  const string& status_error_message() const override;
+  void set_status(const Status& status) override;
 
  protected:
   RunStepResponse* get_proto() override;
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
index 41ee81c01d6ebb9085d3271eae86484bb786ecfb..ac279937730466514451d7e81257d2110e128eff 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
@@ -192,7 +192,15 @@ class GrpcMasterService : public AsyncServiceInterface {
                             delete call_opts;
                             delete wrapped_request;
                             delete trace;
-                            call->SendResponse(ToGrpcStatus(status));
+                            if (call->request.store_errors_in_response_body() &&
+                                !status.ok()) {
+                              call->response.set_status_code(status.code());
+                              call->response.set_status_error_message(
+                                  status.error_message());
+                              call->SendResponse(ToGrpcStatus(Status::OK()));
+                            } else {
+                              call->SendResponse(ToGrpcStatus(status));
+                            }
                           });
     ENQUEUE_REQUEST(RunStep, true);
   }
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_session.cc b/tensorflow/core/distributed_runtime/rpc/grpc_session.cc
index 9a08335c1c93c56e8bbd61a76bae211482555e62..120a33f17b0d1f81e50dfbc844f56e3d85def096 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_session.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_session.cc
@@ -190,6 +190,9 @@ Status GrpcSession::RunHelper(
     req->add_feed(it.first, it.second);
   }
 
+  // Support long error messages by storing the error code in the response body.
+  req->set_store_errors_in_response_body(true);
+
   // Build an index from fetch tensor name to first index in
   // output_tensor_names.
   std::unordered_map<string, int> output_name_to_offset;
@@ -207,6 +210,11 @@ Status GrpcSession::RunHelper(
   call_options.SetTimeout(req->options().timeout_in_ms());
   TF_RETURN_IF_ERROR(RunProto(&call_options, req.get(), resp.get()));
 
+  // Look for an extended error returned in the response body.
+  if (resp->status_code() != error::Code::OK) {
+    return Status(resp->status_code(), resp->status_error_message());
+  }
+
   if (!output_tensor_names.empty()) {
     outputs->resize(output_tensor_names.size());
   }
@@ -322,7 +330,7 @@ Status GrpcSession::Close() {
   {
     mutex_lock l(mu_);
     if (handle_.empty()) {
-      return errors::InvalidArgument("A session is not created yet....");
+      return Status::OK();
     }
     req.set_session_handle(handle_);
     handle_.clear();
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc b/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
index b673f200ccaaccbdab7b0f589af3d3450a6c44b6..335c3febe20e17e5b5ea57dc68c69e616997e14b 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
@@ -572,6 +572,66 @@ TEST(GrpcSessionTest, Error) {
   Env::Default()->SleepForMicroseconds(2000000);
 }
 
+TEST(GrpcSessionTest, LongErrorMessage) {
+  std::unique_ptr<test::TestCluster> cluster;
+  TF_CHECK_OK(test::TestCluster::MakeTestCluster(Devices(1, 0), 2, &cluster));
+  const string& master = cluster->targets()[0];
+  const string& dev_a = cluster->devices()[0].name();
+  const string& dev_b = cluster->devices()[1].name();
+  LOG(INFO) << "master " << master << "dev_a " << dev_a << "dev_b " << dev_b;
+  GraphDef gdef;
+  std::vector<string> fetches;
+  {
+    Graph g(OpRegistry::Global());
+
+    // a2 = a + error(a)
+    //
+    // Subgraph for "a" fails. The master will cancel the subgraph for
+    // "b" and then returns the Session::Run.
+    auto a = test::graph::Constant(&g, Tensor());
+    a->set_assigned_device_name(dev_a);
+    std::vector<char> long_string_buffer(1024 * 1024, 'x');
+    StringPiece long_string(long_string_buffer.data(), 1024 * 1024);
+    string name = strings::StrCat(long_string, "fantasia!");
+    auto a_err = test::graph::Error(&g, a, name);
+    a_err->set_assigned_device_name(dev_a);
+    auto a2 = test::graph::Add(&g, a, a_err);
+    a2->set_assigned_device_name(dev_a);
+    fetches.push_back(a2->name());
+
+    // b2 = b + delay(b)
+    //
+    // Subgraph for "b" sleeps at the node "b_delay". When the sleep
+    // finishes, the subgraph "b" will continue execution till it
+    // notices that it is canceled. Meanwhile, subgraph's executor
+    // and its related state (registered ops) should still be alive.
+    auto b = test::graph::Constant(&g, Tensor());
+    b->set_assigned_device_name(dev_b);
+    auto b_delay = test::graph::Delay(&g, b, Microseconds(1000000));
+    b_delay->set_assigned_device_name(dev_b);
+    auto b2 = test::graph::Add(&g, b, b_delay);
+    b2->set_assigned_device_name(dev_b);
+    fetches.push_back(b2->name());
+    test::graph::ToGraphDef(&g, &gdef);
+  }
+  std::unique_ptr<Session> session(NewRemote(Options(master, 1)));
+  ASSERT_TRUE(session != nullptr);
+
+  TF_CHECK_OK(session->Create(gdef));
+  {
+    Status status = session->Run({}, fetches, {}, nullptr);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.ToString().find("fantasia!"), string::npos);
+  }
+  // session->Close() shall clean up all states related to the session->
+  // E.g., deregisters subgraph with workers, etc.
+  TF_CHECK_OK(session->Close());
+
+  // Sleep a bit so that most of asynchronous works finishes before
+  // the test process finishes.
+  Env::Default()->SleepForMicroseconds(2000000);
+}
+
 TEST(SessionTest, SharedVar) {
   std::unique_ptr<test::TestCluster> cluster;
   TF_CHECK_OK(test::TestCluster::MakeTestCluster(Devices(1, 0), 1, &cluster));
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_util.h b/tensorflow/core/distributed_runtime/rpc/grpc_util.h
index 0ddcd89130b3b1b1209c255b6200d8ce88d4cb7c..bb854783472c4a5e1261e9e737f4b830e5cbf3e2 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_util.h
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_util.h
@@ -23,15 +23,36 @@ limitations under the License.
 #include "grpc++/support/byte_buffer.h"
 #include "tensorflow/core/distributed_runtime/tensor_coding.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 namespace tensorflow {
 
+constexpr char kStreamRemovedMessage[] = "Stream removed";
+
+// Identify if the given grpc::Status corresponds to an HTTP stream removed
+// error (see chttp2_transport.cc).
+//
+// When auto-reconnecting to a remote TensorFlow worker after it restarts, gRPC
+// can return an UNKNOWN error code with a "Stream removed" error message.
+// This should not be treated as an unrecoverable error.
+//
+// N.B. This is dependent on the error message from grpc remaining consistent.
+inline bool IsStreamRemovedError(const ::grpc::Status& s) {
+  return !s.ok() && s.error_code() == ::grpc::StatusCode::UNKNOWN &&
+         s.error_message() == kStreamRemovedMessage;
+}
+
 inline Status FromGrpcStatus(const ::grpc::Status& s) {
   if (s.ok()) {
     return Status::OK();
   } else {
+    // Convert "UNKNOWN" stream removed errors into unavailable, to allow
+    // for retry upstream.
+    if (IsStreamRemovedError(s)) {
+      return Status(tensorflow::error::UNAVAILABLE, s.error_message());
+    }
     return Status(static_cast<tensorflow::error::Code>(s.error_code()),
                   s.error_message());
   }
@@ -41,6 +62,13 @@ inline ::grpc::Status ToGrpcStatus(const ::tensorflow::Status& s) {
   if (s.ok()) {
     return ::grpc::Status::OK;
   } else {
+    if (s.error_message().size() > 3072 /* 3k bytes */) {
+      // TODO(b/62947679): Remove truncation once the gRPC issue is resolved.
+      string scratch =
+          strings::Printf("%.3072s ... [truncated]", s.error_message().c_str());
+      LOG(ERROR) << "Truncated error message: " << s;
+      return ::grpc::Status(static_cast<::grpc::StatusCode>(s.code()), scratch);
+    }
     return ::grpc::Status(static_cast<::grpc::StatusCode>(s.code()),
                           s.error_message());
   }
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc
index a7b93e04607fe2dbb9bd87b372441607b5a19b0c..bb14e0197b7b0ea44c4a75528f4919045574f4c5 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc
@@ -15,6 +15,8 @@ limitations under the License.
 
 #include "tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.h"
 
+#include <unordered_map>
+
 #include "tensorflow/core/distributed_runtime/rpc/grpc_channel.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_client_cq_tag.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.h"
@@ -23,6 +25,7 @@ limitations under the License.
 #include "tensorflow/core/distributed_runtime/worker_cache_partial.h"
 #include "tensorflow/core/distributed_runtime/worker_interface.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/mutex.h"
 
 namespace tensorflow {
 
@@ -30,29 +33,21 @@ namespace {
 
 class GrpcWorkerCache : public WorkerCachePartial {
  public:
+  // TODO(ncteisen): consider adding a config var or flag for this
+  static constexpr const size_t kGrpcWorkerCacheThreadCount = 8;
+
   explicit GrpcWorkerCache(GrpcChannelCache* channel_cache,
                            WorkerInterface* local_worker,
                            const string& local_target)
       : local_target_(local_target),
         local_worker_(local_worker),
-        channel_cache_(channel_cache) {
-    // TODO(mrry): Investigate possible performance improvements by
-    // replacing this thread with a threadpool.
-    polling_thread_ = Env::Default()->StartThread(
-        ThreadOptions(), "grpc_worker_cache", [this]() {
-          void* tag;
-          bool ok;
-          while (completion_queue_.Next(&tag, &ok)) {
-            GrpcClientCQTag* callback_tag = static_cast<GrpcClientCQTag*>(tag);
-            callback_tag->OnCompleted(ok);
-          }
-        });
-  }
+        channel_cache_(channel_cache),
+        threads_(kGrpcWorkerCacheThreadCount),
+        next_round_robin_assignment_(0) {}
 
   // Explicit destructor to control destruction order.
   ~GrpcWorkerCache() override {
-    completion_queue_.Shutdown();
-    delete polling_thread_;  // Blocks until thread exits.
+    threads_.clear();  // Blocks until threads exit.
     delete channel_cache_;
   }
 
@@ -66,7 +61,9 @@ class GrpcWorkerCache : public WorkerCachePartial {
     } else {
       SharedGrpcChannelPtr channel = channel_cache_->FindWorkerChannel(target);
       if (!channel) return nullptr;
-      return NewGrpcRemoteWorker(channel, &completion_queue_, &logger_);
+      return NewGrpcRemoteWorker(
+          channel, threads_[AssignWorkerToThread(target)].completion_queue(),
+          &logger_);
     }
   }
 
@@ -88,12 +85,59 @@ class GrpcWorkerCache : public WorkerCachePartial {
   }
 
  private:
+  // Thread wrapping class that drives work over a single gRPC
+  // CompletionQueue.
+  class GrpcWorkerCacheThread {
+   public:
+    GrpcWorkerCacheThread() {
+      thread_.reset(Env::Default()->StartThread(
+          ThreadOptions(), "grpc_worker_cache", [this]() {
+            void* tag;
+            bool ok;
+            while (completion_queue_.Next(&tag, &ok)) {
+              GrpcClientCQTag* callback_tag =
+                  static_cast<GrpcClientCQTag*>(tag);
+              callback_tag->OnCompleted(ok);
+            }
+          }));
+    }
+
+    ~GrpcWorkerCacheThread() {
+      completion_queue_.Shutdown();
+      thread_.reset();
+    }
+
+    ::grpc::CompletionQueue* completion_queue() { return &completion_queue_; }
+
+   private:
+    ::grpc::CompletionQueue completion_queue_;
+    std::unique_ptr<Thread> thread_;
+  };  // GrpcWorkerCacheThread
+
+  size_t AssignWorkerToThread(const string& target) {
+    // Round-robin target assignment, but keeps the same target on the same
+    // polling thread always, as this is important for gRPC performace
+    mutex_lock lock(assignment_mu_);
+    auto it = target_assignments_.find(target);
+    if (it == target_assignments_.end()) {
+      it = target_assignments_
+               .insert(std::make_pair(
+                   target, (next_round_robin_assignment_++) % threads_.size()))
+               .first;
+    }
+    return it->second;
+  }
+
   const string local_target_;
   WorkerInterface* const local_worker_;  // Not owned.
-  GrpcChannelCache* channel_cache_;  // Owned.
-  ::grpc::CompletionQueue completion_queue_;
-  Thread* polling_thread_;  // Owned.
+  GrpcChannelCache* channel_cache_;      // Owned.
   WorkerCacheLogger logger_;
+  std::vector<GrpcWorkerCacheThread> threads_;
+
+  mutex assignment_mu_;
+  std::unordered_map<std::string, size_t> target_assignments_
+      GUARDED_BY(assignment_mu_);
+  size_t next_round_robin_assignment_ GUARDED_BY(assignment_mu_);
 };
 
 }  // namespace
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
index eee93ec65726b416fdf8d4fe8a339c0fc3bf2d48..15faf21dafc2ee1a2a6d6ad6463b87aa9a62d88d 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
@@ -51,19 +51,23 @@ namespace tensorflow {
 namespace {
 
 class GrpcWorkerService : public AsyncServiceInterface {
+  // TODO(ncteisen): consider adding a config var or flag for this
+  static constexpr const size_t kGrpcWorkerServiceThreadCount = 8;
+
  public:
   GrpcWorkerService(GrpcWorker* worker, ::grpc::ServerBuilder* builder)
-      : worker_(worker), is_shutdown_(false) {
+      : is_shutdown_(false) {
     builder->RegisterService(&worker_service_);
-    cq_ = builder->AddCompletionQueue();
+    for (int i = 0; i < kGrpcWorkerServiceThreadCount; i++) {
+      threads_.emplace_back(
+          new GrpcWorkerServiceThread(worker, builder, &worker_service_));
+    }
   }
 
-  ~GrpcWorkerService() override { delete shutdown_alarm_; }
-
   void Shutdown() override {
     bool did_shutdown = false;
     {
-      mutex_lock l(shutdown_mu_);
+      mutex_lock l(service_shutdown_mu_);
       if (!is_shutdown_) {
         LOG(INFO) << "Shutting down GrpcWorkerService.";
         is_shutdown_ = true;
@@ -71,11 +75,9 @@ class GrpcWorkerService : public AsyncServiceInterface {
       }
     }
     if (did_shutdown) {
-      // NOTE(mrry): This enqueues a special event (with a null tag)
-      // that causes the completion queue to be shut down on the
-      // polling thread.
-      shutdown_alarm_ =
-          new ::grpc::Alarm(cq_.get(), gpr_now(GPR_CLOCK_MONOTONIC), nullptr);
+      for (auto& worker_thread : threads_) {
+        worker_thread->Shutdown();
+      }
     }
   }
 
@@ -90,220 +92,262 @@ class GrpcWorkerService : public AsyncServiceInterface {
 // The implementation of the request handler for each RPC method
 // must ensure that it calls ENQUEUE_REQUEST() for that RPC method,
 // to keep accepting new requests.
-#define ENQUEUE_REQUEST(method, supports_cancel)                       \
-  do {                                                                 \
-    mutex_lock l(shutdown_mu_);                                        \
-    if (!is_shutdown_) {                                               \
-      Call<GrpcWorkerService, grpc::WorkerService::AsyncService,       \
-           method##Request, method##Response>::                        \
-          EnqueueRequestForMethod(                                     \
-              &worker_service_, cq_.get(),                             \
-              static_cast<int>(GrpcWorkerMethod::k##method),           \
-              &GrpcWorkerService::method##Handler, (supports_cancel)); \
-    }                                                                  \
+#define ENQUEUE_REQUEST(method, supports_cancel)                             \
+  do {                                                                       \
+    mutex_lock l(shutdown_mu_);                                              \
+    if (!is_shutdown_) {                                                     \
+      Call<GrpcWorkerServiceThread, grpc::WorkerService::AsyncService,       \
+           method##Request, method##Response>::                              \
+          EnqueueRequestForMethod(                                           \
+              worker_service_, cq_.get(),                                    \
+              static_cast<int>(GrpcWorkerMethod::k##method),                 \
+              &GrpcWorkerServiceThread::method##Handler, (supports_cancel)); \
+    }                                                                        \
   } while (0)
 
   // This method blocks forever handling requests from the completion queue.
   void HandleRPCsLoop() override {
-    // TODO(mrry): This may require performance engineering. We can
-    // add more threads to service the completion queue, and add more
-    // of various request types if they are short and frequent.
-    // Currently we allow unbounded numbers of pending calls for each
-    // method, by re-enqueuing a request before the previous one
-    // completes, and we may decide to bound some of the request
-    // types.
-    ENQUEUE_REQUEST(GetStatus, false);
-    ENQUEUE_REQUEST(CreateWorkerSession, false);
-    ENQUEUE_REQUEST(DeleteWorkerSession, false);
-    ENQUEUE_REQUEST(CleanupAll, false);
-    ENQUEUE_REQUEST(RegisterGraph, false);
-    ENQUEUE_REQUEST(DeregisterGraph, false);
-
-    // TODO(mrry): Determine a better policy for enqueuing the appropriate
-    // number of each request type.
-    for (int i = 0; i < 1000; ++i) {
-      EnqueueRecvTensorRequestRaw();
+    for (auto& worker_thread : threads_) {
+      worker_thread->Start();
     }
-    for (int i = 0; i < 100; ++i) {
-      ENQUEUE_REQUEST(RunGraph, true);
+    for (auto& worker_thread : threads_) {
+      worker_thread->Join();
     }
-    for (int i = 0; i < 100; ++i) {
-      ENQUEUE_REQUEST(CleanupGraph, false);
+  }
+
+ private:
+  // Thread wrapping class that drives work over a single gRPC
+  // CompletionQueue.
+  class GrpcWorkerServiceThread {
+   public:
+    explicit GrpcWorkerServiceThread(
+        GrpcWorker* worker, ::grpc::ServerBuilder* builder,
+        grpc::WorkerService::AsyncService* worker_service)
+        : worker_(worker),
+          worker_service_(worker_service),
+          is_shutdown_(false) {
+      cq_ = builder->AddCompletionQueue();
     }
 
-    ENQUEUE_REQUEST(Logging, false);
-    ENQUEUE_REQUEST(Tracing, false);
+    void Start() {
+      thread_.reset(worker_->env()->env->StartThread(
+          ThreadOptions(), "grpc_worker_service",
+          [this]() { HandleRPCsLoop(); }));
+    }
 
-    void* tag;
-    bool ok;
+    void Join() { thread_.reset(); }  // Blocks until thread exits
 
-    while (cq_->Next(&tag, &ok)) {
-      UntypedCall<GrpcWorkerService>::Tag* callback_tag =
-          static_cast<UntypedCall<GrpcWorkerService>::Tag*>(tag);
-      if (callback_tag) {
-        callback_tag->OnCompleted(this, ok);
-      } else {
-        // NOTE(mrry): A null `callback_tag` indicates that this is
-        // the shutdown alarm.
-        cq_->Shutdown();
+    void Shutdown() {
+      {
+        mutex_lock lock(shutdown_mu_);
+        is_shutdown_ = true;
       }
+      cq_->Shutdown();
     }
-  }
 
- private:
-  GrpcWorker* worker_ = nullptr;  // Not owned.
-  std::unique_ptr<::grpc::ServerCompletionQueue> cq_;
+   private:
+    void HandleRPCsLoop() {
+      // TODO(ncteisen): This may require performance engineering. We can
+      // change the number of threads, the number of handlers per thread,
+      // or even decide to specialize certain threads to certain methods.
+      ENQUEUE_REQUEST(GetStatus, false);
+      ENQUEUE_REQUEST(CreateWorkerSession, false);
+      ENQUEUE_REQUEST(DeleteWorkerSession, false);
+      ENQUEUE_REQUEST(CleanupAll, false);
+      ENQUEUE_REQUEST(RegisterGraph, false);
+      ENQUEUE_REQUEST(DeregisterGraph, false);
+
+      // TODO(ncteisen): Determine a better policy for enqueuing the
+      // appropriate number of each request type.
+      for (int i = 0; i < 1000; ++i) {
+        EnqueueRecvTensorRequestRaw();
+      }
+      for (int i = 0; i < 100; ++i) {
+        ENQUEUE_REQUEST(RunGraph, true);
+      }
+      for (int i = 0; i < 100; ++i) {
+        ENQUEUE_REQUEST(CleanupGraph, false);
+      }
 
-  grpc::WorkerService::AsyncService worker_service_;
+      ENQUEUE_REQUEST(Logging, false);
+      ENQUEUE_REQUEST(Tracing, false);
 
-  mutex shutdown_mu_;
-  bool is_shutdown_ GUARDED_BY(shutdown_mu_);
-  ::grpc::Alarm* shutdown_alarm_ = nullptr;
+      void* tag;
+      bool ok;
 
-  void Schedule(std::function<void()> f) {
-    worker_->env()->compute_pool->Schedule(std::move(f));
-  }
+      while (cq_->Next(&tag, &ok)) {
+        UntypedCall<GrpcWorkerServiceThread>::Tag* callback_tag =
+            static_cast<UntypedCall<GrpcWorkerServiceThread>::Tag*>(tag);
+        CHECK(callback_tag);
+        callback_tag->OnCompleted(this, ok);
+      }
+    }
 
-  // The following section contains one request handler method per
-  // RPC. The `FooHandler` method is called (indirectly) by
-  // `HandleRPCsLoop()` when the next Foo RPC is received. Each
-  // `FooHandler` call schedules a closure on `worker_->env()->compute_pool`,
-  // and is responsible for requesting the next Foo call by calling
-  // `ENQUEUE_REQUEST(Foo)`.
-
-  template <class RequestMessage, class ResponseMessage>
-  using WorkerCall = Call<GrpcWorkerService, grpc::WorkerService::AsyncService,
-                          RequestMessage, ResponseMessage>;
-
-  void GetStatusHandler(WorkerCall<GetStatusRequest, GetStatusResponse>* call) {
-    Schedule([this, call]() {
-      Status s = worker_->GetStatus(&call->request, &call->response);
-      call->SendResponse(ToGrpcStatus(s));
-    });
-    ENQUEUE_REQUEST(GetStatus, false);
-  }
+   private:
+    void Schedule(std::function<void()> f) {
+      worker_->env()->compute_pool->Schedule(std::move(f));
+    }
 
-  void CreateWorkerSessionHandler(
-      WorkerCall<CreateWorkerSessionRequest, CreateWorkerSessionResponse>*
-          call) {
-    Schedule([this, call]() {
-      Status s = worker_->CreateWorkerSession(&call->request, &call->response);
-      call->SendResponse(ToGrpcStatus(s));
-    });
-    ENQUEUE_REQUEST(CreateWorkerSession, false);
-  }
+    // The following section contains one request handler method per
+    // RPC. The `FooHandler` method is called (indirectly) by
+    // `HandleRPCsLoop()` when the next Foo RPC is received. Each
+    // `FooHandler` call schedules a closure on `worker_->env()->compute_pool`,
+    // and is responsible for requesting the next Foo call by calling
+    // `ENQUEUE_REQUEST(Foo)`.
+
+    template <class RequestMessage, class ResponseMessage>
+    using WorkerCall =
+        Call<GrpcWorkerServiceThread, grpc::WorkerService::AsyncService,
+             RequestMessage, ResponseMessage>;
+
+    void GetStatusHandler(
+        WorkerCall<GetStatusRequest, GetStatusResponse>* call) {
+      Schedule([this, call]() {
+        Status s = worker_->GetStatus(&call->request, &call->response);
+        call->SendResponse(ToGrpcStatus(s));
+      });
+      ENQUEUE_REQUEST(GetStatus, false);
+    }
 
-  void DeleteWorkerSessionHandler(
-      WorkerCall<DeleteWorkerSessionRequest, DeleteWorkerSessionResponse>*
-          call) {
-    Schedule([this, call]() {
-      Status s = worker_->DeleteWorkerSession(&call->request, &call->response);
-      call->SendResponse(ToGrpcStatus(s));
-    });
-    ENQUEUE_REQUEST(DeleteWorkerSession, false);
-  }
+    void CreateWorkerSessionHandler(
+        WorkerCall<CreateWorkerSessionRequest, CreateWorkerSessionResponse>*
+            call) {
+      Schedule([this, call]() {
+        Status s =
+            worker_->CreateWorkerSession(&call->request, &call->response);
+        call->SendResponse(ToGrpcStatus(s));
+      });
+      ENQUEUE_REQUEST(CreateWorkerSession, false);
+    }
 
-  void CleanupAllHandler(
-      WorkerCall<CleanupAllRequest, CleanupAllResponse>* call) {
-    Schedule([this, call]() {
-      Status s = worker_->CleanupAll(&call->request, &call->response);
-      call->SendResponse(ToGrpcStatus(s));
-    });
-    ENQUEUE_REQUEST(CleanupAll, false);
-  }
+    void DeleteWorkerSessionHandler(
+        WorkerCall<DeleteWorkerSessionRequest, DeleteWorkerSessionResponse>*
+            call) {
+      Schedule([this, call]() {
+        Status s =
+            worker_->DeleteWorkerSession(&call->request, &call->response);
+        call->SendResponse(ToGrpcStatus(s));
+      });
+      ENQUEUE_REQUEST(DeleteWorkerSession, false);
+    }
 
-  void RegisterGraphHandler(
-      WorkerCall<RegisterGraphRequest, RegisterGraphResponse>* call) {
-    Schedule([this, call]() {
-      Status s = worker_->RegisterGraph(&call->request, &call->response);
-      call->SendResponse(ToGrpcStatus(s));
-    });
-    ENQUEUE_REQUEST(RegisterGraph, false);
-  }
+    void CleanupAllHandler(
+        WorkerCall<CleanupAllRequest, CleanupAllResponse>* call) {
+      Schedule([this, call]() {
+        Status s = worker_->CleanupAll(&call->request, &call->response);
+        call->SendResponse(ToGrpcStatus(s));
+      });
+      ENQUEUE_REQUEST(CleanupAll, false);
+    }
 
-  void DeregisterGraphHandler(
-      WorkerCall<DeregisterGraphRequest, DeregisterGraphResponse>* call) {
-    Schedule([this, call]() {
-      Status s = worker_->DeregisterGraph(&call->request, &call->response);
-      call->SendResponse(ToGrpcStatus(s));
-    });
-    ENQUEUE_REQUEST(DeregisterGraph, false);
-  }
+    void RegisterGraphHandler(
+        WorkerCall<RegisterGraphRequest, RegisterGraphResponse>* call) {
+      Schedule([this, call]() {
+        Status s = worker_->RegisterGraph(&call->request, &call->response);
+        call->SendResponse(ToGrpcStatus(s));
+      });
+      ENQUEUE_REQUEST(RegisterGraph, false);
+    }
 
-  void RunGraphHandler(WorkerCall<RunGraphRequest, RunGraphResponse>* call) {
-    Schedule([this, call]() {
-      CallOptions* call_opts = new CallOptions;
-      ProtoRunGraphRequest* wrapped_request =
-          new ProtoRunGraphRequest(&call->request);
-      NonOwnedProtoRunGraphResponse* wrapped_response =
-          new NonOwnedProtoRunGraphResponse(&call->response);
-      call->SetCancelCallback([call_opts]() { call_opts->StartCancel(); });
-      worker_->RunGraphAsync(call_opts, wrapped_request, wrapped_response,
-                             [call, call_opts, wrapped_request,
-                              wrapped_response](const Status& s) {
-                               call->ClearCancelCallback();
-                               delete call_opts;
-                               delete wrapped_request;
-                               delete wrapped_response;
-                               call->SendResponse(ToGrpcStatus(s));
-                             });
-    });
-    ENQUEUE_REQUEST(RunGraph, true);
-  }
+    void DeregisterGraphHandler(
+        WorkerCall<DeregisterGraphRequest, DeregisterGraphResponse>* call) {
+      Schedule([this, call]() {
+        Status s = worker_->DeregisterGraph(&call->request, &call->response);
+        call->SendResponse(ToGrpcStatus(s));
+      });
+      ENQUEUE_REQUEST(DeregisterGraph, false);
+    }
 
-  void RecvTensorHandlerRaw(
-      WorkerCall<RecvTensorRequest, ::grpc::ByteBuffer>* call) {
-    Schedule([this, call]() {
-      CallOptions* call_opts = new CallOptions;
-      call->SetCancelCallback([call_opts]() { call_opts->StartCancel(); });
-      worker_->GrpcRecvTensorAsync(call_opts, &call->request, &call->response,
-                                   [call, call_opts](const Status& s) {
-                                     call->ClearCancelCallback();
-                                     delete call_opts;
-                                     call->SendResponse(ToGrpcStatus(s));
-                                   });
-    });
-    EnqueueRecvTensorRequestRaw();
-  }
+    void RunGraphHandler(WorkerCall<RunGraphRequest, RunGraphResponse>* call) {
+      Schedule([this, call]() {
+        CallOptions* call_opts = new CallOptions;
+        ProtoRunGraphRequest* wrapped_request =
+            new ProtoRunGraphRequest(&call->request);
+        NonOwnedProtoRunGraphResponse* wrapped_response =
+            new NonOwnedProtoRunGraphResponse(&call->response);
+        call->SetCancelCallback([call_opts]() { call_opts->StartCancel(); });
+        worker_->RunGraphAsync(call_opts, wrapped_request, wrapped_response,
+                               [call, call_opts, wrapped_request,
+                                wrapped_response](const Status& s) {
+                                 call->ClearCancelCallback();
+                                 delete call_opts;
+                                 delete wrapped_request;
+                                 delete wrapped_response;
+                                 call->SendResponse(ToGrpcStatus(s));
+                               });
+      });
+      ENQUEUE_REQUEST(RunGraph, true);
+    }
 
-  void CleanupGraphHandler(
-      WorkerCall<CleanupGraphRequest, CleanupGraphResponse>* call) {
-    Schedule([this, call]() {
-      Status s = worker_->CleanupGraph(&call->request, &call->response);
-      call->SendResponse(ToGrpcStatus(s));
-    });
-    ENQUEUE_REQUEST(CleanupGraph, false);
-  }
+    void RecvTensorHandlerRaw(
+        WorkerCall<RecvTensorRequest, ::grpc::ByteBuffer>* call) {
+      Schedule([this, call]() {
+        CallOptions* call_opts = new CallOptions;
+        call->SetCancelCallback([call_opts]() { call_opts->StartCancel(); });
+        worker_->GrpcRecvTensorAsync(call_opts, &call->request, &call->response,
+                                     [call, call_opts](const Status& s) {
+                                       call->ClearCancelCallback();
+                                       delete call_opts;
+                                       call->SendResponse(ToGrpcStatus(s));
+                                     });
+      });
+      EnqueueRecvTensorRequestRaw();
+    }
 
-  void LoggingHandler(WorkerCall<LoggingRequest, LoggingResponse>* call) {
-    Schedule([this, call]() {
-      Status s = worker_->Logging(&call->request, &call->response);
-      call->SendResponse(ToGrpcStatus(s));
-    });
-    ENQUEUE_REQUEST(Logging, false);
-  }
+    void CleanupGraphHandler(
+        WorkerCall<CleanupGraphRequest, CleanupGraphResponse>* call) {
+      Schedule([this, call]() {
+        Status s = worker_->CleanupGraph(&call->request, &call->response);
+        call->SendResponse(ToGrpcStatus(s));
+      });
+      ENQUEUE_REQUEST(CleanupGraph, false);
+    }
 
-  void TracingHandler(WorkerCall<TracingRequest, TracingResponse>* call) {
-    Schedule([this, call]() {
-      Status s = worker_->Tracing(&call->request, &call->response);
-      call->SendResponse(ToGrpcStatus(s));
-    });
-    ENQUEUE_REQUEST(Tracing, false);
-  }
+    void LoggingHandler(WorkerCall<LoggingRequest, LoggingResponse>* call) {
+      Schedule([this, call]() {
+        Status s = worker_->Logging(&call->request, &call->response);
+        call->SendResponse(ToGrpcStatus(s));
+      });
+      ENQUEUE_REQUEST(Logging, false);
+    }
+
+    void TracingHandler(WorkerCall<TracingRequest, TracingResponse>* call) {
+      Schedule([this, call]() {
+        Status s = worker_->Tracing(&call->request, &call->response);
+        call->SendResponse(ToGrpcStatus(s));
+      });
+      ENQUEUE_REQUEST(Tracing, false);
+    }
 #undef ENQUEUE_REQUEST
 
-  void EnqueueRecvTensorRequestRaw() {
-    mutex_lock l(shutdown_mu_);
-    if (!is_shutdown_) {
-      Call<GrpcWorkerService, grpc::WorkerService::AsyncService,
-           RecvTensorRequest, ::grpc::ByteBuffer>::
-          EnqueueRequestForMethod(
-              &worker_service_, cq_.get(),
-              static_cast<int>(GrpcWorkerMethod::kRecvTensor),
-              &GrpcWorkerService::RecvTensorHandlerRaw,
-              true /* supports cancel*/);
+    void EnqueueRecvTensorRequestRaw() {
+      mutex_lock l(shutdown_mu_);
+      if (!is_shutdown_) {
+        Call<GrpcWorkerServiceThread, grpc::WorkerService::AsyncService,
+             RecvTensorRequest, ::grpc::ByteBuffer>::
+            EnqueueRequestForMethod(
+                worker_service_, cq_.get(),
+                static_cast<int>(GrpcWorkerMethod::kRecvTensor),
+                &GrpcWorkerServiceThread::RecvTensorHandlerRaw,
+                true /* supports cancel*/);
+      }
     }
-  }
+
+    GrpcWorker* const worker_ = nullptr;  // Not owned.
+    std::unique_ptr<::grpc::ServerCompletionQueue> cq_;
+    std::unique_ptr<Thread> thread_;
+    grpc::WorkerService::AsyncService* const worker_service_;
+
+    mutex shutdown_mu_;
+    bool is_shutdown_ GUARDED_BY(shutdown_mu_);
+    TF_DISALLOW_COPY_AND_ASSIGN(GrpcWorkerServiceThread);
+  };  // GrpcWorkerServiceThread
+
+  grpc::WorkerService::AsyncService worker_service_;
+  std::vector<std::unique_ptr<GrpcWorkerServiceThread>> threads_;
+
+  mutex service_shutdown_mu_;
+  bool is_shutdown_ GUARDED_BY(service_shutdown_mu_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(GrpcWorkerService);
 };
diff --git a/tensorflow/core/distributed_runtime/session_mgr.cc b/tensorflow/core/distributed_runtime/session_mgr.cc
index fabcbd00f5e59a68a8db54c441dcc74377c44617..8db49e7f151517a51de1f64242031a8bd9bd96e6 100644
--- a/tensorflow/core/distributed_runtime/session_mgr.cc
+++ b/tensorflow/core/distributed_runtime/session_mgr.cc
@@ -33,12 +33,13 @@ SessionMgr::SessionMgr(
     WorkerCacheFactory worker_cache_factory)
     : worker_env_(worker_env),
       default_worker_cache_(std::move(default_worker_cache)),
-      legacy_session_("", default_worker_name,
-                      std::unique_ptr<WorkerCacheInterface>(
-                          new WorkerCacheWrapper(default_worker_cache_.get())),
-                      std::unique_ptr<DeviceMgr>(worker_env->device_mgr),
-                      std::unique_ptr<GraphMgr>(
-                          new GraphMgr(worker_env, worker_env->device_mgr))),
+      legacy_session_(new WorkerSession(
+          "", default_worker_name,
+          std::unique_ptr<WorkerCacheInterface>(
+              new WorkerCacheWrapper(default_worker_cache_.get())),
+          std::unique_ptr<DeviceMgr>(worker_env->device_mgr),
+          std::unique_ptr<GraphMgr>(
+              new GraphMgr(worker_env, worker_env->device_mgr)))),
       worker_cache_factory_(std::move(worker_cache_factory)) {}
 
 string SessionMgr::WorkerNameFromServerDef(const ServerDef& server_def) {
@@ -75,7 +76,7 @@ Status SessionMgr::CreateSession(const string& session,
   std::unique_ptr<GraphMgr> graph_mgr(
       new GraphMgr(worker_env_, device_mgr.get()));
 
-  std::unique_ptr<WorkerSession> worker_session(new WorkerSession(
+  std::shared_ptr<WorkerSession> worker_session(new WorkerSession(
       session, worker_name, std::unique_ptr<WorkerCacheInterface>(worker_cache),
       std::move(device_mgr), std::move(graph_mgr)));
 
@@ -92,21 +93,24 @@ Status SessionMgr::DeleteSession(const string& session) {
   return Status::OK();
 }
 
-WorkerSession* SessionMgr::WorkerSessionForSessionUnlocked(
+std::shared_ptr<WorkerSession> SessionMgr::WorkerSessionForSessionUnlocked(
     const string& session) {
   auto it = sessions_.find(session);
   if (it == sessions_.end()) {
-    return &legacy_session_;
+    return legacy_session_;
   } else {
-    return it->second.get();
+    return it->second;
   }
 }
 
-WorkerSession* SessionMgr::WorkerSessionForSession(const string& session) {
+std::shared_ptr<WorkerSession> SessionMgr::WorkerSessionForSession(
+    const string& session) {
   mutex_lock l(mu_);
   return WorkerSessionForSessionUnlocked(session);
 }
 
-WorkerSession* SessionMgr::LegacySession() { return &legacy_session_; }
+std::shared_ptr<WorkerSession> SessionMgr::LegacySession() {
+  return legacy_session_;
+}
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/distributed_runtime/session_mgr.h b/tensorflow/core/distributed_runtime/session_mgr.h
index d85b6c305941014fb52c4b4da6d646a707054c3a..ba077c3accff672f088bb7222858197b43ea4676 100644
--- a/tensorflow/core/distributed_runtime/session_mgr.h
+++ b/tensorflow/core/distributed_runtime/session_mgr.h
@@ -49,8 +49,8 @@ class SessionMgr {
                        bool isolate_session_state);
 
   // Locates the worker session for a given session handle
-  WorkerSession* WorkerSessionForSession(const string& session);
-  WorkerSession* LegacySession();
+  std::shared_ptr<WorkerSession> WorkerSessionForSession(const string& session);
+  std::shared_ptr<WorkerSession> LegacySession();
 
   Status DeleteSession(const string& session);
 
@@ -73,16 +73,16 @@ class SessionMgr {
   // device_mgr is deleted after WorkerSession's graph_mgr.
 
   std::unique_ptr<WorkerCacheInterface> default_worker_cache_;
-  WorkerSession legacy_session_;
+  std::shared_ptr<WorkerSession> legacy_session_;
 
   const WorkerCacheFactory worker_cache_factory_;
 
-  WorkerSession* WorkerSessionForSessionUnlocked(const string& session)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_);
+  std::shared_ptr<WorkerSession> WorkerSessionForSessionUnlocked(
+      const string& session) EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
   mutex mu_;
   // A map from session identifier to internal session structure.
-  std::map<string, std::unique_ptr<WorkerSession>> sessions_ GUARDED_BY(mu_);
+  std::map<string, std::shared_ptr<WorkerSession>> sessions_ GUARDED_BY(mu_);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/distributed_runtime/session_mgr_test.cc b/tensorflow/core/distributed_runtime/session_mgr_test.cc
index ffe4809f2b10398ca4c7dc503dd82236cbc8dd18..4d028f7f4a9e5eea7cd52b67ac41b03de3f0078f 100644
--- a/tensorflow/core/distributed_runtime/session_mgr_test.cc
+++ b/tensorflow/core/distributed_runtime/session_mgr_test.cc
@@ -59,7 +59,7 @@ class SessionMgrTest : public ::testing::Test {
         return Status::OK();
       };
   SessionMgr mgr_;
-  WorkerSession* legacy_session_;
+  std::shared_ptr<WorkerSession> legacy_session_;
 };
 
 TEST_F(SessionMgrTest, CreateSessionSimple) {
@@ -69,7 +69,7 @@ TEST_F(SessionMgrTest, CreateSessionSimple) {
 
   string session_handle = "test_session_handle";
   TF_EXPECT_OK(mgr_.CreateSession(session_handle, server_def, true));
-  WorkerSession* session = mgr_.WorkerSessionForSession(session_handle);
+  auto session = mgr_.WorkerSessionForSession(session_handle);
   EXPECT_NE(nullptr, session) << "Session for " << session_handle << "was null";
   EXPECT_NE(mgr_.LegacySession(), session);
   TF_EXPECT_OK(mgr_.DeleteSession(session_handle));
@@ -81,22 +81,22 @@ TEST_F(SessionMgrTest, CreateSessionIsolateSessionState) {
   server_def.set_task_index(3);
 
   TF_EXPECT_OK(mgr_.CreateSession("handle_1", server_def, false));
-  WorkerSession* session_1 = mgr_.WorkerSessionForSession("handle_1");
+  auto session_1 = mgr_.WorkerSessionForSession("handle_1");
   std::vector<Device*> devices_1 = session_1->device_mgr->ListDevices();
   EXPECT_EQ(1, devices_1.size());
 
   TF_EXPECT_OK(mgr_.CreateSession("handle_2", server_def, false));
-  WorkerSession* session_2 = mgr_.WorkerSessionForSession("handle_2");
+  auto session_2 = mgr_.WorkerSessionForSession("handle_2");
   std::vector<Device*> devices_2 = session_2->device_mgr->ListDevices();
   EXPECT_EQ(1, devices_2.size());
 
   TF_EXPECT_OK(mgr_.CreateSession("handle_3", server_def, true));
-  WorkerSession* session_3 = mgr_.WorkerSessionForSession("handle_3");
+  auto session_3 = mgr_.WorkerSessionForSession("handle_3");
   std::vector<Device*> devices_3 = session_3->device_mgr->ListDevices();
   EXPECT_EQ(1, devices_3.size());
 
   TF_EXPECT_OK(mgr_.CreateSession("handle_4", server_def, true));
-  WorkerSession* session_4 = mgr_.WorkerSessionForSession("handle_4");
+  auto session_4 = mgr_.WorkerSessionForSession("handle_4");
   std::vector<Device*> devices_4 = session_4->device_mgr->ListDevices();
   EXPECT_EQ(1, devices_4.size());
 
@@ -109,7 +109,7 @@ TEST_F(SessionMgrTest, CreateSessionIsolateSessionState) {
 TEST_F(SessionMgrTest, LegacySession) {
   ServerDef server_def;
   string session_handle = "";
-  WorkerSession* session = mgr_.WorkerSessionForSession(session_handle);
+  auto session = mgr_.WorkerSessionForSession(session_handle);
   EXPECT_EQ(mgr_.LegacySession(), session);
 
   TF_EXPECT_OK(mgr_.DeleteSession(session_handle));
diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc
index 6cd92f5fe7a9edaef1ed7db0926281d1a91cdcf2..63455493671fcd1f4282bc804f8f2a521c056dce 100644
--- a/tensorflow/core/distributed_runtime/worker.cc
+++ b/tensorflow/core/distributed_runtime/worker.cc
@@ -59,7 +59,7 @@ void Worker::DeleteWorkerSessionAsync(const DeleteWorkerSessionRequest* request,
 void Worker::RegisterGraphAsync(const RegisterGraphRequest* request,
                                 RegisterGraphResponse* response,
                                 StatusCallback done) {
-  WorkerSession* session =
+  auto session =
       env_->session_mgr->WorkerSessionForSession(request->session_handle());
   Status s = session->graph_mgr->Register(
       request->session_handle(), request->graph_def(), request->graph_options(),
@@ -71,7 +71,7 @@ void Worker::RegisterGraphAsync(const RegisterGraphRequest* request,
 void Worker::DeregisterGraphAsync(const DeregisterGraphRequest* request,
                                   DeregisterGraphResponse* response,
                                   StatusCallback done) {
-  WorkerSession* session =
+  auto session =
       env_->session_mgr->WorkerSessionForSession(request->session_handle());
   Status s = session->graph_mgr->Deregister(request->graph_handle());
 
@@ -109,6 +109,12 @@ Status Worker::PrepareRunGraph(RunGraphRequestWrapper* req,
 void Worker::RunGraphAsync(CallOptions* opts, RunGraphRequestWrapper* request,
                            MutableRunGraphResponseWrapper* response,
                            StatusCallback done) {
+  if (request->store_errors_in_response_body()) {
+    done = [response, done](const Status& status) {
+      response->set_status(status);
+      done(Status::OK());
+    };
+  }
   if (request->is_partial()) {
     DoPartialRunGraph(opts, request, response, std::move(done));
   } else {
@@ -129,7 +135,7 @@ void Worker::DoRunGraph(CallOptions* opts, RunGraphRequestWrapper* request,
                         StatusCallback done) {
   const int64 step_id = request->step_id();
   TRACEPRINTF("RunGraph: %lld", step_id);
-  WorkerSession* session =
+  auto session =
       env_->session_mgr->WorkerSessionForSession(request->session_handle());
   GraphMgr::NamedTensors in;
   GraphMgr::NamedTensors* out = new GraphMgr::NamedTensors;
@@ -167,7 +173,7 @@ void Worker::DoRunGraph(CallOptions* opts, RunGraphRequestWrapper* request,
     }
   }
   session->graph_mgr->ExecuteAsync(
-      request->graph_handle(), step_id, session, request->exec_opts(),
+      request->graph_handle(), step_id, session.get(), request->exec_opts(),
       collector, response, cm, in,
       [this, step_id, response, session, cm, out, token, collector, opts,
        done](Status s) {
@@ -203,7 +209,7 @@ void Worker::DoPartialRunGraph(CallOptions* opts,
   const int64 step_id = request->step_id();
   const string& graph_handle = request->graph_handle();
   TRACEPRINTF("PartialRunGraph: %lld", step_id);
-  WorkerSession* session =
+  auto session =
       env_->session_mgr->WorkerSessionForSession(request->session_handle());
 
   GraphMgr::NamedTensors in;
@@ -239,9 +245,9 @@ void Worker::DoPartialRunGraph(CallOptions* opts,
                                               [cm]() { cm->StartCancel(); });
     }
     session->graph_mgr->ExecuteAsync(
-        graph_handle, step_id, session, request->exec_opts(),
+        graph_handle, step_id, session.get(), request->exec_opts(),
         nullptr /* collector */, nullptr /* response */, cm, in,
-        [this, token, step_id, cm](Status s) {
+        [this, token, step_id, session, cm](Status s) {
           {
             mutex_lock l(mu_);
             cancellation_manager_->DeregisterCallback(token);
diff --git a/tensorflow/core/example/feature_util.h b/tensorflow/core/example/feature_util.h
index a87c2c9a57c7c80692359dc88be3aca2ce7779b6..4e9352ee32227376957157c7ada63390689ac39a 100644
--- a/tensorflow/core/example/feature_util.h
+++ b/tensorflow/core/example/feature_util.h
@@ -33,7 +33,7 @@ limitations under the License.
 //   GetFeatureValues<int64>("tag", &example)->Add(id);
 //
 // Modification of bytes features is slightly different:
-//   auto tag = GetFeatureValues<string>("tag", example);
+//   auto tag = GetFeatureValues<string>("tag", &example);
 //   *tag->Add() = "lorem ipsum";
 //
 // To copy multiple values into a feature:
diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc
index f5dadf76daf8d351e509c4ae538b31abf00d9566..2bd19663fc6f45aeae857a5bdd69bf41d5a94bd4 100644
--- a/tensorflow/core/framework/allocator.cc
+++ b/tensorflow/core/framework/allocator.cc
@@ -106,6 +106,13 @@ class CPUAllocator : public Allocator {
     *stats = stats_;
   }
 
+  void ClearStats() override {
+    mutex_lock l(mu_);
+    stats_.num_allocs = 0;
+    stats_.max_bytes_in_use = stats_.bytes_in_use;
+    stats_.max_alloc_size = 0;
+  }
+
   size_t AllocatedSizeSlow(void* ptr) override {
     return port::MallocExtension_GetAllocatedSize(ptr);
   }
diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index 5e048a028d2dd9bf60722c3bab6a81330a16d2d8..5a95d3a15d1699e518e16cd300bccfb7a40ab50f 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -198,6 +198,9 @@ class Allocator {
   // Fills in 'stats' with statistics collected by this allocator.
   virtual void GetStats(AllocatorStats* stats) { stats->Clear(); }
 
+  // Clears the internal stats except for the `in_use` field.
+  virtual void ClearStats() {}
+
  private:
   // No constructors or destructors are run for simple types
   template <typename T>
diff --git a/tensorflow/core/framework/allocator_test.cc b/tensorflow/core/framework/allocator_test.cc
index 032aeec161bb6978cb942747d3e0f8cff12f8853..a409cb2de7fbae20f435f464ca07155a36fede4a 100644
--- a/tensorflow/core/framework/allocator_test.cc
+++ b/tensorflow/core/framework/allocator_test.cc
@@ -110,6 +110,8 @@ TEST(CPUAllocatorTest, Simple) {
 
   CheckStats(a, 1025, 0, 1048576 * sizeof(double) + 1024 * sizeof(float),
              1048576 * sizeof(double));
+  a->ClearStats();
+  CheckStats(a, 0, 0, 0, 0);
   EnableCPUAllocatorStats(false);
 }
 
diff --git a/tensorflow/core/framework/attr_value_util.cc b/tensorflow/core/framework/attr_value_util.cc
index 5aba091840ed0cd32bf85980c7d12dc74e7f3fd9..a1c39d2a7a78354239f2cdbb718160906b233ddd 100644
--- a/tensorflow/core/framework/attr_value_util.cc
+++ b/tensorflow/core/framework/attr_value_util.cc
@@ -33,7 +33,19 @@ namespace tensorflow {
 namespace {
 
 string SummarizeString(const string& str) {
-  return strings::StrCat("\"", str_util::CEscape(str), "\"");
+  string escaped = str_util::CEscape(str);
+
+  // If the string is long, replace the middle with ellipses.
+  constexpr int kMaxStringSummarySize = 80;
+  if (escaped.size() >= kMaxStringSummarySize) {
+    StringPiece prefix(escaped);
+    StringPiece suffix = prefix;
+    prefix.remove_suffix(escaped.size() - 10);
+    suffix.remove_prefix(escaped.size() - 10);
+    return strings::StrCat("\"", prefix, "...", suffix, "\"");
+  } else {
+    return strings::StrCat("\"", escaped, "\"");
+  }
 }
 
 string SummarizeTensor(const TensorProto& tensor_proto) {
@@ -74,54 +86,47 @@ string SummarizeAttrValue(const AttrValue& attr_value) {
     case AttrValue::kTensor:
       return SummarizeTensor(attr_value.tensor());
     case AttrValue::kList: {
-      string ret = "[";
+      std::vector<string> pieces;
       if (attr_value.list().s_size() > 0) {
         for (int i = 0; i < attr_value.list().s_size(); ++i) {
-          if (i > 0) strings::StrAppend(&ret, ", ");
-          strings::StrAppend(&ret, SummarizeString(attr_value.list().s(i)));
+          pieces.push_back(SummarizeString(attr_value.list().s(i)));
         }
       } else if (attr_value.list().i_size() > 0) {
         for (int i = 0; i < attr_value.list().i_size(); ++i) {
-          if (i > 0) strings::StrAppend(&ret, ", ");
-          strings::StrAppend(&ret, attr_value.list().i(i));
+          pieces.push_back(strings::StrCat(attr_value.list().i(i)));
         }
       } else if (attr_value.list().f_size() > 0) {
         for (int i = 0; i < attr_value.list().f_size(); ++i) {
-          if (i > 0) strings::StrAppend(&ret, ", ");
-          strings::StrAppend(&ret, attr_value.list().f(i));
+          pieces.push_back(strings::StrCat(attr_value.list().f(i)));
         }
       } else if (attr_value.list().b_size() > 0) {
         for (int i = 0; i < attr_value.list().b_size(); ++i) {
-          if (i > 0) strings::StrAppend(&ret, ", ");
-          strings::StrAppend(&ret, attr_value.list().b(i) ? "true" : "false");
+          pieces.push_back(attr_value.list().b(i) ? "true" : "false");
         }
       } else if (attr_value.list().type_size() > 0) {
         for (int i = 0; i < attr_value.list().type_size(); ++i) {
-          if (i > 0) strings::StrAppend(&ret, ", ");
-          strings::StrAppend(&ret,
-                             EnumName_DataType(attr_value.list().type(i)));
+          pieces.push_back(EnumName_DataType(attr_value.list().type(i)));
         }
       } else if (attr_value.list().shape_size() > 0) {
         for (int i = 0; i < attr_value.list().shape_size(); ++i) {
-          if (i > 0) strings::StrAppend(&ret, ", ");
-          strings::StrAppend(
-              &ret, TensorShape::DebugString(attr_value.list().shape(i)));
+          pieces.push_back(
+              TensorShape::DebugString(attr_value.list().shape(i)));
         }
       } else if (attr_value.list().tensor_size() > 0) {
         for (int i = 0; i < attr_value.list().tensor_size(); ++i) {
-          if (i > 0) strings::StrAppend(&ret, ", ");
-          strings::StrAppend(&ret,
-                             SummarizeTensor(attr_value.list().tensor(i)));
+          pieces.push_back(SummarizeTensor(attr_value.list().tensor(i)));
         }
       } else if (attr_value.list().func_size() > 0) {
         for (int i = 0; i < attr_value.list().func_size(); ++i) {
-          if (i > 0) strings::StrAppend(&ret, ", ");
-          strings::StrAppend(&ret, SummarizeFunc(attr_value.list().func(i)));
+          pieces.push_back(SummarizeFunc(attr_value.list().func(i)));
         }
       }
-
-      strings::StrAppend(&ret, "]");
-      return ret;
+      constexpr int kMaxListSummarySize = 15;
+      if (pieces.size() >= kMaxListSummarySize) {
+        pieces.erase(pieces.begin() + 5, pieces.begin() + (pieces.size() - 6));
+        pieces[5] = "...";
+      }
+      return strings::StrCat("[", str_util::Join(pieces, ", "), "]");
     }
     case AttrValue::kFunc: {
       return SummarizeFunc(attr_value.func());
diff --git a/tensorflow/core/framework/attr_value_util_test.cc b/tensorflow/core/framework/attr_value_util_test.cc
index 1c9a209f05bcab1a0b4304aaddb2d0421e4df45f..e4fad917ffe1d4a0790bf1fd56e3c72f841523d8 100644
--- a/tensorflow/core/framework/attr_value_util_test.cc
+++ b/tensorflow/core/framework/attr_value_util_test.cc
@@ -135,6 +135,38 @@ TEST(AttrValueUtil, DeepAttr) {
             "f[F=f[F=f[F=[f[T=x[]], g[T=x[]]], T=x[]], T=x[]], T=x[]]");
 }
 
+TEST(AttrValueUtil, SummarizeAttrValueDoesNotElideShortStrings) {
+  AttrValue attr_value;
+  SetAttrValue(string(40, '-'), &attr_value);
+  EXPECT_EQ(strings::StrCat("\"", string(40, '-'), "\""),
+            SummarizeAttrValue(attr_value));
+}
+
+TEST(AttrValueUtil, SummarizeAttrValueElidesLongStrings) {
+  AttrValue attr_value;
+  SetAttrValue(string(80, '-'), &attr_value);
+  EXPECT_EQ("\"----------...----------\"", SummarizeAttrValue(attr_value));
+}
+
+TEST(AttrValueUtil, SummarizeAttrValueDoesNotElideShortLists) {
+  std::vector<int> alist(10);
+  std::iota(alist.begin(), alist.end(), 0);
+
+  AttrValue attr_value;
+  SetAttrValue(alist, &attr_value);
+  EXPECT_EQ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", SummarizeAttrValue(attr_value));
+}
+
+TEST(AttrValueUtil, SummarizeAttrValueElidesLongLists) {
+  std::vector<int> alist(30);
+  std::iota(alist.begin(), alist.end(), 0);
+
+  AttrValue attr_value;
+  SetAttrValue(alist, &attr_value);
+  EXPECT_EQ("[0, 1, 2, 3, 4, ..., 25, 26, 27, 28, 29]",
+            SummarizeAttrValue(attr_value));
+}
+
 AttrValue FromText(const string& text) {
   AttrValue attr;
   EXPECT_TRUE(protobuf::TextFormat::MergeFromString(text, &attr));
diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc
index 6e4533875160120229877664cff7429cfaf71d43..17e6209f8e5ad5240dfc8ca1def75c178da45c27 100644
--- a/tensorflow/core/framework/bfloat16_test.cc
+++ b/tensorflow/core/framework/bfloat16_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/bfloat16.h"
 
+#include "tensorflow/core/framework/numeric_types.h"
 #include "tensorflow/core/lib/core/casts.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
@@ -104,6 +105,17 @@ TEST(Bfloat16Test, Conversion) {
   }
 }
 
+TEST(Bfloat16Test, Epsilon) {
+  EXPECT_LT(1.0f, static_cast<float>(bfloat16::epsilon() + bfloat16(1.0f)));
+  EXPECT_EQ(1.0f, static_cast<float>((bfloat16::epsilon() / bfloat16(2.0f)) +
+                                     bfloat16(1.0f)));
+}
+
+TEST(Bfloat16Test, Negate) {
+  EXPECT_EQ(-3.0f, static_cast<float>(-bfloat16(3.0f)));
+  EXPECT_EQ(4.5f, static_cast<float>(-bfloat16(-4.5f)));
+}
+
 static void BM_FloatToBFloat16(int iters) {
   testing::StopTiming();
   static const int N = 32 << 20;
diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index ea66863bed3f3c9d91587a64370f635766d0794d..7ab8e3ec188a223e35b47b6f9517abd9327b23f8 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -397,6 +397,15 @@ Status Conv2DShape(shape_inference::InferenceContext* c) {
   TF_RETURN_IF_ERROR(
       CheckFormatConstraintsOnShape(data_format, filter_shape, "filter", c));
 
+  std::vector<int32> dilations;
+  TF_RETURN_IF_ERROR(c->GetAttr("dilations", &dilations));
+
+  if (dilations.size() != 4) {
+    return errors::InvalidArgument(
+        "Conv2D requires the dilation attribute to contain 4 values, but got: ",
+        dilations.size());
+  }
+
   std::vector<int32> strides;
   TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides));
 
@@ -410,6 +419,8 @@ Status Conv2DShape(shape_inference::InferenceContext* c) {
 
   const int32 stride_rows = GetTensorDim(strides, data_format, 'H');
   const int32 stride_cols = GetTensorDim(strides, data_format, 'W');
+  const int32 dilation_rows = GetTensorDim(dilations, data_format, 'H');
+  const int32 dilation_cols = GetTensorDim(dilations, data_format, 'W');
 
   DimensionHandle batch_size_dim;
   DimensionHandle input_depth_dim;
@@ -447,12 +458,12 @@ Status Conv2DShape(shape_inference::InferenceContext* c) {
   TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding));
 
   DimensionHandle output_rows, output_cols;
-  TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims(c, input_spatial_dims[0],
-                                                   filter_rows_dim, stride_rows,
-                                                   padding, &output_rows));
-  TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims(c, input_spatial_dims[1],
-                                                   filter_cols_dim, stride_cols,
-                                                   padding, &output_cols));
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2(
+      c, input_spatial_dims[0], filter_rows_dim, dilation_rows, stride_rows,
+      padding, &output_rows));
+  TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2(
+      c, input_spatial_dims[1], filter_cols_dim, dilation_cols, stride_cols,
+      padding, &output_cols));
 
   ShapeHandle output_shape;
   TF_RETURN_IF_ERROR(
@@ -1114,16 +1125,20 @@ Status ConcatShapeHelper(InferenceContext* c, int start_value_index,
     for (int i = start_value_index; i < end_value_index; ++i) {
       if (rank == InferenceContext::kUnknownRank) rank = c->Rank(c->input(i));
       if (rank != InferenceContext::kUnknownRank) {
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), rank, &unused));
+        break;
       }
     }
     if (rank == InferenceContext::kUnknownRank) {
       c->set_output(0, c->UnknownShape());
       return Status::OK();
-    }
-    if (rank == 0) {
+    } else if (rank == 0) {
       return errors::InvalidArgument(
           "Can't concatenate scalars (use tf.stack instead)");
+    } else {
+      for (int i = start_value_index; i < end_value_index; ++i) {
+        // Check that all the inputs are of the correct rank.
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), rank, &unused));
+      }
     }
     // Build result of <rank> different unknown dims.
     std::vector<DimensionHandle> dims;
@@ -1307,6 +1322,9 @@ Status ValidateSparseTensor(InferenceContext* c, ShapeHandle indices_shape,
 
 Status ScatterNdUpdateShape(InferenceContext* c) {
   ShapeHandle input_shape = c->input(0);
+  if (c->input_handle_shapes_and_types(0) != nullptr) {
+    input_shape = (*c->input_handle_shapes_and_types(0))[0].shape;
+  }
   ShapeHandle indices_shape;
   TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &indices_shape));
   ShapeHandle updates_shape;
@@ -1361,7 +1379,9 @@ Status ScatterNdUpdateShape(InferenceContext* c) {
     }
   }
 
-  c->set_output(0, input_shape);
+  if (c->input_handle_shapes_and_types(0) == nullptr) {
+    c->set_output(0, input_shape);
+  }
   return Status::OK();
 }
 
diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc
index ec9746b2af1ed0da348fbe7459c5d93d842b25d9..5f3e5ad45731750bfd73181c41cd029f23aab55f 100644
--- a/tensorflow/core/framework/common_shape_fns_test.cc
+++ b/tensorflow/core/framework/common_shape_fns_test.cc
@@ -423,6 +423,15 @@ TEST(CommonShapeFnsTest, Conv2DShapeTest) {
                     .Finalize(&op.node_def));
   };
 
+  // Invalid rank for input
+  INFER_ERROR("must be rank 4", op, "[4,4];[2,1,1,1]");
+  // Invalid rank for filter
+  INFER_ERROR("must be rank 4", op, "[1,4,4,1];[2,1,1]");
+
+  // Invalid value for strides
+  set_op({{1, 1, 0, 1}}, "VALID", "NHWC", "HWIO");
+  INFER_ERROR("must be > 0", op, "[1,2,2,1];[1,1,1,1]");
+
   // 1x1 filter
   set_op({{1, 1, 1, 1}}, "VALID", "NHWC", "HWIO");
   INFER_OK(op, "[1,2,2,1];[1,1,1,1]", "[d0_0,2,2,d1_3]");
@@ -443,11 +452,6 @@ TEST(CommonShapeFnsTest, Conv2DShapeTest) {
   set_op({{1, 1, 2, 1}}, "VALID", "NHWC", "HWIO");
   INFER_OK(op, "[1,4,4,1];[2,1,1,1]", "[d0_0,3,2,d1_3]");
 
-  // Invalid rank for input
-  INFER_ERROR("must be rank 4", op, "[4,4];[2,1,1,1]");
-  // Invalid rank for filter
-  INFER_ERROR("must be rank 4", op, "[1,4,4,1];[2,1,1]");
-
   // Unknown dims in the critical fields lead to partial inference.
   INFER_OK(op, "[1,4,4,1];[2,1,1,1]", "[d0_0,3,2,d1_3]");
   INFER_OK(op, "[1,?,4,1];[2,1,1,1]", "[d0_0,?,2,d1_3]");
@@ -538,6 +542,98 @@ TEST(CommonShapeFnsTest, Conv2DShapeTest) {
   INFER_OK(op, "[1,4,4,?];[?,?,?,?]", "[d0_0,2,2,d1_3]");
 }
 
+TEST(CommonShapeFnsTest, Conv2DDilatedShapeTest) {
+  ShapeInferenceTestOp op("Conv2D");
+  auto set_op = [&op](const std::vector<int32>& dilations,
+                      const std::vector<int32>& strides, const string& padding,
+                      const string& data_format) {
+    TF_CHECK_OK(NodeDefBuilder("test", "Conv2D")
+                    .Input("input", 0, DT_FLOAT)
+                    .Input("filter", 0, DT_FLOAT)
+                    .Attr("dilations", dilations)
+                    .Attr("strides", strides)
+                    .Attr("padding", padding)
+                    .Attr("data_format", data_format)
+                    .Finalize(&op.node_def));
+  };
+
+  // Invalid rank for dilation
+  set_op({{1, 2, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC");
+  INFER_ERROR("contain 4 values", op, "[1,2,2,1];[1,1,1,1]");
+
+  // Invalid value for dilation
+  set_op({{1, 0, 1, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC");
+  INFER_ERROR("must be >= 1", op, "[1,2,2,1];[1,1,1,1]");
+
+  // Tests for NHWC
+  // 1x1 filter, 2x1 dilations, 1x1 strides
+  set_op({{1, 2, 1, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,2,2,1];[1,1,1,1]", "[d0_0,2,2,d1_3]");
+
+  // 1x1 filter, 2x1 dilations, 2x1 strides
+  set_op({{1, 2, 1, 1}}, {{1, 2, 1, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,4,4,1];[1,1,1,1]", "[d0_0,2,4,d1_3]");
+
+  // 1x1 filter, 2x1 dilations, 2x2 strides
+  set_op({{1, 2, 1, 1}}, {{1, 2, 2, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,4,4,1];[1,1,1,1]", "[d0_0,2,2,d1_3]");
+
+  // 3x3 filter, 2x1 dilations, 1x1 strides
+  set_op({{1, 2, 1, 1}}, {{1, 1, 1, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,5,5,1];[3,3,1,1]", "[d0_0,1,3,d1_3]");
+
+  // 3x3 filter, 2x1 dilations, 2x1 strides
+  set_op({{1, 2, 1, 1}}, {{1, 2, 1, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,5,5,1];[3,3,1,1]", "[d0_0,1,3,d1_3]");
+
+  // 3x3 filter, 1x2 dilations, 2x2 strides
+  set_op({{1, 1, 2, 1}}, {{1, 2, 2, 1}}, "VALID", "NHWC");
+  INFER_OK(op, "[1,5,5,1];[3,3,1,1]", "[d0_0,2,1,d1_3]");
+
+  // Tests for NCHW
+  // 1x1 filter, 2x1 dilations, 1x1 strides
+  set_op({{1, 1, 2, 1}}, {{1, 1, 1, 1}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,2,2];[1,1,1,1]", "[d0_0,d1_3,2,2]");
+
+  // 1x1 filter, 2x1 dilations, 2x1 strides
+  set_op({{1, 1, 2, 1}}, {{1, 1, 2, 1}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,4,4];[1,1,1,1]", "[d0_0,d1_3,2,4]");
+
+  // 1x1 filter, 2x1 dilations, 2x2 strides
+  set_op({{1, 1, 2, 1}}, {{1, 1, 2, 2}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,4,4];[1,1,1,1]", "[d0_0,d1_3,2,2]");
+
+  // 3x3 filter, 2x1 dilations, 1x1 strides
+  set_op({{1, 1, 2, 1}}, {{1, 1, 1, 1}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,5,5];[3,3,1,1]", "[d0_0,d1_3,1,3]");
+
+  // 3x3 filter, 2x1 dilations, 2x1 strides
+  set_op({{1, 1, 2, 1}}, {{1, 1, 2, 1}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,5,5];[3,3,1,1]", "[d0_0,d1_3,1,3]");
+
+  // 3x3 filter, 1x2 dilations, 2x2 strides
+  set_op({{1, 1, 1, 2}}, {{1, 1, 2, 2}}, "VALID", "NCHW");
+  INFER_OK(op, "[1,1,5,5];[3,3,1,1]", "[d0_0,d1_3,2,1]");
+
+  // Some tests for "SAME" padding
+
+  // 4x4 input, 1x1 filter, 2x1 dilations, 1x1 stride
+  set_op({{1, 2, 1, 1}}, {{1, 1, 1, 1}}, "SAME", "NHWC");
+  INFER_OK(op, "[1,4,4,1];[1,1,1,1]", "[d0_0,d0_1,d0_2,d1_3]");
+
+  // 3x3 input, 2x2 filter, 2x2 dilations, 1x1 stride
+  set_op({{1, 2, 2, 1}}, {{1, 1, 1, 1}}, "SAME", "NHWC");
+  INFER_OK(op, "[1,3,3,1];[2,2,1,1]", "[d0_0,d0_1,d0_2,d1_3]");
+
+  // 4x4 input, 2x2 filter, 1x2 dilations, 2x2 stride
+  set_op({{1, 1, 2, 1}}, {{1, 2, 2, 1}}, "SAME", "NHWC");
+  INFER_OK(op, "[1,4,4,1];[2,2,1,1]", "[d0_0,2,2,d1_3]");
+
+  // 4x4 input, 2x2 filter, 2x2 dilations, 1x1 stride
+  set_op({{1, 2, 2, 1}}, {{1, 1, 1, 1}}, "SAME", "NHWC");
+  INFER_OK(op, "[1,4,4,1];[2,2,1,1]", "[d0_0,d0_1,d0_2,d1_3]");
+}
+
 TEST(CommonShapeFnsTest, Conv3DShapeTest) {
   ShapeInferenceTestOp op("Conv3D");
   auto set_op = [&op](const std::vector<int32>& strides,
diff --git a/tensorflow/core/framework/cost_graph.proto b/tensorflow/core/framework/cost_graph.proto
index f4837fbfc55dc266bad01c9300e3a8b63c67f1e0..7885b0171a55a408878a127eb1259b65fb9466ea 100644
--- a/tensorflow/core/framework/cost_graph.proto
+++ b/tensorflow/core/framework/cost_graph.proto
@@ -45,10 +45,12 @@ message CostGraphDef {
     // Temporary memory used by this node.
     int64 temporary_memory_size = 6;
 
-    int64 host_temp_memory_size = 10;
-    int64 device_temp_memory_size = 11;
-    int64 host_persistent_memory_size = 12;
-    int64 device_persistent_memory_size = 16;
+    // Persistent memory used by this node.
+    int64 persistent_memory_size = 12;
+
+    int64 host_temp_memory_size = 10 [deprecated = true];
+    int64 device_temp_memory_size = 11 [deprecated = true];
+    int64 device_persistent_memory_size = 16 [deprecated = true];
 
     // Estimate of the computational cost of this node, in microseconds.
     int64 compute_cost = 9;
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index 33bd5d250cd6b5df8c933e3f353efd9a1eee592c..1838a8ad02d2bd5522ce3162fea53e3f5afc0309 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -145,6 +145,12 @@ class DeviceBase {
     return gpu_device_info_;
   }
 
+  // The preferred thread pool for this device. If it is nullptr, the system
+  // automatically assigns a thread pool for execution.
+  virtual thread::ThreadPool* tensorflow_device_thread_pool() {
+    return device_thread_pool_;
+  }
+
   // Does not take ownership.
   void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d) {
     eigen_cpu_device_ = d;
@@ -215,10 +221,17 @@ class DeviceBase {
     return errors::Internal("Device does not implement MakeTensorFromProto()");
   }
 
+ protected:
+  // Does not take ownership.
+  void set_tensorflow_device_thread_pool(thread::ThreadPool* thread_pool) {
+    device_thread_pool_ = thread_pool;
+  }
+
  private:
   Env* const env_;
   CpuWorkerThreads* cpu_worker_threads_ = nullptr;
   GpuDeviceInfo* gpu_device_info_ = nullptr;
+  thread::ThreadPool* device_thread_pool_ = nullptr;
   Eigen::ThreadPoolDevice* eigen_cpu_device_ = nullptr;
 #ifdef TENSORFLOW_USE_SYCL
   Eigen::SyclDevice* eigen_sycl_device_ = nullptr;
diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index d757e962e522f801243a35a362f0c6821814d948..0224f252270cdfb856957be33b3dd857ecb07ec9 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -795,12 +795,25 @@ uint64 FunctionDefHash(const FunctionDef& fdef) {
   return h;
 }
 
-string Canonicalize(const string& funcname, AttrSlice attrs) {
+string Canonicalize(const string& funcname, AttrSlice attrs,
+                    const FunctionLibraryRuntime::InstantiateOptions& options) {
   std::vector<string> entries;
-  entries.reserve(attrs.size());
+  entries.reserve(options.target.empty() ? attrs.size() : (attrs.size() + 1));
   for (auto p : attrs) {
     entries.push_back(strings::StrCat(p.first, "=", Print(p.second)));
   }
+  if (!options.target.empty()) {
+    entries.push_back(
+        strings::StrCat("_target", "=", str_util::CEscape(options.target)));
+  }
+  if (options.overlay_lib) {
+    entries.push_back(strings::StrCat(
+        "_overlay_lib", "=", reinterpret_cast<uintptr_t>(options.overlay_lib)));
+  }
+  if (!options.state_handle.empty()) {
+    entries.push_back(
+        strings::StrCat("_state_handle", "=", options.state_handle));
+  }
   std::sort(entries.begin(), entries.end());
   return strings::StrCat(funcname, "[", str_util::Join(entries, ","), "]");
 }
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index 305b140a446171ddc4b249c97967057aa3e00152..3bb5638cdf232c144157b587a7431f435e2fa6ea 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -234,14 +234,16 @@ bool FunctionDefsEqual(const FunctionDef& f1, const FunctionDef& f2);
 // same.
 uint64 FunctionDefHash(const FunctionDef& fdef);
 
-// Returns a canonicalized string for the instantiation of the
-// function of the given "name" and attributes "attrs".
-//
-// The returned string is guaranteed to be stable within one address
-// space. But it may be change as the implementation
-// evolves. Therefore, it should not be persisted or compared across
-// address spaces.
-string Canonicalize(const string& funcname, AttrSlice attrs);
+class CallFrameInterface {
+ public:
+  virtual ~CallFrameInterface() {}
+
+  virtual size_t num_args() const = 0;
+  virtual size_t num_retvals() const = 0;
+
+  virtual Status GetArg(int index, Tensor* val) const = 0;
+  virtual Status SetRetval(int index, const Tensor& val) = 0;
+};
 
 // Represents a function call frame. I.e., the data structure used to
 // pass arguments to a function and retrieve its results.
@@ -249,7 +251,7 @@ string Canonicalize(const string& funcname, AttrSlice attrs);
 // Runtime must arrange accesses to one FunctionCallFrame s.t.
 //   1. SetArgs() happens before any GetArg();
 //   2. GetRetvals happens after all SetRetval();
-class FunctionCallFrame {
+class FunctionCallFrame : public CallFrameInterface {
  public:
   FunctionCallFrame(DataTypeSlice arg_types, DataTypeSlice ret_types);
   ~FunctionCallFrame();
@@ -259,9 +261,12 @@ class FunctionCallFrame {
   Status GetRetvals(std::vector<Tensor>* rets) const;
   Status ConsumeRetvals(std::vector<Tensor>* rets);
 
+  size_t num_args() const override { return arg_types_.size(); }
+  size_t num_retvals() const override { return ret_types_.size(); }
+
   // Callee methods.
-  Status GetArg(int index, Tensor* val) const;
-  Status SetRetval(int index, const Tensor& val);
+  Status GetArg(int index, Tensor* val) const override;
+  Status SetRetval(int index, const Tensor& val) override;
 
  private:
   DataTypeVector arg_types_;
@@ -349,6 +354,8 @@ class FunctionLibraryDefinition : public OpRegistryInterface {
   // Returns a proto representation of the state of this function library.
   FunctionDefLibrary ToProto() const;
 
+  size_t num_functions() const { return function_defs_.size(); }
+
   const OpRegistryInterface* default_registry() const {
     return default_registry_;
   }
@@ -404,9 +411,47 @@ class FunctionLibraryRuntime {
   //
   // Returns OK and fills in "handle" if the instantiation succeeds.
   // Otherwise returns an error and "handle" is undefined.
+  struct InstantiateOptions {
+    // The canonical device name of the device on which the function
+    // should be instantiated. If empty, the function will be
+    // instantiated on the local device.
+    string target;
+
+    // This interface is EXPERIMENTAL and subject to change.
+    //
+    // If non-null, the runtime will use `overlay_lib` to resolve
+    // function(s) named in `function_name` and `attrs`. Otherwise,
+    // the runtime will use its internal library.
+    // NOTE(mrry): If provided, all functions defined in `overlay_lib`
+    // must be self-contained, and cannot refer to functions defined
+    // in other libraries.
+    // TODO(mrry): Provide a mechanism for sharing core functions
+    // between a set of libraries (e.g. by allowing a
+    // `FunctionLibraryDefinition` to store an `outer_scope` pointer
+    // and implementing name resolution across libraries).
+    const FunctionLibraryDefinition* overlay_lib = nullptr;
+
+    // This interface is EXPERIMENTAL and subject to change.
+    //
+    // If non-empty, the runtime will use `state_handle` to identify
+    // cached state related the instantiated function. Two functions
+    // of the same name and attrs, instantiated with the same
+    // `state_handle` will have the same handle and share the same
+    // state (in stateful kernels); and two functions with different
+    // values for `state_handle` will have independent state.
+    string state_handle;
+  };
   typedef uint64 Handle;
   virtual Status Instantiate(const string& function_name, AttrSlice attrs,
+                             const InstantiateOptions& options,
                              Handle* handle) = 0;
+  Status Instantiate(const string& function_name, AttrSlice attrs,
+                     Handle* handle) {
+    return Instantiate(function_name, attrs, {}, handle);
+  }
+
+  // Releases state associated with the handle.
+  virtual Status ReleaseHandle(Handle handle) = 0;
 
   // Returns the function body for the instantiated function given its
   // handle 'h'. Returns nullptr if "h" is not found.
@@ -453,6 +498,8 @@ class FunctionLibraryRuntime {
   virtual void Run(const Options& opts, Handle handle,
                    gtl::ArraySlice<Tensor> args, std::vector<Tensor>* rets,
                    DoneCallback done) = 0;
+  virtual void Run(const Options& opts, Handle handle,
+                   CallFrameInterface* call_frame, DoneCallback done) = 0;
 
   // Creates a "kernel" for the given node def "ndef".
   //
@@ -460,13 +507,19 @@ class FunctionLibraryRuntime {
   // returned "*kernel". Otherwise, returns an error.
   virtual Status CreateKernel(const NodeDef& ndef, OpKernel** kernel) = 0;
 
-  // Returns true iff 'function' is stateful.
+  // Returns true iff the function named `function_name` is stateful.
+  // NOTE(mrry): This method assumes that the runtime is associated with a
+  // default function library, and looks up `function_name` in that library.
+  // It does not support overlay libraries.
   virtual bool IsStateful(const string& function_name) = 0;
 
   // Returns the device on which the function executes.
   virtual Device* device() = 0;
 
   // Returns the function library definition that backs this runtime.
+  // NOTE(mrry): The returned library definition is the default function library
+  // for this runtime. The runtime may instantiate functions from separate
+  // overlay libraries, which are not returned by this function.
   virtual const FunctionLibraryDefinition* GetFunctionLibraryDefinition()
       const = 0;
 
@@ -483,6 +536,19 @@ class FunctionLibraryRuntime {
   typedef uint64 LocalHandle;
 };
 
+// Returns a canonicalized string for the instantiation of the
+// function of the given "name", attributes "attrs", and "options".
+//
+// The returned string is guaranteed to be stable within one address
+// space. But it may be change as the implementation
+// evolves. Therefore, it should not be persisted or compared across
+// address spaces.
+string Canonicalize(const string& funcname, AttrSlice attrs,
+                    const FunctionLibraryRuntime::InstantiateOptions& options);
+inline string Canonicalize(const string& funcname, AttrSlice attrs) {
+  return Canonicalize(funcname, attrs, {});
+}
+
 const FunctionLibraryRuntime::Handle kInvalidHandle = -1;
 const FunctionLibraryRuntime::LocalHandle kInvalidLocalHandle = -1;
 typedef std::function<Status(FunctionLibraryRuntime*, const NodeDef&,
@@ -495,10 +561,11 @@ class DistributedFunctionLibraryRuntime {
   virtual ~DistributedFunctionLibraryRuntime() {}
 
   // The _target attr in attrs determines where the function is instantiated.
-  virtual Status Instantiate(const string& function_name,
-                             const FunctionLibraryDefinition& lib_def,
-                             AttrSlice attrs,
-                             FunctionLibraryRuntime::LocalHandle* handle) = 0;
+  virtual Status Instantiate(
+      const string& function_name, const FunctionLibraryDefinition& lib_def,
+      AttrSlice attrs,
+      const FunctionLibraryRuntime::InstantiateOptions& options,
+      FunctionLibraryRuntime::LocalHandle* handle) = 0;
 
   // opts.runner isn't used for execution.
   virtual void Run(const FunctionLibraryRuntime::Options& opts,
diff --git a/tensorflow/core/framework/function_testlib.cc b/tensorflow/core/framework/function_testlib.cc
index f8b456051b76241104febd29d55fe82a9146a239..2b5a0fe1bb897ed2a43785637e873afcb7b3e45d 100644
--- a/tensorflow/core/framework/function_testlib.cc
+++ b/tensorflow/core/framework/function_testlib.cc
@@ -149,33 +149,25 @@ FunctionDef XTimes16() {
       {{"y", "y:y:0"}});
 }
 
-FunctionDef WXPlusB(){return FDH::Define(
-    // Name
-    "WXPlusB",
-    // Args
-    {"w: T", "x: T", "b: T"},
-    // Return values
-    {"y: T"},
-    // Attr def
-    {"T: {float, double}"},
-    // Nodes
-    {
-      {{"mm"},
-       "MatMul",
-       {"w", "x"},
-       {
-           {"T", "$T"}, {"transpose_a", false}, {"transpose_b", false},
-#ifdef INTEL_MKL
-       }},
-#else
+FunctionDef WXPlusB() {
+  return FDH::Define(
+      // Name
+      "WXPlusB",
+      // Args
+      {"w: T", "x: T", "b: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double}"},
+      // Nodes
+      {{{"mm"},
+        "MatMul",
+        {"w", "x"},
+        {{"T", "$T"},
+         {"transpose_a", false},
+         {"transpose_b", false},
          {"_kernel", "eigen"}}},
-#endif
-      {
-        {"y"}, "Add", {"mm", "b"}, {
-          { "T", "$T" }
-        }
-      }
-    });
+       {{"y"}, "Add", {"mm", "b"}, {{"T", "$T"}}}});
 }
 
 FunctionDef Swap() {
@@ -193,6 +185,23 @@ FunctionDef Swap() {
        {{"o1"}, "Identity", {"i0"}, {{"T", "$T"}}}});
 }
 
+FunctionDef InvalidControlFlow() {
+  return FDH::Create(
+      // Name
+      "InvalidControlFlow",
+      // Args
+      {"i: int32"},
+      // Return values
+      {"o: int32"},
+      // Attr def
+      {},
+      // Nodes
+      {{{"enter"}, "Enter", {"i"}, {{"T", DT_INT32}, {"frame_name", "while"}}},
+       {{"add"}, "Add", {"enter:output", "i"}, {{"T", DT_INT32}}}},
+      // Output mapping
+      {{"o", "add:z"}});
+}
+
 void FunctionTestSchedClosure(std::function<void()> fn) {
   static thread::ThreadPool* w =
       new thread::ThreadPool(Env::Default(), "Test", 8);
diff --git a/tensorflow/core/framework/function_testlib.h b/tensorflow/core/framework/function_testlib.h
index fbf273fa015c9326e01f45d1c603d22ab239fe25..b67c5cb1ab94f9e203f99b2a5982e282c76f942c 100644
--- a/tensorflow/core/framework/function_testlib.h
+++ b/tensorflow/core/framework/function_testlib.h
@@ -81,6 +81,9 @@ FunctionDef NonZero();
 // x:T, y:T -> y:T, x:T
 FunctionDef Swap();
 
+// Contains malformed control flow which can't be run by the executor.
+FunctionDef InvalidControlFlow();
+
 void FunctionTestSchedClosure(std::function<void()> fn);
 
 }  // end namespace function
diff --git a/tensorflow/core/framework/memory_types.cc b/tensorflow/core/framework/memory_types.cc
index 6a2eed94b94971d20faffa1608627290c1109d66..270118bb678e110269be9aa67a3904e36c34c512 100644
--- a/tensorflow/core/framework/memory_types.cc
+++ b/tensorflow/core/framework/memory_types.cc
@@ -61,7 +61,8 @@ void MemoryTypesHelper(const NameRangeMap& name_map,
 }
 
 MemoryType MTypeFromDType(const DataType dtype) {
-  return (dtype == DT_INT32) ? HOST_MEMORY : DEVICE_MEMORY;
+  return (dtype == DT_INT32 || DataTypeAlwaysOnHost(dtype)) ? HOST_MEMORY
+                                                            : DEVICE_MEMORY;
 }
 
 }  // namespace
@@ -118,6 +119,20 @@ Status MemoryTypesForNode(const OpRegistryInterface* op_registry,
         "HostMemory args '", str_util::Join(host_memory_args, "', '"),
         "' not found in OpDef: ", SummarizeOpDef(*op_def));
   }
+  CHECK_LE(inp_mtypes->size(), inp_dtypes.size());
+  CHECK_LE(out_mtypes->size(), out_dtypes.size());
+
+  // Mark e.g. all resource and string types as host memory.
+  for (int i = 0; i < inp_mtypes->size(); ++i) {
+    if (DataTypeAlwaysOnHost(inp_dtypes[i])) {
+      (*inp_mtypes)[i] = HOST_MEMORY;
+    }
+  }
+  for (int i = 0; i < out_mtypes->size(); ++i) {
+    if (DataTypeAlwaysOnHost(out_dtypes[i])) {
+      (*out_mtypes)[i] = HOST_MEMORY;
+    }
+  }
 
   std::vector<int32> hostmem_attr;
   if (GetNodeAttr(ndef, "_input_hostmem", &hostmem_attr).ok()) {
diff --git a/tensorflow/core/framework/memory_types_test.cc b/tensorflow/core/framework/memory_types_test.cc
index 4704da9a119c2b06db5c8b1a3874417a0b1c3617..3126ea8e5f8974cb11f88301de613eb5b920830f 100644
--- a/tensorflow/core/framework/memory_types_test.cc
+++ b/tensorflow/core/framework/memory_types_test.cc
@@ -36,11 +36,13 @@ REGISTER_OP("HostMemoryTest")
     .Input("b: T")
     .Input("c: N * string")
     .Input("d: Tlist")
+    .Input("e: Rlist")
     .Output("o: N * T")
     .Output("p: Tlist")
     .Attr("T: type")
     .Attr("N: int")
-    .Attr("Tlist: list(type)");
+    .Attr("Tlist: list(type)")
+    .Attr("Rlist: list(type)");
 REGISTER_KERNEL_BUILDER(Name("HostMemoryTest").Device(DEVICE_CPU), DummyKernel);
 REGISTER_KERNEL_BUILDER(Name("HostMemoryTest")
                             .Device(DEVICE_GPU)
@@ -57,15 +59,20 @@ TEST(MemoryTypesForNode, Simple) {
                    .Input(FakeInput(DT_BOOL))
                    .Input(FakeInput(3))
                    .Input(FakeInput({DT_INT32, DT_FLOAT, DT_INT32}))
+                   .Input(FakeInput({DT_RESOURCE, DT_STRING, DT_RESOURCE}))
                    .Finalize(&node_def));
   MemoryTypeVector input, output;
 
   TF_EXPECT_OK(MemoryTypesForNode(OpRegistry::Global(), DEVICE_CPU, node_def,
                                   &input, &output));
-  EXPECT_EQ(MemoryTypeVector({DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY,
-                              DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY,
-                              DEVICE_MEMORY, DEVICE_MEMORY}),
-            input);
+  // a:float, b:bool, c:3*string, d:(int32, float, int32),
+  // e:(resource, string, resource)
+  EXPECT_EQ(
+      MemoryTypeVector({DEVICE_MEMORY, DEVICE_MEMORY, HOST_MEMORY, HOST_MEMORY,
+                        HOST_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY,
+                        DEVICE_MEMORY, HOST_MEMORY, HOST_MEMORY, HOST_MEMORY}),
+      input);
+  // o:3*bool, p:(int32, float, int32)
   EXPECT_EQ(MemoryTypeVector({DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY,
                               DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY}),
             output);
@@ -74,7 +81,8 @@ TEST(MemoryTypesForNode, Simple) {
                                   &input, &output));
   EXPECT_EQ(
       MemoryTypeVector({HOST_MEMORY, DEVICE_MEMORY, HOST_MEMORY, HOST_MEMORY,
-                        HOST_MEMORY, HOST_MEMORY, HOST_MEMORY, HOST_MEMORY}),
+                        HOST_MEMORY, HOST_MEMORY, HOST_MEMORY, HOST_MEMORY,
+                        HOST_MEMORY, HOST_MEMORY, HOST_MEMORY}),
       input);
   EXPECT_EQ(MemoryTypeVector({HOST_MEMORY, HOST_MEMORY, HOST_MEMORY,
                               DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY}),
diff --git a/tensorflow/core/framework/node_def_util.cc b/tensorflow/core/framework/node_def_util.cc
index 477184022df4bb7e4d329cc5ed09572f9dbe9585..95fb3863144e8150d78f5d21722f6bc102c451ea 100644
--- a/tensorflow/core/framework/node_def_util.cc
+++ b/tensorflow/core/framework/node_def_util.cc
@@ -347,6 +347,36 @@ Status AddArgToSig(const NodeDef& node_def, const OpDef::ArgDef& arg_def,
 
 }  // namespace
 
+Status InputTypeForNode(const NodeDef& node_def, const OpDef& op_def,
+                        int input_port, DataType* input_type) {
+  DataTypeVector input_types;
+  for (const auto& arg : op_def.input_arg()) {
+    TF_RETURN_IF_ERROR(AddArgToSig(node_def, arg, &input_types));
+    if (input_types.size() > input_port) {
+      const DataType dtype = input_types[input_port];
+      *input_type = dtype;
+      return Status::OK();
+    }
+  }
+  return errors::InvalidArgument("Input ", input_port, " not found for node ",
+                                 node_def.name());
+}
+
+Status OutputTypeForNode(const NodeDef& node_def, const OpDef& op_def,
+                         int output_port, DataType* output_type) {
+  DataTypeVector output_types;
+  for (const auto& arg : op_def.output_arg()) {
+    TF_RETURN_IF_ERROR(AddArgToSig(node_def, arg, &output_types));
+    if (output_types.size() > output_port) {
+      const DataType dtype = output_types[output_port];
+      *output_type = dtype;
+      return Status::OK();
+    }
+  }
+  return errors::InvalidArgument("Output ", output_port, " not found for node ",
+                                 node_def.name());
+}
+
 Status InOutTypesForNode(const NodeDef& node_def, const OpDef& op_def,
                          DataTypeVector* inputs, DataTypeVector* outputs) {
   for (const auto& arg : op_def.input_arg()) {
diff --git a/tensorflow/core/framework/node_def_util.h b/tensorflow/core/framework/node_def_util.h
index f6f28aac4811d30b845191735536b389e41bf259..b8a1e84f2e79d6537f58e9ac15ff8e1a22f877c7 100644
--- a/tensorflow/core/framework/node_def_util.h
+++ b/tensorflow/core/framework/node_def_util.h
@@ -23,6 +23,8 @@ limitations under the License.
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/gtl/flatmap.h"
+#include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 namespace tensorflow {
@@ -237,6 +239,14 @@ bool GetNodeAttrSimple(const AttrSlice& attrs, StringPiece attr_name,
 // REQUIRES: Must not use the returned value beyond the lifetime of node_def.
 const string& GetNodeAttrString(const AttrSlice& attrs, StringPiece attr_name);
 
+// Computes the input type for a specific node input.
+// REQUIRES: ValidateOpDef(op_def).ok()
+Status InputTypeForNode(const NodeDef& node_def, const OpDef& op_def,
+                        int input_port, DataType* input_type);
+// Computes the output type for a specific node output.
+// REQUIRES: ValidateOpDef(op_def).ok()
+Status OutputTypeForNode(const NodeDef& node_def, const OpDef& op_def,
+                         int output_port, DataType* output_type);
 // Computes the input and output types for a specific node.
 // REQUIRES: ValidateOpDef(op_def).ok()
 Status InOutTypesForNode(const NodeDef& node_def, const OpDef& op_def,
@@ -253,8 +263,12 @@ Status ValidateNodeDef(const NodeDef& node_def, const OpDef& op_def);
 // corresponding input/output index range.  For example,
 // input "foo" corresponds to input indices
 //   [ (*inputs)["foo"].first, (*inputs)["foo"].second ).
-// TODO(irving): Remove the NodeDef version; keep only the Node version.
-typedef std::unordered_map<string, std::pair<int, int>> NameRangeMap;
+// NOTE(mrry): To reduce allocations when the map is used and save
+// space, the returned `NameRangeMap` objects borrow the input/output
+// argument names from `op_def`. The `op_def` must outlive the
+// returned `NameRangeMap` objects.
+typedef gtl::FlatMap<StringPiece, std::pair<int, int>, hash<StringPiece>>
+    NameRangeMap;
 Status NameRangesForNode(const NodeDef& node_def, const OpDef& op_def,
                          NameRangeMap* inputs, NameRangeMap* outputs);
 Status NameRangesForNode(const Node& node, const OpDef& op_def,
diff --git a/tensorflow/core/framework/node_def_util_test.cc b/tensorflow/core/framework/node_def_util_test.cc
index bfd598a97202e4bcbf1f869b2687f7cbca36b36b..ae3a93eafeefb2be3a85e546c085691a72caf2e1 100644
--- a/tensorflow/core/framework/node_def_util_test.cc
+++ b/tensorflow/core/framework/node_def_util_test.cc
@@ -151,8 +151,9 @@ TEST(NodeDefUtilTest, Out) {
   AddNodeAttr("T", DT_STRING, &bad);
   ExpectFailure(bad, op,
                 "Value for attr 'T' of string is not in the list of allowed "
-                "values: float, double, int64, int32, uint8, uint16, int16, "
-                "int8, complex64, complex128, qint8, quint8, qint32");
+                "values: float, double, int32, uint8, int16, int8, complex64, "
+                "int64, qint8, quint8, qint32, bfloat16, uint16, complex128, "
+                "half, uint32, uint64");
 }
 
 TEST(NodeDefUtilTest, Enum) {
diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index 2b080e13fdb8308f71c967ab14c6ed71ccd8f357..99a5d0a054e9fe2c5dd729e165276369ebea7a71 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -41,108 +41,42 @@ typedef Eigen::QInt32 qint32;
 typedef Eigen::QInt16 qint16;
 typedef Eigen::QUInt16 quint16;
 
-// see framework/bfloat16.h for description.
-struct bfloat16 {
-  EIGEN_DEVICE_FUNC bfloat16() {}
-
-  EIGEN_DEVICE_FUNC explicit bfloat16(const float v) {
-    const uint16_t* p = reinterpret_cast<const uint16_t*>(&v);
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-    value = p[0];
-#else
-    value = p[1];
-#endif
-  }
-
-  template <class T>
-  explicit EIGEN_DEVICE_FUNC bfloat16(const T& val)
-      : bfloat16(static_cast<float>(val)) {}
-
-  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
-    float result;
-
-    uint16_t* q = reinterpret_cast<uint16_t*>(&result);
-
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-    q[0] = value;
-    q[1] = 0;
-#else
-    q[0] = 0;
-    q[1] = value;
-#endif
-    return result;
-  }
-
-  EIGEN_DEVICE_FUNC explicit operator bool() const {
-    return static_cast<bool>(float(*this));
-  }
-
-  EIGEN_DEVICE_FUNC explicit operator Eigen::half() const {
-    return static_cast<Eigen::half>(float(*this));
-  }
-
-  EIGEN_DEVICE_FUNC explicit operator short() const {
-    return static_cast<short>(float(*this));
-  }
-
-  EIGEN_DEVICE_FUNC explicit operator int() const {
-    return static_cast<int>(float(*this));
-  }
-
-  EIGEN_DEVICE_FUNC explicit operator char() const {
-    return static_cast<char>(float(*this));
-  }
-
-  EIGEN_DEVICE_FUNC explicit operator signed char() const {
-    return static_cast<signed char>(float(*this));
-  }
-
-  EIGEN_DEVICE_FUNC explicit operator unsigned char() const {
-    return static_cast<unsigned char>(float(*this));
-  }
+}  // namespace tensorflow
 
-  EIGEN_DEVICE_FUNC explicit operator unsigned int() const {
-    return static_cast<unsigned int>(float(*this));
-  }
-
-  EIGEN_DEVICE_FUNC explicit operator unsigned long() const {
-    return static_cast<unsigned long>(float(*this));
-  }
-
-  EIGEN_DEVICE_FUNC explicit operator unsigned long long() const {
-    return static_cast<unsigned long long>(float(*this));
-  }
-
-  EIGEN_DEVICE_FUNC explicit operator long long() const {
-    return static_cast<long long>(float(*this));
-  }
+namespace Eigen {
+// TOOD(xpan): We probably need to overwrite more methods to have correct eigen
+// behavior. E.g. loest(), is_integer, etc. See NumTraits.h in eigen.
+template <>
+struct NumTraits<tensorflow::bfloat16>
+    : GenericNumTraits<tensorflow::bfloat16> {};
 
-  EIGEN_DEVICE_FUNC explicit operator double() const {
-    return static_cast<double>(float(*this));
-  }
+using ::tensorflow::operator==;
+using ::tensorflow::operator!=;
 
-  uint16_t value;
-};
+namespace numext {
 
-inline bool operator==(const bfloat16 a, const bfloat16 b) {
-  return a.value == b.value;
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE tensorflow::bfloat16 log(
+    const tensorflow::bfloat16& x) {
+  return static_cast<tensorflow::bfloat16>(::logf(static_cast<float>(x)));
 }
 
-inline bool operator!=(const bfloat16 a, const bfloat16 b) {
-  return a.value != b.value;
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE tensorflow::bfloat16 exp(
+    const tensorflow::bfloat16& x) {
+  return static_cast<tensorflow::bfloat16>(::expf(static_cast<float>(x)));
 }
 
-}  // end namespace tensorflow
-
-namespace Eigen {
 template <>
-struct NumTraits<tensorflow::bfloat16> : GenericNumTraits<uint16_t> {};
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE tensorflow::bfloat16 abs(
+    const tensorflow::bfloat16& x) {
+  return static_cast<tensorflow::bfloat16>(::fabsf(static_cast<float>(x)));
+}
 
-using ::tensorflow::operator==;
-using ::tensorflow::operator!=;
+}  // namespace numext
 }  // namespace Eigen
 
-#ifdef COMPILER_MSVC
+#if defined(COMPILER_MSVC) && !defined(__clang__)
 namespace std {
 template <>
 struct hash<Eigen::half> {
diff --git a/tensorflow/core/framework/op_compatibility_test.cc b/tensorflow/core/framework/op_compatibility_test.cc
index ae2fdae379a21289df2e0eb2dd5cbda0a6d5ed81..b57bdcb841592578de4a2026d70b0e91bae66b02 100644
--- a/tensorflow/core/framework/op_compatibility_test.cc
+++ b/tensorflow/core/framework/op_compatibility_test.cc
@@ -163,6 +163,26 @@ class OpCompatibilityTest : public OpsTestBase {
 
     ExpectIncompatible(old_op_def, *new_op_def, compatibility_error);
   }
+
+  void ExpectDefaultChangeFailure(const OpDef& old_op_def,
+                                  const string& compatibility_error) {
+    // This should be all that is needed to get compatibility.
+    const OpDef* new_op_def = RegisteredOpDef();
+    AddDefaultsToNodeDef(*new_op_def, node_def());
+
+    // Validate that the NodeDef is valid.
+    TF_ASSERT_OK(ValidateNodeDef(*node_def(), *new_op_def));
+
+    Status status = OpDefAttrDefaultsUnchanged(old_op_def, *new_op_def);
+    if (status.ok()) {
+      ADD_FAILURE() << SummarizeOpDef(old_op_def) << " vs. "
+                    << SummarizeOpDef(*new_op_def);
+    } else {
+      EXPECT_TRUE(
+          StringPiece(status.error_message()).contains(compatibility_error))
+          << status << " does not contain " << compatibility_error;
+    }
+  }
 };
 
 // Should be compatible if the Op hasn't changed (sanity check).
@@ -260,40 +280,6 @@ TEST_F(OpCompatibilityTest, AttrOrder) {
   EXPECT_EQ("attr_order = AttrOrder[a=7, b=true]()", Result());
 }
 
-// Should be able to add a default to an attr.
-REGISTER_OP("AddDefault").Output("ndef: string").Attr("a: int = 1234");
-REGISTER_KERNEL_BUILDER(Name("AddDefault").Device(DEVICE_CPU), TestKernel);
-
-TEST_F(OpCompatibilityTest, AddDefault) {
-  OpRegistrationData old_op;
-  TF_ASSERT_OK(OpDefBuilder("AddDefault")
-                   .Output("ndef: string")
-                   .Attr("a: int")
-                   .Finalize(&old_op));
-  TF_ASSERT_OK(NodeDefBuilder("add_default", &old_op.op_def)
-                   .Attr("a", 765)
-                   .Finalize(node_def()));
-  ExpectSuccess(old_op.op_def);
-  EXPECT_EQ("add_default = AddDefault[a=765]()", Result());
-}
-
-// Should be able to remove a default from an attr, *as long as that
-// attr has always existed*.
-REGISTER_OP("RemoveDefault").Output("ndef: string").Attr("a: int");
-REGISTER_KERNEL_BUILDER(Name("RemoveDefault").Device(DEVICE_CPU), TestKernel);
-
-TEST_F(OpCompatibilityTest, RemoveDefault) {
-  OpRegistrationData old_op;
-  TF_ASSERT_OK(OpDefBuilder("RemoveDefault")
-                   .Output("ndef: string")
-                   .Attr("a: int = 91")
-                   .Finalize(&old_op));
-  TF_ASSERT_OK(
-      NodeDefBuilder("remove_default", &old_op.op_def).Finalize(node_def()));
-  ExpectSuccess(old_op.op_def);
-  EXPECT_EQ("remove_default = RemoveDefault[a=91]()", Result());
-}
-
 // Should be able to make an input/output polymorphic.
 // Changing from int32 -> T (where T: type = DT_INT32 by default).
 REGISTER_OP("TypePolymorphic")
@@ -1054,9 +1040,56 @@ TEST_F(OpCompatibilityTest, RenameOutputListFails) {
                       "Output signature mismatch 'old:T' vs. 'new:T'");
 }
 
-// Changing an attr's default is not technically illegal, but should
-// be forbidden if it the attr ever didn't exist since it likely
-// affects semantics.
+// Should not be able to add a default to an attr.
+REGISTER_OP("AddDefault").Output("ndef: string").Attr("a: int = 1234");
+REGISTER_KERNEL_BUILDER(Name("AddDefault").Device(DEVICE_CPU), TestKernel);
+
+TEST_F(OpCompatibilityTest, AddDefault) {
+  OpRegistrationData old_op;
+  TF_ASSERT_OK(OpDefBuilder("AddDefault")
+                   .Output("ndef: string")
+                   .Attr("a: int")
+                   .Finalize(&old_op));
+  TF_ASSERT_OK(NodeDefBuilder("add_default", &old_op.op_def)
+                   .Attr("a", 765)
+                   .Finalize(node_def()));
+  ExpectDefaultChangeFailure(
+      old_op.op_def,
+      "Attr 'a' has added/removed it's default; from no default to 1234");
+}
+
+// Should not be able to remove a default from an attr.
+REGISTER_OP("RemoveDefault").Output("ndef: string").Attr("a: int");
+REGISTER_KERNEL_BUILDER(Name("RemoveDefault").Device(DEVICE_CPU), TestKernel);
+
+TEST_F(OpCompatibilityTest, RemoveDefault) {
+  OpRegistrationData old_op;
+  TF_ASSERT_OK(OpDefBuilder("RemoveDefault")
+                   .Output("ndef: string")
+                   .Attr("a: int = 91")
+                   .Finalize(&old_op));
+  TF_ASSERT_OK(
+      NodeDefBuilder("remove_default", &old_op.op_def).Finalize(node_def()));
+  ExpectDefaultChangeFailure(
+      old_op.op_def,
+      "Attr 'a' has added/removed it's default; from 91 to no default");
+}
+
+// Should not be able to change a default for an attr.
+REGISTER_OP("ChangeDefault").Output("ndef: string").Attr("a: int = 1");
+REGISTER_KERNEL_BUILDER(Name("ChangeDefault").Device(DEVICE_CPU), TestKernel);
+
+TEST_F(OpCompatibilityTest, ChangeDefault) {
+  OpRegistrationData old_op;
+  TF_ASSERT_OK(OpDefBuilder("ChangeDefault")
+                   .Output("ndef: string")
+                   .Attr("a: int = 2")
+                   .Finalize(&old_op));
+  TF_ASSERT_OK(
+      NodeDefBuilder("change_default", &old_op.op_def).Finalize(node_def()));
+  ExpectDefaultChangeFailure(
+      old_op.op_def, "Attr 'a' has changed it's default value; from 2 to 1");
+}
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/framework/op_def_builder_test.cc b/tensorflow/core/framework/op_def_builder_test.cc
index c1511ebe340d99fc67f588596e028cca92e23250..9b24e3aa00425321eda2e196b1e7b243a552c730 100644
--- a/tensorflow/core/framework/op_def_builder_test.cc
+++ b/tensorflow/core/framework/op_def_builder_test.cc
@@ -124,22 +124,23 @@ TEST_F(OpDefBuilderTest, AttrWithRestrictions) {
       "attr: { name: 'a' type: 'type' allowed_values { list { type: "
       "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, "
       "DT_UINT16, DT_INT8, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, "
-      "DT_QINT32, DT_UINT32, DT_UINT64] } } }");
+      "DT_QINT32, DT_UINT32, DT_UINT64, DT_BFLOAT16] } } }");
   ExpectSuccess(
       b().Attr("a:{numbertype, variant}"),
       "attr: { name: 'a' type: 'type' allowed_values { list { type: "
       "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, "
       "DT_UINT16, DT_INT8, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, "
-      "DT_QINT32, DT_UINT32, DT_UINT64, DT_VARIANT] } } }");
+      "DT_QINT32, DT_UINT32, DT_UINT64, DT_BFLOAT16, DT_VARIANT] } } }");
   ExpectSuccess(b().Attr("a:realnumbertype"),
                 "attr: { name: 'a' type: 'type' allowed_values { list { type: "
                 "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, "
-                "DT_INT16, DT_UINT16, DT_INT8, DT_UINT32, DT_UINT64] } } }");
+                "DT_INT16, DT_UINT16, DT_INT8, DT_UINT32, DT_UINT64, "
+                "DT_BFLOAT16] } } }");
   ExpectSuccess(b().Attr("a:{realnumbertype,  variant , string, }"),
                 "attr: { name: 'a' type: 'type' allowed_values { list { type: "
                 "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, "
                 "DT_INT16, DT_UINT16, DT_INT8, DT_UINT32, DT_UINT64, "
-                "DT_VARIANT, DT_STRING] } } }");
+                "DT_BFLOAT16, DT_VARIANT, DT_STRING] } } }");
   ExpectSuccess(b().Attr("a:quantizedtype"),
                 "attr: { name: 'a' type: 'type' allowed_values { list { type: "
                 "[DT_QINT8, DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16]} } }");
@@ -216,12 +217,14 @@ TEST_F(OpDefBuilderTest, AttrListOfRestricted) {
       b().Attr("a:list(realnumbertype)"),
       "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: "
       "[DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, "
-      "DT_UINT16, DT_INT8, DT_HALF, DT_UINT32, DT_UINT64] } } }");
+      "DT_UINT16, DT_INT8, DT_HALF, DT_BFLOAT16, DT_UINT32, DT_UINT64"
+      "] } } }");
   ExpectSuccess(
       b().Attr("a:list({realnumbertype, variant})"),
       "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: "
       "[DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, "
-      "DT_UINT16, DT_INT8, DT_HALF, DT_UINT32, DT_UINT64, DT_VARIANT] } } }");
+      "DT_UINT16, DT_INT8, DT_HALF, DT_BFLOAT16, DT_UINT32, DT_UINT64, "
+      "DT_VARIANT] } } }");
   ExpectSuccess(
       b().Attr("a:list(quantizedtype)"),
       "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: "
diff --git a/tensorflow/core/framework/op_def_util.cc b/tensorflow/core/framework/op_def_util.cc
index 29feda499fd2646a00c1f5bc9fc7223e9f134af9..a4e8add6c49b823948eb5978f99239bb4d9b52ef 100644
--- a/tensorflow/core/framework/op_def_util.cc
+++ b/tensorflow/core/framework/op_def_util.cc
@@ -449,6 +449,11 @@ string AllowedStr(const OpDef::AttrDef& attr) {
   return SummarizeAttrValue(attr.allowed_values());
 }
 
+string DefaultAttrStr(const OpDef::AttrDef& attr) {
+  if (!attr.has_default_value()) return "no default";
+  return SummarizeAttrValue(attr.default_value());
+}
+
 bool HigherMinimum(const OpDef::AttrDef& old_attr,
                    const OpDef::AttrDef& new_attr) {
   // Anything -> no restriction : not more restrictive.
@@ -692,6 +697,32 @@ Status OpDefAddedDefaultsUnchanged(const OpDef& old_op,
   return Status::OK();
 }
 
+Status OpDefAttrDefaultsUnchanged(const OpDef& old_op, const OpDef& new_op) {
+  AttrMap new_attrs, old_attrs;
+  FillAttrMap(old_op, &old_attrs);
+  FillAttrMap(new_op, &new_attrs);
+
+  for (const auto& old_attr : old_op.attr()) {
+    const OpDef::AttrDef* new_attr =
+        gtl::FindPtrOrNull(new_attrs, old_attr.name());
+    if (new_attr == nullptr) continue;
+    if (old_attr.has_default_value() != new_attr->has_default_value()) {
+      return errors::InvalidArgument(
+          "Attr '", old_attr.name(), "' has added/removed it's default; ",
+          "from ", DefaultAttrStr(old_attr), " to ", DefaultAttrStr(*new_attr));
+    }
+    if (old_attr.has_default_value() &&
+        !AreAttrValuesEqual(old_attr.default_value(),
+                            new_attr->default_value())) {
+      return errors::InvalidArgument(
+          "Attr '", old_attr.name(), "' has changed it's default value; ",
+          "from ", DefaultAttrStr(old_attr), " to ", DefaultAttrStr(*new_attr));
+    }
+  }
+
+  return Status::OK();
+}
+
 void RemoveNonDeprecationDescriptionsFromOpDef(OpDef* op_def) {
   for (int i = 0; i < op_def->input_arg_size(); ++i) {
     op_def->mutable_input_arg(i)->clear_description();
diff --git a/tensorflow/core/framework/op_def_util.h b/tensorflow/core/framework/op_def_util.h
index f9661dceddc1a3de694024dddb9afce1cae8680c..d1613ee89b29ef0bcdd97b1bc3c34edbcb65f5d8 100644
--- a/tensorflow/core/framework/op_def_util.h
+++ b/tensorflow/core/framework/op_def_util.h
@@ -63,6 +63,10 @@ Status OpDefAddedDefaultsUnchanged(const OpDef& old_op,
                                    const OpDef& penultimate_op,
                                    const OpDef& new_op);
 
+// Returns an error if the default value for any attr is added/removed/modified
+// in new_op compared to old_op.
+Status OpDefAttrDefaultsUnchanged(const OpDef& old_op, const OpDef& new_op);
+
 // Remove all docs from *op_def / *op_list.
 void RemoveDescriptionsFromOpDef(OpDef* op_def);
 void RemoveDescriptionsFromOpList(OpList* op_list);
diff --git a/tensorflow/core/framework/op_gen_lib.cc b/tensorflow/core/framework/op_gen_lib.cc
index 95a9b763f92d1688d97cdbafc51d7e32d7875315..e78b6ab5d977c6ea2f0dec66988432a617154916 100644
--- a/tensorflow/core/framework/op_gen_lib.cc
+++ b/tensorflow/core/framework/op_gen_lib.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <vector>
 #include "tensorflow/core/framework/attr_value.pb.h"
-#include "tensorflow/core/framework/op_gen_overrides.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -246,29 +245,6 @@ string PBTxtFromMultiline(StringPiece multiline_pbtxt) {
   return pbtxt;
 }
 
-OpGenOverrideMap::OpGenOverrideMap() {}
-OpGenOverrideMap::~OpGenOverrideMap() {}
-
-Status OpGenOverrideMap::LoadFileList(Env* env, const string& filenames) {
-  std::vector<string> v = str_util::Split(filenames, ",");
-  for (const string& f : v) {
-    TF_RETURN_IF_ERROR(LoadFile(env, f));
-  }
-  return Status::OK();
-}
-
-Status OpGenOverrideMap::LoadFile(Env* env, const string& filename) {
-  if (filename.empty()) return Status::OK();
-  string contents;
-  TF_RETURN_IF_ERROR(ReadFileToString(env, filename, &contents));
-  OpGenOverrides all;
-  protobuf::TextFormat::ParseFromString(contents, &all);
-  for (const auto& one : all.op()) {
-    map_[one.name()].reset(new OpGenOverride(one));
-  }
-  return Status::OK();
-}
-
 static void StringReplace(const string& from, const string& to, string* s) {
   // Split *s into pieces delimited by `from`.
   std::vector<string> split;
@@ -349,83 +325,6 @@ static void RenameInDocs(const string& from, const string& to,
   }
 }
 
-const OpGenOverride* OpGenOverrideMap::ApplyOverride(OpDef* op_def) const {
-  // Look up
-  const auto iter = map_.find(op_def->name());
-  if (iter == map_.end()) return nullptr;
-  const OpGenOverride& proto = *iter->second;
-
-  // Apply overrides from `proto`.
-  if (!proto.rename_to().empty()) {
-    op_def->set_name(proto.rename_to());
-    RenameInDocs(proto.name(), proto.rename_to(), op_def);
-  }
-  for (const auto& attr_default : proto.attr_default()) {
-    bool found = false;
-    for (int i = 0; i < op_def->attr_size(); ++i) {
-      if (op_def->attr(i).name() == attr_default.name()) {
-        *op_def->mutable_attr(i)->mutable_default_value() =
-            attr_default.value();
-        found = true;
-        break;
-      }
-    }
-    if (!found) {
-      LOG(WARNING) << proto.name() << " can't find attr " << attr_default.name()
-                   << " to override default";
-    }
-  }
-  for (const auto& attr_rename : proto.attr_rename()) {
-    bool found = false;
-    for (int i = 0; i < op_def->attr_size(); ++i) {
-      if (op_def->attr(i).name() == attr_rename.from()) {
-        *op_def->mutable_attr(i)->mutable_name() = attr_rename.to();
-        found = true;
-        break;
-      }
-    }
-    if (found) {
-      RenameInDocs(attr_rename.from(), attr_rename.to(), op_def);
-    } else {
-      LOG(WARNING) << proto.name() << " can't find attr " << attr_rename.from()
-                   << " to rename";
-    }
-  }
-  for (const auto& input_rename : proto.input_rename()) {
-    bool found = false;
-    for (int i = 0; i < op_def->input_arg_size(); ++i) {
-      if (op_def->input_arg(i).name() == input_rename.from()) {
-        *op_def->mutable_input_arg(i)->mutable_name() = input_rename.to();
-        found = true;
-        break;
-      }
-    }
-    if (found) {
-      RenameInDocs(input_rename.from(), input_rename.to(), op_def);
-    } else {
-      LOG(WARNING) << proto.name() << " can't find input "
-                   << input_rename.from() << " to rename";
-    }
-  }
-  for (const auto& output_rename : proto.output_rename()) {
-    bool found = false;
-    for (int i = 0; i < op_def->output_arg_size(); ++i) {
-      if (op_def->output_arg(i).name() == output_rename.from()) {
-        *op_def->mutable_output_arg(i)->mutable_name() = output_rename.to();
-        found = true;
-        break;
-      }
-    }
-    if (found) {
-      RenameInDocs(output_rename.from(), output_rename.to(), op_def);
-    } else {
-      LOG(WARNING) << proto.name() << " can't find output "
-                   << output_rename.from() << " to rename";
-    }
-  }
-
-  return &proto;
-}
 
 namespace {
 
@@ -629,14 +528,11 @@ Status ApiDefMap::LoadApiDef(const string& api_def_file_contents) {
   ApiDefs api_defs;
   protobuf::TextFormat::ParseFromString(contents, &api_defs);
   for (const auto& api_def : api_defs.op()) {
-    // Check if the op definition is already loaded.
+    // Check if the op definition is loaded. If op definition is not
+    // loaded, then we just skip this ApiDef.
     if (map_.find(api_def.graph_op_name()) != map_.end()) {
       // Overwrite current api def with data in api_def.
       TF_RETURN_IF_ERROR(MergeApiDefs(&map_[api_def.graph_op_name()], api_def));
-    } else {
-      return errors::FailedPrecondition(
-          "Unexpected ApiDef override: ", api_def.graph_op_name(),
-          " is not defined in base ApiDef.");
     }
   }
   return Status::OK();
diff --git a/tensorflow/core/framework/op_gen_lib.h b/tensorflow/core/framework/op_gen_lib.h
index 1ede3af8d7cf8f591ba3927f7fc99d646629109d..94fe194a1a5032b472259d26145ba7cd4460191c 100644
--- a/tensorflow/core/framework/op_gen_lib.h
+++ b/tensorflow/core/framework/op_gen_lib.h
@@ -28,7 +28,6 @@ namespace tensorflow {
 
 // Forward declare protos so their symbols can be removed from .so exports
 class OpDef;
-class OpGenOverride;
 
 inline string Spaces(int n) { return string(n, ' '); }
 
@@ -48,33 +47,6 @@ string PBTxtToMultiline(StringPiece pbtxt,
                         const std::vector<string>& multi_line_fields);
 string PBTxtFromMultiline(StringPiece multiline_pbtxt);
 
-// Takes a list of files with OpGenOverrides text protos, and allows you to
-// look up the specific override for any given op.
-class OpGenOverrideMap {
- public:
-  OpGenOverrideMap();
-  ~OpGenOverrideMap();
-
-  // `filenames` is a comma-separated list of file names.  If an op
-  // is mentioned in more than one file, the last one takes priority.
-  Status LoadFileList(Env* env, const string& filenames);
-
-  // Load a single file.  If more than one file is loaded, later ones
-  // take priority for any ops in common.
-  Status LoadFile(Env* env, const string& filename);
-
-  // Look up the override for `*op_def` from the loaded files, and
-  // mutate `*op_def` to reflect the requested changes. Does not apply
-  // 'skip', 'hide', or 'alias' overrides. Caller has to deal with
-  // those since they can't be simulated by mutating `*op_def`.
-  // Returns nullptr if op is not in any loaded file. Otherwise, the
-  // pointer must not be referenced beyond the lifetime of *this or
-  // the next file load.
-  const OpGenOverride* ApplyOverride(OpDef* op_def) const;
-
- private:
-  std::unordered_map<string, std::unique_ptr<OpGenOverride>> map_;
-};
 
 // Takes a list of files with ApiDefs text protos, and allows you to
 // look up the specific ApiDef for any given op.
diff --git a/tensorflow/core/framework/op_gen_lib_test.cc b/tensorflow/core/framework/op_gen_lib_test.cc
index bbe57bdd622d4339b33d15e2e92252f60ffbbcf8..857b1c8dbcac66899f98bb4f2ef87f65f7442f6b 100644
--- a/tensorflow/core/framework/op_gen_lib_test.cc
+++ b/tensorflow/core/framework/op_gen_lib_test.cc
@@ -410,8 +410,8 @@ op {
 
   ApiDefMap api_map(op_list);
   TF_CHECK_OK(api_map.LoadApiDef(kTestApiDef));
-  auto status = api_map.LoadApiDef(api_def1);
-  ASSERT_EQ(tensorflow::error::FAILED_PRECONDITION, status.code());
+  TF_CHECK_OK(api_map.LoadApiDef(api_def1));
+  ASSERT_EQ(nullptr, api_map.GetApiDef("different_testop"));
 }
 
 TEST(OpGenLibTest, ApiDefInvalidArgOrder) {
diff --git a/tensorflow/core/framework/op_gen_overrides.proto b/tensorflow/core/framework/op_gen_overrides.proto
deleted file mode 100644
index 8e66d39a7c7f4a9ff05c91f46a11446e18bc1aed..0000000000000000000000000000000000000000
--- a/tensorflow/core/framework/op_gen_overrides.proto
+++ /dev/null
@@ -1,67 +0,0 @@
-// Defines the text format for adding per-op overrides for client
-// language op code generators.
-
-syntax = "proto3";
-
-package tensorflow;
-import "tensorflow/core/framework/attr_value.proto";
-
-// Used to override the default API & behavior in the generated code
-// for client languages, from what you would get from the OpDef alone.
-// This is so we can evolve the API while remaining backwards
-// compatible when interpretting old graphs.  Overrides go in an
-// "op_gen_overrides.pbtxt" file with a text-format OpGenOverrides
-// message.  Right now these only apply to the C++ API.
-// TODO(josh11b): In the future there will be a common set of overrides
-// and per-client-language overrides.
-//
-// WARNING: Be *very* careful using these features -- these overrides
-// can change the semantics of existing code.  These changes may need
-// to wait until a major release of TensorFlow to avoid breaking our
-// compatibility promises.
-message OpGenOverride {
-  // Name of the op to apply overrides to.
-  string name = 1;
-
-  // Do not include this op in the generated API.
-  // If `skip` is true, all other overrides are ignored for this op.
-  bool skip = 2;
-
-  // Hide this op by putting it into an internal namespace (or whatever
-  // is appropriate in the target language).
-  bool hide = 3;
-
-  // Use a different name in the API than the op's name. Note that
-  // the op's name in `backticks` will also be replaced in the docs.
-  string rename_to = 4;
-
-  // Create *additional* API endpoints with different names (contrast
-  // with rename_to, which affects the original name).
-  repeated string alias = 5;
-
-  // Map the name of an attr to a new default value to use.  This
-  // default will be used when creating new graphs, as opposed to the
-  // default in the OpDef, which will be used when interpreting old
-  // GraphDefs.  If this attr is also renamed (using attr_rename
-  // below), use the original name of the attr.
-  message AttrDefault {
-    string name = 1;
-    AttrValue value = 2;
-  }
-  repeated AttrDefault attr_default = 6;
-
-  // Change the name used to access attrs/inputs/outputs in the API
-  // from what is used in the GraphDef.  Note that these names in
-  // `backticks` will also be replaced in the docs.
-  message Rename {
-    string from = 1;
-    string to = 2;
-  }
-  repeated Rename attr_rename = 7;
-  repeated Rename input_rename = 8;
-  repeated Rename output_rename = 9;
-}
-
-message OpGenOverrides {
-  repeated OpGenOverride op = 1;
-}
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 4d410809e77bd6ba7cd24f78c0ef2f97fa54e588..c879dc6f3f6039fad268680f52de128a4ae8a8f6 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -112,7 +112,7 @@ const string& OpKernel::requested_input(int i) const { return def_->input(i); }
 
 Status OpKernel::InputRange(StringPiece input_name, int* start,
                             int* stop) const {
-  const auto result = input_name_map_.find(input_name.ToString());
+  const auto result = input_name_map_.find(input_name);
   if (result == input_name_map_.end()) {
     return errors::InvalidArgument("Unknown input name: ", input_name);
   } else {
@@ -124,7 +124,7 @@ Status OpKernel::InputRange(StringPiece input_name, int* start,
 
 Status OpKernel::OutputRange(StringPiece output_name, int* start,
                              int* stop) const {
-  const auto result = output_name_map_.find(output_name.ToString());
+  const auto result = output_name_map_.find(output_name);
   if (result == output_name_map_.end()) {
     return errors::InvalidArgument("Unknown output name: ", output_name);
   } else {
@@ -252,10 +252,8 @@ OpKernelContext::OpKernelContext(Params* params)
 OpKernelContext::OpKernelContext(Params* params, int num_outputs)
     : params_(params),
       outputs_(num_outputs),
-      host_temp_memory_size_(0),
-      device_temp_memory_size_(0),
-      host_persistent_memory_allocated_(0),
-      device_persistent_memory_allocated_(0) {
+      temp_memory_size_(0),
+      persistent_memory_allocated_(0) {
   Allocator* eigen_gpu_allocator = get_allocator(AllocatorAttributes());
   params_->ensure_eigen_gpu_device();
   params_->device->ReinitializeGpuDevice(this, params_->eigen_gpu_device,
@@ -668,11 +666,7 @@ Status OpKernelContext::allocate_temp(
     if (a->TracksAllocationSizes()) {
       int64 alloc_size =
           a->AllocatedSize(const_cast<char*>(out_temp->tensor_data().data()));
-      if (allocate_on_host(allocator_attr)) {
-        record_host_temp_memory_size(alloc_size);
-      } else {
-        record_device_temp_memory_size(alloc_size);
-      }
+      record_temp_memory_size(alloc_size);
     }
   }
   return s;
@@ -795,26 +789,15 @@ bool OpKernelContext::allocate_on_host(AllocatorAttributes alloc_attr) const {
   return alloc_attr.on_host() || device()->attributes().device_type() == "CPU";
 }
 
-void OpKernelContext::record_host_persistent_memory_allocation(int64 size,
-                                                               int64 alloc_id) {
-  host_persistent_memory_allocated_ += size;
-  host_persistent_alloc_ids_.push_back(alloc_id);
-}
-
-void OpKernelContext::record_device_persistent_memory_allocation(
-    int64 size, int64 alloc_id) {
-  device_persistent_memory_allocated_ += size;
-  device_persistent_alloc_ids_.push_back(alloc_id);
-}
-
-std::vector<int64> OpKernelContext::host_persistent_alloc_ids() const {
-  return std::vector<int64>(host_persistent_alloc_ids_.begin(),
-                            host_persistent_alloc_ids_.end());
+void OpKernelContext::record_persistent_memory_allocation(int64 size,
+                                                          int64 alloc_id) {
+  persistent_memory_allocated_ += size;
+  persistent_alloc_ids_.push_back(alloc_id);
 }
 
-std::vector<int64> OpKernelContext::device_persistent_alloc_ids() const {
-  return std::vector<int64>(device_persistent_alloc_ids_.begin(),
-                            device_persistent_alloc_ids_.end());
+std::vector<int64> OpKernelContext::persistent_alloc_ids() const {
+  return std::vector<int64>(persistent_alloc_ids_.begin(),
+                            persistent_alloc_ids_.end());
 }
 
 // OpKernel registration ------------------------------------------------------
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index da0dc549435a35cb1dec25b9e8e5ddbea7b904b3..25150499ad76c45493645a9ee4a83fd55e69eb13 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -61,7 +61,7 @@ class TensorSliceReaderCacheWrapper;
 }  // namespace checkpoint
 
 class AsyncOpKernel;
-class FunctionCallFrame;
+class CallFrameInterface;
 class FunctionLibraryRuntime;
 class OpKernelConstruction;  // declared below
 class OpKernelContext;       // declared below
@@ -548,7 +548,7 @@ class OpKernelContext {
     FrameAndIter frame_iter;
 
     // Function call supports.
-    FunctionCallFrame* call_frame = nullptr;
+    CallFrameInterface* call_frame = nullptr;
     FunctionLibraryRuntime* function_library = nullptr;
     std::function<void(std::function<void()>)>* runner = nullptr;
     StepStatsCollector* stats_collector = nullptr;
@@ -930,7 +930,7 @@ class OpKernelContext {
   //
   // If this kernel invocation is within a function execution,
   // call_frame() returns the call frame for the function call.
-  FunctionCallFrame* call_frame() const { return params_->call_frame; }
+  CallFrameInterface* call_frame() const { return params_->call_frame; }
 
   // If not nullptr, the kernel invoke functions defined in the
   // library. E.g., CHECK_NOTNULL(function_library())->Run("Foo", ...).
@@ -1033,33 +1033,21 @@ class OpKernelContext {
   bool allocate_on_host(AllocatorAttributes alloc_attr) const;
 
   // Records temporary memory sizes.
-  void record_host_temp_memory_size(int64 size) {
-    host_temp_memory_size_ += size;
-  }
-  void record_device_temp_memory_size(int64 size) {
-    device_temp_memory_size_ += size;
-  }
+  void record_temp_memory_size(int64 size) { temp_memory_size_ += size; }
 
   // Returns recorded size of temporary memory;
-  int64 host_temp_memory_size() const { return host_temp_memory_size_; }
-  int64 device_temp_memory_size() const { return device_temp_memory_size_; }
+  int64 temp_memory_size() const { return temp_memory_size_; }
 
   // Records persistent memory allocation, size can be negative indicating
   // deallocation.
-  void record_host_persistent_memory_allocation(int64 size,
-                                                int64 alloc_id = -1);
-  void record_device_persistent_memory_allocation(int64 size,
-                                                  int64 alloc_id = -1);
+  void record_persistent_memory_allocation(int64 size, int64 alloc_id = -1);
 
   // Returns recorded size and ids of persistent memory.
-  int64 host_persistent_memory_allocated() const {
-    return host_persistent_memory_allocated_;
+  int64 persistent_memory_allocated() const {
+    return persistent_memory_allocated_;
   }
-  int64 device_persistent_memory_allocated() const {
-    return device_persistent_memory_allocated_;
-  }
-  std::vector<int64> host_persistent_alloc_ids() const;
-  std::vector<int64> device_persistent_alloc_ids() const;
+
+  std::vector<int64> persistent_alloc_ids() const;
 
   bool input_is_ref(int index) const;
 
@@ -1104,12 +1092,9 @@ class OpKernelContext {
 
   bool is_output_dead_ = false;
 
-  int64 host_temp_memory_size_;
-  int64 device_temp_memory_size_;
-  gtl::InlinedVector<int64, 2> host_persistent_alloc_ids_;
-  gtl::InlinedVector<int64, 2> device_persistent_alloc_ids_;
-  int64 host_persistent_memory_allocated_;
-  int64 device_persistent_memory_allocated_;
+  int64 temp_memory_size_;
+  gtl::InlinedVector<int64, 2> persistent_alloc_ids_;
+  int64 persistent_memory_allocated_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(OpKernelContext);
 };
@@ -1492,10 +1477,12 @@ inline void OpOutputList::set_ref(int i, mutex* mu, Tensor* tensor_for_ref) {
 // }
 
 #define OP_REQUIRES(CTX, EXP, STATUS) \
-  if (!TF_PREDICT_TRUE(EXP)) {        \
-    (CTX)->CtxFailure((STATUS));      \
-    return;                           \
-  }
+  do {                                \
+    if (!TF_PREDICT_TRUE(EXP)) {      \
+      (CTX)->CtxFailure((STATUS));    \
+      return;                         \
+    }                                 \
+  } while (0)
 
 #define OP_REQUIRES_OK(CTX, ...)          \
   do {                                    \
@@ -1507,11 +1494,13 @@ inline void OpOutputList::set_ref(int i, mutex* mu, Tensor* tensor_for_ref) {
   } while (0)
 
 #define OP_REQUIRES_ASYNC(CTX, EXP, STATUS, CALLBACK) \
-  if (!TF_PREDICT_TRUE(EXP)) {                        \
-    (CTX)->CtxFailure((STATUS));                      \
-    (CALLBACK)();                                     \
-    return;                                           \
-  }
+  do {                                                \
+    if (!TF_PREDICT_TRUE(EXP)) {                      \
+      (CTX)->CtxFailure((STATUS));                    \
+      (CALLBACK)();                                   \
+      return;                                         \
+    }                                                 \
+  } while (0)
 
 #define OP_REQUIRES_OK_ASYNC(CTX, STATUS, CALLBACK) \
   do {                                              \
diff --git a/tensorflow/core/framework/op_kernel_test.cc b/tensorflow/core/framework/op_kernel_test.cc
index 47523358bed40898cf82c531dc1a89fea0de88a3..94a9d1335a7c46372e05633431427d44fc46e027 100644
--- a/tensorflow/core/framework/op_kernel_test.cc
+++ b/tensorflow/core/framework/op_kernel_test.cc
@@ -33,6 +33,7 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
 #include "tensorflow/core/public/version.h"
 
 class DummyKernel : public tensorflow::OpKernel {
@@ -898,5 +899,73 @@ TEST_F(LabelTest, Duplicate) {
                 error::INVALID_ARGUMENT);
 }
 
+void BM_InputRangeHelper(int iters, const NodeDef& node_def,
+                         const char* input_name, int expected_start,
+                         int expected_stop) {
+  Status status;
+  std::unique_ptr<DummyDevice> device(new DummyDevice(Env::Default(), false));
+
+  std::unique_ptr<OpKernel> op(CreateOpKernel(DEVICE_CPU, device.get(),
+                                              cpu_allocator(), node_def,
+                                              TF_GRAPH_DEF_VERSION, &status));
+  TF_CHECK_OK(status);
+
+  testing::StartTiming();
+  for (int i = 0; i < iters; ++i) {
+    int start;
+    int stop;
+    TF_CHECK_OK(op->InputRange(input_name, &start, &stop));
+    EXPECT_EQ(expected_start, start);
+    EXPECT_EQ(expected_stop, stop);
+  }
+  testing::StopTiming();
+}
+
+REGISTER_KERNEL_BUILDER(Name("ConcatV2").Device(DEVICE_CPU), DummyKernel);
+REGISTER_KERNEL_BUILDER(Name("Select").Device(DEVICE_CPU), DummyKernel);
+
+void BM_ConcatInputRange(int iters) {
+  testing::StopTiming();
+
+  // Create a ConcatV2 NodeDef with 4 inputs (plus the axis).
+  NodeDef node_def;
+  node_def.set_name("concat-op");
+  node_def.set_op("ConcatV2");
+  AttrValue attr_N;
+  attr_N.set_i(4);
+  AttrValue attr_T;
+  attr_T.set_type(DT_FLOAT);
+  AttrValue attr_Tidx;
+  attr_Tidx.set_type(DT_INT32);
+  node_def.mutable_attr()->insert({"N", attr_N});
+  node_def.mutable_attr()->insert({"T", attr_T});
+  node_def.mutable_attr()->insert({"Tidx", attr_Tidx});
+  for (size_t i = 0; i < 5; ++i) {
+    node_def.add_input(strings::StrCat("a:", i));
+  }
+
+  BM_InputRangeHelper(iters, node_def, "values", 0, 4);
+}
+
+void BM_SelectInputRange(int iters) {
+  testing::StopTiming();
+
+  // Create a Select NodeDef with 3 inputs.
+  NodeDef node_def;
+  node_def.set_name("select-op");
+  node_def.set_op("Select");
+  AttrValue attr_T;
+  attr_T.set_type(DT_FLOAT);
+  node_def.mutable_attr()->insert({"T", attr_T});
+  for (size_t i = 0; i < 3; ++i) {
+    node_def.add_input(strings::StrCat("a:", i));
+  }
+
+  BM_InputRangeHelper(iters, node_def, "condition", 0, 1);
+}
+
+BENCHMARK(BM_ConcatInputRange);
+BENCHMARK(BM_SelectInputRange);
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h
index 4bb37e4f6ede54b96f34963890b56ae8774edced..edc93aec7f801b77a5c7867589f9d89ff7b6ea8f 100644
--- a/tensorflow/core/framework/register_types.h
+++ b/tensorflow/core/framework/register_types.h
@@ -52,7 +52,8 @@ limitations under the License.
    #undef REGISTER_PARTITION
 */
 
-#if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION)
+#if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION) || \
+    defined(NVIDIA_TEGRA)
 
 // All types are supported, so all macros are invoked.
 //
@@ -155,11 +156,16 @@ limitations under the License.
       TF_CALL_uint8(m) TF_CALL_int8(m)
 
 #define TF_CALL_REAL_NUMBER_TYPES(m) \
+  TF_CALL_INTEGRAL_TYPES(m)          \
+  TF_CALL_half(m) TF_CALL_bfloat16(m) TF_CALL_float(m) TF_CALL_double(m)
+
+#define TF_CALL_REAL_NUMBER_TYPES_NO_BFLOAT16(m) \
   TF_CALL_INTEGRAL_TYPES(m) TF_CALL_half(m) TF_CALL_float(m) TF_CALL_double(m)
 
-#define TF_CALL_REAL_NUMBER_TYPES_NO_INT32(m)                         \
-  TF_CALL_half(m) TF_CALL_float(m) TF_CALL_double(m) TF_CALL_int64(m) \
-      TF_CALL_uint16(m) TF_CALL_int16(m) TF_CALL_uint8(m) TF_CALL_int8(m)
+#define TF_CALL_REAL_NUMBER_TYPES_NO_INT32(m)                              \
+  TF_CALL_half(m) TF_CALL_bfloat16(m) TF_CALL_float(m) TF_CALL_double(m)   \
+      TF_CALL_int64(m) TF_CALL_uint16(m) TF_CALL_int16(m) TF_CALL_uint8(m) \
+          TF_CALL_int8(m)
 
 // Call "m" for all number types, including complex64 and complex128.
 #define TF_CALL_NUMBER_TYPES(m) \
@@ -194,6 +200,13 @@ limitations under the License.
 #define TF_CALL_QUANTIZED_TYPES(m) \
   TF_CALL_qint8(m) TF_CALL_quint8(m) TF_CALL_qint32(m)
 
+// Types used for save and restore ops.
+#define TF_CALL_SAVE_RESTORE_TYPES(m)                                     \
+  TF_CALL_INTEGRAL_TYPES(m)                                               \
+  TF_CALL_half(m) TF_CALL_float(m) TF_CALL_double(m) TF_CALL_complex64(m) \
+      TF_CALL_complex128(m) TF_CALL_bool(m) TF_CALL_string(m)             \
+          TF_CALL_QUANTIZED_TYPES(m)
+
 #ifdef TENSORFLOW_SYCL_NO_DOUBLE
 #define TF_CALL_SYCL_double(m)
 #else  // TENSORFLOW_SYCL_NO_DOUBLE
diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc
index ee9192d4a1475776a892f4da9703c6e7a38f9844..641681973a1004f15163217684001c96592731d8 100644
--- a/tensorflow/core/framework/shape_inference.cc
+++ b/tensorflow/core/framework/shape_inference.cc
@@ -168,7 +168,7 @@ Status InferenceContext::Run(
 
 Status InferenceContext::set_output(StringPiece output_name,
                                     const std::vector<ShapeHandle>& shapes) {
-  const auto result = output_name_map_.find(output_name.ToString());
+  auto result = output_name_map_.find(output_name);
   if (result == output_name_map_.end()) {
     return errors::InvalidArgument("Unknown output name: ", output_name);
   } else {
@@ -187,7 +187,7 @@ Status InferenceContext::set_output(StringPiece output_name,
 
 Status InferenceContext::input(StringPiece input_name,
                                std::vector<ShapeHandle>* output) const {
-  const auto result = input_name_map_.find(input_name.ToString());
+  const auto result = input_name_map_.find(input_name);
   if (result == input_name_map_.end()) {
     return errors::InvalidArgument("Unknown input name: ", input_name);
   } else {
@@ -201,7 +201,7 @@ Status InferenceContext::input(StringPiece input_name,
 
 Status InferenceContext::output(StringPiece output_name,
                                 std::vector<ShapeHandle>* output) const {
-  const auto result = output_name_map_.find(output_name.ToString());
+  const auto result = output_name_map_.find(output_name);
   if (result == output_name_map_.end()) {
     return errors::InvalidArgument("Unknown output name: ", output_name);
   } else {
@@ -342,8 +342,8 @@ Status InferenceContext::WithRank(ShapeHandle shape, int64 rank,
     for (int i = 0; i < rank; ++i) {
       dims.push_back(UnknownDim());
     }
-    *out = shape_manager_.MakeShape(dims);
-    return Status::OK();
+    ShapeHandle shp = shape_manager_.MakeShape(dims);
+    return Merge(shape, shp, out);
   }
   *out = nullptr;
 
@@ -357,13 +357,10 @@ Status InferenceContext::WithRankAtLeast(ShapeHandle shape, int64 rank,
     return errors::InvalidArgument("Rank cannot exceed kint32max");
   }
   const int32 existing = Rank(shape);
-  if (existing >= rank) {
+  if (existing >= rank || existing == kUnknownRank) {
     *out = shape;
     return Status::OK();
   }
-  if (existing == kUnknownRank) {
-    return ReturnUnknownShape(out);
-  }
   *out = nullptr;
   return errors::InvalidArgument("Shape must be at least rank ", rank,
                                  " but is rank ", existing);
@@ -375,10 +372,7 @@ Status InferenceContext::WithRankAtMost(ShapeHandle shape, int64 rank,
     return errors::InvalidArgument("Rank cannot exceed kint32max");
   }
   const int32 existing = Rank(shape);
-  if (existing == kUnknownRank) {
-    return ReturnUnknownShape(out);
-  }
-  if (existing <= rank) {
+  if (existing <= rank || existing == kUnknownRank) {
     *out = shape;
     return Status::OK();
   }
@@ -395,8 +389,8 @@ Status InferenceContext::WithValue(DimensionHandle dim, int64 value,
     return Status::OK();
   }
   if (existing == kUnknownDim) {
-    *out = MakeDim(value);
-    return Status::OK();
+    DimensionHandle d = MakeDim(value);
+    return Merge(dim, d, out);
   }
   *out = nullptr;
   return errors::InvalidArgument("Dimension must be ", value, " but is ",
diff --git a/tensorflow/core/framework/shape_inference_test.cc b/tensorflow/core/framework/shape_inference_test.cc
index 68156e63ca77b9506b7549f9eb7c1fc302eee89a..a9b63ca60e4574bb0d59c4b939ac157e62f317e8 100644
--- a/tensorflow/core/framework/shape_inference_test.cc
+++ b/tensorflow/core/framework/shape_inference_test.cc
@@ -359,11 +359,11 @@ TEST_F(ShapeInferenceTest, WithRankAtMost) {
   // WithRankAtMost on a shape with unknown dimensionality always succeeds.
   EXPECT_TRUE(c.WithRankAtMost(in0, 1, &s1).ok());
   EXPECT_EQ("?", c.DebugString(s1));
-  EXPECT_FALSE(SameHandle(in0, s1));
+  EXPECT_TRUE(SameHandle(in0, s1));
 
   EXPECT_TRUE(c.WithRankAtMost(in0, 2, &s2).ok());
   EXPECT_EQ("?", c.DebugString(s2));
-  EXPECT_FALSE(SameHandle(s1, s2));
+  EXPECT_TRUE(SameHandle(s1, s2));
 
   // WithRankAtMost on shape with known dimensionality.
   s1 = in1;
@@ -398,11 +398,11 @@ TEST_F(ShapeInferenceTest, WithRankAtLeast) {
   // WithRankAtLeast on a shape with unknown dimensionality always succeeds.
   EXPECT_TRUE(c.WithRankAtLeast(in0, 1, &s1).ok());
   EXPECT_EQ("?", c.DebugString(s1));
-  EXPECT_FALSE(SameHandle(in0, s1));
+  EXPECT_TRUE(SameHandle(in0, s1));
 
   EXPECT_TRUE(c.WithRankAtLeast(in0, 2, &s2).ok());
   EXPECT_EQ("?", c.DebugString(s2));
-  EXPECT_FALSE(SameHandle(s1, s2));
+  EXPECT_TRUE(SameHandle(s1, s2));
 
   // WithRankAtLeast on shape with known dimensionality.
   s1 = in1;
diff --git a/tensorflow/core/framework/step_stats.proto b/tensorflow/core/framework/step_stats.proto
index 99dee2257e0a4ccab4098f5ee49feda9ed21d2cf..65c8089d51141b915db69ef2f562ba911ea2994e 100644
--- a/tensorflow/core/framework/step_stats.proto
+++ b/tensorflow/core/framework/step_stats.proto
@@ -40,12 +40,13 @@ message NodeOutput {
 
 // For memory tracking.
 message MemoryStats {
-  int64 host_temp_memory_size = 1;
-  int64 device_temp_memory_size = 2;
-  int64 host_persistent_memory_size = 3;
-  int64 device_persistent_memory_size = 4;
-  repeated int64 host_persistent_tensor_alloc_ids = 5;
-  repeated int64 device_persistent_tensor_alloc_ids = 6;
+  int64 temp_memory_size = 1;
+  int64 persistent_memory_size = 3;
+  repeated int64 persistent_tensor_alloc_ids = 5;
+
+  int64 device_temp_memory_size = 2 [deprecated = true];
+  int64 device_persistent_memory_size = 4 [deprecated = true];
+  repeated int64 device_persistent_tensor_alloc_ids = 6 [deprecated = true];
 }
 
 // Time/size stats recorded for a single execution of a graph node.
diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index 24b7b08ebcb8371dfa5d46c788a3146ca727da3f..4f08cdc1d7c130bd351de7b5f7574ea199977804 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -415,18 +415,10 @@ struct ProtoHelper<qint32> {
 
 template <>
 struct ProtoHelper<bfloat16> {
-  typedef Helper<float>::RepeatedFieldType FieldType;
-  static const bfloat16* Begin(const TensorProto& proto) {
-    // TODO: Isn't this wrong, given that int_val is 32 bits long?
-    return reinterpret_cast<const bfloat16*>(proto.int_val().data());
-  }
-  static size_t NumElements(const TensorProto& proto) {
-    return proto.int_val().size();
-  }
   static void Fill(const bfloat16* data, size_t n, TensorProto* proto) {
-    proto->mutable_int_val()->Reserve(n);
+    proto->mutable_half_val()->Reserve(n);
     for (size_t i = 0; i < n; ++i) {
-      proto->mutable_int_val()->AddAlreadyReserved(data[i].value);
+      proto->mutable_half_val()->AddAlreadyReserved(data[i].value);
     }
   }
 };
@@ -529,9 +521,9 @@ TensorBuffer* FromProtoField<Variant>(Allocator* a, const TensorProto& in,
   return buf;
 }
 
-// fp16 is opaque to the protobuf, so we deserialize these identical to uint16
-// but with data stored in half_val instead of int_val (ie., we don't use
-// ProtoHelper<uint16>).
+// fp16 and bfloat16 are opaque to the protobuf, so we deserialize these
+// identical to uint16 but with data stored in half_val instead of int_val (ie.,
+// we don't use ProtoHelper<uint16>).
 template <>
 TensorBuffer* FromProtoField<Eigen::half>(Allocator* a, const TensorProto& in,
                                           int64 n) {
@@ -556,6 +548,30 @@ TensorBuffer* FromProtoField<Eigen::half>(Allocator* a, const TensorProto& in,
   return buf;
 }
 
+template <>
+TensorBuffer* FromProtoField<bfloat16>(Allocator* a, const TensorProto& in,
+                                       int64 n) {
+  CHECK_GT(n, 0);
+  Buffer<bfloat16>* buf = new Buffer<bfloat16>(a, n);
+  uint16* data = buf->template base<uint16>();
+  if (data == nullptr) {
+    buf->Unref();
+    return nullptr;
+  }
+  const int64 in_n = in.half_val().size();
+  auto begin = in.half_val().begin();
+  if (n <= in_n) {
+    std::copy_n(begin, n, data);
+  } else if (in_n > 0) {
+    std::copy_n(begin, in_n, data);
+    const uint16 last = *(data + in_n - 1);
+    std::fill_n(data + in_n, n - in_n, last);
+  } else {
+    std::fill_n(data, n, 0);
+  }
+  return buf;
+}
+
 // Copies T[n] stored in the buffer "in" into the repeated field in
 // "out" corresponding to type T.
 template <typename T>
diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index c195623b279a4275ab2646483851ec3a65a1f0d4..92d10f0d8cf452264885917bc0c897e03527a782 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -424,7 +424,8 @@ class Tensor {
   typename TTypes<T, NDIMS>::ConstTensor flat_outer_dims() const;
 
   template <typename T, size_t NDIMS = 3>
-  typename TTypes<T, NDIMS>::ConstTensor flat_inner_outer_dims(int64 begin) const;
+  typename TTypes<T, NDIMS>::ConstTensor flat_inner_outer_dims(
+      int64 begin) const;
 
   /// Render the first `max_entries` values in `*this` into a string.
   string SummarizeValue(int64 max_entries) const;
@@ -464,10 +465,6 @@ class Tensor {
   void CheckTypeAndIsAligned(DataType expected_dtype) const;
   void CheckIsAlignedAndSingleElement() const;
   void set_dtype(DataType t) { shape_.set_data_type(t); }
-  template <size_t NDIMS>
-  void FillDimsAndValidateCompatibleShape(
-      gtl::ArraySlice<int64> new_sizes,
-      Eigen::array<Eigen::DenseIndex, NDIMS>* dims) const;
 
   // TensorShape's InlineVector.
   static gtl::InlinedVector<int64, 4> ComputeFlatInnerDims(
@@ -520,8 +517,13 @@ class Tensor {
 
   template <size_t NDIMS>
   void FillDimsAndValidateCompatibleShape(
-      Eigen::array<Eigen::DenseIndex, NDIMS>* dims,
-      gtl::ArraySlice<int64> new_sizes) const;
+      gtl::ArraySlice<int64> new_sizes,
+      Eigen::array<Eigen::DenseIndex, NDIMS>* dims) const;
+
+  template <typename T, size_t NDIMS>
+  void FillDimsAndValidateCompatibleShape(
+      gtl::ArraySlice<int64> new_sizes,
+      Eigen::array<Eigen::DenseIndex, NDIMS>* dims) const;
 };
 
 // Implementation details
@@ -631,12 +633,36 @@ void Tensor::FillDimsAndValidateCompatibleShape(
   CHECK_EQ(new_num_elements, NumElements());
 }
 
+template <typename T, size_t NDIMS>
+void Tensor::FillDimsAndValidateCompatibleShape(
+    gtl::ArraySlice<int64> new_sizes,
+    Eigen::array<Eigen::DenseIndex, NDIMS>* dims) const {
+  CHECK_EQ(NDIMS, new_sizes.size());
+  int64 new_num_elements = 1;
+  for (size_t d = 0; d < NDIMS; d++) {
+    new_num_elements *= new_sizes[d];
+    (*dims)[d] = new_sizes[d];
+  }
+  const int element_size = DataTypeSize(BaseType(dtype()));
+  if (element_size > 0) {
+    CHECK_EQ(new_num_elements * sizeof(T), NumElements() * element_size);
+  } else {
+    // DataTypeSize() returns 0 for some data types. In this case, assume that T
+    // has the same size as the buffer type.
+    // NOTE: If we can be sure that DataTypeSize() does not return 0 for all POD
+    // types, then we should check DataTypeToEnum<T>::v() == dtype(). Or simply
+    // check if `element_size > 0` to err when bit cast is attempted on Tensor
+    // of unknown data type size.
+    CHECK_EQ(new_num_elements, NumElements());
+  }
+}
+
 template <typename T, size_t NDIMS>
 typename TTypes<T, NDIMS>::Tensor Tensor::shaped(
     gtl::ArraySlice<int64> new_sizes) {
   CheckTypeAndIsAligned(DataTypeToEnum<T>::v());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape<NDIMS>(new_sizes, &dims);
+  FillDimsAndValidateCompatibleShape(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::Tensor(base<T>(), dims);
 }
 
@@ -645,7 +671,7 @@ typename TTypes<T, NDIMS>::Tensor Tensor::bit_casted_shaped(
     gtl::ArraySlice<int64> new_sizes) {
   CHECK(IsAligned());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape<NDIMS>(new_sizes, &dims);
+  FillDimsAndValidateCompatibleShape<T>(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::Tensor(base<T>(), dims);
 }
 
@@ -654,29 +680,16 @@ typename TTypes<T, NDIMS>::UnalignedTensor Tensor::unaligned_shaped(
     gtl::ArraySlice<int64> new_sizes) {
   CheckType(DataTypeToEnum<T>::v());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape<NDIMS>(new_sizes, &dims);
+  FillDimsAndValidateCompatibleShape(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::UnalignedTensor(base<T>(), dims);
 }
 
-template <size_t NDIMS>
-void Tensor::FillDimsAndValidateCompatibleShape(
-    Eigen::array<Eigen::DenseIndex, NDIMS>* dims,
-    gtl::ArraySlice<int64> new_sizes) const {
-  CHECK_EQ(NDIMS, new_sizes.size());
-  int64 new_num_elements = 1;
-  for (size_t d = 0; d < NDIMS; d++) {
-    new_num_elements *= new_sizes[d];
-    (*dims)[d] = new_sizes[d];
-  }
-  CHECK_EQ(new_num_elements, NumElements());
-}
-
 template <typename T, size_t NDIMS>
 typename TTypes<T, NDIMS>::ConstTensor Tensor::shaped(
     gtl::ArraySlice<int64> new_sizes) const {
   CheckTypeAndIsAligned(DataTypeToEnum<T>::v());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape(&dims, new_sizes);
+  FillDimsAndValidateCompatibleShape(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::ConstTensor(base<T>(), dims);
 }
 
@@ -685,7 +698,7 @@ typename TTypes<T, NDIMS>::ConstTensor Tensor::bit_casted_shaped(
     gtl::ArraySlice<int64> new_sizes) const {
   CHECK(IsAligned());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape(&dims, new_sizes);
+  FillDimsAndValidateCompatibleShape<T>(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::ConstTensor(base<T>(), dims);
 }
 
@@ -694,7 +707,7 @@ typename TTypes<T, NDIMS>::UnalignedConstTensor Tensor::unaligned_shaped(
     gtl::ArraySlice<int64> new_sizes) const {
   CheckType(DataTypeToEnum<T>::v());
   Eigen::array<Eigen::DenseIndex, NDIMS> dims;
-  FillDimsAndValidateCompatibleShape(&dims, new_sizes);
+  FillDimsAndValidateCompatibleShape(new_sizes, &dims);
   return typename TTypes<T, NDIMS>::UnalignedConstTensor(base<T>(), dims);
 }
 
@@ -722,8 +735,8 @@ typename TTypes<T, NDIMS>::Tensor Tensor::flat_outer_dims() {
 
 template <typename T, size_t NDIMS>
 typename TTypes<T, NDIMS>::Tensor Tensor::flat_inner_outer_dims(int64 begin) {
-  gtl::InlinedVector<int64,4> flat_outer = ComputeFlatOuterDims(
-      shape_.dim_sizes(), begin + NDIMS);
+  gtl::InlinedVector<int64, 4> flat_outer =
+      ComputeFlatOuterDims(shape_.dim_sizes(), begin + NDIMS);
   return shaped<T, NDIMS>(ComputeFlatInnerDims(flat_outer, NDIMS));
 }
 
@@ -738,9 +751,10 @@ typename TTypes<T, NDIMS>::ConstTensor Tensor::flat_outer_dims() const {
 }
 
 template <typename T, size_t NDIMS>
-typename TTypes<T, NDIMS>::ConstTensor Tensor::flat_inner_outer_dims(int64 begin) const {
-  gtl::InlinedVector<int64,4> flat_outer = ComputeFlatOuterDims(
-      shape_.dim_sizes(), begin + NDIMS);
+typename TTypes<T, NDIMS>::ConstTensor Tensor::flat_inner_outer_dims(
+    int64 begin) const {
+  gtl::InlinedVector<int64, 4> flat_outer =
+      ComputeFlatOuterDims(shape_.dim_sizes(), begin + NDIMS);
   return shaped<T, NDIMS>(ComputeFlatInnerDims(flat_outer, NDIMS));
 }
 
diff --git a/tensorflow/core/framework/tensor.proto b/tensorflow/core/framework/tensor.proto
index 6dab325969bacbda15552a79eb3c0862dbde20a1..abbf16e8103326011525feb0017922474ff8d2cf 100644
--- a/tensorflow/core/framework/tensor.proto
+++ b/tensorflow/core/framework/tensor.proto
@@ -40,8 +40,8 @@ message TensorProto {
   // be set.  The values hold the flattened representation of the tensor in
   // row major order.
 
-  // DT_HALF. Note that since protobuf has no int16 type, we'll have some
-  // pointless zero padding for each value here.
+  // DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll
+  // have some pointless zero padding for each value here.
   repeated int32 half_val = 13 [packed = true];
 
   // DT_FLOAT.
diff --git a/tensorflow/core/framework/tensor_shape_test.cc b/tensorflow/core/framework/tensor_shape_test.cc
index 06c576c7d41e5bf48f9db6754e5814142632a371..d8a9c0bac5b950157044dae07771b6733481ac9e 100644
--- a/tensorflow/core/framework/tensor_shape_test.cc
+++ b/tensorflow/core/framework/tensor_shape_test.cc
@@ -359,7 +359,8 @@ Status TensorShapeOld::IsValidShape(const TensorShapeProto& proto) {
   for (const auto& d : proto.dim()) {
     if (d.size() < 0) {
       return errors::InvalidArgument("Shape ", DebugString(proto),
-                                     " has negative dimensions");
+                                     " has negative dimensions; ",
+                                     "perhaps an un-fed placeholder?");
     }
     num_elements *= d.size();
     if (num_elements > kMaxElements) {
diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc
index 47ff29fbe1a4d118e52c8faaa04019f88db0e1ae..81644388abcf9c14bc5812069f25906a7f72b4cc 100644
--- a/tensorflow/core/framework/tensor_test.cc
+++ b/tensorflow/core/framework/tensor_test.cc
@@ -20,12 +20,14 @@ limitations under the License.
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/variant_encode_decode.h"
 #include "tensorflow/core/framework/variant_tensor_data.h"
+#include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
+
 class TensorTestHelper {
  public:
   // This is an operation that can be done by VariableOp.
@@ -33,13 +35,13 @@ class TensorTestHelper {
 };
 
 // To make TestCopies do the right thing.
-inline bool operator==(const ResourceHandle& a, const ResourceHandle& b) {
+bool operator==(const ResourceHandle& a, const ResourceHandle& b) {
   return a.device() == b.device() && a.container() == b.container() &&
          a.name() == b.name() && a.hash_code() == b.hash_code() &&
          a.maybe_type_name() == b.maybe_type_name();
 }
 
-inline bool operator==(const Variant& a, const Variant& b) {
+bool operator==(const Variant& a, const Variant& b) {
   if (a.is_empty()) {
     return b.is_empty();
   }
@@ -72,6 +74,8 @@ inline bool operator==(const Variant& a, const Variant& b) {
   return true;
 }
 
+namespace {
+
 TEST(TensorTest, Default) {
   Tensor t;
   EXPECT_EQ(t.dtype(), DT_FLOAT);
@@ -175,6 +179,28 @@ void TestCopies(const Tensor& t) {
   }
 }
 
+TEST(Tensor_Half, Simple) {
+  Tensor t(DT_HALF, TensorShape({5, 7}));
+  EXPECT_TRUE(t.shape().IsSameSize(TensorShape({5, 7})));
+  for (int64 a = 0; a < t.shape().dim_size(0); a++) {
+    for (int64 b = 0; b < t.shape().dim_size(1); b++) {
+      t.matrix<Eigen::half>()(a, b) = static_cast<Eigen::half>(a * b);
+    }
+  }
+  TestCopies<Eigen::half>(t);
+}
+
+TEST(Tensor_Bfloat16, Simple) {
+  Tensor t(DT_BFLOAT16, TensorShape({5, 7}));
+  EXPECT_TRUE(t.shape().IsSameSize(TensorShape({5, 7})));
+  for (int64 a = 0; a < t.shape().dim_size(0); a++) {
+    for (int64 b = 0; b < t.shape().dim_size(1); b++) {
+      t.matrix<bfloat16>()(a, b) = static_cast<bfloat16>(a * b);
+    }
+  }
+  TestCopies<bfloat16>(t);
+}
+
 TEST(Tensor_Float, Simple) {
   Tensor t(DT_FLOAT, TensorShape({10, 20}));
   EXPECT_TRUE(t.shape().IsSameSize(TensorShape({10, 20})));
@@ -334,41 +360,126 @@ class TensorReshapeTest : public ::testing::Test {
     tensor(0, 0, 0, 0) = 0.01f;
     tensor(1, 2, 3, 4) = 0.02f;
   }
-};
 
-TEST_F(TensorReshapeTest, Reshape) {
-  LOG(INFO) << "shaped";
-  {
-    auto shaped = t.shaped<float, 1>({120});
-    EXPECT_EQ(120, shaped.dimension(0));
-    EXPECT_EQ(shaped(0), 0.01f);
-    EXPECT_EQ(shaped(119), 0.02f);
-  }
-  {
-    auto shaped = t.shaped<float, 2>({6, 20});
-    EXPECT_EQ(6, shaped.dimension(0));
-    EXPECT_EQ(20, shaped.dimension(1));
-    EXPECT_EQ(shaped(0, 0), 0.01f);
-    EXPECT_EQ(shaped(5, 19), 0.02f);
+  template <typename T>
+  using ReshapeFunc = T (Tensor::*)(gtl::ArraySlice<int64>);
+  template <typename T>
+  using ConstReshapeFunc = T (Tensor::*)(gtl::ArraySlice<int64>) const;
+
+  template <typename T, ReshapeFunc<T> Func>
+  void TestReshape(std::initializer_list<int64> sizes) {
+    T shaped = (t.*Func)(sizes);
+    TestReshapeImpl(shaped, sizes);
   }
-  {
-    auto shaped = t.shaped<float, 3>({6, 4, 5});
-    EXPECT_EQ(6, shaped.dimension(0));
-    EXPECT_EQ(4, shaped.dimension(1));
-    EXPECT_EQ(5, shaped.dimension(2));
-    EXPECT_EQ(shaped(0, 0, 0), 0.01f);
-    EXPECT_EQ(shaped(5, 3, 4), 0.02f);
+
+  template <typename T, ConstReshapeFunc<T> Func>
+  void TestReshape(std::initializer_list<int64> sizes) {
+    T shaped = (static_cast<const Tensor&>(t).*Func)(sizes);
+    TestReshapeImpl(shaped, sizes);
   }
-  {
-    auto shaped = t.shaped<float, 4>({2, 3, 4, 5});
-    EXPECT_EQ(2, shaped.dimension(0));
-    EXPECT_EQ(3, shaped.dimension(1));
-    EXPECT_EQ(4, shaped.dimension(2));
-    EXPECT_EQ(5, shaped.dimension(3));
 
-    EXPECT_EQ(shaped(0, 0, 0, 0), 0.01f);
-    EXPECT_EQ(shaped(1, 2, 3, 4), 0.02f);
+  template <typename T>
+  void TestReshapeImpl(T shaped, std::initializer_list<int64> sizes) {
+    auto iter = sizes.begin();
+    for (int i = 0; i < shaped.rank(); ++i, ++iter) {
+      EXPECT_EQ(*iter, shaped.dimension(i));
+    }
+
+    using Index = typename T::Index;
+    using Scalar = typename T::Scalar;
+    constexpr int N = T::NumIndices;
+
+    // To handle the cast when `shaped` is bit casted into a different type.
+    const float expected_first = 0.01f;
+    Eigen::DSizes<Index, N> coord;
+    EXPECT_EQ(shaped(coord), *reinterpret_cast<const Scalar*>(&expected_first));
+
+    for (int i = 0; i < N; ++i) {
+      coord[i] = shaped.dimension(i) - 1;
+    }
+    const float expected_last = 0.02f;
+    constexpr int kNumScalarPerFloat =
+        sizeof(float) / sizeof(Scalar);  // Assuming even divide.
+    EXPECT_EQ(shaped(coord), reinterpret_cast<const Scalar*>(
+                                 &expected_last)[kNumScalarPerFloat - 1]);
   }
+};
+
+TEST_F(TensorReshapeTest, Reshape) {
+  LOG(INFO) << "shaped";
+
+#define TEST_RESHAPE(...)                                                  \
+  {                                                                        \
+    constexpr int N = (sizeof((int[]){__VA_ARGS__}) / sizeof(int));        \
+    TestReshape<TTypes<float, N>::Tensor, &Tensor::shaped<float, N>>(      \
+        {__VA_ARGS__});                                                    \
+    TestReshape<TTypes<float, N>::ConstTensor, &Tensor::shaped<float, N>>( \
+        {__VA_ARGS__});                                                    \
+    TestReshape<TTypes<float, N>::UnalignedTensor,                         \
+                &Tensor::unaligned_shaped<float, N>>({__VA_ARGS__});       \
+    TestReshape<TTypes<float, N>::UnalignedConstTensor,                    \
+                &Tensor::unaligned_shaped<float, N>>({__VA_ARGS__});       \
+    TestReshape<TTypes<float, N>::Tensor,                                  \
+                &Tensor::bit_casted_shaped<float, N>>({__VA_ARGS__});      \
+    TestReshape<TTypes<float, N>::ConstTensor,                             \
+                &Tensor::bit_casted_shaped<float, N>>({__VA_ARGS__});      \
+    TestReshape<TTypes<int32, N>::Tensor,                                  \
+                &Tensor::bit_casted_shaped<int32, N>>({__VA_ARGS__});      \
+    TestReshape<TTypes<int32, N>::ConstTensor,                             \
+                &Tensor::bit_casted_shaped<int32, N>>({__VA_ARGS__});      \
+  }
+
+  TEST_RESHAPE(120);
+  TEST_RESHAPE(6, 20);
+  TEST_RESHAPE(6, 4, 5);
+  TEST_RESHAPE(2, 3, 4, 5);
+#undef TEST_RESHAPE
+}
+
+TEST_F(TensorReshapeTest, BitcastReshapeDifferentSize) {
+#define TEST_BITCAST8_RESHAPE(...)                                    \
+  {                                                                   \
+    constexpr int N = (sizeof((int[]){__VA_ARGS__}) / sizeof(int));   \
+    TestReshape<TTypes<uint8, N>::Tensor,                             \
+                &Tensor::bit_casted_shaped<uint8, N>>({__VA_ARGS__}); \
+  }
+
+  TEST_BITCAST8_RESHAPE(480);
+  TEST_BITCAST8_RESHAPE(24, 20);
+  TEST_BITCAST8_RESHAPE(6, 16, 5);
+  TEST_BITCAST8_RESHAPE(2, 3, 4, 20);
+#undef TEST_BITCAST8_RESHAPE
+#define TEST_BITCAST16_RESHAPE(...)                                   \
+  {                                                                   \
+    constexpr int N = (sizeof((int[]){__VA_ARGS__}) / sizeof(int));   \
+    TestReshape<TTypes<int16, N>::Tensor,                             \
+                &Tensor::bit_casted_shaped<int16, N>>({__VA_ARGS__}); \
+  }
+
+  TEST_BITCAST16_RESHAPE(240);
+  TEST_BITCAST16_RESHAPE(6, 40);
+  TEST_BITCAST16_RESHAPE(12, 4, 5);
+  TEST_BITCAST16_RESHAPE(2, 3, 8, 5);
+  TEST_BITCAST16_RESHAPE(2, 3, 4, 1, 10);
+#undef TEST_BITCAST16_RESHAPE
+}
+
+TEST_F(TensorReshapeTest, ReshapeError) {
+  EXPECT_DEATH((t.shaped<float, 0>({})), "1 vs. 120");
+  EXPECT_DEATH((t.shaped<float, 1>({119})), "119 vs. 120");
+  EXPECT_DEATH((t.shaped<float, 4>({2, 3, 4, 6})), "144 vs. 120");
+
+  EXPECT_DEATH((t.unaligned_shaped<float, 0>({})), "1 vs. 120");
+  EXPECT_DEATH((t.unaligned_shaped<float, 1>({119})), "119 vs. 120");
+  EXPECT_DEATH((t.unaligned_shaped<float, 4>({2, 3, 4, 6})), "144 vs. 120");
+
+  EXPECT_DEATH((t.bit_casted_shaped<float, 0>({})), "4 vs. 480");
+  EXPECT_DEATH((t.bit_casted_shaped<float, 1>({119})), "476 vs. 480");
+  EXPECT_DEATH((t.bit_casted_shaped<float, 4>({2, 3, 4, 6})), "576 vs. 480");
+
+  Tensor string_tensor{DT_STRING, {10}};
+  // Note that the error message compare # of elements, not # of bytes.
+  EXPECT_DEATH((string_tensor.bit_casted_shaped<string, 1>({9})), "9 vs. 10");
 }
 
 TEST_F(TensorReshapeTest, Flat) {
@@ -890,7 +1001,7 @@ TEST(Tensor_Complex, SimpleWithHelper64) {
     // x contains all the 8-th root of unity.
     Tensor x(DT_COMPLEX64, TensorShape({8}));
     for (int i = 0; i < 8; ++i) {
-      x.vec<complex64>()(i) = std::pow(rotate_45, i);
+      x.vec<complex64>()(i) = MathUtil::IPow(rotate_45, i);
     }
 
     // Shift the roots by 45 degree.
@@ -898,7 +1009,7 @@ TEST(Tensor_Complex, SimpleWithHelper64) {
     y.vec<complex64>() = x.vec<complex64>() * rotate_45;
     Tensor y_expected(DT_COMPLEX64, TensorShape({8}));
     for (int i = 0; i < 8; ++i) {
-      y_expected.vec<complex64>()(i) = std::pow(rotate_45, i + 1);
+      y_expected.vec<complex64>()(i) = MathUtil::IPow(rotate_45, i + 1);
     }
     test::ExpectTensorNear<complex64>(y, y_expected, 1e-5);
 
@@ -939,7 +1050,7 @@ TEST(Tensor_Complex, SimpleWithHelper128) {
     // x contains all the 8-th root of unity.
     Tensor x(DT_COMPLEX128, TensorShape({8}));
     for (int i = 0; i < 8; ++i) {
-      x.vec<complex128>()(i) = std::pow(rotate_45, i);
+      x.vec<complex128>()(i) = MathUtil::IPow(rotate_45, i);
     }
 
     // Shift the roots by 45 degree.
@@ -947,7 +1058,7 @@ TEST(Tensor_Complex, SimpleWithHelper128) {
     y.vec<complex128>() = x.vec<complex128>() * rotate_45;
     Tensor y_expected(DT_COMPLEX128, TensorShape({8}));
     for (int i = 0; i < 8; ++i) {
-      y_expected.vec<complex128>()(i) = std::pow(rotate_45, i + 1);
+      y_expected.vec<complex128>()(i) = MathUtil::IPow(rotate_45, i + 1);
     }
     test::ExpectTensorNear<complex128>(y, y_expected, 1e-5);
 
@@ -962,8 +1073,6 @@ TEST(Tensor_Complex, SimpleWithHelper128) {
   }
 }
 
-namespace {
-
 // An allocator that always returns nullptr, for testing
 // failures to allocate.
 class DummyCPUAllocator : public Allocator {
@@ -1103,7 +1212,6 @@ TEST(Tensor, Slice_Basic) {
   }
 }
 
-namespace {
 template <typename T>
 Tensor MkTensor(DataType dt, const TensorShape& shape,
                 std::vector<T> init_values) {
@@ -1116,7 +1224,6 @@ Tensor MkTensor(DataType dt, const TensorShape& shape,
   }
   return x;
 }
-}  // namespace
 
 TEST(SummarizeValue, Uninitialized) {
   Tensor x(DT_INT32);
@@ -1165,7 +1272,7 @@ TEST(SummarizeValue, STRING) {
   EXPECT_EQ("one two three four five one...", x.SummarizeValue(6));
 }
 
-static void BM_CreateAndDestroy(int iters) {
+void BM_CreateAndDestroy(int iters) {
   TensorShape shape({10, 20});
   while (--iters) {
     Tensor t(DT_FLOAT, shape);
@@ -1173,7 +1280,7 @@ static void BM_CreateAndDestroy(int iters) {
 }
 BENCHMARK(BM_CreateAndDestroy);
 
-static void BM_Assign(int iters) {
+void BM_Assign(int iters) {
   Tensor a(DT_FLOAT, TensorShape({10, 20}));
   Tensor b(DT_FLOAT, TensorShape({10, 20}));
   bool a_to_b = true;
@@ -1195,7 +1302,7 @@ TEST(Tensor, EmptyTensorData) {
 }
 
 // Benchmark create and destroy a tensor, with an allocated buffer.
-static void BM_CreateAndDestroyWithBuf(int iters) {
+void BM_CreateAndDestroyWithBuf(int iters) {
   TensorShape shape({10, 20});
   Allocator* allocator = cpu_allocator();
   while (--iters) {
@@ -1205,7 +1312,7 @@ static void BM_CreateAndDestroyWithBuf(int iters) {
 BENCHMARK(BM_CreateAndDestroyWithBuf);
 
 // Benchmark create+copy a tensor, with an allocated buffer.
-static void BM_CreateAndCopyCtrWithBuf(int iters) {
+void BM_CreateAndCopyCtrWithBuf(int iters) {
   TensorShape shape({10, 20});
   Allocator* allocator = cpu_allocator();
   while (--iters) {
@@ -1216,7 +1323,7 @@ static void BM_CreateAndCopyCtrWithBuf(int iters) {
 BENCHMARK(BM_CreateAndCopyCtrWithBuf);
 
 // Benchmark create+move a tensor, with an allocated buffer.
-static void BM_CreateAndMoveCtrWithBuf(int iters) {
+void BM_CreateAndMoveCtrWithBuf(int iters) {
   TensorShape shape({10, 20});
   Allocator* allocator = cpu_allocator();
   while (--iters) {
diff --git a/tensorflow/core/framework/tracking_allocator.cc b/tensorflow/core/framework/tracking_allocator.cc
index 239dfd13ec2e45acb0a65700f2a8882c61fc03b3..65c98ad1ee7bf68249389890c05cd968ddbf068c 100644
--- a/tensorflow/core/framework/tracking_allocator.cc
+++ b/tensorflow/core/framework/tracking_allocator.cc
@@ -156,6 +156,8 @@ void TrackingAllocator::GetStats(AllocatorStats* stats) {
   allocator_->GetStats(stats);
 }
 
+void TrackingAllocator::ClearStats() { allocator_->ClearStats(); }
+
 std::tuple<size_t, size_t, size_t> TrackingAllocator::GetSizes() {
   size_t high_watermark;
   size_t total_bytes;
diff --git a/tensorflow/core/framework/tracking_allocator.h b/tensorflow/core/framework/tracking_allocator.h
index a6c26c89e51f1fec01886672b91f863ee36bedc8..4825ed414f0dc64d98ab848e18f8aceb88629f40 100644
--- a/tensorflow/core/framework/tracking_allocator.h
+++ b/tensorflow/core/framework/tracking_allocator.h
@@ -68,6 +68,7 @@ class TrackingAllocator : public Allocator {
   size_t AllocatedSize(void* ptr) override;
   int64 AllocationId(void* ptr) override;
   void GetStats(AllocatorStats* stats) override;
+  void ClearStats() override;
 
   // If the underlying allocator tracks allocation sizes, this returns
   // a tuple where the first value is the total number of bytes
diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc
index faae19585d9dd2bc5f351772af93723daaa3b8be..adf4e1bae307d81d91e7e597fc882caf4c87601f 100644
--- a/tensorflow/core/framework/types.cc
+++ b/tensorflow/core/framework/types.cc
@@ -47,11 +47,8 @@ const std::string DeviceName<Eigen::GpuDevice>::value = DEVICE_GPU;
 const std::string DeviceName<Eigen::SyclDevice>::value = DEVICE_SYCL;
 #endif  // TENSORFLOW_USE_SYCL
 
-string DataTypeString(DataType dtype) {
-  if (IsRefType(dtype)) {
-    DataType non_ref = static_cast<DataType>(dtype - kDataTypeRefOffset);
-    return strings::StrCat(DataTypeString(non_ref), "_ref");
-  }
+namespace {
+string DataTypeStringInternal(DataType dtype) {
   switch (dtype) {
     case DT_INVALID:
       return "INVALID";
@@ -106,6 +103,15 @@ string DataTypeString(DataType dtype) {
       return strings::StrCat("unknown dtype enum (", dtype, ")");
   }
 }
+}  // end namespace
+
+string DataTypeString(DataType dtype) {
+  if (IsRefType(dtype)) {
+    DataType non_ref = static_cast<DataType>(dtype - kDataTypeRefOffset);
+    return strings::StrCat(DataTypeStringInternal(non_ref), "_ref");
+  }
+  return DataTypeStringInternal(dtype);
+}
 
 bool DataTypeFromString(StringPiece sp, DataType* dt) {
   if (sp.ends_with("_ref")) {
@@ -205,142 +211,12 @@ string DataTypeSliceString(const DataTypeSlice types) {
   return out;
 }
 
-DataTypeVector AllTypes() {
-  return {DT_FLOAT,   DT_DOUBLE, DT_INT32,  DT_UINT8,     DT_INT16,
-          DT_UINT16,  DT_INT8,   DT_STRING, DT_COMPLEX64, DT_COMPLEX128,
-          DT_INT64,   DT_BOOL,   DT_QINT8,  DT_QUINT8,    DT_QINT16,
-          DT_QUINT16, DT_QINT32, DT_HALF,   DT_RESOURCE,  DT_VARIANT,
-          DT_UINT32,  DT_UINT64};
-}
-
-#if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION)
-
-DataTypeVector RealNumberTypes() {
-  return {DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64,  DT_UINT8, DT_INT16,
-          DT_INT8,  DT_UINT16, DT_HALF,  DT_UINT32, DT_UINT64};
-}
-
-DataTypeVector QuantizedTypes() {
-  return {DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32};
-}
-
-DataTypeVector RealAndQuantizedTypes() {
-  return {DT_FLOAT,  DT_DOUBLE,  DT_INT32,  DT_INT64, DT_UINT8,
-          DT_UINT16, DT_UINT16,  DT_INT8,   DT_QINT8, DT_QUINT8,
-          DT_QINT16, DT_QUINT16, DT_QINT32, DT_HALF};
-}
-
-DataTypeVector NumberTypes() {
-  return {DT_FLOAT,     DT_DOUBLE,     DT_INT64,  DT_INT32,
-          DT_UINT8,     DT_UINT16,     DT_INT16,  DT_INT8,
-          DT_COMPLEX64, DT_COMPLEX128, DT_QINT8,  DT_QUINT8,
-          DT_QINT32,    DT_HALF,       DT_UINT32, DT_UINT64};
-}
-
-#elif defined(__ANDROID_TYPES_FULL__)
-
-DataTypeVector RealNumberTypes() {
-  return {DT_FLOAT, DT_INT32, DT_INT64, DT_HALF};
-}
-
-DataTypeVector NumberTypes() {
-  return {DT_FLOAT,  DT_INT32,  DT_INT64, DT_QINT8,
-          DT_QUINT8, DT_QINT32, DT_HALF};
-}
-
-DataTypeVector QuantizedTypes() {
-  return {DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32};
-}
-
-DataTypeVector RealAndQuantizedTypes() {
-  return {DT_FLOAT,  DT_INT32,   DT_INT64,  DT_QINT8, DT_QUINT8,
-          DT_QINT16, DT_QUINT16, DT_QINT32, DT_HALF};
-}
-
-#else  // defined(IS_MOBILE_PLATFORM) && !defined(__ANDROID_TYPES_FULL__)
-
-DataTypeVector RealNumberTypes() { return {DT_FLOAT, DT_INT32}; }
-
-DataTypeVector NumberTypes() {
-  return {DT_FLOAT, DT_INT32, DT_QINT8, DT_QUINT8, DT_QINT32};
-}
-
-DataTypeVector QuantizedTypes() {
-  return {DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32};
-}
-
-DataTypeVector RealAndQuantizedTypes() {
-  return {DT_FLOAT,  DT_INT32,   DT_QINT8, DT_QUINT8,
-          DT_QINT16, DT_QUINT16, DT_QINT32};
-}
-
-#endif  // defined(IS_MOBILE_PLATFORM)
-
-// TODO(jeff): Maybe unify this with Tensor::CanUseDMA, or the underlying
-// is_simple<T> in tensor.cc (and possible choose a more general name?)
-bool DataTypeCanUseMemcpy(DataType dt) {
+bool DataTypeAlwaysOnHost(DataType dt) {
+  // Includes DT_STRING and DT_RESOURCE.
   switch (dt) {
-    case DT_FLOAT:
-    case DT_DOUBLE:
-    case DT_INT32:
-    case DT_UINT32:
-    case DT_UINT8:
-    case DT_UINT16:
-    case DT_INT16:
-    case DT_INT8:
-    case DT_COMPLEX64:
-    case DT_COMPLEX128:
-    case DT_INT64:
-    case DT_UINT64:
-    case DT_BOOL:
-    case DT_QINT8:
-    case DT_QUINT8:
-    case DT_QINT16:
-    case DT_QUINT16:
-    case DT_QINT32:
-    case DT_BFLOAT16:
-    case DT_HALF:
-      return true;
-    default:
-      return false;
-  }
-}
-
-bool DataTypeIsQuantized(DataType dt) {
-  switch (dt) {
-    case DT_QINT8:
-    case DT_QUINT8:
-    case DT_QINT16:
-    case DT_QUINT16:
-    case DT_QINT32:
-      return true;
-    default:
-      return false;
-  }
-}
-
-bool DataTypeIsInteger(DataType dt) {
-  switch (dt) {
-    case DT_INT8:
-    case DT_UINT8:
-    case DT_INT16:
-    case DT_UINT16:
-    case DT_INT32:
-    case DT_UINT32:
-    case DT_INT64:
-    case DT_UINT64:
-      return true;
-    default:
-      return false;
-  }
-}
-
-bool DataTypeIsUnsigned(DataType dt) {
-  switch (dt) {
-    case DT_UINT8:
-    case DT_UINT16:
-    case DT_UINT32:
-    case DT_UINT64:
+    case DT_STRING:
+    case DT_STRING_REF:
+    case DT_RESOURCE:
       return true;
     default:
       return false;
diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h
index dc53ed41780d90448872b1bd98e97f5e16d49592..cb8e77f1df962eb36277ac7c01e8b580d5926452 100644
--- a/tensorflow/core/framework/types.h
+++ b/tensorflow/core/framework/types.h
@@ -112,10 +112,127 @@ inline string DataTypeVectorString(const DataTypeVector& dtypes) {
   return DataTypeSliceString(dtypes);
 }
 
+// DataTypeSet represents a set of DataType values as a simple and efficient
+// bit mask.  Note that DataTypeSet cannot represent all DataType values; it
+// cannot represent any of the DT_*_REF values.
+class DataTypeSet {
+ private:
+  const uint32 mask_;
+
+  static constexpr uint32 kNumBits = 32;
+
+ public:
+  constexpr DataTypeSet(const DataTypeSet& other) : mask_(other.mask_) {}
+  explicit constexpr DataTypeSet(uint32 mask) : mask_(mask) {}
+
+  constexpr bool Contains(DataType dt) const {
+    return (static_cast<uint32>(dt) < kNumBits) &&
+           ((mask_ >> static_cast<uint32>(dt)) & 1u) != 0u;
+  }
+
+  class Iterator {
+    const DataTypeSet& set_;
+    uint32 pos_;
+
+   public:
+    Iterator(const DataTypeSet& set, uint32 pos) : set_(set), pos_(pos) {
+      DCHECK_LE(pos, kNumBits);
+    }
+    DataType operator*() const { return static_cast<DataType>(pos_); }
+    Iterator& operator++() {
+      ++pos_;
+      DCHECK_LE(pos_, kNumBits);
+      if (pos_ < kNumBits) {
+        uint32 remaining_mask = set_.mask_ >> pos_;
+        if (remaining_mask != 0u) {
+          pos_ += ctz_uint32(remaining_mask);
+        }
+      }
+      DCHECK_LE(pos_, kNumBits);
+      return *this;
+    }
+    bool operator==(const Iterator& other) const { return pos_ == other.pos_; }
+    bool operator!=(const Iterator& other) const { return !(*this == other); }
+    size_t operator-(const Iterator& other) const {
+      return this->pos_ - other.pos_;
+    }
+  };
+
+  static uint32 ctz_uint32(uint32 x) {
+    DCHECK_NE(x, 0u);
+#ifdef __GNUC__
+    return __builtin_ctz(x);
+#else
+    uint32 n = 0u;
+    while ((x & 1u) == 0u) {
+      x >>= 1;
+      ++n;
+    }
+    return n;
+#endif
+  }
+
+  static uint32 clz_uint32(uint32 x) {
+    DCHECK_NE(x, 0u);
+#ifdef __GNUC__
+    return __builtin_clz(x);
+#else
+    uint32 n = 0u;
+    while ((x >> (kNumBits - 1u)) == 0u) {
+      x <<= 1;
+      ++n;
+    }
+    return n;
+#endif
+  }
+
+  Iterator begin() const {
+    // The begin position is the index of the first bit set to 1 in the entire
+    // bit mask. If there are no bits set to 1, then the index is 0.
+    if (mask_ != 0) {
+      return Iterator(*this, ctz_uint32(mask_));
+    }
+    // The set is empty.
+    return Iterator(*this, 0);
+  }
+
+  Iterator end() const {
+    // The end position is the index of the highest bit that is set, plus 1.
+    // If there are no bits set to 1, then the index is 0.
+    if (mask_ != 0) {
+      return Iterator(*this, kNumBits - clz_uint32(mask_));
+    }
+    // The set is empty.
+    return Iterator(*this, 0);
+  }
+
+  size_t size() const {
+#if defined(__GNUC__)
+    return __builtin_popcount(mask_);
+#else
+    size_t n = 0;
+    uint32 x = mask_;
+    while (x > 0) {
+      n += x & 1u;
+      x >>= 1;
+    }
+    return n;
+#endif
+  }
+
+  constexpr DataTypeSet operator|(const DataTypeSet& other) const {
+    return DataTypeSet(mask_ | other.mask_);
+  }
+};
+
 // If "sp" names a valid type, store it in "*dt" and return true.  Otherwise,
 // return false.
 bool DataTypeFromString(StringPiece sp, DataType* dt);
 
+constexpr inline DataTypeSet ToSet(DataType dt) {
+  return DataTypeSet(1u << static_cast<uint32>(dt));
+}
+
 // DT_FLOAT + kDataTypeRefOffset == DT_FLOAT_REF, etc.
 enum { kDataTypeRefOffset = 100 };
 inline bool IsRefType(DataType dtype) {
@@ -139,17 +256,94 @@ inline bool TypesCompatible(DataType expected, DataType actual) {
 }
 
 // Does not include _ref types.
-DataTypeVector AllTypes();
+constexpr DataTypeSet kAllTypes =
+    ToSet(DT_FLOAT) | ToSet(DT_DOUBLE) | ToSet(DT_INT32) | ToSet(DT_UINT8) |
+    ToSet(DT_INT16) | ToSet(DT_UINT16) | ToSet(DT_INT8) | ToSet(DT_STRING) |
+    ToSet(DT_COMPLEX64) | ToSet(DT_COMPLEX128) | ToSet(DT_INT64) |
+    ToSet(DT_BOOL) | ToSet(DT_QINT8) | ToSet(DT_QUINT8) | ToSet(DT_QINT16) |
+    ToSet(DT_QUINT16) | ToSet(DT_QINT32) | ToSet(DT_HALF) | ToSet(DT_RESOURCE) |
+    ToSet(DT_VARIANT) | ToSet(DT_UINT32) | ToSet(DT_UINT64) |
+    ToSet(DT_BFLOAT16);
+inline const DataTypeSet& AllTypes() { return kAllTypes; }
+
+#if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION)
+
+// Types that support '<' and '>'.
+constexpr DataTypeSet kRealNumberTypes =
+    ToSet(DT_FLOAT) | ToSet(DT_DOUBLE) | ToSet(DT_INT32) | ToSet(DT_INT64) |
+    ToSet(DT_UINT8) | ToSet(DT_INT16) | ToSet(DT_INT8) | ToSet(DT_UINT16) |
+    ToSet(DT_HALF) | ToSet(DT_UINT32) | ToSet(DT_UINT64) | ToSet(DT_BFLOAT16);
+inline const DataTypeSet RealNumberTypes() { return kRealNumberTypes; }
 
 // Return the list of all numeric types.
+// Includes complex and quantized types.
 // NOTE: On Android, we only include the float and int32 types for now.
-DataTypeVector RealNumberTypes();  // Types that support '<' and '>'.
-DataTypeVector NumberTypes();      // Includes complex and quantized types.
+const DataTypeSet kNumberTypes =
+    ToSet(DT_FLOAT) | ToSet(DT_DOUBLE) | ToSet(DT_INT64) | ToSet(DT_INT32) |
+    ToSet(DT_UINT8) | ToSet(DT_UINT16) | ToSet(DT_INT16) | ToSet(DT_INT8) |
+    ToSet(DT_COMPLEX64) | ToSet(DT_COMPLEX128) | ToSet(DT_QINT8) |
+    ToSet(DT_QUINT8) | ToSet(DT_QINT32) | ToSet(DT_HALF) | ToSet(DT_UINT32) |
+    ToSet(DT_UINT64) | ToSet(DT_BFLOAT16);
+inline const DataTypeSet& NumberTypes() { return kNumberTypes; }
+
+constexpr DataTypeSet kQuantizedTypes = ToSet(DT_QINT8) | ToSet(DT_QUINT8) |
+                                        ToSet(DT_QINT16) | ToSet(DT_QUINT16) |
+                                        ToSet(DT_QINT32);
+inline const DataTypeSet& QuantizedTypes() { return kQuantizedTypes; }
+
+// Types that support '<' and '>', including quantized types.
+const DataTypeSet kRealAndQuantizedTypes =
+    ToSet(DT_FLOAT) | ToSet(DT_DOUBLE) | ToSet(DT_INT32) | ToSet(DT_INT64) |
+    ToSet(DT_UINT8) | ToSet(DT_UINT16) | ToSet(DT_UINT16) | ToSet(DT_INT8) |
+    ToSet(DT_QINT8) | ToSet(DT_QUINT8) | ToSet(DT_QINT16) | ToSet(DT_QUINT16) |
+    ToSet(DT_QINT32) | ToSet(DT_HALF) | ToSet(DT_BFLOAT16);
+inline const DataTypeSet& RealAndQuantizedTypes() {
+  return kRealAndQuantizedTypes;
+}
+
+#elif defined(__ANDROID_TYPES_FULL__)
+
+constexpr DataTypeSet kRealNumberTypes =
+    ToSet(DT_FLOAT) | ToSet(DT_INT32) | ToSet(DT_INT64) | ToSet(DT_HALF);
+inline DataTypeSet RealNumberTypes() { return kRealNumberTypes; }
+
+constexpr DataTypeSet kNumberTypes =
+    ToSet(DT_FLOAT) | ToSet(DT_INT32) | ToSet(DT_INT64) | ToSet(DT_QINT8) |
+    ToSet(DT_QUINT8) | ToSet(DT_QINT32) | ToSet(DT_HALF);
+inline DataTypeSet NumberTypes() { return kNumberTypes; }
+
+constexpr DataTypeSet kQuantizedTypes = ToSet(DT_QINT8) | ToSet(DT_QUINT8) |
+                                        ToSet(DT_QINT16) | ToSet(DT_QUINT16) |
+                                        ToSet(DT_QINT32);
+inline DataTypeSet QuantizedTypes() { return kQuantizedTypes; }
+
+constexpr DataTypeSet kRealAndQuantizedTypes =
+    ToSet(DT_FLOAT) | ToSet(DT_INT32) | ToSet(DT_INT64) | ToSet(DT_QINT8) |
+    ToSet(DT_QUINT8) | ToSet(DT_QINT16) | ToSet(DT_QUINT16) | ToSet(DT_QINT32) |
+    ToSet(DT_HALF);
+inline DataTypeSet RealAndQuantizedTypes() { return kRealAndQuantizedTypes; }
+
+#else  // defined(IS_MOBILE_PLATFORM) && !defined(__ANDROID_TYPES_FULL__)
+
+constexpr DataTypeSet kRealNumberTypes = ToSet(DT_FLOAT) | ToSet(DT_INT32);
+inline DataTypeSet RealNumberTypes() { return kRealNumberTypes; }
+
+constexpr DataTypeSet kNumberTypes = ToSet(DT_FLOAT) | ToSet(DT_INT32) |
+                                     ToSet(DT_QINT8) | ToSet(DT_QUINT8) |
+                                     ToSet(DT_QINT32);
+inline DataTypeSet NumberTypes() { return kNumberTypes; }
+
+constexpr DataTypeSet kQuantizedTypes = ToSet(DT_QINT8) | ToSet(DT_QUINT8) |
+                                        ToSet(DT_QINT16) | ToSet(DT_QUINT16) |
+                                        ToSet(DT_QINT32);
+inline DataTypeSet QuantizedTypes() { return kQuantizedTypes; }
 
-DataTypeVector QuantizedTypes();
-DataTypeVector RealAndQuantizedTypes();  // Types that support '<' and
-                                         // '>', including quantized
-                                         // types
+constexpr DataTypeSet kRealAndQuantizedTypes =
+    ToSet(DT_FLOAT) | ToSet(DT_INT32) | ToSet(DT_QINT8) | ToSet(DT_QUINT8) |
+    ToSet(DT_QINT16) | ToSet(DT_QUINT16) | ToSet(DT_QINT32);
+inline DataTypeSet RealAndQuantizedTypes() { return kRealAndQuantizedTypes; }
+
+#endif  // defined(IS_MOBILE_PLATFORM)
 
 // Validates type T for whether it is a supported DataType.
 template <class T>
@@ -220,19 +414,60 @@ struct IsValidDataType {
 static_assert(IsValidDataType<int64>::value, "Incorrect impl for int64");
 static_assert(IsValidDataType<int32>::value, "Incorrect impl for int32");
 
-bool DataTypeCanUseMemcpy(DataType dt);
+// TODO(jeff): Maybe unify this with Tensor::CanUseDMA, or the underlying
+// is_simple<T> in tensor.cc (and possible choose a more general name?)
+constexpr DataTypeSet kDataTypesCanUseMemcpy =
+    ToSet(DT_FLOAT) | ToSet(DT_DOUBLE) | ToSet(DT_INT32) | ToSet(DT_UINT32) |
+    ToSet(DT_UINT8) | ToSet(DT_UINT16) | ToSet(DT_INT16) | ToSet(DT_INT8) |
+    ToSet(DT_COMPLEX64) | ToSet(DT_COMPLEX128) | ToSet(DT_INT64) |
+    ToSet(DT_UINT64) | ToSet(DT_BOOL) | ToSet(DT_QINT8) | ToSet(DT_QUINT8) |
+    ToSet(DT_QINT16) | ToSet(DT_QUINT16) | ToSet(DT_QINT32) |
+    ToSet(DT_BFLOAT16) | ToSet(DT_HALF);
+inline bool DataTypeCanUseMemcpy(DataType dt) {
+  return kDataTypesCanUseMemcpy.Contains(dt);
+}
+
+// Returns true iff 'dt' is a real, non-quantized floating point type.
+constexpr DataTypeSet kDataTypeIsFloating =
+    ToSet(DT_HALF) | ToSet(DT_BFLOAT16) | ToSet(DT_FLOAT) | ToSet(DT_DOUBLE);
+inline bool DataTypeIsFloating(DataType dt) {
+  return kDataTypeIsFloating.Contains(dt);
+}
+
+// Returns true iff 'dt' is a complex type.
+constexpr DataTypeSet kDataTypeIsComplex =
+    ToSet(DT_COMPLEX64) | ToSet(DT_COMPLEX128);
+inline bool DataTypeIsComplex(DataType dt) {
+  return kDataTypeIsComplex.Contains(dt);
+}
 
-bool DataTypeIsQuantized(DataType dt);
+inline bool DataTypeIsQuantized(DataType dt) {
+  return kQuantizedTypes.Contains(dt);
+}
 
 // Is the dtype nonquantized integral?
-bool DataTypeIsInteger(DataType dt);
+constexpr DataTypeSet kDataTypeIsInteger =
+    ToSet(DT_INT8) | ToSet(DT_UINT8) | ToSet(DT_INT16) | ToSet(DT_UINT16) |
+    ToSet(DT_INT32) | ToSet(DT_UINT32) | ToSet(DT_INT64) | ToSet(DT_UINT64);
+inline bool DataTypeIsInteger(DataType dt) {
+  return kDataTypeIsInteger.Contains(dt);
+}
 
 // Is the dtype an unsigned integral type?
-bool DataTypeIsUnsigned(DataType dt);
+constexpr DataTypeSet kDataTypeIsUnsigned =
+    ToSet(DT_UINT8) | ToSet(DT_UINT16) | ToSet(DT_UINT32) | ToSet(DT_UINT64);
+inline bool DataTypeIsUnsigned(DataType dt) {
+  return kDataTypeIsUnsigned.Contains(dt);
+}
 
 // Returns a 0 on failure
 int DataTypeSize(DataType dt);
 
+// Types that always sit on host: DT_STRING, DT_STRING_REF, DT_RESOURCE.
+// For DT_RESOURCE, the handle always sits on host (even if the underlying
+// object has device-allocated resources).
+bool DataTypeAlwaysOnHost(DataType dt);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_FRAMEWORK_TYPES_H_
diff --git a/tensorflow/core/framework/types_test.cc b/tensorflow/core/framework/types_test.cc
index bc57740469f96fee28de1cea8920cc0431511db1..5ddc9865633623561760bbcb06d1edf4eecec7a6 100644
--- a/tensorflow/core/framework/types_test.cc
+++ b/tensorflow/core/framework/types_test.cc
@@ -130,6 +130,13 @@ TEST(TypesTest, QuantizedTypes) {
   EXPECT_FALSE(DataTypeIsQuantized(DT_BFLOAT16));
 }
 
+TEST(TypesTest, ComplexTypes) {
+  EXPECT_TRUE(DataTypeIsComplex(DT_COMPLEX64));
+  EXPECT_TRUE(DataTypeIsComplex(DT_COMPLEX128));
+  EXPECT_FALSE(DataTypeIsComplex(DT_FLOAT));
+  EXPECT_FALSE(DataTypeIsComplex(DT_DOUBLE));
+}
+
 TEST(TypesTest, IntegerTypes) {
   for (auto dt : AllTypes()) {
     const string name = DataTypeString(dt);
diff --git a/tensorflow/core/framework/variant_encode_decode.h b/tensorflow/core/framework/variant_encode_decode.h
index 09ebf6257bdffc314e09a124db70e33801ae338d..5a84f9d94385a7048a0f4adfe78e1805b367f02d 100644
--- a/tensorflow/core/framework/variant_encode_decode.h
+++ b/tensorflow/core/framework/variant_encode_decode.h
@@ -233,6 +233,7 @@ void EncodeVariant(const T& value, string* buf) {
   VariantTensorData data;
   EncodeVariantImpl(value, TypeResolver<T>(), &data);
   data.set_type_name(TypeNameVariant(value));
+  DCHECK(buf != nullptr);
   data.SerializeToString(buf);
 }
 
diff --git a/tensorflow/core/framework/variant_tensor_data.cc b/tensorflow/core/framework/variant_tensor_data.cc
index 82479193d2a3464897b0fff6c8feaf6c487a23c4..99712dc114b248ba47ee7427c83cb84d5678e244 100644
--- a/tensorflow/core/framework/variant_tensor_data.cc
+++ b/tensorflow/core/framework/variant_tensor_data.cc
@@ -34,7 +34,9 @@ const Tensor& VariantTensorData::tensors(int index) const {
   return tensors_[index];
 }
 
-std::vector<Tensor> VariantTensorData::tensors() { return tensors_; }
+const std::vector<Tensor>& VariantTensorData::tensors() const {
+  return tensors_;
+}
 
 Tensor* VariantTensorData::add_tensors() {
   tensors_.emplace_back();
diff --git a/tensorflow/core/framework/variant_tensor_data.h b/tensorflow/core/framework/variant_tensor_data.h
index 6e04879494af447e620f6737bc749f68d9e1394d..1d87bc341a4bd268d1e461b3710d006cf99cc685 100644
--- a/tensorflow/core/framework/variant_tensor_data.h
+++ b/tensorflow/core/framework/variant_tensor_data.h
@@ -63,7 +63,7 @@ class VariantTensorData {
   // Tensors contained within objects being serialized.
   int tensors_size() const;
   const Tensor& tensors(int index) const;
-  std::vector<Tensor> tensors();
+  const std::vector<Tensor>& tensors() const;
   Tensor* add_tensors();
 
   // Conversion to and from VariantTensorDataProto
diff --git a/tensorflow/core/graph/algorithm.cc b/tensorflow/core/graph/algorithm.cc
index 6ef51aa7dfcd48f840f80040f068a766a33ff5bf..4652fbe40691a01e0567c7df2fba0ca2ea482fe1 100644
--- a/tensorflow/core/graph/algorithm.cc
+++ b/tensorflow/core/graph/algorithm.cc
@@ -83,13 +83,16 @@ void ReverseDFS(const Graph& g, const std::function<void(Node*)>& enter,
   ReverseDFSFrom(g, {g.sink_node()}, enter, leave, stable_comparator);
 }
 
-void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
-                    const std::function<void(Node*)>& enter,
-                    const std::function<void(Node*)>& leave,
-                    const NodeComparator& stable_comparator) {
+namespace {
+
+template <typename T>
+void ReverseDFSFromHelper(const Graph& g, gtl::ArraySlice<T> start,
+                          const std::function<void(T)>& enter,
+                          const std::function<void(T)>& leave,
+                          const NodeComparator& stable_comparator) {
   // Stack of work to do.
   struct Work {
-    Node* node;
+    T node;
     bool leave;  // Are we entering or leaving n?
   };
   std::vector<Work> stack(start.size());
@@ -102,7 +105,7 @@ void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
     Work w = stack.back();
     stack.pop_back();
 
-    Node* n = w.node;
+    T n = w.node;
     if (w.leave) {
       leave(n);
       continue;
@@ -117,7 +120,7 @@ void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
 
     gtl::iterator_range<NeighborIter> nodes = n->in_nodes();
 
-    auto add_work = [&visited, &stack](Node* out) {
+    auto add_work = [&visited, &stack](T out) {
       if (!visited[out->id()]) {
         // Note; we must not mark as visited until we actually process it.
         stack.push_back(Work{out, false});
@@ -125,22 +128,38 @@ void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
     };
 
     if (stable_comparator) {
-      std::vector<Node*> nodes_sorted;
-      for (Node* in : nodes) {
+      std::vector<T> nodes_sorted;
+      for (T in : nodes) {
         nodes_sorted.emplace_back(in);
       }
       std::sort(nodes_sorted.begin(), nodes_sorted.end(), stable_comparator);
-      for (Node* in : nodes_sorted) {
+      for (T in : nodes_sorted) {
         add_work(in);
       }
     } else {
-      for (Node* in : nodes) {
+      for (T in : nodes) {
         add_work(in);
       }
     }
   }
 }
 
+}  // namespace
+
+void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<const Node*> start,
+                    const std::function<void(const Node*)>& enter,
+                    const std::function<void(const Node*)>& leave,
+                    const NodeComparator& stable_comparator) {
+  ReverseDFSFromHelper(g, start, enter, leave, stable_comparator);
+}
+
+void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
+                    const std::function<void(Node*)>& enter,
+                    const std::function<void(Node*)>& leave,
+                    const NodeComparator& stable_comparator) {
+  ReverseDFSFromHelper(g, start, enter, leave, stable_comparator);
+}
+
 void GetPostOrder(const Graph& g, std::vector<Node*>* order,
                   const NodeComparator& stable_comparator) {
   order->clear();
diff --git a/tensorflow/core/graph/algorithm.h b/tensorflow/core/graph/algorithm.h
index 5bb6041d98b6aebd3036b68fffeed32afda85e50..ac4a099013b67e0d256a9310495e4b585eb40e0a 100644
--- a/tensorflow/core/graph/algorithm.h
+++ b/tensorflow/core/graph/algorithm.h
@@ -69,6 +69,10 @@ extern void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
                            const std::function<void(Node*)>& enter,
                            const std::function<void(Node*)>& leave,
                            const NodeComparator& stable_comparator = {});
+extern void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<const Node*> start,
+                           const std::function<void(const Node*)>& enter,
+                           const std::function<void(const Node*)>& leave,
+                           const NodeComparator& stable_comparator = {});
 
 // Stores in *order the post-order numbering of all nodes
 // in graph found via a depth first search starting at the source node.
diff --git a/tensorflow/core/graph/control_flow.h b/tensorflow/core/graph/control_flow.h
index 22dbb47010729d61547b33db3a6c8b0ad4fefdb4..372044f538f9428e1979ba80bbb18a9742fc014e 100644
--- a/tensorflow/core/graph/control_flow.h
+++ b/tensorflow/core/graph/control_flow.h
@@ -33,6 +33,7 @@ struct ControlFlowInfo {
 // Assign to each node the name of the frame and the level it belongs to.
 // We check the well-formedness of the graph: All inputs to a node must
 // come from the same frame and have the same "static" iteration level.
+// `info` is cleared and populated by this function.
 // NOTE(yuanbyu): For now, we require all sends/recvs have iteration level
 // 0. This essentially means there can't be multiple serial Nexts in
 // an iteration, which all sane front-ends should satisfy.
diff --git a/tensorflow/core/graph/costmodel.cc b/tensorflow/core/graph/costmodel.cc
index 3ed32068ae19b73f93b2b2bd12d77712a1273cfb..b1e6cf64e837a04b0121a5e2c5c5a905cf1821f6 100644
--- a/tensorflow/core/graph/costmodel.cc
+++ b/tensorflow/core/graph/costmodel.cc
@@ -291,59 +291,24 @@ Bytes CostModel::TempMemorySize(const Node* node) const {
   return max_mem_usage_[id].temp_memory_size;
 }
 
-Bytes CostModel::HostTempMemorySize(const Node* node) const {
+Bytes CostModel::PersistentMemorySize(const Node* node) const {
   const int id = Id(node);
   if (id < 0) {
     return Bytes(0);
   }
-  return max_mem_usage_[id].host_temp_memory_size;
-}
-
-Bytes CostModel::DeviceTempMemorySize(const Node* node) const {
-  const int id = Id(node);
-  if (id < 0) {
-    return Bytes(0);
-  }
-  return max_mem_usage_[id].device_temp_memory_size;
-}
-
-Bytes CostModel::HostPersistentMemorySize(const Node* node) const {
-  const int id = Id(node);
-  if (id < 0) {
-    return Bytes(0);
-  }
-  return max_mem_usage_[id].host_persistent_memory_size;
-}
-
-Bytes CostModel::DevicePersistentMemorySize(const Node* node) const {
-  const int id = Id(node);
-  if (id < 0) {
-    return Bytes(0);
-  }
-  return max_mem_usage_[id].device_persistent_memory_size;
+  return max_mem_usage_[id].persistent_memory_size;
 }
 
 void CostModel::RecordMemoryStats(const Node* node,
                                   const MemoryStats& memory_stats) {
   const int id = Id(node);
   if (id < 0) return;
-  max_mem_usage_[id].host_temp_memory_size =
-      memory_stats.host_temp_memory_size();
-  max_mem_usage_[id].device_temp_memory_size =
-      memory_stats.device_temp_memory_size();
-  max_mem_usage_[id].host_persistent_memory_size =
-      memory_stats.host_persistent_memory_size();
-  max_mem_usage_[id].device_persistent_memory_size =
-      memory_stats.device_persistent_memory_size();
-  for (int64 alloc_id : memory_stats.host_persistent_tensor_alloc_ids()) {
-    if (alloc_id > 0) {
-      host_persistent_alloc_ids_.insert(alloc_id);
-    }
-  }
-  for (int64 alloc_id : memory_stats.device_persistent_tensor_alloc_ids()) {
+  max_mem_usage_[id].temp_memory_size = memory_stats.temp_memory_size();
+  max_mem_usage_[id].persistent_memory_size =
+      memory_stats.persistent_memory_size();
+  for (int64 alloc_id : memory_stats.persistent_tensor_alloc_ids()) {
     if (alloc_id > 0) {
-      persistent_alloc_ids_by_devices_[node->assigned_device_name()].insert(
-          alloc_id);
+      persistent_alloc_ids_.insert(alloc_id);
     }
   }
 }
@@ -381,7 +346,7 @@ int64 CostModel::AllocationId(const Node* node, int slot) const {
 }
 
 bool CostModel::IsPersistentTensor(const Node* node, int64 alloc_id) const {
-  if (host_persistent_alloc_ids_.count(alloc_id) > 0) {
+  if (persistent_alloc_ids_.count(alloc_id) > 0) {
     return true;
   }
   if (persistent_alloc_ids_by_devices_.find(node->assigned_device_name()) ==
@@ -548,11 +513,8 @@ void CostModel::AddToCostGraphDef(const Graph* graph,
       cnode->add_control_input(Id(e->src()));
     }
 
-    cnode->set_host_temp_memory_size(HostTempMemorySize(n).value());
-    cnode->set_device_temp_memory_size(DeviceTempMemorySize(n).value());
-    cnode->set_host_persistent_memory_size(HostPersistentMemorySize(n).value());
-    cnode->set_device_persistent_memory_size(
-        DevicePersistentMemorySize(n).value());
+    cnode->set_temporary_memory_size(TempMemorySize(n).value());
+    cnode->set_persistent_memory_size(PersistentMemorySize(n).value());
 
     cnode->set_compute_cost(MaxExecutionTime(n).value());
 
diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h
index 8afa4971ad054b31eeb63d0dadaa1a2937c47a6e..081eb2ff4c226c4dd5079f16cc6c2a102d0d2d63 100644
--- a/tensorflow/core/graph/costmodel.h
+++ b/tensorflow/core/graph/costmodel.h
@@ -133,13 +133,8 @@ class CostModel {
   // Returns the size in bytes of temporary memory consumed by "node".
   Bytes TempMemorySize(const Node* node) const;
 
-  // Returns the size in bytes of temporary memory consumed by "node".
-  Bytes HostTempMemorySize(const Node* node) const;
-  Bytes DeviceTempMemorySize(const Node* node) const;
-
   // Returns the size of persistent memory allocated by "node".
-  Bytes HostPersistentMemorySize(const Node* node) const;
-  Bytes DevicePersistentMemorySize(const Node* node) const;
+  Bytes PersistentMemorySize(const Node* node) const;
 
   // Records memory stats such as temp momory and persistent memory.
   void RecordMemoryStats(const Node* node, const MemoryStats& memory_stats);
@@ -210,21 +205,11 @@ class CostModel {
 
   // Maximum memory usage
   struct MemUsage {
-    MemUsage()
-        : temp_memory_size(-1),
-          host_temp_memory_size(0),
-          device_temp_memory_size(0),
-          host_persistent_memory_size(0),
-          device_persistent_memory_size(0) {}
+    MemUsage() : temp_memory_size(0), persistent_memory_size(0) {}
 
     // TODO(yuefengz): temp_memory_size is not being used, remove it.
     Bytes temp_memory_size;
-
-    Bytes host_temp_memory_size;
-    Bytes device_temp_memory_size;
-
-    Bytes host_persistent_memory_size;
-    Bytes device_persistent_memory_size;
+    Bytes persistent_memory_size;
 
     gtl::InlinedVector<Bytes, 2> output_port_mem;
     gtl::InlinedVector<TensorShapeProto, 2> output_port_shape;
@@ -234,7 +219,7 @@ class CostModel {
 
   std::vector<gtl::InlinedVector<int64, 2> > output_port_alloc_ids_;
 
-  std::set<int64> host_persistent_alloc_ids_;
+  std::set<int64> persistent_alloc_ids_;
   std::map<string, std::set<int64>> persistent_alloc_ids_by_devices_;
 
   TensorShapeProto unknown_shape_;
diff --git a/tensorflow/core/graph/edgeset.h b/tensorflow/core/graph/edgeset.h
index 8916ccf4d0d051b9b9d5197667a05eda7265db79..0a1ee5a666cbd0d1978c075f75ab688223355f78 100644
--- a/tensorflow/core/graph/edgeset.h
+++ b/tensorflow/core/graph/edgeset.h
@@ -54,7 +54,7 @@ class EdgeSet {
  private:
   // Up to kInline elements are stored directly in ptrs_ (nullptr means none).
   // If ptrs_[0] == this then ptrs_[1] points to a set<const Edge*>.
-  static const int kInline = 2;  // Must be >= 2.
+  static const int kInline = 4;  // Must be >= 2.
   const void* ptrs_[kInline];
 
   std::set<const Edge*>* get_set() const {
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index 8890a9fb0f6858866d552d547ac31b7f40d9c8dd..2a52c7516e539d78d4534239868c5fae7f804e17 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -77,19 +77,22 @@ class GraphConstructor {
                      ? in.prefix
                      : in.prefix + "/"),
           uniquify_names(in.uniquify_names),
+          uniquify_prefix(in.uniquify_prefix),
           input_map(in.input_map),
           skip_mapped_nodes(in.skip_mapped_nodes),
           control_dependencies(in.control_dependencies),
           return_tensors(in.return_tensors),
           return_nodes(in.return_nodes),
           importing(true),
-          validate_colocation_constraints(in.validate_colocation_constraints) {}
+          validate_colocation_constraints(in.validate_colocation_constraints),
+          validate_shape(in.validate_shape) {}
 
     bool allow_internal_ops;
     bool expect_device_spec;
 
     string prefix;
     bool uniquify_names;
+    bool uniquify_prefix;
     std::map<TensorId, TensorId> input_map;
     bool skip_mapped_nodes;
     std::vector<string> control_dependencies;
@@ -106,25 +109,26 @@ class GraphConstructor {
     // remove this.
     bool importing;
     bool validate_colocation_constraints;
+    bool validate_shape = true;
   };
 
   typedef gtl::ArraySlice<const NodeDef*> NodeDefSlice;
 
   // versions and library may be nullptr
-  static Status Construct(const Options& opts, NodeDefSlice node_defs,
-                          const VersionDef* versions,
-                          const FunctionDefLibrary* library, Graph* g,
-                          ShapeRefiner* refiner,
-                          std::vector<std::pair<Node*, int>>* return_tensors,
-                          std::vector<Node*>* return_nodes,
-                          std::vector<TensorId>* unused_input_map_keys) {
+  static Status Construct(
+      const Options& opts, NodeDefSlice node_defs, const VersionDef* versions,
+      const FunctionDefLibrary* library, Graph* g, ShapeRefiner* refiner,
+      std::vector<std::pair<Node*, int>>* return_tensors,
+      std::vector<Node*>* return_nodes,
+      std::vector<TensorId>* missing_unused_input_map_keys) {
     if (versions) {
       TF_RETURN_IF_ERROR(CheckVersions(*versions, TF_GRAPH_DEF_VERSION,
                                        TF_GRAPH_DEF_VERSION_MIN_PRODUCER,
                                        "GraphDef", "graph"));
     }
     GraphConstructor c(opts, node_defs, versions, library, g, refiner,
-                       return_tensors, return_nodes, unused_input_map_keys);
+                       return_tensors, return_nodes,
+                       missing_unused_input_map_keys);
     const Status s = c.TryImport();
     if (!s.ok()) c.Undo();
     return s;
@@ -137,17 +141,18 @@ class GraphConstructor {
                    ShapeRefiner* refiner,
                    std::vector<std::pair<Node*, int>>* return_tensors,
                    std::vector<Node*>* return_nodes,
-                   std::vector<TensorId>* unused_input_map_keys)
+                   std::vector<TensorId>* missing_unused_input_map_keys)
       : opts_(opts),
         node_defs_(node_defs),
         versions_(versions),
         library_(library),
         g_(g),
         original_versions_(g->versions()),
+        prefix_(opts.prefix),
         refiner_(refiner),
         return_tensors_(return_tensors),
         return_nodes_(return_nodes),
-        unused_input_map_keys_(unused_input_map_keys) {}
+        missing_unused_input_map_keys_(missing_unused_input_map_keys) {}
 
   Status TryImport() {
     TF_RETURN_IF_ERROR(EnsureNoNameCollisions());
@@ -159,6 +164,8 @@ class GraphConstructor {
     TF_RETURN_IF_ERROR(UpdateVersionDef());
     TF_RETURN_IF_ERROR(PopulateReturnTensors());
     TF_RETURN_IF_ERROR(PopulateReturnNodes());
+    TF_RETURN_IF_ERROR(PopulateMissingUnusedInputMapKeys());
+    UpdateUniquifiedColocationNames();
     FixupSourceAndSinkEdges(g_);
     return Status::OK();
   }
@@ -172,6 +179,7 @@ class GraphConstructor {
   Status UpdateVersionDef();
   Status PopulateReturnTensors();
   Status PopulateReturnNodes();
+  Status PopulateMissingUnusedInputMapKeys();
 
   void Undo();
 
@@ -201,9 +209,18 @@ class GraphConstructor {
   void UniquifyNames(const std::vector<bool>& input_already_exists,
                      NodeDef* node_def);
 
+  // Updates any constructed nodes' colocation group names if the name has been
+  // updated by UniquifyNames. This is called after all the nodes have been
+  // constructed so all the names have been uniquified if necessary.
+  void UpdateUniquifiedColocationNames();
+
   // Returns true if `name` already exists in `g_` (either as a node name or
   // prefix).
-  bool NameExists(StringPiece name);
+  bool NameExistsInGraph(StringPiece name);
+
+  // Returns true if `name` already exists in the GraphDef being imported
+  // (either as a node name or prefix).
+  bool NameExistsInGraphDef(StringPiece name);
 
   // Returns a unique version of `original_name`, or `original_name` if it's
   // already unique in the graph.
@@ -217,6 +234,9 @@ class GraphConstructor {
   Graph* g_;
   const VersionDef original_versions_;
 
+  // A copy of opts_.prefix, possibly uniquified.
+  string prefix_;
+
   ShapeRefiner* refiner_;
 
   // May be null. Not owned.
@@ -226,9 +246,10 @@ class GraphConstructor {
   std::vector<Node*>* return_nodes_;
 
   // May be null. Not owned.
-  std::vector<TensorId>* unused_input_map_keys_;
+  std::vector<TensorId>* missing_unused_input_map_keys_;
 
-  // Intermediate datastructure used to populate `unused_input_map_keys_`.
+  // Intermediate datastructure used to populate
+  // `missing_unused_input_map_keys_`.
   std::set<TensorId> used_input_map_keys_;
 
   // Mapping from node name to the index within node_defs_.
@@ -243,6 +264,9 @@ class GraphConstructor {
   // alternative implementation of std::unordered_map.
   std::unordered_map<StringPiece, NodeInfo, StringPieceHasher> gdef_nodes_;
 
+  // Prefixes already used in the GraphDef being imported.
+  std::unordered_set<StringPiece, StringPieceHasher> gdef_prefixes_;
+
   // Mapping from node name to the existing node in g_.
   std::unordered_map<StringPiece, Node*, StringPieceHasher> existing_nodes_;
 
@@ -305,6 +329,16 @@ bool NodeNameInValues(const std::vector<string>& control_dependencies,
                    node_name) != control_dependencies.end();
 }
 
+// Adds any prefixes of `node_name` (not including the full name itself) to
+// `prefixes`.
+void AddPrefixes(StringPiece node_name,
+                 std::unordered_set<StringPiece, StringPieceHasher>* prefixes) {
+  size_t idx = -1;
+  while ((idx = node_name.find('/', idx + 1)) != StringPiece::npos) {
+    prefixes->insert(node_name.substr(0, idx));
+  }
+}
+
 Status GraphConstructor::EnsureNoNameCollisions() {
   existing_nodes_.reserve(g_->num_nodes());
   // Populate existing_nodes_ and existing_prefixes_.
@@ -323,34 +357,32 @@ Status GraphConstructor::EnsureNoNameCollisions() {
             n->name(), "'");
       }
     }
-    // Add all of node's prefixes to existing_prefixes_ (if it has any).
-    size_t idx = -1;
-    while ((idx = n->name().find('/', idx + 1)) != string::npos) {
-      StringPiece name(n->name());
-      existing_prefixes_.insert(name.substr(0, idx));
-    }
+    AddPrefixes(n->name(), &existing_prefixes_);
   }
-  if (opts_.prefix.empty() && opts_.importing && !opts_.uniquify_names) {
+  if (prefix_.empty() && opts_.importing && !opts_.uniquify_names) {
     for (const NodeDef* n : node_defs_) {
       const string& name = n->name();
-      if (NameExists(name)) {
+      if (NameExistsInGraph(name)) {
         return errors::InvalidArgument("Node name '", name,
                                        "' already exists in the Graph");
       }
     }
-  } else if (!opts_.prefix.empty()) {
-    StringPiece prefix_no_slash(opts_.prefix);
+  } else if (!prefix_.empty()) {
+    StringPiece prefix_no_slash(prefix_);
     prefix_no_slash.remove_suffix(1);
     if (!IsValidNodeName(prefix_no_slash, false)) {
-      return errors::InvalidArgument("Imported node name prefix '",
-                                     opts_.prefix,
+      return errors::InvalidArgument("Imported node name prefix '", prefix_,
                                      "' would lead to invalid node names");
     }
-    if (NameExists(prefix_no_slash)) {
-      return errors::InvalidArgument("Import node name prefix '",
-                                     prefix_no_slash,
-                                     "' conflicts with "
-                                     "name already used in the graph");
+    if (NameExistsInGraph(prefix_no_slash)) {
+      if (opts_.uniquify_prefix) {
+        prefix_ = strings::StrCat(FindUniqueName(prefix_no_slash), "/");
+      } else {
+        return errors::InvalidArgument("Import node name prefix '",
+                                       prefix_no_slash,
+                                       "' conflicts with "
+                                       "name already used in the graph");
+      }
     }
   }
   return Status::OK();
@@ -384,7 +416,7 @@ Status GraphConstructor::ValidateInputMapAndControlDependencies() {
 }
 
 Status GraphConstructor::BuildNodeIndex() {
-  // Validate the node names and add them to gdef_nodes_.
+  // Validate the node names and add them to gdef_nodes_ and gdef_prefixes_.
   for (int n = 0; n < node_defs_.size(); ++n) {
     const NodeDef& node_def = *node_defs_[n];
     if (!IsValidNodeName(node_def.name(), opts_.allow_internal_ops)) {
@@ -419,6 +451,8 @@ Status GraphConstructor::BuildNodeIndex() {
             "': Control dependencies must come after regular dependencies");
       }
     }
+    // Update gdef_prefixes_.
+    AddPrefixes(node_def.name(), &gdef_prefixes_);
   }
   return Status::OK();
 }
@@ -529,7 +563,7 @@ Status GraphConstructor::MakeNode(const NodeDef& node_def, Node** node) {
 }
 
 Status GraphConstructor::ValidateShape(Node* node) {
-  if (!opts_.importing) return Status::OK();
+  if (!opts_.importing || !opts_.validate_shape) return Status::OK();
   TF_RETURN_IF_ERROR(refiner_->AddNode(node));
   // For nodes with the _output_shapes attribute, override the shape.
   std::vector<TensorShapeProto> shape_attrs;
@@ -720,8 +754,8 @@ void GraphConstructor::AddControlDependencies(
 
 void GraphConstructor::AddPrefixToNodeDef(
     const std::vector<bool>& input_already_exists, NodeDef* node_def) {
-  if (opts_.prefix.empty()) return;
-  node_def->set_name(strings::StrCat(opts_.prefix, node_def->name()));
+  if (prefix_.empty()) return;
+  node_def->set_name(strings::StrCat(prefix_, node_def->name()));
   // Update names of input nodes
   for (int i = 0; i < node_def->input_size(); ++i) {
     StringPiece input(node_def->input(i));
@@ -729,9 +763,9 @@ void GraphConstructor::AddPrefixToNodeDef(
     // imported).
     if (input_already_exists[i]) continue;
     if (input.Consume("^")) {
-      node_def->set_input(i, strings::StrCat("^", opts_.prefix, input));
+      node_def->set_input(i, strings::StrCat("^", prefix_, input));
     } else {
-      node_def->set_input(i, strings::StrCat(opts_.prefix, input));
+      node_def->set_input(i, strings::StrCat(prefix_, input));
     }
   }
   // Update names of colocation groups
@@ -741,8 +775,7 @@ void GraphConstructor::AddPrefixToNodeDef(
     for (int i = 0; i < list->s_size(); ++i) {
       StringPiece v(list->s(i));
       if (v.Consume(kColocationGroupPrefix)) {
-        list->set_s(i,
-                    strings::StrCat(kColocationGroupPrefix, opts_.prefix, v));
+        list->set_s(i, strings::StrCat(kColocationGroupPrefix, prefix_, v));
       }
     }
   }
@@ -750,10 +783,13 @@ void GraphConstructor::AddPrefixToNodeDef(
 
 void GraphConstructor::UniquifyNames(
     const std::vector<bool>& input_already_exists, NodeDef* node_def) {
-  if (NameExists(node_def->name())) {
+  if (NameExistsInGraph(node_def->name())) {
     string old_name = node_def->name();
     node_def->set_name(FindUniqueName(node_def->name()));
     uniquified_names_[old_name] = node_def->name();
+    // Note that we don't have to update gdef_nodes_ or gdef_prefixes_ with
+    // `name` because we guarantee the original NodeDef names are unique,
+    // meaning we won't generate this name again.
   }
   for (int i = 0; i < node_def->input_size(); ++i) {
     // Skip remapped inputs (which already exist in g_ and are not being
@@ -768,31 +804,52 @@ void GraphConstructor::UniquifyNames(
     id.first = iter->second;
     node_def->set_input(i, id.ToString());
   }
-  // Update names of colocation groups
-  if (node_def->attr().find(kColocationAttrName) != node_def->attr().end()) {
-    auto* list =
-        node_def->mutable_attr()->at(kColocationAttrName).mutable_list();
-    for (int i = 0; i < list->s_size(); ++i) {
-      StringPiece v(list->s(i));
-      if (v.Consume(kColocationGroupPrefix)) {
-        auto iter = uniquified_names_.find(v.ToString());
-        if (iter == uniquified_names_.end()) continue;
-        list->set_s(i, strings::StrCat(kColocationGroupPrefix, iter->second));
+}
+
+void GraphConstructor::UpdateUniquifiedColocationNames() {
+  for (const auto& pair : gdef_nodes_) {
+    Node* node = pair.second.node;
+    if (node == nullptr) continue;
+    std::vector<string> coloc_values;
+    Status status =
+        GetNodeAttr(node->attrs(), kColocationAttrName, &coloc_values);
+    if (!status.ok()) continue;
+    bool updated = false;
+    for (int i = 0; i < coloc_values.size(); ++i) {
+      StringPiece val(coloc_values[i]);
+      if (val.Consume(kColocationGroupPrefix)) {
+        const auto& name_pair = uniquified_names_.find(val.ToString());
+        if (name_pair == uniquified_names_.end()) continue;
+        updated = true;
+        coloc_values[i] =
+            strings::StrCat(kColocationGroupPrefix, name_pair->second);
       }
     }
+    if (updated) {
+      node->AddAttr(kColocationAttrName, coloc_values);
+    }
   }
 }
 
-bool GraphConstructor::NameExists(StringPiece name) {
+bool GraphConstructor::NameExistsInGraph(StringPiece name) {
   if (existing_nodes_.find(name) != existing_nodes_.end()) return true;
-  return existing_prefixes_.find(name) != existing_prefixes_.end();
+  if (existing_prefixes_.find(name) != existing_prefixes_.end()) return true;
+  return false;
+}
+
+bool GraphConstructor::NameExistsInGraphDef(StringPiece name) {
+  if (gdef_nodes_.find(name) != gdef_nodes_.end()) return true;
+  if (gdef_prefixes_.find(name) != gdef_prefixes_.end()) return true;
+  return false;
 }
 
 string GraphConstructor::FindUniqueName(StringPiece original_name) {
   string name = original_name.ToString();
-  int count = 1;
-  while (NameExists(name)) {
-    name = strings::StrCat(original_name, "_", count++);
+  int count = 0;
+  // Check that any generated names don't collide with imported NodeDefs (as
+  // well as nodes in g_).
+  while (NameExistsInGraph(name) || (count > 0 && NameExistsInGraphDef(name))) {
+    name = strings::StrCat(original_name, "_", ++count);
   }
   return name;
 }
@@ -931,7 +988,7 @@ Status GraphConstructor::Convert() {
 
     Node* node;
     if (opts_.importing) {
-      if (!opts_.prefix.empty()) {
+      if (!prefix_.empty()) {
         AddPrefixToNodeDef(input_already_exists, &imported_node_def);
       } else if (opts_.uniquify_names) {
         UniquifyNames(input_already_exists, &imported_node_def);
@@ -972,15 +1029,6 @@ Status GraphConstructor::Convert() {
                                    " nodes in a cycle");
   }
 
-  // Update unused_input_map_keys_
-  if (unused_input_map_keys_ != nullptr) {
-    for (const auto& pair : opts_.input_map) {
-      if (used_input_map_keys_.find(pair.first) == used_input_map_keys_.end()) {
-        unused_input_map_keys_->push_back(pair.first);
-      }
-    }
-  }
-
   return Status::OK();
 }
 
@@ -1070,6 +1118,33 @@ Status GraphConstructor::PopulateReturnNodes() {
   return Status::OK();
 }
 
+Status GraphConstructor::PopulateMissingUnusedInputMapKeys() {
+  if (missing_unused_input_map_keys_ == nullptr) return Status::OK();
+  for (const auto& input_map_pair : opts_.input_map) {
+    TensorId key = input_map_pair.first;
+    if (used_input_map_keys_.count(key) > 0) continue;
+
+    auto pair = gdef_nodes_.find(key.first);
+    if (pair == gdef_nodes_.end()) {
+      // key's node doesn't exist in GraphDef
+      missing_unused_input_map_keys_->push_back(key);
+      continue;
+    }
+
+    // Check that key's index is in bounds. Get the number of outputs from the
+    // NodeDef, rather than the imported Node, since the Node may not exist if
+    // opts_.skip_mapped_nodes is true.
+    const NodeDef* node_def = node_defs_[pair->second.gdef_index];
+    const OpDef* op_def;
+    TF_RETURN_IF_ERROR(g_->op_registry()->LookUpOpDef(node_def->op(), &op_def));
+    if (key.second >= op_def->output_arg_size()) {
+      // key's index out of bounds
+      missing_unused_input_map_keys_->push_back(key);
+    }
+  }
+  return Status::OK();
+}
+
 void GraphConstructor::Undo() {
   for (const auto& iter : gdef_nodes_) {
     if (iter.second.node != nullptr) {
@@ -1101,7 +1176,7 @@ Status ConvertGraphDefToGraph(const GraphConstructorOptions& opts,
   return GraphConstructor::Construct(
       opts, gdef.node(), &gdef.versions(), &gdef.library(), g, &refiner,
       /*return_tensors=*/nullptr, /*return_nodes=*/nullptr,
-      /*unused_input_map_keys=*/nullptr);
+      /*missing_unused_input_map_keys=*/nullptr);
 }
 
 Status ConvertNodeDefsToGraph(const GraphConstructorOptions& opts,
@@ -1115,7 +1190,7 @@ Status ConvertNodeDefsToGraph(const GraphConstructorOptions& opts,
   return GraphConstructor::Construct(opts, node_defs, nullptr, nullptr, g,
                                      &refiner, /*return_tensors=*/nullptr,
                                      /*return_nodes=*/nullptr,
-                                     /*unused_input_map_keys=*/nullptr);
+                                     /*missing_unused_input_map_keys=*/nullptr);
 }
 
 Status ImportGraphDef(const ImportGraphDefOptions& opts, const GraphDef& gdef,
@@ -1144,7 +1219,7 @@ Status ImportGraphDef(const ImportGraphDefOptions& opts, const GraphDef& gdef,
 
   if (results != nullptr) {
     if (!results->return_tensors.empty() || !results->return_nodes.empty() ||
-        !results->unused_input_map_keys.empty()) {
+        !results->missing_unused_input_map_keys.empty()) {
       return errors::InvalidArgument(
           "All fields in results argument to ImportGraphDef() must be empty.");
     }
@@ -1187,7 +1262,7 @@ Status ImportGraphDef(const ImportGraphDefOptions& opts, const GraphDef& gdef,
     return GraphConstructor::Construct(
         opts, gdef.node(), &gdef.versions(), &gdef.library(), g, refiner,
         &results->return_tensors, &results->return_nodes,
-        &results->unused_input_map_keys);
+        &results->missing_unused_input_map_keys);
   }
 }
 
diff --git a/tensorflow/core/graph/graph_constructor.h b/tensorflow/core/graph/graph_constructor.h
index 4b418b862290d23f6838f6a1f43345adee467884..b03d655fe6fcd918227c62cbdbc76db6156a55c4 100644
--- a/tensorflow/core/graph/graph_constructor.h
+++ b/tensorflow/core/graph/graph_constructor.h
@@ -54,7 +54,11 @@ extern Status ConvertNodeDefsToGraph(const GraphConstructorOptions& opts,
 
 // Options for calling ImportGraphDef().
 struct ImportGraphDefOptions {
-  ImportGraphDefOptions() : uniquify_names(false), skip_mapped_nodes(false) {}
+  ImportGraphDefOptions()
+      : uniquify_names(false),
+        uniquify_prefix(false),
+        skip_mapped_nodes(false),
+        validate_shape(true) {}
 
   // Name prefix to use for nodes imported from the GraphDef.  For example, if
   // prefix="animals" and GraphDef contains a node "bunny" then the node will be
@@ -68,6 +72,11 @@ struct ImportGraphDefOptions {
   // will guarantee all node names are unique.
   bool uniquify_names;
 
+  // If true, `prefix` will be modified if it already exists as a node name or
+  // prefix in the graph. If false, a conflicting prefix will be treated as an
+  // error. This option has no effect if `prefix` isn't specified.
+  bool uniquify_prefix;
+
   // Maps tensors in `gdef` to existing tensors in `g`. Inputs in `gdef`
   // corresponding to `input_map` keys will be remapped to the nodes in `g`
   // corresponding to the values.
@@ -122,6 +131,9 @@ struct ImportGraphDefOptions {
   // If true, checks that all colocation constraints are nodes in the GraphDef.
   bool validate_colocation_constraints = true;
 
+  // If false skips shape validation.
+  bool validate_shape;
+
   // TODO(ashankar): Enable handling of GraphDefs produced by newer binaries
   // with ops that are not defined in the binary calling ImportGraphDef.
   // Similar to the producer_op_list argument to import_graph_def in the
@@ -140,9 +152,10 @@ struct ImportGraphDefResults {
   // The requested nodes associated with ImportGraphDefOptions::return_nodes.
   std::vector<Node*> return_nodes;
 
-  // Keys in ImportGraphDefOptions::input_map that weren't used as an input to
-  // any node in`gdef`.
-  std::vector<TensorId> unused_input_map_keys;
+  // Keys in ImportGraphDefOptions::input_map that don't appear in `gdef` and
+  // weren't used as an input to any node in `gdef`. These keys are likely due
+  // to typos, and callers may wish to treat their existence as an error.
+  std::vector<TensorId> missing_unused_input_map_keys;
 };
 
 // Adds the graph in GraphDef `gdef` into an existing Graph `*g`.
diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc
index 0f88c80b85a4b05c21f76713a3406c72354cba0c..01bb1ac748fd512dcd1d715d949de8eb6e77142d 100644
--- a/tensorflow/core/graph/graph_constructor_test.cc
+++ b/tensorflow/core/graph/graph_constructor_test.cc
@@ -1433,7 +1433,7 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapDuplicateNodeNames) {
       &refiner);
 }
 
-TEST_F(GraphConstructorTest, ImportGraphDef_InputMapUnusedKeys) {
+TEST_F(GraphConstructorTest, ImportGraphDef_InputMapMissingUnusedKeys) {
   ShapeRefiner refiner(TF_GRAPH_DEF_VERSION, graph_.op_registry());
 
   // No input map
@@ -1443,10 +1443,10 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapUnusedKeys) {
       "node { name: 'W1' op: 'TestParams' }"
       "node { name: 'input' op: 'TestInput' }",
       opts, &refiner, &results);
-  EXPECT_TRUE(results.unused_input_map_keys.empty());
+  EXPECT_TRUE(results.missing_unused_input_map_keys.empty());
 
-  // Non-empty unused_input_map_keys
-  results.unused_input_map_keys.push_back(TensorId());
+  // Non-empty missing_unused_input_map_keys
+  results.missing_unused_input_map_keys.push_back(TensorId());
   ExpectError(
       "node { name: 'W2' op: 'TestParams' }", opts,
       {"All fields in results argument to ImportGraphDef() must be empty."},
@@ -1454,13 +1454,16 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapUnusedKeys) {
 
   // Input map with some used, some unused keys
   const int kControlSlot = Graph::kControlSlot;
-  results.unused_input_map_keys.clear();
+  results.missing_unused_input_map_keys.clear();
   opts.input_map[TensorId("W2", kControlSlot)] = TensorId("W1", kControlSlot);
   opts.input_map[TensorId("new_input", 0)] = TensorId("input", 0);
   opts.input_map[TensorId("new_input", 1)] = TensorId("input", 0);
-  opts.input_map[TensorId("new_input", kControlSlot)] =
-      TensorId("input", kControlSlot);
-  opts.input_map[TensorId("t1", 1)] = TensorId("input", 0);
+  // Unused and missing (nonexistent index)
+  opts.input_map[TensorId("new_input", 3)] = TensorId("input", 0);
+  // Unused and missing (nonexistent node)
+  opts.input_map[TensorId("DNE", 0)] = TensorId("input", 0);
+  // Unused but not missing
+  opts.input_map[TensorId("t1", 0)] = TensorId("W1", 0);
   ExpectOK(
       R"EOF(
       node { name: 'W2' op: 'TestParams' }
@@ -1470,9 +1473,36 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapUnusedKeys) {
       )EOF",
       opts, &refiner, &results);
 
-  std::vector<TensorId> expected_unused_keys = {
-      TensorId("new_input", kControlSlot), TensorId("t1", 1)};
-  EXPECT_EQ(results.unused_input_map_keys, expected_unused_keys);
+  std::set<TensorId> expected_unused_keys = {TensorId("new_input", 3),
+                                             TensorId("DNE", 0)};
+  ASSERT_EQ(results.missing_unused_input_map_keys.size(),
+            expected_unused_keys.size());
+
+  std::set<TensorId> actual_unused_keys(
+      results.missing_unused_input_map_keys.begin(),
+      results.missing_unused_input_map_keys.end());
+  EXPECT_EQ(actual_unused_keys, expected_unused_keys);
+
+  // Test edge case: node isn't imported due to skip_mapped_nodes, but we still
+  // have a bad input_map key involving it.
+  opts = ImportGraphDefOptions();
+  opts.input_map[TensorId("new_input", 0)] = TensorId("input", 0);
+  opts.input_map[TensorId("new_input", 1)] = TensorId("input", 1);
+  // Index out of bounds
+  opts.input_map[TensorId("new_input", 2)] = TensorId("input", 1);
+  opts.skip_mapped_nodes = true;
+  opts.prefix = "import";
+  results = ImportGraphDefResults();
+  ExpectOK(
+      R"EOF(
+      node { name: 'W2' op: 'TestParams' }
+      node { name: 'new_input' op: 'TestInput' input: [ '^W2' ] }
+      node { name: 't1' op: 'TestMul' input: [ 'new_input:0', 'new_input:1' ] }
+      )EOF",
+      opts, &refiner, &results);
+
+  ASSERT_EQ(results.missing_unused_input_map_keys.size(), 1);
+  EXPECT_EQ(results.missing_unused_input_map_keys[0], TensorId("new_input", 2));
 }
 
 TEST_F(GraphConstructorTest, ImportGraphDef_InputMapWithUnboundInput) {
@@ -1709,7 +1739,7 @@ TEST_F(GraphConstructorTest, ImportGraphDef_ReturnNodes) {
   // Check return tensors
   ASSERT_EQ(results.return_nodes.size(), 2);
   EXPECT_EQ(results.return_tensors.size(), 0);
-  EXPECT_EQ(results.unused_input_map_keys.size(), 0);
+  EXPECT_EQ(results.missing_unused_input_map_keys.size(), 0);
   EXPECT_EQ(results.return_nodes[0]->name(), "input");
   EXPECT_EQ(results.return_nodes[1]->name(), "t1");
 
@@ -1806,6 +1836,21 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
   EXPECT_EQ(results.return_nodes[1]->name(), "B_2");
   EXPECT_EQ(results.return_nodes[1]->def().input(0), "A_2:0");
 
+  // Import with an already-used prefix
+  opts.prefix = "A";
+  opts.uniquify_prefix = true;
+  results = ImportGraphDefResults();
+  ExpectOK(graph_def_str, opts, &refiner, &results);
+
+  ASSERT_EQ(results.return_nodes.size(), 2);
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_3/A");
+  EXPECT_EQ(results.return_nodes[1]->name(), "A_3/B");
+  EXPECT_EQ(results.return_nodes[1]->def().input(0), "A_3/A");
+
+  // Create B_3 node to keep the A/B numbering in sync
+  opts = ImportGraphDefOptions();
+  ExpectOK("node { name: 'B_3' op: 'TestInput' }");
+
   // Import with existing de-duped node names
   opts = ImportGraphDefOptions();
   opts.uniquify_names = true;
@@ -1822,6 +1867,30 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
   EXPECT_EQ(results.return_nodes[1]->name(), "B_1_1");
   EXPECT_EQ(results.return_nodes[1]->def().input(0), "A_1_1:0");
 
+  // Import with node names that must be de-duped from names and prefixes that
+  // exist in both the existing graph and the GraphDef being imported.
+  opts = ImportGraphDefOptions();
+  opts.uniquify_names = true;
+  opts.return_nodes.push_back("A");
+  opts.return_nodes.push_back("A_4");
+  opts.return_nodes.push_back("B");
+  opts.return_nodes.push_back("B_4/B");
+  results = ImportGraphDefResults();
+  ExpectOK(
+      "node { name: 'A' op: 'TestInput' }"
+      "node { name: 'A_4' op: 'TestInput' }"
+      "node { name: 'B' op: 'TestOneInputTwoOutputs' input: ['A'] }"
+      "node { name: 'B_4/B' op: 'TestOneInputTwoOutputs' input: ['A_4'] }",
+      opts, &refiner, &results);
+
+  ASSERT_EQ(results.return_nodes.size(), 4);
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_5");
+  EXPECT_EQ(results.return_nodes[1]->name(), "A_4");
+  EXPECT_EQ(results.return_nodes[2]->name(), "B_5");
+  EXPECT_EQ(results.return_nodes[2]->def().input(0), "A_5:0");
+  EXPECT_EQ(results.return_nodes[3]->name(), "B_4/B");
+  EXPECT_EQ(results.return_nodes[3]->def().input(0), "A_4");
+
   // Create node with prefix and then import node with same name
   ExpectOK("node { name: 'foo/abc' op: 'ABC' }");
   opts = ImportGraphDefOptions();
@@ -1871,16 +1940,25 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
   ExpectOK(graph_def_str, opts, &refiner, &results);
 
   ASSERT_EQ(results.return_nodes.size(), 2);
-  EXPECT_EQ(results.return_nodes[0]->name(), "A_3");
-  EXPECT_EQ(results.return_nodes[1]->name(), "B_3");
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_6");
+  EXPECT_EQ(results.return_nodes[1]->name(), "B_6");
   EXPECT_EQ(results.return_nodes[1]->def().input(0), "A:0");
+}
+
+TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames_ColocationGroups) {
+  ShapeRefiner refiner(TF_GRAPH_DEF_VERSION, graph_.op_registry());
+
+  // Create nodes 'A' and 'b"
+  ExpectOK(
+      "node { name: 'A' op: 'TestInput' }"
+      "node { name: 'B' op: 'TestOneInputTwoOutputs' input: ['A'] }");
 
   // Check that colocation groups are updated
-  opts = ImportGraphDefOptions();
+  ImportGraphDefOptions opts;
   opts.uniquify_names = true;
   opts.return_nodes.push_back("A");
   opts.return_nodes.push_back("B");
-  results = ImportGraphDefResults();
+  ImportGraphDefResults results;
   ExpectOK(
       "node { name: 'A' op: 'TestInput' }"
       "node { name: 'B' op: 'TestOneInputTwoOutputs' input: ['A:0'] "
@@ -1888,14 +1966,48 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UniquifyNames) {
       opts, &refiner, &results);
 
   ASSERT_EQ(results.return_nodes.size(), 2);
-  EXPECT_EQ(results.return_nodes[0]->name(), "A_4");
-  EXPECT_EQ(results.return_nodes[1]->name(), "B_4");
-  EXPECT_EQ(results.return_nodes[1]->def().input(0), "A_4:0");
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_1");
+  EXPECT_EQ(results.return_nodes[1]->name(), "B_1");
   const AttrValue* class_attr =
       results.return_nodes[1]->attrs().Find(kColocationAttrName);
   ASSERT_TRUE(class_attr != nullptr);
   ASSERT_EQ(class_attr->list().s_size(), 1);
-  EXPECT_EQ(class_attr->list().s(0), "loc:@A_4");
+  EXPECT_EQ(class_attr->list().s(0), "loc:@A_1");
+
+  results = ImportGraphDefResults();
+  ExpectOK(
+      "node { name: 'A' op: 'TestInput' "
+      "       attr { key: '_class' value { list { s:'loc:@B' } } } }"
+      "node { name: 'B' op: 'TestOneInputTwoOutputs' input: ['A:0'] }",
+      opts, &refiner, &results);
+
+  ASSERT_EQ(results.return_nodes.size(), 2);
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_2");
+  EXPECT_EQ(results.return_nodes[1]->name(), "B_2");
+  class_attr = results.return_nodes[0]->attrs().Find(kColocationAttrName);
+  ASSERT_TRUE(class_attr != nullptr);
+  ASSERT_EQ(class_attr->list().s_size(), 1);
+  EXPECT_EQ(class_attr->list().s(0), "loc:@B_2");
+
+  results = ImportGraphDefResults();
+  ExpectOK(
+      "node { name: 'A' op: 'TestInput' "
+      "       attr { key: '_class' value { list { s:'loc:@B' } } } }"
+      "node { name: 'B' op: 'TestOneInputTwoOutputs' input: ['A:0'] "
+      "       attr { key: '_class' value { list { s:'loc:@B' } } } }",
+      opts, &refiner, &results);
+
+  ASSERT_EQ(results.return_nodes.size(), 2);
+  EXPECT_EQ(results.return_nodes[0]->name(), "A_3");
+  EXPECT_EQ(results.return_nodes[1]->name(), "B_3");
+  class_attr = results.return_nodes[0]->attrs().Find(kColocationAttrName);
+  ASSERT_TRUE(class_attr != nullptr);
+  ASSERT_EQ(class_attr->list().s_size(), 1);
+  EXPECT_EQ(class_attr->list().s(0), "loc:@B_3");
+  class_attr = results.return_nodes[1]->attrs().Find(kColocationAttrName);
+  ASSERT_TRUE(class_attr != nullptr);
+  ASSERT_EQ(class_attr->list().s_size(), 1);
+  EXPECT_EQ(class_attr->list().s(0), "loc:@B_3");
 }
 
 TEST_F(GraphConstructorTest, ImportGraphDef_WithCycle) {
diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc
index 1924c05d3dd3944d0fa14d53c9ddb2ab14be751d..add80eda23d7887fb06902c0b123c03db8f4cccf 100644
--- a/tensorflow/core/graph/graph_partition.cc
+++ b/tensorflow/core/graph/graph_partition.cc
@@ -1152,7 +1152,7 @@ Status Partition(const PartitionOptions& opts, Graph* g,
     // Add control edges from 'ref_control_inputs' to 'ref_recvs'.
     // NOTE(yuanbyu): Adding these control edges should not introduce
     // deadlocks. 'dst' has implicit "read" nodes that, when we split
-    // across devices, are made explicit; Retargettig the dependencies
+    // across devices, are made explicit; Retargeting the dependencies
     // to 'dst' to those nodes would not introduce cycles if there isn't
     // one before the transformation.
     // NOTE(yuanbyu): This may impact performance because it defers the
diff --git a/tensorflow/core/graph/graph_partition_test.cc b/tensorflow/core/graph/graph_partition_test.cc
index 20822ecb1dd3657eb57ee070d3b722703869728d..6841f2914989b22d6aef91831ac6101b0ba6555f 100644
--- a/tensorflow/core/graph/graph_partition_test.cc
+++ b/tensorflow/core/graph/graph_partition_test.cc
@@ -43,8 +43,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-using strings::StrCat;
-
 // from graph_partition.cc
 extern Status TopologicalSortNodesWithTimePriority(
     const GraphDef* gdef, std::vector<std::pair<const NodeDef*, int64>>* nodes,
@@ -52,6 +50,13 @@ extern Status TopologicalSortNodesWithTimePriority(
 
 namespace {
 
+using ops::_Recv;
+using ops::_Send;
+using ops::Const;
+using ops::Identity;
+using ops::LoopCond;
+using ops::NextIteration;
+
 const char gpu_device[] = "/job:a/replica:0/task:0/device:GPU:0";
 
 string SplitByDevice(const Node* node) { return node->assigned_device_name(); }
@@ -63,7 +68,7 @@ string DeviceName(const Node* node) {
   } else {
     const string cpu_prefix = "/job:a/replica:0/task:0/cpu:";
     int index = first - 'A';
-    return StrCat(cpu_prefix, index);
+    return strings::StrCat(cpu_prefix, index);
   }
 }
 
@@ -232,7 +237,6 @@ class GraphPartitionTest : public ::testing::Test {
 };
 
 TEST_F(GraphPartitionTest, SingleDevice) {
-  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
   auto a1 = FloatInput(in_.WithOpName("A1"));
   Combine(in_.WithOpName("A2"), a1, a1);
 
@@ -245,7 +249,6 @@ TEST_F(GraphPartitionTest, SingleDevice) {
 }
 
 TEST_F(GraphPartitionTest, CrossDeviceData) {
-  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
   auto a1 = FloatInput(in_.WithOpName("A1"));
   auto b1 = FloatInput(in_.WithOpName("B1"));
   Combine(in_.WithOpName("B2"), a1, b1);
@@ -267,7 +270,6 @@ TEST_F(GraphPartitionTest, CrossDeviceData) {
 }
 
 TEST_F(GraphPartitionTest, CrossDeviceControl) {
-  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
   auto a1 = FloatInput(in_.WithOpName("A1"));
   auto b1 = FloatInput(in_.WithOpName("B1"));
   Combine(in_.WithOpName("B2").WithControlDependencies(a1), b1, b1);
@@ -291,7 +293,6 @@ TEST_F(GraphPartitionTest, CrossDeviceControl) {
 }
 
 TEST_F(GraphPartitionTest, CrossDeviceData_MultiUse) {
-  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
   auto a1 = FloatInput(in_.WithOpName("A1"));
   auto b1 = FloatInput(in_.WithOpName("B1"));
   Combine(in_.WithOpName("B2"), a1, b1);
@@ -315,7 +316,6 @@ TEST_F(GraphPartitionTest, CrossDeviceData_MultiUse) {
 }
 
 TEST_F(GraphPartitionTest, CrossDeviceControl_MultiUse) {
-  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
   auto a1 = FloatInput(in_.WithOpName("A1"));
   auto b1 = FloatInput(in_.WithOpName("B1"));
   Combine(in_.WithOpName("B2").WithControlDependencies(a1), b1, b1);
@@ -341,7 +341,6 @@ TEST_F(GraphPartitionTest, CrossDeviceControl_MultiUse) {
 }
 
 TEST_F(GraphPartitionTest, CrossDevice_DataControl) {
-  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
   auto a1 = FloatInput(in_.WithOpName("A1"));
   auto b1 = FloatInput(in_.WithOpName("B1"));
   Combine(in_.WithOpName("B2"), a1, b1);
@@ -372,7 +371,6 @@ TEST_F(GraphPartitionTest, CrossDevice_DataControl) {
 }
 
 TEST_F(GraphPartitionTest, CrossDeviceLoopSimple) {
-  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
   auto a1 = BoolInput(in_.WithOpName("A1"));
   auto a2 = ::tensorflow::ops::internal::Enter(in_.WithOpName("A2"), a1, "foo");
   auto a3 = ::tensorflow::ops::Merge(in_.WithOpName("A3"),
@@ -386,7 +384,6 @@ TEST_F(GraphPartitionTest, CrossDeviceLoopSimple) {
 }
 
 TEST_F(GraphPartitionTest, CrossDeviceLoopSimple1) {
-  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
   auto a1 = BoolInput(in_.WithOpName("A1"));
   auto a2 = ::tensorflow::ops::internal::Enter(in_.WithOpName("B2"), a1, "foo");
   auto a3 = ::tensorflow::ops::Merge(in_.WithOpName("A3"),
@@ -493,13 +490,14 @@ TEST_F(GraphPartitionTest, SetIncarnation) {
   attr { key: 'tensor_name' value { s: 'test' } }
 )proto";
   CHECK(protobuf::TextFormat::ParseFromString(
-      StrCat("node { name: 'A/Pi' op: 'Const' ",
-             "  attr { key: 'dtype' value { type: DT_FLOAT } } ",
-             "  attr { key: 'value' value { tensor { ",
-             "    dtype: DT_FLOAT tensor_shape {} float_val: 3.14 } } } }",
-             "node { name: 'A' op: '_Send' input: 'A/Pi' ", kSendRecvAttrs, "}",
-             "node { name: 'B' op: '_Recv' ", kSendRecvAttrs,
-             "  attr { key: 'tensor_type' value { type:DT_FLOAT}}}"),
+      strings::StrCat(
+          "node { name: 'A/Pi' op: 'Const' ",
+          "  attr { key: 'dtype' value { type: DT_FLOAT } } ",
+          "  attr { key: 'value' value { tensor { ",
+          "    dtype: DT_FLOAT tensor_shape {} float_val: 3.14 } } } }",
+          "node { name: 'A' op: '_Send' input: 'A/Pi' ", kSendRecvAttrs, "}",
+          "node { name: 'B' op: '_Recv' ", kSendRecvAttrs,
+          "  attr { key: 'tensor_type' value { type:DT_FLOAT}}}"),
       &gdef));
   gdef.mutable_versions()->set_producer(TF_GRAPH_DEF_VERSION);
   Partition(gdef, &partitions_);
@@ -527,7 +525,8 @@ TEST(TopologicalSortNodesWithTimePriorityTest, NoDependencies) {
   }
   std::vector<ops::Placeholder> placeholders;
   for (int i : indexes) {
-    placeholders.emplace_back(root.WithOpName(StrCat("p", i)), DT_FLOAT);
+    placeholders.emplace_back(root.WithOpName(strings::StrCat("p", i)),
+                              DT_FLOAT);
     placeholders.back().node()->AddAttr("_start_time", i + 1);
   }
 
@@ -540,7 +539,7 @@ TEST(TopologicalSortNodesWithTimePriorityTest, NoDependencies) {
       TopologicalSortNodesWithTimePriority(&gdef, &nodes, &node_to_start_time));
   ASSERT_EQ(nodes.size(), 20);
   for (int i = 0; i < nodes.size(); ++i) {
-    EXPECT_EQ(StrCat("p", i), nodes[i].first->name());
+    EXPECT_EQ(strings::StrCat("p", i), nodes[i].first->name());
     EXPECT_EQ(i + 1, nodes[i].second);
   }
 }
@@ -554,7 +553,7 @@ TEST(TopologicalSortNodesWithTimePriority, Dependencies) {
   const int num_leaves = 20;
   for (int i = 0; i < num_leaves; ++i) {
     indexes.push_back((i + 2001) % num_leaves);
-    placeholders_in_order.emplace_back(root.WithOpName(StrCat("p", i)),
+    placeholders_in_order.emplace_back(root.WithOpName(strings::StrCat("p", i)),
                                        DT_FLOAT);
     placeholders_in_order.back().node()->AddAttr("_start_time", i + 1);
   }
@@ -568,7 +567,8 @@ TEST(TopologicalSortNodesWithTimePriority, Dependencies) {
   // placeholder runs last).
   std::vector<ops::Square> squares;
   for (int i : indexes) {
-    squares.emplace_back(root.WithOpName(StrCat("s", i)), placeholders[i]);
+    squares.emplace_back(root.WithOpName(strings::StrCat("s", i)),
+                         placeholders[i]);
     squares.back().node()->AddAttr("_start_time", 50 - (i + 1));
   }
 
@@ -591,7 +591,7 @@ TEST(TopologicalSortNodesWithTimePriority, Dependencies) {
   ASSERT_EQ(1 + squares.size() + placeholders.size(), nodes.size());
   for (int i = 0; i < placeholders.size(); ++i) {
     const NodeDef* node = nodes[i].first;
-    EXPECT_EQ(StrCat("p", i), node->name());
+    EXPECT_EQ(strings::StrCat("p", i), node->name());
     EXPECT_EQ(i + 1, nodes[i].second);
     EXPECT_EQ(i + 1, node_to_start_time[node]);
   }
@@ -599,7 +599,7 @@ TEST(TopologicalSortNodesWithTimePriority, Dependencies) {
     int node_index = placeholders.size() + i;
     int square_index = num_leaves - 1 - i;
     const NodeDef* node = nodes[node_index].first;
-    EXPECT_EQ(StrCat("s", square_index), node->name());
+    EXPECT_EQ(strings::StrCat("s", square_index), node->name());
     EXPECT_EQ(50 - (square_index + 1), nodes[node_index].second);
     EXPECT_EQ(50 - (square_index + 1), node_to_start_time[node]);
   }
@@ -619,7 +619,7 @@ TEST(TopologicalSortNodesWithTimePriority, WhileLoop) {
   const int num_leaves = 20;
   for (int i = 0; i < num_leaves; ++i) {
     indexes.push_back((i + 2001) % num_leaves);
-    placeholders_in_order.emplace_back(root.WithOpName(StrCat("p", i)),
+    placeholders_in_order.emplace_back(root.WithOpName(strings::StrCat("p", i)),
                                        DT_FLOAT);
     placeholders_in_order.back().node()->AddAttr("_start_time", i + 1);
   }
@@ -633,10 +633,10 @@ TEST(TopologicalSortNodesWithTimePriority, WhileLoop) {
   std::vector<Exit> while_exits;
   const int nodes_per_loop = 8;
   for (int i : indexes) {
-    Scope scope = root.NewSubScope(StrCat("while", i));
+    Scope scope = root.NewSubScope(strings::StrCat("while", i));
     auto dummy = Placeholder(scope, DT_FLOAT);
 
-    Enter enter(scope, placeholders[i], StrCat("frame", i));
+    Enter enter(scope, placeholders[i], strings::StrCat("frame", i));
     Merge merge(scope, std::initializer_list<Input>{enter, dummy});
     auto cv = Const(scope.WithControlDependencies({merge.output}), false);
     LoopCond loop_cond(scope, cv);
@@ -663,7 +663,8 @@ TEST(TopologicalSortNodesWithTimePriority, WhileLoop) {
   std::vector<Square> squares;
   squares.reserve(indexes.size());
   for (int i : indexes) {
-    squares.emplace_back(root.WithOpName(StrCat("s", i)), while_exits[i]);
+    squares.emplace_back(root.WithOpName(strings::StrCat("s", i)),
+                         while_exits[i]);
     squares.back().node()->AddAttr("_start_time", 500 - (i + 1));
   }
 
@@ -680,20 +681,20 @@ TEST(TopologicalSortNodesWithTimePriority, WhileLoop) {
   int node_index = 0;
   for (int i = 0; i < placeholders.size(); ++i, ++node_index) {
     const NodeDef* node = nodes[i].first;
-    EXPECT_EQ(StrCat("p", i), node->name());
+    EXPECT_EQ(strings::StrCat("p", i), node->name());
     EXPECT_EQ(i + 1, nodes[i].second);
     EXPECT_EQ(i + 1, node_to_start_time[node]);
   }
   for (int i = 0; i < while_exits.size(); ++i, node_index += nodes_per_loop) {
     const NodeDef* node = nodes[node_index].first;
-    EXPECT_EQ(StrCat("while", i, "/Enter"), node->name());
+    EXPECT_EQ(strings::StrCat("while", i, "/Enter"), node->name());
     EXPECT_EQ(100 + i * 10, nodes[node_index].second);
     EXPECT_EQ(100 + i * 10, node_to_start_time[node]);
   }
   for (int i = 0; i < squares.size(); ++i, ++node_index) {
     int square_index = num_leaves - 1 - i;
     const NodeDef* node = nodes[node_index].first;
-    EXPECT_EQ(StrCat("s", square_index), node->name());
+    EXPECT_EQ(strings::StrCat("s", square_index), node->name());
     EXPECT_EQ(500 - (square_index + 1), nodes[node_index].second);
     EXPECT_EQ(500 - (square_index + 1), node_to_start_time[node]);
   }
diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h
index 880e4e712ef0a0d9378afefd91acd125351992f7..3df981437afed760744ef870fd542d7abdd6e25d 100644
--- a/tensorflow/core/graph/mkl_graph_util.h
+++ b/tensorflow/core/graph/mkl_graph_util.h
@@ -76,12 +76,12 @@ namespace tensorflow {
 namespace mkl_op_registry {
   static const char* kMklOpLabel = "MklOp";
   static const char* kMklOpLabelPattern = "label='MklOp'";
+  // Prefix that we add to Tensorflow op name to construct Mkl op name.
+  static const char* const kMklOpPrefix = "_Mkl";
 
   // Get the name of Mkl op from original TensorFlow op
   // We prefix 'Mkl' to the original op to get Mkl op.
   inline string GetMklOpName(const string& name) {
-    // Prefix that we add to Tensorflow op name to construct Mkl op name.
-    const char* const kMklOpPrefix = "_Mkl";
     return string(kMklOpPrefix) + name;
   }
 
@@ -94,9 +94,6 @@ namespace mkl_op_registry {
     string kernel = KernelsRegisteredForOp(op_name);
     bool result =
         kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT);
-    if (result) {
-      VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel;
-    }
     return result;
   }
 
@@ -112,15 +109,12 @@ namespace mkl_op_registry {
     if (!IsMklOp(op_name, T)) {
       return false;
     }
-
     bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
                     0 == op_name.compare(GetMklOpName("Sub")) ||
                     0 == op_name.compare(GetMklOpName("Mul")) ||
                     0 == op_name.compare(GetMklOpName("Maximum")) ||
                     0 == op_name.compare(GetMklOpName("SquaredDifference")));
 
-    VLOG(1) << "mkl_op_registry::" << op_name
-            << " is elementwise MKL op: " << result;
     return result;
   }
 }  // namespace mkl_op_registry
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 912075aa286042319a93bf60495f52af3f940ec8..55bc401b9d61d43e1908faf0ac7e24639ec04c44 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -42,6 +42,8 @@ limitations under the License.
 
 namespace tensorflow {
 
+#ifndef INTEL_MKL_DNN
+
 // This pass implements rewriting of graph to support following scenarios:
 // (A) Merging nodes in the graph
 // (B) Rewriting a node in the graph to a new node
@@ -2213,6 +2215,2117 @@ Status MklLayoutRewritePass::Run(
   return Status::OK();
 }
 
+#else  // INTEL_MKL_DNN
+
+// This pass implements rewriting of graph to support following scenarios:
+// (A) Merging nodes in the graph
+// (B) Rewriting a node in the graph to a new node
+//     Rewrite happens under following scenario:
+//     - Propagating Mkl layout as an additional output tensor
+//        (we will loosely call a tensor that carries Mkl layout as Mkl tensor
+//         henceforth.) from every Mkl supported NN layer.
+//
+// Example of A : Merging nodes in the graph
+// -----------------------------------------
+// Currently, we merge Conv2D+AddBias together. Consider Conv2D and BiasAdd as:
+//
+//           O = Conv2D(A, B)
+//           P = BiasAdd(O, C)
+//
+// We merge them into Conv2DWithBias as:
+//           P = _MklConv2DWithBias(A, A_m, B, B_m, C, C_m)
+//
+// The meaning of A_m, B_m and C_m is explained in B.1.
+//
+// Merge rules:
+//  - The merge for Conv2D and BiasAdd happens when the output of Conv2D _only_
+//    goes to BiasAdd.
+//  - Also, the intersection of attributes of both the nodes must have same
+//    values.
+//  - Both the nodes must have been assigned to same device (if any).
+//
+// Example of B.1 : Rewriting nodes to Mkl nodes
+// ---------------------------------------------
+// Consider a Relu node. Current definition of Relu node looks like:
+//
+//           O = Relu(A)
+//
+// Relu has 1 input (A), and 1 output (O).
+//
+// This rewrite pass will generate a new graph node for Relu (new node is
+// called MklRelu) as:
+//
+//          O, O_m = MklRelu(A, A_m)
+//
+// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m). Here input A is
+// same as input A of Relu; output O is same as output O of Relu. O_m is the
+// additional output tensor that will be set by MklRelu, and it represents
+// Mkl tensor corresponding to O -- in other words, O_m is some kind of
+// metadata for O. A_m is additional input of Relu, and it represents metadata
+// for A - as O_m is metadata for O, A_m is metadata for A. MklRelu receives
+// this metadata from previous node in the graph.
+//
+// When a previous node in the graph is an Mkl node, A_m will represent a valid
+// Mkl tensor. But when a previous node is not an Mkl node, A_m will represent
+// a dummy Mkl tensor.
+//
+// Rewriting rules:
+//  - Selection of a node for rewriting happens by registering the op type of
+//    the node with the rewriting pass. If the op type is not registered, then
+//    all nodes of this op type will not be rewritten.
+//  - Number of inputs after rewriting:
+//      Since for every input Tensorflow tensor, the rewritten node gets Mkl
+//      tensor(s), rewritten node gets 2*N inputs, where N is the number of
+//      inputs for the original node.
+//  - Number of outputs after rewriting:
+//      Since for every output Tensorflow tensor, the rewritten node generates
+//      Mkl tensor(s), the rewritten node generates 2*N outputs, where N is the
+//      number of outputs of the original node.
+//  - Ordering of Tensorflow tensors and Mkl tensors:
+//      Since every rewritten node generates twice the number of inputs and
+//      outputs, one could imagine various orderings among Tensorflow tensors
+//      and Mkl tensors. E.g., assume an op 'Conv2D' that takes (A, B) as
+//      inputs, then the new op '_MklConv2D' can take inputs A, B, A_m and B_m
+//      in A, A_m, B, B_m order or it can also take them in A, B, A_m, B_m
+//      order. Among N inputs one can get N! permutations.
+//
+//      So the question is: which order do we follow? We support 2 types of
+//      orderings: (1) interleaved, and (2) contiguous. Interleaved ordering
+//      follows an intuitive order where an Mkl tensor follows the
+//      corresponding Tensorflow tensor immediately. In the context of the
+//      above example, it will be: A, A_m, B, B_m. Note that the ordering rule
+//      applies to both the inputs and outputs. Contiguous ordering means
+//      all the Tensorflow tensors are contiguous followed by all the Mkl
+//      tensors. We use contiguous ordering as default.
+//
+// Graph rewrite algorithm:
+//      Algorithm: Graph Rewrite
+//      Input: Graph G, Names of the nodes to rewrite and their new names
+//      Output: Modified Graph G' if the nodes are modified, G otherwise.
+//      Start:
+//        N = Topological_Sort(G) // N is a set of nodes in toposort order.
+//        foreach node n in N
+//        do
+//          if (Is_MKL_Op(n))  // Can this node accept an Mkl layout as input.
+//          then
+//            E = set of <incoming edge and its src_output slot> of n
+//            E' = {}   // a new set of edges for rewritten node
+//            foreach <e,s> in E
+//            do
+//              E' U {<e,s>}  // First copy edge which generates Tensorflow
+//                            // tensor as it is
+//              m = Source node of edge e
+//              if Is_Rewritten(m)  // Did we rewrite this node in this pass?
+//              then
+//                E' U {<m,s+1>}    // If yes, then m will generate an Mkl
+//                                  // tensor as an additional output.
+//              else
+//                d = Generate_Dummy_Mkl_Tensor()  // If not, generate a dummy
+//                                                 // Mkl tensor.
+//                E' U {<d,0>}  // The dummy Mkl tensor has only 1 output slot.
+//              fi
+//            done
+//            n' = Build_New_Node(G,new_name,E')
+//            Mark_Rewritten(n')  // Mark the new node as being rewritten.
+//          fi
+//        done
+//
+//      Explanation:
+//        For graph rewrite, we visit nodes of the input graph in the
+//        topological sort order. With this ordering, we visit nodes in the
+//        top-to-bottom fashion. We need this order because while visiting a
+//        node we want that all of its input nodes are visited and rewritten if
+//        applicable. This is because if we need to rewrite a given node
+//        then all of its input nodes need to be fixed (in other words they
+//        cannot be deleted later.)
+//
+//        While visiting a node, we first check if the op type of the node is
+//        an Mkl op. If it is, then we rewrite that node after constructing
+//        new inputs to the node. If the op type of the node is not Mkl op,
+//        then we do not rewrite that node.
+//
+// Handling workspace propagation for certain ops:
+//
+//        Certain backward ops in MKL (MaxPool, LRN and BatchNorm) require
+//        passing of a workspace from their respective forward ops. Workspace
+//        tensors provide memory for storing results of intermediate operations
+//        which are helpful in backward propagation. TensorFlow does not have
+//        a notion of a workspace and as a result does not allow producing
+//        additional outputs from these forward ops. For these ops, we need
+//        to add 2 extra edges between forward ops and their corresponding
+//        backward ops - the first extra edge carries a workspace tensor and
+//        the second one carries an Mkl tensor for the workspace tensor.
+//
+//        Example:
+//
+//        Typical graph for MaxPool and its gradient looks like:
+//
+//        A = MaxPool(T)
+//        B = MaxPoolGrad(X, A, Y)
+//
+//        We will transform this graph to propagate the workspace as:
+//        (with the contiguous ordering)
+//
+//        A, W, A_m, W_m = MklMaxPool(T, T_m)
+//        B, B_m = MklMaxPoolGrad(X, A, Y, W, X_m, A_m, Y_m, W_m)
+//
+//        Here W is the workspace tensor. Transformed tensor names with the
+//        suffix _m are Mkl tensors, and this transformation has been done
+//        using the algorithm discussed earlier. The transformation for
+//        workspace propagation only adds extra outputs (W, W_m) for a forward
+//        op and connects them to the corresponding backward ops.
+//
+//        Terms:
+//
+//        Forward op name = name of the op in the forward pass
+//          where a workspace tensor originates (MaxPool in this example)
+//        Backward op name = name of the op in the backward pass that receives
+//          a workspace tensor from the forward op (MaxPoolGrad in the example)
+//        Slot = Position of the output or input slot that will be
+//               used by the workspace tensor (1 for MklMaxPool as W is the 2nd
+//               output of MaxPool (0 is 1st); 3 for MklMaxPoolGrad)
+//
+//        Question:
+//
+//        How do we associate a backward op to a forward op? There can be more
+//        than one op with the exact same name.
+//
+//        In this example, we associate MaxPoolGrad with MaxPool. But there
+//        could be more than one MaxPool ops. To solve this problem, we look
+//        for _direct_ edge between a forward op and a backward op (tensor A is
+//        flowing along this edge in the example).
+//
+//        How do we transform forward and backward ops when there is no direct
+//        edge between them? In such a case, we generate dummy tensors for
+//        workspace tensors. For the example, transformation of MaxPool will
+//        be exactly same as it would be when there is a direct edge between
+//        the forward and the backward op --- it is just that MaxPool won't
+//        generate any workspace tensor. For MaxPoolGrad, the transformation
+//        will also be same, but instead of connecting W and W_m with the
+//        outputs of MaxPool, we will produce dummy tensors for them, and we
+//        will set workspace_enabled attribute to false.
+//
+class MklLayoutRewritePass : public GraphOptimizationPass {
+ public:
+  MklLayoutRewritePass() {
+    // NOTE: names are alphabetically sorted.
+    csinfo_.addn = "AddN";
+    csinfo_.avg_pool = "AvgPool";
+    csinfo_.avg_pool_grad = "AvgPoolGrad";
+    csinfo_.bias_add = "BiasAdd";
+    csinfo_.bias_add_grad = "BiasAddGrad";
+    csinfo_.concat = "Concat";
+    csinfo_.concatv2 = "ConcatV2";
+    csinfo_.conv2d = "Conv2D";
+    csinfo_.conv2d_with_bias = "__MklDummyConv2DWithBias";
+    csinfo_.conv2d_grad_input = "Conv2DBackpropInput";
+    csinfo_.conv2d_grad_filter = "Conv2DBackpropFilter";
+    csinfo_.conv2d_grad_filter_with_bias =
+                              "__MklDummyConv2DBackpropFilterWithBias";
+    csinfo_.fused_batch_norm = "FusedBatchNorm";
+    csinfo_.fused_batch_norm_grad = "FusedBatchNormGrad";
+    csinfo_.identity = "Identity";
+    csinfo_.lrn = "LRN";
+    csinfo_.lrn_grad = "LRNGrad";
+    csinfo_.matmul = "MatMul";
+    csinfo_.max_pool = "MaxPool";
+    csinfo_.max_pool_grad = "MaxPoolGrad";
+    csinfo_.mkl_conv2d = "_MklConv2D";
+    csinfo_.mkl_conv2d_grad_input = "_MklConv2DBackpropInput";
+    csinfo_.mkl_conv2d_grad_filter = "_MklConv2DBackpropFilter";
+    csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias";
+    csinfo_.mkl_conv2d_grad_filter_with_bias =
+                                   "_MklConv2DBackpropFilterWithBias";
+    csinfo_.relu = "Relu";
+    csinfo_.relu_grad = "ReluGrad";
+    csinfo_.tanh       = "Tanh";
+    csinfo_.tanh_grad  = "TanhGrad";
+    csinfo_.reshape = "Reshape";
+    csinfo_.softmax = "Softmax";
+    csinfo_.split = "Split";
+    // Element-wise ops. Ensure you also add any new ops to IsOpElementWise
+    // in the MklUtil.h (IsMklElementWiseOp method) to ensure that the
+    // MklInputConversion op is added before it.
+    csinfo_.add = "Add";
+    csinfo_.maximum = "Maximum";
+    csinfo_.mul = "Mul";
+    csinfo_.squared_difference = "SquaredDifference";
+    csinfo_.sub = "Sub";
+    // End - element-wise ops. See note above.
+
+    // NOTE: names are alphabetically sorted.
+    rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
+                      CopyAttrsAddN, AddNRewrite});
+    /* rinfo_.push_back({csinfo_.add,
+                      mkl_op_registry::GetMklOpName(csinfo_.add),
+                      CopyAttrsDataType, AlwaysRewrite}); */
+    rinfo_.push_back({csinfo_.avg_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.avg_pool_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.concat,
+                      mkl_op_registry::GetMklOpName(csinfo_.concat),
+                      CopyAttrsConcat, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.concatv2,
+                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
+                      CopyAttrsConcatV2, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_with_bias,
+                      csinfo_.mkl_conv2d_with_bias,
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_filter,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_filter_with_bias,
+                      csinfo_.mkl_conv2d_grad_filter_with_bias,
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d_grad_input,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
+                      CopyAttrsConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.fused_batch_norm,
+                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
+                      CopyAttrsFusedBatchNorm, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.fused_batch_norm_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
+                      CopyAttrsFusedBatchNorm, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.identity,
+                      mkl_op_registry::GetMklOpName(csinfo_.identity),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.lrn,
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn),
+                      CopyAttrsLRN, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.lrn_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
+                      CopyAttrsLRN, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.max_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
+                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
+    rinfo_.push_back({csinfo_.max_pool_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
+                      CopyAttrsPooling, AlwaysRewrite});
+    /*
+    rinfo_.push_back({csinfo_.maximum,
+                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.mul,
+                      mkl_op_registry::GetMklOpName(csinfo_.mul),
+                      CopyAttrsDataType, AlwaysRewrite});
+    */
+    rinfo_.push_back({csinfo_.relu,
+                      mkl_op_registry::GetMklOpName(csinfo_.relu),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.relu_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
+                      CopyAttrsDataType, AlwaysRewrite});
+    /*
+    rinfo_.push_back({csinfo_.tanh,
+                      mkl_op_registry::GetMklOpName(csinfo_.tanh),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.tanh_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.tanh_grad),
+                      CopyAttrsDataType, AlwaysRewrite});
+    */
+    rinfo_.push_back({csinfo_.reshape,
+                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
+                      CopyAttrsReshape, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.softmax,
+                      mkl_op_registry::GetMklOpName(csinfo_.softmax),
+                      CopyAttrsDataType, AlwaysRewrite});
+    /*
+    rinfo_.push_back({csinfo_.squared_difference,
+                      mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.sub,
+                      mkl_op_registry::GetMklOpName(csinfo_.sub),
+                      CopyAttrsDataType, AlwaysRewrite});
+    */
+
+    // Add info about which ops to add workspace edge to and the slots.
+    wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
+    wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3});
+
+    // Add a rule for merging nodes
+    minfo_.push_back({csinfo_.conv2d, csinfo_.bias_add,
+                      csinfo_.conv2d_with_bias,
+                      GetConv2DOrBiasAdd});
+
+    minfo_.push_back({csinfo_.conv2d_grad_filter, csinfo_.bias_add_grad,
+                      csinfo_.conv2d_grad_filter_with_bias,
+                      GetConv2DBackpropFilterOrBiasAddGrad});
+  }
+
+  // Standard interface to run pass
+  Status Run(const GraphOptimizationPassOptions& options);
+
+  // Helper function which does most of heavy lifting for rewriting
+  // Mkl nodes to propagate Mkl tensor as additional output
+  //
+  // Extracts common functionality between Run public interface and
+  // test interface.
+  //
+  // @return true, if and only if graph is mutated; false otherwise.
+  bool RunPass(std::unique_ptr<Graph>* g);
+
+  /// Structure to specify the name of an original node, its new name after
+  /// rewrite, the number of inputs to the original node, the function to
+  /// be used to copy attributes for the op, and the rule (if any) which
+  /// must hold for rewriting the node
+  typedef struct {
+    string name;      // Original name of op of the node in the graph
+    string new_name;  // New name of the op of the node in the graph
+    // A function handler to copy attributes from an old node to a new node.
+    std::function<void(const Node*, NodeBuilder*)> copy_attrs;
+    // A rule under which to rewrite this node
+    std::function<bool(const Node*)> rewrite_rule;
+  } RewriteInfo;
+
+  /// Structure to specify a forward op, a backward op, and the slot numbers
+  /// in the forward and backward ops where we will add a workspace edge.
+  typedef struct {
+    string fwd_op;    // Name of a forward op in the graph
+    string bwd_op;    // Name of a backward op in the graph
+    int fwd_slot;     // Output slot in the forward op node where actual
+                      // output tensor resides
+    int bwd_slot;     // Input slot in the backward op node where actual
+                      // input tensor resides
+    int ws_fwd_slot;  // Output slot in the forward op node where workspace
+                      // edge is added
+    int ws_bwd_slot;  // Input slot in the backward op node where workspace
+                      // edge is added
+  } WorkSpaceInfo;
+
+  /// Structure to specify information used in node merge of 2 operators
+  typedef struct {
+    string op1;       // Node string for one operator.
+    string op2;       // Node string for second operator.
+    string new_node;  // Name of the node after merge
+    // Function that enables user of the node merger to specify how to find
+    // second operator given the first operator.
+    std::function<Node*(const Node*)> get_node_to_be_merged;
+  } MergeInfo;
+
+  /// Structure to store all constant strings
+  /// NOTE: names are alphabetically sorted.
+  typedef struct {
+    string addn;
+    string add;
+    string avg_pool;
+    string avg_pool_grad;
+    string bias_add;
+    string bias_add_grad;
+    string concat;
+    string concatv2;
+    string conv2d;
+    string conv2d_with_bias;
+    string conv2d_grad_input;
+    string conv2d_grad_filter;
+    string conv2d_grad_filter_with_bias;
+    string fused_batch_norm;
+    string fused_batch_norm_grad;
+    string identity;
+    string lrn;
+    string lrn_grad;
+    string matmul;
+    string max_pool;
+    string max_pool_grad;
+    string maximum;
+    string mkl_conv2d;
+    string mkl_conv2d_grad_input;
+    string mkl_conv2d_grad_filter;
+    string mkl_conv2d_grad_filter_with_bias;
+    string mkl_conv2d_with_bias;
+    string mul;
+    string relu;
+    string relu_grad;
+    string tanh;
+    string tanh_grad;
+    string reshape;
+    string softmax;
+    string split;
+    string squared_difference;
+    string sub;
+  } ConstStringsInfo;
+
+ private:
+  /// Maintain info about nodes to rewrite
+  std::vector<RewriteInfo> rinfo_;
+
+  /// Maintain info about nodes to add workspace edge
+  std::vector<WorkSpaceInfo> wsinfo_;
+
+  /// Maintain info about nodes to be merged
+  std::vector<MergeInfo> minfo_;
+
+  /// Maintain structure of constant strings
+  static ConstStringsInfo csinfo_;
+
+ private:
+  // Is OpDef::ArgDef a list type? It could be N * T or list(type).
+  // Refer to opdef.proto for details of list type.
+  inline bool ArgIsList(const OpDef::ArgDef& arg) const {
+    return !arg.type_list_attr().empty() || !arg.number_attr().empty();
+  }
+
+  // Get length of a list in 'n' if 'arg' is of list type. Refer to
+  // description of ArgIsList for definition of list type.
+  inline int GetTensorListLength(const OpDef::ArgDef& arg, Node* n) {
+    CHECK_EQ(ArgIsList(arg), true);
+    int N = 0;
+    const string attr_name = !arg.type_list_attr().empty()
+                                 ? arg.type_list_attr()
+                                 : arg.number_attr();
+    if (!arg.type_list_attr().empty()) {
+      std::vector<DataType> value;
+      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &value));
+      N = value.size();
+    } else {
+      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &N));
+    }
+    return N;
+  }
+
+  // Can op represented by node 'n' run on DEVICE_CPU?
+  // Op can run on CPU with MKL if the runtime assigned device or the
+  // user requested device contains device CPU, or both are empty.
+  bool CanOpRunOnCPUDevice(const Node* n) {
+    bool result = true;
+    string reason;
+
+    // Substring that should be checked for in device name for CPU device.
+    const char* const kCPUDeviceSubStr = "CPU";
+
+    // If Op has been specifically assigned to a non-CPU device, then No.
+    if (!n->assigned_device_name().empty() &&
+        !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
+      result = false;
+      reason = "Op has been assigned a runtime device that is not CPU.";
+    }
+
+    // If user has specifically assigned this op to a non-CPU device, then No.
+    if (!n->def().device().empty() &&
+        !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
+      result = false;
+      reason = "User has assigned a device that is not CPU.";
+    }
+
+    if (result == false) {
+      VLOG(1) << "MklLayoutRewritePass: Skipping rewriting of the node "
+              << n->type_string() << ", reason: " << reason;
+    }
+
+    // Otherwise Yes.
+    return result;
+  }
+
+  // Return a node that can be merged with input node 'n'
+  //
+  // @return pointer to the node if we can find such a
+  // node. Otherwise, it returns nullptr.
+  Node* CheckForNodeMerge(const Node* n) const;
+
+  // Merge node 'm' with node 'n'.
+  // Currently, we merge (1) Conv2D with BiasAdd, and (2) BiasAddGrad with
+  // Conv2DBackpropFilter.
+  //
+  // Input nodes m and n may be deleted if the call to
+  // this function is successful. Attempt to use the pointers
+  // after the call to function may result in undefined behaviors.
+  //
+  // @input g - input graph, m - graph node, n - graph node to be merged with m
+  // @return Status::OK(), if merging is successful and supported.
+  //         Returns appropriate Status error code otherwise.
+  //         Graph is updated in case nodes are merged. Otherwise, it is
+  //         not updated.
+  Status MergeNode(std::unique_ptr<Graph>* g, Node* m, Node* n);
+
+  // Helper function to merge different nodes
+  Status MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g, Node* m, Node* n);
+  Status MergeConv2DBackpropFilterWithBiasAddGrad(std::unique_ptr<Graph>* g,
+                                                  Node* m, Node* n);
+
+  // Find BiasAdd or Conv2D node that can be merged with input node 'm'.
+  // If input 'm' is BiasAdd, then check if there exists Conv2D node that can be
+  // merged with 'm'. If input 'm' is Conv2D, then check if there exists BiasAdd
+  // node that can be merged with 'm'.
+  static Node* GetConv2DOrBiasAdd(const Node* m) {
+    CHECK_NOTNULL(m);
+    Node* n = nullptr;
+
+    if (m->type_string() == csinfo_.bias_add) {
+      // If a is BiasAdd, then Conv2D is 0th input of BiasAdd.
+      TF_CHECK_OK(m->input_node(0, &n));
+    } else {
+      CHECK_EQ(m->type_string(), csinfo_.conv2d);
+      // Go over all output edges and search for BiasAdd Node.
+      // 0th input of BiasAdd is Conv2D.
+      for (const Edge* e : m->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.bias_add &&
+            e->dst_input() == 0) {
+          n = e->dst();
+          break;
+        }
+      }
+    }
+
+    if (n == nullptr) {
+      VLOG(1) << "MklLayoutRewritePass: Could not find matching "
+              << "Conv2D and BiasAdd node for merging. Input node: "
+              << m->DebugString();
+    }
+
+    return n;
+  }
+
+  // Find Conv2DBackpropFilter or BiasAddGrad node that can be merged with input
+  // node 'm'. If input 'm' is Conv2DBackpropFilter, then check if there exists
+  // BiasAddGrad node that can be merged with 'm'. If input 'm' is BiasAddGrad,
+  // then check if there exists Conv2DBackpropFilter node that can be merged
+  // with 'm'.
+  //
+  // Graph that will allow us to connect Conv2DBackpropFilter with BiasAddGrad
+  // would look like:
+  //
+  // _ = Conv2DBackpropFilter(F, _, G)
+  // _ = BiasAddGrad(G)
+  //
+  // So 1st input of BiasAddGrad connects with 3rd input of
+  // Conv2DBackpropFilter and vice versa.
+  static Node* GetConv2DBackpropFilterOrBiasAddGrad(const Node* m) {
+    CHECK_NOTNULL(m);
+    Node* n = nullptr;
+
+    if (m->type_string() == csinfo_.bias_add_grad) {
+      // Get 1st input 'g' of BiasAddGrad.
+      Node* g = nullptr;
+      TF_CHECK_OK(m->input_node(0, &g));
+      // Now traverse all outgoing edges from g that have destination node as
+      // Conv2DBackpropFilter.
+      for (const Edge* e : g->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.conv2d_grad_filter &&
+            e->dst_input() == 2 /* 3rd input of BackpropFilter */) {
+          n = e->dst();
+          break;
+        }
+      }
+    } else {
+      CHECK_EQ(m->type_string(), csinfo_.conv2d_grad_filter);
+      // Get 3rd input 'g' of Conv2DBackpropFilter.
+      Node* g = nullptr;
+      TF_CHECK_OK(m->input_node(2, &g));
+      // Now traverse all outgoing edges from g that have destination node as
+      // BiasAddGrad.
+      for (const Edge* e : g->out_edges()) {
+        if (!e->IsControlEdge() &&
+            e->dst()->type_string() == csinfo_.bias_add_grad &&
+            e->dst_input() == 0 /* 1st input of BiasAddGrad */) {
+          n = e->dst();
+          break;
+        }
+      }
+    }
+
+    if (n == nullptr) {
+      VLOG(1) << "MklLayoutRewritePass: Could not find matching "
+              << "Conv2DBackpropFilter and BiasAddGrad node for merging. "
+              << "Input node: " << m->DebugString();
+    }
+    return n;
+  }
+
+  // Check if the node 'n' has any applicable rewrite rule
+  // We check for 2 scenarios for rewrite.
+  //
+  // @return RewriteInfo* for the applicable rewrite rule
+  const RewriteInfo* CheckForNodeRewrite(const Node* n) const;
+
+  // Default rewrite rule to be used in scenario 1 for rewrite.
+  // @return - true (since we want to always rewrite)
+  static bool AlwaysRewrite(const Node* n) {
+    return true;
+  }
+
+  // Check if we are performing pooling on depth or batch. If it is, then we
+  // do not rewrite MaxPool node to Mkl version.
+  // @return - true (if it is not a depth/batch wise pooling case);
+  //           false otherwise.
+  static bool NonDepthBatchWisePoolRewrite(const Node* n) {
+    CHECK_NOTNULL(n);
+
+    string data_format_str;
+    TensorFormat data_format;
+    std::vector<int32> ksize, strides;
+    CHECK_EQ(GetNodeAttr(n->def(), "ksize", &ksize).ok(), true);
+    CHECK_EQ(GetNodeAttr(n->def(), "strides", &strides).ok(), true);
+    CHECK_EQ(GetNodeAttr(n->def(), "data_format", &data_format_str).ok(),
+             true);
+    CHECK_EQ(FormatFromString(data_format_str, &data_format), true);
+
+    // Condition that specifies non-batch-wise and non-depth-wise pooling.
+    if (GetTensorDim(ksize,   data_format, 'N') == 1 &&
+        GetTensorDim(strides, data_format, 'N') == 1 &&
+        GetTensorDim(ksize,   data_format, 'C') == 1 &&
+        GetTensorDim(strides, data_format, 'C') == 1) {
+      return true;
+    }
+
+    return false;
+  }
+
+  static bool AddNRewrite(const Node* n) {
+    CHECK_NOTNULL(n);
+
+    int num;
+    CHECK_EQ(GetNodeAttr(n->def(), "N", &num).ok(), true);
+
+    // Condition that specifies non-batch-wise and non-depth-wise pooling.
+    if (num == 2) {
+      return true;
+    }
+
+    return false;
+  }
+
+  // Rewrites input node to a new node specified by its matching rewrite info.
+  //
+  // Method first searches matching rewrite info for input node and then
+  // uses that info to rewrite.
+  //
+  // Input node may be deleted in case of rewrite. Attempt to use the node
+  // after the call can result in undefined behaviors.
+  //
+  // @input  g - input graph, n - Node to be rewritten,
+  //         ri - matching rewriteinfo
+  // @return Status::OK(), if the input node is rewritten;
+  //         Returns appropriate Status error code otherwise.
+  //         Graph is updated in case the input node is rewritten.
+  //         Otherwise, it is not updated.
+  Status RewriteNode(std::unique_ptr<Graph>* g, Node* n, const RewriteInfo* ri);
+
+  // Get nodes that will feed a list of TF tensors to the new
+  // node that we are constructing.
+  //
+  // @input g - input graph,
+  // @input inputs - inputs to old node that we are using for constructing
+  //                 new inputs,
+  // @input input_idx - the index in the 'inputs' vector pointing to the
+  //                    current input that we have processed so far
+  // @output input_idx - index will be incremented by the number of nodes
+  //                     from 'inputs' that are processed
+  // @input list_length - The expected length of list of TF tensors
+  // @output output_nodes - the list of new nodes creating TF tensors
+  //
+  // @return None
+  void GetNodesProducingTFTensorList(
+      const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+      int* input_idx, int list_length,
+      std::vector<NodeBuilder::NodeOut>* output_nodes);
+
+  // Get nodes that will feed a list of Mkl tensors to the new
+  // node that we are constructing.
+  //
+  // @input g - input graph,
+  // @input orig_node - Original node that we are rewriting
+  // @input inputs - inputs to old node that we are using for constructing
+  //                 new inputs,
+  // @input input_idx - the index in the 'inputs' vector pointing to the
+  //                    current input that we have processed so far
+  // @output input_idx - index will be incremented by the number of nodes
+  //                     from 'inputs' that are processed
+  // @input list_length - The expected length of list of Mkl tensors
+  // @output output_nodes - the list of new nodes creating Mkl tensors
+  //
+  // @return None
+  void GetNodesProducingMklTensorList(std::unique_ptr<Graph>* g,
+    Node* orig_node, const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+    int* input_idx, int list_length,
+    std::vector<NodeBuilder::NodeOut>* output_nodes);
+
+  // Get a node that will feed an Mkl tensor to the new
+  // node that we are constructing. The output node could be (1) 'n'
+  // if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
+  // if 'n' is not an Mkl layer.
+  //
+  // @input g - input graph,
+  // @input orig_node - Original node that we are rewriting,
+  // @input n - Node based on which we are creating Mkl node,
+  // @input n_output_slot - the output slot of node 'n'
+  //            which is feeding to the node that we are constructing
+  // @output mkl_node - the new node that will feed Mkl tensor
+  // @output mkl_node_output_slot - the slot number of mkl_node that
+  //                                will feed the tensor
+  // @return None
+  void GetNodeProducingMklTensor(std::unique_ptr<Graph>* g, Node* orig_node,
+    Node* n, int n_output_slot, Node** mkl_node, int* mkl_node_output_slot);
+
+  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
+  // in graph 'g'. Original node is input in 'old_node'. Inputs to 'nb' are
+  // set up in contiguous fashion. 'workspace_tensors' carry graph nodes
+  // producing workspace edges if 'are_workspace_tensors_available' is true.
+  // Otherwise, 'workspace_tensors' is empty vector.
+  //
+  // For details, refer to 'Ordering of inputs after rewriting' section in the
+  // documentation above.
+  //
+  // Returns Status::OK() if setting up inputs is successful, otherwise
+  // returns appropriate status code.
+  int SetUpContiguousInputs(
+      std::unique_ptr<Graph>* g,
+      const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+      NodeBuilder* nb, Node* old_node,
+      std::vector<NodeBuilder::NodeOut>* workspace_tensors,
+      bool are_workspace_tensors_available);
+
+  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
+  // in graph 'g'. Original node is input in 'orig_node'.
+  //
+  // For details, refer to 'Ordering of Tensorflow tensors and Mkl tensors'
+  // section in the documentation above.
+  //
+  // Returns Status::OK() if setting up inputs is successful, otherwise
+  // returns appropriate status code.
+  Status SetUpInputs(std::unique_ptr<Graph>* g,
+                     const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+                     NodeBuilder* nb, Node* orig_node);
+
+  // Add workspace edge on the input or output side of Node 'orig_node' by using
+  // NodeBuilder 'nb' for the new node provided. If 'orig_node' does not dictate
+  // adding workspace edge then do not add it. Workspace Tensorflow and Mkl
+  // tensors, if they need to be added, will be set into these tensors.
+  // If we set workspace tensors, then are_ws_tensors_added should be true.
+  void AddWorkSpaceEdgeIfNeeded(std::unique_ptr<Graph>* g, Node* orig_node,
+                                NodeBuilder* nb,
+                                std::vector<NodeBuilder::NodeOut>* ws_tensors,
+                                bool* are_ws_tensors_added);
+
+  // Functions specific to operators to copy attributes
+  // We need operator-specific function to copy attributes because the framework
+  // does not provide any generic function for it.
+  // NOTE: names are alphabetically sorted.
+  static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsConv2D(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb);
+
+  // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
+  // using node for original node 'orig_node' and return it in '*out'.
+  // TODO(nhasabni) We should move this to mkl_util.h
+  void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out,
+                             Node* orig_node);
+  void GetDummyWorkspaceTensorNode(std::unique_ptr<Graph>* g, Node** out,
+                                   Node* orig_node);
+};
+
+MklLayoutRewritePass::ConstStringsInfo MklLayoutRewritePass::csinfo_;
+
+// We register Mkl rewrite pass for phase 1 in post partitioning group.
+// We register it here so that we get a complete picture of all users of Mkl
+// nodes. Do not change the ordering of the Mkl passes.
+const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup =
+    OptimizationPassRegistry::POST_PARTITIONING;
+REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass);
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions for creating new node
+//////////////////////////////////////////////////////////////////////////
+
+static void FillInputs(const Node* n,
+                       gtl::InlinedVector<Node*, 4>* control_edges,
+                       gtl::InlinedVector<std::pair<Node*, int>, 4>* in) {
+  control_edges->clear();
+  for (const Edge* e : n->in_edges()) {
+    if (e->IsControlEdge()) {
+      control_edges->push_back(e->src());
+    } else {
+      (*in)[e->dst_input()] = std::make_pair(e->src(), e->src_output());
+    }
+  }
+  std::sort(control_edges->begin(), control_edges->end());
+  if (n->op_def().is_commutative()) {
+    // For commutative inputs, we sort the input by the input Node*
+    // to get a canonical ordering (so that add(a,b) and add(b, a) will
+    // hash to the same value if is_commutative is true for 'add').
+    std::sort(in->begin(), in->end());
+  }
+}
+
+void MklLayoutRewritePass::GetNodesProducingTFTensorList(
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs, int* input_idx,
+    int list_length, std::vector<NodeBuilder::NodeOut>* output_nodes) {
+  CHECK_LT(*input_idx, inputs.size());
+  CHECK_GT(list_length, 0);
+  CHECK_NOTNULL(output_nodes);
+  output_nodes->reserve(list_length);
+
+  while (list_length != 0) {
+    CHECK_GT(list_length, 0);
+    CHECK_LT(*input_idx, inputs.size());
+    Node* n = inputs[*input_idx].first;
+    int slot = inputs[*input_idx].second;
+    // If input node 'n' is just producing a single tensor at
+    // output slot 'slot' then we just add that single node.
+    output_nodes->push_back(NodeBuilder::NodeOut(n, slot));
+    (*input_idx)++;
+    list_length--;
+  }
+}
+
+// TODO(nhasabni) We should move this to mkl_util.h.
+void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr<Graph>* g,
+                                                 Node** out, Node* orig_node) {
+  // We use a tensor of shape {8} and value 0,0,0,0,0,0,0,0 to represent
+  // dummy Mkl tensor. 8 = 2*size_t.
+  const DataType dt = DataTypeToEnum<uint8>::v();
+  TensorProto proto;
+  proto.set_dtype(dt);
+  uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+  proto.set_tensor_content(const_cast<const void*>(static_cast<void*>(&zero)),
+                           8);
+  TensorShape dummy_shape({8});
+  dummy_shape.AsProto(proto.mutable_tensor_shape());
+  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
+               .Attr("value", proto)
+               .Attr("dtype", dt)
+               .Device(orig_node->def().device())  // We place this node on
+                                                   // the same device as the
+                                                   // device of the original
+                                                   // node.
+               .Finalize(&**g, out));
+
+  // If number of inputs to the original node is > 0, then we add
+  // control dependency between 1st input (index 0) of the original node and
+  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
+  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
+  // rewritten node. Adding control edge between 1st input of the original node
+  // and the dummy Mkl node ensures that the dummy node is in the same frame
+  // as the original node. Choosing 1st input is not necessary - any input of
+  // the original node is fine because all the inputs of a node are always in
+  // the same frame.
+  if (orig_node->num_inputs() > 0) {
+    Node* orig_input0 = nullptr;
+    TF_CHECK_OK(orig_node->input_node(0,
+                                      const_cast<const Node**>(&orig_input0)));
+    // Allow duplicate while adding control edge as it would fail (return
+    // NULL) if we try to add duplicate edge.
+    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out, true));
+  }
+
+  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
+}
+
+void MklLayoutRewritePass::GetNodesProducingMklTensorList(
+    std::unique_ptr<Graph>* g,
+    Node* orig_node,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
+    int* input_idx, int list_length,
+    std::vector<NodeBuilder::NodeOut>* output_nodes) {
+  CHECK_LT(*input_idx, inputs.size());
+  CHECK_GT(list_length, 0);
+  CHECK_NOTNULL(output_nodes);
+  output_nodes->reserve(list_length);
+
+  while (list_length != 0) {
+    CHECK_GT(list_length, 0);
+    CHECK_LT(*input_idx, inputs.size());
+    Node* n = inputs[*input_idx].first;
+    int slot = inputs[*input_idx].second;
+    // If 'n' is producing a single tensor, then create a single Mkl tensor
+    // node.
+    Node* mkl_node = nullptr;
+    int mkl_node_output_slot = 0;
+    GetNodeProducingMklTensor(g, orig_node, n, slot, &mkl_node,
+                              &mkl_node_output_slot);
+    output_nodes->push_back(NodeBuilder::NodeOut(mkl_node,
+                                                mkl_node_output_slot));
+    (*input_idx)++;
+    list_length--;
+  }
+}
+
+// Get an input node that will feed Mkl tensor to the new
+// node that we are constructing. An input node could be (1) 'n'
+// if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
+// if 'n' is not an Mkl layer.
+void MklLayoutRewritePass::GetNodeProducingMklTensor(std::unique_ptr<Graph>* g,
+    Node* orig_node, Node* n,
+    int n_output_slot, Node** mkl_node, int* mkl_node_output_slot) {
+  CHECK_NOTNULL(n);
+  CHECK_NOTNULL(mkl_node);
+  CHECK_NOTNULL(mkl_node_output_slot);
+
+  // If this is an MKL op, then it will create extra output for MKL layout.
+  DataType T;
+  if (GetNodeAttr(n->def(), "T", &T).ok() &&
+      mkl_op_registry::IsMklOp(n->type_string(), T)) {
+    // If this is an MKL op, then it will generate an edge that will receive
+    // Mkl tensor from a node.
+    // output slot number for Mkl tensor would be N+slot number of TensorFlow
+    // tensor, where N is total number of TensorFlow tensors.
+    *mkl_node = n;
+    *mkl_node_output_slot =
+        GetTensorMetaDataIndex(n_output_slot, n->num_outputs());
+  } else {
+    // If we have not visited the node and rewritten it, then we need
+    // to create a dummy node that will feed a dummy Mkl tensor to this node.
+    // DummyMklTensor node has no input and generates only 1 output
+    // (dummy Mkl tensor) as output slot number 0.
+    GetDummyMklTensorNode(g, mkl_node, orig_node);
+    CHECK_NOTNULL(*mkl_node);
+    *mkl_node_output_slot = 0;
+  }
+}
+
+int MklLayoutRewritePass::SetUpContiguousInputs(
+    std::unique_ptr<Graph>* g,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+    NodeBuilder* nb, Node* old_node,
+    std::vector<NodeBuilder::NodeOut>* workspace_tensors,
+    bool are_workspace_tensors_available) {
+  CHECK_NOTNULL(workspace_tensors);
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+
+  // TODO(nhasabni): Temporary solution to connect filter input of
+  // BackpropInput with the converted filter from Conv2D.
+  bool do_connect_conv2d_backprop_input_filter = false;
+  Node* conv2d_node = nullptr;
+  // Filter node is 2nd input (slot index 1) of Conv2D.
+  int kConv2DFilterInputSlotIdx = 1;
+  int kConv2DBackpropInputFilterInputSlotIdx = 1;
+  int kConv2DFilterOutputSlotIdx = 1;
+  if (old_node->type_string() == csinfo_.conv2d_grad_input) {
+    // We need to find Conv2D node from Conv2DBackpropInput.
+    // For that let's first find filter node that is 2nd input (slot 1)
+    // of BackpropInput.
+    Node* filter_node = nullptr;
+    old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, &filter_node);
+    CHECK_NOTNULL(filter_node);
+
+    // Now check which nodes receive from filter_node. Filter feeds as
+    // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias.
+    for (const Edge* e : filter_node->out_edges()) {
+      if ((e->dst()->type_string() == csinfo_.mkl_conv2d ||
+           e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias) &&
+          e->dst_input() == kConv2DFilterInputSlotIdx
+          /* filter is 2nd input of Conv2D and _MklConv2D. */) {
+        if (conv2d_node != nullptr) {
+          VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
+                  << " feeding multiple Conv2D nodes: "
+                  << filter_node->DebugString();
+          // We will not connect filter input of Conv2DBackpropInput
+          // to be safe here.
+          do_connect_conv2d_backprop_input_filter = false;
+          break;
+        } else {
+          conv2d_node = e->dst();
+          do_connect_conv2d_backprop_input_filter = true;
+        }
+      }
+    }
+  }
+
+  // Number of input slots to original op
+  // Input slots are represented by .Input() calls in REGISTER_OP.
+  int old_node_input_slots = old_node->op_def().input_arg_size();
+  // Actual number of inputs can be greater than or equal to number
+  // of Input slots because inputs of type list could be unfolded.
+  CHECK_GE(old_node_inputs.size(), old_node_input_slots);
+  int nn_slot_idx = 0;  // slot index for inputs of new node
+
+  // Let's copy all inputs (TF tensors) of original node to new node.
+  int iidx = 0;
+  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
+    // An input slot could be a single tensor or a list. We need
+    // to handle this case accordingly.
+    CHECK_LT(iidx, old_node_inputs.size());
+    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
+    if (ArgIsList(arg)) {
+      std::vector<NodeBuilder::NodeOut> new_node_inputs;
+      int N = GetTensorListLength(arg, old_node);
+      GetNodesProducingTFTensorList(old_node_inputs, &iidx, N,
+                                    &new_node_inputs);
+      nb->Input(new_node_inputs);
+      nn_slot_idx++;
+    } else {
+      // Special case for connecting filter input of Conv2DBackpropInput
+      if (do_connect_conv2d_backprop_input_filter &&
+          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
+        nb->Input(conv2d_node, kConv2DFilterOutputSlotIdx);
+      } else {
+        nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second);
+      }
+      iidx++;
+      nn_slot_idx++;
+    }
+  }
+
+  // If workspace tensors are available for this op and we are using
+  // contiguous ordering then we need to add Tensorflow tensor for
+  // workspace here because Tensorflow tensor for workspace is the
+  // last tensor in the list of Tensorflow tensors.
+  if (are_workspace_tensors_available) {
+    CHECK_EQ(workspace_tensors->size(), 2);
+    // Tensorflow tensor
+    nb->Input((*workspace_tensors)[0].node, (*workspace_tensors)[0].index);
+    nn_slot_idx++;
+  }
+
+  // Let's now setup all Mkl inputs to a new node.
+  // Number of Mkl inputs must be same as number of TF inputs.
+  iidx = 0;
+  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
+    // An input slot could be a single tensor or a list. We need
+    // to handle this case accordingly.
+    CHECK_LT(iidx, old_node_inputs.size());
+    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
+    if (ArgIsList(arg)) {
+      std::vector<NodeBuilder::NodeOut> new_node_inputs;
+      int N = GetTensorListLength(arg, old_node);
+      GetNodesProducingMklTensorList(g, old_node, old_node_inputs, &iidx,
+                                     N, &new_node_inputs);
+      nb->Input(new_node_inputs);
+      nn_slot_idx++;
+    } else {
+      Node* mkl_node = nullptr;
+      int mkl_node_output_slot = 0;
+      // Special case for connecting filter input of Conv2DBackpropInput
+      if (do_connect_conv2d_backprop_input_filter &&
+          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
+        GetNodeProducingMklTensor(g, old_node, conv2d_node,
+                                  kConv2DFilterOutputSlotIdx, &mkl_node,
+                                  &mkl_node_output_slot);
+      } else {
+        GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first,
+                                  old_node_inputs[iidx].second, &mkl_node,
+                                  &mkl_node_output_slot);
+      }
+      nb->Input(mkl_node, mkl_node_output_slot);
+      iidx++;
+      nn_slot_idx++;
+    }
+  }
+
+  // If workspace tensors are available for this op and we are using
+  // contiguous ordering then we need to add Mkl tensor for
+  // workspace here because Mkl tensor for workspace is the
+  // last tensor in the list of Mkl tensors.
+  if (are_workspace_tensors_available) {
+    CHECK_EQ(workspace_tensors->size(), 2);
+    // Mkl tensor
+    nb->Input((*workspace_tensors)[1].node, (*workspace_tensors)[1].index);
+    nn_slot_idx++;
+  }
+
+  return nn_slot_idx;
+}
+
+Status MklLayoutRewritePass::SetUpInputs(
+    std::unique_ptr<Graph>* g,
+    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
+    NodeBuilder* nb, Node* old_node) {
+  // Let's check if we need to add workspace tensors for this node.
+  // We add workspace edge only for MaxPool, LRN and BatchNorm.
+  std::vector<NodeBuilder::NodeOut> workspace_tensors;
+  bool are_workspace_tensors_available = false;
+  AddWorkSpaceEdgeIfNeeded(g, old_node, nb, &workspace_tensors,
+                           &are_workspace_tensors_available);
+
+  int new_node_input_slots = 0;
+  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
+    // TODO(nhasabni): implement this function just for same of completion.
+    // We do not use interleaved ordering right now.
+    return Status(
+        error::Code::UNIMPLEMENTED,
+        "Interleaved ordering of tensors is currently not supported.");
+  } else {
+    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+    new_node_input_slots = SetUpContiguousInputs(
+        g, old_node_inputs, nb, old_node, &workspace_tensors,
+        are_workspace_tensors_available);
+  }
+
+  // Sanity check
+  int old_node_input_slots = old_node->op_def().input_arg_size();
+  if (!are_workspace_tensors_available) {
+    // If we are not adding workspace tensors for this op, then the total
+    // number of input slots to the new node _must_ be 2 times the number
+    // of input slots to the original node: N original Tensorflow tensors and
+    // N for Mkl tensors corresponding to each Tensorflow tensors.
+    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2);
+  } else {
+    // If we are adding workspace tensors for this op, then the total
+    // The total number of input slots to new node _must_ be 2 times the number
+    // of input slots to the original node: N original Tensorflow tensors and
+    // N for Mkl tensors corresponding to each Tensorflow tensors plus 2
+    // (for workspace Tensorflow tensor and workspace Mkl tensor).
+    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2 + 2);
+  }
+
+  return Status::OK();
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions related to workspace pass
+//////////////////////////////////////////////////////////////////////////
+
+// TODO(nhasabni) We should move this to mkl_util.h.
+void MklLayoutRewritePass::GetDummyWorkspaceTensorNode(
+    std::unique_ptr<Graph>* g, Node** out, Node* orig_node) {
+  // We use a tensor of shape {1} and value 0 to represent
+  // dummy float tensor. We need this as a dummy workspace tensor.
+  // Workspace tensor has type uint8.
+  const DataType dt = DataTypeToEnum<uint8>::v();
+  TensorProto proto;
+  proto.set_dtype(dt);
+  float zero[1] = {0};
+  proto.set_tensor_content(const_cast<const void*>(static_cast<void*>(&zero)),
+                           4);
+  TensorShape dummy_shape({1});
+  dummy_shape.AsProto(proto.mutable_tensor_shape());
+  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
+                .Attr("value", proto)
+                .Attr("dtype", dt)
+                .Device(orig_node->def().device())  // We place this node on
+                                                    // same the device as the
+                                                    // device of the original
+                                                    // node.
+                .Finalize(&**g, out));
+
+  // If number of inputs to the original node is > 0, then we add
+  // control dependency between 1st input (index 0) of the original node and
+  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
+  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
+  // rewritten node. Adding control edge between 1st input of the original node
+  // and the dummy Mkl node ensures that the dummy node is in the same frame
+  // as the original node. Choosing 1st input is not necessary - any input of
+  // the original node is fine because all the inputs of a node are always in
+  // the same frame.
+  if (orig_node->num_inputs() > 0) {
+    Node* orig_input0 = nullptr;
+    TF_CHECK_OK(orig_node->input_node(0,
+                                      const_cast<const Node**>(&orig_input0)));
+    // Allow duplicate while adding control edge as it would fail (return
+    // NULL) if we try to add duplicate edge.
+    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out, true));
+  }
+
+  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
+}
+
+void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded(
+    std::unique_ptr<Graph>* g, Node* orig_node, NodeBuilder* nb,
+    std::vector<NodeBuilder::NodeOut>* ws_tensors, bool* are_ws_tensors_added) {
+  bool workspace_edge_added = false;  // Default initializer
+  CHECK_NOTNULL(are_ws_tensors_added);
+  *are_ws_tensors_added = false;  // Default initializer
+
+  DataType T;
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  for (auto ws : wsinfo_) {
+    if (orig_node->type_string() == ws.fwd_op &&
+        mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+          orig_node->type_string()), T)) {
+      // If this op is a fwd op, then we need to check if there is an
+      // edge from this node's fwd_slot to bwdop's bwd_slot. If there is
+      // an edge, then we just add an attribute on this node for setting
+      // workspace_passed to true. We don't add actual workspace edge
+      // in this node. Actual workspace edge gets added in the backward
+      // op for this node.
+      for (const Edge* e : orig_node->out_edges()) {
+        if (e->src_output() == ws.fwd_slot &&
+            e->dst()->type_string() == ws.bwd_op &&
+            e->dst_input() == ws.bwd_slot) {
+          nb->Attr("workspace_enabled", true);
+          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
+                  << orig_node->type_string();
+          workspace_edge_added = true;
+          // We found the edge that we were looking for, so break.
+          break;
+        }
+      }
+
+      if (!workspace_edge_added) {
+        // If we are here, then we did not find backward operator for this
+        // node.
+        nb->Attr("workspace_enabled", false);
+      }
+    } else if (orig_node->type_string() == ws.bwd_op &&
+               mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+                                          orig_node->type_string()), T)) {
+      // If this op is a bwd op, then we need to add workspace edge and
+      // it's Mkl tensor edge between its corresponding fwd op and this
+      // op. Corresponding fwd op is specified in 'fwd_op' field of
+      // workspace info. fwd_slot and bwd_slot in workspace info specify
+      // an edge between which slots connect forward and backward op.
+      // Once all these criteria match, we add a workspace edge between
+      // ws_fwd_slot and ws_bwd_slot. Its corresponding Mkl tensor is
+      // determined by interleaved/contiguous ordering. Function
+      // DataIndexToMetaDataIndex tells us the location of Mkl tensor
+      // from the location of the Tensorflow tensor.
+      for (const Edge* e : orig_node->in_edges()) {
+        if (e->src_output() == ws.fwd_slot &&
+            // We would have rewritten the forward op, so we need to use
+            // GetMklOpName call to get its Mkl name.
+            e->src()->type_string() == mkl_op_registry::GetMklOpName(
+                                                          ws.fwd_op) &&
+            e->dst_input() == ws.bwd_slot) {
+          nb->Attr("workspace_enabled", true);
+          CHECK_NOTNULL(ws_tensors);
+          // Add workspace edge between fwd op and bwd op.
+          ws_tensors->push_back(NodeBuilder::NodeOut(e->src(), ws.ws_fwd_slot));
+          // Add Mkl tensor edge for workspace edge between fwd op and bwd op.
+          ws_tensors->push_back(NodeBuilder::NodeOut(
+              e->src(), DataIndexToMetaDataIndex(ws.ws_fwd_slot,
+                                                 e->src()->num_outputs())));
+          *are_ws_tensors_added = true;
+          // In terms of input ordering, we add these calls to add Input
+          // here because workspace edge (and its Mkl tensor) is the last
+          // edge in the fwdop and bwdop. So all inputs before workspace
+          // tensor have been added by SetUpInputs function.
+          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
+                  << orig_node->type_string();
+          workspace_edge_added = true;
+          // We found the edge that we were looking for, so break.
+          break;
+        }
+      }
+
+      // If we are here means we did not find fwd op that feeds to this
+      // bwd op. So in this case, we need to generate dummy tensors for
+      // workspace input and Mkl tensor for workspace, and set
+      // workspace_enabled to false.
+      if (!workspace_edge_added) {
+        nb->Attr("workspace_enabled", false);
+        Node* dmt_ws = nullptr;      // Dummy tensor for workspace
+        Node* dmt_mkl_ws = nullptr;  // Dummy Mkl tensor for workspace
+        GetDummyWorkspaceTensorNode(g, &dmt_ws, orig_node);
+        GetDummyMklTensorNode(g, &dmt_mkl_ws, orig_node);
+        CHECK_NOTNULL(dmt_ws);
+        CHECK_NOTNULL(dmt_mkl_ws);
+        CHECK_NOTNULL(ws_tensors);
+        // We add dummy tensor as workspace tensor.
+        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_ws, 0));
+        // We add dummy tensor as Mkl tensor for workspace tensor.
+        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_mkl_ws, 0));
+        *are_ws_tensors_added = true;
+        VLOG(1) << "MklLayoutRewritePass: dummy workspace_enabled for "
+                << orig_node->type_string();
+      }
+    } else {
+      // If this node does not match any workspace info, then we do not
+      // do anything special for workspace propagation for it.
+    }
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Op-specific functions to copy attributes from old node to new node
+//////////////////////////////////////////////////////////////////////////
+
+void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  string padding;
+  std::vector<int32> strides;
+  bool use_cudnn_on_gpu;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+  TF_CHECK_OK(
+      GetNodeAttr(orig_node->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("strides", strides);
+  nb->Attr("padding", padding);
+  nb->Attr("data_format", data_format);
+  nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu);
+}
+
+void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node,
+                                         NodeBuilder* nb) {
+  DataType T;
+  int N;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+}
+
+void MklLayoutRewritePass::CopyAttrsBiasAddGrad(const Node* orig_node,
+                                                NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  std::vector<int32> strides;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("strides", strides);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node,
+                                        NodeBuilder* nb) {
+  DataType T;
+  int depth_radius;
+  float bias;
+  float alpha;
+  float beta;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "depth_radius", &depth_radius));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "bias", &bias));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "alpha", &alpha));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "beta", &beta));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("depth_radius", depth_radius);
+  nb->Attr("bias", bias);
+  nb->Attr("alpha", alpha);
+  nb->Attr("beta", beta);
+}
+
+void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node,
+                                            NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  string padding;
+  std::vector<int32> ksize, strides;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "ksize", &ksize));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("ksize", ksize);
+  nb->Attr("strides", strides);
+  nb->Attr("padding", padding);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node,
+                                             NodeBuilder* nb) {
+  DataType T;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+}
+
+void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  DataType Tshape;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tshape", &Tshape));
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("Tshape", Tshape);
+}
+
+void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node,
+                                          NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  int num_split;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "num_split", &num_split));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("num_split", num_split);
+  nb->Attr("data_format", data_format);
+}
+
+void MklLayoutRewritePass::CopyAttrsConcat(const Node* orig_node,
+                                           NodeBuilder* nb) {
+  DataType T;
+  int N;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+}
+
+void MklLayoutRewritePass::CopyAttrsConcatV2(const Node* orig_node,
+                                             NodeBuilder* nb) {
+  DataType T;
+  int N;
+  DataType tidx;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tidx", &tidx));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("N", N);
+  nb->Attr("Tidx", tidx);
+}
+
+void MklLayoutRewritePass::CopyAttrsFusedBatchNorm(const Node* orig_node,
+                                                   NodeBuilder* nb) {
+  DataType T;
+  float epsilon;
+  string data_format;
+  bool is_training;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "epsilon", &epsilon));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "is_training", &is_training));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("epsilon", epsilon);
+  nb->Attr("data_format", data_format);
+  nb->Attr("is_training", is_training);
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions related to node merge pass
+//////////////////////////////////////////////////////////////////////////
+
+Node* MklLayoutRewritePass::CheckForNodeMerge(const Node* a) const {
+  // TODO(nhasabni) Add check for type of node similar to CheckForNodeRewrite
+  // once we support BiasAddGrad as Mkl layer.
+
+  // Search for all matching mergeinfo.
+  // We allow more than one match for extensibility.
+  std::vector<const MergeInfo*> matching_mi;
+  for (auto mi = minfo_.cbegin(); mi != minfo_.cend(); ++mi) {
+    if (a->type_string() == mi->op1 || a->type_string() == mi->op2) {
+      matching_mi.push_back(&*mi);
+    }
+  }
+
+  for (const MergeInfo* mi : matching_mi) {
+    // Get the operand with which 'a' can be merged.
+    Node* b = nullptr;
+    if ((b = mi->get_node_to_be_merged(a)) == nullptr) {
+      continue;
+    }
+
+    // Get the control edges and input of node
+    const int N_in = a->num_inputs();
+    gtl::InlinedVector<Node*, 4> a_control_edges;
+    gtl::InlinedVector<std::pair<Node*, int>, 4> a_in(N_in);
+    FillInputs(a, &a_control_edges, &a_in);
+
+    const int B_in = b->num_inputs();
+    gtl::InlinedVector<Node*, 4> b_control_edges;
+    gtl::InlinedVector<std::pair<Node*, int>, 4> b_in(B_in);
+    FillInputs(b, &b_control_edges, &b_in);
+
+    // Shouldn't merge if a and b have different control edges.
+    if (a_control_edges != b_control_edges) {
+      continue;
+    } else {
+      // We found a match.
+      return b;
+    }
+  }
+
+  return nullptr;
+}
+
+Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr<Graph>* g,
+                                                    Node* m, Node* n) {
+  CHECK_EQ(((m->type_string() == csinfo_.bias_add &&
+             n->type_string() == csinfo_.conv2d)) ||
+           ((n->type_string() == csinfo_.bias_add &&
+             m->type_string() == csinfo_.conv2d)), true);
+
+  // If 'm' is BiasAdd, then 'n' is Conv2D. Since Conv2D feeds BiasAdd,
+  // BiasAdd is successor node, and Conv2D predecessor node.
+  Node* pred = m->type_string() == csinfo_.bias_add ? n : m;
+  Node* succ = m->type_string() == csinfo_.bias_add ? m : n;
+
+  // 1. Get all attributes from input nodes.
+  DataType T_pred, T_succ;
+  string padding;
+  std::vector<int32> strides;
+  string data_format_pred, data_format_succ;
+  bool use_cudnn_on_gnu;
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred));
+  TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred));
+  TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ));
+  TF_CHECK_OK(
+      GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu));
+  // We check to ensure that data formats of both succ and pred are same.
+  // We expect them to be same, so we can enforce this as assert.
+  // But assert can be too strict, so we enforce this as a check.
+  // If the check fails, then we do not merge two nodes.
+  // We also do same check for devices.
+  if (data_format_pred != data_format_succ || T_pred != T_succ ||
+      pred->assigned_device_name() != succ->assigned_device_name() ||
+      pred->def().device() != succ->def().device()) {
+    return Status(error::Code::INVALID_ARGUMENT,
+                  "data_format or T attribute or devices of Conv2D and "
+                  "BiasAdd do not match. Will skip node merge optimization");
+  }
+
+  const int succ_num = succ->num_inputs();
+  gtl::InlinedVector<Node*, 4> succ_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> succ_in(succ_num);
+  FillInputs(succ, &succ_control_edges, &succ_in);
+
+  const int pred_num = pred->num_inputs();
+  gtl::InlinedVector<Node*, 4> pred_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> pred_in(pred_num);
+  FillInputs(pred, &pred_control_edges, &pred_in);
+
+  // We need to ensure that Conv2D only feeds to BiasAdd (some other operator is
+  // not expecting output of Conv2D). If this is not the case, then we cannot
+  // merge Conv2D with BiasAdd.
+  const int kFirstOutputSlot = 0;
+  for (const Edge* e : pred->out_edges()) {
+    if (e->src_output() == kFirstOutputSlot && e->dst() != succ) {
+      return Status(error::Code::INVALID_ARGUMENT,
+                    "Conv2D does not feed to BiasAdd, or "
+                    "it feeds BiasAdd but has multiple outputs. "
+                    "Will skip node merge optimization");
+    }
+  }
+
+  // 2. Get inputs from both the nodes.
+  // Find the 2 inputs from the conv and the bias from the add Bias.
+  // Get operand 0, 1 of conv2D.
+  CHECK_EQ(pred->in_edges().size(), 2);  // Conv2D must have 2 inputs.
+  // Get operand 1 of add_bias
+  // BiasAdd must have 2 inputs: Conv, bias
+  CHECK_EQ(succ->in_edges().size(), 2);
+
+  // We will use the node name of BiasAdd as the name of new node
+  // Build new node. We use same name as original node, but change the op
+  // name.
+  NodeBuilder nb(succ->name(), csinfo_.conv2d_with_bias);
+  nb.Input(pred_in[0].first, pred_in[0].second);  // In1 of Conv2D
+  // pred_in[1] will be 2nd Tensorflow tensor for Conv2D.
+  nb.Input(pred_in[1].first, pred_in[1].second);  // In2 of Conv2D
+  // In1 of BiasAdd is same as output of Conv2D.
+  nb.Input(succ_in[1].first, succ_in[1].second);  // In2 of BiasAdd
+
+  // Copy attributes from Conv2D to Conv2DWithBias.
+  CopyAttrsConv2D(const_cast<const Node*>(pred), &nb);
+
+  // Copy the device assigned to old node to new node.
+  nb.Device(succ->def().device());
+
+  // Create node.
+  Node* new_node;
+  nb.Finalize(&**g, &new_node);
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from 'pred' node and 'succ' node to new 'new_node'
+  // node are already copied in BuildNode. We handle control edges now.
+  for (const Edge* e : pred->in_edges()) {
+    if (e->IsControlEdge()) {
+      // Allow duplicate while adding control edge as it would fail (return
+      // NULL) if we try to add duplicate edge.
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true));
+    }
+  }
+  for (const Edge* e : succ->in_edges()) {
+    if (e->IsControlEdge()) {
+      // Allow duplicate while adding control edge as it would fail (return
+      // NULL) if we try to add duplicate edge.
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true));
+    }
+  }
+
+  // Incoming edges are fixed, we will fix the outgoing edges now.
+  // First, we will fix outgoing control edges from 'pred' node.
+  for (const Edge* e : pred->out_edges()) {
+    if (e->IsControlEdge()) {
+      // Allow duplicate while adding control edge as it would fail (return
+      // NULL) if we try to add duplicate edge.
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true));
+    }
+  }
+
+  // Second, we will fix outgoing control and data edges from 'succ' node.
+  for (const Edge* e : succ->out_edges()) {
+    if (e->IsControlEdge()) {
+      // Allow duplicate while adding control edge as it would fail (return
+      // NULL) if we try to add duplicate edge.
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true));
+    } else {
+      // BiasAdd has only 1 output (at slot 0) and merged node also has only 1
+      // output (at slot 0).
+      const int kConv2DWithBiasOutputSlot = 0;
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kConv2DWithBiasOutputSlot,
+                                    e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy device assigned to old node to new node.
+  // It's ok to use pred or succ as we have enforced a check that
+  // both have same device assigned.
+  new_node->set_assigned_device_name(pred->assigned_device_name());
+
+  VLOG(1) << "MklLayoutRewritePass: Merged old node:" << pred->DebugString()
+          << ", and node: " << succ->DebugString()
+          << ", into node:" << new_node->DebugString();
+
+  (*g)->RemoveNode(succ);
+  (*g)->RemoveNode(pred);
+
+  return Status::OK();
+}
+
+Status MklLayoutRewritePass::MergeConv2DBackpropFilterWithBiasAddGrad(
+    std::unique_ptr<Graph>* g, Node* m, Node* n) {
+  CHECK_EQ(((m->type_string() == csinfo_.bias_add_grad &&
+             n->type_string() == csinfo_.conv2d_grad_filter)) ||
+           ((n->type_string() == csinfo_.bias_add_grad &&
+             m->type_string() == csinfo_.conv2d_grad_filter)), true);
+
+  // If 'm' is BiasAddGrad, then 'n' is BackpropFilter.
+  Node* badd = m->type_string() == csinfo_.bias_add_grad ? m : n;
+  Node* fltr = m->type_string() == csinfo_.bias_add_grad ? n : m;
+
+  // Sanity check for attributes from input nodes.
+  DataType T_b, T_f;
+  string data_format_b, data_format_f;
+  TF_CHECK_OK(GetNodeAttr(badd->def(), "T", &T_b));
+  TF_CHECK_OK(GetNodeAttr(fltr->def(), "T", &T_f));
+  TF_CHECK_OK(GetNodeAttr(badd->def(), "data_format", &data_format_b));
+  TF_CHECK_OK(GetNodeAttr(fltr->def(), "data_format", &data_format_f));
+  if (data_format_b != data_format_f || T_b != T_f ||
+      badd->assigned_device_name() != fltr->assigned_device_name() ||
+      badd->def().device() != fltr->def().device()) {
+    return Status(error::Code::INVALID_ARGUMENT,
+                  "data_format or T attribute or devices of "
+                  "Conv2DBackpropFilter and BiasAddGrad do not match. "
+                  "Will skip node merge optimization");
+  }
+
+  // We will use the node name of Conv2DBackpropFilter as the name of new node.
+  // This is because BackpropFilterWithBias is going to emit bias output also.
+  NodeBuilder nb(fltr->name(), csinfo_.conv2d_grad_filter_with_bias);
+  // Since Conv2DBackpropFilterWithBias has same number of inputs as
+  // Conv2DBackpropFilter, we can just copy input edges directly. We dont need
+  // to copy any data input of BiasAddGrad because that input also goes to
+  // Conv2DBackpropFilter.
+  const int fltr_ins = fltr->num_inputs();
+  gtl::InlinedVector<Node*, 4> fltr_control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> fltr_in_edges(fltr_ins);
+  FillInputs(fltr, &fltr_control_edges, &fltr_in_edges);
+  for (int idx = 0; idx < fltr_ins; idx++) {
+    nb.Input(fltr_in_edges[idx].first, fltr_in_edges[idx].second);
+  }
+
+  // Copy attributes from Conv2DBackpropFilter.
+  CopyAttrsConv2D(const_cast<const Node*>(fltr), &nb);
+
+  // Copy the device assigned to old node to new node.
+  nb.Device(fltr->def().device());
+
+  // Create node.
+  Node* new_node;
+  nb.Finalize(&**g, &new_node);
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from BiasAddGrad node and Conv2DBackpropFilter node to
+  // new 'new_node' node are already copied in BuildNode. We handle control
+  // edges now.
+  for (const Edge* e : badd->in_edges()) {
+    if (e->IsControlEdge()) {
+      // Allow duplicate while adding control edge as it would fail (return
+      // NULL) if we try to add duplicate edge.
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true));
+    }
+  }
+  for (const Edge* e : fltr->in_edges()) {
+    if (e->IsControlEdge()) {
+      // Allow duplicate while adding control edge as it would fail (return
+      // NULL) if we try to add duplicate edge.
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true));
+    }
+  }
+
+  // Incoming edges are fixed, we will fix the outgoing edges now.
+  // First, we will fix outgoing control edges from 'badd' node.
+  // Conv2DBackpropFilter has 1 output -- filter_grad.
+  // Conv2DBackpropFilterWithBias has 2 outputs -- filter_grad and
+  // bias_grad. But filter_grad is at same slot number (0) in both the
+  // nodes. bias_grad is at slot number 1 in Conv2DBackpropFilterWithBias, while
+  // it is at slot number 0 in BiasAddGrad.
+  const int kMergedNodeFilterGradOutputIdx = 0;
+  const int kMergedNodeBiasGradOutputIdx = 1;
+
+  for (const Edge* e : badd->out_edges()) {
+    if (e->IsControlEdge()) {
+      // Allow duplicate while adding control edge as it would fail (return
+      // NULL) if we try to add duplicate edge.
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeBiasGradOutputIdx,
+                                  e->dst(), e->dst_input()));
+    }
+  }
+
+  // Second, we will fix outgoing control and data edges from 'fltr' node.
+  for (const Edge* e : fltr->out_edges()) {
+    if (e->IsControlEdge()) {
+      // We allow duplicate edge for this case since we already add control
+      // edge from new_node in line 3990. Line below could be adding same
+      // edge to same destination again. In such case, if we do not allow
+      // duplicate edge, then this call will fail.
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, kMergedNodeFilterGradOutputIdx,
+                                  e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy device assigned to old node to new node.
+  // It's ok to use badd or fltr as we have enforced a check that
+  // both have same device assigned.
+  new_node->set_assigned_device_name(badd->assigned_device_name());
+
+  VLOG(1) << "MklLayoutRewritePass: Merged old node:" << badd->DebugString()
+          << ", and node: " << fltr->DebugString()
+          << ", into node:" << new_node->DebugString();
+
+  (*g)->RemoveNode(badd);
+  (*g)->RemoveNode(fltr);
+
+  return Status::OK();
+}
+
+Status MklLayoutRewritePass::MergeNode(std::unique_ptr<Graph>* g, Node* m,
+                                       Node* n) {
+  CHECK_NOTNULL(m);
+  CHECK_NOTNULL(n);
+
+  if (((m->type_string() == csinfo_.bias_add &&
+        n->type_string() == csinfo_.conv2d)) ||
+      ((n->type_string() == csinfo_.bias_add &&
+        m->type_string() == csinfo_.conv2d))) {
+    return this->MergeConv2DWithBiasAdd(g, m, n);
+  }
+
+  if (((m->type_string() == csinfo_.bias_add_grad &&
+        n->type_string() == csinfo_.conv2d_grad_filter)) ||
+      ((n->type_string() == csinfo_.bias_add_grad &&
+        m->type_string() == csinfo_.conv2d_grad_filter))) {
+    return this->MergeConv2DBackpropFilterWithBiasAddGrad(g, m, n);
+  }
+
+  return Status(error::Code::UNIMPLEMENTED,
+                "Unimplemented case for node merge optimization.");
+}
+
+//////////////////////////////////////////////////////////////////////////
+//           Helper functions for node rewrite
+//////////////////////////////////////////////////////////////////////////
+
+Status MklLayoutRewritePass::RewriteNode(std::unique_ptr<Graph>* g,
+                                         Node* orig_node,
+                                         const RewriteInfo* ri) {
+  CHECK_NOTNULL(ri);
+  CHECK_NOTNULL(orig_node);
+
+  VLOG(1) << "MklLayoutRewritePass: Original node:" << orig_node->DebugString();
+
+  // Get all inputs.
+  int num_inputs = orig_node->in_edges().size();
+
+  // Drop count for control edges from inputs
+  for (const Edge* e : orig_node->in_edges()) {
+    if (e->IsControlEdge()) {
+      num_inputs--;
+    }
+  }
+
+  gtl::InlinedVector<Node*, 4> control_edges;
+  gtl::InlinedVector<std::pair<Node*, int>, 4> inputs(num_inputs);
+  FillInputs(orig_node, &control_edges, &inputs);
+
+  // Build new node. We use same name as original node, but change the op name.
+  NodeBuilder nb(orig_node->name().c_str(), ri->new_name.c_str());
+  // Copy user-specified device assigned to original node to new node.
+  nb.Device(orig_node->def().device());
+  // Set up new inputs to the rewritten node.
+  Status s = SetUpInputs(g, inputs, &nb, orig_node);
+  if (s != Status::OK()) {
+    return s;
+  }
+
+  ri->copy_attrs(const_cast<const Node*>(orig_node), &nb);
+  // Set the Mkl layer label for this op.
+  nb.Attr("_kernel", mkl_op_registry::kMklOpLabel);
+
+  // Finalize graph and get new node.
+  Node* new_node = nullptr;
+  TF_CHECK_OK(nb.Finalize(&**g, &new_node));
+  CHECK_NOTNULL(new_node);
+
+  // Incoming data edges from 'orig_node' node to new 'new_node' node are
+  // already copied in BuildNode. We need to handle control edges now.
+  for (const Edge* e : orig_node->in_edges()) {
+    if (e->IsControlEdge()) {
+      // Allow duplicate while adding control edge as it would fail (return
+      // NULL) if we try to add duplicate edge.
+      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true));
+    }
+  }
+
+  // Copy outgoing edges from 'orig_node' node to new
+  // 'new_node' node, since the output also follows same ordering among
+  // Tensorflow tensors and Mkl tensors. We need to connect Tensorflow
+  // tensors appropriately. Specifically, nth output of the original node
+  // will become 2*nth output of the Mkl node for the interleaved ordering
+  // of the tensors. For the contiguous ordering of the tensors, it will be n.
+  // GetTensorDataIndex provides this mapping function.
+  for (const Edge* e : orig_node->out_edges()) {
+    if (e->IsControlEdge()) {
+      // Allow duplicate while adding control edge as it would fail (return
+      // NULL) if we try to add duplicate edge.
+      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true));
+    } else {
+      CHECK_NOTNULL((*g)->AddEdge(new_node, GetTensorDataIndex(e->src_output(),
+                            e->src()->num_outputs()),
+                    e->dst(), e->dst_input()));
+    }
+  }
+
+  // Copy the runtime device assigned from original code to new node.
+  new_node->set_assigned_device_name(orig_node->assigned_device_name());
+
+  // Delete original node and mark new node as rewritten.
+  (*g)->RemoveNode(orig_node);
+
+  VLOG(1) << "MklLayoutRewritePass: New node:" << new_node->DebugString();
+  return Status::OK();
+}
+
+const MklLayoutRewritePass::RewriteInfo*
+MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
+  CHECK_NOTNULL(n);
+
+  // First check if node along with its type is supported by MKL layer.
+  // We do not want to rewrite an op into Mkl op if types are not supported.
+  // E.g., MklRelu does not support INT32. So we cannot rewrite Relu to
+  // MklRelu if type is INT32.
+  DataType T;
+  if (!GetNodeAttr(n->def(), "T", &T).ok()) {
+    return nullptr;
+  }
+
+  // We make an exception for __MklDummyConv2DWithBias and
+  // __MklConv2DBackpropFilterWithBias since their names do not match Mkl node
+  // names.
+  if (n->type_string() != csinfo_.conv2d_with_bias &&
+      n->type_string() != csinfo_.conv2d_grad_filter_with_bias &&
+      !mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(
+                                        n->type_string()), T)) {
+      return nullptr;
+  }
+
+  // For elementwise node, we reuse the Eigen implementation and pass the MKL
+  // metadata tensor through so we can avoid conversions. However, if all
+  // incoming edges are in TF format, we don't need all this overhead, so
+  // replace the elementwise node only if at least one of its parents is a MKL
+  // node.
+  //
+  // Identity nodes can also skip replacement if they are not being served by
+  // any MKL nodes.
+  //
+  // TODO(vrane): Add implementation for element-wise ops that doesn't reuse
+  // eigen code to reduce cross-library dependency.
+  VLOG(1) << "ELEMENTWISE: checking op: " << n->type_string();
+  if (mkl_op_registry::IsMklElementWiseOp(
+        mkl_op_registry::GetMklOpName(n->type_string()), T) ||
+      n->type_string().find("Identity") != string::npos) {
+    VLOG(1) << "ELEMENTWISE: op is elementwise: " << n->type_string();
+    bool incoming_mkl_edge = false;
+    int num_parent = 0;
+    for (auto parent : n->in_edges()) {
+      if (mkl_op_registry::IsMklOp(parent->src()->type_string(), T)) {
+        VLOG(1) << "ELEMENTWISE: parent " << num_parent++ << " is MKL op: "
+                << parent->src()->type_string();
+        incoming_mkl_edge = true;
+        break;
+      } else {
+        VLOG(1) << "ELEMENTWISE: parent " << num_parent++ << " is NON-MKL op: "
+                << parent->src()->type_string();
+      }
+    }
+    if (incoming_mkl_edge == false) {
+      VLOG(1) << "ELEMENTWISE: Skipping replacement of elementwise node which has no MKL "
+                 "parents.";
+      return nullptr;
+    } else {
+      VLOG(1) << "ELEMENTWISE: Replacing elementwise node " << n->type_string() <<
+        " which has MKL parents";
+    }
+  }
+
+  // We now check if rewrite rule applies for this op. If rewrite rule passes
+  // for this op, then we rewrite it to Mkl op.
+  // Find matching RewriteInfo and then check that rewrite rule applies.
+  for (auto ri = rinfo_.cbegin(); ri != rinfo_.cend(); ++ri) {
+    if (n->type_string().compare(ri->name) == 0 &&
+        ri->rewrite_rule(n)) {
+      return &*ri;
+    }
+  }
+
+  // Else return not found.
+  return nullptr;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//              Run function for the pass
+///////////////////////////////////////////////////////////////////////////////
+
+bool MklLayoutRewritePass::RunPass(std::unique_ptr<Graph>* g) {
+  bool result = false;
+  CHECK_NOTNULL(g);
+
+  DumpGraph("Before running MklLayoutRewritePass", &**g);
+
+  std::vector<Node*> order;
+  GetReversePostOrder(**g, &order);  // This will give us topological sort.
+  for (Node* n : order) {
+    // If node is not an op or it cannot run on CPU device, then skip.
+    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
+      continue;
+    }
+
+    Node* m = nullptr;
+    if ((m = CheckForNodeMerge(n)) != nullptr && CanOpRunOnCPUDevice(m)) {
+      // Check if the node 'n' can be merged with any other node. If it can
+      // be 'm' contains the node with which it can be merged.
+      string n1_name = n->name();
+      string n2_name = m->name();
+
+      VLOG(1) << "MklLayoutRewritePass: Scheduled nodes " << n1_name << " and "
+              << n2_name << " for merging";
+
+      if (MergeNode(g, n, m) == Status::OK()) {
+        VLOG(1) << "MklLayoutRewritePass: Merged nodes " << n1_name << " and "
+                << n2_name;
+        result = true;
+      }
+    }
+  }
+
+  DumpGraph("After running MklLayoutRewritePass(NodeMerge)", &**g);
+
+  order.clear();
+  GetReversePostOrder(**g, &order);  // This will give us topological sort.
+  for (Node* n : order) {
+    // If node is not an op or it cannot run on CPU device, then skip.
+    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
+      continue;
+    }
+
+    const RewriteInfo* ri = nullptr;
+    // We will first search if node is to be rewritten.
+    if ((ri = CheckForNodeRewrite(n)) != nullptr) {
+      string node_name = n->name();
+      string op_name = n->type_string();
+
+      VLOG(1) << "MklLayoutRewritePass: Scheduled node " << node_name
+              << " with op " << op_name << " for rewrite using"
+              << " layout optimization.";
+
+      if (RewriteNode(g, n, ri) == Status::OK()) {
+        VLOG(1) << "MklLayoutRewritePass: rewrote node " << node_name
+                << " with op " << op_name << " for Mkl layout optimization.";
+        result = true;
+      }
+    }
+  }
+
+  DumpGraph("After running MklLayoutRewritePass(NodeMerge+Rewrite)", &**g);
+
+  return result;
+}
+
+bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g) {
+  return MklLayoutRewritePass().RunPass(g);
+}
+
+Status MklLayoutRewritePass::Run(
+  const GraphOptimizationPassOptions& options) {
+  if (options.graph == nullptr && options.partition_graphs == nullptr) {
+    return Status::OK();
+  }
+
+  auto process_graph = [&](std::unique_ptr<Graph>* g) {
+    // Get the ownership of a graph
+    std::unique_ptr<Graph>* ng = std::move(g);
+    RunPass(ng);
+    // Return the ownership of a graph back
+    g->reset(ng->release());
+  };
+
+  if (kMklLayoutRewritePassGroup !=
+      OptimizationPassRegistry::POST_PARTITIONING) {
+    // For any pre-partitioning phase, a graph is stored in options.graph.
+    process_graph(options.graph);
+  } else {
+    // For post partitioning phase, graphs are stored in
+    // options.partition_graphs.
+    for (auto& pg : *options.partition_graphs) {
+      process_graph(&pg.second);
+    }
+  }
+
+  return Status::OK();
+}
+#endif  // INTEL_MKL_DNN
 }  // namespace tensorflow
 
 #endif
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index abc63e4f35aa9fd6f1df127741ae6d10f49024b9..75f7ca2d4d7ce7c86858a40fe34fed6aa707c9e5 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -37,6 +37,9 @@ limitations under the License.
 #include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
+
+#ifndef INTEL_MKL_DNN
+
 namespace {
 
 const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
@@ -1881,6 +1884,1627 @@ static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
 BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
 
 }  // namespace
+
+#else  // INTEL_MKL_DNN
+
+namespace {
+
+const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
+const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0";
+
+static void InitGraph(const string& s, Graph* graph,
+                      const string& device = kCPUDevice) {
+  GraphDef graph_def;
+
+  auto parser = protobuf::TextFormat::Parser();
+  //  parser.AllowRelaxedWhitespace(true);
+  CHECK(parser.MergeFromString(s, &graph_def)) << s;
+  GraphConstructorOptions opts;
+  TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, graph));
+
+  for (Node* node : graph->nodes()) {
+    node->set_assigned_device_name(device);
+  }
+}
+
+class MklLayoutPassTest : public ::testing::Test {
+ public:
+  MklLayoutPassTest() : graph_(OpRegistry::Global()) {}
+
+  void InitGraph(const string& s, const string& device = kCPUDevice) {
+    ::tensorflow::InitGraph(s, &graph_, device);
+    original_ = CanonicalGraphString(&graph_);
+  }
+
+  static bool IncludeNode(const Node* n) { return n->IsOp(); }
+
+  static string EdgeId(const Node* n, int index) {
+    if (index == 0) {
+      return n->name();
+    } else if (index == Graph::kControlSlot) {
+      return strings::StrCat(n->name(), ":control");
+    } else {
+      return strings::StrCat(n->name(), ":", index);
+    }
+  }
+
+  string CanonicalGraphString(Graph* g) {
+    std::vector<string> nodes;
+    std::vector<string> edges;
+    for (const Node* n : g->nodes()) {
+      if (IncludeNode(n)) {
+        nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")"));
+      }
+    }
+    for (const Edge* e : g->edges()) {
+      if (IncludeNode(e->src()) && IncludeNode(e->dst())) {
+        edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->",
+                                        EdgeId(e->dst(), e->dst_input())));
+      }
+    }
+    // Canonicalize
+    std::sort(nodes.begin(), nodes.end());
+    std::sort(edges.begin(), edges.end());
+    return strings::StrCat(str_util::Join(nodes, ";"), "|",
+                           str_util::Join(edges, ";"));
+  }
+
+  string DoMklLayoutOptimizationPass() {
+    string before = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "Before MKL layout rewrite pass: " << before;
+
+    std::unique_ptr<Graph>* ug = new std::unique_ptr<Graph>(&graph_);
+    RunMklLayoutRewritePass(ug);
+
+    string result = CanonicalGraphString(&graph_);
+    LOG(ERROR) << "After MKL layout rewrite pass:  " << result;
+    return result;
+  }
+
+  const string& OriginalGraph() const { return original_; }
+
+  Graph graph_;
+  string original_;
+};
+
+REGISTER_OP("Input").Output("o: float").SetIsStateful();
+REGISTER_OP("InputList").Output("o: N * float").Attr("N: int").SetIsStateful();
+REGISTER_OP("HalfInput").Output("o: half").SetIsStateful();
+REGISTER_OP("Int32Input").Output("o: int32").SetIsStateful();
+REGISTER_OP("_MklInput").Output("o: uint8").SetIsStateful();
+REGISTER_OP("_MklInput2").Output("o: uint8")
+                        .Output("o1: uint8").SetIsStateful();
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to node merge optiimization
+/////////////////////////////////////////////////////////////////////
+
+TEST_F(MklLayoutPassTest, Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Zeta);D(Zeta)|"
+            "A->C;A->D;B->C:1;B->D:1");
+}
+
+// Test set 1: Conv2D + AddBias
+
+// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Zeta(E,Y)
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'Y' op: 'Input'}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'Y']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(_MklConv2DWithBias);Y(Input);Z(Zeta)|A->E;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;DMT/_1->E:4;"
+            "DMT/_2->E:5;E->Z;Y->Z:1");
+}
+
+// Graph contains only Conv2D, no AddBias.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_NoAddBias) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);DMT/_0(Const);DMT/_1(Const)|"
+            "A->C;A:control->DMT/_0:control;A:control->DMT/_1:control;B->C:1;"
+            "DMT/_0->C:2;DMT/_1->C:3");
+}
+
+// Conv2D output does not go to BiasAdd.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D', 'E'] }");  // Output of _MklConv2D does not go to BiasAdd.
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(Input);F(BiasAdd)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;D->F;DMT/_0->C:2;DMT/_1->C:3;"
+            "E->F:1");
+}
+
+// Conv2D has two outgoing edges: BiasAdd and some other dummy node (Zeta).
+// Merge should not be done in such case.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D', 'E'] }"  // Conv2D has two outputs.
+                              // No merge should happen.
+      "node { name: 'G' op: 'Zeta'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(Input);F(BiasAdd);G(Zeta)|A->C;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;B->C:1;C->G;"
+            "D->F;DMT/_0->C:2;DMT/_1->C:3;E->F:1;E->G:1");
+}
+
+// data_format attribute value mismatch. Merge should not be done
+// in such case.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_AttrMismatch) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NHCW' } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);E(BiasAdd)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;C->E;D->E:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+// Test set 2: BiasAddGrad + Conv2DBackpropFilter fusion tests
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilterWithBias);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const)|A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion in the presence of BackpropFilter. But nodes do not match
+// criteria for rewrite. So rewrite should not happen. 3rd input of
+// Conv2DBackpropFilter is different than input to BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['A'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilter);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(BiasAddGrad)|A->D;A->E;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion, but nodes do not match criteria for fusion.
+// Different input formats.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NHWC' } }"
+      " input: ['A'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);"
+            "D(_MklConv2DBackpropFilter);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(BiasAddGrad)|A->D;A->E;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;C->D:2;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// BiasAddGrad fusion in the presence of BackpropFilter only. Fusion is done
+// before node rewrite. Check this ordering.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'O' op: '_MklInput'}"
+      "node { name: 'D' op: '_MklConv2DWithBias'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
+      "node { name: 'E' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['D', 'A']}"
+      "node { name: 'F' op: 'Int32Input'}"
+      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['E', 'F', 'A', 'M', 'N', 'O'] }"
+      "node { name: 'H' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
+            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
+            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;"
+            "C->D:2;D->E;E->G;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;"
+            "O->G:5");
+}
+
+// C=Conv2D(A,B); E=BiasAdd(C,D); Y=Zeta(E,X);
+// G=Conv2DBackpropInput(F,B,E)
+// This is a case of node rewrite followed by node merge followed by connecting
+// filter output of Conv2DWithBias to filter input of Conv2DBackpropInput.
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'X' op: 'Input'}"
+      "node { name: 'Y' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'X']}"
+      "node { name: 'F' op: 'Int32Input'}"
+      "node { name: 'G' op: 'Conv2DBackpropInput'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['F', 'B', 'E']}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['G', 'X']}");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2DWithBias);F(Int32Input);"
+            "G(_MklConv2DBackpropInput);X(Input);Y(Zeta);Z(Zeta)|"
+            "A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;"
+            "DMT/_1->E:4;DMT/_2->E:5;DMT/_3->G:3;E->G:2;E->Y;E:1->G:1;E:2->G:5;"
+            "E:3->G:4;F->G;F:control->DMT/_3:control;G->Z;X->Y:1;X->Z:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to rewriting node to Mkl node
+/////////////////////////////////////////////////////////////////////
+
+// Single Conv2D Op; No Mkl layer on the input and on the output.
+// We will generate dummy Mkl tensor as 2nd input of Conv2D.
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+// 2 Conv2D Ops in sequence. Both should get transformed and 1st Conv2D will
+// have 2 outputs, both of which will be inputs to next Conv2D.
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(_MklConv2D);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->C;A->D;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->C:1;C->D:1;C->E;"
+            "C:2->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2");
+}
+
+// Conv2D with INT32 which is not supported by Mkl
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Negative_UnsupportedType) {
+  InitGraph(
+      "node { name: 'A' op: 'HalfInput'}"
+      "node { name: 'B' op: 'HalfInput'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_HALF } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_HALF } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(HalfInput);B(HalfInput);C(Conv2D);D(Zeta)|"
+            "A->C;B->C:1;B->D;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropFilter);"
+            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:4;DMT/_2->D:5");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradInput_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropInput'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['B', 'A', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropInput);"
+            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
+            "A->D:1;A->E;B->D;B:control->DMT/_0:control;"
+            "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;"
+            "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Polygamma'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|"
+            "A->C;A->D:1;B->C:1;C->D;D->E");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'MatMul'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'transpose_a'      value { b: false } }"
+      " attr { key: 'transpose_b'      value { b: false } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|"
+            "A->C;A->D:1;B->C:1;C->D;D->E");
+}
+
+// Check that we never rewrite BiasAddGrad.
+TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'C' op: '_MklConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'M', 'N']}"
+      "node { name: 'D' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['C', 'A']}"
+      "node { name: 'E' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Zeta);E(BiasAddGrad);"
+            "M(_MklInput);N(_MklInput)|A->C;A->D:1;B->C:1;C->D;D->E;"
+            "M->C:2;N->C:3");
+}
+
+// Concat Op test: Concat with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['A', 'B:0', 'B:1']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(_MklConcat);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;"
+            "B:1->D:2;C->E;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// Concat with 2 Mkl layers feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['G', 'E', 'F']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
+            "F(_MklConv2D);G(Const);H(_MklConcat);I(Zeta)|A->E;A->I;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "B->E:1;C->F;C:control->DMT/_0:control;C:control->DMT/_1:control;"
+            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
+            "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;"
+            "G:control->DMT/_4:control;H->I:1");
+}
+
+// Concat with 1 Mkl and 1 non-Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['G', 'E', 'F']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
+            "H(_MklConcat);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
+            "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;"
+            "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1");
+}
+
+// ConcatV2 Op test: ConcatV2 with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Basic) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['B:0', 'B:1', 'A']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(_MklConcatV2);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D:2;B->D;B:1->D:1;"
+            "B:control->DMT/_0:control;B:control->DMT/_1:control;"
+            "B:control->DMT/_2:control;C->E;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// ConcatV2 with 2 Mkl layers feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['E', 'F', 'G']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
+            "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Zeta)|A->E;A->I;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;B->E:1;C->F;"
+            "C:control->DMT/_0:control;C:control->DMT/_1:control;"
+            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
+            "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;"
+            "F:2->H:4;G->H:2;H->I:1");
+}
+
+// ConcatV2 with 1 Mkl and 1 non-Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D']}"
+      "node { name: 'G' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'H' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['E', 'F', 'G']}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'H'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
+            "H(_MklConcatV2);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
+            "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;"
+            "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;"
+            "G->H:2;H->I:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Relu_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;"
+            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluReluGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklRelu);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;"
+            "DMT/_1->C:2");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklAvgPool);C(Zeta);DMT/_0(Const)|A->B;A->C;"
+            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Int32Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'AvgPoolGrad' "
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Int32Input);B(Input);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
+            "DMT/_1->C:3");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolAvgPoolGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'I' op: 'Int32Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'AvgPoolGrad' "
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['I', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklAvgPool);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const);I(Int32Input)|A->B;A->D;A:control->DMT/_0:control;"
+            "B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;DMT/_1->C:2;I->C;"
+            "I:control->DMT/_1:control");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNormGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNormGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
+            "F(_MklFusedBatchNormGrad);G(Zeta)|A->F;A->G;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
+            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
+            "E->F:4;F->G:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNorm'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
+            "F(_MklFusedBatchNorm);G(Zeta)|A->F;A->G;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
+            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
+            "E->F:4;F->G:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+//  Unit tests related to rewriting node for workspace edges
+/////////////////////////////////////////////////////////////////////
+
+/* Test LRN->MaxPool->MaxPoolGrad->LRNGrad replacement by workspace nodes. */
+TEST_F(MklLayoutPassTest, MaxPoolLRN_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['B'] }"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['B', 'C', 'D'] }"
+      "node { name: 'F' op: 'Input'}"
+      "node { name: 'G' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['E', 'F', 'B'] }"
+      "node { name: 'H' op: 'Input'}"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['H', 'G'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+      "A(Input);B(_MklLRN);C(_MklMaxPool);D(Input);DMT/_0(Const);DMT/_1(Const);"
+      "DMT/_2(Const);E(_MklMaxPoolGrad);F(Input);G(_MklLRNGrad);H(Input);"
+      "I(Zeta)|A->B;A:control->DMT/_0:control;B->C;B->E;B->G:2;B:1->G:3;"
+      "B:2->C:1;B:2->E:4;B:2->G:6;B:3->G:7;B:control->DMT/_1:control;C->E:1;"
+      "C:1->E:3;C:2->E:5;C:3->E:7;D->E:2;DMT/_0->B:1;DMT/_1->E:6;DMT/_2->G:5;"
+      "E->G;E:1->G:4;E:control->DMT/_2:control;F->G:1;G->I:1;H->I");
+}
+
+/* Test LRN->LRNGrad replacement by workspace nodes. */
+TEST_F(MklLayoutPassTest, LRN_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'D', 'B'] }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);E(_MklLRNGrad);F(Zeta)|"
+            "A->B;A:control->DMT/_0:control;B->E:2;B:1->E:3;B:2->E:6;B:3->E:7;"
+            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "D->E:1;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:5;E->F:1");
+}
+
+/* Test LRN->LRNGrad replacement when only one of them is present. */
+TEST_F(MklLayoutPassTest, LRN_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Zeta);DMT/_0(Const)|"
+            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+/* Test LRN->LRNGrad replacement when only one of them is present. */
+TEST_F(MklLayoutPassTest, LRN_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklLRNGrad);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
+}
+
+/* Test LRN->LRNGrad negative case, where single LRN feeds
+   2 LRNGrad nodes at different slots. */
+TEST_F(MklLayoutPassTest, LRN_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LRN'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'D', 'B'] }"
+      "node { name: 'F' op: 'LRNGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'        value { f: 0.001 } }"
+      " attr { key: 'beta'         value { f: 0.75 } }"
+      " attr { key: 'bias'         value { f: 1.0 } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'depth_radius' value { i: 2 } }"
+      " input: ['C', 'B', 'D'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['E', 'F'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
+            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);DMT/_5(Const);"
+            "DMT/_6(Const);E(_MklLRNGrad);F(_MklLRNGrad);G(Zeta)|A->B;"
+            "A:control->DMT/_0:control;B->E:2;"
+            "B->F:1;B:1->E:3;B:2->E:6;B:2->F:5;B:3->E:7;C->E;C->F;"
+            "C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "C:control->DMT/_3:control;C:control->DMT/_4:control;"
+            "C:control->DMT/_5:control;C:control->DMT/_6:control;"
+            "D->E:1;D->F:2;DMT/_0->B:1;DMT/_1->F:3;DMT/_2->F:7;DMT/_3->F:4;"
+            "DMT/_4->F:6;DMT/_5->E:4;DMT/_6->E:5;E->G;F->G:1");
+}
+
+/* Test MaxPool->MaxPoolGrad replacement by workspace+rewrite nodes. */
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['C', 'B', 'D'] }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'E'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklMaxPool);C(Input);D(Input);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(_MklMaxPoolGrad);F(Zeta)|"
+            "A->B;A:control->DMT/_0:control;B->E:1;B:1->E:3;B:2->E:5;B:3->E:7;"
+            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
+            "D->E:2;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:6;E->F:1");
+}
+
+// Test MaxPool>MaxPoolGrad replacement when only one of them is present.
+// In this case, we will rewrite MaxPool node but workspace edges will not
+// be present.
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklMaxPool);C(Zeta);DMT/_0(Const)|"
+            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+// Test MaxPoolGrad replacement when only one of them is present.
+// In this case, we will rewrite MaxPoolGrad and for workspace tensor and
+// its Mkl part, we will generate dummy tensor.
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'MaxPoolGrad'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
+      " input: ['A', 'B', 'C'] }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklMaxPoolGrad);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
+            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
+            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
+            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
+}
+
+// Test MaxPool handling for batch-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative4) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative5) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:2, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NCHW)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative6) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:2, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative7) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for batch-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative8) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative9) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:2} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Test MaxPool handling for depth-wise pooling (NHWC)
+// No rewrite should take place in such case
+TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative10) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:2} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+
+// Single Conv2D Op on GPU device
+// No rewrite should happen
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Conv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B']}"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['B', 'C'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Conv2D);D(Zeta)|A->C;B->C:1;B->D;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'O' op: '_MklInput'}"
+      "node { name: 'D' op: '_MklConv2DWithBias'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
+      "node { name: 'E' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['D', 'A']}"
+      "node { name: 'F' op: 'BiasAddGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['E'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
+            "E(Zeta);F(BiasAddGrad);M(_MklInput);N(_MklInput);"
+            "O(_MklInput)|A->D;A->E:1;B->D:1;C->D:2;D->E;E->F;"
+            "M->D:3;N->D:4;O->D:5");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Int32Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Conv2DBackpropFilter'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Int32Input);C(Input);D(Conv2DBackpropFilter);E(Zeta)|"
+            "A->D;A->E;B->D:1;C->D:2;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_Relu_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Relu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Relu);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'ReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(ReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_MaxPool_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'MaxPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'AvgPool'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NHWC' } }"
+      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'      value { s: 'VALID' } }"
+      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(AvgPool);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+// Concat Op test: Concat with no Mkl layer feeding it
+TEST_F(MklLayoutPassTest, NodeRewrite_Concat_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Concat'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['A', 'B:0', 'B:1']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(Concat);E(Zeta)|A->D;"
+            "B->D:1;B:1->D:2;C->E;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Const' "
+      " attr { key: 'dtype' value { type: DT_INT32 } }"
+      " attr { key: 'value' value { "
+      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
+      "    int_val: 0 } } } }"
+      "node { name: 'B' op: 'InputList'"
+      " attr { key: 'N'                value { i: 2 } }}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'ConcatV2'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
+      " attr { key: 'N'                value { i: 2 } }"
+      " input: ['B:0', 'B:1', 'A']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['C', 'D'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Const);B(InputList);C(Input);D(ConcatV2);E(Zeta)|"
+            "A->D:2;B->D;B:1->D:1;C->E;D->E:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_DeviceTest) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'Input'}"
+      "node { name: 'F' op: 'FusedBatchNorm'"
+      " attr { key: 'T'            value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'  value { s: 'NCHW' } }"
+      " attr { key: 'epsilon'      value { f: 0.0001 } }"
+      " attr { key: 'is_training'  value { b: true } }"
+      " input: ['A', 'B', 'C', 'D', 'E'] }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'F'] }", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(Input);E(Input);"
+            "F(FusedBatchNorm);G(Zeta)|A->F;A->G;B->F:1;C->F:2;D->F:3;"
+            "E->F:4;F->G:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) {
+  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'M' op: '_MklInput'}"
+      "node { name: 'N' op: '_MklInput'}"
+      "node { name: 'C' op: '_MklConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " input: ['A', 'B', 'M', 'N']}"
+      "node { name: 'D' op: 'Input'}"
+      "node { name: 'E' op: 'BiasAdd'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " input: ['C', 'D'] }"
+      "node { name: 'Y' op: 'Input'}"
+      "node { name: 'Z' op: 'Zeta'"
+      " attr {key: 'T'                 value { type: DT_FLOAT } }"
+      " input: ['E', 'Y']}", kGPUDevice);
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);"
+            "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->C;"
+            "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1");
+}
+
+/////////////////////////////////////////////////////////////////////
+
+static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
+  testing::StopTiming();
+  string s;
+  for (int in = 0; in < 10; in++) {
+    s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in);
+  }
+  random::PhiloxRandom philox(301, 17);
+  random::SimplePhilox rnd(&philox);
+  for (int op = 0; op < op_nodes; op++) {
+    s += strings::Printf(
+        "node { name: 'op%04d' op: 'Zeta' attr { key: 'T' value { "
+        "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
+        op, rnd.Uniform(10), rnd.Uniform(10));
+  }
+
+  bool first = true;
+  while (iters > 0) {
+    Graph* graph = new Graph(OpRegistry::Global());
+    InitGraph(s, graph);
+    int N = graph->num_node_ids();
+    if (first) {
+      testing::SetLabel(strings::StrCat("Per graph node.  Nodes: ", N));
+      first = false;
+    }
+    {
+      testing::StartTiming();
+      std::unique_ptr<Graph> ug(graph);
+      RunMklLayoutRewritePass(&ug);
+      testing::StopTiming();
+    }
+    iters -= N;  // Our benchmark units are individual graph nodes,
+                 // not whole graphs
+    // delete graph;
+  }
+}
+BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
+
+}  // namespace
+
+#endif  // INTEL_MKL_DNN
+
 }  // namespace tensorflow
 
 #endif /* INTEL_MKL */
diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index 99f1318072220d397870794cf3d2643d64b9696e..2ca9b720ee127b892c06230efb3517f5afabea45 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -159,6 +159,7 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
         "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder",
     ],
 )
diff --git a/tensorflow/core/grappler/clusters/BUILD b/tensorflow/core/grappler/clusters/BUILD
index e9ddb86a108c3da0e1a052f547bdc9a40ace904f..5b8ce373bcf87a10875e764ba5cdbec96d58c080 100644
--- a/tensorflow/core/grappler/clusters/BUILD
+++ b/tensorflow/core/grappler/clusters/BUILD
@@ -78,6 +78,8 @@ tf_cc_test(
     srcs = ["virtual_cluster_test.cc"],
     deps = [
         ":virtual_cluster",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:scope",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
@@ -99,7 +101,9 @@ cc_library(
         "//tensorflow/cc:coordinator",
         "//tensorflow/cc:queue_runner",
         "//tensorflow/core:core_cpu",
+        "//tensorflow/core:core_cpu_lib",
         "//tensorflow/core:direct_session",
+        "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/kernels:ops_util",
diff --git a/tensorflow/core/grappler/clusters/cluster.cc b/tensorflow/core/grappler/clusters/cluster.cc
index e2db47b758f588f0a356bde1c9eacc0d5ff7f335..01a618ed7775eee64ce40e283394c09622353157 100644
--- a/tensorflow/core/grappler/clusters/cluster.cc
+++ b/tensorflow/core/grappler/clusters/cluster.cc
@@ -35,6 +35,10 @@ void Cluster::SetNumWarmupSteps(int num_steps) {
       num_steps);
 }
 
+int Cluster::NumWarmupSteps() const {
+  return options_.config.graph_options().build_cost_model_after();
+}
+
 void Cluster::DisableDetailedStats(bool disable) {
   if (disable) {
     options_.config.mutable_graph_options()->set_build_cost_model(0);
diff --git a/tensorflow/core/grappler/clusters/cluster.h b/tensorflow/core/grappler/clusters/cluster.h
index 616ab6ffdcc1e62c4c56f6826a8a5852d51b00d7..5068f72b30d49850ab445318d1f67d0f4e0e618a 100644
--- a/tensorflow/core/grappler/clusters/cluster.h
+++ b/tensorflow/core/grappler/clusters/cluster.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/protobuf/device_properties.pb.h"
 #include "tensorflow/core/public/session_options.h"
 
@@ -38,6 +39,9 @@ class Cluster {
   explicit Cluster(int timeout_s);
   virtual ~Cluster();
 
+  // Returns a string that represent the type of cluster that was instantiated.
+  virtual string type() const = 0;
+
   // Provision the hardware resources needed to run TensorFlow and start a
   // TensorFlow session that can take advantage of these resources.
   // The actual resources that are leveraged depend on the type of cluster
@@ -64,6 +68,9 @@ class Cluster {
   // before Provision().
   void SetNumWarmupSteps(int num_steps);
 
+  // Returns the number of warmup steps.
+  int NumWarmupSteps() const;
+
   // Disable the collection of detailed statistics. Must be called
   // before Provision().
   void DisableDetailedStats(bool disable);
@@ -85,6 +92,21 @@ class Cluster {
   // sorted alphabetically.
   const std::vector<string> GetDeviceNames() const;
 
+  // Enables collecting the allocator stats. Call with enable=true must be made
+  // before Provision().
+  virtual Status EnablePeakMemoryStats(bool enable) {
+    return errors::Unimplemented(strings ::StrCat(
+        "Peak Memory Stats are not supported on ", type(), " clusters"));
+  }
+
+  // Returns peak memory of all devices during the session creation and session
+  // runs.
+  virtual Status GetPeakMemoryUsage(
+      std::unordered_map<string, uint64>* device_peak_memory) const {
+    return errors::Unimplemented(
+        "GetPeakMemoryUsage is not implemented for this type of cluster.");
+  }
+
   // Prepare the session to run the specified grappler item. This include
   // initializing all the model variables.
   virtual Status Initialize(const GrapplerItem& item) = 0;
diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc
index 1a6fad41828c1cc3eaa0d78d12d984dcf5b59692..2712c5b67910c2d10a13237673cc671222955fbb 100644
--- a/tensorflow/core/grappler/clusters/single_machine.cc
+++ b/tensorflow/core/grappler/clusters/single_machine.cc
@@ -19,6 +19,8 @@ limitations under the License.
 #include <memory>
 
 #include "tensorflow/cc/training/queue_runner.h"
+#include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/grappler/clusters/utils.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/kernels/ops_util.h"
@@ -31,20 +33,13 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-static std::atomic<bool> already_created(false);
+static std::atomic<bool> already_provisioned(false);
 
 SingleMachine::SingleMachine(int timeout_s, int num_cpu_cores, int num_gpus)
     : Cluster(timeout_s),
       num_gpus_(num_gpus),
       expected_init_time_s_(0),
       closing_(false) {
-  // This is really ugly: to avoid leaking variables, we need to reset the tf
-  // session every time we're done processing a grappler item. However,
-  // variables are global, and therefore we can't have more than 1 session alive
-  // at a time. This check detects when more that one cluster is created.
-  CHECK(!already_created);
-  already_created = true;
-
   VLOG(1) << "Number of CPU cores: " << num_cpu_cores
           << " Number of GPUs: " << num_gpus;
   thread_pool_.reset(new thread::ThreadPool(
@@ -71,26 +66,42 @@ SingleMachine::~SingleMachine() {
   // Reset the thread-pool so that there are no outstanding Session::Run(...)s
   // when we delete the session.
   thread_pool_.reset();
-
-  CHECK(already_created);
-  already_created = false;
 }
 
 Status SingleMachine::Provision() {
-  Status status = ResetSession();
-  if (!status.ok()) {
-    return status;
+  // This is really ugly: to avoid leaking variables, we need to reset the tf
+  // session every time we're done processing a grappler item. However,
+  // variables are global, and therefore we can't have more than 1 session alive
+  // at a time. This check detects when more that one cluster is provisioned.
+  if (already_provisioned) {
+    return errors::Unavailable(
+        "Can't provision more than one single cluster at a time");
   }
 
-  DeviceProperties attr = GetLocalCPUInfo();
-  devices_["/job:localhost/replica:0/task:0/cpu:0"] = GetLocalCPUInfo();
+  TF_RETURN_IF_ERROR(ResetSession());
+
+  std::vector<DeviceAttributes> devices;
+  TF_RETURN_IF_ERROR(session_->ListDevices(&devices));
+  int gpu_id = 0;
+  for (const auto& dev : devices) {
+    DeviceProperties attr;
+    if (dev.device_type() == "CPU") {
+      attr = GetLocalCPUInfo();
+    } else if (dev.device_type() == "GPU") {
+      attr = GetLocalGPUInfo(gpu_id++);
+    } else {
+      attr.set_type(dev.device_type());
+    }
+    // Overwrite the memory size since users might have requested to use only a
+    // fraction of the available device memory.
+    attr.set_memory_size(dev.memory_limit());
+    devices_[dev.name()] = attr;
+  }
+  already_provisioned = true;
 
-  VLOG(1) << "Number of GPUs: " << num_gpus_;
-  for (int i = 0; i < num_gpus_; ++i) {
-    string device_name =
-        strings::StrCat("/job:localhost/replica:0/task:0/device:GPU:", i);
-    VLOG(1) << "Adding GPU device " << device_name;
-    devices_[device_name] = GetLocalGPUInfo(i);
+  // Clear highmark stats of all local allocators.
+  if (cpu_allocator_stats_enabled_) {
+    TF_RETURN_IF_ERROR(ClearAllocatorStats());
   }
   return Status::OK();
 }
@@ -108,27 +119,12 @@ Status SingleMachine::Initialize(const GrapplerItem& item) {
 }
 
 Status SingleMachine::Shutdown() {
-  TF_RETURN_IF_ERROR(CloseSession(true /*use_timeout*/));
+  TF_RETURN_IF_ERROR(ShutdownSession());
+
+  mutex_lock l(this->last_graph_mu_);
+  last_graph_ = nullptr;
+  already_provisioned = false;
 
-  // Delete the threadpool: this ensures that all the pending closures complete
-  // before we return. Note that if TF deadlocked on us, the closures will
-  // never complete, and the call to thread_pool_.reset() will never return:
-  // therefore we need to delete the threadpool with the background thread.
-  // That thread itself will also never complete, so the user should
-  // abort the process to avoid leaking too many resources.
-  auto n = std::make_shared<Notification>();
-  Env::Default()->SchedClosure([this, n]() {
-    thread_pool_.reset();
-    n->Notify();
-  });
-  int64 timeout_us = 1000000ll * timeout_s_;
-  const bool notified = WaitForNotificationWithTimeout(n.get(), timeout_us);
-  if (!notified) {
-    // Let the caller know that we can't shutdown the session properly since
-    // there are calls to Session::Run() still running.
-    return errors::Unavailable("The session is still running graphs after ",
-                               timeout_s_, " seconds");
-  }
   return Status::OK();
 }
 
@@ -196,6 +192,41 @@ Status SingleMachine::Run(const GraphDef& graph_def,
   return Status::OK();
 }
 
+Status SingleMachine::EnablePeakMemoryStats(bool enable) {
+  EnableCPUAllocatorStats(enable);
+  cpu_allocator_stats_enabled_ = enable;
+  // No need to enable GPU allocator stats since its stats are always collected.
+  return Status::OK();
+}
+
+Status SingleMachine::GetPeakMemoryUsage(
+    std::unordered_map<string, uint64>* device_peak_memory) const {
+  // Cpu_allocator->TracksAllocationSizes() returns true doesn't always mean the
+  // the AllocatorStats would be collected.
+  if (!cpu_allocator_stats_enabled_) {
+    return Status(error::INVALID_ARGUMENT,
+                  "Tracking allocation for CPU is not enabled.");
+  }
+
+  const DeviceMgr* device_mgr;
+  TF_RETURN_IF_ERROR(session_->LocalDeviceManager(&device_mgr));
+  std::vector<Device*> devices = device_mgr->ListDevices();
+
+  device_peak_memory->clear();
+  for (Device* device : devices) {
+    AllocatorStats stats;
+    auto* allocator = device->GetAllocator(AllocatorAttributes());
+    if (!allocator->TracksAllocationSizes()) {
+      return Status(error::INVALID_ARGUMENT,
+                    "Tracking allocation is not enabled.");
+    }
+    allocator->GetStats(&stats);
+    (*device_peak_memory)[device->name()] = stats.max_bytes_in_use;
+  }
+
+  return Status::OK();
+}
+
 Status SingleMachine::RunWithTimeout(
     const std::vector<std::pair<string, Tensor>>& feed,
     const std::vector<string>& fetch, RunMetadata* run_metadata) {
@@ -230,7 +261,7 @@ Status SingleMachine::RunWithTimeout(
 }
 
 Status SingleMachine::CloseSession(bool use_timeout) {
-  if (!session_) {
+  if (!session_ || !thread_pool_) {
     return Status::OK();
   }
 
@@ -274,12 +305,38 @@ Status SingleMachine::CloseSession(bool use_timeout) {
   return Status::OK();
 }
 
+Status SingleMachine::ShutdownSession() {
+  TF_RETURN_IF_ERROR(CloseSession(true /*use_timeout*/));
+
+  // Delete the threadpool: this ensures that all the pending closures complete
+  // before we return. Note that if TF deadlocked on us, the closures will
+  // never complete, and the call to thread_pool_.reset() will never return:
+  // therefore we need to delete the threadpool with the background thread.
+  // That thread itself will also never complete, so the user should
+  // abort the process to avoid leaking too many resources.
+  auto n = std::make_shared<Notification>();
+  Env::Default()->SchedClosure([this, n]() {
+    thread_pool_.reset();
+    n->Notify();
+  });
+  int64 timeout_us = 1000000ll * timeout_s_;
+  const bool notified = WaitForNotificationWithTimeout(n.get(), timeout_us);
+  if (!notified) {
+    // Let the caller know that we can't shutdown the session properly since
+    // there are calls to Session::Run() still running.
+    return errors::Unavailable("The session is still running graphs after ",
+                               timeout_s_, " seconds");
+  }
+
+  return Status::OK();
+}
+
 Status SingleMachine::ResetSession() {
   if (session_) {
     LOG(INFO) << "Cleaning up previous session";
 
     // Make sure the session is properly closed
-    TF_RETURN_IF_ERROR(Shutdown());
+    TF_RETURN_IF_ERROR(ShutdownSession());
 
     // Destroying the object deletes all its variables as well. This is only
     // true for DirectSession.
@@ -332,5 +389,29 @@ void SingleMachine::MergeCosts(CostGraphDef* graph_costs,
   }
 }
 
+Status SingleMachine::ClearAllocatorStats() const {
+  // Cpu_allocator->TracksAllocationSizes() returns true doesn't always mean the
+  // the AllocatorStats would be collected.
+  if (!cpu_allocator_stats_enabled_) {
+    return Status(error::INVALID_ARGUMENT,
+                  "Tracking allocation for CPU is not enabled.");
+  }
+
+  const DeviceMgr* device_mgr;
+  TF_RETURN_IF_ERROR(session_->LocalDeviceManager(&device_mgr));
+  std::vector<Device*> devices = device_mgr->ListDevices();
+
+  for (Device* device : devices) {
+    AllocatorStats stats;
+    auto* allocator = device->GetAllocator(AllocatorAttributes());
+    if (!allocator->TracksAllocationSizes()) {
+      return Status(error::INVALID_ARGUMENT,
+                    "Tracking allocation is not enabled.");
+    }
+    allocator->ClearStats();
+  }
+  return Status::OK();
+}
+
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/clusters/single_machine.h b/tensorflow/core/grappler/clusters/single_machine.h
index d3efbe3c614580d0502874412697cd5719e28be5..a254f72f0c7719e49d4f52e8cc42181a09071801 100644
--- a/tensorflow/core/grappler/clusters/single_machine.h
+++ b/tensorflow/core/grappler/clusters/single_machine.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_GRAPPLER_CLUSTERS_SINGLE_MACHINE_H_
 
 #include "tensorflow/cc/training/coordinator.h"
+#include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/grappler/clusters/cluster.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/platform/mutex.h"
@@ -32,6 +33,8 @@ class SingleMachine : public Cluster {
   SingleMachine(int timeout_s, int num_cpu_cores, int num_gpus);
   ~SingleMachine() override;
 
+  string type() const override { return "single_machine"; }
+
   Status Provision() override;
   Status Shutdown() override;
 
@@ -40,6 +43,12 @@ class SingleMachine : public Cluster {
              const std::vector<std::pair<string, Tensor>>& feed,
              const std::vector<string>& fetch, RunMetadata* metadata) override;
 
+  Status EnablePeakMemoryStats(bool enable) override;
+
+  // It requires EnableAllocatorStats(true) be called before Provision().
+  Status GetPeakMemoryUsage(
+      std::unordered_map<string, uint64>* device_peak_memory) const override;
+
  private:
   Status RunWithTimeout(const std::vector<std::pair<string, Tensor>>& feed,
                         const std::vector<string>& fetch,
@@ -49,9 +58,12 @@ class SingleMachine : public Cluster {
                         RunMetadata* run_metadata, int64 timeout_s);
   Status ResetSession();
   Status CloseSession(bool use_timeout);
+  Status ShutdownSession();
   void MergeCosts(CostGraphDef* graph_costs, const CostGraphDef& init_costs,
                   const CostGraphDef& queue_costs);
 
+  Status ClearAllocatorStats() const;
+
   const int num_gpus_;
   std::unique_ptr<Session> session_;
   std::vector<QueueRunnerDef> queue_runner_defs_;
@@ -67,6 +79,8 @@ class SingleMachine : public Cluster {
 
   mutex close_mu_;
   bool closing_ GUARDED_BY(close_mu_);
+
+  bool cpu_allocator_stats_enabled_ = false;
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/clusters/single_machine_test.cc b/tensorflow/core/grappler/clusters/single_machine_test.cc
index f6c325c2a4bb1877f07fbfd034755ff501344f48..c6352c1448bb38ece78530007e2534d475ef7fb6 100644
--- a/tensorflow/core/grappler/clusters/single_machine_test.cc
+++ b/tensorflow/core/grappler/clusters/single_machine_test.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/queue_runner.pb.h"
@@ -44,10 +45,14 @@ class SingleMachineTest : public ::testing::Test {
 #endif
     cluster_.reset(
         new SingleMachine(timeout_s, 3 /* num_cpu_cores */, 0 /* num_gpus */));
+    TF_CHECK_OK(cluster_->EnablePeakMemoryStats(true));
     TF_CHECK_OK(cluster_->Provision());
   }
 
   void TearDown() override {
+    if (cluster_) {
+      TF_CHECK_OK(cluster_->Shutdown());
+    }
     cluster_.reset();
   }
 
@@ -55,6 +60,10 @@ class SingleMachineTest : public ::testing::Test {
   std::unique_ptr<SingleMachine> cluster_;
 };
 
+TEST_F(SingleMachineTest, ClusterType) {
+  CHECK_EQ("single_machine", cluster_->type());
+}
+
 TEST_F(SingleMachineTest, CostModel) {
   TrivialTestGraphInputYielder fake_input(4, 1, 10, false,
                                           cluster_->GetDeviceNames());
@@ -178,8 +187,7 @@ TEST_F(SingleMachineTest, GraphOptimizations) {
   // With optimizations turned on, some nodes could have been optimized away,
   // and the cost model could be partial. Restart the cluster with optimizations
   // disabled and make sure we have all the information we're looking for.
-  cluster_.reset();
-  cluster_.reset(new SingleMachine(5, 3, 0));
+  TF_CHECK_OK(cluster_->Shutdown());
   cluster_->DisableOptimizer(true);
   TF_CHECK_OK(cluster_->Provision());
 
@@ -324,7 +332,7 @@ static void RunInfiniteTFLoop() {
 
 TEST_F(SingleMachineTest, InfiniteLoops) {
   // The RunInfiniteTFLoop function creates its own cluster.
-  cluster_.reset();
+  TF_CHECK_OK(cluster_->Shutdown());
 
   EXPECT_EXIT(RunInfiniteTFLoop(), ::testing::ExitedWithCode(0), ".*");
 }
@@ -459,60 +467,18 @@ TEST_F(SingleMachineTest, PersistentMemory) {
       found_hashtable = true;
       // Persistent memory usage should be 0 since it's recorded as part of the
       // initialize_table op.
-      EXPECT_EQ(0, node.host_persistent_memory_size());
-      EXPECT_EQ(0, node.device_persistent_memory_size());
+      EXPECT_EQ(0, node.persistent_memory_size());
     } else if (node.name() == "initialize_table") {
       found_table_init = true;
       // Persistent memory should hold 2 keys and 2 values.
-      EXPECT_LE(4 * sizeof(int64), node.host_persistent_memory_size());
-      EXPECT_EQ(0, node.device_persistent_memory_size());
+      EXPECT_LE(4 * sizeof(int64), node.persistent_memory_size());
     }
   }
   EXPECT_TRUE(found_table_init);
   EXPECT_TRUE(found_hashtable);
 }
 
-#if defined(PLATFORM_GOOGLE)
-namespace {
-
-SessionOptions GetSessionOption(int num_cpu_cores, int num_gpus) {
-  SessionOptions options;
-  // Copied from single_machine.h
-  (*options.config.mutable_device_count())["CPU"] = 1;
-  if (num_gpus > 0) {
-    (*options.config.mutable_device_count())["GPU"] = num_gpus;
-  }
-  CHECK_GE(num_cpu_cores, 1);
-  options.config.set_intra_op_parallelism_threads(num_cpu_cores);
-  options.config.add_session_inter_op_thread_pool()->set_num_threads(
-      num_cpu_cores);
-  return options;
-}
-
-Status GetDeviceMemoryStats(
-    const SessionOptions& session_option,
-    std::unordered_map<string, AllocatorStats>* allocator_stats_by_device) {
-  std::vector<Device*> devices;
-  TF_RETURN_IF_ERROR(DeviceFactory::AddDevices(session_option,
-                                               "" /* name_prefix */, &devices));
-  allocator_stats_by_device->clear();
-  for (Device* device : devices) {
-    AllocatorStats stats;
-    auto* allocator = device->GetAllocator(AllocatorAttributes());
-    if (!allocator->TracksAllocationSizes()) {
-      return Status(error::INVALID_ARGUMENT,
-                    "Tracking allocation is not enabled.");
-    }
-    allocator->GetStats(&stats);
-    (*allocator_stats_by_device)[device->name()] = stats;
-    delete device;
-  }
-  return Status::OK();
-}
-
-}  // namespace
-
-TEST_F(SingleMachineTest, ReleaseMemoryAfterDestruction) {
+GrapplerItem CreateGrapplerItemWithResourceMemory() {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
 
   // Add a variable and initializer.
@@ -559,35 +525,88 @@ TEST_F(SingleMachineTest, ReleaseMemoryAfterDestruction) {
   item.fetch.push_back("var_matmul");
   item.fetch.push_back("dequeue");
 
-  // Run the graph
+  return item;
+}
+
+#if defined(PLATFORM_GOOGLE)
+TEST_F(SingleMachineTest, ReleaseMemoryAfterDestruction) {
+  GrapplerItem item = CreateGrapplerItemWithResourceMemory();
   TF_CHECK_OK(cluster_->Initialize(item));
-  EnableCPUAllocatorStats(true);
 
-  SessionOptions options =
-      GetSessionOption(3 /* cpu cores */, 0 /* num gpus */);
-  std::unordered_map<string, AllocatorStats> device_memory_before;
-  TF_CHECK_OK(GetDeviceMemoryStats(options, &device_memory_before));
-  EXPECT_EQ(device_memory_before.size(), 1);
+  std::unordered_map<string, uint64> device_peak_memory_before;
+  TF_CHECK_OK(cluster_->GetPeakMemoryUsage(&device_peak_memory_before));
+  EXPECT_EQ(device_peak_memory_before.size(), 1);
+  // There might be a bit memory used before session's running anything.
+  EXPECT_LT(device_peak_memory_before.begin()->second, 200);
 
   RunMetadata metadata;
   TF_CHECK_OK(cluster_->Run(item.graph, item.feed, item.fetch, &metadata));
 
   // Check there is memory that is not released.
-  std::unordered_map<string, AllocatorStats> device_memory;
-  TF_CHECK_OK(GetDeviceMemoryStats(options, &device_memory));
-  EXPECT_EQ(device_memory.size(), 1);
-  EXPECT_GT(device_memory.begin()->second.bytes_in_use, 0);
+  std::unordered_map<string, uint64> device_peak_memory;
+  TF_CHECK_OK(cluster_->GetPeakMemoryUsage(&device_peak_memory));
+  EXPECT_EQ(device_peak_memory.size(), 1);
+  EXPECT_GT(device_peak_memory.begin()->second, 0);
 
-  // Reset cluster_ would release all memory.
-  cluster_.reset();
-  std::unordered_map<string, AllocatorStats> device_memory_after;
-  TF_CHECK_OK(GetDeviceMemoryStats(options, &device_memory_after));
+  // Reprovisioning the cluster would release all memory.
+  TF_CHECK_OK(cluster_->Shutdown());
+  TF_CHECK_OK(cluster_->Provision());
+  std::unordered_map<string, uint64> device_peak_memory_after;
+  TF_CHECK_OK(cluster_->GetPeakMemoryUsage(&device_peak_memory_after));
+  TF_CHECK_OK(cluster_->Shutdown());
 
   // Check memory used by resources are released after cluster destruction.
-  EXPECT_EQ(device_memory_before.size(), 1);
-  EXPECT_EQ(device_memory_after.size(), 1);
-  EXPECT_EQ(device_memory_before.begin()->second.bytes_in_use, 0);
-  EXPECT_EQ(device_memory_after.begin()->second.bytes_in_use, 0);
+  EXPECT_EQ(device_peak_memory_before.size(), 1);
+  EXPECT_EQ(device_peak_memory_after.size(), 1);
+  EXPECT_LT(device_peak_memory_before.begin()->second, 200);
+  EXPECT_LT(device_peak_memory_after.begin()->second, 200);
+}
+
+TEST_F(SingleMachineTest, PeakMemory) {
+  GrapplerItem item = CreateGrapplerItemWithResourceMemory();
+  TF_CHECK_OK(cluster_->Initialize(item));
+
+  RunMetadata metadata;
+  TF_CHECK_OK(cluster_->Run(item.graph, item.feed, item.fetch, &metadata));
+
+  std::unordered_map<string, uint64> device_peak_memory;
+  TF_CHECK_OK(cluster_->GetPeakMemoryUsage(&device_peak_memory));
+  ASSERT_NE(
+      device_peak_memory.find("/job:localhost/replica:0/task:0/device:CPU:0"),
+      device_peak_memory.end());
+  uint64 cpu_memory =
+      device_peak_memory["/job:localhost/replica:0/task:0/device:CPU:0"];
+  EXPECT_GT(cpu_memory, 0);
+
+  TF_CHECK_OK(cluster_->Shutdown());
+  TF_CHECK_OK(cluster_->Provision());
+  device_peak_memory.clear();
+  TF_CHECK_OK(cluster_->GetPeakMemoryUsage(&device_peak_memory));
+  TF_CHECK_OK(cluster_->Shutdown());
+  ASSERT_NE(
+      device_peak_memory.find("/job:localhost/replica:0/task:0/device:CPU:0"),
+      device_peak_memory.end());
+  cpu_memory =
+      device_peak_memory["/job:localhost/replica:0/task:0/device:CPU:0"];
+  EXPECT_LT(cpu_memory, 100);
+}
+
+TEST_F(SingleMachineTest, PeakMemoryStatsNotEnabled) {
+  GrapplerItem item = CreateGrapplerItemWithResourceMemory();
+
+  TF_CHECK_OK(cluster_->Shutdown());
+  cluster_.reset();
+  SingleMachine cluster(60 /* timout_s */, 3 /* num_cpu_cores */,
+                        0 /* num_gpus */);
+
+  TF_CHECK_OK(cluster.Provision());
+  TF_CHECK_OK(cluster.Initialize(item));
+
+  std::unordered_map<string, uint64> device_peak_memory;
+  Status s = cluster.GetPeakMemoryUsage(&device_peak_memory);
+  TF_CHECK_OK(cluster.Shutdown());
+  ASSERT_FALSE(s.ok());
+  EXPECT_EQ(s.code(), errors::Code::INVALID_ARGUMENT);
 }
 #endif
 
diff --git a/tensorflow/core/grappler/clusters/virtual_cluster.cc b/tensorflow/core/grappler/clusters/virtual_cluster.cc
index e1f5925f7e56b31babedf0cf274f7bf482883d4c..ae70c9860823dae1a85ba20e00afe15b218cd2b4 100644
--- a/tensorflow/core/grappler/clusters/virtual_cluster.cc
+++ b/tensorflow/core/grappler/clusters/virtual_cluster.cc
@@ -25,14 +25,16 @@ namespace grappler {
 
 VirtualCluster::VirtualCluster(
     const std::unordered_map<string, DeviceProperties>& devices)
-    : Cluster(0), node_estimator_(new OpLevelCostEstimator()) {
+    : Cluster(0),
+      node_estimator_(new OpLevelCostEstimator()),
+      node_manager_(new FirstReadyManager()) {
   devices_ = devices;
 }
 
 VirtualCluster::VirtualCluster(
     const std::unordered_map<string, DeviceProperties>& devices,
-    OpLevelCostEstimator* node_estimator)
-    : Cluster(0), node_estimator_(node_estimator) {
+    OpLevelCostEstimator* node_estimator, ReadyNodeManager* node_manager)
+    : Cluster(0), node_estimator_(node_estimator), node_manager_(node_manager) {
   devices_ = devices;
 }
 VirtualCluster::~VirtualCluster() {}
@@ -54,7 +56,7 @@ Status VirtualCluster::Run(const GraphDef& graph,
   item.graph = graph;
   item.feed = feed;
   item.fetch = fetch;
-  VirtualScheduler scheduler(&item, true, this);
+  VirtualScheduler scheduler(&item, true, this, node_manager_.get());
   TF_RETURN_IF_ERROR(scheduler.Init());
 
   if (metadata) {
@@ -96,6 +98,33 @@ Status VirtualCluster::Run(const GraphDef& graph,
   if (metadata) {
     scheduler.Summary(metadata);
   }
+
+  const std::unordered_map<string, DeviceProperties>& device = GetDevices();
+  std::unordered_map<string, int64> peak_mem_usage =
+      scheduler.GetPeakMemoryUsage();
+  for (const auto& mem_usage : peak_mem_usage) {
+    const string& device_name = mem_usage.first;
+    auto it = device.find(device_name);
+    if (it == device.end()) {
+      // It's probably the fake send/recv device. Eventually we'll need to
+      // remove this fake device to ensure proper memory accounting for
+      // multi-device settings.
+      continue;
+    }
+    const DeviceProperties& dev = it->second;
+    if (dev.memory_size() <= 0) {
+      // Available device memory unknown
+      continue;
+    }
+    int64 peak_mem = mem_usage.second;
+    if (peak_mem >= dev.memory_size()) {
+      return errors::ResourceExhausted(
+          "Graph requires ", peak_mem, " bytes of memory on device ",
+          device_name, " to run ", " but device only has ", dev.memory_size(),
+          " available.");
+    }
+  }
+
   return Status::OK();
 }
 
diff --git a/tensorflow/core/grappler/clusters/virtual_cluster.h b/tensorflow/core/grappler/clusters/virtual_cluster.h
index a74911cb23a3fcdb8f41de624c4e5c9a01602577..dde70bab7a391e7573560b3202e9f0f7a0d69cae 100644
--- a/tensorflow/core/grappler/clusters/virtual_cluster.h
+++ b/tensorflow/core/grappler/clusters/virtual_cluster.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <unordered_map>
 #include "tensorflow/core/grappler/clusters/cluster.h"
 #include "tensorflow/core/grappler/costs/op_level_cost_estimator.h"
+#include "tensorflow/core/grappler/costs/virtual_scheduler.h"
 #include "tensorflow/core/protobuf/device_properties.pb.h"
 
 namespace tensorflow {
@@ -31,10 +32,13 @@ class VirtualCluster : public Cluster {
  public:
   VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices);
   VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices,
-                 OpLevelCostEstimator* node_estimator);
+                 OpLevelCostEstimator* node_estimator,
+                 ReadyNodeManager* node_manager);
 
   ~VirtualCluster() override;
 
+  string type() const override { return "virtual"; }
+
   Status Provision() override;
   Status Initialize(const GrapplerItem& item) override;
   Status Run(const GraphDef& item,
@@ -43,6 +47,7 @@ class VirtualCluster : public Cluster {
 
  private:
   std::unique_ptr<OpLevelCostEstimator> node_estimator_;
+  std::unique_ptr<ReadyNodeManager> node_manager_;
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/clusters/virtual_cluster_test.cc b/tensorflow/core/grappler/clusters/virtual_cluster_test.cc
index ec21f5f4260d86129b63158d0d389052a8d7e82f..357b306b93be936bc81c818ca0f2ecbefdeb351a 100644
--- a/tensorflow/core/grappler/clusters/virtual_cluster_test.cc
+++ b/tensorflow/core/grappler/clusters/virtual_cluster_test.cc
@@ -14,11 +14,14 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/clusters/virtual_cluster.h"
+#include "tensorflow/cc/framework/scope.h"
+#include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/cost_graph.pb.h"
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -37,18 +40,26 @@ class VirtualClusterTest : public ::testing::Test {
     cpu_device.set_l1_cache_size(32 * 1024);
     cpu_device.set_l2_cache_size(256 * 1024);
     cpu_device.set_l3_cache_size(4 * 1024 * 1024);
+    cpu_device.set_memory_size(1024 * 1024);
     std::unordered_map<string, DeviceProperties> devices;
     devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device;
     cluster_.reset(new VirtualCluster(devices));
     TF_CHECK_OK(cluster_->Provision());
   }
 
-  void TearDown() override { cluster_.reset(); }
+  void TearDown() override {
+    TF_CHECK_OK(cluster_->Shutdown());
+    cluster_.reset();
+  }
 
  protected:
   std::unique_ptr<VirtualCluster> cluster_;
 };
 
+TEST_F(VirtualClusterTest, ClusterType) {
+  CHECK_EQ("virtual", cluster_->type());
+}
+
 TEST_F(VirtualClusterTest, CostModel) {
   TrivialTestGraphInputYielder fake_input(4, 1, 10, false,
                                           cluster_->GetDeviceNames());
@@ -91,6 +102,21 @@ TEST_F(VirtualClusterTest, CostModel) {
   }
 }
 
+TEST_F(VirtualClusterTest, OutOfMemory) {
+  tensorflow::Scope root = tensorflow::Scope::NewRootScope();
+  // Create a large variable that can't fit in memory.
+  auto zero = ops::Variable(root.WithOpName("zero"), {1024, 1024}, DT_FLOAT);
+  auto identity = ops::Identity(root.WithOpName("i"), zero);
+  auto identity2 = ops::Identity(root.WithOpName("i2"), identity);
+  GrapplerItem item;
+  TF_CHECK_OK(root.ToGraphDef(&item.graph));
+  item.fetch.push_back("i2");
+
+  TF_CHECK_OK(cluster_->Initialize(item));
+  Status s = cluster_->Run(item.graph, item.feed, item.fetch, nullptr);
+  EXPECT_EQ(error::RESOURCE_EXHAUSTED, s.code());
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD
index f02cb51038a1d34475d9c13b0ca14b7137c41f35..7abc155c19db06db81a62672f3f9f333272d5a3f 100644
--- a/tensorflow/core/grappler/costs/BUILD
+++ b/tensorflow/core/grappler/costs/BUILD
@@ -16,7 +16,10 @@ filegroup(
 
 filegroup(
     name = "graph_properties_testdata",
-    srcs = glob(["graph_properties_testdata/*.pbtxt"]),
+    srcs = glob([
+        "graph_properties_testdata/*.pbtxt",
+        "graph_properties_testdata/*.pbtxt.html",
+    ]),
     visibility = ["//visibility:public"],
 )
 
@@ -50,6 +53,7 @@ cc_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/clusters:cluster",
     ],
 )
@@ -132,8 +136,8 @@ tf_cuda_library(
     visibility = ["//visibility:public"],
     deps = [
         ":op_performance_data_cc",
-        "//tensorflow/core:core_cpu_base",
         "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_proto_parsing",
         "//tensorflow/core:protos_all_cc",
@@ -306,6 +310,7 @@ cc_library(
         ":virtual_placer",
         ":virtual_scheduler",
         "//tensorflow/core:core_cpu_base",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator.cc b/tensorflow/core/grappler/costs/analytical_cost_estimator.cc
index ca66f7c75a5ad7eb6970004da3c9a2f92c85479d..c8ba4dfbdadf50eab22ee2f4af898fe949572c66 100644
--- a/tensorflow/core/grappler/costs/analytical_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/analytical_cost_estimator.cc
@@ -34,13 +34,15 @@ AnalyticalCostEstimator::AnalyticalCostEstimator(Cluster* cluster,
                                                  bool use_static_shapes)
     : cluster_(cluster),
       node_estimator_(new OpLevelCostEstimator()),
+      node_manager_(VirtualScheduler::ReadyNodeManagerFactory("FirstReady")),
       use_static_shapes_(use_static_shapes) {}
 
 AnalyticalCostEstimator::AnalyticalCostEstimator(
     Cluster* cluster, OpLevelCostEstimator* node_estimator,
-    bool use_static_shapes)
+    ReadyNodeManager* node_manager, bool use_static_shapes)
     : cluster_(cluster),
       node_estimator_(node_estimator),
+      node_manager_(node_manager),
       use_static_shapes_(use_static_shapes) {}
 
 Status AnalyticalCostEstimator::Initialize(const GrapplerItem& item) {
@@ -61,7 +63,9 @@ Status AnalyticalCostEstimator::PredictCosts(const GraphDef& optimized_graph,
     }
   }
   std::vector<string> inaccurate_nodes;
-  VirtualScheduler scheduler(&item, use_static_shapes_, cluster_);
+  int nodes_executed = 0;
+  VirtualScheduler scheduler(&item, use_static_shapes_, cluster_,
+                             node_manager_.get());
   auto status = scheduler.Init();
   if (!status.ok()) {
     costs->execution_time = Costs::Duration::max();
@@ -70,6 +74,7 @@ Status AnalyticalCostEstimator::PredictCosts(const GraphDef& optimized_graph,
 
   Costs node_costs;
   do {
+    ++nodes_executed;
     OpContext op_context = scheduler.GetCurrNode();
     const string& op_name = op_context.name;
 
@@ -104,8 +109,7 @@ Status AnalyticalCostEstimator::PredictCosts(const GraphDef& optimized_graph,
 
   RunMetadata run_metadata;
   *costs = scheduler.Summary(&run_metadata);
-  VLOG(1) << inaccurate_nodes.size() << " out of "
-          << optimized_graph.node_size()
+  VLOG(1) << inaccurate_nodes.size() << " out of " << nodes_executed
           << " nodes have inaccurate time estimation";
   if (VLOG_IS_ON(3)) {
     for (const auto& node : inaccurate_nodes) {
diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator.h b/tensorflow/core/grappler/costs/analytical_cost_estimator.h
index cf9163302c6740e16bbd8675ddebb23a365494ea..dd2738e088023ae387f269152c3ad9d33bcfd645 100644
--- a/tensorflow/core/grappler/costs/analytical_cost_estimator.h
+++ b/tensorflow/core/grappler/costs/analytical_cost_estimator.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/costs/cost_estimator.h"
 #include "tensorflow/core/grappler/costs/op_level_cost_estimator.h"
+#include "tensorflow/core/grappler/costs/virtual_scheduler.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/lib/core/status.h"
 
@@ -39,9 +40,10 @@ class AnalyticalCostEstimator : public CostEstimator {
   // Does not take ownership of cluster.
   AnalyticalCostEstimator(Cluster* cluster, bool use_static_shapes);
   // Does not take ownership of the cluster, but takes ownership of the
-  // node_estimator
+  // node_estimator and the node_manager
   AnalyticalCostEstimator(Cluster* cluster,
                           OpLevelCostEstimator* node_estimator,
+                          ReadyNodeManager* node_manager,
                           bool use_static_shapes);
   ~AnalyticalCostEstimator() override {}
 
@@ -59,6 +61,7 @@ class AnalyticalCostEstimator : public CostEstimator {
   Cluster* cluster_;  // Not owned.
   GrapplerItem item_;
   std::unique_ptr<OpLevelCostEstimator> node_estimator_;
+  std::unique_ptr<ReadyNodeManager> node_manager_;
   bool use_static_shapes_;
 };
 
diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc
index d1f3e36aa8164c4a80537b8affc324503af5488b..1c2c1713834a11d0a7c85247e9a7e4cdf779c592 100644
--- a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc
+++ b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc
@@ -102,8 +102,14 @@ TEST_F(AnalyticalCostEstimatorTest, SimpleTest) {
   Costs summary;
   TF_ASSERT_OK(estimator.PredictCosts(item.graph, &cost_graph, &summary));
 
-  EXPECT_EQ(Costs::NanoSeconds(9156), summary.execution_time);
-  EXPECT_FALSE(summary.inaccurate);
+  EXPECT_EQ(Costs::NanoSeconds(9150), summary.execution_time);
+
+  // Make this estimate accurate:
+  // TODO(http://b/70031255): Accurate estimator for RandomUniform op needed
+  // TODO(http://b/70031363): Accurate estimator for Softmax needed
+  //
+  // Change to EXPECT_FALSE when the above TODOs are done:
+  EXPECT_TRUE(summary.inaccurate);
 }
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h
index cf9fa4fdaf947cba8c38d6eb3ca67d3a43f35d29..852e69737baa14e0d05de1fdcb6fc24a143f6a2d 100644
--- a/tensorflow/core/grappler/costs/cost_estimator.h
+++ b/tensorflow/core/grappler/costs/cost_estimator.h
@@ -40,6 +40,16 @@ struct Costs {
   // Builds a Costs structure with all zero values, rather than unknowns.
   static inline Costs ZeroCosts();
 
+  struct MilliSeconds : std::chrono::milliseconds {
+    MilliSeconds() : std::chrono::milliseconds(0) {}
+    MilliSeconds(double d) : std::chrono::milliseconds(static_cast<int64>(d)) {}
+    MilliSeconds(const std::chrono::milliseconds& d)
+        : std::chrono::milliseconds(d) {}
+    MilliSeconds& operator=(const std::chrono::milliseconds& d) {
+      std::chrono::milliseconds::operator=(d);
+      return *this;
+    }
+  };
   struct MicroSeconds : std::chrono::microseconds {
     MicroSeconds() : std::chrono::microseconds(0) {}
     MicroSeconds(double d) : std::chrono::microseconds(static_cast<int64>(d)) {}
@@ -49,6 +59,9 @@ struct Costs {
       std::chrono::microseconds::operator=(d);
       return *this;
     }
+    MilliSeconds asMilliSeconds() const {
+      return std::chrono::duration_cast<std::chrono::milliseconds>(*this);
+    }
   };
   struct NanoSeconds : std::chrono::nanoseconds {
     NanoSeconds() : std::chrono::nanoseconds(0) {}
@@ -60,9 +73,10 @@ struct Costs {
       return *this;
     }
     MicroSeconds asMicroSeconds() const {
-      std::chrono::microseconds us =
-          std::chrono::duration_cast<std::chrono::microseconds>(*this);
-      return MicroSeconds(us);
+      return std::chrono::duration_cast<std::chrono::microseconds>(*this);
+    }
+    MilliSeconds asMilliSeconds() const {
+      return std::chrono::duration_cast<std::chrono::milliseconds>(*this);
     }
   };
 
@@ -100,6 +114,10 @@ struct Costs {
   std::unordered_map<string, uint64> estimated_max_memory_per_device;
 };
 
+inline std::ostream& operator<<(std::ostream& os, const Costs::MilliSeconds d) {
+  os << d.count() << "ms";
+  return os;
+}
 inline std::ostream& operator<<(std::ostream& os, const Costs::MicroSeconds d) {
   os << d.count() << "us";
   return os;
diff --git a/tensorflow/core/grappler/costs/graph_memory.cc b/tensorflow/core/grappler/costs/graph_memory.cc
index 6022c47e8f689c6d9f262caae0c5e86f4cf6fb82..3604de392f803b8b2eb65e796848c2c3ec6a90e5 100644
--- a/tensorflow/core/grappler/costs/graph_memory.cc
+++ b/tensorflow/core/grappler/costs/graph_memory.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/costs/graph_memory.h"
 #include <list>
 #include "tensorflow/core/framework/allocation_description.pb.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/framework/tensor_description.pb.h"
@@ -32,7 +33,17 @@ Status GraphMemory::InferStatically(
     const std::unordered_map<string, DeviceProperties>& devices) {
   VirtualCluster cluster(devices);
   TF_RETURN_IF_ERROR(cluster.Provision());
-  return InferDynamically(&cluster);
+  TF_RETURN_IF_ERROR(cluster.Initialize(item_));
+  RunMetadata metadata;
+  Status s = cluster.Run(item_.graph, item_.feed, item_.fetch, &metadata);
+  // The virtual cluster returns the RESOURCE_EXHAUSTED error when it detects
+  // that the model would run out of memory. We still get the metadata we need
+  // out of the simulation, so we just ignore this error.
+  if (!s.ok() && s.code() != error::RESOURCE_EXHAUSTED) {
+    return s;
+  }
+  InferFromTrace(metadata.step_stats());
+  return Status::OK();
 }
 
 Status GraphMemory::InferDynamically(Cluster* cluster) {
@@ -153,6 +164,8 @@ void GraphMemory::InferFromTrace(const StepStats& timeline) {
 
   NodeMap node_map(&item_.graph);
   for (const auto& dev_stats : timeline.dev_stats()) {
+    const string& device_name = dev_stats.device();
+    const bool is_gpu = (device_name.find("GPU:") || device_name.find("gpu:"));
     std::list<LiveTensor>& device_tensors =
         live_tensors_per_device[dev_stats.device()];
     for (const auto& node_stats : dev_stats.node_stats()) {
@@ -184,7 +197,24 @@ void GraphMemory::InferFromTrace(const StepStats& timeline) {
         // graph (e.g _Send/_Recv nodes).
         continue;
       }
-      for (const string& input : node->input()) {
+      std::unordered_set<int> swapped_inputs;
+      if (is_gpu) {
+        auto it = node->attr().find("_swap_to_host");
+        if (it != node->attr().end()) {
+          const AttrValue& val = it->second;
+          for (int port_id : val.list().i()) {
+            swapped_inputs.insert(port_id);
+          }
+        }
+      }
+      for (int i = 0; i < node->input_size(); ++i) {
+        if (swapped_inputs.find(i) != swapped_inputs.end()) {
+          // The memory of swapped inputs will be released as early as possible:
+          // therefore ignore this input when determining the deallocation time
+          // of the tensor.
+          continue;
+        }
+        const string& input = node->input(i);
         int position;
         string input_node = ParseNodeName(input, &position);
         if (position < 0) {
diff --git a/tensorflow/core/grappler/costs/graph_memory_test.cc b/tensorflow/core/grappler/costs/graph_memory_test.cc
index 6f3522b068bdb74eb98d3e6071d4d4b2e21c9ff6..95170ba49b77ef1be629cfa77bc4a333d2315e4f 100644
--- a/tensorflow/core/grappler/costs/graph_memory_test.cc
+++ b/tensorflow/core/grappler/costs/graph_memory_test.cc
@@ -134,6 +134,62 @@ TEST_F(GraphMemoryTest, MultiDevice) {
   EXPECT_EQ(gpu_expected, gpu_tensors);
 }
 
+TEST_F(GraphMemoryTest, GpuSwapping) {
+  TrivialTestGraphInputYielder fake_input(4, 2, 1024 * 1024, false, {"/GPU:0"});
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+  item.feed.clear();
+
+  {
+    // Estimate the max memory usage for the graph.
+    GraphMemory memory(item);
+    Status s = memory.InferStatically(devices_);
+    TF_CHECK_OK(s);
+
+    const GraphMemory::MemoryUsage& gpu_mem =
+        memory.GetPeakMemoryUsage("/GPU:0");
+    EXPECT_EQ(20971520, gpu_mem.used_memory);
+    std::set<string> gpu_tensors;
+    for (const auto& t : gpu_mem.live_tensors) {
+      gpu_tensors.insert(strings::StrCat(t.node, ":", t.output_id));
+    }
+    std::set<string> gpu_expected;
+    gpu_expected.insert("Square:0");
+    gpu_expected.insert("Square_1:0");
+    gpu_expected.insert("AddN:0");
+    gpu_expected.insert("AddN_1:0");
+    gpu_expected.insert("AddN_2:0");
+    EXPECT_EQ(gpu_expected, gpu_tensors);
+  }
+
+  {
+    // Swap the first input to node AddN_1: its fanin (the square nodes) should
+    // not appear in the max cut anymore.
+    for (auto& node : *item.graph.mutable_node()) {
+      if (node.name() == "AddN_1") {
+        (*node.mutable_attr())["_swap_to_host"].mutable_list()->add_i(0);
+      }
+    }
+    GraphMemory memory(item);
+    Status s = memory.InferStatically(devices_);
+    TF_CHECK_OK(s);
+    const GraphMemory::MemoryUsage& new_gpu_mem =
+        memory.GetPeakMemoryUsage("/GPU:0");
+    EXPECT_EQ(20971520, new_gpu_mem.used_memory);
+    std::set<string> new_gpu_tensors;
+    for (const auto& t : new_gpu_mem.live_tensors) {
+      new_gpu_tensors.insert(strings::StrCat(t.node, ":", t.output_id));
+    }
+    std::set<string> new_gpu_expected;
+    new_gpu_expected.insert("AddN:0");
+    new_gpu_expected.insert("AddN_1:0");
+    new_gpu_expected.insert("AddN_2:0");
+    new_gpu_expected.insert("AddN_3:0");
+    new_gpu_expected.insert("AddN_4:0");
+    EXPECT_EQ(new_gpu_expected, new_gpu_tensors);
+  }
+}
+
 TEST_F(GraphMemoryTest, CtrlDependencies) {
   // Build a simple graph with a control dependency.
   Scope s = Scope::NewRootScope();
diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index dd389de636088f11da92fcd33ec13c305404ffb8..243ca9121c70d91631b474da62281bc56a476d8a 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/grappler/costs/utils.h"
+#include "tensorflow/core/grappler/utils.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -264,6 +265,87 @@ bool IsEnterWithQueue(const Node& node) {
   return false;
 }
 
+bool HasAnyUnknownDimensions(const TensorShapeProto& proto) {
+  if (proto.unknown_rank()) {
+    return true;
+  }
+  for (const auto& dim : proto.dim()) {
+    if (dim.size() < 0) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void VerboseLogUnknownDimensionSources(
+    const Graph& graph,
+    const std::map<string, std::vector<OpInfo::TensorProperties>>&
+        input_properties_map,
+    const std::map<string, std::vector<OpInfo::TensorProperties>>&
+        output_properties_map) {
+  if (!VLOG_IS_ON(2)) {
+    return;
+  }
+
+  VLOG(2) << "Nodes with known inputs, but with unknown output dimensions:";
+
+  // Find all nodes in the graph for which we
+  // do not have any unknown dimensions in their inputs, but
+  // we have some unknown dimensions in their outputs.
+  std::map<string, int> op_to_count;
+  for (const Node* const node : graph.nodes()) {
+    if (node->num_outputs() == 0) {
+      continue;
+    }
+
+    const auto& input_properties = input_properties_map.at(node->name());
+    const auto& output_properties = output_properties_map.at(node->name());
+
+    bool has_unknown_inputs = false;
+    for (int i = 0; i < node->num_inputs(); ++i) {
+      if (HasAnyUnknownDimensions(input_properties[i].shape())) {
+        has_unknown_inputs = true;
+        break;
+      }
+    }
+
+    if (has_unknown_inputs) {
+      continue;
+    }
+
+    for (int i = 0; i < node->num_outputs(); ++i) {
+      if (HasAnyUnknownDimensions(output_properties[i].shape())) {
+        string inputs = "input_shapes=[";
+        for (int i = 0; i < node->num_inputs(); ++i) {
+          inputs +=
+              PartialTensorShape::DebugString(input_properties[i].shape());
+        }
+        inputs += "]";
+
+        string outputs = "output_shapes=[";
+        for (int i = 0; i < node->num_outputs(); ++i) {
+          outputs +=
+              PartialTensorShape::DebugString(output_properties[i].shape());
+        }
+        outputs += "]";
+
+        VLOG(2) << "Node: " << node->name() << ", Op: " << node->def().op()
+                << ", " << inputs << ", " << outputs;
+
+        op_to_count[node->def().op()]++;
+
+        // don't log again for this node
+        break;
+      }
+    }
+  }
+  VLOG(2) << "Op types with known inputs, but with unknown output dimensions "
+          << "(format: <op_type> (<count>)):";
+  for (const auto& p : op_to_count) {
+    VLOG(2) << p.first << " (" << p.second << ")";
+  }
+}
+
 }  // namespace
 
 // Queue of nodes to process. Nodes can be enqueued in any order, but will be
@@ -290,7 +372,7 @@ class TopoQueue {
   // use their id to ensure they're sorted topologically.
   struct CompareNodes {
     bool operator()(const Node* lhs, const Node* rhs) const {
-      return lhs->id() > rhs->id();
+      return lhs->id() < rhs->id();
     }
   };
   std::set<const Node*, CompareNodes> queue_;
@@ -312,9 +394,15 @@ class SymbolicShapeRefiner {
   Status UpdateNode(const Node* node, bool relax, bool* refined) {
     return shape_refiner_->UpdateNode(node, relax, refined);
   }
-  Status SetShape(const Node* node, int output_port,
-                  shape_inference::ShapeHandle shape) {
-    return shape_refiner_->SetShape(node, output_port, shape);
+  Status SetUnknownShape(const Node* node, int output_port) {
+    shape_inference::ShapeHandle shape =
+        GetUnknownOutputShape(node, output_port);
+    InferenceContext* ctx = GetContext(node);
+    if (ctx == nullptr) {
+      return errors::InvalidArgument("Missing context");
+    }
+    ctx->set_output(output_port, shape);
+    return Status::OK();
   }
 
   struct ShapeId {
@@ -605,6 +693,10 @@ Status GraphProperties::UpdateMergeNode(SymbolicShapeRefiner* shape_refiner,
   InferenceContext* c = shape_refiner->GetContext(node);
   CHECK_NE(c, nullptr);
 
+  ShapeHandle out1;
+  TF_RETURN_IF_ERROR(c->WithRank(c->output(1), 0, &out1));
+  c->set_output(1, out1);
+
   ShapeHandle out;
   bool out_initialized = false;
   for (const Edge* e : node->in_edges()) {
@@ -639,13 +731,29 @@ Status GraphProperties::UpdateMergeNode(SymbolicShapeRefiner* shape_refiner,
 
   if (!shape_refiner->EquivalentShapes(out, c->output(0))) {
     c->set_output(0, out);
-    c->set_output(1, c->Scalar());
     new_shapes->push(node);
   }
 
   return Status::OK();
 }
 
+Status GraphProperties::OverwriteFedPorts(
+    SymbolicShapeRefiner* shape_refiner,
+    const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
+    const Node* node, TopoQueue* new_shapes) const {
+  auto it = fed_ports.find(node->name());
+  Status status;
+  if (it != fed_ports.end()) {
+    // It is possible to feed node output ports with tensors of any shape: as a
+    // result, the shape of a fed port is completely unknown.
+    for (const int output_port : it->second) {
+      status.Update(shape_refiner->SetUnknownShape(node, output_port));
+    }
+    new_shapes->push(node);
+  }
+  return status;
+}
+
 // Manually propagate the input shape for Enter nodes and update any Merge node
 // outputs.
 Status GraphProperties::UpdateEnter(SymbolicShapeRefiner* shape_refiner,
@@ -673,9 +781,10 @@ Status GraphProperties::UpdateEnter(SymbolicShapeRefiner* shape_refiner,
   return Status::OK();
 }
 
-Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner,
-                                     bool relax, const Node* n,
-                                     TopoQueue* new_shapes) {
+Status GraphProperties::UpdateShapes(
+    SymbolicShapeRefiner* shape_refiner, bool relax,
+    const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
+    const Node* n, TopoQueue* new_shapes) const {
   if (n->IsEnter()) {
     // The Enter shape function always forwards an UnknownShape, so do the right
     // thing here.
@@ -695,7 +804,9 @@ Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner,
       }
     }
   }
-  return Status::OK();
+  // Nodes can be fed with any shape. The TensorFlow shape inference code can't
+  // handle this properly, so overwrite its behavior here.
+  return OverwriteFedPorts(shape_refiner, fed_ports, n, new_shapes);
 }
 
 // Propagates the shapes in the transitive fan-out of <new_shapes>.
@@ -703,6 +814,7 @@ Status GraphProperties::PropagateShapes(
     SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes,
     const std::unordered_map<const Node*, std::unordered_set<const Node*>>&
         resources,
+    const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
     int num_loops) const {
   // Limit the number of iterations to prevent infinite loops in the presence of
   // incorrect shape functions. The algoritm should converge in at most
@@ -728,8 +840,8 @@ Status GraphProperties::PropagateShapes(
       for (const Edge* e : n->out_edges()) {
         if (!e->IsControlEdge()) {
           const Node* fanout = e->dst();
-          TF_RETURN_IF_ERROR(
-              UpdateShapes(shape_refiner, relax, fanout, new_shapes));
+          TF_RETURN_IF_ERROR(UpdateShapes(shape_refiner, relax, fed_ports,
+                                          fanout, new_shapes));
         }
       }
     }
@@ -744,6 +856,10 @@ Status GraphProperties::PropagateShapes(
   } while (!new_shapes->empty() &&
            num_resource_iterations++ < max_resource_iterations);
 
+  if (!new_shapes->empty()) {
+    return errors::Internal("Shape inference failed to converge");
+  }
+
   return Status::OK();
 }
 
@@ -803,7 +919,7 @@ Status GraphProperties::UpdateResource(
   return Status::OK();
 }
 
-Status GraphProperties::InferStatically() {
+Status GraphProperties::InferStatically(bool assume_valid_feeds) {
   Graph graph(OpRegistry::Global());
   FunctionLibraryDefinition function_library(graph.op_registry(),
                                              item_.graph.library());
@@ -820,11 +936,21 @@ Status GraphProperties::InferStatically() {
   Status s = ImportGraphDef(options, item_.graph, &graph, &shape_refiner);
   TF_RETURN_IF_ERROR(s);
 
+  std::unordered_map<string, std::unordered_set<int>> fed_ports;
+  if (!assume_valid_feeds) {
+    for (const auto& feed : item_.feed) {
+      int port_index = 0;
+      string node_name = ParseNodeName(feed.first, &port_index);
+      fed_ports[node_name].insert(port_index);
+    }
+  }
+
   // List the resources and the nodes using them. Also collect the Enter and
   // Merge nodes.
   std::unordered_map<const Node*, std::unordered_set<const Node*>> resources;
   std::unordered_set<const Node*> enter_nodes;
   std::unordered_set<const Node*> merge_nodes;
+  std::unordered_set<const Node*> fed_nodes;
   int num_loops = 0;
   for (const Node* const node : graph.nodes()) {
     for (int i = 0; i < node->num_inputs(); ++i) {
@@ -841,6 +967,9 @@ Status GraphProperties::InferStatically() {
     } else if (node->IsNextIteration()) {
       ++num_loops;
     }
+    if (fed_ports.find(node->name()) != fed_ports.end()) {
+      fed_nodes.insert(node);
+    }
   }
 
   SymbolicShapeRefiner refiner(&shape_refiner);
@@ -855,15 +984,22 @@ Status GraphProperties::InferStatically() {
     // Force the propagation of shapes of Enter nodes manually (the Enter shape
     // function always forwards an UnknownShape).
     for (const Node* node : enter_nodes) {
-      TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes));
+      TF_RETURN_IF_ERROR(
+          UpdateShapes(&refiner, relax, fed_ports, node, &new_shapes));
     }
     // Seed the propagation of shapes through merge nodes.
     for (const Node* node : merge_nodes) {
-      TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes));
+      TF_RETURN_IF_ERROR(
+          UpdateShapes(&refiner, relax, fed_ports, node, &new_shapes));
+    }
+    // Also seed the propagation of shapes in the fanout of fed nodes.
+    for (const Node* node : fed_nodes) {
+      TF_RETURN_IF_ERROR(
+          OverwriteFedPorts(&refiner, fed_ports, node, &new_shapes));
     }
     // Propagate shapes normally.
-    TF_RETURN_IF_ERROR(
-        PropagateShapes(&refiner, relax, &new_shapes, resources, num_loops));
+    TF_RETURN_IF_ERROR(PropagateShapes(&refiner, relax, &new_shapes, resources,
+                                       fed_ports, num_loops));
   }
 
   // Track shapes globally across the graph.
@@ -874,6 +1010,10 @@ Status GraphProperties::InferStatically() {
     if (!node_ctx) {
       continue;
     }
+    // Skip any information that comes from fed nodes.
+    if (fed_ports.find(node->name()) != fed_ports.end()) {
+      continue;
+    }
     for (const auto& merged_shapes : node_ctx->MergedShapes()) {
       if (!shape_manager.Merge(merged_shapes.first, merged_shapes.second)
                .ok()) {
@@ -896,7 +1036,7 @@ Status GraphProperties::InferStatically() {
   }
 
   for (const Node* const node : graph.nodes()) {
-    VLOG(1) << "<Node> " << node->name();
+    VLOG(3) << "Filling in graph properties for node: " << node->name();
     auto ctx = shape_refiner.GetContext(node);
     if (!ctx) {
       continue;
@@ -948,6 +1088,10 @@ Status GraphProperties::InferStatically() {
     }
   }
 
+  // Help trace the unknown dimensions to their origins.
+  VerboseLogUnknownDimensionSources(graph, input_properties_,
+                                    output_properties_);
+
   return Status::OK();
 }
 
diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h
index 95bc5044d0a64d72daccba9a8377ffb73147e649..6fc53a7f2e7da7bae7b6f49c7b32291c981fef53 100644
--- a/tensorflow/core/grappler/costs/graph_properties.h
+++ b/tensorflow/core/grappler/costs/graph_properties.h
@@ -34,12 +34,19 @@ class TopoQueue;
 // nodes, and potentially a set of nodes to feed.
 class GraphProperties {
  public:
-  // Factory method for creating a GrapplerShapes from a MetaGraphDef.
-  // Returns nullptr if the given meta_graph cannot be converted.
   explicit GraphProperties(const GrapplerItem& item) : item_(item) {}
 
-  Status InferStatically();
+  // Infer the shapes through abstract interpretation. Feed information can be
+  // incorrect so it should be discarded to ensure correctness of the analysis.
+  // However, it can help infer shapes in the fanout of fed nodes (even though
+  // the correctness of these shapes can't be guaranteed), so in some cases
+  // (such as simulation or scheduling) it makes sense of keep these shapes.
+  Status InferStatically(bool assume_valid_feeds);
+  // Infer the shape by running the graph on the specified cluster and recording
+  // the shapes of the processed tensors.
   Status InferDynamically(Cluster* cluster);
+  // Extract the properties from a cost graph. For testing only since there is
+  // no way to ensure that the cost graph match the item.
   Status InferFromCostGraph(const CostGraphDef& cost_graph);
 
   // Stores `item_.graph` with the inferred output shapes to `output_graph_def`.
@@ -65,12 +72,6 @@ class GraphProperties {
       OpInfo::TensorProperties*);
 
  private:
-  // Inputs
-  GrapplerItem item_;
-  std::map<string, std::vector<OpInfo::TensorProperties>> input_properties_;
-  std::map<string, std::vector<OpInfo::TensorProperties>> output_properties_;
-  const std::vector<OpInfo::TensorProperties> missing_properties_;
-
   // Merges shapes <shapes_and_types>, determined from an EnqueueV2 node, into
   // <*queue_shapes_and_types>.
   static Status MergeEnqueueShapesAndTypes(
@@ -99,17 +100,31 @@ class GraphProperties {
   static Status UpdateEnter(SymbolicShapeRefiner* shape_refiner,
                             const Node* node, bool relax,
                             TopoQueue* new_shapes);
+  // Process a node that is used to feed the model.
+  Status OverwriteFedPorts(
+      SymbolicShapeRefiner* shape_refiner,
+      const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
+      const Node* node, TopoQueue* new_shapes) const;
   // Update the shapes for node 'n'. If output shapes for n have changed,
   // enqueue its fanout in 'new_shapes'.
-  static Status UpdateShapes(SymbolicShapeRefiner* shape_refiner, bool relax,
-                             const Node* n, TopoQueue* new_shapes);
+  Status UpdateShapes(
+      SymbolicShapeRefiner* shape_refiner, bool relax,
+      const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
+      const Node* n, TopoQueue* new_shapes) const;
   // Propagate the shapes for the nodes enqueued in new_shapes and their
   // transitive fanout until a fixed point is reached.
   Status PropagateShapes(
       SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes,
       const std::unordered_map<const Node*, std::unordered_set<const Node*>>&
           resources,
+      const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
       int num_loops) const;
+
+  // Data members
+  GrapplerItem item_;
+  std::map<string, std::vector<OpInfo::TensorProperties>> input_properties_;
+  std::map<string, std::vector<OpInfo::TensorProperties>> output_properties_;
+  const std::vector<OpInfo::TensorProperties> missing_properties_;
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index c11af5777a1175eaa9b8c0262808e666f1c056d7..5012069118fbe0b3d90d2e99690b2988c45a2843 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -43,7 +43,10 @@ class GraphPropertiesTest : public ::testing::Test {
     TF_CHECK_OK(cluster_->Provision());
   }
 
-  void TearDown() override { cluster_.reset(); }
+  void TearDown() override {
+    TF_CHECK_OK(cluster_->Shutdown());
+    cluster_.reset();
+  }
 
  protected:
   // Returns a string form of <p>, suitable for comparing type and shape.
@@ -73,7 +76,7 @@ TEST_F(GraphPropertiesTest, StaticProperties) {
   CHECK(fake_input.NextItem(&item));
 
   GraphProperties properties(item);
-  Status s = properties.InferStatically();
+  Status s = properties.InferStatically(true);
   TF_CHECK_OK(s);
 
   for (const auto& node : item.graph.node()) {
@@ -179,7 +182,7 @@ TEST_F(GraphPropertiesTest, Variables) {
 
   {
     GraphProperties static_properties(item);
-    TF_CHECK_OK(static_properties.InferStatically());
+    TF_CHECK_OK(static_properties.InferStatically(false));
 
     const auto props = static_properties.GetOutputProperties("Var");
     EXPECT_EQ(1, props.size());
@@ -219,7 +222,7 @@ TEST_F(GraphPropertiesTest, VarHandles) {
                   .Finalize(item.graph.add_node()));
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   const auto props = properties.GetOutputProperties("VarRead");
   EXPECT_EQ(1, props.size());
@@ -286,7 +289,7 @@ TEST_F(GraphPropertiesTest, Queues) {
   TF_CHECK_OK(root.ToGraphDef(&item.graph));
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   const auto props1 = properties.GetOutputProperties("Dequeue1");
   ASSERT_EQ(1, props1.size());
@@ -335,7 +338,7 @@ TEST_F(GraphPropertiesTest, MergeWithoutLoops) {
                                  "merge_without_loops.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> nodes{"cond/Merge", "cond/concat", "cond/concat_1"};
   std::vector<string> expected_outputs{"float: [-1,-1,1]", "float: [2,1,1]",
@@ -377,7 +380,7 @@ TEST_F(GraphPropertiesTest, WhileLoop) {
                                  "while_loop.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> nodes{"while/Merge_1", "while/NextIteration_1",
                             "while/Exit_1"};
@@ -435,7 +438,7 @@ TEST_F(GraphPropertiesTest, NestedLoop) {
                                  "nested_loop.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> outer_nodes{"while/Merge_1", "while/NextIteration_1",
                                   "while/Exit_1"};
@@ -498,7 +501,7 @@ TEST_F(GraphPropertiesTest, LoopsAndQueues) {
                                  "loops_and_queues.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> outer_nodes{"while/Merge_1", "while/NextIteration_1",
                                   "while/Exit_1"};
@@ -556,7 +559,7 @@ TEST_F(GraphPropertiesTest, LoopsAndResourceVars) {
                                  "loops_and_resource_vars.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> outer_nodes{"while/Merge_1", "while/NextIteration_1",
                                   "while/Exit_1"};
@@ -608,7 +611,7 @@ TEST_F(GraphPropertiesTest, QueuesAndLoops) {
                                  "queues_and_loops.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   std::vector<string> nodes{"while/Merge_1", "while/NextIteration_1",
                             "while/Exit_1"};
@@ -657,7 +660,7 @@ TEST_F(GraphPropertiesTest, InferRestoreOpShape) {
   item.fetch.push_back("init_restore");
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   const auto restore_props = properties.GetOutputProperties("restore");
   const OpInfo::TensorProperties& restore_prop = restore_props[0];
@@ -704,7 +707,7 @@ TEST_F(GraphPropertiesTest, InferRestoreOpShape_WithTwoNodesShareSameOutput) {
   item.fetch.push_back("init2");
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
 
   const auto props = properties.GetOutputProperties("restore");
   const OpInfo::TensorProperties& prop = props[0];
@@ -732,7 +735,7 @@ TEST_F(GraphPropertiesTest, FunctionStaticShapeInference) {
                                  "simple_function.pbtxt");
   TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
   const auto props = properties.GetOutputProperties("MyAdd_55e046a8_1");
   const OpInfo::TensorProperties& prop = props[0];
   EXPECT_EQ(DT_FLOAT, prop.dtype());
@@ -740,6 +743,10 @@ TEST_F(GraphPropertiesTest, FunctionStaticShapeInference) {
   EXPECT_EQ(2, prop.shape().dim_size());
   EXPECT_EQ(1, prop.shape().dim(0).size());
   EXPECT_EQ(2, prop.shape().dim(1).size());
+
+  PartialTensorShape shape(prop.shape());
+  EXPECT_TRUE(shape.IsFullyDefined());
+  EXPECT_FALSE(shape.unknown_rank());
 }
 
 TEST_F(GraphPropertiesTest, SymbolicShapes) {
@@ -766,7 +773,7 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) {
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
   const auto shape_a = properties.GetOutputProperties("a").at(0).shape();
   const auto shape_c = properties.GetOutputProperties("c").at(0).shape();
   EXPECT_EQ(2, shape_a.dim_size());
@@ -776,6 +783,10 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) {
   EXPECT_GE(-2, shape_a.dim(1).size());
   EXPECT_EQ(shape_a.dim(1).size(), shape_c.dim(1).size());
 
+  PartialTensorShape shape(shape_a);
+  EXPECT_FALSE(shape.IsFullyDefined());
+  EXPECT_FALSE(shape.unknown_rank());
+
   const auto shape_b = properties.GetOutputProperties("b").at(0).shape();
   const auto shape_d = properties.GetOutputProperties("d").at(0).shape();
   EXPECT_EQ(1, shape_b.dim_size());
@@ -822,7 +833,7 @@ TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) {
   GraphProperties properties(item);
   // This function should return OK, since it doesn't validate the colocation
   // constraints internally.
-  TF_EXPECT_OK(properties.InferStatically());
+  TF_EXPECT_OK(properties.InferStatically(false));
 }
 
 TEST_F(GraphPropertiesTest, ShapeTracking) {
@@ -842,7 +853,7 @@ TEST_F(GraphPropertiesTest, ShapeTracking) {
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
   GraphProperties properties(item);
-  TF_CHECK_OK(properties.InferStatically());
+  TF_CHECK_OK(properties.InferStatically(false));
   const auto shape_a = properties.GetOutputProperties("a").at(0).shape();
   const auto shape_b = properties.GetOutputProperties("b").at(0).shape();
   const auto shape_o1 = properties.GetOutputProperties("o1").at(0).shape();
@@ -851,6 +862,76 @@ TEST_F(GraphPropertiesTest, ShapeTracking) {
   EXPECT_EQ(shape_b.DebugString(), shape_o2.DebugString());
 }
 
+TEST_F(GraphPropertiesTest, FedNodes) {
+  TrivialTestGraphInputYielder fake_input(4, 1, 10, false,
+                                          cluster_->GetDeviceNames());
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+
+  {
+    // Conservative shape analysis: the shape of fed ports should be unknown
+    GraphProperties properties(item);
+    Status s = properties.InferStatically(false);
+    TF_CHECK_OK(s);
+    for (const auto& node : item.graph.node()) {
+      if (node.op() == "Const") {
+        continue;
+      }
+      const auto in_props = properties.GetInputProperties(node.name());
+      EXPECT_EQ(1, in_props.size());
+      const OpInfo::TensorProperties& in_prop = in_props[0];
+      const auto out_props = properties.GetOutputProperties(node.name());
+      EXPECT_EQ(1, out_props.size());
+      const OpInfo::TensorProperties& out_prop = out_props[0];
+
+      if (node.name() == "x") {
+        // x is fed: its input should have a known shape, while its output
+        // doesn't
+        EXPECT_FALSE(in_prop.shape().unknown_rank());
+        EXPECT_EQ(1, in_prop.shape().dim_size());
+        EXPECT_EQ(2, in_prop.shape().dim(0).size());
+        EXPECT_TRUE(out_prop.shape().unknown_rank());
+      } else if (node.op() == "Square" || node.op() == "AddN") {
+        // These nodes are in the fanout of x: their shapes should be unknown.
+        EXPECT_TRUE(in_prop.shape().unknown_rank());
+        EXPECT_TRUE(out_prop.shape().unknown_rank());
+      }
+    }
+  }
+  {
+    // Optimistic shape analysis: the shape of fed ports should be derived from
+    // the shape of the fanin.
+    GraphProperties properties(item);
+    Status s = properties.InferStatically(true);
+    TF_CHECK_OK(s);
+    for (const auto& node : item.graph.node()) {
+      if (node.op() == "Square" || node.op() == "AddN") {
+        const auto in_props = properties.GetInputProperties(node.name());
+        EXPECT_EQ(1, in_props.size());
+        const OpInfo::TensorProperties& in_prop = in_props[0];
+        EXPECT_EQ(DT_FLOAT, in_prop.dtype());
+        EXPECT_FALSE(in_prop.shape().unknown_rank());
+        EXPECT_EQ(2, in_prop.shape().dim_size());
+        const auto out_props = properties.GetOutputProperties(node.name());
+        EXPECT_EQ(1, out_props.size());
+        const OpInfo::TensorProperties& out_prop = out_props[0];
+        EXPECT_EQ(in_prop.DebugString(), out_prop.DebugString());
+      }
+    }
+  }
+}
+
+TEST_F(GraphPropertiesTest, Performance) {
+  // Load a large graph with many nested loops to make sure we can infer shapes
+  // quickly.
+  GrapplerItem item;
+  string filename = io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPath,
+                                 "large_graph.pbtxt.html");
+  TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph));
+  GraphProperties properties(item);
+  TF_CHECK_OK(properties.InferStatically(false));
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/costs/graph_properties_testdata/large_graph.pbtxt.html b/tensorflow/core/grappler/costs/graph_properties_testdata/large_graph.pbtxt.html
new file mode 100644
index 0000000000000000000000000000000000000000..efc642ed52908f5e28a0aaca34bd32645a2366ff
--- /dev/null
+++ b/tensorflow/core/grappler/costs/graph_properties_testdata/large_graph.pbtxt.html
@@ -0,0 +1,255137 @@
+node {
+  name: "transcript_input"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+      }
+    }
+  }
+}
+node {
+  name: "Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: ""
+      }
+    }
+  }
+}
+node {
+  name: "speaker_input"
+  op: "PlaceholderWithDefault"
+  input: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+      }
+    }
+  }
+}
+node {
+  name: "Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: ""
+      }
+    }
+  }
+}
+node {
+  name: "vui_input"
+  op: "PlaceholderWithDefault"
+  input: "Const_1"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+      }
+    }
+  }
+}
+node {
+  name: "Const_2"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "waveform_input"
+  op: "PlaceholderWithDefault"
+  input: "Const_2"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Const_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "waveform_length_input"
+  op: "PlaceholderWithDefault"
+  input: "Const_3"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+      }
+    }
+  }
+}
+node {
+  name: "ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "ExpandDims"
+  op: "ExpandDims"
+  input: "transcript_input"
+  input: "ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "transcript_batch_input"
+  op: "PlaceholderWithDefault"
+  input: "ExpandDims"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "ExpandDims_1"
+  op: "ExpandDims"
+  input: "speaker_input"
+  input: "ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "speaker_batch_input"
+  op: "PlaceholderWithDefault"
+  input: "ExpandDims_1"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "ExpandDims_2/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "ExpandDims_2"
+  op: "ExpandDims"
+  input: "vui_input"
+  input: "ExpandDims_2/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "vui_batch_input"
+  op: "PlaceholderWithDefault"
+  input: "ExpandDims_2"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "ExpandDims_3/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "ExpandDims_3"
+  op: "ExpandDims"
+  input: "waveform_input"
+  input: "ExpandDims_3/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "waveform_batch_input"
+  op: "PlaceholderWithDefault"
+  input: "ExpandDims_3"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "ExpandDims_4/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "ExpandDims_4"
+  op: "ExpandDims"
+  input: "waveform_length_input"
+  input: "ExpandDims_4/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "waveform_length_batch_input"
+  op: "PlaceholderWithDefault"
+  input: "ExpandDims_4"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Shape"
+  op: "Shape"
+  input: "transcript_batch_input"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "Shape_1"
+  op: "Shape"
+  input: "vui_batch_input"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "assert_equal/Equal"
+  op: "Equal"
+  input: "Shape"
+  input: "Shape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "assert_equal/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal/All"
+  op: "All"
+  input: "assert_equal/Equal"
+  input: "assert_equal/Const"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "assert_equal/Assert/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: ""
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal/Assert/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Condition x == y did not hold element-wise:"
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal/Assert/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "x (Shape:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal/Assert/Const_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "y (Shape_1:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal/Assert/Assert/data_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: ""
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal/Assert/Assert/data_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Condition x == y did not hold element-wise:"
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal/Assert/Assert/data_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "x (Shape:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal/Assert/Assert/data_4"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "y (Shape_1:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal/Assert/Assert"
+  op: "Assert"
+  input: "assert_equal/All"
+  input: "assert_equal/Assert/Assert/data_0"
+  input: "assert_equal/Assert/Assert/data_1"
+  input: "assert_equal/Assert/Assert/data_2"
+  input: "Shape"
+  input: "assert_equal/Assert/Assert/data_4"
+  input: "Shape_1"
+  attr {
+    key: "T"
+    value {
+      list {
+        type: DT_STRING
+        type: DT_STRING
+        type: DT_STRING
+        type: DT_INT32
+        type: DT_STRING
+        type: DT_INT32
+      }
+    }
+  }
+  attr {
+    key: "summarize"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "Shape_2"
+  op: "Shape"
+  input: "transcript_batch_input"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "Shape_3"
+  op: "Shape"
+  input: "waveform_length_batch_input"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "assert_equal_1/Equal"
+  op: "Equal"
+  input: "Shape_2"
+  input: "Shape_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "assert_equal_1/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_1/All"
+  op: "All"
+  input: "assert_equal_1/Equal"
+  input: "assert_equal_1/Const"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "assert_equal_1/Assert/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: ""
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_1/Assert/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Condition x == y did not hold element-wise:"
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_1/Assert/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "x (Shape_2:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_1/Assert/Const_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "y (Shape_3:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_1/Assert/Assert/data_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: ""
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_1/Assert/Assert/data_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Condition x == y did not hold element-wise:"
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_1/Assert/Assert/data_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "x (Shape_2:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_1/Assert/Assert/data_4"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "y (Shape_3:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_1/Assert/Assert"
+  op: "Assert"
+  input: "assert_equal_1/All"
+  input: "assert_equal_1/Assert/Assert/data_0"
+  input: "assert_equal_1/Assert/Assert/data_1"
+  input: "assert_equal_1/Assert/Assert/data_2"
+  input: "Shape_2"
+  input: "assert_equal_1/Assert/Assert/data_4"
+  input: "Shape_3"
+  attr {
+    key: "T"
+    value {
+      list {
+        type: DT_STRING
+        type: DT_STRING
+        type: DT_STRING
+        type: DT_INT32
+        type: DT_STRING
+        type: DT_INT32
+      }
+    }
+  }
+  attr {
+    key: "summarize"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "Shape_4"
+  op: "Shape"
+  input: "transcript_batch_input"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "Shape_5"
+  op: "Shape"
+  input: "waveform_batch_input"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice"
+  op: "StridedSlice"
+  input: "Shape_5"
+  input: "strided_slice/stack"
+  input: "strided_slice/stack_1"
+  input: "strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "assert_equal_2/Equal"
+  op: "Equal"
+  input: "Shape_4"
+  input: "strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "assert_equal_2/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_2/All"
+  op: "All"
+  input: "assert_equal_2/Equal"
+  input: "assert_equal_2/Const"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "assert_equal_2/Assert/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: ""
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_2/Assert/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Condition x == y did not hold element-wise:"
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_2/Assert/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "x (Shape_4:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_2/Assert/Const_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "y (strided_slice:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_2/Assert/Assert/data_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: ""
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_2/Assert/Assert/data_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Condition x == y did not hold element-wise:"
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_2/Assert/Assert/data_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "x (Shape_4:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_2/Assert/Assert/data_4"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "y (strided_slice:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_2/Assert/Assert"
+  op: "Assert"
+  input: "assert_equal_2/All"
+  input: "assert_equal_2/Assert/Assert/data_0"
+  input: "assert_equal_2/Assert/Assert/data_1"
+  input: "assert_equal_2/Assert/Assert/data_2"
+  input: "Shape_4"
+  input: "assert_equal_2/Assert/Assert/data_4"
+  input: "strided_slice"
+  attr {
+    key: "T"
+    value {
+      list {
+        type: DT_STRING
+        type: DT_STRING
+        type: DT_STRING
+        type: DT_INT32
+        type: DT_STRING
+        type: DT_INT32
+      }
+    }
+  }
+  attr {
+    key: "summarize"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "Shape_6"
+  op: "Shape"
+  input: "transcript_batch_input"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "Shape_7"
+  op: "Shape"
+  input: "speaker_batch_input"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "assert_equal_3/Equal"
+  op: "Equal"
+  input: "Shape_6"
+  input: "Shape_7"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "assert_equal_3/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_3/All"
+  op: "All"
+  input: "assert_equal_3/Equal"
+  input: "assert_equal_3/Const"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "assert_equal_3/Assert/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: ""
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_3/Assert/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Condition x == y did not hold element-wise:"
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_3/Assert/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "x (Shape_6:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_3/Assert/Const_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "y (Shape_7:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_3/Assert/Assert/data_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: ""
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_3/Assert/Assert/data_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Condition x == y did not hold element-wise:"
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_3/Assert/Assert/data_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "x (Shape_6:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_3/Assert/Assert/data_4"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "y (Shape_7:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_3/Assert/Assert"
+  op: "Assert"
+  input: "assert_equal_3/All"
+  input: "assert_equal_3/Assert/Assert/data_0"
+  input: "assert_equal_3/Assert/Assert/data_1"
+  input: "assert_equal_3/Assert/Assert/data_2"
+  input: "Shape_6"
+  input: "assert_equal_3/Assert/Assert/data_4"
+  input: "Shape_7"
+  attr {
+    key: "T"
+    value {
+      list {
+        type: DT_STRING
+        type: DT_STRING
+        type: DT_STRING
+        type: DT_INT32
+        type: DT_STRING
+        type: DT_INT32
+      }
+    }
+  }
+  attr {
+    key: "summarize"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "Identity"
+  op: "Identity"
+  input: "transcript_batch_input"
+  input: "^assert_equal/Assert/Assert"
+  input: "^assert_equal_1/Assert/Assert"
+  input: "^assert_equal_2/Assert/Assert"
+  input: "^assert_equal_3/Assert/Assert"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+}
+node {
+  name: "Const_4"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 400
+      }
+    }
+  }
+}
+node {
+  name: "decoder_output_length"
+  op: "PlaceholderWithDefault"
+  input: "Const_4"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+      }
+    }
+  }
+}
+node {
+  name: "Shape_8"
+  op: "Shape"
+  input: "Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_1"
+  op: "StridedSlice"
+  input: "Shape_8"
+  input: "strided_slice_1/stack"
+  input: "strided_slice_1/stack_1"
+  input: "strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "key_value_init/keys"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+          dim {
+            size: 40
+          }
+        }
+        string_val: "msf_001"
+        string_val: "msf_002"
+        string_val: "msf_003"
+        string_val: "msf_004"
+        string_val: "msf_005"
+        string_val: "msf_006"
+        string_val: "msf_007"
+        string_val: "msf_008"
+        string_val: "msf_009"
+        string_val: "msf_010"
+        string_val: "msf_011"
+        string_val: "msf_012"
+        string_val: "msf_013"
+        string_val: "msf_014"
+        string_val: "msf_015"
+        string_val: "msf_016"
+        string_val: "msf_017"
+        string_val: "msf_018"
+        string_val: "msf_019"
+        string_val: "msf_020"
+        string_val: "msm_001"
+        string_val: "msm_002"
+        string_val: "msm_003"
+        string_val: "msm_004"
+        string_val: "msm_005"
+        string_val: "msm_006"
+        string_val: "msm_007"
+        string_val: "msm_008"
+        string_val: "msm_009"
+        string_val: "msm_010"
+        string_val: "msm_011"
+        string_val: "msm_012"
+        string_val: "msm_013"
+        string_val: "msm_014"
+        string_val: "msm_015"
+        string_val: "msm_016"
+        string_val: "msm_017"
+        string_val: "msm_018"
+        string_val: "msm_019"
+        string_val: "msm_020"
+      }
+    }
+  }
+}
+node {
+  name: "key_value_init/values"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 40
+          }
+        }
+        tensor_content: "\001\000\000\000\002\000\000\000\003\000\000\000\004\000\000\000\005\000\000\000\006\000\000\000\007\000\000\000\010\000\000\000\t\000\000\000\n\000\000\000\013\000\000\000\014\000\000\000\r\000\000\000\016\000\000\000\017\000\000\000\020\000\000\000\021\000\000\000\022\000\000\000\023\000\000\000\024\000\000\000\025\000\000\000\026\000\000\000\027\000\000\000\030\000\000\000\031\000\000\000\032\000\000\000\033\000\000\000\034\000\000\000\035\000\000\000\036\000\000\000\037\000\000\000 \000\000\000!\000\000\000\"\000\000\000#\000\000\000$\000\000\000%\000\000\000&\000\000\000\'\000\000\000(\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "speaker_lookup_table"
+  op: "HashTableV2"
+
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "key_dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+  attr {
+    key: "use_node_name_sharing"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "value_dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "speaker_lookup_table/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "key_value_init"
+  op: "InitializeTableV2"
+  input: "speaker_lookup_table"
+  input: "key_value_init/keys"
+  input: "key_value_init/values"
+  attr {
+    key: "Tkey"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "Tval"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "speaker_lookup_table_Lookup"
+  op: "LookupTableFindV2"
+  input: "speaker_lookup_table"
+  input: "speaker_batch_input"
+  input: "speaker_lookup_table/Const"
+  attr {
+    key: "Tin"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "Tout"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Const_5"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Fill/dims"
+  op: "Pack"
+  input: "strided_slice_1"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "Fill"
+  op: "Fill"
+  input: "Fill/dims"
+  input: "Const_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/Shape"
+  op: "Shape"
+  input: "Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice"
+  op: "StridedSlice"
+  input: "padding_map_fn/Shape"
+  input: "padding_map_fn/strided_slice/stack"
+  input: "padding_map_fn/strided_slice/stack_1"
+  input: "padding_map_fn/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice_1"
+  op: "StridedSlice"
+  input: "Identity"
+  input: "padding_map_fn/strided_slice_1/stack"
+  input: "padding_map_fn/strided_slice_1/stack_1"
+  input: "padding_map_fn/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+
+node {
+  name: "padding_map_fn/TokenizeTranscriptV4/cast"
+  op: "Cast"
+  input: "padding_map_fn/strided_slice_1"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_STRING
+    }
+  }
+}
+
+node {
+  name: "padding_map_fn/TokenizeTranscriptV4/shape"
+  op: "Const"
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape { dim { size: 1 } }
+        int_val: 1
+      }
+    }
+  }
+  attr { key: "dtype" value { type: DT_INT32 } }
+}
+
+node {
+  name: "padding_map_fn/TokenizeTranscriptV4/reshape"
+  op: "Reshape"
+  input: "padding_map_fn/TokenizeTranscriptV4/cast"
+  input: "padding_map_fn/TokenizeTranscriptV4/shape"
+
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+
+node {
+  name: "padding_map_fn/TokenizeTranscriptV4"
+  op: "PlaceholderWithDefault"
+  input: "padding_map_fn/TokenizeTranscriptV4/reshape"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/concat/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 54
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/concat/values_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 55
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/concat"
+  op: "ConcatV2"
+  input: "padding_map_fn/concat/values_0"
+  input: "padding_map_fn/TokenizeTranscriptV4"
+  input: "padding_map_fn/concat/values_2"
+  input: "padding_map_fn/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/Shape_1"
+  op: "Shape"
+  input: "padding_map_fn/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/strided_slice_2"
+  op: "StridedSlice"
+  input: "padding_map_fn/Shape_1"
+  input: "padding_map_fn/strided_slice_2/stack"
+  input: "padding_map_fn/strided_slice_2/stack_1"
+  input: "padding_map_fn/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "padding_map_fn/LogicalNot/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_BOOL
+        tensor_shape {
+        }
+        bool_val: false
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/LogicalNot"
+  op: "LogicalNot"
+  input: "padding_map_fn/LogicalNot/x"
+
+}
+node {
+  name: "padding_map_fn/Shape_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/Shape_3"
+  op: "Shape"
+  input: "padding_map_fn/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/Shape_4"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArray"
+  op: "TensorArrayV3"
+  input: "padding_map_fn/strided_slice"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArray_1"
+  op: "TensorArrayV3"
+  input: "padding_map_fn/strided_slice"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArray_2"
+  op: "TensorArrayV3"
+  input: "padding_map_fn/strided_slice"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayWrite/TensorArrayWriteV3/index"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/LogicalNot"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayWrite/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "padding_map_fn/TensorArray"
+  input: "padding_map_fn/TensorArrayWrite/TensorArrayWriteV3/index"
+  input: "padding_map_fn/LogicalNot"
+  input: "padding_map_fn/TensorArray:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/LogicalNot"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/TensorArrayWrite_1/TensorArrayWriteV3/index"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/concat"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayWrite_1/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "padding_map_fn/TensorArray_1"
+  input: "padding_map_fn/TensorArrayWrite_1/TensorArrayWriteV3/index"
+  input: "padding_map_fn/concat"
+  input: "padding_map_fn/TensorArray_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/concat"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/TensorArrayWrite_2/TensorArrayWriteV3/index"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/strided_slice_2"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayWrite_2/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "padding_map_fn/TensorArray_2"
+  input: "padding_map_fn/TensorArrayWrite_2/TensorArrayWriteV3/index"
+  input: "padding_map_fn/strided_slice_2"
+  input: "padding_map_fn/TensorArray_2:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/strided_slice_2"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Enter"
+  op: "Enter"
+  input: "padding_map_fn/while/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Enter_1"
+  op: "Enter"
+  input: "padding_map_fn/TensorArrayWrite/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Enter_2"
+  op: "Enter"
+  input: "padding_map_fn/TensorArrayWrite_1/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Enter_3"
+  op: "Enter"
+  input: "padding_map_fn/TensorArrayWrite_2/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Enter_4"
+  op: "Enter"
+  input: "padding_map_fn/Shape_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Enter_5"
+  op: "Enter"
+  input: "padding_map_fn/Shape_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Enter_6"
+  op: "Enter"
+  input: "padding_map_fn/Shape_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Merge"
+  op: "Merge"
+  input: "padding_map_fn/while/Enter"
+  input: "padding_map_fn/while/NextIteration"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Merge_1"
+  op: "Merge"
+  input: "padding_map_fn/while/Enter_1"
+  input: "padding_map_fn/while/NextIteration_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Merge_2"
+  op: "Merge"
+  input: "padding_map_fn/while/Enter_2"
+  input: "padding_map_fn/while/NextIteration_2"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Merge_3"
+  op: "Merge"
+  input: "padding_map_fn/while/Enter_3"
+  input: "padding_map_fn/while/NextIteration_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Merge_4"
+  op: "Merge"
+  input: "padding_map_fn/while/Enter_4"
+  input: "padding_map_fn/while/NextIteration_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Merge_5"
+  op: "Merge"
+  input: "padding_map_fn/while/Enter_5"
+  input: "padding_map_fn/while/NextIteration_5"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Merge_6"
+  op: "Merge"
+  input: "padding_map_fn/while/Enter_6"
+  input: "padding_map_fn/while/NextIteration_6"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Less"
+  op: "Less"
+  input: "padding_map_fn/while/Merge"
+  input: "padding_map_fn/while/Less/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Less/Enter"
+  op: "Enter"
+  input: "padding_map_fn/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/LoopCond"
+  op: "LoopCond"
+  input: "padding_map_fn/while/Less"
+
+}
+node {
+  name: "padding_map_fn/while/Switch"
+  op: "Switch"
+  input: "padding_map_fn/while/Merge"
+  input: "padding_map_fn/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while/Merge"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Switch_1"
+  op: "Switch"
+  input: "padding_map_fn/while/Merge_1"
+  input: "padding_map_fn/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while/Merge_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Switch_2"
+  op: "Switch"
+  input: "padding_map_fn/while/Merge_2"
+  input: "padding_map_fn/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while/Merge_2"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Switch_3"
+  op: "Switch"
+  input: "padding_map_fn/while/Merge_3"
+  input: "padding_map_fn/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while/Merge_3"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Switch_4"
+  op: "Switch"
+  input: "padding_map_fn/while/Merge_4"
+  input: "padding_map_fn/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while/Merge_4"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Switch_5"
+  op: "Switch"
+  input: "padding_map_fn/while/Merge_5"
+  input: "padding_map_fn/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while/Merge_5"
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Switch_6"
+  op: "Switch"
+  input: "padding_map_fn/while/Merge_6"
+  input: "padding_map_fn/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while/Merge_6"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Identity"
+  op: "Identity"
+  input: "padding_map_fn/while/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Identity_1"
+  op: "Identity"
+  input: "padding_map_fn/while/Switch_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Identity_2"
+  op: "Identity"
+  input: "padding_map_fn/while/Switch_2:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Identity_3"
+  op: "Identity"
+  input: "padding_map_fn/while/Switch_3:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Identity_4"
+  op: "Identity"
+  input: "padding_map_fn/while/Switch_4:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Identity_5"
+  op: "Identity"
+  input: "padding_map_fn/while/Switch_5:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Identity_6"
+  op: "Identity"
+  input: "padding_map_fn/while/Switch_6:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/add/y"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/add"
+  op: "Add"
+  input: "padding_map_fn/while/Identity"
+  input: "padding_map_fn/while/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/strided_slice/stack/1"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/strided_slice/stack"
+  op: "Pack"
+  input: "padding_map_fn/while/Identity"
+  input: "padding_map_fn/while/strided_slice/stack/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/strided_slice/stack_1/1"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/strided_slice/stack_1"
+  op: "Pack"
+  input: "padding_map_fn/while/add"
+  input: "padding_map_fn/while/strided_slice/stack_1/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/strided_slice/stack_2"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/strided_slice"
+  op: "StridedSlice"
+  input: "padding_map_fn/while/strided_slice/Enter"
+  input: "padding_map_fn/while/strided_slice/stack"
+  input: "padding_map_fn/while/strided_slice/stack_1"
+  input: "padding_map_fn/while/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/strided_slice/Enter"
+  op: "Enter"
+  input: "Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+
+node {
+  name: "padding_map_fn/while/TokenizeTranscriptV4/cast"
+  op: "Cast"
+  input: "padding_map_fn/while/strided_slice"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_STRING
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/TokenizeTranscriptV4/shape"
+  input: "^padding_map_fn/while/TokenizeTranscriptV4/cast"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/TokenizeTranscriptV4/reshape"
+  op: "Reshape"
+  input: "padding_map_fn/while/TokenizeTranscriptV4/cast"
+  input: "padding_map_fn/while/TokenizeTranscriptV4/shape"
+
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/TokenizeTranscriptV4"
+  op: "PlaceholderWithDefault"
+  input: "padding_map_fn/while/TokenizeTranscriptV4/reshape"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/concat/values_0"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 54
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/concat/values_2"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 55
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/concat/axis"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/concat"
+  op: "ConcatV2"
+  input: "padding_map_fn/while/concat/values_0"
+  input: "padding_map_fn/while/TokenizeTranscriptV4"
+  input: "padding_map_fn/while/concat/values_2"
+  input: "padding_map_fn/while/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Shape"
+  op: "Shape"
+  input: "padding_map_fn/while/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/strided_slice_1/stack"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/strided_slice_1/stack_1"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/strided_slice_1/stack_2"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/strided_slice_1"
+  op: "StridedSlice"
+  input: "padding_map_fn/while/Shape"
+  input: "padding_map_fn/while/strided_slice_1/stack"
+  input: "padding_map_fn/while/strided_slice_1/stack_1"
+  input: "padding_map_fn/while/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/LogicalNot/x"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_BOOL
+        tensor_shape {
+        }
+        bool_val: false
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/LogicalNot"
+  op: "LogicalNot"
+  input: "padding_map_fn/while/LogicalNot/x"
+
+}
+node {
+  name: "padding_map_fn/while/TensorArrayWrite/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "padding_map_fn/while/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  input: "padding_map_fn/while/Identity"
+  input: "padding_map_fn/while/LogicalNot"
+  input: "padding_map_fn/while/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/LogicalNot"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/LogicalNot"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/TensorArrayWrite_1/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "padding_map_fn/while/TensorArrayWrite_1/TensorArrayWriteV3/Enter"
+  input: "padding_map_fn/while/Identity"
+  input: "padding_map_fn/while/concat"
+  input: "padding_map_fn/while/Identity_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/concat"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/TensorArrayWrite_1/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/concat"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/TensorArrayWrite_2/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "padding_map_fn/while/TensorArrayWrite_2/TensorArrayWriteV3/Enter"
+  input: "padding_map_fn/while/Identity"
+  input: "padding_map_fn/while/strided_slice_1"
+  input: "padding_map_fn/while/Identity_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/strided_slice_2"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/TensorArrayWrite_2/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/strided_slice_2"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Shape_1"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Maximum"
+  op: "Maximum"
+  input: "padding_map_fn/while/Identity_4"
+  input: "padding_map_fn/while/Shape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Shape_2"
+  op: "Shape"
+  input: "padding_map_fn/while/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Maximum_1"
+  op: "Maximum"
+  input: "padding_map_fn/while/Identity_5"
+  input: "padding_map_fn/while/Shape_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Shape_3"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/Maximum_2"
+  op: "Maximum"
+  input: "padding_map_fn/while/Identity_6"
+  input: "padding_map_fn/while/Shape_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/add_1/y"
+  op: "Const"
+  input: "^padding_map_fn/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while/add_1"
+  op: "Add"
+  input: "padding_map_fn/while/Identity"
+  input: "padding_map_fn/while/add_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/NextIteration"
+  op: "NextIteration"
+  input: "padding_map_fn/while/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/NextIteration_1"
+  op: "NextIteration"
+  input: "padding_map_fn/while/TensorArrayWrite/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/NextIteration_2"
+  op: "NextIteration"
+  input: "padding_map_fn/while/TensorArrayWrite_1/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/NextIteration_3"
+  op: "NextIteration"
+  input: "padding_map_fn/while/TensorArrayWrite_2/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/NextIteration_4"
+  op: "NextIteration"
+  input: "padding_map_fn/while/Maximum"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/NextIteration_5"
+  op: "NextIteration"
+  input: "padding_map_fn/while/Maximum_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/NextIteration_6"
+  op: "NextIteration"
+  input: "padding_map_fn/while/Maximum_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Exit"
+  op: "Exit"
+  input: "padding_map_fn/while/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Exit_1"
+  op: "Exit"
+  input: "padding_map_fn/while/Switch_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Exit_2"
+  op: "Exit"
+  input: "padding_map_fn/while/Switch_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Exit_3"
+  op: "Exit"
+  input: "padding_map_fn/while/Switch_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Exit_4"
+  op: "Exit"
+  input: "padding_map_fn/while/Switch_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Exit_5"
+  op: "Exit"
+  input: "padding_map_fn/while/Switch_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while/Exit_6"
+  op: "Exit"
+  input: "padding_map_fn/while/Switch_6"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/TensorArray_3"
+  op: "TensorArrayV3"
+  input: "padding_map_fn/strided_slice"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArray_4"
+  op: "TensorArrayV3"
+  input: "padding_map_fn/strided_slice"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArray_5"
+  op: "TensorArrayV3"
+  input: "padding_map_fn/strided_slice"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Enter"
+  op: "Enter"
+  input: "padding_map_fn/while_1/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Enter_1"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray_3:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Enter_2"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray_4:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Enter_3"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray_5:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Merge"
+  op: "Merge"
+  input: "padding_map_fn/while_1/Enter"
+  input: "padding_map_fn/while_1/NextIteration"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Merge_1"
+  op: "Merge"
+  input: "padding_map_fn/while_1/Enter_1"
+  input: "padding_map_fn/while_1/NextIteration_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Merge_2"
+  op: "Merge"
+  input: "padding_map_fn/while_1/Enter_2"
+  input: "padding_map_fn/while_1/NextIteration_2"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Merge_3"
+  op: "Merge"
+  input: "padding_map_fn/while_1/Enter_3"
+  input: "padding_map_fn/while_1/NextIteration_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Less"
+  op: "Less"
+  input: "padding_map_fn/while_1/Merge"
+  input: "padding_map_fn/while_1/Less/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Less/Enter"
+  op: "Enter"
+  input: "padding_map_fn/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/LoopCond"
+  op: "LoopCond"
+  input: "padding_map_fn/while_1/Less"
+
+}
+node {
+  name: "padding_map_fn/while_1/Switch"
+  op: "Switch"
+  input: "padding_map_fn/while_1/Merge"
+  input: "padding_map_fn/while_1/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while_1/Merge"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Switch_1"
+  op: "Switch"
+  input: "padding_map_fn/while_1/Merge_1"
+  input: "padding_map_fn/while_1/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while_1/Merge_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Switch_2"
+  op: "Switch"
+  input: "padding_map_fn/while_1/Merge_2"
+  input: "padding_map_fn/while_1/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while_1/Merge_2"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Switch_3"
+  op: "Switch"
+  input: "padding_map_fn/while_1/Merge_3"
+  input: "padding_map_fn/while_1/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while_1/Merge_3"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Identity"
+  op: "Identity"
+  input: "padding_map_fn/while_1/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Identity_1"
+  op: "Identity"
+  input: "padding_map_fn/while_1/Switch_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Identity_2"
+  op: "Identity"
+  input: "padding_map_fn/while_1/Switch_2:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Identity_3"
+  op: "Identity"
+  input: "padding_map_fn/while_1/Switch_3:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayReadV3"
+  op: "TensorArrayReadV3"
+  input: "padding_map_fn/while_1/TensorArrayReadV3/Enter"
+  input: "padding_map_fn/while_1/Identity"
+  input: "padding_map_fn/while_1/TensorArrayReadV3/Enter_1"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayReadV3/Enter"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayReadV3/Enter_1"
+  op: "Enter"
+  input: "padding_map_fn/while/Exit_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Shape"
+  op: "Shape"
+  input: "padding_map_fn/while_1/TensorArrayReadV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/sub"
+  op: "Sub"
+  input: "padding_map_fn/while_1/sub/Enter"
+  input: "padding_map_fn/while_1/Shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/sub/Enter"
+  op: "Enter"
+  input: "padding_map_fn/while/Exit_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/ExpandDims/dim"
+  op: "Const"
+  input: "^padding_map_fn/while_1/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/ExpandDims"
+  op: "ExpandDims"
+  input: "padding_map_fn/while_1/sub"
+  input: "padding_map_fn/while_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Pad/paddings"
+  op: "Const"
+  input: "^padding_map_fn/while_1/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Pad"
+  op: "Pad"
+  input: "padding_map_fn/while_1/ExpandDims"
+  input: "padding_map_fn/while_1/Pad/paddings"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Pad_1"
+  op: "Pad"
+  input: "padding_map_fn/while_1/TensorArrayReadV3"
+  input: "padding_map_fn/while_1/Pad"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayWrite/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "padding_map_fn/while_1/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  input: "padding_map_fn/while_1/Identity"
+  input: "padding_map_fn/while_1/Pad_1"
+  input: "padding_map_fn/while_1/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while_1/Pad_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while_1/Pad_1"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayReadV3_1"
+  op: "TensorArrayReadV3"
+  input: "padding_map_fn/while_1/TensorArrayReadV3_1/Enter"
+  input: "padding_map_fn/while_1/Identity"
+  input: "padding_map_fn/while_1/TensorArrayReadV3_1/Enter_1"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayReadV3_1/Enter"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayReadV3_1/Enter_1"
+  op: "Enter"
+  input: "padding_map_fn/while/Exit_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Shape_1"
+  op: "Shape"
+  input: "padding_map_fn/while_1/TensorArrayReadV3_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/sub_1"
+  op: "Sub"
+  input: "padding_map_fn/while_1/sub_1/Enter"
+  input: "padding_map_fn/while_1/Shape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/sub_1/Enter"
+  op: "Enter"
+  input: "padding_map_fn/while/Exit_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/ExpandDims_1/dim"
+  op: "Const"
+  input: "^padding_map_fn/while_1/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "padding_map_fn/while_1/sub_1"
+  input: "padding_map_fn/while_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Pad_2/paddings"
+  op: "Const"
+  input: "^padding_map_fn/while_1/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Pad_2"
+  op: "Pad"
+  input: "padding_map_fn/while_1/ExpandDims_1"
+  input: "padding_map_fn/while_1/Pad_2/paddings"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Pad_3"
+  op: "Pad"
+  input: "padding_map_fn/while_1/TensorArrayReadV3_1"
+  input: "padding_map_fn/while_1/Pad_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayWrite_1/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "padding_map_fn/while_1/TensorArrayWrite_1/TensorArrayWriteV3/Enter"
+  input: "padding_map_fn/while_1/Identity"
+  input: "padding_map_fn/while_1/Pad_3"
+  input: "padding_map_fn/while_1/Identity_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while_1/Pad_3"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayWrite_1/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while_1/Pad_3"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayReadV3_2"
+  op: "TensorArrayReadV3"
+  input: "padding_map_fn/while_1/TensorArrayReadV3_2/Enter"
+  input: "padding_map_fn/while_1/Identity"
+  input: "padding_map_fn/while_1/TensorArrayReadV3_2/Enter_1"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayReadV3_2/Enter"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayReadV3_2/Enter_1"
+  op: "Enter"
+  input: "padding_map_fn/while/Exit_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Shape_2"
+  op: "Shape"
+  input: "padding_map_fn/while_1/TensorArrayReadV3_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/sub_2"
+  op: "Sub"
+  input: "padding_map_fn/while_1/sub_2/Enter"
+  input: "padding_map_fn/while_1/Shape_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/sub_2/Enter"
+  op: "Enter"
+  input: "padding_map_fn/while/Exit_6"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/ExpandDims_2/dim"
+  op: "Const"
+  input: "^padding_map_fn/while_1/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/ExpandDims_2"
+  op: "ExpandDims"
+  input: "padding_map_fn/while_1/sub_2"
+  input: "padding_map_fn/while_1/ExpandDims_2/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Pad_4/paddings"
+  op: "Const"
+  input: "^padding_map_fn/while_1/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/Pad_4"
+  op: "Pad"
+  input: "padding_map_fn/while_1/ExpandDims_2"
+  input: "padding_map_fn/while_1/Pad_4/paddings"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Pad_5"
+  op: "Pad"
+  input: "padding_map_fn/while_1/TensorArrayReadV3_2"
+  input: "padding_map_fn/while_1/Pad_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayWrite_2/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "padding_map_fn/while_1/TensorArrayWrite_2/TensorArrayWriteV3/Enter"
+  input: "padding_map_fn/while_1/Identity"
+  input: "padding_map_fn/while_1/Pad_5"
+  input: "padding_map_fn/while_1/Identity_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while_1/Pad_5"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/TensorArrayWrite_2/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "padding_map_fn/TensorArray_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/while_1/Pad_5"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "padding_map_fn/while_1/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/add/y"
+  op: "Const"
+  input: "^padding_map_fn/while_1/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/while_1/add"
+  op: "Add"
+  input: "padding_map_fn/while_1/Identity"
+  input: "padding_map_fn/while_1/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/NextIteration"
+  op: "NextIteration"
+  input: "padding_map_fn/while_1/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/NextIteration_1"
+  op: "NextIteration"
+  input: "padding_map_fn/while_1/TensorArrayWrite/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/NextIteration_2"
+  op: "NextIteration"
+  input: "padding_map_fn/while_1/TensorArrayWrite_1/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/NextIteration_3"
+  op: "NextIteration"
+  input: "padding_map_fn/while_1/TensorArrayWrite_2/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Exit"
+  op: "Exit"
+  input: "padding_map_fn/while_1/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Exit_1"
+  op: "Exit"
+  input: "padding_map_fn/while_1/Switch_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Exit_2"
+  op: "Exit"
+  input: "padding_map_fn/while_1/Switch_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/while_1/Exit_3"
+  op: "Exit"
+  input: "padding_map_fn/while_1/Switch_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/TensorArrayStack/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "padding_map_fn/TensorArray_3"
+  input: "padding_map_fn/while_1/Exit_1"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_3"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/TensorArrayStack/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_3"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayStack/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_3"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayStack/range"
+  op: "Range"
+  input: "padding_map_fn/TensorArrayStack/range/start"
+  input: "padding_map_fn/TensorArrayStack/TensorArraySizeV3"
+  input: "padding_map_fn/TensorArrayStack/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_3"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/TensorArrayStack/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "padding_map_fn/TensorArray_3"
+  input: "padding_map_fn/TensorArrayStack/range"
+  input: "padding_map_fn/while_1/Exit_1"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_3"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayStack_1/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "padding_map_fn/TensorArray_4"
+  input: "padding_map_fn/while_1/Exit_2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_4"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/TensorArrayStack_1/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_4"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayStack_1/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_4"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayStack_1/range"
+  op: "Range"
+  input: "padding_map_fn/TensorArrayStack_1/range/start"
+  input: "padding_map_fn/TensorArrayStack_1/TensorArraySizeV3"
+  input: "padding_map_fn/TensorArrayStack_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_4"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/TensorArrayStack_1/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "padding_map_fn/TensorArray_4"
+  input: "padding_map_fn/TensorArrayStack_1/range"
+  input: "padding_map_fn/while_1/Exit_2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_4"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayStack_2/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "padding_map_fn/TensorArray_5"
+  input: "padding_map_fn/while_1/Exit_3"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_5"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/TensorArrayStack_2/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_5"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayStack_2/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_5"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "padding_map_fn/TensorArrayStack_2/range"
+  op: "Range"
+  input: "padding_map_fn/TensorArrayStack_2/range/start"
+  input: "padding_map_fn/TensorArrayStack_2/TensorArraySizeV3"
+  input: "padding_map_fn/TensorArrayStack_2/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_5"
+      }
+    }
+  }
+
+}
+node {
+  name: "padding_map_fn/TensorArrayStack_2/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "padding_map_fn/TensorArray_5"
+  input: "padding_map_fn/TensorArrayStack_2/range"
+  input: "padding_map_fn/while_1/Exit_3"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@padding_map_fn/TensorArray_5"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+}
+node {
+  name: "Shape_9"
+  op: "Shape"
+  input: "padding_map_fn/TensorArrayStack_1/TensorArrayGatherV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_2"
+  op: "StridedSlice"
+  input: "Shape_9"
+  input: "strided_slice_2/stack"
+  input: "strided_slice_2/stack_1"
+  input: "strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/range"
+  op: "Range"
+  input: "sequence_length_mask/range/start"
+  input: "strided_slice_2"
+  input: "sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "sequence_length_mask/range"
+  input: "sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask/Shape"
+  op: "Shape"
+  input: "padding_map_fn/TensorArrayStack_2/TensorArrayGatherV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "sequence_length_mask/Shape"
+  input: "sequence_length_mask/strided_slice/stack"
+  input: "sequence_length_mask/strided_slice/stack_1"
+  input: "sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "sequence_length_mask/strided_slice"
+  input: "sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/Tile"
+  op: "Tile"
+  input: "sequence_length_mask/ExpandDims"
+  input: "sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "padding_map_fn/TensorArrayStack_2/TensorArrayGatherV3"
+  input: "sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask/Less"
+  op: "Less"
+  input: "sequence_length_mask/Tile"
+  input: "sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask/Cast"
+  op: "Cast"
+  input: "sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "sub/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sub"
+  op: "Sub"
+  input: "sub/x"
+  input: "sequence_length_mask/Cast"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "mul/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 55
+      }
+    }
+  }
+}
+node {
+  name: "mul"
+  op: "Mul"
+  input: "sub"
+  input: "mul/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "add"
+  op: "Add"
+  input: "padding_map_fn/TensorArrayStack_1/TensorArrayGatherV3"
+  input: "mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Const_6"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "All"
+  op: "All"
+  input: "padding_map_fn/TensorArrayStack/TensorArrayGatherV3"
+  input: "Const_6"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "Assert/Assert"
+  op: "Assert"
+  input: "All"
+  input: "Identity"
+  input: "add"
+  input: "padding_map_fn/TensorArrayStack_2/TensorArrayGatherV3"
+  attr {
+    key: "T"
+    value {
+      list {
+        type: DT_STRING
+        type: DT_INT32
+        type: DT_INT32
+      }
+    }
+  }
+  attr {
+    key: "summarize"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "control_dependency"
+  op: "Identity"
+  input: "add"
+  input: "^Assert/Assert"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@add"
+      }
+    }
+  }
+
+}
+node {
+  name: "Const_7"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 24000.0
+      }
+    }
+  }
+}
+node {
+  name: "Fill_1/dims"
+  op: "Pack"
+  input: "strided_slice_1"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "Fill_1"
+  op: "Fill"
+  input: "Fill_1/dims"
+  input: "Const_7"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "Const_8"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Abs"
+  op: "Abs"
+  input: "waveform_batch_input"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "Const_9"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "Max"
+  op: "Max"
+  input: "Abs"
+  input: "Const_9"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "div"
+  op: "RealDiv"
+  input: "waveform_batch_input"
+  input: "Max"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "pre_emphasis/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "pre_emphasis/ExpandDims"
+  op: "ExpandDims"
+  input: "div"
+  input: "pre_emphasis/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "pre_emphasis/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "pre_emphasis/ExpandDims_1"
+  op: "ExpandDims"
+  input: "pre_emphasis/ExpandDims"
+  input: "pre_emphasis/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "pre_emphasis/Const"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+        tensor_content: "\354Qx\277\000\000\200?"
+      }
+    }
+  }
+}
+node {
+  name: "pre_emphasis/Conv2D"
+  op: "Conv2D"
+  input: "pre_emphasis/ExpandDims_1"
+  input: "pre_emphasis/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "pre_emphasis/Squeeze"
+  op: "Squeeze"
+  input: "pre_emphasis/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+        i: 3
+      }
+    }
+  }
+}
+node {
+  name: "frame/frame_length"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1200
+      }
+    }
+  }
+}
+node {
+  name: "frame/frame_step"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 300
+      }
+    }
+  }
+}
+node {
+  name: "frame/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "frame/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "frame/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/range"
+  op: "Range"
+  input: "frame/range/start"
+  input: "frame/Rank"
+  input: "frame/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/add/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/add"
+  op: "Add"
+  input: "frame/axis"
+  input: "frame/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/strided_slice/stack"
+  op: "Pack"
+  input: "frame/axis"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/strided_slice/stack_1"
+  op: "Pack"
+  input: "frame/add"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/strided_slice"
+  op: "StridedSlice"
+  input: "frame/range"
+  input: "frame/strided_slice/stack"
+  input: "frame/strided_slice/stack_1"
+  input: "frame/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "frame/Shape"
+  op: "Shape"
+  input: "pre_emphasis/Squeeze"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "frame/sub/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/sub"
+  op: "Sub"
+  input: "frame/Rank"
+  input: "frame/sub/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/sub_1"
+  op: "Sub"
+  input: "frame/sub"
+  input: "frame/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/packed/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/packed"
+  op: "Pack"
+  input: "frame/strided_slice"
+  input: "frame/packed/1"
+  input: "frame/sub_1"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/split"
+  op: "SplitV"
+  input: "frame/Shape"
+  input: "frame/packed"
+  input: "frame/split/split_dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tlen"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "num_split"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "frame/Reshape/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "frame/Reshape"
+  op: "Reshape"
+  input: "frame/split:1"
+  input: "frame/Reshape/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/Size"
+  op: "Size"
+  input: "frame/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "frame/Size_1"
+  op: "Size"
+  input: "frame/split:2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "frame/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "frame/Neg"
+  op: "Neg"
+  input: "frame/Reshape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/floordiv"
+  op: "FloorDiv"
+  input: "frame/Neg"
+  input: "frame/frame_step"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/Neg_1"
+  op: "Neg"
+  input: "frame/floordiv"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/sub_2/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/sub_2"
+  op: "Sub"
+  input: "frame/Neg_1"
+  input: "frame/sub_2/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/mul"
+  op: "Mul"
+  input: "frame/frame_step"
+  input: "frame/sub_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/add_1"
+  op: "Add"
+  input: "frame/frame_length"
+  input: "frame/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/sub_3"
+  op: "Sub"
+  input: "frame/add_1"
+  input: "frame/Reshape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/Maximum/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/Maximum"
+  op: "Maximum"
+  input: "frame/Maximum/x"
+  input: "frame/sub_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/zeros/shape/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "frame/zeros/shape"
+  op: "Pack"
+  input: "frame/Size"
+  input: "frame/zeros/shape/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/zeros/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/zeros"
+  op: "Fill"
+  input: "frame/zeros/shape"
+  input: "frame/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "frame/zeros_1/shape/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "frame/zeros_1/shape"
+  op: "Pack"
+  input: "frame/Size_1"
+  input: "frame/zeros_1/shape/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/zeros_1/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/zeros_1"
+  op: "Fill"
+  input: "frame/zeros_1/shape"
+  input: "frame/zeros_1/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "frame/concat/values_1/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/concat/values_1/0"
+  op: "Pack"
+  input: "frame/concat/values_1/0/0"
+  input: "frame/Maximum"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/concat/values_1"
+  op: "Pack"
+  input: "frame/concat/values_1/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/concat"
+  op: "ConcatV2"
+  input: "frame/zeros"
+  input: "frame/concat/values_1"
+  input: "frame/zeros_1"
+  input: "frame/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/PadV2"
+  op: "PadV2"
+  input: "pre_emphasis/Squeeze"
+  input: "frame/concat"
+  input: "frame/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/Shape_1"
+  op: "Shape"
+  input: "frame/PadV2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "frame/add_2/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/add_2"
+  op: "Add"
+  input: "frame/strided_slice"
+  input: "frame/add_2/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/strided_slice_1/stack"
+  op: "Pack"
+  input: "frame/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/strided_slice_1/stack_1"
+  op: "Pack"
+  input: "frame/add_2"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/strided_slice_1"
+  op: "StridedSlice"
+  input: "frame/Shape_1"
+  input: "frame/strided_slice_1/stack"
+  input: "frame/strided_slice_1/stack_1"
+  input: "frame/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "frame/gcd/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 300
+      }
+    }
+  }
+}
+node {
+  name: "frame/floordiv_1"
+  op: "FloorDiv"
+  input: "frame/frame_length"
+  input: "frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/floordiv_2"
+  op: "FloorDiv"
+  input: "frame/frame_step"
+  input: "frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/floordiv_3"
+  op: "FloorDiv"
+  input: "frame/strided_slice_1"
+  input: "frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/mul_1"
+  op: "Mul"
+  input: "frame/floordiv_3"
+  input: "frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/concat_1/values_1"
+  op: "Pack"
+  input: "frame/mul_1"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/concat_1"
+  op: "ConcatV2"
+  input: "frame/split"
+  input: "frame/concat_1/values_1"
+  input: "frame/split:2"
+  input: "frame/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/concat_2/values_1"
+  op: "Pack"
+  input: "frame/floordiv_3"
+  input: "frame/gcd/Const"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/concat_2"
+  op: "ConcatV2"
+  input: "frame/split"
+  input: "frame/concat_2/values_1"
+  input: "frame/split:2"
+  input: "frame/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/zeros_like"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/ones_like/Shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "frame/ones_like/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/ones_like"
+  op: "Fill"
+  input: "frame/ones_like/Shape"
+  input: "frame/ones_like/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "frame/StridedSlice"
+  op: "StridedSlice"
+  input: "frame/PadV2"
+  input: "frame/zeros_like"
+  input: "frame/concat_1"
+  input: "frame/ones_like"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/Reshape_1"
+  op: "Reshape"
+  input: "frame/StridedSlice"
+  input: "frame/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/range_1/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/range_1/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/range_1"
+  op: "Range"
+  input: "frame/range_1/start"
+  input: "frame/Neg_1"
+  input: "frame/range_1/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/mul_2"
+  op: "Mul"
+  input: "frame/range_1"
+  input: "frame/floordiv_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/Reshape_2/shape/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/Reshape_2/shape"
+  op: "Pack"
+  input: "frame/Neg_1"
+  input: "frame/Reshape_2/shape/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/Reshape_2"
+  op: "Reshape"
+  input: "frame/mul_2"
+  input: "frame/Reshape_2/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/range_2/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/range_2/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/range_2"
+  op: "Range"
+  input: "frame/range_2/start"
+  input: "frame/floordiv_1"
+  input: "frame/range_2/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/Reshape_3/shape/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "frame/Reshape_3/shape"
+  op: "Pack"
+  input: "frame/Reshape_3/shape/0"
+  input: "frame/floordiv_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/Reshape_3"
+  op: "Reshape"
+  input: "frame/range_2"
+  input: "frame/Reshape_3/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/add_3"
+  op: "Add"
+  input: "frame/Reshape_2"
+  input: "frame/Reshape_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/GatherV2"
+  op: "GatherV2"
+  input: "frame/Reshape_1"
+  input: "frame/add_3"
+  input: "frame/strided_slice"
+  attr {
+    key: "Taxis"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "frame/concat_3/values_1"
+  op: "Pack"
+  input: "frame/Neg_1"
+  input: "frame/frame_length"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "frame/concat_3/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "frame/concat_3"
+  op: "ConcatV2"
+  input: "frame/split"
+  input: "frame/concat_3/values_1"
+  input: "frame/split:2"
+  input: "frame/concat_3/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "frame/Reshape_4"
+  op: "Reshape"
+  input: "frame/GatherV2"
+  input: "frame/concat_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Pad/paddings"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000\t\000\000\000\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "Pad"
+  op: "Pad"
+  input: "frame/Reshape_4"
+  input: "Pad/paddings"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "add_1/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2700
+      }
+    }
+  }
+}
+node {
+  name: "add_1"
+  op: "Add"
+  input: "waveform_length_batch_input"
+  input: "add_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Neg"
+  op: "Neg"
+  input: "add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "floordiv/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 300
+      }
+    }
+  }
+}
+node {
+  name: "floordiv"
+  op: "FloorDiv"
+  input: "Neg"
+  input: "floordiv/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Neg_1"
+  op: "Neg"
+  input: "floordiv"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "hw/window_length"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1200
+      }
+    }
+  }
+}
+node {
+  name: "hw/periodic"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_BOOL
+        tensor_shape {
+        }
+        bool_val: true
+      }
+    }
+  }
+}
+node {
+  name: "hw/Cast"
+  op: "Cast"
+  input: "hw/periodic"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "hw/FloorMod/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "hw/FloorMod"
+  op: "FloorMod"
+  input: "hw/window_length"
+  input: "hw/FloorMod/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "hw/sub/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "hw/sub"
+  op: "Sub"
+  input: "hw/sub/x"
+  input: "hw/FloorMod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "hw/mul"
+  op: "Mul"
+  input: "hw/Cast"
+  input: "hw/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "hw/add"
+  op: "Add"
+  input: "hw/window_length"
+  input: "hw/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "hw/sub_1/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "hw/sub_1"
+  op: "Sub"
+  input: "hw/add"
+  input: "hw/sub_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "hw/Cast_1"
+  op: "Cast"
+  input: "hw/sub_1"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "hw/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "hw/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "hw/range"
+  op: "Range"
+  input: "hw/range/start"
+  input: "hw/window_length"
+  input: "hw/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "hw/Cast_2"
+  op: "Cast"
+  input: "hw/range"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "hw/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 6.28318548203
+      }
+    }
+  }
+}
+node {
+  name: "hw/mul_1"
+  op: "Mul"
+  input: "hw/Const"
+  input: "hw/Cast_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "hw/truediv"
+  op: "RealDiv"
+  input: "hw/mul_1"
+  input: "hw/Cast_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "hw/Cos"
+  op: "Cos"
+  input: "hw/truediv"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "hw/mul_2/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "hw/mul_2"
+  op: "Mul"
+  input: "hw/mul_2/x"
+  input: "hw/Cos"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "hw/sub_2/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "hw/sub_2"
+  op: "Sub"
+  input: "hw/sub_2/x"
+  input: "hw/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "mul_1"
+  op: "Mul"
+  input: "Pad"
+  input: "hw/sub_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Shape_10"
+  op: "Shape"
+  input: "Neg_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "Fill_2/value"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_BOOL
+        tensor_shape {
+        }
+        bool_val: false
+      }
+    }
+  }
+}
+node {
+  name: "Fill_2"
+  op: "Fill"
+  input: "Shape_10"
+  input: "Fill_2/value"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "LogicalNot"
+  op: "LogicalNot"
+  input: "Fill_2"
+
+}
+node {
+  name: "Const_10"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "All_1"
+  op: "All"
+  input: "LogicalNot"
+  input: "Const_10"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "Assert_1/AssertGuard/Switch"
+  op: "Switch"
+  input: "All_1"
+  input: "All_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Assert_1/AssertGuard/switch_t"
+  op: "Identity"
+  input: "Assert_1/AssertGuard/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Assert_1/AssertGuard/switch_f"
+  op: "Identity"
+  input: "Assert_1/AssertGuard/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Assert_1/AssertGuard/pred_id"
+  op: "Identity"
+  input: "All_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Assert_1/AssertGuard/NoOp"
+  op: "NoOp"
+  input: "^Assert_1/AssertGuard/switch_t"
+}
+node {
+  name: "Assert_1/AssertGuard/control_dependency"
+  op: "Identity"
+  input: "Assert_1/AssertGuard/switch_t"
+  input: "^Assert_1/AssertGuard/NoOp"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@Assert_1/AssertGuard/switch_t"
+      }
+    }
+  }
+
+}
+node {
+  name: "Assert_1/AssertGuard/Assert"
+  op: "Assert"
+  input: "Assert_1/AssertGuard/Assert/Switch"
+  input: "Assert_1/AssertGuard/Assert/Switch_1"
+  attr {
+    key: "T"
+    value {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    key: "summarize"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "Assert_1/AssertGuard/Assert/Switch"
+  op: "Switch"
+  input: "All_1"
+  input: "Assert_1/AssertGuard/pred_id"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@All_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "Assert_1/AssertGuard/Assert/Switch_1"
+  op: "Switch"
+  input: "waveform_batch_input"
+  input: "Assert_1/AssertGuard/pred_id"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@waveform_batch_input"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Assert_1/AssertGuard/control_dependency_1"
+  op: "Identity"
+  input: "Assert_1/AssertGuard/switch_f"
+  input: "^Assert_1/AssertGuard/Assert"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@Assert_1/AssertGuard/switch_f"
+      }
+    }
+  }
+
+}
+node {
+  name: "Assert_1/AssertGuard/Merge"
+  op: "Merge"
+  input: "Assert_1/AssertGuard/control_dependency_1"
+  input: "Assert_1/AssertGuard/control_dependency"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "control_dependency_1"
+  op: "Identity"
+  input: "mul_1"
+  input: "^Assert_1/AssertGuard/Merge"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@mul_1"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "encoder_transcripts"
+  op: "Identity"
+  input: "Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_STRING
+    }
+  }
+
+}
+node {
+  name: "encoder_speaker_ids"
+  op: "Identity"
+  input: "speaker_lookup_table_Lookup"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "encoder_vuis"
+  op: "Identity"
+  input: "Fill"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "encoder_input"
+  op: "Identity"
+  input: "control_dependency"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "encoder_input_lengths"
+  op: "Identity"
+  input: "padding_map_fn/TensorArrayStack_2/TensorArrayGatherV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Shape_11"
+  op: "Shape"
+  input: "encoder_input"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_3"
+  op: "StridedSlice"
+  input: "Shape_11"
+  input: "strided_slice_3/stack"
+  input: "strided_slice_3/stack_1"
+  input: "strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/range"
+  op: "Range"
+  input: "sequence_length_mask_1/range/start"
+  input: "strided_slice_3"
+  input: "sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "sequence_length_mask_1/range"
+  input: "sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "sequence_length_mask_1/Shape"
+  input: "sequence_length_mask_1/strided_slice/stack"
+  input: "sequence_length_mask_1/strided_slice/stack_1"
+  input: "sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "sequence_length_mask_1/strided_slice"
+  input: "sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "sequence_length_mask_1/ExpandDims"
+  input: "sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_1/Less"
+  op: "Less"
+  input: "sequence_length_mask_1/Tile"
+  input: "sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "encoder_input_mask"
+  op: "Identity"
+  input: "sequence_length_mask_1/Cast"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_4"
+  op: "StridedSlice"
+  input: "Fill_1"
+  input: "strided_slice_4/stack"
+  input: "strided_slice_4/stack_1"
+  input: "strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "assert_equal_4/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 24000.0
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_4/Equal"
+  op: "Equal"
+  input: "assert_equal_4/x"
+  input: "strided_slice_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "assert_equal_4/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_4/All"
+  op: "All"
+  input: "assert_equal_4/Equal"
+  input: "assert_equal_4/Const"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "assert_equal_4/Assert/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "The provided sample_rate does not match output from reader."
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_4/Assert/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Condition x == y did not hold element-wise:"
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_4/Assert/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "x (assert_equal_4/x:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_4/Assert/Const_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "y (strided_slice_4:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/Switch"
+  op: "Switch"
+  input: "assert_equal_4/All"
+  input: "assert_equal_4/All"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/switch_t"
+  op: "Identity"
+  input: "assert_equal_4/Assert/AssertGuard/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/switch_f"
+  op: "Identity"
+  input: "assert_equal_4/Assert/AssertGuard/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/pred_id"
+  op: "Identity"
+  input: "assert_equal_4/All"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/NoOp"
+  op: "NoOp"
+  input: "^assert_equal_4/Assert/AssertGuard/switch_t"
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/control_dependency"
+  op: "Identity"
+  input: "assert_equal_4/Assert/AssertGuard/switch_t"
+  input: "^assert_equal_4/Assert/AssertGuard/NoOp"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@assert_equal_4/Assert/AssertGuard/switch_t"
+      }
+    }
+  }
+
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/Assert/data_0"
+  op: "Const"
+  input: "^assert_equal_4/Assert/AssertGuard/switch_f"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "The provided sample_rate does not match output from reader."
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/Assert/data_1"
+  op: "Const"
+  input: "^assert_equal_4/Assert/AssertGuard/switch_f"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Condition x == y did not hold element-wise:"
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/Assert/data_2"
+  op: "Const"
+  input: "^assert_equal_4/Assert/AssertGuard/switch_f"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "x (assert_equal_4/x:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/Assert/data_4"
+  op: "Const"
+  input: "^assert_equal_4/Assert/AssertGuard/switch_f"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "y (strided_slice_4:0) = "
+      }
+    }
+  }
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/Assert"
+  op: "Assert"
+  input: "assert_equal_4/Assert/AssertGuard/Assert/Switch"
+  input: "assert_equal_4/Assert/AssertGuard/Assert/data_0"
+  input: "assert_equal_4/Assert/AssertGuard/Assert/data_1"
+  input: "assert_equal_4/Assert/AssertGuard/Assert/data_2"
+  input: "assert_equal_4/Assert/AssertGuard/Assert/Switch_1"
+  input: "assert_equal_4/Assert/AssertGuard/Assert/data_4"
+  input: "assert_equal_4/Assert/AssertGuard/Assert/Switch_2"
+  attr {
+    key: "T"
+    value {
+      list {
+        type: DT_STRING
+        type: DT_STRING
+        type: DT_STRING
+        type: DT_FLOAT
+        type: DT_STRING
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    key: "summarize"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/Assert/Switch"
+  op: "Switch"
+  input: "assert_equal_4/All"
+  input: "assert_equal_4/Assert/AssertGuard/pred_id"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@assert_equal_4/All"
+      }
+    }
+  }
+
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/Assert/Switch_1"
+  op: "Switch"
+  input: "assert_equal_4/x"
+  input: "assert_equal_4/Assert/AssertGuard/pred_id"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@assert_equal_4/x"
+      }
+    }
+  }
+
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/Assert/Switch_2"
+  op: "Switch"
+  input: "strided_slice_4"
+  input: "assert_equal_4/Assert/AssertGuard/pred_id"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@strided_slice_4"
+      }
+    }
+  }
+
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/control_dependency_1"
+  op: "Identity"
+  input: "assert_equal_4/Assert/AssertGuard/switch_f"
+  input: "^assert_equal_4/Assert/AssertGuard/Assert"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@assert_equal_4/Assert/AssertGuard/switch_f"
+      }
+    }
+  }
+
+}
+node {
+  name: "assert_equal_4/Assert/AssertGuard/Merge"
+  op: "Merge"
+  input: "assert_equal_4/Assert/AssertGuard/control_dependency_1"
+  input: "assert_equal_4/Assert/AssertGuard/control_dependency"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "rfft/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2048
+      }
+    }
+  }
+}
+node {
+  name: "rfft/Pad/paddings"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000P\003\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "rfft/Pad"
+  op: "Pad"
+  input: "control_dependency_1"
+  input: "rfft/Pad/paddings"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2048
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "rfft"
+  op: "RFFT"
+  input: "rfft/Pad"
+  input: "rfft/Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Abs_1"
+  op: "ComplexAbs"
+  input: "rfft"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "Tout"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Square"
+  op: "Square"
+  input: "Abs_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/sample_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 24000.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/lower_edge_hertz"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 80.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/upper_edge_hertz"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 12000.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/truediv/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 2.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/truediv"
+  op: "RealDiv"
+  input: "linear_to_mel_weight_matrix/sample_rate"
+  input: "linear_to_mel_weight_matrix/truediv/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/LinSpace/num"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1025
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/LinSpace"
+  op: "LinSpace"
+  input: "linear_to_mel_weight_matrix/Const"
+  input: "linear_to_mel_weight_matrix/truediv"
+  input: "linear_to_mel_weight_matrix/LinSpace/num"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/strided_slice"
+  op: "StridedSlice"
+  input: "linear_to_mel_weight_matrix/LinSpace"
+  input: "linear_to_mel_weight_matrix/strided_slice/stack"
+  input: "linear_to_mel_weight_matrix/strided_slice/stack_1"
+  input: "linear_to_mel_weight_matrix/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel/truediv/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 700.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel/truediv"
+  op: "RealDiv"
+  input: "linear_to_mel_weight_matrix/strided_slice"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel/truediv/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel/add/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel/add"
+  op: "Add"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel/add/x"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel/truediv"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel/Log"
+  op: "Log"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel/mul/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 1127.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel/mul"
+  op: "Mul"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel/mul/x"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel/Log"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/ExpandDims"
+  op: "ExpandDims"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel/mul"
+  input: "linear_to_mel_weight_matrix/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_1/truediv/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 700.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_1/truediv"
+  op: "RealDiv"
+  input: "linear_to_mel_weight_matrix/lower_edge_hertz"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_1/truediv/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_1/add/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_1/add"
+  op: "Add"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_1/add/x"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_1/truediv"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_1/Log"
+  op: "Log"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_1/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_1/mul/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 1127.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_1/mul"
+  op: "Mul"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_1/mul/x"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_1/Log"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_2/truediv/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 700.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_2/truediv"
+  op: "RealDiv"
+  input: "linear_to_mel_weight_matrix/upper_edge_hertz"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_2/truediv/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_2/add/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_2/add"
+  op: "Add"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_2/add/x"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_2/truediv"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_2/Log"
+  op: "Log"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_2/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_2/mul/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_DOUBLE
+        tensor_shape {
+        }
+        double_val: 1127.0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/hertz_to_mel_2/mul"
+  op: "Mul"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_2/mul/x"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_2/Log"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/LinSpace_1/num"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 82
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/LinSpace_1"
+  op: "LinSpace"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_1/mul"
+  input: "linear_to_mel_weight_matrix/hertz_to_mel_2/mul"
+  input: "linear_to_mel_weight_matrix/LinSpace_1/num"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 82
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/frame_length"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/frame_step"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/range"
+  op: "Range"
+  input: "linear_to_mel_weight_matrix/frame/range/start"
+  input: "linear_to_mel_weight_matrix/frame/Rank"
+  input: "linear_to_mel_weight_matrix/frame/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/add/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/add"
+  op: "Add"
+  input: "linear_to_mel_weight_matrix/frame/axis"
+  input: "linear_to_mel_weight_matrix/frame/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/strided_slice/stack"
+  op: "Pack"
+  input: "linear_to_mel_weight_matrix/frame/axis"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/strided_slice/stack_1"
+  op: "Pack"
+  input: "linear_to_mel_weight_matrix/frame/add"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/strided_slice"
+  op: "StridedSlice"
+  input: "linear_to_mel_weight_matrix/frame/range"
+  input: "linear_to_mel_weight_matrix/frame/strided_slice/stack"
+  input: "linear_to_mel_weight_matrix/frame/strided_slice/stack_1"
+  input: "linear_to_mel_weight_matrix/frame/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 82
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/sub/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/sub"
+  op: "Sub"
+  input: "linear_to_mel_weight_matrix/frame/Rank"
+  input: "linear_to_mel_weight_matrix/frame/sub/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/sub_1"
+  op: "Sub"
+  input: "linear_to_mel_weight_matrix/frame/sub"
+  input: "linear_to_mel_weight_matrix/frame/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/packed/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/packed"
+  op: "Pack"
+  input: "linear_to_mel_weight_matrix/frame/strided_slice"
+  input: "linear_to_mel_weight_matrix/frame/packed/1"
+  input: "linear_to_mel_weight_matrix/frame/sub_1"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/split"
+  op: "SplitV"
+  input: "linear_to_mel_weight_matrix/frame/Shape"
+  input: "linear_to_mel_weight_matrix/frame/packed"
+  input: "linear_to_mel_weight_matrix/frame/split/split_dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tlen"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "num_split"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Reshape/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Reshape"
+  op: "Reshape"
+  input: "linear_to_mel_weight_matrix/frame/split:1"
+  input: "linear_to_mel_weight_matrix/frame/Reshape/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Size"
+  op: "Size"
+  input: "linear_to_mel_weight_matrix/frame/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Size_1"
+  op: "Size"
+  input: "linear_to_mel_weight_matrix/frame/split:2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/sub_2"
+  op: "Sub"
+  input: "linear_to_mel_weight_matrix/frame/Reshape"
+  input: "linear_to_mel_weight_matrix/frame/frame_length"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/floordiv"
+  op: "FloorDiv"
+  input: "linear_to_mel_weight_matrix/frame/sub_2"
+  input: "linear_to_mel_weight_matrix/frame/frame_step"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/add_1/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/add_1"
+  op: "Add"
+  input: "linear_to_mel_weight_matrix/frame/add_1/x"
+  input: "linear_to_mel_weight_matrix/frame/floordiv"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Maximum/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Maximum"
+  op: "Maximum"
+  input: "linear_to_mel_weight_matrix/frame/Maximum/x"
+  input: "linear_to_mel_weight_matrix/frame/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/gcd/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/floordiv_1"
+  op: "FloorDiv"
+  input: "linear_to_mel_weight_matrix/frame/frame_length"
+  input: "linear_to_mel_weight_matrix/frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/floordiv_2"
+  op: "FloorDiv"
+  input: "linear_to_mel_weight_matrix/frame/frame_step"
+  input: "linear_to_mel_weight_matrix/frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/floordiv_3"
+  op: "FloorDiv"
+  input: "linear_to_mel_weight_matrix/frame/Reshape"
+  input: "linear_to_mel_weight_matrix/frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/mul"
+  op: "Mul"
+  input: "linear_to_mel_weight_matrix/frame/floordiv_3"
+  input: "linear_to_mel_weight_matrix/frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/concat/values_1"
+  op: "Pack"
+  input: "linear_to_mel_weight_matrix/frame/mul"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/concat"
+  op: "ConcatV2"
+  input: "linear_to_mel_weight_matrix/frame/split"
+  input: "linear_to_mel_weight_matrix/frame/concat/values_1"
+  input: "linear_to_mel_weight_matrix/frame/split:2"
+  input: "linear_to_mel_weight_matrix/frame/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/concat_1/values_1"
+  op: "Pack"
+  input: "linear_to_mel_weight_matrix/frame/floordiv_3"
+  input: "linear_to_mel_weight_matrix/frame/gcd/Const"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/concat_1"
+  op: "ConcatV2"
+  input: "linear_to_mel_weight_matrix/frame/split"
+  input: "linear_to_mel_weight_matrix/frame/concat_1/values_1"
+  input: "linear_to_mel_weight_matrix/frame/split:2"
+  input: "linear_to_mel_weight_matrix/frame/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/zeros_like"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/ones_like/Shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/ones_like/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/ones_like"
+  op: "Fill"
+  input: "linear_to_mel_weight_matrix/frame/ones_like/Shape"
+  input: "linear_to_mel_weight_matrix/frame/ones_like/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/StridedSlice"
+  op: "StridedSlice"
+  input: "linear_to_mel_weight_matrix/LinSpace_1"
+  input: "linear_to_mel_weight_matrix/frame/zeros_like"
+  input: "linear_to_mel_weight_matrix/frame/concat"
+  input: "linear_to_mel_weight_matrix/frame/ones_like"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Reshape_1"
+  op: "Reshape"
+  input: "linear_to_mel_weight_matrix/frame/StridedSlice"
+  input: "linear_to_mel_weight_matrix/frame/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/range_1/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/range_1/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/range_1"
+  op: "Range"
+  input: "linear_to_mel_weight_matrix/frame/range_1/start"
+  input: "linear_to_mel_weight_matrix/frame/Maximum"
+  input: "linear_to_mel_weight_matrix/frame/range_1/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/mul_1"
+  op: "Mul"
+  input: "linear_to_mel_weight_matrix/frame/range_1"
+  input: "linear_to_mel_weight_matrix/frame/floordiv_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Reshape_2/shape/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Reshape_2/shape"
+  op: "Pack"
+  input: "linear_to_mel_weight_matrix/frame/Maximum"
+  input: "linear_to_mel_weight_matrix/frame/Reshape_2/shape/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Reshape_2"
+  op: "Reshape"
+  input: "linear_to_mel_weight_matrix/frame/mul_1"
+  input: "linear_to_mel_weight_matrix/frame/Reshape_2/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/range_2/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/range_2/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/range_2"
+  op: "Range"
+  input: "linear_to_mel_weight_matrix/frame/range_2/start"
+  input: "linear_to_mel_weight_matrix/frame/floordiv_1"
+  input: "linear_to_mel_weight_matrix/frame/range_2/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Reshape_3/shape/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Reshape_3/shape"
+  op: "Pack"
+  input: "linear_to_mel_weight_matrix/frame/Reshape_3/shape/0"
+  input: "linear_to_mel_weight_matrix/frame/floordiv_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Reshape_3"
+  op: "Reshape"
+  input: "linear_to_mel_weight_matrix/frame/range_2"
+  input: "linear_to_mel_weight_matrix/frame/Reshape_3/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/add_2"
+  op: "Add"
+  input: "linear_to_mel_weight_matrix/frame/Reshape_2"
+  input: "linear_to_mel_weight_matrix/frame/Reshape_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/GatherV2"
+  op: "GatherV2"
+  input: "linear_to_mel_weight_matrix/frame/Reshape_1"
+  input: "linear_to_mel_weight_matrix/frame/add_2"
+  input: "linear_to_mel_weight_matrix/frame/strided_slice"
+  attr {
+    key: "Taxis"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/concat_2/values_1"
+  op: "Pack"
+  input: "linear_to_mel_weight_matrix/frame/Maximum"
+  input: "linear_to_mel_weight_matrix/frame/frame_length"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/concat_2"
+  op: "ConcatV2"
+  input: "linear_to_mel_weight_matrix/frame/split"
+  input: "linear_to_mel_weight_matrix/frame/concat_2/values_1"
+  input: "linear_to_mel_weight_matrix/frame/split:2"
+  input: "linear_to_mel_weight_matrix/frame/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "linear_to_mel_weight_matrix/frame/Reshape_4"
+  op: "Reshape"
+  input: "linear_to_mel_weight_matrix/frame/GatherV2"
+  input: "linear_to_mel_weight_matrix/frame/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/split"
+  op: "Split"
+  input: "linear_to_mel_weight_matrix/split/split_dim"
+  input: "linear_to_mel_weight_matrix/frame/Reshape_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+          dim {
+            size: 1
+          }
+        }
+        shape {
+          dim {
+            size: 80
+          }
+          dim {
+            size: 1
+          }
+        }
+        shape {
+          dim {
+            size: 80
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Reshape/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000P\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Reshape"
+  op: "Reshape"
+  input: "linear_to_mel_weight_matrix/split"
+  input: "linear_to_mel_weight_matrix/Reshape/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000P\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Reshape_1"
+  op: "Reshape"
+  input: "linear_to_mel_weight_matrix/split:1"
+  input: "linear_to_mel_weight_matrix/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Reshape_2/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000P\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Reshape_2"
+  op: "Reshape"
+  input: "linear_to_mel_weight_matrix/split:2"
+  input: "linear_to_mel_weight_matrix/Reshape_2/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/sub"
+  op: "Sub"
+  input: "linear_to_mel_weight_matrix/ExpandDims"
+  input: "linear_to_mel_weight_matrix/Reshape"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/sub_1"
+  op: "Sub"
+  input: "linear_to_mel_weight_matrix/Reshape_1"
+  input: "linear_to_mel_weight_matrix/Reshape"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/truediv_1"
+  op: "RealDiv"
+  input: "linear_to_mel_weight_matrix/sub"
+  input: "linear_to_mel_weight_matrix/sub_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/sub_2"
+  op: "Sub"
+  input: "linear_to_mel_weight_matrix/Reshape_2"
+  input: "linear_to_mel_weight_matrix/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/sub_3"
+  op: "Sub"
+  input: "linear_to_mel_weight_matrix/Reshape_2"
+  input: "linear_to_mel_weight_matrix/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/truediv_2"
+  op: "RealDiv"
+  input: "linear_to_mel_weight_matrix/sub_2"
+  input: "linear_to_mel_weight_matrix/sub_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Minimum"
+  op: "Minimum"
+  input: "linear_to_mel_weight_matrix/truediv_1"
+  input: "linear_to_mel_weight_matrix/truediv_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Maximum"
+  op: "Maximum"
+  input: "linear_to_mel_weight_matrix/Const"
+  input: "linear_to_mel_weight_matrix/Minimum"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Pad/paddings"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix/Pad"
+  op: "Pad"
+  input: "linear_to_mel_weight_matrix/Maximum"
+  input: "linear_to_mel_weight_matrix/Pad/paddings"
+  attr {
+    key: "T"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1025
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "linear_to_mel_weight_matrix"
+  op: "Cast"
+  input: "linear_to_mel_weight_matrix/Pad"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_DOUBLE
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1025
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/range/limit"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/range"
+  op: "Range"
+  input: "Tensordot/range/start"
+  input: "Tensordot/range/limit"
+  input: "Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/range_1/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/range_1/limit"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/range_1/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/range_1"
+  op: "Range"
+  input: "Tensordot/range_1/start"
+  input: "Tensordot/range_1/limit"
+  input: "Tensordot/range_1/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/Shape"
+  op: "Shape"
+  input: "Square"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "Tensordot/range"
+  input: "Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/Cast"
+  op: "Cast"
+  input: "Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Tensordot/mul"
+  op: "Mul"
+  input: "Tensordot/Cast"
+  input: "Tensordot/range"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/Less"
+  op: "Less"
+  input: "Tensordot/range"
+  input: "Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/Cast_1"
+  op: "Cast"
+  input: "Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Tensordot/add"
+  op: "Add"
+  input: "Tensordot/range"
+  input: "Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/mul_1"
+  op: "Mul"
+  input: "Tensordot/Cast_1"
+  input: "Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/add_1"
+  op: "Add"
+  input: "Tensordot/mul"
+  input: "Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/range_2/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/range_2/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/range_2"
+  op: "Range"
+  input: "Tensordot/range_2/start"
+  input: "Tensordot/Rank"
+  input: "Tensordot/range_2/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "Tensordot/range_2"
+  input: "Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "Tensordot/Gather"
+  op: "Gather"
+  input: "Tensordot/Shape"
+  input: "Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "Tensordot/Gather_1"
+  op: "Gather"
+  input: "Tensordot/Shape"
+  input: "Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/Prod"
+  op: "Prod"
+  input: "Tensordot/Gather"
+  input: "Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/Prod_1"
+  op: "Prod"
+  input: "Tensordot/Gather_1"
+  input: "Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/concat"
+  op: "ConcatV2"
+  input: "Tensordot/Gather_1"
+  input: "Tensordot/Gather"
+  input: "Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "Tensordot/ListDiff"
+  input: "Tensordot/add_1"
+  input: "Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/stack"
+  op: "Pack"
+  input: "Tensordot/Prod"
+  input: "Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "Tensordot/transpose"
+  op: "Transpose"
+  input: "Square"
+  input: "Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/Reshape"
+  op: "Reshape"
+  input: "Tensordot/transpose"
+  input: "Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/Shape_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\004\000\000P\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/Rank_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/GreaterEqual_1/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/GreaterEqual_1"
+  op: "GreaterEqual"
+  input: "Tensordot/range_1"
+  input: "Tensordot/GreaterEqual_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/Cast_2"
+  op: "Cast"
+  input: "Tensordot/GreaterEqual_1"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Tensordot/mul_2"
+  op: "Mul"
+  input: "Tensordot/Cast_2"
+  input: "Tensordot/range_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/Less_1/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/Less_1"
+  op: "Less"
+  input: "Tensordot/range_1"
+  input: "Tensordot/Less_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/Cast_3"
+  op: "Cast"
+  input: "Tensordot/Less_1"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Tensordot/add_2"
+  op: "Add"
+  input: "Tensordot/range_1"
+  input: "Tensordot/Rank_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/mul_3"
+  op: "Mul"
+  input: "Tensordot/Cast_3"
+  input: "Tensordot/add_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/add_3"
+  op: "Add"
+  input: "Tensordot/mul_2"
+  input: "Tensordot/mul_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/range_3/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/range_3/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/range_3"
+  op: "Range"
+  input: "Tensordot/range_3/start"
+  input: "Tensordot/Rank_1"
+  input: "Tensordot/range_3/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/ListDiff_1"
+  op: "ListDiff"
+  input: "Tensordot/range_3"
+  input: "Tensordot/add_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "Tensordot/Gather_2"
+  op: "Gather"
+  input: "Tensordot/Shape_1"
+  input: "Tensordot/ListDiff_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "Tensordot/Gather_3"
+  op: "Gather"
+  input: "Tensordot/Shape_1"
+  input: "Tensordot/add_3"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/Prod_2"
+  op: "Prod"
+  input: "Tensordot/Gather_2"
+  input: "Tensordot/Const_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "Tensordot/Const_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/Prod_3"
+  op: "Prod"
+  input: "Tensordot/Gather_3"
+  input: "Tensordot/Const_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "Tensordot/Gather_3"
+  input: "Tensordot/Gather_2"
+  input: "Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/concat_3/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/concat_3"
+  op: "ConcatV2"
+  input: "Tensordot/add_3"
+  input: "Tensordot/ListDiff_1"
+  input: "Tensordot/concat_3/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/stack_1"
+  op: "Pack"
+  input: "Tensordot/Prod_3"
+  input: "Tensordot/Prod_2"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "Tensordot/transpose_1"
+  op: "Transpose"
+  input: "linear_to_mel_weight_matrix"
+  input: "Tensordot/concat_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "Tensordot/transpose_1"
+  input: "Tensordot/stack_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot/MatMul"
+  op: "MatMul"
+  input: "Tensordot/Reshape"
+  input: "Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "Tensordot/concat_4/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Tensordot/concat_4"
+  op: "ConcatV2"
+  input: "Tensordot/Gather"
+  input: "Tensordot/Gather_2"
+  input: "Tensordot/concat_4/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Tensordot"
+  op: "Reshape"
+  input: "Tensordot/MatMul"
+  input: "Tensordot/concat_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "pow/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "pow"
+  op: "Pow"
+  input: "Tensordot"
+  input: "pow/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "add_2/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 9.99999997475e-07
+      }
+    }
+  }
+}
+node {
+  name: "add_2"
+  op: "Add"
+  input: "pow"
+  input: "add_2/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Log"
+  op: "Log"
+  input: "add_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Maximum/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -8.0
+      }
+    }
+  }
+}
+node {
+  name: "Maximum"
+  op: "Maximum"
+  input: "Log"
+  input: "Maximum/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "control_dependency_2"
+  op: "Identity"
+  input: "Maximum"
+  input: "^assert_equal_4/Assert/AssertGuard/Merge"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@Maximum"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Const_11"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "add_3/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 9.99999997475e-07
+      }
+    }
+  }
+}
+node {
+  name: "add_3"
+  op: "Add"
+  input: "Const_11"
+  input: "add_3/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "Log_1"
+  op: "Log"
+  input: "add_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "Maximum_1/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -8.0
+      }
+    }
+  }
+}
+node {
+  name: "Maximum_1"
+  op: "Maximum"
+  input: "Log_1"
+  input: "Maximum_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "decoder_target"
+  op: "Identity"
+  input: "control_dependency_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "decoder_target_lengths"
+  op: "Identity"
+  input: "Neg_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "strided_slice_5/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_5/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_5/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_5"
+  op: "StridedSlice"
+  input: "Fill_1"
+  input: "strided_slice_5/stack"
+  input: "strided_slice_5/stack_1"
+  input: "strided_slice_5/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "Equal"
+  op: "Equal"
+  input: "Fill_1"
+  input: "strided_slice_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "Const_12"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "All_2"
+  op: "All"
+  input: "Equal"
+  input: "Const_12"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "Assert_2/AssertGuard/Switch"
+  op: "Switch"
+  input: "All_2"
+  input: "All_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Assert_2/AssertGuard/switch_t"
+  op: "Identity"
+  input: "Assert_2/AssertGuard/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Assert_2/AssertGuard/switch_f"
+  op: "Identity"
+  input: "Assert_2/AssertGuard/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Assert_2/AssertGuard/pred_id"
+  op: "Identity"
+  input: "All_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "Assert_2/AssertGuard/NoOp"
+  op: "NoOp"
+  input: "^Assert_2/AssertGuard/switch_t"
+}
+node {
+  name: "Assert_2/AssertGuard/control_dependency"
+  op: "Identity"
+  input: "Assert_2/AssertGuard/switch_t"
+  input: "^Assert_2/AssertGuard/NoOp"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@Assert_2/AssertGuard/switch_t"
+      }
+    }
+  }
+
+}
+node {
+  name: "Assert_2/AssertGuard/Assert"
+  op: "Assert"
+  input: "Assert_2/AssertGuard/Assert/Switch"
+  input: "Assert_2/AssertGuard/Assert/Switch_1"
+  attr {
+    key: "T"
+    value {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    key: "summarize"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "Assert_2/AssertGuard/Assert/Switch"
+  op: "Switch"
+  input: "All_2"
+  input: "Assert_2/AssertGuard/pred_id"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@All_2"
+      }
+    }
+  }
+
+}
+node {
+  name: "Assert_2/AssertGuard/Assert/Switch_1"
+  op: "Switch"
+  input: "Fill_1"
+  input: "Assert_2/AssertGuard/pred_id"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@Fill_1"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Assert_2/AssertGuard/control_dependency_1"
+  op: "Identity"
+  input: "Assert_2/AssertGuard/switch_f"
+  input: "^Assert_2/AssertGuard/Assert"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@Assert_2/AssertGuard/switch_f"
+      }
+    }
+  }
+
+}
+node {
+  name: "Assert_2/AssertGuard/Merge"
+  op: "Merge"
+  input: "Assert_2/AssertGuard/control_dependency_1"
+  input: "Assert_2/AssertGuard/control_dependency"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "control_dependency_3"
+  op: "Identity"
+  input: "strided_slice_5"
+  input: "^Assert_2/AssertGuard/Merge"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@strided_slice_5"
+      }
+    }
+  }
+
+}
+node {
+  name: "decoder_target_sample_rate"
+  op: "Identity"
+  input: "control_dependency_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "Shape_12"
+  op: "Shape"
+  input: "decoder_target"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "strided_slice_6/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_6/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_6/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_6"
+  op: "StridedSlice"
+  input: "Shape_12"
+  input: "strided_slice_6/stack"
+  input: "strided_slice_6/stack_1"
+  input: "strided_slice_6/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/range"
+  op: "Range"
+  input: "sequence_length_mask_2/range/start"
+  input: "strided_slice_6"
+  input: "sequence_length_mask_2/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/ExpandDims"
+  op: "ExpandDims"
+  input: "sequence_length_mask_2/range"
+  input: "sequence_length_mask_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_2/Shape"
+  op: "Shape"
+  input: "decoder_target_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/strided_slice"
+  op: "StridedSlice"
+  input: "sequence_length_mask_2/Shape"
+  input: "sequence_length_mask_2/strided_slice/stack"
+  input: "sequence_length_mask_2/strided_slice/stack_1"
+  input: "sequence_length_mask_2/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/Tile/multiples"
+  op: "Pack"
+  input: "sequence_length_mask_2/strided_slice"
+  input: "sequence_length_mask_2/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/Tile"
+  op: "Tile"
+  input: "sequence_length_mask_2/ExpandDims"
+  input: "sequence_length_mask_2/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "decoder_target_lengths"
+  input: "sequence_length_mask_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_2/Less"
+  op: "Less"
+  input: "sequence_length_mask_2/Tile"
+  input: "sequence_length_mask_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_2/Cast"
+  op: "Cast"
+  input: "sequence_length_mask_2/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "decoder_target_mask"
+  op: "Identity"
+  input: "sequence_length_mask_2/Cast"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "max_decoder_output_length"
+  op: "Identity"
+  input: "decoder_output_length"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "rfft_1/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2048
+      }
+    }
+  }
+}
+node {
+  name: "rfft_1/Pad/paddings"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000P\003\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "rfft_1/Pad"
+  op: "Pad"
+  input: "control_dependency_1"
+  input: "rfft_1/Pad/paddings"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2048
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "rfft_1"
+  op: "RFFT"
+  input: "rfft_1/Pad"
+  input: "rfft_1/Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Abs_2"
+  op: "ComplexAbs"
+  input: "rfft_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "Tout"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Square_1"
+  op: "Square"
+  input: "Abs_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "pow_1/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "pow_1"
+  op: "Pow"
+  input: "Square_1"
+  input: "pow_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "add_4/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 9.99999997475e-07
+      }
+    }
+  }
+}
+node {
+  name: "add_4"
+  op: "Add"
+  input: "pow_1"
+  input: "add_4/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Log_2"
+  op: "Log"
+  input: "add_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Maximum_2/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -8.0
+      }
+    }
+  }
+}
+node {
+  name: "Maximum_2"
+  op: "Maximum"
+  input: "Log_2"
+  input: "Maximum_2/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "synthesis_target"
+  op: "Identity"
+  input: "Maximum_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "synthesis_target_lengths"
+  op: "Identity"
+  input: "Neg_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Shape_13"
+  op: "Shape"
+  input: "synthesis_target"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "strided_slice_7/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_7/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_7/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_7"
+  op: "StridedSlice"
+  input: "Shape_13"
+  input: "strided_slice_7/stack"
+  input: "strided_slice_7/stack_1"
+  input: "strided_slice_7/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/range"
+  op: "Range"
+  input: "sequence_length_mask_3/range/start"
+  input: "strided_slice_7"
+  input: "sequence_length_mask_3/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_3/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/ExpandDims"
+  op: "ExpandDims"
+  input: "sequence_length_mask_3/range"
+  input: "sequence_length_mask_3/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_3/Shape"
+  op: "Shape"
+  input: "synthesis_target_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/strided_slice"
+  op: "StridedSlice"
+  input: "sequence_length_mask_3/Shape"
+  input: "sequence_length_mask_3/strided_slice/stack"
+  input: "sequence_length_mask_3/strided_slice/stack_1"
+  input: "sequence_length_mask_3/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/Tile/multiples"
+  op: "Pack"
+  input: "sequence_length_mask_3/strided_slice"
+  input: "sequence_length_mask_3/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/Tile"
+  op: "Tile"
+  input: "sequence_length_mask_3/ExpandDims"
+  input: "sequence_length_mask_3/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_3/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "sequence_length_mask_3/ExpandDims_1"
+  op: "ExpandDims"
+  input: "synthesis_target_lengths"
+  input: "sequence_length_mask_3/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_3/Less"
+  op: "Less"
+  input: "sequence_length_mask_3/Tile"
+  input: "sequence_length_mask_3/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "sequence_length_mask_3/Cast"
+  op: "Cast"
+  input: "sequence_length_mask_3/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "synthesis_target_mask"
+  op: "Identity"
+  input: "decoder_target_mask"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "decoder_input_sample_prob/tags"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "decoder_input_sample_prob"
+      }
+    }
+  }
+}
+node {
+  name: "decoder_input_sample_prob/values"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "decoder_input_sample_prob"
+  op: "ScalarSummary"
+  input: "decoder_input_sample_prob/tags"
+  input: "decoder_input_sample_prob/values"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_embedding/embedding"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: ")\000\000\000@\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_embedding/embedding"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.239045724273
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_embedding/embedding"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.239045724273
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 41
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/max"
+  input: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_embedding/embedding"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 41
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/mul"
+  input: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 41
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_embedding/embedding"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 41
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 41
+        }
+        dim {
+          size: 64
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_embedding/embedding/Assign"
+  op: "Assign"
+  input: "seq2seq/speaker_embedding/embedding"
+  input: "seq2seq/speaker_embedding/embedding/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 41
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_embedding/embedding/read"
+  op: "Identity"
+  input: "seq2seq/speaker_embedding/embedding"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 41
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/embedding_lookup"
+  op: "Gather"
+  input: "seq2seq/speaker_embedding/embedding/read"
+  input: "encoder_speaker_ids"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "speaker_embedding_lookup"
+  op: "PlaceholderWithDefault"
+  input: "seq2seq/embedding_lookup"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 64
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/Mean"
+  op: "Mean"
+  input: "seq2seq/Const"
+  input: "seq2seq/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/embedding/embedding/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/embedding/embedding"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "8\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/embedding/embedding/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/embedding/embedding"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.138675048947
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/embedding/embedding/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/embedding/embedding"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.138675048947
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/embedding/embedding/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/embedding/embedding/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 56
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/embedding/embedding/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/embedding/embedding/Initializer/random_uniform/max"
+  input: "seq2seq/embedding/embedding/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/embedding/embedding"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/embedding/embedding/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/embedding/embedding/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/embedding/embedding/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 56
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/embedding/embedding/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/embedding/embedding/Initializer/random_uniform/mul"
+  input: "seq2seq/embedding/embedding/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 56
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/embedding/embedding"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 56
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 56
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/embedding/embedding/Assign"
+  op: "Assign"
+  input: "seq2seq/embedding/embedding"
+  input: "seq2seq/embedding/embedding/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 56
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/embedding/embedding/read"
+  op: "Identity"
+  input: "seq2seq/embedding/embedding"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 56
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/embedding/token_to_embedding"
+  op: "Gather"
+  input: "seq2seq/embedding/embedding/read"
+  input: "encoder_input"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/embedding/embedding"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.10825317353
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.10825317353
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/embedding/token_to_embedding"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/axes"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/axes"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Cast_1"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Cast_1"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/range"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Gather"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Gather_1"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Prod"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Prod_1"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/embedding/token_to_embedding"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/weights/read"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/Const_2"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Tensordot"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/Dropout/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.125
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.125
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/Dropout/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/axes"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/axes"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Cast_1"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Cast_1"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/range"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Gather"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Gather_1"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Prod"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Prod_1"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/Dropout/Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/weights/read"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/Const_2"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Tensordot"
+  input: "seq2seq/encoder/pre_enc_rnn_net/fully_connected_1/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/Dropout_1/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/fully_connected_1/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/pre_enc_rnn_net/Dropout_1/Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.013505294919
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.013505294919
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter0/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\002\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00954968575388
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00954968575388
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 2
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_1/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_2/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_2"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_2/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_3/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_3"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_3/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter1/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00779728591442
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00779728591442
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_2"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_2/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_4/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_4"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_2/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_4/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_5/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_5"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_4"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_5/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter2/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\004\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00675264745951
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00675264745951
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 4
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_3"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_3/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_6/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_6"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_3/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_6/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_7/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_7"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_6"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_7/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_3"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter3/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_7"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\005\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00603975169361
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00603975169361
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 5
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 5
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 5
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 5
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 5
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 5
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 5
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_4"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_4"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_4/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_4"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_8/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_8"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_4/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_8/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_9/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_9"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_8"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_9/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_4"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter4/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_9"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\006\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00551351346076
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00551351346076
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 6
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 6
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 6
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 6
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 6
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 6
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 6
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_5"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_5/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_5/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_5/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_5"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_5"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_5/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_5/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_5/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_5"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_10/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_10"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_5/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_10/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_11/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_11"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_10"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_11/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_5"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter5/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_11"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\007\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00510452175513
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00510452175513
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 7
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 7
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 7
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 7
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 7
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 7
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 7
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_6"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_6/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_6/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_6/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_6"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_6"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_6/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_6/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_6/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_6"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_12/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_12"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_6/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_12/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_13/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_13"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_12"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_13/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_6"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter6/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_13"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\010\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00477484287694
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00477484287694
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 8
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 8
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 8
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 8
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 8
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 8
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 8
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_7"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_7/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_7/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_7/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_7"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_7"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_7/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_7/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_7/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_7"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_14/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_14"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_7/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_14/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_15/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_15"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_14"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_15/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_7"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter7/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_15"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\t\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00450176512823
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00450176512823
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 9
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 9
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 9
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 9
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 9
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 9
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 9
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_8"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_8/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_8/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_8/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_8"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_8"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_8/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_8/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_8/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_8"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_16/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_16"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_8/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_16/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_17/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_17"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_16"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_17/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_8"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter8/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_17"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\n\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00427074916661
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00427074916661
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 10
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 10
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 10
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 10
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 10
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 10
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 10
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_9"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_9/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_9/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_9/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_9"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_9"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_9/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_9/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_9/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_9"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_18/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_18"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_9/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_18/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_19/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_19"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_18"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_19/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_9"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter9/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_19"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\013\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00407199980691
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00407199980691
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 11
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 11
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 11
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 11
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 11
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 11
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 11
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_10"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_10/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_10/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_10/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_10"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_10"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_10/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_10/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_10/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_10"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_20/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_20"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_10/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_20/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_21/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_21"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_20"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_21/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_10"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter10/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_21"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\014\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00389864295721
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00389864295721
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 12
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 12
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 12
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 12
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 12
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 12
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 12
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_11"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_11/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_11/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_11/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_11"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_11"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_11/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_11/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_11/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_11"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_22/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_22"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_11/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_22/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_23/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_23"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_22"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_23/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_11"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter11/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_23"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\r\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00374569487758
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00374569487758
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 13
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 13
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 13
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 13
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 13
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 13
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 13
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_12"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_12/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_12/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_12/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_12"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_12"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_12/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_12/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_12/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_12"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_24/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_24"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_12/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_24/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_25/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_25"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_24"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_25/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_12"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter12/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_25"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\016\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00360944191925
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00360944191925
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 14
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 14
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 14
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 14
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 14
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 14
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 14
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_13"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_13/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_13/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_13/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_13"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_13"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_13/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_13/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_13/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_13"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_26/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_26"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_13/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_26/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_27/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_27"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_26"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_27/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_13"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter13/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_27"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\017\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00348705216311
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00348705216311
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 15
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 15
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 15
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 15
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 15
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 15
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 15
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_14"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_14/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_14/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_14/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_14"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_14"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_14/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_14/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_14/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_14"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_28/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_28"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_14/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_28/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_29/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_29"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_28"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_29/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_14"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter14/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_29"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\020\000\000\000\200\000\000\000\001\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.00337632372975
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00337632372975
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 16
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 16
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 16
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 16
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 16
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 16
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 16
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 128
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_15"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_15/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_15/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_15/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_15"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/Shape_15"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_15/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_15/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_15/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/strided_slice_15"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_30/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_30"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/sequence_length_mask_15/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_30/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_31/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_31"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_30"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_31/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_15"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/conv1d_filter15/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/ExpandDims_31"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_4"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_5"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_6"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_7"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_8"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_9"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_10"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_11"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_12"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_13"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_14"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/mul_15"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 16
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 16
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/maxpool/MaxPool"
+  op: "MaxPool"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/conv1d_bank/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 16
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "ksize"
+    value {
+      list {
+        i: 1
+        i: 2
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/maxpool/MaxPool"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Shape_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_1/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_2/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_2"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_2/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_3/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_3"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_3/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/maxpool/MaxPool"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 16
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\003\000\000\000\020\000\000\000\000\001\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.015625
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.015625
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 16
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 16
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 16
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 16
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 16
+        }
+        dim {
+          size: 256
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 16
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 16
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul_1"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 16
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Shape_2"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Shape_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_2/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_4/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_4"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_2/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_4/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_5/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_5"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_4"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_5/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv1/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        tensor_content: "\003\000\000\000\001\000\000\000\000\001\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 1
+        }
+        dim {
+          size: 256
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul_2"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/Const"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance/Initializer/ones"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance/Initializer/ones"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/FusedBatchNorm"
+  op: "FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/Conv2D"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/Const"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/beta/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_mean/read"
+  input: "seq2seq/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/moving_variance/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "epsilon"
+    value {
+      f: 0.0010000000475
+    }
+  }
+  attr {
+    key: "is_training"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0010000000475
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Shape_3"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/FusedBatchNorm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Shape_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_3/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/strided_slice_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_6/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_6"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/sequence_length_mask_3/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_6/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_7/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_7"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_6"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_7/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul_3"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/fixed_conv2/BatchNorm/FusedBatchNorm"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/ExpandDims_7"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Squeeze_1"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/mul_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Squeeze"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Squeeze_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.176776692271
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.176776692271
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 64
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/MatMul"
+  op: "MatMul"
+  input: "speaker_embedding_lookup"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/MatMul"
+  input: "seq2seq/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/Dropout/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/Dropout/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/Reshape/shape/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/Reshape/shape/2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/Reshape/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/Reshape/shape/1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/Reshape/shape/2"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/projection_0/Dropout/Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/Reshape/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/conv1d_maxpool_residual/Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/Reshape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/axes"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/axes"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Cast_1"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Cast_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Gather"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Gather_1"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Prod"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Prod_1"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/speaker_conditioning/cbhg_pre_highway/combination_0/add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/weights/read"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/Const_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/Tensordot"
+  input: "seq2seq/encoder/cbhg/hw_mlp/linear_proj/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/biases"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate0/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate0/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/axes"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/axes"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Cast_1"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Cast_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Gather"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Gather_1"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Prod"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Prod_1"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/weights/read"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/Const_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Tensordot"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate0/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/biases"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden0/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden0/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/axes"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/axes"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Cast_1"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Cast_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Gather"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Gather_1"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Prod"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Prod_1"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/weights/read"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/Const_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Tensordot"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden0/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Sigmoid"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden0/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub/x"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate0/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/linear_proj/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/biases"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate1/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/axes"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/axes"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Cast_1"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Cast_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Gather"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Gather_1"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Prod"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Prod_1"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/weights/read"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/Const_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Tensordot"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate1/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/biases"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden1/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/axes"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/axes"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Cast_1"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Cast_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Gather"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Gather_1"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Prod"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Prod_1"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/weights/read"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/Const_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Tensordot"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden1/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Sigmoid"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden1/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_1/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_1"
+  op: "Sub"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_1/x"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate1/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_3"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/biases"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate2/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate2/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/axes"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/axes"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Cast_1"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Cast_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Gather"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Gather_1"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Prod"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Prod_1"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/weights/read"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/Const_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Tensordot"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate2/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/biases"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden2/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden2/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/axes"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/axes"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Cast_1"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Cast_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Gather"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Gather_1"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Prod"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Prod_1"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/weights/read"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/Const_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Tensordot"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden2/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_4"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Sigmoid"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden2/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_2/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_2"
+  op: "Sub"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_2/x"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate2/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_5"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_4"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/biases"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/gate3/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/gate3/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/axes"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/axes"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Cast_1"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Cast_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Gather"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Gather_1"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Prod"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Prod_1"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/weights/read"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/Const_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Tensordot"
+  input: "seq2seq/encoder/cbhg/hw_mlp/gate3/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/biases"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/hw_mlp/hidden3/biases/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/hw_mlp/hidden3/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/axes"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/GreaterEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/GreaterEqual"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/axes"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Less/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Cast_1"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/axes"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Rank"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Cast_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/ListDiff"
+  op: "ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_idx"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Gather"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/ListDiff"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Gather_1"
+  op: "Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/add_1"
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "validate_indices"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Prod"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Prod_1"
+  op: "Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Gather_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/ListDiff"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/add_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Prod"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Prod_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/weights/read"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Reshape_1"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Reshape_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Gather"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/Const_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Tensordot"
+  input: "seq2seq/encoder/cbhg/hw_mlp/hidden3/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_6"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Sigmoid"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/hidden3/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_3/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_3"
+  op: "Sub"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_3/x"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/gate3/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_7"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/sub_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_3"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_6"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/mul_7"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/Shape_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice_1/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/strided_slice_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims_2/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims_2"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims_2/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/hw_mlp/add_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/ExpandDims_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat/values_0"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/mul_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/sequence_length"
+  op: "Identity"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/transpose"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/Const"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/zeros/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/zeros"
+  op: "Fill"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/concat"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/sequence_length"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Equal"
+  op: "Equal"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Shape_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/All"
+  op: "All"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Equal"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Const"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Assert/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Expected shape for Tensor seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/sequence_length:0 is "
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Assert/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: " but saw shape: "
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Assert/Assert/data_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Expected shape for Tensor seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/sequence_length:0 is "
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Assert/Assert/data_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: " but saw shape: "
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Assert/Assert"
+  op: "Assert"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/All"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Assert/Assert/data_0"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Assert/Assert/data_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Shape_1"
+  attr {
+    key: "T"
+    value {
+      list {
+        type: DT_STRING
+        type: DT_INT32
+        type: DT_STRING
+        type: DT_INT32
+      }
+    }
+  }
+  attr {
+    key: "summarize"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/CheckSeqLen"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/sequence_length"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Assert/Assert"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Shape_2"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/transpose"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Shape_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_1/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Shape_3"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/transpose"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Shape_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_2/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Const_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/zeros/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/zeros"
+  op: "Fill"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Min"
+  op: "Min"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/CheckSeqLen"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Const_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Const_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Max"
+  op: "Max"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/CheckSeqLen"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Const_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/time"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray"
+  op: "TensorArrayV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_1"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/dynamic_rnn/output_0"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray_1"
+  op: "TensorArrayV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_1"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/dynamic_rnn/input_0"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/transpose"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3"
+  op: "TensorArrayScatterV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/transpose"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/iteration_counter"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/iteration_counter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Enter_1"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/time"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Enter_2"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Enter_3"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/GRUCellZeroState/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge"
+  op: "Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/NextIteration"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge_1"
+  op: "Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Enter_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/NextIteration_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge_2"
+  op: "Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Enter_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/NextIteration_2"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge_3"
+  op: "Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Enter_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/NextIteration_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Less/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Less/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Less_1"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Less/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/LogicalAnd"
+  op: "LogicalAnd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Less_1"
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/LoopCond"
+  op: "LoopCond"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/LogicalAnd"
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch"
+  op: "Switch"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch_1"
+  op: "Switch"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch_2"
+  op: "Switch"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge_2"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch_3"
+  op: "Switch"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Merge_3"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_2"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch_2:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_3"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch_3:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/add/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/TensorArrayReadV3"
+  op: "TensorArrayReadV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/TensorArrayReadV3/Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/TensorArrayReadV3/Enter_1"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/TensorArrayReadV3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/TensorArrayReadV3/Enter_1"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.10825317353
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.10825317353
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/bias/Initializer/Const"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/bias"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/bias/Initializer/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/bias/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.125
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.125
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/bias"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/bias/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/concat/axis"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/TensorArrayReadV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/concat"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/kernel/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/gates/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/split/split_dim"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/split"
+  op: "Split"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/split/split_dim"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/split"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/concat_1/axis"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/TensorArrayReadV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/MatMul_1"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/concat_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/MatMul_1/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/MatMul_1/Enter"
+  op: "Enter"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/kernel/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/BiasAdd_1"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/MatMul_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/BiasAdd_1/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/BiasAdd_1/Enter"
+  op: "Enter"
+  input: "seq2seq/encoder/cbhg/bi_gru/fw/gru_cell/candidate/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/BiasAdd_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/split:1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/sub/x"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/sub/x"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/sub"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/Tanh"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/mul_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/GreaterEqual/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/GreaterEqual/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/CheckSeqLen"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Select"
+  op: "Select"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Select/Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/add"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Select/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/add"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/GreaterEqual_1"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/GreaterEqual/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Select_1"
+  op: "Select"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/GreaterEqual_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/add"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/TensorArrayWrite/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Select"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/add"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/gru_cell/add"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/add_1/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Identity_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/add_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/NextIteration"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/NextIteration_1"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/NextIteration_2"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/TensorArrayWrite/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/NextIteration_3"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Select_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Exit"
+  op: "Exit"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Exit_1"
+  op: "Exit"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Exit_2"
+  op: "Exit"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Exit_3"
+  op: "Exit"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Switch_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayStack/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Exit_2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayStack/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayStack/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayStack/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayStack/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayStack/TensorArraySizeV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayStack/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayStack/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayStack/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/while/Exit_2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArray"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Const_4"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Rank_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range_1/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range_1/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range_1"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range_1/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/Rank_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range_1/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat_2/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat_2/values_0"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/range_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/TensorArrayStack/TensorArrayGatherV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/ReverseSequence"
+  op: "ReverseSequence"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/mul_1"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tlen"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "batch_dim"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seq_dim"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Rank"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat/values_0"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/ReverseSequence"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/sequence_length"
+  op: "Identity"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/transpose"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/Const"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/zeros/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/zeros"
+  op: "Fill"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/concat"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/sequence_length"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Equal"
+  op: "Equal"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Shape_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/stack"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/All"
+  op: "All"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Equal"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Const"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Assert/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Expected shape for Tensor seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/sequence_length:0 is "
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Assert/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: " but saw shape: "
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Assert/Assert/data_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "Expected shape for Tensor seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/sequence_length:0 is "
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Assert/Assert/data_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: " but saw shape: "
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Assert/Assert"
+  op: "Assert"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/All"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Assert/Assert/data_0"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Assert/Assert/data_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Shape_1"
+  attr {
+    key: "T"
+    value {
+      list {
+        type: DT_STRING
+        type: DT_INT32
+        type: DT_STRING
+        type: DT_INT32
+      }
+    }
+  }
+  attr {
+    key: "summarize"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/CheckSeqLen"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/sequence_length"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Assert/Assert"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Shape_2"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/transpose"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Shape_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_1/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Shape_3"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/transpose"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Shape_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_2/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/ExpandDims"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Const_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/zeros/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/zeros"
+  op: "Fill"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Min"
+  op: "Min"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/CheckSeqLen"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Const_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Const_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Max"
+  op: "Max"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/CheckSeqLen"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Const_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/time"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray"
+  op: "TensorArrayV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_1"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/dynamic_rnn/output_0"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray_1"
+  op: "TensorArrayV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_1"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/dynamic_rnn/input_0"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/transpose"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/Shape"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/strided_slice/stack"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/strided_slice"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3"
+  op: "TensorArrayScatterV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/transpose"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/iteration_counter"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/iteration_counter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Enter_1"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/time"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Enter_2"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Enter_3"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/GRUCellZeroState/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge"
+  op: "Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/NextIteration"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge_1"
+  op: "Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Enter_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/NextIteration_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge_2"
+  op: "Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Enter_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/NextIteration_2"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge_3"
+  op: "Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Enter_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/NextIteration_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Less/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Less/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Less_1"
+  op: "Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Less/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/LogicalAnd"
+  op: "LogicalAnd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Less"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Less_1"
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/LoopCond"
+  op: "LoopCond"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/LogicalAnd"
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch"
+  op: "Switch"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch_1"
+  op: "Switch"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch_2"
+  op: "Switch"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge_2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge_2"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch_3"
+  op: "Switch"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Merge_3"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_2"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch_2:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_3"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch_3:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/add/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/TensorArrayReadV3"
+  op: "TensorArrayReadV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/TensorArrayReadV3/Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/TensorArrayReadV3/Enter_1"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/TensorArrayReadV3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/TensorArrayReadV3/Enter_1"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.10825317353
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.10825317353
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/bias/Initializer/Const"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/bias"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/bias/Initializer/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/bias/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.125
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.125
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/bias"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/bias/read"
+  op: "Identity"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/concat/axis"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/TensorArrayReadV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/concat"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/kernel/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/gates/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/split/split_dim"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/split"
+  op: "Split"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/split/split_dim"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/split"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/concat_1/axis"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/TensorArrayReadV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/MatMul_1"
+  op: "MatMul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/concat_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/MatMul_1/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/MatMul_1/Enter"
+  op: "Enter"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/kernel/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/BiasAdd_1"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/MatMul_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/BiasAdd_1/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/BiasAdd_1/Enter"
+  op: "Enter"
+  input: "seq2seq/encoder/cbhg/bi_gru/bw/gru_cell/candidate/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/BiasAdd_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/split:1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/sub/x"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/sub/x"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/sub"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/Tanh"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/add"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/mul_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/GreaterEqual/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/GreaterEqual/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/CheckSeqLen"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Select"
+  op: "Select"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Select/Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/add"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Select/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/add"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/GreaterEqual_1"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/GreaterEqual/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Select_1"
+  op: "Select"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/GreaterEqual_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/add"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/TensorArrayWrite/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Select"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/add"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/gru_cell/add"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/add_1/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Identity_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/add_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/NextIteration"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/NextIteration_1"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/NextIteration_2"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/TensorArrayWrite/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/NextIteration_3"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Select_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Exit"
+  op: "Exit"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Exit_1"
+  op: "Exit"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Exit_2"
+  op: "Exit"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Exit_3"
+  op: "Exit"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Switch_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayStack/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Exit_2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayStack/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayStack/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayStack/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayStack/range/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayStack/TensorArraySizeV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayStack/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayStack/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayStack/range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/while/Exit_2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArray"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Const_4"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Rank_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range_1/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range_1/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range_1"
+  op: "Range"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range_1/start"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/Rank_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range_1/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat_2/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat_2/values_0"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/range_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/TensorArrayStack/TensorArrayGatherV3"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/ReverseSequence"
+  op: "ReverseSequence"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/bw/bw/transpose_1"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tlen"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "batch_dim"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seq_dim"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder/encoder/cbhg/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/bi_gru/fw/fw/transpose_1"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/ReverseSequence"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/encoder_top"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder/encoder/cbhg/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "encoder_top_pre_conditioning"
+  op: "Identity"
+  input: "seq2seq/seq2seq/encoder_top"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 64
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/MatMul"
+  op: "MatMul"
+  input: "speaker_embedding_lookup"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/MatMul"
+  input: "seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/projection_0/Dropout/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/projection_0/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/projection_0/Dropout/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/Shape"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/strided_slice/stack"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/Reshape/shape/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/Reshape/shape/2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/Reshape/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/strided_slice"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/Reshape/shape/1"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/Reshape/shape/2"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/projection_0/Dropout/Identity"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/Reshape/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/add"
+  op: "Add"
+  input: "encoder_top_pre_conditioning"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/Reshape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/Shape"
+  input: "seq2seq/seq2seq/strided_slice/stack"
+  input: "seq2seq/seq2seq/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq/strided_slice"
+  input: "seq2seq/seq2seq/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/sequence_length_mask/range"
+  input: "seq2seq/seq2seq/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "encoder_input_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "encoder_input_lengths"
+  input: "seq2seq/seq2seq/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq/speaker_conditioning/encoder_top/combination_0/add"
+  input: "seq2seq/seq2seq/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/Shape"
+  input: "seq2seq/seq2seq_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/sub/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/sub"
+  op: "Sub"
+  input: "encoder_input_mask"
+  input: "seq2seq/seq2seq_1/sub/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/mul/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1000.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/mul/x"
+  input: "seq2seq/seq2seq_1/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/GmmAttention/Shape"
+  input: "seq2seq/seq2seq_1/GmmAttention/strided_slice/stack"
+  input: "seq2seq/seq2seq_1/GmmAttention/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_1/GmmAttention/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/ToFloat"
+  op: "Cast"
+  input: "seq2seq/seq2seq_1/GmmAttention/strided_slice"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/DropoutWrapperInit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/DropoutWrapperInit/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/DropoutWrapperInit/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/range/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_1/GmmAttention/range/start"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/range/Cast_2"
+  op: "Cast"
+  input: "seq2seq/seq2seq_1/GmmAttention/range/delta"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/GmmAttention/range/Cast"
+  input: "seq2seq/seq2seq_1/GmmAttention/ToFloat"
+  input: "seq2seq/seq2seq_1/GmmAttention/range/Cast_2"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/GmmAttention/range"
+  input: "seq2seq/seq2seq_1/GmmAttention/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/GmmAttention/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_1/GmmAttention/ExpandDims"
+  input: "seq2seq/seq2seq_1/GmmAttention/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/GmmAttention/Tile"
+  input: "seq2seq/seq2seq_1/GmmAttention/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/strided_slice_1/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/strided_slice_1/stack_1"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/strided_slice_1/stack_2"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq/mul"
+  input: "seq2seq/seq2seq_1/GmmAttention/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_1/GmmAttention/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_1/GmmAttention/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 5
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 5
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\000\002\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0883883461356
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0883883461356
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/max"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+        dim {
+          size: 512
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/read"
+  op: "Identity"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 512
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 512
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/biases"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/GmmAttention/first_enc_timestep_proj/biases/read"
+  op: "Identity"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/GmmAttention/first_enc_timestep_proj/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/first_enc_timestep_proj/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/GmmAttention/strided_slice_1"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/first_enc_timestep_proj/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/GmmAttention/first_enc_timestep_proj/MatMul"
+  input: "seq2seq/GmmAttention/first_enc_timestep_proj/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_1/GmmAttention/split/split_dim"
+  input: "seq2seq/seq2seq_1/GmmAttention/first_enc_timestep_proj/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/zeros/shape/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/zeros/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/GmmAttention/zeros/shape/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/zeros/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/zeros"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/GmmAttention/zeros/shape"
+  input: "seq2seq/seq2seq_1/GmmAttention/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/zeros_1/shape/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 5
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/zeros_1/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/GmmAttention/zeros_1/shape/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/zeros_1/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/GmmAttention/zeros_1"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/GmmAttention/zeros_1/shape"
+  input: "seq2seq/seq2seq_1/GmmAttention/zeros_1/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/AttentionAggregator/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Shape"
+  op: "Shape"
+  input: "decoder_target"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT64
+        tensor_shape {
+        }
+        int64_val: 63
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Fill/dims"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Fill"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/Fill/dims"
+  input: "seq2seq/seq2seq_1/attention_decoder/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/assert_type/statically_determined_correct_type"
+  op: "NoOp"
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/transpose/perm"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/transpose"
+  op: "Transpose"
+  input: "decoder_target"
+  input: "seq2seq/seq2seq_1/attention_decoder/transpose/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/transpose_1/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/seq2seq_1/attention_decoder/Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/transpose_1/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_1/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_1/stack_1"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_1/stack_2"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\002\000\000\000\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/transpose"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 6
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 7
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\002\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/transpose_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/div/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/div"
+  op: "FloorDiv"
+  input: "decoder_target_lengths"
+  input: "seq2seq/seq2seq_1/attention_decoder/div/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/div_1/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/div_1"
+  op: "FloorDiv"
+  input: "max_decoder_output_length"
+  input: "seq2seq/seq2seq_1/attention_decoder/div_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/DropoutWrapperInit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/DropoutWrapperInit/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/DropoutWrapperInit/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/DropoutWrapperInit_1/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/DropoutWrapperInit_1/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/DropoutWrapperInit_1/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 80
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_2"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 64
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT64
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        int64_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Fill_1/dims/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 80
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Fill_1/dims"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/Fill_1/dims/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Fill_1/value"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Fill_1"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/Fill_1/dims"
+  input: "seq2seq/seq2seq_1/attention_decoder/Fill_1/value"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_4/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_4/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT64
+        tensor_shape {
+        }
+        int64_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_4"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/zeros_4/shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/zeros_4/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/Const"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/zeros/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/zeros"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/concat"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_2/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_2"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_2/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/Const_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/zeros_1/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/zeros_1"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/concat_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/zeros_1/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_3/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_3"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_3/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/Const_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/Const"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/zeros/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/zeros"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/concat"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_2/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_2"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_2/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/Const_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/zeros_1/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/zeros_1"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/concat_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/zeros_1/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_3/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_3"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/ExpandDims_3/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/Const_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 64
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/MatMul"
+  op: "MatMul"
+  input: "speaker_embedding_lookup"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/MatMul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_0/Dropout/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_0/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/combination_0/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/zeros"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_0/Dropout/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 64
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/MatMul"
+  op: "MatMul"
+  input: "speaker_embedding_lookup"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/MatMul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_1/Dropout/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_1/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/combination_1/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState/DropoutWrapperZeroState/LSTMCellZeroState/zeros_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_1/Dropout/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 64
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/MatMul"
+  op: "MatMul"
+  input: "speaker_embedding_lookup"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/MatMul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_2/Dropout/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_2/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/combination_2/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/zeros"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_2/Dropout/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 64
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/MatMul"
+  op: "MatMul"
+  input: "speaker_embedding_lookup"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/MatMul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_3/Dropout/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_3/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/combination_3/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/MultiRNNCellZeroState/ResidualWrapperZeroState_1/DropoutWrapperZeroState/LSTMCellZeroState/zeros_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/projection_3/Dropout/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_5/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_5/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_BOOL
+        tensor_shape {
+        }
+        bool_val: false
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_5"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/zeros_5/shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/zeros_5/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_6"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_7"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/Shape_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_8/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_8/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/zeros_8"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/zeros_8/shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/zeros_8/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Shape_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\002\000\000\000P\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Shape_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Shape_4"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\002\000\000\000@\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Shape_5"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Shape_6"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Shape_7"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Shape_8"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/zeros_8"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Fill/dims"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Fill/value"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_BOOL
+        tensor_shape {
+        }
+        bool_val: false
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Fill"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Fill/dims"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Fill/value"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat/values_0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/Shape_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_1/values_0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_1/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/Shape_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_1/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_1"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_1/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_2/values_0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_2"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_2/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/Shape_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_2/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_2"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_2/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_3/values_0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_3/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_3"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_3/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/Shape_5"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_3/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_3/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT64
+        tensor_shape {
+        }
+        int64_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_3"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_3/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_4/values_0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_4/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_4"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_4/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/Shape_6"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_4/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_4/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_4"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_4/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_5/values_0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_5/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_5"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_5/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/Shape_7"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_5/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_5/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_5"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_5"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_5/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_6/values_0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_6/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_6"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_6/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/Shape_8"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_6/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_6/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_6"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_6"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_6/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/LessEqual/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/LessEqual"
+  op: "LessEqual"
+  input: "seq2seq/seq2seq_1/attention_decoder/div_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/LessEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/LogicalOr"
+  op: "LogicalOr"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/LessEqual"
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_like/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/LogicalOr"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_like/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_like"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_like/Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_like/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray/size"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray"
+  op: "TensorArrayV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray/size"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1/size"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1"
+  op: "TensorArrayV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1/size"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2/size"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2"
+  op: "TensorArrayV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2/size"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3/size"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3"
+  op: "TensorArrayV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3/size"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4/size"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4"
+  op: "TensorArrayV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4/size"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5/size"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5"
+  op: "TensorArrayV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5/size"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6/size"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6"
+  op: "TensorArrayV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6/size"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_1"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_2"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_3"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_4"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_5"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_6"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_7"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_8"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/combination_0/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_9"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/combination_1/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_10"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/combination_2/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_11"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/speaker_conditioning/decoder_state/combination_3/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_12"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/GmmAttention/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_13"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/GmmAttention/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_14"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/GmmAttention/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_15"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/GmmAttention/zeros_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_16"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/AttentionAggregator/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_17"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/zeros_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_18"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/Fill_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_19"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/Fill_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_20"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/zeros_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_21"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/zeros_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_22"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/LogicalOr"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_23"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_like"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_1"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_2"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_2"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_3"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_4"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_5"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_5"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_5"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_6"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_6"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_6"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_7"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_7"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_7"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_8"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_8"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_8"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_9"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_9"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_9"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_10"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_10"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_10"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_11"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_11"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_11"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_12"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_12"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_12"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_13"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_13"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_13"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_14"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_14"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_14"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_15"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_15"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_15"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_16"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_16"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_16"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_17"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_17"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_17"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_18"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_18"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_18"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_19"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_19"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_19"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_20"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_20"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_20"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_21"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_21"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_21"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_22"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_22"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_23"
+  op: "Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Enter_23"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_23"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/All"
+  op: "All"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Const"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalNot"
+  op: "LogicalNot"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/All"
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  op: "LoopCond"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalNot"
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_1"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_2"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_2"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_3"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_3"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_4"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_4"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_5"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_5"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_5"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_6"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_6"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_6"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_7"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_7"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_7"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_8"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_8"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_8"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_9"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_9"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_9"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_10"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_10"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_10"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_11"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_11"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_11"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_12"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_12"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_12"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_13"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_13"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_13"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_14"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_14"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_14"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_15"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_15"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_15"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_16"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_16"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_16"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_17"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_17"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_17"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_18"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_18"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_18"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_19"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_19"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_19"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_20"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_20"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_20"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_21"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_21"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_21"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_22"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_22"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_23"
+  op: "Switch"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_23"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Merge_23"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_2"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_2:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_3"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_3:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_4"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_4:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_5"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_5:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_6"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_6:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_7"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_7:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_8"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_8:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_9"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_9:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_10"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_10:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_11"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_11:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_12"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_12:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_13"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_13:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_14"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_14:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_15"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_15:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_16"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_16:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_17"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_17:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_18"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_18:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_19"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_19:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_20"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_20:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_21"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_21:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_22:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_23"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_23:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000P\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.20412415266
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.20412415266
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 64
+        }
+        dim {
+          size: 80
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 80
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 80
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/biases"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/MatMul/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/MatMul/Enter_1"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/MatMul/Enter"
+  op: "Enter"
+  input: "speaker_embedding_lookup"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/MatMul/Enter_1"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/speaker_conditioning/before_prenet/projection_0/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/Dropout/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/combination_0/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_19"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/Dropout/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "P\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.133630618453
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.133630618453
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 80
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/biases"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/combination_0/add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 80
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/keep_prob"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform/min"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform/max"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/Shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform/max"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform/RandomUniform"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/keep_prob"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/Floor"
+  op: "Floor"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/div"
+  op: "RealDiv"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected/Relu"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/keep_prob"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/div"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/Floor"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.125
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.125
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/biases"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected_1/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout/dropout/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected_1/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected_1/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected_1/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected_1/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected_1/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected_1/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/pre_dec_rnn_net/fully_connected_1/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected_1/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected_1/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/keep_prob"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected_1/Relu"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform/min"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform/max"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/Shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform/max"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform/RandomUniform"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/keep_prob"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/Floor"
+  op: "Floor"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/div"
+  op: "RealDiv"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/fully_connected_1/Relu"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/keep_prob"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/div"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/Floor"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros/shape/Enter"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros/shape/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros/shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/concat/axis"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/pre_dec_rnn_net/Dropout_1/dropout/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_12"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 384
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\002\000\000\000\004\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.060048058629
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.060048058629
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 640
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 640
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 640
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 640
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 640
+        }
+        dim {
+          size: 1024
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 640
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 640
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1024
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1024
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/bias"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/bias/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/concat/axis"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/concat"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_14"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 640
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/concat"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/kernel/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 640
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/lstm_cell/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 4
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/split/split_dim"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/split/split_dim"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 4
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/add/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/split:2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_13"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/Sigmoid_1"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/Sigmoid_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/Tanh"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/clip_by_value/Minimum/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 10.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/clip_by_value/Minimum"
+  op: "Minimum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/add_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/clip_by_value/Minimum/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/clip_by_value/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -10.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/clip_by_value"
+  op: "Maximum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/clip_by_value/Minimum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/clip_by_value/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/Sigmoid_2"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/split:3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/Tanh_1"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/clip_by_value"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/Sigmoid_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/Tanh_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.10000000149
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul/x"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.899999976158
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/clip_by_value"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Const"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_13"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul_2/x"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.899999976158
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul_2/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul_3"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Const"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_14"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/mul_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.125
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.125
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/bias"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/bias/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_hidden/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/mul_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_hidden/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_hidden/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/kernel/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_hidden/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_hidden/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_hidden/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_hidden/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_hidden/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_hidden/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_hidden/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\017\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.204836621881
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.204836621881
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+        dim {
+          size: 15
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 15
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 15
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/bias"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/bias/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_output/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_hidden/Relu"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_output/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_output/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/kernel/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_output/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_output/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_output/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_output/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/AttentionAggregator/GmmAttention/gmm_mlp_output/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 15
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Const_1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/split/split_dim"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/split/split_dim"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/gmm_mlp_output/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Exp"
+  op: "Exp"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/split:2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Softmax"
+  op: "Softmax"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Exp_1"
+  op: "Exp"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_15"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Exp_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Minimum"
+  op: "Minimum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Minimum/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Minimum/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/GmmAttention/ToFloat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims/dim"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Softmax"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_1/dim"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Minimum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_2/dim"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_2"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Exp"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_2/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul/x"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 6.28318548203
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/add_1/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 9.99999993923e-09
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/add_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Rsqrt"
+  op: "Rsqrt"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Rsqrt"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/sub/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/sub/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/GmmAttention/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/pow/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 2.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/pow"
+  op: "Pow"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/pow/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Neg"
+  op: "Neg"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/pow"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_2/x"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 2.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_2/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/add_2/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 9.99999993923e-09
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/add_2/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/div"
+  op: "RealDiv"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Neg"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/add_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Exp_2"
+  op: "Exp"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/div"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_3"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Exp_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Sum/reduction_indices"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Sum"
+  op: "Sum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Sum/reduction_indices"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_4"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Sum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_4/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_4/Enter"
+  op: "Enter"
+  input: "encoder_input_mask"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_3/dim"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_3"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_3/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/MatMul"
+  op: "BatchMatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/ExpandDims_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "adj_x"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "adj_y"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/MatMul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/concat_1/axis"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/concat_1"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Squeeze"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/lstm_cell/mul_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zeros/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros/shape/Enter"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zeros/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zeros"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zeros/shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros_1/shape/1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros_1/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros/shape/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros_1/shape/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros_1/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros_1"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros_1/shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros_1/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000\000\002\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.10206207633
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.10206207633
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 64
+        }
+        dim {
+          size: 512
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 512
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 512
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/biases"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/MatMul/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/MatMul/Enter"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/projection_0/Dropout/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/projection_0/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/combination_0/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/concat_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/projection_0/Dropout/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000\000\002\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.10206207633
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.10206207633
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 64
+        }
+        dim {
+          size: 512
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 512
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 512
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/biases"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/MatMul/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/MatMul/Enter"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/Dropout/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/combination_0/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/context_and_cell_output/combination_0/add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/projection_0/Dropout/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\003\000\000\000\004\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0578637570143
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0578637570143
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 768
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 768
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 768
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 768
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 768
+        }
+        dim {
+          size: 1024
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 768
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 768
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1024
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1024
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/bias"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/bias/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/concat/axis"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/combination_0/add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_9"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 768
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/concat"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/kernel/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 768
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 4
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/split/split_dim"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/split/split_dim"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 4
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/add/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/split:2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_8"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/Sigmoid_1"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/Sigmoid_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/Tanh"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/clip_by_value/Minimum/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 10.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/clip_by_value/Minimum"
+  op: "Minimum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/add_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/clip_by_value/Minimum/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/clip_by_value/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -10.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/clip_by_value"
+  op: "Maximum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/clip_by_value/Minimum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/clip_by_value/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/Sigmoid_2"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/split:3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/Tanh_1"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/clip_by_value"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/Sigmoid_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/Tanh_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.10000000149
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul/x"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.899999976158
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/clip_by_value"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/Const"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_8"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul_2/x"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.899999976158
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul_2/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul_3"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/Const"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_9"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/mul_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\002\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0883883461356
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0883883461356
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 512
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/biases"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/residual_projection/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/speaker_conditioning/combination_0/add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/residual_projection/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/residual_projection/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/residual_projection/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/residual_projection/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/residual_projection/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/residual_projection/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_0/residual_projection/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/residual_projection/BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/lstm_cell/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.136930644512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 64
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/biases"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/speaker_conditioning/before_prenet/projection_0/fully_connected/MatMul/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/MatMul/Enter"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 64
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/Dropout/Identity"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/combination_0/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/projection_0/Dropout/Identity"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\002\000\000\000\004\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0625
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0625
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 512
+        }
+        dim {
+          size: 1024
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1024
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1024
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/bias"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/bias/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/concat/axis"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/concat"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/combination_0/add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_11"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 512
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/concat"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 512
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1024
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 4
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/split/split_dim"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/split/split_dim"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 4
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/add/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/split:2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_10"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/Sigmoid_1"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/Sigmoid_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/Tanh"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/clip_by_value/Minimum/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 10.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/clip_by_value/Minimum"
+  op: "Minimum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/add_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/clip_by_value/Minimum/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/clip_by_value/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -10.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/clip_by_value"
+  op: "Maximum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/clip_by_value/Minimum"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/clip_by_value/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/Sigmoid_2"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/split:3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/Tanh_1"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/clip_by_value"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/Sigmoid_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/Tanh_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.10000000149
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul/x"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.899999976158
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/clip_by_value"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/Const"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_10"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul_2/x"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.899999976158
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul_2/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul_3"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/Const"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_11"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/mul_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/speaker_conditioning/combination_0/add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/lstm_cell/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack/1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack/2"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack/1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack/2"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack_1/1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack_1/2"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack_1/1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack_1/2"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack_2"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 6
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 6
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_1/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/stack/1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/stack"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/stack/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/stack_1/1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/stack_1/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/stack_2"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/strided_slice/stack"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/strided_slice/stack_1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/strided_slice/stack_2"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/strided_slice/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\240\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.120096117258
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.120096117258
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+        dim {
+          size: 160
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 160
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 160
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/biases"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/FullyConnected/fully_connected/biases"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/FullyConnected/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/fully_connected/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/fully_connected/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/fully_connected/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/fully_connected/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/fully_connected/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/fully_connected/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/FullyConnected/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 160
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/Reshape/shape/1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/Reshape/shape/2"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 80
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/Reshape/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/Reshape/shape/1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/Reshape/shape/2"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/fully_connected/BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/Reshape/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.152794972062
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.152794972062
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/max"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/mul"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+        dim {
+          size: 1
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/biases/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/biases"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/biases"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/biases"
+      }
+    }
+  }
+
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/biases/Assign"
+  op: "Assign"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/biases"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/biases/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/biases"
+      }
+    }
+  }
+
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/biases/read"
+  op: "Identity"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/biases"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/EndOfSequenceOutputLayer/fully_connected/MatMul"
+  op: "MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/EndOfSequenceOutputLayer/fully_connected/MatMul/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/EndOfSequenceOutputLayer/fully_connected/MatMul/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/weights/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/EndOfSequenceOutputLayer/fully_connected/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/EndOfSequenceOutputLayer/fully_connected/MatMul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/EndOfSequenceOutputLayer/fully_connected/BiasAdd/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/EndOfSequenceOutputLayer/fully_connected/BiasAdd/Enter"
+  op: "Enter"
+  input: "seq2seq/attention_decoder/EndOfSequenceOutputLayer/fully_connected/biases/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/EndOfSequenceOutputLayer/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/EndOfSequenceOutputLayer/fully_connected/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/EndOfSequenceOutputLayer/Squeeze"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Greater/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.990000009537
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Greater"
+  op: "Greater"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Sigmoid"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Greater/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalOr"
+  op: "LogicalOr"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Greater"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_17"
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros/shape/1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros/shape/2"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 64
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros/shape/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros/shape/1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros/shape/2"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros/shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros_1/shape/1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros_1/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros/shape/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros_1/shape/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros_1/Const"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT64
+        tensor_shape {
+        }
+        int64_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros_1"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros_1/shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros_1/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_2/stack"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\000\000\000\000\377\377\377\377\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_2/stack_1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_2/stack_2"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/Reshape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 5
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 5
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_3/stack"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\377\377\377\377"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_3/stack_1"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_3/stack_2"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalOr_1"
+  op: "LogicalOr"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalOr"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_2/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_2/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/GreaterEqual/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/GreaterEqual/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/div_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalOr_2"
+  op: "LogicalOr"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalOr_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/GreaterEqual"
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalNot_1"
+  op: "LogicalNot"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalAnd"
+  op: "LogicalAnd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalNot_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalOr_2"
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_23"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_3/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_3"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_3/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Fill"
+  op: "Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalAnd"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Fill"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_23"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_1"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_1/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/FullyConnected/Reshape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_1/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_2"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_2/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/EndOfSequenceOutputLayer/Squeeze"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_2/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_3"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_3/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_4"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_4/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/zeros_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_4/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_5"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_5/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_5/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_6"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_6/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/zeros_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_6/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_7"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_7/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/mul_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_7/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/zeros_6"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_8"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_8"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_9"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_9"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_0/zoneout/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_10"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_10"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_11"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_11"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/attention_decoder/multi_rnn_cell/cell_1/zoneout/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_12"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_12"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Squeeze"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_13"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_13"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_14"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_14"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/zoneout/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_15"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_15"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/GmmAttention/Minimum"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_16"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_22"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_17"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalOr"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_1"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_1/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_1/TensorArrayWriteV3/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_2"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_1/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_2"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_2/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_2/TensorArrayWriteV3/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_3"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_2/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_3"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_3/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_3/TensorArrayWriteV3/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_4"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_3/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_4"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_4/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_4/TensorArrayWriteV3/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_5"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_5"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_4/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_5"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_5/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_5/TensorArrayWriteV3/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_6"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_6"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_6"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_5/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_6"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_6/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_6/TensorArrayWriteV3/Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_7"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity_7"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_7"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_6/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_7"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_4/y"
+  op: "Const"
+  input: "^seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_4"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_4/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/add_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_1"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_2"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_1/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_3"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_2/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_4"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_3/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_5"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_4/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_6"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_5/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_7"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/TensorArrayWrite_6/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_8"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_8"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_9"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_9"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_10"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_10"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_11"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_11"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_12"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_12"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_13"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_13"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_14"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_14"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_15"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_15"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_16"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/AttentionAggregator/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_17"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select_16"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_18"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_19"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_20"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_21"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/strided_slice_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_22"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/LogicalOr_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/NextIteration_23"
+  op: "NextIteration"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Select"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_1"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_2"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_3"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_4"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_5"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_6"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_6"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_7"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_7"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_8"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_8"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_9"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_9"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_10"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_10"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_11"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_11"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_12"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_12"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_13"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_13"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_14"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_14"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_15"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_15"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 5
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_16"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_16"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_17"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_17"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_18"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_18"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_19"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_19"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_20"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_20"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_21"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_21"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_22"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_22"
+  attr {
+    key: "T"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_23"
+  op: "Exit"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Switch_23"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_1"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack/range/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack/TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack/range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_1"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 2
+        }
+        dim {
+          size: 80
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_1/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_1/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_1/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_1/range/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_1/TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_1/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_1/range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_1"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_2/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_3"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_2/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_2/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_2/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_2/range/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_2/TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_2/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_2/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_2/range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_3"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_2"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 2
+        }
+        dim {
+          size: 64
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_3/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_4"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_3/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_3/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_3/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_3/range/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_3/TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_3/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_3/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_3/range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_4"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_3"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 2
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_4/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_5"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_4/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_4/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_4/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_4/range/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_4/TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_4/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_4/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_4/range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_5"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_4"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_5/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_6"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_5/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_5/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_5/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_5/range/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_5/TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_5/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_5/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_5/range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_6"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_5"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: 1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_6/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_7"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_6/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_6/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_6/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_6/range/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_6/TensorArraySizeV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_6/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_6/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_6/range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_7"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArray_6"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 4
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_7/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_7/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_7"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_7/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_7/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose"
+  op: "Transpose"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack/TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_7"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_1/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_1/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_1"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_1/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_1/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_8/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_8/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_8"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_8/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_8/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_1"
+  op: "Transpose"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_1/TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_8"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 4
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_2/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_2/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_2"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_2/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_2/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_9/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_9/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_9"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_9/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_9/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_2"
+  op: "Transpose"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_2/TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_9"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+          dim {
+            size: 64
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_3"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_3/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_3/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_3"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_3/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_3/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_10/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_10/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_10"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_10/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_10/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_3"
+  op: "Transpose"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_3/TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_10"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT64
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_4"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_4/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_4/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_4"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_4/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_4/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_11/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_11/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_11"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_11/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_11/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_4"
+  op: "Transpose"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_4/TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_11"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_5"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_5/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_5/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_5"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_5/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_5"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_5/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_12/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_12/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_12"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_12/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_5"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_12/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_5"
+  op: "Transpose"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_5/TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_12"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_6"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_6/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_6/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_6"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_6/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/Rank_6"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_6/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_13/values_0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_13/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_13"
+  op: "ConcatV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_13/values_0"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/range_6"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_13/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_6"
+  op: "Transpose"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/TensorArrayStack_6/TensorArrayGatherV3"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/concat_13"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Const_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Mean"
+  op: "Mean"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/comb_weights/tag"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "seq2seq/seq2seq_1/attention_decoder/comb_weights"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/comb_weights"
+  op: "HistogramSummary"
+  input: "seq2seq/seq2seq_1/attention_decoder/comb_weights/tag"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_6"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Shape_9"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 4
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/Shape_9"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Reshape/shape/2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 80
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Reshape/shape"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/Reshape/shape/2"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Reshape"
+  op: "Reshape"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose"
+  input: "seq2seq/seq2seq_1/attention_decoder/Reshape/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_1/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/while/Exit_23"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sub/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/sub/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask/Cast"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_2"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/ExpandDims"
+  input: "Maximum_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/Reshape"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Shape"
+  op: "Shape"
+  input: "decoder_target"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/strided_slice/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Maximum/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Maximum"
+  op: "Maximum"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Maximum/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2/paddings/1/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2/paddings/1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2/paddings/1/0"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Maximum"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2/paddings/0_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2/paddings/2_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2/paddings/0_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2/paddings/1"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2/paddings/2_1"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2"
+  op: "PadV2"
+  input: "decoder_target"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2/paddings"
+  input: "Maximum_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Slice/begin"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Slice/size/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Slice/size/2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Slice/size"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Slice/size/0"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Slice/size/2"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Slice"
+  op: "Slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/PadV2"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Slice/begin"
+  input: "seq2seq/seq2seq_1/attention_decoder/pad_or_truncate_sequence_tensor/Slice/size"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_3/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_3"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_3/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_3"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "decoder_target_lengths"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "decoder_target_lengths"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_5/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_5/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_5/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_5"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_5/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_5/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_5/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sub_1/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 9
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sub_1"
+  op: "Sub"
+  input: "decoder_target_lengths"
+  input: "seq2seq/seq2seq_1/attention_decoder/sub_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_4/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_4"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_4/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/range/start"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_4"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/range"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/sub_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/strided_slice/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/strided_slice"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/sub_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Tile"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_6/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_6/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_6/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_6"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/sequence_length_mask_2/Cast"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_6/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_6/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_6/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sub_2/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/sub_2"
+  op: "Sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/sub_2/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_6"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_7/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_7/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_7/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_7"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_7/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_7/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_7/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_5/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1000.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_5"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_7"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_5/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Shape_10"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_8/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_8/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_8/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/strided_slice_8"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_1/attention_decoder/Shape_10"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_8/stack"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_8/stack_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_8/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/zeros_like"
+  op: "ZerosLike"
+  input: "seq2seq/seq2seq_1/attention_decoder/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/GreaterEqual"
+  op: "GreaterEqual"
+  input: "seq2seq/seq2seq_1/attention_decoder/add_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/zeros_like"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/Select"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/GreaterEqual"
+  input: "seq2seq/seq2seq_1/attention_decoder/add_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/zeros_like"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/Neg"
+  op: "Neg"
+  input: "seq2seq/seq2seq_1/attention_decoder/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/Select_1"
+  op: "Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/GreaterEqual"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/Neg"
+  input: "seq2seq/seq2seq_1/attention_decoder/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/add_1"
+  input: "seq2seq/seq2seq_1/attention_decoder/sub_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/Select"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/Exp"
+  op: "Exp"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/Select_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/Log1p"
+  op: "Log1p"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/Exp"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/logistic_loss"
+  op: "Add"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/sub"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss/Log1p"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_6"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_5"
+  input: "seq2seq/seq2seq_1/attention_decoder/logistic_loss"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Const_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/Sum"
+  op: "Sum"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_6"
+  input: "seq2seq/seq2seq_1/attention_decoder/Const_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/ToFloat"
+  op: "Cast"
+  input: "seq2seq/seq2seq_1/attention_decoder/strided_slice_8"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/truediv"
+  op: "RealDiv"
+  input: "seq2seq/seq2seq_1/attention_decoder/Sum"
+  input: "seq2seq/seq2seq_1/attention_decoder/ToFloat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/eos_loss"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/truediv"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_7/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.00999999977648
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_1/attention_decoder/mul_7"
+  op: "Mul"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_7/x"
+  input: "seq2seq/seq2seq_1/attention_decoder/eos_loss"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient"
+  op: "StopGradient"
+  input: "seq2seq/seq2seq_1/attention_decoder/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  op: "StopGradient"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000P\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.169841557741
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.169841557741
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 80
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000P\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.169841557741
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.169841557741
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 80
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 80
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_residual_in/BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/1x1_skip_in/BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\002\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_0/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 4
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\004\000\000\000\004\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 4
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 4
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_1/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 8
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\010\000\000\000\010\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 8
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 8
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_2/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 16
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\020\000\000\000\020\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 16
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 16
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_3/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 32
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: " \000\000\000 \000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 32
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 32
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_4/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 64
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000@\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 64
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 64
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_5/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_6/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_7/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\002\000\000\000\002\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_8/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_9/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\002\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_10/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 4
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\004\000\000\000\004\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 4
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 4
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_11/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 8
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\010\000\000\000\010\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 8
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 8
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_12/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 16
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\020\000\000\000\020\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 16
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 16
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_13/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 32
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: " \000\000\000 \000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 32
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 32
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_14/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 64
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "@\000\000\000@\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 64
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 64
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_15/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 128
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_16/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\001\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 256
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_17/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721687823534
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 3
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 256
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 256
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/filter_shape"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\003\000\000\000\200\000\000\000\000\001\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/stack"
+  op: "Const"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\002\000\000\000\002\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/stack_1"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\002\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/mod"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/dilation_rate"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/mod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/mod_1"
+  op: "FloorMod"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/dilation_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/add_2"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/mod_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/add_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_3/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_3/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/paddings/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_2"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_3"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/paddings"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/paddings/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/mod_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_4/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_4/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/crops/0/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/crops/0"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/crops/0/0"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/strided_slice_4"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/crops"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/crops/0"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/paddings"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/concat/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/concat"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_2"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/required_space_to_batch_paddings/crops"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_2/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_2/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/concat_1/concat_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/concat_1"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/SpaceToBatchND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/SpaceToBatchND"
+  op: "SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/SpaceToBatchND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/SpaceToBatchND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "VALID"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/BatchToSpaceND/block_shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 512
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/BatchToSpaceND"
+  op: "BatchToSpaceND"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/conv1d_2/Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/BatchToSpaceND/block_shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tblock_shape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tcrops"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/BatchToSpaceND"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 256
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/split"
+  op: "Split"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/split/split_dim"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/conv1d/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/Tanh"
+  op: "Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/Sigmoid"
+  op: "Sigmoid"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/split:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/Tanh"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/Sigmoid"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_residual/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/gated_unit/mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/add_1"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_18/mul_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/1x1_skip/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/Shape_1"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice_1"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/Shape_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice_1/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice_1/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/strided_slice_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/sequence_length_mask_1/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/mul_1"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/add_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/dilation_layer_19/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\200\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.153093114495
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 128
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 128
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/Relu"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/1x1_output/BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/Relu"
+  op: "Relu"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_0/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/shape"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        tensor_content: "\001\000\000\000\200\000\000\000\001\004\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/min"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -0.0721374824643
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/max"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0721374824643
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/sub"
+  op: "Sub"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/max"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+      }
+    }
+  }
+
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/mul"
+  op: "Mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/RandomUniform"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform"
+  op: "Add"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/mul"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 128
+        }
+        dim {
+          size: 1025
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/Initializer/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias/Initializer/zeros"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1025
+          }
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias"
+  op: "VariableV2"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "container"
+    value {
+      s: "local"
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1025
+        }
+      }
+    }
+  }
+  attr {
+    key: "shared_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias/Assign"
+  op: "Assign"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias/Initializer/zeros"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "use_locking"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "validate_shape"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias/read"
+  op: "Identity"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/dilation_rate"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/Relu"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 128
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/kernel/read"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: 128
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/Conv2D"
+  op: "Conv2D"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    key: "dilations"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "padding"
+    value {
+      s: "SAME"
+    }
+  }
+  attr {
+    key: "strides"
+    value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+  attr {
+    key: "use_cudnn_on_gpu"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/Squeeze"
+  op: "Squeeze"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/Conv2D"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "squeeze_dims"
+    value {
+      list {
+        i: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/BiasAdd"
+  op: "BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/conv1d/Squeeze"
+  input: "seq2seq/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/bias/read"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: "NHWC"
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/BiasAdd"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/sequence_length_mask/Cast"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/1x1_output/BiasAdd"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/ExpandDims"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/Shape"
+  input: "seq2seq/seq2seq_2/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/range"
+  op: "Range"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/range/start"
+  input: "seq2seq/seq2seq_2/strided_slice"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/range"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/Shape"
+  op: "Shape"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/strided_slice"
+  op: "StridedSlice"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/Shape"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/strided_slice/stack"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/strided_slice/stack_1"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/Tile/multiples/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/Tile/multiples"
+  op: "Pack"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/strided_slice"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/Tile/multiples/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/Tile"
+  op: "Tile"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/ExpandDims"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/Tile/multiples"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tmultiples"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/ExpandDims_1/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/ExpandDims_1"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_1"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/ExpandDims_1/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/Less"
+  op: "Less"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/Tile"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/ExpandDims_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/sequence_length_mask/Cast"
+  op: "Cast"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/Less"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/sub/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/sub"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/sub/x"
+  input: "seq2seq/seq2seq_2/sequence_length_mask/Cast"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "seq2seq/seq2seq_2/ExpandDims/dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/ExpandDims"
+  op: "ExpandDims"
+  input: "seq2seq/seq2seq_2/sub"
+  input: "seq2seq/seq2seq_2/ExpandDims/dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tdim"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/mul"
+  op: "Mul"
+  input: "seq2seq/seq2seq_2/ExpandDims"
+  input: "Maximum_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "seq2seq/seq2seq_2/add"
+  op: "Add"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/dilated_conv1d_stack/output_layer_1/mul"
+  input: "seq2seq/seq2seq_2/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "attention_matrix"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_6"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "transcript_attention_comb_weights"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_6"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "attention_controller_output"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/attention_decoder/transpose_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "decoder_outputs"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 80
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "decoder_output_lengths"
+  op: "Identity"
+  input: "seq2seq/seq2seq_1/attention_decoder/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "postnet_outputs"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "postnet_output_lengths"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "synthesis_outputs"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "synthesis_output_lengths"
+  op: "Identity"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "Exp"
+  op: "Exp"
+  input: "seq2seq/seq2seq_2/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "pow_2/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.20000004768
+      }
+    }
+  }
+}
+node {
+  name: "pow_2"
+  op: "Pow"
+  input: "Exp"
+  input: "pow_2/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/Shape"
+  op: "Shape"
+  input: "pow_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/random_uniform/min"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/random_uniform/max"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/random_uniform/RandomUniform"
+  op: "RandomUniform"
+  input: "g_lim/Shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "seed"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "seed2"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/random_uniform/sub"
+  op: "Sub"
+  input: "g_lim/random_uniform/max"
+  input: "g_lim/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "g_lim/random_uniform/mul"
+  op: "Mul"
+  input: "g_lim/random_uniform/RandomUniform"
+  input: "g_lim/random_uniform/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/random_uniform"
+  op: "Add"
+  input: "g_lim/random_uniform/mul"
+  input: "g_lim/random_uniform/min"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/mul/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 2.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/mul"
+  op: "Mul"
+  input: "g_lim/mul/x"
+  input: "g_lim/random_uniform"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/sub/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/sub"
+  op: "Sub"
+  input: "g_lim/mul"
+  input: "g_lim/sub/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/mul_1/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 3.14159274101
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/mul_1"
+  op: "Mul"
+  input: "g_lim/mul_1/x"
+  input: "g_lim/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/imag"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/Complex"
+  op: "Complex"
+  input: "pow_2"
+  input: "g_lim/imag"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tout"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/real"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/Complex_1"
+  op: "Complex"
+  input: "g_lim/real"
+  input: "g_lim/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tout"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/Exp"
+  op: "Exp"
+  input: "g_lim/Complex_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/mul_2"
+  op: "Mul"
+  input: "g_lim/Complex"
+  input: "g_lim/Exp"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/Enter"
+  op: "Enter"
+  input: "g_lim/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "g_lim/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "g_lim/while/Enter_1"
+  op: "Enter"
+  input: "g_lim/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "g_lim/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "g_lim/while/Merge"
+  op: "Merge"
+  input: "g_lim/while/Enter"
+  input: "g_lim/while/NextIteration"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/Merge_1"
+  op: "Merge"
+  input: "g_lim/while/Enter_1"
+  input: "g_lim/while/NextIteration_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/Less/y"
+  op: "Const"
+  input: "^g_lim/while/Merge"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 99
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/Less"
+  op: "Less"
+  input: "g_lim/while/Merge"
+  input: "g_lim/while/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/LoopCond"
+  op: "LoopCond"
+  input: "g_lim/while/Less"
+
+}
+node {
+  name: "g_lim/while/Switch"
+  op: "Switch"
+  input: "g_lim/while/Merge"
+  input: "g_lim/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@g_lim/while/Merge"
+      }
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/Switch_1"
+  op: "Switch"
+  input: "g_lim/while/Merge_1"
+  input: "g_lim/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@g_lim/while/Merge_1"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/Identity"
+  op: "Identity"
+  input: "g_lim/while/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/Identity_1"
+  op: "Identity"
+  input: "g_lim/while/Switch_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/frame_length"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1200
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/frame_step"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 300
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/fft_length"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2048
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/irfft/packed"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/fft_length"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/irfft"
+  op: "IRFFT"
+  input: "g_lim/while/Identity_1"
+  input: "g_lim/while/gl_ifft_ola/irfft/packed"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2048
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/strided_slice/stack"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/strided_slice/stack_1"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\260\004\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/strided_slice/stack_2"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/strided_slice"
+  op: "StridedSlice"
+  input: "g_lim/while/gl_ifft_ola/irfft"
+  input: "g_lim/while/gl_ifft_ola/strided_slice/stack"
+  input: "g_lim/while/gl_ifft_ola/strided_slice/stack_1"
+  input: "g_lim/while/gl_ifft_ola/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/periodic"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_BOOL
+        tensor_shape {
+        }
+        bool_val: true
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/Cast"
+  op: "Cast"
+  input: "g_lim/while/gl_ifft_ola/hw/periodic"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/FloorMod/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/FloorMod"
+  op: "FloorMod"
+  input: "g_lim/while/gl_ifft_ola/frame_length"
+  input: "g_lim/while/gl_ifft_ola/hw/FloorMod/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/sub/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/sub"
+  op: "Sub"
+  input: "g_lim/while/gl_ifft_ola/hw/sub/x"
+  input: "g_lim/while/gl_ifft_ola/hw/FloorMod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/mul"
+  op: "Mul"
+  input: "g_lim/while/gl_ifft_ola/hw/Cast"
+  input: "g_lim/while/gl_ifft_ola/hw/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/add"
+  op: "Add"
+  input: "g_lim/while/gl_ifft_ola/frame_length"
+  input: "g_lim/while/gl_ifft_ola/hw/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/sub_1/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/sub_1"
+  op: "Sub"
+  input: "g_lim/while/gl_ifft_ola/hw/add"
+  input: "g_lim/while/gl_ifft_ola/hw/sub_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/Cast_1"
+  op: "Cast"
+  input: "g_lim/while/gl_ifft_ola/hw/sub_1"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/range/start"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/range/delta"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/range"
+  op: "Range"
+  input: "g_lim/while/gl_ifft_ola/hw/range/start"
+  input: "g_lim/while/gl_ifft_ola/frame_length"
+  input: "g_lim/while/gl_ifft_ola/hw/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/Cast_2"
+  op: "Cast"
+  input: "g_lim/while/gl_ifft_ola/hw/range"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/Const"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 6.28318548203
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/mul_1"
+  op: "Mul"
+  input: "g_lim/while/gl_ifft_ola/hw/Const"
+  input: "g_lim/while/gl_ifft_ola/hw/Cast_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/truediv"
+  op: "RealDiv"
+  input: "g_lim/while/gl_ifft_ola/hw/mul_1"
+  input: "g_lim/while/gl_ifft_ola/hw/Cast_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/Cos"
+  op: "Cos"
+  input: "g_lim/while/gl_ifft_ola/hw/truediv"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/mul_2/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/mul_2"
+  op: "Mul"
+  input: "g_lim/while/gl_ifft_ola/hw/mul_2/x"
+  input: "g_lim/while/gl_ifft_ola/hw/Cos"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/sub_2/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/hw/sub_2"
+  op: "Sub"
+  input: "g_lim/while/gl_ifft_ola/hw/sub_2/x"
+  input: "g_lim/while/gl_ifft_ola/hw/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/mul"
+  op: "Mul"
+  input: "g_lim/while/gl_ifft_ola/strided_slice"
+  input: "g_lim/while/gl_ifft_ola/hw/sub_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/Shape"
+  op: "Shape"
+  input: "g_lim/while/gl_ifft_ola/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice/stack"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice/stack_1"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: -2
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice/stack_2"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice"
+  op: "StridedSlice"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Shape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice/stack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice/stack_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/Rank"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_1/stack"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: -2
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_1/stack_1"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_1/stack_2"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_1"
+  op: "StridedSlice"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Shape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_1/stack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_1/stack_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_2/stack"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_2/stack_1"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_2/stack_2"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_2"
+  op: "StridedSlice"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Shape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_2/stack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_2/stack_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Enter"
+  op: "Enter"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Enter_1"
+  op: "Enter"
+  input: "g_lim/while/gl_ifft_ola/frame_step"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Merge"
+  op: "Merge"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Enter"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/NextIteration"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Merge_1"
+  op: "Merge"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Enter_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/NextIteration_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/zeros_like"
+  op: "Const"
+  input: "^g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Merge"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Greater"
+  op: "Greater"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Merge_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/zeros_like"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/LoopCond"
+  op: "LoopCond"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Greater"
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Switch"
+  op: "Switch"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Merge"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Merge"
+      }
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Switch_1"
+  op: "Switch"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Merge_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Merge_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Identity"
+  op: "Identity"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Identity_1"
+  op: "Identity"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Switch_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/FloorMod"
+  op: "FloorMod"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Identity"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/NextIteration"
+  op: "NextIteration"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/NextIteration_1"
+  op: "NextIteration"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/FloorMod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Exit"
+  op: "Exit"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Exit_1"
+  op: "Exit"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Switch_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv"
+  op: "FloorDiv"
+  input: "g_lim/while/gl_ifft_ola/frame_step"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv_1"
+  op: "FloorDiv"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/sub/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/sub"
+  op: "Sub"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/sub/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/mul"
+  op: "Mul"
+  input: "g_lim/while/gl_ifft_ola/frame_step"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/add"
+  op: "Add"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/mul"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv_2"
+  op: "FloorDiv"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/add"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/concat/values_1/0"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/concat/values_1"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/concat/values_1/0"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/gcd/while/Exit"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/concat/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/concat"
+  op: "ConcatV2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/concat/values_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/Reshape"
+  op: "Reshape"
+  input: "g_lim/while/gl_ifft_ola/mul"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/k"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/Rank_1"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/range/start"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/range/delta"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/range"
+  op: "Range"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/range/start"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Rank_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/sub_1"
+  op: "Sub"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Rank_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/k"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/packed"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/sub_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/k"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/split/split_dim"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/split"
+  op: "SplitV"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/range"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/packed"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/split/split_dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tlen"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_1/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_1"
+  op: "ConcatV2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/split:1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/split"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/transpose"
+  op: "Transpose"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Reshape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/range_1/start"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/range_1/delta"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/range_1"
+  op: "Range"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/range_1/start"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv_2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/range_1/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Rank"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range/start"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range/delta"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range"
+  op: "Range"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range/start"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Rank"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/add/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/add"
+  op: "Add"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/axis"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/strided_slice/stack"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/axis"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/strided_slice/stack_1"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/add"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/strided_slice/stack_2"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/strided_slice"
+  op: "StridedSlice"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/strided_slice/stack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/strided_slice/stack_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Shape"
+  op: "Shape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/range_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/sub/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/sub"
+  op: "Sub"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Rank"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/sub/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/sub_1"
+  op: "Sub"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/sub"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/packed/1"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/packed"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/strided_slice"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/packed/1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/sub_1"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split/split_dim"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split"
+  op: "SplitV"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Shape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/packed"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split/split_dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tlen"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "num_split"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape/shape"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape"
+  op: "Reshape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split:1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Size"
+  op: "Size"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Size_1"
+  op: "Size"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split:2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/sub_2"
+  op: "Sub"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/floordiv"
+  op: "FloorDiv"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/sub_2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/add_1/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/add_1"
+  op: "Add"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/add_1/x"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/floordiv"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Maximum/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Maximum"
+  op: "Maximum"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Maximum/x"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Enter"
+  op: "Enter"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Enter_1"
+  op: "Enter"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Merge"
+  op: "Merge"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Enter"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/NextIteration"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Merge_1"
+  op: "Merge"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Enter_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/NextIteration_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/zeros_like"
+  op: "Const"
+  input: "^g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Merge"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Greater"
+  op: "Greater"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Merge_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/zeros_like"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/LoopCond"
+  op: "LoopCond"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Greater"
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Switch"
+  op: "Switch"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Merge"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Merge"
+      }
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Switch_1"
+  op: "Switch"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Merge_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Merge_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Identity"
+  op: "Identity"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Identity_1"
+  op: "Identity"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Switch_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/FloorMod"
+  op: "FloorMod"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Identity"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/NextIteration"
+  op: "NextIteration"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/NextIteration_1"
+  op: "NextIteration"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/FloorMod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Exit"
+  op: "Exit"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Exit_1"
+  op: "Exit"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Switch_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/floordiv_1"
+  op: "FloorDiv"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/floordiv_2"
+  op: "FloorDiv"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/floordiv_3"
+  op: "FloorDiv"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/mul"
+  op: "Mul"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/floordiv_3"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat/values_1"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/mul"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat"
+  op: "ConcatV2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat/values_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split:2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_1/values_1"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/floordiv_3"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/gcd/while/Exit"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_1/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_1"
+  op: "ConcatV2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_1/values_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split:2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/zeros_like"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/ones_like/Shape"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/ones_like/Const"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/ones_like"
+  op: "Fill"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/ones_like/Shape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/ones_like/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/StridedSlice"
+  op: "StridedSlice"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/range_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/zeros_like"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/ones_like"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_1"
+  op: "Reshape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/StridedSlice"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_1/start"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_1/delta"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_1"
+  op: "Range"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_1/start"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Maximum"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_1/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/mul_1"
+  op: "Mul"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/floordiv_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_2/shape/1"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_2/shape"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Maximum"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_2/shape/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_2"
+  op: "Reshape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/mul_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_2/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_2/start"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_2/delta"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_2"
+  op: "Range"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_2/start"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/floordiv_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_2/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_3/shape/0"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_3/shape"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_3/shape/0"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/floordiv_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_3"
+  op: "Reshape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/range_2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_3/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/add_2"
+  op: "Add"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/GatherV2"
+  op: "GatherV2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/add_2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/strided_slice"
+  attr {
+    key: "Taxis"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_2/values_1"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Maximum"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_2/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_2"
+  op: "ConcatV2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_2/values_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/split:2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_4"
+  op: "Reshape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/GatherV2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/Reshape_1/shape"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/Reshape_1"
+  op: "Reshape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/frame/Reshape_4"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/UnsortedSegmentSum"
+  op: "UnsortedSegmentSum"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/transpose"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Reshape_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/floordiv_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tnumsegments"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_2/values_1"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/add"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_2/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_2"
+  op: "ConcatV2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/strided_slice"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_2/values_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/sub_2/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/sub_2"
+  op: "Sub"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Rank"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/sub_2/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/Rank_2"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/range_2/start"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/range_2/delta"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/range_2"
+  op: "Range"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/range_2/start"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Rank_2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/range_2/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/sub_3"
+  op: "Sub"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Rank_2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/sub_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/packed_1"
+  op: "Pack"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/sub_3"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/sub_2"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/split_1/split_dim"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/split_1"
+  op: "SplitV"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/range_2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/packed_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/split_1/split_dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tlen"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_3/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_3"
+  op: "ConcatV2"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/split_1:1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/split_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_3/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/transpose_1"
+  op: "Transpose"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/UnsortedSegmentSum"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_ifft_ola/overlap_and_add/Reshape_2"
+  op: "Reshape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/transpose_1"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame_length"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1200
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame_step"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 300
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/fft_length"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2048
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Rank"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/range/start"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/range/delta"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/range"
+  op: "Range"
+  input: "g_lim/while/gl_stft/frame/range/start"
+  input: "g_lim/while/gl_stft/frame/Rank"
+  input: "g_lim/while/gl_stft/frame/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/add/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/add"
+  op: "Add"
+  input: "g_lim/while/gl_stft/frame/axis"
+  input: "g_lim/while/gl_stft/frame/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/strided_slice/stack"
+  op: "Pack"
+  input: "g_lim/while/gl_stft/frame/axis"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/strided_slice/stack_1"
+  op: "Pack"
+  input: "g_lim/while/gl_stft/frame/add"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/strided_slice/stack_2"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/strided_slice"
+  op: "StridedSlice"
+  input: "g_lim/while/gl_stft/frame/range"
+  input: "g_lim/while/gl_stft/frame/strided_slice/stack"
+  input: "g_lim/while/gl_stft/frame/strided_slice/stack_1"
+  input: "g_lim/while/gl_stft/frame/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Shape"
+  op: "Shape"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Reshape_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/sub/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/sub"
+  op: "Sub"
+  input: "g_lim/while/gl_stft/frame/Rank"
+  input: "g_lim/while/gl_stft/frame/sub/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/sub_1"
+  op: "Sub"
+  input: "g_lim/while/gl_stft/frame/sub"
+  input: "g_lim/while/gl_stft/frame/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/packed/1"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/packed"
+  op: "Pack"
+  input: "g_lim/while/gl_stft/frame/strided_slice"
+  input: "g_lim/while/gl_stft/frame/packed/1"
+  input: "g_lim/while/gl_stft/frame/sub_1"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/split/split_dim"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/split"
+  op: "SplitV"
+  input: "g_lim/while/gl_stft/frame/Shape"
+  input: "g_lim/while/gl_stft/frame/packed"
+  input: "g_lim/while/gl_stft/frame/split/split_dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tlen"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "num_split"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Reshape/shape"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Reshape"
+  op: "Reshape"
+  input: "g_lim/while/gl_stft/frame/split:1"
+  input: "g_lim/while/gl_stft/frame/Reshape/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Size"
+  op: "Size"
+  input: "g_lim/while/gl_stft/frame/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Size_1"
+  op: "Size"
+  input: "g_lim/while/gl_stft/frame/split:2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/sub_2"
+  op: "Sub"
+  input: "g_lim/while/gl_stft/frame/Reshape"
+  input: "g_lim/while/gl_stft/frame_length"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/floordiv"
+  op: "FloorDiv"
+  input: "g_lim/while/gl_stft/frame/sub_2"
+  input: "g_lim/while/gl_stft/frame_step"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/add_1/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/add_1"
+  op: "Add"
+  input: "g_lim/while/gl_stft/frame/add_1/x"
+  input: "g_lim/while/gl_stft/frame/floordiv"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Maximum/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Maximum"
+  op: "Maximum"
+  input: "g_lim/while/gl_stft/frame/Maximum/x"
+  input: "g_lim/while/gl_stft/frame/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/gcd/Const"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 300
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/floordiv_1"
+  op: "FloorDiv"
+  input: "g_lim/while/gl_stft/frame_length"
+  input: "g_lim/while/gl_stft/frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/floordiv_2"
+  op: "FloorDiv"
+  input: "g_lim/while/gl_stft/frame_step"
+  input: "g_lim/while/gl_stft/frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/floordiv_3"
+  op: "FloorDiv"
+  input: "g_lim/while/gl_stft/frame/Reshape"
+  input: "g_lim/while/gl_stft/frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/mul"
+  op: "Mul"
+  input: "g_lim/while/gl_stft/frame/floordiv_3"
+  input: "g_lim/while/gl_stft/frame/gcd/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/concat/values_1"
+  op: "Pack"
+  input: "g_lim/while/gl_stft/frame/mul"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/concat/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/concat"
+  op: "ConcatV2"
+  input: "g_lim/while/gl_stft/frame/split"
+  input: "g_lim/while/gl_stft/frame/concat/values_1"
+  input: "g_lim/while/gl_stft/frame/split:2"
+  input: "g_lim/while/gl_stft/frame/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/concat_1/values_1"
+  op: "Pack"
+  input: "g_lim/while/gl_stft/frame/floordiv_3"
+  input: "g_lim/while/gl_stft/frame/gcd/Const"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/concat_1/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/concat_1"
+  op: "ConcatV2"
+  input: "g_lim/while/gl_stft/frame/split"
+  input: "g_lim/while/gl_stft/frame/concat_1/values_1"
+  input: "g_lim/while/gl_stft/frame/split:2"
+  input: "g_lim/while/gl_stft/frame/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/zeros_like"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/ones_like/Shape"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/ones_like/Const"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/ones_like"
+  op: "Fill"
+  input: "g_lim/while/gl_stft/frame/ones_like/Shape"
+  input: "g_lim/while/gl_stft/frame/ones_like/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/StridedSlice"
+  op: "StridedSlice"
+  input: "g_lim/while/gl_ifft_ola/overlap_and_add/Reshape_2"
+  input: "g_lim/while/gl_stft/frame/zeros_like"
+  input: "g_lim/while/gl_stft/frame/concat"
+  input: "g_lim/while/gl_stft/frame/ones_like"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Reshape_1"
+  op: "Reshape"
+  input: "g_lim/while/gl_stft/frame/StridedSlice"
+  input: "g_lim/while/gl_stft/frame/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/range_1/start"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/range_1/delta"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/range_1"
+  op: "Range"
+  input: "g_lim/while/gl_stft/frame/range_1/start"
+  input: "g_lim/while/gl_stft/frame/Maximum"
+  input: "g_lim/while/gl_stft/frame/range_1/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/mul_1"
+  op: "Mul"
+  input: "g_lim/while/gl_stft/frame/range_1"
+  input: "g_lim/while/gl_stft/frame/floordiv_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Reshape_2/shape/1"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Reshape_2/shape"
+  op: "Pack"
+  input: "g_lim/while/gl_stft/frame/Maximum"
+  input: "g_lim/while/gl_stft/frame/Reshape_2/shape/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Reshape_2"
+  op: "Reshape"
+  input: "g_lim/while/gl_stft/frame/mul_1"
+  input: "g_lim/while/gl_stft/frame/Reshape_2/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/range_2/start"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/range_2/delta"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/range_2"
+  op: "Range"
+  input: "g_lim/while/gl_stft/frame/range_2/start"
+  input: "g_lim/while/gl_stft/frame/floordiv_1"
+  input: "g_lim/while/gl_stft/frame/range_2/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Reshape_3/shape/0"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Reshape_3/shape"
+  op: "Pack"
+  input: "g_lim/while/gl_stft/frame/Reshape_3/shape/0"
+  input: "g_lim/while/gl_stft/frame/floordiv_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Reshape_3"
+  op: "Reshape"
+  input: "g_lim/while/gl_stft/frame/range_2"
+  input: "g_lim/while/gl_stft/frame/Reshape_3/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/add_2"
+  op: "Add"
+  input: "g_lim/while/gl_stft/frame/Reshape_2"
+  input: "g_lim/while/gl_stft/frame/Reshape_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/GatherV2"
+  op: "GatherV2"
+  input: "g_lim/while/gl_stft/frame/Reshape_1"
+  input: "g_lim/while/gl_stft/frame/add_2"
+  input: "g_lim/while/gl_stft/frame/strided_slice"
+  attr {
+    key: "Taxis"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/concat_2/values_1"
+  op: "Pack"
+  input: "g_lim/while/gl_stft/frame/Maximum"
+  input: "g_lim/while/gl_stft/frame_length"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/concat_2/axis"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/frame/concat_2"
+  op: "ConcatV2"
+  input: "g_lim/while/gl_stft/frame/split"
+  input: "g_lim/while/gl_stft/frame/concat_2/values_1"
+  input: "g_lim/while/gl_stft/frame/split:2"
+  input: "g_lim/while/gl_stft/frame/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/frame/Reshape_4"
+  op: "Reshape"
+  input: "g_lim/while/gl_stft/frame/GatherV2"
+  input: "g_lim/while/gl_stft/frame/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/periodic"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_BOOL
+        tensor_shape {
+        }
+        bool_val: true
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/Cast"
+  op: "Cast"
+  input: "g_lim/while/gl_stft/hw/periodic"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/hw/FloorMod/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/FloorMod"
+  op: "FloorMod"
+  input: "g_lim/while/gl_stft/frame_length"
+  input: "g_lim/while/gl_stft/hw/FloorMod/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/hw/sub/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/sub"
+  op: "Sub"
+  input: "g_lim/while/gl_stft/hw/sub/x"
+  input: "g_lim/while/gl_stft/hw/FloorMod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/hw/mul"
+  op: "Mul"
+  input: "g_lim/while/gl_stft/hw/Cast"
+  input: "g_lim/while/gl_stft/hw/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/hw/add"
+  op: "Add"
+  input: "g_lim/while/gl_stft/frame_length"
+  input: "g_lim/while/gl_stft/hw/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/hw/sub_1/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/sub_1"
+  op: "Sub"
+  input: "g_lim/while/gl_stft/hw/add"
+  input: "g_lim/while/gl_stft/hw/sub_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/hw/Cast_1"
+  op: "Cast"
+  input: "g_lim/while/gl_stft/hw/sub_1"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/gl_stft/hw/range/start"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/range/delta"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/range"
+  op: "Range"
+  input: "g_lim/while/gl_stft/hw/range/start"
+  input: "g_lim/while/gl_stft/frame_length"
+  input: "g_lim/while/gl_stft/hw/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/Cast_2"
+  op: "Cast"
+  input: "g_lim/while/gl_stft/hw/range"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/Const"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 6.28318548203
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/mul_1"
+  op: "Mul"
+  input: "g_lim/while/gl_stft/hw/Const"
+  input: "g_lim/while/gl_stft/hw/Cast_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/truediv"
+  op: "RealDiv"
+  input: "g_lim/while/gl_stft/hw/mul_1"
+  input: "g_lim/while/gl_stft/hw/Cast_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/Cos"
+  op: "Cos"
+  input: "g_lim/while/gl_stft/hw/truediv"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/mul_2/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/mul_2"
+  op: "Mul"
+  input: "g_lim/while/gl_stft/hw/mul_2/x"
+  input: "g_lim/while/gl_stft/hw/Cos"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/sub_2/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/hw/sub_2"
+  op: "Sub"
+  input: "g_lim/while/gl_stft/hw/sub_2/x"
+  input: "g_lim/while/gl_stft/hw/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/mul"
+  op: "Mul"
+  input: "g_lim/while/gl_stft/frame/Reshape_4"
+  input: "g_lim/while/gl_stft/hw/sub_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/rfft/packed"
+  op: "Pack"
+  input: "g_lim/while/gl_stft/fft_length"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/rfft/Pad/paddings"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 2
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 3
+          }
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000P\003\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/rfft/Pad"
+  op: "Pad"
+  input: "g_lim/while/gl_stft/mul"
+  input: "g_lim/while/gl_stft/rfft/Pad/paddings"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tpaddings"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2048
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/gl_stft/rfft"
+  op: "RFFT"
+  input: "g_lim/while/gl_stft/rfft/Pad"
+  input: "g_lim/while/gl_stft/rfft/packed"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/Imag"
+  op: "Imag"
+  input: "g_lim/while/gl_stft/rfft"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "Tout"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/Real"
+  op: "Real"
+  input: "g_lim/while/gl_stft/rfft"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "Tout"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/div"
+  op: "RealDiv"
+  input: "g_lim/while/Imag"
+  input: "g_lim/while/Real"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Atan"
+  op: "Atan"
+  input: "g_lim/while/atan2/div"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Less/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Less"
+  op: "Less"
+  input: "g_lim/while/Real"
+  input: "g_lim/while/atan2/Less/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Equal/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Equal"
+  op: "Equal"
+  input: "g_lim/while/Real"
+  input: "g_lim/while/atan2/Equal/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Less_1/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Less_1"
+  op: "Less"
+  input: "g_lim/while/Imag"
+  input: "g_lim/while/atan2/Less_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/ones_like/Shape"
+  op: "Shape"
+  input: "g_lim/while/Real"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/ones_like/Const"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/ones_like"
+  op: "Fill"
+  input: "g_lim/while/atan2/ones_like/Shape"
+  input: "g_lim/while/atan2/ones_like/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/zeros_like"
+  op: "ZerosLike"
+  input: "g_lim/while/Real"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/mul/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: nan
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/mul"
+  op: "Mul"
+  input: "g_lim/while/atan2/mul/x"
+  input: "g_lim/while/atan2/ones_like"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Greater/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Greater"
+  op: "Greater"
+  input: "g_lim/while/Real"
+  input: "g_lim/while/atan2/Greater/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Select"
+  op: "Select"
+  input: "g_lim/while/atan2/Greater"
+  input: "g_lim/while/atan2/Atan"
+  input: "g_lim/while/atan2/zeros_like"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/GreaterEqual/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/GreaterEqual"
+  op: "GreaterEqual"
+  input: "g_lim/while/Imag"
+  input: "g_lim/while/atan2/GreaterEqual/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/LogicalAnd"
+  op: "LogicalAnd"
+  input: "g_lim/while/atan2/Less"
+  input: "g_lim/while/atan2/GreaterEqual"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/add/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 3.14159274101
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/add"
+  op: "Add"
+  input: "g_lim/while/atan2/Atan"
+  input: "g_lim/while/atan2/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Select_1"
+  op: "Select"
+  input: "g_lim/while/atan2/LogicalAnd"
+  input: "g_lim/while/atan2/add"
+  input: "g_lim/while/atan2/Select"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/LogicalAnd_1"
+  op: "LogicalAnd"
+  input: "g_lim/while/atan2/Less"
+  input: "g_lim/while/atan2/Less_1"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/sub/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 3.14159274101
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/sub"
+  op: "Sub"
+  input: "g_lim/while/atan2/Atan"
+  input: "g_lim/while/atan2/sub/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Select_2"
+  op: "Select"
+  input: "g_lim/while/atan2/LogicalAnd_1"
+  input: "g_lim/while/atan2/sub"
+  input: "g_lim/while/atan2/Select_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Greater_1/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Greater_1"
+  op: "Greater"
+  input: "g_lim/while/Imag"
+  input: "g_lim/while/atan2/Greater_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/LogicalAnd_2"
+  op: "LogicalAnd"
+  input: "g_lim/while/atan2/Equal"
+  input: "g_lim/while/atan2/Greater_1"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/mul_1/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 1.57079637051
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/mul_1"
+  op: "Mul"
+  input: "g_lim/while/atan2/mul_1/x"
+  input: "g_lim/while/atan2/ones_like"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Select_3"
+  op: "Select"
+  input: "g_lim/while/atan2/LogicalAnd_2"
+  input: "g_lim/while/atan2/mul_1"
+  input: "g_lim/while/atan2/Select_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/LogicalAnd_3"
+  op: "LogicalAnd"
+  input: "g_lim/while/atan2/Equal"
+  input: "g_lim/while/atan2/Less_1"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/mul_2/x"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: -1.57079637051
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/mul_2"
+  op: "Mul"
+  input: "g_lim/while/atan2/mul_2/x"
+  input: "g_lim/while/atan2/ones_like"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Select_4"
+  op: "Select"
+  input: "g_lim/while/atan2/LogicalAnd_3"
+  input: "g_lim/while/atan2/mul_2"
+  input: "g_lim/while/atan2/Select_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/IsNan"
+  op: "IsNan"
+  input: "g_lim/while/Real"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/IsNan_1"
+  op: "IsNan"
+  input: "g_lim/while/Imag"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/LogicalOr"
+  op: "LogicalOr"
+  input: "g_lim/while/atan2/IsNan"
+  input: "g_lim/while/atan2/IsNan_1"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/atan2/Select_5"
+  op: "Select"
+  input: "g_lim/while/atan2/LogicalOr"
+  input: "g_lim/while/atan2/mul"
+  input: "g_lim/while/atan2/Select_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/imag"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/Complex"
+  op: "Complex"
+  input: "g_lim/while/Complex/Enter"
+  input: "g_lim/while/imag"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tout"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/Complex/Enter"
+  op: "Enter"
+  input: "pow_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "frame_name"
+    value {
+      s: "g_lim/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "g_lim/while/real"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.0
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/Complex_1"
+  op: "Complex"
+  input: "g_lim/while/real"
+  input: "g_lim/while/atan2/Select_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tout"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/Exp"
+  op: "Exp"
+  input: "g_lim/while/Complex_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/mul"
+  op: "Mul"
+  input: "g_lim/while/Complex"
+  input: "g_lim/while/Exp"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/add/y"
+  op: "Const"
+  input: "^g_lim/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/add"
+  op: "Add"
+  input: "g_lim/while/Identity"
+  input: "g_lim/while/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/NextIteration"
+  op: "NextIteration"
+  input: "g_lim/while/add"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/NextIteration_1"
+  op: "NextIteration"
+  input: "g_lim/while/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "g_lim/while/Exit"
+  op: "Exit"
+  input: "g_lim/while/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "g_lim/while/Exit_1"
+  op: "Exit"
+  input: "g_lim/while/Switch_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_COMPLEX64
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1025
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/frame_length"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1200
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/frame_step"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 300
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/fft_length"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2048
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/irfft/packed"
+  op: "Pack"
+  input: "inverse_stft/fft_length"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/irfft"
+  op: "IRFFT"
+  input: "g_lim/while/Exit_1"
+  input: "inverse_stft/irfft/packed"
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 2048
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\260\004\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/strided_slice"
+  op: "StridedSlice"
+  input: "inverse_stft/irfft"
+  input: "inverse_stft/strided_slice/stack"
+  input: "inverse_stft/strided_slice/stack_1"
+  input: "inverse_stft/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "begin_mask"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/periodic"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_BOOL
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_BOOL
+        tensor_shape {
+        }
+        bool_val: true
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/Cast"
+  op: "Cast"
+  input: "inverse_stft/hw/periodic"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_BOOL
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/hw/FloorMod/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/FloorMod"
+  op: "FloorMod"
+  input: "inverse_stft/frame_length"
+  input: "inverse_stft/hw/FloorMod/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/hw/sub/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/sub"
+  op: "Sub"
+  input: "inverse_stft/hw/sub/x"
+  input: "inverse_stft/hw/FloorMod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/hw/mul"
+  op: "Mul"
+  input: "inverse_stft/hw/Cast"
+  input: "inverse_stft/hw/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/hw/add"
+  op: "Add"
+  input: "inverse_stft/frame_length"
+  input: "inverse_stft/hw/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/hw/sub_1/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/sub_1"
+  op: "Sub"
+  input: "inverse_stft/hw/add"
+  input: "inverse_stft/hw/sub_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/hw/Cast_1"
+  op: "Cast"
+  input: "inverse_stft/hw/sub_1"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/hw/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/range"
+  op: "Range"
+  input: "inverse_stft/hw/range/start"
+  input: "inverse_stft/frame_length"
+  input: "inverse_stft/hw/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/Cast_2"
+  op: "Cast"
+  input: "inverse_stft/hw/range"
+  attr {
+    key: "DstT"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "SrcT"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 6.28318548203
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/mul_1"
+  op: "Mul"
+  input: "inverse_stft/hw/Const"
+  input: "inverse_stft/hw/Cast_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/truediv"
+  op: "RealDiv"
+  input: "inverse_stft/hw/mul_1"
+  input: "inverse_stft/hw/Cast_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/Cos"
+  op: "Cos"
+  input: "inverse_stft/hw/truediv"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/mul_2/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/mul_2"
+  op: "Mul"
+  input: "inverse_stft/hw/mul_2/x"
+  input: "inverse_stft/hw/Cos"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/sub_2/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.5
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/hw/sub_2"
+  op: "Sub"
+  input: "inverse_stft/hw/sub_2/x"
+  input: "inverse_stft/hw/mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/mul"
+  op: "Mul"
+  input: "inverse_stft/strided_slice"
+  input: "inverse_stft/hw/sub_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: 1200
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/Shape"
+  op: "Shape"
+  input: "inverse_stft/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: -2
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice"
+  op: "StridedSlice"
+  input: "inverse_stft/overlap_and_add/Shape"
+  input: "inverse_stft/overlap_and_add/strided_slice/stack"
+  input: "inverse_stft/overlap_and_add/strided_slice/stack_1"
+  input: "inverse_stft/overlap_and_add/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: -2
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice_1"
+  op: "StridedSlice"
+  input: "inverse_stft/overlap_and_add/Shape"
+  input: "inverse_stft/overlap_and_add/strided_slice_1/stack"
+  input: "inverse_stft/overlap_and_add/strided_slice_1/stack_1"
+  input: "inverse_stft/overlap_and_add/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/strided_slice_2"
+  op: "StridedSlice"
+  input: "inverse_stft/overlap_and_add/Shape"
+  input: "inverse_stft/overlap_and_add/strided_slice_2/stack"
+  input: "inverse_stft/overlap_and_add/strided_slice_2/stack_1"
+  input: "inverse_stft/overlap_and_add/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/Enter"
+  op: "Enter"
+  input: "inverse_stft/overlap_and_add/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "inverse_stft/overlap_and_add/gcd/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/Enter_1"
+  op: "Enter"
+  input: "inverse_stft/frame_step"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "inverse_stft/overlap_and_add/gcd/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/Merge"
+  op: "Merge"
+  input: "inverse_stft/overlap_and_add/gcd/while/Enter"
+  input: "inverse_stft/overlap_and_add/gcd/while/NextIteration"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/Merge_1"
+  op: "Merge"
+  input: "inverse_stft/overlap_and_add/gcd/while/Enter_1"
+  input: "inverse_stft/overlap_and_add/gcd/while/NextIteration_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/zeros_like"
+  op: "Const"
+  input: "^inverse_stft/overlap_and_add/gcd/while/Merge"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/Greater"
+  op: "Greater"
+  input: "inverse_stft/overlap_and_add/gcd/while/Merge_1"
+  input: "inverse_stft/overlap_and_add/gcd/while/zeros_like"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/LoopCond"
+  op: "LoopCond"
+  input: "inverse_stft/overlap_and_add/gcd/while/Greater"
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/Switch"
+  op: "Switch"
+  input: "inverse_stft/overlap_and_add/gcd/while/Merge"
+  input: "inverse_stft/overlap_and_add/gcd/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@inverse_stft/overlap_and_add/gcd/while/Merge"
+      }
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/Switch_1"
+  op: "Switch"
+  input: "inverse_stft/overlap_and_add/gcd/while/Merge_1"
+  input: "inverse_stft/overlap_and_add/gcd/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@inverse_stft/overlap_and_add/gcd/while/Merge_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/Identity"
+  op: "Identity"
+  input: "inverse_stft/overlap_and_add/gcd/while/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/Identity_1"
+  op: "Identity"
+  input: "inverse_stft/overlap_and_add/gcd/while/Switch_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/FloorMod"
+  op: "FloorMod"
+  input: "inverse_stft/overlap_and_add/gcd/while/Identity"
+  input: "inverse_stft/overlap_and_add/gcd/while/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/NextIteration"
+  op: "NextIteration"
+  input: "inverse_stft/overlap_and_add/gcd/while/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/NextIteration_1"
+  op: "NextIteration"
+  input: "inverse_stft/overlap_and_add/gcd/while/FloorMod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/Exit"
+  op: "Exit"
+  input: "inverse_stft/overlap_and_add/gcd/while/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/gcd/while/Exit_1"
+  op: "Exit"
+  input: "inverse_stft/overlap_and_add/gcd/while/Switch_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/floordiv"
+  op: "FloorDiv"
+  input: "inverse_stft/frame_step"
+  input: "inverse_stft/overlap_and_add/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/floordiv_1"
+  op: "FloorDiv"
+  input: "inverse_stft/overlap_and_add/strided_slice_2"
+  input: "inverse_stft/overlap_and_add/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/sub/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/sub"
+  op: "Sub"
+  input: "inverse_stft/overlap_and_add/strided_slice_1"
+  input: "inverse_stft/overlap_and_add/sub/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/mul"
+  op: "Mul"
+  input: "inverse_stft/frame_step"
+  input: "inverse_stft/overlap_and_add/sub"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/add"
+  op: "Add"
+  input: "inverse_stft/overlap_and_add/mul"
+  input: "inverse_stft/overlap_and_add/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/floordiv_2"
+  op: "FloorDiv"
+  input: "inverse_stft/overlap_and_add/add"
+  input: "inverse_stft/overlap_and_add/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/concat/values_1/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/concat/values_1"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/concat/values_1/0"
+  input: "inverse_stft/overlap_and_add/gcd/while/Exit"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/concat"
+  op: "ConcatV2"
+  input: "inverse_stft/overlap_and_add/strided_slice"
+  input: "inverse_stft/overlap_and_add/concat/values_1"
+  input: "inverse_stft/overlap_and_add/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/Reshape"
+  op: "Reshape"
+  input: "inverse_stft/mul"
+  input: "inverse_stft/overlap_and_add/concat"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/k"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/Rank_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/range"
+  op: "Range"
+  input: "inverse_stft/overlap_and_add/range/start"
+  input: "inverse_stft/overlap_and_add/Rank_1"
+  input: "inverse_stft/overlap_and_add/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/sub_1"
+  op: "Sub"
+  input: "inverse_stft/overlap_and_add/Rank_1"
+  input: "inverse_stft/overlap_and_add/k"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/packed"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/sub_1"
+  input: "inverse_stft/overlap_and_add/k"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/split"
+  op: "SplitV"
+  input: "inverse_stft/overlap_and_add/range"
+  input: "inverse_stft/overlap_and_add/packed"
+  input: "inverse_stft/overlap_and_add/split/split_dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tlen"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/concat_1"
+  op: "ConcatV2"
+  input: "inverse_stft/overlap_and_add/split:1"
+  input: "inverse_stft/overlap_and_add/split"
+  input: "inverse_stft/overlap_and_add/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/transpose"
+  op: "Transpose"
+  input: "inverse_stft/overlap_and_add/Reshape"
+  input: "inverse_stft/overlap_and_add/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/range_1/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/range_1/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/range_1"
+  op: "Range"
+  input: "inverse_stft/overlap_and_add/range_1/start"
+  input: "inverse_stft/overlap_and_add/floordiv_2"
+  input: "inverse_stft/overlap_and_add/range_1/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Rank"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/range/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/range/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/range"
+  op: "Range"
+  input: "inverse_stft/overlap_and_add/frame/range/start"
+  input: "inverse_stft/overlap_and_add/frame/Rank"
+  input: "inverse_stft/overlap_and_add/frame/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/add/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/add"
+  op: "Add"
+  input: "inverse_stft/overlap_and_add/frame/axis"
+  input: "inverse_stft/overlap_and_add/frame/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/strided_slice/stack"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/frame/axis"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/strided_slice/stack_1"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/frame/add"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/strided_slice"
+  op: "StridedSlice"
+  input: "inverse_stft/overlap_and_add/frame/range"
+  input: "inverse_stft/overlap_and_add/frame/strided_slice/stack"
+  input: "inverse_stft/overlap_and_add/frame/strided_slice/stack_1"
+  input: "inverse_stft/overlap_and_add/frame/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Shape"
+  op: "Shape"
+  input: "inverse_stft/overlap_and_add/range_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/sub/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/sub"
+  op: "Sub"
+  input: "inverse_stft/overlap_and_add/frame/Rank"
+  input: "inverse_stft/overlap_and_add/frame/sub/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/sub_1"
+  op: "Sub"
+  input: "inverse_stft/overlap_and_add/frame/sub"
+  input: "inverse_stft/overlap_and_add/frame/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/packed/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/packed"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/frame/strided_slice"
+  input: "inverse_stft/overlap_and_add/frame/packed/1"
+  input: "inverse_stft/overlap_and_add/frame/sub_1"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/split/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/split"
+  op: "SplitV"
+  input: "inverse_stft/overlap_and_add/frame/Shape"
+  input: "inverse_stft/overlap_and_add/frame/packed"
+  input: "inverse_stft/overlap_and_add/frame/split/split_dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tlen"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "num_split"
+    value {
+      i: 3
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Reshape/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Reshape"
+  op: "Reshape"
+  input: "inverse_stft/overlap_and_add/frame/split:1"
+  input: "inverse_stft/overlap_and_add/frame/Reshape/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Size"
+  op: "Size"
+  input: "inverse_stft/overlap_and_add/frame/split"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Size_1"
+  op: "Size"
+  input: "inverse_stft/overlap_and_add/frame/split:2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/sub_2"
+  op: "Sub"
+  input: "inverse_stft/overlap_and_add/frame/Reshape"
+  input: "inverse_stft/overlap_and_add/floordiv_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/floordiv"
+  op: "FloorDiv"
+  input: "inverse_stft/overlap_and_add/frame/sub_2"
+  input: "inverse_stft/overlap_and_add/floordiv"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/add_1/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/add_1"
+  op: "Add"
+  input: "inverse_stft/overlap_and_add/frame/add_1/x"
+  input: "inverse_stft/overlap_and_add/frame/floordiv"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Maximum/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Maximum"
+  op: "Maximum"
+  input: "inverse_stft/overlap_and_add/frame/Maximum/x"
+  input: "inverse_stft/overlap_and_add/frame/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/Enter"
+  op: "Enter"
+  input: "inverse_stft/overlap_and_add/floordiv_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "inverse_stft/overlap_and_add/frame/gcd/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/Enter_1"
+  op: "Enter"
+  input: "inverse_stft/overlap_and_add/floordiv"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "inverse_stft/overlap_and_add/frame/gcd/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/Merge"
+  op: "Merge"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Enter"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/NextIteration"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/Merge_1"
+  op: "Merge"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Enter_1"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/NextIteration_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/zeros_like"
+  op: "Const"
+  input: "^inverse_stft/overlap_and_add/frame/gcd/while/Merge"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/Greater"
+  op: "Greater"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Merge_1"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/zeros_like"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/LoopCond"
+  op: "LoopCond"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Greater"
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/Switch"
+  op: "Switch"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Merge"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@inverse_stft/overlap_and_add/frame/gcd/while/Merge"
+      }
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/Switch_1"
+  op: "Switch"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Merge_1"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@inverse_stft/overlap_and_add/frame/gcd/while/Merge_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/Identity"
+  op: "Identity"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/Identity_1"
+  op: "Identity"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Switch_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/FloorMod"
+  op: "FloorMod"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Identity"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/NextIteration"
+  op: "NextIteration"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/NextIteration_1"
+  op: "NextIteration"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/FloorMod"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/Exit"
+  op: "Exit"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/gcd/while/Exit_1"
+  op: "Exit"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Switch_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/floordiv_1"
+  op: "FloorDiv"
+  input: "inverse_stft/overlap_and_add/floordiv_1"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/floordiv_2"
+  op: "FloorDiv"
+  input: "inverse_stft/overlap_and_add/floordiv"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/floordiv_3"
+  op: "FloorDiv"
+  input: "inverse_stft/overlap_and_add/frame/Reshape"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/mul"
+  op: "Mul"
+  input: "inverse_stft/overlap_and_add/frame/floordiv_3"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Exit"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/concat/values_1"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/frame/mul"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/concat/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/concat"
+  op: "ConcatV2"
+  input: "inverse_stft/overlap_and_add/frame/split"
+  input: "inverse_stft/overlap_and_add/frame/concat/values_1"
+  input: "inverse_stft/overlap_and_add/frame/split:2"
+  input: "inverse_stft/overlap_and_add/frame/concat/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/concat_1/values_1"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/frame/floordiv_3"
+  input: "inverse_stft/overlap_and_add/frame/gcd/while/Exit"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/concat_1/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/concat_1"
+  op: "ConcatV2"
+  input: "inverse_stft/overlap_and_add/frame/split"
+  input: "inverse_stft/overlap_and_add/frame/concat_1/values_1"
+  input: "inverse_stft/overlap_and_add/frame/split:2"
+  input: "inverse_stft/overlap_and_add/frame/concat_1/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/zeros_like"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/ones_like/Shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/ones_like/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/ones_like"
+  op: "Fill"
+  input: "inverse_stft/overlap_and_add/frame/ones_like/Shape"
+  input: "inverse_stft/overlap_and_add/frame/ones_like/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "index_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/StridedSlice"
+  op: "StridedSlice"
+  input: "inverse_stft/overlap_and_add/range_1"
+  input: "inverse_stft/overlap_and_add/frame/zeros_like"
+  input: "inverse_stft/overlap_and_add/frame/concat"
+  input: "inverse_stft/overlap_and_add/frame/ones_like"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Reshape_1"
+  op: "Reshape"
+  input: "inverse_stft/overlap_and_add/frame/StridedSlice"
+  input: "inverse_stft/overlap_and_add/frame/concat_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/range_1/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/range_1/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/range_1"
+  op: "Range"
+  input: "inverse_stft/overlap_and_add/frame/range_1/start"
+  input: "inverse_stft/overlap_and_add/frame/Maximum"
+  input: "inverse_stft/overlap_and_add/frame/range_1/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/mul_1"
+  op: "Mul"
+  input: "inverse_stft/overlap_and_add/frame/range_1"
+  input: "inverse_stft/overlap_and_add/frame/floordiv_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Reshape_2/shape/1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Reshape_2/shape"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/frame/Maximum"
+  input: "inverse_stft/overlap_and_add/frame/Reshape_2/shape/1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Reshape_2"
+  op: "Reshape"
+  input: "inverse_stft/overlap_and_add/frame/mul_1"
+  input: "inverse_stft/overlap_and_add/frame/Reshape_2/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/range_2/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/range_2/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/range_2"
+  op: "Range"
+  input: "inverse_stft/overlap_and_add/frame/range_2/start"
+  input: "inverse_stft/overlap_and_add/frame/floordiv_1"
+  input: "inverse_stft/overlap_and_add/frame/range_2/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Reshape_3/shape/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Reshape_3/shape"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/frame/Reshape_3/shape/0"
+  input: "inverse_stft/overlap_and_add/frame/floordiv_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Reshape_3"
+  op: "Reshape"
+  input: "inverse_stft/overlap_and_add/frame/range_2"
+  input: "inverse_stft/overlap_and_add/frame/Reshape_3/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/add_2"
+  op: "Add"
+  input: "inverse_stft/overlap_and_add/frame/Reshape_2"
+  input: "inverse_stft/overlap_and_add/frame/Reshape_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/GatherV2"
+  op: "GatherV2"
+  input: "inverse_stft/overlap_and_add/frame/Reshape_1"
+  input: "inverse_stft/overlap_and_add/frame/add_2"
+  input: "inverse_stft/overlap_and_add/frame/strided_slice"
+  attr {
+    key: "Taxis"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tparams"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/concat_2/values_1"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/frame/Maximum"
+  input: "inverse_stft/overlap_and_add/floordiv_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/concat_2"
+  op: "ConcatV2"
+  input: "inverse_stft/overlap_and_add/frame/split"
+  input: "inverse_stft/overlap_and_add/frame/concat_2/values_1"
+  input: "inverse_stft/overlap_and_add/frame/split:2"
+  input: "inverse_stft/overlap_and_add/frame/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/frame/Reshape_4"
+  op: "Reshape"
+  input: "inverse_stft/overlap_and_add/frame/GatherV2"
+  input: "inverse_stft/overlap_and_add/frame/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/Reshape_1/shape"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/Reshape_1"
+  op: "Reshape"
+  input: "inverse_stft/overlap_and_add/frame/Reshape_4"
+  input: "inverse_stft/overlap_and_add/Reshape_1/shape"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/UnsortedSegmentSum"
+  op: "UnsortedSegmentSum"
+  input: "inverse_stft/overlap_and_add/transpose"
+  input: "inverse_stft/overlap_and_add/Reshape_1"
+  input: "inverse_stft/overlap_and_add/floordiv_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tindices"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tnumsegments"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/concat_2/values_1"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/add"
+  attr {
+    key: "N"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/concat_2/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/concat_2"
+  op: "ConcatV2"
+  input: "inverse_stft/overlap_and_add/strided_slice"
+  input: "inverse_stft/overlap_and_add/concat_2/values_1"
+  input: "inverse_stft/overlap_and_add/concat_2/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/sub_2/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 2
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/sub_2"
+  op: "Sub"
+  input: "inverse_stft/overlap_and_add/Rank"
+  input: "inverse_stft/overlap_and_add/sub_2/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/Rank_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/range_2/start"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/range_2/delta"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/range_2"
+  op: "Range"
+  input: "inverse_stft/overlap_and_add/range_2/start"
+  input: "inverse_stft/overlap_and_add/Rank_2"
+  input: "inverse_stft/overlap_and_add/range_2/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: 3
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/sub_3"
+  op: "Sub"
+  input: "inverse_stft/overlap_and_add/Rank_2"
+  input: "inverse_stft/overlap_and_add/sub_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/packed_1"
+  op: "Pack"
+  input: "inverse_stft/overlap_and_add/sub_3"
+  input: "inverse_stft/overlap_and_add/sub_2"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/split_1/split_dim"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/split_1"
+  op: "SplitV"
+  input: "inverse_stft/overlap_and_add/range_2"
+  input: "inverse_stft/overlap_and_add/packed_1"
+  input: "inverse_stft/overlap_and_add/split_1/split_dim"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tlen"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+  attr {
+    key: "num_split"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/concat_3/axis"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/concat_3"
+  op: "ConcatV2"
+  input: "inverse_stft/overlap_and_add/split_1:1"
+  input: "inverse_stft/overlap_and_add/split_1"
+  input: "inverse_stft/overlap_and_add/concat_3/axis"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "inverse_stft/overlap_and_add/transpose_1"
+  op: "Transpose"
+  input: "inverse_stft/overlap_and_add/UnsortedSegmentSum"
+  input: "inverse_stft/overlap_and_add/concat_3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "inverse_stft/overlap_and_add/Reshape_2"
+  op: "Reshape"
+  input: "inverse_stft/overlap_and_add/transpose_1"
+  input: "inverse_stft/overlap_and_add/concat_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tshape"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/Shape"
+  op: "Shape"
+  input: "inverse_stft/overlap_and_add/Reshape_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "out_type"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: -1
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice"
+  op: "StridedSlice"
+  input: "de_emphasis/Shape"
+  input: "de_emphasis/strided_slice/stack"
+  input: "de_emphasis/strided_slice/stack_1"
+  input: "de_emphasis/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 1
+    }
+  }
+}
+node {
+  name: "de_emphasis/TensorArray"
+  op: "TensorArrayV3"
+  input: "de_emphasis/strided_slice"
+
+  attr {
+    key: "clear_after_read"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "dynamic_size"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+  attr {
+    key: "identical_element_shapes"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "tensor_array_name"
+    value {
+      s: ""
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice_1/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice_1/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice_1/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice_1"
+  op: "StridedSlice"
+  input: "inverse_stft/overlap_and_add/Reshape_2"
+  input: "de_emphasis/strided_slice_1/stack"
+  input: "de_emphasis/strided_slice_1/stack_1"
+  input: "de_emphasis/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "de_emphasis/TensorArrayWrite/TensorArrayWriteV3/index"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/strided_slice_1"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/TensorArrayWrite/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "de_emphasis/TensorArray"
+  input: "de_emphasis/TensorArrayWrite/TensorArrayWriteV3/index"
+  input: "de_emphasis/strided_slice_1"
+  input: "de_emphasis/TensorArray:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/strided_slice_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/Const"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice_2/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice_2/stack_1"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice_2/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/strided_slice_2"
+  op: "StridedSlice"
+  input: "inverse_stft/overlap_and_add/Reshape_2"
+  input: "de_emphasis/strided_slice_2/stack"
+  input: "de_emphasis/strided_slice_2/stack_1"
+  input: "de_emphasis/strided_slice_2/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/Enter"
+  op: "Enter"
+  input: "de_emphasis/Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "de_emphasis/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/Enter_1"
+  op: "Enter"
+  input: "de_emphasis/TensorArrayWrite/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "de_emphasis/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/Enter_2"
+  op: "Enter"
+  input: "de_emphasis/strided_slice_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "de_emphasis/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/Merge"
+  op: "Merge"
+  input: "de_emphasis/while/Enter"
+  input: "de_emphasis/while/NextIteration"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/Merge_1"
+  op: "Merge"
+  input: "de_emphasis/while/Enter_1"
+  input: "de_emphasis/while/NextIteration_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/Merge_2"
+  op: "Merge"
+  input: "de_emphasis/while/Enter_2"
+  input: "de_emphasis/while/NextIteration_2"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+        }
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/Less"
+  op: "Less"
+  input: "de_emphasis/while/Merge"
+  input: "de_emphasis/while/Less/Enter"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/Less/Enter"
+  op: "Enter"
+  input: "de_emphasis/strided_slice"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "de_emphasis/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/LoopCond"
+  op: "LoopCond"
+  input: "de_emphasis/while/Less"
+
+}
+node {
+  name: "de_emphasis/while/Switch"
+  op: "Switch"
+  input: "de_emphasis/while/Merge"
+  input: "de_emphasis/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/while/Merge"
+      }
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/Switch_1"
+  op: "Switch"
+  input: "de_emphasis/while/Merge_1"
+  input: "de_emphasis/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/while/Merge_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/Switch_2"
+  op: "Switch"
+  input: "de_emphasis/while/Merge_2"
+  input: "de_emphasis/while/LoopCond"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/while/Merge_2"
+      }
+    }
+  }
+  attr {
+    key: "_output_shapes"
+    value {
+      list {
+        shape {
+          dim {
+            size: -1
+          }
+        }
+        shape {
+          dim {
+            size: -1
+          }
+        }
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/Identity"
+  op: "Identity"
+  input: "de_emphasis/while/Switch:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/Identity_1"
+  op: "Identity"
+  input: "de_emphasis/while/Switch_1:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/Identity_2"
+  op: "Identity"
+  input: "de_emphasis/while/Switch_2:1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/add/y"
+  op: "Const"
+  input: "^de_emphasis/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/add"
+  op: "Add"
+  input: "de_emphasis/while/Identity"
+  input: "de_emphasis/while/add/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/strided_slice/stack/0"
+  op: "Const"
+  input: "^de_emphasis/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/strided_slice/stack"
+  op: "Pack"
+  input: "de_emphasis/while/strided_slice/stack/0"
+  input: "de_emphasis/while/Identity"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/strided_slice/stack_1/0"
+  op: "Const"
+  input: "^de_emphasis/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/strided_slice/stack_1"
+  op: "Pack"
+  input: "de_emphasis/while/strided_slice/stack_1/0"
+  input: "de_emphasis/while/add"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/strided_slice/stack_2"
+  op: "Const"
+  input: "^de_emphasis/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/strided_slice"
+  op: "StridedSlice"
+  input: "de_emphasis/while/strided_slice/Enter"
+  input: "de_emphasis/while/strided_slice/stack"
+  input: "de_emphasis/while/strided_slice/stack_1"
+  input: "de_emphasis/while/strided_slice/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/strided_slice/Enter"
+  op: "Enter"
+  input: "inverse_stft/overlap_and_add/Reshape_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "de_emphasis/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/mul/x"
+  op: "Const"
+  input: "^de_emphasis/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.97000002861
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/mul"
+  op: "Mul"
+  input: "de_emphasis/while/mul/x"
+  input: "de_emphasis/while/Identity_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/add_1"
+  op: "Add"
+  input: "de_emphasis/while/strided_slice"
+  input: "de_emphasis/while/mul"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/add_2/y"
+  op: "Const"
+  input: "^de_emphasis/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/add_2"
+  op: "Add"
+  input: "de_emphasis/while/Identity"
+  input: "de_emphasis/while/add_2/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/strided_slice_1/stack/0"
+  op: "Const"
+  input: "^de_emphasis/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/strided_slice_1/stack"
+  op: "Pack"
+  input: "de_emphasis/while/strided_slice_1/stack/0"
+  input: "de_emphasis/while/Identity"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/strided_slice_1/stack_1/0"
+  op: "Const"
+  input: "^de_emphasis/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/strided_slice_1/stack_1"
+  op: "Pack"
+  input: "de_emphasis/while/strided_slice_1/stack_1/0"
+  input: "de_emphasis/while/add_2"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/strided_slice_1/stack_2"
+  op: "Const"
+  input: "^de_emphasis/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/strided_slice_1"
+  op: "StridedSlice"
+  input: "de_emphasis/while/strided_slice/Enter"
+  input: "de_emphasis/while/strided_slice_1/stack"
+  input: "de_emphasis/while/strided_slice_1/stack_1"
+  input: "de_emphasis/while/strided_slice_1/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 2
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/mul_1/x"
+  op: "Const"
+  input: "^de_emphasis/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+        }
+        float_val: 0.97000002861
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/mul_1"
+  op: "Mul"
+  input: "de_emphasis/while/mul_1/x"
+  input: "de_emphasis/while/Identity_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/add_3"
+  op: "Add"
+  input: "de_emphasis/while/strided_slice_1"
+  input: "de_emphasis/while/mul_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/TensorArrayWrite/TensorArrayWriteV3"
+  op: "TensorArrayWriteV3"
+  input: "de_emphasis/while/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  input: "de_emphasis/while/Identity"
+  input: "de_emphasis/while/add_3"
+  input: "de_emphasis/while/Identity_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/strided_slice_1"
+      }
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/TensorArrayWrite/TensorArrayWriteV3/Enter"
+  op: "Enter"
+  input: "de_emphasis/TensorArray"
+  attr {
+    key: "T"
+    value {
+      type: DT_RESOURCE
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/strided_slice_1"
+      }
+    }
+  }
+
+  attr {
+    key: "frame_name"
+    value {
+      s: "de_emphasis/while/while_context"
+    }
+  }
+  attr {
+    key: "is_constant"
+    value {
+      b: true
+    }
+  }
+  attr {
+    key: "parallel_iterations"
+    value {
+      i: 10
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/add_4/y"
+  op: "Const"
+  input: "^de_emphasis/while/Identity"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/while/add_4"
+  op: "Add"
+  input: "de_emphasis/while/Identity"
+  input: "de_emphasis/while/add_4/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/NextIteration"
+  op: "NextIteration"
+  input: "de_emphasis/while/add_4"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/NextIteration_1"
+  op: "NextIteration"
+  input: "de_emphasis/while/TensorArrayWrite/TensorArrayWriteV3"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/NextIteration_2"
+  op: "NextIteration"
+  input: "de_emphasis/while/add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/Exit"
+  op: "Exit"
+  input: "de_emphasis/while/Switch"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/Exit_1"
+  op: "Exit"
+  input: "de_emphasis/while/Switch_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/while/Exit_2"
+  op: "Exit"
+  input: "de_emphasis/while/Switch_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/TensorArrayStack/TensorArraySizeV3"
+  op: "TensorArraySizeV3"
+  input: "de_emphasis/TensorArray"
+  input: "de_emphasis/while/Exit_1"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/TensorArray"
+      }
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/TensorArrayStack/range/start"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/TensorArray"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/TensorArrayStack/range/delta"
+  op: "Const"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/TensorArray"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/TensorArrayStack/range"
+  op: "Range"
+  input: "de_emphasis/TensorArrayStack/range/start"
+  input: "de_emphasis/TensorArrayStack/TensorArraySizeV3"
+  input: "de_emphasis/TensorArrayStack/range/delta"
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/TensorArray"
+      }
+    }
+  }
+
+}
+node {
+  name: "de_emphasis/TensorArrayStack/TensorArrayGatherV3"
+  op: "TensorArrayGatherV3"
+  input: "de_emphasis/TensorArray"
+  input: "de_emphasis/TensorArrayStack/range"
+  input: "de_emphasis/while/Exit_1"
+  attr {
+    key: "_class"
+    value {
+      list {
+        s: "loc:@de_emphasis/TensorArray"
+      }
+    }
+  }
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "element_shape"
+    value {
+      shape {
+        dim {
+          size: -1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/transpose/perm"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "de_emphasis/transpose"
+  op: "Transpose"
+  input: "de_emphasis/TensorArrayStack/TensorArrayGatherV3"
+  input: "de_emphasis/transpose/perm"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tperm"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "sub_1/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "sub_1"
+  op: "Sub"
+  input: "seq2seq/seq2seq_2/convert_to_lin_specgram/StopGradient_1"
+  input: "sub_1/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "mul_2/y"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 300
+      }
+    }
+  }
+}
+node {
+  name: "mul_2"
+  op: "Mul"
+  input: "sub_1"
+  input: "mul_2/y"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "add_5/x"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1200
+      }
+    }
+  }
+}
+node {
+  name: "add_5"
+  op: "Add"
+  input: "add_5/x"
+  input: "mul_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+}
+node {
+  name: "decoder_reconstruction_lengths"
+  op: "Identity"
+  input: "add_5"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+}
+node {
+  name: "Const_13"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "Max_1"
+  op: "Max"
+  input: "decoder_reconstruction_lengths"
+  input: "Const_13"
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "strided_slice_8/stack"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\000\000\000\000\000\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_8/stack_1/0"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_8/stack_1"
+  op: "Pack"
+  input: "strided_slice_8/stack_1/0"
+  input: "Max_1"
+  attr {
+    key: "N"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "axis"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "strided_slice_8/stack_2"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\001\000\000\000\001\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "strided_slice_8"
+  op: "StridedSlice"
+  input: "de_emphasis/transpose"
+  input: "strided_slice_8/stack"
+  input: "strided_slice_8/stack_1"
+  input: "strided_slice_8/stack_2"
+  attr {
+    key: "Index"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+  attr {
+    key: "begin_mask"
+    value {
+      i: 2
+    }
+  }
+  attr {
+    key: "ellipsis_mask"
+    value {
+      i: 1
+    }
+  }
+  attr {
+    key: "end_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "new_axis_mask"
+    value {
+      i: 0
+    }
+  }
+  attr {
+    key: "shrink_axis_mask"
+    value {
+      i: 0
+    }
+  }
+}
+node {
+  name: "decoder_reconstruction"
+  op: "Identity"
+  input: "strided_slice_8"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "decoder_reconstruction_sample_rate"
+  op: "Identity"
+  input: "decoder_target_sample_rate"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "Abs_3"
+  op: "Abs"
+  input: "decoder_reconstruction"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+
+}
+node {
+  name: "Max_2/reduction_indices"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 1
+      }
+    }
+  }
+}
+node {
+  name: "Max_2"
+  op: "Max"
+  input: "Abs_3"
+  input: "Max_2/reduction_indices"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "Tidx"
+    value {
+      type: DT_INT32
+    }
+  }
+
+  attr {
+    key: "keep_dims"
+    value {
+      b: true
+    }
+  }
+}
+node {
+  name: "decoder_reconstruction_normalized"
+  op: "RealDiv"
+  input: "decoder_reconstruction"
+  input: "Max_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "decoder_reconstruction_1/tag"
+  op: "Const"
+
+  attr {
+    key: "dtype"
+    value {
+      type: DT_STRING
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_STRING
+        tensor_shape {
+        }
+        string_val: "decoder_reconstruction_1"
+      }
+    }
+  }
+}
+node {
+  name: "decoder_reconstruction_1"
+  op: "AudioSummaryV2"
+  input: "decoder_reconstruction_1/tag"
+  input: "decoder_reconstruction_normalized"
+  input: "decoder_reconstruction_sample_rate"
+
+  attr {
+    key: "max_outputs"
+    value {
+      i: 5
+    }
+  }
+}
+node {
+  name: "Merge/MergeSummary"
+  op: "MergeSummary"
+  input: "decoder_input_sample_prob"
+  input: "seq2seq/seq2seq_1/attention_decoder/comb_weights"
+  input: "decoder_reconstruction_1"
+  attr {
+    key: "N"
+    value {
+      i: 3
+    }
+  }
+}
+
+versions {
+  producer: 24
+}
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index b1e04ceec827763b7c5019e432a3f90aa49d522f..6bc136a3f89c9a1dbfd4be15c143d4c893897494 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -25,11 +25,13 @@ namespace tensorflow {
 namespace grappler {
 
 constexpr int kOpsPerMac = 2;
+constexpr char kConst[] = "Const";
 constexpr char kConv2d[] = "Conv2D";
 constexpr char kConv2dBackpropFilter[] = "Conv2DBackpropFilter";
 constexpr char kConv2dBackpropInput[] = "Conv2DBackpropInput";
 constexpr char kMatMul[] = "MatMul";
 constexpr char kSparseMatMul[] = "SparseMatMul";
+constexpr char kPlaceholder[] = "Placeholder";
 constexpr char kIdentity[] = "Identity";
 constexpr char kRefIdentity[] = "RefIdentity";
 constexpr char kNoOp[] = "NoOp";
@@ -159,6 +161,9 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
        wrap(&OpLevelCostEstimator::PredictConv2DBackpropInput)},
       {kMatMul, wrap(&OpLevelCostEstimator::PredictMatMul)},
       {kSparseMatMul, wrap(&OpLevelCostEstimator::PredictMatMul)},
+      {kBatchMatMul, wrap(&OpLevelCostEstimator::PredictBatchMatMul)},
+
+      {kPlaceholder, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kIdentity, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kRefIdentity, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kStopGradient, wrap(&OpLevelCostEstimator::PredictNoOp)},
@@ -167,9 +172,10 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
       {kReshape, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kRecv, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kSend, wrap(&OpLevelCostEstimator::PredictNoOp)},
+      {kConst, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kVariable, wrap(&OpLevelCostEstimator::PredictNoOp)},
       {kVariableV2, wrap(&OpLevelCostEstimator::PredictNoOp)},
-      {kBatchMatMul, wrap(&OpLevelCostEstimator::PredictBatchMatMul)},
+
       {kRank, wrap(&OpLevelCostEstimator::PredictMetadata)},
       {kShape, wrap(&OpLevelCostEstimator::PredictMetadata)},
       {kSize, wrap(&OpLevelCostEstimator::PredictMetadata)}};
@@ -221,6 +227,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
                      Eigen::internal::scalar_square_op<float>>::Cost},
       {"Tanh", Eigen::internal::functor_traits<
                    Eigen::internal::scalar_tanh_op<float>>::Cost},
+      {"Relu", Eigen::internal::functor_traits<
+                   Eigen::internal::scalar_max_op<float>>::Cost},
       {"Sigmoid", Eigen::internal::functor_traits<
                       Eigen::internal::scalar_sigmoid_op<float>>::Cost},
       {"Sign", Eigen::internal::functor_traits<
@@ -283,8 +291,10 @@ Costs OpLevelCostEstimator::PredictCosts(const OpContext& op_context) const {
     if (elementwise_ops_.find(op_features.op()) != elementwise_ops_.end()) {
       return PredictCwiseOp(op_context);
     }
-    VLOG(1) << "Missing implementation for op: " << op_features.op();
-    return DummyExecutionTime(op_context);
+
+    VLOG(1) << "Missing accurate estimator for op: " << op_features.op();
+
+    return PredictCostOfAnUnknownOp(op_context);
   }
 
   std::function<Costs(const OpContext&)> estimator = it->second;
@@ -366,19 +376,27 @@ Costs OpLevelCostEstimator::PredictCwiseOp(const OpContext& op_context) const {
   }
 
   int op_cost = 1;
+  bool is_known_elementwise_op = false;
   auto it = elementwise_ops_.find(op_features.op());
   if (it != elementwise_ops_.end()) {
     op_cost = it->second;
+    is_known_elementwise_op = true;
+  } else {
+    LOG(WARNING) << "Not a cwise op: " << op_features.op();
   }
+
   Costs costs = PredictOpCountBasedCost(op_count * op_cost, op_features);
-  costs.inaccurate = found_unknown_shapes;
+  if (found_unknown_shapes || !is_known_elementwise_op) {
+    costs.inaccurate = true;
+  }
   return costs;
 }
 
-Costs OpLevelCostEstimator::DummyExecutionTime(
+Costs OpLevelCostEstimator::PredictCostOfAnUnknownOp(
     const OpContext& op_context) const {
-  // Use CwiseOp time as an estimation
-  auto costs = PredictCwiseOp(op_context);
+  // Don't assume the operation is cwise, return cost based on input/output size
+  // and admit that it is inaccurate...
+  auto costs = PredictOpCountBasedCost(0, op_context.op_info);
   costs.inaccurate = true;
   return costs;
 }
@@ -391,11 +409,11 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost(
           << " Execution Time (ns):" << compute_cost.count();
 
   bool found_unknown_shapes = false;
-  double total_input_size =
+  const double total_input_size =
       CalculateInputSize(op_features, &found_unknown_shapes);
-  double total_output_size =
+  const double total_output_size =
       CalculateOutputSize(op_features, &found_unknown_shapes);
-  double total_io_size = total_input_size + total_output_size;
+  const double total_io_size = total_input_size + total_output_size;
 
   Costs::NanoSeconds memory_cost(
       std::ceil(total_io_size / device_perf.gb_per_sec));
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
index 3a8385dd732d1747eca690339e098d741f68effc..5f541ccf04dc74eb868d26365a50d2e3542ea7d9 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
@@ -35,7 +35,6 @@ class OpLevelCostEstimator {
 
   virtual Costs PredictCosts(const OpContext& op_context) const;
 
- protected:
   // Basic device performance info, sufficient for roofline estimate.
   struct DeviceInfo {
     double gigaops;     // Billions of operations executed per second.
@@ -45,11 +44,12 @@ class OpLevelCostEstimator {
   // Returns basic device performance info.
   virtual DeviceInfo GetDeviceInfo(const DeviceProperties& device) const;
 
-  // For operations for which we haven't yet built estimates, returns a dummy
-  // value based on input size.
-  Costs DummyExecutionTime(const OpContext& op_context) const;
+ protected:
+  // Predict cost of an op for which no accurate estimator is defined.
+  Costs PredictCostOfAnUnknownOp(const OpContext& op_context) const;
 
-  // Naive cost estimate based on operations divided by device ops/sec.
+  // Naive cost estimate based on operations divided by device ops/sec,
+  // and input/output tensor sizes.
   Costs PredictOpCountBasedCost(double operations,
                                 const OpInfo& op_features) const;
 
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
index f19be4a0ee53609fa0196405da4ecb8b94fa39e6..60fc783472d2b6a1d50eb52e912da1fccbe8cf08 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
@@ -167,8 +167,8 @@ class OpLevelCostEstimatorTest : public ::testing::Test {
 TEST_F(OpLevelCostEstimatorTest, DummyExecutionTime) {
   auto cost = PredictCosts(DescribeOp("Dummy", 1000, 1));
   EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
-  EXPECT_EQ(Costs::Duration(200), cost.compute_time);
-  EXPECT_EQ(Costs::Duration(2200), cost.execution_time);
+  EXPECT_EQ(Costs::Duration(0), cost.compute_time);
+  EXPECT_EQ(Costs::Duration(2000), cost.execution_time);
   EXPECT_TRUE(cost.inaccurate);
 }
 
@@ -176,7 +176,7 @@ TEST_F(OpLevelCostEstimatorTest, ExecutionTimeSumOrMax) {
   SetComputeMemoryOverlap(true);
   auto cost = PredictCosts(DescribeOp("Dummy", 1000, 1));
   EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
-  EXPECT_EQ(Costs::Duration(200), cost.compute_time);
+  EXPECT_EQ(Costs::Duration(0), cost.compute_time);
   EXPECT_EQ(Costs::Duration(2000), cost.execution_time);  // max(2000, 200)
   EXPECT_TRUE(cost.inaccurate);
   SetComputeMemoryOverlap(false);  // Set it back to default.
diff --git a/tensorflow/core/grappler/costs/op_performance_data.proto b/tensorflow/core/grappler/costs/op_performance_data.proto
index 1a111b71dc5ee82650cd5c772dfce9abcb32931b..1d623b8db8e5cc3b4e7e6b32d83695ab4ed4c0ec 100644
--- a/tensorflow/core/grappler/costs/op_performance_data.proto
+++ b/tensorflow/core/grappler/costs/op_performance_data.proto
@@ -96,13 +96,12 @@ message OpPerformance {
     // The output information may have memory usage and output shapes.
     repeated int64 output_memory = 1;
 
-    // Temporary memory allocated by this node.
-    int64 host_temp_memory = 2;
-    int64 device_temp_memory = 3;
+    // Temp and persistent memory allocated by this node.
+    int64 temp_memory = 2;
+    int64 persistent_memory = 4;
 
-    // The persisted_memory doesn't include outputs.
-    int64 host_persistent_memory = 4;
-    int64 device_persistent_memory = 5;
+    int64 device_temp_memory = 3 [deprecated = true];
+    int64 device_persistent_memory = 5 [deprecated = true];
   }
   OpMemory op_memory = 9;
 }
diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index ade0ad53fb71c45f07b7a87824610b575676847c..602f69f12ea9d24ebd94da73a2a76d1992f3bfb1 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -285,14 +285,10 @@ OpPerformanceList CostGraphToOpPerformanceData(const CostGraphDef& cost_graph,
       perf->mutable_op_memory()->add_output_memory(output_info.size());
     }
 
-    perf->mutable_op_memory()->set_host_temp_memory(
-        cost_node->host_temp_memory_size());
-    perf->mutable_op_memory()->set_device_temp_memory(
-        cost_node->device_temp_memory_size());
-    perf->mutable_op_memory()->set_host_persistent_memory(
-        cost_node->host_persistent_memory_size());
-    perf->mutable_op_memory()->set_device_persistent_memory(
-        cost_node->device_persistent_memory_size());
+    perf->mutable_op_memory()->set_temp_memory(
+        cost_node->temporary_memory_size());
+    perf->mutable_op_memory()->set_persistent_memory(
+        cost_node->persistent_memory_size());
   }
   return ret;
 }
diff --git a/tensorflow/core/grappler/costs/virtual_placer.h b/tensorflow/core/grappler/costs/virtual_placer.h
index 7ccb1ebb9999989f17548aeb88d1d64abdcc5341..fee5ce0f510014988656f418b857a73b8d68b807 100644
--- a/tensorflow/core/grappler/costs/virtual_placer.h
+++ b/tensorflow/core/grappler/costs/virtual_placer.h
@@ -41,7 +41,7 @@ class VirtualPlacer {
  private:
   // Converts given device name to Lowercase Fully-Qualified Name (LFQN) string.
   // This helps us disambiguate device names internally and simplify matching.
-  // If device_name couldn't be parsed succesfully, returns empty string.
+  // If device_name couldn't be parsed successfully, returns empty string.
   string to_lfqn_or_empty(const string& device_name) const;
 
   // Map based on the cluster info: cluster device name -> device properties.
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index e5e1ee32926a4a77d7580d9b16812bd0c60ce984..d7d07ee7a55665a2d809588f45fbfd166bd2f76a 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -43,6 +43,9 @@ Costs CombineCosts(const Costs& left, const Costs& right) {
 
   Costs result = left;
   result.execution_time += right.execution_time;
+  if (right.inaccurate) {
+    result.inaccurate = true;
+  }
   if (right.max_memory != kMemoryUnknown) {
     result.max_memory += right.max_memory;
   }
@@ -72,7 +75,7 @@ struct RecvNodeDescriptor {
       : node(node_), port_num(port_num_), device(device_) {}
 };
 
-struct RecvNodeDescritorHash {
+struct RecvNodeDescriptorHash {
   std::size_t operator()(const RecvNodeDescriptor& recv_node) const {
     return std::hash<const NodeDef*>()(recv_node.node) ^
            std::hash<int>()(recv_node.port_num) ^
@@ -88,10 +91,188 @@ struct RecvNodeDescriptorEqual {
 };
 }  // namespace
 
+// ReadyNodeManager
+const NodeDef* LIFOManager::GetCurrNode() {
+  CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node";
+  if (curr_pos_ == nodes_.end()) {
+    curr_pos_ = --(nodes_.rbegin().base());  // Last one in the list.
+  }
+  // Once curr_pos_ is set to a valid entry in the list, we keep using the
+  // cached curr_pos_ until RemoveCurrNode() is called. AddNode() will not
+  // change the GetCurrNode() return value.
+  return *curr_pos_;
+}
+
+void LIFOManager::RemoveCurrNode() {
+  // Make sure we have curr_pos_ ready to be removed.
+  GetCurrNode();
+  // Note curr_pos_ may not be pointing the last element if some nodes are
+  // added.
+  nodes_.erase(curr_pos_);
+
+  curr_pos_ = nodes_.end();  // Reset curr_pos_.
+}
+
+FirstReadyManager::FirstReadyManager() : ReadyNodeManager() {
+  std::make_heap(nodes_.begin(), nodes_.end());
+}
+
+void FirstReadyManager::Init(
+    const std::unordered_map<const NodeDef*, NodeState>* node_state) {
+  // Reset the node state since different instances of the scheduler can reuse
+  // the same node_manager.
+  node_state_ = node_state;
+  nodes_.clear();
+  waiting_queue_.clear();
+  greater_ = [this](const NodeDef* a, const NodeDef* b) -> bool {
+    if (node_state_->at(a).time_ready == node_state_->at(b).time_ready) {
+      // Use Node name as tie-breaker for deterministic node scheduling.
+      return a->name().compare(b->name()) > 0;
+    } else {
+      // Note: we need a node with minimum time_ready, not
+      // maximum; hence, using a > b for comparison function.
+      return node_state_->at(a).time_ready > node_state_->at(b).time_ready;
+    }
+  };
+}
+
+const NodeDef* FirstReadyManager::GetCurrNode() {
+  if (nodes_.empty()) {
+    // Nothing in the node_; probably, the very first call. Move
+    // waiting_queue_ to node_.
+    DrainWaitingQueue();
+    CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node";
+  }
+  return nodes_.front();
+}
+
+void FirstReadyManager::RemoveCurrNode() {
+  if (nodes_.empty()) {
+    // Make sure that there is a node to be removed at the front of nodes_.
+    GetCurrNode();
+  }
+  std::pop_heap(nodes_.begin(), nodes_.end(), greater_);
+  nodes_.pop_back();
+  DrainWaitingQueue();
+}
+
+bool FirstReadyManager::Empty() const {
+  return nodes_.empty() && waiting_queue_.empty();
+}
+
+void FirstReadyManager::DrainWaitingQueue() {
+  for (const auto* node : waiting_queue_) {
+    // push_heap in AddNode() and pop_heap in RemoveCurrNode() guarantees that
+    // the first element is the node with minimum time_ready.
+    nodes_.push_back(node);
+    std::push_heap(nodes_.begin(), nodes_.end(), greater_);
+  }
+  waiting_queue_.clear();
+}
+
+CompositeNodeManager::CompositeNodeManager()
+    : ReadyNodeManager(), send_manager_(), recv_manager_() {}
+
+void CompositeNodeManager::Init(
+    const std::unordered_map<const NodeDef*, NodeState>* node_state) {
+  node_state_ = node_state;
+  send_manager_.Init(node_state);
+  recv_manager_.Init(node_state);
+  curr_node_ = nullptr;
+}
+
+void CompositeNodeManager::AddNode(const NodeDef* node) {
+  if (IsSend(*node)) {
+    send_manager_.AddNode(node);
+  } else if (IsRecv(*node)) {
+    recv_manager_.AddNode(node);
+  } else {
+    const auto& device = node_state_->at(node).device_name;
+    ops_lifo_map_[device].AddNode(node);
+  }
+}
+
+const NodeDef* CompositeNodeManager::GetCurrNode() {
+  if (curr_node_) return curr_node_;
+
+  // Per-device LIFO for normal ops (not _Send / _Recv),
+  // FirstReady for _Send and _Recv (separately),
+  // Globally (among the LIFO-selected ops from each device and _Send and
+  // _Recv) FirstReady,
+  // Priorty order: _Send, _Recv, and then the rest, if time_ready is equal.
+  std::vector<std::pair<const NodeDef*, Costs::Duration>> candidates;
+  for (auto& ops_lifo : ops_lifo_map_) {
+    if (!ops_lifo.second.Empty()) {
+      const auto* op = ops_lifo.second.GetCurrNode();
+      candidates.emplace_back(op, node_state_->at(op).time_ready);
+    }
+  }
+  if (!send_manager_.Empty()) {
+    const auto* send = send_manager_.GetCurrNode();
+    candidates.emplace_back(send, node_state_->at(send).time_ready);
+  }
+  if (!recv_manager_.Empty()) {
+    const auto* recv = recv_manager_.GetCurrNode();
+    candidates.emplace_back(recv, node_state_->at(recv).time_ready);
+  }
+  CHECK(!candidates.empty());
+  auto first_ready = std::min_element(
+      candidates.begin(), candidates.end(),
+      [](const std::pair<const NodeDef*, Costs::Duration>& a,
+         const std::pair<const NodeDef*, Costs::Duration>& b) {
+        if (a.second == b.second) {
+          // Note that there can be only 1 Send and only 1 Recv in candidates,
+          // at most; hence, score is 2 for Send, 1 for Recv, and 0 for a
+          // normap op, and a_score and b_score are equal only if both are
+          // normal ops.
+          int a_score = 2 * IsSend(*a.first) + IsRecv(*a.first);
+          int b_score = 2 * IsSend(*b.first) + IsRecv(*b.first);
+          if (a_score == b_score) {
+            // Both are normal ops; use node name as tie breaker.
+            return a.first->name().compare(b.first->name()) < 0;
+          } else {
+            // Priortize by op type: _Send, _Recv, and normap ops.
+            return a_score > b_score;
+          }
+        } else {
+          return a.second < b.second;
+        }
+      });
+  // Next time we call GetCurrNode(), it just returns the cached one,
+  // curr_node_ until we call RemovCurrNode().
+  curr_node_ = first_ready->first;
+
+  return curr_node_;
+}
+
+void CompositeNodeManager::RemoveCurrNode() {
+  const auto* node = GetCurrNode();
+  if (IsSend(*node)) {
+    send_manager_.RemoveCurrNode();
+  } else if (IsRecv(*node)) {
+    recv_manager_.RemoveCurrNode();
+  } else {
+    const auto device = node_state_->at(node).device_name;
+    ops_lifo_map_[device].RemoveCurrNode();
+  }
+  // Reset curr_node_ so that GetCurrNode() finds another node.
+  curr_node_ = nullptr;
+}
+
+bool CompositeNodeManager::Empty() const {
+  // Empty if all the ready managers are empty.
+  bool empty = true;
+  for (const auto& ops_lifo : ops_lifo_map_) {
+    empty &= ops_lifo.second.Empty();
+  }
+  return empty && send_manager_.Empty() && recv_manager_.Empty();
+}
+
 VirtualScheduler::VirtualScheduler(const GrapplerItem* grappler_item,
                                    const bool use_static_shapes,
-                                   Cluster* cluster)
-    : ready_nodes_(ReadyNodeManagerFactory("FirstReady")),
+                                   Cluster* cluster,
+                                   ReadyNodeManager* ready_nodes)
+    : ready_nodes_(ready_nodes),
       graph_costs_(Costs::ZeroCosts()),
       graph_properties_(*grappler_item),
       cluster_(cluster),
@@ -108,7 +289,9 @@ ReadyNodeManager* VirtualScheduler::ReadyNodeManagerFactory(
   } else if (ready_node_manager == "LIFO") {
     return new LIFOManager();
   } else if (ready_node_manager == "FirstReady") {
-    return new FirstReadyManager(GetNodeStates());
+    return new FirstReadyManager();
+  } else if (ready_node_manager == "Composite") {
+    return new CompositeNodeManager();
   }
   LOG(FATAL) << "Not a valid ready node manager: " << ready_node_manager;
 }
@@ -118,11 +301,11 @@ Status VirtualScheduler::Init() {
   // necessary information for emulating tensorflow op scheduling and
   // construct internal data structures (NodeState and DeviceState) for virtual
   // scheduling.
-
+  ready_nodes_->Init(GetNodeStates());
   // Construct graph properties.
   Status status;
   if (use_static_shapes_) {
-    status = graph_properties_.InferStatically();
+    status = graph_properties_.InferStatically(true);
   } else {
     status = graph_properties_.InferDynamically(cluster_);
   }
@@ -158,14 +341,14 @@ Status VirtualScheduler::Init() {
   // to _Recv as control dependency when creating GrapplerItem.
   std::unordered_map<string, const NodeDef*> name_to_send;
   for (const auto& node : graph.node()) {
-    if (node.op() == "_Send") {
+    if (IsSend(node)) {
       const auto& attr = node.attr();
       name_to_send[attr.at("tensor_name").s()] = &node;
     }
   }
 
   // To reuse _Recv ops.
-  std::unordered_map<RecvNodeDescriptor, const NodeDef*, RecvNodeDescritorHash,
+  std::unordered_map<RecvNodeDescriptor, const NodeDef*, RecvNodeDescriptorHash,
                      RecvNodeDescriptorEqual>
       cached_recv_nodes;
 
@@ -538,7 +721,8 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
   string node_description = GetOpDescription(op_context.op_info);
   op_counts_[node_description] += 1;
   op_costs_[node_description] =
-      node_costs.execution_time.asMicroSeconds().count();
+      std::make_pair(node_costs.execution_time.asMicroSeconds().count(),
+                     !node_costs.inaccurate);
 
   auto& op_cost = FindOrCreateZero(op_name, &op_to_cost_);
   op_cost = CombineCosts(op_cost, node_costs);
@@ -647,8 +831,10 @@ Costs VirtualScheduler::Summary() const {
   for (const auto& op_cost_pair : op_to_cost_) {
     const auto& op = op_cost_pair.first;
     const auto& cost = op_cost_pair.second.execution_time.count();
+    const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate;
     if (cost) {  // Skip printing out zero-cost ops.
-      VLOG(1) << " + " << op << " : " << cost;
+      VLOG(1) << " + " << op << " : " << (is_op_cost_accurate ? "" : "~")
+              << cost;
     }
   }
 
@@ -699,10 +885,16 @@ Costs VirtualScheduler::Summary() const {
           CalculateOutputSize(node_map_.at(node).output_properties, port);
     }
     Costs::NanoSeconds total_compute_time_ns;
+    bool is_total_cost_accurate = true;
     for (const auto& op_cost_pair : state.op_to_cost) {
       const auto& op = op_cost_pair.first;
       const auto& cost = op_cost_pair.second.execution_time.count();
       total_compute_time_ns += op_cost_pair.second.execution_time;
+      const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate;
+      if (!is_op_cost_accurate) {
+        is_total_cost_accurate = false;
+      }
+
       int64 op_mem_usage = 0;
       auto it = op_to_memory.find(op);
       if (it != op_to_memory.end()) {
@@ -714,9 +906,9 @@ Costs VirtualScheduler::Summary() const {
                                : 0.0;
       if (cost || mem_usage_percent > 1.0) {
         // Print out only non-zero cost ops or ops with > 1% memory usage.
-        VLOG(1) << " + " << op << " : " << cost << " ("
-                << strings::HumanReadableNumBytes(op_mem_usage) << " ["
-                << mem_usage_percent << "%] "
+        VLOG(1) << " + " << op << " : " << (is_op_cost_accurate ? "" : "~")
+                << cost << " (" << strings::HumanReadableNumBytes(op_mem_usage)
+                << " [" << mem_usage_percent << "%] "
                 << (persisent_ops.count(op) > 0 ? ": persistent op)" : ")");
       }
     }
@@ -725,8 +917,9 @@ Costs VirtualScheduler::Summary() const {
     if (wall_time_ns.count() > 0) {
       utilization = total_compute_time_ns.count() * 100 / wall_time_ns.count();
     }
-    VLOG(1) << "Device = " << name
-            << ", total_compute_time_ns = " << total_compute_time_ns.count()
+    VLOG(1) << "Device = " << name << ", total_compute_time_ns = "
+            << (is_total_cost_accurate ? "" : "~")
+            << total_compute_time_ns.count()
             << ", utilization = " << utilization << "%";
 
     if (critical_path_costs.execution_time <= state.GetCurrTime()) {
@@ -738,8 +931,11 @@ Costs VirtualScheduler::Summary() const {
     // Also log the op description and their corresponding counts.
     VLOG(2) << "Node description, counts, cost:";
     for (const auto& item : op_counts_) {
+      int cost;
+      bool is_cost_accurate;
+      std::tie(cost, is_cost_accurate) = op_costs_.at(item.first);
       VLOG(2) << "Node: " << item.first << ", Count: " << item.second
-              << ", Individual Cost: " << op_costs_.at(item.first);
+              << ", Individual Cost: " << (is_cost_accurate ? "" : "~") << cost;
     }
   }
 
@@ -788,21 +984,12 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
             nodestate.time_scheduled.asMicroSeconds().count());
         auto* mem_stats = node_stats->mutable_memory_stats();
         // VirtualScheduler does not specify scratch pad memory usage.
-        mem_stats->set_host_temp_memory_size(0);
-        mem_stats->set_device_temp_memory_size(0);
-        int64 host_persistent_memory_size = 0;
-        int64 device_persistent_memory_size = 0;
+        mem_stats->set_temp_memory_size(0);
+        int64 persistent_memory_size = 0;
         if (IsPersistentNode(node_def)) {
-          if (device.first.find("cpu") != string::npos ||
-              device.first.find("CPU") != string::npos) {
-            host_persistent_memory_size = total_output_size;
-          } else {
-            device_persistent_memory_size = total_output_size;
-          }
+          persistent_memory_size = total_output_size;
         }
-        mem_stats->set_host_persistent_memory_size(host_persistent_memory_size);
-        mem_stats->set_device_persistent_memory_size(
-            device_persistent_memory_size);
+        mem_stats->set_persistent_memory_size(persistent_memory_size);
         *device_partition_graph->add_node() = *node_def;
       }
     }
@@ -810,5 +997,16 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
   return Summary();
 }
 
+const std::unordered_map<string, int64> VirtualScheduler::GetPeakMemoryUsage()
+    const {
+  std::unordered_map<string, int64> result;
+  for (const auto& device : device_) {
+    const string& name = device.first;
+    const DeviceState& state = device.second;
+    result[name] = state.max_memory_usage;
+  }
+  return result;
+}
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h
index c74d80c2bee9b99afbcd68cfc8a7d4177e3160bc..c1802509089645a72c5cf06d9b5375553d053841 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.h
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.h
@@ -127,6 +127,8 @@ class ReadyNodeManager {
  public:
   ReadyNodeManager() {}
   virtual ~ReadyNodeManager() {}
+  virtual void Init(
+      const std::unordered_map<const NodeDef*, NodeState>* node_state) {}
   virtual void AddNode(const NodeDef* node) = 0;
   virtual const NodeDef* GetCurrNode() = 0;
   virtual void RemoveCurrNode() = 0;
@@ -137,6 +139,8 @@ class FIFOManager : public ReadyNodeManager {
  public:
   FIFOManager() : ReadyNodeManager() {}
   ~FIFOManager() override {}
+  virtual void Init(
+      const std::unordered_map<const NodeDef*, NodeState>* node_state) {}
   void AddNode(const NodeDef* node) override { nodes_.push_back(node); }
   const NodeDef* GetCurrNode() override {
     CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node";
@@ -157,26 +161,11 @@ class LIFOManager : public ReadyNodeManager {
  public:
   LIFOManager() : ReadyNodeManager() {}
   ~LIFOManager() override {}
+  void Init(const std::unordered_map<const NodeDef*, NodeState>* node_state)
+      override {}
   void AddNode(const NodeDef* node) override { nodes_.push_back(node); }
-  const NodeDef* GetCurrNode() override {
-    CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node";
-    if (curr_pos_ == nodes_.end()) {
-      curr_pos_ = --(nodes_.rbegin().base());  // Last one in the list.
-    }
-    // Once curr_pos_ is set to a valid entry in the list, we keep using the
-    // cached curr_pos_ until RemoveCurrNode() is called. AddNode() will not
-    // change the GetCurrNode() return value.
-    return *curr_pos_;
-  }
-  void RemoveCurrNode() override {
-    // Make sure we have curr_pos_ ready to be removed.
-    GetCurrNode();
-    // Note curr_pos_ may not be pointing the last element if some nodes are
-    // added.
-    nodes_.erase(curr_pos_);
-
-    curr_pos_ = nodes_.end();  // Reset curr_pos_.
-  }
+  const NodeDef* GetCurrNode() override;
+  void RemoveCurrNode() override;
   bool Empty() const override { return nodes_.empty(); }
 
  private:
@@ -193,55 +182,18 @@ class LIFOManager : public ReadyNodeManager {
 // time_ready value (it depends on C++ STL push_heap and pop_heap).
 class FirstReadyManager : public ReadyNodeManager {
  public:
-  FirstReadyManager(
-      const std::unordered_map<const NodeDef*, NodeState>* node_state)
-      : ReadyNodeManager(), node_state_(node_state) {
-    std::make_heap(nodes_.begin(), nodes_.end());
-    greater_ = [this](const NodeDef* a, const NodeDef* b) -> bool {
-      // Note: we need a node with minimum time_ready, not
-      // maximum; hence, using a > b for comparison function.
-      return node_state_->at(a).time_ready > node_state_->at(b).time_ready;
-    };
-  }
+  FirstReadyManager();
+  void Init(
+      const std::unordered_map<const NodeDef*, NodeState>* node_state) override;
   ~FirstReadyManager() override {}
-
   void AddNode(const NodeDef* node) override { waiting_queue_.push_back(node); }
-
-  const NodeDef* GetCurrNode() override {
-    if (nodes_.empty()) {
-      // Nothing in the node_; probably, the very first call. Move
-      // waiting_queue_ to node_.
-      _DrainWaitingQueue();
-      CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node";
-    }
-    return nodes_.front();
-  }
-
-  void RemoveCurrNode() override {
-    if (nodes_.empty()) {
-      // Make sure that there is a node to be removed at the front of nodes_.
-      GetCurrNode();
-    }
-    std::pop_heap(nodes_.begin(), nodes_.end(), greater_);
-    nodes_.pop_back();
-    _DrainWaitingQueue();
-  }
-
-  bool Empty() const override {
-    return nodes_.empty() && waiting_queue_.empty();
-  }
+  const NodeDef* GetCurrNode() override;
+  void RemoveCurrNode() override;
+  bool Empty() const override;
 
  private:
   // Move all the nodes in the waiting_queue_ to nodes_.
-  void _DrainWaitingQueue() {
-    for (const auto* node : waiting_queue_) {
-      // push_heap in AddNode() and pop_heap in RemoveCurrNode() guarantees that
-      // the first element is the node with minimum time_ready.
-      nodes_.push_back(node);
-      std::push_heap(nodes_.begin(), nodes_.end(), greater_);
-    }
-    waiting_queue_.clear();
-  }
+  void DrainWaitingQueue();
 
   // nodes_ is the main queue, where we construct heap, and the front is the
   // current node.
@@ -259,13 +211,49 @@ class FirstReadyManager : public ReadyNodeManager {
   const std::unordered_map<const NodeDef*, NodeState>* node_state_;
 };
 
+// CompositeNodeManager has a few other NodeManagers: per-device LIFO for normal
+// ops (neither _Send nor _Recv) and FirstyReadyManagers for _Send ops and _Recv
+// ops, and then it chooses FirstReady among the ops chosen from each
+// internal NodeManagers. The objective is to maximize producer-consumer
+// locality within device, while processing nodes across devices, including
+// _Send and _Recv, fairly, in terms of their time_ready.
+class CompositeNodeManager : public ReadyNodeManager {
+ public:
+  CompositeNodeManager();
+  ~CompositeNodeManager() override {}
+
+  void Init(
+      const std::unordered_map<const NodeDef*, NodeState>* node_state) override;
+  void AddNode(const NodeDef* node) override;
+  const NodeDef* GetCurrNode() override;
+  void RemoveCurrNode() override;
+  bool Empty() const override;
+
+ private:
+  // Internal ready node managers:
+  // LIFO for normal ops to maximize producer consumer locality.
+  // One LIFO per device.
+  std::unordered_map<string, LIFOManager> ops_lifo_map_;
+  // FirstReady for send and recv. Handle send and recv separately ensures that
+  // send and recv do not block previously read ops with LIFO schedule.
+  FirstReadyManager send_manager_;
+  FirstReadyManager recv_manager_;
+
+  // NodeState structure from VirtualScheduler to get time_ready of ready nodes.
+  // Not owned by FirstReadyManager.
+  const std::unordered_map<const NodeDef*, NodeState>* node_state_;
+
+  // Cached curr node. Set back to nullptr from RemoveCurrNode().
+  const NodeDef* curr_node_;
+};
+
 // The virtual scheduler emulates execution of nodes in a graph, considering
 // dependencies, device, etc.
 class VirtualScheduler {
  public:
   VirtualScheduler(const GrapplerItem* grappler_item,
-                   const bool use_static_shapes, Cluster* cluster);
-
+                   const bool use_static_shapes, Cluster* cluster,
+                   ReadyNodeManager* ready_nodes);
   // Initializes NodeState and DeviceState from grappler_item_ and
   // graph_properties_.
   Status Init();
@@ -280,6 +268,12 @@ class VirtualScheduler {
   // Like the above, but writes detailed stats to RunMetadata.
   // If metadata is nullptr, then just calls and return Summary().
   Costs Summary(RunMetadata* metadata);
+  // Methods called from constructor.
+  static ReadyNodeManager* ReadyNodeManagerFactory(
+      const string& ready_node_manager);
+
+  // Return per device peak memory usage.
+  const std::unordered_map<string, int64> GetPeakMemoryUsage() const;
 
  protected:
   const std::unordered_map<string, DeviceState>* GetDeviceStates() const {
@@ -302,9 +296,6 @@ class VirtualScheduler {
   const string kAttrDstDevice = "dst_device_";
   const string kChannelDevice = "Channel";
 
-  // Methods called from constructor.
-  ReadyNodeManager* ReadyNodeManagerFactory(const string& ready_node_manager);
-
   // Methods called from Init(). Fails if initialize_ is set.
   void MaybeUpdateInputOutput(const NodeDef* node);
   NodeState& GetNodeStateOrCreateIt(const NodeDef* node);
@@ -321,7 +312,7 @@ class VirtualScheduler {
   bool IsPersistentNode(const NodeDef* node) const;
 
   // Scheduler states:
-  std::unique_ptr<ReadyNodeManager> ready_nodes_;
+  ReadyNodeManager* ready_nodes_;  // Not owned.
   std::unordered_map<const NodeDef*, NodeState> node_map_;
   std::unordered_map<string, DeviceState> device_;
 
@@ -330,7 +321,10 @@ class VirtualScheduler {
 
   // Stats:
   std::map<string, int> op_counts_;  // Op counts with key with input shape.
-  std::map<string, int> op_costs_;   // Individual op costs (with input shapes).
+  // Individual op costs (with input shapes).
+  // Boolean field for whether the cost is accurate.
+  std::map<string, std::pair<int, bool>> op_costs_;
+
   Costs graph_costs_;                // Graph cost.
   std::map<string, Costs> op_to_cost_;  // Per-op cost.
 
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
index 412b494be730c21bf8b3d8bd791cc42dcbf15794..53dcb497a6453dfa70c1215352e74e96796ebeb7 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
@@ -29,7 +29,8 @@ class TestVirtualScheduler : public VirtualScheduler {
  public:
   TestVirtualScheduler(const GrapplerItem* grappler_item,
                        const bool use_static_shapes, Cluster* cluster)
-      : VirtualScheduler(grappler_item, use_static_shapes, cluster) {}
+      : VirtualScheduler(grappler_item, use_static_shapes, cluster,
+                         &ready_node_manager_) {}
 
   FRIEND_TEST(VirtualSchedulerTest, CalculateOutputSize);
   FRIEND_TEST(VirtualSchedulerTest, MemoryUsage);
@@ -37,6 +38,9 @@ class TestVirtualScheduler : public VirtualScheduler {
   FRIEND_TEST(VirtualSchedulerTest, ComplexDependency);
   FRIEND_TEST(VirtualSchedulerTest, Variable);
   FRIEND_TEST(VirtualSchedulerTest, InterDeviceTransfer);
+
+ protected:
+  FirstReadyManager ready_node_manager_;
 };
 
 class VirtualSchedulerTest : public ::testing::Test {
@@ -44,8 +48,15 @@ class VirtualSchedulerTest : public ::testing::Test {
   NodeDef node1_, node2_, node3_, node4_, node5_, node6_;
   std::unordered_map<const NodeDef*, NodeState> node_states_;
 
+  // Device names:
   const string kCPU0 = "/job:localhost/replica:0/task:0/cpu:0";
   const string kCPU1 = "/job:localhost/replica:0/task:0/cpu:1";
+  const string kChannelFrom0To1 = "Channel from CPU0 to CPU1";
+  const string kChannelFrom1To0 = "Channel from CPU1 to CPU0";
+  // Op names:
+  const string kSend = "_Send";
+  const string kRecv = "_Recv";
+  const string kConv2D = "Conv2D";
 
   DeviceProperties GetDummyCPUDevice() {
     // Create CPU with 2 cores, 4 Ghz freq, 2 GB/s mem bandwidth.
@@ -59,29 +70,26 @@ class VirtualSchedulerTest : public ::testing::Test {
     return cpu_device;
   }
 
+  void NodeSetUp(const string& name, const string& op_name,
+                 const string& device_name, const uint64 time_ready,
+                 NodeDef* node) {
+    node->set_name(name);
+    node->set_op(op_name);
+    node->set_device(device_name);
+
+    node_states_[node] = NodeState();
+    node_states_[node].time_ready = time_ready;
+    node_states_[node].device_name = device_name;
+  }
+
   void SetUp() override {
-    // Initializes nodes for manager
-    node1_.set_name("Node1");
-    node2_.set_name("Node2");
-    node3_.set_name("Node3");
-    node4_.set_name("Node4");
-    node5_.set_name("Node5");
-    node6_.set_name("Node6");
-
-    // Initialize node_states, with time_ready in reverse order.
-    node_states_[&node1_] = NodeState();
-    node_states_[&node2_] = NodeState();
-    node_states_[&node3_] = NodeState();
-    node_states_[&node4_] = NodeState();
-    node_states_[&node5_] = NodeState();
-    node_states_[&node6_] = NodeState();
-
-    node_states_[&node6_].time_ready = 1000;
-    node_states_[&node5_].time_ready = 2000;
-    node_states_[&node4_].time_ready = 3000;
-    node_states_[&node3_].time_ready = 4000;
-    node_states_[&node2_].time_ready = 5000;
-    node_states_[&node1_].time_ready = 6000;
+    // node1_ to node6_ on kCPU0, with time_ready in reverse_order.
+    NodeSetUp("Node1", kConv2D, kCPU0, 6000, &node1_);
+    NodeSetUp("Node2", kConv2D, kCPU0, 5000, &node2_);
+    NodeSetUp("Node3", kConv2D, kCPU0, 4000, &node3_);
+    NodeSetUp("Node4", kConv2D, kCPU0, 3000, &node4_);
+    NodeSetUp("Node5", kConv2D, kCPU0, 2000, &node5_);
+    NodeSetUp("Node6", kConv2D, kCPU0, 1000, &node6_);
 
     // Initializes cluster_ and placer_.
     std::unordered_map<string, DeviceProperties> devices;
@@ -1144,23 +1152,24 @@ TEST_F(VirtualSchedulerTest, AddAndRemoveMultipleLIFOManager) {
 }
 
 TEST_F(VirtualSchedulerTest, GetSingleNodeFirstReadyManager) {
-  FirstReadyManager manager = FirstReadyManager(&node_states_);
+  FirstReadyManager manager;
+  manager.Init(&node_states_);
 
   manager.AddNode(&node1_);
   EXPECT_EQ("Node1", manager.GetCurrNode()->name());
 }
 
 TEST_F(VirtualSchedulerTest, RemoveSingleNodeFirstReadyManager) {
-  FirstReadyManager manager = FirstReadyManager(&node_states_);
-
+  FirstReadyManager manager;
+  manager.Init(&node_states_);
   manager.AddNode(&node1_);
   manager.RemoveCurrNode();
   EXPECT_TRUE(manager.Empty());
 }
 
 TEST_F(VirtualSchedulerTest, GetAndRemoveMultipleFirstReadyManager) {
-  FirstReadyManager manager = FirstReadyManager(&node_states_);
-
+  FirstReadyManager manager;
+  manager.Init(&node_states_);
   // Insert nodes in some random order.
   manager.AddNode(&node2_);
   manager.AddNode(&node1_);
@@ -1187,8 +1196,8 @@ TEST_F(VirtualSchedulerTest, GetAndRemoveMultipleFirstReadyManager) {
 }
 
 TEST_F(VirtualSchedulerTest, GetCurrNodeFirstReadyManager) {
-  FirstReadyManager manager = FirstReadyManager(&node_states_);
-
+  FirstReadyManager manager;
+  manager.Init(&node_states_);
   // Insert nodes in some random order.
   manager.AddNode(&node2_);
   manager.AddNode(&node1_);
@@ -1207,15 +1216,9 @@ TEST_F(VirtualSchedulerTest, GetCurrNodeFirstReadyManager) {
   NodeDef node7;
   NodeDef node8;
   NodeDef node9;
-  node7.set_name("Node7");
-  node8.set_name("Node8");
-  node9.set_name("Node9");
-  node_states_[&node7] = NodeState();
-  node_states_[&node8] = NodeState();
-  node_states_[&node9] = NodeState();
-  node_states_[&node7].time_ready = 5;
-  node_states_[&node8].time_ready = 4;
-  node_states_[&node9].time_ready = 3;
+  NodeSetUp("Node7", kConv2D, kCPU0, 5, &node7);
+  NodeSetUp("Node8", kConv2D, kCPU0, 4, &node8);
+  NodeSetUp("Node9", kConv2D, kCPU0, 3, &node9);
 
   manager.AddNode(&node7);
   EXPECT_EQ("Node6", manager.GetCurrNode()->name());
@@ -1249,6 +1252,296 @@ TEST_F(VirtualSchedulerTest, GetCurrNodeFirstReadyManager) {
   EXPECT_TRUE(manager.Empty());
 }
 
+TEST_F(VirtualSchedulerTest, DeterminismInFirstReadyManager) {
+  FirstReadyManager manager1;
+  manager1.Init(&node_states_);
+  FirstReadyManager manager2;
+  manager2.Init(&node_states_);
+
+  // 6 nodes with same time_ready.
+  NodeDef node7;
+  NodeDef node8;
+  NodeDef node9;
+  NodeDef node10;
+  NodeDef node11;
+  NodeDef node12;
+  NodeSetUp("Node7", kConv2D, kCPU0, 1000, &node7);
+  NodeSetUp("Node8", kConv2D, kCPU0, 1000, &node8);
+  NodeSetUp("Node9", kConv2D, kCPU0, 1000, &node9);
+  NodeSetUp("Node10", kConv2D, kCPU0, 1000, &node10);
+  NodeSetUp("Node11", kConv2D, kCPU0, 1000, &node11);
+  NodeSetUp("Node12", kConv2D, kCPU0, 1000, &node12);
+
+  // Add the above 6 nodes to manager1.
+  manager1.AddNode(&node7);
+  manager1.AddNode(&node8);
+  manager1.AddNode(&node9);
+  manager1.AddNode(&node10);
+  manager1.AddNode(&node11);
+  manager1.AddNode(&node12);
+
+  // Add the above 6 nodes to manager2, but in a different order.
+  manager2.AddNode(&node8);
+  manager2.AddNode(&node11);
+  manager2.AddNode(&node9);
+  manager2.AddNode(&node10);
+  manager2.AddNode(&node7);
+  manager2.AddNode(&node12);
+
+  // Expect both managers return the same nodes for deterministic node
+  // scheduling.
+  EXPECT_EQ(manager1.GetCurrNode()->name(), manager2.GetCurrNode()->name());
+  manager1.RemoveCurrNode();
+  manager2.RemoveCurrNode();
+
+  EXPECT_EQ(manager1.GetCurrNode()->name(), manager2.GetCurrNode()->name());
+  manager1.RemoveCurrNode();
+  manager2.RemoveCurrNode();
+
+  EXPECT_EQ(manager1.GetCurrNode()->name(), manager2.GetCurrNode()->name());
+  manager1.RemoveCurrNode();
+  manager2.RemoveCurrNode();
+
+  EXPECT_EQ(manager1.GetCurrNode()->name(), manager2.GetCurrNode()->name());
+  manager1.RemoveCurrNode();
+  manager2.RemoveCurrNode();
+
+  EXPECT_EQ(manager1.GetCurrNode()->name(), manager2.GetCurrNode()->name());
+  manager1.RemoveCurrNode();
+  manager2.RemoveCurrNode();
+
+  EXPECT_EQ(manager1.GetCurrNode()->name(), manager2.GetCurrNode()->name());
+  manager1.RemoveCurrNode();
+  manager2.RemoveCurrNode();
+
+  EXPECT_TRUE(manager1.Empty());
+  EXPECT_TRUE(manager2.Empty());
+}
+
+TEST_F(VirtualSchedulerTest, RemoveSingleNodeCompositeNodeManager) {
+  CompositeNodeManager manager;
+  manager.Init(&node_states_);
+  manager.AddNode(&node1_);
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+}
+
+TEST_F(VirtualSchedulerTest, RemoveSingleNodeComopsiteNodeManager) {
+  CompositeNodeManager manager;
+  manager.Init(&node_states_);
+
+  manager.AddNode(&node1_);
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+}
+
+TEST_F(VirtualSchedulerTest, GetAndRemoveMultipleComopsiteNodeManager) {
+  CompositeNodeManager manager;
+  manager.Init(&node_states_);
+
+  // Add the nodes to LIFOManager.
+  manager.AddNode(&node1_);
+  manager.AddNode(&node2_);
+  manager.AddNode(&node3_);
+  manager.AddNode(&node4_);
+
+  // Keep checking current node as nodes are removed and added.
+  EXPECT_EQ("Node4", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node3", manager.GetCurrNode()->name());
+  manager.AddNode(&node5_);
+  // GetCurrNode()  should return the same node even if some nodes are added,
+  // until RemoveCurrNode() is called.
+  EXPECT_EQ("Node3", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node5", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node2", manager.GetCurrNode()->name());
+  manager.AddNode(&node6_);
+  EXPECT_EQ("Node2", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node6", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node1", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+}
+
+TEST_F(VirtualSchedulerTest, MultiDeviceSendRecvComopsiteNodeManager) {
+  CompositeNodeManager manager;
+  manager.Init(&node_states_);
+  // Additional nodes on kCPU1
+  NodeDef node7;
+  NodeDef node8;
+  NodeDef node9;
+  NodeSetUp("Node7", kConv2D, kCPU1, 1001, &node7);
+  NodeSetUp("Node8", kConv2D, kCPU1, 2001, &node8);
+  NodeSetUp("Node9", kConv2D, kCPU1, 3001, &node9);
+
+  // Send and Recv nodes.
+  NodeDef send1;
+  NodeDef send2;
+  NodeDef recv1;
+  NodeDef recv2;
+  NodeSetUp("Send1", kSend, kChannelFrom0To1, 2002, &send1);
+  NodeSetUp("Send2", kSend, kChannelFrom1To0, 2005, &send2);
+  NodeSetUp("Recv1", kRecv, kCPU0, 2003, &recv1);
+  NodeSetUp("Recv2", kRecv, kCPU1, 2004, &recv2);
+
+  // Insert nodes.
+  manager.AddNode(&node1_);
+  manager.AddNode(&node2_);
+  manager.AddNode(&node3_);
+  manager.AddNode(&node4_);
+  manager.AddNode(&node5_);
+  manager.AddNode(&node6_);
+  manager.AddNode(&node7);
+  manager.AddNode(&node8);
+  manager.AddNode(&node9);
+  manager.AddNode(&send1);
+  manager.AddNode(&send2);
+  manager.AddNode(&recv1);
+  manager.AddNode(&recv2);
+
+  // on kCPU0; last one is node6_, on kCPU1: last one is node9;
+  // so choose one that has earliest time_ready among node6_, node9,
+  // Send1, Send2, Recv1, and Recv2.
+  EXPECT_EQ("Node6", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Then, the next one on kCPU0 is node5_; choose the earliest time_ready node
+  // among node5_, node9, Send1, Send2, Recv1, and Recv2.
+  EXPECT_EQ("Node5", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose among node4_, node9, Send1, Send2, Recv1, and Recv2.
+  EXPECT_EQ("Send1", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose among node4_, node9, Sen2, Recv1, and Recv2.
+  EXPECT_EQ("Recv1", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose among node4_, node9, Send2, and Recv2.
+  EXPECT_EQ("Recv2", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose among node4_, node9, and Send2.
+  EXPECT_EQ("Send2", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose between node4_, node9.
+  EXPECT_EQ("Node4", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose between node3_, node9.
+  EXPECT_EQ("Node9", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose between node3_, node8.
+  EXPECT_EQ("Node8", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Next, choose between node3_, node7.
+  EXPECT_EQ("Node7", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  // Then, just the nodes on kCPU1 -- LIFO.
+  EXPECT_EQ("Node3", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node2", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node1", manager.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+}
+
+TEST_F(VirtualSchedulerTest, DeterminismInCompositeNodeManager) {
+  CompositeNodeManager manager;
+  manager.Init(&node_states_);
+  CompositeNodeManager manager2;
+  manager2.Init(&node_states_);
+
+  // 6 nodes with same time_ready.
+  NodeDef node7;
+  NodeDef node8;
+  NodeDef node9;
+  NodeDef node10;
+  NodeDef node11;
+  NodeDef node12;
+  NodeSetUp("Node7", kConv2D, kCPU0, 1000, &node7);
+  NodeSetUp("Node8", kSend, kCPU0, 1000, &node8);
+  NodeSetUp("Node9", kRecv, kCPU0, 1000, &node9);
+  NodeSetUp("Node10", kConv2D, kCPU0, 999, &node10);
+  NodeSetUp("Node11", kRecv, kCPU0, 999, &node11);
+  NodeSetUp("Node12", kConv2D, kCPU1, 1000, &node12);
+
+  // Add Nodes 7 to 9 to manager.
+  manager.AddNode(&node7);
+  manager.AddNode(&node8);
+  manager.AddNode(&node9);
+
+  // It should return _Send, Recv, and the other op order, when the candidate
+  // nodes have same time_ready.
+  EXPECT_EQ("Node8", manager.GetCurrNode()->name());
+  EXPECT_EQ(kSend, manager.GetCurrNode()->op());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node9", manager.GetCurrNode()->name());
+  EXPECT_EQ(kRecv, manager.GetCurrNode()->op());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node7", manager.GetCurrNode()->name());
+  EXPECT_EQ(kConv2D, manager.GetCurrNode()->op());
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+
+  // Add Nodes 7 to 9 to manager, but in a different order.
+  manager.AddNode(&node9);
+  manager.AddNode(&node8);
+  manager.AddNode(&node7);
+
+  // Expect same order (_Send, _Recv, and the other op), regardless of Add
+  // order.
+  EXPECT_EQ("Node8", manager.GetCurrNode()->name());
+  EXPECT_EQ(kSend, manager.GetCurrNode()->op());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node9", manager.GetCurrNode()->name());
+  EXPECT_EQ(kRecv, manager.GetCurrNode()->op());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node7", manager.GetCurrNode()->name());
+  EXPECT_EQ(kConv2D, manager.GetCurrNode()->op());
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+
+  // Conv2D's time_ready < Send's time_ready; Expect Conv2D first.
+  manager.AddNode(&node8);
+  manager.AddNode(&node10);
+  EXPECT_EQ("Node10", manager.GetCurrNode()->name());
+  EXPECT_EQ(kConv2D, manager.GetCurrNode()->op());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node8", manager.GetCurrNode()->name());
+  EXPECT_EQ(kSend, manager.GetCurrNode()->op());
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+
+  // Recv's time_ready < Send' time_ready; Expect Recv first.
+  manager.AddNode(&node11);
+  manager.AddNode(&node8);
+  EXPECT_EQ("Node11", manager.GetCurrNode()->name());
+  EXPECT_EQ(kRecv, manager.GetCurrNode()->op());
+  manager.RemoveCurrNode();
+  EXPECT_EQ("Node8", manager.GetCurrNode()->name());
+  EXPECT_EQ(kSend, manager.GetCurrNode()->op());
+  manager.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+
+  // Node7 and 12 are normal ops with the same time_ready, placed on different
+  // devices. These two nodes are added to manager and manager2, but in
+  // different orders; Expect GetCurrNode() returns the nodes in the same order.
+  manager.AddNode(&node7);
+  manager.AddNode(&node12);
+
+  manager2.AddNode(&node12);
+  manager2.AddNode(&node7);
+
+  EXPECT_EQ(manager.GetCurrNode()->name(), manager2.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  manager2.RemoveCurrNode();
+  EXPECT_EQ(manager.GetCurrNode()->name(), manager2.GetCurrNode()->name());
+  manager.RemoveCurrNode();
+  manager2.RemoveCurrNode();
+  EXPECT_TRUE(manager.Empty());
+}
+
 // Create small graph, run predict costs on it, make sure the costs from the
 // summary match the hand-calculated costs.
 TEST_F(VirtualSchedulerTest, SummaryCostTest) {
@@ -1580,7 +1873,7 @@ TEST_F(VirtualSchedulerTest, WhileLoop) {
   EXPECT_NE(exit_start_micro, exit_1_start_micro);
 
   // Check dependency among the nodes; no matter what scheduling mechanism we
-  // use, the scheduled ops should follow these depedency chains.
+  // use, the scheduled ops should follow these dependency chains.
   // Note that currently, VirtualScheduler executes while/Merge twice; hence,
   // we're not testing dependency chains related to while/Merge.
   // TODO(dyoon): after fixing while loop behavior correctly (run nodes in the
@@ -1634,20 +1927,20 @@ TEST_F(VirtualSchedulerTest, InterDeviceTransfer) {
     const auto& name = x.first;
     const auto& node_info = x.second;
     const auto& op = node_info.op_info.op();
-    if (op == "_Recv") {
+    if (op == kRecv) {
       recv_op_names[get_port_num(name)] = name;
-    } else if (op == "_Send") {
+    } else if (op == kSend) {
       send_op_names[get_port_num(name)] = name;
     }
     op_count[op]++;
   }
 
   // Same number of _Send and _Recv.
-  EXPECT_EQ(op_count.at("_Send"), op_count.at("_Recv"));
+  EXPECT_EQ(op_count.at(kSend), op_count.at(kRecv));
 
   // Expect 4 Send and Recvs each: port 0, 1, and, 2, and control dependency.
-  EXPECT_EQ(op_count.at("_Recv"), 4);
-  EXPECT_EQ(op_count.at("_Send"), 4);
+  EXPECT_EQ(op_count.at(kRecv), 4);
+  EXPECT_EQ(op_count.at(kSend), 4);
 
   // Helper lambda for extracting output Tensor size.
   auto get_output_size = [this, ops_executed](const string& name) -> int64 {
diff --git a/tensorflow/core/grappler/graph_view.cc b/tensorflow/core/grappler/graph_view.cc
index bf8a98a722a1bb87ecf9c3c625a16145d74f9b01..0d3f94854b65cfc06c3d68fc5ac7bc3aa68f9a34 100644
--- a/tensorflow/core/grappler/graph_view.cc
+++ b/tensorflow/core/grappler/graph_view.cc
@@ -24,7 +24,7 @@ GraphView::GraphView(GraphDef* graph) : graph_(graph) {
     auto node = graph_->mutable_node(i);
     auto rslt = nodes_.insert(std::make_pair(node->name(), node));
     // Check that the graph doesn't contain multiple nodes with the same name.
-    CHECK(rslt.second);
+    CHECK(rslt.second) << "Non unique node name detected: " << node->name();
   }
   for (NodeDef& node : *graph_->mutable_node()) {
     for (int i = 0; i < node.input_size(); ++i) {
@@ -38,6 +38,8 @@ GraphView::GraphView(GraphDef* graph) : graph_(graph) {
         input.port_id = -1;
       } else {
         input.port_id = i;
+        num_regular_outputs_[fanin.node] =
+            std::max(num_regular_outputs_[fanin.node], fanin.port_id);
       }
 
       fanouts_[fanin].insert(input);
@@ -80,7 +82,7 @@ GraphView::GetFanout(const GraphView::OutputPort& port) const {
   return it->second;
 }
 
-const std::unordered_set<GraphView::OutputPort, GraphView::HashPort>
+std::unordered_set<GraphView::OutputPort, GraphView::HashPort>
 GraphView::GetFanin(const GraphView::InputPort& port) const {
   std::unordered_set<GraphView::OutputPort, GraphView::HashPort> result;
   if (port.port_id >= 0) {
@@ -118,5 +120,58 @@ const GraphView::OutputPort GraphView::GetRegularFanin(
   return fanin;
 }
 
+std::unordered_set<GraphView::InputPort, GraphView::HashPort>
+GraphView::GetFanouts(const NodeDef& node,
+                      bool include_controlled_nodes) const {
+  std::unordered_set<InputPort, HashPort> result;
+  OutputPort port;
+  port.node = const_cast<NodeDef*>(&node);
+  const int first_port_id = include_controlled_nodes ? -1 : 0;
+  auto it = num_regular_outputs_.find(&node);
+  const int last_port_id = (it != num_regular_outputs_.end()) ? it->second : -1;
+
+  for (int i = first_port_id; i <= last_port_id; ++i) {
+    port.port_id = i;
+    auto it = fanouts_.find(port);
+    if (it != fanouts_.end()) {
+      result.insert(it->second.begin(), it->second.end());
+    }
+  }
+  return result;
+}
+
+std::unordered_set<GraphView::OutputPort, GraphView::HashPort>
+GraphView::GetFanins(const NodeDef& node,
+                     bool include_controlling_nodes) const {
+  std::unordered_set<OutputPort, HashPort> result;
+  for (int i = 0; i < node.input_size(); ++i) {
+    OutputPort fanin;
+    string fanin_name = ParseNodeName(node.input(i), &fanin.port_id);
+    if (fanin.port_id < 0) {
+      if (!include_controlling_nodes) {
+        break;
+      }
+    }
+    auto it = nodes_.find(fanin_name);
+    if (it != nodes_.end()) {
+      fanin.node = it->second;
+      result.insert(fanin);
+    }
+  }
+  return result;
+}
+
+int GraphView::NumFanins(const NodeDef& node,
+                         bool include_controlling_nodes) const {
+  int count = 0;
+  for (const string& input : node.input()) {
+    if (!include_controlling_nodes && IsControlInput(input)) {
+      break;
+    }
+    count += 1;
+  }
+  return count;
+}
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/graph_view.h b/tensorflow/core/grappler/graph_view.h
index a24310ad1a40b7e84e2fa67686c1bf0575ac5881..f4e2de75a60182f3b2bbc366c076052bd0fae118 100644
--- a/tensorflow/core/grappler/graph_view.h
+++ b/tensorflow/core/grappler/graph_view.h
@@ -29,8 +29,8 @@ namespace grappler {
 class GraphView {
  public:
   struct Port {
-    NodeDef* node;
-    int port_id;
+    NodeDef* node = nullptr;
+    int port_id = -1;
 
     bool operator==(const Port& other) const {
       return node == other.node && port_id == other.port_id;
@@ -51,19 +51,33 @@ class GraphView {
   // used to access the controlling nodes (i.e. the nodes connected to node_name
   // through an incoming control dependency).
   InputPort GetInputPort(const string& node_name, int port_id) const;
-  // Get the specified input port. Note that the special '-1' port_id can be
+  // Get the specified output port. Note that the special '-1' port_id can be
   // used to access the controlled nodes (i.e. the nodes connected to node_name
   // through an outgoing control dependency).
-
-  // Special case: regular (i.e. non-control) ports can only have one fanin.
   OutputPort GetOutputPort(const string& node_name, int port_id) const;
 
+  // Get the input (resp. output) port(s) in the immediate fanout (resp. fanin)
+  // of an output (resp. input) port.
   const std::unordered_set<InputPort, HashPort>& GetFanout(
       const OutputPort& port) const;
-  const std::unordered_set<OutputPort, HashPort> GetFanin(
+  std::unordered_set<OutputPort, HashPort> GetFanin(
       const InputPort& port) const;
+  // Special case: regular (i.e. non-control) input ports can only have one
+  // fanin.
   const OutputPort GetRegularFanin(const InputPort& port) const;
 
+  // Get all the input (resp. output) ports in the immediate fanout (resp fanin)
+  // of a node. Include the controlling nodes iff include_controlling_nodes is
+  // true.
+  std::unordered_set<InputPort, HashPort> GetFanouts(
+      const NodeDef& node, bool include_controlled_nodes) const;
+  std::unordered_set<OutputPort, HashPort> GetFanins(
+      const NodeDef& node, bool include_controlling_nodes) const;
+
+  // Get the number of ports in the immediate fanin of a node. Count the
+  // controlling nodes iff include_controlling_nodes is true.
+  int NumFanins(const NodeDef& node, bool include_controlling_nodes) const;
+
  private:
   GraphDef* graph_;
   std::unordered_map<string, NodeDef*> nodes_;
@@ -71,7 +85,7 @@ class GraphView {
   std::unordered_map<OutputPort, std::unordered_set<InputPort, HashPort>,
                      HashPort>
       fanouts_;
-  std::unordered_map<NodeDef*, std::unordered_set<NodeDef*>> controlled_nodes_;
+  std::unordered_map<const NodeDef*, int> num_regular_outputs_;
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/graph_view_test.cc b/tensorflow/core/grappler/graph_view_test.cc
index 15bed07d017a18d53973da012e7add4085380a74..958eb921fb72c8ff16657c1c7d6269878a5a69ae 100644
--- a/tensorflow/core/grappler/graph_view_test.cc
+++ b/tensorflow/core/grappler/graph_view_test.cc
@@ -58,6 +58,22 @@ TEST_F(GraphViewTest, BasicGraph) {
       EXPECT_FALSE(true);
     }
   }
+
+  const NodeDef* add_node = graph.GetNode("AddN");
+  EXPECT_NE(nullptr, add_node);
+  string fanouts;
+  for (const auto& fo : graph.GetFanouts(*add_node, false)) {
+    strings::StrAppend(&fanouts,
+                       strings::StrCat(fo.node->name(), ":", fo.port_id, " "));
+  }
+  EXPECT_EQ("AddN_2:0 AddN_3:0 ", fanouts);
+
+  string fanins;
+  for (const auto& fi : graph.GetFanins(*add_node, false)) {
+    strings::StrAppend(&fanins,
+                       strings::StrCat(fi.node->name(), ":", fi.port_id, " "));
+  }
+  EXPECT_EQ("Square_1:0 Square:0 ", fanins);
 }
 
 TEST_F(GraphViewTest, ControlDependencies) {
diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc
index 36c7f92c49e74244494af1c4ba5e45c23f6f49b0..7a9ad50519c6ace696cb615d1b6c5855589a429f 100644
--- a/tensorflow/core/grappler/grappler_item_builder.cc
+++ b/tensorflow/core/grappler/grappler_item_builder.cc
@@ -126,9 +126,6 @@ Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def,
   graph_ctor_opts.allow_internal_ops = true;
   graph_ctor_opts.expect_device_spec = false;
   std::unique_ptr<Graph> graphptr(new Graph(function_library));
-  // Populate default attrs to the NodeDefs in the GraphDef.
-  TF_RETURN_IF_ERROR(
-      AddDefaultAttrsToGraphDef(&graph_def, *graphptr->op_registry(), 0));
 
   TF_RETURN_IF_ERROR(
       ConvertGraphDefToGraph(graph_ctor_opts, graph_def, graphptr.get()));
@@ -138,7 +135,10 @@ Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def,
   optimizer.Optimize(flr, env, devices[0], &graphptr, /*shape_map=*/nullptr);
   graphptr->ToGraphDef(output_graph_def);
 
-  return Status::OK();
+  // The default values of attributes might have been stripped by the optimizer.
+  // Add them back.
+  return AddDefaultAttrsToGraphDef(output_graph_def, *graphptr->op_registry(),
+                                   0);
 }
 
 // Applies the same graph pruning logic to the graph as Session.Run in TF.
@@ -173,7 +173,7 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
                  << ", skipping this input.";
       return nullptr;
     }
-    LOG(INFO) << "Will use feed node " << feed_name;
+    VLOG(1) << "Will use feed node " << feed_name;
     new_item->feed.emplace_back(feed_name, Tensor());
   }
 
@@ -188,7 +188,7 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
                      << ", skipping this input";
           return nullptr;
         }
-        LOG(INFO) << "Will use fetch node " << name;
+        VLOG(1) << "Will use fetch node " << name;
         new_item->fetch.push_back(name);
       }
     }
@@ -449,6 +449,18 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
     new_item->save_restore_loc_tensor = saver.filename_tensor_name();
   }
 
+  // Instantiate all the missing attributes with their default values.
+  Status attr_status = AddDefaultAttrsToGraphDef(
+      &new_item->graph,
+      FunctionLibraryDefinition(OpRegistry::Global(),
+                                new_item->graph.library()),
+      0);
+  if (!attr_status.ok()) {
+    LOG(ERROR) << "Failed to instantiate default attribute values: "
+               << attr_status.error_message();
+    return nullptr;
+  }
+
   // Optimize the graph (function inlining, l1 optimizations, etc).
   VLOG(1) << "Number of nodes in graph before OptimizeGraph: "
           << new_item->graph.node_size();
@@ -498,5 +510,103 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
   return new_item;
 }
 
+std::unique_ptr<GrapplerItem> GrapplerItemFromFunctionDef(
+    const string& id, const FunctionDef& func,
+    const std::unordered_map<string, AttrValue>& func_attr) {
+  if (id.empty()) {
+    LOG(ERROR) << "id must be non-empty.";
+    return nullptr;
+  }
+  std::unique_ptr<GrapplerItem> new_item(new GrapplerItem());
+  new_item->id = id;
+
+  std::unordered_map<string, string> port_map;
+
+  // Add the function inputs as placeholder
+  for (const auto& inp : func.signature().input_arg()) {
+    NodeDef* ph = new_item->graph.add_node();
+    ph->set_name(inp.name());
+    ph->set_op("Placeholder");
+    if (inp.type() != DT_INVALID) {
+      (*ph->mutable_attr())["T"].set_type(inp.type());
+    } else {
+      auto it = func_attr.find(inp.type_attr());
+      if (it == func_attr.end()) {
+        LOG(ERROR) << "Unknown type attribute " << inp.type_attr()
+                   << " for function input " << inp.name();
+        return nullptr;
+      } else {
+        (*ph->mutable_attr())["T"] = it->second;
+      }
+    }
+    port_map[inp.name()] = inp.name();
+  }
+
+  // Add the function body to the graph.
+  for (const NodeDef& node : func.node_def()) {
+    NodeDef* new_node = new_item->graph.add_node();
+    *new_node = node;
+    // Replace the placeholder attribute values with the specified value.
+    for (auto& attr : *new_node->mutable_attr()) {
+      const string& ph_name = attr.second.placeholder();
+      auto it = func_attr.find(ph_name);
+      if (it != func_attr.end()) {
+        attr.second = it->second;
+      }
+    }
+
+    // Functions use a custom format to encode connectivity. Map these custom
+    // strings to regular ones.
+    const OpDef* op_def = nullptr;
+    Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def);
+    if (!status.ok()) {
+      LOG(ERROR) << "Op " << node.op() << " not registered: " << status;
+      return nullptr;
+    }
+    tensorflow::NameRangeMap inputs;
+    tensorflow::NameRangeMap outputs;
+    status = tensorflow::NameRangesForNode(node, *op_def, &inputs, &outputs);
+    if (!status.ok()) {
+      LOG(ERROR) << "Op " << node.op() << " invalid: " << status;
+      return nullptr;
+    }
+    for (const auto& name_range : outputs) {
+      string port_prefix =
+          strings::StrCat(node.name(), ":", name_range.first, ":");
+      int index_start = name_range.second.first;
+      int index_end = name_range.second.second;
+      for (int i = index_start; i < index_end; ++i) {
+        string port_id = strings::StrCat(port_prefix, i - index_start);
+        string port_name = strings::StrCat(node.name(), ":", i);
+        port_map[port_id] = port_name;
+      }
+    }
+  }
+
+  for (auto& node : *new_item->graph.mutable_node()) {
+    // Rewrite the inputs to use the normal naming convention.
+    for (int i = 0; i < node.input_size(); ++i) {
+      const string& input = node.input(i);
+      auto it = port_map.find(input);
+      if (it == port_map.end()) {
+        LOG(ERROR) << "Unknown input: " << input;
+        return nullptr;
+      }
+      node.set_input(i, it->second);
+    }
+  }
+
+  // Add the function outputs to the list of fetch nodes.
+  for (const auto& out : func.signature().output_arg()) {
+    new_item->fetch.emplace_back(out.name());
+  }
+  // Add the function inputs to the list of feeds.
+  for (const auto& inp : func.signature().input_arg()) {
+    new_item->feed.emplace_back(inp.name(), Tensor());
+  }
+
+  return new_item;
+}
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/grappler_item_builder.h b/tensorflow/core/grappler/grappler_item_builder.h
index 85151aabea107d40d7770da2ec398c1d305355a4..fa6f9faa099cafb6e1fe235bfd36fc8ad0d15c14 100644
--- a/tensorflow/core/grappler/grappler_item_builder.h
+++ b/tensorflow/core/grappler/grappler_item_builder.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <memory>
 #include <set>
 #include <string>
+#include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 
 namespace tensorflow {
@@ -57,6 +58,12 @@ struct ItemConfig {
 std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
     const string& id, const MetaGraphDef& meta_graph, const ItemConfig& cfg);
 
+// Factory method for creating a GrapplerItem from a FunctionDef.
+// Returns nullptr if the given function def cannot be converted.
+std::unique_ptr<GrapplerItem> GrapplerItemFromFunctionDef(
+    const string& id, const FunctionDef& func,
+    const std::unordered_map<string, AttrValue>& func_attr);
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/grappler_item_builder_test.cc b/tensorflow/core/grappler/grappler_item_builder_test.cc
index 4272179d3cbef35362dc3330b5d1b3076df9bdb1..87377a02583d816ec87900750c54f99c666f24c9 100644
--- a/tensorflow/core/grappler/grappler_item_builder_test.cc
+++ b/tensorflow/core/grappler/grappler_item_builder_test.cc
@@ -19,8 +19,10 @@ limitations under the License.
 #include "tensorflow/cc/gradients/grad_testutil.h"
 #include "tensorflow/cc/ops/functional_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
@@ -253,6 +255,150 @@ TEST_F(GrapplerItemBuilderTest, AssetFilepathOverrideTest_FileNotAccessible) {
   ASSERT_TRUE(item == nullptr);
 }
 
+TEST_F(GrapplerItemBuilderTest, GraphWithFunctions) {
+  MetaGraphDef meta_graph;
+  // y = XTimesTwo(x)
+  constexpr char device[] = "/cpu:0";
+  *meta_graph.mutable_graph_def() = test::function::GDef(
+      {test::function::NDef("x", "Const", {}, {{"dtype", DT_FLOAT}}, device),
+       test::function::NDef("y", "XTimesTwo", {"x"}, {{"T", DT_FLOAT}},
+                            device)},
+      // FunctionLib
+      {
+          test::function::XTimesTwo(),
+      });
+
+  CollectionDef train_op;
+  train_op.mutable_node_list()->add_value("y");
+  (*meta_graph.mutable_collection_def())["train_op"] = train_op;
+
+  ItemConfig cfg;
+  cfg.inline_functions = false;
+
+  std::unique_ptr<GrapplerItem> item =
+      GrapplerItemFromMetaGraphDef("0", meta_graph, cfg);
+  ASSERT_TRUE(item != nullptr);
+}
+
+TEST_F(GrapplerItemBuilderTest, FromSimpleFunctionDef) {
+  const Tensor kTwo = test::AsScalar<int64>(2);
+  FunctionDef func = FunctionDefHelper::Define(
+      // Name
+      "XTimesTwo",
+      // Args
+      {"x: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}},
+          {{"scale"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"y"}, "Mul", {"x", "scale"}, {{"T", "$T"}}},
+      });
+
+  std::unordered_map<string, AttrValue> func_attr;
+  func_attr["T"].set_type(DT_FLOAT);
+  std::unique_ptr<GrapplerItem> item =
+      GrapplerItemFromFunctionDef("test", func, func_attr);
+  CHECK(item);
+  EXPECT_EQ(4, item->graph.node_size());
+  EXPECT_EQ(std::vector<string>({"y"}), item->fetch);
+  EXPECT_EQ(1, item->feed.size());
+  EXPECT_EQ("x", item->feed[0].first);
+
+  for (const NodeDef &node : item->graph.node()) {
+    if (node.name() == "x") {
+      EXPECT_EQ("Placeholder", node.op());
+      EXPECT_EQ(DT_FLOAT, node.attr().at("T").type());
+      EXPECT_EQ(0, node.input_size());
+    } else if (node.name() == "two") {
+      EXPECT_EQ("Const", node.op());
+      EXPECT_EQ(0, node.input_size());
+    } else if (node.name() == "scale") {
+      EXPECT_EQ("Cast", node.op());
+      EXPECT_EQ(DT_FLOAT, node.attr().at("DstT").type());
+      EXPECT_EQ(1, node.input_size());
+      EXPECT_EQ("two:0", node.input(0));
+    } else if (node.name() == "y") {
+      EXPECT_EQ("Mul", node.op());
+      EXPECT_EQ(DT_FLOAT, node.attr().at("T").type());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("x", node.input(0));
+      EXPECT_EQ("scale:0", node.input(1));
+    }
+  }
+}
+
+TEST_F(GrapplerItemBuilderTest, FromFunctionDefWithMultiOutputNodes) {
+  // Gradient graph for the Subtract operation
+  std::vector<FunctionDefHelper::Node> nodes = {
+      {{"sx"}, "Shape", {"x"}},
+      {{"sy"}, "Shape", {"y"}},
+      {{"gx"}, "Identity", {"dz"}},
+      {{"gy"}, "Neg", {"dz"}},
+      {{"rx", "ry"}, "BroadcastGradientArgs", {"sx", "sy"}},
+      {{"sum_gx"}, "Sum", {"gx", "rx"}},
+      {{"dx"}, "Reshape", {"sum_gx", "sx"}},
+      {{"sum_gy"}, "Sum", {"gy", "ry"}},
+      {{"dy"}, "Reshape", {"sum_gy", "sy"}},
+  };
+
+  for (auto &n : nodes) {
+    // "BroadcastGradientArgs" doesn't need any attrs.
+    if (n.attr.empty() && n.op != "BroadcastGradientArgs") {
+      n.attr = {{"T", "$T"}};
+    }
+  }
+  FunctionDef func = FunctionDefHelper::Define(
+      // Name
+      "SubGrad",
+      // Arg defs
+      {"x: T", "y: T", "dz: T"},
+      // Ret val defs
+      {"dx: T", "dy: T"},
+      // Attr defs
+      {{"T: {half, float, double}"}},
+      // Nodes
+      nodes);
+
+  std::unordered_map<string, AttrValue> func_attr;
+  func_attr["T"].set_type(DT_FLOAT);
+  std::unique_ptr<GrapplerItem> item =
+      GrapplerItemFromFunctionDef("test", func, func_attr);
+  CHECK(item);
+  EXPECT_EQ(12, item->graph.node_size());
+  EXPECT_EQ(std::vector<string>({"dx", "dy"}), item->fetch);
+  EXPECT_EQ(3, item->feed.size());
+  EXPECT_EQ("x", item->feed[0].first);
+  EXPECT_EQ("y", item->feed[1].first);
+  EXPECT_EQ("dz", item->feed[2].first);
+
+  for (const NodeDef &node : item->graph.node()) {
+    if (node.name() == "x" || node.name() == "y" || node.name() == "dz") {
+      EXPECT_EQ("Placeholder", node.op());
+      EXPECT_EQ(DT_FLOAT, node.attr().at("T").type());
+      EXPECT_EQ(0, node.input_size());
+    } else if (node.name() == "rx") {
+      EXPECT_EQ("BroadcastGradientArgs", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("sx:0", node.input(0));
+      EXPECT_EQ("sy:0", node.input(1));
+    } else if (node.name() == "sum_gx") {
+      EXPECT_EQ("Sum", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("gx:0", node.input(0));
+      EXPECT_EQ("rx:0", node.input(1));
+    } else if (node.name() == "sum_gy") {
+      EXPECT_EQ("Sum", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("gy:0", node.input(0));
+      EXPECT_EQ("rx:1", node.input(1));
+    }
+  }
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc b/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc
index 6d25556770d13058ba65045eff787b12c0ca12de..ec54bd5c7598a5acb5bf653bb2902f6c3aba38f6 100644
--- a/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc
+++ b/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc
@@ -31,8 +31,6 @@ namespace {
 GraphDef CreateGraphDef(int num_stages, int width, int tensor_size,
                         bool use_multiple_devices, bool insert_queue,
                         const std::vector<string>& device_names) {
-  CHECK_GE(device_names.size(), width);
-
   using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
 
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
@@ -49,13 +47,17 @@ GraphDef CreateGraphDef(int num_stages, int width, int tensor_size,
     std::vector<Output> this_stage;
     for (int j = 0; j < width; j++) {
       if (last_stage.size() == 1) {
-        Output unary_op =
-            Square(s.WithDevice(device_names[use_multiple_devices ? j : 0]),
-                   last_stage[0]);
+        Output unary_op = Square(
+            s.WithDevice(
+                device_names[use_multiple_devices ? j % device_names.size()
+                                                  : 0]),
+            last_stage[0]);
         this_stage.push_back(unary_op);
       } else {
         Output combine =
-            AddN(s.WithDevice(device_names[use_multiple_devices ? j : 0]),
+            AddN(s.WithDevice(
+                     device_names[use_multiple_devices ? j % device_names.size()
+                                                       : 0]),
                  last_stage);
         this_stage.push_back(combine);
       }
diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 15fcaa857e3024bc91a7629fc14d10d57c624c8e..fdf4540540b4b9f3d64ea767240ca4ea0c353d48 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include <unordered_set>
 
+#include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/grappler/op_types.h"
@@ -25,21 +26,58 @@ namespace tensorflow {
 namespace grappler {
 
 bool IsAdd(const NodeDef& node) {
-  return node.op() == "Add" || node.op() == "AddV2";
+  if (node.op() == "AddV2" || node.op() == "Add") {
+    DataType type = node.attr().at("T").type();
+    return type != DT_STRING;
+  }
+  return false;
 }
 
 bool IsAddN(const NodeDef& node) { return node.op() == "AddN"; }
 
+bool IsAll(const NodeDef& node) { return node.op() == "All"; }
+
+bool IsAngle(const NodeDef& node) { return node.op() == "Angle"; }
+
+bool IsAny(const NodeDef& node) { return node.op() == "Any"; }
+
+bool IsAnyDiv(const NodeDef& node) {
+  return node.op() == "RealDiv" || node.op() == "Div" ||
+         node.op() == "FloorDiv" || node.op() == "TruncateDiv";
+}
+
+bool IsApproximateEqual(const NodeDef& node) {
+  return node.op() == "ApproximateEqual";
+}
+
 bool IsAvgPoolGrad(const NodeDef& node) { return node.op() == "AvgPoolGrad"; }
 
 bool IsAssert(const NodeDef& node) { return node.op() == "Assert"; }
 
+bool IsAtan2(const NodeDef& node) { return node.op() == "Atan2"; }
+
+bool IsBetainc(const NodeDef& node) { return node.op() == "Betainc"; }
+
+bool IsBiasAdd(const NodeDef& node) {
+  return node.op() == "BiasAdd" || node.op() == "BiasAddV1";
+}
+
 bool IsBiasAddGrad(const NodeDef& node) { return node.op() == "BiasAddGrad"; }
 
+bool IsBitcast(const NodeDef& node) { return node.op() == "Bitcast"; }
+
+bool IsCast(const NodeDef& node) { return node.op() == "Cast"; }
+
+bool IsComplex(const NodeDef& node) { return node.op() == "Complex"; }
+
+bool IsComplexAbs(const NodeDef& node) { return node.op() == "ComplexAbs"; }
+
 bool IsConcatOffset(const NodeDef& node) { return node.op() == "ConcatOffset"; }
 
 bool IsConstant(const NodeDef& node) { return node.op() == "Const"; }
 
+bool IsConj(const NodeDef& node) { return node.op() == "Conj"; }
+
 bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; }
 
 bool IsConv2DBackpropFilter(const NodeDef& node) {
@@ -69,20 +107,39 @@ bool IsDequeueOp(const NodeDef& node) {
          op == "QueueDequeueUpToV2" || op == "QueueDequeueUpTo";
 }
 
+bool IsDiv(const NodeDef& node) { return node.op() == "Div"; }
+
+bool IsEluGrad(const NodeDef& node) { return node.op() == "EluGrad"; }
+
 bool IsEnter(const NodeDef& node) {
   const auto& op = node.op();
   return op == "Enter" || op == "RefEnter";
 }
 
+bool IsEqual(const NodeDef& node) { return node.op() == "Equal"; }
+
 bool IsExit(const NodeDef& node) {
   const auto& op = node.op();
   return op == "Exit" || op == "RefExit";
 }
 
+bool IsFill(const NodeDef& node) { return node.op() == "Fill"; }
+
+bool IsFloorDiv(const NodeDef& node) { return node.op() == "FloorDiv"; }
+
 bool IsFloorMod(const NodeDef& node) { return node.op() == "FloorMod"; }
 
-bool IsFusedBatchNormGradV1(const NodeDef& node) {
-  return node.op() == "FusedBatchNormGrad";
+bool IsFusedBatchNormGrad(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "FusedBatchNormGrad" || op == "FusedBatchNormGradV2";
+}
+
+bool IsGreater(const NodeDef& node) { return node.op() == "Greater"; }
+
+bool IsGreaterEqual(const NodeDef& node) { return node.op() == "GreaterEqual"; }
+
+bool IsHistogramSummary(const NodeDef& node) {
+  return node.op() == "HistogramSummary";
 }
 
 bool IsIdentity(const NodeDef& node) {
@@ -90,33 +147,97 @@ bool IsIdentity(const NodeDef& node) {
   return op == "Identity" || op == "RefIdentity";
 }
 
+bool IsIdentityN(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "IdentityN";
+}
+
+bool IsIgamma(const NodeDef& node) { return node.op() == "Igamma"; }
+
+bool IsIgammac(const NodeDef& node) { return node.op() == "Igammac"; }
+
+bool IsImag(const NodeDef& node) { return node.op() == "Imag"; }
+
+bool IsInvGrad(const NodeDef& node) { return node.op() == "InvGrad"; }
+
+bool IsLess(const NodeDef& node) { return node.op() == "Less"; }
+
+bool IsLessEqual(const NodeDef& node) { return node.op() == "LessEqual"; }
+
+bool IsLogicalAnd(const NodeDef& node) { return node.op() == "LogicalAnd"; }
+
+bool IsLogicalNot(const NodeDef& node) { return node.op() == "LogicalNot"; }
+
+bool IsLogicalOr(const NodeDef& node) { return node.op() == "LogicalOr"; }
+
+bool IsMatMul(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "MatMul" || op == "BatchMatMul" || op == "QuantizedMatMul" ||
+         op == "SparseMatMul";
+}
+
+bool IsMax(const NodeDef& node) { return node.op() == "Max"; }
+
+bool IsMaximum(const NodeDef& node) { return node.op() == "Maximum"; }
+
+bool IsMean(const NodeDef& node) { return node.op() == "Mean"; }
+
 bool IsMerge(const NodeDef& node) {
-  const auto op = node.op();
+  const auto& op = node.op();
   return op == "Merge" || op == "RefMerge";
 }
 
+bool IsMin(const NodeDef& node) { return node.op() == "Min"; }
+
+bool IsMinimum(const NodeDef& node) { return node.op() == "Minimum"; }
+
+bool IsMirrorPad(const NodeDef& node) { return node.op() == "MirrorPad"; }
+
+bool IsMirrorPadGrad(const NodeDef& node) {
+  return node.op() == "MirrorPadGrad";
+}
+
+bool IsMod(const NodeDef& node) { return node.op() == "Mod"; }
+
 bool IsMul(const NodeDef& node) { return node.op() == "Mul"; }
 
 bool IsNoOp(const NodeDef& node) { return node.op() == "NoOp"; }
 
+bool IsNotEqual(const NodeDef& node) { return node.op() == "NotEqual"; }
+
 bool IsNextIteration(const NodeDef& node) {
   const auto& op = node.op();
   return op == "NextIteration" || op == "RefNextIteration";
 }
 
-bool IsPad(const NodeDef& node) { return node.op() == "Pad"; }
+bool IsPad(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "Pad" || op == "PadV2";
+}
 
 bool IsPlaceholder(const NodeDef& node) {
-  const auto op = node.op();
+  const auto& op = node.op();
   return op == "Placeholder" || op == "PlaceholderV2" ||
          op == "PlaceholderWithDefault";
 }
 
+bool IsPolygamma(const NodeDef& node) { return node.op() == "Polygamma"; }
+
+bool IsPow(const NodeDef& node) { return node.op() == "Pow"; }
+
+bool IsProd(const NodeDef& node) { return node.op() == "Prod"; }
+
+bool IsReal(const NodeDef& node) { return node.op() == "Real"; }
+
 bool IsRealDiv(const NodeDef& node) { return node.op() == "RealDiv"; }
 
-bool IsReluGrad(const NodeDef& node) { return node.op() == "ReluGrad"; }
+bool IsReciprocalGrad(const NodeDef& node) {
+  return node.op() == "ReciprocalGrad";
+}
 
-bool IsRecv(const NodeDef& node) { return node.op() == "_Recv"; }
+bool IsRecv(const NodeDef& node) {
+  return node.op() == "_Recv" || node.op() == "_HostRecv";
+}
 
 bool IsReduction(const NodeDef& node) {
   const auto& op = node.op();
@@ -124,6 +245,10 @@ bool IsReduction(const NodeDef& node) {
          op == "Mean" || op == "Any" || op == "All";
 }
 
+bool IsReluGrad(const NodeDef& node) { return node.op() == "ReluGrad"; }
+
+bool IsRelu6Grad(const NodeDef& node) { return node.op() == "Relu6Grad"; }
+
 bool IsReshape(const NodeDef& node) { return (node.op() == "Reshape"); }
 
 bool IsRestore(const NodeDef& node) {
@@ -131,12 +256,36 @@ bool IsRestore(const NodeDef& node) {
           node.op() == "RestoreSlice");
 }
 
-bool IsSend(const NodeDef& node) { return node.op() == "_Send"; }
+bool IsReverseV2(const NodeDef& node) { return node.op() == "ReverseV2"; }
+
+bool IsRsqrtGrad(const NodeDef& node) { return node.op() == "RsqrtGrad"; }
+
+bool IsSelect(const NodeDef& node) { return node.op() == "Select"; }
+
+bool IsSeluGrad(const NodeDef& node) { return node.op() == "SeluGrad"; }
+
+bool IsSend(const NodeDef& node) {
+  return node.op() == "_Send" || node.op() == "_HostSend";
+}
+
+bool IsShape(const NodeDef& node) { return node.op() == "Shape"; }
+
+bool IsShapeN(const NodeDef& node) { return node.op() == "ShapeN"; }
+
+bool IsSigmoidGrad(const NodeDef& node) { return node.op() == "SigmoidGrad"; }
 
 bool IsSlice(const NodeDef& node) { return node.op() == "Slice"; }
 
+bool IsSoftplusGrad(const NodeDef& node) { return node.op() == "SoftplusGrad"; }
+
+bool IsSoftsignGrad(const NodeDef& node) { return node.op() == "SoftsignGrad"; }
+
 bool IsSplit(const NodeDef& node) { return node.op() == "Split"; }
 
+bool IsSplitV(const NodeDef& node) { return node.op() == "SplitV"; }
+
+bool IsSqrtGrad(const NodeDef& node) { return node.op() == "SqrtGrad"; }
+
 bool IsSquaredDifference(const NodeDef& node) {
   return node.op() == "SquaredDifference";
 }
@@ -148,6 +297,12 @@ bool IsStopGradient(const NodeDef& node) {
   return op == "StopGradient" || op == "PreventGradient";
 }
 
+bool IsStridedSlice(const NodeDef& node) { return node.op() == "StridedSlice"; }
+
+bool IsStridedSliceGrad(const NodeDef& node) {
+  return node.op() == "StridedSliceGrad";
+}
+
 bool IsSub(const NodeDef& node) { return node.op() == "Sub"; }
 
 bool IsSum(const NodeDef& node) { return node.op() == "Sum"; }
@@ -157,14 +312,34 @@ bool IsSwitch(const NodeDef& node) {
   return op == "Switch" || op == "RefSwitch";
 }
 
+bool IsTanhGrad(const NodeDef& node) { return node.op() == "TanhGrad"; }
+
+bool IsTile(const NodeDef& node) { return node.op() == "Tile"; }
+
 bool IsTranspose(const NodeDef& node) { return node.op() == "Transpose"; }
 
+bool IsTruncateDiv(const NodeDef& node) { return node.op() == "TruncateDiv"; }
+
+bool IsTruncateMod(const NodeDef& node) { return node.op() == "TruncateMod"; }
+
 bool IsVariable(const NodeDef& node) {
   const auto& op = node.op();
   return op == "Variable" || op == "VariableV2" || op == "AutoReloadVariable" ||
          op == "VarHandleOp" || op == "ReadVariableOp";
 }
 
+bool IsZeta(const NodeDef& node) { return node.op() == "Zeta"; }
+
+namespace {
+bool GetBoolAttr(const NodeDef& node, const string& name) {
+  return node.attr().count(name) > 0 && node.attr().at(name).b();
+}
+}  // namespace
+
+bool IsPersistent(const NodeDef& node) {
+  return IsConstant(node) || IsVariable(node);
+}
+
 bool IsFreeOfSideEffect(const NodeDef& node) {
   // Placeholders must be preserved to keep the graph feedable.
   if (IsPlaceholder(node)) {
@@ -184,6 +359,10 @@ bool IsFreeOfSideEffect(const NodeDef& node) {
       return false;
     }
   }
+  // Some nodes do in-place updates on regular tensor inputs.
+  if (GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace")) {
+    return false;
+  }
   return true;
 }
 
@@ -224,5 +403,10 @@ bool IsValuePreserving(const NodeDef& node) {
   return value_preserving_ops.count(node.op()) > 0;
 }
 
+bool HasOpDef(const NodeDef& node) {
+  const OpDef* op_def = nullptr;
+  return OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok();
+}
+
 }  // namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index b1d81448afb8b2c311d216a9af5d07031ed36b98..9cda40c0a6515caa9754d0c2f4f50a32f9fe8d98 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -24,9 +24,22 @@ namespace grappler {
 
 bool IsAdd(const NodeDef& node);
 bool IsAddN(const NodeDef& node);
+bool IsAll(const NodeDef& node);
+bool IsAngle(const NodeDef& node);
+bool IsAny(const NodeDef& node);
+bool IsAnyDiv(const NodeDef& node);
+bool IsApproximateEqual(const NodeDef& node);
 bool IsAvgPoolGrad(const NodeDef& node);
 bool IsAssert(const NodeDef& node);
+bool IsAtan2(const NodeDef& node);
+bool IsBetainc(const NodeDef& node);
+bool IsBiasAdd(const NodeDef& node);
 bool IsBiasAddGrad(const NodeDef& node);
+bool IsBitcast(const NodeDef& node);
+bool IsCast(const NodeDef& node);
+bool IsComplex(const NodeDef& node);
+bool IsComplexAbs(const NodeDef& node);
+bool IsConj(const NodeDef& node);
 bool IsConcatOffset(const NodeDef& node);
 bool IsConstant(const NodeDef& node);
 bool IsConv2D(const NodeDef& node);
@@ -36,34 +49,86 @@ bool IsDepthwiseConv2dNative(const NodeDef& node);
 bool IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node);
 bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node);
 bool IsDequeueOp(const NodeDef& node);
+bool IsDiv(const NodeDef& node);
+bool IsEluGrad(const NodeDef& node);
 bool IsEnter(const NodeDef& node);
+bool IsEqual(const NodeDef& node);
 bool IsExit(const NodeDef& node);
+bool IsFill(const NodeDef& node);
+bool IsFloorDiv(const NodeDef& node);
 bool IsFloorMod(const NodeDef& node);
-bool IsFusedBatchNormGradV1(const NodeDef& node);
+bool IsFusedBatchNormGrad(const NodeDef& node);
+bool IsGreater(const NodeDef& node);
+bool IsGreaterEqual(const NodeDef& node);
+bool IsHistogramSummary(const NodeDef& node);
 bool IsIdentity(const NodeDef& node);
+bool IsIdentityN(const NodeDef& node);
+bool IsIgamma(const NodeDef& node);
+bool IsIgammac(const NodeDef& node);
+bool IsImag(const NodeDef& node);
+bool IsInvGrad(const NodeDef& node);
+bool IsLess(const NodeDef& node);
+bool IsLessEqual(const NodeDef& node);
+bool IsLogicalAnd(const NodeDef& node);
+bool IsLogicalNot(const NodeDef& node);
+bool IsLogicalOr(const NodeDef& node);
+bool IsMax(const NodeDef& node);
+bool IsMaximum(const NodeDef& node);
+bool IsMean(const NodeDef& node);
 bool IsMerge(const NodeDef& node);
+bool IsMin(const NodeDef& node);
+bool IsMinimum(const NodeDef& node);
+bool IsMirrorPad(const NodeDef& node);
+bool IsMirrorPadGrad(const NodeDef& node);
+bool IsMod(const NodeDef& node);
 bool IsMul(const NodeDef& node);
+bool IsMatMul(const NodeDef& node);
 bool IsNextIteration(const NodeDef& node);
 bool IsPad(const NodeDef& node);
 bool IsNoOp(const NodeDef& node);
+bool IsNotEqual(const NodeDef& node);
 bool IsPlaceholder(const NodeDef& node);
+bool IsPolygamma(const NodeDef& node);
+bool IsProd(const NodeDef& node);
+bool IsPow(const NodeDef& node);
+bool IsReal(const NodeDef& node);
 bool IsRealDiv(const NodeDef& node);
+bool IsRelu6Grad(const NodeDef& node);
 bool IsReluGrad(const NodeDef& node);
+bool IsReciprocalGrad(const NodeDef& node);
 bool IsRecv(const NodeDef& node);
 bool IsReduction(const NodeDef& node);
 bool IsReshape(const NodeDef& node);
 bool IsRestore(const NodeDef& node);
+bool IsReverseV2(const NodeDef& node);
+bool IsRsqrtGrad(const NodeDef& node);
+bool IsSelect(const NodeDef& node);
+bool IsSeluGrad(const NodeDef& node);
 bool IsSend(const NodeDef& node);
 bool IsSlice(const NodeDef& node);
+bool IsShape(const NodeDef& node);
+bool IsShapeN(const NodeDef& node);
+bool IsSigmoidGrad(const NodeDef& node);
+bool IsSoftplusGrad(const NodeDef& node);
+bool IsSoftsignGrad(const NodeDef& node);
 bool IsSplit(const NodeDef& node);
+bool IsSplitV(const NodeDef& node);
+bool IsSqrtGrad(const NodeDef& node);
 bool IsSquaredDifference(const NodeDef& node);
 bool IsSqueeze(const NodeDef& node);
 bool IsStopGradient(const NodeDef& node);
+bool IsStridedSlice(const NodeDef& node);
+bool IsStridedSliceGrad(const NodeDef& node);
 bool IsSub(const NodeDef& node);
 bool IsSum(const NodeDef& node);
 bool IsSwitch(const NodeDef& node);
+bool IsTanhGrad(const NodeDef& node);
+bool IsTile(const NodeDef& node);
 bool IsTranspose(const NodeDef& node);
+bool IsTruncateDiv(const NodeDef& node);
+bool IsTruncateMod(const NodeDef& node);
 bool IsVariable(const NodeDef& node);
+bool IsZeta(const NodeDef& node);
 
 // Return true if the op is an aggregation (e.g. Add, AddN).
 // Returns false if it could not be determined to be so.
@@ -73,6 +138,10 @@ bool IsAggregate(const NodeDef& node);
 // Returns false if it could not be determined to be so.
 bool IsCommutative(const NodeDef& node);
 
+// Returns true if the node is known to use persistent memory to store its
+// value.
+bool IsPersistent(const NodeDef& node);
+
 bool IsFreeOfSideEffect(const NodeDef& node);
 bool ModifiesFrameInfo(const NodeDef& node);
 
@@ -85,6 +154,9 @@ bool IsInvolution(const NodeDef& node);
 // function returns true if the op commutes with all element-wise operations.
 bool IsValuePreserving(const NodeDef& node);
 
+// Returns true if we can find an opdef corresponding to the op of the node.
+bool HasOpDef(const NodeDef& node);
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 5d9eb8e0b128188be3cdcf8019cb0bca94c6b1cf..791ad34bbed6a4c7d270f3a06ac34ed0f08b9b1a 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -96,6 +96,7 @@ cc_library(
         ":graph_optimizer",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:op_types",
@@ -212,6 +213,7 @@ cc_library(
         "//tensorflow/core/grappler:op_types",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/costs:graph_properties",
+        "//tensorflow/core/grappler/utils:topological_sort",
     ],
 )
 
@@ -230,6 +232,7 @@ tf_cc_test(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder",
+        "//tensorflow/core/grappler/utils:topological_sort",
     ],
 )
 
@@ -276,6 +279,7 @@ cc_library(
         ":graph_optimizer",
         ":graph_rewriter",
         ":static_schedule",
+        "//tensorflow/core:framework",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:graph_view",
         "//tensorflow/core/grappler:grappler_item",
@@ -332,6 +336,11 @@ tf_cc_test(
     deps = [
         ":layout_optimizer",
         "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:cc_ops_internal",
+        "//tensorflow/core:all_kernels",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 3cfc4f61e43d51a178054926d97f3faeae7746ce..990a07c86c2e144d24505dd45a092884f4ef77bc 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -245,14 +245,34 @@ void SetSourceDataType(DataType dtype, NodeDef* node) {
   SetDataTypeToAttr(dtype, SourceDataTypeAttrName(*node), node);
 }
 
-bool IsNumberType(DataType dtype) {
-  DataTypeVector number_types = NumberTypes();
-  return std::find(number_types.begin(), number_types.end(), dtype) !=
-         number_types.end();
-}
+bool IsNumberType(DataType dtype) { return kNumberTypes.Contains(dtype); }
 
 const char kOutputShapesAttr[] = "_output_shapes";
 
+PartialTensorShape GetInputShape(const string& input, const NodeMap& node_map) {
+  int output_pos;
+  string node_name = ParseNodeName(input, &output_pos);
+  const NodeDef* input_node = node_map.GetNode(node_name);
+  return input_node->attr().at(kOutputShapesAttr).list().shape(output_pos);
+}
+
+bool ShapesEqual(const string& input_x, const string& input_y,
+                 const NodeMap& node_map) {
+  PartialTensorShape x_shape = GetInputShape(input_x, node_map);
+  PartialTensorShape y_shape = GetInputShape(input_y, node_map);
+  if (x_shape.unknown_rank() || y_shape.unknown_rank() ||
+      x_shape.dims() != y_shape.dims()) {
+    return false;
+  }
+  for (int i = 0; i < x_shape.dims(); ++i) {
+    if (x_shape.dim_size(i) == -1 || y_shape.dim_size(i) == -1 ||
+        x_shape.dim_size(i) != y_shape.dim_size(i)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 // Returns whether `reshape` is an identity op. The tensor that `reshape`
 // reshapes is the `output_pos`-th output of node `input`.
 bool ReshapeIsIdentity(const NodeDef& reshape, const NodeDef& input,
@@ -423,7 +443,7 @@ NodeDef* ArithmeticOptimizer::AddNode(const string& name,
       AddPrefixToNodeName(name, kArithmeticOptimizer);
   node_map_->AddNode(NodeName(name_with_prefix), new_node);
   if (node_to_copy != nullptr) {
-    new_node->CopyFrom(*node_to_copy);
+    *new_node = *node_to_copy;
   }
   new_node->set_name(name_with_prefix);
   return new_node;
@@ -494,7 +514,7 @@ void ArithmeticOptimizer::DedupComputations() {
   } while (!stop);
 
   // Delete duplicates
-  if (!duplicates.empty()) {
+  if (fetch_nodes_known_ && !duplicates.empty()) {
     int last = optimized_graph_->node_size() - 1;
     for (auto it = duplicates.rbegin(); it != duplicates.rend(); ++it) {
       int index = *it;
@@ -608,12 +628,11 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
     }
 
     // If the reshape is a no-op, forward its input to its consumers. This is
-    // considered aggressive and turned off by default, because users may state
-    // that the placeholder outputs tensors of shape [M, N] while feeding it
-    // with tensors of shape [M*N] (or worse). The reshape nodes are then
-    // necessary to update the tensor metadata to the required shape.
-    if (opt_level_ == RewriterConfig::AGGRESSIVE &&
-        ReshapeIsIdentity(*reshape, *input, output_pos)) {
+    // considered aggressive, because users may state that the placeholder
+    // outputs tensors of shape [M, N] while feeding it with tensors of shape
+    // [M*N] (or worse). The reshape nodes are then necessary to update the
+    // tensor metadata to the required shape.
+    if (ReshapeIsIdentity(*reshape, *input, output_pos)) {
       return reshape->input(0);
     }
   }
@@ -868,8 +887,11 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
   // multiplication over addition to hoist common factors out of aggregate nodes
   // where all the inputs are Mul nodes. This pattern occurs frequently in
   // regularization terms for the gradients during training.
-  // TODO(rmlarsen): Check shapes and enable for AddN.
-  if (IsAdd(*node) && NumNonControlInputs(*node) > 1 &&
+  // For example, we can rewrite an expression of the form:
+  //   AddN(Mul(x, y1), Mul(y2, x), Mul(x, y3), ... Mul(x, yn))
+  // to the following:
+  //   Mul(x, AddN(y1, y2, y3, ... yn))
+  if (IsAggregate(*node) && NumNonControlInputs(*node) > 1 &&
       !OptimizedNodeExists(StrCat(node->name(), "_hoist_add"))) {
     // Determine the set of common factors if the input nodes are all Mul nodes.
     std::set<string> common_factors;
@@ -899,24 +921,15 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
     }
     if (common_factors.size() == 1) {
       const string& common_factor = *common_factors.begin();
-      // In this case we have an expression of the form
-      //   AddN(Mul(x, y1), Mul(y2, x), Mul(x, y3), ... Mul(x, yn))
-      // that can be rewritten as
-      //   Mul(x, AddN(y1, y2, y3, ... yn))
-
-      // 1. Use a copy of the first Mul node for the outer multiplication.
-      NodeDef* new_mul_node = AddNode(StrCat(node->name(), "_hoist_mul"),
-                                      node_map_->GetNode(node->input(0)));
-      NodeDef* new_add_node = AddNode(StrCat(node->name(), "_hoist_add"), node);
-      new_mul_node->set_device(node->device());
-      new_mul_node->set_input(0, common_factor);
-      node_map_->AddOutput(common_factor, new_mul_node->name());
-      new_mul_node->set_input(1, new_add_node->name());
-      node_map_->AddOutput(new_add_node->name(), new_mul_node->name());
-
-      // 2. Hoist non-shared factors up into the new AddN node.
-      nodes_to_simplify->PushBack(new_add_node);
-      for (int i = 0; i < node->input_size(); ++i) {
+
+      // Gather up the non-shared factors (the y's in the example).
+      // Unless the aggregation is Add, we have to make sure that all the y's
+      // have the same shape since the other aggregation ops do not support
+      // broadcasting.
+      std::vector<string> unique_factors;
+      unique_factors.reserve(node->input_size());
+      bool shapes_match = true;
+      for (int i = 0; i < node->input_size() && shapes_match; ++i) {
         const string& input = node->input(i);
         if (IsControlInput(input)) {
           break;
@@ -924,15 +937,41 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses(
         const NodeDef* mul_node = node_map_->GetNode(input);
         const int unique_factor_index =
             mul_node->input(0) == common_factor ? 1 : 0;
-        const string unique_factor = mul_node->input(unique_factor_index);
-        new_add_node->set_input(i, unique_factor);
+        unique_factors.push_back(mul_node->input(unique_factor_index));
+        if (i > 0 && !IsAdd(*node)) {
+          shapes_match = ShapesEqual(unique_factors.front(),
+                                     unique_factors.back(), *node_map_);
+        }
       }
 
-      // 4. Add frame dependencies that the original node might have had.
-      AddFrameControlDeps(node, {new_add_node, new_mul_node}, common_factor,
-                          {new_add_node});
+      if (shapes_match) {
+        // 1. Use a copy of the first Mul node for the outer multiplication.
+        NodeDef* new_mul_node = AddNode(StrCat(node->name(), "_hoist_mul"),
+                                        node_map_->GetNode(node->input(0)));
+        NodeDef* new_add_node =
+            AddNode(StrCat(node->name(), "_hoist_add"), node);
+        new_mul_node->set_device(node->device());
+        new_mul_node->set_input(0, common_factor);
+        node_map_->AddOutput(common_factor, new_mul_node->name());
+        new_mul_node->set_input(1, new_add_node->name());
+        node_map_->AddOutput(new_add_node->name(), new_mul_node->name());
+
+        // 2. Hoist non-shared factors up into the new AddN node.
+        nodes_to_simplify->PushBack(new_add_node);
+        for (int i = 0; i < node->input_size(); ++i) {
+          const string& input = node->input(i);
+          if (IsControlInput(input)) {
+            break;
+          }
+          new_add_node->set_input(i, unique_factors[i]);
+        }
+
+        // 3. Add frame dependencies that the original node might have had.
+        AddFrameControlDeps(node, {new_add_node, new_mul_node}, common_factor,
+                            {new_add_node});
 
-      return new_mul_node->name();
+        return new_mul_node->name();
+      }
     }
   }
 
@@ -1064,13 +1103,10 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/,
   int num_frames;
   TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph_, *node_map_,
                                                &frame_map_, &num_frames));
-  if (opt_level_ == RewriterConfig::AGGRESSIVE) {
-    graph_properties_.reset(new GraphProperties(item));
-    // Shapes are only needed in aggressive mode.
-    TF_RETURN_IF_ERROR(graph_properties_->InferStatically());
-    TF_RETURN_IF_ERROR(
-        graph_properties_->AnnotateOutputShapes(optimized_graph_));
-  }
+  // Shapes are only needed in aggressive mode.
+  graph_properties_.reset(new GraphProperties(item));
+  TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false));
+  TF_RETURN_IF_ERROR(graph_properties_->AnnotateOutputShapes(optimized_graph_));
 
   // Perform the optimizations.
   DedupComputations();
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index e8a18ff9d9f5db8593c725e382229101e389848f..b5b1ec7021e5b901195bc1e6b6b2247410d5ff1b 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -32,6 +32,21 @@ string OptimizedName(const string& name) {
   return AddPrefixToNodeName(name, kArithmeticOptimizer);
 }
 
+void VerifyGraphsMatch(const GraphDef& original_graph,
+                       const GraphDef& optimized_graph, int line) {
+  EXPECT_EQ(original_graph.node_size(), optimized_graph.node_size()) << line;
+  for (int i = 0; i < original_graph.node_size(); ++i) {
+    const NodeDef& original = original_graph.node(i);
+    const NodeDef& optimized = optimized_graph.node(i);
+    EXPECT_EQ(original.name(), optimized.name()) << line;
+    EXPECT_EQ(original.op(), optimized.op()) << line;
+    EXPECT_EQ(original.input_size(), optimized.input_size()) << line;
+    for (int j = 0; j < original.input_size(); ++j) {
+      EXPECT_EQ(original.input(j), optimized.input(j)) << line;
+    }
+  }
+}
+
 class ArithmeticOptimizerTest : public ::testing::Test {};
 
 TEST_F(ArithmeticOptimizerTest, NoOp) {
@@ -44,18 +59,7 @@ TEST_F(ArithmeticOptimizerTest, NoOp) {
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
-
-  EXPECT_EQ(item.graph.node_size(), output.node_size());
-  for (int i = 0; i < item.graph.node_size(); ++i) {
-    const NodeDef& original = item.graph.node(i);
-    const NodeDef& optimized = output.node(i);
-    EXPECT_EQ(original.name(), optimized.name());
-    EXPECT_EQ(original.op(), optimized.op());
-    EXPECT_EQ(original.input_size(), optimized.input_size());
-    for (int j = 0; j < original.input_size(); ++j) {
-      EXPECT_EQ(original.input(j), optimized.input(j));
-    }
-  }
+  VerifyGraphsMatch(item.graph, output, __LINE__);
 }
 
 TEST_F(ArithmeticOptimizerTest, OpDedupping) {
@@ -65,6 +69,7 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) {
   Output div = ops::Div(s.WithOpName("div"), c1, c2);
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch = {"div"};
 
   ArithmeticOptimizer optimizer;
   GraphDef output;
@@ -98,6 +103,7 @@ TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) {
                         check1, check2);
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch = {"div"};
 
   ArithmeticOptimizer optimizer;
   GraphDef output;
@@ -126,6 +132,7 @@ TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) {
   Output div1 = ops::Div(s.WithOpName("div1"), mul1, mul2);
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch = {"div"};
 
   ArithmeticOptimizer optimizer;
   GraphDef output;
@@ -398,39 +405,51 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) {
 }
 
 TEST_F(ArithmeticOptimizerTest, HoistFactor) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
-  Output y1 = ops::Const(s.WithOpName("y1"), {3.0f, 4.0f}, {1, 2});
-  Output y2 = ops::Const(s.WithOpName("y2"), {5.0f, 6.0f}, {1, 2});
-  Output mul1 = ops::Mul(s.WithOpName("mul1"), x, y1);
-  Output mul2 = ops::Mul(s.WithOpName("mul2"), y2, x);
-  Output add = ops::Add(s.WithOpName("add"), mul1, mul2);
-  Output id = ops::Identity(s.WithOpName("id"), add);
-
-  GrapplerItem item;
-  TF_CHECK_OK(s.ToGraphDef(&item.graph));
-
-  ArithmeticOptimizer optimizer;
-  GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
-  TF_EXPECT_OK(status);
-  // Run the optimizer twice to make sure the rewrite is idempotent.
-  item.graph.Swap(&output);
-  status = optimizer.Optimize(nullptr, item, &output);
-  TF_EXPECT_OK(status);
-
-  EXPECT_EQ(9, output.node_size());
-  const NodeDef& new_add = output.node(8);
-  EXPECT_EQ(OptimizedName("add_hoist_add"), new_add.name());
-  EXPECT_EQ("y1", new_add.input(0));
-  EXPECT_EQ("y2", new_add.input(1));
-  const NodeDef& new_mul = output.node(7);
-  EXPECT_EQ(OptimizedName("add_hoist_mul"), new_mul.name());
-  EXPECT_EQ("x", new_mul.input(0));
-  EXPECT_EQ(OptimizedName("add_hoist_add"), new_mul.input(1));
-  const NodeDef& new_id = output.node(6);
-  EXPECT_EQ("id", new_id.name());
-  EXPECT_EQ(OptimizedName("add_hoist_mul"), new_id.input(0));
+  for (bool matching_shapes : {true, false}) {
+    for (bool use_addn : {true, false}) {
+      tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+      Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
+      Output y1 = ops::Const(s.WithOpName("y1"), {3.0f, 4.0f}, {1, 2});
+      Output y2 = matching_shapes
+                      ? ops::Const(s.WithOpName("y2"), {5.0f, 6.0f}, {1, 2})
+                      : ops::Const(s.WithOpName("y2"), {5.0f}, {1, 1});
+      Output mul1 = ops::Mul(s.WithOpName("mul1"), x, y1);
+      Output mul2 = ops::Mul(s.WithOpName("mul2"), y2, x);
+      Output id =
+          use_addn ? ops::Identity(s.WithOpName("id"),
+                                   ops::AddN(s.WithOpName("add"), {mul1, mul2}))
+                   : ops::Identity(s.WithOpName("id"),
+                                   ops::Add(s.WithOpName("add"), mul1, mul2));
+
+      GrapplerItem item;
+      TF_CHECK_OK(s.ToGraphDef(&item.graph));
+      ArithmeticOptimizer optimizer;
+      GraphDef output;
+      Status status = optimizer.Optimize(nullptr, item, &output);
+      TF_EXPECT_OK(status);
+      // Run the optimizer twice to make sure the rewrite is idempotent.
+      item.graph.Swap(&output);
+      status = optimizer.Optimize(nullptr, item, &output);
+      TF_EXPECT_OK(status);
+
+      if (use_addn && !matching_shapes) {
+        VerifyGraphsMatch(item.graph, output, __LINE__);
+      } else {
+        EXPECT_EQ(9, output.node_size());
+        const NodeDef& new_add = output.node(8);
+        EXPECT_EQ(OptimizedName("add_hoist_add"), new_add.name());
+        EXPECT_EQ("y1", new_add.input(0));
+        EXPECT_EQ("y2", new_add.input(1));
+        const NodeDef& new_mul = output.node(7);
+        EXPECT_EQ(OptimizedName("add_hoist_mul"), new_mul.name());
+        EXPECT_EQ("x", new_mul.input(0));
+        EXPECT_EQ(OptimizedName("add_hoist_add"), new_mul.input(1));
+        const NodeDef& new_id = output.node(6);
+        EXPECT_EQ("id", new_id.name());
+        EXPECT_EQ(OptimizedName("add_hoist_mul"), new_id.input(0));
+      }
+    }
+  }
 }
 
 TEST_F(ArithmeticOptimizerTest, FuseConjAndTranspose) {
@@ -606,8 +625,7 @@ TEST_F(ArithmeticOptimizerTest, IdentityReshape) {
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
   GraphDef output;
-  TF_EXPECT_OK(ArithmeticOptimizer(RewriterConfig::AGGRESSIVE)
-                   .Optimize(nullptr, item, &output));
+  TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output));
 
   item.graph = output;
   TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output));
@@ -631,8 +649,7 @@ TEST_F(ArithmeticOptimizerTest, NotIdentityReshape) {
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
   GraphDef output;
-  TF_EXPECT_OK(ArithmeticOptimizer(RewriterConfig::AGGRESSIVE)
-                   .Optimize(nullptr, item, &output));
+  TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output));
 
   item.graph = output;
   TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output));
@@ -654,8 +671,7 @@ TEST_F(ArithmeticOptimizerTest, NotIdentityReshapeTooManyUnknownDimSizes) {
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
   GraphDef output;
-  TF_EXPECT_OK(ArithmeticOptimizer(RewriterConfig::AGGRESSIVE)
-                   .Optimize(nullptr, item, &output));
+  TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output));
 
   item.graph = output;
   TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output));
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index c77b2badf429d8674f60635b172f96891ed84961..6860447fb895c3dc3e0c0a087b1ec0d36898ab28 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -30,13 +30,16 @@ limitations under the License.
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/tensor_coding.h"
 #include "tensorflow/core/public/version.h"
 #include "tensorflow/core/util/bcast.h"
+#include "tensorflow/core/util/saved_tensor_slice_util.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -95,7 +98,38 @@ class DeviceSimple : public DeviceBase {
   std::unique_ptr<Eigen::ThreadPoolDevice> eigen_device_;
 };
 
+template <typename T>
+bool AllValuesAre(const TensorProto& tensor, const T& value) {
+  // TensorProto represents the content of the tensor in either <type>_val or
+  // tensor_content.
+  typename checkpoint::SaveTypeTraits<T>::RepeatedField* tensor_values =
+      checkpoint::MutableTensorProtoData<T>(const_cast<TensorProto*>(&tensor));
+  if (!tensor_values->empty()) {
+    for (const T& tensor_value : *tensor_values) {
+      if (tensor_value != value) {
+        return false;
+      }
+    }
+    return true;
+  }
+  const auto tensor_content_size = tensor.tensor_content().size();
+  if (tensor_content_size > 0) {
+    CHECK_EQ(0, tensor_content_size % sizeof(T));
+    std::vector<T> raw_values(tensor_content_size / sizeof(T));
+    port::CopyToArray(tensor.tensor_content(),
+                      reinterpret_cast<char*>(raw_values.data()));
+    for (int i = 0; i < tensor_content_size / sizeof(T); ++i) {
+      if (raw_values[i] != value) {
+        return false;
+      }
+    }
+    return true;
+  }
+  return false;
+}
+
 }  // namespace
+
 ConstantFolding::ConstantFolding(RewriterConfig::Toggle opt_level,
                                  DeviceBase* cpu_device)
     : opt_level_(opt_level), cpu_device_(cpu_device) {
@@ -109,6 +143,9 @@ ConstantFolding::ConstantFolding(DeviceBase* cpu_device)
 string ConstantFolding::AddControlDependency(const string& input_name,
                                              GraphDef* graph,
                                              NodeMap* node_map) {
+  if (IsControlInput(input_name)) {
+    return input_name;
+  }
   const NodeDef* node = node_map->GetNode(input_name);
   if (!IsSwitch(*node)) {
     return AsControlDependency(*node);
@@ -120,10 +157,10 @@ string ConstantFolding::AddControlDependency(const string& input_name,
     // We start by looking for an identity node connected to the output of the
     // switch node, and use it to anchor the control dependency.
     auto outputs = node_map->GetOutputs(node->name());
-    for (const NodeDef* node : outputs) {
-      if (IsIdentity(*node)) {
+    for (const NodeDef* output : outputs) {
+      if (IsIdentity(*output)) {
         if (IsSameInput(node->input(0), input_name)) {
-          return AsControlDependency(*node);
+          return AsControlDependency(*output);
         }
       }
     }
@@ -135,15 +172,18 @@ string ConstantFolding::AddControlDependency(const string& input_name,
     ctrl_dep_name = AddPrefixToNodeName(ctrl_dep_name, kConstantFoldingCtrl);
     const DataType output_type = node->attr().at("T").type();
 
-    NodeDef* added_node = graph->add_node();
-    added_node->set_name(ctrl_dep_name);
-    added_node->set_op("Identity");
-    added_node->set_device(node->device());
+    NodeDef* added_node = node_map->GetNode(ctrl_dep_name);
+    if (added_node == nullptr) {
+      added_node = graph->add_node();
+      added_node->set_name(ctrl_dep_name);
+      added_node->set_op("Identity");
+      added_node->set_device(node->device());
 
-    (*added_node->mutable_attr())["T"].set_type(output_type);
-    *added_node->add_input() = input_name;
-    node_map->AddNode(added_node->name(), added_node);
-    node_map->AddOutput(node->name(), added_node->name());
+      (*added_node->mutable_attr())["T"].set_type(output_type);
+      *added_node->add_input() = input_name;
+      node_map->AddNode(added_node->name(), added_node);
+      node_map->AddOutput(node->name(), added_node->name());
+    }
     return AsControlDependency(*added_node);
   }
 }
@@ -190,23 +230,45 @@ Status ConvertShapeToConstant(const string& op, const DataType& type,
   return Status::OK();
 }
 
-Status ConstantFolding::MaterializeShapes(const GrapplerItem& item,
-                                          const GraphProperties& properties) {
+// TODO(rmlarsen): Perhaps we should move this to the GraphOptimizer base class.
+bool ConstantFolding::OptimizedNodeExists(const NodeDef& node,
+                                          StringPiece suffix) const {
+  return node_map_->NodeExists(OptimizedNodeName(node, suffix));
+}
+
+string ConstantFolding::OptimizedNodeName(const NodeDef& node,
+                                          StringPiece suffix) const {
+  return AddPrefixToNodeName(strings::StrCat(node.name(), suffix),
+                             kConstantFoldingConst);
+}
+
+bool ConstantFolding::IsReallyConstant(const NodeDef& node) const {
+  if (!IsConstant(node)) {
+    return false;
+  }
+  // If the node is fed it's not constant anymore.
+  return feed_nodes_.find(node.name()) == feed_nodes_.end();
+}
+
+Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
   // We may add some nodes to the graph to encode control dependencies: there is
   // no need to process these, so only iterate over the nodes of the input
   // graph.
-  const int node_count = graph_.node_size();
+  const int node_count = graph_->node_size();
   for (int i = 0; i < node_count; ++i) {
-    NodeDef& node = *graph_.mutable_node(i);
+    NodeDef& node = *graph_->mutable_node(i);
     const string op = node.op();
     if (op != "Shape" && op != "Size" && op != "Rank" && op != "ShapeN") {
       continue;
     }
 
-    std::vector<OpInfo::TensorProperties> output =
+    const std::vector<OpInfo::TensorProperties>& output =
         properties.GetOutputProperties(node.name());
-    std::vector<OpInfo::TensorProperties> input =
+    const std::vector<OpInfo::TensorProperties>& input =
         properties.GetInputProperties(node.name());
+    if (input.empty() || output.empty()) {
+      continue;
+    }
     if (op == "Shape" || op == "Size" || op == "Rank") {
       CHECK_EQ(1, output.size());
       CHECK_EQ(1, input.size());
@@ -241,7 +303,7 @@ Status ConstantFolding::MaterializeShapes(const GrapplerItem& item,
           // cases where the shape/rank/size would have been run in
           // the original graph. Additional inputs are extra control
           string ctrl_dep =
-              AddControlDependency(node.input(0), &graph_, node_map_.get());
+              AddControlDependency(node.input(0), graph_, node_map_.get());
           node.set_input(0, ctrl_dep);
           node_map_->AddOutput(NodeName(ctrl_dep), node.name());
         } else {
@@ -252,11 +314,10 @@ Status ConstantFolding::MaterializeShapes(const GrapplerItem& item,
               string node_name = ParseNodeName(output->input(k), &port);
               if (node_name == node.name() && port == j) {
                 // Create a const node as ShapeN's output if not already.
-                string const_name =
-                    AddPrefixToNodeName(strings::StrCat(node.name(), "-", j),
-                                        kConstantFoldingConst);
+                const string const_name =
+                    OptimizedNodeName(node, strings::StrCat("-matshapes-", j));
                 if (node_map_->GetNode(const_name) == nullptr) {
-                  NodeDef* added_node = graph_.add_node();
+                  NodeDef* added_node = graph_->add_node();
                   added_node->set_name(const_name);
                   added_node->set_op("Const");
                   added_node->set_device(node.device());
@@ -267,7 +328,7 @@ Status ConstantFolding::MaterializeShapes(const GrapplerItem& item,
                   // We add a control dependency to the original ShapeN node,
                   // so that the node will only be run if all inputs of the
                   // original ShapeN node are run.
-                  string ctrl_dep = AddControlDependency(node.name(), &graph_,
+                  string ctrl_dep = AddControlDependency(node.name(), graph_,
                                                          node_map_.get());
                   *added_node->add_input() = ctrl_dep;
                   node_map_->AddOutput(NodeName(ctrl_dep), added_node->name());
@@ -285,6 +346,7 @@ Status ConstantFolding::MaterializeShapes(const GrapplerItem& item,
   return Status::OK();
 }
 
+namespace {
 bool ShapesEqual(const TensorShapeProto& shape1,
                  const TensorShapeProto& shape2) {
   if (shape1.unknown_rank() || shape2.unknown_rank()) {
@@ -297,11 +359,13 @@ bool ShapesEqual(const TensorShapeProto& shape1,
     if (shape1.dim(i).size() != shape2.dim(i).size()) {
       return false;
     }
+    if (shape1.dim(i).size() == -1 || shape2.dim(i).size() == -1) {
+      return false;
+    }
   }
   return true;
 }
 
-namespace {
 bool ExtractShape(const NodeDef& shape_node, const GraphProperties& properties,
                   BCast::Vec* shape, int64* min_id) {
   if (shape_node.op() == "Shape") {
@@ -344,11 +408,12 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs(
   const NodeDef* shape_node1 = node_map_->GetNode(node.input(0));
   const NodeDef* shape_node2 = node_map_->GetNode(node.input(1));
   if (shape_node1 == nullptr ||
-      (shape_node1->op() != "Shape" && shape_node1->op() != "Const") ||
+      (shape_node1->op() != "Shape" && !IsReallyConstant(*shape_node1)) ||
       shape_node2 == nullptr ||
-      (shape_node2->op() != "Shape" && shape_node2->op() != "Const")) {
+      (shape_node2->op() != "Shape" && !IsReallyConstant(*shape_node2))) {
     return Status::OK();
   }
+
   int64 min_id = 0;
   BCast::Vec shape1;
   if (!ExtractShape(*shape_node1, properties, &shape1, &min_id)) {
@@ -371,10 +436,42 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs(
       id = --min_id;
     }
   }
+
+  // Beware: the reduction dimensions computed by the BCast class are valid iff
+  // we assume that two distinct symbolic dimensions can't be equal and a
+  // symbolic dimension can't be equal to 1. This is often but not always true,
+  // so to make this optimization safe we filter out these cases.
+  const int common_dims = std::min(shape1.size(), shape2.size());
+  for (int i = 0; i < common_dims; ++i) {
+    if (shape1[i] >= 0 && shape2[i] >= 0) {
+      continue;
+    }
+    if (shape1[i] != shape2[i]) {
+      // We're either dealing with 2 different symbolic dimensions or a symbolic
+      // and a know dimensions. We can't be sure whether both are equal or not,
+      // so we can't be sure whether we'll be broadcasting or not.
+      return Status::OK();
+    }
+  }
+  // These extra dims could be equal to 1, in which case there is no
+  // broadcasting. It could also be greater than 1, in which case there would
+  // be broadcasting. Since we don't know, we'll just punt.
+  for (int i = common_dims; i < shape1.size(); ++i) {
+    if (shape1[i] < 0) {
+      return Status::OK();
+    }
+  }
+  for (int i = common_dims; i < shape2.size(); ++i) {
+    if (shape2[i] < 0) {
+      return Status::OK();
+    }
+  }
+
   BCast bcast(shape1, shape2);
   if (!bcast.IsValid()) {
     return Status::OK();
   }
+
   BCast::Vec reduce_dims[2];
   reduce_dims[0] = bcast.grad_x_reduce_idx();
   reduce_dims[1] = bcast.grad_y_reduce_idx();
@@ -382,31 +479,32 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs(
   const DataType type = node.attr().at("T").type();
   NodeDef* out[2];
   for (int j = 0; j < 2; ++j) {
-    if (!reduce_dims[j].empty()) {
-      // This is the case when a tensor dimension of 1 is matched against an
-      // unknown dimension. The unknown dimension could also be equal to 1, in
-      // which case there would be no reduction.
-      out[j] = nullptr;
-    } else {
-      string const_name = AddPrefixToNodeName(
-          strings::StrCat(node.name(), "-", j), kConstantFoldingConst);
-      out[j] = node_map_->GetNode(const_name);
-      if (out[j] == nullptr) {
-        out[j] = graph_.add_node();
-        Tensor value(type, TensorShape({0}));
-        *out[j] = CreateNodeDef(const_name, TensorValue(&value));
-        out[j]->set_device(node.device());
-        node_map_->AddNode(const_name, out[j]);
-        string ctrl_dep =
-            AddControlDependency(node.name(), &graph_, node_map_.get());
-        *out[j]->add_input() = ctrl_dep;
-        node_map_->AddOutput(NodeName(ctrl_dep), const_name);
+    int reduction_indices = reduce_dims[j].size();
+    Tensor value(type, TensorShape({reduction_indices}));
+    for (int i = 0; i < reduction_indices; ++i) {
+      if (type == DT_INT32) {
+        value.vec<int32>()(i) = reduce_dims[j][i];
+      } else {
+        value.vec<int64>()(i) = reduce_dims[j][i];
       }
     }
+    string const_name =
+        OptimizedNodeName(node, strings::StrCat("-bcastargs-", j));
+    out[j] = node_map_->GetNode(const_name);
+    if (out[j] == nullptr) {
+      out[j] = graph_->add_node();
+      *out[j] = CreateNodeDef(const_name, TensorValue(&value));
+      out[j]->set_device(node.device());
+      node_map_->AddNode(const_name, out[j]);
+      string ctrl_dep =
+          AddControlDependency(node.name(), graph_, node_map_.get());
+      *out[j]->add_input() = ctrl_dep;
+      node_map_->AddOutput(NodeName(ctrl_dep), const_name);
+    }
   }
 
-  auto outputs = node_map_->GetOutputs(node.name());
-  for (const auto& output : outputs) {
+  const std::set<NodeDef*> outputs = node_map_->GetOutputs(node.name());
+  for (NodeDef* output : outputs) {
     for (int k = 0; k < output->input_size(); ++k) {
       int port;
       string node_name = ParseNodeName(output->input(k), &port);
@@ -426,13 +524,17 @@ Status ConstantFolding::MaterializeReductionIndices(
     return Status::OK();
   }
   const NodeDef* indices = node_map_->GetNode(node->input(1));
-  if (!indices || IsConstant(*indices)) {
+  if (!indices || IsReallyConstant(*indices)) {
     // The reduction indices are already constant, there's nothing to do.
     return Status::OK();
   }
 
-  const OpInfo::TensorProperties& input_prop =
-      properties.GetInputProperties(node->name())[0];
+  const std::vector<OpInfo::TensorProperties>& input_props =
+      properties.GetInputProperties(node->name());
+  if (input_props.size() != 2) {
+    return Status::OK();
+  }
+  const OpInfo::TensorProperties& input_prop = input_props[0];
   if (input_prop.shape().unknown_rank()) {
     // We can't do anything if we don't know the rank of the input.
     return Status::OK();
@@ -442,17 +544,31 @@ Status ConstantFolding::MaterializeReductionIndices(
     // Unexpected graph, don't try to change it.
     return Status::OK();
   }
-  const OpInfo::TensorProperties& output_prop =
-      properties.GetOutputProperties(node->name())[0];
+  const std::vector<OpInfo::TensorProperties>& output_props =
+      properties.GetOutputProperties(node->name());
+  if (output_props.size() != 1) {
+    return Status::OK();
+  }
+  const bool keep_dims =
+      node->attr().count("keep_dims") && node->attr().at("keep_dims").b();
+  const OpInfo::TensorProperties& output_prop = output_props[0];
   PartialTensorShape output_shape(output_prop.shape());
   if (output_shape.num_elements() != 1) {
     bool full_reduction = false;
     for (const NodeDef* fanout : node_map_->GetOutputs(node->name())) {
-      if (!IsReshape(*fanout)) {
-        continue;
+      if (!IsReshape(*fanout) && !keep_dims) {
+        // Depending on how it's setup, a full reduction will generate a tensor
+        // of shape [], [1], [1, 1], [1, 1, ...]. If keep_dims isn't true, we
+        // rely on the existence of a reshape node following the reduction to
+        // ensure that the fanout is fed a scalar of the right shape.
+        return Status::OK();
       }
-      const OpInfo::TensorProperties& reshape_prop =
-          properties.GetOutputProperties(fanout->name())[0];
+      const std::vector<OpInfo::TensorProperties>& reshape_props =
+          properties.GetOutputProperties(fanout->name());
+      if (reshape_props.size() != 1) {
+        return Status::OK();
+      }
+      const OpInfo::TensorProperties& reshape_prop = reshape_props[0];
       PartialTensorShape shape(reshape_prop.shape());
       if (shape.num_elements() != 1) {
         return Status::OK();
@@ -465,21 +581,18 @@ Status ConstantFolding::MaterializeReductionIndices(
     }
   }
 
-  const OpInfo::TensorProperties& reduction_prop =
-      properties.GetInputProperties(node->name())[1];
+  const OpInfo::TensorProperties& reduction_prop = input_props[1];
   DataType dtype = reduction_prop.dtype();
   if (dtype != DT_INT32 && dtype != DT_INT64) {
     return Status::OK();
   }
   // We know it's a full reduction. We can generate the set of indices to
   // reduce.
-  string const_name =
-      AddPrefixToNodeName(strings::StrCat(node->name(), "-reduction_indices"),
-                          kConstantFoldingConst);
+  string const_name = OptimizedNodeName(*node, "-reduction_indices");
   if (node_map_->GetNode(const_name)) {
     return Status::OK();
   }
-  NodeDef* reduction_indices = graph_.add_node();
+  NodeDef* reduction_indices = graph_->add_node();
   Tensor value(dtype, TensorShape({rank}));
   for (int i = 0; i < rank; ++i) {
     if (dtype == DT_INT32) {
@@ -491,7 +604,7 @@ Status ConstantFolding::MaterializeReductionIndices(
   *reduction_indices = CreateNodeDef(const_name, TensorValue(&value));
   reduction_indices->set_device(node->device());
   string ctrl_dep =
-      AddControlDependency(node->input(1), &graph_, node_map_.get());
+      AddControlDependency(node->input(1), graph_, node_map_.get());
   *reduction_indices->add_input() = ctrl_dep;
   node_map_->AddNode(const_name, reduction_indices);
   node_map_->AddOutput(NodeName(ctrl_dep), const_name);
@@ -504,10 +617,10 @@ Status ConstantFolding::MaterializeReductionIndices(
 }
 
 Status ConstantFolding::MaterializeConstants(
-    const GrapplerItem& item, const GraphProperties& properties) {
-  const int node_count = graph_.node_size();
+    const GraphProperties& properties) {
+  const int node_count = graph_->node_size();
   for (int i = 0; i < node_count; ++i) {
-    NodeDef& node = *graph_.mutable_node(i);
+    NodeDef& node = *graph_->mutable_node(i);
     const string& op = node.op();
     if (op == "BroadcastGradientArgs") {
       TF_RETURN_IF_ERROR(MaterializeBroadcastGradientArgs(node, properties));
@@ -523,24 +636,23 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const {
   if (node.input().empty()) {
     return false;
   }
-
   // Skips nodes that must be preserved except whitelisted nodes.
   if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end() &&
       nodes_whitelist_.find(node.name()) == nodes_whitelist_.end()) {
     return false;
   }
-
-  // Skips ops that don't benefit from folding.
-  const string& op = node.op();
-  // Skip constants, they're already folded
-  if (op == "Const") {
+  // Skip control flow nodes, they can't be folded
+  if (ModifiesFrameInfo(node)) {
     return false;
   }
-  // Skip constrol flow nodes, they can't be folded
-  if (op == "Enter" || op == "RefEnter" || op == "Exit" || op == "RefExit" ||
-      op == "NextIteration" || op == "RefNextIteration") {
+  // Skip constants, they're already folded
+  if (IsConstant(node)) {
     return false;
   }
+
+  // Skips ops that don't benefit from folding.
+  const string& op = node.op();
+
   if (op.find("Placeholder") == 0) {
     return false;
   }
@@ -594,7 +706,7 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const {
     if (!input_node) {
       return false;
     }
-    bool is_const = IsConstant(*input_node);
+    bool is_const = IsReallyConstant(*input_node);
     if (!is_const && !is_merge) {
       return false;
     }
@@ -612,6 +724,37 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const {
   return true;
 }
 
+namespace {
+
+#define SET_TENSOR_VAL_CASE(DTYPE, TYPE, NAME)     \
+  case DTYPE:                                      \
+    t->add_##NAME##_val(static_cast<TYPE>(value)); \
+    break;
+
+Status CreateConstantTensorAttrValue(DataType type, double value,
+                                     const TensorShapeProto& shape,
+                                     AttrValue* attr_tensor) {
+  TensorProto* t = attr_tensor->mutable_tensor();
+  t->set_dtype(type);
+  *t->mutable_tensor_shape() = shape;
+  switch (type) {
+    SET_TENSOR_VAL_CASE(DT_FLOAT, float, float);
+    SET_TENSOR_VAL_CASE(DT_DOUBLE, double, double);
+    SET_TENSOR_VAL_CASE(DT_INT64, int64, int64);
+    SET_TENSOR_VAL_CASE(DT_INT32, int32, int);
+    SET_TENSOR_VAL_CASE(DT_INT16, int32, int);
+    SET_TENSOR_VAL_CASE(DT_INT8, int32, int);
+    SET_TENSOR_VAL_CASE(DT_UINT8, int32, int);
+    SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool);
+    default:
+      return errors::InvalidArgument("Unsupported type: ", type);
+  }
+  return Status::OK();
+}
+
+#undef SET_TENSOR_CAL_CASE
+}  // namespace
+
 // static
 NodeDef ConstantFolding::CreateNodeDef(const string& name,
                                        const TensorValue& tensor) {
@@ -652,6 +795,14 @@ NodeDef ConstantFolding::CreateNodeDef(const string& name,
       POPULATE_TENSOR_PROTO(tensor, t, int64, int64)
     } else if (tensor->dtype() == DT_INT32) {
       POPULATE_TENSOR_PROTO(tensor, t, int32, int)
+    } else if (tensor->dtype() == DT_INT16) {
+      POPULATE_TENSOR_PROTO(tensor, t, int16, int)
+    } else if (tensor->dtype() == DT_INT8) {
+      POPULATE_TENSOR_PROTO(tensor, t, int8, int)
+    } else if (tensor->dtype() == DT_UINT8) {
+      POPULATE_TENSOR_PROTO(tensor, t, uint8, int)
+    } else if (tensor->dtype() == DT_BOOL) {
+      POPULATE_TENSOR_PROTO(tensor, t, bool, bool)
     }
   }
   if (optimized) {
@@ -720,7 +871,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
       break;
     }
     const NodeDef* input_node = node_map_->GetNode(input);
-    if (!IsConstant(*input_node)) {
+    if (!IsReallyConstant(*input_node)) {
       return Status(error::INVALID_ARGUMENT,
                     strings::StrCat("Can't fold ", node.name(), ", its ", input,
                                     " isn't constant"));
@@ -737,7 +888,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
   }
 
   for (size_t i = 0; i < output_tensors.size(); i++) {
-    string node_name = AddPrefixToNodeName(node.name(), kConstantFoldingConst);
+    string node_name = OptimizedNodeName(node, "-folded");
     if (output_tensors.size() > 1) {
       node_name = strings::StrCat(node_name, "-", i);
     }
@@ -774,7 +925,7 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) {
         continue;
       }
       NodeDef* input_node = node_map_->GetNode(input);
-      if (!IsConstant(*input_node)) {
+      if (!IsReallyConstant(*input_node)) {
         continue;
       }
       bool valid_input = true;
@@ -789,16 +940,14 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) {
         continue;
       }
 
-      string const_out_name =
-          AddPrefixToNodeName(node->name(), kConstantFoldingConst);
-      string const_index_name = AddPrefixToNodeName(
-          strings::StrCat(node->name(), "_index"), kConstantFoldingConst);
+      string const_out_name = OptimizedNodeName(*node, "_const");
+      string const_index_name = OptimizedNodeName(*node, "_index");
       if (node_map_->GetNode(const_out_name) ||
           node_map_->GetNode(const_index_name)) {
         // Intended name already exists.
         return errors::AlreadyExists(
             strings::StrCat(const_out_name, " or ", const_index_name,
-                            "already present in the graph"));
+                            " already present in the graph"));
       }
 
       NodeDef* const_out = output_graph->add_node();
@@ -896,7 +1045,7 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) {
       if (node_map_->GetNode(const_node->name())) {
         // Intended name already exists.
         return errors::AlreadyExists(strings::StrCat(
-            const_node->name(), "already present in the graph"));
+            const_node->name(), " already present in the graph"));
       }
       NodeDef* added_node = output_graph->add_node();
       *added_node = *const_node;
@@ -955,8 +1104,8 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) {
 Status ConstantFolding::FoldGraph(GraphDef* output) {
   std::unordered_set<string> processed_nodes;
   std::deque<NodeDef*> queue;
-  for (int i = 0; i < graph_.node_size(); i++) {
-    auto node = graph_.mutable_node(i);
+  for (int i = 0; i < graph_->node_size(); i++) {
+    auto node = graph_->mutable_node(i);
     if (IsFoldable(*node)) {
       queue.push_back(node);
     }
@@ -969,6 +1118,7 @@ Status ConstantFolding::FoldGraph(GraphDef* output) {
     }
     // We need to record a copy of output nodes before FoldNode() modifies it.
     std::set<NodeDef*> outputs = node_map_->GetOutputs(node->name());
+
     Status s = FoldNode(node, output);
     processed_nodes.insert(node->name());
     if (!s.ok()) {
@@ -995,7 +1145,7 @@ Status ConstantFolding::FoldGraph(GraphDef* output) {
   output->mutable_node()->DeleteSubrange(last + 1,
                                          output->node_size() - last - 1);
 
-  for (const auto& node : graph_.node()) {
+  for (const auto& node : graph_->node()) {
     // If no fetch nodes is provided, we conservatively
     // keep all nodes in the original graph in case users need to fetch
     // their values.
@@ -1016,7 +1166,7 @@ bool ConstantFolding::IsSimplifiableReduction(const NodeDef& node) const {
   if (IsReduction(node)) {
     CHECK_LE(2, node.input_size());
     const NodeDef* reductions_indices = node_map_->GetNode(node.input(1));
-    if (IsConstant(*reductions_indices)) {
+    if (IsReallyConstant(*reductions_indices)) {
       TensorVector output;
       Status s = EvaluateNode(*reductions_indices, TensorVector(), &output);
       if (!s.ok()) {
@@ -1040,7 +1190,7 @@ bool ConstantFolding::IsSimplifiableReshape(
   }
   CHECK_LE(2, node.input_size());
   const NodeDef* new_shape = node_map_->GetNode(node.input(1));
-  if (!IsConstant(*new_shape)) {
+  if (!IsReallyConstant(*new_shape)) {
     return false;
   }
   TensorVector outputs;
@@ -1090,49 +1240,352 @@ bool ConstantFolding::IsSimplifiableReshape(
   return shape.IsCompatibleWith(new_dims);
 }
 
+#define IS_VALUE_CASE(DTYPE, VALUE)                   \
+  case DTYPE:                                         \
+    return AllValuesAre<EnumToDataType<DTYPE>::Type>( \
+        node.attr().at("value").tensor(), EnumToDataType<DTYPE>::Type(VALUE))
+
+#define IS_ONES_CASE(TYPE) IS_VALUE_CASE(TYPE, 1)
+#define IS_ZEROS_CASE(TYPE) IS_VALUE_CASE(TYPE, 0)
+
+bool ConstantFolding::IsOnes(const NodeDef& node) const {
+  if (feed_nodes_.find(node.name()) != feed_nodes_.end()) {
+    return false;
+  }
+  if (node.op() == "OnesLike") {
+    return true;
+  }
+  if (node.op() != "Const") {
+    return false;
+  }
+  const auto dtype = node.attr().at("dtype").type();
+  switch (dtype) {
+    // TODO(rmlarsen): Make DT_HALF case compile.
+    //    IS_ONES_CASE(DT_HALF);
+    IS_ONES_CASE(DT_FLOAT);
+    IS_ONES_CASE(DT_DOUBLE);
+    IS_ONES_CASE(DT_UINT8);
+    IS_ONES_CASE(DT_INT8);
+    IS_ONES_CASE(DT_UINT16);
+    IS_ONES_CASE(DT_INT16);
+    IS_ONES_CASE(DT_INT32);
+    IS_ONES_CASE(DT_INT64);
+    IS_ONES_CASE(DT_COMPLEX64);
+    IS_ONES_CASE(DT_COMPLEX128);
+    default:
+      VLOG(1) << "Unsupported type " << DataTypeString(dtype);
+      return false;
+  }
+  return false;
+}
+
+bool ConstantFolding::IsZeros(const NodeDef& node) const {
+  if (feed_nodes_.find(node.name()) != feed_nodes_.end()) {
+    return false;
+  }
+  if (node.op() == "ZerosLike") {
+    return true;
+  }
+  if (!IsConstant(node)) {
+    return false;
+  }
+  const auto dtype = node.attr().at("dtype").type();
+  switch (dtype) {
+    // TODO(rmlarsen): Make DT_HALF case compile.
+    //    IS_ZEROS_CASE(DT_HALF);
+    IS_ZEROS_CASE(DT_FLOAT);
+    IS_ZEROS_CASE(DT_DOUBLE);
+    IS_ZEROS_CASE(DT_UINT8);
+    IS_ZEROS_CASE(DT_INT8);
+    IS_ZEROS_CASE(DT_UINT16);
+    IS_ZEROS_CASE(DT_INT16);
+    IS_ZEROS_CASE(DT_INT32);
+    IS_ZEROS_CASE(DT_INT64);
+    IS_ZEROS_CASE(DT_COMPLEX64);
+    IS_ZEROS_CASE(DT_COMPLEX128);
+    default:
+      VLOG(1) << "Unsupported type " << DataTypeString(dtype);
+      return false;
+  }
+  return false;
+}
+
+void ConstantFolding::ReplaceOperationWithIdentity(int input_to_forward,
+                                                   NodeDef* node,
+                                                   GraphDef* graph) {
+  node->set_op("Identity");
+  DataType dtype = node->attr().at("T").type();
+  node->clear_attr();
+  (*node->mutable_attr())["T"].set_type(dtype);
+
+  // Propagate the designated input through the identity.
+  node->mutable_input()->SwapElements(0, input_to_forward);
+  // Add all other inputs as control dependencies.
+  for (int i = 1; i < node->input_size(); ++i) {
+    if (IsControlInput(node->input(i))) {
+      break;
+    }
+    const string ctrl_dep =
+        AddControlDependency(node->input(i), graph, node_map_.get());
+    node_map_->UpdateInput(node->name(), node->input(i), ctrl_dep);
+    node->set_input(i, ctrl_dep);
+  }
+  graph_modified_ = true;
+}
+
+void ConstantFolding::ReplaceDivisionOfOnesByReciprocal(NodeDef* node,
+                                                        GraphDef* graph) {
+  node->set_op("Reciprocal");
+  node->mutable_input()->SwapElements(0, 1);
+  const string ctrl_dep =
+      AddControlDependency(node->input(1), graph, node_map_.get());
+  node_map_->UpdateInput(node->name(), node->input(1), ctrl_dep);
+  node->set_input(1, ctrl_dep);
+  graph_modified_ = true;
+}
+
+Status ConstantFolding::ReplaceOperationWithConstant(
+    double value, const TensorShapeProto& shape, NodeDef* node,
+    GraphDef* graph) {
+  AttrValue dtype_attr = node->attr().at("T");
+  AttrValue tensor_attr;
+  TF_RETURN_IF_ERROR(CreateConstantTensorAttrValue(dtype_attr.type(), value,
+                                                   shape, &tensor_attr));
+  node->clear_attr();
+  node->mutable_attr()->insert({"dtype", dtype_attr});
+  node->mutable_attr()->insert({"value", tensor_attr});
+  node->set_op("Const");
+  // Convert all inputs to control dependencies.
+  for (int i = 0; i < node->input_size(); ++i) {
+    if (IsControlInput(node->input(i))) {
+      break;
+    }
+    const string ctrl_dep =
+        AddControlDependency(node->input(i), graph, node_map_.get());
+    node_map_->UpdateInput(node->name(), node->input(i), ctrl_dep);
+    node->set_input(i, ctrl_dep);
+  }
+  graph_modified_ = true;
+  return Status::OK();
+}
+
 Status ConstantFolding::SimplifyGraph(GraphDef* output,
-                                      const GraphProperties& properties) {
-  for (auto& node : *output->mutable_node()) {
-    if (IsSimplifiableReduction(node)) {
+                                      const GraphProperties& properties,
+                                      bool use_shape_info) {
+  const bool is_aggressive = opt_level_ == RewriterConfig::AGGRESSIVE;
+  for (int i = 0; i < output->node_size(); ++i) {
+    NodeDef* node = output->mutable_node(i);
+    if (IsSimplifiableReduction(*node)) {
       // Replace the reduction node with an identity node, that can be further
       // optimized by the model pruner.
-      const NodeDef* reductions_indices = node_map_->GetNode(node.input(1));
       DataType output_type;
-      if (node.attr().count("T") > 0) {
-        output_type = node.attr().at("T").type();
+      if (node->attr().count("T") > 0) {
+        output_type = node->attr().at("T").type();
       } else {
         // This is an 'any' or 'all' reduction. The output is always boolean.
         output_type = DT_BOOL;
       }
-      node.set_op("Identity");
-      node.clear_attr();
-      (*node.mutable_attr())["T"].set_type(output_type);
-      if (node.input_size() > 2) {
-        node.mutable_input()->SwapElements(1, node.input_size() - 1);
+      node->set_op("Identity");
+      node->clear_attr();
+      (*node->mutable_attr())["T"].set_type(output_type);
+      *node->mutable_input(1) = AsControlDependency(node->input(1));
+      graph_modified_ = true;
+      continue;
+    }
+    if (use_shape_info && IsSimplifiableReshape(*node, properties)) {
+      DataType output_type = node->attr().at("T").type();
+      node->set_op("Identity");
+      node->clear_attr();
+      (*node->mutable_attr())["T"].set_type(output_type);
+      *node->mutable_input(1) = AsControlDependency(node->input(1));
+      graph_modified_ = true;
+      continue;
+    }
+    const bool safe_to_use_shapes =
+        use_shape_info && (feed_nodes_.empty() || is_aggressive);
+    const bool is_mul = IsMul(*node);
+    const bool is_matmul = IsMatMul(*node);
+    const bool is_add = IsAdd(*node) || IsBiasAdd(*node);
+    const bool is_sub = IsSub(*node);
+    const bool is_any_div = IsAnyDiv(*node);
+    // Simplify arithmetic operations with ones or zeros.
+    if (safe_to_use_shapes &&
+        (is_mul || is_matmul || is_add || is_sub || is_any_div) &&
+        properties.HasInputProperties(node->name()) &&
+        properties.HasOutputProperties(node->name())) {
+      const NodeDef* x = node_map_->GetNode(node->input(0));
+      const NodeDef* y = node_map_->GetNode(node->input(1));
+      if (x == nullptr || y == nullptr) {
+        return errors::InvalidArgument("Invalid inputs to node: ",
+                                       node->DebugString());
+      }
+      const TensorShapeProto& output_shape =
+          properties.GetOutputProperties(node->name())[0].shape();
+
+      // Simplify element-wise multiplication by ones or addition/subtraction
+      // of zeros.
+      const TensorShapeProto& y_shape =
+          properties.GetInputProperties(node->name())[1].shape();
+      const bool x_is_zero = IsZeros(*x);
+      const bool x_is_one = IsOnes(*x);
+      const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape);
+      if (y_matches_output_shape &&
+          ((is_mul && x_is_one) || (is_add && x_is_zero))) {
+        // TODO(rmlarsen): Handle subtraction 0 - y.
+        // 1 * y = y or 0 + y = y.
+        ReplaceOperationWithIdentity(1, node, output);
+        continue;
+      }
+
+      // Replace 1 / y with Reciprocal op.
+      if (y_matches_output_shape && is_any_div && x_is_one) {
+        DataType type = node->attr().at("T").type();
+        if (DataTypeIsFloating(type) || DataTypeIsComplex(type)) {
+          ReplaceDivisionOfOnesByReciprocal(node, output);
+          continue;
+        }
+      }
+
+      const TensorShapeProto& x_shape =
+          properties.GetInputProperties(node->name())[0].shape();
+      const bool y_is_zero = IsZeros(*y);
+      const bool y_is_one = IsOnes(*y);
+      const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
+      if (x_matches_output_shape &&
+          (((is_mul || is_any_div) && y_is_one) ||
+           ((is_add || is_sub) && y_is_zero && is_aggressive))) {
+        // x * 1 = x or x / 1 = x or x +/- 0 = x
+        ReplaceOperationWithIdentity(0, node, output);
+        continue;
+      }
+
+      // Simplify multiplication and matmul by zeros.
+      // Also optimize zeros divided by a tensor, but only if we are in
+      // aggressive mode, since we might get rid of divisions by zero.
+      bool optimize_zeros_divided_by_y =
+          is_any_div && x_is_zero && is_aggressive;
+      if ((x_is_zero || y_is_zero) &&
+          (is_mul || is_matmul || optimize_zeros_divided_by_y)) {
+        const PartialTensorShape shp(output_shape);
+        if (shp.IsFullyDefined()) {
+          TF_RETURN_IF_ERROR(
+              ReplaceOperationWithConstant(0, output_shape, node, output));
+          continue;
+        }
+        // Even if an input shape is only partially known, we may known that it
+        // matches the output shape and thus forward the corresponding zero
+        // input.
+        if ((is_mul || is_any_div) && x_is_zero && x_matches_output_shape) {
+          ReplaceOperationWithIdentity(0, node, output);
+          continue;
+        } else if (is_mul && y_is_zero && y_matches_output_shape) {
+          ReplaceOperationWithIdentity(1, node, output);
+          continue;
+        }
+      }
+    }
+
+    // Strength reduce floating point division by a constant Div(x, const) to
+    // multiplication by the reciprocal Mul(x, Reciprocal(const)). This in turn
+    // will be constant folded to Mul(x, 1.0/const).
+    if (node->input_size() >= 2 && (IsRealDiv(*node) || IsDiv(*node))) {
+      const string& const_input = node->input(1);
+      const NodeDef* denom = node_map_->GetNode(const_input);
+      CHECK(denom != nullptr);
+      if (!IsReallyConstant(*denom)) {
+        continue;
       }
-      node.mutable_input()->RemoveLast();
-      for (const auto& input : reductions_indices->input()) {
-        DCHECK(IsControlInput(input));
-        *node.add_input() = input;
+      if (node->attr().count("T") == 0) {
+        continue;
+      }
+      DataType type = node->attr().at("T").type();
+      if (IsDiv(*node) &&
+          !(DataTypeIsFloating(type) || DataTypeIsComplex(type))) {
+        continue;
       }
+      // Insert new reciprocal op and change node from Div to Mul.
+      NodeDef* reciprocal_node = output->add_node();
+      reciprocal_node->set_name(AddPrefixToNodeName(
+          strings::StrCat(node->name(), "_recip"), kConstantFoldingConst));
+      reciprocal_node->set_op("Reciprocal");
+      reciprocal_node->set_device(node->device());
+      node->set_op("Mul");
+      // Re-wire inputs and outputs.
+      reciprocal_node->add_input(const_input);
+      (*reciprocal_node->mutable_attr())["T"].set_type(type);
+      node->set_input(1, reciprocal_node->name());
+      node_map_->AddNode(reciprocal_node->name(), reciprocal_node);
+      node_map_->UpdateOutput(node->name(), const_input,
+                              reciprocal_node->name());
+      graph_modified_ = true;
     }
-    // It's possible to feed a placeholder with a tensor that doesn't have the
-    // proper shape, and reshape this tensor later on. Therefore only remove
-    // reshapes in graphs that don't have placeholders.
-    if (IsSimplifiableReshape(node, properties)) {
-      const NodeDef* new_shape = node_map_->GetNode(node.input(1));
-      DataType output_type = node.attr().at("T").type();
-      node.set_op("Identity");
-      node.clear_attr();
-      (*node.mutable_attr())["T"].set_type(output_type);
-      if (node.input_size() > 2) {
-        node.mutable_input()->SwapElements(1, node.input_size() - 1);
+
+    // Consider the transformation
+    //
+    //                      +                +       = parent
+    //                     / \              / \
+    //                  Const +    -- >    X   +     = children
+    //                       / \              / \
+    //                      X   Y          Const Y   = leaves
+    //
+    // where '+' denotes an associative and commutative operator like addition
+    // or multiplication. This optimization pushes constants down in the tree
+    // to canonicalize it. Moreoever, in cases where the child node has a
+    // constant input we will create a node that can be folded, e.g.
+    //
+    //    Add(C1, Add(C2, X)) -> Add(X, Add(C1, C2)) -> Add(X, C1 + C2)
+    //
+    // TODO(rmlarsen): Handle non-associative/non-commutative operators like
+    // subtraction and division, as well as mixed subtraction/addition,
+    // division/multiplication.
+    // Don't touch BiasAdd since they can't handle vectors as their first
+    // inputs.
+    if ((IsAdd(*node) || is_mul) && NumNonControlInputs(*node) == 2) {
+      NodeDef* left_child = node_map_->GetNode(node->input(0));
+      NodeDef* right_child = node_map_->GetNode(node->input(1));
+      // One child must be constant, and the other the same op as the parent.
+      if (node->op() != left_child->op() && node->op() != right_child->op()) {
+        continue;
       }
-      node.mutable_input()->RemoveLast();
-      for (const auto& input : new_shape->input()) {
-        DCHECK(IsControlInput(input));
-        *node.add_input() = input;
+      const bool left_child_is_constant = IsReallyConstant(*left_child);
+      const bool right_child_is_constant = IsReallyConstant(*right_child);
+      if (!left_child_is_constant && !right_child_is_constant) {
+        continue;
+      }
+      if (node->device() != left_child->device() ||
+          node->device() != right_child->device()) {
+        continue;
+      }
+      NodeDef* child_node = left_child_is_constant ? right_child : left_child;
+      // Make sure that it is safe to change the value of the child node->
+      if (child_node->input_size() < 2 ||
+          NumNonControlOutputs(*child_node, *node_map_) > 1 || !has_fetch_ ||
+          nodes_to_preserve_.find(child_node->name()) !=
+              nodes_to_preserve_.end()) {
+        continue;
       }
+
+      // Identify the nodes to swap.
+      const NodeDef* left_leaf = node_map_->GetNode(child_node->input(0));
+      const NodeDef* right_leaf = node_map_->GetNode(child_node->input(1));
+      const bool left_leaf_is_constant = IsReallyConstant(*left_leaf);
+      const bool right_leaf_is_constant = IsReallyConstant(*right_leaf);
+      if (left_leaf_is_constant && right_leaf_is_constant) {
+        // Child is already foldable, leave it alone.
+        continue;
+      }
+      const int non_const_leaf_input = left_leaf_is_constant ? 1 : 0;
+      const int parent_const_input = left_child_is_constant ? 0 : 1;
+
+      // Swap the constant child with a non-constant leaf node.
+      node_map_->UpdateInput(node->name(), node->input(parent_const_input),
+                             child_node->input(non_const_leaf_input));
+      node_map_->UpdateInput(child_node->name(),
+                             child_node->input(non_const_leaf_input),
+                             node->input(parent_const_input));
+      std::swap(*node->mutable_input(parent_const_input),
+                *child_node->mutable_input(non_const_leaf_input));
+      graph_modified_ = true;
     }
   }
   return Status::OK();
@@ -1141,7 +1594,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
 Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
                                             const GrapplerItem& item,
                                             GraphDef* output) {
-  node_map_.reset(new NodeMap(&graph_));
+  node_map_.reset(new NodeMap(graph_));
   nodes_whitelist_.clear();
   // Fold fetch nodes iff it has a single fanout. Note that if a fetch node
   // has a single fanout, it would be rewritten as a constant with the same
@@ -1158,36 +1611,30 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
   }
 
   GraphProperties properties(item);
-  const bool has_feed = !item.feed.empty();
-  bool needs_shapes = !has_feed || opt_level_ == RewriterConfig::AGGRESSIVE;
-  Status s = errors::Unknown(
-      "The graph properties are needed but were not initialized");
-  if (needs_shapes) {
-    s = properties.InferStatically();
-  }
+  // It's possible to feed a placeholder with a tensor of any shape: make sure
+  // that the shape inference deals with this conservatively unless we're in
+  // aggressive mode.
+  const bool assume_valid_feeds = opt_level_ == RewriterConfig::AGGRESSIVE;
+  Status s = properties.InferStatically(assume_valid_feeds);
+  const bool can_use_shape_info = s.ok();
 
-  if (!has_feed && s.ok()) {
-    // Only use static shape information when there is no feed in the
-    // graph. That's because it's possible to feed a placeholder with a tensor
-    // of any shape, which could make the static information inconsistent with
-    // the shapes actually fed.
-    TF_RETURN_IF_ERROR(MaterializeShapes(item, properties));
-  }
-  if (opt_level_ == RewriterConfig::AGGRESSIVE && s.ok()) {
-    TF_RETURN_IF_ERROR(MaterializeConstants(item, properties));
+  if (can_use_shape_info) {
+    TF_RETURN_IF_ERROR(MaterializeShapes(properties));
+    TF_RETURN_IF_ERROR(MaterializeConstants(properties));
   }
 
   TF_RETURN_IF_ERROR(FoldGraph(output));
-
-  if (!has_feed && s.ok()) {
-    TF_RETURN_IF_ERROR(SimplifyGraph(output, properties));
-  }
+  node_map_.reset(new NodeMap(output));
+  TF_RETURN_IF_ERROR(SimplifyGraph(output, properties, can_use_shape_info));
   return Status::OK();
 }
 
 Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item,
                                  GraphDef* output) {
   nodes_to_preserve_ = item.NodesToPreserve();
+  for (const auto& feed : item.feed) {
+    feed_nodes_.insert(NodeName(feed.first));
+  }
 
   if (cpu_device_ == nullptr) {
     owned_device_.reset(new DeviceSimple());
@@ -1200,13 +1647,13 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item,
   *output = item.graph;
   int64 node_count;
   do {
-    graph_.Swap(output);
-    item_to_optimize.graph = graph_;
+    graph_modified_ = false;
+    item_to_optimize.graph.Swap(output);
+    graph_ = &item_to_optimize.graph;
     *output = GraphDef();
-    node_count = graph_.node_size();
+    node_count = graph_->node_size();
     TF_RETURN_IF_ERROR(RunOptimizationPass(cluster, item_to_optimize, output));
-  } while (output->node_size() != node_count);
-
+  } while (graph_modified_ || output->node_size() != node_count);
   *output->mutable_library() = item.graph.library();
   *output->mutable_versions() = item.graph.versions();
 
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index f04f413c10a7e8e19520cc462f88b2a9a2d0fecd..6aadd9750893bd008b353e6227d82723166edd6e 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -51,16 +51,20 @@ class ConstantFolding : public GraphOptimizer {
                 const GraphDef& optimize_output, double result) override;
 
  private:
-  Status MaterializeShapes(const GrapplerItem& item,
-                           const GraphProperties& properties);
+  string OptimizedNodeName(const NodeDef& node, StringPiece suffix) const;
+  string OptimizedNodeName(const NodeDef& node) const;
+  bool OptimizedNodeExists(const NodeDef& node, StringPiece suffix) const;
+
+  bool IsReallyConstant(const NodeDef& node) const;
+
+  Status MaterializeShapes(const GraphProperties& properties);
 
   Status MaterializeBroadcastGradientArgs(const NodeDef& node,
                                           const GraphProperties& properties);
   Status MaterializeReductionIndices(NodeDef* node,
                                      const GraphProperties& properties);
 
-  Status MaterializeConstants(const GrapplerItem& item,
-                              const GraphProperties& properties);
+  Status MaterializeConstants(const GraphProperties& properties);
   bool IsFoldable(const NodeDef& node) const;
 
   Status EvaluateNode(const NodeDef& node,
@@ -72,12 +76,21 @@ class ConstantFolding : public GraphOptimizer {
 
   Status FoldNode(NodeDef* node, GraphDef* output_graph);
 
+  bool IsOnes(const NodeDef& node) const;
+  bool IsZeros(const NodeDef& node) const;
+  void ReplaceOperationWithIdentity(int input_to_forward, NodeDef* node,
+                                    GraphDef* graph);
+  Status ReplaceOperationWithConstant(double value,
+                                      const TensorShapeProto& shape,
+                                      NodeDef* node, GraphDef* graph);
+  void ReplaceDivisionOfOnesByReciprocal(NodeDef* node, GraphDef* graph);
   Status FoldGraph(GraphDef* output);
 
   bool IsSimplifiableReduction(const NodeDef& node) const;
   bool IsSimplifiableReshape(const NodeDef& node,
                              const GraphProperties& properties) const;
-  Status SimplifyGraph(GraphDef* output, const GraphProperties& properties);
+  Status SimplifyGraph(GraphDef* output, const GraphProperties& properties,
+                       bool use_shape_info);
 
   Status RunOptimizationPass(Cluster* cluster, const GrapplerItem& item,
                              GraphDef* output);
@@ -88,11 +101,13 @@ class ConstantFolding : public GraphOptimizer {
   std::unique_ptr<DeviceBase> owned_device_;
 
   std::unique_ptr<ResourceMgr> resource_mgr_;
-  GraphDef graph_;
+  GraphDef* graph_;
   std::unique_ptr<NodeMap> node_map_;
   std::unordered_set<string> nodes_to_preserve_;
   std::unordered_set<string> nodes_whitelist_;
+  std::unordered_set<string> feed_nodes_;
   bool has_fetch_;
+  bool graph_modified_;
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index b2d9b02c68358fc3e22881bba60a34feb3d4211e..2db3dc699341ab3b582e6ee17b2611410cf27366 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -77,11 +77,483 @@ TEST_F(ConstantFoldingTest, SimpleFolding) {
   test::ExpectTensorEqual<float>(tensors_expected[0], tensors[0]);
 }
 
+TEST_F(ConstantFoldingTest, AddTree) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+
+  Output c1 = ops::Const(s.WithOpName("c1"), 2.0f, {1});
+  Output c2 = ops::Const(s.WithOpName("c2"), 2.0f, {2});
+  Output c4 = ops::Const(s.WithOpName("c4"), 4.0f, {2});
+  Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT,
+                              ops::Placeholder::Shape(TensorShape({2, 2})));
+  Output add_child = ops::Add(s.WithOpName("add_child"), c2, x);
+  Output add_parent = ops::Add(s.WithOpName("add_parent"), c1, add_child);
+  Output mul_child = ops::Mul(s.WithOpName("mul_child"), c2, x);
+  Output mul_parent = ops::Mul(s.WithOpName("mul_parent"), c1, mul_child);
+  Output addmul_child = ops::Add(s.WithOpName("addmul_child"), c2, x);
+  Output addmul_parent =
+      ops::Mul(s.WithOpName("addmul_parent"), c1, addmul_child);
+
+  GrapplerItem item;
+  item.fetch = {"add_parent", "mul_parent", "addmul_parent"};
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  ConstantFolding fold(nullptr /* cpu_device */);
+  GraphDef output;
+  Status status = fold.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  EXPECT_EQ(9, output.node_size());
+
+  // We expect the following rewrite(s) to occur (for both Add and Mul):
+  //    +                +             +
+  //   / \              / \           / \
+  // 2.0   +     -->   x   +    -->  x  4.0
+  //      / \             / \
+  //    2.0  x          2.0 2.0
+
+  for (const auto& node : output.node()) {
+    if (node.name() == "add_child") {
+      EXPECT_EQ("Const", node.op());
+      TensorProto t = node.attr().at("value").tensor();
+      EXPECT_EQ(1, t.tensor_shape().dim_size());
+      EXPECT_EQ(2, t.tensor_shape().dim(0).size());
+    } else if (node.name() == "add_parent") {
+      EXPECT_EQ("Add", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("x", node.input(0));
+      EXPECT_EQ("add_child", node.input(1));
+    } else if (node.name() == "mul_child") {
+      EXPECT_EQ("Const", node.op());
+      TensorProto t = node.attr().at("value").tensor();
+      EXPECT_EQ(1, t.tensor_shape().dim_size());
+      EXPECT_EQ(2, t.tensor_shape().dim(0).size());
+    } else if (node.name() == "mul_parent") {
+      EXPECT_EQ("Mul", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("x", node.input(0));
+      EXPECT_EQ("mul_child", node.input(1));
+    } else if (node.name() == "addmul_child") {
+      // Unchanged.
+      EXPECT_EQ("Add", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("c2", node.input(0));
+      EXPECT_EQ("x", node.input(1));
+    }
+  }
+
+  // Check that the reciprocals have the expected value.
+  std::vector<string> fetch = {"c4"};
+  auto tensor_expected = EvaluateNodes(item.graph, fetch);
+  EXPECT_EQ(fetch.size(), tensor_expected.size());
+  fetch = {"add_child", "mul_child"};
+  auto tensors = EvaluateNodes(output, fetch);
+  EXPECT_EQ(fetch.size(), tensors.size());
+  for (int i = 0; i < fetch.size(); i++) {
+    test::ExpectTensorEqual<float>(tensor_expected[0], tensors[i]);
+  }
+}
+
+TEST_F(ConstantFoldingTest, NeutralElement) {
+  for (bool use_const : {true, false}) {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT,
+                                ops::Placeholder::Shape(TensorShape({2, 2})));
+    Output y = ops::Placeholder(s.WithOpName("y"), DT_FLOAT,
+                                ops::Placeholder::Shape(TensorShape({2, 2})));
+    Output a = ops::Placeholder(s.WithOpName("a"), DT_FLOAT,
+                                ops::Placeholder::Shape(TensorShape({3, 2})));
+    Output b = ops::Placeholder(s.WithOpName("b"), DT_FLOAT,
+                                ops::Placeholder::Shape(TensorShape({2, 3})));
+    Output bias = ops::Placeholder(s.WithOpName("bias"), DT_FLOAT,
+                                   ops::Placeholder::Shape(TensorShape({2})));
+    Output zeros = !use_const ? ops::ZerosLike(s.WithOpName("zeros"), x)
+                              : ops::Const(s.WithOpName("zeros"), 0.0f, {2, 2});
+    Output zeros_1d = ops::Const(s.WithOpName("zeros_1d"), 0.0f, {2});
+    Output ones = !use_const ? ops::OnesLike(s.WithOpName("ones"), x)
+                             : ops::Const(s.WithOpName("ones"), 1.0f, {2, 2});
+    Output mul1 = ops::Mul(s.WithOpName("mul1"), x, zeros);
+    Output mul2 = ops::Mul(s.WithOpName("mul2"), zeros, y);
+    Output mul3 = ops::Mul(s.WithOpName("mul3"), x, ones);
+    Output mul4 = ops::Mul(s.WithOpName("mul4"), ones, y);
+    Output mul5 = ops::Mul(s.WithOpName("mul5"), x, zeros_1d);
+    Output mul6 = ops::Mul(s.WithOpName("mul6"), zeros_1d, y);
+    Output div1 = ops::Div(s.WithOpName("div1"), x, ones);
+    Output div2 = ops::Div(s.WithOpName("div2"), ones, y);
+    Output matmul1 = ops::MatMul(s.WithOpName("matmul1"), x, zeros);
+    Output matmul2 = ops::MatMul(s.WithOpName("matmul2"), zeros, y);
+    Output matmul3 = ops::MatMul(s.WithOpName("matmul3"), a, zeros);
+    Output matmul4 = ops::MatMul(s.WithOpName("matmul4"), zeros, b);
+    Output add1 = ops::Add(s.WithOpName("add1"), x, zeros);
+    Output add2 = ops::Add(s.WithOpName("add2"), zeros, y);
+    Output bias_add1 = ops::BiasAdd(s.WithOpName("bias_add1"), x, zeros_1d);
+    Output bias_add2 = ops::BiasAdd(s.WithOpName("bias_add2"), zeros, bias);
+    Output sub1 = ops::Sub(s.WithOpName("sub1"), x, zeros);
+    Output sub2 = ops::Sub(s.WithOpName("sub2"), zeros, y);
+    Output addn =
+        ops::AddN(s.WithOpName("addn"),
+                  {mul1, mul2, mul3, mul4, mul5, mul6, div1, div2, matmul1,
+                   matmul2, add1, add2, bias_add1, bias_add2, sub1, sub2});
+    GrapplerItem item;
+    TF_CHECK_OK(s.ToGraphDef(&item.graph));
+    item.fetch = {"addn", "matmul3", "matmul4"};
+
+    ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                              nullptr /* cpu_device */);
+    GraphDef output;
+    Status status = optimizer.Optimize(nullptr, item, &output);
+    TF_EXPECT_OK(status);
+
+    EXPECT_EQ(27, output.node_size());
+    for (int i = 0; i < output.node_size(); ++i) {
+      const NodeDef& node = output.node(i);
+      const string& name = node.name();
+      if (name == "mul1") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^x", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
+      } else if (name == "mul2") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^zeros", node.input(0));
+        EXPECT_EQ("^y", node.input(1));
+      } else if (name == "mul3") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("x", node.input(0));
+        EXPECT_EQ("^ones", node.input(1));
+      } else if (name == "mul4") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("y", node.input(0));
+        EXPECT_EQ("^ones", node.input(1));
+      } else if (name == "mul5") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^x", node.input(0));
+        EXPECT_EQ("^zeros_1d", node.input(1));
+      } else if (name == "mul6") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^zeros_1d", node.input(0));
+        EXPECT_EQ("^y", node.input(1));
+      } else if (name == "div1") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("x", node.input(0));
+        EXPECT_EQ("^ones", node.input(1));
+      } else if (name == "div2") {
+        EXPECT_EQ("Reciprocal", node.op());
+        EXPECT_EQ("y", node.input(0));
+        EXPECT_EQ("^ones", node.input(1));
+      } else if (name == "matmul1") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^x", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
+      } else if (name == "matmul2") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^zeros", node.input(0));
+        EXPECT_EQ("^y", node.input(1));
+      } else if (name == "matmul3") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^a", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
+        TensorProto t = node.attr().at("value").tensor();
+        EXPECT_EQ(1, t.float_val_size());
+        EXPECT_EQ(0, t.float_val(0));
+        EXPECT_EQ(2, t.tensor_shape().dim_size());
+        EXPECT_EQ(3, t.tensor_shape().dim(0).size());
+        EXPECT_EQ(2, t.tensor_shape().dim(1).size());
+      } else if (name == "matmul4") {
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^zeros", node.input(0));
+        EXPECT_EQ("^b", node.input(1));
+        TensorProto t = node.attr().at("value").tensor();
+        EXPECT_EQ(1, t.float_val_size());
+        EXPECT_EQ(0, t.float_val(0));
+        EXPECT_EQ(2, t.tensor_shape().dim_size());
+        EXPECT_EQ(2, t.tensor_shape().dim(0).size());
+        EXPECT_EQ(3, t.tensor_shape().dim(1).size());
+      } else if (name == "add1") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("x", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
+      } else if (name == "add2") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("y", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
+      } else if (name == "bias_add1") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("x", node.input(0));
+        EXPECT_EQ("^zeros_1d", node.input(1));
+      } else if (name == "bias_add2") {
+        // We don't eliminate this one, because it requires broadcasting.
+        EXPECT_EQ("BiasAdd", node.op());
+        EXPECT_EQ("zeros", node.input(0));
+        EXPECT_EQ("bias", node.input(1));
+      } else if (name == "sub1") {
+        EXPECT_EQ("Identity", node.op());
+        EXPECT_EQ("x", node.input(0));
+        EXPECT_EQ("^zeros", node.input(1));
+      } else if (name == "sub2") {
+        // We don't handle this case yet.
+        EXPECT_EQ("Sub", node.op());
+        EXPECT_EQ("zeros", node.input(0));
+        EXPECT_EQ("y", node.input(1));
+      }
+      const std::set<string> square_zero_const{"mul1", "mul2",    "mul5",
+                                               "mul6", "matmul1", "matmul2"};
+      if (square_zero_const.count(name) > 0) {
+        TensorProto t = node.attr().at("value").tensor();
+        EXPECT_EQ(1, t.float_val_size());
+        EXPECT_EQ(0, t.float_val(0));
+        EXPECT_EQ(2, t.tensor_shape().dim_size());
+        EXPECT_EQ(2, t.tensor_shape().dim(0).size());
+        EXPECT_EQ(2, t.tensor_shape().dim(1).size());
+      }
+    }
+  }
+}
+
+TEST_F(ConstantFoldingTest, StrengthReduce_Reciprocal) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output cf_half = ops::Const(s.WithOpName("cf_half"), 0.5f, {1});
+  Output xf = ops::Placeholder(s.WithOpName("xf"), DT_FLOAT,
+                               ops::Placeholder::Shape(TensorShape({2, 2})));
+  Output xi = ops::Placeholder(s.WithOpName("xi"), DT_INT32,
+                               ops::Placeholder::Shape(TensorShape({2, 2})));
+  Output ci = ops::Const(s.WithOpName("ci"), 2, {1});
+  Output cf = ops::Const(s.WithOpName("cf"), 2.0f, {1});
+  Output div_i = ops::Div(s.WithOpName("div_i"), xi, ci);
+  Output div_f = ops::Div(s.WithOpName("div_f"), xf, cf);
+  Output realdiv = ops::RealDiv(s.WithOpName("realdiv"), xf, cf);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch = {"div_f", "div_i", "realdiv"};
+  ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                            nullptr /* cpu_device */);
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  EXPECT_EQ(8, output.node_size());
+  for (int i = 0; i < output.node_size(); ++i) {
+    const NodeDef& node = output.node(i);
+    const string& name = node.name();
+    if (name == "div_i") {
+      // Integer division is unchanged.
+      EXPECT_EQ("Div", node.op());
+      EXPECT_EQ("xi", node.input(0));
+      EXPECT_EQ("ci", node.input(1));
+    } else if (name == "div_f") {
+      EXPECT_EQ("Mul", node.op());
+      EXPECT_EQ("xf", node.input(0));
+      EXPECT_EQ("ConstantFolding/div_f_recip", node.input(1));
+    } else if (name == "realdiv") {
+      EXPECT_EQ("Mul", node.op());
+      EXPECT_EQ("xf", node.input(0));
+      EXPECT_EQ("ConstantFolding/realdiv_recip", node.input(1));
+    } else if (name == "ConstantFolding/div_f_recip") {
+      EXPECT_EQ("Const", node.op());
+      EXPECT_EQ(DT_FLOAT, node.attr().at("dtype").type());
+      TensorProto t = node.attr().at("value").tensor();
+      EXPECT_EQ(DT_FLOAT, t.dtype());
+      EXPECT_EQ(1, t.tensor_shape().dim_size());
+      EXPECT_EQ(1, t.tensor_shape().dim(0).size());
+    } else if (name == "ConstantFolding/realdiv_recip") {
+      EXPECT_EQ("Const", node.op());
+      EXPECT_EQ(DT_FLOAT, node.attr().at("dtype").type());
+      TensorProto t = node.attr().at("value").tensor();
+      EXPECT_EQ(DT_FLOAT, t.dtype());
+      EXPECT_EQ(1, t.tensor_shape().dim_size());
+      EXPECT_EQ(1, t.tensor_shape().dim(0).size());
+    }
+  }
+
+  // Check that the reciprocals have the expected value.
+  std::vector<string> fetch = {"cf_half"};
+  auto tensor_expected = EvaluateNodes(item.graph, fetch);
+  EXPECT_EQ(fetch.size(), tensor_expected.size());
+  fetch = {"ConstantFolding/div_f_recip", "ConstantFolding/realdiv_recip"};
+  auto tensors = EvaluateNodes(output, fetch);
+  EXPECT_EQ(fetch.size(), tensors.size());
+  for (int i = 0; i < fetch.size(); i++) {
+    test::ExpectTensorEqual<float>(tensor_expected[0], tensors[i]);
+  }
+}
+
+TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output x_known =
+      ops::Placeholder(s.WithOpName("x_known"), DT_FLOAT,
+                       ops::Placeholder::Shape(TensorShape({2, 2})));
+  Output x_partially_known =
+      ops::Placeholder(s.WithOpName("x_partially_unknown"), DT_FLOAT,
+                       ops::Placeholder::Shape(PartialTensorShape({-1, -1})));
+  Output x_unknown = ops::Placeholder(s.WithOpName("x_unknown"), DT_FLOAT);
+  Output zeros_known = ops::ZerosLike(s.WithOpName("zeros_known"), x_known);
+  Output zeros_partially_known =
+      ops::ZerosLike(s.WithOpName("zeros_partially_known"), x_partially_known);
+  Output zeros_unknown =
+      ops::ZerosLike(s.WithOpName("zeros_unknown"), x_unknown);
+
+  // Multiplies without any additional ops to supply the output shape.
+  int count = 0;
+  std::vector<Output> muls;
+  std::unordered_set<string> not_converted;
+  std::unordered_set<string> to_const;
+  std::unordered_set<string> to_identity;
+  for (const auto* x : {&x_known, &x_partially_known, &x_unknown}) {
+    for (const auto* zeros :
+         {&zeros_known, &zeros_partially_known, &zeros_unknown}) {
+      const string name = strings::StrCat("mul_", count++);
+      muls.push_back(ops::Mul(s.WithOpName(name), *x, *zeros));
+      if (x == &x_partially_known && zeros == &zeros_partially_known) {
+        to_identity.insert(name);
+      } else if (x == &x_unknown || zeros == &zeros_unknown) {
+        not_converted.insert(name);
+      } else {
+        to_const.insert(name);
+      }
+    }
+  }
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                            nullptr /* cpu_device */);
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  LOG(INFO) << output.DebugString();
+
+  EXPECT_EQ(15, output.node_size());
+  for (int i = 0; i < output.node_size(); ++i) {
+    const NodeDef& node = output.node(i);
+    const string& name = node.name();
+    if (to_const.count(name) > 0) {
+      EXPECT_EQ("Const", node.op()) << node.name();
+    } else if (to_identity.count(name) > 0) {
+      EXPECT_EQ("Identity", node.op()) << node.name();
+    } else if (not_converted.count(name) > 0) {
+      EXPECT_EQ("Mul", node.op()) << node.name();
+    }
+  }
+}
+
+TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_KnownOutputShape) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output known_shape = ops::Const(s.WithOpName("known_shape"), 0.0f, {2, 2});
+  Output x_partially_known =
+      ops::Placeholder(s.WithOpName("x_partially_unknown"), DT_FLOAT,
+                       ops::Placeholder::Shape(PartialTensorShape({-1, -1})));
+  Output x_unknown = ops::Placeholder(s.WithOpName("x_unknown"), DT_FLOAT);
+  Output zeros_partially_known =
+      ops::ZerosLike(s.WithOpName("zeros_partially_known"), x_partially_known);
+  Output zeros_unknown =
+      ops::ZerosLike(s.WithOpName("zeros_unknown"), x_unknown);
+
+  // If at least one of the inputs to AddN has a known shape, shape inference
+  // will propagate the shape back to the inputs of AddN, making the
+  // output shapes of all its inputs known
+  std::vector<Output> muls_deduced_output_shape;
+  std::unordered_set<string> to_const;
+  int count = 0;
+  for (const auto& x : {x_partially_known, x_unknown}) {
+    for (const auto& zeros : {zeros_partially_known, zeros_unknown}) {
+      const string name = strings::StrCat("mul_", count++);
+      muls_deduced_output_shape.push_back(
+          ops::Mul(s.WithOpName(name), x, zeros));
+      to_const.insert(name);
+    }
+  }
+  // We add a known shape as input to AddN to propagate it back to the
+  // multiplies above, which means they can all be turned into Const nodes.
+  muls_deduced_output_shape.push_back(known_shape);
+  Output addn1 = ops::AddN(s.WithOpName("addn1"), muls_deduced_output_shape);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                            nullptr /* cpu_device */);
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  LOG(INFO) << output.DebugString();
+
+  EXPECT_EQ(10, output.node_size());
+  for (int i = 0; i < output.node_size(); ++i) {
+    const NodeDef& node = output.node(i);
+    const string& name = node.name();
+    if (to_const.count(name) > 0) {
+      EXPECT_EQ("Const", node.op()) << node.name();
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_TRUE(IsControlInput(node.input(0)));
+      EXPECT_TRUE(IsControlInput(node.input(1)));
+    }
+  }
+}
+
+TEST_F(ConstantFoldingTest, CreateConstNodes) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+
+#define MAKE_TEST_GRAPH(TYPE)                                               \
+  Output TYPE##_const =                                                     \
+      ops::Const(s.WithOpName(#TYPE "_const"), static_cast<TYPE>(10), {5}); \
+  Output TYPE##_mul =                                                       \
+      ops::Mul(s.WithOpName(#TYPE "_mul"), TYPE##_const, TYPE##_const);     \
+  Output TYPE##_id = ops::Identity(s.WithOpName(#TYPE "_id"), TYPE##_mul)
+
+  MAKE_TEST_GRAPH(float);
+  MAKE_TEST_GRAPH(double);
+  MAKE_TEST_GRAPH(int64);
+  MAKE_TEST_GRAPH(int32);
+  MAKE_TEST_GRAPH(int16);
+  MAKE_TEST_GRAPH(int8);
+  MAKE_TEST_GRAPH(uint8);
+#undef MAKE_TEST_GRAPH
+
+  Output bool_const = ops::Const(s.WithOpName("bool_const"), true, {5});
+  Output bool_and =
+      ops::LogicalAnd(s.WithOpName("bool_and"), bool_const, bool_const);
+  Output bool_id = ops::Identity(s.WithOpName("bool_id"), bool_and);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  ConstantFolding fold(nullptr /* cpu_device */);
+  GraphDef output;
+  Status status = fold.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  EXPECT_EQ(24, output.node_size());
+  for (const NodeDef& node : output.node()) {
+#define CHECK_RESULT(TYPE, FIELD)                                             \
+  if (node.name() == #TYPE "_mul") {                                          \
+    EXPECT_EQ(5,                                                              \
+              node.attr().at("value").tensor().tensor_shape().dim(0).size()); \
+    EXPECT_EQ(1, node.attr().at("value").tensor().FIELD##_val_size());        \
+    EXPECT_EQ(10 * 10, node.attr().at("value").tensor().FIELD##_val(0));      \
+  }
+
+    CHECK_RESULT(float, float);
+    CHECK_RESULT(double, double);
+    CHECK_RESULT(int64, int64);
+    CHECK_RESULT(int32, int);
+    CHECK_RESULT(int16, int);
+    CHECK_RESULT(int8, int);
+    CHECK_RESULT(uint8, int);
+#undef CHECK_RESULT
+
+    if (node.name() == "bool_and") {
+      EXPECT_EQ(5,
+                node.attr().at("value").tensor().tensor_shape().dim(0).size());
+      EXPECT_EQ(1, node.attr().at("value").tensor().bool_val_size());
+      EXPECT_EQ(true && true, node.attr().at("value").tensor().bool_val(0));
+    }
+  }
+}
+
 TEST_F(ConstantFoldingTest, FoldingNodeWithTwoOutputs) {
   // Build a simple graph with a few trivially prunable ops.
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
 
-  Output a = ops::Const(s.WithOpName("a"), 10, {3});
+  Output a = ops::Const(s.WithOpName("a"), 10, {5});
   auto b = ops::Unique(s.WithOpName("b"), {a});
   Output c = ops::Identity(s.WithOpName("c"), {b.y});
   Output d = ops::Identity(s.WithOpName("d"), {b.idx});
@@ -445,8 +917,10 @@ TEST_F(ConstantFoldingTest, ShapeMaterializationShapeN) {
   TF_EXPECT_OK(status);
   int found = 0;
   for (const auto& node : output.node()) {
-    EXPECT_NE(AddPrefixToNodeName("s-0", kConstantFoldingConst), node.name());
-    EXPECT_NE(AddPrefixToNodeName("s-1", kConstantFoldingConst), node.name());
+    EXPECT_NE(AddPrefixToNodeName("s-matshapes-0", kConstantFoldingConst),
+              node.name());
+    EXPECT_NE(AddPrefixToNodeName("s-matshapes-1", kConstantFoldingConst),
+              node.name());
     if (node.name() == "i1a" || node.name() == "i1b") {
       ++found;
       EXPECT_EQ("s", node.input(0));
@@ -457,7 +931,7 @@ TEST_F(ConstantFoldingTest, ShapeMaterializationShapeN) {
     }
     if (node.name() == "i3a" || node.name() == "i3b") {
       ++found;
-      EXPECT_EQ(AddPrefixToNodeName("s-2", kConstantFoldingConst),
+      EXPECT_EQ(AddPrefixToNodeName("s-matshapes-2", kConstantFoldingConst),
                 node.input(0));
     }
     if (node.name() == "s") {
@@ -467,7 +941,8 @@ TEST_F(ConstantFoldingTest, ShapeMaterializationShapeN) {
       EXPECT_EQ("v2", node.input(1));
       EXPECT_EQ("v3", node.input(2));
     }
-    if (node.name() == AddPrefixToNodeName("s-2", kConstantFoldingConst)) {
+    if (node.name() ==
+        AddPrefixToNodeName("s-matshapes-2", kConstantFoldingConst)) {
       ++found;
       EXPECT_EQ("Const", node.op());
       EXPECT_EQ("^s", node.input(0));
@@ -735,7 +1210,7 @@ TEST_F(ConstantFoldingTest, NoOpReduction) {
       EXPECT_EQ("Identity", node.op());
       EXPECT_EQ(2, node.input_size());
       EXPECT_EQ("v", node.input(0));
-      EXPECT_EQ("^v", node.input(1));
+      EXPECT_EQ("^i", node.input(1));
     }
   }
   EXPECT_TRUE(found);
@@ -794,20 +1269,20 @@ TEST_F(ConstantFoldingTest, NoOpReshape) {
       EXPECT_EQ("Identity", node.op());
       ASSERT_EQ(3, node.input_size());
       EXPECT_EQ("v1", node.input(0));
-      EXPECT_EQ("^d1", node.input(1));
-      EXPECT_EQ("^v1", node.input(2));
+      EXPECT_EQ("^i1", node.input(1));
+      EXPECT_EQ("^d1", node.input(2));
     } else if (node.name() == "r3") {
       ++found;
       EXPECT_EQ("Identity", node.op());
       ASSERT_EQ(2, node.input_size());
       EXPECT_EQ("v3", node.input(0));
-      EXPECT_EQ("^v3", node.input(1));
+      EXPECT_EQ("^i3", node.input(1));
     } else if (node.name() == "r4") {
       ++found;
       EXPECT_EQ("Identity", node.op());
       ASSERT_EQ(2, node.input_size());
       EXPECT_EQ("v4", node.input(0));
-      EXPECT_EQ("^v4", node.input(1));
+      EXPECT_EQ("^i4", node.input(1));
     } else if (node.name() == "r2") {
       ++found;
       EXPECT_EQ("Reshape", node.op());
@@ -879,19 +1354,19 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs) {
     if (node.name() == "o1") {
       ++found;
       EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("ConstantFolding/f-0", node.input(0));
+      EXPECT_EQ("ConstantFolding/f-bcastargs-0", node.input(0));
     } else if (node.name() == "o2") {
       ++found;
       EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("ConstantFolding/f-1", node.input(0));
-    } else if (node.name() == "ConstantFolding/f-0") {
+      EXPECT_EQ("ConstantFolding/f-bcastargs-1", node.input(0));
+    } else if (node.name() == "ConstantFolding/f-bcastargs-0") {
       ++found;
       EXPECT_EQ("Const", node.op());
       EXPECT_EQ(1, node.input_size());
       EXPECT_EQ("^f", node.input(0));
       EXPECT_EQ(0, TensorShape(node.attr().at("value").tensor().tensor_shape())
                        .num_elements());
-    } else if (node.name() == "ConstantFolding/f-1") {
+    } else if (node.name() == "ConstantFolding/f-bcastargs-1") {
       ++found;
       EXPECT_EQ("Const", node.op());
       EXPECT_EQ(1, node.input_size());
@@ -901,21 +1376,14 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs) {
     } else if (node.name() == "p1") {
       ++found;
       EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("ConstantFolding/i-0", node.input(0));
+      EXPECT_EQ("i", node.input(0));
     } else if (node.name() == "p2") {
       ++found;
       EXPECT_EQ(1, node.input_size());
       EXPECT_EQ("i:1", node.input(0));
-    } else if (node.name() == "ConstantFolding/i-0") {
-      ++found;
-      EXPECT_EQ("Const", node.op());
-      EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("^i", node.input(0));
-      EXPECT_EQ(0, TensorShape(node.attr().at("value").tensor().tensor_shape())
-                       .num_elements());
     }
   }
-  EXPECT_EQ(7, found);
+  EXPECT_EQ(6, found);
 }
 
 TEST_F(ConstantFoldingTest, MaterializeReductionIndices) {
@@ -963,3 +1431,5 @@ TEST_F(ConstantFoldingTest, MaterializeReductionIndices) {
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
+
+//  LocalWords:  NewRootScope
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index bd8a58d81452fbe93e6dfa8e67c939b84803bdac..1f68ecbade9147b652ac970aa1c5ec4b056209c7 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/dependency_optimizer.h"
 
+#include <unordered_map>
 #include <unordered_set>
 
 #include "tensorflow/core/framework/node_def.pb.h"
@@ -23,8 +24,10 @@ limitations under the License.
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/optimizers/constant_folding.h"
+#include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/util/device_name_utils.h"
 
@@ -40,7 +43,7 @@ int RemoveInput(NodeDef* node, const string& input, NodeMap* node_map) {
     if (node->input(pos) == input) {
       node->mutable_input()->SwapElements(pos, node->input_size() - 1);
       node->mutable_input()->RemoveLast();
-      node_map->RemoveOutput(node->name(), NodeName(input));
+      node_map->RemoveOutput(NodeName(input), node->name());
     } else {
       ++pos;
     }
@@ -49,7 +52,7 @@ int RemoveInput(NodeDef* node, const string& input, NodeMap* node_map) {
   return num_removed;
 }
 
-// Remove dulicate control inputs.
+// Remove duplicate control inputs.
 void PruneControlInputs(NodeDef* node) {
   std::unordered_set<string> inputs;
   int pos = 0;
@@ -77,6 +80,7 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) {
     return false;
   }
   if (!fetch_nodes_known_ || NumNonControlOutputs(node, *node_map_) > 0) {
+    // The output values of this node may be needed.
     return false;
   }
   if (IsMerge(node) || IsSwitch(node)) {
@@ -88,6 +92,9 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) {
   if (!IsFreeOfSideEffect(node)) {
     return false;
   }
+  if (node.op() == "ControlTrigger") {
+    return false;
+  }
   if (node.op().rfind("Submodel", 0) == 0) {
     return false;
   }
@@ -97,17 +104,68 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) {
     return false;
   }
 
-  // TODO(rmlarsen): We have to skip Identity nodes to make an obsolete test in
-  // python/training/session_manager_test.py pass. See if we can fix or get rid
-  // of that test.
+  // Don't turn Identity nodes inserted by Grappler after Switch into NoOp,
+  // since we cannot anchor control dependencies on Switch nodes.
+  // Don't remove Identity nodes corresponding to Variable reads.
+  if (IsIdentity(node)) {
+    const NodeDef* input = node_map_->GetNode(NodeName(node.input(0)));
+    if (input != nullptr) {
+      if (IsVariable(*input) ||
+          (StringPiece(node.name()).starts_with(kConstantFoldingCtrl) &&
+           IsSwitch(*input))) {
+        return false;
+      }
+    }
+  }
+
   const std::unordered_set<string> do_not_rewrite_ops{
-      "Assert", "CheckNumerics",         "Identity",    "_Retval",
-      "_Arg",   "_ParallelConcatUpdate", "_TPUExecute", "_TPUCompile"};
+      "Assert",     "CheckNumerics",         "_Retval",
+      "_Arg",       "_ParallelConcatUpdate", "_TPUExecute",
+      "_TPUCompile"};
   return do_not_rewrite_ops.find(node.op()) == do_not_rewrite_ops.end();
 }
 
-string DependencyOptimizer::TryOptimizeDependencies(
-    NodeDef* node, SetVector<NodeDef*>* nodes_to_simplify) {
+void DependencyOptimizer::OptimizeNode(int node_idx,
+                                       SetVector<int>* nodes_to_simplify,
+                                       std::set<int>* nodes_to_delete) {
+  NodeDef* node = optimized_graph_->mutable_node(node_idx);
+
+  // Constant nodes with no input control dependency are always executed early,
+  // so we can prune all their output control dependencies.
+  if (IsConstant(*node) && node->input_size() == 0) {
+    const std::set<NodeDef*> output_nodes = node_map_->GetOutputs(node->name());
+    for (NodeDef* fanout : output_nodes) {
+      bool optimize_fanout = false;
+      bool data_connection = false;
+      for (int i = fanout->input_size() - 1; i >= 0; --i) {
+        int pos;
+        string input_name = ParseNodeName(fanout->input(i), &pos);
+        if (input_name == node->name()) {
+          if (pos < 0) {
+            fanout->mutable_input()->SwapElements(i, fanout->input_size() - 1);
+            fanout->mutable_input()->RemoveLast();
+            optimize_fanout = true;
+          } else {
+            data_connection = true;
+          }
+        }
+      }
+      if (optimize_fanout) {
+        nodes_to_simplify->PushBack(node_to_idx_[fanout]);
+        if (!data_connection) {
+          node_map_->RemoveOutput(node->name(), fanout->name());
+        }
+      }
+    }
+    if (node_map_->GetOutputs(node->name()).empty() && fetch_nodes_known_ &&
+        nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) {
+      // Mark the node for deletion.
+      nodes_to_delete->insert(node_to_idx_[node]);
+    }
+
+    return;
+  }
+
   // Change ops that only have control dependencies as outputs to NoOps.
   if (node->op() != "NoOp" && SafeToConvertToNoOp(*node)) {
     VLOG(1) << "***** Replacing  " << node->name() << " (" << node->op()
@@ -117,7 +175,7 @@ string DependencyOptimizer::TryOptimizeDependencies(
     std::unordered_set<string> ctrl_inputs;
     int pos = 0;
     while (pos < node->input_size()) {
-      const string& old_input = node->input(pos);
+      const string old_input = node->input(pos);
       if (IsControlInput(old_input)) {
         if (!ctrl_inputs.insert(old_input).second) {
           // We found a duplicate control input. Remove it.
@@ -133,19 +191,18 @@ string DependencyOptimizer::TryOptimizeDependencies(
       if (ctrl_inputs.insert(ctrl_input).second) {
         node->set_input(pos, ctrl_input);
         node_map_->UpdateInput(node->name(), old_input, ctrl_input);
-        auto old_input_node = node_map_->GetNode(old_input);
-        nodes_to_simplify->PushBack(old_input_node);
+        const NodeDef* old_input_node = node_map_->GetNode(old_input);
+        nodes_to_simplify->PushBack(node_to_idx_[old_input_node]);
       }
       ++pos;
     }
     node->set_op("NoOp");
     node->clear_attr();
-    nodes_to_simplify->PushBack(node);
-    return "";
   }
 
-  // Remove NoOp nodes if their fan-in or fan-out is less than 2.
-  // The non-trivial rewrites take the following form:
+  // Remove NoOp nodes if the product of their fan-in and fan-out is less than
+  // or equal to the sum of the fan-in and fan-out. The non-trivial rewrites
+  // take the following form:
   //
   // Case a)
   //    x --^> +------+                x --^> +---+
@@ -158,20 +215,60 @@ string DependencyOptimizer::TryOptimizeDependencies(
   //    x --^> | NoOp | --^> b  ==>    | x | --^> b
   //           |      | ...            |   | ...
   //           +------+ --^> c         +---+ --^> c
-  if (node->op() == "NoOp" &&
-      nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) {
-    const auto output_nodes = node_map_->GetOutputs(node->name());
+  // Case c)
+  //           +------+                x ---^> a
+  //    x --^> | NoOp | --^> a  ==>      \/
+  //    y --^> |      | --^> b           /\
+  //           +------+                y ---^> b
+  //
+  // We only apply this optimization if we don't increase the number of control
+  // edges across device boundaries, e.g. in cases a) and b) if NoOp and
+  // a and x, respectively, are on the same device. Control edges across device
+  // boundaries require inter-device communication (Send/Recv pairs to be
+  // inserted in the graph), which is very costly.
+
+  if (node->op() == "NoOp") {
+    const auto& output_node_set = node_map_->GetOutputs(node->name());
+    const std::vector<NodeDef*> output_nodes(output_node_set.begin(),
+                                             output_node_set.end());
     const int num_outputs = output_nodes.size();
     const int num_inputs = node->input_size();
-    if (num_inputs > 1 && num_outputs > 1) {
-      return "";
+
+    if (num_inputs * num_outputs > num_inputs + num_outputs) {
+      return;
     }
-    VLOG(1) << "***** Rerouting input around  " << node->name();
+    VLOG(1) << "***** Rerouting input around " << node->name();
     std::vector<NodeDef*> input_nodes;
     for (int i = 0; i < num_inputs; ++i) {
       NodeDef* tmp = node_map_->GetNode(node->input(i));
-      if (tmp != nullptr) {
-        input_nodes.push_back(tmp);
+      CHECK_NE(tmp, nullptr);
+      input_nodes.push_back(tmp);
+    }
+
+    // Make sure that we don't increase the number of control edges that cross
+    // device boundaries.
+    if ((num_inputs == 1 && num_outputs > 1 &&
+         input_nodes[0]->device() != node->device()) ||
+        (num_inputs > 1 && num_outputs == 1 &&
+         output_nodes[0]->device() != node->device())) {
+      return;
+    }
+    if (num_inputs == 2 && num_outputs == 2) {
+      const string& noop_dev = node->device();
+      const string& in0_dev = input_nodes[0]->device();
+      const string& in1_dev = input_nodes[1]->device();
+      const string& out0_dev = output_nodes[0]->device();
+      const string& out1_dev = output_nodes[1]->device();
+      const int num_cross_before = static_cast<int>(in0_dev != noop_dev) +
+                                   static_cast<int>(in1_dev != noop_dev) +
+                                   static_cast<int>(out0_dev != noop_dev) +
+                                   static_cast<int>(out1_dev != noop_dev);
+      const int num_cross_after = static_cast<int>(in0_dev != out0_dev) +
+                                  static_cast<int>(in0_dev != out1_dev) +
+                                  static_cast<int>(in1_dev != out0_dev) +
+                                  static_cast<int>(in1_dev != out1_dev);
+      if (num_cross_after > num_cross_before) {
+        return;
       }
     }
     for (auto consumer : output_nodes) {
@@ -179,14 +276,14 @@ string DependencyOptimizer::TryOptimizeDependencies(
       VLOG(1) << "***** Considering consumer  " << consumer->name() << "\n"
               << consumer->DebugString();
       for (int i = 0; i < num_inputs; ++i) {
-        const string& input = node->input(i);
+        const NodeDef* input = input_nodes[i];
         // Forward dependency from input to consumer if it doesn't already
         // depend on it.
-        if (node_map_->GetOutputs(NodeName(input)).count(consumer) == 0) {
-          consumer->add_input(input);
+        if (node_map_->GetOutputs(input->name()).count(consumer) == 0) {
+          consumer->add_input(AsControlDependency(input->name()));
           updated_consumer = true;
-          node_map_->AddOutput(NodeName(input), consumer->name());
-          nodes_to_simplify->PushBack(input_nodes[i]);
+          node_map_->AddOutput(input->name(), consumer->name());
+          nodes_to_simplify->PushBack(node_to_idx_[input]);
         }
       }
       // Remove dependency on node from consumer.
@@ -195,83 +292,213 @@ string DependencyOptimizer::TryOptimizeDependencies(
       if (updated_consumer) {
         VLOG(1) << "***** Updated consumer  " << consumer->name() << " ("
                 << consumer->op() << ")";
-        nodes_to_simplify->PushBack(consumer);
+        nodes_to_simplify->PushBack(node_to_idx_[consumer]);
       }
     }
 
-    // Clear all (control) inputs to this NoOp node.
-    if (fetch_nodes_known_) {
+    node_map_->RemoveOutputs(node->name());
+    if (fetch_nodes_known_ &&
+        nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) {
+      // Mark the node for deletion.
+      nodes_to_delete->insert(node_idx);
+
+      // Unconnect the node from its inputs to enable further optimizations.
       node_map_->RemoveInputs(node->name());
       node->clear_input();
     }
   }
+}
 
-  return "";
+void DependencyOptimizer::CleanControlInputs() {
+  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
+    PruneControlInputs(optimized_graph_->mutable_node(i));
+  }
+}
+
+void DependencyOptimizer::DeleteNodes(const std::set<int>& nodes_to_delete) {
+  int last = optimized_graph_->node_size() - 1;
+  for (auto it = nodes_to_delete.rbegin(); it != nodes_to_delete.rend(); ++it) {
+    const int index = *it;
+    optimized_graph_->mutable_node()->SwapElements(index, last);
+    last--;
+  }
+  optimized_graph_->mutable_node()->DeleteSubrange(last + 1,
+                                                   nodes_to_delete.size());
+  // Rebuild the NodeMap which was invalidated by the node swapping above.
+  node_map_.reset(new NodeMap(optimized_graph_));
+  BuildNodeToIdx();
 }
 
 Status DependencyOptimizer::OptimizeDependencies() {
-  // TODO(rmlarsen,bsteiner): The following code is similar to the control loop
-  // in the ArithmeticOptimizer. Dedup this.
-  SetVector<NodeDef*> nodes_to_simplify;
+  SetVector<int> nodes_to_simplify;
+  std::set<int> nodes_to_delete;
   for (int i = 0; i < optimized_graph_->node_size(); ++i) {
-    NodeDef* node = optimized_graph_->mutable_node(i);
-    if (node->op() == "NoOp" || SafeToConvertToNoOp(*node)) {
-      PruneControlInputs(node);
-      nodes_to_simplify.PushBack(node);
+    const NodeDef& node = optimized_graph_->node(i);
+    if (node.op() == "NoOp" || IsConstant(node) || SafeToConvertToNoOp(node)) {
+      nodes_to_simplify.PushBack(i);
     }
   }
   while (!nodes_to_simplify.Empty()) {
-    NodeDef* node = nodes_to_simplify.PopBack();
-    const string simplified_tensor =
-        TryOptimizeDependencies(node, &nodes_to_simplify);
-    if (!simplified_tensor.empty() &&
-        NodeName(simplified_tensor) != node->name()) {
-      // Always consider simplified_tensor for further optimizations.
-      NodeDef* simplified_node = node_map_->GetNode(simplified_tensor);
-      if (simplified_node != nullptr) {
-        nodes_to_simplify.PushBack(simplified_node);
+    OptimizeNode(nodes_to_simplify.PopBack(), &nodes_to_simplify,
+                 &nodes_to_delete);
+  }
+
+  if (fetch_nodes_known_) {
+    VLOG(1) << "Deleted " << nodes_to_delete.size() << " out of "
+            << optimized_graph_->node_size() << " nodes.";
+    DeleteNodes(nodes_to_delete);
+  }
+  return Status::OK();
+}
+
+Status DependencyOptimizer::TransitiveReduction() {
+  // PRECONDITION: optimized_graph_ must be sorted topologically.
+  const int num_nodes = optimized_graph_->node_size();
+  // Set up a compressed version of the graph to save a constant factor in the
+  // expensive algorithm below. Also cache the set of control outputs and the
+  // highest index of a target of any control output from each node.
+  int num_controls = 0;
+  std::vector<gtl::InlinedVector<int, 4>> inputs(num_nodes);
+  std::vector<gtl::InlinedVector<std::pair<int, int>, 2>> control_outputs(
+      num_nodes);
+  for (int node_idx = 0; node_idx < num_nodes; ++node_idx) {
+    const NodeDef& node = optimized_graph_->node(node_idx);
+    if (ModifiesFrameInfo(node) || !HasOpDef(node)) {
+      // Ignore function nodes and nodes that modify frame info.
+      continue;
+    }
+    for (int input_slot = 0; input_slot < node.input_size(); ++input_slot) {
+      const string& input = node.input(input_slot);
+      const NodeDef* input_node = node_map_->GetNode(input);
+      if (ModifiesFrameInfo(*input_node) || IsMerge(*input_node)) {
+        // Ignore edges from nodes that modify frame info and from Merge nodes,
+        // because we cannot know which of it's input paths executes.
+        continue;
+      }
+      const int input_node_idx = node_to_idx_[input_node];
+      inputs[node_idx].push_back(input_node_idx);
+      if (IsControlInput(input)) {
+        ++num_controls;
+        control_outputs[input_node_idx].emplace_back(node_idx, input_slot);
       }
-      // When `node` is simplifed to another node rather than in-place, the
-      // consumers of `node` are already redirected to `simplified_tensor`.
-      // Re-push the consumers into `nodes_to_simplify` for further
-      // optimizations.
-      std::set<NodeDef*> consumers = node_map_->GetOutputs(node->name());
-      for (NodeDef* consumer : consumers) {
-        // Update `consumer`'s use of `node` to `input`'s operand.
-        for (int i = 0; i < consumer->input_size(); ++i) {
-          int operand_pos;
-          string operand_node_name =
-              ParseNodeName(consumer->input(i), &operand_pos);
-          if (operand_node_name == node->name()) {
-            *consumer->mutable_input(i) =
-                (operand_pos < 0
-                     ? AsControlDependency(NodeName(simplified_tensor))
-                     : simplified_tensor);
+    }
+  }
+
+  // Run the longest path in DAG algorithm for each source node that has control
+  // outputs. If, for any target node of a control output, there exists a path
+  // of length > 1, we can drop that control dependency.
+  int num_controls_removed = 0;
+  std::vector<int> longest_distance(num_nodes);
+  // Map from target_index -> set of (input_slot, source_index), representing
+  // the control edges to remove. We sort them in reverse order by input slot,
+  // such that when we swap them out so we don't clobber the
+  // node(target).input() repeated field.
+  typedef std::pair<int, int> InputSlotAndSource;
+  std::unordered_map<
+      int, std::set<InputSlotAndSource, std::greater<InputSlotAndSource>>>
+      control_edges_to_remove;
+  for (int source = 0; source < num_nodes; ++source) {
+    int highest_control_target = -1;
+    for (const auto& control_output : control_outputs[source]) {
+      if (control_output.first > highest_control_target) {
+        highest_control_target = control_output.first;
+      }
+    }
+    if (highest_control_target <= source) {
+      continue;
+    }
+    std::fill(longest_distance.begin() + source,
+              longest_distance.begin() + highest_control_target + 1, 0);
+    for (int target = source + 1; target <= highest_control_target; ++target) {
+      for (int input : inputs[target]) {
+        // If the input node is before source in the topo order, no path
+        // source -> input -> target can exits and we can skip it.
+        // Also only extend a path from the source itself or from nodes that
+        // have a path from source, indicated by longest_distance[input] > 0.
+        if (input == source ||
+            (input > source && longest_distance[input] > 0)) {
+          // If source -> input -> target is longer than the longest
+          // path so far from source -> target, update the longest_distance.
+          int candidate_longest_distance = longest_distance[input] + 1;
+          if (candidate_longest_distance > longest_distance[target]) {
+            longest_distance[target] = candidate_longest_distance;
           }
         }
-        node_map_->UpdateInput(consumer->name(), node->name(),
-                               simplified_tensor);
-        nodes_to_simplify.PushBack(consumer);
+      }
+    }
+
+    // If the longest path from source to target of a control dependency is
+    // longer than 1, there exists an alternate path, and we can eliminate the
+    // redundant direct control dependency.
+    for (const auto& control_output : control_outputs[source]) {
+      const int target = control_output.first;
+      if (longest_distance[target] > 1) {
+        const int input_slot = control_output.second;
+        control_edges_to_remove[target].emplace(input_slot, source);
+        VLOG(1) << "Removing edge from:\n"
+                << optimized_graph_->node(source).DebugString() << "\n\nto:\n\n"
+                << optimized_graph_->node(target).DebugString();
       }
     }
   }
-  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
-    NodeDef* node = optimized_graph_->mutable_node(i);
-    PruneControlInputs(node);
+
+  for (const auto& it : control_edges_to_remove) {
+    const int target = it.first;
+    NodeDef* target_node = optimized_graph_->mutable_node(target);
+    for (const InputSlotAndSource& slot_and_source : it.second) {
+      const int input_slot = slot_and_source.first;
+      const int source = slot_and_source.second;
+      const NodeDef& source_node = optimized_graph_->node(source);
+      CHECK_LT(input_slot, target_node->input_size());
+      target_node->mutable_input()->SwapElements(input_slot,
+                                                 target_node->input_size() - 1);
+      node_map_->RemoveOutput(source_node.name(), target_node->name());
+      target_node->mutable_input()->RemoveLast();
+      ++num_controls_removed;
+    }
   }
+  VLOG(1) << "Removed " << num_controls_removed << " out of " << num_controls
+          << " control dependencies";
   return Status::OK();
 }
 
+void DependencyOptimizer::BuildNodeToIdx() {
+  // Set up &node -> index map.
+  node_to_idx_.clear();
+  for (int i = 0; i < optimized_graph_->node_size(); ++i) {
+    const NodeDef& node = optimized_graph_->node(i);
+    node_to_idx_[&node] = i;
+  }
+}
+
 Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
                                      GraphDef* optimized_graph) {
   optimized_graph_ = optimized_graph;
   *optimized_graph_ = item.graph;
   nodes_to_preserve_ = item.NodesToPreserve();
-  node_map_.reset(new NodeMap(optimized_graph));
   fetch_nodes_known_ = !item.fetch.empty();
-  VLOG(1) << "Graph before optimization:\n" << optimized_graph_->DebugString();
-  TF_RETURN_IF_ERROR(OptimizeDependencies());
-  VLOG(1) << "Graph after optimization:\n" << optimized_graph_->DebugString();
+
+  CleanControlInputs();
+  const int num_iterations = 2;
+  for (int iteration = 0; iteration < num_iterations; ++iteration) {
+    Status topo_sort_status;
+    // Perform topological sort to prepare the graph for transitive reduction.
+    topo_sort_status = TopologicalSort(optimized_graph_);
+
+    // Set up index-based graph datastructures to speed up analysis steps below.
+    node_map_.reset(new NodeMap(optimized_graph_));
+    BuildNodeToIdx();
+
+    if (topo_sort_status.ok()) {
+      // Remove redundant control dependencies.
+      TF_RETURN_IF_ERROR(TransitiveReduction());
+    } else {
+      LOG(ERROR) << topo_sort_status.error_message();
+    }
+
+    // Turn nodes with only control outputs into NoOps, prune NoOps.
+    TF_RETURN_IF_ERROR(OptimizeDependencies());
+  }
 
   return Status::OK();
 }
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.h b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
index a9d33227449ac05e74bbd26c5e51c2deac5644fd..3f6f418bee69cc86d8865bccd266803ade2ef2c1 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.h
@@ -43,25 +43,30 @@ class DependencyOptimizer : public GraphOptimizer {
                 const GraphDef& optimized_graph, double result) override;
 
  private:
-  Status OptimizeDependencies();
-
   // Returns true if it is safe to convert node to NoOp.
   bool SafeToConvertToNoOp(const NodeDef& node);
-
-  // Tries to simplify the expression that roots at `node` and replaces the uses
-  // of `node` to the simplified expression. Returns the name of the simplified
-  // tensor (e.g. "split:1") or an empty string if no simplification is
-  // performed.
-  string TryOptimizeDependencies(NodeDef* node,
-                                 SetVector<NodeDef*>* nodes_to_simplify);
-
-  bool HasOnlyControlOutputs(const NodeDef* node);
+  // Removes all duplicate control dependencies.
+  void CleanControlInputs();
+  // Builds a map from the &optimized_graph_->node(i) to i.
+  void BuildNodeToIdx();
+  // Removes the given set of nodes from the graph.
+  void DeleteNodes(const std::set<int>& nodes_to_delete);
+  // Tries to optimize the node with the given index, possibly additional
+  // optimizations by inserting nodes in nodes_to_simplify, and pruning nodes by
+  // inserting them in nodes_to_delete.
+  void OptimizeNode(int node_idx, SetVector<int>* nodes_to_simplify,
+                    std::set<int>* nodes_to_delete);
+  // Eliminates redundant control dependencies by computing the transitive
+  // reduction of the graph.
+  Status TransitiveReduction();
+  // Main driver of dependency optimizations.
+  Status OptimizeDependencies();
 
   RewriterConfig::Toggle opt_level_;
-
   bool fetch_nodes_known_;
   std::unordered_set<string> nodes_to_preserve_;
   std::unique_ptr<NodeMap> node_map_;
+  std::unordered_map<const NodeDef*, int> node_to_idx_;
   GraphDef* optimized_graph_;  // Not owned.
 };
 
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
index 90f5ec8c3fca8fdb8473f9d3c9868a710fa72b2b..f5027a4a99e4f28b4b49df914e9247a008036c20 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/optimizers/constant_folding.h"
 #include "tensorflow/core/grappler/optimizers/model_pruner.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -59,10 +60,47 @@ TEST_F(DependencyOptimizerTest, NoOp) {
   VerifyGraphsEqual(item.graph, output, __FUNCTION__);
 }
 
-TEST_F(DependencyOptimizerTest, ChangeToNoop) {
+TEST_F(DependencyOptimizerTest, DependenciesDrivenByConstants) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
   Output y = ops::Const(s.WithOpName("y"), {1.0f, 2.0f}, {1, 2});
+  Output z = ops::Const(s.WithOpName("z"), {1.0f, 2.0f}, {1, 2});
+  Output add = ops::Add(s.WithOpName("add"), x, y);
+  Output id1 =
+      ops::Identity(s.WithOpName("id1").WithControlDependencies(x), add);
+  Output id2 = ops::Identity(
+      s.WithOpName("id2").WithControlDependencies(y).WithControlDependencies(z),
+      add);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch.push_back("id1");
+  item.fetch.push_back("id2");
+
+  DependencyOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  // Run the optimizer twice to make sure the rewrite is idempotent.
+  item.graph.Swap(&output);
+  status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  // The 'z' node should have been optimized away leaving only 5 nodes.
+  EXPECT_EQ(5, output.node_size());
+
+  for (const NodeDef& node : item.graph.node()) {
+    if (node.name() == "id1" || node.name() == "id2") {
+      EXPECT_EQ(1, node.input_size());
+      EXPECT_EQ("add", node.input(0));
+    }
+  }
+}
+
+TEST_F(DependencyOptimizerTest, ChangeToNoop) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output x = ops::RandomUniform(s.WithOpName("x"), {1, 2}, DT_FLOAT);
+  Output y = ops::RandomUniform(s.WithOpName("y"), {1, 2}, DT_FLOAT);
   Output add = ops::Add(s.WithOpName("add"), x, y);
   Output id1 =
       ops::Identity(s.WithOpName("id1").WithControlDependencies(add), x);
@@ -85,21 +123,73 @@ TEST_F(DependencyOptimizerTest, ChangeToNoop) {
 
   EXPECT_EQ(item.graph.node_size(), output.node_size());
   for (int i = 0; i < item.graph.node_size(); ++i) {
-    const NodeDef& original = item.graph.node(i);
-    const NodeDef& optimized = output.node(i);
-    EXPECT_EQ(original.name(), optimized.name());
-    if (original.name() == "add") {
-      EXPECT_EQ("NoOp", optimized.op());
-    } else {
-      EXPECT_EQ(original.op(), optimized.op());
+    const NodeDef& node = item.graph.node(i);
+    if (node.name() == "add") {
+      EXPECT_EQ("NoOp", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("^x", node.input(0));
+      EXPECT_EQ("^y", node.input(1));
+    } else if (node.name() == "id1") {
+      EXPECT_EQ("Identity", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("x", node.input(0));
+      EXPECT_EQ("^y", node.input(1));
+    } else if (node.name() == "id2") {
+      EXPECT_EQ("Identity", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("y", node.input(0));
+      EXPECT_EQ("^x", node.input(1));
     }
-    EXPECT_EQ(original.input_size(), optimized.input_size());
-    for (int j = 0; j < original.input_size(); ++j) {
-      if (original.name() == "add") {
-        EXPECT_EQ(AsControlDependency(original.input(j)), optimized.input(j));
-      } else {
-        EXPECT_EQ(original.input(j), optimized.input(j));
-      }
+  }
+}
+
+TEST_F(DependencyOptimizerTest, ChangeToNoop_SwitchIdentity) {
+  // This tests that we don't try to repeatedly add Identity nodes
+  // with names like "ConstantFoldingCtrl/foo/bar/switch_$port" when
+  // multiple nodes reading the same output of a Switch node get
+  // optimized (e.g. constant folded or turned into NoOps).
+  tensorflow::Scope scope = tensorflow::Scope::NewRootScope();
+  ops::Variable v_in(scope.WithOpName("v_in"), {3}, DT_FLOAT);
+  ops::Variable v_ctrl(scope.WithOpName("v_ctrl"), {}, DT_BOOL);
+  ops::Switch s(scope.WithOpName("switch"), v_in, v_ctrl);
+  // "neg" should be turned into a NoOp with a control dependency from
+  // the existing Identity node "ConstantFoldingCtrl/switch_1" and
+  // subsequently eliminated completely from the graph.
+  Output neg = ops::Neg(scope.WithOpName("neg"), s.output_true);
+  // c1 could be a result of constant folding some node fed by neg.
+  Output c1 = ops::Const(scope.WithOpName("c1").WithControlDependencies(neg),
+                         {1.0f, 2.0f}, {1, 2});
+  Output ctrl_dep_id = ops::Identity(
+      scope.WithOpName("ConstantFoldingCtrl/switch_1"), s.output_true);
+  // c2 could be a result of constant folding a node fed by s, which also
+  // added the ctrl_dep_id node.
+  Output c2 =
+      ops::Const(scope.WithOpName("c2").WithControlDependencies(ctrl_dep_id),
+                 {1.0f, 2.0f}, {1, 2});
+  Output neg1 = ops::Neg(scope.WithOpName("neg1"), s.output_false);
+
+  GrapplerItem item;
+  TF_CHECK_OK(scope.ToGraphDef(&item.graph));
+  item.fetch.push_back("c1");
+  item.fetch.push_back("c2");
+  item.fetch.push_back("neg1");
+
+  DependencyOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  EXPECT_EQ(item.graph.node_size() - 1, output.node_size());
+  for (int i = 0; i < output.node_size(); ++i) {
+    const NodeDef& node = output.node(i);
+    // "neg" should be eliminated.
+    EXPECT_NE("neg", node.name());
+    // A control dep from "^ConstantFoldingCtrl/switch_1"
+    // should be attached to "c1".
+    if (node.name() == "c1") {
+      EXPECT_EQ("Const", node.op());
+      EXPECT_EQ(1, node.input_size());
+      EXPECT_EQ("^ConstantFoldingCtrl/switch_1", node.input(0));
     }
   }
 }
@@ -107,8 +197,8 @@ TEST_F(DependencyOptimizerTest, ChangeToNoop) {
 // TODO(rmlarsen): Add test to make sure we skip Switch and Merge.
 TEST_F(DependencyOptimizerTest, ChangeToNoop_NoFetch) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
-  Output y = ops::Const(s.WithOpName("y"), {1.0f, 2.0f}, {1, 2});
+  Output x = ops::RandomUniform(s.WithOpName("x"), {1, 2}, DT_FLOAT);
+  Output y = ops::RandomUniform(s.WithOpName("y"), {1, 2}, DT_FLOAT);
   Output add = ops::Add(s.WithOpName("add"), x, y);
   Output id1 =
       ops::Identity(s.WithOpName("id1").WithControlDependencies(add), x);
@@ -123,12 +213,13 @@ TEST_F(DependencyOptimizerTest, ChangeToNoop_NoFetch) {
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
 
+  TF_CHECK_OK(TopologicalSort(&item.graph));
   VerifyGraphsEqual(item.graph, output, __FUNCTION__);
 }
 
 TEST_F(DependencyOptimizerTest, RemoveNoOps_EmptyInputOrOutput) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output x = ops::Const(s, {1.0f, 2.0f}, {1, 2});
+  Output x = ops::RandomUniform(s, {1, 2}, DT_FLOAT);
   auto noop1 = ops::NoOp(s);
   auto noop2 = ops::NoOp(s.WithControlDependencies(x));
   Output id = ops::Identity(s.WithControlDependencies({noop1.operation}), x);
@@ -152,15 +243,50 @@ TEST_F(DependencyOptimizerTest, RemoveNoOps_EmptyInputOrOutput) {
       EXPECT_EQ(0, node.input_size());
     } else if (node.name() == "Identity") {
       EXPECT_EQ(1, node.input_size());
-      EXPECT_EQ("Const", node.input(0));
+      EXPECT_EQ("RandomUniform", node.input(0));
     }
   }
 }
 
+TEST_F(DependencyOptimizerTest, RemoveNoOps_DeviceBoundaries) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output x = ops::RandomUniform(s.WithOpName("x").WithDevice("/CPU:0"), {1, 2},
+                                DT_FLOAT);
+  Output y = ops::RandomUniform(s.WithOpName("y").WithDevice("/CPU:0"), {1, 2},
+                                DT_FLOAT);
+  // NoOp with a single input- and two output dependencies.
+  auto noop = ops::NoOp(s.WithControlDependencies(x).WithDevice("/CPU:1"));
+  // NoOp with a two input- and a single output dependency.
+  auto noop_1 = ops::NoOp(
+      s.WithControlDependencies(x).WithControlDependencies(y).WithDevice(
+          "/CPU:0"));
+  Output id = ops::Identity(
+      s.WithControlDependencies({noop.operation}).WithDevice("/CPU:1"), x);
+  Output id_1 = ops::Identity(
+      s.WithControlDependencies({noop.operation, noop_1.operation})
+          .WithDevice("/CPU:1"),
+      y);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch.push_back("Identity");
+  item.fetch.push_back("Identity_1");
+
+  DependencyOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  // The optimization should be disabled to prevent increasing the number of
+  // nodes crossing device boundaries.
+  TF_CHECK_OK(TopologicalSort(&item.graph));
+  VerifyGraphsEqual(item.graph, output, __FUNCTION__);
+}
+
 TEST_F(DependencyOptimizerTest, RemoveNoOps_SingleInputOrOutput) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2});
-  Output y = ops::Const(s.WithOpName("y"), {1.0f, 2.0f}, {1, 2});
+  Output x = ops::RandomUniform(s.WithOpName("x"), {1, 2}, DT_FLOAT);
+  Output y = ops::RandomUniform(s.WithOpName("y"), {1, 2}, DT_FLOAT);
   // NoOp with a single input- and two output dependencies.
   auto noop = ops::NoOp(s.WithControlDependencies(x));
   // NoOp with a two input- and a single output dependency.
@@ -197,6 +323,72 @@ TEST_F(DependencyOptimizerTest, RemoveNoOps_SingleInputOrOutput) {
   }
 }
 
+TEST_F(DependencyOptimizerTest, Transitive_Reduction_Simple) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output c = ops::Const(s.WithOpName("c"), {1.0f, 2.0f}, {1, 2});
+  Output x = ops::Square(s.WithOpName("x"), c);
+  Output id1 = ops::Identity(s.WithOpName("id1"), x);
+  Output id2 =
+      ops::Identity(s.WithOpName("id2").WithControlDependencies({x}), id1);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch.push_back("id2");
+  DependencyOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  EXPECT_EQ(4, output.node_size());
+  EXPECT_EQ("id2", output.node(3).name());
+  EXPECT_EQ(1, output.node(3).input_size());
+  EXPECT_EQ("id1", output.node(3).input(0));
+}
+
+TEST_F(DependencyOptimizerTest, ChangeToNoop_Identity) {
+  tensorflow::Scope scope = tensorflow::Scope::NewRootScope();
+  ops::Variable v_in(scope.WithOpName("v_in"), {3}, DT_FLOAT);
+  Output id_after_var = ops::Identity(scope.WithOpName("id_after_var"), v_in);
+  ops::Variable v_ctrl(scope.WithOpName("v_ctrl"), {}, DT_BOOL);
+  ops::Switch s(
+      scope.WithOpName("switch").WithControlDependencies(id_after_var), v_in,
+      v_ctrl);
+  Output id0 = ops::Identity(scope.WithOpName("id0"), s.output_true);
+  Output grappler_added_id = ops::Identity(
+      scope.WithOpName("ConstantFoldingCtrl/switch_1"), s.output_true);
+  Output c1 = ops::Const(scope.WithOpName("c1")
+                             .WithControlDependencies(id0)
+                             .WithControlDependencies(id_after_var)
+                             .WithControlDependencies(grappler_added_id),
+                         {1.0f, 2.0f}, {1, 2});
+  Output id1 = ops::Identity(scope.WithOpName("id1"), c1);
+  Output fetch =
+      ops::Identity(scope.WithOpName("fetch").WithControlDependencies(id1), c1);
+
+  GrapplerItem item;
+  TF_CHECK_OK(scope.ToGraphDef(&item.graph));
+  item.fetch.push_back("c1");
+  item.fetch.push_back("fetch");
+
+  DependencyOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  EXPECT_EQ(item.graph.node_size() - 2, output.node_size());
+  for (int i = 0; i < output.node_size(); ++i) {
+    const NodeDef& node = output.node(i);
+    // "id0" and "id1" but neither "ConstantFoldingCtrl/switch_1" nor
+    // "id_after_var" should be eliminated.
+    EXPECT_NE("id0", node.name());
+    EXPECT_NE("id1", node.name());
+    if (node.name() == "c1") {
+      EXPECT_EQ("Const", node.op());
+      EXPECT_EQ(1, node.input_size());
+      EXPECT_EQ("^ConstantFoldingCtrl/switch_1", node.input(0));
+    }
+  }
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer.h b/tensorflow/core/grappler/optimizers/graph_optimizer.h
index 55a90dce88f91bf88e6c6ad4ff5f9d2804d539f9..42d9837312d25f3504c85f12883c4ac818157cdd 100644
--- a/tensorflow/core/grappler/optimizers/graph_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/graph_optimizer.h
@@ -41,7 +41,7 @@ class GraphOptimizer {
                           GraphDef* optimized_graph) = 0;
 
   // Method invoked by the framework so that it can provide feedback
-  // on how well the "optimize_output" (produced as *output from a
+  // on how well the "optimized_graph" (produced as *optimized_graph from a
   // call to Optimize) performed.  Lower "result" scores are better.
   virtual void Feedback(Cluster* cluster, const GrapplerItem& item,
                         const GraphDef& optimized_graph, double result) = 0;
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index d5563e9d4c62967e7bde2e85d419f3f6725e2b35..ea7b05d3810f7a4b9f6388e040df930526f6e47e 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <deque>
 #include <unordered_set>
 
 #include "tensorflow/core/framework/attr_value.pb.h"
@@ -35,17 +36,17 @@ namespace tensorflow {
 namespace grappler {
 namespace {
 
-const char kConcatConst[] = "LayoutOptimizerConcatConst";
-const char kSplitConst[] = "LayoutOptimizerSplitConst";
-const char kPermNHWCToNCHW[] = "LayoutOptimizerPermConstNHWCToNCHW";
-const char kPermNCHWToNHWC[] = "LayoutOptimizerPermConstNCHWToNHWC";
-const char kGatherAxisConst[] = "LayoutOptimizerGatherAxisConst";
-const char kTransposeNHWCToNCHW[] = "LayoutOptimizerTransposeNHWCToNCHW";
-const char kTransposeNCHWToNHWC[] = "LayoutOptimizerTransposeNCHWToNHWC";
-const char kPermVecNHWCToNCHW[] = "LayoutOptimizerPermVecNHWCToNCHW";
-const char kReshapeNHWCToNCHW[] = "LayoutOptimizerReshapeNHWCToNCHW";
-const char kReshapeConst[] = "LayoutOptimizerReshapeConst";
-const char kReductionConst[] = "LayoutOptimizerReductionConst";
+const char kSuffix[] = "LayoutOptimizer";
+const char kPermNHWCToNCHW[] = "PermConstNHWCToNCHW";
+const char kPermNCHWToNHWC[] = "PermConstNCHWToNHWC";
+const char kTransposeNHWCToNCHW[] = "TransposeNHWCToNCHW";
+const char kTransposeNCHWToNHWC[] = "TransposeNCHWToNHWC";
+const char kDimMapNHWCToNCHW[] = "DimMapNHWCToNCHW";
+const char kDimMapNCHWToNHWC[] = "DimMapNCHWToNHWC";
+const char kVecPermuteNHWCToNCHW[] = "VecPermuteNHWCToNCHW";
+const char kVecPermuteNCHWToNHWC[] = "VecPermuteNCHWToNHWC";
+const char kReshapeNHWCToNCHW[] = "ReshapeNHWCToNCHW";
+const char kReshapeConst[] = "ReshapeConst";
 
 std::set<string> GetOpsFormatSupported() {
   std::set<string> ops_format_supported = {
@@ -60,54 +61,193 @@ std::set<string> GetOpsFormatSupported() {
       "DepthwiseConv2dNativeBackpropInput",
       "DepthwiseConv2dNativeBackpropFilter",
       "FusedBatchNorm",
+      "FusedBatchNormV2",
       "FusedBatchNormGrad",
+      "FusedBatchNormGradV2",
       "FusedConv2DBiasActivation",
       "MaxPool",
+      "MaxPoolV2",
       "MaxPoolGrad",
+      "MaxPoolGradGrad",
+      "MaxPoolGradV2",
+      "MaxPoolGradGradV2",
       "SpaceToDepth",
       "DepthToSpace"};
   return ops_format_supported;
 }
 
 std::set<string> GetOpsFormatAgnostic() {
-  std::set<string> ops_format_agnostic = {"Add",
+  std::set<string> ops_format_agnostic = {"Abs",
+                                          "Add",
                                           "AddN",
+                                          "AddV2",
+                                          "Acos",
+                                          "Acosh",
+                                          "All",
+                                          "Angle",
+                                          "Any",
+                                          "ApproximateEqual",
+                                          "Asin",
+                                          "Asinh",
+                                          "Atan",
+                                          "Atan2",
+                                          "Atanh",
+                                          "Betainc",
+                                          "Bitcast",
+                                          "Cast",
+                                          "Ceil",
+                                          "CheckNumerics",
+                                          "Complex",
+                                          "ComplexAbs",
                                           "Concat",
                                           "ConcatV2",
+                                          "Conj",
+                                          "Cos",
+                                          "Cosh",
+                                          "Digamma",
+                                          "Div",
+                                          "Elu",
+                                          "EluGrad",
+                                          "Enter",
+                                          "Equal",
+                                          "Erf",
+                                          "Erfc",
+                                          "Exit",
+                                          "Exp",
+                                          "Expm1",
+                                          "Fill",
                                           "Floor",
+                                          "FloorDiv",
+                                          "FloorMod",
+                                          "Greater",
+                                          "GreaterEqual",
+                                          "GuaranteeConst",
+                                          "HistogramSummary",
                                           "Identity",
+                                          "IdentityN",
+                                          "Igamma",
+                                          "Igammac",
+                                          "Imag",
+                                          "Inv",
+                                          "InvGrad",
+                                          "IsFinite",
+                                          "IsInf",
+                                          "IsNan",
+                                          "Less",
+                                          "LessEqual",
+                                          "Lgamma",
+                                          "Log",
+                                          "LogicalAnd",
+                                          "LogicalNot",
+                                          "LogicalOr",
+                                          "Log1p",
+                                          "Max",
+                                          "Maximum",
+                                          "Mean",
+                                          "Merge",
+                                          "Min",
+                                          "Minimum",
+                                          "Mod",
                                           "Mul",
                                           "Neg",
+                                          "NextIteration",
+                                          "NotEqual",
+                                          "OnesLike",
                                           "Pad",
+                                          "PreventGradient",
+                                          "Prod",
+                                          "Polygamma",
+                                          "Pow",
+                                          "Real",
                                           "RealDiv",
+                                          "Reciprocal",
+                                          "ReciprocalGrad",
                                           "Relu",
                                           "Relu6",
+                                          "Relu6Grad",
                                           "ReluGrad",
+                                          "Rint",
+                                          "Select",
+                                          "Selu",
+                                          "SeluGrad",
+                                          "Shape",
+                                          "ShapeN",
                                           "Sigmoid",
+                                          "SigmoidGrad",
+                                          "Sign",
+                                          "Sin",
+                                          "Sinh",
                                           "Slice",
+                                          "Snapshot",
+                                          "Softplus",
+                                          "SoftplusGrad",
                                           "Split",
+                                          "SplitV",
+                                          "StridedSlice",
+                                          "StridedSliceGrad",
+                                          "Switch",
+                                          "Tile",
+                                          "TruncateDiv",
+                                          "TruncateMod",
+                                          "ReverseV2",
+                                          "Round",
+                                          "Rsqrt",
+                                          "RsqrtGrad",
+                                          "Sqrt",
+                                          "SqrtGrad",
+                                          "Square",
                                           "SquaredDifference",
                                           "Squeeze",
-                                          "Sub"};
+                                          "StopGradient",
+                                          "Sub",
+                                          "Sum",
+                                          "Tan",
+                                          "Tanh",
+                                          "TanhGrad",
+                                          "ZerosLike",
+                                          "Zeta"};
   return ops_format_agnostic;
 }
 
-bool IsNodeNHWCToNCHW(const string& node_name) {
-  const string transpose_node_prefix = kTransposeNHWCToNCHW;
-  string prefix = node_name.substr(0, transpose_node_prefix.length());
-  if (prefix.compare(transpose_node_prefix) == 0) {
+bool EndWith(const string& str, const string& ending) {
+  if (str.size() < ending.size()) return false;
+  if (str.substr(str.size() - ending.size(), ending.size()) == ending)
     return true;
-  }
   return false;
 }
 
-bool IsNodeNCHWToNHWC(const string& node_name) {
-  const string transpose_node_prefix = kTransposeNCHWToNHWC;
-  string prefix = node_name.substr(0, transpose_node_prefix.length());
-  if (prefix.compare(transpose_node_prefix) == 0) {
-    return true;
-  }
-  return false;
+bool IsNodeByLayoutOptimizer(const string& node_name) {
+  const string suffix = kSuffix;
+  return EndWith(node_name, suffix);
+}
+
+bool IsNodeType(const string& node_name, const string& type) {
+  const string suffix = strings::StrCat(type, "-", kSuffix);
+  return EndWith(node_name, suffix);
+}
+
+bool IsTransposeNHWCToNCHW(const string& node_name) {
+  return IsNodeType(node_name, kTransposeNHWCToNCHW);
+}
+
+bool IsTransposeNCHWToNHWC(const string& node_name) {
+  return IsNodeType(node_name, kTransposeNCHWToNHWC);
+}
+
+bool IsDimMapNHWCToNCHW(const string& node_name) {
+  return IsNodeType(node_name, kDimMapNHWCToNCHW);
+}
+
+bool IsDimMapNCHWToNHWC(const string& node_name) {
+  return IsNodeType(node_name, kDimMapNCHWToNHWC);
+}
+
+bool IsVecPermuteNHWCToNCHW(const string& node_name) {
+  return IsNodeType(node_name, kVecPermuteNHWCToNCHW);
+}
+
+bool IsVecPermuteNCHWToNHWC(const string& node_name) {
+  return IsNodeType(node_name, kVecPermuteNCHWToNHWC);
 }
 
 bool IsConcat(const NodeDef& node) {
@@ -120,17 +260,121 @@ bool IsConcatV1(const NodeDef& node) {
   return op == "Concat";
 }
 
+bool IsMaxPoolV2(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "MaxPoolV2";
+}
+
 bool IsMaxPoolGradV1(const NodeDef& node) {
   const auto& op = node.op();
   return op == "MaxPoolGrad";
 }
 
+bool IsMaxPoolGradV2(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "MaxPoolGradV2";
+}
+
+bool IsMaxPoolGradGradV1(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "MaxPoolGradGrad";
+}
+
+bool IsMaxPoolGradGradV2(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "MaxPoolGradGradV2";
+}
+
+bool IsUnaryGrad(const NodeDef& node) {
+  bool is_unary_grad =
+      IsEluGrad(node) || IsInvGrad(node) || IsReciprocalGrad(node) ||
+      IsRelu6Grad(node) || IsReluGrad(node) || IsRsqrtGrad(node) ||
+      IsSeluGrad(node) || IsSigmoidGrad(node) || IsSoftplusGrad(node) ||
+      IsSoftsignGrad(node) || IsSqrtGrad(node) || IsTanhGrad(node);
+  return is_unary_grad;
+}
+
+bool IsComparisonOp(const NodeDef& node) {
+  bool is_compare = IsApproximateEqual(node) || IsEqual(node) ||
+                    IsGreater(node) || IsGreaterEqual(node) || IsLess(node) ||
+                    IsLessEqual(node) || IsNotEqual(node);
+  return is_compare;
+}
+
+bool IsLogicalOp(const NodeDef& node) {
+  return IsLogicalAnd(node) || IsLogicalNot(node) || IsLogicalOr(node);
+}
+
+bool IsReduceOp(const NodeDef& node) {
+  return IsSum(node) || IsMean(node) || IsProd(node) || IsMax(node) ||
+         IsMin(node) || IsAll(node) || IsAny(node);
+}
+
+bool IsBinaryOp(const NodeDef& node) {
+  bool is_binary =
+      IsAdd(node) || IsAtan2(node) || IsComparisonOp(node) || IsComplex(node) ||
+      IsDiv(node) || IsFloorDiv(node) || IsIgamma(node) || IsIgammac(node) ||
+      IsLogicalAnd(node) || IsLogicalOr(node) || IsMaximum(node) ||
+      IsMinimum(node) || IsMod(node) || IsMul(node) || IsPolygamma(node) ||
+      IsPow(node) || IsRealDiv(node) || IsSquaredDifference(node) ||
+      IsSub(node) || IsTruncateDiv(node) || IsTruncateMod(node) || IsZeta(node);
+  return is_binary;
+}
+
+std::vector<int> NonControlInputs(const NodeDef& node) {
+  std::vector<int> pos;
+  for (int i = 0; i < node.input_size(); i++) {
+    if (!IsControlInput(node.input(i))) {
+      pos.push_back(i);
+    }
+  }
+  return pos;
+}
+
+std::vector<int> DataInputPosConcat(const NodeDef& node) {
+  int n = node.attr().at("N").i();
+  std::vector<int> input_pos;
+  int start = (IsConcatV1(node)) ? 1 : 0;
+  int end = start + n;
+  for (int i = start; i < end; i++) {
+    input_pos.push_back(i);
+  }
+  return input_pos;
+}
+
+std::vector<int> DataInputPos(const NodeDef& node) {
+  if (IsSplit(node) || IsHistogramSummary(node)) {
+    return {1};
+  }
+  if (IsStridedSliceGrad(node)) {
+    return {4};
+  }
+  if (IsBinaryOp(node) || IsUnaryGrad(node)) {
+    return {0, 1};
+  }
+  if (IsBetainc(node) || IsSelect(node)) {
+    return {0, 1, 2};
+  }
+  if (IsShapeN(node) || IsIdentityN(node) || IsAddN(node)) {
+    return NonControlInputs(node);
+  }
+  if (IsConcat(node)) {
+    return DataInputPosConcat(node);
+  }
+  if (node.input_size() > 0 && !IsControlInput(node.input(0))) {
+    return {0};
+  }
+  return {};
+}
+
 class GraphProcessor {
  public:
-  GraphProcessor(const VirtualPlacer& virtual_placer,
+  GraphProcessor(const GraphProperties& graph_properties,
+                 const VirtualPlacer& virtual_placer,
                  const std::unordered_set<string>& nodes_to_preserve,
                  GraphDef* graph, NodeMap* node_map)
-      : virtual_placer_(virtual_placer),
+      : graph_properties_(graph_properties),
+        virtual_placer_(virtual_placer),
         nodes_to_preserve_(nodes_to_preserve),
         graph_(graph),
         node_map_(node_map) {}
@@ -186,33 +430,11 @@ class GraphProcessor {
     return node;
   }
 
-  NodeDef* AddNodeReductionConst(const string& name, const string& device) {
-    NodeDef* node = graph_->add_node();
-    node_map_->AddNode(name, node);
-    node->set_name(name);
-    node->set_op("Const");
-    AttrValue attr_data_type;
-    attr_data_type.set_type(DT_INT32);
-    node->mutable_attr()->insert({"dtype", attr_data_type});
-
-    AttrValue attr_tensor;
-    Tensor tensor(DT_INT32, TensorShape({3}));
-    std::vector<int> axis = {0, 2, 3};
-    for (int i = 0; static_cast<size_t>(i) < axis.size(); i++) {
-      tensor.flat<int>()(i) = axis[i];
-    }
-    tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
-    node->mutable_attr()->insert({"value", attr_tensor});
-    string device_name;
-    if (device.empty()) {
-      device_name = virtual_placer_.get_canonical_device_name(*node);
-    } else {
-      device_name = device;
-    }
-    node->set_device(device_name);
-    return node;
+  string LayoutOptimizerNode(const string& base_name) {
+    return strings::StrCat(base_name, "-", kSuffix);
   }
 
+  const GraphProperties& graph_properties_;
   const VirtualPlacer& virtual_placer_;
   const std::unordered_set<string>& nodes_to_preserve_;
   GraphDef* graph_;
@@ -221,18 +443,21 @@ class GraphProcessor {
 
 struct OptimizeContext {
   OptimizeContext(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                  const GraphProperties& graph_properties,
                   const VirtualPlacer& virtual_placer,
                   const std::unordered_set<string>& nodes_to_preserve,
                   bool is_in_frame)
       : graph(graph),
         node(node),
         node_map(node_map),
+        graph_properties(graph_properties),
         virtual_placer(virtual_placer),
         nodes_to_preserve(nodes_to_preserve),
         is_in_frame(is_in_frame) {}
   GraphDef* graph;
   NodeDef* node;
   NodeMap* node_map;
+  const GraphProperties& graph_properties;
   const VirtualPlacer& virtual_placer;
   const std::unordered_set<string>& nodes_to_preserve;
   bool is_in_frame;
@@ -241,8 +466,9 @@ struct OptimizeContext {
 class NodeProcessor : public GraphProcessor {
  public:
   explicit NodeProcessor(const OptimizeContext& opt_cxt)
-      : GraphProcessor(opt_cxt.virtual_placer, opt_cxt.nodes_to_preserve,
-                       opt_cxt.graph, opt_cxt.node_map),
+      : GraphProcessor(opt_cxt.graph_properties, opt_cxt.virtual_placer,
+                       opt_cxt.nodes_to_preserve, opt_cxt.graph,
+                       opt_cxt.node_map),
         node_(opt_cxt.node),
         is_in_frame_(opt_cxt.is_in_frame) {}
   virtual ~NodeProcessor() {}
@@ -260,17 +486,34 @@ class NodeProcessor : public GraphProcessor {
   }
 
  protected:
-  bool IsDimsN(const NodeDef& node, int n) const {
+  bool IsPortDimsN(const NodeDef& node, int port, int n) const {
     if (node.attr().find("_output_shapes") != node.attr().end()) {
-      auto shape = node.attr().at("_output_shapes").list().shape(0);
-      if (shape.dim_size() == n) {
-        return true;
+      if (node.attr().at("_output_shapes").list().shape_size() > port) {
+        auto shape = node.attr().at("_output_shapes").list().shape(port);
+        if (shape.unknown_rank()) {
+          return false;
+        }
+        if (shape.dim_size() == n) {
+          return true;
+        }
       }
     }
     return false;
   }
 
-  bool IsDimsFour(const NodeDef& node) const { return IsDimsN(node, 4); }
+  bool IsPortZeroDimsN(const NodeDef& node, int n) const {
+    return IsPortDimsN(node, 0, n);
+  }
+
+  bool IsPortZeroDimsFour(const NodeDef& node) const {
+    return NodeProcessor::IsPortZeroDimsN(node, 4) ||
+           IsTransposeNCHWToNHWC(node.name());
+  }
+
+  bool IsPortDimsFour(const NodeDef& node, int port) const {
+    return NodeProcessor::IsPortDimsN(node, port, 4) ||
+           IsTransposeNCHWToNHWC(node.name());
+  }
 
   bool IsNHWC() const {
     if (node_->attr().find("data_format") != node_->attr().end()) {
@@ -298,12 +541,7 @@ class NodeProcessor : public GraphProcessor {
     return nodes_to_preserve_.find(node_->name()) != nodes_to_preserve_.end();
   }
 
-  virtual bool ShouldProcess() const {
-    return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs() &&
-           IsOnGPU();
-  }
-
-  virtual bool IsOnGPU() const {
+  bool IsOnGPU() const {
     string device_name;
     if (node_->device().empty()) {
       device_name = virtual_placer_.get_canonical_device_name(*node_);
@@ -320,33 +558,154 @@ class NodeProcessor : public GraphProcessor {
     return false;
   }
 
-  void UpdateAttrDataFormat() {
-    if (node_->attr().find("data_format") != node_->attr().end()) {
-      if (node_->attr().at("data_format").s().compare("NHWC") == 0) {
-        string* data_format =
-            node_->mutable_attr()->at("data_format").mutable_s();
-        *data_format = "NCHW";
-      }
-    }
+  virtual bool ShouldProcess() const {
+    return !MustPreserve() && IsNHWC() && IsPortZeroDimsFour(*node_) &&
+           HasOutputs() && IsOnGPU();
   }
 
   virtual void UpdateAttrShape() {
     if (node_->attr().find("_output_shapes") != node_->attr().end()) {
-      auto shape = node_->mutable_attr()
-                       ->at("_output_shapes")
-                       .mutable_list()
-                       ->mutable_shape(0);
-      if (shape->dim_size() == 4) {
-        int64 h = shape->dim(1).size();
-        int64 w = shape->dim(2).size();
-        int64 c = shape->dim(3).size();
-        shape->mutable_dim(1)->set_size(c);
-        shape->mutable_dim(2)->set_size(h);
-        shape->mutable_dim(3)->set_size(w);
+      for (const auto& pos : GetOutputPos()) {
+        auto shape = node_->mutable_attr()
+                         ->at("_output_shapes")
+                         .mutable_list()
+                         ->mutable_shape(pos);
+        if (shape->dim_size() == 4) {
+          int64 h = shape->dim(1).size();
+          int64 w = shape->dim(2).size();
+          int64 c = shape->dim(3).size();
+          shape->mutable_dim(1)->set_size(c);
+          shape->mutable_dim(2)->set_size(h);
+          shape->mutable_dim(3)->set_size(w);
+        }
+      }
+    }
+  }
+
+  Status UpdateAttrValueOfInput(int input_index, bool permute) {
+    auto input_node = node_map_->GetNode(node_->input(input_index));
+    // We created a copy of the node, so that we don't modify the original node,
+    // which might be used elsewhere. Note that this copy also copies the
+    // control dependency input in the case this node is inside a loop,
+    // to ensure added_node is in the same frame with node_.
+    NodeDef* added_node = graph_->add_node();
+    *added_node = *input_node;
+    string base_name = strings::StrCat(node_->name(), "-", input_node->name());
+    string node_name = LayoutOptimizerNode(base_name);
+    added_node->set_name(node_name);
+    *node_->mutable_input(input_index) = node_name;
+    node_map_->AddNode(node_name, added_node);
+    node_map_->AddOutput(node_name, node_->name());
+    return UpdateAttrValue(added_node, permute);
+  }
+
+  virtual std::vector<int> GetInputPos() const { return {0}; }
+
+  virtual std::set<int> GetOutputPos() const {
+    // For most nodes, no need to process control nodes or nodes that use an
+    // output other than the first output: only the first output is of
+    // 4D NCHW/NHWC format and thus relevant here.
+    std::set<int> output_pos = {0};
+    return output_pos;
+  }
+
+  virtual Status AddLayoutTransposeToInputs() {
+    std::vector<int> input_pos = GetInputPos();
+    for (const auto& pos : input_pos) {
+      string node_name = LayoutOptimizerNode(
+          strings::StrCat(node_->name(), "-", pos, "-", kTransposeNHWCToNCHW));
+      DataType dtype =
+          graph_properties_.GetInputProperties(node_->name())[pos].dtype();
+      auto input_node = node_map_->GetNode(node_->input(pos));
+      TF_RETURN_IF_ERROR(HasAttribute(*input_node, "_output_shapes"));
+      string const_name = GetOrAddNodePermNHWCToNCHW(pos);
+      int output_pos;
+      ParseNodeName(node_->input(pos), &output_pos);
+      AddNodeTranspose(
+          node_name, node_->input(pos), const_name, dtype,
+          input_node->attr().at("_output_shapes").list().shape(output_pos),
+          true);
+      node_map_->UpdateOutput(NodeName(node_->input(pos)), node_->name(),
+                              node_name);
+      node_map_->AddOutput(node_name, node_->name());
+      *node_->mutable_input(pos) = node_name;
+    }
+    return Status::OK();
+  }
+
+  Status AddTransformToOutputs(const string& op) {
+    auto outputs = node_map_->GetOutputs(node_->name());
+    string const_name = GetOrAddNodePermNCHWToNHWC();
+    int output_count = 0;
+    for (const auto& output : outputs) {
+      int connections = 0;
+      int connections_removed = 0;
+      for (int i = 0; i < output->input_size(); i++) {
+        auto& input = *output->mutable_input(i);
+        int input_port;
+        string input_name = ParseNodeName(input, &input_port);
+        auto output_pos = GetOutputPos();
+        if (input_name == node_->name()) {
+          connections++;
+          if (output_pos.find(input_port) != output_pos.end()) {
+            connections_removed++;
+            string added_node_base_name =
+                strings::StrCat(node_->name(), "-", output_count, "-", i);
+            string added_node_name;
+            DataType dtype =
+                graph_properties_.GetOutputProperties(node_->name())[input_port]
+                    .dtype();
+            if (op == "Transpose") {
+              added_node_name = LayoutOptimizerNode(strings::StrCat(
+                  added_node_base_name, "-", kTransposeNCHWToNHWC));
+              TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes"));
+              AddNodeTranspose(
+                  added_node_name, input, const_name, dtype,
+                  node_->attr().at("_output_shapes").list().shape(input_port),
+                  false);
+            } else if (op == "DataFormatVecPermute") {
+              added_node_name = LayoutOptimizerNode(strings::StrCat(
+                  added_node_base_name, "-", kVecPermuteNCHWToNHWC));
+              AddNodeDataFormatOp(added_node_name, input, op, dtype, false);
+            } else {
+              return errors::InvalidArgument("Unsupported op type: ", op);
+            }
+            input = added_node_name;
+            node_map_->AddOutput(node_->name(), added_node_name);
+            node_map_->AddOutput(added_node_name, output->name());
+          }
+        }
       }
+      if (connections == connections_removed) {
+        node_map_->RemoveOutput(node_->name(), output->name());
+      }
+      output_count++;
+    }
+    return Status::OK();
+  }
+
+  virtual Status AddLayoutTransposeToOutputs() {
+    return AddTransformToOutputs("Transpose");
+  }
+
+  virtual Status CustomizedProcessing() { return Status::OK(); }
+
+  Status UpdateOrTransformParamInput(int param_index, const string& op,
+                                     DataType dtype) {
+    auto param_node = node_map_->GetNode(node_->input(param_index));
+    bool permute = (op == "DataFormatVecPermute") ? true : false;
+    if (IsConstant(*param_node)) {
+      TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(param_index, permute));
+    } else {
+      AddDataFormatTranformToParamInput(op, param_index, dtype);
     }
+    return Status::OK();
   }
 
+  NodeDef* node_;
+  bool is_in_frame_;
+
+ private:
   void UpdateAttrKSize() {
     if (node_->attr().find("ksize") != node_->attr().end()) {
       auto list = node_->mutable_attr()->at("ksize").mutable_list();
@@ -361,7 +720,17 @@ class NodeProcessor : public GraphProcessor {
     }
   }
 
-  Status UpdateAttrValue(NodeDef* node) {
+  void UpdateAttrDataFormat() {
+    if (node_->attr().find("data_format") != node_->attr().end()) {
+      if (node_->attr().at("data_format").s().compare("NHWC") == 0) {
+        string* data_format =
+            node_->mutable_attr()->at("data_format").mutable_s();
+        *data_format = "NCHW";
+      }
+    }
+  }
+
+  Status UpdateAttrValue(NodeDef* node, bool permute) {
     TF_RETURN_IF_ERROR(HasAttribute(*node, "value"));
     Tensor tensor;
     auto success =
@@ -369,46 +738,51 @@ class NodeProcessor : public GraphProcessor {
     if (!success) {
       LOG(ERROR) << "Failed to parse TensorProto.";
     }
-    if (tensor.dims() == 1) {
-      int c = tensor.flat<int>()(3);
-      tensor.flat<int>()(3) = tensor.flat<int>()(2);
-      tensor.flat<int>()(2) = tensor.flat<int>()(1);
-      tensor.flat<int>()(1) = c;
-    } else if (tensor.dims() == 2) {
-      for (int i = 0; i < 2; i++) {
-        int c = tensor.matrix<int>()(3, i);
-        tensor.matrix<int>()(3, i) = tensor.matrix<int>()(2, i);
-        tensor.matrix<int>()(2, i) = tensor.matrix<int>()(1, i);
-        tensor.matrix<int>()(1, i) = c;
+
+    if (permute) {
+      if (tensor.dims() == 1) {
+        if (tensor.flat<int>().size() == 4) {
+          int c = tensor.flat<int>()(3);
+          tensor.flat<int>()(3) = tensor.flat<int>()(2);
+          tensor.flat<int>()(2) = tensor.flat<int>()(1);
+          tensor.flat<int>()(1) = c;
+        } else {
+          return Status(error::INVALID_ARGUMENT,
+                        strings::StrCat("Unsupported tensor size: ",
+                                        tensor.flat<int>().size()));
+        }
+      } else if (tensor.dims() == 2) {
+        for (int i = 0; i < 2; i++) {
+          int c = tensor.matrix<int>()(3, i);
+          tensor.matrix<int>()(3, i) = tensor.matrix<int>()(2, i);
+          tensor.matrix<int>()(2, i) = tensor.matrix<int>()(1, i);
+          tensor.matrix<int>()(1, i) = c;
+        }
+      } else {
+        return Status(
+            error::INVALID_ARGUMENT,
+            strings::StrCat("Unsupported dimension size: ", tensor.dims()));
       }
     } else {
-      return Status(
-          error::INVALID_ARGUMENT,
-          strings::StrCat("Unsupported dimension size: ", tensor.dims()));
+      for (int i = 0; i < tensor.flat<int>().size(); i++) {
+        int value = tensor.flat<int>()(i);
+        value = (value >= 0) ? value : value + 4;
+        if (value == 1 || value == 2) {
+          value = value + 1;
+        } else if (value == 3) {
+          value = 1;
+        }
+        tensor.flat<int>()(i) = value;
+      }
     }
-    tensor.AsProtoTensorContent(
-        node->mutable_attr()->at({"value"}).mutable_tensor());
-    return Status::OK();
-  }
-
-  Status UpdateAttrValueOfInput(int input_index) {
-    auto input_node = node_map_->GetNode(node_->input(input_index));
-    // We created a copy of the node, so that we don't modify the original node,
-    // which might be used elsewhere.
-    NodeDef* added_node = graph_->add_node();
-    *added_node = *input_node;
-    string base_name = strings::StrCat(node_->name(), "-", input_node->name());
-    string node_name = AddPrefixToNodeName(base_name, "LayoutOptimizer", "-");
-    added_node->set_name(node_name);
-    *node_->mutable_input(input_index) = node_name;
-    node_map_->AddNode(node_name, added_node);
-    node_map_->AddOutput(node_name, node_->name());
-    return UpdateAttrValue(added_node);
-  }
 
-  virtual std::vector<int> GetInputPos() const {
-    std::vector<int> input_pos = {0};
-    return input_pos;
+    if (tensor.dims() == 0) {
+      tensor.AsProtoField(node->mutable_attr()->at({"value"}).mutable_tensor());
+    } else {
+      tensor.AsProtoTensorContent(
+          node->mutable_attr()->at({"value"}).mutable_tensor());
+    }
+    return Status::OK();
   }
 
   NodeDef* AddNodeTranspose(const string& node_name, const string& input_name,
@@ -447,106 +821,47 @@ class NodeProcessor : public GraphProcessor {
     return node;
   }
 
-  virtual Status AddLayoutTransposeToInputs() {
-    std::vector<int> input_pos = GetInputPos();
-    for (const auto& pos : input_pos) {
-      int output_pos;
-      string input_node_name = ParseNodeName(node_->input(pos), &output_pos);
-      string base_name =
-          strings::StrCat(node_->name(), "-", input_node_name, "-", output_pos);
-      string node_name =
-          AddPrefixToNodeName(base_name, kTransposeNHWCToNCHW, "-");
-      auto input_node = node_map_->GetNode(node_->input(pos));
-      TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
-      TF_RETURN_IF_ERROR(HasAttribute(*input_node, "_output_shapes"));
-      string const_name = GetOrAddNodePermNHWCToNCHW(pos);
-      AddNodeTranspose(
-          node_name, node_->input(pos), const_name,
-          node_->attr().at("T").type(),
-          input_node->attr().at("_output_shapes").list().shape(output_pos),
-          true);
-      node_map_->UpdateOutput(node_->input(pos), node_->name(), node_name);
-      node_map_->AddOutput(node_name, node_->name());
-      *node_->mutable_input(pos) = node_name;
-    }
-    return Status::OK();
-  }
-
-  virtual Status AddLayoutTransposeToOutputs() {
-    auto outputs = node_map_->GetOutputs(node_->name());
-    string const_name = GetOrAddNodePermNCHWToNHWC();
-    for (const auto& output : outputs) {
-      string base_name = strings::StrCat(node_->name(), "-", output->name());
-      string node_name =
-          AddPrefixToNodeName(base_name, kTransposeNCHWToNHWC, "-");
-      // TODO(yaozhang): handle the rare case where node A is connected to more
-      // than one input of node B.
-      auto it = std::find_if(output->mutable_input()->begin(),
-                             output->mutable_input()->end(),
-                             [this](const string& input) {
-                               string node_name = NodeName(input);
-                               return node_name.compare(node_->name()) == 0;
-                             });
-      if (it == output->mutable_input()->end()) {
-        return Status(error::INVALID_ARGUMENT,
-                      strings::StrCat("Expect ", node_->name(),
-                                      " to be an input of ", output->name()));
-      }
-      int output_pos = NodePosition(*it);
-      // No need to process control nodes or nodes that use an output
-      // other than the first output: only the first output is of 4D NCHW/NHWC
-      // format and thus relevant here.
-      if (output_pos != 0) {
-        continue;
-      }
-      TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
-      TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes"));
-      AddNodeTranspose(
-          node_name, node_->name(), const_name, node_->attr().at("T").type(),
-          node_->attr().at("_output_shapes").list().shape(0), false);
-      *it = node_name;
-      node_map_->UpdateOutput(node_->name(), output->name(), node_name);
-      node_map_->AddOutput(node_name, output->name());
-    }
-    return Status::OK();
-  }
-
-  virtual Status CustomizedProcessing() { return Status::OK(); }
-
-  NodeDef* AddNodePermNHWCToNCHW(const string& suffix,
+  NodeDef* AddNodePermNHWCToNCHW(const string& base_name,
                                  const string& depended_node,
                                  const string& device) {
-    auto const_node = AddNodePermConst(
-        strings::StrCat(kPermNHWCToNCHW, "-", suffix), device, {0, 3, 1, 2});
+    string name =
+        LayoutOptimizerNode(strings::StrCat(base_name, "-", kPermNHWCToNCHW));
+    auto const_node = AddNodePermConst(name, device, {0, 3, 1, 2});
     // This is to ensure the transpose node and the const node are in the
     // same frame.
     *const_node->add_input() = AsControlDependency(depended_node);
     return const_node;
   }
 
-  NodeDef* AddNodePermNCHWToNHWC(const string& suffix,
+  NodeDef* AddNodePermNCHWToNHWC(const string& base_name,
                                  const string& depended_node,
                                  const string& device) {
     auto const_node = AddNodePermConst(
-        strings::StrCat(kPermNCHWToNHWC, "-", suffix), device, {0, 2, 3, 1});
+        LayoutOptimizerNode(strings::StrCat(base_name, "-", kPermNCHWToNHWC)),
+        device, {0, 2, 3, 1});
     // This is to ensure the transpose node and the const node are in the same
     // frame.
     *const_node->add_input() = AsControlDependency(depended_node);
     return const_node;
   }
 
-  NodeDef* node_;
-  bool is_in_frame_;
-
- private:
   string GetOrAddNodePermNHWCToNCHW(int pos) {
     string const_name;
     if (is_in_frame_) {
-      auto const_node = AddNodePermNHWCToNCHW(
-          node_->input(pos), NodeName(node_->input(pos)), node_->device());
+      string base_name = strings::StrCat(node_->name(), "-", pos);
+      string input = NodeName(node_->input(pos));
+      string depended_node;
+      if (!IsTransposeNCHWToNHWC(input)) {
+        depended_node = input;
+      } else {
+        auto input_node = node_map_->GetNode(input);
+        depended_node = NodeName(input_node->input(0));
+      }
+      auto const_node =
+          AddNodePermNHWCToNCHW(base_name, depended_node, node_->device());
       const_name = const_node->name();
     } else {
-      const_name = kPermNHWCToNCHW;
+      const_name = LayoutOptimizerNode(kPermNHWCToNCHW);
     }
     return const_name;
   }
@@ -558,7 +873,7 @@ class NodeProcessor : public GraphProcessor {
           AddNodePermNCHWToNHWC(node_->name(), node_->name(), node_->device());
       const_name = const_node->name();
     } else {
-      const_name = kPermNCHWToNHWC;
+      const_name = LayoutOptimizerNode(kPermNCHWToNHWC);
     }
     return const_name;
   }
@@ -571,6 +886,42 @@ class NodeProcessor : public GraphProcessor {
     list->set_i(2, h);
     list->set_i(3, w);
   }
+
+  NodeDef* AddNodeDataFormatOp(const string& name, const string& input_name,
+                               const string& op, DataType dtype,
+                               bool nhwc_to_nchw) {
+    NodeDef* added_node = graph_->add_node();
+    added_node->set_name(name);
+    added_node->set_op(op);
+    node_map_->AddNode(added_node->name(), added_node);
+    added_node->set_device(node_->device());
+    AttrValue attr_data_type;
+    attr_data_type.set_type(dtype);
+    added_node->mutable_attr()->insert({"T", attr_data_type});
+    string src_format = (nhwc_to_nchw) ? "NHWC" : "NCHW";
+    string dst_format = (nhwc_to_nchw) ? "NCHW" : "NHWC";
+    AttrValue attr_format;
+    attr_format.set_s(src_format);
+    added_node->mutable_attr()->insert({"src_format", attr_format});
+    attr_format.set_s(dst_format);
+    added_node->mutable_attr()->insert({"dst_format", attr_format});
+    *added_node->add_input() = input_name;
+    return added_node;
+  }
+
+  void AddDataFormatTranformToParamInput(const string& op, int input_pos,
+                                         DataType dtype) {
+    string suffix = (op == "DataFormatVecPermute") ? kVecPermuteNHWCToNCHW
+                                                   : kDimMapNHWCToNCHW;
+    string name = LayoutOptimizerNode(
+        strings::StrCat(node_->name(), "-", input_pos, "-", suffix));
+    auto added_node =
+        AddNodeDataFormatOp(name, node_->input(input_pos), op, dtype, true);
+    *node_->mutable_input(input_pos) = added_node->name();
+    node_map_->UpdateOutput(NodeName(added_node->input(0)), node_->name(),
+                            added_node->name());
+    node_map_->AddOutput(added_node->name(), node_->name());
+  }
 };
 
 class AvgPoolGradProcessor : public NodeProcessor {
@@ -579,11 +930,10 @@ class AvgPoolGradProcessor : public NodeProcessor {
       : NodeProcessor(opt_cxt) {}
 
  protected:
-  std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos = {1};
-    return input_pos;
+  std::vector<int> GetInputPos() const override { return {1}; }
+  Status CustomizedProcessing() override {
+    return UpdateOrTransformParamInput(0, "DataFormatVecPermute", DT_INT32);
   }
-  Status CustomizedProcessing() override { return UpdateAttrValueOfInput(0); }
 };
 
 class BiasAddGradProcessor : public NodeProcessor {
@@ -601,7 +951,9 @@ class BiasAddGradProcessor : public NodeProcessor {
     }
     auto input = node_map_->GetNode(node_->input(0));
     if (input) {
-      if ((IsNHWC() && IsDimsFour(*input)) || IsNodeNCHWToNHWC(input->name())) {
+      int port;
+      ParseNodeName(node_->input(0), &port);
+      if (IsNHWC() && IsPortDimsFour(*input, port)) {
         return true;
       }
     }
@@ -618,8 +970,8 @@ class Conv2DProcessor : public NodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return !MustPreserve() && IsNHWC() && IsDimsFour(*node_) && HasOutputs() &&
-           (!IsGemmUsed() || no_gemm_) && IsOnGPU();
+    return !MustPreserve() && IsNHWC() && IsPortZeroDimsFour(*node_) &&
+           HasOutputs() && (!IsGemmUsed() || no_gemm_) && IsOnGPU();
   }
 
   TensorShapeProto GetShape(const string& input_name) const {
@@ -692,10 +1044,7 @@ class Conv2DBackpropFilterProcessor : public Conv2DProcessor {
     return Conv2DProcessor::IsGemmUsed(filter_shape, input_shape);
   }
 
-  std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos = {0, 2};
-    return input_pos;
-  }
+  std::vector<int> GetInputPos() const override { return {0, 2}; }
 
   Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
   // No need to update output shape, as it is always of shape
@@ -716,12 +1065,11 @@ class Conv2DBackpropInputProcessor : public Conv2DProcessor {
     return Conv2DProcessor::IsGemmUsed(filter_shape, input_shape);
   }
 
-  std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos = {2};
-    return input_pos;
-  }
+  std::vector<int> GetInputPos() const override { return {2}; }
 
-  Status CustomizedProcessing() override { return UpdateAttrValueOfInput(0); }
+  Status CustomizedProcessing() override {
+    return UpdateOrTransformParamInput(0, "DataFormatVecPermute", DT_INT32);
+  }
 };
 
 class FusedBatchNormGradProcessor : public NodeProcessor {
@@ -734,10 +1082,7 @@ class FusedBatchNormGradProcessor : public NodeProcessor {
     return NodeProcessor::ShouldProcess() && IsTraining();
   }
 
-  std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos = {0, 1};
-    return input_pos;
-  }
+  std::vector<int> GetInputPos() const override { return {0, 1}; }
 
  private:
   bool IsTraining() const {
@@ -756,9 +1101,47 @@ class MaxPoolGradProcessor : public NodeProcessor {
       : NodeProcessor(opt_cxt) {}
 
  protected:
-  std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos = {0, 1, 2};
-    return input_pos;
+  std::vector<int> GetInputPos() const override { return {0, 1, 2}; }
+};
+
+class MaxPoolGradV2Processor : public MaxPoolGradProcessor {
+ public:
+  explicit MaxPoolGradV2Processor(const OptimizeContext& opt_cxt)
+      : MaxPoolGradProcessor(opt_cxt) {}
+
+ protected:
+  Status CustomizedProcessing() override {
+    for (int i = 3; i <= 4; i++) {
+      TF_RETURN_IF_ERROR(
+          UpdateOrTransformParamInput(i, "DataFormatVecPermute", DT_INT32));
+    }
+    return Status::OK();
+  }
+};
+
+class MaxPoolV2Processor : public NodeProcessor {
+ public:
+  explicit MaxPoolV2Processor(const OptimizeContext& opt_cxt)
+      : NodeProcessor(opt_cxt) {}
+
+ protected:
+  bool ShouldProcess() const override {
+    // We check data_input's shape instead, because the shape inference of
+    // MaxPoolV2 is not able to infer the shape when ksize or strides is not
+    // constant.
+    auto data_input = node_map_->GetNode(node_->input(0));
+    int port;
+    ParseNodeName(node_->input(0), &port);
+    return !MustPreserve() && IsNHWC() && IsPortDimsFour(*data_input, port) &&
+           HasOutputs() && IsOnGPU();
+  }
+
+  Status CustomizedProcessing() override {
+    for (int i = 1; i <= 2; i++) {
+      TF_RETURN_IF_ERROR(
+          UpdateOrTransformParamInput(i, "DataFormatVecPermute", DT_INT32));
+    }
+    return Status::OK();
   }
 };
 
@@ -769,30 +1152,43 @@ class AgnosticNodeProcessor : public NodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
+    return !MustPreserve() && IsPortZeroDimsFour(*node_) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() && IsOnGPU();
   }
 
-  bool IsNodeAfterNCHWToNHWC() const {
+  bool IsNodeAfterNCHWToNHWC(const NodeDef& node) const {
     std::set<string> ops_format_agnostic = GetOpsFormatAgnostic();
-    auto node = node_map_->GetNode(node_->name());
-    while (node->input_size() > 0) {
-      int data_input_pos = 0;
-      if (IsConcatV1(*node) || IsSplit(*node)) {
-        data_input_pos = 1;
-      }
-      node = node_map_->GetNode(node->input(data_input_pos));
-      if (IsNodeNCHWToNHWC(node->name())) {
+    std::deque<NodeDef*> queue;
+    auto data_node_pos = DataInputPos(node);
+    for (const auto& pos : data_node_pos) {
+      auto input_node = node_map_->GetNode(node.input(pos));
+      queue.push_back(input_node);
+    }
+    // The code will exit this while loop in one iteration in most cases, as the
+    // graph is already topologically sorted.
+    while (!queue.empty()) {
+      NodeDef* current_node = queue.front();
+      queue.pop_front();
+      if (IsTransposeNCHWToNHWC(current_node->name()) ||
+          IsDimMapNCHWToNHWC(current_node->name()) ||
+          IsVecPermuteNCHWToNHWC(current_node->name())) {
         return true;
       }
-      bool connected =
-          ops_format_agnostic.find(node->op()) != ops_format_agnostic.end();
-      if (!connected) {
-        return false;
+      // We only continue searching if the path is connected through
+      // format-agnostic nodes.
+      if (ops_format_agnostic.find(current_node->op()) !=
+          ops_format_agnostic.end()) {
+        auto current_node_pos = DataInputPos(*current_node);
+        for (const auto& pos : current_node_pos) {
+          auto input_node = node_map_->GetNode(current_node->input(pos));
+          queue.push_back(input_node);
+        }
       }
     }
     return false;
   }
+
+  bool IsNodeAfterNCHWToNHWC() const { return IsNodeAfterNCHWToNHWC(*node_); }
 };
 
 class AddNProcessor : public AgnosticNodeProcessor {
@@ -802,56 +1198,62 @@ class AddNProcessor : public AgnosticNodeProcessor {
 
  protected:
   std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos;
-    input_pos.reserve(node_->input_size());
-    for (int i = 0; i < node_->input_size(); i++) {
-      input_pos.push_back(i);
-    }
-    return input_pos;
+    return NonControlInputs(*node_);
   }
 };
 
 class BinaryOpProcessor : public AgnosticNodeProcessor {
  public:
   explicit BinaryOpProcessor(const OptimizeContext& opt_cxt)
-      : AgnosticNodeProcessor(opt_cxt) {
-    is_4d_with_vector_ = Is4DOperateWithVector();
-  }
+      : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   bool ShouldProcess() const override {
-    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
+    return !MustPreserve() && IsPortZeroDimsFour(*node_) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() &&
-           (Is4DOperateWithND(4) || Is4DOperateWithScalar() ||
-            Is4DOperateWithVector()) &&
+           (IsNDOperateWithMD(4, 0) || IsNDOperateWithMD(4, 1) ||
+            IsNDOperateWithMD(4, 4) || IsNDOperateWithMD(0, 4) ||
+            IsNDOperateWithMD(1, 4)) &&
            IsOnGPU();
   }
 
   std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos = {0};
-    if (Is4DOperateWithND(4)) {
+    std::vector<int> input_pos;
+    auto input0 = node_map_->GetNode(node_->input(0));
+    auto input1 = node_map_->GetNode(node_->input(1));
+    int input0_port;
+    ParseNodeName(node_->input(0), &input0_port);
+    int input1_port;
+    ParseNodeName(node_->input(1), &input1_port);
+    if (IsPortDimsFour(*input0, input0_port)) {
+      input_pos.push_back(0);
+    }
+    if (IsPortDimsFour(*input1, input1_port)) {
       input_pos.push_back(1);
     }
     return input_pos;
   }
 
-  bool Is4DOperateWithND(int n) const {
+  bool IsNDOperateWithMD(int n, int m) const {
     auto input0 = node_map_->GetNode(node_->input(0));
     auto input1 = node_map_->GetNode(node_->input(1));
+    int input0_port;
+    ParseNodeName(node_->input(0), &input0_port);
+    int input1_port;
+    ParseNodeName(node_->input(1), &input1_port);
+
     if (input0 && input1) {
-      return (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) &&
-             ((n == 4)
-                  ? (IsDimsFour(*input1) || IsNodeNCHWToNHWC(input1->name()))
-                  : IsDimsN(*input1, n));
+      bool input0_is_n = (n == 4) ? IsPortDimsFour(*input0, input0_port)
+                                  : IsPortDimsN(*input0, input0_port, n);
+      bool input1_is_m = (m == 4) ? IsPortDimsFour(*input1, input1_port)
+                                  : IsPortDimsN(*input1, input1_port, m);
+      return input0_is_n && input1_is_m;
     }
     return false;
   }
 
-  bool Is4DOperateWithScalar() const { return Is4DOperateWithND(0); }
-
-  bool Is4DOperateWithVector() const { return Is4DOperateWithND(1); }
-
-  NodeDef* AddNodeShapeConst(const string& name, int num_channels) {
+  NodeDef* AddNodeShapeConst(const string& name, int num_channels,
+                             const string& depended_node) {
     NodeDef* node = graph_->add_node();
     node_map_->AddNode(name, node);
     node->set_name(name);
@@ -869,6 +1271,11 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
     }
     tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
     node->mutable_attr()->insert({"value", attr_tensor});
+    if (is_in_frame_) {
+      // This is to ensure the transpose node and the const node are in the
+      // same frame.
+      *node->add_input() = AsControlDependency(depended_node);
+    }
     return node;
   }
 
@@ -894,31 +1301,41 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
   }
 
   Status CustomizedProcessing() override {
-    if (is_4d_with_vector_) {
-      string base_name = strings::StrCat(node_->name(), "-", node_->input(1));
-      string reshape_node_name =
-          AddPrefixToNodeName(base_name, kReshapeNHWCToNCHW, "-");
+    int vector_index = -1;
+    if (IsNDOperateWithMD(4, 1)) {
+      vector_index = 1;
+    } else if (IsNDOperateWithMD(1, 4)) {
+      vector_index = 0;
+    }
+    if (vector_index != -1) {
+      string base_name = strings::StrCat(node_->name(), "-", vector_index);
+      string reshape_node_name = LayoutOptimizerNode(
+          strings::StrCat(base_name, "-", kReshapeNHWCToNCHW));
       string shape_const_node_name =
-          AddPrefixToNodeName(base_name, kReshapeConst, "-");
-      auto input_node = node_map_->GetNode(node_->input(1));
+          LayoutOptimizerNode(strings::StrCat(base_name, "-", kReshapeConst));
+      auto input_node = node_map_->GetNode(node_->input(vector_index));
       TF_RETURN_IF_ERROR(HasAttribute(*input_node, "_output_shapes"));
-      int vector_size =
-          input_node->attr().at("_output_shapes").list().shape(0).dim(0).size();
-      AddNodeShapeConst(shape_const_node_name, vector_size);
+      int port;
+      ParseNodeName(node_->input(vector_index), &port);
+      int vector_size = input_node->attr()
+                            .at("_output_shapes")
+                            .list()
+                            .shape(port)
+                            .dim(0)
+                            .size();
+      AddNodeShapeConst(shape_const_node_name, vector_size,
+                        NodeName(node_->input(vector_index)));
       TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
-      AddNodeReshape(reshape_node_name, node_->input(1), shape_const_node_name,
-                     node_->attr().at("T").type());
+      AddNodeReshape(reshape_node_name, node_->input(vector_index),
+                     shape_const_node_name, node_->attr().at("T").type());
       node_map_->AddOutput(shape_const_node_name, reshape_node_name);
-      node_map_->UpdateOutput(node_->input(1), node_->name(),
-                              reshape_node_name);
+      node_map_->UpdateOutput(NodeName(node_->input(vector_index)),
+                              node_->name(), reshape_node_name);
       node_map_->AddOutput(reshape_node_name, node_->name());
-      *node_->mutable_input(1) = reshape_node_name;
+      *node_->mutable_input(vector_index) = reshape_node_name;
     }
     return Status::OK();
   }
-
- private:
-  bool is_4d_with_vector_;
 };
 
 class ConcatProcessor : public AgnosticNodeProcessor {
@@ -926,344 +1343,333 @@ class ConcatProcessor : public AgnosticNodeProcessor {
   explicit ConcatProcessor(const OptimizeContext& opt_cxt)
       : AgnosticNodeProcessor(opt_cxt) {
     // For Concat,  the concat axis is the first input; for ConcatV2,
-    // the last input.
-    axis_node_pos_ = (IsConcatV1(*node_)) ? 0 : (node_->input_size() - 1);
+    // the last input. Note that if with control inputs, the number of inputs
+    // is larger than the integer attribute N.
+    int n = node_->attr().at("N").i();
+    axis_node_pos_ = (IsConcatV1(*node_)) ? 0 : n;
   }
 
  protected:
-  bool ShouldProcess() const override {
-    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
-           IsNodeAfterNCHWToNHWC() && IsAlongDimC() && IsOnGPU();
-  }
-
   std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos;
-    int start = (IsConcatV1(*node_)) ? 1 : 0;
-    int end =
-        (IsConcatV1(*node_)) ? node_->input_size() : (node_->input_size() - 1);
-    for (int i = start; i < end; i++) {
-      input_pos.push_back(i);
-    }
-    return input_pos;
+    return DataInputPosConcat(*node_);
   }
 
   Status CustomizedProcessing() override {
-    string concat_const_name = GetOrAddNodeConcatConst();
-    node_map_->AddOutput(concat_const_name, node_->name());
-    *node_->mutable_input(axis_node_pos_) = concat_const_name;
-    return Status::OK();
-  }
-
-  bool IsAlongDimC() const {
-    auto axis_node = node_map_->GetNode(node_->input(axis_node_pos_));
-    if (axis_node->attr().find("value") != axis_node->attr().end()) {
-      return axis_node->attr().at("value").tensor().int_val(0) == 3;
-    }
-    return false;
+    DataType dtype =
+        (IsConcatV1(*node_)) ? DT_INT32 : node_->attr().at("Tidx").type();
+    return UpdateOrTransformParamInput(axis_node_pos_, "DataFormatDimMap",
+                                       dtype);
   }
 
   int axis_node_pos_;
+};
 
- private:
-  NodeDef* AddNodeConcatConst(const string& suffix, const string& depended_node,
-                              const string& device) {
-    auto const_node = AddNodeConstScalar(
-        strings::StrCat(kConcatConst, "-", suffix), device, DT_INT32, 1);
-    // This is to ensure the concat node and the const node are
-    // in the same frame.
-    *const_node->add_input() = AsControlDependency(depended_node);
-    return const_node;
+class FillProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit FillProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
+
+ protected:
+  std::vector<int> GetInputPos() const override { return {}; }
+
+  Status CustomizedProcessing() override {
+    DataType dtype = node_->attr().at("index_type").type();
+    return UpdateOrTransformParamInput(0, "DataFormatVecPermute", dtype);
   }
+};
 
-  string GetOrAddNodeConcatConst() {
-    string const_name;
-    if (is_in_frame_) {
-      int value_node_pos = (axis_node_pos_ == 0) ? 1 : 0;
-      auto const_node = AddNodeConcatConst(
-          node_->name(), NodeName(node_->input(value_node_pos)),
-          node_->device());
-      const_name = const_node->name();
-    } else {
-      const_name = kConcatConst;
-    }
-    return const_name;
+class HistogramSummaryProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit HistogramSummaryProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
+
+ protected:
+  bool ShouldProcess() const override {
+    auto input1 = node_map_->GetNode(node_->input(1));
+    int port;
+    ParseNodeName(node_->input(1), &port);
+    return !MustPreserve() && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
+           IsPortDimsFour(*input1, port) && IsOnGPU();
   }
+
+  std::vector<int> GetInputPos() const override { return {1}; }
+
+  Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
 };
 
-class PadProcessor : public AgnosticNodeProcessor {
+class IdentityNProcessor : public AgnosticNodeProcessor {
  public:
-  explicit PadProcessor(const OptimizeContext& opt_cxt)
+  explicit IdentityNProcessor(const OptimizeContext& opt_cxt)
       : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   bool ShouldProcess() const override {
-    return !MustPreserve() && IsDimsFour(*node_) && HasOutputs() &&
-           IsNodeAfterNCHWToNHWC() && PaddingSupported() && IsOnGPU();
+    return !MustPreserve() && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
+           IsOnGPU();
   }
-  Status CustomizedProcessing() override { return UpdateAttrValueOfInput(1); }
 
- private:
-  bool PaddingSupported() const {
-    auto pad_const = node_map_->GetNode(node_->input(1));
-    bool is_const = IsConstant(*pad_const);
-    bool is_4D = false;
-    if (HasAttribute(*pad_const, "value").ok()) {
-      Tensor tensor;
-      if (tensor.FromProto(pad_const->mutable_attr()->at({"value"}).tensor())) {
-        if (tensor.dims() == 2) {
-          if (tensor.dim_size(0) == 4 && tensor.dim_size(1) == 2) {
-            is_4D = true;
-          }
+  std::vector<int> GetInputPos() const override {
+    std::vector<int> input_pos;
+    for (int i = 0; i < node_->input_size(); i++) {
+      auto input = node_map_->GetNode(node_->input(i));
+      int port;
+      ParseNodeName(node_->input(i), &port);
+      // Skip control input.
+      if (port != -1) {
+        if (IsPortDimsFour(*input, port) &&
+            (IsNodeAfterNCHWToNHWC(*input) ||
+             IsTransposeNCHWToNHWC(input->name()))) {
+          input_pos.push_back(i);
         }
       }
     }
-    return is_const && is_4D;
+    return input_pos;
+  }
+
+  std::set<int> GetOutputPos() const override {
+    std::set<int> output_pos{};
+    for (const auto& input_pos : GetInputPos()) {
+      output_pos.insert(input_pos);
+    }
+    return output_pos;
   }
 };
 
-class SplitProcessor : public AgnosticNodeProcessor {
+class ShapeProcessor : public IdentityNProcessor {
  public:
-  explicit SplitProcessor(const OptimizeContext& opt_cxt)
+  explicit ShapeProcessor(const OptimizeContext& opt_cxt)
+      : IdentityNProcessor(opt_cxt) {}
+
+ protected:
+  Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
+
+  Status CustomizedProcessing() override {
+    return AddTransformToOutputs("DataFormatVecPermute");
+  }
+};
+
+class MergeProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit MergeProcessor(const OptimizeContext& opt_cxt)
       : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   bool ShouldProcess() const override {
-    return AgnosticNodeProcessor::ShouldProcess() && SplitSupported();
+    return !MustPreserve() && IsPortZeroDimsFour(*node_) && HasOutputs() &&
+           IsEveryInputAfterNCHWToNHWC() && IsOnGPU();
   }
 
   std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos = {1};
+    std::vector<int> input_pos;
+    int n = node_->attr().at("N").i();
+    input_pos.reserve(n);
+    for (int i = 0; i < n; i++) {
+      input_pos.push_back(i);
+    }
     return input_pos;
   }
 
-  Status CustomizedProcessing() override {
-    string split_const_name = AddNodeSplitConst()->name();
-    node_map_->AddOutput(split_const_name, node_->name());
-    *node_->mutable_input(0) = split_const_name;
-    return Status::OK();
-  }
-
  private:
-  bool SplitSupported() const {
-    auto dim_node = node_map_->GetNode(node_->input(0));
-    if (!IsConstant(*dim_node)) {
-      return false;
-    }
-    if (HasAttribute(*dim_node, "value").ok()) {
-      auto tensor = dim_node->attr().at({"value"}).tensor();
-      if (tensor.tensor_shape().dim_size() == 0 && tensor.int_val_size() == 1) {
-        if (tensor.int_val(0) < 4 && tensor.int_val(0) >= -4) {
-          return true;
-        }
+  bool IsEveryInputAfterNCHWToNHWC() const {
+    for (const auto& input : node_->input()) {
+      auto input_node = node_map_->GetNode(input);
+      if (IsNodeAfterNCHWToNHWC(*input_node) ||
+          IsTransposeNCHWToNHWC(input_node->name())) {
+        continue;
       }
+      return false;
     }
-    return false;
-  }
-
-  NodeDef* AddNodeSplitConst() {
-    auto dim_node = node_map_->GetNode(node_->input(0));
-    auto tensor = dim_node->attr().at({"value"}).tensor();
-    int value = tensor.int_val(0);
-    value = (value >= 0) ? value : value + 4;
-    if (value == 1 || value == 2) {
-      value = value + 1;
-    } else if (value == 3) {
-      value = 1;
-    }
-    // We created a copy of the node, so that we don't modify the original node,
-    // which might be used elsewhere. Note that this copy also copies the
-    // control dependency input in the case this node is inside a loop,
-    // to ensure added_node is in the same frame with the Split node.
-    NodeDef* added_node = graph_->add_node();
-    *added_node = *dim_node;
-    added_node->set_name(strings::StrCat(kSplitConst, "-", node_->name()));
-    added_node->mutable_attr()->at({"value"}).mutable_tensor()->set_int_val(
-        0, value);
-    return added_node;
+    return true;
   }
 };
 
-class ReluGradProcessor : public AgnosticNodeProcessor {
+class PadProcessor : public AgnosticNodeProcessor {
  public:
-  explicit ReluGradProcessor(const OptimizeContext& opt_cxt)
+  explicit PadProcessor(const OptimizeContext& opt_cxt)
       : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
-  std::vector<int> GetInputPos() const override {
-    std::vector<int> input_pos = {0, 1};
-    return input_pos;
+  Status CustomizedProcessing() override {
+    DataType dtype = node_->attr().at("Tpaddings").type();
+    return UpdateOrTransformParamInput(1, "DataFormatVecPermute", dtype);
   }
 };
 
-class SliceProcessor : public AgnosticNodeProcessor {
+class ReverseProcessor : public AgnosticNodeProcessor {
  public:
-  explicit SliceProcessor(const OptimizeContext& opt_cxt)
+  explicit ReverseProcessor(const OptimizeContext& opt_cxt)
       : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   Status CustomizedProcessing() override {
-    // Skip the first input, which is the data to be sliced.
-    for (int i = 1; i < node_->input_size(); i++) {
-      string base_name = strings::StrCat(node_->name(), "-input", i);
-      string node_name =
-          AddPrefixToNodeName(base_name, kPermVecNHWCToNCHW, "-");
-      TF_RETURN_IF_ERROR(HasAttribute(*node_, "Index"));
-      AddNodePermVec(node_name, node_->input(i),
-                     node_->attr().at("Index").type(), true);
-      node_map_->UpdateOutput(node_->input(i), node_->name(), node_name);
-      node_map_->AddOutput(node_name, node_->name());
-      *node_->mutable_input(i) = node_name;
-    }
-    return Status::OK();
+    DataType dtype = node_->attr().at("Tidx").type();
+    return UpdateOrTransformParamInput(1, "DataFormatDimMap", dtype);
   }
+};
 
- private:
-  NodeDef* AddNodeGatherAxisConst(const string& suffix,
-                                  const string& depended_node,
-                                  const string& device) {
-    auto const_node = AddNodeConstScalar(
-        strings::StrCat(kGatherAxisConst, "-", suffix), device, DT_INT32, 0);
-    // This is to ensure the Slice node and the const node are
-    // in the same frame.
-    *const_node->add_input() = AsControlDependency(depended_node);
-    return const_node;
+class SplitProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit SplitProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {
+    axis_node_pos_ = 0;
   }
 
-  string GetOrAddNodeGatherAxisConst() {
-    string const_name;
-    if (is_in_frame_) {
-      auto const_node = AddNodeGatherAxisConst(
-          node_->name(), NodeName(node_->input(0)), node_->device());
-      const_name = const_node->name();
-    } else {
-      const_name = kGatherAxisConst;
+ protected:
+  std::vector<int> GetInputPos() const override { return {1}; }
+
+  std::set<int> GetOutputPos() const override {
+    std::set<int> output_pos{0};
+    if (HasAttribute(*node_, "num_split").ok()) {
+      for (int i = 1; i < node_->attr().at("num_split").i(); i++) {
+        output_pos.insert(i);
+      }
     }
-    return const_name;
+    return output_pos;
   }
 
-  string GetOrAddNodePermNHWCToNCHW() {
-    string const_name;
-    if (is_in_frame_) {
-      auto const_node = AddNodePermNHWCToNCHW(
-          node_->name(), NodeName(node_->input(0)), node_->device());
-      const_name = const_node->name();
-    } else {
-      const_name = kPermNHWCToNCHW;
-    }
-    return const_name;
+  Status CustomizedProcessing() override {
+    return UpdateOrTransformParamInput(axis_node_pos_, "DataFormatDimMap",
+                                       DT_INT32);
   }
 
-  string GetOrAddNodePermNCHWToNHWC() {
-    string const_name;
-    if (is_in_frame_) {
-      auto const_node = AddNodePermNCHWToNHWC(
-          node_->name(), NodeName(node_->input(0)), node_->device());
-      const_name = const_node->name();
-    } else {
-      const_name = kPermNCHWToNHWC;
-    }
-    return const_name;
+  int axis_node_pos_;
+};
+
+class SplitVProcessor : public SplitProcessor {
+ public:
+  explicit SplitVProcessor(const OptimizeContext& opt_cxt)
+      : SplitProcessor(opt_cxt) {
+    axis_node_pos_ = 2;
   }
 
-  void AddNodePermVec(const string& node_name, const string& input_name,
-                      DataType data_type, bool NHWCToNCHW) {
-    NodeDef* node = graph_->add_node();
-    node_map_->AddNode(node_name, node);
-    node->set_name(node_name);
-    *node->add_input() = input_name;
-    *node->add_input() = NHWCToNCHW ? GetOrAddNodePermNHWCToNCHW()
-                                    : GetOrAddNodePermNCHWToNHWC();
-    *node->add_input() = GetOrAddNodeGatherAxisConst();
-    node->set_op("GatherV2");
+ protected:
+  std::vector<int> GetInputPos() const override { return {0}; }
+};
 
-    AttrValue attr_type_indices;
-    attr_type_indices.set_type(DT_INT32);
-    node->mutable_attr()->insert({"Tindices", attr_type_indices});
+class TernaryOpProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit TernaryOpProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
 
-    AttrValue attr_type_axis;
-    attr_type_axis.set_type(DT_INT32);
-    node->mutable_attr()->insert({"Taxis", attr_type_axis});
+ protected:
+  std::vector<int> GetInputPos() const override { return {0, 1, 2}; }
+};
 
-    AttrValue attr_type_params;
-    attr_type_params.set_type(data_type);
-    node->mutable_attr()->insert({"Tparams", attr_type_params});
+class SelectProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit SelectProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
 
-    AttrValue attr_validate;
-    attr_validate.set_b(true);
-    node->mutable_attr()->insert({"validate_indices", attr_validate});
+ protected:
+  std::vector<int> GetInputPos() const override {
+    auto input0 = node_map_->GetNode(node_->input(0));
+    int input0_port;
+    ParseNodeName(node_->input(0), &input0_port);
+    // Input 0 could be a scalar, a vector with size matching the first
+    // dimension of input 1 and 2, or must have the same shape as input 1 and 2.
+    if (IsPortDimsFour(*input0, input0_port)) {
+      return {0, 1, 2};
+    } else {
+      return {1, 2};
+    }
   }
 };
 
-// Specialized SliceProcessor, used if the second and third input are const
-// nodes, which could be the case if a constant folding pass is applied
-// before this optimization.
-class SliceProcessorConst : public AgnosticNodeProcessor {
+class UnaryGradProcessor : public AgnosticNodeProcessor {
  public:
-  explicit SliceProcessorConst(const OptimizeContext& opt_cxt)
+  explicit UnaryGradProcessor(const OptimizeContext& opt_cxt)
       : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
-  Status CustomizedProcessing() override {
+  std::vector<int> GetInputPos() const override { return {0, 1}; }
+};
+
+class SliceProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit SliceProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {
     // Skip the first input, which is the data to be sliced.
-    for (int i = 1; i < node_->input_size(); i++) {
-      TF_RETURN_IF_ERROR(UpdateAttrValueOfInput(i));
+    start_ = 1;
+    // Note that we can't use node_->input_size() here because there
+    // could be control inputs.
+    end_ = 2;
+  }
+
+ protected:
+  Status ProcessInputs() {
+    for (int i = start_; i <= end_; i++) {
+      DataType dtype = node_->attr().at("Index").type();
+      TF_RETURN_IF_ERROR(
+          UpdateOrTransformParamInput(i, "DataFormatVecPermute", dtype));
     }
     return Status::OK();
   }
+
+  Status CustomizedProcessing() override { return ProcessInputs(); }
+
+  int start_;
+  int end_;
 };
 
-// Specialized SliceProcessor, used if the second input is ConcatOffset. An
-// example use case is in the gradient computation of Concat for InceptionV3.
-class SliceProcessorConcatOffset : public AgnosticNodeProcessor {
+class StridedSliceProcessor : public SliceProcessor {
  public:
-  explicit SliceProcessorConcatOffset(const OptimizeContext& opt_cxt)
-      : AgnosticNodeProcessor(opt_cxt) {}
+  explicit StridedSliceProcessor(const OptimizeContext& opt_cxt)
+      : SliceProcessor(opt_cxt) {
+    start_ = 1;
+    end_ = 3;
+  }
 
  protected:
+  bool ShouldProcess() const override {
+    return AgnosticNodeProcessor::ShouldProcess() && IsOnlyBeginEndMask();
+  }
+
   Status CustomizedProcessing() override {
-    auto maybe_concatoffset_node =
-        node_map_->GetNode(NodeName(node_->input(1)));
-    if (IsConcatOffset(*maybe_concatoffset_node)) {
-      auto maybe_axis_node =
-          node_map_->GetNode(maybe_concatoffset_node->input(0));
-      NodeDef* axis_node;
-      if (IsConstant(*maybe_axis_node)) {
-        axis_node = maybe_axis_node;
-        // A FloorMod node might be added between ConcatOffset and the concat
-        // dimension const node to handle a negative dimension index -1, meaning
-        // the last dimension, which is consistent with the python's notation
-        // for negative index.
-      } else if (IsFloorMod(*maybe_axis_node)) {
-        axis_node = node_map_->GetNode(maybe_axis_node->input(0));
-      } else {
-        return Status(error::INVALID_ARGUMENT,
-                      strings::StrCat("Expect either Const or FloorMod for the "
-                                      "input 1 of ConcatOffset"));
-      }
-      // Need to process if the channel is at dimension 3, which indicates the
-      // NHWC format is being used. As multiple Slice nodes may share the same
-      // ConcatOffset node, the NHWC to NCHW conversion may have already
-      // been performed when processing other Slice nodes.
-      TF_RETURN_IF_ERROR(HasAttribute(*axis_node, "value"));
-      int concat_dim = axis_node->attr().at("value").tensor().int_val(0);
-      if (concat_dim == -1 || concat_dim == 3) {
-        // Update the dimension order for shape input nodes. Note that the input
-        // 2 of Slice also shares one of the shape nodes.
-        for (int i = 1; i < maybe_concatoffset_node->input_size(); i++) {
-          auto shape_node =
-              node_map_->GetNode(maybe_concatoffset_node->input(i));
-          TF_RETURN_IF_ERROR(UpdateAttrValue(shape_node));
-        }
-        // Set the channel dimension to 1, as we have converted the vector
-        // element order from NHWC to NCHW.
-        axis_node->mutable_attr()->at("value").mutable_tensor()->set_int_val(0,
-                                                                             1);
-      }
+    TF_RETURN_IF_ERROR(UpdateMask("begin_mask"));
+    TF_RETURN_IF_ERROR(UpdateMask("end_mask"));
+    TF_RETURN_IF_ERROR(ProcessInputs());
+    return Status::OK();
+  }
+
+ private:
+  bool IsMaskZero(const string& mask) const {
+    return node_->attr().at(mask).i() == 0;
+  }
+
+  bool IsOnlyBeginEndMask() const {
+    return IsMaskZero("ellipsis_mask") && IsMaskZero("new_axis_mask") &&
+           IsMaskZero("shrink_axis_mask");
+  }
+
+  Status UpdateMask(const string& mask) {
+    int i = node_->attr().at(mask).i();
+    if (i < 0 || i > 15) {
+      return errors::InvalidArgument("invalid mask value: ", i);
     }
+    if (i == 0 || i == 1 || i == 14 || i == 15) return Status::OK();
+    if (i == 2 || i == 3) i += 2;
+    if (i == 4 || i == 5) i += 4;
+    if (i == 6 || i == 7) i += 6;
+    if (i == 8 || i == 9) i -= 6;
+    if (i == 10 || i == 11) i -= 4;
+    if (i == 12 || i == 13) i -= 2;
+    node_->mutable_attr()->at(mask).set_i(i);
     return Status::OK();
   }
 };
 
+class StridedSliceGradProcessor : public StridedSliceProcessor {
+ public:
+  explicit StridedSliceGradProcessor(const OptimizeContext& opt_cxt)
+      : StridedSliceProcessor(opt_cxt) {
+    start_ = 0;
+    end_ = 3;
+  }
+
+ protected:
+  std::vector<int> GetInputPos() const override { return {4}; }
+};
+
 class SqueezeProcessor : public AgnosticNodeProcessor {
  public:
   explicit SqueezeProcessor(const OptimizeContext& opt_cxt)
@@ -1271,7 +1677,7 @@ class SqueezeProcessor : public AgnosticNodeProcessor {
 
  protected:
   bool ShouldProcess() const override {
-    return !MustPreserve() && IsDimsN(*node_, 2) && HasOutputs() &&
+    return !MustPreserve() && IsPortZeroDimsN(*node_, 2) && HasOutputs() &&
            IsNodeAfterNCHWToNHWC() && IsInputConvertible() && IsAlongDimHW() &&
            IsOnGPU();
   }
@@ -1279,12 +1685,11 @@ class SqueezeProcessor : public AgnosticNodeProcessor {
   Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
 
   bool IsInputConvertible() const {
+    int input_port;
     auto input = node_map_->GetNode(node_->input(0));
-    if (IsNodeNCHWToNHWC(input->name())) {
-      input = node_map_->GetNode(input->input(0));
-    }
+    ParseNodeName(node_->input(0), &input_port);
     if (input->attr().find("_output_shapes") != input->attr().end()) {
-      auto shape = input->attr().at("_output_shapes").list().shape(0);
+      auto shape = input->attr().at("_output_shapes").list().shape(input_port);
       if (shape.dim_size() != 4) {
         return false;
       }
@@ -1298,8 +1703,12 @@ class SqueezeProcessor : public AgnosticNodeProcessor {
   bool IsAlongDimHW() const {
     if (node_->attr().find("squeeze_dims") != node_->attr().end()) {
       auto list = node_->attr().at("squeeze_dims").list();
-      if (list.i(0) == 1 && list.i(1) == 2) {
-        return true;
+      // If list is empty, Squeeze op will squeeze all dimensions of size 1.
+      if (list.i_size() == 0) return true;
+      if (list.i_size() == 2) {
+        if (list.i(0) == 1 && list.i(1) == 2) {
+          return true;
+        }
       }
     }
     return false;
@@ -1308,87 +1717,112 @@ class SqueezeProcessor : public AgnosticNodeProcessor {
   Status CustomizedProcessing() override {
     TF_RETURN_IF_ERROR(HasAttribute(*node_, "squeeze_dims"));
     auto list = node_->mutable_attr()->at("squeeze_dims").mutable_list();
-    list->set_i(0, 2);
-    list->set_i(1, 3);
+    if (list->i_size() == 2) {
+      list->set_i(0, 2);
+      list->set_i(1, 3);
+    }
     return Status::OK();
   }
 };
 
-class SumProcessor : public AgnosticNodeProcessor {
+class ReduceProcessor : public AgnosticNodeProcessor {
  public:
-  explicit SumProcessor(const OptimizeContext& opt_cxt)
+  explicit ReduceProcessor(const OptimizeContext& opt_cxt)
       : AgnosticNodeProcessor(opt_cxt) {}
 
  protected:
   bool ShouldProcess() const override {
     auto input0 = node_map_->GetNode(node_->input(0));
+    int port;
+    ParseNodeName(node_->input(0), &port);
     return !MustPreserve() && HasOutputs() && IsNodeAfterNCHWToNHWC() &&
-           (IsDimsFour(*input0) || IsNodeNCHWToNHWC(input0->name())) &&
-           IsAlongDimNHW() && IsOnGPU();
+           IsPortDimsFour(*input0, port) && IsReduceAxisSupported() &&
+           IsOnGPU();
   }
 
-  Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
-
   Status CustomizedProcessing() override {
-    node_map_->AddOutput(kReductionConst, node_->name());
-    *node_->mutable_input(1) = GetOrAddNodeReductionConst();
+    if (IsAlongNHW() || IsAlongHW() || IsAlongC()) {
+      DataType dtype = node_->attr().at("Tidx").type();
+      TF_RETURN_IF_ERROR(
+          UpdateOrTransformParamInput(1, "DataFormatDimMap", dtype));
+    }
     return Status::OK();
   }
 
+  Status AddLayoutTransposeToOutputs() override { return Status::OK(); }
+
  private:
-  bool IsAlongDimNHW() const {
-    NodeDef* node = node_map_->GetNode(node_->input(1));
-    Tensor tensor;
-    if (node->attr().find({"value"}) == node->attr().end()) {
-      return false;
-    }
-    auto success = tensor.FromProto(node->attr().at({"value"}).tensor());
-    if (!success) {
-      LOG(ERROR) << "Failed to parse TensorProto.";
-      return false;
-    }
-    if (tensor.flat<int>().size() != 3) {
+  bool IsReduceAxisSupported() const {
+    return IsAlongAllFourDims() || IsAlongHWC() ||
+           ((IsAlongNHW() || IsAlongHW() || IsAlongC()) && !KeepDims());
+  }
+
+  bool IsAlongAxis(const std::vector<int>& axis) const {
+    auto axis_node = node_map_->GetNode(node_->input(1));
+    if (!IsConstant(*axis_node)) {
       return false;
     }
-    if (tensor.flat<int>()(0) == 0 && tensor.flat<int>()(1) == 1 &&
-        tensor.flat<int>()(2) == 2) {
-      return true;
+    if (HasAttribute(*axis_node, "value").ok()) {
+      Tensor tensor;
+      auto success = tensor.FromProto(axis_node->attr().at({"value"}).tensor());
+      if (!success) {
+        LOG(ERROR) << "Failed to parse TensorProto.";
+      }
+      if (tensor.dims() == 1 && tensor.dim_size(0) == axis.size()) {
+        bool along_axis = true;
+        for (int i = 0; i < axis.size(); i++) {
+          along_axis = along_axis && (tensor.flat<int>()(i) == axis[i]);
+        }
+        if (along_axis) return true;
+      }
     }
     return false;
   }
 
-  NodeDef* AddNodeReductionConst(const string& suffix,
-                                 const string& depended_node,
-                                 const string& device) {
-    auto const_node = GraphProcessor::AddNodeReductionConst(
-        strings::StrCat(kReductionConst, "-", suffix), device);
-    // This is to ensure the Sum node and the const node are in the
-    // same frame.
-    *const_node->add_input() = AsControlDependency(depended_node);
-    return const_node;
-  }
+  bool IsAlongAllFourDims() const { return IsAlongAxis({0, 1, 2, 3}); }
 
-  string GetOrAddNodeReductionConst() {
-    string const_name;
-    if (is_in_frame_) {
-      auto const_node = AddNodeReductionConst(
-          node_->name(), NodeName(node_->input(0)), node_->device());
-      const_name = const_node->name();
-    } else {
-      const_name = kReductionConst;
-    }
-    return const_name;
+  bool IsAlongHWC() const { return IsAlongAxis({1, 2, 3}); }
+
+  bool IsAlongNHW() const { return IsAlongAxis({0, 1, 2}); }
+
+  bool IsAlongHW() const { return IsAlongAxis({1, 2}); }
+
+  bool IsAlongC() const { return IsAlongAxis({3}); }
+
+  bool KeepDims() const { return node_->attr().at("keep_dims").b(); }
+};
+
+class SwitchProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit SwitchProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
+
+ protected:
+  std::set<int> GetOutputPos() const override { return {0, 1}; }
+};
+
+class TileProcessor : public AgnosticNodeProcessor {
+ public:
+  explicit TileProcessor(const OptimizeContext& opt_cxt)
+      : AgnosticNodeProcessor(opt_cxt) {}
+
+ protected:
+  Status CustomizedProcessing() override {
+    DataType dtype = node_->attr().at("Tmultiples").type();
+    return UpdateOrTransformParamInput(1, "DataFormatVecPermute", dtype);
   }
 };
 
 class DataLayoutOptimizer : GraphProcessor {
  public:
   explicit DataLayoutOptimizer(
+      const GraphProperties& graph_properties,
       const VirtualPlacer& virtual_placer,
       const LayoutOptimizer::TuningConfig& config,
       const std::unordered_set<string>& nodes_to_preserve, GraphDef* graph,
       NodeMap* node_map)
-      : GraphProcessor(virtual_placer, nodes_to_preserve, graph, node_map),
+      : GraphProcessor(graph_properties, virtual_placer, nodes_to_preserve,
+                       graph, node_map),
         config_(config) {}
 
   Status Optimize() {
@@ -1402,23 +1836,13 @@ class DataLayoutOptimizer : GraphProcessor {
 
  private:
   NodeDef* AddNodePermNHWCToNCHW() {
-    return AddNodePermConst(kPermNHWCToNCHW, "", {0, 3, 1, 2});
+    return AddNodePermConst(LayoutOptimizerNode(kPermNHWCToNCHW), "",
+                            {0, 3, 1, 2});
   }
 
   NodeDef* AddNodePermNCHWToNHWC() {
-    return AddNodePermConst(kPermNCHWToNHWC, "", {0, 2, 3, 1});
-  }
-
-  NodeDef* AddNodeConcatConst() {
-    return AddNodeConstScalar(kConcatConst, "", DT_INT32, 1);
-  }
-
-  NodeDef* AddNodeGatherAxisConst() {
-    return AddNodeConstScalar(kGatherAxisConst, "", DT_INT32, 0);
-  }
-
-  NodeDef* AddNodeReductionConst() {
-    return GraphProcessor::AddNodeReductionConst(kReductionConst, "");
+    return AddNodePermConst(LayoutOptimizerNode(kPermNCHWToNHWC), "",
+                            {0, 2, 3, 1});
   }
 
   // Expand all nodes which is in NHWC, but supports NCHW or is layout agnostic.
@@ -1431,12 +1855,17 @@ class DataLayoutOptimizer : GraphProcessor {
     // This is the first pass where we expand the nodes which support NCHW.
     std::set<string> ops_format_supported = GetOpsFormatSupported();
     for (int i = 0; i < node_size_original; i++) {
+      if (IsNodeByLayoutOptimizer(graph_->node(i).name())) {
+        return Status(error::INVALID_ARGUMENT,
+                      "The graph is already optimized by layout optimizer.");
+      }
       if (ops_format_supported.find(graph_->node(i).op()) !=
           ops_format_supported.end()) {
         auto node = graph_->mutable_node(i);
         bool is_in_frame = !frames[node].empty();
-        OptimizeContext opt_cxt(graph_, node, node_map_, virtual_placer_,
-                                nodes_to_preserve_, is_in_frame);
+        OptimizeContext opt_cxt(graph_, node, node_map_, graph_properties_,
+                                virtual_placer_, nodes_to_preserve_,
+                                is_in_frame);
         std::unique_ptr<NodeProcessor> node_processor;
         if (IsAvgPoolGrad(*node)) {
           node_processor.reset(new AvgPoolGradProcessor(opt_cxt));
@@ -1457,10 +1886,14 @@ class DataLayoutOptimizer : GraphProcessor {
               new Conv2DBackpropFilterProcessor(opt_cxt, true));
         } else if (IsDepthwiseConv2dNativeBackpropInput(*node)) {
           node_processor.reset(new Conv2DBackpropInputProcessor(opt_cxt, true));
-        } else if (IsFusedBatchNormGradV1(*node)) {
+        } else if (IsFusedBatchNormGrad(*node)) {
           node_processor.reset(new FusedBatchNormGradProcessor(opt_cxt));
-        } else if (IsMaxPoolGradV1(*node)) {
+        } else if (IsMaxPoolV2(*node)) {
+          node_processor.reset(new MaxPoolV2Processor(opt_cxt));
+        } else if (IsMaxPoolGradV1(*node) || IsMaxPoolGradGradV1(*node)) {
           node_processor.reset(new MaxPoolGradProcessor(opt_cxt));
+        } else if (IsMaxPoolGradV2(*node) || IsMaxPoolGradGradV2(*node)) {
+          node_processor.reset(new MaxPoolGradV2Processor(opt_cxt));
         } else {
           node_processor.reset(new NodeProcessor(opt_cxt));
         }
@@ -1474,45 +1907,61 @@ class DataLayoutOptimizer : GraphProcessor {
     if (graph_->node_size() > node_size_original) {
       NodeDef* n = AddNodePermNHWCToNCHW();
       n = AddNodePermNCHWToNHWC();
-      n = AddNodeConcatConst();
-      n = AddNodeGatherAxisConst();
-      n = AddNodeReductionConst();
       std::set<string> ops_format_agnostic = GetOpsFormatAgnostic();
       for (int i = 0; i < graph_->node_size(); i++) {
         if (ops_format_agnostic.find(graph_->node(i).op()) !=
             ops_format_agnostic.end()) {
           auto node = graph_->mutable_node(i);
           bool is_in_frame = !frames[node].empty();
-          OptimizeContext opt_cxt(graph_, node, node_map_, virtual_placer_,
-                                  nodes_to_preserve_, is_in_frame);
+          OptimizeContext opt_cxt(graph_, node, node_map_, graph_properties_,
+                                  virtual_placer_, nodes_to_preserve_,
+                                  is_in_frame);
           std::unique_ptr<NodeProcessor> node_processor;
           if (IsAddN(*node)) {
             node_processor.reset(new AddNProcessor(opt_cxt));
-          } else if (IsAdd(*node) || IsMul(*node) || IsRealDiv(*node) ||
-                     IsSquaredDifference(*node) || IsSub(*node)) {
+          } else if (IsBetainc(*node)) {
+            node_processor.reset(new TernaryOpProcessor(opt_cxt));
+          } else if (IsBinaryOp(*node)) {
             node_processor.reset(new BinaryOpProcessor(opt_cxt));
           } else if (IsConcat(*node)) {
             node_processor.reset(new ConcatProcessor(opt_cxt));
-          } else if (IsPad(*node)) {
+          } else if (IsFill(*node)) {
+            node_processor.reset(new FillProcessor(opt_cxt));
+          } else if (IsHistogramSummary(*node)) {
+            node_processor.reset(new HistogramSummaryProcessor(opt_cxt));
+          } else if (IsIdentityN(*node)) {
+            node_processor.reset(new IdentityNProcessor(opt_cxt));
+          } else if (IsMerge(*node)) {
+            node_processor.reset(new MergeProcessor(opt_cxt));
+          } else if (IsPad(*node) || IsMirrorPad(*node) ||
+                     IsMirrorPadGrad(*node)) {
             node_processor.reset(new PadProcessor(opt_cxt));
-          } else if (IsReluGrad(*node)) {
-            node_processor.reset(new ReluGradProcessor(opt_cxt));
+          } else if (IsReduceOp(*node)) {
+            node_processor.reset(new ReduceProcessor(opt_cxt));
+          } else if (IsReverseV2(*node)) {
+            node_processor.reset(new ReverseProcessor(opt_cxt));
+          } else if (IsSelect(*node)) {
+            node_processor.reset(new SelectProcessor(opt_cxt));
           } else if (IsSlice(*node)) {
-            auto input1 = node_map_->GetNode(NodeName(node->input(1)));
-            auto input2 = node_map_->GetNode(NodeName(node->input(2)));
-            if (IsConcatOffset(*input1)) {
-              node_processor.reset(new SliceProcessorConcatOffset(opt_cxt));
-            } else if (IsConstant(*input1) && IsConstant(*input2)) {
-              node_processor.reset(new SliceProcessorConst(opt_cxt));
-            } else {
-              node_processor.reset(new SliceProcessor(opt_cxt));
-            }
+            node_processor.reset(new SliceProcessor(opt_cxt));
+          } else if (IsStridedSlice(*node)) {
+            node_processor.reset(new StridedSliceProcessor(opt_cxt));
+          } else if (IsShape(*node) || IsShapeN(*node)) {
+            node_processor.reset(new ShapeProcessor(opt_cxt));
           } else if (IsSplit(*node)) {
             node_processor.reset(new SplitProcessor(opt_cxt));
+          } else if (IsSplitV(*node)) {
+            node_processor.reset(new SplitVProcessor(opt_cxt));
           } else if (IsSqueeze(*node)) {
             node_processor.reset(new SqueezeProcessor(opt_cxt));
-          } else if (IsSum(*node)) {
-            node_processor.reset(new SumProcessor(opt_cxt));
+          } else if (IsStridedSliceGrad(*node)) {
+            node_processor.reset(new StridedSliceGradProcessor(opt_cxt));
+          } else if (IsSwitch(*node)) {
+            node_processor.reset(new SwitchProcessor(opt_cxt));
+          } else if (IsTile(*node)) {
+            node_processor.reset(new TileProcessor(opt_cxt));
+          } else if (IsUnaryGrad(*node)) {
+            node_processor.reset(new UnaryGradProcessor(opt_cxt));
           } else {
             node_processor.reset(new AgnosticNodeProcessor(opt_cxt));
           }
@@ -1530,8 +1979,16 @@ class DataLayoutOptimizer : GraphProcessor {
     for (int i = 0; i < graph_->node_size(); i++) {
       auto node = graph_->mutable_node(i);
       node->mutable_attr()->erase("_output_shapes");
-      if (IsNodeNHWCToNCHW(node->name())) {
-        if (IsNodeNCHWToNHWC(node->input(0))) {
+      if (IsTransposeNHWCToNCHW(node->name()) ||
+          IsDimMapNHWCToNCHW(node->name()) ||
+          IsVecPermuteNHWCToNCHW(node->name())) {
+        bool transpose_pair = IsTransposeNHWCToNCHW(node->name()) &&
+                              IsTransposeNCHWToNHWC(node->input(0));
+        bool dim_map_pair = IsDimMapNHWCToNCHW(node->name()) &&
+                            IsDimMapNCHWToNHWC(node->input(0));
+        bool vec_permute_pair = IsVecPermuteNHWCToNCHW(node->name()) &&
+                                IsVecPermuteNCHWToNHWC(node->input(0));
+        if (transpose_pair || dim_map_pair || vec_permute_pair) {
           const string& trans_first = node->input(0);
           const string& trans_second = node->name();
           auto outputs = node_map_->GetOutputs(trans_second);
@@ -1603,8 +2060,9 @@ Status LayoutOptimizer::Tune(const GrapplerItem& item,
     return status;
   }
   NodeMap node_map(output);
-  DataLayoutOptimizer layout_optimizer(*virtual_placer_, config,
-                                       nodes_to_preserve_, output, &node_map);
+  DataLayoutOptimizer layout_optimizer(graph_properties, *virtual_placer_,
+                                       config, nodes_to_preserve_, output,
+                                       &node_map);
   status = layout_optimizer.Optimize();
   return status;
 }
@@ -1620,27 +2078,20 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   virtual_placer_.reset(new VirtualPlacer(cluster));
   nodes_to_preserve_ = item.NodesToPreserve();
   GraphProperties graph_properties(item);
-  auto status = graph_properties.InferStatically();
+  auto status = graph_properties.InferStatically(false);
   if (!status.ok()) {
     *output = item.graph;
     return status;
   }
 
   TuningConfig config;
-  config.no_gemm = false;
+  config.no_gemm = true;
+  // TODO(yaozhang): Enable tuning with various TuningConfig choices wtih
+  // the measurement-based estimator.
   status = Tune(item, graph_properties, config, output);
-  // This is based on an empirical observation that if the introduced Transpose
-  // nodes is more than 30, not using GEMM implementation would result in better
-  // performance.
-  if (status.ok() && GetNumTranspose(*output) > 30) {
-    config.no_gemm = true;
-    status = Tune(item, graph_properties, config, output);
-  }
-
   if (!status.ok()) {
     *output = item.graph;
   }
-
   return status;
 }
 
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index 8c89f6744ba583f6a83362c4bf436eb811908796..587642c96e879f62f7ead809e7d01888ef320f93 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/device_properties.pb.h"
 
@@ -44,7 +45,7 @@ class LayoutOptimizerTest : public ::testing::Test {
 
   Output SimpleConv2D(tensorflow::Scope* s, int input_size, int filter_size,
                       const string& padding, const string& device) {
-    int batch_size = 128;
+    int batch_size = 8;
     int input_height = input_size;
     int input_width = input_size;
     int input_depth = 3;
@@ -71,6 +72,12 @@ class LayoutOptimizerTest : public ::testing::Test {
 
   Output SimpleConv2DBackpropInput(tensorflow::Scope* s, int input_size,
                                    int filter_size, const string& padding) {
+    return SimpleConv2DBackpropInput(s, input_size, filter_size, padding, true);
+  }
+
+  Output SimpleConv2DBackpropInput(tensorflow::Scope* s, int input_size,
+                                   int filter_size, const string& padding,
+                                   bool const_input_size) {
     int batch_size = 128;
     int input_height = input_size;
     int input_width = input_size;
@@ -100,11 +107,18 @@ class LayoutOptimizerTest : public ::testing::Test {
     Output output =
         ops::Const(s->WithOpName("Output"), Input::Initializer(output_data));
 
-    Output conv_backprop_input = ops::Conv2DBackpropInput(
-        s->WithOpName("Conv2DBackpropInput"), input_sizes, filter, output,
-        {1, stride, stride, 1}, padding);
-    TensorShape input_shape(
-        {batch_size, input_height, input_width, input_depth});
+    Output conv_backprop_input;
+    Output input_sizes_i =
+        ops::Identity(s->WithOpName("InputSizesIdentity"), input_sizes);
+    if (const_input_size) {
+      conv_backprop_input = ops::Conv2DBackpropInput(
+          s->WithOpName("Conv2DBackpropInput"), input_sizes, filter, output,
+          {1, stride, stride, 1}, padding);
+    } else {
+      conv_backprop_input = ops::Conv2DBackpropInput(
+          s->WithOpName("Conv2DBackpropInput"), input_sizes_i, filter, output,
+          {1, stride, stride, 1}, padding);
+    }
     return conv_backprop_input;
   }
 
@@ -158,8 +172,8 @@ TEST_F(LayoutOptimizerTest, Conv2DBackpropInput) {
 
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
-  string input_name = AddPrefixToNodeName("Conv2DBackpropInput-InputSizes",
-                                          "LayoutOptimizer", "-");
+  string input_name =
+      strings::StrCat("Conv2DBackpropInput-InputSizes", "-", "LayoutOptimizer");
   auto input_sizes_node = node_map.GetNode(input_name);
   CHECK(input_sizes_node);
   auto conv2d_backprop_node = node_map.GetNode("Conv2DBackpropInput");
@@ -171,6 +185,28 @@ TEST_F(LayoutOptimizerTest, Conv2DBackpropInput) {
   test::ExpectTensorEqual<int>(input_sizes_expected, input_sizes);
 }
 
+TEST_F(LayoutOptimizerTest, Conv2DBackpropInputNonConstInputSizes) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2DBackpropInput(&s, 7, 2, "SAME", false);
+  Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto conv2d_backprop_node = node_map.GetNode("Conv2DBackpropInput");
+  CHECK(conv2d_backprop_node);
+  EXPECT_EQ(conv2d_backprop_node->input(0),
+            "Conv2DBackpropInput-0-VecPermuteNHWCToNCHW-LayoutOptimizer");
+  auto input_sizes_node = node_map.GetNode(
+      "Conv2DBackpropInput-0-VecPermuteNHWCToNCHW-LayoutOptimizer");
+  CHECK(input_sizes_node);
+  EXPECT_EQ(input_sizes_node->input(0), "InputSizesIdentity");
+  EXPECT_EQ(input_sizes_node->op(), "DataFormatVecPermute");
+}
+
 TEST_F(LayoutOptimizerTest, FilterSizeIsOne) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv = SimpleConv2D(&s, 2, 1, "SAME");
@@ -181,8 +217,7 @@ TEST_F(LayoutOptimizerTest, FilterSizeIsOne) {
   GraphDef output;
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
-  EXPECT_FALSE(
-      node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input"));
+  EXPECT_TRUE(node_map.GetNode("Conv2D-0-TransposeNHWCToNCHW-LayoutOptimizer"));
 }
 
 TEST_F(LayoutOptimizerTest, FilterSizeNotOne) {
@@ -195,8 +230,7 @@ TEST_F(LayoutOptimizerTest, FilterSizeNotOne) {
   GraphDef output;
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
-  EXPECT_FALSE(
-      node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input"));
+  EXPECT_TRUE(node_map.GetNode("Conv2D-0-TransposeNHWCToNCHW-LayoutOptimizer"));
 }
 
 TEST_F(LayoutOptimizerTest, EqualSizeWithValidPadding) {
@@ -209,8 +243,7 @@ TEST_F(LayoutOptimizerTest, EqualSizeWithValidPadding) {
   GraphDef output;
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
-  EXPECT_FALSE(
-      node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input"));
+  EXPECT_TRUE(node_map.GetNode("Conv2D-0-TransposeNHWCToNCHW-LayoutOptimizer"));
 }
 
 TEST_F(LayoutOptimizerTest, EqualSizeWithSamePadding) {
@@ -223,13 +256,12 @@ TEST_F(LayoutOptimizerTest, EqualSizeWithSamePadding) {
   GraphDef output;
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
-  EXPECT_TRUE(
-      node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input-0"));
+  EXPECT_TRUE(node_map.GetNode("Conv2D-0-TransposeNHWCToNCHW-LayoutOptimizer"));
 }
 
 TEST_F(LayoutOptimizerTest, NotEqualSizeWithValidPadding) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -237,13 +269,12 @@ TEST_F(LayoutOptimizerTest, NotEqualSizeWithValidPadding) {
   GraphDef output;
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
-  EXPECT_TRUE(
-      node_map.GetNode("LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Input-0"));
+  EXPECT_TRUE(node_map.GetNode("Conv2D-0-TransposeNHWCToNCHW-LayoutOptimizer"));
 }
 
 TEST_F(LayoutOptimizerTest, Pad) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto c = ops::Const(s.WithOpName("c"), {1, 2, 3, 4, 5, 6, 7, 8}, {4, 2});
   auto p = ops::Pad(s.WithOpName("p"), conv, c);
   auto o = ops::Identity(s.WithOpName("o"), p);
@@ -257,7 +288,7 @@ TEST_F(LayoutOptimizerTest, Pad) {
   auto pad = node_map.GetNode("p");
   EXPECT_EQ(pad->input(0), "Conv2D");
 
-  auto pad_const = node_map.GetNode("LayoutOptimizer-p-c");
+  auto pad_const = node_map.GetNode("p-c-LayoutOptimizer");
   EXPECT_TRUE(pad_const);
   EXPECT_TRUE(pad_const->attr().find("value") != pad_const->attr().end());
   Tensor tensor;
@@ -270,7 +301,7 @@ TEST_F(LayoutOptimizerTest, Pad) {
 
 TEST_F(LayoutOptimizerTest, Connectivity) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto i1 = ops::Identity(s.WithOpName("i1"), conv);
   auto i2 = ops::Identity(s.WithOpName("i2"), i1);
   auto i3 = ops::Identity(s.WithOpName("i3"), i2);
@@ -298,9 +329,42 @@ TEST_F(LayoutOptimizerTest, Connectivity) {
   EXPECT_EQ(node_i2_output->input(0), "i1");
 }
 
+TEST_F(LayoutOptimizerTest, ConnectivityBinaryOpWithInputScalarAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto i1 = ops::Identity(s.WithOpName("i1"), conv);
+  auto i2 = ops::Identity(s.WithOpName("i2"), i1);
+  auto scalar_sub = ops::Const(s.WithOpName("scalar_sub"), 3.0f, {});
+  auto sub = ops::Sub(s.WithOpName("sub"), scalar_sub, i2);
+  auto i3 = ops::Identity(s.WithOpName("i3"), sub);
+  auto i4 = ops::Identity(s.WithOpName("i4"), i3);
+  auto i5 = ops::Identity(s.WithOpName("i5"), i4);
+  auto scalar_mul = ops::Const(s.WithOpName("scalar_mul"), 3.0f, {});
+  auto mul = ops::Mul(s.WithOpName("mul"), scalar_mul, i5);
+  auto i6 = ops::Identity(s.WithOpName("i6"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  // Make the graph not in topological order to test the handling of multi-hop
+  // connectivity (here we say two nodes are connected if all nodes in the
+  // middle are layout agnostic). If the graph is already in topological order,
+  // the problem is easier, where layout optimizer only needs to check
+  // single-hop connectivity.
+  NodeMap node_map_original(&item.graph);
+  auto node_i1 = node_map_original.GetNode("i1");
+  auto node_mul = node_map_original.GetNode("mul");
+  node_mul->Swap(node_i1);
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map_output(&output);
+  auto mul_node = node_map_output.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "scalar_mul");
+  EXPECT_EQ(mul_node->input(1), "i5");
+}
+
 TEST_F(LayoutOptimizerTest, PreserveFetch) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   auto i = ops::Identity(s.WithOpName("i"), conv);
   GrapplerItem item;
   item.fetch.push_back("Conv2D");
@@ -315,7 +379,7 @@ TEST_F(LayoutOptimizerTest, PreserveFetch) {
 
 TEST_F(LayoutOptimizerTest, EmptyDevice) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
   Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -330,7 +394,7 @@ TEST_F(LayoutOptimizerTest, EmptyDevice) {
 TEST_F(LayoutOptimizerTest, GPUDevice) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv =
-      SimpleConv2D(&s, 3, 2, "VALID", "/job:w/replica:0/task:0/device:gpu:0");
+      SimpleConv2D(&s, 4, 2, "VALID", "/job:w/replica:0/task:0/device:gpu:0");
   Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -345,7 +409,7 @@ TEST_F(LayoutOptimizerTest, GPUDevice) {
 TEST_F(LayoutOptimizerTest, CPUDeviceLowercase) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   auto conv =
-      SimpleConv2D(&s, 3, 2, "VALID", "/job:w/replica:0/task:0/device:cpu:0");
+      SimpleConv2D(&s, 4, 2, "VALID", "/job:w/replica:0/task:0/device:cpu:0");
   Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -359,7 +423,7 @@ TEST_F(LayoutOptimizerTest, CPUDeviceLowercase) {
 
 TEST_F(LayoutOptimizerTest, CPUDeviceUppercase) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID", "/CPU:0");
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID", "/CPU:0");
   Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -401,7 +465,7 @@ TEST_F(LayoutOptimizerTest, FusedBatchNormGradTrainingFalse) {
 
 TEST_F(LayoutOptimizerTest, SplitDimC) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
   auto c = ops::Const(s.WithOpName("c"), 3, {});
   auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
   auto i = ops::Identity(s.WithOpName("i"), split[0]);
@@ -412,16 +476,16 @@ TEST_F(LayoutOptimizerTest, SplitDimC) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(0), "split-c-LayoutOptimizer");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  auto split_const = node_map.GetNode("split-c-LayoutOptimizer");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 1);
 }
 
 TEST_F(LayoutOptimizerTest, SplitDimH) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 6, 2, "SAME");
   auto c = ops::Const(s.WithOpName("c"), 1, {});
   auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
   auto i = ops::Identity(s.WithOpName("i"), split[0]);
@@ -432,16 +496,16 @@ TEST_F(LayoutOptimizerTest, SplitDimH) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(0), "split-c-LayoutOptimizer");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  auto split_const = node_map.GetNode("split-c-LayoutOptimizer");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 2);
 }
 
 TEST_F(LayoutOptimizerTest, SplitDimW) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
   auto c = ops::Const(s.WithOpName("c"), 2, {});
   auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
   auto i = ops::Identity(s.WithOpName("i"), split[0]);
@@ -452,16 +516,16 @@ TEST_F(LayoutOptimizerTest, SplitDimW) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(0), "split-c-LayoutOptimizer");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  auto split_const = node_map.GetNode("split-c-LayoutOptimizer");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 3);
 }
 
 TEST_F(LayoutOptimizerTest, SplitDimN) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
   auto c = ops::Const(s.WithOpName("c"), 0, {});
   auto split = ops::Split(s.WithOpName("split"), c, conv, 2);
   auto i = ops::Identity(s.WithOpName("i"), split[0]);
@@ -472,16 +536,16 @@ TEST_F(LayoutOptimizerTest, SplitDimN) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "LayoutOptimizerSplitConst-split");
+  EXPECT_EQ(split_node->input(0), "split-c-LayoutOptimizer");
   EXPECT_EQ(split_node->input(1), "Conv2D");
-  auto split_const = node_map.GetNode("LayoutOptimizerSplitConst-split");
+  auto split_const = node_map.GetNode("split-c-LayoutOptimizer");
   EXPECT_EQ(split_const->op(), "Const");
   EXPECT_EQ(split_const->attr().at({"value"}).tensor().int_val(0), 0);
 }
 
 TEST_F(LayoutOptimizerTest, SplitNonConstDim) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  auto conv = SimpleConv2D(&s, 3, 2, "VALID");
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
   auto c = ops::Const(s.WithOpName("c"), 0, {});
   auto i1 = ops::Identity(s.WithOpName("i1"), c);
   auto split = ops::Split(s.WithOpName("split"), i1, conv, 2);
@@ -493,11 +557,552 @@ TEST_F(LayoutOptimizerTest, SplitNonConstDim) {
   Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
   NodeMap node_map(&output);
   auto split_node = node_map.GetNode("split");
-  EXPECT_EQ(split_node->input(0), "i1");
-  EXPECT_EQ(split_node->input(1),
-            "LayoutOptimizerTransposeNCHWToNHWC-Conv2D-split");
+  EXPECT_EQ(split_node->input(0), "split-0-DimMapNHWCToNCHW-LayoutOptimizer");
+  EXPECT_EQ(split_node->input(1), "Conv2D");
+  auto map_node = node_map.GetNode("split-0-DimMapNHWCToNCHW-LayoutOptimizer");
+  EXPECT_EQ(map_node->op(), "DataFormatDimMap");
+  EXPECT_EQ(map_node->input(0), "i1");
+}
+
+TEST_F(LayoutOptimizerTest, SplitSamePortToMultipleInputsOfSameNode) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
+  auto axis = ops::Const(s.WithOpName("axis"), 3);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat =
+      ops::Concat(s.WithOpName("concat"), {split[1], split[1], split[1]}, axis);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split:1");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "split:1");
+  EXPECT_EQ(concat_node->input(3), "concat-axis-LayoutOptimizer");
+  auto concat_dim = node_map.GetNode("concat-axis-LayoutOptimizer");
+  EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1);
+}
+
+TEST_F(LayoutOptimizerTest, ConcatDimH) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "SAME");
+  auto axis = ops::Const(s.WithOpName("axis"), 1);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "concat-axis-LayoutOptimizer");
+  auto concat_dim = node_map.GetNode("concat-axis-LayoutOptimizer");
+  EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 2);
+}
+
+TEST_F(LayoutOptimizerTest, ConcatNonConst) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "SAME");
+  auto axis = ops::Const(s.WithOpName("axis"), 1);
+  auto i = ops::Identity(s.WithOpName("i"), axis);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, i);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "concat-2-DimMapNHWCToNCHW-LayoutOptimizer");
+  auto concat_dim =
+      node_map.GetNode("concat-2-DimMapNHWCToNCHW-LayoutOptimizer");
+  EXPECT_EQ(concat_dim->op(), "DataFormatDimMap");
+  EXPECT_EQ(concat_dim->input(0), "i");
+}
+
+TEST_F(LayoutOptimizerTest, ConcatDimW) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "SAME");
+  auto axis = ops::Const(s.WithOpName("axis"), 2);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "concat-axis-LayoutOptimizer");
+  auto concat_dim = node_map.GetNode("concat-axis-LayoutOptimizer");
+  EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 3);
+}
+
+TEST_F(LayoutOptimizerTest, ConcatDimN) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto axis = ops::Const(s.WithOpName("axis"), 0);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "concat-axis-LayoutOptimizer");
+  auto concat_dim = node_map.GetNode("concat-axis-LayoutOptimizer");
+  EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 0);
+}
+
+TEST_F(LayoutOptimizerTest, ConcatDimC) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto axis = ops::Const(s.WithOpName("axis"), 3);
+  auto split = ops::Split(s.WithOpName("split"), axis, conv, 2);
+  auto concat = ops::Concat(s.WithOpName("concat"), {split[0], split[1]}, axis);
+  auto o = ops::Identity(s.WithOpName("o"), concat);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto concat_node = node_map.GetNode("concat");
+  EXPECT_EQ(concat_node->input(0), "split");
+  EXPECT_EQ(concat_node->input(1), "split:1");
+  EXPECT_EQ(concat_node->input(2), "concat-axis-LayoutOptimizer");
+  auto concat_dim = node_map.GetNode("concat-axis-LayoutOptimizer");
+  EXPECT_EQ(concat_dim->attr().at({"value"}).tensor().int_val(0), 1);
+}
+
+TEST_F(LayoutOptimizerTest, Sum) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto reduction_indices =
+      ops::Const(s.WithOpName("reduction_indices"), {0, 1, 2}, {3});
+  auto sum = ops::Sum(s.WithOpName("sum"), conv, reduction_indices);
+  auto o = ops::Identity(s.WithOpName("o"), sum);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  // TODO(yaozhang): enable SumProcessor with auto-tuning. Currently disabled
+  // because of the worse performance in some cases.
+  /*
+  NodeMap node_map(&output);
+  auto sum_node = node_map.GetNode("sum");
+  EXPECT_EQ(sum_node->input(0), "Conv2D");
+  EXPECT_EQ(sum_node->input(1), "LayoutOptimizer-sum-reduction_indices");
+  auto sum_const = node_map.GetNode("LayoutOptimizer-sum-reduction_indices");
+  Tensor tensor;
+  EXPECT_TRUE(
+      tensor.FromProto(sum_const->mutable_attr()->at({"value"}).tensor()));
+  Tensor tensor_expected(DT_INT32, {3});
+  test::FillValues<int>(&tensor_expected, {0, 2, 3});
+  test::ExpectTensorEqual<int>(tensor_expected, tensor);
+  */
+}
+
+TEST_F(LayoutOptimizerTest, MulScalarAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {});
+  auto mul = ops::Mul(s.WithOpName("mul"), scalar, conv);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "scalar");
+  EXPECT_EQ(mul_node->input(1), "Conv2D");
+}
+
+TEST_F(LayoutOptimizerTest, Mul4DAndScalar) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto scalar = ops::Const(s.WithOpName("scalar"), 3.0f, {});
+  auto mul = ops::Mul(s.WithOpName("mul"), conv, scalar);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "Conv2D");
+  EXPECT_EQ(mul_node->input(1), "scalar");
+}
+
+TEST_F(LayoutOptimizerTest, Mul4DAndUnknownRank) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto unknown_rank =
+      ops::Placeholder(s.WithOpName("unknown"), DT_FLOAT,
+                       ops::Placeholder::Shape(PartialTensorShape()));
+  Output c = ops::Const(s.WithOpName("c"), 3.0f, {8, 2, 2, 2});
+  Output mul = ops::Mul(s.WithOpName("mul"), conv, unknown_rank);
+  auto o = ops::AddN(s.WithOpName("o"), {mul, c});
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  // Node mul should not be processed by layout optimizer, because one of its
+  // inputs is of unknown rank.
+  EXPECT_EQ(mul_node->input(0),
+            "Conv2D-0-0-TransposeNCHWToNHWC-LayoutOptimizer");
+  EXPECT_EQ(mul_node->input(1), "unknown");
 }
 
+TEST_F(LayoutOptimizerTest, Mul4DAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto i = ops::Identity(s.WithOpName("i"), conv);
+  auto mul = ops::Mul(s.WithOpName("mul"), conv, i);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "Conv2D");
+  EXPECT_EQ(mul_node->input(1), "i");
+}
+
+TEST_F(LayoutOptimizerTest, Mul4DAndVector) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2});
+  auto mul = ops::Mul(s.WithOpName("mul"), conv, vector);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "Conv2D");
+  EXPECT_EQ(mul_node->input(1), "mul-1-ReshapeNHWCToNCHW-LayoutOptimizer");
+  auto mul_const = node_map.GetNode("mul-1-ReshapeConst-LayoutOptimizer");
+  Tensor tensor;
+  EXPECT_TRUE(
+      tensor.FromProto(mul_const->mutable_attr()->at({"value"}).tensor()));
+  Tensor tensor_expected(DT_INT32, {4});
+  test::FillValues<int>(&tensor_expected, {1, 2, 1, 1});
+  test::ExpectTensorEqual<int>(tensor_expected, tensor);
+}
+
+TEST_F(LayoutOptimizerTest, MulVectorAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto vector = ops::Const(s.WithOpName("vector"), {3.0f, 7.0f}, {2});
+  auto mul = ops::Mul(s.WithOpName("mul"), vector, conv);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto mul_node = node_map.GetNode("mul");
+  EXPECT_EQ(mul_node->input(0), "mul-0-ReshapeNHWCToNCHW-LayoutOptimizer");
+  EXPECT_EQ(mul_node->input(1), "Conv2D");
+  auto mul_const = node_map.GetNode("mul-0-ReshapeConst-LayoutOptimizer");
+  Tensor tensor;
+  EXPECT_TRUE(
+      tensor.FromProto(mul_const->mutable_attr()->at({"value"}).tensor()));
+  Tensor tensor_expected(DT_INT32, {4});
+  test::FillValues<int>(&tensor_expected, {1, 2, 1, 1});
+  test::ExpectTensorEqual<int>(tensor_expected, tensor);
+}
+
+TEST_F(LayoutOptimizerTest, SliceConst) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
+  auto begin = ops::Const(s.WithOpName("begin"), {0, 2, 3, 1}, {4});
+  auto size = ops::Const(s.WithOpName("size"), {4, 1, 2, 4}, {4});
+  auto slice = ops::Slice(s.WithOpName("slice"), conv, begin, size);
+  auto o = ops::Identity(s.WithOpName("o"), slice);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto slice_node = node_map.GetNode("slice");
+  EXPECT_EQ(slice_node->input(0), "Conv2D");
+  EXPECT_EQ(slice_node->input(1), "slice-begin-LayoutOptimizer");
+  EXPECT_EQ(slice_node->input(2), "slice-size-LayoutOptimizer");
+
+  auto begin_const = node_map.GetNode("slice-begin-LayoutOptimizer");
+  Tensor begin_tensor;
+  EXPECT_TRUE(begin_tensor.FromProto(
+      begin_const->mutable_attr()->at({"value"}).tensor()));
+  Tensor begin_tensor_expected(DT_INT32, {4});
+  test::FillValues<int>(&begin_tensor_expected, {0, 1, 2, 3});
+  test::ExpectTensorEqual<int>(begin_tensor_expected, begin_tensor);
+
+  auto size_const = node_map.GetNode("slice-size-LayoutOptimizer");
+  Tensor size_tensor;
+  EXPECT_TRUE(size_tensor.FromProto(
+      size_const->mutable_attr()->at({"value"}).tensor()));
+  Tensor size_tensor_expected(DT_INT32, {4});
+  test::FillValues<int>(&size_tensor_expected, {4, 4, 1, 2});
+  test::ExpectTensorEqual<int>(size_tensor_expected, size_tensor);
+}
+
+TEST_F(LayoutOptimizerTest, SliceNonConst) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 5, 2, "VALID");
+  auto begin = ops::Const(s.WithOpName("begin"), {0, 2, 3, 1}, {4});
+  auto ibegin = ops::Identity(s.WithOpName("ibegin"), begin);
+  auto size = ops::Const(s.WithOpName("size"), {4, 1, 2, 4}, {4});
+  auto isize = ops::Identity(s.WithOpName("isize"), size);
+  auto slice = ops::Slice(s.WithOpName("slice"), conv, ibegin, isize);
+  auto o = ops::Identity(s.WithOpName("o"), slice);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto slice_node = node_map.GetNode("slice");
+  EXPECT_EQ(slice_node->input(0), "Conv2D");
+  EXPECT_EQ(slice_node->input(1),
+            "slice-1-VecPermuteNHWCToNCHW-LayoutOptimizer");
+  EXPECT_EQ(slice_node->input(2),
+            "slice-2-VecPermuteNHWCToNCHW-LayoutOptimizer");
+  auto perm1 = node_map.GetNode("slice-1-VecPermuteNHWCToNCHW-LayoutOptimizer");
+  EXPECT_EQ(perm1->op(), "DataFormatVecPermute");
+  EXPECT_EQ(perm1->input(0), "ibegin");
+  auto perm2 = node_map.GetNode("slice-2-VecPermuteNHWCToNCHW-LayoutOptimizer");
+  EXPECT_EQ(perm1->op(), "DataFormatVecPermute");
+  EXPECT_EQ(perm2->input(0), "isize");
+}
+
+TEST_F(LayoutOptimizerTest, DoNotApplyOptimizerTwice) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto scalar =
+      ops::Const(s.WithOpName("AlreadyApplied-LayoutOptimizer"), 3.0f, {});
+  auto mul = ops::Mul(s.WithOpName("mul"), scalar, scalar);
+  auto o = ops::Identity(s.WithOpName("o"), mul);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  EXPECT_TRUE(errors::IsInvalidArgument(status));
+}
+
+TEST_F(LayoutOptimizerTest, ShapeNWithInputs4DAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto shapen = ops::ShapeN(s.WithOpName("shapen"), {conv, conv});
+  auto add = ops::Add(s.WithOpName("add"), shapen[0], shapen[1]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto shapen_node = node_map.GetNode("shapen");
+  EXPECT_EQ(shapen_node->input(0), "Conv2D");
+  EXPECT_EQ(shapen_node->input(1), "Conv2D");
+  auto add_node = node_map.GetNode("add");
+  EXPECT_EQ(add_node->input(0),
+            "shapen-0-0-VecPermuteNCHWToNHWC-LayoutOptimizer");
+  EXPECT_EQ(add_node->input(1),
+            "shapen-0-1-VecPermuteNCHWToNHWC-LayoutOptimizer");
+  auto vec_permute1 =
+      node_map.GetNode("shapen-0-0-VecPermuteNCHWToNHWC-LayoutOptimizer");
+  EXPECT_EQ(vec_permute1->input(0), "shapen");
+  EXPECT_EQ(vec_permute1->op(), "DataFormatVecPermute");
+  auto vec_permute2 =
+      node_map.GetNode("shapen-0-1-VecPermuteNCHWToNHWC-LayoutOptimizer");
+  EXPECT_EQ(vec_permute2->input(0), "shapen:1");
+  EXPECT_EQ(vec_permute2->op(), "DataFormatVecPermute");
+}
+
+TEST_F(LayoutOptimizerTest, ShapeNWithInputsVectorAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto vector = ops::Const(s.WithOpName("vector"), 3.0f, {7});
+  auto shapen = ops::ShapeN(s.WithOpName("shapen"), {vector, conv});
+  auto add = ops::Add(s.WithOpName("add"), shapen[0], shapen[1]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto shapen_node = node_map.GetNode("shapen");
+  EXPECT_EQ(shapen_node->input(0), "vector");
+  EXPECT_EQ(shapen_node->input(1), "Conv2D");
+  auto add_node = node_map.GetNode("add");
+  EXPECT_EQ(add_node->input(0), "shapen");
+  EXPECT_EQ(add_node->input(1),
+            "shapen-0-1-VecPermuteNCHWToNHWC-LayoutOptimizer");
+  auto vec_permute =
+      node_map.GetNode("shapen-0-1-VecPermuteNCHWToNHWC-LayoutOptimizer");
+  EXPECT_EQ(vec_permute->input(0), "shapen:1");
+  EXPECT_EQ(vec_permute->op(), "DataFormatVecPermute");
+}
+
+TEST_F(LayoutOptimizerTest, ShapeNWithInputs4DAndNoNeedToTransform4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto tensor_4d = ops::Const(s.WithOpName("tensor_4d"), 3.0f, {1, 1, 1, 3});
+  auto i1 = ops::Identity(s.WithOpName("i1"), tensor_4d);
+  Output i2 = ops::Identity(s.WithOpName("i2"), i1);
+  auto shapen = ops::ShapeN(s.WithOpName("shapen"), {conv, i2});
+  auto add = ops::Add(s.WithOpName("add"), shapen[0], shapen[1]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto shapen_node = node_map.GetNode("shapen");
+  EXPECT_EQ(shapen_node->input(0), "Conv2D");
+  EXPECT_EQ(shapen_node->input(1), "i2");
+}
+
+TEST_F(LayoutOptimizerTest, Switch) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  ops::Variable ctrl(s.WithOpName("ctrl"), {}, DT_BOOL);
+  auto sw = ops::Switch(s.WithOpName("switch"), conv, ctrl);
+  auto i1 = ops::Identity(s.WithOpName("i1"), sw.output_true);
+  auto i2 = ops::Identity(s.WithOpName("i2"), sw.output_false);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto switch_node = node_map.GetNode("switch");
+  EXPECT_EQ(switch_node->input(0), "Conv2D");
+  EXPECT_EQ(switch_node->input(1), "ctrl");
+  auto i1_node = node_map.GetNode("i1");
+  auto i2_node = node_map.GetNode("i2");
+  auto trans1 = node_map.GetNode(i1_node->input(0));
+  EXPECT_EQ(trans1->input(0), "switch:1");
+  auto trans2 = node_map.GetNode(i2_node->input(0));
+  EXPECT_EQ(trans2->input(0), "switch");
+}
+
+TEST_F(LayoutOptimizerTest, MergeBothInputsConvertible) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  Output i1 = ops::Identity(s.WithOpName("i1"), conv);
+  auto merge = ops::Merge(s.WithOpName("merge"), {conv, i1});
+  auto i2 = ops::Identity(s.WithOpName("i2"), merge.output);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto merge_node = node_map.GetNode("merge");
+  EXPECT_EQ(merge_node->input(0), "Conv2D");
+  EXPECT_EQ(merge_node->input(1), "i1");
+  auto i2_node = node_map.GetNode("i2");
+  EXPECT_EQ(i2_node->input(0), "merge-0-0-TransposeNCHWToNHWC-LayoutOptimizer");
+  auto transpose =
+      node_map.GetNode("merge-0-0-TransposeNCHWToNHWC-LayoutOptimizer");
+  EXPECT_EQ(transpose->input(0), "merge");
+}
+
+TEST_F(LayoutOptimizerTest, MergeOneInputNotConvertible) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto tensor_4d = ops::Const(s.WithOpName("tensor_4d"), 3.0f, {1, 1, 1, 3});
+  auto merge = ops::Merge(s.WithOpName("merge"), {tensor_4d, conv});
+  auto i2 = ops::Identity(s.WithOpName("i2"), merge.output);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto merge_node = node_map.GetNode("merge");
+  EXPECT_EQ(merge_node->input(0), "tensor_4d");
+  EXPECT_EQ(merge_node->input(1),
+            "Conv2D-0-1-TransposeNCHWToNHWC-LayoutOptimizer");
+}
+
+TEST_F(LayoutOptimizerTest, Complex) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto comp = ops::Complex(s.WithOpName("complex"), conv, conv);
+  auto i = ops::Identity(s.WithOpName("i"), comp);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto merge_node = node_map.GetNode("complex");
+  EXPECT_EQ(merge_node->input(0), "Conv2D");
+  EXPECT_EQ(merge_node->input(1), "Conv2D");
+  auto trans =
+      node_map.GetNode("complex-0-0-TransposeNCHWToNHWC-LayoutOptimizer");
+  EXPECT_EQ(trans->attr().at("T").type(), DT_COMPLEX64);
+}
+
+TEST_F(LayoutOptimizerTest, IdentityNWithInputsVectorAnd4D) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto conv = SimpleConv2D(&s, 4, 2, "VALID");
+  auto vector = ops::Const(s.WithOpName("vector"), 3.0f, {2});
+  auto identity_n = ops::IdentityN(s.WithOpName("identity_n"), {vector, conv});
+  auto add = ops::Add(s.WithOpName("add"), identity_n[0], identity_n[1]);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  LayoutOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(virtual_cluster_.get(), item, &output);
+  NodeMap node_map(&output);
+  auto i = node_map.GetNode("identity_n");
+  EXPECT_EQ(i->input(0), "vector");
+  EXPECT_EQ(i->input(1), "Conv2D");
+  auto trans =
+      node_map.GetNode("identity_n-0-1-TransposeNCHWToNHWC-LayoutOptimizer");
+  EXPECT_EQ(trans->input(0), "identity_n:1");
+  auto add_node = node_map.GetNode("add");
+  EXPECT_EQ(add_node->input(0), "identity_n");
+  EXPECT_EQ(add_node->input(1),
+            "identity_n-0-1-TransposeNCHWToNHWC-LayoutOptimizer");
+}
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
index 7c44ce15c6efee1ca375665976db1dc15dc01096..8418abd80f84a7675dd34414dc582fb31089672b 100644
--- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
@@ -23,6 +23,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/grappler/costs/graph_memory.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
@@ -412,6 +413,12 @@ void RecomputeSubgraph(
 void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level,
                                 const string& recomputation_targets_name_prefix,
                                 GraphDef* graph, const GrapplerItem& item) {
+  if (optimization_level != RewriterConfig::RECOMPUTATION_HEURISTICS &&
+      optimization_level != RewriterConfig::HEURISTICS &&
+      optimization_level != RewriterConfig::MANUAL) {
+    // Nothing to do
+    return;
+  }
   // The topological numberings and NodeMap will be stale as soon as we start
   // modifying the graph in RecomputeSubgraph. However, RecomputeSubgraph only
   // looks up nodes which were in the original graph, and preserves the graph
@@ -419,7 +426,7 @@ void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level,
   // We don't use the results of this topological sort until later, but this
   // call invalidates all NodeDef pointers, so it needs to be done before we
   // start collecting those.
-  TopologicalSort(graph);
+  TF_CHECK_OK(TopologicalSort(graph));
   NodeMap node_map(graph);
   std::vector<RecomputedSubGraph> recomputed_subgraphs;
   // Do not recompute nodes which are fed, since the recomputed node would not
@@ -480,19 +487,175 @@ void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level,
   }
 }
 
-std::pair<NodeDef*, NodeDef*> BuildSwapPair(NodeDef* node, int input_to_swap,
-                                            GraphDef* graph) {
+bool SchedulingPass(Cluster* cluster, GrapplerItem* item) {
+  // Look for AddN nodes and record input names.
+  GraphView view(&item->graph);
+
+  std::unordered_map<string, std::unordered_set<NodeDef*>> addn_list;
+  for (NodeDef& node : *item->graph.mutable_node()) {
+    if (!IsAddN(node)) {
+      continue;
+    }
+    // There is nothing to gain by optimizing nodes with 2 inputs of fewer.
+    if (view.NumFanins(node, false) <= 2) {
+      continue;
+    }
+    for (const auto& input : view.GetFanins(node, false)) {
+      if (input.node->device() == node.device()) {
+        string tensor_name =
+            strings::StrCat(input.node->name(), ":", input.port_id);
+        addn_list[tensor_name].insert(&node);
+      }
+    }
+  }
+
+  GraphMemory memory(*item);
+  const std::unordered_map<string, DeviceProperties>& devices =
+      cluster->GetDevices();
+  Status s = memory.InferStatically(devices);
+  if (!s.ok()) {
+    VLOG(1) << "Failed to infer memory usage: " << s.error_message();
+    return false;
+  }
+
+  std::unordered_set<NodeDef*> addn_to_rewrite;
+  for (const auto& device : devices) {
+    const string& name = device.first;
+    const DeviceProperties& prop = device.second;
+    if (prop.memory_size() <= 0) {
+      VLOG(1) << "Available memory unknown for device " << name;
+      continue;
+    }
+    const GraphMemory::MemoryUsage& mem_usage = memory.GetPeakMemoryUsage(name);
+
+    if (mem_usage.used_memory <= prop.memory_size() * 0.8) {
+      continue;
+    }
+
+    for (const auto& live : mem_usage.live_tensors) {
+      string tensor_name = strings::StrCat(live.node, ":", live.output_id);
+      auto it = addn_list.find(tensor_name);
+      if (it != addn_list.end()) {
+        addn_to_rewrite.insert(it->second.begin(), it->second.end());
+      }
+    }
+  }
+
+  if (addn_to_rewrite.empty()) {
+    return false;
+  }
+  GraphProperties properties(*item);
+  s = properties.InferStatically(false);
+  if (!s.ok()) {
+    VLOG(1) << "Failed to infer shapes: " << s.error_message();
+    return false;
+  }
+
+  bool updated_graph = false;
+  // Rewrite the AddN.
+  for (NodeDef* node : addn_to_rewrite) {
+    if (!properties.HasOutputProperties(node->name())) {
+      VLOG(1) << "Missing properties for " << node->name();
+      continue;
+    }
+    const TensorShapeProto& shape =
+        properties.GetOutputProperties(node->name())[0].shape();
+    DataType dtype = node->attr().at("T").type();
+    const string& device = node->device();
+
+    // Create the temporary variable that will hold intermediate results
+    NodeDef* tmp_var = item->graph.add_node();
+    tmp_var->set_name(strings::StrCat(node->name(), "/tmp_var"));
+    tmp_var->set_op("TemporaryVariable");
+    tmp_var->set_device(device);
+    (*tmp_var->mutable_attr())["dtype"].set_type(dtype);
+    *(*tmp_var->mutable_attr())["shape"].mutable_shape() = shape;
+    (*tmp_var->mutable_attr())["var_name"].set_s(tmp_var->name());
+
+    // Initialize it to zero
+    NodeDef* zeros = item->graph.add_node();
+    zeros->set_name(strings::StrCat(node->name(), "/tmp_var_zeros"));
+    zeros->set_op("ZerosLike");
+    zeros->set_device(device);
+    (*zeros->mutable_attr())["T"].set_type(dtype);
+    *zeros->add_input() = node->input(0);
+
+    NodeDef* initialize = item->graph.add_node();
+    initialize->set_name(strings::StrCat(node->name(), "/tmp_var_initializer"));
+    initialize->set_op("Assign");
+    initialize->set_device(device);
+    (*initialize->mutable_attr())["T"].set_type(dtype);
+    *initialize->add_input() = tmp_var->name();
+    *initialize->add_input() = zeros->name();
+
+    // Add the assignadd nodes
+    std::vector<NodeDef*> accumulates;
+    for (int i = 0; i < node->input_size(); ++i) {
+      const string& input = node->input(i);
+      if (IsControlInput(input)) {
+        *zeros->add_input() = input;
+      } else {
+        NodeDef* accumulate = item->graph.add_node();
+        accumulate->set_name(
+            strings::StrCat(node->name(), "/tmp_var_accum_", i));
+        accumulate->set_op("AssignAdd");
+        accumulate->set_device(device);
+        (*accumulate->mutable_attr())["T"].set_type(dtype);
+        *accumulate->add_input() = initialize->name();
+        *accumulate->add_input() = input;
+        accumulates.push_back(accumulate);
+      }
+    }
+
+    // Rewrite the AddN node as a DestroyTemporaryVariable ops
+    node->set_op("DestroyTemporaryVariable");
+    node->clear_input();
+    node->clear_attr();
+    (*node->mutable_attr())["T"].set_type(dtype);
+    (*node->mutable_attr())["var_name"].set_s(tmp_var->name());
+    *node->add_input() = initialize->name();
+    for (const NodeDef* accum : accumulates) {
+      *node->add_input() = AsControlDependency(accum->name());
+    }
+    updated_graph = true;
+  }
+
+  return updated_graph;
+}
+
+Status BuildSwapPair(NodeDef* node, int input_to_swap,
+                     const std::unordered_map<string, const NodeDef*>& name_map,
+                     GraphDef* graph,
+                     std::pair<NodeDef*, NodeDef*>* swap_pair) {
+  const OpDef* op_def;
+  TF_RETURN_IF_ERROR(OpRegistry::Global()->LookUpOpDef(node->op(), &op_def));
+  DataType input_type;
+  TF_RETURN_IF_ERROR(
+      InputTypeForNode(*node, *op_def, input_to_swap, &input_type));
+  if (IsRefType(input_type)) {
+    return errors::InvalidArgument("Can't swap input ", input_to_swap,
+                                   " of node ", node->name(),
+                                   " since it expects a reference");
+  }
+
   string tensor_to_swap = strings::StrCat(node->name(), "_", input_to_swap);
+  string swap_out_name = strings::StrCat("swap_out_", tensor_to_swap);
+  string swap_in_name = strings::StrCat("swap_in_", tensor_to_swap);
+  if (name_map.find(swap_out_name) != name_map.end() ||
+      name_map.find(swap_in_name) != name_map.end()) {
+    return errors::InvalidArgument("Input ", input_to_swap, " of node ",
+                                   node->name(), " is already swapped");
+  }
 
   // Force the tensor to be copied to cpu.
   NodeDef* swap_out_node = graph->add_node();
-  swap_out_node->set_name(strings::StrCat("swap_out_", tensor_to_swap));
+  swap_out_node->set_name(swap_out_name);
   swap_out_node->set_op("Identity");
-  swap_out_node->set_device("/CPU");
+  swap_out_node->set_device("/device:CPU:0");
 
   // Force the tensor to be restored to the device.
   NodeDef* swap_in_node = graph->add_node();
-  swap_in_node->set_name(strings::StrCat("swap_in_", tensor_to_swap));
+  swap_in_node->set_name(swap_in_name);
   swap_in_node->set_op("Identity");
   *swap_in_node->add_input() = swap_out_node->name();
 
@@ -501,10 +664,11 @@ std::pair<NodeDef*, NodeDef*> BuildSwapPair(NodeDef* node, int input_to_swap,
   (*swap_in_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group);
   (*node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group);
 
-  const DataType input_type = node->attr().at("T").type();
   (*swap_in_node->mutable_attr())["T"].set_type(input_type);
   (*swap_out_node->mutable_attr())["T"].set_type(input_type);
-  return std::make_pair(swap_out_node, swap_in_node);
+  *swap_pair = std::make_pair(swap_out_node, swap_in_node);
+
+  return Status::OK();
 }
 
 static int64 EstimateSize(const OpInfo::TensorProperties& t) {
@@ -531,7 +695,7 @@ struct SwapInfo {
   Costs::NanoSeconds time_to_swap = 0;
 };
 
-static const NodeDef* FindSwapTrigger(
+static const NodeDef* FindSwapInTrigger(
     const NodeDef* node, const SwapInfo& swap_info,
     const std::unordered_map<string, const NodeDef*>& name_map,
     const std::unordered_map<const NodeDef*, Costs::NanoSeconds>&
@@ -568,9 +732,12 @@ static const NodeDef* FindSwapTrigger(
   max_trigger_time -= swap_info.time_to_swap;
 
   std::map<Costs::NanoSeconds, const NodeDef*> candidates;
+  std::set<string> already_processed;
+
   while (!possible_inputs.empty()) {
     const string input_node_name = *possible_inputs.begin();
     possible_inputs.erase(possible_inputs.begin());
+    already_processed.insert(input_node_name);
     auto it1 = name_map.find(input_node_name);
     if (it1 == name_map.end()) {
       return nullptr;
@@ -579,7 +746,7 @@ static const NodeDef* FindSwapTrigger(
     // Don't jump over frames, since adding a control dependency from one frame
     // to the next isn't supported. Don't go through branches, since we don't
     // know whether they'll be executed or not.
-    if (IsNextIteration(*input_node) || IsSwitch(*input_node) ||
+    if (ModifiesFrameInfo(*input_node) || IsSwitch(*input_node) ||
         IsMerge(*input_node)) {
       continue;
     }
@@ -591,7 +758,10 @@ static const NodeDef* FindSwapTrigger(
       candidates[it2->second] = input_node;
     } else {
       for (const string& fanin : input_node->input()) {
-        possible_inputs.insert(NodeName(fanin));
+        string name = NodeName(fanin);
+        if (already_processed.find(name) == already_processed.end()) {
+          possible_inputs.insert(name);
+        }
       }
     }
   }
@@ -605,16 +775,104 @@ static const NodeDef* FindSwapTrigger(
   return nullptr;
 }
 
-static void IdentifySwappingCandidates(Cluster* cluster,
-                                       const GrapplerItem& item,
-                                       GraphDef* optimized_graph) {
-  GraphMemory memory(item);
+static bool IsSwappable(const GraphView& graph, GraphView::OutputPort output) {
+  const NodeDef& node = *output.node;
+  // There is no point in swapping out persistent tensors, since the tensor will
+  // continue to use memory.
+  if (IsPersistent(node)) {
+    return false;
+  }
+
+  const OpDef* op_def;
+  if (!OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) {
+    return false;
+  }
+  DataType dtype;
+  if (!OutputTypeForNode(node, *op_def, output.port_id, &dtype).ok()) {
+    return false;
+  }
+  // References can only refer to persistent memory: therefore the node isn't
+  // swappable.
+  if (IsRefType(dtype)) {
+    return false;
+  }
+
+  if (output.node->op() == "Identity" || output.node->op() == "Reshape") {
+    // If placed on the same device, these nodes are just forwarding references
+    // to their input. Therefore they are swappable iff their fanin is swappable
+    // or it resides on a different device.
+    GraphView::InputPort input;
+    input.node = output.node;
+    input.port_id = 0;
+    GraphView::OutputPort fanin = graph.GetRegularFanin(input);
+    if (fanin.node->device() == node.device()) {
+      return IsSwappable(graph, fanin);
+    }
+  }
+  return true;
+}
+
+static NodeDef* FindSwapOutTrigger(
+    const NodeDef* node, int input_id, const GraphView& view,
+    const std::unordered_map<const NodeDef*, Costs::NanoSeconds>&
+        execution_times) {
+  // Find the output port that generated the tensor to swap.
+  GraphView::InputPort swap;
+  swap.node = const_cast<NodeDef*>(node);
+  swap.port_id = input_id;
+  GraphView::OutputPort generator = view.GetRegularFanin(swap);
+  if (!generator.node) {
+    return nullptr;
+  }
+
+  const std::unordered_set<GraphView::InputPort, GraphView::HashPort>& fanout =
+      view.GetFanout(generator);
+  NodeDef* trigger = nullptr;
+  Costs::NanoSeconds earliest_fanout(
+      static_cast<double>(std::numeric_limits<int>::max()));
+
+  for (const auto& port : fanout) {
+    if (port.node == node) {
+      continue;
+    }
+    auto it = execution_times.find(port.node);
+    if (it != execution_times.end() && it->second < earliest_fanout) {
+      earliest_fanout = it->second;
+      trigger = port.node;
+    }
+  }
+
+  return trigger;
+}
+
+static bool IsSwappable(GraphView::InputPort input) {
+  const NodeDef& node = *input.node;
+
+  const OpDef* op_def;
+  if (!OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) {
+    return false;
+  }
+
+  DataType dtype;
+  if (!InputTypeForNode(node, *op_def, input.port_id, &dtype).ok()) {
+    return false;
+  }
+
+  return !IsRefType(dtype);
+}
+
+static bool IdentifySwappingCandidates(Cluster* cluster, GrapplerItem* item,
+                                       std::unordered_set<string>* skip_list) {
+  GraphMemory memory(*item);
   const std::unordered_map<string, DeviceProperties>& devices =
       cluster->GetDevices();
-  if (!memory.InferStatically(devices).ok()) {
-    return;
+  Status s = memory.InferStatically(devices);
+  if (!s.ok()) {
+    VLOG(1) << "Failed to infer memory usage: " << s.error_message();
+    return false;
   }
 
+  bool updated_graph = false;
   for (const auto& device : devices) {
     const string& name = device.first;
     const DeviceProperties& prop = device.second;
@@ -622,36 +880,62 @@ static void IdentifySwappingCandidates(Cluster* cluster,
       continue;
     }
     if (prop.memory_size() <= 0) {
+      VLOG(1) << "Peak memory usage unknown for device " << name;
       continue;
     }
     const GraphMemory::MemoryUsage& mem_usage = memory.GetPeakMemoryUsage(name);
+
     if (mem_usage.used_memory <= prop.memory_size()) {
       continue;
     }
     int64 required_savings = mem_usage.used_memory - prop.memory_size();
     // TODO(bsteiner): sort the tensors by how long they're live.
 
-    std::unordered_map<const NodeDef*, Costs::NanoSeconds> execution_times;
-    if (!EstimateEarliestExecutionTimes(item, cluster, &execution_times).ok()) {
-      return;
+    std::unordered_map<string, Costs::NanoSeconds> execution_times;
+    {
+      std::unordered_map<const NodeDef*, Costs::NanoSeconds>
+          tmp_execution_times;
+      if (!EstimateEarliestExecutionTimes(*item, cluster, &tmp_execution_times)
+               .ok()) {
+        return false;
+      }
+      for (const auto& exec_time : tmp_execution_times) {
+        execution_times.emplace(exec_time.first->name(), exec_time.second);
+      }
     }
-    GraphView graph(optimized_graph);
+
+    GraphView graph(&item->graph);
     for (const auto& live_tensor : mem_usage.live_tensors) {
       if (live_tensor.deallocation_time - live_tensor.allocation_time <=
           Costs::Duration(1e6)) {
         // Not enough time to swap.
+        VLOG(1) << "Not enough time to swap: skipping " << live_tensor.node;
         continue;
       }
       if (live_tensor.memory_used <= 1024) {
         // Don't bother with small tensors.
         continue;
       }
-      Costs::NanoSeconds execution_time(-1);
-      GraphView::InputPort fanout_to_swap;
       GraphView::OutputPort port =
           graph.GetOutputPort(live_tensor.node, live_tensor.output_id);
+      if (!IsSwappable(graph, port)) {
+        continue;
+      }
+      Costs::NanoSeconds execution_time(-1);
+      GraphView::InputPort fanout_to_swap;
       for (GraphView::InputPort input : graph.GetFanout(port)) {
-        auto it = execution_times.find(input.node);
+        if (skip_list->find(input.node->name()) != skip_list->end()) {
+          continue;
+        }
+        string input_name =
+            strings::StrCat(input.node->name(), ":", input.port_id);
+        if (skip_list->find(input_name) != skip_list->end()) {
+          continue;
+        }
+        if (!IsSwappable(input)) {
+          continue;
+        }
+        auto it = execution_times.find(input.node->name());
         if (it != execution_times.end()) {
           if (it->second > execution_time) {
             fanout_to_swap = input;
@@ -661,40 +945,48 @@ static void IdentifySwappingCandidates(Cluster* cluster,
       }
       // Annotate the fanout to request the tensor to be swapped if it's not
       // already been done.
-      AttrValue& val = (*fanout_to_swap.node->mutable_attr())["_swap_to_host"];
       bool found = false;
-      for (int port_id : val.list().i()) {
-        if (port_id == fanout_to_swap.port_id) {
-          found = true;
-          break;
+      if (!fanout_to_swap.node) {
+        continue;
+      }
+      auto it = fanout_to_swap.node->attr().find("_swap_to_host");
+      if (it != fanout_to_swap.node->attr().end()) {
+        const AttrValue& val = it->second;
+        for (int port_id : val.list().i()) {
+          if (port_id == fanout_to_swap.port_id) {
+            found = true;
+            break;
+          }
         }
       }
       if (!found) {
+        AttrValue& val =
+            (*fanout_to_swap.node->mutable_attr())["_swap_to_host"];
         val.mutable_list()->add_i(fanout_to_swap.port_id);
         required_savings -= live_tensor.memory_used;
+        updated_graph = true;
         if (required_savings < 0) {
           break;
         }
       }
     }
   }
-}
 
-Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
-                                 GraphDef* optimized_graph) {
-  *optimized_graph = item.graph;
-
-  RecomputationRewritingPass(optimization_level_,
-                             recomputation_targets_name_prefix_,
-                             optimized_graph, item);
+  return updated_graph;
+}
 
-  if (optimization_level_ == RewriterConfig::SWAPPING_HEURISTICS) {
-    IdentifySwappingCandidates(cluster, item, optimized_graph);
+bool SwappingPass(RewriterConfig::MemOptType optimization_level,
+                  Cluster* cluster, GrapplerItem* item,
+                  std::unordered_set<string>* skip_list) {
+  bool updated_graph = false;
+  if (optimization_level == RewriterConfig::SWAPPING_HEURISTICS ||
+      optimization_level == RewriterConfig::HEURISTICS) {
+    // Use heuristics to figure out what needs to be swapped;
+    updated_graph = IdentifySwappingCandidates(cluster, item, skip_list);
   }
-
-  // Figure out what needs to be swapped;
+  // Look for manual annotatations in the graph.
   std::unordered_map<NodeDef*, SwapInfo> nodes_to_swap;
-  for (auto& node : *optimized_graph->mutable_node()) {
+  for (auto& node : *item->graph.mutable_node()) {
     if (node.attr().count("_swap_to_host") != 0) {
       SwapInfo& swap_info = nodes_to_swap[&node];
       const AttrValue& val = node.attr().at("_swap_to_host");
@@ -710,61 +1002,128 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   }
   if (nodes_to_swap.empty()) {
     // Nothing to do.
-    return Status::OK();
-  }
-
-  {
-    // Estimate the size of the data to swap for each node.
-    GraphProperties properties(item);
-    TF_RETURN_IF_ERROR(properties.InferStatically());
-    for (auto& swap : nodes_to_swap) {
-      const NodeDef* node = swap.first;
-      std::vector<OpInfo::TensorProperties> props =
-          properties.GetInputProperties(node->name());
-      SwapInfo& swap_info = swap.second;
-      int64 bytes_to_swap = 0;
-      for (int64 input_id : swap_info.inputs_to_swap) {
-        const OpInfo::TensorProperties& t = props[input_id];
-        bytes_to_swap += EstimateSize(t);
-      }
-      // Let's assume we're going to swap over PCIe running at 16 GBps.
-      swap_info.time_to_swap = bytes_to_swap / 16;
+    return false;
+  }
+
+  // Estimate the size of the data to swap for each node.
+  GraphProperties properties(*item);
+  if (!properties.InferStatically(true).ok()) {
+    return false;
+  }
+  for (auto& swap : nodes_to_swap) {
+    const NodeDef* node = swap.first;
+    std::vector<OpInfo::TensorProperties> props =
+        properties.GetInputProperties(node->name());
+    SwapInfo& swap_info = swap.second;
+    int64 bytes_to_swap = 0;
+    for (int64 input_id : swap_info.inputs_to_swap) {
+      const OpInfo::TensorProperties& t = props[input_id];
+      bytes_to_swap += EstimateSize(t);
     }
+    // Let's assume we're going to swap over PCIe running at 16 GBps.
+    swap_info.time_to_swap = bytes_to_swap / 16;
   }
 
   std::unordered_map<const NodeDef*, Costs::NanoSeconds> execution_times;
-  TF_RETURN_IF_ERROR(
-      EstimateEarliestExecutionTimes(item, cluster, &execution_times));
+  if (!EstimateEarliestExecutionTimes(*item, cluster, &execution_times).ok()) {
+    return false;
+  }
 
   std::unordered_map<string, const NodeDef*> name_map;
-  for (const auto& node : item.graph.node()) {
+  for (const auto& node : item->graph.node()) {
     name_map[node.name()] = &node;
   }
+  GraphView view(&item->graph);
 
   for (auto& swap : nodes_to_swap) {
     NodeDef* node = swap.first;
-    SwapInfo& swap_info = swap.second;
+    const SwapInfo& swap_info = swap.second;
+
+    if (skip_list->find(node->name()) != skip_list->end()) {
+      continue;
+    }
 
     // Make sure the tensor isn't swapped back in right away: look for node that
     // will execute just before we need to swap the data back, and add a control
     // dependency from that node to the swap node.
-    const NodeDef* trigger =
-        FindSwapTrigger(node, swap_info, name_map, execution_times);
-    if (!trigger) {
+    const NodeDef* in_trigger =
+        FindSwapInTrigger(node, swap_info, name_map, execution_times);
+    // If we failed, don't attempt to reprocess this node in a subsequent pass.
+    if (!in_trigger) {
+      skip_list->insert(node->name());
       continue;
     }
+
     // Swap all the tensors that are marked with the 'swap_to_host' attribute.
     for (int input_id : swap_info.inputs_to_swap) {
-      std::pair<NodeDef*, NodeDef*> swap_nodes =
-          BuildSwapPair(node, input_id, optimized_graph);
+      string input_name = strings::StrCat(node->name(), ":", input_id);
+      if (skip_list->find(input_name) != skip_list->end()) {
+        continue;
+      } else {
+        // Don't attempt to reprocess this input in a subsequent pass.
+        skip_list->insert(input_name);
+      }
+
+      // Make sure the tensor isn't swapped out quickly look for node that
+      // will execute just after the tensor is generated and add a control
+      // dependency from the swap out node to that node.
+      NodeDef* out_trigger =
+          FindSwapOutTrigger(node, input_id, view, execution_times);
+      if (!out_trigger) {
+        continue;
+      }
+
+      std::pair<NodeDef*, NodeDef*> swap_nodes;
+      if (!BuildSwapPair(node, input_id, name_map, &item->graph, &swap_nodes)
+               .ok()) {
+        continue;
+      }
       *swap_nodes.first->add_input() = node->input(input_id);
       *node->mutable_input(input_id) = swap_nodes.second->name();
 
-      // Add the control dependency needed to delay the execution of the swap.
-      *swap_nodes.second->add_input() = strings::StrCat("^", trigger->name());
+      // Add the control dependencies needed to delay the execution of the swap.
+      out_trigger->add_input(strings::StrCat("^", swap_nodes.first->name()));
+      swap_nodes.second->add_input(strings::StrCat("^", in_trigger->name()));
+
+      // Make sure we won't try to swap the swap nodes in subsequent passes.
+      skip_list->insert(swap_nodes.first->name());
+      skip_list->insert(swap_nodes.second->name());
+    }
+  }
+  return updated_graph;
+}
+
+Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
+                                 GraphDef* optimized_graph) {
+  *optimized_graph = item.graph;
+
+  RecomputationRewritingPass(optimization_level_,
+                             recomputation_targets_name_prefix_,
+                             optimized_graph, item);
+
+  GrapplerItem optimized_item(item, std::move(*optimized_graph));
+  std::unordered_set<string> skip_list;
+  // Bound the number of rewrite passes to avoid long processing times on graphs
+  // that simply won't fit in memory.
+  bool updated_graph = true;
+  for (int i = 0; i < 25 && updated_graph; ++i) {
+    updated_graph = false;
+    if ((optimization_level_ == RewriterConfig::SCHEDULING_HEURISTICS ||
+         optimization_level_ == RewriterConfig::HEURISTICS) &&
+        cluster != nullptr) {
+      updated_graph |= SchedulingPass(cluster, &optimized_item);
+    }
+
+    if ((optimization_level_ == RewriterConfig::SWAPPING_HEURISTICS ||
+         optimization_level_ == RewriterConfig::HEURISTICS ||
+         optimization_level_ == RewriterConfig::MANUAL) &&
+        cluster != nullptr) {
+      updated_graph |= SwappingPass(optimization_level_, cluster,
+                                    &optimized_item, &skip_list);
     }
   }
 
+  optimized_graph->Swap(&optimized_item.graph);
   return Status::OK();
 }
 
diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc
index 6fa4731a863cea9d6124e379641682030ca80bed..185ac6040c4ce85ca5e7f8eadbe41b05fbe339df 100644
--- a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc
@@ -201,8 +201,16 @@ class MemoryOptimizerTest : public ::testing::Test {
     cpu_device.set_frequency(1000);
     cpu_device.set_num_cores(4);
     cpu_device.set_bandwidth(32);
+    DeviceProperties gpu_device;
+    gpu_device.set_type("GPU");
+    gpu_device.set_frequency(1000);
+    gpu_device.set_num_cores(24);
+    gpu_device.set_bandwidth(128);
+    gpu_device.set_memory_size(1024 * 1024);
+    gpu_device.mutable_environment()->insert({"architecture", "6"});
     std::unordered_map<string, DeviceProperties> devices;
     devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device;
+    devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;
     return std::unique_ptr<VirtualCluster>(new VirtualCluster(devices));
   }
 };
@@ -250,6 +258,122 @@ TEST_F(MemoryOptimizerTest, SimpleSwapping) {
   EXPECT_EQ(NodeName(b.name()), swap_out.input(0));
   EXPECT_EQ(NodeName(swap_out.name()), swap_in.input(0));
   EXPECT_EQ("^c", swap_in.input(1));
+
+  const NodeDef& new_c = output.node(2);
+  EXPECT_EQ(NodeName(c.name()), new_c.name());
+  EXPECT_EQ("^swap_out_e_0", new_c.input(1));
+
+  // Run the optimizer a second time to ensure it's idempotent.
+  item.graph.Swap(&output);
+  status = optimizer.Optimize(cluster.get(), item, &output);
+  TF_EXPECT_OK(status);
+}
+
+TEST_F(MemoryOptimizerTest, SwappingHeuristics) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output v = ops::Variable(s.WithOpName("v").WithDevice("/gpu:0"),
+                           {128, 128, 8}, DT_FLOAT);
+  Output a = ops::Identity(s.WithOpName("a").WithDevice("/gpu:0"), v);
+  Output b = ops::Square(s.WithOpName("b").WithDevice("/gpu:0"), v);
+  Output c = ops::Sqrt(s.WithOpName("c").WithDevice("/gpu:0"), a);
+  Output d = ops::Identity(s.WithOpName("d").WithDevice("/gpu:0"), b);
+  Output axis = ops::Const(s.WithOpName("axis"), 0);
+  Output e =
+      ops::Concat(s.WithOpName("e").WithDevice("/gpu:0"), {a, b, c, d}, axis);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch = {"e"};
+
+  std::unique_ptr<VirtualCluster> cluster(CreateVirtualCluster());
+
+  MemoryOptimizer optimizer(RewriterConfig::SWAPPING_HEURISTICS);
+  GraphDef output;
+  Status status = optimizer.Optimize(cluster.get(), item, &output);
+  TF_EXPECT_OK(status);
+
+  for (const auto& node : output.node()) {
+    if (node.name() == "e") {
+      EXPECT_TRUE(node.attr().count("_swap_to_host") > 0);
+      const AttrValue& val = node.attr().at("_swap_to_host");
+      EXPECT_TRUE(val.has_list());
+      std::set<int> inputs_to_swap;
+      for (int64 input_id : val.list().i()) {
+        inputs_to_swap.insert(input_id);
+      }
+      EXPECT_EQ(std::set<int>({1, 2, 3}), inputs_to_swap);
+    }
+  }
+}
+
+TEST_F(MemoryOptimizerTest, UnswappableInputs) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output v = ops::Variable(s.WithOpName("v").WithDevice("/gpu:0"),
+                           {128, 128, 8}, DT_FLOAT);
+  Output a = ops::Square(s.WithOpName("a").WithDevice("/gpu:0"), v);
+  Output b = ops::Identity(s.WithOpName("b").WithDevice("/gpu:0"), {a});
+  Output c = ops::Identity(s.WithOpName("c").WithDevice("/gpu:0"), {a});
+  Output index = ops::Const(s.WithOpName("index"), {0});
+  Output indices = ops::Tile(s.WithOpName("indices"), index, {128});
+  Output d =
+      ops::ScatterAdd(s.WithOpName("d").WithDevice("/gpu:0"), v, indices, c);
+  Output axis = ops::Const(s.WithOpName("axis"), 0);
+  Output e =
+      ops::Concat(s.WithOpName("e").WithDevice("/gpu:0"), {b, c, d}, axis);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch = {"e"};
+
+  std::unique_ptr<VirtualCluster> cluster(CreateVirtualCluster());
+
+  MemoryOptimizer optimizer(RewriterConfig::SWAPPING_HEURISTICS);
+  GraphDef output;
+  Status status = optimizer.Optimize(cluster.get(), item, &output);
+  TF_EXPECT_OK(status);
+
+  for (const auto& node : output.node()) {
+    if (node.name() == "d") {
+      EXPECT_EQ(1, node.attr().count("_swap_to_host"));
+      EXPECT_EQ(2, node.attr().at("_swap_to_host").list().i(0));
+    }
+  }
+}
+
+TEST_F(MemoryOptimizerTest, AccumulationRewrites) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output a = ops::Variable(s.WithOpName("a").WithDevice("/gpu:0"),
+                           {128, 128, 8}, DT_FLOAT);
+  Output b = ops::Variable(s.WithOpName("b").WithDevice("/gpu:0"),
+                           {128, 128, 8}, DT_FLOAT);
+  Output c = ops::Variable(s.WithOpName("c").WithDevice("/gpu:0"),
+                           {128, 128, 8}, DT_FLOAT);
+  Output d = ops::AddN(s.WithOpName("d").WithDevice("/gpu:0"), {a, b, c});
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  item.fetch = {"d"};
+
+  std::unique_ptr<VirtualCluster> cluster(CreateVirtualCluster());
+  MemoryOptimizer optimizer(RewriterConfig::SCHEDULING_HEURISTICS);
+  GraphDef output;
+  Status status = optimizer.Optimize(cluster.get(), item, &output);
+  TF_EXPECT_OK(status);
+
+  int count = 0;
+  for (const auto& node : output.node()) {
+    if (node.name() == "d") {
+      EXPECT_EQ("DestroyTemporaryVariable", node.op());
+      count++;
+    } else if (node.name() == "d/tmp_var_initializer") {
+      EXPECT_EQ("Assign", node.op());
+      count++;
+    } else if (node.name() == "d/tmp_var") {
+      EXPECT_EQ("TemporaryVariable", node.op());
+      count++;
+    }
+  }
+  EXPECT_EQ(3, count);
 }
 
 }  // namespace
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index d2df8cacb73ff8cc271373c8d8d5a3947ae18509..4228e7baba9741cf9160d4789d6bef04c50a7409 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -30,6 +30,23 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
+namespace {
+int64 NumEdges(const GraphDef& graph) {
+  int64 num_edges = 0;
+  for (const auto& node : graph.node()) {
+    num_edges += node.input_size();
+  }
+  return num_edges;
+}
+
+string PrintSizesBeforeAfter(const GraphDef& before, const GraphDef& after) {
+  return strings::StrCat("Graph size before: ", before.node_size(), " nodes, ",
+                         NumEdges(before),
+                         " edges. Graph size after: ", after.node_size(),
+                         " nodes, ", NumEdges(after), " edges.");
+}
+}  // namespace
+
 std::unique_ptr<GraphOptimizer> MetaOptimizer::NewOptimizer(
     const string& optimizer) {
   VLOG(1) << "Adding graph optimization pass: " << optimizer;
@@ -76,7 +93,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       optimizers.push_back(std::unique_ptr<GraphOptimizer>(
           new ArithmeticOptimizer(cfg_.arithmetic_optimization())));
     }
-    if (cfg_.dependency_optimization() == RewriterConfig::ON) {
+    if (cfg_.dependency_optimization() != RewriterConfig::OFF) {
       optimizers.push_back(std::unique_ptr<GraphOptimizer>(
           new DependencyOptimizer(cfg_.dependency_optimization())));
     }
@@ -128,10 +145,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       } else {
         already_optimized = true;
         result = strings::StrCat(
-            "OK. "
-            "Graph size before: ",
-            item.graph.node_size(),
-            ". Graph size after: ", optimized_graph->node_size());
+            "OK. ", PrintSizesBeforeAfter(item.graph, *optimized_graph));
       }
       result_.push_back(std::make_pair(optimizer->name(), result));
       VLOG(1) << "Optimizer " << optimizer->name()
@@ -148,10 +162,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
         result = status.ToString();
       } else {
         result = strings::StrCat(
-            "OK. "
-            "Graph size before: ",
-            optimized_item.graph.node_size(),
-            ". Graph size after: ", optimized_graph->node_size());
+            "OK. ",
+            PrintSizesBeforeAfter(optimized_item.graph, *optimized_graph));
       }
       result_.push_back(std::make_pair(optimizer->name(), result));
       VLOG(1) << "Optimizer " << optimizer->name()
@@ -160,7 +172,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   }
 
   if (already_optimized) {
-    TopologicalSort(optimized_graph);
+    TF_RETURN_IF_ERROR(TopologicalSort(optimized_graph));
     // Make sure that the optimizers preserved the graph version and library.
     DCHECK_GE(optimized_graph->library().function_size(),
               item.graph.library().function_size());
@@ -191,7 +203,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
   return !cfg.disable_model_pruning() ||
          cfg.layout_optimizer() == RewriterConfig::ON ||
          cfg.constant_folding() != RewriterConfig::OFF ||
-         cfg.dependency_optimization() == RewriterConfig::ON ||
+         cfg.dependency_optimization() != RewriterConfig::OFF ||
          cfg.arithmetic_optimization() != RewriterConfig::OFF ||
          cfg.auto_parallel().enable() || cfg.memory_optimization() > 1 ||
          !cfg.optimizers().empty();
diff --git a/tensorflow/core/grappler/optimizers/static_schedule.cc b/tensorflow/core/grappler/optimizers/static_schedule.cc
index 6ce6deef2ceacdfe44b49659109e432b87739f97..450e85340796fdde9afdfebbd0eb9a724cb9440a 100644
--- a/tensorflow/core/grappler/optimizers/static_schedule.cc
+++ b/tensorflow/core/grappler/optimizers/static_schedule.cc
@@ -86,7 +86,7 @@ Status EstimateEarliestExecutionTimes(
   name_map.clear();
 
   GraphProperties properties(item);
-  TF_RETURN_IF_ERROR(properties.InferStatically());
+  TF_RETURN_IF_ERROR(properties.InferStatically(true));
   OpLevelCostEstimator estimator;
   VirtualPlacer placer(cluster);
 
@@ -154,7 +154,7 @@ Status EstimateRequiredTimes(
     }
   }
   GraphProperties properties(item);
-  TF_RETURN_IF_ERROR(properties.InferStatically());
+  TF_RETURN_IF_ERROR(properties.InferStatically(true));
   OpLevelCostEstimator estimator;
   VirtualPlacer placer(cluster);
 
diff --git a/tensorflow/core/grappler/optimizers/static_schedule_test.cc b/tensorflow/core/grappler/optimizers/static_schedule_test.cc
index 5de593358727bf8b1f247c0fb9ec8f52b2819e4c..08580d92842377c2dd999950b2e01bef01e2fee6 100644
--- a/tensorflow/core/grappler/optimizers/static_schedule_test.cc
+++ b/tensorflow/core/grappler/optimizers/static_schedule_test.cc
@@ -64,17 +64,17 @@ TEST_F(StaticScheduleTest, BasicGraph) {
     if (time.first->name() == "Const/Const") {
       EXPECT_EQ(Costs::NanoSeconds(1), time.second);
     } else if (time.first->name() == "x") {
-      EXPECT_EQ(Costs::NanoSeconds(250002), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(250001), time.second);
     } else if (time.first->name() == "Square") {
-      EXPECT_EQ(Costs::NanoSeconds(1500005), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(1500004), time.second);
     } else if (time.first->name() == "Square_1") {
-      EXPECT_EQ(Costs::NanoSeconds(2750008), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(2750007), time.second);
     } else if (time.first->name() == "Square_2") {
-      EXPECT_EQ(Costs::NanoSeconds(4000011), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(4000010), time.second);
     } else if (time.first->name() == "Square_3") {
-      EXPECT_EQ(Costs::NanoSeconds(5250014), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(5250013), time.second);
     } else if (time.first->name() == "y") {
-      EXPECT_EQ(Costs::NanoSeconds(6500017), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(6500013), time.second);
     }
   }
 }
@@ -110,13 +110,13 @@ TEST_F(StaticScheduleTest, BasicGraphWithCtrlDependencies) {
     if (time.first->name() == "a") {
       EXPECT_EQ(Costs::NanoSeconds(1), time.second);
     } else if (time.first->name() == "b") {
-      EXPECT_EQ(Costs::NanoSeconds(12500026), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(12500001), time.second);
     } else if (time.first->name() == "c") {
-      EXPECT_EQ(Costs::NanoSeconds(12500027), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(12500002), time.second);
     } else if (time.first->name() == "d") {
-      EXPECT_EQ(Costs::NanoSeconds(12500028), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(12500003), time.second);
     } else if (time.first->name() == "e") {
-      EXPECT_EQ(Costs::NanoSeconds(25000053), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(25000003), time.second);
     }
   }
 }
@@ -142,17 +142,17 @@ TEST_F(StaticScheduleTest, RequiredTimes) {
 
   for (auto time : required_times) {
     if (time.first->name() == "Const/Const") {
-      EXPECT_EQ(Costs::NanoSeconds(-6500016), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-6500012), time.second);
     } else if (time.first->name() == "x") {
-      EXPECT_EQ(Costs::NanoSeconds(-6250015), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-6250012), time.second);
     } else if (time.first->name() == "Square") {
-      EXPECT_EQ(Costs::NanoSeconds(-5000012), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-5000009), time.second);
     } else if (time.first->name() == "Square_1") {
-      EXPECT_EQ(Costs::NanoSeconds(-3750009), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-3750006), time.second);
     } else if (time.first->name() == "Square_2") {
-      EXPECT_EQ(Costs::NanoSeconds(-2500006), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-2500003), time.second);
     } else if (time.first->name() == "Square_3") {
-      EXPECT_EQ(Costs::NanoSeconds(-1250003), time.second);
+      EXPECT_EQ(Costs::NanoSeconds(-1250000), time.second);
     } else if (time.first->name() == "y") {
       EXPECT_EQ(Costs::NanoSeconds(0), time.second);
     }
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 07cf2cfc05d82f4caaf7a302146a37f94226927a..8099214c2bd81e642bbcc8fc913d1ec3307d6251 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include <memory>
+#include <vector>
 
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
@@ -113,41 +114,6 @@ void NodeMap::UpdateOutput(const string& node_name,
   outputs.insert(nodes_[NodeName(new_output_name)]);
 }
 
-OutputMap::OutputMap(GraphDef* graph) : graph_(graph) {
-  for (int i = 0; i < graph_->node_size(); i++) {
-    auto node = graph_->mutable_node(i);
-    auto rslt = nodes_.emplace(node->name(), node);
-    // Check that the graph doesn't contain multiple nodes with the same name.
-    CHECK(rslt.second);
-    for (const auto& input : node->input()) {
-      string input_node = NodeName(input);
-      if (outputs_[input_node].count(node) == 0) {
-        outputs_[input_node].insert(std::make_pair(node, 1));
-      } else {
-        outputs_[input_node][node]++;
-      }
-    }
-  }
-}
-
-NodeDef* OutputMap::GetNode(const string& name) const {
-  string node_name = NodeName(name);
-  auto it = nodes_.find(node_name);
-  if (it == nodes_.end()) {
-    return nullptr;
-  }
-  return it->second;
-}
-
-const std::unordered_map<NodeDef*, int>& OutputMap::GetOutputs(
-    const string& node_name) const {
-  auto it = outputs_.find(node_name);
-  if (it == outputs_.end()) {
-    return empty_map_;
-  }
-  return it->second;
-}
-
 bool IsSameInput(const string& name1, const string& name2) {
   if (name1 == name2) {
     return true;
@@ -317,5 +283,103 @@ NodeDef* GetTailOfChain(const NodeDef& source, const NodeMap& node_map,
   return const_cast<NodeDef*>(current);
 }
 
+// Every permutation is a product of one or more cycles. Iterate over the cycles
+// in the permutation, and convert each of those into a product of
+// transpositions (swaps): https://en.wikipedia.org/wiki/Cyclic_permutation
+void PermuteNodesInPlace(GraphDef* graph, std::vector<int>* permutation,
+                         bool invert_permutation) {
+  CHECK_EQ(graph->node_size(), permutation->size());
+  std::vector<int> inv_perm(permutation->size(), 0);
+  if (invert_permutation) {
+    for (size_t n = 0; n < permutation->size(); ++n) {
+      inv_perm[(*permutation)[n]] = n;
+    }
+    permutation->swap(inv_perm);
+  }
+  for (std::size_t n = 0; n + 1 < permutation->size(); ++n) {
+    while (n != (*permutation)[n]) {
+      std::size_t r = (*permutation)[n];
+      graph->mutable_node()->SwapElements(n, r);
+      std::swap((*permutation)[n], (*permutation)[r]);
+    }
+  }
+}
+
+namespace {
+template <typename T>
+inline void STLSortAndRemoveDuplicates(T* v) {
+  std::sort(v->begin(), v->end());
+  v->erase(std::unique(v->begin(), v->end()), v->end());
+}
+}  // namespace
+
+Status SimpleGraphView::Initialize(const GraphDef& graph, bool dedup_inputs,
+                                   bool dedup_outputs) {
+  const int num_nodes = graph.node_size();
+  inputs_.clear();
+  inputs_.resize(num_nodes);
+  outputs_.clear();
+  outputs_.resize(num_nodes);
+  name_to_index_.clear();
+  name_to_index_.reserve(num_nodes);
+  index_to_name_.clear();
+  index_to_name_.reserve(num_nodes);
+
+  // Build map from name to index and vice versa.
+  for (int node_idx = 0; node_idx < num_nodes; ++node_idx) {
+    const NodeDef& node = graph.node(node_idx);
+    name_to_index_.emplace(node.name(), node_idx);
+    index_to_name_.push_back(node.name());
+  }
+
+  // Build forward and reverse adjacency lists.
+  for (int node_idx = 0; node_idx < num_nodes; ++node_idx) {
+    const NodeDef& node = graph.node(node_idx);
+    inputs_[node_idx].reserve(node.input_size());
+    for (const string& input : node.input()) {
+      auto it = name_to_index_.find(NodeName(input));
+      if (it == name_to_index_.end()) {
+        return errors::InvalidArgument("Non-existent input ", input,
+                                       " for node ", node.name());
+      }
+      const int input_idx = it->second;
+      inputs_[node_idx].push_back(input_idx);
+      outputs_[input_idx].push_back(node_idx);
+    }
+    if (dedup_inputs) {
+      // Dedup the input list while it's still hot in cache.
+      STLSortAndRemoveDuplicates(&inputs_[node_idx]);
+    }
+  }
+
+  // Dedup outputs.
+  if (dedup_outputs) {
+    for (int node_idx = 0; node_idx < num_nodes; ++node_idx) {
+      STLSortAndRemoveDuplicates(&outputs_[node_idx]);
+    }
+  }
+  return Status::OK();
+}
+
+string SimpleGraphView::PrintToString() const {
+  string str;
+  for (int i = 0; i < num_nodes(); ++i) {
+    strings::StrAppend(&str, "Node ", i, "'", node_name(i), "'\n", "Inputs: [");
+    for (int input : inputs(i)) {
+      strings::StrAppend(&str, input, " '", node_name(input), "', ");
+    }
+    strings::StrAppend(&str, "]\n", "Outputs: [");
+    for (int j = 0; j < outputs(i).size(); ++j) {
+      const int output = outputs(i)[j];
+      if (j > 0) {
+        strings::StrAppend(&str, ", ");
+      }
+      strings::StrAppend(&str, output, " '", node_name(output), "'");
+    }
+    strings::StrAppend(&str, "]\n");
+  }
+  return str;
+}
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index 411e44d487da2acf7e74d1db8669c1f809e592c1..c04a9a666dd68c42f378543bd2fc997a4bde872c 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -58,22 +59,6 @@ class NodeMap {
   std::unordered_map<string, std::set<NodeDef*>> outputs_;
 };
 
-// A utility class to lookup a node's outputs and the number of times it
-// presents in each output.
-class OutputMap {
- public:
-  explicit OutputMap(GraphDef* graph);
-  NodeDef* GetNode(const string& name) const;
-  const std::unordered_map<NodeDef*, int>& GetOutputs(
-      const string& node_name) const;
-
- private:
-  GraphDef* graph_;
-  std::unordered_map<NodeDef*, int> empty_map_;
-  std::unordered_map<string, NodeDef*> nodes_;
-  std::unordered_map<string, std::unordered_map<NodeDef*, int>> outputs_;
-};
-
 // A vector with a set. The set stores the same elements as the vector, and
 // quickly answers whether a value is in the vector. Duplicated elements are not
 // allowed for now.
@@ -175,6 +160,43 @@ NodeDef* GetTailOfChain(const NodeDef& source, const NodeMap& node_map,
                         bool follow_control_input,
                         const std::function<bool(const NodeDef&)>& pred_fn);
 
+// Permute the nodes of graph in place according to the permutation.
+void PermuteNodesInPlace(GraphDef* graph, std::vector<int>* permutation,
+                         bool invert_permutation);
+
+class SimpleGraphView {
+ public:
+  Status Initialize(const GraphDef& graph) {
+    return Initialize(graph, true, true);
+  }
+  Status Initialize(const GraphDef& graph, bool dedup_inputs,
+                    bool dedup_outputs);
+
+  inline int num_nodes() const { return index_to_name_.size(); }
+  inline const int index(const string& node_name) const {
+    const auto& it = name_to_index_.find(node_name);
+    DCHECK(it != name_to_index_.end());
+    return it == name_to_index_.end() ? -1 : it->second;
+  }
+  inline const string& node_name(int node_idx) const {
+    return index_to_name_[node_idx];
+  }
+  inline const gtl::InlinedVector<int, 4>& inputs(int node_idx) const {
+    return inputs_[node_idx];
+  }
+  inline const gtl::InlinedVector<int, 2>& outputs(int node_idx) const {
+    return outputs_[node_idx];
+  }
+
+  string PrintToString() const;
+
+ private:
+  std::vector<string> index_to_name_;
+  std::unordered_map<string, int> name_to_index_;
+  std::vector<gtl::InlinedVector<int, 4>> inputs_;
+  std::vector<gtl::InlinedVector<int, 2>> outputs_;
+};
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD
index 21243833accff6ca3423c505091900564094557d..534f7a063fe90bf72f8a2afba7ae8f75b8472a36 100644
--- a/tensorflow/core/grappler/utils/BUILD
+++ b/tensorflow/core/grappler/utils/BUILD
@@ -53,6 +53,7 @@ cc_library(
     hdrs = ["topological_sort.h"],
     visibility = ["//visibility:public"],
     deps = [
+        "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:op_types",
diff --git a/tensorflow/core/grappler/utils/topological_sort.cc b/tensorflow/core/grappler/utils/topological_sort.cc
index 77d4702d21e75b1689875eb17fbd2cda41aa1ba8..8d8ff4da3a8df5a2868f1a3a0ac6a5d0c2fd66ad 100644
--- a/tensorflow/core/grappler/utils/topological_sort.cc
+++ b/tensorflow/core/grappler/utils/topological_sort.cc
@@ -19,61 +19,56 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 namespace grappler {
 
 // Kahn's algorithm is implemented.
 // For details, see https://en.wikipedia.org/wiki/Topological_sorting
-void TopologicalSort(GraphDef* graph) {
-  OutputMap output_map(graph);
-  std::vector<NodeDef*> ready_nodes;
-  ready_nodes.reserve(graph->node_size());
+Status TopologicalSort(GraphDef* graph) {
+  SimpleGraphView graph_view;
+  TF_RETURN_IF_ERROR(graph_view.Initialize(*graph));
+
+  std::vector<int> ready_nodes;
+  ready_nodes.reserve(graph_view.num_nodes());
+
   int front = 0;
   int back = 0;
-  std::unordered_map<const NodeDef*, int> ready_inputs;
-  for (int i = 0; i < graph->node_size(); i++) {
-    auto node = graph->mutable_node(i);
-    if (node->input_size() == 0) {
-      ready_nodes.push_back(node);
+  std::vector<int> num_ready_inputs(graph_view.num_nodes(), 0);
+  for (int i = 0; i < graph_view.num_nodes(); i++) {
+    if (graph_view.inputs(i).empty()) {
+      ready_nodes.push_back(i);
       back++;
     }
-    if (IsMerge(*node)) {
-      ready_inputs[node] = 0;
-      for (const auto& input : node->input()) {
-        if (IsNextIteration(*output_map.GetNode(input))) {
-          ready_inputs[node]++;
+    if (IsMerge(graph->node(i))) {
+      for (int input : graph_view.inputs(i)) {
+        if (IsNextIteration(graph->node(input))) {
+          num_ready_inputs[i]++;
         }
       }
-    } else {
-      ready_inputs[node] = 0;
     }
   }
 
   while (front != back) {
-    auto ready_node = ready_nodes[front];
-    for (const auto& fanout_pair : output_map.GetOutputs(ready_node->name())) {
-      auto fanout = fanout_pair.first;
-      ready_inputs[fanout] += fanout_pair.second;
-      if (ready_inputs[fanout] == fanout->input_size()) {
+    int ready_node = ready_nodes[front];
+    for (int fanout : graph_view.outputs(ready_node)) {
+      ++num_ready_inputs[fanout];
+      if (num_ready_inputs[fanout] == graph_view.inputs(fanout).size()) {
         ready_nodes.push_back(fanout);
-        back++;
+        ++back;
       }
     }
-    front++;
+    ++front;
   }
 
-  if (back == graph->node_size()) {
-    GraphDef new_graph;
-    new_graph.mutable_node()->Reserve(graph->node_size());
-    for (int i = 0; i < graph->node_size(); i++) {
-      auto new_node = new_graph.add_node();
-      new_node->Swap(ready_nodes[i]);
-    }
-    graph->mutable_node()->Swap(new_graph.mutable_node());
-  } else {
-    LOG(ERROR) << "The graph couldn't be sorted in topological order.";
+  if (back != graph_view.num_nodes()) {
+    return errors::InvalidArgument(
+        "The graph couldn't be sorted in topological order.");
   }
+
+  PermuteNodesInPlace(graph, &ready_nodes, /*invert_permutation=*/true);
+  return Status::OK();
 }
 
 }  // namespace grappler
diff --git a/tensorflow/core/grappler/utils/topological_sort.h b/tensorflow/core/grappler/utils/topological_sort.h
index d4d8034ef577a0282dbce161aed8ba440bf248ab..f2c9bbfa4ebce373a4fa80f399ce3d2b59a576f4 100644
--- a/tensorflow/core/grappler/utils/topological_sort.h
+++ b/tensorflow/core/grappler/utils/topological_sort.h
@@ -17,12 +17,13 @@ limitations under the License.
 #define THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_UTILS_TOPOLOGICAL_SORT_H_
 
 #include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 namespace grappler {
 
 // Sort a graph in topological order.
-void TopologicalSort(GraphDef* graph);
+Status TopologicalSort(GraphDef* graph);
 
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/utils/topological_sort_test.cc b/tensorflow/core/grappler/utils/topological_sort_test.cc
index dc99cb1052ce9db3035401a2cd75e838281fb748..c96f15b0e8424d70e8dd1393cf254b52f69200d2 100644
--- a/tensorflow/core/grappler/utils/topological_sort_test.cc
+++ b/tensorflow/core/grappler/utils/topological_sort_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -51,7 +52,7 @@ TEST_F(TopologicalSortTest, NoLoop) {
   *graph.add_node() = CreateNode("5", {});
   *graph.add_node() = CreateNode("4", {});
 
-  TopologicalSort(&graph);
+  TF_EXPECT_OK(TopologicalSort(&graph));
   std::vector<string> order = {"5", "4", "2", "0", "3", "1"};
   for (int i = 0; i < order.size(); i++) {
     EXPECT_EQ(graph.node(i).name(), order[i]);
@@ -67,7 +68,7 @@ TEST_F(TopologicalSortTest, WithLoop) {
   *graph.add_node() = CreateNode("5", "NextIteration", {"4"});
   *graph.add_node() = CreateNode("1", {});
 
-  TopologicalSort(&graph);
+  TF_EXPECT_OK(TopologicalSort(&graph));
   std::vector<string> order = {"1", "2", "3", "4", "5"};
   for (int i = 0; i < order.size(); i++) {
     EXPECT_EQ(graph.node(i).name(), order[i]);
@@ -82,7 +83,7 @@ TEST_F(TopologicalSortTest, WithIllegalLoop) {
   *graph.add_node() = CreateNode("3", {"2"});
   *graph.add_node() = CreateNode("1", {});
 
-  TopologicalSort(&graph);
+  EXPECT_FALSE(TopologicalSort(&graph).ok());
   std::vector<string> order = {"2", "3", "1"};
   for (int i = 0; i < order.size(); i++) {
     EXPECT_EQ(graph.node(i).name(), order[i]);
@@ -94,13 +95,34 @@ TEST_F(TopologicalSortTest, DuplicatedInputs) {
   *graph.add_node() = CreateNode("2", {"1", "1"});
   *graph.add_node() = CreateNode("1", {});
 
-  TopologicalSort(&graph);
+  TF_EXPECT_OK(TopologicalSort(&graph));
   std::vector<string> order = {"1", "2"};
   for (int i = 0; i < order.size(); i++) {
     EXPECT_EQ(graph.node(i).name(), order[i]);
   }
 }
 
+TEST_F(TopologicalSortTest, Idempotent) {
+  GraphDef graph;
+  *graph.add_node() = CreateNode("1", {});
+  *graph.add_node() = CreateNode("2", {});
+  *graph.add_node() = CreateNode("3", {"1", "2"});
+  *graph.add_node() = CreateNode("4", {"1", "3"});
+  *graph.add_node() = CreateNode("5", {"2", "3"});
+
+  TF_EXPECT_OK(TopologicalSort(&graph));
+  std::vector<string> order = {"1", "2", "3", "4", "5"};
+  for (int i = 0; i < order.size(); i++) {
+    EXPECT_EQ(graph.node(i).name(), order[i]);
+  }
+
+  // Run topo sort again to verify that it is idenpotent.
+  TF_EXPECT_OK(TopologicalSort(&graph));
+  for (int i = 0; i < order.size(); i++) {
+    EXPECT_EQ(graph.node(i).name(), order[i]);
+  }
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 21411097e8432c90dc94ed12c57aac4dab4b3700..f40074f1afb4d854b7d88e4d91d97445f285895f 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -38,6 +38,7 @@ load(
     "tf_mkl_kernel_library",
     "cc_header_only_library",
     "if_not_windows",
+    "if_override_eigen_strong_inline",
 )
 load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl")
 load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test")
@@ -194,10 +195,9 @@ cc_library(
     ],
 )
 
-cc_library(
+tf_kernel_library(
     name = "fill_functor",
-    srcs = ["fill_functor.cc"],
-    hdrs = ["fill_functor.h"],
+    prefix = "fill_functor",
     deps = [
         "//tensorflow/core:framework",
         "//third_party/eigen3",
@@ -269,13 +269,11 @@ cc_library(
 cc_library(
     name = "conv_ops_gpu_hdrs",
     hdrs = ["conv_ops_gpu.h"],
-    deps = ["//third_party/eigen3"],
 )
 
 cc_library(
     name = "gpu_util_hdrs",
     hdrs = ["gpu_utils.h"],
-    deps = ["//third_party/eigen3"],
 )
 
 tf_cc_test(
@@ -589,6 +587,7 @@ cc_library(
         ":extract_image_patches_op",
         ":gather_nd_op",
         ":gather_op",
+        ":guarantee_const_op",
         ":identity_n_op",
         ":identity_op",
         ":inplace_ops",
@@ -606,6 +605,7 @@ cc_library(
         ":reverse_sequence_op",
         ":shape_ops",
         ":slice_op",
+        ":snapshot_op",
         ":split_op",
         ":split_v_op",
         ":strided_slice_op",
@@ -635,6 +635,12 @@ tf_kernel_library(
     deps = ARRAY_DEPS,
 )
 
+tf_kernel_library(
+    name = "guarantee_const_op",
+    prefix = "guarantee_const_op",
+    deps = ARRAY_DEPS,
+)
+
 tf_kernel_library(
     name = "constant_op",
     prefix = "constant_op",
@@ -796,6 +802,12 @@ tf_kernel_library(
     deps = ARRAY_DEPS + [":strided_slice_op"],
 )
 
+tf_kernel_library(
+    name = "snapshot_op",
+    prefix = "snapshot_op",
+    deps = ARRAY_DEPS,
+)
+
 tf_kernel_library(
     name = "split_op",
     gpu_srcs = ["cuda_device_array.h"],
@@ -1193,6 +1205,25 @@ tf_cuda_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "guarantee_const_op_test",
+    size = "small",
+    srcs = ["guarantee_const_op_test.cc"],
+    deps = [
+        ":guarantee_const_op",
+        ":ops_testutil",
+        ":ops_util",
+        ":variable_ops",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_cc_test(
     name = "identity_op_test",
     size = "small",
@@ -1836,6 +1867,7 @@ tf_kernel_library(
     srcs = ["resource_variable_ops.cc"],
     deps = [
         ":bounds_check",
+        ":critical_section",
         ":dense_update_functor",
         ":gather_functor",
         ":scatter_functor",
@@ -1849,6 +1881,23 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "list_kernels",
+    srcs = ["list_kernels.cc"],
+    hdrs = ["list_kernels.h"],
+    gpu_srcs = [
+        "list_kernels.cu.cc",
+        "list_kernels.h",
+    ],
+    deps = [
+        ":concat_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:list_ops_op_lib",
+        "//third_party/eigen3",
+    ],
+)
+
 tf_kernel_library(
     name = "fact_op",
     prefix = "fact_op",
@@ -2298,6 +2347,7 @@ cc_library(
         ":determinant_op",
         ":matrix_exponential_op",
         ":matrix_inverse_op",
+        ":matrix_logarithm_op",
         ":matrix_solve_ls_op",
         ":matrix_solve_op",
         ":matrix_triangular_solve_op",
@@ -2369,6 +2419,12 @@ tf_kernel_library(
     deps = LINALG_DEPS,
 )
 
+tf_kernel_library(
+    name = "matrix_logarithm_op",
+    prefix = "matrix_logarithm_op",
+    deps = LINALG_DEPS,
+)
+
 tf_kernel_library(
     name = "self_adjoint_eig_op",
     prefix = "self_adjoint_eig_op",
@@ -3012,6 +3068,7 @@ tf_kernel_library(
         "//conditions:default": [],
     }),
     hdrs = [
+        "fill_functor.h",
         "conv_grad_ops.h",
         "deep_conv2d.h",
         "gemm_functors.h",
@@ -3020,6 +3077,10 @@ tf_kernel_library(
         ":xsmm": ["xsmm_conv2d.h"],
         "//conditions:default": [],
     }),
+    # Override EIGEN_STRONG_INLINE to inline when --define=override_eigen_strong_inline=true,
+    # So that it doesn't take 20 minutes to compile conv_grad_ops_3d.cc and conv_ops_3d.cc
+    # on Windows. See https://github.com/tensorflow/tensorflow/issues/10521
+    copts = if_override_eigen_strong_inline(["/DEIGEN_STRONG_INLINE=inline"]),
     defines = select({
         ":xsmm": [
             "TENSORFLOW_USE_LIBXSMM",
@@ -3036,6 +3097,7 @@ tf_kernel_library(
         ":conv_2d",
         ":conv_3d",
         ":image_resizer_state",
+        ":fill_functor",
         ":ops_util",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
@@ -3089,6 +3151,7 @@ cc_library(
         ":batch_norm_op",
         ":bias_op",
         ":conv_ops",
+        ":data_format_ops",
         ":depthwise_conv_grad_op",
         ":depthwise_conv_op",
         ":dilation_ops",
@@ -3126,6 +3189,12 @@ tf_kernel_library(
     deps = NN_DEPS,
 )
 
+tf_kernel_library(
+    name = "data_format_ops",
+    prefix = "data_format_ops",
+    deps = NN_DEPS,
+)
+
 tf_kernel_library(
     name = "bias_op",
     prefix = "bias_op",
@@ -3135,7 +3204,9 @@ tf_kernel_library(
 tf_kernel_library(
     name = "fused_batch_norm_op",
     prefix = "fused_batch_norm_op",
-    deps = NN_DEPS,
+    deps = NN_DEPS + [
+        ":fill_functor",
+    ],
 )
 
 tf_kernel_library(
@@ -3332,6 +3403,7 @@ tf_kernel_library(
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core:nn_ops_op_lib",
         "//third_party/eigen3",
     ],
@@ -3457,6 +3529,7 @@ tf_kernel_library(
 cc_library(
     name = "parsing",
     deps = [
+        ":decode_compressed_op",
         ":decode_csv_op",
         ":decode_raw_op",
         ":example_parsing_ops",
@@ -3485,6 +3558,14 @@ tf_kernel_library(
     deps = PARSING_DEPS,
 )
 
+tf_kernel_library(
+    name = "decode_compressed_op",
+    prefix = "decode_compressed_op",
+    deps = [
+        "//tensorflow/core:lib_internal",
+    ] + PARSING_DEPS,
+)
+
 tf_kernel_library(
     name = "example_parsing_ops",
     prefix = "example_parsing_ops",
@@ -3914,6 +3995,8 @@ tf_kernel_library(
         "scatter_nd_op_cpu_impl_3.cc",
         "scatter_nd_op_cpu_impl_4.cc",
         "scatter_nd_op_cpu_impl_5.cc",
+        "scatter_nd_op_cpu_impl_6.cc",
+        "scatter_nd_op_cpu_impl_7.cc",
     ],
     hdrs = [
         "scatter_nd_op.h",
@@ -3923,7 +4006,11 @@ tf_kernel_library(
         "scatter_nd_op.h",
         "scatter_nd_op_gpu.cu.cc",
     ],
-    deps = STATE_DEPS + [":dense_update_functor"],
+    deps = STATE_DEPS + [
+        ":dense_update_functor",
+        ":training_op_helpers",
+        ":variable_ops",
+    ],
 )
 
 tf_kernel_library(
@@ -3932,6 +4019,12 @@ tf_kernel_library(
     deps = STATE_DEPS,
 )
 
+tf_kernel_library(
+    name = "critical_section",
+    prefix = "critical_section",
+    deps = STATE_DEPS + [":captured_function"],
+)
+
 tf_cc_test(
     name = "scatter_op_test",
     size = "small",
@@ -4482,6 +4575,8 @@ filegroup(
         "gather_nd_op_cpu_impl_3.cc",
         "gather_nd_op_cpu_impl_4.cc",
         "gather_nd_op_cpu_impl_5.cc",
+        "gather_nd_op_cpu_impl_6.cc",
+        "gather_nd_op_cpu_impl_7.cc",
         "gather_op.cc",
         "identity_n_op.cc",
         "identity_n_op.h",
@@ -4569,6 +4664,7 @@ filegroup(
         "control_flow_ops.h",
         "conv_2d.h",
         "conv_ops.h",
+        "data_format_ops.h",
         "depthtospace_op.h",
         "depthwise_conv_op.h",
         "fake_quant_ops_functor.h",
@@ -4682,6 +4778,7 @@ filegroup(
         "cwise_op_squared_difference.cc",
         "cwise_op_sub.cc",
         "cwise_op_tanh.cc",
+        "data_format_ops.cc",
         "decode_wav_op.cc",
         "deep_conv2d.cc",
         "deep_conv2d.h",
@@ -4874,10 +4971,10 @@ filegroup(
             "summary_interface.*",
             "summary_kernels.*",
             "spectrogram_convert_test_data.cc",
-            "sql_dataset_ops.cc",
             # Excluded due to experimental status:
             "debug_ops.*",
             "scatter_nd_op*",
+            "critical_section.*",
         ],
     ),
     visibility = ["//visibility:public"],
@@ -5732,6 +5829,22 @@ tf_mkl_kernel_library(
     ]),
 )
 
+tf_mkl_kernel_library(
+    name = "mkl_softmax_op",
+    prefix = "mkl_softmax",
+    deps = [
+        ":bounds_check",
+        ":ops_util",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:nn_ops_op_lib",
+        "//third_party/mkl:intel_binary_blob",
+        "@mkl_dnn//:mkl_dnn",
+    ],
+)
+
 tf_mkl_kernel_library(
     name = "mkl_fused_batch_norm_op",
     srcs = ["mkl_fused_batch_norm_op.cc"],
@@ -5798,26 +5911,6 @@ tf_mkl_kernel_library(
     ],
 )
 
-cc_library(
-    name = "stats_aggregator",
-    hdrs = ["stats_aggregator.h"],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-    ],
-)
-
-tf_kernel_library(
-    name = "stats_aggregator_ops",
-    srcs = ["stats_aggregator_ops.cc"],
-    deps = [
-        ":stats_aggregator",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:protos_all_cc",
-    ],
-)
-
 cc_library(
     name = "batch_util",
     srcs = ["batch_util.cc"],
@@ -5828,509 +5921,36 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "dataset",
-    srcs = ["dataset.cc"],
-    hdrs = ["dataset.h"],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core/util/tensor_bundle",
-    ],
-)
-
-cc_library(
-    name = "dataset_utils",
-    srcs = ["dataset_utils.cc"],
-    hdrs = ["dataset_utils.h"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
 cc_library(
     name = "captured_function",
-    srcs = ["captured_function.cc"],
     hdrs = ["captured_function.h"],
     deps = [
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core:proto_text",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:session_options",
-        "//tensorflow/core/kernels:variable_ops",
+        "//tensorflow/core/kernels/data:captured_function",
     ],
 )
 
 cc_library(
-    name = "window_dataset",
-    srcs = ["window_dataset.cc"],
-    hdrs = ["window_dataset.h"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "batch_dataset_op",
-    srcs = ["batch_dataset_op.cc"],
-    deps = [
-        ":batch_util",
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "padded_batch_dataset_op",
-    srcs = ["padded_batch_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "dense_to_sparse_batch_dataset_op",
-    srcs = ["dense_to_sparse_batch_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "group_by_window_dataset_op",
-    srcs = ["group_by_window_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        ":window_dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "filter_dataset_op",
-    srcs = ["filter_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "map_dataset_op",
-    srcs = ["map_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "map_and_batch_dataset_op",
-    srcs = ["map_and_batch_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        ":inplace_ops",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "parallel_map_dataset_op",
-    srcs = ["parallel_map_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "scan_dataset_op",
-    srcs = ["scan_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "flat_map_dataset_op",
-    srcs = ["flat_map_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        ":dataset_utils",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "interleave_dataset_op",
-    srcs = ["interleave_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        ":dataset_utils",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "parallel_interleave_dataset_op",
-    srcs = ["parallel_interleave_dataset_op.cc"],
-    deps = [
-        ":captured_function",
-        ":dataset",
-        ":dataset_utils",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "prefetch_dataset_op",
-    srcs = ["prefetch_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core:protos_all_cc",
-    ],
-)
-
-tf_kernel_library(
-    name = "repeat_dataset_op",
-    srcs = ["repeat_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "take_dataset_op",
-    srcs = ["take_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "skip_dataset_op",
-    srcs = ["skip_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "ignore_errors_dataset_op",
-    srcs = ["ignore_errors_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "stats_dataset_ops",
-    srcs = ["stats_dataset_ops.cc"],
-    deps = [
-        ":dataset",
-        ":stats_aggregator",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "range_dataset_op",
-    srcs = ["range_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "shuffle_dataset_op",
-    srcs = ["shuffle_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "sparse_tensor_slice_dataset_op",
-    srcs = ["sparse_tensor_slice_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "tensor_dataset_op",
-    srcs = ["tensor_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "tensor_slice_dataset_op",
-    srcs = ["tensor_slice_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "zip_dataset_op",
-    srcs = ["zip_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "concatenate_dataset_op",
-    srcs = ["concatenate_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "reader_dataset_ops",
-    srcs = ["reader_dataset_ops.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
-tf_kernel_library(
-    name = "sql_dataset_ops",
-    srcs = [
-        "sql/driver_manager.cc",
-        "sql/sqlite_query_connection.cc",
-        "sql_dataset_ops.cc",
-    ],
-    hdrs = [
-        "sql/driver_manager.h",
-        "sql/query_connection.h",
-        "sql/sqlite_query_connection.h",
-    ],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core/lib/db:sqlite",
-        "@sqlite_archive//:sqlite",
-    ],
-)
-
-tf_kernel_library(
-    name = "iterator_ops",
-    srcs = ["iterator_ops.cc"],
-    deps = [
-        ":dataset",
-        ":ops_util",
-        ":stats_aggregator",
-        "//tensorflow/core:core_cpu_internal",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core:protos_all_cc",
-    ],
-)
-
-tf_kernel_library(
-    name = "cache_dataset_ops",
-    srcs = ["cache_dataset_ops.cc"],
+    name = "dataset",
+    hdrs = ["dataset.h"],
     deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core/util/tensor_bundle",
+        "//tensorflow/core/kernels/data:dataset",
     ],
 )
 
 tf_kernel_library(
     name = "dataset_ops",
     deps = [
-        ":batch_dataset_op",
-        ":cache_dataset_ops",
-        ":concatenate_dataset_op",
-        ":dense_to_sparse_batch_dataset_op",
-        ":filter_dataset_op",
-        ":flat_map_dataset_op",
-        ":group_by_window_dataset_op",
-        ":ignore_errors_dataset_op",
-        ":interleave_dataset_op",
-        ":iterator_ops",
-        ":map_and_batch_dataset_op",
-        ":map_dataset_op",
-        ":padded_batch_dataset_op",
-        ":parallel_interleave_dataset_op",
-        ":parallel_map_dataset_op",
-        ":prefetch_dataset_op",
-        ":range_dataset_op",
-        ":reader_dataset_ops",
-        ":repeat_dataset_op",
-        ":scan_dataset_op",
-        ":shuffle_dataset_op",
-        ":skip_dataset_op",
-        ":sparse_tensor_slice_dataset_op",
-        ":sql_dataset_ops",
-        ":stats_aggregator_ops",
-        ":stats_dataset_ops",
-        ":take_dataset_op",
-        ":tensor_dataset_op",
-        ":tensor_slice_dataset_op",
-        ":zip_dataset_op",
+        "//tensorflow/core/kernels/data:dataset_ops",
     ],
 )
 
 cc_library(
     name = "summary_interface",
-    srcs = ["summary_interface.cc"],
     hdrs = ["summary_interface.h"],
     deps = [
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:ptr_util",
-    ],
-)
-
-tf_cc_test(
-    name = "summary_interface_test",
-    srcs = ["summary_interface_test.cc"],
-    deps = [
-        ":summary_interface",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
     ],
 )
 
@@ -6338,8 +5958,9 @@ tf_kernel_library(
     name = "summary_kernels",
     srcs = ["summary_kernels.cc"],
     deps = [
-        ":summary_interface",
+        "//tensorflow/contrib/tensorboard/db:schema",
         "//tensorflow/contrib/tensorboard/db:summary_db_writer",
+        "//tensorflow/contrib/tensorboard/db:summary_file_writer",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
@@ -6362,3 +5983,31 @@ filegroup(
     ),
     visibility = ["//tensorflow:__subpackages__"],
 )
+
+# Library to link with when compiling the cwise_op kernels directly,
+# e.g. for selective registration.
+# should not be linked by projects that also link the cwise_op library.
+cc_library(
+    name = "cwise_lib",
+    srcs = [
+        "cwise_ops_common.cc",
+        "meta_support.cc",
+        "quantization_utils.cc",
+    ],
+    hdrs = [
+        "cwise_ops.h",
+        "cwise_ops_common.h",
+        "cwise_ops_gpu_common.cu.h",
+        "cwise_ops_gpu_gradients.cu.h",
+        "cwise_ops_gradients.h",
+        "meta_support.h",
+        "quantization_utils.h",
+    ],
+    deps = [
+        ":bounds_check",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//third_party/eigen3",
+        "@gemmlowp//:gemmlowp",
+    ],
+)
diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc
index f9180236933d04d707eb1744de3993b9396b3dfa..ec9cbc2a9b5d4c1ac6d91913fc015e139fa2a068 100644
--- a/tensorflow/core/kernels/avgpooling_op.cc
+++ b/tensorflow/core/kernels/avgpooling_op.cc
@@ -56,7 +56,9 @@ class AvgPoolingOp : public UnaryOp<T> {
                 errors::InvalidArgument("Invalid data format"));
     OP_REQUIRES(
         context, data_format_ == FORMAT_NHWC,
-        errors::InvalidArgument("Default AvgPoolingOp only supports NHWC."));
+        errors::InvalidArgument("Default AvgPoolingOp only supports NHWC ",
+                                "on device type ",
+                                DeviceTypeString(context->device_type())));
     OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
     OP_REQUIRES(context, ksize_.size() == 4,
                 errors::InvalidArgument("Sliding window ksize field must "
@@ -211,9 +213,11 @@ class AvgPoolingGradOp : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
     OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                 errors::InvalidArgument("Invalid data format"));
-    OP_REQUIRES(context, data_format_ == FORMAT_NHWC,
-                errors::InvalidArgument(
-                    "Default AvgPoolingGradOp only supports NHWC."));
+    OP_REQUIRES(
+        context, data_format_ == FORMAT_NHWC,
+        errors::InvalidArgument("Default AvgPoolingGradOp only supports NHWC ",
+                                "on device type ",
+                                DeviceTypeString(context->device_type())));
     OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
     OP_REQUIRES(context, ksize_.size() == 4,
                 errors::InvalidArgument("Sliding window ksize field must "
diff --git a/tensorflow/core/kernels/batch_util.cc b/tensorflow/core/kernels/batch_util.cc
index 298e15657961be9b899437373fb5baad28e5c73b..7f2df95e2d55ac93f8a934010244dcbd1dcd28c8 100644
--- a/tensorflow/core/kernels/batch_util.cc
+++ b/tensorflow/core/kernels/batch_util.cc
@@ -24,7 +24,21 @@ namespace batch_util {
 
 namespace {
 
-// Copies element into the index^th slice of parent (in the 0th dimension).
+Status ValidateInput(const Tensor& parent, const Tensor& element, int64 index) {
+  DCHECK_NE(parent.dim_size(0), 0);
+  DCHECK_GE(index, 0);
+  if (element.NumElements() != (parent.NumElements() / parent.dim_size(0))) {
+    TensorShape chip_shape = parent.shape();
+    chip_shape.RemoveDim(0);
+    return errors::Internal(
+        "ValidateInput Cannot perform copy: number of elements does not match. "
+        " Shapes are: [element]: ",
+        element.shape().DebugString(),
+        ", [parent slice]: ", chip_shape.DebugString());
+  }
+  return Status::OK();
+}
+
 template <typename T>
 Status HandleElementToSlice(Tensor element, Tensor* parent, int64 index,
                             bool /* can_move */) {
@@ -47,18 +61,22 @@ Status HandleElementToSlice<string>(Tensor element, Tensor* parent, int64 index,
   return Status::OK();
 }
 
+// TODO(jsimsa): Add HandleElementToSlice<variant> specialization that moves
+// the data when possible.
+
+template <typename T>
+static Status HandleSliceToElement(const Tensor& parent, Tensor* element,
+                                   int64 index) {
+  element->flat<T>() = parent.flat_outer_dims<T>().chip(index, 0);
+  return Status::OK();
+}
+
 }  // namespace
 
+// Copies element into the index^th slice of parent (in the 0th dimension).
 Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) {
-  if (element.NumElements() != (parent->NumElements() / parent->dim_size(0))) {
-    TensorShape chip_shape = parent->shape();
-    chip_shape.RemoveDim(0);
-    return errors::InvalidArgument(
-        "HandleElementToSlice Cannot copy slice: number of elements does "
-        "not match. Shapes are: [element]: ",
-        element.shape().DebugString(),
-        ", [parent slice]: ", chip_shape.DebugString());
-  }
+  TF_RETURN_IF_ERROR(ValidateInput(*parent, element, index));
+
   bool can_move = element.RefCountIsOne();
 #define HANDLE_TYPE(T)                                                \
   case DataTypeToEnum<T>::value: {                                    \
@@ -69,6 +87,7 @@ Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) {
   switch (element.dtype()) {
     TF_CALL_ALL_TYPES(HANDLE_TYPE);
     TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
+    TF_CALL_variant(HANDLE_TYPE);
 #undef HANDLE_TYPE
     default:
       return errors::Unimplemented("CopyElementToSlice Unhandled data type: ",
@@ -76,5 +95,25 @@ Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) {
   }
 }
 
+// Copies the index^th slice of parent (in the 0th dimension) into element.
+Status CopySliceToElement(const Tensor& parent, Tensor* element, int64 index) {
+  TF_RETURN_IF_ERROR(ValidateInput(parent, *element, index));
+
+#define HANDLE_TYPE(T)                                      \
+  case DataTypeToEnum<T>::value: {                          \
+    return HandleSliceToElement<T>(parent, element, index); \
+  }
+
+  switch (parent.dtype()) {
+    TF_CALL_ALL_TYPES(HANDLE_TYPE);
+    TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
+    TF_CALL_variant(HANDLE_TYPE);
+#undef HANDLE_TYPE
+    default:
+      return errors::Unimplemented("CopySliceToElement Unhandled data type: ",
+                                   element->dtype());
+  }
+}
+
 }  // namespace batch_util
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/batch_util.h b/tensorflow/core/kernels/batch_util.h
index 065011a699a5264ae06cdec3a42fde19be46e884..b066e2a5748e6c2e0a63ef7e27a528be99067b83 100644
--- a/tensorflow/core/kernels/batch_util.h
+++ b/tensorflow/core/kernels/batch_util.h
@@ -29,6 +29,9 @@ namespace batch_util {
 // for DT_STRING tensors.
 Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index);
 
+// Copies the index^th slice of parent (in the 0th dimension) into element.
+Status CopySliceToElement(const Tensor& parent, Tensor* element, int64 index);
+
 }  // namespace batch_util
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/batching_util/BUILD b/tensorflow/core/kernels/batching_util/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..4397410a5cee839a70bde69f34ca72e31530565f
--- /dev/null
+++ b/tensorflow/core/kernels/batching_util/BUILD
@@ -0,0 +1,186 @@
+# Description: Utilities.
+
+package(
+    default_visibility = ["//tensorflow:internal"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+            "**/google_*",
+        ],
+    ),
+)
+
+cc_library(
+    name = "periodic_function_dynamic",
+    srcs = ["periodic_function.cc"],
+    hdrs = ["periodic_function.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "periodic_function",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":periodic_function_dynamic",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "periodic_function_test",
+    srcs = ["periodic_function_test.cc"],
+    deps = [
+        ":fake_clock_env",
+        ":periodic_function_dynamic",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
+cc_library(
+    name = "batch_scheduler_hdrs",
+    hdrs = ["batch_scheduler.h"],
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+    ],
+)
+
+cc_library(
+    name = "batch_scheduler",
+    hdrs = ["batch_scheduler.h"],
+    deps = [
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "batch_scheduler_test",
+    srcs = ["batch_scheduler_test.cc"],
+    deps = [
+        ":batch_scheduler",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
+cc_library(
+    name = "shared_batch_scheduler_hdrs",
+    hdrs = ["shared_batch_scheduler.h"],
+    deps = [
+        ":batch_scheduler_hdrs",
+        ":periodic_function_dynamic",
+        "//tensorflow/core:framework_headers_lib",
+    ],
+)
+
+cc_library(
+    name = "shared_batch_scheduler",
+    hdrs = ["shared_batch_scheduler.h"],
+    deps = [
+        ":batch_scheduler",
+        ":periodic_function_dynamic",
+        "//tensorflow/core:lib",
+    ],
+    alwayslink = 1,
+)
+
+tf_cc_test(
+    name = "shared_batch_scheduler_test",
+    srcs = ["shared_batch_scheduler_test.cc"],
+    deps = [
+        ":fake_clock_env",
+        ":shared_batch_scheduler",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
+cc_library(
+    name = "adaptive_shared_batch_scheduler",
+    hdrs = ["adaptive_shared_batch_scheduler.h"],
+    deps = [
+        ":batch_scheduler",
+        ":periodic_function_dynamic",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "adaptive_shared_batch_scheduler_test",
+    srcs = ["adaptive_shared_batch_scheduler_test.cc"],
+    tags = [
+        "local",
+        "manual",
+    ],
+    deps = [
+        ":adaptive_shared_batch_scheduler",
+        ":fake_clock_env",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
+cc_library(
+    name = "basic_batch_scheduler",
+    hdrs = ["basic_batch_scheduler.h"],
+    deps = [
+        ":shared_batch_scheduler",
+    ],
+)
+
+tf_cc_test(
+    name = "basic_batch_scheduler_test",
+    srcs = ["basic_batch_scheduler_test.cc"],
+    deps = [
+        ":basic_batch_scheduler",
+        ":batch_scheduler",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
+tf_cc_test(
+    name = "basic_batch_scheduler_benchmark",
+    srcs = ["basic_batch_scheduler_benchmark_test.cc"],
+    tags = [
+        "local",
+        "manual",
+    ],
+    deps = [
+        ":basic_batch_scheduler",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:tensorflow",
+        "//tensorflow/core:test",
+    ],
+)
+
+cc_library(
+    name = "fake_clock_env",
+    testonly = 1,
+    srcs = ["fake_clock_env.cc"],
+    hdrs = ["fake_clock_env.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:lib",
+        "//tensorflow/core:tensorflow",
+    ],
+)
diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h
new file mode 100644
index 0000000000000000000000000000000000000000..ff8ebb349f66df63bb23f4985212240f69efc542
--- /dev/null
+++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h
@@ -0,0 +1,660 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
+
+
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <queue>
+#include <random>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/core/kernels/batching_util/batch_scheduler.h"
+#include "tensorflow/core/kernels/batching_util/periodic_function.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace serving {
+namespace internal {
+template <typename TaskType>
+class ASBSBatch;
+
+template <typename TaskType>
+class ASBSQueue;
+}  // namespace internal
+
+// EXPERIMENTAL: API MAY BE SUBJECTED TO SUDDEN CHANGES.
+//
+// Shared batch scheduler designed to minimize latency. The scheduler keeps
+// track of a number of queues (one per model or model version) which are
+// continuously enqueuing requests. The scheduler groups the requests into
+// batches which it periodically sends off for processing (see
+// shared_batch_scheduler.h for more details). The AdaptiveSharedBatchScheduler
+// prioritizes batches by age (i.e. the batch's oldest request) irrespective of
+// queue or batch size.
+//
+// The scheduling decision currently exists in two flavors, controlled by the
+// option use_in_flight_batches_implementation. It is expected that setting this
+// option to true will give universally better results; after a period of
+// testing to confirm, the old implementation will be removed.
+//
+// If use_in_flight_batches_implementation is set to true, the scheduler
+// limits the number of batches which can be processed concurrently.  If a new
+// batch is created, and the number of in flight batches is below the limit,
+// the next (i.e. oldest) batch is immediately scheduled.  Similarly, when a
+// batch finishes processing, the limit is rechecked, and another batch may be
+// scheduled.  To avoid the need to carefully tune the limit for workload,
+// model type, platform, etc, it is dynamically adjusted in order to provide the
+// lowest latency.
+//
+// If use_in_flight_batches_implementation is set to false, the scheduler will
+// process the oldest batch at an adjustable rate, regardless of batch size.
+// The user can provide feedback to help set this rate to achieve some goal
+// (i.e. minimize overall latency, limit cpu usage, etc). The rate (or rather,
+// the corresponding period) is adjusted each time a batch is processed, using
+// an exponentially weighted moving average to smooth noisy feedback:
+// ewma_feedback = ((N - 1) * ewma_feedback + feedback()) / N
+// period *= (1 + K * emwa_feedback)
+//
+// Some potential use cases:
+// Hardware Accelerators (GPUs & TPUs) - If some phase of batch processing
+//   involves serial processing by a device, from a latency perspective it is
+//   desirable to keep the device evenly loaded, avoiding the need to wait for
+//   the device to process prior batches.
+//   feedback = num_pending_on_device() - desired_pending.
+// CPU utilization - If the batch processing is cpu dominated, you can reap
+//   latency gains when underutilized by increasing the processing rate, but
+//   back the rate off when the load increases to avoid overload.
+//   feedback = cpu_rate() - desired_cpu_rate.
+
+template <typename TaskType>
+class AdaptiveSharedBatchScheduler
+    : public std::enable_shared_from_this<
+          AdaptiveSharedBatchScheduler<TaskType>> {
+ public:
+  ~AdaptiveSharedBatchScheduler() {
+    // Finish processing batches before destorying other class members.
+    batch_thread_pool_.reset();
+  }
+
+  struct Options {
+    // The name to use for the pool of batch threads.
+    string thread_pool_name = {"batch_threads"};
+    // Number of batch processing threads; equivalently the maximum number of
+    // concurrently running batches.
+    int64 num_batch_threads = port::NumSchedulableCPUs();
+    // The environment to use (typically only overridden by test code).
+    Env* env = Env::Default();
+    // Which implementation to use (described in class comments above).
+    bool use_in_flight_batches_implementation = false;
+    // Initial limit for number of batches being concurrently processed.
+    // Non-integer values correspond to probabilistic limits - i.e. a value of
+    // 3.2 results in an actual cap of 3 80% of the time, and 4 20% of the time.
+    double initial_in_flight_batches_limit = 3;
+    // Number of batches between adjustments of in_flight_batches_limit.  Larger
+    // numbers will give less noisy latency measurements, but will be less
+    // responsive to changes in workload.
+    int64 batches_to_average_over = 1000;
+
+    // TODO(kte): remove the rate based implementation and corresponding options
+    // below once testing confirms the superiority of the in flight batches
+    // implementation.
+    // Initial batch scheduling period in microseconds. Will be altered for
+    // non-zero rate_feedback.
+    double initial_scheduling_period_micros = 500;
+    // Minimum batch scheduling period in microseconds. Recommend setting this
+    // value greater than 0, otherwise it may take a while to recover from a
+    // sustained time of negative scheduling_period_feedback (which may occur
+    // under low load).
+    double min_scheduling_period_micros = 100;
+    // Maximum batch scheduling period in microseconds.
+    double max_scheduling_period_micros = 10000;
+    // Feedback function used to modify the scheduling period each time a batch
+    // is scheduled.  Should return values roughly O(1), with positive values
+    // resulting in an increased period.
+    std::function<double()> scheduling_period_feedback{[] { return 0.; }};
+    // To handle potentially noisy scheduling_period_feedback, the period is
+    // adjusted using an exponentially weighted moving average over the previous
+    // feedback_smoothing_batches batches.  Must be greater than 0.
+    int64 feedback_smoothing_batches = 10;
+  };
+
+  // Ownership is shared between the caller of Create() and any queues created
+  // via AddQueue().
+  static Status Create(
+      const Options& options,
+      std::shared_ptr<AdaptiveSharedBatchScheduler<TaskType>>* scheduler);
+
+  struct QueueOptions {
+    // Maximum size of each batch.
+    int max_batch_size = 1000;
+    // Maximum number of enqueued (i.e. non-scheduled) batches.
+    int max_enqueued_batches = 10;
+  };
+
+  using BatchProcessor = std::function<void(std::unique_ptr<Batch<TaskType>>)>;
+
+  // Adds queue (and its callback) to be managed by this scheduler.
+  Status AddQueue(const QueueOptions& options,
+                  BatchProcessor process_batch_callback,
+                  std::unique_ptr<BatchScheduler<TaskType>>* queue);
+
+  double in_flight_batches_limit() {
+    mutex_lock l(mu_);
+    return in_flight_batches_limit_;
+  }
+
+ private:
+  // access to AddBatch, RemoveQueue, GetEnv.
+  friend class internal::ASBSQueue<TaskType>;
+
+  explicit AdaptiveSharedBatchScheduler(const Options& options);
+
+  // Batch scheduling function which runs every scheduling_period_ microseconds.
+  // Only used when options_.use_in_flight_batches_implementation == false.
+  void ProcessOneBatch();
+
+  // Tracks processing latency and adjusts in_flight_batches_limit to minimize.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  void CallbackWrapper(const internal::ASBSBatch<TaskType>* batch,
+                       BatchProcessor callback);
+
+  // Schedules batch if in_flight_batches_limit_ is not met.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  void MaybeScheduleNextBatch() EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+  // Notifies scheduler of non-empty batch which is eligible for processing.
+  void AddBatch(const internal::ASBSBatch<TaskType>* batch);
+
+  // Removes queue from scheduler.
+  void RemoveQueue(const internal::ASBSQueue<TaskType>* queue);
+
+  Env* GetEnv() const { return options_.env; }
+
+  const Options options_;
+
+  struct BatchCompare {
+    bool operator()(const internal::ASBSBatch<TaskType>* a,
+                    const internal::ASBSBatch<TaskType>* b);
+  };
+
+  // Collection of batches added by AddBatch, ordered by age. Owned by scheduler
+  // until they are released for processing.
+  std::priority_queue<const internal::ASBSBatch<TaskType>*,
+                      std::vector<const internal::ASBSBatch<TaskType>*>,
+                      BatchCompare>
+      batches_ GUARDED_BY(mu_);
+
+  // Unowned queues and callbacks added by AddQueue.
+  std::unordered_map<const internal::ASBSQueue<TaskType>*, BatchProcessor>
+      queues_and_callbacks_ GUARDED_BY(mu_);
+
+  mutex mu_;
+
+  // Responsible for running ProcessOneBatch. PeriodicFunction was used in order
+  // to check for deletion so that the thread can be shut down.
+  // Only used when options_.use_in_flight_batches_implementation == false.
+  std::unique_ptr<PeriodicFunction> scheduling_thread_;
+
+  // Responsible for running the batch processing callbacks.
+  std::unique_ptr<thread::ThreadPool> batch_thread_pool_;
+
+  // Time interval in microseconds between successive ProcessOneBatch calls.
+  // Only used when options_.use_in_flight_batches_implementation == false.
+  double scheduling_period_;
+
+  // Exponentially weighted moving average of
+  // options_.scheduling_period_feedback() evaluated in each ProcessOneBatch
+  // call.
+  // Only used when options_.use_in_flight_batches_implementation == false.
+  double ewma_feedback_ = 0;
+
+  // Limit on number of batches which can be concurrently processed.
+  // Non-integer values correspond to probabilistic limits - i.e. a value of 3.2
+  // results in an actual cap of 3 80% of the time, and 4 20% of the time.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  double in_flight_batches_limit_ GUARDED_BY(mu_);
+
+  // Number of batches currently being processed.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  int64 in_flight_batches_ GUARDED_BY(mu_) = 0;
+
+  // RNG engine and distribution.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  std::default_random_engine rand_engine_;
+  std::uniform_real_distribution<double> rand_double_;
+
+  // Fields controlling the dynamic adjustment of in_flight_batches_limit_.
+  // Only used when options_.use_in_flight_batches_implementation == true.
+  // Number of batches since the last in_flight_batches_limit_ adjustment.
+  int64 batch_count_ GUARDED_BY(mu_) = 0;
+  // Sum of processing latency for batches counted by batch_count_.
+  int64 batch_latency_sum_ GUARDED_BY(mu_) = 0;
+  // Average batch latency for previous value of in_flight_batches_limit_.
+  double last_avg_latency_ms_ GUARDED_BY(mu_) = 0;
+  // Did last_avg_latency_ms_ decrease from the previous last_avg_latency_ms_?
+  bool last_latency_decreased_ GUARDED_BY(mu_) = false;
+  // Current direction (+-) to adjust in_flight_batches_limit_
+  int step_direction_ GUARDED_BY(mu_) = 1;
+  // Max adjustment size (as a fraction of in_flight_batches_limit_).
+  constexpr static double kMaxStepSizeMultiplier = 0.125;  // 1/8;
+  // Min adjustment size (as a fraction of in_flight_batches_limit_).
+  constexpr static double kMinStepSizeMultiplier = 0.0078125;  // 1/128
+  // Current adjustment size (as a fraction of in_flight_batches_limit_).
+  double step_size_multiplier_ GUARDED_BY(mu_) = kMaxStepSizeMultiplier;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(AdaptiveSharedBatchScheduler);
+};
+
+//////////////////////////////////////////////////////////
+// Implementation details follow. API users need not read.
+
+namespace internal {
+// Consolidates tasks into batches, passing them off to the
+// AdaptiveSharedBatchScheduler for processing.
+template <typename TaskType>
+class ASBSQueue : public BatchScheduler<TaskType> {
+ public:
+  using QueueOptions =
+      typename AdaptiveSharedBatchScheduler<TaskType>::QueueOptions;
+
+  ASBSQueue(std::shared_ptr<AdaptiveSharedBatchScheduler<TaskType>> scheduler,
+            const QueueOptions& options);
+
+  ~ASBSQueue() override;
+
+  // Adds task to current batch. Fails if the task size is larger than the batch
+  // size or if the current batch is full and this queue's number of outstanding
+  // batches is at its maximum.
+  Status Schedule(std::unique_ptr<TaskType>* task) override;
+
+  // Number of tasks waiting to be scheduled.
+  size_t NumEnqueuedTasks() const override;
+
+  // Number of size 1 tasks which could currently be scheduled without failing.
+  size_t SchedulingCapacity() const override;
+
+  // Notifies queue that a batch is about to be scheduled; the queue should not
+  // place any more tasks in this batch.
+  void ReleaseBatch(const ASBSBatch<TaskType>* batch);
+
+  size_t max_task_size() const override { return options_.max_batch_size; }
+
+ private:
+  std::shared_ptr<AdaptiveSharedBatchScheduler<TaskType>> scheduler_;
+  const QueueOptions options_;
+  // Owned by scheduler_.
+  ASBSBatch<TaskType>* current_batch_ GUARDED_BY(mu_) = nullptr;
+  int64 num_enqueued_batches_ GUARDED_BY(mu_) = 0;
+  int64 num_enqueued_tasks_ GUARDED_BY(mu_) = 0;
+  mutable mutex mu_;
+  TF_DISALLOW_COPY_AND_ASSIGN(ASBSQueue);
+};
+
+// Batch which remembers when and by whom it was created.
+template <typename TaskType>
+class ASBSBatch : public Batch<TaskType> {
+ public:
+  ASBSBatch(ASBSQueue<TaskType>* queue, int64 creation_time_micros)
+      : queue_(queue), creation_time_micros_(creation_time_micros) {}
+
+  ~ASBSBatch() override {}
+
+  ASBSQueue<TaskType>* queue() const { return queue_; }
+
+  int64 creation_time_micros() const { return creation_time_micros_; }
+
+ private:
+  ASBSQueue<TaskType>* queue_;
+  const int64 creation_time_micros_;
+  TF_DISALLOW_COPY_AND_ASSIGN(ASBSBatch);
+};
+}  // namespace internal
+
+// ---------------- AdaptiveSharedBatchScheduler ----------------
+
+template <typename TaskType>
+constexpr double AdaptiveSharedBatchScheduler<TaskType>::kMaxStepSizeMultiplier;
+
+template <typename TaskType>
+constexpr double AdaptiveSharedBatchScheduler<TaskType>::kMinStepSizeMultiplier;
+
+template <typename TaskType>
+Status AdaptiveSharedBatchScheduler<TaskType>::Create(
+    const Options& options,
+    std::shared_ptr<AdaptiveSharedBatchScheduler<TaskType>>* scheduler) {
+  if (options.num_batch_threads < 1) {
+    return errors::InvalidArgument("num_batch_threads must be positive; was ",
+                                   options.num_batch_threads);
+  }
+  if (options.min_scheduling_period_micros < 0) {
+    return errors::InvalidArgument(
+        "min_scheduling_period_micros must be >= 0; was ",
+        options.min_scheduling_period_micros);
+  }
+  if (options.min_scheduling_period_micros >
+      options.initial_scheduling_period_micros) {
+    return errors::InvalidArgument(
+        "initial_scheduling_period_micros (",
+        options.initial_scheduling_period_micros,
+        ") must be >= min_scheduling_period_micros (",
+        options.min_scheduling_period_micros, ")");
+  }
+  if (options.initial_scheduling_period_micros >
+      options.max_scheduling_period_micros) {
+    return errors::InvalidArgument(
+        "initial_scheduling_period_micros (",
+        options.initial_scheduling_period_micros,
+        ") must be <= max_scheduling_period_micros (",
+        options.max_scheduling_period_micros, ")");
+  }
+  if (options.feedback_smoothing_batches < 1) {
+    return errors::InvalidArgument(
+        "feedback_smoothing_batches must be positive; was ",
+        options.feedback_smoothing_batches);
+  }
+  if (options.initial_in_flight_batches_limit > options.num_batch_threads) {
+    return errors::InvalidArgument(
+        "initial_in_flight_batches_limit (",
+        options.initial_in_flight_batches_limit,
+        ") should not be larger than num_batch_threads (",
+        options.num_batch_threads, ")");
+  }
+  if (options.initial_in_flight_batches_limit < 1) {
+    return errors::InvalidArgument(
+        "initial_in_flight_batches_limit should be "
+        "greater than or equal to 1; was ",
+        options.initial_in_flight_batches_limit);
+  }
+  if (options.batches_to_average_over < 1) {
+    return errors::InvalidArgument(
+        "batches_to_average_over should be "
+        "greater than or equal to 1; was ",
+        options.batches_to_average_over);
+  }
+  scheduler->reset(new AdaptiveSharedBatchScheduler<TaskType>(options));
+  return Status::OK();
+}
+
+template <typename TaskType>
+AdaptiveSharedBatchScheduler<TaskType>::AdaptiveSharedBatchScheduler(
+    const Options& options)
+    : options_(options),
+      scheduling_period_(options.initial_scheduling_period_micros),
+      in_flight_batches_limit_(options.initial_in_flight_batches_limit),
+      rand_double_(0.0, 1.0) {
+  std::random_device device;
+  rand_engine_.seed(device());
+  PeriodicFunction::Options opts;
+  opts.thread_name_prefix = "scheduling_thread";
+  opts.env = GetEnv();
+  batch_thread_pool_.reset(new thread::ThreadPool(
+      GetEnv(), options.thread_pool_name, options.num_batch_threads));
+  if (!options.use_in_flight_batches_implementation) {
+    scheduling_thread_.reset(
+        new PeriodicFunction([this] { ProcessOneBatch(); }, 0, opts));
+  }
+}
+
+template <typename TaskType>
+Status AdaptiveSharedBatchScheduler<TaskType>::AddQueue(
+    const QueueOptions& options, BatchProcessor process_batch_callback,
+    std::unique_ptr<BatchScheduler<TaskType>>* queue) {
+  if (options.max_batch_size <= 0) {
+    return errors::InvalidArgument("max_batch_size must be positive; was ",
+                                   options.max_batch_size);
+  }
+  if (options.max_enqueued_batches <= 0) {
+    return errors::InvalidArgument(
+        "max_enqueued_batches must be positive; was ",
+        options.max_enqueued_batches);
+  }
+  internal::ASBSQueue<TaskType>* asbs_queue_raw;
+  queue->reset(asbs_queue_raw = new internal::ASBSQueue<TaskType>(
+                   this->shared_from_this(), options));
+  mutex_lock l(mu_);
+  queues_and_callbacks_[asbs_queue_raw] = process_batch_callback;
+  return Status::OK();
+}
+
+template <typename TaskType>
+void AdaptiveSharedBatchScheduler<TaskType>::AddBatch(
+    const internal::ASBSBatch<TaskType>* batch) {
+  mutex_lock l(mu_);
+  batches_.push(batch);
+  if (options_.use_in_flight_batches_implementation) {
+    MaybeScheduleNextBatch();
+  }
+}
+
+template <typename TaskType>
+void AdaptiveSharedBatchScheduler<TaskType>::RemoveQueue(
+    const internal::ASBSQueue<TaskType>* queue) {
+  mutex_lock l(mu_);
+  queues_and_callbacks_.erase(queue);
+}
+
+template <typename TaskType>
+void AdaptiveSharedBatchScheduler<TaskType>::MaybeScheduleNextBatch() {
+  if (batches_.empty() || in_flight_batches_ >= in_flight_batches_limit_)
+    return;
+  // Non-integer limit handled probabilistially.
+  if (in_flight_batches_limit_ - in_flight_batches_ < 1 &&
+      rand_double_(rand_engine_) >
+          (in_flight_batches_limit_ - in_flight_batches_))
+    return;
+  const internal::ASBSBatch<TaskType>* batch = batches_.top();
+  batches_.pop();
+  // Queue may destroy itself after ReleaseBatch is called.
+  batch->queue()->ReleaseBatch(batch);
+  batch_thread_pool_->Schedule(
+      std::bind(&AdaptiveSharedBatchScheduler<TaskType>::CallbackWrapper, this,
+                batch, queues_and_callbacks_[batch->queue()]));
+  in_flight_batches_++;
+}
+
+template <typename TaskType>
+void AdaptiveSharedBatchScheduler<TaskType>::CallbackWrapper(
+    const internal::ASBSBatch<TaskType>* batch,
+    AdaptiveSharedBatchScheduler<TaskType>::BatchProcessor callback) {
+  int64 start_time = batch->creation_time_micros();
+  callback(std::unique_ptr<Batch<TaskType>>(
+      const_cast<internal::ASBSBatch<TaskType>*>(batch)));
+  int64 end_time = GetEnv()->NowMicros();
+  mutex_lock l(mu_);
+  in_flight_batches_--;
+  batch_count_++;
+  batch_latency_sum_ += end_time - start_time;
+  // Occasionally adjust in_flight_batches_limit_ to minimize average latency.
+  // Although the optimal value may depend on the workload, the latency should
+  // be a simple convex function of in_flight_batches_limit_, allowing us to
+  // locate the global minimum relatively quickly.
+  if (batch_count_ == options_.batches_to_average_over) {
+    double current_avg_latency_ms = (batch_latency_sum_ / 1000.) / batch_count_;
+    bool current_latency_decreased =
+        current_avg_latency_ms < last_avg_latency_ms_;
+    if (current_latency_decreased) {
+      // If latency improvement was because we're moving in the correct
+      // direction, increase step_size so that we can get to the minimum faster.
+      // If latency improvement was due to backtracking from a previous failure,
+      // decrease step_size in order to refine our location.
+      step_size_multiplier_ *= (last_latency_decreased_ ? 2 : 0.5);
+      step_size_multiplier_ =
+          std::min(step_size_multiplier_, kMaxStepSizeMultiplier);
+      step_size_multiplier_ =
+          std::max(step_size_multiplier_, kMinStepSizeMultiplier);
+    } else {
+      // Return (nearly) to previous position and confirm that latency is better
+      // there before decreasing step size.
+      step_direction_ = -step_direction_;
+    }
+    in_flight_batches_limit_ +=
+        step_direction_ * in_flight_batches_limit_ * step_size_multiplier_;
+    in_flight_batches_limit_ =
+        std::min(in_flight_batches_limit_,
+                 static_cast<double>(options_.num_batch_threads));
+    in_flight_batches_limit_ = std::max(in_flight_batches_limit_, 1.0);
+    last_avg_latency_ms_ = current_avg_latency_ms;
+    last_latency_decreased_ = current_latency_decreased;
+    batch_count_ = 0;
+    batch_latency_sum_ = 0;
+  }
+  MaybeScheduleNextBatch();
+}
+
+template <typename TaskType>
+void AdaptiveSharedBatchScheduler<TaskType>::ProcessOneBatch() {
+  static const double kFeedbackMultiplier = .001;
+  const internal::ASBSBatch<TaskType>* batch = nullptr;
+  BatchProcessor callback;
+  const int64 start_time_micros = GetEnv()->NowMicros();
+  {
+    mutex_lock l(mu_);
+    if (!batches_.empty()) {
+      batch = batches_.top();
+      batches_.pop();
+      callback = queues_and_callbacks_[batch->queue()];
+    }
+  }
+  if (batch != nullptr) {
+    double feedback = options_.scheduling_period_feedback();
+    const int64 N = options_.feedback_smoothing_batches;
+    ewma_feedback_ = ((N - 1) * ewma_feedback_ + feedback) / N;
+    scheduling_period_ *= (1 + kFeedbackMultiplier * ewma_feedback_);
+    if (scheduling_period_ < options_.min_scheduling_period_micros) {
+      scheduling_period_ = options_.min_scheduling_period_micros;
+    } else if (scheduling_period_ > options_.max_scheduling_period_micros) {
+      scheduling_period_ = options_.max_scheduling_period_micros;
+    }
+    // Queue may destroy itself after ReleaseBatch is called.
+    batch->queue()->ReleaseBatch(batch);
+    batch_thread_pool_->Schedule([callback, batch] {
+      callback(std::unique_ptr<Batch<TaskType>>(
+          const_cast<internal::ASBSBatch<TaskType>*>(batch)));
+    });
+  }
+  const int64 sleep_time =
+      scheduling_period_ - (GetEnv()->NowMicros() - start_time_micros);
+  if (sleep_time > 0) {
+    GetEnv()->SleepForMicroseconds(sleep_time);
+  }
+}
+
+template <typename TaskType>
+bool AdaptiveSharedBatchScheduler<TaskType>::BatchCompare::operator()(
+    const internal::ASBSBatch<TaskType>* a,
+    const internal::ASBSBatch<TaskType>* b) {
+  return a->creation_time_micros() > b->creation_time_micros();
+}
+
+// ---------------- ASBSQueue ----------------
+
+namespace internal {
+template <typename TaskType>
+ASBSQueue<TaskType>::ASBSQueue(
+    std::shared_ptr<AdaptiveSharedBatchScheduler<TaskType>> scheduler,
+    const QueueOptions& options)
+    : scheduler_(scheduler), options_(options) {}
+
+template <typename TaskType>
+ASBSQueue<TaskType>::~ASBSQueue() {
+  // Wait until last batch has been scheduled.
+  const int kSleepMicros = 1000;
+  for (;;) {
+    {
+      mutex_lock l(mu_);
+      if (num_enqueued_batches_ == 0) {
+        break;
+      }
+    }
+    scheduler_->GetEnv()->SleepForMicroseconds(kSleepMicros);
+  }
+  scheduler_->RemoveQueue(this);
+}
+
+template <typename TaskType>
+Status ASBSQueue<TaskType>::Schedule(std::unique_ptr<TaskType>* task) {
+  ASBSBatch<TaskType>* new_batch = nullptr;
+  size_t size = (*task)->size();
+  if (size > options_.max_batch_size) {
+    return errors::InvalidArgument("Task size ", size,
+                                   " is larger than maximum batch size ",
+                                   options_.max_batch_size);
+  }
+  {
+    mutex_lock l(mu_);
+    // Current batch is full, create another if allowed.
+    if (current_batch_ &&
+        current_batch_->size() + size > options_.max_batch_size) {
+      if (num_enqueued_batches_ >= options_.max_enqueued_batches) {
+        return errors::Unavailable("The batch scheduling queue is full");
+      }
+      current_batch_->Close();
+      current_batch_ = nullptr;
+    }
+    if (!current_batch_) {
+      num_enqueued_batches_++;
+      current_batch_ = new_batch =
+          new ASBSBatch<TaskType>(this, scheduler_->GetEnv()->NowMicros());
+    }
+    current_batch_->AddTask(std::move(*task));
+    num_enqueued_tasks_++;
+  }
+  // AddBatch must be called outside of lock, since it may call ReleaseBatch.
+  if (new_batch != nullptr) scheduler_->AddBatch(new_batch);
+  return Status::OK();
+}
+
+template <typename TaskType>
+void ASBSQueue<TaskType>::ReleaseBatch(const ASBSBatch<TaskType>* batch) {
+  mutex_lock l(mu_);
+  num_enqueued_batches_--;
+  num_enqueued_tasks_ -= batch->num_tasks();
+  if (batch == current_batch_) {
+    current_batch_->Close();
+    current_batch_ = nullptr;
+  }
+}
+
+template <typename TaskType>
+size_t ASBSQueue<TaskType>::NumEnqueuedTasks() const {
+  mutex_lock l(mu_);
+  return num_enqueued_tasks_;
+}
+
+template <typename TaskType>
+size_t ASBSQueue<TaskType>::SchedulingCapacity() const {
+  mutex_lock l(mu_);
+  const int current_batch_capacity =
+      current_batch_ ? options_.max_batch_size - current_batch_->size() : 0;
+  const int spare_batches =
+      options_.max_enqueued_batches - num_enqueued_batches_;
+  return spare_batches * options_.max_batch_size + current_batch_capacity;
+}
+}  // namespace internal
+}  // namespace serving
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc
similarity index 78%
rename from tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc
rename to tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc
index a07cd6d834fa28904bf7748b16972cca217503c1..8ae8ca02eca20b5d1184e6e588f013d59d10464a 100644
--- a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler_test.cc
+++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h"
+#include "tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h"
 
-#include "tensorflow/contrib/batching/test_util/fake_clock_env.h"
+#include "tensorflow/core/kernels/batching_util/fake_clock_env.h"
 #include "tensorflow/core/lib/core/notification.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/macros.h"
@@ -141,6 +141,16 @@ TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) {
   options = Scheduler::Options();
   options.feedback_smoothing_batches = 0;
   EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok());
+  options = Scheduler::Options();
+  options.initial_in_flight_batches_limit = 0.5;
+  EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok());
+  options = Scheduler::Options();
+  options.num_batch_threads = 5;
+  options.initial_in_flight_batches_limit = 8;
+  EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok());
+  options = Scheduler::Options();
+  options.batches_to_average_over = -5;
+  EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok());
 }
 
 TEST(AdaptiveSharedBatchSchedulerTest, ObeysQueueOptions) {
@@ -186,6 +196,7 @@ TEST(AdaptiveSharedBatchSchedulerTest, ObeysQueueOptions) {
     queue_options.max_enqueued_batches = 2;
     TF_ASSERT_OK(
         scheduler->AddQueue(queue_options, queue_0_callback, &queue_0));
+    EXPECT_EQ(10, queue_0->max_task_size());
     queue_options.max_batch_size = 0;
     // Queue must have max_batch_size > 0.
     EXPECT_FALSE(
@@ -433,6 +444,106 @@ TEST(AdaptiveSharedBatchSchedulerTest, QueueCapacityInfo) {
   }
   stop_teardown.Notify();
 }
+
+TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesImplementation) {
+  AdaptiveSharedBatchScheduler<FakeTask>::Options options;
+  options.use_in_flight_batches_implementation = true;
+  options.initial_in_flight_batches_limit = 2;
+  options.batches_to_average_over = 1000;
+  mutex mu;
+  int processed_batches = 0;
+  Notification finish_processing;
+  auto queue_callback = [&mu, &processed_batches, &finish_processing](
+                            std::unique_ptr<Batch<FakeTask>> batch) {
+    ASSERT_TRUE(batch->IsClosed());
+    EXPECT_GT(batch->num_tasks(), 0);
+    mu.lock();
+    int batch_num = ++processed_batches;
+    mu.unlock();
+    if (batch_num == 2) {
+      // Give third batch a chance to process if it's going to.
+      Env::Default()->SleepForMicroseconds(1000);
+      finish_processing.Notify();
+    }
+    if (batch_num == 3) {
+      ASSERT_TRUE(finish_processing.HasBeenNotified());
+    }
+    finish_processing.WaitForNotification();
+  };
+  std::shared_ptr<AdaptiveSharedBatchScheduler<FakeTask>> scheduler;
+  TF_ASSERT_OK(
+      AdaptiveSharedBatchScheduler<FakeTask>::Create(options, &scheduler));
+  std::unique_ptr<BatchScheduler<FakeTask>> queue;
+  TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue));
+
+  // Enqueue 3 batches.
+  for (int i = 0; i < 3; i++) {
+    TF_ASSERT_OK(ScheduleTask(100, queue.get()));
+  }
+}
+
+TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) {
+  test_util::FakeClockEnv env(Env::Default());
+  Notification start_teardown, stop_teardown;
+  std::unique_ptr<Thread> teardown_thread =
+      CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown);
+  {
+    AdaptiveSharedBatchScheduler<FakeTask>::Options options;
+    options.env = &env;
+    options.use_in_flight_batches_implementation = true;
+    options.initial_in_flight_batches_limit = 2;
+    options.batches_to_average_over = 1;
+    auto queue_callback = [&env](std::unique_ptr<Batch<FakeTask>> batch) {
+      ASSERT_TRUE(batch->IsClosed());
+      switch (batch->size()) {
+        case 0:
+          env.AdvanceByMicroseconds(10);
+          break;
+        case 1:
+          env.AdvanceByMicroseconds(15);
+          break;
+        case 2:
+          env.AdvanceByMicroseconds(10);
+          break;
+        case 3:
+          env.AdvanceByMicroseconds(11);
+          break;
+      }
+    };
+    std::shared_ptr<AdaptiveSharedBatchScheduler<FakeTask>> scheduler;
+    TF_ASSERT_OK(
+        AdaptiveSharedBatchScheduler<FakeTask>::Create(options, &scheduler));
+    std::unique_ptr<BatchScheduler<FakeTask>> queue;
+    TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue));
+
+    TF_ASSERT_OK(ScheduleTask(0, queue.get()));
+    double in_flight_batches_limit = 2;
+    while (scheduler->in_flight_batches_limit() == in_flight_batches_limit) {
+    }
+    // Initial direction will be negative.
+    EXPECT_LT(scheduler->in_flight_batches_limit(), in_flight_batches_limit);
+    in_flight_batches_limit = scheduler->in_flight_batches_limit();
+    TF_ASSERT_OK(ScheduleTask(1, queue.get()));
+    while (scheduler->in_flight_batches_limit() == in_flight_batches_limit) {
+    }
+    // Latency increased -> change direction.
+    EXPECT_GT(scheduler->in_flight_batches_limit(), in_flight_batches_limit);
+    in_flight_batches_limit = scheduler->in_flight_batches_limit();
+    TF_ASSERT_OK(ScheduleTask(2, queue.get()));
+    while (scheduler->in_flight_batches_limit() == in_flight_batches_limit) {
+    }
+    // Latency decreased -> keep going in same direction.
+    EXPECT_GT(scheduler->in_flight_batches_limit(), in_flight_batches_limit);
+    in_flight_batches_limit = scheduler->in_flight_batches_limit();
+    TF_ASSERT_OK(ScheduleTask(3, queue.get()));
+    while (scheduler->in_flight_batches_limit() == in_flight_batches_limit) {
+    }
+    // Latency increased -> change direction.
+    EXPECT_LT(scheduler->in_flight_batches_limit(), in_flight_batches_limit);
+    start_teardown.Notify();
+  }
+  stop_teardown.Notify();
+}
 }  // namespace anonymous
 }  // namespace serving
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/batching_util/basic_batch_scheduler.h b/tensorflow/core/kernels/batching_util/basic_batch_scheduler.h
new file mode 100644
index 0000000000000000000000000000000000000000..920797210079bf7ba095c4652fe952510664c47d
--- /dev/null
+++ b/tensorflow/core/kernels/batching_util/basic_batch_scheduler.h
@@ -0,0 +1,268 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_BASIC_BATCH_SCHEDULER_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_BASIC_BATCH_SCHEDULER_H_
+
+#include <stddef.h>
+#include <cstddef>
+#include <functional>
+#include <memory>
+#include <string>
+
+#include "tensorflow/core/kernels/batching_util/shared_batch_scheduler.h"
+
+namespace tensorflow {
+namespace serving {
+
+// A BatchScheduler implementation geared toward handling a single request type
+// running on a specific set of hardware resources. A typical scenario is one in
+// which all requests invoke the same machine-learned model on one GPU.
+//
+// If there are, say, two GPUs and two models each bound to one of the GPUs, one
+// could use two BasicBatchScheduler instances to schedule the two model/GPU
+// combinations independently. If multiple models must share a given GPU or
+// other hardware resource, consider using SharedBatchScheduler instead.
+//
+//
+// PARAMETERS AND BEHAVIOR:
+//
+// BasicBatchScheduler runs a fixed pool of threads, which it uses to process
+// batches of tasks. It enforces a maximum batch size, and enqueues a bounded
+// number of tasks. If the queue is nearly empty, such that a full batch cannot
+// be formed, when a thread becomes free, it anyway schedules a batch
+// immediately if a task has been in the queue for longer than a given timeout
+// parameter. If the timeout parameter is set to 0, then the batch threads will
+// always be kept busy (unless there are zero tasks waiting to be processed).
+//
+// For online serving, it is recommended to set the maximum number of enqueued
+// batches worth of tasks equal to the number of batch threads, which allows
+// enqueuing of enough tasks s.t. if every thread becomes available it can be
+// kept busy, but no more. For bulk processing jobs and throughput-oriented
+// benchmarks, you may want to set it much higher.
+//
+// When Schedule() is called, if the queue is full the call will fail with an
+// UNAVAILABLE error (after which the client may retry again later). If the call
+// succeeds, the maximum time the task will spend in the queue before being
+// placed in a batch and assigned to a thread for processing, is the greater of:
+//  - the maximum time to process ceil(max_enqueued_batches/num_batch_threads)
+//    (1 in the recommended configuration) batches of previously-submitted tasks
+//  - the configured timeout parameter (which can be 0, as mentioned above)
+//
+// Unlike StreamingBatchScheduler, when BasicBatchScheduler assigns a batch to a
+// thread, it closes the batch. The process-batch callback may assume that every
+// batch it receives is closed at the outset.
+//
+//
+// RECOMMENDED USE-CASES:
+//
+// BasicBatchScheduler is suitable for use-cases that feature a single kind of
+// request (e.g. a server performing inference with a single machine-learned
+// model, possibly evolving over time), with loose versioning semantics.
+// Concretely, the following conditions should hold:
+//
+//  A. All requests batched onto a given resource (e.g. a hardware accelerator,
+//     or a pool accelerators) are of the same type. For example, they all
+//     invoke the same machine-learned model.
+//
+//     These variations are permitted:
+//      - The model may reside in a single servable, or it may be spread across
+//        multiple servables that are used in unison (e.g. a vocabulary lookup
+//        table servable and a tensorflow session servable).
+//      - The model's servable(s) may be static, or they may evolve over time
+//        (successive servable versions).
+//      - Zero or more of the servables are used in the request thread; the rest
+//        are used in the batch thread. In our running example, the vocabulary
+//        lookups and tensorflow runs may both be performed in the batch thread,
+//        or alternatively the vocabulary lookup may occur in the request thread
+//        with only the tensorflow run performed in the batch thread.
+//
+//     In contrast, BasicBatchScheduler is not a good fit if the server
+//     hosts multiple distinct models running on a pool accelerators, with each
+//     request specifying which model it wants to use. BasicBatchScheduler
+//     has no facility to time-multiplex the batch threads across multiple
+//     models in a principled way. More basically, it cannot ensure that a given
+//     batch doesn't contain a mixture of requests for different models.
+//
+//  B. Requests do not specify a particular version of the servable(s) that must
+//     be used. Instead, each request is content to use the "latest" version.
+//
+//     BasicBatchScheduler does not constrain which requests get grouped
+//     together into a batch, so using this scheduler there is no way to achieve
+//     cohesion of versioned requests to version-specific batches.
+//
+//  C. No servable version coordination needs to be performed between the
+//     request threads and the batch threads. Often, servables are only used in
+//     the batch threads, in which case this condition trivially holds. If
+//     servables are used in both threads, then the use-case must tolerate
+//     version skew across the servables used in the two kinds of threads.
+//
+//
+// EXAMPLE USE-CASE FLOW:
+//
+// For such use-cases, request processing via BasicBatchScheduler generally
+// follows this flow (given for illustration; variations are possible):
+//  1. Optionally perform some pre-processing on each request in the request
+//     threads.
+//  2. Route the requests to the batch scheduler, as batching::Task objects.
+//     (Since all requests are of the same type and are not versioned, the
+//     scheduler is free to group them into batches arbitrarily.)
+//  3. Merge the requests into a single batched representation B.
+//  4. Obtain handles to the servable(s) needed to process B. The simplest
+//     approach is to obtain the latest version of each servable. Alternatively,
+//     if cross-servable consistency is required (e.g. the vocabulary lookup
+//     table's version number must match that of the tensorflow session),
+//     identify an appropriate version number and obtain the servable handles
+//     accordingly.
+//  5. Process B using the obtained servable handles, and split the result into
+//     individual per-request units.
+//  6. Perform any post-processing in the batch thread and/or request thread.
+//
+//
+// PERFORMANCE TUNING: See README.md.
+//
+template <typename TaskType>
+class BasicBatchScheduler : public BatchScheduler<TaskType> {
+ public:
+  // TODO(b/25089730): Tune defaults based on best practices as they develop.
+  // (Keep them mirrored to the ones in SharedBatchScheduler::QueueOptions and
+  // SharedBatchScheduler::Options.)
+  struct Options {
+    // The maximum size of each batch.
+    //
+    // The scheduler may form batches of any size between 1 and this number
+    // (inclusive). If there is a need to quantize the batch sizes, i.e. only
+    // submit batches whose size is in a small set of allowed sizes, that can be
+    // done by adding padding in the process-batch callback.
+    int max_batch_size = 1000;
+
+    // If a task has been enqueued for this amount of time (in microseconds),
+    // and a thread is available, the scheduler will immediately form a batch
+    // from enqueued tasks and assign the batch to the thread for processing,
+    // even if the batch's size is below 'max_batch_size'.
+    //
+    // This parameter offers a way to bound queue latency, so that a task isn't
+    // stuck in the queue indefinitely waiting for enough tasks to arrive to
+    // make a full batch. (The latency bound is given in the class documentation
+    // above.)
+    //
+    // The goal is to smooth out batch sizes under low request rates, and thus
+    // avoid latency spikes.
+    int64 batch_timeout_micros = 0;
+
+    // The name to use for the pool of batch threads.
+    string thread_pool_name = {"batch_threads"};
+
+    // The number of threads to use to process batches.
+    // Must be >= 1, and should be tuned carefully.
+    int num_batch_threads = port::NumSchedulableCPUs();
+
+    // The maximum allowable number of enqueued (accepted by Schedule() but
+    // not yet being processed on a batch thread) tasks in terms of batches.
+    // If this limit is reached, Schedule() will return an UNAVAILABLE error.
+    // See the class documentation above for guidelines on how to tune this
+    // parameter.
+    int max_enqueued_batches = 10;
+
+    // The following options are typically only overridden by test code.
+
+    // The environment to use.
+    Env* env = Env::Default();
+  };
+  static Status Create(const Options& options,
+                       std::function<void(std::unique_ptr<Batch<TaskType>>)>
+                           process_batch_callback,
+                       std::unique_ptr<BasicBatchScheduler>* scheduler);
+
+  ~BasicBatchScheduler() override = default;
+
+  Status Schedule(std::unique_ptr<TaskType>* task) override;
+  size_t NumEnqueuedTasks() const override;
+  size_t SchedulingCapacity() const override;
+
+  size_t max_task_size() const override {
+    return shared_scheduler_queue_->max_task_size();
+  }
+
+ private:
+  explicit BasicBatchScheduler(
+      std::unique_ptr<BatchScheduler<TaskType>> shared_scheduler_queue);
+
+  // This class is merely a thin wrapper around a SharedBatchScheduler with a
+  // single queue.
+  std::unique_ptr<BatchScheduler<TaskType>> shared_scheduler_queue_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(BasicBatchScheduler);
+};
+
+//////////
+// Implementation details follow. API users need not read.
+
+template <typename TaskType>
+Status BasicBatchScheduler<TaskType>::Create(
+    const Options& options,
+    std::function<void(std::unique_ptr<Batch<TaskType>>)>
+        process_batch_callback,
+    std::unique_ptr<BasicBatchScheduler>* scheduler) {
+  typename SharedBatchScheduler<TaskType>::Options shared_scheduler_options;
+  shared_scheduler_options.thread_pool_name = options.thread_pool_name;
+  shared_scheduler_options.num_batch_threads = options.num_batch_threads;
+  shared_scheduler_options.env = options.env;
+  std::shared_ptr<SharedBatchScheduler<TaskType>> shared_scheduler;
+  TF_RETURN_IF_ERROR(SharedBatchScheduler<TaskType>::Create(
+      shared_scheduler_options, &shared_scheduler));
+
+  typename SharedBatchScheduler<TaskType>::QueueOptions
+      shared_scheduler_queue_options;
+  shared_scheduler_queue_options.max_batch_size = options.max_batch_size;
+  shared_scheduler_queue_options.batch_timeout_micros =
+      options.batch_timeout_micros;
+  shared_scheduler_queue_options.max_enqueued_batches =
+      options.max_enqueued_batches;
+  std::unique_ptr<BatchScheduler<TaskType>> shared_scheduler_queue;
+  TF_RETURN_IF_ERROR(shared_scheduler->AddQueue(shared_scheduler_queue_options,
+                                                process_batch_callback,
+                                                &shared_scheduler_queue));
+
+  scheduler->reset(
+      new BasicBatchScheduler<TaskType>(std::move(shared_scheduler_queue)));
+  return Status::OK();
+}
+
+template <typename TaskType>
+Status BasicBatchScheduler<TaskType>::Schedule(
+    std::unique_ptr<TaskType>* task) {
+  return shared_scheduler_queue_->Schedule(task);
+}
+
+template <typename TaskType>
+size_t BasicBatchScheduler<TaskType>::NumEnqueuedTasks() const {
+  return shared_scheduler_queue_->NumEnqueuedTasks();
+}
+
+template <typename TaskType>
+size_t BasicBatchScheduler<TaskType>::SchedulingCapacity() const {
+  return shared_scheduler_queue_->SchedulingCapacity();
+}
+
+template <typename TaskType>
+BasicBatchScheduler<TaskType>::BasicBatchScheduler(
+    std::unique_ptr<BatchScheduler<TaskType>> shared_scheduler_queue)
+    : shared_scheduler_queue_(std::move(shared_scheduler_queue)) {}
+
+}  // namespace serving
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_BASIC_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/basic_batch_scheduler_benchmark.cc b/tensorflow/core/kernels/batching_util/basic_batch_scheduler_benchmark_test.cc
similarity index 99%
rename from tensorflow/contrib/batching/basic_batch_scheduler_benchmark.cc
rename to tensorflow/core/kernels/batching_util/basic_batch_scheduler_benchmark_test.cc
index ab6c81043359cd10d90668fcf88d61a5e0ea7ee0..65c9c00da57b7a5a163e590df8b52cd130be6fe1 100644
--- a/tensorflow/contrib/batching/basic_batch_scheduler_benchmark.cc
+++ b/tensorflow/core/kernels/batching_util/basic_batch_scheduler_benchmark_test.cc
@@ -16,7 +16,7 @@ limitations under the License.
 // Benchmarks for performance (throughput and latency) of BasicBatchScheduler
 // under various rates of task injection.
 
-#include "tensorflow/contrib/batching/basic_batch_scheduler.h"
+#include "tensorflow/core/kernels/batching_util/basic_batch_scheduler.h"
 #include "tensorflow/core/lib/histogram/histogram.h"
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/platform/logging.h"
diff --git a/tensorflow/contrib/batching/basic_batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/basic_batch_scheduler_test.cc
similarity index 94%
rename from tensorflow/contrib/batching/basic_batch_scheduler_test.cc
rename to tensorflow/core/kernels/batching_util/basic_batch_scheduler_test.cc
index e020301795c7dadee2815c0e0d727e53e5fb9e6e..494ba0c74c3efcdcc414aa49334a596dd625052c 100644
--- a/tensorflow/contrib/batching/basic_batch_scheduler_test.cc
+++ b/tensorflow/core/kernels/batching_util/basic_batch_scheduler_test.cc
@@ -13,11 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/batching/basic_batch_scheduler.h"
+#include "tensorflow/core/kernels/batching_util/basic_batch_scheduler.h"
 
 #include <utility>
 
-#include "tensorflow/contrib/batching/batch_scheduler.h"
+#include "tensorflow/core/kernels/batching_util/batch_scheduler.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/macros.h"
@@ -73,6 +73,7 @@ TEST(BasicBatchSchedulerTest, Basic) {
     std::unique_ptr<BasicBatchScheduler<FakeTask>> scheduler;
     TF_ASSERT_OK(
         BasicBatchScheduler<FakeTask>::Create(options, callback, &scheduler));
+    EXPECT_EQ(10, scheduler->max_task_size());
     EXPECT_EQ(0, scheduler->NumEnqueuedTasks());
     EXPECT_EQ(3 * 10, scheduler->SchedulingCapacity());
     TF_ASSERT_OK(ScheduleTask(3, scheduler.get()));
diff --git a/tensorflow/core/kernels/batching_util/batch_scheduler.h b/tensorflow/core/kernels/batching_util/batch_scheduler.h
new file mode 100644
index 0000000000000000000000000000000000000000..a5316f152b19db2de239ff54dbca0858314d2a25
--- /dev/null
+++ b/tensorflow/core/kernels/batching_util/batch_scheduler.h
@@ -0,0 +1,281 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Abstractions for processing small tasks in a batched fashion, to reduce
+// processing times and costs that can be amortized across multiple tasks.
+//
+// The core class is BatchScheduler, which groups tasks into batches.
+//
+// BatchScheduler encapsulates logic for aggregating multiple tasks into a
+// batch, and kicking off processing of a batch on a thread pool it manages.
+//
+// This file defines an abstract BatchScheduler class.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_BATCH_SCHEDULER_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_BATCH_SCHEDULER_H_
+
+#include <stddef.h>
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "tensorflow/core/lib/core/notification.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace serving {
+
+// The abstract superclass for a unit of work to be done as part of a batch.
+//
+// An implementing subclass typically contains (or points to):
+//  (a) input data;
+//  (b) a thread-safe completion signal (e.g. a Notification);
+//  (c) a place to store the outcome (success, or some error), upon completion;
+//  (d) a place to store the output data, upon success.
+//
+// Items (b), (c) and (d) are typically non-owned pointers to data homed
+// elsewhere, because a task's ownership gets transferred to a BatchScheduler
+// (see below) and it may be deleted as soon as it is done executing.
+class BatchTask {
+ public:
+  virtual ~BatchTask() = default;
+
+  // Returns the size of the task, in terms of how much it contributes to the
+  // size of a batch. (A batch's size is the sum of its task sizes.)
+  virtual size_t size() const = 0;
+};
+
+// A thread-safe collection of BatchTasks, to be executed together in some
+// fashion.
+//
+// At a given time, a batch is either "open" or "closed": an open batch can
+// accept new tasks; a closed one cannot. A batch is monotonic: initially it is
+// open and tasks can be added to it; then it is closed and its set of tasks
+// remains fixed for the remainder of its life. A closed batch cannot be re-
+// opened. Tasks can never be removed from a batch.
+//
+// Type parameter TaskType must be a subclass of BatchTask.
+template <typename TaskType>
+class Batch {
+ public:
+  Batch() = default;
+  virtual ~Batch();  // Blocks until the batch is closed.
+
+  // Appends 'task' to the batch. After calling AddTask(), the newly-added task
+  // can be accessed via task(num_tasks()-1) or mutable_task(num_tasks()-1).
+  // Dies if the batch is closed.
+  void AddTask(std::unique_ptr<TaskType> task);
+
+  // Removes the most recently added task. Returns nullptr if the batch is
+  // empty.
+  std::unique_ptr<TaskType> RemoveTask();
+
+  // Returns the number of tasks in the batch.
+  int num_tasks() const;
+
+  // Returns true iff the batch contains 0 tasks.
+  bool empty() const;
+
+  // Returns a reference to the ith task (in terms of insertion order).
+  const TaskType& task(int i) const;
+
+  // Returns a pointer to the ith task (in terms of insertion order).
+  TaskType* mutable_task(int i);
+
+  // Returns the sum of the task sizes.
+  size_t size() const;
+
+  // Returns true iff the batch is currently closed.
+  bool IsClosed() const;
+
+  // Blocks until the batch is closed.
+  void WaitUntilClosed() const;
+
+  // Marks the batch as closed. Dies if called more than once.
+  void Close();
+
+ private:
+  mutable mutex mu_;
+
+  // The tasks in the batch.
+  std::vector<std::unique_ptr<TaskType>> tasks_ GUARDED_BY(mu_);
+
+  // The sum of the sizes of the tasks in 'tasks_'.
+  size_t size_ GUARDED_BY(mu_) = 0;
+
+  // Whether the batch has been closed.
+  Notification closed_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(Batch);
+};
+
+// An abstract batch scheduler class. Collects individual tasks into batches,
+// and processes each batch on a pool of "batch threads" that it manages. The
+// actual logic for processing a batch is accomplished via a callback.
+//
+// Type parameter TaskType must be a subclass of BatchTask.
+template <typename TaskType>
+class BatchScheduler {
+ public:
+  virtual ~BatchScheduler() = default;
+
+  // Submits a task to be processed as part of a batch.
+  //
+  // Ownership of '*task' is transferred to the callee iff the method returns
+  // Status::OK. In that case, '*task' is left as nullptr. Otherwise, '*task' is
+  // left as-is.
+  //
+  // If no batch processing capacity is available to process this task at the
+  // present time, and any task queue maintained by the implementing subclass is
+  // full, this method returns an UNAVAILABLE error code. The client may retry
+  // later.
+  //
+  // Other problems, such as the task size being larger than the maximum batch
+  // size, yield other, permanent error types.
+  //
+  // In all cases, this method returns "quickly" without blocking for any
+  // substantial amount of time. If the method returns Status::OK, the task is
+  // processed asynchronously, and any errors that occur during the processing
+  // of the batch that includes the task can be reported to 'task'.
+  virtual Status Schedule(std::unique_ptr<TaskType>* task) = 0;
+
+  // Returns the number of tasks that have been scheduled (i.e. accepted by
+  // Schedule()), but have yet to be handed to a thread for execution as part of
+  // a batch. Note that this returns the number of tasks, not the aggregate task
+  // size (so if there is one task of size 3 and one task of size 5, this method
+  // returns 2 rather than 8).
+  virtual size_t NumEnqueuedTasks() const = 0;
+
+  // Returns a guaranteed number of size 1 tasks that can be Schedule()d without
+  // getting an UNAVAILABLE error. In a typical implementation, returns the
+  // available space on a queue.
+  //
+  // There are two important caveats:
+  //  1. The guarantee does not extend to varying-size tasks due to possible
+  //     internal fragmentation of batches.
+  //  2. The guarantee only holds in a single-thread environment or critical
+  //     section, i.e. if an intervening thread cannot call Schedule().
+  //
+  // This method is useful for monitoring, or for guaranteeing a future slot in
+  // the schedule (but being mindful about the caveats listed above).
+  virtual size_t SchedulingCapacity() const = 0;
+
+  // Returns the maximum allowed size of tasks submitted to the scheduler. (This
+  // is typically equal to a configured maximum batch size.)
+  virtual size_t max_task_size() const = 0;
+};
+
+//////////
+// Implementation details follow. API users need not read.
+
+template <typename TaskType>
+Batch<TaskType>::~Batch() {
+  WaitUntilClosed();
+}
+
+template <typename TaskType>
+void Batch<TaskType>::AddTask(std::unique_ptr<TaskType> task) {
+  DCHECK(!IsClosed());
+  {
+    mutex_lock l(mu_);
+    size_ += task->size();
+    tasks_.push_back(std::move(task));
+  }
+}
+
+template <typename TaskType>
+std::unique_ptr<TaskType> Batch<TaskType>::RemoveTask() {
+  {
+    mutex_lock l(mu_);
+    if (tasks_.empty()) {
+      return nullptr;
+    }
+    std::unique_ptr<TaskType> task = std::move(tasks_.back());
+    size_ -= task->size();
+    tasks_.pop_back();
+    return task;
+  }
+}
+
+template <typename TaskType>
+int Batch<TaskType>::num_tasks() const {
+  {
+    mutex_lock l(mu_);
+    return tasks_.size();
+  }
+}
+
+template <typename TaskType>
+bool Batch<TaskType>::empty() const {
+  {
+    mutex_lock l(mu_);
+    return tasks_.empty();
+  }
+}
+
+template <typename TaskType>
+const TaskType& Batch<TaskType>::task(int i) const {
+  DCHECK_GE(i, 0);
+  {
+    mutex_lock l(mu_);
+    DCHECK_LT(i, tasks_.size());
+    return *tasks_[i].get();
+  }
+}
+
+template <typename TaskType>
+TaskType* Batch<TaskType>::mutable_task(int i) {
+  DCHECK_GE(i, 0);
+  {
+    mutex_lock l(mu_);
+    DCHECK_LT(i, tasks_.size());
+    return tasks_[i].get();
+  }
+}
+
+template <typename TaskType>
+size_t Batch<TaskType>::size() const {
+  {
+    mutex_lock l(mu_);
+    return size_;
+  }
+}
+
+template <typename TaskType>
+bool Batch<TaskType>::IsClosed() const {
+  return const_cast<Notification*>(&closed_)->HasBeenNotified();
+}
+
+template <typename TaskType>
+void Batch<TaskType>::WaitUntilClosed() const {
+  const_cast<Notification*>(&closed_)->WaitForNotification();
+}
+
+template <typename TaskType>
+void Batch<TaskType>::Close() {
+  closed_.Notify();
+}
+
+}  // namespace serving
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/batch_scheduler_test.cc
similarity index 96%
rename from tensorflow/contrib/batching/batch_scheduler_test.cc
rename to tensorflow/core/kernels/batching_util/batch_scheduler_test.cc
index f15d8cc8e57300dddc06dcffb24ec98920e193ef..2357a320a8a477460dce8ba3f30973a0af4a369f 100644
--- a/tensorflow/contrib/batching/batch_scheduler_test.cc
+++ b/tensorflow/core/kernels/batching_util/batch_scheduler_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/batching/batch_scheduler.h"
+#include "tensorflow/core/kernels/batching_util/batch_scheduler.h"
 
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/macros.h"
@@ -74,7 +74,9 @@ TEST(BatchTest, Basic) {
   EXPECT_EQ(task1->size(), batch.task(1).size());
 
   EXPECT_EQ(7, batch.RemoveTask()->size());
+  EXPECT_EQ(3, batch.size());
   EXPECT_EQ(3, batch.RemoveTask()->size());
+  EXPECT_EQ(0, batch.size());
   EXPECT_TRUE(batch.empty());
 }
 
diff --git a/tensorflow/contrib/batching/test_util/fake_clock_env.cc b/tensorflow/core/kernels/batching_util/fake_clock_env.cc
similarity index 97%
rename from tensorflow/contrib/batching/test_util/fake_clock_env.cc
rename to tensorflow/core/kernels/batching_util/fake_clock_env.cc
index 166d6703bde1054a4a44842ecea382b5a1fb79e7..6a757d871681051432ae737e710655b3285f2d24 100644
--- a/tensorflow/contrib/batching/test_util/fake_clock_env.cc
+++ b/tensorflow/core/kernels/batching_util/fake_clock_env.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/batching/test_util/fake_clock_env.h"
+#include "tensorflow/core/kernels/batching_util/fake_clock_env.h"
 
 #include <string>
 
diff --git a/tensorflow/core/kernels/batching_util/fake_clock_env.h b/tensorflow/core/kernels/batching_util/fake_clock_env.h
new file mode 100644
index 0000000000000000000000000000000000000000..b2848afe0741fc0a7d0cacce8d20bbb7ce027295
--- /dev/null
+++ b/tensorflow/core/kernels/batching_util/fake_clock_env.h
@@ -0,0 +1,76 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_FAKE_CLOCK_ENV_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_FAKE_CLOCK_ENV_H_
+
+#include <functional>
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/lib/core/notification.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace serving {
+namespace test_util {
+
+// An Env implementation with a fake clock for NowMicros() and
+// SleepForMicroseconds(). The clock doesn't advance on its own; it advances via
+// an explicit Advance() method.
+// All other Env virtual methods pass through to a wrapped Env.
+class FakeClockEnv : public EnvWrapper {
+ public:
+  explicit FakeClockEnv(Env* wrapped);
+  ~FakeClockEnv() override = default;
+
+  // Advance the clock by a certain number of microseconds.
+  void AdvanceByMicroseconds(int micros);
+
+  // Blocks until there is a sleeping thread that is scheduled to wake up at
+  // the given (absolute) time.
+  void BlockUntilSleepingThread(uint64 wake_time);
+
+  // Blocks until there are at least num_threads sleeping.
+  void BlockUntilThreadsAsleep(int num_threads);
+
+  // Methods that this class implements.
+  uint64 NowMicros() override;
+  void SleepForMicroseconds(int64 micros) override;
+
+ private:
+  mutex mu_;
+
+  uint64 current_time_ GUARDED_BY(mu_) = 0;
+
+  struct SleepingThread {
+    uint64 wake_time;
+    Notification* wake_notification;
+  };
+  std::vector<SleepingThread> sleeping_threads_ GUARDED_BY(mu_);
+
+  TF_DISALLOW_COPY_AND_ASSIGN(FakeClockEnv);
+};
+
+}  // namespace test_util
+}  // namespace serving
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_FAKE_CLOCK_ENV_H_
diff --git a/tensorflow/contrib/batching/util/periodic_function.cc b/tensorflow/core/kernels/batching_util/periodic_function.cc
similarity index 98%
rename from tensorflow/contrib/batching/util/periodic_function.cc
rename to tensorflow/core/kernels/batching_util/periodic_function.cc
index b7e4838da50c2daf70a5b2c7b7f630caa0be96fa..9726d04ac27bb4b2258275426e9cbb6ce5994435 100644
--- a/tensorflow/contrib/batching/util/periodic_function.cc
+++ b/tensorflow/core/kernels/batching_util/periodic_function.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/batching/util/periodic_function.h"
+#include "tensorflow/core/kernels/batching_util/periodic_function.h"
 
 #include <algorithm>
 
diff --git a/tensorflow/core/kernels/batching_util/periodic_function.h b/tensorflow/core/kernels/batching_util/periodic_function.h
new file mode 100644
index 0000000000000000000000000000000000000000..6811cd015edfc02da70e979bdc9902b8b310c791
--- /dev/null
+++ b/tensorflow/core/kernels/batching_util/periodic_function.h
@@ -0,0 +1,135 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// PeriodicFunction will periodically call the given function with a specified
+// period in a background thread.  After Start() returns, the thread is
+// guaranteed to have started. The destruction of the class causes the
+// background thread to be destroyed as well.  Start() should not be called more
+// than once.
+//
+// PeriodicFunction runs the function as soon as any previous run both is
+// complete and was started more than "interval_micros" earlier.  Thus, runs are
+// both serialized, and normally have a period of "interval_micros" if no run
+// exceeds the time.
+//
+// Note that, if the function takes longer than two interval_micross to finish,
+// then PeriodicFunction will "skip" at least one call to the function.  For
+// instance, if the period is 50ms and the function starts runs at time 0 for
+// 150ms, then the function will immediately start executing again at time 150,
+// but there will be no function runs corresponding to times 50 or 100.  This is
+// especially important to remember when using an environment with a simulated
+// clock: advancing simulated time atomically over N interval_micross will not
+// cause the function to be called N times.
+//
+// This object is thread-safe.
+//
+// Example:
+//
+//   class Foo {
+//    public:
+//     Foo() : periodic_function_([this]() { Bar(); },
+//                               1000 /* 1000us == 1ms*/) {
+//     }
+//
+//    private:
+//     void Bar() { ... }
+//
+//     PeriodicFunction periodic_function_;
+//   };
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_PERIODIC_FUNCTION_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_PERIODIC_FUNCTION_H_
+
+
+#include "tensorflow/core/kernels/batching_util/periodic_function.h"
+
+#include <functional>
+#include <memory>
+#include <string>
+
+#include "tensorflow/core/lib/core/notification.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace serving {
+
+namespace internal {
+class PeriodicFunctionTestAccess;
+}
+
+class PeriodicFunction {
+ public:
+  // Provides the ability to customize several aspects of the PeriodicFunction.
+  // Passed to constructor of PeriodicFunction.
+  struct Options {
+    Options() {}
+
+    // Any standard thread options, such as stack size, should
+    // be passed via "thread_options".
+    ThreadOptions thread_options;
+
+    // Specifies the thread name prefix (see the description in class
+    // Thread).
+    string thread_name_prefix = "periodic_function";
+
+    // The environment to use. Does not take ownership, but must remain alive
+    // for as long as the PeriodicFunction exists.
+    Env* env = Env::Default();
+
+    // Specifies the length of sleep before the first invocation of the
+    // function.
+    // This can be used for adding a random jitter to avoid synchronous behavior
+    // across multiple periodic functions.
+    int64 startup_delay_micros = 0;
+  };
+
+  // Also starts the background thread which will be calling the function.
+  PeriodicFunction(const std::function<void()>& function, int64 interval_micros,
+                   const Options& options = Options());
+
+  ~PeriodicFunction();
+
+ private:
+  friend class internal::PeriodicFunctionTestAccess;
+
+  // Notifies the background thread to stop.
+  void NotifyStop();
+
+  // (Blocking.) Loops forever calling "function_" every "interval_micros_".
+  void RunLoop(int64 start) LOCKS_EXCLUDED(mutex_);
+
+  const std::function<void()> function_;  // Actual client function
+  const int64 interval_micros_;    // Interval between calls.
+  const Options options_;
+
+  // Protects state below.
+  mutable mutex mutex_;
+  // Used to notify the thread to stop.
+  Notification stop_thread_;
+
+  // Thread for running "function_"
+  std::unique_ptr<Thread> thread_ GUARDED_BY(mutex_) = nullptr;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(PeriodicFunction);
+};
+
+}  // namespace serving
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_PERIODIC_FUNCTION_H_
diff --git a/tensorflow/contrib/batching/util/periodic_function_test.cc b/tensorflow/core/kernels/batching_util/periodic_function_test.cc
similarity index 98%
rename from tensorflow/contrib/batching/util/periodic_function_test.cc
rename to tensorflow/core/kernels/batching_util/periodic_function_test.cc
index 15179611160e1962bbd28b03ddbaa2eec35eb8ea..7682f0e1dd1506d27cefbf9d3f913c4ac5cdb7cc 100644
--- a/tensorflow/contrib/batching/util/periodic_function_test.cc
+++ b/tensorflow/core/kernels/batching_util/periodic_function_test.cc
@@ -13,12 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/batching/util/periodic_function.h"
+#include "tensorflow/core/kernels/batching_util/periodic_function.h"
 
 #include <memory>
 #include <string>
 
-#include "tensorflow/contrib/batching/test_util/fake_clock_env.h"
+#include "tensorflow/core/kernels/batching_util/fake_clock_env.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h
new file mode 100644
index 0000000000000000000000000000000000000000..3736d8ef64d84a37c49814823d0e04db3a21ccfb
--- /dev/null
+++ b/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h
@@ -0,0 +1,705 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_SHARED_BATCH_SCHEDULER_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_SHARED_BATCH_SCHEDULER_H_
+
+#include <stddef.h>
+#include <deque>
+#include <functional>
+#include <list>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "tensorflow/core/kernels/batching_util/batch_scheduler.h"
+#include "tensorflow/core/kernels/batching_util/periodic_function.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace serving {
+namespace internal {
+template <typename TaskType>
+class Queue;
+}  // namespace internal
+}  // namespace serving
+}  // namespace tensorflow
+
+namespace tensorflow {
+namespace serving {
+
+// A batch scheduler for server instances that service multiple request types
+// (e.g. multiple machine-learned models, or multiple versions of a model served
+// concurrently), or even multiple distinct tasks for a given request. The
+// scheduler multiplexes batches of different kinds of tasks onto a fixed-size
+// thread pool (each batch contains tasks of a single type), in a carefully
+// controlled manner. A common configuration is to set the number of threads
+// equal to the number of hardware accelerator units, in which case the
+// scheduler takes care of multiplexing the task types onto the shared hardware,
+// in a manner that is both fair and efficient.
+//
+// Semantically, SharedBatchScheduler behaves like having N instances of
+// BasicBatchScheduler (see basic_batch_scheduler.h), one per task type. The
+// difference is that under the covers there is a single shared thread pool,
+// instead of N independent ones, with their sharing deliberately coordinated.
+//
+// SharedBatchScheduler does not implement the BatchScheduler API; rather, it
+// presents an abstraction of "queues", where each queue corresponds to one type
+// of task. Tasks submitted to a given queue are placed in their own batches,
+// and cannot be mixed with other tasks. Queues can be added and deleted
+// dynamically, to accommodate e.g. versions of a model being brought up and
+// down over the lifetime of a server.
+//
+// The batch thread pool round-robins through the queues, running one batch
+// from a queue and then moving to the next queue. Each queue behaves like a
+// BasicBatchScheduler instance, in the sense that it has maximum batch size and
+// timeout parameters, which govern when a batch is eligible to be processed.
+//
+// Each queue is independently configured with a maximum size (in terms of the
+// maximum number of batches worth of enqueued tasks). For online serving, it is
+// recommended that the queue sizes be configured such that the sum of the sizes
+// of the active queues roughly equal the number of batch threads. (The idea is
+// that if all threads become available at roughly the same time, there will be
+// enough enqueued work for them to take on, but no more.)
+//
+// If queue sizes are configured in the manner suggested above, the maximum time
+// a task can spend in a queue before being placed in a batch and assigned to a
+// thread for processing, is the greater of:
+//  - the maximum time to process one batch of tasks from any active queue
+//  - the configured timeout parameter for the task's queue (which can be 0)
+//
+// For bulk processing jobs and throughput-oriented benchmarks, you may want to
+// set the maximum queue size to a large value.
+//
+// TODO(b/26539183): Support queue servicing policies other than round-robin.
+// E.g. let each queue specify a "share" (an int >= 1), so e.g. with queues A
+// and B having shares 1 and 2 respectively, the servicing pattern is ABBABB...
+//
+//
+// PERFORMANCE TUNING: See README.md.
+//
+template <typename TaskType>
+class SharedBatchScheduler
+    : public std::enable_shared_from_this<SharedBatchScheduler<TaskType>> {
+ public:
+  // TODO(b/25089730): Tune defaults based on best practices as they develop.
+  struct Options {
+    // The name to use for the pool of batch threads.
+    string thread_pool_name = {"batch_threads"};
+
+    // The number of threads to use to process batches.
+    // Must be >= 1, and should be tuned carefully.
+    int num_batch_threads = port::NumSchedulableCPUs();
+
+    // The environment to use.
+    // (Typically only overridden by test code.)
+    Env* env = Env::Default();
+  };
+  // Ownership is shared between the caller of Create() and any queues created
+  // via AddQueue().
+  static Status Create(
+      const Options& options,
+      std::shared_ptr<SharedBatchScheduler<TaskType>>* scheduler);
+
+  ~SharedBatchScheduler();
+
+  // Adds a queue to which tasks may be submitted. The returned queue implements
+  // the BatchScheduler API. Each queue has its own set of scheduling options,
+  // and its own callback to process batches of tasks submitted to the queue.
+  //
+  // The returned queue's destructor blocks until all tasks submitted to it have
+  // been processed.
+  struct QueueOptions {
+    // The maximum size of each batch.
+    //
+    // The scheduler may form batches of any size between 1 and this number
+    // (inclusive). If there is a need to quantize the batch sizes, i.e. only
+    // submit batches whose size is in a small set of allowed sizes, that can be
+    // done by adding padding in the process-batch callback.
+    int max_batch_size = 1000;
+
+    // If a task has been enqueued for this amount of time (in microseconds),
+    // and a thread is available, the scheduler will immediately form a batch
+    // from enqueued tasks and assign the batch to the thread for processing,
+    // even if the batch's size is below 'max_batch_size'.
+    //
+    // This parameter offers a way to bound queue latency, so that a task isn't
+    // stuck in the queue indefinitely waiting for enough tasks to arrive to
+    // make a full batch. (The latency bound is given in the class documentation
+    // above.)
+    //
+    // The goal is to smooth out batch sizes under low request rates, and thus
+    // avoid latency spikes.
+    int64 batch_timeout_micros = 0;
+
+    // The maximum allowable number of enqueued (accepted by Schedule() but
+    // not yet being processed on a batch thread) tasks in terms of batches.
+    // If this limit is reached, Schedule() will return an UNAVAILABLE error.
+    // See the class documentation above for guidelines on how to tune this
+    // parameter.
+    int max_enqueued_batches = 10;
+  };
+  Status AddQueue(const QueueOptions& options,
+                  std::function<void(std::unique_ptr<Batch<TaskType>>)>
+                      process_batch_callback,
+                  std::unique_ptr<BatchScheduler<TaskType>>* queue);
+
+ private:
+  explicit SharedBatchScheduler(const Options& options);
+
+  // The code executed in 'batch_threads_'. Obtains a batch to process from the
+  // queue pointed to by 'next_queue_to_schedule_', and processes it. If that
+  // queue declines to provide a batch to process, moves onto the next queue. If
+  // no queues provide a batch to process, just sleeps briefly and exits.
+  void ThreadLogic();
+
+  const Options options_;
+
+  mutex mu_;
+
+  // A list of queues. (We use std::list instead of std::vector to ensure that
+  // iterators are not invalidated by adding/removing elements. It also offers
+  // efficient removal of elements from the middle.)
+  using QueueList = std::list<std::unique_ptr<internal::Queue<TaskType>>>;
+
+  // All "active" queues, i.e. ones that either:
+  //  - have not been removed, or
+  //  - have been removed but are not yet empty.
+  QueueList queues_ GUARDED_BY(mu_);
+
+  // An iterator over 'queues_', pointing to the queue from which the next
+  // available batch thread should grab work.
+  typename QueueList::iterator next_queue_to_schedule_ GUARDED_BY(mu_);
+
+  // Used by idle batch threads to wait for work to enter the system. Notified
+  // whenever a batch becomes schedulable.
+  condition_variable schedulable_batch_cv_;
+
+  // Threads that process batches obtained from the queues.
+  std::vector<std::unique_ptr<PeriodicFunction>> batch_threads_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(SharedBatchScheduler);
+};
+
+//////////
+// Implementation details follow. API users need not read.
+
+namespace internal {
+
+// A task queue for SharedBatchScheduler. Accepts tasks and accumulates them
+// into batches, and dispenses those batches to be processed via a "pull"
+// interface. The queue's behavior is governed by maximum batch size, timeout
+// and maximum queue length parameters; see their documentation in
+// SharedBatchScheduler.
+//
+// The queue is implemented as a deque of batches, with these invariants:
+//  - The number of batches is between 1 and 'options_.max_enqueued_batches'.
+//  - The back-most batch is open; the rest are closed.
+//
+// Submitted tasks are added to the open batch. If that batch doesn't have room
+// but the queue isn't full, then that batch is closed and a new open batch is
+// started.
+//
+// Batch pull requests are handled by dequeuing the front-most batch if it is
+// closed. If the front-most batch is open (i.e. the queue contains only one
+// batch) and has reached the timeout, it is immediately closed and returned;
+// otherwise no batch is returned for the request.
+template <typename TaskType>
+class Queue {
+ public:
+  using ProcessBatchCallback =
+      std::function<void(std::unique_ptr<Batch<TaskType>>)>;
+  using SchedulableBatchCallback = std::function<void()>;
+  Queue(const typename SharedBatchScheduler<TaskType>::QueueOptions& options,
+        Env* env, ProcessBatchCallback process_batch_callback,
+        SchedulableBatchCallback schdulable_batch_callback);
+
+  // Illegal to destruct unless the queue is empty.
+  ~Queue();
+
+  // Submits a task to the queue, with the same semantics as
+  // BatchScheduler::Schedule().
+  Status Schedule(std::unique_ptr<TaskType>* task);
+
+  // Returns the number of enqueued tasks, with the same semantics as
+  // BatchScheduler::NumEnqueuedTasks().
+  size_t NumEnqueuedTasks() const;
+
+  // Returns the queue capacity, with the same semantics as
+  // BatchScheduler::SchedulingCapacity().
+  size_t SchedulingCapacity() const;
+
+  // Returns the maximum allowed size of tasks submitted to the queue.
+  size_t max_task_size() const { return options_.max_batch_size; }
+
+  // Called by a thread that is ready to process a batch, to request one from
+  // this queue. Either returns a batch that is ready to be processed, or
+  // nullptr if the queue declines to schedule a batch at this time. If it
+  // returns a batch, the batch is guaranteed to be closed.
+  std::unique_ptr<Batch<TaskType>> ScheduleBatch();
+
+  // Processes a batch that has been returned earlier by ScheduleBatch().
+  void ProcessBatch(std::unique_ptr<Batch<TaskType>> batch);
+
+  // Determines whether the queue is empty, i.e. has no tasks waiting or being
+  // processed.
+  bool IsEmpty() const;
+
+  // Marks the queue closed, and waits until it is empty.
+  void CloseAndWaitUntilEmpty();
+
+  bool closed() const {
+    mutex_lock l(mu_);
+    return closed_;
+  }
+
+ private:
+  // Same as IsEmpty(), but assumes the caller already holds a lock on 'mu_'.
+  bool IsEmptyInternal() const EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+  // Closes the open batch residing at the back of 'batches_', and inserts a
+  // fresh open batch behind it.
+  void StartNewBatch() EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+  // Determines whether the open batch residing at the back of 'batches_' is
+  // currently schedulable.
+  bool IsOpenBatchSchedulable() const EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+  const typename SharedBatchScheduler<TaskType>::QueueOptions options_;
+
+  // The environment to use.
+  Env* env_;
+
+  // A callback invoked to processes a batch of work units. Always invoked from
+  // a batch thread.
+  ProcessBatchCallback process_batch_callback_;
+
+  // A callback invoked to notify the scheduler that a new batch has become
+  // schedulable.
+  SchedulableBatchCallback schedulable_batch_callback_;
+
+  mutable mutex mu_;
+
+  // Whether this queue can accept new tasks. This variable is monotonic: it
+  // starts as false, and then at some point gets set to true and remains true
+  // for the duration of this object's life.
+  bool closed_ GUARDED_BY(mu_) = false;
+
+  // The enqueued batches. See the invariants in the class comments above.
+  std::deque<std::unique_ptr<Batch<TaskType>>> batches_ GUARDED_BY(mu_);
+
+  // The time at which the first task was added to the open (back-most) batch
+  // in 'batches_'. Valid iff that batch contains at least one task.
+  uint64 open_batch_start_time_micros_ GUARDED_BY(mu_);
+
+  // Whether this queue contains a batch that is eligible to be scheduled. Used
+  // to keep track of when to call 'schedulable_batch_callback_'.
+  bool schedulable_batch_ GUARDED_BY(mu_) = false;
+
+  // The number of batches currently being processed by batch threads.
+  // Incremented in ScheduleBatch() and decremented in ProcessBatch().
+  int num_batches_being_processed_ GUARDED_BY(mu_) = 0;
+
+  // Used by CloseAndWaitUntilEmpty() to wait until the queue is empty, for the
+  // case in which the queue is not empty when CloseAndWaitUntilEmpty() starts.
+  // When ProcessBatch() dequeues the last batch and makes the queue empty, if
+  // 'empty_notification_' is non-null it calls 'empty_notification_->Notify()'.
+  Notification* empty_notification_ GUARDED_BY(mu_) = nullptr;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(Queue);
+};
+
+// A RAII-style object that points to a Queue and implements
+// the BatchScheduler API. To be handed out to clients who call AddQueue().
+template <typename TaskType>
+class QueueHandle : public BatchScheduler<TaskType> {
+ public:
+  QueueHandle(std::shared_ptr<SharedBatchScheduler<TaskType>> scheduler,
+              Queue<TaskType>* queue);
+  ~QueueHandle() override;
+
+  Status Schedule(std::unique_ptr<TaskType>* task) override;
+  size_t NumEnqueuedTasks() const override;
+  size_t SchedulingCapacity() const override;
+
+  size_t max_task_size() const override { return queue_->max_task_size(); }
+
+ private:
+  // The scheduler that owns 'queue_'.
+  std::shared_ptr<SharedBatchScheduler<TaskType>> scheduler_;
+
+  // The queue this handle wraps. Owned by 'scheduler_', which keeps it alive at
+  // least until this class's destructor closes it.
+  Queue<TaskType>* queue_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(QueueHandle);
+};
+
+}  // namespace internal
+
+template <typename TaskType>
+Status SharedBatchScheduler<TaskType>::Create(
+    const Options& options,
+    std::shared_ptr<SharedBatchScheduler<TaskType>>* scheduler) {
+  if (options.num_batch_threads < 1) {
+    return errors::InvalidArgument("num_batch_threads must be positive; was ",
+                                   options.num_batch_threads);
+  }
+  scheduler->reset(new SharedBatchScheduler<TaskType>(options));
+  return Status::OK();
+}
+
+template <typename TaskType>
+SharedBatchScheduler<TaskType>::~SharedBatchScheduler() {
+  // Wait until the batch threads finish clearing out and deleting the closed
+  // queues.
+  for (;;) {
+    {
+      mutex_lock l(mu_);
+      if (queues_.empty()) {
+        break;
+      }
+    }
+    const int64 kSleepTimeMicros = 100;
+    options_.env->SleepForMicroseconds(kSleepTimeMicros);
+  }
+  // Delete the batch threads before allowing state the threads may access (e.g.
+  // 'mu_') to be deleted.
+  batch_threads_.clear();
+}
+
+template <typename TaskType>
+Status SharedBatchScheduler<TaskType>::AddQueue(
+    const QueueOptions& options,
+    std::function<void(std::unique_ptr<Batch<TaskType>>)>
+        process_batch_callback,
+    std::unique_ptr<BatchScheduler<TaskType>>* queue) {
+  if (options.max_batch_size <= 0) {
+    return errors::InvalidArgument("max_batch_size must be positive; was ",
+                                   options.max_batch_size);
+  }
+  if (options.batch_timeout_micros < 0) {
+    return errors::InvalidArgument(
+        "batch_timeout_micros must be non-negative; was ",
+        options.batch_timeout_micros);
+  }
+  if (options.max_enqueued_batches < 0) {
+    return errors::InvalidArgument(
+        "max_enqueued_batches must be non-negative; was ",
+        options.max_enqueued_batches);
+  }
+
+  auto schedulable_batch_callback = [this] {
+    mutex_lock l(mu_);
+    schedulable_batch_cv_.notify_one();
+  };
+  auto internal_queue =
+      std::unique_ptr<internal::Queue<TaskType>>(new internal::Queue<TaskType>(
+          options, options_.env, process_batch_callback,
+          schedulable_batch_callback));
+  auto handle = std::unique_ptr<BatchScheduler<TaskType>>(
+      new internal::QueueHandle<TaskType>(this->shared_from_this(),
+                                          internal_queue.get()));
+  {
+    mutex_lock l(mu_);
+    queues_.push_back(std::move(internal_queue));
+    if (next_queue_to_schedule_ == queues_.end()) {
+      next_queue_to_schedule_ = queues_.begin();
+    }
+  }
+  *queue = std::move(handle);
+  return Status::OK();
+}
+
+template <typename TaskType>
+SharedBatchScheduler<TaskType>::SharedBatchScheduler(const Options& options)
+    : options_(options), next_queue_to_schedule_(queues_.end()) {
+  // Kick off the batch threads.
+  PeriodicFunction::Options periodic_fn_options;
+  periodic_fn_options.thread_name_prefix =
+      strings::StrCat(options.thread_pool_name, "_");
+  for (int i = 0; i < options.num_batch_threads; ++i) {
+    std::unique_ptr<PeriodicFunction> thread(new PeriodicFunction(
+        [this] { this->ThreadLogic(); },
+        0 /* function invocation interval time */, periodic_fn_options));
+    batch_threads_.push_back(std::move(thread));
+  }
+}
+
+template <typename TaskType>
+void SharedBatchScheduler<TaskType>::ThreadLogic() {
+  // A batch to process next (or nullptr if no work to do).
+  std::unique_ptr<Batch<TaskType>> batch_to_process;
+  // The queue with which 'batch_to_process' is associated.
+  internal::Queue<TaskType>* queue_for_batch = nullptr;
+  {
+    mutex_lock l(mu_);
+
+    const int num_queues = queues_.size();
+    for (int num_queues_tried = 0;
+         batch_to_process == nullptr && num_queues_tried < num_queues;
+         ++num_queues_tried) {
+      DCHECK(next_queue_to_schedule_ != queues_.end());
+
+      // If a closed queue responds to ScheduleBatch() with nullptr, the queue
+      // will never yield any further batches so we can drop it. To avoid a
+      // race, we take a snapshot of the queue's closedness state *before*
+      // calling ScheduleBatch().
+      const bool queue_closed = (*next_queue_to_schedule_)->closed();
+
+      // Ask '*next_queue_to_schedule_' if it wants us to process a batch.
+      batch_to_process = (*next_queue_to_schedule_)->ScheduleBatch();
+      if (batch_to_process != nullptr) {
+        queue_for_batch = next_queue_to_schedule_->get();
+      }
+
+      // Advance 'next_queue_to_schedule_'.
+      if (queue_closed && (*next_queue_to_schedule_)->IsEmpty() &&
+          batch_to_process == nullptr) {
+        // We've encountered a closed queue with no work to do. Drop it.
+        DCHECK_NE(queue_for_batch, next_queue_to_schedule_->get());
+        next_queue_to_schedule_ = queues_.erase(next_queue_to_schedule_);
+      } else {
+        ++next_queue_to_schedule_;
+      }
+      if (next_queue_to_schedule_ == queues_.end() && !queues_.empty()) {
+        // We've hit the end. Wrap to the first queue.
+        next_queue_to_schedule_ = queues_.begin();
+      }
+    }
+
+    if (batch_to_process == nullptr) {
+      // We couldn't find any work to do. Wait until a new batch becomes
+      // schedulable, or some time has elapsed, before checking again.
+      const int64 kTimeoutMillis = 1;  // The smallest accepted granule of time.
+      WaitForMilliseconds(&l, &schedulable_batch_cv_, kTimeoutMillis);
+      return;
+    }
+  }
+
+  queue_for_batch->ProcessBatch(std::move(batch_to_process));
+}
+
+namespace internal {
+
+template <typename TaskType>
+Queue<TaskType>::Queue(
+    const typename SharedBatchScheduler<TaskType>::QueueOptions& options,
+    Env* env, ProcessBatchCallback process_batch_callback,
+    SchedulableBatchCallback schedulable_batch_callback)
+    : options_(options),
+      env_(env),
+      process_batch_callback_(process_batch_callback),
+      schedulable_batch_callback_(schedulable_batch_callback) {
+  // Create an initial, open batch.
+  batches_.emplace_back(new Batch<TaskType>);
+}
+
+template <typename TaskType>
+Queue<TaskType>::~Queue() {
+  mutex_lock l(mu_);
+  DCHECK(IsEmptyInternal());
+
+  // Close the (empty) open batch, so its destructor doesn't block.
+  batches_.back()->Close();
+}
+
+template <typename TaskType>
+Status Queue<TaskType>::Schedule(std::unique_ptr<TaskType>* task) {
+  if ((*task)->size() > options_.max_batch_size) {
+    return errors::InvalidArgument("Task size ", (*task)->size(),
+                                   " is larger than maximum batch size ",
+                                   options_.max_batch_size);
+  }
+
+  bool notify_of_schedulable_batch = false;
+  {
+    mutex_lock l(mu_);
+
+    DCHECK(!closed_);
+
+    if (batches_.back()->size() + (*task)->size() > options_.max_batch_size) {
+      if (batches_.size() >= options_.max_enqueued_batches) {
+        return errors::Unavailable(
+            "The batch scheduling queue to which this task was submitted is "
+            "full");
+      }
+      StartNewBatch();
+    }
+    if (batches_.back()->empty()) {
+      open_batch_start_time_micros_ = env_->NowMicros();
+    }
+    batches_.back()->AddTask(std::move(*task));
+
+    if (!schedulable_batch_) {
+      if (batches_.size() > 1 || IsOpenBatchSchedulable()) {
+        schedulable_batch_ = true;
+        notify_of_schedulable_batch = true;
+      }
+    }
+  }
+
+  if (notify_of_schedulable_batch) {
+    schedulable_batch_callback_();
+  }
+
+  return Status::OK();
+}
+
+template <typename TaskType>
+size_t Queue<TaskType>::NumEnqueuedTasks() const {
+  mutex_lock l(mu_);
+  size_t num_enqueued_tasks = 0;
+  for (const auto& batch : batches_) {
+    num_enqueued_tasks += batch->num_tasks();
+  }
+  return num_enqueued_tasks;
+}
+
+template <typename TaskType>
+size_t Queue<TaskType>::SchedulingCapacity() const {
+  mutex_lock l(mu_);
+  const int num_new_batches_schedulable =
+      options_.max_enqueued_batches - batches_.size();
+  const int open_batch_capacity =
+      options_.max_batch_size - batches_.back()->size();
+  return (num_new_batches_schedulable * options_.max_batch_size) +
+         open_batch_capacity;
+}
+
+template <typename TaskType>
+std::unique_ptr<Batch<TaskType>> Queue<TaskType>::ScheduleBatch() {
+  // The batch to schedule, which we may populate below. (If left as nullptr,
+  // that means we are electing not to schedule a batch at this time.)
+  std::unique_ptr<Batch<TaskType>> batch_to_schedule;
+
+  {
+    mutex_lock l(mu_);
+
+    // Consider closing the open batch at this time, to schedule it.
+    if (batches_.size() == 1 && IsOpenBatchSchedulable()) {
+      StartNewBatch();
+    }
+
+    if (batches_.size() >= 2) {
+      // There is at least one closed batch that is ready to be scheduled.
+      ++num_batches_being_processed_;
+      batch_to_schedule = std::move(batches_.front());
+      batches_.pop_front();
+    } else {
+      schedulable_batch_ = false;
+    }
+  }
+
+  return batch_to_schedule;
+}
+
+template <typename TaskType>
+void Queue<TaskType>::ProcessBatch(std::unique_ptr<Batch<TaskType>> batch) {
+  process_batch_callback_(std::move(batch));
+
+  {
+    mutex_lock l(mu_);
+    --num_batches_being_processed_;
+    if (empty_notification_ != nullptr && IsEmptyInternal()) {
+      empty_notification_->Notify();
+    }
+  }
+}
+
+template <typename TaskType>
+bool Queue<TaskType>::IsEmpty() const {
+  mutex_lock l(mu_);
+  return IsEmptyInternal();
+}
+
+template <typename TaskType>
+void Queue<TaskType>::CloseAndWaitUntilEmpty() {
+  Notification empty;
+  {
+    mutex_lock l(mu_);
+    closed_ = true;
+    if (IsEmptyInternal()) {
+      empty.Notify();
+    } else {
+      // Arrange for ProcessBatch() to notify when the queue becomes empty.
+      empty_notification_ = &empty;
+    }
+  }
+  empty.WaitForNotification();
+}
+
+template <typename TaskType>
+bool Queue<TaskType>::IsEmptyInternal() const {
+  return num_batches_being_processed_ == 0 && batches_.size() == 1 &&
+         batches_.back()->empty();
+}
+
+template <typename TaskType>
+void Queue<TaskType>::StartNewBatch() {
+  batches_.back()->Close();
+  batches_.emplace_back(new Batch<TaskType>);
+}
+
+template <typename TaskType>
+bool Queue<TaskType>::IsOpenBatchSchedulable() const {
+  Batch<TaskType>* open_batch = batches_.back().get();
+  if (open_batch->empty()) {
+    return false;
+  }
+  return closed_ || open_batch->size() >= options_.max_batch_size ||
+         env_->NowMicros() >=
+             open_batch_start_time_micros_ + options_.batch_timeout_micros;
+}
+
+template <typename TaskType>
+QueueHandle<TaskType>::QueueHandle(
+    std::shared_ptr<SharedBatchScheduler<TaskType>> scheduler,
+    Queue<TaskType>* queue)
+    : scheduler_(scheduler), queue_(queue) {}
+
+template <typename TaskType>
+QueueHandle<TaskType>::~QueueHandle() {
+  queue_->CloseAndWaitUntilEmpty();
+}
+
+template <typename TaskType>
+Status QueueHandle<TaskType>::Schedule(std::unique_ptr<TaskType>* task) {
+  return queue_->Schedule(task);
+}
+
+template <typename TaskType>
+size_t QueueHandle<TaskType>::NumEnqueuedTasks() const {
+  return queue_->NumEnqueuedTasks();
+}
+
+template <typename TaskType>
+size_t QueueHandle<TaskType>::SchedulingCapacity() const {
+  return queue_->SchedulingCapacity();
+}
+
+}  // namespace internal
+
+}  // namespace serving
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_SHARED_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/shared_batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/shared_batch_scheduler_test.cc
similarity index 99%
rename from tensorflow/contrib/batching/shared_batch_scheduler_test.cc
rename to tensorflow/core/kernels/batching_util/shared_batch_scheduler_test.cc
index 3e924ae5f13519b4fe9a3f4b510773ca2bddaf23..d73dcf0fa0e1b2b387b3ed53acd63d5c65683fd4 100644
--- a/tensorflow/contrib/batching/shared_batch_scheduler_test.cc
+++ b/tensorflow/core/kernels/batching_util/shared_batch_scheduler_test.cc
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/batching/shared_batch_scheduler.h"
+#include "tensorflow/core/kernels/batching_util/shared_batch_scheduler.h"
 
-#include "tensorflow/contrib/batching/test_util/fake_clock_env.h"
+#include "tensorflow/core/kernels/batching_util/fake_clock_env.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/core/notification.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -429,6 +429,7 @@ TEST(SharedBatchSchedulerTest, ConstMethods) {
     queue_options.max_enqueued_batches = max_enqueued_batches;
     std::unique_ptr<BatchScheduler<FakeTask>> queue;
     TF_ASSERT_OK(scheduler->AddQueue(queue_options, callback, &queue));
+    EXPECT_EQ(2, queue->max_task_size());
     EXPECT_EQ(0, queue->NumEnqueuedTasks());
     EXPECT_EQ(max_enqueued_batches * 2, queue->SchedulingCapacity());
 
diff --git a/tensorflow/core/kernels/bcast_ops.cc b/tensorflow/core/kernels/bcast_ops.cc
index 2ad2c4163649cb97f5a0b03dfeffa1b5fd53e208..7fc4b1762d0e56271bef586f0f8db0a2a66ff87d 100644
--- a/tensorflow/core/kernels/bcast_ops.cc
+++ b/tensorflow/core/kernels/bcast_ops.cc
@@ -22,11 +22,10 @@ limitations under the License.
 namespace tensorflow {
 
 // Given shapes of two tensors, computes the broadcast shape.
+template <typename T>
 class BCastArgsOp : public OpKernel {
  public:
-  explicit BCastArgsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->MatchSignature({DT_INT32, DT_INT32}, {DT_INT32}));
-  }
+  explicit BCastArgsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
 
   void Compute(OpKernelContext* ctx) override {
     OP_REQUIRES(
@@ -40,7 +39,7 @@ class BCastArgsOp : public OpKernel {
                                           in.shape().DebugString()));
       BCast::Vec vec;
       for (int64 i = 0; i < in.NumElements(); ++i) {
-        vec.push_back(in.vec<int32>()(i));
+        vec.push_back(in.vec<T>()(i));
       }
       shapes.push_back(vec);
     }
@@ -60,7 +59,7 @@ class BCastArgsOp : public OpKernel {
     Tensor* o = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(idx, TensorShape({len}), &o));
     for (int64 i = 0; i < len; ++i) {
-      o->flat<int32>()(i) = static_cast<int32>(v[i]);
+      o->flat<T>()(i) = static_cast<T>(v[i]);
     }
   }
 
@@ -72,12 +71,10 @@ class BCastArgsOp : public OpKernel {
 //
 // TODO(zhifengc):
 //   1. Adds support for n-ary (n >= 2).
+template <typename T>
 class BCastGradArgsOp : public OpKernel {
  public:
-  explicit BCastGradArgsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
-    OP_REQUIRES_OK(
-        ctx, ctx->MatchSignature({DT_INT32, DT_INT32}, {DT_INT32, DT_INT32}));
-  }
+  explicit BCastGradArgsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
 
   void Compute(OpKernelContext* ctx) override {
     OP_REQUIRES(
@@ -91,7 +88,7 @@ class BCastGradArgsOp : public OpKernel {
                                           in.shape().DebugString()));
       BCast::Vec vec;
       for (int64 i = 0; i < in.NumElements(); ++i) {
-        vec.push_back(in.vec<int32>()(i));
+        vec.push_back(in.vec<T>()(i));
       }
       shapes.push_back(vec);
     }
@@ -112,7 +109,7 @@ class BCastGradArgsOp : public OpKernel {
     Tensor* o = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(idx, TensorShape({len}), &o));
     for (int64 i = 0; i < len; ++i) {
-      o->flat<int32>()(i) = static_cast<int32>(v[i]);
+      o->flat<T>()(i) = static_cast<T>(v[i]);
     }
   }
 
@@ -125,14 +122,28 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
                             .HostMemory("s0")
                             .HostMemory("s1")
                             .HostMemory("r0"),
-                        BCastArgsOp);
+                        BCastArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0"),
+                        BCastArgsOp<int64>);
 REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<int32>("T")
                             .HostMemory("s0")
                             .HostMemory("s1")
                             .HostMemory("r0"),
-                        BCastArgsOp);
+                        BCastArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0"),
+                        BCastArgsOp<int64>);
 
 #if TENSORFLOW_USE_SYCL
 REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
@@ -141,7 +152,14 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
                             .HostMemory("s0")
                             .HostMemory("s1")
                             .HostMemory("r0"),
-                        BCastArgsOp);
+                        BCastArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0"),
+                        BCastArgsOp<int32>);
 #endif
 
 REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
@@ -151,7 +169,15 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
                             .HostMemory("s1")
                             .HostMemory("r0")
                             .HostMemory("r1"),
-                        BCastGradArgsOp);
+                        BCastGradArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0")
+                            .HostMemory("r1"),
+                        BCastGradArgsOp<int64>);
 REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<int32>("T")
@@ -159,7 +185,15 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
                             .HostMemory("s1")
                             .HostMemory("r0")
                             .HostMemory("r1"),
-                        BCastGradArgsOp);
+                        BCastGradArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0")
+                            .HostMemory("r1"),
+                        BCastGradArgsOp<int64>);
 
 #if TENSORFLOW_USE_SYCL
 REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
@@ -169,6 +203,14 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
                             .HostMemory("s1")
                             .HostMemory("r0")
                             .HostMemory("r1"),
-                        BCastGradArgsOp);
+                        BCastGradArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0")
+                            .HostMemory("r1"),
+                        BCastGradArgsOp<int64>);
 #endif
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/bucketize_op.cc b/tensorflow/core/kernels/bucketize_op.cc
index c1693de53894228865af675746f8da13073574f8..4e4b6d52154cd1bacc621535f7dd9c56045a3c57 100644
--- a/tensorflow/core/kernels/bucketize_op.cc
+++ b/tensorflow/core/kernels/bucketize_op.cc
@@ -25,10 +25,8 @@ limitations under the License.
 
 namespace tensorflow {
 
-using thread::ThreadPool;
-
-typedef Eigen::ThreadPoolDevice CPUDevice;
-typedef Eigen::GpuDevice GPUDevice;
+using CPUDevice = Eigen::ThreadPoolDevice;
+using GPUDevice = Eigen::GpuDevice;
 
 namespace functor {
 
@@ -49,6 +47,7 @@ struct BucketizeFunctor<CPUDevice, T> {
     return Status::OK();
   }
 };
+
 }  // namespace functor
 
 template <typename Device, typename T>
diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
index 325dee793b3eef4e045e2b3d5ad2f96dbf3943d8..551d77f4950d08e869c49cbc245c564a1050c047 100644
--- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
@@ -33,11 +33,28 @@ namespace tensorflow {
 
 typedef Eigen::GpuDevice GPUDevice;
 
-template <typename T>
+template <typename T, bool useSharedMem>
 __global__ void BucketizeCustomKernel(
     const int32 size_in, const T* in, const int32 size_boundaries,
     CudaDeviceArrayStruct<float> boundaries_array, int32* out) {
   const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array);
+
+  extern __shared__ __align__(sizeof(float)) unsigned char shared_mem[];
+  float* shared_mem_boundaries = reinterpret_cast<float*>(shared_mem);
+
+  if (useSharedMem) {
+    int32 lidx = threadIdx.y * blockDim.x + threadIdx.x;
+    int32 blockSize = blockDim.x * blockDim.y;
+
+    for (int32 i = lidx; i < size_boundaries; i += blockSize) {
+      shared_mem_boundaries[i] = boundaries[i];
+    }
+
+    __syncthreads();
+
+    boundaries = shared_mem_boundaries;
+  }
+
   CUDA_1D_KERNEL_LOOP(i, size_in) {
     T value = in[i];
     int32 bucket = 0;
@@ -77,11 +94,20 @@ struct BucketizeFunctor<GPUDevice, T> {
     TF_RETURN_IF_ERROR(boundaries_array.Finalize());
 
     CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d);
-    BucketizeCustomKernel<T>
-        <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-            input.size(), input.data(), boundaries_vector.size(),
-            boundaries_array.data(), output.data());
-
+    int32 shared_mem_size = sizeof(float) * boundaries_vector.size();
+    const int32 kMaxSharedMemBytes = 16384;
+    if (shared_mem_size < d.sharedMemPerBlock() &&
+        shared_mem_size < kMaxSharedMemBytes) {
+      BucketizeCustomKernel<T, true>
+          <<<config.block_count, config.thread_per_block, shared_mem_size,
+             d.stream()>>>(input.size(), input.data(), boundaries_vector.size(),
+                           boundaries_array.data(), output.data());
+    } else {
+      BucketizeCustomKernel<T, false>
+          <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+              input.size(), input.data(), boundaries_vector.size(),
+              boundaries_array.data(), output.data());
+    }
     return Status::OK();
   }
 };
diff --git a/tensorflow/core/kernels/captured_function.cc b/tensorflow/core/kernels/captured_function.cc
deleted file mode 100644
index 00cdc1eff2d3003cb55e868389033f8504e01588..0000000000000000000000000000000000000000
--- a/tensorflow/core/kernels/captured_function.cc
+++ /dev/null
@@ -1,196 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/core/kernels/captured_function.h"
-
-#include <utility>
-
-#include "tensorflow/core/common_runtime/threadpool_device.h"
-#include "tensorflow/core/framework/allocator.h"
-#include "tensorflow/core/framework/device_attributes.pb.h"
-#include "tensorflow/core/framework/lookup_interface.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/queue_interface.h"
-#include "tensorflow/core/framework/reader_interface.h"
-#include "tensorflow/core/framework/resource_handle.pb_text.h"
-#include "tensorflow/core/kernels/dataset.h"
-#include "tensorflow/core/kernels/variable_ops.h"
-#include "tensorflow/core/platform/notification.h"
-#include "tensorflow/core/public/session_options.h"
-
-
-namespace tensorflow {
-
-/* static */
-Status CapturedFunction::Create(
-    OpKernelContext* ctx, const NameAttrList& func, int graph_def_version,
-    std::vector<Tensor> captured_inputs,
-    std::unique_ptr<CapturedFunction>* out_function) {
-  // NOTE(mrry): We need to assign a name to the device, and we choose
-  // the same name as the calling context's device so that we do not
-  // need to rewrite resource handles that are found in `captured_inputs`.
-  Device* device =
-      new ThreadPoolDevice(SessionOptions(), ctx->device()->attributes().name(),
-                           Bytes(256 << 20), DeviceLocality(), cpu_allocator());
-
-// TODO(mrry): Handle arbitrary resource types, which might require a
-// redesign (or opening up access to `ResourceMgr::DoLookup()` and
-// `ResourceMgr::DoCreate()` to this code).
-#define HANDLE_RESOURCE_TYPE(ResourceType)                                     \
-  if (input_handle.hash_code() == MakeTypeIndex<ResourceType>().hash_code()) { \
-    ResourceType* resource;                                                    \
-    Status s = LookupResource(ctx, input_handle, &resource);                   \
-    if (errors::IsNotFound(s)) {                                               \
-      return errors::FailedPrecondition(                                       \
-          "Failed to capture resource named \"", input_handle.name(),          \
-          "\" in a dataset function. You may need to initialize it "           \
-          "explicitly before initializing an iterator that uses it.");         \
-    } else if (!s.ok()) {                                                      \
-      return s;                                                                \
-    }                                                                          \
-    ResourceType* already_created_resource;                                    \
-    /* Look up the resource in the this function's resource manager, in case   \
-     * it has already been created. */                                         \
-    s = device->resource_manager()->Lookup(input_handle.container(),           \
-                                           input_handle.name(),                \
-                                           &already_created_resource);         \
-    if (s.ok()) {                                                              \
-      CHECK_EQ(resource, already_created_resource);                            \
-      resource->Unref();                                                       \
-      already_created_resource->Unref();                                       \
-    } else {                                                                   \
-      if (errors::IsNotFound(s)) {                                             \
-        TF_RETURN_IF_ERROR(device->resource_manager()->Create(                 \
-            input_handle.container(), input_handle.name(), resource));         \
-      } else {                                                                 \
-        return s;                                                              \
-      }                                                                        \
-    }                                                                          \
-    continue;                                                                  \
-  }
-
-  for (size_t i = 0; i < captured_inputs.size(); ++i) {
-    if (captured_inputs[i].dtype() == DT_RESOURCE) {
-      // Extract the resource from `ctx->resource_manager()` and
-      // insert it into `device->resource_manager()` so that it can be
-      // used when the function executes.
-      ResourceHandle input_handle =
-          captured_inputs[i].scalar<ResourceHandle>()();
-      HANDLE_RESOURCE_TYPE(lookup::LookupInterface);
-      HANDLE_RESOURCE_TYPE(QueueInterface);
-      HANDLE_RESOURCE_TYPE(Var);
-      return errors::Unimplemented(
-          "Cannot currently capture resource '",
-          ProtoDebugString(input_handle),
-          "' in a dataset function (type not supported).");
-    }
-  }
-#undef HANDLE_RESOURCE_TYPE
-
-  std::unique_ptr<DeviceMgr> device_mgr(new DeviceMgr({device}));
-  std::unique_ptr<FunctionLibraryDefinition> flib_def(
-      new FunctionLibraryDefinition(
-          *ctx->function_library()->GetFunctionLibraryDefinition()));
-  std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
-      new ProcessFunctionLibraryRuntime(device_mgr.get(), ctx->env(),
-                                        graph_def_version, flib_def.get(),
-                                        {} /* TODO(mrry): OptimizerOptions? */,
-                                        nullptr /* TODO(mrry): ClusterFLR */));
-
-  FunctionLibraryRuntime* lib = pflr->GetFLR(device->name());
-
-  FunctionLibraryRuntime::Handle f_handle;
-  TF_RETURN_IF_ERROR(
-      lib->Instantiate(func.name(), AttrSlice(&func.attr()), &f_handle));
-
-  out_function->reset(new CapturedFunction(
-      device, std::move(device_mgr), std::move(flib_def), std::move(pflr), lib,
-      f_handle, std::move(captured_inputs)));
-  return Status::OK();
-}
-
-Status CapturedFunction::Run(FunctionLibraryRuntime::Options f_opts,
-                             gtl::ArraySlice<Tensor> args,
-                             std::vector<Tensor>* rets) {
-  Notification n;
-  Status s;
-  auto done_callback = [&n, &s](Status func_status) {
-    s.Update(func_status);
-    n.Notify();
-  };
-  // TODO(mrry): Add cancellation manager support to IteratorContext
-  // so that we can cancel running map functions. The local
-  // cancellation manager here is created so that we can run kernels
-  // (such as queue kernels) that depend on the non-nullness
-  // `OpKernelContext::cancellation_manager()`, but additional effort
-  // will be required to plumb it through the `IteratorContext`.
-  CancellationManager c_mgr;
-  f_opts.cancellation_manager = &c_mgr;
-  RunHelper(std::move(f_opts), args, rets, std::move(done_callback));
-  n.WaitForNotification();
-  return s;
-}
-
-void CapturedFunction::RunAsync(FunctionLibraryRuntime::Options f_opts,
-                                gtl::ArraySlice<Tensor> args,
-                                std::vector<Tensor>* rets,
-                                FunctionLibraryRuntime::DoneCallback done) {
-  auto c_mgr = new CancellationManager;
-  f_opts.cancellation_manager = c_mgr;
-  FunctionLibraryRuntime::DoneCallback wrapped_done = std::bind(
-      [c_mgr](FunctionLibraryRuntime::DoneCallback done,
-              // Begin unbound arguments.
-              Status s) {
-        delete c_mgr;
-        done(s);
-      },
-      std::move(done), std::placeholders::_1);
-  RunHelper(std::move(f_opts), args, rets, std::move(wrapped_done));
-}
-
-void CapturedFunction::RunHelper(FunctionLibraryRuntime::Options f_opts,
-                                 gtl::ArraySlice<Tensor> args,
-                                 std::vector<Tensor>* rets,
-                                 FunctionLibraryRuntime::DoneCallback done) {
-  // TODO(mrry): Implement a synchronous version of
-  // FunctionLibraryRuntime::Run() that avoids a context switch for small
-  // functions.
-  if (captured_inputs_.empty()) {
-    lib_->Run(f_opts, f_handle_, args, rets, std::move(done));
-  } else {
-    std::vector<Tensor> args_with_captured;
-    args_with_captured.reserve(args.size() + captured_inputs_.size());
-    args_with_captured.insert(args_with_captured.end(), args.begin(),
-                              args.end());
-    args_with_captured.insert(args_with_captured.end(),
-                              captured_inputs_.begin(), captured_inputs_.end());
-    lib_->Run(f_opts, f_handle_, args_with_captured, rets, std::move(done));
-  }
-}
-
-CapturedFunction::CapturedFunction(
-    Device* device, std::unique_ptr<DeviceMgr> device_mgr,
-    std::unique_ptr<FunctionLibraryDefinition> flib_def,
-    std::unique_ptr<ProcessFunctionLibraryRuntime> pflr,
-    FunctionLibraryRuntime* lib, FunctionLibraryRuntime::Handle f_handle,
-    std::vector<Tensor> captured_inputs)
-    : device_(device),
-      device_mgr_(std::move(device_mgr)),
-      flib_def_(std::move(flib_def)),
-      pflr_(std::move(pflr)),
-      lib_(lib),
-      f_handle_(f_handle),
-      captured_inputs_(std::move(captured_inputs)) {}
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/captured_function.h b/tensorflow/core/kernels/captured_function.h
index 9430127600a26df6cafd14022aa271e9e18ed78a..cdf191f4c768c2ed3bd15b0ff45fdfa27800653c 100644
--- a/tensorflow/core/kernels/captured_function.h
+++ b/tensorflow/core/kernels/captured_function.h
@@ -12,99 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_KERNELS_CAPTURED_FUNCTION_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_KERNELS_CAPTURED_FUNCTION_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_CAPTURED_FUNCTION_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_CAPTURED_FUNCTION_H_
 
-#include <memory>
-#include <vector>
+#include "tensorflow/core/kernels/data/captured_function.h"
 
-#include "tensorflow/core/common_runtime/function.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/array_slice.h"
-#include "tensorflow/core/lib/random/random.h"
-#include "tensorflow/core/platform/macros.h"
-
-namespace tensorflow {
-
-class Device;
-class OpKernelContext;
-class ResourceMgr;
-
-// A `CapturedFunction` encapsulates a TensorFlow function and all of
-// the runtime support required to execute it.
-//
-// The `Dataset`-related classes use `CapturedFunction` to execute
-// TensorFlow functions outside a the normal `OpKernel::Compute()`
-// context.
-//
-// NOTE(mrry): Here we are taking a conservative approach to dealing with
-// ownership of the various framework and runtime objects that are needed
-// to execute functions. We copy the function library *definition* (i.e.
-// a set of FunctionDefs) out of this kernel's context's function library
-// *runtime*, then we use that together with a specially-created
-// ThreadPoolDevice to build a new FunctionLibraryRuntime for the Dataset.
-//
-// We need to do this (or refactor the ownership of framework components
-// in each of the session implementations) to make it possible to close
-// down a ParallelMapDataset::Iterator when its session is closed.
-//
-// TODO(mrry): Clean this up. Investigate whether it would be possible to
-// reuse the session's FunctionLibraryRuntime(s) or Device(s).
-class CapturedFunction {
- public:
-  // NOTE(mrry): The `captured_inputs` are passed by value. For
-  // efficiency, you are recommended to move this argument into the call.
-  static Status Create(OpKernelContext* ctx, const NameAttrList& func,
-                       int graph_def_version,
-                       std::vector<Tensor> captured_inputs,
-                       std::unique_ptr<CapturedFunction>* out_function);
-
-  Status Run(FunctionLibraryRuntime::Options f_opts,
-             gtl::ArraySlice<Tensor> args, std::vector<Tensor>* rets);
-
-  void RunAsync(FunctionLibraryRuntime::Options f_opts,
-                gtl::ArraySlice<Tensor> args, std::vector<Tensor>* rets,
-                FunctionLibraryRuntime::DoneCallback done);
-
-  const Device* device() const { return device_; }
-
-  ResourceMgr* resource_manager() const { return device_->resource_manager(); }
-
-  const std::vector<Tensor>& captured_inputs() { return captured_inputs_; }
-
-  static int64 generate_step_id() {
-    // Choose a step ID that is guaranteed not to clash with any
-    // Session-generated step ID. DirectSession only generates
-    // non-negative step IDs (contiguous, starting from 0), and
-    // MasterSession generates 56-bit random step IDs whose MSB is
-    // always 0, so a negative random step ID should suffice.
-    return -std::abs(static_cast<int64>(random::New64()));
-  }
-
- private:
-  CapturedFunction(Device* device, std::unique_ptr<DeviceMgr> device_mgr,
-                   std::unique_ptr<FunctionLibraryDefinition> flib_def,
-                   std::unique_ptr<ProcessFunctionLibraryRuntime> pflr,
-                   FunctionLibraryRuntime* lib,
-                   FunctionLibraryRuntime::Handle f_handle,
-                   std::vector<Tensor> captured_inputs);
-
-  void RunHelper(FunctionLibraryRuntime::Options f_opts,
-                 gtl::ArraySlice<Tensor> args, std::vector<Tensor>* rets,
-                 FunctionLibraryRuntime::DoneCallback done);
-
-  Device* const device_;  // owned by device_mgr_.
-  const std::unique_ptr<DeviceMgr> device_mgr_;
-  const std::unique_ptr<FunctionLibraryDefinition> flib_def_;
-  const std::unique_ptr<ProcessFunctionLibraryRuntime> pflr_;
-  FunctionLibraryRuntime* const lib_;  // owned by pflr_.
-  const FunctionLibraryRuntime::Handle f_handle_;
-  const std::vector<Tensor> captured_inputs_;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(CapturedFunction);
-};
-
-}  // namespace tensorflow
-
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_KERNELS_CAPTURED_FUNCTION_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_CAPTURED_FUNCTION_H_
diff --git a/tensorflow/core/kernels/concat_lib.h b/tensorflow/core/kernels/concat_lib.h
index 14e6e1bc32455fd169cd71d74b42ab8f159738ab..526f9420d72fa25ff21bf60b3594649fa1faa0ba 100644
--- a/tensorflow/core/kernels/concat_lib.h
+++ b/tensorflow/core/kernels/concat_lib.h
@@ -23,6 +23,22 @@ limitations under the License.
 
 namespace tensorflow {
 
+// Functors to concatenate tensors. These always take a rank-2 tensor (i.e a
+// matrix) and concatenate it along the axis 1 ("putting them next to each
+// other" as opposed to "putting them on top of one another").
+//
+// Any concatenation of n-dimensional tensors across any axis can be reduced to
+// a concatenation of two-dimensional tensors across the axis 1 by first
+// partitioning the axes of the original tensors into those less than the axis
+// to be concatenated across and the rest. Then reshape the tensors into a
+// two-dimensional tensor by collapsing these two sets of axes and concatenate
+// the resulting matrices across the axis 1, finally reshaping the result to
+// have the proper shape.
+//
+// So, for example, when stacking N tensors, reshape each to have shape
+// {1, Numelements} and reshape the result matrix to have shape
+// {1, N * NumElements} before passing it to this functor.
+
 // Assumes all inputs are nonempty
 template <typename T>
 void ConcatCPU(DeviceBase* d,
diff --git a/tensorflow/core/kernels/concat_lib_cpu.cc b/tensorflow/core/kernels/concat_lib_cpu.cc
index b0bec0c5dcd30f4a630cd927e6ea922105249676..43731114c0b9a87598da19466c0fd9c7e05644bb 100644
--- a/tensorflow/core/kernels/concat_lib_cpu.cc
+++ b/tensorflow/core/kernels/concat_lib_cpu.cc
@@ -72,13 +72,14 @@ REGISTER(qint8)
 REGISTER(quint16)
 REGISTER(qint16)
 REGISTER(qint32)
-REGISTER(bfloat16)
+TF_CALL_variant(REGISTER)
 
 #if defined(IS_MOBILE_PLATFORM) && !defined(SUPPORT_SELECTIVE_REGISTRATION) && \
     !defined(__ANDROID_TYPES_FULL__)
-// Primarily used for SavedModel support on mobile. Registering it here only if
-// __ANDROID_TYPES_FULL__ is not defined, as that already register strings
-REGISTER(string);
+    // Primarily used for SavedModel support on mobile. Registering it here only
+    // if __ANDROID_TYPES_FULL__ is not defined (which already registers string)
+    // to avoid duplicate registration.
+    REGISTER(string);
 #endif  // defined(IS_MOBILE_PLATFORM) &&
         // !defined(SUPPORT_SELECTIVE_REGISTRATION) &&
         // !defined(__ANDROID_TYPES_FULL__)
diff --git a/tensorflow/core/kernels/concat_lib_gpu.cc b/tensorflow/core/kernels/concat_lib_gpu.cc
index 319ead49efd709932bed20e1e76a73749b1c4f19..d8643c0b2fb2633f6b640b4f54dc2f8c92da654d 100644
--- a/tensorflow/core/kernels/concat_lib_gpu.cc
+++ b/tensorflow/core/kernels/concat_lib_gpu.cc
@@ -116,8 +116,8 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER);
 TF_CALL_complex64(REGISTER);
 TF_CALL_complex128(REGISTER);
 TF_CALL_int64(REGISTER);
-REGISTER(bfloat16);
-REGISTER(bool);
+TF_CALL_bfloat16(REGISTER);
+TF_CALL_bool(REGISTER);
 
 #undef REGISTER
 
diff --git a/tensorflow/core/kernels/concat_op.cc b/tensorflow/core/kernels/concat_op.cc
index 8e480aa99524cd57bfe4dda2383d03bcd243b79f..ae1b5da32ea12d94a01ae67563f03dda42d6ead4 100644
--- a/tensorflow/core/kernels/concat_op.cc
+++ b/tensorflow/core/kernels/concat_op.cc
@@ -172,7 +172,6 @@ REGISTER_CONCAT(qint8);
 REGISTER_CONCAT(quint16);
 REGISTER_CONCAT(qint16);
 REGISTER_CONCAT(qint32);
-REGISTER_CONCAT(bfloat16);
 
 #undef REGISTER_CONCAT
 
diff --git a/tensorflow/core/kernels/conditional_accumulator_base.h b/tensorflow/core/kernels/conditional_accumulator_base.h
index 05ee855daee8a7ffe4730ec4a18c65a7bd91733a..27db6ee78533c59f26f538bc59956e50c6111ee7 100644
--- a/tensorflow/core/kernels/conditional_accumulator_base.h
+++ b/tensorflow/core/kernels/conditional_accumulator_base.h
@@ -162,10 +162,12 @@ class ConditionalAccumulatorBase : public ResourceBase {
  * function can get an indication that a failure has occurred.
 */
 #define OP_REQUIRES_BOOLEAN(CTX, EXP, STATUS) \
-  if (!TF_PREDICT_TRUE(EXP)) {                \
-    (CTX)->CtxFailure((STATUS));              \
-    return false;                             \
-  }
+  do {                                        \
+    if (!TF_PREDICT_TRUE(EXP)) {              \
+      (CTX)->CtxFailure((STATUS));            \
+      return false;                           \
+    }                                         \
+  } while (0)
 
 #define OP_REQUIRES_OK_BOOLEAN(CTX, STATUS) \
   do {                                      \
diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index 72132574a4ccee474734425233ff687e955022ef..59f9f69315e1a1b8740ee787fa93df686dfa01d8 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -57,12 +57,7 @@ ConstantOp::ConstantOp(OpKernelConstruction* ctx)
 void ConstantOp::Compute(OpKernelContext* ctx) {
   ctx->set_output(0, tensor_);
   if (TF_PREDICT_FALSE(ctx->track_allocations())) {
-    AllocatorAttributes attr;
-    if (ctx->allocate_on_host(attr)) {
-      ctx->record_host_persistent_memory_allocation(tensor_.AllocatedBytes());
-    } else {
-      ctx->record_device_persistent_memory_allocation(tensor_.AllocatedBytes());
-    }
+    ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
   }
 }
 
@@ -151,38 +146,25 @@ typedef Eigen::GpuDevice GPUDevice;
 typedef Eigen::SyclDevice SYCLDevice;
 #endif  // TENSORFLOW_USE_SYCL
 
-namespace functor {
-
-// Partial specialization of FillFunctor<Device=CPUDevice, T>.
-template <typename T>
-struct FillFunctor<CPUDevice, T> {
-  void operator()(const CPUDevice& d, typename TTypes<T>::Flat out,
-                  typename TTypes<T>::ConstScalar in) {
-    out.device(d) = out.constant(in());
-  }
-};
-
-}  // end namespace functor
 
-template <typename Device, typename T>
+template <typename Device, typename T, typename Index>
 class FillOp : public OpKernel {
  public:
   explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {}
 
   void Compute(OpKernelContext* context) override {
     const Tensor& Tdims = context->input(0);
-    OP_REQUIRES(
-        context, IsLegacyVector(Tdims.shape()),
-        errors::InvalidArgument("dims must be a vector of int32, got shape ",
-                                Tdims.shape().DebugString()));
+    OP_REQUIRES(context, IsLegacyVector(Tdims.shape()),
+                errors::InvalidArgument("dims must be a vector, got shape ",
+                                        Tdims.shape().DebugString()));
     const Tensor& Tvalue = context->input(1);
     OP_REQUIRES(context, IsLegacyScalar(Tvalue.shape()),
                 errors::InvalidArgument("value must be a scalar, got shape ",
                                         Tvalue.shape().DebugString()));
-    auto dims = Tdims.flat<int32>();
+    auto dims = Tdims.flat<Index>();
     TensorShape shape;
     OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                                reinterpret_cast<const int32*>(dims.data()),
+                                reinterpret_cast<const Index*>(dims.data()),
                                 dims.size(), &shape));
     Tensor* out = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out));
@@ -192,34 +174,19 @@ class FillOp : public OpKernel {
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-
-namespace functor {
-// Partial specialization of FillFunctor<Device=SYCLDevice, T>.
-template <typename T>
-struct FillFunctor<SYCLDevice, T> {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat out,
-                  typename TTypes<T>::ConstScalar in) {
-#if !defined(EIGEN_HAS_INDEX_LIST)
-    Eigen::array<int, 1> rank1{1};
-#else
-    Eigen::IndexList<Eigen::type2index<1> > rank1;
-#endif
-    const int size = out.dimension(0);
-    Eigen::array<int, 1> broadcast_dims{size};
-
-    To32Bit(out).device(d) = in.reshape(rank1).broadcast(broadcast_dims);
-  }
-};
-}  // namespace functor
-#endif  // TENSORFLOW_USE_SYCL
-
-#define REGISTER_KERNEL(D, TYPE)                         \
-  REGISTER_KERNEL_BUILDER(Name("Fill")                   \
-                              .Device(DEVICE_##D)        \
-                              .TypeConstraint<TYPE>("T") \
-                              .HostMemory("dims"),       \
-                          FillOp<D##Device, TYPE>);
+#define REGISTER_KERNEL(D, TYPE)                                   \
+  REGISTER_KERNEL_BUILDER(Name("Fill")                             \
+                              .Device(DEVICE_##D)                  \
+                              .TypeConstraint<TYPE>("T")           \
+                              .TypeConstraint<int32>("index_type") \
+                              .HostMemory("dims"),                 \
+                          FillOp<D##Device, TYPE, int32>);         \
+  REGISTER_KERNEL_BUILDER(Name("Fill")                             \
+                              .Device(DEVICE_##D)                  \
+                              .TypeConstraint<TYPE>("T")           \
+                              .TypeConstraint<int64>("index_type") \
+                              .HostMemory("dims"),                 \
+                          FillOp<D##Device, TYPE, int64>);
 
 #define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE)
 TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
@@ -241,15 +208,17 @@ REGISTER_KERNEL(SYCL, int64);
 REGISTER_KERNEL_BUILDER(Name("Fill")
                             .Device(DEVICE_SYCL)
                             .TypeConstraint<int32>("T")
+                            .TypeConstraint<int32>("index_type")
                             .HostMemory("dims")
                             .HostMemory("value")
                             .HostMemory("output"),
-                        FillOp<CPUDevice, int32>);
+                        FillOp<CPUDevice, int32, int32>);
 #undef REGISTER_KERNEL_SYCL
 #endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER_KERNEL(GPU, Eigen::half);
+REGISTER_KERNEL(GPU, bfloat16);
 REGISTER_KERNEL(GPU, float);
 REGISTER_KERNEL(GPU, double);
 REGISTER_KERNEL(GPU, uint8);
@@ -266,10 +235,11 @@ REGISTER_KERNEL(GPU, bool);
 REGISTER_KERNEL_BUILDER(Name("Fill")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<int32>("T")
+                            .TypeConstraint<int32>("index_type")
                             .HostMemory("dims")
                             .HostMemory("value")
                             .HostMemory("output"),
-                        FillOp<CPUDevice, int32>);
+                        FillOp<CPUDevice, int32, int32>);
 #endif
 
 #undef REGISTER_KERNEL
@@ -328,6 +298,7 @@ REGISTER_KERNEL_BUILDER(Name("ZerosLike")
 #if GOOGLE_CUDA
 REGISTER_KERNEL(bool, GPU);
 REGISTER_KERNEL(Eigen::half, GPU);
+REGISTER_KERNEL(bfloat16, GPU);
 REGISTER_KERNEL(float, GPU);
 REGISTER_KERNEL(double, GPU);
 REGISTER_KERNEL(complex64, GPU);
@@ -380,6 +351,7 @@ REGISTER_KERNEL_BUILDER(Name("OnesLike")
 #if GOOGLE_CUDA
 REGISTER_KERNEL(bool, GPU);
 REGISTER_KERNEL(Eigen::half, GPU);
+REGISTER_KERNEL(bfloat16, GPU);
 REGISTER_KERNEL(float, GPU);
 REGISTER_KERNEL(double, GPU);
 REGISTER_KERNEL(complex64, GPU);
diff --git a/tensorflow/core/kernels/constant_op_gpu.cu.cc b/tensorflow/core/kernels/constant_op_gpu.cu.cc
index d1a1e34ec365da444a8465b34dd67f8865d29f5e..3487606778eabde386335f8450d627b7bf74ad42 100644
--- a/tensorflow/core/kernels/constant_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/constant_op_gpu.cu.cc
@@ -77,7 +77,7 @@ struct FillFunctor<GPUDevice, T> {
 
 #define DEFINE_FILL_GPU(T) template struct FillFunctor<GPUDevice, T>;
 TF_CALL_REAL_NUMBER_TYPES(DEFINE_FILL_GPU);
-DEFINE_FILL_GPU(bool);
+TF_CALL_bool(DEFINE_FILL_GPU);
 #undef DEFINE_FILL_GPU
 
 // Partial specialization of FillFunctor<Device=GPUDevice, T>.
@@ -88,15 +88,9 @@ struct SetZeroFunctor<GPUDevice, T> {
   }
 };
 
-#define DEFINE_SETZERO_GPU(T) template struct SetZeroFunctor<GPUDevice, T>
-DEFINE_SETZERO_GPU(bool);
-DEFINE_SETZERO_GPU(Eigen::half);
-DEFINE_SETZERO_GPU(float);
-DEFINE_SETZERO_GPU(double);
-DEFINE_SETZERO_GPU(complex64);
-DEFINE_SETZERO_GPU(complex128);
-DEFINE_SETZERO_GPU(int32);
-DEFINE_SETZERO_GPU(int64);
+#define DEFINE_SETZERO_GPU(T) template struct SetZeroFunctor<GPUDevice, T>;
+TF_CALL_NUMBER_TYPES(DEFINE_SETZERO_GPU);
+TF_CALL_bool(DEFINE_SETZERO_GPU);
 #undef DEFINE_SETZERO_GPU
 
 // Partial specialization of FillFunctor<Device=GPUDevice, T>.
@@ -107,15 +101,9 @@ struct SetOneFunctor<GPUDevice, T> {
   }
 };
 
-#define DEFINE_SETONE_GPU(T) template struct SetOneFunctor<GPUDevice, T>
-DEFINE_SETONE_GPU(bool);
-DEFINE_SETONE_GPU(Eigen::half);
-DEFINE_SETONE_GPU(float);
-DEFINE_SETONE_GPU(double);
-DEFINE_SETONE_GPU(complex64);
-DEFINE_SETONE_GPU(complex128);
-DEFINE_SETONE_GPU(int32);
-DEFINE_SETONE_GPU(int64);
+#define DEFINE_SETONE_GPU(T) template struct SetOneFunctor<GPUDevice, T>;
+TF_CALL_NUMBER_TYPES(DEFINE_SETONE_GPU);
+TF_CALL_bool(DEFINE_SETONE_GPU);
 #undef DEFINE_SETONE_GPU
 
 }  // end namespace functor
diff --git a/tensorflow/core/kernels/constant_op_test.cc b/tensorflow/core/kernels/constant_op_test.cc
index 62cc67c7360ad55d9f2c487e8e6d3ae37d9a47ed..7a05d9371d8c19e2cfe943f7a44e458c8baf634a 100644
--- a/tensorflow/core/kernels/constant_op_test.cc
+++ b/tensorflow/core/kernels/constant_op_test.cc
@@ -72,9 +72,9 @@ void ConstantOpTest::PersistentMemoryTrackingTest(bool on_gpu) {
   TF_EXPECT_OK(ctx.status());
 
   if (on_gpu) {
-    EXPECT_EQ(ctx.device_persistent_memory_allocated(), 512);
+    EXPECT_EQ(ctx.persistent_memory_allocated(), 512);
   } else {
-    EXPECT_EQ(ctx.host_persistent_memory_allocated(), 480);
+    EXPECT_EQ(ctx.persistent_memory_allocated(), 480);
   }
 
   // Remove memry leak errors.
diff --git a/tensorflow/core/kernels/conv_2d.h b/tensorflow/core/kernels/conv_2d.h
index f78a162a8efbd7aeae16d59665afda50d2868b40..2142207b0d89a4b2f02c7f7b5d320c3b4b48462c 100644
--- a/tensorflow/core/kernels/conv_2d.h
+++ b/tensorflow/core/kernels/conv_2d.h
@@ -91,27 +91,25 @@ struct SpatialConvolutionBackwardInput {
   void operator()(const Device& d, typename TTypes<T, 4>::Tensor input_backward,
                   typename TTypes<T, 4>::ConstTensor kernel,
                   typename TTypes<T, 4>::ConstTensor output_backward,
-                  int input_rows, int input_cols, int row_stride,
-                  int col_stride) {
+                  int row_stride, int col_stride) {
     // Need to swap row/col when calling Eigen.
     input_backward.device(d) = Eigen::SpatialConvolutionBackwardInput(
-        kernel, output_backward, input_cols, input_rows, col_stride,
-        row_stride);
+        kernel, output_backward, input_backward.dimension(2),
+        input_backward.dimension(1), col_stride, row_stride);
   }
 };
 
 template <typename Device, typename T>
-struct SpatialConvolutionBackwardKernel {
+struct SpatialConvolutionBackwardFilter {
   void operator()(const Device& d,
                   typename TTypes<T, 4>::Tensor kernel_backward,
                   typename TTypes<T, 4>::ConstTensor input,
                   typename TTypes<T, 4>::ConstTensor output_backward,
-                  int kernel_rows, int kernel_cols, int row_stride,
-                  int col_stride) {
+                  int row_stride, int col_stride) {
     // Need to swap row/col when calling Eigen.
     kernel_backward.device(d) = Eigen::SpatialConvolutionBackwardKernel(
-        input, output_backward, kernel_cols, kernel_rows, col_stride,
-        row_stride);
+        input, output_backward, kernel_backward.dimension(1),
+        kernel_backward.dimension(0), col_stride, row_stride);
   }
 };
 
diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc
index 3d2bb57aff6b7c4a1de2f9221aea4b384fea45c3..512bcc6c01bf3eb4aed92f90eebb060abda8a7fc 100644
--- a/tensorflow/core/kernels/conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc
@@ -30,6 +30,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_slice.h"
 #include "tensorflow/core/kernels/conv_2d.h"
+#include "tensorflow/core/kernels/fill_functor.h"
 #ifdef TENSORFLOW_USE_LIBXSMM
 #include "tensorflow/core/kernels/xsmm_conv2d.h"
 #endif
@@ -92,16 +93,15 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
 template <typename T>
-struct LaunchConv2DBackpropInputOp<CPUDevice, T> {
+struct LaunchConv2DBackpropFilterOp<CPUDevice, T> {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
                   const Tensor& out_backprop, const Tensor& input,
                   int row_stride, int col_stride, const Padding& padding,
                   Tensor* filter_backprop, TensorFormat data_format) {
     const CPUDevice& d = ctx->eigen_device<CPUDevice>();
-    functor::SpatialConvolutionBackwardInput<CPUDevice, T>()(
+    functor::SpatialConvolutionBackwardFilter<CPUDevice, T>()(
         d, filter_backprop->tensor<T, 4>(), input.tensor<T, 4>(),
-        out_backprop.tensor<T, 4>(), filter_backprop->dim_size(0),
-        filter_backprop->dim_size(1), row_stride, col_stride);
+        out_backprop.tensor<T, 4>(), row_stride, col_stride);
   }
 };
 
@@ -194,7 +194,23 @@ class Conv2DFastBackpropFilterOp : public OpKernel {
         context, (strides_[0] == 1 && strides_[3] == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES(context, (dilations_[0] == 1 && dilations_[3] == 1),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    // TODO(yangzihao): Add a CPU implementation for dilated convolution.
+    OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1),
+                errors::InvalidArgument(
+                    "Current Eigen and libxsmm implementations do not "
+                    "yet support dilation rates larger than 1."));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -256,12 +272,13 @@ class Conv2DFastBackpropFilterOp : public OpKernel {
     }
 #endif
 
-    LaunchConv2DBackpropInputOp<Device, T>()(
+    LaunchConv2DBackpropFilterOp<Device, T>()(
         context, false, false, out_backprop, input, dims.spatial_dims[0].stride,
         dims.spatial_dims[1].stride, padding_, filter_backprop, data_format_);
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
@@ -290,7 +307,23 @@ class Conv2DCustomBackpropFilterOp : public OpKernel {
         context, (strides_[0] == 1 && strides_[3] == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES(context, (dilations_[0] == 1 && dilations_[3] == 1),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    // TODO(yangzihao): Add a CPU implementation for dilated convolution.
+    OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1),
+                errors::InvalidArgument(
+                    "Current libxsmm and customized CPU implementations do "
+                    "not yet support dilation rates larger than 1."));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -459,6 +492,7 @@ class Conv2DCustomBackpropFilterOp : public OpKernel {
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
@@ -510,10 +544,30 @@ class Conv2DSlowBackpropFilterOp : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
     int stride_n = GetTensorDim(strides_, data_format_, 'N');
     int stride_c = GetTensorDim(strides_, data_format_, 'C');
+    int stride_h = GetTensorDim(strides_, data_format_, 'H');
+    int stride_w = GetTensorDim(strides_, data_format_, 'W');
     OP_REQUIRES(
         context, (stride_n == 1 && stride_c == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, stride_h > 0 && stride_w > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    int dilation_n = GetTensorDim(dilations_, data_format_, 'N');
+    int dilation_c = GetTensorDim(dilations_, data_format_, 'C');
+    int dilation_h = GetTensorDim(dilations_, data_format_, 'H');
+    int dilation_w = GetTensorDim(dilations_, data_format_, 'W');
+    OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1,
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    OP_REQUIRES(
+        context, dilation_h > 0 && dilation_w > 0,
+        errors::InvalidArgument("Dilated rates should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_));
     use_cudnn_ &= CanUseCudnn();
     cudnn_use_autotune_ = CudnnUseAutotune();
@@ -541,18 +595,27 @@ class Conv2DSlowBackpropFilterOp : public OpKernel {
     if (filter_shape.num_elements() == 0) {
       return;
     }
+    // If input is empty, set gradients to zero.
+    if (input.shape().num_elements() == 0) {
+      functor::SetZeroFunctor<Device, T> f;
+      f(context->eigen_device<Device>(), filter_backprop->flat<T>());
+      return;
+    }
 
     // For now we take the stride from the second and third dimensions only (we
     // do not support striding on the batch or depth dimension).
     const int stride_rows = GetTensorDim(strides_, data_format_, 'H');
     const int stride_cols = GetTensorDim(strides_, data_format_, 'W');
+    const int dilation_rows = GetTensorDim(dilations_, data_format_, 'H');
+    const int dilation_cols = GetTensorDim(dilations_, data_format_, 'W');
 
     launcher_(context, use_cudnn_, cudnn_use_autotune_, out_backprop, input,
-              stride_rows, stride_cols, padding_, filter_backprop,
-              data_format_);
+              dilation_rows, dilation_cols, stride_rows, stride_cols, padding_,
+              filter_backprop, data_format_);
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   bool use_cudnn_;
@@ -566,38 +629,46 @@ class Conv2DSlowBackpropFilterOp : public OpKernel {
 template <typename T>
 void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
     OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-    const Tensor& out_backprop, const Tensor& input, int row_stride,
-    int col_stride, const Padding& padding, Tensor* filter_backprop,
-    TensorFormat data_format) {
+    const Tensor& out_backprop, const Tensor& input, int row_dilation,
+    int col_dilation, int row_stride, int col_stride, const Padding& padding,
+    Tensor* filter_backprop, TensorFormat data_format) {
   using perftools::gputools::dnn::AlgorithmConfig;
   using perftools::gputools::dnn::AlgorithmDesc;
   using perftools::gputools::dnn::ProfileResult;
 
+  std::vector<int32> dilations(4, 1);
+  dilations[GetTensorDimIndex(data_format, 'H')] = row_dilation;
+  dilations[GetTensorDimIndex(data_format, 'W')] = col_dilation;
+
   std::vector<int32> strides(4, 1);
   strides[GetTensorDimIndex(data_format, 'H')] = row_stride;
   strides[GetTensorDimIndex(data_format, 'W')] = col_stride;
   TensorShape filter_shape = filter_backprop->shape();
 
   ConvBackpropDimensions dims;
-  OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensions(
+  OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensionsV2(
                           "Conv2DSlowBackpropFilter", /*num_spatial_dims=*/2,
                           input.shape(), filter_shape, out_backprop.shape(),
-                          strides, padding, data_format, &dims));
+                          dilations, strides, padding, data_format, &dims));
 
+  // TODO(yangzihao): The padding computations should be done in
+  // GetWindowedOutputSize() functions.
   const int padding_rows =
       (padding == VALID)
           ? 0
           : std::max<int>(0, (dims.spatial_dims[0].output_size - 1) *
                                      dims.spatial_dims[0].stride +
-                                 dims.spatial_dims[0].filter_size -
-                                 dims.spatial_dims[0].input_size);
+                                 (dims.spatial_dims[0].filter_size - 1) *
+                                     dims.spatial_dims[0].dilation +
+                                 1 - dims.spatial_dims[0].input_size);
   const int padding_cols =
       (padding == VALID)
           ? 0
           : std::max<int>(0, (dims.spatial_dims[1].output_size - 1) *
                                      dims.spatial_dims[1].stride +
-                                 dims.spatial_dims[1].filter_size -
-                                 dims.spatial_dims[1].input_size);
+                                 (dims.spatial_dims[1].filter_size - 1) *
+                                     dims.spatial_dims[1].dilation +
+                                 1 - dims.spatial_dims[1].input_size);
 
   // TODO(zhengxq): cuDNN only supports equal padding on both sides, so only
   // calling it when that is true. Remove this check when (if?) cuDNN starts
@@ -730,7 +801,9 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
       .set_input_feature_map_count(dims.in_depth)
       .set_output_feature_map_count(dims.out_depth);
   perftools::gputools::dnn::ConvolutionDescriptor conv_desc;
-  conv_desc.set_vertical_filter_stride(dims.spatial_dims[0].stride)
+  conv_desc.set_vertical_dilation_rate(dims.spatial_dims[0].dilation)
+      .set_horizontal_dilation_rate(dims.spatial_dims[1].dilation)
+      .set_vertical_filter_stride(dims.spatial_dims[0].stride)
       .set_horizontal_filter_stride(dims.spatial_dims[1].stride)
       .set_zero_padding_height(padding_rows / 2)
       .set_zero_padding_width(padding_cols / 2);
@@ -821,6 +894,8 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
       dims.out_depth,                        // out_depths
       {{dims.spatial_dims[0].filter_size,    // filter_rows
         dims.spatial_dims[1].filter_size}},  // filter_cols
+      {{dims.spatial_dims[0].dilation,       // dilation_rows
+        dims.spatial_dims[1].dilation}},     // dilation_cols
       {{dims.spatial_dims[0].stride,         // stride_rows
         dims.spatial_dims[1].stride}},       // stride_cols
       {{padding_rows,                        // padding_rows
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc
index d28f6b4d107647f8e2dc232dc5477cd7ee37f696..0356ff4c0f4240ec806d1e337546cfce6771d92f 100644
--- a/tensorflow/core/kernels/conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -106,8 +106,7 @@ struct LaunchConv2DBackpropInputOp<CPUDevice, T> {
     const CPUDevice& d = ctx->eigen_device<CPUDevice>();
     functor::SpatialConvolutionBackwardInput<CPUDevice, T>()(
         d, in_backprop->tensor<T, 4>(), filter.tensor<T, 4>(),
-        out_backprop.tensor<T, 4>(), in_backprop->dim_size(1),
-        in_backprop->dim_size(2), row_stride, col_stride);
+        out_backprop.tensor<T, 4>(), row_stride, col_stride);
   }
 };
 
@@ -198,7 +197,23 @@ class Conv2DFastBackpropInputOp : public OpKernel {
         context, (strides_[0] == 1 && strides_[3] == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES(context, (dilations_[0] && dilations_[3]),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    // TODO(yangzihao): Add a CPU implementation for dilated convolution.
+    OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1),
+                errors::InvalidArgument(
+                    "Current Eigen and libxsmm implementations do not "
+                    "yet support dilation rates larger than 1."));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -268,6 +283,7 @@ class Conv2DFastBackpropInputOp : public OpKernel {
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
@@ -296,7 +312,23 @@ class Conv2DCustomBackpropInputOp : public OpKernel {
         context, (strides_[0] == 1 && strides_[3] == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES(context, (dilations_[0] == 1 && dilations_[3] == 1),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    // TODO(yangzihao): Add a CPU implementation for dilated convolution.
+    OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1),
+                errors::InvalidArgument(
+                    "Current libxsmm and customized CPU implementations do "
+                    "not yet support dilation rates larger than 1."));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -532,6 +564,7 @@ class Conv2DCustomBackpropInputOp : public OpKernel {
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
@@ -586,10 +619,30 @@ class Conv2DSlowBackpropInputOp : public OpKernel {
                                         "specify 4 dimensions"));
     int stride_n = GetTensorDim(strides_, data_format_, 'N');
     int stride_c = GetTensorDim(strides_, data_format_, 'C');
+    int stride_h = GetTensorDim(strides_, data_format_, 'H');
+    int stride_w = GetTensorDim(strides_, data_format_, 'W');
     OP_REQUIRES(
         context, (stride_n == 1 && stride_c == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, stride_h > 0 && stride_w > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
+    int dilation_n = GetTensorDim(dilations_, data_format_, 'N');
+    int dilation_c = GetTensorDim(dilations_, data_format_, 'C');
+    int dilation_h = GetTensorDim(dilations_, data_format_, 'H');
+    int dilation_w = GetTensorDim(dilations_, data_format_, 'W');
+    OP_REQUIRES(context, (dilation_n == 1 && dilation_c == 1),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    OP_REQUIRES(
+        context, dilation_h > 0 && dilation_w > 0,
+        errors::InvalidArgument("Dilated rates should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_));
     use_cudnn_ &= CanUseCudnn();
     cudnn_use_autotune_ = CudnnUseAutotune();
@@ -622,12 +675,16 @@ class Conv2DSlowBackpropInputOp : public OpKernel {
     // do not support striding on the batch or depth dimension).
     const int stride_rows = GetTensorDim(strides_, data_format_, 'H');
     const int stride_cols = GetTensorDim(strides_, data_format_, 'W');
+    const int dilation_rows = GetTensorDim(dilations_, data_format_, 'H');
+    const int dilation_cols = GetTensorDim(dilations_, data_format_, 'W');
 
     launcher_(context, use_cudnn_, cudnn_use_autotune_, out_backprop, filter,
-              stride_rows, stride_cols, padding_, in_backprop, data_format_);
+              dilation_rows, dilation_cols, stride_rows, stride_cols, padding_,
+              in_backprop, data_format_);
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   Padding padding_;
   bool use_cudnn_;
@@ -641,39 +698,48 @@ class Conv2DSlowBackpropInputOp : public OpKernel {
 template <typename T>
 void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
     OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-    const Tensor& out_backprop, const Tensor& filter, int row_stride,
-    int col_stride, const Padding& padding, Tensor* in_backprop,
-    TensorFormat data_format) {
+    const Tensor& out_backprop, const Tensor& filter, int row_dilation,
+    int col_dilation, int row_stride, int col_stride, const Padding& padding,
+    Tensor* in_backprop, TensorFormat data_format) {
   using perftools::gputools::dnn::AlgorithmConfig;
   using perftools::gputools::dnn::AlgorithmDesc;
   using perftools::gputools::dnn::ProfileResult;
 
   std::vector<int32> strides(4, 1);
-  strides[GetTensorDimIndex(data_format, 'H')] = row_stride;
-  strides[GetTensorDimIndex(data_format, 'W')] = col_stride;
+  std::vector<int32> dilations(4, 1);
+  auto input_h = GetTensorDimIndex(data_format, 'H');
+  auto input_w = GetTensorDimIndex(data_format, 'W');
+  strides[input_h] = row_stride;
+  strides[input_w] = col_stride;
+  dilations[input_h] = row_dilation;
+  dilations[input_w] = col_dilation;
   TensorShape input_shape = in_backprop->shape();
 
   const TensorShape& filter_shape = filter.shape();
   ConvBackpropDimensions dims;
-  OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensions(
+  OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensionsV2(
                           "Conv2DSlowBackpropInput", /*num_spatial_dims=*/2,
                           input_shape, filter_shape, out_backprop.shape(),
-                          strides, padding, data_format, &dims));
+                          dilations, strides, padding, data_format, &dims));
 
+  // TODO(yangzihao): The padding computations should be done in
+  // GetWindowedOutputSize() functions.
   const int padding_rows =
       (padding == VALID)
           ? 0
           : std::max<int>(0, (dims.spatial_dims[0].output_size - 1) *
                                      dims.spatial_dims[0].stride +
-                                 dims.spatial_dims[0].filter_size -
-                                 dims.spatial_dims[0].input_size);
+                                 (dims.spatial_dims[0].filter_size - 1) *
+                                     dims.spatial_dims[0].dilation +
+                                 1 - dims.spatial_dims[0].input_size);
   const int padding_cols =
       (padding == VALID)
           ? 0
           : std::max<int>(0, (dims.spatial_dims[1].output_size - 1) *
                                      dims.spatial_dims[1].stride +
-                                 dims.spatial_dims[1].filter_size -
-                                 dims.spatial_dims[1].input_size);
+                                 (dims.spatial_dims[1].filter_size - 1) *
+                                     dims.spatial_dims[1].dilation +
+                                 1 - dims.spatial_dims[1].input_size);
 
   // TODO(keveman): cuDNN only supports equal padding on both sides, so only
   // calling it when that is true. Remove this check when (if?) cuDNN starts
@@ -789,7 +855,9 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
       .set_input_feature_map_count(dims.in_depth)
       .set_output_feature_map_count(dims.out_depth);
   perftools::gputools::dnn::ConvolutionDescriptor conv_desc;
-  conv_desc.set_vertical_filter_stride(dims.spatial_dims[0].stride)
+  conv_desc.set_vertical_dilation_rate(dims.spatial_dims[0].dilation)
+      .set_horizontal_dilation_rate(dims.spatial_dims[1].dilation)
+      .set_vertical_filter_stride(dims.spatial_dims[0].stride)
       .set_horizontal_filter_stride(dims.spatial_dims[1].stride)
       .set_zero_padding_height(padding_rows / 2)
       .set_zero_padding_width(padding_cols / 2);
@@ -875,6 +943,8 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
       dims.out_depth,                        // out_depths
       {{dims.spatial_dims[0].filter_size,    // filter_rows
         dims.spatial_dims[1].filter_size}},  // filter_cols
+      {{dims.spatial_dims[0].dilation,       // dilation_rows
+        dims.spatial_dims[1].dilation}},     // dilation_cols
       {{dims.spatial_dims[0].stride,         // stride_rows
         dims.spatial_dims[1].stride}},       // stride_cols
       {{padding_rows,                        // padding_rows
diff --git a/tensorflow/core/kernels/conv_grad_ops.h b/tensorflow/core/kernels/conv_grad_ops.h
index e068fb86848f93a4c826e1b19fc85790ab2500a4..535586d53ac916808a22a6ea55577b3be43321f9 100644
--- a/tensorflow/core/kernels/conv_grad_ops.h
+++ b/tensorflow/core/kernels/conv_grad_ops.h
@@ -175,15 +175,17 @@ template <typename Device, typename T>
 struct LaunchConv2DBackpropInputOp {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
                   const Tensor& out_backprop, const Tensor& filter,
-                  int row_stride, int col_stride, const Padding& padding,
-                  Tensor* in_backprop, TensorFormat data_format);
+                  int row_dilation, int col_dilation, int row_stride,
+                  int col_stride, const Padding& padding, Tensor* in_backprop,
+                  TensorFormat data_format);
 };
 
 template <typename Device, typename T>
 struct LaunchConv2DBackpropFilterOp {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
                   const Tensor& out_backprop, const Tensor& input,
-                  int row_stride, int col_stride, const Padding& padding,
+                  int row_dilation, int col_dilation, int row_stride,
+                  int col_stride, const Padding& padding,
                   Tensor* filter_backprop, TensorFormat data_format);
 };
 
@@ -191,8 +193,9 @@ struct LaunchConv2DBackpropFilterOp {
 template <typename T>
 struct LaunchConv2DBackpropInputOp<Eigen::GpuDevice, T> {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-                  const Tensor& input, const Tensor& filter, int row_stride,
-                  int col_stride, const Padding& padding, Tensor* output,
+                  const Tensor& input, const Tensor& filter, int row_dilation,
+                  int col_dilation, int row_stride, int col_stride,
+                  const Padding& padding, Tensor* output,
                   TensorFormat data_format);
 };
 
@@ -200,7 +203,8 @@ template <typename T>
 struct LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T> {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
                   const Tensor& out_backprop, const Tensor& input,
-                  int row_stride, int col_stride, const Padding& padding,
+                  int row_dilation, int col_dilation, int row_stride,
+                  int col_stride, const Padding& padding,
                   Tensor* filter_backprop, TensorFormat data_format);
 };
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index c2d24d1f1208961af73901ddee432b98302090f3..3650ab53b2533e3c95a764ead2d1318c4006c9e7 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -645,6 +645,9 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
         {{input_size[0], input_size[1], input_size[2]}},
         out_depth,
         {{filter_size[0], filter_size[1], filter_size[2]}},
+        // TODO(yangzihao): Send in arbitrary dilation rates after the dilated
+        // conv is supported.
+        /*dilation=*/{{1, 1, 1}},
         {{strides[0], strides[1], strides[2]}},
         {{padding_planes, padding_rows, padding_cols}},
         dtype,
@@ -1011,6 +1014,7 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
         {{input_size[0], input_size[1], input_size[2]}},
         out_depth,
         {{filter_size[0], filter_size[1], filter_size[2]}},
+        {{1, 1, 1}},
         {{strides[0], strides[1], strides[2]}},
         {{padding_planes, padding_rows, padding_cols}},
         dtype,
diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index bb67113fb003ea58e2fb12ae6d79f02251cd3c3d..985586d6262b18e89b5fc5246cc00b10ba4924a7 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -112,8 +112,9 @@ struct LaunchGeneric {
 template <typename T>
 struct LaunchConv2DOp<CPUDevice, T> {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-                  const Tensor& input, const Tensor& filter, int row_stride,
-                  int col_stride, const Padding& padding, Tensor* output,
+                  const Tensor& input, const Tensor& filter, int row_dilation,
+                  int col_dilation, int row_stride, int col_stride,
+                  const Padding& padding, Tensor* output,
                   TensorFormat data_format) {
     if (data_format != FORMAT_NHWC) {
       ctx->SetStatus(
@@ -121,6 +122,13 @@ struct LaunchConv2DOp<CPUDevice, T> {
                                 "NHWC tensor format for now."));
       return;
     }
+    // TODO(yangzihao): Add the CPU implementation of dilated conv 2D.
+    if (row_dilation > 1 || col_dilation > 1) {
+      ctx->SetStatus(
+          errors::Unimplemented("Generic conv implementation only supports "
+                                "dilated rate of 1 for now."));
+      return;
+    }
     LaunchGeneric<CPUDevice, T>()(ctx, input, filter, row_stride, col_stride,
                                   padding, output, data_format);
   }
@@ -133,8 +141,10 @@ class LaunchDeepConvOp {
                   const Tensor& filter, int batch, int input_rows,
                   int input_cols, int in_depth, int filter_rows,
                   int filter_cols, int pad_rows, int pad_cols, int out_rows,
-                  int out_cols, int out_depth, int stride_rows, int stride_cols,
-                  Tensor* output, TensorFormat data_format) {
+                  int /*out_cols*/, int /*out_depth*/, int /*dilation_rows*/,
+                  int /*dilation_cols*/, int /*stride_rows*/,
+                  int /*stride_cols*/, Tensor* /*output*/,
+                  TensorFormat /*data_format*/) {
     return false;
   }
 };
@@ -147,9 +157,11 @@ class LaunchDeepConvOp<CPUDevice, float> {
                   const Tensor& filter, int batch, int input_rows,
                   int input_cols, int in_depth, int filter_rows,
                   int filter_cols, int pad_rows, int pad_cols, int out_rows,
-                  int out_cols, int out_depth, int stride_rows, int stride_cols,
+                  int out_cols, int out_depth, int dilation_rows,
+                  int dilation_cols, int stride_rows, int stride_cols,
                   Tensor* output, TensorFormat data_format) {
-    if (data_format != FORMAT_NHWC ||
+    if (data_format != FORMAT_NHWC || dilation_rows != 1 ||
+        dilation_cols != 1 ||
         !CanUseDeepConv2D(stride_rows, stride_cols, filter_rows, filter_cols,
                           in_depth, out_depth, out_rows, out_cols)) {
       return false;
@@ -187,7 +199,8 @@ class LaunchXsmmConvOp {
                   int input_cols, int in_depth, int filter_rows,
                   int filter_cols, int pad_rows, int pad_cols, int out_rows,
                   int out_cols, int out_depth, int stride_rows, int stride_cols,
-                  Tensor* output, TensorFormat data_format) {
+                  int dilation_rows, int dilation_cols, Tensor* output,
+                  TensorFormat data_format) {
     return false;
   }
 };
@@ -199,7 +212,8 @@ class LaunchXsmmConvOp<CPUDevice, float> {
                   const Tensor& filter, int batch, int input_rows,
                   int input_cols, int in_depth, int filter_rows,
                   int filter_cols, int pad_rows, int pad_cols, int out_rows,
-                  int out_cols, int out_depth, int stride_rows, int stride_cols,
+                  int out_cols, int out_depth, int dilation_rows,
+                  int dilation_cols, int stride_rows, int stride_cols,
                   Tensor* output, TensorFormat data_format) {
     auto num_threads =
         ctx->device()->tensorflow_cpu_worker_threads()->num_threads;
@@ -228,11 +242,8 @@ class LaunchXsmmConvOp<CPUDevice, float> {
     desc.options = LIBXSMM_DNN_CONV_OPTION_WU_EXT_FILTER_REDUCE_OVERWRITE;
     desc.datatype = LIBXSMM_DNN_DATATYPE_F32;
 
-    if (!CanUseXsmmConv2D(desc, data_format)) {
-      return false;
-    }
-
-    if (!CanUseXsmmConv2D(desc, data_format)) {
+    if (dilation_rows != 1 || dilation_cols != 1 ||
+        !CanUseXsmmConv2D(desc, data_format)) {
       return false;
     }
 
@@ -251,6 +262,7 @@ template <typename Device, typename T>
 class Conv2DOp : public BinaryOp<T> {
  public:
   explicit Conv2DOp(OpKernelConstruction* context) : BinaryOp<T>(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
     OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
     string data_format;
     OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
@@ -259,15 +271,35 @@ class Conv2DOp : public BinaryOp<T> {
     OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_));
     use_cudnn_ &= CanUseCudnn();
     cudnn_use_autotune_ = CudnnUseAutotune();
+    OP_REQUIRES(context, dilations_.size() == 4,
+                errors::InvalidArgument("Sliding window dilations field must "
+                                        "specify 4 dimensions"));
     OP_REQUIRES(context, strides_.size() == 4,
                 errors::InvalidArgument("Sliding window strides field must "
                                         "specify 4 dimensions"));
     const int64 stride_n = GetTensorDim(strides_, data_format_, 'N');
     const int64 stride_c = GetTensorDim(strides_, data_format_, 'C');
+    const int64 stride_h = GetTensorDim(strides_, data_format_, 'H');
+    const int64 stride_w = GetTensorDim(strides_, data_format_, 'W');
     OP_REQUIRES(
         context, stride_n == 1 && stride_c == 1,
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    OP_REQUIRES(context, stride_h > 0 && stride_w > 0,
+                errors::InvalidArgument(
+                    "Row and column strides should be larger than 0."));
+
+    const int64 dilation_n = GetTensorDim(dilations_, data_format_, 'N');
+    const int64 dilation_c = GetTensorDim(dilations_, data_format_, 'C');
+    const int64 dilation_h = GetTensorDim(dilations_, data_format_, 'H');
+    const int64 dilation_w = GetTensorDim(dilations_, data_format_, 'W');
+    OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1,
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
+    OP_REQUIRES(
+        context, dilation_h > 0 && dilation_w > 0,
+        errors::InvalidArgument("Dilated rates should be larger than 0."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
   }
 
@@ -334,18 +366,22 @@ class Conv2DOp : public BinaryOp<T> {
                 errors::InvalidArgument("batch is too large"));
     const int batch = static_cast<int>(batch_raw);
 
-    // For now we take the stride from the second and third dimensions only (we
-    // do not support striding on the batch or depth dimension).
+    // For now we take the stride and dilation from the second and third
+    // dimensions only (we do not support striding or dilation on the batch or
+    // depth dimension).
     const int stride_rows = GetTensorDim(strides_, data_format_, 'H');
     const int stride_cols = GetTensorDim(strides_, data_format_, 'W');
 
+    const int dilation_rows = GetTensorDim(dilations_, data_format_, 'H');
+    const int dilation_cols = GetTensorDim(dilations_, data_format_, 'W');
+
     int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0;
-    OP_REQUIRES_OK(context,
-                   GetWindowedOutputSize(input_rows, filter_rows, stride_rows,
-                                         padding_, &out_rows, &pad_rows));
-    OP_REQUIRES_OK(context,
-                   GetWindowedOutputSize(input_cols, filter_cols, stride_cols,
-                                         padding_, &out_cols, &pad_cols));
+    OP_REQUIRES_OK(context, GetWindowedOutputSizeV2(
+                                input_rows, filter_rows, dilation_rows,
+                                stride_rows, padding_, &out_rows, &pad_rows));
+    OP_REQUIRES_OK(context, GetWindowedOutputSizeV2(
+                                input_cols, filter_cols, dilation_cols,
+                                stride_cols, padding_, &out_cols, &pad_cols));
     TensorShape out_shape =
         ShapeFromFormat(data_format_, batch, out_rows, out_cols, out_depth);
 
@@ -361,6 +397,8 @@ class Conv2DOp : public BinaryOp<T> {
             << ", filter_rows = " << filter_rows
             << ", stride_rows = " << stride_rows
             << ", stride_cols = " << stride_cols
+            << ", dilation_rows = " << dilation_rows
+            << ", dilation_cols = " << dilation_cols
             << ", out_depth = " << out_depth;
 
     // If there is nothing to compute, return.
@@ -372,7 +410,8 @@ class Conv2DOp : public BinaryOp<T> {
     if (LaunchXsmmConvOp<Device, T>::Run(
             context, input, filter, batch, input_rows, input_cols, in_depth,
             filter_rows, filter_cols, pad_rows, pad_cols, out_rows, out_cols,
-            out_depth, stride_rows, stride_cols, output, data_format_)) {
+            out_depth, dilation_rows, dilation_cols, stride_rows, stride_cols,
+            output, data_format_)) {
       return;
     }
 #endif
@@ -380,15 +419,18 @@ class Conv2DOp : public BinaryOp<T> {
     if (LaunchDeepConvOp<Device, T>::Run(
             context, input, filter, batch, input_rows, input_cols, in_depth,
             filter_rows, filter_cols, pad_rows, pad_cols, out_rows, out_cols,
-            out_depth, stride_rows, stride_cols, output, data_format_)) {
+            out_depth, dilation_rows, dilation_cols, stride_rows, stride_cols,
+            output, data_format_)) {
       return;
     }
 
     launcher_(context, use_cudnn_, cudnn_use_autotune_, input, filter,
-              stride_rows, stride_cols, padding_, output, data_format_);
+              dilation_rows, dilation_cols, stride_rows, stride_cols, padding_,
+              output, data_format_);
   }
 
  private:
+  std::vector<int32> dilations_;
   std::vector<int32> strides_;
   bool use_cudnn_;
   Padding padding_;
@@ -443,9 +485,9 @@ typedef AutoTuneSingleton<ConvAutoTuneGroup, ConvParameters,
 template <typename T>
 void LaunchConv2DOp<GPUDevice, T>::operator()(
     OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-    const Tensor& input_param, const Tensor& filter, int row_stride,
-    int col_stride, const Padding& padding, Tensor* output,
-    TensorFormat data_format) {
+    const Tensor& input_param, const Tensor& filter, int row_dilation,
+    int col_dilation, int row_stride, int col_stride, const Padding& padding,
+    Tensor* output, TensorFormat data_format) {
   using perftools::gputools::dnn::AlgorithmConfig;
   using perftools::gputools::dnn::AlgorithmDesc;
   using perftools::gputools::dnn::ProfileResult;
@@ -461,8 +503,9 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
 
   Tensor input = input_param;
 
-  if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1 && row_stride == 1 &&
-      col_stride == 1 && data_format == FORMAT_NHWC) {
+  if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1 && row_dilation == 1 &&
+      col_dilation == 1 && row_stride == 1 && col_stride == 1 &&
+      data_format == FORMAT_NHWC) {
     // 1x1 filter, so call cublas directly.
     const uint64 m = input.dim_size(0) * input.dim_size(1) * input.dim_size(2);
     const uint64 k = filter.dim_size(2);
@@ -487,7 +530,8 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
     }
     return;
   } else if (filter.dim_size(0) == input.dim_size(1) &&
-             filter.dim_size(1) == input.dim_size(2) && padding == VALID &&
+             filter.dim_size(1) == input.dim_size(2) && row_dilation == 1 &&
+             col_dilation == 1 && padding == VALID &&
              data_format == FORMAT_NHWC) {
     // The input data and filter have the same height/width, so call cublas
     // directly.
@@ -530,17 +574,19 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
   const int64 patch_cols = filter.dim_size(1);
   if (padding == SAME) {
     // Total padding on rows and cols is
-    // Pr = (R' - 1) * S + Kr - R
-    // Pc = (C' - 1) * S + Kc - C
+    // Pr = (R' - 1) * S + (Kr - 1) * Dr + 1 - R
+    // Pc = (C' - 1) * S + (Kc - 1) * Dc + 1 - C
     // where (R', C') are output dimensions, (R, C) are input dimensions, S
-    // is stride, (Kr, Kc) are filter dimensions.
+    // is stride, (Dr, Dc) are dilations, (Kr, Kc) are filter dimensions.
     // We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top
     // and Pc - Pc/2 on the bottom.  When Pr or Pc is odd, this means
     // we pad more on the right and bottom than on the top and left.
     padding_rows =
-        std::max<int>(0, (out_rows - 1) * row_stride + patch_rows - in_rows);
+        std::max<int>(0, (out_rows - 1) * row_stride +
+                             (patch_rows - 1) * row_dilation + 1 - in_rows);
     padding_cols =
-        std::max<int>(0, (out_cols - 1) * col_stride + patch_cols - in_cols);
+        std::max<int>(0, (out_cols - 1) * col_stride +
+                             (patch_cols - 1) * col_dilation + 1 - in_cols);
     const bool rows_odd = (padding_rows % 2 != 0);
     const bool cols_odd = (padding_cols % 2 != 0);
     if (rows_odd || cols_odd) {
@@ -605,7 +651,9 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
       .set_input_feature_map_count(filter.dim_size(2))
       .set_output_feature_map_count(filter.dim_size(3));
   perftools::gputools::dnn::ConvolutionDescriptor conv_desc;
-  conv_desc.set_vertical_filter_stride(row_stride)
+  conv_desc.set_vertical_dilation_rate(row_dilation)
+      .set_horizontal_dilation_rate(col_dilation)
+      .set_vertical_filter_stride(row_stride)
       .set_horizontal_filter_stride(col_stride)
       .set_zero_padding_height(padding_rows / 2)
       .set_zero_padding_width(padding_cols / 2);
@@ -652,6 +700,8 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
       out_depths,        // out_depths
       {{patch_rows,      // filter_rows
         patch_cols}},    // filter_cols
+      {{row_dilation,    // dilation_rows
+        col_dilation}},  // dilation_cols
       {{row_stride,      // stride_rows
         col_stride}},    // stride_cols
       {{padding_rows,    // padding_rows
diff --git a/tensorflow/core/kernels/conv_ops.h b/tensorflow/core/kernels/conv_ops.h
index e29271dff278afbc1ff2c947c161824615640b66..09a3b78776c8bf114ccd42866bc7aded92c463b5 100644
--- a/tensorflow/core/kernels/conv_ops.h
+++ b/tensorflow/core/kernels/conv_ops.h
@@ -34,8 +34,9 @@ class OpKernelContext;
 template <typename Device, typename T>
 struct LaunchConv2DOp {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-                  const Tensor& input, const Tensor& filter, int row_stride,
-                  int col_stride, const Padding& padding, Tensor* output,
+                  const Tensor& input, const Tensor& filter, int row_dilation,
+                  int col_dilation, int row_stride, int col_stride,
+                  const Padding& padding, Tensor* output,
                   TensorFormat data_format);
 };
 
@@ -43,8 +44,9 @@ struct LaunchConv2DOp {
 template <typename T>
 struct LaunchConv2DOp<Eigen::GpuDevice, T> {
   void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune,
-                  const Tensor& input, const Tensor& filter, int row_stride,
-                  int col_stride, const Padding& padding, Tensor* output,
+                  const Tensor& input, const Tensor& filter, int row_dilation,
+                  int col_dilation, int row_stride, int col_stride,
+                  const Padding& padding, Tensor* output,
                   TensorFormat data_format);
 };
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
index 37cb67bc51112d42feaca25c37b3939775b66888..21c84b2a0ed15eaada88e308e1761dcb58cb07b3 100644
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@@ -377,6 +377,9 @@ struct LaunchConvOp<GPUDevice, T> {
         {{in_planes, in_rows, in_cols}},
         out_depth,
         {{filter_planes, filter_rows, filter_cols}},
+        // TODO(yangzihao): Send in arbitrary dilation rates after the dilated
+        // conv is supported.
+        /*dilation=*/{{1, 1, 1}},
         {{strides[0], strides[1], strides[2]}},
         {{pad_planes, pad_rows, pad_cols}},
         dtype,
diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h
index c852dc9991c2e879c8fa6a64b2bd8b5141606409..57e196c67cf067bc716d8253f05fc759eaeeba8d 100644
--- a/tensorflow/core/kernels/conv_ops_gpu.h
+++ b/tensorflow/core/kernels/conv_ops_gpu.h
@@ -91,13 +91,14 @@ class ConvParameters {
   using SpatialArray = gtl::InlinedVector<int64, 3>;
   ConvParameters(int64 batch, int64 in_depths, const SpatialArray& in,
                  int64 out_depths, const SpatialArray& filter,
-                 const SpatialArray& stride, const SpatialArray& padding,
-                 DataType dtype, int device_id)
+                 const SpatialArray& dilation, const SpatialArray& stride,
+                 const SpatialArray& padding, DataType dtype, int device_id)
       : batch_(batch),
         in_depths_(in_depths),
         out_depths_(out_depths),
         in_(in),
         filter_(filter),
+        dilation_(dilation),
         stride_(stride),
         padding_(padding),
         dtype_(dtype),
@@ -107,6 +108,7 @@ class ConvParameters {
     for (int64 val : in) hash_code_ = Hash64Combine(hash_code_, val);
     hash_code_ = Hash64Combine(hash_code_, out_depths);
     for (int64 val : filter) hash_code_ = Hash64Combine(hash_code_, val);
+    for (int64 val : dilation) hash_code_ = Hash64Combine(hash_code_, val);
     for (int64 val : stride) hash_code_ = Hash64Combine(hash_code_, val);
     for (int64 val : padding) hash_code_ = Hash64Combine(hash_code_, val);
     hash_code_ = Hash64Combine(hash_code_, dtype);
@@ -128,6 +130,7 @@ class ConvParameters {
         "(", str_util::Join(in_, ", "), "), ",
         out_depths_, ", ",
         "(", str_util::Join(filter_, ", "), "), ",
+        "(", str_util::Join(dilation_, ", "), "), ",
         "(", str_util::Join(stride_, ", "), "), ",
         "(", str_util::Join(padding_, ", "), "), ",
         dtype_, ", ",
@@ -143,7 +146,7 @@ class ConvParameters {
     int64 total_size = 16 * std::ceil(batch_ / 16.0) *
                        std::max(in_depths_, out_depths_) * in_[0] * in_[1] *
                        sizeof(T);
-    int64 threshold = 1L << 31;
+    int64 threshold = 1LL << 31;
     if (total_size >= threshold) {
       return false;
     } else {
@@ -154,11 +157,11 @@ class ConvParameters {
  protected:
   using ParameterDataType =
       std::tuple<int64, int64, SpatialArray, int64, SpatialArray, SpatialArray,
-                 SpatialArray, DataType, int>;
+                 SpatialArray, SpatialArray, DataType, int>;
 
   ParameterDataType get_data_as_tuple() const {
     return std::make_tuple(batch_, in_depths_, in_, out_depths_, filter_,
-                           stride_, padding_, dtype_, device_id_);
+                           dilation_, stride_, padding_, dtype_, device_id_);
   }
 
   uint64 hash_code_;
@@ -169,6 +172,7 @@ class ConvParameters {
   int64 out_depths_;
   SpatialArray in_;
   SpatialArray filter_;
+  SpatialArray dilation_;
   SpatialArray stride_;
   SpatialArray padding_;
   DataType dtype_;
diff --git a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
index 9a00a091bd29ca6bb3150c65e24833d6d99b2ffd..af6013c9747a717b95138c960abcdcc96f4dac73 100644
--- a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
+++ b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
@@ -19,12 +19,15 @@ limitations under the License.
 
 #include <algorithm>
 #include <array>
+#include <limits>
+#include <utility>
 
 #include "cuda/include/cuda.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/kernels/conv_2d.h"
 #include "tensorflow/core/util/cuda_kernel_helper.h"
 #include "tensorflow/core/util/tensor_format.h"
+#include "tensorflow/core/lib/math/math_util.h"
 
 namespace tensorflow {
 
@@ -223,185 +226,136 @@ __global__ void SwapDimension1And2InTensor3Simple(int nthreads, const T* input,
 // Use shared memory tiles to swap dimension-1 and dimension-2 of a 3D tensor,
 // where dimensions are zero-based: output[i][j][k] = input[i][k][j].
 //
-// Each thread block operates on a single tile, a square of dimensions TileSize
-// x TileSize.  We require that the thread block's X dimension equals TileSize,
-// and its Y dimension equals NumSubTiles.
+// Each thread block operates on a single tile, a rectangle of dimensions
+// TileSizeI x TileSizeJ.
 //
-// For best performance, you should probably set TileSize equal to the number of
-// threads in a warp (32 in nvidia GPUs).  With a TileSize of 32, NumSubTiles ==
-// 4 or 8 seems to get the best performance on K40 GPUs.
-template <typename T, int TileSize, int NumSubTiles, bool conjugate = false>
-__global__ void SwapDimension1And2InTensor3UsingTiles(const T* input,
-                                                      Dimension<3> input_dims,
-                                                      T* output) {
-  // One extra line in the inner dimension to avoid share memory bank conflict.
-  __shared__ T shared_memory_tile[TileSize][TileSize + 1];
-
-  static_assert(TileSize % NumSubTiles == 0,
-                "TileSize must be divisible by NumSubTiles");
-  eigen_assert(blockDim.x == TileSize);
-  eigen_assert(blockDim.y == NumSubTiles);
+// In general, for best performance, you should probably set TileSizeI,
+// TileSizeJ equal to the number of threads in a warp (32 in nvidia GPUs).
+// With a TileSizeI, TileSizeJ of 32, NumThreads of 128 or 256 seems to get
+// the best performance on K40 GPUs.
+template <typename T, int NumThreads, int TileSizeI, int TileSizeJ,
+          bool conjugate = false>
+__global__ void SwapDimension1And2InTensor3UsingTiles(
+    const T* __restrict__ input, Dimension<3> input_dims,
+    T* __restrict__ output) {
+  eigen_assert(blockDim.x == NumThreads);
+  eigen_assert(blockDim.y == 1);
   eigen_assert(blockDim.z == 1);
   eigen_assert(gridDim.y == 1);
   eigen_assert(gridDim.z == 1);
 
-  // We break down the tile into NumSubTiles groups, so each thread processes
-  // kSubTileSize elements (except at the edges of the input).
-  const int kSubTileSize = TileSize / NumSubTiles;
+  constexpr int ReadRowPerPass = NumThreads / TileSizeJ;
+  constexpr int WriteRowPerPass = NumThreads / TileSizeI;
+  // One extra line in the inner dimension to avoid share memory bank conflict.
+  __shared__ T shared_memory_tile[TileSizeI][TileSizeJ + 1];
 
   int x = threadIdx.x;
 
   Dimension<3> output_dims = {
-      input_dims[0],
-      input_dims[2],
-      input_dims[1],
+      input_dims[0], input_dims[2], input_dims[1],
   };
 
   Dimension<3> input_dims_in_tiles = {
-      input_dims[0],
-      (input_dims[1] + TileSize - 1) / TileSize,
-      (input_dims[2] + TileSize - 1) / TileSize,
+      input_dims[0], (input_dims[1] + TileSizeI - 1) / TileSizeI,
+      (input_dims[2] + TileSizeJ - 1) / TileSizeJ,
   };
 
   Index<3> input_tile_index =
       FlatToTensorIndex(blockIdx.x, input_dims_in_tiles);
 
   Index<3> input_tile_origin = {
-      input_tile_index[0],
-      input_tile_index[1] * TileSize,
-      input_tile_index[2] * TileSize,
+      input_tile_index[0], input_tile_index[1] * TileSizeI,
+      input_tile_index[2] * TileSizeJ,
   };
 
   int input_origin_flat_index =
       TensorIndexToFlat(input_tile_origin, input_dims);
 
-  int tile_width = TileSize;
+  bool full_tile = true;
+  int tile_width = TileSizeJ;
+
   // Only the last row or column may not have the full size.
   if (input_tile_index[2] == input_dims_in_tiles[2] - 1) {
-    tile_width = input_dims[2] - (input_dims_in_tiles[2] - 1) * TileSize;
+    tile_width = input_dims[2] - (input_dims_in_tiles[2] - 1) * TileSizeJ;
+    full_tile &= false;
   }
-  int tile_height = TileSize;
+
+  int tile_height = TileSizeI;
+
   if (input_tile_index[1] == input_dims_in_tiles[1] - 1) {
-    tile_height = input_dims[1] - (input_dims_in_tiles[1] - 1) * TileSize;
+    tile_height = input_dims[1] - (input_dims_in_tiles[1] - 1) * TileSizeI;
+    full_tile &= false;
   }
 
-  int input_flat_index = input_origin_flat_index + x;
-  int y_start = static_cast<int>(threadIdx.y) * kSubTileSize;
-
-  // Load the data from input memory to the shared memory tile.
-  if (x < tile_width) {
-    int y_end = min(y_start + kSubTileSize, tile_height);
-    for (int y = y_start; y < y_end; y++) {
-      shared_memory_tile[y][x] = maybe_conj<T, conjugate>::run(
-          input[input_flat_index + y * input_dims[2]]);
+  // Calculate effective thread number. This ensures that we use the largest
+  // number of threads available to form a regular thread block with no
+  // trailing incomplete lines.
+  constexpr int in_effective_thread_num = NumThreads / TileSizeJ * TileSizeJ;
+
+  if (x < in_effective_thread_num) {
+    // Orient the logical thread block with respect to the input array.
+    // ie. align the contiguous dimension of thread blocks with the contiguous
+    // dimension of the input array.
+    int ti = x / TileSizeJ;
+    int tj = x % TileSizeJ;
+    int input_index = input_origin_flat_index + ti * input_dims[2] + tj;
+    int input_increment = ReadRowPerPass * input_dims[2];
+
+    if (full_tile) {
+#pragma unroll
+      for (int i_loc = ti; i_loc < (TileSizeI); i_loc += ReadRowPerPass) {
+        shared_memory_tile[i_loc][tj] =
+            maybe_conj<T, conjugate>::run(input[input_index]);
+        input_index += input_increment;
+      }
+    } else {
+      if (tj < tile_width) {
+        for (int i_loc = ti; i_loc < (tile_height); i_loc += ReadRowPerPass) {
+          shared_memory_tile[i_loc][tj] =
+              maybe_conj<T, conjugate>::run(input[input_index]);
+          input_index += input_increment;
+        }
+      }
     }
   }
 
   __syncthreads();
 
   Index<3> output_tile_index = {
-      input_tile_index[0],
-      input_tile_index[2],
-      input_tile_index[1],
+      input_tile_index[0], input_tile_index[2], input_tile_index[1],
   };
 
   Index<3> output_tile_origin = {
-      output_tile_index[0],
-      output_tile_index[1] * TileSize,
-      output_tile_index[2] * TileSize,
+      output_tile_index[0], output_tile_index[1] * TileSizeJ,
+      output_tile_index[2] * TileSizeI,
   };
 
   int output_origin_flat_index =
       TensorIndexToFlat(output_tile_origin, output_dims);
 
-  int output_flat_index = output_origin_flat_index + x;
-
-  // Load the data from the shared memory tile to the output memory.
-  if (x < tile_height) {
-    int y_end = min(y_start + kSubTileSize, tile_width);
-    for (int y = y_start; y < y_end; y++) {
-      output[output_flat_index + y * output_dims[2]] = shared_memory_tile[x][y];
-    }
-  }
-}
-
-// Use shared memory tiles to swap dimension-1 and dimension-2 of a 3D tensor
-// when only one of the dimension sizes is smaller than 16,
-// where dimensions are zero-based: output[i][j][k] = input[i][k][j].
-//
-// small_dim = the_smaller_dimension_size
-// large_dim = the_larger_dimension_size
-// tile_num_per_block = blockDim.x
-// kTileLength = small_dim
-//
-// Each thread block operates on a single rectangle tile, where its width is
-// kTileLength (we currently set it to 64) and its height is small_dim,
-// We set the thread block's X dimension to be tile_num_per_block, and its Y
-// and Z to be one.
-template <typename T, int ShmemSize, bool SmallDim2, bool conjugate = false>
-__global__ void SwapDimension1And2InTensor3SmallDim(const T* input,
-                                                    int batch_per_block,
-                                                    Dimension<3> input_dims,
-                                                    T* output) {
-  // TODO(yangzihao) avoid share memory bank conflict.
-  __shared__ T shared_memory_tile[ShmemSize];
-
-  eigen_assert(blockDim.y == 1);
-  eigen_assert(blockDim.z == 1);
-  eigen_assert(gridDim.z == 1);
-
-  int block_offset = blockIdx.x * blockDim.x;
-
-  int x = threadIdx.x;
-  int tile_height = blockDim.x;
-
-  // Get tile height, width, and thread/block origin indices.
-  int small_dim = SmallDim2 ? input_dims[2] : input_dims[1];
-  int large_dim = SmallDim2 ? input_dims[1] : input_dims[2];
-
-  int global_offset = small_dim * large_dim * (blockIdx.y * batch_per_block) +
-                      (SmallDim2 ? block_offset * small_dim : block_offset);
-  if (global_offset >= (input_dims[0] * input_dims[1] * input_dims[2])) return;
-
-  for (int batch = 0; batch < batch_per_block; ++batch) {
-    int block_origin_idx =
-        small_dim * large_dim * (blockIdx.y * batch_per_block + batch);
-    int thread_origin_idx =
-        block_origin_idx +
-        (SmallDim2 ? block_offset * small_dim : block_offset) + x;
-
-    if (block_offset + blockDim.x > large_dim) {
-      tile_height = large_dim - block_offset;
-    }
-
-    __syncthreads();
-
-    // Load a continuous memory region to shared memory tile.
-    if (x < tile_height) {
-      for (int y = 0; y < small_dim; y++) {
-        int shmem_index =
-            SmallDim2 ? (x + y * tile_height) : (x * small_dim + y);
-        shared_memory_tile[shmem_index] = maybe_conj<T, conjugate>::run(
-            ldg(input + thread_origin_idx +
-                y * (SmallDim2 ? tile_height : large_dim)));
+  constexpr int out_effective_thread_num = NumThreads / TileSizeI * TileSizeI;
+
+  if (x < out_effective_thread_num) {
+    // Re-orient the logical thread block with respect to the output array.
+    // ie. align the contiguous dimension of thread blocks with contiguous
+    // dimension of the output array.
+    int ti = x / TileSizeI;
+    int tj = x % TileSizeI;
+    int output_index = output_origin_flat_index + ti * output_dims[2] + tj;
+    int output_increment = WriteRowPerPass * output_dims[2];
+
+    if (full_tile) {
+#pragma unroll
+      for (int i_loc = ti; i_loc < (TileSizeJ); i_loc += WriteRowPerPass) {
+        output[output_index] = shared_memory_tile[tj][i_loc];
+        output_index += output_increment;
       }
-    }
-
-    __syncthreads();
-
-    // Get block origin index for output array.
-    int output_block_offset = block_origin_idx;
-    int output_block_idx = SmallDim2 ? block_offset : block_offset * small_dim;
-    int output_block_origin_idx = output_block_offset + output_block_idx;
-
-    // Store the transposed memory region in shared memory to device.
-    if (x < tile_height) {
-      for (int y = 0; y < small_dim; y++) {
-        int output_idx = output_block_origin_idx + x +
-                         y * (SmallDim2 ? large_dim : tile_height);
-        int shmem_index =
-            SmallDim2 ? (x * small_dim + y) : (x + y * tile_height);
-        output[output_idx] = shared_memory_tile[shmem_index];
+    } else {
+      if (tj < tile_height) {
+        for (int i_loc = ti; i_loc < (tile_width); i_loc += WriteRowPerPass) {
+          output[output_index] = shared_memory_tile[tj][i_loc];
+          output_index += output_increment;
+        }
       }
     }
   }
@@ -548,6 +502,382 @@ struct PadInput<GPUDevice, T, int, NDIMS> {
   }
 };
 
+// We want std::equal_to and std::greater, but they're not constexpr until
+// C++14.
+struct EqualTo {
+  constexpr bool operator()(int a, int b) const { return a == b; }
+};
+
+struct GreaterThan {
+  constexpr bool operator()(int a, int b) const { return a > b; }
+};
+
+// For each data type, the tile size possibility frontier denotes the tile size
+// combinations that consume the most computational resources constrained by
+// - number of threads per SM limit,
+// - limit on size of the short dimension (<=15) due to the definition of
+//   narrow matrix,
+// - shared memory limit and
+// - some experimentally determined, type-specific constraint on the product of
+//   two side lengths to increase grid-level parallelism.
+//
+// A tile size combination lies on the frontier if and only if one or more
+// constraint mentioned above is hit. Tile size combinations lying outside this
+// frontier are either not possible, or are slower than the alternatives.
+//
+// It is instrumental to consider, for each data type, two subsets of the
+// corresponding frontier:
+// - long side frontier: the union of the biggest tile size combination for
+//   each legal long side len.
+// - non long side frontier: the frontier set minus the long side frontier.
+//
+// TileSizePossibilityFrontierCheck defines the frontier using only the long
+// side frontier tile size combinations (since one can easily extrapolate
+// the entire frontier from this subset). It serves as a utility function
+// to help us determine where a tile size combination of interest lies with
+// resepect to the frontier.
+template <typename Op>
+constexpr bool TileSizePossibilityFrontierCheck(int TileLongSide,
+                                                int TileShortSide,
+                                                int size_of_t, Op op) {
+  // clang-format off
+
+  return (size_of_t == 16 && ((TileLongSide == 32   && op(TileShortSide, 4))  ||
+                             (TileLongSide == 64   && op(TileShortSide, 4))  ||
+                             (TileLongSide == 128  && op(TileShortSide, 4))  ||
+                             (TileLongSide == 256  && op(TileShortSide, 2)))) ||
+          (size_of_t == 8 && ((TileLongSide == 32   && op(TileShortSide, 15)) ||
+                             (TileLongSide == 64   && op(TileShortSide, 15)) ||
+                             (TileLongSide == 128  && op(TileShortSide, 8))  ||
+                             (TileLongSide == 256  && op(TileShortSide, 4))  ||
+                             (TileLongSide == 512  && op(TileShortSide, 2)))) ||
+          (size_of_t == 4 && ((TileLongSide == 32   && op(TileShortSide, 15)) ||
+                             (TileLongSide == 64   && op(TileShortSide, 15)) ||
+                             (TileLongSide == 128  && op(TileShortSide, 15)) ||
+                             (TileLongSide == 256  && op(TileShortSide, 8))  ||
+                             (TileLongSide == 512  && op(TileShortSide, 4))  ||
+                             (TileLongSide == 1024 && op(TileShortSide, 2)))) ||
+          (size_of_t == 2 && ((TileLongSide == 32   && op(TileShortSide, 15)) ||
+                             (TileLongSide == 64   && op(TileShortSide, 15)) ||
+                             (TileLongSide == 128  && op(TileShortSide, 15)) ||
+                             (TileLongSide == 256  && op(TileShortSide, 8))  ||
+                             (TileLongSide == 512  && op(TileShortSide, 4))  ||
+                             (TileLongSide == 1024 && op(TileShortSide, 2)))) ||
+          (size_of_t == 1 && ((TileLongSide == 32   && op(TileShortSide, 15)) ||
+                             (TileLongSide == 64   && op(TileShortSide, 15)) ||
+                             (TileLongSide == 128  && op(TileShortSide, 15)) ||
+                             (TileLongSide == 256  && op(TileShortSide, 8))  ||
+                             (TileLongSide == 512  && op(TileShortSide, 4))  ||
+                             (TileLongSide == 1024 && op(TileShortSide, 2))));
+
+  // clang-format on
+}
+
+constexpr bool TileSizeOnLongSideFrontier(int TileLongSide, int TileShortSide,
+                                          int size_of_t) {
+  return TileSizePossibilityFrontierCheck(TileLongSide, TileShortSide,
+                                          size_of_t, EqualTo());
+}
+constexpr bool TileSizeOutsideFrontier(int TileLongSide, int TileShortSide,
+                                       int size_of_t) {
+  return TileSizePossibilityFrontierCheck(TileLongSide, TileShortSide,
+                                          size_of_t, GreaterThan());
+}
+constexpr bool TileSizeOnNonLongSideFrontier(int TileLongSide,
+                                             int TileShortSide, int size_of_t) {
+  // For a tile size combination (longside, shortside), lying on the frontier
+  // implies that (longside, shortside) is on or within the frontier but
+  // (longside*2, shortside) or (longside, shortside+1) is not. With the above
+  // critereon, we simply need to use !TileSizeOnLongSideFrontier to ensure that
+  // it is not on the long side frontier.
+  return !TileSizeOutsideFrontier(TileLongSide, TileShortSide, size_of_t) &&
+         (TileSizeOutsideFrontier(TileLongSide * 2, TileShortSide, size_of_t) ||
+          TileSizeOutsideFrontier(TileLongSide, TileShortSide + 1,
+                                  size_of_t)) &&
+         !TileSizeOnLongSideFrontier(TileLongSide, TileShortSide, size_of_t);
+}
+
+// Helper function to launch a batch narrow matirx transpose kernel.
+template <typename T, int TileLongSide, int TileShortSide>
+void LaunchBatchNarrowMatrixTransposeKernel(
+    const GPUDevice& d, int tile_size_i, int tile_size_j, int total_tiles_count,
+    const T* input, const Dimension<3>& input_dims, T* output) {
+  constexpr int NumThreads = TileLongSide;
+  if (tile_size_i <= TileLongSide && tile_size_j <= TileShortSide) {
+    SwapDimension1And2InTensor3UsingTiles<T, NumThreads, TileLongSide,
+                                          TileShortSide>
+        <<<total_tiles_count, NumThreads, 0, d.stream()>>>(input, input_dims,
+                                                           output);
+  } else {
+    SwapDimension1And2InTensor3UsingTiles<T, NumThreads, TileShortSide,
+                                          TileLongSide>
+        <<<total_tiles_count, NumThreads, 0, d.stream()>>>(input, input_dims,
+                                                           output);
+  }
+}
+
+// Recursive template function to search, in a trial-and-error manner, for the
+// minimum tile size configuration satisfying the requested tile side lengths.
+// An important invariant of this search procedure is that for an unsatisfied
+// request, we always try doubling the long side len first, and only after
+// the request is satisfied for the long side len do we begin incrementing
+// the short side len.
+//
+// We have three specializations of this search function depending on where the
+// current tile size combination lies with respect to the frontier.
+// - It lies within the frontier. If request is not satisfied, for the next tile
+// size combination, we first try doubling the long side len and if that does
+// not work, we then increment the short side len.
+// - It lies on the non long side frontier. If the request is not satisfied, we
+// can only increment the short side len.
+// - It lies on the long side frontier. We launch the kernel without checking if
+// the request is satisfied or not.
+template <typename T, int TileLongSide, int TileShortSide,
+          typename dummy = void>
+struct BatchNarrowMatrixTransposeDispatcher {
+  static void DoIt(const GPUDevice& d, int tile_size_i, int tile_size_j,
+                   int total_tiles_count, const T* input,
+                   const Dimension<3>& input_dims, T* output) {
+    static_assert(
+        (TileLongSide & (TileLongSide - 1)) == 0,
+        "The length of the longer side of the tile is always a power of 2.");
+    bool request_satisfied = max(tile_size_i, tile_size_j) <= TileLongSide &&
+                             min(tile_size_i, tile_size_j) <= TileShortSide;
+
+    if (request_satisfied) {
+      LaunchBatchNarrowMatrixTransposeKernel<T, TileLongSide, TileShortSide>(
+          d, tile_size_i, tile_size_j, total_tiles_count, input, input_dims,
+          output);
+      return;
+    }
+
+    // If the execution reaches here, then the kernel was not launched; we then
+    // determine whether it is the long side or the short side that falls short
+    // of the request and increase that parameter accordingly.
+    const bool long_side_request_not_satisfied =
+        max(tile_size_i, tile_size_j) > TileLongSide;
+
+    if (long_side_request_not_satisfied) {
+      BatchNarrowMatrixTransposeDispatcher<
+          T, TileLongSide * 2, TileShortSide>::DoIt(d, tile_size_i, tile_size_j,
+                                                    total_tiles_count, input,
+                                                    input_dims, output);
+    } else {
+      BatchNarrowMatrixTransposeDispatcher<
+          T, TileLongSide, TileShortSide + 1>::DoIt(d, tile_size_i, tile_size_j,
+                                                    total_tiles_count, input,
+                                                    input_dims, output);
+    }
+  }
+};
+
+template <typename T, int TileLongSide, int TileShortSide>
+struct BatchNarrowMatrixTransposeDispatcher<
+    T, TileLongSide, TileShortSide,
+    typename std::enable_if<TileSizeOnNonLongSideFrontier(
+                                TileLongSide, TileShortSide, sizeof(T)),
+                            void>::type> {
+  static void DoIt(const GPUDevice& d, int tile_size_i, int tile_size_j,
+                   int total_tiles_count, const T* input,
+                   const Dimension<3>& input_dims, T* output) {
+    static_assert(
+        (TileLongSide & (TileLongSide - 1)) == 0,
+        "The length of the longer side of the tile is always a power of 2.");
+    bool request_satisfied = max(tile_size_i, tile_size_j) <= TileLongSide &&
+                             min(tile_size_i, tile_size_j) <= TileShortSide;
+
+    if (request_satisfied) {
+      LaunchBatchNarrowMatrixTransposeKernel<T, TileLongSide, TileShortSide>(
+          d, tile_size_i, tile_size_j, total_tiles_count, input, input_dims,
+          output);
+      return;
+    }
+
+    // If the execution reaches here, then the kernel was not launched; since
+    // we are on the non long side frontier, we increment the short dimension
+    // and try again.
+    BatchNarrowMatrixTransposeDispatcher<
+        T, TileLongSide, TileShortSide + 1>::DoIt(d, tile_size_i, tile_size_j,
+                                                  total_tiles_count, input,
+                                                  input_dims, output);
+  }
+};
+
+template <typename T, int TileLongSide, int TileShortSide>
+struct BatchNarrowMatrixTransposeDispatcher<
+    T, TileLongSide, TileShortSide,
+    typename std::enable_if<TileSizeOnLongSideFrontier(
+                                TileLongSide, TileShortSide, sizeof(T)),
+                            void>::type> {
+  static void DoIt(const GPUDevice& d, int tile_size_i, int tile_size_j,
+                   int total_tiles_count, const T* input,
+                   const Dimension<3>& input_dims, T* output) {
+    static_assert(
+        (TileLongSide & (TileLongSide - 1)) == 0,
+        "The length of the longer side of the tile is always a power of 2.");
+
+    LaunchBatchNarrowMatrixTransposeKernel<T, TileLongSide, TileShortSide>(
+        d, tile_size_i, tile_size_j, total_tiles_count, input, input_dims,
+        output);
+  }
+};
+
+// This function tries to recover, in a brute force way, the frontier defined in
+// TileSizePossibilityFrontierCheck as a vector of tile size combinations lying
+// on the long side frontier. This vector is sufficient to determine the entire
+// frontier.
+//
+// Note that if one changes the frontier definition in
+// TileSizePossibilityFrontierCheck and forgets to set the largest short
+// side len of the largest legal long side len to 2, this function will fail
+// and crash the program.
+template <int SizeOfT>
+const std::vector<std::pair<int, int>>& GetTileSizesFrontier() {
+  static_assert(
+      SizeOfT <= 16,
+      "Currently, only data types of sizes 16 bytes or less are supported.");
+  static_assert((SizeOfT & (SizeOfT - 1)) == 0,
+                "Data types must have sizes that are powers of 2.");
+
+  // Expensive work to populate sizes, lazily run in a thread-safe
+  // manner the first time GetTileSizesFrontier<N> is called.
+  static auto* frontier = [] {
+    auto* frontier = new std::vector<std::pair<int, int>>();
+    const int kMaxLongSideLen = 1024;
+    const int kMaxShortSideLen = 15;
+    for (int long_side = 32; long_side <= kMaxLongSideLen; long_side *= 2) {
+      for (int short_side = 2; short_side <= kMaxShortSideLen;
+           short_side += 1) {
+        if (TileSizeOnLongSideFrontier(long_side, short_side, SizeOfT)) {
+          // The current combination lies on the frontier, thus we
+          // add it to the frontier definition.
+          frontier->push_back(std::make_pair(long_side, short_side));
+
+          // The long side length is the largest one allowed iff its
+          // corresponding short side length is 2.
+          if (short_side == 2) return frontier;
+
+          // We have exhausted all the possibilities in the frontier
+          // with the given long side length.
+          break;
+        }
+      }
+    }
+    LOG(FATAL)
+        << "The corresponding short side length of the largest long side "
+           "length has to be 2.";
+  }();
+  return *frontier;
+}
+
+// Helper structs to help determine which data type to use given the size of
+// the matrix data type. A transpose of elements of size N will use a kernel
+// which operates on an array of TransposeElemType<N>::type.
+template <int ElemBytes>
+struct TransposeElemType;
+template <>
+struct TransposeElemType<1> {
+  using type = uint8;
+};
+template <>
+struct TransposeElemType<2> {
+  using type = uint16;
+};
+template <>
+struct TransposeElemType<4> {
+  using type = uint32;
+};
+template <>
+struct TransposeElemType<8> {
+  using type = uint64;
+};
+template <>
+struct TransposeElemType<16> {
+  using type = float4;
+};
+
+// A helper function to make RunSwapDimension1And2InTensor3 concise. This
+// helper function looks at the data type and input matrix sizes and decides
+// the thread numbers and tile sizes to use.
+template <typename T, bool conjugate = false >
+void SwapDimension1And2InTensor3WithNarrowMatrices(
+    const GPUDevice& d, const T* input, const Dimension<3>& input_dims,
+    T* output, const int kMinDimensionToUseTiles) {
+  // Get available tile sizes here for the data type requested:
+  const auto& tile_spec = GetTileSizesFrontier<sizeof(T)>();
+
+  int tile_long_side_len = 0;
+  int tile_short_side_len = 0;
+  float lowest_cost = std::numeric_limits<float>::max();
+  int data_long_side = max(input_dims[1], input_dims[2]);
+
+  for (auto tile_size_pair : tile_spec) {
+    int proposed_tile_long_side_len = tile_size_pair.first;
+
+    // Number of threads that will not be doing anything useful when reading
+    // the matrix because the thread block size is bigger than the data block
+    // size.
+    int num_wasted_threads =
+        data_long_side - MathUtil::FloorOfRatio<int>(
+                             data_long_side, proposed_tile_long_side_len) *
+                             proposed_tile_long_side_len;
+
+    int num_full_tiles = MathUtil::FloorOfRatio<int>(
+        data_long_side, proposed_tile_long_side_len);
+
+    float cost = 0;
+
+    // However, if we can execute two or more full tiles, then we gladly
+    // accept any number of wasted threads and ignore its cost.
+    if (num_full_tiles <= 1) cost = num_wasted_threads;
+
+    // Using less than or equal to here because given the same cost, we
+    // would like to launch as many threads as possible.
+    if (cost <= lowest_cost) {
+      tile_long_side_len = proposed_tile_long_side_len;
+      tile_short_side_len = tile_size_pair.second;
+      lowest_cost = cost;
+    }
+  }
+
+  // Request tile sizes such that the longer side of threadblock aligns with
+  // the longer side of input data block to maximize read throughput.
+  // The ideal tile shape is one where the length of the shorter side of the
+  // tile is equal to the length of the shorter side of the input matrix.
+  int requested_tile_size_i = input_dims[1] >= kMinDimensionToUseTiles
+                                  ? tile_long_side_len
+                                  : input_dims[1];
+  int requested_tile_size_j = input_dims[1] >= kMinDimensionToUseTiles
+                                  ? input_dims[2]
+                                  : tile_long_side_len;
+
+  // Truncate the shorter size requested according to the manual limit set in
+  // tile_spec to make sure that we do not launch configurations violating
+  // hardware limits.
+  requested_tile_size_i = requested_tile_size_i == tile_long_side_len
+                              ? tile_long_side_len
+                              : min(requested_tile_size_i, tile_short_side_len);
+  requested_tile_size_j = requested_tile_size_j == tile_long_side_len
+                              ? tile_long_side_len
+                              : min(requested_tile_size_j, tile_short_side_len);
+
+  Dimension<3> input_dims_in_tiles = {
+      input_dims[0],
+      MathUtil::CeilOfRatio<int>(input_dims[1], requested_tile_size_i),
+      MathUtil::CeilOfRatio<int>(input_dims[2], requested_tile_size_j),
+  };
+
+  int total_tiles_count =
+      input_dims_in_tiles[0] * input_dims_in_tiles[1] * input_dims_in_tiles[2];
+
+  using ElemType = typename TransposeElemType<sizeof(T)>::type;
+  static_assert(alignof(T) >= alignof(ElemType), "Unexpected data alignment.");
+  BatchNarrowMatrixTransposeDispatcher<ElemType, 32, 2>::DoIt(
+      d, requested_tile_size_i, requested_tile_size_j, total_tiles_count,
+      reinterpret_cast<const ElemType*>(input), input_dims,
+      reinterpret_cast<ElemType*>(output));
+}
+
 // Launch the GPU kernel that would swap dimension-1 and dimension-2 in a
 // 3D tensor. It looks at the shape of the incoming data, and decides the best
 // strategy to launch.
@@ -558,60 +888,33 @@ void RunSwapDimension1And2InTensor3(const GPUDevice& d, const T* input,
   // If one dimension is trivial, use SmallDim kernel for swapping.
   // Otherwise, the trivial swapping relying on the ldg cache is more efficient.
   static const int kMinDimensionToUseTiles = 16;
-  bool use_tiles = (input_dims[1] >= kMinDimensionToUseTiles &&
-                    input_dims[2] >= kMinDimensionToUseTiles);
-  bool use_small_dim = ((input_dims[1] >= kMinDimensionToUseTiles &&
-                         input_dims[2] < kMinDimensionToUseTiles)) ||
-                       ((input_dims[1] < kMinDimensionToUseTiles &&
-                         input_dims[2] >= kMinDimensionToUseTiles));
-  static const int NumSubTiles = 8;
-
-  if (use_tiles) {
-    static const int TileSize = 32;
+  static const int kMinDimensionToUseRectTiles = 96;
+
+  bool large_matrix = input_dims[1] >= kMinDimensionToUseTiles &&
+                      input_dims[2] >= kMinDimensionToUseTiles;
+  bool narrow_matrix = input_dims[1] >= kMinDimensionToUseRectTiles ||
+                       input_dims[2] >= kMinDimensionToUseRectTiles;
+  if (large_matrix) {
+    // We get best performance when kTileSize is the number of threads in a warp
+    // (32 on our GPUs) and NumSubTiles is 8, so our block size is 8 * 32 = 256
+    // threads.
+    constexpr int kTileSize = 32;
+    constexpr int kNumThreads = 256;
+
     Dimension<3> input_dims_in_tiles = {
-        input_dims[0],
-        (input_dims[1] + TileSize - 1) / TileSize,
-        (input_dims[2] + TileSize - 1) / TileSize,
+        input_dims[0], MathUtil::CeilOfRatio<int>(input_dims[1], kTileSize),
+        MathUtil::CeilOfRatio<int>(input_dims[2], kTileSize),
     };
+
     int total_tiles_count = input_dims_in_tiles[0] * input_dims_in_tiles[1] *
                             input_dims_in_tiles[2];
-    // We get best performance when TileSize is the number of threads in a warp
-    // (32 on our GPUs) and NumSubTiles is 8, so our block size is 8 * 32 = 256
-    // threads.
-    SwapDimension1And2InTensor3UsingTiles<T, TileSize, NumSubTiles, conjugate>
-        <<<total_tiles_count, dim3(TileSize, NumSubTiles), 0, d.stream()>>>(
-            input, input_dims, output);
-  } else if (use_small_dim) {
-    // When only one of the dimensions is smaller than kMinDimensionToUseTiles,
-    // we use one block to process a rectangle region with the size of
-    // kTileLength * small_dim. We found that when set kTileLength to 64 on
-    // TitanX Maxwell GPU, it achieves the best performance.
-    //              large_dim
-    //            +---------------...--------+
-    //            |            |        |    |
-    // small_dim  |            |  ...   |    |
-    //            |            |        |    |
-    //            +--------------...---------+
-    //            \----- ------/         \- -/
-    //                  V                  V
-    //    kTileLength(tile_height)    tile_height
-    static const int kTileLength = 64;
-    static const int kGridDimY = 65535;
-    int large_dim = std::max(input_dims[2], input_dims[1]);
-    int tile_num_per_block = (large_dim + kTileLength - 1) / kTileLength;
-    int grid_dim_y = std::min(input_dims[0], kGridDimY);
-    int batch_per_block = (input_dims[0] + grid_dim_y - 1) / grid_dim_y;
-    if (input_dims[2] < input_dims[1]) {
-      SwapDimension1And2InTensor3SmallDim<
-          T, kTileLength * kMinDimensionToUseTiles, true, conjugate>
-          <<<dim3(tile_num_per_block, grid_dim_y), kTileLength, 0,
-             d.stream()>>>(input, batch_per_block, input_dims, output);
-    } else {
-      SwapDimension1And2InTensor3SmallDim<
-          T, kTileLength * kMinDimensionToUseTiles, false, conjugate>
-          <<<dim3(tile_num_per_block, grid_dim_y), kTileLength, 0,
-             d.stream()>>>(input, batch_per_block, input_dims, output);
-    }
+    SwapDimension1And2InTensor3UsingTiles<T, kNumThreads, kTileSize, kTileSize, conjugate>
+        <<<total_tiles_count, kNumThreads, 0, d.stream()>>>(input, input_dims,
+                                                            output);
+
+  } else if (narrow_matrix) {
+    SwapDimension1And2InTensor3WithNarrowMatrices<T, conjugate>(d, input, input_dims, output,
+                                                  kMinDimensionToUseTiles);
   } else {
     int total_element_count = input_dims[0] * input_dims[1] * input_dims[2];
     CudaLaunchConfig config = GetCudaLaunchConfig(total_element_count, d);
diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc
index ea54d6cf6cbfb6f2d38ae10644fed348980ab622..666bca265c95febf3753e71bf010a7caf95c0541 100644
--- a/tensorflow/core/kernels/conv_ops_test.cc
+++ b/tensorflow/core/kernels/conv_ops_test.cc
@@ -43,6 +43,8 @@ TEST(ConvParameters, WinogradNonfusedAlgoSize) {
       128,       // out_depths
       {{3,       // filter_rows
         3}},     // filter_cols
+      {{1,       // dilation_rows
+        1}},     // dilation_cols
       {{1,       // stride_rows
         1}},     // stride_cols
       {{0,       // padding_rows
@@ -60,6 +62,8 @@ TEST(ConvParameters, WinogradNonfusedAlgoSize) {
       768,       // out_depths
       {{3,       // filter_rows
         3}},     // filter_cols
+      {{1,       // dilation_rows
+        1}},     // dilation_cols
       {{1,       // stride_rows
         1}},     // stride_cols
       {{0,       // padding_rows
diff --git a/tensorflow/core/kernels/critical_section.cc b/tensorflow/core/kernels/critical_section.cc
new file mode 100644
index 0000000000000000000000000000000000000000..30a9abf4ee78cdb336e4c25c217239daf89bae11
--- /dev/null
+++ b/tensorflow/core/kernels/critical_section.cc
@@ -0,0 +1,246 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define EIGEN_USE_THREADS
+
+#include <deque>
+#include <utility>
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/kernels/captured_function.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+class CriticalSection : public ResourceBase {
+ public:
+  explicit CriticalSection() : is_locked_(false) {}
+  ~CriticalSection() override {
+    // Wait for all closures to finish running.
+    mutex_lock lock(mu_);
+    while (!closures_.empty()) {
+      queue_empty_cv_.wait(lock);
+    }
+  }
+
+ private:
+  friend class ExecuteInCriticalSectionOp;
+
+  void Acquire(std::function<void()> closure) {
+    std::function<void()> next;
+    {
+      mutex_lock ml(mu_);
+      if (is_locked_) {
+        closures_.push_back(std::move(closure));
+      } else {
+        // This branch is the common case.  Avoid the queue.
+        is_locked_ = true;
+        next = std::move(closure);
+      }
+    }
+    if (next) {
+      next();
+    }
+  }
+
+  void Release() {
+    std::function<void()> next;
+    {
+      mutex_lock ml(mu_);
+      CHECK(is_locked_);
+      if (!closures_.empty()) {
+        // if queue is not empty, start the next entry off the queue.
+        std::swap(next, closures_.front());
+        closures_.pop_front();
+      } else {
+        is_locked_ = false;
+        queue_empty_cv_.notify_all();
+      }
+    }
+    if (next) {
+      next();
+    }
+  }
+
+  string DebugString() override {
+    tf_shared_lock ml(mu_);
+    return strings::StrCat("CriticalSection(locked: ", is_locked_,
+                           " queue_size: ", closures_.size(), ")");
+  }
+
+ private:
+  mutex mu_;
+  std::deque<std::function<void()>> closures_ GUARDED_BY(mu_);
+  bool is_locked_ GUARDED_BY(mu_);
+  condition_variable queue_empty_cv_ GUARDED_BY(mu_);
+};
+
+class ExecuteInCriticalSectionOp : public AsyncOpKernel {
+ public:
+  explicit ExecuteInCriticalSectionOp(OpKernelConstruction* c)
+      : AsyncOpKernel(c) {
+    OP_REQUIRES_OK(c, c->GetAttr("f", &func_));
+  }
+
+ public:
+  void ComputeAsync(OpKernelContext* c, DoneCallback done) override {
+    CriticalSection* critical_section = nullptr;
+    OP_REQUIRES_OK_ASYNC(c,
+                         LookupOrCreateResource<CriticalSection>(
+                             c, HandleFromInput(c, 0), &critical_section,
+                             [this, c](CriticalSection** ptr) {
+                               *ptr = new CriticalSection;
+                               return Status::OK();
+                             }),
+                         done);
+    // No need to Unref critical_section; the Closure below will take
+    // care of the Unref associated with this execution.
+
+    auto* execution = new Closure{std::move(done), c, critical_section, &func_};
+    execution->Start();
+  }
+
+ private:
+  class Closure {
+   public:
+    AsyncOpKernel::DoneCallback done_;
+    OpKernelContext* ctx_;
+    CriticalSection* cs_;
+    FunctionLibraryRuntime::Handle handle_;
+    FunctionLibraryRuntime::Options opts_;
+    std::vector<Tensor> arguments_t_;
+    std::vector<Tensor> output_t_;
+    NameAttrList* func_;
+
+    explicit Closure(AsyncOpKernel::DoneCallback done, OpKernelContext* ctx,
+                     CriticalSection* critical_section, NameAttrList* func)
+        : done_(std::move(done)),
+          ctx_(ctx),
+          cs_(critical_section),
+          handle_(-1),
+          func_(func) {}
+
+    ~Closure();
+
+    void Start() {
+      // Perform ExecuteFunction isnide a separate thread to avoid
+      // having lightweight Functions be inlined in this thread.
+      // That inlining would in turn inline DoneAndDelete inside the
+      // same thread.  Since DoneAndDelete can call the next
+      // ExecuteFunction in the CriticalSection, this can cause a
+      // stack overflow.
+      cs_->Acquire(
+          [this]() { (*ctx_->runner())([this]() { ExecuteFunction(); }); });
+    }
+
+   private:
+    void ExecuteFunction();
+    void DoneAndDelete(const Status& status);
+  };
+
+  NameAttrList func_;
+};
+
+void ExecuteInCriticalSectionOp::Closure::ExecuteFunction() {
+  // Arguments to a Function are in the order:
+  //   concat(<formal arguments>, <captured arguments>)
+  OpInputList arguments;
+  Status s = ctx_->input_list("arguments", &arguments);
+  if (!s.ok()) {
+    DoneAndDelete(s);
+    return;
+  }
+
+  arguments_t_.reserve(arguments.size());
+  for (const Tensor& t : arguments) {
+    arguments_t_.push_back(t);
+  }
+
+  auto* function_library = ctx_->function_library();
+  s = function_library->Instantiate(func_->name(), AttrSlice(&func_->attr()),
+                                    &handle_);
+  if (!s.ok()) {
+    DoneAndDelete(s);
+    return;
+  }
+
+  opts_.step_id = CapturedFunction::generate_step_id();
+  auto* step_container =
+      new ScopedStepContainer(opts_.step_id, [this](const string& name) {
+        ctx_->resource_manager()->Cleanup(name).IgnoreError();
+      });
+  opts_.cancellation_manager = ctx_->cancellation_manager();
+  opts_.step_container = step_container;
+  opts_.runner = ctx_->runner();
+
+  function_library->Run(opts_, handle_, arguments_t_, &output_t_,
+                        [this](const Status& s) { DoneAndDelete(s); });
+}
+
+void ExecuteInCriticalSectionOp::Closure::DoneAndDelete(const Status& status) {
+  cs_->Release();
+
+  if (!status.ok()) {
+    ctx_->SetStatus(status);
+  } else {
+    OpOutputList output;
+    const Status s = ctx_->output_list("outputs", &output);
+    if (!s.ok()) {
+      ctx_->SetStatus(s);
+    } else if (output_t_.size() != output.size()) {
+      ctx_->SetStatus(errors::Internal(
+          "Could not set all outputs.  Expected output size is ", output.size(),
+          " but function set ", output_t_.size(), " output values."));
+    } else {
+      for (int i = 0; i < output_t_.size(); ++i) {
+        output.set(i, output_t_[i]);
+      }
+    }
+  }
+
+  delete opts_.step_container;
+  opts_.step_container = nullptr;
+  done_();
+  cs_->Unref();
+  delete this;
+}
+
+ExecuteInCriticalSectionOp::Closure::~Closure() {
+  CHECK(!opts_.step_container)
+      << "Initialized closure destroyed without calling Done";
+}
+
+REGISTER_KERNEL_BUILDER(Name("ExecuteInCriticalSection").Device(DEVICE_CPU),
+                        ExecuteInCriticalSectionOp);
+
+REGISTER_KERNEL_BUILDER(Name("CriticalSectionOp").Device(DEVICE_CPU),
+                        ResourceHandleOp<CriticalSection>);
+
+// TODO(ebrevdo): Re-enable once the cross-device function execution works.
+#if GOOGLE_CUDA
+REGISTER_KERNEL_BUILDER(Name("ExecuteInCriticalSection")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("critical_section"),
+                        ExecuteInCriticalSectionOp);
+REGISTER_KERNEL_BUILDER(
+    Name("CriticalSectionOp").Device(DEVICE_GPU).HostMemory("resource"),
+    ResourceHandleOp<CriticalSection>);
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cross_op.cc b/tensorflow/core/kernels/cross_op.cc
index 05a33a97b413d6e6484eed50532f359e22af017c..b29524f1f9e5d2c2aaefab957a4c54756e662033 100644
--- a/tensorflow/core/kernels/cross_op.cc
+++ b/tensorflow/core/kernels/cross_op.cc
@@ -105,6 +105,7 @@ TF_CALL_REAL_NUMBER_TYPES(DECLARE_GPU_KERNEL);
   REGISTER_KERNEL_BUILDER(                                        \
       Name("Cross").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
       CrossOp<GPUDevice, type>);
+
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 #endif
diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/kernels/cuda_solvers.cc
index a83671a471c35d5db238616652224b0c3830da9f..6cec032f9492020fa55c468fcd6a5b09effb0e81 100644
--- a/tensorflow/core/kernels/cuda_solvers.cc
+++ b/tensorflow/core/kernels/cuda_solvers.cc
@@ -314,6 +314,11 @@ Status CudaSolver::forward_input_or_allocate_scoped_tensor(
 // are sometimes inaccurate, e.g., are missing 'const' on pointers
 // to immutable arguments, while the actual headers have them as expected.
 // Check the actual declarations in the cusolver_api.h header file.
+//
+// NOTE: The cuSolver functions called below appear not to be threadsafe.
+// so we put a global lock around the calls. Since these functions only put a
+// kernel on the shared stream, it is not a big performance hit.
+// TODO(rmlarsen): Investigate if the locking is still needed in Cuda 9.
 //=============================================================================
 
 template <typename Scalar, typename SolverFnT>
@@ -324,6 +329,7 @@ static inline Status GeamImpl(SolverFnT solver, cublasHandle_t cublas_handle,
                               const Scalar* A, int lda,
                               const Scalar* beta, /* host or device pointer */
                               const Scalar* B, int ldb, Scalar* C, int ldc) {
+  mutex_lock lock(handle_map_mutex);
   using CudaScalar = typename CUDAComplexT<Scalar>::type;
   TF_RETURN_IF_CUBLAS_ERROR(solver(cublas_handle, transa, transb, m, n,
                                    reinterpret_cast<const CudaScalar*>(alpha),
@@ -355,6 +361,7 @@ static inline Status PotrfImpl(BufSizeFnT bufsize, SolverFnT solver,
                                cusolverDnHandle_t cusolver_dn_handle,
                                cublasFillMode_t uplo, int n, Scalar* A, int lda,
                                int* dev_lapack_info) {
+  mutex_lock lock(handle_map_mutex);
   /* Get amount of workspace memory required. */
   int lwork;
   TF_RETURN_IF_CUSOLVER_ERROR(
@@ -387,6 +394,7 @@ static inline Status GetrfImpl(BufSizeFnT bufsize, SolverFnT solver,
                                cusolverDnHandle_t cusolver_dn_handle, int m,
                                int n, Scalar* A, int lda, int* dev_pivots,
                                int* dev_lapack_info) {
+  mutex_lock lock(handle_map_mutex);
   /* Get amount of workspace memory required. */
   int lwork;
   TF_RETURN_IF_CUSOLVER_ERROR(
@@ -419,9 +427,6 @@ static inline Status GetrsImpl(SolverFnT solver, OpKernelContext* context,
                                cublasOperation_t trans, int n, int nrhs,
                                const Scalar* A, int lda, const int* pivots,
                                Scalar* B, int ldb, int* dev_lapack_info) {
-  // Note: The cuSolver functions called here appear not to be threadsafe.
-  // so we put a global lock around it. Since this function only puts a
-  // kernel on the stream, it is not a big performance hit.
   mutex_lock lock(handle_map_mutex);
   /* Launch the solver kernel. */
   TF_RETURN_IF_CUSOLVER_ERROR(solver(cusolver_dn_handle, trans, n, nrhs,
@@ -449,6 +454,7 @@ static inline Status GeqrfImpl(BufSizeFnT bufsize, SolverFnT solver,
                                cusolverDnHandle_t cusolver_dn_handle, int m,
                                int n, Scalar* A, int lda, Scalar* tau,
                                int* dev_lapack_info) {
+  mutex_lock lock(handle_map_mutex);
   /* Get amount of workspace memory required. */
   int lwork;
   TF_RETURN_IF_CUSOLVER_ERROR(
@@ -483,6 +489,7 @@ static inline Status UnmqrImpl(BufSizeFnT bufsize, SolverFnT solver,
                                int m, int n, int k, const Scalar* dev_a,
                                int lda, const Scalar* dev_tau, Scalar* dev_c,
                                int ldc, int* dev_lapack_info) {
+  mutex_lock lock(handle_map_mutex);
   /* Get amount of workspace memory required. */
   int lwork;
   TF_RETURN_IF_CUSOLVER_ERROR(
@@ -526,6 +533,7 @@ static inline Status UngqrImpl(BufSizeFnT bufsize, SolverFnT solver,
                                cusolverDnHandle_t cusolver_dn_handle, int m,
                                int n, int k, Scalar* dev_a, int lda,
                                const Scalar* dev_tau, int* dev_lapack_info) {
+  mutex_lock lock(handle_map_mutex);
   /* Get amount of workspace memory required. */
   int lwork;
   TF_RETURN_IF_CUSOLVER_ERROR(bufsize(cusolver_dn_handle, m, n, k,
@@ -606,17 +614,13 @@ static inline Status GesvdImpl(
     OpKernelContext* context, cusolverDnHandle_t cusolver_dn_handle,
     signed char jobu, signed char jobvt, int m, int n, Scalar* A, int lda,
     Scalar* S, Scalar* U, int ldu, Scalar* VT, int ldvt, int* dev_lapack_info) {
+  mutex_lock lock(handle_map_mutex);
   /* Get amount of workspace memory required. */
   int lwork;
   TF_RETURN_IF_CUSOLVER_ERROR(bufsize(cusolver_dn_handle, m, n, &lwork));
   /* Allocate device memory for workspace. */
   auto dev_workspace =
       cuda_solver->GetScratchSpace<Scalar>(lwork, "", /* on_host */ false);
-  // Note: The cuSolver functions called here appear not to be threadsafe.
-  // so we put a global lock around it. Since this function only puts a
-  // kernel on the stream, it is not a big performance hit.
-  mutex_lock lock(handle_map_mutex);
-  /* Launch the solver kernel. */
   TF_RETURN_IF_CUSOLVER_ERROR(solver(cusolver_dn_handle, jobu, jobvt, m, n,
                                      CUDAComplex(A), lda, S, CUDAComplex(U),
                                      ldu, CUDAComplex(VT), ldvt,
@@ -655,6 +659,7 @@ static inline Status GetrfBatchedImpl(SolverFnT solver, CudaSolver* cuda_solver,
                                       int lda, int* dev_pivots,
                                       DeviceLapackInfo* dev_lapack_info,
                                       int batch_size) {
+  mutex_lock lock(handle_map_mutex);
   using CudaScalar = typename CUDAComplexT<Scalar>::type;
   ScratchSpace<uint8> dev_a_dev_ptrs =
       cuda_solver->GetScratchSpace<uint8>(sizeof(CudaScalar*) * batch_size, "",
@@ -689,6 +694,7 @@ static inline Status GetrsBatchedImpl(
     const Scalar* const host_a_dev_ptrs[], int lda, const int* dev_pivots,
     const Scalar* const host_b_dev_ptrs[], int ldb,
     DeviceLapackInfo* dev_lapack_info, int batch_size) {
+  mutex_lock lock(handle_map_mutex);
   using CudaScalar = typename CUDAComplexT<Scalar>::type;
   ScratchSpace<uint8> dev_a_dev_ptrs =
       cuda_solver->GetScratchSpace<uint8>(sizeof(CudaScalar*) * batch_size, "",
@@ -734,6 +740,7 @@ static inline Status GetriBatchedImpl(
     cublasHandle_t cublas_handle, int n, const Scalar* const host_a_dev_ptrs[],
     int lda, const int* dev_pivots, const Scalar* const host_a_inv_dev_ptrs[],
     int ldainv, DeviceLapackInfo* dev_lapack_info, int batch_size) {
+  mutex_lock lock(handle_map_mutex);
   using CudaScalar = typename CUDAComplexT<Scalar>::type;
   ScratchSpace<uint8> dev_a_dev_ptrs =
       cuda_solver->GetScratchSpace<uint8>(sizeof(CudaScalar*) * batch_size, "",
@@ -776,6 +783,7 @@ static inline Status MatInvBatchedImpl(
     cublasHandle_t cublas_handle, int n, const Scalar* const host_a_dev_ptrs[],
     int lda, const Scalar* const host_a_inv_dev_ptrs[], int ldainv,
     DeviceLapackInfo* dev_lapack_info, int batch_size) {
+  mutex_lock lock(handle_map_mutex);
   using CudaScalar = typename CUDAComplexT<Scalar>::type;
   ScratchSpace<uint8> dev_a_dev_ptrs =
       cuda_solver->GetScratchSpace<uint8>(sizeof(CudaScalar*) * batch_size, "",
diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc
index e6e1b83b30750e28d84c458236221e5f7749b5a0..0aec6aac3442a98309e352cf1431b920a87f62fe 100644
--- a/tensorflow/core/kernels/cwise_op_asinh.cc
+++ b/tensorflow/core/kernels/cwise_op_asinh.cc
@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+  http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_and.cc b/tensorflow/core/kernels/cwise_op_bitwise_and.cc
index 017a2182dcff0f0121dd6343f1c012802cdf28d1..5a6cf4bad1609cebc0fded4d212e50fb19d22558 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_and.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_and.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER6(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                      \
@@ -30,13 +30,15 @@ REGISTER_SYCL_KERNEL(int32);
 REGISTER_SYCL_KERNEL(int64);
 REGISTER_SYCL_KERNEL(uint8);
 REGISTER_SYCL_KERNEL(uint16);
+REGISTER_SYCL_KERNEL(uint32);
+REGISTER_SYCL_KERNEL(uint64);
 #undef REGISTER_SYCL_KERNEL
 
 #endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
-REGISTER6(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_or.cc b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
index 36f45fe92dfce44c68a778b6c719c45d24bcaa90..201a10198a629b26429393c5c04404175399df73 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_or.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER6(BinaryOp, CPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, CPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                     \
@@ -30,13 +30,15 @@ REGISTER_SYCL_KERNEL(int32);
 REGISTER_SYCL_KERNEL(int64);
 REGISTER_SYCL_KERNEL(uint8);
 REGISTER_SYCL_KERNEL(uint16);
+REGISTER_SYCL_KERNEL(uint32);
+REGISTER_SYCL_KERNEL(uint64);
 #undef REGISTER_SYCL_KERNEL
 
 #endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
-REGISTER6(BinaryOp, GPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, GPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
index 36432d851d99f20706b7e7f8535e6ac241b00937..2a7cd2699596a7ace6afd5ce688ff2e186650336 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER6(BinaryOp, CPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, CPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE)                                      \
@@ -30,13 +30,15 @@ REGISTER_SYCL_KERNEL(int32);
 REGISTER_SYCL_KERNEL(int64);
 REGISTER_SYCL_KERNEL(uint8);
 REGISTER_SYCL_KERNEL(uint16);
+REGISTER_SYCL_KERNEL(uint32);
+REGISTER_SYCL_KERNEL(uint64);
 #undef REGISTER_SYCL_KERNEL
 
 #endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
-REGISTER6(BinaryOp, GPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
-          int64, uint8, uint16);
+REGISTER8(BinaryOp, GPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
+          int64, uint8, uint16, uint32, uint64);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc
index 9d4d65442762b88bb418bc0266b41ae37259e43f..66d7b7d22ebe63bf42da848aa028fcbafc26864b 100644
--- a/tensorflow/core/kernels/cwise_op_exp.cc
+++ b/tensorflow/core/kernels/cwise_op_exp.cc
@@ -20,7 +20,8 @@ REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double,
           complex64, complex128);
 
 #if GOOGLE_CUDA
-REGISTER3(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double);
+REGISTER5(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double,
+          complex64, complex128);
 #endif
 
 #if TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
index 27f973c90d73a1d7828ce180254363a0b7b4be76..3fbf69c114d3c546eafb9f6c504568a649c52e59 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
@@ -19,7 +19,8 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY6(bitwise_and, int8, int16, int32, int64, uint8, uint16);
+DEFINE_BINARY8(bitwise_and, int8, int16, int32, int64, uint8, uint16, uint32,
+               uint64);
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
index a34c3a52cd6253527c67d2d1f8c1498756ff5be8..8bcb82266a2d3567c0f8d79b2fdccd5916b2ecbb 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
@@ -19,7 +19,8 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY6(bitwise_or, int8, int16, int32, int64, uint8, uint16);
+DEFINE_BINARY8(bitwise_or, int8, int16, int32, int64, uint8, uint16, uint32,
+               uint64);
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
index a4531ab7c6f283f8e732dbc87b3c64d93a8a5bef..e62a87aba44eea0fc5b1cf13a74ddfed2ef294b6 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
@@ -19,7 +19,8 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY6(bitwise_xor, int8, int16, int32, int64, uint8, uint16);
+DEFINE_BINARY8(bitwise_xor, int8, int16, int32, int64, uint8, uint16, uint32,
+               uint64);
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc
index 0f492917bd54cc5b518e7fe76a8dd08b3934d1da..417e5da7588221b190d11092b6e03787a0dd15d4 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc
@@ -19,7 +19,7 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
-DEFINE_UNARY3(exp, Eigen::half, float, double);
+DEFINE_UNARY5(exp, Eigen::half, float, double, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index d32185b6bf48f7b6d49f355c0653004310bde533..da70b1e314e2fc1679401920f8a42dd37105e5af 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -26,24 +26,28 @@ limitations under the License.
 #include "tensorflow/core/kernels/bounds_check.h"
 
 namespace Eigen {
-namespace internal {
+namespace numext {
+#if GOOGLE_CUDA
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<float> exp(
+    const std::complex<float>& x) {
+  auto com = ::expf(x.real());
+  auto res_real = com * ::cosf(x.imag());
+  auto res_imag = com * ::sinf(x.imag());
+  return std::complex<float>(res_real, res_imag);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<double> exp(
+    const std::complex<double>& x) {
+  auto com = ::exp(x.real());
+  auto res_real = com * ::cos(x.imag());
+  auto res_imag = com * ::sin(x.imag());
+  return std::complex<double>(res_real, res_imag);
+}
+#endif
+}  // namespace numext
 
-// TODO(rmlarsen): Get rid of fmod2 once fmod is upstreamed to Eigen.
-template <typename T>
-struct scalar_fmod2_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod2_op)
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a,
-                                                           const T& b) const {
-    return std::fmod(a, b);
-  }
-};
-template <typename T>
-struct functor_traits<scalar_fmod2_op<T>> {
-  enum {
-    Cost = 13,  // Reciprocal throughput of FPREM on Haswell.
-    PacketAccess = false,
-  };
-};
+namespace internal {
 
 template <typename T>
 struct scalar_asinh_op {
@@ -702,7 +706,7 @@ struct safe_div : base<T, Eigen::internal::safe_div_or_mod_op<
 };
 
 template <typename T>
-struct fmod : base<T, Eigen::internal::scalar_fmod2_op<T>> {};
+struct fmod : base<T, Eigen::internal::scalar_fmod_op<T>> {};
 
 template <typename T>
 struct mod : base<T, Eigen::internal::scalar_mod2_op<T>> {};
diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..500ee7b43f2fbd730ae38c3820ed28ec67b9036c
--- /dev/null
+++ b/tensorflow/core/kernels/data/BUILD
@@ -0,0 +1,547 @@
+# Description:
+#   OpKernels for tf.data
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_kernel_library",
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+cc_library(
+    name = "stats_aggregator",
+    hdrs = ["stats_aggregator.h"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_kernel_library(
+    name = "stats_aggregator_ops",
+    srcs = ["stats_aggregator_ops.cc"],
+    deps = [
+        ":stats_aggregator",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "dataset",
+    srcs = ["dataset.cc"],
+    hdrs = ["dataset.h"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+cc_library(
+    name = "dataset_utils",
+    srcs = ["dataset_utils.cc"],
+    hdrs = ["dataset_utils.h"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+cc_library(
+    name = "captured_function",
+    srcs = ["captured_function.cc"],
+    hdrs = ["captured_function.h"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:proto_text",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:session_options",
+        "//tensorflow/core/kernels:variable_ops",
+    ],
+)
+
+cc_library(
+    name = "window_dataset",
+    srcs = ["window_dataset.cc"],
+    hdrs = ["window_dataset.h"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "batch_dataset_op",
+    srcs = ["batch_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels:batch_util",
+    ],
+)
+
+tf_kernel_library(
+    name = "padded_batch_dataset_op",
+    srcs = ["padded_batch_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "dense_to_sparse_batch_dataset_op",
+    srcs = ["dense_to_sparse_batch_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "group_by_window_dataset_op",
+    srcs = ["group_by_window_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        ":window_dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "filter_dataset_op",
+    srcs = ["filter_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "map_dataset_op",
+    srcs = ["map_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "map_and_batch_dataset_op",
+    srcs = ["map_and_batch_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels:inplace_ops",
+    ],
+)
+
+tf_kernel_library(
+    name = "parallel_map_dataset_op",
+    srcs = ["parallel_map_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+tf_kernel_library(
+    name = "scan_dataset_op",
+    srcs = ["scan_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "flat_map_dataset_op",
+    srcs = ["flat_map_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        ":dataset_utils",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "interleave_dataset_op",
+    srcs = ["interleave_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        ":dataset_utils",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "parallel_interleave_dataset_op",
+    srcs = ["parallel_interleave_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        ":dataset_utils",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "prefetch_dataset_op",
+    srcs = ["prefetch_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+    ],
+)
+
+tf_kernel_library(
+    name = "repeat_dataset_op",
+    srcs = ["repeat_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "take_dataset_op",
+    srcs = ["take_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "skip_dataset_op",
+    srcs = ["skip_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "ignore_errors_dataset_op",
+    srcs = ["ignore_errors_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "stats_dataset_ops",
+    srcs = ["stats_dataset_ops.cc"],
+    deps = [
+        ":dataset",
+        ":stats_aggregator",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "random_dataset_op",
+    srcs = ["random_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "range_dataset_op",
+    srcs = ["range_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "shuffle_dataset_op",
+    srcs = ["shuffle_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "sparse_tensor_slice_dataset_op",
+    srcs = ["sparse_tensor_slice_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "tensor_dataset_op",
+    srcs = ["tensor_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "tensor_slice_dataset_op",
+    srcs = ["tensor_slice_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels:batch_util",
+    ],
+)
+
+tf_kernel_library(
+    name = "zip_dataset_op",
+    srcs = ["zip_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "concatenate_dataset_op",
+    srcs = ["concatenate_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "reader_dataset_ops",
+    srcs = ["reader_dataset_ops.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "sql_dataset_ops",
+    srcs = [
+        "sql_dataset_ops.cc",
+    ],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels/data/sql",
+    ],
+)
+
+tf_kernel_library(
+    name = "iterator_ops",
+    srcs = ["iterator_ops.cc"],
+    deps = [
+        ":dataset",
+        ":stats_aggregator",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:session_options",
+        "//tensorflow/core/kernels:ops_util",
+    ],
+)
+
+tf_kernel_library(
+    name = "cache_dataset_ops",
+    srcs = ["cache_dataset_ops.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/util/tensor_bundle",
+    ],
+)
+
+tf_kernel_library(
+    name = "unique_dataset_op",
+    srcs = ["unique_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "dataset_ops",
+    deps = [
+        ":batch_dataset_op",
+        ":cache_dataset_ops",
+        ":concatenate_dataset_op",
+        ":dense_to_sparse_batch_dataset_op",
+        ":filter_dataset_op",
+        ":flat_map_dataset_op",
+        ":group_by_window_dataset_op",
+        ":ignore_errors_dataset_op",
+        ":interleave_dataset_op",
+        ":iterator_ops",
+        ":map_and_batch_dataset_op",
+        ":map_dataset_op",
+        ":padded_batch_dataset_op",
+        ":parallel_interleave_dataset_op",
+        ":parallel_map_dataset_op",
+        ":prefetch_dataset_op",
+        ":random_dataset_op",
+        ":range_dataset_op",
+        ":reader_dataset_ops",
+        ":repeat_dataset_op",
+        ":scan_dataset_op",
+        ":shuffle_dataset_op",
+        ":skip_dataset_op",
+        ":sparse_tensor_slice_dataset_op",
+        ":sql_dataset_ops",
+        ":stats_aggregator_ops",
+        ":stats_dataset_ops",
+        ":take_dataset_op",
+        ":tensor_dataset_op",
+        ":tensor_slice_dataset_op",
+        ":unique_dataset_op",
+        ":zip_dataset_op",
+    ],
+)
diff --git a/tensorflow/core/kernels/batch_dataset_op.cc b/tensorflow/core/kernels/data/batch_dataset_op.cc
similarity index 92%
rename from tensorflow/core/kernels/batch_dataset_op.cc
rename to tensorflow/core/kernels/data/batch_dataset_op.cc
index 3dec4f71d8a6823d15f6173d139fd9e60e9df29d..2d6e06398f66c0b07ae17d4fd25d7ba6b5cfef03 100644
--- a/tensorflow/core/kernels/batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/batch_dataset_op.cc
@@ -12,11 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/batch_util.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -151,6 +150,16 @@ class BatchDatasetOp : public UnaryDatasetOpKernel {
           // Build the output tuple component by copying one slice
           // from each input element in the batch.
           for (size_t i = 0; i < num_batch_elements; ++i) {
+            if (batch_elements[i][component_index].shape() !=
+                first_element.shape()) {
+              return errors::InvalidArgument(
+                  "Cannot batch tensors with different shapes in component ",
+                  component_index, ". First element had shape ",
+                  first_element.shape().DebugString(), " and element ", i,
+                  " had shape ",
+                  batch_elements[i][component_index].shape().DebugString(),
+                  ".");
+            }
             TF_RETURN_IF_ERROR(batch_util::CopyElementToSlice(
                 std::move(batch_elements[i][component_index]), &batch_component,
                 i));
@@ -173,7 +182,7 @@ class BatchDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         if (!reader->Contains(full_name("input_impl_empty"))) {
diff --git a/tensorflow/core/kernels/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc
similarity index 99%
rename from tensorflow/core/kernels/cache_dataset_ops.cc
rename to tensorflow/core/kernels/data/cache_dataset_ops.cc
index 137002b9d77a18fbd5660eb06bcf69d0c4ad3f13..f0a2192826e051586e4999d729c24ed5495be0ea 100644
--- a/tensorflow/core/kernels/cache_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/util/tensor_bundle/tensor_bundle.h"
diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1f6d32f8df39948a4529bdf53091ff742ba88edb
--- /dev/null
+++ b/tensorflow/core/kernels/data/captured_function.cc
@@ -0,0 +1,316 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/kernels/data/captured_function.h"
+
+#include <utility>
+
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/cancellation.h"
+#include "tensorflow/core/lib/gtl/optional.h"
+#include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/platform/notification.h"
+
+
+namespace tensorflow {
+
+/* static */
+Status CapturedFunction::Create(
+    const NameAttrList& func, std::vector<Tensor> captured_inputs,
+    std::unique_ptr<CapturedFunction>* out_function) {
+  out_function->reset(new CapturedFunction(func, std::move(captured_inputs)));
+  return Status::OK();
+}
+
+CapturedFunction::~CapturedFunction() {}
+
+namespace {
+class CallFrameBase : public CallFrameInterface {
+ public:
+  explicit CallFrameBase(DataTypeSlice ret_types)
+      : ret_types_(ret_types), retvals_(ret_types.size()) {}
+
+  // Caller methods.
+  Status ConsumeRetvals(std::vector<Tensor>* retvals) {
+    retvals->reserve(retvals_.size());
+    int i = 0;
+    for (auto&& val : retvals_) {
+      if (!val) {
+        return errors::Internal("No return value for index ", i, ".");
+      }
+      retvals->emplace_back(std::move(val.value()));
+      ++i;
+    }
+    return Status::OK();
+  }
+
+  size_t num_retvals() const override { return retvals_.size(); }
+
+  // Callee methods.
+  Status SetRetval(int index, const Tensor& val) override {
+    if (index < retvals_.size() && val.dtype() == ret_types_[index] &&
+        !retvals_[index]) {
+      retvals_[index] = val;
+      return Status::OK();
+    } else if (index >= retvals_.size()) {
+      return errors::InvalidArgument("Return value ", index,
+                                     " is out of range.");
+    } else if (val.dtype() != ret_types_[index]) {
+      return errors::InvalidArgument("Expected type ",
+                                     DataTypeString(ret_types_[index]),
+                                     " for return value ", index, " but got ",
+                                     DataTypeString(val.dtype()), ".");
+    } else {
+      return errors::Internal("Attempted to set return value ", index,
+                              " more than once.");
+    }
+  }
+
+ private:
+  DataTypeSlice ret_types_;
+  std::vector<gtl::optional<Tensor>> retvals_;
+  TF_DISALLOW_COPY_AND_ASSIGN(CallFrameBase);
+};
+
+class OwnedArgsCallFrame : public CallFrameBase {
+ public:
+  OwnedArgsCallFrame(std::vector<Tensor>&& args,
+                     const std::vector<Tensor>* captured_inputs,
+                     DataTypeSlice ret_types)
+      : CallFrameBase(ret_types),
+        args_(std::move(args)),
+        captured_inputs_(captured_inputs) {}
+
+  size_t num_args() const override {
+    return args_.size() + captured_inputs_->size();
+  }
+
+  // Callee methods.
+  Status GetArg(int index, Tensor* val) const override {
+    if (index < args_.size() && args_[index].IsInitialized()) {
+      // TODO(mrry): Consider making `CallFrameInterface::GetArg` non-const in
+      // order to be able to `std::move(args_[index])` into `*val`.
+      *val = args_[index];
+      return Status::OK();
+    } else if (index < args_.size() + captured_inputs_->size()) {
+      *val = (*captured_inputs_)[index - args_.size()];
+      return Status::OK();
+    } else if (index >= args_.size() + captured_inputs_->size()) {
+      return errors::InvalidArgument("Argument ", index, " is out of range.");
+    } else {
+      return errors::Internal("Attempted to get argument ", index,
+                              " more than once.");
+    }
+  }
+
+ private:
+  std::vector<Tensor> args_;
+  const std::vector<Tensor>* const captured_inputs_;  // Not owned.
+};
+
+class BorrowedArgsCallFrame : public CallFrameBase {
+ public:
+  BorrowedArgsCallFrame(const std::vector<Tensor>& args,
+                        const std::vector<Tensor>* captured_inputs,
+                        DataTypeSlice ret_types)
+      : CallFrameBase(ret_types),
+        args_(args),
+        captured_inputs_(captured_inputs) {}
+
+  size_t num_args() const override {
+    return args_.size() + captured_inputs_->size();
+  }
+
+  // Callee methods.
+  Status GetArg(int index, Tensor* val) const override {
+    if (index < args_.size() && args_[index].IsInitialized()) {
+      *val = args_[index];
+      return Status::OK();
+    } else if (index < args_.size() + captured_inputs_->size()) {
+      *val = (*captured_inputs_)[index - args_.size()];
+      return Status::OK();
+    } else if (index >= args_.size() + captured_inputs_->size()) {
+      return errors::InvalidArgument("Argument ", index, " is out of range.");
+    } else {
+      return errors::Internal("Attempted to get argument ", index,
+                              " more than once.");
+    }
+  }
+
+ private:
+  const std::vector<Tensor>& args_;                   // Not owned.
+  const std::vector<Tensor>* const captured_inputs_;  // Not owned.
+};
+
+}  // namespace
+
+Status CapturedFunction::MaybeInstantiate(
+    IteratorContext* ctx, FunctionLibraryRuntime::Handle* out_handle) {
+  mutex_lock l(mu_);
+  if (lib_ == nullptr) {
+    // The context's runtime will be used for all subsequent calls.
+    lib_ = ctx->lib();
+    DCHECK(f_handle_ == kInvalidHandle);
+    FunctionLibraryRuntime::InstantiateOptions inst_opts;
+    inst_opts.overlay_lib = ctx->function_library().get();
+    inst_opts.state_handle = std::to_string(random::New64());
+    TF_RETURN_IF_ERROR(lib_->Instantiate(func_.name(), AttrSlice(&func_.attr()),
+                                         inst_opts, &f_handle_));
+    const FunctionBody* fbody = lib_->GetFunctionBody(f_handle_);
+    if (fbody == nullptr) {
+      return errors::Internal("Failed to instantiate function body.");
+    }
+    ret_types_ = fbody->ret_types;
+  } else {
+    // TODO(mrry): Consider moving this under a shared lock, as it is
+    // the common case.
+    if (ctx->lib() != lib_) {
+      return errors::Internal(
+          "Captured function was called with a different "
+          "FunctionLibraryRuntime*, which is not permitted.");
+    }
+  }
+  *out_handle = f_handle_;
+  return Status::OK();
+}
+
+Status CapturedFunction::Run(IteratorContext* ctx,
+                             std::vector<Tensor>&& args,
+                             std::vector<Tensor>* rets) {
+  FunctionLibraryRuntime::Handle handle;
+  TF_RETURN_IF_ERROR(MaybeInstantiate(ctx, &handle));
+
+  FunctionLibraryRuntime::Options f_opts;
+  f_opts.step_id = CapturedFunction::generate_step_id();
+  ScopedStepContainer step_container(f_opts.step_id, [ctx](const string& name) {
+    ctx->lib()->device()->resource_manager()->Cleanup(name).IgnoreError();
+  });
+  f_opts.step_container = &step_container;
+  f_opts.runner = ctx->runner();
+  // TODO(mrry): Add cancellation manager support to IteratorContext
+  // so that we can cancel running map functions. The local
+  // cancellation manager here is created so that we can run kernels
+  // (such as queue kernels) that depend on the non-nullness of
+  // `OpKernelContext::cancellation_manager()`, but additional effort
+  // will be required to plumb it through the `IteratorContext`.
+  CancellationManager c_mgr;
+  f_opts.cancellation_manager = &c_mgr;
+
+  OwnedArgsCallFrame frame(std::move(args), &captured_inputs_, ret_types_);
+  Notification n;
+  Status s;
+  ctx->lib()->Run(f_opts, handle, &frame, [&n, &s](Status func_status) {
+    s.Update(func_status);
+    n.Notify();
+  });
+  n.WaitForNotification();
+  TF_RETURN_IF_ERROR(s);
+  return frame.ConsumeRetvals(rets);
+}
+
+Status CapturedFunction::RunWithBorrowedArgs(IteratorContext* ctx,
+                                             const std::vector<Tensor>& args,
+                                             std::vector<Tensor>* rets) {
+  FunctionLibraryRuntime::Handle handle;
+  TF_RETURN_IF_ERROR(MaybeInstantiate(ctx, &handle));
+
+  FunctionLibraryRuntime::Options f_opts;
+  f_opts.step_id = CapturedFunction::generate_step_id();
+  ScopedStepContainer step_container(f_opts.step_id, [ctx](const string& name) {
+    ctx->lib()->device()->resource_manager()->Cleanup(name).IgnoreError();
+  });
+  f_opts.step_container = &step_container;
+  f_opts.runner = ctx->runner();
+  // TODO(mrry): Add cancellation manager support to IteratorContext
+  // so that we can cancel running map functions. The local
+  // cancellation manager here is created so that we can run kernels
+  // (such as queue kernels) that depend on the non-nullness of
+  // `OpKernelContext::cancellation_manager()`, but additional effort
+  // will be required to plumb it through the `IteratorContext`.
+  CancellationManager c_mgr;
+  f_opts.cancellation_manager = &c_mgr;
+
+  BorrowedArgsCallFrame frame(args, &captured_inputs_, ret_types_);
+  Notification n;
+  Status s;
+
+  ctx->lib()->Run(f_opts, handle, &frame, [&n, &s](Status func_status) {
+    s.Update(func_status);
+    n.Notify();
+  });
+  n.WaitForNotification();
+  TF_RETURN_IF_ERROR(s);
+  return frame.ConsumeRetvals(rets);
+}
+
+void CapturedFunction::RunAsync(IteratorContext* ctx,
+                                std::vector<Tensor>&& args,
+                                std::vector<Tensor>* rets,
+                                FunctionLibraryRuntime::DoneCallback done) {
+  // NOTE(mrry): This method does not transfer ownership of `ctx`, and it may
+  // be deleted before `done` is called. Take care not to capture `ctx` in any
+  // code that may execute asynchronously in this function.
+  FunctionLibraryRuntime::Handle handle;
+  Status s = MaybeInstantiate(ctx, &handle);
+  if (!s.ok()) {
+    done(s);
+    return;
+  }
+  auto frame =
+      new OwnedArgsCallFrame(std::move(args), &captured_inputs_, ret_types_);
+
+  FunctionLibraryRuntime::Options f_opts;
+  f_opts.step_id = CapturedFunction::generate_step_id();
+  ResourceMgr* resource_mgr = ctx->lib()->device()->resource_manager();
+  auto step_container = new ScopedStepContainer(
+      f_opts.step_id, [resource_mgr](const string& name) {
+        resource_mgr->Cleanup(name).IgnoreError();
+      });
+  f_opts.step_container = step_container;
+  f_opts.runner = ctx->runner();
+  // TODO(mrry): Add cancellation manager support to IteratorContext
+  // so that we can cancel running map functions. The local
+  // cancellation manager here is created so that we can run kernels
+  // (such as queue kernels) that depend on the non-nullness of
+  // `OpKernelContext::cancellation_manager()`, but additional effort
+  // will be required to plumb it through the `IteratorContext`.
+  auto c_mgr = new CancellationManager;
+  f_opts.cancellation_manager = c_mgr;
+
+  tf_shared_lock l(mu_);
+  ctx->lib()->Run(f_opts, handle, frame,
+                  std::bind(
+                      [rets, step_container, c_mgr, frame](
+                          FunctionLibraryRuntime::DoneCallback done,
+                          // Begin unbound arguments.
+                          Status s) {
+                        delete step_container;
+                        delete c_mgr;
+                        if (s.ok()) {
+                          s = frame->ConsumeRetvals(rets);
+                        }
+                        delete frame;
+                        done(s);
+                      },
+                      std::move(done), std::placeholders::_1));
+}
+
+CapturedFunction::CapturedFunction(const NameAttrList& func,
+                                   std::vector<Tensor> captured_inputs)
+    : func_(func),
+      lib_(nullptr),
+      f_handle_(kInvalidHandle),
+      captured_inputs_(std::move(captured_inputs)) {}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/captured_function.h b/tensorflow/core/kernels/data/captured_function.h
new file mode 100644
index 0000000000000000000000000000000000000000..99e0ef426e04b38027617dcd91f579c082638011
--- /dev/null
+++ b/tensorflow/core/kernels/data/captured_function.h
@@ -0,0 +1,108 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_CAPTURED_FUNCTION_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_CAPTURED_FUNCTION_H_
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/platform/macros.h"
+
+namespace tensorflow {
+
+class Device;
+class OpKernelContext;
+class ResourceMgr;
+
+// A `CapturedFunction` encapsulates a TensorFlow function and all of
+// the runtime support required to execute it.
+//
+// The `Dataset`-related classes use `CapturedFunction` to execute
+// TensorFlow functions outside a the normal `OpKernel::Compute()`
+// context.
+class CapturedFunction {
+ public:
+  // NOTE(mrry): The `captured_inputs` are passed by value. For
+  // efficiency, you are recommended to move this argument into the call.
+  static Status Create(const NameAttrList& func,
+                       std::vector<Tensor> captured_inputs,
+                       std::unique_ptr<CapturedFunction>* out_function);
+
+  ~CapturedFunction();
+
+  // Runs the "Captured function" using the given FLR and caches the lib and
+  // handle generated during instantiation. If Run is called with a different
+  // lib afterwards, generates an error. This method takes ownership of the
+  // tensors in `args`, in order to be able to deallocate them as early as
+  // possible. Use `RunWithBorrowedArgs()` if the caller needs to retain
+  // ownership of the `args`.
+  Status Run(IteratorContext* ctx, std::vector<Tensor>&& args,
+             std::vector<Tensor>* rets);
+
+  // Synchronously runs the captured function on the given `args`, and stores
+  // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when
+  // possible.
+  Status RunWithBorrowedArgs(IteratorContext* ctx,
+                             const std::vector<Tensor>& args,
+                             std::vector<Tensor>* rets);
+
+  // Asynchronously runs the captured function on the given `args`, stores
+  // the results in `*rets`, and calls the given `done` callback when the
+  // function returns. This method takes ownership of the tensors in `args`,
+  // in order to be able to deallocate them as early as possible.
+  void RunAsync(IteratorContext* ctx, std::vector<Tensor>&& args,
+                std::vector<Tensor>* rets,
+                FunctionLibraryRuntime::DoneCallback done);
+
+  // Returns that additional captured inputs that will be passed to the function
+  // when `Run*()` is called.
+  const std::vector<Tensor>& captured_inputs() { return captured_inputs_; }
+
+  // Returns a step ID for use when running a `CapturedFunction`.
+  static int64 generate_step_id() {
+    // Choose a step ID that is guaranteed not to clash with any
+    // Session-generated step ID. DirectSession only generates
+    // non-negative step IDs (contiguous, starting from 0), and
+    // MasterSession generates 56-bit random step IDs whose MSB is
+    // always 0, so a negative random step ID should suffice.
+    return -std::abs(static_cast<int64>(random::New64()));
+  }
+
+ private:
+  CapturedFunction(const NameAttrList& func,
+                   std::vector<Tensor> captured_inputs);
+
+  Status MaybeInstantiate(IteratorContext* ctx,
+                          FunctionLibraryRuntime::Handle* out_handle);
+
+  mutex mu_;
+  const NameAttrList func_;
+  FunctionLibraryRuntime* lib_ GUARDED_BY(mu_);
+  FunctionLibraryRuntime::Handle f_handle_ GUARDED_BY(mu_);
+  const std::vector<Tensor> captured_inputs_;
+  DataTypeSlice ret_types_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(CapturedFunction);
+};
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_CAPTURED_FUNCTION_H_
diff --git a/tensorflow/core/kernels/concatenate_dataset_op.cc b/tensorflow/core/kernels/data/concatenate_dataset_op.cc
similarity index 93%
rename from tensorflow/core/kernels/concatenate_dataset_op.cc
rename to tensorflow/core/kernels/data/concatenate_dataset_op.cc
index ad78ba01869a862d496d66b8dcac1243cf09fe84..f11abc62a67a6937cfa7891022a1643c93439e97 100644
--- a/tensorflow/core/kernels/concatenate_dataset_op.cc
+++ b/tensorflow/core/kernels/data/concatenate_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -128,14 +127,23 @@ class ConcatenateDatasetOp : public BinaryDatasetOpKernel {
       Status SaveInternal(IteratorStateWriter* writer) override {
         mutex_lock l(mu_);
         TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_));
-        TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        if (input_impl_) {
+          TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        } else {
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(full_name("input_impl_uninitialized"), ""));
+        }
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_));
+        if (reader->Contains(full_name("input_impl_uninitialized"))) {
+          input_impl_.reset();
+          return Status::OK();
+        }
         if (!TF_PREDICT_TRUE(i_ >= 0 && i_ <= 2))
           return errors::InvalidArgument("i_ must be in range [0, 2].");
         if (i_ == 1) {
diff --git a/tensorflow/core/kernels/data/dataset.cc b/tensorflow/core/kernels/data/dataset.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2ea6875567604e4e5bf7c990ad6a42ed8c5dafaa
--- /dev/null
+++ b/tensorflow/core/kernels/data/dataset.cc
@@ -0,0 +1,271 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/graph/graph_def_builder.h"
+#include "tensorflow/core/graph/node_builder.h"
+
+namespace tensorflow {
+
+namespace {
+
+// A wrapper class for storing a `DatasetBase` instance in a DT_VARIANT tensor.
+// Objects of the wrapper class own a reference on an instance of `DatasetBase`,
+// and the wrapper's copy constructor and destructor take care of managing the
+// reference count.
+//
+// NOTE(mrry): This is not a feature-complete implementation of the DT_VARIANT
+// specification. In particular, we cannot currently serialize an arbitrary
+// `DatasetBase` object, so the `Encode()` and `Decode()` methods are not
+// implemented.
+class DatasetVariantWrapper {
+ public:
+  DatasetVariantWrapper() : dataset_(nullptr) {}
+
+  // Transfers ownership of `dataset` to `*this`.
+  explicit DatasetVariantWrapper(DatasetBase* dataset) : dataset_(dataset) {}
+
+  DatasetVariantWrapper(const DatasetVariantWrapper& other)
+      : dataset_(other.dataset_) {
+    if (dataset_) dataset_->Ref();
+  }
+
+  ~DatasetVariantWrapper() {
+    if (dataset_) dataset_->Unref();
+  }
+
+  DatasetBase* get() const { return dataset_; }
+
+  string TypeName() const { return "tensorflow::DatasetVariantWrapper"; }
+  string DebugString() const {
+    if (dataset_) {
+      return dataset_->DebugString();
+    } else {
+      return "<Uninitialized DatasetVariantWrapper>";
+    }
+  }
+  void Encode(VariantTensorData* data) const {
+    LOG(ERROR) << "The Encode() method is not implemented for "
+                  "DatasetVariantWrapper objects.";
+  }
+  bool Decode(const VariantTensorData& data) {
+    LOG(ERROR) << "The Decode() method is not implemented for "
+                  "DatasetVariantWrapper objects.";
+    return false;
+  }
+
+ private:
+  DatasetBase* const dataset_;  // Owns one reference.
+};
+
+}  // namespace
+
+Status GraphDefBuilderWrapper::AddDataset(
+    const GraphDatasetBase* dataset,
+    const std::vector<std::pair<size_t, Node*>>& inputs,
+    const std::vector<std::pair<size_t, gtl::ArraySlice<Node*>>>& list_inputs,
+    const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
+    Node** output) {
+  const string& op_type_name = dataset->op_name();
+  std::unique_ptr<const GraphDefBuilder::Options> opts(
+      new GraphDefBuilder::Options(b_->opts()));
+  // TODO(srbs|mrry): Not all datasets have output_types and output_shapes
+  // attributes defined. It will be nice to have a consistent pattern.
+  bool has_output_types_attr = HasAttr(op_type_name, "output_types");
+  bool has_output_shapes_attr = HasAttr(op_type_name, "output_shapes");
+  if (has_output_shapes_attr) {
+    opts.reset(new GraphDefBuilder::Options(
+        opts->WithAttr("output_shapes", dataset->output_shapes())));
+  }
+  if (has_output_types_attr) {
+    opts.reset(new GraphDefBuilder::Options(
+        opts->WithAttr("output_types", dataset->output_dtypes())));
+  }
+  for (auto attr : attrs) {
+    opts.reset(
+        new GraphDefBuilder::Options(opts->WithAttr(attr.first, attr.second)));
+  }
+  if (opts->HaveError()) {
+    return errors::Internal("AddDataset: Failed to build Options with error ",
+                            opts->StatusToString());
+  }
+  NodeBuilder node_builder(opts->GetNameForOp(op_type_name), op_type_name,
+                           opts->op_registry());
+  {
+    size_t total_size = inputs.size() + list_inputs.size();
+    auto inputs_iter = inputs.begin();
+    auto list_inputs_iter = list_inputs.begin();
+    for (int i = 0; i < total_size; i++) {
+      if (inputs_iter != inputs.end() && inputs_iter->first == i) {
+        node_builder.Input(NodeBuilder::NodeOut(inputs_iter->second));
+        inputs_iter++;
+      } else if (list_inputs_iter != list_inputs.end() &&
+                 list_inputs_iter->first == i) {
+        std::vector<NodeBuilder::NodeOut> nodeout_inputs;
+        nodeout_inputs.reserve(list_inputs_iter->second.size());
+        for (Node* n : list_inputs_iter->second) {
+          nodeout_inputs.emplace_back(n);
+        }
+        node_builder.Input(nodeout_inputs);
+        list_inputs_iter++;
+      } else {
+        return errors::InvalidArgument("No input found for index ", i);
+      }
+    }
+  }
+  *output = opts->FinalizeBuilder(&node_builder);
+  if (*output == nullptr) {
+    return errors::Internal("AddDataset: Failed to build ", op_type_name,
+                            " op with error ", opts->StatusToString());
+  }
+  return Status::OK();
+}
+
+Status GraphDefBuilderWrapper::AddFunction(OpKernelContext* ctx,
+                                           const string& function_name) {
+  if (b_->HasFunction(function_name)) {
+    LOG(INFO) << "Function with name " << function_name << "already exists in"
+              << " the graph. It will not be added again.";
+    return Status::OK();
+  }
+  TF_RETURN_IF_ERROR(EnsureFunctionIsStateless(ctx, function_name));
+  const FunctionLibraryDefinition* flib_def =
+      ctx->function_library()->GetFunctionLibraryDefinition();
+  const FunctionDef* f_def = flib_def->Find(function_name);
+  if (f_def == nullptr) {
+    return errors::InvalidArgument("Unable to find FunctionDef for ",
+                                   function_name, " in the registry.");
+  }
+  FunctionDefLibrary def;
+  *def.add_function() = *f_def;
+  const string gradient_func = flib_def->FindGradient(function_name);
+  if (!gradient_func.empty()) {
+    GradientDef* g_def = def.add_gradient();
+    g_def->set_function_name(function_name);
+    g_def->set_gradient_func(gradient_func);
+  }
+  TF_RETURN_IF_ERROR(b_->AddFunctionLibrary(def));
+
+  // Recursively add functions in inputs of function_name.
+  for (const NodeDef& node_def : f_def->node_def()) {
+    const OpRegistrationData* op_reg_data = nullptr;
+    TF_RETURN_IF_ERROR(flib_def->LookUp(node_def.op(), &op_reg_data));
+    if (op_reg_data->is_function_op) {
+      TF_RETURN_IF_ERROR(AddFunction(ctx, op_reg_data->op_def.name()));
+    }
+    // Recursively add functions in attrs of this NodeDef.
+    for (const auto& pair : node_def.attr()) {
+      TF_RETURN_IF_ERROR(AddAttrFunctions(pair.second, ctx));
+    }
+  }
+
+  // Recursively add functions in attrs of function_name.
+  for (auto iter = f_def->attr().begin(); iter != f_def->attr().end(); iter++) {
+    TF_RETURN_IF_ERROR(AddAttrFunctions(iter->second, ctx));
+  }
+  return Status::OK();
+}
+
+void GraphDefBuilderWrapper::AddTensorInternal(const Tensor& val,
+                                               Node** output) {
+  *output = ops::SourceOp(
+      "Const",
+      b_->opts().WithAttr("dtype", val.dtype()).WithAttr("value", val));
+}
+
+bool GraphDefBuilderWrapper::HasAttr(const string& op_type_name,
+                                     const string& attr_name) const {
+  const OpDef* op_def = nullptr;
+  Status s = b_->opts().op_registry()->LookUpOpDef(op_type_name, &op_def);
+  if (!s.ok() || op_def == nullptr) {
+    return false;
+  }
+  return HasAttr(op_def, attr_name);
+}
+
+Status GraphDatasetBase::Serialize(OpKernelContext* ctx,
+                                   string* serialized_graph_def,
+                                   string* output_node) const {
+  GraphDefBuilder b;
+  DatasetGraphDefBuilder db(&b);
+  Node* node = nullptr;
+  TF_RETURN_IF_ERROR(AsGraphDefInternal(ctx, &db, &node));
+  *output_node = node->name();
+  GraphDef graph_def;
+  TF_RETURN_IF_ERROR(b.ToGraphDef(&graph_def));
+  graph_def.SerializeToString(serialized_graph_def);
+  return Status::OK();
+}
+
+Status GetDatasetFromVariantTensor(const Tensor& tensor,
+                                   DatasetBase** out_dataset) {
+  if (!(tensor.dtype() == DT_VARIANT ||
+        TensorShapeUtils::IsScalar(tensor.shape()))) {
+    return errors::InvalidArgument(
+        "Dataset tensor must be a scalar of dtype DT_VARIANT.");
+  }
+  const Variant& variant = tensor.scalar<Variant>()();
+  const DatasetVariantWrapper* wrapper = variant.get<DatasetVariantWrapper>();
+  if (wrapper == nullptr) {
+    return errors::InvalidArgument("Tensor must be a Dataset object.");
+  }
+  *out_dataset = wrapper->get();
+  if (*out_dataset == nullptr) {
+    return errors::Internal("Read uninitialized Dataset variant.");
+  }
+  return Status::OK();
+}
+
+Status StoreDatasetInVariantTensor(DatasetBase* dataset, Tensor* tensor) {
+  if (!(tensor->dtype() == DT_VARIANT ||
+        TensorShapeUtils::IsScalar(tensor->shape()))) {
+    return errors::InvalidArgument(
+        "Dataset tensor must be a scalar of dtype DT_VARIANT.");
+  }
+  tensor->scalar<Variant>()() = DatasetVariantWrapper(dataset);
+  return Status::OK();
+}
+
+void DatasetOpKernel::Compute(OpKernelContext* ctx) {
+  DatasetBase* dataset = nullptr;
+  MakeDataset(ctx, &dataset);
+  if (ctx->status().ok()) {
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &output));
+    OP_REQUIRES_OK(ctx, StoreDatasetInVariantTensor(dataset, output));
+  }
+}
+
+void UnaryDatasetOpKernel::MakeDataset(OpKernelContext* ctx,
+                                       DatasetBase** output) {
+  DatasetBase* input;
+  OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(0), &input));
+  MakeDataset(ctx, input, output);
+}
+
+void BinaryDatasetOpKernel::MakeDataset(OpKernelContext* ctx,
+                                        DatasetBase** output) {
+  DatasetBase* input;
+  OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(0), &input));
+  DatasetBase* another_input;
+  OP_REQUIRES_OK(ctx,
+                 GetDatasetFromVariantTensor(ctx->input(1), &another_input));
+  MakeDataset(ctx, input, another_input, output);
+}
+
+const char GraphDatasetBase::kDatasetGraphKey[] = "_DATASET_GRAPH";
+const char GraphDatasetBase::kDatasetGraphOutputNodeKey[] =
+    "_DATASET_GRAPH_OUTPUT_NODE";
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/dataset.h b/tensorflow/core/kernels/data/dataset.h
new file mode 100644
index 0000000000000000000000000000000000000000..3cb3c08a327d00cda565a09851a1faf6d79a4842
--- /dev/null
+++ b/tensorflow/core/kernels/data/dataset.h
@@ -0,0 +1,599 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_H_
+
+#include <memory>
+
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/variant_encode_decode.h"
+#include "tensorflow/core/framework/variant_tensor_data.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/tracing.h"
+
+// Polymorphic datasets should support all primitive TensorFlow
+// types. Use this macro to expand `m(T)` once for each primitive type
+// `T`, e.g. to build a `switch` statement.
+#define TF_CALL_DATASET_TYPES(m) TF_CALL_ALL_TYPES(m) TF_CALL_QUANTIZED_TYPES(m)
+
+namespace tensorflow {
+
+// Interface for reading values from a key-value store.
+// Used for restoring iterator state.
+class IteratorStateReader {
+ public:
+  virtual Status ReadScalar(StringPiece key, int64* val) = 0;
+  virtual Status ReadScalar(StringPiece key, string* val) = 0;
+  virtual Status ReadTensor(StringPiece key, Tensor* val) = 0;
+  virtual bool Contains(StringPiece key) = 0;
+
+  virtual ~IteratorStateReader() {}
+};
+
+// Interface for writing values to a key-value store.
+// Used for saving iterator state.
+class IteratorStateWriter {
+ public:
+  virtual Status WriteScalar(StringPiece key, const int64 val) = 0;
+  virtual Status WriteScalar(StringPiece key, const string& val) = 0;
+  virtual Status WriteTensor(StringPiece key, const Tensor& val) = 0;
+
+  virtual ~IteratorStateWriter() {}
+};
+
+// Forward declarations to avoid introducing a dependency on headers in
+// "tensorflow/core/graph/...".
+class GraphDefBuilder;
+class GraphDatasetBase;
+class Node;
+
+// Wrapper around GraphDefBuilder. Used to serialize Dataset graph.
+class GraphDefBuilderWrapper {
+ public:
+  explicit GraphDefBuilderWrapper(GraphDefBuilder* b) : b_(b) {}
+
+  // Adds a Const node with scalar value to the Graph.
+  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
+  // non-null if the method returns with an OK status.
+  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
+  template <typename T>
+  Status AddScalar(const T& val, Node** output) {
+    Tensor val_t = Tensor(DataTypeToEnum<T>::v(), TensorShape({}));
+    val_t.scalar<T>()() = val;
+    AddTensorInternal(val_t, output);
+    if (*output == nullptr) {
+      return errors::Internal("AddScalar: Failed to build Const op.");
+    }
+    return Status::OK();
+  }
+
+  // Adds a Const node with vector value to the Graph.
+  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
+  // non-null if the method returns with an OK status.
+  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
+  // TODO(shivaniagrawal): Consider changing to gtl::ArraySlice?
+  template <typename T>
+  Status AddVector(const std::vector<T>& val, Node** output) {
+    Tensor val_t = Tensor(DataTypeToEnum<T>::v(),
+                          TensorShape({static_cast<int64>(val.size())}));
+    for (int i = 0; i < val.size(); i++) {
+      val_t.flat<T>()(i) = val[i];
+    }
+    AddTensorInternal(val_t, output);
+    if (*output == nullptr) {
+      return errors::Internal("AddVector: Failed to build Const op.");
+    }
+    return Status::OK();
+  }
+
+  // Adds a Const node with Tensor value to the Graph.
+  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
+  // non-null if the method returns with an OK status.
+  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
+  Status AddTensor(const Tensor& val, Node** output) {
+    AddTensorInternal(val, output);
+    if (*output == nullptr) {
+      return errors::Internal("AddTensor: Failed to build Const op.");
+    }
+    return Status::OK();
+  }
+
+  Status AddDataset(const GraphDatasetBase* dataset,
+                    const std::vector<Node*>& inputs, Node** output) {
+    return AddDataset(dataset, inputs, {}, output);
+  }
+
+  // Adds a node corresponding to the `DatasetType` to the Graph.
+  // Return value of `DatasetType::op_name()` is used as the op type for the
+  // node.
+  // Values for the output_types and output_shapes node attributes are also
+  // written if those attributes are defined in the OpDef.
+  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
+  // non-null if the method returns with an OK status.
+  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
+  Status AddDataset(const GraphDatasetBase* dataset,
+                    const std::vector<Node*>& inputs,
+                    const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
+                    Node** output) {
+    std::vector<std::pair<size_t, Node*>> enumerated_inputs(inputs.size());
+    for (int i = 0; i < inputs.size(); i++) {
+      enumerated_inputs[i] = std::make_pair(i, inputs[i]);
+    }
+    return AddDataset(dataset, enumerated_inputs, {}, attrs, output);
+  }
+
+  Status AddDataset(
+      const GraphDatasetBase* dataset,
+      const std::vector<std::pair<size_t, Node*>>& inputs,
+      const std::vector<std::pair<size_t, gtl::ArraySlice<Node*>>>& list_inputs,
+      const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
+      Node** output);
+
+  // Adds a user-defined function with name `function_name` to the graph and
+  // recursively adds all functions it references. If a function with a matching
+  // name has already been added, returns with OK status. If a user-defined with
+  // name `function_name` is not found in the FunctionLibraryDefinition, returns
+  // an InvalidArgumentError. If the function with name `function_name` or any
+  // of its dependent functions are stateful, returns an InvalidArgument error.
+  Status AddFunction(OpKernelContext* ctx, const string& function_name);
+
+  template <typename T>
+  void BuildAttrValue(const T& value, AttrValue* attr) {
+    SetAttrValue(value, attr);
+  }
+
+ private:
+  void AddTensorInternal(const Tensor& val, Node** output);
+
+  Status EnsureFunctionIsStateless(OpKernelContext* ctx,
+                                   const string& function_name) const {
+    const FunctionLibraryDefinition* lib_def =
+        ctx->function_library()->GetFunctionLibraryDefinition();
+    const FunctionDef* function_def = lib_def->Find(function_name);
+    if (!function_def) {
+      return errors::InvalidArgument("Unable to find FunctionDef for ",
+                                     function_name, " in registry.");
+    }
+    for (const NodeDef& node_def : function_def->node_def()) {
+      const OpDef* op_def;
+      TF_RETURN_IF_ERROR(lib_def->LookUpOpDef(node_def.op(), &op_def));
+      // TODO(b/65524810): Hack to allow functions to capture Dataset op
+      // nodes needed for FlatMap. Currently, source datasets nodes have been
+      // marked stateful to avoid constant folding since we do not have a
+      // good way of serializing them.
+      if (IsOpWhitelisted(op_def)) {
+        continue;
+      }
+      if (op_def->is_stateful()) {
+        return errors::InvalidArgument(
+            "Op[name: ", node_def.name(), ", type: ", node_def.op(), "] ",
+            "in function ", function_name, " is stateful. ",
+            "Saving stateful functions is not supported yet.");
+      }
+    }
+    return Status::OK();
+  }
+
+  bool IsOpWhitelisted(const OpDef* op_def) const {
+    return StringPiece(op_def->name()).ends_with("Dataset") &&
+           HasAttr(op_def, "output_shapes");
+  }
+
+  bool HasAttr(const string& op_type_name, const string& attr_name) const;
+
+  bool HasAttr(const OpDef* op_def, const string& attr_name) const {
+    for (auto attr : op_def->attr()) {
+      if (attr.name() == attr_name) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  Status AddAttrFunctions(const AttrValue& attr_value, OpKernelContext* ctx) {
+    if (attr_value.has_func()) {
+      TF_RETURN_IF_ERROR(AddFunction(ctx, attr_value.func().name()));
+    } else if (attr_value.has_list()) {
+      for (const NameAttrList& name_attr_list : attr_value.list().func()) {
+        TF_RETURN_IF_ERROR(AddFunction(ctx, name_attr_list.name()));
+      }
+    }
+    return Status::OK();
+  }
+
+  GraphDefBuilder* b_;
+};
+
+class StatsAggregator;
+
+// A cut-down version of OpKernelContext for running computations in
+// iterators. Note that we cannot simply use OpKernelContext here
+// because we might run computation in an iterator whose lifetime is
+// not nested within the lifetime of a single OpKernelContext
+// (e.g. asynchronous prefetching).
+//
+// TODO(mrry): We will probably need to support more of
+// OpKernelContext here. For example, should allocation be handled by
+// the IteratorContext?
+// TODO(mrry): We're making some daring assumptions about the lifetime
+// of the runner passed in here. A runner will be deleted when the original
+// step ends, but all existing runners only close over session-lifetime (or
+// longer-lived) state, so we can make a copy of the function. There's nothing
+// in the definition of the API from which we took the runner to guarantee that
+// what we are doing is safe. We should formalize the properties here.
+class IteratorContext {
+ public:
+  struct Params {
+    // Interface to operating system functionality.
+    Env* env;
+
+    // Function call support.
+    std::function<void(std::function<void()>)> runner = nullptr;
+
+    // A function that returns the current `StatsAggregator` instance to be
+    // used when recording statistics about the iterator.
+    //
+    // NOTE(mrry): This is somewhat awkward, because (i) the `StatsAggregator`
+    // is a property of the `IteratorResource` (which this class does not know
+    // about), and (ii) it can change after the `IteratorContext` has been
+    // created. Better suggestions are welcome!
+    std::function<std::shared_ptr<StatsAggregator>()> stats_aggregator_getter =
+        nullptr;
+
+    // The FunctionLibraryRuntime object to be used to make function calls.
+    FunctionLibraryRuntime* lib = nullptr;
+    std::shared_ptr<const FunctionLibraryDefinition> function_library = nullptr;
+  };
+
+  explicit IteratorContext(Params params) : params_(std::move(params)) {}
+
+  Env* env() const { return params_.env; }
+
+  std::function<void(std::function<void()>)>* runner() {
+    return &params_.runner;
+  }
+
+  std::shared_ptr<StatsAggregator> stats_aggregator() {
+    if (params_.stats_aggregator_getter) {
+      return params_.stats_aggregator_getter();
+    } else {
+      return nullptr;
+    }
+  }
+
+  std::shared_ptr<const FunctionLibraryDefinition> function_library() {
+    return params_.function_library;
+  }
+
+  FunctionLibraryRuntime* lib() { return params_.lib; }
+
+  void set_lib(FunctionLibraryRuntime* lib) { params_.lib = lib; }
+
+ private:
+  Params params_;
+};
+
+// Represents the current position in a range of outputs, where the
+// range of outputs is typically represented by an `DatasetBase`,
+// defined below.
+class IteratorBase {
+ public:
+  virtual ~IteratorBase() {}
+
+  // Gets the next output from the range that this iterator is traversing.
+  //
+  // If at least one output remains in this iterator's range, that
+  // output will be stored in `*out_tensors` and `false` will be
+  // stored in `*end_of_sequence`.
+  //
+  // If no more outputs remain in this iterator's range, `true` will
+  // be stored in `*end_of_sequence`, and the content of
+  // `*out_tensors` will be undefined.
+  //
+  // This method is thread-safe.
+  //
+  // TODO(mrry): Define `GetNextAsync()` or `GetNextManyAsync()`, and
+  // potentially remove this method.
+  virtual Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
+                         bool* end_of_sequence) = 0;
+
+  // Returns a vector of DataType values, representing the respective
+  // element types of each tuple component in the outputs of this
+  // iterator.
+  virtual const DataTypeVector& output_dtypes() const = 0;
+
+  // Returns a vector of tensor shapes, representing the respective
+  // (and possibly partially defined) shapes of each tuple component
+  // in the outputs of this iterator.
+  virtual const std::vector<PartialTensorShape>& output_shapes() const = 0;
+
+  // Saves the state of this iterator.
+  virtual Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) {
+    return SaveInternal(writer);
+  }
+
+  // Restores the state of this iterator.
+  virtual Status Restore(IteratorContext* ctx, IteratorStateReader* reader) {
+    return RestoreInternal(ctx, reader);
+  }
+
+ protected:
+  // This is needed so that sub-classes of IteratorBase can call
+  // `SaveInternal` on their parent iterators, e.g., in
+  // `RepeatDataasetOp::Dataset`.
+  Status SaveParent(IteratorStateWriter* writer,
+                    const std::unique_ptr<IteratorBase>& parent) {
+    return parent->SaveInternal(writer);
+  }
+
+  // This is needed so that sub-classes of IteratorBase can call
+  // `RestoreInternal` on their parent iterators, e.g., in
+  // `RepeatDataasetOp::Dataset`.
+  Status RestoreParent(IteratorContext* ctx, IteratorStateReader* reader,
+                       const std::unique_ptr<IteratorBase>& parent) {
+    return parent->RestoreInternal(ctx, reader);
+  }
+
+  // Saves the state of this iterator recursively.
+  virtual Status SaveInternal(IteratorStateWriter* writer) {
+    return errors::Unimplemented("SaveInternal");
+  }
+
+  // Restores the state of this iterator recursively.
+  virtual Status RestoreInternal(IteratorContext* ctx,
+                                 IteratorStateReader* reader) {
+    return errors::Unimplemented("RestoreInternal");
+  }
+};
+
+// Represents a (potentially infinite) range of outputs, where each
+// output is a tuple of tensors.
+class DatasetBase : public core::RefCounted {
+ public:
+  // Returns a new iterator for iterating over the range of elements in
+  // this dataset.
+  //
+  // This method may be called multiple times on the same instance,
+  // and the resulting iterators will have distinct state. Each
+  // iterator will traverse all elements in this dataset from the
+  // start.
+  //
+  // Ownership of the created iterator will be transferred to the caller.
+  //
+  // The prefix identifies the sequence of iterators leading up to the newly
+  // created iterator.
+  virtual std::unique_ptr<IteratorBase> MakeIterator(
+      const string& prefix) const = 0;
+
+  // Returns a vector of DataType values, representing the respective
+  // element types of each tuple component in the outputs of this
+  // dataset.
+  virtual const DataTypeVector& output_dtypes() const = 0;
+
+  // Returns a vector of tensor shapes, representing the respective
+  // (and possibly partially defined) shapes of each tuple component
+  // in the outputs of this dataset.
+  virtual const std::vector<PartialTensorShape>& output_shapes() const = 0;
+
+  // A human-readable debug string for this dataset.
+  virtual string DebugString() = 0;
+
+  // Serializes the dataset and writes it to the `writer`.
+  virtual Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) const {
+    return errors::Unimplemented("DatasetBase::Save");
+  }
+
+ protected:
+  // TODO(srbs): Ideally all graph related logic should reside in
+  // GraphDatasetBase. However, that would require Datasets defined in all ops
+  // to derive from GraphDatasetBase. Once that is done we can move
+  // DatasetGraphDefBuilder and AsGraphDefInternal to GraphDatasetBase.
+  class DatasetGraphDefBuilder : public GraphDefBuilderWrapper {
+   public:
+    DatasetGraphDefBuilder(GraphDefBuilder* b) : GraphDefBuilderWrapper(b) {}
+    Status AddParentDataset(OpKernelContext* ctx, const DatasetBase* dataset,
+                            Node** output) {
+      return dataset->AsGraphDefInternal(ctx, this, output);
+    }
+  };
+
+  virtual Status AsGraphDefInternal(OpKernelContext* ctx,
+                                    DatasetGraphDefBuilder* b,
+                                    Node** node) const {
+    return AsGraphDefInternal(b, node);
+  }
+
+  virtual Status AsGraphDefInternal(DatasetGraphDefBuilder* b,
+                                    Node** node) const {
+    return errors::Unimplemented("AsGraphDefInternal");
+  }
+};
+
+// Base-class for datasets that are built by ops.
+class GraphDatasetBase : public DatasetBase {
+ public:
+  GraphDatasetBase(OpKernelContext* ctx)
+      : op_name_(ctx->op_kernel().type_string()) {}
+
+  const string op_name() const { return op_name_; }
+
+  Status Save(OpKernelContext* ctx,
+              IteratorStateWriter* writer) const override {
+    string serialized_graph_def;
+    string output_node;
+    TF_RETURN_IF_ERROR(Serialize(ctx, &serialized_graph_def, &output_node));
+    TF_RETURN_IF_ERROR(
+        writer->WriteScalar(kDatasetGraphKey, serialized_graph_def));
+    TF_RETURN_IF_ERROR(
+        writer->WriteScalar(kDatasetGraphOutputNodeKey, output_node));
+    return Status::OK();
+  }
+
+  // Key for storing the Dataset graph in the serialized format.
+  static const char kDatasetGraphKey[];
+
+  // Key for storing the output node of the Dataset graph in the serialized
+  // format.
+  static const char kDatasetGraphOutputNodeKey[];
+
+ private:
+  Status Serialize(OpKernelContext* ctx, string* serialized_graph_def,
+                   string* output_node) const;
+
+  const string op_name_;
+};
+
+// Represents an iterator that is associated with a particular parent dataset.
+template <class DatasetType>
+class DatasetIterator : public IteratorBase {
+ public:
+  struct Params {
+    // Owns one reference on the shared dataset resource.
+    const DatasetType* dataset;
+
+    // Identifies the sequence of iterators leading up to this iterator.
+    const string prefix;
+  };
+
+  explicit DatasetIterator(const Params& params) : params_(params) {
+    params_.dataset->Ref();
+  }
+
+  ~DatasetIterator() override { params_.dataset->Unref(); }
+
+  // The dataset from which this iterator was created.
+  const DatasetType* dataset() const { return params_.dataset; }
+
+  // The sequence of iterators leading up to this iterator.
+  const string prefix() const { return params_.prefix; }
+
+  const DataTypeVector& output_dtypes() const override {
+    return params_.dataset->output_dtypes();
+  }
+
+  const std::vector<PartialTensorShape>& output_shapes() const override {
+    return params_.dataset->output_shapes();
+  }
+
+  Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
+                 bool* end_of_sequence) final {
+    port::Tracing::TraceMe activity(params_.prefix);
+    Status s = GetNextInternal(ctx, out_tensors, end_of_sequence);
+    if (TF_PREDICT_FALSE(errors::IsOutOfRange(s) && !*end_of_sequence)) {
+      s = errors::Internal(
+          "Iterator \"", params_.prefix,
+          "\" returned OutOfRange without setting `*end_of_sequence`. This "
+          "indicates that an error may have occurred. Original message: ",
+          s.error_message());
+      LOG(ERROR) << s;
+    }
+    return s;
+  }
+
+  Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) final {
+    TF_RETURN_IF_ERROR(dataset()->Save(ctx, writer));
+    return IteratorBase::Save(ctx, writer);
+  }
+
+ protected:
+  // Internal implementation of GetNext that is wrapped in tracing logic.
+  virtual Status GetNextInternal(IteratorContext* ctx,
+                                 std::vector<Tensor>* out_tensors,
+                                 bool* end_of_sequence) = 0;
+
+  string full_name(const string& name) const {
+    return strings::StrCat(prefix(), ":", name);
+  }
+
+ private:
+  Params params_;
+};
+
+// Encapsulates the work required to plug a DatasetBase into the core TensorFlow
+// graph execution engine.
+class DatasetOpKernel : public OpKernel {
+ public:
+  DatasetOpKernel(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+  void Compute(OpKernelContext* ctx) final;
+
+ protected:
+  // Subclasses should implement this method. It will be called during Compute
+  // execution.
+  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase** output) = 0;
+
+  template <typename T>
+  Status ParseScalarArgument(OpKernelContext* ctx,
+                             const StringPiece& argument_name, T* output) {
+    const Tensor* argument_t;
+    TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
+    if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
+      return errors::InvalidArgument(argument_name, " must be a scalar");
+    }
+    *output = argument_t->scalar<T>()();
+    return Status::OK();
+  }
+};
+
+// Encapsulates the work required to plug unary Datasets into the core
+// TensorFlow graph execution engine.
+class UnaryDatasetOpKernel : public DatasetOpKernel {
+ public:
+  UnaryDatasetOpKernel(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) {}
+
+ protected:
+  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) final;
+  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                           DatasetBase** output) = 0;
+};
+
+// Encapsulates the work required to plug binary Datasets into the core
+// TensorFlow graph execution engine.
+class BinaryDatasetOpKernel : public DatasetOpKernel {
+ public:
+  BinaryDatasetOpKernel(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) {}
+
+ protected:
+  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) final;
+  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                           DatasetBase* another_input,
+                           DatasetBase** output) = 0;
+};
+
+// Validates and extracts a `DatasetBase` object from `tensor`.
+//
+// `tensor` must have been written by a call to SetVariantTensorToDataset().
+//
+// The retrieved pointer is a borrowed reference to the dataset, which is owned
+// by the tensor. The consumer must either acquire its own reference to the
+// dataset by calling `(*out_dataset)->Ref()`, or ensure that `tensor` is not
+// destroyed or mutated while the retrieved pointer is in use.
+Status GetDatasetFromVariantTensor(const Tensor& tensor,
+                                   DatasetBase** out_dataset);
+
+// Stores a `DatasetBase` object in `tensor`.
+//
+// The ownership of `dataset` is transferred to `tensor`.
+Status StoreDatasetInVariantTensor(DatasetBase* dataset, Tensor* tensor);
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_H_
diff --git a/tensorflow/core/kernels/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
similarity index 68%
rename from tensorflow/core/kernels/dataset_utils.cc
rename to tensorflow/core/kernels/data/dataset_utils.cc
index cd58c8091211ae75265f6cfecb65746965f98d2f..e3a3601ee847148c459ab33decb8528f8b96521d 100644
--- a/tensorflow/core/kernels/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/dataset_utils.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 
 namespace tensorflow {
 
@@ -23,21 +23,10 @@ Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
     std::unique_ptr<IteratorBase>* out_iterator) {
-  FunctionLibraryRuntime::Options opts;
-  opts.runner = ctx->runner();
-  // Choose a step ID that is guaranteed not to clash with any
-  // Session-generated step ID. DirectSession only generates
-  // non-negative step IDs (contiguous, starting from 0), and
-  // MasterSession generates 56-bit random step IDs whose MSB
-  // is always 0, so a negative random step ID should suffice.
-  opts.step_id = CapturedFunction::generate_step_id();
-  ScopedStepContainer step_container(
-      opts.step_id, [captured_func, ctx](const string& name) {
-        captured_func->resource_manager()->Cleanup(name).IgnoreError();
-      });
-  opts.step_container = &step_container;
   std::vector<Tensor> return_values;
-  TF_RETURN_IF_ERROR(captured_func->Run(opts, input_element, &return_values));
+
+  TF_RETURN_IF_ERROR(
+      captured_func->RunWithBorrowedArgs(ctx, input_element, &return_values));
 
   if (!(return_values.size() == 1 && return_values[0].dtype() == DT_VARIANT &&
         TensorShapeUtils::IsScalar(return_values[0].shape()))) {
diff --git a/tensorflow/core/kernels/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
similarity index 77%
rename from tensorflow/core/kernels/dataset_utils.h
rename to tensorflow/core/kernels/data/dataset_utils.h
index eea2b8802b813808f752659a469c3818a52162d2..40bc8735847f56157d81f6d5fb7a2d02291232fe 100644
--- a/tensorflow/core/kernels/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_UTILS_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_UTILS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_UTILS_H_
 
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -32,4 +32,4 @@ Status MakeIteratorFromInputElement(
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_UTILS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_DATASET_UTILS_H_
diff --git a/tensorflow/core/kernels/dense_to_sparse_batch_dataset_op.cc b/tensorflow/core/kernels/data/dense_to_sparse_batch_dataset_op.cc
similarity index 77%
rename from tensorflow/core/kernels/dense_to_sparse_batch_dataset_op.cc
rename to tensorflow/core/kernels/data/dense_to_sparse_batch_dataset_op.cc
index e80d11eaea1640c54c21a7b94a2f043099c790f3..e7224bb547f60f943c7c91c37edfbbf561f5351a 100644
--- a/tensorflow/core/kernels/dense_to_sparse_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/dense_to_sparse_batch_dataset_op.cc
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -56,10 +56,10 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel {
 
     *output = nullptr;
 
-#define HANDLE_TYPE(T)                                      \
-  case DataTypeToEnum<T>::value: {                          \
-    *output = new Dataset<T>(batch_size, row_shape, input); \
-    break;                                                  \
+#define HANDLE_TYPE(T)                                           \
+  case DataTypeToEnum<T>::value: {                               \
+    *output = new Dataset<T>(ctx, batch_size, row_shape, input); \
+    break;                                                       \
   }
 
     switch (input->output_dtypes()[0]) {
@@ -76,18 +76,20 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel {
  private:
   // TODO(mrry): Push the templated code down to the raw copying routine.
   template <class T>
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    Dataset(int64 batch_size, const PartialTensorShape& row_shape,
-            const DatasetBase* input)
-        : batch_size_(batch_size), row_shape_(row_shape), input_(input) {
+    Dataset(OpKernelContext* ctx, int64 batch_size,
+            const PartialTensorShape& row_shape, const DatasetBase* input)
+        : GraphDatasetBase(ctx),
+          batch_size_(batch_size),
+          row_shape_(row_shape),
+          input_(input) {
       input_->Ref();
 
-      output_shapes_.reserve(3);
-      // Outputs represent a SparseTensor as (indices, values, dense_shape).
-      output_shapes_.push_back({-1, row_shape_.dims() + 1});
-      output_shapes_.push_back({-1});
-      output_shapes_.push_back({row_shape_.dims() + 1});
+      output_shapes_.reserve(1);
+      PartialTensorShape output_shape({-1});
+      output_shape.AppendShape(row_shape_);
+      output_shapes_.push_back(output_shape);
     }
 
     ~Dataset() override { input_->Unref(); }
@@ -99,8 +101,7 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel {
     }
 
     const DataTypeVector& output_dtypes() const override {
-      static DataTypeVector* output_dtypes_ =
-          new DataTypeVector({DT_INT64, DataTypeToEnum<T>::value, DT_INT64});
+      static DataTypeVector* output_dtypes_ = new DataTypeVector({DT_VARIANT});
       return *output_dtypes_;
     }
 
@@ -113,6 +114,25 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel {
                              ")::Dataset");
     }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_node;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_node));
+      Node* batch_size_node;
+      TF_RETURN_IF_ERROR(b->AddScalar(batch_size_, &batch_size_node));
+      Node* row_shape_node;
+      std::vector<int64> row_shape;
+      row_shape.reserve(
+          row_shape_.dims());  // not an unknown rank PartialTensorShape
+      for (int i = 0; i < row_shape_.dims(); i++)
+        row_shape.emplace_back(row_shape_.dim_size(i));
+      TF_RETURN_IF_ERROR(b->AddVector(row_shape, &row_shape_node));
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this, {input_node, batch_size_node, row_shape_node}, output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset<T>> {
      public:
@@ -199,7 +219,7 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel {
                        {total_elements, row_ndims + 1});
         Tensor values(
             cpu_allocator(),
-            DatasetIterator<Dataset<T>>::dataset()->output_dtypes()[1],
+            DatasetIterator<Dataset<T>>::dataset()->input_->output_dtypes()[0],
             {total_elements});
         auto indices_matrix = indices.matrix<int64>();
         auto values_flat = values.flat<T>();
@@ -235,14 +255,31 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel {
 
         dense_shape_vec(0) = batch_elements.size();
 
-        out_tensors->push_back(std::move(indices));
-        out_tensors->push_back(std::move(values));
-        out_tensors->push_back(std::move(dense_shape));
+        Tensor serialized_sparse(DT_VARIANT, TensorShape({3}));
+        auto serialized_sparse_t = serialized_sparse.vec<Variant>();
+        serialized_sparse_t(0) = std::move(indices);
+        serialized_sparse_t(1) = std::move(values);
+        serialized_sparse_t(2) = std::move(dense_shape);
+        out_tensors->push_back(std::move(serialized_sparse));
 
         *end_of_sequence = false;
         return Status::OK();
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(Iterator::SaveParent(writer, input_impl_));
+        return Status::OK();
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(Iterator::RestoreParent(ctx, reader, input_impl_));
+        return Status::OK();
+      }
+
      private:
       mutex mu_;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
diff --git a/tensorflow/core/kernels/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
similarity index 84%
rename from tensorflow/core/kernels/filter_dataset_op.cc
rename to tensorflow/core/kernels/data/filter_dataset_op.cc
index e4d80e4ce3a0d2070b9165dff598b11b190139eb..d16b5b7d416b85695287ccbab4bc4398a222c139 100644
--- a/tensorflow/core/kernels/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -12,15 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
-#include "tensorflow/core/kernels/captured_function.h"
-
 namespace tensorflow {
 
 namespace {
@@ -47,9 +45,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
     }
 
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(ctx, func_, graph_def_version_,
-                                                 std::move(other_arguments),
-                                                 &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
+                            func_, std::move(other_arguments), &captured_func));
 
     *output = new Dataset(ctx, input, func_, std::move(captured_func));
   }
@@ -95,7 +92,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
 
       DataTypeVector other_arguments_types;
       other_arguments_types.reserve(captured_func_->captured_inputs().size());
-      std::vector<NodeBuilder::NodeOut> other_arguments;
+      std::vector<Node*> other_arguments;
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
@@ -146,28 +143,14 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
             return Status::OK();
           }
 
-          FunctionLibraryRuntime::Options opts;
-          opts.step_id = CapturedFunction::generate_step_id();
-          ScopedStepContainer step_container(
-              opts.step_id, [this, ctx](const string& name) {
-                dataset()
-                    ->captured_func_->resource_manager()
-                    ->Cleanup(name)
-                    .IgnoreError();
-              });
-          opts.step_container = &step_container;
-          opts.runner = ctx->runner();
           // TODO(mrry): Avoid blocking a threadpool thread. We will need to
           // stack-rip the iterators and use async kernels.
-          Notification n;
-          Status ret;
           std::vector<Tensor> result;
-          ret = dataset()->captured_func_->Run(opts, *out_tensors, &result);
+          TF_RETURN_IF_ERROR(dataset()->captured_func_->RunWithBorrowedArgs(
+              ctx, *out_tensors, &result));
 
-          if (!ret.ok()) {
-            return ret;
-          } else if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
-                     result[0].NumElements() != 1) {
+          if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
+              result[0].NumElements() != 1) {
             return errors::InvalidArgument(
                 "Filter predicate `f` must return a scalar bool.");
           }
@@ -192,7 +175,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         if (reader->Contains(full_name("input_impls_empty")))
diff --git a/tensorflow/core/kernels/flat_map_dataset_op.cc b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
similarity index 95%
rename from tensorflow/core/kernels/flat_map_dataset_op.cc
rename to tensorflow/core/kernels/data/flat_map_dataset_op.cc
index ac1689e5bf19b350c1baf486e060019aa9d17c2c..77a48a2aa9b0a2be22ef9112cf985964457d65bf 100644
--- a/tensorflow/core/kernels/flat_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
@@ -12,16 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/random/random.h"
 
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset_utils.h"
-
 namespace tensorflow {
 
 namespace {
@@ -50,9 +48,8 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
     }
 
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(ctx, func_, graph_def_version_,
-                                                 std::move(other_arguments),
-                                                 &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
+                            func_, std::move(other_arguments), &captured_func));
 
     *output = new Dataset(ctx, input, func_, std::move(captured_func),
                           output_types_, output_shapes_);
@@ -102,7 +99,7 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
 
       DataTypeVector other_arguments_types;
       other_arguments_types.reserve(captured_func_->captured_inputs().size());
-      std::vector<NodeBuilder::NodeOut> other_arguments;
+      std::vector<Node*> other_arguments;
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
@@ -197,7 +194,7 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         input_impl_.reset();
@@ -252,6 +249,7 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
         IteratorContext::Params params;
         params.env = ctx->env();
         params.runner = *(ctx->runner());
+        params.lib = ctx->function_library();
         IteratorContext iter_ctx(std::move(params));
         return BuildCurrentElementIteratorLocked(&iter_ctx);
       }
diff --git a/tensorflow/core/kernels/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
similarity index 50%
rename from tensorflow/core/kernels/group_by_window_dataset_op.cc
rename to tensorflow/core/kernels/data/group_by_window_dataset_op.cc
index 8644bcf9b509b7aaf335791b583ad8e82073f471..eb047e10ecf738c90c18b9fea25f1b49fdf441c4 100644
--- a/tensorflow/core/kernels/group_by_window_dataset_op.cc
+++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
@@ -17,12 +17,11 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/window_dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset.h"
-#include "tensorflow/core/kernels/window_dataset.h"
-
 namespace tensorflow {
 
 namespace {
@@ -74,36 +73,42 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
     // TODO(mrry): Refactor CapturedFunction to share the runtime
     // state between multiple functions?
     std::unique_ptr<CapturedFunction> captured_key_func;
-    OP_REQUIRES_OK(ctx,
-                   CapturedFunction::Create(ctx, key_func_, graph_def_version_,
-                                            std::move(key_func_other_arguments),
-                                            &captured_key_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
+                            key_func_, std::move(key_func_other_arguments),
+                            &captured_key_func));
     std::unique_ptr<CapturedFunction> captured_reduce_func;
     OP_REQUIRES_OK(
-        ctx, CapturedFunction::Create(ctx, reduce_func_, graph_def_version_,
+        ctx, CapturedFunction::Create(reduce_func_,
                                       std::move(reduce_func_other_arguments),
                                       &captured_reduce_func));
     std::unique_ptr<CapturedFunction> captured_window_size_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
-                            ctx, window_size_func_, graph_def_version_,
-                            std::move(window_size_func_other_arguments),
-                            &captured_window_size_func));
+    OP_REQUIRES_OK(
+        ctx, CapturedFunction::Create(
+                 window_size_func_, std::move(window_size_func_other_arguments),
+                 &captured_window_size_func));
 
     *output = new Dataset(
-        input, std::move(captured_key_func), std::move(captured_reduce_func),
+        ctx, input, key_func_, reduce_func_, window_size_func_,
+        std::move(captured_key_func), std::move(captured_reduce_func),
         std::move(captured_window_size_func), output_types_, output_shapes_);
   }
 
  private:
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    Dataset(const DatasetBase* input,
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& key_func, const NameAttrList& reduce_func,
+            const NameAttrList& window_size_func,
             std::unique_ptr<CapturedFunction> captured_key_func,
             std::unique_ptr<CapturedFunction> captured_reduce_func,
             std::unique_ptr<CapturedFunction> captured_window_size_func,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes)
-        : input_(input),
+        : GraphDatasetBase(ctx),
+          input_(input),
+          key_func_(key_func),
+          reduce_func_(reduce_func),
+          window_size_func_(window_size_func),
           captured_key_func_(std::move(captured_key_func)),
           captured_reduce_func_(std::move(captured_reduce_func)),
           captured_window_size_func_(std::move(captured_window_size_func)),
@@ -129,6 +134,67 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
 
     string DebugString() override { return "GroupByWindowDatasetOp::Dataset"; }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, key_func_.name()));
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, reduce_func_.name()));
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, window_size_func_.name()));
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node));
+
+      std::vector<Node*> key_func_other_arguments_node;
+      DataTypeVector key_func_other_arguments_types;
+      TF_RETURN_IF_ERROR(OtherArgumentsNodeAndType(
+          b, captured_key_func_, &key_func_other_arguments_node,
+          &key_func_other_arguments_types));
+
+      std::vector<Node*> reduce_func_other_arguments_node;
+      DataTypeVector reduce_func_other_arguments_types;
+      TF_RETURN_IF_ERROR(OtherArgumentsNodeAndType(
+          b, captured_reduce_func_, &reduce_func_other_arguments_node,
+          &reduce_func_other_arguments_types));
+
+      std::vector<Node*> window_size_func_other_arguments_node;
+      DataTypeVector window_size_func_other_arguments_types;
+      TF_RETURN_IF_ERROR(OtherArgumentsNodeAndType(
+          b, captured_window_size_func_, &window_size_func_other_arguments_node,
+          &window_size_func_other_arguments_types));
+
+      AttrValue key_func;
+      b->BuildAttrValue(key_func_, &key_func);
+      AttrValue reduce_func;
+      b->BuildAttrValue(reduce_func_, &reduce_func);
+      AttrValue window_size_func;
+      b->BuildAttrValue(window_size_func_, &window_size_func);
+
+      AttrValue key_func_other_arguments_types_attr;
+      b->BuildAttrValue(key_func_other_arguments_types,
+                        &key_func_other_arguments_types_attr);
+      AttrValue reduce_func_other_arguments_types_attr;
+      b->BuildAttrValue(reduce_func_other_arguments_types,
+                        &reduce_func_other_arguments_types_attr);
+      AttrValue window_size_func_other_arguments_types_attr;
+      b->BuildAttrValue(window_size_func_other_arguments_types,
+                        &window_size_func_other_arguments_types_attr);
+
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this, {{0, input_graph_node}},
+          {{1, key_func_other_arguments_node},
+           {2, reduce_func_other_arguments_node},
+           {3, window_size_func_other_arguments_node}},
+          {{"key_func", key_func},
+           {"reduce_func", reduce_func},
+           {"window_size_func", window_size_func},
+           {"Tkey_func_other_arguments", key_func_other_arguments_types_attr},
+           {"Treduce_func_other_arguments",
+            reduce_func_other_arguments_types_attr},
+           {"Twindow_size_func_other_arguments",
+            window_size_func_other_arguments_types_attr}},
+          output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
@@ -155,6 +221,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
             // We have reached the end of the current group, so maybe move on
             // to the next group.
             current_group_iterator_.reset();
+            groups_.erase(current_key_);
           }
 
           // Iterate through the input dataset until we get a full
@@ -165,23 +232,12 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
                 input_impl_->GetNext(ctx, &next_input_element, &end_of_input_));
 
             if (!end_of_input_) {
-              FunctionLibraryRuntime::Options opts;
-              opts.step_id = CapturedFunction::generate_step_id();
-              opts.runner = ctx->runner();
-              ScopedStepContainer step_container(
-                  opts.step_id, [this, ctx](const string& name) {
-                    dataset()
-                        ->captured_key_func_->resource_manager()
-                        ->Cleanup(name)
-                        .IgnoreError();
-                  });
-              opts.step_container = &step_container;
-
               // Run the key function on the input element to identify its
               // group.
               std::vector<Tensor> key_func_output;
-              TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Run(
-                  opts, next_input_element, &key_func_output));
+              TF_RETURN_IF_ERROR(
+                  dataset()->captured_key_func_->RunWithBorrowedArgs(
+                      ctx, next_input_element, &key_func_output));
 
               if (key_func_output.size() != 1 ||
                   key_func_output[0].dtype() != DT_INT64 ||
@@ -193,24 +249,11 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
               const int64 key = key_func_output[0].scalar<int64>()();
 
               if (window_sizes_.find(key) == window_sizes_.end()) {
-                // Run window_size function
-                FunctionLibraryRuntime::Options opts2;
-                opts2.step_id = CapturedFunction::generate_step_id();
-                opts2.runner = ctx->runner();
-                ScopedStepContainer step_container2(
-                    opts2.step_id, [this, ctx](const string& name) {
-                      dataset()
-                          ->captured_window_size_func_->resource_manager()
-                          ->Cleanup(name)
-                          .IgnoreError();
-                    });
-                opts2.step_container = &step_container2;
-
                 // Run the window size function on the key to identify its
                 // window size.
                 std::vector<Tensor> window_size_func_output;
                 TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Run(
-                    opts2, key_func_output, &window_size_func_output));
+                    ctx, std::move(key_func_output), &window_size_func_output));
 
                 if (window_size_func_output.size() != 1 ||
                     window_size_func_output[0].dtype() != DT_INT64 ||
@@ -230,6 +273,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
               group.push_back(std::move(next_input_element));
 
               if (group.size() == window_size) {
+                current_key_ = key;
                 TF_RETURN_IF_ERROR(StartFlushingGroup(ctx, key));
                 break;
               }
@@ -240,6 +284,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
             if (!groups_.empty()) {
               // We have consumed all of the input, so flush an
               // arbitrarily chosen group.
+              current_key_ = groups_.begin()->first;
               TF_RETURN_IF_ERROR(
                   StartFlushingGroup(ctx, groups_.begin()->first));
             }
@@ -250,26 +295,162 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+
+        if (end_of_input_) {
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(full_name("end_of_input"), ""));
+        }
+
+        // Saving groups_
+        if (!groups_.empty()) {
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(full_name("groups_size"), groups_.size()));
+          int idx = 0;
+          for (auto it = groups_.begin(); it != groups_.end(); it++) {
+            int64 key = it->first;
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                full_name(strings::StrCat("groups_[", idx, "]->key")), key));
+            TF_RETURN_IF_ERROR(SaveGroup(
+                writer, full_name(strings::StrCat("groups_[", idx, "]")),
+                it->second));
+            idx++;
+          }
+        }
+
+        // Saving window_sizes_
+        if (!window_sizes_.empty()) {
+          TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("window_sizes_size"),
+                                                 window_sizes_.size()));
+          int idx = 0;
+          for (auto it = window_sizes_.begin(); it != window_sizes_.end();
+               it++) {
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                full_name(strings::StrCat("window_sizes_[", idx, "]->key")),
+                it->first));
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                full_name(strings::StrCat("window_sizes_[", idx, "]->value")),
+                it->second));
+            idx++;
+          }
+        }
+
+        if (current_group_iterator_) {
+          TF_RETURN_IF_ERROR(SaveParent(writer, current_group_iterator_));
+
+          // Saving current_key_
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(full_name("current_key"), current_key_));
+        } else {
+          TF_RETURN_IF_ERROR(writer->WriteScalar(
+              full_name("current_iterator_not_initialized"), ""));
+        }
+
+        return Status::OK();
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+
+        if (reader->Contains(full_name("end_of_input"))) end_of_input_ = true;
+
+        // Restoring groups
+        if (reader->Contains(full_name("groups_size"))) {
+          int64 size;
+          TF_RETURN_IF_ERROR(
+              reader->ReadScalar(full_name("groups_size"), &size));
+          for (int idx = 0; idx < size; idx++) {
+            int64 key;
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                full_name(strings::StrCat("groups_[", idx, "]->key")), &key));
+            std::vector<std::vector<Tensor>> group;
+            TF_RETURN_IF_ERROR(RestoreGroup(
+                reader, full_name(strings::StrCat("groups_[", idx, "]")),
+                &group));
+            groups_[key] = group;
+          }
+        }
+
+        // Restoring Windows
+        if (reader->Contains(full_name("window_sizes_size"))) {
+          int64 size;
+          TF_RETURN_IF_ERROR(
+              reader->ReadScalar(full_name("window_sizes_size"), &size));
+          for (int idx = 0; idx < size; idx++) {
+            int64 key;
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                full_name(strings::StrCat("window_sizes_[", idx, "]->key")),
+                &key));
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                full_name(strings::StrCat("window_sizes_[", idx, "]->value")),
+                &window_sizes_[key]));
+          }
+        }
+
+        if (reader->Contains(full_name("current_iterator_not_initialized"))) {
+          current_group_iterator_.reset();
+        } else {
+          // Restore current_key_
+          TF_RETURN_IF_ERROR(
+              reader->ReadScalar(full_name("current_key"), &current_key_));
+
+          // Initialize current_group_iterator_
+          TF_RETURN_IF_ERROR(StartFlushingGroup(ctx, current_key_));
+          // Restore current_group_iterator_ state
+          TF_RETURN_IF_ERROR(
+              RestoreParent(ctx, reader, current_group_iterator_));
+        }
+        return Status::OK();
+      }
+
      private:
-      Status StartFlushingGroup(IteratorContext* ctx, int64 key)
+      Status SaveGroup(IteratorStateWriter* writer, const string& name,
+                       const std::vector<std::vector<Tensor>>& group)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(strings::StrCat(name, "_size"), group.size()));
+        for (int i = 0; i < group.size(); i++) {
+          TF_RETURN_IF_ERROR(writer->WriteScalar(
+              strings::StrCat(name, "[", i, "]_size"), group[i].size()));
+          for (int j = 0; j < group[i].size(); j++) {
+            TF_RETURN_IF_ERROR(writer->WriteTensor(
+                strings::StrCat(name, "[", i, "][", j, "]"), group[i][j]));
+          }
+        }
+        return Status::OK();
+      }
+
+      Status RestoreGroup(IteratorStateReader* reader, const string& name,
+                          std::vector<std::vector<Tensor>>* group)
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-        FunctionLibraryRuntime::Options opts;
-        opts.step_id = CapturedFunction::generate_step_id();
-        opts.runner = ctx->runner();
-        ScopedStepContainer step_container(
-            opts.step_id, [this, ctx](const string& name) {
-              dataset()
-                  ->captured_reduce_func_->resource_manager()
-                  ->Cleanup(name)
-                  .IgnoreError();
-            });
-        opts.step_container = &step_container;
+        int64 group_size;
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(strings::StrCat(name, "_size"), &group_size));
+        group->resize(group_size);
+        for (int i = 0; i < group_size; i++) {
+          int64 vector_size;
+          TF_RETURN_IF_ERROR(reader->ReadScalar(
+              strings::StrCat(name, "[", i, "]_size"), &vector_size));
+          group->at(i).resize(vector_size);
+          for (int j = 0; j < vector_size; j++) {
+            TF_RETURN_IF_ERROR(reader->ReadTensor(
+                strings::StrCat(name, "[", i, "][", j, "]"), &group->at(i)[j]));
+          }
+        }
+        return Status::OK();
+      }
 
+      Status StartFlushingGroup(IteratorContext* ctx, int64 key)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         DatasetBase* group_dataset;
         TF_RETURN_IF_ERROR(NewWindowDataset(
-            std::move(groups_[key]), dataset()->input_->output_dtypes(),
+            groups_[key], dataset()->input_->output_dtypes(),
             dataset()->input_->output_shapes(), &group_dataset));
-        groups_.erase(key);
 
         Tensor key_arg(DT_INT64, TensorShape({}));
         key_arg.scalar<int64>()() = key;
@@ -281,9 +462,8 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
         std::vector<Tensor> args(
             {std::move(key_arg), std::move(group_dataset_arg)});
         std::vector<Tensor> return_values;
-
-        TF_RETURN_IF_ERROR(
-            dataset()->captured_reduce_func_->Run(opts, args, &return_values));
+        TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Run(
+            ctx, std::move(args), &return_values));
 
         if (!(return_values.size() == 1 &&
               return_values[0].dtype() == DT_VARIANT &&
@@ -304,20 +484,40 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      const std::unique_ptr<IteratorBase> input_impl_;
       mutex mu_;
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       // TODO(mrry): Optimize for dense key space if appropriate.
       bool end_of_input_ GUARDED_BY(mu_) = false;
+      int64 current_key_ GUARDED_BY(mu_);
       std::map<int64, std::vector<std::vector<Tensor>>> groups_ GUARDED_BY(mu_);
       std::unique_ptr<IteratorBase> current_group_iterator_ GUARDED_BY(mu_);
       std::map<int64, int64> window_sizes_ GUARDED_BY(mu_);
     };
 
+    Status OtherArgumentsNodeAndType(
+        DatasetGraphDefBuilder* b,
+        const std::unique_ptr<CapturedFunction>& captured_func,
+        std::vector<Node*>* other_arguments_node,
+        DataTypeVector* other_arguments_types) const {
+      other_arguments_node->reserve(captured_func->captured_inputs().size());
+      other_arguments_types->reserve(captured_func->captured_inputs().size());
+      for (const Tensor& t : captured_func->captured_inputs()) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        other_arguments_node->emplace_back(node);
+        other_arguments_types->emplace_back(t.dtype());
+      }
+      return Status::OK();
+    }
+
     // A resource name for the temporary window dataset that is
     // created as the input to the reduce function.
     static constexpr const char* kWindowResourceName = "__window_dataset";
 
     const DatasetBase* const input_;
+    const NameAttrList key_func_;
+    const NameAttrList reduce_func_;
+    const NameAttrList window_size_func_;
     const std::unique_ptr<CapturedFunction> captured_key_func_;
     const std::unique_ptr<CapturedFunction> captured_reduce_func_;
     const std::unique_ptr<CapturedFunction> captured_window_size_func_;
diff --git a/tensorflow/core/kernels/ignore_errors_dataset_op.cc b/tensorflow/core/kernels/data/ignore_errors_dataset_op.cc
similarity index 97%
rename from tensorflow/core/kernels/ignore_errors_dataset_op.cc
rename to tensorflow/core/kernels/data/ignore_errors_dataset_op.cc
index 8cf263d87fed601ed987e5d13909dd433391e5bd..99df699d719b896df37515fc4147cd48db52a113 100644
--- a/tensorflow/core/kernels/ignore_errors_dataset_op.cc
+++ b/tensorflow/core/kernels/data/ignore_errors_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
@@ -109,7 +108,7 @@ class IgnoreErrorsDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         if (reader->Contains(full_name("input_impls_empty")))
diff --git a/tensorflow/core/kernels/interleave_dataset_op.cc b/tensorflow/core/kernels/data/interleave_dataset_op.cc
similarity index 93%
rename from tensorflow/core/kernels/interleave_dataset_op.cc
rename to tensorflow/core/kernels/data/interleave_dataset_op.cc
index cbee68b2dbed807ca101e1a887daebe289646da0..bce3f28d62bf898e5137568c4241aff4392db65b 100644
--- a/tensorflow/core/kernels/interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/interleave_dataset_op.cc
@@ -13,16 +13,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/random/random.h"
 
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset_utils.h"
-
 namespace tensorflow {
 
 namespace {
@@ -69,9 +67,8 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
         errors::InvalidArgument("block_length must be greater than zero."));
 
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(ctx, func_, graph_def_version_,
-                                                 std::move(other_arguments),
-                                                 &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
+                            func_, std::move(other_arguments), &captured_func));
 
     *output =
         new Dataset(ctx, input, func_, std::move(captured_func), cycle_length,
@@ -126,7 +123,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
       TF_RETURN_IF_ERROR(b->AddScalar(block_length_, &block_length_node));
       DataTypeVector other_arguments_types;
       other_arguments_types.reserve(captured_func_->captured_inputs().size());
-      std::vector<NodeBuilder::NodeOut> other_arguments;
+      std::vector<Node*> other_arguments;
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
@@ -230,7 +227,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
@@ -268,13 +265,9 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreCurrentElements(OpKernelContext* ctx,
+      Status RestoreCurrentElements(IteratorContext* ctx,
                                     IteratorStateReader* reader)
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-        IteratorContext::Params params;
-        params.env = ctx->env();
-        params.runner = *(ctx->runner());
-        IteratorContext iter_ctx(std::move(params));
         for (int idx = 0; idx < current_elements_.size(); idx++) {
           if (reader->Contains(
                   full_name(strings::StrCat("args_size[", idx, "]")))) {
@@ -289,9 +282,8 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
                   &args_list_[idx][i]));
             }
             TF_RETURN_IF_ERROR(dataset::MakeIteratorFromInputElement(
-                &iter_ctx, args_list_[idx], idx,
-                dataset()->captured_func_.get(), prefix(),
-                &current_elements_[idx]));
+                ctx, args_list_[idx], idx, dataset()->captured_func_.get(),
+                prefix(), &current_elements_[idx]));
             TF_RETURN_IF_ERROR(
                 RestoreParent(ctx, reader, current_elements_[idx]));
           } else {
diff --git a/tensorflow/core/kernels/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
similarity index 77%
rename from tensorflow/core/kernels/iterator_ops.cc
rename to tensorflow/core/kernels/data/iterator_ops.cc
index 439775157bc936d44845e7b175e62c2fc088e6cf..244df137cbdc325da236f25e0d45cf2b37269015 100644
--- a/tensorflow/core/kernels/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -14,21 +14,23 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/common_runtime/graph_runner.h"
+#include "tensorflow/core/common_runtime/threadpool_device.h"
 #include "tensorflow/core/framework/iterator.pb.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/resource_op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
 #include "tensorflow/core/graph/graph_constructor.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/stats_aggregator.h"
 #include "tensorflow/core/kernels/ops_util.h"
-#include "tensorflow/core/kernels/stats_aggregator.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
 
@@ -80,8 +82,16 @@ class IteratorResource : public ResourceBase {
  public:
   IteratorResource(const DataTypeVector& output_dtypes,
                    const std::vector<PartialTensorShape>& output_shapes,
-                   const int graph_def_version)
-      : iterator_(nullptr),
+                   const int graph_def_version,
+                   std::unique_ptr<DeviceMgr> device_mgr,
+                   std::unique_ptr<FunctionLibraryDefinition> flib_def,
+                   std::unique_ptr<ProcessFunctionLibraryRuntime> pflr,
+                   FunctionLibraryRuntime* lib)
+      : device_mgr_(std::move(device_mgr)),
+        flib_def_(std::move(flib_def)),
+        pflr_(std::move(pflr)),
+        lib_(lib),
+        iterator_(nullptr),
         output_dtypes_(output_dtypes),
         output_shapes_(output_shapes),
         graph_def_version_(graph_def_version) {}
@@ -90,6 +100,9 @@ class IteratorResource : public ResourceBase {
                  bool* end_of_sequence) {
     std::shared_ptr<IteratorBase> captured_iterator(iterator_);
     if (captured_iterator) {
+      if (lib_ != nullptr) {
+        ctx->set_lib(lib_);
+      }
       return captured_iterator->GetNext(ctx, out_tensors, end_of_sequence);
     } else {
       return errors::FailedPrecondition(
@@ -129,26 +142,30 @@ class IteratorResource : public ResourceBase {
     GraphRunner graph_runner(ctx->env());
 
     // Build a new FLR that knows about the functions in the graph.
-    std::unique_ptr<FunctionLibraryDefinition> flib_def(
+    std::shared_ptr<FunctionLibraryDefinition> flib_def(
         new FunctionLibraryDefinition(
             *ctx->function_library()->GetFunctionLibraryDefinition()));
     TF_RETURN_IF_ERROR(flib_def->AddLibrary(graph_def.library()));
-    std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
-        new ProcessFunctionLibraryRuntime(nullptr, ctx->env(),
-                                          graph_def_version_, flib_def.get(),
-                                          {}, nullptr));
-    FunctionLibraryRuntime* lib =
-        pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice);
 
     TF_RETURN_IF_ERROR(
-        graph_runner.Run(&graph, lib, {}, {output_node}, &outputs));
+        graph_runner.Run(&graph, lib_, {}, {output_node}, &outputs));
     TF_RETURN_IF_ERROR(GetDatasetFromVariantTensor(outputs[0], &dataset));
 
     TF_RETURN_IF_ERROR(set_iterator(dataset->MakeIterator("Iterator")));
     std::shared_ptr<IteratorBase> captured_iterator(iterator_);
 
     if (captured_iterator) {
-      return captured_iterator->Restore(ctx, reader);
+      IteratorContext::Params params;
+      params.env = ctx->env();
+      params.runner = *(ctx->runner());
+      params.function_library = flib_def;
+      params.lib = lib_;
+      IteratorContext iter_ctx(std::move(params));
+
+      TF_RETURN_IF_ERROR(captured_iterator->Restore(&iter_ctx, reader));
+      mutex_lock l(mu_);
+      lib_def_ = std::move(flib_def);
+      return Status::OK();
     } else {
       return errors::FailedPrecondition(
           "Failed to restore iterator. Make sure the checkpoint ",
@@ -157,6 +174,11 @@ class IteratorResource : public ResourceBase {
     }
   }
 
+  std::shared_ptr<const FunctionLibraryDefinition> function_library() {
+    tf_shared_lock l(mu_);
+    return lib_def_;
+  }
+
   // Transfers ownership of iterator to this. This method is thread-safe.
   Status set_iterator(std::unique_ptr<IteratorBase> iterator) {
     if (iterator) {
@@ -188,9 +210,17 @@ class IteratorResource : public ResourceBase {
   }
 
  private:
+  // The following (device_mgr_, flib_def_, pflr_) are only used when the
+  // IteratorResource is shared between sessions and in that case we create
+  // a new FLR. Otherwise these are set to null.
+  std::unique_ptr<DeviceMgr> device_mgr_;
+  std::unique_ptr<FunctionLibraryDefinition> flib_def_;
+  std::unique_ptr<ProcessFunctionLibraryRuntime> pflr_;
+  FunctionLibraryRuntime* lib_ = nullptr;  // not owned.
   std::shared_ptr<IteratorBase> iterator_;
   mutex mu_;
   std::shared_ptr<StatsAggregator> stats_aggregator_ GUARDED_BY(mu_);
+  std::shared_ptr<const FunctionLibraryDefinition> lib_def_ GUARDED_BY(mu_);
   const DataTypeVector output_dtypes_;
   const std::vector<PartialTensorShape> output_shapes_;
   const int graph_def_version_;
@@ -401,24 +431,94 @@ REGISTER_UNARY_VARIANT_DECODE_FUNCTION(IteratorStateVariant,
                                        kIteratorVariantTypeName);
 
 // TODO(mrry): Can we simply use the template kernel here?
-class IteratorHandleOp : public ResourceOpKernel<IteratorResource> {
+class IteratorHandleOp : public OpKernel {
  public:
   explicit IteratorHandleOp(OpKernelConstruction* ctx)
-      : ResourceOpKernel<IteratorResource>(ctx),
-        graph_def_version_(ctx->graph_def_version()) {
+      : OpKernel(ctx), graph_def_version_(ctx->graph_def_version()) {
+    OP_REQUIRES_OK(ctx, ctx->allocate_persistent(DT_STRING, TensorShape({2}),
+                                                 &handle_, nullptr));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_dtypes_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("shared_name", &name_));
   }
 
- private:
-  Status CreateResource(IteratorResource** ret) override
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    *ret = new IteratorResource(output_dtypes_, output_shapes_,
-                                graph_def_version_);
-    return Status::OK();
+  // The resource is deleted from the resource manager only when it is private
+  // to kernel. Ideally the resource should be deleted when it is no longer held
+  // by anyone, but it would break backward compatibility.
+  ~IteratorHandleOp() override {
+    if (resource_ != nullptr) {
+      resource_->Unref();
+      if (cinfo_.resource_is_private_to_kernel()) {
+        if (!cinfo_.resource_manager()
+                 ->template Delete<IteratorResource>(cinfo_.container(),
+                                                     cinfo_.name())
+                 .ok()) {
+          // Do nothing; the resource can have been deleted by session resets.
+        }
+      }
+    }
   }
 
-  Status VerifyResource(IteratorResource* resource) override {
+  void Compute(OpKernelContext* context) override LOCKS_EXCLUDED(mu_) {
+    mutex_lock l(mu_);
+    FunctionLibraryRuntime* lib = context->function_library();
+    std::unique_ptr<DeviceMgr> device_mgr(nullptr);
+    std::unique_ptr<FunctionLibraryDefinition> flib_def(nullptr);
+    std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(nullptr);
+    // If the iterator is shared then we construct a new FLR, and pass that in.
+    // NOTE(mrry,rohanj): In this case it is not possible to call remote
+    // functions from the iterator. We may add this functionality if there
+    // is sufficient demand, but it will require a significant refactoring.
+    if (!name_.empty()) {
+      lib = CreateFLR(context, &device_mgr, &flib_def, &pflr);
+    }
+
+    if (resource_ == nullptr) {
+      ResourceMgr* mgr = context->resource_manager();
+      OP_REQUIRES_OK(context, cinfo_.Init(mgr, def()));
+
+      IteratorResource* resource;
+      OP_REQUIRES_OK(
+          context,
+          mgr->LookupOrCreate<IteratorResource>(
+              cinfo_.container(), cinfo_.name(), &resource,
+              [lib, &device_mgr, &flib_def, &pflr, this](IteratorResource** ret)
+                  EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                    *ret = new IteratorResource(
+                        output_dtypes_, output_shapes_, graph_def_version_,
+                        std::move(device_mgr), std::move(flib_def),
+                        std::move(pflr), lib);
+                    return Status::OK();
+                  }));
+
+      Status s = VerifyResource(resource);
+      if (TF_PREDICT_FALSE(!s.ok())) {
+        resource->Unref();
+        context->SetStatus(s);
+        return;
+      }
+
+      auto h = handle_.AccessTensor(context)->template flat<string>();
+      h(0) = cinfo_.container();
+      h(1) = cinfo_.name();
+      resource_ = resource;
+    }
+    if (context->expected_output_dtype(0) == DT_RESOURCE) {
+      OP_REQUIRES_OK(context, MakeResourceHandleToOutput(
+                                  context, 0, cinfo_.container(), cinfo_.name(),
+                                  MakeTypeIndex<IteratorResource>()));
+    } else {
+      context->set_output_ref(0, &mu_, handle_.AccessTensor(context));
+    }
+  }
+
+ private:
+  // During the first Compute(), resource is either created or looked up using
+  // shared_name. In the latter case, the resource found should be verified if
+  // it is compatible with this op's configuration. The verification may fail in
+  // cases such as two graphs asking queues of the same shared name to have
+  // inconsistent capacities.
+  Status VerifyResource(IteratorResource* resource) {
     TF_RETURN_IF_ERROR(
         VerifyTypesMatch(output_dtypes_, resource->output_dtypes()));
     TF_RETURN_IF_ERROR(
@@ -426,10 +526,33 @@ class IteratorHandleOp : public ResourceOpKernel<IteratorResource> {
     return Status::OK();
   }
 
- private:
+  FunctionLibraryRuntime* CreateFLR(
+      OpKernelContext* ctx, std::unique_ptr<DeviceMgr>* device_mgr,
+      std::unique_ptr<FunctionLibraryDefinition>* flib_def,
+      std::unique_ptr<ProcessFunctionLibraryRuntime>* pflr) {
+    Device* device = new ThreadPoolDevice(
+        SessionOptions(), ctx->device()->attributes().name(), Bytes(256 << 20),
+        DeviceLocality(), cpu_allocator());
+
+    device_mgr->reset(new DeviceMgr({device}));
+    flib_def->reset(new FunctionLibraryDefinition(
+        *ctx->function_library()->GetFunctionLibraryDefinition()));
+    pflr->reset(new ProcessFunctionLibraryRuntime(
+        device_mgr->get(), ctx->env(), graph_def_version_, flib_def->get(),
+        {} /* TODO(mrry): OptimizerOptions? */,
+        nullptr /* TODO(mrry): ClusterFLR */));
+
+    return (*pflr)->GetFLR(device->name());
+  }
+
+  mutex mu_;
+  ContainerInfo cinfo_ GUARDED_BY(mu_);
+  IteratorResource* resource_ GUARDED_BY(mu_) = nullptr;
+  PersistentTensor handle_ GUARDED_BY(mu_);
   DataTypeVector output_dtypes_;
   std::vector<PartialTensorShape> output_shapes_;
   const int graph_def_version_;
+  string name_;
 };
 
 class MakeIteratorOp : public OpKernel {
@@ -448,40 +571,61 @@ class MakeIteratorOp : public OpKernel {
   }
 };
 
-class ToSingleElementOp : public OpKernel {
+class ToSingleElementOp : public AsyncOpKernel {
  public:
-  explicit ToSingleElementOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+  explicit ToSingleElementOp(OpKernelConstruction* ctx)
+      : AsyncOpKernel(ctx),
+        thread_pool_(new thread::ThreadPool(
+            ctx->env(), ThreadOptions(),
+            strings::StrCat("to_single_element_op_thread_",
+                            SanitizeThreadSuffix(name())),
+            1 /* num_threads */, false /* low_latency_hint */)) {}
 
-  void Compute(OpKernelContext* ctx) override {
-    DatasetBase* dataset;
-    OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(0), &dataset));
-    auto iterator = dataset->MakeIterator("SingleElementIterator");
+  void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override {
+    // The call to `iterator->GetNext()` may block and depend on an
+    // inter-op thread pool thread, so we issue the call from the
+    // owned thread pool.
+    thread_pool_->Schedule([ctx, done]() {
+      DatasetBase* dataset;
+      OP_REQUIRES_OK_ASYNC(
+          ctx, GetDatasetFromVariantTensor(ctx->input(0), &dataset), done);
+      auto iterator = dataset->MakeIterator("SingleElementIterator");
 
-    IteratorContext::Params params;
-    params.env = ctx->env();
-    params.runner = *(ctx->runner());
-    IteratorContext iter_ctx(std::move(params));
+      IteratorContext::Params params;
+      params.env = ctx->env();
+      params.runner = *(ctx->runner());
+      params.lib = ctx->function_library();
+      IteratorContext iter_ctx(std::move(params));
 
-    std::vector<Tensor> components;
-    components.reserve(dataset->output_dtypes().size());
-    bool end_of_sequence;
+      std::vector<Tensor> components;
+      components.reserve(dataset->output_dtypes().size());
+      bool end_of_sequence;
 
-    OP_REQUIRES_OK(ctx,
-                   iterator->GetNext(&iter_ctx, &components, &end_of_sequence));
-    OP_REQUIRES(ctx, !end_of_sequence,
-                errors::InvalidArgument("Dataset was empty."));
+      OP_REQUIRES_OK_ASYNC(
+          ctx, iterator->GetNext(&iter_ctx, &components, &end_of_sequence),
+          done);
+      OP_REQUIRES_ASYNC(ctx, !end_of_sequence,
+                        errors::InvalidArgument("Dataset was empty."), done);
 
-    for (int i = 0; i < components.size(); ++i) {
-      // TODO(mrry): Check that the shapes match the shape attrs.
-      ctx->set_output(i, components[i]);
-    }
+      for (int i = 0; i < components.size(); ++i) {
+        // TODO(mrry): Check that the shapes match the shape attrs.
+        ctx->set_output(i, components[i]);
+      }
 
-    components.clear();
-    OP_REQUIRES_OK(ctx,
-                   iterator->GetNext(&iter_ctx, &components, &end_of_sequence));
-    OP_REQUIRES(ctx, end_of_sequence,
-                errors::InvalidArgument("Dataset had more than one element."));
+      components.clear();
+      OP_REQUIRES_OK_ASYNC(
+          ctx, iterator->GetNext(&iter_ctx, &components, &end_of_sequence),
+          done);
+      OP_REQUIRES_ASYNC(
+          ctx, end_of_sequence,
+          errors::InvalidArgument("Dataset had more than one element."), done);
+
+      done();
+    });
   }
+
+ private:
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
 };
 
 class OneShotIteratorOp : public AsyncOpKernel {
@@ -571,14 +715,16 @@ class OneShotIteratorOp : public AsyncOpKernel {
   Status TryInit(OpKernelContext* ctx, IteratorResource** iterator,
                  ContainerInfo* cinfo) {
     TF_RETURN_IF_ERROR(cinfo->Init(ctx->resource_manager(), def()));
+    FunctionLibraryRuntime* lib = ctx->function_library();
 
     // Create an IteratorResource that will hold the iterator for this op.
     TF_RETURN_IF_ERROR(
         ctx->resource_manager()->LookupOrCreate<IteratorResource>(
             cinfo->container(), cinfo->name(), iterator,
-            [this](IteratorResource** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+            [lib, this](IteratorResource** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
               *ret = new IteratorResource(output_dtypes_, output_shapes_,
-                                          graph_def_version_);
+                                          graph_def_version_, nullptr, nullptr,
+                                          nullptr, lib);
               return Status::OK();
             }));
 
@@ -685,37 +831,39 @@ class IteratorGetNextOp : public AsyncOpKernel {
     IteratorResource* iterator;
     OP_REQUIRES_OK(ctx,
                    LookupResource(ctx, HandleFromInput(ctx, 0), &iterator));
-
     // The call to `iterator->GetNext()` may block and depend on an
     // inter-op thread pool thread, so we issue the call from the
     // owned thread pool.
-    thread_pool_->Schedule([this, ctx, iterator, done]() {
-      core::ScopedUnref unref_iterator(iterator);
-
-      std::vector<Tensor> components;
-      bool end_of_sequence = false;
-
-      IteratorContext::Params params;
-      params.env = ctx->env();
-      params.stats_aggregator_getter = [iterator]() {
-        return iterator->stats_aggregator();
-      };
-      params.runner = *(ctx->runner());
-      IteratorContext iter_ctx(std::move(params));
-
-      OP_REQUIRES_OK_ASYNC(
-          ctx, iterator->GetNext(&iter_ctx, &components, &end_of_sequence),
-          done);
-      OP_REQUIRES_ASYNC(ctx, !end_of_sequence,
-                        errors::OutOfRange("End of sequence"), done);
-
-      for (int i = 0; i < components.size(); ++i) {
-        // TODO(mrry): Check that the shapes match the shape attrs.
-        ctx->set_output(i, components[i]);
-      }
-
-      done();
-    });
+    thread_pool_->Schedule(std::bind(
+        [this, ctx, iterator](DoneCallback done) {
+          core::ScopedUnref unref_iterator(iterator);
+
+          std::vector<Tensor> components;
+          bool end_of_sequence = false;
+
+          IteratorContext::Params params;
+          params.env = ctx->env();
+          params.stats_aggregator_getter = [iterator]() {
+            return iterator->stats_aggregator();
+          };
+          params.runner = *(ctx->runner());
+          params.function_library = iterator->function_library();
+          IteratorContext iter_ctx(std::move(params));
+
+          OP_REQUIRES_OK_ASYNC(
+              ctx, iterator->GetNext(&iter_ctx, &components, &end_of_sequence),
+              done);
+          OP_REQUIRES_ASYNC(ctx, !end_of_sequence,
+                            errors::OutOfRange("End of sequence"), done);
+
+          for (int i = 0; i < components.size(); ++i) {
+            // TODO(mrry): Check that the shapes match the shape attrs.
+            ctx->set_output(i, components[i]);
+          }
+
+          done();
+        },
+        std::move(done)));
   }
 
  private:
diff --git a/tensorflow/core/kernels/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
similarity index 65%
rename from tensorflow/core/kernels/map_and_batch_dataset_op.cc
rename to tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index ad1e356dbd32b89e7d59f0908f4ea634ac476e6b..c529f671f2bb7fd3eb5277c23867e25ba70fd046 100644
--- a/tensorflow/core/kernels/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -14,13 +14,13 @@ limitations under the License.
 ==============================================================================*/
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/kernels/inplace_ops_functor.h"
+#include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/tracing.h"
@@ -67,9 +67,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                     "num_parallel_batches must be greater than zero."));
 
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(ctx, func_, graph_def_version_,
-                                                 std::move(other_arguments),
-                                                 &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
+                            func_, std::move(other_arguments), &captured_func));
 
     *output = new Dataset(input, batch_size, num_parallel_batches,
                           output_types_, output_shapes_,
@@ -132,7 +131,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         if (current_batch_index_ != -1) {
           for (size_t batch_index = 0;
                batch_index < dataset()->num_parallel_batches_; ++batch_index) {
-            WaitForBatch(batch_index).IgnoreError();
+            int64 num_elements;
+            WaitForBatch(batch_index, &num_elements).IgnoreError();
             // Deallocate tensors allocated for the output.
             batch_results_[batch_index].output.clear();
           }
@@ -166,17 +166,35 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           }
         }
 
-        if (end_of_input_) {
+        int64 num_elements = 0;
+        Status status = WaitForBatch(current_batch_index_, &num_elements);
+        if (num_elements == 0) {
           *end_of_sequence = true;
           return Status::OK();
         }
-
-        Status status = WaitForBatch(current_batch_index_);
         if (!status.ok()) {
           // Deallocate tensors allocated for the output.
           batch_results_[current_batch_index_].output.clear();
         } else {
-          *out_tensors = std::move(batch_results_[current_batch_index_].output);
+          if (num_elements < dataset()->batch_size_) {
+            const std::vector<Tensor>& output =
+                batch_results_[current_batch_index_].output;
+            for (size_t i = 0; i < output.size(); ++i) {
+              TensorShape component_shape(
+                  batch_results_[current_batch_index_].output[i].shape());
+              component_shape.set_dim(0, num_elements);
+              Tensor component(cpu_allocator(), output[i].dtype(),
+                               component_shape);
+              TF_RETURN_IF_ERROR(
+                  CopyPartialBatch(&component, output[i], num_elements));
+              out_tensors->emplace_back(std::move(component));
+            }
+            // Deallocate tensors allocated for the output.
+            batch_results_[current_batch_index_].output.clear();
+          } else {
+            *out_tensors =
+                std::move(batch_results_[current_batch_index_].output);
+          }
           *end_of_sequence = false;
         }
         StartInvocationBatch(ctx, current_batch_index_);
@@ -195,6 +213,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       struct InvocationResult {
         Status status;
+        bool end_of_input;
         std::vector<Tensor> return_values;
       };
 
@@ -202,6 +221,29 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         return batch_index * dataset()->batch_size_ + offset;
       }
 
+      Status CopyPartialBatch(Tensor* output, const Tensor& value,
+                              int64 num_elements) {
+        switch (value.dtype()) {
+#define CASE(type)                                                \
+  case DataTypeToEnum<type>::value: {                             \
+    auto output_t = output->flat_outer_dims<type>();              \
+    auto value_t = value.flat_outer_dims<type>();                 \
+    for (size_t i = 0; i < num_elements; i++) {                   \
+      output_t.template chip<0>(i) = value_t.template chip<0>(i); \
+    }                                                             \
+    return Status::OK();                                          \
+  }
+          TF_CALL_NUMBER_TYPES(CASE);
+          TF_CALL_string(CASE);
+          TF_CALL_variant(CASE);
+#undef CASE
+          default:
+            return errors::InvalidArgument("Unsupported data type: ",
+                                           value.dtype());
+        }
+        return Status::OK();
+      }
+
       void EnsureOutputAllocated(BatchResult* batch_result,
                                  const std::vector<Tensor>& return_values) {
         mutex_lock l(batch_result->mu);
@@ -228,8 +270,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         // Get the next input element.
         std::vector<Tensor> input_element;
         result->status =
-            input_impl_->GetNext(ctx, &input_element, &end_of_input_);
-        if (end_of_input_ || !result->status.ok()) {
+            input_impl_->GetNext(ctx, &input_element, &result->end_of_input);
+        if (result->end_of_input || !result->status.ok()) {
           batch_result->counter->DecrementCount();
           return;
         }
@@ -237,58 +279,55 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         // Call `captured_func_(input_element)`, store the result in
         // `result->return_values`, and notify `batch_result->counter`
         // to unblock a consumer.
-        FunctionLibraryRuntime::Options opts;
-        opts.step_id = CapturedFunction::generate_step_id();
-        ScopedStepContainer* step_container = new ScopedStepContainer(
-            opts.step_id, [this, ctx](const string& name) {
-              dataset()
-                  ->captured_func_->resource_manager()
-                  ->Cleanup(name)
-                  .IgnoreError();
-            });
-        opts.step_container = step_container;
-        opts.runner = ctx->runner();
-        dataset()->captured_func_->RunAsync(
-            opts, input_element, &result->return_values,
-            [this, result, step_container, batch_result,
-             offset](Status ret_status) {
-              delete step_container;
-              result->status.Update(ret_status);
-              if (ret_status.ok()) {
-                EnsureOutputAllocated(batch_result, result->return_values);
-                const size_t num_components = result->return_values.size();
-                for (size_t i = 0; i < num_components; ++i) {
-                  const Tensor& tensor = result->return_values[i];
-                  Tensor* batch = &(batch_result->output)[i];
-                  if (tensor.NumElements() !=
-                      (batch->NumElements() / batch->dim_size(0))) {
-                    TensorShape batch_shape = batch->shape();
-                    batch_shape.RemoveDim(0);
-                    result->status.Update(errors::InvalidArgument(
-                        "Cannot add tensor to the batch: number of "
-                        "elements does not match. Shapes are: [tensor]: ",
-                        tensor.shape().DebugString(),
-                        ", [batch]: ", batch_shape.DebugString()));
-                    break;
-                  }
-                  // TODO(mrry): Add a version of DoParallelConcat that allows
-                  // us to move `tensor` where possible, to speed up string
-                  // tensor batching.
-                  Status copy_status = ::tensorflow::functor::DoParallelConcat(
-                      *dataset()->device_, tensor, offset, batch);
-                  if (!copy_status.ok()) {
-                    result->status.Update(copy_status);
-                    break;
-                  }
-                }
-              }
-              // NOTE(mrry): We clear the return values here to release any
-              // memory associated with them and to paralellize the destruction
-              // of the tensors (which can be surprisingly expensive for
-              // map functions with large numbers of return values).
-              result->return_values.clear();
-              batch_result->counter->DecrementCount();
-            });
+        (*ctx->runner())(std::bind(
+            [this, result, batch_result, offset](
+                IteratorContext* ctx, std::vector<Tensor> input_element) {
+              dataset()->captured_func_->RunAsync(
+                  ctx, std::move(input_element), &result->return_values,
+                  [this, ctx, result, batch_result, offset](Status ret_status) {
+                    delete ctx;
+                    result->status.Update(ret_status);
+                    if (ret_status.ok()) {
+                      EnsureOutputAllocated(batch_result,
+                                            result->return_values);
+                      const size_t num_components =
+                          result->return_values.size();
+                      for (size_t i = 0; i < num_components; ++i) {
+                        const Tensor& tensor = result->return_values[i];
+                        Tensor* batch = &(batch_result->output)[i];
+                        if (tensor.NumElements() !=
+                            (batch->NumElements() / batch->dim_size(0))) {
+                          TensorShape batch_shape = batch->shape();
+                          batch_shape.RemoveDim(0);
+                          result->status.Update(errors::InvalidArgument(
+                              "Cannot add tensor to the batch: number of "
+                              "elements does not match. Shapes are: [tensor]: ",
+                              tensor.shape().DebugString(),
+                              ", [batch]: ", batch_shape.DebugString()));
+                          break;
+                        }
+                        // TODO(mrry): Add a version of DoParallelConcat that
+                        // allows us to move `tensor` where possible, to speed
+                        // up string tensor batching.
+                        Status copy_status =
+                            ::tensorflow::functor::DoParallelConcat(
+                                *dataset()->device_, tensor, offset, batch);
+                        if (!copy_status.ok()) {
+                          result->status.Update(copy_status);
+                          break;
+                        }
+                      }
+                    }
+                    // NOTE(mrry): We clear the return values here to release
+                    // any memory associated with them and to paralellize the
+                    // destruction of the tensors (which can be surprisingly
+                    // expensive for map functions with large numbers of return
+                    // values).
+                    result->return_values.clear();
+                    batch_result->counter->DecrementCount();
+                  });
+            },
+            new IteratorContext(*ctx), std::move(input_element)));
       }
 
       void StartInvocationBatch(IteratorContext* ctx, int64 batch_index)
@@ -305,9 +344,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         for (size_t i = 0; i < dataset()->batch_size_; ++i) {
           size_t index = ComputeInvocationIndex(batch_index, i);
           InvocationResult* result = &invocation_results_[index];
-          // Reset the state of `result`.
-          // NOTE(mrry): `result->return_values` were cleared when the previous
-          // invocation completed.
+          // Reset the state of `result`; `result->return_values` was cleared
+          // when the previous invocation completed.
+          result->end_of_input = false;
           result->status = Status::OK();
         }
         // Start individual invocations.
@@ -316,13 +355,18 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         }
       }
 
-      Status WaitForBatch(int64 batch_index) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+      Status WaitForBatch(int64 batch_index, int64* num_elements)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         port::Tracing::TraceMe activity(strings::StrCat(prefix(), "::Wait"));
         batch_results_[batch_index].counter->Wait();
         Status status = Status::OK();
-        for (size_t i = 0; i < dataset()->batch_size_; ++i) {
+        for (size_t i = 0; i < dataset()->batch_size_; ++i, ++*num_elements) {
           size_t index = ComputeInvocationIndex(batch_index, i);
           InvocationResult* result = &invocation_results_[index];
+          if (result->end_of_input) {
+            VLOG(3) << "end of input encountered at element[" << i << "]: ";
+            return Status::OK();
+          }
           if (!result->status.ok()) {
             VLOG(3) << "failed to process element[" << i
                     << "]: " << result->status;
@@ -337,7 +381,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       const std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       std::vector<InvocationResult> invocation_results_ GUARDED_BY(mu_);
       std::vector<BatchResult> batch_results_ GUARDED_BY(mu_);
-      bool end_of_input_ GUARDED_BY(mu_) = false;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
similarity index 87%
rename from tensorflow/core/kernels/map_dataset_op.cc
rename to tensorflow/core/kernels/data/map_dataset_op.cc
index 4ba09bc335e9682eef2a0c2042aa98e9b428d562..01f9b9fa09621562fae38a7e8b6c7957a8e5538e 100644
--- a/tensorflow/core/kernels/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -12,15 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
-#include "tensorflow/core/kernels/captured_function.h"
-
 namespace tensorflow {
 
 namespace {
@@ -49,9 +47,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
     }
 
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(ctx, func_, graph_def_version_,
-                                                 std::move(other_arguments),
-                                                 &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
+                            func_, std::move(other_arguments), &captured_func));
 
     *output = new Dataset(ctx, input, func_, std::move(captured_func),
                           output_types_, output_shapes_);
@@ -100,7 +97,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
       DataTypeVector other_arguments_types(
           captured_func_->captured_inputs().size());
-      std::vector<NodeBuilder::NodeOut> other_arguments(
+      std::vector<Node*> other_arguments(
           captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
@@ -143,20 +140,10 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
           return Status::OK();
         }
 
-        FunctionLibraryRuntime::Options opts;
-        opts.step_id = CapturedFunction::generate_step_id();
-        ScopedStepContainer step_container(
-            opts.step_id, [this, ctx](const string& name) {
-              dataset()
-                  ->captured_func_->resource_manager()
-                  ->Cleanup(name)
-                  .IgnoreError();
-            });
-        opts.step_container = &step_container;
-        opts.runner = ctx->runner();
         // TODO(mrry): Avoid blocking a threadpool thread. We will need to
         // stack-rip the iterators and use async kernels.
-        Status s = dataset()->captured_func_->Run(opts, args, out_tensors);
+        Status s =
+            dataset()->captured_func_->Run(ctx, std::move(args), out_tensors);
         if (errors::IsOutOfRange(s)) {
           // `f` may deliberately raise `errors::OutOfRange` to indicate
           // that we should terminate the iteration early.
@@ -173,7 +160,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
         return Status::OK();
diff --git a/tensorflow/core/kernels/padded_batch_dataset_op.cc b/tensorflow/core/kernels/data/padded_batch_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/padded_batch_dataset_op.cc
rename to tensorflow/core/kernels/data/padded_batch_dataset_op.cc
index 7c28d955e1a2ce129110f112fe87a9bce05a14a0..346eca0bb2ab1c7a82ddba98063c0ccb71b4e58f 100644
--- a/tensorflow/core/kernels/padded_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/padded_batch_dataset_op.cc
@@ -12,11 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_util.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -242,7 +241,7 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel {
       Node* batch_size = nullptr;
       TF_RETURN_IF_ERROR(b->AddScalar(batch_size_, &batch_size));
 
-      std::vector<NodeBuilder::NodeOut> padded_shapes;
+      std::vector<Node*> padded_shapes;
       padded_shapes.reserve(padded_shapes_.size());
       for (int i = 0; i < padded_shapes_.size(); i++) {
         Node* node;
@@ -254,7 +253,7 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel {
         padded_shapes.emplace_back(node);
       }
 
-      std::vector<NodeBuilder::NodeOut> padding_values;
+      std::vector<Node*> padding_values;
       padding_values.reserve(padding_values_.size());
       for (const Tensor& t : padding_values_) {
         Node* node;
@@ -408,7 +407,7 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         if (reader->Contains(full_name("exhausted"))) {
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3f88d6dee80ea7b07ef1ce88ee76edba65cddcde
--- /dev/null
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -0,0 +1,503 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <deque>
+
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
+#include "tensorflow/core/lib/random/random.h"
+
+namespace tensorflow {
+
+namespace {
+
+// See documentation in ../ops/dataset_ops.cc for a high-level
+// description of the following op.
+
+class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
+ public:
+  explicit ParallelInterleaveDatasetOp(OpKernelConstruction* ctx)
+      : UnaryDatasetOpKernel(ctx),
+        graph_def_version_(ctx->graph_def_version()) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+  }
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    OpInputList inputs;
+    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
+    std::vector<Tensor> other_arguments;
+    other_arguments.reserve(inputs.size());
+    for (const Tensor& t : inputs) {
+      other_arguments.push_back(t);
+    }
+
+    int64 cycle_length = 0;
+    OP_REQUIRES_OK(ctx,
+                   ParseScalarArgument(ctx, "cycle_length", &cycle_length));
+    OP_REQUIRES(ctx, cycle_length > 0,
+                errors::InvalidArgument("`cycle_length` must be > 0"));
+
+    int64 block_length = 0;
+    OP_REQUIRES_OK(ctx,
+                   ParseScalarArgument(ctx, "block_length", &block_length));
+    OP_REQUIRES(ctx, block_length > 0,
+                errors::InvalidArgument("`block_length` must be > 0"));
+
+    bool sloppy = false;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "sloppy", &sloppy));
+
+    int64 buffer_output_elements = 0;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "buffer_output_elements",
+                                            &buffer_output_elements));
+    OP_REQUIRES(
+        ctx, buffer_output_elements > 0,
+        errors::InvalidArgument("`buffer_output_elements` must be > 0"));
+
+    int64 prefetch_input_elements = 0;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "prefetch_input_elements",
+                                            &prefetch_input_elements));
+    OP_REQUIRES(
+        ctx, prefetch_input_elements >= 0,
+        errors::InvalidArgument("`prefetch_input_elements` must be >= 0"));
+
+    std::unique_ptr<CapturedFunction> captured_func;
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
+                            func_, std::move(other_arguments), &captured_func));
+
+    *output =
+        new Dataset(input, std::move(captured_func), cycle_length, block_length,
+                    sloppy, buffer_output_elements, prefetch_input_elements,
+                    output_types_, output_shapes_);
+  }
+
+ private:
+  class Dataset : public DatasetBase {
+   public:
+    Dataset(const DatasetBase* input,
+            std::unique_ptr<CapturedFunction> captured_func, int64 cycle_length,
+            int64 block_length, bool sloppy, int64 buffer_output_elements,
+            int64 prefetch_input_elements, const DataTypeVector& output_types,
+            const std::vector<PartialTensorShape>& output_shapes)
+        : input_(input),
+          captured_func_(std::move(captured_func)),
+          cycle_length_(cycle_length),
+          block_length_(block_length),
+          sloppy_(sloppy),
+          buffer_output_elements_(buffer_output_elements),
+          prefetch_input_elements_(prefetch_input_elements),
+          output_types_(output_types),
+          output_shapes_(output_shapes) {
+      input_->Ref();
+    }
+
+    ~Dataset() override { input_->Unref(); }
+
+    std::unique_ptr<IteratorBase> MakeIterator(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(new Iterator(
+          {this, strings::StrCat(prefix, "::ParallelInterleave")}));
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      return output_types_;
+    }
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      return output_shapes_;
+    }
+
+    string DebugString() override {
+      return "ParallelInterleaveDatasetOp::Dataset";
+    }
+
+   private:
+    int64 num_threads() const {
+      return cycle_length_ + prefetch_input_elements_;
+    }
+
+    // Parallel interleave's implementation is designed around a few principles:
+    //  1. Thread creation is relatively expensive. (Not reusing
+    //     threads causes a number of indirect costs such as poorer tcmalloc
+    //     performance due to thread-local caches, etc.) We allocate a fixed
+    //     number of threads at the start and never change. This is why we've
+    //     fused functionality that is theoretically orthogonal (i.e.
+    //     .prefetch()) into the implementation.
+    //  2. Drop-in replacement for standard interleave. The goal will be to
+    //     auto-opt people into an optimized implementation without any work
+    //     on the customer's part. We thus go through great pains to maintain
+    //     identical iteration orders, full determinism (disabled only via a
+    //     flag, etc.)
+    //  3. Performance across a variety of environments and I/O envelopes.
+    //
+    // The actual implementation centers around a collection of worker threads
+    // and their corresponding worker state (tracked in the `workers_` vector).
+    // Worker threads repeatedly receive a vector of Tensors that are used as
+    // input to the flat-map function (`captured_func_`). The output of this
+    // function must be a dataset. The worker thread then repeatedly calls
+    // `GetNext()`, maintaining a buffer of elements to minimize the likelihood
+    // that a caller will block waiting for an element to be produced.
+    //
+    // Pointers to these worker states are kept in 2 disjoint data structures:
+    //  1. `interleave_` is a vector containing pointers to `WorkerState`s that
+    //  we
+    //     are interleaving. Worker threads backing these WorkerStates should
+    //     be regularly producing values.
+    //  2. `staging_` is a deque containing pointers to WorkerStates that we
+    //     will move to `interleave_` when an iterator in `interleave_` is
+    //     exhausted.
+    //
+    // The client calls `GetNext[Internal]()` to retrieve an output element. The
+    // internal implementation updates the state of `interleave_` and `staging_`
+    // as output iterators (run by the worker threads) are exhausted.
+    //
+    // `input_impl_` is the input iterator that generates arguments for the
+    // flat-map function (`captured_func_`). It is set to an iterator at
+    // Iterator construction, and is fixed until we consume all input elements.
+    // Once it is exhausted, we reset the unique_ptr to eagerly deallocate
+    // memory.
+    //
+    // A few invariants are maintained:
+    //  1. No element in interleave_ should be a nullptr unless `staging_` is
+    //     empty and `input_impl_` is empty.
+    //  2. Every `worker_` element is pointed to by at most one element of the
+    //     union of `interleave_` and `staging_`.
+    //  3. Unless `input_impl_` is empty, every `worker_` must be pointed to by
+    //     an element in `interleave_` or `staging_`.
+    class Iterator : public DatasetIterator<Dataset> {
+     public:
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params),
+            input_impl_(params.dataset->input_->MakeIterator(params.prefix)),
+            workers_(dataset()->num_threads()) {}
+
+      ~Iterator() override {
+        mutex_lock l(mu_);
+        cancelled_ = true;
+        // Notify all workers in case they are blocked.
+        for (auto& worker : workers_) {
+          worker.cond_var.notify_all();
+        }
+      }
+
+      // It is implemented so that it matches the deterministic interleave
+      // unless getting the next element would block and we are allowed to be
+      // sloppy.
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector<Tensor>* out_tensors,
+                             bool* end_of_sequence) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(EnsureWorkerThreadsStarted(ctx));
+        while (!cancelled_) {
+          // Wait for an item to become available, blocking if necessary. If we
+          // are allowed to be sloppy, we can skip over input datasets that do
+          // not have an item readily available.
+          bool can_produce_elements = false;
+          bool must_wait_for_input = true;
+          for (int64 i = 0; i < interleave_.size(); ++i) {
+            int64 index = (next_index_ + i) % interleave_.size();
+            WorkerState* current_worker = interleave_[index];
+            if (!current_worker) continue;  // Empty interleave elements.
+            can_produce_elements |= current_worker->MayHaveElements();
+            if (!current_worker->outputs.empty()) {
+              // We have an element!
+              next_index_ = index;
+              if (i == 0) {
+                block_count_++;
+                if (block_count_ == dataset()->block_length_) {
+                  next_index_ = (index + 1) % interleave_.size();
+                  block_count_ = 0;
+                }
+              } else {
+                block_count_ = 0;
+              }
+              *end_of_sequence = false;
+              Status s = current_worker->outputs.front().status;
+              current_worker->outputs.front().output.swap(*out_tensors);
+              current_worker->outputs.pop_front();
+              current_worker->cond_var.notify_one();
+              return s;
+            } else if (current_worker->is_producing && !dataset()->sloppy_) {
+              // current_worker.outputs.empty(), and we must wait for this
+              // iterator.
+              if (next_index_ != index) {
+                // We have advanced to a new iterator; reset block counts.
+                next_index_ = index;
+                block_count_ = 0;
+              }
+              break;
+            } else if (!current_worker->is_producing) {
+              // This iterator has reached end of input.
+              interleave_[index] = nullptr;
+              if (input_impl_) {
+                // Start prefetching a new iterator.
+                std::vector<Tensor> args;
+                bool end_of_input = false;
+                Status s = input_impl_->GetNext(ctx, &args, &end_of_input);
+                if (end_of_input) {
+                  input_impl_.reset();
+                } else {
+                  current_worker->SetInputs(s, std::move(args));
+                  staging_.emplace_back(current_worker);
+                }
+              }
+
+              if (!staging_.empty()) {
+                // Move a worker from `staging_` to `interleave_`.
+                interleave_[index] = staging_.front();
+                staging_.pop_front();
+
+                next_index_ = (index + 1) % interleave_.size();
+                block_count_ = 0;
+                // Restart the inner [for] loop
+                can_produce_elements = true;
+                must_wait_for_input = false;
+                break;
+              }
+            }
+          }
+
+          if (!can_produce_elements && !input_impl_) {
+            // No potential for future values.
+            *end_of_sequence = true;
+            return Status::OK();
+          }
+
+          if (must_wait_for_input) {
+            // Wait for elements to become available.
+            if (dataset()->sloppy_) {
+              sloppy_cond_var_.wait(l);
+            } else {
+              interleave_[next_index_]->cond_var.wait(l);
+            }
+          }
+        }
+        return errors::Cancelled(
+            "ParallelInterleaveDatasetOp::Dataset::Iterator::GetNext");
+      }
+
+     private:
+      // OutputElem contains the information from a call to GetNext by an output
+      // iterator.
+      struct OutputElem {
+        // The output iterator sets `status` if getting the output element
+        // fails.
+        Status status;
+        // The buffered data element.
+        std::vector<Tensor> output;
+
+        explicit OutputElem(const Status& s) : status(s) {}
+      };
+
+      // Worker threads operate on their relevant WorkerState structs.
+      //
+      // WorkerState's fields are all protected by mu_;
+      struct WorkerState {
+        // The arguments to be used to construct an output iterator.
+        std::vector<Tensor> input;
+        // The buffered output elements.
+        std::deque<OutputElem> outputs;
+        // Set to true iff the worker thread expects to append more elements to
+        // outputs. is_producing can be false despite !outputs.empty().
+        // Concretely, all output elements will have been consumed only when:
+        // is_producing == false && outputs.empty();
+        bool is_producing = false;
+        // Condition variable used to coordinate between threads. The worker
+        // thread waits on this condition variable when it is either (1) waiting
+        // for the main thread to add arguments to `input`, or (2) waiting for
+        // the main thread to consume an element of `outputs`. The main thread
+        // waits on cond_var if it is waiting for the worker thread to produce
+        // an element into `outputs` (this implies sloppy_==false).
+        condition_variable cond_var;
+
+        inline bool MayHaveElements() const {
+          return is_producing || !outputs.empty();
+        }
+
+        // Sets inputs for a worker thread and notifies it to start processing.
+        void SetInputs(const Status& s, std::vector<Tensor> input_arguments) {
+          if (s.ok()) {
+            DCHECK(!MayHaveElements())
+                << "Tried to start inputs, despite already producing!";
+            input = std::move(input_arguments);
+            is_producing = true;
+            cond_var.notify_one();
+          } else {
+            outputs.emplace_back(s);
+          }
+        }
+      };
+
+      Status EnsureWorkerThreadsStarted(IteratorContext* ctx)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        if (worker_threads_.empty()) {
+          worker_threads_.reserve(dataset()->num_threads());
+          for (int64 i = 0; i < dataset()->num_threads(); ++i) {
+            std::vector<Tensor> args;
+            bool end_of_input = false;
+            Status s = input_impl_->GetNext(ctx, &args, &end_of_input);
+            if (end_of_input) {
+              input_impl_.reset();
+              return Status::OK();
+            }
+            workers_[i].SetInputs(s, std::move(args));
+            worker_threads_.emplace_back(ctx->env()->StartThread(
+                {}, "worker_thread",
+                std::bind(&Iterator::WorkerThread, this,
+                          new IteratorContext(*ctx), i)));
+            if (i < dataset()->cycle_length_) {
+              interleave_.push_back(&workers_[i]);
+            } else {
+              staging_.push_back(&workers_[i]);
+            }
+          }
+          DCHECK(interleave_.size() == dataset()->cycle_length_);
+          DCHECK(staging_.size() == dataset()->prefetch_input_elements_);
+        }
+        return Status::OK();
+      }
+
+      // Produces elements into the worker's output buffers.
+      void WorkerThread(IteratorContext* ctx_ptr, const int64 thread_index) {
+        // std::function arguments are copy-constructable, so we pass raw
+        // pointers, and then immediately wrap them to ensure correct ownership.
+        std::unique_ptr<IteratorContext> ctx(ctx_ptr);
+        auto cleanup = gtl::MakeCleanup([this, thread_index] {
+          mutex_lock l(mu_);
+          workers_[thread_index].cond_var.notify_all();
+        });
+
+        while (true) {
+          // 1. Wait for input.
+          std::vector<Tensor> input;
+          {
+            mutex_lock l(mu_);
+            while (!cancelled_ && !workers_[thread_index].is_producing) {
+              workers_[thread_index].cond_var.wait(l);
+            }
+            if (cancelled_) return;
+            input.swap(workers_[thread_index].input);
+          }
+
+          // 2. Run the user defined function to produce a new iterator.
+          std::unique_ptr<IteratorBase> iterator;
+          Status s = dataset::MakeIteratorFromInputElement(
+              ctx.get(), input, thread_index, dataset()->captured_func_.get(),
+              prefix(), &iterator);
+          input.clear();  // Release memory as early as possible.
+
+          if (!s.ok()) {
+            mutex_lock l(mu_);
+            workers_[thread_index].outputs.emplace_back(s);
+            workers_[thread_index].is_producing = false;
+            workers_[thread_index].cond_var.notify_one();
+          } else {
+            // 3. Produce elements
+            bool end_of_sequence = false;
+            while (!end_of_sequence) {
+              // 3.a Produce an element!
+              std::vector<Tensor> output_elem;
+              s = iterator->GetNext(ctx.get(), &output_elem, &end_of_sequence);
+
+              // 3.b Make it available to the client.
+              {
+                mutex_lock l(mu_);
+
+                // Wait for space in the prefetch queue.
+                while (!cancelled_ && workers_[thread_index].outputs.size() ==
+                                          dataset()->buffer_output_elements_) {
+                  workers_[thread_index].cond_var.wait(l);
+                }
+                if (cancelled_) return;
+
+                // Output the element.
+                workers_[thread_index].is_producing = !end_of_sequence;
+                if (!end_of_sequence) {
+                  workers_[thread_index].outputs.emplace_back(s);
+                  workers_[thread_index].outputs.back().output.swap(
+                      output_elem);
+                }
+                if (dataset()->sloppy_) {
+                  sloppy_cond_var_.notify_one();
+                } else {
+                  workers_[thread_index].cond_var.notify_one();
+                }
+              }
+            }
+          }
+        }
+      }
+
+      // Mutex & condition variable to guard mutable iterator internals and
+      // coordinate among worker threads and client thread[s].
+      mutex mu_;
+      // The main thread waits on this condition variable if running in sloppy
+      // mode and no values are available.
+      condition_variable sloppy_cond_var_;
+
+      // The iterator producing elements which are converted to datasets by
+      // the dataset()->captured_func_ then interleaved together.
+      // input_impl_ is reset when we have exhausted its input.
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
+
+      // The WorkerState structs the worker threads operate on.
+      // workers_ elements are in at most one of interleave_ and staging_.
+      std::vector<WorkerState> workers_ GUARDED_BY(mu_);
+
+      // The iterators to interleave
+      std::vector<WorkerState*> interleave_ GUARDED_BY(mu_);
+      // Prefetched iterators
+      std::deque<WorkerState*> staging_ GUARDED_BY(mu_);
+
+      // The index into output_elements_ for next element to produce.
+      size_t next_index_ GUARDED_BY(mu_) = 0;
+      // The number of items produced so far within the block
+      size_t block_count_ GUARDED_BY(mu_) = 0;
+      // Flag to instruct the worker threads to exit.
+      bool cancelled_ GUARDED_BY(mu_) = false;
+      // The worker threads. This must be last to ensure the
+      // threads have exited before any other members are deallocated.
+      // TODO(b/65178177): Avoid allocating additional threads.
+      std::vector<std::unique_ptr<Thread>> worker_threads_ GUARDED_BY(mu_);
+    };
+
+    const DatasetBase* const input_;
+    const std::unique_ptr<CapturedFunction> captured_func_;
+    const int64 cycle_length_;
+    const int64 block_length_;
+    const bool sloppy_;
+    const int64 buffer_output_elements_;
+    const int64 prefetch_input_elements_;
+    const DataTypeVector output_types_;
+    const std::vector<PartialTensorShape> output_shapes_;
+  };
+
+  const int graph_def_version_;
+  DataTypeVector output_types_;
+  std::vector<PartialTensorShape> output_shapes_;
+  NameAttrList func_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("ParallelInterleaveDataset").Device(DEVICE_CPU),
+                        ParallelInterleaveDatasetOp);
+
+}  // namespace
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f09871d98d3eac325b91b52c7f7b6d4e18e6012e
--- /dev/null
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -0,0 +1,403 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <deque>
+
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/random/random.h"
+
+namespace tensorflow {
+
+namespace {
+
+// See documentation in ../ops/dataset_ops.cc for a high-level
+// description of the following op.
+
+class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
+ public:
+  explicit ParallelMapDatasetOp(OpKernelConstruction* ctx)
+      : UnaryDatasetOpKernel(ctx),
+        graph_def_version_(ctx->graph_def_version()) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+  }
+
+ protected:
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    OpInputList inputs;
+    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
+    std::vector<Tensor> other_arguments;
+    other_arguments.reserve(inputs.size());
+    for (const Tensor& t : inputs) {
+      other_arguments.push_back(t);
+    }
+
+    int32 num_parallel_calls;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
+                                            &num_parallel_calls));
+    OP_REQUIRES(ctx, num_parallel_calls > 0,
+                errors::InvalidArgument(
+                    "num_parallel_calls must be greater than zero."));
+
+    std::unique_ptr<CapturedFunction> captured_func;
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
+                            func_, std::move(other_arguments), &captured_func));
+
+    *output = new Dataset(ctx, input, func_, num_parallel_calls, output_types_,
+                          output_shapes_, std::move(captured_func));
+  }
+
+ private:
+  class Dataset : public GraphDatasetBase {
+   public:
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func, int32 num_parallel_calls,
+            const DataTypeVector& output_types,
+            const std::vector<PartialTensorShape>& output_shapes,
+            std::unique_ptr<CapturedFunction> captured_func)
+        : GraphDatasetBase(ctx),
+          input_(input),
+          func_(func),
+          num_parallel_calls_(num_parallel_calls),
+          output_types_(output_types),
+          output_shapes_(output_shapes),
+          captured_func_(std::move(captured_func)) {
+      input_->Ref();
+    }
+
+    ~Dataset() override { input_->Unref(); }
+
+    std::unique_ptr<IteratorBase> MakeIterator(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::ParallelMap")}));
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      return output_types_;
+    }
+
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      return output_shapes_;
+    }
+
+    string DebugString() override { return "ParallelMapDatasetOp::Dataset"; }
+
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      // Input: input_dataset
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node));
+
+      // Input: other_arguments
+      DataTypeVector other_arguments_types(
+          captured_func_->captured_inputs().size());
+      std::vector<Node*> other_arguments(
+          captured_func_->captured_inputs().size());
+      for (const Tensor& t : captured_func_->captured_inputs()) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        other_arguments.emplace_back(node);
+        other_arguments_types.emplace_back(t.dtype());
+      }
+
+      // Input: num_parallel_calls
+      Node* num_parallel_calls = nullptr;
+      TF_RETURN_IF_ERROR(
+          b->AddScalar(num_parallel_calls_, &num_parallel_calls));
+
+      // Attr: f
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
+      AttrValue f;
+      b->BuildAttrValue(func_, &f);
+
+      // Attr: Targuments
+      AttrValue other_arguments_types_attr;
+      b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
+
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this,
+          {std::make_pair(0, input_graph_node),
+           std::make_pair(2, num_parallel_calls)},  // Single tensor inputs.
+          {std::make_pair(1, other_arguments)},     // Tensor list inputs.
+          {std::make_pair("f", f),
+           std::make_pair("Targuments", other_arguments_types_attr)},  // Attrs
+          output));
+      return Status::OK();
+    }
+
+   private:
+    class Iterator : public DatasetIterator<Dataset> {
+     public:
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params),
+            input_impl_(params.dataset->input_->MakeIterator(params.prefix)),
+            invocation_results_(params.dataset->num_parallel_calls_) {}
+
+      ~Iterator() override {
+        // TODO(mrry): Replace this cancellation logic with a
+        // CancellationManager. The syntax would be more heavyweight,
+        // but it would be possible to thread a cancellation manager
+        // through the IteratorContext to upstream,
+        // potentially-blocking iterators, when we add these.
+        {
+          mutex_lock l(mu_);
+          for (size_t i = 0; i < dataset()->num_parallel_calls_; ++i) {
+            if (invocation_results_[i].notification) {
+              invocation_results_[i].notification->WaitForNotification();
+            }
+          }
+        }
+      }
+
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector<Tensor>* out_tensors,
+                             bool* end_of_sequence) override {
+        mutex_lock l(mu_);
+
+        // Ensure that there are `dataset()->num_parallel_calls_`
+        // invocations of `func_` outstanding at once.
+        while (input_impl_ && (num_inputs_consumed_ - num_outputs_consumed_ <
+                               dataset()->num_parallel_calls_)) {
+          InvokeFunctionLocked(ctx);
+        }
+
+        if (!input_impl_ && num_inputs_consumed_ == num_outputs_consumed_) {
+          *end_of_sequence = true;
+          return Status::OK();
+        }
+
+        // Read the next result out of `invocation_results_`, which
+        // acts as a circular buffer.
+        const size_t result_index =
+            num_outputs_consumed_ % dataset()->num_parallel_calls_;
+        InvocationResult* result = &invocation_results_[result_index];
+        *end_of_sequence = false;
+        if (result->notification) {
+          result->notification->WaitForNotification();
+          if (result->status.ok()) {
+            std::swap(*out_tensors, result->return_values);
+          }
+        }
+        ++num_outputs_consumed_;
+        return result->status;
+      }
+
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        if (input_impl_) {
+          TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        } else {
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(full_name("end_of_input"), ""));
+        }
+        TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("num_inputs_consumed"),
+                                               num_inputs_consumed_));
+        TF_RETURN_IF_ERROR(writer->WriteScalar(
+            full_name("num_outputs_consumed"), num_outputs_consumed_));
+
+        for (size_t i = 0; i < dataset()->num_parallel_calls_; i++) {
+          if (invocation_results_[i].notification) {
+            invocation_results_[i].notification->WaitForNotification();
+            TF_RETURN_IF_ERROR(
+                WriteStatusLocked(writer, i, invocation_results_[i].status));
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                full_name(strings::StrCat("invocation_results[", i, "].size")),
+                invocation_results_[i].return_values.size()));
+            for (size_t j = 0; j < invocation_results_[i].return_values.size();
+                 j++) {
+              TF_RETURN_IF_ERROR(writer->WriteTensor(
+                  full_name(
+                      strings::StrCat("invocation_results[", i, "][", j, "]")),
+                  invocation_results_[i].return_values[j]));
+            }
+          } else {
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                full_name(strings::StrCat("invocation_results[", i, "]_empty")),
+                ""));
+          }
+        }
+
+        return Status::OK();
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        if (reader->Contains(full_name("end_of_input"))) {
+          input_impl_.reset();
+        } else {
+          TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+        }
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("num_inputs_consumed"),
+                                              &num_inputs_consumed_));
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("num_outputs_consumed"),
+                                              &num_outputs_consumed_));
+        for (size_t i = 0; i < dataset()->num_parallel_calls_; i++) {
+          InvocationResult* result = &invocation_results_[i];
+          *result = InvocationResult();
+          if (!reader->Contains(full_name(
+                  strings::StrCat("invocation_results[", i, "]_empty")))) {
+            result->notification.reset(new Notification);
+            result->notification->Notify();
+            TF_RETURN_IF_ERROR(ReadStatusLocked(reader, i, &result->status));
+            size_t num_return_values;
+            {
+              int64 size;
+              TF_RETURN_IF_ERROR(
+                  reader->ReadScalar(full_name(strings::StrCat(
+                                         "invocation_results[", i, "].size")),
+                                     &size));
+              num_return_values = static_cast<size_t>(size);
+              if (num_return_values != size) {
+                return errors::InvalidArgument(strings::StrCat(
+                    full_name(
+                        strings::StrCat("invocation_results[", i, "].size")),
+                    ": ", size, " is not a valid value of type size_t."));
+              }
+            }
+            result->return_values.reserve(num_return_values);
+            for (size_t j = 0; j < num_return_values; j++) {
+              result->return_values.emplace_back();
+              TF_RETURN_IF_ERROR(reader->ReadTensor(
+                  full_name(
+                      strings::StrCat("invocation_results[", i, "][", j, "]")),
+                  &result->return_values.back()));
+            }
+          }
+        }
+        return Status::OK();
+      }
+
+     private:
+      struct InvocationResult {
+        Status status;
+        std::unique_ptr<Notification> notification;
+        std::vector<Tensor> return_values;
+      };
+
+      void InvokeFunctionLocked(IteratorContext* ctx)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        DCHECK(input_impl_);
+        DCHECK(num_inputs_consumed_ - num_outputs_consumed_ <
+               dataset()->num_parallel_calls_);
+
+        // The result of invoking the function will be written into the next
+        // slot in `invocation_results_`, which acts as a circular buffer.
+        const size_t result_index =
+            num_inputs_consumed_ % dataset()->num_parallel_calls_;
+        InvocationResult* result = &invocation_results_[result_index];
+        *result = InvocationResult();
+
+        // Get the next input element.
+        std::vector<Tensor> input_element;
+        bool end_of_input;
+        result->status =
+            input_impl_->GetNext(ctx, &input_element, &end_of_input);
+        if (end_of_input) {
+          input_impl_.reset();
+          result->status = errors::OutOfRange("");
+        } else {
+          ++num_inputs_consumed_;
+        }
+
+        if (result->status.ok()) {
+          // Call `func_(input_element)`, store the result in
+          // `result->return_values`, and notify `result->notification`
+          // to unblock a consumer.
+          result->notification.reset(new Notification);
+          dataset()->captured_func_->RunAsync(
+              ctx, std::move(input_element), &result->return_values,
+              [result, result_index](Status ret_status) {
+                result->status.Update(ret_status);
+                result->notification->Notify();
+              });
+        }
+      }
+
+      Status WriteStatusLocked(IteratorStateWriter* writer, size_t index,
+                               const Status& status)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        TF_RETURN_IF_ERROR(writer->WriteScalar(
+            CodeKey(index), static_cast<int64>(status.code())));
+        if (!status.ok()) {
+          TF_RETURN_IF_ERROR(writer->WriteScalar(ErrorMessageKey(index),
+                                                 status.error_message()));
+        }
+        return Status::OK();
+      }
+
+      Status ReadStatusLocked(IteratorStateReader* reader, size_t index,
+                              Status* status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        int64 code_int;
+        TF_RETURN_IF_ERROR(reader->ReadScalar(CodeKey(index), &code_int));
+        error::Code code = static_cast<error::Code>(code_int);
+
+        if (code != error::Code::OK) {
+          string error_message;
+          TF_RETURN_IF_ERROR(
+              reader->ReadScalar(ErrorMessageKey(index), &error_message));
+          *status = Status(code, error_message);
+        } else {
+          *status = Status::OK();
+        }
+        return Status::OK();
+      }
+
+      string CodeKey(size_t index) {
+        return full_name(
+            strings::StrCat("invocation_results[", index, "].code"));
+      }
+
+      string ErrorMessageKey(size_t index) {
+        return full_name(
+            strings::StrCat("invocation_results[", index, "].error_message"));
+      }
+
+      mutex mu_;
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
+      std::vector<InvocationResult> invocation_results_ GUARDED_BY(mu_);
+      int64 num_inputs_consumed_ GUARDED_BY(mu_) = 0;
+      int64 num_outputs_consumed_ GUARDED_BY(mu_) = 0;
+    };
+
+    const DatasetBase* const input_;
+    const NameAttrList func_;
+    const int32 num_parallel_calls_;
+    const DataTypeVector output_types_;
+    const std::vector<PartialTensorShape> output_shapes_;
+    const std::unique_ptr<CapturedFunction> captured_func_;
+  };
+
+  const int graph_def_version_;
+  DataTypeVector output_types_;
+  std::vector<PartialTensorShape> output_shapes_;
+  NameAttrList func_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("ParallelMapDataset").Device(DEVICE_CPU),
+                        ParallelMapDatasetOp);
+
+}  // namespace
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
similarity index 98%
rename from tensorflow/core/kernels/prefetch_dataset_op.cc
rename to tensorflow/core/kernels/data/prefetch_dataset_op.cc
index b02269f525a8bec3b6ddb01a5039316a7c47a309..1c548a30d2c8e7f33db85000d0f480b3151d6ecf 100644
--- a/tensorflow/core/kernels/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -16,7 +16,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 
 namespace tensorflow {
@@ -164,7 +164,7 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel {
                 buffer_element.value.size()));
             for (size_t j = 0; j < buffer_element.value.size(); j++) {
               TF_RETURN_IF_ERROR(writer->WriteTensor(
-                  strings::StrCat("buffer[", i, "][", j, "]"),
+                  full_name(strings::StrCat("buffer[", i, "][", j, "]")),
                   buffer_element.value[j]));
             }
           }
@@ -172,7 +172,7 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock parent_l(parent_mu_);
         mutex_lock l(mu_);
@@ -201,7 +201,7 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel {
             for (size_t j = 0; j < value_size; j++) {
               buffer_element.value.emplace_back();
               TF_RETURN_IF_ERROR(reader->ReadTensor(
-                  strings::StrCat("buffer[", i, "][", j, "]"),
+                  full_name(strings::StrCat("buffer[", i, "][", j, "]")),
                   &buffer_element.value.back()));
             }
           }
diff --git a/tensorflow/core/kernels/data/random_dataset_op.cc b/tensorflow/core/kernels/data/random_dataset_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..bc638864b0147f4d71b3382ea320453e972ba8d7
--- /dev/null
+++ b/tensorflow/core/kernels/data/random_dataset_op.cc
@@ -0,0 +1,154 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/lib/random/philox_random.h"
+#include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/lib/random/random_distributions.h"
+
+namespace tensorflow {
+
+namespace {
+
+// See documentation in ../ops/dataset_ops.cc for a high-level
+// description of the following op.
+
+class RandomDatasetOp : public DatasetOpKernel {
+ public:
+  explicit RandomDatasetOp(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) {}
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override {
+    int64 seed;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed", &seed));
+
+    int64 seed2;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed2", &seed2));
+
+    // By TensorFlow convention, passing 0 for both seeds indicates
+    // that the shuffling should be seeded non-deterministically.
+    if (seed == 0 && seed2 == 0) {
+      seed = random::New64();
+      seed2 = random::New64();
+    }
+
+    *output = new Dataset(ctx, seed, seed2);
+  }
+
+ private:
+  class Dataset : public GraphDatasetBase {
+   public:
+    Dataset(OpKernelContext* ctx, int64 seed, int64 seed2)
+        : GraphDatasetBase(ctx), seed_(seed), seed2_(seed2) {}
+
+    std::unique_ptr<IteratorBase> MakeIterator(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::Random")}));
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      static DataTypeVector* dtypes = new DataTypeVector({DT_INT64});
+      return *dtypes;
+    }
+
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      static std::vector<PartialTensorShape>* shapes =
+          new std::vector<PartialTensorShape>({{}});
+      return *shapes;
+    }
+
+    string DebugString() override {
+      return strings::StrCat("RandomDatasetOp(", seed_, ", ", seed2_,
+                             ")::Dataset");
+    }
+
+   protected:
+    Status AsGraphDefInternal(DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* seed = nullptr;
+      Node* seed2 = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(seed_, &seed));
+      TF_RETURN_IF_ERROR(b->AddScalar(seed2_, &seed2));
+      TF_RETURN_IF_ERROR(b->AddDataset(this, {seed, seed2}, output));
+      return Status::OK();
+    }
+
+   private:
+    class Iterator : public DatasetIterator<Dataset> {
+     public:
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params),
+            parent_generator_(dataset()->seed_, dataset()->seed2_),
+            generator_(&parent_generator_) {}
+
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector<Tensor>* out_tensors,
+                             bool* end_of_sequence) override {
+        mutex_lock l(mu_);
+        Tensor value_tensor(cpu_allocator(), DT_INT64, {});
+        value_tensor.scalar<int64>()() = Random();
+        out_tensors->emplace_back(std::move(value_tensor));
+        *end_of_sequence = false;
+        return Status::OK();
+      }
+
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("num_random_samples"),
+                                               num_random_samples_));
+        return Status::OK();
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("num_random_samples"),
+                                              &num_random_samples_));
+        parent_generator_ =
+            random::PhiloxRandom(dataset()->seed_, dataset()->seed2_);
+        generator_ = random::SingleSampleAdapter<random::PhiloxRandom>(
+            &parent_generator_);
+        generator_.Skip(num_random_samples_);
+        return Status::OK();
+      }
+
+     private:
+      random::SingleSampleAdapter<random::PhiloxRandom>::ResultType Random()
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        num_random_samples_++;
+        auto out = generator_();
+        return out;
+      }
+      mutex mu_;
+      random::PhiloxRandom parent_generator_ GUARDED_BY(mu_);
+      random::SingleSampleAdapter<random::PhiloxRandom> generator_
+          GUARDED_BY(mu_);
+      int64 num_random_samples_ GUARDED_BY(mu_) = 0;
+    };
+
+    const int64 seed_;
+    const int64 seed2_;
+  };
+};
+
+REGISTER_KERNEL_BUILDER(Name("RandomDataset").Device(DEVICE_CPU),
+                        RandomDatasetOp);
+
+}  // namespace
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/range_dataset_op.cc b/tensorflow/core/kernels/data/range_dataset_op.cc
similarity index 97%
rename from tensorflow/core/kernels/range_dataset_op.cc
rename to tensorflow/core/kernels/data/range_dataset_op.cc
index e7ae840fc7d023cda8c11ecd1f7cde3842a9da00..d0bc61acd99afae14ddc8a3e678acb4197fcea71 100644
--- a/tensorflow/core/kernels/range_dataset_op.cc
+++ b/tensorflow/core/kernels/data/range_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -117,7 +116,7 @@ class RangeDatasetOp : public DatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("next"), &next_));
diff --git a/tensorflow/core/kernels/reader_dataset_ops.cc b/tensorflow/core/kernels/data/reader_dataset_ops.cc
similarity index 96%
rename from tensorflow/core/kernels/reader_dataset_ops.cc
rename to tensorflow/core/kernels/data/reader_dataset_ops.cc
index d942ddc4a7b9042038c6b7a2a52e46c1bf45b2a9..aa39fffc2e344db8143b700cbba4c29bdb134964 100644
--- a/tensorflow/core/kernels/reader_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/reader_dataset_ops.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/io/buffered_inputstream.h"
 #include "tensorflow/core/lib/io/inputbuffer.h"
 #include "tensorflow/core/lib/io/random_inputstream.h"
@@ -183,7 +182,7 @@ class TextLineDatasetOp : public DatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         ResetStreamsLocked();
@@ -410,6 +409,20 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel {
           TF_RETURN_IF_ERROR(ctx->env()->GetFileSize(
               dataset()->filenames_[current_file_index_], &file_size));
           file_pos_limit_ = file_size - dataset()->footer_bytes_;
+
+          uint64 body_size =
+              file_size - (dataset()->header_bytes_ + dataset()->footer_bytes_);
+
+          if (body_size % dataset()->record_bytes_ != 0) {
+            return errors::InvalidArgument(
+                "Excluding the header (", dataset()->header_bytes_,
+                " bytes) and footer (", dataset()->footer_bytes_,
+                " bytes), input file \"",
+                dataset()->filenames_[current_file_index_],
+                "\" has body length ", body_size,
+                " bytes, which is not an exact multiple of the record length (",
+                dataset()->record_bytes_, " bytes).");
+          }
           TF_RETURN_IF_ERROR(ctx->env()->NewRandomAccessFile(
               dataset()->filenames_[current_file_index_], &file_));
           input_buffer_.reset(
@@ -434,7 +447,7 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         int64 current_file_index;
@@ -615,7 +628,7 @@ class TFRecordDatasetOp : public DatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         ResetStreamsLocked();
diff --git a/tensorflow/core/kernels/repeat_dataset_op.cc b/tensorflow/core/kernels/data/repeat_dataset_op.cc
similarity index 88%
rename from tensorflow/core/kernels/repeat_dataset_op.cc
rename to tensorflow/core/kernels/data/repeat_dataset_op.cc
index 3d977a0fa38be77ac812cb12aade2af20b871fb8..1cb533158bb5b8bd4b950192ce67e17c0f9d5447 100644
--- a/tensorflow/core/kernels/repeat_dataset_op.cc
+++ b/tensorflow/core/kernels/data/repeat_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -100,7 +99,7 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel {
       Status SaveInternal(IteratorStateWriter* writer) override {
         return Status::OK();
       }
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         return Status::OK();
       }
@@ -148,7 +147,7 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_));
@@ -176,30 +175,25 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel {
                              bool* end_of_sequence) override {
         mutex_lock l(mu_);  // TODO(mrry): Make locking less conservative.
         do {
+          bool first_call = false;
           if (!input_impl_) {
+            first_call = true;
             input_impl_ = dataset()->input_->MakeIterator(prefix());
-            TF_RETURN_IF_ERROR(
-                input_impl_->GetNext(ctx, out_tensors, end_of_sequence));
-            // If the first call to GetNext() fails because the end of
-            // sequence has been reached, we return an OutOfRange
-            // error to terminate the iteration. (Otherwise, this
-            // iterator would loop infinitely and never produce a
-            // value.)
-            if (!*end_of_sequence) {
-              return Status::OK();
-            } else {
-              input_impl_.reset();
+          }
+          TF_RETURN_IF_ERROR(
+              input_impl_->GetNext(ctx, out_tensors, end_of_sequence));
+          if (!*end_of_sequence) {
+            return Status::OK();
+          } else {
+            input_impl_.reset();
+            if (first_call) {
+              // If the first call to GetNext() fails because the end of
+              // sequence has been reached, we return an OutOfRange error to
+              // terminate the iteration. (Otherwise, this iterator would loop
+              // infinitely and never produce a value.)
               return errors::OutOfRange(
                   "Attempted to repeat an empty dataset infinitely.");
             }
-          } else {
-            TF_RETURN_IF_ERROR(
-                input_impl_->GetNext(ctx, out_tensors, end_of_sequence));
-            if (!*end_of_sequence) {
-              return Status::OK();
-            } else {
-              input_impl_.reset();
-            }
           }
         } while (true);
       }
@@ -215,7 +209,7 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         if (reader->Contains(full_name("uninitialized"))) {
diff --git a/tensorflow/core/kernels/scan_dataset_op.cc b/tensorflow/core/kernels/data/scan_dataset_op.cc
similarity index 65%
rename from tensorflow/core/kernels/scan_dataset_op.cc
rename to tensorflow/core/kernels/data/scan_dataset_op.cc
index 76c219f1ae6352f047035b1bfd3231689d0d3771..5dd6ff848eb4836dd9cbc51b9408d01a652241f0 100644
--- a/tensorflow/core/kernels/scan_dataset_op.cc
+++ b/tensorflow/core/kernels/data/scan_dataset_op.cc
@@ -18,8 +18,8 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
@@ -60,24 +60,26 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
     }
 
     std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(ctx, func_, graph_def_version_,
-                                                 std::move(other_arguments),
-                                                 &captured_func));
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
+                            func_, std::move(other_arguments), &captured_func));
 
-    *output =
-        new Dataset(input, std::move(initial_state), std::move(captured_func),
-                    state_types_, output_types_, output_shapes_);
+    *output = new Dataset(ctx, input, func_, std::move(initial_state),
+                          std::move(captured_func), state_types_, output_types_,
+                          output_shapes_);
   }
 
  private:
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    Dataset(const DatasetBase* input, std::vector<Tensor> initial_state,
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func, std::vector<Tensor> initial_state,
             std::unique_ptr<CapturedFunction> captured_func,
             const DataTypeVector& state_types,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes)
-        : input_(input),
+        : GraphDatasetBase(ctx),
+          input_(input),
+          func_(func),
           initial_state_(std::move(initial_state)),
           captured_func_(std::move(captured_func)),
           state_types_(state_types),
@@ -103,6 +105,45 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
 
     string DebugString() override { return "ScanDatasetOp::Dataset"; }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
+      Node* input_node;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_node));
+      std::vector<Node*> initial_state_nodes;
+      initial_state_nodes.reserve(initial_state_.size());
+      for (const Tensor& t : initial_state_) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        initial_state_nodes.emplace_back(node);
+      }
+      std::vector<Node*> other_arguments;
+      other_arguments.reserve(captured_func_->captured_inputs().size());
+      DataTypeVector other_arguments_types;
+      other_arguments_types.reserve(captured_func_->captured_inputs().size());
+      for (const Tensor& t : captured_func_->captured_inputs()) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        other_arguments.emplace_back(node);
+        other_arguments_types.emplace_back(t.dtype());
+      }
+      AttrValue f;
+      b->BuildAttrValue(func_, &f);
+      AttrValue state_types;
+      b->BuildAttrValue(state_types_, &state_types);
+      AttrValue other_arguments_types_attr;
+      b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
+      TF_RETURN_IF_ERROR(
+          b->AddDataset(this, {{0, input_node}},
+                        {{1, initial_state_nodes}, {2, other_arguments}},
+                        {{"f", f},
+                         {"Tstate", state_types},
+                         {"Targuments", other_arguments_types_attr}},
+                        output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
@@ -129,22 +170,12 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
         std::copy(next_element.begin(), next_element.end(),
                   std::back_inserter(args));
 
-        FunctionLibraryRuntime::Options opts;
-        opts.step_id = CapturedFunction::generate_step_id();
-        ScopedStepContainer step_container(
-            opts.step_id, [this, ctx](const string& name) {
-              dataset()
-                  ->captured_func_->resource_manager()
-                  ->Cleanup(name)
-                  .IgnoreError();
-            });
-        opts.step_container = &step_container;
-        opts.runner = ctx->runner();
         std::vector<Tensor> state_and_output;
         state_and_output.reserve(dataset()->state_types_.size() +
                                  output_dtypes().size());
-        Status s =
-            dataset()->captured_func_->Run(opts, args, &state_and_output);
+
+        Status s = dataset()->captured_func_->Run(ctx, std::move(args),
+                                                  &state_and_output);
         if (s.ok()) {
           state_.clear();
           size_t i = 0;
@@ -185,6 +216,38 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
         return s;
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        if (!state_.empty()) {
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(full_name("state_size"), state_.size()));
+          for (int idx = 0; idx < state_.size(); idx++) {
+            TF_RETURN_IF_ERROR(writer->WriteTensor(
+                full_name(strings::StrCat("state[", idx, "]")), state_[idx]));
+          }
+        }
+        return Status::OK();
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+        if (reader->Contains(full_name("state_size"))) {
+          int64 size;
+          TF_RETURN_IF_ERROR(
+              reader->ReadScalar(full_name("state_size"), &size));
+          state_.resize(size);
+          for (int idx = 0; idx < size; idx++) {
+            TF_RETURN_IF_ERROR(reader->ReadTensor(
+                full_name(strings::StrCat("state[", idx, "]")), &state_[idx]));
+          }
+        }
+        return Status::OK();
+      }
+
      private:
       mutex mu_;
       const std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
@@ -192,6 +255,7 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
     };
 
     const DatasetBase* const input_;
+    const NameAttrList func_;
     const std::vector<Tensor> initial_state_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const DataTypeVector state_types_;
diff --git a/tensorflow/core/kernels/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
similarity index 50%
rename from tensorflow/core/kernels/shuffle_dataset_op.cc
rename to tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 72facb3a0d0cc13a559b3d8005592e19b97fed6f..1dde236c1711afd794ff397859631a48984b5ba8 100644
--- a/tensorflow/core/kernels/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
+
+#include <deque>
+#include <vector>
 
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/philox_random.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/random/random_distributions.h"
@@ -29,50 +32,21 @@ const int64 kLogIntervalMicros = 10 * 1000000;  // 10 seconds.
 // See documentation in ../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
-class ShuffleDatasetOp : public UnaryDatasetOpKernel {
+class ShuffleDatasetOpBase : public UnaryDatasetOpKernel {
  public:
-  explicit ShuffleDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("reshuffle_each_iteration",
-                                     &reshuffle_each_iteration_));
-  }
-
-  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
-                   DatasetBase** output) override {
-    int64 buffer_size;
-    OP_REQUIRES_OK(
-        ctx, ParseScalarArgument<int64>(ctx, "buffer_size", &buffer_size));
-    OP_REQUIRES(
-        ctx, buffer_size > 0,
-        errors::InvalidArgument("buffer_size must be greater than zero."));
-
-    int64 seed;
-    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed", &seed));
-
-    int64 seed2;
-    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed2", &seed2));
-
-    // By TensorFlow convention, passing 0 for both seeds indicates
-    // that the shuffling should be seeded non-deterministically.
-    if (seed == 0 && seed2 == 0) {
-      seed = random::New64();
-      seed2 = random::New64();
-    }
-
-    if (reshuffle_each_iteration_) {
-      *output = new ReshufflingDataset(ctx, input, buffer_size, seed, seed2);
-    } else {
-      *output = new FixedSeedDataset(ctx, input, buffer_size, seed, seed2);
-    }
-  }
+  explicit ShuffleDatasetOpBase(OpKernelConstruction* ctx)
+      : UnaryDatasetOpKernel(ctx) {}
 
- private:
+ protected:
   // Abstract base dataset that implements a shuffling iterator.
   class ShuffleDatasetBase : public GraphDatasetBase {
    public:
     ShuffleDatasetBase(OpKernelContext* ctx, const DatasetBase* input,
-                       int64 buffer_size)
-        : GraphDatasetBase(ctx), input_(input), buffer_size_(buffer_size) {
+                       int64 buffer_size, int64 count)
+        : GraphDatasetBase(ctx),
+          input_(input),
+          buffer_size_(buffer_size),
+          count_(count) {
       input_->Ref();
     }
 
@@ -91,12 +65,15 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
      public:
       explicit Iterator(const Params& params, int64 seed, int64 seed2)
           : DatasetIterator<ShuffleDatasetBase>(params),
-            input_impl_(params.dataset->input_->MakeIterator(params.prefix)),
+            input_impl_(nullptr),
             seed_(seed),
             seed2_(seed2),
+            epoch_(0),
+            num_elements_(0),
             parent_generator_(seed, seed2),
             generator_(&parent_generator_) {
-        buffer_.reserve(params.dataset->buffer_size_);
+        buffer_.reset(new std::vector<Tensor>[params.dataset->buffer_size_]);
+        slices_.emplace_back(new Slice{0, 0});
       }
 
       Status GetNextInternal(IteratorContext* ctx,
@@ -105,19 +82,46 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
         mutex_lock l(mu_);
         int64 start_micros = ctx->env()->NowMicros();
         int64 num_log_entries = 0;
-        while (input_impl_ && buffer_.size() < dataset()->buffer_size_) {
+        bool first_call = false;
+        if (!input_impl_ && epoch_ == 0) {
+          first_call = true;
+          input_impl_ = dataset()->input_->MakeIterator(prefix());
+        }
+        while (input_impl_ && num_elements_ < dataset()->buffer_size_) {
           if (ctx->env()->NowMicros() >
               ((num_log_entries + 1) * kLogIntervalMicros) + start_micros) {
             num_log_entries++;
             LOG(INFO) << "Filling up shuffle buffer (this may take a while): "
-                      << buffer_.size() << " of " << dataset()->buffer_size_;
+                      << num_elements_ << " of " << dataset()->buffer_size_;
           }
           std::vector<Tensor> input_element;
-          bool end_of_input_sequence;
-          TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &input_element,
-                                                  &end_of_input_sequence));
+          bool end_of_input_sequence = false;
+          while (dataset()->count_ == -1 || epoch_ < dataset()->count_) {
+            TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &input_element,
+                                                    &end_of_input_sequence));
+            if (!end_of_input_sequence) {
+              first_call = false;
+              break;
+            }
+            if (first_call && dataset()->count_ == -1) {
+              // If the first call to GetNext() fails because the end of
+              // sequence has been reached, we return an OutOfRange error to
+              // terminate the iteration. (Otherwise, this iterator may loop
+              // infinitely and never produce a value.)
+              *end_of_sequence = true;
+              return errors::OutOfRange(
+                  "Attempted to repeat an empty dataset infinitely.");
+            }
+            epoch_++;
+            int64 n = slices_.back()->end;
+            slices_.emplace_back(new Slice{n, n});
+            input_impl_ = dataset()->input_->MakeIterator(prefix());
+          }
           if (!end_of_input_sequence) {
-            buffer_.emplace_back(std::move(input_element));
+            buffer_[slices_.back()->end % dataset()->buffer_size_] =
+                std::move(input_element);
+            num_elements_++;
+            slices_.back()->end++;
           } else {
             input_impl_.reset();
           }
@@ -126,14 +130,25 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
           LOG(INFO) << "Shuffle buffer filled.";
         }
 
-        if (!buffer_.empty()) {
+        if (num_elements_ > 0) {
           *end_of_sequence = false;
-          // Choose an element to produce uniformly at random, and
-          // swap the last element into its place in the buffer.
-          int64 index = Random() % buffer_.size();
+          // Garbage collect all empty slices.
+          while (!slices_.empty() &&
+                 slices_.front()->start == slices_.front()->end) {
+            slices_.pop_front();
+          }
+          DCHECK(!slices_.empty());
+          // Choose an element to produce uniformly at random from the first
+          // slice, and then remove the element from the slice.
+          int64 offset =
+              Random() % (slices_.front()->end - slices_.front()->start);
+          int64 index =
+              (slices_.front()->start + offset) % dataset()->buffer_size_;
           *out_tensors = std::move(buffer_[index]);
-          std::swap(buffer_[index], buffer_.back());
-          buffer_.pop_back();
+          std::swap(buffer_[index],
+                    buffer_[slices_.front()->start % dataset()->buffer_size_]);
+          slices_.front()->start++;
+          num_elements_--;
         } else {
           DCHECK(input_impl_ == nullptr);
           *end_of_sequence = true;
@@ -145,20 +160,6 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
       Status SaveInternal(IteratorStateWriter* writer) override {
         mutex_lock l(mu_);
 
-        // Save the tensors in the buffer.
-        TF_RETURN_IF_ERROR(
-            writer->WriteScalar(full_name("buffer_size"), buffer_.size()));
-        for (size_t i = 0; i < buffer_.size(); i++) {
-          TF_RETURN_IF_ERROR(writer->WriteScalar(
-              full_name(strings::StrCat("buffer_", i, "_size")),
-              buffer_[i].size()));
-          for (size_t j = 0; j < buffer_[i].size(); j++) {
-            TF_RETURN_IF_ERROR(writer->WriteTensor(
-                full_name(strings::StrCat("buffer_", i, "_", j)),
-                buffer_[i][j]));
-          }
-        }
-
         // Save state needed to restore the random number generators.
         TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("num_random_samples"),
                                                num_random_samples_));
@@ -171,34 +172,38 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
         } else {
           TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
         }
+
+        // Save the epoch counter, buffer, and buffer slices.
+        TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("epoch"), epoch_));
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("num_elements"), num_elements_));
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("slices_size"), slices_.size()));
+        for (size_t i = 0; i < slices_.size(); ++i) {
+          TF_RETURN_IF_ERROR(writer->WriteScalar(
+              full_name(strings::StrCat("slices_start_", i)),
+              slices_[i]->start));
+          TF_RETURN_IF_ERROR(writer->WriteScalar(
+              full_name(strings::StrCat("slices_end_", i)), slices_[i]->end));
+          for (size_t j = slices_[i]->start; j < slices_[i]->end; ++j) {
+            size_t index = j % dataset()->buffer_size_;
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                full_name(strings::StrCat("buffer_", index, "_size")),
+                buffer_[index].size()));
+            for (size_t k = 0; k < buffer_[index].size(); ++k) {
+              TF_RETURN_IF_ERROR(writer->WriteTensor(
+                  full_name(strings::StrCat("buffer_", index, "_", k)),
+                  buffer_[index][k]));
+            }
+          }
+        }
+
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
-        buffer_.clear();
-
-        // Restore the buffer.
-        size_t buffer_size;
-        {
-          int64 temp;
-          TF_RETURN_IF_ERROR(
-              reader->ReadScalar(full_name("buffer_size"), &temp));
-          buffer_size = static_cast<size_t>(temp);
-        }
-        buffer_.reserve(buffer_size);
-        for (size_t i = 0; i < buffer_size; i++) {
-          int64 list_size;
-          TF_RETURN_IF_ERROR(reader->ReadScalar(
-              full_name(strings::StrCat("buffer_", i, "_size")), &list_size));
-          buffer_.emplace_back(std::vector<Tensor>(list_size));
-          for (int j = 0; j < list_size; j++) {
-            TF_RETURN_IF_ERROR(reader->ReadTensor(
-                full_name(strings::StrCat("buffer_", i, "_", j)),
-                &buffer_[i][j]));
-          }
-        }
 
         // Restore the random number generators.
         TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("num_random_samples"),
@@ -212,10 +217,58 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
         } else {
           input_impl_.reset();
         }
+
+        // Restore the epoch counter, buffer, and buffer slices.
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("epoch"), &epoch_));
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(full_name("num_elements"), &num_elements_));
+        size_t slices_size;
+        {
+          int64 temp;
+          TF_RETURN_IF_ERROR(
+              reader->ReadScalar(full_name("slices_size"), &temp));
+          slices_size = static_cast<size_t>(temp);
+        }
+        buffer_.reset(new std::vector<Tensor>[dataset()->buffer_size_]);
+        for (size_t i = 0; i < slices_size; ++i) {
+          int64 start;
+          TF_RETURN_IF_ERROR(reader->ReadScalar(
+              full_name(strings::StrCat("slices_start_", i)), &start));
+          int64 end;
+          TF_RETURN_IF_ERROR(reader->ReadScalar(
+              full_name(strings::StrCat("slices_end_", i)), &end));
+          slices_.emplace_back(new Slice{start, end});
+          for (size_t j = start; j < end; ++j) {
+            size_t index = j % dataset()->buffer_size_;
+            int64 list_size;
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                full_name(strings::StrCat("buffer_", index, "_size")),
+                &list_size));
+            buffer_[index] = std::vector<Tensor>(list_size);
+            for (int k = 0; k < list_size; ++k) {
+              TF_RETURN_IF_ERROR(reader->ReadTensor(
+                  full_name(strings::StrCat("buffer_", index, "_", k)),
+                  &buffer_[index][k]));
+            }
+          }
+        }
+
         return Status::OK();
       }
 
      private:
+      // Used to represent slices of `buffer_` that belong to different epochs.
+      // The invariant maintained by the implementation is: `start` <= `end`.
+      // When using `start` and `end` to index into `buffer_`, their values
+      // should be taken modulo the size of `buffer_` as their absolute value
+      // can be greater than the range of `buffer_`.
+      struct Slice {
+        Slice(int64 start, int64 end) : start(start), end(end) {}
+
+        int64 start;
+        int64 end;
+      };
+
       random::SingleSampleAdapter<random::PhiloxRandom>::ResultType Random()
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         num_random_samples_++;
@@ -232,10 +285,13 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
       }
 
       mutex mu_;
-      std::vector<std::vector<Tensor>> buffer_ GUARDED_BY(mu_);
+      std::unique_ptr<std::vector<Tensor>[]> buffer_ GUARDED_BY(mu_);
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       const int64 seed_ GUARDED_BY(mu_);
       const int64 seed2_ GUARDED_BY(mu_);
+      int64 epoch_ GUARDED_BY(mu_);
+      int64 num_elements_ GUARDED_BY(mu_);
+      std::deque<std::unique_ptr<Slice>> slices_ GUARDED_BY(mu_);
       random::PhiloxRandom parent_generator_ GUARDED_BY(mu_);
       random::SingleSampleAdapter<random::PhiloxRandom> generator_
           GUARDED_BY(mu_);
@@ -244,15 +300,58 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
 
     const DatasetBase* const input_;
     const int64 buffer_size_;
+    const int64 count_;
   };
+};
+
+class ShuffleDatasetOp : public ShuffleDatasetOpBase {
+ public:
+  explicit ShuffleDatasetOp(OpKernelConstruction* ctx)
+      : ShuffleDatasetOpBase(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("reshuffle_each_iteration",
+                                     &reshuffle_each_iteration_));
+  }
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    int64 buffer_size;
+    OP_REQUIRES_OK(
+        ctx, ParseScalarArgument<int64>(ctx, "buffer_size", &buffer_size));
+    OP_REQUIRES(
+        ctx, buffer_size > 0,
+        errors::InvalidArgument("buffer_size must be greater than zero."));
+
+    int64 seed;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed", &seed));
+
+    int64 seed2;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed2", &seed2));
+
+    // By TensorFlow convention, passing 0 for both seeds indicates
+    // that the shuffling should be seeded non-deterministically.
+    if (seed == 0 && seed2 == 0) {
+      seed = random::New64();
+      seed2 = random::New64();
+    }
+
+    int64 count = 1;
+    if (reshuffle_each_iteration_) {
+      *output =
+          new ReshufflingDataset(ctx, input, buffer_size, seed, seed2, count);
+    } else {
+      *output =
+          new FixedSeedDataset(ctx, input, buffer_size, seed, seed2, count);
+    }
+  }
 
+ private:
   // A dataset that uses a pseduorandom sequence of seeds for the iterators
   // created from it. Used when `reshuffle_each_iteration` is true.
   class ReshufflingDataset : public ShuffleDatasetBase {
    public:
     ReshufflingDataset(OpKernelContext* ctx, const DatasetBase* input,
-                       int64 buffer_size, int64 seed, int64 seed2)
-        : ShuffleDatasetBase(ctx, input, buffer_size),
+                       int64 buffer_size, int64 seed, int64 seed2, int64 count)
+        : ShuffleDatasetBase(ctx, input, buffer_size, count),
           seed_(seed),
           seed2_(seed2),
           parent_generator_(seed, seed2),
@@ -291,8 +390,8 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
   class FixedSeedDataset : public ShuffleDatasetBase {
    public:
     FixedSeedDataset(OpKernelContext* ctx, const DatasetBase* input,
-                     int64 buffer_size, int64 seed, int64 seed2)
-        : ShuffleDatasetBase(ctx, input, buffer_size),
+                     int64 buffer_size, int64 seed, int64 seed2, int64 count)
+        : ShuffleDatasetBase(ctx, input, buffer_size, count),
           seed_(seed),
           seed2_(seed) {}
 
@@ -337,9 +436,93 @@ class ShuffleDatasetOp : public UnaryDatasetOpKernel {
   bool reshuffle_each_iteration_;
 };
 
+class ShuffleAndRepeatDatasetOp : public ShuffleDatasetOpBase {
+ public:
+  explicit ShuffleAndRepeatDatasetOp(OpKernelConstruction* ctx)
+      : ShuffleDatasetOpBase(ctx) {}
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    int64 buffer_size;
+    OP_REQUIRES_OK(
+        ctx, ParseScalarArgument<int64>(ctx, "buffer_size", &buffer_size));
+    OP_REQUIRES(
+        ctx, buffer_size > 0,
+        errors::InvalidArgument("buffer_size must be greater than zero."));
+
+    int64 seed;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed", &seed));
+
+    int64 seed2;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "seed2", &seed2));
+
+    int64 count;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "count", &count));
+
+    // By TensorFlow convention, if both seeds are 0, then shuffling should be
+    // seeded non-deterministically.
+    if (seed == 0 && seed2 == 0) {
+      seed = random::New64();
+      seed2 = random::New64();
+    }
+
+    *output = new Dataset(ctx, input, buffer_size, seed, seed2, count);
+  }
+
+ private:
+  class Dataset : public ShuffleDatasetBase {
+   public:
+    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 buffer_size,
+            int64 seed, int64 seed2, int64 count)
+        : ShuffleDatasetBase(ctx, input, buffer_size, count),
+          seed_(seed),
+          seed2_(seed2) {}
+
+    string DebugString() override {
+      return strings::StrCat("ShuffleAndRepeatDatasetOp(", buffer_size_, ", ",
+                             seed_, ", ", seed2_, ", ", count_, ")::Dataset");
+    }
+
+    std::unique_ptr<IteratorBase> MakeIterator(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(new ShuffleDatasetBase::Iterator(
+          {this, strings::StrCat(prefix, "::ShuffleAndRepeat")}, seed_,
+          seed2_));
+    }
+
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node));
+      Node* buffer_size = nullptr;
+      Node* seed = nullptr;
+      Node* seed2 = nullptr;
+      Node* count = nullptr;
+
+      TF_RETURN_IF_ERROR(b->AddScalar(buffer_size_, &buffer_size));
+      TF_RETURN_IF_ERROR(b->AddScalar(seed_, &seed));
+      TF_RETURN_IF_ERROR(b->AddScalar(seed2_, &seed2));
+      TF_RETURN_IF_ERROR(b->AddScalar(count_, &count));
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this, {input_graph_node, buffer_size, seed, seed2, count},  // Inputs
+          {},                                                         // Attrs
+          output));
+      return Status::OK();
+    }
+
+   private:
+    const int64 seed_;
+    const int64 seed2_;
+  };
+};
+
 REGISTER_KERNEL_BUILDER(Name("ShuffleDataset").Device(DEVICE_CPU),
                         ShuffleDatasetOp);
 
+REGISTER_KERNEL_BUILDER(Name("ShuffleAndRepeatDataset").Device(DEVICE_CPU),
+                        ShuffleAndRepeatDatasetOp);
+
 }  // namespace
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/skip_dataset_op.cc b/tensorflow/core/kernels/data/skip_dataset_op.cc
similarity index 97%
rename from tensorflow/core/kernels/skip_dataset_op.cc
rename to tensorflow/core/kernels/data/skip_dataset_op.cc
index 1fe49271e299f042b9dc88a30d88d3d26a9e65f2..13c2501bbbd43bdb6c3c521db4c3830934ee91db 100644
--- a/tensorflow/core/kernels/skip_dataset_op.cc
+++ b/tensorflow/core/kernels/data/skip_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -100,7 +99,7 @@ class SkipDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         return Status::OK();
       }
@@ -162,7 +161,7 @@ class SkipDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_));
diff --git a/tensorflow/core/kernels/sparse_tensor_slice_dataset_op.cc b/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc
similarity index 99%
rename from tensorflow/core/kernels/sparse_tensor_slice_dataset_op.cc
rename to tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc
index de5ab1a3678b981a95de533dc2f59cc16dd7705c..fcf17ad68bb1bb5fca7fd7767e12fe9fbc50e0ab 100644
--- a/tensorflow/core/kernels/sparse_tensor_slice_dataset_op.cc
+++ b/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc
@@ -14,11 +14,10 @@ limitations under the License.
 ==============================================================================*/
 #include <numeric>
 
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/util/sparse/sparse_tensor.h"
 
 namespace tensorflow {
@@ -168,7 +167,7 @@ class Dataset : public GraphDatasetBase {
       return Status::OK();
     }
 
-    Status RestoreInternal(OpKernelContext* ctx,
+    Status RestoreInternal(IteratorContext* ctx,
                            IteratorStateReader* reader) override {
       mutex_lock l(mu_);
       TF_RETURN_IF_ERROR(reader->ReadScalar(Iterator::full_name("i"), &i_));
diff --git a/tensorflow/core/kernels/data/sql/BUILD b/tensorflow/core/kernels/data/sql/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..0286825af3ef7c04fff6911ddf7daec76479a715
--- /dev/null
+++ b/tensorflow/core/kernels/data/sql/BUILD
@@ -0,0 +1,38 @@
+# Description:
+#   SQL library.
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        include = ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+cc_library(
+    name = "sql",
+    srcs = [
+        "driver_manager.cc",
+        "sqlite_query_connection.cc",
+    ],
+    hdrs = [
+        "driver_manager.h",
+        "query_connection.h",
+        "sqlite_query_connection.h",
+    ],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/lib/db:sqlite",
+    ],
+)
diff --git a/tensorflow/core/kernels/sql/driver_manager.cc b/tensorflow/core/kernels/data/sql/driver_manager.cc
similarity index 89%
rename from tensorflow/core/kernels/sql/driver_manager.cc
rename to tensorflow/core/kernels/data/sql/driver_manager.cc
index 9a5d5aa853c438ef4e893fac2322af17ae863fa8..ffabda1a8a1fe8bce629ed34590c058a231f3cfc 100644
--- a/tensorflow/core/kernels/sql/driver_manager.cc
+++ b/tensorflow/core/kernels/data/sql/driver_manager.cc
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/sql/driver_manager.h"
-#include "tensorflow/core/kernels/sql/sqlite_query_connection.h"
+#include "tensorflow/core/kernels/data/sql/driver_manager.h"
+#include "tensorflow/core/kernels/data/sql/sqlite_query_connection.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/sql/driver_manager.h b/tensorflow/core/kernels/data/sql/driver_manager.h
similarity index 82%
rename from tensorflow/core/kernels/sql/driver_manager.h
rename to tensorflow/core/kernels/data/sql/driver_manager.h
index 53350268d30f4f7215eb543a28ae3fedf837ac0d..0d0c38eb58314962554b929d1a5c4a387ab68e55 100644
--- a/tensorflow/core/kernels/sql/driver_manager.h
+++ b/tensorflow/core/kernels/data/sql/driver_manager.h
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_DRIVER_MANAGER_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_DRIVER_MANAGER_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_DRIVER_MANAGER_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_DRIVER_MANAGER_H_
 
-#include "tensorflow/core/kernels/sql/query_connection.h"
+#include "tensorflow/core/kernels/data/sql/query_connection.h"
 
 namespace tensorflow {
 
@@ -38,4 +38,4 @@ class DriverManager {
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_DRIVER_MANAGER_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_DRIVER_MANAGER_H_
diff --git a/tensorflow/core/kernels/sql/query_connection.h b/tensorflow/core/kernels/data/sql/query_connection.h
similarity index 92%
rename from tensorflow/core/kernels/sql/query_connection.h
rename to tensorflow/core/kernels/data/sql/query_connection.h
index f9945aee7dc6ac59df8cc9063ab5c4d9aedf4018..194714897221f73ffec51c50c5202860b1bd0b46 100644
--- a/tensorflow/core/kernels/sql/query_connection.h
+++ b/tensorflow/core/kernels/data/sql/query_connection.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_QUERY_CONNECTION_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_QUERY_CONNECTION_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_QUERY_CONNECTION_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_QUERY_CONNECTION_H_
 
 #include "tensorflow/core/framework/tensor.h"
 
@@ -64,4 +64,4 @@ class QueryConnection {
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_QUERY_CONNECTION_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_QUERY_CONNECTION_H_
diff --git a/tensorflow/core/kernels/data/sql/sqlite_query_connection.cc b/tensorflow/core/kernels/data/sql/sqlite_query_connection.cc
new file mode 100644
index 0000000000000000000000000000000000000000..029a0aab97290e30783e415274323a1e43f9740b
--- /dev/null
+++ b/tensorflow/core/kernels/data/sql/sqlite_query_connection.cc
@@ -0,0 +1,116 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/kernels/data/sql/sqlite_query_connection.h"
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace tensorflow {
+
+namespace sql {
+
+SqliteQueryConnection::SqliteQueryConnection() {}
+
+SqliteQueryConnection::~SqliteQueryConnection() {
+  if (db_ != nullptr) db_->Unref();
+}
+
+Status SqliteQueryConnection::Open(const string& data_source_name,
+                                   const string& query,
+                                   const DataTypeVector& output_types) {
+  if (db_ != nullptr) {
+    return errors::FailedPrecondition(
+        "Failed to open query connection: Connection already opened.");
+  }
+  TF_RETURN_IF_ERROR(Sqlite::Open(
+      data_source_name, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, &db_));
+  query_ = query;
+  output_types_ = output_types;
+  return Status::OK();
+}
+
+Status SqliteQueryConnection::Close() {
+  stmt_ = SqliteStatement();
+  db_->Unref();
+  db_ = nullptr;
+  return Status::OK();
+}
+
+Status SqliteQueryConnection::GetNext(std::vector<Tensor>* out_tensors,
+                                      bool* end_of_sequence) {
+  if (!stmt_) TF_RETURN_IF_ERROR(PrepareQuery());
+  TF_RETURN_IF_ERROR(stmt_.Step(end_of_sequence));
+  if (!*end_of_sequence) {
+    for (int i = 0; i < column_count_; i++) {
+      DataType dt = output_types_[i];
+      Tensor tensor(cpu_allocator(), dt, {});
+      FillTensorWithResultSetEntry(dt, i, &tensor);
+      out_tensors->emplace_back(std::move(tensor));
+    }
+  }
+  return Status::OK();
+}
+
+Status SqliteQueryConnection::PrepareQuery() {
+  TF_RETURN_IF_ERROR(db_->Prepare(query_, &stmt_));
+  int column_count = stmt_.ColumnCount();
+  if (column_count != output_types_.size()) {
+    stmt_ = SqliteStatement();
+    return errors::InvalidArgument(tensorflow::strings::Printf(
+        "The number of columns in query (%d) must match the number of "
+        "elements in output_types (%zu).",
+        column_count, output_types_.size()));
+  }
+  column_count_ = column_count;
+  return Status::OK();
+}
+
+void SqliteQueryConnection::FillTensorWithResultSetEntry(
+    const DataType& data_type, int column_index, Tensor* tensor) {
+#define CASE(T, M)                                                 \
+  case DataTypeToEnum<T>::value:                                   \
+    tensor->scalar<T>()() = static_cast<T>(stmt_.M(column_index)); \
+    break;
+#define INT_CASE(T) CASE(T, ColumnInt)
+#define DOUBLE_CASE(T) CASE(T, ColumnDouble)
+#define STRING_CASE(T) CASE(T, ColumnString)
+  // clang-format off
+  switch (data_type) {
+    TF_CALL_int8(INT_CASE)
+    TF_CALL_uint8(INT_CASE)
+    TF_CALL_int16(INT_CASE)
+    TF_CALL_uint16(INT_CASE)
+    TF_CALL_int32(INT_CASE)
+    TF_CALL_uint32(INT_CASE)
+    TF_CALL_int64(INT_CASE)
+    TF_CALL_uint64(INT_CASE)
+    TF_CALL_float(DOUBLE_CASE)
+    TF_CALL_double(DOUBLE_CASE)
+    TF_CALL_string(STRING_CASE)
+    case DT_BOOL:
+      tensor->scalar<bool>()() = stmt_.ColumnInt(column_index) != 0;
+      break;
+    // Error preemptively thrown by SqlDatasetOp::MakeDataset in this case.
+    default:
+      LOG(FATAL)
+          << "Use of unsupported TensorFlow data type by 'SqlQueryConnection': "
+          << DataTypeString(data_type) << ".";
+  }
+  // clang-format on
+}
+
+}  // namespace sql
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/sql/sqlite_query_connection.h b/tensorflow/core/kernels/data/sql/sqlite_query_connection.h
similarity index 83%
rename from tensorflow/core/kernels/sql/sqlite_query_connection.h
rename to tensorflow/core/kernels/data/sql/sqlite_query_connection.h
index 435dd8e234ca7a8fb9a3ef6ffeef0ca4dda7a221..b36b69eae4e5ba6fc65e4075703be8ad5720c8b4 100644
--- a/tensorflow/core/kernels/sql/sqlite_query_connection.h
+++ b/tensorflow/core/kernels/data/sql/sqlite_query_connection.h
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_SQLITE_QUERY_CONNECTION_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_SQLITE_QUERY_CONNECTION_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_SQLITE_QUERY_CONNECTION_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_SQLITE_QUERY_CONNECTION_H_
 
 #include <memory>
 
-#include "tensorflow/core/kernels/sql/query_connection.h"
+#include "tensorflow/core/kernels/data/sql/query_connection.h"
 #include "tensorflow/core/lib/db/sqlite.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -42,7 +42,7 @@ class SqliteQueryConnection : public QueryConnection {
   // `stmt_`.
   void FillTensorWithResultSetEntry(const DataType& data_type, int column_index,
                                     Tensor* tensor);
-  std::shared_ptr<Sqlite> db_ = nullptr;
+  Sqlite* db_ = nullptr;
   SqliteStatement stmt_;
   int column_count_ = 0;
   string query_;
@@ -53,4 +53,4 @@ class SqliteQueryConnection : public QueryConnection {
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_SQLITE_QUERY_CONNECTION_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_SQL_SQLITE_QUERY_CONNECTION_H_
diff --git a/tensorflow/core/kernels/sql_dataset_ops.cc b/tensorflow/core/kernels/data/sql_dataset_ops.cc
similarity index 97%
rename from tensorflow/core/kernels/sql_dataset_ops.cc
rename to tensorflow/core/kernels/data/sql_dataset_ops.cc
index 23846d65bb8426ad8e5c3343047f72d24653c101..72302190802d17f2cb1ed5471017180238aedff3 100644
--- a/tensorflow/core/kernels/sql_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/sql_dataset_ops.cc
@@ -16,9 +16,9 @@ limitations under the License.
 
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/dataset.h"
-#include "tensorflow/core/kernels/sql/driver_manager.h"
-#include "tensorflow/core/kernels/sql/query_connection.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/sql/driver_manager.h"
+#include "tensorflow/core/kernels/data/sql/query_connection.h"
 #include "tensorflow/core/lib/io/inputbuffer.h"
 #include "tensorflow/core/lib/io/record_reader.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
diff --git a/tensorflow/core/kernels/stats_aggregator.h b/tensorflow/core/kernels/data/stats_aggregator.h
similarity index 93%
rename from tensorflow/core/kernels/stats_aggregator.h
rename to tensorflow/core/kernels/data/stats_aggregator.h
index 5f602c5f3bf4dc275538ae7884f9f552c71fc65a..4cb8dba5cbb4a3866b94101df0f1e9a8e52d9cf2 100644
--- a/tensorflow/core/kernels/stats_aggregator.h
+++ b/tensorflow/core/kernels/data/stats_aggregator.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_STATS_AGGREGATOR_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_STATS_AGGREGATOR_H_
 
 #include <memory>
 #include <string>
@@ -81,4 +81,4 @@ class StatsAggregatorResource : public ResourceBase {
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_STATS_AGGREGATOR_H_
diff --git a/tensorflow/core/kernels/stats_aggregator_ops.cc b/tensorflow/core/kernels/data/stats_aggregator_ops.cc
similarity index 98%
rename from tensorflow/core/kernels/stats_aggregator_ops.cc
rename to tensorflow/core/kernels/data/stats_aggregator_ops.cc
index 037ec64a83b58fd0f32789cd7560317959529225..5a2dd9c43dbcbf5250d4dcd4bd803ed4979999e0 100644
--- a/tensorflow/core/kernels/stats_aggregator_ops.cc
+++ b/tensorflow/core/kernels/data/stats_aggregator_ops.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/stats_aggregator.h"
+#include "tensorflow/core/kernels/data/stats_aggregator.h"
 
 #include <memory>
 
diff --git a/tensorflow/core/kernels/stats_dataset_ops.cc b/tensorflow/core/kernels/data/stats_dataset_ops.cc
similarity index 68%
rename from tensorflow/core/kernels/stats_dataset_ops.cc
rename to tensorflow/core/kernels/data/stats_dataset_ops.cc
index 7b1853aba61d7eeabceeebe76187535567509252..4dc1343e21faf947afc4e49539a45cdd1b38c0e9 100644
--- a/tensorflow/core/kernels/stats_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/stats_dataset_ops.cc
@@ -15,8 +15,8 @@ limitations under the License.
 
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/dataset.h"
-#include "tensorflow/core/kernels/stats_aggregator.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/stats_aggregator.h"
 #include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
@@ -43,14 +43,14 @@ class LatencyStatsDatasetOp : public UnaryDatasetOpKernel {
                    DatasetBase** output) override {
     string tag;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag));
-    *output = new Dataset(input, std::move(tag));
+    *output = new Dataset(ctx, input, std::move(tag));
   }
 
  private:
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    explicit Dataset(const DatasetBase* input, string tag)
-        : input_(input), tag_(std::move(tag)) {
+    explicit Dataset(OpKernelContext* ctx, const DatasetBase* input, string tag)
+        : GraphDatasetBase(ctx), input_(input), tag_(std::move(tag)) {
       input_->Ref();
     }
 
@@ -71,6 +71,17 @@ class LatencyStatsDatasetOp : public UnaryDatasetOpKernel {
 
     string DebugString() override { return "LatencyStatsDatasetOp::Dataset"; }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_node;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_node));
+      Node* tag_node;
+      TF_RETURN_IF_ERROR(b->AddScalar(tag_, &tag_node));
+      TF_RETURN_IF_ERROR(b->AddDataset(this, {input_node, tag_node}, output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
@@ -81,6 +92,7 @@ class LatencyStatsDatasetOp : public UnaryDatasetOpKernel {
       Status GetNextInternal(IteratorContext* ctx,
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
+        tf_shared_lock l(mu_);
         uint64 start = ctx->env()->NowMicros();
         Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
         uint64 end = ctx->env()->NowMicros();
@@ -92,8 +104,23 @@ class LatencyStatsDatasetOp : public UnaryDatasetOpKernel {
         return s;
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        return Status::OK();
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+        return Status::OK();
+      }
+
      private:
-      const std::unique_ptr<IteratorBase> input_impl_;
+      mutex mu_;
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
@@ -110,14 +137,14 @@ class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel {
                    DatasetBase** output) override {
     string tag;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag));
-    *output = new Dataset(input, std::move(tag));
+    *output = new Dataset(ctx, input, std::move(tag));
   }
 
  private:
-  class Dataset : public DatasetBase {
+  class Dataset : public GraphDatasetBase {
    public:
-    explicit Dataset(const DatasetBase* input, string tag)
-        : input_(input), tag_(std::move(tag)) {
+    explicit Dataset(OpKernelContext* ctx, const DatasetBase* input, string tag)
+        : GraphDatasetBase(ctx), input_(input), tag_(std::move(tag)) {
       input_->Ref();
     }
 
@@ -140,6 +167,17 @@ class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel {
       return "BytesProducedStatsDatasetOp::Dataset";
     }
 
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_node;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_node));
+      Node* tag_node;
+      TF_RETURN_IF_ERROR(b->AddScalar(tag_, &tag_node));
+      TF_RETURN_IF_ERROR(b->AddDataset(this, {input_node, tag_node}, output));
+      return Status::OK();
+    }
+
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
@@ -150,6 +188,7 @@ class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel {
       Status GetNextInternal(IteratorContext* ctx,
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
+        tf_shared_lock l(mu_);
         Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
         auto stats_aggregator = ctx->stats_aggregator();
         if (stats_aggregator && s.ok() && !*end_of_sequence) {
@@ -163,8 +202,23 @@ class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel {
         return s;
       }
 
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        return Status::OK();
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+        return Status::OK();
+      }
+
      private:
-      const std::unique_ptr<IteratorBase> input_impl_;
+      mutex mu_;
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/take_dataset_op.cc b/tensorflow/core/kernels/data/take_dataset_op.cc
similarity index 97%
rename from tensorflow/core/kernels/take_dataset_op.cc
rename to tensorflow/core/kernels/data/take_dataset_op.cc
index 7a6d20d6c7cb5a9bc5142e877c5c0c5285c1fd90..3bea46a747e002633a0db269434b26bad761a771 100644
--- a/tensorflow/core/kernels/take_dataset_op.cc
+++ b/tensorflow/core/kernels/data/take_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -101,7 +100,7 @@ class TakeDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         return Status::OK();
       }
@@ -149,7 +148,7 @@ class TakeDatasetOp : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_));
diff --git a/tensorflow/core/kernels/tensor_dataset_op.cc b/tensorflow/core/kernels/data/tensor_dataset_op.cc
similarity index 96%
rename from tensorflow/core/kernels/tensor_dataset_op.cc
rename to tensorflow/core/kernels/data/tensor_dataset_op.cc
index fe53434d176d77c0064574a044a18db05146e62d..8c8994b1c3f470532cc7c45dabde4639e841dc4b 100644
--- a/tensorflow/core/kernels/tensor_dataset_op.cc
+++ b/tensorflow/core/kernels/data/tensor_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -70,7 +69,7 @@ class TensorDatasetOp : public DatasetOpKernel {
    protected:
     Status AsGraphDefInternal(DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      std::vector<NodeBuilder::NodeOut> components;
+      std::vector<Node*> components;
       components.reserve(tensors_.size());
       for (const Tensor& t : tensors_) {
         Node* node;
@@ -113,7 +112,7 @@ class TensorDatasetOp : public DatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         produced_ = reader->Contains(full_name("produced"));
diff --git a/tensorflow/core/kernels/tensor_slice_dataset_op.cc b/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc
similarity index 77%
rename from tensorflow/core/kernels/tensor_slice_dataset_op.cc
rename to tensorflow/core/kernels/data/tensor_slice_dataset_op.cc
index e85f59b584720cae0f00cf45a265862e688b157c..18adae1ea32316ffd995a95fb25198309fda3361 100644
--- a/tensorflow/core/kernels/tensor_slice_dataset_op.cc
+++ b/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/batch_util.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -86,7 +86,7 @@ class TensorSliceDatasetOp : public DatasetOpKernel {
    protected:
     Status AsGraphDefInternal(DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      std::vector<NodeBuilder::NodeOut> components;
+      std::vector<Node*> components;
       components.reserve(tensors_.size());
       for (const Tensor& t : tensors_) {
         Node* node;
@@ -101,41 +101,6 @@ class TensorSliceDatasetOp : public DatasetOpKernel {
     }
 
    private:
-    template <typename T>
-    static Status HandleSliceToElement(const Tensor& parent, Tensor* element,
-                                       int64 index) {
-      DCHECK_NE(parent.dim_size(0), 0);
-      DCHECK_GE(index, 0);
-      if (element->NumElements() !=
-          (parent.NumElements() / parent.dim_size(0))) {
-        TensorShape chip_shape = parent.shape();
-        chip_shape.RemoveDim(0);
-        return errors::Internal(
-            "HandleSliceToElement Cannot copy slice: number of elements does "
-            "not match.  Shapes are: [element]: ",
-            element->shape().DebugString(), ", [parent slice]: ",
-            chip_shape.DebugString());
-      }
-      auto parent_as_matrix = parent.flat_outer_dims<T>();
-      element->flat<T>() = parent_as_matrix.chip(index, 0);
-      return Status::OK();
-    }
-
-    static Status CopySliceToElement(const Tensor& parent, Tensor* element,
-                                     int64 index) {
-#define HANDLE_TYPE(T)                                      \
-  case DataTypeToEnum<T>::value: {                          \
-    return HandleSliceToElement<T>(parent, element, index); \
-  }
-
-      switch (parent.dtype()) {
-        TF_CALL_DATASET_TYPES(HANDLE_TYPE);
-        default:
-          return errors::Unimplemented(
-              "CopySliceToElement Unhandled data type: ", element->dtype());
-      }
-    }
-
     class Iterator : public DatasetIterator<Dataset> {
      public:
       explicit Iterator(const Params& params)
@@ -154,7 +119,7 @@ class TensorSliceDatasetOp : public DatasetOpKernel {
             const Tensor& t = dataset()->tensors_[i];
             Tensor t_slice(cpu_allocator(), t.dtype(),
                            TensorShape(dataset()->shapes_[i].dim_sizes()));
-            TF_RETURN_IF_ERROR(CopySliceToElement(t, &t_slice, i_));
+            TF_RETURN_IF_ERROR(batch_util::CopySliceToElement(t, &t_slice, i_));
             out_tensors->emplace_back(std::move(t_slice));
           }
           ++i_;
@@ -172,7 +137,7 @@ class TensorSliceDatasetOp : public DatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i_));
diff --git a/tensorflow/core/kernels/data/unique_dataset_op.cc b/tensorflow/core/kernels/data/unique_dataset_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7726ee0edf71b34cb65fe5fceb2b60dd30bb58e2
--- /dev/null
+++ b/tensorflow/core/kernels/data/unique_dataset_op.cc
@@ -0,0 +1,219 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/lib/hash/hash.h"
+
+namespace tensorflow {
+
+namespace {
+
+// See documentation in ../ops/dataset_ops.cc for a high-level
+// description of the following op.
+
+class UniqueDatasetOp : public UnaryDatasetOpKernel {
+ public:
+  explicit UniqueDatasetOp(OpKernelConstruction* ctx)
+      : UnaryDatasetOpKernel(ctx) {}
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    OP_REQUIRES(ctx, input->output_dtypes().size() == 1,
+                errors::InvalidArgument("UniqueDataset only supports "
+                                        "inputs with a single component."));
+
+    DataType input_dtype = input->output_dtypes()[0];
+    OP_REQUIRES(ctx,
+                input_dtype == DT_INT32 || input_dtype == DT_INT64 ||
+                    input_dtype == DT_STRING,
+                errors::InvalidArgument(
+                    "UniqueDataset only supports inputs with a single "
+                    "`tf.int32`, `tf.int64`, or `tf.string` component."));
+
+    *output = new Dataset(ctx, input);
+  }
+
+ private:
+  class Dataset : public GraphDatasetBase {
+   public:
+    Dataset(OpKernelContext* ctx, const DatasetBase* input)
+        : GraphDatasetBase(ctx), input_(input) {
+      input_->Ref();
+    }
+
+    ~Dataset() override { input_->Unref(); }
+
+    std::unique_ptr<IteratorBase> MakeIterator(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::Unique")}));
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      return input_->output_dtypes();
+    }
+
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      return input_->output_shapes();
+    }
+
+    string DebugString() override {
+      return strings::StrCat("UniqueDatasetOp::Dataset");
+    }
+
+   protected:
+    Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node));
+      TF_RETURN_IF_ERROR(b->AddDataset(this, {input_graph_node}, output));
+      return Status::OK();
+    }
+
+   private:
+    class Iterator : public DatasetIterator<Dataset> {
+     public:
+      explicit Iterator(const typename Iterator::Params& params)
+          : DatasetIterator<Dataset>(params),
+            input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {}
+
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector<Tensor>* out_tensors,
+                             bool* end_of_sequence) override {
+        mutex_lock l(mu_);
+        bool saw_new_value;
+        do {
+          saw_new_value = false;
+          out_tensors->clear();
+          TF_RETURN_IF_ERROR(
+              input_impl_->GetNext(ctx, out_tensors, end_of_sequence));
+          if (*end_of_sequence) {
+            break;
+          }
+          DCHECK_EQ(1, out_tensors->size());
+          saw_new_value = unique_elements_.insert((*out_tensors)[0]).second;
+        } while (!saw_new_value);
+        return Status::OK();
+      }
+
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        if (input_impl_) {
+          TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
+        } else {
+          TF_RETURN_IF_ERROR(
+              writer->WriteScalar(full_name("input_impl_empty"), ""));
+        }
+        TF_RETURN_IF_ERROR(writer->WriteScalar(
+            full_name("unique_elements_size"), unique_elements_.size()));
+        size_t i = 0;
+        for (const Tensor& t : unique_elements_) {
+          TF_RETURN_IF_ERROR(writer->WriteTensor(
+              full_name(strings::StrCat("unique_elements[", i++, "]")), t));
+        }
+        return Status::OK();
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        if (!reader->Contains(full_name("input_impl_empty"))) {
+          TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_));
+        } else {
+          input_impl_.reset();
+        }
+        int64 num_unique_elements;
+        unique_elements_.clear();
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("unique_elements_size"),
+                                              &num_unique_elements));
+        for (int64 i = 0; i < num_unique_elements; ++i) {
+          Tensor unique_element;
+          TF_RETURN_IF_ERROR(reader->ReadTensor(
+              full_name(strings::StrCat("unique_elements[", i, "]")),
+              &unique_element));
+          auto insert_result = unique_elements_.insert(unique_element);
+          if (!insert_result.second) {
+            return errors::InvalidArgument(
+                "Checkpoint contained two unique elements with the same "
+                "value.");
+          }
+        }
+        return Status::OK();
+      }
+
+     private:
+      struct TensorHash {
+        size_t operator()(const Tensor& t) const {
+          if (t.dtype() == DT_INT32 || t.dtype() == DT_INT64) {
+            return Hash64(t.tensor_data().data(), t.tensor_data().size());
+          } else {
+            DCHECK_EQ(DT_STRING, t.dtype());
+            auto flat_t = t.flat<string>();
+            uint64 hash = 0;
+            for (int64 i = 0; i < t.NumElements(); ++i) {
+              hash = Hash64Combine(hash, Hash64(flat_t(i)));
+            }
+            return static_cast<size_t>(hash);
+          }
+        }
+      };
+
+      struct TensorKeyEqual {
+        bool operator()(const Tensor& lhs, const Tensor& rhs) const {
+          if (lhs.shape() != rhs.shape() || lhs.dtype() != rhs.dtype()) {
+            return false;
+          }
+          switch (lhs.dtype()) {
+#define HANDLE_TYPE(T)                                     \
+  case T:                                                  \
+    do {                                                   \
+      auto lhs_flat = lhs.flat<EnumToDataType<T>::Type>(); \
+      auto rhs_flat = rhs.flat<EnumToDataType<T>::Type>(); \
+      for (int64 i = 0; i < lhs.NumElements(); ++i) {      \
+        if (lhs_flat(i) != rhs_flat(i)) {                  \
+          return false;                                    \
+        }                                                  \
+      }                                                    \
+      return true;                                         \
+    } while (0)
+
+            HANDLE_TYPE(DT_INT32);
+            HANDLE_TYPE(DT_INT64);
+            HANDLE_TYPE(DT_STRING);
+            default:
+              LOG(FATAL) << "UniqueDataset unhandled data type: "
+                         << DataTypeString(lhs.dtype());
+          }
+        }
+      };
+
+      mutex mu_;
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
+      std::unordered_set<Tensor, TensorHash, TensorKeyEqual> unique_elements_
+          GUARDED_BY(mu_);
+    };
+
+    const DatasetBase* const input_;
+  };
+};
+
+REGISTER_KERNEL_BUILDER(Name("UniqueDataset").Device(DEVICE_CPU),
+                        UniqueDatasetOp);
+
+}  // namespace
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/window_dataset.cc b/tensorflow/core/kernels/data/window_dataset.cc
similarity index 84%
rename from tensorflow/core/kernels/window_dataset.cc
rename to tensorflow/core/kernels/data/window_dataset.cc
index 77345fd3dfb7e39184605ed1bb4cab3251a62ea1..e24bdea4ac70b76edb926419fa9180f13cf51fb0 100644
--- a/tensorflow/core/kernels/window_dataset.cc
+++ b/tensorflow/core/kernels/data/window_dataset.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/window_dataset.h"
+#include "tensorflow/core/kernels/data/window_dataset.h"
 
 namespace tensorflow {
 namespace {
@@ -59,6 +59,21 @@ class WindowDataset : public DatasetBase {
       return Status::OK();
     }
 
+    Status SaveInternal(IteratorStateWriter* writer) override {
+      mutex_lock l(mu_);
+      TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("i"), i_));
+      return Status::OK();
+    }
+
+    Status RestoreInternal(IteratorContext* ctx,
+                           IteratorStateReader* reader) override {
+      mutex_lock l(mu_);
+      int64 i;
+      TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("i"), &i));
+      i_ = size_t(i);
+      return Status::OK();
+    }
+
     mutex mu_;
     size_t i_ GUARDED_BY(mu_) = 0;
   };
diff --git a/tensorflow/core/kernels/window_dataset.h b/tensorflow/core/kernels/data/window_dataset.h
similarity index 87%
rename from tensorflow/core/kernels/window_dataset.h
rename to tensorflow/core/kernels/data/window_dataset.h
index a4fccf17b4c7cc064c1aec57554bb88bb7b59578..25396bd3e72f01eb40922a83e6dd18d1fc81e077 100644
--- a/tensorflow/core/kernels/window_dataset.h
+++ b/tensorflow/core/kernels/data/window_dataset.h
@@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_WINDOW_DATASET_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_WINDOW_DATASET_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_WINDOW_DATASET_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_WINDOW_DATASET_H_
 
 #include <vector>
 
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/dataset.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -45,4 +45,4 @@ Status NewWindowDataset(std::vector<std::vector<Tensor>> elements,
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_WINDOW_DATASET_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATA_WINDOW_DATASET_H_
diff --git a/tensorflow/core/kernels/zip_dataset_op.cc b/tensorflow/core/kernels/data/zip_dataset_op.cc
similarity index 97%
rename from tensorflow/core/kernels/zip_dataset_op.cc
rename to tensorflow/core/kernels/data/zip_dataset_op.cc
index 9381915ae9894a91a7418ade2c8648e407b8735b..0f79eac94710fafd3cbf5686876f629dac7bac09 100644
--- a/tensorflow/core/kernels/zip_dataset_op.cc
+++ b/tensorflow/core/kernels/data/zip_dataset_op.cc
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
 
 namespace tensorflow {
 
@@ -80,7 +79,7 @@ class ZipDatasetOp : public DatasetOpKernel {
    protected:
     Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      std::vector<NodeBuilder::NodeOut> input_graph_nodes;
+      std::vector<Node*> input_graph_nodes;
       input_graph_nodes.reserve(inputs_.size());
       for (const auto& input : inputs_) {
         Node* input_node;
@@ -145,7 +144,7 @@ class ZipDatasetOp : public DatasetOpKernel {
         return Status::OK();
       }
 
-      Status RestoreInternal(OpKernelContext* ctx,
+      Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         if (reader->Contains(full_name("input_impls_empty"))) {
diff --git a/tensorflow/core/kernels/data_format_ops.cc b/tensorflow/core/kernels/data_format_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fa67545a0dad0332cce55c173fc39ba25c055902
--- /dev/null
+++ b/tensorflow/core/kernels/data_format_ops.cc
@@ -0,0 +1,176 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/nn_ops.cc.
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/kernels/data_format_ops.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+template <typename Device, typename T>
+class DataFormatDimMapOp : public OpKernel {
+ public:
+  explicit DataFormatDimMapOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string src_format;
+    OP_REQUIRES_OK(context, context->GetAttr("src_format", &src_format));
+    string dst_format;
+    OP_REQUIRES_OK(context, context->GetAttr("dst_format", &dst_format));
+    OP_REQUIRES(
+        context, src_format == "NHWC",
+        errors::InvalidArgument(strings::StrCat(
+            "Current implementation doesn't support source data format ",
+            src_format)));
+    OP_REQUIRES(context, dst_format == "NCHW",
+                errors::InvalidArgument(strings::StrCat(
+                    "Current implementation doesn't support dst data format ",
+                    dst_format)));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &output));
+    functor::DataFormatDimMap<Device, T>()(context->eigen_device<Device>(),
+                                           input.flat<T>(), output->flat<T>());
+  }
+};
+
+template <typename Device, typename T>
+class DataFormatVecPermuteOp : public OpKernel {
+ public:
+  explicit DataFormatVecPermuteOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string src_format;
+    OP_REQUIRES_OK(context, context->GetAttr("src_format", &src_format));
+    string dst_format;
+    OP_REQUIRES_OK(context, context->GetAttr("dst_format", &dst_format));
+    OP_REQUIRES(context,
+                (src_format == "NHWC" && dst_format == "NCHW") ||
+                    (src_format == "NCHW" && dst_format == "NHWC"),
+                errors::InvalidArgument(strings::StrCat(
+                    "Current implementation only supports NCHW-to-NHWC and "
+                    "NHWC-to-NCHW format conversion; got source format ",
+                    src_format, " and destination format ", dst_format)));
+    nhwc_to_nchw_ = (src_format == "NHWC") ? true : false;
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    OP_REQUIRES(context, input.dims() == 1 || input.dims() == 2,
+                errors::InvalidArgument(
+                    "input must be a vector or 2D tensor, but got shape ",
+                    input.shape().DebugString()));
+    if (input.dims() == 1) {
+      OP_REQUIRES(
+          context, input.NumElements() == 4,
+          errors::InvalidArgument("1D input must be of size 4, but got shape ",
+                                  input.shape().DebugString()));
+    } else if (input.dims() == 2) {
+      OP_REQUIRES(
+          context, input.dim_size(0) == 4,
+          errors::InvalidArgument(
+              "First dimension of 2D input must be of size 4, but got shape ",
+              input.shape().DebugString()));
+      OP_REQUIRES(
+          context, input.dim_size(1) == 2,
+          errors::InvalidArgument(
+              "Second dimension of 2D input must be of size 2, but got shape ",
+              input.shape().DebugString()));
+    }
+
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &output));
+    functor::DataFormatVecPermute<Device, T>()(
+        context->eigen_device<Device>(), input.flat<T>(), output->flat<T>(),
+        nhwc_to_nchw_);
+  }
+
+ private:
+  bool nhwc_to_nchw_;
+};
+
+#define REGISTER_KERNEL(T)                                                \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("DataFormatDimMap").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+      DataFormatDimMapOp<CPUDevice, T>);
+TF_CALL_int32(REGISTER_KERNEL);
+TF_CALL_int64(REGISTER_KERNEL);
+#undef REGISTER_KERNEL
+
+#define REGISTER_KERNEL(T)                                                    \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("DataFormatVecPermute").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+      DataFormatVecPermuteOp<CPUDevice, T>);
+TF_CALL_int32(REGISTER_KERNEL);
+TF_CALL_int64(REGISTER_KERNEL);
+#undef REGISTER_KERNEL
+
+#if GOOGLE_CUDA
+// Forward declarations of the functor specializations for GPU.
+namespace functor {
+#define DECLARE_GPU_SPEC(T)                                \
+  template <>                                              \
+  void DataFormatDimMap<GPUDevice, T>::operator()(         \
+      const GPUDevice& d, typename TTypes<T>::ConstFlat x, \
+      typename TTypes<T>::Flat y);                         \
+  extern template struct DataFormatDimMap<GPUDevice, T>;
+#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
+TF_CALL_int32(DECLARE_GPU_SPECS);
+TF_CALL_int64(DECLARE_GPU_SPECS);
+#undef DECLARE_GPU_SPEC
+
+#define DECLARE_GPU_SPEC(T)                                \
+  template <>                                              \
+  void DataFormatVecPermute<GPUDevice, T>::operator()(     \
+      const GPUDevice& d, typename TTypes<T>::ConstFlat x, \
+      typename TTypes<T>::Vec y, bool nhwc_to_nchw);       \
+  extern template struct DataFormatVecPermute<GPUDevice, T>;
+#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
+TF_CALL_int32(DECLARE_GPU_SPECS);
+TF_CALL_int64(DECLARE_GPU_SPECS);
+#undef DECLARE_GPU_SPEC
+}  // namespace functor
+
+// Registration of the GPU implementations.
+#define REGISTER_GPU_KERNEL(T)                                            \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("DataFormatDimMap").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      DataFormatDimMapOp<GPUDevice, T>);
+TF_CALL_int32(REGISTER_GPU_KERNEL);
+TF_CALL_int64(REGISTER_GPU_KERNEL);
+#undef REGISTER_GPU_KERNEL
+
+#define REGISTER_GPU_KERNEL(T)                                                \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("DataFormatVecPermute").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      DataFormatVecPermuteOp<GPUDevice, T>);
+TF_CALL_int32(REGISTER_GPU_KERNEL);
+TF_CALL_int64(REGISTER_GPU_KERNEL);
+#undef REGISTER_GPU_KERNEL
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data_format_ops.h b/tensorflow/core/kernels/data_format_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..bf704cc35cf2ff18b38202db5d192b460b415fbb
--- /dev/null
+++ b/tensorflow/core/kernels/data_format_ops.h
@@ -0,0 +1,116 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_KERNELS_DATA_FORMAT_OPS_H_
+#define TENSORFLOW_KERNELS_DATA_FORMAT_OPS_H_
+// Functor definition for data format dim mapping ops, must be compilable
+// by nvcc.
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+
+namespace tensorflow {
+namespace functor {
+
+// Functor used by DataFormatDimMapOP to do the computations.
+template <typename Device, typename T>
+struct DataFormatDimMap {
+  void operator()(const Device& d, typename TTypes<T>::ConstFlat x,
+                  typename TTypes<T>::Flat y) {
+    auto zero = x.constant(0);
+    auto one = x.constant(1);
+    auto three = x.constant(3);
+    auto four = x.constant(4);
+    auto x_mod = (x + four) % 4;
+    auto is_zero = (x_mod == zero);
+    auto is_three = (x_mod == three);
+    y.device(d) = is_zero.select(zero, is_three.select(one, x_mod + one));
+  }
+};
+
+template <typename T>
+struct VecPermuteNHWCToNCHW {
+  Eigen::DSizes<Eigen::DenseIndex, 1> dimensions(
+      typename TTypes<T>::ConstFlat input) const {
+    Eigen::DSizes<Eigen::DenseIndex, 1> result;
+    result[0] = input.dimension(0);
+    return result;
+  }
+  template <typename Output, typename Device>
+  void eval(typename TTypes<T>::ConstFlat input, Output& output,
+            const Device& d) const {
+    if (input.size() == 8) {
+      output.template chip<0>(0).device(d) = input.template chip<0>(0);
+      output.template chip<0>(1).device(d) = input.template chip<0>(1);
+      output.template chip<0>(2).device(d) = input.template chip<0>(6);
+      output.template chip<0>(3).device(d) = input.template chip<0>(7);
+      output.template chip<0>(4).device(d) = input.template chip<0>(2);
+      output.template chip<0>(5).device(d) = input.template chip<0>(3);
+      output.template chip<0>(6).device(d) = input.template chip<0>(4);
+      output.template chip<0>(7).device(d) = input.template chip<0>(5);
+    } else {
+      output.template chip<0>(0).device(d) = input.template chip<0>(0);
+      output.template chip<0>(1).device(d) = input.template chip<0>(3);
+      output.template chip<0>(2).device(d) = input.template chip<0>(1);
+      output.template chip<0>(3).device(d) = input.template chip<0>(2);
+    }
+  }
+};
+
+template <typename T>
+struct VecPermuteNCHWToNHWC {
+  Eigen::DSizes<Eigen::DenseIndex, 1> dimensions(
+      typename TTypes<T>::ConstFlat input) const {
+    Eigen::DSizes<Eigen::DenseIndex, 1> result;
+    result[0] = input.dimension(0);
+    return result;
+  }
+  template <typename Output, typename Device>
+  void eval(typename TTypes<T>::ConstFlat input, Output& output,
+            const Device& d) const {
+    if (input.size() == 8) {
+      output.template chip<0>(0).device(d) = input.template chip<0>(0);
+      output.template chip<0>(1).device(d) = input.template chip<0>(1);
+      output.template chip<0>(2).device(d) = input.template chip<0>(4);
+      output.template chip<0>(3).device(d) = input.template chip<0>(5);
+      output.template chip<0>(4).device(d) = input.template chip<0>(6);
+      output.template chip<0>(5).device(d) = input.template chip<0>(7);
+      output.template chip<0>(6).device(d) = input.template chip<0>(2);
+      output.template chip<0>(7).device(d) = input.template chip<0>(3);
+    } else {
+      output.template chip<0>(0).device(d) = input.template chip<0>(0);
+      output.template chip<0>(1).device(d) = input.template chip<0>(2);
+      output.template chip<0>(2).device(d) = input.template chip<0>(3);
+      output.template chip<0>(3).device(d) = input.template chip<0>(1);
+    }
+  }
+};
+
+// Functor used by DataFormatVecPermuteOp to do the computations.
+template <typename Device, typename T>
+struct DataFormatVecPermute {
+  void operator()(const Device& d, typename TTypes<T>::ConstFlat x,
+                  typename TTypes<T>::Flat y, bool nhwc_to_nchw) {
+    if (nhwc_to_nchw) {
+      y.device(d) = x.customOp(VecPermuteNHWCToNCHW<T>());
+    } else {
+      y.device(d) = x.customOp(VecPermuteNCHWToNHWC<T>());
+    }
+  }
+};
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_KERNELS_DATA_FORMAT_OPS_H_
diff --git a/tensorflow/core/kernels/data_format_ops_gpu.cu.cc b/tensorflow/core/kernels/data_format_ops_gpu.cu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..38ce7c28fea662cea7004c47a46c0031875e3c36
--- /dev/null
+++ b/tensorflow/core/kernels/data_format_ops_gpu.cu.cc
@@ -0,0 +1,33 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/kernels/data_format_ops.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+template struct functor::DataFormatDimMap<GPUDevice, int32>;
+template struct functor::DataFormatDimMap<GPUDevice, int64>;
+template struct functor::DataFormatVecPermute<GPUDevice, int32>;
+template struct functor::DataFormatVecPermute<GPUDevice, int64>;
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/dataset.cc b/tensorflow/core/kernels/dataset.cc
deleted file mode 100644
index fcfa2956f782fc9617448ad75e53b7c36963d222..0000000000000000000000000000000000000000
--- a/tensorflow/core/kernels/dataset.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/kernels/dataset.h"
-
-namespace tensorflow {
-
-namespace {
-
-// A wrapper class for storing a `DatasetBase` instance in a DT_VARIANT tensor.
-// Objects of the wrapper class own a reference on an instance of `DatasetBase`,
-// and the wrapper's copy constructor and destructor take care of managing the
-// reference count.
-//
-// NOTE(mrry): This is not a feature-complete implementation of the DT_VARIANT
-// specification. In particular, we cannot currently serialize an arbitrary
-// `DatasetBase` object, so the `Encode()` and `Decode()` methods are not
-// implemented.
-class DatasetVariantWrapper {
- public:
-  DatasetVariantWrapper() : dataset_(nullptr) {}
-
-  // Transfers ownership of `dataset` to `*this`.
-  explicit DatasetVariantWrapper(DatasetBase* dataset) : dataset_(dataset) {}
-
-  DatasetVariantWrapper(const DatasetVariantWrapper& other)
-      : dataset_(other.dataset_) {
-    if (dataset_) dataset_->Ref();
-  }
-
-  ~DatasetVariantWrapper() {
-    if (dataset_) dataset_->Unref();
-  }
-
-  DatasetBase* get() const { return dataset_; }
-
-  string TypeName() const { return "tensorflow::DatasetVariantWrapper"; }
-  string DebugString() const {
-    if (dataset_) {
-      return dataset_->DebugString();
-    } else {
-      return "<Uninitialized DatasetVariantWrapper>";
-    }
-  }
-  void Encode(VariantTensorData* data) const {
-    LOG(ERROR) << "The Encode() method is not implemented for "
-                  "DatasetVariantWrapper objects.";
-  }
-  bool Decode(const VariantTensorData& data) {
-    LOG(ERROR) << "The Decode() method is not implemented for "
-                  "DatasetVariantWrapper objects.";
-    return false;
-  }
-
- private:
-  DatasetBase* const dataset_;  // Owns one reference.
-};
-
-}  // namespace
-
-Status GetDatasetFromVariantTensor(const Tensor& tensor,
-                                   DatasetBase** out_dataset) {
-  if (!(tensor.dtype() == DT_VARIANT ||
-        TensorShapeUtils::IsScalar(tensor.shape()))) {
-    return errors::InvalidArgument(
-        "Dataset tensor must be a scalar of dtype DT_VARIANT.");
-  }
-  const Variant& variant = tensor.scalar<Variant>()();
-  const DatasetVariantWrapper* wrapper = variant.get<DatasetVariantWrapper>();
-  if (wrapper == nullptr) {
-    return errors::InvalidArgument("Tensor must be a Dataset object.");
-  }
-  *out_dataset = wrapper->get();
-  if (*out_dataset == nullptr) {
-    return errors::Internal("Read uninitialized Dataset variant.");
-  }
-  return Status::OK();
-}
-
-Status StoreDatasetInVariantTensor(DatasetBase* dataset, Tensor* tensor) {
-  if (!(tensor->dtype() == DT_VARIANT ||
-        TensorShapeUtils::IsScalar(tensor->shape()))) {
-    return errors::InvalidArgument(
-        "Dataset tensor must be a scalar of dtype DT_VARIANT.");
-  }
-  tensor->scalar<Variant>()() = DatasetVariantWrapper(dataset);
-  return Status::OK();
-}
-
-void DatasetOpKernel::Compute(OpKernelContext* ctx) {
-  DatasetBase* dataset = nullptr;
-  MakeDataset(ctx, &dataset);
-  if (ctx->status().ok()) {
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &output));
-    OP_REQUIRES_OK(ctx, StoreDatasetInVariantTensor(dataset, output));
-  }
-}
-
-void UnaryDatasetOpKernel::MakeDataset(OpKernelContext* ctx,
-                                       DatasetBase** output) {
-  DatasetBase* input;
-  OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(0), &input));
-  MakeDataset(ctx, input, output);
-}
-
-void BinaryDatasetOpKernel::MakeDataset(OpKernelContext* ctx,
-                                        DatasetBase** output) {
-  DatasetBase* input;
-  OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(0), &input));
-  DatasetBase* another_input;
-  OP_REQUIRES_OK(ctx,
-                 GetDatasetFromVariantTensor(ctx->input(1), &another_input));
-  MakeDataset(ctx, input, another_input, output);
-}
-
-const char GraphDatasetBase::kDatasetGraphKey[] = "_DATASET_GRAPH";
-const char GraphDatasetBase::kDatasetGraphOutputNodeKey[] =
-    "_DATASET_GRAPH_OUTPUT_NODE";
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h
index afbebb0692d0a9bd246a77be7dc0ba2eae06b112..2aa6dbe6f3e1602e0fb94b8b196d41e29d644fd8 100644
--- a/tensorflow/core/kernels/dataset.h
+++ b/tensorflow/core/kernels/dataset.h
@@ -15,677 +15,6 @@ limitations under the License.
 #ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_H_
 #define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_H_
 
-#include <memory>
-
-#include "tensorflow/core/framework/attr_value.pb.h"
-#include "tensorflow/core/framework/attr_value_util.h"
-#include "tensorflow/core/framework/graph.pb.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/framework/variant_encode_decode.h"
-#include "tensorflow/core/framework/variant_tensor_data.h"
-#include "tensorflow/core/graph/graph_def_builder.h"
-#include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/lib/strings/strcat.h"
-#include "tensorflow/core/platform/tracing.h"
-
-// Polymorphic datasets should support all primitive TensorFlow
-// types. Use this macro to expand `m(T)` once for each primitive type
-// `T`, e.g. to build a `switch` statement.
-#define TF_CALL_DATASET_TYPES(m) TF_CALL_ALL_TYPES(m) TF_CALL_QUANTIZED_TYPES(m)
-
-namespace tensorflow {
-
-// Interface for reading values from a key-value store.
-// Used for restoring iterator state.
-class IteratorStateReader {
- public:
-  virtual Status ReadScalar(StringPiece key, int64* val) = 0;
-  virtual Status ReadScalar(StringPiece key, string* val) = 0;
-  virtual Status ReadTensor(StringPiece key, Tensor* val) = 0;
-  virtual bool Contains(StringPiece key) = 0;
-
-  virtual ~IteratorStateReader() {}
-};
-
-// Interface for writing values to a key-value store.
-// Used for saving iterator state.
-class IteratorStateWriter {
- public:
-  virtual Status WriteScalar(StringPiece key, const int64 val) = 0;
-  virtual Status WriteScalar(StringPiece key, const string& val) = 0;
-  virtual Status WriteTensor(StringPiece key, const Tensor& val) = 0;
-
-  virtual ~IteratorStateWriter() {}
-};
-
-// Wrapper around GraphDefBuilder. Used to serialize Dataset graph.
-class GraphDefBuilderWrapper {
- public:
-  explicit GraphDefBuilderWrapper(GraphDefBuilder* b) : b_(b) {}
-
-  // Adds a Const node with scalar value to the Graph.
-  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
-  // non-null if the method returns with an OK status.
-  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
-  template <typename T>
-  Status AddScalar(const T& val, Node** output) {
-    Tensor val_t = Tensor(DataTypeToEnum<T>::v(), TensorShape({}));
-    val_t.scalar<T>()() = val;
-    AddTensorInternal(val_t, output);
-    if (*output == nullptr) {
-      return errors::Internal("AddScalar: Failed to build Const op.");
-    }
-    return Status::OK();
-  }
-
-  // Adds a Const node with vector value to the Graph.
-  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
-  // non-null if the method returns with an OK status.
-  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
-  // TODO(shivaniagrawal): Consider changing to gtl::ArraySlice?
-  template <typename T>
-  Status AddVector(const std::vector<T>& val, Node** output) {
-    Tensor val_t = Tensor(DataTypeToEnum<T>::v(),
-                          TensorShape({static_cast<int64>(val.size())}));
-    for (int i = 0; i < val.size(); i++) {
-      val_t.flat<T>()(i) = val[i];
-    }
-    AddTensorInternal(val_t, output);
-    if (*output == nullptr) {
-      return errors::Internal("AddVector: Failed to build Const op.");
-    }
-    return Status::OK();
-  }
-
-  // Adds a Const node with Tensor value to the Graph.
-  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
-  // non-null if the method returns with an OK status.
-  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
-  Status AddTensor(const Tensor& val, Node** output) {
-    AddTensorInternal(val, output);
-    if (*output == nullptr) {
-      return errors::Internal("AddTesor: Failed to build Const op.");
-    }
-    return Status::OK();
-  }
-
-  template <class DatasetType>
-  Status AddDataset(const DatasetType* dataset,
-                    const std::vector<NodeBuilder::NodeOut>& inputs,
-                    Node** output) {
-    return AddDataset(dataset, inputs, {}, output);
-  }
-
-  // Adds a node corresponding to the `DatasetType` to the Graph.
-  // Return value of `DatasetType::op_name()` is used as the op type for the
-  // node.
-  // Values for the output_types and output_shapes node attributes are also
-  // written if those attributes are defined in the OpDef.
-  // `*output` contains a pointer to the output `Node`. It is guaranteed to be
-  // non-null if the method returns with an OK status.
-  // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
-  template <class DatasetType>
-  Status AddDataset(const DatasetType* dataset,
-                    const std::vector<NodeBuilder::NodeOut>& inputs,
-                    const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
-                    Node** output) {
-    std::vector<std::pair<size_t, NodeBuilder::NodeOut>> enumerated_inputs(
-        inputs.size());
-    for (int i = 0; i < inputs.size(); i++) {
-      enumerated_inputs[i] = std::make_pair(i, inputs[i]);
-    }
-    return AddDataset(dataset, enumerated_inputs, {}, attrs, output);
-  }
-
-  template <class DatasetType>
-  Status AddDataset(
-      const DatasetType* dataset,
-      const std::vector<std::pair<size_t, NodeBuilder::NodeOut>>& inputs,
-      const std::vector<
-          std::pair<size_t, gtl::ArraySlice<NodeBuilder::NodeOut>>>&
-          list_inputs,
-      const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
-      Node** output) {
-    const string& op_type_name = dataset->op_name();
-    std::unique_ptr<const GraphDefBuilder::Options> opts(
-        new GraphDefBuilder::Options(b_->opts()));
-    // TODO(srbs|mrry): Not all datasets have output_types and output_shapes
-    // attributes defined. It will be nice to have a consistent pattern.
-    bool has_output_types_attr = HasAttr(op_type_name, "output_types");
-    bool has_output_shapes_attr = HasAttr(op_type_name, "output_shapes");
-    if (has_output_shapes_attr) {
-      opts.reset(new GraphDefBuilder::Options(
-          opts->WithAttr("output_shapes", dataset->output_shapes())));
-    }
-    if (has_output_types_attr) {
-      opts.reset(new GraphDefBuilder::Options(
-          opts->WithAttr("output_types", dataset->output_dtypes())));
-    }
-    for (auto attr : attrs) {
-      opts.reset(new GraphDefBuilder::Options(
-          opts->WithAttr(attr.first, attr.second)));
-    }
-    if (opts->HaveError()) {
-      return errors::Internal("AddDataset: Failed to build Options with error ",
-                              opts->StatusToString());
-    }
-    NodeBuilder node_builder(opts->GetNameForOp(op_type_name), op_type_name,
-                             opts->op_registry());
-    {
-      size_t total_size = inputs.size() + list_inputs.size();
-      auto inputs_iter = inputs.begin();
-      auto list_inputs_iter = list_inputs.begin();
-      for (int i = 0; i < total_size; i++) {
-        if (inputs_iter != inputs.end() && inputs_iter->first == i) {
-          node_builder.Input(inputs_iter->second);
-          inputs_iter++;
-        } else if (list_inputs_iter != list_inputs.end() &&
-                   list_inputs_iter->first == i) {
-          node_builder.Input(list_inputs_iter->second);
-          list_inputs_iter++;
-        } else {
-          return errors::InvalidArgument("No input found for index ", i);
-        }
-      }
-    }
-    *output = opts->FinalizeBuilder(&node_builder);
-    if (*output == nullptr) {
-      return errors::Internal("AddDataset: Failed to build ", op_type_name,
-                              " op with error ", opts->StatusToString());
-    }
-    return Status::OK();
-  }
-
-  // Adds a user-defined function with name `function_name` to the graph and
-  // recursively adds all functions it references. If a function with a matching
-  // name has already been added, returns with OK status. If a user-defined with
-  // name `function_name` is not found in the FunctionLibraryDefinition, returns
-  // an InvalidArgumentError. If the function with name `function_name` or any
-  // of its dependent functions are stateful, returns an InvalidArgument error.
-  Status AddFunction(OpKernelContext* ctx, const string& function_name) {
-    if (b_->HasFunction(function_name)) {
-      LOG(INFO) << "Function with name " << function_name << "already exists in"
-                << " the graph. It will not be added again.";
-      return Status::OK();
-    }
-    TF_RETURN_IF_ERROR(EnsureFunctionIsStateless(ctx, function_name));
-    const FunctionLibraryDefinition* flib_def =
-        ctx->function_library()->GetFunctionLibraryDefinition();
-    const FunctionDef* f_def = flib_def->Find(function_name);
-    if (f_def == nullptr) {
-      return errors::InvalidArgument("Unable to find FunctionDef for ",
-                                     function_name, " in the registry.");
-    }
-    FunctionDefLibrary def;
-    *def.add_function() = *f_def;
-    const string gradient_func = flib_def->FindGradient(function_name);
-    if (!gradient_func.empty()) {
-      GradientDef* g_def = def.add_gradient();
-      g_def->set_function_name(function_name);
-      g_def->set_gradient_func(gradient_func);
-    }
-    TF_RETURN_IF_ERROR(b_->AddFunctionLibrary(def));
-
-    // Recursively add functions in inputs of function_name.
-    for (const NodeDef& node_def : f_def->node_def()) {
-      const OpRegistrationData* op_reg_data = nullptr;
-      TF_RETURN_IF_ERROR(flib_def->LookUp(node_def.op(), &op_reg_data));
-      if (op_reg_data->is_function_op) {
-        TF_RETURN_IF_ERROR(AddFunction(ctx, op_reg_data->op_def.name()));
-      }
-      // Recursively add functions in attrs of this NodeDef.
-      for (const auto& pair : node_def.attr()) {
-        TF_RETURN_IF_ERROR(AddAttrFunctions(pair.second, ctx));
-      }
-    }
-
-    // Recursively add functions in attrs of function_name.
-    for (auto iter = f_def->attr().begin(); iter != f_def->attr().end();
-         iter++) {
-      TF_RETURN_IF_ERROR(AddAttrFunctions(iter->second, ctx));
-    }
-    return Status::OK();
-  }
-
-  template <typename T>
-  void BuildAttrValue(const T& value, AttrValue* attr) {
-    SetAttrValue(value, attr);
-  }
-
- private:
-  void AddTensorInternal(const Tensor& val, Node** output) {
-    *output = ops::SourceOp(
-        "Const",
-        b_->opts().WithAttr("dtype", val.dtype()).WithAttr("value", val));
-  }
-
-  Status EnsureFunctionIsStateless(OpKernelContext* ctx,
-                                   const string& function_name) const {
-    const FunctionLibraryDefinition* lib_def =
-        ctx->function_library()->GetFunctionLibraryDefinition();
-    const FunctionDef* function_def = lib_def->Find(function_name);
-    if (!function_def) {
-      return errors::InvalidArgument("Unable to find FunctionDef for ",
-                                     function_name, " in registry.");
-    }
-    for (const NodeDef& node_def : function_def->node_def()) {
-      const OpDef* op_def;
-      TF_RETURN_IF_ERROR(lib_def->LookUpOpDef(node_def.op(), &op_def));
-      // TODO(b/65524810): Hack to allow functions to capture Dataset op
-      // nodes needed for FlatMap. Currently, source datasets nodes have been
-      // marked stateful to avoid constant folding since we do not have a
-      // good way of serializing them.
-      if (IsOpWhitelisted(op_def)) {
-        continue;
-      }
-      if (op_def->is_stateful()) {
-        return errors::InvalidArgument(
-            "Op[name: ", node_def.name(), ", type: ", node_def.op(), "] ",
-            "in function ", function_name, " is stateful. ",
-            "Saving stateful functions is not supported yet.");
-      }
-    }
-    return Status::OK();
-  }
-
-  bool IsOpWhitelisted(const OpDef* op_def) const {
-    return StringPiece(op_def->name()).ends_with("Dataset") &&
-           HasAttr(op_def, "output_shapes");
-  }
-
-  bool HasAttr(const string& op_type_name, const string& attr_name) const {
-    const OpDef* op_def = nullptr;
-    Status s = b_->opts().op_registry()->LookUpOpDef(op_type_name, &op_def);
-    if (!s.ok() || op_def == nullptr) {
-      return false;
-    }
-    return HasAttr(op_def, attr_name);
-  }
-
-  bool HasAttr(const OpDef* op_def, const string& attr_name) const {
-    for (auto attr : op_def->attr()) {
-      if (attr.name() == attr_name) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  Status AddAttrFunctions(const AttrValue& attr_value, OpKernelContext* ctx) {
-    if (attr_value.has_func()) {
-      TF_RETURN_IF_ERROR(AddFunction(ctx, attr_value.func().name()));
-    } else if (attr_value.has_list()) {
-      for (const NameAttrList& name_attr_list : attr_value.list().func()) {
-        TF_RETURN_IF_ERROR(AddFunction(ctx, name_attr_list.name()));
-      }
-    }
-    return Status::OK();
-  }
-
-  GraphDefBuilder* b_;
-};
-
-class StatsAggregator;
-
-// A cut-down version of OpKernelContext for running computations in
-// iterators. Note that we cannot simply use OpKernelContext here
-// because we might run computation in an iterator whose lifetime is
-// not nested within the lifetime of a single OpKernelContext
-// (e.g. asynchronous prefetching).
-//
-// TODO(mrry): We will probably need to support more of
-// OpKernelContext here. For example, should allocation be handled by
-// the IteratorContext?
-// TODO(mrry): We're making some daring assumptions about the lifetime
-// of the runner passed in here. A runner will be deleted when the original
-// step ends, but all existing runners only close over session-lifetime (or
-// longer-lived) state, so we can make a copy of the function. There's nothing
-// in the definition of the API from which we took the runner to guarantee that
-// what we are doing is safe. We should formalize the properties here.
-class IteratorContext {
- public:
-  struct Params {
-    // Interface to operating system functionality.
-    Env* env;
-
-    // Function call support.
-    std::function<void(std::function<void()>)> runner = nullptr;
-
-    // A function that returns the current `StatsAggregator` instance to be
-    // used when recording statistics about the iterator.
-    //
-    // NOTE(mrry): This is somewhat awkward, because (i) the `StatsAggregator`
-    // is a property of the `IteratorResource` (which this class does not know
-    // about), and (ii) it can change after the `IteratorContext` has been
-    // created. Better suggestions are welcome!
-    std::function<std::shared_ptr<StatsAggregator>()> stats_aggregator_getter =
-        nullptr;
-  };
-
-  explicit IteratorContext(Params params) : params_(std::move(params)) {}
-
-  Env* env() const { return params_.env; }
-
-  std::function<void(std::function<void()>)>* runner() {
-    return &params_.runner;
-  }
-
-  std::shared_ptr<StatsAggregator> stats_aggregator() {
-    if (params_.stats_aggregator_getter) {
-      return params_.stats_aggregator_getter();
-    } else {
-      return nullptr;
-    }
-  }
-
- private:
-  Params params_;
-};
-
-// Represents the current position in a range of outputs, where the
-// range of outputs is typically represented by an `DatasetBase`,
-// defined below.
-class IteratorBase {
- public:
-  virtual ~IteratorBase() {}
-
-  // Gets the next output from the range that this iterator is traversing.
-  //
-  // If at least one output remains in this iterator's range, that
-  // output will be stored in `*out_tensors` and `false` will be
-  // stored in `*end_of_sequence`.
-  //
-  // If no more outputs remain in this iterator's range, `true` will
-  // be stored in `*end_of_sequence`, and the content of
-  // `*out_tensors` will be undefined.
-  //
-  // This method is thread-safe.
-  //
-  // TODO(mrry): Define `GetNextAsync()` or `GetNextManyAsync()`, and
-  // potentially remove this method.
-  virtual Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
-                         bool* end_of_sequence) = 0;
-
-  // Returns a vector of DataType values, representing the respective
-  // element types of each tuple component in the outputs of this
-  // iterator.
-  virtual const DataTypeVector& output_dtypes() const = 0;
-
-  // Returns a vector of tensor shapes, representing the respective
-  // (and possibly partially defined) shapes of each tuple component
-  // in the outputs of this iterator.
-  virtual const std::vector<PartialTensorShape>& output_shapes() const = 0;
-
-  // Saves the state of this iterator.
-  virtual Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) {
-    return SaveInternal(writer);
-  }
-
-  // Restores the state of this iterator.
-  virtual Status Restore(OpKernelContext* ctx, IteratorStateReader* reader) {
-    return RestoreInternal(ctx, reader);
-  }
-
- protected:
-  // This is needed so that sub-classes of IteratorBase can call
-  // `SaveInternal` on their parent iterators, e.g., in
-  // `RepeatDataasetOp::Dataset`.
-  Status SaveParent(IteratorStateWriter* writer,
-                    const std::unique_ptr<IteratorBase>& parent) {
-    return parent->SaveInternal(writer);
-  }
-
-  // This is needed so that sub-classes of IteratorBase can call
-  // `RestoreInternal` on their parent iterators, e.g., in
-  // `RepeatDataasetOp::Dataset`.
-  Status RestoreParent(OpKernelContext* ctx, IteratorStateReader* reader,
-                       const std::unique_ptr<IteratorBase>& parent) {
-    return parent->RestoreInternal(ctx, reader);
-  }
-
-  // Saves the state of this iterator recursively.
-  virtual Status SaveInternal(IteratorStateWriter* writer) {
-    return errors::Unimplemented("SaveInternal");
-  }
-
-  // Restores the state of this iterator recursively.
-  virtual Status RestoreInternal(OpKernelContext* ctx,
-                                 IteratorStateReader* reader) {
-    return errors::Unimplemented("RestoreInternal");
-  }
-};
-
-// Represents a (potentially infinite) range of outputs, where each
-// output is a tuple of tensors.
-class DatasetBase : public core::RefCounted {
- public:
-  // Returns a new iterator for iterating over the range of elements in
-  // this dataset.
-  //
-  // This method may be called multiple times on the same instance,
-  // and the resulting iterators will have distinct state. Each
-  // iterator will traverse all elements in this dataset from the
-  // start.
-  //
-  // Ownership of the created iterator will be transferred to the caller.
-  //
-  // The prefix identifies the sequence of iterators leading up to the newly
-  // created iterator.
-  virtual std::unique_ptr<IteratorBase> MakeIterator(
-      const string& prefix) const = 0;
-
-  // Returns a vector of DataType values, representing the respective
-  // element types of each tuple component in the outputs of this
-  // dataset.
-  virtual const DataTypeVector& output_dtypes() const = 0;
-
-  // Returns a vector of tensor shapes, representing the respective
-  // (and possibly partially defined) shapes of each tuple component
-  // in the outputs of this dataset.
-  virtual const std::vector<PartialTensorShape>& output_shapes() const = 0;
-
-  // A human-readable debug string for this dataset.
-  virtual string DebugString() = 0;
-
-  // Serializes the dataset and writes it to the `writer`.
-  virtual Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) const {
-    return errors::Unimplemented("DatasetBase::Save");
-  }
-
- protected:
-  // TODO(srbs): Ideally all graph related logic should reside in
-  // GraphDatasetBase. However, that would require Datasets defined in all ops
-  // to derive from GraphDatasetBase. Once that is done we can move
-  // DatasetGraphDefBuilder and AsGraphDefInternal to GraphDatasetBase.
-  class DatasetGraphDefBuilder : public GraphDefBuilderWrapper {
-   public:
-    DatasetGraphDefBuilder(GraphDefBuilder* b) : GraphDefBuilderWrapper(b) {}
-    Status AddParentDataset(OpKernelContext* ctx, const DatasetBase* dataset,
-                            Node** output) {
-      return dataset->AsGraphDefInternal(ctx, this, output);
-    }
-  };
-
-  virtual Status AsGraphDefInternal(OpKernelContext* ctx,
-                                    DatasetGraphDefBuilder* b,
-                                    Node** node) const {
-    return AsGraphDefInternal(b, node);
-  }
-
-  virtual Status AsGraphDefInternal(DatasetGraphDefBuilder* b,
-                                    Node** node) const {
-    return errors::Unimplemented("AsGraphDefInternal");
-  }
-};
-
-// Base-class for datasets that are built by ops.
-class GraphDatasetBase : public DatasetBase {
- public:
-  GraphDatasetBase(OpKernelContext* ctx)
-      : op_name_(ctx->op_kernel().type_string()) {}
-
-  const string op_name() const { return op_name_; }
-
-  Status Save(OpKernelContext* ctx,
-              IteratorStateWriter* writer) const override {
-    string serialized_graph_def;
-    string output_node;
-    TF_RETURN_IF_ERROR(Serialize(ctx, &serialized_graph_def, &output_node));
-    TF_RETURN_IF_ERROR(
-        writer->WriteScalar(kDatasetGraphKey, serialized_graph_def));
-    TF_RETURN_IF_ERROR(
-        writer->WriteScalar(kDatasetGraphOutputNodeKey, output_node));
-    return Status::OK();
-  }
-
-  // Key for storing the Dataset graph in the serialized format.
-  static const char kDatasetGraphKey[];
-
-  // Key for storing the output node of the Dataset graph in the serialized
-  // format.
-  static const char kDatasetGraphOutputNodeKey[];
-
- private:
-  Status Serialize(OpKernelContext* ctx, string* serialized_graph_def,
-                   string* output_node) const {
-    GraphDefBuilder b;
-    DatasetGraphDefBuilder db(&b);
-    Node* node = nullptr;
-    TF_RETURN_IF_ERROR(AsGraphDefInternal(ctx, &db, &node));
-    *output_node = node->name();
-    GraphDef graph_def;
-    TF_RETURN_IF_ERROR(b.ToGraphDef(&graph_def));
-    graph_def.SerializeToString(serialized_graph_def);
-    return Status::OK();
-  }
-
-  const string op_name_;
-};
-
-// Represents an iterator that is associated with a particular parent dataset.
-template <class DatasetType>
-class DatasetIterator : public IteratorBase {
- public:
-  struct Params {
-    // Owns one reference on the shared dataset resource.
-    const DatasetType* dataset;
-
-    // Identifies the sequence of iterators leading up to this iterator.
-    const string prefix;
-  };
-
-  explicit DatasetIterator(const Params& params) : params_(params) {
-    params_.dataset->Ref();
-  }
-
-  ~DatasetIterator() override { params_.dataset->Unref(); }
-
-  // The dataset from which this iterator was created.
-  const DatasetType* dataset() const { return params_.dataset; }
-
-  // The sequence of iterators leading up to this iterator.
-  const string prefix() const { return params_.prefix; }
-
-  const DataTypeVector& output_dtypes() const override {
-    return params_.dataset->output_dtypes();
-  }
-
-  const std::vector<PartialTensorShape>& output_shapes() const override {
-    return params_.dataset->output_shapes();
-  }
-
-  Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
-                 bool* end_of_sequence) final {
-    port::Tracing::TraceMe activity(params_.prefix);
-    return GetNextInternal(ctx, out_tensors, end_of_sequence);
-  }
-
-  Status Save(OpKernelContext* ctx, IteratorStateWriter* writer) final {
-    TF_RETURN_IF_ERROR(dataset()->Save(ctx, writer));
-    return IteratorBase::Save(ctx, writer);
-  }
-
- protected:
-  // Internal implementation of GetNext that is wrapped in tracing logic.
-  virtual Status GetNextInternal(IteratorContext* ctx,
-                                 std::vector<Tensor>* out_tensors,
-                                 bool* end_of_sequence) = 0;
-
-  string full_name(const string& name) const {
-    return strings::StrCat(prefix(), ":", name);
-  }
-
- private:
-  Params params_;
-};
-
-// Encapsulates the work required to plug a DatasetBase into the core TensorFlow
-// graph execution engine.
-class DatasetOpKernel : public OpKernel {
- public:
-  DatasetOpKernel(OpKernelConstruction* ctx) : OpKernel(ctx) {}
-  void Compute(OpKernelContext* ctx) final;
-
- protected:
-  // Subclasses should implement this method. It will be called during Compute
-  // execution.
-  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase** output) = 0;
-
-  template <typename T>
-  Status ParseScalarArgument(OpKernelContext* ctx,
-                             const StringPiece& argument_name, T* output) {
-    const Tensor* argument_t;
-    TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
-    if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
-      return errors::InvalidArgument(argument_name, " must be a scalar");
-    }
-    *output = argument_t->scalar<T>()();
-    return Status::OK();
-  }
-};
-
-// Encapsulates the work required to plug unary Datasets into the core
-// TensorFlow graph execution engine.
-class UnaryDatasetOpKernel : public DatasetOpKernel {
- public:
-  UnaryDatasetOpKernel(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) {}
-
- protected:
-  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) final;
-  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
-                           DatasetBase** output) = 0;
-};
-
-// Encapsulates the work required to plug binary Datasets into the core
-// TensorFlow graph execution engine.
-class BinaryDatasetOpKernel : public DatasetOpKernel {
- public:
-  BinaryDatasetOpKernel(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) {}
-
- protected:
-  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) final;
-  virtual void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
-                           DatasetBase* another_input,
-                           DatasetBase** output) = 0;
-};
-
-// Validates and extracts a `DatasetBase` object from `tensor`.
-//
-// `tensor` must have been written by a call to SetVariantTensorToDataset().
-//
-// The retrieved pointer is a borrowed reference to the dataset, which is owned
-// by the tensor. The consumer must either acquire its own reference to the
-// dataset by calling `(*out_dataset)->Ref()`, or ensure that `tensor` is not
-// destroyed or mutated while the retrieved pointer is in use.
-Status GetDatasetFromVariantTensor(const Tensor& tensor,
-                                   DatasetBase** out_dataset);
-
-// Stores a `DatasetBase` object in `tensor`.
-//
-// The ownership of `dataset` is transferred to `tensor`.
-Status StoreDatasetInVariantTensor(DatasetBase* dataset, Tensor* tensor);
-
-}  // namespace tensorflow
+#include "tensorflow/core/kernels/data/dataset.h"
 
 #endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_H_
diff --git a/tensorflow/core/kernels/decode_compressed_op.cc b/tensorflow/core/kernels/decode_compressed_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3c3d49e1f8f187c6d6a880c386b2348246117faa
--- /dev/null
+++ b/tensorflow/core/kernels/decode_compressed_op.cc
@@ -0,0 +1,125 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/parse_ops.cc.
+
+#include <algorithm>
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/io/zlib_compression_options.h"
+#include "tensorflow/core/lib/io/zlib_inputstream.h"
+
+namespace tensorflow {
+namespace {
+// Wrap memory buffer into InputStreamInterface
+class MemoryInputStream : public io::InputStreamInterface {
+ public:
+  explicit MemoryInputStream(const char* buffer, size_t length)
+      : buf_(buffer), len_(length), pos_(0) {}
+
+  ~MemoryInputStream() override {}
+
+  Status ReadNBytes(int64 bytes_to_read, string* result) override {
+    result->clear();
+    if (bytes_to_read < 0) {
+      return errors::InvalidArgument("Can't read a negative number of bytes: ",
+                                     bytes_to_read);
+    }
+    int64 bytes = bytes_to_read;
+    Status s = Status::OK();
+    if (pos_ + bytes_to_read > len_) {
+      bytes = len_ - pos_;
+      s = errors::OutOfRange("reached end of file");
+    }
+    if (bytes > 0) {
+      result->resize(bytes);
+      memcpy(&(*result)[0], &buf_[pos_], bytes);
+      pos_ += bytes;
+    }
+    return s;
+  }
+
+  int64 Tell() const override { return pos_; }
+
+  Status Reset() override {
+    pos_ = 0;
+    return Status::OK();
+  }
+
+ private:
+  const char* buf_;  // Not owned.
+  int64 len_;
+  int64 pos_ = 0;  // Tracks where we are in the file.
+};
+}  // namespace
+
+class DecodeCompressedOp : public OpKernel {
+ public:
+  explicit DecodeCompressedOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("compression_type", &compression_type_));
+    OP_REQUIRES(context,
+                (compression_type_.empty() || compression_type_ == "ZLIB" ||
+                 compression_type_ == "GZIP"),
+                errors::InvalidArgument(
+                    "Only ZLIB, GZIP or NONE are supported compressions"));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor* bytes_tensor;
+    OP_REQUIRES_OK(context, context->input("bytes", &bytes_tensor));
+    const auto& bytes_flat = bytes_tensor->flat<string>();
+
+    Tensor* output_tensor = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output("output", bytes_tensor->shape(),
+                                            &output_tensor));
+    auto output_flat = output_tensor->flat<string>();
+    if (compression_type_.empty()) {
+      for (int64 i = 0; i < bytes_flat.size(); i++) {
+        output_flat(i) = bytes_flat(i);
+      }
+    } else {
+      const io::ZlibCompressionOptions zlib_options =
+          compression_type_ == "ZLIB" ? io::ZlibCompressionOptions::DEFAULT()
+                                      : io::ZlibCompressionOptions::GZIP();
+      for (int64 i = 0; i < bytes_flat.size(); i++) {
+        std::unique_ptr<MemoryInputStream> input_stream(
+            new MemoryInputStream(bytes_flat(i).data(), bytes_flat(i).size()));
+        std::unique_ptr<io::ZlibInputStream> zlib_stream(
+            new io::ZlibInputStream(
+                input_stream.get(), static_cast<size_t>(kBufferSize),
+                static_cast<size_t>(kBufferSize), zlib_options));
+        string output_string;
+        Status s = zlib_stream->ReadNBytes(INT_MAX, &output_string);
+        OP_REQUIRES(context, (s.ok() || errors::IsOutOfRange(s)), s);
+        output_flat(i) = output_string;
+      }
+    }
+  }
+
+ private:
+  enum { kBufferSize = 256 << 10 /* 256 kB */ };
+  string compression_type_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("DecodeCompressed").Device(DEVICE_CPU),
+                        DecodeCompressedOp)
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/decode_raw_op.cc b/tensorflow/core/kernels/decode_raw_op.cc
index 1c0085cfeab3498acfe388b6727ad4baa6c6c44e..bacacb94ae4384151bc4282960dd810cbf1299a0 100644
--- a/tensorflow/core/kernels/decode_raw_op.cc
+++ b/tensorflow/core/kernels/decode_raw_op.cc
@@ -51,7 +51,7 @@ class DecodeRawOp : public OpKernel {
     }
     TensorShape out_shape = input.shape();
     if (str_size == -1 || str_size == 0) {  // Empty input
-      out_shape.AddDim(1);
+      out_shape.AddDim(0);
       Tensor* output_tensor = nullptr;
       OP_REQUIRES_OK(context, context->allocate_output("output", out_shape,
                                                        &output_tensor));
diff --git a/tensorflow/core/kernels/decode_wav_op_test.cc b/tensorflow/core/kernels/decode_wav_op_test.cc
index fc323a5e04205b81bc64e2335df4b9fcee5db8b7..84dc649dabacd021ca19b277ad5f271ee12b9745 100644
--- a/tensorflow/core/kernels/decode_wav_op_test.cc
+++ b/tensorflow/core/kernels/decode_wav_op_test.cc
@@ -32,8 +32,8 @@ limitations under the License.
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
-
-using namespace ops;  // NOLINT(build/namespaces)
+namespace ops {
+namespace {
 
 TEST(DecodeWavOpTest, DecodeWavTest) {
   Scope root = Scope::NewRootScope();
@@ -121,4 +121,6 @@ TEST(DecodeWavOpTest, DecodeWav_ShapeFn) {
   INFER_ERROR("channels must be non-negative, got -2", op, "[]");
 }
 
+}  // namespace
+}  // namespace ops
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc
index 208401cb24e9c7ebf28e42ccb2762764474a5377..c9c97dc072c93e3ab840a8a9c9d81eadd2adaa3c 100644
--- a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc
+++ b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc
@@ -62,6 +62,8 @@ TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 #define DEFINE_GPU_KERNELS(T) \
   template struct functor::DenseUpdate<GPUDevice, T, ASSIGN>;
 TF_CALL_GPU_ALL_TYPES(DEFINE_GPU_KERNELS);
+TF_CALL_int32(DEFINE_GPU_KERNELS);
+TF_CALL_int64(DEFINE_GPU_KERNELS);
 #undef DEFINE_GPU_KERNELS
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc
index 2759ecb2f1157b037b700cc5b4662a35b175c08c..a5fd07fbe177f2206ef9b6b3252556211b9e3905 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op.cc
@@ -373,8 +373,11 @@ class DepthwiseConv2dNativeOp : public BinaryOp<T> {
     // If in_depth==1, this operation is just a standard convolution, so
     // invoke that op.
     if (std::is_same<T, float>::value && in_depth == 1) {
+      // TODO(yangzihao): Send in arbitrary dilation rates after the dilated
+      // conv is supported.
       launcher_(context, use_cudnn_, cudnn_use_autotune_, input, filter,
-                stride_, stride_, padding_, output, data_format_);
+                /*row_dilation=*/1, /*col_dilation=*/1, stride_, stride_,
+                padding_, output, data_format_);
       return;
     }
 
diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
index 872921efa581bec64e98623a8fe3d955cd3cf3f7..9bb58b13f382970c60b551f448243a2b75e30df3 100644
--- a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc
@@ -130,7 +130,7 @@ class BoundedOutputIterator
     // Constructor
     __host__ __device__ __forceinline__
     BoundedReference(int32* ptr, int32* base, IdentityOp op, int32 limit)
-        : Reference(ptr, op), base(base), limit(limit) {}
+        : Reference(ptr, op), limit(limit), base(base) {}
 
     // Assignment
     __host__ __device__ __forceinline__ int32 operator=(int32 val) {
@@ -146,11 +146,11 @@ class BoundedOutputIterator
   __host__ __device__ __forceinline__ BoundedOutputIterator(int32* ptr,
                                                             IdentityOp op,
                                                             int32 size)
-      : TransformOutputIterator(ptr, op), base(ptr), limit(size) {}
+      : TransformOutputIterator(ptr, op), limit(size), base(ptr) {}
 
   __host__ __device__ __forceinline__
   BoundedOutputIterator(int32* ptr, int32* base, IdentityOp op, int32 size)
-      : TransformOutputIterator(ptr, op), base(base), limit(size) {}
+      : TransformOutputIterator(ptr, op), limit(size), base(base) {}
 
   // Indirection
   __host__ __device__ __forceinline__ reference operator*() const {
diff --git a/tensorflow/core/kernels/encode_wav_op_test.cc b/tensorflow/core/kernels/encode_wav_op_test.cc
index 34138ac9a04fc7d233a3ec30383cb0b96c0126e6..b3c61e2c995b01dbea9c1080cb353d6108d87672 100644
--- a/tensorflow/core/kernels/encode_wav_op_test.cc
+++ b/tensorflow/core/kernels/encode_wav_op_test.cc
@@ -31,8 +31,8 @@ limitations under the License.
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
-
-using namespace ops;  // NOLINT(build/namespaces)
+namespace ops {
+namespace {
 
 TEST(EncodeWavOpTest, EncodeWavTest) {
   Scope root = Scope::DisabledShapeInferenceScope();
@@ -77,4 +77,6 @@ TEST(EncodeWavOpTest, EncodeWavTest) {
   EXPECT_EQ(44100, sample_rate);
 }
 
+}  // namespace
+}  // namespace ops
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/example_parsing_ops.cc b/tensorflow/core/kernels/example_parsing_ops.cc
index 2db844e410cea679291aec67748ed15297a0e36a..268a059275acc62432d59df239abd5869f546064 100644
--- a/tensorflow/core/kernels/example_parsing_ops.cc
+++ b/tensorflow/core/kernels/example_parsing_ops.cc
@@ -34,9 +34,9 @@ limitations under the License.
 
 namespace tensorflow {
 
-class ExampleParserOp : public OpKernel {
+class ParseExampleOp : public OpKernel {
  public:
-  explicit ExampleParserOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+  explicit ParseExampleOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
     OP_REQUIRES_OK(ctx, attrs_.Init(ctx));
   }
 
@@ -162,11 +162,107 @@ class ExampleParserOp : public OpKernel {
   }
 
  protected:
-  ParseSingleExampleAttrs attrs_;
+  ParseExampleAttrs attrs_;
 };
 
 REGISTER_KERNEL_BUILDER(Name("ParseExample").Device(DEVICE_CPU),
-                        ExampleParserOp);
+                        ParseExampleOp);
+
+class ParseSingleExampleOp : public OpKernel {
+ public:
+  explicit ParseSingleExampleOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, attrs_.Init(ctx));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor* serialized;
+    OpInputList dense_defaults;
+
+    // Grab the input list arguments.
+    OP_REQUIRES_OK(ctx, ctx->input("serialized", &serialized));
+    OP_REQUIRES_OK(ctx, ctx->input_list("dense_defaults", &dense_defaults));
+
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(serialized->shape()),
+                errors::InvalidArgument(
+                    "Expected serialized to be a scalar, got shape: ",
+                    serialized->shape().DebugString()));
+    OP_REQUIRES(ctx, dense_defaults.size() == attrs_.dense_keys.size(),
+                errors::InvalidArgument(
+                    "Expected len(dense_defaults) == len(dense_keys) but got: ",
+                    dense_defaults.size(), " vs. ", attrs_.dense_keys.size()));
+
+    for (size_t d = 0; d < attrs_.dense_keys.size(); ++d) {
+      const Tensor& def_value = dense_defaults[d];
+      if (attrs_.variable_length[d]) {
+        OP_REQUIRES(ctx, def_value.NumElements() == 1,
+                    errors::InvalidArgument(
+                        "dense_shape[", d, "] is a variable length shape: ",
+                        attrs_.dense_shapes[d].DebugString(),
+                        ", therefore "
+                        "def_value[",
+                        d,
+                        "] must contain a single element ("
+                        "the padding element).  But its shape is: ",
+                        def_value.shape().DebugString()));
+      } else if (def_value.NumElements() > 0) {
+        OP_REQUIRES(ctx,
+                    attrs_.dense_shapes[d].IsCompatibleWith(def_value.shape()),
+                    errors::InvalidArgument(
+                        "def_value[", d,
+                        "].shape() == ", def_value.shape().DebugString(),
+                        " is not compatible with dense_shapes_[", d,
+                        "] == ", attrs_.dense_shapes[d].DebugString()));
+      }
+      OP_REQUIRES(ctx, def_value.dtype() == attrs_.dense_types[d],
+                  errors::InvalidArgument(
+                      "dense_defaults[", d, "].dtype() == ",
+                      DataTypeString(def_value.dtype()), " != dense_types_[", d,
+                      "] == ", DataTypeString(attrs_.dense_types[d])));
+    }
+
+    example::Result result;
+
+    // TODO(mrry): Build the configuration once and cache it.
+    example::FastParseExampleConfig config;
+    for (int d = 0; d < attrs_.dense_keys.size(); ++d) {
+      config.dense.push_back({attrs_.dense_keys[d], attrs_.dense_types[d],
+                              attrs_.dense_shapes[d], dense_defaults[d],
+                              attrs_.variable_length[d],
+                              attrs_.elements_per_stride[d]});
+    }
+    for (int d = 0; d < attrs_.sparse_keys.size(); ++d) {
+      config.sparse.push_back({attrs_.sparse_keys[d], attrs_.sparse_types[d]});
+    }
+
+    const string& serialized_proto = serialized->scalar<string>()();
+
+    OP_REQUIRES_OK(ctx,
+                   FastParseSingleExample(config, serialized_proto, &result));
+
+    OpOutputList dense_values;
+    OpOutputList sparse_indices;
+    OpOutputList sparse_values;
+    OpOutputList sparse_shapes;
+    OP_REQUIRES_OK(ctx, ctx->output_list("dense_values", &dense_values));
+    OP_REQUIRES_OK(ctx, ctx->output_list("sparse_indices", &sparse_indices));
+    OP_REQUIRES_OK(ctx, ctx->output_list("sparse_values", &sparse_values));
+    OP_REQUIRES_OK(ctx, ctx->output_list("sparse_shapes", &sparse_shapes));
+    for (int d = 0; d < attrs_.dense_keys.size(); ++d) {
+      dense_values.set(d, result.dense_values[d]);
+    }
+    for (int d = 0; d < attrs_.sparse_keys.size(); ++d) {
+      sparse_indices.set(d, result.sparse_indices[d]);
+      sparse_values.set(d, result.sparse_values[d]);
+      sparse_shapes.set(d, result.sparse_shapes[d]);
+    }
+  }
+
+ protected:
+  ParseSingleExampleAttrs attrs_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("ParseSingleExample").Device(DEVICE_CPU),
+                        ParseSingleExampleOp);
 
 class SingleSequenceExampleParserOp : public OpKernel {
  public:
diff --git a/tensorflow/core/kernels/example_parsing_ops_test.cc b/tensorflow/core/kernels/example_parsing_ops_test.cc
index 29dbfd3b1bdd07ba362094609c0965ffeb6f7225..5d06eda79e7544951ea7ee10179c8e76dcbb58af 100644
--- a/tensorflow/core/kernels/example_parsing_ops_test.cc
+++ b/tensorflow/core/kernels/example_parsing_ops_test.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <mutex>
 #include <unordered_map>
 
 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
@@ -80,6 +81,26 @@ class FloatFiller {
 
 template <typename T>
 struct ExampleStore {
+ private:
+  static ExampleTensorMap serialized_example;
+  static std::once_flag flags_init;
+
+ public:
+  static ExampleTensorMap& GetSerializedExample() {
+    std::call_once(flags_init, [] {
+      AddExample(&serialized_example, 10, 1, 1);
+      AddExample(&serialized_example, 100, 1, 1);
+      AddExample(&serialized_example, 1000, 1, 1);
+      AddExample(&serialized_example, 10, 128, 1);
+      AddExample(&serialized_example, 100, 128, 1);
+      AddExample(&serialized_example, 1000, 128, 1);
+      AddExample(&serialized_example, 10, 512, 1);
+      AddExample(&serialized_example, 100, 512, 1);
+      AddExample(&serialized_example, 1000, 512, 1);
+      AddExample(&serialized_example, 1, 1, 1000000);
+    });
+    return serialized_example;
+  }
   typedef T Filler;
   static void AddExample(ExampleTensorMap* examples, int num_keys,
                          int batch_size, int feature_size) {
@@ -101,31 +122,15 @@ struct ExampleStore {
     (*examples)[std::make_tuple(batch_size, num_keys, feature_size)] =
         record_string;
   }
-  static ExampleTensorMap GetSerializedExamples() {
-    ExampleTensorMap examples;
-    AddExample(&examples, 10, 128, 1);
-    AddExample(&examples, 100, 128, 1);
-    AddExample(&examples, 1000, 128, 1);
-    AddExample(&examples, 10, 512, 1);
-    AddExample(&examples, 100, 512, 1);
-    AddExample(&examples, 1000, 512, 1);
-    AddExample(&examples, 1, 1, 1000000);
-    return examples;
-  }
-  static ExampleTensorMap serialized_example;
 };
+template <typename T>
+ExampleTensorMap ExampleStore<T>::serialized_example;
+template <typename T>
+std::once_flag ExampleStore<T>::flags_init;
 
-template <>
-ExampleTensorMap ExampleStore<BytesFiller>::serialized_example =
-    ExampleStore<BytesFiller>::GetSerializedExamples();
-
-template <>
-ExampleTensorMap ExampleStore<Int64Filler>::serialized_example =
-    ExampleStore<Int64Filler>::GetSerializedExamples();
-
-template <>
-ExampleTensorMap ExampleStore<FloatFiller>::serialized_example =
-    ExampleStore<FloatFiller>::GetSerializedExamples();
+template class ExampleStore<BytesFiller>;
+template class ExampleStore<Int64Filler>;
+template class ExampleStore<FloatFiller>;
 
 enum BenchmarkType { kDense, kSparse, kVarLenDense };
 
@@ -139,7 +144,7 @@ struct BenchmarkOptions {
 template <typename Options>
 static Graph* ParseExample(int batch_size, int num_keys, int feature_size) {
   Graph* g = new Graph(OpRegistry::Global());
-  Tensor& serialized = Options::Store::serialized_example[std::make_tuple(
+  Tensor& serialized = Options::Store::GetSerializedExample()[std::make_tuple(
       batch_size, num_keys, feature_size)];
   Tensor names(DT_STRING, TensorShape({batch_size}));
 
@@ -186,6 +191,56 @@ static Graph* ParseExample(int batch_size, int num_keys, int feature_size) {
   return g;
 }
 
+template <typename Options>
+static Graph* ParseSingleExample(int num_keys, int feature_size) {
+  Graph* g = new Graph(OpRegistry::Global());
+  Tensor& serialized_batch_1 =
+      Options::Store::GetSerializedExample()[std::make_tuple(1, num_keys,
+                                                             feature_size)];
+  Tensor serialized(DT_STRING, TensorShape());
+  serialized.scalar<string>()() = serialized_batch_1.vec<string>()(0);
+
+  std::vector<string> sparse_keys;
+  std::vector<string> dense_keys;
+  std::vector<NodeBuilder::NodeOut> dense_defaults;
+  std::vector<DataType> sparse_types;
+  std::vector<PartialTensorShape> dense_shapes;
+  Options opt;
+  for (int i = 0; i < num_keys; ++i) {
+    string key = strings::Printf("feature_%d", i);
+    switch (opt.benchmark_type) {
+      case kDense:
+        dense_keys.push_back(key),
+            dense_defaults.emplace_back(test::graph::Constant(
+                g, opt.filler.make_dense_default(feature_size)));
+        dense_shapes.push_back(PartialTensorShape({feature_size}));
+        break;
+      case kVarLenDense:
+        dense_keys.push_back(key),
+            dense_defaults.emplace_back(
+                test::graph::Constant(g, opt.filler.make_dense_default(1)));
+        dense_shapes.push_back(PartialTensorShape({-1}));
+        break;
+      case kSparse:
+        sparse_keys.push_back(key), sparse_types.push_back(opt.filler.dtype);
+        break;
+    }
+  }
+
+  Node* ret;
+  TF_EXPECT_OK(NodeBuilder(g->NewName("n"), "ParseSingleExample")
+                   .Input(test::graph::Constant(g, serialized))
+                   .Input(dense_defaults)
+                   .Attr<int64>("num_sparse", sparse_keys.size())
+                   .Attr("sparse_keys", sparse_keys)
+                   .Attr("sparse_types", sparse_types)
+                   .Attr("dense_keys", dense_keys)
+                   .Attr("dense_shapes", dense_shapes)
+                   .Finalize(g, &ret));
+
+  return g;
+}
+
 // Benchmark settings (Sparse, Dense) X (Bytes, Int64, Float)
 typedef BenchmarkOptions<ExampleStore<BytesFiller>, kSparse> SparseString;
 typedef BenchmarkOptions<ExampleStore<BytesFiller>, kDense> DenseString;
@@ -212,10 +267,13 @@ typedef BenchmarkOptions<ExampleStore<FloatFiller>, kVarLenDense>
   BENCHMARK(BM_ParseExample##_##TYPE##_##B##_##K##_##F);
 
 #define BM_AllParseExample(Type)       \
+  BM_ParseExample(Type, 1, 10, 1);     \
   BM_ParseExample(Type, 128, 10, 1);   \
   BM_ParseExample(Type, 512, 10, 1);   \
+  BM_ParseExample(Type, 1, 100, 1);    \
   BM_ParseExample(Type, 128, 100, 1);  \
   BM_ParseExample(Type, 512, 100, 1);  \
+  BM_ParseExample(Type, 1, 1000, 1);   \
   BM_ParseExample(Type, 128, 1000, 1); \
   BM_ParseExample(Type, 512, 1000, 1); \
   BM_ParseExample(Type, 1, 1, 1000000);
@@ -230,4 +288,31 @@ BM_AllParseExample(SparseFloat);
 BM_AllParseExample(DenseFloat);
 BM_AllParseExample(VarLenDenseFloat);
 
+// K == num_keys. F == feature_size.
+// K must be one of 10, 100, 1000
+#define BM_ParseSingleExample(TYPE, K, F)                                \
+  static void BM_ParseSingleExample##_##TYPE##_1_##K##_##F(int iters) {  \
+    int64 items_per_iter = K * F;                                        \
+    testing::UseRealTime();                                              \
+    testing::ItemsProcessed(static_cast<int64>(iters) * items_per_iter); \
+    test::Benchmark("cpu", ParseSingleExample<TYPE>(K, F)).Run(iters);   \
+  }                                                                      \
+  BENCHMARK(BM_ParseSingleExample##_##TYPE##_1_##K##_##F);
+
+#define BM_AllParseSingleExample(Type)  \
+  BM_ParseSingleExample(Type, 10, 1);   \
+  BM_ParseSingleExample(Type, 100, 1);  \
+  BM_ParseSingleExample(Type, 1000, 1); \
+  BM_ParseSingleExample(Type, 1, 1000000);
+
+BM_AllParseSingleExample(SparseString);
+BM_AllParseSingleExample(DenseString);
+BM_AllParseSingleExample(VarLenDenseString);
+BM_AllParseSingleExample(SparseInt64);
+BM_AllParseSingleExample(DenseInt64);
+BM_AllParseSingleExample(VarLenDenseInt64);
+BM_AllParseSingleExample(SparseFloat);
+BM_AllParseSingleExample(DenseFloat);
+BM_AllParseSingleExample(VarLenDenseFloat);
+
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/fifo_queue.cc b/tensorflow/core/kernels/fifo_queue.cc
index 9fd82e2168383917b9002d43a894a2da064c2a34..82ec87911985abe714490ad74fa19105f850b536 100644
--- a/tensorflow/core/kernels/fifo_queue.cc
+++ b/tensorflow/core/kernels/fifo_queue.cc
@@ -95,7 +95,7 @@ Status FIFOQueue::GetElementComponentFromBatch(const FIFOQueue::Tuple& tuple,
   TF_RETURN_IF_ERROR(ctx->allocate_persistent(
       tuple[component].dtype(), element_shape, out_tensor, &element_access));
   TF_RETURN_IF_ERROR(
-      CopySliceToElement(tuple[component], element_access, index));
+      batch_util::CopySliceToElement(tuple[component], element_access, index));
   return Status::OK();
 }
 
diff --git a/tensorflow/core/kernels/fill_functor.cc b/tensorflow/core/kernels/fill_functor.cc
index ea0cc139f3da4fb7a5fc7d092cff6c5b0be792d5..bde39770dee0a3e66746bb47562f799ab8bb1224 100644
--- a/tensorflow/core/kernels/fill_functor.cc
+++ b/tensorflow/core/kernels/fill_functor.cc
@@ -19,6 +19,7 @@ limitations under the License.
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/variant_encode_decode.h"
 
@@ -41,6 +42,7 @@ void SetZeroFunctor<Eigen::ThreadPoolDevice, string>::operator()(
   template struct SetZeroFunctor<Eigen::ThreadPoolDevice, T>;
 DEFINE_SETZERO_CPU(bool);
 DEFINE_SETZERO_CPU(Eigen::half);
+DEFINE_SETZERO_CPU(bfloat16);
 DEFINE_SETZERO_CPU(float);
 DEFINE_SETZERO_CPU(double);
 DEFINE_SETZERO_CPU(uint8);
@@ -74,6 +76,7 @@ DEFINE_SETZERO_SYCL(int32);
 DEFINE_SETZERO_SYCL(int64);
 #undef DEFINE_SETZERO_SYCL
 #endif  // TENSORFLOW_USE_SYCL
+
 template <typename T>
 void SetOneFunctor<Eigen::ThreadPoolDevice, T>::operator()(
     const Eigen::ThreadPoolDevice& d, typename TTypes<T>::Flat out) {
@@ -85,6 +88,7 @@ void SetOneFunctor<Eigen::ThreadPoolDevice, T>::operator()(
   template struct SetOneFunctor<Eigen::ThreadPoolDevice, T>;
 DEFINE_SETONE_CPU(bool);
 DEFINE_SETONE_CPU(Eigen::half);
+DEFINE_SETONE_CPU(bfloat16);
 DEFINE_SETONE_CPU(float);
 DEFINE_SETONE_CPU(double);
 DEFINE_SETONE_CPU(uint8);
@@ -112,5 +116,47 @@ DEFINE_SETONE_SYCL(double);
 #undef DEFINE_SETONE_SYCL
 #endif  // TENSORFLOW_USE_SYCL
 
+template <typename T>
+struct FillFunctor<Eigen::ThreadPoolDevice, T> {
+  void operator()(const Eigen::ThreadPoolDevice& d, typename TTypes<T>::Flat out,
+                  typename TTypes<T>::ConstScalar in) {
+    out.device(d) = out.constant(in());
+  }
+};
+
+// Explicit instantiations.
+#define DEFINE_FILL_CPU(T) \
+  template struct FillFunctor<Eigen::ThreadPoolDevice, T>;
+
+TF_CALL_ALL_TYPES(DEFINE_FILL_CPU);
+DEFINE_FILL_CPU(quint8);
+DEFINE_FILL_CPU(quint16);
+#undef DEFINE_FILL_CPU
+
+#ifdef TENSORFLOW_USE_SYCL
+template <typename T>
+struct FillFunctor<Eigen::SyclDevice, T> {
+  void operator()(const Eigen::SyclDevice& d, typename TTypes<T>::Flat out,
+                  typename TTypes<T>::ConstScalar in) {
+#if !defined(EIGEN_HAS_INDEX_LIST)
+    Eigen::array<int, 1> rank1{1};
+#else
+    Eigen::IndexList<Eigen::type2index<1> > rank1;
+#endif
+    const int size = out.dimension(0);
+    Eigen::array<int, 1> broadcast_dims{size};
+
+    To32Bit(out).device(d) = in.reshape(rank1).broadcast(broadcast_dims);
+  }
+};
+
+#define DEFINE_FILL_SYCL(T) \
+  template struct FillFunctor<Eigen::SyclDevice, T>;
+DEFINE_FILL_SYCL(float);
+DEFINE_FILL_SYCL(double);
+TF_CALL_INTEGRAL_TYPES(DEFINE_FILL_SYCL)
+#undef DEFINE_FILL_SYCL
+#endif  // TENSORFLOW_USE_SYCL
+
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/fill_functor.cu.cc b/tensorflow/core/kernels/fill_functor.cu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3487606778eabde386335f8450d627b7bf74ad42
--- /dev/null
+++ b/tensorflow/core/kernels/fill_functor.cu.cc
@@ -0,0 +1,112 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/kernels/fill_functor.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace Eigen {
+namespace internal {
+
+template <typename T>
+struct scalar_const_op {
+  typedef typename packet_traits<T>::type Packet;
+
+  const T* val;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  scalar_const_op(const scalar_const_op& x)
+      : val(x.val) {}
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_const_op(const T* v) : val(v) {}
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()() const {
+    return *val;
+  }
+
+  template <typename PacketType = Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const {
+    return internal::pset1<PacketType>(*val);
+  }
+};
+
+template <typename T>
+struct functor_traits<scalar_const_op<T> > {
+  enum {
+    Cost = 1,
+    PacketAccess = packet_traits<T>::Vectorizable,
+    IsRepeatable = true
+  };
+};
+
+}  // end namespace internal
+}  // end namespace Eigen
+
+namespace tensorflow {
+
+namespace functor {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+// Partial specialization FillFunctor<Device=GPUDevice, T>
+template <typename T>
+struct FillFunctor<GPUDevice, T> {
+  void operator()(const GPUDevice& d, typename TTypes<T>::Flat out,
+                  typename TTypes<T>::ConstScalar in) {
+    Eigen::internal::scalar_const_op<T> f(in.data());
+    To32Bit(out).device(d) = To32Bit(out).nullaryExpr(f);
+  }
+};
+
+#define DEFINE_FILL_GPU(T) template struct FillFunctor<GPUDevice, T>;
+TF_CALL_REAL_NUMBER_TYPES(DEFINE_FILL_GPU);
+TF_CALL_bool(DEFINE_FILL_GPU);
+#undef DEFINE_FILL_GPU
+
+// Partial specialization of FillFunctor<Device=GPUDevice, T>.
+template <typename T>
+struct SetZeroFunctor<GPUDevice, T> {
+  void operator()(const GPUDevice& d, typename TTypes<T>::Flat out) {
+    To32Bit(out).device(d) = To32Bit(out).constant(T(0));
+  }
+};
+
+#define DEFINE_SETZERO_GPU(T) template struct SetZeroFunctor<GPUDevice, T>;
+TF_CALL_NUMBER_TYPES(DEFINE_SETZERO_GPU);
+TF_CALL_bool(DEFINE_SETZERO_GPU);
+#undef DEFINE_SETZERO_GPU
+
+// Partial specialization of FillFunctor<Device=GPUDevice, T>.
+template <typename T>
+struct SetOneFunctor<GPUDevice, T> {
+  void operator()(const GPUDevice& d, typename TTypes<T>::Flat out) {
+    To32Bit(out).device(d) = To32Bit(out).constant(T(1));
+  }
+};
+
+#define DEFINE_SETONE_GPU(T) template struct SetOneFunctor<GPUDevice, T>;
+TF_CALL_NUMBER_TYPES(DEFINE_SETONE_GPU);
+TF_CALL_bool(DEFINE_SETONE_GPU);
+#undef DEFINE_SETONE_GPU
+
+}  // end namespace functor
+}  // end namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/fractional_avg_pool_op.cc b/tensorflow/core/kernels/fractional_avg_pool_op.cc
index bfdb7b4a1e4cc9af9745896c5ff1341f00efdffe..47f4189c30f10644ca7b040677ebadf439a9dc75 100644
--- a/tensorflow/core/kernels/fractional_avg_pool_op.cc
+++ b/tensorflow/core/kernels/fractional_avg_pool_op.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/util/guarded_philox_random.h"
@@ -47,9 +48,20 @@ class FractionalAvgPoolOp : public OpKernel {
         errors::Unimplemented("Fractional average pooling is not yet "
                               "supported on the batch nor channel dimension."));
     OP_REQUIRES_OK(context, context->GetAttr("deterministic", &deterministic_));
-    pooling_region_generated_ = false;
-    // Initialize philox random generator.
-    OP_REQUIRES_OK(context, generator_.Init(context));
+    OP_REQUIRES_OK(context, context->GetAttr("seed", &seed_));
+    OP_REQUIRES_OK(context, context->GetAttr("seed2", &seed2_));
+    if (deterministic_) {
+      // If both seeds are not set when deterministic_ is true, force set seeds.
+      if ((seed_ == 0) && (seed2_ == 0)) {
+        seed_ = random::New64();
+        seed2_ = random::New64();
+      }
+    } else {
+      OP_REQUIRES(
+          context, (seed_ == 0) && (seed2_ == 0),
+          errors::InvalidArgument(
+              "Both seed and seed2 should be 0 if deterministic is false."));
+    }
   }
 
   void Compute(OpKernelContext* context) override {
@@ -64,47 +76,35 @@ class FractionalAvgPoolOp : public OpKernel {
     OP_REQUIRES(context, tensor_in.dims() == tensor_in_and_out_dims,
                 errors::InvalidArgument("tensor_in must be 4-dimensional"));
 
+    std::vector<int> input_size(tensor_in_and_out_dims);
+    std::vector<int> output_size(tensor_in_and_out_dims);
     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
-      input_size_.push_back(tensor_in.dim_size(i));
+      input_size[i] = tensor_in.dim_size(i);
     }
     // Output size.
     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
-      output_size_.push_back(
-          static_cast<int>(floor(input_size_[i] / pooling_ratio_[i])));
-      DCHECK_GT(output_size_[i], 0);
+      output_size[i] =
+          static_cast<int>(floor(input_size[i] / pooling_ratio_[i]));
+      DCHECK_GT(output_size[i], 0);
     }
 
     // Generate pooling sequence.
     std::vector<int64> row_cum_seq;
     std::vector<int64> col_cum_seq;
-    if (deterministic_) {
-      if (pooling_region_generated_) {
-        row_cum_seq = row_cum_seq_;
-        col_cum_seq = col_cum_seq_;
-      } else {
-        row_cum_seq = GeneratePoolingSequence(input_size_[1], output_size_[1],
-                                              &generator_, pseudo_random_);
-        col_cum_seq = GeneratePoolingSequence(input_size_[2], output_size_[2],
-                                              &generator_, pseudo_random_);
-        mutex_lock lock(mu_);
-        row_cum_seq_ = row_cum_seq;
-        col_cum_seq_ = col_cum_seq;
-        pooling_region_generated_ = true;
-      }
-    } else {
-      row_cum_seq = GeneratePoolingSequence(input_size_[1], output_size_[1],
-                                            &generator_, pseudo_random_);
-      col_cum_seq = GeneratePoolingSequence(input_size_[2], output_size_[2],
-                                            &generator_, pseudo_random_);
-    }
+    GuardedPhiloxRandom generator;
+    generator.Init(seed_, seed2_);
+    row_cum_seq = GeneratePoolingSequence(input_size[1], output_size[1],
+                                          &generator, pseudo_random_);
+    col_cum_seq = GeneratePoolingSequence(input_size[2], output_size[2],
+                                          &generator, pseudo_random_);
 
     // Prepare output.
     Tensor* output_tensor = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(
-                       0, TensorShape({output_size_[0], output_size_[1],
-                                       output_size_[2], output_size_[3]}),
-                       &output_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(
+                                0,
+                                TensorShape({output_size[0], output_size[1],
+                                             output_size[2], output_size[3]}),
+                                &output_tensor));
     Tensor* output_row_seq_tensor = nullptr;
     OP_REQUIRES_OK(context,
                    context->allocate_output(
@@ -116,12 +116,11 @@ class FractionalAvgPoolOp : public OpKernel {
                        2, TensorShape({static_cast<int64>(col_cum_seq.size())}),
                        &output_col_seq_tensor));
 
-    ConstEigenMatrixMap in_mat(
-        tensor_in.flat<T>().data(), input_size_[3],
-        input_size_[2] * input_size_[1] * input_size_[0]);
+    ConstEigenMatrixMap in_mat(tensor_in.flat<T>().data(), input_size[3],
+                               input_size[2] * input_size[1] * input_size[0]);
 
-    EigenMatrixMap out_mat(output_tensor->flat<T>().data(), output_size_[3],
-                           output_size_[2] * output_size_[1] * output_size_[0]);
+    EigenMatrixMap out_mat(output_tensor->flat<T>().data(), output_size[3],
+                           output_size[2] * output_size[1] * output_size[0]);
     // out_count corresponds to number of elements in each pooling cell.
     Eigen::Matrix<T, Eigen::Dynamic, 1> out_count(out_mat.cols());
 
@@ -146,9 +145,9 @@ class FractionalAvgPoolOp : public OpKernel {
     // 1: row / row
     // 2: col / col
     // 3: depth / channel
-    const int64 row_max = input_size_[1] - 1;
-    const int64 col_max = input_size_[2] - 1;
-    for (int64 b = 0; b < input_size_[0]; ++b) {
+    const int64 row_max = input_size[1] - 1;
+    const int64 col_max = input_size[2] - 1;
+    for (int64 b = 0; b < input_size[0]; ++b) {
       // row sequence.
       for (int64 hs = 0; hs < row_cum_seq.size() - 1; ++hs) {
         // row start and end.
@@ -160,7 +159,7 @@ class FractionalAvgPoolOp : public OpKernel {
         // col sequence.
         for (int64 ws = 0; ws < col_cum_seq.size() - 1; ++ws) {
           const int64 out_offset =
-              (b * output_size_[1] + hs) * output_size_[2] + ws;
+              (b * output_size[1] + hs) * output_size[2] + ws;
           // col start and end.
           const int64 col_start = col_cum_seq[ws];
           int64 col_end =
@@ -169,7 +168,7 @@ class FractionalAvgPoolOp : public OpKernel {
           for (int64 h = row_start; h <= row_end; ++h) {
             for (int64 w = col_start; w <= col_end; ++w) {
               const int64 in_offset =
-                  (b * input_size_[1] + h) * input_size_[2] + w;
+                  (b * input_size[1] + h) * input_size[2] + w;
               out_mat.col(out_offset) += in_mat.col(in_offset);
               out_count(out_offset)++;
             }
@@ -183,18 +182,11 @@ class FractionalAvgPoolOp : public OpKernel {
 
  private:
   bool deterministic_;
-  // meaningful only when deterministic_ is true.
-  mutex mu_;
-  std::vector<int64> row_cum_seq_;
-  std::vector<int64> col_cum_seq_;
-  bool pooling_region_generated_;
-
-  std::vector<int32> input_size_;
-  std::vector<int32> output_size_;
+  int64 seed_;
+  int64 seed2_;
   std::vector<float> pooling_ratio_;
   bool pseudo_random_;
   bool overlapping_;
-  GuardedPhiloxRandom generator_;
 };
 
 #define REGISTER_FRACTIONALAVGPOOL(type)                                      \
diff --git a/tensorflow/core/kernels/fractional_max_pool_op.cc b/tensorflow/core/kernels/fractional_max_pool_op.cc
index 33d73c84776341cf08243d828ee372456554e2cf..cf580adab256bf055f206f44a5996c1e5487540a 100644
--- a/tensorflow/core/kernels/fractional_max_pool_op.cc
+++ b/tensorflow/core/kernels/fractional_max_pool_op.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/util/guarded_philox_random.h"
@@ -50,9 +51,20 @@ class FractionalMaxPoolOp : public OpKernel {
                               "supported on the batch nor channel dimension."));
 
     OP_REQUIRES_OK(context, context->GetAttr("deterministic", &deterministic_));
-    pooling_region_generated_ = false;
-    // Initialize philox random generator.
-    OP_REQUIRES_OK(context, generator_.Init(context));
+    OP_REQUIRES_OK(context, context->GetAttr("seed", &seed_));
+    OP_REQUIRES_OK(context, context->GetAttr("seed2", &seed2_));
+    if (deterministic_) {
+      // If both seeds are not set when deterministic_ is true, force set seeds.
+      if ((seed_ == 0) && (seed2_ == 0)) {
+        seed_ = random::New64();
+        seed2_ = random::New64();
+      }
+    } else {
+      OP_REQUIRES(
+          context, (seed_ == 0) && (seed2_ == 0),
+          errors::InvalidArgument(
+              "Both seed and seed2 should be 0 if deterministic is false."));
+    }
   }
 
   void Compute(OpKernelContext* context) override {
@@ -67,49 +79,37 @@ class FractionalMaxPoolOp : public OpKernel {
     OP_REQUIRES(context, tensor_in.dims() == tensor_in_and_out_dims,
                 errors::InvalidArgument("tensor_in must be 4-dimensional"));
 
+    std::vector<int> input_size(tensor_in_and_out_dims);
+    std::vector<int> output_size(tensor_in_and_out_dims);
     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
-      input_size_.push_back(tensor_in.dim_size(i));
+      input_size[i] = tensor_in.dim_size(i);
     }
     // Output size.
     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
       // This must match the same logic in the shape function in
       // core/ops/nn_ops.cc.
-      output_size_.push_back(
-          static_cast<int>(floor(input_size_[i] / pooling_ratio_[i])));
-      DCHECK_GT(output_size_[i], 0);
+      output_size[i] =
+          static_cast<int>(floor(input_size[i] / pooling_ratio_[i]));
+      DCHECK_GT(output_size[i], 0);
     }
 
     // Generate pooling sequence.
     std::vector<int64> height_cum_seq;
     std::vector<int64> width_cum_seq;
-    if (deterministic_) {
-      if (pooling_region_generated_) {
-        height_cum_seq = height_cum_seq_;
-        width_cum_seq = width_cum_seq_;
-      } else {
-        height_cum_seq = GeneratePoolingSequence(
-            input_size_[1], output_size_[1], &generator_, pseudo_random_);
-        width_cum_seq = GeneratePoolingSequence(input_size_[2], output_size_[2],
-                                                &generator_, pseudo_random_);
-        mutex_lock lock(mu_);
-        height_cum_seq_ = height_cum_seq;
-        width_cum_seq_ = width_cum_seq;
-        pooling_region_generated_ = true;
-      }
-    } else {
-      height_cum_seq = GeneratePoolingSequence(input_size_[1], output_size_[1],
-                                               &generator_, pseudo_random_);
-      width_cum_seq = GeneratePoolingSequence(input_size_[2], output_size_[2],
-                                              &generator_, pseudo_random_);
-    }
+    GuardedPhiloxRandom generator;
+    generator.Init(seed_, seed2_);
+    height_cum_seq = GeneratePoolingSequence(input_size[1], output_size[1],
+                                             &generator, pseudo_random_);
+    width_cum_seq = GeneratePoolingSequence(input_size[2], output_size[2],
+                                            &generator, pseudo_random_);
 
     // Prepare output.
     Tensor* output_tensor = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(
-                       0, TensorShape({output_size_[0], output_size_[1],
-                                       output_size_[2], output_size_[3]}),
-                       &output_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(
+                                0,
+                                TensorShape({output_size[0], output_size[1],
+                                             output_size[2], output_size[3]}),
+                                &output_tensor));
     Tensor* output_height_seq_tensor = nullptr;
     OP_REQUIRES_OK(
         context,
@@ -122,12 +122,11 @@ class FractionalMaxPoolOp : public OpKernel {
                      2, TensorShape({static_cast<int64>(width_cum_seq.size())}),
                      &output_width_seq_tensor));
 
-    ConstEigenMatrixMap in_mat(
-        tensor_in.flat<T>().data(), input_size_[3],
-        input_size_[2] * input_size_[1] * input_size_[0]);
+    ConstEigenMatrixMap in_mat(tensor_in.flat<T>().data(), input_size[3],
+                               input_size[2] * input_size[1] * input_size[0]);
 
-    EigenMatrixMap out_mat(output_tensor->flat<T>().data(), output_size_[3],
-                           output_size_[2] * output_size_[1] * output_size_[0]);
+    EigenMatrixMap out_mat(output_tensor->flat<T>().data(), output_size[3],
+                           output_size[2] * output_size[1] * output_size[0]);
 
     // Initializes the output tensor with MIN<T>.
     output_tensor->flat<T>().setConstant(Eigen::NumTraits<T>::lowest());
@@ -149,9 +148,9 @@ class FractionalMaxPoolOp : public OpKernel {
     // 1: height / row
     // 2: width / col
     // 3: depth / channel
-    const int64 height_max = input_size_[1] - 1;
-    const int64 width_max = input_size_[2] - 1;
-    for (int64 b = 0; b < input_size_[0]; ++b) {
+    const int64 height_max = input_size[1] - 1;
+    const int64 width_max = input_size[2] - 1;
+    for (int64 b = 0; b < input_size[0]; ++b) {
       // height sequence.
       for (int64 hs = 0; hs < height_cum_seq.size() - 1; ++hs) {
         // height start and end.
@@ -163,7 +162,7 @@ class FractionalMaxPoolOp : public OpKernel {
         // width sequence.
         for (int64 ws = 0; ws < width_cum_seq.size() - 1; ++ws) {
           const int64 out_offset =
-              (b * output_size_[1] + hs) * output_size_[2] + ws;
+              (b * output_size[1] + hs) * output_size[2] + ws;
           // width start and end.
           const int64 width_start = width_cum_seq[ws];
           int64 width_end =
@@ -172,7 +171,7 @@ class FractionalMaxPoolOp : public OpKernel {
           for (int64 h = height_start; h <= height_end; ++h) {
             for (int64 w = width_start; w <= width_end; ++w) {
               const int64 in_offset =
-                  (b * input_size_[1] + h) * input_size_[2] + w;
+                  (b * input_size[1] + h) * input_size[2] + w;
               out_mat.col(out_offset) =
                   out_mat.col(out_offset).cwiseMax(in_mat.col(in_offset));
             }
@@ -184,18 +183,11 @@ class FractionalMaxPoolOp : public OpKernel {
 
  private:
   bool deterministic_;
-  // meaningful only when deterministic_ is true.
-  mutex mu_;
-  std::vector<int64> height_cum_seq_;
-  std::vector<int64> width_cum_seq_;
-  bool pooling_region_generated_;
-
-  std::vector<int32> input_size_;
-  std::vector<int32> output_size_;
+  int64 seed_;
+  int64 seed2_;
   std::vector<float> pooling_ratio_;
   bool pseudo_random_;
   bool overlapping_;
-  GuardedPhiloxRandom generator_;
 };
 
 #define REGISTER_FRACTIONALMAXPOOL(type)                                      \
@@ -243,15 +235,13 @@ class FractionalMaxPoolGradOp : public OpKernel {
 
     // Just to make it similar to FractionalMaxPoolOp.
     constexpr int tensor_in_and_out_dims = 4;
-    std::vector<int64> input_size;
-    std::vector<int64> output_size;
-    input_size.reserve(tensor_in_and_out_dims);
+    std::vector<int64> input_size(tensor_in_and_out_dims);
+    std::vector<int64> output_size(tensor_in_and_out_dims);
     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
-      input_size.push_back(tensor_in.dim_size(i));
+      input_size[i] = tensor_in.dim_size(i);
     }
-    output_size.reserve(tensor_in_and_out_dims);
     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
-      output_size.push_back(tensor_out.dim_size(i));
+      output_size[i] = tensor_out.dim_size(i);
     }
 
     // ---------
diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc
index f2290e87a5fdac44629ed6b81c8661cf74c2054e..ef9e8484132d25e517367862364518ca0baf38af 100644
--- a/tensorflow/core/kernels/function_ops.cc
+++ b/tensorflow/core/kernels/function_ops.cc
@@ -53,6 +53,8 @@ class ArgOp : public OpKernel {
     ctx->set_output(0, val);
   }
 
+  bool IsExpensive() override { return false; }
+
  private:
   int index_;
   DataType dtype_;
@@ -78,6 +80,8 @@ class RetvalOp : public OpKernel {
     OP_REQUIRES_OK(ctx, frame->SetRetval(index_, val));
   }
 
+  bool IsExpensive() override { return false; }
+
  private:
   int index_;
   DataType dtype_;
@@ -292,21 +296,21 @@ class RemoteCallOp : public AsyncOpKernel {
   void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override {
     const Tensor* target;
     OP_REQUIRES_OK_ASYNC(ctx, ctx->input("target", &target), done);
-    AttrValueMap attr_values = func_.attr();
-    AttrValue v;
     const string& target_device =
         DeviceNameUtils::CanonicalizeDeviceName(target->scalar<string>()());
-    v.set_s(target_device);
-    AddAttr("_target", v, &attr_values);
 
     FunctionLibraryRuntime* lib = ctx->function_library();
     OP_REQUIRES_ASYNC(ctx, lib != nullptr,
                       errors::Internal("No function library is provided."),
                       done);
+    AttrValueMap attr_values = func_.attr();
+    FunctionLibraryRuntime::InstantiateOptions instantiate_opts;
+    instantiate_opts.target = target_device;
     FunctionLibraryRuntime::Handle handle;
-    OP_REQUIRES_OK_ASYNC(
-        ctx, lib->Instantiate(func_.name(), AttrSlice(&attr_values), &handle),
-        done);
+    OP_REQUIRES_OK_ASYNC(ctx,
+                         lib->Instantiate(func_.name(), AttrSlice(&attr_values),
+                                          instantiate_opts, &handle),
+                         done);
 
     OpInputList arguments;
     OP_REQUIRES_OK_ASYNC(ctx, ctx->input_list("args", &arguments), done);
@@ -318,7 +322,7 @@ class RemoteCallOp : public AsyncOpKernel {
     if (opts.source_device != target_device) {
       opts.remote_execution = true;
     }
-    opts.rendezvous = ctx->rendezvous();
+    opts.create_rendezvous = true;
     std::vector<Tensor> args;
     args.reserve(arguments.size());
     for (const Tensor& argument : arguments) {
diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc
index 1688674eb784369ae8fbb2622695561cb5bebcae..9b4dca851138235d7b4a95906f3c8a0e5d592aa7 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/fused_batch_norm_op.cc
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/kernels/fill_functor.h"
 #include "tensorflow/core/kernels/fused_batch_norm_op.h"
 #include "tensorflow/core/util/tensor_format.h"
 
@@ -239,6 +240,14 @@ struct FusedBatchNorm<GPUDevice, T, U> {
             << " offset shape: " << offset.shape().DebugString()
             << " tensor format: " << tensor_format;
 
+    // If input is empty, return NaN mean/variance
+    if (x.shape().num_elements() == 0) {
+      functor::SetNanFunctor<U> f;
+      f(context->eigen_device<GPUDevice>(), batch_mean->flat<U>());
+      f(context->eigen_device<GPUDevice>(), batch_var->flat<U>());
+      return;
+    }
+
     Tensor x_maybe_transformed = x;
     Tensor x_transformed;
     Tensor y_transformed;
@@ -623,14 +632,26 @@ class FusedBatchNormGradOp : public OpKernel {
     Tensor* offset_backprop = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(2, scale_offset_shape,
                                                      &offset_backprop));
-    // two placeholders for estimated_mean and estimated_variance, which are
+    // Two placeholders for estimated_mean and estimated_variance, which are
     // used for inference and thus not needed here for gradient computation.
+    // They are filled with zeros so as to avoid NaN outputs.
     Tensor* placeholder_1 = nullptr;
     OP_REQUIRES_OK(
         context, context->allocate_output(3, TensorShape({}), &placeholder_1));
+    functor::SetZeroFunctor<Device, float> f;
+    f(context->eigen_device<Device>(), placeholder_1->flat<U>());
     Tensor* placeholder_2 = nullptr;
     OP_REQUIRES_OK(
         context, context->allocate_output(4, TensorShape({}), &placeholder_2));
+    f(context->eigen_device<Device>(), placeholder_2->flat<U>());
+
+    // If input is empty, set gradients w.r.t scale/offset to zero.
+    if (x.shape().num_elements() == 0) {
+      functor::SetZeroFunctor<Device, U> f;
+      f(context->eigen_device<Device>(), scale_backprop->flat<U>());
+      f(context->eigen_device<Device>(), offset_backprop->flat<U>());
+      return;
+    }
 
     if (is_training_) {
       functor::FusedBatchNormGrad<Device, T, U>()(
diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cu.cc b/tensorflow/core/kernels/fused_batch_norm_op.cu.cc
index dc956066ecffe2ad3a38506fb6e76dd402def5b3..a8484390b928105cb51216e18e419957f12ad2ac 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op.cu.cc
+++ b/tensorflow/core/kernels/fused_batch_norm_op.cu.cc
@@ -65,8 +65,15 @@ void InvVarianceToVariance<T>::operator()(const Eigen::GpuDevice& d,
                                               epsilon, sample_size, variance);
 }
 
+template <class T>
+void SetNanFunctor<T>::operator()(const Eigen::GpuDevice& d,
+                                  typename TTypes<T>::Flat out) {
+  To32Bit(out).device(d) = To32Bit(out).constant(Eigen::NumTraits<T>::quiet_NaN());
+}
+
 template class VarianceToInvVariance<float>;
 template class InvVarianceToVariance<float>;
+template class SetNanFunctor<float>;
 }  // namespace functor
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/fused_batch_norm_op.h b/tensorflow/core/kernels/fused_batch_norm_op.h
index 3af104bf954257b260215d6a79b0a365227d7b23..d6c68df986117df0ab4f8c24fb1a713901b468f7 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op.h
+++ b/tensorflow/core/kernels/fused_batch_norm_op.h
@@ -49,6 +49,12 @@ struct InvVarianceToVariance {
                   int channels, T* variance);
 };
 
+// This function sets a GPU tensor to NaNs.
+template <class T>
+struct SetNanFunctor {
+  void operator()(const Eigen::GpuDevice& d, typename TTypes<T>::Flat out);
+};
+
 #endif  // GOOGLE_CUDA
 
 // Functor used by FusedBatchNormGradOp to do the computations when
diff --git a/tensorflow/core/kernels/gather_nd_op.cc b/tensorflow/core/kernels/gather_nd_op.cc
index 5dc74d720ab22e2f2f10baf8309b59661740184f..7e5a9e1ec5aac26706d95646a29539dd0f4be2ed 100644
--- a/tensorflow/core/kernels/gather_nd_op.cc
+++ b/tensorflow/core/kernels/gather_nd_op.cc
@@ -176,10 +176,12 @@ Status DoGatherNd(OpKernelContext* c, const Tensor& params,
       PARAMS_CASE(3);
       PARAMS_CASE(4);
       PARAMS_CASE(5);
+      PARAMS_CASE(6);
+      PARAMS_CASE(7);
 #undef PARAMS_CASE
       default:
         return errors::InvalidArgument(
-            "Only indices.shape[-1] values between 1 and 5 "
+            "Only indices.shape[-1] values between 1 and 7 "
             "are currently supported.  Requested rank: ",
             indices_nd);
     }
@@ -218,7 +220,9 @@ namespace functor {
   DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 2); \
   DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 3); \
   DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 4); \
-  DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 5);
+  DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 5); \
+  DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 6); \
+  DECLARE_GPU_SPECS_INDEX_NDIM(T, Index, 7);
 
 #define DECLARE_GPU_SPECS(T)         \
   DECLARE_GPU_SPECS_INDEX(T, int32); \
diff --git a/tensorflow/core/kernels/gather_nd_op_cpu_impl_6.cc b/tensorflow/core/kernels/gather_nd_op_cpu_impl_6.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2aec872448ec02581faf95e30844e5e1e80cd277
--- /dev/null
+++ b/tensorflow/core/kernels/gather_nd_op_cpu_impl_6.cc
@@ -0,0 +1,18 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define CPU_PROVIDED_IXDIM 6
+#include "tensorflow/core/kernels/gather_nd_op_cpu_impl.h"
+#undef CPU_PROVIDED_IXDIM
diff --git a/tensorflow/core/kernels/gather_nd_op_cpu_impl_7.cc b/tensorflow/core/kernels/gather_nd_op_cpu_impl_7.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9222cb07695cb1c05b12da59b0c0bbc96bebb388
--- /dev/null
+++ b/tensorflow/core/kernels/gather_nd_op_cpu_impl_7.cc
@@ -0,0 +1,18 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define CPU_PROVIDED_IXDIM 7
+#include "tensorflow/core/kernels/gather_nd_op_cpu_impl.h"
+#undef CPU_PROVIDED_IXDIM
diff --git a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
index ed5240c20abd247404cb926dd9a455af901c0d7c..b03efc684ffca4abde99b31952983aad5f805ee3 100644
--- a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc
@@ -111,7 +111,9 @@ struct GatherNdSlice<GPUDevice, T, Index, IXDIM> {
   DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 2); \
   DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 3); \
   DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 4); \
-  DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 5);
+  DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 5); \
+  DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 6); \
+  DEFINE_GPU_SPECS_INDEX_NDIM(T, Index, 7);
 
 #define DEFINE_GPU_SPECS(T)         \
   DEFINE_GPU_SPECS_INDEX(T, int32); \
diff --git a/tensorflow/core/kernels/guarantee_const_op.cc b/tensorflow/core/kernels/guarantee_const_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..de3a2a1148b7e7922a08cfce159fb05ccdb9fe30
--- /dev/null
+++ b/tensorflow/core/kernels/guarantee_const_op.cc
@@ -0,0 +1,47 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace {
+
+// Refer to the Op description for detailed comments.
+class GuaranteeConstOp : public OpKernel {
+ public:
+  explicit GuaranteeConstOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const DataType input_dtype = ctx->input_dtype(0);
+    OP_REQUIRES(ctx, input_dtype != DT_RESOURCE,
+                errors::InvalidArgument(
+                    "Input tensor cannot be a resource variable handle."));
+    const Tensor& input_tensor = ctx->input(0);
+    Tensor* output = nullptr;
+    if (!ctx->forward_input_to_output_with_shape(0, 0, input_tensor.shape(),
+                                                 &output)) {
+      ctx->set_output(0, input_tensor);
+    }
+  }
+
+  bool IsExpensive() override { return false; }
+};
+
+REGISTER_KERNEL_BUILDER(Name("GuaranteeConst").Device(DEVICE_CPU),
+                        GuaranteeConstOp);
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/guarantee_const_op_test.cc b/tensorflow/core/kernels/guarantee_const_op_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..01461fbb8c22a2bfb9669bef680759ecab324a61
--- /dev/null
+++ b/tensorflow/core/kernels/guarantee_const_op_test.cc
@@ -0,0 +1,75 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/variable_ops.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+class GuaranteeConstOpTest : public OpsTestBase {
+ protected:
+  Status Init(DataType input_type) {
+    TF_CHECK_OK(NodeDefBuilder("op", "GuaranteeConst")
+                    .Input(FakeInput(input_type))
+                    .Finalize(node_def()));
+    return InitOp();
+  }
+};
+
+TEST_F(GuaranteeConstOpTest, Int32Success_6) {
+  TF_ASSERT_OK(Init(DT_INT32));
+  AddInputFromArray<int32>(TensorShape({6}), {1, 2, 3, 4, 5, 6});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_INT32, TensorShape({6}));
+  test::FillValues<int32>(&expected, {1, 2, 3, 4, 5, 6});
+  test::ExpectTensorEqual<int32>(expected, *GetOutput(0));
+}
+
+TEST_F(GuaranteeConstOpTest, Int32Success_2_3) {
+  TF_ASSERT_OK(Init(DT_INT32));
+  AddInputFromArray<int32>(TensorShape({2, 3}), {1, 2, 3, 4, 5, 6});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_INT32, TensorShape({2, 3}));
+  test::FillValues<int32>(&expected, {1, 2, 3, 4, 5, 6});
+  test::ExpectTensorEqual<int32>(expected, *GetOutput(0));
+}
+
+TEST_F(GuaranteeConstOpTest, StringSuccess) {
+  TF_ASSERT_OK(Init(DT_STRING));
+  AddInputFromArray<string>(TensorShape({6}), {"A", "b", "C", "d", "E", "f"});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({6}));
+  test::FillValues<string>(&expected, {"A", "b", "C", "d", "E", "f"});
+  test::ExpectTensorEqual<string>(expected, *GetOutput(0));
+}
+
+TEST_F(GuaranteeConstOpTest, ResourceInputError) {
+  TF_ASSERT_OK(Init(DT_RESOURCE));
+  AddResourceInput("", "resource", new Var(DT_INT32));
+  const auto status = RunOpKernel();
+  ASSERT_EQ(error::INVALID_ARGUMENT, status.code());
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/identity_op.cc b/tensorflow/core/kernels/identity_op.cc
index 7b8abf5494d23ddf6a7b590c58ae0c73c05d516d..1db9263e5d396b4cdb0920db18e5189149128758 100644
--- a/tensorflow/core/kernels/identity_op.cc
+++ b/tensorflow/core/kernels/identity_op.cc
@@ -42,6 +42,8 @@ REGISTER_KERNEL_BUILDER(Name("RefIdentity").Device(DEVICE_CPU), IdentityOp);
 // Python).
 REGISTER_KERNEL_BUILDER(Name("DebugGradientIdentity").Device(DEVICE_CPU),
                         IdentityOp);
+REGISTER_KERNEL_BUILDER(Name("DebugGradientRefIdentity").Device(DEVICE_CPU),
+                        IdentityOp);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(type)                                           \
@@ -102,7 +104,6 @@ REGISTER_SYCL_HOST_KERNEL(bool);
                           IdentityOp)
 
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
-REGISTER_GPU_KERNEL(bfloat16);
 REGISTER_GPU_KERNEL(Variant);
 
 #undef REGISTER_GPU_KERNEL
diff --git a/tensorflow/core/kernels/inplace_ops.cc b/tensorflow/core/kernels/inplace_ops.cc
index 01ae5a83c1eec9eb4ccb74841555b5bb1b6cd60f..7728ba850c94aa79feb31d137712692df0f89176 100644
--- a/tensorflow/core/kernels/inplace_ops.cc
+++ b/tensorflow/core/kernels/inplace_ops.cc
@@ -52,6 +52,7 @@ Status DoParallelConcat(const CPUDevice& d, const Tensor& value, int32 loc,
     return DoParallelConcatUpdate<CPUDevice, type>(d, value, loc, output);
     TF_CALL_NUMBER_TYPES(CASE);
     TF_CALL_string(CASE);
+    TF_CALL_variant(CASE);
 #undef CASE
     default:
       return errors::InvalidArgument("Unsupported data type: ", value.dtype());
diff --git a/tensorflow/core/kernels/list_kernels.cc b/tensorflow/core/kernels/list_kernels.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5e405f16a4d141f344532fec8342eea754a80f5e
--- /dev/null
+++ b/tensorflow/core/kernels/list_kernels.cc
@@ -0,0 +1,343 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <limits>
+
+#define EIGEN_USE_THREADS
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif  // GOOGLE_CUDA
+
+#include "tensorflow/core/kernels/list_kernels.h"
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/framework/variant_op_registry.h"
+#include "tensorflow/core/kernels/concat_lib.h"
+#include "tensorflow/core/lib/core/coding.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/util/util.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+// Variant compatible type for a list of tensors. This is mutable but instances
+// should never be mutated after stored in a variant tensor.
+TensorList::TensorList(const TensorList& other)
+    : tensors(other.tensors),
+      element_shape(other.element_shape),
+      element_dtype(other.element_dtype) {}
+
+void TensorList::Encode(VariantTensorData* data) const {
+  data->set_type_name(TypeName());
+  for (const Tensor& t : tensors) {
+    *data->add_tensors() = t;
+  }
+  string metadata;
+  core::PutVarint64(&metadata, static_cast<uint64>(element_dtype));
+  if (!element_shape.unknown_rank()) {
+    for (TensorShapeDim dim : element_shape) {
+      if (dim.size > 0) {
+        core::PutVarint64(&metadata, dim.size);
+      } else {
+        core::PutVarint64(&metadata, std::numeric_limits<uint64>::max());
+      }
+    }
+  }
+  data->set_metadata(metadata);
+}
+
+static Status TensorListDeviceCopy(
+    const TensorList& from, TensorList* to,
+    const UnaryVariantOpRegistry::AsyncTensorDeviceCopyFn& copy) {
+  to->element_shape = from.element_shape;
+  to->element_dtype = from.element_dtype;
+  to->tensors.reserve(from.tensors.size());
+  for (const Tensor& t : from.tensors) {
+    Tensor tmp(t.dtype());
+    TF_RETURN_IF_ERROR(copy(t, &tmp));
+    to->tensors.push_back(tmp);
+  }
+  return Status::OK();
+}
+
+#define REGISTER_LIST_COPY(DIRECTION)                   \
+  INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION( \
+      TensorList, DIRECTION, TensorList::kTypeName, TensorListDeviceCopy)
+
+REGISTER_LIST_COPY(VariantDeviceCopyDirection::HOST_TO_DEVICE);
+REGISTER_LIST_COPY(VariantDeviceCopyDirection::DEVICE_TO_HOST);
+REGISTER_LIST_COPY(VariantDeviceCopyDirection::DEVICE_TO_DEVICE);
+
+REGISTER_UNARY_VARIANT_DECODE_FUNCTION(TensorList, TensorList::kTypeName);
+
+bool TensorList::Decode(const VariantTensorData& data) {
+  tensors = data.tensors();
+  string metadata;
+  data.get_metadata(&metadata);
+  uint64 scratch;
+  StringPiece iter(metadata);
+  core::GetVarint64(&iter, &scratch);
+  element_dtype = static_cast<DataType>(scratch);
+  std::vector<int64> dims;
+  while (!iter.empty()) {
+    core::GetVarint64(&iter, &scratch);
+    if (scratch == std::numeric_limits<uint64>::max()) {
+      dims.push_back(-1);
+    } else {
+      dims.push_back(scratch);
+    }
+  }
+  return true;
+}
+
+Status TensorShapeFromTensor(const Tensor& t, PartialTensorShape* out) {
+  if (t.shape() == TensorShape({})) {
+    if ((t.dtype() == DT_INT32 && t.scalar<int32>()() == -1) ||
+        (t.dtype() == DT_INT64 && t.scalar<int64>()() == -1)) {
+      return Status::OK();
+    }
+    return errors::InvalidArgument(
+        "The only valid scalar shape tensor is the fully unknown shape "
+        "specified as -1.");
+  }
+  if (t.dtype() == DT_INT32) {
+    return PartialTensorShape::MakePartialShape(t.vec<int32>().data(),
+                                                t.NumElements(), out);
+  } else if (t.dtype() == DT_INT64) {
+    return PartialTensorShape::MakePartialShape(t.vec<int64>().data(),
+                                                t.NumElements(), out);
+  }
+  return errors::InvalidArgument(
+      "Expected an int32 or int64 shape tensor; found ",
+      DataTypeString(t.dtype()));
+}
+
+class EmptyTensorList : public OpKernel {
+ public:
+  explicit EmptyTensorList(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &element_dtype_));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    Tensor* result;
+    AllocatorAttributes attr;
+    attr.set_on_host(true);
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape{}, &result, attr));
+    TensorList empty;
+    empty.element_dtype = element_dtype_;
+    PartialTensorShape element_shape;
+    OP_REQUIRES_OK(ctx, TensorShapeFromTensor(ctx->input(0), &element_shape));
+    empty.element_shape = element_shape;
+    result->scalar<Variant>()() = std::move(empty);
+  }
+
+ private:
+  DataType element_dtype_;
+};
+
+const char TensorList::kTypeName[] = "tensorflow::TensorList";
+
+REGISTER_KERNEL_BUILDER(Name("EmptyTensorList").Device(DEVICE_CPU),
+                        EmptyTensorList);
+
+#if GOOGLE_CUDA
+
+REGISTER_KERNEL_BUILDER(
+    Name("EmptyTensorList").Device(DEVICE_GPU).HostMemory("element_shape"),
+    EmptyTensorList);
+
+#endif  // GOOGLE_CUDA
+
+class TensorListPushBack : public OpKernel {
+ public:
+  explicit TensorListPushBack(OpKernelConstruction* c) : OpKernel(c) {
+    OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_));
+  }
+
+  ~TensorListPushBack() override {}
+
+  void Compute(OpKernelContext* c) override {
+    const Tensor& input = c->input(1);
+    OP_REQUIRES(c, element_dtype_ == input.dtype(),
+                errors::InvalidArgument("Invalid data types; list elements ",
+                                        DataTypeString(element_dtype_),
+                                        " but tried to append ",
+                                        DataTypeString(input.dtype())));
+
+    const TensorList* l = c->input(0).scalar<Variant>()().get<TensorList>();
+    OP_REQUIRES(c, l != nullptr,
+                errors::InvalidArgument(
+                    "Input handle is not a list. Saw: '",
+                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    OP_REQUIRES(c, l->element_shape.IsCompatibleWith(input.shape()),
+                errors::InvalidArgument(
+                    "Tried to append a tensor with incompatible shape to a "
+                    "list. Op element shape: ",
+                    input.shape().DebugString(),
+                    " list shape: ", l->element_shape.DebugString()));
+    OP_REQUIRES(c, element_dtype_ == l->element_dtype,
+                errors::InvalidArgument("Invalid data types; op elements ",
+                                        DataTypeString(element_dtype_),
+                                        " but list elements ",
+                                        DataTypeString(l->element_dtype)));
+
+    TensorList output;
+    output = *l;
+    output.tensors.push_back(input);
+    Tensor* result;
+    AllocatorAttributes attr;
+    attr.set_on_host(true);
+    OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result, attr));
+    result->scalar<Variant>()() = std::move(output);
+  }
+
+ private:
+  DataType element_dtype_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("TensorListPushBack").Device(DEVICE_CPU),
+                        TensorListPushBack);
+
+#if GOOGLE_CUDA
+
+REGISTER_KERNEL_BUILDER(Name("TensorListPushBack").Device(DEVICE_GPU),
+                        TensorListPushBack);
+
+#endif  // GOOGLE_CUDA
+
+class TensorListLength : public OpKernel {
+ public:
+  explicit TensorListLength(OpKernelConstruction* c) : OpKernel(c) {}
+  ~TensorListLength() override {}
+
+  void Compute(OpKernelContext* c) override {
+    const TensorList* l = c->input(0).scalar<Variant>()().get<TensorList>();
+    OP_REQUIRES(
+        c, l != nullptr,
+        errors::InvalidArgument(
+            "TensorListLength received a variant which is not a list. Saw: '",
+            c->input(0).scalar<Variant>()().DebugString(), "'"));
+    Tensor* result;
+    OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result));
+    result->scalar<int32>()() = l->tensors.size();
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("TensorListLength").Device(DEVICE_CPU),
+                        TensorListLength);
+
+#if GOOGLE_CUDA
+
+REGISTER_KERNEL_BUILDER(
+    Name("TensorListLength").Device(DEVICE_GPU).HostMemory("length"),
+    TensorListLength);
+
+#endif  // GOOGLE_CUDA
+
+class TensorListPopBack : public OpKernel {
+ public:
+  explicit TensorListPopBack(OpKernelConstruction* c) : OpKernel(c) {
+    OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_));
+  }
+
+  ~TensorListPopBack() override {}
+
+  void Compute(OpKernelContext* c) override {
+    const TensorList* l = c->input(0).scalar<Variant>()().get<TensorList>();
+    OP_REQUIRES(c, l != nullptr,
+                errors::InvalidArgument(
+                    "Input handle is not a list. Saw: '",
+                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    OP_REQUIRES(c, element_dtype_ == l->element_dtype,
+                errors::InvalidArgument("Invalid data types; op elements ",
+                                        DataTypeString(element_dtype_),
+                                        " but list elements ",
+                                        DataTypeString(l->element_dtype)));
+
+    OP_REQUIRES(c, !l->tensors.empty(),
+                errors::InvalidArgument("Trying to pop from an empty list."));
+
+    c->set_output(1, l->tensors.back());
+    TensorList output;
+    output = *l;
+    output.tensors.pop_back();
+    Tensor* result;
+    AllocatorAttributes attr;
+    attr.set_on_host(true);
+    OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result, attr));
+    result->scalar<Variant>()() = std::move(output);
+  }
+
+ private:
+  DataType element_dtype_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("TensorListPopBack").Device(DEVICE_CPU),
+                        TensorListPopBack);
+
+#if GOOGLE_CUDA
+
+REGISTER_KERNEL_BUILDER(Name("TensorListPopBack").Device(DEVICE_GPU),
+                        TensorListPopBack);
+
+#endif  // GOOGLE_CUDA
+
+#define REGISTER_TENSOR_LIST_STACK_CPU(T)                         \
+  REGISTER_KERNEL_BUILDER(Name("TensorListStack")                 \
+                              .TypeConstraint<T>("element_dtype") \
+                              .Device(DEVICE_CPU),                \
+                          TensorListStack<CPUDevice, T>)
+
+TF_CALL_POD_STRING_TYPES(REGISTER_TENSOR_LIST_STACK_CPU);
+REGISTER_TENSOR_LIST_STACK_CPU(quint8);
+REGISTER_TENSOR_LIST_STACK_CPU(qint8);
+REGISTER_TENSOR_LIST_STACK_CPU(quint16);
+REGISTER_TENSOR_LIST_STACK_CPU(qint16);
+REGISTER_TENSOR_LIST_STACK_CPU(qint32);
+REGISTER_TENSOR_LIST_STACK_CPU(bfloat16);
+
+#undef REGISTER_TENSOR_LIST_STACK_CPU
+
+#define REGISTER_TENSOR_LIST_FROM_TENSOR_CPU(T)                   \
+  REGISTER_KERNEL_BUILDER(Name("TensorListFromTensor")            \
+                              .TypeConstraint<T>("element_dtype") \
+                              .Device(DEVICE_CPU),                \
+                          TensorListFromTensor<CPUDevice, T>)
+
+TF_CALL_POD_STRING_TYPES(REGISTER_TENSOR_LIST_FROM_TENSOR_CPU);
+REGISTER_TENSOR_LIST_FROM_TENSOR_CPU(quint8);
+REGISTER_TENSOR_LIST_FROM_TENSOR_CPU(qint8);
+REGISTER_TENSOR_LIST_FROM_TENSOR_CPU(quint16);
+REGISTER_TENSOR_LIST_FROM_TENSOR_CPU(qint16);
+REGISTER_TENSOR_LIST_FROM_TENSOR_CPU(qint32);
+REGISTER_TENSOR_LIST_FROM_TENSOR_CPU(bfloat16);
+
+#undef REGISTER_TENSOR_LIST_FROM_TENSOR_CPU
+
+REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(ADD_VARIANT_BINARY_OP, DEVICE_CPU,
+                                          TensorList, TensorList::kTypeName,
+                                          TensorListBinaryAdd<CPUDevice>);
+
+REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(ZEROS_LIKE_VARIANT_UNARY_OP,
+                                         DEVICE_CPU, TensorList,
+                                         TensorList::kTypeName,
+                                         TensorListZerosLike<CPUDevice>);
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/list_kernels.cu.cc b/tensorflow/core/kernels/list_kernels.cu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..935f892dd0515025e97e02c8e941b96f21ed3b3e
--- /dev/null
+++ b/tensorflow/core/kernels/list_kernels.cu.cc
@@ -0,0 +1,79 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <limits>
+
+#define EIGEN_USE_THREADS
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/kernels/list_kernels.h"
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/framework/variant_op_registry.h"
+#include "tensorflow/core/kernels/concat_lib.h"
+#include "tensorflow/core/lib/core/coding.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/util/util.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+#define REGISTER_TENSOR_LIST_STACK_GPU(T)                         \
+  REGISTER_KERNEL_BUILDER(Name("TensorListStack")                 \
+                              .TypeConstraint<T>("element_dtype") \
+                              .Device(DEVICE_GPU),                \
+                          TensorListStack<GPUDevice, T>)
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_TENSOR_LIST_STACK_GPU);
+REGISTER_TENSOR_LIST_STACK_GPU(bfloat16);
+TF_CALL_complex64(REGISTER_TENSOR_LIST_STACK_GPU);
+TF_CALL_complex128(REGISTER_TENSOR_LIST_STACK_GPU);
+TF_CALL_int64(REGISTER_TENSOR_LIST_STACK_GPU);
+REGISTER_TENSOR_LIST_STACK_GPU(bool);
+
+#undef REGISTER_TENSOR_LIST_STACK_GPU
+
+#define REGISTER_TENSOR_LIST_FROM_TENSOR_GPU(T)                   \
+  REGISTER_KERNEL_BUILDER(Name("TensorListFromTensor")            \
+                              .TypeConstraint<T>("element_dtype") \
+                              .Device(DEVICE_GPU)                 \
+                              .HostMemory("element_shape"),       \
+                          TensorListFromTensor<GPUDevice, T>)
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_TENSOR_LIST_FROM_TENSOR_GPU);
+REGISTER_TENSOR_LIST_FROM_TENSOR_GPU(bfloat16);
+TF_CALL_complex64(REGISTER_TENSOR_LIST_FROM_TENSOR_GPU);
+TF_CALL_complex128(REGISTER_TENSOR_LIST_FROM_TENSOR_GPU);
+TF_CALL_int64(REGISTER_TENSOR_LIST_FROM_TENSOR_GPU);
+REGISTER_TENSOR_LIST_FROM_TENSOR_GPU(bool);
+
+#undef REGISTER_TENSOR_LIST_FROM_TENSOR_GPU
+
+REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(ADD_VARIANT_BINARY_OP, DEVICE_GPU,
+                                          TensorList, TensorList::kTypeName,
+                                          TensorListBinaryAdd<GPUDevice>);
+REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(ZEROS_LIKE_VARIANT_UNARY_OP,
+                                         DEVICE_GPU, TensorList,
+                                         TensorList::kTypeName,
+                                         TensorListZerosLike<GPUDevice>);
+
+}  // namespace tensorflow
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/list_kernels.h b/tensorflow/core/kernels/list_kernels.h
new file mode 100644
index 0000000000000000000000000000000000000000..6a2a572b6d7476cb4d457d19c2264c7e8217b7cb
--- /dev/null
+++ b/tensorflow/core/kernels/list_kernels.h
@@ -0,0 +1,257 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_LIST_KERNELS_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_LIST_KERNELS_H_
+
+#define EIGEN_USE_THREADS
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif  // GOOGLE_CUDA
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/framework/variant_op_registry.h"
+#include "tensorflow/core/kernels/concat_lib.h"
+#include "tensorflow/core/lib/core/coding.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/util/util.h"
+
+namespace tensorflow {
+
+// Variant compatible type for a list of tensors. This is mutable but instances
+// should never be mutated after stored in a variant tensor.
+struct TensorList {
+ public:
+  TensorList() {}
+  TensorList(const TensorList& other);
+
+  static const char kTypeName[];
+  string TypeName() const { return kTypeName; }
+
+  void Encode(VariantTensorData* data) const;
+
+  bool Decode(const VariantTensorData& data);
+
+  // TODO(apassos) fill this out
+  string DebugString() const { return "TensorList"; }
+
+  std::vector<Tensor> tensors;
+  PartialTensorShape element_shape;
+  DataType element_dtype;
+};
+
+Status TensorShapeFromTensor(const Tensor& t, PartialTensorShape* out);
+
+template <typename Device, typename T>
+class TensorListStack : public OpKernel {
+ public:
+  typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>
+      ConstMatrixVector;
+  explicit TensorListStack(OpKernelConstruction* c) : OpKernel(c) {
+    OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_));
+    OP_REQUIRES_OK(c, c->GetAttr("num_elements", &num_elements_));
+  }
+
+  ~TensorListStack() {}
+
+  void Compute(OpKernelContext* c) override {
+    const TensorList* l = c->input(0).scalar<Variant>()().get<TensorList>();
+    OP_REQUIRES(c, l != nullptr,
+                errors::InvalidArgument(
+                    "Input handle is not a list. Saw: '",
+                    c->input(0).scalar<Variant>()().DebugString(), "'"));
+    OP_REQUIRES(c, l->element_shape.IsFullyDefined(),
+                errors::InvalidArgument("Tried to stack elements from a list "
+                                        "with non-fully-defined shape."));
+    OP_REQUIRES(c, element_dtype_ == l->element_dtype,
+                errors::InvalidArgument("Invalid data types; op elements ",
+                                        DataTypeString(element_dtype_),
+                                        " but list elements ",
+                                        DataTypeString(l->element_dtype)));
+    if (num_elements_ != -1) {
+      OP_REQUIRES(c, l->tensors.size() == num_elements_,
+                  errors::InvalidArgument("Operation expected a list with ",
+                                          num_elements_,
+                                          " elements but got a list with ",
+                                          l->tensors.size(), " elements."));
+    }
+    TensorShape resulting_shape;
+    resulting_shape.AddDim(l->tensors.size());
+    for (TensorShapeDim s : l->element_shape) {
+      resulting_shape.AddDim(s.size);
+    }
+    Tensor* output;
+    OP_REQUIRES_OK(c, c->allocate_output(0, resulting_shape, &output));
+
+    ConstMatrixVector inputs_flat;
+    inputs_flat.reserve(l->tensors.size());
+    for (const auto& t : l->tensors) {
+      inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
+          t.shaped<T, 2>({1, t.NumElements()})));
+    }
+    auto output_flat =
+        output->shaped<T, 2>({1, static_cast<int64>(l->tensors.size()) *
+                                     l->element_shape.num_elements()});
+
+#if GOOGLE_CUDA
+    if (std::is_same<Device, Eigen::GpuDevice>::value) {
+      ConcatGPU<T>(c, inputs_flat, output, &output_flat);
+      return;
+    }
+#endif  // GOOGLE_CUDA
+    ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
+  }
+
+ private:
+  int num_elements_;
+  DataType element_dtype_;
+};
+
+template <typename Device, typename T>
+class TensorListFromTensor : public OpKernel {
+ public:
+  TensorListFromTensor(OpKernelConstruction* c) : OpKernel(c) {}
+
+  void Compute(OpKernelContext* c) override {
+    Tensor* output_tensor;
+    AllocatorAttributes attr;
+    attr.set_on_host(true);
+    OP_REQUIRES_OK(c, c->allocate_output(0, {}, &output_tensor, attr));
+    PartialTensorShape element_shape;
+    OP_REQUIRES_OK(c, TensorShapeFromTensor(c->input(1), &element_shape));
+    TensorList output_list;
+    const Tensor& t = c->input(0);
+    output_list.element_dtype = t.dtype();
+    TensorShape output_shape(t.shape());
+    output_shape.RemoveDim(0);
+    OP_REQUIRES(c, element_shape.IsCompatibleWith(output_shape),
+                errors::InvalidArgument(
+                    "Specified a list with shape ", element_shape.DebugString(),
+                    " from a tensor with shape ", output_shape.DebugString()));
+    output_list.element_shape = element_shape;
+    output_list.tensors.reserve(t.shape().dim_size(0));
+    for (int i = 0; i < t.shape().dim_size(0); ++i) {
+      Tensor tmp = t.Slice(i, i + 1);
+      TensorShape tmp_shape = tmp.shape();
+      tmp_shape.RemoveDim(0);
+      OP_REQUIRES(c, tmp.CopyFrom(tmp, tmp_shape),
+                  errors::Unknown("Unexpected shape error."));
+      if (tmp.IsAligned() || !DataTypeCanUseMemcpy(DataTypeToEnum<T>::value)) {
+        output_list.tensors.push_back(tmp);
+      } else {
+        Tensor aligned;
+        OP_REQUIRES_OK(c, c->allocate_temp(tmp.dtype(), tmp.shape(), &aligned));
+        aligned.flat<T>().device(c->eigen_device<Device>()) =
+            tmp.unaligned_flat<T>();
+        output_list.tensors.push_back(aligned);
+      }
+    }
+    output_tensor->scalar<Variant>()() = std::move(output_list);
+  }
+};
+
+template <typename Device>
+Status TensorListBinaryAdd(OpKernelContext* c, const TensorList& a,
+                           const TensorList& b, TensorList* out) {
+  if (a.element_dtype != b.element_dtype) {
+    return errors::InvalidArgument(
+        "Trying to add two lists of tensors of different dtypes. One is ",
+        DataTypeString(a.element_dtype), " and the other is ",
+        DataTypeString(b.element_dtype));
+  }
+  out->element_dtype = a.element_dtype;
+  if (!a.element_shape.IsCompatibleWith(b.element_shape)) {
+    return errors::InvalidArgument(
+        "Trying to add two lists of tensors with incompatible element shapes. "
+        "One is ",
+        a.element_shape.DebugString(), " and the other is ",
+        b.element_shape.DebugString());
+  }
+
+  TF_RETURN_IF_ERROR(
+      a.element_shape.MergeWith(b.element_shape, &out->element_shape));
+  if (a.tensors.size() != b.tensors.size()) {
+    return errors::InvalidArgument(
+        "Trying to add two lists of tensors with different lengths. One is ",
+        a.tensors.size(), " and the other is ", b.tensors.size());
+  }
+  out->tensors.reserve(a.tensors.size());
+  for (int i = 0; i < a.tensors.size(); ++i) {
+    const Tensor& a_tensor = a.tensors[i];
+    const Tensor& b_tensor = b.tensors[i];
+    if (a_tensor.shape() != b_tensor.shape()) {
+      // TODO(apassos) support broadcasting additions here?
+      return errors::InvalidArgument(
+          "Trying to add two tensors with incompatible element shapes. "
+          "One is ",
+          a_tensor.shape().DebugString(), " and the other is ",
+          b_tensor.shape().DebugString());
+    }
+    Tensor out_tensor;
+    TF_RETURN_IF_ERROR(
+        c->allocate_temp(a_tensor.dtype(), a_tensor.shape(), &out_tensor));
+    switch (out_tensor.dtype()) {
+#define DTYPE_CASE(dtype)                                        \
+  case DataTypeToEnum<dtype>::value:                             \
+    out_tensor.flat<dtype>().device(c->eigen_device<Device>()) = \
+        a_tensor.flat<dtype>() + b_tensor.flat<dtype>();         \
+    break;
+
+      TF_CALL_NUMBER_TYPES(DTYPE_CASE)
+
+#undef DTYPE_CASE
+      default:
+        return errors::InvalidArgument("Trying to add unsupported dtype ",
+                                       out_tensor.dtype());
+    }
+  }
+  return Status::OK();
+}
+
+template <typename Device>
+Status TensorListZerosLike(OpKernelContext* c, const TensorList& x,
+                           TensorList* y) {
+  y->element_dtype = x.element_dtype;
+  y->element_shape = x.element_shape;
+  y->tensors.reserve(x.tensors.size());
+  for (const Tensor& t : x.tensors) {
+    Tensor out_tensor;
+    TF_RETURN_IF_ERROR(c->allocate_temp(t.dtype(), t.shape(), &out_tensor));
+    switch (out_tensor.dtype()) {
+#define DTYPE_CASE(dtype)                                        \
+  case DataTypeToEnum<dtype>::value:                             \
+    out_tensor.flat<dtype>().device(c->eigen_device<Device>()) = \
+        out_tensor.flat<dtype>().constant(dtype(0));             \
+    break;
+
+      TF_CALL_NUMBER_TYPES(DTYPE_CASE)
+
+#undef DTYPE_CASE
+      default:
+        return errors::InvalidArgument(
+            "Trying to compute zeros_like for unsupported dtype",
+            out_tensor.dtype());
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_LIST_KERNELS_H_
diff --git a/tensorflow/core/kernels/logging_ops.cc b/tensorflow/core/kernels/logging_ops.cc
index 67d603dd0ae9851d1135e0d031efc16ca612f680..bacf3e77408a12a8a95bf7e7ab8f3a580e675675 100644
--- a/tensorflow/core/kernels/logging_ops.cc
+++ b/tensorflow/core/kernels/logging_ops.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <iostream>
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -76,7 +77,7 @@ class PrintOp : public OpKernel {
       strings::StrAppend(&msg, "[", ctx->input(i).SummarizeValue(summarize_),
                          "]");
     }
-    LOG(INFO) << msg;
+    std::cerr << msg << std::endl;
   }
 
  private:
diff --git a/tensorflow/core/kernels/lookup_table_init_op.cc b/tensorflow/core/kernels/lookup_table_init_op.cc
index 38adcada6d21bf78aee4c080bfa7fb68efeb1bf8..b352dd257ce9e60edc35ae6c142207d6f19495f7 100644
--- a/tensorflow/core/kernels/lookup_table_init_op.cc
+++ b/tensorflow/core/kernels/lookup_table_init_op.cc
@@ -82,8 +82,8 @@ class InitializeTableOp : public OpKernel {
     }
     OP_REQUIRES_OK(ctx, table->Initialize(iter));
     if (ctx->track_allocations()) {
-      ctx->record_host_persistent_memory_allocation(table->MemoryUsed() -
-                                                    memory_used_before);
+      ctx->record_persistent_memory_allocation(table->MemoryUsed() -
+                                               memory_used_before);
     }
   }
 
@@ -144,8 +144,8 @@ class InitializeTableFromTextFileOp : public OpKernel {
                             vocab_filename, vocab_size_, delimiter_, key_index_,
                             value_index_, ctx->env(), table));
     if (ctx->track_allocations()) {
-      ctx->record_host_persistent_memory_allocation(table->MemoryUsed() -
-                                                    memory_used_before);
+      ctx->record_persistent_memory_allocation(table->MemoryUsed() -
+                                               memory_used_before);
     }
   }
 
diff --git a/tensorflow/core/kernels/lookup_table_op.cc b/tensorflow/core/kernels/lookup_table_op.cc
index e774c771b8e28c1a3c19cfafb6e7597c81e4eb5c..e3872fee0edcae543b9193e0dcf6850d194ef067 100644
--- a/tensorflow/core/kernels/lookup_table_op.cc
+++ b/tensorflow/core/kernels/lookup_table_op.cc
@@ -709,8 +709,8 @@ class LookupTableInsertOp : public OpKernel {
     }
     OP_REQUIRES_OK(ctx, table->Insert(ctx, keys, values));
     if (ctx->track_allocations()) {
-      ctx->record_host_persistent_memory_allocation(table->MemoryUsed() -
-                                                    memory_used_before);
+      ctx->record_persistent_memory_allocation(table->MemoryUsed() -
+                                               memory_used_before);
     }
   }
 };
@@ -786,8 +786,8 @@ class LookupTableImportOp : public OpKernel {
     }
     OP_REQUIRES_OK(ctx, table->ImportValues(ctx, keys, values));
     if (ctx->track_allocations()) {
-      ctx->record_host_persistent_memory_allocation(table->MemoryUsed() -
-                                                    memory_used_before);
+      ctx->record_persistent_memory_allocation(table->MemoryUsed() -
+                                               memory_used_before);
     }
   }
 };
@@ -823,6 +823,7 @@ REGISTER_KERNEL(int64, int64);
 REGISTER_KERNEL(int64, float);
 REGISTER_KERNEL(string, string);
 REGISTER_KERNEL(string, bool);
+REGISTER_KERNEL(int32, int32);
 
 #undef REGISTER_KERNEL
 
diff --git a/tensorflow/core/kernels/lookup_table_op.h b/tensorflow/core/kernels/lookup_table_op.h
index ff23a09a24f3c291aaec546577ead757e3eaa422..5ba9b936e4ea309ceda645f63e9630f01a99c985 100644
--- a/tensorflow/core/kernels/lookup_table_op.h
+++ b/tensorflow/core/kernels/lookup_table_op.h
@@ -64,7 +64,7 @@ class LookupTableOp : public OpKernel {
         return ctx->status();
       }
       if (ctx->track_allocations()) {
-        ctx->record_host_persistent_memory_allocation(
+        ctx->record_persistent_memory_allocation(
             container->MemoryUsed() + table_handle_.AllocatedBytes());
       }
       *ret = container;
diff --git a/tensorflow/core/kernels/lookup_util.cc b/tensorflow/core/kernels/lookup_util.cc
index e87a72f210c1d9476b2b5a68a94d2751ebddafc9..c7ce1c3747ea9f329f96d62af27708b0c9f4eb68 100644
--- a/tensorflow/core/kernels/lookup_util.cc
+++ b/tensorflow/core/kernels/lookup_util.cc
@@ -359,8 +359,8 @@ Status InitializeTableFromTextFile(const string& filename, int64 vocab_size,
   // time.
   Status s = table->Initialize(iter);
   if (errors::IsFailedPrecondition(s) && table->is_initialized()) {
-    LOG(WARNING) << "Table trying to initialize from file " << filename
-                 << " is already initialized.";
+    LOG(INFO) << "Table trying to initialize from file " << filename
+              << " is already initialized.";
     return Status::OK();
   }
   return s;
diff --git a/tensorflow/core/kernels/matmul_op.cc b/tensorflow/core/kernels/matmul_op.cc
index 12d02a10c7a2b439475c1840b4e777bdd6809856..cb68690f2847709fe6ff38f3eecd974613856dcf 100644
--- a/tensorflow/core/kernels/matmul_op.cc
+++ b/tensorflow/core/kernels/matmul_op.cc
@@ -535,13 +535,16 @@ struct MatMulFunctor<SYCLDevice, T> {
 
 }  // end namespace functor
 
-#define REGISTER_CPU(T)                                                        \
-  REGISTER_KERNEL_BUILDER(                                                     \
-      Name("MatMul").Device(DEVICE_CPU).TypeConstraint<T>("T"),                \
-      MatMulOp<CPUDevice, T, false /* cublas, ignored for CPU */>);            \
+#define REGISTER_CPU_EIGEN(T)                                                  \
   REGISTER_KERNEL_BUILDER(                                                     \
       Name("MatMul").Device(DEVICE_CPU).TypeConstraint<T>("T").Label("eigen"), \
-      MatMulOp<CPUDevice, T, false /* cublas, ignored for CPU */>)
+      MatMulOp<CPUDevice, T, false /* cublas, ignored for CPU */>);
+
+#define REGISTER_CPU(T)                                             \
+  REGISTER_KERNEL_BUILDER(                                          \
+      Name("MatMul").Device(DEVICE_CPU).TypeConstraint<T>("T"),     \
+      MatMulOp<CPUDevice, T, false /* cublas, ignored for CPU */>); \
+  REGISTER_CPU_EIGEN(T);
 
 #define REGISTER_GPU(T)                                            \
   REGISTER_KERNEL_BUILDER(                                         \
@@ -556,9 +559,14 @@ struct MatMulFunctor<SYCLDevice, T> {
 #if defined(INTEL_MKL)
 // MKL does not support half and int32 types for matrix-multiplication, so
 // register the kernel to use default Eigen based implementations for these
-// types
+// types. Registration for NO-LABEL version is in mkl_matmul_op.cc
+TF_CALL_float(REGISTER_CPU_EIGEN);
+TF_CALL_double(REGISTER_CPU_EIGEN);
 TF_CALL_half(REGISTER_CPU);
+
 TF_CALL_int32(REGISTER_CPU);
+TF_CALL_complex64(REGISTER_CPU_EIGEN);
+TF_CALL_complex128(REGISTER_CPU_EIGEN);
 #else
 TF_CALL_float(REGISTER_CPU);
 TF_CALL_double(REGISTER_CPU);
diff --git a/tensorflow/core/kernels/matrix_inverse_op.cc b/tensorflow/core/kernels/matrix_inverse_op.cc
index c61a091c7b7b82dbcb6e7b7f016e9cd2361f3f51..52afdd15ba6c2e25f3d03973c1226404a6723f87 100644
--- a/tensorflow/core/kernels/matrix_inverse_op.cc
+++ b/tensorflow/core/kernels/matrix_inverse_op.cc
@@ -210,7 +210,7 @@ class MatrixInverseOpGpu : public AsyncOpKernel {
             done);
       }
     } else {
-      // For large matrices, we wompute the inverse of each matrix in the batch
+      // For large matrices, we compute the inverse of each matrix in the batch
       // sequentially. Here we use the cuSolver methods GETRF/GETRS because they
       // are MUCH faster than their batched cuBlas equivalents for large
       // matrices.
diff --git a/tensorflow/core/kernels/matrix_logarithm_op.cc b/tensorflow/core/kernels/matrix_logarithm_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..cf0007b5b6776d0c8a297067f3a49ca21a132ac0
--- /dev/null
+++ b/tensorflow/core/kernels/matrix_logarithm_op.cc
@@ -0,0 +1,61 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/linalg_ops.cc.
+
+#include "third_party/eigen3/Eigen/Core"
+#include "third_party/eigen3/unsupported/Eigen/MatrixFunctions"
+#include "tensorflow/core/framework/kernel_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/types.h"
+
+
+namespace tensorflow {
+
+template <class Scalar>
+class MatrixLogarithmOp : public LinearAlgebraOp<Scalar> {
+ public:
+  INHERIT_LINALG_TYPEDEFS(Scalar);
+
+  explicit MatrixLogarithmOp(OpKernelConstruction* context) : Base(context) {}
+
+  void ComputeMatrix(OpKernelContext* context, const ConstMatrixMaps& inputs,
+                     MatrixMaps* outputs) final {
+    const ConstMatrixMap& input = inputs[0];
+    if (input.rows() == 0) return;
+    using Matrix = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+    Matrix tmp = input;
+    outputs->at(0) = tmp.log();
+  }
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(MatrixLogarithmOp);
+};
+
+// For real-valued matrices, this Op would return the real part of the matrix
+// logarithm. If all eigenvalues are positive, then this returns the correct
+// logarithm, however checking for positive definiteness adds significant
+// overhead. Therefore at present we only register this Op for complex types.
+REGISTER_LINALG_OP("MatrixLogarithm",
+                   (MatrixLogarithmOp<complex64>), complex64);
+REGISTER_LINALG_OP("MatrixLogarithm",
+                   (MatrixLogarithmOp<complex128>), complex128);
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/mfcc.h b/tensorflow/core/kernels/mfcc.h
index c39f10499091f0b5c6c74a3e70a812169b84c807..0d5d9fb90f8bd137aea5d7f3b8c08dfcd1495c18 100644
--- a/tensorflow/core/kernels/mfcc.h
+++ b/tensorflow/core/kernels/mfcc.h
@@ -33,10 +33,11 @@ class Mfcc {
   bool Initialize(int input_length,
                   double input_sample_rate);
 
-  // Input is a single magnitude spectrogram frame. The input spectrum
-  // is filtered into bands using a triangular mel filterbank and a
-  // discrete cosine transform (DCT) of the values is taken. Output is
-  // populated with the lowest dct_coefficient_count of these values.
+  // Input is a single squared-magnitude spectrogram frame. The input spectrum
+  // is converted to linear magnitude and weighted into bands using a
+  // triangular mel filterbank, and a discrete cosine transform (DCT) of the
+  // values is taken. Output is populated with the lowest dct_coefficient_count
+  // of these values.
   void Compute(const std::vector<double>& spectrogram_frame,
                std::vector<double>* output) const;
 
diff --git a/tensorflow/core/kernels/mfcc_mel_filterbank.h b/tensorflow/core/kernels/mfcc_mel_filterbank.h
index 33ea1bdb5bc3e2a2326913c99f2f6713bd82f096..a766a20cbca4a7772a62a2701334c87a5ed57531 100644
--- a/tensorflow/core/kernels/mfcc_mel_filterbank.h
+++ b/tensorflow/core/kernels/mfcc_mel_filterbank.h
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-// Basic class for applying a mel-scale filterbank to an input.
+// Basic class for applying a mel-scale mapping to a power spectrum.
 
 #ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_MFCC_MEL_FILTERBANK_H_
 #define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_MFCC_MEL_FILTERBANK_H_
@@ -32,8 +32,9 @@ class MfccMelFilterbank {
                   double lower_frequency_limit,
                   double upper_frequency_limit);
 
-  // Takes a magnitude spectrogram slice as input, computes a
-  // traingular mel filterbank and places the result in output.
+  // Takes a squared-magnitude spectrogram slice as input, computes a
+  // triangular-mel-weighted linear-magnitude filterbank, and places the result
+  // in output.
   void Compute(const std::vector<double>& input,
                std::vector<double>* output) const;
 
diff --git a/tensorflow/core/kernels/mfcc_op_test.cc b/tensorflow/core/kernels/mfcc_op_test.cc
index 57391128f9e1471d863b566bebf6f061dd68a415..43e2a4594f0d9ffa15fd072396c29afac2488029 100644
--- a/tensorflow/core/kernels/mfcc_op_test.cc
+++ b/tensorflow/core/kernels/mfcc_op_test.cc
@@ -31,8 +31,8 @@ limitations under the License.
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
-
-using namespace ops;  // NOLINT(build/namespaces)
+namespace ops {
+namespace {
 
 TEST(MfccOpTest, SimpleTest) {
   Scope root = Scope::DisabledShapeInferenceScope();
@@ -74,4 +74,6 @@ TEST(MfccOpTest, SimpleTest) {
       1e-3);
 }
 
+}  // namespace
+}  // namespace ops
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl_aggregate_ops.cc
index 935eb81dd05897b49446cc285222a946be3d2931..44b94be3a05662db2d3c190f5955d13a45a6d299 100644
--- a/tensorflow/core/kernels/mkl_aggregate_ops.cc
+++ b/tensorflow/core/kernels/mkl_aggregate_ops.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include <numeric>
-
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -29,10 +28,17 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
-namespace tensorflow {
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::stream;
+using mkldnn::sum;
+#endif
 
+namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklAddNOp : public OpKernel {
  public:
@@ -41,17 +47,18 @@ class MklAddNOp : public OpKernel {
   void Compute(OpKernelContext* ctx) override {
     const int num = ctx->num_inputs();
     OP_REQUIRES(ctx, num / 2 == 2,
-                errors::InvalidArgument("Only additions of two arguments "
+                errors::InvalidArgument("Only additions of two tensors "
                                         "supported by MKL. Num inputs: ",
                                         num));
 
     MklAddNOpContext mkl_context;
-    const Tensor& input0 = MklGetInput(ctx, 0);
-    GetMklShape(ctx, 0, &(mkl_context.input1_shape));
+    size_t src1_idx = 0, src2_idx = 1;
+    const Tensor& input0 = MklGetInput(ctx, src1_idx);
+    GetMklShape(ctx, src1_idx, &(mkl_context.input1_shape));
     bool input1_in_mkl_format = mkl_context.input1_shape.IsMklTensor();
 
-    const Tensor& input1 = MklGetInput(ctx, 1);
-    GetMklShape(ctx, 1, &(mkl_context.input2_shape));
+    const Tensor& input1 = MklGetInput(ctx, src2_idx);
+    GetMklShape(ctx, src2_idx, &(mkl_context.input2_shape));
     bool input2_in_mkl_format = mkl_context.input2_shape.IsMklTensor();
 
     // handle the case of a scalar
@@ -59,13 +66,12 @@ class MklAddNOp : public OpKernel {
       const TensorShape& o_shape = input0.shape();
       Tensor* out_tensor = nullptr;
       mkl_context.output_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(ctx, 0, &out_tensor, o_shape,
+      AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
                                 mkl_context.output_shape);
       float user_i1 = (input0.scalar<T>()());
-      ;
       float user_i2 = (input1.scalar<T>()());
-      ;
-      out_tensor->scalar<T>()() = std::plus<float>{}(user_i1, user_i2);
+      out_tensor->scalar<T>()() =
+          std::plus<float>{}(user_i1, user_i2);
       return;
     }
 
@@ -82,8 +88,8 @@ class MklAddNOp : public OpKernel {
       if (o_shape.num_elements() == 0) {
         Tensor* out_tensor = nullptr;
         mkl_context.output_shape.SetMklTensor(false);
-        AllocateOutputSetMklShape(ctx, 0, &out_tensor, o_shape,
-                                  mkl_context.output_shape);
+        AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
+                                 mkl_context.output_shape);
         return;
       }
     }
@@ -92,9 +98,9 @@ class MklAddNOp : public OpKernel {
     mkl_context.in_strides = new size_t[mkl_context.in_dims];
     // Generate size, stride for input if input is in MKL format.
     if (input1_in_mkl_format || input2_in_mkl_format) {
-      const MklShape* tmp_mkl_shape = (input1_in_mkl_format)
-                                          ? &mkl_context.input1_shape
-                                          : &mkl_context.input2_shape;
+      const MklShape* tmp_mkl_shape =
+        (input1_in_mkl_format) ? &mkl_context.input1_shape :
+        &mkl_context.input2_shape;
       for (int i = 0; i < mkl_context.in_dims; i++) {
         mkl_context.in_sizes[i] = tmp_mkl_shape->GetSizes()[i];
         mkl_context.in_strides[i] = tmp_mkl_shape->GetStrides()[i];
@@ -110,7 +116,6 @@ class MklAddNOp : public OpKernel {
             mkl_context.in_strides[i - 1] * mkl_context.in_sizes[i - 1];
       }
     }
-
     std::vector<float> coeff(2, 1.0);
     mkl_context.MklCreateInputLayouts(ctx);
     CHECK_EQ(dnnSumCreate_F32(&mkl_context.Eltwise, mkl_context.attributes, 2,
@@ -127,7 +132,7 @@ class MklAddNOp : public OpKernel {
      mkl_context.output_shape.SetMklLayout(mkl_context.Eltwise, dnnResourceDst);
 
      mkl_context.output_shape.SetTfLayout(
-         mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
+        mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
      if (input1_in_mkl_format == true) {
       mkl_context.output_shape.SetTfDimOrder(mkl_context.in_dims,
       mkl_context.input1_shape.GetTfToMklDimMap());
@@ -139,12 +144,12 @@ class MklAddNOp : public OpKernel {
                         mkl_context.output_shape.GetMklLayout())) /
                     sizeof(T));
 
-     AllocateOutputSetMklShape(ctx, 0, &output, tf_shape,
+     AllocateOutputSetMklShape(ctx, src1_idx, &output, tf_shape,
                               mkl_context.output_shape);
     } else {
      const TensorShape& o_shape = input1.shape();
      mkl_context.output_shape.SetMklTensor(false);
-     AllocateOutputSetMklShape(ctx, 0, &output, o_shape,
+     AllocateOutputSetMklShape(ctx, src1_idx, &output, o_shape,
                                 mkl_context.output_shape);
     }
 
@@ -172,16 +177,18 @@ class MklAddNOp : public OpKernel {
     void MklCreateInputLayouts(OpKernelContext* context) {
       bool input1_in_mkl_format = input1_shape.IsMklTensor();
       if (!input1_in_mkl_format) {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_input1, in_dims, in_sizes, in_strides),
-                 E_SUCCESS);
+        CHECK_EQ(
+            dnnLayoutCreate_F32(&lt_input1, in_dims, in_sizes, in_strides),
+            E_SUCCESS);
       } else {
         lt_input1 = static_cast<dnnLayout_t>(input1_shape.GetCurLayout());
       }
 
       bool input2_in_mkl_format = input2_shape.IsMklTensor();
       if (!input2_in_mkl_format) {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_input2, in_dims, in_sizes, in_strides),
-                 E_SUCCESS);
+        CHECK_EQ(
+            dnnLayoutCreate_F32(&lt_input2, in_dims, in_sizes, in_strides),
+            E_SUCCESS);
       } else {
         lt_input2 = static_cast<dnnLayout_t>(input2_shape.GetCurLayout());
       }
@@ -257,8 +264,8 @@ class MklAddNOp : public OpKernel {
       bool input2_in_mkl_format = input2_shape.IsMklTensor();
       dnnDelete_F32(Eltwise);
       if (!input1_in_mkl_format || !input2_in_mkl_format) {
-        delete[] in_sizes;
-        delete[] in_strides;
+         delete [] in_sizes;
+         delete [] in_strides;
       }
       if (!input1_in_mkl_format) {
          dnnLayoutDelete_F32(lt_input1);
@@ -270,6 +277,204 @@ class MklAddNOp : public OpKernel {
   } MklAddNOpContext;
 };
 
+#else  // INTEL_MKL_DNN
+template <typename Device, typename T>
+class MklAddNOp : public OpKernel {
+ public:
+  ~MklAddNOp() {}
+  explicit MklAddNOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const int num = ctx->num_inputs();
+    // Only additions of 2 input tensors is supported now
+    OP_REQUIRES(ctx, num / 2 == 2,
+                errors::InvalidArgument("Only additions of two tensors "
+                                        "supported by MKL. Num inputs: ",
+                                        num));
+
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      size_t src1_idx = 0, src2_idx = 1, output_idx = 0;
+      const Tensor& src1_tensor = MklGetInput(ctx, src1_idx);
+      const Tensor& src2_tensor = MklGetInput(ctx, src2_idx);
+
+      MklDnnShape src1_mkl_shape, src2_mkl_shape;
+      GetMklShape(ctx, src1_idx, &src1_mkl_shape);
+      GetMklShape(ctx, src2_idx, &src2_mkl_shape);
+      bool input1_in_mkl_format = src1_mkl_shape.IsMklTensor();
+      bool input2_in_mkl_format = src2_mkl_shape.IsMklTensor();
+      int src1_dims_size = input1_in_mkl_format?
+       src1_mkl_shape.GetDimension(): src1_tensor.dims();
+      int src2_dims_size = input2_in_mkl_format?
+       src2_mkl_shape.GetDimension(): src2_tensor.dims();
+
+      if (!input1_in_mkl_format && src1_dims_size == 0) {
+         Tensor* dst_tensor = nullptr;
+         MklShape mkl_shape_dst;
+         mkl_shape_dst.SetMklTensor(false);
+         AllocateOutputSetMklShape(ctx, output_idx, &dst_tensor,
+         src1_tensor.shape(), mkl_shape_dst);
+         float user_i1 = (src1_tensor.scalar<T>()());
+         float user_i2 = (src2_tensor.scalar<T>()());
+         dst_tensor->scalar<T>()() =
+           std::plus<float>{}(user_i1, user_i2);
+         return;
+       }
+
+      // If there is nothing to compute, return.
+      if (!input1_in_mkl_format && !input2_in_mkl_format) {
+        if (src1_tensor.shape().num_elements() == 0) {
+           Tensor* dst_tensor = nullptr;
+           MklShape mkl_shape_dst;
+           mkl_shape_dst.SetMklTensor(false);
+           AllocateOutputSetMklShape(ctx, output_idx, &dst_tensor,
+           src1_tensor.shape(), mkl_shape_dst);
+           return;
+        }
+      }
+
+      std::vector<double> coeff(2, 1.0);
+      MklDnnData<T> src1(&cpu_engine);
+      MklDnnData<T> src2(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      int tmp_size = input1_in_mkl_format ? src2_dims_size: src1_dims_size;
+      memory::dims dims(tmp_size);
+      memory::dims strides(tmp_size);
+      memory::desc md1({}, memory::data_undef, memory::format_undef);
+      memory::desc md2({}, memory::data_undef, memory::format_undef);
+
+      // For creating Sum primitive, we need to ensure that all inputs are in
+      // same format. What that means is if we have a mixed input case - where
+      // one input is in Tensorflow format and one input is in MKL format -,
+      // then we need to ensure that all inputs are in same format for
+      // primitive construction. For performance reason, we say that all inputs
+      // are in MKL format in such case, and insert reorder for input that is
+      // in Tensorflow format into MKL format. On the other hand, if both the
+      // inputs are in MKL format or both are in Tensorflow format, then we
+      // dont need reorder.
+      if (!input1_in_mkl_format && !input2_in_mkl_format) {
+        // If both the inputs are in Tensorflow format, we create blocked memory
+        // descriptor.
+        dims = TFShapeToMklDnnDims(src1_tensor.shape());
+        strides = CalculateTFStrides(dims);
+        md1 = MklDnnData<T>::CreateBlockedMemDesc(dims, strides);
+        md2 = md1;
+      } else if (input1_in_mkl_format && !input2_in_mkl_format) {
+        // If one input is in MKL format and other is in Tensorflow, then
+        // create respective descriptors describing the actual case. For input
+        // in Mkl format, we just get Mkl layout from MklDnnShape. For input in
+        // Tensorflow format, we create memory descriptor using data format.
+        md1 = src1_mkl_shape.GetMklLayout();
+
+        memory::format src1_mkl_data_format = src1_mkl_shape.GetTfDataFormat();
+        auto src1_tf_data_format = MklDnnDataFormatToTFDataFormat(
+                                    src1_mkl_data_format);
+        auto src2_dims = TFShapeToMklDnnDimsInNCHW(src2_tensor.shape(),
+                                    src1_tf_data_format);
+        md2 = memory::desc(src2_dims, MklDnnType<T>(),
+                           src1_mkl_data_format);
+      } else if (input2_in_mkl_format && !input1_in_mkl_format) {
+        // Same comment as above.
+        memory::format src2_mkl_data_format = src2_mkl_shape.GetTfDataFormat();
+        auto src2_tf_data_format = MklDnnDataFormatToTFDataFormat(
+                                     src2_mkl_data_format);
+        auto src1_dims = TFShapeToMklDnnDimsInNCHW(src1_tensor.shape(),
+                                    src2_tf_data_format);
+        md1 = memory::desc(src1_dims, MklDnnType<T>(),
+                           src2_mkl_data_format);
+
+        md2 = src2_mkl_shape.GetMklLayout();
+      } else {
+        // If both the inputs are in MKL format, we use Mkl layout of the input
+        // tensors.
+        md1 = src1_mkl_shape.GetMklLayout();
+        md2 = src2_mkl_shape.GetMklLayout();
+      }
+      src1.SetUsrMem(md1, &src1_tensor);
+      src2.SetUsrMem(md2, &src2_tensor);
+
+      // As per comment above, we tell MKLDNN that both the inputs are in same
+      // format. So we set common memory descriptor in MKL format, if any of the
+      // inputs are in MKL format. Let's get memory descriptor that we will use
+      // for both the inputs.
+      // We set output memory descriptor in MKL format, if any of the
+      // inputs are in MKL format.
+      memory::desc common_md({}, memory::data_undef, memory::format_undef);
+      if (input1_in_mkl_format || input2_in_mkl_format) {
+        common_md = input1_in_mkl_format ? md1 : md2;
+        dst.SetUsrMem(common_md);
+      } else {
+        // Since both the inputs are in Tensorflow format, and have
+        // same shape, we can get memory descriptor from any input.
+        common_md = md1;
+        dst.SetUsrMem(common_md);
+      }
+
+      std::vector<memory::primitive_desc> srcs_pd;
+      // Memory descriptor for 1st input
+      srcs_pd.push_back(memory::primitive_desc(common_md, cpu_engine));
+      // Memory descriptor for 2nd input
+      srcs_pd.push_back(memory::primitive_desc(common_md, cpu_engine));
+      auto sum_pd = sum::primitive_desc(dst.GetUsrMemDesc(), coeff, srcs_pd);
+
+      // Now we setup resources for primitive execution.
+      // First, we need to check if any of the inputs need to be reordered as
+      // per the logic described above. Since output will be in MKL format if
+      // atleast one input is in MKL format, we choose output descriptor for
+      // reorder.
+      std::vector<primitive::at> inputs;
+      std::vector<primitive> net;
+      // Check if actual input format of the tensor is different than common_pd
+      // we told MKLDNN. In that case, we will need reorder.
+      src1.CheckReorderToOpMem(srcs_pd[0], &net);
+      src2.CheckReorderToOpMem(srcs_pd[1], &net);
+      inputs.push_back(src1.GetOpMem());
+      inputs.push_back(src2.GetOpMem());
+
+      // Allocate output tensor now.
+      Tensor* dst_tensor = nullptr;
+      MklDnnShape output_mkl_shape;
+      TensorShape output_tf_shape;
+
+      if (input2_in_mkl_format || input1_in_mkl_format) {
+        output_mkl_shape.SetMklTensor(true);
+        auto output_pd = dst.GetUsrMemPrimDesc();
+        output_mkl_shape.SetMklLayout(&output_pd);
+        output_mkl_shape.SetElemType(MklDnnType<T>());
+        if (input1_in_mkl_format) {
+          output_mkl_shape.SetTfLayout(src1_dims_size,
+                                       src1_mkl_shape.GetSizesAsMklDnnDims(),
+                                       src1_mkl_shape.GetTfDataFormat());
+        } else {
+          output_mkl_shape.SetTfLayout(src2_dims_size,
+                                       src2_mkl_shape.GetSizesAsMklDnnDims(),
+                                       src2_mkl_shape.GetTfDataFormat());
+        }
+        output_tf_shape.AddDim((output_pd.get_size() / sizeof(T)));
+      } else {
+        output_mkl_shape.SetMklTensor(false);
+        output_tf_shape = src1_tensor.shape();
+      }
+      AllocateOutputSetMklShape(ctx, output_idx, &dst_tensor,
+                                output_tf_shape, output_mkl_shape);
+      dst.SetUsrMemDataHandle(dst_tensor);
+
+      // Create Sum op, and submit net for execution.
+      net.push_back(sum(sum_pd, inputs, dst.GetOpMem()));
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+      OP_REQUIRES_OK(ctx, errors::Aborted("Operation received an exception:",
+                                            error_msg));
+    }
+  }
+};
+
+#endif
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklAddN")                          \
                               .Device(DEVICE_CPU)                   \
diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc
index d90baee069c17e9b25169dcb2650681f6103f9b1..d751a70fc86b40d8ca656322484848cf906359fd 100644
--- a/tensorflow/core/kernels/mkl_avgpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc
@@ -24,10 +24,25 @@
 
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::error;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::padding_kind;
+using mkldnn::engine;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// For now, MKL-ML is default. So making MKL-DNN not a default choice.
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklAvgPoolingOp : public OpKernel {
  public:
@@ -132,7 +147,7 @@ class MklAvgPoolingOp : public OpKernel {
         E_SUCCESS);
 
     mkl_context.MklCleanup();
-  }
+  }  // Compute
 
  private:
   typedef struct {
@@ -411,7 +426,293 @@ class MklAvgPoolingGradOp : public OpKernel {
   std::vector<int32> stride_;
   Padding padding_;
   TensorFormat data_format_;
-};
+};  // MklAvgPoolingGradOp
+
+
+#else  // INTEL_MKL_DNN is defined
+
+template <typename Device, typename T>
+class MklAvgPoolingOp : public MklPoolingForwardOpBase<T> {
+ public:
+  explicit MklAvgPoolingOp(OpKernelConstruction* context)
+  : MklPoolingForwardOpBase<T>(context) {
+    // Workspace is an MKLDNN construct that is only used in Max Pooling.
+    // So set workspace_enabled_ to false.
+    this->workspace_enabled_ = false;
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const Tensor& input_tensor = MklGetInput(context,
+              this->kInputTensorIndexInput);
+      MklDnnShape dnn_shape_input;
+      GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input);
+      this->SanityCheckInput(context, input_tensor, dnn_shape_input);
+      if (!context->status().ok()) return;
+
+      MklDnnData<T> dnn_data_input(&cpu_engine);
+      MklDnnData<T> dnn_data_output(&cpu_engine);
+
+      // initialize variables for the pooling op
+      MklPoolParameters pool_params;
+      // Get the input tensor and initialize the pooling parameters
+      this->ConfigureInput(context, dnn_shape_input,
+                          input_tensor, &pool_params,
+                          &dnn_data_input);
+      OP_REQUIRES_OK(context, context->status());
+
+      // Declare output tensor
+      Tensor* output_tensor = nullptr;
+      memory::dims output_dims_mkl_order;
+      this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+      // If input is in Mkl layout, then just get the memory format from it
+      // directly, instead of using input data_format to AvgPool.
+      if (dnn_shape_input.IsMklTensor()) {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                static_cast<memory::format>(dnn_data_input.GetUsrMemDesc()
+                    .data.format));
+
+      } else {
+          dnn_data_output.SetUsrMem(output_dims_mkl_order,
+              this->data_format_mkldnn_);
+      }
+
+        // describe the memory layout
+      dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
+
+      // 3. create a pooling primitive descriptor
+      auto pool_desc = pooling_forward::desc(prop_kind::forward,
+              algorithm::pooling_avg_exclude_padding,
+              dnn_data_input.GetUsrMemDesc(),
+              dnn_data_output.GetUsrMemDesc(),
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_prim_desc = pooling_forward::primitive_desc(pool_desc,
+                                                 cpu_engine);
+
+      this->AllocateOutputTensor(context, pool_prim_desc, output_dims_mkl_order,
+                            this->data_format_mkldnn_, &output_tensor);
+      CHECK_NOTNULL(output_tensor);
+
+      OP_REQUIRES_OK(context, context->status());
+      dnn_data_output.SetUsrMemDataHandle(output_tensor);
+
+      this->PrepareAndExecuteNet(pool_prim_desc,
+                                &dnn_data_input,
+                                &dnn_data_output);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Operation received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+};  // MklAvgPoolingOp
+
+//-----------------------------------------------------------------------------
+
+template <class Device, class T>
+class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase<T> {
+ public:
+  explicit MklAvgPoolingGradOp(OpKernelConstruction* context)
+      : MklPoolingBackwardOpBase<T>(context) {
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnShape original_input_mkl_shape, input_gradient_mkl_shape;
+      const Tensor& tensor_in_shape = MklGetInput(context,
+          kInputTensorIndexInputShape);
+      const Tensor& input_gradient_tensor = MklGetInput(context,
+          kInputTensorIndexInputGradient);
+      GetMklShape(context, kInputTensorIndexInputShape,
+            &original_input_mkl_shape);
+      GetMklShape(context, kInputTensorIndexInputGradient,
+            &input_gradient_mkl_shape);
+
+
+      SanityCheckInputs(context, tensor_in_shape,
+                        input_gradient_tensor,
+                        original_input_mkl_shape,
+                        input_gradient_mkl_shape);
+      if (!context->status().ok()) return;
+
+      // Used to allocate output_diff_src/diff_src
+      // and create pool_fwd mdm desc
+      // 0. Input("orig_input_shape: int32") //NOT a T Tensor!
+      // 1. Input("grad: T")
+
+      MklDnnData<T> input_gradient_diff_dst(&cpu_engine);
+      MklDnnData<T> output_diff_src(&cpu_engine);
+      Tensor* output_tensor_diff_src = nullptr;
+      TensorShape original_input_shape;
+      MklPoolParameters pool_params;
+      memory::dims output_dims_mkl_order, original_input_dims_nchw;
+      // Configure the original input memory descriptor
+      memory::desc original_input_md = ConfigureOriginalInput(context,
+                                      tensor_in_shape,
+                                      original_input_mkl_shape,
+                                      &original_input_dims_nchw,
+                                      &pool_params,
+                                      &original_input_shape);
+
+      // configure the original output memory descriptor
+      // by definition, the shape of the original output is the same
+      // as the shape of the gradient diff_dst
+      memory::desc original_output_md = this->ConfigureOriginalOutput(
+                pool_params, input_gradient_mkl_shape, output_dims_mkl_order);
+
+      memory::desc target_diff_dst_md = this->ConfigureInputGradient(
+                                    input_gradient_mkl_shape,
+                                    input_gradient_tensor,
+                                    &input_gradient_diff_dst,
+                                    original_output_md);
+      // The shape of the output diff src needs to be the same shape as the
+      // original input. But we will set its format to be same as the format of
+      // input gradient. We won't use format of original input since it will
+      // always be in Tensorflow layout (given that AvgPoolGrad gets shape of
+      // the input rather than actual input).
+      output_diff_src.SetUsrMem(original_input_dims_nchw,
+                                static_cast<memory::format>(
+                                  target_diff_dst_md.data.format));
+
+      // Create the forward pooling primitive descriptor so we can reference it
+      // in the backward pooling primitive descriptor
+      auto pool_fwd_desc = pooling_forward::desc(prop_kind::forward,
+              algorithm::pooling_avg_exclude_padding,
+              original_input_md,
+              original_output_md,
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_fwd_prim_desc
+              = pooling_forward::primitive_desc(pool_fwd_desc,
+                                                  cpu_engine);
+
+      auto pool_bkwd_desc = pooling_backward::desc(
+              algorithm::pooling_avg_exclude_padding,
+              output_diff_src.GetUsrMemDesc(),
+              target_diff_dst_md,
+              memory::dims({  pool_params.row_stride,
+                              pool_params.col_stride}),
+              memory::dims({  pool_params.window_rows,
+                              pool_params.window_cols}),
+              memory::dims({  static_cast<int>(pool_params.pad_top),
+                              static_cast<int>(pool_params.pad_left)}),
+              memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                              static_cast<int>(pool_params.pad_right)}),
+              TFPaddingToMklDnnPadding(this->padding_));
+      auto pool_bkwd_prim_desc
+                = pooling_backward::primitive_desc(pool_bkwd_desc,
+                                              cpu_engine,
+                                              pool_fwd_prim_desc);
+      this->AllocateOutputTensor(context, pool_bkwd_prim_desc,
+                      original_input_dims_nchw,
+                      this->data_format_mkldnn_,
+                      &output_tensor_diff_src);
+
+      output_diff_src.SetUsrMemDataHandle(output_tensor_diff_src);
+
+      this->PrepareAndExecuteNet(pool_bkwd_prim_desc,
+                          &input_gradient_diff_dst,
+                          &output_diff_src,
+                          memory::primitive_desc(
+                              target_diff_dst_md,
+                              cpu_engine));
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                      ", message: " + string(e.message) +
+                      ", in file " + string(__FILE__) + ":" +
+                      std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                      errors::Aborted("Compute received an exception:",
+                                      error_msg));
+    }
+  }  // Compute
+
+ private:
+  // 0. Input("orig_input_shape: int32")
+  // 1. Input("grad: T")
+  const int kInputTensorIndexInputShape = 0;
+  const int kInputTensorIndexInputGradient = 1;
+
+  memory::desc ConfigureOriginalInput(OpKernelContext* context,
+        const Tensor& tensor_original_input_shape,
+        const MklDnnShape& original_input_mkl_shape,
+        memory::dims* original_input_dims_mkl_order,
+        MklPoolParameters* pool_params,
+        TensorShape* input_tensor_shape) {
+    CHECK_NOTNULL(original_input_dims_mkl_order);
+    CHECK_NOTNULL(pool_params);
+    CHECK_NOTNULL(input_tensor_shape);
+    // For AvgPoolGrad, we only get the size of the original input because
+    // The original data is irrelvant.
+    auto shape_vec = tensor_original_input_shape.vec<int32>();
+    for (int64 i = 0; i < tensor_original_input_shape.NumElements(); ++i) {
+      input_tensor_shape->AddDim(shape_vec(i));
+    }
+
+    return MklPoolingBackwardOpBase<T>::ConfigureOriginalInput(
+                                              context,
+                                              tensor_original_input_shape,
+                                              original_input_mkl_shape,
+                                              original_input_dims_mkl_order,
+                                              pool_params,
+                                              *input_tensor_shape);
+}
+
+  void SanityCheckInputs(OpKernelContext* context,
+                        const Tensor& tensor_in_shape,
+                        const Tensor& input_gradient_tensor,
+                        const MklDnnShape& original_input_mkl_shape,
+                        const MklDnnShape& input_gradient_mkl_shape) {
+    if (!original_input_mkl_shape.IsMklTensor()) {
+      OP_REQUIRES(context, tensor_in_shape.dims() == 1 &&
+          tensor_in_shape.NumElements() == 4,
+          errors::InvalidArgument("original input shape must be "
+                "1-dimensional and 4 elements"));
+    } else {
+      OP_REQUIRES(context, original_input_mkl_shape.GetDimension() == 1 &&
+          original_input_mkl_shape.DimSize(0) == 4,
+          errors::InvalidArgument("original input shape must be "
+                "1-dimensional and 4 elements"));
+    }
+
+    if (!input_gradient_mkl_shape.IsMklTensor()) {
+      // For avgpooling, input_gradient_diff_dst should have 4 dimensions.
+      OP_REQUIRES(context, input_gradient_tensor.dims() == 4,
+          errors::InvalidArgument("Gradient shape must be "
+                              "4-dimensional"));
+    } else {
+      OP_REQUIRES(context, input_gradient_mkl_shape.GetDimension() == 4,
+          errors::InvalidArgument("Gradient shape must be "
+                              "4-dimensional"));
+    }
+  }
+};  // MklAvgPoolingGradOp
+
+
+
+#endif  // INTEL_MKL_DNN
 
 REGISTER_KERNEL_BUILDER(Name("_MklAvgPool")
                             .Device(DEVICE_CPU)
@@ -427,3 +728,4 @@ REGISTER_KERNEL_BUILDER(Name("_MklAvgPoolGrad")
 
 }  // namespace tensorflow
 #endif  // INTEL_MKL
+
diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc
index 138acdf29885cbd526086ce092e930b0c24aac13..9fee94f946555480fce8acf904a7909622404524 100644
--- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc
+++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #if defined(INTEL_MKL)
 #include <vector>
 #include "mkl_cblas.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/numeric_types.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -72,10 +73,10 @@ class BatchMatMulMkl : public OpKernel {
     TensorShape out_shape;
     for (int i = 0; i < ndims - 2; ++i) {
       OP_REQUIRES(ctx, lhs.dim_size(i) == rhs.dim_size(i),
-                  errors::InvalidArgument("lhs.dim(", i, ") and rhs.dim(", i,
-                                          ") must be the same: ",
-                                          lhs.shape().DebugString(), " vs ",
-                                          rhs.shape().DebugString()));
+                  errors::InvalidArgument(
+                      "lhs.dim(", i, ") and rhs.dim(", i,
+                      ") must be the same: ", lhs.shape().DebugString(), " vs ",
+                      rhs.shape().DebugString()));
       out_shape.AddDim(lhs.dim_size(i));
     }
     auto batch_size = (ndims == 2) ? 1 : out_shape.num_elements();
@@ -109,7 +110,7 @@ class BatchMatMulMkl : public OpKernel {
     const uint64 M = lhs_reshaped.dimension(adj_x_ ? 2 : 1);
     const uint64 K = lhs_reshaped.dimension(adj_x_ ? 1 : 2);
     const uint64 N = rhs_reshaped.dimension(adj_y_ ? 1 : 2);
-    
+
     std::vector<MKL_INT> m_array(batch_size, M);
     std::vector<MKL_INT> n_array(batch_size, N);
     std::vector<MKL_INT> k_array(batch_size, K);
@@ -128,7 +129,7 @@ class BatchMatMulMkl : public OpKernel {
       b_array.push_back(&rhs_reshaped(i, 0, 0));
       c_array.push_back(&out_reshaped(i, 0, 0));
     }
-    
+
     MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, &m_array[0], &n_array[0],
                       &k_array[0], &a_array[0], &lda_array[0], &b_array[0],
                       &ldb_array[0], &c_array[0], &ldc_array[0], 1,
diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc
index e6673b2ffb7dc4a2e0127c363b4402c98a023b17..d109bb6bcfe6360af12086bad452752336357f35 100644
--- a/tensorflow/core/kernels/mkl_concat_op.cc
+++ b/tensorflow/core/kernels/mkl_concat_op.cc
@@ -1,11 +1,8 @@
 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-
     http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -33,11 +30,22 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::concat;
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// List of TensorShape objects. Used in Concat/Split layers.
+typedef std::vector<TensorShape> TensorShapeList;
+
 enum AxisArgumentName { NAME_IS_AXIS, NAME_IS_CONCAT_DIM };
 
+
 // TODO(intelft) Check if we can reuse existing EigenConcatOp using Mutable
 // reference inputs.
 // --------------------------------------------------------------------------
@@ -55,6 +63,8 @@ class EigenConcatBaseOp : public OpKernel {
   // we need to have empty Compute because Compute is pure virtual function.
   void Compute(OpKernelContext* c) {}
 
+#ifndef INTEL_MKL_DNN
+
   void Compute(OpKernelContext* c, const std::vector<Tensor>& values) {
     const Tensor* concat_dim_tensor;
     const char* axis_attribute_name =
@@ -139,8 +149,89 @@ class EigenConcatBaseOp : public OpKernel {
       ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
     }
   }
+
+#else  // MKL_DNN
+
+void Compute(OpKernelContext* c, const std::vector<Tensor>& values,
+                        const TensorShapeList& input_shapes) {
+    const Tensor* concat_dim_tensor;
+    const char* axis_attribute_name =
+        AxisArgName == NAME_IS_AXIS
+            ? "axis"
+            : AxisArgName == NAME_IS_CONCAT_DIM ? "concat_dim" : "<invalid>";
+    OP_REQUIRES_OK(c, c->input(axis_attribute_name, &concat_dim_tensor));
+    OP_REQUIRES(c, IsLegacyScalar(concat_dim_tensor->shape()),
+                errors::InvalidArgument(
+                    axis_attribute_name,
+                    " tensor should be a scalar integer, but got shape ",
+                    concat_dim_tensor->shape().DebugString()));
+    const int32 concat_dim =
+        internal::SubtleMustCopy(concat_dim_tensor->scalar<int32>()());
+    // Instead of accessing values from context, we use input to Compute.
+    const int N = values.size();
+    const int input_dims = input_shapes[0].dims();
+    const TensorShape& input_shape = input_shapes[0];
+
+    int32 axis = concat_dim < 0 ? concat_dim + input_dims : concat_dim;
+    OP_REQUIRES(c,
+                (0 <= axis && axis < input_dims) ||
+                    (allow_legacy_scalars() && concat_dim == 0),
+                errors::InvalidArgument(
+                    "ConcatOp : Expected concatenating dimensions in the range "
+                    "[",
+                    -input_dims, ", ", input_dims, "), but got ", concat_dim));
+    // Note that we reduce the concat of n-dimensional tensors into a two
+    // dimensional concat. Assuming the dimensions of any input/output
+    // tensor are {x0, x1,...,xn-1, y0, y1,...,ym-1}, where the concat is along
+    // the dimension indicated with size y0, we flatten it to {x, y}, where y =
+    // Prod_i(yi) and x = ((n > 0) ? Prod_i(xi) : 1).
+    ConstMatrixVector inputs_flat;
+    inputs_flat.reserve(N);
+    int64 inputs_flat_dim0 = 1;
+    for (int d = 0; d < axis; ++d) {
+      inputs_flat_dim0 *= input_shape.dim_size(d);
+    }
+    int64 output_concat_dim = 0;
+    const bool input_is_scalar = IsLegacyScalar(input_shape);
+    for (int i = 0; i < N; ++i) {
+      const auto in = values[i];
+      const bool in_is_scalar = IsLegacyScalar(input_shapes[i]);
+      OP_REQUIRES(
+          c, (input_shapes[i].dims() == input_dims) ||
+              (input_is_scalar && in_is_scalar),
+          errors::InvalidArgument(
+              "ConcatOp : Ranks of all input tensors should match: shape[0] = ",
+              input_shape.DebugString(), " vs. shape[", i,
+              "] = ", input_shapes[i].DebugString()));
+      if (in.NumElements() > 0) {
+        int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0;
+        inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
+            in.shaped<T, 2>({inputs_flat_dim0, inputs_flat_dim1})));
+      }
+      output_concat_dim += input_shapes[i].dims() > 0 ?
+                           input_shapes[i].dim_size(axis) : 1;
+    }
+
+    TensorShape output_shape(input_shape);
+    if (output_shape.dims() == 0) {
+      output_shape.AddDim(output_concat_dim);
+    } else {
+      output_shape.set_dim(axis, output_concat_dim);
+    }
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(c, c->allocate_output(0, output_shape, &output));
+    if (output->NumElements() > 0) {
+      int64 output_dim1 = output->NumElements() / inputs_flat_dim0;
+      auto output_flat = output->shaped<T, 2>({inputs_flat_dim0, output_dim1});
+      ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
+    }
+  }
+
+#endif
 };
 
+#ifndef INTEL_MKL_DNN
+
 // --------------------------------------------------------------------------
 //                      Mkl Concat Op
 // --------------------------------------------------------------------------
@@ -327,6 +418,7 @@ class MklConcatOp : public OpKernel {
     OP_REQUIRES_OK(context, context->status());
   }
 
+
  private:
   typedef struct {
     TensorFormat data_format;
@@ -435,8 +527,282 @@ class MklConcatOp : public OpKernel {
         mkl_tensor->flat<uint8>().data(),
         mkl_tensor->flat<uint8>().size() * sizeof(uint8));
   }
+
+  // overloading methods with input shapes as a list of TensorShape's
+  void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
+                        const TensorShapeList& input_shapes) {
+    CHECK_EQ(values.size(), input_shapes.size());
+
+    std::vector<Tensor> converted_values;
+    for (int i = 0; i < input_shapes.size(); i++) {
+      converted_values.push_back(values[i]);
+    }
+
+    // Call Eigen concat.
+    eigen_concat_op_.Compute(context, converted_values);
+
+    // Set dummy Mkl tensor as output Mkl tensor for this op.
+    MklShape mkl_tensor_mkl_shape;
+    mkl_tensor_mkl_shape.SetMklTensor(false);
+    mkl_tensor_mkl_shape.SetDimensions(4);
+    Tensor* mkl_tensor = nullptr;
+    TensorShape mkl_tensor_tf_shape;
+    mkl_tensor_tf_shape.AddDim(
+        SIZE_OF_MKL_SERIAL_DATA(mkl_tensor_mkl_shape.GetDimension()));
+    int tf_output_index = 0;
+    context->allocate_output(
+        GetTensorMetaDataIndex(tf_output_index, context->num_outputs()),
+        mkl_tensor_tf_shape, &mkl_tensor);
+    mkl_tensor_mkl_shape.SerializeMklShape(
+        mkl_tensor->flat<uint8>().data(),
+        mkl_tensor->flat<uint8>().size() * sizeof(uint8));
+  }
 };
 
+#else
+
+// --------------------------------------------------------------------------
+//                      Mkl Concat Op
+// --------------------------------------------------------------------------
+
+template <typename Device, typename T, AxisArgumentName AxisArgName>
+class MklConcatOp : public OpKernel {
+ private:
+  TensorFormat data_format_;
+  EigenConcatBaseOp<Device, T, AxisArgName> eigen_concat_op_;
+
+ public:
+  typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>
+      ConstMatrixVector;
+
+  explicit MklConcatOp(OpKernelConstruction* c)
+      : OpKernel(c), eigen_concat_op_(c) {}
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      OpInputList input_tensors;
+      GetMklInputList(context, "values", &input_tensors);
+      const int N = input_tensors.size();
+
+      // Get Tensor shapes.
+      std::vector<MklDnnShape> input_shapes(N);
+      GetMklShapeList(context, "values", &input_shapes);
+
+      const Tensor& concat_dim_tensor = (AxisArgName == NAME_IS_CONCAT_DIM)
+                    ? MklGetInput(context, 0) : MklGetInput(context, N);
+      // Sanity checks
+      OP_REQUIRES(context, IsLegacyScalar(concat_dim_tensor.shape()),
+        errors::InvalidArgument(
+            "Concat dim tensor should be a scalar integer, but got shape ",
+            concat_dim_tensor.shape().DebugString()));
+      int32 concat_dim = internal::SubtleMustCopy(
+                           concat_dim_tensor.scalar<int32>()());
+
+      // check that ranks of all tensors match
+      // and that their shapes match except for concat_dim.
+      int i = 0;
+      bool invoke_eigen = false;
+      bool are_all_mkl_inputs = true, are_all_tf_inputs = true;
+      const TensorShape expected_shape = input_shapes[0].IsMklTensor() ?
+                                         input_shapes[0].GetTfShape() :
+                                         input_tensors[0].shape();
+      size_t expected_dims = expected_shape.dims();
+
+      if (concat_dim < 0) concat_dim = expected_dims + concat_dim;
+
+      for (auto& s : input_shapes) {
+        if (s == expected_shape) {++i; continue;}
+
+        TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() :
+                      input_tensors[i].shape();
+        size_t s_dims = s_shape.dims();
+
+        OP_REQUIRES(context, s_dims == expected_dims,
+                  errors::InvalidArgument(
+                      "_MklConcatOp : Ranks of all input tensors should match:"
+                      " input dimensions = ",
+                      s_dims, " vs. expected rank = ", expected_dims));
+
+        for (int d = 0; d < expected_dims; ++d) {
+          if (d == concat_dim) continue;
+
+          size_t expected_size = expected_shape.dim_size(d);
+          size_t s_size = s_shape.dim_size(d);
+          OP_REQUIRES(
+            context, expected_size == s_size,
+            errors::InvalidArgument("_MklConcatOp : Dimensions of inputs "
+                    "should match: shape[0][", d, "]= ", expected_size,
+                    " vs. shape[", i, "][", d, "] = ", s_size));
+        }
+
+        if (s.IsMklTensor())
+          are_all_tf_inputs = false;
+        else
+          are_all_mkl_inputs = false;
+
+        if (s_dims != 4) invoke_eigen = true;
+        ++i;
+      }
+
+      // All inputs are not in one format (TF or MKL). This is mixed input case.
+      // We can potentially optimize this case by converting all TF inputs
+      // to Mkl format. But currently, we fall to Eigen for this case.
+      // It may be possible to convert inputs that in TF format to Mkl
+      // format and avoid calling eigen version.
+      if (!are_all_tf_inputs && !are_all_mkl_inputs) invoke_eigen = true;
+
+      // Call Eigen library
+      if (invoke_eigen) {
+        TensorShapeList tf_input_shapes;
+        i = 0;
+        for (auto& s : input_shapes) {
+          TensorShape s_shape = s.IsMklTensor() ? s.GetTfShape() :
+                                input_tensors[i].shape();
+          tf_input_shapes.push_back(s_shape);
+          ++i;
+        }
+        CallEigenVersion(context, input_tensors, tf_input_shapes);
+        return;
+      }
+
+      memory::dims dst_dims;
+      if (are_all_mkl_inputs)
+        dst_dims = TFShapeToMklDnnDims(input_shapes[0].GetTfShape());
+      else
+        // When all the inputs are in Tensorflow format, we don't know
+        // what is the input data format. In that case, we just use
+        // output format that is same as input formats.
+        dst_dims = TFShapeToMklDnnDims(input_tensors[0].shape());
+
+      std::vector<memory::primitive_desc> srcs_pd;
+      std::vector<MklDnnData<T>> srcs(N, MklDnnData<T>(&cpu_engine));
+      int64 dst_concat_dim_size = 0;
+      for (int k =0; k < N; k++) {
+        bool is_mkl_tensor = input_shapes[k].IsMklTensor();
+        memory::dims src_dims;
+
+        // Same comment as dst_dims for src_dims.
+        src_dims = (is_mkl_tensor) ?
+                   TFShapeToMklDnnDims(input_shapes[k].GetTfShape()) :
+                   TFShapeToMklDnnDims(input_tensors[k].shape());
+
+        dst_concat_dim_size += src_dims[concat_dim];
+        auto src_md = is_mkl_tensor ? input_shapes[k].GetMklLayout() :
+          // It does not matter what data format we use here (NHWC or NCHW).
+          // We just need to ensure that output of Concat uses same data format
+          // as input.
+                  memory::desc(src_dims, MklDnnType<T>(), memory::format::nchw);
+
+        srcs[k].SetUsrMem(src_md, &input_tensors[k]);
+        auto src_mpd = srcs[k].GetUsrMemPrimDesc();
+        srcs_pd.push_back(src_mpd);
+      }
+      dst_dims[concat_dim] = dst_concat_dim_size;
+
+      MklDnnData<T> dst(&cpu_engine);
+      memory::desc dst_md({}, memory::data_undef, memory::format_undef);
+      memory::dims dst_dims_in_nchw;
+      if (are_all_mkl_inputs) {
+        // Since we are passing a specific format for destination,
+        // we need to have dst_dims in MklDnn order (NCHW).
+        auto orig_tf_format = input_shapes[0].GetTfDataFormat();
+        dst_dims_in_nchw = MklDnnDimsInNCHW(dst_dims,
+                               MklDnnDataFormatToTFDataFormat(orig_tf_format));
+        // We will set the output in the same format as input to avoid layout
+        // conversions.
+        // Currently we are setting dst format same as input format.
+        // See if we can make this choice in a better way.
+        dst_md = memory::desc(dst_dims_in_nchw, MklDnnType<T>(),
+                 (memory::format) input_shapes[0].GetMklLayout().data.format);
+      } else {
+        // Again, format does not matter here. We just need to make it same as
+        // input format.
+        dst_md = memory::desc(dst_dims, MklDnnType<T>(), memory::format::nchw);
+      }
+
+      std::vector<primitive::at> inputs;
+      for (int k=0; k < input_tensors.size(); k++)
+        inputs.push_back(srcs[k].GetOpMem());
+
+      // If all inputs are in MKL format, then meaning of concat_dim needs to
+      // change. Value of concat_dim is tied to input Tensorflow data format
+      // (NHWC or NCHW). MklDnn dimensions are in NCHW order. So if Tensorflow
+      // tensors are in NCHW order, then concat_dim semantics is preserved.
+      // But ifinput tensors are in NHWC order, then semantics need to change.
+      // E.g., if we are concatinating over Channel (dimension 3 for NHWC),
+      // then since MklDnn order is NCHW, concat_dim needs to be 1.
+      if (are_all_mkl_inputs)
+        concat_dim = input_shapes[0].TfDimIdx(concat_dim);
+
+      auto concat_pd = concat::primitive_desc(dst_md, concat_dim, srcs_pd);
+
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      Tensor* dst_tensor = nullptr;
+      if (are_all_mkl_inputs) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = concat_pd.dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dst_dims.size(), dst_dims_in_nchw,
+                                  input_shapes[0].GetTfDataFormat());
+        tf_shape_dst.AddDim((dst_pd.get_size() / sizeof(T)));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = MklDnnDimsToTFShape(dst_dims);
+      }
+      AllocateOutputSetMklShape(context, 0, &dst_tensor,
+                                tf_shape_dst, dnn_shape_dst);
+      CHECK_NOTNULL(dst_tensor);
+
+      dst_md = dnn_shape_dst.IsMklTensor() ?
+               dnn_shape_dst.GetMklLayout() : dst_md;
+      dst.SetUsrMem(dst_md, dst_tensor);
+
+      auto concat_op = concat(concat_pd, inputs, dst.GetOpMem());
+      std::vector<primitive> net;
+      net.push_back(concat_op);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+               ", message: " + string(e.message) + ", in file " +
+               string(__FILE__) + ":" + std::to_string(__LINE__);
+        OP_REQUIRES_OK(context, errors::Aborted(
+                "Operation received an exception:", error_msg));
+    }
+  }
+
+  void CallEigenVersion(OpKernelContext* context, const OpInputList& values,
+                        const TensorShapeList& input_shapes) {
+    CHECK_EQ(values.size(), input_shapes.size());
+
+    std::vector<Tensor> converted_values;
+    for (int i = 0; i < input_shapes.size(); i++)
+      converted_values.push_back(values[i]);
+
+    // Call Eigen concat.
+    eigen_concat_op_.Compute(context, converted_values, input_shapes);
+
+    // Set output Mkl tensor for this op.
+    MklDnnShape dnn_shape_output;
+    dnn_shape_output.SetMklTensor(false);
+    dnn_shape_output.SetDimensions(4);
+    Tensor* output_tensor = nullptr;
+    TensorShape tf_shape_output;
+    tf_shape_output.AddDim(
+        dnn_shape_output.GetSerializeBufferSize());
+    context->allocate_output(
+        GetTensorMetaDataIndex(0, context->num_outputs()),
+        tf_shape_output, &output_tensor);
+    dnn_shape_output.SerializeMklDnnShape(
+        output_tensor->flat<uint8>().data(),
+        output_tensor->flat<uint8>().size() * sizeof(uint8));
+  }
+};
+
+#endif
+
 /* Use optimized concat for float type only */
 #define REGISTER_MKL_CPU(type)                                              \
   REGISTER_KERNEL_BUILDER(Name("_MklConcat")                                \
diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index f291281108d36465ef670cb990714dbb8a0a5715..54d4916d4943be4957bb60b273cdbf2d6ce1ffdc 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -47,11 +47,8 @@ limitations under the License.
 
 using mkldnn::stream;
 using mkldnn::prop_kind;
-
-using mkldnn::convolution_forward;
 using mkldnn::convolution_backward_weights;
-using mkldnn::convolution_direct;
-
+using mkldnn::memory;
 #endif
 
 namespace tensorflow {
@@ -426,183 +423,236 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel {
   TensorFormat data_format_;
 };
 
+#define REGISTER_MKL_FILTER_KERNELS(T)                              \
+  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter")          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T>);
+TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
+#undef REGISTER_MKL_FILTER_KERNELS
+
 #else
 
-template <typename Device, class T>
-class MklConv2DCustomBackpropFilterOp : public OpKernel {
+template <typename Device, class T, bool biasEnabled>
+class MklConv2DCustomBackpropFilterOp :
+  public MklConv2DBackpropCommonOp<Device, T> {
  public:
   explicit MklConv2DCustomBackpropFilterOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    string data_format;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
-    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
+      : MklConv2DBackpropCommonOp<Device, T>(context) { }
+  ~MklConv2DCustomBackpropFilterOp() {}
 
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    int stride_n = GetTensorDim(strides_, data_format_, 'N');
-    int stride_c = GetTensorDim(strides_, data_format_, 'C');
-    OP_REQUIRES(
-        context, (stride_n == 1 && stride_c == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+ private:
+  void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                         const MklDnnShape& filter_mkl_shape,
+                         const MklDnnShape& obp_mkl_shape) {
+    CHECK(!filter_mkl_shape.IsMklTensor())
+      << "Conv2DBackpropFilter: filter should not be in MKL Layout";
   }
 
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
+  size_t GetInputTensorIndexWithSizes() { return 1; /* filter index */ }
 
-      MklDnnData<T> input(&cpu_engine);
-      MklDnnData<T> outbackprop(&cpu_engine);
-      MklDnnData<T> output(&cpu_engine);
+  TensorShape MakeInputTfShape(OpKernelContext* context,
+                               const Tensor& input_tensor) {
+    size_t input_idx = 0;
+    return GetTfShape(context, input_idx);
+  }
 
-      // Input tensors
-      const Tensor& input_tensor = MklGetInput(context, 0);
-      const Tensor& filter_tensor = MklGetInput(context, 1);
-      const Tensor& obp_tensor = MklGetInput(context, 2);  // Outbackprop
+  TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                const Tensor& filter_tensor) {
+    TensorShape filter_tf_shape;
+    CHECK_EQ(TensorShapeUtils::IsVector(filter_tensor.shape()), true);
+    CHECK_EQ(TensorShapeUtils::MakeShape(
+             filter_tensor.vec<int32>(), &filter_tf_shape).ok(), true);
+    return filter_tf_shape;
+  }
 
-      // Generate input shapes.
-      TensorShape filter_shape;
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_tensor.shape()),
-        errors::InvalidArgument(
-              "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ",
-              filter_tensor.dims()));
-      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                        filter_tensor.vec<int32>(), &filter_shape));
-      TensorShape input_shape = input_tensor.shape();
-      TensorShape obp_shape = obp_tensor.shape();
-
-      // By default, all dims are in MKL order. Only dims in TF order
-      // are those with prefix tf_order.
-      memory::dims obp_dims, fwd_input_dims, fwd_filter_dims;
-      memory::dims padding_l, padding_r, strides, fwd_output_dims;
-      memory::dims fwd_output_dims_tf_order;
-
-      // Get forward convolution parameters.
-      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
-                                         &fwd_input_dims, &fwd_filter_dims,
-                                         &strides,
-                                         &fwd_output_dims_tf_order,
-                                         &fwd_output_dims,
-                                         &padding_l, &padding_r);
-      if (!context->status().ok()) return;
-
-      // Create Convolution forward descriptor since Convolution backward
-      // API needs it. For that, we first need to create input, filter
-      // and output memory descriptors.
-      auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
-                                        memory::format::hwio);
-      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
-      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
-
-      // Allocate output tensor and shape
-      // TODO(nhasabni): Update this when support for MKL layout is added.
-      // Shape of output of Conv2DBackpropInput is same as 'input' of Conv2D.
-      TensorShape tf_output_shape(filter_shape);
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      Tensor* output_tensor = nullptr;
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
-
-      // Create memory for user data.
-      // Describe how the inputs and outputs of Convolution look like. Also
-      // specify buffers containing actual input and output data.
-      // Although input shape required is in MKL-DNN order, the layout is
-      // Tensorflow's layout (NHWC or NCHW depending on data format).
-      input.SetUsrMem(fwd_input_dims, mkl_data_format, &input_tensor);
-      // Outbackprop shape is NHWC or NCHW depending on data format. Since
-      // GetInputSizeInMklOrder function returns size in that order we just use
-      // use that function directly.
-      conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims);
-      if (!context->status().ok()) return;
-      outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor);
-      // Although output shape required is in MKL-DNN order,
-      // layout is Tensorflow's filter layout (HWIO)
-      // Shape of output of Conv2DBackpropInput is same as shape of filter.
-      memory::dims bwd_output_dims = fwd_filter_dims;
-      output.SetUsrMem(bwd_output_dims, memory::format::hwio, output_tensor);
-
-      // Create memory descriptors for convolution data w/ no specified format.
-      input.SetOpMemDesc(fwd_input_dims, memory::format::any);
-      outbackprop.SetOpMemDesc(obp_dims, memory::format::any);
-      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
-
-      // Create convolution backward weights primitive.
-      auto bwd_desc = convolution_backward_weights::desc(convolution_direct,
-                          input.GetOpMemDesc(), output.GetOpMemDesc(),
-                          outbackprop.GetOpMemDesc(), strides, padding_l,
-                          padding_r, TFPaddingToMklDnnPadding(padding_));
-
-      auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
-                                                              cpu_engine,
-                                                              fwd_pd);
-
-      PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output);
-    } catch (mkldnn::error &e) {
-     string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
-                                            error_msg));
+  TensorShape GetOutputTfShape(const TensorShape& input_shape,
+                               const TensorShape& filter_shape,
+                               const TensorShape& outbprop_shape) {
+    // Shape of output of Conv2DBackpropFilter is same as shape of filter.
+    return filter_shape;
+  }
+
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) {
+    // Shape of output of Conv2DBackpropFilter is same as shape of filter.
+    return fwd_filter_dims;
+  }
+
+  memory::format GetOutputFormat(const memory::format data_format) {
+    // Output layout is Tensorflow's filter layout (HWIO).
+    return memory::format::hwio;
+  }
+
+  void CreatePrimitive(OpKernelContext* context,
+                       const engine& cpu_engine,
+                       const convolution_forward::primitive_desc& conv_fwd_pd,
+                       MklDnnData<T>* input, MklDnnData<T>* filter,
+                       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
+                       Tensor** output_tensor,
+                       const memory::dims& strides,
+                       const memory::dims& padding_l,
+                       const memory::dims& padding_r,
+                       padding_kind padding,
+                       const memory::dims& bwd_output_dims,
+                       memory::format bwd_output_format) {
+    CHECK_NOTNULL(context);
+    CHECK_NOTNULL(input);
+    CHECK_NOTNULL(filter);
+    CHECK_NOTNULL(outbackprop);
+    CHECK_NOTNULL(output);
+    CHECK_NOTNULL(output_tensor);
+
+    MklDnnData<T>* bias_grad = nullptr;
+    int depth = 0;
+    if (biasEnabled) {
+      // Data structure for bias_grad
+      bias_grad = new MklDnnData<T> (&cpu_engine);
+      TensorShape obp_tf_shape = GetTfShape(context, 2);
+      depth = (MklConv2DBackpropCommonOp<Device, T>::GetTFDataFormat()
+                == FORMAT_NCHW) ?
+          obp_tf_shape.dim_size(1) : obp_tf_shape.dim_size(3);
+      memory::dims bias_grad_dims = {depth};
+      bias_grad->SetOpMemDesc(bias_grad_dims, memory::format::x);
+    }
+
+    // Create convolution backward weights primitive.
+    auto bwd_desc = (biasEnabled && (bias_grad != nullptr))?
+        convolution_backward_weights::desc(convolution_direct,
+                                input->GetOpMemDesc(), output->GetOpMemDesc(),
+                                bias_grad->GetOpMemDesc(),
+                                outbackprop->GetOpMemDesc(), strides, padding_l,
+                                padding_r, padding) :
+        convolution_backward_weights::desc(convolution_direct,
+                          input->GetOpMemDesc(), output->GetOpMemDesc(),
+                          outbackprop->GetOpMemDesc(), strides, padding_l,
+                          padding_r, padding);
+
+    auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
+                                                            cpu_engine,
+                                                            conv_fwd_pd);
+
+    // Allocate output tensor.
+    AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
+                         bwd_output_format, output_tensor);
+
+    CHECK_NOTNULL(*output_tensor);
+    // Set buffer handle using allocated output tensor.
+    output->SetUsrMemDataHandle(*output_tensor);
+
+    if (biasEnabled && (bias_grad != nullptr)) {
+      // Allocate bias_grad tensor
+      TensorShape bias_grad_shape({depth});
+      Tensor* bias_grad_tensor = nullptr;
+      AllocateBiasGradTensor(context, bias_grad_shape, &bias_grad_tensor);
+      memory::dims bias_grad_dims = {depth};
+      // Since Bias is 1D, we use format::x from MKLDNN to represent it.
+      auto bias_grad_md = memory::desc({bias_grad_dims}, MklDnnType<T>(),
+                                       memory::format::x);
+      bias_grad->SetUsrMem(bias_grad_md, bias_grad_tensor);
+      bias_grad->SetUsrMemDataHandle(bias_grad_tensor);
+    }
+
+    if (biasEnabled && (bias_grad != nullptr)) {
+      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, bias_grad);
+    } else {
+      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output);
     }
   }
 
- private:
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_;
+  // Allocate output tensor.
+  void AllocateOutputTensor(OpKernelContext* context,
+                  const convolution_backward_weights::primitive_desc& conv_pd,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+
+      // For BackpropFilter, we convert the output tensor back in Tensorflow
+      // layout. Because typically, BackpropFilter is the last operator in the
+      // graph that emit filter gradient that is provided to ApplyGradient
+      // method to update the filter. But it may be possible to eliminate this
+      // by forwarding filter in MKL layout if we support ApplyGradient method
+      // for MKL layout propagation.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(false);
+      // output_dims_mkl_order is in OIHW format.
+      // Allocate shape of TF tensor in HWIO format.
+      TensorShape output_tf_shape({output_dims_mkl_order[MklDnnDims::Dim_H],
+                                   output_dims_mkl_order[MklDnnDims::Dim_W],
+                                   output_dims_mkl_order[MklDnnDims::Dim_I],
+                                   output_dims_mkl_order[MklDnnDims::Dim_O]});
+      AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape,
+                                output_mkl_shape);
+  }
+
+  // Allocate tensor for bias grad
+  void AllocateBiasGradTensor(OpKernelContext* context,
+                              const TensorShape& bias_grad_shape,
+                              Tensor** bias_grad_tensor) {
+    CHECK_NOTNULL(bias_grad_tensor);
+
+    MklDnnShape bias_grad_mkl_shape;
+    bias_grad_mkl_shape.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, 1, bias_grad_tensor, bias_grad_shape,
+                              bias_grad_mkl_shape);
+  }
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
                   const convolution_backward_weights::primitive_desc& conv_pd,
                   MklDnnData<T>* input, MklDnnData<T>* obp,
-                  MklDnnData<T>* output) {
+                  MklDnnData<T>* output, MklDnnData<T>* bias_grad = nullptr) {
     // Create reorders between user layout and MKL layout if it is needed and
     // add it to the net before convolution.
     std::vector<primitive> net;
     input->CheckReorderToOpMem(conv_pd.src_primitive_desc(), &net);
     obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
+    // For BackpropFilter, we convert the output tensor back in Tensorflow
+    // layout.
     bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
                                       conv_pd.diff_weights_primitive_desc());
 
-    net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
-                                    obp->GetOpMem(), output->GetOpMem()));
+    if (biasEnabled && (bias_grad != nullptr)) {
+      net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
+                                      obp->GetOpMem(), output->GetOpMem(),
+                                      bias_grad->GetOpMem()));
+    } else {
+      net.push_back(convolution_backward_weights(conv_pd, input->GetOpMem(),
+                                      obp->GetOpMem(), output->GetOpMem()));
+    }
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
     if (output_reorder_required) {
       output->InsertReorderToUserMem(&net);
     }
 
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
-#endif
 
 #define REGISTER_MKL_FILTER_KERNELS(T)                              \
   REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter")          \
                               .Device(DEVICE_CPU)                   \
                               .TypeConstraint<T>("T")               \
                               .Label(mkl_op_registry::kMklOpLabel), \
-                          MklConv2DCustomBackpropFilterOp<CPUDevice, T>);
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T, false>);\
+  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilterWithBias")  \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklConv2DCustomBackpropFilterOp<CPUDevice, T, true>); \
+  REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DBackpropFilterWithBias")  \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+              MklDummyOp<CPUDevice, T>);
 
 TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
 #undef REGISTER_MKL_FILTER_KERNELS
+
+#endif  // INTEL_MKL_DNN
+
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index 4a47d0463ef778430d59fed32202bff02233a9e9..ef6db58d31f125487bd5beefb53710569b0584d8 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -49,9 +49,6 @@ limitations under the License.
 
 using mkldnn::stream;
 using mkldnn::prop_kind;
-
-using mkldnn::convolution_forward;
-using mkldnn::convolution_direct;
 using mkldnn::convolution_backward_data;
 #endif
 
@@ -362,143 +359,126 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
 #else
 
 template <typename Device, class T>
-class MklConv2DCustomBackpropInputOp : public OpKernel {
+class MklConv2DCustomBackpropInputOp :
+  public MklConv2DBackpropCommonOp<Device, T> {
  public:
-  ~MklConv2DCustomBackpropInputOp() {}
   explicit MklConv2DCustomBackpropInputOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    string data_format_str;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
-    OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    int stride_n = GetTensorDim(strides_, data_format_, 'N');
-    int stride_c = GetTensorDim(strides_, data_format_, 'C');
-    OP_REQUIRES(
-        context, (stride_n == 1 && stride_c == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
+      : MklConv2DBackpropCommonOp<Device, T>(context) { }
+  ~MklConv2DCustomBackpropInputOp() {}
 
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+ private:
+  const int kInputIndex_Filter = 1,
+            kInputIndex_InputSizes = 0,
+            kInputIndex_OutBackProp = 2;
+  void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                         const MklDnnShape& filter_mkl_shape,
+                         const MklDnnShape& obp_mkl_shape) {
+    // Tensor that feeds to 'Input' slot of BackpropInput is always just a shape
+    // of the Tensor and never an actual tensor. So it will never be in MKL
+    // layout.
+    CHECK(!input_mkl_shape.IsMklTensor())
+      << "Conv2DBackpropInput: input should not be in MKL Layout";
   }
 
-  void Compute(OpKernelContext* context) override {
-    try {
-      auto cpu_engine = engine(engine::cpu, 0);
+  size_t GetInputTensorIndexWithSizes() { return kInputIndex_InputSizes; }
 
-      MklDnnData<T> filter(&cpu_engine);
-      MklDnnData<T> outbackprop(&cpu_engine);
-      MklDnnData<T> output(&cpu_engine);
+  TensorShape MakeInputTfShape(OpKernelContext* context,
+                               const Tensor& input_tensor) {
+    TensorShape input_tf_shape;
+    CHECK_EQ(TensorShapeUtils::IsVector(input_tensor.shape()), true);
+    CHECK_EQ(TensorShapeUtils::MakeShape(input_tensor.vec<int32>(),
+                                         &input_tf_shape).ok(), true);
+    return input_tf_shape;
+  }
 
-      // Input tensors
-      const Tensor& input_tensor = MklGetInput(context, 0);
-      const Tensor& filter_tensor = MklGetInput(context, 1);
-      const Tensor& obp_tensor = MklGetInput(context, 2);  // Outbackprop
+  TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                const Tensor& filter_tensor) {
+    return GetTfShape(context, kInputIndex_Filter);
+  }
 
-      // Generate input shape.
-      TensorShape input_shape;
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()),
-        errors::InvalidArgument(
-              "Conv2DBackpropInput: input_sizes input must be 1-dim, not ",
-              input_tensor.dims()));
-      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                        input_tensor.vec<int32>(), &input_shape));
-      TensorShape filter_shape = filter_tensor.shape();
-      TensorShape obp_shape = obp_tensor.shape();
-
-      // By default, all dims are in MKL order. Only dims in TF order
-      // are those with prefix tf_order.
-      memory::dims obp_dims, fwd_input_dims, fwd_filter_dims;
-      memory::dims padding_l, padding_r, strides, fwd_output_dims;
-      memory::dims fwd_output_dims_tf_order;
-
-      // Get forward convolution parameters.
-      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(input_shape, filter_shape,
-                                         &fwd_input_dims, &fwd_filter_dims,
-                                         &strides,
-                                         &fwd_output_dims_tf_order,
-                                         &fwd_output_dims,
-                                         &padding_l, &padding_r);
-      if (!context->status().ok()) return;
-
-      // Create Convolution forward descriptor since Convolution backward
-      // API needs it. For that, we first need to create input, filter
-      // and output memory descriptors.
-      auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_);
-      auto fwd_src_md = memory::desc(fwd_input_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_filter_md = memory::desc(fwd_filter_dims, MklDnnType<T>(),
-                                        memory::format::hwio);
-      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(),
-                                     mkl_data_format);
-      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
-            convolution_direct, fwd_src_md, fwd_filter_md, fwd_out_md,
-            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
-      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
-
-      // Allocate output tensor and shape
-      // TODO(nhasabni): Update this when support for MKL layout is added.
-      // Shape of output of Conv2DBackpropInput is same as 'input' of Conv2D.
-      TensorShape tf_output_shape(input_shape);
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      Tensor* output_tensor = nullptr;
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
-
-      // Create memory for user data.
-      // Describe how the inputs and outputs of Convolution look like. Also
-      // specify buffers containing actual input and output data.
-      // Although input shape required is in MKL-DNN order, the layout is
-      // Tensorflow's layout (NHWC or NCHW depending on data format).
-      // Although filter shape (filter_dims) required is in MKL-DNN order,
-      // the layout is Tensorflow's layout (HWIO).
-      // Shape of Conv2DBackpropInput's filter is same as that of Conv2D filter.
-      filter.SetUsrMem(fwd_filter_dims, memory::format::hwio, &filter_tensor);
-      // Outbackprop shape is NHWC or NCHW depending on data format. Since
-      // GetInputSizeInMklOrder function returns size in that order we just use
-      // use that function directly.
-      conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims);
-      if (!context->status().ok()) return;
-      outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor);
-      // Although output shape required is in MKL-DNN order,
-      // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
-      // Shape of output of Conv2DBackpropInput is same as shape of 'input'
-      // of Conv2D.
-      memory::dims bwd_output_dims = fwd_input_dims;
-      output.SetUsrMem(bwd_output_dims, mkl_data_format, output_tensor);
-
-      // Create memory descriptors for convolution data w/ no specified format.
-      filter.SetOpMemDesc(fwd_filter_dims, memory::format::any);
-      outbackprop.SetOpMemDesc(obp_dims, memory::format::any);
-      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
-
-      // Create convolution backward data primitive.
-      auto bwd_desc = convolution_backward_data::desc(convolution_direct,
-                          output.GetOpMemDesc(), filter.GetOpMemDesc(),
-                          outbackprop.GetOpMemDesc(), strides, padding_l,
-                          padding_r, TFPaddingToMklDnnPadding(padding_));
-
-      auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
-                                                              cpu_engine,
-                                                              fwd_pd);
-
-      PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output);
-    } catch (mkldnn::error &e) {
-     string error_msg = "Status: " + std::to_string(e.status) +
-                       ", message: " + string(e.message) +
-                       ", in file " + string(__FILE__) + ":" +
-                       std::to_string(__LINE__);
-     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
-                                            error_msg));
-    }
+  TensorShape GetOutputTfShape(const TensorShape& input_shape,
+                               const TensorShape& filter_shape,
+                               const TensorShape& outbprop_shape) {
+    // Output Shape of Conv2DBackpropInput is same as shape of Conv2D 'input'.
+    return input_shape;
   }
 
- private:
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_;
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) {
+    // Output Shape of Conv2DBackpropInput is same as shape of Conv2D 'input'.
+    return fwd_input_dims;
+  }
+
+  memory::format GetOutputFormat(const memory::format data_format) {
+    // Output layout is Tensorflow's layout in data format order.
+    return data_format;
+  }
+
+  void CreatePrimitive(OpKernelContext* context,
+                       const engine& cpu_engine,
+                       const convolution_forward::primitive_desc& conv_fwd_pd,
+                       MklDnnData<T>* input, MklDnnData<T>* filter,
+                       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
+                       Tensor** output_tensor,
+                       const memory::dims& strides,
+                       const memory::dims& padding_l,
+                       const memory::dims& padding_r,
+                       padding_kind padding,
+                       const memory::dims& bwd_output_dims,
+                       memory::format bwd_output_format) {
+    CHECK_NOTNULL(context);
+    CHECK_NOTNULL(input);
+    CHECK_NOTNULL(filter);
+    CHECK_NOTNULL(outbackprop);
+    CHECK_NOTNULL(output);
+    CHECK_NOTNULL(output_tensor);
+
+    // Create convolution backward data primitive.
+    auto bwd_desc = convolution_backward_data::desc(convolution_direct,
+                      output->GetOpMemDesc(), filter->GetOpMemDesc(),
+                      outbackprop->GetOpMemDesc(), strides, padding_l,
+                      padding_r, padding);
+
+    auto bwd_pd = convolution_backward_data::primitive_desc(bwd_desc,
+                                                          cpu_engine,
+                                                          conv_fwd_pd);
+
+
+    // Allocate output tensor in TensorFlow and MKL layout.
+    AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
+                         bwd_output_format, output_tensor);
+    CHECK_NOTNULL(*output_tensor);
+    // Set buffer handle using allocated output tensor.
+    output->SetUsrMemDataHandle(*output_tensor);
+
+    PrepareAndExecutePrimitive(bwd_pd, filter, outbackprop, output);
+  }
+
+  // Allocate output tensor.
+  void AllocateOutputTensor(OpKernelContext* context,
+                  const convolution_backward_data::primitive_desc& conv_pd,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+
+      // Output primitive descriptor for backward data is diff_src.
+      auto dst_pd = conv_pd.diff_src_primitive_desc();
+
+      // Allocate shape of Mkl tensor.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(true);
+      output_mkl_shape.SetMklLayout(&dst_pd);
+      output_mkl_shape.SetElemType(MklDnnType<T>());
+      output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                                   output_dims_mkl_order, output_tf_format);
+
+      // Allocate shape of TF tensor.
+      TensorShape output_tf_shape;
+      output_tf_shape.AddDim(dst_pd.get_size() / sizeof(T));
+
+      AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape,
+                                output_mkl_shape);
+  }
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecutePrimitive(
@@ -511,22 +491,9 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
     filter->CheckReorderToOpMem(conv_pd.weights_primitive_desc(), &net);
     obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
-    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-                                      conv_pd.diff_src_primitive_desc());
-
     net.push_back(convolution_backward_data(conv_pd, obp->GetOpMem(),
                                     filter->GetOpMem(), output->GetOpMem()));
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
-    if (output_reorder_required) {
-      output->InsertReorderToUserMem(&net);
-    }
-
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index a9872b8d6d3ea89da0a73017af19cabbc25f78ce..0e77b45993c17815889005c4d313c5489ae2f14b 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -40,8 +40,7 @@ limitations under the License.
 #include "tensorflow/core/util/tensor_format.h"
 
 #include "tensorflow/core/util/mkl_util.h"
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
+
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
@@ -51,6 +50,9 @@ using mkldnn::prop_kind;
 
 using mkldnn::convolution_forward;
 using mkldnn::convolution_direct;
+#else
+#include "mkl_dnn.h"
+#include "mkl_dnn_types.h"
 #endif
 
 namespace tensorflow {
@@ -288,10 +290,8 @@ class MklConv2DOp : public OpKernel {
     mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd,
                                              dnnResourceFilter);
 
-    size_t filter_sizes[4] = {static_cast<size_t>(filter.dim_size(0)),
-                              static_cast<size_t>(filter.dim_size(1)),
-                              static_cast<size_t>(filter.dim_size(2)),
-                              static_cast<size_t>(filter.dim_size(3))};
+    size_t filter_sizes[4] = {filter.dim_size(0), filter.dim_size(1),
+                              filter.dim_size(2), filter.dim_size(3)};
     mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), filter_sizes,
                                             mkl_context.filter_strides);
 
@@ -510,9 +510,15 @@ class MklConv2DOp : public OpKernel {
       auto cpu_engine = engine(engine::cpu, 0);
 
       // Input tensors
-      size_t src_idx = 0, filter_idx = 1;
-      const Tensor& src_tensor = MklGetInput(context, src_idx);
-      const Tensor& filter_tensor = MklGetInput(context, filter_idx);
+      const Tensor& src_tensor = MklGetInput(context, kInputIndex_Src);
+      const Tensor& filter_tensor = MklGetInput(context, kInputIndex_Filter);
+
+      MklDnnShape src_mkl_shape, filter_mkl_shape;
+      GetMklShape(context, kInputIndex_Src, &src_mkl_shape);
+      GetMklShape(context, kInputIndex_Filter, &filter_mkl_shape);
+      OP_REQUIRES(context, filter_mkl_shape.IsMklTensor() == false,
+            errors::InvalidArgument("Filter should not be in "
+            "Mkl Layout"));
 
       MklDnnData<T> src(&cpu_engine);
       MklDnnData<T> filter(&cpu_engine);
@@ -523,8 +529,9 @@ class MklConv2DOp : public OpKernel {
 
       // Get shapes of input tensors in MKL-DNN order
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      conv_utl.GetConvFwdSizesInMklOrder(src_tensor.shape(),
-                                         filter_tensor.shape(),
+      auto src_tf_shape = GetTfShape(context, kInputIndex_Src);
+      auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter);
+      conv_utl.GetConvFwdSizesInMklOrder(src_tf_shape, filter_tf_shape,
                                          &src_dims, &filter_dims, &strides,
                                          &output_dims_tf_order,
                                          &output_dims_mkl_order, &padding_l,
@@ -532,58 +539,52 @@ class MklConv2DOp : public OpKernel {
       if (!context->status().ok()) return;
 
       // Check for corner case - if there is nothing to compute, return.
-      TensorShape tf_output_shape({output_dims_tf_order[0],
-                                output_dims_tf_order[1],
-                                output_dims_tf_order[2],
-                                output_dims_tf_order[3]});
-      Tensor* output_tensor = nullptr;
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape,
-                                mkl_output_mkl_shape);
-
-      // Forward filter in TF format from input at index 1 to output at index 1.
-      ForwardTfTensorInToOut(context, 1, 1);
+      TensorShape output_tf_shape = MklDnnDimsToTFShape(output_dims_tf_order);
 
-      if (tf_output_shape.num_elements() == 0) {
+      // Corner cases: output with 0 elements and 0 batch size.
+      Tensor* output_tensor = nullptr;
+      if (output_tf_shape.num_elements() == 0 ||
+          output_dims_tf_order[0] == 0) {
         // TODO(jbobba): Verify correctness here
         //               Need semantics for Null MKL tensor
+        MklDnnShape output_mkl_shape;
+        output_mkl_shape.SetMklTensor(false);
+        AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor,
+                                    src_tf_shape, output_mkl_shape);
+
+        // MklConv2D also outputs converted filter as 2nd output of Conv2D.
+        filter_mkl_shape.SetMklTensor(false);
+        Tensor* output_filter_tensor = nullptr;
+        AllocateOutputSetMklShape(context, kOutputIndex_Filter,
+                                  &output_filter_tensor,
+                                  filter_tf_shape, filter_mkl_shape);
         return;
       }
 
-      // Corner case to handle 0 batch size.
-      if (output_dims_tf_order[0] == 0) {
-        // Nothing to do, allocate output tensor and return
-        // TODO(nhasabni): remove this code later once serialization
-        // in MKL-DNN is supported.
-        AllocateOutputSetMklShape(context, 0, &output_tensor,
-                                  src_tensor.shape(), mkl_output_mkl_shape);
-        return;
-      } else {
-        // Otherwise regular output tensor allocation
-        // Allocate output tensor.
-      }
-      CHECK_NOTNULL(output_tensor);
-
       // Create memory for user data.
       // Describe how the inputs and outputs of Convolution look like. Also
       // specify buffers containing actual input and output data.
-      // Although input shape (src_dims) required is in MKL-DNN order,
-      // the layout is Tensorflow's layout (NHWC or NCHW depending on data
-      // format).
-      src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_),
-                    const_cast<void*>(static_cast<const void*>(
-                    src_tensor.flat<T>().data())));
+      auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
+      // If input is in MKL layout, then simply grab input layout; otherwise,
+      // construct input Tf layout. For TF layout, although input shape
+      // (src_dims) required is in MKL-DNN order, the layout is Tensorflow's
+      // layout (NHWC or NCHW depending on data format).
+      auto src_md = src_mkl_shape.IsMklTensor()
+                    ? src_mkl_shape.GetMklLayout()
+                    : memory::desc(src_dims, MklDnnType<T>(), tf_fmt);
+      src.SetUsrMem(src_md, &src_tensor);
       // Although filter shape (filter_dims) required is in MKL-DNN order,
       // the layout is Tensorflow's layout (HWIO).
-      filter.SetUsrMem(filter_dims, memory::format::hwio,
-                       const_cast<void*>(static_cast<const void*>(
-                       filter_tensor.flat<T>().data())));
-      // Although output shape (output_dims) required is in MKL-DNN order,
-      // layout is Tensorflow's layout (NHWC or NCHW depending on data format).
-      output.SetUsrMem(output_dims_mkl_order,
-                       TFDataFormatToMklDnnDataFormat(data_format_),
-                       output_tensor->flat<T>().data());
+      auto filter_md = filter_mkl_shape.IsMklTensor()  // Should NEVER be true
+                    ? filter_mkl_shape.GetMklLayout()
+          : memory::desc(filter_dims, MklDnnType<T>(), memory::format::hwio);
+      filter.SetUsrMem(filter_md, &filter_tensor);
+
+      // Set output shape (output_dims) required in MKL-DNN order.
+      // Currently, we set output layout as Tensorflow's layout (NHWC or NCHW
+      // depending on data format). But later we propagate Mkl layout of the
+      // output to the next op directly.
+      output.SetUsrMem(output_dims_mkl_order, tf_fmt);
 
       // Create memory descriptors for convolution data w/ no specified format.
       src.SetOpMemDesc(src_dims, memory::format::any);
@@ -594,11 +595,9 @@ class MklConv2DOp : public OpKernel {
       if (biasEnabled) {
         MklDnnData<T> bias(&cpu_engine);
         memory::dims bias_size;
-        conv_utl.GetBiasSizeInMklOrder(2 /* bias idx */, &bias_size);
-        const Tensor& bias_tensor = MklGetInput(context, 2);
-        bias.SetUsrMem(bias_size, memory::format::x,
-                       const_cast<void*>(static_cast<const void*>(
-                       bias_tensor.flat<T>().data())));
+        conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size);
+        const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias);
+        bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor);
         bias.SetOpMemDesc(bias_size, memory::format::any);
 
         // Create convolution primitive with Bias.
@@ -609,7 +608,18 @@ class MklConv2DOp : public OpKernel {
 
         auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                 cpu_engine);
-        PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output);
+        AllocateOutputTensor(context, conv_prim_desc,
+                             output_dims_mkl_order, tf_fmt, &output_tensor);
+        // Set data handle for output.
+        output.SetUsrMemDataHandle(output_tensor);
+
+        Tensor* filter_out_tensor = nullptr;
+        AllocateFilterOutputTensor(context, conv_prim_desc,
+                        TFShapeToMklDnnDims(filter_tf_shape),
+                        &filter_out_tensor);
+
+        PrepareAndExecuteNet(conv_prim_desc, &src, &filter,
+                            &bias, &output, filter_out_tensor);
       } else {
         // Create convolution primitive without Bias.
         auto conv_desc = convolution_forward::desc(prop_kind::forward,
@@ -619,7 +629,17 @@ class MklConv2DOp : public OpKernel {
 
         auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                 cpu_engine);
-        PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output);
+        AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order,
+                             tf_fmt, &output_tensor);
+        // Set data handle for output.
+        output.SetUsrMemDataHandle(output_tensor);
+
+        Tensor* filter_out_tensor = nullptr;
+        AllocateFilterOutputTensor(context, conv_prim_desc,
+                TFShapeToMklDnnDims(filter_tf_shape),
+                &filter_out_tensor);
+        PrepareAndExecuteNet(conv_prim_desc, &src, &filter,
+                            nullptr, &output, filter_out_tensor);
       }
     } catch (mkldnn::error &e) {
       string error_msg = "Status: " + std::to_string(e.status) +
@@ -635,23 +655,83 @@ class MklConv2DOp : public OpKernel {
   std::vector<int32> strides_;
   Padding padding_;
   TensorFormat data_format_;
+  const int kInputIndex_Src = 0,
+            kInputIndex_Filter = 1,
+            kInputIndex_Bias = 2;
+  const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1;
+
+  // Allocate output tensor.
+  void AllocateOutputTensor(
+                  OpKernelContext* context,
+                  const convolution_forward::primitive_desc& conv_prim_desc,
+                  const memory::dims& output_dims_mkl_order,
+                  memory::format output_tf_format, Tensor** output_tensor) {
+      CHECK_NOTNULL(output_tensor);
+      auto dst_pd = conv_prim_desc.dst_primitive_desc();
+
+      // Allocate shape of Mkl tensor.
+      MklDnnShape output_mkl_shape;
+      output_mkl_shape.SetMklTensor(true);
+      output_mkl_shape.SetMklLayout(&dst_pd);
+      output_mkl_shape.SetElemType(MklDnnType<T>());
+      output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+              output_dims_mkl_order, output_tf_format);
+
+      // Allocate shape of TF tensor.
+      TensorShape output_tf_shape;
+      output_tf_shape.AddDim((dst_pd.get_size() / sizeof(T)));
+
+      AllocateOutputSetMklShape(context, kOutputIndex_Dst, output_tensor,
+                                output_tf_shape, output_mkl_shape);
+  }
+
+  // Allocate output tensor.
+  void AllocateFilterOutputTensor(
+                  OpKernelContext* context,
+                  const convolution_forward::primitive_desc& conv_prim_desc,
+                  const memory::dims& filter_dims_tf_order,
+                  Tensor** filter_tensor) {
+      CHECK_NOTNULL(filter_tensor);
+      auto filter_pd = conv_prim_desc.weights_primitive_desc();
+
+      // Allocate shape of Mkl tensor.
+      MklDnnShape filter_mkl_shape;
+      filter_mkl_shape.SetMklTensor(true);
+      filter_mkl_shape.SetMklLayout(&filter_pd);
+      filter_mkl_shape.SetElemType(MklDnnType<T>());
+
+      // The format of the filter is actually OIhw8i8o, but TF doesn't support
+      // this format. Just use format::blocked for now because the layout
+      // is stored in the MKL data.
+      filter_mkl_shape.SetTfLayout(filter_dims_tf_order.size(),
+                  filter_dims_tf_order, memory::format::blocked);
+
+      // Allocate the data space for the filter to propagate as TF tensor.
+      TensorShape filter_tf_shape;
+      filter_tf_shape.AddDim((filter_pd.get_size() / sizeof(T)));
+
+      AllocateOutputSetMklShape(context, kOutputIndex_Filter, filter_tensor,
+              filter_tf_shape, filter_mkl_shape);
+  }
 
   // Prepare and execute net - checks for input and output reorders.
   void PrepareAndExecuteNet(
                   const convolution_forward::primitive_desc& conv_prim_desc,
                   MklDnnData<T>* src, MklDnnData<T>* filter,
-                  MklDnnData<T>* bias, MklDnnData<T>* output) {
+                  MklDnnData<T>* bias, MklDnnData<T>* output,
+                  Tensor* filter_out_tensor) {
+    CHECK_NOTNULL(filter_out_tensor);
+
     // Create reorders between user layout and MKL layout if it is needed and
-    // add it to the net before convolution.
+    // add it to the net before convolution. No need to check for output
+    // reorder as we propagate output layout to the next layer.
     std::vector<primitive> net;
     src->CheckReorderToOpMem(conv_prim_desc.src_primitive_desc(), &net);
-    filter->CheckReorderToOpMem(conv_prim_desc.weights_primitive_desc(), &net);
 
-    // Memory for output of convolution. Since we may need reorder on the
-    // output side, we will prepare reorder primitive in case output
-    // reorder to user memory is required.
-    bool output_reorder_required = output->PrepareReorderToUserMemIfReq(
-                                      conv_prim_desc.dst_primitive_desc());
+    // rather than re-order to a temp buffer, reorder directly to the
+    // filter output tensor
+    filter->CheckReorderToOpMem(conv_prim_desc.weights_primitive_desc(),
+                    filter->GetTensorBuffer(filter_out_tensor), &net);
 
     // Create convolution primitive and add it to net.
     if (bias) {
@@ -665,13 +745,6 @@ class MklConv2DOp : public OpKernel {
                                     filter->GetOpMem(), output->GetOpMem()));
     }
 
-    // Insert reorder primitive in the net for output reorder if reorder is
-    // required.
-    if (output_reorder_required) {
-      output->InsertReorderToUserMem(&net);
-    }
-
-    // Handle output reorder
     stream(stream::kind::eager).submit(net).wait();
   }
 };
@@ -688,7 +761,12 @@ class MklConv2DOp : public OpKernel {
                               .Device(DEVICE_CPU)                   \
                               .TypeConstraint<T>("T")               \
                               .Label(mkl_op_registry::kMklOpLabel), \
-                          MklConv2DOp<CPUDevice, T, true>);
+                          MklConv2DOp<CPUDevice, T, true>);         \
+  REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DWithBias")          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklDummyOp<CPUDevice, T>);
 
 TF_CALL_float(REGISTER_MKL_CPU);
 
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index f0cb37f8a42c19cad183af2e0de7db2931cf299a..c6456bd5c330d8a5672a99dc7f649f3bab4d3519 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <vector>
 #include <limits>
+#include <string>
 
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -41,6 +42,12 @@ limitations under the License.
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+
+using mkldnn::convolution_forward;
+using mkldnn::convolution_direct;
 #endif
 
 namespace tensorflow {
@@ -108,7 +115,13 @@ class MklDnnConvUtil {
   #undef CHECK_BOUNDS
 
     // MKL-DNN always requires input in NCHW format.
-    *input_dims = {input_batch, input_depth, input_rows, input_cols};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_N] = input_batch;
+    mkldnn_sizes[MklDnnDims::Dim_C] = input_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = input_rows;
+    mkldnn_sizes[MklDnnDims::Dim_W] = input_cols;
+
+    *input_dims = mkldnn_sizes;
   }
 
   // Calculate Convolution filter size in MKL-DNN order. MKL-DNN
@@ -156,7 +169,13 @@ class MklDnnConvUtil {
 
     // MKL-DNN always needs filter in OIHW format.
     // OIHW = (out_depth, in_depth, rows, cols)
-    *filter_dims = {out_depth, in_depth, filter_rows, filter_cols};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_O] = out_depth;
+    mkldnn_sizes[MklDnnDims::Dim_I] = in_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = filter_rows;
+    mkldnn_sizes[MklDnnDims::Dim_W] = filter_cols;
+
+    *filter_dims = mkldnn_sizes;
   }
 
   // Calculate Convolution filter size in MKL-DNN order. MKL-DNN
@@ -167,9 +186,9 @@ class MklDnnConvUtil {
   GetFilterSizeInMklOrder(size_t src_index, size_t filter_index,
                           memory::dims *filter_dims) {
     CHECK_NOTNULL(filter_dims);
-    const Tensor& input = MklGetInput(context_, src_index);
-    const Tensor& filter = MklGetInput(context_, filter_index);
-    GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims);
+    GetFilterSizeInMklOrder(GetTfShape(context_, src_index),
+                            GetTfShape(context_, filter_index),
+                            filter_dims);
   }
 
   // Calculate Bias size for 2D Convolution. Function does not return
@@ -238,8 +257,12 @@ class MklDnnConvUtil {
     *output_dims_tf_order = TFShapeToMklDnnDims(out_shape);
 
     // MKL-DNN always needs output in NCHW format.
-    *output_dims_mkl_order = {out_batch, out_depth, static_cast<int>(out_rows),
-                   static_cast<int>(out_cols)};
+    std::vector<int> mkldnn_sizes(4, -1);
+    mkldnn_sizes[MklDnnDims::Dim_N] = out_batch;
+    mkldnn_sizes[MklDnnDims::Dim_C] = out_depth;
+    mkldnn_sizes[MklDnnDims::Dim_H] = static_cast<int>(out_rows);
+    mkldnn_sizes[MklDnnDims::Dim_W] = static_cast<int>(out_cols);
+    *output_dims_mkl_order = mkldnn_sizes;
 
     // Now handle padding. MKL-DNN uses asymetric padding.
     *pad_l = {static_cast<int>(pad_top), static_cast<int>(pad_left)};
@@ -261,14 +284,14 @@ class MklDnnConvUtil {
     CHECK_NOTNULL(pad_l);
     CHECK_NOTNULL(pad_r);
 
-    const Tensor& input = MklGetInput(context_, src_index);
-    const Tensor& filter = MklGetInput(context_, filter_index);
+    auto input_tf_shape = GetTfShape(context_, src_index);
+    auto filter_tf_shape = GetTfShape(context_, filter_index);
 
-    OP_REQUIRES(context_, input.dims() == 4,
+    OP_REQUIRES(context_, input_tf_shape.dims() == 4,
                 errors::InvalidArgument("input must be 4-dimensional",
-                                          input.shape().DebugString()));
+                                          input_tf_shape.DebugString()));
 
-    GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(),
+    GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape,
                                   strides, output_dims_tf_order,
                                   output_dims_mkl_order, pad_l, pad_r);
   }
@@ -309,8 +332,258 @@ class MklDnnConvUtil {
   }
 };
 
+/////////////////////////////////////////////////////////////////////
+///  Common class that implements Conv2DBackpropFilter and Input
+/////////////////////////////////////////////////////////////////////
+
+template <typename Device, class T>
+class MklConv2DBackpropCommonOp :  public OpKernel {
+ public:
+  ~MklConv2DBackpropCommonOp() {}
+  explicit MklConv2DBackpropCommonOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string data_format_str;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
+    int stride_n = GetTensorDim(strides_, data_format_, 'N');
+    int stride_c = GetTensorDim(strides_, data_format_, 'C');
+    OP_REQUIRES(
+        context, (stride_n == 1 && stride_c == 1),
+        errors::InvalidArgument("Current implementation does not yet support "
+                                "strides in the batch and depth dimensions."));
+
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+
+      // Prepare common tensors for Conv2DBackpropInput and
+      // Conv2DBackpropFilter.
+      MklDnnData<T> input(&cpu_engine);
+      MklDnnData<T> filter(&cpu_engine);
+      MklDnnData<T> outbackprop(&cpu_engine);
+      MklDnnData<T> output(&cpu_engine);
+
+      // Input tensors
+      const int kInputIdx = 0, kFilterIdx = 1, kOutbpropIdx = 2;
+      const Tensor& input_tensor = MklGetInput(context, kInputIdx);
+      const Tensor& filter_tensor = MklGetInput(context, kFilterIdx);
+      const Tensor& outbprop_tensor = MklGetInput(context, kOutbpropIdx);
+
+      MklDnnShape input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape;
+      GetMklShape(context, kInputIdx, &input_mkl_shape);
+      GetMklShape(context, kFilterIdx, &filter_mkl_shape);
+      GetMklShape(context, kOutbpropIdx, &outbprop_mkl_shape);
+      // Allow operator-specific sanity checking of shapes.
+      ValidateMklShapes(input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape);
+
+      // Allow operator-specific generation of shapes.
+      // E.g., Conv2DBackpropFilter gets filter as filter_sizes. It is a
+      // tensor containing shape of filter. So filter.shape() is not
+      // a correct way to get filter shape. These operator-specific calls
+      // allow this class to handle this case.
+      TensorShape input_tf_shape = MakeInputTfShape(context, input_tensor);
+      TensorShape filter_tf_shape = MakeFilterTfShape(context, filter_tensor);
+      TensorShape outbprop_tf_shape = GetTfShape(context, kOutbpropIdx);
+
+      // Corner cases: output with 0 elements and 0 batch size.
+      Tensor* output_tensor = nullptr;
+      if (input_tf_shape.num_elements() == 0 ||
+          filter_tf_shape.num_elements() == 0 ||
+          outbprop_tf_shape.num_elements() == 0) {
+        MklDnnShape output_mkl_shape;
+        output_mkl_shape.SetMklTensor(false);
+        TensorShape output_tf_shape = GetOutputTfShape(input_tf_shape,
+                                                       filter_tf_shape,
+                                                       outbprop_tf_shape);
+        const int kOutputIdx = 0;
+        AllocateOutputSetMklShape(context, kOutputIdx, &output_tensor,
+                                    output_tf_shape, output_mkl_shape);
+        CHECK_NOTNULL(output_tensor);
+
+        // if output tensor has more than 0 elements, we need to 0 them out.
+        for (size_t i = 0; i < output_tf_shape.num_elements(); ++i) {
+          output_tensor->flat<T>().data()[i] = 0;
+        }
+
+        return;
+      }
+
+      // By default, all dims are in MKL order. Only dims in TF order
+      // are those with prefix tf_order.
+      memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims;
+      memory::dims padding_l, padding_r, strides, fwd_output_dims;
+      memory::dims fwd_output_dims_tf_order;
+
+      // Get forward convolution parameters.
+      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
+      conv_utl.GetConvFwdSizesInMklOrder(input_tf_shape, filter_tf_shape,
+                                         &fwd_input_dims, &fwd_filter_dims,
+                                         &strides,
+                                         &fwd_output_dims_tf_order,
+                                         &fwd_output_dims,
+                                         &padding_l, &padding_r);
+      if (!context->status().ok()) return;
+
+      // Create Convolution forward descriptor since Convolution backward
+      // API needs it. For that, we first need to create input, filter
+      // and output memory descriptors.
+      auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
+      // If input is in MKL layout, then simply grab input layout; otherwise,
+      // construct input TF layout. For TF layout, although input shape
+      // required is in MKL-DNN order, the layout is Tensorflow's layout
+      // (NHWC or NCHW depending on data format).
+      auto fwd_input_md = input_mkl_shape.IsMklTensor() ?
+                          input_mkl_shape.GetMklLayout() :
+                       memory::desc(fwd_input_dims, MklDnnType<T>(), tf_fmt);
+      // If filter is in MKL layout, then simply grab filter layout; otherwise
+      // construct filter in TF layout. For TF layout, filter is in HWIO format.
+      auto fwd_filter_md = filter_mkl_shape.IsMklTensor() ?
+                          filter_mkl_shape.GetMklLayout() :
+                          memory::desc(fwd_filter_dims, MklDnnType<T>(),
+                                       memory::format::hwio);
+      // Tensorflow Output of Conv2D is in data_format order.
+      auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType<T>(), tf_fmt);
+      auto fwd_desc = convolution_forward::desc(prop_kind::forward,
+            convolution_direct, fwd_input_md, fwd_filter_md, fwd_out_md,
+            strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_));
+      auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine);
+
+      // Create memory for user data. Describe how the inputs and outputs of
+      // Convolution look like. Also specify buffers containing actual input
+      // and output data.
+
+      // Since this is a common class for both Conv2DBackpropFilter and
+      // Conv2DBackpropInput, we skip SetUsrMem call for input tensor (for
+      // Conv2DBackpropInput) and for filter tensor (for
+      // conv2DBackpropFilter) depending on which tensor is int32 type.
+      size_t input_with_sizes = GetInputTensorIndexWithSizes();
+      if (input_with_sizes != kInputIdx) {
+        // Shape of Conv2DBackpropFilter's input is same as Conv2D input.
+        input.SetUsrMem(fwd_input_md, &input_tensor);
+      } else if (input_with_sizes != kFilterIdx) {
+        // Shape of Conv2DBackpropInput's filter is same as Conv2D filter.
+        filter.SetUsrMem(fwd_filter_md, &filter_tensor);
+      }
+
+      conv_utl.GetInputSizeInMklOrder(outbprop_tf_shape, &outbprop_dims);
+      if (!context->status().ok()) return;
+      if (outbprop_mkl_shape.IsMklTensor()) {
+        // If outbackprop is in Mkl layout, then simply grab it.
+        auto outbprop_md = outbprop_mkl_shape.GetMklLayout();
+        outbackprop.SetUsrMem(outbprop_md, &outbprop_tensor);
+      } else {
+        // If outbackprop is in TensorFlow layout, then we need to create memory
+        // descriptor for it. Outbackprop shape is data format order.
+        outbackprop.SetUsrMem(outbprop_dims, tf_fmt, &outbprop_tensor);
+      }
+
+      // Operator specific call to get output shape and data_format.
+      auto bwd_output_dims = GetOutputDims(fwd_input_dims, fwd_filter_dims);
+      auto bwd_output_format = GetOutputFormat(tf_fmt);
+      output.SetUsrMem(bwd_output_dims, bwd_output_format);
+
+      // Create memory descriptors for convolution data w/ no specified format.
+      input.SetOpMemDesc(fwd_input_dims, memory::format::any);
+      filter.SetOpMemDesc(fwd_filter_dims, memory::format::any);
+      outbackprop.SetOpMemDesc(outbprop_dims, memory::format::any);
+      output.SetOpMemDesc(bwd_output_dims, memory::format::any);
+
+      // Operator-specific call to create and execute primitive.
+      CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter,
+                      &outbackprop, &output, &output_tensor,
+                      strides, padding_l, padding_r,
+                      TFPaddingToMklDnnPadding(padding_),
+                      bwd_output_dims, bwd_output_format);
+    } catch (mkldnn::error &e) {
+     string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+     OP_REQUIRES_OK(context, errors::Aborted("Operation received an exception:",
+                                            error_msg));
+    }
+  }
+
+  /// Pure virtual function to allow operator to check for validity of input
+  /// shapes. Function asserts that input shapes are valid.
+  virtual void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
+                                 const MklDnnShape& filter_mkl_shape,
+                                 const MklDnnShape& outbprop_mkl_shape) = 0;
+
+  /// Operator-specific function that returns index of input that is
+  /// representing input sizes. For Conv2DBackpropFilter it returns 1 since
+  /// filter for this operator is filter shape. For Conv2DBackpropInput it
+  /// returns 0 (for input).
+  virtual size_t GetInputTensorIndexWithSizes() = 0;
+
+  /// Get TensorFlow shape of input tensor.
+  virtual TensorShape MakeInputTfShape(OpKernelContext* context,
+                                      const Tensor& input_tensor) = 0;
+
+  /// Get TensorFlow shape of filter tensor.
+  virtual TensorShape MakeFilterTfShape(OpKernelContext* context,
+                                       const Tensor& filter_tensor) = 0;
+
+  /// Get the TensorFlow shape of output tensor.
+  virtual TensorShape GetOutputTfShape(const TensorShape& input_shape,
+                                       const TensorShape& filter_shape,
+                                       const TensorShape& outbprop_shape) = 0;
+
+  /// Get shape of output in MKL-DNN order. Computes shape of output from
+  /// input shape (fwd_input_dims) and filter shape (fwd_filter_dims).
+  virtual
+  const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
+                                    const memory::dims& fwd_filter_dims) = 0;
+
+  /// Get data_format of output in MKL-DNN order. If output data format is
+  /// same as input data format, then it simply returns value of data_format
+  /// parameter as it is.
+  virtual memory::format GetOutputFormat(const memory::format data_format) = 0;
+
+  /// Create and execute the primitive storing output in the output_tensor.
+  virtual void CreatePrimitive(OpKernelContext* context,
+    const engine& cpu_engine,
+    const convolution_forward::primitive_desc& conv_fwd_pd,
+    MklDnnData<T>* input, MklDnnData<T>* filter, MklDnnData<T>* outbackprop,
+    MklDnnData<T>* output, Tensor** output_tensor, const memory::dims& strides,
+    const memory::dims& padding_l, const memory::dims& padding_r,
+    padding_kind padding, const memory::dims& bwd_output_dims,
+    memory::format bwd_output_format) = 0;
+
+  // Get the data_format {NCHW, NHWC}
+  TensorFormat GetTFDataFormat () { return data_format_; }
+
+ private:
+  std::vector<int32> strides_;
+  Padding padding_;
+  TensorFormat data_format_;
+};
 #endif  // INTEL_MKL_DNN
 
+/////////////////////////////////////////////////////////////////////
+///  Dummy Mkl op that is just used for operators that are intermediate
+///  output of node fusion in the graph
+/////////////////////////////////////////////////////////////////////
+
+template <typename Device, typename T>
+class MklDummyOp : public OpKernel {
+ public:
+  ~MklDummyOp() {}
+
+  explicit MklDummyOp(OpKernelConstruction* context) :
+    OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    TF_CHECK_OK(errors::Unimplemented("This is a dummy op."
+                                      "It should not have been invoked."));
+  }
+};
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_
diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
index bc9e906c39a9a7f5f4b2ae83afc6774aecb38c48..8340a91d059de16dfbabf53067f24fbca1bc1385 100644
--- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc
@@ -25,10 +25,24 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+using mkldnn::use_scale_shift;
+using mkldnn::use_global_stats;
+using mkldnn::batch_normalization_forward;
+using mkldnn::batch_normalization_backward;
+#endif
+
 // TODO(inteltf) Address comments from PR 8968.
 
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
+
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklFusedBatchNormOp : public OpKernel {
  public:
@@ -46,7 +60,6 @@ class MklFusedBatchNormOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     MklFusedBatchNormOpContext mkl_context;
-
     const Tensor& input = MklGetInput(context, 0);
     const Tensor& scale = MklGetInput(context, 1);
     const Tensor& shift = MklGetInput(context, 2);
@@ -55,6 +68,7 @@ class MklFusedBatchNormOp : public OpKernel {
 
     GetMklShape(context, 0, &(mkl_context.mkl_shape_input_shape));
     bool input_in_mkl_format = mkl_context.mkl_shape_input_shape.IsMklTensor();
+
     if (!input_in_mkl_format) {
       OP_REQUIRES(context, input.dims() == 4,
                   errors::InvalidArgument("input must be 4-dimensional",
@@ -69,10 +83,12 @@ class MklFusedBatchNormOp : public OpKernel {
     OP_REQUIRES(context, est_mean.dims() == 1,
                 errors::InvalidArgument("estimated_mean must be 1-dimensional",
                                         est_mean.shape().DebugString()));
+
     OP_REQUIRES(
         context, est_variance.dims() == 1,
         errors::InvalidArgument("estimated_variance must be 1-dimensional",
                                 est_variance.shape().DebugString()));
+
     if (is_training_) {
       OP_REQUIRES(context, est_mean.dim_size(0) == 0,
                   errors::InvalidArgument("estimated_mean empty for training",
@@ -258,7 +274,6 @@ class MklFusedBatchNormOp : public OpKernel {
             E_SUCCESS);
       }
     }
-
     void MklPrepareContextInputs(OpKernelContext* context,
                                  Tensor* mkl_tmp_input_buf_tensor,
                                  Tensor* mkl_tmp_scale_shift_buf_tensor) {
@@ -325,15 +340,6 @@ class MklFusedBatchNormOp : public OpKernel {
   } MklFusedBatchNormOpContext;
 };
 
-#define REGISTER_MKL_CPU(T)                                         \
-  REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNorm")                \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<T>("T")               \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklFusedBatchNormOp<CPUDevice, T>);
-TF_CALL_float(REGISTER_MKL_CPU);
-#undef REGISTER_MKL_CPU
-
 template <typename Device, typename T>
 class MklFusedBatchNormGradOp : public OpKernel {
  public:
@@ -595,7 +601,7 @@ class MklFusedBatchNormGradOp : public OpKernel {
       mkl_res_batchnorm_bwd[dnnResourceSrc] =
           (mkl_convert_input) ? mkl_buf_converted_input : mkl_buf_input;
 
-      bool mkl_convert_out_backprop;
+     bool mkl_convert_out_backprop;
       dnnPrimitive_t mkl_prim_convert_out_backprop = nullptr;
       dnnLayout_t mkl_lt_internal_out_backprop = nullptr;
       void* mkl_buf_converted_out_backprop = nullptr;
@@ -675,6 +681,758 @@ class MklFusedBatchNormGradOp : public OpKernel {
     }
   } MklFusedBatchNormGradOpContext;
 };
+#endif
+
+#ifdef INTEL_MKL_DNN
+
+template <typename Device, typename T>
+class MklFusedBatchNormOp : public OpKernel {
+ public:
+  explicit MklFusedBatchNormOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    float epsilon;
+    OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon));
+    epsilon_ = T(epsilon);
+    string tensor_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format));
+    OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("is_training", &is_training_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const size_t kSrcIndex = 0;       // index of src input tensor
+      const size_t kScaleIndex = 1;     // index of scale tensor
+      const size_t kShiftIndex = 2;     // index of shift tensor
+      const size_t kMeanIndex = 3;      // index of est_mean tensor
+      const size_t kVarianceIndex = 4;  // index of est_variance tensor
+
+      const Tensor& src_tensor          = MklGetInput(context, kSrcIndex);
+      const Tensor& scale_tensor        = MklGetInput(context, kScaleIndex);
+      const Tensor& shift_tensor        = MklGetInput(context, kShiftIndex);
+      const Tensor& est_mean_tensor     = MklGetInput(context, kMeanIndex);
+      const Tensor& est_variance_tensor = MklGetInput(context,
+                                                      kVarianceIndex);
+
+      TensorShape tf_shape_src;
+      MklDnnShape dnn_shape_src;
+      GetMklShape(context, kSrcIndex, &dnn_shape_src);
+
+      if (dnn_shape_src.IsMklTensor()) {
+        tf_shape_src = dnn_shape_src.GetTfShape();
+        OP_REQUIRES(context, dnn_shape_src.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      } else {
+        tf_shape_src = src_tensor.shape();
+        OP_REQUIRES(context, src_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      }
+      OP_REQUIRES(context, scale_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "scale must be 1-dimensional",
+                      scale_tensor.shape().DebugString()));
+      OP_REQUIRES(context, shift_tensor.dims() == 1,
+                  errors::InvalidArgument("offset must be 1-dimensional",
+                                        shift_tensor.shape().DebugString()));
+      OP_REQUIRES(context, est_mean_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "estimated_mean must be 1-dimensional",
+                      est_mean_tensor.shape().DebugString()));
+      OP_REQUIRES(context, est_variance_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "estimated_variance must be 1-dimensional",
+                      est_variance_tensor.shape().DebugString()));
+
+      if (is_training_) {
+        OP_REQUIRES(context, est_mean_tensor.dim_size(0) == 0,
+                    errors::InvalidArgument(
+                        "estimated_mean must be empty for training",
+                        est_mean_tensor.shape().DebugString()));
+        OP_REQUIRES(context, est_variance_tensor.dim_size(0) == 0,
+                    errors::InvalidArgument(
+                        "estimated_variance must be empty for training",
+                        est_variance_tensor.shape().DebugString()));
+      }
+
+      // special case: input with 0 element and 0 batch size
+      Tensor* dst_tensor = nullptr;
+      if (tf_shape_src.num_elements() == 0) {
+         HandleEmptyInput(context,
+                          tf_shape_src,
+                          scale_tensor.shape(),
+                          &dst_tensor);
+         return;
+      }
+
+      if (dnn_shape_src.IsMklTensor())
+        depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C);
+      else
+        ExtractParams(context);
+
+      // Indices of output tensors
+      const size_t kDstIndex = 0;
+
+      // allocate 4 output TF tensors
+      Tensor* batch_mean_tensor = nullptr;
+      Tensor* batch_variance_tensor = nullptr;
+      Tensor* saved_mean_tensor = nullptr;
+      Tensor* saved_variance_tensor = nullptr;
+      AllocateTFOutputs(context,
+                        scale_tensor.shape(),
+                        &batch_mean_tensor,
+                        &batch_variance_tensor,
+                        &saved_mean_tensor,
+                        &saved_variance_tensor);
+
+      if (is_training_)
+        SetMeanVariance(*batch_mean_tensor, *batch_variance_tensor);
+      else
+        SetMeanVariance(est_mean_tensor, est_variance_tensor);
+
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      memory::format format_m;
+      if (dnn_shape_src.IsMklTensor()) {
+        if (dnn_shape_src.IsTensorInNCHWFormat()) {
+          format_m = memory::format::nchw;
+        } else {
+          format_m = memory::format::nhwc;
+        }
+      } else {
+        format_m = TFDataFormatToMklDnnDataFormat(tensor_format_);
+      }
+
+      // set src primitive
+      memory::dims src_dims;
+      if (dnn_shape_src.IsMklTensor()) {
+        src_dims = TFShapeToMklDnnDimsInNCHW(dnn_shape_src.GetTfShape(),
+                                             tensor_format_);
+      } else {
+        src_dims = TFShapeToMklDnnDimsInNCHW(src_tensor.shape(),
+                                             tensor_format_);
+      }
+
+      auto src_md = dnn_shape_src.IsMklTensor()
+                    ? dnn_shape_src.GetMklLayout()
+                    : memory::desc(src_dims, MklDnnType<T>(), format_m);
+      src.SetUsrMem(src_md, &src_tensor);
+
+      // set weights primitive
+      // MKL-DNN packs scale & shift as "weights":
+      // <scale>...<scale><shift>...<shift>
+      auto weights_desc = memory::desc({2, depth_},
+                                       MklDnnType<T>(),
+                                       memory::format::nc);
+      auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine);
+      auto weights_m = memory(weights_pd);
+      T* weights_data = reinterpret_cast<T*>(
+                        weights_m.get_data_handle());
+      T* scale_tf = reinterpret_cast<T*>(
+                    const_cast<T*>(scale_tensor.flat<T>().data()));
+      T* shift_tf = reinterpret_cast<T*>(
+                    const_cast<T*>(shift_tensor.flat<T>().data()));
+
+      for (int k=0; k < depth_; k++) {
+        weights_data[k] = scale_tf[k];
+        weights_data[k + depth_] = shift_tf[k];
+      }
+
+      // set mean primitive
+      auto mean_desc = memory::desc({1, depth_},
+                                    MklDnnType<T>(),
+                                    memory::format::nc);
+      auto mean_pd = memory::primitive_desc(mean_desc, cpu_engine);
+      char* saved_mean_data_tf = reinterpret_cast<char*>
+                                 (saved_mean_tensor->flat<T>().data());
+      std::memcpy(saved_mean_data_tf,
+                  reinterpret_cast<char*>(mean_values_),
+                  depth_*sizeof(T));
+      auto mean_m = memory(mean_pd,
+                           reinterpret_cast<void*>(saved_mean_data_tf));
+
+      // set variance primitive
+      auto variance_desc = memory::desc({1, depth_},
+                                    MklDnnType<T>(),
+                                    memory::format::nc);
+      auto variance_pd = memory::primitive_desc(variance_desc, cpu_engine);
+      char* saved_variance_data_tf = reinterpret_cast<char*>
+                  (saved_variance_tensor->flat<T>().data());
+      std::memcpy(saved_variance_data_tf,
+                  reinterpret_cast<char*>(variance_values_),
+                  depth_*sizeof(T));
+      auto variance_m = memory(variance_pd, saved_variance_data_tf);
+
+      prop_kind pk = (is_training_) ?
+                     prop_kind::forward_training :
+                     prop_kind::forward_scoring;
+      auto bnrm_fwd_desc = batch_normalization_forward::desc(
+                               pk, src.GetUsrMemDesc(), epsilon_,
+                               is_training_ ? use_scale_shift :
+                               (use_scale_shift | use_global_stats));
+      auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc(
+                             bnrm_fwd_desc, cpu_engine);
+
+      // allocate dst tensor
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = bnrm_fwd_pd.dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(),
+                                  src_dims, format_m);
+        tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, kDstIndex, &dst_tensor,
+                                tf_shape_dst, dnn_shape_dst);
+
+      // Output of batchnorm has same shape as input.
+      dst.SetUsrMem(src_md, dst_tensor);
+
+      primitive bnrm_fwd_op;
+      if (is_training_) {
+        bnrm_fwd_op = batch_normalization_forward(
+                          bnrm_fwd_pd,
+                          src.GetOpMem(),
+                          weights_m,
+                          dst.GetOpMem(),
+                          mean_m,
+                          variance_m);
+      } else {
+        bnrm_fwd_op = batch_normalization_forward(
+                          bnrm_fwd_pd,
+                          src.GetOpMem(),
+                          mean_m,
+                          variance_m,
+                          (const primitive::at) weights_m,
+                          dst.GetOpMem());
+      }
+      std::vector<primitive> net;
+      net.push_back(bnrm_fwd_op);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // copy batch_mean data
+      T* batch_mean_data_tf = reinterpret_cast<T*>(
+                                batch_mean_tensor->flat<T>().data());
+      std::memcpy(reinterpret_cast<char*>(batch_mean_data_tf),
+                  reinterpret_cast<char*>(mean_m.get_data_handle()),
+                  depth_*sizeof(T));
+
+      // copy batch_variance data with Bessel's correction
+      // if training mode is on
+      float adjust_factor = 1.0;
+      if (is_training_) {
+        size_t orig_size = src_dims[0] * src_dims[2] * src_dims[3];
+        size_t adjust_size = orig_size - 1;
+        adjust_factor = (static_cast<float>(orig_size)) / adjust_size;
+      }
+      for (int k=0; k < depth_; k++)
+        batch_variance_tensor->flat<T>().data()[k] =
+            (reinterpret_cast<T*>(variance_m.get_data_handle()))[k]
+            * adjust_factor;
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) +
+                         ", in file " + string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                     error_msg));
+    }
+  }
+
+ private:
+  T epsilon_;
+  TensorFormat tensor_format_;
+  bool is_training_;
+  T* mean_values_;
+  T* variance_values_;
+  size_t depth_;          // batch normalization is done for per channel.
+
+  void ExtractParams(OpKernelContext* context) {
+    const Tensor& input = MklGetInput(context, 0);
+    depth_ = static_cast<int>(GetTensorDim(input, tensor_format_, 'C'));
+  }
+
+  void SetMeanVariance(const Tensor& mean, const Tensor& variance) {
+    mean_values_ = reinterpret_cast<T*>(
+                       const_cast<T*>(mean.flat<T>().data()));
+    variance_values_ = reinterpret_cast<T*>(
+                       const_cast<T*>(variance.flat<T>().data()));
+  }
+
+  void HandleEmptyInput(OpKernelContext* context,
+                        TensorShape tf_shape_src,
+                        TensorShape tf_shape_scale,
+                        Tensor** dst_tensor) {
+    CHECK_NOTNULL(dst_tensor);
+
+    const size_t kDstIndex = 0;
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, kDstIndex, dst_tensor,
+                              tf_shape_src, dnn_shape_dst);
+    CHECK_NOTNULL(*dst_tensor);
+    memset(const_cast<char*>((*dst_tensor)->tensor_data().data()), 0,
+           (*dst_tensor)->tensor_data().size());
+
+    Tensor* batch_mean_tensor = nullptr;
+    Tensor* batch_variance_tensor = nullptr;
+    Tensor* saved_mean_tensor = nullptr;
+    Tensor* saved_variance_tensor = nullptr;
+    AllocateTFOutputs(context, tf_shape_scale,
+                      &batch_mean_tensor,
+                      &batch_variance_tensor,
+                      &saved_mean_tensor,
+                      &saved_variance_tensor);
+  }
+
+  void AllocateTFOutputs(OpKernelContext* context,
+                         TensorShape tf_shape_scale,
+                         Tensor** batch_mean_tensor,
+                         Tensor** batch_variance_tensor,
+                         Tensor** saved_mean_tensor,
+                         Tensor** saved_variance_tensor) {
+    CHECK_NOTNULL(batch_mean_tensor);
+    CHECK_NOTNULL(batch_variance_tensor);
+    CHECK_NOTNULL(saved_mean_tensor);
+    CHECK_NOTNULL(saved_variance_tensor);
+
+    const size_t kBatchMeanIndex = 1;
+    const size_t kBatchVarianceIndex = 2;
+    const size_t kSavedMeanIndex = 3;
+    const size_t kSavedVarianceIndex = 4;
+
+    // allocate batch mean output tensor
+    MklDnnShape mkl_shape_batch_mean;
+    mkl_shape_batch_mean.SetMklTensor(false);
+    AllocateOutputSetMklShape(context,
+                              kBatchMeanIndex,
+                              batch_mean_tensor,
+                              tf_shape_scale,
+                              mkl_shape_batch_mean);
+    CHECK_NOTNULL(*batch_mean_tensor);
+    // set NAN mean value in case of empty input tensor
+    for (int k=0; k < tf_shape_scale.num_elements(); k++)
+      (*batch_mean_tensor)->flat<T>().data()[k] = NAN;
+
+    // allocate batch variance output tensor
+    MklDnnShape mkl_shape_batch_variance;
+    mkl_shape_batch_variance.SetMklTensor(false);
+    AllocateOutputSetMklShape(context,
+                              kBatchVarianceIndex,
+                              batch_variance_tensor,
+                              tf_shape_scale,
+                              mkl_shape_batch_variance);
+    CHECK_NOTNULL(*batch_variance_tensor);
+    // set NAN variance value in case of empty input tensor
+    for (int k=0; k < tf_shape_scale.num_elements(); k++)
+      (*batch_variance_tensor)->flat<T>().data()[k] = NAN;
+
+    // Mean and variance (without Bessel's correction) saved for backward
+    // computation to serve as pre-computed mean and variance.
+    MklDnnShape mkl_shape_saved_mean;
+    mkl_shape_saved_mean.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, kSavedMeanIndex,
+                              saved_mean_tensor,
+                              tf_shape_scale,
+                              mkl_shape_saved_mean);
+    CHECK_NOTNULL(*saved_mean_tensor);
+    // set NAN mean value in case of empty input tensor
+    for (int k=0; k < tf_shape_scale.num_elements(); k++)
+      (*saved_mean_tensor)->flat<T>().data()[k] = NAN;
+
+    MklDnnShape mkl_shape_saved_variance;
+    mkl_shape_saved_variance.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, kSavedVarianceIndex,
+                              saved_variance_tensor,
+                              tf_shape_scale,
+                              mkl_shape_saved_variance);
+    CHECK_NOTNULL(*saved_variance_tensor);
+    // set NAN variance value in case of empty input tensor
+    for (int k=0; k < tf_shape_scale.num_elements(); k++)
+      (*saved_variance_tensor)->flat<T>().data()[k] = NAN;
+  }
+};
+
+template <typename Device, typename T>
+class MklFusedBatchNormGradOp : public OpKernel {
+ public:
+  explicit MklFusedBatchNormGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    float epsilon;
+    OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon));
+    epsilon_ = T(epsilon);
+    string tensor_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format));
+    OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("is_training", &is_training_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const size_t kDiffDstIndex = 0;   // index of diff_dst tensor
+      const size_t kSrcIndex = 1;       // index of src input tensor
+      const size_t kScaleIndex = 2;     // index of scale tensor
+      const size_t kMeanIndex = 3;      // index of saved_mean tensor
+      const size_t kVarianceIndex = 4;  // index of saved_variance tensor
+      const Tensor& diff_dst_tensor = MklGetInput(context, kDiffDstIndex);
+      const Tensor& src_tensor = MklGetInput(context, kSrcIndex);
+      const Tensor& scale_tensor = MklGetInput(context, kScaleIndex);
+      const Tensor& saved_mean_tensor = MklGetInput(context, kMeanIndex);
+      const Tensor& saved_variance_tensor = MklGetInput(context,
+                                            kVarianceIndex);
+
+      MklDnnShape dnn_shape_src, dnn_shape_diff_dst;
+      GetMklShape(context, kSrcIndex, &dnn_shape_src);
+      GetMklShape(context, kDiffDstIndex, &dnn_shape_diff_dst);
+      TensorShape tf_shape_src, tf_shape_diff_dst;
+
+      if (dnn_shape_diff_dst.IsMklTensor()) {
+        tf_shape_diff_dst = dnn_shape_diff_dst.GetTfShape();
+        OP_REQUIRES(context, dnn_shape_diff_dst.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        diff_dst_tensor.shape().DebugString()));
+      } else {
+        tf_shape_diff_dst = diff_dst_tensor.shape();
+        OP_REQUIRES(context, diff_dst_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        diff_dst_tensor.shape().DebugString()));
+      }
+
+      if (dnn_shape_src.IsMklTensor()) {
+        tf_shape_src = dnn_shape_src.GetTfShape();
+        OP_REQUIRES(context, dnn_shape_src.GetDimension() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                         src_tensor.shape().DebugString()));
+      } else {
+        tf_shape_src = src_tensor.shape();
+        OP_REQUIRES(context, src_tensor.dims() == 4,
+                    errors::InvalidArgument(
+                        "input must be 4-dimensional",
+                        src_tensor.shape().DebugString()));
+      }
+
+      OP_REQUIRES(context, scale_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "scale must be 1-dimensional",
+                      scale_tensor.shape().DebugString()));
+      OP_REQUIRES(context, saved_mean_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "saved mean must be 1-dimensional",
+                       saved_mean_tensor.shape().DebugString()));
+
+      OP_REQUIRES(context, saved_variance_tensor.dims() == 1,
+                  errors::InvalidArgument(
+                      "saved variance must be 1-dimensional",
+                      saved_variance_tensor.shape().DebugString()));
+
+      Tensor* diff_src_tensor = nullptr;
+      if (tf_shape_src.num_elements() == 0 ||
+          tf_shape_diff_dst.num_elements() == 0) {
+         HandleEmptyInput(context, tf_shape_src,
+                          scale_tensor.shape(),
+                          &diff_src_tensor);
+         return;
+      }
+
+      if (dnn_shape_src.IsMklTensor())
+        depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C);
+      else
+        ExtractParams(context);
+
+      memory::format format_m;
+      if (dnn_shape_src.IsMklTensor()) {
+        if (dnn_shape_src.IsTensorInNCHWFormat())
+          format_m = memory::format::nchw;
+        else
+          format_m = memory::format::nhwc;
+      } else {
+        format_m = TFDataFormatToMklDnnDataFormat(tensor_format_);
+      }
+
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> mean(&cpu_engine);
+      MklDnnData<T> variance(&cpu_engine);
+      MklDnnData<T> diff_dst(&cpu_engine);
+      MklDnnData<T> diff_src(&cpu_engine);
+
+      memory::dims src_dims, diff_dst_dims;
+      if (dnn_shape_src.IsMklTensor())
+        src_dims = TFShapeToMklDnnDimsInNCHW(
+                       dnn_shape_src.GetTfShape(), tensor_format_);
+      else
+        src_dims = TFShapeToMklDnnDimsInNCHW(
+                       src_tensor.shape(), tensor_format_);
+
+      if (dnn_shape_diff_dst.IsMklTensor())
+        diff_dst_dims = TFShapeToMklDnnDimsInNCHW(
+                            dnn_shape_diff_dst.GetTfShape(),
+                            tensor_format_);
+      else
+        diff_dst_dims = TFShapeToMklDnnDimsInNCHW(
+                            diff_dst_tensor.shape(),
+                            tensor_format_);
+
+      // set src and diff_dst primitives
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
+        if (dnn_shape_src.IsMklTensor()) {
+          src_md = dnn_shape_src.GetMklLayout();
+          diff_dst_md = src_md;
+        } else {
+          diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
+          src_md = diff_dst_md;
+        }
+      } else {
+        src_md =  memory::desc(src_dims, MklDnnType<T>(), format_m);
+        diff_dst_md = src_md;
+      }
+      src.SetUsrMem(src_md, &src_tensor);
+      diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);
+
+      // weights -- DNN packs scales/shifts as weights in order of
+      // scale, ..., scale, shift, ..., shift
+      auto weights_desc = memory::desc({2, depth_},
+                                       MklDnnType<T>(),
+                                       memory::format::nc);
+      auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine);
+      auto weights_m = memory(weights_pd);
+      T* weights_data = reinterpret_cast<T*>(weights_m.get_data_handle());
+      T* scale_tf = reinterpret_cast<T*>(const_cast<T*>
+                                        (scale_tensor.flat<T>().data()));
+      for (int k=0; k < depth_; k++) {
+        weights_data[k] = scale_tf[k];
+        weights_data[k + depth_] = 0;
+      }
+
+      // set mean primitive
+      memory::dims mv_dims = GetMeanVarianceDims();
+      mean.SetUsrMem(mv_dims,
+                     memory::format::nc,
+                     const_cast<void*>(static_cast<const void*>
+                     (saved_mean_tensor.flat<T>().data())));
+      mean.SetOpMemDesc(mv_dims, memory::format::nc);
+
+      // set variance primitive
+      variance.SetUsrMem(mv_dims,  memory::format::nc,
+                         const_cast<void*>(static_cast<const void*>
+                         (saved_variance_tensor.flat<T>().data())));
+      variance.SetOpMemDesc(mv_dims, memory::format::nc);
+
+      // set diff_weight primitive
+      auto diff_weights_desc = memory::desc(
+                                 {2, depth_},
+                                 MklDnnType<T>(),
+                                 memory::format::nc);
+      auto diff_weights_pd = memory::primitive_desc(
+                                diff_weights_desc,
+                                cpu_engine);
+      auto diff_weights_m = memory(diff_weights_pd);
+
+      auto bnrm_fwd_desc = batch_normalization_forward::desc(
+                               prop_kind::forward_training,
+                               src.GetUsrMemDesc(),
+                               epsilon_,
+                               is_training_ ? use_scale_shift :
+                               (use_scale_shift | use_global_stats));
+      auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc(
+                                bnrm_fwd_desc,
+                                cpu_engine);
+
+      // Indices of output tensors
+      const size_t kDiffSrcIndex = 0;    // index of diff_src tensor
+
+      // allocate diff_src tensor
+      MklDnnShape dnn_shape_diff_src;
+      TensorShape tf_shape_diff_src;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_diff_src.SetMklTensor(true);
+        auto diff_src_pd = bnrm_fwd_pd.dst_primitive_desc();
+        dnn_shape_diff_src.SetMklLayout(&diff_src_pd);
+        dnn_shape_diff_src.SetElemType(MklDnnType<T>());
+        dnn_shape_diff_src.SetTfLayout(
+                              dnn_shape_src.GetDimension(),
+                              src_dims,
+                              format_m);
+        dnn_shape_diff_src.SetTfDimOrder(
+                              dnn_shape_src.GetDimension(),
+                              tensor_format_);
+        tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_diff_src.SetMklTensor(false);
+        tf_shape_diff_src = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, kDiffSrcIndex, &diff_src_tensor,
+                                tf_shape_diff_src, dnn_shape_diff_src);
+
+      diff_src.SetUsrMem(src_md, diff_src_tensor);
+
+      prop_kind pk = prop_kind::backward;
+      auto bnrm_bwd_desc = batch_normalization_backward::desc(
+                               pk,
+                               diff_src.GetUsrMemDesc(),
+                               src.GetUsrMemDesc(),
+                               epsilon_,
+                               /* for inference, specify use_global_stats
+                                  1. on fwd prop, use mean and variance
+                                     provided as inputs
+                                  2. on bwd prop, mean and variance are
+                                     considered as constants. Thus, 
+                                     reduce the amout of MKL computations
+                               */
+                               is_training_ ? use_scale_shift :
+                               (use_scale_shift | use_global_stats));
+      auto bnrm_bwd_pd = batch_normalization_backward::primitive_desc(
+                               bnrm_bwd_desc,
+                               cpu_engine,
+                               bnrm_fwd_pd);
+
+      auto bnrm_bwd_op = batch_normalization_backward(
+                               bnrm_bwd_pd,
+                               src.GetOpMem(),
+                               mean.GetOpMem(),
+                               variance.GetOpMem(),
+                               diff_dst.GetOpMem(),
+                               weights_m,
+                               diff_src.GetOpMem(),
+                               diff_weights_m);
+
+      std::vector<primitive> net;
+      net.push_back(bnrm_bwd_op);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // allocate 4 output TF tensors
+      Tensor* diff_scale_tensor = nullptr;
+      Tensor* diff_shift_tensor = nullptr;
+      AllocateTFOutputs(context, scale_tensor.shape(),
+                        &diff_scale_tensor,
+                        &diff_shift_tensor);
+
+      // copy data: diff_scale and diff_shift
+      T* diff_weights_data_dnn = reinterpret_cast<T*>
+                                 (diff_weights_m.get_data_handle());
+      for (int i = 0; i < depth_; i++) {
+        diff_scale_tensor->flat<T>().data()[i] =
+                              diff_weights_data_dnn[i];
+        diff_shift_tensor->flat<T>().data()[i] =
+                              diff_weights_data_dnn[i + depth_];
+      }
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                          ", message: " + string(e.message) +
+                          ", in file " + string(__FILE__) + ":" +
+                          std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                     error_msg));
+    }
+  }
+
+ private:
+  T epsilon_;
+  TensorFormat tensor_format_;
+  int depth_;             // batch normalization is done for per channel.
+  bool is_training_;
+
+  void ExtractParams(OpKernelContext* context) {
+      const Tensor& input = MklGetInput(context, 0);
+      depth_ = static_cast<int>(GetTensorDim(input, tensor_format_, 'C'));
+  }
+
+  void HandleEmptyInput(OpKernelContext* context,
+                        TensorShape tf_shape_src,
+                        TensorShape tf_shape_scale_shift,
+                        Tensor** diff_src_tensor) {
+    const size_t kDiffSrcIndex = 0;
+
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, kDiffSrcIndex, diff_src_tensor,
+                                tf_shape_src, dnn_shape_diff_src);
+    for (size_t i=0; i < (*diff_src_tensor)->shape().num_elements(); i++)
+       (*diff_src_tensor)->flat<T>().data()[i] = 0;
+
+    Tensor* diff_scale_tensor = nullptr;
+    Tensor* diff_shift_tensor = nullptr;
+    AllocateTFOutputs(context,
+                      tf_shape_scale_shift,
+                      &diff_scale_tensor,
+                      &diff_shift_tensor);
+  }
+
+  void AllocateTFOutputs(OpKernelContext* context,
+                        TensorShape tf_shape_scale_shift,
+                        Tensor** diff_scale_tensor,
+                        Tensor** diff_shift_tensor) {
+    CHECK_NOTNULL(diff_scale_tensor);
+    CHECK_NOTNULL(diff_shift_tensor);
+
+    const size_t kDiffScaleIndex = 1;
+    const size_t kDiffShiftIndex = 2;
+    const size_t kP1Index = 3;
+    const size_t kP2Index = 4;
+
+    // separate out scale and shift grad and copy to individual tensors
+    MklDnnShape mkl_shape_diff_scale;
+    mkl_shape_diff_scale.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, kDiffScaleIndex, diff_scale_tensor,
+                              tf_shape_scale_shift, mkl_shape_diff_scale);
+    CHECK_NOTNULL(*diff_scale_tensor);
+    for (size_t i=0; i < (*diff_scale_tensor)->shape().num_elements(); i++)
+       (*diff_scale_tensor)->flat<T>().data()[i] = 0;
+
+    MklDnnShape mkl_shape_diff_shift;
+    mkl_shape_diff_shift.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, kDiffShiftIndex, diff_shift_tensor,
+                              tf_shape_scale_shift, mkl_shape_diff_shift);
+    CHECK_NOTNULL(*diff_shift_tensor);
+    for (size_t i=0; i < (*diff_shift_tensor)->shape().num_elements(); i++)
+       (*diff_shift_tensor)->flat<T>().data()[i] = 0;
+
+    // Placeholders for estimated_mean and estimated_variance, which are
+    // used for inference and thus not needed here for gradient computation.
+    Tensor* p1_tensor = nullptr, *p2_tensor = nullptr;
+    MklDnnShape mkl_shape_p;
+    mkl_shape_p.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, kP1Index, &p1_tensor,
+                              TensorShape({}), mkl_shape_p);
+    AllocateOutputSetMklShape(context, kP2Index, &p2_tensor,
+                              TensorShape({}), mkl_shape_p);
+  }
+
+  memory::dims GetMeanVarianceDims() {
+    return memory::dims({1, depth_});
+  }
+};
+
+#endif
+
+#define REGISTER_MKL_CPU(T)                                         \
+  REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNorm")                \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklFusedBatchNormOp<CPUDevice, T>);
+TF_CALL_float(REGISTER_MKL_CPU);
+#undef REGISTER_MKL_CPU
 
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNormGrad")            \
diff --git a/tensorflow/core/kernels/mkl_identity_op.cc b/tensorflow/core/kernels/mkl_identity_op.cc
index f31e7afd46873a02c10277283862a7e5e2384803..9ee27ee21c8d23c8ce314a7687ac9b79a1d9ea30 100644
--- a/tensorflow/core/kernels/mkl_identity_op.cc
+++ b/tensorflow/core/kernels/mkl_identity_op.cc
@@ -28,8 +28,15 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
+
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklIdentityOp : public OpKernel {
  public:
@@ -50,6 +57,32 @@ class MklIdentityOp : public OpKernel {
   bool IsExpensive() override { return false; }
 };
 
+#else
+
+template <typename Device, typename T>
+class MklIdentityOp : public OpKernel {
+ public:
+  explicit MklIdentityOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    MklDnnShape dnn_shape_input;
+    const int kInputIdx = 0, kOutputIdx = 0;
+    GetMklShape(context, kInputIdx, &dnn_shape_input);
+
+    if (dnn_shape_input.IsMklTensor()) {
+      ForwardMklTensorInToOut(context, kInputIdx, kOutputIdx);
+    } else {
+      ForwardTfTensorInToOut(context, kInputIdx, kOutputIdx);
+    }
+  }
+
+  // TensorFlow's IdentityOp has the following member function, so kept it
+  // as it is.
+  bool IsExpensive() override { return false; }
+};
+
+#endif
+
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklIdentity")                      \
                               .Device(DEVICE_CPU)                   \
diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc
index b58e44e39800c8c047d5557ab3c84113bb78d3ca..001834b13bdd64ffd0d536897fbc4a170c4c4117 100644
--- a/tensorflow/core/kernels/mkl_input_conversion_op.cc
+++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc
@@ -31,6 +31,12 @@ limitations under the License.
 #include "tensorflow/core/kernels/mkl_tfconv_op.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
@@ -44,15 +50,16 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 // else if both inputs are in mkl format:
 //   if both have the same shape:
 //     pass the inputs through to the output
-// 	else:
-// 		convert both to TF
+//   else:
+//     convert both to TF
 // else if one is TF and one is MKL:
-// 	if broadcast is needed:
-// 		convert the MKL format input to TF format
-// 	else:
-// 		convert the TF format input to MKL format
+//   if broadcast is needed:
+//     convert the MKL format input to TF format
+//   else:
+//     convert the TF format input to MKL format
 ///////////////////////////////////////////////////////////
 
+#ifndef INTEL_MKL_DNN
 template <typename Device, typename T>
 class MklInputConversionOp : public OpKernel {
  public:
@@ -242,6 +249,199 @@ class MklInputConversionOp : public OpKernel {
   bool has_avx512f_ = false;
 };
 
+#else
+
+template <typename Device, typename T>
+class MklInputConversionOp : public OpKernel {
+ public:
+  explicit MklInputConversionOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES_OK(context, context->GetAttr("T", &op_data_type));
+    has_avx512f_ = port::TestCPUFeature(port::CPUFeature::AVX512F);
+  }
+
+ private:
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor_0 = MklGetInput(context, 0);
+    MklDnnShape input_shape_0;
+    GetMklShape(context, 0, &input_shape_0);
+
+    const Tensor& input_tensor_1 = MklGetInput(context, 1);
+    MklDnnShape input_shape_1;
+    GetMklShape(context, 1, &input_shape_1);
+
+    bool tf_shapes_are_same = context->input(0).shape() ==
+                              context->input(1).shape();
+
+    VLOG(1) << "MklInputConversionOp: Input shapes are "
+            << (tf_shapes_are_same ? "*same*" : "*different*") << ": "
+            << context->input(0).shape().DebugString() << " and "
+            << context->input(1).shape().DebugString();
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // if both inputs are in TF format, just copy input tensors to output.
+    if (!input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
+      VLOG(1) << "MklInputConversionOp: No conversion needed, "
+              << "copying TF inputs to output";
+
+      ForwardTfTensorInToOut(context, 0, 0);
+      ForwardTfTensorInToOut(context, 1, 1);
+      return;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // If both inputs are in MKL format
+    if (input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
+      // If both have the same shape, pass them through
+      if (tf_shapes_are_same) {
+        VLOG(1) << "MklInputConversionOp: No conversion needed, "
+                << "copying MKL inputs with identical shapes to output";
+
+        ForwardMklTensorInToOut(context, 0, 0);
+        ForwardMklTensorInToOut(context, 1, 1);
+        return;
+      }
+
+      // Sanity check
+      bool mkl_shapes_are_same = input_shape_0 == input_shape_1;
+      if (mkl_shapes_are_same) {
+        CHECK(false) << "MklInputConversionOp: Unexpected: TF shapes are "
+                        "different but MKL shapes are same";
+      }
+
+      // Both have different shapes, so broadcast will be necessary.
+      // Convert to TF and pass both tensors through (we can't do broadcast
+      // with MKL tensors)
+      VLOG(1) << "MklInputConversionOp: Broadcast needed, "
+              << "converted MKL inputs to TF format";
+
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_, 0);
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_, 1);
+      SetDummyMklShapeOutput(context, 0);
+      SetDummyMklShapeOutput(context, 1);
+      return;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // One input is MKL and one is TF. If no broadcast is needed, convert
+    // the TF tensor to MKL, otherwise convert the MKL tensor to TF format
+    VLOG(1) << "MklInputConversionOp: Inputs in different formats (MKL/TF)";
+
+    const Tensor* mkl_tensor;
+    const MklDnnShape* mkl_shape;
+    const Tensor* tf_tensor;
+    MklDnnShape* tf_mkl_shape;
+    uint mkl_tensor_index;
+    uint tf_tensor_index;
+    if (input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
+      mkl_tensor = &input_tensor_0;
+      mkl_shape = &input_shape_0;
+      mkl_tensor_index = 0;
+      tf_tensor = &input_tensor_1;
+      tf_mkl_shape = &input_shape_1;
+      tf_tensor_index = 1;
+    } else if (!input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
+      mkl_tensor = &input_tensor_1;
+      mkl_shape = &input_shape_1;
+      mkl_tensor_index = 1;
+      tf_tensor = &input_tensor_0;
+      tf_mkl_shape = &input_shape_0;
+      tf_tensor_index = 0;
+    } else {
+      CHECK(false) << "MklInputConversionOp: Unexpected combination of input "
+                      "shapes for MKL "
+                   << "element-wise op";
+    }
+
+    // Broadcast is needed if the shapes are not the same
+    bool broadcast_needed;
+
+    size_t in0_size = 1;
+    for (size_t i = 0; i < mkl_shape->GetDimension(); ++i)
+      in0_size *= mkl_shape->TfDimSize(i);
+
+    size_t in1_size = 1;
+    for (size_t i = 0; i < tf_tensor->shape().dims(); ++i)
+      in1_size *= tf_tensor->shape().dim_size(i);
+
+    broadcast_needed = (in0_size != in1_size);
+
+    if (!broadcast_needed) {
+      // Both shapes are same, convert the TF input to MKL
+      VLOG(1) << "MklInputConversionOp: No broadcast needed.";
+      VLOG(1) << "MklInputConversionOp: Converting input " << tf_tensor_index
+              << " to MKL format";
+
+      // Create MklDnnShape for output Mkl tensor.
+      Tensor* tensor_out;
+      MklDnnShape mkl_output_mkl_shape;
+      mkl_output_mkl_shape.SetMklTensor(true);
+      mkl_output_mkl_shape.SetElemType(MklDnnType<T>());
+      mkl_output_mkl_shape.SetTfLayout(mkl_shape->GetDimension(),
+                                       mkl_shape->GetSizesAsMklDnnDims(),
+                                       mkl_shape->GetTfDataFormat());
+      // ** Temporarily borrow the layout from the MKL input **
+      auto output_mkl_md = mkl_shape->GetMklLayout();
+      mkl_output_mkl_shape.SetMklLayout(&output_mkl_md);
+
+      // Create output Mkl tensor
+      AllocateOutputSetMklShape(context, tf_tensor_index, &tensor_out,
+                                mkl_tensor->shape(), mkl_output_mkl_shape);
+
+      // Create MklDnnData object for input tensor. Input tensor is in
+      // Tensorflow layout.
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> tf_input(&cpu_engine);
+      auto input_tf_md = mkl_output_mkl_shape.GetTfLayout();
+      tf_input.SetUsrMem(input_tf_md, &tf_tensor);
+
+      // Create reorder between tensorflow layout and Mkl layout.
+      std::vector<primitive> net;
+      CHECK_EQ(tf_input.CheckReorderToOpMem(memory::primitive_desc(
+                                            output_mkl_md, cpu_engine),
+                                            tensor_out, &net),
+               true);
+      stream(stream::kind::eager).submit(net).wait();
+
+      // -- The tensor in MKL format passes through --
+      ForwardMklTensorInToOut(context, mkl_tensor_index, mkl_tensor_index);
+    } else {
+      // Broadcast is needed, so convert the MKL input to TF
+      VLOG(1) << "MklInputConversionOp: Broadcast needed.";
+      VLOG(1) << "MklInputConversionOp: Converting input " << mkl_tensor_index
+              << " to TF format";
+      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_,
+                                           mkl_tensor_index);
+      SetDummyMklShapeOutput(context, mkl_tensor_index);
+
+      // The tensor in TF format passes through
+      ForwardTfTensorInToOut(context, tf_tensor_index, tf_tensor_index);
+    }
+
+    VLOG(1) << "MklInputConversionOp: Shapes (output): "
+            << context->mutable_output(0)->shape().DebugString() << " and "
+            << context->mutable_output(1)->shape().DebugString();
+
+    VLOG(1) << "MklInputConversion completed successfully.";
+  }
+
+ private:
+  /// Data format of the operation
+  string data_format_str;
+
+  /// Data type of the operation
+  DataType op_data_type;
+
+  /// CPUIDInfo
+  bool has_avx512f_ = false;
+};
+
+#endif
+
 ///////////////////////////////////////////////////////////
 //               Register kernel
 ///////////////////////////////////////////////////////////
@@ -253,7 +453,10 @@ class MklInputConversionOp : public OpKernel {
                               .Label(mkl_op_registry::kMklOpLabel), \
                           MklInputConversionOp<CPUDevice, T>);
 
-TF_CALL_NUMBER_TYPES(REGISTER_CPU);
+// TODO(nhasabni): We cannot support all number types since MklDnn does
+// not support types.
+// TF_CALL_NUMBER_TYPES(REGISTER_CPU);
+TF_CALL_float(REGISTER_CPU);
 #undef REGISTER_CPU
 }  // namespace tensorflow
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc
index aa08e93924c588cfb5b4a22a20055e5c74a43b3a..66bc7dd8eedbda08e052c8c3c1bd552c7b955ecb 100644
--- a/tensorflow/core/kernels/mkl_lrn_op.cc
+++ b/tensorflow/core/kernels/mkl_lrn_op.cc
@@ -38,6 +38,15 @@ limitations under the License.
 #include "tensorflow/core/util/work_sharder.h"
 #endif
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::lrn_forward;
+using mkldnn::lrn_backward;
+using mkldnn::prop_kind;
+using mkldnn::algorithm::lrn_across_channels;
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 
 namespace {
@@ -58,6 +67,8 @@ void GetBandMatrix(int depth, int depth_radius,
 
 }  // namespace
 
+#ifndef INTEL_MKL_DNN
+
 template <typename T>
 class MklLRNOp : public OpKernel {
  public:
@@ -328,6 +339,7 @@ class MklLRNOp : public OpKernel {
   float beta_;
 };
 
+
 template <typename T>
 class MklLRNGradOp : public OpKernel {
  public:
@@ -648,6 +660,7 @@ class MklLRNGradOp : public OpKernel {
       const auto nodes = cols * rows;
 
       auto grads_shaped = in_grads.shaped<T, 2>({nodes * batch, depth});
+
       auto in_shaped = in_image.shaped<T, 2>({nodes * batch, depth});
       auto activations = out_image.shaped<T, 2>({nodes * batch, depth});
 
@@ -717,6 +730,657 @@ class MklLRNGradOp : public OpKernel {
   float beta_;
 };
 
+#else
+
+template <typename T>
+class MklLRNOp : public OpKernel {
+ public:
+  ~MklLRNOp() {}
+
+  explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) {
+    int64 depth_radius64;
+    OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
+    OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("depth_radius = ", depth_radius64,
+                                        " larger than int max"));
+    depth_radius_ = static_cast<size_t>(depth_radius64);
+
+    OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
+    OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_));
+    OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_));
+    workspace_enabled_ = false;
+    context->GetAttr("workspace_enabled", &workspace_enabled_);
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      SanityCheckInputs(context);
+      if (!context->status().ok()) return;
+
+      auto cpu_engine = engine(engine::cpu, 0);
+      const Tensor& src_tensor = MklGetInput(context, kIdxInput);
+      MklDnnShape src_dnn_shape;
+      GetMklShape(context, kIdxInput, &src_dnn_shape);
+
+      // MKL-DNN has a notion of kernel_size and not depth_radius.
+      int kernel_size = 2 * depth_radius_ + 1;
+      float new_alpha = alpha_ * kernel_size;
+
+      // if the input tensor is not an MKL Tensor, or if the last
+      // dimension is not channel, then just use Eigen.
+      // MKL only support normalization over the channel dimension.
+      if (!src_dnn_shape.IsMklTensor()) {
+        MklDefaultToEigen(context, src_tensor);
+        return;
+      } else if (!src_dnn_shape.IsMklChannelDim(
+                  src_dnn_shape.GetDimension() - 1) ) {
+        Tensor converted_tensor =
+          ConvertMklToTF<T>(context, src_tensor, src_dnn_shape);
+        MklDefaultToEigen(context, converted_tensor);
+        return;
+      }
+      // At this point, we can assume that the src is an MklTensor
+      // and we can enable the workspace
+      workspace_enabled_ = true;
+
+      MklDnnData<T> src_dnn_data(&cpu_engine);
+      MklDnnData<T> dst_dnn_data(&cpu_engine);
+      MklDnnData<uint8> workspace_dnn_data(&cpu_engine);
+
+      TensorShape tf_output_shape = src_tensor.shape();
+
+      memory::desc src_md = src_dnn_shape.GetCurLayout();
+      memory::dims input_dims = src_dnn_shape.GetSizesAsMklDnnDims();
+
+      // Create memory for user input.
+      // Since Tensorflow always performs normalization over last dimension,
+      // and MKL-DNN performs normalization over Channel, we tell MKL-DNN
+      // that input is in NHWC layout with Channel being the last dimension.
+      src_dnn_data.SetUsrMem(src_md, &src_tensor);
+      src_dnn_data.SetOpMemDesc(input_dims, memory::format::nhwc);
+
+      // output_dnn_data and workspace both have the same shape as input
+      dst_dnn_data.SetUsrMem(src_md);
+      dst_dnn_data.SetOpMemDesc(input_dims, memory::format::nhwc);
+
+      // Create LRN primitive descriptor.
+      // Tensorflow's normalization semantics is across channels.
+      // MKL-DNN also supports normalization within channel.
+      auto lrn_desc = lrn_forward::desc(prop_kind::forward,
+                                        lrn_across_channels,
+                                        src_dnn_data.GetUsrMemDesc(),
+                                        kernel_size,
+                                        new_alpha, beta_, bias_);
+      auto lrn_prim_desc = lrn_forward::primitive_desc(lrn_desc, cpu_engine);
+
+      // Allocate output_dnn_data tensor.
+      Tensor* output_tensor = nullptr;
+      memory::format input_format = src_dnn_shape.GetTfDataFormat();
+      AllocateOutputTensor(context, lrn_prim_desc, input_dims,
+                    input_format, &output_tensor);
+      OP_REQUIRES_OK(context, context->status());
+      CHECK_NOTNULL(output_tensor);
+      dst_dnn_data.SetUsrMemDataHandle(output_tensor);
+
+      // Handle workspace required for MKL-DNN.
+      AllocateWorkspaceTensor(context, lrn_prim_desc, &workspace_dnn_data);
+      OP_REQUIRES_OK(context, context->status());
+
+      PrepareAndExecuteNet(lrn_prim_desc, &src_dnn_data,
+                          &dst_dnn_data, &workspace_dnn_data);
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                     error_msg));
+    }
+  }
+
+ private:
+  void PrepareAndExecuteNet(
+                const lrn_forward::primitive_desc& lrn_fwd_desc,
+                MklDnnData<T>* src_dnn_data,
+                MklDnnData<T>* dst_dnn_data,
+                MklDnnData<uint8>* wksp_dnn_data = nullptr) {
+    std::vector<primitive> net;
+
+    // Check for input reorder
+    src_dnn_data->CheckReorderToOpMem(lrn_fwd_desc.src_primitive_desc(), &net);
+
+    // Create pooling primitive and add it to net
+    if (wksp_dnn_data != nullptr) {
+        net.push_back(lrn_forward(lrn_fwd_desc,
+                        src_dnn_data->GetOpMem(),
+                        wksp_dnn_data->GetOpMem(),
+                        dst_dnn_data->GetOpMem()));
+    } else {
+        net.push_back(lrn_forward(lrn_fwd_desc,
+            src_dnn_data->GetOpMem(),
+            dst_dnn_data->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+  void AllocateOutputTensor(OpKernelContext* context,
+            const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
+            const memory::dims output_dims_mkl_order,
+            const memory::format& output_tf_format,
+            Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd = lrn_fwd_prim_desc.dst_primitive_desc();
+
+    MklDnnShape output_mkl_shape;
+    // We only handle the case when the inputs and output are in Mkl format
+    // Any other case is handled by Eigen
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                              output_dims_mkl_order,
+                              output_tf_format);
+    TensorShape output_tf_shape;
+    // only allocate enough space for the elements we need.
+    size_t num_bytes = dst_pd.get_size();
+    CHECK_EQ(num_bytes % sizeof(T), 0);
+    output_tf_shape.AddDim(num_bytes / sizeof(T));
+    AllocateOutputSetMklShape(context, kIdxOutput,
+                            output_tensor,
+                            output_tf_shape, output_mkl_shape);
+    }
+
+    // Fallback implementation - Taken from lrn_op.cc
+    // TODO(inteltf) Check if we can use EigenLRNOp directly instead of making a
+    // copy.
+    void MklDefaultToEigen(OpKernelContext* context,
+                           const Tensor& input) {
+      const int batch = static_cast<int>(input.dim_size(0));
+      const int rows = static_cast<int>(input.dim_size(1));
+      const int cols = static_cast<int>(input.dim_size(2));
+      const int depth = static_cast<int>(input.dim_size(3));
+      const int nodes = cols * rows;
+
+      auto in_shaped = input.shaped<T, 2>({nodes * batch, depth});
+      // Multiplying the input with the band matrix has the effect of reducing
+      // the
+      // correct patch along the depth.
+      Eigen::Tensor<T, 2, Eigen::RowMajor> multiplier(depth, depth);
+      GetBandMatrix<T>(depth, depth_radius_, &multiplier);
+
+      Tensor *output_dnn_data = nullptr;
+      MklDnnShape mkl_output_mkl_shape;
+      mkl_output_mkl_shape.SetMklTensor(false);
+      mkl_output_mkl_shape.SetDimensions(4);
+      AllocateOutputSetMklShape(context, kIdxOutput, &output_dnn_data,
+                                input.shape(), mkl_output_mkl_shape);
+      CHECK_NOTNULL(output_dnn_data);
+
+      Tensor* workspace_tensor = nullptr;
+      MklDnnShape workspace_mkl_shape;
+      workspace_mkl_shape.SetMklTensor(false);
+      TensorShape workspace_tf_shape;
+      workspace_tf_shape.AddDim(0);
+      AllocateOutputSetMklShape(context, kIdxWorkspace,
+                              &workspace_tensor,
+                              workspace_tf_shape, workspace_mkl_shape);
+      CHECK_NOTNULL(workspace_tensor);
+
+      auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
+      Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
+      auto tmp = in_shaped.square().contract(multiplier, dims) * alpha_ + bias_;
+      if (beta_ == T(1)) {
+        out_shaped.device(context->eigen_cpu_device()) =
+            in_shaped * tmp.inverse();
+      } else if (beta_ == T(0.5)) {
+        out_shaped.device(context->eigen_cpu_device()) =
+            in_shaped * tmp.rsqrt();
+      } else {
+        out_shaped.device(context->eigen_cpu_device()) =
+            in_shaped * (tmp.log() * -beta_).exp();
+      }
+    }
+
+    void AllocateWorkspaceTensor(OpKernelContext* context,
+                const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
+                MklDnnData<uint8>* dnn_data_wksp) {
+      CHECK_NOTNULL(dnn_data_wksp);
+      Tensor* workspace_tensor = nullptr;
+      memory::primitive_desc workspace_pd
+                  = lrn_fwd_prim_desc.workspace_primitive_desc();
+      size_t workspace_bytes = workspace_pd.get_size();
+      MklDnnShape workspace_mkl_shape;
+      // the workspace tensor is a uint8 tensor that has
+      // exactly the number of bytes necessary
+      workspace_mkl_shape.SetMklTensor(false);
+      TensorShape workspace_tf_shape;
+      workspace_tf_shape.AddDim(workspace_bytes);
+      AllocateOutputSetMklShape(context, kIdxWorkspace,
+                              &workspace_tensor,
+                              workspace_tf_shape, workspace_mkl_shape);
+      CHECK_NOTNULL(workspace_tensor);
+      dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor);
+    }
+
+  void SanityCheckInputs(OpKernelContext* context) {
+    const Tensor& src_tensor = MklGetInput(context, kIdxInput);
+    MklDnnShape src_dnn_shape;
+    GetMklShape(context, kIdxInput, &src_dnn_shape);
+    if (src_dnn_shape.IsMklTensor()) {
+        OP_REQUIRES(context, src_dnn_shape.GetDimension() == 4,
+                    errors::InvalidArgument("input must be 4-dimensional"));
+        OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(),
+                                            std::numeric_limits<int>::max()),
+                        errors::InvalidArgument("argument to LRN too large"));
+    } else {
+        OP_REQUIRES(context, src_tensor.dims() == 4,
+                    errors::InvalidArgument("input must be 4-dimensional"));
+        OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(),
+                                            std::numeric_limits<int>::max()),
+                        errors::InvalidArgument("argument to LRN too large"));
+    }
+  }
+  const int kIdxInput = 0,
+            kIdxOutput = 0,
+            kIdxWorkspace = 1;
+
+  typedef typename Eigen::Tensor<T, 1, Eigen::RowMajor>::DimensionPair DimPair;
+  bool workspace_enabled_;
+  int depth_radius_;
+  float bias_;
+  float alpha_;
+  float beta_;
+};
+
+
+template <typename T>
+class MklLRNGradOp : public OpKernel {
+ public:
+  explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) {
+    int64 depth_radius64;
+    OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
+    OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("depth_radius = ", depth_radius64,
+                                        " larger than int max"));
+    depth_radius_ = static_cast<int>(depth_radius64);
+    OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
+    OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_));
+    OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_));
+    workspace_enabled_ = false;
+    context->GetAttr("workspace_enabled", &workspace_enabled_);
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      SanityCheckInputs(context);
+      if (!context->status().ok()) return;
+
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> input_grad_dnn_data(&cpu_engine);
+      MklDnnData<T> orig_input_dnn_data(&cpu_engine);
+      MklDnnData<T> orig_output_dnn_data(&cpu_engine);
+      MklDnnData<T> output_dnn_data(&cpu_engine);
+
+      MklDnnShape input_grad_dnn_shape, orig_input_dnn_shape,
+                  orig_output_dnn_shape;
+      GetMklShape(context, kIdxGradient, &input_grad_dnn_shape);
+      GetMklShape(context, kIdxOrigInput, &orig_input_dnn_shape);
+      GetMklShape(context, kIdxOrigOutput, &orig_output_dnn_shape);
+
+      // We only use MKLDNN if all of the necessary inputs are present
+      // in mkldnn format, and Channel is the last dimension
+      bool can_use_mkldnn = workspace_enabled_ &&
+                            input_grad_dnn_shape.IsMklTensor() &&
+                            orig_input_dnn_shape.IsMklTensor() &&
+                            orig_output_dnn_shape.IsMklTensor() &&
+                            input_grad_dnn_shape.IsMklChannelDim(
+                              input_grad_dnn_shape.GetDimension() - 1) &&
+                            orig_input_dnn_shape.IsMklChannelDim(
+                              orig_input_dnn_shape.GetDimension() - 1) &&
+                            orig_output_dnn_shape.IsMklChannelDim(
+                              orig_output_dnn_shape.GetDimension() - 1);
+
+      if (!can_use_mkldnn) {
+          // Fallback to eigen
+          MklDefaultToEigen(context);
+          return;
+      }
+      // At this point, we have the all clear to use MklDnn constructs
+      // Naming: diff_dst is input_gradient_tensor; src is orig_input_tensor.
+      const Tensor& input_grad_tensor = MklGetInput(context, kIdxGradient);
+      const Tensor& orig_input_tensor = MklGetInput(context, kIdxOrigInput);
+      const Tensor& orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
+
+      // Get input sizes in MKL-DNN required NCHW format.
+      // LRN does not have data_format attribute. But by default it has
+      // NHWC format.
+      memory::desc original_output_md = orig_output_dnn_shape.GetCurLayout();
+      memory::desc target_diff_dst_md = ConfigureInputGradient(
+                                input_grad_tensor,
+                                input_grad_dnn_shape,
+                                &input_grad_dnn_data);
+
+      memory::desc orig_input_md = orig_input_dnn_shape.GetCurLayout();
+      memory::dims orig_input_dims =
+                orig_input_dnn_shape.GetSizesAsMklDnnDims();
+      orig_input_dnn_data.SetUsrMem(orig_input_md, &orig_input_tensor);
+      orig_input_dnn_data.SetOpMemDesc(orig_input_dims, memory::format::nhwc);
+
+      // output_dnn_data has the same shape as original input
+      output_dnn_data.SetUsrMem(orig_input_md);
+      output_dnn_data.SetOpMemDesc(orig_input_dims, memory::format::nhwc);
+
+      // MKL-DNN has a notion of kernel_size and not depth_radius.
+      int kernel_size = 2 * depth_radius_ + 1;
+      float new_alpha = alpha_ * kernel_size;
+
+      // Create LRN backward primitive descriptor. It requires LRN forward
+      // primitive descriptor also.
+      auto lrn_fwd_desc = lrn_forward::desc(prop_kind::forward,
+                                        lrn_across_channels,
+                                        orig_input_md,
+                                        kernel_size,
+                                        new_alpha, beta_, bias_);
+      auto lrn_fwd_prim_desc = lrn_forward::primitive_desc(lrn_fwd_desc,
+                                                           cpu_engine);
+      auto lrn_bwd_desc = lrn_backward::desc(lrn_across_channels,
+                                        original_output_md,
+                                        target_diff_dst_md,
+                                        kernel_size,
+                                        new_alpha, beta_, bias_);
+      auto lrn_bwd_prim_desc = lrn_backward::primitive_desc(lrn_bwd_desc,
+                                                          cpu_engine,
+                                                          lrn_fwd_prim_desc);
+
+      Tensor* output_tensor = nullptr;
+      memory::format orig_input_format
+                    = orig_input_dnn_shape.GetTfDataFormat();
+      AllocateOutputTensor(context, lrn_bwd_prim_desc,
+            orig_input_dims, orig_input_format, &output_tensor);
+      OP_REQUIRES_OK(context, context->status());
+      CHECK_NOTNULL(output_tensor);
+      output_dnn_data.SetUsrMemDataHandle(output_tensor);
+
+      // Create LRN primitive and add it to the net
+      // At this point, workspace is enabled, so we don't need
+      // to check. Pass input workspace to LRN backward primitive.
+      const Tensor& workspace_tensor = MklGetInput(context, kIdxWorkspace);
+      MklDnnData<uint8> workspace_dnn_data(&cpu_engine);
+      ConfigureWorkspace(workspace_tensor,
+                          lrn_fwd_prim_desc.workspace_primitive_desc(),
+                          &workspace_dnn_data);
+
+      PrepareAndExecuteNet(lrn_bwd_prim_desc,
+                lrn_fwd_prim_desc,
+                &orig_input_dnn_data,
+                &input_grad_dnn_data,
+                &output_dnn_data,
+                memory::primitive_desc(target_diff_dst_md, cpu_engine),
+                &workspace_dnn_data);
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                     error_msg));
+    }
+  }
+
+  void AllocateOutputTensor(OpKernelContext* context,
+            const lrn_backward::primitive_desc& lrn_bkwd_prim_desc,
+            const memory::dims output_dims_mkl_order,
+            const memory::format& output_tf_format,
+            Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd
+                = lrn_bkwd_prim_desc.diff_src_primitive_desc();
+    MklDnnShape output_mkl_shape;
+
+    // We assume that all outputs at this point are MKL Tensors
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                              output_dims_mkl_order,
+                              output_tf_format);
+
+    TensorShape output_tf_shape;
+    size_t num_bytes = dst_pd.get_size();
+    CHECK_EQ(num_bytes % sizeof(T), 0);
+    output_tf_shape.AddDim(num_bytes / sizeof(T));
+    AllocateOutputSetMklShape(context, kIdxOutput,
+                            output_tensor,
+                            output_tf_shape, output_mkl_shape);
+  }
+
+  memory::desc ConfigureInputGradient(const Tensor& input_grad_tensor,
+                    const MklDnnShape& input_grad_dnn_shape,
+                    MklDnnData<T> *input_grad_dnn_data) {
+    CHECK_NOTNULL(input_grad_dnn_data);
+    // This shouldn't be necessary at this point, but just in case
+    CHECK_EQ(input_grad_dnn_shape.IsMklTensor(), true);
+
+    memory::desc input_grad_md = input_grad_dnn_shape.GetCurLayout();
+    memory::dims orig_input_dims =
+              input_grad_dnn_shape.GetSizesAsMklDnnDims();
+    input_grad_dnn_data->SetUsrMem(input_grad_md, &input_grad_tensor);
+    input_grad_dnn_data->SetOpMemDesc(orig_input_dims, memory::format::nhwc);
+    return input_grad_md;
+  }
+
+  void PrepareAndExecuteNet(
+        const lrn_backward::primitive_desc& lrn_bkwd_desc,
+        const lrn_forward::primitive_desc& lrn_fwd_desc,
+        MklDnnData<T>* src_dnn_data,
+        MklDnnData<T>* input_gradient_diff_dst,
+        MklDnnData<T>* output_diff_src,
+        const memory::primitive_desc& target_diff_dst_pd,
+        const MklDnnData<uint8>* workspace_dnn_data = nullptr) {
+    std::vector<primitive> net;
+
+    // Check for input reordering on the diff dst input
+    input_gradient_diff_dst->CheckReorderToOpMem(
+                lrn_bkwd_desc.diff_dst_primitive_desc(), &net);
+
+    // Check for input reordering on the original input
+    src_dnn_data->CheckReorderToOpMem(lrn_fwd_desc.src_primitive_desc(),
+                                      &net);
+    // Create pooling primitive and add it to net
+    if (nullptr == workspace_dnn_data) {
+      net.push_back(lrn_backward(lrn_bkwd_desc,
+                              src_dnn_data->GetOpMem(),
+                              input_gradient_diff_dst->GetOpMem(),
+                              output_diff_src->GetOpMem()));
+    } else {
+      net.push_back(lrn_backward(lrn_bkwd_desc,
+                                src_dnn_data->GetOpMem(),
+                                  input_gradient_diff_dst->GetOpMem(),
+                                  workspace_dnn_data->GetOpMem(),
+                                  output_diff_src->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+  void ConfigureWorkspace(const Tensor& workspace_tensor,
+                    memory::primitive_desc workspace_pd,
+                    MklDnnData<uint8> *workspace_dnn_data) {
+    CHECK_NOTNULL(workspace_dnn_data);
+
+    workspace_dnn_data->SetUsrMem(workspace_pd, &workspace_tensor);
+  }
+
+    // Fallback implementation - Taken from lrn_op.cc
+    // TODO(intelft) Check if we can use EigenLRNOp directly instead of making a
+    // copy.
+    void MklDefaultToEigen(OpKernelContext* context) {
+      Tensor input_gradient_tensor;
+      Tensor orig_input_tensor;
+      Tensor orig_output_tensor;
+
+      MklDnnShape input_grad_dnn_shape, orig_input_dnn_shape,
+                  orig_output_dnn_shape;
+      GetMklShape(context, kIdxGradient, &input_grad_dnn_shape);
+      GetMklShape(context, kIdxOrigInput, &orig_input_dnn_shape);
+      GetMklShape(context, kIdxOrigOutput, &orig_output_dnn_shape);
+
+      if (input_grad_dnn_shape.IsMklTensor()) {
+        input_gradient_tensor =
+            ConvertMklToTF<T>(context,
+                              MklGetInput(context, kIdxGradient),
+                              input_grad_dnn_shape);
+      } else {
+        input_gradient_tensor = MklGetInput(context, kIdxGradient);
+      }
+
+      if (orig_input_dnn_shape.IsMklTensor()) {
+        orig_input_tensor =
+            ConvertMklToTF<T>(context,
+                              MklGetInput(context, kIdxOrigInput),
+                              orig_input_dnn_shape);
+      } else {
+        orig_input_tensor = MklGetInput(context, kIdxOrigInput);
+      }
+
+      if (orig_output_dnn_shape.IsMklTensor()) {
+        orig_output_tensor =
+            ConvertMklToTF<T>(context,
+                              MklGetInput(context, kIdxOrigOutput),
+                              orig_output_dnn_shape);
+      } else {
+        orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
+      }
+
+      const int64 batch = static_cast<int64>(input_gradient_tensor.dim_size(0));
+      const int64 rows = static_cast<int64>(input_gradient_tensor.dim_size(1));
+      const int64 cols = static_cast<int64>(input_gradient_tensor.dim_size(2));
+      const int64 depth = static_cast<int64>(input_gradient_tensor.dim_size(3));
+      const auto nodes = cols * rows;
+
+      auto grads_shaped =
+          input_gradient_tensor.shaped<T, 2>({nodes * batch, depth});
+
+      auto in_shaped = orig_input_tensor.shaped<T, 2>({nodes * batch, depth});
+      auto activations =
+              orig_output_tensor.shaped<T, 2>({nodes * batch, depth});
+
+      Tensor* output_dnn_data;
+      MklShape mkl_output_mkl_shape;
+      mkl_output_mkl_shape.SetMklTensor(false);
+      mkl_output_mkl_shape.SetDimensions(4);
+      AllocateOutputSetMklShape(context, kIdxOutput,
+                      &output_dnn_data,
+                      input_gradient_tensor.shape(),
+                      mkl_output_mkl_shape);
+
+      auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
+      out_shaped.setZero();
+      auto shard = [this, activations, in_shaped, grads_shaped, out_shaped,
+                    depth](int64 begin, int64 end) {
+        for (int64 i = begin; i < end; ++i) {
+          for (int64 j = 0; j < depth; ++j) {
+            int64 depth_begin = std::max<int64>(0, j - depth_radius_);
+            int64 depth_end = std::min<int64>(depth, j + depth_radius_ + 1);
+
+            T norm(0);
+            for (int64 k = depth_begin; k < depth_end; ++k) {
+              norm += in_shaped(i, k) * in_shaped(i, k);
+            }
+            norm = alpha_ * norm + bias_;
+            DCHECK_GT(norm, T(1e-6));
+            for (int64 k = depth_begin; k < depth_end; ++k) {
+              T dyi = T(-2) * alpha_ * beta_ * in_shaped(i, k) *
+                      activations(i, j) / norm;
+              if (k == j) {
+                dyi += Eigen::numext::pow(norm, -beta_);
+              }
+              dyi *= grads_shaped(i, j);
+              const_cast<typename TTypes<T, 2>::Tensor&>(out_shaped)(i, k) +=
+                  dyi;
+            }
+          }
+        }
+      };
+      auto worker_threads =
+          *(context->device()->tensorflow_cpu_worker_threads());
+      Shard(worker_threads.num_threads, worker_threads.workers, nodes * batch,
+            depth * depth, shard);
+    }
+
+  void SanityCheckInputs(OpKernelContext* context) {
+    const Tensor& input_gradient_tensor = MklGetInput(context, kIdxGradient);
+    const Tensor& orig_input_tensor = MklGetInput(context, kIdxOrigInput);
+    const Tensor& orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
+    const Tensor& workspace_tensor = MklGetInput(context, kIdxWorkspace);
+    MklDnnShape in_grads_dnn_shape, in_image_dnn_shape, out_image_dnn_shape,
+                workspace_dnn_shape;
+    GetMklShape(context, kIdxGradient, &in_grads_dnn_shape);
+    GetMklShape(context, kIdxOrigInput, &in_image_dnn_shape);
+    GetMklShape(context, kIdxOrigOutput, &out_image_dnn_shape);
+    GetMklShape(context, kIdxWorkspace, &workspace_dnn_shape);
+    if (in_grads_dnn_shape.IsMklTensor()) {
+      OP_REQUIRES(context, in_grads_dnn_shape.GetDimension() == 4,
+                errors::InvalidArgument("Input gradient must be "
+                "4-dimensional"));
+    } else {
+      OP_REQUIRES(context, input_gradient_tensor.dims() == 4,
+            errors::InvalidArgument("input gradient must be 4-dimensional"));
+    }
+
+    if (in_image_dnn_shape.IsMklTensor()) {
+      OP_REQUIRES(context, in_image_dnn_shape.GetDimension() == 4,
+                errors::InvalidArgument("input images must be "
+                "4-dimensional"));
+    } else {
+      OP_REQUIRES(context, orig_input_tensor.dims() == 4,
+                  errors::InvalidArgument("input images must be "
+                  "4-dimensional"));
+    }
+
+    if (out_image_dnn_shape.IsMklTensor()) {
+      OP_REQUIRES(context, out_image_dnn_shape.GetDimension() == 4,
+                errors::InvalidArgument("Output image must be "
+                "4-dimensional"));
+    } else {
+      OP_REQUIRES(context, orig_output_tensor.dims() == 4,
+            errors::InvalidArgument("Output image must be 4-dimensional"));
+    }
+
+    if (workspace_enabled_) {
+      if (workspace_dnn_shape.IsMklTensor()) {
+        OP_REQUIRES(context, workspace_dnn_shape.IsMklTensor() == false,
+              errors::InvalidArgument("Workspace should not be MKL Tensor."));
+      } else {
+        OP_REQUIRES(context, workspace_tensor.dims() == 1,
+              errors::InvalidArgument("Workspace must be 1-dimensional"));
+      }
+    }
+  }
+
+// Input("input_grads: T")
+// Input("input_image: T")
+// Input("output_image: T")
+// Input("workspace: uint8")
+  const int kIdxGradient = 0,
+            kIdxOrigInput = 1,
+            kIdxOrigOutput = 2,
+            kIdxWorkspace = 3,
+            kIdxOutput = 0;
+
+  typedef typename Eigen::Tensor<T, 1, Eigen::RowMajor>::DimensionPair DimPair;
+  bool workspace_enabled_;
+  int depth_radius_;
+  float bias_;
+  float alpha_;
+  float beta_;
+};
+
+#endif  // INTEL_MKL_DNN
+
 #define REGISTER_MKL_LRN_CPU(T)                                     \
   REGISTER_KERNEL_BUILDER(Name("_MklLRN")                           \
                               .Device(DEVICE_CPU)                   \
@@ -729,6 +1393,7 @@ class MklLRNGradOp : public OpKernel {
                               .Label(mkl_op_registry::kMklOpLabel), \
                           MklLRNGradOp<T>);
 
+
 TF_CALL_float(REGISTER_MKL_LRN_CPU);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc
index 846bb5710ded92c303567e4078c49a56b3746706..82c5229bab0cfef51799d521d6ced6fab804176c 100644
--- a/tensorflow/core/kernels/mkl_maxpooling_op.cc
+++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc
@@ -16,17 +16,32 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc.
 #ifdef INTEL_MKL
 #define EIGEN_USE_THREADS
-
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 
+#ifdef INTEL_MKL_DNN
+#include <algorithm>
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::error;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::padding_kind;
+using mkldnn::engine;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// For now, MKL-ML is default. So making MKL-DNN not a default choice.
+#ifndef INTEL_MKL_DNN
+
 // An implementation of MaxPooling (forward).
 template <typename Device, typename T>
 class MklMaxPoolingOp : public OpKernel {
@@ -475,8 +490,348 @@ class MklMaxPoolingGradOp : public OpKernel {
   TensorFormat data_format_;
 
   bool workspace_enabled_;
+};  // MklMaxPoolingGradOp
+
+#else  // INTEL_MKL_DNN is defined
+
+// An implementation of MaxPooling (forward).
+template <typename Device, typename T>
+class MklMaxPoolingOp : public MklPoolingForwardOpBase<T> {
+ public:
+  explicit MklMaxPoolingOp(OpKernelConstruction* context)
+            : MklPoolingForwardOpBase<T>(context) {
+    // In Max Pooling, MKLDNN does not allow passing workspace as NULL.
+    // So we set workspace_enabled_ to true.
+    this->workspace_enabled_ = true;
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const Tensor& input_tensor = MklGetInput(context,
+                this->kInputTensorIndexInput);
+      MklDnnShape dnn_shape_input;
+      GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input);
+      this->SanityCheckInput(context, input_tensor, dnn_shape_input);
+      if (!context->status().ok()) return;
+
+      MklDnnData<T> dnn_data_input(&cpu_engine);
+      MklDnnData<T> dnn_data_output(&cpu_engine);
+      MklDnnData<uint8> dnn_data_wksp(&cpu_engine);
+
+      // initialize variables for the pooling op
+      MklPoolParameters pool_params;
+      // Get the input tensor and initialize the pooling parameters
+      this->ConfigureInput(context, dnn_shape_input,
+                        input_tensor, &pool_params,
+                        &dnn_data_input);
+      OP_REQUIRES_OK(context, context->status());
+
+      // Declare output tensor
+      Tensor* output_tensor = nullptr;
+      memory::dims output_dims_mkl_order;
+      this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+      // If input is in Mkl layout, then just get the memory format from it
+      // directly, instead of using input data_format to MaxPool.
+      if (dnn_shape_input.IsMklTensor()) {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                                  static_cast<memory::format>(
+              dnn_data_input.GetUsrMemDesc().data.format));
+      } else {
+        dnn_data_output.SetUsrMem(output_dims_mkl_order,
+                                  this->data_format_mkldnn_);
+      }
+
+      // describe the memory layout; let mkl-dnn choose the best for the op
+      dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
+
+      auto pool_desc = pooling_forward::desc(prop_kind::forward,
+            algorithm::pooling_max,
+            dnn_data_input.GetUsrMemDesc(),
+            dnn_data_output.GetUsrMemDesc(),
+            memory::dims({  pool_params.row_stride,
+                            pool_params.col_stride}),
+            memory::dims({  pool_params.window_rows,
+                            pool_params.window_cols}),
+            memory::dims({  static_cast<int>(pool_params.pad_top),
+                            static_cast<int>(pool_params.pad_left)}),
+            memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                            static_cast<int>(pool_params.pad_right)}),
+            TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_fwd_desc = pooling_forward::primitive_desc(pool_desc,
+            cpu_engine);
+
+      this->AllocateOutputTensor(context, pool_fwd_desc, output_dims_mkl_order,
+                            this->data_format_mkldnn_, &output_tensor);
+      OP_REQUIRES_OK(context, context->status());
+      dnn_data_output.SetUsrMemDataHandle(output_tensor);
+
+      AllocateWorkspaceTensor(context, pool_fwd_desc, &dnn_data_wksp);
+      OP_REQUIRES_OK(context, context->status());
+
+      this->PrepareAndExecuteNet(pool_fwd_desc, &dnn_data_input,
+                        &dnn_data_output, &dnn_data_wksp);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Compute received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+
+ private:
+    const int kOutputTensorIndexWorkspace = 1;
+
+    void AllocateWorkspaceTensor(OpKernelContext* context,
+                const pooling_forward::primitive_desc& pool_fwd_prim_desc,
+                MklDnnData<uint8>* dnn_data_wksp) {
+        CHECK_NOTNULL(dnn_data_wksp);
+        Tensor* workspace_tensor = nullptr;
+        memory::primitive_desc workspace_pd
+                    = pool_fwd_prim_desc.workspace_primitive_desc();
+        size_t workspace_bytes = workspace_pd.get_size();
+        MklDnnShape workspace_mkl_shape;
+        workspace_mkl_shape.SetMklTensor(false);
+        TensorShape workspace_tf_shape;
+        workspace_tf_shape.AddDim(workspace_bytes);
+        AllocateOutputSetMklShape(context, kOutputTensorIndexWorkspace,
+                                &workspace_tensor,
+                                workspace_tf_shape, workspace_mkl_shape);
+        CHECK_NOTNULL(workspace_tensor);
+        dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor);
+    }
 };
 
+// The operation to compute MaxPool gradients.
+// It takes three inputs:
+//   - The original input tensor
+//   - The original output tensor
+//   - Backprop tensor for output
+// It produces one output: backprop tensor for input.
+template <class Device, class T>
+class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase<T> {
+ public:
+  explicit MklMaxPoolingGradOp(OpKernelConstruction* context)
+      : MklPoolingBackwardOpBase<T>(context) {
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+        auto cpu_engine = engine(engine::cpu, 0);
+        const Tensor& orig_input_tensor = MklGetInput(context,
+            kInputTensorIndexOrigInput);
+        const Tensor& orig_output_tensor = MklGetInput(context,
+            kInputTensorIndexOrigOutput);
+        const Tensor& grad_tensor = MklGetInput(context,
+            kInputTensorIndexGradient);
+        const Tensor& workspace_tensor = MklGetInput(context,
+            kInputTensorIndexWorkspace);
+        MklDnnShape orig_input_mkl_shape,
+                    orig_output_mkl_shape,
+                    grad_mkl_shape,
+                    workspace_mkl_shape;
+        GetMklShape(context, kInputTensorIndexOrigInput,
+            &orig_input_mkl_shape);
+        GetMklShape(context, kInputTensorIndexOrigOutput,
+            &orig_output_mkl_shape);
+        GetMklShape(context, kInputTensorIndexGradient,
+            &grad_mkl_shape);
+        GetMklShape(context, kInputTensorIndexWorkspace,
+            &workspace_mkl_shape);
+
+        SanityCheckInputs(context,
+                            orig_input_tensor, orig_output_tensor,
+                            grad_tensor, workspace_tensor,
+                            orig_input_mkl_shape, orig_output_mkl_shape,
+                            grad_mkl_shape, workspace_mkl_shape);
+        if (!context->status().ok()) return;
+
+        MklDnnData<T> grad_dnn_data(&cpu_engine);
+        MklDnnData<uint8> workspace_dnn_data(&cpu_engine);
+        MklDnnData<T> output_dnn_data(&cpu_engine);
+        Tensor* output_tensor = nullptr;
+        MklPoolParameters pool_params;
+        TensorShape orig_input_shape;
+        memory::dims output_dims_mkl_order, orig_input_dims_mkl_order;
+        memory::desc original_input_md = ConfigureOriginalInput(context,
+                                orig_input_tensor,
+                                orig_input_mkl_shape,
+                                &orig_input_dims_mkl_order,
+                                &pool_params,
+                                &orig_input_shape);
+
+        memory::desc original_output_md = this->ConfigureOriginalOutput(
+                                pool_params,
+                                orig_output_mkl_shape,
+                                output_dims_mkl_order);
+
+        memory::desc target_diff_dst_md =  this->ConfigureInputGradient(
+                                        grad_mkl_shape,
+                                        grad_tensor,
+                                        &grad_dnn_data,
+                                        original_output_md);
+
+        output_dnn_data.SetUsrMem(original_input_md);
+
+        // Create the forward pooling primitive descriptor so we can
+        // pass it as a hint to the backward pooling primitive descriptor
+        auto pool_fwd_desc = pooling_forward::desc(prop_kind::forward,
+                algorithm::pooling_max,
+                original_input_md,
+                original_output_md,
+                memory::dims({  pool_params.row_stride,
+                                pool_params.col_stride}),
+                memory::dims({  pool_params.window_rows,
+                                pool_params.window_cols}),
+                memory::dims({  static_cast<int>(pool_params.pad_top),
+                                static_cast<int>(pool_params.pad_left)}),
+                memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                                static_cast<int>(pool_params.pad_right)}),
+                TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_fwd_prim_desc
+                = pooling_forward::primitive_desc(pool_fwd_desc,
+                                                    cpu_engine);
+
+        auto pool_bkwd_desc = pooling_backward::desc(
+                algorithm::pooling_max,
+                output_dnn_data.GetUsrMemDesc(),
+                target_diff_dst_md,
+                memory::dims({  pool_params.row_stride,
+                                pool_params.col_stride}),
+                memory::dims({  pool_params.window_rows,
+                                pool_params.window_cols}),
+                memory::dims({  static_cast<int>(pool_params.pad_top),
+                                static_cast<int>(pool_params.pad_left)}),
+                memory::dims({  static_cast<int>(pool_params.pad_bottom),
+                                static_cast<int>(pool_params.pad_right)}),
+                TFPaddingToMklDnnPadding(this->padding_));
+        auto pool_bkwd_prim_desc
+            = pooling_backward::primitive_desc(pool_bkwd_desc,
+                                                cpu_engine,
+                                                pool_fwd_prim_desc);
+
+        this->AllocateOutputTensor(context, pool_bkwd_prim_desc,
+            orig_input_dims_mkl_order,
+            this->data_format_mkldnn_,
+            &output_tensor);
+        output_dnn_data.SetUsrMemDataHandle(output_tensor);
+
+        ConfigureWorkspace(workspace_tensor,
+                pool_fwd_prim_desc.workspace_primitive_desc(),
+                &workspace_dnn_data);
+        this->PrepareAndExecuteNet(pool_bkwd_prim_desc,
+                            &grad_dnn_data,
+                            &output_dnn_data,
+                            memory::primitive_desc(
+                                target_diff_dst_md,
+                                cpu_engine),
+                            &workspace_dnn_data);
+    } catch (mkldnn::error &e) {
+        string error_msg = "Status: " + std::to_string(e.status) +
+                        ", message: " + string(e.message) +
+                        ", in file " + string(__FILE__) + ":" +
+                        std::to_string(__LINE__);
+        OP_REQUIRES_OK(context,
+                        errors::Aborted("Compute received an exception:",
+                                         error_msg));
+    }
+  }  // Compute
+
+ private:
+    // .Input("orig_input: T")
+    // .Input("orig_output: T")
+    // .Input("grad: T")
+    // .Input("workspace: T")
+    const int kInputTensorIndexOrigInput = 0;
+    const int kInputTensorIndexOrigOutput = 1;
+    const int kInputTensorIndexGradient = 2;
+    const int kInputTensorIndexWorkspace = 3;
+    //  Output("output: T") in Base Class
+
+    memory::desc ConfigureOriginalInput(OpKernelContext* context,
+                                const Tensor& tensor_original_input,
+                                const MklDnnShape& original_input_mkl_shape,
+                                memory::dims* original_input_dims_mkl_order,
+                                MklPoolParameters* pool_params,
+                                TensorShape* input_tensor_shape) {
+        *input_tensor_shape = tensor_original_input.shape();
+        return MklPoolingBackwardOpBase<T>::ConfigureOriginalInput(
+                                        context,
+                                        tensor_original_input,
+                                        original_input_mkl_shape,
+                                        original_input_dims_mkl_order,
+                                        pool_params,
+                                        *input_tensor_shape);
+    }
+
+    void ConfigureWorkspace(const Tensor& workspace_tensor,
+                        memory::primitive_desc workspace_pd,
+                        MklDnnData<uint8> *workspace_dnn_data) {
+        CHECK_NOTNULL(workspace_dnn_data);
+
+        workspace_dnn_data->SetUsrMem(workspace_pd, &workspace_tensor);
+    }
+
+    void SanityCheckInputs(OpKernelContext* context,
+                            const Tensor& orig_input_tensor,
+                            const Tensor& orig_output_tensor,
+                            const Tensor& grad_tensor,
+                            const Tensor& workspace_tensor,
+                            const MklDnnShape& orig_input_mkl_shape,
+                            const MklDnnShape& orig_output_mkl_shape,
+                            const MklDnnShape& grad_mkl_shape,
+                            const MklDnnShape& workspace_mkl_shape) {
+        if (!orig_input_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, orig_input_tensor.dims() == 4,
+                errors::InvalidArgument("Original input shape must be "
+                "4-dimensional"));
+        } else {
+            OP_REQUIRES(context, orig_input_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Original input shape must be "
+                    "4-dimensional"));
+        }
+        if (!orig_output_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, orig_output_tensor.dims() == 4,
+                errors::InvalidArgument("Original output must be "
+                        "4-dimensional"));
+        } else {
+            OP_REQUIRES(context, orig_output_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Original output must be "
+                    "4-dimensional"));
+        }
+        if (!grad_mkl_shape.IsMklTensor()) {
+            OP_REQUIRES(context, grad_tensor.dims() == 4,
+                errors::InvalidArgument("Gradient must be 4-dimensional"));
+        } else {
+            OP_REQUIRES(context, grad_mkl_shape.GetDimension() == 4,
+                    errors::InvalidArgument("Gradient must be "
+                    "4-dimensional"));
+        }
+        if (this->workspace_enabled_) {
+            // The workspace should not be an MKL tensor
+            OP_REQUIRES(context, workspace_mkl_shape.IsMklTensor() == false,
+                    errors::InvalidArgument("Workspace tensor should not"
+                                            " be an MKL Tensor."));
+            // It should only have one dimension
+            OP_REQUIRES(context, workspace_tensor.dims() == 1,
+                    errors::InvalidArgument("Workspace tensor must be "
+                                "1-dimensional"));
+        } else {
+            OP_REQUIRES(context, this->workspace_enabled_,
+                    errors::Unimplemented("MKL-DNN Max Pooling does not "
+                                "yet support the use case "
+                                "where MaxPoolGrad is called without first"
+                                " calling MaxPool."));
+        }
+    }
+};  // MklMaxPoolingGradOp
+
+#endif  // INTEL_MKL_DNN
+
 REGISTER_KERNEL_BUILDER(Name("_MklMaxPool")
                             .Device(DEVICE_CPU)
                             .TypeConstraint<float>("T")
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
index 65e8852cfb11a2dd78395860a7ca7b2cc550be34..f7cadffd39c11bdedaca6a07e48f222e7ac5e0cb 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc
@@ -14,10 +14,13 @@ limitations under the License.
 ==============================================================================*/
 
 #ifdef INTEL_MKL
+
 #include <vector>
+#include <limits>
 #include "tensorflow/core/kernels/mkl_pooling_ops_common.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/kernels/bounds_check.h"
 
 namespace tensorflow {
 
@@ -39,6 +42,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
   Init(context, ksize, stride, padding, data_format);
 }
 
+#ifndef INTEL_MKL_DNN
 // Initialization for MKL format
 void MklPoolParameters::Init(OpKernelContext* context,
                              const std::vector<int32>& ksize,
@@ -53,7 +57,22 @@ void MklPoolParameters::Init(OpKernelContext* context,
 
   Init(context, ksize, stride, padding, data_format);
 }
+#else
+// Initialization for MKL format
+void MklPoolParameters::Init(OpKernelContext* context,
+                             const std::vector<int32>& ksize,
+                             const std::vector<int32>& stride, Padding padding,
+                             TensorFormat data_format,
+                             const MklDnnShape* mklInputShape) {
+  // Get the input sizes
+  depth = mklInputShape->GetDimension('C');
+  tensor_in_cols = mklInputShape->GetDimension('W');
+  tensor_in_rows = mklInputShape->GetDimension('H');
+  tensor_in_batch = mklInputShape->GetDimension('N');
 
+  Init(context, ksize, stride, padding, data_format);
+}
+#endif  // INTEL_MKL_DNN
 // Common Initialization for TensorFlow and MKL formats
 void MklPoolParameters::Init(OpKernelContext* context,
                              const std::vector<int32>& ksize,
@@ -80,7 +99,7 @@ void MklPoolParameters::Init(OpKernelContext* context,
                   "MaxPooling supports exactly one of pooling across depth "
                   "or pooling across width/height."));
 
-  if (depth_window == 1) {
+  if (depth_window == 1) {  // we are pooling in the H and W
     OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
                                 tensor_in_rows, window_rows, row_stride,
                                 padding, &out_height, &pad_top, &pad_bottom));
@@ -88,7 +107,21 @@ void MklPoolParameters::Init(OpKernelContext* context,
     OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
                                 tensor_in_cols, window_cols, col_stride,
                                 padding, &out_width, &pad_left, &pad_right));
-  } else {
+#ifdef INTEL_MKL_DNN
+    // TF can work with int64, but mkldnn only supports int32
+    // Fail if the height or width are greater than MAX_INT
+
+    OP_REQUIRES(context, FastBoundsCheck(out_height,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("output height is too large"));
+
+    OP_REQUIRES(context, FastBoundsCheck(out_width,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("output width is too large"));
+
+#endif
+    out_depth = depth;  // output will have the same depth as the input
+  } else {  // we are pooling in the depth dimension
     // Our current version of depthwise max pooling does not support
     // any padding, and expects the depth_window to equal the depth
     // stride (no overlapping).
@@ -109,7 +142,6 @@ void MklPoolParameters::Init(OpKernelContext* context,
                 errors::Unimplemented("Depthwise max pooling is currently "
                                       "only implemented for CPU devices."));
 
-    pad_depth = 0;
     out_depth = depth / depth_window;
   }
 }
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h
index 92ea2beb25aa1fd4cab7fd787b04c4d086ca1b05..b974b2c59afe91b955af45f3851c7371d8a86610 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.h
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h
@@ -18,9 +18,18 @@ limitations under the License.
 
 #ifdef INTEL_MKL
 #include <vector>
+#include <string>
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::memory;
+using mkldnn::pooling_forward;
+using mkldnn::pooling_backward;
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -51,14 +60,28 @@ struct MklPoolParameters {
   int pad_depth;
 
   TensorFormat data_format;
+  MklPoolParameters()
+    : depth(0)
+    , tensor_in_cols(0), tensor_in_rows(0), tensor_in_batch(0)
+    , window_rows(0), window_cols(0), depth_window(0)
+    , row_stride(0), col_stride(0), depth_stride(0)
+    , out_height(0), out_width(0), out_depth(0)
+    , pad_left(0), pad_right(0), pad_top(0), pad_bottom(0), pad_depth(0)
+    , data_format(TensorFormat::FORMAT_NCHW) {}
 
   // Updates context->status if there is an invalid input.
   void Init(OpKernelContext* context, const std::vector<int32>& ksize,
             const std::vector<int32>& stride, Padding padding,
             TensorFormat data_format, const TensorShape& tensor_in_shape);
+#ifndef INTEL_MKL_DNN
   void Init(OpKernelContext* context, const std::vector<int32>& ksize,
             const std::vector<int32>& stride, Padding padding,
             TensorFormat data_format, const MklShape* mkl_in_shape);
+#else
+  void Init(OpKernelContext* context, const std::vector<int32>& ksize,
+            const std::vector<int32>& stride, Padding padding,
+            TensorFormat data_format, const MklDnnShape* mkl_in_shape);
+#endif
 
  private:
   // Common initialization for TensorFlow and MKL formats
@@ -67,6 +90,325 @@ struct MklPoolParameters {
             TensorFormat data_format);
 };
 
+#ifdef INTEL_MKL_DNN
+
+template <class T>
+class MklPoolingOpBase : public OpKernel {
+ public:
+  explicit MklPoolingOpBase(OpKernelConstruction* context)
+            : OpKernel(context)
+            , workspace_enabled_(false) {
+      string data_format;
+      OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+      OP_REQUIRES(context,
+            FormatFromString(data_format, &this->data_format_tf_),
+            errors::InvalidArgument("Invalid data format"));
+      this->data_format_mkldnn_
+                = TFDataFormatToMklDnnDataFormat(this->data_format_tf_);
+      OP_REQUIRES_OK(context, context->GetAttr("ksize", &this->ksize_));
+      OP_REQUIRES(context, this->ksize_.size() == 4,
+                  errors::InvalidArgument("Sliding window ksize field must "
+                                          "specify 4 dimensions"));
+      OP_REQUIRES_OK(context, context->GetAttr("strides", &this->stride_));
+      OP_REQUIRES(context, this->stride_.size() == 4,
+                  errors::InvalidArgument("Sliding window strides field must "
+                                          "specify 4 dimensions"));
+      OP_REQUIRES_OK(context, context->GetAttr("padding", &this->padding_));
+      OP_REQUIRES(context, this->ksize_[0] == 1 && this->stride_[0] == 1,
+                  errors::Unimplemented("Pooling is not yet supported on the "
+                                        "batch dimension."));
+
+      // We may not get this attribute for this node if it does not go through
+      // graph rewrite pass. So we do not check for error while retrieving this
+      // attribute value.
+      context->GetAttr("workspace_enabled", &this->workspace_enabled_);
+    }
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  // Calculate output shape of pooling op in MKL-DNN and TensorFlow order.
+  // MKL-DNN uses NCHW for output order. But TensorFlow output will be in
+  // NHWC or NCHW format depending on data format. Function expects
+  // output height and output width to have already been int32
+  // bounds-checked
+  void GetOutputDims(const MklPoolParameters& mkl_pool_params,
+                    memory::dims* output_dims_mkl_order) {
+    // MKL-DNN always needs output in NCHW format.
+    *output_dims_mkl_order = { mkl_pool_params.tensor_in_batch,
+                              mkl_pool_params.out_depth,
+                              static_cast<int>(mkl_pool_params.out_height),
+                              static_cast<int>(mkl_pool_params.out_width)};
+  }
+
+  void InitMklPoolParameters(OpKernelContext* context,
+                      MklPoolParameters* pool_params,
+                      const MklDnnShape& original_input_mkl_shape,
+                      const TensorShape& input_tensor_shape) {
+    if (!original_input_mkl_shape.IsMklTensor()) {
+      pool_params->Init(context, this->ksize_, this->stride_, this->padding_,
+          this->data_format_tf_, input_tensor_shape);
+    } else {
+      pool_params->Init(context, this->ksize_, this->stride_, this->padding_,
+          this->data_format_tf_, &original_input_mkl_shape);
+    }
+  }
+
+  // Checks to make sure that the memory we need to allocate
+  // is a multiple of sizeof(T)
+  // returns the number of elements
+  size_t GetNumTElements(const memory::primitive_desc& pd) {
+    size_t num_bytes = pd.get_size();
+    size_t ret_val = num_bytes / sizeof(T);
+    if ( num_bytes % sizeof(T) != 0 ) {
+        ret_val++;
+    }
+    return ret_val;
+  }
+
+
+  std::vector<int32> ksize_;
+  std::vector<int32> stride_;
+  Padding padding_;
+  TensorFormat data_format_tf_;
+  memory::format data_format_mkldnn_;
+  bool workspace_enabled_;
+};
+
+template <class T>
+class MklPoolingForwardOpBase : public MklPoolingOpBase<T> {
+ public:
+  explicit MklPoolingForwardOpBase<T>(OpKernelConstruction* context)
+      : MklPoolingOpBase<T>(context) {}
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  void ConfigureInput(OpKernelContext* context,
+                    const MklDnnShape& input_mkl_shape,
+                    const Tensor& input_tensor,
+                    MklPoolParameters* pool_params,
+                    MklDnnData<T>* dnn_data_input) {
+    CHECK_NOTNULL(pool_params);
+    CHECK_NOTNULL(dnn_data_input);
+    TensorShape input_tensor_shape = input_tensor.shape();
+    memory::desc input_md = input_mkl_shape.IsMklTensor()
+                        ? input_mkl_shape.GetMklLayout()
+                        : memory::desc(
+                              TFShapeToMklDnnDimsInNCHW(
+                                  input_tensor_shape, this->data_format_tf_),
+                              MklDnnType<T>(),
+                              this->data_format_mkldnn_);
+    dnn_data_input->SetUsrMem(input_md, &input_tensor);
+    this->InitMklPoolParameters(context, pool_params,
+                      input_mkl_shape, input_tensor_shape);
+  }
+
+  void AllocateOutputTensor(OpKernelContext* context,
+            const pooling_forward::primitive_desc& pool_fwd_prim_desc,
+            const memory::dims output_dims_mkl_order,
+            const memory::format& output_tf_format,
+            Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd = pool_fwd_prim_desc.dst_primitive_desc();
+
+    MklDnnShape output_mkl_shape;
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                              output_dims_mkl_order,
+                              output_tf_format);
+    TensorShape output_tf_shape;
+
+    // only allocate enough space for the elements we need.
+    output_tf_shape.AddDim(this->GetNumTElements(dst_pd));
+    AllocateOutputSetMklShape(context, kOutputTensorIndexOutput,
+                            output_tensor,
+                            output_tf_shape, output_mkl_shape);
+    CHECK_NOTNULL(*output_tensor);
+  }
+
+  void PrepareAndExecuteNet(
+                  const pooling_forward::primitive_desc& pool_fwd_desc,
+                  const MklDnnData<T>* src,
+                  MklDnnData<T>* dst,
+                  MklDnnData<uint8>* wksp = nullptr) {
+    std::vector<primitive> net;
+
+    // Create pooling primitive and add it to net
+    if (wksp != nullptr) {
+        net.push_back(pooling_forward(pool_fwd_desc,
+                        src->GetOpMem(),
+                        dst->GetOpMem(),
+                        wksp->GetOpMem()));
+    } else {
+        net.push_back(pooling_forward(pool_fwd_desc,
+            src->GetOpMem(),
+            dst->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+
+  void SanityCheckInput(OpKernelContext* context,
+                  const Tensor& input_tensor,
+                  const MklDnnShape& input_mkl_shape) {
+    if (!input_mkl_shape.IsMklTensor()) {
+      OP_REQUIRES(context, input_tensor.dims() == 4,
+          errors::InvalidArgument("Input must be 4-dimensional"));
+    } else {
+        OP_REQUIRES(context, input_mkl_shape.GetDimension() == 4,
+                errors::InvalidArgument("Input shape must be "
+                "4-dimensional"));
+    }
+  }
+  // .Input("value: T")
+  // .Output("output: T")
+  const int kInputTensorIndexInput = 0;
+  const int kOutputTensorIndexOutput = 0;
+};  // MklPoolingForwardBaseOp
+
+
+template <class T>
+class MklPoolingBackwardOpBase : public MklPoolingOpBase<T> {
+ public:
+  explicit MklPoolingBackwardOpBase<T>(OpKernelConstruction* context)
+          : MklPoolingOpBase<T>(context) { }
+  void Compute(OpKernelContext* context) override = 0;
+
+ protected:
+  const int kOutputTensorIndexOutput = 0;
+
+  void AllocateOutputTensor(OpKernelContext* context,
+            const pooling_backward::primitive_desc& pool_bkwd_prim_desc,
+            const memory::dims output_dims_mkl_order,
+            const memory::format& output_tf_format,
+            Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd
+                = pool_bkwd_prim_desc.diff_src_primitive_desc();
+    MklDnnShape output_mkl_shape;
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                              output_dims_mkl_order,
+                              output_tf_format);
+
+    TensorShape output_tf_shape;
+    output_tf_shape.AddDim(this->GetNumTElements(dst_pd));
+    AllocateOutputSetMklShape(context, kOutputTensorIndexOutput,
+                            output_tensor,
+                            output_tf_shape, output_mkl_shape);
+    CHECK_NOTNULL(*output_tensor);
+  }
+
+  void PrepareAndExecuteNet(
+    const pooling_backward::primitive_desc& pool_bkwd_desc,
+    MklDnnData<T>* input_gradient_diff_dst,
+    MklDnnData<T>* output_diff_src,
+    const memory::primitive_desc& target_diff_dst_pd,
+    const MklDnnData<uint8>* workspace = nullptr) {
+
+    std::vector<primitive> net;
+
+    // If the input gradient isn't in the same format as the output
+    // reorder it to the same format as the output
+    input_gradient_diff_dst->CheckReorderToOpMem(
+            target_diff_dst_pd,
+            &net);
+
+    // Create pooling primitive and add it to net
+    if (nullptr == workspace) {
+      net.push_back(pooling_backward(pool_bkwd_desc,
+                              input_gradient_diff_dst->GetOpMem(),
+                              output_diff_src->GetOpMem()));
+    } else {
+      net.push_back(pooling_backward(pool_bkwd_desc,
+                                  input_gradient_diff_dst->GetOpMem(),
+                                  workspace->GetOpMem(),
+                                  output_diff_src->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+  // Max Pooling and Avg Pooling have slightly different implementations
+  // Takes the Tensor containing original input data and the original
+  // mkl Dnn Shape and populates other data
+  memory::desc ConfigureOriginalInput(OpKernelContext* context,
+                              const Tensor& tensor_original_input_shape,
+                              const MklDnnShape& original_input_mkl_shape,
+                              memory::dims* original_input_dims_nchw,
+                              MklPoolParameters* pool_params,
+                              const TensorShape& input_tensor_shape) {
+    CHECK_NOTNULL(original_input_dims_nchw);
+    CHECK_NOTNULL(pool_params);
+    this->InitMklPoolParameters(context, pool_params,
+                          original_input_mkl_shape,
+                          input_tensor_shape);
+
+    *original_input_dims_nchw
+          = original_input_mkl_shape.IsMklTensor()
+          ? original_input_mkl_shape.GetSizesAsMklDnnDims()
+          : TFShapeToMklDnnDimsInNCHW(input_tensor_shape,
+        this->data_format_tf_);
+
+    return  original_input_mkl_shape.IsMklTensor()
+      ? original_input_mkl_shape.GetMklLayout()
+      : memory::desc(*original_input_dims_nchw,
+                      MklDnnType<T>(),
+                      this->data_format_mkldnn_);
+  }
+
+  memory::desc ConfigureOriginalOutput(const MklPoolParameters& pool_params,
+                                const MklDnnShape& original_output_mkl_shape,
+                                      memory::dims output_dims_mkl_order) {
+    this->GetOutputDims(pool_params, &output_dims_mkl_order);
+
+    return original_output_mkl_shape.IsMklTensor()
+            ? original_output_mkl_shape.GetMklLayout()
+            : memory::desc(output_dims_mkl_order,
+                         MklDnnType<T>(),
+                         this->data_format_mkldnn_);
+  }
+
+  memory::desc ConfigureInputGradient(
+        const MklDnnShape& input_gradient_mkl_shape,
+        const Tensor& input_gradient_tensor,
+        MklDnnData<T>* input_gradient_dnn_data,
+        const memory::desc& original_output_md) {
+    // Configure the gradient as is
+    memory::desc original_input_grad_md
+          = input_gradient_mkl_shape.IsMklTensor()
+          ? input_gradient_mkl_shape.GetMklLayout()
+          : memory::desc(TFShapeToMklDnnDimsInNCHW(
+                    input_gradient_tensor.shape(),
+                    this->data_format_tf_),
+                    MklDnnType<T>(), this->data_format_mkldnn_);
+
+    input_gradient_dnn_data->SetUsrMem(original_input_grad_md,
+                &input_gradient_tensor);
+
+    // Check to see if input grad diff dst is in the right format
+    // Create a new memory descriptor with the same shape as the
+    // original, but the format of the other tensors.
+    memory::format original_output_format =
+            static_cast<memory::format>(original_output_md.data.format);
+    bool grad_reorder_needed = input_gradient_dnn_data->IsReorderNeeded(
+                                    original_output_format);
+    memory::dims diff_dst_dims = input_gradient_mkl_shape.IsMklTensor()
+        ? input_gradient_mkl_shape.GetSizesAsMklDnnDims()
+        : TFShapeToMklDnnDimsInNCHW(input_gradient_tensor.shape(),
+                    this->data_format_tf_);
+    memory::desc target_diff_dst_md = memory::desc(diff_dst_dims,
+        MklDnnType<T>(), original_output_format);
+
+    return grad_reorder_needed
+            ? target_diff_dst_md
+            : original_input_grad_md;
+  }
+};
+#endif  // INTEL_MKL_DNN
+
 //-------------------------------------------------------------------
 // Utility functions
 
diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 86a77d769a52d7592d15627b504ae60278b45058..dc899d8c7ee231af403e6ca98ca60d94f78d0a81 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -28,6 +28,19 @@ limitations under the License.
 #include "mkl_dnn.h"
 #include "mkl_dnn_types.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+
+using mkldnn::stream;
+using mkldnn::prop_kind;
+using mkldnn::algorithm;
+using mkldnn::relu_forward;
+using mkldnn::relu_backward;
+using mkldnn::eltwise_relu;
+using mkldnn::eltwise_elu;
+using mkldnn::eltwise_tanh;
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -45,6 +58,8 @@ struct MklReluHelpers {
   }
 };
 
+#ifndef INTEL_MKL_DNN
+
 template <typename Device, typename T>
 class MklReluOp : public OpKernel {
  public:
@@ -59,6 +74,7 @@ class MklReluOp : public OpKernel {
     GetMklShape(context, 0, &mkl_context.input_shape);
     void* user_i = static_cast<void*>(const_cast<T*>(input.flat<T>().data()));
     bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor();
+
     if (!input_in_mkl_format && !input.dims()) {  // handle the case of a scalar
       const TensorShape& o_shape = input.shape();
       Tensor* out_tensor = nullptr;
@@ -164,6 +180,7 @@ class MklReluOp : public OpKernel {
   } MklReluOpContext;
 };
 
+
 template <typename Device, typename T>
 class MklReluGradOp : public OpKernel {
  public:
@@ -189,18 +206,18 @@ class MklReluGradOp : public OpKernel {
       const Tensor& a = MklGetInput(context, 1);
       void* buf_input = static_cast<void*>(const_cast<T*>(a.flat<T>().data()));
       void* mkl_buffer_convert = nullptr;
+
       dnnPrimitive_t cv_input_to_grad = nullptr;
 
-      // if input and grad are not in the same layout, do a conversion between
-      // them.
+      // if input and grad are not in the same layout,
+      // do a conversion between them.
       if (!dnnLayoutCompare_F32(lt_input, lt_grad)) {
         AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, lt_grad,
                        &mkl_buffer_convert);
         CHECK_EQ(dnnConversionCreate_F32(&cv_input_to_grad, lt_input,
                    lt_grad), E_SUCCESS);
         CHECK_EQ(dnnConversionExecute_F32(cv_input_to_grad, buf_input,
-                                          mkl_buffer_convert),
-                 E_SUCCESS);
+                                          mkl_buffer_convert), E_SUCCESS);
         relu_res[dnnResourceSrc] = mkl_buffer_convert;
         dnnDelete_F32(cv_input_to_grad);
       } else {
@@ -246,7 +263,6 @@ class MklReluGradOp : public OpKernel {
 };
 
 template <typename Device, typename T>
-
 void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   MklReluGradOpContext mkl_context;
   const Tensor& g = MklGetInput(context, 0);
@@ -264,20 +280,21 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
       !MklReluHelpers::ValidateSameSize(context, g, a))
     return;
   Tensor* output = nullptr;
-  if (!input_is_mkl && !grad_is_mkl &&
-      !a.dims()) {  // handle the case of a scalar
-    // Allocate space for g and
+
+  if (!input_is_mkl && !grad_is_mkl && !a.dims()) {
+    // handle the scalar case
     const TensorShape& g_shape = g.shape();
     mkl_context.output_shape.SetMklTensor(false);
     AllocateOutputSetMklShape(context, 0, &output, g_shape,
                               mkl_context.output_shape);
+
     void* out_o = static_cast<void*>(output->flat<T>().data());
     (static_cast<T*>(out_o))[0] =
         (static_cast<T*>(user_g))[0] * ((static_cast<T*>(user_i))[0] > 0);
     return;
   }
 
-  // Generate size, stride for input if input/grad is in MKL format.
+  // generate size, stride for input if input/grad is in mkl format.
   if (grad_is_mkl || input_is_mkl) {
     const MklShape* tmp_mkl_shape =
         (grad_is_mkl) ? &mkl_context.grad_shape : &mkl_context.input_shape;
@@ -308,21 +325,20 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   float negative_slope = 0.0;
   CHECK_EQ(dnnReLUCreateBackward_F32(&mkl_context.prim_relu_bwd, NULL,
                                      mkl_context.lt_grad, mkl_context.lt_grad,
-                                     negative_slope),
-           E_SUCCESS);
+                                     negative_slope), E_SUCCESS);
   Tensor mkl_tmp_input_buf_tensor;
   mkl_context.MklPrepareReluGradInputs(context, &mkl_tmp_input_buf_tensor);
 
   if (input_is_mkl ||
-      grad_is_mkl) { /*if  grad or input are MKL leave it in MKL*/
+      grad_is_mkl) { /*if  grad or input are mkl leave it in mkl*/
     TensorShape tf_shape;
     mkl_context.output_shape.SetMklTensor(true);
     mkl_context.output_shape.SetMklLayout(mkl_context.prim_relu_bwd,
                                           dnnResourceDiffSrc);
     mkl_context.output_shape.SetTfLayout(
         mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
-    // If input_is_mkl or grad_is_mkl, then we copy strides and sizes from Mkl
-    // shape of one that is in MKL layout.
+    // if input_is_mkl or grad_is_mkl, then we copy strides and sizes from mkl
+    // shape of one that is in mkl layout.
     if (grad_is_mkl == true) {
       mkl_context.output_shape.SetTfDimOrder(
           mkl_context.in_dims, mkl_context.grad_shape.GetTfToMklDimMap());
@@ -332,11 +348,9 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
     }
 
     tf_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
-                        mkl_context.output_shape.GetMklLayout())) /
-                    sizeof(T));
+                    mkl_context.output_shape.GetMklLayout())) / sizeof(T));
     AllocateOutputSetMklShape(context, 0, &output, tf_shape,
                               mkl_context.output_shape);
-
   } else {
     const TensorShape& o_shape = g.shape();
     mkl_context.output_shape.SetMklTensor(false);
@@ -347,13 +361,486 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
   mkl_context.relu_res[dnnResourceDiffSrc] =
       static_cast<void*>(output->flat<T>().data());
 
-  CHECK_EQ(dnnExecute_F32(mkl_context.prim_relu_bwd, mkl_context.relu_res),
-           E_SUCCESS);
+  CHECK_EQ(dnnExecute_F32(mkl_context.prim_relu_bwd,
+                          mkl_context.relu_res),
+                          E_SUCCESS);
   mkl_context.MklCleanup();
 }
 
-/* Register DNN kernels for supported operations and supported types - right now
- * it is only Relu and f32*/
+
+#else  // INTEL_MKL_DNN
+
+template <typename Device, typename T, algorithm alg_kind>
+class MklReluOpBase : public OpKernel {
+ public:
+  ~MklReluOpBase() {}
+
+  explicit MklReluOpBase(OpKernelConstruction* context) : OpKernel(context) {
+  }
+
+  virtual void Compute_Scalar(OpKernelContext* context) = 0;
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      const size_t src_index = 0;  // index of src input tensor
+      const size_t dst_index = 0;  // index of dst output tensor
+      const Tensor& src_tensor = MklGetInput(context, src_index);
+      MklDnnShape dnn_shape_src;
+      GetMklShape(context, src_index, &dnn_shape_src);
+
+      Tensor* dst_tensor = nullptr;
+      if (src_tensor.dims() == 0) {
+        Compute_Scalar(context);
+        return;
+      }
+
+      // Create relu primitive.
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      // Set DNN primitive - src
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor()) {
+        src_md = dnn_shape_src.GetMklLayout();
+      } else {
+        auto src_dims = TFShapeToMklDnnDims(src_tensor.shape());
+        auto src_strides = CalculateTFStrides(src_dims);
+        // Create blocked memory descriptor
+        src_md = MklDnnData<T>::CreateBlockedMemDesc(src_dims, src_strides);
+      }
+      src.SetUsrMem(src_md, &src_tensor);
+
+      T alpha = 0, beta = 0;
+      std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
+      auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training,
+          // Operator memory descriptor is same as user memory descriptor.
+                                              alg_kind, src.GetUsrMemDesc(),
+                                              alpha, beta);
+      relu_fwd_pd.reset(new relu_forward::primitive_desc(relu_fwd_desc,
+                                                         cpu_engine));
+
+      // allocate dst tensor
+      MklDnnShape dnn_shape_dst;
+      TensorShape tf_shape_dst;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_dst.SetMklTensor(true);
+        auto dst_pd = relu_fwd_pd->dst_primitive_desc();
+        dnn_shape_dst.SetMklLayout(&dst_pd);
+        dnn_shape_dst.SetElemType(MklDnnType<T>());
+        dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(),
+                                  dnn_shape_src.GetSizesAsMklDnnDims(),
+                                  dnn_shape_src.GetTfDataFormat());
+        tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_dst.SetMklTensor(false);
+        tf_shape_dst = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, dst_index, &dst_tensor, tf_shape_dst,
+                                dnn_shape_dst);
+
+      // Destination memory descriptor is same as source memory descriptor.
+      auto dst_md = src_md;
+      dst.SetUsrMem(dst_md, dst_tensor);
+
+      // execute net
+      std::vector<primitive> net;
+      auto relu_fwd = relu_forward(*relu_fwd_pd, src.GetOpMem(),
+                                   dst.GetOpMem());
+      net.push_back(relu_fwd);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) +
+                         ", in file " + string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                        error_msg));
+    }
+  }
+};
+
+
+template <typename Device, typename T, algorithm alg_kind>
+class MklReluGradOpBase : public OpKernel {
+ public:
+  ~MklReluGradOpBase() {}
+
+  explicit MklReluGradOpBase(OpKernelConstruction* context) :
+    OpKernel(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) = 0;
+
+  void Compute(OpKernelContext* context)  {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> diff_dst(&cpu_engine);
+      MklDnnData<T> diff_src(&cpu_engine);
+
+      const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+      const size_t src_index = 1;       // index of src input tensor
+      const size_t diff_src_index = 0;  // index of diff_src output tensor
+
+      const Tensor& src_tensor      = MklGetInput(context, src_index);
+      const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+      Tensor* diff_src_tensor       = nullptr;
+
+      MklDnnShape dnn_shape_src, dnn_shape_diff_dst;
+      GetMklShape(context, src_index, &dnn_shape_src);
+      GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+      int src_dims_size = src_tensor.dims();
+      if (src_dims_size == 0) {
+        Compute_Scalar(context);
+        return;
+      }
+
+      // Set DNN primitives for src & diff_dst
+      memory::desc src_md({}, memory::data_undef, memory::format_undef);
+      memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef);
+
+      // For creating Sum primitive, we need to ensure that all inputs are in
+      // same format. What that means is if we have a mixed input case - where
+      // one input is in Tensorflow format and one input is in MKL format -,
+      // then we need to ensure that all inputs are in same format for
+      // primitive construction. For performance reason, we say that all inputs
+      // are in MKL format in such case, and insert reorder for input that is
+      // in Tensorflow format into MKL format. On the other hand, if both the
+      // inputs are in MKL format or both are in Tensorflow format, then we
+      // dont need reorder.
+      if (!dnn_shape_src.IsMklTensor() && !dnn_shape_diff_dst.IsMklTensor()) {
+        // If both the inputs are in Tensorflow format, we create blocked memory
+        // descriptor.
+        auto src_dims = TFShapeToMklDnnDims(src_tensor.shape());
+        auto src_strides = CalculateTFStrides(src_dims);
+        src_md = MklDnnData<T>::CreateBlockedMemDesc(src_dims, src_strides);
+        diff_dst_md = src_md;
+      } else if (dnn_shape_src.IsMklTensor() &&
+                 !dnn_shape_diff_dst.IsMklTensor()) {
+        // If one input is in MKL format and other is in Tensorflow, then
+        // create respective descriptors describing the actual case. For input
+        // in Mkl format, we just get Mkl layout from MklDnnShape. For input in
+        // Tensorflow format, we create memory descriptor using data format.
+        src_md = dnn_shape_src.GetMklLayout();
+
+        memory::format src_mkl_data_format = dnn_shape_src.GetTfDataFormat();
+        auto src_tf_data_format = MklDnnDataFormatToTFDataFormat(
+                                    src_mkl_data_format);
+        auto diff_dst_dims = TFShapeToMklDnnDimsInNCHW(diff_dst_tensor.shape(),
+                                                       src_tf_data_format);
+        diff_dst_md = memory::desc(diff_dst_dims, MklDnnType<T>(),
+                                   src_mkl_data_format);
+      } else if (!dnn_shape_src.IsMklTensor() &&
+                  dnn_shape_diff_dst.IsMklTensor()) {
+        // Same comment as above.
+        diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
+
+        memory::format diff_dst_mkl_data_format =
+          dnn_shape_diff_dst.GetTfDataFormat();
+        auto diff_dst_tf_data_format = MklDnnDataFormatToTFDataFormat(
+                                          diff_dst_mkl_data_format);
+        auto src_dims = TFShapeToMklDnnDimsInNCHW(src_tensor.shape(),
+                                                  diff_dst_tf_data_format);
+        src_md = memory::desc(src_dims, MklDnnType<T>(),
+                              diff_dst_mkl_data_format);
+      } else {
+        // If both the inputs are in MKL format, we use Mkl layout of the input
+        // tensors.
+        src_md = dnn_shape_src.GetMklLayout();
+        diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
+      }
+
+      src.SetUsrMem(src_md, &src_tensor);
+      diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);
+
+      // As per comment above, we tell MKLDNN that both the inputs are in same
+      // format. So we set common memory descriptor in MKL format, if any of the
+      // inputs are in MKL format. Let's get memory descriptor that we will use
+      // for both the inputs.
+      memory::desc common_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
+        common_md = dnn_shape_src.IsMklTensor() ? src_md : diff_dst_md;
+      } else {
+        // Since both the inputs are in Tensorflow format, and have
+        // same shape, we can get memory descriptor from any input.
+        common_md = src_md;
+      }
+
+      T alpha = 0, beta = 0;
+      std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
+      auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training,
+                                              alg_kind, src_md, alpha, beta);
+      relu_fwd_pd.reset(new relu_forward::primitive_desc(relu_fwd_desc,
+                                                         cpu_engine));
+      auto relu_bwd_desc = relu_backward::desc(alg_kind, common_md, common_md,
+                                                alpha, beta);
+      auto relu_bwd_pd  = relu_backward::primitive_desc(relu_bwd_desc,
+                                                cpu_engine, *relu_fwd_pd);
+
+      // allocate diff_src tensor
+      MklDnnShape dnn_shape_diff_src;
+      TensorShape tf_shape_diff_src;
+      if (dnn_shape_src.IsMklTensor()) {
+        dnn_shape_diff_src.SetMklTensor(true);
+        auto diff_src_pd = relu_bwd_pd.diff_src_primitive_desc();
+        dnn_shape_diff_src.SetMklLayout(&diff_src_pd);
+        dnn_shape_diff_src.SetElemType(MklDnnType<T>());
+        dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(),
+                                       dnn_shape_src.GetSizesAsMklDnnDims(),
+                                       dnn_shape_src.GetTfDataFormat());
+        tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T));
+      } else {
+        dnn_shape_diff_src.SetMklTensor(false);
+        tf_shape_diff_src = src_tensor.shape();
+      }
+      AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                                 tf_shape_diff_src, dnn_shape_diff_src);
+
+      // diff_src memory descriptor is same as memory descriptor for both
+      // inputs.
+      diff_src.SetUsrMem(common_md, diff_src_tensor);
+
+      PrepareAndExecuteNet(relu_bwd_pd, &src, &diff_src, &diff_dst);
+     } catch (mkldnn::error &e) {
+       string error_msg = "Status: " + std::to_string(e.status) +
+                          ", message: " + string(e.message) +
+                          ", in file " + string(__FILE__) + ":" +
+                          std::to_string(__LINE__);
+       OP_REQUIRES_OK(context,
+                      errors::Aborted("Operation received an exception:",
+                                      error_msg));
+    }
+  }
+
+  void PrepareAndExecuteNet(const relu_backward::primitive_desc& relu_prim_desc,
+                  MklDnnData<T>* src, MklDnnData<T>* diff_src, MklDnnData<T>*
+                  diff_dst) {
+    std::vector<primitive> net;
+
+    // Check if we need to reorder original input tensors into common_md layout
+    // that we set for primitive creation. diff_src_primitive_desc is same as
+    // common_md.
+    src->CheckReorderToOpMem(relu_prim_desc.diff_src_primitive_desc(), &net);
+    diff_dst->CheckReorderToOpMem(relu_prim_desc.diff_src_primitive_desc(),
+                                  &net);
+
+    net.push_back(relu_backward(relu_prim_desc, src->GetOpMem(),
+                                diff_dst->GetOpMem(), diff_src->GetOpMem()));
+    stream(stream::kind::eager).submit(net).wait();
+  }
+};
+
+
+template <typename Device, typename T>
+class MklReluOp : public MklReluOpBase<Device, T, eltwise_relu> {
+ public:
+  ~MklReluOp() {}
+
+  explicit MklReluOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_relu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    (static_cast<T*>(out_o))[0] =
+              std::max((static_cast<T*>(user_i))[0], static_cast<T>(0));
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklReluGradOp : public MklReluGradOpBase<Device, T, eltwise_relu> {
+ public:
+  ~MklReluGradOp() {}
+
+  explicit MklReluGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_relu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] *
+                                  ((static_cast<T*>(user_i))[0] > 0);
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklEluOp : public MklReluOpBase<Device, T, eltwise_elu> {
+ public:
+  ~MklEluOp() {}
+
+  explicit MklEluOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_elu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    // return exp(feature) - 1 if feature > 0; feature otherwise
+    T feature = (static_cast<T*>(user_i))[0];
+    if (feature < 0)
+      (static_cast<T*>(out_o))[0] = std::exp(feature);
+    else
+      (static_cast<T*>(out_o))[0] = feature;
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklEluGradOp : public MklReluGradOpBase<Device, T, eltwise_elu> {
+ public:
+  ~MklEluGradOp() {}
+
+  explicit MklEluGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_elu>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    // gradient of elu(x) = 1 if x > 0; elu(x) + 1 otherwise
+    T feature = (static_cast<T*>(user_i))[0];
+    if (feature > 0) {
+      (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0];
+    } else {
+      T elu = std::exp(feature) - 1;
+      (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] * (elu + 1);
+    }
+  }
+};
+
+template <typename Device, typename T>
+class MklTanhOp : public MklReluOpBase<Device, T, eltwise_tanh> {
+ public:
+  ~MklTanhOp() {}
+
+  explicit MklTanhOp(OpKernelConstruction* context) :
+  MklReluOpBase<Device, T, eltwise_tanh>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    void* user_i = static_cast<void*>(const_cast<T*>(
+                         src_tensor.flat<T>().data()));
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    void* out_o = static_cast<void*>(dst_tensor->flat<T>().data());
+    // tanh(x) = (e^x - e^(-x))/ (e^x + e^(-x))
+    T feature = (static_cast<T*>(user_i))[0];
+    T e1 = std::exp(feature);
+    T e2 = std::exp(-feature);
+    (static_cast<T*>(out_o))[0] = (e1 - e2)/(e1 + e2);
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklTanhGradOp : public MklReluGradOpBase<Device, T, eltwise_tanh> {
+ public:
+  ~MklTanhGradOp() {}
+
+  explicit MklTanhGradOp(OpKernelConstruction* context) :
+  MklReluGradOpBase<Device, T, eltwise_tanh>(context) {}
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor    = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    void* out_o = static_cast<void*>(diff_src_tensor->flat<T>().data());
+    void* user_i =
+          static_cast<void*>(const_cast<T*>(src_tensor.flat<T>().data()));
+    // gradient of tanh(x) = 1 - tanh(x)^2
+    T feature = (static_cast<T*>(user_i))[0];
+    T e1 = std::exp(feature);
+    T e2 = std::exp(-feature);
+    T tanh = (e1 - e2)/(e1 + e2);
+    void* user_g =
+          static_cast<void*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
+    (static_cast<T*>(out_o))[0] = (static_cast<T*>(user_g))[0] *
+                                  (1 - tanh * tanh);
+  }
+};
+
+#endif
+
+// register dnn kernels for supported operations and supported types
 #define REGISTER_RELU_MKL_SUPPORTED_KERNELS_TYPES(type)             \
   REGISTER_KERNEL_BUILDER(Name("_MklRelu")                          \
                               .Device(DEVICE_CPU)                   \
@@ -367,6 +854,38 @@ void MklReluGradOp<Device, T>::Compute(OpKernelContext* context) {
                           MklReluGradOp<CPUDevice, type>);
 TF_CALL_float(REGISTER_RELU_MKL_SUPPORTED_KERNELS_TYPES);
 
+#ifdef INTEL_MKL_DNN
+
+// register dnn kernels for supported operations and supported types
+#define REGISTER_ELU_MKL_SUPPORTED_KERNELS_TYPES(type)             \
+  REGISTER_KERNEL_BUILDER(Name("_MklElu")                          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklEluOp<CPUDevice, type>);              \
+  REGISTER_KERNEL_BUILDER(Name("_MklEluGrad")                      \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklEluGradOp<CPUDevice, type>);
+TF_CALL_float(REGISTER_ELU_MKL_SUPPORTED_KERNELS_TYPES);
+
+#define REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES(type)             \
+  REGISTER_KERNEL_BUILDER(Name("_MklTanh")                          \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklTanhOp<CPUDevice, type>);              \
+  REGISTER_KERNEL_BUILDER(Name("_MklTanhGrad")                      \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklTanhGradOp<CPUDevice, type>);
+TF_CALL_float(REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES);
+
+#endif
+
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
+
diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc
index 5e985824750befb702f8fa7a59d699f853f40267..b41e529357b2e93570377aaf350c99e0c8f2bd3c 100644
--- a/tensorflow/core/kernels/mkl_reshape_op.cc
+++ b/tensorflow/core/kernels/mkl_reshape_op.cc
@@ -28,6 +28,11 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "tensorflow/core/util/mkl_util.h"
 
+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
 template <typename Device, typename T>
@@ -35,6 +40,7 @@ class MklReshapeOp : public OpKernel {
  public:
   explicit MklReshapeOp(OpKernelConstruction* context) : OpKernel(context) {}
 
+#ifndef INTEL_MKL_DNN
   void Compute(OpKernelContext* context) override {
     const Tensor& input = MklGetInput(context, 0);
     const Tensor& sizes = MklGetInput(context, 1);
@@ -129,7 +135,190 @@ class MklReshapeOp : public OpKernel {
     }
   }
 
+#else
+
  private:
+  // When the input tensor is in MKL layout and we are reshaping the tensor to a
+  // different shape than its actual shape, then we use MKLDNN reorder primitive
+  // to put tensor back in Tensorflow layout. But we can skip this reordering
+  // some times. This function checks for all such cases.
+  bool SkipReorder(const MklDnnShape& mkl_shape_input,
+                   const TensorShape& reshape_to) {
+    CHECK_EQ(mkl_shape_input.IsMklTensor(), true);
+    bool ret = false;
+
+    // If Tensorflow's data format and the underlying format maintained by
+    // MKLDNN are equivalent (both are NHWC or both are NCHW), then we can
+    // safely return true.
+    auto input_mkl_md = mkl_shape_input.GetMklLayout();
+    if (mkl_shape_input.GetTfDataFormat() == input_mkl_md.data.format) {
+      ret = true;
+    }
+
+    return ret;
+  }
+
+ public:
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input_tensor = MklGetInput(context, 0);
+    const Tensor& sizes = MklGetInput(context, 1);
+
+    MklDnnShape mkl_shape_input;
+    GetMklShape(context, kInputSlotIdx, &mkl_shape_input);
+    bool input_in_mkl_format = mkl_shape_input.IsMklTensor();
+    const int64 nelems = input_in_mkl_format ?
+                         mkl_shape_input.GetTfShape().num_elements()
+                         : input_tensor.NumElements();
+
+    // Preliminary validation of sizes.
+    OP_REQUIRES(context, IsLegacyVector(sizes.shape()),
+                errors::InvalidArgument("sizes input must be 1-D, not shape ",
+                                        sizes.shape().DebugString()));
+
+    // Compute the output shape.  Determine product of specified
+    // dimensions, and find the index of the unspecified one.
+    TensorShape shape;
+    int64 product = 1;
+    int unknown_index = -1;
+    switch (sizes.dtype()) {
+      case DT_INT32:
+        OP_REQUIRES_OK(context, ValidateSizes<int32>(sizes, &product,
+                                                     &unknown_index, &shape));
+        break;
+      case DT_INT64:
+        OP_REQUIRES_OK(context, ValidateSizes<int64>(sizes, &product,
+                                                     &unknown_index, &shape));
+        break;
+      default:
+        context->CtxFailure(errors::InvalidArgument(
+            "desired shape must be a DT_INT32 or DT_INT64 vector, not a ",
+            DataTypeString(sizes.dtype())));
+        return;
+    }
+    if (unknown_index != -1) {
+      OP_REQUIRES(
+          context, product > 0,
+          errors::InvalidArgument("Reshape cannot infer the missing input size "
+                                  "for an empty tensor unless all specified "
+                                  "input sizes are non-zero"));
+      const int64 missing = nelems / product;
+      OP_REQUIRES(
+          context, product * missing == nelems,
+          errors::InvalidArgument(
+              "Input to reshape is a tensor with ", nelems,
+              " values, but the requested shape requires a multiple of ",
+              product));
+      shape.set_dim(unknown_index, missing);
+    }
+    OP_REQUIRES(context, shape.num_elements() == nelems,
+                errors::InvalidArgument("Input to reshape is a tensor with ",
+                                        nelems,
+                                        " values, but the requested shape has ",
+                                        shape.num_elements()));
+
+    if (input_in_mkl_format) {
+      TensorShape& shape_to = shape;
+      TensorShape shape_from = mkl_shape_input.GetTfShape();
+      if (shape_from == shape_to) {
+        CopyMklTensorInToOut(context, kInputSlotIdx, kOutputSlotIdx);
+        return;
+      } else {
+        try {
+          auto cpu_engine = engine(engine::cpu, 0);
+          MklDnnData<T> dnn_data_input(&cpu_engine);
+          // Reshape is just a logical view change operation for a tensor.
+          // It does not change underlying layout. But MKLDNN may maintain
+          // tensor data in different layout than that specified by Tensorflow.
+          // If MKLDNN maintains input tensor in different layout than that
+          // specified by Tensorflow, we will need to reorder tensor and then
+          // put it in the shape expected by Tensorflow. But if MKLDNN has
+          // maintained input tensor in the same layout as it is expected by
+          // Tensorflow, we don't need to reorder tensor contents, we just
+          // need to update MklDnnShape object associated with the input
+          // tensor to reflect the shape change expected by reshape.
+          if (!SkipReorder(mkl_shape_input, shape_to)) {
+              // If dimensions that are being expanded or collapsed are not
+              // maintained contiguously by MKLDNN, then we use reorder.
+
+              // Get Mkl layout of input tensor.
+              auto input_mkl_md = mkl_shape_input.GetMklLayout();
+              // Set input Mkl layout as the user layout.
+              dnn_data_input.SetUsrMem(input_mkl_md, &input_tensor);
+              // Get expected Tensorflow layout of input tensor.
+              auto output_tf_md = mkl_shape_input.GetTfLayout();
+              auto output_tf_pd = memory::primitive_desc(output_tf_md,
+                                                         cpu_engine);
+
+              Tensor* output_tensor = nullptr;
+              MklShape mkl_shape_output;
+              mkl_shape_output.SetMklTensor(false);
+              // We allocate output tensor in the shape expected by Reshape.
+              AllocateOutputSetMklShape(context, kOutputSlotIdx, &output_tensor,
+                                        shape_to, mkl_shape_output);
+
+              // Insert reorder between Mkl layout and TensorFlow layout if
+              // needed. If reorder is not needed but reshape is needed (since
+              // shape_from != shape_to), then we just copy input tensor to
+              // output tensor with target shape (we cannot forward Mkl layout
+              // in such case because shape has changed.)
+              std::vector<primitive> net;
+              if (dnn_data_input.CheckReorderToOpMem(output_tf_pd,
+                       output_tensor, &net)) {
+                stream(stream::kind::eager).submit(net).wait();
+              } else {
+                output_tensor->CopyFrom(input_tensor, shape_to);
+              }
+              return;
+          } else {
+            // If dimensions that are being expanded or collapsed are
+            // maintained contiguously by MKLDNN, then we skip reorder, just
+            // update MklDnnShape object for the tensorflow tensor, and forward
+            // Tensorflow tensor as it is to the output.
+            auto output_dims = TFShapeToMklDnnDims(shape_to);
+            auto output_strides = CalculateTFStrides(output_dims);
+            auto output_tf_md = MklDnnData<T>::CreateBlockedMemDesc(output_dims,
+                                                               output_strides);
+            auto output_tf_pd = memory::primitive_desc(output_tf_md,
+                                                       cpu_engine);
+
+            // Set MklDnnShape
+            MklDnnShape mkl_shape_output;
+            mkl_shape_output.SetMklTensor(true);
+            mkl_shape_output.SetMklLayout(&output_tf_pd);
+            mkl_shape_output.SetElemType(MklDnnType<T>());
+            mkl_shape_output.SetTfLayout(output_dims.size(), output_dims,
+                                         memory::format::blocked);
+
+            // We now simply forward input Mkl tensor to output and change its
+            // output MklDnnShape object.
+            ForwardMklTensorInToOutWithMklShape(context, kInputSlotIdx,
+                                              kOutputSlotIdx, mkl_shape_output);
+            return;
+          }
+        } catch (mkldnn::error &e) {
+          string error_msg = "Status: " + std::to_string(e.status) +
+                       ", message: " + string(e.message) +
+                       ", in file " + string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+          OP_REQUIRES_OK(context,
+                   errors::Aborted("Operation received an exception:",
+                      error_msg));
+        }
+      }
+    } else {
+      // If input tensor is not in Mkl format, then just copy Tensorflow tensor
+      // to output with specified shape.
+      CopyTfTensorInToOutWithShape(context, kInputSlotIdx, kOutputSlotIdx,
+                                   shape);
+    }
+  }
+
+#endif  // INTEL_MKL_DNN
+
+ private:
+  const int kInputSlotIdx = 0;
+  const int kOutputSlotIdx = 0;
+
   template <typename Tshape>
   Status ValidateSizes(const Tensor& sizes, int64* product, int* unknown_index,
                        TensorShape* shape) {
diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..896d56293303b06adb554cef7e2f3ef16a5a8eda
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -0,0 +1,163 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/nn_ops.cc.
+#ifdef INTEL_MKL
+#ifdef INTEL_MKL_DNN
+
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/util/tensor_format.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#include "mkldnn.h"
+#include "mkldnn_types.h"
+#include "tensorflow/core/platform/default/logging.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+#include "mkldnn.hpp"
+using mkldnn::stream;
+using mkldnn::prop_kind;
+using mkldnn::softmax_forward;
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+
+
+template <typename Device, typename T>
+class MklSoftmaxOp : public OpKernel {
+ public:
+  ~MklSoftmaxOp() {}
+
+  explicit MklSoftmaxOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      auto cpu_engine = engine(engine::cpu, 0);
+
+      // src_tensor now points to the 0-th input of global data struct "context"
+      size_t src_idx = 0;
+      const Tensor& src_tensor = MklGetInput(context, src_idx);
+
+      // Add: get MklShape
+      MklDnnShape src_mkl_shape;
+      GetMklShape(context, src_idx, &src_mkl_shape);
+
+
+      // src_dims is the dimenstion of src_tensor
+      // dim of the dst will also be same as src_dims
+      auto src_tf_shape = src_mkl_shape.IsMklTensor() ?
+                          src_mkl_shape.GetTfShape() : src_tensor.shape();
+      auto src_dims = TFShapeToMklDnnDims(src_tf_shape);
+      auto output_dims = src_dims;
+
+      // Create softmax memory for src, dst: both are defined in mkl_util.h,
+      // they are wrapper
+      MklDnnData<T> src(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);
+
+      // If input is in MKL layout, then simply grab input layout; otherwise,
+      // construct input Tf layout. For TF layout, although input shape
+      // (src_dims) required is in MKL-DNN order, the layout is Tensorflow's
+      // layout
+      auto src_md = src_mkl_shape.IsMklTensor()
+                    ? src_mkl_shape.GetMklLayout()
+                    : memory::desc(src_dims, MklDnnType<T>(),
+                                         memory::format::nc);
+
+      // src: setting memory descriptor and op memory descriptor
+      // Basically following two functions maps the TF "src_tensor" to mkl
+      // tensor object "src"
+      // following functions are in mkl_util.h
+      // data format is "nc" for src and dst; since the src and dst buffer is
+      // always in 2D shape
+      src.SetUsrMem(src_md, &src_tensor);
+      src.SetOpMemDesc(src_dims, memory::format::nc);
+
+      // creating a memory descriptor
+      int axis = 1;  // axis to which softmax will be applied
+      auto softmax_fwd_desc = softmax_forward::desc(prop_kind::forward_scoring,
+                                                    src.GetOpMemDesc(), axis);
+      auto softmax_fwd_pd = softmax_forward::primitive_desc(softmax_fwd_desc,
+                                                            cpu_engine);
+
+      // add: output
+      Tensor* output_tensor = nullptr;
+      MklDnnShape output_mkl_shape;
+      TensorShape output_tf_shape;  // shape of output TF tensor.
+      // Softmax MklDnn output layout is same as input layout.
+      auto dst_pd = src.GetUsrMemPrimDesc();
+
+      // if input is MKL shape, ouput is also MKL shape.
+      // if input is TF shape, output is also TF shape
+      if (src_mkl_shape.IsMklTensor()) {
+        output_mkl_shape.SetMklTensor(true);
+        output_mkl_shape.SetMklLayout(&dst_pd);
+        output_mkl_shape.SetElemType(MklDnnType<T>());
+        output_mkl_shape.SetTfLayout(output_dims.size(), output_dims,
+                                     memory::format::nc);
+        output_tf_shape.AddDim((dst_pd.get_size() / sizeof(T)));
+      } else {  // then output is also TF shape
+        output_mkl_shape.SetMklTensor(false);
+        output_tf_shape = MklDnnDimsToTFShape(output_dims);
+      }
+      // Allocate output shape (MKL or TF based on the above)
+      AllocateOutputSetMklShape(context, 0, &output_tensor, output_tf_shape,
+                                output_mkl_shape);
+
+      // Output_dims and input_dims are same
+      dst.SetUsrMem(src_md, output_tensor);
+
+      // finally creating the "softmax op" using the primitive descriptor, src
+      // and dst
+      auto softmax_fwd =
+          softmax_forward(softmax_fwd_pd, src.GetOpMem(), dst.GetOpMem());
+
+      // execute net (pushing to the stream)
+      // following 3 are common for all mkl dnn ops
+      std::vector<primitive> net;
+      net.push_back(softmax_fwd);
+      stream(stream::kind::eager).submit(net).wait();
+    } catch (mkldnn::error& e) {
+      string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
+                         string(e.message) + ", in file " + string(__FILE__) +
+                         ":" + std::to_string(__LINE__);
+      OP_REQUIRES_OK(
+          context,
+          errors::Aborted("Operation received an exception:", error_msg));
+    }
+  }
+};
+
+/* Register DNN kernels for supported operations and supported types - right now
+ * it is only Softmax and f32 */
+#define REGISTER_SOFTMAX_MKL_SUPPORTED_KERNELS_TYPES(type)             \
+  REGISTER_KERNEL_BUILDER(Name("_MklSoftmax")                       \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklSoftmaxOp<CPUDevice, type>);
+TF_CALL_float(REGISTER_SOFTMAX_MKL_SUPPORTED_KERNELS_TYPES);
+
+
+}  // namespace tensorflow
+
+#endif  // INTEL_MKL_DNN
+#endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/multinomial_op.cc b/tensorflow/core/kernels/multinomial_op.cc
index 8c0109f5c87ce5f73621a1683471bbcb8a936ea4..d086abb24760f1ab946605fd422a4fd0d5fc866d 100644
--- a/tensorflow/core/kernels/multinomial_op.cc
+++ b/tensorflow/core/kernels/multinomial_op.cc
@@ -40,7 +40,7 @@ typedef Eigen::GpuDevice GPUDevice;
 
 namespace functor {
 
-template <typename Device, typename T>
+template <typename Device, typename T, typename OutputType>
 struct MultinomialFunctor {
   void operator()(OpKernelContext* ctx, const Device& d,
                   typename TTypes<T>::ConstMatrix logits,
@@ -49,11 +49,11 @@ struct MultinomialFunctor {
                   typename TTypes<float>::Flat scratch, int batch_size,
                   int num_classes, int num_samples,
                   const random::PhiloxRandom& gen,
-                  typename TTypes<int64>::Matrix output);
+                  typename TTypes<OutputType>::Matrix output);
 };
 
-template <typename T>
-struct MultinomialFunctor<CPUDevice, T> {
+template <typename T, typename OutputType>
+struct MultinomialFunctor<CPUDevice, T, OutputType> {
   void operator()(OpKernelContext* ctx, const CPUDevice& d,
                   typename TTypes<T>::ConstMatrix logits,
                   typename TTypes<float>::Flat /* noises */,
@@ -61,7 +61,7 @@ struct MultinomialFunctor<CPUDevice, T> {
                   typename TTypes<float>::Flat /* scratch */, int batch_size,
                   int num_classes, int num_samples,
                   const random::PhiloxRandom& gen,
-                  typename TTypes<int64>::Matrix output) {
+                  typename TTypes<OutputType>::Matrix output) {
     auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
 
     // The implementation only parallelizes by batch.
@@ -128,7 +128,7 @@ struct MultinomialFunctor<CPUDevice, T> {
 }  // namespace functor
 
 // Samples from a multinomial distribution.
-template <typename Device, typename T>
+template <typename Device, typename T, typename OutputType>
 class MultinomialOp : public OpKernel {
  public:
   explicit MultinomialOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -195,11 +195,11 @@ class MultinomialOp : public OpKernel {
       if (std::is_same<Device, CPUDevice>::value) num_samples_ceil_4 *= 2;
       auto rng =
           generator_.ReserveRandomOutputs(batch_size * num_samples_ceil_4, 256);
-      functor::MultinomialFunctor<Device, T>()(
+      functor::MultinomialFunctor<Device, T, OutputType>()(
           ctx, ctx->eigen_device<Device>(), logits_t.matrix<T>(),
           noises.flat<float>(), scores.flat<float>(), scratch.flat<float>(),
           batch_size, num_classes, num_samples, rng,
-          samples_t->matrix<int64>());
+          samples_t->matrix<OutputType>());
     }
   }
 
@@ -209,10 +209,17 @@ class MultinomialOp : public OpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(MultinomialOp);
 };
 
-#define REGISTER(TYPE)                                                  \
-  REGISTER_KERNEL_BUILDER(                                              \
-      Name("Multinomial").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
-      MultinomialOp<CPUDevice, TYPE>);
+#define REGISTER(TYPE)                                                   \
+  REGISTER_KERNEL_BUILDER(Name("Multinomial")                            \
+                              .Device(DEVICE_CPU)                        \
+                              .TypeConstraint<TYPE>("T")                 \
+                              .TypeConstraint("output_dtype", DT_INT32), \
+                          MultinomialOp<CPUDevice, TYPE, int32>);        \
+  REGISTER_KERNEL_BUILDER(Name("Multinomial")                            \
+                              .Device(DEVICE_CPU)                        \
+                              .TypeConstraint<TYPE>("T")                 \
+                              .TypeConstraint("output_dtype", DT_INT64), \
+                          MultinomialOp<CPUDevice, TYPE, int64>);
 
 TF_CALL_half(REGISTER);
 TF_CALL_float(REGISTER);
@@ -220,12 +227,20 @@ TF_CALL_double(REGISTER);
 #undef REGISTER
 
 #if GOOGLE_CUDA
-#define REGISTER(TYPE)                                    \
-  REGISTER_KERNEL_BUILDER(Name("Multinomial")             \
-                              .Device(DEVICE_GPU)         \
-                              .HostMemory("num_samples")  \
-                              .TypeConstraint<TYPE>("T"), \
-                          MultinomialOp<GPUDevice, TYPE>)
+#define REGISTER(TYPE)                                                   \
+  REGISTER_KERNEL_BUILDER(Name("Multinomial")                            \
+                              .Device(DEVICE_GPU)                        \
+                              .HostMemory("num_samples")                 \
+                              .TypeConstraint<TYPE>("T")                 \
+                              .TypeConstraint("output_dtype", DT_INT32), \
+                          MultinomialOp<GPUDevice, TYPE, int32>)         \
+  REGISTER_KERNEL_BUILDER(Name("Multinomial")                            \
+                              .Device(DEVICE_GPU)                        \
+                              .HostMemory("num_samples")                 \
+                              .TypeConstraint<TYPE>("T")                 \
+                              .TypeConstraint("output_dtype", DT_INT64), \
+                          MultinomialOp<GPUDevice, TYPE, int64>)
+
 TF_CALL_half(REGISTER);
 TF_CALL_float(REGISTER);
 TF_CALL_double(REGISTER);
diff --git a/tensorflow/core/kernels/multinomial_op.h b/tensorflow/core/kernels/multinomial_op.h
index af5e81f219c802857fd6d5eb27e4962cc890a058..6e41060aa414b0611dd7dca31374444f8dd364ec 100644
--- a/tensorflow/core/kernels/multinomial_op.h
+++ b/tensorflow/core/kernels/multinomial_op.h
@@ -21,7 +21,7 @@ namespace tensorflow {
 namespace functor {
 
 // Generic helper functor for the Multinomial Op.
-template <typename Device, typename T>
+template <typename Device, typename T, typename OutputType>
 struct MultinomialFunctor;
 
 }  // namespace functor
diff --git a/tensorflow/core/kernels/multinomial_op_gpu.cu.cc b/tensorflow/core/kernels/multinomial_op_gpu.cu.cc
index 19b4f3ca559f56d93fae203df77f0ef35718db1b..5cc5877cceb19320023423d35a352c5ba3db13e2 100644
--- a/tensorflow/core/kernels/multinomial_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/multinomial_op_gpu.cu.cc
@@ -37,20 +37,22 @@ using GPUDevice = Eigen::GpuDevice;
 
 // Kernel for Multinomial op.  Data is interpreted to have the following shapes:
 //   scores: [B, S, C];  maxima: [B, S];  output: [B, S].
+template <typename OutputType>
 __global__ void MultinomialKernel(int32 nthreads, const int32 num_classes,
                                   const int32 num_samples, const float* scores,
-                                  const float* maxima, int64* output) {
+                                  const float* maxima, OutputType* output) {
   CUDA_1D_KERNEL_LOOP(index, nthreads) {
     const int maxima_idx = index / num_classes;
     if (ldg(maxima + maxima_idx) == ldg(scores + index)) {
-      CudaAtomicMax(reinterpret_cast<uint64*>(output + maxima_idx),
-                    static_cast<uint64>(index % num_classes));
+      using UnsignedOutputType = typename std::make_unsigned<OutputType>::type;
+      CudaAtomicMax(reinterpret_cast<UnsignedOutputType*>(output + maxima_idx),
+                    static_cast<UnsignedOutputType>(index % num_classes));
     }
   }
 }
 
-template <typename T>
-struct MultinomialFunctor<GPUDevice, T> {
+template <typename T, typename OutputType>
+struct MultinomialFunctor<GPUDevice, T, OutputType> {
   void operator()(OpKernelContext* ctx, const GPUDevice& d,
                   typename TTypes<T>::ConstMatrix logits,
                   typename TTypes<float>::Flat noises,
@@ -58,7 +60,7 @@ struct MultinomialFunctor<GPUDevice, T> {
                   typename TTypes<float>::Flat maxima, int batch_size,
                   int num_classes, int num_samples,
                   const random::PhiloxRandom& gen,
-                  typename TTypes<int64>::Matrix output) {
+                  typename TTypes<OutputType>::Matrix output) {
     // Uniform, [0, 1).
     typedef random::UniformDistribution<random::PhiloxRandom, float> Dist;
     functor::FillPhiloxRandom<GPUDevice, Dist>()(ctx, d, gen, noises.data(),
@@ -111,11 +113,17 @@ struct MultinomialFunctor<GPUDevice, T> {
 };
 
 // Explicit instantiation of the GPU functors.
-template struct MultinomialFunctor<GPUDevice, Eigen::half>;
-template struct MultinomialFunctor<GPUDevice, float>;
-template struct MultinomialFunctor<GPUDevice, double>;
-template struct MultinomialFunctor<GPUDevice, int32>;
-template struct MultinomialFunctor<GPUDevice, int64>;
+template struct MultinomialFunctor<GPUDevice, Eigen::half, int32>;
+template struct MultinomialFunctor<GPUDevice, float, int32>;
+template struct MultinomialFunctor<GPUDevice, double, int32>;
+template struct MultinomialFunctor<GPUDevice, int32, int32>;
+template struct MultinomialFunctor<GPUDevice, int64, int32>;
+
+template struct MultinomialFunctor<GPUDevice, Eigen::half, int64>;
+template struct MultinomialFunctor<GPUDevice, float, int64>;
+template struct MultinomialFunctor<GPUDevice, double, int64>;
+template struct MultinomialFunctor<GPUDevice, int32, int64>;
+template struct MultinomialFunctor<GPUDevice, int64, int64>;
 
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/nn_ops_test.cc b/tensorflow/core/kernels/nn_ops_test.cc
index 0db7c63b8b6a25f1d495dd937d49ec9d0615ab0a..a841291ddd7d4f64b0ab2b611c59307f4d11150f 100644
--- a/tensorflow/core/kernels/nn_ops_test.cc
+++ b/tensorflow/core/kernels/nn_ops_test.cc
@@ -653,6 +653,8 @@ BM_ConvFloatDepthwiseFwd(32, 7, 7, 1024, 1, 1024, 3, 3, 1, SAME, conv6);
 // Benchmarks with different stride and padding options.
 BM_ConvFloatDepthwiseFwd(32, 112, 112, 3, 8, 24, 3, 3, 2, SAME, conv7);
 BM_ConvFloatDepthwiseFwd(32, 112, 112, 3, 8, 24, 3, 3, 2, VALID, conv8);
+BM_ConvFloatDepthwiseFwd(1, 100, 100, 72, 1, 72, 3, 3, 1, SAME, conv9);
+BM_ConvFloatDepthwiseFwd(1, 100, 100, 72, 1, 72, 5, 5, 1, SAME, conv10);
 
 #define BM_ConvFloatDepthwiseBk(BS, R, C, ID, DM, OD, KR, KC, STR, PAD, LABEL) \
   static void BM_ConvFloatDepthwiseBkInCPU1_##LABEL(int iters) {               \
diff --git a/tensorflow/core/kernels/pack_op.cc b/tensorflow/core/kernels/pack_op.cc
index 814128d99ac2acb4a10cfcb2907edb735eaca382..2923c38662e3c2b74df5c72c513b5e3ecab9f5e5 100644
--- a/tensorflow/core/kernels/pack_op.cc
+++ b/tensorflow/core/kernels/pack_op.cc
@@ -140,6 +140,7 @@ class PackOp : public OpKernel {
 TF_CALL_ALL_TYPES(REGISTER_PACK);
 TF_CALL_QUANTIZED_TYPES(REGISTER_PACK);
 TF_CALL_bfloat16(REGISTER_PACK);
+TF_CALL_variant(REGISTER_PACK);
 
 #if defined(IS_MOBILE_PLATFORM) && !defined(SUPPORT_SELECTIVE_REGISTRATION)
 // Primarily used for SavedModel support on mobile.
@@ -157,6 +158,7 @@ REGISTER_PACK(string);
       PackOp<GPUDevice, type>)
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
+TF_CALL_bfloat16(REGISTER_GPU);
 TF_CALL_int64(REGISTER_GPU);
 REGISTER_GPU(bool);
 #undef REGISTER_GPU
diff --git a/tensorflow/core/kernels/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/parallel_interleave_dataset_op.cc
deleted file mode 100644
index 56942a5c01f3c2be5617aa1a9e1eadea12857911..0000000000000000000000000000000000000000
--- a/tensorflow/core/kernels/parallel_interleave_dataset_op.cc
+++ /dev/null
@@ -1,402 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/core/kernels/dataset.h"
-
-#include "tensorflow/core/common_runtime/function.h"
-#include "tensorflow/core/framework/partial_tensor_shape.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/kernels/captured_function.h"
-#include "tensorflow/core/kernels/dataset_utils.h"
-#include "tensorflow/core/lib/gtl/cleanup.h"
-#include "tensorflow/core/lib/random/random.h"
-
-namespace tensorflow {
-
-namespace {
-
-// See documentation in ../ops/dataset_ops.cc for a high-level
-// description of the following op.
-
-class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
- public:
-  explicit ParallelInterleaveDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx),
-        graph_def_version_(ctx->graph_def_version()) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
-  }
-
-  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
-                   DatasetBase** output) override {
-    OpInputList inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
-    std::vector<Tensor> other_arguments;
-    other_arguments.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      other_arguments.push_back(t);
-    }
-
-    int64 cycle_length;
-    OP_REQUIRES_OK(ctx,
-                   ParseScalarArgument(ctx, "cycle_length", &cycle_length));
-    OP_REQUIRES(ctx, cycle_length > 0,
-                errors::InvalidArgument("`cycle_length` must be > 0"));
-
-    int64 block_length;
-    OP_REQUIRES_OK(ctx,
-                   ParseScalarArgument(ctx, "block_length", &block_length));
-    OP_REQUIRES(ctx, block_length > 0,
-                errors::InvalidArgument("`block_length` must be > 0"));
-
-    bool sloppy;
-    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "sloppy", &sloppy));
-
-    std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(ctx, func_, graph_def_version_,
-                                                 std::move(other_arguments),
-                                                 &captured_func));
-
-    *output = new Dataset(input, std::move(captured_func), cycle_length,
-                          block_length, sloppy, output_types_, output_shapes_);
-  }
-
- private:
-  class Dataset : public DatasetBase {
-   public:
-    Dataset(const DatasetBase* input,
-            std::unique_ptr<CapturedFunction> captured_func, int64 cycle_length,
-            int64 block_length, bool sloppy, const DataTypeVector& output_types,
-            const std::vector<PartialTensorShape>& output_shapes)
-        : input_(input),
-          captured_func_(std::move(captured_func)),
-          cycle_length_(cycle_length),
-          block_length_(block_length),
-          sloppy_(sloppy),
-          output_types_(output_types),
-          output_shapes_(output_shapes) {
-      input_->Ref();
-    }
-
-    ~Dataset() override { input_->Unref(); }
-
-    std::unique_ptr<IteratorBase> MakeIterator(
-        const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(new Iterator(
-          {this, strings::StrCat(prefix, "::ParallelInterleave")}));
-    }
-
-    const DataTypeVector& output_dtypes() const override {
-      return output_types_;
-    }
-    const std::vector<PartialTensorShape>& output_shapes() const override {
-      return output_shapes_;
-    }
-
-    string DebugString() override {
-      return "ParallelInterleaveDatasetOp::Dataset";
-    }
-
-   private:
-    class Iterator : public DatasetIterator<Dataset> {
-     public:
-      explicit Iterator(const Params& params)
-          : DatasetIterator<Dataset>(params),
-            input_impl_(params.dataset->input_->MakeIterator(params.prefix)),
-            output_elements_(params.dataset->cycle_length_) {}
-
-      ~Iterator() override {
-        mutex_lock l(mu_);
-        cancelled_ = true;
-        // Notify all workers in case they are blocked.
-        for (int64 i = 0; i < dataset()->cycle_length_; ++i) {
-          output_elements_[i].cond_var.notify_all();
-        }
-      }
-
-      // It is implemented so that it matches the deterministic interleave
-      // unless we would block waiting for an element, at which point it skips
-      // along to the next available value.
-      Status GetNextInternal(IteratorContext* ctx,
-                             std::vector<Tensor>* out_tensors,
-                             bool* end_of_sequence) override {
-        mutex_lock l(mu_);
-        TF_RETURN_IF_ERROR(EnsureWorkerThreadsStarted(ctx));
-        const int64 num_workers = worker_threads_.size();
-        if (num_workers == 0) {
-          *end_of_sequence = true;
-          return Status::OK();
-        }
-        while (!cancelled_) {
-          // Wait for an item to become available, blocking if necessary. If we
-          // are allowed to be sloppy, we can skip over input datasets that do
-          // not have an item readily available.
-          const int64 n = dataset()->sloppy_ ? num_workers : 1LL;
-          for (int64 i = 0; i < n; ++i) {
-            int64 index = (next_index_ + i) % num_workers;
-            if (output_elements_[index].is_produced) {
-              next_index_ = index;
-              if (i == 0) {
-                block_count_++;
-                if (block_count_ == dataset()->block_length_) {
-                  next_index_ = (index + 1) % num_workers;
-                  block_count_ = 0;
-                }
-              } else {
-                block_count_ = 0;
-              }
-              // If we encounter an EoF, advance to the next iterator
-              if (output_elements_[index].end_of_sequence) {
-                output_elements_[index].is_produced = false;
-                output_elements_[index].cond_var.notify_one();
-                next_index_ = (index + 1) % num_workers;
-                block_count_ = 0;
-                i = -1;  // Restart the inner loop
-                continue;
-              }
-              *end_of_sequence = false;
-              if (output_elements_[index].output_status.ok()) {
-                output_elements_[index].output_value.swap(*out_tensors);
-              }
-              output_elements_[index].is_produced = false;
-              output_elements_[index].cond_var.notify_one();
-              return output_elements_[index].output_status;
-            }
-          }
-
-          if (num_active_threads_ == 0) {
-            // No potential for future values.
-            //
-            // Note: this condition check must occur after checking the output
-            // buffer, as its possible for there to be values in the output
-            // buffer, even if the number of live threads is zero.
-            *end_of_sequence = true;
-            return Status::OK();
-          }
-
-          // If we are not allowed to be sloppy and
-          // `worker_threads_[next_index]` has finished, advance `next_index`.
-          if (!dataset()->sloppy_ && worker_threads_[next_index_].finished) {
-            next_index_ = (next_index_ + 1) % num_workers;
-            continue;
-          }
-
-          // No values available; wait until woken up.
-          // TODO(jsimsa): Use slot-specific condition variable for
-          // coordination of elements consumption.
-          cond_var_.wait(l);
-        }
-        return errors::Cancelled(
-            "ParallelInterleaveDatasetOp::Dataset::Iterator::GetNext");
-      }
-
-     private:
-      // Internal structure to manage thread coordination. All values are
-      // guarded by the enclosing Iterator's mu_.
-      struct OutputBufferElement {
-        // The producer must set `is_produced` to `true` after
-        // `output_status` or `output_value` has been written.
-        bool is_produced = false;
-        // The producer sets `output_status` if either getting the input element
-        // or applying the function to it fails.
-        Status output_status;
-        // Reached end of sequence for the underlying iterator.
-        bool end_of_sequence = false;
-        // The output data element.
-        std::vector<Tensor> output_value;
-        // The producer thread waits on this condition variable after having
-        // produced an element. The reader thread notifies this condition
-        // variable after reading the value.
-        condition_variable cond_var;
-      };
-
-      struct ThreadStatus {
-        // The underlying thread uses `finished` to communicate to the producer
-        // that it has finished.
-        bool finished = false;
-        // The underlying thread object.
-        std::unique_ptr<Thread> thread;
-
-        explicit ThreadStatus(Thread* thread) : thread(thread) {}
-      };
-
-      Status EnsureWorkerThreadsStarted(IteratorContext* ctx)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-        if (worker_threads_.empty()) {
-          for (int64 i = 0; i < dataset()->cycle_length_; ++i) {
-            // Serialize the creation of the workers and their corresponding
-            // input elements to ensure we match the standard interleave when
-            // the underlying iterators induce no delay.
-            std::vector<Tensor> args;
-            TF_RETURN_IF_ERROR(
-                input_impl_->GetNext(ctx, &args, &end_of_input_));
-            if (end_of_input_) {
-              LOG(WARNING) << "Input iterator exhausted after " << i
-                           << " elements; cannot start all "
-                           << dataset()->cycle_length_ << " worker threads.";
-              return Status::OK();
-            }
-            std::unique_ptr<IteratorBase> itr;
-            TF_RETURN_IF_ERROR(dataset::MakeIteratorFromInputElement(
-                ctx, args, i, dataset()->captured_func_.get(), prefix(), &itr));
-            worker_threads_.emplace_back(ctx->env()->StartThread(
-                {}, "worker_thread",
-                std::bind(&Iterator::WorkerThread, this,
-                          new IteratorContext(*ctx), i, itr.release())));
-            num_active_threads_ = i + 1;
-          }
-        }
-        return Status::OK();
-      }
-
-      void BlockAndUpdateOutputBuffer(mutex_lock* l, const int64 thread_index,
-                                      const Status& status,
-                                      bool end_of_sequence,
-                                      std::vector<Tensor>* out_tensors)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-        // We have produced an element; push it into the output buffer
-        // when space is available.
-        while (!cancelled_ && output_elements_[thread_index].is_produced) {
-          output_elements_[thread_index].cond_var.wait(*l);
-        }
-        if (cancelled_) {
-          return;
-        }
-        output_elements_[thread_index].is_produced = true;
-        output_elements_[thread_index].output_status = status;
-        output_elements_[thread_index].end_of_sequence = end_of_sequence;
-        if (status.ok()) {
-          output_elements_[thread_index].output_value.swap(*out_tensors);
-        } else {
-          output_elements_[thread_index].output_value.clear();
-        }
-        cond_var_.notify_one();
-      }
-
-      // Races to produce elements into the output queue buffers.
-      void WorkerThread(IteratorContext* ctx_ptr, const int64 thread_index,
-                        IteratorBase* out_iterator_ptr) {
-        // std::function arguments are copy-constructable, so we pass raw
-        // pointers, and then immediately wrap them to ensure correct ownership.
-        std::unique_ptr<IteratorContext> ctx(ctx_ptr);
-        std::unique_ptr<IteratorBase> out_iterator(out_iterator_ptr);
-        auto cleanup = gtl::MakeCleanup([this, thread_index] {
-          mutex_lock l(mu_);
-          worker_threads_[thread_index].finished = true;
-          num_active_threads_--;
-          cond_var_.notify_all();
-        });
-        while (true) {
-          // Attempt to produce an element.
-          bool end_of_out_itr_input = false;
-          std::vector<Tensor> out_tensors;
-          Status element_status = out_iterator->GetNext(ctx.get(), &out_tensors,
-                                                        &end_of_out_itr_input);
-          // Handle output.
-          {
-            mutex_lock l(mu_);
-            BlockAndUpdateOutputBuffer(&l, thread_index, element_status,
-                                       end_of_out_itr_input, &out_tensors);
-            if (end_of_out_itr_input) {
-              // We have exhausted our current iterator; get a new iterator;
-              // loop to handle errors.
-              while (!cancelled_) {
-                if (end_of_input_) {
-                  // No more iterator inputs; we're done!
-                  return;
-                }
-                std::vector<Tensor> args;
-                // BlockAndUpdateOutputBuffer() sequences calls to
-                // input_impl_->GetNext when the out_iterator doesn't cause
-                // slopping.
-                Status input_status =
-                    input_impl_->GetNext(ctx.get(), &args, &end_of_input_);
-                if (end_of_input_) {
-                  // No more elements to produce, stop the worker thread.
-                  return;
-                }
-                if (input_status.ok()) {
-                  input_status = dataset::MakeIteratorFromInputElement(
-                      ctx.get(), args, thread_index,
-                      dataset()->captured_func_.get(), prefix(), &out_iterator);
-                }
-                if (input_status.ok()) {
-                  // Successfully have a new out_iterator; restart the outer
-                  // loop to produce an element.
-                  break;
-                }
-
-                // We encountered an error; push the error to the output buffer.
-                BlockAndUpdateOutputBuffer(&l, thread_index, input_status,
-                                           /* end_of_sequence = */ false,
-                                           &out_tensors);
-              }
-            }
-
-            // Check if we should exit.
-            if (cancelled_) {
-              return;
-            }
-          }
-        }
-      }
-
-      // Mutex & condition variable to guard mutable iterator internals and
-      // coordinate among worker threads and client thread[s].
-      mutex mu_;
-      condition_variable cond_var_;
-      // The iterator producing elements which are converted to datasets by
-      // the dataset()->captured_func_ then interleaved together.
-      const std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
-      // Whether the input_impl_ can produce future elements.
-      bool end_of_input_ GUARDED_BY(mu_) = false;
-      // The buffer of elements to be produced. Each worker thread operates
-      // on a single OutputBufferElement.
-      std::vector<OutputBufferElement> output_elements_ GUARDED_BY(mu_);
-      // The index into output_elements_ for next element to produce.
-      size_t next_index_ GUARDED_BY(mu_) = 0;
-      // The number of items produced so far within the block
-      size_t block_count_ GUARDED_BY(mu_) = 0;
-      // Number of active threads.
-      size_t num_active_threads_ GUARDED_BY(mu_) = 0;
-      // Flag to instruct the worker threads to exit.
-      bool cancelled_ GUARDED_BY(mu_) = false;
-      // Pointers to the worker threads. This must be last to ensure the
-      // threads have exited before any other members are deallocated.
-      // TODO(b/65178177): Avoid allocating additional threads.
-      std::vector<ThreadStatus> worker_threads_ GUARDED_BY(mu_);
-    };
-
-    const DatasetBase* const input_;
-    const std::unique_ptr<CapturedFunction> captured_func_;
-    const int64 cycle_length_;
-    const int64 block_length_;
-    const bool sloppy_;
-    const DataTypeVector output_types_;
-    const std::vector<PartialTensorShape> output_shapes_;
-  };
-
-  const int graph_def_version_;
-  DataTypeVector output_types_;
-  std::vector<PartialTensorShape> output_shapes_;
-  NameAttrList func_;
-};
-
-REGISTER_KERNEL_BUILDER(Name("ParallelInterleaveDataset").Device(DEVICE_CPU),
-                        ParallelInterleaveDatasetOp);
-
-}  // namespace
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/parallel_map_dataset_op.cc b/tensorflow/core/kernels/parallel_map_dataset_op.cc
deleted file mode 100644
index 2be87f4bde6f28596213433fe287d351ccf0c721..0000000000000000000000000000000000000000
--- a/tensorflow/core/kernels/parallel_map_dataset_op.cc
+++ /dev/null
@@ -1,244 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include <deque>
-
-#include "tensorflow/core/kernels/dataset.h"
-
-#include "tensorflow/core/common_runtime/function.h"
-#include "tensorflow/core/framework/partial_tensor_shape.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/lib/random/random.h"
-
-#include "tensorflow/core/kernels/captured_function.h"
-
-namespace tensorflow {
-
-namespace {
-
-// See documentation in ../ops/dataset_ops.cc for a high-level
-// description of the following op.
-
-class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
- public:
-  explicit ParallelMapDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx),
-        graph_def_version_(ctx->graph_def_version()) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
-  }
-
- protected:
-  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
-                   DatasetBase** output) override {
-    OpInputList inputs;
-    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
-    std::vector<Tensor> other_arguments;
-    other_arguments.reserve(inputs.size());
-    for (const Tensor& t : inputs) {
-      other_arguments.push_back(t);
-    }
-
-    int32 num_parallel_calls;
-    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
-                                            &num_parallel_calls));
-    OP_REQUIRES(ctx, num_parallel_calls > 0,
-                errors::InvalidArgument(
-                    "num_parallel_calls must be greater than zero."));
-
-    std::unique_ptr<CapturedFunction> captured_func;
-    OP_REQUIRES_OK(ctx, CapturedFunction::Create(ctx, func_, graph_def_version_,
-                                                 std::move(other_arguments),
-                                                 &captured_func));
-
-    *output = new Dataset(input, num_parallel_calls, output_types_,
-                          output_shapes_, std::move(captured_func));
-  }
-
- private:
-  class Dataset : public DatasetBase {
-   public:
-    Dataset(const DatasetBase* input, int32 num_parallel_calls,
-            const DataTypeVector& output_types,
-            const std::vector<PartialTensorShape>& output_shapes,
-            std::unique_ptr<CapturedFunction> captured_func)
-        : input_(input),
-          num_parallel_calls_(num_parallel_calls),
-          output_types_(output_types),
-          output_shapes_(output_shapes),
-          captured_func_(std::move(captured_func)) {
-      input_->Ref();
-    }
-
-    ~Dataset() override { input_->Unref(); }
-
-    std::unique_ptr<IteratorBase> MakeIterator(
-        const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::ParallelMap")}));
-    }
-
-    const DataTypeVector& output_dtypes() const override {
-      return output_types_;
-    }
-
-    const std::vector<PartialTensorShape>& output_shapes() const override {
-      return output_shapes_;
-    }
-
-    string DebugString() override { return "ParallelMapDatasetOp::Dataset"; }
-
-   private:
-    class Iterator : public DatasetIterator<Dataset> {
-     public:
-      explicit Iterator(const Params& params)
-          : DatasetIterator<Dataset>(params),
-            input_impl_(params.dataset->input_->MakeIterator(params.prefix)),
-            invocation_results_(params.dataset->num_parallel_calls_) {}
-
-      ~Iterator() override {
-        // TODO(mrry): Replace this cancellation logic with a
-        // CancellationManager. The syntax would be more heavyweight,
-        // but it would be possible to thread a cancellation manager
-        // through the IteratorContext to upstream,
-        // potentially-blocking iterators, when we add these.
-        {
-          mutex_lock l(mu_);
-          for (size_t i = 0; i < dataset()->num_parallel_calls_; ++i) {
-            if (invocation_results_[i].notification) {
-              invocation_results_[i].notification->WaitForNotification();
-            }
-          }
-        }
-      }
-
-      Status GetNextInternal(IteratorContext* ctx,
-                             std::vector<Tensor>* out_tensors,
-                             bool* end_of_sequence) override {
-        mutex_lock l(mu_);
-
-        // Ensure that there are `dataset()->num_parallel_calls_`
-        // invocations of `func_` outstanding at once.
-        while (!end_of_input_ && (num_inputs_consumed_ - num_outputs_consumed_ <
-                                  dataset()->num_parallel_calls_)) {
-          InvokeFunctionLocked(ctx);
-        }
-
-        if (end_of_input_ && num_inputs_consumed_ == num_outputs_consumed_) {
-          *end_of_sequence = true;
-          return Status::OK();
-        }
-
-        // Read the next result out of `invocation_results_`, which
-        // acts as a circular buffer.
-        const size_t result_index =
-            num_outputs_consumed_ % dataset()->num_parallel_calls_;
-        InvocationResult* result = &invocation_results_[result_index];
-        *end_of_sequence = false;
-        if (result->notification) {
-          result->notification->WaitForNotification();
-          if (result->status.ok()) {
-            std::swap(*out_tensors, result->return_values);
-          }
-        }
-        ++num_outputs_consumed_;
-        return result->status;
-      }
-
-     private:
-      struct InvocationResult {
-        Status status;
-        std::unique_ptr<Notification> notification;
-        std::vector<Tensor> return_values;
-      };
-
-      void InvokeFunctionLocked(IteratorContext* ctx)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-        DCHECK(!end_of_input_);
-        DCHECK(num_inputs_consumed_ - num_outputs_consumed_ <
-               dataset()->num_parallel_calls_);
-
-        // The result of invoking the function will be written into the next
-        // slot in `invocation_results_`, which acts as a circular buffer.
-        const size_t result_index =
-            num_inputs_consumed_ % dataset()->num_parallel_calls_;
-        InvocationResult* result = &invocation_results_[result_index];
-        *result = InvocationResult();
-
-        // Get the next input element.
-        std::vector<Tensor> input_element;
-        result->status =
-            input_impl_->GetNext(ctx, &input_element, &end_of_input_);
-        if (end_of_input_) {
-          result->status = errors::OutOfRange("");
-        } else {
-          ++num_inputs_consumed_;
-        }
-
-        if (result->status.ok()) {
-          // Call `func_(input_element)`, store the result in
-          // `result->return_values`, and notify `result->notification`
-          // to unblock a consumer.
-          result->notification.reset(new Notification);
-
-          FunctionLibraryRuntime::Options opts;
-          opts.step_id = CapturedFunction::generate_step_id();
-          ScopedStepContainer* step_container = new ScopedStepContainer(
-              opts.step_id, [this, ctx](const string& name) {
-                dataset()
-                    ->captured_func_->resource_manager()
-                    ->Cleanup(name)
-                    .IgnoreError();
-              });
-          opts.step_container = step_container;
-          opts.runner = ctx->runner();
-          dataset()->captured_func_->RunAsync(
-              opts, input_element, &result->return_values,
-              [result, step_container, result_index](Status ret_status) {
-                delete step_container;
-                result->status.Update(ret_status);
-                result->notification->Notify();
-              });
-        }
-      }
-
-      mutex mu_;
-      const std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
-      std::vector<InvocationResult> invocation_results_ GUARDED_BY(mu_);
-      bool end_of_input_ GUARDED_BY(mu_) = false;
-      int64 num_inputs_consumed_ GUARDED_BY(mu_) = 0;
-      int64 num_outputs_consumed_ GUARDED_BY(mu_) = 0;
-    };
-
-    const DatasetBase* const input_;
-    const NameAttrList func_;
-    const int32 num_parallel_calls_;
-    const DataTypeVector output_types_;
-    const std::vector<PartialTensorShape> output_shapes_;
-    const std::unique_ptr<CapturedFunction> captured_func_;
-  };
-
-  const int graph_def_version_;
-  DataTypeVector output_types_;
-  std::vector<PartialTensorShape> output_shapes_;
-  NameAttrList func_;
-};
-
-REGISTER_KERNEL_BUILDER(Name("ParallelMapDataset").Device(DEVICE_CPU),
-                        ParallelMapDatasetOp);
-
-}  // namespace
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc
index ac90f67ce0bb8d9acffc3868acffc1cdfbe0f492..6a52a15c931290fcdaabbb259f9dbd86f1824a30 100644
--- a/tensorflow/core/kernels/pooling_ops_common.cc
+++ b/tensorflow/core/kernels/pooling_ops_common.cc
@@ -147,6 +147,9 @@ void DnnPoolingOp<T>::Compute(
   Tensor* tensor_out = nullptr;
   OP_REQUIRES_OK(context,
                  context->allocate_output(0, tensor_out_shape, &tensor_out));
+  if (tensor_in.shape().num_elements() == 0) {
+    return;
+  }
 
   PoolParameters params{context, size,        stride,
                         padding, data_format, tensor_in.shape()};
@@ -247,6 +250,9 @@ void DnnPoolingGradOp<T>::Compute(
   Tensor* input_backprop = nullptr;
   OP_REQUIRES_OK(context,
                  context->allocate_output(0, tensor_in_shape, &input_backprop));
+  if (tensor_in_shape.num_elements() == 0) {
+    return;
+  }
 
   PoolParameters params{context, size,        stride,
                         padding, data_format, tensor_in_shape};
diff --git a/tensorflow/core/kernels/pooling_ops_common.h b/tensorflow/core/kernels/pooling_ops_common.h
index 75a6fc371b4585695def1f15e7983be37417acf6..e3131b804f2412c890016dcfb3aace1648729172 100644
--- a/tensorflow/core/kernels/pooling_ops_common.h
+++ b/tensorflow/core/kernels/pooling_ops_common.h
@@ -86,7 +86,9 @@ class MaxPoolingOp : public OpKernel {
                   errors::InvalidArgument("Invalid data format"));
       OP_REQUIRES(
           context, data_format_ == FORMAT_NHWC,
-          errors::InvalidArgument("Default MaxPoolingOp only supports NHWC."));
+          errors::InvalidArgument("Default MaxPoolingOp only supports NHWC ",
+                                  "on device type ",
+                                  DeviceTypeString(context->device_type())));
     } else {
       data_format_ = FORMAT_NHWC;
     }
diff --git a/tensorflow/core/kernels/priority_queue.cc b/tensorflow/core/kernels/priority_queue.cc
index 5c487edbe3357753d44c21432765dce7b6d29e60..bab94f7f0ad1fd7609761aaabc4f76ae6eafeb7b 100644
--- a/tensorflow/core/kernels/priority_queue.cc
+++ b/tensorflow/core/kernels/priority_queue.cc
@@ -123,7 +123,7 @@ Status PriorityQueue::GetElementComponentFromBatch(
   TF_RETURN_IF_ERROR(ctx->allocate_persistent(
       tuple[component].dtype(), element_shape, out_tensor, &element_access));
   TF_RETURN_IF_ERROR(
-      CopySliceToElement(tuple[component], element_access, index));
+      batch_util::CopySliceToElement(tuple[component], element_access, index));
   return Status::OK();
 }
 
diff --git a/tensorflow/core/kernels/quantization_utils_test.cc b/tensorflow/core/kernels/quantization_utils_test.cc
index a73581fbbc1e9db4af621b109496088ba2c7c7de..d148c9f78d61d9b1840cc7a14f82c9254a4d434c 100644
--- a/tensorflow/core/kernels/quantization_utils_test.cc
+++ b/tensorflow/core/kernels/quantization_utils_test.cc
@@ -743,7 +743,8 @@ template <int POW>
 void TestDivide64x2Pow(int64 val, int64 ref) {
   const int64x2_t val_64x2 = vmovq_n_s64(val);
   const int64x2_t ret = Divide64x2Pow<POW>(val_64x2);
-  int64 rets[2];
+  // TODO(b/70947959) Change back to int64 when possible
+  int64_t rets[2];
   vst1q_s64(rets, ret);
   EXPECT_EQ(rets[0], ref);
   EXPECT_EQ(rets[1], ref);
@@ -754,7 +755,8 @@ template <int POW>
 void TestDivide64x2PowRound(int64 val, int64 ref) {
   const int64x2_t val_64x2 = vmovq_n_s64(val);
   const int64x2_t shifted = Divide64x2PowRound<POW>(val_64x2);
-  int64 rets[2];
+  // TODO(b/70947959) Change back to int64 when possible
+  int64_t rets[2];
   vst1q_s64(rets, shifted);
   EXPECT_EQ(rets[0], ref) << "in = " << val << ", " << POW
                           << ", act = " << rets[0] << ", ref = " << ref;
diff --git a/tensorflow/core/kernels/quantized_add_op_test.cc b/tensorflow/core/kernels/quantized_add_op_test.cc
index 90bd145ad0c9b1da8805ecac7c49bd94c1db22ed..376fe34c4b5448ff46f3e657fead29753fb3c129 100644
--- a/tensorflow/core/kernels/quantized_add_op_test.cc
+++ b/tensorflow/core/kernels/quantized_add_op_test.cc
@@ -32,9 +32,7 @@ limitations under the License.
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
-
-using namespace ops;  // NOLINT(build/namespaces)
-
+namespace ops {
 namespace {
 
 void TestAdd(const std::vector<int64>& x_shape,
@@ -184,8 +182,6 @@ void TimeAdd(const std::vector<int64>& x_shape,
             << ", total_duration=" << total_duration;
 }
 
-}  // namespace
-
 void TestManualScalar() {
   TestAdd(
       {10}, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f}, 0.0f,
@@ -276,10 +272,12 @@ void BenchmarkVectorPlusTensor() {
   TimeAdd({100000, 100}, {100}, 1);
 }
 
-}  // end namespace tensorflow
+}  // namespace
+}  // namespace ops
+}  // namespace tensorflow
 
 #define RUN_TEST(t) \
-  TEST(QuantizedAddOpTest, t) { tensorflow::t(); }
+  TEST(QuantizedAddOpTest, t) { tensorflow::ops::t(); }
 
 RUN_TEST(TestManualScalar);
 RUN_TEST(TestManualVector);
diff --git a/tensorflow/core/kernels/quantized_concat_op.cc b/tensorflow/core/kernels/quantized_concat_op.cc
index ee573f1bb805107299fed89df211275a1e81c35d..d67f1ab3ec28934bc08c11997a8b2f448c30ad91 100644
--- a/tensorflow/core/kernels/quantized_concat_op.cc
+++ b/tensorflow/core/kernels/quantized_concat_op.cc
@@ -174,13 +174,13 @@ class QuantizedConcatOp : public OpKernel {
     OP_REQUIRES(context, (input_mins.size() == N),
                 errors::InvalidArgument(
                     "QuantizedConcatOp : Expected mins input list length ",
-                    input_mins.size(), " to equal values length ", N))
+                    input_mins.size(), " to equal values length ", N));
     OpInputList input_maxes;
     OP_REQUIRES_OK(context, context->input_list("input_maxes", &input_maxes));
     OP_REQUIRES(context, (input_maxes.size() == N),
                 errors::InvalidArgument(
                     "QuantizedConcatOp : Expected maxes input list length ",
-                    input_maxes.size(), " to equal values length ", N))
+                    input_maxes.size(), " to equal values length ", N));
     const int input_dims = values[0].dims();
     const TensorShape& input_shape = values[0].shape();
     OP_REQUIRES(
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc
index 3b0764bb9bf9ff00c71173c53cdb78b6ab3ac6ca..1921b83d12c0688a96bad0c561080a0189e49bbe 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/core/kernels/quantized_conv_ops.cc
@@ -268,6 +268,13 @@ class Im2ColConvFunctor {
     Im2ColBufferResource<T1, chunk_value_count>* im2col_buffer_resource;
     std::function<Status(Im2ColBufferResource<T1, chunk_value_count>**)>
         creator = [](Im2ColBufferResource<T1, chunk_value_count>** resource) {
+#ifdef _MSC_VER
+          // MSVC complains about the capture of chunk_value_count which oddly
+          // works fine in conv_ops_using_gemm.cc for example.
+          // Define chunk_value_count inside the lambda for now.
+          const int64 chunk_value_count =
+              (kMaxChunkSize + (sizeof(T1) - 1)) / sizeof(T1);
+#endif
           *resource = new Im2ColBufferResource<T1, chunk_value_count>();
           return Status::OK();
         };
@@ -457,6 +464,19 @@ class QuantizedConv2DOp : public OpKernel {
         context, (strides_[0] == 1 && strides_[3] == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
+    std::vector<int32> dilations;
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations));
+    OP_REQUIRES(context, dilations.size() == 4,
+                errors::InvalidArgument("Dilations field must "
+                                        "specify 4 dimensions"));
+    OP_REQUIRES(context, dilations[1] == 1 && dilations[2] == 1,
+                errors::InvalidArgument(
+                    "Current implementation only supports dilated rate as 1 "
+                    "in the row and column dimensions."));
+    OP_REQUIRES(context, (dilations[0] == 1 && dilations[3] == 1),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilations in the batch and depth dimensions."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
   }
 
diff --git a/tensorflow/core/kernels/quantized_instance_norm_test.cc b/tensorflow/core/kernels/quantized_instance_norm_test.cc
index d2b15ee20bb89a28c9d7f8398435352107eb4d79..896fe046e7ef2a99e8f854340c4c786095679a6e 100644
--- a/tensorflow/core/kernels/quantized_instance_norm_test.cc
+++ b/tensorflow/core/kernels/quantized_instance_norm_test.cc
@@ -22,6 +22,8 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_testutil.h"
 
 namespace tensorflow {
+namespace ops {
+namespace {
 
 void ReferenceImpl(const quint8* inp, float inp_min, float inp_max,
                    const TensorShape& shape, float var_eps, float* out) {
@@ -78,10 +80,6 @@ void ReferenceImpl(const quint8* inp, float inp_min, float inp_max,
   }
 }
 
-using namespace ops;  // NOLINT(build/namespaces)
-
-namespace {
-
 void Expect(const Tensor& input, float x_min, float x_max,
             bool output_range_given, float give_y_min, float given_y_max) {
   Scope root = Scope::NewRootScope();
@@ -123,8 +121,6 @@ void Expect(const Tensor& input, float x_min, float x_max,
   LOG(INFO) << "max diff " << max_diff();
 }
 
-}  // end namespace
-
 void TestBasic() {
   Tensor input_tensor(DT_QUINT8, {1, 4, 4, 32});
   auto input = input_tensor.flat<quint8>();
@@ -173,10 +169,12 @@ void TestClamp() {
   Expect(input_tensor, -10.0f, 10.0f, true, 0.0f, 1.0f);
 }
 
-}  // end namespace tensorflow
+}  // namespace
+}  // namespace ops
+}  // namespace tensorflow
 
 #define RUN_TEST(t) \
-  TEST(QuantizedAddOpTest, t) { tensorflow::t(); }
+  TEST(QuantizedInstanceNormTest, t) { tensorflow::ops::t(); }
 
 RUN_TEST(TestBasic);
 RUN_TEST(TestZeroInput);
diff --git a/tensorflow/core/kernels/quantized_mul_op_test.cc b/tensorflow/core/kernels/quantized_mul_op_test.cc
index 5f858eb8ce03be7d130649f814db5f1f9c68f18c..b0550c8260c0ec7e40eeab4e07a5ecaf4cb8e32b 100644
--- a/tensorflow/core/kernels/quantized_mul_op_test.cc
+++ b/tensorflow/core/kernels/quantized_mul_op_test.cc
@@ -32,9 +32,7 @@ limitations under the License.
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
-
-using namespace ops;  // NOLINT(build/namespaces)
-
+namespace ops {
 namespace {
 
 void TestMul(const std::vector<int64>& x_shape,
@@ -184,8 +182,6 @@ void TimeMul(const std::vector<int64>& x_shape,
             << ", total_duration=" << total_duration;
 }
 
-}  // namespace
-
 void TestManualScalar() {
   TestMul(
       {10}, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f}, 0.0f,
@@ -276,10 +272,12 @@ void BenchmarkVectorTimesTensor() {
   TimeMul({100000, 100}, {100}, 100);
 }
 
-}  // end namespace tensorflow
+}  // namespace
+}  // namespace ops
+}  // namespace tensorflow
 
 #define RUN_TEST(t) \
-  TEST(QuantizedAddOpTest, t) { tensorflow::t(); }
+  TEST(QuantizedAddOpTest, t) { tensorflow::ops::t(); }
 
 RUN_TEST(TestManualScalar);
 RUN_TEST(TestManualVector);
diff --git a/tensorflow/core/kernels/queue_base.cc b/tensorflow/core/kernels/queue_base.cc
index 6c91d0cd94c8aee6b90115fabd82f99ddd29263e..330d161c32bc1a48b671765cacc21618545fa71a 100644
--- a/tensorflow/core/kernels/queue_base.cc
+++ b/tensorflow/core/kernels/queue_base.cc
@@ -336,32 +336,7 @@ void QueueBase::FlushUnlocked() {
 
 Status QueueBase::CopySliceToElement(const Tensor& parent, Tensor* element,
                                      int64 index) {
-#define HANDLE_TYPE(DT)                                                   \
-  if (parent.dtype() == DT) {                                             \
-    TF_RETURN_IF_ERROR(HandleSliceToElement<DT>(parent, element, index)); \
-    return Status::OK();                                                  \
-  }
-  HANDLE_TYPE(DT_FLOAT);
-  HANDLE_TYPE(DT_HALF);
-  HANDLE_TYPE(DT_DOUBLE);
-  HANDLE_TYPE(DT_INT32);
-  HANDLE_TYPE(DT_UINT8);
-  HANDLE_TYPE(DT_INT16);
-  HANDLE_TYPE(DT_INT8);
-  HANDLE_TYPE(DT_STRING);
-  HANDLE_TYPE(DT_COMPLEX64);
-  HANDLE_TYPE(DT_COMPLEX128);
-  HANDLE_TYPE(DT_INT64);
-  HANDLE_TYPE(DT_BOOL);
-  HANDLE_TYPE(DT_QINT8);
-  HANDLE_TYPE(DT_QUINT8);
-  HANDLE_TYPE(DT_QINT32);
-  HANDLE_TYPE(DT_QINT16);
-  HANDLE_TYPE(DT_QUINT16);
-  HANDLE_TYPE(DT_UINT16);
-#undef HANDLE_TYPE
-  return errors::Unimplemented("CopySliceToElement Unhandled data type: ",
-                               parent.dtype());
+  return batch_util::CopySliceToElement(parent, element, index);
 }
 
 /* static */
diff --git a/tensorflow/core/kernels/queue_op.h b/tensorflow/core/kernels/queue_op.h
index 2d68ac7a298f1835b41750152f4ebff922cf019c..ad606803ee7017380b33819dca7718023daa3900 100644
--- a/tensorflow/core/kernels/queue_op.h
+++ b/tensorflow/core/kernels/queue_op.h
@@ -44,8 +44,7 @@ class QueueOp : public ResourceOpKernel<QueueInterface> {
   void Compute(OpKernelContext* context) override {
     ResourceOpKernel<QueueInterface>::Compute(context);
     if (resource_ && context->track_allocations()) {
-      context->record_host_persistent_memory_allocation(
-          resource_->MemoryUsed());
+      context->record_persistent_memory_allocation(resource_->MemoryUsed());
     }
   }
 
diff --git a/tensorflow/core/kernels/random_op_test.cc b/tensorflow/core/kernels/random_op_test.cc
index f93a0d130d8ebb3835637a35d67a99bda75b1941..47d94ad902852c26382ffe5a10daa44be4787751 100644
--- a/tensorflow/core/kernels/random_op_test.cc
+++ b/tensorflow/core/kernels/random_op_test.cc
@@ -17,11 +17,13 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/lib/random/philox_random.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
+namespace {
 
 Tensor VecShape(int64 v) {
   if (v >= std::numeric_limits<int32>::max()) {
@@ -57,7 +59,7 @@ Graph* TruncatedNormal(int64 n) {
 }
 
 #define BM_RNG(DEVICE, RNG)                                   \
-  static void BM_##DEVICE##_##RNG(int iters, int arg) {       \
+  void BM_##DEVICE##_##RNG(int iters, int arg) {              \
     testing::ItemsProcessed(static_cast<int64>(iters) * arg); \
     test::Benchmark(#DEVICE, RNG(arg)).Run(iters);            \
   }                                                           \
@@ -76,12 +78,13 @@ Tensor VecAlphas(int64 n) {
   for (int i = 0; i < n; i++) {
     // Alternate back and forth between small-and-growing (.25) and
     // large-and-shrinking (26.67) alpha.
-    alphas.vec<double>()(i) = 0.25 + std::pow(1.1, i % 2 == 0 ? i : n - i);
+    alphas.vec<double>()(i) =
+        0.25 + MathUtil::IPow(1.1, i % 2 == 0 ? i : n - i);
   }
   return alphas;
 }
 
-static void BM_cpu_RandomGamma(int iters, int nsamp, int nalpha) {
+void BM_cpu_RandomGamma(int iters, int nsamp, int nalpha) {
   testing::ItemsProcessed(static_cast<int64>(iters) * nsamp * nalpha);
   Graph* g = new Graph(OpRegistry::Global());
   test::graph::RandomGamma(g, test::graph::Constant(g, VecShape(nsamp)),
@@ -90,7 +93,7 @@ static void BM_cpu_RandomGamma(int iters, int nsamp, int nalpha) {
 }
 BENCHMARK(BM_cpu_RandomGamma)->RangePair(1 << 14, 4 << 15, 2, 50);
 
-static void BM_PhiloxRandom(int iters) {
+void BM_PhiloxRandom(int iters) {
   // Fill 2M random numbers
   int count = 2 << 20;
 
@@ -114,7 +117,7 @@ static void BM_PhiloxRandom(int iters) {
 }
 BENCHMARK(BM_PhiloxRandom);
 
-static void BM_StdMTRandom(int iters) {
+void BM_StdMTRandom(int iters) {
   // Fill 2M random numbers
   int count = 2 << 20;
 
@@ -138,4 +141,5 @@ static void BM_StdMTRandom(int iters) {
 }
 BENCHMARK(BM_StdMTRandom);
 
-}  // end namespace tensorflow
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/random_shuffle_queue_op.cc b/tensorflow/core/kernels/random_shuffle_queue_op.cc
index 7a40e9ddf20023152a50363dfdc540a4f15823ac..e9695cfde30945c9c99db85f33e44030e5d45054 100644
--- a/tensorflow/core/kernels/random_shuffle_queue_op.cc
+++ b/tensorflow/core/kernels/random_shuffle_queue_op.cc
@@ -171,7 +171,7 @@ Status RandomShuffleQueue::GetElementComponentFromBatch(
   TF_RETURN_IF_ERROR(ctx->allocate_persistent(
       tuple[component].dtype(), element_shape, out_tensor, &element_access));
   TF_RETURN_IF_ERROR(
-      CopySliceToElement(tuple[component], element_access, index));
+      batch_util::CopySliceToElement(tuple[component], element_access, index));
   return Status::OK();
 }
 
diff --git a/tensorflow/core/kernels/record_input_op.cc b/tensorflow/core/kernels/record_input_op.cc
index 878996c9d6a9923404791d4e8995b817ecdf9799..841f9dc4b8e08b5c2a5346e8c2abd585ebd0cb39 100644
--- a/tensorflow/core/kernels/record_input_op.cc
+++ b/tensorflow/core/kernels/record_input_op.cc
@@ -36,14 +36,18 @@ class RecordInputOp : public OpKernel {
     GETATTR(int64, file_buffer_size);
     GETATTR(int64, file_parallelism);
     GETATTR(int64, batch_size);
+    GETATTR(string, compression_type);
 #undef GETATTR
 
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("compression_type", &compression_type));
+
     RecordYielder::Options yopts;
     yopts.file_pattern = file_pattern;
     yopts.seed = file_random_seed;
     yopts.bufsize = file_buffer_size;
     yopts.file_shuffle_shift_ratio = file_shuffle_shift_ratio;
     yopts.parallelism = file_parallelism;
+    yopts.compression_type = compression_type;
     yielder_ = std::unique_ptr<RecordYielder>(new RecordYielder(ctx, yopts));
 
     batch_size_ = batch_size;
diff --git a/tensorflow/core/kernels/record_yielder.cc b/tensorflow/core/kernels/record_yielder.cc
index e4fa0ed322df57789f95efe584fe91a3efe561ec..3fd9bf9defe4aeedde1f0456638e60ea1e5e2cdb 100644
--- a/tensorflow/core/kernels/record_yielder.cc
+++ b/tensorflow/core/kernels/record_yielder.cc
@@ -206,7 +206,10 @@ void RecordYielder::ShardLoop(Shard* shard) {
       shard->status = errors::InvalidArgument("Can't open ", filename);
       break;
     }
-    io::RecordReader rdr(file.get());
+    io::RecordReaderOptions options =
+        io::RecordReaderOptions::CreateRecordReaderOptions(
+            opts_.compression_type);
+    io::RecordReader rdr(file.get(), options);
     uint64 offset = 0;
     string record;
     while (true) {
diff --git a/tensorflow/core/kernels/record_yielder.h b/tensorflow/core/kernels/record_yielder.h
index c6301812213bf569d47c1fd3b7deba3c57a31ae5..34817ad51b6e4f21e6b6b0f516c438a845b30e3b 100644
--- a/tensorflow/core/kernels/record_yielder.h
+++ b/tensorflow/core/kernels/record_yielder.h
@@ -78,6 +78,8 @@ class RecordYielder {
     // Uses these many concurrent tfrecord iterators to iterate through
     // tfrecords.
     int32 parallelism = 1;
+
+    string compression_type;
   };
 
   explicit RecordYielder(OpKernelConstruction* context,
diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h
index 9da992ccd18d7bf107a1bc2a7b91ec9fb1a85fd5..d7bebfb24c82275da07fb5b548f7169b77ea3cb9 100644
--- a/tensorflow/core/kernels/reduction_ops_common.h
+++ b/tensorflow/core/kernels/reduction_ops_common.h
@@ -240,14 +240,7 @@ class ReductionOp : public OpKernel {
       ctx->SetStatus(errors::Internal("Error during reduction copy."));
     }
     if (ctx->track_allocations()) {
-      // The temporary memory becomes the output memory.
-      if (ctx->allocate_on_host(alloc_attr)) {
-        ctx->record_host_temp_memory_size(
-            -static_cast<int64>(out.AllocatedBytes()));
-      } else {
-        ctx->record_device_temp_memory_size(
-            -static_cast<int64>(out.AllocatedBytes()));
-      }
+      ctx->record_temp_memory_size(-static_cast<int64>(out.AllocatedBytes()));
     }
     ctx->set_output(0, out);
   }
diff --git a/tensorflow/core/kernels/reduction_ops_min.cc b/tensorflow/core/kernels/reduction_ops_min.cc
index 807ac0a4567790ef3fb95b4c12a91a1562f83fa7..5c537c5b9c75afef2b8f4ea5446f3d4012ed0cbb 100644
--- a/tensorflow/core/kernels/reduction_ops_min.cc
+++ b/tensorflow/core/kernels/reduction_ops_min.cc
@@ -50,6 +50,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
           .TypeConstraint<int64>("Tidx")                                       \
           .HostMemory("reduction_indices"),                                    \
       ReductionOp<GPUDevice, type, int64, Eigen::internal::MinReducer<type>>);
+REGISTER_GPU_KERNELS(Eigen::half);
 REGISTER_GPU_KERNELS(float);
 REGISTER_GPU_KERNELS(double);
 
diff --git a/tensorflow/core/kernels/reduction_ops_test.cc b/tensorflow/core/kernels/reduction_ops_test.cc
index 9bbe993a2f93e522688738abaf41a518e95ef871..fe8ea59f1be521166d0e42295e79d1bb5a242750 100644
--- a/tensorflow/core/kernels/reduction_ops_test.cc
+++ b/tensorflow/core/kernels/reduction_ops_test.cc
@@ -174,6 +174,11 @@ static void BM_Min2DToScalarGPU(int iters, int num_x, int num_y) {
 }
 BENCHMARK(BM_Min2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192);
 
+static void BM_Min2DToScalarGPUHalf(int iters, int num_x, int num_y) {
+  ReduceToScalar<Eigen::half>(iters, "gpu", "Min", num_x, num_y);
+}
+BENCHMARK(BM_Min2DToScalarGPUHalf)->RangePair(2048, 8192, 2048, 8192);
+
 static void BM_Bool2DToScalarGPU(int iters, int num_x, int num_y) {
   ReduceToScalar<bool>(iters, "gpu", "All", num_x, num_y);
 }
diff --git a/tensorflow/core/kernels/reshape_op.cc b/tensorflow/core/kernels/reshape_op.cc
index 18ebf70c1738747ab64545f7770309a3e0865f1a..8b86596721aa41c124b35b19cac7aac264b6f574 100644
--- a/tensorflow/core/kernels/reshape_op.cc
+++ b/tensorflow/core/kernels/reshape_op.cc
@@ -43,7 +43,8 @@ REGISTER_KERNEL_BUILDER(Name("Reshape")
                               .TypeConstraint<int64>("Tshape"), \
                           ReshapeOp);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
-REGISTER_GPU_KERNEL(bool);
+TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
+TF_CALL_bool(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
 #ifdef TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/resize_area_op_test.cc b/tensorflow/core/kernels/resize_area_op_test.cc
index cc5244d3a07031a843f3bb77e0d409cf9d64b4f2..a7e06ef15a1dd15c4c1428f44dbcd5e560b5e993 100644
--- a/tensorflow/core/kernels/resize_area_op_test.cc
+++ b/tensorflow/core/kernels/resize_area_op_test.cc
@@ -41,7 +41,7 @@ class ResizeAreaOpTest : public OpsTestBase {
     bool is_ref = IsRefType(input_types_[inputs_.size()]);
     Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()),
                                DataTypeToEnum<float>::v(), shape);
-    input->flat<float>().setZero();
+    input->flat<float>().setRandom();
     tensors_.push_back(input);
     if (is_ref) {
       CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]),
diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index 0ae8a8fdbc14af81650fb756fdd20bb0d983e71e..9cc8e03e3ac6b17f16d65f1a9ade04d8fdcba034 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -55,6 +55,7 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/variant_op_registry.h"
 #include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/dense_update_functor.h"
 #include "tensorflow/core/kernels/gather_functor.h"
@@ -82,7 +83,7 @@ class ReadVariableOp : public OpKernel {
     ResourceHandle handle = HandleFromInput(ctx, 0);
     const auto status = LookupResource(ctx, handle, &variable);
     OP_REQUIRES(ctx, status.ok(),
-                errors::NotFound(
+                errors::FailedPrecondition(
                     "Error while reading resource variable ", handle.name(),
                     " from Container: ", handle.container(),
                     ". This could mean that the variable was uninitialized. ",
@@ -110,7 +111,6 @@ REGISTER_KERNEL_BUILDER(Name("ReadVariableOp").Device(DEVICE_CPU),
                         ReadVariableOp);
 
 #if GOOGLE_CUDA
-
 REGISTER_KERNEL_BUILDER(
     Name("ReadVariableOp").Device(DEVICE_GPU).HostMemory("resource"),
     ReadVariableOp);
@@ -130,6 +130,7 @@ REGISTER_KERNEL_BUILDER(
                           ResourceHandleOp<Var>)
 
 TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS);
+TF_CALL_variant(REGISTER_GPU_KERNELS);
 #undef REGISTER_GPU_KERNELS
 #endif  // GOOGLE_CUDA
 
@@ -275,6 +276,64 @@ class AssignVariableOp : public OpKernel {
   DataType dtype_;
 };
 
+template <typename Device>
+Status VariantCopyFn(OpKernelContext* context, const Tensor& from, Tensor* to);
+
+#define CPU_DENSE_COPY(T)                                                \
+  case DataTypeToEnum<T>::value: {                                       \
+    functor::DenseUpdate<CPUDevice, T, ASSIGN> copy_functor_;            \
+    copy_functor_(context->eigen_device<CPUDevice>(), tensor->flat<T>(), \
+                  from.flat<T>());                                       \
+    break;                                                               \
+  }
+
+#define INSTANTIATE_GET_VARIANT_COPY_FN(Device, TYPE_CALLER, TYPE_DENSE_COPY) \
+  template <>                                                                 \
+  Status VariantCopyFn<Device>(OpKernelContext * context, const Tensor& from, \
+                               Tensor* to) {                                  \
+    PersistentTensor tmp;                                                     \
+    Tensor* tensor;                                                           \
+    AllocatorAttributes attr;                                                 \
+    attr.set_gpu_compatible(true);                                            \
+    attr.set_nic_compatible(true);                                            \
+    TF_RETURN_IF_ERROR(context->allocate_persistent(                          \
+        from.dtype(), from.shape(), &tmp, &tensor, attr));                    \
+    switch (from.dtype()) {                                                   \
+      TYPE_CALLER(TYPE_DENSE_COPY);                                           \
+      default:                                                                \
+        return errors::InvalidArgument(                                       \
+            "VariantCopyFn: Could not perform a deep copy of variant "        \
+            "element of type: ",                                              \
+            DataTypeString(from.dtype()),                                     \
+            " using device: ", context->device()->name());                    \
+    }                                                                         \
+    *to = *tensor;                                                            \
+    return Status::OK();                                                      \
+  }
+
+INSTANTIATE_GET_VARIANT_COPY_FN(CPUDevice, TF_CALL_ALL_TYPES, CPU_DENSE_COPY);
+
+#if GOOGLE_CUDA
+#define GPU_DENSE_COPY(T)                                                \
+  case DataTypeToEnum<T>::value: {                                       \
+    functor::DenseUpdate<GPUDevice, T, ASSIGN> copy_functor_;            \
+    copy_functor_(context->eigen_device<GPUDevice>(), tensor->flat<T>(), \
+                  from.flat<T>());                                       \
+    break;                                                               \
+  }
+#define TF_CALL_GPU_AND_ADDITIONAL_TYPES(T) \
+  TF_CALL_GPU_ALL_TYPES(T);                 \
+  TF_CALL_int32(T);                         \
+  TF_CALL_int64(T);
+INSTANTIATE_GET_VARIANT_COPY_FN(GPUDevice, TF_CALL_GPU_AND_ADDITIONAL_TYPES,
+                                GPU_DENSE_COPY);
+#undef TF_CALL_GPU_AND_ADDITIONAL_TYPES
+#undef GPU_DENSE_COPY
+#endif  // GOOGLE_CUDA
+
+#undef CPU_DENSE_COPY
+#undef INSTANTIATE_GET_VARIANT_COPY_FN
+
 template <typename Device>
 class AssignVariableOp<Device, Variant> : public OpKernel {
  public:
@@ -287,21 +346,15 @@ class AssignVariableOp<Device, Variant> : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& value = context->input(1);
-    OP_REQUIRES(context, dtype_ == value.dtype(),
-                errors::InvalidArgument(
-                    "Variable and value dtypes don't match; respectively, ",
-                    dtype_, " and ", context->input(1).dtype()));
-
     Var* variable = nullptr;
     OP_REQUIRES_OK(context, LookupOrCreateResource<Var>(
                                 context, HandleFromInput(context, 0), &variable,
                                 [this, context](Var** ptr) {
-                                  *ptr = new Var(dtype_);
-                                  // Create an empty new Variant tensor.
+                                  // Created on host.
+                                  *ptr = new Var(DT_VARIANT);
                                   return Status::OK();
                                 }));
     core::ScopedUnref s(variable);
-
     OP_REQUIRES(context, variable->tensor()->dtype() == DT_VARIANT,
                 errors::InvalidArgument(
                     "Trying to assign variable with wrong dtype. Expected ",
@@ -309,16 +362,17 @@ class AssignVariableOp<Device, Variant> : public OpKernel {
                     DataTypeString(DT_VARIANT)));
 
     mutex_lock ml(*variable->mu());
-    // TODO(ebrevdo): Add a proper Variant deep copy / assign registry
-    // entry and use that here.  For now, use a serialization
-    // roundtrip to perform the copy on CPU.  This is OK because this
-    // op is not registered for GPU.
-    *variable->tensor() = Tensor();
-    TensorProto tmp;
-    value.AsProtoTensorContent(&tmp);
-    OP_REQUIRES(context, variable->tensor()->FromProto(tmp),
-                errors::Internal("Could not properly reserialize values "
-                                 "Variant.  Check logs for more details."));
+
+    *variable->tensor() = Tensor(DT_VARIANT, value.shape());
+    const auto elements_in = value.flat<Variant>();
+    auto elements_out = variable->tensor()->flat<Variant>();
+    auto copy_fn = std::bind(&VariantCopyFn<Device>, context,
+                             std::placeholders::_1, std::placeholders::_2);
+    for (int64 i = 0; i < elements_in.size(); ++i) {
+      OP_REQUIRES_OK(context, VariantDeviceCopy(
+                                  VariantDeviceCopyDirection::DEVICE_TO_DEVICE,
+                                  elements_in(i), &elements_out(i), copy_fn));
+    };
   }
 
  private:
@@ -345,6 +399,7 @@ TF_CALL_variant(REGISTER_KERNELS);
                           AssignVariableOp<GPUDevice, type>);
 
 TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS);
+TF_CALL_variant(REGISTER_GPU_KERNELS);
 #undef REGISTER_GPU_KERNELS
 #endif  // GOOGLE_CUDA
 
@@ -464,8 +519,7 @@ class ResourceGatherOp : public OpKernel {
       auto out_flat = out->shaped<T, 3>({1, N, out->NumElements() / N});
 
       functor::GatherFunctor<Device, T, Index> functor;
-      int64 bad_i = functor(c, params_flat,
-                            indices_flat, out_flat);
+      int64 bad_i = functor(c, params_flat, indices_flat, out_flat);
 
       OP_REQUIRES(
           c, bad_i < 0,
diff --git a/tensorflow/core/kernels/restore_op.cc b/tensorflow/core/kernels/restore_op.cc
index 0593a07b80cfb043ee2ea3c99932cc12d9334cc5..d9bbcb14ab3ccf436e8575d48507be0daef66d1b 100644
--- a/tensorflow/core/kernels/restore_op.cc
+++ b/tensorflow/core/kernels/restore_op.cc
@@ -41,7 +41,7 @@ class RestoreOp : public OpKernel {
   }
   void Compute(OpKernelContext* context) override {
     RestoreTensor(context, &checkpoint::OpenTableTensorSliceReader,
-                  preferred_shard_, false);
+                  preferred_shard_, false, 0);
   }
 
  private:
@@ -67,7 +67,7 @@ class RestoreSliceOp : public OpKernel {
   }
   void Compute(OpKernelContext* context) override {
     RestoreTensor(context, &checkpoint::OpenTableTensorSliceReader,
-                  preferred_shard_, true);
+                  preferred_shard_, true, 0);
   }
 
  private:
diff --git a/tensorflow/core/kernels/reverse_op.cc b/tensorflow/core/kernels/reverse_op.cc
index 7ac34d1c62376f40f9d30397cad71233db9468dc..8f82784d936c05d64317e8f27dd8703502083b9b 100644
--- a/tensorflow/core/kernels/reverse_op.cc
+++ b/tensorflow/core/kernels/reverse_op.cc
@@ -182,9 +182,9 @@ class ReverseOp : public OpKernel {
       OP_REQUIRES_OK(context,
                      context->allocate_output(0, input.shape(), &output));
 
-#define HANDLE_REVERSE(NDIMS)                                                 \
-  case NDIMS:                                                                 \
-    HandleReverseCase<Device, T, NDIMS>(context, dims.vec<bool>(), output);   \
+#define HANDLE_REVERSE(NDIMS)                                               \
+  case NDIMS:                                                               \
+    HandleReverseCase<Device, T, NDIMS>(context, dims.vec<bool>(), output); \
     return;
 
       switch (input_dims) {
@@ -228,7 +228,7 @@ void HandleReverseV2Case(OpKernelContext* context,
                                        result->tensor<T, NDIMS>());
 }
 
-template <typename Device, typename T>
+template <typename Device, typename T, typename Tidx>
 class ReverseV2Op : public OpKernel {
  public:
   explicit ReverseV2Op(OpKernelConstruction* context) : OpKernel(context) {}
@@ -242,15 +242,15 @@ class ReverseV2Op : public OpKernel {
     } else {
       const int input_dims = input.dims();
       const TensorShape& sparse_dims_shape = sparse_dims.shape();
-      const auto& axes_sparse_flat = sparse_dims.flat<int32>();
+      const auto& axes_sparse_flat = sparse_dims.flat<Tidx>();
 
       OP_REQUIRES(context, TensorShapeUtils::IsVector(sparse_dims_shape),
                   errors::InvalidArgument("'dims' must be 1-dimension, not ",
                                           sparse_dims.dims()));
       gtl::InlinedVector<bool, 8> axes_dense(input_dims, false);
       for (int dummy = 0; dummy < axes_sparse_flat.size(); dummy++) {
-        int32 axis = internal::SubtleMustCopy<int32>(axes_sparse_flat(dummy));
-        int32 canonical_axis = axis < 0 ? input_dims + axis : axis;
+        Tidx axis = internal::SubtleMustCopy<Tidx>(axes_sparse_flat(dummy));
+        Tidx canonical_axis = axis < 0 ? input_dims + axis : axis;
         OP_REQUIRES(context, canonical_axis >= 0 && canonical_axis < input_dims,
                     errors::InvalidArgument("'axis'[", dummy, "] = ", axis,
                                             " is out of valid range [", 0, ", ",
@@ -306,7 +306,13 @@ class ReverseV2Op : public OpKernel {
                               .TypeConstraint<T>("T")        \
                               .TypeConstraint<int32>("Tidx") \
                               .HostMemory("axis"),           \
-                          ReverseV2Op<CPUDevice, T>)
+                          ReverseV2Op<CPUDevice, T, int32>)  \
+  REGISTER_KERNEL_BUILDER(Name("ReverseV2")                  \
+                              .Device(DEVICE_CPU)            \
+                              .TypeConstraint<T>("T")        \
+                              .TypeConstraint<int64>("Tidx") \
+                              .HostMemory("axis"),           \
+                          ReverseV2Op<CPUDevice, T, int64>)
 TF_CALL_POD_TYPES(REGISTER_KERNELS);
 TF_CALL_string(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
@@ -358,7 +364,13 @@ TF_CALL_complex128(DECLARE_GPU_SPEC);
                               .TypeConstraint<T>("T")        \
                               .TypeConstraint<int32>("Tidx") \
                               .HostMemory("axis"),           \
-                          ReverseV2Op<GPUDevice, T>)
+                          ReverseV2Op<GPUDevice, T, int32>)  \
+  REGISTER_KERNEL_BUILDER(Name("ReverseV2")                  \
+                              .Device(DEVICE_GPU)            \
+                              .TypeConstraint<T>("T")        \
+                              .TypeConstraint<int64>("Tidx") \
+                              .HostMemory("axis"),           \
+                          ReverseV2Op<GPUDevice, T, int64>)
 TF_CALL_uint8(REGISTER_GPU_KERNELS);
 TF_CALL_int8(REGISTER_GPU_KERNELS);
 // TODO decide whether we want to enable the bool kernel.
@@ -387,7 +399,15 @@ REGISTER_KERNEL_BUILDER(Name("ReverseV2")
                             .HostMemory("tensor")
                             .HostMemory("axis")
                             .HostMemory("output"),
-                        ReverseV2Op<CPUDevice, int32>);
+                        ReverseV2Op<CPUDevice, int32, int32>);
+REGISTER_KERNEL_BUILDER(Name("ReverseV2")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int32>("T")
+                            .TypeConstraint<int64>("Tidx")
+                            .HostMemory("tensor")
+                            .HostMemory("axis")
+                            .HostMemory("output"),
+                        ReverseV2Op<CPUDevice, int32, int64>);
 #endif  // GOOGLE_CUDA
 
 #ifdef TENSORFLOW_USE_SYCL
@@ -402,7 +422,13 @@ REGISTER_KERNEL_BUILDER(Name("ReverseV2")
                               .TypeConstraint<T>("T")        \
                               .TypeConstraint<int32>("Tidx") \
                               .HostMemory("axis"),           \
-                          ReverseV2Op<SYCLDevice, T>)
+                          ReverseV2Op<SYCLDevice, T, int32>) \
+  REGISTER_KERNEL_BUILDER(Name("ReverseV2")                  \
+                              .Device(DEVICE_SYCL)           \
+                              .TypeConstraint<T>("T")        \
+                              .TypeConstraint<int64>("Tidx") \
+                              .HostMemory("axis"),           \
+                          ReverseV2Op<SYCLDevice, T, int64>)
 TF_CALL_uint8(REGISTER_SYCL_KERNELS);
 TF_CALL_int8(REGISTER_SYCL_KERNELS);
 TF_CALL_float(REGISTER_SYCL_KERNELS);
@@ -422,6 +448,14 @@ REGISTER_KERNEL_BUILDER(Name("ReverseV2")
                             .HostMemory("tensor")
                             .HostMemory("axis")
                             .HostMemory("output"),
-                        ReverseV2Op<CPUDevice, int32>);
-#endif // TENSORFLOW_USE_SYCL
+                        ReverseV2Op<CPUDevice, int32, int32>);
+REGISTER_KERNEL_BUILDER(Name("ReverseV2")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int32>("T")
+                            .TypeConstraint<int64>("Tidx")
+                            .HostMemory("tensor")
+                            .HostMemory("axis")
+                            .HostMemory("output"),
+                        ReverseV2Op<CPUDevice, int32, int64>);
+#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/save_restore_tensor.cc b/tensorflow/core/kernels/save_restore_tensor.cc
index 6b06cf650a849d3ff606b62b00f437ac9accb013..df60eda75978ff9f6a9d7059b9594f86831aa6f5 100644
--- a/tensorflow/core/kernels/save_restore_tensor.cc
+++ b/tensorflow/core/kernels/save_restore_tensor.cc
@@ -13,11 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/core/kernels/save_restore_tensor.h"
+#include <numeric>
 #include <unordered_map>
-
 #include <utility>
 #include <vector>
-#include "tensorflow/core/kernels/save_restore_tensor.h"
 
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -85,7 +85,17 @@ void SaveTensors(
   Status s;
   auto tensor_names_flat = tensor_names_t.flat<string>();
 
-  for (int i = 0; i < N; ++i) {
+  // Process tensors in sorted name order.  This allows us to avoid seeking
+  // during restoration in the common case where we are restoring a full
+  // checkpoint.
+  std::vector<size_t> sorted_name_idx(tensor_names_flat.size());
+  std::iota(sorted_name_idx.begin(), sorted_name_idx.end(), 0);
+  std::sort(sorted_name_idx.begin(), sorted_name_idx.end(),
+            [&tensor_names_flat](size_t a, size_t b) {
+              return tensor_names_flat(a) < tensor_names_flat(b);
+            });
+
+  for (size_t i : sorted_name_idx) {
     const string& name = tensor_names_flat(i);
     const Tensor& input = context->input(i + kFixedInputs);
     TensorShape shape(input.shape());
@@ -109,8 +119,7 @@ void SaveTensors(
     break;
 
     switch (input.dtype()) {
-      TF_CALL_POD_STRING_TYPES(WRITER_ADD)
-      TF_CALL_QUANTIZED_TYPES(WRITER_ADD)
+      TF_CALL_SAVE_RESTORE_TYPES(WRITER_ADD)
       default:
         context->SetStatus(errors::Unimplemented("Saving data type ",
                                                  DataTypeString(input.dtype()),
@@ -132,7 +141,7 @@ void SaveTensors(
 
 void RestoreTensor(OpKernelContext* context,
                    checkpoint::TensorSliceReader::OpenTableFunction open_func,
-                   int preferred_shard, bool restore_slice) {
+                   int preferred_shard, bool restore_slice, int restore_index) {
   const Tensor& file_pattern_t = context->input(0);
   {
     const int64 size = file_pattern_t.NumElements();
@@ -145,26 +154,7 @@ void RestoreTensor(OpKernelContext* context,
   const string& file_pattern = file_pattern_t.flat<string>()(0);
 
   const Tensor& tensor_name_t = context->input(1);
-  {
-    const int64 size = tensor_name_t.NumElements();
-    OP_REQUIRES(
-        context, size == 1,
-        errors::InvalidArgument(
-            "Input 1 (tensor_name) must be a string scalar; got a tensor of ",
-            size, "elements"));
-  }
-  const string& tensor_name = tensor_name_t.flat<string>()(0);
-
-  const string* tensor_shape_and_slice_ptr = nullptr;
-  if (restore_slice) {
-    const Tensor& tensor_shape_and_slice_t = context->input(2);
-    OP_REQUIRES(
-        context, tensor_shape_and_slice_t.NumElements() == 1,
-        errors::InvalidArgument("Expected 1 element for the tensor "
-                                "shape and slice but got ",
-                                tensor_shape_and_slice_t.NumElements()));
-    tensor_shape_and_slice_ptr = tensor_shape_and_slice_t.flat<string>().data();
-  }
+  const string& tensor_name = tensor_name_t.flat<string>()(restore_index);
 
   // If we cannot find a cached reader we will allocate our own.
   std::unique_ptr<checkpoint::TensorSliceReader> allocated_reader;
@@ -187,7 +177,7 @@ void RestoreTensor(OpKernelContext* context,
       errors::NotFound("Tensor name \"", tensor_name,
                        "\" not found in checkpoint files ", file_pattern));
   OP_REQUIRES(
-      context, type == context->expected_output_dtype(0),
+      context, type == context->expected_output_dtype(restore_index),
       errors::InvalidArgument("Expected to restore a tensor of type ",
                               DataTypeString(context->expected_output_dtype(0)),
                               ", got a tensor of type ", DataTypeString(type),
@@ -196,23 +186,26 @@ void RestoreTensor(OpKernelContext* context,
   // Shape of the output and slice to load.
   TensorShape output_shape(saved_shape);
   TensorSlice slice_to_load(saved_shape.dims());
-  if (restore_slice && !tensor_shape_and_slice_ptr[0].empty()) {
-    const string& shape_spec = tensor_shape_and_slice_ptr[0];
-    TensorShape parsed_shape;
-    OP_REQUIRES_OK(
-        context, checkpoint::ParseShapeAndSlice(shape_spec, &parsed_shape,
-                                                &slice_to_load, &output_shape));
-    OP_REQUIRES(
-        context, parsed_shape.IsSameSize(saved_shape),
-        errors::InvalidArgument(
-            "Shape in shape_and_slice spec does not match the shape in the "
-            "save file: ",
-            parsed_shape.DebugString(), ", save file shape: ",
-            saved_shape.DebugString()));
+  if (restore_slice) {
+    const string& shape_spec = context->input(2).flat<string>()(restore_index);
+    if (!shape_spec.empty()) {
+      TensorShape parsed_shape;
+      OP_REQUIRES_OK(context, checkpoint::ParseShapeAndSlice(
+                                  shape_spec, &parsed_shape, &slice_to_load,
+                                  &output_shape));
+      OP_REQUIRES(
+          context, parsed_shape.IsSameSize(saved_shape),
+          errors::InvalidArgument(
+              "Shape in shape_and_slice spec does not match the shape in the "
+              "save file: ",
+              parsed_shape.DebugString(),
+              ", save file shape: ", saved_shape.DebugString()));
+    }
   }
 
   Tensor* t = nullptr;
-  OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &t));
+  OP_REQUIRES_OK(context,
+                 context->allocate_output(restore_index, output_shape, &t));
 
   if (output_shape.num_elements() == 0) return;
 
@@ -225,8 +218,7 @@ void RestoreTensor(OpKernelContext* context,
     break;
 
   switch (type) {
-    TF_CALL_POD_STRING_TYPES(READER_COPY)
-    TF_CALL_QUANTIZED_TYPES(READER_COPY)
+    TF_CALL_SAVE_RESTORE_TYPES(READER_COPY)
     default:
       context->SetStatus(errors::Unimplemented(
           "Restoring data type ", DataTypeString(type), " not yet supported"));
@@ -239,9 +231,18 @@ Status RestoreTensorsV2(OpKernelContext* context, const Tensor& prefix,
                         const Tensor& shape_and_slices,
                         gtl::ArraySlice<DataType> dtypes) {
   const string& prefix_string = prefix.scalar<string>()();
+
   const auto& tensor_names_flat = tensor_names.flat<string>();
   const auto& shape_and_slices_flat = shape_and_slices.flat<string>();
 
+  // Sort lookup keys to improve locality when reading multiple tensors.
+  std::vector<size_t> sorted_name_idx(tensor_names_flat.size());
+  std::iota(sorted_name_idx.begin(), sorted_name_idx.end(), 0);
+  std::sort(sorted_name_idx.begin(), sorted_name_idx.end(),
+            [&tensor_names_flat](size_t a, size_t b) {
+              return tensor_names_flat(a) < tensor_names_flat(b);
+            });
+
   BundleReader reader(Env::Default(), prefix_string);
   TF_RETURN_IF_ERROR(reader.status());
 
@@ -250,9 +251,10 @@ Status RestoreTensorsV2(OpKernelContext* context, const Tensor& prefix,
   // within a fixed memory budget.
   TensorShape restored_full_shape;
   Tensor* restored_tensor = nullptr;
-  for (size_t i = 0; i < tensor_names_flat.size(); ++i) {
+  for (auto i : sorted_name_idx) {
     const string& tensor_name = tensor_names_flat(i);
     const string& shape_and_slice = shape_and_slices_flat(i);
+
     TF_RETURN_IF_ERROR(
         reader.LookupTensorShape(tensor_name, &restored_full_shape));
 
diff --git a/tensorflow/core/kernels/save_restore_tensor.h b/tensorflow/core/kernels/save_restore_tensor.h
index 1e87e5c30b75754777f5b03ef58dd6c3102e27ec..5b74b586e84f5b33c179c986bc8aeacf65835f61 100644
--- a/tensorflow/core/kernels/save_restore_tensor.h
+++ b/tensorflow/core/kernels/save_restore_tensor.h
@@ -37,18 +37,21 @@ void SaveTensors(
     checkpoint::TensorSliceWriter::CreateBuilderFunction builder_func,
     bool save_slices);
 
-// Reads a tensor from the reader built from open_func() and produces it as
-// context->output(0).  "preferred_shard" is the same the TensorSliceReader
-// preferred_shard parameter.
+// Reads a single tensor from the reader built from open_func() and produces
+// it as context->output(restore_index).  "preferred_shard" is the same the
+// TensorSliceReader preferred_shard parameter.
 //
 // context must have the following inputs:
 //  0: a single element string tensor that contains the file name.
-//  1: a single element string tensor that names the output to be restored.
+//  1: string tensor that names the outputs to be restored.
 // If restore_slice is true:
-//  2: shape and slice specification of the tensor to restore.
+//  2: shape and slice specification of the tensors to restore.
+//
+// restore_index indicates the variable name and slice to lookup
+// in context(1) and (2).
 void RestoreTensor(OpKernelContext* context,
                    checkpoint::TensorSliceReader::OpenTableFunction open_func,
-                   int preferred_shard, bool restore_slice);
+                   int preferred_shard, bool restore_slice, int restore_index);
 
 // V2 checkpoint format.
 
diff --git a/tensorflow/core/kernels/save_restore_v2_ops.cc b/tensorflow/core/kernels/save_restore_v2_ops.cc
index c665bc5b03ca741abfa868a4a089d19e97f47536..3acf290ea209923c53333a4233301568e3874219 100644
--- a/tensorflow/core/kernels/save_restore_v2_ops.cc
+++ b/tensorflow/core/kernels/save_restore_v2_ops.cc
@@ -169,8 +169,14 @@ class RestoreV2 : public OpKernel {
         paths.empty()) {
       // Cannot find V2's metadata file, so "prefix_string" does not point to a
       // V2 checkpoint.  Invokes the V1 read path instead.
-      RestoreTensor(context, &checkpoint::OpenTableTensorSliceReader,
-                    /* preferred_shard */ -1, /* restore_slice */ true);
+      for (size_t i = 0; i < tensor_names.NumElements(); ++i) {
+        RestoreTensor(context, &checkpoint::OpenTableTensorSliceReader,
+                      /* preferred_shard */ -1, /* restore_slice */ true,
+                      /* restore_index */ i);
+        if (!context->status().ok()) {
+          return;
+        }
+      }
       return;
     }
     // If found, invokes the V2 reader.
diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc
index 484932ab0157dee1685b2b90a6c013c11dac061d..3a95dd1773398509e81a514f07fd79f5cb9a0928 100644
--- a/tensorflow/core/kernels/scatter_nd_op.cc
+++ b/tensorflow/core/kernels/scatter_nd_op.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #endif  // GOOGLE_CUDA
 
 #include "tensorflow/core/kernels/scatter_nd_op.h"
+
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -28,6 +29,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/dense_update_functor.h"
 #include "tensorflow/core/kernels/fill_functor.h"
+#include "tensorflow/core/kernels/training_op_helpers.h"
+#include "tensorflow/core/kernels/variable_ops.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
@@ -83,7 +86,10 @@ class ScatterNdUpdateOp : public OpKernel {
     const DataType dt = DataTypeToEnum<T>::v();
     const DataType dt_ref = DataTypeToEnum<T>::ref();
     const DataType index_t = DataTypeToEnum<Index>::v();
-    if (IsRefType(c->input_type(0))) {
+    dtype_ = c->input_type(0);
+    if (c->input_type(0) == DT_RESOURCE) {
+      // TODO(apassos): what to validate here?
+    } else if (IsRefType(c->input_type(0))) {
       OP_REQUIRES_OK(c, c->MatchSignature({dt_ref, index_t, dt}, {dt_ref}));
       OP_REQUIRES_OK(c, c->GetAttr("use_locking", &use_exclusive_lock_));
     } else {
@@ -93,7 +99,16 @@ class ScatterNdUpdateOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* c) override {
-    if (use_exclusive_lock_) {
+    if (dtype_ == DT_RESOURCE) {
+      if (use_exclusive_lock_) {
+        Var* v;
+        OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v));
+        mutex_lock m(*v->mu());
+        DoCompute(c);
+      } else {
+        DoCompute(c);
+      }
+    } else if (use_exclusive_lock_) {
       // If we're here, it means the input type is a ref.
       DCHECK(IsRefType(c->input_dtype(0)));
       // Hold mutex while we apply updates
@@ -105,6 +120,7 @@ class ScatterNdUpdateOp : public OpKernel {
   }
 
  private:
+  DataType dtype_;
   bool use_exclusive_lock_;
 
   void DoCompute(OpKernelContext* c) {
@@ -113,7 +129,20 @@ class ScatterNdUpdateOp : public OpKernel {
     Tensor params;
     TensorShape params_shape;
 
-    if (IsRefType(c->input_dtype(0))) {
+    if (dtype_ == DT_RESOURCE) {
+      Var* v;
+      OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v));
+      Tensor* t = v->tensor();
+      if (!use_exclusive_lock_) {
+        // We're not holding the lock in the outer scope so need it here.
+        mutex_lock m(*v->mu());
+        OP_REQUIRES_OK(c, PrepareToUpdateVariable<Device, T>(c, t));
+      } else {
+        OP_REQUIRES_OK(c, PrepareToUpdateVariable<Device, T>(c, t));
+      }
+      params = *t;
+      params_shape = params.shape();
+    } else if (IsRefType(c->input_dtype(0))) {
       params = c->mutable_input(0, use_exclusive_lock_);
       params_shape = params.shape();
       c->forward_ref_input_to_ref_output(0, 0);
@@ -159,6 +188,16 @@ class ScatterNdUpdateOp : public OpKernel {
           .TypeConstraint<index_type>("Tindices"),                           \
       ScatterNdUpdateOp<dev##Device, type, index_type, op>)
 
+#define REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL_INDEX(type, index_type, \
+                                                         dev, name, op)    \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name(name)                                                           \
+          .Device(DEVICE_##dev)                                            \
+          .TypeConstraint<type>("T")                                       \
+          .TypeConstraint<index_type>("Tindices")                          \
+          .HostMemory("ref"),                                              \
+      ScatterNdUpdateOp<dev##Device, type, index_type, op>)
+
 #define REGISTER_SCATTER_ND_KERNEL(type, dev, name)         \
   REGISTER_SCATTER_ND_KERNEL_INDEX(type, int32, dev, name); \
   REGISTER_SCATTER_ND_KERNEL_INDEX(type, int64, dev, name)
@@ -167,6 +206,11 @@ class ScatterNdUpdateOp : public OpKernel {
   REGISTER_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int32, dev, name, op); \
   REGISTER_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int64, dev, name, op)
 
+#define REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL(type, dev, name, op)    \
+  REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int32, dev, name, \
+                                                   op);                    \
+  REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL_INDEX(type, int64, dev, name, op)
+
 #define REGISTER_SCATTER_ND_ADD_SUB(type, dev)                            \
   REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdAdd",            \
                                     scatter_nd_op::UpdateOp::ADD);        \
@@ -178,9 +222,11 @@ class ScatterNdUpdateOp : public OpKernel {
 #define REGISTER_SCATTER_ND(type, dev) \
   REGISTER_SCATTER_ND_KERNEL(type, dev, "ScatterNd");
 
-#define REGISTER_SCATTER_ND_UPDATE(type, dev)                     \
-  REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdUpdate", \
-                                    scatter_nd_op::UpdateOp::ASSIGN);
+#define REGISTER_SCATTER_ND_UPDATE(type, dev)                         \
+  REGISTER_SCATTER_ND_UPDATE_KERNEL(type, dev, "ScatterNdUpdate",     \
+                                    scatter_nd_op::UpdateOp::ASSIGN); \
+  REGISTER_RESOURCE_SCATTER_ND_UPDATE_KERNEL(                         \
+      type, dev, "ResourceScatterNdUpdate", scatter_nd_op::UpdateOp::ASSIGN);
 
 // Registers CPU kernels.
 #define REGISTER_SCATTER_ND_ADD_SUB_CPU(type) \
@@ -281,8 +327,7 @@ Status ValidateUpdateShape(const TensorShape& params_shape,
 }
 
 template <typename Index>
-Status PrepareAndValidateInputs(OpKernelContext* c,
-                                const TensorShape& params_shape,
+Status PrepareAndValidateInputs(const TensorShape& params_shape,
                                 const Tensor& indices, const Tensor& updates,
                                 int64* slice_dim, Index* num_updates,
                                 Index* slice_size) {
@@ -396,7 +441,7 @@ Status DoScatterNd(OpKernelContext* c, const Tensor& indices,
   Index num_updates;
   Index slice_size;
   TF_RETURN_IF_ERROR(PrepareAndValidateInputs<Index>(
-      c, shape, indices, updates, &slice_dim, &num_updates, &slice_size));
+      shape, indices, updates, &slice_dim, &num_updates, &slice_size));
 
   IndexFlattener<Device, Index> index_flattener;
   auto indices_flat = index_flattener(c, indices);
@@ -442,6 +487,8 @@ Status DoScatterNd(OpKernelContext* c, const Tensor& indices,
       PARAMS_CASE(3);
       PARAMS_CASE(4);
       PARAMS_CASE(5);
+      PARAMS_CASE(6);
+      PARAMS_CASE(7);
 #undef PARAMS_CASE
       default:
         return errors::InvalidArgument(
@@ -480,7 +527,9 @@ namespace functor {
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 2); \
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 3); \
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 4); \
-  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 5);
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 5); \
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 6); \
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 7);
 
 #define DECLARE_GPU_SPECS_INDEX(T, Index)                                \
   DECLARE_GPU_SPECS_INDEX_OP(T, Index, scatter_nd_op::UpdateOp::ASSIGN); \
diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl_6.cc b/tensorflow/core/kernels/scatter_nd_op_cpu_impl_6.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d98412e2551b5eacb9190838b922cadd26d7aaf2
--- /dev/null
+++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl_6.cc
@@ -0,0 +1,18 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define CPU_PROVIDED_IXDIM 6
+#include "tensorflow/core/kernels/scatter_nd_op_cpu_impl.h"
+#undef CPU_PROVIDED_IXDIM
diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl_7.cc b/tensorflow/core/kernels/scatter_nd_op_cpu_impl_7.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a008b55603c060953015a463cf49f5768bde637a
--- /dev/null
+++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl_7.cc
@@ -0,0 +1,19 @@
+
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#define CPU_PROVIDED_IXDIM 7
+#include "tensorflow/core/kernels/scatter_nd_op_cpu_impl.h"
+#undef CPU_PROVIDED_IXDIM
diff --git a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
index 0eb3cf32dd33705cffe4c37dbe91eb0ffc31563a..31f74671cabdabce2884fcae61a6e56dbfdefe8b 100644
--- a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc
@@ -136,7 +136,9 @@ struct ScatterNdFunctor<GPUDevice, T, Index, op, IXDIM> {
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 2); \
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 3); \
   DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 4); \
-  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 5);
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 5); \
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 6); \
+  DECLARE_GPU_SPECS_INDEX_OP_IXDIM(T, Index, op, 7);
 
 #define DECLARE_GPU_SPECS_INDEX(T, Index)                                \
   DECLARE_GPU_SPECS_INDEX_OP(T, Index, scatter_nd_op::UpdateOp::ASSIGN); \
diff --git a/tensorflow/core/kernels/sdca_internal.cc b/tensorflow/core/kernels/sdca_internal.cc
index 5042cfafc0ebd942508df92c25d3720a8a7f1b72..863c123b43f781239dab62e6b57719376fc49dad 100644
--- a/tensorflow/core/kernels/sdca_internal.cc
+++ b/tensorflow/core/kernels/sdca_internal.cc
@@ -21,10 +21,10 @@ limitations under the License.
 #include <random>
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/lib/random/simple_philox.h"
 
 namespace tensorflow {
-
 namespace sdca {
 
 using UnalignedFloatVector = TTypes<const float>::UnalignedConstVec;
@@ -278,7 +278,7 @@ Status Examples::SampleAdaptativeProbabilities(
   int num_retries = 0;
   while (id < num_examples() && num_retries < num_examples()) {
     int picked_id = sampler.Sample(&random);
-    if (dis(gen) > std::pow(0.1, sampled_count_[picked_id])) {
+    if (dis(gen) > MathUtil::IPow(0.1, sampled_count_[picked_id])) {
       num_retries++;
       continue;
     }
@@ -520,5 +520,4 @@ void Examples::ComputeSquaredNormPerExample(
 }
 
 }  // namespace sdca
-
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc
index 2334e50f1dcb08e9662615e83d721f8b08568102..3ef1cd1e062b5f5abecca2f4f788e3fed20e33e9 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.cc
+++ b/tensorflow/core/kernels/segment_reduction_ops.cc
@@ -553,10 +553,11 @@ class SparseSegmentReductionOpBase : public OpKernel {
  public:
   explicit SparseSegmentReductionOpBase(OpKernelConstruction* context,
                                         bool is_mean, bool is_sqrtn,
-                                        T default_value)
+                                        bool has_num_segments, T default_value)
       : OpKernel(context),
         is_mean_(is_mean),
         is_sqrtn_(is_sqrtn),
+        has_num_segments_(has_num_segments),
         default_value_(default_value) {}
 
   void Compute(OpKernelContext* context) override {
@@ -564,6 +565,19 @@ class SparseSegmentReductionOpBase : public OpKernel {
     const Tensor& indices = context->input(1);
     const Tensor& segment_ids = context->input(2);
 
+    Index output_rows = -1;
+    if (has_num_segments_) {
+      const Tensor& num_segments = context->input(3);
+
+      OP_REQUIRES(
+          context, num_segments.shape().dims() == 0,
+          errors::InvalidArgument("num_segments should be a scalar, not shape ",
+                                  num_segments.shape().DebugString()));
+      output_rows = internal::SubtleMustCopy(num_segments.scalar<int32>()());
+      OP_REQUIRES(context, output_rows >= 0,
+                  errors::InvalidArgument("segment ids must be >= 0"));
+    }
+
     OP_REQUIRES(context, TensorShapeUtils::IsVector(indices.shape()),
                 errors::InvalidArgument("indices should be a vector."));
     OP_REQUIRES(context, TensorShapeUtils::IsVector(segment_ids.shape()),
@@ -581,10 +595,17 @@ class SparseSegmentReductionOpBase : public OpKernel {
     const auto segment_vec = segment_ids.vec<OutputRow>();
     // Note that the current implementation assumes that segment_vec values are
     // sorted.
-    const OutputRow output_rows =
+    const OutputRow last_segment_id_plus_one =
         num_indices > 0
             ? internal::SubtleMustCopy(segment_vec(num_indices - 1)) + 1
             : 0;
+    if (has_num_segments_) {
+      OP_REQUIRES(
+          context, output_rows >= last_segment_id_plus_one,
+          errors::InvalidArgument("segment ids must be < num_segments"));
+    } else {
+      output_rows = last_segment_id_plus_one;
+    }
     OP_REQUIRES(context, output_rows >= 0,
                 errors::InvalidArgument("segment ids must be >= 0"));
 
@@ -646,11 +667,20 @@ class SparseSegmentReductionOpBase : public OpKernel {
                       indices_vec(start + bad_offset), " out of range [0, ",
                       input_flat.dimension(0), ")"));
 
-      if (end >= num_indices) break;
       start = end;
       ++end;
       uninitialized_index = out_index + 1;
       out_index = next_index;
+      if (end > num_indices) break;
+    }
+
+    // Fill the gap at the end with the default value.
+    if (uninitialized_index < output_rows) {
+      Eigen::DSizes<Eigen::DenseIndex, 2> gap_slice_shape(
+          output_rows - uninitialized_index, num_col);
+      Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor>, Eigen::Unaligned>
+          gap_slice(&output_flat(uninitialized_index, 0), gap_slice_shape);
+      gap_slice.setConstant(default_value_);
     }
   }
 
@@ -786,6 +816,7 @@ class SparseSegmentReductionOpBase : public OpKernel {
 
   const bool is_mean_;
   const bool is_sqrtn_;
+  const bool has_num_segments_;
   const T default_value_;
 };
 
@@ -794,9 +825,20 @@ class SparseSegmentReductionMeanOp
     : public SparseSegmentReductionOpBase<Device, T> {
  public:
   explicit SparseSegmentReductionMeanOp(OpKernelConstruction* context)
-      : SparseSegmentReductionOpBase<Device, T>(context, true /*is_mean*/,
-                                                false /*is_sqrtn*/,
-                                                T(0) /* default_value */) {}
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, true /*is_mean*/, false /*is_sqrtn*/,
+            false /* has_num_segments */, T(0) /* default_value */) {}
+};
+
+template <typename Device, class T>
+class SparseSegmentReductionMeanWithNumSegmentsOp
+    : public SparseSegmentReductionOpBase<Device, T> {
+ public:
+  explicit SparseSegmentReductionMeanWithNumSegmentsOp(
+      OpKernelConstruction* context)
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, true /*is_mean*/, false /*is_sqrtn*/,
+            true /* has_num_segments */, T(0) /* default_value */) {}
 };
 
 template <typename Device, class T>
@@ -804,9 +846,20 @@ class SparseSegmentReductionSqrtNOp
     : public SparseSegmentReductionOpBase<Device, T> {
  public:
   explicit SparseSegmentReductionSqrtNOp(OpKernelConstruction* context)
-      : SparseSegmentReductionOpBase<Device, T>(context, false /*is_mean*/,
-                                                true /*is_sqrtn*/,
-                                                T(0) /* default_value */) {}
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, false /*is_mean*/, true /*is_sqrtn*/,
+            false /* has_num_segments */, T(0) /* default_value */) {}
+};
+
+template <typename Device, class T>
+class SparseSegmentReductionSqrtNWithNumSegmentsOp
+    : public SparseSegmentReductionOpBase<Device, T> {
+ public:
+  explicit SparseSegmentReductionSqrtNWithNumSegmentsOp(
+      OpKernelConstruction* context)
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, false /*is_mean*/, true /*is_sqrtn*/,
+            true /* has_num_segments */, T(0) /* default_value */) {}
 };
 
 template <typename Device, class T>
@@ -814,37 +867,65 @@ class SparseSegmentReductionSumOp
     : public SparseSegmentReductionOpBase<Device, T> {
  public:
   explicit SparseSegmentReductionSumOp(OpKernelConstruction* context)
-      : SparseSegmentReductionOpBase<Device, T>(context, false /*is_mean*/,
-                                                false /*is_sqrtn*/,
-                                                T(0) /* default_value */) {}
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, false /*is_mean*/, false /*is_sqrtn*/,
+            false /* has_num_segments */, T(0) /* default_value */) {}
 };
 
-#define REGISTER_CPU_SPARSE_KERNELS(type)                     \
-  REGISTER_KERNEL_BUILDER(Name("SparseSegmentSum")            \
-                              .Device(DEVICE_CPU)             \
-                              .TypeConstraint<type>("T")      \
-                              .TypeConstraint<int32>("Tidx"), \
-                          SparseSegmentReductionSumOp<CPUDevice, type>);
+template <typename Device, class T>
+class SparseSegmentReductionSumWithNumSegmentsOp
+    : public SparseSegmentReductionOpBase<Device, T> {
+ public:
+  explicit SparseSegmentReductionSumWithNumSegmentsOp(
+      OpKernelConstruction* context)
+      : SparseSegmentReductionOpBase<Device, T>(
+            context, false /*is_mean*/, false /*is_sqrtn*/,
+            true /* has_num_segments */, T(0) /* default_value */) {}
+};
 
+#define REGISTER_CPU_SPARSE_KERNELS(type)                                \
+  REGISTER_KERNEL_BUILDER(Name("SparseSegmentSum")                       \
+                              .Device(DEVICE_CPU)                        \
+                              .TypeConstraint<type>("T")                 \
+                              .TypeConstraint<int32>("Tidx"),            \
+                          SparseSegmentReductionSumOp<CPUDevice, type>); \
+  REGISTER_KERNEL_BUILDER(                                               \
+      Name("SparseSegmentSumWithNumSegments")                            \
+          .Device(DEVICE_CPU)                                            \
+          .TypeConstraint<type>("T")                                     \
+          .TypeConstraint<int32>("Tidx"),                                \
+      SparseSegmentReductionSumWithNumSegmentsOp<CPUDevice, type>);
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_SPARSE_KERNELS);
 #undef REGISTER_CPU_SPARSE_KERNELS
 
-#define REGISTER_CPU_SPARSE_KERNELS(type)                     \
-  REGISTER_KERNEL_BUILDER(Name("SparseSegmentMean")           \
-                              .Device(DEVICE_CPU)             \
-                              .TypeConstraint<type>("T")      \
-                              .TypeConstraint<int32>("Tidx"), \
-                          SparseSegmentReductionMeanOp<CPUDevice, type>);
+#define REGISTER_CPU_SPARSE_KERNELS(type)                                 \
+  REGISTER_KERNEL_BUILDER(Name("SparseSegmentMean")                       \
+                              .Device(DEVICE_CPU)                         \
+                              .TypeConstraint<type>("T")                  \
+                              .TypeConstraint<int32>("Tidx"),             \
+                          SparseSegmentReductionMeanOp<CPUDevice, type>); \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("SparseSegmentMeanWithNumSegments")                            \
+          .Device(DEVICE_CPU)                                             \
+          .TypeConstraint<type>("T")                                      \
+          .TypeConstraint<int32>("Tidx"),                                 \
+      SparseSegmentReductionMeanWithNumSegmentsOp<CPUDevice, type>);
 REGISTER_CPU_SPARSE_KERNELS(float);
 REGISTER_CPU_SPARSE_KERNELS(double);
 #undef REGISTER_CPU_SPARSE_KERNELS
 
-#define REGISTER_CPU_SPARSE_KERNELS(type)                     \
-  REGISTER_KERNEL_BUILDER(Name("SparseSegmentSqrtN")          \
-                              .Device(DEVICE_CPU)             \
-                              .TypeConstraint<type>("T")      \
-                              .TypeConstraint<int32>("Tidx"), \
-                          SparseSegmentReductionSqrtNOp<CPUDevice, type>);
+#define REGISTER_CPU_SPARSE_KERNELS(type)                                  \
+  REGISTER_KERNEL_BUILDER(Name("SparseSegmentSqrtN")                       \
+                              .Device(DEVICE_CPU)                          \
+                              .TypeConstraint<type>("T")                   \
+                              .TypeConstraint<int32>("Tidx"),              \
+                          SparseSegmentReductionSqrtNOp<CPUDevice, type>); \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("SparseSegmentSqrtNWithNumSegments")                            \
+          .Device(DEVICE_CPU)                                              \
+          .TypeConstraint<type>("T")                                       \
+          .TypeConstraint<int32>("Tidx"),                                  \
+      SparseSegmentReductionSqrtNWithNumSegmentsOp<CPUDevice, type>);
 REGISTER_CPU_SPARSE_KERNELS(float);
 REGISTER_CPU_SPARSE_KERNELS(double);
 #undef REGISTER_CPU_SPARSE_KERNELS
@@ -889,9 +970,10 @@ class SparseSegmentGradOpBase : public OpKernel {
 
     // Note that similar to SparseSegmentMean, we assume that segment_vec is
     // already sorted and has non-negative values.
-    const SegmentId num_segments =
+    const SegmentId num_segments = input.dim_size(0);
+    const SegmentId last_segment_id_plus_one =
         internal::SubtleMustCopy(segment_vec(N - 1)) + 1;
-    OP_REQUIRES(context, input.dim_size(0) == num_segments,
+    OP_REQUIRES(context, last_segment_id_plus_one <= num_segments,
                 errors::InvalidArgument("Invalid number of segments"));
 
     // Compute scaling factors for input.
diff --git a/tensorflow/core/kernels/serialize_sparse_op.cc b/tensorflow/core/kernels/serialize_sparse_op.cc
index cfb86904d573cd7577fc7dca0d48a6d146ceb058..61e40caef99c019914fc331bee5d8beab0883f41 100644
--- a/tensorflow/core/kernels/serialize_sparse_op.cc
+++ b/tensorflow/core/kernels/serialize_sparse_op.cc
@@ -27,23 +27,31 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_util.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/framework/variant_encode_decode.h"
 #include "tensorflow/core/kernels/reshape_util.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/lib/gtl/optional.h"
 #include "tensorflow/core/util/sparse/sparse_tensor.h"
 
 namespace tensorflow {
 
 using sparse::SparseTensor;
 
+template <typename T>
 class SerializeSparseOp : public OpKernel {
  public:
   explicit SerializeSparseOp(OpKernelConstruction* context)
       : OpKernel(context) {}
 
+  Status Initialize(Tensor* result);
+  Status Serialize(const Tensor& input, T* result);
+
   void Compute(OpKernelContext* context) override {
     const Tensor* input_indices;
     const Tensor* input_values;
     const Tensor* input_shape;
+
     OP_REQUIRES_OK(context, context->input("sparse_indices", &input_indices));
     OP_REQUIRES_OK(context, context->input("sparse_values", &input_values));
     OP_REQUIRES_OK(context, context->input("sparse_shape", &input_shape));
@@ -62,34 +70,75 @@ class SerializeSparseOp : public OpKernel {
                     "Input shape should be a vector but received shape ",
                     input_shape->shape().DebugString()));
 
-    TensorProto proto_indices;
-    TensorProto proto_values;
-    TensorProto proto_shape;
-
-    input_indices->AsProtoTensorContent(&proto_indices);
-    input_values->AsProtoTensorContent(&proto_values);
-    input_shape->AsProtoTensorContent(&proto_shape);
+    Tensor serialized_sparse;
+    OP_REQUIRES_OK(context, Initialize(&serialized_sparse));
 
-    Tensor serialized_sparse(DT_STRING, TensorShape({3}));
-    auto serialized_sparse_t = serialized_sparse.vec<string>();
-
-    serialized_sparse_t(0) = proto_indices.SerializeAsString();
-    serialized_sparse_t(1) = proto_values.SerializeAsString();
-    serialized_sparse_t(2) = proto_shape.SerializeAsString();
+    auto serialized_sparse_t = serialized_sparse.vec<T>();
+    OP_REQUIRES_OK(context, Serialize(*input_indices, &serialized_sparse_t(0)));
+    OP_REQUIRES_OK(context, Serialize(*input_values, &serialized_sparse_t(1)));
+    OP_REQUIRES_OK(context, Serialize(*input_shape, &serialized_sparse_t(2)));
 
     context->set_output(0, serialized_sparse);
   }
 };
 
-REGISTER_KERNEL_BUILDER(Name("SerializeSparse").Device(DEVICE_CPU),
-                        SerializeSparseOp);
+template <>
+Status SerializeSparseOp<string>::Initialize(Tensor* result) {
+  *result = Tensor(DT_STRING, TensorShape({3}));
+  return Status::OK();
+}
+
+template <>
+Status SerializeSparseOp<string>::Serialize(const Tensor& input,
+                                            string* result) {
+  TensorProto proto;
+  input.AsProtoTensorContent(&proto);
+  *result = proto.SerializeAsString();
+  return Status::OK();
+}
+
+REGISTER_KERNEL_BUILDER(Name("SerializeSparse")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<string>("out_type"),
+                        SerializeSparseOp<string>);
+
+template <>
+Status SerializeSparseOp<Variant>::Initialize(Tensor* result) {
+  *result = Tensor(DT_VARIANT, TensorShape({3}));
+  return Status::OK();
+}
+
+template <>
+Status SerializeSparseOp<Variant>::Serialize(const Tensor& input,
+                                             Variant* result) {
+  *result = input;
+  return Status::OK();
+}
+
+REGISTER_KERNEL_BUILDER(Name("SerializeSparse")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<Variant>("out_type"),
+                        SerializeSparseOp<Variant>);
 
 template <typename T>
-class SerializeManySparseOp : public OpKernel {
+class SerializeManySparseOpBase : public OpKernel {
  public:
-  explicit SerializeManySparseOp(OpKernelConstruction* context)
+  explicit SerializeManySparseOpBase(OpKernelConstruction* context)
       : OpKernel(context) {}
 
+  void Compute(OpKernelContext* context) override {}
+
+ protected:
+  Status Initialize(const int64 n, Tensor* result);
+  Status Serialize(const Tensor& input, T* result);
+};
+
+template <typename T, typename U>
+class SerializeManySparseOp : public SerializeManySparseOpBase<U> {
+ public:
+  explicit SerializeManySparseOp(OpKernelConstruction* context)
+      : SerializeManySparseOpBase<U>(context) {}
+
   void Compute(OpKernelContext* context) override {
     const Tensor* input_indices;
     const Tensor* input_values;
@@ -127,37 +176,31 @@ class SerializeManySparseOp : public OpKernel {
 
     auto input_shape_t = input_shape->vec<int64>();
     const int64 N = input_shape_t(0);
-
-    Tensor serialized_sparse(DT_STRING, TensorShape({N, 3}));
-    auto serialized_sparse_t = serialized_sparse.matrix<string>();
+    Tensor serialized_sparse;
+    OP_REQUIRES_OK(context, this->Initialize(N, &serialized_sparse));
+    auto serialized_sparse_t = serialized_sparse.matrix<U>();
 
     OP_REQUIRES_OK(context, input_st.IndicesValid());
 
-    // We can generate the output shape proto string now, for all
-    // minibatch entries.
-    Tensor output_shape(DT_INT64, {rank - 1});
-    auto output_shape_t = output_shape.vec<int64>();
-    for (int d = 1; d < rank; d++) output_shape_t(d - 1) = input_shape_t(d);
-    TensorProto proto_shape;
-    output_shape.AsProtoTensorContent(&proto_shape);
-    const string proto_shape_string = proto_shape.SerializeAsString();
-
+    // Initialize output with empty values and the proper shapes.
     Tensor output_blank_indices(DT_INT64, {0, rank - 1});
-    Tensor output_blank_values(DataTypeToEnum<T>::value, {0});
-    TensorProto proto_blank_indices;
-    TensorProto proto_blank_values;
-    output_blank_indices.AsProtoTensorContent(&proto_blank_indices);
-    output_blank_values.AsProtoTensorContent(&proto_blank_values);
+    U serialized_indices;
+    OP_REQUIRES_OK(context,
+                   this->Serialize(output_blank_indices, &serialized_indices));
+    serialized_sparse_t.template chip<1>(0).setConstant(serialized_indices);
 
-    const string proto_blank_indices_string =
-        proto_blank_indices.SerializeAsString();
-    const string proto_blank_values_string =
-        proto_blank_values.SerializeAsString();
+    Tensor output_blank_values(DataTypeToEnum<T>::value, {0});
+    U serialized_values;
+    OP_REQUIRES_OK(context,
+                   this->Serialize(output_blank_values, &serialized_values));
+    serialized_sparse_t.template chip<1>(1).setConstant(serialized_values);
 
-    // Initialize output with empty values and the proper shapes.
-    serialized_sparse_t.chip<1>(0).setConstant(proto_blank_indices_string);
-    serialized_sparse_t.chip<1>(1).setConstant(proto_blank_values_string);
-    serialized_sparse_t.chip<1>(2).setConstant(proto_shape_string);
+    Tensor output_shape(DT_INT64, {rank - 1});
+    auto output_shape_t = output_shape.vec<int64>();
+    for (int d = 1; d < rank; d++) output_shape_t(d - 1) = input_shape_t(d);
+    U serialized_shape;
+    OP_REQUIRES_OK(context, this->Serialize(output_shape, &serialized_shape));
+    serialized_sparse_t.template chip<1>(2).setConstant(serialized_shape);
 
     // Get groups by minibatch dimension
     sparse::GroupIterable minibatch = input_st.group({0});
@@ -186,24 +229,62 @@ class SerializeManySparseOp : public OpKernel {
         output_values_t(i) = values(i);
       }
 
-      TensorProto proto_indices;
-      TensorProto proto_values;
-      output_indices.AsProtoTensorContent(&proto_indices);
-      output_values.AsProtoTensorContent(&proto_values);
-
-      serialized_sparse_t(b, 0) = proto_indices.SerializeAsString();
-      serialized_sparse_t(b, 1) = proto_values.SerializeAsString();
+      OP_REQUIRES_OK(
+          context, this->Serialize(output_indices, &serialized_sparse_t(b, 0)));
+      OP_REQUIRES_OK(
+          context, this->Serialize(output_values, &serialized_sparse_t(b, 1)));
     }
 
     context->set_output(0, serialized_sparse);
   }
 };
 
-#define REGISTER_KERNELS(type)                            \
-  REGISTER_KERNEL_BUILDER(Name("SerializeManySparse")     \
-                              .Device(DEVICE_CPU)         \
-                              .TypeConstraint<type>("T"), \
-                          SerializeManySparseOp<type>)
+template <>
+Status SerializeManySparseOpBase<string>::Initialize(const int64 n,
+                                                     Tensor* result) {
+  *result = Tensor(DT_STRING, TensorShape({n, 3}));
+  return Status::OK();
+}
+
+template <>
+Status SerializeManySparseOpBase<string>::Serialize(const Tensor& input,
+                                                    string* result) {
+  TensorProto proto;
+  input.AsProtoTensorContent(&proto);
+  *result = proto.SerializeAsString();
+  return Status::OK();
+}
+
+#define REGISTER_KERNELS(type)                                     \
+  REGISTER_KERNEL_BUILDER(Name("SerializeManySparse")              \
+                              .Device(DEVICE_CPU)                  \
+                              .TypeConstraint<type>("T")           \
+                              .TypeConstraint<string>("out_type"), \
+                          SerializeManySparseOp<type, string>)
+
+TF_CALL_ALL_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+template <>
+Status SerializeManySparseOpBase<Variant>::Initialize(const int64 n,
+                                                      Tensor* result) {
+  *result = Tensor(DT_VARIANT, TensorShape({n, 3}));
+  return Status::OK();
+}
+
+template <>
+Status SerializeManySparseOpBase<Variant>::Serialize(const Tensor& input,
+                                                     Variant* result) {
+  *result = input;
+  return Status::OK();
+}
+
+#define REGISTER_KERNELS(type)                                      \
+  REGISTER_KERNEL_BUILDER(Name("SerializeManySparse")               \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .TypeConstraint<Variant>("out_type"), \
+                          SerializeManySparseOp<type, Variant>)
 
 TF_CALL_ALL_TYPES(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
@@ -212,7 +293,9 @@ template <typename T>
 class DeserializeSparseOp : public OpKernel {
  public:
   explicit DeserializeSparseOp(OpKernelConstruction* context)
-      : OpKernel(context) {}
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
+  }
 
   void Compute(OpKernelContext* context) override {
     const Tensor& serialized_sparse = context->input(0);
@@ -240,86 +323,43 @@ class DeserializeSparseOp : public OpKernel {
             "but has a zero dimension ",
             serialized_sparse.shape().DebugString()));
 
+    if (num_sparse_tensors == 0 && serialized_sparse.shape().dims() == 1) {
+      // Special case with a single sparse tensor. We can avoid data
+      // motion in the Concat and Reshape.
+      const auto& serialized_sparse_t = serialized_sparse.vec<T>();
+
+      Tensor output_indices;
+      Tensor output_values;
+      Tensor output_shape;
+      OP_REQUIRES_OK(context,
+                     this->GetAndValidateSparseTensor(
+                         serialized_sparse_t(0), serialized_sparse_t(1),
+                         serialized_sparse_t(2), dtype_, 0 /* index */,
+                         &output_indices, &output_values, &output_shape));
+      context->set_output(0, output_indices);
+      context->set_output(1, output_values);
+      context->set_output(2, output_shape);
+      return;
+    }
+
     std::vector<Tensor> indices;
     std::vector<Tensor> values;
     TensorShape shape;
     indices.reserve(num_sparse_tensors);
     values.reserve(num_sparse_tensors);
 
-    const auto& serialized_sparse_t =
-        serialized_sparse.flat_inner_dims<string, 2>();
-
+    const auto& serialized_sparse_t = serialized_sparse.flat_inner_dims<T, 2>();
     for (int i = 0; i < num_sparse_tensors; ++i) {
-      Tensor output_indices(DT_INT64);
-      Tensor output_values(DataTypeToEnum<T>::value);
-      Tensor output_shape(DT_INT64);
-      TensorProto proto_indices;
-      TensorProto proto_values;
-      TensorProto proto_shape;
-
-      OP_REQUIRES(
-          context,
-          ParseProtoUnlimited(&proto_indices, serialized_sparse_t(i, 0)),
-          errors::InvalidArgument("Could not parse serialized_sparse[", i,
-                                  ", 0]"));
-      OP_REQUIRES(context,
-                  ParseProtoUnlimited(&proto_values, serialized_sparse_t(i, 1)),
-                  errors::InvalidArgument("Could not parse serialized_sparse[",
-                                          i, ", 1]"));
-      OP_REQUIRES(context,
-                  ParseProtoUnlimited(&proto_shape, serialized_sparse_t(i, 2)),
-                  errors::InvalidArgument("Could not parse serialized_sparse[",
-                                          i, ", 2]"));
-
-      OP_REQUIRES(context, output_indices.FromProto(proto_indices),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 0] (indices)"));
-      OP_REQUIRES(context, TensorShapeUtils::IsMatrix(output_indices.shape()),
-                  errors::InvalidArgument(
-                      "Expected serialized_sparse[", i,
-                      ", 0] to represent an index matrix but received shape ",
-                      output_indices.shape().DebugString()));
-      OP_REQUIRES(context, output_values.FromProto(proto_values),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 1] (values)"));
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(output_values.shape()),
-                  errors::InvalidArgument(
-                      "Expected serialized_sparse[", i,
-                      ", 1] to represent a values vector but received shape ",
-                      output_values.shape().DebugString()));
-      OP_REQUIRES(context, output_shape.FromProto(proto_shape),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 2] (shape)"));
-      OP_REQUIRES(
-          context, TensorShapeUtils::IsVector(output_shape.shape()),
-          errors::InvalidArgument("Expected serialized_sparse[", i,
-                                  ", 1] to be a shape vector but its shape is ",
-                                  output_shape.shape().DebugString()));
-
-      OP_REQUIRES(
-          context, DataTypeToEnum<T>::value == output_values.dtype(),
-          errors::InvalidArgument(
-              "Requested SparseTensor of type ",
-              DataTypeString(DataTypeToEnum<T>::value), " but SparseTensor[", i,
-              "].values.dtype() == ", DataTypeString(output_values.dtype())));
-
+      Tensor output_indices;
+      Tensor output_values;
+      Tensor output_shape;
+      OP_REQUIRES_OK(context,
+                     this->GetAndValidateSparseTensor(
+                         serialized_sparse_t(i, 0), serialized_sparse_t(i, 1),
+                         serialized_sparse_t(i, 2), dtype_, i, &output_indices,
+                         &output_values, &output_shape));
       int64 num_entries = output_indices.dim_size(0);
-      OP_REQUIRES(context, num_entries == output_values.dim_size(0),
-                  errors::InvalidArgument(
-                      "Expected row counts of SparseTensor[", i,
-                      "].indices and SparseTensor[", i,
-                      "].values to match but they do not: ", num_entries,
-                      " vs. ", output_values.dim_size(0)));
       int rank = output_indices.dim_size(1);
-      OP_REQUIRES(
-          context, rank == output_shape.dim_size(0),
-          errors::InvalidArgument("Expected column counts of SparseTensor[", i,
-                                  "].indices to match size of SparseTensor[", i,
-                                  "].shape but they do not: ", rank, " vs. ",
-                                  output_shape.dim_size(0)));
 
       // Now we expand each SparseTensors' indices and shape by
       // prefixing a dimension
@@ -376,7 +416,25 @@ class DeserializeSparseOp : public OpKernel {
       tensors.emplace_back(indices[i], values[i], shape, std_order);
     }
 
-    SparseTensor output = SparseTensor::Concat<T>(tensors);
+    gtl::optional<SparseTensor> maybe_output;
+#define HANDLE_TYPE(T)                               \
+  case DataTypeToEnum<T>::value: {                   \
+    maybe_output = SparseTensor::Concat<T>(tensors); \
+    break;                                           \
+  }
+
+    switch (dtype_) {
+      TF_CALL_ALL_TYPES(HANDLE_TYPE);
+      TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
+      TF_CALL_variant(HANDLE_TYPE);
+#undef HANDLE_TYPE
+      default:
+        OP_REQUIRES(context, false,
+                    errors::Unimplemented(
+                        "DeserializeSparse Unhandled data type: ", dtype_));
+    }
+    DCHECK(maybe_output);
+    SparseTensor& output = maybe_output.value();
 
     // Compute the input shape for the reshape operation.
     Tensor input_shape(DT_INT64, TensorShape({output.dims()}));
@@ -398,198 +456,101 @@ class DeserializeSparseOp : public OpKernel {
             0 /* output indices index */, 2 /* output shape index */);
     context->set_output(1, output.values());
   }
-};
-
-#define REGISTER_KERNELS(type)                                \
-  REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")           \
-                              .Device(DEVICE_CPU)             \
-                              .TypeConstraint<type>("dtype"), \
-                          DeserializeSparseOp<type>)
-
-TF_CALL_ALL_TYPES(REGISTER_KERNELS);
-#undef REGISTER_KERNELS
-
-template <typename T>
-class DeserializeManySparseOp : public OpKernel {
- public:
-  explicit DeserializeManySparseOp(OpKernelConstruction* context)
-      : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    const Tensor& serialized_sparse = context->input(0);
-    OP_REQUIRES(context, TensorShapeUtils::IsMatrix(serialized_sparse.shape()),
-                errors::InvalidArgument(
-                    "Serialized sparse should be a matrix but received shape ",
-                    serialized_sparse.shape().DebugString()));
-    OP_REQUIRES(
-        context, serialized_sparse.shape().dim_size(1) == 3,
-        errors::InvalidArgument(
-            "Serialized sparse should have 3 columns but received shape ",
-            serialized_sparse.shape().DebugString()));
-
-    int num_sparse_tensors = serialized_sparse.shape().dim_size(0);
-
-    OP_REQUIRES(
-        context, num_sparse_tensors > 0,
-        errors::InvalidArgument("Must have at least 1 serialized SparseTensor, "
-                                "but input matrix has 0 rows"));
-
-    std::vector<Tensor> indices_to_concat;
-    std::vector<Tensor> values_to_concat;
-    std::vector<TensorShape> shapes_to_concat;
-
-    const auto& serialized_sparse_t = serialized_sparse.matrix<string>();
-
-    for (int i = 0; i < num_sparse_tensors; ++i) {
-      Tensor output_indices(DT_INT64);
-      Tensor output_values(DataTypeToEnum<T>::value);
-      Tensor output_shape(DT_INT64);
-      TensorProto proto_indices;
-      TensorProto proto_values;
-      TensorProto proto_shape;
-
-      OP_REQUIRES(
-          context,
-          ParseProtoUnlimited(&proto_indices, serialized_sparse_t(i, 0)),
-          errors::InvalidArgument("Could not parse serialized_sparse[", i,
-                                  ", 0]"));
-      OP_REQUIRES(context,
-                  ParseProtoUnlimited(&proto_values, serialized_sparse_t(i, 1)),
-                  errors::InvalidArgument("Could not parse serialized_sparse[",
-                                          i, ", 1]"));
-      OP_REQUIRES(context,
-                  ParseProtoUnlimited(&proto_shape, serialized_sparse_t(i, 2)),
-                  errors::InvalidArgument("Could not parse serialized_sparse[",
-                                          i, ", 2]"));
-
-      OP_REQUIRES(context, output_indices.FromProto(proto_indices),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 0] (indices)"));
-      OP_REQUIRES(context, TensorShapeUtils::IsMatrix(output_indices.shape()),
-                  errors::InvalidArgument(
-                      "Expected serialized_sparse[", i,
-                      ", 0] to represent an index matrix but received shape ",
-                      output_indices.shape().DebugString()));
-      OP_REQUIRES(context, output_values.FromProto(proto_values),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 1] (values)"));
-      OP_REQUIRES(context, TensorShapeUtils::IsVector(output_values.shape()),
-                  errors::InvalidArgument(
-                      "Expected serialized_sparse[", i,
-                      ", 1] to represent a values vector but received shape ",
-                      output_values.shape().DebugString()));
-      OP_REQUIRES(context, output_shape.FromProto(proto_shape),
-                  errors::InvalidArgument(
-                      "Could not construct Tensor serialized_sparse[", i,
-                      ", 2] (shape)"));
-      OP_REQUIRES(
-          context, TensorShapeUtils::IsVector(output_shape.shape()),
-          errors::InvalidArgument("Expected serialized_sparse[", i,
-                                  ", 1] to be a shape vector but its shape is ",
-                                  output_shape.shape().DebugString()));
-
-      OP_REQUIRES(
-          context, DataTypeToEnum<T>::value == output_values.dtype(),
-          errors::InvalidArgument(
-              "Requested SparseTensor of type ",
-              DataTypeString(DataTypeToEnum<T>::value), " but SparseTensor[", i,
-              "].values.dtype() == ", DataTypeString(output_values.dtype())));
-
-      int64 num_entries = output_indices.dim_size(0);
-      OP_REQUIRES(context, num_entries == output_values.dim_size(0),
-                  errors::InvalidArgument(
-                      "Expected row counts of SparseTensor[", i,
-                      "].indices and SparseTensor[", i,
-                      "].values to match but they do not: ", num_entries,
-                      " vs. ", output_values.dim_size(0)));
-      int rank = output_indices.dim_size(1);
-      OP_REQUIRES(
-          context, rank == output_shape.dim_size(0),
-          errors::InvalidArgument("Expected column counts of SparseTensor[", i,
-                                  "].indices to match size of SparseTensor[", i,
-                                  "].shape "
-                                  "but they do not: ",
-                                  rank, " vs. ", output_shape.dim_size(0)));
 
-      // Now we expand each SparseTensors' indices and shape by
-      // prefixing a dimension
-      Tensor expanded_indices(
-          DT_INT64, TensorShape({num_entries, 1 + output_indices.dim_size(1)}));
-      Tensor expanded_shape(DT_INT64,
-                            TensorShape({1 + output_shape.dim_size(0)}));
-      const auto& output_indices_t = output_indices.matrix<int64>();
-      const auto& output_shape_t = output_shape.vec<int64>();
-      auto expanded_indices_t = expanded_indices.matrix<int64>();
-      auto expanded_shape_t = expanded_shape.vec<int64>();
-      expanded_indices_t.chip<1>(0).setZero();
-      Eigen::DSizes<Eigen::DenseIndex, 2> indices_start(0, 1);
-      Eigen::DSizes<Eigen::DenseIndex, 2> indices_sizes(num_entries, rank);
-      expanded_indices_t.slice(indices_start, indices_sizes) = output_indices_t;
-      expanded_shape_t(0) = 1;
-      std::copy_n(&output_shape_t(0), rank, &expanded_shape_t(1));
-
-      TensorShape expanded_tensor_shape(expanded_shape.vec<int64>());
-
-      indices_to_concat.push_back(expanded_indices);
-      values_to_concat.push_back(output_values);
-      shapes_to_concat.push_back(expanded_tensor_shape);
+ protected:
+  Status Deserialize(const T& serialized, Tensor* result);
+
+  Status GetAndValidateSparseTensor(
+      const T& serialized_indices, const T& serialized_values,
+      const T& serialized_shape, DataType values_dtype, int index,
+      Tensor* output_indices, Tensor* output_values, Tensor* output_shape) {
+    // Deserialize and validate the indices.
+    TF_RETURN_IF_ERROR(this->Deserialize(serialized_indices, output_indices));
+    if (!TensorShapeUtils::IsMatrix(output_indices->shape())) {
+      return errors::InvalidArgument(
+          "Expected serialized_sparse[", index,
+          ", 0] to represent an index matrix but received shape ",
+          output_indices->shape().DebugString());
     }
-
-    int rank = -1;
-    for (int i = 0; i < num_sparse_tensors; ++i) {
-      if (rank < 0) rank = shapes_to_concat[i].dims();
-      OP_REQUIRES(context, rank == shapes_to_concat[i].dims(),
-                  errors::InvalidArgument(
-                      "Inconsistent rank across SparseTensors: rank prior to "
-                      "SparseTensor[",
-                      i, "] was: ", rank, " but rank of SparseTensor[", i,
-                      "] is: ", shapes_to_concat[i].dims()));
+    int64 num_entries = output_indices->dim_size(0);
+    int rank = output_indices->dim_size(1);
+
+    // Deserialize and validate the values.
+    TF_RETURN_IF_ERROR(this->Deserialize(serialized_values, output_values));
+    if (!TensorShapeUtils::IsVector(output_values->shape())) {
+      return errors::InvalidArgument(
+          "Expected serialized_sparse[", index,
+          ", 1] to represent a values vector but received shape ",
+          output_values->shape().DebugString());
     }
-
-    // SparseTensor::Concat requires consistent shape for all but the
-    // primary order dimension (dimension 0 in this case).  So we get
-    // the maximum value across all the input SparseTensors for each
-    // dimension and use that.
-    TensorShape preconcat_shape(shapes_to_concat[0]);
-    for (int i = 0; i < num_sparse_tensors; ++i) {
-      for (int d = 0; d < rank; ++d) {
-        preconcat_shape.set_dim(d, std::max(preconcat_shape.dim_size(d),
-                                            shapes_to_concat[i].dim_size(d)));
-      }
+    if (values_dtype != output_values->dtype()) {
+      return errors::InvalidArgument(
+          "Requested SparseTensor of type ", DataTypeString(values_dtype),
+          " but SparseTensor[", index,
+          "].values.dtype() == ", DataTypeString(output_values->dtype()));
     }
-
-    // Dimension 0 is the primary dimension.
-    gtl::InlinedVector<int64, 8> std_order(rank);
-    std::iota(std_order.begin(), std_order.end(), 0);
-
-    std::vector<SparseTensor> tensors_to_concat;
-    tensors_to_concat.reserve(num_sparse_tensors);
-    for (int i = 0; i < num_sparse_tensors; ++i) {
-      tensors_to_concat.emplace_back(indices_to_concat[i], values_to_concat[i],
-                                     preconcat_shape, std_order);
+    if (num_entries != output_values->dim_size(0)) {
+      return errors::InvalidArgument(
+          "Expected row counts of SparseTensor[", index,
+          "].indices and SparseTensor[", index,
+          "].values to match but they do not: ", num_entries, " vs. ",
+          output_values->dim_size(0));
     }
 
-    SparseTensor output = SparseTensor::Concat<T>(tensors_to_concat);
-
-    Tensor final_output_shape(DT_INT64, TensorShape({output.dims()}));
-
-    std::copy_n(output.shape().data(), output.dims(),
-                final_output_shape.vec<int64>().data());
-
-    context->set_output(0, output.indices());
-    context->set_output(1, output.values());
-    context->set_output(2, final_output_shape);
+    // Deserialize and validate the shape.
+    TF_RETURN_IF_ERROR(this->Deserialize(serialized_shape, output_shape));
+    if (!TensorShapeUtils::IsVector(output_shape->shape())) {
+      return errors::InvalidArgument(
+          "Expected serialized_sparse[", index,
+          ", 1] to be a shape vector but its shape is ",
+          output_shape->shape().DebugString());
+    }
+    if (rank != output_shape->dim_size(0)) {
+      return errors::InvalidArgument("Expected column counts of SparseTensor[",
+                                     index,
+                                     "].indices to match size of SparseTensor[",
+                                     index, "].shape but they do not: ", rank,
+                                     " vs. ", output_shape->dim_size(0));
+    }
+    return Status::OK();
   }
+
+  DataType dtype_;
 };
 
-#define REGISTER_KERNELS(type)                                \
-  REGISTER_KERNEL_BUILDER(Name("DeserializeManySparse")       \
-                              .Device(DEVICE_CPU)             \
-                              .TypeConstraint<type>("dtype"), \
-                          DeserializeManySparseOp<type>)
+template <>
+Status DeserializeSparseOp<string>::Deserialize(const string& serialized,
+                                                Tensor* result) {
+  TensorProto proto;
+  if (!ParseProtoUnlimited(&proto, serialized)) {
+    return errors::InvalidArgument("Could not parse serialized proto");
+  }
+  Tensor tensor;
+  if (!tensor.FromProto(proto)) {
+    return errors::InvalidArgument("Could not construct tensor from proto");
+  }
+  *result = tensor;
+  return Status::OK();
+}
+
+REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<string>("Tserialized"),
+                        DeserializeSparseOp<string>)
+
+REGISTER_KERNEL_BUILDER(Name("DeserializeManySparse").Device(DEVICE_CPU),
+                        DeserializeSparseOp<string>)
+
+template <>
+Status DeserializeSparseOp<Variant>::Deserialize(const Variant& serialized,
+                                                 Tensor* result) {
+  *result = *serialized.get<Tensor>();
+  return Status::OK();
+}
+
+REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<Variant>("Tserialized"),
+                        DeserializeSparseOp<Variant>)
 
-TF_CALL_ALL_TYPES(REGISTER_KERNELS);
-#undef REGISTER_KERNELS
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/set_kernels.cc b/tensorflow/core/kernels/set_kernels.cc
index 5a2b18b41ca4160327645f8655c2c70adb4b427d..e836c764acf859ed728f760d2e8e9c57ea86080f 100644
--- a/tensorflow/core/kernels/set_kernels.cc
+++ b/tensorflow/core/kernels/set_kernels.cc
@@ -216,7 +216,7 @@ void PopulateFromDenseGroup(OpKernelContext* ctx, const Tensor& input_tensor,
   result->clear();
   auto input_flat = input_tensor.flat<T>();
   const auto start = std::inner_product(
-      group_indices.begin(), group_indices.end(), input_strides.begin(), 0L);
+      group_indices.begin(), group_indices.end(), input_strides.begin(), 0LL);
   const TensorShape& input_shape = input_tensor.shape();
   const auto end = start + input_shape.dim_size(input_shape.dims() - 1);
   for (int64 i = start; i < end; ++i) {
@@ -279,7 +279,7 @@ void SetSizeOp<T>::Compute(OpKernelContext* ctx) {
 
     const auto group_key = group.group();
     const auto output_index = std::inner_product(
-        group_key.begin(), group_key.end(), output_strides.begin(), 0L);
+        group_key.begin(), group_key.end(), output_strides.begin(), 0LL);
     out(output_index) = group_set.size();
   }
 }
diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index d46701749bec84352208337a8ce17a2b7ee2213a..82595de77947fab01a2107e009982f6db96601e5 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -439,7 +439,6 @@ namespace functor {
   DECLARE_CPU_SPEC(T, 7);
 
 TF_CALL_ALL_TYPES(DECLARE_FOR_N);
-DECLARE_FOR_N(bfloat16);
 
 #undef DECLARE_FOR_N
 #undef DECLARE_CPU_SPEC
@@ -456,7 +455,6 @@ DECLARE_FOR_N(bfloat16);
 
 TF_CALL_POD_STRING_TYPES(REGISTER_SLICE);
 TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE);
-REGISTER_SLICE(bfloat16);
 #undef REGISTER_SLICE
 #else
 #define REGISTER_SLICE(type)                             \
@@ -469,7 +467,6 @@ REGISTER_SLICE(bfloat16);
 
 TF_CALL_POD_STRING_TYPES(REGISTER_SLICE);
 TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE);
-REGISTER_SLICE(bfloat16);
 #undef REGISTER_SLICE
 #endif  // INTEL_MKL
 
@@ -497,6 +494,7 @@ namespace functor {
 TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N);
 TF_CALL_complex64(DECLARE_FOR_N);
 TF_CALL_complex128(DECLARE_FOR_N);
+TF_CALL_bfloat16(DECLARE_FOR_N);
 DECLARE_FOR_N(int32);
 
 #undef DECLARE_FOR_N
@@ -515,6 +513,7 @@ DECLARE_FOR_N(int32);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_bfloat16(REGISTER_GPU);
 
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/slice_op_cpu_impl.h b/tensorflow/core/kernels/slice_op_cpu_impl.h
index a70805658e8134ded229aa44ed86bb63762ab8b0..58dc7df3e0f46fee5b2035ca9d9b59b29941829f 100644
--- a/tensorflow/core/kernels/slice_op_cpu_impl.h
+++ b/tensorflow/core/kernels/slice_op_cpu_impl.h
@@ -30,7 +30,6 @@ using CpuDevice = Eigen::ThreadPoolDevice;
   template struct functor::Slice<CpuDevice, T, CPU_PROVIDED_IXDIM>;
 
 TF_CALL_ALL_TYPES(DEFINE_CPU_KERNELS);
-DEFINE_CPU_KERNELS(bfloat16);
 
 #undef DEFINE_CPU_KERNELS
 
diff --git a/tensorflow/core/kernels/slice_op_gpu.cu.cc b/tensorflow/core/kernels/slice_op_gpu.cu.cc
index a301986f2ff23a467de8f7d3169bfb6cf6ff03d1..9d51f8978c0a24afb2f98845a4de4e8b51a29aeb 100644
--- a/tensorflow/core/kernels/slice_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/slice_op_gpu.cu.cc
@@ -39,6 +39,7 @@ typedef Eigen::GpuDevice GPUDevice;
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 TF_CALL_complex64(DEFINE_GPU_KERNELS);
 TF_CALL_complex128(DEFINE_GPU_KERNELS);
+TF_CALL_bfloat16(DEFINE_GPU_KERNELS);
 DEFINE_GPU_KERNELS(int32);
 
 #undef DEFINE_GPU_KERNELS
diff --git a/tensorflow/core/kernels/snapshot_op.cc b/tensorflow/core/kernels/snapshot_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..50157d5d48f93bfe61cbac95246123ef0a7d446e
--- /dev/null
+++ b/tensorflow/core/kernels/snapshot_op.cc
@@ -0,0 +1,46 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/array_ops.cc.
+#include "tensorflow/core/kernels/snapshot_op.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+#define REGISTER_KERNEL(TYPE)                                        \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("Snapshot").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
+      SnapshotOp<CPUDevice, TYPE>);
+
+TF_CALL_POD_TYPES(REGISTER_KERNEL);
+#undef REGISTER_KERNEL
+
+#if TENSORFLOW_USE_SYCL
+typedef Eigen::SyclDevice SyclDevice;
+#define REGISTER_SYCL_KERNEL(TYPE)                                    \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("Snapshot").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
+      SnapshotOp<SyclDevice, TYPE>);
+
+TF_CALL_POD_TYPES(REGISTER_SYCL_KERNEL);
+
+#undef REGISTER_SYCL_KERNEL
+#endif  // TENSORFLOW_USE_SYCL
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/snapshot_op.h b/tensorflow/core/kernels/snapshot_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..2c79893b49661519515a7b4a537ff3caeceba2be
--- /dev/null
+++ b/tensorflow/core/kernels/snapshot_op.h
@@ -0,0 +1,49 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_KERNELS_SNAPSHOT_OP_H_
+#define TENSORFLOW_KERNELS_SNAPSHOT_OP_H_
+
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif
+
+#define EIGEN_USE_THREADS
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+
+template <typename Device, typename Scalar>
+class SnapshotOp : public OpKernel {
+ public:
+  explicit SnapshotOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &output));
+    const Device& device = context->eigen_device<Device>();
+    device.memcpy(output->template flat<Scalar>().data(),
+                  input.template flat<Scalar>().data(),
+                  input.NumElements() * sizeof(Scalar));
+  }
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_KERNELS_SNAPSHOT_OP_H_
diff --git a/tensorflow/core/kernels/snapshot_op_gpu.cu.cc b/tensorflow/core/kernels/snapshot_op_gpu.cu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..52070be838d65d21813dfe097db9c395ef5a8448
--- /dev/null
+++ b/tensorflow/core/kernels/snapshot_op_gpu.cu.cc
@@ -0,0 +1,37 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#if GOOGLE_CUDA
+
+// See docs in ../ops/array_ops.cc.
+#include "tensorflow/core/kernels/snapshot_op.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+#define REGISTER_KERNEL(TYPE)                                        \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("Snapshot").Device(DEVICE_GPU).TypeConstraint<TYPE>("T"), \
+      SnapshotOp<GPUDevice, TYPE>);
+
+TF_CALL_POD_TYPES(REGISTER_KERNEL);
+#undef REGISTER_KERNEL
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/softmax_op_functor.h b/tensorflow/core/kernels/softmax_op_functor.h
index 1f38bdce8c3a8f70e89efe62ad6c6f385bb5dfc0..d3a267ed877eedf8ed3845ebd11255f0690b3106 100644
--- a/tensorflow/core/kernels/softmax_op_functor.h
+++ b/tensorflow/core/kernels/softmax_op_functor.h
@@ -64,23 +64,21 @@ struct SoftmaxEigenImpl {
     one_by_class.set(1, num_classes);
 #endif
     // shifted_logits = logits - max(logits along classes);
-    auto shifted_logits = (logits -
-                           logits.maximum(along_class)
-                               .eval()
-                               .reshape(batch_by_one)
-                               .broadcast(one_by_class));
+    auto shifted_logits = (logits - logits.maximum(along_class)
+                                        .eval()
+                                        .reshape(batch_by_one)
+                                        .broadcast(one_by_class));
     if (log) {
       // Calculate the log of the softmax
       // softmax = logits - max(logits along classes);
       softmax.device(d) = shifted_logits;
       // softmax = softmax - log(sum(exp(softmax along classes)));
-      softmax.device(d) = (softmax -
-                           softmax.exp()
-                               .sum(along_class)
-                               .eval()
-                               .reshape(batch_by_one)
-                               .log()
-                               .broadcast(one_by_class));
+      softmax.device(d) = (softmax - softmax.exp()
+                                         .sum(along_class)
+                                         .log()
+                                         .eval()
+                                         .reshape(batch_by_one)
+                                         .broadcast(one_by_class));
     } else {
       // NOTE(touts): If you modify this implementation please run
       // the BM_ImageNetSoftmaxFwd benchmark in nn_ops_test.cc.
@@ -88,12 +86,11 @@ struct SoftmaxEigenImpl {
       // softmax = exp(logits - max(logits along classes));
       softmax.device(d) = shifted_logits.exp();
       // softmax = softmax * (1 / sum(softmax along classes));
-      softmax.device(d) = (softmax *
-                           softmax.sum(along_class)
-                               .inverse()
-                               .eval()
-                               .reshape(batch_by_one)
-                               .broadcast(one_by_class));
+      softmax.device(d) = (softmax * softmax.sum(along_class)
+                                         .inverse()
+                                         .eval()
+                                         .reshape(batch_by_one)
+                                         .broadcast(one_by_class));
     }
   }
 };
diff --git a/tensorflow/core/kernels/spectrogram_op_test.cc b/tensorflow/core/kernels/spectrogram_op_test.cc
index 5c3cbeeeb93fb37c7718cd522d16fc582ff8ba13..d34a7c99ecbffc18d819f4182389c98635418934 100644
--- a/tensorflow/core/kernels/spectrogram_op_test.cc
+++ b/tensorflow/core/kernels/spectrogram_op_test.cc
@@ -31,8 +31,8 @@ limitations under the License.
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
-
-using namespace ops;  // NOLINT(build/namespaces)
+namespace ops {
+namespace {
 
 TEST(SpectrogramOpTest, SimpleTest) {
   Scope root = Scope::NewRootScope();
@@ -101,4 +101,6 @@ TEST(SpectrogramOpTest, SquaredTest) {
       test::AsTensor<float>({0, 1, 4, 1, 0}, TensorShape({1, 1, 5})), 1e-3);
 }
 
+}  // namespace
+}  // namespace ops
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/split_lib_cpu.cc b/tensorflow/core/kernels/split_lib_cpu.cc
index 6583f96a9172e0bd79fdc463ad249c71c99ffef9..25026208d1ee78cb614e4ad41dccb7a0fa0f7817 100644
--- a/tensorflow/core/kernels/split_lib_cpu.cc
+++ b/tensorflow/core/kernels/split_lib_cpu.cc
@@ -41,7 +41,6 @@ void Split<Eigen::ThreadPoolDevice, T>::operator()(
 
 TF_CALL_ALL_TYPES(DEFINE_CPU_KERNELS)
 DEFINE_CPU_KERNELS(quint8)
-DEFINE_CPU_KERNELS(bfloat16)
 
 #ifdef TENSORFLOW_USE_SYCL
 template <typename T>
diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc
index dd6fc6115f7b5bce60f5373c8556e7b1642afd6a..9f234fc0935be0662b0d8df1a6bd1c109ab24fd9 100644
--- a/tensorflow/core/kernels/split_lib_gpu.cu.cc
+++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc
@@ -52,7 +52,7 @@ void SplitCustom<Device, T>::operator()(
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 TF_CALL_complex64(DEFINE_GPU_KERNELS);
 TF_CALL_complex128(DEFINE_GPU_KERNELS);
-DEFINE_GPU_KERNELS(bfloat16);
+TF_CALL_bfloat16(DEFINE_GPU_KERNELS);
 
 #undef DEFINE_GPU_KERNELS
 #define DEFINE_GPU_KERNELS(T) template struct SplitCustom<Eigen::GpuDevice, T>;
@@ -60,7 +60,7 @@ DEFINE_GPU_KERNELS(bfloat16);
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 TF_CALL_complex64(DEFINE_GPU_KERNELS);
 TF_CALL_complex128(DEFINE_GPU_KERNELS);
-DEFINE_GPU_KERNELS(bfloat16);
+TF_CALL_bfloat16(DEFINE_GPU_KERNELS);
 
 #undef DEFINE_GPU_KERNELS
 
@@ -243,6 +243,7 @@ struct SplitVOpGPULaunch {
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL);
 TF_CALL_complex64(REGISTER_GPU_KERNEL);
 TF_CALL_complex128(REGISTER_GPU_KERNEL);
+TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 #define REGISTER_GPU_KERNEL(T)                 \
   template struct SplitVOpGPULaunch<T, int32>; \
@@ -251,7 +252,7 @@ TF_CALL_complex128(REGISTER_GPU_KERNEL);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL);
 TF_CALL_complex64(REGISTER_GPU_KERNEL);
 TF_CALL_complex128(REGISTER_GPU_KERNEL);
-REGISTER_GPU_KERNEL(bfloat16);
+TF_CALL_bfloat16(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc
index 58e1a73be61cf04aba05ebadb8d8e49f6aacef6b..78badde27e5c4ca33faa00073e7b412e85d82970 100644
--- a/tensorflow/core/kernels/split_op.cc
+++ b/tensorflow/core/kernels/split_op.cc
@@ -375,6 +375,7 @@ REGISTER_SPLIT(quint8);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+REGISTER_GPU(bfloat16);
 #undef REGISTER_GPU
 
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc
index 3316e5fcc920166a8bd4f49f4ce1752b4c8910cb..f1078ac349c979bb14f3949c05a7c493c9355567 100644
--- a/tensorflow/core/kernels/split_v_op.cc
+++ b/tensorflow/core/kernels/split_v_op.cc
@@ -406,7 +406,6 @@ class SplitVOpGPU : public SplitVOpBase<GPUDevice, T, Tlen> {
   REGISTER_SPLIT(type, int64);
 
 TF_CALL_ALL_TYPES(REGISTER_SPLIT_LEN);
-REGISTER_SPLIT_LEN(bfloat16);
 
 #undef REGISTER_SPLIT_LEN
 #undef REGISTER_SPLIT
diff --git a/tensorflow/core/kernels/sql/sqlite_query_connection.cc b/tensorflow/core/kernels/sql/sqlite_query_connection.cc
deleted file mode 100644
index 1330506d28ca96b4a9e668219dc67cbb1c3b796d..0000000000000000000000000000000000000000
--- a/tensorflow/core/kernels/sql/sqlite_query_connection.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/core/kernels/sql/sqlite_query_connection.h"
-
-#include "tensorflow/core/lib/strings/stringprintf.h"
-
-namespace tensorflow {
-
-namespace sql {
-
-SqliteQueryConnection::SqliteQueryConnection() {}
-SqliteQueryConnection::~SqliteQueryConnection() {}
-
-Status SqliteQueryConnection::Open(const string& data_source_name,
-                                   const string& query,
-                                   const DataTypeVector& output_types) {
-  if (db_ != nullptr) {
-    return errors::FailedPrecondition(
-        "Failed to open query connection: Connection already opeend.");
-  }
-  auto s = Sqlite::Open(data_source_name);
-  if (s.ok()) {
-    db_ = std::move(s.ValueOrDie());
-    query_ = query;
-    output_types_ = output_types;
-  }
-  return s.status();
-}
-
-Status SqliteQueryConnection::Close() {
-  Status s;
-  s.Update(stmt_.Close());
-  s.Update(db_->Close());
-  return s;
-}
-
-Status SqliteQueryConnection::GetNext(std::vector<Tensor>* out_tensors,
-                                      bool* end_of_sequence) {
-  if (!stmt_) {
-    Status s = PrepareQuery();
-    if (!s.ok()) {
-      return s;
-    }
-  }
-  Status s = stmt_.Step(end_of_sequence);
-  if (!*end_of_sequence) {
-    for (int i = 0; i < column_count_; i++) {
-      DataType dt = output_types_[i];
-      Tensor tensor(cpu_allocator(), dt, {});
-      FillTensorWithResultSetEntry(dt, i, &tensor);
-      out_tensors->emplace_back(std::move(tensor));
-    }
-  }
-  return s;
-}
-
-Status SqliteQueryConnection::PrepareQuery() {
-  stmt_ = db_->Prepare(query_);
-  Status s = stmt_.status();
-  if (s.ok()) {
-    int column_count = stmt_.ColumnCount();
-    if (column_count != output_types_.size()) {
-      return errors::InvalidArgument(tensorflow::strings::Printf(
-          "The number of columns in query (%d) must match the number of "
-          "elements in output_types (%zu).",
-          column_count, output_types_.size()));
-    }
-    column_count_ = column_count;
-  }
-  return s;
-}
-
-void SqliteQueryConnection::FillTensorWithResultSetEntry(
-    const DataType& data_type, int column_index, Tensor* tensor) {
-  switch (data_type) {
-    case DT_STRING:
-      tensor->scalar<string>()() = stmt_.ColumnString(column_index);
-      break;
-    case DT_INT8:
-      tensor->scalar<int8>()() =
-          static_cast<int8>(stmt_.ColumnInt(column_index));
-      break;
-    case DT_INT16:
-      tensor->scalar<int16>()() =
-          static_cast<int16>(stmt_.ColumnInt(column_index));
-      break;
-    case DT_INT32:
-      tensor->scalar<int32>()() =
-          static_cast<int32>(stmt_.ColumnInt(column_index));
-      break;
-    case DT_INT64:
-      tensor->scalar<int64>()() = stmt_.ColumnInt(column_index);
-      break;
-    case DT_UINT8:
-      tensor->scalar<uint8>()() =
-          static_cast<uint8>(stmt_.ColumnInt(column_index));
-      break;
-    case DT_UINT16:
-      tensor->scalar<uint16>()() =
-          static_cast<uint16>(stmt_.ColumnInt(column_index));
-      break;
-    case DT_BOOL:
-      tensor->scalar<bool>()() = stmt_.ColumnInt(column_index) != 0;
-      break;
-    case DT_FLOAT:
-      tensor->scalar<float>()() =
-          static_cast<float>(stmt_.ColumnDouble(column_index));
-      break;
-    case DT_DOUBLE:
-      tensor->scalar<double>()() = stmt_.ColumnDouble(column_index);
-      break;
-      // Error preemptively thrown by SqlDatasetOp::MakeDataset in this case.
-    default: {
-      LOG(FATAL)
-          << "Use of unsupported TensorFlow data type by 'SqlQueryConnection': "
-          << DataTypeString(data_type) << ".";
-    }
-  }
-}
-
-}  // namespace sql
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index 8fc40db3cc22060eb18b64c2246188925626b8bf..7c213e14d21efd6fcf033d3cd341c35838fe9f7b 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -386,7 +386,6 @@ class StridedSliceAssignOp : public OpKernel {
                           StridedSliceAssignOp<CPUDevice, type>)
 
 TF_CALL_ALL_TYPES(REGISTER_STRIDED_SLICE);
-REGISTER_STRIDED_SLICE(bfloat16);
 
 #undef REGISTER_STRIDED_SLICE
 
@@ -427,6 +426,7 @@ REGISTER_STRIDED_SLICE(bfloat16);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_complex64(REGISTER_GPU);
 TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_int64(REGISTER_GPU);
 
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
index a8487f49f4488269e058c6b7ee94d0f82aeb5270..8ca27e3b920e7c0cd36343d0c9db5a6098b6bede 100644
--- a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc
@@ -53,6 +53,7 @@ typedef Eigen::GpuDevice GPUDevice;
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
 TF_CALL_complex64(DEFINE_GPU_KERNELS);
 TF_CALL_complex128(DEFINE_GPU_KERNELS);
+TF_CALL_int64(DEFINE_GPU_KERNELS);
 DEFINE_GPU_KERNELS(int32);
 
 #undef DEFINE_GPU_KERNELS
diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h
index de6514757242c1e1079752427b444e31a80bc5ef..a84ba38ef41486f86f5e37bd95287b8ae6c9bb2e 100644
--- a/tensorflow/core/kernels/strided_slice_op_impl.h
+++ b/tensorflow/core/kernels/strided_slice_op_impl.h
@@ -284,10 +284,10 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N_GPU);
 TF_CALL_complex64(DECLARE_FOR_N_GPU);
 TF_CALL_complex128(DECLARE_FOR_N_GPU);
 DECLARE_FOR_N_GPU(int32);
+DECLARE_FOR_N_GPU(int64);
 #endif  // END GOOGLE_CUDA
 
 TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU);
-DECLARE_FOR_N_CPU(bfloat16);
 
 #ifdef TENSORFLOW_USE_SYCL
 #define PREVENT_FOR_N_SYCL(T) \
@@ -299,6 +299,7 @@ DECLARE_FOR_N_CPU(bfloat16);
 TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL);
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL);
 DECLARE_FOR_N_SYCL(int32);
+DECLARE_FOR_N_SYCL(int64);
 
 #undef DECLARE_FOR_N_SYCL
 #endif // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/string_to_number_op.cc b/tensorflow/core/kernels/string_to_number_op.cc
index d583e4e6bba27d76ac2795eb8b7d11147282a04d..70dbd15c46cb341d8ad6ed6013b5b9ff8a5d61da 100644
--- a/tensorflow/core/kernels/string_to_number_op.cc
+++ b/tensorflow/core/kernels/string_to_number_op.cc
@@ -49,43 +49,15 @@ class StringToNumberOp : public OpKernel {
     auto output_flat = output_tensor->flat<OutputType>();
 
     for (int i = 0; i < input_flat.size(); ++i) {
-      Convert(input_flat(i), &output_flat(i), context);
+      OP_REQUIRES(
+          context,
+          strings::SafeStringToNumeric<OutputType>(input_flat(i).c_str(),
+                                                   &output_flat(i)),
+          errors::InvalidArgument(kErrorMessage, input_flat(i).c_str()));
     }
   }
-
- private:
-  void Convert(const string& s, OutputType* output_data,
-               OpKernelContext* context);
 };
 
-template <>
-void StringToNumberOp<float>::Convert(const string& s, float* output_data,
-                                      OpKernelContext* context) {
-  OP_REQUIRES(context, strings::safe_strtof(s.c_str(), output_data),
-              errors::InvalidArgument(kErrorMessage, s));
-}
-
-template <>
-void StringToNumberOp<double>::Convert(const string& s, double* output_data,
-                                       OpKernelContext* context) {
-  OP_REQUIRES(context, strings::safe_strtod(s.c_str(), output_data),
-              errors::InvalidArgument(kErrorMessage, s));
-}
-
-template <>
-void StringToNumberOp<int32>::Convert(const string& s, int32* output_data,
-                                      OpKernelContext* context) {
-  OP_REQUIRES(context, strings::safe_strto32(s, output_data),
-              errors::InvalidArgument(kErrorMessage, s));
-}
-
-template <>
-void StringToNumberOp<int64>::Convert(const string& s, int64* output_data,
-                                      OpKernelContext* context) {
-  OP_REQUIRES(context, strings::safe_strto64(s, output_data),
-              errors::InvalidArgument(kErrorMessage, s));
-}
-
 // Registers the currently supported output types.
 #define REGISTER(type)                                           \
   REGISTER_KERNEL_BUILDER(Name("StringToNumber")                 \
diff --git a/tensorflow/core/kernels/summary_interface.cc b/tensorflow/core/kernels/summary_interface.cc
deleted file mode 100644
index 97c0c2c099cfceaa98a577d9642710020621e7e6..0000000000000000000000000000000000000000
--- a/tensorflow/core/kernels/summary_interface.cc
+++ /dev/null
@@ -1,462 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/core/kernels/summary_interface.h"
-
-#include <utility>
-
-#include "tensorflow/core/framework/graph.pb.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/resource_mgr.h"
-#include "tensorflow/core/framework/summary.pb.h"
-#include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/framework/types.pb.h"
-#include "tensorflow/core/lib/histogram/histogram.h"
-#include "tensorflow/core/lib/io/path.h"
-#include "tensorflow/core/lib/png/png_io.h"
-#include "tensorflow/core/lib/wav/wav_io.h"
-#include "tensorflow/core/util/events_writer.h"
-#include "tensorflow/core/util/ptr_util.h"
-
-namespace tensorflow {
-namespace {
-template <typename T>
-Status TensorValueAt(Tensor t, int64 index, T* out) {
-  switch (t.dtype()) {
-    case DT_FLOAT:
-      *out = t.flat<float>()(index);
-      break;
-    case DT_DOUBLE:
-      *out = t.flat<double>()(index);
-      break;
-    case DT_HALF:
-      *out = T(t.flat<Eigen::half>()(index));
-      break;
-    case DT_INT32:
-      *out = t.flat<int32>()(index);
-      break;
-    case DT_UINT8:
-      *out = t.flat<uint8>()(index);
-      break;
-    case DT_INT16:
-      *out = t.flat<int16>()(index);
-      break;
-    case DT_INT8:
-      *out = t.flat<int8>()(index);
-      break;
-    case DT_BOOL:
-      *out = t.flat<bool>()(index);
-      break;
-    case DT_INT64:
-      *out = t.flat<int64>()(index);
-      break;
-    default:
-      return errors::Unimplemented("Scalar summary for dtype ",
-                                   DataTypeString(t.dtype()),
-                                   " is not supported.");
-  }
-  return Status::OK();
-}
-
-typedef Eigen::Tensor<uint8, 2, Eigen::RowMajor> Uint8Image;
-
-// Add the sequence of images specified by ith_image to the summary.
-//
-// Factoring this loop out into a helper function lets ith_image behave
-// differently in the float and uint8 cases: the float case needs a temporary
-// buffer which can be shared across calls to ith_image, but the uint8 case
-// does not.
-Status AddImages(const string& tag, int max_images, int batch_size, int w,
-                 int h, int depth,
-                 const std::function<Uint8Image(int)>& ith_image, Summary* s) {
-  const int N = std::min<int>(max_images, batch_size);
-  for (int i = 0; i < N; ++i) {
-    Summary::Value* v = s->add_value();
-    // The tag depends on the number of requested images (not the number
-    // produced.)
-    //
-    // Note that later on avisu uses "/" to figure out a consistent naming
-    // convention for display, so we append "/image" to guarantee that the
-    // image(s) won't be displayed in the global scope with no name.
-    if (max_images > 1) {
-      v->set_tag(strings::StrCat(tag, "/image/", i));
-    } else {
-      v->set_tag(strings::StrCat(tag, "/image"));
-    }
-
-    const auto image = ith_image(i);
-    Summary::Image* si = v->mutable_image();
-    si->set_height(h);
-    si->set_width(w);
-    si->set_colorspace(depth);
-    const int channel_bits = 8;
-    const int compression = -1;  // Use zlib default
-    if (!png::WriteImageToBuffer(image.data(), w, h, w * depth, depth,
-                                 channel_bits, compression,
-                                 si->mutable_encoded_image_string(), nullptr)) {
-      return errors::Internal("PNG encoding failed");
-    }
-  }
-  return Status::OK();
-}
-
-template <class T>
-void NormalizeFloatImage(int hw, int depth,
-                         typename TTypes<T>::ConstMatrix values,
-                         typename TTypes<uint8>::ConstVec bad_color,
-                         Uint8Image* image) {
-  if (!image->size()) return;  // Nothing to do for empty images
-
-  // Rescale the image to uint8 range.
-  //
-  // We are trying to generate an RGB image from a float/half tensor.  We do
-  // not have any info about the expected range of values in the tensor
-  // but the generated image needs to have all RGB values within [0, 255].
-  //
-  // We use two different algorithms to generate these values.  If the
-  // tensor has only positive values we scale them all by 255/max(values).
-  // If the tensor has both negative and positive values we scale them by
-  // the max of their absolute values and center them around 127.
-  //
-  // This works for most cases, but does not respect the relative dynamic
-  // range across different instances of the tensor.
-
-  // Compute min and max ignoring nonfinite pixels
-  float image_min = std::numeric_limits<float>::infinity();
-  float image_max = -image_min;
-  for (int i = 0; i < hw; i++) {
-    bool finite = true;
-    for (int j = 0; j < depth; j++) {
-      if (!Eigen::numext::isfinite(values(i, j))) {
-        finite = false;
-        break;
-      }
-    }
-    if (finite) {
-      for (int j = 0; j < depth; j++) {
-        float value(values(i, j));
-        image_min = std::min(image_min, value);
-        image_max = std::max(image_max, value);
-      }
-    }
-  }
-
-  // Pick an affine transform into uint8
-  const float kZeroThreshold = 1e-6;
-  T scale, offset;
-  if (image_min < 0) {
-    const float max_val = std::max(std::abs(image_min), std::abs(image_max));
-    scale = T(max_val < kZeroThreshold ? 0.0f : 127.0f / max_val);
-    offset = T(128.0f);
-  } else {
-    scale = T(image_max < kZeroThreshold ? 0.0f : 255.0f / image_max);
-    offset = T(0.0f);
-  }
-
-  // Transform image, turning nonfinite values to bad_color
-  for (int i = 0; i < hw; i++) {
-    bool finite = true;
-    for (int j = 0; j < depth; j++) {
-      if (!Eigen::numext::isfinite(values(i, j))) {
-        finite = false;
-        break;
-      }
-    }
-    if (finite) {
-      image->chip<0>(i) =
-          (values.template chip<0>(i) * scale + offset).template cast<uint8>();
-    } else {
-      image->chip<0>(i) = bad_color;
-    }
-  }
-}
-
-template <class T>
-Status NormalizeAndAddImages(const Tensor& tensor, int max_images, int h, int w,
-                             int hw, int depth, int batch_size,
-                             const string& base_tag, Tensor bad_color_tensor,
-                             Summary* s) {
-  // For float and half images, nans and infs are replaced with bad_color.
-  if (bad_color_tensor.dim_size(0) < depth) {
-    return errors::InvalidArgument(
-        "expected depth <= bad_color.size, got depth = ", depth,
-        ", bad_color.size = ", bad_color_tensor.dim_size(0));
-  }
-  auto bad_color_full = bad_color_tensor.vec<uint8>();
-  typename TTypes<uint8>::ConstVec bad_color(bad_color_full.data(), depth);
-
-  // Float images must be scaled and translated.
-  Uint8Image image(hw, depth);
-  auto ith_image = [&tensor, &image, bad_color, batch_size, hw, depth](int i) {
-    auto tensor_eigen = tensor.template shaped<T, 3>({batch_size, hw, depth});
-    typename TTypes<T>::ConstMatrix values(
-        &tensor_eigen(i, 0, 0), Eigen::DSizes<Eigen::DenseIndex, 2>(hw, depth));
-    NormalizeFloatImage<T>(hw, depth, values, bad_color, &image);
-    return image;
-  };
-  return AddImages(base_tag, max_images, batch_size, w, h, depth, ith_image, s);
-}
-
-}  // namespace
-
-class SummaryWriterImpl : public SummaryWriterInterface {
- public:
-  SummaryWriterImpl(int max_queue, int flush_millis, Env* env)
-      : SummaryWriterInterface(),
-        is_initialized_(false),
-        max_queue_(max_queue),
-        flush_millis_(flush_millis),
-        env_(env) {}
-
-  Status Initialize(const string& logdir, const string& filename_suffix) {
-    const Status is_dir = env_->IsDirectory(logdir);
-    if (!is_dir.ok()) {
-      if (is_dir.code() != tensorflow::error::NOT_FOUND) {
-        return is_dir;
-      }
-      TF_RETURN_IF_ERROR(env_->CreateDir(logdir));
-    }
-    mutex_lock ml(mu_);
-    events_writer_ =
-        tensorflow::MakeUnique<EventsWriter>(io::JoinPath(logdir, "events"));
-    if (!events_writer_->InitWithSuffix(filename_suffix)) {
-      return errors::Unknown("Could not initialize events writer.");
-    }
-    last_flush_ = env_->NowMicros();
-    is_initialized_ = true;
-    return Status::OK();
-  }
-
-  Status Flush() override {
-    mutex_lock ml(mu_);
-    if (!is_initialized_) {
-      return errors::FailedPrecondition("Class was not properly initialized.");
-    }
-    return InternalFlush();
-  }
-
-  ~SummaryWriterImpl() override {
-    (void)Flush();  // Ignore errors.
-  }
-
-  Status WriteTensor(int64 global_step, Tensor t, const string& tag,
-                     const string& serialized_metadata) override {
-    std::unique_ptr<Event> e{new Event};
-    e->set_step(global_step);
-    e->set_wall_time(GetWallTime());
-    Summary::Value* v = e->mutable_summary()->add_value();
-    t.AsProtoTensorContent(v->mutable_tensor());
-    v->set_tag(tag);
-    if (!serialized_metadata.empty()) {
-      v->mutable_metadata()->ParseFromString(serialized_metadata);
-    }
-    return WriteEvent(std::move(e));
-  }
-
-  Status WriteScalar(int64 global_step, Tensor t, const string& tag) override {
-    std::unique_ptr<Event> e{new Event};
-    e->set_step(global_step);
-    e->set_wall_time(GetWallTime());
-    Summary::Value* v = e->mutable_summary()->add_value();
-    v->set_tag(tag);
-    float value;
-    TF_RETURN_IF_ERROR(TensorValueAt<float>(t, 0, &value));
-    v->set_simple_value(value);
-    return WriteEvent(std::move(e));
-  }
-
-  Status WriteHistogram(int64 global_step, Tensor t,
-                        const string& tag) override {
-    std::unique_ptr<Event> e{new Event};
-    e->set_step(global_step);
-    e->set_wall_time(GetWallTime());
-    Summary::Value* v = e->mutable_summary()->add_value();
-    v->set_tag(tag);
-    histogram::Histogram histo;
-    for (int64 i = 0; i < t.NumElements(); i++) {
-      double double_val;
-      TF_RETURN_IF_ERROR(TensorValueAt<double>(t, i, &double_val));
-      if (Eigen::numext::isnan(double_val)) {
-        return errors::InvalidArgument("Nan in summary histogram for: ", tag);
-      } else if (Eigen::numext::isinf(double_val)) {
-        return errors::InvalidArgument("Infinity in summary histogram for: ",
-                                       tag);
-      }
-      histo.Add(double_val);
-    }
-
-    histo.EncodeToProto(v->mutable_histo(), false /* Drop zero buckets */);
-    return WriteEvent(std::move(e));
-  }
-
-  Status WriteImage(int64 global_step, Tensor tensor, const string& tag,
-                    int max_images, Tensor bad_color) override {
-    if (!(tensor.dims() == 4 &&
-          (tensor.dim_size(3) == 1 || tensor.dim_size(3) == 3 ||
-           tensor.dim_size(3) == 4))) {
-      return errors::InvalidArgument(
-          "Tensor must be 4-D with last dim 1, 3, or 4, not ",
-          tensor.shape().DebugString());
-    }
-    if (!(tensor.dim_size(0) < (1LL << 31) &&
-          tensor.dim_size(1) < (1LL << 31) &&
-          tensor.dim_size(2) < (1LL << 31) &&
-          (tensor.dim_size(1) * tensor.dim_size(2)) < (1LL << 29))) {
-      return errors::InvalidArgument("Tensor too large for summary ",
-                                     tensor.shape().DebugString());
-    }
-    std::unique_ptr<Event> e{new Event};
-    e->set_step(global_step);
-    e->set_wall_time(GetWallTime());
-    Summary* s = e->mutable_summary();
-    // The casts and h * w cannot overflow because of the limits above.
-    const int batch_size = static_cast<int>(tensor.dim_size(0));
-    const int h = static_cast<int>(tensor.dim_size(1));
-    const int w = static_cast<int>(tensor.dim_size(2));
-    const int hw = h * w;  // Compact these two dims for simplicity
-    const int depth = static_cast<int>(tensor.dim_size(3));
-    if (tensor.dtype() == DT_UINT8) {
-      // For uint8 input, no normalization is necessary
-      auto ith_image = [&tensor, batch_size, hw, depth](int i) {
-        auto values = tensor.shaped<uint8, 3>({batch_size, hw, depth});
-        return typename TTypes<uint8>::ConstMatrix(
-            &values(i, 0, 0), Eigen::DSizes<Eigen::DenseIndex, 2>(hw, depth));
-      };
-      TF_RETURN_IF_ERROR(
-          AddImages(tag, max_images, batch_size, w, h, depth, ith_image, s));
-    } else if (tensor.dtype() == DT_HALF) {
-      TF_RETURN_IF_ERROR(NormalizeAndAddImages<Eigen::half>(
-          tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, s));
-    } else if (tensor.dtype() == DT_FLOAT) {
-      TF_RETURN_IF_ERROR(NormalizeAndAddImages<float>(
-          tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, s));
-    } else {
-      return errors::InvalidArgument(
-          "Only DT_INT8, DT_HALF, and DT_FLOAT images are supported. Got ",
-          DataTypeString(tensor.dtype()));
-    }
-
-    return WriteEvent(std::move(e));
-  }
-
-  Status WriteAudio(int64 global_step, Tensor tensor, const string& tag,
-                    int max_outputs, float sample_rate) override {
-    if (sample_rate <= 0.0f) {
-      return errors::InvalidArgument("sample_rate must be > 0");
-    }
-    const int batch_size = tensor.dim_size(0);
-    const int64 length_frames = tensor.dim_size(1);
-    const int64 num_channels =
-        tensor.dims() == 2 ? 1 : tensor.dim_size(tensor.dims() - 1);
-    std::unique_ptr<Event> e{new Event};
-    e->set_step(global_step);
-    e->set_wall_time(GetWallTime());
-    Summary* s = e->mutable_summary();
-    const int N = std::min<int>(max_outputs, batch_size);
-    for (int i = 0; i < N; ++i) {
-      Summary::Value* v = s->add_value();
-      if (max_outputs > 1) {
-        v->set_tag(strings::StrCat(tag, "/audio/", i));
-      } else {
-        v->set_tag(strings::StrCat(tag, "/audio"));
-      }
-
-      Summary::Audio* sa = v->mutable_audio();
-      sa->set_sample_rate(sample_rate);
-      sa->set_num_channels(num_channels);
-      sa->set_length_frames(length_frames);
-      sa->set_content_type("audio/wav");
-
-      auto values =
-          tensor.shaped<float, 3>({batch_size, length_frames, num_channels});
-      auto channels_by_frames = typename TTypes<float>::ConstMatrix(
-          &values(i, 0, 0),
-          Eigen::DSizes<Eigen::DenseIndex, 2>(length_frames, num_channels));
-      size_t sample_rate_truncated = lrintf(sample_rate);
-      if (sample_rate_truncated == 0) {
-        sample_rate_truncated = 1;
-      }
-      TF_RETURN_IF_ERROR(wav::EncodeAudioAsS16LEWav(
-          channels_by_frames.data(), sample_rate_truncated, num_channels,
-          length_frames, sa->mutable_encoded_audio_string()));
-    }
-    return WriteEvent(std::move(e));
-  }
-
-  Status WriteGraph(int64 global_step,
-                    std::unique_ptr<GraphDef> graph) override {
-    std::unique_ptr<Event> e{new Event};
-    e->set_step(global_step);
-    e->set_wall_time(GetWallTime());
-    graph->SerializeToString(e->mutable_graph_def());
-    return WriteEvent(std::move(e));
-  }
-
-  Status WriteEvent(std::unique_ptr<Event> event) override {
-    mutex_lock ml(mu_);
-    queue_.emplace_back(std::move(event));
-    if (queue_.size() >= max_queue_ ||
-        env_->NowMicros() - last_flush_ > 1000 * flush_millis_) {
-      return InternalFlush();
-    }
-    return Status::OK();
-  }
-
-  string DebugString() override { return "SummaryWriterImpl"; }
-
- private:
-  double GetWallTime() {
-    return static_cast<double>(env_->NowMicros()) / 1.0e6;
-  }
-
-  Status InternalFlush() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    for (const std::unique_ptr<Event>& e : queue_) {
-      events_writer_->WriteEvent(*e);
-    }
-    queue_.clear();
-    if (!events_writer_->Flush()) {
-      return errors::InvalidArgument("Could not flush events file.");
-    }
-    last_flush_ = env_->NowMicros();
-    return Status::OK();
-  }
-
-  bool is_initialized_;
-  const int max_queue_;
-  const int flush_millis_;
-  uint64 last_flush_;
-  Env* env_;
-  mutex mu_;
-  std::vector<std::unique_ptr<Event>> queue_ GUARDED_BY(mu_);
-  // A pointer to allow deferred construction.
-  std::unique_ptr<EventsWriter> events_writer_ GUARDED_BY(mu_);
-  std::vector<std::pair<string, SummaryMetadata>> registered_summaries_
-      GUARDED_BY(mu_);
-};
-
-Status CreateSummaryWriter(int max_queue, int flush_millis,
-                           const string& logdir, const string& filename_suffix,
-                           Env* env, SummaryWriterInterface** result) {
-  SummaryWriterImpl* w = new SummaryWriterImpl(max_queue, flush_millis, env);
-  const Status s = w->Initialize(logdir, filename_suffix);
-  if (!s.ok()) {
-    w->Unref();
-    *result = nullptr;
-    return s;
-  }
-  *result = w;
-  return Status::OK();
-}
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/summary_interface.h b/tensorflow/core/kernels/summary_interface.h
index da1c28709fb35372b1f0b28faba757a23bcd9ac4..02391e967a84b2d2ff015d541969163807b9adc2 100644
--- a/tensorflow/core/kernels/summary_interface.h
+++ b/tensorflow/core/kernels/summary_interface.h
@@ -19,6 +19,8 @@ limitations under the License.
 
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/event.pb.h"
 
 namespace tensorflow {
@@ -53,16 +55,6 @@ class SummaryWriterInterface : public ResourceBase {
   virtual Status WriteEvent(std::unique_ptr<Event> e) = 0;
 };
 
-// Creates a SummaryWriterInterface instance which writes to a file. It will
-// enqueue up to max_queue summaries, and flush at least every flush_millis
-// milliseconds. The summaries will be written to the directory specified by
-// logdir and with the filename suffixed by filename_suffix. The caller owns a
-// reference to result if the returned status is ok. The Env object must not
-// be destroyed until after the returned writer.
-Status CreateSummaryWriter(int max_queue, int flush_millis,
-                           const string& logdir, const string& filename_suffix,
-                           Env* env, SummaryWriterInterface** result);
-
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_SUMMARY_INTERFACE_H_
diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc
index 7487e70acc22634edafd69b9b8d0a06481bcc4ed..41cbece1d648f3e2dba112375e494d2ed8192db9 100644
--- a/tensorflow/core/kernels/summary_kernels.cc
+++ b/tensorflow/core/kernels/summary_kernels.cc
@@ -13,11 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/contrib/tensorboard/db/schema.h"
 #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h"
+#include "tensorflow/contrib/tensorboard/db/summary_file_writer.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/resource_mgr.h"
-#include "tensorflow/core/kernels/summary_interface.h"
 #include "tensorflow/core/lib/db/sqlite.h"
 #include "tensorflow/core/platform/protobuf.h"
 
@@ -42,8 +43,9 @@ class CreateSummaryFileWriterOp : public OpKernel {
     OP_REQUIRES_OK(ctx, ctx->input("filename_suffix", &tmp));
     const string filename_suffix = tmp->scalar<string>()();
     SummaryWriterInterface* s;
-    OP_REQUIRES_OK(ctx, CreateSummaryWriter(max_queue, flush_millis, logdir,
-                                            filename_suffix, ctx->env(), &s));
+    OP_REQUIRES_OK(ctx,
+                   CreateSummaryFileWriter(max_queue, flush_millis, logdir,
+                                           filename_suffix, ctx->env(), &s));
     OP_REQUIRES_OK(ctx, CreateResource(ctx, HandleFromInput(ctx, 0), s));
   }
 };
@@ -65,10 +67,14 @@ class CreateSummaryDbWriterOp : public OpKernel {
     OP_REQUIRES_OK(ctx, ctx->input("user_name", &tmp));
     const string user_name = tmp->scalar<string>()();
     SummaryWriterInterface* s;
-    auto db = Sqlite::Open(db_uri);
-    OP_REQUIRES_OK(ctx, db.status());
+    Sqlite* db;
+    OP_REQUIRES_OK(ctx, Sqlite::Open(db_uri,
+                                     SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE,
+                                     &db));
+    core::ScopedUnref unref(db);
+    OP_REQUIRES_OK(ctx, SetupTensorboardSqliteDb(db));
     OP_REQUIRES_OK(
-        ctx, CreateSummaryDbWriter(std::move(db.ValueOrDie()), experiment_name,
+        ctx, CreateSummaryDbWriter(db, experiment_name,
                                    run_name, user_name, ctx->env(), &s));
     OP_REQUIRES_OK(ctx, CreateResource(ctx, HandleFromInput(ctx, 0), s));
   }
diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc
index cca6d0e35f2ee11d2a97f68581dd6f8dc87d929d..af93d814ec06ff86c6c7eb3312d97224dee485f2 100644
--- a/tensorflow/core/kernels/tensor_array_ops.cc
+++ b/tensorflow/core/kernels/tensor_array_ops.cc
@@ -336,8 +336,7 @@ class TensorArrayGradOp : public TensorArrayCreationOp {
           tensor_array->HasIdenticalElementShapes(), false /* dynamic_size */,
           true /* multiple_writes_aggregate */, true /* is_grad */,
           marked_size /* marked_size */, true /* close_after_read */);
-      TF_RETURN_IF_ERROR((*ret)->CopyShapesFrom(tensor_array));
-      return Status::OK();
+      return (*ret)->CopyShapesFrom(tensor_array);
     };
 
     Status s = rm->LookupOrCreate<TensorArray>(
@@ -709,7 +708,6 @@ TF_CALL_POD_STRING_TYPES(REGISTER_GATHER_AND_PACK);
 REGISTER_GATHER_AND_PACK(quint8);
 REGISTER_GATHER_AND_PACK(qint8);
 REGISTER_GATHER_AND_PACK(qint32);
-REGISTER_GATHER_AND_PACK(bfloat16);
 
 #undef REGISTER_GATHER_AND_PACK
 
@@ -940,7 +938,6 @@ TF_CALL_POD_STRING_TYPES(REGISTER_CONCAT);
 REGISTER_CONCAT(quint8);
 REGISTER_CONCAT(qint8);
 REGISTER_CONCAT(qint32);
-REGISTER_CONCAT(bfloat16);
 
 #undef REGISTER_CONCAT
 
diff --git a/tensorflow/core/kernels/topk_op.cc b/tensorflow/core/kernels/topk_op.cc
index 7648536c43d5a14effde0e530711de4dbee430e3..7fdce6cb7190ffa5f799853e27d18b9e33f2971a 100644
--- a/tensorflow/core/kernels/topk_op.cc
+++ b/tensorflow/core/kernels/topk_op.cc
@@ -64,7 +64,9 @@ class TopK : public OpKernel {
                 errors::InvalidArgument("input must be >= 1-D, got shape ",
                                         input_in.shape().DebugString()));
     OP_REQUIRES(context, input_in.dim_size(input_in.dims() - 1) >= k,
-                errors::InvalidArgument("input must have at least k columns"));
+                errors::InvalidArgument(
+                    "input must have at least k columns. Had ",
+                    input_in.dim_size(input_in.dims() - 1), ", needed ", k));
 
     const auto& input = input_in.flat_inner_dims<T>();
 
diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc
index b8d601389ba18b9d37d1adb23c3d7dec9614346f..38e77ab60fb7126bcdedc09bfe9e2ec7de88c0ad 100644
--- a/tensorflow/core/kernels/training_ops.cc
+++ b/tensorflow/core/kernels/training_ops.cc
@@ -536,8 +536,9 @@ class ApplyAdadeltaOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    if (use_exclusive_lock_) {
-      mutex_lock l1(*GetTrainingVariableMutex(ctx, 0));
+    mutex* mu = GetTrainingVariableMutex(ctx, 0);
+    if (use_exclusive_lock_ && mu != nullptr) {
+      mutex_lock l1(*mu);
       // Don't try to acquire a lock on the second ref as they share the same
       // mutex.
       //
@@ -682,15 +683,21 @@ class SparseApplyAdadeltaOp : public OpKernel {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
   }
 
-  void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
-    mutex* mu_var = GetTrainingVariableMutex(ctx, 0);
+  void Compute(OpKernelContext* ctx) override {
+    mutex* mu = GetTrainingVariableMutex(ctx, 0);
     // mu_accum is actually the same mutex as mu_var since currently we use a
     // global mutex.
     //
     // mutex* mu_accum = ctx->input_ref_mutex(1);
-    if (use_exclusive_lock_) {
-      mu_var->lock();
+    if (use_exclusive_lock_ && mu != nullptr) {
+      mutex_lock ml(*mu);
+      DoCompute(ctx);
+    } else {
+      DoCompute(ctx);
     }
+  }
+
+  void DoCompute(OpKernelContext* ctx) {
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
                             ctx, 0, use_exclusive_lock_, true, &var));
@@ -791,9 +798,6 @@ class SparseApplyAdadeltaOp : public OpKernel {
             update.square() * update.constant(static_cast<T>(1) - rho_scalar);
       }
     }
-    if (use_exclusive_lock_) {
-      mu_var->unlock();
-    }
 
     MaybeForwardRefInputToRefOutput(ctx, 0, 0);
   }
diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc
index 96c051c636e54b671fec259d38218dcf7cc0837c..2e0d18b634a8aebeaf2b7a0118ea8a9367804086 100644
--- a/tensorflow/core/kernels/transpose_op.cc
+++ b/tensorflow/core/kernels/transpose_op.cc
@@ -230,7 +230,6 @@ Status ConjugateTransposeCpuOp::DoTranspose(OpKernelContext* ctx,
                               .HostMemory("perm"),    \
                           MklConjugateTransposeCpuOp);
 TF_CALL_ALL_TYPES(REGISTER);
-REGISTER(bfloat16);
 #undef REGISTER
 
 #else  // INTEL_MKL
@@ -247,7 +246,6 @@ REGISTER(bfloat16);
                               .HostMemory("perm"),    \
                           ConjugateTransposeCpuOp);
 TF_CALL_ALL_TYPES(REGISTER)
-REGISTER(bfloat16);
 #undef REGISTER
 #endif  // INTEL_MKL
 
diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc
index d087784c8a0bd2a53438af4582754b2d47620545..0ef8724b10e492373c7663a58420bfe236be7df7 100644
--- a/tensorflow/core/kernels/unique_op.cc
+++ b/tensorflow/core/kernels/unique_op.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/hash/hash.h"
 
@@ -63,8 +64,17 @@ class UniqueOp : public OpKernel {
         OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
                     errors::InvalidArgument("unique expects a 1D vector."));
       } else {
-        auto axis_vec = axis_tensor.vec<int64>();
-        axis = axis_vec(0);
+        OP_REQUIRES(context,
+                    (axis_tensor.dtype() == DT_INT32 ||
+                     axis_tensor.dtype() == DT_INT64),
+                    errors::InvalidArgument(
+                        "axis tensor should be int32 or int64, but got ",
+                        axis_tensor.dtype()));
+        if (axis_tensor.dtype() == DT_INT32) {
+          axis = internal::SubtleMustCopy(axis_tensor.scalar<int32>()());
+        } else {
+          axis = internal::SubtleMustCopy(axis_tensor.scalar<int64>()());
+        }
         axis = axis < 0 ? axis + input.dims() : axis;
         OP_REQUIRES(context, 0 <= axis && axis < input.dims(),
                     errors::InvalidArgument("axis has to be between [0, ",
@@ -83,69 +93,100 @@ class UniqueOp : public OpKernel {
       }
     }
 
-    auto Tin = input.shaped<T, 3>(new_sizes);
-
     Tensor* idx = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(
-                                1, TensorShape({Tin.dimension(1)}), &idx));
+                                1, TensorShape({new_sizes[1]}), &idx));
     auto idx_vec = idx->template vec<TIndex>();
 
-    auto hash_fn = [&Tin](const int64& key) -> unsigned long {
-      size_t h = 0;
-      for (int64 i = 0; i < Tin.dimension(0); i++) {
-        for (int64 j = 0; j < Tin.dimension(2); j++) {
-          h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
+    int64 uniq_size;
+    if (new_sizes[0] == 1 && new_sizes[2] == 1) {
+      // Specialized and faster implementation when unique is run over single
+      // elements. Here we put T directly into the map rather than ints pointing
+      // to them as in the general case.
+      auto Tin = input.flat<T>();
+      const int64 N = static_cast<int64>(Tin.size());
+
+      std::unordered_map<T, TIndex> uniq;
+      uniq.reserve(2 * N);
+      for (int64 i = 0, j = 0; i < N; ++i) {
+        auto it = uniq.insert(std::make_pair(Tin(i), j));
+        idx_vec(i) = it.first->second;
+        if (it.second) {
+          ++j;
         }
       }
-      return h;
-    };
 
-    auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) {
-      for (int64 i = 0; i < Tin.dimension(0); i++) {
-        for (int64 j = 0; j < Tin.dimension(2); j++) {
-          if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
-            return false;
+      uniq_size = static_cast<int64>(uniq.size());
+      TensorShape output_shape(input.shape());
+      output_shape.set_dim(axis, uniq_size);
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK(context,
+                     context->allocate_output(0, output_shape, &output));
+      auto Tout = output->flat<T>();
+
+      for (auto it : uniq) {
+        Tout(it.second) = it.first;
+      }
+    } else {
+      // General implementation when unique is run over multiple elements.
+      auto Tin = input.shaped<T, 3>(new_sizes);
+
+      auto hash_fn = [&Tin](const int64& key) {
+        size_t h = 0;
+        for (int64 i = 0; i < Tin.dimension(0); i++) {
+          for (int64 j = 0; j < Tin.dimension(2); j++) {
+            h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
           }
         }
-      }
-      return true;
-    };
+        return h;
+      };
+
+      auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) {
+        for (int64 i = 0; i < Tin.dimension(0); i++) {
+          for (int64 j = 0; j < Tin.dimension(2); j++) {
+            if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
+              return false;
+            }
+          }
+        }
+        return true;
+      };
 
-    std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
-        uniq(0, hash_fn, equal_to_fn);
+      std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
+          uniq(0, hash_fn, equal_to_fn);
 
-    uniq.reserve(2 * Tin.dimension(1));
+      uniq.reserve(2 * Tin.dimension(1));
 
-    for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
-      auto it = uniq.insert(std::make_pair(i, j));
-      idx_vec(i) = it.first->second;
-      if (it.second) {
-        ++j;
+      for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
+        auto it = uniq.insert(std::make_pair(i, j));
+        idx_vec(i) = it.first->second;
+        if (it.second) {
+          ++j;
+        }
       }
-    }
 
-    int64 uniq_size = static_cast<int64>(uniq.size());
-    new_sizes[1] = uniq_size;
-    TensorShape output_shape(input.shape());
-    output_shape.set_dim(axis, uniq_size);
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
-    auto Tout = output->shaped<T, 3>(new_sizes);
+      uniq_size = static_cast<int64>(uniq.size());
+      new_sizes[1] = uniq_size;
+      TensorShape output_shape(input.shape());
+      output_shape.set_dim(axis, uniq_size);
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK(context,
+                     context->allocate_output(0, output_shape, &output));
+      auto Tout = output->shaped<T, 3>(new_sizes);
 
-    for (auto it : uniq) {
-      for (int64 i = 0; i < Tin.dimension(0); i++) {
-        for (int64 j = 0; j < Tin.dimension(2); j++) {
-          Tout(i, it.second, j) = Tin(i, it.first, j);
-        }
+      for (auto it : uniq) {
+        Tout.chip(it.second, 1) = Tin.chip(it.first, 1);
       }
     }
 
     if (num_outputs() > 2) {
+      Tensor* output = nullptr;
       OP_REQUIRES_OK(context, context->allocate_output(
                                   2, TensorShape({uniq_size}), &output));
       auto count_output_vec = output->template vec<TIndex>();
       count_output_vec.setZero();
-      for (int64 i = 0; i < Tin.dimension(1); ++i) {
+      const int N = idx_vec.size();
+      for (int64 i = 0; i < N; ++i) {
         count_output_vec(idx_vec(i))++;
       }
     }
diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc
index 7fd1def1fe02e8418882bc4cb19c4318779c5282..397bdd56708d766d06e5a68f3b049a5b928195e1 100644
--- a/tensorflow/core/kernels/unpack_op.cc
+++ b/tensorflow/core/kernels/unpack_op.cc
@@ -142,6 +142,7 @@ TF_CALL_ALL_TYPES(REGISTER_UNPACK);
       UnpackOp<GPUDevice, type>)
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
+TF_CALL_bfloat16(REGISTER_GPU);
 #undef REGISTER_GPU
 
 // A special GPU kernel for int32.
@@ -153,6 +154,12 @@ REGISTER_KERNEL_BUILDER(Name("Unpack")
                             .HostMemory("output")
                             .TypeConstraint<int32>("T"),
                         UnpackOp<CPUDevice, int32>);
+REGISTER_KERNEL_BUILDER(Name("Unpack")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("value")
+                            .HostMemory("output")
+                            .TypeConstraint<int64>("T"),
+                        UnpackOp<CPUDevice, int64>);
 
 #endif  // GOOGLE_CUDA
 
@@ -170,6 +177,13 @@ REGISTER_KERNEL_BUILDER(Name("Unpack")
                             .HostMemory("output")
                             .TypeConstraint<int32>("T"),
                         UnpackOp<CPUDevice, int32>);
+
+REGISTER_KERNEL_BUILDER(Name("Unpack")
+                            .Device(DEVICE_SYCL)
+                            .HostMemory("value")
+                            .HostMemory("output")
+                            .TypeConstraint<int64>("T"),
+                        UnpackOp<CPUDevice, int64>);
 #undef REGISTER_SYCL
 #endif  // TENSORFLOW_USE_SYCL
 
diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index 36b8ff09d7381a0b8bbb8b6f8d71b14e47fa4663..10ccc85b7cd63db7f8d329a4253784abed7174cf 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -23,6 +23,160 @@ limitations under the License.
 
 namespace tensorflow {
 
+// Resource stored by variables in the resource manager
+// (legacy, ref-style version).
+class LegacyVar : public ResourceBase {
+ public:
+  explicit LegacyVar(DataType dtype) : tensor_(dtype) {}
+  // Not copyable or movable.
+  LegacyVar(const LegacyVar&) = delete;
+  LegacyVar& operator=(const LegacyVar&) = delete;
+
+  mutex* mu() { return &mu_; }
+  Tensor* tensor() { return &tensor_; }
+
+  string DebugString() override {
+    return strings::StrCat(DataTypeString(tensor_.dtype()), "/",
+                           tensor_.shape().DebugString());
+  }
+
+ private:
+  mutex mu_;
+  Tensor tensor_;
+
+  ~LegacyVar() override {}
+};
+
+VariableOp::VariableOp(OpKernelConstruction* context) : OpKernel(context) {
+  OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
+  dtype_ = RemoveRefType(context->output_type(0));
+}
+
+void VariableOp::Compute(OpKernelContext* ctx) {
+  mutex_lock l(init_mu_);
+  if (!initialized_) {
+    OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(),
+                                    true /* use name() */));
+    initialized_ = true;
+  }
+  auto creator = [this](LegacyVar** var) {
+    *var = new LegacyVar(dtype_);
+    (*var)->tensor()->set_shape(shape_);
+    return Status::OK();
+  };
+  LegacyVar* var;
+  OP_REQUIRES_OK(ctx, cinfo_.resource_manager()->LookupOrCreate<LegacyVar>(
+                          cinfo_.container(), cinfo_.name(), &var, creator));
+  // Output a reference to our tensor, so it may be updated.
+  //
+  // As long as the resource manager hasn't been cleared the ref we return
+  // here is valid because it owns a ref on var.
+  ctx->set_output_ref(0, var->mu(), var->tensor());
+  if (ctx->track_allocations() && var->tensor()->IsInitialized()) {
+    AllocatorAttributes attr;
+    attr.set_gpu_compatible(true);
+    attr.set_nic_compatible(true);
+    ctx->record_persistent_memory_allocation(var->tensor()->AllocatedBytes());
+  }
+  var->Unref();
+}
+
+class TemporaryVariableOp : public OpKernel {
+ public:
+  explicit TemporaryVariableOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
+    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
+    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
+    // Variable name defaults to op name if not specified explicitly.
+    if (var_name_.empty()) var_name_ = name();
+  }
+
+  void Compute(OpKernelContext* context) override {
+    Status s;
+    ResourceMgr* rm = context->resource_manager();
+    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
+    auto* tmp_var = new TmpVar;
+    OP_REQUIRES(context, tmp_var,
+                errors::ResourceExhausted("Could not allocate TmpVar."));
+    tmp_var->name = var_name_;
+    s = context->allocate_temp(dtype_, shape_, &tmp_var->val);
+    if (!s.ok()) tmp_var->Unref();
+    OP_REQUIRES_OK(context, s);
+    OP_REQUIRES_OK(context, rm->Create(context->step_container()->name(),
+                                       var_name_, tmp_var));
+    context->set_output_ref(0, &tmp_var->mu, &tmp_var->val);
+    if (context->track_allocations()) {
+      context->record_persistent_memory_allocation(
+          tmp_var->val.AllocatedBytes());
+    }
+  }
+
+ private:
+  // Refcounted temporary variable resource.
+  friend class DestroyTemporaryVariableOp;
+  struct TmpVar : public ResourceBase {
+    mutex mu;
+    Tensor val;
+    string name;
+    string DebugString() override { return name; }
+    ~TmpVar() override { VLOG(3) << "TmpVar " << name << " deleted"; }
+  };
+
+  TensorShape shape_;
+  DataType dtype_;
+  string var_name_;
+};
+
+class DestroyTemporaryVariableOp : public OpKernel {
+ public:
+  explicit DestroyTemporaryVariableOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES(context, IsRefType(context->input_type(0)),
+                errors::InvalidArgument("lhs input needs to be a ref type"));
+    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
+    OP_REQUIRES(context, !var_name_.empty(),
+                errors::InvalidArgument("Missing var_name attribute"));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // NOTE(pbar): All other mutators of the Tensor Ref *must* have completed
+    // their execution before this DestroyTemporaryVariable op executes.
+    // This is typically achieved using control dependencies.
+    CHECK(IsRefType(context->input_dtype(0)));
+    Tensor tmpvar = context->mutable_input(0, false);
+    context->set_output(0, tmpvar);
+    ResourceMgr* rm = context->resource_manager();
+    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
+    OP_REQUIRES_OK(context, rm->Delete<TemporaryVariableOp::TmpVar>(
+                                context->step_container()->name(), var_name_));
+    if (context->track_allocations()) {
+      context->record_persistent_memory_allocation(
+          -static_cast<int64>(tmpvar.AllocatedBytes()));
+    }
+  }
+
+ private:
+  string var_name_;
+};
+
+class IsVariableInitializedOp : public OpKernel {
+ public:
+  explicit IsVariableInitializedOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Get a mutable input tensor of the Ref input.
+    const Tensor& input_tensor = context->mutable_input(0, false);
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, TensorShape({}), &output));
+    auto output_tensor = output->tensor<bool, 0>();
+    bool result = input_tensor.IsInitialized();
+    output_tensor() = result;
+  }
+};
+
 REGISTER_KERNEL_BUILDER(Name("Variable").Device(DEVICE_CPU), VariableOp);
 REGISTER_KERNEL_BUILDER(Name("VariableV2").Device(DEVICE_CPU), VariableOp);
 REGISTER_KERNEL_BUILDER(Name("TemporaryVariable").Device(DEVICE_CPU),
@@ -33,30 +187,30 @@ REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
                         IsVariableInitializedOp);
 
 #ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                                         \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("Variable").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),  \
-      VariableOp);                                                         \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),\
-      VariableOp);                                                         \
-  REGISTER_KERNEL_BUILDER(Name("TemporaryVariable")                        \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("dtype"),              \
-                          TemporaryVariableOp);                            \
-  REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable")                 \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("T"),                  \
-                          DestroyTemporaryVariableOp);                     \
-  REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized")                    \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("dtype")               \
-                              .HostMemory("is_initialized"),               \
+#define REGISTER_SYCL_KERNEL(type)                                          \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("Variable").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),   \
+      VariableOp);                                                          \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"), \
+      VariableOp);                                                          \
+  REGISTER_KERNEL_BUILDER(Name("TemporaryVariable")                         \
+                              .Device(DEVICE_SYCL)                          \
+                              .TypeConstraint<type>("dtype"),               \
+                          TemporaryVariableOp);                             \
+  REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable")                  \
+                              .Device(DEVICE_SYCL)                          \
+                              .TypeConstraint<type>("T"),                   \
+                          DestroyTemporaryVariableOp);                      \
+  REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized")                     \
+                              .Device(DEVICE_SYCL)                          \
+                              .TypeConstraint<type>("dtype")                \
+                              .HostMemory("is_initialized"),                \
                           IsVariableInitializedOp);
 
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL);
 #undef REGISTER_SYCL_KERNEL
-#endif // TENSORFLOW_USE_SYCL
+#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 // Only register 'Variable' on GPU for the subset of types also supported by
diff --git a/tensorflow/core/kernels/variable_ops.h b/tensorflow/core/kernels/variable_ops.h
index 355140d44c5c53c8496d5bd2b3028e9ae9b3940b..83134bad378bfef18c3e93be5cc3c6b70ab4f523 100644
--- a/tensorflow/core/kernels/variable_ops.h
+++ b/tensorflow/core/kernels/variable_ops.h
@@ -27,10 +27,16 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Resource stored by variables in the resource manager.
+// Resource stored by variables in the resource manager
+// (new, resource-style version).
 class Var : public ResourceBase {
  public:
   explicit Var(DataType dtype) : tensor_(dtype) {}
+  // Not copyable or movable.
+  Var(const Var&) = delete;
+  Var& operator=(const Var&) = delete;
+
+  // TODO(ebrevdo): Use LockSet instead of exposing mu.
   mutex* mu() { return &mu_; }
   Tensor* tensor() { return &tensor_; }
 
@@ -44,52 +50,12 @@ class Var : public ResourceBase {
   Tensor tensor_;
 
   ~Var() override {}
-  TF_DISALLOW_COPY_AND_ASSIGN(Var);
 };
 
 class VariableOp : public OpKernel {
  public:
-  explicit VariableOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
-    dtype_ = RemoveRefType(context->output_type(0));
-  }
-
-  void Compute(OpKernelContext* ctx) override {
-    mutex_lock l(init_mu_);
-    if (!initialized_) {
-      OP_REQUIRES_OK(
-          ctx,
-          cinfo_.Init(ctx->resource_manager(), def(), true /* use name() */));
-      initialized_ = true;
-    }
-    auto creator = [this](Var** var) {
-      *var = new Var(dtype_);
-      (*var)->tensor()->set_shape(shape_);
-      return Status::OK();
-    };
-    Var* var;
-    OP_REQUIRES_OK(ctx,
-                   cinfo_.resource_manager()->LookupOrCreate<Var>(
-                       cinfo_.container(), cinfo_.name(), &var, creator));
-    // Output a reference to our tensor, so it may be updated.
-    //
-    // As long as the resource manager hasn't been cleared the ref we return
-    // here is valid because it owns a ref on var.
-    ctx->set_output_ref(0, var->mu(), var->tensor());
-    if (ctx->track_allocations() && var->tensor()->IsInitialized()) {
-      AllocatorAttributes attr;
-      attr.set_gpu_compatible(true);
-      attr.set_nic_compatible(true);
-      if (ctx->allocate_on_host(attr)) {
-        ctx->record_host_persistent_memory_allocation(
-            var->tensor()->AllocatedBytes());
-      } else {
-        ctx->record_device_persistent_memory_allocation(
-            var->tensor()->AllocatedBytes());
-      }
-    }
-    var->Unref();
-  }
+  explicit VariableOp(OpKernelConstruction* context);
+  void Compute(OpKernelContext* ctx) override;
 
  private:
   DataType dtype_;
@@ -102,112 +68,6 @@ class VariableOp : public OpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(VariableOp);
 };
 
-class TemporaryVariableOp : public OpKernel {
- public:
-  explicit TemporaryVariableOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
-    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
-    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
-    // Variable name defaults to op name if not specified explicitly.
-    if (var_name_ == "") var_name_ = name();
-  }
-
-  void Compute(OpKernelContext* context) override {
-    Status s;
-    ResourceMgr* rm = context->resource_manager();
-    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
-    auto* tmp_var = new TmpVar;
-    OP_REQUIRES(context, tmp_var,
-                errors::ResourceExhausted("Could not allocate TmpVar."));
-    tmp_var->name = var_name_;
-    s = context->allocate_temp(dtype_, shape_, &tmp_var->val);
-    if (!s.ok()) tmp_var->Unref();
-    OP_REQUIRES_OK(context, s);
-    OP_REQUIRES_OK(context, rm->Create(context->step_container()->name(),
-                                       var_name_, tmp_var));
-    context->set_output_ref(0, &tmp_var->mu, &tmp_var->val);
-    if (context->track_allocations()) {
-      AllocatorAttributes attr;
-      if (context->allocate_on_host(attr)) {
-        context->record_host_persistent_memory_allocation(
-            tmp_var->val.AllocatedBytes());
-      } else {
-        context->record_device_persistent_memory_allocation(
-            tmp_var->val.AllocatedBytes());
-      }
-    }
-  }
-
- private:
-  // Refcounted temporary variable resource.
-  friend class DestroyTemporaryVariableOp;
-  struct TmpVar : public ResourceBase {
-    mutex mu;
-    Tensor val;
-    string name;
-    string DebugString() override { return name; }
-    ~TmpVar() override { VLOG(3) << "TmpVar " << name << " deleted"; }
-  };
-
-  TensorShape shape_;
-  DataType dtype_;
-  string var_name_;
-};
-
-class DestroyTemporaryVariableOp : public OpKernel {
- public:
-  explicit DestroyTemporaryVariableOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES(context, IsRefType(context->input_type(0)),
-                errors::InvalidArgument("lhs input needs to be a ref type"))
-    OP_REQUIRES_OK(context, context->GetAttr("var_name", &var_name_));
-    OP_REQUIRES(context, var_name_ != "",
-                errors::InvalidArgument("Missing var_name attribute"));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    // NOTE(pbar): All other mutators of the Tensor Ref *must* have completed
-    // their execution before this DestroyTemporaryVariable op executes.
-    // This is typically achieved using control dependencies.
-    CHECK(IsRefType(context->input_dtype(0)));
-    Tensor tmpvar = context->mutable_input(0, false);
-    context->set_output(0, tmpvar);
-    ResourceMgr* rm = context->resource_manager();
-    OP_REQUIRES(context, rm, errors::Internal("No per-step resource manager."));
-    OP_REQUIRES_OK(context, rm->Delete<TemporaryVariableOp::TmpVar>(
-                                context->step_container()->name(), var_name_));
-    if (context->track_allocations()) {
-      if (context->allocate_on_host(AllocatorAttributes())) {
-        context->record_host_persistent_memory_allocation(
-            -static_cast<int64>(tmpvar.AllocatedBytes()));
-      } else {
-        context->record_device_persistent_memory_allocation(
-            -static_cast<int64>(tmpvar.AllocatedBytes()));
-      }
-    }
-  }
-
- private:
-  string var_name_;
-};
-
-class IsVariableInitializedOp : public OpKernel {
- public:
-  IsVariableInitializedOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    // Get a mutable input tensor of the Ref input.
-    const Tensor& input_tensor = context->mutable_input(0, false);
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(0, TensorShape({}), &output));
-    auto output_tensor = output->tensor<bool, 0>();
-    bool result = input_tensor.IsInitialized();
-    output_tensor() = result;
-  }
-};
-
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_KERNELS_VARIABLE_OPS_H_
diff --git a/tensorflow/core/kernels/where_op.cc b/tensorflow/core/kernels/where_op.cc
index 42d1365e64592c6609c6daf83678f7dbd056a23f..f92c4ed17af501eaf79523bc6977e614b8168720 100644
--- a/tensorflow/core/kernels/where_op.cc
+++ b/tensorflow/core/kernels/where_op.cc
@@ -55,14 +55,14 @@ namespace functor {
 namespace {
 template <typename T>
 int64 CountAccumulator(const T* begin, const T* end) {
-  return std::accumulate(begin, end, 0L, [](int64 accum, const T& val) {
+  return std::accumulate(begin, end, 0LL, [](int64 accum, const T& val) {
     return accum + (val != T(0));
   });
 }
 
 template <>
 int64 CountAccumulator<bool>(const bool* begin, const bool* end) {
-  return std::accumulate(begin, end, 0L);
+  return std::accumulate(begin, end, 0LL);
 }
 
 }  // namespace
@@ -131,7 +131,7 @@ class WhereCPUOp : public OpKernel {
     OP_REQUIRES(
         context, input.dtype() != DT_HALF,
         errors::Unimplemented("No WhereOp available for float16/half type on "
-                              "GPU; dying in CPU WhereOp to avoid silently "
+                              "CPU; dying in CPU WhereOp to avoid silently "
                               "creating costly copies from device."));
 
     const int input_dims = input.dims();
diff --git a/tensorflow/core/lib/bfloat16/bfloat16.cc b/tensorflow/core/lib/bfloat16/bfloat16.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a591717fd1abfc3d959d219d9ce2bde1272fd8ea
--- /dev/null
+++ b/tensorflow/core/lib/bfloat16/bfloat16.cc
@@ -0,0 +1,25 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/lib/bfloat16/bfloat16.h"
+
+#include "third_party/eigen3/Eigen/Core"
+
+namespace tensorflow {
+
+B16_DEVICE_FUNC bfloat16::operator Eigen::half() const {
+  return static_cast<Eigen::half>(float(*this));
+}
+}  // end namespace tensorflow
diff --git a/tensorflow/core/lib/bfloat16/bfloat16.h b/tensorflow/core/lib/bfloat16/bfloat16.h
new file mode 100644
index 0000000000000000000000000000000000000000..f9cca0ef2ab90c677e47d979a4636b3fc25ec919
--- /dev/null
+++ b/tensorflow/core/lib/bfloat16/bfloat16.h
@@ -0,0 +1,276 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_LIB_BFLOAT16_BFLOAT16_H_
+#define TENSORFLOW_CORE_LIB_BFLOAT16_BFLOAT16_H_
+
+#include <complex>
+
+#ifdef __CUDACC__
+// All functions callable from CUDA code must be qualified with __device__
+#define B16_DEVICE_FUNC __host__ __device__
+
+#else
+#define B16_DEVICE_FUNC
+
+#endif
+
+namespace Eigen {
+struct half;
+}
+
+namespace tensorflow {
+
+// Single precision complex.
+typedef std::complex<float> complex64;
+// Double precision complex.
+typedef std::complex<double> complex128;
+
+// see framework/bfloat16.h for description.
+struct bfloat16 {
+  B16_DEVICE_FUNC bfloat16() {}
+
+  B16_DEVICE_FUNC explicit bfloat16(const float v) {
+    if (float_isnan(v)) {
+      value = NAN_VALUE;
+      return;
+    }
+    const uint16_t* p = reinterpret_cast<const uint16_t*>(&v);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    value = p[0];
+#else
+    value = p[1];
+#endif
+  }
+
+  B16_DEVICE_FUNC explicit bfloat16(const double val)
+      : bfloat16(static_cast<float>(val)) {}
+  // Following the convention of numpy, converting between complex and
+  // float will lead to loss of imag value.
+  B16_DEVICE_FUNC explicit bfloat16(const complex64& val)
+      : bfloat16(val.real()) {}
+
+  B16_DEVICE_FUNC explicit bfloat16(const complex128& val)
+      : bfloat16(static_cast<float>(val.real())) {}
+
+  B16_DEVICE_FUNC explicit bfloat16(const unsigned short val)
+      : bfloat16(static_cast<float>(val)) {}
+
+  B16_DEVICE_FUNC explicit bfloat16(const unsigned int val)
+      : bfloat16(static_cast<float>(val)) {}
+
+  B16_DEVICE_FUNC explicit bfloat16(const int val)
+      : bfloat16(static_cast<float>(val)) {}
+
+  B16_DEVICE_FUNC explicit bfloat16(const long val)
+      : bfloat16(static_cast<float>(val)) {}
+
+  B16_DEVICE_FUNC explicit bfloat16(const long long val)
+      : bfloat16(static_cast<float>(val)) {}
+
+  template <class T>
+  B16_DEVICE_FUNC explicit bfloat16(const T& val)
+      : bfloat16(static_cast<float>(val)) {}
+
+  B16_DEVICE_FUNC explicit operator float() const {
+    float result;
+
+    uint16_t* q = reinterpret_cast<uint16_t*>(&result);
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    q[0] = value;
+    q[1] = 0;
+#else
+    q[0] = 0;
+    q[1] = value;
+#endif
+    return result;
+  }
+
+  B16_DEVICE_FUNC explicit operator bool() const {
+    return static_cast<bool>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator Eigen::half() const;
+
+  B16_DEVICE_FUNC explicit operator short() const {
+    return static_cast<short>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator int() const {
+    return static_cast<int>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator long() const {
+    return static_cast<long>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator char() const {
+    return static_cast<char>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator signed char() const {
+    return static_cast<signed char>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator unsigned char() const {
+    return static_cast<unsigned char>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator unsigned short() const {
+    return static_cast<unsigned short>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator unsigned int() const {
+    return static_cast<unsigned int>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator unsigned long() const {
+    return static_cast<unsigned long>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator unsigned long long() const {
+    return static_cast<unsigned long long>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator long long() const {
+    return static_cast<long long>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator double() const {
+    return static_cast<double>(float(*this));
+  }
+
+  B16_DEVICE_FUNC explicit operator complex64() const {
+    return complex64(float(*this), float(0.0));
+  }
+
+  B16_DEVICE_FUNC explicit operator complex128() const {
+    return complex128(double(*this), double(0.0));
+  }
+
+  static bfloat16 epsilon() {
+    bfloat16 x;
+    x.value = 0x3c00;  // 0x1.0p-7
+    return x;
+  }
+
+  uint16_t value;
+
+  // A value that represents "not a number".
+  static const uint16_t NAN_VALUE = 0x7FC0;
+
+ private:
+  B16_DEVICE_FUNC bool float_isnan(const float& x) {
+#ifdef __CUDA_ARCH__
+    return ::isnan(x);
+#else
+    return std::isnan(x);
+#endif
+  }
+};
+
+B16_DEVICE_FUNC inline std::ostream& operator<<(std::ostream& os,
+                                                const bfloat16& dt) {
+  os << static_cast<float>(dt);
+  return os;
+}
+
+B16_DEVICE_FUNC inline bfloat16 operator+(bfloat16 a, bfloat16 b) {
+  return bfloat16(static_cast<float>(a) + static_cast<float>(b));
+}
+B16_DEVICE_FUNC inline bfloat16 operator+(bfloat16 a, int b) {
+  return bfloat16(static_cast<float>(a) + static_cast<float>(b));
+}
+B16_DEVICE_FUNC inline bfloat16 operator+(int a, bfloat16 b) {
+  return bfloat16(static_cast<float>(a) + static_cast<float>(b));
+}
+B16_DEVICE_FUNC inline bfloat16 operator-(bfloat16 a, bfloat16 b) {
+  return bfloat16(static_cast<float>(a) - static_cast<float>(b));
+}
+B16_DEVICE_FUNC inline bfloat16 operator*(bfloat16 a, bfloat16 b) {
+  return bfloat16(static_cast<float>(a) * static_cast<float>(b));
+}
+B16_DEVICE_FUNC inline bfloat16 operator/(bfloat16 a, bfloat16 b) {
+  return bfloat16(static_cast<float>(a) / static_cast<float>(b));
+}
+B16_DEVICE_FUNC inline bfloat16 operator-(bfloat16 a) {
+  a.value ^= 0x8000;
+  return a;
+}
+B16_DEVICE_FUNC inline bool operator<(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) < static_cast<float>(b);
+}
+B16_DEVICE_FUNC inline bool operator<=(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) <= static_cast<float>(b);
+}
+B16_DEVICE_FUNC inline bool operator==(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) == static_cast<float>(b);
+}
+B16_DEVICE_FUNC inline bool operator!=(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) != static_cast<float>(b);
+}
+B16_DEVICE_FUNC inline bool operator>(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) > static_cast<float>(b);
+}
+B16_DEVICE_FUNC inline bool operator>=(bfloat16 a, bfloat16 b) {
+  return static_cast<float>(a) >= static_cast<float>(b);
+}
+B16_DEVICE_FUNC inline bfloat16& operator+=(bfloat16& a, bfloat16 b) {
+  a = a + b;
+  return a;
+}
+B16_DEVICE_FUNC inline bfloat16& operator-=(bfloat16& a, bfloat16 b) {
+  a = a - b;
+  return a;
+}
+B16_DEVICE_FUNC inline bfloat16 operator++(bfloat16& a) {
+  a += bfloat16(1);
+  return a;
+}
+B16_DEVICE_FUNC inline bfloat16 operator--(bfloat16& a) {
+  a -= bfloat16(1);
+  return a;
+}
+B16_DEVICE_FUNC inline bfloat16 operator++(bfloat16& a, int) {
+  bfloat16 original_value = a;
+  ++a;
+  return original_value;
+}
+B16_DEVICE_FUNC inline bfloat16 operator--(bfloat16& a, int) {
+  bfloat16 original_value = a;
+  --a;
+  return original_value;
+}
+B16_DEVICE_FUNC inline bfloat16& operator*=(bfloat16& a, bfloat16 b) {
+  a = a * b;
+  return a;
+}
+B16_DEVICE_FUNC inline bfloat16& operator/=(bfloat16& a, bfloat16 b) {
+  a = a / b;
+  return a;
+}
+}  // end namespace tensorflow
+
+namespace std {
+template <>
+struct hash<tensorflow::bfloat16> {
+  size_t operator()(const tensorflow::bfloat16& v) const {
+    return hash<float>()(static_cast<float>(v));
+  }
+};
+}  // namespace std
+
+#endif  // TENSORFLOW_CORE_LIB_BFLOAT16_BFLOAT16_H_
diff --git a/tensorflow/core/lib/core/arena.cc b/tensorflow/core/lib/core/arena.cc
index 2a04f7bd39df98a97ec7ed0f82dfdfbd8222a2da..55e481d0e60a004f2baebdcac444dd7e7cf93e66 100644
--- a/tensorflow/core/lib/core/arena.cc
+++ b/tensorflow/core/lib/core/arena.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mem.h"
@@ -113,24 +114,11 @@ void Arena::MakeNewBlock(const uint32 alignment) {
   CHECK(SatisfyAlignment(alignment));
 }
 
-// The following simple numeric routines also exist in util/math/mathutil.h
-// but we don't want to depend on that library.
-
-// Euclid's algorithm for Greatest Common Denominator.
-static uint32 GCD(uint32 x, uint32 y) {
-  while (y != 0) {
-    uint32 r = x % y;
-    x = y;
-    y = r;
-  }
-  return x;
-}
-
 static uint32 LeastCommonMultiple(uint32 a, uint32 b) {
   if (a > b) {
-    return (a / GCD(a, b)) * b;
+    return (a / MathUtil::GCD<uint32>(a, b)) * b;
   } else if (a < b) {
-    return (b / GCD(b, a)) * a;
+    return (b / MathUtil::GCD<uint32>(b, a)) * a;
   } else {
     return a;
   }
diff --git a/tensorflow/core/lib/core/status.h b/tensorflow/core/lib/core/status.h
index 3b8a322854f562c0b066e6175e23697ca6445633..58a50a70c26a63a9edd55349e2253a9ace16f1f2 100644
--- a/tensorflow/core/lib/core/status.h
+++ b/tensorflow/core/lib/core/status.h
@@ -127,9 +127,9 @@ inline tensorflow::string* TfCheckOpHelper(::tensorflow::Status v,
   return TfCheckOpHelperOutOfLine(v, msg);
 }
 
-#define TF_DO_CHECK_OK(val, level)                  \
-  while (auto _result = TfCheckOpHelper(val, #val)) \
-    LOG(level) << *(_result)
+#define TF_DO_CHECK_OK(val, level)                                \
+  while (auto _result = ::tensorflow::TfCheckOpHelper(val, #val)) \
+  LOG(level) << *(_result)
 
 #define TF_CHECK_OK(val)  TF_DO_CHECK_OK(val, FATAL)
 #define TF_QCHECK_OK(val) TF_DO_CHECK_OK(val, QFATAL)
diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h
index 89a1e26b812bf559c5e5413a58da7f5ed8947c7f..caa9642774bebec05a28b7a0c2ea71d18d6ebd1a 100644
--- a/tensorflow/core/lib/core/stringpiece.h
+++ b/tensorflow/core/lib/core/stringpiece.h
@@ -42,7 +42,7 @@ class StringPiece {
   typedef size_t size_type;
 
   // Create an empty slice.
-  StringPiece() : data_(""), size_(0) {}
+  StringPiece() : data_(nullptr), size_(0) {}
 
   // Create a slice that refers to d[0,n-1].
   StringPiece(const char* d, size_t n) : data_(d), size_(n) {}
diff --git a/tensorflow/core/lib/db/BUILD b/tensorflow/core/lib/db/BUILD
index 41b7af1b6993d967370e54f080fcd63a4483d4b6..9ff87e8d66d2575966c703a896ac9ff0bc51661a 100644
--- a/tensorflow/core/lib/db/BUILD
+++ b/tensorflow/core/lib/db/BUILD
@@ -5,21 +5,35 @@ package(default_visibility = ["//tensorflow:internal"])
 
 licenses(["notice"])  # Apache 2.0
 
-load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_copts")
 
 cc_library(
     name = "sqlite",
     srcs = ["sqlite.cc"],
     hdrs = ["sqlite.h"],
+    copts = tf_copts(),
     deps = [
-        "//tensorflow/compiler/xla:statusor",
+        ":snapfn",
         "//tensorflow/core:lib",
-        "@sqlite_archive//:sqlite",
+        "//tensorflow/core:lib_internal",
+        "@org_sqlite",
+    ],
+)
+
+cc_library(
+    name = "snapfn",
+    srcs = ["snapfn.cc"],
+    copts = tf_copts() + ["-DSQLITE_OMIT_LOAD_EXTENSION"],
+    linkstatic = 1,
+    deps = [
+        "@org_sqlite",
+        "@snappy",
     ],
 )
 
 tf_cc_test(
     name = "sqlite_test",
+    size = "small",
     srcs = ["sqlite_test.cc"],
     deps = [
         ":sqlite",
diff --git a/tensorflow/core/lib/db/snapfn.cc b/tensorflow/core/lib/db/snapfn.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4a659f41ed99ff50ebd0d0498f70a57dd715f49e
--- /dev/null
+++ b/tensorflow/core/lib/db/snapfn.cc
@@ -0,0 +1,253 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+/// \brief SQLite extension for Snappy compression
+///
+/// Snappy a compression library that trades ratio for speed, almost going a
+/// tenth as fast as memcpy().
+///
+/// FUNCTIONS
+///
+/// - snap(value: BLOB|TEXT) -> BLOB
+/// - snap(value: NULL|INT|REAL) -> value
+///
+///   Applies Snappy compression. If value is TEXT or BLOB, then it is
+///   compressed and a BLOB is returned with a byte prepended to indicate the
+///   original type. Other types are returned as-is.
+///
+/// - unsnap(value: BLOB) -> TEXT|BLOB
+/// - unsnap(value: TEXT) -> SQLITE_MISMATCH
+/// - unsnap(value: NULL|INT|REAL) -> value
+///
+///   Decompresses value created by snap(). If value is empty, then an empty
+///   blob is returned. Otherwise the original type is restored from the first
+///   byte and the remaining ones are decompressed. TEXT is not allowed as an
+///   input type. Remaining types are returned as-is.
+///
+/// PERFORMANCE CONSIDERATIONS
+///
+/// These functions are deterministic. This means SQLite ≥3.8.3 will factor
+/// them out of inner loops when constant arguments are provided. In SQLite
+/// ≥3.15.0 they can be used in the WHERE clause of partial indexes. Currently
+/// there is no support for common sub-expression elimination.
+///
+/// SQLite environments that aren't universally UTF8 will work, but should
+/// encounter superfluous charset transcodings; as this implementation encodes
+/// only UTF8 TEXT for the sake of simplicity. Contributions are welcome that
+/// register multiple sister functions for the various charsets, which use the
+/// higher order bits of the type byte to indicate encoding.
+///
+/// SUPPORT MATRIX
+///
+/// - 3.20.0 (2016-05-18) What FOSS TensorFlow uses
+/// - 3.13.0 (2016-05-18) What Google uses c. 2017-12
+/// - 3.8.2  (2013-12-06) Used by Ubuntu 14.04
+///
+/// MANUAL COMPILATION
+///
+/// $ sudo apt-get install libsqlite3-dev libsnappy-dev
+/// $ c++ -shared --std=c++11 -o libsnapfn.so -fPIC snapfn.cc -lsnappy
+///
+/// $ sqlite3
+/// sqlite> .load libsnapfn.so
+/// sqlite> select hex(snap('aaaaaaaaaaaaaaaaa'));
+/// 031100613E0100
+/// sqlite> select unsnap(x'031100613E0100');
+/// aaaaaaaaaaaaaaaaa
+///
+/// $ python
+/// >>> import sqlite3
+/// >>> db = sqlite3.connect(':memory:')
+/// >>> db.enable_load_extension(True)
+/// >>> db.execute('select load_extension("libsnapfn.so")')
+/// >>> db.enable_load_extension(False)
+/// >>> db.execute('select hex(snap("aaaaaaaaaaaaaaaaa"))').fetchone()[0]
+/// u'031100613E0100'
+
+#include "sqlite3ext.h"
+#include "snappy.h"
+
+SQLITE_EXTENSION_INIT1
+
+static void snap(sqlite3_context* ctx, int /*argc*/, sqlite3_value** argv) {
+  const char* data;
+  int type = sqlite3_value_type(argv[0]);
+  switch (type) {
+    case SQLITE_NULL:
+      return;
+    case SQLITE_INTEGER:
+      sqlite3_result_int64(ctx, sqlite3_value_int64(argv[0]));
+      return;
+    case SQLITE_FLOAT:
+      sqlite3_result_double(ctx, sqlite3_value_double(argv[0]));
+      return;
+    case SQLITE_BLOB:
+      data = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0]));
+      break;
+    case SQLITE_TEXT:
+      data = reinterpret_cast<const char*>(sqlite3_value_text(argv[0]));
+      break;
+    default:
+      sqlite3_result_error(ctx, "snap() invalid type", -1);
+      sqlite3_result_error_code(ctx, SQLITE_MISMATCH);
+      return;
+  }
+  int size = sqlite3_value_bytes(argv[0]);
+  if (size <= 0) {
+    char result[] = {static_cast<char>(type)};
+    sqlite3_result_blob(ctx, result, sizeof(result), SQLITE_TRANSIENT);
+    return;
+  }
+  size_t output_size =
+      snappy::MaxCompressedLength(static_cast<size_t>(size)) + 1;
+  if (output_size >
+      static_cast<size_t>(sqlite3_limit(sqlite3_context_db_handle(ctx),
+                                        SQLITE_LIMIT_LENGTH, -1))) {
+    sqlite3_result_error_toobig(ctx);
+    return;
+  }
+  auto output =
+      static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size)));
+  if (output == nullptr) {
+    sqlite3_result_error_nomem(ctx);
+    return;
+  }
+  *output++ = static_cast<char>(type), --output_size;
+  snappy::RawCompress(data, static_cast<size_t>(size), output, &output_size);
+  sqlite3_result_blob(ctx, output - 1, static_cast<int>(output_size + 1),
+                      sqlite3_free);
+}
+
+static void unsnap(sqlite3_context* ctx, int /*argc*/, sqlite3_value** argv) {
+  int type = sqlite3_value_type(argv[0]);
+  switch (type) {
+    case SQLITE_NULL:
+      return;
+    case SQLITE_INTEGER:
+      sqlite3_result_int64(ctx, sqlite3_value_int64(argv[0]));
+      return;
+    case SQLITE_FLOAT:
+      sqlite3_result_double(ctx, sqlite3_value_double(argv[0]));
+      return;
+    case SQLITE_BLOB:
+      break;
+    default:
+      sqlite3_result_error(ctx, "unsnap() invalid type", -1);
+      sqlite3_result_error_code(ctx, SQLITE_MISMATCH);
+      return;
+  }
+  int size = sqlite3_value_bytes(argv[0]);
+  auto blob = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0]));
+  if (size <= 0) {
+    sqlite3_result_zeroblob(ctx, 0);
+    return;
+  }
+  type = static_cast<int>(*blob++), --size;
+  if (type != SQLITE_BLOB && type != SQLITE_TEXT) {
+    sqlite3_result_error(ctx, "unsnap() first byte is invalid type", -1);
+    sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
+    return;
+  }
+  if (size == 0) {
+    if (type == SQLITE_TEXT) {
+      sqlite3_result_text(ctx, "", 0, SQLITE_STATIC);
+    } else {
+      sqlite3_result_zeroblob(ctx, 0);
+    }
+    return;
+  }
+  size_t output_size;
+  if (!snappy::GetUncompressedLength(blob, static_cast<size_t>(size),
+                                     &output_size)) {
+    sqlite3_result_error(ctx, "snappy parse error", -1);
+    sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
+    return;
+  }
+  if (output_size >
+      static_cast<size_t>(sqlite3_limit(sqlite3_context_db_handle(ctx),
+                                        SQLITE_LIMIT_LENGTH, -1))) {
+    sqlite3_result_error_toobig(ctx);
+    return;
+  }
+  auto output =
+      static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size)));
+  if (output == nullptr) {
+    sqlite3_result_error_nomem(ctx);
+    return;
+  }
+  if (!snappy::RawUncompress(blob, static_cast<size_t>(size), output)) {
+    sqlite3_result_error(ctx, "snappy message corruption", -1);
+    sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
+    sqlite3_free(output);
+    return;
+  }
+  if (type == SQLITE_TEXT) {
+    sqlite3_result_text(ctx, output, static_cast<int>(output_size),
+                        sqlite3_free);
+  } else {
+    sqlite3_result_blob(ctx, output, static_cast<int>(output_size),
+                        sqlite3_free);
+  }
+}
+
+extern "C" {
+
+#ifndef SQLITE_DETERMINISTIC
+#define SQLITE_DETERMINISTIC 0
+#endif
+
+#ifndef SQLITE_CALLBACK
+#define SQLITE_CALLBACK
+#endif
+
+SQLITE_CALLBACK int sqlite3_snapfn_init(sqlite3* db, const char** /*pzErrMsg*/,
+                                        const sqlite3_api_routines* pApi) {
+  SQLITE_EXTENSION_INIT2(pApi);
+  int rc;
+
+  rc = sqlite3_create_function_v2(
+      db,
+      "snap",                              // zFunctionName
+      1,                                   // nArg
+      SQLITE_UTF8 | SQLITE_DETERMINISTIC,  // eTextRep
+      nullptr,                             // pApp
+      snap,                                // xFunc
+      nullptr,                             // xStep
+      nullptr,                             // xFinal
+      nullptr                              // xDestroy
+  );
+  if (rc != SQLITE_OK) {
+    return rc;
+  }
+
+  rc = sqlite3_create_function_v2(
+      db,
+      "unsnap",                            // zFunctionName
+      1,                                   // nArg
+      SQLITE_UTF8 | SQLITE_DETERMINISTIC,  // eTextRep
+      nullptr,                             // pApp
+      unsnap,                              // xFunc
+      nullptr,                             // xStep
+      nullptr,                             // xFinal
+      nullptr                              // xDestroy
+  );
+  if (rc != SQLITE_OK) {
+    return rc;
+  }
+
+  return SQLITE_OK;
+}
+
+}  // extern "C"
diff --git a/tensorflow/core/lib/db/sqlite.cc b/tensorflow/core/lib/db/sqlite.cc
index 701655f622a7ec0288f1cb53818877e65839643e..cb6943379d4ebe38c79ba9097d4c3183c7b8c205 100644
--- a/tensorflow/core/lib/db/sqlite.cc
+++ b/tensorflow/core/lib/db/sqlite.cc
@@ -14,176 +14,268 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/lib/db/sqlite.h"
 
-#include "tensorflow/core/lib/io/record_reader.h"
-#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
 
-namespace tensorflow {
+extern "C" int sqlite3_snapfn_init(sqlite3*, const char**, const void*);
 
-/* static */
-xla::StatusOr<std::shared_ptr<Sqlite>> Sqlite::Open(const string& uri) {
-  sqlite3* sqlite = nullptr;
-  Status s = MakeStatus(sqlite3_open(uri.c_str(), &sqlite));
-  if (s.ok()) {
-    return std::shared_ptr<Sqlite>(new Sqlite(sqlite));
-  }
-  return s;
-}
+namespace tensorflow {
+namespace {
 
-/* static */ Status Sqlite::MakeStatus(int resultCode) {
+error::Code GetTfErrorCode(int code) {
   // See: https://sqlite.org/rescode.html
-  switch (resultCode & 0xff) {
-    case SQLITE_OK:
-    case SQLITE_ROW:   // sqlite3_step() has another row ready
-    case SQLITE_DONE:  // sqlite3_step() has finished executing
-      return Status::OK();
+  switch (code & 0xff) {
+    case SQLITE_OK:    // Successful result
+    case SQLITE_ROW:   // Step has another row ready
+    case SQLITE_DONE:  // Step has finished executing
+      return error::OK;
     case SQLITE_ABORT:  // Callback routine requested an abort
-      return errors::Aborted(sqlite3_errstr(resultCode));
+      return error::ABORTED;
     case SQLITE_READONLY:  // Attempt to write a readonly database
     case SQLITE_MISMATCH:  // Data type mismatch
-      return errors::FailedPrecondition(sqlite3_errstr(resultCode));
+      return error::FAILED_PRECONDITION;
     case SQLITE_MISUSE:    // Library used incorrectly
     case SQLITE_INTERNAL:  // Internal logic error in SQLite
-      return errors::Internal(sqlite3_errstr(resultCode));
+      return error::INTERNAL;
     case SQLITE_RANGE:  // 2nd parameter to sqlite3_bind out of range
-      return errors::OutOfRange(sqlite3_errstr(resultCode));
+      return error::OUT_OF_RANGE;
     case SQLITE_CANTOPEN:    // Unable to open the database file
     case SQLITE_CONSTRAINT:  // Abort due to constraint violation
     case SQLITE_NOTFOUND:    // Unknown opcode or statement parameter name
     case SQLITE_NOTADB:      // File opened that is not a database file
-      return errors::InvalidArgument(sqlite3_errstr(resultCode));
+      return error::INVALID_ARGUMENT;
     case SQLITE_CORRUPT:  // The database disk image is malformed
-      return errors::DataLoss(sqlite3_errstr(resultCode));
+      return error::DATA_LOSS;
     case SQLITE_AUTH:  // Authorization denied
     case SQLITE_PERM:  // Access permission denied
-      return errors::PermissionDenied(sqlite3_errstr(resultCode));
+      return error::PERMISSION_DENIED;
     case SQLITE_FULL:    // Insertion failed because database is full
     case SQLITE_TOOBIG:  // String or BLOB exceeds size limit
     case SQLITE_NOLFS:   // Uses OS features not supported on host
-      return errors::ResourceExhausted(sqlite3_errstr(resultCode));
+      return error::RESOURCE_EXHAUSTED;
     case SQLITE_BUSY:      // The database file is locked
     case SQLITE_LOCKED:    // A table in the database is locked
     case SQLITE_PROTOCOL:  // Database lock protocol error
-    case SQLITE_NOMEM:     // A malloc() failed
-      return errors::Unavailable(sqlite3_errstr(resultCode));
+    case SQLITE_NOMEM:     // Out of heap or perhaps lookaside memory
+      return error::UNAVAILABLE;
     case SQLITE_INTERRUPT:  // Operation terminated by sqlite3_interrupt
-      return errors::Cancelled(sqlite3_errstr(resultCode));
+      return error::CANCELLED;
     case SQLITE_ERROR:   // SQL error or missing database
     case SQLITE_IOERR:   // Some kind of disk I/O error occurred
     case SQLITE_SCHEMA:  // The database schema changed
     default:
-      return errors::Unknown(sqlite3_errstr(resultCode));
+      return error::UNKNOWN;
   }
 }
 
-Sqlite::Sqlite(sqlite3* db) : db_(db) {}
-
-Sqlite::~Sqlite() {
-  // close_v2 doesn't care if a stmt hasn't been GC'd yet
-  int rc = sqlite3_close_v2(db_);
-  if (rc != SQLITE_OK) {
-    LOG(ERROR) << "destruct sqlite3: " << MakeStatus(rc);
-  }
-}
-
-Status Sqlite::Close() {
-  if (db_ == nullptr) {
-    return Status::OK();
-  }
-  // If Close is explicitly called, ordering must be correct.
-  Status s = MakeStatus(sqlite3_close(db_));
-  if (s.ok()) {
-    db_ = nullptr;
-  }
-  return s;
+template <typename... Args>
+Status PrintfStatus(int rc, const char* fmt, Args&&... args) {
+  return {GetTfErrorCode(rc),
+          strings::Printf(fmt, std::forward<Args>(args)...)};
 }
 
-SqliteStatement Sqlite::Prepare(const string& sql) {
+sqlite3_stmt* PrepareRawOrDie(sqlite3* db, const char* sql) {
   sqlite3_stmt* stmt = nullptr;
-  int rc = sqlite3_prepare_v2(db_, sql.c_str(), sql.size() + 1, &stmt, nullptr);
-  if (rc == SQLITE_OK) {
-    return {stmt, SQLITE_OK, std::unique_ptr<string>(nullptr)};
-  } else {
-    return {nullptr, rc, std::unique_ptr<string>(new string(sql))};
-  }
+  int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr);
+  CHECK_EQ(SQLITE_OK, rc) << sql;
+  return stmt;
 }
 
-Status SqliteStatement::status() const {
-  Status s = Sqlite::MakeStatus(error_);
-  if (!s.ok()) {
-    if (stmt_ != nullptr) {
-      errors::AppendToMessage(&s, sqlite3_sql(stmt_));
-    } else {
-      errors::AppendToMessage(&s, *prepare_error_sql_);
+Status SetPragma(Sqlite* db, const char* pragma, const StringPiece& value) {
+  if (value.empty()) return Status::OK();
+  for (auto p = value.begin(); p < value.end(); ++p) {
+    if (!(('0' <= *p && *p <= '9') || ('A' <= *p && *p <= 'Z') ||
+          ('a' <= *p && *p <= 'z') || *p == '-')) {
+      return errors::InvalidArgument("Illegal pragma character");
     }
   }
-  return s;
+  SqliteStatement stmt;
+  TF_RETURN_IF_ERROR(  // We can't use Bind*() pragma statements.
+      db->Prepare(strings::StrCat("PRAGMA ", pragma, "=", value), &stmt));
+  bool unused_done;
+  return stmt.Step(&unused_done);
 }
 
-void SqliteStatement::CloseOrLog() {
-  if (stmt_ != nullptr) {
-    int rc = sqlite3_finalize(stmt_);
-    if (rc != SQLITE_OK) {
-      LOG(ERROR) << "destruct sqlite3_stmt: " << Sqlite::MakeStatus(rc);
-    }
-    stmt_ = nullptr;
-  }
+const StringPiece GetEnv(const char* var) {
+  const char* val = std::getenv(var);
+  return (val == nullptr) ? StringPiece() : StringPiece(val);
+}
+
+Status EnvPragma(Sqlite* db, const char* pragma, const char* var) {
+  TF_RETURN_WITH_CONTEXT_IF_ERROR(SetPragma(db, pragma, GetEnv(var)), "getenv(",
+                                  var, ")");
+  return Status::OK();
 }
 
-Status SqliteStatement::Close() {
-  if (stmt_ == nullptr) {
-    return Status::OK();
+}  // namespace
+
+/* static */
+Status Sqlite::Open(const string& path, int flags, Sqlite** db) {
+  flags |= SQLITE_OPEN_PRIVATECACHE;
+  sqlite3* sqlite = nullptr;
+  int rc = sqlite3_open_v2(path.c_str(), &sqlite, flags, nullptr);
+  if (rc != SQLITE_OK) {
+    *db = nullptr;
+    return PrintfStatus(rc, "Sqlite::Open(%s) failed: %s", path.c_str(),
+                        sqlite3_errstr(rc));
   }
-  int rc = sqlite3_finalize(stmt_);
-  if (rc == SQLITE_OK) {
-    stmt_ = nullptr;
+  CHECK_EQ(SQLITE_OK, sqlite3_extended_result_codes(sqlite, 1));
+  CHECK_EQ(SQLITE_OK, sqlite3_snapfn_init(sqlite, nullptr, nullptr));
+  // Prepare these tiny privileged statements for SqliteTransaction
+  // so it can do less work, particularly in its constructor, per
+  // Google C++ Style.
+  sqlite3_stmt* begin = PrepareRawOrDie(sqlite, "BEGIN");
+  sqlite3_stmt* commit = PrepareRawOrDie(sqlite, "COMMIT");
+  sqlite3_stmt* rollback = PrepareRawOrDie(sqlite, "ROLLBACK");
+  *db = new Sqlite(sqlite, begin, commit, rollback);
+  Status s = Status::OK();
+  // Up until 2016 the default SQLite page_size was 1024. This ensures
+  // the new default regardless of linkage unless configured otherwise.
+  s.Update(SetPragma(*db, "page_size", "4096"));
+  // TensorFlow is designed to work well in all SQLite modes. However
+  // users might find tuning some these pragmas rewarding, depending on
+  // various considerations. Pragmas are set on a best-effort basis and
+  // might be ignored.
+  s.Update(EnvPragma(*db, "secure_delete", "TF_SQLITE_SECURE_DELETE"));
+  s.Update(EnvPragma(*db, "page_size", "TF_SQLITE_PAGE_SIZE"));
+  s.Update(EnvPragma(*db, "journal_mode", "TF_SQLITE_JOURNAL_MODE"));
+  s.Update(EnvPragma(*db, "synchronous", "TF_SQLITE_SYNCHRONOUS"));
+  s.Update(EnvPragma(*db, "mmap_size", "TF_SQLITE_MMAP_SIZE"));
+  s.Update(EnvPragma(*db, "locking_mode", "TF_SQLITE_LOCKING_MODE"));
+  s.Update(EnvPragma(*db, "cache_size", "TF_SQLITE_CACHE_SIZE"));
+  s.Update(EnvPragma(*db, "auto_vacuum", "TF_SQLITE_AUTO_VACUUM"));
+  DCHECK((*db)->RefCountIsOne());
+  if (!s.ok()) {
+    (*db)->Unref();
+    *db = nullptr;
   }
-  Update(rc);
-  return status();
+  return s;
 }
 
-void SqliteStatement::Reset() {
-  if (TF_PREDICT_TRUE(stmt_ != nullptr)) {
-    sqlite3_reset(stmt_);
-    sqlite3_clear_bindings(stmt_);  // not nullptr friendly
+Sqlite::~Sqlite() {
+  sqlite3_finalize(rollback_);
+  sqlite3_finalize(commit_);
+  sqlite3_finalize(begin_);
+  CHECK_EQ(SQLITE_OK, sqlite3_close(db_));
+}
+
+Status Sqlite::Prepare(const StringPiece& sql, SqliteStatement* stmt) {
+  SqliteLock lock(*this);
+  sqlite3_stmt* ps = nullptr;
+  int rc = sqlite3_prepare_v2(db_, sql.data(), static_cast<int>(sql.size()),
+                              &ps, nullptr);
+  if (rc != SQLITE_OK) {
+    *stmt = SqliteStatement();
+    return PrintfStatus(rc, "Prepare() failed: [%d] %s: %.*s", rc, errmsg(),
+                        sql.size(), sql.data());
   }
-  error_ = SQLITE_OK;
+  *stmt = SqliteStatement(this, ps);
+  return Status::OK();
 }
 
-Status SqliteStatement::Step(bool* isDone) {
-  if (TF_PREDICT_FALSE(error_ != SQLITE_OK)) {
-    *isDone = true;
-    return status();
+Status SqliteStatement::Step(bool* is_done) {
+  DCHECK(stmt_ != nullptr);
+  if (TF_PREDICT_FALSE(bind_error_ != SQLITE_OK)) {
+    *is_done = true;
+    return PrintfStatus(bind_error_, "Bind(%d) failed: %s: %s",
+                        bind_error_parameter_, sqlite3_errstr(bind_error_),
+                        sql());
   }
+  SqliteLock lock(*db_);
   int rc = sqlite3_step(stmt_);
   switch (rc) {
     case SQLITE_ROW:
-      *isDone = false;
+      *is_done = false;
       return Status::OK();
     case SQLITE_DONE:
-      *isDone = true;
+      *is_done = true;
       return Status::OK();
     default:
-      *isDone = true;
-      error_ = rc;
-      return status();
+      *is_done = true;
+      return PrintfStatus(rc, "Step() failed: [%d] %s: %s", rc, db_->errmsg(),
+                          sql());
   }
 }
 
-Status SqliteStatement::StepAndReset() {
-  if (TF_PREDICT_FALSE(error_ != SQLITE_OK)) {
-    return status();
+bool SqliteStatement::StepOrDie() {
+  bool is_done;
+  TF_CHECK_OK(Step(&is_done));
+  return !is_done;
+}
+
+Status SqliteStatement::StepOnce() {
+  bool is_done;
+  TF_RETURN_IF_ERROR(Step(&is_done));
+  if (TF_PREDICT_FALSE(is_done)) {
+    return errors::Internal("No rows returned: ", sql());
   }
-  Status s;
-  int rc = sqlite3_step(stmt_);
-  if (rc != SQLITE_DONE) {
-    if (rc == SQLITE_ROW) {
-      s.Update(errors::Internal("unexpected sqlite row"));
-    } else {
-      s.Update(Sqlite::MakeStatus(rc));
-    }
+  return Status::OK();
+}
+
+const SqliteStatement& SqliteStatement::StepOnceOrDie() {
+  TF_CHECK_OK(StepOnce());
+  return *this;
+}
+
+Status SqliteStatement::StepAndReset() {
+  bool is_done;
+  Status s = Step(&is_done);
+  if (TF_PREDICT_FALSE(s.ok() && !is_done)) {
+    s = errors::Internal("Unexpected row: ", sql());
   }
   Reset();
   return s;
 }
 
+void SqliteStatement::StepAndResetOrDie() { TF_CHECK_OK(StepAndReset()); }
+
+void SqliteStatement::Reset() {
+  if (TF_PREDICT_TRUE(stmt_ != nullptr)) {
+    sqlite3_reset(stmt_);
+    sqlite3_clear_bindings(stmt_);
+  }
+  bind_error_ = SQLITE_OK;
+  size_ = 0;
+}
+
+SqliteTransaction::SqliteTransaction(Sqlite& db) : db_(&db) {
+  sqlite3_mutex_enter(sqlite3_db_mutex(db_->db_));
+  CHECK(!db_->is_in_transaction_);
+  db_->is_in_transaction_ = true;
+  Begin();
+}
+
+SqliteTransaction::~SqliteTransaction() {
+  // Rollback should only return an error if there's no transaction.
+  // Since the API performs auto-rollbacks in some cases, we ignore.
+  sqlite3_step(db_->rollback_);
+  sqlite3_reset(db_->rollback_);
+  sqlite3_reset(db_->begin_);
+  db_->is_in_transaction_ = false;
+  sqlite3_mutex_leave(sqlite3_db_mutex(db_->db_));
+}
+
+void SqliteTransaction::Begin() {
+  // This shouldn't allocate memory or perform I/O. All it does is
+  // execute OP_AutoCommit(0, 0) a.k.a. BEGIN DEFERRED which flips
+  // the sqlite3::autoCommit bit.
+  if (sqlite3_step(db_->begin_) != SQLITE_DONE) {
+    // It shouldn't be possible for this to fail since we already
+    // performed the reentrancy check.
+    LOG(FATAL) << "BEGIN failed: " << sqlite3_errmsg(db_->db_);
+  }
+}
+
+Status SqliteTransaction::Commit() {
+  int rc = sqlite3_step(db_->commit_);
+  if (rc != SQLITE_DONE) {
+    return PrintfStatus(rc, "COMMIT failed: [%d] %s", rc,
+                        sqlite3_errmsg(db_->db_));
+  }
+  sqlite3_reset(db_->commit_);
+  sqlite3_reset(db_->begin_);
+  Begin();
+  return Status::OK();
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/db/sqlite.h b/tensorflow/core/lib/db/sqlite.h
index 774852efea7b494406c89960654b1acdca1f4ac9..0faa458f1d692a103099d5b05d0400944ffdaad7 100644
--- a/tensorflow/core/lib/db/sqlite.h
+++ b/tensorflow/core/lib/db/sqlite.h
@@ -15,149 +15,211 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_DB_SQLITE_H_
 #define TENSORFLOW_CORE_LIB_DB_SQLITE_H_
 
-#include <stddef.h>
-#include <memory>
-#include <utility>
+#include <mutex>
 
 #include "sqlite3.h"
-#include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/lib/core/refcount.h"
+
+/// TensorFlow SQLite Veneer
+///
+/// - Memory safety
+/// - Less boilerplate
+/// - Removes deprecated stuff
+/// - Pretends UTF16 doesn't exist
+/// - Transaction compile-time safety
+/// - Statically loads our native extensions
+/// - Error reporting via tensorflow::Status et al.
+///
+/// SQLite>=3.8.2 needs to be supported until April 2019, which is when
+/// Ubuntu 14.04 LTS becomes EOL.
 
 namespace tensorflow {
 
+class SqliteLock;
 class SqliteStatement;
+class SqliteTransaction;
 
 /// \brief SQLite connection object.
 ///
-/// This class is a thin wrapper around `sqlite3` that makes it easier
-/// and safer to use SQLite in the TensorFlow C++ codebase. It removes
-/// deprecated APIs, improves the safety of others, adds helpers, and
-/// pretends UTF16 doesn't exist.
+/// The SQLite connection is closed automatically by the destructor.
+/// Reference counting ensures that happens after its statements are
+/// destructed.
 ///
-/// Instances are thread safe, with the exception of Close().
-class Sqlite {
+/// Instances are reference counted and can be shared between threads.
+/// This class offers the same thread safety behaviors as the SQLite
+/// API itself.
+///
+/// This veneer uses auto-commit mode by default, which means a 4ms
+/// fsync() happens after every write unless a SqliteTransaction is
+/// used or WAL mode is enabled beforehand.
+class LOCKABLE Sqlite : public core::RefCounted {
  public:
+  /// \brief Closes SQLite connection, which can take milliseconds.
+  virtual ~Sqlite();
+
   /// \brief Opens SQLite database file.
   ///
-  /// The `uri` parameter can be a filename, or a proper URI like
-  /// `file:/tmp/tf.sqlite?mode=ro&cache=private`. It can also be
-  /// `file::memory:` for testing.
+  /// Most users will want to set flags to SQLITE_OPEN_READWRITE |
+  /// SQLITE_OPEN_CREATE. There are many other open flags; here are
+  /// notes on a few of them:
   ///
-  /// See https://sqlite.org/c3ref/open.html
-  static xla::StatusOr<std::shared_ptr<Sqlite>> Open(const string& uri);
-
-  /// \brief Makes tensorflow::Status for SQLite result code.
+  /// - SQLITE_OPEN_READONLY: Allowed if no WAL journal is active.
+  /// - SQLITE_OPEN_SHAREDCACHE: Will be ignored because this veneer
+  ///   doesn't support the unlock notify API.
+  /// - SQLITE_OPEN_NOMUTEX: Means access to this connection MUST be
+  ///   serialized by the caller in accordance with the same contracts
+  ///   implemented by this API.
   ///
-  /// See https://sqlite.org/rescode.html
-  static Status MakeStatus(int resultCode);
+  /// This function sets PRAGMA values from TF_SQLITE_* environment
+  /// variables. See sqlite.cc to learn more.
+  static Status Open(const string& path, int flags, Sqlite** db);
 
-  /// \brief Destroys object and frees resources.
+  /// \brief Creates SQLite statement.
   ///
-  /// This will free the underlying object if Close was not called. If
-  /// an error code is returned then it will be logged.
+  /// This routine should never fail if sql is valid and does not
+  /// reference tables. When tables are referenced, system calls are
+  /// needed which can take microseconds. When the schema changes, this
+  /// routine will retry automatically and then possibly fail.
   ///
-  /// Note: Unlike Close() this destructor maps to sqlite3_close_v2(),
-  /// which is lax about ordering and GC friendly.
-  ~Sqlite();
+  /// The returned statement holds a reference to this object.
+  Status Prepare(const StringPiece& sql, SqliteStatement* stmt);
+  SqliteStatement PrepareOrDie(const StringPiece& sql);
 
-  /// \brief Frees underlying SQLite object.
+  /// \brief Returns extended result code of last error.
   ///
-  /// Unlike the destructor, all SqliteStatement objects must be closed
-  /// beforehand. This is a no-op if already closed
-  Status Close();
+  /// If the most recent API call was successful, the result is
+  /// undefined. The legacy result code can be obtained by saying
+  /// errcode() & 0xff.
+  int errcode() const EXCLUSIVE_LOCKS_REQUIRED(this) {
+    return sqlite3_extended_errcode(db_);
+  }
 
-  /// \brief Creates SQLite statement.
-  ///
-  /// Call result.status() to determine whether or not this operation
-  /// failed. It is also possible to punt the error checking to after
-  /// the values have been binded and Step() or ExecuteWriteQuery() is
-  /// called.
-  SqliteStatement Prepare(const string& sql);
+  /// \brief Returns pointer to current error message state.
+  const char* errmsg() const EXCLUSIVE_LOCKS_REQUIRED(this) {
+    return sqlite3_errmsg(db_);
+  }
+
+  /// \brief Returns rowid assigned to last successful insert.
+  int64 last_insert_rowid() const EXCLUSIVE_LOCKS_REQUIRED(this) {
+    return sqlite3_last_insert_rowid(db_);
+  }
+
+  /// \brief Returns number of rows directly changed by last write.
+  int64 changes() const EXCLUSIVE_LOCKS_REQUIRED(this) {
+    return sqlite3_changes(db_);
+  }
 
  private:
-  explicit Sqlite(sqlite3* db);
-  sqlite3* db_;
+  friend class SqliteLock;
+  friend class SqliteStatement;
+  friend class SqliteTransaction;
+
+  Sqlite(sqlite3* db, sqlite3_stmt* begin, sqlite3_stmt* commit,
+         sqlite3_stmt* rollback) noexcept
+      : db_(db),
+        begin_(begin),
+        commit_(commit),
+        rollback_(rollback) {}
+
+  sqlite3* const db_;
+  sqlite3_stmt* const begin_;
+  sqlite3_stmt* const commit_;
+  sqlite3_stmt* const rollback_;
+  bool is_in_transaction_ = false;
+
   TF_DISALLOW_COPY_AND_ASSIGN(Sqlite);
 };
 
-/// \brief SQLite prepared statement cursor object.
+/// \brief SQLite prepared statement.
 ///
-/// This class tracks error state internally, like Status::Update.
+/// Instances can only be shared between threads if caller serializes
+/// access from first Bind*() to *Reset().
 ///
-/// Instances of this class are not thread safe.
+/// When reusing a statement in a loop, be certain to not have jumps
+/// betwixt Bind*() and *Reset().
 class SqliteStatement {
  public:
-  /// \brief Constructs empty statement that should be assigned later.
-  SqliteStatement() : stmt_(nullptr), error_(SQLITE_OK) {}
+  /// \brief Initializes an empty statement to be assigned later.
+  SqliteStatement() noexcept = default;
 
-  /// \brief Empties object and finalizes statement if needed.
-  ~SqliteStatement() { CloseOrLog(); }
+  /// \brief Finalizes statement.
+  ///
+  /// This can take milliseconds if it was blocking the Sqlite
+  /// connection object from being freed.
+  ~SqliteStatement() {
+    sqlite3_finalize(stmt_);
+    if (db_ != nullptr) db_->Unref();
+  }
 
-  /// \brief Move constructor, after which <other> should not be used.
-  SqliteStatement(SqliteStatement&& other);
+  /// \brief Returns true if statement is initialized.
+  explicit operator bool() const { return stmt_ != nullptr; }
 
-  /// \brief Move assignment, after which <other> should not be used.
-  SqliteStatement& operator=(SqliteStatement&& other);
+  /// \brief Returns SQL text from when this query was prepared.
+  const char* sql() const { return sqlite3_sql(stmt_); }
 
-  /// \brief Returns true if statement is not empty.
-  operator bool() const { return stmt_ != nullptr; }
+  /// \brief Number of bytes bound since last *Reset().
+  uint64 size() { return size_; }
 
-  /// \brief Returns SQLite result code state.
+  /// \brief Executes query for fetching arbitrary rows.
   ///
-  /// This will be SQLITE_OK unless an error happened. If multiple
-  /// errors happened, only the first error code will be returned.
-  int error() const { return error_; }
-
-  /// \brief Returns error() as a tensorflow::Status.
-  Status status() const;
-
-  /// \brief Finalize statement object.
+  /// `is_done` will always be set to true unless SQLITE_ROW is
+  /// returned by the underlying API. If status() is already in an
+  /// error state, then this method is a no-op and the existing status
+  /// is returned.
   ///
-  /// Please note that the destructor can also do this. This method is
-  /// a no-op if already closed.
-  Status Close();
+  /// The OrDie version returns `!is_done` which, if true, indicates a
+  /// row is available.
+  ///
+  /// This statement should be Reset() or destructed when when finished
+  /// with the result.
+  Status Step(bool* is_done);
+  bool StepOrDie() TF_MUST_USE_RESULT;
 
-  /// \brief Executes query and/or fetches next row.
+  /// \brief Executes query when only one row is desired.
+  ///
+  /// If a row isn't returned, an internal error Status is returned
+  /// that won't be reflected in the connection error state.
   ///
-  /// `isDone` will always be set to true unless SQLITE_ROW is returned
-  /// by the underlying API. If status() is already in an error state,
-  /// then this method is a no-op and the existing status is returned.
-  Status Step(bool* isDone);
+  /// This statement should be Reset() or destructed when when finished
+  /// with the result.
+  Status StepOnce();
+  const SqliteStatement& StepOnceOrDie();
 
-  /// \brief Executes query that returns no data.
+  /// \brief Executes query, ensures zero rows returned, then Reset().
   ///
-  /// This helper calls Step(), ensures SQLITE_DONE was returned, then
-  /// resets the statement and clears the bindings. If status() is
-  /// already in an error state, then this method is a no-op and the
-  /// existing status is returned.
+  /// If a row is returned, an internal error Status is returned that
+  /// won't be reflected in the connection error state.
   Status StepAndReset();
+  void StepAndResetOrDie();
 
   /// \brief Resets statement so it can be executed again.
   ///
-  /// - Resets the prepared statement
-  /// - Sets all Bind*() values to NULL
-  ///
-  /// Support for calling sqlite3_reset() and sqlite3_clear_bindings()
-  /// independently may be added in the future if a compelling use case
-  /// can be demonstrated.
+  /// Implementation note: This method diverges from canonical API
+  /// behavior by calling sqlite3_clear_bindings() in addition to
+  /// sqlite3_reset(). That makes the veneer safer; we haven't found a
+  /// super compelling reason yet to call them independently.
   void Reset();
 
   /// \brief Binds signed 64-bit integer to 1-indexed query parameter.
   void BindInt(int parameter, int64 value) {
-    Update(sqlite3_bind_int64(stmt_, parameter, value));
+    Update(sqlite3_bind_int64(stmt_, parameter, value), parameter);
+    size_ += sizeof(int64);
   }
-  void BindInt(const string& parameter, int64 value) {
+  void BindInt(const char* parameter, int64 value) {
     BindInt(GetParameterIndex(parameter), value);
   }
 
   /// \brief Binds double to 1-indexed query parameter.
   void BindDouble(int parameter, double value) {
-    Update(sqlite3_bind_double(stmt_, parameter, value));
+    Update(sqlite3_bind_double(stmt_, parameter, value), parameter);
+    size_ += sizeof(double);
   }
-  void BindDouble(const string& parameter, double value) {
+  void BindDouble(const char* parameter, double value) {
     BindDouble(GetParameterIndex(parameter), value);
   }
 
@@ -166,69 +228,67 @@ class SqliteStatement {
   /// If NUL characters are present, they will still go in the DB and
   /// be successfully retrieved by ColumnString(); however, the
   /// behavior of these values with SQLite functions is undefined.
-  void BindText(int parameter, const string& text) {
+  ///
+  /// When using the unsafe methods, the data must not be changed or
+  /// freed until this statement is Reset() or finalized.
+  void BindText(int parameter, const StringPiece& text) {
     Update(sqlite3_bind_text64(stmt_, parameter, text.data(), text.size(),
-                               SQLITE_TRANSIENT, SQLITE_UTF8));
+                               SQLITE_TRANSIENT, SQLITE_UTF8), parameter);
+    size_ += text.size();
   }
-  void BindText(const string& parameter, const string& text) {
+  void BindText(const char* parameter, const StringPiece& text) {
     BindText(GetParameterIndex(parameter), text);
   }
-
-  /// \brief Copies binary data to 1-indexed query parameter.
-  void BindBlob(int parameter, const string& blob) {
-    Update(sqlite3_bind_blob64(stmt_, parameter, blob.data(), blob.size(),
-                               SQLITE_TRANSIENT));
-  }
-  void BindBlob(const string& parameter, const string& blob) {
-    BindBlob(GetParameterIndex(parameter), blob);
-  }
-
-  /// \brief Binds UTF-8 text to 1-indexed query parameter.
-  ///
-  /// The contents of `text` must not be changed or freed until Reset()
-  /// or Close() is called.
-  ///
-  /// If NUL characters are present, they will still go in the DB and
-  /// be successfully retrieved by ColumnString(); however, the
-  /// behavior of these values with SQLite functions is undefined.
-  void BindTextUnsafe(int parameter, const string& text) {
+  void BindTextUnsafe(int parameter, const StringPiece& text) {
     Update(sqlite3_bind_text64(stmt_, parameter, text.data(), text.size(),
-                               SQLITE_STATIC, SQLITE_UTF8));
+                               SQLITE_STATIC, SQLITE_UTF8), parameter);
+    size_ += text.size();
   }
-  void BindTextUnsafe(const string& parameter, const string& text) {
+  void BindTextUnsafe(const char* parameter, const StringPiece& text) {
     BindTextUnsafe(GetParameterIndex(parameter), text);
   }
 
-  /// \brief Binds binary data to 1-indexed query parameter.
+  /// \brief Copies binary data to 1-indexed query parameter.
   ///
-  /// The contents of `blob` must not be changed or freed until Reset()
-  /// or Close() is called.
-  void BindBlobUnsafe(int parameter, const string& blob) {
+  /// When using the unsafe methods, the data must not be changed or
+  /// freed until this statement is Reset() or finalized.
+  void BindBlob(int parameter, const StringPiece& blob) {
+    Update(sqlite3_bind_blob64(stmt_, parameter, blob.data(), blob.size(),
+                               SQLITE_TRANSIENT), parameter);
+    size_ += blob.size();
+  }
+  void BindBlob(const char* parameter, const StringPiece& blob) {
+    BindBlob(GetParameterIndex(parameter), blob);
+  }
+  void BindBlobUnsafe(int parameter, const StringPiece& blob) {
     Update(sqlite3_bind_blob64(stmt_, parameter, blob.data(), blob.size(),
-                               SQLITE_STATIC));
+                               SQLITE_STATIC), parameter);
+    size_ += blob.size();
   }
-  void BindBlobUnsafe(const string& parameter, const string& text) {
+  void BindBlobUnsafe(const char* parameter, const StringPiece& text) {
     BindBlobUnsafe(GetParameterIndex(parameter), text);
   }
 
   /// \brief Returns number of columns in result set.
-  int ColumnCount() TF_MUST_USE_RESULT { return sqlite3_column_count(stmt_); }
+  int ColumnCount() const TF_MUST_USE_RESULT {
+    return sqlite3_column_count(stmt_);
+  }
 
   /// \brief Returns type of 0-indexed column value in row data.
   ///
   /// Please note that SQLite is dynamically typed and the type of a
   /// particular column can vary from row to row.
-  int ColumnType(int column) TF_MUST_USE_RESULT {
+  int ColumnType(int column) const TF_MUST_USE_RESULT {
     return sqlite3_column_type(stmt_, column);
   }
 
   /// \brief Returns 0-indexed column from row result coerced as an integer.
-  int64 ColumnInt(int column) TF_MUST_USE_RESULT {
+  int64 ColumnInt(int column) const TF_MUST_USE_RESULT {
     return sqlite3_column_int64(stmt_, column);
   }
 
   /// \brief Returns 0-indexed column from row result coerced as a double.
-  double ColumnDouble(int column) TF_MUST_USE_RESULT {
+  double ColumnDouble(int column) const TF_MUST_USE_RESULT {
     return sqlite3_column_double(stmt_, column);
   }
 
@@ -236,80 +296,153 @@ class SqliteStatement {
   ///
   /// NULL values are returned as empty string. This method should be
   /// used for both BLOB and TEXT columns. See also: ColumnType().
-  string ColumnString(int column) TF_MUST_USE_RESULT {
+  string ColumnString(int column) const TF_MUST_USE_RESULT {
     auto data = sqlite3_column_blob(stmt_, column);
-    if (data == nullptr) {
-      return "";
-    }
+    if (data == nullptr) return "";
     return {static_cast<const char*>(data),
             static_cast<size_t>(ColumnSize(column))};
   }
 
   /// \brief Returns pointer to binary data at 0-indexed column.
   ///
-  /// The returned memory will be mutated or freed the next time
-  /// Step() or Reset() is called. No NUL terminator is added. See
-  /// ColumnSize(). Please note that an empty BLOB is NULL.
-  const char* ColumnStringUnsafe(int column) TF_MUST_USE_RESULT {
-    return static_cast<const char*>(sqlite3_column_blob(stmt_, column));
+  /// Empty values are returned as NULL. The returned memory will no
+  /// longer be valid the next time Step() or Reset() is called. No NUL
+  /// terminator is added.
+  StringPiece ColumnStringUnsafe(int column) const TF_MUST_USE_RESULT {
+    return {static_cast<const char*>(sqlite3_column_blob(stmt_, column)),
+            static_cast<size_t>(ColumnSize(column))};
   }
 
   /// \brief Returns number of bytes stored at 0-indexed column.
-  int ColumnSize(int column) TF_MUST_USE_RESULT {
+  int ColumnSize(int column) const TF_MUST_USE_RESULT {
     return sqlite3_column_bytes(stmt_, column);
   }
 
+  /// \brief Move constructor, after which <other> is reset to empty.
+  SqliteStatement(SqliteStatement&& other) noexcept
+      : db_(other.db_),
+        stmt_(other.stmt_),
+        bind_error_(other.bind_error_) {
+    other.db_ = nullptr;
+    other.stmt_ = nullptr;
+    other.bind_error_ = SQLITE_OK;
+  }
+
+  /// \brief Move assignment, after which <other> is reset to empty.
+  SqliteStatement& operator=(SqliteStatement&& other) noexcept {
+    if (&other != this) {
+      if (db_ != nullptr) db_->Unref();
+      if (stmt_ != nullptr) sqlite3_finalize(stmt_);
+      db_ = other.db_;
+      stmt_ = other.stmt_;
+      bind_error_ = other.bind_error_;
+      size_ = other.size_;
+      other.db_ = nullptr;
+      other.stmt_ = nullptr;
+      other.bind_error_ = SQLITE_OK;
+      other.size_ = 0;
+    }
+    return *this;
+  }
+
  private:
-  friend Sqlite;
-  SqliteStatement(sqlite3_stmt* stmt, int error,
-                  std::unique_ptr<string> prepare_error_sql)
-      : stmt_(stmt),
-        error_(error),
-        prepare_error_sql_(std::move(prepare_error_sql)) {}
-  void CloseOrLog();
-
-  void Update(int rc) {
+  friend class Sqlite;
+
+  SqliteStatement(Sqlite* db, sqlite3_stmt* stmt) noexcept
+      : db_(db), stmt_(stmt) {
+    db_->Ref();
+  }
+
+  void Update(int rc, int parameter) {
+    // Binding strings can fail if they exceed length limit.
     if (TF_PREDICT_FALSE(rc != SQLITE_OK)) {
-      if (error_ == SQLITE_OK) {
-        error_ = rc;
+      if (bind_error_ == SQLITE_OK) {
+        bind_error_ = rc;
+        bind_error_parameter_ = parameter;
       }
     }
   }
 
-  int GetParameterIndex(const string& parameter) {
-    // Each call to this function requires O(n) strncmp().
-    int index = sqlite3_bind_parameter_index(stmt_, parameter.c_str());
-    if (TF_PREDICT_FALSE(index == 0)) {
-      Update(SQLITE_NOTFOUND);
-    }
+  int GetParameterIndex(const char* parameter) {
+    int index = sqlite3_bind_parameter_index(stmt_, parameter);
+    DCHECK(index > 0);  // OK to compile away since it'll fail again
     return index;
   }
 
-  sqlite3_stmt* stmt_;
-  int error_;
-  std::unique_ptr<string> prepare_error_sql_;
+  Sqlite* db_ = nullptr;
+  sqlite3_stmt* stmt_ = nullptr;
+  int bind_error_ = SQLITE_OK;
+  int bind_error_parameter_ = 0;
+  uint64 size_ = 0;
 
   TF_DISALLOW_COPY_AND_ASSIGN(SqliteStatement);
 };
 
-inline SqliteStatement::SqliteStatement(SqliteStatement&& other)
-    : stmt_(other.stmt_),
-      error_(other.error_),
-      prepare_error_sql_(std::move(other.prepare_error_sql_)) {
-  other.stmt_ = nullptr;
-  other.error_ = SQLITE_OK;
-}
-
-inline SqliteStatement& SqliteStatement::operator=(SqliteStatement&& other) {
-  if (&other != this) {
-    CloseOrLog();
-    stmt_ = other.stmt_;
-    error_ = other.error_;
-    prepare_error_sql_ = std::move(other.prepare_error_sql_);
-    other.stmt_ = nullptr;
-    other.error_ = SQLITE_OK;
+/// \brief Reentrant SQLite connection object lock
+///
+/// This is a no-op if SQLITE_OPEN_NOMUTEX was used.
+class SCOPED_LOCKABLE SqliteLock {
+ public:
+  explicit SqliteLock(Sqlite& db) EXCLUSIVE_LOCK_FUNCTION(db)
+      : mutex_(sqlite3_db_mutex(db.db_)) {
+    sqlite3_mutex_enter(mutex_);
   }
-  return *this;
+  SqliteLock(Sqlite& db, std::try_to_lock_t) EXCLUSIVE_LOCK_FUNCTION(db)
+      : mutex_(sqlite3_db_mutex(db.db_)) {
+    if (TF_PREDICT_FALSE(sqlite3_mutex_try(mutex_) != SQLITE_OK)) {
+      is_locked_ = false;
+    }
+  }
+  ~SqliteLock() UNLOCK_FUNCTION() {
+    if (is_locked_) sqlite3_mutex_leave(mutex_);
+  }
+  explicit operator bool() const { return is_locked_; }
+
+ private:
+  sqlite3_mutex* const mutex_;
+  bool is_locked_ = true;
+  TF_DISALLOW_COPY_AND_ASSIGN(SqliteLock);
+};
+#define SqliteLock(x) static_assert(0, "sqlite_lock_decl_missing_name");
+
+/// \brief SQLite transaction scope.
+///
+/// This class acquires an exclusive lock on the connection object (if
+/// mutexes weren't disabled) and runs BEGIN / ROLLBACK automatically.
+/// Unlike SqliteLock this scope is non-reentrant. To avoid program
+/// crashes, business logic should use the EXCLUSIVE_LOCK_FUNCTION and
+/// LOCKS_EXCLUDED annotations as much as possible.
+class SCOPED_LOCKABLE SqliteTransaction {
+ public:
+  /// \brief Locks db and begins deferred transaction.
+  ///
+  /// This will crash if a transaction is already active.
+  explicit SqliteTransaction(Sqlite& db) EXCLUSIVE_LOCK_FUNCTION(db);
+
+  /// \brief Runs ROLLBACK and unlocks.
+  ~SqliteTransaction() UNLOCK_FUNCTION();
+
+  /// \brief Commits transaction.
+  ///
+  /// If this is successful, a new transaction will be started, which
+  /// is rolled back when exiting the scope.
+  Status Commit();
+
+ private:
+  void Begin();
+  Sqlite* const db_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(SqliteTransaction);
+};
+
+#define SQLITE_EXCLUSIVE_TRANSACTIONS_REQUIRED(...) \
+  EXCLUSIVE_LOCKS_REQUIRED(__VA_ARGS__)
+#define SQLITE_TRANSACTIONS_EXCLUDED(...) LOCKS_EXCLUDED(__VA_ARGS__)
+
+inline SqliteStatement Sqlite::PrepareOrDie(const StringPiece& sql) {
+  SqliteStatement stmt;
+  TF_CHECK_OK(Prepare(sql, &stmt));
+  return stmt;
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/db/sqlite_test.cc b/tensorflow/core/lib/db/sqlite_test.cc
index ba045274adc605fbbaece7736537e8157e27cbc7..c9c76ea5f2cd30b8abe7e3c9766ce4946ca25200 100644
--- a/tensorflow/core/lib/db/sqlite_test.cc
+++ b/tensorflow/core/lib/db/sqlite_test.cc
@@ -14,13 +14,13 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/lib/db/sqlite.h"
 
-#include <limits.h>
 #include <array>
+#include <climits>
 
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/path.h"
-#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -29,23 +29,27 @@ namespace {
 class SqliteTest : public ::testing::Test {
  protected:
   void SetUp() override {
-    db_ = Sqlite::Open(":memory:").ValueOrDie();
-    auto stmt = db_->Prepare("CREATE TABLE T (a BLOB, b BLOB)");
-    TF_ASSERT_OK(stmt.StepAndReset());
+    TF_ASSERT_OK(Sqlite::Open(":memory:", SQLITE_OPEN_READWRITE, &db_));
+    db_->PrepareOrDie("CREATE TABLE T (a BLOB, b BLOB)").StepAndResetOrDie();
+  }
+
+  void TearDown() override {
+    db_->Unref();
   }
-  std::shared_ptr<Sqlite> db_;
+
+  Sqlite* db_;
   bool is_done_;
 };
 
 TEST_F(SqliteTest, InsertAndSelectInt) {
-  auto stmt = db_->Prepare("INSERT INTO T (a, b) VALUES (?, ?)");
+  auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
   stmt.BindInt(1, 3);
   stmt.BindInt(2, -7);
   TF_ASSERT_OK(stmt.StepAndReset());
   stmt.BindInt(1, 123);
   stmt.BindInt(2, -123);
   TF_ASSERT_OK(stmt.StepAndReset());
-  stmt = db_->Prepare("SELECT a, b FROM T ORDER BY b");
+  stmt = db_->PrepareOrDie("SELECT a, b FROM T ORDER BY b");
   TF_ASSERT_OK(stmt.Step(&is_done_));
   ASSERT_FALSE(is_done_);
   EXPECT_EQ(123, stmt.ColumnInt(0));
@@ -59,11 +63,11 @@ TEST_F(SqliteTest, InsertAndSelectInt) {
 }
 
 TEST_F(SqliteTest, InsertAndSelectDouble) {
-  auto stmt = db_->Prepare("INSERT INTO T (a, b) VALUES (?, ?)");
+  auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
   stmt.BindDouble(1, 6.28318530);
   stmt.BindDouble(2, 1.61803399);
   TF_ASSERT_OK(stmt.StepAndReset());
-  stmt = db_->Prepare("SELECT a, b FROM T");
+  stmt = db_->PrepareOrDie("SELECT a, b FROM T");
   TF_ASSERT_OK(stmt.Step(&is_done_));
   EXPECT_EQ(6.28318530, stmt.ColumnDouble(0));
   EXPECT_EQ(1.61803399, stmt.ColumnDouble(1));
@@ -74,11 +78,11 @@ TEST_F(SqliteTest, InsertAndSelectDouble) {
 TEST_F(SqliteTest, NulCharsInString) {
   string s;  // XXX: Want to write {2, '\0'} but not sure why not.
   s.append(static_cast<size_t>(2), '\0');
-  auto stmt = db_->Prepare("INSERT INTO T (a, b) VALUES (?, ?)");
+  auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
   stmt.BindBlob(1, s);
   stmt.BindText(2, s);
   TF_ASSERT_OK(stmt.StepAndReset());
-  stmt = db_->Prepare("SELECT a, b FROM T");
+  stmt = db_->PrepareOrDie("SELECT a, b FROM T");
   TF_ASSERT_OK(stmt.Step(&is_done_));
   EXPECT_EQ(2, stmt.ColumnSize(0));
   EXPECT_EQ(2, stmt.ColumnString(0).size());
@@ -92,58 +96,38 @@ TEST_F(SqliteTest, NulCharsInString) {
 
 TEST_F(SqliteTest, Unicode) {
   string s = "要依法治国是赞美那些谁是公义的和惩罚恶人。 - 韩非";
-  auto stmt = db_->Prepare("INSERT INTO T (a, b) VALUES (?, ?)");
+  auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
   stmt.BindBlob(1, s);
   stmt.BindText(2, s);
   TF_ASSERT_OK(stmt.StepAndReset());
-  stmt = db_->Prepare("SELECT a, b FROM T");
+  stmt = db_->PrepareOrDie("SELECT a, b FROM T");
   TF_ASSERT_OK(stmt.Step(&is_done_));
   EXPECT_EQ(s, stmt.ColumnString(0));
   EXPECT_EQ(s, stmt.ColumnString(1));
 }
 
 TEST_F(SqliteTest, StepAndResetClearsBindings) {
-  auto stmt = db_->Prepare("INSERT INTO T (a, b) VALUES (?, ?)");
+  auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
   stmt.BindInt(1, 1);
   stmt.BindInt(2, 123);
   TF_ASSERT_OK(stmt.StepAndReset());
   stmt.BindInt(1, 2);
   TF_ASSERT_OK(stmt.StepAndReset());
-  stmt = db_->Prepare("SELECT b FROM T ORDER BY a");
+  stmt = db_->PrepareOrDie("SELECT b FROM T ORDER BY a");
   TF_ASSERT_OK(stmt.Step(&is_done_));
   EXPECT_EQ(123, stmt.ColumnInt(0));
   TF_ASSERT_OK(stmt.Step(&is_done_));
   EXPECT_EQ(SQLITE_NULL, stmt.ColumnType(0));
 }
 
-TEST_F(SqliteTest, CloseBeforeFinalizeFails) {
-  auto stmt = db_->Prepare("INSERT INTO T (a, b) VALUES (?, ?)");
-  Status s = db_->Close();
-  EXPECT_FALSE(s.ok());
-}
-
-// Rather than bothering to check the status code of creating a
-// statement and every single bind call afterwards, SqliteStatement
-// is designed to carry the first error state forward to Step().
-TEST_F(SqliteTest, ErrorPuntingDoesNotReportLibraryAbuse) {
-  auto stmt = db_->Prepare("lol cat");
-  EXPECT_FALSE(stmt.status().ok());
-  EXPECT_EQ(SQLITE_ERROR, stmt.error());
-  stmt.BindInt(1, 1);
-  stmt.BindInt(2, 2);
-  Status s = stmt.Step(&is_done_);
-  EXPECT_EQ(SQLITE_ERROR, stmt.error());  // first error of several
-  EXPECT_FALSE(s.ok());
-}
-
 TEST_F(SqliteTest, SafeBind) {
   string s = "hello";
-  auto stmt = db_->Prepare("INSERT INTO T (a, b) VALUES (?, ?)");
+  auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
   stmt.BindBlob(1, s);
   stmt.BindText(2, s);
   s.at(0) = 'y';
   TF_ASSERT_OK(stmt.StepAndReset());
-  stmt = db_->Prepare("SELECT a, b FROM T");
+  stmt = db_->PrepareOrDie("SELECT a, b FROM T");
   TF_ASSERT_OK(stmt.Step(&is_done_));
   EXPECT_EQ("hello", stmt.ColumnString(0));
   EXPECT_EQ("hello", stmt.ColumnString(1));
@@ -151,42 +135,42 @@ TEST_F(SqliteTest, SafeBind) {
 
 TEST_F(SqliteTest, UnsafeBind) {
   string s = "hello";
-  auto stmt = db_->Prepare("INSERT INTO T (a, b) VALUES (?, ?)");
+  auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
   stmt.BindBlobUnsafe(1, s);
   stmt.BindTextUnsafe(2, s);
   s.at(0) = 'y';
   TF_ASSERT_OK(stmt.StepAndReset());
-  stmt = db_->Prepare("SELECT a, b FROM T");
+  stmt = db_->PrepareOrDie("SELECT a, b FROM T");
   TF_ASSERT_OK(stmt.Step(&is_done_));
   EXPECT_EQ("yello", stmt.ColumnString(0));
   EXPECT_EQ("yello", stmt.ColumnString(1));
 }
 
 TEST_F(SqliteTest, UnsafeColumn) {
-  auto stmt = db_->Prepare("INSERT INTO T (a, b) VALUES (?, ?)");
+  auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
   stmt.BindInt(1, 1);
   stmt.BindText(2, "hello");
   TF_ASSERT_OK(stmt.StepAndReset());
   stmt.BindInt(1, 2);
   stmt.BindText(2, "there");
   TF_ASSERT_OK(stmt.StepAndReset());
-  stmt = db_->Prepare("SELECT b FROM T ORDER BY a");
+  stmt = db_->PrepareOrDie("SELECT b FROM T ORDER BY a");
   TF_ASSERT_OK(stmt.Step(&is_done_));
-  const char* p = stmt.ColumnStringUnsafe(0);
-  EXPECT_EQ('h', *p);
+  StringPiece p = stmt.ColumnStringUnsafe(0);
+  EXPECT_EQ('h', *p.data());
   TF_ASSERT_OK(stmt.Step(&is_done_));
   // This will actually happen, but it's not safe to test this behavior.
-  // EXPECT_EQ('t', *p);
+  // EXPECT_EQ('t', *p.data());
 }
 
 TEST_F(SqliteTest, NamedParameterBind) {
-  auto stmt = db_->Prepare("INSERT INTO T (a) VALUES (:a)");
+  auto stmt = db_->PrepareOrDie("INSERT INTO T (a) VALUES (:a)");
   stmt.BindText(":a", "lol");
   TF_ASSERT_OK(stmt.StepAndReset());
-  stmt = db_->Prepare("SELECT COUNT(*) FROM T");
+  stmt = db_->PrepareOrDie("SELECT COUNT(*) FROM T");
   TF_ASSERT_OK(stmt.Step(&is_done_));
   EXPECT_EQ(1, stmt.ColumnInt(0));
-  stmt = db_->Prepare("SELECT a FROM T");
+  stmt = db_->PrepareOrDie("SELECT a FROM T");
   TF_ASSERT_OK(stmt.Step(&is_done_));
   EXPECT_FALSE(is_done_);
   EXPECT_EQ("lol", stmt.ColumnString(0));
@@ -195,40 +179,111 @@ TEST_F(SqliteTest, NamedParameterBind) {
 TEST_F(SqliteTest, Statement_DefaultConstructor) {
   SqliteStatement stmt;
   EXPECT_FALSE(stmt);
-  EXPECT_FALSE(stmt.StepAndReset().ok());
-  stmt = db_->Prepare("INSERT INTO T (a) VALUES (1)");
+  stmt = db_->PrepareOrDie("INSERT INTO T (a) VALUES (1)");
   EXPECT_TRUE(stmt);
   EXPECT_TRUE(stmt.StepAndReset().ok());
 }
 
 TEST_F(SqliteTest, Statement_MoveConstructor) {
-  SqliteStatement stmt{db_->Prepare("INSERT INTO T (a) VALUES (1)")};
+  SqliteStatement stmt{db_->PrepareOrDie("INSERT INTO T (a) VALUES (1)")};
   EXPECT_TRUE(stmt.StepAndReset().ok());
 }
 
 TEST_F(SqliteTest, Statement_MoveAssignment) {
-  SqliteStatement stmt1 = db_->Prepare("INSERT INTO T (a) VALUES (1)");
+  SqliteStatement stmt1 = db_->PrepareOrDie("INSERT INTO T (a) VALUES (1)");
   SqliteStatement stmt2;
   EXPECT_TRUE(stmt1.StepAndReset().ok());
-  EXPECT_FALSE(stmt2.StepAndReset().ok());
+  EXPECT_FALSE(stmt2);
   stmt2 = std::move(stmt1);
   EXPECT_TRUE(stmt2.StepAndReset().ok());
 }
 
 TEST_F(SqliteTest, PrepareFailed) {
-  SqliteStatement s = db_->Prepare("SELECT");
-  EXPECT_FALSE(s.status().ok());
-  EXPECT_NE(string::npos, s.status().error_message().find("SELECT"));
+  SqliteLock lock(*db_);
+  SqliteStatement stmt;
+  Status s = db_->Prepare("SELECT", &stmt);
+  ASSERT_FALSE(s.ok());
+  EXPECT_NE(string::npos, s.error_message().find("SELECT"));
+  EXPECT_EQ(SQLITE_ERROR, db_->errcode());
 }
 
 TEST_F(SqliteTest, BindFailed) {
-  SqliteStatement s = db_->Prepare("INSERT INTO T (a) VALUES (123)");
-  EXPECT_TRUE(s.status().ok());
-  EXPECT_EQ("", s.status().error_message());
-  s.BindInt(1, 123);
-  EXPECT_FALSE(s.status().ok());
+  auto stmt = db_->PrepareOrDie("INSERT INTO T (a) VALUES (123)");
+  stmt.BindInt(1, 123);
+  Status s = stmt.StepOnce();
   EXPECT_NE(string::npos,
-            s.status().error_message().find("INSERT INTO T (a) VALUES (123)"));
+            s.error_message().find("INSERT INTO T (a) VALUES (123)"))
+            << s.error_message();
+}
+
+TEST_F(SqliteTest, SnappyExtension) {
+  auto stmt = db_->PrepareOrDie("SELECT UNSNAP(SNAP(?))");
+  stmt.BindText(1, "hello");
+  EXPECT_EQ("hello", stmt.StepOnceOrDie().ColumnString(0));
+}
+
+TEST_F(SqliteTest, SnappyBinaryCompatibility) {
+  EXPECT_EQ(
+      "today is the end of the republic",
+      db_->PrepareOrDie("SELECT UNSNAP(X'03207C746F6461792069732074686520656E64"
+                            "206F66207468652072657075626C6963')")
+          .StepOnceOrDie()
+          .ColumnString(0));
+}
+
+TEST(SqliteOpenTest, CloseConnectionBeforeStatement_KeepsConnectionOpen) {
+  Sqlite* db;
+  TF_ASSERT_OK(Sqlite::Open(":memory:", SQLITE_OPEN_READWRITE, &db));
+  SqliteStatement stmt = db->PrepareOrDie("SELECT ? + ?");
+  db->Unref();
+  stmt.BindInt(1, 7);
+  stmt.BindInt(2, 3);
+  EXPECT_EQ(10, stmt.StepOnceOrDie().ColumnInt(0));
+}
+
+TEST_F(SqliteTest, TransactionRollback) {
+  {
+    SqliteTransaction txn(*db_);
+    auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
+    stmt.BindDouble(1, 6.28318530);
+    stmt.BindDouble(2, 1.61803399);
+    TF_ASSERT_OK(stmt.StepAndReset());
+  }
+  EXPECT_EQ(
+      0,
+      db_->PrepareOrDie("SELECT COUNT(*) FROM T").StepOnceOrDie().ColumnInt(0));
+}
+
+TEST_F(SqliteTest, TransactionCommit) {
+  {
+    SqliteTransaction txn(*db_);
+    auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
+    stmt.BindDouble(1, 6.28318530);
+    stmt.BindDouble(2, 1.61803399);
+    TF_ASSERT_OK(stmt.StepAndReset());
+    TF_ASSERT_OK(txn.Commit());
+  }
+  EXPECT_EQ(
+      1,
+      db_->PrepareOrDie("SELECT COUNT(*) FROM T").StepOnceOrDie().ColumnInt(0));
+}
+
+TEST_F(SqliteTest, TransactionCommitMultipleTimes) {
+  {
+    SqliteTransaction txn(*db_);
+    auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)");
+    stmt.BindDouble(1, 6.28318530);
+    stmt.BindDouble(2, 1.61803399);
+    TF_ASSERT_OK(stmt.StepAndReset());
+    TF_ASSERT_OK(txn.Commit());
+    stmt.BindDouble(1, 6.28318530);
+    stmt.BindDouble(2, 1.61803399);
+    TF_ASSERT_OK(stmt.StepAndReset());
+    TF_ASSERT_OK(txn.Commit());
+  }
+  EXPECT_EQ(
+      2,
+      db_->PrepareOrDie("SELECT COUNT(*) FROM T").StepOnceOrDie().ColumnInt(0));
 }
 
 }  // namespace
diff --git a/tensorflow/core/lib/gtl/iterator_range.h b/tensorflow/core/lib/gtl/iterator_range.h
index e7fea7579db6e3bd8f6f2ce6f5f8c53a40dd3d20..0ba4587fde65f9d396716acb6a7e4f491ff51e32 100644
--- a/tensorflow/core/lib/gtl/iterator_range.h
+++ b/tensorflow/core/lib/gtl/iterator_range.h
@@ -37,6 +37,10 @@ namespace gtl {
 template <typename IteratorT>
 class iterator_range {
  public:
+  using value_type = decltype(*std::declval<IteratorT>());
+  using iterator = IteratorT;
+  using const_iterator = IteratorT;
+
   iterator_range() : begin_iterator_(), end_iterator_() {}
   iterator_range(IteratorT begin_iterator, IteratorT end_iterator)
       : begin_iterator_(std::move(begin_iterator)),
diff --git a/tensorflow/core/lib/hash/hash.h b/tensorflow/core/lib/hash/hash.h
index 0fb12966afeb98bf3365e0b1df8381bc900d9765..4d312ab7e830963671a8be9d4622a5b83488d295 100644
--- a/tensorflow/core/lib/hash/hash.h
+++ b/tensorflow/core/lib/hash/hash.h
@@ -64,6 +64,13 @@ struct hash<T*> {
   }
 };
 
+template <>
+struct hash<bfloat16> {
+  size_t operator()(const bfloat16& t) const {
+    return std::hash<float>()(static_cast<float>(t));
+  }
+};
+
 template <>
 struct hash<string> {
   size_t operator()(const string& s) const {
diff --git a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
index be1fa22c69c27a5c57e3c397076a66dfe05eb035..3c310167326721e8f569ab6148622517aaf82ce5 100644
--- a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
@@ -161,7 +161,7 @@ Status SnappyOutputBuffer::Deflate() {
   }
 
   // Write length of compressed block to output buffer.
-  char* compressed_length_array = new char[4];
+  char compressed_length_array[4];
   std::fill(compressed_length_array, compressed_length_array + 4, 0);
   for (int i = 0; i < 4; i++) {
     // Little endian.
@@ -173,7 +173,6 @@ Status SnappyOutputBuffer::Deflate() {
   TF_RETURN_IF_ERROR(AddToOutputBuffer(output.data(), output.size()));
   next_in_ += avail_in_;
   avail_in_ = 0;
-  delete[] compressed_length_array;
 
   return Status::OK();
 }
diff --git a/tensorflow/core/lib/math/math_util.h b/tensorflow/core/lib/math/math_util.h
index 6f279865e7b361d7b0d2c402747c7b3476e63448..41d486f2bd142954d288f1ccdcf30d960fa2c6a7 100644
--- a/tensorflow/core/lib/math/math_util.h
+++ b/tensorflow/core/lib/math/math_util.h
@@ -16,6 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LIB_MATH_MATH_UTIL_H_
 #define TENSORFLOW_LIB_MATH_MATH_UTIL_H_
 
+#include <type_traits>
+
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -59,6 +61,29 @@ class MathUtil {
   template <typename IntegralType, bool ceil>
   static IntegralType CeilOrFloorOfRatio(IntegralType numerator,
                                          IntegralType denominator);
+
+  template <typename IntegralType>
+  static IntegralType GCD(IntegralType x, IntegralType y);
+
+  // ----------------------------------------------------------------------
+  // IPow<T>
+  //   Computes the result of raising a number to a non-negative integral power.
+  //
+  //  * T: An integral type, floating-point type, or user-defined type for which
+  //    operator*= is defined.
+  //  * base: the base "v" of the operation
+  //  * exp: the exponent "i" of the operation; must be non-negative.
+  //
+  // Computes v^i, in a way that is faster than std::pow (which supports
+  // arbitrary real exponents).
+  //
+  // When T is a floating point type, this has the same semantics as std::pow,
+  // but it is much faster. When T is an integral type, computations are
+  // performed in the value domain of T, and overflow semantics are those of T.
+  //
+  // Input validity is DCHECKed.
+  template <typename T>
+  static T IPow(T base, int exp);
 };
 
 // ---- CeilOrFloorOfRatio ----
@@ -107,6 +132,32 @@ IntegralType MathUtil::CeilOrFloorOfRatio(IntegralType numerator,
   }
 }
 
+template <typename IntegralType>
+IntegralType MathUtil::GCD(IntegralType a, IntegralType b) {
+  static_assert(std::is_unsigned<IntegralType>::value,
+                "signed GCD not supported!");
+  while (b != 0) {
+    IntegralType r = a % b;
+    a = b;
+    b = r;
+  }
+  return a;
+}
+
+// ---- IPow ----
+// Implemented with the squared exponentiation method (a.k.a. double-and-add).
+//
+// Note that "exp >>= 1" is faster than "exp /= 2" on at least one platform.
+template <typename T>
+T MathUtil::IPow(T base, int exp) {
+  DCHECK_GE(exp, 0);
+  for (T result(1);; base *= base) {
+    if ((exp & 1) != 0) result *= base;
+    exp >>= 1;
+    if (exp == 0) return result;
+  }
+}
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_LIB_MATH_MATH_UTIL_H_
diff --git a/tensorflow/core/lib/math/math_util_test.cc b/tensorflow/core/lib/math/math_util_test.cc
index eaf8c31a431728d6f728abeb50e14c443bce6d85..cad5d0d8993b5c61e82489ca942744608f7fd37a 100644
--- a/tensorflow/core/lib/math/math_util_test.cc
+++ b/tensorflow/core/lib/math/math_util_test.cc
@@ -15,12 +15,17 @@ limitations under the License.
 
 #include "tensorflow/core/lib/math/math_util.h"
 
+#include <cmath>
+#include <limits>
 #include <vector>
+
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
+namespace {
 
 // Number of arguments for each test of the CeilOrRatio method
 const int kNumTestArguments = 4;
@@ -195,4 +200,141 @@ TEST(MathUtil, CeilOfRatio) {
 #endif
 }
 
+struct GCDTestCase {
+  unsigned int x;
+  unsigned int y;
+  unsigned int gcd;
+};
+
+TEST(MathUtil, GCD) {
+  std::vector<GCDTestCase> testcases({
+      {10, 20, 10},  //
+      {27, 8, 1},    //
+      {4, 3, 1},     //
+      {6, 8, 2},     //
+      {5, 0, 5},     //
+      {5, 5, 5},     //
+      {0, 0, 0}      //
+  });
+
+  for (const auto& tc : testcases) {
+    EXPECT_EQ(tc.gcd, MathUtil::GCD<uint32>(tc.x, tc.y));
+    EXPECT_EQ(tc.gcd, MathUtil::GCD<uint32>(tc.y, tc.x));
+    EXPECT_EQ(tc.gcd, MathUtil::GCD<uint64>(tc.x, tc.y));
+    EXPECT_EQ(tc.gcd, MathUtil::GCD<uint64>(tc.y, tc.x));
+  }
+
+  const uint64 biggish_prime = 1666666667;
+  EXPECT_EQ(biggish_prime,
+            MathUtil::GCD<uint64>(biggish_prime * 3, biggish_prime * 4));
+}
+
+template <typename T>
+void TestOneIPowN() {
+  const T one{1};
+  for (int i = 0; i < 1024; ++i) {
+    // Computations are exact.
+    EXPECT_EQ(MathUtil::IPow(one, i), one);
+  }
+}
+
+template <typename T>
+void TestTwoIPowN() {
+  int limit = std::is_integral<T>::value ? std::numeric_limits<T>::digits : 63;
+  for (int i = 0; i < limit; ++i) {
+    // Computations are exact.
+    EXPECT_EQ(MathUtil::IPow(T{2}, i), static_cast<T>(1ull << i));
+  }
+}
+
+template <typename T>
+void TestFloatIPow(const int max_exponent, const T start, const T end,
+                   const T step) {
+  for (T f = start; f < end; f += step) {
+    for (int i = 0; i < max_exponent; ++i) {
+      EXPECT_FLOAT_EQ(MathUtil::IPow(f, i), pow(f, i));
+    }
+  }
+}
+
+TEST(MathUtil, IPow) {
+  TestOneIPowN<double>();
+  TestOneIPowN<float>();
+  TestOneIPowN<int>();
+  TestOneIPowN<int64>();
+  TestTwoIPowN<double>();
+  TestTwoIPowN<float>();
+  TestTwoIPowN<int>();
+  TestTwoIPowN<int64>();
+
+  EXPECT_EQ(MathUtil::IPow(3, 0), 1);
+  EXPECT_EQ(MathUtil::IPow(3, 1), 3);
+  EXPECT_EQ(MathUtil::IPow(3, 2), 9);
+  EXPECT_EQ(MathUtil::IPow(3, 3), 27);
+  EXPECT_EQ(MathUtil::IPow(3, 4), 81);
+  EXPECT_EQ(MathUtil::IPow(3, 5), 243);
+
+  TestFloatIPow<float>(13, -16.0f, 16.0f, 1.0f / 8);
+  TestFloatIPow<double>(13, -16.0, 16.0, 1.0 / 8);
+
+  TestFloatIPow<float>(13, -1.0f / (1 << 12), -1.0f / (1 << 12),
+                       1.0f / (1 << 16));
+  TestFloatIPow<double>(13, -1.0 / (1 << 12), -1.0 / (1 << 12),
+                        1.0 / (1 << 16));
+}
+
+TEST(MathUtil, IPowEdgeCases) {
+  constexpr const double kInf = std::numeric_limits<double>::infinity();
+
+  EXPECT_EQ(MathUtil::IPow(-12345.0, 79), -kInf);
+  EXPECT_EQ(MathUtil::IPow(-12345.0, 80), +kInf);
+
+  // The semantics of the edge cases that follow  are defined in the standard:
+  // http://en.cppreference.com/w/cpp/numeric/math/pow for a summary.
+
+  // 1 - These edge cases apply.
+  // pow(+0, exp), where exp is a positive odd integer, returns +0
+  EXPECT_EQ(MathUtil::IPow(+0.0, 3), +0.0);
+  // pow(-0, exp), where exp is a positive odd integer, returns -0
+  EXPECT_EQ(MathUtil::IPow(-0.0, 3), -0.0);
+  // pow(±0, exp), where exp is positive non-integer or a positive even integer,
+  // returns +0
+  EXPECT_EQ(MathUtil::IPow(+0.0, 42), +0.0);
+  EXPECT_EQ(MathUtil::IPow(-0.0, 42), +0.0);
+  // pow(base, ±0) returns 1 for any base, even when base is NaN
+  EXPECT_EQ(MathUtil::IPow(-kInf, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(-2.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(-1.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(-0.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(+0.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(+1.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(+2.0, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(+kInf, 0.0), 1.0);
+  EXPECT_EQ(MathUtil::IPow(std::numeric_limits<double>::quiet_NaN(), 0.0), 1.0);
+  // pow(-∞, exp) returns -∞ if exp is a positive odd integer
+  EXPECT_EQ(MathUtil::IPow(-kInf, 43), -kInf);
+  // pow(-∞, exp) returns +∞ if exp is a positive non-integer or even integer
+  EXPECT_EQ(MathUtil::IPow(-kInf, 42), +kInf);
+  // pow(+∞, exp) returns +∞ for any positive exp
+  EXPECT_EQ(MathUtil::IPow(+kInf, 42), +kInf);
+  EXPECT_EQ(MathUtil::IPow(+kInf, 43), +kInf);
+
+  // 2 - These do not apply due to the restricted exp range.
+  // pow(+0, exp), where exp is a negative odd integer, returns +∞ and raises
+  // FE_DIVBYZERO pow(-0, exp), where exp is a negative odd integer, returns -∞
+  // and raises FE_DIVBYZERO pow(±0, exp), where exp is negative, finite, and is
+  // an even integer or a non-integer, returns +∞ and raises FE_DIVBYZERO
+  // pow(-1, ±∞) returns 1
+  // pow(+1, exp) returns 1 for any exp, even when exp is NaN
+  // pow(±0, -∞) returns +∞ and may raise FE_DIVBYZERO
+  // pow(base, exp) returns NaN and raises FE_INVALID if base is finite and
+  // negative and exp is finite and non-integer. pow(base, -∞) returns +∞ for
+  // any |base|<1 pow(base, -∞) returns +0 for any |base|>1 pow(base, +∞)
+  // returns +0 for any |base|<1 pow(base, +∞) returns +∞ for any |base|>1
+  // pow(-∞, exp) returns -0 if exp is a negative odd integer
+  // pow(-∞, exp) returns +0 if exp is a negative non-integer or even integer
+  // pow(+∞, exp) returns +0 for any negative exp
+}
+
+}  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/monitoring/collected_metrics.h b/tensorflow/core/lib/monitoring/collected_metrics.h
index fbef25619fd4f9ad6dc6927c43d2b8750ac51804..acdb0d86edb1a15631c324afe9d535e0660c4b98 100644
--- a/tensorflow/core/lib/monitoring/collected_metrics.h
+++ b/tensorflow/core/lib/monitoring/collected_metrics.h
@@ -88,6 +88,7 @@ struct Point {
   ValueType value_type;
   int64 int64_value;
   string string_value;
+  bool bool_value;
   HistogramProto histogram_value;
 
   // start_timestamp and end_timestamp indicate the time period over which this
diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h
index 113d37e07d89f08ee725c7308122fee7d5031556..2c8e250c5631ee8a56d6871c1a61ef17efc97c82 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.h
+++ b/tensorflow/core/lib/monitoring/collection_registry.h
@@ -224,6 +224,12 @@ inline void CollectValue(const string& value, Point* const point) {
   point->string_value = value;
 }
 
+template <>
+inline void CollectValue(const bool& value, Point* const point) {
+  point->value_type = ValueType::kBool;
+  point->bool_value = value;
+}
+
 template <>
 inline void CollectValue(const HistogramProto& value, Point* const point) {
   point->value_type = ValueType::kHistogram;
diff --git a/tensorflow/core/lib/monitoring/gauge.h b/tensorflow/core/lib/monitoring/gauge.h
index 75471cfb22956deac0b0a5841fdde8ee538da30e..ec978a91935890cb0563f39ba0e6554a03d7c86e 100644
--- a/tensorflow/core/lib/monitoring/gauge.h
+++ b/tensorflow/core/lib/monitoring/gauge.h
@@ -86,8 +86,29 @@ class GaugeCell<int64> {
   TF_DISALLOW_COPY_AND_ASSIGN(GaugeCell);
 };
 
+// Explicit specialization of GaugeCell<bool>. Compared to the primary
+// template, it uses atomic values as opposed to mutex. This class is
+// thread-safe.
+template <>
+class GaugeCell<bool> {
+ public:
+  explicit GaugeCell(bool value) : value_(value) {}
+  ~GaugeCell() {}
+
+  // Atomically sets the value.
+  void Set(bool value);
+
+  // Retrieves the current value.
+  bool value() const;
+
+ private:
+  std::atomic<bool> value_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(GaugeCell);
+};
+
 // A stateful class for updating a gauge-like metric. Allowed ValueType are
-// int64 and string.
+// int64, string and bool.
 //
 // This class encapsulates a set of values (or a single value for a label-less
 // metric). Each value is identified by a tuple of labels. The class allows the
@@ -117,6 +138,9 @@ class Gauge {
   //
   // auto* integer_gauge = Gauge<int64, 0>::New("/tensorflow/integer_gauge",
   //   "Integer gauge")
+  //
+  // auto* bool_gauge = Gauge<bool, 0>::New("/tensorflow/bool_gauge",
+  //   "Bool gauge")
   template <typename... MetricDefArgs>
   static Gauge* New(MetricDefArgs&&... metric_def_args);
 
@@ -172,12 +196,17 @@ inline void GaugeCell<int64>::Set(int64 value) { value_ = value; }
 
 inline int64 GaugeCell<int64>::value() const { return value_; }
 
+inline void GaugeCell<bool>::Set(bool value) { value_ = value; }
+
+inline bool GaugeCell<bool>::value() const { return value_; }
+
 template <typename ValueType, int NumLabels>
 template <typename... MetricDefArgs>
 Gauge<ValueType, NumLabels>* Gauge<ValueType, NumLabels>::New(
     MetricDefArgs&&... metric_def_args) {
   static_assert(std::is_same<ValueType, int64>::value ||
-                    std::is_same<ValueType, string>::value,
+                    std::is_same<ValueType, string>::value ||
+                    std::is_same<ValueType, bool>::value,
                 "Gauge only allows int64 and string types.");
   return new Gauge<ValueType, NumLabels>(
       MetricDef<MetricKind::kGauge, ValueType, NumLabels>(
diff --git a/tensorflow/core/lib/monitoring/gauge_test.cc b/tensorflow/core/lib/monitoring/gauge_test.cc
index f98cfe2a3b34cfb0630865e2fd0eeef6ea4f734d..c8f673db38928b96bd4f97cbb72c1007fdc9e9bb 100644
--- a/tensorflow/core/lib/monitoring/gauge_test.cc
+++ b/tensorflow/core/lib/monitoring/gauge_test.cc
@@ -87,6 +87,28 @@ TEST(GaugeOfStringValue, GetCell) {
   EXPECT_EQ("bar", same_cell->value());
 }
 
+auto* bool_gauge =
+    Gauge<bool, 0>::New("/tensorflow/test/bool_gauge", "Gauge of bool value.");
+
+TEST(GaugeOfBoolValue, InitializedWithFalseValue) {
+  EXPECT_EQ(false, bool_gauge->GetCell()->value());
+}
+
+TEST(GaugeOfBoolValue, GetCell) {
+  auto* cell = bool_gauge->GetCell();
+  EXPECT_EQ(false, cell->value());
+
+  cell->Set(true);
+  EXPECT_EQ(true, cell->value());
+
+  auto* same_cell = bool_gauge->GetCell();
+  EXPECT_EQ(true, cell->value());
+
+  same_cell->Set(false);
+  EXPECT_EQ(false, cell->value());
+  EXPECT_EQ(false, same_cell->value());
+}
+
 }  // namespace
 }  // namespace monitoring
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h
index a7f14f9c94e67d7543382d59308ec0bd4445c190..f046842618a03f7a161a11d3b493b71be50ad988 100644
--- a/tensorflow/core/lib/monitoring/metric_def.h
+++ b/tensorflow/core/lib/monitoring/metric_def.h
@@ -28,16 +28,16 @@ namespace monitoring {
 // The different metric kinds available.
 //
 // Gauge indicates that the metric's values are instantaneous measurements of a
-// (typically) continuously varying quantity or a string value. Examples: a
-// process's current heap size, a queue's current length, the name of the binary
-// used by a process.
+// (typically) continuously varying value. Examples: a process's current heap
+// size, a queue's current length, the name of the binary used by a process,
+// whether a task is complete.
 //
 // Cumulative indicates that the metric's values represent non-negative changes
 // over specified time periods. Example: the number of rpc calls to a service.
 enum class MetricKind : int { kGauge = 0, kCumulative };
 
 // The type of the metric values.
-enum class ValueType : int { kInt64 = 0, kHistogram, kString };
+enum class ValueType : int { kInt64 = 0, kHistogram, kString, kBool };
 
 // Everything in the internal namespace is implementation details. Do not depend
 // on this.
@@ -61,6 +61,11 @@ inline ValueType GetValueType<string>() {
   return ValueType::kString;
 }
 
+template <>
+inline ValueType GetValueType<bool>() {
+  return ValueType::kBool;
+}
+
 }  // namespace internal
 
 // Abstract base class for a metric definition.
diff --git a/tensorflow/core/lib/random/random_distributions_test.cc b/tensorflow/core/lib/random/random_distributions_test.cc
index bd574cba2f38ee23aca3dda68b9def6025bdd36e..90d0dba4a7793f51472b2e5434489448eb40a498 100644
--- a/tensorflow/core/lib/random/random_distributions_test.cc
+++ b/tensorflow/core/lib/random/random_distributions_test.cc
@@ -18,9 +18,11 @@ limitations under the License.
 #include <math.h>
 #include <algorithm>
 #include <functional>
+#include <numeric>
 #include <unordered_map>
 #include <vector>
 
+#include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/lib/random/philox_random.h"
 #include "tensorflow/core/lib/random/philox_random_test_utils.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -104,12 +106,12 @@ bool CheckSamplesMoments(const std::vector<T>& samples,
 
   for (int i = 1; i <= max_moments; ++i) {
     // Calculate the theoretical mean and variance
-    const double moments_i_mean = (stride == 0)
-                                      ? theoretical_moments(i)
-                                      : std::pow(theoretical_moments(1), i);
-    const double moments_i_squared = (stride == 0)
-                                         ? theoretical_moments(2 * i)
-                                         : std::pow(theoretical_moments(2), i);
+    const double moments_i_mean =
+        (stride == 0) ? theoretical_moments(i)
+                      : MathUtil::IPow(theoretical_moments(1), i);
+    const double moments_i_squared =
+        (stride == 0) ? theoretical_moments(2 * i)
+                      : MathUtil::IPow(theoretical_moments(2), i);
     const double moments_i_var =
         moments_i_squared - moments_i_mean * moments_i_mean;
 
@@ -150,8 +152,8 @@ void UniformMomentsTest(int count, int max_moments,
   PhiloxRandom gen(seed);
   FillRandoms<UniformDistribution<PhiloxRandom, T> >(gen, &v1[0], v1.size());
   for (int stride : strides) {
-    bool status = CheckSamplesMoments<T>(v1, uniform_moments, max_moments,
-                                         stride, z_limit);
+    bool status =
+        CheckSamplesMoments(v1, uniform_moments, max_moments, stride, z_limit);
     ASSERT_TRUE(status) << " UniformMomentsTest failing. seed: " << seed;
   }
 }
@@ -182,8 +184,8 @@ void NormalMomentsTest(int count, int max_moments,
   FillRandoms<NormalDistribution<PhiloxRandom, T> >(gen, &v1[0], v1.size());
 
   for (int stride : strides) {
-    bool status = CheckSamplesMoments<T>(v1, normal_moments, max_moments,
-                                         stride, z_limit);
+    bool status =
+        CheckSamplesMoments(v1, normal_moments, max_moments, stride, z_limit);
     ASSERT_TRUE(status) << " NormalMomentsTest failing. seed: " << seed;
   }
 }
@@ -213,7 +215,7 @@ class TruncatedNormalMoments {
     }
 
     // The real computation of the moment.
-    double bias = 2.0 * std::pow(kV, n - 1) * kFV / (2.0 * kPhiV - 1.0);
+    double bias = 2.0 * MathUtil::IPow(kV, n - 1) * kFV / (2.0 * kPhiV - 1.0);
     double moment_n_minus_2 = (*this)(n - 2);
     double moment_n = (n - 1) * moment_n_minus_2 - bias;
 
@@ -244,8 +246,8 @@ void RandomParametersMomentsTest(int count, int max_moments,
       gen, &v1[0], v1.size());
 
   for (int stride : strides) {
-    bool status = CheckSamplesMoments<T>(v1, TruncatedNormalMoments(),
-                                         max_moments, stride, z_limit);
+    bool status = CheckSamplesMoments(v1, TruncatedNormalMoments(), max_moments,
+                                      stride, z_limit);
     ASSERT_TRUE(status) << " NormalMomentsTest failing. seed: " << seed;
   }
 }
diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc
index b3cca504e1d0f04ccf56bd517426e8434b57e3b6..f5822fad8e3d3b8559d19c79ee2885e580ea3e11 100644
--- a/tensorflow/core/lib/strings/numbers.cc
+++ b/tensorflow/core/lib/strings/numbers.cc
@@ -23,9 +23,6 @@ limitations under the License.
 #include <locale>
 #include <unordered_map>
 
-#include "double-conversion/double-conversion.h"
-
-#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -35,15 +32,74 @@ namespace tensorflow {
 
 namespace {
 
-static inline const double_conversion::StringToDoubleConverter& StringToFloatConverter() {
-    const static double_conversion::StringToDoubleConverter converter(
-        double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES
-        | double_conversion::StringToDoubleConverter::ALLOW_HEX
-        | double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES
-        | double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY,
-        0., 0., "inf", "nan"
-    );
-    return converter;
+template <typename T>
+T locale_independent_strtonum(const char* str, const char** endptr) {
+  static const std::unordered_map<string, T> special_nums = {
+      {"inf", std::numeric_limits<T>::infinity()},
+      {"+inf", std::numeric_limits<T>::infinity()},
+      {"-inf", -std::numeric_limits<T>::infinity()},
+      {"infinity", std::numeric_limits<T>::infinity()},
+      {"+infinity", std::numeric_limits<T>::infinity()},
+      {"-infinity", -std::numeric_limits<T>::infinity()},
+      {"nan", std::numeric_limits<T>::quiet_NaN()},
+      {"+nan", std::numeric_limits<T>::quiet_NaN()},
+      {"-nan", -std::numeric_limits<T>::quiet_NaN()},
+  };
+  std::stringstream s(str);
+
+  // Check if str is one of the special numbers.
+  string special_num_str;
+  s >> special_num_str;
+
+  for (int i = 0; i < special_num_str.length(); ++i) {
+    special_num_str[i] =
+        std::tolower(special_num_str[i], std::locale::classic());
+  }
+
+  auto entry = special_nums.find(special_num_str);
+  if (entry != special_nums.end()) {
+    *endptr = str + (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
+                             : s.tellg());
+    return entry->second;
+  } else {
+    // Perhaps it's a hex number
+    if (special_num_str.compare(0, 2, "0x") == 0 ||
+        special_num_str.compare(0, 3, "-0x") == 0) {
+      return strtol(str, const_cast<char**>(endptr), 16);
+    }
+  }
+  // Reset the stream
+  s.str(str);
+  s.clear();
+  // Use the "C" locale
+  s.imbue(std::locale::classic());
+
+  T result;
+  s >> result;
+
+  // Set to result to what strto{f,d} functions would have returned. If the
+  // number was outside the range, the stringstream sets the fail flag, but
+  // returns the +/-max() value, whereas strto{f,d} functions return +/-INF.
+  if (s.fail()) {
+    if (result == std::numeric_limits<T>::max() ||
+        result == std::numeric_limits<T>::infinity()) {
+      result = std::numeric_limits<T>::infinity();
+      s.clear(s.rdstate() & ~std::ios::failbit);
+    } else if (result == -std::numeric_limits<T>::max() ||
+               result == -std::numeric_limits<T>::infinity()) {
+      result = -std::numeric_limits<T>::infinity();
+      s.clear(s.rdstate() & ~std::ios::failbit);
+    }
+  }
+
+  if (endptr) {
+    *endptr =
+        str +
+        (s.fail() ? static_cast<std::iostream::pos_type>(0)
+                  : (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
+                             : s.tellg()));
+  }
+  return result;
 }
 
 }  // namespace
@@ -111,8 +167,8 @@ char* DoubleToBuffer(double value, char* buffer) {
     // larger than the precision we asked for.
     DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
 
-    auto parsed_value = double{};
-    full_precision_needed = !safe_strtod(buffer, &parsed_value) || parsed_value != value;
+    full_precision_needed =
+        locale_independent_strtonum<double>(buffer, nullptr) != value;
   }
 
   if (full_precision_needed) {
@@ -248,23 +304,25 @@ bool safe_strtou32(StringPiece str, uint32* value) {
 }
 
 bool safe_strtof(const char* str, float* value) {
-  int processed_characters_count = -1;
-  auto len = str_util::Strnlen(str, kFastToBufferSize);
-  *value = StringToFloatConverter().StringToFloat(
-      str,
-      len,
-      &processed_characters_count);
-  return processed_characters_count > 0;
+  const char* endptr;
+  *value = locale_independent_strtonum<float>(str, &endptr);
+  while (isspace(*endptr)) ++endptr;
+  // Ignore range errors from strtod/strtof.
+  // The values it returns on underflow and
+  // overflow are the right fallback in a
+  // robust setting.
+  return *str != '\0' && *endptr == '\0';
 }
 
 bool safe_strtod(const char* str, double* value) {
-  int processed_characters_count = -1;
-  auto len = str_util::Strnlen(str, kFastToBufferSize);
-  *value = StringToFloatConverter().StringToDouble(
-      str,
-      len,
-      &processed_characters_count);
-  return processed_characters_count > 0;
+  const char* endptr;
+  *value = locale_independent_strtonum<double>(str, &endptr);
+  while (isspace(*endptr)) ++endptr;
+  // Ignore range errors from strtod/strtof.
+  // The values it returns on underflow and
+  // overflow are the right fallback in a
+  // robust setting.
+  return *str != '\0' && *endptr == '\0';
 }
 
 char* FloatToBuffer(float value, char* buffer) {
diff --git a/tensorflow/core/lib/strings/numbers.h b/tensorflow/core/lib/strings/numbers.h
index 31b6abbac682bf682c8043caafce0d38348b8f1a..3c45b9027401999ba4e6c32005456312970cccba 100644
--- a/tensorflow/core/lib/strings/numbers.h
+++ b/tensorflow/core/lib/strings/numbers.h
@@ -122,6 +122,38 @@ bool safe_strtof(const char* str, float* value);
 // Values may be rounded on over- and underflow.
 bool safe_strtod(const char* str, double* value);
 
+inline bool ProtoParseNumeric(StringPiece s, int32* value) {
+  return safe_strto32(s, value);
+}
+
+inline bool ProtoParseNumeric(StringPiece s, uint32* value) {
+  return safe_strtou32(s, value);
+}
+
+inline bool ProtoParseNumeric(StringPiece s, int64* value) {
+  return safe_strto64(s, value);
+}
+
+inline bool ProtoParseNumeric(StringPiece s, uint64* value) {
+  return safe_strtou64(s, value);
+}
+
+inline bool ProtoParseNumeric(StringPiece s, float* value) {
+  return safe_strtof(s.ToString().c_str(), value);
+}
+
+inline bool ProtoParseNumeric(StringPiece s, double* value) {
+  return safe_strtod(s.ToString().c_str(), value);
+}
+
+// Convert strings to number of type T.
+// Leading and trailing spaces are allowed.
+// Values may be rounded on over- and underflow.
+template <typename T>
+bool SafeStringToNumeric(StringPiece s, T* value) {
+  return ProtoParseNumeric(s, value);
+}
+
 // Converts from an int64 to a human readable string representing the
 // same number, using decimal powers.  e.g. 1200000 -> "1.20M".
 string HumanReadableNum(int64 value);
diff --git a/tensorflow/core/lib/strings/numbers_test.cc b/tensorflow/core/lib/strings/numbers_test.cc
index df395c301e04217fab42219570557a6905722292..e15161de66c75ced0c9cbc9ccb2a6900dc8c7d02 100644
--- a/tensorflow/core/lib/strings/numbers_test.cc
+++ b/tensorflow/core/lib/strings/numbers_test.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/numbers.h"
 
 #include <string>
-#include <cmath>
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -278,40 +277,7 @@ TEST(safe_strtof, Float) {
   EXPECT_TRUE(safe_strtof("-0x2A", &result));
   EXPECT_EQ(-42.0f, result);
 
-  EXPECT_TRUE(safe_strtof(" -0x2", &result));
-  EXPECT_EQ(-2.0f, result);
-
-  EXPECT_TRUE(safe_strtof("8 \t", &result));
-  EXPECT_EQ(8.0f, result);
-
-  EXPECT_TRUE(safe_strtof("\t20.0\t ", &result));
-  EXPECT_EQ(20.0f, result);
-
   EXPECT_FALSE(safe_strtof("-infinity is awesome", &result));
-
-  EXPECT_TRUE(safe_strtof("-inf", &result));
-  EXPECT_EQ(-std::numeric_limits<float>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtof("+inf", &result));
-  EXPECT_EQ(std::numeric_limits<float>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtof("InF", &result));
-  EXPECT_EQ(std::numeric_limits<float>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtof("-INF", &result));
-  EXPECT_EQ(-std::numeric_limits<float>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtof("nan", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtof("-nan", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtof("-NaN", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtof("+NAN", &result));
-  EXPECT_TRUE(std::isnan(result));
 }
 
 TEST(safe_strtod, Double) {
@@ -330,41 +296,6 @@ TEST(safe_strtod, Double) {
 
   EXPECT_TRUE(safe_strtod("1e-325", &result));
   EXPECT_EQ(0, result);
-
-  EXPECT_TRUE(safe_strtod(" -0x1c", &result));
-  EXPECT_EQ(-28.0, result);
-
-  EXPECT_TRUE(safe_strtod("50 \t", &result));
-  EXPECT_EQ(50.0, result);
-
-  EXPECT_TRUE(safe_strtod("\t82.0\t ", &result));
-  EXPECT_EQ(82.0, result);
-
-  EXPECT_FALSE(safe_strtod("infinity", &result));
-
-  EXPECT_TRUE(safe_strtod("-inf", &result));
-  EXPECT_EQ(-std::numeric_limits<double>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtod("+inf", &result));
-  EXPECT_EQ(std::numeric_limits<double>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtod("InF", &result));
-  EXPECT_EQ(std::numeric_limits<double>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtod("-INF", &result));
-  EXPECT_EQ(-std::numeric_limits<double>::infinity(), result);
-
-  EXPECT_TRUE(safe_strtod("nan", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtod("-nan", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtod("-NaN", &result));
-  EXPECT_TRUE(std::isnan(result));
-
-  EXPECT_TRUE(safe_strtod("+NAN", &result));
-  EXPECT_TRUE(std::isnan(result));
 }
 
 }  // namespace strings
diff --git a/tensorflow/core/lib/strings/proto_text_util.h b/tensorflow/core/lib/strings/proto_text_util.h
index 3d0c6e4a376268e03c84270a869a4ec73b7c731d..ed6d0af0105c37e77debdab1db549d131752d615 100644
--- a/tensorflow/core/lib/strings/proto_text_util.h
+++ b/tensorflow/core/lib/strings/proto_text_util.h
@@ -118,30 +118,6 @@ class ProtoTextOutput {
   TF_DISALLOW_COPY_AND_ASSIGN(ProtoTextOutput);
 };
 
-inline bool ProtoParseNumeric(StringPiece s, int32* value) {
-  return ::tensorflow::strings::safe_strto32(s, value);
-}
-
-inline bool ProtoParseNumeric(StringPiece s, uint32* value) {
-  return ::tensorflow::strings::safe_strtou32(s, value);
-}
-
-inline bool ProtoParseNumeric(StringPiece s, int64* value) {
-  return ::tensorflow::strings::safe_strto64(s, value);
-}
-
-inline bool ProtoParseNumeric(StringPiece s, uint64* value) {
-  return ::tensorflow::strings::safe_strtou64(s, value);
-}
-
-inline bool ProtoParseNumeric(StringPiece s, float* value) {
-  return ::tensorflow::strings::safe_strtof(s.ToString().c_str(), value);
-}
-
-inline bool ProtoParseNumeric(StringPiece s, double* value) {
-  return ::tensorflow::strings::safe_strtod(s.ToString().c_str(), value);
-}
-
 inline void ProtoSpaceAndComments(Scanner* scanner) {
   for (;;) {
     scanner->AnySpace();
@@ -174,7 +150,7 @@ bool ProtoParseNumericFromScanner(Scanner* scanner, T* value) {
   }
 
   ProtoSpaceAndComments(scanner);
-  return ProtoParseNumeric(numeric_str, value);
+  return SafeStringToNumeric<T>(numeric_str, value);
 }
 
 // Parse the next boolean value from <scanner>, returning false if parsing
diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc
index 0ae6c66080a4686127237101151ed66779d3b38b..d28857803d7ef1edd66ae6c1a6b81a7ed1dbce85 100644
--- a/tensorflow/core/lib/strings/str_util.cc
+++ b/tensorflow/core/lib/strings/str_util.cc
@@ -452,13 +452,5 @@ bool SplitAndParseAsFloats(StringPiece text, char delim,
                                     result);
 }
 
-size_t Strnlen(const char* str, const size_t string_max_len) {
-  size_t len = 0;
-  while (len < string_max_len && str[len] != '\0') {
-    ++len;
-  }
-  return len;
-}
-
 }  // namespace str_util
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h
index b0d774a05ce445b794cf631776970ad273fda0d5..44c52850fa99f7688fb496784a18b651c147bb8b 100644
--- a/tensorflow/core/lib/strings/str_util.h
+++ b/tensorflow/core/lib/strings/str_util.h
@@ -83,7 +83,7 @@ string Uppercase(StringPiece s);
 
 // Converts "^2ILoveYou!" to "i_love_you_". More specifically:
 // - converts all non-alphanumeric characters to underscores
-// - replaces each occurence of a capital letter (except the very
+// - replaces each occurrence of a capital letter (except the very
 //   first character and if there is already an '_' before it) with '_'
 //   followed by this letter in lower case
 // - Skips leading non-alpha characters
@@ -209,11 +209,6 @@ std::vector<string> Split(StringPiece text, char delims, Predicate p) {
   return Split(text, StringPiece(&delims, 1), p);
 }
 
-// Returns the length of the given null-terminated byte string 'str'.
-// Returns 'string_max_len' if the null character was not found in the first
-// 'string_max_len' bytes of 'str'.
-size_t Strnlen(const char* str, const size_t string_max_len);
-
 }  // namespace str_util
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc
index 3a8de7c96b5fd01be24c981edadfd92a3f7d44b7..6d461241f7e9c5a29064c015991039d5bf95a80f 100644
--- a/tensorflow/core/lib/strings/str_util_test.cc
+++ b/tensorflow/core/lib/strings/str_util_test.cc
@@ -305,7 +305,7 @@ TEST(SplitAndParseAsInts, Int64) {
   EXPECT_EQ(nums[0], 134);
   EXPECT_EQ(nums[1], 2);
   EXPECT_EQ(nums[2], 13);
-  EXPECT_EQ(nums[3], -4000000000);
+  EXPECT_EQ(nums[3], static_cast<int64>(-4000000000ull));
 
   EXPECT_FALSE(str_util::SplitAndParseAsInts("abc", ',', &nums));
 
@@ -430,12 +430,4 @@ TEST(StringReplace, EmptyStringReplaceAll) {
   EXPECT_EQ("", str_util::StringReplace("", "a", "X", /*replace_all=*/true));
 }
 
-TEST(Strnlen, Basic) {
-  EXPECT_EQ(0, str_util::Strnlen("ab", 0));
-  EXPECT_EQ(1, str_util::Strnlen("a", 1));
-  EXPECT_EQ(2, str_util::Strnlen("abcd", 2));
-  EXPECT_EQ(3, str_util::Strnlen("abc", 10));
-  EXPECT_EQ(4, str_util::Strnlen("a \t\n", 10));
-}
-
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/strings/strcat.h b/tensorflow/core/lib/strings/strcat.h
index 8e35549ed4bdd9afa497011c1f10504b59a0f350..5835b0101d9ede219a71acf554c5928e4b624ce7 100644
--- a/tensorflow/core/lib/strings/strcat.h
+++ b/tensorflow/core/lib/strings/strcat.h
@@ -119,6 +119,9 @@ class AlphaNum {
 
   AlphaNum(float f)  // NOLINT(runtime/explicit)
       : piece_(digits_, strlen(FloatToBuffer(f, digits_))) {}
+  AlphaNum(bfloat16 f)  // NOLINT(runtime/explicit)
+      : piece_(digits_, strlen(FloatToBuffer(static_cast<float>(f), digits_))) {
+  }
   AlphaNum(double f)  // NOLINT(runtime/explicit)
       : piece_(digits_, strlen(DoubleToBuffer(f, digits_))) {}
 
diff --git a/tensorflow/core/lib/strings/strcat_test.cc b/tensorflow/core/lib/strings/strcat_test.cc
index c556b1f676b24caefdc1ad9eb9cbcaa08e943a8e..7cb186e6375fae4d8a7140dd2f9ee6e7e64ddd1a 100644
--- a/tensorflow/core/lib/strings/strcat_test.cc
+++ b/tensorflow/core/lib/strings/strcat_test.cc
@@ -46,19 +46,19 @@ TEST(StrCat, Ints) {
   const intptr_t intptr = -12;
   const uintptr_t uintptr = 13;
   string answer;
-  answer = StrCat(s, us);
+  answer = tensorflow::strings::StrCat(s, us);
   EXPECT_EQ(answer, "-12");
-  answer = StrCat(i, ui);
+  answer = tensorflow::strings::StrCat(i, ui);
   EXPECT_EQ(answer, "-34");
-  answer = StrCat(l, ul);
+  answer = tensorflow::strings::StrCat(l, ul);
   EXPECT_EQ(answer, "-56");
-  answer = StrCat(ll, ull);
+  answer = tensorflow::strings::StrCat(ll, ull);
   EXPECT_EQ(answer, "-78");
-  answer = StrCat(ptrdiff, size);
+  answer = tensorflow::strings::StrCat(ptrdiff, size);
   EXPECT_EQ(answer, "-910");
-  answer = StrCat(ssize, intptr);
+  answer = tensorflow::strings::StrCat(ssize, intptr);
   EXPECT_EQ(answer, "-11-12");
-  answer = StrCat(uintptr, 0);
+  answer = tensorflow::strings::StrCat(uintptr, 0);
   EXPECT_EQ(answer, "130");
 }
 
@@ -74,118 +74,137 @@ TEST(StrCat, Basics) {
   int32 i32s[] = {'H', 'C', 'W'};
   uint64 ui64s[] = {12345678910LL, 10987654321LL};
 
-  result = StrCat(false, true, 2, 3);
+  result = tensorflow::strings::StrCat(false, true, 2, 3);
   EXPECT_EQ(result, "0123");
 
-  result = StrCat(-1);
+  result = tensorflow::strings::StrCat(-1);
   EXPECT_EQ(result, "-1");
 
-  result = StrCat(0.5);
+  result = tensorflow::strings::StrCat(0.5);
   EXPECT_EQ(result, "0.5");
 
-  result = StrCat(strs[1], pieces[2]);
+  result = tensorflow::strings::StrCat(strs[1], pieces[2]);
   EXPECT_EQ(result, "CruelWorld");
 
-  result = StrCat(strs[0], ", ", pieces[2]);
+  result = tensorflow::strings::StrCat(strs[0], ", ", pieces[2]);
   EXPECT_EQ(result, "Hello, World");
 
-  result = StrCat(strs[0], ", ", strs[1], " ", strs[2], "!");
+  result =
+      tensorflow::strings::StrCat(strs[0], ", ", strs[1], " ", strs[2], "!");
   EXPECT_EQ(result, "Hello, Cruel World!");
 
-  result = StrCat(pieces[0], ", ", pieces[1], " ", pieces[2]);
+  result =
+      tensorflow::strings::StrCat(pieces[0], ", ", pieces[1], " ", pieces[2]);
   EXPECT_EQ(result, "Hello, Cruel World");
 
-  result = StrCat(c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
+  result =
+      tensorflow::strings::StrCat(c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
   EXPECT_EQ(result, "Hello, Cruel World");
 
-  result = StrCat("ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!");
+  result = tensorflow::strings::StrCat("ASCII ", i32s[0], ", ", i32s[1], " ",
+                                       i32s[2], "!");
   EXPECT_EQ(result, "ASCII 72, 67 87!");
 
-  result = StrCat(ui64s[0], ", ", ui64s[1], "!");
+  result = tensorflow::strings::StrCat(ui64s[0], ", ", ui64s[1], "!");
   EXPECT_EQ(result, "12345678910, 10987654321!");
 
   string one = "1";  // Actually, it's the size of this string that we want; a
                      // 64-bit build distinguishes between size_t and uint64,
                      // even though they're both unsigned 64-bit values.
-  result = StrCat("And a ", one.size(), " and a ", &result[2] - &result[0],
-                  " and a ", one, " 2 3 4", "!");
+  result = tensorflow::strings::StrCat("And a ", one.size(), " and a ",
+                                       &result[2] - &result[0], " and a ", one,
+                                       " 2 3 4", "!");
   EXPECT_EQ(result, "And a 1 and a 2 and a 1 2 3 4!");
 
   // result = StrCat("Single chars won't compile", '!');
   // result = StrCat("Neither will NULLs", NULL);
-  result = StrCat("To output a char by ASCII/numeric value, use +: ", '!' + 0);
+  result = tensorflow::strings::StrCat(
+      "To output a char by ASCII/numeric value, use +: ", '!' + 0);
   EXPECT_EQ(result, "To output a char by ASCII/numeric value, use +: 33");
 
   float f = 100000.5;
-  result = StrCat("A hundred K and a half is ", f);
+  result = tensorflow::strings::StrCat("A hundred K and a half is ", f);
   EXPECT_EQ(result, "A hundred K and a half is 100000.5");
 
   double d = f;
   d *= d;
-  result = StrCat("A hundred K and a half squared is ", d);
+  result = tensorflow::strings::StrCat("A hundred K and a half squared is ", d);
   EXPECT_EQ(result, "A hundred K and a half squared is 10000100000.25");
 
   Eigen::half h(10007.0f);
-  result = StrCat("Ten thousand seven is approximately ", h);
+  result =
+      tensorflow::strings::StrCat("Ten thousand seven is approximately ", h);
   EXPECT_EQ(result, "Ten thousand seven is approximately 10008");
 
-  result = StrCat(1, 2, 333, 4444, 55555, 666666, 7777777, 88888888, 999999999);
+  result = tensorflow::strings::StrCat(1, 2, 333, 4444, 55555, 666666, 7777777,
+                                       88888888, 999999999);
   EXPECT_EQ(result, "12333444455555666666777777788888888999999999");
 }
 
 TEST(StrCat, MaxArgs) {
   string result;
   // Test 10 up to 26 arguments, the current maximum
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a");
   EXPECT_EQ(result, "123456789a");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b");
   EXPECT_EQ(result, "123456789ab");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c");
+  result =
+      tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c");
   EXPECT_EQ(result, "123456789abc");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d");
   EXPECT_EQ(result, "123456789abcd");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d", "e");
   EXPECT_EQ(result, "123456789abcde");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d", "e", "f");
   EXPECT_EQ(result, "123456789abcdef");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d", "e", "f", "g");
   EXPECT_EQ(result, "123456789abcdefg");
-  result =
-      StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g", "h");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d", "e", "f", "g", "h");
   EXPECT_EQ(result, "123456789abcdefgh");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
-                  "h", "i");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d", "e", "f", "g", "h", "i");
   EXPECT_EQ(result, "123456789abcdefghi");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
-                  "h", "i", "j");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d", "e", "f", "g", "h", "i", "j");
   EXPECT_EQ(result, "123456789abcdefghij");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
-                  "h", "i", "j", "k");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d", "e", "f", "g", "h", "i", "j", "k");
   EXPECT_EQ(result, "123456789abcdefghijk");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
-                  "h", "i", "j", "k", "l");
+  result =
+      tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d",
+                                  "e", "f", "g", "h", "i", "j", "k", "l");
   EXPECT_EQ(result, "123456789abcdefghijkl");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
-                  "h", "i", "j", "k", "l", "m");
+  result =
+      tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d",
+                                  "e", "f", "g", "h", "i", "j", "k", "l", "m");
   EXPECT_EQ(result, "123456789abcdefghijklm");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
-                  "h", "i", "j", "k", "l", "m", "n");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d", "e", "f", "g", "h", "i", "j", "k",
+                                       "l", "m", "n");
   EXPECT_EQ(result, "123456789abcdefghijklmn");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
-                  "h", "i", "j", "k", "l", "m", "n", "o");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d", "e", "f", "g", "h", "i", "j", "k",
+                                       "l", "m", "n", "o");
   EXPECT_EQ(result, "123456789abcdefghijklmno");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
-                  "h", "i", "j", "k", "l", "m", "n", "o", "p");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d", "e", "f", "g", "h", "i", "j", "k",
+                                       "l", "m", "n", "o", "p");
   EXPECT_EQ(result, "123456789abcdefghijklmnop");
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g",
-                  "h", "i", "j", "k", "l", "m", "n", "o", "p", "q");
+  result = tensorflow::strings::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
+                                       "d", "e", "f", "g", "h", "i", "j", "k",
+                                       "l", "m", "n", "o", "p", "q");
   EXPECT_EQ(result, "123456789abcdefghijklmnopq");
   // No limit thanks to C++11's variadic templates
-  result = StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "a", "b", "c", "d", "e", "f",
-                  "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
-                  "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D",
-                  "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
-                  "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z");
+  result = tensorflow::strings::StrCat(
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "a", "b", "c", "d", "e", "f", "g", "h",
+      "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
+      "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L",
+      "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z");
   EXPECT_EQ(result,
             "12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
 }
@@ -203,78 +222,85 @@ TEST(StrAppend, Basics) {
   uint64 ui64s[] = {12345678910LL, 10987654321LL};
 
   string::size_type old_size = result.size();
-  StrAppend(&result, strs[0]);
+  tensorflow::strings::StrAppend(&result, strs[0]);
   EXPECT_EQ(result.substr(old_size), "Hello");
 
   old_size = result.size();
-  StrAppend(&result, strs[1], pieces[2]);
+  tensorflow::strings::StrAppend(&result, strs[1], pieces[2]);
   EXPECT_EQ(result.substr(old_size), "CruelWorld");
 
   old_size = result.size();
-  StrAppend(&result, strs[0], ", ", pieces[2]);
+  tensorflow::strings::StrAppend(&result, strs[0], ", ", pieces[2]);
   EXPECT_EQ(result.substr(old_size), "Hello, World");
 
   old_size = result.size();
-  StrAppend(&result, strs[0], ", ", strs[1], " ", strs[2], "!");
+  tensorflow::strings::StrAppend(&result, strs[0], ", ", strs[1], " ", strs[2],
+                                 "!");
   EXPECT_EQ(result.substr(old_size), "Hello, Cruel World!");
 
   old_size = result.size();
-  StrAppend(&result, pieces[0], ", ", pieces[1], " ", pieces[2]);
+  tensorflow::strings::StrAppend(&result, pieces[0], ", ", pieces[1], " ",
+                                 pieces[2]);
   EXPECT_EQ(result.substr(old_size), "Hello, Cruel World");
 
   old_size = result.size();
-  StrAppend(&result, c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
+  tensorflow::strings::StrAppend(&result, c_strs[0], ", ", c_strs[1], " ",
+                                 c_strs[2]);
   EXPECT_EQ(result.substr(old_size), "Hello, Cruel World");
 
   old_size = result.size();
-  StrAppend(&result, "ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!");
+  tensorflow::strings::StrAppend(&result, "ASCII ", i32s[0], ", ", i32s[1], " ",
+                                 i32s[2], "!");
   EXPECT_EQ(result.substr(old_size), "ASCII 72, 67 87!");
 
   old_size = result.size();
-  StrAppend(&result, ui64s[0], ", ", ui64s[1], "!");
+  tensorflow::strings::StrAppend(&result, ui64s[0], ", ", ui64s[1], "!");
   EXPECT_EQ(result.substr(old_size), "12345678910, 10987654321!");
 
   string one = "1";  // Actually, it's the size of this string that we want; a
                      // 64-bit build distinguishes between size_t and uint64,
                      // even though they're both unsigned 64-bit values.
   old_size = result.size();
-  StrAppend(&result, "And a ", one.size(), " and a ", &result[2] - &result[0],
-            " and a ", one, " 2 3 4", "!");
+  tensorflow::strings::StrAppend(&result, "And a ", one.size(), " and a ",
+                                 &result[2] - &result[0], " and a ", one,
+                                 " 2 3 4", "!");
   EXPECT_EQ(result.substr(old_size), "And a 1 and a 2 and a 1 2 3 4!");
 
   // result = StrCat("Single chars won't compile", '!');
   // result = StrCat("Neither will NULLs", NULL);
   old_size = result.size();
-  StrAppend(&result, "To output a char by ASCII/numeric value, use +: ",
-            '!' + 0);
+  tensorflow::strings::StrAppend(
+      &result, "To output a char by ASCII/numeric value, use +: ", '!' + 0);
   EXPECT_EQ(result.substr(old_size),
             "To output a char by ASCII/numeric value, use +: 33");
 
   float f = 100000.5;
   old_size = result.size();
-  StrAppend(&result, "A hundred K and a half is ", f);
+  tensorflow::strings::StrAppend(&result, "A hundred K and a half is ", f);
   EXPECT_EQ(result.substr(old_size), "A hundred K and a half is 100000.5");
 
   double d = f;
   d *= d;
   old_size = result.size();
-  StrAppend(&result, "A hundred K and a half squared is ", d);
+  tensorflow::strings::StrAppend(&result, "A hundred K and a half squared is ",
+                                 d);
   EXPECT_EQ(result.substr(old_size),
             "A hundred K and a half squared is 10000100000.25");
 
   // Test 9 arguments, the old maximum
   old_size = result.size();
-  StrAppend(&result, 1, 22, 333, 4444, 55555, 666666, 7777777, 88888888, 9);
+  tensorflow::strings::StrAppend(&result, 1, 22, 333, 4444, 55555, 666666,
+                                 7777777, 88888888, 9);
   EXPECT_EQ(result.substr(old_size), "1223334444555556666667777777888888889");
 
   // No limit thanks to C++11's variadic templates
   old_size = result.size();
-  StrAppend(&result, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "a", "b", "c", "d", "e",
-            "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
-            "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E",
-            "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R",
-            "S", "T", "U", "V", "W", "X", "Y", "Z",
-            "No limit thanks to C++11's variadic templates");
+  tensorflow::strings::StrAppend(
+      &result, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "a", "b", "c", "d", "e", "f", "g",
+      "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v",
+      "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K",
+      "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
+      "No limit thanks to C++11's variadic templates");
   EXPECT_EQ(result.substr(old_size),
             "12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
             "No limit thanks to C++11's variadic templates");
@@ -282,8 +308,8 @@ TEST(StrAppend, Basics) {
 
 TEST(StrAppend, Death) {
   string s = "self";
-  EXPECT_DEBUG_DEATH(StrAppend(&s, s.c_str() + 1), "Check failed:");
-  EXPECT_DEBUG_DEATH(StrAppend(&s, s), "Check failed:");
+  EXPECT_DEBUG_DEATH(strings::StrAppend(&s, s.c_str() + 1), "Check failed:");
+  EXPECT_DEBUG_DEATH(strings::StrAppend(&s, s), "Check failed:");
 }
 
 static void CheckHex64(uint64 v) {
diff --git a/tensorflow/core/ops/array_grad.cc b/tensorflow/core/ops/array_grad.cc
index 325dbc48835d2f975ecd2530486be239fdcf96c6..38bd851da89357238360dcb3dd465b5e4f6a5fdd 100644
--- a/tensorflow/core/ops/array_grad.cc
+++ b/tensorflow/core/ops/array_grad.cc
@@ -333,6 +333,25 @@ Status TransposeGrad(const AttrSlice& attrs, FunctionDef* g) {
 }
 REGISTER_OP_GRADIENT("Transpose", TransposeGrad);
 
+Status ConjugateTransposeGrad(const AttrSlice& attrs, FunctionDef* g) {
+  *g = FDH::Define(
+      // Arg defs
+      {"x: T", "p: int32", "dy: T"},
+      // Ret val defs
+      {"dx: T", "dp: int32"},
+      // Attr defs
+      {"T: type"},
+      // Nodes
+      {
+          {{"q"}, "InvertPermutation", {"p"}, {}},
+          {{"dx"}, "ConjugateTranspose", {"dy", "q"}, {{"T", "$T"}}},
+          {{"dp"}, "ZerosLike", {"p"}, {{"T", DT_INT32}}},
+      });
+  VLOG(1) << "ConjugateTransposeGrad " << DebugString(*g);
+  return Status::OK();
+}
+REGISTER_OP_GRADIENT("ConjugateTranspose", ConjugateTransposeGrad);
+
 Status ReverseGrad(const AttrSlice& attrs, FunctionDef* g) {
   *g = FDH::Define(
       // Arg defs
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 9fa6423d59d37dfb2a1086305c7e7dc7e5b2ebd2..279a5876f962bb32b09a4b832794dfdcfffc6d46 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -261,33 +261,7 @@ REGISTER_OP("ParallelConcat")
       c->set_output(0, passed_shape);
 
       return Status::OK();
-    })
-    .Doc(R"doc(
-Concatenates a list of `N` tensors along the first dimension.
-
-The input tensors are all required to have size 1 in the first dimension.
-
-For example:
-
-```
-# 'x' is [[1, 4]]
-# 'y' is [[2, 5]]
-# 'z' is [[3, 6]]
-parallel_concat([x, y, z]) => [[1, 4], [2, 5], [3, 6]]  # Pack along first dim.
-```
-
-The difference between concat and parallel_concat is that concat requires all
-of the inputs be computed before the operation will begin but doesn't require
-that the input shapes be known during graph construction.  Parallel concat
-will copy pieces of the input into the output as they become available, in
-some situations this can provide a performance benefit.
-
-values: Tensors to be concatenated. All must have size 1 in the first dimension
- and same shape.
-output: The concatenated tensor.
-shape: the final shape of the result; should be equal to the shapes of any input
- but with the number of input values in the first dimension.
-)doc");
+    });
 
 REGISTER_OP("Pack")
     .Input("values: N * T")
@@ -323,35 +297,7 @@ REGISTER_OP("Pack")
 
       c->set_output(0, c->MakeShape(dims));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor.
-
-Packs the `N` tensors in `values` into a tensor with rank one higher than each
-tensor in `values`, by packing them along the `axis` dimension.
-Given a list of tensors of shape `(A, B, C)`;
-
-if `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`.
-if `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`.
-Etc.
-
-For example:
-
-```
-# 'x' is [1, 4]
-# 'y' is [2, 5]
-# 'z' is [3, 6]
-pack([x, y, z]) => [[1, 4], [2, 5], [3, 6]]  # Pack along first dim.
-pack([x, y, z], axis=1) => [[1, 2, 3], [4, 5, 6]]
-```
-
-This is the opposite of `unpack`.
-
-values: Must be of same shape and type.
-axis: Dimension along which to pack.  Negative values wrap around, so the
-  valid range is `[-(R+1), R+1)`.
-output: The packed tensor.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Unpack")
@@ -387,28 +333,7 @@ REGISTER_OP("Unpack")
       }
       for (int i = 0; i < c->num_outputs(); ++i) c->set_output(i, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors.
-
-Unpacks `num` tensors from `value` by chipping it along the `axis` dimension.
-For example, given a tensor of shape `(A, B, C, D)`;
-
-If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]`
-  and each tensor in `output` will have shape `(B, C, D)`. (Note that the
-  dimension unpacked along is gone, unlike `split`).
-
-If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]`
-  and each tensor in `output` will have shape `(A, C, D)`.
-Etc.
-
-This is the opposite of `pack`.
-
-value: 1-D or higher, with `axis` dimension size equal to `num`.
-axis: Dimension along which to unpack.  Negative values wrap around, so the
-  valid range is `[-R, R)`.
-output: The list of tensors unpacked from `value`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 // TODO(josh11b): Remove the >= 2 constraint, once we can rewrite the graph
@@ -421,18 +346,7 @@ REGISTER_OP("Concat")
     .Attr("T: type")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::ConcatShape(c, c->num_inputs() - 1);
-    })
-    .Doc(R"doc(
-Concatenates tensors along one dimension.
-
-concat_dim: 0-D.  The dimension along which to concatenate.  Must be in the
-  range [0, rank(values)).
-values: The `N` Tensors to concatenate. Their ranks and types must match,
-  and their sizes must match in all dimensions except `concat_dim`.
-output: A `Tensor` with the concatenation of values stacked along the
-  `concat_dim` dimension.  This tensor's shape matches that of `values` except
-  in `concat_dim` where it has the sum of the sizes.
-)doc");
+    });
 
 REGISTER_OP("ConcatV2")
     .Input("values: N * T")
@@ -441,18 +355,7 @@ REGISTER_OP("ConcatV2")
     .Attr("N: int >= 2")
     .Attr("T: type")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::ConcatV2Shape)
-    .Doc(R"doc(
-Concatenates tensors along one dimension.
-
-values: List of `N` Tensors to concatenate. Their ranks and types must match,
-  and their sizes must match in all dimensions except `concat_dim`.
-axis: 0-D.  The dimension along which to concatenate.  Must be in the
-  range [-rank(values), rank(values)).
-output: A `Tensor` with the concatenation of values stacked along the
-  `concat_dim` dimension.  This tensor's shape matches that of `values` except
-  in `concat_dim` where it has the sum of the sizes.
-)doc");
+    .SetShapeFn(shape_inference::ConcatV2Shape);
 
 // TODO(vivek.v.rane@intel.com): Prefix the op names with underscore if the ops
 // are not to be made user-accessible.
@@ -486,26 +389,7 @@ REGISTER_OP("ConcatOffset")
         c->set_output(i - 1, c->input(i));
       }
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes offsets of concat inputs within its output.
-
-For example:
-
-```
-# 'x' is [2, 2, 7]
-# 'y' is [2, 3, 7]
-# 'z' is [2, 5, 7]
-concat_offset(2, [x, y, z]) => [0, 0, 0], [0, 2, 0], [0, 5, 0]
-```
-
-This is typically used by gradient computations for a concat operation.
-
-concat_dim: The dimension along which to concatenate.
-shape: The `N` int32 vectors representing shape of tensors being concatenated.
-offset: The `N` int32 vectors representing the starting offset
-        of input tensors within the concatenated output.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Split")
@@ -540,19 +424,7 @@ REGISTER_OP("Split")
       }
       for (int i = 0; i < num_split; ++i) c->set_output(i, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Splits a tensor into `num_split` tensors along one dimension.
-
-split_dim: 0-D.  The dimension along which to split.  Must be in the range
-  `[-rank(value), rank(value))`.
-num_split: The number of ways to split.  Must evenly divide
-  `value.shape[split_dim]`.
-value: The tensor to split.
-output: They are identically shaped tensors, whose shape matches that of `value`
-  except along `split_dim`, where their sizes are
-  `values.shape[split_dim] / num_split`.
-)doc");
+    });
 
 REGISTER_OP("SplitV")
     .Input("value: T")
@@ -647,20 +519,7 @@ REGISTER_OP("SplitV")
       }
 
       return Status::OK();
-    })
-    .Doc(R"doc(
-Splits a tensor into `num_split` tensors along one dimension.
-
-value: The tensor to split.
-size_splits: list containing the sizes of each output tensor along the split
-             dimension. Must sum to the dimension of value along split_dim.
-             Can contain one -1 indicating that dimension is to be inferred.
-split_dim: 0-D.  The dimension along which to split.  Must be in the range
-  `[-rank(value), rank(value))`.
-output: Tensors whose shape matches that of `value`
-  except along `split_dim`, where their sizes are
-  `size_splits[i]`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Const")
@@ -679,12 +538,7 @@ REGISTER_OP("Const")
       }
       c->set_output(0, c->MakeShape(dims));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns a constant tensor.
-
-value: Attr `value` is the tensor to return.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 // TODO(mgubin): Update the doc when the freeze_graph script supports converting
@@ -694,51 +548,39 @@ REGISTER_OP("ImmutableConst")
     .Attr("shape: shape")
     .Attr("memory_region_name: string")
     .Output("tensor: dtype")
-    .SetShapeFn(shape_inference::ExplicitShape)
-    .Doc(R"doc(
-Returns immutable tensor from memory region.
-
-The current implementation memmaps the tensor from a file.
+    .SetShapeFn(shape_inference::ExplicitShape);
 
-dtype: Type of the returned tensor.
-shape: Shape of the returned tensor.
-memory_region_name: Name of readonly memory region used by the tensor, see
-  NewReadOnlyMemoryRegionFromFile in tensorflow::Env.
-)doc");
+REGISTER_OP("GuaranteeConst")
+    .Input("input: T")
+    .Output("output: T")
+    .Attr("T: type")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      return UnchangedShape(c);
+    })
+    // We don't want this to be optimized away.
+    .SetIsStateful();
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ZerosLike")
     .Input("x: T")
     .Output("y: T")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns a tensor of zeros with the same shape and type as x.
-
-x: a tensor of type T.
-y: a tensor of the same shape and type as x but filled with zeros.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("OnesLike")
     .Input("x: T")
     .Output("y: T")
     .Attr(
-        "T: {float, double, int8, uint8, int16, uint16, int32, int64, "
-        "complex64, complex128, bool}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns a tensor of ones with the same shape and type as x.
-
-x: a tensor of type T.
-y: a tensor of the same shape and type as x but filled with ones.
-)doc");
+        "T: {bfloat16, float, double, int8, uint8, int16, uint16, int32, "
+        "int64, complex64, complex128, bool}")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Diag")
     .Input("diagonal: T")
     .Output("output: T")
-    .Attr("T: {float, double, int32, int64, complex64, complex128}")
+    .Attr("T: {bfloat16, float, double, int32, int64, complex64, complex128}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle in = c->input(0);
       TF_RETURN_IF_ERROR(c->WithRankAtLeast(in, 1, &in));
@@ -747,36 +589,13 @@ REGISTER_OP("Diag")
       TF_RETURN_IF_ERROR(c->Concatenate(in, in, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns a diagonal tensor with a given diagonal values.
-
-Given a `diagonal`, this operation returns a tensor with the `diagonal` and
-everything else padded with zeros. The diagonal is computed as follows:
-
-Assume `diagonal` has dimensions [D1,..., Dk], then the output is a tensor of
-rank 2k with dimensions [D1,..., Dk, D1,..., Dk] where:
-
-`output[i1,..., ik, i1,..., ik] = diagonal[i1, ..., ik]` and 0 everywhere else.
-
-For example:
-
-```
-# 'diagonal' is [1, 2, 3, 4]
-tf.diag(diagonal) ==> [[1, 0, 0, 0]
-                       [0, 2, 0, 0]
-                       [0, 0, 3, 0]
-                       [0, 0, 0, 4]]
-```
-
-diagonal: Rank k tensor where k is at most 1.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("DiagPart")
     .Input("input: T")
     .Output("diagonal: T")
-    .Attr("T: {float, double, int32, int64, complex64, complex128}")
+    .Attr("T: {bfloat16, float, double, int32, int64, complex64, complex128}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle in = c->input(0);
       if (!c->RankKnown(in)) {
@@ -799,33 +618,7 @@ REGISTER_OP("DiagPart")
       }
       c->set_output(0, c->MakeShape(dims));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns the diagonal part of the tensor.
-
-This operation returns a tensor with the `diagonal` part
-of the `input`. The `diagonal` part is computed as follows:
-
-Assume `input` has dimensions `[D1,..., Dk, D1,..., Dk]`, then the output is a
-tensor of rank `k` with dimensions `[D1,..., Dk]` where:
-
-`diagonal[i1,..., ik] = input[i1, ..., ik, i1,..., ik]`.
-
-For example:
-
-```
-# 'input' is [[1, 0, 0, 0]
-              [0, 2, 0, 0]
-              [0, 0, 3, 0]
-              [0, 0, 0, 4]]
-
-tf.diag_part(input) ==> [1, 2, 3, 4]
-```
-
-input: Rank k tensor where k is even and not zero.
-diagonal: The extracted diagonal.
-
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("MatrixDiag")
@@ -845,40 +638,7 @@ REGISTER_OP("MatrixDiag")
           c->Concatenate(in, c->Vector(c->Dim(in, rank - 1)), &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns a batched diagonal tensor with a given batched diagonal values.
-
-Given a `diagonal`, this operation returns a tensor with the `diagonal` and
-everything else padded with zeros. The diagonal is computed as follows:
-
-Assume `diagonal` has `k` dimensions `[I, J, K, ..., N]`, then the output is a
-tensor of rank `k+1` with dimensions [I, J, K, ..., N, N]` where:
-
-`output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n]`.
-
-For example:
-
-```
-# 'diagonal' is [[1, 2, 3, 4], [5, 6, 7, 8]]
-
-and diagonal.shape = (2, 4)
-
-tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0]
-                                     [0, 2, 0, 0]
-                                     [0, 0, 3, 0]
-                                     [0, 0, 0, 4]],
-                                    [[5, 0, 0, 0]
-                                     [0, 6, 0, 0]
-                                     [0, 0, 7, 0]
-                                     [0, 0, 0, 8]]]
-
-which has shape (2, 4, 4)
-```
-
-diagonal: Rank `k`, where `k >= 1`.
-output: Rank `k+1`, with `output.shape = diagonal.shape + [diagonal.shape[-1]]`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("MatrixSetDiag")
@@ -911,27 +671,7 @@ REGISTER_OP("MatrixSetDiag")
       }
       c->set_output(0, output);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns a batched matrix tensor with new batched diagonal values.
-
-Given `input` and `diagonal`, this operation returns a tensor with the
-same shape and values as `input`, except for the main diagonal of the
-innermost matrices.  These will be overwritten by the values in `diagonal`.
-
-The output is computed as follows:
-
-Assume `input` has `k+1` dimensions `[I, J, K, ..., M, N]` and `diagonal` has
-`k` dimensions `[I, J, K, ..., min(M, N)]`.  Then the output is a
-tensor of rank `k+1` with dimensions `[I, J, K, ..., M, N]` where:
-
-  * `output[i, j, k, ..., m, n] = diagonal[i, j, k, ..., n]` for `m == n`.
-  * `output[i, j, k, ..., m, n] = input[i, j, k, ..., m, n]` for `m != n`.
-
-input: Rank `k+1`, where `k >= 1`.
-diagonal: Rank `k`, where `k >= 1`.
-output: Rank `k+1`, with `output.shape = input.shape`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("MatrixDiagPart")
@@ -956,43 +696,7 @@ REGISTER_OP("MatrixDiagPart")
       dims.push_back(min_dim);
       c->set_output(0, c->MakeShape(dims));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns the batched diagonal part of a batched tensor.
-
-This operation returns a tensor with the `diagonal` part
-of the batched `input`. The `diagonal` part is computed as follows:
-
-Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a
-tensor of rank `k - 1` with dimensions `[I, J, K, ..., min(M, N)]` where:
-
-`diagonal[i, j, k, ..., n] = input[i, j, k, ..., n, n]`.
-
-The input must be at least a matrix.
-
-For example:
-
-```
-# 'input' is [[[1, 0, 0, 0]
-               [0, 2, 0, 0]
-               [0, 0, 3, 0]
-               [0, 0, 0, 4]],
-              [[5, 0, 0, 0]
-               [0, 6, 0, 0]
-               [0, 0, 7, 0]
-               [0, 0, 0, 8]]]
-
-and input.shape = (2, 4, 4)
-
-tf.matrix_diag_part(input) ==> [[1, 2, 3, 4], [5, 6, 7, 8]]
-
-which has shape (2, 4)
-```
-
-input: Rank `k` tensor where `k >= 2`.
-diagonal: The extracted diagonal(s) having shape
-  `diagonal.shape = input.shape[:-2] + [min(input.shape[-2:])]`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("MatrixBandPart")
@@ -1001,57 +705,7 @@ REGISTER_OP("MatrixBandPart")
     .Input("num_upper: int64")
     .Output("band: T")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Copy a tensor setting everything outside a central band in each innermost matrix
-to zero.
-
-The `band` part is computed as follows:
-Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a
-tensor with the same shape where
-
-`band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`.
-
-The indicator function
-
-`in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) &&
-                 (num_upper < 0 || (n-m) <= num_upper)`.
-
-For example:
-
-```
-# if 'input' is [[ 0,  1,  2, 3]
-                 [-1,  0,  1, 2]
-                 [-2, -1,  0, 1]
-                 [-3, -2, -1, 0]],
-
-tf.matrix_band_part(input, 1, -1) ==> [[ 0,  1,  2, 3]
-                                       [-1,  0,  1, 2]
-                                       [ 0, -1,  0, 1]
-                                       [ 0,  0, -1, 0]],
-
-tf.matrix_band_part(input, 2, 1) ==> [[ 0,  1,  0, 0]
-                                      [-1,  0,  1, 0]
-                                      [-2, -1,  0, 1]
-                                      [ 0, -2, -1, 0]]
-```
-
-Useful special cases:
-
-```
- tf.matrix_band_part(input, 0, -1) ==> Upper triangular part.
- tf.matrix_band_part(input, -1, 0) ==> Lower triangular part.
- tf.matrix_band_part(input, 0, 0) ==> Diagonal.
-```
-
-input: Rank `k` tensor.
-num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire
-           lower triangle.
-num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep
-           entire upper triangle.
-band: Rank `k` tensor of the same shape as input. The extracted banded tensor.
-
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Reverse")
@@ -1059,9 +713,8 @@ REGISTER_OP("Reverse")
     .Input("dims: bool")
     .Output("output: T")
     .Attr(
-        "T: {uint8, int8, uint16, int16, int32, int64, bool, half, float, "
-        "double, complex64, "
-        "complex128, string}")
+        "T: {uint8, int8, uint16, int16, int32, int64, bool, half, "
+        "float, double, complex64, complex128, string}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle input = c->input(0);
       ShapeHandle dims;
@@ -1076,59 +729,7 @@ REGISTER_OP("Reverse")
       }
       c->set_output(0, input);
       return Status::OK();
-    })
-    .Doc(R"Doc(
-Reverses specific dimensions of a tensor.
-
-Given a `tensor`, and a `bool` tensor `dims` representing the dimensions
-of `tensor`, this operation reverses each dimension i of `tensor` where
-`dims[i]` is `True`.
-
-`tensor` can have up to 8 dimensions. The number of dimensions
-of `tensor` must equal the number of elements in `dims`. In other words:
-
-`rank(tensor) = size(dims)`
-
-For example:
-
-```
-# tensor 't' is [[[[ 0,  1,  2,  3],
-#                  [ 4,  5,  6,  7],
-#                  [ 8,  9, 10, 11]],
-#                 [[12, 13, 14, 15],
-#                  [16, 17, 18, 19],
-#                  [20, 21, 22, 23]]]]
-# tensor 't' shape is [1, 2, 3, 4]
-
-# 'dims' is [False, False, False, True]
-reverse(t, dims) ==> [[[[ 3,  2,  1,  0],
-                        [ 7,  6,  5,  4],
-                        [ 11, 10, 9, 8]],
-                       [[15, 14, 13, 12],
-                        [19, 18, 17, 16],
-                        [23, 22, 21, 20]]]]
-
-# 'dims' is [False, True, False, False]
-reverse(t, dims) ==> [[[[12, 13, 14, 15],
-                        [16, 17, 18, 19],
-                        [20, 21, 22, 23]
-                       [[ 0,  1,  2,  3],
-                        [ 4,  5,  6,  7],
-                        [ 8,  9, 10, 11]]]]
-
-# 'dims' is [False, False, True, False]
-reverse(t, dims) ==> [[[[8, 9, 10, 11],
-                        [4, 5, 6, 7],
-                        [0, 1, 2, 3]]
-                       [[20, 21, 22, 23],
-                        [16, 17, 18, 19],
-                        [12, 13, 14, 15]]]]
-```
-
-tensor: Up to 8-D.
-dims: 1-D. The dimensions to reverse.
-output: The same shape as `tensor`.
-)Doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ReverseV2")
@@ -1137,9 +738,8 @@ REGISTER_OP("ReverseV2")
     .Output("output: T")
     .Attr("Tidx: {int32, int64} = DT_INT32")
     .Attr(
-        "T: {uint8, int8, uint16, int16, int32, int64, bool, half, float, "
-        "double, complex64, "
-        "complex128, string}")
+        "T: {uint8, int8, uint16, int16, int32, int64, bool, half, bfloat16, "
+        "float, double, complex64, complex128, string}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle input = c->input(0);
       ShapeHandle axis;
@@ -1151,62 +751,7 @@ REGISTER_OP("ReverseV2")
       }
       c->set_output(0, input);
       return Status::OK();
-    })
-    .Doc(R"Doc(
-Reverses specific dimensions of a tensor.
-
-NOTE `tf.reverse` has now changed behavior in preparation for 1.0.
-`tf.reverse_v2` is currently an alias that will be deprecated before TF 1.0.
-
-Given a `tensor`, and a `int32` tensor `axis` representing the set of
-dimensions of `tensor` to reverse. This operation reverses each dimension
-`i` for which there exists `j` s.t. `axis[j] == i`.
-
-`tensor` can have up to 8 dimensions. The number of dimensions specified
-in `axis` may be 0 or more entries. If an index is specified more than
-once, a InvalidArgument error is raised.
-
-For example:
-
-```
-# tensor 't' is [[[[ 0,  1,  2,  3],
-#                  [ 4,  5,  6,  7],
-#                  [ 8,  9, 10, 11]],
-#                 [[12, 13, 14, 15],
-#                  [16, 17, 18, 19],
-#                  [20, 21, 22, 23]]]]
-# tensor 't' shape is [1, 2, 3, 4]
-
-# 'dims' is [3] or 'dims' is [-1]
-reverse(t, dims) ==> [[[[ 3,  2,  1,  0],
-                        [ 7,  6,  5,  4],
-                        [ 11, 10, 9, 8]],
-                       [[15, 14, 13, 12],
-                        [19, 18, 17, 16],
-                        [23, 22, 21, 20]]]]
-
-# 'dims' is '[1]' (or 'dims' is '[-3]')
-reverse(t, dims) ==> [[[[12, 13, 14, 15],
-                        [16, 17, 18, 19],
-                        [20, 21, 22, 23]
-                       [[ 0,  1,  2,  3],
-                        [ 4,  5,  6,  7],
-                        [ 8,  9, 10, 11]]]]
-
-# 'dims' is '[2]' (or 'dims' is '[-2]')
-reverse(t, dims) ==> [[[[8, 9, 10, 11],
-                        [4, 5, 6, 7],
-                        [0, 1, 2, 3]]
-                       [[20, 21, 22, 23],
-                        [16, 17, 18, 19],
-                        [12, 13, 14, 15]]]]
-```
-
-tensor: Up to 8-D.
-axis: 1-D. The indices of the dimensions to reverse. Must be in the range
-  `[-rank(tensor), rank(tensor))`.
-output: The same shape as `tensor`.
-)Doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("EditDistance")
@@ -1248,73 +793,21 @@ REGISTER_OP("EditDistance")
 
       c->set_output(0, c->MakeShape(dims));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the (possibly normalized) Levenshtein Edit Distance.
-
-The inputs are variable-length sequences provided by SparseTensors
-  (hypothesis_indices, hypothesis_values, hypothesis_shape)
-and
-  (truth_indices, truth_values, truth_shape).
-
-The inputs are:
-
-hypothesis_indices: The indices of the hypothesis list SparseTensor.
-  This is an N x R int64 matrix.
-hypothesis_values: The values of the hypothesis list SparseTensor.
-  This is an N-length vector.
-hypothesis_shape: The shape of the hypothesis list SparseTensor.
-  This is an R-length vector.
-truth_indices: The indices of the truth list SparseTensor.
-  This is an M x R int64 matrix.
-truth_values: The values of the truth list SparseTensor.
-  This is an M-length vector.
-truth_shape: The shape of the truth list SparseTensor.
-  This is an R-length vector.
-truth_shape: truth indices, vector.
-normalize: boolean (if true, edit distances are normalized by length of truth).
-
-The output is:
-
-output: A dense float tensor with rank R - 1.
-
-For the example input:
-
-    // hypothesis represents a 2x1 matrix with variable-length values:
-    //   (0,0) = ["a"]
-    //   (1,0) = ["b"]
-    hypothesis_indices = [[0, 0, 0],
-                          [1, 0, 0]]
-    hypothesis_values = ["a", "b"]
-    hypothesis_shape = [2, 1, 1]
-
-    // truth represents a 2x2 matrix with variable-length values:
-    //   (0,0) = []
-    //   (0,1) = ["a"]
-    //   (1,0) = ["b", "c"]
-    //   (1,1) = ["a"]
-    truth_indices = [[0, 1, 0],
-                     [1, 0, 0],
-                     [1, 0, 1],
-                     [1, 1, 0]]
-    truth_values = ["a", "b", "c", "a"]
-    truth_shape = [2, 2, 2]
-    normalize = true
-
-The output will be:
-
-    // output is a 2x2 matrix with edit distances normalized by truth lengths.
-    output = [[inf, 1.0],  // (0,0): no truth, (0,1): no hypothesis
-              [0.5, 1.0]]  // (1,0): addition, (1,1): no hypothesis
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Fill")
-    .Input("dims: int32")
+    .Input("dims: index_type")
     .Input("value: T")
     .Output("output: T")
     .Attr("T: type")
+    .Attr("index_type: {int32, int64} = DT_INT32")
     .SetShapeFn([](InferenceContext* c) {
+      DataType index_type = DT_INT32;
+      Status s = c->GetAttr("index_type", &index_type);
+      if (!s.ok() && s.code() != error::NOT_FOUND) {
+        return s;
+      }
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
@@ -1322,7 +815,8 @@ REGISTER_OP("Fill")
       const Tensor* t = c->input_tensor(0);
       if (t != nullptr) {
         for (int i = 0; i < t->NumElements(); ++i) {
-          if (t->vec<int32>()(i) < 0) {
+          if ((index_type == DT_INT32 && t->vec<int32>()(i) < 0) ||
+              (index_type == DT_INT64 && t->vec<int64>()(i) < 0)) {
             return errors::InvalidArgument("Fill dimensions must be >= 0");
           }
         }
@@ -1332,27 +826,7 @@ REGISTER_OP("Fill")
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Creates a tensor filled with a scalar value.
-
-This operation creates a tensor of shape `dims` and fills it with `value`.
-
-For example:
-
-```
-# Output tensor has shape [2, 3].
-fill([2, 3], 9) ==> [[9, 9, 9]
-                     [9, 9, 9]]
-```
-
-dims: 1-D. Represents the shape of the output tensor.
-value: 0-D (scalar). Value to fill the returned tensor.
-
-@compatibility(numpy)
-Equivalent to np.full
-@end_compatibility
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("_ParallelConcatStart")
@@ -1414,36 +888,7 @@ REGISTER_OP("Gather")
       TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, params_subshape, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Gather slices from `params` according to `indices`.
-
-`indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
-Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
-
-```python
-    # Scalar indices
-    output[:, ..., :] = params[indices, :, ... :]
-
-    # Vector indices
-    output[i, :, ..., :] = params[indices[i], :, ... :]
-
-    # Higher rank indices
-    output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]
-```
-
-If `indices` is a permutation and `len(indices) == params.shape[0]` then
-this operation will permute `params` accordingly.
-
-`validate_indices`: DEPRECATED. If this operation is assigned to CPU, values in
-`indices` are always validated to be within range. If assigned to GPU,
-out-of-bound indices result in safe but unspecified behavior, which may include
-raising an error.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt>
-</div>
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("GatherV2")
@@ -1509,40 +954,7 @@ REGISTER_OP("GatherV2")
 
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Gather slices from `params` axis `axis` according to `indices`.
-
-`indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
-Produces an output tensor with shape `params.shape[:axis] + indices.shape +
-params.shape[axis + 1:]` where:
-
-```python
-    # Scalar indices (output is rank(params) - 1).
-    output[a_0, ..., a_n, b_0, ..., b_n] =
-      params[a_0, ..., a_n, indices, b_0, ..., b_n]
-
-    # Vector indices (output is rank(params)).
-    output[a_0, ..., a_n, i, b_0, ..., b_n] =
-      params[a_0, ..., a_n, indices[i], b_0, ..., b_n]
-
-    # Higher rank indices (output is rank(params) + rank(indices) - 1).
-    output[a_0, ..., a_n, i, ..., j, b_0, ... b_n] =
-      params[a_0, ..., a_n, indices[i, ..., j], b_0, ..., b_n]
-```
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt>
-</div>
-
-params: The tensor from which to gather values. Must be at least rank
-  `axis + 1`.
-indices: Index tensor. Must be in range `[0, params.shape[axis])`.
-axis: The axis in `params` to gather `indices` from. Defaults to the first
-  dimension. Supports negative indexes.
-output: Values from `params` gathered from indices given by `indices`, with
-  shape `params.shape[:axis] + indices.shape + params.shape[axis + 1:]`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("GatherNd")
@@ -1578,114 +990,7 @@ REGISTER_OP("GatherNd")
       TF_RETURN_IF_ERROR(c->Concatenate(indices_slice, params_slice, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Gather slices from `params` into a Tensor with shape specified by `indices`.
-
-`indices` is an K-dimensional integer tensor, best thought of as a
-(K-1)-dimensional tensor of indices into `params`, where each element defines a
-slice of `params`:
-
-    output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]]
-
-Whereas in @{tf.gather} `indices` defines slices into the first
-dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the
-first `N` dimensions of `params`, where `N = indices.shape[-1]`.
-
-The last dimension of `indices` can be at most the rank of
-`params`:
-
-    indices.shape[-1] <= params.rank
-
-The last dimension of `indices` corresponds to elements
-(if `indices.shape[-1] == params.rank`) or slices
-(if `indices.shape[-1] < params.rank`) along dimension `indices.shape[-1]`
-of `params`.  The output tensor has shape
-
-    indices.shape[:-1] + params.shape[indices.shape[-1]:]
-
-Some examples below.
-
-Simple indexing into a matrix:
-
-```python
-    indices = [[0, 0], [1, 1]]
-    params = [['a', 'b'], ['c', 'd']]
-    output = ['a', 'd']
-```
-
-Slice indexing into a matrix:
-
-```python
-    indices = [[1], [0]]
-    params = [['a', 'b'], ['c', 'd']]
-    output = [['c', 'd'], ['a', 'b']]
-```
-
-Indexing into a 3-tensor:
-
-```python
-    indices = [[1]]
-    params = [[['a0', 'b0'], ['c0', 'd0']],
-              [['a1', 'b1'], ['c1', 'd1']]]
-    output = [[['a1', 'b1'], ['c1', 'd1']]]
-
-
-    indices = [[0, 1], [1, 0]]
-    params = [[['a0', 'b0'], ['c0', 'd0']],
-              [['a1', 'b1'], ['c1', 'd1']]]
-    output = [['c0', 'd0'], ['a1', 'b1']]
-
-
-    indices = [[0, 0, 1], [1, 0, 1]]
-    params = [[['a0', 'b0'], ['c0', 'd0']],
-              [['a1', 'b1'], ['c1', 'd1']]]
-    output = ['b0', 'b1']
-```
-
-Batched indexing into a matrix:
-
-```python
-    indices = [[[0, 0]], [[0, 1]]]
-    params = [['a', 'b'], ['c', 'd']]
-    output = [['a'], ['b']]
-```
-
-Batched slice indexing into a matrix:
-
-```python
-    indices = [[[1]], [[0]]]
-    params = [['a', 'b'], ['c', 'd']]
-    output = [[['c', 'd']], [['a', 'b']]]
-```
-
-Batched indexing into a 3-tensor:
-
-```python
-    indices = [[[1]], [[0]]]
-    params = [[['a0', 'b0'], ['c0', 'd0']],
-              [['a1', 'b1'], ['c1', 'd1']]]
-    output = [[[['a1', 'b1'], ['c1', 'd1']]],
-              [[['a0', 'b0'], ['c0', 'd0']]]]
-
-    indices = [[[0, 1], [1, 0]], [[0, 0], [1, 1]]]
-    params = [[['a0', 'b0'], ['c0', 'd0']],
-              [['a1', 'b1'], ['c1', 'd1']]]
-    output = [[['c0', 'd0'], ['a1', 'b1']],
-              [['a0', 'b0'], ['c1', 'd1']]]
-
-
-    indices = [[[0, 0, 1], [1, 0, 1]], [[0, 1, 1], [1, 1, 0]]]
-    params = [[['a0', 'b0'], ['c0', 'd0']],
-              [['a1', 'b1'], ['c1', 'd1']]]
-    output = [['b0', 'b1'], ['d0', 'c1']]
-```
-
-params: The tensor from which to gather values.
-indices: Index tensor.
-output: Values from `params` gathered from indices given by `indices`, with
-  shape `indices.shape[:-1] + params.shape[indices.shape[-1]:]`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Identity")
@@ -1699,10 +1004,20 @@ REGISTER_OP("Identity")
         c->set_output_handle_shapes_and_types(0, *handle_data);
       }
       return Status::OK();
-    })
-    .Doc(R"Doc(
-Return a tensor with the same shape and contents as the input tensor or value.
-)Doc");
+    });
+
+REGISTER_OP("Snapshot")
+    .Input("input: T")
+    .Output("output: T")
+    .Attr("T: type")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->input(0));
+      auto* handle_data = c->input_handle_shapes_and_types(0);
+      if (handle_data != nullptr) {
+        c->set_output_handle_shapes_and_types(0, *handle_data);
+      }
+      return Status::OK();
+    });
 
 #ifdef INTEL_MKL
 REGISTER_OP("_MklIdentity")
@@ -1732,25 +1047,7 @@ REGISTER_OP("IdentityN")
       TF_RETURN_IF_ERROR(c->input("input", &input));
       TF_RETURN_IF_ERROR(c->set_output("output", input));
       return Status::OK();
-    })
-    .Doc(R"Doc(
-Returns a list of tensors with the same shapes and contents as the input
-tensors.
-
-This op can be used to override the gradient for complicated functions. For
-example, suppose y = f(x) and we wish to apply a custom function g for backprop
-such that dx = g(dy). In Python,
-
-```python
-with tf.get_default_graph().gradient_override_map(
-    {'IdentityN': 'OverrideGradientWithG'}):
-  y, _ = identity_n([f(x), x])
-
-@tf.RegisterGradient('OverrideGradientWithG')
-def ApplyG(op, dy, _):
-  return [None, g(dy)]  # Do not backprop to f(x).
-```
-)Doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("RefIdentity")
@@ -1758,10 +1055,7 @@ REGISTER_OP("RefIdentity")
     .Output("output: Ref(T)")
     .Attr("T: type")
     .SetShapeFn(shape_inference::UnchangedShape)
-    .SetAllowsUninitializedInput()
-    .Doc(R"Doc(
-Return the same ref tensor as the input ref tensor.
-)Doc");
+    .SetAllowsUninitializedInput();
 
 // --------------------------------------------------------------------------
 REGISTER_OP("DebugGradientIdentity")
@@ -1769,82 +1063,36 @@ REGISTER_OP("DebugGradientIdentity")
     .Output("output: T")
     .Attr("T: type")
     .SetShapeFn(shape_inference::UnchangedShape)
-    .SetAllowsUninitializedInput()
-    .Doc(R"Doc(
-Identity op for gradient debugging.
+    .SetAllowsUninitializedInput();
 
-This op is hidden from public in Python. It is used by TensorFlow Debugger to
-register gradient tensors for gradient debugging.
-)Doc");
+REGISTER_OP("DebugGradientRefIdentity")
+    .Input("input: Ref(T)")
+    .Output("output: Ref(T)")
+    .Attr("T: type")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .SetAllowsUninitializedInput();
 
 // --------------------------------------------------------------------------
 REGISTER_OP("StopGradient")
     .Input("input: T")
     .Output("output: T")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"Doc(
-Stops gradient computation.
-
-When executed in a graph, this op outputs its input tensor as-is.
-
-When building ops to compute gradients, this op prevents the contribution of
-its inputs to be taken into account.  Normally, the gradient generator adds ops
-to a graph to compute the derivatives of a specified 'loss' by recursively
-finding out inputs that contributed to its computation.  If you insert this op
-in the graph it inputs are masked from the gradient generator.  They are not
-taken into account for computing gradients.
-
-This is useful any time you want to compute a value with TensorFlow but need
-to pretend that the value was a constant. Some examples include:
-
-*  The *EM* algorithm where the *M-step* should not involve backpropagation
-   through the output of the *E-step*.
-*  Contrastive divergence training of Boltzmann machines where, when
-   differentiating the energy function, the training must not backpropagate
-   through the graph that generated the samples from the model.
-*  Adversarial training, where no backprop should happen through the adversarial
-   example generation process.
-)Doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("PreventGradient")
     .Input("input: T")
     .Output("output: T")
     .Attr("T: type")
     .Attr("message: string = ''")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"Doc(
-An identity op that triggers an error if a gradient is requested.
-
-When executed in a graph, this op outputs its input tensor as-is.
-
-When building ops to compute gradients, the TensorFlow gradient system
-will return an error when trying to lookup the gradient of this op,
-because no gradient must ever be registered for this function.  This
-op exists to prevent subtle bugs from silently returning unimplemented
-gradients in some corner cases.
-
-input: any tensor.
-output: the same input tensor.
-message: Will be printed in the error when anyone tries to differentiate
-this operation.
-)Doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("CheckNumerics")
     .Input("tensor: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("message: string")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Checks a tensor for NaN and Inf values.
-
-When run, reports an `InvalidArgument` error if `tensor` has any values
-that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is.
-
-message: Prefix of the error message.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Reshape")
@@ -1853,69 +1101,9 @@ REGISTER_OP("Reshape")
     .Output("output: T")
     .Attr("T: type")
     .Attr("Tshape: {int32, int64} = DT_INT32")
-    .SetShapeFn([](InferenceContext* c) { return SetOutputShapeForReshape(c); })
-    .Doc(R"Doc(
-Reshapes a tensor.
-
-Given `tensor`, this operation returns a tensor that has the same values
-as `tensor` with shape `shape`.
-
-If one component of `shape` is the special value -1, the size of that dimension
-is computed so that the total size remains constant.  In particular, a `shape`
-of `[-1]` flattens into 1-D.  At most one component of `shape` can be -1.
-
-If `shape` is 1-D or higher, then the operation returns a tensor with shape
-`shape` filled with the values of `tensor`. In this case, the number of elements
-implied by `shape` must be the same as the number of elements in `tensor`.
-
-For example:
-
-```
-# tensor 't' is [1, 2, 3, 4, 5, 6, 7, 8, 9]
-# tensor 't' has shape [9]
-reshape(t, [3, 3]) ==> [[1, 2, 3],
-                        [4, 5, 6],
-                        [7, 8, 9]]
-
-# tensor 't' is [[[1, 1], [2, 2]],
-#                [[3, 3], [4, 4]]]
-# tensor 't' has shape [2, 2, 2]
-reshape(t, [2, 4]) ==> [[1, 1, 2, 2],
-                        [3, 3, 4, 4]]
-
-# tensor 't' is [[[1, 1, 1],
-#                 [2, 2, 2]],
-#                [[3, 3, 3],
-#                 [4, 4, 4]],
-#                [[5, 5, 5],
-#                 [6, 6, 6]]]
-# tensor 't' has shape [3, 2, 3]
-# pass '[-1]' to flatten 't'
-reshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6]
-
-# -1 can also be used to infer the shape
-
-# -1 is inferred to be 9:
-reshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],
-                         [4, 4, 4, 5, 5, 5, 6, 6, 6]]
-# -1 is inferred to be 2:
-reshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],
-                         [4, 4, 4, 5, 5, 5, 6, 6, 6]]
-# -1 is inferred to be 3:
-reshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1],
-                              [2, 2, 2],
-                              [3, 3, 3]],
-                             [[4, 4, 4],
-                              [5, 5, 5],
-                              [6, 6, 6]]]
-
-# tensor 't' is [7]
-# shape `[]` reshapes to a scalar
-reshape(t, []) ==> 7
-```
-
-shape: Defines the shape of the output tensor.
-)Doc");
+    .SetShapeFn([](InferenceContext* c) {
+      return SetOutputShapeForReshape(c);
+    });
 
 #ifdef INTEL_MKL
 REGISTER_OP("_MklReshape")
@@ -1942,29 +1130,7 @@ REGISTER_OP("InvertPermutation")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &x));
       c->set_output(0, x);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the inverse permutation of a tensor.
-
-This operation computes the inverse of an index permutation. It takes a 1-D
-integer tensor `x`, which represents the indices of a zero-based array, and
-swaps each value with its index position. In other words, for an output tensor
-`y` and an input tensor `x`, this operation computes the following:
-
-`y[x[i]] = i for i in [0, 1, ..., len(x) - 1]`
-
-The values must include 0. There can be no duplicate values or negative values.
-
-For example:
-
-```
-# tensor `x` is [3, 4, 0, 2, 1]
-invert_permutation(x) ==> [2, 4, 3, 0, 1]
-```
-
-x: 1-D.
-y: 1-D.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Transpose")
@@ -1973,13 +1139,7 @@ REGISTER_OP("Transpose")
     .Output("y: T")
     .Attr("T: type")
     .Attr("Tperm: {int32, int64} = DT_INT32")
-    .SetShapeFn(TransposeShapeFn)
-    .Doc(R"doc(
-Shuffle dimensions of x according to a permutation.
-
-The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:
-  `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]`
-)doc");
+    .SetShapeFn(TransposeShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ConjugateTranspose")
@@ -1988,14 +1148,7 @@ REGISTER_OP("ConjugateTranspose")
     .Output("y: T")
     .Attr("T: type")
     .Attr("Tperm: {int32, int64} = DT_INT32")
-    .SetShapeFn(TransposeShapeFn)
-    .Doc(R"doc(
-Shuffle dimensions of x according to a permutation and conjugate the result.
-
-The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:
-  `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]`
-  `y[i,j,k,...,s,t,u] == conj(x[perm[i], perm[j], perm[k],...,perm[s], perm[t], perm[u]])`
-)doc");
+    .SetShapeFn(TransposeShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Unique")
@@ -2008,70 +1161,21 @@ REGISTER_OP("Unique")
       c->set_output(0, c->Vector(InferenceContext::kUnknownDim));
       c->set_output(1, c->input(0));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Finds unique elements in a 1-D tensor.
-
-This operation returns a tensor `y` containing all of the unique elements of `x`
-sorted in the same order that they occur in `x`. This operation also returns a
-tensor `idx` the same size as `x` that contains the index of each value of `x`
-in the unique output `y`. In other words:
-
-`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
-
-For example:
-
-```
-# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
-y, idx = unique(x)
-y ==> [1, 2, 4, 7, 8]
-idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
-```
-
-x: 1-D.
-y: 1-D.
-idx: 1-D.
-)doc");
+    });
 
 REGISTER_OP("UniqueV2")
     .Input("x: T")
-    .Input("axis: int64")
+    .Input("axis: Taxis")
     .Output("y: T")
     .Output("idx: out_idx")
     .Attr("T: type")
+    .Attr("Taxis: {int32,int64} = DT_INT64")
     .Attr("out_idx: {int32, int64} = DT_INT32")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->Vector(InferenceContext::kUnknownDim));
       c->set_output(1, c->input(0));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Finds unique elements in a 1-D tensor.
-
-This operation returns a tensor `y` containing all of the unique elements of `x`
-sorted in the same order that they occur in `x`. This operation also returns a
-tensor `idx` the same size as `x` that contains the index of each value of `x`
-in the unique output `y`. In other words:
-
-`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
-
-For example:
-
-```
-# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
-y, idx = unique(x)
-y ==> [1, 2, 4, 7, 8]
-idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
-```
-
-
-x: A `Tensor`.
-axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
-  find the unique elements.
-y: A `Tensor`. Unique elements along the `axis` of `Tensor` x.
-idx: A 1-D Tensor. Has the same type as x that contains the index of each
-  value of x in the output y.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("UniqueWithCounts")
@@ -2087,33 +1191,7 @@ REGISTER_OP("UniqueWithCounts")
       c->set_output(1, c->input(0));
       c->set_output(2, uniq);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Finds unique elements in a 1-D tensor.
-
-This operation returns a tensor `y` containing all of the unique elements of `x`
-sorted in the same order that they occur in `x`. This operation also returns a
-tensor `idx` the same size as `x` that contains the index of each value of `x`
-in the unique output `y`. Finally, it returns a third tensor `count` that
-contains the count of each element of `y` in `x`. In other words:
-
-`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
-
-For example:
-
-```
-# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
-y, idx, count = unique_with_counts(x)
-y ==> [1, 2, 4, 7, 8]
-idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
-count ==> [2, 1, 3, 1, 2]
-```
-
-x: 1-D.
-y: 1-D.
-idx: 1-D.
-count: 1-D.
-)doc");
+    });
 
 namespace {
 
@@ -2138,20 +1216,7 @@ REGISTER_OP("Shape")
     .Output("output: out_type")
     .Attr("T: type")
     .Attr("out_type: {int32, int64} = DT_INT32")
-    .SetShapeFn(ShapeShapeFn)
-    .Doc(R"doc(
-Returns the shape of a tensor.
-
-This operation returns a 1-D integer tensor representing the shape of `input`.
-
-For example:
-
-```
-# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
-shape(t) ==> [2, 2, 3]
-```
-
-)doc");
+    .SetShapeFn(ShapeShapeFn);
 
 REGISTER_OP("ShapeN")
     .Input("input: N * T")
@@ -2159,12 +1224,7 @@ REGISTER_OP("ShapeN")
     .Attr("N: int")
     .Attr("T: type")
     .Attr("out_type: {int32, int64} = DT_INT32")
-    .SetShapeFn(ShapeShapeFn)
-    .Doc(R"doc(
-Returns shape of tensors.
-
-This operation returns N 1-D integer tensors representing shape of `input[i]s`.
-)doc");
+    .SetShapeFn(ShapeShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ReverseSequence")
@@ -2210,96 +1270,14 @@ REGISTER_OP("ReverseSequence")
           c->ReplaceDim(input, batch_dim, batch_dim_dim, &output_shape));
       c->set_output(0, output_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Reverses variable length slices.
-
-This op first slices `input` along the dimension `batch_dim`, and for each
-slice `i`, reverses the first `seq_lengths[i]` elements along
-the dimension `seq_dim`.
-
-The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,
-and `seq_lengths` must be a vector of length `input.dims[batch_dim]`.
-
-The output slice `i` along dimension `batch_dim` is then given by input
-slice `i`, with the first `seq_lengths[i]` slices along dimension
-`seq_dim` reversed.
-
-For example:
-
-```
-# Given this:
-batch_dim = 0
-seq_dim = 1
-input.dims = (4, 8, ...)
-seq_lengths = [7, 2, 3, 5]
-
-# then slices of input are reversed on seq_dim, but only up to seq_lengths:
-output[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...]
-output[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...]
-output[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...]
-output[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...]
-
-# while entries past seq_lens are copied through:
-output[0, 7:, :, ...] = input[0, 7:, :, ...]
-output[1, 2:, :, ...] = input[1, 2:, :, ...]
-output[2, 3:, :, ...] = input[2, 3:, :, ...]
-output[3, 2:, :, ...] = input[3, 2:, :, ...]
-```
-
-In contrast, if:
-
-```
-# Given this:
-batch_dim = 2
-seq_dim = 0
-input.dims = (8, ?, 4, ...)
-seq_lengths = [7, 2, 3, 5]
-
-# then slices of input are reversed on seq_dim, but only up to seq_lengths:
-output[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...]
-output[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...]
-output[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...]
-output[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...]
-
-# while entries past seq_lens are copied through:
-output[7:, :, 0, :, ...] = input[7:, :, 0, :, ...]
-output[2:, :, 1, :, ...] = input[2:, :, 1, :, ...]
-output[3:, :, 2, :, ...] = input[3:, :, 2, :, ...]
-output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...]
-```
-
-input: The input to reverse.
-seq_lengths: 1-D with length `input.dims(batch_dim)` and
-  `max(seq_lengths) <= input.dims(seq_dim)`
-seq_dim: The dimension which is partially reversed.
-batch_dim: The dimension along which reversal is performed.
-output: The partially reversed input. It has the same shape as `input`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Rank")
     .Input("input: T")
     .Output("output: int32")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Returns the rank of a tensor.
-
-This operation returns an integer representing the rank of `input`.
-
-For example:
-
-```
-# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
-# shape of tensor 't' is [2, 2, 3]
-rank(t) ==> 3
-```
-
-**Note**: The rank of a tensor is not the same as the rank of a matrix. The rank
-of a tensor is the number of indices required to uniquely select each element
-of the tensor. Rank is also known as "order", "degree", or "ndims."
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Size")
@@ -2307,21 +1285,7 @@ REGISTER_OP("Size")
     .Output("output: out_type")
     .Attr("T: type")
     .Attr("out_type: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Returns the size of a tensor.
-
-This operation returns an integer representing the number of elements in
-`input`.
-
-For example:
-
-```
-# 't' is [[[1, 1,, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]]
-size(t) ==> 12
-```
-
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 namespace {
 
@@ -2438,24 +1402,7 @@ REGISTER_OP("Slice")
       }
 
       return Status::OK();
-    })
-    .Doc(R"doc(
-Return a slice from 'input'.
-
-The output tensor is a tensor with dimensions described by 'size'
-whose values are extracted from 'input' starting at the offsets in
-'begin'.
-
-*Requirements*:
-  0 <= begin[i] <= begin[i] + size[i] <= Di  for i in [0, n)
-
-begin: begin[i] specifies the offset into the 'i'th dimension of
-  'input' to slice from.
-size: size[i] specifies the number of elements of the 'i'th dimension
-  of 'input' to slice. If size[i] is -1, all remaining elements in dimension
-  i are included in the slice (i.e. this is equivalent to setting
-  size[i] = input.dim_size(i) - begin[i]).
-)doc");
+    });
 
 REGISTER_OP("StridedSlice")
     .Input("input: T")
@@ -2520,133 +1467,7 @@ REGISTER_OP("StridedSlice")
       c->set_output(0, out);
 
       return Status::OK();
-    })
-    .Doc(R"doc(
-Return a strided slice from `input`.
-
-Note, most python users will want to use the Python `Tensor.__getitem__`
-or `Variable.__getitem__` rather than this op directly.
-
-The goal of this op is to produce a new tensor with a subset of
-the elements from the `n` dimensional `input` tensor. The subset is chosen using
-a sequence of `m` sparse range specifications encoded into the arguments
-of this function. Note, in some cases
-`m` could be equal to `n`, but this need not be the case. Each
-range specification entry can be one of the following:
-
-- An ellipsis (...). Ellipses are used to imply zero or more
-  dimensions of full-dimension selection and are produced using
-  `ellipsis_mask`. For example, `foo[...]` is the identity slice.
-
-- A new axis. This is used to insert a new shape=1 dimension and is
-  produced using `new_axis_mask`. For example, `foo[:, ...]` where
-  `foo` is shape `(3, 4)` produces a `(1, 3, 4)` tensor.
-
-
-- A range `begin:end:stride`. This is used to specify how much to choose from
-  a given dimension. `stride` can be any integer but 0.  `begin` is an integer
-  which represents the index of the first value to select while `end` represents
-  the index of the last value to select. The number of values selected in each
-  dimension is `end - begin` if `stride > 0` and `begin - end` if `stride < 0`.
-  `begin` and `end` can be negative where `-1` is the last element, `-2` is
-  the second to last. `begin_mask` controls whether to replace the explicitly
-  given `begin` with an implicit effective value of `0` if `stride > 0` and
-  `-1` if `stride < 0`. `end_mask` is analogous but produces the number
-  required to create the largest open interval. For example, given a shape
-  `(3,)` tensor `foo[:]`, the effective `begin` and `end` are `0` and `3`. Do
-  not assume this is equivalent to `foo[0:-1]` which has an effective `begin`
-  and `end` of `0` and `2`. Another example is `foo[-2::-1]` which reverses the
-  first dimension of a tensor while dropping the last two (in the original
-  order elements). For example `foo = [1,2,3,4]; foo[-2::-1]` is `[4,3]`.
-
-- A single index. This is used to keep only elements that have a given
-  index. For example (`foo[2, :]` on a shape `(5,6)` tensor produces a
-  shape `(6,)` tensor. This is encoded in `begin` and `end` and
-  `shrink_axis_mask`.
-
-Each conceptual range specification is encoded in the op's argument. This
-encoding is best understand by considering a non-trivial example. In
-particular,
-`foo[1, 2:4, None, ..., :-3:-1, :]` will be encoded as
-
-```
-begin = [1, 2, x, x, 0, x] # x denotes don't care (usually 0)
-end = [2, 4, x, x, -3, x]
-strides = [1, 1, x, x, -1, 1]
-begin_mask = 1<<4 | 1 << 5 = 48
-end_mask = 1<<5 = 32
-ellipsis_mask = 1<<3 = 8
-new_axis_mask = 1<<2 4
-shrink_axis_mask = 1<<0
-```
-
-In this case if `foo.shape` is (5, 5, 5, 5, 5, 5) the final shape of
-the slice becomes (2, 1, 5, 5, 2, 5).
-Let us walk step by step through each argument specification.
-
-1.  The first argument in the example slice is turned into `begin = 1` and
-`end = begin + 1 = 2`. To disambiguate from the original spec `2:4` we
-also set the appropriate bit in `shrink_axis_mask`.
-
-2. `2:4` is contributes 2, 4, 1 to begin, end, and stride. All masks have
-zero bits contributed.
-
-3. None is a synonym for `tf.newaxis`. This means insert a dimension of size 1
-dimension in the final shape. Dummy values are contributed to begin,
-end and stride, while the new_axis_mask bit is set.
-
-4. `...` grab the full ranges from as many dimensions as needed to
-fully specify a slice for every dimension of the input shape.
-
-5. `:-3:-1` shows the use of negative indices. A negative index `i` associated
-with a dimension that has shape `s` is converted to a positive index
-`s + i`. So `-1` becomes `s-1` (i.e. the last element). This conversion
-is done internally so begin, end and strides receive x, -3, and -1.
-The appropriate begin_mask bit is set to indicate the start range is the
-full range (ignoring the x).
-
-6. `:` indicates that the entire contents of the corresponding dimension
-is selected. This is equivalent to `::` or `0::1`. begin, end, and strides
-receive 0, 0, and 1, respectively. The appropriate bits in `begin_mask` and
-`end_mask` are also set.
-
-*Requirements*:
-  `0 != strides[i] for i in [0, m)`
-  `ellipsis_mask must be a power of two (only one ellipsis)`
-
-begin: `begin[k]` specifies the offset into the `k`th range specification.
-  The exact dimension this corresponds to will be determined by context.
-  Out-of-bounds values will be silently clamped. If the `k`th bit of
-  `begin_mask` then `begin[k]` is ignored and the full range of the
-  appropriate dimension is used instead. Negative values causes indexing
-  to start from the highest element e.g. If `foo==[1,2,3]` then `foo[-1]==3`.
-end: `end[i]` is like `begin` with the exception that `end_mask` is
-  used to determine full ranges.
-strides: `strides[i]` specifies the increment in the `i`th specification
-  after extracting a given element. Negative indices will reverse
-  the original order. Out or range values are
-  clamped to `[0,dim[i]) if slice[i]>0` or `[-1,dim[i]-1] if slice[i] < 0`
-begin_mask: a bitmask where a bit i being 1 means to ignore the begin
-  value and instead use the largest interval possible. At runtime
-  begin[i] will be replaced with `[0, n-1) if `stride[i] > 0` or
-  `[-1, n-1]` if `stride[i] < 0`
-end_mask: analogous to `begin_mask`
-ellipsis_mask: a bitmask where bit `i` being 1 means the `i`th
-  position is actually an ellipsis. One bit at most can be 1.
-  If `ellipsis_mask == 0`, then an implicit ellipsis mask of `1 << (m+1)`
-  is provided. This means that `foo[3:5] == foo[3:5, ...]`. An ellipsis
-  implicitly creates as many range specifications as necessary to fully
-  specify the sliced range for every dimension. For example for a 4-dimensional
-  tensor `foo` the slice `foo[2, ..., 5:8]` implies `foo[2, :, :, 5:8]`.
-new_axis_mask: a bitmask where bit `i` being 1 means the `i`th
-  specification creates a new shape 1 dimension. For example
-  `foo[:4, tf.newaxis, :2]` would produce a shape `(4, 1, 2)` tensor.
-shrink_axis_mask: a bitmask where bit `i` implies that the `i`th
-  specification should shrink the dimensionality. begin and end
-  must imply a slice of size 1 in the dimension. For example in
-  python one might do `foo[:, 3, :]` which would result in
-  `shrink_axis_mask` being 2.
-)doc");
+    });
 
 REGISTER_OP("StridedSliceGrad")
     .Input("shape: Index")
@@ -2667,19 +1488,7 @@ REGISTER_OP("StridedSliceGrad")
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns the gradient of `StridedSlice`.
-
-Since `StridedSlice` cuts out pieces of its `input` which is size
-`shape`, its gradient will have the same shape (which is passed here
-as `shape`). The gradient will be zero in any element that the slice
-does not select.
-
-Arguments are the same as StridedSliceGrad with the exception that
-`dy` is the input gradient to be propagated and `shape` is the
-shape of `StridedSlice`'s `input`.
-)doc");
+    });
 
 REGISTER_OP("StridedSliceAssign")
     .Input("ref: Ref(T)")
@@ -2695,18 +1504,7 @@ REGISTER_OP("StridedSliceAssign")
     .Attr("ellipsis_mask: int = 0")
     .Attr("new_axis_mask: int = 0")
     .Attr("shrink_axis_mask: int = 0")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Assign `value` to the sliced l-value reference of `ref`.
-
-The values of `value` are assigned to the positions in the variable
-`ref` that are selected by the slice parameters. The slice parameters
-`begin, `end`, `strides`, etc. work exactly as in `StridedSlice`.
-
-NOTE this op currently does not support broadcasting and so `value`'s
-shape must be exactly the shape produced by the slice of `ref`.
-
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 // TODO(aselle): Fix this documentation once StridedSliceAssign Supports
 // broadcasting.
 // --------------------------------------------------------------------------
@@ -2724,18 +1522,7 @@ REGISTER_OP("ResourceStridedSliceAssign")
     .Attr("ellipsis_mask: int = 0")
     .Attr("new_axis_mask: int = 0")
     .Attr("shrink_axis_mask: int = 0")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R"doc(
-Assign `value` to the sliced l-value reference of `ref`.
-
-The values of `value` are assigned to the positions in the variable
-`ref` that are selected by the slice parameters. The slice parameters
-`begin, `end`, `strides`, etc. work exactly as in `StridedSlice`.
-
-NOTE this op currently does not support broadcasting and so `value`'s
-shape must be exactly the shape produced by the slice of `ref`.
-
-)doc");
+    .SetShapeFn(shape_inference::NoOutputs);
 
 REGISTER_OP("Tile")
     .Input("input: T")
@@ -2767,19 +1554,7 @@ REGISTER_OP("Tile")
       }
       c->set_output(0, c->MakeShape(dims));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Constructs a tensor by tiling a given tensor.
-
-This operation creates a new tensor by replicating `input` `multiples` times.
-The output tensor's i'th dimension has `input.dims(i) * multiples[i]` elements,
-and the values of `input` are replicated `multiples[i]` times along the 'i'th
-dimension. For example, tiling `[a b c d]` by `[2]` produces
-`[a b c d a b c d]`.
-
-input: 1-D or higher.
-multiples: 1-D. Length must be the same as the number of dimensions in `input`
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("TileGrad")
@@ -2788,14 +1563,7 @@ REGISTER_OP("TileGrad")
     .Output("output: T")
     .Attr("T: type")
     .Deprecated(3, "TileGrad has been replaced with reduce_sum")
-    .SetShapeFn(tensorflow::shape_inference::UnknownShape)
-    .Doc(R"doc(
-Returns the gradient of `Tile`.
-
-Since `Tile` takes an input and repeats the input `multiples` times
-along each dimension, `TileGrad` takes in `multiples` and aggregates
-each repeated tile of `input` into `output`.
-)doc");
+    .SetShapeFn(tensorflow::shape_inference::UnknownShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Where")
@@ -2805,71 +1573,7 @@ REGISTER_OP("Where")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->Matrix(c->UnknownDim(), c->Rank(c->input(0))));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns locations of nonzero / true values in a tensor.
-
-This operation returns the coordinates of true elements in `input`. The
-coordinates are returned in a 2-D tensor where the first dimension (rows)
-represents the number of true elements, and the second dimension (columns)
-represents the coordinates of the true elements. Keep in mind, the shape of
-the output tensor can vary depending on how many true values there are in
-`input`. Indices are output in row-major order.
-
-For example:
-
-```
-# 'input' tensor is [[True, False]
-#                    [True, False]]
-# 'input' has two true values, so output has two coordinates.
-# 'input' has rank of 2, so coordinates have two indices.
-where(input) ==> [[0, 0],
-                  [1, 0]]
-
-# `input` tensor is [[[True, False]
-#                     [True, False]]
-#                    [[False, True]
-#                     [False, True]]
-#                    [[False, False]
-#                     [False, True]]]
-# 'input' has 5 true values, so output has 5 coordinates.
-# 'input' has rank of 3, so coordinates have three indices.
-where(input) ==> [[0, 0, 0],
-                  [0, 1, 0],
-                  [1, 0, 1],
-                  [1, 1, 1],
-                  [2, 1, 1]]
-
-# `input` tensor is [[[1.5,  0.0]
-#                     [-0.5, 0.0]]
-#                    [[0.0,  0.25]
-#                     [0.0,  0.75]]
-#                    [[0.0,  0.0]
-#                     [0.0,  0.01]]]
-# 'input' has 5 nonzero values, so output has 5 coordinates.
-# 'input' has rank of 3, so coordinates have three indices.
-where(input) ==> [[0, 0, 0],
-                  [0, 1, 0],
-                  [1, 0, 1],
-                  [1, 1, 1],
-                  [2, 1, 1]]
-
-# `input` tensor is [[[1.5 + 0.0j, 0.0  + 0.0j]
-#                     [0.0 + 0.5j, 0.0  + 0.0j]]
-#                    [[0.0 + 0.0j, 0.25 + 1.5j]
-#                     [0.0 + 0.0j, 0.75 + 0.0j]]
-#                    [[0.0 + 0.0j, 0.0  + 0.0j]
-#                     [0.0 + 0.0j, 0.01 + 0.0j]]]
-# 'input' has 5 nonzero magnitude values, so output has 5 coordinates.
-# 'input' has rank of 3, so coordinates have three indices.
-where(input) ==> [[0, 0, 0],
-                  [0, 1, 0],
-                  [1, 0, 1],
-                  [1, 1, 1],
-                  [2, 1, 1]]
-```
-
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("BroadcastArgs")
@@ -2896,13 +1600,7 @@ REGISTER_OP("BroadcastArgs")
       // Broadcasted shape is going to be as large as the largest dimension.
       c->set_output(0, c->Vector(std::max(x_dim, y_dim)));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Return the shape of s0 op s1 with broadcast.
-
-Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the
-broadcasted shape. `s0`, `s1` and `r0` are all integer vectors.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("BroadcastGradientArgs")
@@ -2919,12 +1617,7 @@ REGISTER_OP("BroadcastGradientArgs")
       c->set_output(0, c->Vector(InferenceContext::kUnknownDim));
       c->set_output(1, c->Vector(InferenceContext::kUnknownDim));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Return the reduction indices for computing gradients of s0 op s1 with broadcast.
-
-This is typically used by gradient computations for a broadcasting operation.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Pad")
@@ -2933,34 +1626,7 @@ REGISTER_OP("Pad")
     .Output("output: T")
     .Attr("T: type")
     .Attr("Tpaddings: {int32, int64} = DT_INT32")
-    .SetShapeFn(PadShapeFn)
-    .Doc(R"doc(
-Pads a tensor with zeros.
-
-This operation pads a `input` with zeros according to the `paddings` you
-specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the
-rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
-how many zeros to add before the contents of `input` in that dimension, and
-`paddings[D, 1]` indicates how many zeros to add after the contents of `input`
-in that dimension.
-
-The padded size of each dimension D of the output is:
-
-`paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
-
-For example:
-
-```
-# 't' is [[1, 1], [2, 2]]
-# 'paddings' is [[1, 1], [2, 2]]
-# rank of 't' is 2
-pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
-                      [0, 0, 1, 1, 0, 0]
-                      [0, 0, 2, 2, 0, 0]
-                      [0, 0, 0, 0, 0, 0]]
-```
-
-)doc");
+    .SetShapeFn(PadShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("PadV2")
@@ -2970,36 +1636,7 @@ REGISTER_OP("PadV2")
     .Output("output: T")
     .Attr("T: type")
     .Attr("Tpaddings: {int32, int64} = DT_INT32")
-    .SetShapeFn(PadShapeFn)
-    .Doc(R"doc(
-Pads a tensor.
-
-This operation pads `input` according to the `paddings` and `constant_values`
-you specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is
-the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
-how many padding values to add before the contents of `input` in that dimension,
-and `paddings[D, 1]` indicates how many padding values to add after the contents
-of `input` in that dimension. `constant_values` is a scalar tensor of the same
-type as `input` that indicates the value to use for padding `input`.
-
-The padded size of each dimension D of the output is:
-
-`paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
-
-For example:
-
-```
-# 't' is [[1, 1], [2, 2]]
-# 'paddings' is [[1, 1], [2, 2]]
-# 'constant_values' is 0
-# rank of 't' is 2
-pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
-                      [0, 0, 1, 1, 0, 0]
-                      [0, 0, 2, 2, 0, 0]
-                      [0, 0, 0, 0, 0, 0]]
-```
-
-)doc");
+    .SetShapeFn(PadShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("MirrorPad")
@@ -3009,46 +1646,7 @@ REGISTER_OP("MirrorPad")
     .Attr("T: type")
     .Attr("Tpaddings: {int32, int64} = DT_INT32")
     .Attr(GetMirrorPadModeAttrString())
-    .SetShapeFn(PadShapeFn)
-    .Doc(R"doc(
-Pads a tensor with mirrored values.
-
-This operation pads a `input` with mirrored values according to the `paddings`
-you specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is
-the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
-how many values to add before the contents of `input` in that dimension, and
-`paddings[D, 1]` indicates how many values to add after the contents of `input`
-in that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater
-than `input.dim_size(D)` (or `input.dim_size(D) - 1`) if `copy_border` is true
-(if false, respectively).
-
-The padded size of each dimension D of the output is:
-
-`paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
-
-For example:
-
-```
-# 't' is [[1, 2, 3], [4, 5, 6]].
-# 'paddings' is [[1, 1]], [2, 2]].
-# 'mode' is SYMMETRIC.
-# rank of 't' is 2.
-pad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2]
-                      [2, 1, 1, 2, 3, 3, 2]
-                      [5, 4, 4, 5, 6, 6, 5]
-                      [5, 4, 4, 5, 6, 6, 5]]
-```
-
-input: The input tensor to be padded.
-paddings: A two-column matrix specifying the padding sizes. The number of
-  rows must be the same as the rank of `input`.
-mode: Either `REFLECT` or `SYMMETRIC`. In reflect mode the padded regions
-  do not include the borders, while in symmetric mode the padded regions
-  do include the borders. For example, if `input` is `[1, 2, 3]` and `paddings`
-  is `[0, 2]`, then the output is `[1, 2, 3, 2, 1]` in reflect mode, and
-  it is `[1, 2, 3, 3, 2]` in symmetric mode.
-output: The padded tensor.
-)doc");
+    .SetShapeFn(PadShapeFn);
 
 // --------------------------------------------------------------------------
 namespace {
@@ -3110,35 +1708,7 @@ REGISTER_OP("MirrorPadGrad")
       } else {
         return MirrorPadKnown<int64>(c, input, paddings_t, input_rank);
       }
-    })
-    .Doc(R"doc(
-Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor.
-
-This operation folds the padded areas of `input` by `MirrorPad` according to the
-`paddings` you specify. `paddings` must be the same as `paddings` argument
-given to the corresponding `MirrorPad` op.
-
-The folded size of each dimension D of the output is:
-
-`input.dim_size(D) - paddings(D, 0) - paddings(D, 1)`
-
-For example:
-
-```
-# 't' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]].
-# 'paddings' is [[0, 1]], [0, 1]].
-# 'mode' is SYMMETRIC.
-# rank of 't' is 2.
-pad(t, paddings) ==> [[ 1,  5]
-                      [11, 28]]
-```
-
-input: The input tensor to be folded.
-paddings: A two-column matrix specifying the padding sizes. The number of
-  rows must be the same as the rank of `input`.
-mode: The mode used in the `MirrorPad` op.
-output: The folded tensor.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Placeholder")
@@ -3160,19 +1730,7 @@ REGISTER_OP("Placeholder")
       TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(shape, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-A placeholder op for a value that will be fed into the computation.
-
-N.B. This operation will fail with an error if it is executed. It is
-intended as a way to represent a value that will always be fed, and to
-provide attrs that enable the fed value to be checked at runtime.
-
-output: A placeholder tensor that must be replaced using the feed mechanism.
-dtype: The type of elements in the tensor.
-shape: (Optional) The shape of the tensor. If the shape has 0 dimensions, the
-  shape is unconstrained.
-)doc");
+    });
 
 // Placeholder was modified in a backwards compatible way to do what
 // PlaceholderV2 did, so we have deprecated V2 (no one was really
@@ -3182,19 +1740,7 @@ REGISTER_OP("PlaceholderV2")
     .Attr("dtype: type")
     .Attr("shape: shape")
     .SetShapeFn(shape_inference::ExplicitShape)
-    .Deprecated(23, "Placeholder now behaves the same as PlaceholderV2.")
-    .Doc(R"doc(
-A placeholder op for a value that will be fed into the computation.
-
-N.B. This operation will fail with an error if it is executed. It is
-intended as a way to represent a value that will always be fed, and to
-provide attrs that enable the fed value to be checked at runtime.
-
-output: A placeholder tensor that must be replaced using the feed mechanism.
-dtype: The type of elements in the tensor.
-shape: The shape of the tensor. The shape can be any partially-specified
-   shape.  To be unconstrained, pass in a shape with unknown rank.
-)doc");
+    .Deprecated(23, "Placeholder now behaves the same as PlaceholderV2.");
 
 // --------------------------------------------------------------------------
 REGISTER_OP("PlaceholderWithDefault")
@@ -3215,15 +1761,7 @@ REGISTER_OP("PlaceholderWithDefault")
       TF_RETURN_IF_ERROR(c->Merge(input, out, &unused));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-A placeholder op that passes through `input` when its output is not fed.
-
-input: The default value to produce when `output` is not fed.
-output: A placeholder tensor that defaults to `input` if it is not fed.
-dtype: The type of elements in the tensor.
-shape: The (possibly partial) shape of the tensor.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ExpandDims")
@@ -3263,57 +1801,17 @@ REGISTER_OP("ExpandDims")
         dim += rank + 1;
       }
 
-      ShapeHandle end;
-      TF_RETURN_IF_ERROR(c->Subshape(input, dim, &end));
-
-      // Build output as start + 1 + end.
-      ShapeHandle output;
-      TF_RETURN_IF_ERROR(c->Subshape(input, 0, dim, &output));
-      TF_RETURN_IF_ERROR(c->Concatenate(output, c->Vector(1), &output));
-      TF_RETURN_IF_ERROR(c->Concatenate(output, end, &output));
-      c->set_output(0, output);
-      return Status::OK();
-    })
-    .Doc(R"doc(
-Inserts a dimension of 1 into a tensor's shape.
-
-Given a tensor `input`, this operation inserts a dimension of 1 at the
-dimension index `dim` of `input`'s shape. The dimension index `dim` starts at
-zero; if you specify a negative number for `dim` it is counted backward from
-the end.
-
-This operation is useful if you want to add a batch dimension to a single
-element. For example, if you have a single image of shape `[height, width,
-channels]`, you can make it a batch of 1 image with `expand_dims(image, 0)`,
-which will make the shape `[1, height, width, channels]`.
-
-Other examples:
-
-```
-# 't' is a tensor of shape [2]
-shape(expand_dims(t, 0)) ==> [1, 2]
-shape(expand_dims(t, 1)) ==> [2, 1]
-shape(expand_dims(t, -1)) ==> [2, 1]
-
-# 't2' is a tensor of shape [2, 3, 5]
-shape(expand_dims(t2, 0)) ==> [1, 2, 3, 5]
-shape(expand_dims(t2, 2)) ==> [2, 3, 1, 5]
-shape(expand_dims(t2, 3)) ==> [2, 3, 5, 1]
-```
-
-This operation requires that:
-
-`-1-input.dims() <= dim <= input.dims()`
-
-This operation is related to `squeeze()`, which removes dimensions of
-size 1.
+      ShapeHandle end;
+      TF_RETURN_IF_ERROR(c->Subshape(input, dim, &end));
 
-dim: 0-D (scalar). Specifies the dimension index at which to
-  expand the shape of `input`. Must be in the range
-  `[-rank(input) - 1, rank(input)]`.
-output: Contains the same data as `input`, but its shape has an additional
-  dimension of size 1 added.
-)doc");
+      // Build output as start + 1 + end.
+      ShapeHandle output;
+      TF_RETURN_IF_ERROR(c->Subshape(input, 0, dim, &output));
+      TF_RETURN_IF_ERROR(c->Concatenate(output, c->Vector(1), &output));
+      TF_RETURN_IF_ERROR(c->Concatenate(output, end, &output));
+      c->set_output(0, output);
+      return Status::OK();
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Squeeze")
@@ -3381,36 +1879,7 @@ REGISTER_OP("Squeeze")
 
       c->set_output(0, c->MakeShape(result_shape));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Removes dimensions of size 1 from the shape of a tensor.
-
-Given a tensor `input`, this operation returns a tensor of the same type with
-all dimensions of size 1 removed. If you don't want to remove all size 1
-dimensions, you can remove specific size 1 dimensions by specifying
-`squeeze_dims`.
-
-For example:
-
-```
-# 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
-shape(squeeze(t)) ==> [2, 3]
-```
-
-Or, to remove specific size 1 dimensions:
-
-```
-# 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
-shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1]
-```
-
-input: The `input` to squeeze.
-squeeze_dims: If specified, only squeezes the dimensions listed. The dimension
-  index starts at 0. It is an error to squeeze a dimension that is not 1. Must
-  be in the range `[-rank(input), rank(input))`.
-output: Contains the same data as `input`, but has one or more dimensions of
-  size 1 removed.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ListDiff")
@@ -3429,37 +1898,7 @@ REGISTER_OP("ListDiff")
       c->set_output(0, out);
       c->set_output(1, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the difference between two lists of numbers or strings.
-
-Given a list `x` and a list `y`, this operation returns a list `out` that
-represents all values that are in `x` but not in `y`. The returned list `out`
-is sorted in the same order that the numbers appear in `x` (duplicates are
-preserved). This operation also returns a list `idx` that represents the
-position of each `out` element in `x`. In other words:
-
-`out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]`
-
-For example, given this input:
-
-```
-x = [1, 2, 3, 4, 5, 6]
-y = [1, 3, 5]
-```
-
-This operation would return:
-
-```
-out ==> [2, 4, 6]
-idx ==> [1, 3, 5]
-```
-
-x: 1-D. Values to keep.
-y: 1-D. Values to remove.
-out: 1-D. Values present in `x` but not in `y`.
-idx: 1-D. Positions of `x` values preserved in `out`.
-)doc");
+    });
 
 namespace {
 
@@ -3647,133 +2086,7 @@ REGISTER_OP("SpaceToBatchND")
       return SpaceToBatchShapeHelper(c, c->input(0), c->input(1),
                                      c->input_tensor(1), c->input(2),
                                      c->input_tensor(2));
-    })
-    .Doc(R"doc(
-SpaceToBatch for N-D tensors of type T.
-
-This operation divides "spatial" dimensions `[1, ..., M]` of the input into a
-grid of blocks of shape `block_shape`, and interleaves these blocks with the
-"batch" dimension (0) such that in the output, the spatial dimensions
-`[1, ..., M]` correspond to the position within the grid, and the batch
-dimension combines both the position within a spatial block and the original
-batch position.  Prior to division into blocks, the spatial dimensions of the
-input are optionally zero padded according to `paddings`.  See below for a
-precise description.
-
-input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`,
-  where spatial_shape has `M` dimensions.
-
-block_shape: 1-D with shape `[M]`, all values must be >= 1.
-
-paddings: 2-D with shape `[M, 2]`, all values must be >= 0.
-  `paddings[i] = [pad_start, pad_end]` specifies the padding for input dimension
-  `i + 1`, which corresponds to spatial dimension `i`.  It is required that
-  `block_shape[i]` divides `input_shape[i + 1] + pad_start + pad_end`.
-
-This operation is equivalent to the following steps:
-
-1. Zero-pad the start and end of dimensions `[1, ..., M]` of the
-   input according to `paddings` to produce `padded` of shape `padded_shape`.
-
-2. Reshape `padded` to `reshaped_padded` of shape:
-
-     [batch] +
-     [padded_shape[1] / block_shape[0],
-       block_shape[0],
-      ...,
-      padded_shape[M] / block_shape[M-1],
-      block_shape[M-1]] +
-     remaining_shape
-
-3. Permute dimensions of `reshaped_padded` to produce
-   `permuted_reshaped_padded` of shape:
-
-     block_shape +
-     [batch] +
-     [padded_shape[1] / block_shape[0],
-      ...,
-      padded_shape[M] / block_shape[M-1]] +
-     remaining_shape
-
-4. Reshape `permuted_reshaped_padded` to flatten `block_shape` into the batch
-   dimension, producing an output tensor of shape:
-
-     [batch * prod(block_shape)] +
-     [padded_shape[1] / block_shape[0],
-      ...,
-      padded_shape[M] / block_shape[M-1]] +
-     remaining_shape
-
-Some examples:
-
-(1) For the following input of shape `[1, 2, 2, 1]`, `block_shape = [2, 2]`, and
-    `paddings = [[0, 0], [0, 0]]`:
-
-```
-x = [[[[1], [2]], [[3], [4]]]]
-```
-
-The output tensor has shape `[4, 1, 1, 1]` and value:
-
-```
-[[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-```
-
-(2) For the following input of shape `[1, 2, 2, 3]`, `block_shape = [2, 2]`, and
-    `paddings = [[0, 0], [0, 0]]`:
-
-```
-x = [[[[1, 2, 3], [4, 5, 6]],
-      [[7, 8, 9], [10, 11, 12]]]]
-```
-
-The output tensor has shape `[4, 1, 1, 3]` and value:
-
-```
-[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-```
-
-(3) For the following input of shape `[1, 4, 4, 1]`, `block_shape = [2, 2]`, and
-    `paddings = [[0, 0], [0, 0]]`:
-
-```
-x = [[[[1],   [2],  [3],  [4]],
-      [[5],   [6],  [7],  [8]],
-      [[9],  [10], [11],  [12]],
-      [[13], [14], [15],  [16]]]]
-```
-
-The output tensor has shape `[4, 2, 2, 1]` and value:
-
-```
-x = [[[[1], [3]], [[9], [11]]],
-     [[[2], [4]], [[10], [12]]],
-     [[[5], [7]], [[13], [15]]],
-     [[[6], [8]], [[14], [16]]]]
-```
-
-(4) For the following input of shape `[2, 2, 4, 1]`, block_shape = `[2, 2]`, and
-    paddings = `[[0, 0], [2, 0]]`:
-
-```
-x = [[[[1],   [2],  [3],  [4]],
-      [[5],   [6],  [7],  [8]]],
-     [[[9],  [10], [11],  [12]],
-      [[13], [14], [15],  [16]]]]
-```
-
-The output tensor has shape `[8, 1, 3, 1]` and value:
-
-```
-x = [[[[0], [1], [3]]], [[[0], [9], [11]]],
-     [[[0], [2], [4]]], [[[0], [10], [12]]],
-     [[[0], [5], [7]]], [[[0], [13], [15]]],
-     [[[0], [6], [8]]], [[[0], [14], [16]]]]
-```
-
-Among others, this operation is useful for reducing atrous convolution into
-regular convolution.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("SpaceToBatch")
@@ -3798,106 +2111,7 @@ REGISTER_OP("SpaceToBatch")
       return SpaceToBatchShapeHelper(c, input_shape, c->MakeShape({2}),
                                      &block_shape, c->input(1),
                                      c->input_tensor(1));
-    })
-    .Doc(R"doc(
-SpaceToBatch for 4-D tensors of type T.
-
-This is a legacy version of the more general SpaceToBatchND.
-
-Zero-pads and then rearranges (permutes) blocks of spatial data into batch.
-More specifically, this op outputs a copy of the input tensor where values from
-the `height` and `width` dimensions are moved to the `batch` dimension. After
-the zero-padding, both `height` and `width` of the input must be divisible by the
-block size.
-
-input: 4-D with shape `[batch, height, width, depth]`.
-
-paddings: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies
-  the padding of the input with zeros across the spatial dimensions as follows:
-
-      paddings = [[pad_top, pad_bottom], [pad_left, pad_right]]
-
-  The effective spatial dimensions of the zero-padded input tensor will be:
-
-      height_pad = pad_top + height + pad_bottom
-      width_pad = pad_left + width + pad_right
-
-The attr `block_size` must be greater than one. It indicates the block size.
-
-  * Non-overlapping blocks of size `block_size x block size` in the height and
-    width dimensions are rearranged into the batch dimension at each location.
-  * The batch of the output tensor is `batch * block_size * block_size`.
-  * Both height_pad and width_pad must be divisible by block_size.
-
-The shape of the output will be:
-
-    [batch*block_size*block_size, height_pad/block_size, width_pad/block_size,
-     depth]
-
-Some examples:
-
-(1) For the following input of shape `[1, 2, 2, 1]` and block_size of 2:
-
-```
-x = [[[[1], [2]], [[3], [4]]]]
-```
-
-The output tensor has shape `[4, 1, 1, 1]` and value:
-
-```
-[[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-```
-
-(2) For the following input of shape `[1, 2, 2, 3]` and block_size of 2:
-
-```
-x = [[[[1, 2, 3], [4, 5, 6]],
-      [[7, 8, 9], [10, 11, 12]]]]
-```
-
-The output tensor has shape `[4, 1, 1, 3]` and value:
-
-```
-[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-```
-
-(3) For the following input of shape `[1, 4, 4, 1]` and block_size of 2:
-
-```
-x = [[[[1],   [2],  [3],  [4]],
-      [[5],   [6],  [7],  [8]],
-      [[9],  [10], [11],  [12]],
-      [[13], [14], [15],  [16]]]]
-```
-
-The output tensor has shape `[4, 2, 2, 1]` and value:
-
-```
-x = [[[[1], [3]], [[9], [11]]],
-     [[[2], [4]], [[10], [12]]],
-     [[[5], [7]], [[13], [15]]],
-     [[[6], [8]], [[14], [16]]]]
-```
-
-(4) For the following input of shape `[2, 2, 4, 1]` and block_size of 2:
-
-```
-x = [[[[1],   [2],  [3],  [4]],
-      [[5],   [6],  [7],  [8]]],
-     [[[9],  [10], [11],  [12]],
-      [[13], [14], [15],  [16]]]]
-```
-
-The output tensor has shape `[8, 1, 2, 1]` and value:
-
-```
-x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],
-     [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]
-```
-
-Among others, this operation is useful for reducing atrous convolution into
-regular convolution.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("BatchToSpaceND")
@@ -3912,132 +2126,7 @@ REGISTER_OP("BatchToSpaceND")
       return BatchToSpaceShapeHelper(c, c->input(0), c->input(1),
                                      c->input_tensor(1), c->input(2),
                                      c->input_tensor(2));
-    })
-    .Doc(R"doc(
-BatchToSpace for N-D tensors of type T.
-
-This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape
-`block_shape + [batch]`, interleaves these blocks back into the grid defined by
-the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as
-the input.  The spatial dimensions of this intermediate result are then
-optionally cropped according to `crops` to produce the output.  This is the
-reverse of SpaceToBatch.  See below for a precise description.
-
-input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`,
-  where spatial_shape has M dimensions.
-
-block_shape: 1-D with shape `[M]`, all values must be >= 1.
-
-crops: 2-D with shape `[M, 2]`, all values must be >= 0.
-  `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input
-  dimension `i + 1`, which corresponds to spatial dimension `i`.  It is
-  required that
-  `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`.
-
-This operation is equivalent to the following steps:
-
-1. Reshape `input` to `reshaped` of shape:
-     [block_shape[0], ..., block_shape[M-1],
-      batch / prod(block_shape),
-      input_shape[1], ..., input_shape[N-1]]
-
-2. Permute dimensions of `reshaped` to produce `permuted` of shape
-     [batch / prod(block_shape),
-
-      input_shape[1], block_shape[0],
-      ...,
-      input_shape[M], block_shape[M-1],
-
-      input_shape[M+1], ..., input_shape[N-1]]
-
-3. Reshape `permuted` to produce `reshaped_permuted` of shape
-     [batch / prod(block_shape),
-
-      input_shape[1] * block_shape[0],
-      ...,
-      input_shape[M] * block_shape[M-1],
-
-      input_shape[M+1],
-      ...,
-      input_shape[N-1]]
-
-4. Crop the start and end of dimensions `[1, ..., M]` of
-   `reshaped_permuted` according to `crops` to produce the output of shape:
-     [batch / prod(block_shape),
-
-      input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1],
-      ...,
-      input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1],
-
-      input_shape[M+1], ..., input_shape[N-1]]
-
-Some examples:
-
-(1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and
-    `crops = [[0, 0], [0, 0]]`:
-
-```
-[[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-```
-
-The output tensor has shape `[1, 2, 2, 1]` and value:
-
-```
-x = [[[[1], [2]], [[3], [4]]]]
-```
-
-(2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and
-    `crops = [[0, 0], [0, 0]]`:
-
-```
-[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-```
-
-The output tensor has shape `[1, 2, 2, 3]` and value:
-
-```
-x = [[[[1, 2, 3], [4, 5, 6]],
-      [[7, 8, 9], [10, 11, 12]]]]
-```
-
-(3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and
-    `crops = [[0, 0], [0, 0]]`:
-
-```
-x = [[[[1], [3]], [[9], [11]]],
-     [[[2], [4]], [[10], [12]]],
-     [[[5], [7]], [[13], [15]]],
-     [[[6], [8]], [[14], [16]]]]
-```
-
-The output tensor has shape `[1, 4, 4, 1]` and value:
-
-```
-x = [[[1],   [2],  [3],  [4]],
-     [[5],   [6],  [7],  [8]],
-     [[9],  [10], [11],  [12]],
-     [[13], [14], [15],  [16]]]
-```
-
-(4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and
-    `crops = [[0, 0], [2, 0]]`:
-
-```
-x = [[[[0], [1], [3]]], [[[0], [9], [11]]],
-     [[[0], [2], [4]]], [[[0], [10], [12]]],
-     [[[0], [5], [7]]], [[[0], [13], [15]]],
-     [[[0], [6], [8]]], [[[0], [14], [16]]]]
-```
-
-The output tensor has shape `[2, 2, 4, 1]` and value:
-
-```
-x = [[[[1],   [2],  [3],  [4]],
-      [[5],   [6],  [7],  [8]]],
-     [[[9],  [10], [11],  [12]],
-      [[13], [14], [15],  [16]]]]
-```
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("BatchToSpace")
@@ -4062,97 +2151,7 @@ REGISTER_OP("BatchToSpace")
       return BatchToSpaceShapeHelper(c, input_shape, c->MakeShape({2}),
                                      &block_shape, c->input(1),
                                      c->input_tensor(1));
-    })
-    .Doc(R"doc(
-BatchToSpace for 4-D tensors of type T.
-
-This is a legacy version of the more general BatchToSpaceND.
-
-Rearranges (permutes) data from batch into blocks of spatial data, followed by
-cropping. This is the reverse transformation of SpaceToBatch. More specifically,
-this op outputs a copy of the input tensor where values from the `batch`
-dimension are moved in spatial blocks to the `height` and `width` dimensions,
-followed by cropping along the `height` and `width` dimensions.
-
-input: 4-D tensor with shape
- `[batch*block_size*block_size, height_pad/block_size, width_pad/block_size,
-   depth]`. Note that the batch size of the input tensor must be divisible by
- `block_size * block_size`.
-
-crops: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies
-  how many elements to crop from the intermediate result across the spatial
-  dimensions as follows:
-
-      crops = [[crop_top, crop_bottom], [crop_left, crop_right]]
-
-output: 4-D with shape `[batch, height, width, depth]`, where:
-
-      height = height_pad - crop_top - crop_bottom
-      width = width_pad - crop_left - crop_right
-
-The attr `block_size` must be greater than one. It indicates the block size.
-
-Some examples:
-
-(1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2:
-
-```
-[[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-```
-
-The output tensor has shape `[1, 2, 2, 1]` and value:
-
-```
-x = [[[[1], [2]], [[3], [4]]]]
-```
-
-(2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2:
-
-```
-[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-```
-
-The output tensor has shape `[1, 2, 2, 3]` and value:
-
-```
-x = [[[[1, 2, 3], [4, 5, 6]],
-      [[7, 8, 9], [10, 11, 12]]]]
-```
-
-(3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2:
-
-```
-x = [[[[1], [3]], [[9], [11]]],
-     [[[2], [4]], [[10], [12]]],
-     [[[5], [7]], [[13], [15]]],
-     [[[6], [8]], [[14], [16]]]]
-```
-
-The output tensor has shape `[1, 4, 4, 1]` and value:
-
-```
-x = [[[1],   [2],  [3],  [4]],
-     [[5],   [6],  [7],  [8]],
-     [[9],  [10], [11],  [12]],
-     [[13], [14], [15],  [16]]]
-```
-
-(4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2:
-
-```
-x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],
-     [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]
-```
-
-The output tensor has shape `[2, 2, 4, 1]` and value:
-
-```
-x = [[[[1], [3]], [[5], [7]]],
-     [[[2], [4]], [[10], [12]]],
-     [[[5], [7]], [[13], [15]]],
-     [[[6], [8]], [[14], [16]]]]
-```
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("SpaceToDepth")
@@ -4206,96 +2205,7 @@ REGISTER_OP("SpaceToDepth")
 
       c->set_output(0, output_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-SpaceToDepth for tensors of type T.
-
-Rearranges blocks of spatial data, into depth. More specifically,
-this op outputs a copy of the input tensor where values from the `height`
-and `width` dimensions are moved to the `depth` dimension.
-The attr `block_size` indicates the input block size.
-
-  * Non-overlapping blocks of size `block_size x block size` are rearranged
-    into depth at each location.
-  * The depth of the output tensor is `block_size * block_size * input_depth`.
-  * The Y, X coordinates within each block of the input become the high order
-    component of the output channel index.
-  * The input tensor's height and width must be divisible by block_size.
-
-The `data_format` attr specifies the layout of the input and output tensors
-with the following options:
-  "NHWC": `[ batch, height, width, channels ]`
-  "NCHW": `[ batch, channels, height, width ]`
-  "NCHW_VECT_C":
-      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
-
-It is useful to consider the operation as transforming a 6-D Tensor.
-e.g. for data_format = NHWC,
-     Each element in the input tensor can be specified via 6 coordinates,
-     ordered by decreasing memory layout significance as:
-     n,oY,bY,oX,bX,iC  (where n=batch index, oX, oY means X or Y coordinates
-                        within the output image, bX, bY means coordinates
-                        within the input block, iC means input channels).
-     The output would be a transpose to the following layout:
-     n,oY,oX,bY,bX,iC
-
-This operation is useful for resizing the activations between convolutions
-(but keeping all data), e.g. instead of pooling. It is also useful for training
-purely convolutional models.
-
-For example, given an input of shape `[1, 2, 2, 1]`, data_format = "NHWC" and
-block_size = 2:
-
-```
-x = [[[[1], [2]],
-      [[3], [4]]]]
-```
-
-This operation will output a tensor of shape `[1, 1, 1, 4]`:
-
-```
-[[[[1, 2, 3, 4]]]]
-```
-
-Here, the input has a batch of 1 and each batch element has shape `[2, 2, 1]`,
-the corresponding output will have a single element (i.e. width and height are
-both 1) and will have a depth of 4 channels (1 * block_size * block_size).
-The output element shape is `[1, 1, 4]`.
-
-For an input tensor with larger depth, here of shape `[1, 2, 2, 3]`, e.g.
-
-```
-x = [[[[1, 2, 3], [4, 5, 6]],
-      [[7, 8, 9], [10, 11, 12]]]]
-```
-
-This operation, for block_size of 2, will return the following tensor of shape
-`[1, 1, 1, 12]`
-
-```
-[[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]
-```
-
-Similarly, for the following input of shape `[1 4 4 1]`, and a block size of 2:
-
-```
-x = [[[[1],   [2],  [5],  [6]],
-      [[3],   [4],  [7],  [8]],
-      [[9],  [10], [13],  [14]],
-      [[11], [12], [15],  [16]]]]
-```
-
-the operator will return the following tensor of shape `[1 2 2 4]`:
-
-```
-x = [[[[1, 2, 3, 4],
-       [5, 6, 7, 8]],
-      [[9, 10, 11, 12],
-       [13, 14, 15, 16]]]]
-```
-
-block_size: The size of the spatial block.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("DepthToSpace")
@@ -4347,102 +2257,7 @@ REGISTER_OP("DepthToSpace")
 
       c->set_output(0, output_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-DepthToSpace for tensors of type T.
-
-Rearranges data from depth into blocks of spatial data.
-This is the reverse transformation of SpaceToDepth. More specifically,
-this op outputs a copy of the input tensor where values from the `depth`
-dimension are moved in spatial blocks to the `height` and `width` dimensions.
-The attr `block_size` indicates the input block size and how the data is moved.
-
-  * Chunks of data of size `block_size * block_size` from depth are rearranged
-    into non-overlapping blocks of size `block_size x block_size`
-  * The width the output tensor is `input_depth * block_size`, whereas the
-    height is `input_height * block_size`.
-  * The Y, X coordinates within each block of the output image are determined
-    by the high order component of the input channel index.
-  * The depth of the input tensor must be divisible by
-    `block_size * block_size`.
-
-The `data_format` attr specifies the layout of the input and output tensors
-with the following options:
-  "NHWC": `[ batch, height, width, channels ]`
-  "NCHW": `[ batch, channels, height, width ]`
-  "NCHW_VECT_C":
-      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
-
-It is useful to consider the operation as transforming a 6-D Tensor.
-e.g. for data_format = NHWC,
-     Each element in the input tensor can be specified via 6 coordinates,
-     ordered by decreasing memory layout significance as:
-     n,iY,iX,bY,bX,oC  (where n=batch index, iX, iY means X or Y coordinates
-                        within the input image, bX, bY means coordinates
-                        within the output block, oC means output channels).
-     The output would be the input transposed to the following layout:
-     n,iY,bY,iX,bX,oC
-
-This operation is useful for resizing the activations between convolutions
-(but keeping all data), e.g. instead of pooling. It is also useful for training
-purely convolutional models.
-
-For example, given an input of shape `[1, 1, 1, 4]`, data_format = "NHWC" and
-block_size = 2:
-
-```
-x = [[[[1, 2, 3, 4]]]]
-
-```
-
-This operation will output a tensor of shape `[1, 2, 2, 1]`:
-
-```
-   [[[[1], [2]],
-     [[3], [4]]]]
-```
-
-Here, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`,
-the corresponding output will have 2x2 elements and will have a depth of
-1 channel (1 = `4 / (block_size * block_size)`).
-The output element shape is `[2, 2, 1]`.
-
-For an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g.
-
-```
-x = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]
-```
-
-This operation, for block size of 2, will return the following tensor of shape
-`[1, 2, 2, 3]`
-
-```
-   [[[[1, 2, 3], [4, 5, 6]],
-     [[7, 8, 9], [10, 11, 12]]]]
-
-```
-
-Similarly, for the following input of shape `[1 2 2 4]`, and a block size of 2:
-
-```
-x =  [[[[1, 2, 3, 4],
-       [5, 6, 7, 8]],
-      [[9, 10, 11, 12],
-       [13, 14, 15, 16]]]]
-```
-
-the operator will return the following tensor of shape `[1 4 4 1]`:
-
-```
-x = [[[ [1],   [2],  [5],  [6]],
-      [ [3],   [4],  [7],  [8]],
-      [ [9],  [10], [13],  [14]],
-      [ [11], [12], [15],  [16]]]]
-
-```
-
-block_size: The size of the spatial block, same as in Space2Depth.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -4529,34 +2344,7 @@ REGISTER_OP("ExtractImagePatches")
           {batch_size_dim, output_rows, output_cols, output_depth_dim});
       c->set_output(0, output_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Extract `patches` from `images` and put them in the "depth" output dimension.
-
-images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`.
-patches: 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows *
-  ksize_cols * depth]` containing image patches with size
-  `ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension. Note
-  `out_rows` and `out_cols` are the dimensions of the output patches.
-ksizes: The size of the sliding window for each dimension of `images`.
-strides: 1-D of length 4. How far the centers of two consecutive patches are in
-  the images. Must be: `[1, stride_rows, stride_cols, 1]`.
-rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
-  input stride, specifying how far two consecutive patch samples are in the
-  input. Equivalent to extracting patches with
-  `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
-  subsampling them spatially by a factor of `rates`. This is equivalent to
-  `rate` in dilated (a.k.a. Atrous) convolutions.
-padding: The type of padding algorithm to use.
-
-We specify the size-related attributes as:
-
-```python
-      ksizes = [1, ksize_rows, ksize_cols, 1]
-      strides = [1, strides_rows, strides_cols, 1]
-      rates = [1, rates_rows, rates_cols, 1]
-```
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -4565,12 +2353,12 @@ REGISTER_OP("Bitcast")
     .Output("output: type")
     // All supported dtypes are listed here to include qint16 and quint16.
     .Attr(
-        "T: {float, double, int64, int32, uint8, uint16, int8, int16,"
+        "T: {bfloat16, float, double, int64, int32, uint8, uint16, int8, int16,"
         " complex64, complex128, qint8, quint8, qint16, quint16, qint32,"
         " half}")
     .Attr(
-        "type: {float, double, int64, int32, uint8, uint16, int8, int16,"
-        " complex64, complex128, qint8, quint8, qint16, quint16, qint32,"
+        "type: {bfloat16, float, double, int64, int32, uint8, uint16, int8, "
+        "int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32,"
         " half}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle input = c->input(0);
@@ -4620,23 +2408,7 @@ REGISTER_OP("Bitcast")
 
       c->set_output(0, new_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Bitcasts a tensor from one type to another without copying data.
-
-Given a tensor `input`, this operation returns a tensor that has the same buffer
-data as `input` with datatype `type`.
-
-If the input datatype `T` is larger than the output datatype `type` then the
-shape changes from [...] to [..., sizeof(`T`)/sizeof(`type`)].
-
-If `T` is smaller than `type`, the operator requires that the rightmost
-dimension be equal to sizeof(`type`)/sizeof(`T`). The shape then goes from
-[..., sizeof(`type`)/sizeof(`T`)] to [...].
-
-*NOTE*: Bitcast is implemented as a low-level cast, so machines with different
-endian orderings will give different results.
-)doc");
+    });
 
 REGISTER_OP("OneHot")
     .Input("indices: TI")
@@ -4672,106 +2444,7 @@ REGISTER_OP("OneHot")
       TF_RETURN_IF_ERROR(c->Concatenate(front, back, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns a one-hot tensor.
-
-The locations represented by indices in `indices` take value `on_value`,
-while all other locations take value `off_value`.
-
-If the input `indices` is rank `N`, the output will have rank `N+1`,
-The new axis is created at dimension `axis` (default: the new axis is
-appended at the end).
-
-If `indices` is a scalar the output shape will be a vector of length `depth`.
-
-If `indices` is a vector of length `features`, the output shape will be:
-```
-  features x depth if axis == -1
-  depth x features if axis == 0
-```
-
-If `indices` is a matrix (batch) with shape `[batch, features]`,
-the output shape will be:
-```
-  batch x features x depth if axis == -1
-  batch x depth x features if axis == 1
-  depth x batch x features if axis == 0
-```
-
-
-Examples
-=========
-
-Suppose that
-
-```
-  indices = [0, 2, -1, 1]
-  depth = 3
-  on_value = 5.0
-  off_value = 0.0
-  axis = -1
-```
-
-Then output is `[4 x 3]`:
-
-    ```output =
-      [5.0 0.0 0.0]  // one_hot(0)
-      [0.0 0.0 5.0]  // one_hot(2)
-      [0.0 0.0 0.0]  // one_hot(-1)
-      [0.0 5.0 0.0]  // one_hot(1)
-    ```
-
-Suppose that
-
-```
-  indices = [0, 2, -1, 1]
-  depth = 3
-  on_value = 0.0
-  off_value = 3.0
-  axis = 0
-```
-
-Then output is `[3 x 4]`:
-
-    ```output =
-      [0.0 3.0 3.0 3.0]
-      [3.0 3.0 3.0 0.0]
-      [3.0 3.0 3.0 3.0]
-      [3.0 0.0 3.0 3.0]
-    //  ^                one_hot(0)
-    //      ^            one_hot(2)
-    //          ^        one_hot(-1)
-    //              ^    one_hot(1)
-    ```
-Suppose that
-
-```
-  indices = [[0, 2], [1, -1]]
-  depth = 3
-  on_value = 1.0
-  off_value = 0.0
-  axis = -1
-```
-
-Then output is `[2 x 2 x 3]`:
-
-    ```output =
-      [
-        [1.0, 0.0, 0.0]  // one_hot(0)
-        [0.0, 0.0, 1.0]  // one_hot(2)
-      ][
-        [0.0, 1.0, 0.0]  // one_hot(1)
-        [0.0, 0.0, 0.0]  // one_hot(-1)
-      ]```
-
-indices: A tensor of indices.
-depth: A scalar defining the depth of the one hot dimension.
-on_value: A scalar defining the value to fill in output when `indices[j] = i`.
-off_value: A scalar defining the value to fill in output when `indices[j] != i`.
-axis: The axis to fill (default: -1, a new inner-most axis).
-output: The one-hot tensor.
-)doc");
+    });
 
 // EXPERIMENTAL. DO NOT USE OR DEPEND ON THIS YET.
 REGISTER_OP("QuantizeAndDequantize")
@@ -4782,12 +2455,9 @@ REGISTER_OP("QuantizeAndDequantize")
     .Attr("input_min: float = 0")
     .Attr("input_max: float = 0")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape)
-    .Deprecated(22, "Replaced by QuantizeAndDequantizeV2")
-    .Doc(R"doc(
-Use QuantizeAndDequantizeV2 instead.
-)doc");
+    .Deprecated(22, "Replaced by QuantizeAndDequantizeV2");
 
 // TODO(suharshs): Deprecate QuantizeAndDequantizeV2.
 REGISTER_OP("QuantizeAndDequantizeV2")
@@ -4798,76 +2468,14 @@ REGISTER_OP("QuantizeAndDequantizeV2")
     .Attr("num_bits: int = 8")
     .Attr("range_given: bool = false")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
       c->set_output(0, c->input(0));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Quantizes then dequantizes a tensor.
-
-This op simulates the precision loss from the quantized forward pass by:
-1. Quantizing the tensor to fixed point numbers, which should match the target
-   quantization method when it is used in inference.
-2. Dequantizing it back to floating point numbers for the following ops, most
-   likely matmul.
-
-There are different ways to quantize. This version does not use the full range
-of the output type, choosing to elide the lowest possible value for symmetry
-(e.g., output range is -127 to 127, not -128 to 127 for signed 8 bit
-quantization), so that 0.0 maps to 0.
-
-To perform this op, we first find the range of values in our tensor. The range
-we use is always centered on 0, so we find m such that
-
-1. m = max(abs(input_min), abs(input_max)) if range_given is true,
-2. m = max(abs(min_elem(input)), abs(max_elem(input))) otherwise.
-
-Our input tensor range is then [-m, m].
-
-Next, we choose our fixed-point quantization buckets, [min_fixed, max_fixed].
-If signed_input is true, this is
-
-  [min_fixed, max_fixed ] =
-      [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1].
-
-Otherwise, if signed_input is false, the fixed-point range is
-
-  [min_fixed, max_fixed] = [0, (1 << num_bits) - 1].
-
-From this we compute our scaling factor, s:
-
-  s = (max_fixed - min_fixed) / (2 * m).
-
-Now we can quantize and dequantize the elements of our tensor.  An element e
-is transformed into e':
-
-  e' = (e * s).round_to_nearest() / s.
-
-Note that we have a different number of buckets in the signed vs. unsigned
-cases.  For example, if num_bits == 8, we get 254 buckets in the signed case
-vs. 255 in the unsigned case.
-
-For example, suppose num_bits = 8 and m = 1.  Then
-
-  [min_fixed, max_fixed] = [-127, 127], and
-  s = (127 + 127) / 2 = 127.
-
-Given the vector {-1, -0.5, 0, 0.3}, this is quantized to
-{-127, -63, 0, 38}, and dequantized to {-1, -63.0/127, 0, 38.0/127}.
-
-input: Tensor to quantize and then dequantize.
-signed_input: If the quantization is signed or unsigned.
-num_bits: The bitwidth of the quantization.
-range_given: If the range is given or should be computed from the tensor.
-input_min: If range_given, this is the min of the range, otherwise this input
-           will be ignored.
-input_max: If range_given, this is the max of the range, otherwise this input
-           will be ignored.
-)doc");
+    });
 
 REGISTER_OP("QuantizeAndDequantizeV3")
     .Input("input: T")
@@ -4877,7 +2485,7 @@ REGISTER_OP("QuantizeAndDequantizeV3")
     .Attr("signed_input: bool = true")
     .Attr("range_given: bool = true")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
@@ -4885,13 +2493,7 @@ REGISTER_OP("QuantizeAndDequantizeV3")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
       c->set_output(0, c->input(0));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Quantizes then dequantizes a tensor.
-
-This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a
-tensor, so its value can change during training.
-)doc");
+    });
 
 REGISTER_OP("QuantizeV2")
     .Input("input: float")
@@ -4913,110 +2515,7 @@ REGISTER_OP("QuantizeV2")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Quantize the 'input' tensor of type float to 'output' tensor of type 'T'.
-
-[min_range, max_range] are scalar floats that specify the range for
-the 'input' data. The 'mode' attribute controls exactly which calculations are
-used to convert the float values to their quantized equivalents.  The
-'round_mode' attribute controls which rounding tie-breaking algorithm is used
-when rounding float values to their quantized equivalents.
-
-In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
-
-```
-out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
-if T == qint8, out[i] -= (range(T) + 1) / 2.0
-```
-here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
-
-*MIN_COMBINED Mode Example*
-
-Assume the input is type float and has a possible range of [0.0, 6.0] and the
-output type is quint8 ([0, 255]). The min_range and max_range values should be
-specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each
-value of the input by 255/6 and cast to quint8.
-
-If the output type was qint8 ([-128, 127]), the operation will additionally
-subtract each value by 128 prior to casting, so that the range of values aligns
-with the range of qint8.
-
-If the mode is 'MIN_FIRST', then this approach is used:
-
-```
-num_discrete_values = 1 << (# of bits in T)
-range_adjust = num_discrete_values / (num_discrete_values - 1)
-range = (range_max - range_min) * range_adjust
-range_scale = num_discrete_values / range
-quantized = round(input * range_scale) - round(range_min * range_scale) +
-  numeric_limits<T>::min()
-quantized = max(quantized, numeric_limits<T>::min())
-quantized = min(quantized, numeric_limits<T>::max())
-```
-
-The biggest difference between this and MIN_COMBINED is that the minimum range
-is rounded first, before it's subtracted from the rounded value. With
-MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing
-and dequantizing will introduce a larger and larger error.
-
-*SCALED mode Example*
-
-`SCALED` mode matches the quantization approach used in
-`QuantizeAndDequantize{V2|V3}`.
-
-If the mode is `SCALED`, we do not use the full range of the output type,
-choosing to elide the lowest possible value for symmetry (e.g., output range is
--127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to
-0.
-
-We first find the range of values in our tensor. The
-range we use is always centered on 0, so we find m such that
-```c++
-  m = max(abs(input_min), abs(input_max))
-```
-
-Our input tensor range is then `[-m, m]`.
-
-Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.
-If T is signed, this is
-```
-  num_bits = sizeof(T) * 8
-  [min_fixed, max_fixed] =
-      [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]
-```
-
-Otherwise, if T is unsigned, the fixed-point range is
-```
-  [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]
-```
-
-From this we compute our scaling factor, s:
-```c++
-  s = (max_fixed - min_fixed) / (2 * m)
-```
-
-Now we can quantize the elements of our tensor:
-```c++
-result = round(input * s)
-```
-
-One thing to watch out for is that the operator may choose to adjust the
-requested minimum and maximum values slightly during the quantization process,
-so you should always use the output ports as the range for further calculations.
-For example, if the requested minimum and maximum values are close to equal,
-they will be separated by a small epsilon value to prevent ill-formed quantized
-buffers from being created. Otherwise, you can end up with buffers where all the
-quantized values map to the same float value, which causes problems for
-operations that have to perform further calculations on them.
-
-min_range: The minimum scalar value possibly produced for the input.
-max_range: The maximum scalar value possibly produced for the input.
-output: The quantized data produced from the float input.
-output_min: The actual minimum scalar value used for the output.
-output_max: The actual maximum scalar value used for the output.
-
-)doc");
+    });
 
 REGISTER_OP("Dequantize")
     .Input("input: T")
@@ -5031,88 +2530,7 @@ REGISTER_OP("Dequantize")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Dequantize the 'input' tensor into a float Tensor.
-
-[min_range, max_range] are scalar floats that specify the range for
-the 'input' data. The 'mode' attribute controls exactly which calculations are
-used to convert the float values to their quantized equivalents.
-
-In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
-
-```
-if T == qint8, in[i] += (range(T) + 1)/ 2.0
-out[i] = min_range + (in[i]* (max_range - min_range) / range(T))
-```
-here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
-
-*MIN_COMBINED Mode Example*
-
-If the input comes from a QuantizedRelu6, the output type is
-quint8 (range of 0-255) but the possible range of QuantizedRelu6 is
-0-6.  The min_range and max_range values are therefore 0.0 and 6.0.
-Dequantize on quint8 will take each value, cast to float, and multiply
-by 6 / 255.
-Note that if quantizedtype is qint8, the operation will additionally add
-each value by 128 prior to casting.
-
-If the mode is 'MIN_FIRST', then this approach is used:
-
-```c++
-num_discrete_values = 1 << (# of bits in T)
-range_adjust = num_discrete_values / (num_discrete_values - 1)
-range = (range_max - range_min) * range_adjust
-range_scale = range / num_discrete_values
-const double offset_input = static_cast<double>(input) - lowest_quantized;
-result = range_min + ((input - numeric_limits<T>::min()) * range_scale)
-```
-
-*SCALED mode Example*
-
-`SCALED` mode matches the quantization approach used in
-`QuantizeAndDequantize{V2|V3}`.
-
-If the mode is `SCALED`, we do not use the full range of the output type,
-choosing to elide the lowest possible value for symmetry (e.g., output range is
--127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to
-0.
-
-We first find the range of values in our tensor. The
-range we use is always centered on 0, so we find m such that
-```c++
-  m = max(abs(input_min), abs(input_max))
-```
-
-Our input tensor range is then `[-m, m]`.
-
-Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.
-If T is signed, this is
-```
-  num_bits = sizeof(T) * 8
-  [min_fixed, max_fixed] =
-      [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]
-```
-
-Otherwise, if T is unsigned, the fixed-point range is
-```
-  [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]
-```
-
-From this we compute our scaling factor, s:
-```c++
-  s = (2 * m) / (max_fixed - min_fixed)
-```
-
-Now we can dequantize the elements of our tensor:
-```c++
-result = input * s
-```
-
-min_range: The minimum scalar value possibly produced for the input.
-max_range: The maximum scalar value possibly produced for the input.
-
-)doc");
+    });
 
 REGISTER_OP("QuantizedConcat")
     .Input("concat_dim: int32")
@@ -5134,22 +2552,7 @@ REGISTER_OP("QuantizedConcat")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Concatenates quantized tensors along one dimension.
-
-concat_dim: 0-D.  The dimension along which to concatenate.  Must be in the
-  range [0, rank(values)).
-values: The `N` Tensors to concatenate. Their ranks and types must match,
-  and their sizes must match in all dimensions except `concat_dim`.
-input_mins: The minimum scalar values for each of the input tensors.
-input_maxes: The maximum scalar values for each of the input tensors.
-output_min: The float value that the minimum quantized output value represents.
-output_max: The float value that the maximum quantized output value represents.
-output: A `Tensor` with the concatenation of values stacked along the
-  `concat_dim` dimension.  This tensor's shape matches that of `values` except
-  in `concat_dim` where it has the sum of the sizes.
-)doc");
+    });
 
 REGISTER_OP("QuantizedReshape")
     .Input("tensor: T")
@@ -5169,17 +2572,7 @@ REGISTER_OP("QuantizedReshape")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"Doc(
-Reshapes a quantized tensor as per the Reshape op.
-```
-
-shape: Defines the shape of the output tensor.
-input_min: The minimum value of the input.
-input_max: The maximum value of the input.
-output_min: This value is copied from input_min.
-output_max: This value is copied from input_max.
-)Doc");
+    });
 
 REGISTER_OP("QuantizedInstanceNorm")
     .Input("x: T")
@@ -5207,24 +2600,7 @@ REGISTER_OP("QuantizedInstanceNorm")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Quantized Instance normalization.
-
-x: A 4D input Tensor.
-x_min: The value represented by the lowest quantized input.
-x_max: The value represented by the highest quantized input.
-y: A 4D Tensor.
-y_min: The value represented by the lowest quantized output.
-y_max: The value represented by the highest quantized output.
-output_range_given: If True, `given_y_min` and `given_y_min`
-  and `given_y_max` are used as the output range. Otherwise,
-  the implementation computes the output range.
-given_y_min: Output in `y_min` if `output_range_given` is True.
-given_y_max: Output in `y_max` if `output_range_given` is True.
-variance_epsilon: A small float number to avoid dividing by 0.
-min_separation: Minimum value of `y_max - y_min`
-)doc");
+    });
 
 namespace {
 
@@ -5298,88 +2674,7 @@ REGISTER_OP("ScatterNd")
     .Output("output: T")
     .Attr("T: type")
     .Attr("Tindices: {int32, int64}")
-    .SetShapeFn(ScatterNdShape)
-    .Doc(R"doc(
-Scatter `updates` into a new (initially zero) tensor according to `indices`.
-
-Creates a new tensor by applying sparse `updates` to individual
-values or slices within a zero tensor of the given `shape` according to
-indices.  This operator is the inverse of the @{tf.gather_nd} operator which
-extracts values or slices from a given tensor.
-
-**WARNING**: The order in which updates are applied is nondeterministic, so the
-output will be nondeterministic if `indices` contains duplicates.
-
-`indices` is an integer tensor containing indices into a new tensor of shape
-`shape`.  The last dimension of `indices` can be at most the rank of `shape`:
-
-    indices.shape[-1] <= shape.rank
-
-The last dimension of `indices` corresponds to indices into elements
-(if `indices.shape[-1] = shape.rank`) or slices
-(if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of
-`shape`.  `updates` is a tensor with shape
-
-    indices.shape[:-1] + shape[indices.shape[-1]:]
-
-The simplest form of scatter is to insert individual elements in a tensor by
-index. For example, say we want to insert 4 scattered elements in a rank-1
-tensor with 8 elements.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/ScatterNd1.png" alt>
-</div>
-
-In Python, this scatter operation would look like this:
-
-```python
-    indices = tf.constant([[4], [3], [1], [7]])
-    updates = tf.constant([9, 10, 11, 12])
-    shape = tf.constant([8])
-    scatter = tf.scatter_nd(indices, updates, shape)
-    with tf.Session() as sess:
-      print(sess.run(scatter))
-```
-
-The resulting tensor would look like this:
-
-    [0, 11, 0, 10, 9, 0, 0, 12]
-
-We can also, insert entire slices of a higher rank tensor all at once. For
-example, if we wanted to insert two slices in the first dimension of a
-rank-3 tensor with two matrices of new values.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/ScatterNd2.png" alt>
-</div>
-
-In Python, this scatter operation would look like this:
-
-```python
-    indices = tf.constant([[0], [2]])
-    updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6],
-                            [7, 7, 7, 7], [8, 8, 8, 8]],
-                           [[5, 5, 5, 5], [6, 6, 6, 6],
-                            [7, 7, 7, 7], [8, 8, 8, 8]]])
-    shape = tf.constant([4, 4, 4])
-    scatter = tf.scatter_nd(indices, updates, shape)
-    with tf.Session() as sess:
-      print(sess.run(scatter))
-```
-
-The resulting tensor would look like this:
-
-    [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
-     [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
-     [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
-     [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]
-
-indices: Index tensor.
-updates: Updates to scatter into output.
-shape: 1-D. The shape of the resulting tensor.
-output: A new tensor with the given shape and updates applied according
-  to the indices.
-)doc");
+    .SetShapeFn(ScatterNdShape);
 
 REGISTER_OP("ScatterNdNonAliasingAdd")
     .Input("input: T")
@@ -5388,53 +2683,7 @@ REGISTER_OP("ScatterNdNonAliasingAdd")
     .Output("output: T")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
-    .SetShapeFn(shape_inference::ScatterNdUpdateShape)
-    .Doc(R"doc(
-Applies sparse addition to `input` using individual values or slices
-from `updates` according to indices `indices`.  The updates are non-aliasing:
-`input` is only modified in-place if no other operations will use it.
-Otherwise, a copy of `input` is made.  This operation has a gradient with
-respect to both `input` and `updates`.
-
-`input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
-
-`indices` must be integer tensor, containing indices into `input`.
-It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
-
-The innermost dimension of `indices` (with length `K`) corresponds to
-indices into elements (if `K = P`) or `(P-K)`-dimensional slices
-(if `K < P`) along the `K`th dimension of `input`.
-
-`updates` is `Tensor` of rank `Q-1+P-K` with shape:
-
-```
-[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].
-```
-
-For example, say we want to add 4 scattered elements to a rank-1 tensor to 8
-elements. In Python, that addition would look like this:
-
-    input = tf.constant([1, 2, 3, 4, 5, 6, 7, 8])
-    indices = tf.constant([[4], [3], [1], [7]])
-    updates = tf.constant([9, 10, 11, 12])
-    output = tf.scatter_nd_non_aliasing_add(input, indices, updates)
-    with tf.Session() as sess:
-      print(sess.run(output))
-
-The resulting value `output` would look like this:
-
-    [1, 13, 3, 14, 14, 6, 7, 20]
-
-See @{tf.scatter_nd} for more details about how to make updates to slices.
-
-input: A Tensor.
-indices: A Tensor. Must be one of the following types: `int32`, `int64`.
-  A tensor of indices into `input`.
-updates: A Tensor. Must have the same type as ref. A tensor of updated values
-  to add to `input`.
-output: A `Tensor` with the same shape as `input`, containing values of `input`
-  updated with `updates`.
-)doc");
+    .SetShapeFn(shape_inference::ScatterNdUpdateShape);
 
 REGISTER_OP("FakeQuantWithMinMaxArgs")
     .Attr("min: float = -6.0")
@@ -5443,18 +2692,7 @@ REGISTER_OP("FakeQuantWithMinMaxArgs")
     .Attr("narrow_range: bool = false")
     .Input("inputs: float")
     .Output("outputs: float")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Fake-quantize the 'inputs' tensor, type float to 'outputs' tensor of same type.
-
-Attributes `[min; max]` define the clamping range for the `inputs` data.
-`inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
-when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
-then de-quantized and output as floats in `[min; max]` interval.
-`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.
-
-Quantization is called fake since the output is still in floating point.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("FakeQuantWithMinMaxArgsGradient")
     .Attr("min: float = -6.0")
@@ -5464,15 +2702,7 @@ REGISTER_OP("FakeQuantWithMinMaxArgsGradient")
     .Input("gradients: float")
     .Input("inputs: float")
     .Output("backprops: float")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Compute gradients for a FakeQuantWithMinMaxArgs operation.
-
-gradients: Backpropagated gradients above the FakeQuantWithMinMaxArgs operation.
-inputs: Values passed as inputs to the FakeQuantWithMinMaxArgs operation.
-backprops: Backpropagated gradients below the FakeQuantWithMinMaxArgs operation:
-  `gradients * (inputs >= min && inputs <= max)`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("FakeQuantWithMinMaxVars")
     .Attr("num_bits: int = 8")
@@ -5487,20 +2717,7 @@ REGISTER_OP("FakeQuantWithMinMaxVars")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Fake-quantize the 'inputs' tensor of type float via global float scalars `min`
-and `max` to 'outputs' tensor of same shape as `inputs`.
-
-`[min; max]` define the clamping range for the `inputs` data.
-`inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
-when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
-then de-quantized and output as floats in `[min; max]` interval.
-`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.
-
-This operation has a gradient and thus allows for training `min` and `max`
-values.
-)doc");
+    });
 
 REGISTER_OP("FakeQuantWithMinMaxVarsGradient")
     .Attr("num_bits: int = 8")
@@ -5526,22 +2743,7 @@ REGISTER_OP("FakeQuantWithMinMaxVarsGradient")
       c->set_output(1, min_max);
       c->set_output(2, min_max);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Compute gradients for a FakeQuantWithMinMaxVars operation.
-
-gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation.
-inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation.
-min, max: Quantization interval, scalar floats.
-num_bits: The bitwidth of the quantization; between 2 and 8, inclusive.
-narrow_range: Whether to quantize into 2^num_bits - 1 distinct values.
-backprops_wrt_input: Backpropagated gradients w.r.t. inputs:
-  `gradients * (inputs >= min && inputs <= max)`.
-backprop_wrt_min: Backpropagated gradients w.r.t. min parameter:
-  `sum(gradients * (inputs < min))`.
-backprop_wrt_max: Backpropagated gradients w.r.t. max parameter:
-  `sum(gradients * (inputs > max))`.
-)doc");
+    });
 
 REGISTER_OP("FakeQuantWithMinMaxVarsPerChannel")
     .Attr("num_bits: int = 8")
@@ -5563,21 +2765,7 @@ REGISTER_OP("FakeQuantWithMinMaxVarsPerChannel")
 
       c->set_output(0, input);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Fake-quantize the 'inputs' tensor of type float and one of the shapes: `[d]`,
-`[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]`
-to 'outputs' tensor of same shape as `inputs`.
-
-`[min; max]` define the clamping range for the `inputs` data.
-`inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
-when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
-then de-quantized and output as floats in `[min; max]` interval.
-`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.
-
-This operation has a gradient and thus allows for training `min` and `max`
-values.
-)doc");
+    });
 
 REGISTER_OP("FakeQuantWithMinMaxVarsPerChannelGradient")
     .Attr("num_bits: int = 8")
@@ -5606,25 +2794,7 @@ REGISTER_OP("FakeQuantWithMinMaxVarsPerChannelGradient")
       c->set_output(1, min_max);
       c->set_output(2, min_max);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Compute gradients for a FakeQuantWithMinMaxVarsPerChannel operation.
-
-gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation,
-  shape one of: `[d]`, `[b, d]`,  `[b, h, w, d]`.
-inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation, shape
-  same as `gradients`.
-min, max: Quantization interval, floats of shape `[d]`.
-num_bits: The bitwidth of the quantization; between 2 and 8, inclusive.
-narrow_range: Whether to quantize into 2^num_bits - 1 distinct values.
-backprops_wrt_input: Backpropagated gradients w.r.t. inputs, shape same as
-  `inputs`:
-    `gradients * (inputs >= min && inputs <= max)`.
-backprop_wrt_min: Backpropagated gradients w.r.t. min parameter, shape `[d]`:
-  `sum_per_d(gradients * (inputs < min))`.
-backprop_wrt_max: Backpropagated gradients w.r.t. max parameter, shape `[d]`:
-  `sum_per_d(gradients * (inputs > max))`.
-)doc");
+    });
 
 #ifdef INTEL_MKL
 REGISTER_OP("_MklConcat")
diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc
index 94eb120175555d8d51b9be1ff98676a9dc4fff07..a182fd1c475ad44dcd0f05d42a9cbd6eeab16469 100644
--- a/tensorflow/core/ops/array_ops_test.cc
+++ b/tensorflow/core/ops/array_ops_test.cc
@@ -158,6 +158,13 @@ TEST(ArrayOpsTest, UnchangedShapes_ShapeFn) {
   INFER_OK(op, "[1,2,?,4,5];?;?", "in0");
 }
 
+TEST(ArrayOpsTest, GuaranteeConst_ShapeFn) {
+  ShapeInferenceTestOp op("GuaranteeConst");
+  INFER_OK(op, "?", "in0");
+  INFER_OK(op, "[]", "in0");
+  INFER_OK(op, "[1,2,?,4,5]", "in0");
+}
+
 TEST(ArrayOpsTest, Identity_ShapeFnHandles) {
   const char* op_name = "Identity";
   ShapeInferenceTestOp op(op_name);
@@ -246,6 +253,7 @@ TEST(ArrayOpsTest, ReverseV2_ShapeFn) {
 
 TEST(ArrayOpsTest, Fill_ShapeFn) {
   ShapeInferenceTestOp op("Fill");
+  AddNodeAttr("index_type", DT_INT32, &op.node_def);
   op.input_tensors.resize(2);
   INFER_OK(op, "?;?", "?");
   INFER_OK(op, "[?];?", "?");
@@ -514,7 +522,7 @@ TEST(ArrayOpsTest, MatrixSetDiag_ShapeFn) {
   INFER_ERROR("Dimensions must be equal, but are 2 and 3", op, "[2,3];[3]");
 
   // Output matches input.
-  INFER_OK(op, "?;?", "?");
+  INFER_OK(op, "?;?", "in0");
   INFER_OK(op, "[1,2,2];[1,2]", "in0");
   INFER_OK(op, "[1,2,3];?", "in0");
   INFER_OK(op, "[1,3,2];?", "in0");
@@ -1612,7 +1620,7 @@ TEST(ArrayOpsTest, UnchangedWithQuantizationScalars_ShapeFn) {
 TEST(ArrayOpsTest, FakeQuantWithMinMaxVarsPerChannel) {
   ShapeInferenceTestOp op("FakeQuantWithMinMaxVarsPerChannel");
 
-  INFER_OK(op, "?;?;?", "?");
+  INFER_OK(op, "?;?;?", "in0");
   INFER_OK(op, "[?];?;?", "in0");
   INFER_OK(op, "[1,?,3];[3];[3]", "in0");
   INFER_OK(op, "[3];[3];[3]", "in0");
@@ -1631,7 +1639,7 @@ TEST(ArrayOpsTest, FakeQuantWithMinMaxVarsPerChannel) {
 TEST(ArrayOpsTest, FakeQuantWithMinMaxVarsPerChannelGradient) {
   ShapeInferenceTestOp op("FakeQuantWithMinMaxVarsPerChannelGradient");
 
-  INFER_OK(op, "?;?;?;?", "?;[?];[?]");
+  INFER_OK(op, "?;?;?;?", "in0;[?];[?]");
   INFER_OK(op, "[3];[3];[3];[3]", "in0;in3;in3");
   INFER_OK(op, "[1,3];[1,3];[3];[3]", "in0;in3;in3");
   INFER_OK(op, "[1,2,3,4];[1,2,3,4];[4];[4]", "in0;in3;in3");
diff --git a/tensorflow/core/ops/audio_ops.cc b/tensorflow/core/ops/audio_ops.cc
index d944e385a8cba2eee8311c36deed689d42150ef8..bcc46761c130565d2462584a4fb06493f5a9841c 100644
--- a/tensorflow/core/ops/audio_ops.cc
+++ b/tensorflow/core/ops/audio_ops.cc
@@ -128,52 +128,13 @@ REGISTER_OP("DecodeWav")
     .Attr("desired_samples: int = -1")
     .Output("audio: float")
     .Output("sample_rate: int32")
-    .SetShapeFn(DecodeWavShapeFn)
-    .Doc(R"doc(
-Decode a 16-bit PCM WAV file to a float tensor.
-
-The -32768 to 32767 signed 16-bit values will be scaled to -1.0 to 1.0 in float.
-
-When desired_channels is set, if the input contains fewer channels than this
-then the last channel will be duplicated to give the requested number, else if
-the input has more channels than requested then the additional channels will be
-ignored.
-
-If desired_samples is set, then the audio will be cropped or padded with zeroes
-to the requested length.
-
-The first output contains a Tensor with the content of the audio samples. The
-lowest dimension will be the number of channels, and the second will be the
-number of samples. For example, a ten-sample-long stereo WAV file should give an
-output shape of [10, 2].
-
-contents: The WAV-encoded audio, usually from a file.
-desired_channels: Number of sample channels wanted.
-desired_samples: Length of audio requested.
-audio: 2-D with shape `[length, channels]`.
-sample_rate: Scalar holding the sample rate found in the WAV header.
-)doc");
+    .SetShapeFn(DecodeWavShapeFn);
 
 REGISTER_OP("EncodeWav")
     .Input("audio: float")
     .Input("sample_rate: int32")
     .Output("contents: string")
-    .SetShapeFn(EncodeWavShapeFn)
-    .Doc(R"doc(
-Encode audio data using the WAV file format.
-
-This operation will generate a string suitable to be saved out to create a .wav
-audio file. It will be encoded in the 16-bit PCM format. It takes in float
-values in the range -1.0f to 1.0f, and any outside that value will be clamped to
-that range.
-
-`audio` is a 2-D float Tensor of shape `[length, channels]`.
-`sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
-
-audio: 2-D with shape `[length, channels]`.
-sample_rate: Scalar containing the sample frequency.
-contents: 0-D. WAV-encoded file contents.
-)doc");
+    .SetShapeFn(EncodeWavShapeFn);
 
 REGISTER_OP("AudioSpectrogram")
     .Input("input: float")
@@ -181,44 +142,7 @@ REGISTER_OP("AudioSpectrogram")
     .Attr("stride: int")
     .Attr("magnitude_squared: bool = false")
     .Output("spectrogram: float")
-    .SetShapeFn(SpectrogramShapeFn)
-    .Doc(R"doc(
-Produces a visualization of audio data over time.
-
-Spectrograms are a standard way of representing audio information as a series of
-slices of frequency information, one slice for each window of time. By joining
-these together into a sequence, they form a distinctive fingerprint of the sound
-over time.
-
-This op expects to receive audio data as an input, stored as floats in the range
--1 to 1, together with a window width in samples, and a stride specifying how
-far to move the window between slices. From this it generates a three
-dimensional output. The lowest dimension has an amplitude value for each
-frequency during that time slice. The next dimension is time, with successive
-frequency slices. The final dimension is for the channels in the input, so a
-stereo audio input would have two here for example.
-
-This means the layout when converted and saved as an image is rotated 90 degrees
-clockwise from a typical spectrogram. Time is descending down the Y axis, and
-the frequency decreases from left to right.
-
-Each value in the result represents the square root of the sum of the real and
-imaginary parts of an FFT on the current window of samples. In this way, the
-lowest dimension represents the power of each frequency in the current window,
-and adjacent windows are concatenated in the next dimension.
-
-To get a more intuitive and visual look at what this operation does, you can run
-tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
-resulting spectrogram as a PNG image.
-
-input: Float representation of audio data.
-window_size: How wide the input window is in samples. For the highest efficiency
-  this should be a power of two, but other values are accepted.
-stride: How widely apart the center of adjacent sample windows should be.
-magnitude_squared: Whether to return the squared magnitude or just the
-  magnitude. Using squared magnitude can avoid extra calculations.
-spectrogram: 3D representation of the audio frequencies as an image.
-)doc");
+    .SetShapeFn(SpectrogramShapeFn);
 
 REGISTER_OP("Mfcc")
     .Input("spectrogram: float")
@@ -228,26 +152,6 @@ REGISTER_OP("Mfcc")
     .Attr("filterbank_channel_count: int = 40")
     .Attr("dct_coefficient_count: int = 13")
     .Output("output: float")
-    .SetShapeFn(MfccShapeFn)
-    .Doc(R"doc(
-Transforms a spectrogram into a form that's useful for speech recognition.
-
-Mel Frequency Cepstral Coefficients are a way of representing audio data that's
-been effective as an input feature for machine learning. They are created by
-taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the
-higher frequencies that are less significant to the human ear. They have a long
-history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
-is a good resource to learn more.
-
-spectrogram: Typically produced by the Spectrogram op, with magnitude_squared
-  set to true.
-sample_rate: How many samples per second the source audio used.
-upper_frequency_limit: The highest frequency to use when calculating the
-  ceptstrum.
-lower_frequency_limit: The lowest frequency to use when calculating the
-  ceptstrum.
-filterbank_channel_count: Resolution of the Mel bank used internally.
-dct_coefficient_count: How many output channels to produce per time slice.
-)doc");
+    .SetShapeFn(MfccShapeFn);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/bitwise_ops.cc b/tensorflow/core/ops/bitwise_ops.cc
index 2889953bdbc614bc4e56245e45c08d913cfd5255..39acf5f358b9c1388d56d884e4f27dc4656d9514 100644
--- a/tensorflow/core/ops/bitwise_ops.cc
+++ b/tensorflow/core/ops/bitwise_ops.cc
@@ -24,13 +24,7 @@ REGISTER_OP("Invert")
     .Input("x: T")
     .Output("y: T")
     .Attr("T: {int8, int16, int32, int64, uint8, uint16, uint32, uint64}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Flips all bits elementwise.
-
-The result will have exactly those bits set, that are not set in `x`. The
-computation is performed on the underlying representation of x.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 #define BINARY_BITWISE()                                                     \
   Input("x: T")                                                              \
@@ -38,70 +32,22 @@ computation is performed on the underlying representation of x.
       .Output("z: T")                                                        \
       .SetIsCommutative()                                                    \
       .Attr("T: {int8, int16, int32, int64, uint8, uint16, uint32, uint64}") \
-      .SetShapeFn(shape_inference::UnchangedShape)
+      .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
 
 REGISTER_OP("PopulationCount")
     .Input("x: T")
     .Output("y: uint8")
     .Attr("T: {int8, int16, int32, int64, uint8, uint16, uint32, uint64}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Computes element-wise population count (a.k.a. popcount, bitsum, bitcount).
-
-For each entry in `x`, calculates the number of `1` (on) bits in the binary
-representation of that entry.
-
-**NOTE**: It is more efficient to first `tf.bitcast` your tensors into
-`int32` or `int64` and perform the bitcount on the result, than to feed in
-8- or 16-bit inputs and then aggregate the resulting counts.
-)doc");
-
-REGISTER_OP("BitwiseAnd")
-    .BINARY_BITWISE()
-    .Doc(R"doc(
-Elementwise computes the bitwise AND of `x` and `y`.
-
-The result will have those bits set, that are set in both `x` and `y`. The
-computation is performed on the underlying representations of `x` and `y`.
-)doc");
-
-REGISTER_OP("BitwiseOr")
-    .BINARY_BITWISE()
-    .Doc(R"doc(
-Elementwise computes the bitwise OR of `x` and `y`.
-
-The result will have those bits set, that are set in `x`, `y` or both. The
-computation is performed on the underlying representations of `x` and `y`.
-)doc");
-
-REGISTER_OP("BitwiseXor")
-    .BINARY_BITWISE()
-    .Doc(R"doc(
-Elementwise computes the bitwise XOR of `x` and `y`.
-
-The result will have those bits set, that are different in `x` and `y`. The
-computation is performed on the underlying representations of `x` and `y`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
-REGISTER_OP("LeftShift")
-    .BINARY_BITWISE()
-    .Doc(R"doc(
-Elementwise computes the bitwise left-shift of `x` and `y`.
+REGISTER_OP("BitwiseAnd").BINARY_BITWISE();
 
-If `y` is negative, or greater than or equal to the width of `x` in bits the
-result is implementation defined.
-)doc");
+REGISTER_OP("BitwiseOr").BINARY_BITWISE();
 
-REGISTER_OP("RightShift")
-    .BINARY_BITWISE()
-    .Doc(R"doc(
-Elementwise computes the bitwise right-shift of `x` and `y`.
+REGISTER_OP("BitwiseXor").BINARY_BITWISE();
 
-Performs a logical shift for unsigned integer types, and an arithmetic shift
-for signed integer types.
+REGISTER_OP("LeftShift").BINARY_BITWISE();
 
-If `y` is negative, or greater than or equal to than the width of `x` in bits
-the result is implementation defined.
-)doc");
+REGISTER_OP("RightShift").BINARY_BITWISE();
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/candidate_sampling_ops.cc b/tensorflow/core/ops/candidate_sampling_ops.cc
index 18700be67a667359d7a86d8f81ada383be973a0a..6e4d100b04fba22c170a654c9314e3a7e26fadda 100644
--- a/tensorflow/core/ops/candidate_sampling_ops.cc
+++ b/tensorflow/core/ops/candidate_sampling_ops.cc
@@ -55,42 +55,7 @@ REGISTER_OP("UniformCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful()
-    .Doc(R"doc(
-Generates labels for candidate sampling with a uniform distribution.
-
-See explanations of candidate sampling and the data formats at
-go/candidate-sampling.
-
-For each batch, this op picks a single set of sampled candidate labels.
-
-The advantages of sampling candidates per-batch are simplicity and the
-possibility of efficient dense matrix multiplication. The disadvantage is that
-the sampled candidates must be chosen independently of the context and of the
-true labels.
-
-true_classes: A batch_size * num_true matrix, in which each row contains the
-  IDs of the num_true target_classes in the corresponding original label.
-sampled_candidates: A vector of length num_sampled, in which each element is
-  the ID of a sampled candidate.
-true_expected_count: A batch_size * num_true matrix, representing
-  the number of times each candidate is expected to occur in a batch
-  of sampled candidates. If unique=true, then this is a probability.
-sampled_expected_count: A vector of length num_sampled, for each sampled
-  candidate representing the number of times the candidate is expected
-  to occur in a batch of sampled candidates.  If unique=true, then this is a
-  probability.
-num_true: Number of true labels per context.
-num_sampled: Number of candidates to randomly sample.
-unique: If unique is true, we sample with rejection, so that all sampled
-  candidates in a batch are unique. This requires some approximation to
-  estimate the post-rejection sampling probabilities.
-range_max: The sampler will sample integers from the interval [0, range_max).
-seed: If either seed or seed2 are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: An second seed to avoid seed collision.
-)doc");
+    .SetIsStateful();
 
 REGISTER_OP("LogUniformCandidateSampler")
     .Input("true_classes: int64")
@@ -104,43 +69,7 @@ REGISTER_OP("LogUniformCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful()
-    .Doc(R"doc(
-Generates labels for candidate sampling with a log-uniform distribution.
-
-See explanations of candidate sampling and the data formats at
-go/candidate-sampling.
-
-For each batch, this op picks a single set of sampled candidate labels.
-
-The advantages of sampling candidates per-batch are simplicity and the
-possibility of efficient dense matrix multiplication. The disadvantage is that
-the sampled candidates must be chosen independently of the context and of the
-true labels.
-
-
-true_classes: A batch_size * num_true matrix, in which each row contains the
-  IDs of the num_true target_classes in the corresponding original label.
-sampled_candidates: A vector of length num_sampled, in which each element is
-  the ID of a sampled candidate.
-true_expected_count: A batch_size * num_true matrix, representing
-  the number of times each candidate is expected to occur in a batch
-  of sampled candidates. If unique=true, then this is a probability.
-sampled_expected_count: A vector of length num_sampled, for each sampled
-  candidate representing the number of times the candidate is expected
-  to occur in a batch of sampled candidates.  If unique=true, then this is a
-  probability.
-num_true: Number of true labels per context.
-num_sampled: Number of candidates to randomly sample.
-unique: If unique is true, we sample with rejection, so that all sampled
-  candidates in a batch are unique. This requires some approximation to
-  estimate the post-rejection sampling probabilities.
-range_max: The sampler will sample integers from the interval [0, range_max).
-seed: If either seed or seed2 are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: An second seed to avoid seed collision.
-)doc");
+    .SetIsStateful();
 
 REGISTER_OP("LearnedUnigramCandidateSampler")
     .Input("true_classes: int64")
@@ -154,42 +83,7 @@ REGISTER_OP("LearnedUnigramCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful()
-    .Doc(R"doc(
-Generates labels for candidate sampling with a learned unigram distribution.
-
-See explanations of candidate sampling and the data formats at
-go/candidate-sampling.
-
-For each batch, this op picks a single set of sampled candidate labels.
-
-The advantages of sampling candidates per-batch are simplicity and the
-possibility of efficient dense matrix multiplication. The disadvantage is that
-the sampled candidates must be chosen independently of the context and of the
-true labels.
-
-true_classes: A batch_size * num_true matrix, in which each row contains the
-  IDs of the num_true target_classes in the corresponding original label.
-sampled_candidates: A vector of length num_sampled, in which each element is
-  the ID of a sampled candidate.
-true_expected_count: A batch_size * num_true matrix, representing
-  the number of times each candidate is expected to occur in a batch
-  of sampled candidates. If unique=true, then this is a probability.
-sampled_expected_count: A vector of length num_sampled, for each sampled
-  candidate representing the number of times the candidate is expected
-  to occur in a batch of sampled candidates.  If unique=true, then this is a
-  probability.
-num_true: Number of true labels per context.
-num_sampled: Number of candidates to randomly sample.
-unique: If unique is true, we sample with rejection, so that all sampled
-  candidates in a batch are unique. This requires some approximation to
-  estimate the post-rejection sampling probabilities.
-range_max: The sampler will sample integers from the interval [0, range_max).
-seed: If either seed or seed2 are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: An second seed to avoid seed collision.
-)doc");
+    .SetIsStateful();
 
 REGISTER_OP("ThreadUnsafeUnigramCandidateSampler")
     .Input("true_classes: int64")
@@ -203,42 +97,7 @@ REGISTER_OP("ThreadUnsafeUnigramCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful()
-    .Doc(R"doc(
-Generates labels for candidate sampling with a learned unigram distribution.
-
-See explanations of candidate sampling and the data formats at
-go/candidate-sampling.
-
-For each batch, this op picks a single set of sampled candidate labels.
-
-The advantages of sampling candidates per-batch are simplicity and the
-possibility of efficient dense matrix multiplication. The disadvantage is that
-the sampled candidates must be chosen independently of the context and of the
-true labels.
-
-true_classes: A batch_size * num_true matrix, in which each row contains the
-  IDs of the num_true target_classes in the corresponding original label.
-sampled_candidates: A vector of length num_sampled, in which each element is
-  the ID of a sampled candidate.
-true_expected_count: A batch_size * num_true matrix, representing
-  the number of times each candidate is expected to occur in a batch
-  of sampled candidates. If unique=true, then this is a probability.
-sampled_expected_count: A vector of length num_sampled, for each sampled
-  candidate representing the number of times the candidate is expected
-  to occur in a batch of sampled candidates.  If unique=true, then this is a
-  probability.
-num_true: Number of true labels per context.
-num_sampled: Number of candidates to randomly sample.
-unique: If unique is true, we sample with rejection, so that all sampled
-  candidates in a batch are unique. This requires some approximation to
-  estimate the post-rejection sampling probabilities.
-range_max: The sampler will sample integers from the interval [0, range_max).
-seed: If either seed or seed2 are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: An second seed to avoid seed collision.
-)doc");
+    .SetIsStateful();
 
 REGISTER_OP("FixedUnigramCandidateSampler")
     .Input("true_classes: int64")
@@ -258,70 +117,7 @@ REGISTER_OP("FixedUnigramCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful()
-    .Doc(R"doc(
-Generates labels for candidate sampling with a learned unigram distribution.
-
-A unigram sampler could use a fixed unigram distribution read from a
-file or passed in as an in-memory array instead of building up the distribution
-from data on the fly. There is also an option to skew the distribution by
-applying a distortion power to the weights.
-
-The vocabulary file should be in CSV-like format, with the last field
-being the weight associated with the word.
-
-For each batch, this op picks a single set of sampled candidate labels.
-
-The advantages of sampling candidates per-batch are simplicity and the
-possibility of efficient dense matrix multiplication. The disadvantage is that
-the sampled candidates must be chosen independently of the context and of the
-true labels.
-
-true_classes: A batch_size * num_true matrix, in which each row contains the
-  IDs of the num_true target_classes in the corresponding original label.
-sampled_candidates: A vector of length num_sampled, in which each element is
-  the ID of a sampled candidate.
-true_expected_count: A batch_size * num_true matrix, representing
-  the number of times each candidate is expected to occur in a batch
-  of sampled candidates. If unique=true, then this is a probability.
-sampled_expected_count: A vector of length num_sampled, for each sampled
-  candidate representing the number of times the candidate is expected
-  to occur in a batch of sampled candidates.  If unique=true, then this is a
-  probability.
-num_true: Number of true labels per context.
-num_sampled: Number of candidates to randomly sample.
-unique: If unique is true, we sample with rejection, so that all sampled
-  candidates in a batch are unique. This requires some approximation to
-  estimate the post-rejection sampling probabilities.
-range_max: The sampler will sample integers from the interval [0, range_max).
-vocab_file: Each valid line in this file (which should have a CSV-like format)
-  corresponds to a valid word ID. IDs are in sequential order, starting from
-  num_reserved_ids. The last entry in each line is expected to be a value
-  corresponding to the count or relative probability. Exactly one of vocab_file
-  and unigrams needs to be passed to this op.
-distortion: The distortion is used to skew the unigram probability distribution.
-  Each weight is first raised to the distortion's power before adding to the
-  internal unigram distribution. As a result, distortion = 1.0 gives regular
-  unigram sampling (as defined by the vocab file), and distortion = 0.0 gives
-  a uniform distribution.
-num_reserved_ids: Optionally some reserved IDs can be added in the range [0,
-  ..., num_reserved_ids) by the users. One use case is that a special unknown
-  word token is used as ID 0. These IDs will have a sampling probability of 0.
-num_shards: A sampler can be used to sample from a subset of the original range
-  in order to speed up the whole computation through parallelism. This parameter
-  (together with 'shard') indicates the number of partitions that are being
-  used in the overall computation.
-shard: A sampler can be used to sample from a subset of the original range
-  in order to speed up the whole computation through parallelism. This parameter
-  (together with 'num_shards') indicates the particular partition number of a
-  sampler op, when partitioning is being used.
-unigrams: A list of unigram counts or probabilities, one per ID in sequential
-  order. Exactly one of vocab_file and unigrams should be passed to this op.
-seed: If either seed or seed2 are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: An second seed to avoid seed collision.
-)doc");
+    .SetIsStateful();
 
 REGISTER_OP("AllCandidateSampler")
     .Input("true_classes: int64")
@@ -334,41 +130,7 @@ REGISTER_OP("AllCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful()
-    .Doc(R"doc(
-Generates labels for candidate sampling with a learned unigram distribution.
-
-See explanations of candidate sampling and the data formats at
-go/candidate-sampling.
-
-For each batch, this op picks a single set of sampled candidate labels.
-
-The advantages of sampling candidates per-batch are simplicity and the
-possibility of efficient dense matrix multiplication. The disadvantage is that
-the sampled candidates must be chosen independently of the context and of the
-true labels.
-
-true_classes: A batch_size * num_true matrix, in which each row contains the
-  IDs of the num_true target_classes in the corresponding original label.
-sampled_candidates: A vector of length num_sampled, in which each element is
-  the ID of a sampled candidate.
-true_expected_count: A batch_size * num_true matrix, representing
-  the number of times each candidate is expected to occur in a batch
-  of sampled candidates. If unique=true, then this is a probability.
-sampled_expected_count: A vector of length num_sampled, for each sampled
-  candidate representing the number of times the candidate is expected
-  to occur in a batch of sampled candidates.  If unique=true, then this is a
-  probability.
-num_true: Number of true labels per context.
-num_sampled: Number of candidates to produce.
-unique: If unique is true, we sample with rejection, so that all sampled
-  candidates in a batch are unique. This requires some approximation to
-  estimate the post-rejection sampling probabilities.
-seed: If either seed or seed2 are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: An second seed to avoid seed collision.
-)doc");
+    .SetIsStateful();
 
 REGISTER_OP("ComputeAccidentalHits")
     .Input("true_classes: int64")
@@ -396,27 +158,6 @@ REGISTER_OP("ComputeAccidentalHits")
       c->set_output(1, v);
       c->set_output(2, v);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the ids of the positions in sampled_candidates that match true_labels.
-
-When doing log-odds NCE, the result of this op should be passed through a
-SparseToDense op, then added to the logits of the sampled candidates. This has
-the effect of 'removing' the sampled labels that match the true labels by
-making the classifier sure that they are sampled labels.
-
-true_classes: The true_classes output of UnpackSparseLabels.
-sampled_candidates: The sampled_candidates output of CandidateSampler.
-indices: A vector of indices corresponding to rows of true_candidates.
-ids: A vector of IDs of positions in sampled_candidates that match a true_label
-  for the row with the corresponding index in indices.
-weights: A vector of the same length as indices and ids, in which each element
-  is -FLOAT_MAX.
-num_true: Number of true labels per context.
-seed: If either seed or seed2 are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: An second seed to avoid seed collision.
-)doc");
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/checkpoint_ops.cc b/tensorflow/core/ops/checkpoint_ops.cc
index 08b00c8255c8e44cea9a2e0d4c97378ecc3bb998..5fe82e165313683b732d39e40266df2d31c71231 100644
--- a/tensorflow/core/ops/checkpoint_ops.cc
+++ b/tensorflow/core/ops/checkpoint_ops.cc
@@ -38,49 +38,7 @@ REGISTER_OP("GenerateVocabRemapping")
       c->set_output(0, c->Vector(num_new_vocab));
       c->set_output(1, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Given a path to new and old vocabulary files, returns a remapping Tensor of
-length `num_new_vocab`, where `remapping[i]` contains the row number in the old
-vocabulary that corresponds to row `i` in the new vocabulary (starting at line
-`new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i`
-in the new vocabulary is not in the old vocabulary.  The old vocabulary is
-constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the
-default value of -1.
-
-`num_vocab_offset` enables
-use in the partitioned variable case, and should generally be set through
-examining partitioning info.  The format of the files should be a text file,
-with each line containing a single entity within the vocabulary.
-
-For example, with `new_vocab_file` a text file containing each of the following
-elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3],
-`num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be
-`[0, -1, 2]`.
-
-The op also returns a count of how many entries in the new vocabulary
-were present in the old vocabulary, which is used to calculate the number of
-values to initialize in a weight matrix remapping
-
-This functionality can be used to remap both row vocabularies (typically,
-features) and column vocabularies (typically, classes) from TensorFlow
-checkpoints.  Note that the partitioning logic relies on contiguous vocabularies
-corresponding to div-partitioned variables.  Moreover, the underlying remapping
-uses an IndexTable (as opposed to an inexact CuckooTable), so client code should
-use the corresponding index_table_from_file() as the FeatureColumn framework
-does (as opposed to tf.feature_to_id(), which uses a CuckooTable).
-
-new_vocab_file: Path to the new vocab file.
-old_vocab_file: Path to the old vocab file.
-new_vocab_offset: How many entries into the new vocab file to start reading.
-num_new_vocab: Number of entries in the new vocab file to remap.
-old_vocab_size: Number of entries in the old vocab file to consider.  If -1,
-  use the entire old vocabulary.
-remapping: A Tensor of length num_new_vocab where the element at index i
-  is equal to the old ID that maps to the new ID i.  This element is -1 for any
-  new ID that is not found in the old vocabulary.
-num_present: Number of new vocab entries found in old vocab.
-)doc");
+    });
 
 REGISTER_OP("LoadAndRemapMatrix")
     .Input("ckpt_path: string")
@@ -109,63 +67,5 @@ REGISTER_OP("LoadAndRemapMatrix")
 
       c->set_output(0, c->Matrix(num_rows, num_cols));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Loads a 2-D (matrix) `Tensor` with name `old_tensor_name` from the checkpoint
-at `ckpt_path` and potentially reorders its rows and columns using the
-specified remappings.
-
-Most users should use one of the wrapper initializers (such as
-`tf.contrib.framework.load_and_remap_matrix_initializer`) instead of this
-function directly.
-
-The remappings are 1-D tensors with the following properties:
-
-* `row_remapping` must have exactly `num_rows` entries. Row `i` of the output
-  matrix will be initialized from the row corresponding to index
-  `row_remapping[i]` in the old `Tensor` from the checkpoint.
-* `col_remapping` must have either 0 entries (indicating that no column
-  reordering is needed) or `num_cols` entries. If specified, column `j` of the
-  output matrix will be initialized from the column corresponding to index
-  `col_remapping[j]` in the old `Tensor` from the checkpoint.
-* A value of -1 in either of the remappings signifies a "missing" entry. In that
-  case, values from the `initializing_values` tensor will be used to fill that
-  missing row or column. If `row_remapping` has `r` missing entries and
-  `col_remapping` has `c` missing entries, then the following condition must be
-  true:
-
-`(r * num_cols) + (c * num_rows) - (r * c) == len(initializing_values)`
-
-The remapping tensors can be generated using the GenerateVocabRemapping op.
-
-As an example, with row_remapping = [1, 0, -1], col_remapping = [0, 2, -1],
-initializing_values = [0.5, -0.5, 0.25, -0.25, 42], and w(i, j) representing
-the value from row i, column j of the old tensor in the checkpoint, the output
-matrix will look like the following:
-
-[[w(1, 0),  w(1, 2),  0.5],
- [w(0, 0),  w(0, 2), -0.5],
- [0.25,    -0.25,      42]]
-
-ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) from
-  which the old matrix `Tensor` will be loaded.
-old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint.
-row_remapping: An int `Tensor` of row remappings (generally created by
-  `generate_vocab_remapping`).  Even if no row remapping is needed, this must
-  still be an index-valued Tensor (e.g. [0, 1, 2, ...]), or a shifted
-  index-valued `Tensor` (e.g. [8, 9, 10, ...], for partitioned `Variables`).
-col_remapping: An int `Tensor` of column remappings (generally created by
-  `generate_vocab_remapping`).  May be a size-0 `Tensor` if only row remapping
-  is to be done (e.g. column ordering is the same).
-initializing_values: A float `Tensor` containing  values to fill in for cells
-  in the output matrix that are not loaded from the checkpoint. Length must be
-  exactly the same as the number of missing / new cells.
-num_rows: Number of rows (length of the 1st dimension) in the output matrix.
-num_cols: Number of columns (length of the 2nd dimension) in the output matrix.
-max_rows_in_memory: The maximum number of rows to load from the checkpoint at
-  once. If less than or equal to 0, the entire matrix will be loaded into
-  memory. Setting this arg trades increased disk reads for lower memory usage.
-output_matrix: Output matrix containing existing values loaded from the
-  checkpoint, and with any missing values filled in from initializing_values.
-)doc");
+    });
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/compat/op_compatibility_lib.cc b/tensorflow/core/ops/compat/op_compatibility_lib.cc
index 61243d2bd23b6407b539171d4c39a7792b9fae91..45017c9da5ef28828329989c00ff0409994a7ce5 100644
--- a/tensorflow/core/ops/compat/op_compatibility_lib.cc
+++ b/tensorflow/core/ops/compat/op_compatibility_lib.cc
@@ -146,6 +146,11 @@ Status OpCompatibilityLib::ValidateCompatible(Env* env, int* changed_ops,
               OpDefCompatible(in_op_history.op(i), op_list_.op(cur)));
         }
 
+        // Verify default value of attrs has not been added/removed/modified
+        // as compared to only the last historical version.
+        TF_RETURN_IF_ERROR(OpDefAttrDefaultsUnchanged(in_op_history.op(end - 1),
+                                                      op_list_.op(cur)));
+
         // Check that attrs missing from in_op_history.op(start) don't
         // change their defaults.
         if (start < end - 1) {
diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index c7a296d9381b5263617ae9cb014856f234733fd9..08b685319eaf725b01ab460903b82680c6bd247f 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -39,6 +39,79 @@ op {
     }
   }
 }
+op {
+  name: "Abs"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "AccumulateNV2"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "sum"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  is_aggregate: true
+  is_commutative: true
+}
 op {
   name: "AccumulateNV2"
   input_arg {
@@ -77,6 +150,56 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  is_aggregate: true
+  is_commutative: true
+}
+op {
+  name: "AccumulateNV2"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "sum"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -165,6 +288,88 @@ op {
     }
   }
 }
+op {
+  name: "AccumulatorApplyGradient"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "local_step"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "gradient"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "AccumulatorApplyGradient"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "local_step"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "gradient"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
 op {
   name: "AccumulatorNumAccumulated"
   input_arg {
@@ -267,6 +472,114 @@ op {
     }
   }
 }
+op {
+  name: "AccumulatorTakeGradient"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "num_required"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "average"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "AccumulatorTakeGradient"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "num_required"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "average"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Acos"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "Acos"
   input_arg {
@@ -283,6 +596,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -317,6 +631,65 @@ op {
     }
   }
 }
+op {
+  name: "Acosh"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Add"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_STRING
+      }
+    }
+  }
+}
 op {
   name: "Add"
   input_arg {
@@ -337,6 +710,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -519,6 +893,98 @@ op {
   is_aggregate: true
   is_commutative: true
 }
+op {
+  name: "AddN"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "sum"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+        type: DT_VARIANT
+      }
+    }
+  }
+  is_aggregate: true
+  is_commutative: true
+}
+op {
+  name: "AddN"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "sum"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_VARIANT
+      }
+    }
+  }
+  is_aggregate: true
+  is_commutative: true
+}
 op {
   name: "AddSparseToTensorsMap"
   input_arg {
@@ -592,6 +1058,42 @@ op {
   is_aggregate: true
   is_commutative: true
 }
+op {
+  name: "AddV2"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_aggregate: true
+  is_commutative: true
+}
 op {
   name: "AdjustContrast"
   input_arg {
@@ -1023,7 +1525,7 @@ op {
   }
 }
 op {
-  name: "ApplyAdagrad"
+  name: "ApplyAdadelta"
   input_arg {
     name: "var"
     type_attr: "T"
@@ -1034,15 +1536,28 @@ op {
     type_attr: "T"
     is_ref: true
   }
+  input_arg {
+    name: "accum_update"
+    type_attr: "T"
+    is_ref: true
+  }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "rho"
     type_attr: "T"
   }
-  output_arg {
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
     name: "out"
     type_attr: "T"
     is_ref: true
@@ -1066,6 +1581,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1078,7 +1596,7 @@ op {
   }
 }
 op {
-  name: "ApplyAdagrad"
+  name: "ApplyAdadelta"
   input_arg {
     name: "var"
     type_attr: "T"
@@ -1089,10 +1607,23 @@ op {
     type_attr: "T"
     is_ref: true
   }
+  input_arg {
+    name: "accum_update"
+    type_attr: "T"
+    is_ref: true
+  }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
   input_arg {
     name: "grad"
     type_attr: "T"
@@ -1109,17 +1640,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1135,42 +1667,25 @@ op {
   }
 }
 op {
-  name: "ApplyAdagradDA"
+  name: "ApplyAdagrad"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "gradient_accumulator"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "gradient_squared_accumulator"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
+    name: "grad"
     type_attr: "T"
   }
-  input_arg {
-    name: "global_step"
-    type: DT_INT64
-  }
   output_arg {
     name: "out"
     type_attr: "T"
@@ -1207,42 +1722,25 @@ op {
   }
 }
 op {
-  name: "ApplyAdagradDA"
+  name: "ApplyAdagrad"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "gradient_accumulator"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "gradient_squared_accumulator"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
+    name: "grad"
     type_attr: "T"
   }
-  input_arg {
-    name: "global_step"
-    type: DT_INT64
-  }
   output_arg {
     name: "out"
     type_attr: "T"
@@ -1281,44 +1779,77 @@ op {
   }
 }
 op {
-  name: "ApplyAdam"
+  name: "ApplyAdagrad"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "m"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "v"
+    name: "lr"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "beta1_power"
+    name: "grad"
     type_attr: "T"
   }
-  input_arg {
-    name: "beta2_power"
+  output_arg {
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+}
+op {
+  name: "ApplyAdagrad"
   input_arg {
-    name: "beta1"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "beta2"
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "epsilon"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
@@ -1337,18 +1868,21 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -1361,28 +1895,24 @@ op {
   }
 }
 op {
-  name: "ApplyAdam"
+  name: "ApplyAdagradDA"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "m"
+    name: "gradient_accumulator"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "v"
+    name: "gradient_squared_accumulator"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "beta1_power"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "beta2_power"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
@@ -1390,20 +1920,16 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "beta1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "beta2"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "global_step"
+    type: DT_INT64
   }
   output_arg {
     name: "out"
@@ -1439,37 +1965,26 @@ op {
       b: false
     }
   }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ApplyAdam"
+  name: "ApplyAdagradDA"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "m"
+    name: "gradient_accumulator"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "v"
+    name: "gradient_squared_accumulator"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "beta1_power"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "beta2_power"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
@@ -1477,20 +1992,16 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "beta1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "beta2"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "global_step"
+    type: DT_INT64
   }
   output_arg {
     name: "out"
@@ -1528,46 +2039,44 @@ op {
       b: false
     }
   }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ApplyAddSign"
+  name: "ApplyAdagradDA"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "m"
+    name: "gradient_accumulator"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "lr"
+    name: "gradient_squared_accumulator"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "alpha"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "sign_decay"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "beta"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "l2"
     type_attr: "T"
   }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
+  }
   output_arg {
     name: "out"
     type_attr: "T"
@@ -1594,6 +2103,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1606,46 +2116,41 @@ op {
   }
 }
 op {
-  name: "ApplyCenteredRMSProp"
+  name: "ApplyAdagradDA"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mg"
+    name: "gradient_accumulator"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "ms"
+    name: "gradient_squared_accumulator"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mom"
+    name: "grad"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "momentum"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "global_step"
+    type: DT_INT64
   }
   output_arg {
     name: "out"
@@ -1659,18 +2164,21 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -1683,37 +2191,40 @@ op {
   }
 }
 op {
-  name: "ApplyCenteredRMSProp"
+  name: "ApplyAdam"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mg"
+    name: "m"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "ms"
+    name: "v"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mom"
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
     name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "beta1"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "beta2"
     type_attr: "T"
   }
   input_arg {
@@ -1748,8 +2259,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -1762,24 +2271,28 @@ op {
   }
 }
 op {
-  name: "ApplyFtrl"
+  name: "ApplyAdam"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "m"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "linear"
+    name: "v"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
     type_attr: "T"
   }
   input_arg {
@@ -1787,15 +2300,19 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "beta1"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "beta2"
     type_attr: "T"
   }
   input_arg {
-    name: "lr_power"
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -1832,26 +2349,37 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ApplyFtrl"
+  name: "ApplyAdam"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "m"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "linear"
+    name: "v"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
     type_attr: "T"
   }
   input_arg {
@@ -1859,15 +2387,19 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "beta1"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "beta2"
     type_attr: "T"
   }
   input_arg {
-    name: "lr_power"
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -1906,26 +2438,37 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ApplyFtrlV2"
+  name: "ApplyAdam"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "m"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "linear"
+    name: "v"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
     type_attr: "T"
   }
   input_arg {
@@ -1933,19 +2476,19 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "beta1"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "beta2"
     type_attr: "T"
   }
   input_arg {
-    name: "l2_shrinkage"
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
-    name: "lr_power"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -1972,6 +2515,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -1982,26 +2528,37 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ApplyFtrlV2"
+  name: "ApplyAdam"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "m"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "linear"
+    name: "v"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
     type_attr: "T"
   }
   input_arg {
@@ -2009,19 +2566,19 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "beta1"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "beta2"
     type_attr: "T"
   }
   input_arg {
-    name: "l2_shrinkage"
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
-    name: "lr_power"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -2036,17 +2593,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -2060,70 +2618,44 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "ApplyGradientDescent"
+  name: "ApplyAddSign"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "alpha"
+    name: "m"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "delta"
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "out"
+  input_arg {
+    name: "alpha"
     type_attr: "T"
-    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-}
-op {
-  name: "ApplyGradientDescent"
   input_arg {
-    name: "var"
+    name: "sign_decay"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "alpha"
+    name: "beta"
     type_attr: "T"
   }
   input_arg {
-    name: "delta"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -2164,14 +2696,14 @@ op {
   }
 }
 op {
-  name: "ApplyMomentum"
+  name: "ApplyAddSign"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "m"
     type_attr: "T"
     is_ref: true
   }
@@ -2180,11 +2712,19 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -2211,6 +2751,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2221,23 +2764,16 @@ op {
       b: false
     }
   }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ApplyMomentum"
+  name: "ApplyAddSign"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "m"
     type_attr: "T"
     is_ref: true
   }
@@ -2246,11 +2782,19 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -2265,17 +2809,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -2289,23 +2834,26 @@ op {
       b: false
     }
   }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ApplyPowerSign"
+  name: "ApplyCenteredRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "m"
+    name: "mg"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "mom"
     type_attr: "T"
     is_ref: true
   }
@@ -2314,15 +2862,15 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "logbase"
+    name: "rho"
     type_attr: "T"
   }
   input_arg {
-    name: "sign_decay"
+    name: "momentum"
     type_attr: "T"
   }
   input_arg {
-    name: "beta"
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
@@ -2353,8 +2901,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -2367,14 +2913,24 @@ op {
   }
 }
 op {
-  name: "ApplyProximalAdagrad"
+  name: "ApplyCenteredRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "mg"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "mom"
     type_attr: "T"
     is_ref: true
   }
@@ -2383,11 +2939,15 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "rho"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
@@ -2418,6 +2978,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -2430,14 +2992,24 @@ op {
   }
 }
 op {
-  name: "ApplyProximalAdagrad"
+  name: "ApplyCenteredRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "mg"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "mom"
     type_attr: "T"
     is_ref: true
   }
@@ -2446,11 +3018,15 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "rho"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
@@ -2483,6 +3059,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2495,26 +3072,45 @@ op {
   }
 }
 op {
-  name: "ApplyProximalGradientDescent"
+  name: "ApplyCenteredRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "alpha"
+    name: "mg"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "l1"
+    name: "ms"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "l2"
+    name: "mom"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "delta"
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -2529,18 +3125,21 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -2553,14 +3152,28 @@ op {
   }
 }
 op {
-  name: "ApplyProximalGradientDescent"
+  name: "ApplyFtrl"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "alpha"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
@@ -2572,7 +3185,7 @@ op {
     type_attr: "T"
   }
   input_arg {
-    name: "delta"
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
@@ -2599,8 +3212,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -2613,40 +3224,40 @@ op {
   }
 }
 op {
-  name: "ApplyRMSProp"
+  name: "ApplyFtrl"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "ms"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mom"
+    name: "linear"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "lr"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
@@ -2673,6 +3284,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -2685,40 +3298,40 @@ op {
   }
 }
 op {
-  name: "ApplyRMSProp"
+  name: "ApplyFtrl"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "ms"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "mom"
+    name: "linear"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "lr"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
@@ -2747,6 +3360,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -2759,18 +3373,46 @@ op {
   }
 }
 op {
-  name: "ApproximateEqual"
+  name: "ApplyFtrl"
   input_arg {
-    name: "x"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "y"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -2779,43 +3421,77 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "tolerance"
-    type: "float"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      f: 1e-05
+      b: false
     }
   }
-  is_commutative: true
 }
 op {
-  name: "ApproximateEqual"
+  name: "ApplyFtrlV2"
   input_arg {
-    name: "x"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "y"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -2836,33 +3512,62 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "tolerance"
-    type: "float"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      f: 1e-05
+      b: false
     }
   }
-  is_commutative: true
 }
 op {
-  name: "ArgMax"
+  name: "ApplyFtrlV2"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_INT64
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -2883,36 +3588,64 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "ArgMax"
+  name: "ApplyFtrlV2"
   input_arg {
-    name: "input"
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "output_type"
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -2933,49 +3666,118 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT32
+      b: false
     }
+  }
+}
+op {
+  name: "ApplyFtrlV2"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "output_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "ArgMax"
+  name: "ApplyGradientDescent"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "output_type"
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -2996,51 +3798,36 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
   attr {
-    name: "output_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "ArgMin"
+  name: "ApplyGradientDescent"
   input_arg {
-    name: "input"
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "delta"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_INT64
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -3061,36 +3848,38 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "ArgMin"
+  name: "ApplyGradientDescent"
   input_arg {
-    name: "input"
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "delta"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "output_type"
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -3111,49 +3900,39 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "output_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "ArgMin"
+  name: "ApplyGradientDescent"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "dimension"
-    type_attr: "Tidx"
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "output_type"
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -3162,17 +3941,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -3180,217 +3960,242 @@ op {
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "output_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "AsString"
+  name: "ApplyMomentum"
   input_arg {
-    name: "input"
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_STRING
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_BOOL
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
         type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "precision"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "scientific"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
   attr {
-    name: "shortest"
+    name: "use_nesterov"
     type: "bool"
     default_value {
       b: false
     }
   }
-  attr {
-    name: "width"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "fill"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
 }
 op {
-  name: "Asin"
+  name: "ApplyMomentum"
   input_arg {
-    name: "x"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "y"
+  input_arg {
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-}
-op {
-  name: "Asinh"
   input_arg {
-    name: "x"
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "Assert"
-  input_arg {
-    name: "condition"
-    type: DT_BOOL
-  }
-  input_arg {
-    name: "data"
-    type_list_attr: "T"
-  }
   attr {
-    name: "T"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "summarize"
-    type: "int"
+    name: "use_nesterov"
+    type: "bool"
     default_value {
-      i: 3
+      b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "Assign"
+  name: "ApplyMomentum"
   input_arg {
-    name: "ref"
+    name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "value"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "out"
     type_attr: "T"
     is_ref: true
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
   attr {
-    name: "validate_shape"
+    name: "use_locking"
     type: "bool"
     default_value {
-      b: true
+      b: false
     }
   }
   attr {
-    name: "use_locking"
+    name: "use_nesterov"
     type: "bool"
     default_value {
-      b: true
+      b: false
     }
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "AssignAdd"
+  name: "ApplyMomentum"
   input_arg {
-    name: "ref"
+    name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "value"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "out"
     type_attr: "T"
     is_ref: true
   }
@@ -3401,18 +4206,21 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -3423,20 +4231,48 @@ op {
       b: false
     }
   }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "AssignAdd"
+  name: "ApplyPowerSign"
   input_arg {
-    name: "ref"
+    name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "value"
+    name: "m"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "logbase"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "out"
     type_attr: "T"
     is_ref: true
   }
@@ -3473,34 +4309,39 @@ op {
   }
 }
 op {
-  name: "AssignAddVariableOp"
+  name: "ApplyPowerSign"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "value"
-    type_attr: "dtype"
+    name: "m"
+    type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "dtype"
-    type: "type"
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "AssignSub"
   input_arg {
-    name: "ref"
+    name: "logbase"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "value"
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "out"
     type_attr: "T"
     is_ref: true
   }
@@ -3523,6 +4364,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -3535,18 +4379,39 @@ op {
   }
 }
 op {
-  name: "AssignSub"
+  name: "ApplyPowerSign"
   input_arg {
-    name: "ref"
+    name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "value"
+    name: "m"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "logbase"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "out"
     type_attr: "T"
     is_ref: true
   }
@@ -3557,17 +4422,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -3583,132 +4449,127 @@ op {
   }
 }
 op {
-  name: "AssignSubVariableOp"
+  name: "ApplyProximalAdagrad"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "value"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "dtype"
-    type: "type"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
-  is_stateful: true
-}
-op {
-  name: "AssignVariableOp"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "value"
-    type_attr: "dtype"
+    name: "l1"
+    type_attr: "T"
   }
-  attr {
-    name: "dtype"
-    type: "type"
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "Atan"
   input_arg {
-    name: "x"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "Atan2"
+  name: "ApplyProximalAdagrad"
   input_arg {
-    name: "y"
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "x"
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "z"
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-}
-op {
-  name: "Atanh"
   input_arg {
-    name: "x"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "AudioSpectrogram"
-  input_arg {
-    name: "input"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "spectrogram"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "window_size"
-    type: "int"
-  }
-  attr {
-    name: "stride"
-    type: "int"
-  }
   attr {
-    name: "magnitude_squared"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
@@ -3716,108 +4577,37 @@ op {
   }
 }
 op {
-  name: "AudioSummary"
+  name: "ApplyProximalAdagrad"
   input_arg {
-    name: "tag"
-    type: DT_STRING
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "tensor"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "summary"
-    type: DT_STRING
-  }
-  attr {
-    name: "sample_rate"
-    type: "float"
-  }
-  attr {
-    name: "max_outputs"
-    type: "int"
-    default_value {
-      i: 3
-    }
-    has_minimum: true
-    minimum: 1
-  }
-  deprecation {
-    version: 15
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
-}
-op {
-  name: "AudioSummaryV2"
   input_arg {
-    name: "tag"
-    type: DT_STRING
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "tensor"
-    type: DT_FLOAT
+    name: "l1"
+    type_attr: "T"
   }
   input_arg {
-    name: "sample_rate"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "summary"
-    type: DT_STRING
-  }
-  attr {
-    name: "max_outputs"
-    type: "int"
-    default_value {
-      i: 3
-    }
-    has_minimum: true
-    minimum: 1
+    name: "l2"
+    type_attr: "T"
   }
-}
-op {
-  name: "AvgPool"
   input_arg {
-    name: "value"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+    is_ref: true
   }
   attr {
     name: "T"
@@ -3825,56 +4615,65 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_HALF
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "AvgPool"
+  name: "ApplyProximalAdagrad"
   input_arg {
-    name: "value"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -3883,99 +4682,117 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "AvgPool"
+  name: "ApplyProximalGradientDescent"
   input_arg {
-    name: "value"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "alpha"
     type_attr: "T"
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "AvgPool3D"
+  name: "ApplyProximalGradientDescent"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "alpha"
     type_attr: "T"
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -3984,54 +4801,58 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "AvgPool3D"
+  name: "ApplyProximalGradientDescent"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "alpha"
     type_attr: "T"
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NDHWC"
-    }
-    allowed_values {
-      list {
-        s: "NDHWC"
-        s: "NCDHW"
-      }
-    }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -4040,45 +4861,59 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "AvgPool3DGrad"
+  name: "ApplyProximalGradientDescent"
   input_arg {
-    name: "orig_input_shape"
-    type: DT_INT32
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "alpha"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  input_arg {
+    name: "delta"
+    type_attr: "T"
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -4087,118 +4922,73 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "AvgPool3DGrad"
-  input_arg {
-    name: "orig_input_shape"
-    type: DT_INT32
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+}
+op {
+  name: "ApplyRMSProp"
   input_arg {
-    name: "grad"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "ms"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+  input_arg {
+    name: "mom"
+    type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NDHWC"
-    }
-    allowed_values {
-      list {
-        s: "NDHWC"
-        s: "NCDHW"
-      }
-    }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
-}
-op {
-  name: "AvgPoolGrad"
   input_arg {
-    name: "orig_input_shape"
-    type: DT_INT32
+    name: "epsilon"
+    type_attr: "T"
   }
   input_arg {
     name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+    is_ref: true
   }
   attr {
     name: "T"
@@ -4206,330 +4996,267 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_HALF
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "AvgPoolGrad"
+  name: "ApplyRMSProp"
   input_arg {
-    name: "orig_input_shape"
-    type: DT_INT32
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "ms"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "mom"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_HALF
-      }
-    }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
-}
-op {
-  name: "AvgPoolGrad"
   input_arg {
-    name: "orig_input_shape"
-    type: DT_INT32
+    name: "epsilon"
+    type_attr: "T"
   }
   input_arg {
     name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "Barrier"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "BarrierClose"
+  name: "ApplyRMSProp"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "var"
+    type_attr: "T"
     is_ref: true
   }
-  attr {
-    name: "cancel_pending_enqueues"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
   }
-}
-op {
-  name: "BarrierIncompleteSize"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "mom"
+    type_attr: "T"
     is_ref: true
   }
-  output_arg {
-    name: "size"
-    type: DT_INT32
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-}
-op {
-  name: "BarrierInsertMany"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "rho"
+    type_attr: "T"
   }
   input_arg {
-    name: "keys"
-    type: DT_STRING
+    name: "momentum"
+    type_attr: "T"
   }
   input_arg {
-    name: "values"
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
   attr {
-    name: "component_index"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "BarrierReadySize"
+  name: "ApplyRMSProp"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "var"
+    type_attr: "T"
     is_ref: true
   }
-  output_arg {
-    name: "size"
-    type: DT_INT32
-  }
-}
-op {
-  name: "BarrierTakeMany"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "ms"
+    type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "num_elements"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "keys"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "values"
-    type_list_attr: "component_types"
+    name: "mom"
+    type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "allow_small_batch"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "wait_for_incomplete"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
-  attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
-    }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-}
-op {
-  name: "BatchCholesky"
   input_arg {
-    name: "input"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  deprecation {
-    version: 13
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "BatchCholeskyGrad"
+  name: "ApproximateEqual"
   input_arg {
-    name: "l"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "grad"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
@@ -4538,154 +5265,79 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
-  deprecation {
-    version: 13
-  }
-}
-op {
-  name: "BatchDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "batch_size"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "tolerance"
+    type: "float"
+    default_value {
+      f: 1e-05
+    }
   }
-  is_stateful: true
+  is_commutative: true
 }
 op {
-  name: "BatchDataset"
+  name: "ApproximateEqual"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "batch_size"
-    type: DT_INT64
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "z"
+    type: DT_BOOL
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "BatchFFT"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-  deprecation {
-    version: 15
-  }
-}
-op {
-  name: "BatchFFT2D"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-  deprecation {
-    version: 15
-  }
-}
-op {
-  name: "BatchFFT3D"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-  deprecation {
-    version: 15
-  }
-}
-op {
-  name: "BatchIFFT"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-  deprecation {
-    version: 15
-  }
-}
-op {
-  name: "BatchIFFT2D"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-  deprecation {
-    version: 15
-  }
-}
-op {
-  name: "BatchIFFT3D"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-  deprecation {
-    version: 15
+    name: "tolerance"
+    type: "float"
+    default_value {
+      f: 1e-05
+    }
   }
+  is_commutative: true
 }
 op {
-  name: "BatchMatMul"
+  name: "ApproximateEqual"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -4695,73 +5347,56 @@ op {
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "adj_x"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "adj_y"
-    type: "bool"
+    name: "tolerance"
+    type: "float"
     default_value {
-      b: false
+      f: 1e-05
     }
   }
+  is_commutative: true
 }
 op {
-  name: "BatchMatrixBandPart"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "num_lower"
-    type: DT_INT64
-  }
+  name: "ApproximateEqual"
   input_arg {
-    name: "num_upper"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "band"
+    name: "x"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-  deprecation {
-    version: 14
-  }
-}
-op {
-  name: "BatchMatrixDeterminant"
   input_arg {
-    name: "input"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
@@ -4770,22 +5405,46 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  deprecation {
-    version: 13
+  attr {
+    name: "tolerance"
+    type: "float"
+    default_value {
+      f: 1e-05
+    }
   }
+  is_commutative: true
 }
 op {
-  name: "BatchMatrixDeterminant"
+  name: "ArgMax"
   input_arg {
     name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "dimension"
+    type_attr: "Tidx"
+  }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type: DT_INT64
   }
   attr {
     name: "T"
@@ -4794,245 +5453,308 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
-  deprecation {
-    version: 13
-  }
-}
-op {
-  name: "BatchMatrixDiag"
-  input_arg {
-    name: "diagonal"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Tidx"
     type: "type"
-  }
-  deprecation {
-    version: 14
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
 }
 op {
-  name: "BatchMatrixDiagPart"
+  name: "ArgMax"
   input_arg {
     name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "dimension"
+    type_attr: "Tidx"
+  }
   output_arg {
-    name: "diagonal"
-    type_attr: "T"
+    name: "output"
+    type_attr: "output_type"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  deprecation {
-    version: 14
-  }
-}
-op {
-  name: "BatchMatrixInverse"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
   }
   attr {
-    name: "adjoint"
-    type: "bool"
+    name: "Tidx"
+    type: "type"
     default_value {
-      b: false
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
+    default_value {
+      type: DT_INT64
+    }
     allowed_values {
       list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
-  deprecation {
-    version: 13
-  }
 }
 op {
-  name: "BatchMatrixSetDiag"
+  name: "ArgMax"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "diagonal"
-    type_attr: "T"
+    name: "dimension"
+    type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type_attr: "output_type"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  deprecation {
-    version: 14
-  }
-}
-op {
-  name: "BatchMatrixSolve"
-  input_arg {
-    name: "matrix"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "rhs"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "adjoint"
-    type: "bool"
+    name: "Tidx"
+    type: "type"
     default_value {
-      b: false
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
+    default_value {
+      type: DT_INT64
+    }
     allowed_values {
       list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
-  deprecation {
-    version: 13
-  }
 }
 op {
-  name: "BatchMatrixSolveLs"
-  input_arg {
-    name: "matrix"
-    type_attr: "T"
-  }
+  name: "ArgMax"
   input_arg {
-    name: "rhs"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "l2_regularizer"
-    type: DT_DOUBLE
+    name: "dimension"
+    type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type_attr: "output_type"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "fast"
-    type: "bool"
+    name: "Tidx"
+    type: "type"
     default_value {
-      b: true
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  deprecation {
-    version: 13
+  attr {
+    name: "output_type"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
 }
 op {
-  name: "BatchMatrixTriangularSolve"
+  name: "ArgMax"
   input_arg {
-    name: "matrix"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "rhs"
-    type_attr: "T"
+    name: "dimension"
+    type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type_attr: "output_type"
   }
   attr {
-    name: "lower"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
   attr {
-    name: "adjoint"
-    type: "bool"
+    name: "Tidx"
+    type: "type"
     default_value {
-      b: false
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
+    default_value {
+      type: DT_INT64
+    }
     allowed_values {
       list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
-  deprecation {
-    version: 13
-  }
 }
 op {
-  name: "BatchNormWithGlobalNormalization"
-  input_arg {
-    name: "t"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "m"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "v"
-    type_attr: "T"
-  }
+  name: "ArgMin"
   input_arg {
-    name: "beta"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "gamma"
-    type_attr: "T"
+    name: "dimension"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "result"
-    type_attr: "T"
+    name: "output"
+    type: DT_INT64
   }
   attr {
     name: "T"
@@ -5057,42 +5779,32 @@ op {
     }
   }
   attr {
-    name: "variance_epsilon"
-    type: "float"
-  }
-  attr {
-    name: "scale_after_normalization"
-    type: "bool"
-  }
-  deprecation {
-    version: 9
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
 }
 op {
-  name: "BatchNormWithGlobalNormalization"
-  input_arg {
-    name: "t"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "m"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "v"
-    type_attr: "T"
-  }
+  name: "ArgMin"
   input_arg {
-    name: "beta"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "gamma"
-    type_attr: "T"
+    name: "dimension"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "result"
-    type_attr: "T"
+    name: "output"
+    type_attr: "output_type"
   }
   attr {
     name: "T"
@@ -5113,64 +5825,49 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "variance_epsilon"
-    type: "float"
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   attr {
-    name: "scale_after_normalization"
-    type: "bool"
-  }
-  deprecation {
-    version: 9
+    name: "output_type"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
 }
 op {
-  name: "BatchNormWithGlobalNormalizationGrad"
-  input_arg {
-    name: "t"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "m"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "v"
-    type_attr: "T"
-  }
+  name: "ArgMin"
   input_arg {
-    name: "gamma"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "backprop"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "dx"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "dm"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "dv"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "db"
-    type_attr: "T"
+    name: "dimension"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "dg"
-    type_attr: "T"
+    name: "output"
+    type_attr: "output_type"
   }
   attr {
     name: "T"
@@ -5191,62 +5888,51 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "variance_epsilon"
-    type: "float"
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   attr {
-    name: "scale_after_normalization"
-    type: "bool"
-  }
-  deprecation {
-    version: 9
+    name: "output_type"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
 }
 op {
-  name: "BatchNormWithGlobalNormalizationGrad"
-  input_arg {
-    name: "t"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "m"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "v"
-    type_attr: "T"
-  }
+  name: "ArgMin"
   input_arg {
-    name: "gamma"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "backprop"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "dx"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "dm"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "dv"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "db"
-    type_attr: "T"
+    name: "dimension"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "dg"
-    type_attr: "T"
+    name: "output"
+    type_attr: "output_type"
   }
   attr {
     name: "T"
@@ -5269,231 +5955,225 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "variance_epsilon"
-    type: "float"
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   attr {
-    name: "scale_after_normalization"
-    type: "bool"
-  }
-  deprecation {
-    version: 9
+    name: "output_type"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
 }
 op {
-  name: "BatchSelfAdjointEig"
+  name: "ArgMin"
   input_arg {
     name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "dimension"
+    type_attr: "Tidx"
+  }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type_attr: "output_type"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  deprecation {
-    version: 11
-  }
-}
-op {
-  name: "BatchSelfAdjointEigV2"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "e"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "v"
-    type_attr: "T"
-  }
   attr {
-    name: "compute_v"
-    type: "bool"
+    name: "Tidx"
+    type: "type"
     default_value {
-      b: true
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
+    default_value {
+      type: DT_INT64
+    }
     allowed_values {
       list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
-  deprecation {
-    version: 13
-  }
 }
 op {
-  name: "BatchSvd"
+  name: "AsString"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "s"
-    type_attr: "T"
+    name: "output"
+    type: DT_STRING
   }
-  output_arg {
-    name: "u"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_BOOL
+        type: DT_INT8
+      }
+    }
   }
-  output_arg {
-    name: "v"
-    type_attr: "T"
+  attr {
+    name: "precision"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
   attr {
-    name: "compute_uv"
+    name: "scientific"
     type: "bool"
     default_value {
-      b: true
+      b: false
     }
   }
   attr {
-    name: "full_matrices"
+    name: "shortest"
     type: "bool"
     default_value {
       b: false
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
+    name: "width"
+    type: "int"
+    default_value {
+      i: -1
     }
   }
-  deprecation {
-    version: 13
+  attr {
+    name: "fill"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
 }
 op {
-  name: "BatchToSpace"
+  name: "Asin"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "crops"
-    type_attr: "Tidx"
-  }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "block_size"
-    type: "int"
-    has_minimum: true
-    minimum: 2
-  }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "BatchToSpaceND"
+  name: "Asin"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "block_shape"
-    type_attr: "Tblock_shape"
-  }
-  input_arg {
-    name: "crops"
-    type_attr: "Tcrops"
-  }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tblock_shape"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "Tcrops"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Betainc"
-  input_arg {
-    name: "a"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "b"
-    type_attr: "T"
-  }
+  name: "Asinh"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -5501,24 +6181,23 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "BiasAdd"
-  input_arg {
-    name: "value"
-    type_attr: "T"
-  }
+  name: "Asinh"
   input_arg {
-    name: "bias"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -5526,98 +6205,92 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
       }
     }
   }
+}
+op {
+  name: "Assert"
+  input_arg {
+    name: "condition"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "data"
+    type_list_attr: "T"
+  }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "T"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "summarize"
+    type: "int"
     default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+      i: 3
     }
   }
+  is_stateful: true
 }
 op {
-  name: "BiasAdd"
+  name: "Assign"
   input_arg {
-    name: "value"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "bias"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "validate_shape"
+    type: "bool"
     default_value {
-      s: "NHWC"
+      b: true
     }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: true
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "BiasAddGrad"
+  name: "AssignAdd"
   input_arg {
-    name: "out_backprop"
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -5642,28 +6315,28 @@ op {
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+      b: false
     }
   }
 }
 op {
-  name: "BiasAddGrad"
+  name: "AssignAdd"
   input_arg {
-    name: "out_backprop"
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -5690,32 +6363,28 @@ op {
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+      b: false
     }
   }
 }
 op {
-  name: "BiasAddV1"
+  name: "AssignAdd"
   input_arg {
-    name: "value"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "bias"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -5736,23 +6405,35 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "BiasAddV1"
+  name: "AssignAdd"
   input_arg {
-    name: "value"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "bias"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -5761,64 +6442,63 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "Bincount"
-  input_arg {
-    name: "arr"
-    type: DT_INT32
-  }
+  name: "AssignAddVariableOp"
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "resource"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "weights"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "bins"
-    type_attr: "T"
+    name: "value"
+    type_attr: "dtype"
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
   }
+  is_stateful: true
 }
 op {
-  name: "Bitcast"
+  name: "AssignSub"
   input_arg {
-    name: "input"
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "type"
+    name: "output_ref"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -5843,7 +6523,31 @@ op {
     }
   }
   attr {
-    name: "type"
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "AssignSub"
+  input_arg {
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "value"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_ref"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -5861,19 +6565,34 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "Bitcast"
+  name: "AssignSub"
   input_arg {
-    name: "input"
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "type"
+    name: "output_ref"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -5886,148 +6605,117 @@ op {
         type: DT_INT32
         type: DT_UINT8
         type: DT_UINT16
-        type: DT_INT8
         type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "type"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-        type: DT_HALF
-      }
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
 }
 op {
-  name: "BitwiseAnd"
+  name: "AssignSub"
   input_arg {
-    name: "x"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "y"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
         type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  is_commutative: true
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "BitwiseAnd"
+  name: "AssignSubVariableOp"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type_attr: "T"
+    name: "value"
+    type_attr: "dtype"
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
   }
-  is_commutative: true
+  is_stateful: true
 }
 op {
-  name: "BitwiseOr"
+  name: "AssignVariableOp"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "z"
-    type_attr: "T"
+    name: "value"
+    type_attr: "dtype"
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-      }
-    }
   }
-  is_commutative: true
+  is_stateful: true
 }
 op {
-  name: "BitwiseOr"
+  name: "Atan"
   input_arg {
     name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
   output_arg {
-    name: "z"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -6035,31 +6723,25 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  is_commutative: true
 }
 op {
-  name: "BitwiseXor"
+  name: "Atan"
   input_arg {
     name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
   output_arg {
-    name: "z"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -6067,25 +6749,26 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  is_commutative: true
 }
 op {
-  name: "BitwiseXor"
+  name: "Atan2"
   input_arg {
-    name: "x"
+    name: "y"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
@@ -6097,388 +6780,330 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  is_commutative: true
 }
 op {
-  name: "BroadcastArgs"
+  name: "Atan2"
   input_arg {
-    name: "s0"
+    name: "y"
     type_attr: "T"
   }
   input_arg {
-    name: "s1"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "r0"
+    name: "z"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "BroadcastGradientArgs"
-  input_arg {
-    name: "s0"
-    type_attr: "T"
-  }
+  name: "Atanh"
   input_arg {
-    name: "s1"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "r0"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "r1"
+    name: "y"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Bucketize"
+  name: "Atanh"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_INT32
+    name: "y"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  attr {
-    name: "boundaries"
-    type: "list(float)"
-  }
 }
 op {
-  name: "BytesProducedStatsDataset"
+  name: "AudioSpectrogram"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "tag"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "CTCBeamSearchDecoder"
-  input_arg {
-    name: "inputs"
+    name: "input"
     type: DT_FLOAT
   }
-  input_arg {
-    name: "sequence_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "decoded_indices"
-    type: DT_INT64
-    number_attr: "top_paths"
-  }
-  output_arg {
-    name: "decoded_values"
-    type: DT_INT64
-    number_attr: "top_paths"
-  }
-  output_arg {
-    name: "decoded_shape"
-    type: DT_INT64
-    number_attr: "top_paths"
-  }
   output_arg {
-    name: "log_probability"
+    name: "spectrogram"
     type: DT_FLOAT
   }
   attr {
-    name: "beam_width"
+    name: "window_size"
     type: "int"
-    has_minimum: true
-    minimum: 1
   }
   attr {
-    name: "top_paths"
+    name: "stride"
     type: "int"
-    has_minimum: true
-    minimum: 1
   }
   attr {
-    name: "merge_repeated"
+    name: "magnitude_squared"
     type: "bool"
     default_value {
-      b: true
+      b: false
     }
   }
 }
 op {
-  name: "CTCGreedyDecoder"
+  name: "AudioSummary"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "tag"
+    type: DT_STRING
   }
   input_arg {
-    name: "sequence_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "decoded_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "decoded_values"
-    type: DT_INT64
+    name: "tensor"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "decoded_shape"
-    type: DT_INT64
+    name: "summary"
+    type: DT_STRING
   }
-  output_arg {
-    name: "log_probability"
-    type: DT_FLOAT
+  attr {
+    name: "sample_rate"
+    type: "float"
   }
   attr {
-    name: "merge_repeated"
-    type: "bool"
+    name: "max_outputs"
+    type: "int"
     default_value {
-      b: false
+      i: 3
     }
+    has_minimum: true
+    minimum: 1
+  }
+  deprecation {
+    version: 15
   }
 }
 op {
-  name: "CTCLoss"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
+  name: "AudioSummaryV2"
   input_arg {
-    name: "labels_indices"
-    type: DT_INT64
+    name: "tag"
+    type: DT_STRING
   }
   input_arg {
-    name: "labels_values"
-    type: DT_INT32
+    name: "tensor"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "sequence_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "loss"
+    name: "sample_rate"
     type: DT_FLOAT
   }
   output_arg {
-    name: "gradient"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "preprocess_collapse_repeated"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "summary"
+    type: DT_STRING
   }
   attr {
-    name: "ctc_merge_repeated"
-    type: "bool"
+    name: "max_outputs"
+    type: "int"
     default_value {
-      b: true
+      i: 3
     }
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "CTCLoss"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "labels_indices"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "labels_values"
-    type: DT_INT32
-  }
+  name: "AvgPool"
   input_arg {
-    name: "sequence_length"
-    type: DT_INT32
+    name: "value"
+    type_attr: "T"
   }
   output_arg {
-    name: "loss"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
-  output_arg {
-    name: "gradient"
-    type: DT_FLOAT
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "preprocess_collapse_repeated"
-    type: "bool"
-    default_value {
-      b: false
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
   attr {
-    name: "ctc_merge_repeated"
-    type: "bool"
+    name: "data_format"
+    type: "string"
     default_value {
-      b: true
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
   attr {
-    name: "ignore_longer_outputs_than_inputs"
-    type: "bool"
-    default_value {
-      b: false
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+        type: DT_DOUBLE
+      }
     }
   }
 }
 op {
-  name: "CacheDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "AvgPool"
   input_arg {
-    name: "filename"
-    type: DT_STRING
+    name: "value"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
+    name: "ksize"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
+    minimum: 4
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
-  }
-  is_stateful: true
-}
-op {
-  name: "CacheDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "filename"
-    type: DT_STRING
+    minimum: 4
   }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_HALF
+      }
+    }
   }
 }
 op {
-  name: "Cast"
+  name: "AvgPool"
   input_arg {
-    name: "x"
-    type_attr: "SrcT"
+    name: "value"
+    type_attr: "T"
   }
   output_arg {
-    name: "y"
-    type_attr: "DstT"
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "SrcT"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "DstT"
-    type: "type"
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-}
-op {
-  name: "Ceil"
-  input_arg {
-    name: "x"
-    type_attr: "T"
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
-  output_arg {
-    name: "y"
-    type_attr: "T"
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
     name: "T"
@@ -6493,9 +7118,9 @@ op {
   }
 }
 op {
-  name: "CheckNumerics"
+  name: "AvgPool"
   input_arg {
-    name: "tensor"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
@@ -6503,44 +7128,55 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
   attr {
-    name: "message"
+    name: "data_format"
     type: "string"
-  }
-}
-op {
-  name: "Cholesky"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "Cholesky"
+  name: "AvgPool3D"
   input_arg {
     name: "input"
     type_attr: "T"
@@ -6550,32 +7186,27 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
-}
-op {
-  name: "CholeskyGrad"
-  input_arg {
-    name: "l"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
   attr {
     name: "T"
     type: "type"
@@ -6588,402 +7219,233 @@ op {
   }
 }
 op {
-  name: "CompareAndBitpack"
+  name: "AvgPool3D"
   input_arg {
     name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "threshold"
-    type_attr: "T"
-  }
   output_arg {
     name: "output"
-    type: DT_UINT8
+    type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_BOOL
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
-}
-op {
-  name: "Complex"
-  input_arg {
-    name: "real"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "imag"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "out"
-    type_attr: "Tout"
-  }
   attr {
-    name: "T"
-    type: "type"
+    name: "data_format"
+    type: "string"
     default_value {
-      type: DT_FLOAT
+      s: "NDHWC"
     }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        s: "NDHWC"
+        s: "NCDHW"
       }
     }
   }
   attr {
-    name: "Tout"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_COMPLEX64
-    }
     allowed_values {
       list {
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "ComplexAbs"
+  name: "AvgPool3D"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
-    type_attr: "Tout"
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_COMPLEX64
-    }
-    allowed_values {
-      list {
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "Tout"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
-}
-op {
-  name: "ComputeAccidentalHits"
-  input_arg {
-    name: "true_classes"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "indices"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "ids"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "weights"
-    type: DT_FLOAT
-  }
   attr {
-    name: "num_true"
-    type: "int"
-  }
-  attr {
-    name: "seed"
-    type: "int"
+    name: "data_format"
+    type: "string"
     default_value {
-      i: 0
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
     }
   }
   attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
 }
 op {
-  name: "Concat"
+  name: "AvgPool3DGrad"
   input_arg {
-    name: "concat_dim"
+    name: "orig_input_shape"
     type: DT_INT32
   }
   input_arg {
-    name: "values"
+    name: "grad"
     type_attr: "T"
-    number_attr: "N"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "N"
-    type: "int"
+    name: "ksize"
+    type: "list(int)"
     has_minimum: true
-    minimum: 2
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
-}
-op {
-  name: "ConcatOffset"
-  input_arg {
-    name: "concat_dim"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "shape"
-    type: DT_INT32
-    number_attr: "N"
-  }
-  output_arg {
-    name: "offset"
-    type: DT_INT32
-    number_attr: "N"
+    minimum: 5
   }
   attr {
-    name: "N"
-    type: "int"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
-    minimum: 2
-  }
-}
-op {
-  name: "ConcatV2"
-  input_arg {
-    name: "values"
-    type_attr: "T"
-    number_attr: "N"
-  }
-  input_arg {
-    name: "axis"
-    type_attr: "Tidx"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+    minimum: 5
   }
   attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 2
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "ConcatenateDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "another_dataset"
-    type: DT_VARIANT
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-  is_stateful: true
-}
-op {
-  name: "ConcatenateDataset"
+  name: "AvgPool3DGrad"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "orig_input_shape"
+    type: DT_INT32
   }
   input_arg {
-    name: "another_dataset"
-    type: DT_VARIANT
+    name: "grad"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
+    name: "ksize"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
+    minimum: 5
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "ConditionalAccumulator"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    minimum: 5
   }
   attr {
-    name: "dtype"
-    type: "type"
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
   attr {
-    name: "shape"
-    type: "shape"
-  }
-  attr {
-    name: "container"
+    name: "data_format"
     type: "string"
     default_value {
-      s: ""
+      s: "NDHWC"
     }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
     }
   }
-  is_stateful: true
-}
-op {
-  name: "ConditionalAccumulator"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "shape"
-    type: "shape"
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "Conj"
+  name: "AvgPool3DGrad"
   input_arg {
-    name: "input"
+    name: "orig_input_shape"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -6991,102 +7453,60 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_COMPLEX64
-    }
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
-}
-op {
-  name: "Conj"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
-    type: "type"
+    name: "data_format"
+    type: "string"
     default_value {
-      type: DT_COMPLEX64
+      s: "NDHWC"
     }
     allowed_values {
       list {
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_VARIANT
+        s: "NDHWC"
+        s: "NCDHW"
       }
     }
   }
-}
-op {
-  name: "ConjugateTranspose"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "perm"
-    type_attr: "Tperm"
-  }
-  output_arg {
-    name: "y"
-    type_attr: "T"
-  }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tperm"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "Const"
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "value"
-    type: "tensor"
-  }
-  attr {
-    name: "dtype"
-    type: "type"
-  }
-}
-op {
-  name: "ControlTrigger"
-}
-op {
-  name: "Conv2D"
+  name: "AvgPoolGrad"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "orig_input_shape"
+    type: DT_INT32
   }
   input_arg {
-    name: "filter"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -7094,25 +7514,16 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-      }
-    }
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-  }
-  attr {
-    name: "use_cudnn_on_gpu"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    has_minimum: true
+    minimum: 4
   }
   attr {
     name: "padding"
@@ -7137,19 +7548,26 @@ op {
       }
     }
   }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+        type: DT_DOUBLE
+      }
+    }
+  }
 }
 op {
-  name: "Conv2DBackpropFilter"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
+  name: "AvgPoolGrad"
   input_arg {
-    name: "filter_sizes"
+    name: "orig_input_shape"
     type: DT_INT32
   }
   input_arg {
-    name: "out_backprop"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -7157,25 +7575,16 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-      }
-    }
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-  }
-  attr {
-    name: "use_cudnn_on_gpu"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    has_minimum: true
+    minimum: 4
   }
   attr {
     name: "padding"
@@ -7200,19 +7609,26 @@ op {
       }
     }
   }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_HALF
+      }
+    }
+  }
 }
 op {
-  name: "Conv2DBackpropInput"
+  name: "AvgPoolGrad"
   input_arg {
-    name: "input_sizes"
+    name: "orig_input_shape"
     type: DT_INT32
   }
   input_arg {
-    name: "filter"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "out_backprop"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -7220,25 +7636,16 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-      }
-    }
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-  }
-  attr {
-    name: "use_cudnn_on_gpu"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    has_minimum: true
+    minimum: 4
   }
   attr {
     name: "padding"
@@ -7263,56 +7670,26 @@ op {
       }
     }
   }
-}
-op {
-  name: "Conv3D"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "filter"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
 }
 op {
-  name: "Conv3D"
+  name: "AvgPoolGrad"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "orig_input_shape"
+    type: DT_INT32
   }
   input_arg {
-    name: "filter"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -7320,20 +7697,16 @@ op {
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
     has_minimum: true
-    minimum: 5
+    minimum: 4
   }
   attr {
     name: "padding"
@@ -7349,181 +7722,193 @@ op {
     name: "data_format"
     type: "string"
     default_value {
-      s: "NDHWC"
+      s: "NHWC"
     }
     allowed_values {
       list {
-        s: "NDHWC"
-        s: "NCDHW"
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
-}
-op {
-  name: "Conv3DBackpropFilter"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "filter"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "out_backprop"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
+}
+op {
+  name: "Barrier"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "component_types"
+    type: "list(type)"
     has_minimum: true
-    minimum: 5
+    minimum: 1
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
       list {
-        s: "SAME"
-        s: "VALID"
       }
     }
+    has_minimum: true
   }
-  deprecation {
-    version: 10
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "Conv3DBackpropFilterV2"
+  name: "BarrierClose"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "cancel_pending_enqueues"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+}
+op {
+  name: "BarrierIncompleteSize"
   input_arg {
-    name: "filter_sizes"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "size"
     type: DT_INT32
   }
+}
+op {
+  name: "BarrierInsertMany"
   input_arg {
-    name: "out_backprop"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "keys"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "values"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+    name: "component_index"
+    type: "int"
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+}
+op {
+  name: "BarrierReadySize"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "size"
+    type: DT_INT32
   }
 }
 op {
-  name: "Conv3DBackpropFilterV2"
+  name: "BarrierTakeMany"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
   input_arg {
-    name: "filter_sizes"
+    name: "num_elements"
     type: DT_INT32
   }
-  input_arg {
-    name: "out_backprop"
-    type_attr: "T"
+  output_arg {
+    name: "indices"
+    type: DT_INT64
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "keys"
+    type: DT_STRING
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+  output_arg {
+    name: "values"
+    type_list_attr: "component_types"
   }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "component_types"
+    type: "list(type)"
     has_minimum: true
-    minimum: 5
+    minimum: 1
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "allow_small_batch"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "wait_for_incomplete"
+    type: "bool"
     default_value {
-      s: "NDHWC"
+      b: false
     }
-    allowed_values {
-      list {
-        s: "NDHWC"
-        s: "NCDHW"
-      }
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
     }
   }
 }
 op {
-  name: "Conv3DBackpropInput"
+  name: "BatchCholesky"
   input_arg {
     name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "filter"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "out_backprop"
-    type_attr: "T"
-  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -7533,43 +7918,23 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
         type: DT_DOUBLE
-      }
-    }
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
       }
     }
   }
   deprecation {
-    version: 10
+    version: 13
   }
 }
 op {
-  name: "Conv3DBackpropInputV2"
-  input_arg {
-    name: "input_sizes"
-    type: DT_INT32
-  }
+  name: "BatchCholeskyGrad"
   input_arg {
-    name: "filter"
+    name: "l"
     type_attr: "T"
   }
   input_arg {
-    name: "out_backprop"
+    name: "grad"
     type_attr: "T"
   }
   output_arg {
@@ -7586,223 +7951,163 @@ op {
       }
     }
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  deprecation {
+    version: 13
   }
 }
 op {
-  name: "Conv3DBackpropInputV2"
-  input_arg {
-    name: "input_sizes"
-    type: DT_INT32
-  }
+  name: "BatchDataset"
   input_arg {
-    name: "filter"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "out_backprop"
-    type_attr: "T"
+    name: "batch_size"
+    type: DT_INT64
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "output_shapes"
+    type: "list(shape)"
     has_minimum: true
-    minimum: 5
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "BatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NDHWC"
-    }
-    allowed_values {
-      list {
-        s: "NDHWC"
-        s: "NCDHW"
-      }
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "Copy"
+  name: "BatchFFT"
   input_arg {
     name: "input"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  deprecation {
+    version: 15
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "Copy"
+  name: "BatchFFT2D"
   input_arg {
     name: "input"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "debug_ops_spec"
-    type: "list(string)"
-    default_value {
-      list {
-      }
-    }
+  deprecation {
+    version: 15
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "CopyHost"
+  name: "BatchFFT3D"
   input_arg {
     name: "input"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  deprecation {
+    version: 15
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "CopyHost"
+  name: "BatchIFFT"
   input_arg {
     name: "input"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "T"
-    type: "type"
+  deprecation {
+    version: 15
   }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+}
+op {
+  name: "BatchIFFT2D"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "debug_ops_spec"
-    type: "list(string)"
-    default_value {
-      list {
-      }
-    }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
+  }
+  deprecation {
+    version: 15
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "Cos"
+  name: "BatchIFFT3D"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "input"
+    type: DT_COMPLEX64
   }
   output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "output"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+  deprecation {
+    version: 15
   }
 }
 op {
-  name: "Cosh"
+  name: "BatchMatMul"
   input_arg {
     name: "x"
     type_attr: "T"
   }
-  output_arg {
+  input_arg {
     name: "y"
     type_attr: "T"
   }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
     name: "T"
     type: "type"
@@ -7811,275 +8116,223 @@ op {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
     }
   }
-}
-op {
-  name: "CountUpTo"
-  input_arg {
-    name: "ref"
-    type_attr: "T"
-    is_ref: true
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
   attr {
-    name: "limit"
-    type: "int"
+    name: "adj_x"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+    name: "adj_y"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
 }
 op {
-  name: "CropAndResize"
+  name: "BatchMatMul"
   input_arg {
-    name: "image"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "boxes"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "box_ind"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "crop_size"
-    type: DT_INT32
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "crops"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
   attr {
-    name: "method"
-    type: "string"
+    name: "adj_x"
+    type: "bool"
     default_value {
-      s: "bilinear"
-    }
-    allowed_values {
-      list {
-        s: "bilinear"
-      }
+      b: false
     }
   }
   attr {
-    name: "extrapolation_value"
-    type: "float"
+    name: "adj_y"
+    type: "bool"
     default_value {
-      f: 0
+      b: false
     }
   }
 }
 op {
-  name: "CropAndResize"
+  name: "BatchMatrixBandPart"
   input_arg {
-    name: "image"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "boxes"
-    type: DT_FLOAT
+    name: "num_lower"
+    type: DT_INT64
   }
   input_arg {
-    name: "box_ind"
-    type: DT_INT32
+    name: "num_upper"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "band"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  deprecation {
+    version: 14
   }
+}
+op {
+  name: "BatchMatrixDeterminant"
   input_arg {
-    name: "crop_size"
-    type: DT_INT32
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "crops"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "method"
-    type: "string"
-    default_value {
-      s: "bilinear"
-    }
-    allowed_values {
-      list {
-        s: "bilinear"
-      }
-    }
-  }
-  attr {
-    name: "extrapolation_value"
-    type: "float"
-    default_value {
-      f: 0
-    }
+  deprecation {
+    version: 13
   }
 }
 op {
-  name: "CropAndResizeGradBoxes"
-  input_arg {
-    name: "grads"
-    type: DT_FLOAT
-  }
+  name: "BatchMatrixDeterminant"
   input_arg {
-    name: "image"
+    name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "boxes"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "box_ind"
-    type: DT_INT32
-  }
   output_arg {
     name: "output"
-    type: DT_FLOAT
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  attr {
-    name: "method"
-    type: "string"
-    default_value {
-      s: "bilinear"
-    }
-    allowed_values {
-      list {
-        s: "bilinear"
-      }
-    }
+  deprecation {
+    version: 13
   }
 }
 op {
-  name: "CropAndResizeGradBoxes"
+  name: "BatchMatrixDiag"
   input_arg {
-    name: "grads"
-    type: DT_FLOAT
+    name: "diagonal"
+    type_attr: "T"
   }
-  input_arg {
-    name: "image"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "boxes"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
+  }
+  deprecation {
+    version: 14
   }
+}
+op {
+  name: "BatchMatrixDiagPart"
   input_arg {
-    name: "box_ind"
-    type: DT_INT32
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_FLOAT
+    name: "diagonal"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+  }
+  deprecation {
+    version: 14
+  }
+}
+op {
+  name: "BatchMatrixInverse"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "method"
-    type: "string"
+    name: "adjoint"
+    type: "bool"
     default_value {
-      s: "bilinear"
+      b: false
     }
+  }
+  attr {
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "bilinear"
+        type: DT_DOUBLE
+        type: DT_FLOAT
       }
     }
   }
+  deprecation {
+    version: 13
+  }
 }
 op {
-  name: "CropAndResizeGradImage"
-  input_arg {
-    name: "grads"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "boxes"
-    type: DT_FLOAT
-  }
+  name: "BatchMatrixSetDiag"
   input_arg {
-    name: "box_ind"
-    type: DT_INT32
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "image_size"
-    type: DT_INT32
+    name: "diagonal"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
@@ -8088,71 +8341,62 @@ op {
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_HALF
-        type: DT_DOUBLE
-      }
-    }
   }
-  attr {
-    name: "method"
-    type: "string"
-    default_value {
-      s: "bilinear"
-    }
-    allowed_values {
-      list {
-        s: "bilinear"
-      }
-    }
+  deprecation {
+    version: 14
   }
 }
 op {
-  name: "Cross"
+  name: "BatchMatrixSolve"
   input_arg {
-    name: "a"
+    name: "matrix"
     type_attr: "T"
   }
   input_arg {
-    name: "b"
+    name: "rhs"
     type_attr: "T"
   }
   output_arg {
-    name: "product"
+    name: "output"
     type_attr: "T"
   }
+  attr {
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_FLOAT
       }
     }
   }
+  deprecation {
+    version: 13
+  }
 }
 op {
-  name: "Cross"
+  name: "BatchMatrixSolveLs"
   input_arg {
-    name: "a"
+    name: "matrix"
     type_attr: "T"
   }
   input_arg {
-    name: "b"
+    name: "rhs"
     type_attr: "T"
   }
+  input_arg {
+    name: "l2_regularizer"
+    type: DT_DOUBLE
+  }
   output_arg {
-    name: "product"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -8160,44 +8404,45 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_FLOAT
       }
     }
   }
+  attr {
+    name: "fast"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  deprecation {
+    version: 13
+  }
 }
 op {
-  name: "Cumprod"
+  name: "BatchMatrixTriangularSolve"
   input_arg {
-    name: "x"
+    name: "matrix"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "rhs"
+    type_attr: "T"
   }
   output_arg {
-    name: "out"
+    name: "output"
     type_attr: "T"
   }
   attr {
-    name: "exclusive"
+    name: "lower"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
   attr {
-    name: "reverse"
+    name: "adjoint"
     type: "bool"
     default_value {
       b: false
@@ -8208,64 +8453,40 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
+        type: DT_FLOAT
       }
     }
   }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  deprecation {
+    version: 13
   }
 }
 op {
-  name: "Cumprod"
+  name: "BatchNormWithGlobalNormalization"
   input_arg {
-    name: "x"
+    name: "t"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "m"
+    type_attr: "T"
   }
-  output_arg {
-    name: "out"
+  input_arg {
+    name: "v"
     type_attr: "T"
   }
-  attr {
-    name: "exclusive"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
   }
-  attr {
-    name: "reverse"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "result"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -8286,52 +8507,46 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "variance_epsilon"
+    type: "float"
+  }
+  attr {
+    name: "scale_after_normalization"
+    type: "bool"
+  }
+  deprecation {
+    version: 9
   }
 }
 op {
-  name: "Cumsum"
+  name: "BatchNormWithGlobalNormalization"
   input_arg {
-    name: "x"
+    name: "t"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "m"
+    type_attr: "T"
   }
-  output_arg {
-    name: "out"
+  input_arg {
+    name: "v"
     type_attr: "T"
   }
-  attr {
-    name: "exclusive"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
   }
-  attr {
-    name: "reverse"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "result"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -8352,50 +8567,48 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "variance_epsilon"
+    type: "float"
+  }
+  attr {
+    name: "scale_after_normalization"
+    type: "bool"
+  }
+  deprecation {
+    version: 9
   }
 }
 op {
-  name: "Cumsum"
+  name: "BatchNormWithGlobalNormalization"
   input_arg {
-    name: "x"
+    name: "t"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "m"
+    type_attr: "T"
   }
-  output_arg {
-    name: "out"
+  input_arg {
+    name: "v"
     type_attr: "T"
   }
-  attr {
-    name: "exclusive"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
   }
-  attr {
-    name: "reverse"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "result"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -8418,1169 +8631,1104 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Tidx"
+    name: "variance_epsilon"
+    type: "float"
+  }
+  attr {
+    name: "scale_after_normalization"
+    type: "bool"
+  }
+  deprecation {
+    version: 9
+  }
+}
+op {
+  name: "BatchNormWithGlobalNormalization"
+  input_arg {
+    name: "t"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "m"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "result"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "variance_epsilon"
+    type: "float"
+  }
+  attr {
+    name: "scale_after_normalization"
+    type: "bool"
+  }
+  deprecation {
+    version: 9
+  }
 }
 op {
-  name: "DatasetToSingleElement"
+  name: "BatchNormWithGlobalNormalizationGrad"
   input_arg {
-    name: "dataset"
-    type: DT_VARIANT
+    name: "t"
+    type_attr: "T"
   }
-  output_arg {
-    name: "components"
-    type_list_attr: "output_types"
+  input_arg {
+    name: "m"
+    type_attr: "T"
   }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "v"
+    type_attr: "T"
   }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
   }
-}
-op {
-  name: "DebugGradientIdentity"
   input_arg {
-    name: "input"
+    name: "backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "dx"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dm"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dv"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "db"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dg"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "variance_epsilon"
+    type: "float"
+  }
+  attr {
+    name: "scale_after_normalization"
+    type: "bool"
+  }
+  deprecation {
+    version: 9
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugIdentity"
+  name: "BatchNormWithGlobalNormalizationGrad"
   input_arg {
-    name: "input"
+    name: "t"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "m"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "dx"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dm"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dv"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "db"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dg"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "variance_epsilon"
+    type: "float"
   }
   attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
-      list {
-      }
-    }
+    name: "scale_after_normalization"
+    type: "bool"
+  }
+  deprecation {
+    version: 9
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugIdentity"
+  name: "BatchNormWithGlobalNormalizationGrad"
   input_arg {
-    name: "input"
+    name: "t"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "m"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "dx"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
+  output_arg {
+    name: "dm"
+    type_attr: "T"
   }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  output_arg {
+    name: "dv"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "db"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dg"
+    type_attr: "T"
   }
   attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "gated_grpc"
+    name: "variance_epsilon"
+    type: "float"
+  }
+  attr {
+    name: "scale_after_normalization"
     type: "bool"
-    default_value {
-      b: false
-    }
   }
-  allows_uninitialized_input: true
+  deprecation {
+    version: 9
+  }
 }
 op {
-  name: "DebugIdentity"
+  name: "BatchNormWithGlobalNormalizationGrad"
   input_arg {
-    name: "input"
+    name: "t"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "m"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "dx"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
+  output_arg {
+    name: "dm"
+    type_attr: "T"
   }
-  attr {
-    name: "device_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  output_arg {
+    name: "dv"
+    type_attr: "T"
   }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  output_arg {
+    name: "db"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "dg"
+    type_attr: "T"
   }
   attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "gated_grpc"
+    name: "variance_epsilon"
+    type: "float"
+  }
+  attr {
+    name: "scale_after_normalization"
     type: "bool"
-    default_value {
-      b: false
-    }
   }
-  allows_uninitialized_input: true
+  deprecation {
+    version: 9
+  }
 }
 op {
-  name: "DebugNanCount"
+  name: "BatchSelfAdjointEig"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    type: DT_INT64
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
+    allowed_values {
       list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
       }
     }
   }
-  allows_uninitialized_input: true
+  deprecation {
+    version: 11
+  }
 }
 op {
-  name: "DebugNanCount"
+  name: "BatchSelfAdjointEigV2"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_INT64
+    name: "e"
+    type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
+  output_arg {
+    name: "v"
+    type_attr: "T"
   }
   attr {
-    name: "tensor_name"
-    type: "string"
+    name: "compute_v"
+    type: "bool"
     default_value {
-      s: ""
+      b: true
     }
   }
   attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
       }
     }
   }
-  attr {
-    name: "gated_grpc"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  deprecation {
+    version: 13
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugNanCount"
+  name: "BatchSvd"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_INT64
+    name: "s"
+    type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
+  output_arg {
+    name: "u"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "v"
+    type_attr: "T"
   }
   attr {
-    name: "device_name"
-    type: "string"
+    name: "compute_uv"
+    type: "bool"
     default_value {
-      s: ""
+      b: true
     }
   }
   attr {
-    name: "tensor_name"
-    type: "string"
+    name: "full_matrices"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  attr {
-    name: "gated_grpc"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  deprecation {
+    version: 13
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugNumericSummary"
+  name: "BatchToSpace"
   input_arg {
     name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "crops"
+    type_attr: "Tidx"
+  }
   output_arg {
     name: "output"
-    type: DT_DOUBLE
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
   attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "block_size"
+    type: "int"
+    has_minimum: true
+    minimum: 2
   }
   attr {
-    name: "debug_urls"
-    type: "list(string)"
+    name: "Tidx"
+    type: "type"
     default_value {
+      type: DT_INT32
+    }
+    allowed_values {
       list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugNumericSummary"
+  name: "BatchToSpaceND"
   input_arg {
     name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "block_shape"
+    type_attr: "Tblock_shape"
+  }
+  input_arg {
+    name: "crops"
+    type_attr: "Tcrops"
+  }
   output_arg {
     name: "output"
-    type: DT_DOUBLE
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
   attr {
-    name: "tensor_name"
-    type: "string"
+    name: "Tblock_shape"
+    type: "type"
     default_value {
-      s: ""
+      type: DT_INT32
     }
-  }
-  attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
+    allowed_values {
       list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "lower_bound"
-    type: "float"
-    default_value {
-      f: -inf
-    }
-  }
-  attr {
-    name: "upper_bound"
-    type: "float"
+    name: "Tcrops"
+    type: "type"
     default_value {
-      f: inf
+      type: DT_INT32
     }
-  }
-  attr {
-    name: "mute_if_healthy"
-    type: "bool"
-    default_value {
-      b: false
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugNumericSummary"
+  name: "Betainc"
   input_arg {
-    name: "input"
+    name: "a"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_DOUBLE
+    name: "z"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "tensor_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "lower_bound"
-    type: "float"
-    default_value {
-      f: -inf
-    }
-  }
-  attr {
-    name: "upper_bound"
-    type: "float"
-    default_value {
-      f: inf
-    }
-  }
-  attr {
-    name: "mute_if_healthy"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "gated_grpc"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DebugNumericSummary"
+  name: "BiasAdd"
   input_arg {
-    name: "input"
+    name: "value"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "bias"
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    type: DT_DOUBLE
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "device_name"
-    type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
     }
   }
   attr {
-    name: "tensor_name"
+    name: "data_format"
     type: "string"
     default_value {
-      s: ""
+      s: "NHWC"
     }
-  }
-  attr {
-    name: "debug_urls"
-    type: "list(string)"
-    default_value {
+    allowed_values {
       list {
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
-  attr {
-    name: "lower_bound"
-    type: "float"
-    default_value {
-      f: -inf
-    }
-  }
-  attr {
-    name: "upper_bound"
-    type: "float"
-    default_value {
-      f: inf
-    }
-  }
-  attr {
-    name: "mute_if_healthy"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "gated_grpc"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  allows_uninitialized_input: true
 }
 op {
-  name: "DecodeAndCropJpeg"
+  name: "BiasAdd"
   input_arg {
-    name: "contents"
-    type: DT_STRING
+    name: "value"
+    type_attr: "T"
   }
   input_arg {
-    name: "crop_window"
-    type: DT_INT32
+    name: "bias"
+    type_attr: "T"
   }
   output_arg {
-    name: "image"
-    type: DT_UINT8
-  }
-  attr {
-    name: "channels"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "ratio"
-    type: "int"
-    default_value {
-      i: 1
-    }
-  }
-  attr {
-    name: "fancy_upscaling"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "try_recover_truncated"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "acceptable_fraction"
-    type: "float"
-    default_value {
-      f: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
   attr {
-    name: "dct_method"
+    name: "data_format"
     type: "string"
     default_value {
-      s: ""
+      s: "NHWC"
     }
-  }
-}
-op {
-  name: "DecodeBase64"
-  input_arg {
-    name: "input"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "output"
-    type: DT_STRING
-  }
-}
-op {
-  name: "DecodeBmp"
-  input_arg {
-    name: "contents"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "image"
-    type: DT_UINT8
-  }
-  attr {
-    name: "channels"
-    type: "int"
-    default_value {
-      i: 0
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
 }
 op {
-  name: "DecodeCSV"
+  name: "BiasAdd"
   input_arg {
-    name: "records"
-    type: DT_STRING
+    name: "value"
+    type_attr: "T"
   }
   input_arg {
-    name: "record_defaults"
-    type_list_attr: "OUT_TYPE"
+    name: "bias"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_list_attr: "OUT_TYPE"
+    type_attr: "T"
   }
   attr {
-    name: "OUT_TYPE"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_INT32
+        type: DT_DOUBLE
         type: DT_INT64
-        type: DT_STRING
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "field_delim"
+    name: "data_format"
     type: "string"
     default_value {
-      s: ","
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
 }
 op {
-  name: "DecodeCSV"
+  name: "BiasAdd"
   input_arg {
-    name: "records"
-    type: DT_STRING
+    name: "value"
+    type_attr: "T"
   }
   input_arg {
-    name: "record_defaults"
-    type_list_attr: "OUT_TYPE"
+    name: "bias"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_list_attr: "OUT_TYPE"
+    type_attr: "T"
   }
   attr {
-    name: "OUT_TYPE"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
-        type: DT_STRING
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "field_delim"
+    name: "data_format"
     type: "string"
     default_value {
-      s: ","
+      s: "NHWC"
     }
-  }
-  attr {
-    name: "use_quote_delim"
-    type: "bool"
-    default_value {
-      b: true
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
 }
 op {
-  name: "DecodeCSV"
-  input_arg {
-    name: "records"
-    type: DT_STRING
-  }
+  name: "BiasAddGrad"
   input_arg {
-    name: "record_defaults"
-    type_list_attr: "OUT_TYPE"
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_list_attr: "OUT_TYPE"
+    type_attr: "T"
   }
   attr {
-    name: "OUT_TYPE"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_INT32
+        type: DT_DOUBLE
         type: DT_INT64
-        type: DT_STRING
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "field_delim"
+    name: "data_format"
     type: "string"
     default_value {
-      s: ","
-    }
-  }
-  attr {
-    name: "use_quote_delim"
-    type: "bool"
-    default_value {
-      b: true
+      s: "NHWC"
     }
-  }
-  attr {
-    name: "na_value"
-    type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
 }
 op {
-  name: "DecodeCSV"
-  input_arg {
-    name: "records"
-    type: DT_STRING
-  }
+  name: "BiasAddGrad"
   input_arg {
-    name: "record_defaults"
-    type_list_attr: "OUT_TYPE"
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_list_attr: "OUT_TYPE"
+    type_attr: "T"
   }
   attr {
-    name: "OUT_TYPE"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
-        type: DT_STRING
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "field_delim"
-    type: "string"
-    default_value {
-      s: ","
-    }
-  }
-  attr {
-    name: "use_quote_delim"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "na_value"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-}
-op {
-  name: "DecodeGif"
-  input_arg {
-    name: "contents"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "image"
-    type: DT_UINT8
-  }
-}
-op {
-  name: "DecodeJSONExample"
-  input_arg {
-    name: "json_examples"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "binary_examples"
-    type: DT_STRING
-  }
-}
-op {
-  name: "DecodeJpeg"
-  input_arg {
-    name: "contents"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "image"
-    type: DT_UINT8
-  }
-  attr {
-    name: "channels"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "ratio"
-    type: "int"
-    default_value {
-      i: 1
-    }
-  }
-  attr {
-    name: "fancy_upscaling"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "try_recover_truncated"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "acceptable_fraction"
-    type: "float"
-    default_value {
-      f: 1
-    }
-  }
-  attr {
-    name: "dct_method"
+    name: "data_format"
     type: "string"
     default_value {
-      s: ""
-    }
-  }
-}
-op {
-  name: "DecodePng"
-  input_arg {
-    name: "contents"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "image"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "channels"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "dtype"
-    type: "type"
-    default_value {
-      type: DT_UINT8
+      s: "NHWC"
     }
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_UINT16
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
 }
 op {
-  name: "DecodeRaw"
+  name: "BiasAddGrad"
   input_arg {
-    name: "bytes"
-    type: DT_STRING
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "out_type"
+    type_attr: "T"
   }
   attr {
-    name: "out_type"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "little_endian"
-    type: "bool"
+    name: "data_format"
+    type: "string"
     default_value {
-      b: true
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
 }
 op {
-  name: "DecodeRaw"
+  name: "BiasAddGrad"
   input_arg {
-    name: "bytes"
-    type: DT_STRING
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "out_type"
+    type_attr: "T"
   }
   attr {
-    name: "out_type"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_UINT16
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "little_endian"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-}
-op {
-  name: "DecodeWav"
-  input_arg {
-    name: "contents"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "audio"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "sample_rate"
-    type: DT_INT32
-  }
-  attr {
-    name: "desired_channels"
-    type: "int"
+    name: "data_format"
+    type: "string"
     default_value {
-      i: -1
+      s: "NHWC"
     }
-  }
-  attr {
-    name: "desired_samples"
-    type: "int"
-    default_value {
-      i: -1
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
 }
 op {
-  name: "DeleteSessionTensor"
-  input_arg {
-    name: "handle"
-    type: DT_STRING
-  }
-}
-op {
-  name: "DenseToDenseSetOperation"
+  name: "BiasAddV1"
   input_arg {
-    name: "set1"
+    name: "value"
     type_attr: "T"
   }
   input_arg {
-    name: "set2"
+    name: "bias"
     type_attr: "T"
   }
   output_arg {
-    name: "result_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "result_values"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "result_shape"
-    type: DT_INT64
-  }
-  attr {
-    name: "set_operation"
-    type: "string"
-  }
-  attr {
-    name: "validate_indices"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
         type: DT_UINT16
-        type: DT_STRING
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "DenseToSparseBatchDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "batch_size"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "row_shape"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-  is_stateful: true
-}
-op {
-  name: "DenseToSparseBatchDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "batch_size"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "row_shape"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "DenseToSparseSetOperation"
+  name: "BiasAddV1"
   input_arg {
-    name: "set1"
+    name: "value"
     type_attr: "T"
   }
   input_arg {
-    name: "set2_indices"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "set2_values"
+    name: "bias"
     type_attr: "T"
   }
-  input_arg {
-    name: "set2_shape"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "result_indices"
-    type: DT_INT64
-  }
   output_arg {
-    name: "result_values"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "result_shape"
-    type: DT_INT64
-  }
-  attr {
-    name: "set_operation"
-    type: "string"
-  }
-  attr {
-    name: "validate_indices"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
         type: DT_UINT16
-        type: DT_STRING
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "DepthToSpace"
+  name: "BiasAddV1"
   input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
+    name: "value"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-  attr {
-    name: "block_size"
-    type: "int"
-    has_minimum: true
-    minimum: 2
-  }
-}
-op {
-  name: "DepthToSpace"
   input_arg {
-    name: "input"
+    name: "bias"
     type_attr: "T"
   }
   output_arg {
@@ -9590,36 +9738,37 @@ op {
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "block_size"
-    type: "int"
-    has_minimum: true
-    minimum: 2
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
-        s: "NCHW_VECT_C"
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "DepthwiseConv2dNative"
+  name: "BiasAddV1"
   input_arg {
-    name: "input"
+    name: "value"
     type_attr: "T"
   }
   input_arg {
-    name: "filter"
+    name: "bias"
     type_attr: "T"
   }
   output_arg {
@@ -9633,36 +9782,41 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-      }
-    }
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "DepthwiseConv2dNative"
+  name: "Bincount"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "arr"
+    type: DT_INT32
   }
   input_arg {
-    name: "filter"
+    name: "size"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "weights"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "bins"
     type_attr: "T"
   }
   attr {
@@ -9670,56 +9824,23 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_INT32
+        type: DT_INT64
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
-  }
 }
 op {
-  name: "DepthwiseConv2dNativeBackpropFilter"
+  name: "Bitcast"
   input_arg {
     name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "filter_sizes"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "out_backprop"
-    type_attr: "T"
-  }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type_attr: "type"
   }
   attr {
     name: "T"
@@ -9728,41 +9849,53 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
+    name: "type"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "DepthwiseConv2dNativeBackpropFilter"
+  name: "Bitcast"
   input_arg {
     name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "filter_sizes"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "out_backprop"
-    type_attr: "T"
-  }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type_attr: "type"
   }
   attr {
     name: "T"
@@ -9771,96 +9904,121 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
+    name: "type"
+    type: "type"
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "DepthwiseConv2dNativeBackpropInput"
-  input_arg {
-    name: "input_sizes"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "filter"
-    type_attr: "T"
-  }
+  name: "Bitcast"
   input_arg {
-    name: "out_backprop"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type_attr: "type"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
+    name: "type"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "DepthwiseConv2dNativeBackpropInput"
-  input_arg {
-    name: "input_sizes"
-    type: DT_INT32
-  }
+  name: "BitwiseAnd"
   input_arg {
-    name: "filter"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "out_backprop"
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -9868,231 +10026,153 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
       }
     }
   }
+  is_commutative: true
 }
 op {
-  name: "Dequantize"
+  name: "BitwiseAnd"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "min_range"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "max_range"
-    type: DT_FLOAT
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_FLOAT
+    name: "z"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
-  }
-  attr {
-    name: "mode"
-    type: "string"
-    default_value {
-      s: "MIN_COMBINED"
-    }
-    allowed_values {
-      list {
-        s: "MIN_COMBINED"
-        s: "MIN_FIRST"
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  is_commutative: true
 }
 op {
-  name: "Dequantize"
+  name: "BitwiseOr"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "min_range"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "max_range"
-    type: DT_FLOAT
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_FLOAT
+    name: "z"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
-  }
-  attr {
-    name: "mode"
-    type: "string"
-    default_value {
-      s: "MIN_COMBINED"
-    }
-    allowed_values {
-      list {
-        s: "MIN_COMBINED"
-        s: "MIN_FIRST"
-        s: "SCALED"
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
       }
     }
   }
+  is_commutative: true
 }
 op {
-  name: "DeserializeIterator"
-  input_arg {
-    name: "resource_handle"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "serialized"
-    type: DT_VARIANT
-  }
-  is_stateful: true
-}
-op {
-  name: "DeserializeManySparse"
+  name: "BitwiseOr"
   input_arg {
-    name: "serialized_sparse"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "sparse_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sparse_values"
-    type_attr: "dtype"
-  }
-  output_arg {
-    name: "sparse_shape"
-    type: DT_INT64
-  }
-  attr {
-    name: "dtype"
-    type: "type"
+    name: "x"
+    type_attr: "T"
   }
-}
-op {
-  name: "DeserializeSparse"
   input_arg {
-    name: "serialized_sparse"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "sparse_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sparse_values"
-    type_attr: "dtype"
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "sparse_shape"
-    type: DT_INT64
+    name: "z"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
+  is_commutative: true
 }
 op {
-  name: "DestroyResourceOp"
+  name: "BitwiseXor"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
-  }
-  attr {
-    name: "ignore_lookup_error"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    name: "x"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "DestroyTemporaryVariable"
   input_arg {
-    name: "ref"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   output_arg {
-    name: "value"
+    name: "z"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+      }
+    }
   }
-  attr {
-    name: "var_name"
-    type: "string"
-  }
+  is_commutative: true
 }
 op {
-  name: "Diag"
+  name: "BitwiseXor"
   input_arg {
-    name: "diagonal"
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -10100,307 +10180,387 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  is_commutative: true
 }
 op {
-  name: "DiagPart"
+  name: "BroadcastArgs"
   input_arg {
-    name: "input"
+    name: "s0"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "s1"
     type_attr: "T"
   }
   output_arg {
-    name: "diagonal"
+    name: "r0"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Digamma"
+  name: "BroadcastGradientArgs"
   input_arg {
-    name: "x"
+    name: "s0"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "s1"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "r0"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "r1"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "Dilation2D"
+  name: "Bucketize"
   input_arg {
     name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "filter"
-    type_attr: "T"
-  }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type: DT_INT32
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
+    name: "boundaries"
+    type: "list(float)"
+  }
+}
+op {
+  name: "BytesProducedStatsDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
   attr {
-    name: "rates"
-    type: "list(int)"
+    name: "output_types"
+    type: "list(type)"
     has_minimum: true
-    minimum: 4
+    minimum: 1
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "Dilation2D"
+  name: "CTCBeamSearchDecoder"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "filter"
-    type_attr: "T"
+    name: "sequence_length"
+    type: DT_INT32
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "decoded_indices"
+    type: DT_INT64
+    number_attr: "top_paths"
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+  output_arg {
+    name: "decoded_values"
+    type: DT_INT64
+    number_attr: "top_paths"
+  }
+  output_arg {
+    name: "decoded_shape"
+    type: DT_INT64
+    number_attr: "top_paths"
+  }
+  output_arg {
+    name: "log_probability"
+    type: DT_FLOAT
   }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "beam_width"
+    type: "int"
     has_minimum: true
-    minimum: 4
+    minimum: 1
   }
   attr {
-    name: "rates"
-    type: "list(int)"
+    name: "top_paths"
+    type: "int"
     has_minimum: true
-    minimum: 4
+    minimum: 1
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "merge_repeated"
+    type: "bool"
+    default_value {
+      b: true
     }
   }
 }
 op {
-  name: "Dilation2DBackpropFilter"
+  name: "CTCGreedyDecoder"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "filter"
-    type_attr: "T"
+    name: "sequence_length"
+    type: DT_INT32
   }
-  input_arg {
-    name: "out_backprop"
-    type_attr: "T"
+  output_arg {
+    name: "decoded_indices"
+    type: DT_INT64
   }
   output_arg {
-    name: "filter_backprop"
-    type_attr: "T"
+    name: "decoded_values"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "decoded_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "log_probability"
+    type: DT_FLOAT
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
+    name: "merge_repeated"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+}
+op {
+  name: "CTCLoss"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "labels_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "labels_values"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "sequence_length"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "loss"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient"
+    type: DT_FLOAT
   }
   attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "preprocess_collapse_repeated"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "ctc_merge_repeated"
+    type: "bool"
+    default_value {
+      b: true
     }
   }
 }
 op {
-  name: "Dilation2DBackpropFilter"
+  name: "CTCLoss"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "filter"
-    type_attr: "T"
+    name: "labels_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "out_backprop"
-    type_attr: "T"
+    name: "labels_values"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "sequence_length"
+    type: DT_INT32
   }
   output_arg {
-    name: "filter_backprop"
-    type_attr: "T"
+    name: "loss"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "gradient"
+    type: DT_FLOAT
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
+    name: "preprocess_collapse_repeated"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "ctc_merge_repeated"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
-    name: "rates"
-    type: "list(int)"
+    name: "ignore_longer_outputs_than_inputs"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "CacheDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "filename"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
     has_minimum: true
-    minimum: 4
+    minimum: 1
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
+  is_stateful: true
 }
 op {
-  name: "Dilation2DBackpropInput"
+  name: "CacheDataset"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "filter"
-    type_attr: "T"
+    name: "filename"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
+}
+op {
+  name: "Cast"
   input_arg {
-    name: "out_backprop"
+    name: "x"
+    type_attr: "SrcT"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "DstT"
+  }
+  attr {
+    name: "SrcT"
+    type: "type"
+  }
+  attr {
+    name: "DstT"
+    type: "type"
+  }
+}
+op {
+  name: "Ceil"
+  input_arg {
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "in_backprop"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -10408,57 +10568,44 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+}
+op {
+  name: "Ceil"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "padding"
-    type: "string"
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "Dilation2DBackpropInput"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "filter"
-    type_attr: "T"
-  }
+  name: "CheckNumerics"
   input_arg {
-    name: "out_backprop"
+    name: "tensor"
     type_attr: "T"
   }
   output_arg {
-    name: "in_backprop"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -10466,55 +10613,25 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
+    name: "message"
     type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
   }
 }
 op {
-  name: "Div"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "CheckNumerics"
   input_arg {
-    name: "y"
+    name: "tensor"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -10523,30 +10640,23 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
+  attr {
+    name: "message"
+    type: "string"
+  }
 }
 op {
-  name: "DrawBoundingBoxes"
+  name: "Cholesky"
   input_arg {
-    name: "images"
+    name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "boxes"
-    type: DT_FLOAT
-  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -10554,120 +10664,49 @@ op {
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
+        type: DT_DOUBLE
         type: DT_FLOAT
-        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "DynamicPartition"
+  name: "Cholesky"
   input_arg {
-    name: "data"
+    name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "partitions"
-    type: DT_INT32
-  }
   output_arg {
-    name: "outputs"
+    name: "output"
     type_attr: "T"
-    number_attr: "num_partitions"
-  }
-  attr {
-    name: "num_partitions"
-    type: "int"
-    has_minimum: true
-    minimum: 1
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
   }
 }
 op {
-  name: "DynamicStitch"
+  name: "CholeskyGrad"
   input_arg {
-    name: "indices"
-    type: DT_INT32
-    number_attr: "N"
+    name: "l"
+    type_attr: "T"
   }
   input_arg {
-    name: "data"
+    name: "grad"
     type_attr: "T"
-    number_attr: "N"
   }
   output_arg {
-    name: "merged"
-    type_attr: "T"
-  }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
-}
-op {
-  name: "EditDistance"
-  input_arg {
-    name: "hypothesis_indices"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "hypothesis_values"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "hypothesis_shape"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "truth_indices"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "truth_values"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "truth_shape"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "output"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "normalize"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
-}
-op {
-  name: "Elu"
-  input_arg {
-    name: "features"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "activations"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -10677,441 +10716,525 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "Elu"
+  name: "CompareAndBitpack"
   input_arg {
-    name: "features"
+    name: "input"
     type_attr: "T"
   }
-  output_arg {
-    name: "activations"
+  input_arg {
+    name: "threshold"
     type_attr: "T"
   }
+  output_arg {
+    name: "output"
+    type: DT_UINT8
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_BOOL
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "EluGrad"
+  name: "Complex"
   input_arg {
-    name: "gradients"
+    name: "real"
     type_attr: "T"
   }
   input_arg {
-    name: "outputs"
+    name: "imag"
     type_attr: "T"
   }
   output_arg {
-    name: "backprops"
-    type_attr: "T"
+    name: "out"
+    type_attr: "Tout"
   }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_HALF
       }
     }
   }
-}
-op {
-  name: "EluGrad"
-  input_arg {
-    name: "gradients"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "outputs"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "backprops"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Tout"
     type: "type"
+    default_value {
+      type: DT_COMPLEX64
+    }
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "EncodeBase64"
+  name: "ComplexAbs"
   input_arg {
-    name: "input"
-    type: DT_STRING
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_STRING
+    name: "y"
+    type_attr: "Tout"
   }
   attr {
-    name: "pad"
-    type: "bool"
+    name: "T"
+    type: "type"
     default_value {
-      b: false
+      type: DT_COMPLEX64
+    }
+    allowed_values {
+      list {
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
-  }
-}
-op {
-  name: "EncodeJpeg"
-  input_arg {
-    name: "image"
-    type: DT_UINT8
-  }
-  output_arg {
-    name: "contents"
-    type: DT_STRING
   }
   attr {
-    name: "format"
-    type: "string"
+    name: "Tout"
+    type: "type"
     default_value {
-      s: ""
+      type: DT_FLOAT
     }
     allowed_values {
       list {
-        s: ""
-        s: "grayscale"
-        s: "rgb"
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "quality"
-    type: "int"
-    default_value {
-      i: 95
-    }
+}
+op {
+  name: "ComputeAccidentalHits"
+  input_arg {
+    name: "true_classes"
+    type: DT_INT64
   }
-  attr {
-    name: "progressive"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "sampled_candidates"
+    type: DT_INT64
   }
-  attr {
-    name: "optimize_size"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  output_arg {
+    name: "indices"
+    type: DT_INT32
   }
-  attr {
-    name: "chroma_downsampling"
-    type: "bool"
-    default_value {
-      b: true
-    }
+  output_arg {
+    name: "ids"
+    type: DT_INT64
   }
-  attr {
-    name: "density_unit"
-    type: "string"
-    default_value {
-      s: "in"
-    }
-    allowed_values {
-      list {
-        s: "in"
-        s: "cm"
-      }
-    }
+  output_arg {
+    name: "weights"
+    type: DT_FLOAT
   }
   attr {
-    name: "x_density"
+    name: "num_true"
     type: "int"
-    default_value {
-      i: 300
-    }
   }
   attr {
-    name: "y_density"
+    name: "seed"
     type: "int"
     default_value {
-      i: 300
+      i: 0
     }
   }
   attr {
-    name: "xmp_metadata"
-    type: "string"
+    name: "seed2"
+    type: "int"
     default_value {
-      s: ""
+      i: 0
     }
   }
 }
 op {
-  name: "EncodePng"
+  name: "Concat"
   input_arg {
-    name: "image"
+    name: "concat_dim"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "values"
     type_attr: "T"
+    number_attr: "N"
   }
   output_arg {
-    name: "contents"
-    type: DT_STRING
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "compression"
+    name: "N"
     type: "int"
-    default_value {
-      i: -1
-    }
+    has_minimum: true
+    minimum: 2
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_UINT8
-    }
-    allowed_values {
-      list {
-        type: DT_UINT8
-        type: DT_UINT16
-      }
-    }
   }
 }
 op {
-  name: "EncodeWav"
+  name: "ConcatOffset"
   input_arg {
-    name: "audio"
-    type: DT_FLOAT
+    name: "concat_dim"
+    type: DT_INT32
   }
   input_arg {
-    name: "sample_rate"
+    name: "shape"
     type: DT_INT32
+    number_attr: "N"
   }
   output_arg {
-    name: "contents"
-    type: DT_STRING
+    name: "offset"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 2
   }
 }
 op {
-  name: "Enter"
+  name: "ConcatV2"
   input_arg {
-    name: "data"
+    name: "values"
     type_attr: "T"
+    number_attr: "N"
+  }
+  input_arg {
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 2
   }
   attr {
-    name: "frame_name"
-    type: "string"
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "is_constant"
-    type: "bool"
+    name: "Tidx"
+    type: "type"
     default_value {
-      b: false
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
+}
+op {
+  name: "ConcatenateDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "another_dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
   attr {
-    name: "parallel_iterations"
-    type: "int"
-    default_value {
-      i: 10
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
+  is_stateful: true
 }
 op {
-  name: "Equal"
+  name: "ConcatenateDataset"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "another_dataset"
+    type: DT_VARIANT
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "T"
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ConditionalAccumulator"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT8
+        type: DT_UINT16
         type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_QUINT8
+        type: DT_COMPLEX128
         type: DT_QINT8
+        type: DT_QUINT8
         type: DT_QINT32
-        type: DT_STRING
-        type: DT_BOOL
-        type: DT_COMPLEX128
+        type: DT_HALF
       }
     }
   }
-  is_commutative: true
-}
-op {
-  name: "Erf"
-  input_arg {
-    name: "x"
-    type_attr: "T"
+  attr {
+    name: "shape"
+    type: "shape"
   }
-  output_arg {
-    name: "y"
-    type_attr: "T"
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
+  is_stateful: true
 }
 op {
-  name: "Erfc"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "ConditionalAccumulator"
   output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "Exit"
-  input_arg {
-    name: "data"
-    type_attr: "T"
+  attr {
+    name: "shape"
+    type: "shape"
   }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "Exp"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "ConditionalAccumulator"
   output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-}
-op {
-  name: "ExpandDims"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "dim"
-    type_attr: "Tdim"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  attr {
+    name: "shape"
+    type: "shape"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "Tdim"
-    type: "type"
+    name: "shared_name"
+    type: "string"
     default_value {
-      type: DT_INT32
+      s: ""
     }
+  }
+  is_stateful: true
+}
+op {
+  name: "ConditionalAccumulator"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "dtype"
+    type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "Expm1"
+  name: "Conj"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "output"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_COMPLEX64
+    }
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
@@ -11119,148 +11242,175 @@ op {
   }
 }
 op {
-  name: "ExtractGlimpse"
+  name: "Conj"
   input_arg {
     name: "input"
-    type: DT_FLOAT
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_COMPLEX64
+    }
+    allowed_values {
+      list {
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_VARIANT
+      }
+    }
   }
+}
+op {
+  name: "ConjugateTranspose"
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "offsets"
-    type: DT_FLOAT
+    name: "perm"
+    type_attr: "Tperm"
   }
   output_arg {
-    name: "glimpse"
-    type: DT_FLOAT
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "centered"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "normalized"
-    type: "bool"
+    name: "Tperm"
+    type: "type"
     default_value {
-      b: true
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
+}
+op {
+  name: "Const"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
   attr {
-    name: "uniform_noise"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    name: "value"
+    type: "tensor"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
   }
 }
 op {
-  name: "ExtractImagePatches"
+  name: "ControlTrigger"
+}
+op {
+  name: "Conv2D"
   input_arg {
-    name: "images"
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
-    name: "patches"
+    name: "output"
     type_attr: "T"
   }
   attr {
-    name: "ksizes"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+      }
+    }
   }
   attr {
     name: "strides"
     type: "list(int)"
-    has_minimum: true
-    minimum: 4
   }
   attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "use_cudnn_on_gpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
   attr {
-    name: "padding"
+    name: "data_format"
     type: "string"
+    default_value {
+      s: "NHWC"
+    }
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
 }
 op {
-  name: "ExtractImagePatches"
+  name: "Conv2D"
   input_arg {
-    name: "images"
+    name: "input"
     type_attr: "T"
   }
-  output_arg {
-    name: "patches"
+  input_arg {
+    name: "filter"
     type_attr: "T"
   }
-  attr {
-    name: "ksizes"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "rates"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_BFLOAT16
+        type: DT_FLOAT
       }
     }
   }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "use_cudnn_on_gpu"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
   attr {
     name: "padding"
     type: "string"
@@ -11271,769 +11421,1024 @@ op {
       }
     }
   }
-}
-op {
-  name: "ExtractJpegShape"
-  input_arg {
-    name: "contents"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "image_shape"
-    type_attr: "output_type"
-  }
   attr {
-    name: "output_type"
-    type: "type"
+    name: "data_format"
+    type: "string"
     default_value {
-      type: DT_INT32
+      s: "NHWC"
     }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
-}
-op {
-  name: "FFT"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
   }
 }
 op {
-  name: "FFT2D"
+  name: "Conv2DBackpropFilter"
   input_arg {
     name: "input"
-    type: DT_COMPLEX64
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+  input_arg {
+    name: "filter_sizes"
+    type: DT_INT32
   }
-}
-op {
-  name: "FFT3D"
   input_arg {
-    name: "input"
-    type: DT_COMPLEX64
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type: DT_COMPLEX64
-  }
-}
-op {
-  name: "FIFOQueue"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    type_attr: "T"
   }
   attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_HALF
+        type: DT_FLOAT
       }
     }
-    has_minimum: true
   }
   attr {
-    name: "capacity"
-    type: "int"
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "use_cudnn_on_gpu"
+    type: "bool"
     default_value {
-      i: -1
+      b: true
     }
   }
   attr {
-    name: "container"
+    name: "padding"
     type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
   attr {
-    name: "shared_name"
+    name: "data_format"
     type: "string"
     default_value {
-      s: ""
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "FIFOQueueV2"
-  output_arg {
-    name: "handle"
-    type: DT_RESOURCE
+  name: "Conv2DBackpropFilter"
+  input_arg {
+    name: "input"
+    type_attr: "T"
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "filter_sizes"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
       }
     }
-    has_minimum: true
   }
   attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
+    name: "strides"
+    type: "list(int)"
   }
   attr {
-    name: "container"
-    type: "string"
+    name: "use_cudnn_on_gpu"
+    type: "bool"
     default_value {
-      s: ""
+      b: true
     }
   }
   attr {
-    name: "shared_name"
+    name: "padding"
     type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
-  is_stateful: true
-}
-op {
-  name: "Fact"
-  output_arg {
-    name: "fact"
-    type: DT_STRING
-  }
-}
-op {
-  name: "FakeQuantWithMinMaxArgs"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "outputs"
-    type: DT_FLOAT
-  }
   attr {
-    name: "min"
-    type: "float"
+    name: "data_format"
+    type: "string"
     default_value {
-      f: -6
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
   attr {
-    name: "max"
-    type: "float"
+    name: "dilations"
+    type: "list(int)"
     default_value {
-      f: 6
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxArgs"
+  name: "Conv2DBackpropInput"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input_sizes"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "min"
-    type: "float"
-    default_value {
-      f: -6
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+      }
     }
   }
   attr {
-    name: "max"
-    type: "float"
-    default_value {
-      f: 6
-    }
+    name: "strides"
+    type: "list(int)"
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "use_cudnn_on_gpu"
+    type: "bool"
     default_value {
-      i: 8
+      b: true
     }
   }
-}
-op {
-  name: "FakeQuantWithMinMaxArgs"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+}
+op {
+  name: "Conv2DBackpropInput"
+  input_arg {
+    name: "input_sizes"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "min"
-    type: "float"
-    default_value {
-      f: -6
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
     }
   }
   attr {
-    name: "max"
-    type: "float"
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "use_cudnn_on_gpu"
+    type: "bool"
     default_value {
-      f: 6
+      b: true
     }
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
     default_value {
-      i: 8
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
   attr {
-    name: "narrow_range"
-    type: "bool"
+    name: "dilations"
+    type: "list(int)"
     default_value {
-      b: false
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxArgsGradient"
+  name: "Conv3D"
   input_arg {
-    name: "gradients"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprops"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "min"
-    type: "float"
-    default_value {
-      f: -6
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "max"
-    type: "float"
-    default_value {
-      f: 6
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxArgsGradient"
+  name: "Conv3D"
   input_arg {
-    name: "gradients"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprops"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "min"
-    type: "float"
-    default_value {
-      f: -6
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "max"
-    type: "float"
-    default_value {
-      f: 6
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "data_format"
+    type: "string"
     default_value {
-      i: 8
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxArgsGradient"
+  name: "Conv3D"
   input_arg {
-    name: "gradients"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprops"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "min"
-    type: "float"
-    default_value {
-      f: -6
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "max"
-    type: "float"
-    default_value {
-      f: 6
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "data_format"
+    type: "string"
     default_value {
-      i: 8
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
     }
   }
   attr {
-    name: "narrow_range"
-    type: "bool"
+    name: "dilations"
+    type: "list(int)"
     default_value {
-      b: false
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxVars"
+  name: "Conv3DBackpropFilter"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "outputs"
-    type: DT_FLOAT
-  }
-}
-op {
-  name: "FakeQuantWithMinMaxVars"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
-  input_arg {
-    name: "max"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
-  output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
+  deprecation {
+    version: 10
+  }
 }
 op {
-  name: "FakeQuantWithMinMaxVars"
+  name: "Conv3DBackpropFilter"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "narrow_range"
-    type: "bool"
-    default_value {
-      b: false
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
+  deprecation {
+    version: 10
+  }
 }
 op {
-  name: "FakeQuantWithMinMaxVarsGradient"
-  input_arg {
-    name: "gradients"
-    type: DT_FLOAT
-  }
+  name: "Conv3DBackpropFilterV2"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "filter_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
-  output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxVarsGradient"
-  input_arg {
-    name: "gradients"
-    type: DT_FLOAT
-  }
+  name: "Conv3DBackpropFilterV2"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "filter_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
-  output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
     default_value {
-      i: 8
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxVarsGradient"
-  input_arg {
-    name: "gradients"
-    type: DT_FLOAT
-  }
+  name: "Conv3DBackpropFilterV2"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "filter_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "narrow_range"
-    type: "bool"
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
     default_value {
-      b: false
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxVarsPerChannel"
+  name: "Conv3DBackpropInput"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "outputs"
-    type: DT_FLOAT
-  }
-}
-op {
-  name: "FakeQuantWithMinMaxVarsPerChannel"
-  input_arg {
-    name: "inputs"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
-  input_arg {
-    name: "max"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
-  output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
+  deprecation {
+    version: 10
+  }
 }
 op {
-  name: "FakeQuantWithMinMaxVarsPerChannel"
+  name: "Conv3DBackpropInput"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "outputs"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "narrow_range"
-    type: "bool"
-    default_value {
-      b: false
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
+  deprecation {
+    version: 10
+  }
 }
 op {
-  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
-  input_arg {
-    name: "gradients"
-    type: DT_FLOAT
-  }
+  name: "Conv3DBackpropInputV2"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
-  output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
-  input_arg {
-    name: "gradients"
-    type: DT_FLOAT
-  }
+  name: "Conv3DBackpropInputV2"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
-  output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "num_bits"
-    type: "int"
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
     default_value {
-      i: 8
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
     }
   }
 }
 op {
-  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
-  input_arg {
-    name: "gradients"
-    type: DT_FLOAT
-  }
+  name: "Conv3DBackpropInputV2"
   input_arg {
-    name: "inputs"
-    type: DT_FLOAT
+    name: "input_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "filter"
+    type_attr: "T"
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "backprops_wrt_input"
-    type: DT_FLOAT
+    name: "output"
+    type_attr: "T"
   }
-  output_arg {
-    name: "backprop_wrt_min"
-    type: DT_FLOAT
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
-  output_arg {
-    name: "backprop_wrt_max"
-    type: DT_FLOAT
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
   }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
   attr {
-    name: "narrow_range"
-    type: "bool"
+    name: "data_format"
+    type: "string"
     default_value {
-      b: false
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
     }
   }
-}
-op {
-  name: "FakeQueue"
-  input_arg {
-    name: "resource"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "Fill"
-  input_arg {
-    name: "dims"
-    type: DT_INT32
-  }
+  name: "Copy"
   input_arg {
-    name: "value"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
@@ -12044,221 +12449,230 @@ op {
     name: "T"
     type: "type"
   }
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  allows_uninitialized_input: true
 }
 op {
-  name: "FilterDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "Copy"
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "predicate"
-    type: "func"
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "debug_ops_spec"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
   }
-  is_stateful: true
+  allows_uninitialized_input: true
 }
 op {
-  name: "FilterDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "CopyHost"
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "predicate"
-    type: "func"
-  }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "FixedLengthRecordDataset"
-  input_arg {
-    name: "filenames"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "header_bytes"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "record_bytes"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "footer_bytes"
-    type: DT_INT64
-  }
+  name: "CopyHost"
   input_arg {
-    name: "buffer_size"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "input"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "FixedLengthRecordReader"
   output_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  attr {
-    name: "header_bytes"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "record_bytes"
-    type: "int"
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "footer_bytes"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "container"
+    name: "tensor_name"
     type: "string"
     default_value {
       s: ""
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "debug_ops_spec"
+    type: "list(string)"
     default_value {
-      s: ""
+      list {
+      }
     }
   }
-  is_stateful: true
+  allows_uninitialized_input: true
 }
 op {
-  name: "FixedLengthRecordReader"
+  name: "Cos"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
   output_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "header_bytes"
-    type: "int"
-    default_value {
-      i: 0
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
-  attr {
-    name: "record_bytes"
-    type: "int"
+}
+op {
+  name: "Cos"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "footer_bytes"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "hop_bytes"
-    type: "int"
-    default_value {
-      i: 0
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+}
+op {
+  name: "Cosh"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "FixedLengthRecordReaderV2"
+  name: "Cosh"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
   output_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "header_bytes"
-    type: "int"
-    default_value {
-      i: 0
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
+}
+op {
+  name: "CountUpTo"
+  input_arg {
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
-    name: "record_bytes"
+    name: "limit"
     type: "int"
   }
   attr {
-    name: "footer_bytes"
-    type: "int"
-    default_value {
-      i: 0
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
+}
+op {
+  name: "CriticalSectionOp"
+  output_arg {
+    name: "resource"
+    type: DT_RESOURCE
+  }
   attr {
     name: "container"
     type: "string"
@@ -12276,390 +12690,315 @@ op {
   is_stateful: true
 }
 op {
-  name: "FixedLengthRecordReaderV2"
-  output_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
+  name: "CropAndResize"
+  input_arg {
+    name: "image"
+    type_attr: "T"
   }
-  attr {
-    name: "header_bytes"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
   }
-  attr {
-    name: "record_bytes"
-    type: "int"
+  input_arg {
+    name: "box_ind"
+    type: DT_INT32
   }
-  attr {
-    name: "footer_bytes"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "crop_size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "crops"
+    type: DT_FLOAT
   }
   attr {
-    name: "hop_bytes"
-    type: "int"
-    default_value {
-      i: 0
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "container"
+    name: "method"
     type: "string"
     default_value {
-      s: ""
+      s: "bilinear"
+    }
+    allowed_values {
+      list {
+        s: "bilinear"
+      }
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "extrapolation_value"
+    type: "float"
     default_value {
-      s: ""
+      f: 0
     }
   }
-  is_stateful: true
 }
 op {
-  name: "FixedLengthRecordReaderV2"
-  output_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
+  name: "CropAndResize"
+  input_arg {
+    name: "image"
+    type_attr: "T"
   }
-  attr {
-    name: "header_bytes"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
   }
-  attr {
-    name: "record_bytes"
-    type: "int"
+  input_arg {
+    name: "box_ind"
+    type: DT_INT32
   }
-  attr {
-    name: "footer_bytes"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "crop_size"
+    type: DT_INT32
   }
-  attr {
-    name: "hop_bytes"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  output_arg {
+    name: "crops"
+    type: DT_FLOAT
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "shared_name"
+    name: "method"
     type: "string"
     default_value {
-      s: ""
+      s: "bilinear"
+    }
+    allowed_values {
+      list {
+        s: "bilinear"
+      }
     }
   }
   attr {
-    name: "encoding"
-    type: "string"
+    name: "extrapolation_value"
+    type: "float"
     default_value {
-      s: ""
+      f: 0
     }
   }
-  is_stateful: true
 }
 op {
-  name: "FixedUnigramCandidateSampler"
+  name: "CropAndResizeGradBoxes"
   input_arg {
-    name: "true_classes"
-    type: DT_INT64
+    name: "grads"
+    type: DT_FLOAT
   }
-  output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
+  input_arg {
+    name: "image"
+    type_attr: "T"
   }
-  output_arg {
-    name: "true_expected_count"
+  input_arg {
+    name: "boxes"
     type: DT_FLOAT
   }
+  input_arg {
+    name: "box_ind"
+    type: DT_INT32
+  }
   output_arg {
-    name: "sampled_expected_count"
+    name: "output"
     type: DT_FLOAT
   }
   attr {
-    name: "num_true"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "num_sampled"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "unique"
-    type: "bool"
-  }
-  attr {
-    name: "range_max"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
   attr {
-    name: "vocab_file"
+    name: "method"
     type: "string"
     default_value {
-      s: ""
+      s: "bilinear"
     }
-  }
-  attr {
-    name: "distortion"
-    type: "float"
-    default_value {
-      f: 1
+    allowed_values {
+      list {
+        s: "bilinear"
+      }
     }
   }
-  attr {
-    name: "num_reserved_ids"
-    type: "int"
-    default_value {
-      i: 0
-    }
+}
+op {
+  name: "CropAndResizeGradBoxes"
+  input_arg {
+    name: "grads"
+    type: DT_FLOAT
   }
-  attr {
-    name: "num_shards"
-    type: "int"
-    default_value {
-      i: 1
-    }
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "image"
+    type_attr: "T"
   }
-  attr {
-    name: "shard"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "box_ind"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
   }
   attr {
-    name: "unigrams"
-    type: "list(float)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "seed"
-    type: "int"
+    name: "method"
+    type: "string"
     default_value {
-      i: 0
+      s: "bilinear"
     }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
+    allowed_values {
+      list {
+        s: "bilinear"
+      }
     }
   }
 }
 op {
-  name: "FixedUnigramCandidateSampler"
+  name: "CropAndResizeGradImage"
   input_arg {
-    name: "true_classes"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "true_expected_count"
+    name: "grads"
     type: DT_FLOAT
   }
-  output_arg {
-    name: "sampled_expected_count"
+  input_arg {
+    name: "boxes"
     type: DT_FLOAT
   }
-  attr {
-    name: "num_true"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "num_sampled"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "box_ind"
+    type: DT_INT32
   }
-  attr {
-    name: "unique"
-    type: "bool"
+  input_arg {
+    name: "image_size"
+    type: DT_INT32
   }
-  attr {
-    name: "range_max"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "vocab_file"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+        type: DT_DOUBLE
+      }
     }
   }
   attr {
-    name: "distortion"
-    type: "float"
+    name: "method"
+    type: "string"
     default_value {
-      f: 1
+      s: "bilinear"
     }
-  }
-  attr {
-    name: "num_reserved_ids"
-    type: "int"
-    default_value {
-      i: 0
+    allowed_values {
+      list {
+        s: "bilinear"
+      }
     }
   }
-  attr {
-    name: "num_shards"
-    type: "int"
-    default_value {
-      i: 1
-    }
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "shard"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "unigrams"
-    type: "list(float)"
-    default_value {
-      list {
-      }
-    }
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "FlatMapDataset"
+  name: "Cross"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "a"
+    type_attr: "T"
   }
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "b"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "f"
-    type: "func"
-  }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "product"
+    type_attr: "T"
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "FlatMapDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "Cross"
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "f"
-    type: "func"
-  }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "a"
+    type_attr: "T"
   }
-}
-op {
-  name: "Floor"
   input_arg {
-    name: "x"
+    name: "b"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "product"
     type_attr: "T"
   }
   attr {
@@ -12667,25 +13006,33 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "FloorDiv"
+  name: "Cross"
   input_arg {
-    name: "x"
+    name: "a"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "b"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "product"
     type_attr: "T"
   }
   attr {
@@ -12693,33 +13040,34 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
+        type: DT_INT16
         type: DT_INT8
         type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "FloorMod"
+  name: "Cross"
   input_arg {
-    name: "x"
+    name: "a"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "b"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "product"
     type_attr: "T"
   }
   attr {
@@ -12727,80 +13075,80 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "FractionalAvgPool"
+  name: "Cumprod"
   input_arg {
-    name: "value"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
+    name: "x"
     type_attr: "T"
   }
-  output_arg {
-    name: "row_pooling_sequence"
-    type: DT_INT64
+  input_arg {
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "col_pooling_sequence"
-    type: DT_INT64
-  }
-  attr {
-    name: "pooling_ratio"
-    type: "list(float)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "pseudo_random"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "out"
+    type_attr: "T"
   }
   attr {
-    name: "overlapping"
+    name: "exclusive"
     type: "bool"
     default_value {
       b: false
     }
   }
   attr {
-    name: "deterministic"
+    name: "reverse"
     type: "bool"
     default_value {
       b: false
     }
   }
   attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
     }
   }
   attr {
-    name: "seed2"
-    type: "int"
+    name: "Tidx"
+    type: "type"
     default_value {
-      i: 0
+      type: DT_INT32
     }
-  }
-  attr {
-    name: "T"
-    type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
       }
@@ -12808,29 +13156,28 @@ op {
   }
 }
 op {
-  name: "FractionalAvgPoolGrad"
-  input_arg {
-    name: "orig_input_tensor_shape"
-    type: DT_INT64
-  }
+  name: "Cumprod"
   input_arg {
-    name: "out_backprop"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "row_pooling_sequence"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "col_pooling_sequence"
-    type: DT_INT64
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
   }
   attr {
-    name: "overlapping"
+    name: "exclusive"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "reverse"
     type: "bool"
     default_value {
       b: false
@@ -12843,6 +13190,31 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
         type: DT_INT32
         type: DT_INT64
       }
@@ -12850,71 +13222,66 @@ op {
   }
 }
 op {
-  name: "FractionalMaxPool"
+  name: "Cumprod"
   input_arg {
-    name: "value"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
+    name: "x"
     type_attr: "T"
   }
-  output_arg {
-    name: "row_pooling_sequence"
-    type: DT_INT64
+  input_arg {
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "col_pooling_sequence"
-    type: DT_INT64
-  }
-  attr {
-    name: "pooling_ratio"
-    type: "list(float)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "pseudo_random"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "out"
+    type_attr: "T"
   }
   attr {
-    name: "overlapping"
+    name: "exclusive"
     type: "bool"
     default_value {
       b: false
     }
   }
   attr {
-    name: "deterministic"
+    name: "reverse"
     type: "bool"
     default_value {
       b: false
     }
   }
   attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
     }
   }
   attr {
-    name: "seed2"
-    type: "int"
+    name: "Tidx"
+    type: "type"
     default_value {
-      i: 0
+      type: DT_INT32
     }
-  }
-  attr {
-    name: "T"
-    type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
       }
@@ -12922,33 +13289,28 @@ op {
   }
 }
 op {
-  name: "FractionalMaxPoolGrad"
-  input_arg {
-    name: "orig_input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "orig_output"
-    type_attr: "T"
-  }
+  name: "Cumprod"
   input_arg {
-    name: "out_backprop"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "row_pooling_sequence"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "col_pooling_sequence"
-    type: DT_INT64
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
   }
   attr {
-    name: "overlapping"
+    name: "exclusive"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "reverse"
     type: "bool"
     default_value {
       b: false
@@ -12961,6 +13323,32 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
         type: DT_INT32
         type: DT_INT64
       }
@@ -12968,46 +13356,32 @@ op {
   }
 }
 op {
-  name: "FusedBatchNorm"
+  name: "Cumsum"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "scale"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "offset"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "mean"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "variance"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "y"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "batch_mean"
-    type_attr: "T"
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "batch_variance"
+    name: "out"
     type_attr: "T"
   }
-  output_arg {
-    name: "reserve_space_1"
-    type_attr: "T"
+  attr {
+    name: "exclusive"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
-  output_arg {
-    name: "reserve_space_2"
-    type_attr: "T"
+  attr {
+    name: "reverse"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
     name: "T"
@@ -13015,72 +13389,63 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "epsilon"
-    type: "float"
+    name: "Tidx"
+    type: "type"
     default_value {
-      f: 0.0001
+      type: DT_INT32
     }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-  }
-  attr {
-    name: "is_training"
-    type: "bool"
-    default_value {
-      b: true
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
 }
 op {
-  name: "FusedBatchNormGrad"
-  input_arg {
-    name: "y_backprop"
-    type_attr: "T"
-  }
+  name: "Cumsum"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "scale"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "reserve_space_1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "reserve_space_2"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "x_backprop"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "scale_backprop"
-    type_attr: "T"
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "offset_backprop"
+    name: "out"
     type_attr: "T"
   }
-  output_arg {
-    name: "reserve_space_3"
-    type_attr: "T"
+  attr {
+    name: "exclusive"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
-  output_arg {
-    name: "reserve_space_4"
-    type_attr: "T"
+  attr {
+    name: "reverse"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
     name: "T"
@@ -13088,209 +13453,309 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "epsilon"
-    type: "float"
+    name: "Tidx"
+    type: "type"
     default_value {
-      f: 0.0001
+      type: DT_INT32
     }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Cumsum"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "axis"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "exclusive"
+    type: "bool"
     default_value {
-      s: "NHWC"
+      b: false
     }
   }
   attr {
-    name: "is_training"
+    name: "reverse"
     type: "bool"
     default_value {
-      b: true
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
 }
 op {
-  name: "FusedBatchNormGradV2"
-  input_arg {
-    name: "y_backprop"
-    type_attr: "T"
-  }
+  name: "Cumsum"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "scale"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "reserve_space_1"
-    type_attr: "U"
-  }
-  input_arg {
-    name: "reserve_space_2"
-    type_attr: "U"
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "x_backprop"
+    name: "out"
     type_attr: "T"
   }
-  output_arg {
-    name: "scale_backprop"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "offset_backprop"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "reserve_space_3"
-    type_attr: "U"
+  attr {
+    name: "exclusive"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
-  output_arg {
-    name: "reserve_space_4"
-    type_attr: "U"
+  attr {
+    name: "reverse"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "U"
+    name: "Tidx"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+}
+op {
+  name: "DataFormatDimMap"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
   attr {
-    name: "epsilon"
-    type: "float"
+    name: "T"
+    type: "type"
     default_value {
-      f: 0.0001
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "data_format"
+    name: "src_format"
     type: "string"
     default_value {
       s: "NHWC"
     }
   }
   attr {
-    name: "is_training"
-    type: "bool"
+    name: "dst_format"
+    type: "string"
     default_value {
-      b: true
+      s: "NCHW"
     }
   }
 }
 op {
-  name: "FusedBatchNormV2"
+  name: "DataFormatVecPermute"
   input_arg {
     name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "scale"
-    type_attr: "U"
-  }
-  input_arg {
-    name: "offset"
-    type_attr: "U"
-  }
-  input_arg {
-    name: "mean"
-    type_attr: "U"
-  }
-  input_arg {
-    name: "variance"
-    type_attr: "U"
-  }
   output_arg {
     name: "y"
     type_attr: "T"
   }
-  output_arg {
-    name: "batch_mean"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "batch_variance"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "reserve_space_1"
-    type_attr: "U"
-  }
-  output_arg {
-    name: "reserve_space_2"
-    type_attr: "U"
-  }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-      }
+    default_value {
+      type: DT_INT32
     }
-  }
-  attr {
-    name: "U"
-    type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "epsilon"
-    type: "float"
+    name: "src_format"
+    type: "string"
     default_value {
-      f: 0.0001
+      s: "NHWC"
     }
   }
   attr {
-    name: "data_format"
+    name: "dst_format"
     type: "string"
     default_value {
-      s: "NHWC"
+      s: "NCHW"
     }
   }
+}
+op {
+  name: "DatasetToSingleElement"
+  input_arg {
+    name: "dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "output_types"
+  }
   attr {
-    name: "is_training"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "FusedPadConv2D"
+  name: "DebugGradientIdentity"
   input_arg {
     name: "input"
     type_attr: "T"
   }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  allows_uninitialized_input: true
+}
+op {
+  name: "DebugGradientRefIdentity"
   input_arg {
-    name: "paddings"
-    type: DT_INT32
+    name: "input"
+    type_attr: "T"
+    is_ref: true
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
   }
+  allows_uninitialized_input: true
+}
+op {
+  name: "DebugIdentity"
   input_arg {
-    name: "filter"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
@@ -13300,55 +13765,30 @@ op {
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
-    }
   }
   attr {
-    name: "mode"
+    name: "tensor_name"
     type: "string"
-    allowed_values {
-      list {
-        s: "REFLECT"
-        s: "SYMMETRIC"
-      }
+    default_value {
+      s: ""
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
       list {
-        s: "SAME"
-        s: "VALID"
       }
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "FusedResizeAndPadConv2D"
+  name: "DebugIdentity"
   input_arg {
     name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "size"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "paddings"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "filter"
-    type_attr: "T"
-  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -13356,962 +13796,1138 @@ op {
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
-    }
   }
   attr {
-    name: "resize_align_corners"
-    type: "bool"
+    name: "tensor_name"
+    type: "string"
     default_value {
-      b: false
+      s: ""
     }
   }
   attr {
-    name: "mode"
-    type: "string"
-    allowed_values {
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
       list {
-        s: "REFLECT"
-        s: "SYMMETRIC"
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "gated_grpc"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "Gather"
-  input_arg {
-    name: "params"
-    type_attr: "Tparams"
-  }
+  name: "DebugIdentity"
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "Tparams"
+    type_attr: "T"
   }
   attr {
-    name: "validate_indices"
-    type: "bool"
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "device_name"
+    type: "string"
     default_value {
-      b: true
+      s: ""
     }
   }
   attr {
-    name: "Tparams"
-    type: "type"
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
       list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "gated_grpc"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  allows_uninitialized_input: true
 }
 op {
-  name: "GatherNd"
-  input_arg {
-    name: "params"
-    type_attr: "Tparams"
-  }
+  name: "DebugNanCount"
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "Tparams"
+    type: DT_INT64
   }
   attr {
-    name: "Tparams"
+    name: "T"
     type: "type"
   }
   attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
       list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "GatherV2"
+  name: "DebugNanCount"
   input_arg {
-    name: "params"
-    type_attr: "Tparams"
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
-  input_arg {
-    name: "axis"
-    type_attr: "Taxis"
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "Tparams"
+    type: DT_INT64
   }
   attr {
-    name: "Tparams"
+    name: "T"
     type: "type"
   }
   attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
   attr {
-    name: "Taxis"
-    type: "type"
-    allowed_values {
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
       list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "gated_grpc"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  allows_uninitialized_input: true
 }
 op {
-  name: "GenerateVocabRemapping"
-  input_arg {
-    name: "new_vocab_file"
-    type: DT_STRING
-  }
+  name: "DebugNanCount"
   input_arg {
-    name: "old_vocab_file"
-    type: DT_STRING
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "remapping"
+    name: "output"
     type: DT_INT64
   }
-  output_arg {
-    name: "num_present"
-    type: DT_INT32
+  attr {
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "new_vocab_offset"
-    type: "int"
-    has_minimum: true
+    name: "device_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "num_new_vocab"
-    type: "int"
-    has_minimum: true
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "gated_grpc"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "GenerateVocabRemapping"
-  input_arg {
-    name: "new_vocab_file"
-    type: DT_STRING
-  }
+  name: "DebugNumericSummary"
   input_arg {
-    name: "old_vocab_file"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "remapping"
-    type: DT_INT64
+    name: "input"
+    type_attr: "T"
   }
   output_arg {
-    name: "num_present"
-    type: DT_INT32
+    name: "output"
+    type: DT_DOUBLE
   }
   attr {
-    name: "new_vocab_offset"
-    type: "int"
-    has_minimum: true
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "num_new_vocab"
-    type: "int"
-    has_minimum: true
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "old_vocab_size"
-    type: "int"
+    name: "debug_urls"
+    type: "list(string)"
     default_value {
-      i: -1
+      list {
+      }
     }
-    has_minimum: true
-    minimum: -1
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "GetSessionHandle"
+  name: "DebugNumericSummary"
   input_arg {
-    name: "value"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "output"
+    type: DT_DOUBLE
   }
   attr {
     name: "T"
     type: "type"
   }
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "lower_bound"
+    type: "float"
+    default_value {
+      f: -inf
+    }
+  }
+  attr {
+    name: "upper_bound"
+    type: "float"
+    default_value {
+      f: inf
+    }
+  }
+  attr {
+    name: "mute_if_healthy"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  allows_uninitialized_input: true
 }
 op {
-  name: "GetSessionHandle"
+  name: "DebugNumericSummary"
   input_arg {
-    name: "value"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "output"
+    type: DT_DOUBLE
   }
   attr {
     name: "T"
     type: "type"
   }
-  deprecation {
-    version: 23
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "lower_bound"
+    type: "float"
+    default_value {
+      f: -inf
+    }
+  }
+  attr {
+    name: "upper_bound"
+    type: "float"
+    default_value {
+      f: inf
+    }
+  }
+  attr {
+    name: "mute_if_healthy"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "gated_grpc"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+  allows_uninitialized_input: true
 }
 op {
-  name: "GetSessionHandle"
+  name: "DebugNumericSummary"
   input_arg {
-    name: "value"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "output"
+    type: DT_DOUBLE
   }
   attr {
     name: "T"
     type: "type"
   }
+  attr {
+    name: "device_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "tensor_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "debug_urls"
+    type: "list(string)"
+    default_value {
+      list {
+      }
+    }
+  }
+  attr {
+    name: "lower_bound"
+    type: "float"
+    default_value {
+      f: -inf
+    }
+  }
+  attr {
+    name: "upper_bound"
+    type: "float"
+    default_value {
+      f: inf
+    }
+  }
+  attr {
+    name: "mute_if_healthy"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "gated_grpc"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  allows_uninitialized_input: true
 }
 op {
-  name: "GetSessionHandleV2"
+  name: "DecodeAndCropJpeg"
   input_arg {
-    name: "value"
-    type_attr: "T"
+    name: "contents"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "crop_window"
+    type: DT_INT32
   }
   output_arg {
-    name: "handle"
-    type: DT_RESOURCE
+    name: "image"
+    type: DT_UINT8
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "channels"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "ratio"
+    type: "int"
+    default_value {
+      i: 1
+    }
+  }
+  attr {
+    name: "fancy_upscaling"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "try_recover_truncated"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "acceptable_fraction"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "dct_method"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "GetSessionTensor"
+  name: "DecodeBase64"
   input_arg {
-    name: "handle"
+    name: "input"
     type: DT_STRING
   }
   output_arg {
-    name: "value"
-    type_attr: "dtype"
+    name: "output"
+    type: DT_STRING
+  }
+}
+op {
+  name: "DecodeBmp"
+  input_arg {
+    name: "contents"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "image"
+    type: DT_UINT8
   }
   attr {
-    name: "dtype"
-    type: "type"
+    name: "channels"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
 }
 op {
-  name: "Greater"
+  name: "DecodeCSV"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "records"
+    type: DT_STRING
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "record_defaults"
+    type_list_attr: "OUT_TYPE"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "output"
+    type_list_attr: "OUT_TYPE"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "OUT_TYPE"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_STRING
       }
     }
   }
+  attr {
+    name: "field_delim"
+    type: "string"
+    default_value {
+      s: ","
+    }
+  }
 }
 op {
-  name: "Greater"
+  name: "DecodeCSV"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "records"
+    type: DT_STRING
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "record_defaults"
+    type_list_attr: "OUT_TYPE"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "output"
+    type_list_attr: "OUT_TYPE"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "OUT_TYPE"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_STRING
       }
     }
   }
+  attr {
+    name: "field_delim"
+    type: "string"
+    default_value {
+      s: ","
+    }
+  }
+  attr {
+    name: "use_quote_delim"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
 }
 op {
-  name: "GreaterEqual"
+  name: "DecodeCSV"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "records"
+    type: DT_STRING
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "record_defaults"
+    type_list_attr: "OUT_TYPE"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "output"
+    type_list_attr: "OUT_TYPE"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "OUT_TYPE"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_STRING
       }
     }
   }
+  attr {
+    name: "field_delim"
+    type: "string"
+    default_value {
+      s: ","
+    }
+  }
+  attr {
+    name: "use_quote_delim"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "na_value"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
 }
 op {
-  name: "GreaterEqual"
+  name: "DecodeCSV"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "records"
+    type: DT_STRING
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "record_defaults"
+    type_list_attr: "OUT_TYPE"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "output"
+    type_list_attr: "OUT_TYPE"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "OUT_TYPE"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_STRING
       }
     }
   }
-}
-op {
-  name: "GroupByWindowDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "key_func_other_arguments"
-    type_list_attr: "Tkey_func_other_arguments"
-  }
-  input_arg {
-    name: "reduce_func_other_arguments"
-    type_list_attr: "Treduce_func_other_arguments"
-  }
-  input_arg {
-    name: "window_size_func_other_arguments"
-    type_list_attr: "Twindow_size_func_other_arguments"
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "key_func"
-    type: "func"
-  }
-  attr {
-    name: "reduce_func"
-    type: "func"
-  }
   attr {
-    name: "window_size_func"
-    type: "func"
+    name: "field_delim"
+    type: "string"
+    default_value {
+      s: ","
+    }
   }
   attr {
-    name: "Tkey_func_other_arguments"
-    type: "list(type)"
-    has_minimum: true
+    name: "use_quote_delim"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
-    name: "Treduce_func_other_arguments"
-    type: "list(type)"
-    has_minimum: true
+    name: "na_value"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
-  attr {
-    name: "Twindow_size_func_other_arguments"
-    type: "list(type)"
-    has_minimum: true
+}
+op {
+  name: "DecodeCompressed"
+  input_arg {
+    name: "bytes"
+    type: DT_STRING
   }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  output_arg {
+    name: "output"
+    type: DT_STRING
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "compression_type"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "GroupByWindowDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "DecodeGif"
   input_arg {
-    name: "key_func_other_arguments"
-    type_list_attr: "Tkey_func_other_arguments"
+    name: "contents"
+    type: DT_STRING
   }
-  input_arg {
-    name: "reduce_func_other_arguments"
-    type_list_attr: "Treduce_func_other_arguments"
+  output_arg {
+    name: "image"
+    type: DT_UINT8
   }
+}
+op {
+  name: "DecodeJSONExample"
   input_arg {
-    name: "window_size_func_other_arguments"
-    type_list_attr: "Twindow_size_func_other_arguments"
+    name: "json_examples"
+    type: DT_STRING
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "key_func"
-    type: "func"
-  }
-  attr {
-    name: "reduce_func"
-    type: "func"
-  }
-  attr {
-    name: "window_size_func"
-    type: "func"
-  }
-  attr {
-    name: "Tkey_func_other_arguments"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "Treduce_func_other_arguments"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "Twindow_size_func_other_arguments"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "binary_examples"
+    type: DT_STRING
   }
 }
 op {
-  name: "HSVToRGB"
+  name: "DecodeJpeg"
   input_arg {
-    name: "images"
-    type_attr: "T"
+    name: "contents"
+    type: DT_STRING
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "image"
+    type: DT_UINT8
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "channels"
+    type: "int"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
+      i: 0
     }
   }
-}
-op {
-  name: "HashTable"
-  output_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
   attr {
-    name: "container"
-    type: "string"
+    name: "ratio"
+    type: "int"
     default_value {
-      s: ""
+      i: 1
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "fancy_upscaling"
+    type: "bool"
     default_value {
-      s: ""
+      b: true
     }
   }
   attr {
-    name: "use_node_name_sharing"
+    name: "try_recover_truncated"
     type: "bool"
     default_value {
       b: false
     }
   }
   attr {
-    name: "key_dtype"
-    type: "type"
-  }
-  attr {
-    name: "value_dtype"
-    type: "type"
-  }
-  is_stateful: true
-}
-op {
-  name: "HashTableV2"
-  output_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
-  attr {
-    name: "container"
-    type: "string"
+    name: "acceptable_fraction"
+    type: "float"
     default_value {
-      s: ""
+      f: 1
     }
   }
   attr {
-    name: "shared_name"
+    name: "dct_method"
     type: "string"
     default_value {
       s: ""
     }
   }
-  attr {
-    name: "use_node_name_sharing"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "key_dtype"
-    type: "type"
-  }
-  attr {
-    name: "value_dtype"
-    type: "type"
-  }
-  is_stateful: true
 }
 op {
-  name: "HistogramFixedWidth"
-  input_arg {
-    name: "values"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "value_range"
-    type_attr: "T"
-  }
+  name: "DecodePng"
   input_arg {
-    name: "nbins"
-    type: DT_INT32
+    name: "contents"
+    type: DT_STRING
   }
   output_arg {
-    name: "out"
+    name: "image"
     type_attr: "dtype"
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
+    name: "channels"
+    type: "int"
+    default_value {
+      i: 0
     }
   }
   attr {
     name: "dtype"
     type: "type"
     default_value {
-      type: DT_INT32
+      type: DT_UINT8
     }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
       }
     }
   }
 }
 op {
-  name: "HistogramSummary"
+  name: "DecodeRaw"
   input_arg {
-    name: "tag"
+    name: "bytes"
     type: DT_STRING
   }
-  input_arg {
-    name: "values"
-    type_attr: "T"
-  }
   output_arg {
-    name: "summary"
-    type: DT_STRING
+    name: "output"
+    type_attr: "out_type"
   }
   attr {
-    name: "T"
+    name: "out_type"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "little_endian"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
 }
 op {
-  name: "HistogramSummary"
+  name: "DecodeRaw"
   input_arg {
-    name: "tag"
+    name: "bytes"
     type: DT_STRING
   }
-  input_arg {
-    name: "values"
-    type_attr: "T"
-  }
   output_arg {
-    name: "summary"
-    type: DT_STRING
+    name: "output"
+    type_attr: "out_type"
   }
   attr {
-    name: "T"
+    name: "out_type"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
+        type: DT_UINT16
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "little_endian"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
 }
 op {
-  name: "IFFT"
+  name: "DecodeWav"
   input_arg {
-    name: "input"
-    type: DT_COMPLEX64
+    name: "contents"
+    type: DT_STRING
   }
   output_arg {
-    name: "output"
-    type: DT_COMPLEX64
-  }
-}
-op {
-  name: "IFFT2D"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
+    name: "audio"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+    name: "sample_rate"
+    type: DT_INT32
   }
-}
-op {
-  name: "IFFT3D"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
+  attr {
+    name: "desired_channels"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+  attr {
+    name: "desired_samples"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
 }
 op {
-  name: "IRFFT"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
+  name: "DeleteSessionTensor"
   input_arg {
-    name: "fft_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
-    type: DT_FLOAT
+    name: "handle"
+    type: DT_STRING
   }
 }
 op {
-  name: "IRFFT2D"
-  input_arg {
-    name: "input"
-    type: DT_COMPLEX64
-  }
+  name: "DeleteSessionTensor"
   input_arg {
-    name: "fft_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
-    type: DT_FLOAT
+    name: "handle"
+    type: DT_STRING
   }
+  is_stateful: true
 }
 op {
-  name: "IRFFT3D"
+  name: "DenseToDenseSetOperation"
   input_arg {
-    name: "input"
-    type: DT_COMPLEX64
+    name: "set1"
+    type_attr: "T"
   }
   input_arg {
-    name: "fft_length"
-    type: DT_INT32
+    name: "set2"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type: DT_FLOAT
+    name: "result_indices"
+    type: DT_INT64
   }
-}
-op {
-  name: "Identity"
-  input_arg {
-    name: "input"
+  output_arg {
+    name: "result_values"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "result_shape"
+    type: DT_INT64
+  }
+  attr {
+    name: "set_operation"
+    type: "string"
+  }
+  attr {
+    name: "validate_indices"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_STRING
+      }
+    }
   }
 }
 op {
-  name: "IdentityN"
+  name: "DenseToSparseBatchDataset"
   input_arg {
-    name: "input"
-    type_list_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "row_shape"
+    type: DT_INT64
   }
   output_arg {
-    name: "output"
-    type_list_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "T"
+    name: "output_types"
     type: "list(type)"
     has_minimum: true
     minimum: 1
   }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
 }
 op {
-  name: "IdentityReader"
+  name: "DenseToSparseBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "row_shape"
+    type: DT_INT64
+  }
   output_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
-  is_stateful: true
 }
 op {
-  name: "IdentityReaderV2"
+  name: "DenseToSparseSetOperation"
+  input_arg {
+    name: "set1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "set2_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "set2_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "set2_shape"
+    type: DT_INT64
+  }
   output_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
+    name: "result_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "result_values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "result_shape"
+    type: DT_INT64
   }
   attr {
-    name: "container"
+    name: "set_operation"
     type: "string"
+  }
+  attr {
+    name: "validate_indices"
+    type: "bool"
     default_value {
-      s: ""
+      b: true
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_STRING
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "Igamma"
+  name: "DepthToSpace"
   input_arg {
-    name: "a"
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "block_size"
+    type: "int"
+    has_minimum: true
+    minimum: 2
+  }
+}
+op {
+  name: "DepthToSpace"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+  }
+  attr {
+    name: "block_size"
+    type: "int"
+    has_minimum: true
+    minimum: 2
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
       }
     }
   }
 }
 op {
-  name: "Igammac"
+  name: "DepthwiseConv2dNative"
   input_arg {
-    name: "a"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "x"
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -14324,576 +14940,749 @@ op {
       }
     }
   }
-}
-op {
-  name: "IgnoreErrorsDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "strides"
+    type: "list(int)"
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "IgnoreErrorsDataset"
+  name: "DepthwiseConv2dNative"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "input"
+    type_attr: "T"
   }
-}
-op {
-  name: "Imag"
   input_arg {
-    name: "input"
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "Tout"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_COMPLEX64
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
   attr {
-    name: "Tout"
-    type: "type"
+    name: "data_format"
+    type: "string"
     default_value {
-      type: DT_FLOAT
+      s: "NHWC"
     }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
 }
 op {
-  name: "ImageSummary"
+  name: "DepthwiseConv2dNative"
   input_arg {
-    name: "tag"
-    type: DT_STRING
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "tensor"
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
-    name: "summary"
-    type: DT_STRING
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "max_images"
-    type: "int"
-    default_value {
-      i: 3
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
-    has_minimum: true
-    minimum: 1
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
     default_value {
-      type: DT_FLOAT
+      s: "NHWC"
     }
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_FLOAT
-        type: DT_HALF
+        s: "NHWC"
+        s: "NCHW"
       }
     }
   }
   attr {
-    name: "bad_color"
-    type: "tensor"
+    name: "dilations"
+    type: "list(int)"
     default_value {
-      tensor {
-        dtype: DT_UINT8
-        tensor_shape {
-          dim {
-            size: 4
-          }
-        }
-        int_val: 255
-        int_val: 0
-        int_val: 0
-        int_val: 255
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
       }
     }
   }
 }
 op {
-  name: "ImageSummary"
+  name: "DepthwiseConv2dNativeBackpropFilter"
   input_arg {
-    name: "tag"
-    type: DT_STRING
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "tensor"
+    name: "filter_sizes"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "summary"
-    type: DT_STRING
-  }
-  attr {
-    name: "max_images"
-    type: "int"
-    default_value {
-      i: 3
-    }
-    has_minimum: true
-    minimum: 1
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
-        type: DT_UINT8
         type: DT_FLOAT
-        type: DT_HALF
         type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "bad_color"
-    type: "tensor"
-    default_value {
-      tensor {
-        dtype: DT_UINT8
-        tensor_shape {
-          dim {
-            size: 4
-          }
-        }
-        int_val: 255
-        int_val: 0
-        int_val: 0
-        int_val: 255
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "ImmutableConst"
+  name: "DepthwiseConv2dNativeBackpropFilter"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "filter_sizes"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
+  }
   output_arg {
-    name: "tensor"
-    type_attr: "dtype"
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
   attr {
-    name: "shape"
-    type: "shape"
+    name: "strides"
+    type: "list(int)"
   }
   attr {
-    name: "memory_region_name"
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
     type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
   }
 }
 op {
-  name: "InTopK"
+  name: "DepthwiseConv2dNativeBackpropFilter"
   input_arg {
-    name: "predictions"
-    type: DT_FLOAT
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "targets"
+    name: "filter_sizes"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "precision"
-    type: DT_BOOL
-  }
-  attr {
-    name: "k"
-    type: "int"
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
     default_value {
-      type: DT_INT32
+      s: "NHWC"
     }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
       }
     }
   }
 }
 op {
-  name: "InTopKV2"
+  name: "DepthwiseConv2dNativeBackpropInput"
   input_arg {
-    name: "predictions"
-    type: DT_FLOAT
+    name: "input_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "targets"
+    name: "filter"
     type_attr: "T"
   }
   input_arg {
-    name: "k"
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "precision"
-    type: DT_BOOL
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "InitializeTable"
+  name: "DepthwiseConv2dNativeBackpropInput"
   input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
+    name: "input_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "keys"
-    type_attr: "Tkey"
+    name: "filter"
+    type_attr: "T"
   }
   input_arg {
-    name: "values"
-    type_attr: "Tval"
+    name: "out_backprop"
+    type_attr: "T"
   }
-  attr {
-    name: "Tkey"
-    type: "type"
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "Tval"
+    name: "T"
     type: "type"
-  }
-}
-op {
-  name: "InitializeTableFromTextFile"
-  input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  input_arg {
-    name: "filename"
-    type: DT_STRING
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
   attr {
-    name: "key_index"
-    type: "int"
-    has_minimum: true
-    minimum: -2
+    name: "strides"
+    type: "list(int)"
   }
   attr {
-    name: "value_index"
-    type: "int"
-    has_minimum: true
-    minimum: -2
-  }
-  attr {
-    name: "vocab_size"
-    type: "int"
-    default_value {
-      i: -1
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
-    has_minimum: true
-    minimum: -1
   }
   attr {
-    name: "delimiter"
+    name: "data_format"
     type: "string"
     default_value {
-      s: "\t"
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
     }
   }
 }
 op {
-  name: "InitializeTableFromTextFileV2"
+  name: "DepthwiseConv2dNativeBackpropInput"
   input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
+    name: "input_sizes"
+    type: DT_INT32
   }
   input_arg {
-    name: "filename"
-    type: DT_STRING
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "key_index"
-    type: "int"
-    has_minimum: true
-    minimum: -2
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
   attr {
-    name: "value_index"
-    type: "int"
-    has_minimum: true
-    minimum: -2
+    name: "strides"
+    type: "list(int)"
   }
   attr {
-    name: "vocab_size"
-    type: "int"
-    default_value {
-      i: -1
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
-    has_minimum: true
-    minimum: -1
   }
   attr {
-    name: "delimiter"
+    name: "data_format"
     type: "string"
     default_value {
-      s: "\t"
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "InitializeTableV2"
+  name: "Dequantize"
   input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "keys"
-    type_attr: "Tkey"
+    name: "min_range"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "values"
-    type_attr: "Tval"
+    name: "max_range"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
   }
   attr {
-    name: "Tkey"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
   }
   attr {
-    name: "Tval"
-    type: "type"
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
+    allowed_values {
+      list {
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "InterleaveDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "Dequantize"
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "cycle_length"
-    type: DT_INT64
+    name: "min_range"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "block_length"
-    type: DT_INT64
+    name: "max_range"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type: DT_FLOAT
   }
   attr {
-    name: "f"
-    type: "func"
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
   }
   attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
+    allowed_values {
+      list {
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
+        s: "SCALED"
+      }
+    }
+  }
+}
+op {
+  name: "Dequantize"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "min_range"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_range"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
+    allowed_values {
+      list {
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
+        s: "SCALED"
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "InterleaveDataset"
+  name: "DeserializeIterator"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "resource_handle"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "serialized"
+    type: DT_VARIANT
   }
+  is_stateful: true
+}
+op {
+  name: "DeserializeManySparse"
   input_arg {
-    name: "cycle_length"
-    type: DT_INT64
+    name: "serialized_sparse"
+    type: DT_STRING
   }
-  input_arg {
-    name: "block_length"
+  output_arg {
+    name: "sparse_indices"
     type: DT_INT64
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "sparse_values"
+    type_attr: "dtype"
   }
-  attr {
-    name: "f"
-    type: "func"
+  output_arg {
+    name: "sparse_shape"
+    type: DT_INT64
   }
   attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+    name: "dtype"
+    type: "type"
   }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+}
+op {
+  name: "DeserializeSparse"
+  input_arg {
+    name: "serialized_sparse"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "sparse_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sparse_values"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "sparse_shape"
+    type: DT_INT64
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "dtype"
+    type: "type"
   }
 }
 op {
-  name: "Inv"
+  name: "DeserializeSparse"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "serialized_sparse"
+    type_attr: "Tserialized"
   }
   output_arg {
-    name: "y"
-    type_attr: "T"
+    name: "sparse_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sparse_values"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "sparse_shape"
+    type: DT_INT64
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
+  }
+  attr {
+    name: "Tserialized"
+    type: "type"
+    default_value {
+      type: DT_STRING
+    }
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_STRING
+        type: DT_VARIANT
       }
     }
   }
-  deprecation {
-    version: 17
-  }
 }
 op {
-  name: "InvGrad"
+  name: "DestroyResourceOp"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "ignore_lookup_error"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
+  is_stateful: true
+}
+op {
+  name: "DestroyTemporaryVariable"
   input_arg {
-    name: "y"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   output_arg {
-    name: "z"
+    name: "value"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
   }
-  deprecation {
-    version: 17
+  attr {
+    name: "var_name"
+    type: "string"
   }
 }
 op {
-  name: "InvGrad"
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
+  name: "Diag"
   input_arg {
-    name: "dy"
+    name: "diagonal"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -14901,26 +15690,24 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
     }
   }
-  deprecation {
-    version: 17
-  }
 }
 op {
-  name: "Invert"
+  name: "Diag"
   input_arg {
-    name: "x"
+    name: "diagonal"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -14928,24 +15715,25 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Invert"
+  name: "DiagPart"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "diagonal"
     type_attr: "T"
   }
   attr {
@@ -14953,51 +15741,51 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "InvertPermutation"
+  name: "DiagPart"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "diagonal"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "IsFinite"
+  name: "Digamma"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    type: DT_BOOL
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -15012,14 +15800,14 @@ op {
   }
 }
 op {
-  name: "IsInf"
+  name: "Digamma"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    type: DT_BOOL
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -15027,6 +15815,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -15034,188 +15823,123 @@ op {
   }
 }
 op {
-  name: "IsNan"
+  name: "Dilation2D"
   input_arg {
-    name: "x"
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
-    type: DT_BOOL
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
   }
-}
-op {
-  name: "IsVariableInitialized"
-  input_arg {
-    name: "ref"
-    type_attr: "dtype"
-    is_ref: true
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  output_arg {
-    name: "is_initialized"
-    type: DT_BOOL
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "dtype"
-    type: "type"
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "Iterator"
+  name: "Dilation2D"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "filter"
+    type_attr: "T"
+  }
   output_arg {
-    name: "handle"
-    type: DT_RESOURCE
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "shared_name"
-    type: "string"
-  }
-  attr {
-    name: "container"
-    type: "string"
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-  is_stateful: true
-}
-op {
-  name: "IteratorFromStringHandle"
-  input_arg {
-    name: "string_handle"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "resource_handle"
-    type: DT_RESOURCE
-  }
-  is_stateful: true
-}
-op {
-  name: "IteratorFromStringHandle"
-  input_arg {
-    name: "string_handle"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "resource_handle"
-    type: DT_RESOURCE
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
-    has_minimum: true
-  }
-  is_stateful: true
-}
-op {
-  name: "IteratorGetNext"
-  input_arg {
-    name: "iterator"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "components"
-    type_list_attr: "output_types"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
+    minimum: 4
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
+    name: "rates"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
-  }
-  is_stateful: true
-}
-op {
-  name: "IteratorSetStatsAggregator"
-  input_arg {
-    name: "iterator_handle"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "stats_aggregator_handle"
-    type: DT_RESOURCE
-  }
-  is_stateful: true
-}
-op {
-  name: "IteratorToStringHandle"
-  input_arg {
-    name: "resource_handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "string_handle"
-    type: DT_STRING
-  }
-  is_stateful: true
-}
-op {
-  name: "L2Loss"
-  input_arg {
-    name: "t"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+    minimum: 4
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_HALF
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "L2Loss"
+  name: "Dilation2D"
   input_arg {
-    name: "t"
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
@@ -15227,298 +15951,175 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-}
-op {
-  name: "LMDBReader"
-  output_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "shared_name"
+    name: "padding"
     type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "LRN"
+  name: "Dilation2D"
   input_arg {
     name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "filter"
+    type_attr: "T"
+  }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "depth_radius"
-    type: "int"
-    default_value {
-      i: 5
-    }
-  }
-  attr {
-    name: "bias"
-    type: "float"
-    default_value {
-      f: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
   attr {
-    name: "alpha"
-    type: "float"
-    default_value {
-      f: 1
-    }
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "beta"
-    type: "float"
-    default_value {
-      f: 0.5
-    }
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_HALF
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "LRNGrad"
+  name: "Dilation2DBackpropFilter"
   input_arg {
-    name: "input_grads"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "input_image"
+    name: "filter"
     type_attr: "T"
   }
   input_arg {
-    name: "output_image"
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "filter_backprop"
     type_attr: "T"
   }
-  attr {
-    name: "depth_radius"
-    type: "int"
-    default_value {
-      i: 5
-    }
-  }
-  attr {
-    name: "bias"
-    type: "float"
-    default_value {
-      f: 1
-    }
-  }
-  attr {
-    name: "alpha"
-    type: "float"
-    default_value {
-      f: 1
-    }
-  }
-  attr {
-    name: "beta"
-    type: "float"
-    default_value {
-      f: 0.5
-    }
-  }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
         type: DT_HALF
       }
     }
   }
-}
-op {
-  name: "LatencyStatsDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "tag"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "LearnedUnigramCandidateSampler"
-  input_arg {
-    name: "true_classes"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "true_expected_count"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "sampled_expected_count"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "num_true"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
   attr {
-    name: "num_sampled"
-    type: "int"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "unique"
-    type: "bool"
+    minimum: 4
   }
   attr {
-    name: "range_max"
-    type: "int"
+    name: "rates"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    minimum: 4
   }
   attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
 }
 op {
-  name: "LearnedUnigramCandidateSampler"
+  name: "Dilation2DBackpropFilter"
   input_arg {
-    name: "true_classes"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "true_expected_count"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "sampled_expected_count"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "num_true"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "num_sampled"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "unique"
-    type: "bool"
-  }
-  attr {
-    name: "range_max"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "input"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "LeftShift"
   input_arg {
-    name: "x"
+    name: "filter"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "filter_backprop"
     type_attr: "T"
   }
   attr {
@@ -15526,64 +16127,60 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
         type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
         type: DT_UINT16
+        type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  is_commutative: true
-}
-op {
-  name: "Less"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "y"
-    type_attr: "T"
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  output_arg {
-    name: "z"
-    type: DT_BOOL
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "Less"
+  name: "Dilation2DBackpropFilter"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "filter_backprop"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -15601,23 +16198,50 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "LessEqual"
+  name: "Dilation2DBackpropFilter"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "filter_backprop"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -15627,29 +16251,58 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "LessEqual"
+  name: "Dilation2DBackpropInput"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "in_backprop"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -15665,50 +16318,48 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "Lgamma"
-  input_arg {
-    name: "x"
-    type_attr: "T"
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
-  output_arg {
-    name: "y"
-    type_attr: "T"
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "LinSpace"
+  name: "Dilation2DBackpropInput"
   input_arg {
-    name: "start"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "stop"
+    name: "filter"
     type_attr: "T"
   }
   input_arg {
-    name: "num"
-    type_attr: "Tidx"
+    name: "out_backprop"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "in_backprop"
     type_attr: "T"
   }
   attr {
@@ -15718,113 +16369,118 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "ListDiff"
+  name: "Dilation2DBackpropInput"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "filter"
     type_attr: "T"
   }
-  output_arg {
-    name: "out"
+  input_arg {
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "idx"
-    type_attr: "out_idx"
+    name: "in_backprop"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "out_idx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-}
-op {
-  name: "LoadAndRemapMatrix"
-  input_arg {
-    name: "ckpt_path"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "old_tensor_name"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "row_remapping"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "col_remapping"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "initializing_values"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output_matrix"
-    type: DT_FLOAT
-  }
   attr {
-    name: "num_rows"
-    type: "int"
+    name: "strides"
+    type: "list(int)"
     has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "num_cols"
-    type: "int"
+    name: "rates"
+    type: "list(int)"
     has_minimum: true
-    minimum: 1
+    minimum: 4
   }
   attr {
-    name: "max_rows_in_memory"
-    type: "int"
-    default_value {
-      i: -1
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "Log"
+  name: "Dilation2DBackpropInput"
   input_arg {
-    name: "x"
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "filter"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "out_backprop"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "in_backprop"
     type_attr: "T"
   }
   attr {
@@ -15832,25 +16488,58 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "Log1p"
+  name: "Div"
   input_arg {
     name: "x"
     type_attr: "T"
   }
-  output_arg {
+  input_arg {
     name: "y"
     type_attr: "T"
   }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
   attr {
     name: "T"
     type: "type"
@@ -15859,6 +16548,12 @@ op {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
@@ -15866,17 +16561,17 @@ op {
   }
 }
 op {
-  name: "LogMatrixDeterminant"
+  name: "Div"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
-  output_arg {
-    name: "sign"
+  input_arg {
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "log_abs_determinant"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -15884,8 +16579,16 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
@@ -15893,949 +16596,758 @@ op {
   }
 }
 op {
-  name: "LogSoftmax"
+  name: "DrawBoundingBoxes"
   input_arg {
-    name: "logits"
+    name: "images"
     type_attr: "T"
   }
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
+  }
   output_arg {
-    name: "logsoftmax"
+    name: "output"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "LogUniformCandidateSampler"
+  name: "DynamicPartition"
   input_arg {
-    name: "true_classes"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
+    name: "data"
+    type_attr: "T"
   }
-  output_arg {
-    name: "true_expected_count"
-    type: DT_FLOAT
+  input_arg {
+    name: "partitions"
+    type: DT_INT32
   }
   output_arg {
-    name: "sampled_expected_count"
-    type: DT_FLOAT
+    name: "outputs"
+    type_attr: "T"
+    number_attr: "num_partitions"
   }
   attr {
-    name: "num_true"
+    name: "num_partitions"
     type: "int"
     has_minimum: true
     minimum: 1
   }
   attr {
-    name: "num_sampled"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
   }
-  attr {
-    name: "unique"
-    type: "bool"
+}
+op {
+  name: "DynamicStitch"
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "data"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "merged"
+    type_attr: "T"
   }
   attr {
-    name: "range_max"
+    name: "N"
     type: "int"
     has_minimum: true
     minimum: 1
   }
   attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "T"
+    type: "type"
   }
 }
 op {
-  name: "LogUniformCandidateSampler"
+  name: "EagerPyFunc"
   input_arg {
-    name: "true_classes"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "sampled_candidates"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "true_expected_count"
-    type: DT_FLOAT
+    name: "input"
+    type_list_attr: "Tin"
   }
   output_arg {
-    name: "sampled_expected_count"
-    type: DT_FLOAT
+    name: "output"
+    type_list_attr: "Tout"
   }
   attr {
-    name: "num_true"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "token"
+    type: "string"
   }
   attr {
-    name: "num_sampled"
-    type: "int"
+    name: "Tin"
+    type: "list(type)"
     has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "unique"
-    type: "bool"
   }
   attr {
-    name: "range_max"
-    type: "int"
+    name: "Tout"
+    type: "list(type)"
     has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
   }
   is_stateful: true
 }
 op {
-  name: "LogicalAnd"
+  name: "EditDistance"
   input_arg {
-    name: "x"
-    type: DT_BOOL
+    name: "hypothesis_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "y"
-    type: DT_BOOL
-  }
-  output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "hypothesis_values"
+    type_attr: "T"
   }
-  is_commutative: true
-}
-op {
-  name: "LogicalNot"
   input_arg {
-    name: "x"
-    type: DT_BOOL
-  }
-  output_arg {
-    name: "y"
-    type: DT_BOOL
+    name: "hypothesis_shape"
+    type: DT_INT64
   }
-}
-op {
-  name: "LogicalOr"
   input_arg {
-    name: "x"
-    type: DT_BOOL
+    name: "truth_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "y"
-    type: DT_BOOL
-  }
-  output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "truth_values"
+    type_attr: "T"
   }
-  is_commutative: true
-}
-op {
-  name: "LookupTableExport"
   input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "keys"
-    type_attr: "Tkeys"
+    name: "truth_shape"
+    type: DT_INT64
   }
   output_arg {
-    name: "values"
-    type_attr: "Tvalues"
+    name: "output"
+    type: DT_FLOAT
   }
   attr {
-    name: "Tkeys"
-    type: "type"
+    name: "normalize"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
-    name: "Tvalues"
+    name: "T"
     type: "type"
   }
 }
 op {
-  name: "LookupTableExportV2"
+  name: "Elu"
   input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "keys"
-    type_attr: "Tkeys"
+    name: "features"
+    type_attr: "T"
   }
   output_arg {
-    name: "values"
-    type_attr: "Tvalues"
-  }
-  attr {
-    name: "Tkeys"
-    type: "type"
+    name: "activations"
+    type_attr: "T"
   }
   attr {
-    name: "Tvalues"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_HALF
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "LookupTableFind"
-  input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  input_arg {
-    name: "keys"
-    type_attr: "Tin"
-  }
+  name: "Elu"
   input_arg {
-    name: "default_value"
-    type_attr: "Tout"
+    name: "features"
+    type_attr: "T"
   }
   output_arg {
-    name: "values"
-    type_attr: "Tout"
-  }
-  attr {
-    name: "Tin"
-    type: "type"
+    name: "activations"
+    type_attr: "T"
   }
   attr {
-    name: "Tout"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
 }
 op {
-  name: "LookupTableFindV2"
-  input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "keys"
-    type_attr: "Tin"
-  }
+  name: "Elu"
   input_arg {
-    name: "default_value"
-    type_attr: "Tout"
+    name: "features"
+    type_attr: "T"
   }
   output_arg {
-    name: "values"
-    type_attr: "Tout"
-  }
-  attr {
-    name: "Tin"
-    type: "type"
+    name: "activations"
+    type_attr: "T"
   }
   attr {
-    name: "Tout"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "LookupTableImport"
-  input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
+  name: "EluGrad"
   input_arg {
-    name: "keys"
-    type_attr: "Tin"
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "values"
-    type_attr: "Tout"
+    name: "outputs"
+    type_attr: "T"
   }
-  attr {
-    name: "Tin"
-    type: "type"
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
-    name: "Tout"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_HALF
+      }
+    }
   }
 }
 op {
-  name: "LookupTableImportV2"
-  input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
+  name: "EluGrad"
   input_arg {
-    name: "keys"
-    type_attr: "Tin"
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "values"
-    type_attr: "Tout"
+    name: "outputs"
+    type_attr: "T"
   }
-  attr {
-    name: "Tin"
-    type: "type"
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
-    name: "Tout"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "LookupTableInsert"
-  input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
+  name: "EluGrad"
   input_arg {
-    name: "keys"
-    type_attr: "Tin"
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "values"
-    type_attr: "Tout"
+    name: "outputs"
+    type_attr: "T"
   }
-  attr {
-    name: "Tin"
-    type: "type"
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
-    name: "Tout"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
 }
 op {
-  name: "LookupTableInsertV2"
+  name: "EmptyTensorList"
   input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "keys"
-    type_attr: "Tin"
+    name: "element_shape"
+    type_attr: "shape_type"
   }
-  input_arg {
-    name: "values"
-    type_attr: "Tout"
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "Tin"
+    name: "element_dtype"
     type: "type"
   }
   attr {
-    name: "Tout"
+    name: "shape_type"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
-  is_stateful: true
-}
-op {
-  name: "LookupTableSize"
-  input_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "size"
-    type: DT_INT64
-  }
-}
-op {
-  name: "LookupTableSizeV2"
-  input_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "size"
-    type: DT_INT64
-  }
-  is_stateful: true
 }
 op {
-  name: "LoopCond"
+  name: "EncodeBase64"
   input_arg {
     name: "input"
-    type: DT_BOOL
+    type: DT_STRING
   }
   output_arg {
     name: "output"
-    type: DT_BOOL
-  }
-}
-op {
-  name: "MakeIterator"
-  input_arg {
-    name: "dataset"
-    type: DT_VARIANT
+    type: DT_STRING
   }
-  input_arg {
-    name: "iterator"
-    type: DT_RESOURCE
+  attr {
+    name: "pad"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "MapAndBatchDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
-  }
-  input_arg {
-    name: "batch_size"
-    type: DT_INT64
-  }
+  name: "EncodeJpeg"
   input_arg {
-    name: "num_parallel_batches"
-    type: DT_INT64
+    name: "image"
+    type: DT_UINT8
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "contents"
+    type: DT_STRING
   }
   attr {
-    name: "f"
-    type: "func"
+    name: "format"
+    type: "string"
+    default_value {
+      s: ""
+    }
+    allowed_values {
+      list {
+        s: ""
+        s: "grayscale"
+        s: "rgb"
+      }
+    }
   }
   attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+    name: "quality"
+    type: "int"
+    default_value {
+      i: 95
+    }
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "progressive"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "optimize_size"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
-}
-op {
-  name: "MapClear"
   attr {
-    name: "capacity"
-    type: "int"
+    name: "chroma_downsampling"
+    type: "bool"
     default_value {
-      i: 0
+      b: true
     }
-    has_minimum: true
   }
   attr {
-    name: "memory_limit"
-    type: "int"
+    name: "density_unit"
+    type: "string"
     default_value {
-      i: 0
+      s: "in"
+    }
+    allowed_values {
+      list {
+        s: "in"
+        s: "cm"
+      }
     }
-    has_minimum: true
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
+    name: "x_density"
+    type: "int"
+    default_value {
+      i: 300
+    }
   }
   attr {
-    name: "container"
-    type: "string"
+    name: "y_density"
+    type: "int"
     default_value {
-      s: ""
+      i: 300
     }
   }
   attr {
-    name: "shared_name"
+    name: "xmp_metadata"
     type: "string"
     default_value {
       s: ""
     }
   }
-  is_stateful: true
 }
 op {
-  name: "MapDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "EncodePng"
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "image"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "f"
-    type: "func"
-  }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+    name: "contents"
+    type: DT_STRING
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "compression"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_UINT8
+    }
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_UINT16
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "MapDataset"
+  name: "EncodeWav"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "audio"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "sample_rate"
+    type: DT_INT32
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "f"
-    type: "func"
-  }
-  attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "contents"
+    type: DT_STRING
   }
 }
 op {
-  name: "MapIncompleteSize"
-  output_arg {
-    name: "size"
-    type: DT_INT32
+  name: "Enter"
+  input_arg {
+    name: "data"
+    type_attr: "T"
   }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
+    name: "frame_name"
+    type: "string"
   }
   attr {
-    name: "container"
-    type: "string"
+    name: "is_constant"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "parallel_iterations"
+    type: "int"
     default_value {
-      s: ""
+      i: 10
     }
   }
-  is_stateful: true
 }
 op {
-  name: "MapPeek"
+  name: "Equal"
   input_arg {
-    name: "key"
-    type: DT_INT64
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type: DT_INT32
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "z"
+    type: DT_BOOL
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_QUINT8
+        type: DT_QINT8
+        type: DT_QINT32
+        type: DT_STRING
+        type: DT_BOOL
+        type: DT_COMPLEX128
+      }
     }
   }
-  is_stateful: true
+  is_commutative: true
 }
 op {
-  name: "MapSize"
-  output_arg {
-    name: "size"
-    type: DT_INT32
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+  name: "Equal"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
+  input_arg {
+    name: "y"
+    type_attr: "T"
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_QUINT8
+        type: DT_QINT8
+        type: DT_QINT32
+        type: DT_STRING
+        type: DT_BOOL
+        type: DT_COMPLEX128
+      }
     }
   }
-  is_stateful: true
+  is_commutative: true
 }
 op {
-  name: "MapStage"
-  input_arg {
-    name: "key"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "indices"
-    type: DT_INT32
-  }
+  name: "Erf"
   input_arg {
-    name: "values"
-    type_list_attr: "fake_dtypes"
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
   }
-  attr {
-    name: "fake_dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+}
+op {
+  name: "Erf"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "MapUnstage"
-  input_arg {
-    name: "key"
-    type: DT_INT64
-  }
+  name: "Erfc"
   input_arg {
-    name: "indices"
-    type: DT_INT32
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
-    has_minimum: true
   }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+}
+op {
+  name: "Erfc"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "MapUnstageNoKey"
+  name: "ExecuteInCriticalSection"
   input_arg {
-    name: "indices"
-    type: DT_INT32
+    name: "critical_section"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "key"
-    type: DT_INT64
+  input_arg {
+    name: "arguments"
+    type_list_attr: "Targuments"
   }
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
+    name: "outputs"
+    type_list_attr: "output_types"
   }
   attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "f"
+    type: "func"
   }
   attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    name: "Targuments"
+    type: "list(type)"
     has_minimum: true
   }
   attr {
-    name: "dtypes"
+    name: "output_types"
     type: "list(type)"
     has_minimum: true
     minimum: 1
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
   is_stateful: true
 }
 op {
-  name: "MatMul"
+  name: "ExecuteInCriticalSection"
   input_arg {
-    name: "a"
-    type_attr: "T"
+    name: "critical_section"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "b"
-    type_attr: "T"
+    name: "arguments"
+    type_list_attr: "Targuments"
   }
   output_arg {
-    name: "product"
-    type_attr: "T"
+    name: "outputs"
+    type_list_attr: "output_types"
   }
   attr {
-    name: "transpose_a"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "f"
+    type: "func"
   }
   attr {
-    name: "transpose_b"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
-  }
-}
-op {
-  name: "MatchingFiles"
-  input_arg {
-    name: "pattern"
-    type: DT_STRING
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
   }
-  output_arg {
-    name: "filenames"
-    type: DT_STRING
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
   }
+  is_stateful: true
 }
 op {
-  name: "MatrixBandPart"
+  name: "Exit"
   input_arg {
-    name: "input"
+    name: "data"
     type_attr: "T"
   }
-  input_arg {
-    name: "num_lower"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "num_upper"
-    type: DT_INT64
-  }
   output_arg {
-    name: "band"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -16844,13 +17356,13 @@ op {
   }
 }
 op {
-  name: "MatrixDeterminant"
+  name: "Exp"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -16858,20 +17370,23 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "MatrixDeterminant"
+  name: "Exp"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -16879,6 +17394,8 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -16888,11 +17405,15 @@ op {
   }
 }
 op {
-  name: "MatrixDiag"
+  name: "ExpandDims"
   input_arg {
-    name: "diagonal"
+    name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "dim"
+    type_attr: "Tdim"
+  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -16901,30 +17422,28 @@ op {
     name: "T"
     type: "type"
   }
-}
-op {
-  name: "MatrixDiagPart"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "diagonal"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Tdim"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
 }
 op {
-  name: "MatrixExponential"
+  name: "Expm1"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -16932,8 +17451,9 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_HALF
         type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
@@ -16941,290 +17461,153 @@ op {
   }
 }
 op {
-  name: "MatrixInverse"
+  name: "Expm1"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
-  attr {
-    name: "adjoint"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "MatrixInverse"
+  name: "ExtractGlimpse"
   input_arg {
     name: "input"
-    type_attr: "T"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "offsets"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "glimpse"
+    type: DT_FLOAT
   }
   attr {
-    name: "adjoint"
+    name: "centered"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
+    name: "normalized"
+    type: "bool"
+    default_value {
+      b: true
     }
   }
-}
-op {
-  name: "MatrixSetDiag"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "diagonal"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
-    type: "type"
+    name: "uniform_noise"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
 }
 op {
-  name: "MatrixSolve"
-  input_arg {
-    name: "matrix"
-    type_attr: "T"
-  }
+  name: "ExtractImagePatches"
   input_arg {
-    name: "rhs"
+    name: "images"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "patches"
     type_attr: "T"
   }
   attr {
-    name: "adjoint"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "ksizes"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
-  }
-}
-op {
-  name: "MatrixSolveLs"
-  input_arg {
-    name: "matrix"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "rhs"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2_regularizer"
-    type: DT_DOUBLE
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
-      }
-    }
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "fast"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-}
-op {
-  name: "MatrixSolveLs"
-  input_arg {
-    name: "matrix"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "rhs"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2_regularizer"
-    type: DT_DOUBLE
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "fast"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-}
-op {
-  name: "MatrixTriangularSolve"
-  input_arg {
-    name: "matrix"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "rhs"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "lower"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "adjoint"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "T"
-    type: "type"
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "MatrixTriangularSolve"
-  input_arg {
-    name: "matrix"
-    type_attr: "T"
-  }
+  name: "ExtractImagePatches"
   input_arg {
-    name: "rhs"
+    name: "images"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "patches"
     type_attr: "T"
   }
   attr {
-    name: "lower"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "adjoint"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "ksizes"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
-  }
-}
-op {
-  name: "Max"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
     name: "T"
@@ -17233,55 +17616,56 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "Max"
+  name: "ExtractImagePatches"
   input_arg {
-    name: "input"
+    name: "images"
     type_attr: "T"
   }
-  input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
-  }
   output_arg {
-    name: "output"
+    name: "patches"
     type_attr: "T"
   }
   attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "ksizes"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
     name: "T"
@@ -17290,62 +17674,42 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "MaxPool"
+  name: "ExtractImagePatches"
   input_arg {
-    name: "input"
+    name: "images"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "patches"
     type_attr: "T"
   }
   attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "ksize"
+    name: "ksizes"
     type: "list(int)"
     has_minimum: true
     minimum: 4
@@ -17357,71 +17721,31 @@ op {
     minimum: 4
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
-  }
-}
-op {
-  name: "MaxPool"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "rates"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
   attr {
     name: "padding"
     type: "string"
@@ -17432,2140 +17756,1449 @@ op {
       }
     }
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
-  }
 }
 op {
-  name: "MaxPool"
+  name: "ExtractJpegShape"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "contents"
+    type: DT_STRING
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "image_shape"
+    type_attr: "output_type"
   }
   attr {
-    name: "T"
+    name: "output_type"
     type: "type"
     default_value {
-      type: DT_FLOAT
+      type: DT_INT32
     }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_QINT8
       }
     }
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+}
+op {
+  name: "FFT"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+}
+op {
+  name: "FFT2D"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-        s: "NCHW_VECT_C"
-      }
-    }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
   }
 }
 op {
-  name: "MaxPool3D"
+  name: "FFT3D"
   input_arg {
     name: "input"
-    type_attr: "T"
+    type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type: DT_COMPLEX64
+  }
+}
+op {
+  name: "FIFOQueue"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
+    name: "component_types"
+    type: "list(type)"
     has_minimum: true
-    minimum: 5
+    minimum: 1
   }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
     has_minimum: true
-    minimum: 5
   }
   attr {
-    name: "padding"
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "container"
     type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    default_value {
+      s: ""
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
+  is_stateful: true
 }
 op {
-  name: "MaxPool3D"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
+  name: "FIFOQueueV2"
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+    name: "handle"
+    type: DT_RESOURCE
   }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "component_types"
+    type: "list(type)"
     has_minimum: true
-    minimum: 5
+    minimum: 1
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
       list {
-        s: "SAME"
-        s: "VALID"
       }
     }
+    has_minimum: true
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "capacity"
+    type: "int"
     default_value {
-      s: "NDHWC"
+      i: -1
     }
-    allowed_values {
-      list {
-        s: "NDHWC"
-        s: "NCDHW"
-      }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
+  is_stateful: true
 }
 op {
-  name: "MaxPool3DGrad"
-  input_arg {
-    name: "orig_input"
-    type: DT_FLOAT
+  name: "Fact"
+  output_arg {
+    name: "fact"
+    type: DT_STRING
   }
+}
+op {
+  name: "FakeQuantWithMinMaxArgs"
   input_arg {
-    name: "orig_output"
+    name: "inputs"
     type: DT_FLOAT
   }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+    name: "outputs"
+    type: DT_FLOAT
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
+    name: "max"
+    type: "float"
+    default_value {
+      f: 6
     }
   }
 }
 op {
-  name: "MaxPool3DGrad"
-  input_arg {
-    name: "orig_input"
-    type: DT_FLOAT
-  }
+  name: "FakeQuantWithMinMaxArgs"
   input_arg {
-    name: "orig_output"
+    name: "inputs"
     type: DT_FLOAT
   }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+    name: "outputs"
+    type: DT_FLOAT
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "max"
+    type: "float"
     default_value {
-      s: "NDHWC"
-    }
-    allowed_values {
-      list {
-        s: "NDHWC"
-        s: "NCDHW"
-      }
+      f: 6
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
     }
   }
 }
 op {
-  name: "MaxPool3DGrad"
-  input_arg {
-    name: "orig_input"
-    type_attr: "TInput"
-  }
-  input_arg {
-    name: "orig_output"
-    type_attr: "TInput"
-  }
+  name: "FakeQuantWithMinMaxArgs"
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
+    name: "outputs"
+    type: DT_FLOAT
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "max"
+    type: "float"
     default_value {
-      s: "NDHWC"
-    }
-    allowed_values {
-      list {
-        s: "NDHWC"
-        s: "NCDHW"
-      }
+      f: 6
     }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "num_bits"
+    type: "int"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
+      i: 8
     }
   }
   attr {
-    name: "TInput"
-    type: "type"
+    name: "narrow_range"
+    type: "bool"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
+      b: false
     }
   }
 }
 op {
-  name: "MaxPool3DGradGrad"
-  input_arg {
-    name: "orig_input"
-    type_attr: "T"
-  }
+  name: "FakeQuantWithMinMaxArgsGradient"
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 5
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "backprops"
+    type: DT_FLOAT
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "min"
+    type: "float"
     default_value {
-      s: "NDHWC"
-    }
-    allowed_values {
-      list {
-        s: "NDHWC"
-        s: "NCDHW"
-      }
+      f: -6
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-      }
+    name: "max"
+    type: "float"
+    default_value {
+      f: 6
     }
   }
 }
 op {
-  name: "MaxPoolGrad"
-  input_arg {
-    name: "orig_input"
-    type_attr: "T"
-  }
+  name: "FakeQuantWithMinMaxArgsGradient"
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "backprops"
+    type: DT_FLOAT
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "max"
+    type: "float"
     default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+      f: 6
     }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "num_bits"
+    type: "int"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_HALF
-      }
+      i: 8
     }
   }
 }
 op {
-  name: "MaxPoolGrad"
-  input_arg {
-    name: "orig_input"
-    type_attr: "T"
-  }
+  name: "FakeQuantWithMinMaxArgsGradient"
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "backprops"
+    type: DT_FLOAT
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "min"
+    type: "float"
+    default_value {
+      f: -6
+    }
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    name: "max"
+    type: "float"
+    default_value {
+      f: 6
     }
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "num_bits"
+    type: "int"
     default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+      i: 8
     }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "narrow_range"
+    type: "bool"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
+      b: false
     }
   }
 }
 op {
-  name: "MaxPoolGrad"
+  name: "FakeQuantWithMinMaxVars"
   input_arg {
-    name: "orig_input"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "outputs"
+    type: DT_FLOAT
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+}
+op {
+  name: "FakeQuantWithMinMaxVars"
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+  output_arg {
+    name: "outputs"
+    type: DT_FLOAT
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "num_bits"
+    type: "int"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
+      i: 8
     }
   }
 }
 op {
-  name: "MaxPoolGradGrad"
+  name: "FakeQuantWithMinMaxVars"
   input_arg {
-    name: "orig_input"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "outputs"
+    type: DT_FLOAT
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "num_bits"
+    type: "int"
     default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+      i: 8
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
+    name: "narrow_range"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
 }
 op {
-  name: "MaxPoolGradGrad"
+  name: "FakeQuantWithMinMaxVarsGradient"
   input_arg {
-    name: "orig_input"
-    type_attr: "T"
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "min"
+    type: DT_FLOAT
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  output_arg {
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+  output_arg {
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
   }
 }
 op {
-  name: "MaxPoolGradGradV2"
+  name: "FakeQuantWithMinMaxVarsGradient"
   input_arg {
-    name: "orig_input"
-    type_attr: "T"
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "ksize"
-    type: DT_INT32
+    name: "max"
+    type: DT_FLOAT
   }
-  input_arg {
-    name: "strides"
-    type: DT_INT32
+  output_arg {
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "num_bits"
+    type: "int"
     default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
+      i: 8
     }
   }
 }
 op {
-  name: "MaxPoolGradGradV2"
+  name: "FakeQuantWithMinMaxVarsGradient"
   input_arg {
-    name: "orig_input"
-    type_attr: "T"
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "ksize"
-    type: DT_INT32
+    name: "max"
+    type: DT_FLOAT
   }
-  input_arg {
-    name: "strides"
-    type: DT_INT32
+  output_arg {
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "num_bits"
+    type: "int"
     default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+      i: 8
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
+    name: "narrow_range"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
 }
 op {
-  name: "MaxPoolGradGradWithArgmax"
+  name: "FakeQuantWithMinMaxVarsPerChannel"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "argmax"
-    type_attr: "Targmax"
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "Targmax"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
+    name: "outputs"
+    type: DT_FLOAT
   }
 }
 op {
-  name: "MaxPoolGradGradWithArgmax"
+  name: "FakeQuantWithMinMaxVarsPerChannel"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "argmax"
-    type_attr: "Targmax"
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "Targmax"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "outputs"
+    type: DT_FLOAT
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
     }
   }
 }
 op {
-  name: "MaxPoolGradV2"
-  input_arg {
-    name: "orig_input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "orig_output"
-    type_attr: "T"
-  }
+  name: "FakeQuantWithMinMaxVarsPerChannel"
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "ksize"
-    type: DT_INT32
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "strides"
-    type: DT_INT32
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "outputs"
+    type: DT_FLOAT
   }
   attr {
-    name: "data_format"
-    type: "string"
+    name: "num_bits"
+    type: "int"
     default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
+      i: 8
     }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "narrow_range"
+    type: "bool"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
+      b: false
     }
   }
 }
 op {
-  name: "MaxPoolGradV2"
-  input_arg {
-    name: "orig_input"
-    type_attr: "T"
-  }
+  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
   input_arg {
-    name: "orig_output"
-    type_attr: "T"
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "ksize"
-    type: DT_INT32
+    name: "min"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "strides"
-    type: DT_INT32
+    name: "max"
+    type: DT_FLOAT
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  output_arg {
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
   }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
+  }
+}
+op {
+  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
+  input_arg {
+    name: "gradients"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "inputs"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "num_bits"
+    type: "int"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
+      i: 8
     }
   }
 }
 op {
-  name: "MaxPoolGradWithArgmax"
+  name: "FakeQuantWithMinMaxVarsPerChannelGradient"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "gradients"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "inputs"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "argmax"
-    type_attr: "Targmax"
+    name: "min"
+    type: DT_FLOAT
   }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
   }
-  attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  output_arg {
+    name: "backprops_wrt_input"
+    type: DT_FLOAT
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+  output_arg {
+    name: "backprop_wrt_min"
+    type: DT_FLOAT
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  output_arg {
+    name: "backprop_wrt_max"
+    type: DT_FLOAT
   }
   attr {
-    name: "Targmax"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
     }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "narrow_range"
+    type: "bool"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_HALF
-      }
+      b: false
     }
   }
 }
 op {
-  name: "MaxPoolGradWithArgmax"
+  name: "FakeQueue"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
   }
+  is_stateful: true
+}
+op {
+  name: "Fill"
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "dims"
+    type: DT_INT32
   }
   input_arg {
-    name: "argmax"
-    type_attr: "Targmax"
+    name: "value"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "T"
+    type: "type"
   }
-  attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+}
+op {
+  name: "Fill"
+  input_arg {
+    name: "dims"
+    type_attr: "index_type"
   }
-  attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+  input_arg {
+    name: "value"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "Targmax"
+    name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
   }
   attr {
-    name: "T"
+    name: "index_type"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "MaxPoolGradWithArgmax"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
+  name: "FilterDataset"
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "argmax"
-    type_attr: "Targmax"
+    name: "other_arguments"
+    type_list_attr: "Targuments"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "predicate"
+    type: "func"
   }
   attr {
-    name: "strides"
-    type: "list(int)"
+    name: "Targuments"
+    type: "list(type)"
     has_minimum: true
-    minimum: 4
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
-  }
-  attr {
-    name: "Targmax"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
+  is_stateful: true
 }
 op {
-  name: "MaxPoolV2"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
+  name: "FilterDataset"
   input_arg {
-    name: "ksize"
-    type: DT_INT32
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "strides"
-    type: DT_INT32
+    name: "other_arguments"
+    type_list_attr: "Targuments"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "T"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
+    name: "predicate"
+    type: "func"
   }
   attr {
-    name: "padding"
-    type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
-    }
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
   }
   attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-      }
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "MaxPoolV2"
+  name: "FixedLengthRecordDataset"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "filenames"
+    type: DT_STRING
   }
   input_arg {
-    name: "ksize"
-    type: DT_INT32
+    name: "header_bytes"
+    type: DT_INT64
   }
   input_arg {
-    name: "strides"
-    type: DT_INT32
+    name: "record_bytes"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "footer_bytes"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
+  }
+  is_stateful: true
+}
+op {
+  name: "FixedLengthRecordReader"
+  output_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "header_bytes"
+    type: "int"
     default_value {
-      type: DT_FLOAT
+      i: 0
     }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_QINT8
-      }
+  }
+  attr {
+    name: "record_bytes"
+    type: "int"
+  }
+  attr {
+    name: "footer_bytes"
+    type: "int"
+    default_value {
+      i: 0
     }
   }
   attr {
-    name: "padding"
+    name: "container"
     type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    default_value {
+      s: ""
     }
   }
   attr {
-    name: "data_format"
+    name: "shared_name"
     type: "string"
     default_value {
-      s: "NHWC"
-    }
-    allowed_values {
-      list {
-        s: "NHWC"
-        s: "NCHW"
-        s: "NCHW_VECT_C"
-      }
+      s: ""
     }
   }
+  is_stateful: true
 }
 op {
-  name: "MaxPoolWithArgmax"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
+  name: "FixedLengthRecordReader"
   output_arg {
-    name: "argmax"
-    type_attr: "Targmax"
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "header_bytes"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "record_bytes"
+    type: "int"
   }
   attr {
-    name: "Targmax"
-    type: "type"
+    name: "footer_bytes"
+    type: "int"
     default_value {
-      type: DT_INT64
+      i: 0
     }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+  }
+  attr {
+    name: "hop_bytes"
+    type: "int"
+    default_value {
+      i: 0
     }
   }
   attr {
-    name: "padding"
+    name: "container"
     type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    default_value {
+      s: ""
     }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "shared_name"
+    type: "string"
     default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_HALF
-      }
+      s: ""
     }
   }
+  is_stateful: true
 }
 op {
-  name: "MaxPoolWithArgmax"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
+  name: "FixedLengthRecordReaderV2"
   output_arg {
-    name: "argmax"
-    type_attr: "Targmax"
+    name: "reader_handle"
+    type: DT_RESOURCE
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "header_bytes"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "record_bytes"
+    type: "int"
   }
   attr {
-    name: "Targmax"
-    type: "type"
+    name: "footer_bytes"
+    type: "int"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      i: 0
     }
   }
   attr {
-    name: "padding"
+    name: "container"
     type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    default_value {
+      s: ""
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
+  is_stateful: true
 }
 op {
-  name: "MaxPoolWithArgmax"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
+  name: "FixedLengthRecordReaderV2"
   output_arg {
-    name: "argmax"
-    type_attr: "Targmax"
+    name: "reader_handle"
+    type: DT_RESOURCE
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "header_bytes"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-    has_minimum: true
-    minimum: 4
+    name: "record_bytes"
+    type: "int"
   }
   attr {
-    name: "Targmax"
-    type: "type"
+    name: "footer_bytes"
+    type: "int"
     default_value {
-      type: DT_INT64
+      i: 0
     }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+  }
+  attr {
+    name: "hop_bytes"
+    type: "int"
+    default_value {
+      i: 0
     }
   }
   attr {
-    name: "padding"
+    name: "container"
     type: "string"
-    allowed_values {
-      list {
-        s: "SAME"
-        s: "VALID"
-      }
+    default_value {
+      s: ""
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
+  is_stateful: true
 }
 op {
-  name: "Maximum"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
+  name: "FixedLengthRecordReaderV2"
   output_arg {
-    name: "z"
-    type_attr: "T"
+    name: "reader_handle"
+    type: DT_RESOURCE
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-      }
+    name: "header_bytes"
+    type: "int"
+    default_value {
+      i: 0
     }
   }
-  is_commutative: true
-}
-op {
-  name: "Mean"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
+  attr {
+    name: "record_bytes"
+    type: "int"
   }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  attr {
+    name: "footer_bytes"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
-    name: "keep_dims"
-    type: "bool"
+    name: "hop_bytes"
+    type: "int"
     default_value {
-      b: false
+      i: 0
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
+    name: "shared_name"
+    type: "string"
     default_value {
-      type: DT_INT32
+      s: ""
     }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+  }
+  attr {
+    name: "encoding"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
+  is_stateful: true
 }
 op {
-  name: "Mean"
+  name: "FixedUnigramCandidateSampler"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "true_classes"
+    type: DT_INT64
   }
-  input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
+  output_arg {
+    name: "sampled_candidates"
+    type: DT_INT64
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "true_expected_count"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
   }
   attr {
-    name: "keep_dims"
+    name: "num_true"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_sampled"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "unique"
     type: "bool"
+  }
+  attr {
+    name: "range_max"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "vocab_file"
+    type: "string"
     default_value {
-      b: false
+      s: ""
     }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
+    name: "distortion"
+    type: "float"
+    default_value {
+      f: 1
     }
   }
   attr {
-    name: "Tidx"
-    type: "type"
+    name: "num_reserved_ids"
+    type: "int"
     default_value {
-      type: DT_INT32
+      i: 0
     }
-    allowed_values {
+  }
+  attr {
+    name: "num_shards"
+    type: "int"
+    default_value {
+      i: 1
+    }
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shard"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "unigrams"
+    type: "list(float)"
+    default_value {
       list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
 }
 op {
-  name: "Merge"
+  name: "FixedUnigramCandidateSampler"
   input_arg {
-    name: "inputs"
-    type_attr: "T"
-    number_attr: "N"
+    name: "true_classes"
+    type: DT_INT64
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "sampled_candidates"
+    type: DT_INT64
   }
   output_arg {
-    name: "value_index"
-    type: DT_INT32
+    name: "true_expected_count"
+    type: DT_FLOAT
   }
-  attr {
-    name: "T"
-    type: "type"
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
   }
   attr {
-    name: "N"
+    name: "num_true"
     type: "int"
     has_minimum: true
     minimum: 1
   }
-}
-op {
-  name: "MergeSummary"
-  input_arg {
-    name: "inputs"
-    type: DT_STRING
-    number_attr: "N"
-  }
-  output_arg {
-    name: "summary"
-    type: DT_STRING
-  }
   attr {
-    name: "N"
+    name: "num_sampled"
     type: "int"
     has_minimum: true
     minimum: 1
   }
-}
-op {
-  name: "MergeV2Checkpoints"
-  input_arg {
-    name: "checkpoint_prefixes"
-    type: DT_STRING
+  attr {
+    name: "unique"
+    type: "bool"
   }
-  input_arg {
-    name: "destination_prefix"
-    type: DT_STRING
+  attr {
+    name: "range_max"
+    type: "int"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "delete_old_dirs"
-    type: "bool"
+    name: "vocab_file"
+    type: "string"
     default_value {
-      b: true
+      s: ""
     }
   }
-}
-op {
-  name: "MergeV2Checkpoints"
-  input_arg {
-    name: "checkpoint_prefixes"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "destination_prefix"
-    type: DT_STRING
-  }
   attr {
-    name: "delete_old_dirs"
-    type: "bool"
+    name: "distortion"
+    type: "float"
     default_value {
-      b: true
+      f: 1
     }
   }
-  is_stateful: true
-}
-op {
-  name: "Mfcc"
-  input_arg {
-    name: "spectrogram"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "sample_rate"
-    type: DT_INT32
+  attr {
+    name: "num_reserved_ids"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
-  output_arg {
-    name: "output"
-    type: DT_FLOAT
+  attr {
+    name: "num_shards"
+    type: "int"
+    default_value {
+      i: 1
+    }
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "upper_frequency_limit"
-    type: "float"
+    name: "shard"
+    type: "int"
     default_value {
-      f: 4000
+      i: 0
     }
+    has_minimum: true
   }
   attr {
-    name: "lower_frequency_limit"
-    type: "float"
+    name: "unigrams"
+    type: "list(float)"
     default_value {
-      f: 20
+      list {
+      }
     }
   }
   attr {
-    name: "filterbank_channel_count"
+    name: "seed"
     type: "int"
     default_value {
-      i: 40
+      i: 0
     }
   }
   attr {
-    name: "dct_coefficient_count"
+    name: "seed2"
     type: "int"
     default_value {
-      i: 13
+      i: 0
     }
   }
+  is_stateful: true
 }
 op {
-  name: "Min"
+  name: "FlatMapDataset"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
+    name: "other_arguments"
+    type_list_attr: "Targuments"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
+    name: "handle"
+    type: DT_VARIANT
+  }
   attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "f"
+    type: "func"
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
   }
   attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
 }
 op {
-  name: "Min"
+  name: "FlatMapDataset"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
+    name: "other_arguments"
+    type_list_attr: "Targuments"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "Floor"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "Minimum"
+  name: "Floor"
   input_arg {
     name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "y"
-    type_attr: "T"
-  }
   output_arg {
-    name: "z"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -19574,101 +19207,84 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
-  is_commutative: true
 }
 op {
-  name: "MirrorPad"
+  name: "FloorDiv"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tpaddings"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "mode"
-    type: "string"
-    allowed_values {
-      list {
-        s: "REFLECT"
-        s: "SYMMETRIC"
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "MirrorPadGrad"
+  name: "FloorDiv"
   input_arg {
-    name: "input"
+    name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tpaddings"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "mode"
-    type: "string"
-    allowed_values {
-      list {
-        s: "REFLECT"
-        s: "SYMMETRIC"
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Mod"
+  name: "FloorMod"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -19695,7 +19311,7 @@ op {
   }
 }
 op {
-  name: "Mul"
+  name: "FloorMod"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -19713,36 +19329,60 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  is_commutative: true
 }
 op {
-  name: "Multinomial"
+  name: "FractionalAvgPool"
   input_arg {
-    name: "logits"
+    name: "value"
     type_attr: "T"
   }
-  input_arg {
-    name: "num_samples"
-    type: DT_INT32
-  }
   output_arg {
     name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "row_pooling_sequence"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "col_pooling_sequence"
     type: DT_INT64
   }
+  attr {
+    name: "pooling_ratio"
+    type: "list(float)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "pseudo_random"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "overlapping"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "deterministic"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   attr {
     name: "seed"
     type: "int"
@@ -19766,42 +19406,37 @@ op {
         type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "Multinomial"
+  name: "FractionalAvgPoolGrad"
   input_arg {
-    name: "logits"
+    name: "orig_input_tensor_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "out_backprop"
     type_attr: "T"
   }
   input_arg {
-    name: "num_samples"
-    type: DT_INT32
+    name: "row_pooling_sequence"
+    type: DT_INT64
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "col_pooling_sequence"
     type: DT_INT64
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "seed2"
-    type: "int"
+    name: "overlapping"
+    type: "bool"
     default_value {
-      i: 0
+      b: false
     }
   }
   attr {
@@ -19813,454 +19448,565 @@ op {
         type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "MutableDenseHashTable"
+  name: "FractionalMaxPool"
   input_arg {
-    name: "empty_key"
-    type_attr: "key_dtype"
+    name: "value"
+    type_attr: "T"
   }
   output_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "row_pooling_sequence"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "col_pooling_sequence"
+    type: DT_INT64
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "pooling_ratio"
+    type: "list(float)"
+    has_minimum: true
+    minimum: 4
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "pseudo_random"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   attr {
-    name: "use_node_name_sharing"
+    name: "overlapping"
     type: "bool"
     default_value {
       b: false
     }
   }
   attr {
-    name: "key_dtype"
-    type: "type"
-  }
-  attr {
-    name: "value_dtype"
-    type: "type"
+    name: "deterministic"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "value_shape"
-    type: "shape"
+    name: "seed"
+    type: "int"
     default_value {
-      shape {
-      }
+      i: 0
     }
   }
   attr {
-    name: "initial_num_buckets"
+    name: "seed2"
     type: "int"
     default_value {
-      i: 131072
+      i: 0
     }
   }
   attr {
-    name: "max_load_factor"
-    type: "float"
-    default_value {
-      f: 0.8
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "MutableDenseHashTableV2"
+  name: "FractionalMaxPoolGrad"
   input_arg {
-    name: "empty_key"
-    type_attr: "key_dtype"
+    name: "orig_input"
+    type_attr: "T"
   }
-  output_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "out_backprop"
+    type_attr: "T"
   }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "row_pooling_sequence"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "col_pooling_sequence"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "use_node_name_sharing"
+    name: "overlapping"
     type: "bool"
     default_value {
       b: false
     }
   }
   attr {
-    name: "key_dtype"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
-  attr {
-    name: "value_dtype"
-    type: "type"
+}
+op {
+  name: "FusedBatchNorm"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "scale"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "offset"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "mean"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "variance"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "batch_mean"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "batch_variance"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space_1"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space_2"
+    type_attr: "T"
   }
   attr {
-    name: "value_shape"
-    type: "shape"
-    default_value {
-      shape {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
       }
     }
   }
   attr {
-    name: "initial_num_buckets"
-    type: "int"
+    name: "epsilon"
+    type: "float"
     default_value {
-      i: 131072
+      f: 0.0001
     }
   }
   attr {
-    name: "max_load_factor"
-    type: "float"
+    name: "data_format"
+    type: "string"
     default_value {
-      f: 0.8
+      s: "NHWC"
     }
   }
-  is_stateful: true
-}
-op {
-  name: "MutableHashTable"
-  output_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
-  }
   attr {
-    name: "container"
-    type: "string"
+    name: "is_training"
+    type: "bool"
     default_value {
-      s: ""
+      b: true
     }
   }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+}
+op {
+  name: "FusedBatchNormGrad"
+  input_arg {
+    name: "y_backprop"
+    type_attr: "T"
   }
-  attr {
-    name: "use_node_name_sharing"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "key_dtype"
-    type: "type"
+  input_arg {
+    name: "scale"
+    type_attr: "T"
   }
-  attr {
-    name: "value_dtype"
-    type: "type"
+  input_arg {
+    name: "reserve_space_1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reserve_space_2"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "MutableHashTableOfTensors"
   output_arg {
-    name: "table_handle"
-    type: DT_STRING
-    is_ref: true
+    name: "x_backprop"
+    type_attr: "T"
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  output_arg {
+    name: "scale_backprop"
+    type_attr: "T"
   }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  output_arg {
+    name: "offset_backprop"
+    type_attr: "T"
   }
-  attr {
-    name: "use_node_name_sharing"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  output_arg {
+    name: "reserve_space_3"
+    type_attr: "T"
   }
-  attr {
-    name: "key_dtype"
-    type: "type"
+  output_arg {
+    name: "reserve_space_4"
+    type_attr: "T"
   }
   attr {
-    name: "value_dtype"
+    name: "T"
     type: "type"
-  }
-  attr {
-    name: "value_shape"
-    type: "shape"
-    default_value {
-      shape {
+    allowed_values {
+      list {
+        type: DT_FLOAT
       }
     }
   }
-  is_stateful: true
-}
-op {
-  name: "MutableHashTableOfTensorsV2"
-  output_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
   attr {
-    name: "container"
-    type: "string"
+    name: "epsilon"
+    type: "float"
     default_value {
-      s: ""
+      f: 0.0001
     }
   }
   attr {
-    name: "shared_name"
+    name: "data_format"
     type: "string"
     default_value {
-      s: ""
+      s: "NHWC"
     }
   }
   attr {
-    name: "use_node_name_sharing"
+    name: "is_training"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
-  attr {
-    name: "key_dtype"
-    type: "type"
+}
+op {
+  name: "FusedBatchNormGradV2"
+  input_arg {
+    name: "y_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "scale"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "reserve_space_1"
+    type_attr: "U"
+  }
+  input_arg {
+    name: "reserve_space_2"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "x_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "scale_backprop"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "offset_backprop"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_3"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_4"
+    type_attr: "U"
   }
   attr {
-    name: "value_dtype"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+      }
+    }
   }
   attr {
-    name: "value_shape"
-    type: "shape"
-    default_value {
-      shape {
+    name: "U"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
       }
     }
   }
-  is_stateful: true
-}
-op {
-  name: "MutableHashTableV2"
-  output_arg {
-    name: "table_handle"
-    type: DT_RESOURCE
-  }
   attr {
-    name: "container"
-    type: "string"
+    name: "epsilon"
+    type: "float"
     default_value {
-      s: ""
+      f: 0.0001
     }
   }
   attr {
-    name: "shared_name"
+    name: "data_format"
     type: "string"
     default_value {
-      s: ""
+      s: "NHWC"
     }
   }
   attr {
-    name: "use_node_name_sharing"
+    name: "is_training"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
-  attr {
-    name: "key_dtype"
-    type: "type"
-  }
-  attr {
-    name: "value_dtype"
-    type: "type"
-  }
-  is_stateful: true
 }
 op {
-  name: "Neg"
+  name: "FusedBatchNormGradV2"
+  input_arg {
+    name: "y_backprop"
+    type_attr: "T"
+  }
   input_arg {
     name: "x"
     type_attr: "T"
   }
+  input_arg {
+    name: "scale"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "reserve_space_1"
+    type_attr: "U"
+  }
+  input_arg {
+    name: "reserve_space_2"
+    type_attr: "U"
+  }
   output_arg {
-    name: "y"
+    name: "x_backprop"
     type_attr: "T"
   }
+  output_arg {
+    name: "scale_backprop"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "offset_backprop"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_3"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_4"
+    type_attr: "U"
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "U"
+    type: "type"
+    allowed_values {
+      list {
         type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
+  attr {
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
 }
 op {
-  name: "NegTrain"
+  name: "FusedBatchNormV2"
   input_arg {
-    name: "w_in"
-    type: DT_FLOAT
-    is_ref: true
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "w_out"
-    type: DT_FLOAT
-    is_ref: true
+    name: "scale"
+    type_attr: "U"
   }
   input_arg {
-    name: "examples"
-    type: DT_INT32
+    name: "offset"
+    type_attr: "U"
   }
   input_arg {
-    name: "labels"
-    type: DT_INT32
+    name: "mean"
+    type_attr: "U"
   }
   input_arg {
-    name: "lr"
-    type: DT_FLOAT
+    name: "variance"
+    type_attr: "U"
   }
-  attr {
-    name: "vocab_count"
-    type: "list(int)"
+  output_arg {
+    name: "y"
+    type_attr: "T"
   }
-  attr {
-    name: "num_negative_samples"
-    type: "int"
+  output_arg {
+    name: "batch_mean"
+    type_attr: "U"
   }
-  deprecation {
-    version: 19
+  output_arg {
+    name: "batch_variance"
+    type_attr: "U"
   }
-  is_stateful: true
-}
-op {
-  name: "NextIteration"
-  input_arg {
-    name: "data"
-    type_attr: "T"
+  output_arg {
+    name: "reserve_space_1"
+    type_attr: "U"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "reserve_space_2"
+    type_attr: "U"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+      }
+    }
   }
-}
-op {
-  name: "NoOp"
-}
-op {
-  name: "NonMaxSuppression"
-  input_arg {
-    name: "boxes"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "scores"
-    type: DT_FLOAT
+  attr {
+    name: "U"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
   }
-  input_arg {
-    name: "max_output_size"
-    type: DT_INT32
+  attr {
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
+    }
   }
-  output_arg {
-    name: "selected_indices"
-    type: DT_INT32
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
   }
   attr {
-    name: "iou_threshold"
-    type: "float"
+    name: "is_training"
+    type: "bool"
     default_value {
-      f: 0.5
+      b: true
     }
   }
 }
 op {
-  name: "NonMaxSuppressionV2"
+  name: "FusedBatchNormV2"
   input_arg {
-    name: "boxes"
-    type: DT_FLOAT
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "scores"
-    type: DT_FLOAT
+    name: "scale"
+    type_attr: "U"
   }
   input_arg {
-    name: "max_output_size"
-    type: DT_INT32
+    name: "offset"
+    type_attr: "U"
   }
   input_arg {
-    name: "iou_threshold"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "selected_indices"
-    type: DT_INT32
+    name: "mean"
+    type_attr: "U"
   }
-}
-op {
-  name: "NotEqual"
   input_arg {
-    name: "x"
-    type_attr: "T"
+    name: "variance"
+    type_attr: "U"
   }
-  input_arg {
+  output_arg {
     name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type: DT_BOOL
+    name: "batch_mean"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "batch_variance"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_1"
+    type_attr: "U"
+  }
+  output_arg {
+    name: "reserve_space_2"
+    type_attr: "U"
   }
   attr {
     name: "T"
@@ -20268,45 +20014,59 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_QUINT8
-        type: DT_QINT8
-        type: DT_QINT32
-        type: DT_STRING
-        type: DT_BOOL
-        type: DT_COMPLEX128
       }
     }
   }
-  is_commutative: true
+  attr {
+    name: "U"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "epsilon"
+    type: "float"
+    default_value {
+      f: 0.0001
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
 }
 op {
-  name: "NthElement"
+  name: "FusedPadConv2D"
   input_arg {
     name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "n"
+    name: "paddings"
     type: DT_INT32
   }
-  output_arg {
-    name: "values"
+  input_arg {
+    name: "filter"
     type_attr: "T"
   }
-  attr {
-    name: "reverse"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -20314,114 +20074,54 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "OneHot"
-  input_arg {
-    name: "indices"
-    type_attr: "TI"
-  }
-  input_arg {
-    name: "depth"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "on_value"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "off_value"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
   attr {
-    name: "axis"
-    type: "int"
-    default_value {
-      i: -1
+    name: "mode"
+    type: "string"
+    allowed_values {
+      list {
+        s: "REFLECT"
+        s: "SYMMETRIC"
+      }
     }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "strides"
+    type: "list(int)"
   }
   attr {
-    name: "TI"
-    type: "type"
-    default_value {
-      type: DT_INT64
-    }
+    name: "padding"
+    type: "string"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT32
-        type: DT_INT64
+        s: "SAME"
+        s: "VALID"
       }
     }
   }
 }
 op {
-  name: "OneShotIterator"
-  output_arg {
-    name: "handle"
-    type: DT_RESOURCE
-  }
-  attr {
-    name: "dataset_factory"
-    type: "func"
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+  name: "FusedResizeAndPadConv2D"
+  input_arg {
+    name: "input"
+    type_attr: "T"
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "size"
+    type: DT_INT32
   }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "paddings"
+    type: DT_INT32
   }
-  is_stateful: true
-}
-op {
-  name: "OnesLike"
   input_arg {
-    name: "x"
+    name: "filter"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -20430,745 +20130,645 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
-}
-op {
-  name: "OrderedMapClear"
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
-  }
   attr {
-    name: "container"
-    type: "string"
+    name: "resize_align_corners"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   attr {
-    name: "shared_name"
+    name: "mode"
     type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  is_stateful: true
-}
-op {
-  name: "OrderedMapIncompleteSize"
-  output_arg {
-    name: "size"
-    type: DT_INT32
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
+    allowed_values {
+      list {
+        s: "REFLECT"
+        s: "SYMMETRIC"
+      }
     }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "strides"
+    type: "list(int)"
   }
   attr {
-    name: "shared_name"
+    name: "padding"
     type: "string"
-    default_value {
-      s: ""
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "OrderedMapPeek"
+  name: "Gather"
   input_arg {
-    name: "key"
-    type: DT_INT64
+    name: "params"
+    type_attr: "Tparams"
   }
   input_arg {
     name: "indices"
-    type: DT_INT32
+    type_attr: "Tindices"
   }
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "output"
+    type_attr: "Tparams"
   }
   attr {
-    name: "memory_limit"
-    type: "int"
+    name: "validate_indices"
+    type: "bool"
     default_value {
-      i: 0
+      b: true
     }
-    has_minimum: true
-  }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "Tparams"
+    type: "type"
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "OrderedMapSize"
-  output_arg {
-    name: "size"
-    type: DT_INT32
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+  name: "GatherNd"
+  input_arg {
+    name: "params"
+    type_attr: "Tparams"
   }
-  attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
+  output_arg {
+    name: "output"
+    type_attr: "Tparams"
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+    name: "Tparams"
+    type: "type"
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "OrderedMapStage"
+  name: "GatherV2"
   input_arg {
-    name: "key"
-    type: DT_INT64
+    name: "params"
+    type_attr: "Tparams"
   }
   input_arg {
     name: "indices"
-    type: DT_INT32
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "values"
-    type_list_attr: "fake_dtypes"
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "memory_limit"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "axis"
+    type_attr: "Taxis"
   }
-  attr {
-    name: "dtypes"
-    type: "list(type)"
+  output_arg {
+    name: "output"
+    type_attr: "Tparams"
   }
   attr {
-    name: "fake_dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "Tparams"
+    type: "type"
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "Taxis"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "OrderedMapUnstage"
+  name: "GenerateVocabRemapping"
   input_arg {
-    name: "key"
-    type: DT_INT64
+    name: "new_vocab_file"
+    type: DT_STRING
   }
   input_arg {
-    name: "indices"
-    type: DT_INT32
+    name: "old_vocab_file"
+    type: DT_STRING
   }
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
+    name: "remapping"
+    type: DT_INT64
   }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+  output_arg {
+    name: "num_present"
+    type: DT_INT32
   }
   attr {
-    name: "memory_limit"
+    name: "new_vocab_offset"
     type: "int"
-    default_value {
-      i: 0
-    }
     has_minimum: true
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
+    name: "num_new_vocab"
+    type: "int"
     has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
   }
-  is_stateful: true
 }
 op {
-  name: "OrderedMapUnstageNoKey"
+  name: "GenerateVocabRemapping"
   input_arg {
-    name: "indices"
-    type: DT_INT32
+    name: "new_vocab_file"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "old_vocab_file"
+    type: DT_STRING
   }
   output_arg {
-    name: "key"
+    name: "remapping"
     type: DT_INT64
   }
   output_arg {
-    name: "values"
-    type_list_attr: "dtypes"
+    name: "num_present"
+    type: DT_INT32
   }
   attr {
-    name: "capacity"
+    name: "new_vocab_offset"
     type: "int"
-    default_value {
-      i: 0
-    }
     has_minimum: true
   }
   attr {
-    name: "memory_limit"
+    name: "num_new_vocab"
     type: "int"
-    default_value {
-      i: 0
-    }
     has_minimum: true
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
+    name: "old_vocab_size"
+    type: "int"
     default_value {
-      s: ""
+      i: -1
     }
+    has_minimum: true
+    minimum: -1
   }
-  is_stateful: true
 }
 op {
-  name: "Pack"
+  name: "GetSessionHandle"
   input_arg {
-    name: "values"
+    name: "value"
     type_attr: "T"
-    number_attr: "N"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "handle"
+    type: DT_STRING
   }
   attr {
     name: "T"
     type: "type"
   }
-  attr {
-    name: "axis"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
 }
 op {
-  name: "Pad"
+  name: "GetSessionHandle"
   input_arg {
-    name: "input"
+    name: "value"
     type_attr: "T"
   }
-  input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
-  }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
   }
   attr {
     name: "T"
     type: "type"
   }
-  attr {
-    name: "Tpaddings"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  deprecation {
+    version: 23
   }
 }
 op {
-  name: "PadV2"
+  name: "GetSessionHandle"
   input_arg {
-    name: "input"
+    name: "value"
     type_attr: "T"
   }
-  input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
   }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "GetSessionHandle"
   input_arg {
-    name: "constant_values"
+    name: "value"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
   }
   attr {
     name: "T"
     type: "type"
   }
-  attr {
-    name: "Tpaddings"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
+  is_stateful: true
 }
 op {
-  name: "PaddedBatchDataset"
+  name: "GetSessionHandleV2"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "value"
+    type_attr: "T"
   }
-  input_arg {
-    name: "batch_size"
-    type: DT_INT64
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
   }
-  input_arg {
-    name: "padded_shapes"
-    type: DT_INT64
-    number_attr: "N"
+  attr {
+    name: "T"
+    type: "type"
   }
+  is_stateful: true
+}
+op {
+  name: "GetSessionTensor"
   input_arg {
-    name: "padding_values"
-    type_list_attr: "Toutput_types"
+    name: "handle"
+    type: DT_STRING
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "value"
+    type_attr: "dtype"
   }
   attr {
-    name: "Toutput_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "dtype"
+    type: "type"
   }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+}
+op {
+  name: "GetSessionTensor"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "value"
+    type_attr: "dtype"
   }
   attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "dtype"
+    type: "type"
   }
   is_stateful: true
 }
 op {
-  name: "PaddedBatchDataset"
+  name: "Greater"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "batch_size"
-    type: DT_INT64
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
   }
+}
+op {
+  name: "Greater"
   input_arg {
-    name: "padded_shapes"
-    type: DT_INT64
-    number_attr: "N"
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "padding_values"
-    type_list_attr: "Toutput_types"
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "Toutput_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "z"
+    type: DT_BOOL
   }
   attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
 }
 op {
-  name: "PaddingFIFOQueue"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+  name: "Greater"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
   }
   attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
-    has_minimum: true
   }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
+}
+op {
+  name: "Greater"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "PaddingFIFOQueueV2"
-  output_arg {
-    name: "handle"
-    type: DT_RESOURCE
+  name: "GreaterEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
   }
   attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
-    has_minimum: true
   }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
+}
+op {
+  name: "GreaterEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ParallelConcat"
+  name: "GreaterEqual"
   input_arg {
-    name: "values"
+    name: "x"
     type_attr: "T"
-    number_attr: "N"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "y"
     type_attr: "T"
   }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+  output_arg {
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "shape"
-    type: "shape"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
 }
 op {
-  name: "ParallelDynamicStitch"
+  name: "GreaterEqual"
   input_arg {
-    name: "indices"
-    type: DT_INT32
-    number_attr: "N"
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "data"
+    name: "y"
     type_attr: "T"
-    number_attr: "N"
   }
   output_arg {
-    name: "merged"
-    type_attr: "T"
-  }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "z"
+    type: DT_BOOL
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
 }
 op {
-  name: "ParallelInterleaveDataset"
+  name: "GroupByWindowDataset"
   input_arg {
     name: "input_dataset"
     type: DT_VARIANT
   }
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
-  }
-  input_arg {
-    name: "cycle_length"
-    type: DT_INT64
+    name: "key_func_other_arguments"
+    type_list_attr: "Tkey_func_other_arguments"
   }
   input_arg {
-    name: "block_length"
-    type: DT_INT64
+    name: "reduce_func_other_arguments"
+    type_list_attr: "Treduce_func_other_arguments"
   }
   input_arg {
-    name: "sloppy"
-    type: DT_BOOL
+    name: "window_size_func_other_arguments"
+    type_list_attr: "Twindow_size_func_other_arguments"
   }
   output_arg {
     name: "handle"
     type: DT_VARIANT
   }
   attr {
-    name: "f"
+    name: "key_func"
     type: "func"
   }
   attr {
-    name: "Targuments"
-    type: "list(type)"
-    has_minimum: true
+    name: "reduce_func"
+    type: "func"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "window_size_func"
+    type: "func"
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
+    name: "Tkey_func_other_arguments"
+    type: "list(type)"
     has_minimum: true
-    minimum: 1
-  }
-}
-op {
-  name: "ParallelMapDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
-  }
-  input_arg {
-    name: "num_parallel_calls"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
   }
   attr {
-    name: "f"
-    type: "func"
+    name: "Treduce_func_other_arguments"
+    type: "list(type)"
+    has_minimum: true
   }
   attr {
-    name: "Targuments"
+    name: "Twindow_size_func_other_arguments"
     type: "list(type)"
     has_minimum: true
   }
@@ -21187,29 +20787,51 @@ op {
   is_stateful: true
 }
 op {
-  name: "ParallelMapDataset"
+  name: "GroupByWindowDataset"
   input_arg {
     name: "input_dataset"
     type: DT_VARIANT
   }
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "key_func_other_arguments"
+    type_list_attr: "Tkey_func_other_arguments"
   }
   input_arg {
-    name: "num_parallel_calls"
-    type: DT_INT32
+    name: "reduce_func_other_arguments"
+    type_list_attr: "Treduce_func_other_arguments"
+  }
+  input_arg {
+    name: "window_size_func_other_arguments"
+    type_list_attr: "Twindow_size_func_other_arguments"
   }
   output_arg {
     name: "handle"
     type: DT_VARIANT
   }
   attr {
-    name: "f"
+    name: "key_func"
     type: "func"
   }
   attr {
-    name: "Targuments"
+    name: "reduce_func"
+    type: "func"
+  }
+  attr {
+    name: "window_size_func"
+    type: "func"
+  }
+  attr {
+    name: "Tkey_func_other_arguments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Treduce_func_other_arguments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Twindow_size_func_other_arguments"
     type: "list(type)"
     has_minimum: true
   }
@@ -21227,457 +20849,21044 @@ op {
   }
 }
 op {
-  name: "ParameterizedTruncatedNormal"
+  name: "GuaranteeConst"
   input_arg {
-    name: "shape"
+    name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "means"
-    type_attr: "dtype"
-  }
-  input_arg {
-    name: "stdevs"
-    type_attr: "dtype"
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
-  input_arg {
-    name: "minvals"
-    type_attr: "dtype"
+  attr {
+    name: "T"
+    type: "type"
   }
+  is_stateful: true
+}
+op {
+  name: "HSVToRGB"
   input_arg {
-    name: "maxvals"
-    type_attr: "dtype"
+    name: "images"
+    type_attr: "T"
   }
   output_arg {
     name: "output"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+    type_attr: "T"
   }
   attr {
-    name: "seed2"
-    type: "int"
+    name: "T"
+    type: "type"
     default_value {
-      i: 0
+      type: DT_FLOAT
     }
-  }
-  attr {
-    name: "dtype"
-    type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
+}
+op {
+  name: "HSVToRGB"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
     name: "T"
     type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ParseExample"
-  input_arg {
-    name: "serialized"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "names"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "sparse_keys"
-    type: DT_STRING
-    number_attr: "Nsparse"
-  }
-  input_arg {
-    name: "dense_keys"
-    type: DT_STRING
-    number_attr: "Ndense"
-  }
-  input_arg {
-    name: "dense_defaults"
-    type_list_attr: "Tdense"
-  }
-  output_arg {
-    name: "sparse_indices"
-    type: DT_INT64
-    number_attr: "Nsparse"
-  }
-  output_arg {
-    name: "sparse_values"
-    type_list_attr: "sparse_types"
-  }
-  output_arg {
-    name: "sparse_shapes"
-    type: DT_INT64
-    number_attr: "Nsparse"
-  }
+  name: "HashTable"
   output_arg {
-    name: "dense_values"
-    type_list_attr: "Tdense"
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
   }
   attr {
-    name: "Nsparse"
-    type: "int"
-    has_minimum: true
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "Ndense"
-    type: "int"
-    has_minimum: true
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
   attr {
-    name: "sparse_types"
-    type: "list(type)"
-    has_minimum: true
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_INT64
-        type: DT_STRING
-      }
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   attr {
-    name: "Tdense"
-    type: "list(type)"
-    has_minimum: true
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_INT64
-        type: DT_STRING
-      }
-    }
+    name: "key_dtype"
+    type: "type"
   }
   attr {
-    name: "dense_shapes"
-    type: "list(shape)"
-    has_minimum: true
+    name: "value_dtype"
+    type: "type"
   }
+  is_stateful: true
 }
 op {
-  name: "ParseSingleSequenceExample"
-  input_arg {
-    name: "serialized"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "feature_list_dense_missing_assumed_empty"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "context_sparse_keys"
-    type: DT_STRING
-    number_attr: "Ncontext_sparse"
-  }
-  input_arg {
-    name: "context_dense_keys"
-    type: DT_STRING
-    number_attr: "Ncontext_dense"
-  }
-  input_arg {
-    name: "feature_list_sparse_keys"
-    type: DT_STRING
-    number_attr: "Nfeature_list_sparse"
-  }
-  input_arg {
-    name: "feature_list_dense_keys"
-    type: DT_STRING
-    number_attr: "Nfeature_list_dense"
-  }
-  input_arg {
-    name: "context_dense_defaults"
-    type_list_attr: "Tcontext_dense"
-  }
-  input_arg {
-    name: "debug_name"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "context_sparse_indices"
-    type: DT_INT64
-    number_attr: "Ncontext_sparse"
-  }
-  output_arg {
-    name: "context_sparse_values"
-    type_list_attr: "context_sparse_types"
-  }
-  output_arg {
-    name: "context_sparse_shapes"
-    type: DT_INT64
-    number_attr: "Ncontext_sparse"
-  }
-  output_arg {
-    name: "context_dense_values"
-    type_list_attr: "Tcontext_dense"
-  }
-  output_arg {
-    name: "feature_list_sparse_indices"
-    type: DT_INT64
-    number_attr: "Nfeature_list_sparse"
-  }
-  output_arg {
-    name: "feature_list_sparse_values"
-    type_list_attr: "feature_list_sparse_types"
-  }
-  output_arg {
-    name: "feature_list_sparse_shapes"
-    type: DT_INT64
-    number_attr: "Nfeature_list_sparse"
-  }
+  name: "HashTableV2"
   output_arg {
-    name: "feature_list_dense_values"
-    type_list_attr: "feature_list_dense_types"
+    name: "table_handle"
+    type: DT_RESOURCE
   }
   attr {
-    name: "Ncontext_sparse"
-    type: "int"
+    name: "container"
+    type: "string"
     default_value {
-      i: 0
+      s: ""
     }
-    has_minimum: true
   }
   attr {
-    name: "Ncontext_dense"
-    type: "int"
+    name: "shared_name"
+    type: "string"
     default_value {
-      i: 0
+      s: ""
     }
-    has_minimum: true
   }
   attr {
-    name: "Nfeature_list_sparse"
-    type: "int"
+    name: "use_node_name_sharing"
+    type: "bool"
     default_value {
-      i: 0
+      b: false
     }
-    has_minimum: true
   }
   attr {
-    name: "Nfeature_list_dense"
-    type: "int"
-    default_value {
-      i: 0
-    }
-    has_minimum: true
+    name: "key_dtype"
+    type: "type"
   }
   attr {
-    name: "context_sparse_types"
-    type: "list(type)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
+    name: "value_dtype"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "HistogramFixedWidth"
+  input_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "value_range"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "nbins"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "out"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "T"
+    type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
+        type: DT_INT32
         type: DT_INT64
-        type: DT_STRING
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "Tcontext_dense"
-    type: "list(type)"
+    name: "dtype"
+    type: "type"
     default_value {
-      list {
-      }
+      type: DT_INT32
     }
-    has_minimum: true
     allowed_values {
       list {
-        type: DT_FLOAT
+        type: DT_INT32
         type: DT_INT64
-        type: DT_STRING
       }
     }
   }
+}
+op {
+  name: "HistogramSummary"
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "summary"
+    type: DT_STRING
+  }
   attr {
-    name: "feature_list_dense_types"
-    type: "list(type)"
+    name: "T"
+    type: "type"
     default_value {
-      list {
-      }
+      type: DT_FLOAT
     }
-    has_minimum: true
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
         type: DT_INT64
-        type: DT_STRING
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
   }
-  attr {
-    name: "context_dense_shapes"
-    type: "list(shape)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
+}
+op {
+  name: "HistogramSummary"
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "summary"
+    type: DT_STRING
   }
   attr {
-    name: "feature_list_sparse_types"
-    type: "list(type)"
+    name: "T"
+    type: "type"
     default_value {
-      list {
-      }
+      type: DT_FLOAT
     }
-    has_minimum: true
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
         type: DT_INT64
-        type: DT_STRING
-      }
-    }
-  }
-  attr {
-    name: "feature_list_dense_shapes"
-    type: "list(shape)"
-    default_value {
-      list {
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
-    has_minimum: true
   }
 }
 op {
-  name: "ParseTensor"
+  name: "HistogramSummary"
   input_arg {
-    name: "serialized"
+    name: "tag"
     type: DT_STRING
   }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
-  }
-  attr {
-    name: "out_type"
-    type: "type"
+  input_arg {
+    name: "values"
+    type_attr: "T"
   }
-}
-op {
-  name: "Placeholder"
   output_arg {
-    name: "output"
-    type_attr: "dtype"
+    name: "summary"
+    type: DT_STRING
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
-  }
-  attr {
-    name: "shape"
-    type: "shape"
     default_value {
-      shape {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "Placeholder"
+  name: "HistogramSummary"
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "values"
+    type_attr: "T"
+  }
   output_arg {
-    name: "output"
-    type_attr: "dtype"
+    name: "summary"
+    type: DT_STRING
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
-  }
-  attr {
-    name: "shape"
-    type: "shape"
     default_value {
-      shape {
-        unknown_rank: true
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "PlaceholderV2"
+  name: "IFFT"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
+  }
   output_arg {
     name: "output"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "dtype"
-    type: "type"
-  }
-  attr {
-    name: "shape"
-    type: "shape"
+    type: DT_COMPLEX64
   }
 }
 op {
-  name: "PlaceholderV2"
+  name: "IFFT2D"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
+  }
   output_arg {
     name: "output"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "dtype"
-    type: "type"
+    type: DT_COMPLEX64
   }
-  attr {
-    name: "shape"
-    type: "shape"
+}
+op {
+  name: "IFFT3D"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
   }
-  deprecation {
-    version: 23
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
   }
 }
 op {
-  name: "PlaceholderWithDefault"
+  name: "IRFFT"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
+  }
+  input_arg {
+    name: "fft_length"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
+  }
+}
+op {
+  name: "IRFFT2D"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
+  }
+  input_arg {
+    name: "fft_length"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
+  }
+}
+op {
+  name: "IRFFT3D"
+  input_arg {
+    name: "input"
+    type: DT_COMPLEX64
+  }
+  input_arg {
+    name: "fft_length"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
+  }
+}
+op {
+  name: "Identity"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "IdentityN"
+  input_arg {
+    name: "input"
+    type_list_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "IdentityReader"
+  output_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "IdentityReaderV2"
+  output_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Igamma"
+  input_arg {
+    name: "a"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "Igammac"
+  input_arg {
+    name: "a"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IgnoreErrorsDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "IgnoreErrorsDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "Imag"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_COMPLEX64
+    }
+    allowed_values {
+      list {
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "ImageSummary"
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "tensor"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "summary"
+    type: DT_STRING
+  }
+  attr {
+    name: "max_images"
+    type: "int"
+    default_value {
+      i: 3
+    }
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "bad_color"
+    type: "tensor"
+    default_value {
+      tensor {
+        dtype: DT_UINT8
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        int_val: 255
+        int_val: 0
+        int_val: 0
+        int_val: 255
+      }
+    }
+  }
+}
+op {
+  name: "ImageSummary"
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "tensor"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "summary"
+    type: DT_STRING
+  }
+  attr {
+    name: "max_images"
+    type: "int"
+    default_value {
+      i: 3
+    }
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_FLOAT
+        type: DT_HALF
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "bad_color"
+    type: "tensor"
+    default_value {
+      tensor {
+        dtype: DT_UINT8
+        tensor_shape {
+          dim {
+            size: 4
+          }
+        }
+        int_val: 255
+        int_val: 0
+        int_val: 0
+        int_val: 255
+      }
+    }
+  }
+}
+op {
+  name: "ImmutableConst"
+  output_arg {
+    name: "tensor"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "memory_region_name"
+    type: "string"
+  }
+}
+op {
+  name: "InTopK"
+  input_arg {
+    name: "predictions"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "targets"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "precision"
+    type: DT_BOOL
+  }
+  attr {
+    name: "k"
+    type: "int"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "InTopKV2"
+  input_arg {
+    name: "predictions"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "targets"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "k"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "precision"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "InitializeTable"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tkey"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "Tval"
+  }
+  attr {
+    name: "Tkey"
+    type: "type"
+  }
+  attr {
+    name: "Tval"
+    type: "type"
+  }
+}
+op {
+  name: "InitializeTableFromTextFile"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "filename"
+    type: DT_STRING
+  }
+  attr {
+    name: "key_index"
+    type: "int"
+    has_minimum: true
+    minimum: -2
+  }
+  attr {
+    name: "value_index"
+    type: "int"
+    has_minimum: true
+    minimum: -2
+  }
+  attr {
+    name: "vocab_size"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "delimiter"
+    type: "string"
+    default_value {
+      s: "\t"
+    }
+  }
+}
+op {
+  name: "InitializeTableFromTextFileV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "filename"
+    type: DT_STRING
+  }
+  attr {
+    name: "key_index"
+    type: "int"
+    has_minimum: true
+    minimum: -2
+  }
+  attr {
+    name: "value_index"
+    type: "int"
+    has_minimum: true
+    minimum: -2
+  }
+  attr {
+    name: "vocab_size"
+    type: "int"
+    default_value {
+      i: -1
+    }
+    has_minimum: true
+    minimum: -1
+  }
+  attr {
+    name: "delimiter"
+    type: "string"
+    default_value {
+      s: "\t"
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "InitializeTableV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tkey"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "Tval"
+  }
+  attr {
+    name: "Tkey"
+    type: "type"
+  }
+  attr {
+    name: "Tval"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "InterleaveDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "cycle_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "block_length"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "InterleaveDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "cycle_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "block_length"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "Inv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  deprecation {
+    version: 17
+  }
+}
+op {
+  name: "Inv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  deprecation {
+    version: 17
+  }
+}
+op {
+  name: "Inv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Inv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  deprecation {
+    version: 17
+  }
+}
+op {
+  name: "Inv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "InvGrad"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  deprecation {
+    version: 17
+  }
+}
+op {
+  name: "InvGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  deprecation {
+    version: 17
+  }
+}
+op {
+  name: "InvGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  deprecation {
+    version: 17
+  }
+}
+op {
+  name: "InvGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "InvGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  deprecation {
+    version: 17
+  }
+}
+op {
+  name: "InvGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Invert"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+      }
+    }
+  }
+}
+op {
+  name: "Invert"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "InvertPermutation"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "IsFinite"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsFinite"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsInf"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsInf"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsNan"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsNan"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "IsVariableInitialized"
+  input_arg {
+    name: "ref"
+    type_attr: "dtype"
+    is_ref: true
+  }
+  output_arg {
+    name: "is_initialized"
+    type: DT_BOOL
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  allows_uninitialized_input: true
+}
+op {
+  name: "Iterator"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+  }
+  attr {
+    name: "container"
+    type: "string"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "IteratorFromStringHandle"
+  input_arg {
+    name: "string_handle"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "resource_handle"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "IteratorFromStringHandle"
+  input_arg {
+    name: "string_handle"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "resource_handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  is_stateful: true
+}
+op {
+  name: "IteratorGetNext"
+  input_arg {
+    name: "iterator"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "IteratorSetStatsAggregator"
+  input_arg {
+    name: "iterator_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "stats_aggregator_handle"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "IteratorToStringHandle"
+  input_arg {
+    name: "resource_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "string_handle"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "L2Loss"
+  input_arg {
+    name: "t"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "L2Loss"
+  input_arg {
+    name: "t"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "L2Loss"
+  input_arg {
+    name: "t"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LMDBReader"
+  output_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "LRN"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "depth_radius"
+    type: "int"
+    default_value {
+      i: 5
+    }
+  }
+  attr {
+    name: "bias"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "beta"
+    type: "float"
+    default_value {
+      f: 0.5
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "LRN"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "depth_radius"
+    type: "int"
+    default_value {
+      i: 5
+    }
+  }
+  attr {
+    name: "bias"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "beta"
+    type: "float"
+    default_value {
+      f: 0.5
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "LRNGrad"
+  input_arg {
+    name: "input_grads"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_image"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_image"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "depth_radius"
+    type: "int"
+    default_value {
+      i: 5
+    }
+  }
+  attr {
+    name: "bias"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "beta"
+    type: "float"
+    default_value {
+      f: 0.5
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "LRNGrad"
+  input_arg {
+    name: "input_grads"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_image"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_image"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "depth_radius"
+    type: "int"
+    default_value {
+      i: 5
+    }
+  }
+  attr {
+    name: "bias"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 1
+    }
+  }
+  attr {
+    name: "beta"
+    type: "float"
+    default_value {
+      f: 0.5
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "LatencyStatsDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "LearnedUnigramCandidateSampler"
+  input_arg {
+    name: "true_classes"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sampled_candidates"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "true_expected_count"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "num_true"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_sampled"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "unique"
+    type: "bool"
+  }
+  attr {
+    name: "range_max"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+}
+op {
+  name: "LearnedUnigramCandidateSampler"
+  input_arg {
+    name: "true_classes"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sampled_candidates"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "true_expected_count"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "num_true"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_sampled"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "unique"
+    type: "bool"
+  }
+  attr {
+    name: "range_max"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "LeftShift"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Less"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "Less"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Less"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "Less"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "LessEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "LessEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "LessEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "LessEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Lgamma"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "Lgamma"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LinSpace"
+  input_arg {
+    name: "start"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "stop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "LinSpace"
+  input_arg {
+    name: "start"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "stop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "ListDiff"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "idx"
+    type_attr: "out_idx"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "out_idx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "LoadAndRemapMatrix"
+  input_arg {
+    name: "ckpt_path"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "old_tensor_name"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "row_remapping"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "col_remapping"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "initializing_values"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_matrix"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "num_rows"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_cols"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "max_rows_in_memory"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Log"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Log"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Log1p"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Log1p"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "LogMatrixDeterminant"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "sign"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "log_abs_determinant"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "LogSoftmax"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "logsoftmax"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LogSoftmax"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "logsoftmax"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LogUniformCandidateSampler"
+  input_arg {
+    name: "true_classes"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sampled_candidates"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "true_expected_count"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "num_true"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_sampled"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "unique"
+    type: "bool"
+  }
+  attr {
+    name: "range_max"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+}
+op {
+  name: "LogUniformCandidateSampler"
+  input_arg {
+    name: "true_classes"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sampled_candidates"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "true_expected_count"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "sampled_expected_count"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "num_true"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_sampled"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "unique"
+    type: "bool"
+  }
+  attr {
+    name: "range_max"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "LogicalAnd"
+  input_arg {
+    name: "x"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  is_commutative: true
+}
+op {
+  name: "LogicalNot"
+  input_arg {
+    name: "x"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+}
+op {
+  name: "LogicalOr"
+  input_arg {
+    name: "x"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "y"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  is_commutative: true
+}
+op {
+  name: "LookupTableExport"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "keys"
+    type_attr: "Tkeys"
+  }
+  output_arg {
+    name: "values"
+    type_attr: "Tvalues"
+  }
+  attr {
+    name: "Tkeys"
+    type: "type"
+  }
+  attr {
+    name: "Tvalues"
+    type: "type"
+  }
+}
+op {
+  name: "LookupTableExportV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "keys"
+    type_attr: "Tkeys"
+  }
+  output_arg {
+    name: "values"
+    type_attr: "Tvalues"
+  }
+  attr {
+    name: "Tkeys"
+    type: "type"
+  }
+  attr {
+    name: "Tvalues"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "LookupTableFind"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "default_value"
+    type_attr: "Tout"
+  }
+  output_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+}
+op {
+  name: "LookupTableFindV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "default_value"
+    type_attr: "Tout"
+  }
+  output_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "LookupTableImport"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+}
+op {
+  name: "LookupTableImportV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "LookupTableInsert"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+}
+op {
+  name: "LookupTableInsertV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  input_arg {
+    name: "values"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "LookupTableSize"
+  input_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "size"
+    type: DT_INT64
+  }
+}
+op {
+  name: "LookupTableSizeV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "size"
+    type: DT_INT64
+  }
+  is_stateful: true
+}
+op {
+  name: "LoopCond"
+  input_arg {
+    name: "input"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "output"
+    type: DT_BOOL
+  }
+}
+op {
+  name: "MakeIterator"
+  input_arg {
+    name: "dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "iterator"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "MapAndBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "num_parallel_batches"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "MapClear"
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "MapDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "MapIncompleteSize"
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapPeek"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapSize"
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapStage"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "values"
+    type_list_attr: "fake_dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "fake_dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapUnstage"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MapUnstageNoKey"
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MatMul"
+  input_arg {
+    name: "a"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "product"
+    type_attr: "T"
+  }
+  attr {
+    name: "transpose_a"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "transpose_b"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatMul"
+  input_arg {
+    name: "a"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "product"
+    type_attr: "T"
+  }
+  attr {
+    name: "transpose_a"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "transpose_b"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatchingFiles"
+  input_arg {
+    name: "pattern"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "filenames"
+    type: DT_STRING
+  }
+}
+op {
+  name: "MatrixBandPart"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_lower"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "num_upper"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "band"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "MatrixDeterminant"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "MatrixDeterminant"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatrixDiag"
+  input_arg {
+    name: "diagonal"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "MatrixDiagPart"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "diagonal"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "MatrixExponential"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatrixInverse"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MatrixInverse"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatrixLogarithm"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatrixSetDiag"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "diagonal"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "MatrixSolve"
+  input_arg {
+    name: "matrix"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "MatrixSolveLs"
+  input_arg {
+    name: "matrix"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_regularizer"
+    type: DT_DOUBLE
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "fast"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
+op {
+  name: "MatrixSolveLs"
+  input_arg {
+    name: "matrix"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_regularizer"
+    type: DT_DOUBLE
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  attr {
+    name: "fast"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
+op {
+  name: "MatrixTriangularSolve"
+  input_arg {
+    name: "matrix"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "lower"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MatrixTriangularSolve"
+  input_arg {
+    name: "matrix"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rhs"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "lower"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "adjoint"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Max"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Max"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Max"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Max"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_QINT8
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_QINT8
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3D"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3D"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3D"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3DGrad"
+  input_arg {
+    name: "orig_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "orig_output"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3DGrad"
+  input_arg {
+    name: "orig_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "orig_output"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3DGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "TInput"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "TInput"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "TInput"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3DGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "TInput"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "TInput"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "TInput"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPool3DGradGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 5
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NDHWC"
+    }
+    allowed_values {
+      list {
+        s: "NDHWC"
+        s: "NCDHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGrad"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradV2"
+  input_arg {
+    name: "orig_input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "orig_output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolGradWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_QINT8
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "ksize"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "strides"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_QINT8
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "MaxPoolWithArgmax"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "argmax"
+    type_attr: "Targmax"
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    has_minimum: true
+    minimum: 4
+  }
+  attr {
+    name: "Targmax"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Maximum"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Maximum"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Mean"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Mean"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Mean"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Mean"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Merge"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "value_index"
+    type: DT_INT32
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "MergeSummary"
+  input_arg {
+    name: "inputs"
+    type: DT_STRING
+    number_attr: "N"
+  }
+  output_arg {
+    name: "summary"
+    type: DT_STRING
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "MergeV2Checkpoints"
+  input_arg {
+    name: "checkpoint_prefixes"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "destination_prefix"
+    type: DT_STRING
+  }
+  attr {
+    name: "delete_old_dirs"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+}
+op {
+  name: "MergeV2Checkpoints"
+  input_arg {
+    name: "checkpoint_prefixes"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "destination_prefix"
+    type: DT_STRING
+  }
+  attr {
+    name: "delete_old_dirs"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Mfcc"
+  input_arg {
+    name: "spectrogram"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "sample_rate"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "upper_frequency_limit"
+    type: "float"
+    default_value {
+      f: 4000
+    }
+  }
+  attr {
+    name: "lower_frequency_limit"
+    type: "float"
+    default_value {
+      f: 20
+    }
+  }
+  attr {
+    name: "filterbank_channel_count"
+    type: "int"
+    default_value {
+      i: 40
+    }
+  }
+  attr {
+    name: "dct_coefficient_count"
+    type: "int"
+    default_value {
+      i: 13
+    }
+  }
+}
+op {
+  name: "Min"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Min"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Min"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Min"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Minimum"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Minimum"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "MirrorPad"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "paddings"
+    type_attr: "Tpaddings"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tpaddings"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    allowed_values {
+      list {
+        s: "REFLECT"
+        s: "SYMMETRIC"
+      }
+    }
+  }
+}
+op {
+  name: "MirrorPadGrad"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "paddings"
+    type_attr: "Tpaddings"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tpaddings"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    allowed_values {
+      list {
+        s: "REFLECT"
+        s: "SYMMETRIC"
+      }
+    }
+  }
+}
+op {
+  name: "Mod"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "Mod"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "Mul"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Mul"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "Multinomial"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_samples"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_INT64
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Multinomial"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_samples"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_INT64
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Multinomial"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_samples"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "output_dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "output_dtype"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Multinomial"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_samples"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "output_dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "output_dtype"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableDenseHashTable"
+  input_arg {
+    name: "empty_key"
+    type_attr: "key_dtype"
+  }
+  output_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+  attr {
+    name: "initial_num_buckets"
+    type: "int"
+    default_value {
+      i: 131072
+    }
+  }
+  attr {
+    name: "max_load_factor"
+    type: "float"
+    default_value {
+      f: 0.8
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableDenseHashTableV2"
+  input_arg {
+    name: "empty_key"
+    type_attr: "key_dtype"
+  }
+  output_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+  attr {
+    name: "initial_num_buckets"
+    type: "int"
+    default_value {
+      i: 131072
+    }
+  }
+  attr {
+    name: "max_load_factor"
+    type: "float"
+    default_value {
+      f: 0.8
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableHashTable"
+  output_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableHashTableOfTensors"
+  output_arg {
+    name: "table_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableHashTableOfTensorsV2"
+  output_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "MutableHashTableV2"
+  output_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "use_node_name_sharing"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "key_dtype"
+    type: "type"
+  }
+  attr {
+    name: "value_dtype"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "Neg"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Neg"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "NegTrain"
+  input_arg {
+    name: "w_in"
+    type: DT_FLOAT
+    is_ref: true
+  }
+  input_arg {
+    name: "w_out"
+    type: DT_FLOAT
+    is_ref: true
+  }
+  input_arg {
+    name: "examples"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "labels"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "lr"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "vocab_count"
+    type: "list(int)"
+  }
+  attr {
+    name: "num_negative_samples"
+    type: "int"
+  }
+  deprecation {
+    version: 19
+  }
+  is_stateful: true
+}
+op {
+  name: "NextIteration"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "NoOp"
+}
+op {
+  name: "NonMaxSuppression"
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "scores"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_output_size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "selected_indices"
+    type: DT_INT32
+  }
+  attr {
+    name: "iou_threshold"
+    type: "float"
+    default_value {
+      f: 0.5
+    }
+  }
+}
+op {
+  name: "NonMaxSuppressionV2"
+  input_arg {
+    name: "boxes"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "scores"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_output_size"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "iou_threshold"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "selected_indices"
+    type: DT_INT32
+  }
+}
+op {
+  name: "NotEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_QUINT8
+        type: DT_QINT8
+        type: DT_QINT32
+        type: DT_STRING
+        type: DT_BOOL
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "NotEqual"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type: DT_BOOL
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_QUINT8
+        type: DT_QINT8
+        type: DT_QINT32
+        type: DT_STRING
+        type: DT_BOOL
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "NthElement"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  attr {
+    name: "reverse"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "NthElement"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  attr {
+    name: "reverse"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "NthElement"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  attr {
+    name: "reverse"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "OneHot"
+  input_arg {
+    name: "indices"
+    type_attr: "TI"
+  }
+  input_arg {
+    name: "depth"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "on_value"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "off_value"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "axis"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "TI"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "OneShotIterator"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "dataset_factory"
+    type: "func"
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OnesLike"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "OnesLike"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_BOOL
+      }
+    }
+  }
+}
+op {
+  name: "OrderedMapClear"
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapIncompleteSize"
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapPeek"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapSize"
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapStage"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "values"
+    type_list_attr: "fake_dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+  }
+  attr {
+    name: "fake_dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapUnstage"
+  input_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "OrderedMapUnstageNoKey"
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "key"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "values"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "memory_limit"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Pack"
+  input_arg {
+    name: "values"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "axis"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+}
+op {
+  name: "Pad"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "paddings"
+    type_attr: "Tpaddings"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tpaddings"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "PadV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "paddings"
+    type_attr: "Tpaddings"
+  }
+  input_arg {
+    name: "constant_values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tpaddings"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "PaddedBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "padded_shapes"
+    type: DT_INT64
+    number_attr: "N"
+  }
+  input_arg {
+    name: "padding_values"
+    type_list_attr: "Toutput_types"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "Toutput_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "PaddedBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "padded_shapes"
+    type: DT_INT64
+    number_attr: "N"
+  }
+  input_arg {
+    name: "padding_values"
+    type_list_attr: "Toutput_types"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "Toutput_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "PaddingFIFOQueue"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "PaddingFIFOQueueV2"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ParallelConcat"
+  input_arg {
+    name: "values"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+}
+op {
+  name: "ParallelDynamicStitch"
+  input_arg {
+    name: "indices"
+    type: DT_INT32
+    number_attr: "N"
+  }
+  input_arg {
+    name: "data"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  output_arg {
+    name: "merged"
+    type_attr: "T"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "ParallelInterleaveDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "cycle_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "block_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "sloppy"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "buffer_output_elements"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "prefetch_input_elements"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ParallelMapDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "num_parallel_calls"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "ParallelMapDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "num_parallel_calls"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ParameterizedTruncatedNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "means"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "stdevs"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "minvals"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "maxvals"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ParameterizedTruncatedNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "means"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "stdevs"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "minvals"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "maxvals"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ParseExample"
+  input_arg {
+    name: "serialized"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "names"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "sparse_keys"
+    type: DT_STRING
+    number_attr: "Nsparse"
+  }
+  input_arg {
+    name: "dense_keys"
+    type: DT_STRING
+    number_attr: "Ndense"
+  }
+  input_arg {
+    name: "dense_defaults"
+    type_list_attr: "Tdense"
+  }
+  output_arg {
+    name: "sparse_indices"
+    type: DT_INT64
+    number_attr: "Nsparse"
+  }
+  output_arg {
+    name: "sparse_values"
+    type_list_attr: "sparse_types"
+  }
+  output_arg {
+    name: "sparse_shapes"
+    type: DT_INT64
+    number_attr: "Nsparse"
+  }
+  output_arg {
+    name: "dense_values"
+    type_list_attr: "Tdense"
+  }
+  attr {
+    name: "Nsparse"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "Ndense"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "sparse_types"
+    type: "list(type)"
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "Tdense"
+    type: "list(type)"
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "dense_shapes"
+    type: "list(shape)"
+    has_minimum: true
+  }
+}
+op {
+  name: "ParseSingleExample"
+  input_arg {
+    name: "serialized"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "dense_defaults"
+    type_list_attr: "Tdense"
+  }
+  output_arg {
+    name: "sparse_indices"
+    type: DT_INT64
+    number_attr: "num_sparse"
+  }
+  output_arg {
+    name: "sparse_values"
+    type_list_attr: "sparse_types"
+  }
+  output_arg {
+    name: "sparse_shapes"
+    type: DT_INT64
+    number_attr: "num_sparse"
+  }
+  output_arg {
+    name: "dense_values"
+    type_list_attr: "Tdense"
+  }
+  attr {
+    name: "num_sparse"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "sparse_keys"
+    type: "list(string)"
+    has_minimum: true
+  }
+  attr {
+    name: "dense_keys"
+    type: "list(string)"
+    has_minimum: true
+  }
+  attr {
+    name: "sparse_types"
+    type: "list(type)"
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "Tdense"
+    type: "list(type)"
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "dense_shapes"
+    type: "list(shape)"
+    has_minimum: true
+  }
+}
+op {
+  name: "ParseSingleSequenceExample"
+  input_arg {
+    name: "serialized"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "feature_list_dense_missing_assumed_empty"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "context_sparse_keys"
+    type: DT_STRING
+    number_attr: "Ncontext_sparse"
+  }
+  input_arg {
+    name: "context_dense_keys"
+    type: DT_STRING
+    number_attr: "Ncontext_dense"
+  }
+  input_arg {
+    name: "feature_list_sparse_keys"
+    type: DT_STRING
+    number_attr: "Nfeature_list_sparse"
+  }
+  input_arg {
+    name: "feature_list_dense_keys"
+    type: DT_STRING
+    number_attr: "Nfeature_list_dense"
+  }
+  input_arg {
+    name: "context_dense_defaults"
+    type_list_attr: "Tcontext_dense"
+  }
+  input_arg {
+    name: "debug_name"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "context_sparse_indices"
+    type: DT_INT64
+    number_attr: "Ncontext_sparse"
+  }
+  output_arg {
+    name: "context_sparse_values"
+    type_list_attr: "context_sparse_types"
+  }
+  output_arg {
+    name: "context_sparse_shapes"
+    type: DT_INT64
+    number_attr: "Ncontext_sparse"
+  }
+  output_arg {
+    name: "context_dense_values"
+    type_list_attr: "Tcontext_dense"
+  }
+  output_arg {
+    name: "feature_list_sparse_indices"
+    type: DT_INT64
+    number_attr: "Nfeature_list_sparse"
+  }
+  output_arg {
+    name: "feature_list_sparse_values"
+    type_list_attr: "feature_list_sparse_types"
+  }
+  output_arg {
+    name: "feature_list_sparse_shapes"
+    type: DT_INT64
+    number_attr: "Nfeature_list_sparse"
+  }
+  output_arg {
+    name: "feature_list_dense_values"
+    type_list_attr: "feature_list_dense_types"
+  }
+  attr {
+    name: "Ncontext_sparse"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "Ncontext_dense"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "Nfeature_list_sparse"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "Nfeature_list_dense"
+    type: "int"
+    default_value {
+      i: 0
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "context_sparse_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "Tcontext_dense"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "feature_list_dense_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "context_dense_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "feature_list_sparse_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "feature_list_dense_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+}
+op {
+  name: "ParseTensor"
+  input_arg {
+    name: "serialized"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+  }
+}
+op {
+  name: "Placeholder"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+    default_value {
+      shape {
+      }
+    }
+  }
+}
+op {
+  name: "Placeholder"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+    default_value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+}
+op {
+  name: "PlaceholderV2"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+}
+op {
+  name: "PlaceholderV2"
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  deprecation {
+    version: 23
+  }
+}
+op {
+  name: "PlaceholderWithDefault"
+  input_arg {
+    name: "input"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+}
+op {
+  name: "Polygamma"
+  input_arg {
+    name: "a"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "PopulationCount"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_UINT8
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+      }
+    }
+  }
+}
+op {
+  name: "PopulationCount"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type: DT_UINT8
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Pow"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Pow"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "PrefetchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "PrefetchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "PreventGradient"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "message"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+}
+op {
+  name: "Print"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "data"
+    type_list_attr: "U"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "U"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "message"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "first_n"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "summarize"
+    type: "int"
+    default_value {
+      i: 3
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Print"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "data"
+    type_list_attr: "U"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "U"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "message"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "first_n"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "summarize"
+    type: "int"
+    default_value {
+      i: 3
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "PriorityQueue"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "PriorityQueueV2"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Prod"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Prod"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Prod"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Prod"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "PyFunc"
+  input_arg {
+    name: "input"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "token"
+    type: "string"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  is_stateful: true
+}
+op {
+  name: "PyFuncStateless"
+  input_arg {
+    name: "input"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "token"
+    type: "string"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+}
+op {
+  name: "Qr"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "q"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "r"
+    type_attr: "T"
+  }
+  attr {
+    name: "full_matrices"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeAndDequantize"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "input_min"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "input_max"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeAndDequantize"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "input_min"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "input_max"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  deprecation {
+    version: 21
+  }
+}
+op {
+  name: "QuantizeAndDequantize"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "input_min"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "input_max"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  deprecation {
+    version: 22
+  }
+}
+op {
+  name: "QuantizeAndDequantize"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "input_min"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "input_max"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  deprecation {
+    version: 22
+  }
+}
+op {
+  name: "QuantizeAndDequantizeV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_max"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeAndDequantizeV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_max"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "num_bits"
+    type: "int"
+    default_value {
+      i: 8
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeAndDequantizeV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_max"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_bits"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeAndDequantizeV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_min"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_max"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "num_bits"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "signed_input"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "range_given"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeDownAndShrinkRange"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeDownAndShrinkRange"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeV2"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_range"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_range"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
+    allowed_values {
+      list {
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeV2"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_range"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_range"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
+    allowed_values {
+      list {
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
+        s: "SCALED"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeV2"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_range"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_range"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
+    allowed_values {
+      list {
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
+        s: "SCALED"
+      }
+    }
+  }
+  attr {
+    name: "round_mode"
+    type: "string"
+    default_value {
+      s: "HALF_AWAY_FROM_ZERO"
+    }
+    allowed_values {
+      list {
+        s: "HALF_AWAY_FROM_ZERO"
+        s: "HALF_TO_EVEN"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizeV2"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_range"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_range"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    default_value {
+      s: "MIN_COMBINED"
+    }
+    allowed_values {
+      list {
+        s: "MIN_COMBINED"
+        s: "MIN_FIRST"
+        s: "SCALED"
+      }
+    }
+  }
+  attr {
+    name: "round_mode"
+    type: "string"
+    default_value {
+      s: "HALF_AWAY_FROM_ZERO"
+    }
+    allowed_values {
+      list {
+        s: "HALF_AWAY_FROM_ZERO"
+        s: "HALF_TO_EVEN"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedAdd"
+  input_arg {
+    name: "x"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_y"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_y"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "z"
+    type_attr: "Toutput"
+  }
+  output_arg {
+    name: "min_z"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_z"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "Toutput"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "QuantizedAdd"
+  input_arg {
+    name: "x"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_y"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_y"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "z"
+    type_attr: "Toutput"
+  }
+  output_arg {
+    name: "min_z"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_z"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "Toutput"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "QuantizedAvgPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "min_output"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedAvgPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "min_output"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedBatchNormWithGlobalNormalization"
+  input_arg {
+    name: "t"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "t_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "t_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "m"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "m_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "m_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "v"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "v_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "v_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "beta_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "beta_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "gamma_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gamma_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "result"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "result_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "result_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "variance_epsilon"
+    type: "float"
+  }
+  attr {
+    name: "scale_after_normalization"
+    type: "bool"
+  }
+}
+op {
+  name: "QuantizedBatchNormWithGlobalNormalization"
+  input_arg {
+    name: "t"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "t_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "t_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "m"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "m_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "m_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "v"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "v_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "v_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "beta_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "beta_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gamma"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "gamma_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "gamma_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "result"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "result_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "result_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "variance_epsilon"
+    type: "float"
+  }
+  attr {
+    name: "scale_after_normalization"
+    type: "bool"
+  }
+}
+op {
+  name: "QuantizedBiasAdd"
+  input_arg {
+    name: "input"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "bias"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_bias"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_bias"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_out"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_out"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedBiasAdd"
+  input_arg {
+    name: "input"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "bias"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_bias"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_bias"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_out"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_out"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedConcat"
+  input_arg {
+    name: "concat_dim"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "values"
+    type_attr: "T"
+    number_attr: "N"
+  }
+  input_arg {
+    name: "input_mins"
+    type: DT_FLOAT
+    number_attr: "N"
+  }
+  input_arg {
+    name: "input_maxes"
+    type: DT_FLOAT
+    number_attr: "N"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 2
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "QuantizedConv2D"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "filter"
+    type_attr: "Tfilter"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_filter"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_filter"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_output"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "Tfilter"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedConv2D"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "filter"
+    type_attr: "Tfilter"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_filter"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_filter"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_output"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "Tfilter"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedConv2D"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "filter"
+    type_attr: "Tfilter"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_filter"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_filter"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_output"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "Tfilter"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedInstanceNorm"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "x_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "y_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "output_range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "given_y_min"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "given_y_max"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "variance_epsilon"
+    type: "float"
+    default_value {
+      f: 1e-05
+    }
+  }
+  attr {
+    name: "min_separation"
+    type: "float"
+    default_value {
+      f: 0.001
+    }
+  }
+}
+op {
+  name: "QuantizedInstanceNorm"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "x_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "x_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "y_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "output_range_given"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "given_y_min"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "given_y_max"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "variance_epsilon"
+    type: "float"
+    default_value {
+      f: 1e-05
+    }
+  }
+  attr {
+    name: "min_separation"
+    type: "float"
+    default_value {
+      f: 0.001
+    }
+  }
+}
+op {
+  name: "QuantizedMatMul"
+  input_arg {
+    name: "a"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_a"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_a"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_b"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_b"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out"
+    type_attr: "Toutput"
+  }
+  output_arg {
+    name: "min_out"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_out"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "Toutput"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "transpose_a"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "transpose_b"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "Tactivation"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedMatMul"
+  input_arg {
+    name: "a"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_a"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_a"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_b"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_b"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out"
+    type_attr: "Toutput"
+  }
+  output_arg {
+    name: "min_out"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_out"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "Toutput"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "transpose_a"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "transpose_b"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "Tactivation"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedMaxPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "min_output"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedMaxPool"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "min_input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_input"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "min_output"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_output"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "ksize"
+    type: "list(int)"
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedMul"
+  input_arg {
+    name: "x"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_y"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_y"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "z"
+    type_attr: "Toutput"
+  }
+  output_arg {
+    name: "min_z"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_z"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "Toutput"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "QuantizedMul"
+  input_arg {
+    name: "x"
+    type_attr: "T1"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T2"
+  }
+  input_arg {
+    name: "min_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_x"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_y"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_y"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "z"
+    type_attr: "Toutput"
+  }
+  output_arg {
+    name: "min_z"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_z"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T1"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "T2"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "Toutput"
+    type: "type"
+    default_value {
+      type: DT_QINT32
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  is_commutative: true
+}
+op {
+  name: "QuantizedRelu"
+  input_arg {
+    name: "features"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "min_features"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_features"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_activations"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_activations"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedRelu"
+  input_arg {
+    name: "features"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "min_features"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_features"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_activations"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_activations"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedRelu6"
+  input_arg {
+    name: "features"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "min_features"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_features"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_activations"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_activations"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedRelu6"
+  input_arg {
+    name: "features"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "min_features"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_features"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_activations"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_activations"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedReluX"
+  input_arg {
+    name: "features"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "max_value"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_features"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_features"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_activations"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_activations"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedReluX"
+  input_arg {
+    name: "features"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "max_value"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_features"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max_features"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "min_activations"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "max_activations"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_QUINT8
+    }
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedReshape"
+  input_arg {
+    name: "tensor"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "shape"
+    type_attr: "Tshape"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tshape"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "QuantizedResizeBilinear"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "resized_images"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "QueueClose"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "cancel_pending_enqueues"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "QueueCloseV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "cancel_pending_enqueues"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueDequeue"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
+op {
+  name: "QueueDequeueMany"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
+op {
+  name: "QueueDequeueManyV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueDequeueUpTo"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
+op {
+  name: "QueueDequeueUpToV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "n"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueDequeueV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "components"
+    type_list_attr: "component_types"
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueEnqueue"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "components"
+    type_list_attr: "Tcomponents"
+  }
+  attr {
+    name: "Tcomponents"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
+op {
+  name: "QueueEnqueueMany"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "components"
+    type_list_attr: "Tcomponents"
+  }
+  attr {
+    name: "Tcomponents"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
+op {
+  name: "QueueEnqueueManyV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "components"
+    type_list_attr: "Tcomponents"
+  }
+  attr {
+    name: "Tcomponents"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueEnqueueV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "components"
+    type_list_attr: "Tcomponents"
+  }
+  attr {
+    name: "Tcomponents"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "timeout_ms"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueIsClosed"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "is_closed"
+    type: DT_BOOL
+  }
+}
+op {
+  name: "QueueIsClosedV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "is_closed"
+    type: DT_BOOL
+  }
+  is_stateful: true
+}
+op {
+  name: "QueueSize"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+}
+op {
+  name: "QueueSizeV2"
+  input_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  is_stateful: true
+}
+op {
+  name: "RFFT"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "fft_length"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
+  }
+}
+op {
+  name: "RFFT2D"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "fft_length"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
+  }
+}
+op {
+  name: "RFFT3D"
+  input_arg {
+    name: "input"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "fft_length"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_COMPLEX64
+  }
+}
+op {
+  name: "RGBToHSV"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "RGBToHSV"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "RandomCrop"
+  input_arg {
+    name: "image"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  deprecation {
+    version: 8
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomDataset"
+  input_arg {
+    name: "seed"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed2"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomGamma"
+  input_arg {
+    name: "shape"
+    type_attr: "S"
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "S"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomPoisson"
+  input_arg {
+    name: "shape"
+    type_attr: "S"
+  }
+  input_arg {
+    name: "rate"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "S"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomPoisson"
+  input_arg {
+    name: "shape"
+    type_attr: "S"
+  }
+  input_arg {
+    name: "rate"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "S"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  deprecation {
+    version: 25
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomPoissonV2"
+  input_arg {
+    name: "shape"
+    type_attr: "S"
+  }
+  input_arg {
+    name: "rate"
+    type_attr: "R"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "S"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "R"
+    type: "type"
+    default_value {
+      type: DT_DOUBLE
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomShuffle"
+  input_arg {
+    name: "value"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomShuffleQueue"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "min_after_dequeue"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomShuffleQueueV2"
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "component_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "capacity"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+  attr {
+    name: "min_after_dequeue"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomStandardNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomStandardNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomUniform"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomUniform"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RandomUniformInt"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "minval"
+    type_attr: "Tout"
+  }
+  input_arg {
+    name: "maxval"
+    type_attr: "Tout"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "Range"
+  input_arg {
+    name: "start"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "limit"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "Tidx"
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Range"
+  input_arg {
+    name: "start"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "limit"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "Tidx"
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "RangeDataset"
+  input_arg {
+    name: "start"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "stop"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "step"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "Rank"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type: DT_INT32
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "ReadFile"
+  input_arg {
+    name: "filename"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "contents"
+    type: DT_STRING
+  }
+}
+op {
+  name: "ReadVariableOp"
+  input_arg {
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "value"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderNumRecordsProduced"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "records_produced"
+    type: DT_INT64
+  }
+}
+op {
+  name: "ReaderNumRecordsProducedV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "records_produced"
+    type: DT_INT64
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderNumWorkUnitsCompleted"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "units_completed"
+    type: DT_INT64
+  }
+}
+op {
+  name: "ReaderNumWorkUnitsCompletedV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "units_completed"
+    type: DT_INT64
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderRead"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "queue_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "key"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "value"
+    type: DT_STRING
+  }
+}
+op {
+  name: "ReaderReadUpTo"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "queue_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "num_records"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "keys"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "values"
+    type: DT_STRING
+  }
+}
+op {
+  name: "ReaderReadUpToV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "queue_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "num_records"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "keys"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "values"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderReadV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "queue_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "key"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "value"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderReset"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+}
+op {
+  name: "ReaderResetV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderRestoreState"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "state"
+    type: DT_STRING
+  }
+}
+op {
+  name: "ReaderRestoreStateV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "state"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "ReaderSerializeState"
+  input_arg {
+    name: "reader_handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  output_arg {
+    name: "state"
+    type: DT_STRING
+  }
+}
+op {
+  name: "ReaderSerializeStateV2"
+  input_arg {
+    name: "reader_handle"
+    type: DT_RESOURCE
+  }
+  output_arg {
+    name: "state"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
+op {
+  name: "Real"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "Tout"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_COMPLEX64
+    }
+    allowed_values {
+      list {
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  attr {
+    name: "Tout"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "RealDiv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "RealDiv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Reciprocal"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Reciprocal"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "ReciprocalGrad"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "ReciprocalGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "ReciprocalGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "RecordInput"
+  output_arg {
+    name: "records"
+    type: DT_STRING
+  }
+  attr {
+    name: "file_pattern"
+    type: "string"
+  }
+  attr {
+    name: "file_random_seed"
+    type: "int"
+    default_value {
+      i: 301
+    }
+  }
+  attr {
+    name: "file_shuffle_shift_ratio"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "file_buffer_size"
+    type: "int"
+    default_value {
+      i: 10000
+    }
+  }
+  attr {
+    name: "file_parallelism"
+    type: "int"
+    default_value {
+      i: 16
+    }
+  }
+  attr {
+    name: "batch_size"
+    type: "int"
+    default_value {
+      i: 32
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "RecordInput"
+  output_arg {
+    name: "records"
+    type: DT_STRING
+  }
+  attr {
+    name: "file_pattern"
+    type: "string"
+  }
+  attr {
+    name: "file_random_seed"
+    type: "int"
+    default_value {
+      i: 301
+    }
+  }
+  attr {
+    name: "file_shuffle_shift_ratio"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "file_buffer_size"
+    type: "int"
+    default_value {
+      i: 10000
+    }
+  }
+  attr {
+    name: "file_parallelism"
+    type: "int"
+    default_value {
+      i: 16
+    }
+  }
+  attr {
+    name: "batch_size"
+    type: "int"
+    default_value {
+      i: 32
+    }
+  }
+  attr {
+    name: "compression_type"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ReduceJoin"
+  input_arg {
+    name: "inputs"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "reduction_indices"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type: DT_STRING
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "separator"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+}
+op {
+  name: "RefEnter"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+    is_ref: true
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "frame_name"
+    type: "string"
+  }
+  attr {
+    name: "is_constant"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "parallel_iterations"
+    type: "int"
+    default_value {
+      i: 10
+    }
+  }
+}
+op {
+  name: "RefExit"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+    is_ref: true
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "RefIdentity"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+    is_ref: true
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  allows_uninitialized_input: true
+}
+op {
+  name: "RefMerge"
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+    is_ref: true
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
+  }
+  output_arg {
+    name: "value_index"
+    type: DT_INT32
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "RefNextIteration"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+    is_ref: true
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "RefSelect"
+  input_arg {
+    name: "index"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "inputs"
+    type_attr: "T"
+    number_attr: "N"
+    is_ref: true
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "RefSwitch"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "pred"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "output_false"
+    type_attr: "T"
+    is_ref: true
+  }
+  output_arg {
+    name: "output_true"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  allows_uninitialized_input: true
+}
+op {
+  name: "Relu"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "Relu"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Relu"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "Relu"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Relu6"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "Relu6"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Relu6"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "Relu6"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Relu6Grad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "Relu6Grad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Relu6Grad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "Relu6Grad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "ReluGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
+op {
+  name: "ReluGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "ReluGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "ReluGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "RemoteCall"
+  input_arg {
+    name: "target"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "args"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+}
+op {
+  name: "RemoteFusedGraphExecute"
+  input_arg {
+    name: "inputs"
+    type_list_attr: "Tinputs"
+  }
+  output_arg {
+    name: "outputs"
+    type_list_attr: "Toutputs"
+  }
+  attr {
+    name: "Tinputs"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Toutputs"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "serialized_remote_fused_graph_execute_info"
+    type: "string"
+  }
+}
+op {
+  name: "RepeatDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "count"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "RepeatDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "count"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "RequantizationRange"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "RequantizationRange"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+}
+op {
+  name: "Requantize"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "requested_output_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "requested_output_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_QINT32
+      }
+    }
+  }
+}
+op {
+  name: "Requantize"
+  input_arg {
+    name: "input"
+    type_attr: "Tinput"
+  }
+  input_arg {
+    name: "input_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "input_max"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "requested_output_min"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "requested_output_max"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "output_min"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output_max"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "Tinput"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_QINT16
+        type: DT_QUINT16
+      }
+    }
+  }
+}
+op {
+  name: "Reshape"
+  input_arg {
+    name: "tensor"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "shape"
+    type_attr: "Tshape"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tshape"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "ResizeArea"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "resized_images"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "ResizeArea"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "resized_images"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "ResizeBicubic"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "resized_images"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "ResizeBicubic"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "resized_images"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "ResizeBicubicGrad"
+  input_arg {
+    name: "grads"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "original_image"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "ResizeBilinear"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "resized_images"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "ResizeBilinear"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "resized_images"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "ResizeBilinearGrad"
+  input_arg {
+    name: "grads"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "original_image"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_HALF
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "ResizeNearestNeighbor"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "resized_images"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "ResizeNearestNeighbor"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "resized_images"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "ResizeNearestNeighborGrad"
+  input_arg {
+    name: "grads"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT32
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "ResourceApplyAdadelta"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdadelta"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdadelta"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdadelta"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdagrad"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdagrad"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdagrad"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdagrad"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdagradDA"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdagradDA"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdagradDA"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdagradDA"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdam"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "v"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdam"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "v"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdam"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "v"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdam"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "v"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAdam"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "v"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "beta1_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2_power"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAddSign"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAddSign"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyAddSign"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyCenteredRMSProp"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mg"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyCenteredRMSProp"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mg"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyCenteredRMSProp"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mg"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyCenteredRMSProp"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mg"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyFtrl"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyFtrl"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyFtrl"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyFtrl"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyFtrlV2"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyFtrlV2"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyFtrlV2"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyFtrlV2"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyGradientDescent"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyGradientDescent"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyGradientDescent"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyGradientDescent"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyMomentum"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyMomentum"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyMomentum"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyMomentum"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyPowerSign"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "logbase"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyPowerSign"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "logbase"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyPowerSign"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "m"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "logbase"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sign_decay"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "beta"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyProximalAdagrad"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyProximalAdagrad"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyProximalAdagrad"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyProximalAdagrad"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyProximalGradientDescent"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyProximalGradientDescent"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyProximalGradientDescent"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyProximalGradientDescent"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "alpha"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "delta"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "ResourceApplyRMSProp"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
+  }
   input_arg {
-    name: "input"
-    type_attr: "dtype"
+    name: "ms"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "dtype"
-    type: "type"
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "shape"
-    type: "shape"
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-}
-op {
-  name: "Polygamma"
   input_arg {
-    name: "a"
+    name: "momentum"
     type_attr: "T"
   }
   input_arg {
-    name: "x"
+    name: "epsilon"
     type_attr: "T"
   }
-  output_arg {
-    name: "z"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   attr {
@@ -21687,74 +41896,197 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "PopulationCount"
+  name: "ResourceApplyRMSProp"
   input_arg {
-    name: "x"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "y"
-    type: DT_UINT8
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
         type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "PopulationCount"
+  name: "ResourceApplyRMSProp"
   input_arg {
-    name: "x"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "y"
-    type: DT_UINT8
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
         type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "Pow"
+  name: "ResourceApplyRMSProp"
   input_arg {
-    name: "x"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "rho"
     type_attr: "T"
   }
-  output_arg {
-    name: "z"
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   attr {
@@ -21762,297 +42094,351 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
         type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "PrefetchDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "ResourceCountUpTo"
   input_arg {
-    name: "buffer_size"
-    type: DT_INT64
+    name: "resource"
+    type: DT_RESOURCE
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "limit"
+    type: "int"
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   is_stateful: true
 }
 op {
-  name: "PrefetchDataset"
+  name: "ResourceGather"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "resource"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "buffer_size"
-    type: DT_INT64
+    name: "indices"
+    type_attr: "Tindices"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type_attr: "dtype"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "validate_indices"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "dtype"
+    type: "type"
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "PreventGradient"
+  name: "ResourceScatterAdd"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "updates"
+    type_attr: "dtype"
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
   }
   attr {
-    name: "message"
-    type: "string"
-    default_value {
-      s: ""
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
+  is_stateful: true
 }
 op {
-  name: "Print"
+  name: "ResourceScatterAdd"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "data"
-    type_list_attr: "U"
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  input_arg {
+    name: "updates"
+    type_attr: "dtype"
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
-  }
-  attr {
-    name: "U"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "message"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "first_n"
-    type: "int"
-    default_value {
-      i: -1
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
   attr {
-    name: "summarize"
-    type: "int"
-    default_value {
-      i: 3
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   is_stateful: true
 }
 op {
-  name: "Print"
+  name: "ResourceScatterAdd"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "data"
-    type_list_attr: "U"
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+  input_arg {
+    name: "updates"
+    type_attr: "dtype"
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
-  }
-  attr {
-    name: "U"
-    type: "list(type)"
-    has_minimum: true
-  }
-  attr {
-    name: "message"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "first_n"
-    type: "int"
-    default_value {
-      i: -1
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
     }
   }
   attr {
-    name: "summarize"
-    type: "int"
-    default_value {
-      i: 3
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   is_stateful: true
 }
 op {
-  name: "PriorityQueue"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+  name: "ResourceScatterAdd"
+  input_arg {
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "updates"
+    type_attr: "dtype"
   }
   attr {
-    name: "component_types"
-    type: "list(type)"
-    default_value {
+    name: "dtype"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
-    has_minimum: true
-  }
-  attr {
-    name: "shapes"
-    type: "list(shape)"
-    has_minimum: true
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
   }
   attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   is_stateful: true
 }
 op {
-  name: "PriorityQueueV2"
-  output_arg {
-    name: "handle"
+  name: "ResourceScatterNdUpdate"
+  input_arg {
+    name: "ref"
     type: DT_RESOURCE
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  attr {
-    name: "shapes"
-    type: "list(shape)"
-    has_minimum: true
+  input_arg {
+    name: "updates"
+    type_attr: "T"
   }
   attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: ""
+      b: true
     }
   }
   is_stateful: true
 }
 op {
-  name: "Prod"
+  name: "ResourceScatterUpdate"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
-  attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "updates"
+    type_attr: "dtype"
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
@@ -22070,15 +42456,14 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tidx"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
         type: DT_INT32
@@ -22086,30 +42471,24 @@ op {
       }
     }
   }
+  is_stateful: true
 }
 op {
-  name: "Prod"
+  name: "ResourceScatterUpdate"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "resource"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "reduction_indices"
-    type_attr: "Tidx"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
-  attr {
-    name: "keep_dims"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "updates"
+    type_attr: "dtype"
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
@@ -22129,15 +42508,13 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Tidx"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
         type: DT_INT32
@@ -22145,136 +42522,167 @@ op {
       }
     }
   }
+  is_stateful: true
 }
 op {
-  name: "PyFunc"
+  name: "ResourceScatterUpdate"
   input_arg {
-    name: "input"
-    type_list_attr: "Tin"
+    name: "resource"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
-    type_list_attr: "Tout"
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  attr {
-    name: "token"
-    type: "string"
+  input_arg {
+    name: "updates"
+    type_attr: "dtype"
   }
   attr {
-    name: "Tin"
-    type: "list(type)"
-    has_minimum: true
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "Tout"
-    type: "list(type)"
-    has_minimum: true
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   is_stateful: true
 }
 op {
-  name: "PyFuncStateless"
+  name: "ResourceSparseApplyAdadelta"
   input_arg {
-    name: "input"
-    type_list_attr: "Tin"
-  }
-  output_arg {
-    name: "output"
-    type_list_attr: "Tout"
+    name: "var"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "token"
-    type: "string"
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "Tin"
-    type: "list(type)"
-    has_minimum: true
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "Tout"
-    type: "list(type)"
-    has_minimum: true
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-}
-op {
-  name: "Qr"
   input_arg {
-    name: "input"
+    name: "rho"
     type_attr: "T"
   }
-  output_arg {
-    name: "q"
+  input_arg {
+    name: "epsilon"
     type_attr: "T"
   }
-  output_arg {
-    name: "r"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
-  attr {
-    name: "full_matrices"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
-}
-op {
-  name: "QuantizeAndDequantize"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "signed_input"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "range_given"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  attr {
-    name: "input_min"
-    type: "float"
-    default_value {
-      f: 0
-    }
+  is_stateful: true
+}
+op {
+  name: "ResourceSparseApplyAdadelta"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "input_max"
-    type: "float"
-    default_value {
-      f: 0
-    }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
     name: "T"
@@ -22283,54 +42691,75 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "QuantizeAndDequantize"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "signed_input"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "range_given"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  attr {
-    name: "input_min"
-    type: "float"
-    default_value {
-      f: 0
-    }
+  is_stateful: true
+}
+op {
+  name: "ResourceSparseApplyAdadelta"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "input_max"
-    type: "float"
-    default_value {
-      f: 0
-    }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
     name: "T"
@@ -22339,57 +42768,142 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-  deprecation {
-    version: 21
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizeAndDequantize"
+  name: "ResourceSparseApplyAdadelta"
   input_arg {
-    name: "input"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum_update"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
-    name: "signed_input"
-    type: "bool"
-    default_value {
-      b: true
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "range_given"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  attr {
-    name: "input_min"
-    type: "float"
-    default_value {
-      f: 0
-    }
+  is_stateful: true
+}
+op {
+  name: "ResourceSparseApplyAdagrad"
+  input_arg {
+    name: "var"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "input_max"
-    type: "float"
-    default_value {
-      f: 0
-    }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
     name: "T"
@@ -22398,98 +42912,126 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
-  deprecation {
-    version: 22
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizeAndDequantizeV2"
+  name: "ResourceSparseApplyAdagrad"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_min"
-    type_attr: "T"
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_max"
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
-    name: "signed_input"
-    type: "bool"
-    default_value {
-      b: true
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
   attr {
-    name: "num_bits"
-    type: "int"
-    default_value {
-      i: 8
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "range_given"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
-  }
+  is_stateful: true
 }
 op {
-  name: "QuantizeAndDequantizeV3"
+  name: "ResourceSparseApplyAdagrad"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_min"
-    type_attr: "T"
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_max"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "num_bits"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
+    name: "grad"
     type_attr: "T"
   }
-  attr {
-    name: "signed_input"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "range_given"
-    type: "bool"
-    default_value {
-      b: true
-    }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
     name: "T"
@@ -22498,1241 +43040,1421 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizeDownAndShrinkRange"
+  name: "ResourceSparseApplyAdagrad"
   input_arg {
-    name: "input"
-    type_attr: "Tinput"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_min"
-    type: DT_FLOAT
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "Tinput"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "out_type"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizeV2"
+  name: "ResourceSparseApplyAdagradDA"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_range"
-    type: DT_FLOAT
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_range"
-    type: DT_FLOAT
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "mode"
-    type: "string"
-    default_value {
-      s: "MIN_COMBINED"
-    }
+    name: "Tindices"
+    type: "type"
     allowed_values {
       list {
-        s: "MIN_COMBINED"
-        s: "MIN_FIRST"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizeV2"
+  name: "ResourceSparseApplyAdagradDA"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_range"
-    type: DT_FLOAT
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_range"
-    type: DT_FLOAT
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "mode"
-    type: "string"
-    default_value {
-      s: "MIN_COMBINED"
-    }
+    name: "Tindices"
+    type: "type"
     allowed_values {
       list {
-        s: "MIN_COMBINED"
-        s: "MIN_FIRST"
-        s: "SCALED"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizeV2"
+  name: "ResourceSparseApplyAdagradDA"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_range"
-    type: DT_FLOAT
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_range"
-    type: DT_FLOAT
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "mode"
-    type: "string"
-    default_value {
-      s: "MIN_COMBINED"
-    }
-    allowed_values {
-      list {
-        s: "MIN_COMBINED"
-        s: "MIN_FIRST"
-        s: "SCALED"
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "round_mode"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: "HALF_AWAY_FROM_ZERO"
-    }
-    allowed_values {
-      list {
-        s: "HALF_AWAY_FROM_ZERO"
-        s: "HALF_TO_EVEN"
-      }
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedAdd"
+  name: "ResourceSparseApplyAdagradDA"
   input_arg {
-    name: "x"
-    type_attr: "T1"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "y"
-    type_attr: "T2"
+    name: "gradient_accumulator"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_x"
-    type: DT_FLOAT
+    name: "gradient_squared_accumulator"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_x"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "min_y"
-    type: DT_FLOAT
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "max_y"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "z"
-    type_attr: "Toutput"
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "min_z"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_z"
-    type: DT_FLOAT
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
   }
   attr {
-    name: "T1"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "T2"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "Toutput"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_QINT32
-    }
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
+      b: false
     }
   }
-  is_commutative: true
+  is_stateful: true
 }
 op {
-  name: "QuantizedAvgPool"
+  name: "ResourceSparseApplyCenteredRMSProp"
   input_arg {
-    name: "input"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mg"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "min_input"
-    type: DT_FLOAT
+    name: "rho"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_input"
-    type: DT_FLOAT
+    name: "momentum"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "epsilon"
     type_attr: "T"
   }
-  output_arg {
-    name: "min_output"
-    type: DT_FLOAT
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_output"
-    type: DT_FLOAT
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
+    name: "Tindices"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedBatchNormWithGlobalNormalization"
-  input_arg {
-    name: "t"
-    type_attr: "Tinput"
-  }
-  input_arg {
-    name: "t_min"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "t_max"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "m"
-    type_attr: "Tinput"
-  }
-  input_arg {
-    name: "m_min"
-    type: DT_FLOAT
-  }
+  name: "ResourceSparseApplyCenteredRMSProp"
   input_arg {
-    name: "m_max"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "v"
-    type_attr: "Tinput"
+    name: "mg"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "v_min"
-    type: DT_FLOAT
+    name: "ms"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "v_max"
-    type: DT_FLOAT
+    name: "mom"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "beta"
-    type_attr: "Tinput"
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "beta_min"
-    type: DT_FLOAT
+    name: "rho"
+    type_attr: "T"
   }
   input_arg {
-    name: "beta_max"
-    type: DT_FLOAT
+    name: "momentum"
+    type_attr: "T"
   }
   input_arg {
-    name: "gamma"
-    type_attr: "Tinput"
+    name: "epsilon"
+    type_attr: "T"
   }
   input_arg {
-    name: "gamma_min"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "gamma_max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "result"
-    type_attr: "out_type"
-  }
-  output_arg {
-    name: "result_min"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "result_max"
-    type: DT_FLOAT
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "Tinput"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "out_type"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "variance_epsilon"
-    type: "float"
-  }
-  attr {
-    name: "scale_after_normalization"
+    name: "use_locking"
     type: "bool"
+    default_value {
+      b: false
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedBiasAdd"
+  name: "ResourceSparseApplyCenteredRMSProp"
   input_arg {
-    name: "input"
-    type_attr: "T1"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "bias"
-    type_attr: "T2"
+    name: "mg"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_input"
-    type: DT_FLOAT
+    name: "ms"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_input"
-    type: DT_FLOAT
+    name: "mom"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_bias"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_bias"
-    type: DT_FLOAT
+    name: "rho"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
-  output_arg {
-    name: "min_out"
-    type: DT_FLOAT
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_out"
-    type: DT_FLOAT
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "T1"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "T2"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "out_type"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedConcat"
+  name: "ResourceSparseApplyCenteredRMSProp"
   input_arg {
-    name: "concat_dim"
-    type: DT_INT32
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "values"
-    type_attr: "T"
-    number_attr: "N"
+    name: "mg"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_mins"
-    type: DT_FLOAT
-    number_attr: "N"
+    name: "ms"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "input_maxes"
-    type: DT_FLOAT
-    number_attr: "N"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 2
-  }
-  attr {
-    name: "T"
-    type: "type"
+    name: "mom"
+    type: DT_RESOURCE
   }
-}
-op {
-  name: "QuantizedConv2D"
   input_arg {
-    name: "input"
-    type_attr: "Tinput"
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "filter"
-    type_attr: "Tfilter"
+    name: "rho"
+    type_attr: "T"
   }
   input_arg {
-    name: "min_input"
-    type: DT_FLOAT
+    name: "momentum"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_input"
-    type: DT_FLOAT
+    name: "epsilon"
+    type_attr: "T"
   }
   input_arg {
-    name: "min_filter"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "max_filter"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
-  }
-  output_arg {
-    name: "min_output"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "max_output"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "Tinput"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
-  }
-  attr {
-    name: "Tfilter"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "out_type"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_QINT32
-    }
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
+    name: "Tindices"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedInstanceNorm"
+  name: "ResourceSparseApplyFtrl"
   input_arg {
-    name: "x"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "x_min"
-    type: DT_FLOAT
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "x_max"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "y"
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
-  output_arg {
-    name: "y_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  output_arg {
-    name: "y_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "output_range_given"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "given_y_min"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "given_y_max"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "variance_epsilon"
-    type: "float"
-    default_value {
-      f: 1e-05
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "min_separation"
-    type: "float"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      f: 0.001
+      b: false
     }
   }
+  is_stateful: true
 }
 op {
-  name: "QuantizedMatMul"
+  name: "ResourceSparseApplyFtrl"
   input_arg {
-    name: "a"
-    type_attr: "T1"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "b"
-    type_attr: "T2"
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_a"
-    type: DT_FLOAT
+    name: "linear"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_a"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "min_b"
-    type: DT_FLOAT
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "max_b"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "out"
-    type_attr: "Toutput"
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "min_out"
-    type: DT_FLOAT
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_out"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "T1"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
-    name: "T2"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Toutput"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_QINT32
-    }
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "transpose_a"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "transpose_b"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  attr {
-    name: "Tactivation"
-    type: "type"
-    default_value {
-      type: DT_QUINT8
-    }
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
-  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedMaxPool"
+  name: "ResourceSparseApplyFtrl"
   input_arg {
-    name: "input"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "min_input"
-    type: DT_FLOAT
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "max_input"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
-  output_arg {
-    name: "min_output"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_output"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "ksize"
-    type: "list(int)"
-  }
-  attr {
-    name: "strides"
-    type: "list(int)"
-  }
-  attr {
-    name: "padding"
-    type: "string"
+    name: "Tindices"
+    type: "type"
     allowed_values {
       list {
-        s: "SAME"
-        s: "VALID"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedMul"
+  name: "ResourceSparseApplyFtrl"
   input_arg {
-    name: "x"
-    type_attr: "T1"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "y"
-    type_attr: "T2"
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_x"
-    type: DT_FLOAT
+    name: "linear"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_x"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "min_y"
-    type: DT_FLOAT
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "max_y"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "z"
-    type_attr: "Toutput"
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "min_z"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_z"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
-    name: "T1"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "T2"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "Toutput"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_QINT32
-    }
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
+      b: false
     }
   }
-  is_commutative: true
+  is_stateful: true
 }
 op {
-  name: "QuantizedRelu"
+  name: "ResourceSparseApplyFtrlV2"
   input_arg {
-    name: "features"
-    type_attr: "Tinput"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_features"
-    type: DT_FLOAT
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_features"
-    type: DT_FLOAT
+    name: "linear"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "activations"
-    type_attr: "out_type"
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-  output_arg {
-    name: "min_activations"
-    type: DT_FLOAT
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "max_activations"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
-    name: "Tinput"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "out_type"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_QUINT8
-    }
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedRelu6"
+  name: "ResourceSparseApplyFtrlV2"
   input_arg {
-    name: "features"
-    type_attr: "Tinput"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_features"
-    type: DT_FLOAT
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_features"
-    type: DT_FLOAT
+    name: "linear"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "activations"
-    type_attr: "out_type"
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  output_arg {
-    name: "min_activations"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_activations"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
-    name: "Tinput"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "out_type"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_QUINT8
-    }
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedReluX"
+  name: "ResourceSparseApplyFtrlV2"
   input_arg {
-    name: "features"
-    type_attr: "Tinput"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_value"
-    type: DT_FLOAT
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min_features"
-    type: DT_FLOAT
+    name: "linear"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "max_features"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
-  output_arg {
-    name: "activations"
-    type_attr: "out_type"
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "min_activations"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "max_activations"
-    type: DT_FLOAT
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
-    name: "Tinput"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
         type: DT_QINT8
         type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
         type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "out_type"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_QUINT8
-    }
     allowed_values {
       list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedReshape"
+  name: "ResourceSparseApplyFtrlV2"
   input_arg {
-    name: "tensor"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "linear"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "shape"
-    type_attr: "Tshape"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "input_min"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "input_max"
-    type: DT_FLOAT
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "l2"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "lr_power"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "Tshape"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
         type: DT_INT32
@@ -23740,79 +44462,82 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "QuantizedResizeBilinear"
+  name: "ResourceSparseApplyMomentum"
   input_arg {
-    name: "images"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "accum"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "min"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "resized_images"
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "out_min"
-    type: DT_FLOAT
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "out_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
-        type: DT_FLOAT
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "align_corners"
-    type: "bool"
-    default_value {
-      b: false
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-}
-op {
-  name: "QueueClose"
-  input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
   attr {
-    name: "cancel_pending_enqueues"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-}
-op {
-  name: "QueueCloseV2"
-  input_arg {
-    name: "handle"
-    type: DT_RESOURCE
-  }
   attr {
-    name: "cancel_pending_enqueues"
+    name: "use_nesterov"
     type: "bool"
     default_value {
       b: false
@@ -23821,862 +44546,946 @@ op {
   is_stateful: true
 }
 op {
-  name: "QueueDequeue"
+  name: "ResourceSparseApplyMomentum"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
+    name: "var"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-}
-op {
-  name: "QueueDequeueMany"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "n"
-    type: DT_INT32
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
   attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-}
-op {
-  name: "QueueDequeueManyV2"
-  input_arg {
-    name: "handle"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "n"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
-  }
   attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
+    name: "use_nesterov"
+    type: "bool"
     default_value {
-      i: -1
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "QueueDequeueUpTo"
+  name: "ResourceSparseApplyMomentum"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "n"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
-  }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "accum"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-}
-op {
-  name: "QueueDequeueUpToV2"
   input_arg {
-    name: "handle"
-    type: DT_RESOURCE
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "n"
-    type: DT_INT32
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
   attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
-}
-op {
-  name: "QueueDequeueV2"
-  input_arg {
-    name: "handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "components"
-    type_list_attr: "component_types"
-  }
   attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
+    name: "use_nesterov"
+    type: "bool"
     default_value {
-      i: -1
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "QueueEnqueue"
+  name: "ResourceSparseApplyMomentum"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "components"
-    type_list_attr: "Tcomponents"
+    name: "accum"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "Tcomponents"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-}
-op {
-  name: "QueueEnqueueMany"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "components"
-    type_list_attr: "Tcomponents"
+    name: "momentum"
+    type_attr: "T"
   }
   attr {
-    name: "Tcomponents"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
-    default_value {
-      i: -1
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-}
-op {
-  name: "QueueEnqueueManyV2"
-  input_arg {
-    name: "handle"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "components"
-    type_list_attr: "Tcomponents"
-  }
   attr {
-    name: "Tcomponents"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
+    name: "use_nesterov"
+    type: "bool"
     default_value {
-      i: -1
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "QueueEnqueueV2"
+  name: "ResourceSparseApplyProximalAdagrad"
   input_arg {
-    name: "handle"
+    name: "var"
     type: DT_RESOURCE
   }
   input_arg {
-    name: "components"
-    type_list_attr: "Tcomponents"
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "Tcomponents"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
   }
   attr {
-    name: "timeout_ms"
-    type: "int"
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: -1
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "QueueIsClosed"
+  name: "ResourceSparseApplyProximalAdagrad"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "is_closed"
-    type: DT_BOOL
+    name: "var"
+    type: DT_RESOURCE
   }
-}
-op {
-  name: "QueueIsClosedV2"
   input_arg {
-    name: "handle"
+    name: "accum"
     type: DT_RESOURCE
   }
-  output_arg {
-    name: "is_closed"
-    type: DT_BOOL
-  }
-  is_stateful: true
-}
-op {
-  name: "QueueSize"
   input_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "size"
-    type: DT_INT32
+    name: "lr"
+    type_attr: "T"
   }
-}
-op {
-  name: "QueueSizeV2"
   input_arg {
-    name: "handle"
-    type: DT_RESOURCE
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "size"
-    type: DT_INT32
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "RFFT"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "fft_length"
-    type: DT_INT32
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "RFFT2D"
+  name: "ResourceSparseApplyProximalAdagrad"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "fft_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+    name: "accum"
+    type: DT_RESOURCE
   }
-}
-op {
-  name: "RFFT3D"
   input_arg {
-    name: "input"
-    type: DT_FLOAT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "fft_length"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
-    type: DT_COMPLEX64
+    name: "l1"
+    type_attr: "T"
   }
-}
-op {
-  name: "RGBToHSV"
   input_arg {
-    name: "images"
+    name: "l2"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "RandomCrop"
+  name: "ResourceSparseApplyProximalAdagrad"
   input_arg {
-    name: "image"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "accum"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "size"
-    type: DT_INT64
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT8
         type: DT_INT16
-        type: DT_INT32
+        type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "seed2"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: 0
+      b: false
     }
   }
-  deprecation {
-    version: 8
-  }
   is_stateful: true
 }
 op {
-  name: "RandomGamma"
+  name: "ResourceSparseApplyProximalGradientDescent"
   input_arg {
-    name: "shape"
-    type_attr: "S"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
     name: "alpha"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "S"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   is_stateful: true
 }
 op {
-  name: "RandomPoisson"
+  name: "ResourceSparseApplyProximalGradientDescent"
   input_arg {
-    name: "shape"
-    type_attr: "S"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "rate"
-    type_attr: "dtype"
+    name: "alpha"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "S"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "dtype"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   is_stateful: true
 }
 op {
-  name: "RandomPoisson"
+  name: "ResourceSparseApplyProximalGradientDescent"
   input_arg {
-    name: "shape"
-    type_attr: "S"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "rate"
-    type_attr: "dtype"
+    name: "alpha"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "S"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "dtype"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
-  deprecation {
-    version: 25
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   is_stateful: true
 }
 op {
-  name: "RandomPoissonV2"
+  name: "ResourceSparseApplyProximalGradientDescent"
   input_arg {
-    name: "shape"
-    type_attr: "S"
+    name: "var"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "rate"
-    type_attr: "R"
+    name: "alpha"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
+  input_arg {
+    name: "l1"
+    type_attr: "T"
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "S"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "R"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_DOUBLE
-    }
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
       }
     }
   }
   attr {
-    name: "dtype"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT64
-    }
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "RandomShuffle"
+  name: "ResourceSparseApplyRMSProp"
   input_arg {
-    name: "value"
-    type_attr: "T"
+    name: "var"
+    type: DT_RESOURCE
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "RandomShuffleQueue"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
+    name: "T"
+    type: "type"
+    allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
-    has_minimum: true
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "min_after_dequeue"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
   }
   attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "shared_name"
-    type: "string"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      s: ""
+      b: false
     }
   }
   is_stateful: true
 }
 op {
-  name: "RandomShuffleQueueV2"
-  output_arg {
-    name: "handle"
+  name: "ResourceSparseApplyRMSProp"
+  input_arg {
+    name: "var"
     type: DT_RESOURCE
   }
-  attr {
-    name: "component_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "shapes"
-    type: "list(shape)"
-    default_value {
-      list {
-      }
-    }
-    has_minimum: true
-  }
-  attr {
-    name: "capacity"
-    type: "int"
-    default_value {
-      i: -1
-    }
-  }
-  attr {
-    name: "min_after_dequeue"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "RandomStandardNormal"
   input_arg {
-    name: "shape"
+    name: "momentum"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
@@ -24685,45 +45494,80 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   is_stateful: true
 }
 op {
-  name: "RandomUniform"
+  name: "ResourceSparseApplyRMSProp"
   input_arg {
-    name: "shape"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type_attr: "dtype"
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
@@ -24732,52 +45576,80 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   is_stateful: true
 }
 op {
-  name: "RandomUniformInt"
+  name: "ResourceSparseApplyRMSProp"
   input_arg {
-    name: "shape"
+    name: "var"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "ms"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "mom"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "minval"
-    type_attr: "Tout"
+    name: "rho"
+    type_attr: "T"
   }
   input_arg {
-    name: "maxval"
-    type_attr: "Tout"
+    name: "momentum"
+    type_attr: "T"
   }
-  output_arg {
-    name: "output"
-    type_attr: "Tout"
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   attr {
-    name: "Tout"
+    name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
@@ -24786,367 +45658,332 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   is_stateful: true
 }
 op {
-  name: "Range"
+  name: "ResourceStridedSliceAssign"
   input_arg {
-    name: "start"
-    type_attr: "Tidx"
+    name: "ref"
+    type: DT_RESOURCE
   }
   input_arg {
-    name: "limit"
-    type_attr: "Tidx"
+    name: "begin"
+    type_attr: "Index"
   }
   input_arg {
-    name: "delta"
-    type_attr: "Tidx"
+    name: "end"
+    type_attr: "Index"
   }
-  output_arg {
-    name: "output"
-    type_attr: "Tidx"
+  input_arg {
+    name: "strides"
+    type_attr: "Index"
+  }
+  input_arg {
+    name: "value"
+    type_attr: "T"
   }
   attr {
-    name: "Tidx"
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Index"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
       }
     }
   }
-}
-op {
-  name: "RangeDataset"
-  input_arg {
-    name: "start"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "stop"
-    type: DT_INT64
-  }
-  input_arg {
-    name: "step"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "begin_mask"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "end_mask"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
-  is_stateful: true
-}
-op {
-  name: "Rank"
-  input_arg {
-    name: "input"
-    type_attr: "T"
+  attr {
+    name: "ellipsis_mask"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
-  output_arg {
-    name: "output"
-    type: DT_INT32
+  attr {
+    name: "new_axis_mask"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "shrink_axis_mask"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
+  is_stateful: true
 }
 op {
-  name: "ReadFile"
+  name: "Restore"
   input_arg {
-    name: "filename"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "contents"
+    name: "file_pattern"
     type: DT_STRING
   }
-}
-op {
-  name: "ReadVariableOp"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "tensor_name"
+    type: DT_STRING
   }
   output_arg {
-    name: "value"
-    type_attr: "dtype"
+    name: "tensor"
+    type_attr: "dt"
   }
   attr {
-    name: "dtype"
+    name: "dt"
     type: "type"
   }
-  is_stateful: true
-}
-op {
-  name: "ReaderNumRecordsProduced"
-  input_arg {
-    name: "reader_handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  output_arg {
-    name: "records_produced"
-    type: DT_INT64
+  attr {
+    name: "preferred_shard"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
 }
 op {
-  name: "ReaderNumRecordsProducedV2"
+  name: "Restore"
   input_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "records_produced"
-    type: DT_INT64
+    name: "file_pattern"
+    type: DT_STRING
   }
-  is_stateful: true
-}
-op {
-  name: "ReaderNumWorkUnitsCompleted"
   input_arg {
-    name: "reader_handle"
+    name: "tensor_name"
     type: DT_STRING
-    is_ref: true
   }
   output_arg {
-    name: "units_completed"
-    type: DT_INT64
+    name: "tensor"
+    type_attr: "dt"
   }
-}
-op {
-  name: "ReaderNumWorkUnitsCompletedV2"
-  input_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
+  attr {
+    name: "dt"
+    type: "type"
   }
-  output_arg {
-    name: "units_completed"
-    type: DT_INT64
+  attr {
+    name: "preferred_shard"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
   is_stateful: true
 }
 op {
-  name: "ReaderRead"
+  name: "RestoreSlice"
   input_arg {
-    name: "reader_handle"
+    name: "file_pattern"
     type: DT_STRING
-    is_ref: true
   }
   input_arg {
-    name: "queue_handle"
+    name: "tensor_name"
     type: DT_STRING
-    is_ref: true
   }
-  output_arg {
-    name: "key"
+  input_arg {
+    name: "shape_and_slice"
     type: DT_STRING
   }
   output_arg {
-    name: "value"
-    type: DT_STRING
+    name: "tensor"
+    type_attr: "dt"
+  }
+  attr {
+    name: "dt"
+    type: "type"
+  }
+  attr {
+    name: "preferred_shard"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
 }
 op {
-  name: "ReaderReadUpTo"
+  name: "RestoreSlice"
   input_arg {
-    name: "reader_handle"
+    name: "file_pattern"
     type: DT_STRING
-    is_ref: true
   }
   input_arg {
-    name: "queue_handle"
+    name: "tensor_name"
     type: DT_STRING
-    is_ref: true
   }
   input_arg {
-    name: "num_records"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "keys"
+    name: "shape_and_slice"
     type: DT_STRING
   }
   output_arg {
-    name: "values"
-    type: DT_STRING
-  }
-}
-op {
-  name: "ReaderReadUpToV2"
-  input_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "queue_handle"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "num_records"
-    type: DT_INT64
+    name: "tensor"
+    type_attr: "dt"
   }
-  output_arg {
-    name: "keys"
-    type: DT_STRING
+  attr {
+    name: "dt"
+    type: "type"
   }
-  output_arg {
-    name: "values"
-    type: DT_STRING
+  attr {
+    name: "preferred_shard"
+    type: "int"
+    default_value {
+      i: -1
+    }
   }
   is_stateful: true
 }
 op {
-  name: "ReaderReadV2"
-  input_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
-  }
+  name: "RestoreV2"
   input_arg {
-    name: "queue_handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "key"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "value"
+    name: "prefix"
     type: DT_STRING
   }
-  is_stateful: true
-}
-op {
-  name: "ReaderReset"
   input_arg {
-    name: "reader_handle"
+    name: "tensor_names"
     type: DT_STRING
-    is_ref: true
   }
-}
-op {
-  name: "ReaderResetV2"
-  input_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
-  }
-  is_stateful: true
-}
-op {
-  name: "ReaderRestoreState"
   input_arg {
-    name: "reader_handle"
+    name: "shape_and_slices"
     type: DT_STRING
-    is_ref: true
   }
-  input_arg {
-    name: "state"
-    type: DT_STRING
+  output_arg {
+    name: "tensors"
+    type_list_attr: "dtypes"
+  }
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "ReaderRestoreStateV2"
+  name: "RestoreV2"
   input_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
+    name: "prefix"
+    type: DT_STRING
   }
   input_arg {
-    name: "state"
+    name: "tensor_names"
     type: DT_STRING
   }
-  is_stateful: true
-}
-op {
-  name: "ReaderSerializeState"
   input_arg {
-    name: "reader_handle"
+    name: "shape_and_slices"
     type: DT_STRING
-    is_ref: true
   }
   output_arg {
-    name: "state"
-    type: DT_STRING
-  }
-}
-op {
-  name: "ReaderSerializeStateV2"
-  input_arg {
-    name: "reader_handle"
-    type: DT_RESOURCE
+    name: "tensors"
+    type_list_attr: "dtypes"
   }
-  output_arg {
-    name: "state"
-    type: DT_STRING
+  attr {
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   is_stateful: true
 }
 op {
-  name: "Real"
+  name: "Reverse"
   input_arg {
-    name: "input"
+    name: "tensor"
     type_attr: "T"
   }
+  input_arg {
+    name: "dims"
+    type: DT_BOOL
+  }
   output_arg {
     name: "output"
-    type_attr: "Tout"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    default_value {
-      type: DT_COMPLEX64
-    }
     allowed_values {
       list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
     }
   }
+}
+op {
+  name: "Reverse"
+  input_arg {
+    name: "tensor"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dims"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
-    name: "Tout"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "RealDiv"
+  name: "Reverse"
   input_arg {
-    name: "x"
+    name: "tensor"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "dims"
+    type: DT_BOOL
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -25154,66 +45991,103 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_UINT8
         type: DT_INT8
         type: DT_UINT16
         type: DT_INT16
         type: DT_INT32
         type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "Reciprocal"
+  name: "ReverseSequence"
   input_arg {
-    name: "x"
+    name: "input"
     type_attr: "T"
   }
+  input_arg {
+    name: "seq_lengths"
+    type_attr: "Tlen"
+  }
   output_arg {
-    name: "y"
+    name: "output"
     type_attr: "T"
   }
+  attr {
+    name: "seq_dim"
+    type: "int"
+  }
+  attr {
+    name: "batch_dim"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
   attr {
     name: "T"
     type: "type"
+  }
+  attr {
+    name: "Tlen"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "ReciprocalGrad"
+  name: "ReverseV2"
   input_arg {
-    name: "x"
+    name: "tensor"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
-    type_attr: "T"
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
@@ -25224,287 +46098,287 @@ op {
   }
 }
 op {
-  name: "ReciprocalGrad"
+  name: "ReverseV2"
   input_arg {
-    name: "y"
+    name: "tensor"
     type_attr: "T"
   }
   input_arg {
-    name: "dy"
-    type_attr: "T"
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
-    name: "z"
+    name: "output"
     type_attr: "T"
   }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "RecordInput"
-  output_arg {
-    name: "records"
-    type: DT_STRING
-  }
-  attr {
-    name: "file_pattern"
-    type: "string"
-  }
-  attr {
-    name: "file_random_seed"
-    type: "int"
-    default_value {
-      i: 301
-    }
-  }
-  attr {
-    name: "file_shuffle_shift_ratio"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "file_buffer_size"
-    type: "int"
-    default_value {
-      i: 10000
-    }
-  }
-  attr {
-    name: "file_parallelism"
-    type: "int"
-    default_value {
-      i: 16
-    }
-  }
-  attr {
-    name: "batch_size"
-    type: "int"
-    default_value {
-      i: 32
-    }
-  }
-  is_stateful: true
-}
-op {
-  name: "ReduceJoin"
+  name: "ReverseV2"
   input_arg {
-    name: "inputs"
-    type: DT_STRING
+    name: "tensor"
+    type_attr: "T"
   }
   input_arg {
-    name: "reduction_indices"
-    type: DT_INT32
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    type: DT_STRING
+    type_attr: "T"
   }
   attr {
-    name: "keep_dims"
-    type: "bool"
+    name: "Tidx"
+    type: "type"
     default_value {
-      b: false
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "separator"
-    type: "string"
-    default_value {
-      s: ""
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_STRING
+      }
     }
   }
 }
 op {
-  name: "RefEnter"
+  name: "ReverseV2"
   input_arg {
-    name: "data"
+    name: "tensor"
     type_attr: "T"
-    is_ref: true
+  }
+  input_arg {
+    name: "axis"
+    type_attr: "Tidx"
   }
   output_arg {
     name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
-    name: "T"
+    name: "Tidx"
     type: "type"
-  }
-  attr {
-    name: "frame_name"
-    type: "string"
-  }
-  attr {
-    name: "is_constant"
-    type: "bool"
     default_value {
-      b: false
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "parallel_iterations"
-    type: "int"
-    default_value {
-      i: 10
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BOOL
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_STRING
+      }
     }
   }
 }
 op {
-  name: "RefExit"
+  name: "RightShift"
   input_arg {
-    name: "data"
-    type_attr: "T"
-    is_ref: true
-  }
-  output_arg {
-    name: "output"
+    name: "x"
     type_attr: "T"
-    is_ref: true
-  }
-  attr {
-    name: "T"
-    type: "type"
   }
-}
-op {
-  name: "RefIdentity"
   input_arg {
-    name: "input"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   output_arg {
-    name: "output"
+    name: "z"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
-  allows_uninitialized_input: true
+  is_commutative: true
 }
 op {
-  name: "RefMerge"
+  name: "Rint"
   input_arg {
-    name: "inputs"
+    name: "x"
     type_attr: "T"
-    number_attr: "N"
-    is_ref: true
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
-    is_ref: true
-  }
-  output_arg {
-    name: "value_index"
-    type: DT_INT32
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
 }
 op {
-  name: "RefNextIteration"
+  name: "Rint"
   input_arg {
-    name: "data"
+    name: "x"
     type_attr: "T"
-    is_ref: true
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
 }
 op {
-  name: "RefSelect"
-  input_arg {
-    name: "index"
-    type: DT_INT32
-  }
+  name: "Round"
   input_arg {
-    name: "inputs"
+    name: "x"
     type_attr: "T"
-    number_attr: "N"
-    is_ref: true
   }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
   }
 }
 op {
-  name: "RefSwitch"
-  input_arg {
-    name: "data"
-    type_attr: "T"
-    is_ref: true
-  }
+  name: "Round"
   input_arg {
-    name: "pred"
-    type: DT_BOOL
-  }
-  output_arg {
-    name: "output_false"
+    name: "x"
     type_attr: "T"
-    is_ref: true
   }
   output_arg {
-    name: "output_true"
+    name: "y"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
   }
-  allows_uninitialized_input: true
 }
 op {
-  name: "Relu"
+  name: "Rsqrt"
   input_arg {
-    name: "features"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "activations"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -25512,27 +46386,23 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Relu"
+  name: "Rsqrt"
   input_arg {
-    name: "features"
+    name: "x"
     type_attr: "T"
   }
   output_arg {
-    name: "activations"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -25540,29 +46410,28 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Relu6"
+  name: "RsqrtGrad"
   input_arg {
-    name: "features"
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
     type_attr: "T"
   }
   output_arg {
-    name: "activations"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -25570,27 +46439,27 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Relu6"
+  name: "RsqrtGrad"
   input_arg {
-    name: "features"
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
     type_attr: "T"
   }
   output_arg {
-    name: "activations"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -25598,33 +46467,27 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Relu6Grad"
+  name: "RsqrtGrad"
   input_arg {
-    name: "gradients"
+    name: "y"
     type_attr: "T"
   }
   input_arg {
-    name: "features"
+    name: "dy"
     type_attr: "T"
   }
   output_arg {
-    name: "backprops"
+    name: "z"
     type_attr: "T"
   }
   attr {
@@ -25632,526 +46495,543 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "Relu6Grad"
+  name: "SampleDistortedBoundingBox"
   input_arg {
-    name: "gradients"
+    name: "image_size"
     type_attr: "T"
   }
   input_arg {
-    name: "features"
+    name: "bounding_boxes"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "begin"
     type_attr: "T"
   }
   output_arg {
-    name: "backprops"
+    name: "size"
     type_attr: "T"
   }
+  output_arg {
+    name: "bboxes"
+    type: DT_FLOAT
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
-        type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "min_object_covered"
+    type: "float"
+    default_value {
+      f: 0.1
+    }
+  }
+  attr {
+    name: "aspect_ratio_range"
+    type: "list(float)"
+    default_value {
+      list {
+        f: 0.75
+        f: 1.33
+      }
+    }
+  }
+  attr {
+    name: "area_range"
+    type: "list(float)"
+    default_value {
+      list {
+        f: 0.05
+        f: 1
       }
     }
   }
+  attr {
+    name: "max_attempts"
+    type: "int"
+    default_value {
+      i: 100
+    }
+  }
+  attr {
+    name: "use_image_if_no_bounding_boxes"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "ReluGrad"
+  name: "SampleDistortedBoundingBoxV2"
   input_arg {
-    name: "gradients"
+    name: "image_size"
     type_attr: "T"
   }
   input_arg {
-    name: "features"
+    name: "bounding_boxes"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "min_object_covered"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "begin"
     type_attr: "T"
   }
   output_arg {
-    name: "backprops"
+    name: "size"
     type_attr: "T"
   }
+  output_arg {
+    name: "bboxes"
+    type: DT_FLOAT
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
-        type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
-}
-op {
-  name: "ReluGrad"
-  input_arg {
-    name: "gradients"
-    type_attr: "T"
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
-  input_arg {
-    name: "features"
-    type_attr: "T"
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
   }
-  output_arg {
-    name: "backprops"
-    type_attr: "T"
+  attr {
+    name: "aspect_ratio_range"
+    type: "list(float)"
+    default_value {
+      list {
+        f: 0.75
+        f: 1.33
+      }
+    }
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
+    name: "area_range"
+    type: "list(float)"
+    default_value {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        f: 0.05
+        f: 1
       }
     }
   }
+  attr {
+    name: "max_attempts"
+    type: "int"
+    default_value {
+      i: 100
+    }
+  }
+  attr {
+    name: "use_image_if_no_bounding_boxes"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  is_stateful: true
 }
 op {
-  name: "RemoteCall"
+  name: "Save"
   input_arg {
-    name: "target"
+    name: "filename"
     type: DT_STRING
   }
   input_arg {
-    name: "args"
-    type_list_attr: "Tin"
-  }
-  output_arg {
-    name: "output"
-    type_list_attr: "Tout"
+    name: "tensor_names"
+    type: DT_STRING
   }
-  attr {
-    name: "Tin"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "data"
+    type_list_attr: "T"
   }
   attr {
-    name: "Tout"
+    name: "T"
     type: "list(type)"
     has_minimum: true
     minimum: 1
   }
-  attr {
-    name: "f"
-    type: "func"
-  }
 }
 op {
-  name: "RemoteFusedGraphExecute"
+  name: "Save"
   input_arg {
-    name: "inputs"
-    type_list_attr: "Tinputs"
+    name: "filename"
+    type: DT_STRING
   }
-  output_arg {
-    name: "outputs"
-    type_list_attr: "Toutputs"
+  input_arg {
+    name: "tensor_names"
+    type: DT_STRING
   }
-  attr {
-    name: "Tinputs"
-    type: "list(type)"
-    has_minimum: true
+  input_arg {
+    name: "data"
+    type_list_attr: "T"
   }
   attr {
-    name: "Toutputs"
+    name: "T"
     type: "list(type)"
     has_minimum: true
+    minimum: 1
   }
-  attr {
-    name: "serialized_remote_fused_graph_execute_info"
-    type: "string"
-  }
+  is_stateful: true
 }
 op {
-  name: "RepeatDataset"
+  name: "SaveSlices"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "filename"
+    type: DT_STRING
   }
   input_arg {
-    name: "count"
-    type: DT_INT64
+    name: "tensor_names"
+    type: DT_STRING
   }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
+  input_arg {
+    name: "shapes_and_slices"
+    type: DT_STRING
   }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "data"
+    type_list_attr: "T"
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
+    name: "T"
+    type: "list(type)"
     has_minimum: true
     minimum: 1
   }
-  is_stateful: true
 }
 op {
-  name: "RepeatDataset"
+  name: "SaveSlices"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "filename"
+    type: DT_STRING
   }
   input_arg {
-    name: "count"
-    type: DT_INT64
+    name: "tensor_names"
+    type: DT_STRING
   }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
+  input_arg {
+    name: "shapes_and_slices"
+    type: DT_STRING
   }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "data"
+    type_list_attr: "T"
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
+    name: "T"
+    type: "list(type)"
     has_minimum: true
     minimum: 1
   }
+  is_stateful: true
 }
 op {
-  name: "RequantizationRange"
+  name: "SaveV2"
   input_arg {
-    name: "input"
-    type_attr: "Tinput"
+    name: "prefix"
+    type: DT_STRING
   }
   input_arg {
-    name: "input_min"
-    type: DT_FLOAT
+    name: "tensor_names"
+    type: DT_STRING
   }
   input_arg {
-    name: "input_max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
+    name: "shape_and_slices"
+    type: DT_STRING
   }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
+  input_arg {
+    name: "tensors"
+    type_list_attr: "dtypes"
   }
   attr {
-    name: "Tinput"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "Requantize"
-  input_arg {
-    name: "input"
-    type_attr: "Tinput"
-  }
+  name: "SaveV2"
   input_arg {
-    name: "input_min"
-    type: DT_FLOAT
+    name: "prefix"
+    type: DT_STRING
   }
   input_arg {
-    name: "input_max"
-    type: DT_FLOAT
+    name: "tensor_names"
+    type: DT_STRING
   }
   input_arg {
-    name: "requested_output_min"
-    type: DT_FLOAT
+    name: "shape_and_slices"
+    type: DT_STRING
   }
   input_arg {
-    name: "requested_output_max"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
-  }
-  output_arg {
-    name: "output_min"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "output_max"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "Tinput"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+    name: "tensors"
+    type_list_attr: "dtypes"
   }
   attr {
-    name: "out_type"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT16
-        type: DT_QUINT16
-        type: DT_QINT32
-      }
-    }
+    name: "dtypes"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
+  is_stateful: true
 }
 op {
-  name: "Reshape"
+  name: "ScalarSummary"
   input_arg {
-    name: "tensor"
-    type_attr: "T"
+    name: "tags"
+    type: DT_STRING
   }
   input_arg {
-    name: "shape"
-    type_attr: "Tshape"
+    name: "values"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "summary"
+    type: DT_STRING
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tshape"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "ResizeArea"
+  name: "ScalarSummary"
   input_arg {
-    name: "images"
-    type_attr: "T"
+    name: "tags"
+    type: DT_STRING
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "values"
+    type_attr: "T"
   }
   output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
+    name: "summary"
+    type: DT_STRING
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
         type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "align_corners"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ResizeArea"
+  name: "ScalarSummary"
   input_arg {
-    name: "images"
-    type_attr: "T"
+    name: "tags"
+    type: DT_STRING
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "values"
+    type_attr: "T"
   }
   output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
+    name: "summary"
+    type: DT_STRING
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
+        type: DT_INT8
         type: DT_UINT16
-        type: DT_INT32
-        type: DT_INT64
         type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-  attr {
-    name: "align_corners"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ResizeBicubic"
+  name: "ScalarSummary"
   input_arg {
-    name: "images"
-    type_attr: "T"
+    name: "tags"
+    type: DT_STRING
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "values"
+    type_attr: "T"
   }
   output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
+    name: "summary"
+    type: DT_STRING
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT8
         type: DT_INT16
-        type: DT_INT32
+        type: DT_INT8
         type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
         type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "align_corners"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ResizeBicubic"
+  name: "ScanDataset"
   input_arg {
-    name: "images"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "initial_state"
+    type_list_attr: "Tstate"
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
   }
   output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT8
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_UINT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+    name: "f"
+    type: "func"
   }
   attr {
-    name: "align_corners"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "Tstate"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
 }
 op {
-  name: "ResizeBicubicGrad"
+  name: "ScatterAdd"
   input_arg {
-    name: "grads"
-    type: DT_FLOAT
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "original_image"
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "updates"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -26160,49 +47040,33 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
   attr {
-    name: "align_corners"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-}
-op {
-  name: "ResizeBilinear"
-  input_arg {
-    name: "images"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "size"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
@@ -26210,38 +47074,61 @@ op {
   }
 }
 op {
-  name: "ResizeBilinear"
+  name: "ScatterAdd"
   input_arg {
-    name: "images"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "updates"
+    type_attr: "T"
   }
   output_arg {
-    name: "resized_images"
-    type: DT_FLOAT
+    name: "output_ref"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT16
         type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
@@ -26249,18 +47136,24 @@ op {
   }
 }
 op {
-  name: "ResizeBilinearGrad"
+  name: "ScatterAdd"
   input_arg {
-    name: "grads"
-    type: DT_FLOAT
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "original_image"
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "updates"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -26268,51 +47161,37 @@ op {
     allowed_values {
       list {
         type: DT_FLOAT
-        type: DT_HALF
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "align_corners"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-}
-op {
-  name: "ResizeNearestNeighbor"
-  input_arg {
-    name: "images"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "size"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "resized_images"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
@@ -26320,74 +47199,62 @@ op {
   }
 }
 op {
-  name: "ResizeNearestNeighbor"
+  name: "ScatterAdd"
   input_arg {
-    name: "images"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "size"
-    type: DT_INT32
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "updates"
+    type_attr: "T"
   }
   output_arg {
-    name: "resized_images"
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
         type: DT_UINT8
         type: DT_INT16
-        type: DT_UINT16
-        type: DT_INT32
+        type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "align_corners"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-}
-op {
-  name: "ResizeNearestNeighborGrad"
-  input_arg {
-    name: "grads"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "size"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
         type: DT_INT32
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT64
       }
     }
   }
   attr {
-    name: "align_corners"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
@@ -26395,34 +47262,24 @@ op {
   }
 }
 op {
-  name: "ResourceApplyAdadelta"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum_update"
-    type: DT_RESOURCE
-  }
+  name: "ScatterDiv"
   input_arg {
-    name: "lr"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "rho"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -26446,6 +47303,16 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -26453,37 +47320,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyAdadelta"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum_update"
-    type: DT_RESOURCE
-  }
+  name: "ScatterDiv"
   input_arg {
-    name: "lr"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "rho"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -26509,6 +47365,16 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -26516,25 +47382,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyAdagrad"
+  name: "ScatterDiv"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -26555,6 +47422,19 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -26565,25 +47445,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyAdagrad"
+  name: "ScatterDiv"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -26592,23 +47473,34 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -26616,41 +47508,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyAdagradDA"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "gradient_accumulator"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "gradient_squared_accumulator"
-    type: DT_RESOURCE
-  }
+  name: "ScatterMul"
   input_arg {
-    name: "grad"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l1"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "l2"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
-  }
-  input_arg {
-    name: "global_step"
-    type: DT_INT64
+    is_ref: true
   }
   attr {
     name: "T"
@@ -26674,6 +47551,16 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -26681,41 +47568,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyAdagradDA"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "gradient_accumulator"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "gradient_squared_accumulator"
-    type: DT_RESOURCE
-  }
+  name: "ScatterMul"
   input_arg {
-    name: "grad"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l1"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "l2"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
-  }
-  input_arg {
-    name: "global_step"
-    type: DT_INT64
+    is_ref: true
   }
   attr {
     name: "T"
@@ -26741,6 +47613,16 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -26748,49 +47630,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyAdam"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "m"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "v"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "beta1_power"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "beta2_power"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ScatterMul"
   input_arg {
-    name: "beta1"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "beta2"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -26811,6 +47670,19 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -26821,49 +47693,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyAdam"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "m"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "v"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "beta1_power"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "beta2_power"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ScatterMul"
   input_arg {
-    name: "beta1"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "beta2"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -26872,78 +47721,94 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "use_nesterov"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyAdam"
+  name: "ScatterNd"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "m"
-    type: DT_RESOURCE
+    name: "updates"
+    type_attr: "T"
   }
   input_arg {
-    name: "v"
-    type: DT_RESOURCE
+    name: "shape"
+    type_attr: "Tindices"
   }
-  input_arg {
-    name: "beta1_power"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "beta2_power"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
   }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
+}
+op {
+  name: "ScatterNdAdd"
   input_arg {
-    name: "beta1"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "beta2"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -26964,56 +47829,46 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "use_nesterov"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyAddSign"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "m"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ScatterNdAdd"
   input_arg {
-    name: "alpha"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "sign_decay"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "beta"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -27039,6 +47894,16 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -27046,45 +47911,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyCenteredRMSProp"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mg"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "ms"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mom"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ScatterNdAdd"
   input_arg {
-    name: "rho"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "momentum"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -27105,6 +47951,19 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -27115,45 +47974,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyCenteredRMSProp"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mg"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "ms"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mom"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ScatterNdAdd"
   input_arg {
-    name: "rho"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "momentum"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "epsilon"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -27162,23 +48002,34 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -27186,40 +48037,23 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyFtrl"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
+  name: "ScatterNdNonAliasingAdd"
   input_arg {
-    name: "lr"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -27245,46 +48079,32 @@ op {
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyFtrl"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
+  name: "ScatterNdNonAliasingAdd"
   input_arg {
-    name: "lr"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -27312,50 +48132,32 @@ op {
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyFtrlV2"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ScatterNdNonAliasingAdd"
   input_arg {
-    name: "l1"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2_shrinkage"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -27377,54 +48179,39 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyFtrlV2"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "ScatterNdNonAliasingAdd"
   input_arg {
-    name: "l1"
+    name: "input"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2_shrinkage"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -27434,17 +48221,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27452,28 +48240,36 @@ op {
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyGradientDescent"
+  name: "ScatterNdSub"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "alpha"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "delta"
+    name: "updates"
     type_attr: "T"
   }
+  output_arg {
+    name: "output_ref"
+    type_attr: "T"
+    is_ref: true
+  }
   attr {
     name: "T"
     type: "type"
@@ -27496,6 +48292,16 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -27503,21 +48309,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyGradientDescent"
+  name: "ScatterNdSub"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "alpha"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "delta"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -27543,6 +48354,16 @@ op {
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   attr {
     name: "use_locking"
     type: "bool"
@@ -27550,29 +48371,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyMomentum"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
+  name: "ScatterNdSub"
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "grad"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "momentum"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -27593,46 +48411,49 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "use_nesterov"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyMomentum"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
+  name: "ScatterNdSub"
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "grad"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "momentum"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -27641,17 +48462,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27659,50 +48481,84 @@ op {
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "use_nesterov"
+    name: "use_locking"
     type: "bool"
     default_value {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyPowerSign"
+  name: "ScatterNdUpdate"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "ref"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "m"
-    type: DT_RESOURCE
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "lr"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "logbase"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
+}
+op {
+  name: "ScatterSub"
   input_arg {
-    name: "sign_decay"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "beta"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "grad"
+    name: "updates"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -27723,8 +48579,16 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -27735,33 +48599,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyProximalAdagrad"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
+  name: "ScatterSub"
   input_arg {
-    name: "lr"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -27782,6 +48639,18 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -27792,33 +48661,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyProximalAdagrad"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
+  name: "ScatterSub"
   input_arg {
-    name: "lr"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -27841,6 +48703,17 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -27851,29 +48724,26 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyProximalGradientDescent"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
+  name: "ScatterSub"
   input_arg {
-    name: "alpha"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "delta"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -27882,18 +48752,31 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
@@ -27904,51 +48787,38 @@ op {
       b: false
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyProximalGradientDescent"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
+  name: "ScatterUpdate"
   input_arg {
-    name: "alpha"
+    name: "ref"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
   input_arg {
-    name: "l2"
+    name: "updates"
     type_attr: "T"
   }
-  input_arg {
-    name: "delta"
+  output_arg {
+    name: "output_ref"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_INT64
       }
     }
   }
@@ -27956,108 +48826,215 @@ op {
     name: "use_locking"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceApplyRMSProp"
+  name: "SdcaFprint"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "input"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "output"
+    type: DT_INT64
   }
+}
+op {
+  name: "SdcaOptimizer"
   input_arg {
-    name: "ms"
-    type: DT_RESOURCE
+    name: "sparse_example_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
   }
   input_arg {
-    name: "mom"
-    type: DT_RESOURCE
+    name: "sparse_feature_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "sparse_feature_values"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features_with_values"
   }
   input_arg {
-    name: "rho"
-    type_attr: "T"
+    name: "dense_features"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
   }
   input_arg {
-    name: "momentum"
-    type_attr: "T"
+    name: "example_weights"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "epsilon"
-    type_attr: "T"
+    name: "example_labels"
+    type: DT_FLOAT
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "sparse_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  input_arg {
+    name: "example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_delta_sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  output_arg {
+    name: "out_delta_dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "loss_type"
+    type: "string"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
+        s: "logistic_loss"
+        s: "squared_loss"
+        s: "hinge_loss"
+        s: "smooth_hinge_loss"
       }
     }
   }
   attr {
-    name: "use_locking"
+    name: "adaptative"
     type: "bool"
     default_value {
       b: false
     }
   }
-  is_stateful: true
+  attr {
+    name: "num_sparse_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_sparse_features_with_values"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_dense_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "l1"
+    type: "float"
+  }
+  attr {
+    name: "l2"
+    type: "float"
+  }
+  attr {
+    name: "num_loss_partitions"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_inner_iterations"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
 }
 op {
-  name: "ResourceApplyRMSProp"
+  name: "SdcaShrinkL1"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "weights"
+    type: DT_FLOAT
+    number_attr: "num_features"
+    is_ref: true
   }
-  input_arg {
-    name: "ms"
-    type: DT_RESOURCE
+  attr {
+    name: "num_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "l1"
+    type: "float"
+  }
+  attr {
+    name: "l2"
+    type: "float"
   }
+}
+op {
+  name: "SegmentMax"
   input_arg {
-    name: "mom"
-    type: DT_RESOURCE
+    name: "data"
+    type_attr: "T"
   }
   input_arg {
-    name: "lr"
+    name: "segment_ids"
+    type_attr: "Tindices"
+  }
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "rho"
-    type_attr: "T"
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
+}
+op {
+  name: "SegmentMax"
   input_arg {
-    name: "momentum"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
-    type_attr: "T"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -28067,17 +49044,12 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -28085,30 +49057,52 @@ op {
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceCountUpTo"
+  name: "SegmentMax"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
   attr {
-    name: "limit"
-    type: "int"
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
@@ -28117,32 +49111,40 @@ op {
       }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceGather"
+  name: "SegmentMax"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "data"
+    type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
   output_arg {
     name: "output"
-    type_attr: "dtype"
-  }
-  attr {
-    name: "validate_indices"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
     name: "Tindices"
@@ -28154,40 +49156,34 @@ op {
       }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceScatterAdd"
+  name: "SegmentMean"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "data"
+    type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
-  input_arg {
-    name: "updates"
-    type_attr: "dtype"
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
       }
     }
@@ -28202,40 +49198,34 @@ op {
       }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceScatterAdd"
+  name: "SegmentMean"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "data"
+    type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
-  input_arg {
-    name: "updates"
-    type_attr: "dtype"
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -28252,43 +49242,38 @@ op {
       }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceScatterUpdate"
+  name: "SegmentMean"
   input_arg {
-    name: "resource"
-    type: DT_RESOURCE
+    name: "data"
+    type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
-  input_arg {
-    name: "updates"
-    type_attr: "dtype"
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28302,42 +49287,21 @@ op {
       }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyAdadelta"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum_update"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "SegmentMean"
   input_arg {
-    name: "rho"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
-    type_attr: "T"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   attr {
     name: "T"
     type: "type"
@@ -28345,18 +49309,16 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -28370,49 +49332,21 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyAdadelta"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum_update"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "SegmentMin"
   input_arg {
-    name: "rho"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
-    type_attr: "T"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   attr {
     name: "T"
     type: "type"
@@ -28420,20 +49354,13 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -28447,37 +49374,21 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyAdagrad"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "SegmentMin"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
     name: "T"
     type: "type"
@@ -28485,18 +49396,15 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -28510,37 +49418,21 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyAdagrad"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "SegmentMin"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
     name: "T"
     type: "type"
@@ -28548,20 +49440,16 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28575,53 +49463,21 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyAdagradDA"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "gradient_accumulator"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "gradient_squared_accumulator"
-    type: DT_RESOURCE
-  }
+  name: "SegmentMin"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "global_step"
-    type: DT_INT64
-  }
   attr {
     name: "T"
     type: "type"
@@ -28629,18 +49485,16 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -28654,53 +49508,21 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyAdagradDA"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "gradient_accumulator"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "gradient_squared_accumulator"
-    type: DT_RESOURCE
-  }
+  name: "SegmentProd"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "global_step"
-    type: DT_INT64
-  }
   attr {
     name: "T"
     type: "type"
@@ -28720,8 +49542,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -28735,57 +49555,21 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyCenteredRMSProp"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mg"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "ms"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mom"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "rho"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "momentum"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "epsilon"
-    type_attr: "T"
-  }
+  name: "SegmentProd"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
     name: "T"
     type: "type"
@@ -28805,6 +49589,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -28818,57 +49604,21 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyCenteredRMSProp"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mg"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "ms"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "mom"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "rho"
-    type_attr: "T"
-  }
+  name: "SegmentProd"
   input_arg {
-    name: "momentum"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
-    type_attr: "T"
+    name: "segment_ids"
+    type_attr: "Tindices"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   attr {
     name: "T"
     type: "type"
@@ -28890,6 +49640,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -28903,51 +49654,19 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyFtrl"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
+  name: "SegmentProd"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -28957,18 +49676,21 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -28982,51 +49704,19 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyFtrl"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
+  name: "SegmentSum"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -29048,8 +49738,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -29063,55 +49751,19 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyFtrlV2"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
+  name: "SegmentSum"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2_shrinkage"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -29133,6 +49785,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -29146,55 +49800,19 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyFtrlV2"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "linear"
-    type: DT_RESOURCE
-  }
+  name: "SegmentSum"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l2_shrinkage"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "lr_power"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -29218,6 +49836,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -29231,39 +49850,19 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyMomentum"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "SegmentSum"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
+    name: "segment_ids"
     type_attr: "Tindices"
   }
-  input_arg {
-    name: "momentum"
+  output_arg {
+    name: "output"
     type_attr: "T"
   }
   attr {
@@ -29273,18 +49872,21 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -29298,46 +49900,177 @@ op {
       }
     }
   }
+}
+op {
+  name: "Select"
+  input_arg {
+    name: "condition"
+    type: DT_BOOL
+  }
+  input_arg {
+    name: "t"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "e"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
-    name: "use_locking"
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "SelfAdjointEig"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
+  }
+  deprecation {
+    version: 11
+  }
+}
+op {
+  name: "SelfAdjointEigV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "e"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  attr {
+    name: "compute_v"
     type: "bool"
     default_value {
-      b: false
+      b: true
     }
   }
   attr {
-    name: "use_nesterov"
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+      }
+    }
+  }
+}
+op {
+  name: "SelfAdjointEigV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "e"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  attr {
+    name: "compute_v"
     type: "bool"
     default_value {
-      b: false
+      b: true
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyMomentum"
+  name: "Selu"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "features"
+    type_attr: "T"
   }
-  input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
+}
+op {
+  name: "Selu"
   input_arg {
-    name: "lr"
+    name: "features"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "activations"
     type_attr: "T"
   }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "SeluGrad"
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "momentum"
+    name: "outputs"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
     type_attr: "T"
   }
   attr {
@@ -29345,246 +50078,246 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
+}
+op {
+  name: "SeluGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "outputs"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
   attr {
-    name: "Tindices"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
+}
+op {
+  name: "SerializeIterator"
+  input_arg {
+    name: "resource_handle"
+    type: DT_RESOURCE
   }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  output_arg {
+    name: "serialized"
+    type: DT_VARIANT
   }
   is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyProximalAdagrad"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
+  name: "SerializeManySparse"
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "sparse_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "lr"
+    name: "sparse_values"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "sparse_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "serialized_sparse"
+    type: DT_STRING
+  }
+  attr {
+    name: "T"
+    type: "type"
   }
+}
+op {
+  name: "SerializeManySparse"
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "sparse_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
+    name: "sparse_values"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "sparse_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "serialized_sparse"
+    type_attr: "out_type"
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
   }
   attr {
-    name: "Tindices"
+    name: "out_type"
     type: "type"
+    default_value {
+      type: DT_STRING
+    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_STRING
+        type: DT_VARIANT
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyProximalAdagrad"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
+  name: "SerializeSparse"
   input_arg {
-    name: "accum"
-    type: DT_RESOURCE
+    name: "sparse_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "lr"
+    name: "sparse_values"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "sparse_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "serialized_sparse"
+    type: DT_STRING
   }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "SerializeSparse"
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "sparse_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
+    name: "sparse_values"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "sparse_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "serialized_sparse"
+    type_attr: "out_type"
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
   }
   attr {
-    name: "Tindices"
+    name: "out_type"
     type: "type"
+    default_value {
+      type: DT_STRING
+    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_STRING
+        type: DT_VARIANT
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyProximalGradientDescent"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
+  name: "SerializeTensor"
   input_arg {
-    name: "alpha"
+    name: "tensor"
     type_attr: "T"
   }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
+  output_arg {
+    name: "serialized"
+    type: DT_STRING
+  }
+  attr {
+    name: "T"
+    type: "type"
   }
+}
+op {
+  name: "SetSize"
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "set_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
+    name: "set_values"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "set_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  attr {
+    name: "validate_indices"
+    type: "bool"
+    default_value {
+      b: true
+    }
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
+        type: DT_INT8
+        type: DT_INT16
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
         type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
+        type: DT_STRING
       }
     }
   }
+}
+op {
+  name: "Shape"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+  }
   attr {
-    name: "Tindices"
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "out_type"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
         type: DT_INT32
@@ -29592,68 +50325,35 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyProximalGradientDescent"
-  input_arg {
-    name: "var"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "alpha"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
-  }
+  name: "ShapeN"
   input_arg {
-    name: "l2"
+    name: "input"
     type_attr: "T"
+    number_attr: "N"
   }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
+  output_arg {
+    name: "output"
+    type_attr: "out_type"
+    number_attr: "N"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 1
   }
   attr {
     name: "T"
     type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
   }
   attr {
-    name: "Tindices"
+    name: "out_type"
     type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
         type: DT_INT32
@@ -29661,425 +50361,437 @@ op {
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyRMSProp"
+  name: "ShardedFilename"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "basename"
+    type: DT_STRING
   }
   input_arg {
-    name: "ms"
-    type: DT_RESOURCE
+    name: "shard"
+    type: DT_INT32
   }
   input_arg {
-    name: "mom"
-    type: DT_RESOURCE
+    name: "num_shards"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "filename"
+    type: DT_STRING
   }
+}
+op {
+  name: "ShardedFilespec"
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "basename"
+    type: DT_STRING
   }
   input_arg {
-    name: "rho"
-    type_attr: "T"
+    name: "num_shards"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "filename"
+    type: DT_STRING
   }
+}
+op {
+  name: "ShuffleAndRepeatDataset"
   input_arg {
-    name: "momentum"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "epsilon"
-    type_attr: "T"
+    name: "buffer_size"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "seed"
+    type: DT_INT64
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "seed2"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "count"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-      }
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
   attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ShuffleDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed2"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
   is_stateful: true
 }
 op {
-  name: "ResourceSparseApplyRMSProp"
+  name: "ShuffleDataset"
   input_arg {
-    name: "var"
-    type: DT_RESOURCE
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "ms"
-    type: DT_RESOURCE
+    name: "buffer_size"
+    type: DT_INT64
   }
   input_arg {
-    name: "mom"
-    type: DT_RESOURCE
+    name: "seed"
+    type: DT_INT64
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "seed2"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
   }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "ShuffleDataset"
   input_arg {
-    name: "rho"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "momentum"
-    type_attr: "T"
+    name: "buffer_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed2"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "reshuffle_each_iteration"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
   }
+}
+op {
+  name: "Sigmoid"
   input_arg {
-    name: "epsilon"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "grad"
+  output_arg {
+    name: "y"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
+}
+op {
+  name: "Sigmoid"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
   attr {
-    name: "Tindices"
+    name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "ResourceStridedSliceAssign"
-  input_arg {
-    name: "ref"
-    type: DT_RESOURCE
-  }
-  input_arg {
-    name: "begin"
-    type_attr: "Index"
-  }
+  name: "SigmoidGrad"
   input_arg {
-    name: "end"
-    type_attr: "Index"
+    name: "x"
+    type_attr: "T"
   }
   input_arg {
-    name: "strides"
-    type_attr: "Index"
+    name: "y"
+    type_attr: "T"
   }
-  input_arg {
-    name: "value"
+  output_arg {
+    name: "z"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Index"
-    type: "type"
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
-  attr {
-    name: "begin_mask"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "end_mask"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "ellipsis_mask"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "new_axis_mask"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "shrink_axis_mask"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "Restore"
+  name: "SigmoidGrad"
   input_arg {
-    name: "file_pattern"
-    type: DT_STRING
+    name: "y"
+    type_attr: "T"
   }
   input_arg {
-    name: "tensor_name"
-    type: DT_STRING
+    name: "dy"
+    type_attr: "T"
   }
   output_arg {
-    name: "tensor"
-    type_attr: "dt"
+    name: "z"
+    type_attr: "T"
   }
   attr {
-    name: "dt"
+    name: "T"
     type: "type"
-  }
-  attr {
-    name: "preferred_shard"
-    type: "int"
-    default_value {
-      i: -1
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
 }
 op {
-  name: "Restore"
+  name: "SigmoidGrad"
   input_arg {
-    name: "file_pattern"
-    type: DT_STRING
+    name: "y"
+    type_attr: "T"
   }
   input_arg {
-    name: "tensor_name"
-    type: DT_STRING
+    name: "dy"
+    type_attr: "T"
   }
   output_arg {
-    name: "tensor"
-    type_attr: "dt"
+    name: "z"
+    type_attr: "T"
   }
   attr {
-    name: "dt"
+    name: "T"
     type: "type"
-  }
-  attr {
-    name: "preferred_shard"
-    type: "int"
-    default_value {
-      i: -1
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "RestoreSlice"
-  input_arg {
-    name: "file_pattern"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_name"
-    type: DT_STRING
-  }
+  name: "Sign"
   input_arg {
-    name: "shape_and_slice"
-    type: DT_STRING
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "tensor"
-    type_attr: "dt"
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "dt"
+    name: "T"
     type: "type"
-  }
-  attr {
-    name: "preferred_shard"
-    type: "int"
-    default_value {
-      i: -1
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
 }
 op {
-  name: "RestoreSlice"
-  input_arg {
-    name: "file_pattern"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_name"
-    type: DT_STRING
-  }
+  name: "Sign"
   input_arg {
-    name: "shape_and_slice"
-    type: DT_STRING
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "tensor"
-    type_attr: "dt"
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "dt"
+    name: "T"
     type: "type"
-  }
-  attr {
-    name: "preferred_shard"
-    type: "int"
-    default_value {
-      i: -1
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
     }
   }
-  is_stateful: true
 }
 op {
-  name: "RestoreV2"
-  input_arg {
-    name: "prefix"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_names"
-    type: DT_STRING
-  }
+  name: "Sin"
   input_arg {
-    name: "shape_and_slices"
-    type: DT_STRING
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "tensors"
-    type_list_attr: "dtypes"
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
   }
 }
 op {
-  name: "RestoreV2"
-  input_arg {
-    name: "prefix"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_names"
-    type: DT_STRING
-  }
+  name: "Sin"
   input_arg {
-    name: "shape_and_slices"
-    type: DT_STRING
+    name: "x"
+    type_attr: "T"
   }
   output_arg {
-    name: "tensors"
-    type_list_attr: "dtypes"
+    name: "y"
+    type_attr: "T"
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "Reverse"
+  name: "Sinh"
   input_arg {
-    name: "tensor"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "dims"
-    type: DT_BOOL
-  }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -30087,11 +50799,6 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_BOOL
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
@@ -30102,17 +50809,13 @@ op {
   }
 }
 op {
-  name: "Reverse"
+  name: "Sinh"
   input_arg {
-    name: "tensor"
+    name: "x"
     type_attr: "T"
   }
-  input_arg {
-    name: "dims"
-    type: DT_BOOL
-  }
   output_arg {
-    name: "output"
+    name: "y"
     type_attr: "T"
   }
   attr {
@@ -30120,92 +50823,188 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_BOOL
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
         type: DT_COMPLEX128
-        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "Reverse"
+  name: "Size"
   input_arg {
-    name: "tensor"
+    name: "input"
     type_attr: "T"
   }
-  input_arg {
-    name: "dims"
-    type: DT_BOOL
-  }
   output_arg {
     name: "output"
-    type_attr: "T"
+    type_attr: "out_type"
   }
   attr {
     name: "T"
     type: "type"
+  }
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_BOOL
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "ReverseSequence"
+  name: "SkipDataset"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "input_dataset"
+    type: DT_VARIANT
   }
   input_arg {
-    name: "seq_lengths"
-    type_attr: "Tlen"
+    name: "count"
+    type: DT_INT64
   }
   output_arg {
-    name: "output"
-    type_attr: "T"
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "seq_dim"
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
+op {
+  name: "SkipDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "count"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "Skipgram"
+  output_arg {
+    name: "vocab_word"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "vocab_freq"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "words_per_epoch"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "current_epoch"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "total_words_processed"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "examples"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "labels"
+    type: DT_INT32
+  }
+  attr {
+    name: "filename"
+    type: "string"
+  }
+  attr {
+    name: "batch_size"
     type: "int"
   }
   attr {
-    name: "batch_dim"
+    name: "window_size"
     type: "int"
     default_value {
-      i: 0
+      i: 5
+    }
+  }
+  attr {
+    name: "min_count"
+    type: "int"
+    default_value {
+      i: 5
+    }
+  }
+  attr {
+    name: "subsample"
+    type: "float"
+    default_value {
+      f: 0.001
     }
   }
+  deprecation {
+    version: 19
+  }
+  is_stateful: true
+}
+op {
+  name: "Slice"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "begin"
+    type_attr: "Index"
+  }
+  input_arg {
+    name: "size"
+    type_attr: "Index"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
   attr {
     name: "T"
     type: "type"
   }
   attr {
-    name: "Tlen"
+    name: "Index"
     type: "type"
-    default_value {
-      type: DT_INT64
-    }
     allowed_values {
       list {
         type: DT_INT32
@@ -30215,159 +51014,134 @@ op {
   }
 }
 op {
-  name: "ReverseV2"
+  name: "Snapshot"
   input_arg {
-    name: "tensor"
+    name: "input"
     type_attr: "T"
   }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "Softmax"
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "logits"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "softmax"
     type_attr: "T"
   }
   attr {
-    name: "Tidx"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
-        type: DT_INT64
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
+}
+op {
+  name: "Softmax"
+  input_arg {
+    name: "logits"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "softmax"
+    type_attr: "T"
+  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_BOOL
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
 }
 op {
-  name: "ReverseV2"
+  name: "SoftmaxCrossEntropyWithLogits"
   input_arg {
-    name: "tensor"
+    name: "features"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "labels"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "loss"
     type_attr: "T"
   }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  output_arg {
+    name: "backprop"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_BOOL
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "ReverseV2"
+  name: "SoftmaxCrossEntropyWithLogits"
   input_arg {
-    name: "tensor"
+    name: "features"
     type_attr: "T"
   }
   input_arg {
-    name: "axis"
-    type_attr: "Tidx"
+    name: "labels"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "loss"
     type_attr: "T"
   }
-  attr {
-    name: "Tidx"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  output_arg {
+    name: "backprop"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_BOOL
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_STRING
       }
     }
   }
 }
 op {
-  name: "RightShift"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "Softplus"
   input_arg {
-    name: "y"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "activations"
     type_attr: "T"
   }
   attr {
@@ -30375,27 +51149,27 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
         type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
         type: DT_UINT16
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_HALF
       }
     }
   }
-  is_commutative: true
 }
 op {
-  name: "Rint"
+  name: "Softplus"
   input_arg {
-    name: "x"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "activations"
     type_attr: "T"
   }
   attr {
@@ -30405,18 +51179,27 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "Round"
+  name: "Softplus"
   input_arg {
-    name: "x"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "activations"
     type_attr: "T"
   }
   attr {
@@ -30424,25 +51207,30 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "Rsqrt"
+  name: "Softplus"
   input_arg {
-    name: "x"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "activations"
     type_attr: "T"
   }
   attr {
@@ -30450,27 +51238,34 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "RsqrtGrad"
+  name: "SoftplusGrad"
   input_arg {
-    name: "x"
+    name: "gradients"
     type_attr: "T"
   }
   input_arg {
-    name: "y"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "backprops"
     type_attr: "T"
   }
   attr {
@@ -30478,27 +51273,31 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "RsqrtGrad"
+  name: "SoftplusGrad"
   input_arg {
-    name: "y"
+    name: "gradients"
     type_attr: "T"
   }
   input_arg {
-    name: "dy"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
+    name: "backprops"
     type_attr: "T"
   }
   attr {
@@ -30506,354 +51305,290 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "SampleDistortedBoundingBox"
+  name: "SoftplusGrad"
   input_arg {
-    name: "image_size"
+    name: "gradients"
     type_attr: "T"
   }
   input_arg {
-    name: "bounding_boxes"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "begin"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "size"
+    name: "backprops"
     type_attr: "T"
   }
-  output_arg {
-    name: "bboxes"
-    type: DT_FLOAT
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_INT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "min_object_covered"
-    type: "float"
-    default_value {
-      f: 0.1
-    }
-  }
-  attr {
-    name: "aspect_ratio_range"
-    type: "list(float)"
-    default_value {
-      list {
-        f: 0.75
-        f: 1.33
-      }
-    }
-  }
-  attr {
-    name: "area_range"
-    type: "list(float)"
-    default_value {
-      list {
-        f: 0.05
-        f: 1
-      }
-    }
-  }
-  attr {
-    name: "max_attempts"
-    type: "int"
-    default_value {
-      i: 100
-    }
-  }
-  attr {
-    name: "use_image_if_no_bounding_boxes"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "SampleDistortedBoundingBoxV2"
+  name: "SoftplusGrad"
   input_arg {
-    name: "image_size"
+    name: "gradients"
     type_attr: "T"
   }
   input_arg {
-    name: "bounding_boxes"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "min_object_covered"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "begin"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "size"
+    name: "backprops"
     type_attr: "T"
   }
-  output_arg {
-    name: "bboxes"
-    type: DT_FLOAT
-  }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
         type: DT_UINT8
-        type: DT_INT8
         type: DT_INT16
-        type: DT_INT32
+        type: DT_INT8
         type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "aspect_ratio_range"
-    type: "list(float)"
-    default_value {
-      list {
-        f: 0.75
-        f: 1.33
-      }
-    }
-  }
-  attr {
-    name: "area_range"
-    type: "list(float)"
-    default_value {
-      list {
-        f: 0.05
-        f: 1
-      }
-    }
-  }
-  attr {
-    name: "max_attempts"
-    type: "int"
-    default_value {
-      i: 100
-    }
-  }
-  attr {
-    name: "use_image_if_no_bounding_boxes"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "Save"
-  input_arg {
-    name: "filename"
-    type: DT_STRING
-  }
+  name: "Softsign"
   input_arg {
-    name: "tensor_names"
-    type: DT_STRING
+    name: "features"
+    type_attr: "T"
   }
-  input_arg {
-    name: "data"
-    type_list_attr: "T"
+  output_arg {
+    name: "activations"
+    type_attr: "T"
   }
   attr {
     name: "T"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
   }
 }
 op {
-  name: "Save"
-  input_arg {
-    name: "filename"
-    type: DT_STRING
-  }
+  name: "Softsign"
   input_arg {
-    name: "tensor_names"
-    type: DT_STRING
+    name: "features"
+    type_attr: "T"
   }
-  input_arg {
-    name: "data"
-    type_list_attr: "T"
+  output_arg {
+    name: "activations"
+    type_attr: "T"
   }
   attr {
     name: "T"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "SaveSlices"
-  input_arg {
-    name: "filename"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_names"
-    type: DT_STRING
-  }
+  name: "Softsign"
   input_arg {
-    name: "shapes_and_slices"
-    type: DT_STRING
+    name: "features"
+    type_attr: "T"
   }
-  input_arg {
-    name: "data"
-    type_list_attr: "T"
+  output_arg {
+    name: "activations"
+    type_attr: "T"
   }
   attr {
     name: "T"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
 }
 op {
-  name: "SaveSlices"
-  input_arg {
-    name: "filename"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "tensor_names"
-    type: DT_STRING
-  }
+  name: "Softsign"
   input_arg {
-    name: "shapes_and_slices"
-    type: DT_STRING
+    name: "features"
+    type_attr: "T"
   }
-  input_arg {
-    name: "data"
-    type_list_attr: "T"
+  output_arg {
+    name: "activations"
+    type_attr: "T"
   }
   attr {
     name: "T"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "SaveV2"
-  input_arg {
-    name: "prefix"
-    type: DT_STRING
-  }
+  name: "SoftsignGrad"
   input_arg {
-    name: "tensor_names"
-    type: DT_STRING
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "shape_and_slices"
-    type: DT_STRING
+    name: "features"
+    type_attr: "T"
   }
-  input_arg {
-    name: "tensors"
-    type_list_attr: "dtypes"
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
   }
 }
 op {
-  name: "SaveV2"
-  input_arg {
-    name: "prefix"
-    type: DT_STRING
-  }
+  name: "SoftsignGrad"
   input_arg {
-    name: "tensor_names"
-    type: DT_STRING
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "shape_and_slices"
-    type: DT_STRING
+    name: "features"
+    type_attr: "T"
   }
-  input_arg {
-    name: "tensors"
-    type_list_attr: "dtypes"
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
-    name: "dtypes"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "ScalarSummary"
+  name: "SoftsignGrad"
   input_arg {
-    name: "tags"
-    type: DT_STRING
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "values"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "summary"
-    type: DT_STRING
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -30869,23 +51604,26 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "ScalarSummary"
+  name: "SoftsignGrad"
   input_arg {
-    name: "tags"
-    type: DT_STRING
+    name: "gradients"
+    type_attr: "T"
   }
   input_arg {
-    name: "values"
+    name: "features"
     type_attr: "T"
   }
   output_arg {
-    name: "summary"
-    type: DT_STRING
+    name: "backprops"
+    type_attr: "T"
   }
   attr {
     name: "T"
@@ -30895,10 +51633,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -30908,74 +51647,174 @@ op {
   }
 }
 op {
-  name: "ScanDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
+  name: "SpaceToBatch"
   input_arg {
-    name: "initial_state"
-    type_list_attr: "Tstate"
+    name: "input"
+    type_attr: "T"
   }
   input_arg {
-    name: "other_arguments"
-    type_list_attr: "Targuments"
+    name: "paddings"
+    type_attr: "Tpaddings"
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "f"
-    type: "func"
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "Tstate"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "Tpaddings"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
   attr {
-    name: "Targuments"
-    type: "list(type)"
+    name: "block_size"
+    type: "int"
     has_minimum: true
+    minimum: 2
+  }
+}
+op {
+  name: "SpaceToBatchND"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "block_shape"
+    type_attr: "Tblock_shape"
+  }
+  input_arg {
+    name: "paddings"
+    type_attr: "Tpaddings"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "Tblock_shape"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tpaddings"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
 }
 op {
-  name: "ScatterAdd"
+  name: "SpaceToDepth"
   input_arg {
-    name: "ref"
+    name: "input"
     type_attr: "T"
-    is_ref: true
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
   }
+  attr {
+    name: "block_size"
+    type: "int"
+    has_minimum: true
+    minimum: 2
+  }
+}
+op {
+  name: "SpaceToDepth"
   input_arg {
-    name: "updates"
+    name: "input"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
+  }
+  attr {
+    name: "block_size"
+    type: "int"
+    has_minimum: true
+    minimum: 2
+  }
+  attr {
+    name: "data_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+    allowed_values {
+      list {
+        s: "NHWC"
+        s: "NCHW"
+        s: "NCHW_VECT_C"
+      }
+    }
+  }
+}
+op {
+  name: "SparseAccumulatorApplyGradient"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "local_step"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "gradient_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "gradient_values"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "gradient_shape"
+    type: DT_INT64
+  }
+  attr {
+    name: "dtype"
+    type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
@@ -30996,45 +51835,87 @@ op {
     }
   }
   attr {
-    name: "Tindices"
+    name: "has_known_shape"
+    type: "bool"
+  }
+}
+op {
+  name: "SparseAccumulatorApplyGradient"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "local_step"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "gradient_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "gradient_values"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "gradient_shape"
+    type: DT_INT64
+  }
+  attr {
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "use_locking"
+    name: "has_known_shape"
     type: "bool"
-    default_value {
-      b: false
-    }
   }
 }
 op {
-  name: "ScatterAdd"
+  name: "SparseAccumulatorApplyGradient"
   input_arg {
-    name: "ref"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
     is_ref: true
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "local_step"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
-    type_attr: "T"
+    name: "gradient_indices"
+    type: DT_INT64
   }
-  output_arg {
-    name: "output_ref"
-    type_attr: "T"
-    is_ref: true
+  input_arg {
+    name: "gradient_values"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "gradient_shape"
+    type: DT_INT64
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
@@ -31054,49 +51935,93 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Tindices"
+    name: "has_known_shape"
+    type: "bool"
+  }
+}
+op {
+  name: "SparseAccumulatorApplyGradient"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "local_step"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "gradient_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "gradient_values"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "gradient_shape"
+    type: DT_INT64
+  }
+  attr {
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "use_locking"
+    name: "has_known_shape"
     type: "bool"
-    default_value {
-      b: false
-    }
   }
 }
 op {
-  name: "ScatterDiv"
+  name: "SparseAccumulatorTakeGradient"
   input_arg {
-    name: "ref"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
     is_ref: true
   }
   input_arg {
+    name: "num_required"
+    type: DT_INT32
+  }
+  output_arg {
     name: "indices"
-    type_attr: "Tindices"
+    type: DT_INT64
   }
-  input_arg {
-    name: "updates"
-    type_attr: "T"
+  output_arg {
+    name: "values"
+    type_attr: "dtype"
   }
   output_arg {
-    name: "output_ref"
-    type_attr: "T"
-    is_ref: true
+    name: "shape"
+    type: DT_INT64
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
@@ -31117,46 +52042,32 @@ op {
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterDiv"
+  name: "SparseAccumulatorTakeGradient"
   input_arg {
-    name: "ref"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
     is_ref: true
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "num_required"
+    type: DT_INT32
   }
-  input_arg {
-    name: "updates"
-    type_attr: "T"
+  output_arg {
+    name: "indices"
+    type: DT_INT64
   }
   output_arg {
-    name: "output_ref"
-    type_attr: "T"
-    is_ref: true
+    name: "values"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "shape"
+    type: DT_INT64
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
@@ -31179,46 +52090,32 @@ op {
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterMul"
+  name: "SparseAccumulatorTakeGradient"
   input_arg {
-    name: "ref"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
     is_ref: true
   }
   input_arg {
+    name: "num_required"
+    type: DT_INT32
+  }
+  output_arg {
     name: "indices"
-    type_attr: "Tindices"
+    type: DT_INT64
   }
-  input_arg {
-    name: "updates"
-    type_attr: "T"
+  output_arg {
+    name: "values"
+    type_attr: "dtype"
   }
   output_arg {
-    name: "output_ref"
-    type_attr: "T"
-    is_ref: true
+    name: "shape"
+    type: DT_INT64
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
@@ -31236,46 +52133,103 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
+}
+op {
+  name: "SparseAccumulatorTakeGradient"
+  input_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  input_arg {
+    name: "num_required"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "values"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "shape"
+    type: DT_INT64
+  }
   attr {
-    name: "Tindices"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterMul"
+  name: "SparseAdd"
   input_arg {
-    name: "ref"
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "a_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
     type_attr: "T"
   }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "thresh"
+    type_attr: "Treal"
+  }
   output_arg {
-    name: "output_ref"
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sum_values"
     type_attr: "T"
-    is_ref: true
+  }
+  output_arg {
+    name: "sum_shape"
+    type: DT_INT64
   }
   attr {
     name: "T"
@@ -31296,81 +52250,68 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tindices"
+    name: "Treal"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterNd"
+  name: "SparseAdd"
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "a_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
+    name: "a_values"
     type_attr: "T"
   }
   input_arg {
-    name: "shape"
-    type_attr: "Tindices"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
+    name: "a_shape"
+    type: DT_INT64
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
   }
-}
-op {
-  name: "ScatterNdAdd"
   input_arg {
-    name: "ref"
+    name: "b_values"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "b_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
-    type_attr: "T"
+    name: "thresh"
+    type_attr: "Treal"
   }
   output_arg {
-    name: "output_ref"
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sum_values"
     type_attr: "T"
-    is_ref: true
+  }
+  output_arg {
+    name: "sum_shape"
+    type: DT_INT64
   }
   attr {
     name: "T"
@@ -31391,46 +52332,72 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tindices"
+    name: "Treal"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterNdAdd"
+  name: "SparseAdd"
   input_arg {
-    name: "ref"
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "a_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
     type_attr: "T"
   }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "thresh"
+    type_attr: "Treal"
+  }
   output_arg {
-    name: "output_ref"
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sum_values"
     type_attr: "T"
-    is_ref: true
+  }
+  output_arg {
+    name: "sum_shape"
+    type: DT_INT64
   }
   attr {
     name: "T"
@@ -31453,45 +52420,73 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Tindices"
+    name: "Treal"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterNdNonAliasingAdd"
+  name: "SparseAdd"
   input_arg {
-    name: "input"
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "a_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
     type_attr: "T"
   }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "thresh"
+    type_attr: "Treal"
+  }
   output_arg {
-    name: "output"
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "sum_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "sum_shape"
+    type: DT_INT64
+  }
   attr {
     name: "T"
     type: "type"
@@ -31499,48 +52494,69 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "Tindices"
+    name: "Treal"
     type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
         type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "ScatterNdNonAliasingAdd"
+  name: "SparseAddGrad"
   input_arg {
-    name: "input"
+    name: "backprop_val_grad"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "a_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "a_val_grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "b_val_grad"
     type_attr: "T"
   }
   attr {
@@ -31562,41 +52578,35 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
       }
     }
   }
 }
 op {
-  name: "ScatterNdSub"
+  name: "SparseAddGrad"
   input_arg {
-    name: "ref"
+    name: "backprop_val_grad"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "a_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "a_val_grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "b_val_grad"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -31617,46 +52627,37 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterNdSub"
+  name: "SparseAddGrad"
   input_arg {
-    name: "ref"
+    name: "backprop_val_grad"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "a_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "a_val_grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "b_val_grad"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -31679,86 +52680,102 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "ScatterNdUpdate"
+  name: "SparseAddGrad"
   input_arg {
-    name: "ref"
+    name: "backprop_val_grad"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "a_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "updates"
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "sum_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "a_val_grad"
     type_attr: "T"
   }
   output_arg {
-    name: "output_ref"
+    name: "b_val_grad"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
     allowed_values {
       list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
         type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: true
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
     }
   }
 }
 op {
-  name: "ScatterSub"
+  name: "SparseApplyAdadelta"
   input_arg {
-    name: "ref"
+    name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "updates"
+    name: "accum_update"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   output_arg {
-    name: "output_ref"
+    name: "out"
     type_attr: "T"
     is_ref: true
   }
@@ -31803,22 +52820,44 @@ op {
   }
 }
 op {
-  name: "ScatterSub"
+  name: "SparseApplyAdadelta"
   input_arg {
-    name: "ref"
+    name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "updates"
+    name: "accum_update"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   output_arg {
-    name: "output_ref"
+    name: "out"
     type_attr: "T"
     is_ref: true
   }
@@ -31865,211 +52904,46 @@ op {
   }
 }
 op {
-  name: "ScatterUpdate"
+  name: "SparseApplyAdadelta"
   input_arg {
-    name: "ref"
+    name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
-  input_arg {
-    name: "updates"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output_ref"
+    name: "accum"
     type_attr: "T"
     is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-}
-op {
-  name: "SdcaFprint"
-  input_arg {
-    name: "input"
-    type: DT_STRING
-  }
-  output_arg {
-    name: "output"
-    type: DT_INT64
-  }
-}
-op {
-  name: "SdcaOptimizer"
-  input_arg {
-    name: "sparse_example_indices"
-    type: DT_INT64
-    number_attr: "num_sparse_features"
-  }
-  input_arg {
-    name: "sparse_feature_indices"
-    type: DT_INT64
-    number_attr: "num_sparse_features"
-  }
-  input_arg {
-    name: "sparse_feature_values"
-    type: DT_FLOAT
-    number_attr: "num_sparse_features_with_values"
-  }
-  input_arg {
-    name: "dense_features"
-    type: DT_FLOAT
-    number_attr: "num_dense_features"
-  }
-  input_arg {
-    name: "example_weights"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "example_labels"
-    type: DT_FLOAT
-  }
-  input_arg {
-    name: "sparse_indices"
-    type: DT_INT64
-    number_attr: "num_sparse_features"
-  }
   input_arg {
-    name: "sparse_weights"
-    type: DT_FLOAT
-    number_attr: "num_sparse_features"
+    name: "accum_update"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "dense_weights"
-    type: DT_FLOAT
-    number_attr: "num_dense_features"
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "example_state_data"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "out_example_state_data"
-    type: DT_FLOAT
-  }
-  output_arg {
-    name: "out_delta_sparse_weights"
-    type: DT_FLOAT
-    number_attr: "num_sparse_features"
-  }
-  output_arg {
-    name: "out_delta_dense_weights"
-    type: DT_FLOAT
-    number_attr: "num_dense_features"
-  }
-  attr {
-    name: "loss_type"
-    type: "string"
-    allowed_values {
-      list {
-        s: "logistic_loss"
-        s: "squared_loss"
-        s: "hinge_loss"
-        s: "smooth_hinge_loss"
-      }
-    }
-  }
-  attr {
-    name: "adaptative"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "num_sparse_features"
-    type: "int"
-    has_minimum: true
-  }
-  attr {
-    name: "num_sparse_features_with_values"
-    type: "int"
-    has_minimum: true
-  }
-  attr {
-    name: "num_dense_features"
-    type: "int"
-    has_minimum: true
-  }
-  attr {
-    name: "l1"
-    type: "float"
-  }
-  attr {
-    name: "l2"
-    type: "float"
-  }
-  attr {
-    name: "num_loss_partitions"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "num_inner_iterations"
-    type: "int"
-    has_minimum: true
-    minimum: 1
+    name: "rho"
+    type_attr: "T"
   }
-}
-op {
-  name: "SdcaShrinkL1"
   input_arg {
-    name: "weights"
-    type: DT_FLOAT
-    number_attr: "num_features"
-    is_ref: true
-  }
-  attr {
-    name: "num_features"
-    type: "int"
-    has_minimum: true
-  }
-  attr {
-    name: "l1"
-    type: "float"
-  }
-  attr {
-    name: "l2"
-    type: "float"
+    name: "epsilon"
+    type_attr: "T"
   }
-}
-op {
-  name: "SegmentMax"
   input_arg {
-    name: "data"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "segment_ids"
+    name: "indices"
     type_attr: "Tindices"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -32078,13 +52952,21 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -32098,106 +52980,55 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SegmentMax"
+  name: "SparseApplyAdadelta"
   input_arg {
-    name: "data"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
-  }
-  output_arg {
-    name: "output"
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-}
-op {
-  name: "SegmentMean"
   input_arg {
-    name: "data"
+    name: "accum_update"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
-  }
-  output_arg {
-    name: "output"
+    name: "lr"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-}
-op {
-  name: "SegmentMean"
   input_arg {
-    name: "data"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "segment_ids"
+    name: "indices"
     type_attr: "Tindices"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -32207,11 +53038,17 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
         type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -32228,62 +53065,42 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SegmentMin"
+  name: "SparseApplyAdagrad"
   input_arg {
-    name: "data"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "segment_ids"
-    type_attr: "Tindices"
-  }
-  output_arg {
-    name: "output"
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-}
-op {
-  name: "SegmentMin"
   input_arg {
-    name: "data"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "segment_ids"
+    name: "indices"
     type_attr: "Tindices"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -32292,15 +53109,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -32314,20 +53134,42 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SegmentProd"
+  name: "SparseApplyAdagrad"
   input_arg {
-    name: "data"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "segment_ids"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
     type_attr: "Tindices"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -32348,6 +53190,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -32361,20 +53205,42 @@ op {
       }
     }
   }
-}
-op {
-  name: "SegmentProd"
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "SparseApplyAdagrad"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
   input_arg {
-    name: "data"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "segment_ids"
+    name: "indices"
     type_attr: "Tindices"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -32397,6 +53263,7 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -32410,20 +53277,42 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SegmentSum"
+  name: "SparseApplyAdagrad"
   input_arg {
-    name: "data"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "segment_ids"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
     type_attr: "Tindices"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -32432,18 +53321,21 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -32457,20 +53349,59 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SegmentSum"
+  name: "SparseApplyAdagradDA"
   input_arg {
-    name: "data"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "segment_ids"
+    name: "gradient_accumulator"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "gradient_squared_accumulator"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
     type_attr: "Tindices"
   }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
+  }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -32491,8 +53422,6 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
@@ -32506,302 +53435,176 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "Select"
-  input_arg {
-    name: "condition"
-    type: DT_BOOL
-  }
+  name: "SparseApplyAdagradDA"
   input_arg {
-    name: "t"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "e"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
+    name: "gradient_accumulator"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-}
-op {
-  name: "SelfAdjointEig"
   input_arg {
-    name: "input"
+    name: "gradient_squared_accumulator"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
-      }
-    }
-  }
-  deprecation {
-    version: 11
-  }
-}
-op {
-  name: "SelfAdjointEigV2"
   input_arg {
-    name: "input"
-    type_attr: "T"
+    name: "indices"
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "e"
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "v"
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
-  attr {
-    name: "compute_v"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_DOUBLE
-        type: DT_FLOAT
-      }
-    }
-  }
-}
-op {
-  name: "SelfAdjointEigV2"
   input_arg {
-    name: "input"
+    name: "l2"
     type_attr: "T"
   }
-  output_arg {
-    name: "e"
-    type_attr: "T"
+  input_arg {
+    name: "global_step"
+    type: DT_INT64
   }
   output_arg {
-    name: "v"
+    name: "out"
     type_attr: "T"
-  }
-  attr {
-    name: "compute_v"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "Selu"
-  input_arg {
-    name: "features"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "activations"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
-}
-op {
-  name: "SeluGrad"
-  input_arg {
-    name: "gradients"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "outputs"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "backprops"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
 }
 op {
-  name: "SerializeIterator"
-  input_arg {
-    name: "resource_handle"
-    type: DT_RESOURCE
-  }
-  output_arg {
-    name: "serialized"
-    type: DT_VARIANT
-  }
-  is_stateful: true
-}
-op {
-  name: "SerializeManySparse"
-  input_arg {
-    name: "sparse_indices"
-    type: DT_INT64
-  }
+  name: "SparseApplyAdagradDA"
   input_arg {
-    name: "sparse_values"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "sparse_shape"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "serialized_sparse"
-    type: DT_STRING
-  }
-  attr {
-    name: "T"
-    type: "type"
+    name: "gradient_accumulator"
+    type_attr: "T"
+    is_ref: true
   }
-}
-op {
-  name: "SerializeSparse"
   input_arg {
-    name: "sparse_indices"
-    type: DT_INT64
+    name: "gradient_squared_accumulator"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "sparse_values"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "sparse_shape"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "serialized_sparse"
-    type: DT_STRING
-  }
-  attr {
-    name: "T"
-    type: "type"
+    name: "indices"
+    type_attr: "Tindices"
   }
-}
-op {
-  name: "SerializeTensor"
   input_arg {
-    name: "tensor"
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "serialized"
-    type: DT_STRING
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
-}
-op {
-  name: "SetSize"
   input_arg {
-    name: "set_indices"
-    type: DT_INT64
+    name: "l1"
+    type_attr: "T"
   }
   input_arg {
-    name: "set_values"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "set_shape"
+    name: "global_step"
     type: DT_INT64
   }
   output_arg {
-    name: "size"
-    type: DT_INT32
-  }
-  attr {
-    name: "validate_indices"
-    type: "bool"
-    default_value {
-      b: true
-    }
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT8
-        type: DT_INT16
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
         type: DT_UINT16
-        type: DT_STRING
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-}
-op {
-  name: "Shape"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
   attr {
-    name: "out_type"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
         type: DT_INT32
@@ -32809,364 +53612,366 @@ op {
       }
     }
   }
-}
-op {
-  name: "ShapeN"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-    number_attr: "N"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
-    number_attr: "N"
-  }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
   attr {
-    name: "out_type"
-    type: "type"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      type: DT_INT32
-    }
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
+      b: false
     }
   }
 }
 op {
-  name: "ShardedFilename"
-  input_arg {
-    name: "basename"
-    type: DT_STRING
-  }
-  input_arg {
-    name: "shard"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "num_shards"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "filename"
-    type: DT_STRING
-  }
-}
-op {
-  name: "ShardedFilespec"
-  input_arg {
-    name: "basename"
-    type: DT_STRING
-  }
+  name: "SparseApplyAdagradDA"
   input_arg {
-    name: "num_shards"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "filename"
-    type: DT_STRING
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
-}
-op {
-  name: "ShuffleDataset"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "gradient_accumulator"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "buffer_size"
-    type: DT_INT64
+    name: "gradient_squared_accumulator"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "seed"
-    type: DT_INT64
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "seed2"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "indices"
+    type_attr: "Tindices"
   }
-  is_stateful: true
-}
-op {
-  name: "ShuffleDataset"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "buffer_size"
-    type: DT_INT64
+    name: "l1"
+    type_attr: "T"
   }
   input_arg {
-    name: "seed"
-    type: DT_INT64
+    name: "l2"
+    type_attr: "T"
   }
   input_arg {
-    name: "seed2"
+    name: "global_step"
     type: DT_INT64
   }
   output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "ShuffleDataset"
+  name: "SparseApplyCenteredRMSProp"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "buffer_size"
-    type: DT_INT64
+    name: "mg"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "seed"
-    type: DT_INT64
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "seed2"
-    type: DT_INT64
+    name: "mom"
+    type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  attr {
-    name: "reshuffle_each_iteration"
-    type: "bool"
-    default_value {
-      b: true
-    }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-}
-op {
-  name: "Sigmoid"
   input_arg {
-    name: "x"
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   output_arg {
-    name: "y"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SigmoidGrad"
+  name: "SparseApplyCenteredRMSProp"
   input_arg {
-    name: "x"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "y"
+    name: "mg"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "z"
+  input_arg {
+    name: "ms"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+  input_arg {
+    name: "mom"
+    type_attr: "T"
+    is_ref: true
   }
-}
-op {
-  name: "SigmoidGrad"
   input_arg {
-    name: "y"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "dy"
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   output_arg {
-    name: "z"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "Sign"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "y"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "Sin"
+  name: "SparseApplyCenteredRMSProp"
   input_arg {
-    name: "x"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "y"
+  input_arg {
+    name: "mg"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-      }
-    }
+  input_arg {
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
   }
-}
-op {
-  name: "Sinh"
   input_arg {
-    name: "x"
+    name: "mom"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "rho"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   output_arg {
-    name: "y"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-}
-op {
-  name: "Size"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "out_type"
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
   attr {
-    name: "out_type"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
         type: DT_INT32
@@ -33174,150 +53979,178 @@ op {
       }
     }
   }
-}
-op {
-  name: "SkipDataset"
-  input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
-  }
-  input_arg {
-    name: "count"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
-  }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
-  }
   attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
-  is_stateful: true
 }
 op {
-  name: "SkipDataset"
+  name: "SparseApplyCenteredRMSProp"
   input_arg {
-    name: "input_dataset"
-    type: DT_VARIANT
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "count"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "handle"
-    type: DT_VARIANT
+    name: "mg"
+    type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "output_types"
-    type: "list(type)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "ms"
+    type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "output_shapes"
-    type: "list(shape)"
-    has_minimum: true
-    minimum: 1
+  input_arg {
+    name: "mom"
+    type_attr: "T"
+    is_ref: true
   }
-}
-op {
-  name: "Skipgram"
-  output_arg {
-    name: "vocab_word"
-    type: DT_STRING
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  output_arg {
-    name: "vocab_freq"
-    type: DT_INT32
+  input_arg {
+    name: "rho"
+    type_attr: "T"
   }
-  output_arg {
-    name: "words_per_epoch"
-    type: DT_INT64
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
-  output_arg {
-    name: "current_epoch"
-    type: DT_INT32
+  input_arg {
+    name: "epsilon"
+    type_attr: "T"
   }
-  output_arg {
-    name: "total_words_processed"
-    type: DT_INT64
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-  output_arg {
-    name: "examples"
-    type: DT_INT32
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
   output_arg {
-    name: "labels"
-    type: DT_INT32
-  }
-  attr {
-    name: "filename"
-    type: "string"
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "batch_size"
-    type: "int"
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "window_size"
-    type: "int"
-    default_value {
-      i: 5
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
     }
   }
   attr {
-    name: "min_count"
-    type: "int"
+    name: "use_locking"
+    type: "bool"
     default_value {
-      i: 5
+      b: false
     }
   }
-  attr {
-    name: "subsample"
-    type: "float"
-    default_value {
-      f: 0.001
-    }
+}
+op {
+  name: "SparseApplyFtrl"
+  input_arg {
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
   }
-  deprecation {
-    version: 19
+  input_arg {
+    name: "lr"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "Slice"
   input_arg {
-    name: "input"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "begin"
-    type_attr: "Index"
+    name: "l2"
+    type_attr: "T"
   }
   input_arg {
-    name: "size"
-    type_attr: "Index"
+    name: "lr_power"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+      }
+    }
   }
   attr {
-    name: "Index"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
@@ -33326,68 +54159,59 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "Softmax"
+  name: "SparseApplyFtrl"
   input_arg {
-    name: "logits"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "softmax"
+  input_arg {
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
-  }
-}
-op {
-  name: "SoftmaxCrossEntropyWithLogits"
   input_arg {
-    name: "features"
+    name: "linear"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "labels"
+    name: "grad"
     type_attr: "T"
   }
-  output_arg {
-    name: "loss"
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
-  output_arg {
-    name: "backprop"
+  input_arg {
+    name: "l1"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
   }
-}
-op {
-  name: "Softplus"
   input_arg {
-    name: "features"
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
-    name: "activations"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -33396,92 +54220,86 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "Softplus"
-  input_arg {
-    name: "features"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "activations"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SoftplusGrad"
+  name: "SparseApplyFtrl"
   input_arg {
-    name: "gradients"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "features"
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "backprops"
+  input_arg {
+    name: "linear"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_INT32
-        type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-      }
-    }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-}
-op {
-  name: "SoftplusGrad"
   input_arg {
-    name: "gradients"
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "features"
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
-    name: "backprops"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -33490,56 +54308,87 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-}
-op {
-  name: "Softsign"
-  input_arg {
-    name: "features"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "activations"
-    type_attr: "T"
-  }
   attr {
-    name: "T"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "Softsign"
+  name: "SparseApplyFtrl"
   input_arg {
-    name: "features"
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
-    name: "activations"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -33549,31 +54398,90 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
         type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SoftsignGrad"
+  name: "SparseApplyFtrlV2"
   input_arg {
-    name: "gradients"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "features"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
-    name: "backprops"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -33582,30 +54490,88 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SoftsignGrad"
+  name: "SparseApplyFtrlV2"
   input_arg {
-    name: "gradients"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "features"
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
-    name: "backprops"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -33614,43 +54580,26 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-}
-op {
-  name: "SpaceToBatch"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-  }
   attr {
-    name: "Tpaddings"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
         type: DT_INT32
@@ -33659,53 +54608,91 @@ op {
     }
   }
   attr {
-    name: "block_size"
-    type: "int"
-    has_minimum: true
-    minimum: 2
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "SpaceToBatchND"
+  name: "SparseApplyFtrlV2"
   input_arg {
-    name: "input"
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "block_shape"
-    type_attr: "Tblock_shape"
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
   }
   input_arg {
-    name: "paddings"
-    type_attr: "Tpaddings"
+    name: "lr_power"
+    type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tblock_shape"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "Tpaddings"
+    name: "Tindices"
     type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
         type: DT_INT32
@@ -33713,88 +54700,142 @@ op {
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SpaceToDepth"
+  name: "SparseApplyFtrlV2"
   input_arg {
-    name: "input"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
-  output_arg {
-    name: "output"
+  input_arg {
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "T"
-    type: "type"
+  input_arg {
+    name: "linear"
+    type_attr: "T"
+    is_ref: true
   }
-  attr {
-    name: "block_size"
-    type: "int"
-    has_minimum: true
-    minimum: 2
+  input_arg {
+    name: "grad"
+    type_attr: "T"
   }
-}
-op {
-  name: "SpaceToDepth"
   input_arg {
-    name: "input"
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2_shrinkage"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "lr_power"
     type_attr: "T"
   }
   output_arg {
-    name: "output"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
   }
   attr {
-    name: "block_size"
-    type: "int"
-    has_minimum: true
-    minimum: 2
-  }
-  attr {
-    name: "data_format"
-    type: "string"
-    default_value {
-      s: "NHWC"
-    }
+    name: "Tindices"
+    type: "type"
     allowed_values {
       list {
-        s: "NHWC"
-        s: "NCHW"
-        s: "NCHW_VECT_C"
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseAccumulatorApplyGradient"
+  name: "SparseApplyMomentum"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "var"
+    type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "local_step"
-    type: DT_INT64
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "gradient_indices"
-    type: DT_INT64
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "gradient_values"
-    type_attr: "dtype"
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "gradient_shape"
-    type: DT_INT64
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -33816,35 +54857,65 @@ op {
     }
   }
   attr {
-    name: "has_known_shape"
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
     type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "SparseAccumulatorApplyGradient"
+  name: "SparseApplyMomentum"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "var"
+    type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "local_step"
-    type: DT_INT64
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "gradient_indices"
-    type: DT_INT64
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "gradient_values"
-    type_attr: "dtype"
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "gradient_shape"
-    type: DT_INT64
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -33868,35 +54939,65 @@ op {
     }
   }
   attr {
-    name: "has_known_shape"
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
     type: "bool"
+    default_value {
+      b: false
+    }
   }
 }
 op {
-  name: "SparseAccumulatorTakeGradient"
+  name: "SparseApplyMomentum"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "var"
+    type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "num_required"
-    type: DT_INT32
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
-  output_arg {
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
     name: "indices"
-    type: DT_INT64
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "values"
-    type_attr: "dtype"
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
   output_arg {
-    name: "shape"
-    type: DT_INT64
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
@@ -33914,99 +55015,156 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseAccumulatorTakeGradient"
+  name: "SparseApplyMomentum"
   input_arg {
-    name: "handle"
-    type: DT_STRING
+    name: "var"
+    type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "num_required"
-    type: DT_INT32
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
-  output_arg {
+  input_arg {
+    name: "lr"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
     name: "indices"
-    type: DT_INT64
+    type_attr: "Tindices"
   }
-  output_arg {
-    name: "values"
-    type_attr: "dtype"
+  input_arg {
+    name: "momentum"
+    type_attr: "T"
   }
   output_arg {
-    name: "shape"
-    type: DT_INT64
+    name: "out"
+    type_attr: "T"
+    is_ref: true
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "use_nesterov"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseAdd"
+  name: "SparseApplyProximalAdagrad"
   input_arg {
-    name: "a_indices"
-    type: DT_INT64
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "a_values"
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "a_shape"
-    type: DT_INT64
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "b_indices"
-    type: DT_INT64
+    name: "l1"
+    type_attr: "T"
   }
   input_arg {
-    name: "b_values"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "b_shape"
-    type: DT_INT64
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "thresh"
-    type_attr: "Treal"
-  }
-  output_arg {
-    name: "sum_indices"
-    type: DT_INT64
+    name: "indices"
+    type_attr: "Tindices"
   }
   output_arg {
-    name: "sum_values"
+    name: "out"
     type_attr: "T"
-  }
-  output_arg {
-    name: "sum_shape"
-    type: DT_INT64
+    is_ref: true
   }
   attr {
     name: "T"
@@ -34031,64 +55189,59 @@ op {
     }
   }
   attr {
-    name: "Treal"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseAdd"
+  name: "SparseApplyProximalAdagrad"
   input_arg {
-    name: "a_indices"
-    type: DT_INT64
+    name: "var"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "a_values"
+    name: "accum"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "a_shape"
-    type: DT_INT64
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "b_indices"
-    type: DT_INT64
+    name: "l1"
+    type_attr: "T"
   }
   input_arg {
-    name: "b_values"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
-    name: "b_shape"
-    type: DT_INT64
+    name: "grad"
+    type_attr: "T"
   }
   input_arg {
-    name: "thresh"
-    type_attr: "Treal"
-  }
-  output_arg {
-    name: "sum_indices"
-    type: DT_INT64
+    name: "indices"
+    type_attr: "Tindices"
   }
   output_arg {
-    name: "sum_values"
+    name: "out"
     type_attr: "T"
-  }
-  output_arg {
-    name: "sum_shape"
-    type: DT_INT64
+    is_ref: true
   }
   attr {
     name: "T"
@@ -34115,50 +55268,59 @@ op {
     }
   }
   attr {
-    name: "Treal"
+    name: "Tindices"
     type: "type"
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT32
         type: DT_INT64
-        type: DT_UINT8
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseAddGrad"
+  name: "SparseApplyProximalAdagrad"
   input_arg {
-    name: "backprop_val_grad"
+    name: "var"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
+  }
+  input_arg {
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "a_indices"
-    type: DT_INT64
+    name: "l1"
+    type_attr: "T"
   }
   input_arg {
-    name: "b_indices"
-    type: DT_INT64
+    name: "l2"
+    type_attr: "T"
   }
   input_arg {
-    name: "sum_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "a_val_grad"
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   output_arg {
-    name: "b_val_grad"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -34179,35 +55341,66 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
       }
     }
   }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseAddGrad"
+  name: "SparseApplyProximalAdagrad"
   input_arg {
-    name: "backprop_val_grad"
+    name: "var"
     type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "a_indices"
-    type: DT_INT64
+    name: "accum"
+    type_attr: "T"
+    is_ref: true
   }
   input_arg {
-    name: "b_indices"
-    type: DT_INT64
+    name: "lr"
+    type_attr: "T"
   }
   input_arg {
-    name: "sum_indices"
-    type: DT_INT64
+    name: "l1"
+    type_attr: "T"
   }
-  output_arg {
-    name: "a_val_grad"
+  input_arg {
+    name: "l2"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "grad"
     type_attr: "T"
   }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
   output_arg {
-    name: "b_val_grad"
+    name: "out"
     type_attr: "T"
+    is_ref: true
   }
   attr {
     name: "T"
@@ -34216,51 +55409,59 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
-  name: "SparseApplyAdadelta"
+  name: "SparseApplyProximalGradientDescent"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "accum_update"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "lr"
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
@@ -34317,32 +55518,22 @@ op {
   }
 }
 op {
-  name: "SparseApplyAdadelta"
+  name: "SparseApplyProximalGradientDescent"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "accum_update"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "lr"
+    name: "alpha"
     type_attr: "T"
   }
   input_arg {
-    name: "rho"
+    name: "l1"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
@@ -34401,19 +55592,22 @@ op {
   }
 }
 op {
-  name: "SparseApplyAdagrad"
+  name: "SparseApplyProximalGradientDescent"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "alpha"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "lr"
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
@@ -34448,6 +55642,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -34470,19 +55667,22 @@ op {
   }
 }
 op {
-  name: "SparseApplyAdagrad"
+  name: "SparseApplyProximalGradientDescent"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "accum"
+    name: "alpha"
     type_attr: "T"
-    is_ref: true
   }
   input_arg {
-    name: "lr"
+    name: "l1"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "l2"
     type_attr: "T"
   }
   input_arg {
@@ -34505,17 +55705,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -34541,45 +55742,45 @@ op {
   }
 }
 op {
-  name: "SparseApplyAdagradDA"
+  name: "SparseApplyRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "gradient_accumulator"
+    name: "ms"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "gradient_squared_accumulator"
+    name: "mom"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "rho"
+    type_attr: "T"
   }
   input_arg {
-    name: "lr"
+    name: "momentum"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "global_step"
-    type: DT_INT64
+    name: "indices"
+    type_attr: "Tindices"
   }
   output_arg {
     name: "out"
@@ -34627,45 +55828,45 @@ op {
   }
 }
 op {
-  name: "SparseApplyAdagradDA"
+  name: "SparseApplyRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "gradient_accumulator"
+    name: "ms"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "gradient_squared_accumulator"
+    name: "mom"
     type_attr: "T"
     is_ref: true
   }
   input_arg {
-    name: "grad"
+    name: "lr"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "rho"
+    type_attr: "T"
   }
   input_arg {
-    name: "lr"
+    name: "momentum"
     type_attr: "T"
   }
   input_arg {
-    name: "l1"
+    name: "epsilon"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
+    name: "grad"
     type_attr: "T"
   }
   input_arg {
-    name: "global_step"
-    type: DT_INT64
+    name: "indices"
+    type_attr: "Tindices"
   }
   output_arg {
     name: "out"
@@ -34715,17 +55916,12 @@ op {
   }
 }
 op {
-  name: "SparseApplyCenteredRMSProp"
+  name: "SparseApplyRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
-  input_arg {
-    name: "mg"
-    type_attr: "T"
-    is_ref: true
-  }
   input_arg {
     name: "ms"
     type_attr: "T"
@@ -34784,6 +55980,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -34806,17 +56005,12 @@ op {
   }
 }
 op {
-  name: "SparseApplyCenteredRMSProp"
+  name: "SparseApplyRMSProp"
   input_arg {
     name: "var"
     type_attr: "T"
     is_ref: true
   }
-  input_arg {
-    name: "mg"
-    type_attr: "T"
-    is_ref: true
-  }
   input_arg {
     name: "ms"
     type_attr: "T"
@@ -34863,17 +56057,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -34899,53 +56094,58 @@ op {
   }
 }
 op {
-  name: "SparseApplyFtrl"
-  input_arg {
-    name: "var"
-    type_attr: "T"
-    is_ref: true
-  }
+  name: "SparseConcat"
   input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
+    name: "indices"
+    type: DT_INT64
+    number_attr: "N"
   }
   input_arg {
-    name: "linear"
+    name: "values"
     type_attr: "T"
-    is_ref: true
+    number_attr: "N"
   }
   input_arg {
-    name: "grad"
-    type_attr: "T"
+    name: "shapes"
+    type: DT_INT64
+    number_attr: "N"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+  output_arg {
+    name: "output_indices"
+    type: DT_INT64
   }
-  input_arg {
-    name: "lr"
+  output_arg {
+    name: "output_values"
     type_attr: "T"
   }
-  input_arg {
-    name: "l1"
-    type_attr: "T"
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
   }
-  input_arg {
-    name: "l2"
-    type_attr: "T"
+  attr {
+    name: "concat_dim"
+    type: "int"
   }
-  input_arg {
-    name: "lr_power"
-    type_attr: "T"
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+    minimum: 2
+  }
+  attr {
+    name: "T"
+    type: "type"
   }
+}
+op {
+  name: "SparseConditionalAccumulator"
   output_arg {
-    name: "out"
-    type_attr: "T"
+    name: "handle"
+    type: DT_STRING
     is_ref: true
   }
   attr {
-    name: "T"
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
@@ -34967,160 +56167,294 @@ op {
     }
   }
   attr {
-    name: "Tindices"
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "SparseConditionalAccumulator"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
+    is_ref: true
+  }
+  attr {
+    name: "dtype"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "container"
+    type: "string"
     default_value {
-      b: false
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
     }
   }
+  is_stateful: true
 }
 op {
-  name: "SparseApplyFtrl"
-  input_arg {
-    name: "var"
-    type_attr: "T"
+  name: "SparseConditionalAccumulator"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
     is_ref: true
   }
-  input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
-  input_arg {
-    name: "linear"
-    type_attr: "T"
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "SparseConditionalAccumulator"
+  output_arg {
+    name: "handle"
+    type: DT_STRING
     is_ref: true
   }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "shape"
+    type: "shape"
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
   }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
+op {
+  name: "SparseCross"
   input_arg {
     name: "indices"
-    type_attr: "Tindices"
+    type: DT_INT64
+    number_attr: "N"
   }
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "values"
+    type_list_attr: "sparse_types"
   }
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "shapes"
+    type: DT_INT64
+    number_attr: "N"
   }
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "dense_inputs"
+    type_list_attr: "dense_types"
+  }
+  output_arg {
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
+    type_attr: "out_type"
+  }
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
+  }
+  attr {
+    name: "N"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "hashed_output"
+    type: "bool"
   }
-  input_arg {
-    name: "lr_power"
-    type_attr: "T"
+  attr {
+    name: "num_buckets"
+    type: "int"
+    has_minimum: true
   }
-  output_arg {
-    name: "out"
-    type_attr: "T"
-    is_ref: true
+  attr {
+    name: "hash_key"
+    type: "int"
   }
   attr {
-    name: "T"
-    type: "type"
+    name: "sparse_types"
+    type: "list(type)"
+    has_minimum: true
     allowed_values {
       list {
-        type: DT_FLOAT
-        type: DT_DOUBLE
         type: DT_INT64
-        type: DT_INT32
-        type: DT_UINT8
-        type: DT_UINT16
-        type: DT_INT16
-        type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
-        type: DT_HALF
-        type: DT_UINT32
-        type: DT_UINT64
+        type: DT_STRING
       }
     }
   }
   attr {
-    name: "Tindices"
+    name: "dense_types"
+    type: "list(type)"
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "out_type"
     type: "type"
     allowed_values {
       list {
-        type: DT_INT32
         type: DT_INT64
+        type: DT_STRING
       }
     }
   }
   attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
+    name: "internal_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT64
+        type: DT_STRING
+      }
     }
   }
 }
 op {
-  name: "SparseApplyFtrlV2"
-  input_arg {
-    name: "var"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "linear"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "SparseDenseCwiseAdd"
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "sp_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "l2"
+    name: "sp_values"
     type_attr: "T"
   }
   input_arg {
-    name: "l2_shrinkage"
-    type_attr: "T"
+    name: "sp_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "lr_power"
+    name: "dense"
     type_attr: "T"
   }
   output_arg {
-    name: "out"
+    name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -35144,73 +56478,28 @@ op {
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "SparseApplyFtrlV2"
-  input_arg {
-    name: "var"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "linear"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "grad"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "SparseDenseCwiseAdd"
   input_arg {
-    name: "l1"
-    type_attr: "T"
+    name: "sp_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "l2"
+    name: "sp_values"
     type_attr: "T"
   }
   input_arg {
-    name: "l2_shrinkage"
-    type_attr: "T"
+    name: "sp_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "lr_power"
+    name: "dense"
     type_attr: "T"
   }
   output_arg {
-    name: "out"
+    name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -35236,56 +56525,28 @@ op {
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "SparseApplyMomentum"
-  input_arg {
-    name: "var"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
-  }
+  name: "SparseDenseCwiseAdd"
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "sp_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
+    name: "sp_values"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "sp_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "momentum"
+    name: "dense"
     type_attr: "T"
   }
   output_arg {
-    name: "out"
+    name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -35306,66 +56567,34 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "SparseApplyMomentum"
-  input_arg {
-    name: "var"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
-  }
+  name: "SparseDenseCwiseAdd"
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "sp_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
+    name: "sp_values"
     type_attr: "T"
   }
   input_arg {
-    name: "indices"
-    type_attr: "Tindices"
+    name: "sp_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "momentum"
+    name: "dense"
     type_attr: "T"
   }
   output_arg {
-    name: "out"
+    name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -35374,84 +56603,46 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-  attr {
-    name: "use_nesterov"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "SparseApplyProximalAdagrad"
-  input_arg {
-    name: "var"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
-  }
+  name: "SparseDenseCwiseDiv"
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "sp_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "l1"
+    name: "sp_values"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "sp_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
+    name: "dense"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "out"
+    name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -35475,60 +56666,28 @@ op {
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "SparseApplyProximalAdagrad"
-  input_arg {
-    name: "var"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "accum"
-    type_attr: "T"
-    is_ref: true
-  }
+  name: "SparseDenseCwiseDiv"
   input_arg {
-    name: "lr"
-    type_attr: "T"
+    name: "sp_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "l1"
+    name: "sp_values"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "sp_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
+    name: "dense"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "out"
+    name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -35554,55 +56713,28 @@ op {
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "SparseApplyProximalGradientDescent"
-  input_arg {
-    name: "var"
-    type_attr: "T"
-    is_ref: true
-  }
+  name: "SparseDenseCwiseDiv"
   input_arg {
-    name: "alpha"
-    type_attr: "T"
+    name: "sp_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "l1"
+    name: "sp_values"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "sp_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
+    name: "dense"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "out"
+    name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -35623,58 +56755,34 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "SparseApplyProximalGradientDescent"
-  input_arg {
-    name: "var"
-    type_attr: "T"
-    is_ref: true
-  }
+  name: "SparseDenseCwiseDiv"
   input_arg {
-    name: "alpha"
-    type_attr: "T"
+    name: "sp_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "l1"
+    name: "sp_values"
     type_attr: "T"
   }
   input_arg {
-    name: "l2"
-    type_attr: "T"
+    name: "sp_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
+    name: "dense"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "out"
+    name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -35683,86 +56791,46 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
-        type: DT_UINT64
-      }
-    }
-  }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
-}
-op {
-  name: "SparseApplyRMSProp"
-  input_arg {
-    name: "var"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "ms"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "mom"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
+        type: DT_UINT64
+      }
+    }
   }
+}
+op {
+  name: "SparseDenseCwiseMul"
   input_arg {
-    name: "rho"
-    type_attr: "T"
+    name: "sp_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "momentum"
+    name: "sp_values"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
-    type_attr: "T"
+    name: "sp_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
+    name: "dense"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "out"
+    name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -35786,69 +56854,28 @@ op {
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "SparseApplyRMSProp"
-  input_arg {
-    name: "var"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "ms"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "mom"
-    type_attr: "T"
-    is_ref: true
-  }
-  input_arg {
-    name: "lr"
-    type_attr: "T"
-  }
+  name: "SparseDenseCwiseMul"
   input_arg {
-    name: "rho"
-    type_attr: "T"
+    name: "sp_indices"
+    type: DT_INT64
   }
   input_arg {
-    name: "momentum"
+    name: "sp_values"
     type_attr: "T"
   }
   input_arg {
-    name: "epsilon"
-    type_attr: "T"
+    name: "sp_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "grad"
+    name: "dense"
     type_attr: "T"
   }
-  input_arg {
-    name: "indices"
-    type_attr: "Tindices"
-  }
   output_arg {
-    name: "out"
+    name: "output"
     type_attr: "T"
-    is_ref: true
   }
   attr {
     name: "T"
@@ -35874,78 +56901,32 @@ op {
       }
     }
   }
-  attr {
-    name: "Tindices"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_INT32
-        type: DT_INT64
-      }
-    }
-  }
-  attr {
-    name: "use_locking"
-    type: "bool"
-    default_value {
-      b: false
-    }
-  }
 }
 op {
-  name: "SparseConcat"
+  name: "SparseDenseCwiseMul"
   input_arg {
-    name: "indices"
+    name: "sp_indices"
     type: DT_INT64
-    number_attr: "N"
   }
   input_arg {
-    name: "values"
+    name: "sp_values"
     type_attr: "T"
-    number_attr: "N"
   }
   input_arg {
-    name: "shapes"
-    type: DT_INT64
-    number_attr: "N"
-  }
-  output_arg {
-    name: "output_indices"
+    name: "sp_shape"
     type: DT_INT64
   }
-  output_arg {
-    name: "output_values"
+  input_arg {
+    name: "dense"
     type_attr: "T"
   }
   output_arg {
-    name: "output_shape"
-    type: DT_INT64
-  }
-  attr {
-    name: "concat_dim"
-    type: "int"
-  }
-  attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
-    minimum: 2
+    name: "output"
+    type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-}
-op {
-  name: "SparseConditionalAccumulator"
-  output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
-  }
-  attr {
-    name: "dtype"
-    type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
@@ -35962,99 +56943,78 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
-  attr {
-    name: "shape"
-    type: "shape"
+}
+op {
+  name: "SparseDenseCwiseMul"
+  input_arg {
+    name: "sp_indices"
+    type: DT_INT64
   }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "sp_values"
+    type_attr: "T"
   }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
+  input_arg {
+    name: "sp_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "dense"
+    type_attr: "T"
   }
-  is_stateful: true
-}
-op {
-  name: "SparseConditionalAccumulator"
   output_arg {
-    name: "handle"
-    type: DT_STRING
-    is_ref: true
+    name: "output"
+    type_attr: "T"
   }
   attr {
-    name: "dtype"
+    name: "T"
     type: "type"
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  attr {
-    name: "shape"
-    type: "shape"
-  }
-  attr {
-    name: "container"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  attr {
-    name: "shared_name"
-    type: "string"
-    default_value {
-      s: ""
-    }
-  }
-  is_stateful: true
 }
 op {
-  name: "SparseCross"
+  name: "SparseFillEmptyRows"
   input_arg {
     name: "indices"
     type: DT_INT64
-    number_attr: "N"
   }
   input_arg {
     name: "values"
-    type_list_attr: "sparse_types"
+    type_attr: "T"
   }
   input_arg {
-    name: "shapes"
+    name: "dense_shape"
     type: DT_INT64
-    number_attr: "N"
   }
   input_arg {
-    name: "dense_inputs"
-    type_list_attr: "dense_types"
+    name: "default_value"
+    type_attr: "T"
   }
   output_arg {
     name: "output_indices"
@@ -36062,95 +57022,142 @@ op {
   }
   output_arg {
     name: "output_values"
-    type_attr: "out_type"
+    type_attr: "T"
   }
   output_arg {
-    name: "output_shape"
+    name: "empty_row_indicator"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "reverse_index_map"
     type: DT_INT64
   }
   attr {
-    name: "N"
-    type: "int"
-    has_minimum: true
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "SparseFillEmptyRowsGrad"
+  input_arg {
+    name: "reverse_index_map"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "grad_values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "d_values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "d_default_value"
+    type_attr: "T"
   }
   attr {
-    name: "hashed_output"
-    type: "bool"
+    name: "T"
+    type: "type"
+  }
+}
+op {
+  name: "SparseMatMul"
+  input_arg {
+    name: "a"
+    type_attr: "Ta"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "Tb"
+  }
+  output_arg {
+    name: "product"
+    type: DT_FLOAT
   }
   attr {
-    name: "num_buckets"
-    type: "int"
-    has_minimum: true
+    name: "transpose_a"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "hash_key"
-    type: "int"
+    name: "transpose_b"
+    type: "bool"
+    default_value {
+      b: false
+    }
   }
   attr {
-    name: "sparse_types"
-    type: "list(type)"
-    has_minimum: true
-    allowed_values {
-      list {
-        type: DT_INT64
-        type: DT_STRING
-      }
+    name: "a_is_sparse"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   attr {
-    name: "dense_types"
-    type: "list(type)"
-    has_minimum: true
-    allowed_values {
-      list {
-        type: DT_INT64
-        type: DT_STRING
-      }
+    name: "b_is_sparse"
+    type: "bool"
+    default_value {
+      b: false
     }
   }
   attr {
-    name: "out_type"
+    name: "Ta"
     type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
     allowed_values {
       list {
-        type: DT_INT64
-        type: DT_STRING
+        type: DT_FLOAT
+        type: DT_BFLOAT16
       }
     }
   }
   attr {
-    name: "internal_type"
+    name: "Tb"
     type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
     allowed_values {
       list {
-        type: DT_INT64
-        type: DT_STRING
+        type: DT_FLOAT
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "SparseDenseCwiseAdd"
+  name: "SparseReduceMax"
   input_arg {
-    name: "sp_indices"
+    name: "input_indices"
     type: DT_INT64
   }
   input_arg {
-    name: "sp_values"
+    name: "input_values"
     type_attr: "T"
   }
   input_arg {
-    name: "sp_shape"
+    name: "input_shape"
     type: DT_INT64
   }
   input_arg {
-    name: "dense"
-    type_attr: "T"
+    name: "reduction_axes"
+    type: DT_INT32
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   attr {
     name: "T"
     type: "type"
@@ -36158,44 +57165,46 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "SparseDenseCwiseAdd"
+  name: "SparseReduceMax"
   input_arg {
-    name: "sp_indices"
+    name: "input_indices"
     type: DT_INT64
   }
   input_arg {
-    name: "sp_values"
+    name: "input_values"
     type_attr: "T"
   }
   input_arg {
-    name: "sp_shape"
+    name: "input_shape"
     type: DT_INT64
   }
   input_arg {
-    name: "dense"
-    type_attr: "T"
+    name: "reduction_axes"
+    type: DT_INT32
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   attr {
     name: "T"
     type: "type"
@@ -36203,17 +57212,12 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -36222,27 +57226,34 @@ op {
   }
 }
 op {
-  name: "SparseDenseCwiseDiv"
+  name: "SparseReduceMax"
   input_arg {
-    name: "sp_indices"
+    name: "input_indices"
     type: DT_INT64
   }
   input_arg {
-    name: "sp_values"
+    name: "input_values"
     type_attr: "T"
   }
   input_arg {
-    name: "sp_shape"
+    name: "input_shape"
     type: DT_INT64
   }
   input_arg {
-    name: "dense"
-    type_attr: "T"
+    name: "reduction_axes"
+    type: DT_INT32
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   attr {
     name: "T"
     type: "type"
@@ -36250,44 +57261,49 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "SparseDenseCwiseDiv"
+  name: "SparseReduceMax"
   input_arg {
-    name: "sp_indices"
+    name: "input_indices"
     type: DT_INT64
   }
   input_arg {
-    name: "sp_values"
+    name: "input_values"
     type_attr: "T"
   }
   input_arg {
-    name: "sp_shape"
+    name: "input_shape"
     type: DT_INT64
   }
   input_arg {
-    name: "dense"
-    type_attr: "T"
+    name: "reduction_axes"
+    type: DT_INT32
   }
   output_arg {
     name: "output"
     type_attr: "T"
   }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   attr {
     name: "T"
     type: "type"
@@ -36295,17 +57311,13 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -36314,27 +57326,42 @@ op {
   }
 }
 op {
-  name: "SparseDenseCwiseMul"
+  name: "SparseReduceMaxSparse"
   input_arg {
-    name: "sp_indices"
+    name: "input_indices"
     type: DT_INT64
   }
   input_arg {
-    name: "sp_values"
+    name: "input_values"
     type_attr: "T"
   }
   input_arg {
-    name: "sp_shape"
+    name: "input_shape"
     type: DT_INT64
   }
   input_arg {
-    name: "dense"
-    type_attr: "T"
+    name: "reduction_axes"
+    type: DT_INT32
   }
   output_arg {
-    name: "output"
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   attr {
     name: "T"
     type: "type"
@@ -36342,44 +57369,54 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "SparseDenseCwiseMul"
+  name: "SparseReduceMaxSparse"
   input_arg {
-    name: "sp_indices"
+    name: "input_indices"
     type: DT_INT64
   }
   input_arg {
-    name: "sp_values"
+    name: "input_values"
     type_attr: "T"
   }
   input_arg {
-    name: "sp_shape"
+    name: "input_shape"
     type: DT_INT64
   }
   input_arg {
-    name: "dense"
-    type_attr: "T"
+    name: "reduction_axes"
+    type: DT_INT32
   }
   output_arg {
-    name: "output"
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   attr {
     name: "T"
     type: "type"
@@ -36387,17 +57424,12 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
+        type: DT_INT64
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
-        type: DT_QINT8
-        type: DT_QUINT8
-        type: DT_QINT32
+        type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -36406,22 +57438,22 @@ op {
   }
 }
 op {
-  name: "SparseFillEmptyRows"
+  name: "SparseReduceMaxSparse"
   input_arg {
-    name: "indices"
+    name: "input_indices"
     type: DT_INT64
   }
   input_arg {
-    name: "values"
+    name: "input_values"
     type_attr: "T"
   }
   input_arg {
-    name: "dense_shape"
+    name: "input_shape"
     type: DT_INT64
   }
   input_arg {
-    name: "default_value"
-    type_attr: "T"
+    name: "reduction_axes"
+    type: DT_INT32
   }
   output_arg {
     name: "output_indices"
@@ -36432,112 +57464,97 @@ op {
     type_attr: "T"
   }
   output_arg {
-    name: "empty_row_indicator"
-    type: DT_BOOL
-  }
-  output_arg {
-    name: "reverse_index_map"
+    name: "output_shape"
     type: DT_INT64
   }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
   attr {
     name: "T"
     type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
   }
 }
 op {
-  name: "SparseFillEmptyRowsGrad"
+  name: "SparseReduceMaxSparse"
   input_arg {
-    name: "reverse_index_map"
+    name: "input_indices"
     type: DT_INT64
   }
   input_arg {
-    name: "grad_values"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "d_values"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "d_default_value"
+    name: "input_values"
     type_attr: "T"
   }
-  attr {
-    name: "T"
-    type: "type"
-  }
-}
-op {
-  name: "SparseMatMul"
   input_arg {
-    name: "a"
-    type_attr: "Ta"
+    name: "input_shape"
+    type: DT_INT64
   }
   input_arg {
-    name: "b"
-    type_attr: "Tb"
+    name: "reduction_axes"
+    type: DT_INT32
   }
   output_arg {
-    name: "product"
-    type: DT_FLOAT
-  }
-  attr {
-    name: "transpose_a"
-    type: "bool"
-    default_value {
-      b: false
-    }
+    name: "output_indices"
+    type: DT_INT64
   }
-  attr {
-    name: "transpose_b"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  output_arg {
+    name: "output_values"
+    type_attr: "T"
   }
-  attr {
-    name: "a_is_sparse"
-    type: "bool"
-    default_value {
-      b: false
-    }
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
   }
   attr {
-    name: "b_is_sparse"
+    name: "keep_dims"
     type: "bool"
     default_value {
       b: false
     }
   }
   attr {
-    name: "Ta"
-    type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
-    allowed_values {
-      list {
-        type: DT_FLOAT
-        type: DT_BFLOAT16
-      }
-    }
-  }
-  attr {
-    name: "Tb"
+    name: "T"
     type: "type"
-    default_value {
-      type: DT_FLOAT
-    }
     allowed_values {
       list {
         type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
         type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
 }
 op {
-  name: "SparseReduceMax"
+  name: "SparseReduceSum"
   input_arg {
     name: "input_indices"
     type: DT_INT64
@@ -36572,19 +57589,24 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
       }
     }
   }
 }
 op {
-  name: "SparseReduceMax"
+  name: "SparseReduceSum"
   input_arg {
     name: "input_indices"
     type: DT_INT64
@@ -36619,12 +57641,17 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -36633,7 +57660,7 @@ op {
   }
 }
 op {
-  name: "SparseReduceMaxSparse"
+  name: "SparseReduceSum"
   input_arg {
     name: "input_indices"
     type: DT_INT64
@@ -36651,17 +57678,9 @@ op {
     type: DT_INT32
   }
   output_arg {
-    name: "output_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "output_values"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_shape"
-    type: DT_INT64
-  }
   attr {
     name: "keep_dims"
     type: "bool"
@@ -36676,19 +57695,27 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT32
         type: DT_INT64
+        type: DT_INT32
         type: DT_UINT8
+        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
-        type: DT_UINT16
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
 }
 op {
-  name: "SparseReduceMaxSparse"
+  name: "SparseReduceSum"
   input_arg {
     name: "input_indices"
     type: DT_INT64
@@ -36706,17 +57733,9 @@ op {
     type: DT_INT32
   }
   output_arg {
-    name: "output_indices"
-    type: DT_INT64
-  }
-  output_arg {
-    name: "output_values"
+    name: "output"
     type_attr: "T"
   }
-  output_arg {
-    name: "output_shape"
-    type: DT_INT64
-  }
   attr {
     name: "keep_dims"
     type: "bool"
@@ -36732,11 +57751,17 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
         type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -36745,7 +57770,7 @@ op {
   }
 }
 op {
-  name: "SparseReduceSum"
+  name: "SparseReduceSumSparse"
   input_arg {
     name: "input_indices"
     type: DT_INT64
@@ -36763,9 +57788,17 @@ op {
     type: DT_INT32
   }
   output_arg {
-    name: "output"
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
+  }
   attr {
     name: "keep_dims"
     type: "bool"
@@ -36797,7 +57830,7 @@ op {
   }
 }
 op {
-  name: "SparseReduceSum"
+  name: "SparseReduceSumSparse"
   input_arg {
     name: "input_indices"
     type: DT_INT64
@@ -36815,9 +57848,17 @@ op {
     type: DT_INT32
   }
   output_arg {
-    name: "output"
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
     type_attr: "T"
   }
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
+  }
   attr {
     name: "keep_dims"
     type: "bool"
@@ -36906,6 +57947,9 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -36954,17 +57998,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -37013,17 +58058,311 @@ op {
     name: "new_shape"
     type: DT_INT64
   }
-  output_arg {
-    name: "output_indices"
-    type: DT_INT64
+  output_arg {
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_shape"
+    type: DT_INT64
+  }
+}
+op {
+  name: "SparseSegmentMean"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseSegmentMeanGrad"
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "output_dim0"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseSegmentMeanWithNumSegments"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseSegmentSqrtN"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseSegmentSqrtNGrad"
+  input_arg {
+    name: "grad"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "output_dim0"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseSegmentSqrtNWithNumSegments"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
-  output_arg {
-    name: "output_shape"
-    type: DT_INT64
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
   }
 }
 op {
-  name: "SparseSegmentMean"
+  name: "SparseSegmentSum"
   input_arg {
     name: "data"
     type_attr: "T"
@@ -37047,6 +58386,13 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
       }
     }
   }
@@ -37065,9 +58411,9 @@ op {
   }
 }
 op {
-  name: "SparseSegmentMeanGrad"
+  name: "SparseSegmentSum"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
@@ -37078,10 +58424,6 @@ op {
     name: "segment_ids"
     type: DT_INT32
   }
-  input_arg {
-    name: "output_dim0"
-    type: DT_INT32
-  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -37093,6 +58435,15 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -37111,7 +58462,7 @@ op {
   }
 }
 op {
-  name: "SparseSegmentSqrtN"
+  name: "SparseSegmentSum"
   input_arg {
     name: "data"
     type_attr: "T"
@@ -37135,6 +58486,16 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -37153,9 +58514,9 @@ op {
   }
 }
 op {
-  name: "SparseSegmentSqrtNGrad"
+  name: "SparseSegmentSum"
   input_arg {
-    name: "grad"
+    name: "data"
     type_attr: "T"
   }
   input_arg {
@@ -37166,10 +58527,6 @@ op {
     name: "segment_ids"
     type: DT_INT32
   }
-  input_arg {
-    name: "output_dim0"
-    type: DT_INT32
-  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -37181,6 +58538,16 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -37199,7 +58566,7 @@ op {
   }
 }
 op {
-  name: "SparseSegmentSum"
+  name: "SparseSegmentSumWithNumSegments"
   input_arg {
     name: "data"
     type_attr: "T"
@@ -37212,6 +58579,10 @@ op {
     name: "segment_ids"
     type: DT_INT32
   }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -37230,6 +58601,9 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
       }
     }
   }
@@ -37246,9 +58620,22 @@ op {
       }
     }
   }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
-  name: "SparseSegmentSum"
+  name: "SparseSegmentSumWithNumSegments"
   input_arg {
     name: "data"
     type_attr: "T"
@@ -37261,6 +58648,10 @@ op {
     name: "segment_ids"
     type: DT_INT32
   }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
   output_arg {
     name: "output"
     type_attr: "T"
@@ -37273,10 +58664,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -37297,6 +58689,19 @@ op {
       }
     }
   }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "SparseSlice"
@@ -37409,6 +58814,102 @@ op {
     }
   }
 }
+op {
+  name: "SparseSoftmaxCrossEntropyWithLogits"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "labels"
+    type_attr: "Tlabels"
+  }
+  output_arg {
+    name: "loss"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprop"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tlabels"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseSparseMaximum"
+  input_arg {
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+      }
+    }
+  }
+}
 op {
   name: "SparseSparseMaximum"
   input_arg {
@@ -37457,6 +58958,8 @@ op {
         type: DT_INT8
         type: DT_UINT16
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -37511,6 +59014,119 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "SparseSparseMaximum"
+  input_arg {
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseSparseMinimum"
+  input_arg {
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
       }
     }
   }
@@ -37568,6 +59184,8 @@ op {
         type: DT_QUINT8
         type: DT_QINT32
         type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -37627,6 +59245,67 @@ op {
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "SparseSparseMinimum"
+  input_arg {
+    name: "a_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "a_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_shape"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_indices"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "b_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b_shape"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_indices"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output_values"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
       }
     }
   }
@@ -37759,17 +59438,133 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseTensorDenseAdd"
+  input_arg {
+    name: "a_indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "a_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_shape"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseTensorDenseAdd"
+  input_arg {
+    name: "a_indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "a_values"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "a_shape"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -38140,6 +59935,31 @@ op {
     }
   }
 }
+op {
+  name: "Sqrt"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "SqrtGrad"
   input_arg {
@@ -38196,6 +60016,61 @@ op {
     }
   }
 }
+op {
+  name: "SqrtGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "Square"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "Square"
   input_arg {
@@ -38212,6 +60087,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -38253,6 +60129,38 @@ op {
   }
   is_commutative: true
 }
+op {
+  name: "SquaredDifference"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  is_commutative: true
+}
 op {
   name: "Squeeze"
   input_arg {
@@ -38665,7 +60573,159 @@ op {
   }
 }
 op {
-  name: "StatelessRandomUniform"
+  name: "StatelessRandomNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "StatelessRandomUniform"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "StatelessRandomUniform"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "StatelessTruncatedNormal"
   input_arg {
     name: "shape"
     type_attr: "T"
@@ -38714,7 +60774,7 @@ op {
   }
   input_arg {
     name: "seed"
-    type: DT_INT64
+    type_attr: "Tseed"
   }
   output_arg {
     name: "output"
@@ -38747,6 +60807,19 @@ op {
       }
     }
   }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "StatsAggregatorHandle"
@@ -39270,6 +61343,41 @@ op {
     }
   }
 }
+op {
+  name: "Sub"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "Substr"
   input_arg {
@@ -39415,6 +61523,126 @@ op {
     }
   }
 }
+op {
+  name: "Sum"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "Sum"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reduction_indices"
+    type_attr: "Tidx"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "keep_dims"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "Svd"
   input_arg {
@@ -39708,6 +61936,33 @@ op {
     }
   }
 }
+op {
+  name: "Tan"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "Tanh"
   input_arg {
@@ -39732,6 +61987,31 @@ op {
     }
   }
 }
+op {
+  name: "Tanh"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "TanhGrad"
   input_arg {
@@ -39788,6 +62068,35 @@ op {
     }
   }
 }
+op {
+  name: "TanhGrad"
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "dy"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "TemporaryVariable"
   output_arg {
@@ -40776,6 +63085,106 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "TensorListFromTensor"
+  input_arg {
+    name: "tensor"
+    type_attr: "element_dtype"
+  }
+  input_arg {
+    name: "element_shape"
+    type_attr: "shape_type"
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "element_dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "TensorListLength"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "length"
+    type: DT_INT32
+  }
+}
+op {
+  name: "TensorListPopBack"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "tensor"
+    type_attr: "element_dtype"
+  }
+  attr {
+    name: "element_dtype"
+    type: "type"
+  }
+}
+op {
+  name: "TensorListPushBack"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "tensor"
+    type_attr: "element_dtype"
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "element_dtype"
+    type: "type"
+  }
+}
+op {
+  name: "TensorListStack"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "tensor"
+    type_attr: "element_dtype"
+  }
+  attr {
+    name: "element_dtype"
+    type: "type"
+  }
+  attr {
+    name: "num_elements"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
 op {
   name: "TensorSliceDataset"
   input_arg {
@@ -41200,6 +63609,106 @@ op {
     version: 7
   }
 }
+op {
+  name: "TopK"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  attr {
+    name: "k"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "sorted"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  deprecation {
+    version: 7
+  }
+}
+op {
+  name: "TopK"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  attr {
+    name: "k"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "sorted"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  deprecation {
+    version: 7
+  }
+}
 op {
   name: "TopKV2"
   input_arg {
@@ -41289,39 +63798,227 @@ op {
   }
 }
 op {
-  name: "Transpose"
+  name: "TopKV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "k"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  attr {
+    name: "sorted"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+}
+op {
+  name: "TopKV2"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "k"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "values"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "indices"
+    type: DT_INT32
+  }
+  attr {
+    name: "sorted"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+}
+op {
+  name: "Transpose"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "perm"
+    type_attr: "Tperm"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tperm"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "TruncateDiv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "TruncateDiv"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
+op {
+  name: "TruncateMod"
   input_arg {
     name: "x"
     type_attr: "T"
   }
   input_arg {
-    name: "perm"
-    type_attr: "Tperm"
+    name: "y"
+    type_attr: "T"
   }
   output_arg {
-    name: "y"
+    name: "z"
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-  }
-  attr {
-    name: "Tperm"
-    type: "type"
-    default_value {
-      type: DT_INT32
-    }
     allowed_values {
       list {
         type: DT_INT32
         type: DT_INT64
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "TruncateDiv"
+  name: "TruncateMod"
   input_arg {
     name: "x"
     type_attr: "T"
@@ -41339,34 +64036,49 @@ op {
     type: "type"
     allowed_values {
       list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-        type: DT_UINT8
-        type: DT_INT8
-        type: DT_UINT16
-        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
-        type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
       }
     }
   }
 }
 op {
-  name: "TruncateMod"
-  input_arg {
-    name: "x"
-    type_attr: "T"
-  }
+  name: "TruncatedNormal"
   input_arg {
-    name: "y"
+    name: "shape"
     type_attr: "T"
   }
   output_arg {
-    name: "z"
-    type_attr: "T"
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
   }
   attr {
     name: "T"
@@ -41375,11 +64087,10 @@ op {
       list {
         type: DT_INT32
         type: DT_INT64
-        type: DT_FLOAT
-        type: DT_DOUBLE
       }
     }
   }
+  is_stateful: true
 }
 op {
   name: "TruncatedNormal"
@@ -41411,6 +64122,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -41571,6 +64283,114 @@ op {
     }
   }
 }
+op {
+  name: "UniqueDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "UniqueV2"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "axis"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "idx"
+    type_attr: "out_idx"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "out_idx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "UniqueV2"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "axis"
+    type_attr: "Taxis"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "idx"
+    type_attr: "out_idx"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Taxis"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "out_idx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "UniqueWithCounts"
   input_arg {
@@ -41729,6 +64549,130 @@ op {
     }
   }
 }
+op {
+  name: "UnsortedSegmentMax"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "segment_ids"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "UnsortedSegmentMax"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "segment_ids"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "UnsortedSegmentSum"
   input_arg {
@@ -41833,6 +64777,140 @@ op {
     }
   }
 }
+op {
+  name: "UnsortedSegmentSum"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "segment_ids"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "UnsortedSegmentSum"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "segment_ids"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "Unstage"
   output_arg {
@@ -42084,6 +65162,86 @@ op {
     }
   }
 }
+op {
+  name: "Where"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "index"
+    type: DT_INT64
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_BOOL
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT64
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BFLOAT16
+        type: DT_BOOL
+      }
+    }
+  }
+}
+op {
+  name: "Where"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "index"
+    type: DT_INT64
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_BOOL
+    }
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+        type: DT_BOOL
+      }
+    }
+  }
+}
 op {
   name: "WholeFileReader"
   output_arg {
diff --git a/tensorflow/core/ops/control_flow_ops.cc b/tensorflow/core/ops/control_flow_ops.cc
index 61089658d71db9e5db95660c4addf3aec3849338..81e9fcfa959dc906f34a2a1bf6cc77aefe4aaeaf 100644
--- a/tensorflow/core/ops/control_flow_ops.cc
+++ b/tensorflow/core/ops/control_flow_ops.cc
@@ -47,20 +47,7 @@ REGISTER_OP("Switch")
     .Output("output_false: T")
     .Output("output_true: T")
     .Attr("T: type")
-    .SetShapeFn(SwitchShape)
-    .Doc(R"doc(
-Forwards `data` to the output port determined by `pred`.
-
-If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,
-the data goes to `output_false`.
-
-See also `RefSwitch` and `Merge`.
-
-data: The tensor to be forwarded to the appropriate output.
-pred: A scalar that specifies which output port will receive data.
-output_false: If `pred` is false, data will be forwarded to this output.
-output_true: If `pred` is true, data will be forwarded to this output.
-)doc");
+    .SetShapeFn(SwitchShape);
 
 REGISTER_OP("RefSwitch")
     .Input("data: Ref(T)")
@@ -69,20 +56,7 @@ REGISTER_OP("RefSwitch")
     .Output("output_true: Ref(T)")
     .Attr("T: type")
     .SetAllowsUninitializedInput()
-    .SetShapeFn(SwitchShape)
-    .Doc(R"doc(
-Forwards the ref tensor `data` to the output port determined by `pred`.
-
-If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,
-the data goes to `output_false`.
-
-See also `Switch` and `Merge`.
-
-data: The ref tensor to be forwarded to the appropriate output.
-pred: A scalar that specifies which output port will receive data.
-output_false: If `pred` is false, data will be forwarded to this output.
-output_true: If `pred` is true, data will be forwarded to this output.
-)doc");
+    .SetShapeFn(SwitchShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("RefSelect")
@@ -110,14 +84,7 @@ REGISTER_OP("RefSelect")
       }
       c->set_output(0, first_input);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Forwards the `index`th element of `inputs` to `output`.
-
-index: A scalar that determines the input that gets selected.
-inputs: A list of ref tensors, one of which will be forwarded to `output`.
-output: The forwarded tensor.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 namespace {
@@ -153,20 +120,7 @@ REGISTER_OP("Merge")
     .Output("value_index: int32")
     .Attr("T: type")
     .Attr("N: int >= 1")
-    .SetShapeFn(MergeShape)
-    .Doc(R"doc(
-Forwards the value of an available tensor from `inputs` to `output`.
-
-`Merge` waits for at least one of the tensors in `inputs` to become available.
-It is usually combined with `Switch` to implement branching.
-
-`Merge` forwards the first tensor to become available to `output`, and sets
-`value_index` to its index in `inputs`.
-
-inputs: The input tensors, exactly one of which will become available.
-output: Will be set to the available input tensor.
-value_index: The index of the chosen input tensor in `inputs`.
-)doc");
+    .SetShapeFn(MergeShape);
 
 REGISTER_OP("RefMerge")
     .Input("inputs: Ref(N * T)")
@@ -174,20 +128,7 @@ REGISTER_OP("RefMerge")
     .Output("value_index: int32")
     .Attr("T: type")
     .Attr("N: int >= 1")
-    .SetShapeFn(MergeShape)
-    .Doc(R"doc(
-Forwards the value of an available tensor from `inputs` to `output`.
-
-`Merge` waits for at least one of the tensors in `inputs` to become available.
-It is usually combined with `Switch` to implement branching.
-
-`Merge` forwards the first tensor for become available to `output`, and sets
-`value_index` to its index in `inputs`.
-
-inputs: The input tensors, exactly one of which will become available.
-output: Will be set to the available input tensor.
-value_index: The index of the chosen input tensor in `inputs`.
-)doc");
+    .SetShapeFn(MergeShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Enter")
@@ -214,22 +155,7 @@ REGISTER_OP("Enter")
       }
 
       return Status::OK();
-    })
-    .Doc(R"doc(
-Creates or finds a child frame, and makes `data` available to the child frame.
-
-This op is used together with `Exit` to create loops in the graph.
-The unique `frame_name` is used by the `Executor` to identify frames. If
-`is_constant` is true, `output` is a constant in the child frame; otherwise
-it may be changed in the child frame. At most `parallel_iterations` iterations
-are run in parallel in the child frame.
-
-data: The tensor to be made available to the child frame.
-frame_name: The name of the child frame.
-is_constant: If true, the output is constant within the child frame.
-parallel_iterations: The number of iterations allowed to run in parallel.
-output: The same tensor as `data`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("RefEnter")
@@ -239,75 +165,33 @@ REGISTER_OP("RefEnter")
     .Attr("frame_name: string")
     .Attr("is_constant: bool = false")
     .Attr("parallel_iterations: int = 10")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Creates or finds a child frame, and makes `data` available to the child frame.
-
-The unique `frame_name` is used by the `Executor` to identify frames. If
-`is_constant` is true, `output` is a constant in the child frame; otherwise
-it may be changed in the child frame. At most `parallel_iterations` iterations
-are run in parallel in the child frame.
-
-data: The tensor to be made available to the child frame.
-frame_name: The name of the child frame.
-is_constant: If true, the output is constant within the child frame.
-parallel_iterations: The number of iterations allowed to run in parallel.
-output: The same tensor as `data`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Exit")
     .Input("data: T")
     .Output("output: T")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Exits the current frame to its parent frame.
-
-Exit makes its input `data` available to the parent frame.
-
-data: The tensor to be made available to the parent frame.
-output: The same tensor as `data`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("RefExit")
     .Input("data: Ref(T)")
     .Output("output: Ref(T)")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Exits the current frame to its parent frame.
-
-Exit makes its input `data` available to the parent frame.
-
-data: The tensor to be made available to the parent frame.
-output: The same tensor as `data`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("NextIteration")
     .Input("data: T")
     .Output("output: T")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Makes its input available to the next iteration.
-
-data: The tensor to be made available to the next iteration.
-output: The same tensor as `data`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("RefNextIteration")
     .Input("data: Ref(T)")
     .Output("output: Ref(T)")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Makes its input available to the next iteration.
-
-data: The tensor to be made available to the next iteration.
-output: The same tensor as `data`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("LoopCond")
@@ -315,40 +199,15 @@ REGISTER_OP("LoopCond")
     .Output("output: bool")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRank(c, 0);
-    })
-    .Doc(R"doc(
-Forwards the input to the output.
-
-This operator represents the loop termination condition used by the
-"pivot" switches of a loop.
-
-input: A boolean scalar, representing the branch predicate of the Switch op.
-output: The same tensor as `input`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
-REGISTER_OP("ControlTrigger")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R"docstring(
-Does nothing. Serves as a control trigger for scheduling.
-
-Only useful as a placeholder for control edges.
-)docstring");
+REGISTER_OP("ControlTrigger").SetShapeFn(shape_inference::NoOutputs);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Abort")
     .Attr("error_msg: string = ''")
     .Attr("exit_without_error: bool = false")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R"doc(
-Raise a exception to abort the process when called.
-
-If exit_without_error is true, the process will exit normally,
-otherwise it will exit with a SIGABORT signal.
-
-Returns nothing but an exception.
-
-error_msg: A string which is the message associated with the exception.
-)doc");
+    .SetShapeFn(shape_inference::NoOutputs);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/ctc_ops.cc b/tensorflow/core/ops/ctc_ops.cc
index 1a69106d80ba9962f40a2353637b456294ae22d6..f2322c730bc8b18d2229b0ea4cc5ded0f8021ef5 100644
--- a/tensorflow/core/ops/ctc_ops.cc
+++ b/tensorflow/core/ops/ctc_ops.cc
@@ -59,30 +59,7 @@ REGISTER_OP("CTCLoss")
       c->set_output(0, c->Vector(batch_size));
       c->set_output(1, inputs);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Calculates the CTC Loss (log probability) for each batch entry.  Also calculates
-the gradient.  This class performs the softmax operation for you, so inputs
-should be e.g. linear projections of outputs by an LSTM.
-
-inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-labels_indices: The indices of a `SparseTensor<int32, 2>`.
-  `labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for
-  `(batch b, time t)`.
-labels_values: The values (labels) associated with the given batch and time.
-sequence_length: A vector containing sequence lengths (batch).
-preprocess_collapse_repeated: Scalar, if true then repeated labels are
-  collapsed prior to the CTC calculation.
-ctc_merge_repeated: Scalar.  If set to false, *during* CTC calculation
-  repeated non-blank labels will not be merged and are interpreted as
-  individual labels.  This is a simplified version of CTC.
-ignore_longer_outputs_than_inputs: Scalar. If set to true, during CTC
-  calculation, items that have longer output sequences than input sequences
-  are skipped: they don't contribute to the loss term and have zero-gradient.
-loss: A vector (batch) containing log-probabilities.
-gradient: The gradient of `loss`.  3-D, shape:
-  `(max_time x batch_size x num_classes)`.
-)doc");
+    });
 
 REGISTER_OP("CTCGreedyDecoder")
     .Input("inputs: float")
@@ -110,32 +87,7 @@ REGISTER_OP("CTCGreedyDecoder")
       c->set_output(2, c->Vector(2));
       c->set_output(3, c->Matrix(batch_size, 1));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Performs greedy decoding on the logits given in inputs.
-
-A note about the attribute merge_repeated: if enabled, when
-consecutive logits' maximum indices are the same, only the first of
-these is emitted.  Labeling the blank '*', the sequence "A B B * B B"
-becomes "A B B" if merge_repeated = True and "A B B B B" if
-merge_repeated = False.
-
-Regardless of the value of merge_repeated, if the maximum index of a given
-time and batch corresponds to the blank, index `(num_classes - 1)`, no new
-element is emitted.
-
-inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-sequence_length: A vector containing sequence lengths, size `(batch_size)`.
-merge_repeated: If True, merge repeated classes in output.
-decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`,
-  of a `SparseTensor<int64, 2>`.  The rows store: [batch, time].
-decoded_values: Values vector, size: `(total_decoded_outputs)`,
-  of a `SparseTensor<int64, 2>`.  The vector stores the decoded classes.
-decoded_shape: Shape vector, size `(2)`, of the decoded SparseTensor.
-  Values are: `[batch_size, max_decoded_length]`.
-log_probability: Matrix, size `(batch_size x 1)`, containing sequence
-  log-probabilities.
-)doc");
+    });
 
 REGISTER_OP("CTCBeamSearchDecoder")
     .Input("inputs: float")
@@ -176,32 +128,6 @@ REGISTER_OP("CTCBeamSearchDecoder")
       }
       c->set_output(out_idx++, c->Matrix(batch_size, top_paths));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Performs beam search decoding on the logits given in input.
-
-A note about the attribute merge_repeated: For the beam search decoder,
-this means that if consecutive entries in a beam are the same, only
-the first of these is emitted.  That is, when the top path is "A B B B B",
-"A B" is returned if merge_repeated = True but "A B B B B" is
-returned if merge_repeated = False.
-
-inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-sequence_length: A vector containing sequence lengths, size `(batch)`.
-beam_width: A scalar >= 0 (beam search beam width).
-top_paths: A scalar >= 0, <= beam_width (controls output size).
-merge_repeated: If true, merge repeated classes in output.
-decoded_indices: A list (length: top_paths) of indices matrices.  Matrix j,
-  size `(total_decoded_outputs[j] x 2)`, has indices of a
-  `SparseTensor<int64, 2>`.  The rows store: [batch, time].
-decoded_values: A list (length: top_paths) of values vectors.  Vector j,
-  size `(length total_decoded_outputs[j])`, has the values of a
-  `SparseTensor<int64, 2>`.  The vector stores the decoded classes for beam j.
-decoded_shape: A list (length: top_paths) of shape vector.  Vector j,
-  size `(2)`, stores the shape of the decoded `SparseTensor[j]`.
-  Its values are: `[batch_size, max_decoded_length[j]]`.
-log_probability: A matrix, shaped: `(batch_size x top_paths)`.  The
-  sequence log-probabilities.
-)doc");
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/data_flow_ops.cc b/tensorflow/core/ops/data_flow_ops.cc
index ac2dc601f1f6b48905f1269b8726ac30ba5dda67..cf949ed64777b545863fcff68257cf678d45ef97 100644
--- a/tensorflow/core/ops/data_flow_ops.cc
+++ b/tensorflow/core/ops/data_flow_ops.cc
@@ -84,51 +84,7 @@ REGISTER_OP("DynamicPartition")
       }
 
       return Status::OK();
-    })
-    .Doc(R"doc(
-Partitions `data` into `num_partitions` tensors using indices from `partitions`.
-
-For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]`
-becomes part of `outputs[partitions[js]]`.  The slices with `partitions[js] = i`
-are placed in `outputs[i]` in lexicographic order of `js`, and the first
-dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`.
-In detail,
-
-```python
-    outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:]
-
-    outputs[i] = pack([data[js, ...] for js if partitions[js] == i])
-```
-
-`data.shape` must start with `partitions.shape`.
-
-For example:
-
-```python
-    # Scalar partitions.
-    partitions = 1
-    num_partitions = 2
-    data = [10, 20]
-    outputs[0] = []  # Empty with shape [0, 2]
-    outputs[1] = [[10, 20]]
-
-    # Vector partitions.
-    partitions = [0, 0, 1, 1, 0]
-    num_partitions = 2
-    data = [10, 20, 30, 40, 50]
-    outputs[0] = [10, 20, 50]
-    outputs[1] = [30, 40]
-```
-
-See `dynamic_stitch` for an example on how to merge partitions back.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/DynamicPartition.png" alt>
-</div>
-
-partitions: Any shape.  Indices in the range `[0, num_partitions)`.
-num_partitions: The number of partitions to output.
-)doc");
+    });
 
 namespace {
 
@@ -189,73 +145,7 @@ REGISTER_OP("DynamicStitch")
     .Output("merged: T")
     .Attr("N : int >= 1")
     .Attr("T : type")
-    .SetShapeFn(DynamicStitchShapeFunction)
-    .Doc(R"doc(
-Interleave the values from the `data` tensors into a single tensor.
-
-Builds a merged tensor such that
-
-```python
-    merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
-```
-
-For example, if each `indices[m]` is scalar or vector, we have
-
-```python
-    # Scalar indices:
-    merged[indices[m], ...] = data[m][...]
-
-    # Vector indices:
-    merged[indices[m][i], ...] = data[m][i, ...]
-```
-
-Each `data[i].shape` must start with the corresponding `indices[i].shape`,
-and the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we
-must have `data[i].shape = indices[i].shape + constant`.  In terms of this
-`constant`, the output shape is
-
-    merged.shape = [max(indices)] + constant
-
-Values are merged in order, so if an index appears in both `indices[m][i]` and
-`indices[n][j]` for `(m,i) < (n,j)` the slice `data[n][j]` will appear in the
-merged result. If you do not need this guarantee, ParallelDynamicStitch might
-perform better on some devices.
-
-For example:
-
-```python
-    indices[0] = 6
-    indices[1] = [4, 1]
-    indices[2] = [[5, 2], [0, 3]]
-    data[0] = [61, 62]
-    data[1] = [[41, 42], [11, 12]]
-    data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
-    merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
-              [51, 52], [61, 62]]
-```
-
-This method can be used to merge partitions created by `dynamic_partition`
-as illustrated on the following example:
-
-```python
-    # Apply function (increments x_i) on elements for which a certain condition
-    # apply (x_i != -1 in this example).
-    x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
-    condition_mask=tf.not_equal(x,tf.constant(-1.))
-    partitioned_data = tf.dynamic_partition(
-        x, tf.cast(condition_mask, tf.int32) , 2)
-    partitioned_data[1] = partitioned_data[1] + 1.0
-    condition_indices = tf.dynamic_partition(
-        tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)
-    x = tf.dynamic_stitch(condition_indices, partitioned_data)
-    # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain
-    # unchanged.
-```
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/DynamicStitch.png" alt>
-</div>
-)doc");
+    .SetShapeFn(DynamicStitchShapeFunction);
 
 REGISTER_OP("ParallelDynamicStitch")
     .Input("indices: N * int32")
@@ -263,72 +153,7 @@ REGISTER_OP("ParallelDynamicStitch")
     .Output("merged: T")
     .Attr("N : int >= 1")
     .Attr("T : type")
-    .SetShapeFn(DynamicStitchShapeFunction)
-    .Doc(R"doc(
-Interleave the values from the `data` tensors into a single tensor.
-
-Builds a merged tensor such that
-
-```python
-    merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
-```
-
-For example, if each `indices[m]` is scalar or vector, we have
-
-```python
-    # Scalar indices:
-    merged[indices[m], ...] = data[m][...]
-
-    # Vector indices:
-    merged[indices[m][i], ...] = data[m][i, ...]
-```
-
-Each `data[i].shape` must start with the corresponding `indices[i].shape`,
-and the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we
-must have `data[i].shape = indices[i].shape + constant`.  In terms of this
-`constant`, the output shape is
-
-    merged.shape = [max(indices)] + constant
-
-Values may be merged in parallel, so if an index appears in both `indices[m][i]`
-and `indices[n][j]`, the result may be invalid. This differs from the normal
-DynamicStitch operator that defines the behavior in that case.
-
-For example:
-
-```python
-    indices[0] = 6
-    indices[1] = [4, 1]
-    indices[2] = [[5, 2], [0, 3]]
-    data[0] = [61, 62]
-    data[1] = [[41, 42], [11, 12]]
-    data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
-    merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
-              [51, 52], [61, 62]]
-```
-
-This method can be used to merge partitions created by `dynamic_partition`
-as illustrated on the following example:
-
-```python
-    # Apply function (increments x_i) on elements for which a certain condition
-    # apply (x_i != -1 in this example).
-    x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
-    condition_mask=tf.not_equal(x,tf.constant(-1.))
-    partitioned_data = tf.dynamic_partition(
-        x, tf.cast(condition_mask, tf.int32) , 2)
-    partitioned_data[1] = partitioned_data[1] + 1.0
-    condition_indices = tf.dynamic_partition(
-        tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)
-    x = tf.dynamic_stitch(condition_indices, partitioned_data)
-    # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain
-    # unchanged.
-```
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/DynamicStitch.png" alt>
-</div>
-)doc");
+    .SetShapeFn(DynamicStitchShapeFunction);
 
 // --------------------------------------------------------------------------
 
@@ -382,29 +207,7 @@ REGISTER_OP("RandomShuffleQueue")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-A queue that randomizes the order of elements.
-
-handle: The handle to the queue.
-component_types: The type of each component in a value.
-shapes: The shape of each component in a value. The length of this attr must
-  be either 0 or the same as the length of component_types. If the length of
-  this attr is 0, the shapes of queue elements are not constrained, and
-  only one element may be dequeued at a time.
-capacity: The upper bound on the number of elements in this queue.
-  Negative numbers mean no limit.
-min_after_dequeue: Dequeue will block unless there would be this
-  many elements after the dequeue or the queue is closed. This
-  ensures a minimum level of mixing of elements.
-seed: If either seed or seed2 is set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, a random seed is used.
-seed2: A second seed to avoid seed collision.
-container: If non-empty, this queue is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this queue will be shared under the given name
-  across multiple sessions.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("RandomShuffleQueueV2")
     .Output("handle: resource")
@@ -417,29 +220,7 @@ REGISTER_OP("RandomShuffleQueueV2")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-A queue that randomizes the order of elements.
-
-handle: The handle to the queue.
-component_types: The type of each component in a value.
-shapes: The shape of each component in a value. The length of this attr must
-  be either 0 or the same as the length of component_types. If the length of
-  this attr is 0, the shapes of queue elements are not constrained, and
-  only one element may be dequeued at a time.
-capacity: The upper bound on the number of elements in this queue.
-  Negative numbers mean no limit.
-min_after_dequeue: Dequeue will block unless there would be this
-  many elements after the dequeue or the queue is closed. This
-  ensures a minimum level of mixing of elements.
-seed: If either seed or seed2 is set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, a random seed is used.
-seed2: A second seed to avoid seed collision.
-container: If non-empty, this queue is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this queue will be shared under the given name
-  across multiple sessions.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("FIFOQueue")
     .Output("handle: Ref(string)")
@@ -449,23 +230,7 @@ REGISTER_OP("FIFOQueue")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-A queue that produces elements in first-in first-out order.
-
-handle: The handle to the queue.
-component_types: The type of each component in a value.
-shapes: The shape of each component in a value. The length of this attr must
-  be either 0 or the same as the length of component_types. If the length of
-  this attr is 0, the shapes of queue elements are not constrained, and
-  only one element may be dequeued at a time.
-capacity: The upper bound on the number of elements in this queue.
-  Negative numbers mean no limit.
-container: If non-empty, this queue is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this queue will be shared under the given name
-  across multiple sessions.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("FIFOQueueV2")
     .Output("handle: resource")
@@ -475,23 +240,7 @@ REGISTER_OP("FIFOQueueV2")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-A queue that produces elements in first-in first-out order.
-
-handle: The handle to the queue.
-component_types: The type of each component in a value.
-shapes: The shape of each component in a value. The length of this attr must
-  be either 0 or the same as the length of component_types. If the length of
-  this attr is 0, the shapes of queue elements are not constrained, and
-  only one element may be dequeued at a time.
-capacity: The upper bound on the number of elements in this queue.
-  Negative numbers mean no limit.
-container: If non-empty, this queue is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this queue will be shared under the given name
-  across multiple sessions.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("PaddingFIFOQueue")
     .Output("handle: Ref(string)")
@@ -501,31 +250,7 @@ REGISTER_OP("PaddingFIFOQueue")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-A queue that produces elements in first-in first-out order.
-
-Variable-size shapes are allowed by setting the corresponding shape dimensions
-to 0 in the shape attr.  In this case DequeueMany will pad up to the maximum
-size of any given element in the minibatch.  See below for details.
-
-handle: The handle to the queue.
-component_types: The type of each component in a value.
-shapes: The shape of each component in a value. The length of this attr must
-  be either 0 or the same as the length of component_types.
-  Shapes of fixed rank but variable size are allowed by setting
-  any shape dimension to -1.  In this case, the inputs' shape may vary along
-  the given dimension, and DequeueMany will pad the given dimension with
-  zeros up to the maximum shape of all elements in the given batch.
-  If the length of this attr is 0, different queue elements may have
-  different ranks and shapes, but only one element may be dequeued at a time.
-capacity: The upper bound on the number of elements in this queue.
-  Negative numbers mean no limit.
-container: If non-empty, this queue is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this queue will be shared under the given name
-  across multiple sessions.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("PaddingFIFOQueueV2")
     .Output("handle: resource")
@@ -535,31 +260,7 @@ REGISTER_OP("PaddingFIFOQueueV2")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-A queue that produces elements in first-in first-out order.
-
-Variable-size shapes are allowed by setting the corresponding shape dimensions
-to 0 in the shape attr.  In this case DequeueMany will pad up to the maximum
-size of any given element in the minibatch.  See below for details.
-
-handle: The handle to the queue.
-component_types: The type of each component in a value.
-shapes: The shape of each component in a value. The length of this attr must
-  be either 0 or the same as the length of component_types.
-  Shapes of fixed rank but variable size are allowed by setting
-  any shape dimension to -1.  In this case, the inputs' shape may vary along
-  the given dimension, and DequeueMany will pad the given dimension with
-  zeros up to the maximum shape of all elements in the given batch.
-  If the length of this attr is 0, different queue elements may have
-  different ranks and shapes, but only one element may be dequeued at a time.
-capacity: The upper bound on the number of elements in this queue.
-  Negative numbers mean no limit.
-container: If non-empty, this queue is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this queue will be shared under the given name
-  across multiple sessions.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("PriorityQueue")
     .Output("handle: Ref(string)")
@@ -569,29 +270,7 @@ REGISTER_OP("PriorityQueue")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-A queue that produces elements sorted by the first component value.
-
-Note that the PriorityQueue requires the first component of any element
-to be a scalar int64, in addition to the other elements declared by
-component_types.  Therefore calls to Enqueue and EnqueueMany (resp. Dequeue
-and DequeueMany) on a PriorityQueue will all require (resp. output) one extra
-entry in their input (resp. output) lists.
-
-handle: The handle to the queue.
-component_types: The type of each component in a value.
-shapes: The shape of each component in a value. The length of this attr must
-  be either 0 or the same as the length of component_types. If the length of
-  this attr is 0, the shapes of queue elements are not constrained, and
-  only one element may be dequeued at a time.
-capacity: The upper bound on the number of elements in this queue.
-  Negative numbers mean no limit.
-container: If non-empty, this queue is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this queue will be shared under the given name
-  across multiple sessions.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("PriorityQueueV2")
     .Output("handle: resource")
@@ -601,158 +280,48 @@ REGISTER_OP("PriorityQueueV2")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-A queue that produces elements sorted by the first component value.
-
-Note that the PriorityQueue requires the first component of any element
-to be a scalar int64, in addition to the other elements declared by
-component_types.  Therefore calls to Enqueue and EnqueueMany (resp. Dequeue
-and DequeueMany) on a PriorityQueue will all require (resp. output) one extra
-entry in their input (resp. output) lists.
-
-handle: The handle to the queue.
-component_types: The type of each component in a value.
-shapes: The shape of each component in a value. The length of this attr must
-  be either 0 or the same as the length of component_types. If the length of
-  this attr is 0, the shapes of queue elements are not constrained, and
-  only one element may be dequeued at a time.
-capacity: The upper bound on the number of elements in this queue.
-  Negative numbers mean no limit.
-container: If non-empty, this queue is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this queue will be shared under the given name
-  across multiple sessions.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("FakeQueue")
     .Input("resource: resource")
     .Output("handle: Ref(string)")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc("Deprecated. Do not use.");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("QueueEnqueue")
     .Input("handle: Ref(string)")
     .Input("components: Tcomponents")
     .Attr("Tcomponents: list(type) >= 1")
     .Attr("timeout_ms: int = -1")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Enqueues a tuple of one or more tensors in the given queue.
-
-The components input has k elements, which correspond to the components of
-tuples stored in the given queue.
-
-N.B. If the queue is full, this operation will block until the given
-element has been enqueued (or 'timeout_ms' elapses, if specified).
-
-handle: The handle to a queue.
-components: One or more tensors from which the enqueued tensors should be taken.
-timeout_ms: If the queue is full, this operation will block for up to
-  timeout_ms milliseconds.
-  Note: This option is not supported yet.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("QueueEnqueueV2")
     .Input("handle: resource")
     .Input("components: Tcomponents")
     .Attr("Tcomponents: list(type) >= 1")
     .Attr("timeout_ms: int = -1")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Enqueues a tuple of one or more tensors in the given queue.
-
-The components input has k elements, which correspond to the components of
-tuples stored in the given queue.
-
-N.B. If the queue is full, this operation will block until the given
-element has been enqueued (or 'timeout_ms' elapses, if specified).
-
-handle: The handle to a queue.
-components: One or more tensors from which the enqueued tensors should be taken.
-timeout_ms: If the queue is full, this operation will block for up to
-  timeout_ms milliseconds.
-  Note: This option is not supported yet.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("QueueEnqueueMany")
     .Input("handle: Ref(string)")
     .Input("components: Tcomponents")
     .Attr("Tcomponents: list(type) >= 1")
     .Attr("timeout_ms: int = -1")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Enqueues zero or more tuples of one or more tensors in the given queue.
-
-This operation slices each component tensor along the 0th dimension to
-make multiple queue elements. All of the tuple components must have the
-same size in the 0th dimension.
-
-The components input has k elements, which correspond to the components of
-tuples stored in the given queue.
-
-N.B. If the queue is full, this operation will block until the given
-elements have been enqueued (or 'timeout_ms' elapses, if specified).
-
-handle: The handle to a queue.
-components: One or more tensors from which the enqueued tensors should
-  be taken.
-timeout_ms: If the queue is too full, this operation will block for up
-  to timeout_ms milliseconds.
-  Note: This option is not supported yet.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("QueueEnqueueManyV2")
     .Input("handle: resource")
     .Input("components: Tcomponents")
     .Attr("Tcomponents: list(type) >= 1")
     .Attr("timeout_ms: int = -1")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Enqueues zero or more tuples of one or more tensors in the given queue.
-
-This operation slices each component tensor along the 0th dimension to
-make multiple queue elements. All of the tuple components must have the
-same size in the 0th dimension.
-
-The components input has k elements, which correspond to the components of
-tuples stored in the given queue.
-
-N.B. If the queue is full, this operation will block until the given
-elements have been enqueued (or 'timeout_ms' elapses, if specified).
-
-handle: The handle to a queue.
-components: One or more tensors from which the enqueued tensors should
-  be taken.
-timeout_ms: If the queue is too full, this operation will block for up
-  to timeout_ms milliseconds.
-  Note: This option is not supported yet.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("QueueDequeue")
     .Input("handle: Ref(string)")
     .Output("components: component_types")
     .Attr("component_types: list(type) >= 1")
     .Attr("timeout_ms: int = -1")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Dequeues a tuple of one or more tensors from the given queue.
-
-This operation has k outputs, where k is the number of components
-in the tuples stored in the given queue, and output i is the ith
-component of the dequeued tuple.
-
-N.B. If the queue is empty, this operation will block until an element
-has been dequeued (or 'timeout_ms' elapses, if specified).
-
-handle: The handle to a queue.
-components: One or more tensors that were dequeued as a tuple.
-component_types: The type of each component in a tuple.
-timeout_ms: If the queue is empty, this operation will block for up to
-  timeout_ms milliseconds.
-  Note: This option is not supported yet.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("QueueDequeueV2")
     .Input("handle: resource")
@@ -769,24 +338,7 @@ REGISTER_OP("QueueDequeueV2")
       } else {
         return shape_inference::UnknownShape(c);
       }
-    })
-    .Doc(R"doc(
-Dequeues a tuple of one or more tensors from the given queue.
-
-This operation has k outputs, where k is the number of components
-in the tuples stored in the given queue, and output i is the ith
-component of the dequeued tuple.
-
-N.B. If the queue is empty, this operation will block until an element
-has been dequeued (or 'timeout_ms' elapses, if specified).
-
-handle: The handle to a queue.
-components: One or more tensors that were dequeued as a tuple.
-component_types: The type of each component in a tuple.
-timeout_ms: If the queue is empty, this operation will block for up to
-  timeout_ms milliseconds.
-  Note: This option is not supported yet.
-)doc");
+    });
 
 REGISTER_OP("QueueDequeueMany")
     .Input("handle: Ref(string)")
@@ -794,32 +346,7 @@ REGISTER_OP("QueueDequeueMany")
     .Output("components: component_types")
     .Attr("component_types: list(type) >= 1")
     .Attr("timeout_ms: int = -1")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Dequeues `n` tuples of one or more tensors from the given queue.
-
-If the queue is closed and there are fewer than `n` elements, then an
-OutOfRange error is returned.
-
-This operation concatenates queue-element component tensors along the
-0th dimension to make a single component tensor.  All of the components
-in the dequeued tuple will have size `n` in the 0th dimension.
-
-This operation has `k` outputs, where `k` is the number of components in
-the tuples stored in the given queue, and output `i` is the ith
-component of the dequeued tuple.
-
-N.B. If the queue is empty, this operation will block until `n` elements
-have been dequeued (or 'timeout_ms' elapses, if specified).
-
-handle: The handle to a queue.
-n: The number of tuples to dequeue.
-components: One or more tensors that were dequeued as a tuple.
-component_types: The type of each component in a tuple.
-timeout_ms: If the queue has fewer than n elements, this operation
-  will block for up to timeout_ms milliseconds.
-  Note: This option is not supported yet.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("QueueDequeueManyV2")
     .Input("handle: resource")
@@ -839,32 +366,7 @@ REGISTER_OP("QueueDequeueManyV2")
         n_shape = c->Vector(n);
       }
       return DequeueManyV2Shape(c, n_shape);
-    })
-    .Doc(R"doc(
-Dequeues `n` tuples of one or more tensors from the given queue.
-
-If the queue is closed and there are fewer than `n` elements, then an
-OutOfRange error is returned.
-
-This operation concatenates queue-element component tensors along the
-0th dimension to make a single component tensor.  All of the components
-in the dequeued tuple will have size `n` in the 0th dimension.
-
-This operation has `k` outputs, where `k` is the number of components in
-the tuples stored in the given queue, and output `i` is the ith
-component of the dequeued tuple.
-
-N.B. If the queue is empty, this operation will block until `n` elements
-have been dequeued (or 'timeout_ms' elapses, if specified).
-
-handle: The handle to a queue.
-n: The number of tuples to dequeue.
-components: One or more tensors that were dequeued as a tuple.
-component_types: The type of each component in a tuple.
-timeout_ms: If the queue has fewer than n elements, this operation
-  will block for up to timeout_ms milliseconds.
-  Note: This option is not supported yet.
-)doc");
+    });
 
 REGISTER_OP("QueueDequeueUpTo")
     .Input("handle: Ref(string)")
@@ -872,36 +374,7 @@ REGISTER_OP("QueueDequeueUpTo")
     .Output("components: component_types")
     .Attr("component_types: list(type) >= 1")
     .Attr("timeout_ms: int = -1")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Dequeues `n` tuples of one or more tensors from the given queue.
-
-This operation is not supported by all queues.  If a queue does not support
-DequeueUpTo, then an Unimplemented error is returned.
-
-If the queue is closed and there are more than 0 but less than `n`
-elements remaining, then instead of returning an OutOfRange error like
-QueueDequeueMany, less than `n` elements are returned immediately.  If
-the queue is closed and there are 0 elements left in the queue, then
-an OutOfRange error is returned just like in QueueDequeueMany.
-Otherwise the behavior is identical to QueueDequeueMany:
-
-This operation concatenates queue-element component tensors along the
-0th dimension to make a single component tensor.  All of the components
-in the dequeued tuple will have size `n` in the 0th dimension.
-
-This operation has k outputs, where `k` is the number of components in
-the tuples stored in the given queue, and output `i` is the ith
-component of the dequeued tuple.
-
-handle: The handle to a queue.
-n: The number of tuples to dequeue.
-components: One or more tensors that were dequeued as a tuple.
-component_types: The type of each component in a tuple.
-timeout_ms: If the queue has fewer than n elements, this operation
-  will block for up to timeout_ms milliseconds.
-  Note: This option is not supported yet.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("QueueDequeueUpToV2")
     .Input("handle: resource")
@@ -911,133 +384,44 @@ REGISTER_OP("QueueDequeueUpToV2")
     .Attr("timeout_ms: int = -1")
     .SetShapeFn([](InferenceContext* c) {
       return DequeueManyV2Shape(c, c->Vector(InferenceContext::kUnknownDim));
-    })
-    .Doc(R"doc(
-Dequeues `n` tuples of one or more tensors from the given queue.
-
-This operation is not supported by all queues.  If a queue does not support
-DequeueUpTo, then an Unimplemented error is returned.
-
-If the queue is closed and there are more than 0 but less than `n`
-elements remaining, then instead of returning an OutOfRange error like
-QueueDequeueMany, less than `n` elements are returned immediately.  If
-the queue is closed and there are 0 elements left in the queue, then
-an OutOfRange error is returned just like in QueueDequeueMany.
-Otherwise the behavior is identical to QueueDequeueMany:
-
-This operation concatenates queue-element component tensors along the
-0th dimension to make a single component tensor.  All of the components
-in the dequeued tuple will have size n in the 0th dimension.
-
-This operation has `k` outputs, where `k` is the number of components in
-the tuples stored in the given queue, and output `i` is the ith
-component of the dequeued tuple.
-
-handle: The handle to a queue.
-n: The number of tuples to dequeue.
-components: One or more tensors that were dequeued as a tuple.
-component_types: The type of each component in a tuple.
-timeout_ms: If the queue has fewer than n elements, this operation
-  will block for up to timeout_ms milliseconds.
-  Note: This option is not supported yet.
-)doc");
+    });
 
 REGISTER_OP("QueueClose")
     .Input("handle: Ref(string)")
     .SetShapeFn(TwoElementVectorInputsAndScalarOutputs)
-    .Attr("cancel_pending_enqueues: bool = false")
-    .Doc(R"doc(
-Closes the given queue.
-
-This operation signals that no more elements will be enqueued in the
-given queue. Subsequent Enqueue(Many) operations will fail.
-Subsequent Dequeue(Many) operations will continue to succeed if
-sufficient elements remain in the queue. Subsequent Dequeue(Many)
-operations that would block will fail immediately.
-
-handle: The handle to a queue.
-cancel_pending_enqueues: If true, all pending enqueue requests that are
-  blocked on the given queue will be canceled.
-)doc");
+    .Attr("cancel_pending_enqueues: bool = false");
 
 REGISTER_OP("QueueCloseV2")
     .Input("handle: resource")
     .SetShapeFn(shape_inference::NoOutputs)
-    .Attr("cancel_pending_enqueues: bool = false")
-    .Doc(R"doc(
-Closes the given queue.
-
-This operation signals that no more elements will be enqueued in the
-given queue. Subsequent Enqueue(Many) operations will fail.
-Subsequent Dequeue(Many) operations will continue to succeed if
-sufficient elements remain in the queue. Subsequent Dequeue(Many)
-operations that would block will fail immediately.
-
-handle: The handle to a queue.
-cancel_pending_enqueues: If true, all pending enqueue requests that are
-  blocked on the given queue will be canceled.
-)doc");
+    .Attr("cancel_pending_enqueues: bool = false");
 
 REGISTER_OP("QueueIsClosed")
     .Input("handle: Ref(string)")
     .Output("is_closed: bool")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Returns true if queue is closed.
-
-This operation returns true if the queue is closed and false if the queue
-is open.
-
-handle: The handle to a queue.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("QueueIsClosedV2")
     .Input("handle: resource")
     .Output("is_closed: bool")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Returns true if queue is closed.
-
-This operation returns true if the queue is closed and false if the queue
-is open.
-
-handle: The handle to a queue.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("QueueSize")
     .Input("handle: Ref(string)")
     .Output("size: int32")
-    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs)
-    .Doc(R"doc(
-Computes the number of elements in the given queue.
-
-handle: The handle to a queue.
-size: The number of elements in the given queue.
-)doc");
+    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs);
 
 REGISTER_OP("QueueSizeV2")
     .Input("handle: resource")
     .Output("size: int32")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Computes the number of elements in the given queue.
-
-handle: The handle to a queue.
-size: The number of elements in the given queue.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // --------------------------------------------------------------------------
 
 REGISTER_OP("AccumulatorNumAccumulated")
     .Input("handle: Ref(string)")
     .Output("num_accumulated: int32")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Returns the number of gradients aggregated in the given accumulators.
-
-handle: The handle to an accumulator.
-num_accumulated: The number of gradients aggregated in the given accumulator.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("AccumulatorSetGlobalStep")
     .Input("handle: Ref(string)")
@@ -1046,16 +430,7 @@ REGISTER_OP("AccumulatorSetGlobalStep")
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Updates the accumulator with a new value for global_step.
-
-Logs warning if the accumulator's value is already higher than
-new_global_step.
-
-handle: The handle to an accumulator.
-new_global_step: The new global_step value to set.
-)doc");
+    });
 
 REGISTER_OP("ConditionalAccumulator")
     .Output("handle: Ref(string)")
@@ -1067,25 +442,7 @@ REGISTER_OP("ConditionalAccumulator")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->Vector(2));
       return Status::OK();
-    })
-    .Doc(R"doc(
-A conditional accumulator for aggregating gradients.
-
-The accumulator accepts gradients marked with local_step greater or
-equal to the most recent global_step known to the accumulator. The
-average can be extracted from the accumulator, provided sufficient
-gradients have been accumulated. Extracting the average automatically
-resets the aggregate to 0, and increments the global_step recorded by
-the accumulator.
-
-handle: The handle to the accumulator.
-dtype: The type of the value being accumulated.
-shape: The shape of the values, can be [], in which case shape is unknown.
-container: If non-empty, this accumulator is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this accumulator will be shared under the
-  given name across multiple sessions.
-)doc");
+    });
 
 REGISTER_OP("AccumulatorApplyGradient")
     .Input("handle: Ref(string)")
@@ -1096,18 +453,7 @@ REGISTER_OP("AccumulatorApplyGradient")
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Applies a gradient to a given accumulator.
-
-Does not add if local_step is lesser than the accumulator's global_step.
-
-handle: The handle to a accumulator.
-local_step: The local_step value at which the gradient was computed.
-gradient: A tensor of the gradient to be accumulated.
-dtype: The data type of accumulated gradients. Needs to correspond to the type
-  of the accumulator.
-)doc");
+    });
 
 REGISTER_OP("AccumulatorTakeGradient")
     .Input("handle: Ref(string)")
@@ -1121,22 +467,7 @@ REGISTER_OP("AccumulatorTakeGradient")
       // shape information.
       return shape_inference::UnknownShape(c);
     })
-    .Attr("dtype: numbertype")
-    .Doc(R"doc(
-Extracts the average gradient in the given ConditionalAccumulator.
-
-The op blocks until sufficient (i.e., more than num_required)
-gradients have been accumulated.  If the accumulator has already
-aggregated more than num_required gradients, it returns the average of
-the accumulated gradients.  Also automatically increments the recorded
-global_step in the accumulator by 1, and resets the aggregate to 0.
-
-handle: The handle to an accumulator.
-num_required: Number of gradients required before we return an aggregate.
-average: The average of the accumulated gradients.
-dtype: The data type of accumulated gradients. Needs to correspond to the type
-  of the accumulator.
-)doc");
+    .Attr("dtype: numbertype");
 
 REGISTER_OP("SparseConditionalAccumulator")
     .Output("handle: Ref(string)")
@@ -1148,25 +479,7 @@ REGISTER_OP("SparseConditionalAccumulator")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->Vector(2));
       return Status::OK();
-    })
-    .Doc(R"doc(
-A conditional accumulator for aggregating sparse gradients.
-
-The accumulator accepts gradients marked with local_step greater or
-equal to the most recent global_step known to the accumulator. The
-average can be extracted from the accumulator, provided sufficient
-gradients have been accumulated. Extracting the average automatically
-resets the aggregate to 0, and increments the global_step recorded by
-the accumulator.
-
-handle: The handle to the accumulator.
-dtype: The type of the value being accumulated.
-shape: The shape of the values.
-container: If non-empty, this accumulator is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this accumulator will be shared under the given name
-  across multiple sessions.
-)doc");
+    });
 
 REGISTER_OP("SparseAccumulatorApplyGradient")
     .Input("handle: Ref(string)")
@@ -1180,26 +493,7 @@ REGISTER_OP("SparseAccumulatorApplyGradient")
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Applies a sparse gradient to a given accumulator.
-
-Does not add if local_step is smaller than the accumulator's
-global_step.
-
-handle: The handle to a accumulator.
-local_step: The local_step value at which the sparse gradient was computed.
-gradient_indices: Indices of the sparse gradient to be accumulated. Must be a
-  vector.
-gradient_values: Values are the non-zero slices of the gradient, and must have
-  the same first dimension as indices, i.e., the nnz represented by indices and
-  values must be consistent.
-gradient_shape: Shape of the sparse gradient to be accumulated.
-dtype: The data type of accumulated gradients. Needs to correspond to the type
-  of the accumulator.
-has_known_shape: Boolean indicating whether gradient_shape is unknown, in which
-  case the input is ignored during validation.
-)doc");
+    });
 
 REGISTER_OP("SparseAccumulatorTakeGradient")
     .Input("handle: Ref(string)")
@@ -1215,25 +509,7 @@ REGISTER_OP("SparseAccumulatorTakeGradient")
       // by 'handle', but which is not available here, so we lose
       // shape information.
       return shape_inference::UnknownShape(c);
-    })
-    .Doc(R"doc(
-Extracts the average sparse gradient in a SparseConditionalAccumulator.
-
-The op will blocks until sufficient (i.e., more than num_required)
-gradients have been accumulated. If the accumulator has already
-aggregated more than num_required gradients, it will return its
-average of the accumulated gradients.  Also automatically increments
-the recorded global_step in the accumulator by 1, and resets the
-aggregate to 0.
-
-handle: The handle to a SparseConditionalAccumulator.
-num_required: Number of gradients required before we return an aggregate.
-indices: Indices of the average of the accumulated sparse gradients.
-values: Values of the average of the accumulated sparse gradients.
-shape: Shape of the average of the accumulated sparse gradients.
-dtype: The data type of accumulated gradients. Needs to correspond to the type
-  of the accumulator.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1243,17 +519,7 @@ REGISTER_OP("StackV2")
     .Attr("elem_type: type")
     .Attr("stack_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-A stack that produces elements in first-in last-out order.
-
-max_size: The maximum size of the stack if non-negative. If negative, the stack
-  size is unlimited.
-handle: The handle to the stack.
-elem_type: The type of the elements on the stack.
-stack_name: Overrides the name used for the temporary stack resource. Default
-value is the name of the 'Stack' op (which is guaranteed unique).
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("StackPushV2")
     .Input("handle: resource")
@@ -1264,37 +530,17 @@ REGISTER_OP("StackPushV2")
     .SetShapeFn([](shape_inference::InferenceContext* c) {
       c->set_output(0, c->input(1));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Push an element onto the stack.
-
-handle: The handle to a stack.
-elem: The tensor to be pushed onto the stack.
-output: The same tensor as the input 'elem'.
-swap_memory: Swap `elem` to CPU. Default to false.
-)doc");
+    });
 
 REGISTER_OP("StackPopV2")
     .Input("handle: resource")
     .Output("elem: elem_type")
     .Attr("elem_type: type")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Pop the element at the top of the stack.
-
-handle: The handle to a stack.
-elem: The tensor that is popped from the top of the stack.
-elem_type: The type of the elem that is popped.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("StackCloseV2")
     .Input("handle: resource")
-    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs)
-    .Doc(R"doc(
-Delete the stack from its resource container.
-
-handle: The handle to a stack.
-)doc");
+    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs);
 
 // Deprecated ref-typed variants of stack.
 
@@ -1303,10 +549,7 @@ REGISTER_OP("Stack")
     .Attr("elem_type: type")
     .Attr("stack_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-Deprecated, use StackV2.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("StackPush")
     .Input("handle: Ref(string)")
@@ -1317,26 +560,17 @@ REGISTER_OP("StackPush")
     .SetShapeFn([](shape_inference::InferenceContext* c) {
       c->set_output(0, c->input(1));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Deprecated, use StackPushV2.
-)doc");
+    });
 
 REGISTER_OP("StackPop")
     .Input("handle: Ref(string)")
     .Output("elem: elem_type")
     .Attr("elem_type: type")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Deprecated, use StackPopV2.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("StackClose")
     .Input("handle: Ref(string)")
-    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs)
-    .Doc(R"doc(
-Deprecated, use StackCloseV2.
-)doc");
+    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs);
 
 // --------------------------------------------------------------------------
 
@@ -1356,35 +590,21 @@ REGISTER_OP("TensorArrayV3")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
       c->set_output(0, c->Vector(2));
       c->set_output(1, c->Scalar());
+      bool identical_shapes;
+      TF_RETURN_IF_ERROR(
+          c->GetAttr("identical_element_shapes", &identical_shapes));
+      DataType t;
+      TF_RETURN_IF_ERROR(c->GetAttr("dtype", &t));
+      PartialTensorShape p;
+      TF_RETURN_IF_ERROR(c->GetAttr("element_shape", &p));
+      ShapeHandle s;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(p, &s));
+      if (c->FullyDefined(s) || identical_shapes) {
+        c->set_output_handle_shapes_and_types(
+            0, std::vector<shape_inference::ShapeAndType>{{s, t}});
+      }
       return Status::OK();
-    })
-    .Doc(R"doc(
-An array of Tensors of given size.
-
-Write data via Write and read via Read or Pack.
-
-handle: The handle to the TensorArray.
-flow: A scalar used to control gradient flow.
-size: The size of the array.
-dtype: The type of the elements on the tensor_array.
-element_shape: The expected shape of an element, if known. Used to
-  validate the shapes of TensorArray elements. If this shape is not
-  fully specified, gathering zero-size TensorArrays is an error.
-dynamic_size: A boolean that determines whether writes to the TensorArray
-  are allowed to grow the size.  By default, this is not allowed.
-clear_after_read: If true (default), Tensors in the TensorArray are cleared
-  after being read.  This disables multiple read semantics but allows early
-  release of memory.
-identical_element_shapes: If true (default is false), then all
-  elements in the TensorArray will be expected to have have identical shapes.
-  This allows certain behaviors, like dynamically checking for
-  consistent shapes on write, and being able to fill in properly
-  shaped zero tensors on stack -- even if the element_shape attribute
-  is not fully defined.
-tensor_array_name: Overrides the name used for the temporary tensor_array
-  resource. Default value is the name of the 'TensorArray' op (which
-  is guaranteed unique).
-)doc");
+    });
 
 REGISTER_OP("TensorArrayGradV3")
     .Input("handle: resource")
@@ -1400,53 +620,12 @@ REGISTER_OP("TensorArrayGradV3")
       TF_RETURN_IF_ERROR(c->WithValue(c->Dim(handle, 0), 2, &unused_dim));
       c->set_output(0, c->Vector(2));
       c->set_output(1, c->Scalar());
+      if (c->input_handle_shapes_and_types(0)) {
+        c->set_output_handle_shapes_and_types(
+            0, *c->input_handle_shapes_and_types(0));
+      }
       return Status::OK();
-    })
-    .Doc(R"doc(
-Creates a TensorArray for storing the gradients of values in the given handle.
-
-If the given TensorArray gradient already exists, returns a reference to it.
-
-Locks the size of the original TensorArray by disabling its dynamic size flag.
-
-**A note about the input flow_in:**
-
-The handle flow_in forces the execution of the gradient lookup to occur
-only after certain other operations have occurred.  For example, when
-the forward TensorArray is dynamically sized, writes to this TensorArray
-may resize the object.  The gradient TensorArray is statically sized based
-on the size of the forward TensorArray when this operation executes.
-Furthermore, the size of the forward TensorArray is frozen by this call.
-As a result, the flow is used to ensure that the call to generate the gradient
-TensorArray only happens after all writes are executed.
-
-In the case of dynamically sized TensorArrays, gradient computation should
-only be performed on read operations that have themselves been chained via
-flow to occur only after all writes have executed. That way the final size
-of the forward TensorArray is known when this operation is called.
-
-**A note about the source attribute:**
-
-TensorArray gradient calls use an accumulator TensorArray object.  If
-multiple gradients are calculated and run in the same session, the multiple
-gradient nodes may accidentally flow through the same accumulator TensorArray.
-This double counts and generally breaks the TensorArray gradient flow.
-
-The solution is to identify which gradient call this particular
-TensorArray gradient is being called in.  This is performed by identifying
-a unique string (e.g. "gradients", "gradients_1", ...) from the input
-gradient Tensor's name.  This string is used as a suffix when creating
-the TensorArray gradient object here (the attribute `source`).
-
-The attribute `source` is added as a suffix to the forward TensorArray's
-name when performing the creation / lookup, so that each separate gradient
-calculation gets its own TensorArray accumulator.
-
-handle: The handle to the forward TensorArray.
-flow_in: A float scalar that enforces proper chaining of operations.
-source: The gradient source string, used to decide which gradient TensorArray
-  to return.
-)doc");
+    });
 
 REGISTER_OP("TensorArrayWriteV3")
     .Input("handle: resource")
@@ -1464,17 +643,17 @@ REGISTER_OP("TensorArrayWriteV3")
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
-      return shape_inference::ScalarShape(c);
-    })
-    .Doc(R"doc(
-Push an element onto the tensor_array.
 
-handle: The handle to a TensorArray.
-index: The position to write to inside the TensorArray.
-value: The tensor to write to the TensorArray.
-flow_in: A float scalar that enforces proper chaining of operations.
-flow_out: A float scalar that enforces proper chaining of operations.
-)doc");
+      auto* handle_data = c->input_handle_shapes_and_types(0);
+      if (handle_data != nullptr && !handle_data->empty()) {
+        shape_inference::ShapeAndType shape_and_type = (*handle_data)[0];
+        ShapeHandle value_shape = c->input(2);
+        TF_RETURN_IF_ERROR(
+            c->Merge(shape_and_type.shape, value_shape, &unused));
+      }
+
+      return shape_inference::ScalarShape(c);
+    });
 
 REGISTER_OP("TensorArrayReadV3")
     .Input("handle: resource")
@@ -1490,16 +669,15 @@ REGISTER_OP("TensorArrayReadV3")
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
-      return shape_inference::UnknownShape(c);
-    })
-    .Doc(R"doc(
-Read an element from the TensorArray into output `value`.
-
-handle: The handle to a TensorArray.
-dtype: The type of the elem that is returned.
-flow_in: A float scalar that enforces proper chaining of operations.
-value: The tensor that is read from the TensorArray.
-)doc");
+      auto shapes = c->input_handle_shapes_and_types(0);
+      if (shapes != nullptr && !shapes->empty()) {
+        ShapeHandle tensor_shape = shapes->at(0).shape;
+        c->set_output(0, tensor_shape);
+        return Status::OK();
+      } else {
+        return shape_inference::UnknownShape(c);
+      }
+    });
 
 REGISTER_OP("TensorArrayGatherV3")
     .Input("handle: resource")
@@ -1516,22 +694,7 @@ REGISTER_OP("TensorArrayGatherV3")
       TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
       return shape_inference::UnknownShape(c);
-    })
-    .Doc(R"doc(
-Gather specific elements from the TensorArray into output `value`.
-
-All elements selected by `indices` must have the same shape.
-
-handle: The handle to a TensorArray.
-indices: The locations in the TensorArray from which to read tensor elements.
-dtype: The type of the elem that is returned.
-element_shape: The expected shape of an element, if known. Used to
-  validate the shapes of TensorArray elements. If this shape is not
-  fully specified, gathering zero-size TensorArrays is an error.
-flow_in: A float scalar that enforces proper chaining of operations.
-value: All of the elements in the TensorArray, concatenated along a new
-  axis (the new dimension 0).
-)doc");
+    });
 
 REGISTER_OP("TensorArrayScatterV3")
     .Input("handle: resource")
@@ -1548,18 +711,7 @@ REGISTER_OP("TensorArrayScatterV3")
       TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
       return shape_inference::ScalarShape(c);
-    })
-    .Doc(R"doc(
-Scatter the data from the input value into specific TensorArray elements.
-
-`indices` must be a vector, its length must match the first dim of `value`.
-
-handle: The handle to a TensorArray.
-indices: The locations at which to write the tensor elements.
-value: The concatenated tensor to write to the TensorArray.
-flow_in: A float scalar that enforces proper chaining of operations.
-flow_out: A float scalar that enforces proper chaining of operations.
-)doc");
+    });
 
 REGISTER_OP("TensorArrayConcatV3")
     .Input("handle: resource")
@@ -1578,35 +730,7 @@ REGISTER_OP("TensorArrayConcatV3")
       c->set_output(0, c->UnknownShape());
       c->set_output(1, c->Vector(c->UnknownDim()));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Concat the elements from the TensorArray into value `value`.
-
-Takes `T` elements of shapes
-
-  ```
-  (n0 x d0 x d1 x ...), (n1 x d0 x d1 x ...), ..., (n(T-1) x d0 x d1 x ...)
-  ```
-
-and concatenates them into a Tensor of shape:
-
-  ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```
-
-All elements must have the same shape (excepting the first dimension).
-
-handle: The handle to a TensorArray.
-dtype: The type of the elem that is returned.
-flow_in: A float scalar that enforces proper chaining of operations.
-element_shape_except0: The expected shape of an element, if known,
-  excluding the first dimension. Used to validate the shapes of
-  TensorArray elements. If this shape is not fully specified, concatenating
-  zero-size TensorArrays is an error.
-value: All of the elements in the TensorArray, concatenated along the first
-  axis.
-lengths: A vector of the row sizes of the original T elements in the
-  value output.  In the example above, this would be the values:
-  `(n1, n2, ..., n(T-1))`.
-)doc");
+    });
 
 REGISTER_OP("TensorArraySplitV3")
     .Input("handle: resource")
@@ -1624,35 +748,7 @@ REGISTER_OP("TensorArraySplitV3")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
       return shape_inference::ScalarShape(c);
-    })
-    .Doc(R"doc(
-Split the data from the input value into TensorArray elements.
-
-Assuming that `lengths` takes on values
-
-  ```(n0, n1, ..., n(T-1))```
-
-and that `value` has shape
-
-  ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```,
-
-this splits values into a TensorArray with T tensors.
-
-TensorArray index t will be the subtensor of values with starting position
-
-  ```(n0 + n1 + ... + n(t-1), 0, 0, ...)```
-
-and having size
-
-  ```nt x d0 x d1 x ...```
-
-handle: The handle to a TensorArray.
-value: The concatenated tensor to write to the TensorArray.
-lengths: The vector of lengths, how to split the rows of value into the
-  TensorArray.
-flow_in: A float scalar that enforces proper chaining of operations.
-flow_out: A float scalar that enforces proper chaining of operations.
-)doc");
+    });
 
 REGISTER_OP("TensorArraySizeV3")
     .Input("handle: resource")
@@ -1664,14 +760,7 @@ REGISTER_OP("TensorArraySizeV3")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &handle));
       TF_RETURN_IF_ERROR(c->WithValue(c->Dim(handle, 0), 2, &unused_dim));
       return shape_inference::ScalarShape(c);
-    })
-    .Doc(R"doc(
-Get the current size of the TensorArray.
-
-handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
-flow_in: A float scalar that enforces proper chaining of operations.
-size: The current size of the TensorArray.
-)doc");
+    });
 
 REGISTER_OP("TensorArrayCloseV3")
     .Input("handle: resource")
@@ -1681,15 +770,7 @@ REGISTER_OP("TensorArrayCloseV3")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &handle));
       TF_RETURN_IF_ERROR(c->WithValue(c->Dim(handle, 0), 2, &unused_dim));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Delete the TensorArray from its resource container.
-
-This enables the user to close and release the resource in the middle
-of a step/run.
-
-handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1721,8 +802,7 @@ REGISTER_OP("TensorArrayV2")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
       c->set_output(0, c->Vector(2));
       return Status::OK();
-    })
-    .Doc("Deprecated. Use TensorArrayV3");
+    });
 REGISTER_OP("TensorArrayGrad")
     .Input("handle: string")
     .Input("flow_in: float")
@@ -1745,8 +825,7 @@ REGISTER_OP("TensorArrayGradV2")
       TF_RETURN_IF_ERROR(c->WithValue(c->Dim(handle, 0), 2, &unused_dim));
       c->set_output(0, c->Vector(2));
       return Status::OK();
-    })
-    .Doc("Deprecated. Use TensorArrayGradV3");
+    });
 REGISTER_OP("TensorArrayWrite")
     .Input("handle: Ref(string)")
     .Input("index: int32")
@@ -1774,8 +853,7 @@ REGISTER_OP("TensorArrayWriteV2")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
       return shape_inference::ScalarShape(c);
-    })
-    .Doc("Deprecated. Use TensorArrayGradV3");
+    });
 REGISTER_OP("TensorArrayRead")
     .Input("handle: Ref(string)")
     .Input("index: int32")
@@ -1800,8 +878,7 @@ REGISTER_OP("TensorArrayReadV2")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
       return shape_inference::UnknownShape(c);
-    })
-    .Doc("Deprecated. Use TensorArrayReadV3");
+    });
 REGISTER_OP("TensorArrayPack")
     .Input("handle: Ref(string)")
     .Input("flow_in: float")
@@ -1843,8 +920,7 @@ REGISTER_OP("TensorArrayGatherV2")
       TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
       return shape_inference::UnknownShape(c);
-    })
-    .Doc("Deprecated. Use TensorArrayGatherV3");
+    });
 REGISTER_OP("TensorArrayScatter")
     .Input("handle: Ref(string)")
     .Input("indices: int32")
@@ -1870,8 +946,7 @@ REGISTER_OP("TensorArrayScatterV2")
       TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
       return shape_inference::ScalarShape(c);
-    })
-    .Doc("Deprecated. Use TensorArrayScatterV3");
+    });
 REGISTER_OP("TensorArrayConcat")
     .Input("handle: Ref(string)")
     .Input("flow_in: float")
@@ -1898,8 +973,7 @@ REGISTER_OP("TensorArrayConcatV2")
       c->set_output(0, c->UnknownShape());
       c->set_output(1, c->Vector(c->UnknownDim()));
       return Status::OK();
-    })
-    .Doc("Deprecated. Use TensorArrayConcatV3");
+    });
 REGISTER_OP("TensorArraySplit")
     .Input("handle: Ref(string)")
     .Input("value: T")
@@ -1926,8 +1000,7 @@ REGISTER_OP("TensorArraySplitV2")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
       return shape_inference::ScalarShape(c);
-    })
-    .Doc("Deprecated. Use TensorArraySplitV3");
+    });
 REGISTER_OP("TensorArraySize")
     .Input("handle: Ref(string)")
     .Input("flow_in: float")
@@ -1945,8 +1018,7 @@ REGISTER_OP("TensorArraySizeV2")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &handle));
       TF_RETURN_IF_ERROR(c->WithValue(c->Dim(handle, 0), 2, &unused_dim));
       return shape_inference::ScalarShape(c);
-    })
-    .Doc("Deprecated. Use TensorArraySizeV3");
+    });
 REGISTER_OP("TensorArrayClose")
     .Input("handle: Ref(string)")
     .SetShapeFn([](InferenceContext* c) { return Status::OK(); })
@@ -1960,8 +1032,7 @@ REGISTER_OP("TensorArrayCloseV2")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &handle));
       TF_RETURN_IF_ERROR(c->WithValue(c->Dim(handle, 0), 2, &unused_dim));
       return Status::OK();
-    })
-    .Doc("Deprecated. Use TensorArrayCloseV3");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1973,31 +1044,7 @@ REGISTER_OP("Barrier")
     .Attr("capacity: int = -1")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-Defines a barrier that persists across different graph executions.
-
-A barrier represents a key-value map, where each key is a string, and
-each value is a tuple of tensors.
-
-At runtime, the barrier contains 'complete' and 'incomplete'
-elements. A complete element has defined tensors for all components of
-its value tuple, and may be accessed using BarrierTakeMany. An
-incomplete element has some undefined components in its value tuple,
-and may be updated using BarrierInsertMany.
-
-handle: The handle to the barrier.
-component_types: The type of each component in a value.
-shapes: The shape of each component in a value. Each shape must be 1 in the
-  first dimension. The length of this attr must be the same as the length of
-  component_types.
-capacity: The capacity of the barrier.  The default capacity is MAX_INT32,
-  which is the largest capacity of the underlying queue.
-container: If non-empty, this barrier is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this barrier will be shared under the given name
-  across multiple sessions.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("BarrierInsertMany")
     .Input("handle: Ref(string)")
@@ -2016,21 +1063,7 @@ REGISTER_OP("BarrierInsertMany")
       TF_RETURN_IF_ERROR(c->WithRankAtLeast(values, 1, &values));
       TF_RETURN_IF_ERROR(c->Merge(keys, c->Vector(c->Dim(values, 0)), &handle));
       return Status::OK();
-    })
-    .Doc(R"doc(
-For each key, assigns the respective value to the specified component.
-
-If a key is not found in the barrier, this operation will create a new
-incomplete element. If a key is found in the barrier, and the element
-already has a value at component_index, this operation will fail with
-INVALID_ARGUMENT, and leave the barrier in an undefined state.
-
-handle: The handle to a barrier.
-component_index: The component of the barrier elements that is being assigned.
-keys: A one-dimensional tensor of keys, with length n.
-values: An any-dimensional tensor of values, which are associated with the
-  respective keys. The 0th dimension must have length n.
-)doc");
+    });
 
 REGISTER_OP("BarrierTakeMany")
     .Input("handle: Ref(string)")
@@ -2042,78 +1075,22 @@ REGISTER_OP("BarrierTakeMany")
     .Attr("allow_small_batch: bool = false")
     .Attr("wait_for_incomplete: bool = false")
     .Attr("timeout_ms: int = -1")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Takes the given number of completed elements from a barrier.
-
-This operation concatenates completed-element component tensors along
-the 0th dimension to make a single component tensor.
-
-Elements come out of the barrier when they are complete, and in the order
-in which they were placed into the barrier.  The indices output provides
-information about the batch in which each element was originally inserted
-into the barrier.
-
-handle: The handle to a barrier.
-num_elements: A single-element tensor containing the number of elements to
-  take.
-indices: A one-dimensional tensor of indices, with length num_elems.
-  These indices refer to the batch in which the values were placed into the
-  barrier (starting with MIN_LONG and increasing with each BarrierInsertMany).
-keys: A one-dimensional tensor of keys, with length num_elements.
-values: One any-dimensional tensor per component in a barrier element. All
-  values have length num_elements in the 0th dimension.
-component_types: The type of each component in a value.
-allow_small_batch: Allow to return less than num_elements items if barrier is
-  already closed.
-timeout_ms: If the queue is empty, this operation will block for up to
-  timeout_ms milliseconds.
-  Note: This option is not supported yet.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("BarrierClose")
     .Input("handle: Ref(string)")
     .SetShapeFn(TwoElementVectorInputsAndScalarOutputs)
-    .Attr("cancel_pending_enqueues: bool = false")
-    .Doc(R"doc(
-Closes the given barrier.
-
-This operation signals that no more new elements will be inserted in the
-given barrier. Subsequent InsertMany that try to introduce a new key will fail.
-Subsequent InsertMany operations that just add missing components to already
-existing elements will continue to succeed. Subsequent TakeMany operations will
-continue to succeed if sufficient completed elements remain in the barrier.
-Subsequent TakeMany operations that would block will fail immediately.
-
-handle: The handle to a barrier.
-cancel_pending_enqueues: If true, all pending enqueue requests that are
-  blocked on the barrier's queue will be canceled. InsertMany will fail, even
-  if no new key is introduced.
-)doc");
+    .Attr("cancel_pending_enqueues: bool = false");
 
 REGISTER_OP("BarrierReadySize")
     .Input("handle: Ref(string)")
     .Output("size: int32")
-    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs)
-    .Doc(R"doc(
-Computes the number of complete elements in the given barrier.
-
-handle: The handle to a barrier.
-size: The number of complete elements (i.e. those with all of their value
-  components set) in the barrier.
-)doc");
+    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs);
 
 REGISTER_OP("BarrierIncompleteSize")
     .Input("handle: Ref(string)")
     .Output("size: int32")
-    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs)
-    .Doc(R"doc(
-Computes the number of incomplete elements in the given barrier.
-
-handle: The handle to a barrier.
-size: The number of incomplete elements (i.e. those with some of their value
-  components not set) in the barrier.
-)doc");
+    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs);
 
 // --------------------------------------------------------------------------
 
@@ -2121,57 +1098,35 @@ REGISTER_OP("GetSessionHandle")
     .Input("value: T")
     .Output("handle: string")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Store the input tensor in the state of the current session.
-
-value: The tensor to be stored.
-handle: The handle for the tensor stored in the session state, represented
-  as a string.
-)doc");
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("GetSessionHandleV2")
     .Input("value: T")
     .Output("handle: resource")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Store the input tensor in the state of the current session.
-
-value: The tensor to be stored.
-handle: The handle for the tensor stored in the session state, represented
-  as a ResourceHandle object.
-)doc");
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("GetSessionTensor")
     .Input("handle: string")
     .Output("value: dtype")
     .Attr("dtype: type")
+    .SetIsStateful()
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
       return shape_inference::UnknownShape(c);
-    })
-    .Doc(R"doc(
-Get the value of the tensor specified by its handle.
-
-handle: The handle for a tensor stored in the session state.
-value: The tensor for the given handle.
-dtype: The type of the output value.
-)doc");
+    });
 
 REGISTER_OP("DeleteSessionTensor")
     .Input("handle: string")
+    .SetIsStateful()
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Delete the tensor specified by its handle in the session.
-
-handle: The handle for a tensor stored in the session state.
-)doc");
+    });
 
 REGISTER_OP("Stage")
     .Input("values: dtypes")
@@ -2181,23 +1136,7 @@ REGISTER_OP("Stage")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(shape_inference::UnknownShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Stage values similar to a lightweight Enqueue.
-
-The basic functionality of this Op is similar to a queue with many
-fewer capabilities and options.  This Op is optimized for performance.
-
-values: a list of tensors
-dtypes A list of data types that inserted values should adhere to.
-capacity: Maximum number of elements in the Staging Area. If > 0, inserts
-  on the container will block when the capacity is reached.
-memory_limit: The maximum number of bytes allowed for Tensors in the Staging Area.
-  If > 0, inserts will block until sufficient space is available.
-container: If non-empty, this queue is placed in the given container. Otherwise,
-  a default container is used.
-shared_name: It is necessary to match this name to the matching Unstage Op.
-)doc");
+    .SetIsStateful();
 
 REGISTER_OP("Unstage")
     .Output("values: dtypes")
@@ -2207,13 +1146,7 @@ REGISTER_OP("Unstage")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(shape_inference::UnknownShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op is similar to a lightweight Dequeue.
-
-The basic functionality is similar to dequeue with many fewer
-capabilities and options.  This Op is optimized for performance.
-)doc");
+    .SetIsStateful();
 
 REGISTER_OP("StagePeek")
     .Input("index: int32")
@@ -2224,13 +1157,7 @@ REGISTER_OP("StagePeek")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(shape_inference::UnknownShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op peeks at the values at the specified index.  If the
-underlying container does not contain sufficient elements
-this op will block until it does.   This Op is optimized for
-performance.
-    )doc");
+    .SetIsStateful();
 
 REGISTER_OP("StageSize")
     .Output("size: int32")
@@ -2240,10 +1167,7 @@ REGISTER_OP("StageSize")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(shape_inference::ScalarShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op returns the number of elements in the underlying container.
-    )doc");
+    .SetIsStateful();
 
 REGISTER_OP("StageClear")
     .Attr("capacity: int >= 0 = 0")
@@ -2252,10 +1176,7 @@ REGISTER_OP("StageClear")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(shape_inference::UnknownShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op removes all elements in the underlying container.
-    )doc");
+    .SetIsStateful();
 
 // UnorderedMap
 REGISTER_OP("MapStage")
@@ -2269,19 +1190,7 @@ REGISTER_OP("MapStage")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::NoOutputs)
-    .SetIsStateful()
-    .Doc(R"doc(
-Stage (key, values) in the underlying container which behaves like a hashtable.
-
-key: int64
-values: a list of tensors
-dtypes A list of data types that inserted values should adhere to.
-capacity: Maximum number of elements in the Staging Area. If > 0, inserts
-  on the container will block when the capacity is reached.
-container: If non-empty, this queue is placed in the given container. Otherwise,
-  a default container is used.
-shared_name: It is necessary to match this name to the matching Unstage Op.
-)doc");
+    .SetIsStateful();
 
 REGISTER_OP("MapPeek")
     .Input("key: int64")
@@ -2293,12 +1202,7 @@ REGISTER_OP("MapPeek")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::UnknownShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op peeks at the values at the specified key.  If the
-underlying container does not contain this key
-this op will block until it does.
-    )doc");
+    .SetIsStateful();
 
 REGISTER_OP("MapUnstage")
     .Input("key: int64")
@@ -2310,12 +1214,7 @@ REGISTER_OP("MapUnstage")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::UnknownShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op removes and returns the values associated with the key
-from the underlying container.   If the underlying container
-does not contain this key, the op will block until it does.
-    )doc");
+    .SetIsStateful();
 
 REGISTER_OP("MapUnstageNoKey")
     .Input("indices: int32")
@@ -2327,12 +1226,7 @@ REGISTER_OP("MapUnstageNoKey")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::UnknownShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op removes and returns a random (key, value)
-from the underlying container.   If the underlying container
-does not contain elements, the op will block until it does.
-      )doc");
+    .SetIsStateful();
 
 REGISTER_OP("MapSize")
     .Output("size: int32")
@@ -2342,10 +1236,7 @@ REGISTER_OP("MapSize")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::ScalarShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op returns the number of elements in the underlying container.
-    )doc");
+    .SetIsStateful();
 
 REGISTER_OP("MapIncompleteSize")
     .Output("size: int32")
@@ -2355,10 +1246,7 @@ REGISTER_OP("MapIncompleteSize")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::ScalarShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op returns the number of incomplete elements in the underlying container.
-    )doc");
+    .SetIsStateful();
 
 REGISTER_OP("MapClear")
     .Attr("capacity: int >= 0 = 0")
@@ -2367,10 +1255,7 @@ REGISTER_OP("MapClear")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::NoOutputs)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op removes all elements in the underlying container.
-    )doc");
+    .SetIsStateful();
 
 // OrderedMap
 REGISTER_OP("OrderedMapStage")
@@ -2384,20 +1269,7 @@ REGISTER_OP("OrderedMapStage")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::NoOutputs)
-    .SetIsStateful()
-    .Doc(R"doc(
-Stage (key, values) in the underlying container which behaves like a ordered
-associative container.   Elements are ordered by key.
-
-key: int64
-values: a list of tensors
-dtypes A list of data types that inserted values should adhere to.
-capacity: Maximum number of elements in the Staging Area. If > 0, inserts
-  on the container will block when the capacity is reached.
-container: If non-empty, this queue is placed in the given container. Otherwise,
-  a default container is used.
-shared_name: It is necessary to match this name to the matching Unstage Op.
-)doc");
+    .SetIsStateful();
 
 REGISTER_OP("OrderedMapPeek")
     .Input("key: int64")
@@ -2409,13 +1281,7 @@ REGISTER_OP("OrderedMapPeek")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::UnknownShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op peeks at the values at the specified key.  If the
-underlying container does not contain this key
-this op will block until it does.   This Op is optimized for
-performance.
-    )doc");
+    .SetIsStateful();
 
 REGISTER_OP("OrderedMapUnstage")
     .Input("key: int64")
@@ -2427,12 +1293,7 @@ REGISTER_OP("OrderedMapUnstage")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::UnknownShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op removes and returns the values associated with the key
-from the underlying container.   If the underlying container
-does not contain this key, the op will block until it does.
-    )doc");
+    .SetIsStateful();
 
 REGISTER_OP("OrderedMapUnstageNoKey")
     .Input("indices: int32")
@@ -2444,12 +1305,7 @@ REGISTER_OP("OrderedMapUnstageNoKey")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::UnknownShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op removes and returns the (key, value) element with the smallest
-key from the underlying container.   If the underlying container
-does not contain elements, the op will block until it does.
-      )doc");
+    .SetIsStateful();
 
 REGISTER_OP("OrderedMapSize")
     .Output("size: int32")
@@ -2459,10 +1315,7 @@ REGISTER_OP("OrderedMapSize")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::ScalarShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op returns the number of elements in the underlying container.
-    )doc");
+    .SetIsStateful();
 
 REGISTER_OP("OrderedMapIncompleteSize")
     .Output("size: int32")
@@ -2472,10 +1325,7 @@ REGISTER_OP("OrderedMapIncompleteSize")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::ScalarShape)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op returns the number of incomplete elements in the underlying container.
-    )doc");
+    .SetIsStateful();
 
 REGISTER_OP("OrderedMapClear")
     .Attr("capacity: int >= 0 = 0")
@@ -2484,10 +1334,7 @@ REGISTER_OP("OrderedMapClear")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetShapeFn(tensorflow::shape_inference::NoOutputs)
-    .SetIsStateful()
-    .Doc(R"doc(
-Op removes all elements in the underlying container.
-    )doc");
+    .SetIsStateful();
 
 REGISTER_OP("RecordInput")
     .Output("records: string")
@@ -2497,19 +1344,8 @@ REGISTER_OP("RecordInput")
     .Attr("file_buffer_size: int = 10000")
     .Attr("file_parallelism: int = 16")
     .Attr("batch_size: int = 32")
+    .Attr("compression_type: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Emits randomized records.
-
-records: A tensor of shape [batch_size].
-file_pattern: Glob pattern for the data files.
-file_random_seed: Random seeds used to produce randomized records.
-file_shuffle_shift_ratio: Shifts the list of files after the list is randomly
-    shuffled.
-file_buffer_size: The randomization shuffling buffer.
-file_parallelism: How many sstables are opened and concurrently iterated over.
-batch_size: The batch size.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 6bf226e7a535f6c36b4b5b7e660641f22157829c..b86816bb5412e59fabbf01acb64a1856fc78bbed 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -39,13 +39,10 @@ REGISTER_OP("TensorDataset")
     .Attr("output_shapes: list(shape) >= 1")
     .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
                       // stateful to inhibit constant folding.
-    .SetShapeFn(shape_inference::ScalarShape)  // TODO(mrry): Validate that
-                                               // `components` have shapes
-                                               // compatible with
-                                               // `output_shapes`.
-    .Doc(R"doc(
-Creates a dataset that emits `components` as a tuple of tensors once.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);  // TODO(mrry): Validate that
+                                                // `components` have shapes
+                                                // compatible with
+                                                // `output_shapes`.
 
 REGISTER_OP("TensorSliceDataset")
     .Input("components: Toutput_types")
@@ -54,13 +51,10 @@ REGISTER_OP("TensorSliceDataset")
     .Attr("output_shapes: list(shape) >= 1")
     .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
                       // stateful to inhibit constant folding.
-    .SetShapeFn(shape_inference::ScalarShape)  // TODO(mrry): Validate that the
-                                               // dim-0 slices of `components`
-                                               // have shapes compatible with
-                                               // `output_shapes`.
-    .Doc(R"doc(
-Creates a dataset that emits each dim-0 slice of `components` once.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);  // TODO(mrry): Validate that the
+                                                // dim-0 slices of `components`
+                                                // have shapes compatible with
+                                                // `output_shapes`.
 
 REGISTER_OP("SparseTensorSliceDataset")
     .Input("indices: int64")
@@ -70,10 +64,7 @@ REGISTER_OP("SparseTensorSliceDataset")
     .Attr("Tvalues: type")
     .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
                       // stateful to inhibit constant folding.
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that splits a SparseTensor into elements row-wise.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("ZipDataset")
     .Input("input_datasets: N * variant")
@@ -81,10 +72,7 @@ REGISTER_OP("ZipDataset")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
     .Attr("N: int >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that zips together `input_datasets`.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("ConcatenateDataset")
     .Input("input_dataset: variant")
@@ -92,10 +80,7 @@ REGISTER_OP("ConcatenateDataset")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that concatenates `input_dataset` with `another_dataset`.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("RepeatDataset")
     .Input("input_dataset: variant")
@@ -103,14 +88,8 @@ REGISTER_OP("RepeatDataset")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)  // TODO(mrry): Validate the shape
-                                               // of `count`.
-    .Doc(R"doc(
-Creates a dataset that emits the outputs of `input_dataset` `count` times.
-
-count: A scalar representing the number of times that `input_dataset` should
-  be repeated. A value of `-1` indicates that it should be repeated infinitely.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);  // TODO(mrry): Validate the
+                                                // shape of `count`.
 
 REGISTER_OP("TakeDataset")
     .Input("input_dataset: variant")
@@ -118,14 +97,7 @@ REGISTER_OP("TakeDataset")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that contains `count` elements from the `input_dataset`.
-
-count: A scalar representing the number of elements from the `input_dataset`
-  that should be taken. A value of `-1` indicates that all of `input_dataset`
-  is taken.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("SkipDataset")
     .Input("input_dataset: variant")
@@ -133,23 +105,14 @@ REGISTER_OP("SkipDataset")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that skips `count` elements from the `input_dataset`.
-
-count: A scalar representing the number of elements from the `input_dataset`
-  that should be skipped.  If count is -1, skips everything.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("IgnoreErrorsDataset")
     .Input("input_dataset: variant")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that contains the elements of `input_dataset` ignoring errors.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("BytesProducedStatsDataset")
     .Input("input_dataset: variant")
@@ -157,10 +120,7 @@ REGISTER_OP("BytesProducedStatsDataset")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Records the bytes size of each element of `input_dataset` in a StatsAggregator.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("LatencyStatsDataset")
     .Input("input_dataset: variant")
@@ -168,10 +128,7 @@ REGISTER_OP("LatencyStatsDataset")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Records the latency of producing `input_dataset` elements in a StatsAggregator.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("MapDataset")
     .Input("input_dataset: variant")
@@ -181,10 +138,7 @@ REGISTER_OP("MapDataset")
     .Attr("Targuments: list(type) >= 0")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that applies `f` to the outputs of `input_dataset`.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("ParallelMapDataset")
     .Input("input_dataset: variant")
@@ -195,16 +149,7 @@ REGISTER_OP("ParallelMapDataset")
     .Attr("Targuments: list(type) >= 0")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that applies `f` to the outputs of `input_dataset`.
-
-Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up
-to `num_parallel_calls` copies of `f` in parallel.
-
-num_parallel_calls: The number of concurrent invocations of `f` that process
-  elements from `input_dataset` in parallel.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("MapAndBatchDataset")
     .Input("input_dataset: variant")
@@ -216,21 +161,7 @@ REGISTER_OP("MapAndBatchDataset")
     .Attr("Targuments: list(type) >= 0")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that applies `f` to the outputs of `input_dataset` and then
-batches `batch_size` of them.
-
-Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up
-to `batch_size * num_parallel_batches` copies of `f` in parallel.
-
-batch_size: A scalar representing the number of elements to accumulate in a
-  batch. It determines the number of concurrent invocations of `f` that process
-  elements from `input_dataset` in parallel.
-num_parallel_batches: A scalar representing the number of batches to create in
-  parallel. Processing multiple batches in parallel benefits workloads prone to
-  stragglers.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("PrefetchDataset")
     .Input("input_dataset: variant")
@@ -238,13 +169,7 @@ REGISTER_OP("PrefetchDataset")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that asynchronously prefetches elements from `input_dataset`.
-
-buffer_size: The maximum number of elements to buffer in an iterator over
-  this dataset.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("ScanDataset")
     .Input("input_dataset: variant")
@@ -256,10 +181,7 @@ REGISTER_OP("ScanDataset")
     .Attr("Targuments: list(type) >= 0")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset successively reduces `f` over the elements of `input_dataset`.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("FlatMapDataset")
     .Input("input_dataset: variant")
@@ -269,18 +191,7 @@ REGISTER_OP("FlatMapDataset")
     .Attr("Targuments: list(type) >= 0")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that applies `f` to the outputs of `input_dataset`.
-
-Unlike MapDataset, the `f` in FlatMapDataset is expected to return a
-Dataset variant, and FlatMapDataset will flatten successive results
-into a single Dataset.
-
-f: A function mapping elements of `input_dataset`, concatenated with
-  `other_arguments`, to a Dataset variant that contains elements matching
-  `output_types` and `output_shapes`.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("InterleaveDataset")
     .Input("input_dataset: variant")
@@ -292,20 +203,7 @@ REGISTER_OP("InterleaveDataset")
     .Attr("Targuments: list(type) >= 0")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that applies `f` to the outputs of `input_dataset`.
-
-Unlike MapDataset, the `f` in InterleaveDataset is expected to return
-a Dataset variant, and InterleaveDataset will flatten successive
-results into a single Dataset. Unlike FlatMapDataset,
-InterleaveDataset will interleave sequences of up to `block_length`
-consecutive elements from `cycle_length` input elements.
-
-f: A function mapping elements of `input_dataset`, concatenated with
-  `other_arguments`, to a Dataset variant that contains elements matching
-  `output_types` and `output_shapes`.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("ParallelInterleaveDataset")
     .Input("input_dataset: variant")
@@ -313,27 +211,14 @@ REGISTER_OP("ParallelInterleaveDataset")
     .Input("cycle_length: int64")
     .Input("block_length: int64")
     .Input("sloppy: bool")
+    .Input("buffer_output_elements: int64")
+    .Input("prefetch_input_elements: int64")
     .Output("handle: variant")
     .Attr("f: func")
     .Attr("Targuments: list(type) >= 0")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that applies `f` to the outputs of `input_dataset`.
-
-The resulting dataset is similar to the `InterleaveDataset`, with the exception
-that if retrieving the next value from a dataset would cause the requester to
-block, it will skip that input dataset. This dataset is especially useful
-when loading data from a variable-latency datastores (e.g. HDFS, GCS), as it
-allows the training step to proceed so long as some data is available.
-
-!! WARNING !! This dataset is not deterministic!
-
-f: A function mapping elements of `input_dataset`, concatenated with
-   `other_arguments`, to a Dataset variant that contains elements matching
-   `output_types` and `output_shapes`.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("GroupByWindowDataset")
     .Input("input_dataset: variant")
@@ -350,15 +235,7 @@ REGISTER_OP("GroupByWindowDataset")
     .Attr("Twindow_size_func_other_arguments: list(type) >= 0")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that computes a windowed group-by on `input_dataset`.
-
-// TODO(mrry): Support non-int64 keys.
-
-key_func: A function mapping an element of `input_dataset`, concatenated
-  with `key_func_other_arguments` to a scalar value of type DT_INT64.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("FilterDataset")
     .Input("input_dataset: variant")
@@ -368,20 +245,7 @@ REGISTER_OP("FilterDataset")
     .Attr("Targuments: list(type) >= 0")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset containing elements of `input_dataset` matching `predicate`.
-
-The `predicate` function must return a scalar boolean and accept the
-following arguments:
-
-* One tensor for each component of an element of `input_dataset`.
-* One tensor for each value in `other_arguments`.
-
-predicate: A function returning a scalar boolean.
-other_arguments: A list of tensors, typically values that were captured when
-  building a closure for `predicate`.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("BatchDataset")
     .Input("input_dataset: variant")
@@ -389,13 +253,7 @@ REGISTER_OP("BatchDataset")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that batches `batch_size` elements from `input_dataset`.
-
-batch_size: A scalar representing the number of elements to accumulate in a
-  batch.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("PaddedBatchDataset")
     .Input("input_dataset: variant")
@@ -406,50 +264,26 @@ REGISTER_OP("PaddedBatchDataset")
     .Attr("Toutput_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
     .Attr("N: int >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)  // TODO(mrry): Validate that
-                                               // `padded_shapes` are all
-                                               // vectors, the lengths of
-                                               // `output_types` and
-                                               // `output_shapes` are `N`,
-                                               // the `output_shapes` are (as
-                                               // far as possible to tell
-                                               // statically) compatible with
-                                               // `padded_shapes`, and
-                                               // that `padding_values` are
-                                               // all scalars.
-    .Doc(R"doc(
-Creates a dataset that batches and pads `batch_size` elements from the input.
-
-batch_size: A scalar representing the number of elements to accumulate in a
-  batch.
-padded_shapes: A list of int64 tensors representing the desired padded shapes
-  of the corresponding output components. These shapes may be partially
-  specified, using `-1` to indicate that a particular dimension should be
-  padded to the maximum size of all batch elements.
-padding_values: A list of scalars containing the padding value to use for
-  each of the outputs.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);  // TODO(mrry): Validate that
+                                                // `padded_shapes` are all
+                                                // vectors, the lengths of
+                                                // `output_types` and
+                                                // `output_shapes` are `N`,
+                                                // the `output_shapes` are (as
+                                                // far as possible to tell
+                                                // statically) compatible with
+                                                // `padded_shapes`, and
+                                                // that `padding_values` are
+                                                // all scalars.
 
 REGISTER_OP("DenseToSparseBatchDataset")
     .Input("input_dataset: variant")
     .Input("batch_size: int64")
     .Input("row_shape: int64")
     .Output("handle: variant")
-    // NOTE(mrry): the 0th and 2nd elements will be DT_INT64.
     .Attr("output_types: list(type) >= 1")
-    // NOTE(mrry): the 1st and 2nd elements will be vectors.
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that yields a SparseTensor for each element of the input.
-
-input_dataset: A handle to an input dataset. Must have a single component.
-batch_size: A scalar representing the number of elements to accumulate in a
-  batch.
-row_shape: A vector representing the dense shape of each row in the produced
-  SparseTensor. The shape may be partially specified, using `-1` to indicate
-  that a particular dimension should use the maximum size of all batch elements.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("RangeDataset")
     .Input("start: int64")
@@ -460,14 +294,17 @@ REGISTER_OP("RangeDataset")
     .Attr("output_shapes: list(shape) >= 1")
     .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
                       // stateful to inhibit constant folding.
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset with a range of values. Corresponds to python's xrange.
+    .SetShapeFn(shape_inference::ScalarShape);
 
-start: corresponds to start in python's xrange().
-stop: corresponds to stop in python's xrange().
-step: corresponds to step in python's xrange().
-)doc");
+REGISTER_OP("RandomDataset")
+    .Input("seed: int64")
+    .Input("seed2: int64")
+    .Output("handle: variant")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
+                      // stateful to inhibit constant folding.
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("ShuffleDataset")
     .Input("input_dataset: variant")
@@ -478,23 +315,18 @@ REGISTER_OP("ShuffleDataset")
     .Attr("reshuffle_each_iteration: bool = true")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that shuffles elements from `input_dataset` pseudorandomly.
-
-buffer_size: The number of output elements to buffer in an iterator over
-  this dataset. Compare with the `min_after_dequeue` attr when creating a
-  `RandomShuffleQueue`.
-reshuffle_each_iteration: If true, each iterator over this dataset will be given
-  a different pseudorandomly generated seed, based on a sequence seeded by the
-  `seed` and `seed2` inputs. If false, each iterator will be given the same
-  seed, and repeated iteration over this dataset will yield the exact same
-  sequence of results.
-seed: A scalar seed for the random number generator. If either seed or
-  seed2 is set to be non-zero, the random number generator is seeded
-  by the given seed.  Otherwise, a random seed is used.
-seed2: A second scalar seed to avoid seed collision.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
+
+REGISTER_OP("ShuffleAndRepeatDataset")
+    .Input("input_dataset: variant")
+    .Input("buffer_size: int64")
+    .Input("seed: int64")
+    .Input("seed2: int64")
+    .Input("count: int64")
+    .Output("handle: variant")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("CacheDataset")
     .Input("input_dataset: variant")
@@ -502,18 +334,14 @@ REGISTER_OP("CacheDataset")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that caches elements from `input_dataset`.
-
-A CacheDataset will iterate over the input_dataset, and store tensors. If the
-cache already exists, the cache will be used. If the cache is inappropriate
-(e.g. cannot be opened, contains tensors of the wrong shape / size), an error
-will the returned when used.
+    .SetShapeFn(shape_inference::ScalarShape);
 
-filename: A path on the filesystem where we should cache the dataset. Note: this
-  will be a directory.
-)doc");
+REGISTER_OP("UniqueDataset")
+    .Input("input_dataset: variant")
+    .Output("handle: variant")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("TextLineDataset")
     .Input("filenames: string")
@@ -522,19 +350,10 @@ REGISTER_OP("TextLineDataset")
     .Output("handle: variant")
     .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
                       // stateful to inhibit constant folding.
-    .SetShapeFn(shape_inference::ScalarShape)  // TODO(mrry): validate
-                                               // that `filenames` is
-                                               // a scalar or a
-                                               // vector.
-    .Doc(R"doc(
-Creates a dataset that emits the lines of one or more text files.
-
-filenames: A scalar or a vector containing the name(s) of the file(s) to be
-  read.
-compression_type: A scalar containing either (i) the empty string (no
-  compression), (ii) "ZLIB", or (iii) "GZIP".
-buffer_size: A scalar containing the number of bytes to buffer.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);  // TODO(mrry): validate
+                                                // that `filenames` is
+                                                // a scalar or a
+                                                // vector.
 
 REGISTER_OP("SqlDataset")
     .Input("driver_name: string")
@@ -545,14 +364,7 @@ REGISTER_OP("SqlDataset")
     .Attr("output_shapes: list(shape) >= 1")
     .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
                       // stateful to inhibit constant folding.
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that executes a SQL query and emits rows of the result set.
-
-driver_name: The database type. Currently, the only supported type is 'sqlite'.
-data_source_name: A connection string to connect to the database.
-query: A SQL query to execute.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("FixedLengthRecordDataset")
     .Input("filenames: string")
@@ -563,19 +375,7 @@ REGISTER_OP("FixedLengthRecordDataset")
     .Output("handle: variant")
     .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
                       // stateful to inhibit constant folding.
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that emits the records from one or more binary files.
-
-filenames: A scalar or a vector containing the name(s) of the file(s) to be
-  read.
-header_bytes: A scalar representing the number of bytes to skip at the
-  beginning of a file.
-record_bytes: A scalar representing the number of bytes in each record.
-footer_bytes: A scalar representing the number of bytes to skip at the end
-  of a file.
-buffer_size: A scalar representing the number of bytes to buffer. Must be > 0.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("TFRecordDataset")
     .Input("filenames: string")
@@ -584,17 +384,7 @@ REGISTER_OP("TFRecordDataset")
     .Output("handle: variant")
     .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
                       // stateful to inhibit constant folding.
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Creates a dataset that emits the records from one or more TFRecord files.
-
-filenames: A scalar or vector containing the name(s) of the file(s) to be
-  read.
-compression_type: A scalar containing either (i) the empty string (no
-  compression), (ii) "ZLIB", or (iii) "GZIP".
-buffer_size: A scalar representing the number of bytes to buffer. A value of
-  0 means no buffering will be performed.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("Iterator")
     .Output("handle: resource")
@@ -602,24 +392,12 @@ REGISTER_OP("Iterator")
     .Attr("container: string")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-A container for an iterator resource.
-
-handle: A handle to the iterator that can be passed to a "MakeIterator"
-  or "IteratorGetNext" op.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("MakeIterator")
     .Input("dataset: variant")
     .Input("iterator: resource")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R"doc(
-Makes a new iterator from the given `dataset` and stores it in `iterator`.
-
-This operation may be executed multiple times. Each execution will reset the
-iterator in `iterator` to the first element of `dataset`.
-)doc");
+    .SetShapeFn(shape_inference::NoOutputs);
 
 REGISTER_OP("OneShotIterator")
     .Output("handle: resource")
@@ -629,33 +407,7 @@ REGISTER_OP("OneShotIterator")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Makes a "one-shot" iterator that can be iterated only once.
-
-A one-shot iterator bundles the logic for defining the dataset and
-the state of the iterator in a single op, which allows simple input
-pipelines to be defined without an additional initialization
-("MakeIterator") step.
-
-One-shot iterators have the following limitations:
-
-* They do not support parameterization: all logic for creating the underlying
-  dataset must be bundled in the `dataset_factory` function.
-* They are not resettable. Once a one-shot iterator reaches the end of its
-  underlying dataset, subsequent "IteratorGetNext" operations on that
-  iterator will always produce an `OutOfRange` error.
-
-For greater flexibility, use "Iterator" and "MakeIterator" to define
-an iterator using an arbitrary subgraph, which may capture tensors
-(including fed values) as parameters, and which may be reset multiple
-times by rerunning "MakeIterator".
-
-handle: A handle to the iterator that can be passed to an "IteratorGetNext"
-  op.
-dataset_factory: A function of type `() -> DT_VARIANT`, where the returned
-  DT_VARIANT is a dataset.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("IteratorGetNext")
     .Input("iterator: resource")
@@ -679,10 +431,7 @@ REGISTER_OP("IteratorGetNext")
         c->set_output(static_cast<int>(i), output_shape_handle);
       }
       return Status::OK();
-    })
-    .Doc(R"doc(
-Gets the next output from the given iterator.
-)doc");
+    });
 
 REGISTER_OP("DatasetToSingleElement")
     .Input("dataset: variant")
@@ -706,89 +455,44 @@ REGISTER_OP("DatasetToSingleElement")
         c->set_output(static_cast<int>(i), output_shape_handle);
       }
       return Status::OK();
-    })
-    .Doc(R"doc(
-Outputs the single element from the given dataset.
-
-dataset: A handle to a dataset that contains a single element.
-components: The components of the single element of `input`.
-)doc");
+    });
 
 REGISTER_OP("IteratorToStringHandle")
     .Input("resource_handle: resource")
     .Output("string_handle: string")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Converts the given `resource_handle` representing an iterator to a string.
-
-resource_handle: A handle to an iterator resource.
-string_handle: A string representation of the given handle.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("IteratorFromStringHandle")
     .Input("string_handle: string")
     .Output("resource_handle: resource")
     .Attr("output_types: list(type) >= 0 = []")
     .Attr("output_shapes: list(shape) >= 0 = []")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Converts the given string representing a handle to an iterator to a resource.
-
-string_handle: A string representation of the given handle.
-resource_handle: A handle to an iterator resource.
-output_types: If specified, defines the type of each tuple component in an
-  element produced by the resulting iterator.
-output_shapes: If specified, defines the shape of each tuple component in an
-  element produced by the resulting iterator.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("SerializeIterator")
     .Input("resource_handle: resource")
     .Output("serialized: variant")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Converts the given `resource_handle` representing an iterator to a variant tensor.
-
-resource_handle: A handle to an iterator resource.
-serialized: A variant tensor storing the state of the iterator contained in the
-  resource.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("DeserializeIterator")
     .Input("resource_handle: resource")
     .Input("serialized: variant")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R"doc(
-Converts the given variant tensor to an iterator and stores it in the given resource.
-
-resource_handle: A handle to an iterator resource.
-serialized: A variant tensor storing the state of the iterator contained in the
-  resource.
-)doc");
+    .SetShapeFn(shape_inference::NoOutputs);
 
 REGISTER_OP("StatsAggregatorHandle")
     .Output("handle: resource")
     .SetShapeFn(shape_inference::ScalarShape)
     .Attr("container: string = ''")
-    .Attr("shared_name: string = ''")
-    .Doc(R"doc(
-Creates a statistics manager resource.
-)doc");
+    .Attr("shared_name: string = ''");
 
 REGISTER_OP("IteratorSetStatsAggregator")
     .Input("iterator_handle: resource")
     .Input("stats_aggregator_handle: resource")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R"doc(
-Associates the given iterator with the given statistics aggregator.
-)doc");
+    .SetShapeFn(shape_inference::NoOutputs);
 
 REGISTER_OP("StatsAggregatorSummary")
     .Input("iterator: resource")
     .Output("summary: string")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Produces a summary of any statistics recorded by the given statistics manager.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/functional_ops.cc b/tensorflow/core/ops/functional_ops.cc
index 5fd21ec88faef160d99122f1b8bc6e2f877d8694..515b31623bfbffe12f7722becd839d99279d4fdc 100644
--- a/tensorflow/core/ops/functional_ops.cc
+++ b/tensorflow/core/ops/functional_ops.cc
@@ -38,33 +38,7 @@ REGISTER_OP("SymbolicGradient")
         c->set_output(i, c->input(i));
       }
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradient function for function f via backpropagation.
-
-input: a list of input tensors of size N + M;
-output: a list of output tensors of size N;
-Tin: the type list for the input list.
-Tout: the type list for the input list.
-f: The function we want to compute the gradient for.
-
-The function 'f' must be a numerical function which takes N inputs and
-produces M outputs. Its gradient function 'g', which is computed by
-this SymbolicGradient op is a function taking N + M inputs and
-produces N outputs.
-
-I.e. if we have
-   (y1, y2, ..., y_M) = f(x1, x2, ..., x_N),
-then, g is
-   (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N,
-                                     dL/dy1, dL/dy2, ..., dL/dy_M),
-
-where L is a scalar-value function of (x1, x2, ..., xN) (e.g., the
-loss function). dL/dx_i is the partial derivative of L with respect
-to x_i.
-
-(Needs some math expert to say the comment above better.)
-)doc");
+    });
 
 REGISTER_OP("RemoteCall")
     .Input("target: string")
@@ -73,15 +47,5 @@ REGISTER_OP("RemoteCall")
     .Attr("Tin: list(type)")
     .Attr("Tout: list(type)")
     .Attr("f: func")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Runs function `f` on a remote device indicated by `target`.
-
-target: A fully specified device name where we want to run the function.
-args: A list of arguments for the function.
-output: A list of return values.
-Tin: The type list for the arguments.
-Tout: The type list for the return values.
-f: The function to run remotely.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 }  // end namespace tensorflow
diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc
index 13fbd2fa515c5a7e0ec06cdc4c585f4dc691a928..31cc662d218802cc6e748b8e262e3c80c317fb22 100644
--- a/tensorflow/core/ops/image_ops.cc
+++ b/tensorflow/core/ops/image_ops.cc
@@ -153,26 +153,7 @@ REGISTER_OP("ResizeArea")
     .Output("resized_images: float")
     .Attr("T: {int8, uint8, int16, uint16, int32, int64, half, float, double}")
     .Attr("align_corners: bool = false")
-    .SetShapeFn(ResizeShapeFn)
-    .Doc(R"doc(
-Resize `images` to `size` using area interpolation.
-
-Input images can be of different types but output images are always float.
-
-Each output pixel is computed by first transforming the pixel's footprint into
-the input tensor and then averaging the pixels that intersect the footprint. An
-input pixel's contribution to the average is weighted by the fraction of its
-area that intersects the footprint.  This is the same as OpenCV's INTER_AREA.
-
-images: 4-D with shape `[batch, height, width, channels]`.
-size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-  new size for the images.
-align_corners: If true, rescale input by (new_height - 1) / (height - 1), which
-  exactly aligns the 4 corners of images and resized images. If false, rescale
-  by new_height / height. Treat similarly the width dimension.
-resized_images: 4-D with shape
-  `[batch, new_height, new_width, channels]`.
-)doc");
+    .SetShapeFn(ResizeShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ResizeBicubic")
@@ -181,21 +162,7 @@ REGISTER_OP("ResizeBicubic")
     .Output("resized_images: float")
     .Attr("T: {int8, uint8, int16, uint16, int32, int64, half, float, double}")
     .Attr("align_corners: bool = false")
-    .SetShapeFn(ResizeShapeFn)
-    .Doc(R"doc(
-Resize `images` to `size` using bicubic interpolation.
-
-Input images can be of different types but output images are always float.
-
-images: 4-D with shape `[batch, height, width, channels]`.
-size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-  new size for the images.
-align_corners: If true, rescale input by (new_height - 1) / (height - 1), which
-  exactly aligns the 4 corners of images and resized images. If false, rescale
-  by new_height / height. Treat similarly the width dimension.
-resized_images: 4-D with shape
-  `[batch, new_height, new_width, channels]`.
-)doc");
+    .SetShapeFn(ResizeShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ResizeBicubicGrad")
@@ -207,20 +174,7 @@ REGISTER_OP("ResizeBicubicGrad")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->input(1));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradient of bicubic interpolation.
-
-grads: 4-D with shape `[batch, height, width, channels]`.
-original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`,
-  The image tensor that was resized.
-align_corners: If true, rescale grads by (orig_height - 1) / (height - 1), which
-  exactly aligns the 4 corners of grads and original_image. If false, rescale by
-  orig_height / height. Treat similarly the width dimension.
-output: 4-D with shape `[batch, orig_height, orig_width, channels]`.
-  Gradients with respect to the input image. Input image must have been
-  float or double.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ResizeBilinear")
@@ -229,21 +183,7 @@ REGISTER_OP("ResizeBilinear")
     .Output("resized_images: float")
     .Attr("T: {int8, uint8, int16, uint16, int32, int64, half, float, double}")
     .Attr("align_corners: bool = false")
-    .SetShapeFn(ResizeShapeFn)
-    .Doc(R"doc(
-Resize `images` to `size` using bilinear interpolation.
-
-Input images can be of different types but output images are always float.
-
-images: 4-D with shape `[batch, height, width, channels]`.
-size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-  new size for the images.
-align_corners: If true, rescale input by (new_height - 1) / (height - 1), which
-  exactly aligns the 4 corners of images and resized images. If false, rescale
-  by new_height / height. Treat similarly the width dimension.
-resized_images: 4-D with shape
-  `[batch, new_height, new_width, channels]`.
-)doc");
+    .SetShapeFn(ResizeShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("QuantizedResizeBilinear")
@@ -265,21 +205,7 @@ REGISTER_OP("QuantizedResizeBilinear")
       c->set_output(1, c->MakeShape({}));
       c->set_output(2, c->MakeShape({}));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Resize quantized `images` to `size` using quantized bilinear interpolation.
-
-Input images and output images must be quantized types.
-
-images: 4-D with shape `[batch, height, width, channels]`.
-size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-  new size for the images.
-align_corners: If true, rescale input by (new_height - 1) / (height - 1), which
-  exactly aligns the 4 corners of images and resized images. If false, rescale
-  by new_height / height. Treat similarly the width dimension.
-resized_images: 4-D with shape
-  `[batch, new_height, new_width, channels]`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ResizeBilinearGrad")
@@ -291,20 +217,7 @@ REGISTER_OP("ResizeBilinearGrad")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->input(1));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradient of bilinear interpolation.
-
-grads: 4-D with shape `[batch, height, width, channels]`.
-original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`,
-  The image tensor that was resized.
-align_corners: If true, rescale grads by (orig_height - 1) / (height - 1), which
-  exactly aligns the 4 corners of grads and original_image. If false, rescale by
-  orig_height / height. Treat similarly the width dimension.
-output: 4-D with shape `[batch, orig_height, orig_width, channels]`.
-  Gradients with respect to the input image. Input image must have been
-  float or double.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ResizeNearestNeighbor")
@@ -313,19 +226,7 @@ REGISTER_OP("ResizeNearestNeighbor")
     .Output("resized_images: T")
     .Attr("T: {int8, uint8, int16, uint16, int32, int64, half, float, double}")
     .Attr("align_corners: bool = false")
-    .SetShapeFn(ResizeShapeFn)
-    .Doc(R"doc(
-Resize `images` to `size` using nearest neighbor interpolation.
-
-images: 4-D with shape `[batch, height, width, channels]`.
-size:= A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-  new size for the images.
-align_corners: If true, rescale input by (new_height - 1) / (height - 1), which
-  exactly aligns the 4 corners of images and resized images. If false, rescale
-  by new_height / height. Treat similarly the width dimension.
-resized_images: 4-D with shape
-  `[batch, new_height, new_width, channels]`.
-)doc");
+    .SetShapeFn(ResizeShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ResizeNearestNeighborGrad")
@@ -354,19 +255,7 @@ REGISTER_OP("ResizeNearestNeighborGrad")
       }
       c->set_output(0, input);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradient of nearest neighbor interpolation.
-
-grads: 4-D with shape `[batch, height, width, channels]`.
-size:= A 1-D int32 Tensor of 2 elements: `orig_height, orig_width`. The
-  original input size.
-align_corners: If true, rescale grads by (orig_height - 1) / (height - 1), which
-  exactly aligns the 4 corners of grads and original_image. If false, rescale by
-  orig_height / height. Treat similarly the width dimension.
-output: 4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients
-  with respect to the input image.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("RandomCrop")
@@ -399,25 +288,7 @@ REGISTER_OP("RandomCrop")
       }
       c->set_output(0, c->MakeShape({h, w, channels}));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Randomly crop `image`.
-
-`size` is a 1-D int64 tensor with 2 elements representing the crop height and
-width.  The values must be non negative.
-
-This Op picks a random location in `image` and crops a `height` by `width`
-rectangle from that location.  The random location is picked so the cropped
-area will fit inside the original image.
-
-image: 3-D of shape `[height, width, channels]`.
-size: 1-D of length 2 containing: `crop_height`, `crop_width`..
-seed: If either seed or seed2 are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: An second seed to avoid seed collision.
-output: 3-D of shape `[crop_height, crop_width, channels].`
-)doc");
+    });
 // TODO(shlens): Support variable rank in RandomCrop.
 
 // --------------------------------------------------------------------------
@@ -430,17 +301,7 @@ REGISTER_OP("DecodeJpeg")
     .Attr("acceptable_fraction: float = 1.0")
     .Attr("dct_method: string = ''")
     .Output("image: uint8")
-    .SetShapeFn(DecodeImageShapeFn)
-    .Doc(strings::StrCat(R"doc(
-Decode a JPEG-encoded image to a uint8 tensor.
-)doc",
-                         kDecodeJpegCommonDocStr, R"doc(
-This op also supports decoding PNGs and non-animated GIFs since the interface is
-the same, though it is cleaner to use `tf.image.decode_image`.
-
-contents: 0-D.  The JPEG-encoded image.
-)doc",
-                         kDecodeJpegCommonParamsDocStr));
+    .SetShapeFn(DecodeImageShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("DecodeAndCropJpeg")
@@ -482,18 +343,7 @@ REGISTER_OP("DecodeAndCropJpeg")
       }
       c->set_output(0, c->MakeShape({h, w, channels_dim}));
       return Status::OK();
-    })
-    .Doc(strings::StrCat(R"doc(
-Decode and Crop a JPEG-encoded image to a uint8 tensor.
-)doc",
-                         kDecodeJpegCommonDocStr, R"doc(
-It is equivalent to a combination of decode and crop, but much faster by only
-decoding partial jpeg image.
-
-contents: 0-D.  The JPEG-encoded image.
-crop_window: 1-D.  The crop window: [crop_y, crop_x, crop_height, crop_width].
-)doc",
-                         kDecodeJpegCommonParamsDocStr));
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("EncodeJpeg")
@@ -508,40 +358,7 @@ REGISTER_OP("EncodeJpeg")
     .Attr("y_density: int = 300")
     .Attr("xmp_metadata: string = ''")
     .Output("contents: string")
-    .SetShapeFn(EncodeImageShapeFn)
-    .Doc(R"doc(
-JPEG-encode an image.
-
-`image` is a 3-D uint8 Tensor of shape `[height, width, channels]`.
-
-The attr `format` can be used to override the color format of the encoded
-output.  Values can be:
-
-*   `''`: Use a default format based on the number of channels in the image.
-*   `grayscale`: Output a grayscale JPEG image.  The `channels` dimension
-    of `image` must be 1.
-*   `rgb`: Output an RGB JPEG image. The `channels` dimension
-    of `image` must be 3.
-
-If `format` is not specified or is the empty string, a default format is picked
-in function of the number of channels in `image`:
-
-*   1: Output a grayscale image.
-*   3: Output an RGB image.
-
-image: 3-D with shape `[height, width, channels]`.
-format: Per pixel image format.
-quality: Quality of the compression from 0 to 100 (higher is better and slower).
-progressive: If True, create a JPEG that loads progressively (coarse to fine).
-optimize_size: If True, spend CPU/RAM to reduce size with no quality change.
-chroma_downsampling: See http://en.wikipedia.org/wiki/Chroma_subsampling.
-density_unit: Unit used to specify `x_density` and `y_density`:
-   pixels per inch (`'in'`) or centimeter (`'cm'`).
-x_density: Horizontal pixels per density unit.
-y_density: Vertical pixels per density unit.
-xmp_metadata: If not empty, embed this XMP metadata in the image header.
-contents: 0-D. JPEG-encoded image.
-)doc");
+    .SetShapeFn(EncodeImageShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("ExtractJpegShape")
@@ -553,17 +370,7 @@ REGISTER_OP("ExtractJpegShape")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
       c->set_output(0, c->Vector(3));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Extract the shape information of a JPEG-encoded image.
-
-This op only parses the image header, so it is much faster than DecodeJpeg.
-
-contents: 0-D. The JPEG-encoded image.
-image_shape: 1-D. The image shape with format [height, width, channels].
-output_type: (Optional) The output type of the operation (int32 or int64).
-    Defaults to int32.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("AdjustContrast")
@@ -576,10 +383,7 @@ REGISTER_OP("AdjustContrast")
     .Deprecated(2, "Use AdjustContrastv2 instead")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
-    })
-    .Doc(R"Doc(
-Deprecated. Disallowed in GraphDef version >= 2.
-)Doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("AdjustContrastv2")
@@ -588,24 +392,7 @@ REGISTER_OP("AdjustContrastv2")
     .Output("output: float")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
-    })
-    .Doc(R"Doc(
-Adjust the contrast of one or more images.
-
-`images` is a tensor of at least 3 dimensions.  The last 3 dimensions are
-interpreted as `[height, width, channels]`.  The other dimensions only
-represent a collection of images, such as `[batch, height, width, channels].`
-
-Contrast is adjusted independently for each channel of each image.
-
-For each channel, the Op first computes the mean of the image pixels in the
-channel and then adjusts each component of each pixel to
-`(x - mean) * contrast_factor + mean`.
-
-images: Images to adjust.  At least 3-D.
-contrast_factor: A float multiplier for adjusting contrast.
-output: The contrast-adjusted image or images.
-)Doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("AdjustHue")
@@ -614,21 +401,7 @@ REGISTER_OP("AdjustHue")
     .Output("output: float")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
-    })
-    .Doc(R"Doc(
-Adjust the hue of one or more images.
-
-`images` is a tensor of at least 3 dimensions.  The last dimension is
-interpretted as channels, and must be three.
-
-The input image is considered in the RGB colorspace. Conceptually, the RGB
-colors are first mapped into HSV. A delta is then applied all the hue values,
-and then remapped back to RGB colorspace.
-
-images: Images to adjust.  At least 3-D.
-delta: A float delta to add to the hue.
-output: The hue-adjusted image or images.
-)Doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("AdjustSaturation")
@@ -637,21 +410,7 @@ REGISTER_OP("AdjustSaturation")
     .Output("output: float")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
-    })
-    .Doc(R"Doc(
-Adjust the saturation of one or more images.
-
-`images` is a tensor of at least 3 dimensions.  The last dimension is
-interpretted as channels, and must be three.
-
-The input image is considered in the RGB colorspace. Conceptually, the RGB
-colors are first mapped into HSV. A scale is then applied all the saturation
-values, and then remapped back to RGB colorspace.
-
-images: Images to adjust.  At least 3-D.
-scale: A float scale to add to the saturation.
-output: The hue-adjusted image or images.
-)Doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("DecodePng")
@@ -659,30 +418,7 @@ REGISTER_OP("DecodePng")
     .Attr("channels: int = 0")
     .Attr("dtype: {uint8, uint16} = DT_UINT8")
     .Output("image: dtype")
-    .SetShapeFn(DecodeImageShapeFn)
-    .Doc(R"doc(
-Decode a PNG-encoded image to a uint8 or uint16 tensor.
-
-The attr `channels` indicates the desired number of color channels for the
-decoded image.
-
-Accepted values are:
-
-*   0: Use the number of channels in the PNG-encoded image.
-*   1: output a grayscale image.
-*   3: output an RGB image.
-*   4: output an RGBA image.
-
-If needed, the PNG-encoded image is transformed to match the requested number
-of color channels.
-
-This op also supports decoding JPEGs and non-animated GIFs since the interface
-is the same, though it is cleaner to use `tf.image.decode_image`.
-
-contents: 0-D.  The PNG-encoded image.
-channels: Number of color channels for the decoded image.
-image: 3-D with shape `[height, width, channels]`.
-)doc");
+    .SetShapeFn(DecodeImageShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("EncodePng")
@@ -690,48 +426,14 @@ REGISTER_OP("EncodePng")
     .Attr("T: {uint8, uint16} = DT_UINT8")
     .Input("image: T")
     .Output("contents: string")
-    .SetShapeFn(EncodeImageShapeFn)
-    .Doc(R"doc(
-PNG-encode an image.
-
-`image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
-where `channels` is:
-
-*   1: for grayscale.
-*   2: for grayscale + alpha.
-*   3: for RGB.
-*   4: for RGBA.
-
-The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
-default or a value from 0 to 9.  9 is the highest compression level, generating
-the smallest output, but is slower.
-
-image: 3-D with shape `[height, width, channels]`.
-compression: Compression level.
-contents: 0-D. PNG-encoded image.
-)doc");
+    .SetShapeFn(EncodeImageShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("DecodeBmp")
     .Input("contents: string")
     .Output("image: uint8")
     .Attr("channels: int = 0")
-    .SetShapeFn(DecodeImageShapeFn)
-    .Doc(R"doc(
-Decode the first frame of a BMP-encoded image to a uint8 tensor.
-
-The attr `channels` indicates the desired number of color channels for the
-decoded image.
-
-Accepted values are:
-
-*   0: Use the number of channels in the BMP-encoded image.
-*   3: output an RGB image.
-*   4: output an RGBA image.
-
-contents: 0-D.  The BMP-encoded image.
-image: 3-D with shape `[height, width, channels]`. RGB order
-)doc");
+    .SetShapeFn(DecodeImageShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("DecodeGif")
@@ -744,61 +446,21 @@ REGISTER_OP("DecodeGif")
                                      InferenceContext::kUnknownDim,
                                      InferenceContext::kUnknownDim, 3}));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Decode the first frame of a GIF-encoded image to a uint8 tensor.
-
-GIF with frame or transparency compression are not supported
-convert animated GIF from compressed to uncompressed by:
-
-    convert $src.gif -coalesce $dst.gif
-
-This op also supports decoding JPEGs and PNGs, though it is cleaner to use
-`tf.image.decode_image`.
-
-contents: 0-D.  The GIF-encoded image.
-image: 4-D with shape `[num_frames, height, width, 3]`. RGB order
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("RGBToHSV")
     .Input("images: T")
     .Output("output: T")
-    .Attr("T: {float, double} = DT_FLOAT")
-    .SetShapeFn(ColorspaceShapeFn)
-    .Doc(R"doc(
-Converts one or more images from RGB to HSV.
-
-Outputs a tensor of the same shape as the `images` tensor, containing the HSV
-value of the pixels. The output is only well defined if the value in `images`
-are in `[0,1]`.
-
-`output[..., 0]` contains hue, `output[..., 1]` contains saturation, and
-`output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0
-corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue.
-
-images: 1-D or higher rank. RGB data to convert. Last dimension must be size 3.
-output: `images` converted to HSV.
-)doc");
+    .Attr("T: {half, bfloat16, float, double} = DT_FLOAT")
+    .SetShapeFn(ColorspaceShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("HSVToRGB")
     .Input("images: T")
     .Output("output: T")
-    .Attr("T: {float, double} = DT_FLOAT")
-    .SetShapeFn(ColorspaceShapeFn)
-    .Doc(R"doc(
-Convert one or more images from HSV to RGB.
-
-Outputs a tensor of the same shape as the `images` tensor, containing the RGB
-value of the pixels. The output is only well defined if the value in `images`
-are in `[0,1]`.
-
-See `rgb_to_hsv` for a description of the HSV encoding.
-
-images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3.
-output: `images` converted to RGB.
-)doc");
+    .Attr("T: {half, bfloat16, float, double} = DT_FLOAT")
+    .SetShapeFn(ColorspaceShapeFn);
 
 // --------------------------------------------------------------------------
 REGISTER_OP("DrawBoundingBoxes")
@@ -808,28 +470,7 @@ REGISTER_OP("DrawBoundingBoxes")
     .Attr("T: {float, half} = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
-    })
-    .Doc(R"doc(
-Draw bounding boxes on a batch of images.
-
-Outputs a copy of `images` but draws on top of the pixels zero or more bounding
-boxes specified by the locations in `boxes`. The coordinates of the each
-bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The
-bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
-height of the underlying image.
-
-For example, if an image is 100 x 200 pixels (height x width) and the bounding
-box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
-the bounding box will be `(40, 10)` to `(100, 50)` (in (x,y) coordinates).
-
-Parts of the bounding box may fall outside the image.
-
-images: 4-D with shape `[batch, height, width, depth]`. A batch of images.
-boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding
-  boxes.
-output: 4-D with the same shape as `images`. The batch of input images with
-  bounding boxes drawn on the images.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("SampleDistortedBoundingBox")
@@ -852,77 +493,7 @@ REGISTER_OP("SampleDistortedBoundingBox")
       c->set_output(1, c->Vector(3));
       c->set_output(2, c->MakeShape({1, 1, 4}));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Generate a single randomly distorted bounding box for an image.
-
-Bounding box annotations are often supplied in addition to ground-truth labels
-in image recognition or object localization tasks. A common technique for
-training such a system is to randomly distort an image while preserving
-its content, i.e. *data augmentation*. This Op outputs a randomly distorted
-localization of an object, i.e. bounding box, given an `image_size`,
-`bounding_boxes` and a series of constraints.
-
-The output of this Op is a single bounding box that may be used to crop the
-original image. The output is returned as 3 tensors: `begin`, `size` and
-`bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
-image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize
-what the bounding box looks like.
-
-Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The
-bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
-height of the underlying image.
-
-For example,
-
-```python
-    # Generate a single distorted bounding box.
-    begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
-        tf.shape(image),
-        bounding_boxes=bounding_boxes)
-
-    # Draw the bounding box in an image summary.
-    image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
-                                                  bbox_for_draw)
-    tf.image_summary('images_with_box', image_with_box)
-
-    # Employ the bounding box to distort the image.
-    distorted_image = tf.slice(image, begin, size)
-```
-
-Note that if no bounding box information is available, setting
-`use_image_if_no_bounding_boxes = true` will assume there is a single implicit
-bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
-false and no bounding boxes are supplied, an error is raised.
-
-image_size: 1-D, containing `[height, width, channels]`.
-bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes
-  associated with the image.
-begin: 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to
-  `tf.slice`.
-size: 1-D, containing `[target_height, target_width, -1]`. Provide as input to
-  `tf.slice`.
-bboxes: 3-D with shape `[1, 1, 4]` containing the distorted bounding box.
-  Provide as input to `tf.image.draw_bounding_boxes`.
-seed: If either `seed` or `seed2` are set to non-zero, the random number
-  generator is seeded by the given `seed`.  Otherwise, it is seeded by a random
-  seed.
-seed2: A second seed to avoid seed collision.
-min_object_covered: The cropped area of the image must contain at least this
-  fraction of any bounding box supplied. The value of this parameter should be
-  non-negative. In the case of 0, the cropped area does not need to overlap
-  any of the bounding boxes supplied.
-aspect_ratio_range: The cropped area of the image must have an aspect ratio =
-  width / height within this range.
-area_range: The cropped area of the image must contain a fraction of the
-  supplied image within in this range.
-max_attempts: Number of attempts at generating a cropped region of the image
-  of the specified constraints. After `max_attempts` failures, return the entire
-  image.
-use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes supplied.
-  If true, assume an implicit bounding box covering the whole input. If false,
-  raise an error.
-)doc");
+    });
 
 REGISTER_OP("SampleDistortedBoundingBoxV2")
     .Input("image_size: T")
@@ -944,77 +515,7 @@ REGISTER_OP("SampleDistortedBoundingBoxV2")
       c->set_output(1, c->Vector(3));
       c->set_output(2, c->MakeShape({1, 1, 4}));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Generate a single randomly distorted bounding box for an image.
-
-Bounding box annotations are often supplied in addition to ground-truth labels
-in image recognition or object localization tasks. A common technique for
-training such a system is to randomly distort an image while preserving
-its content, i.e. *data augmentation*. This Op outputs a randomly distorted
-localization of an object, i.e. bounding box, given an `image_size`,
-`bounding_boxes` and a series of constraints.
-
-The output of this Op is a single bounding box that may be used to crop the
-original image. The output is returned as 3 tensors: `begin`, `size` and
-`bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
-image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize
-what the bounding box looks like.
-
-Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The
-bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
-height of the underlying image.
-
-For example,
-
-```python
-    # Generate a single distorted bounding box.
-    begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
-        tf.shape(image),
-        bounding_boxes=bounding_boxes)
-
-    # Draw the bounding box in an image summary.
-    image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
-                                                  bbox_for_draw)
-    tf.image_summary('images_with_box', image_with_box)
-
-    # Employ the bounding box to distort the image.
-    distorted_image = tf.slice(image, begin, size)
-```
-
-Note that if no bounding box information is available, setting
-`use_image_if_no_bounding_boxes = true` will assume there is a single implicit
-bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
-false and no bounding boxes are supplied, an error is raised.
-
-image_size: 1-D, containing `[height, width, channels]`.
-bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes
-  associated with the image.
-min_object_covered: The cropped area of the image must contain at least this
-  fraction of any bounding box supplied. The value of this parameter should be
-  non-negative. In the case of 0, the cropped area does not need to overlap
-  any of the bounding boxes supplied.
-begin: 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to
-  `tf.slice`.
-size: 1-D, containing `[target_height, target_width, -1]`. Provide as input to
-  `tf.slice`.
-bboxes: 3-D with shape `[1, 1, 4]` containing the distorted bounding box.
-  Provide as input to `tf.image.draw_bounding_boxes`.
-seed: If either `seed` or `seed2` are set to non-zero, the random number
-  generator is seeded by the given `seed`.  Otherwise, it is seeded by a random
-  seed.
-seed2: A second seed to avoid seed collision.
-aspect_ratio_range: The cropped area of the image must have an aspect ratio =
-  width / height within this range.
-area_range: The cropped area of the image must contain a fraction of the
-  supplied image within in this range.
-max_attempts: Number of attempts at generating a cropped region of the image
-  of the specified constraints. After `max_attempts` failures, return the entire
-  image.
-use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes supplied.
-  If true, assume an implicit bounding box covering the whole input. If false,
-  raise an error.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1046,48 +547,7 @@ REGISTER_OP("ExtractGlimpse")
 
       return SetOutputToSizedImage(c, batch_dim, 1 /* size_input_idx */,
                                    c->Dim(input, 3));
-    })
-    .Doc(R"doc(
-Extracts a glimpse from the input tensor.
-
-Returns a set of windows called glimpses extracted at location
-`offsets` from the input tensor. If the windows only partially
-overlaps the inputs, the non overlapping areas will be filled with
-random noise.
-
-The result is a 4-D tensor of shape `[batch_size, glimpse_height,
-glimpse_width, channels]`. The channels and batch dimensions are the
-same as that of the input tensor. The height and width of the output
-windows are specified in the `size` parameter.
-
-The argument `normalized` and `centered` controls how the windows are built:
-
-* If the coordinates are normalized but not centered, 0.0 and 1.0
-  correspond to the minimum and maximum of each height and width
-  dimension.
-* If the coordinates are both normalized and centered, they range from
-  -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
-  left corner, the lower right corner is located at (1.0, 1.0) and the
-  center is at (0, 0).
-* If the coordinates are not normalized they are interpreted as
-  numbers of pixels.
-
-input: A 4-D float tensor of shape `[batch_size, height, width, channels]`.
-size: A 1-D tensor of 2 elements containing the size of the glimpses
-  to extract.  The glimpse height must be specified first, following
-  by the glimpse width.
-offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing
-  the y, x locations of the center of each window.
-glimpse: A tensor representing the glimpses `[batch_size,
-  glimpse_height, glimpse_width, channels]`.
-centered: indicates if the offset coordinates are centered relative to
-  the image, in which case the (0, 0) offset is relative to the center
-  of the input images. If false, the (0,0) offset corresponds to the
-  upper left corner of the input images.
-normalized: indicates if the offset coordinates are normalized.
-uniform_noise: indicates if the noise should be generated using a
-  uniform distribution or a Gaussian distribution.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1120,44 +580,7 @@ REGISTER_OP("CropAndResize")
 
       return SetOutputToSizedImage(c, num_boxes_dim, 3 /* size_input_idx */,
                                    c->Dim(input, 3));
-    })
-    .Doc(R"doc(
-Extracts crops from the input image tensor and bilinearly resizes them (possibly
-with aspect ratio change) to a common output size specified by `crop_size`. This
-is more general than the `crop_to_bounding_box` op which extracts a fixed size
-slice from the input image and does not allow resizing or aspect ratio change.
-
-Returns a tensor with `crops` from the input `image` at positions defined at the
-bounding box locations in `boxes`. The cropped boxes are all resized (with
-bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The
-result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The
-resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the
-method will give identical results to using `tf.image.resize_bilinear()`
-with `align_corners=True`.
-
-image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
-  Both `image_height` and `image_width` need to be positive.
-boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
-  specifies the coordinates of a box in the `box_ind[i]` image and is specified
-  in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
-  `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
-  `[0, 1]` interval of normalized image height is mapped to
-  `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in
-  which case the sampled crop is an up-down flipped version of the original
-  image. The width dimension is treated similarly. Normalized coordinates
-  outside the `[0, 1]` range are allowed, in which case we use
-  `extrapolation_value` to extrapolate the input image values.
-box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
-  The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
-crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All
-  cropped image patches are resized to this size. The aspect ratio of the image
-  content is not preserved. Both `crop_height` and `crop_width` need to be
-  positive.
-crops: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
-method: A string specifying the interpolation method. Only 'bilinear' is
-  supported for now.
-extrapolation_value: Value used for extrapolation, when applicable.
-)doc");
+    });
 
 REGISTER_OP("CropAndResizeGradImage")
     .Input("grads: float")
@@ -1173,30 +596,7 @@ REGISTER_OP("CropAndResizeGradImage")
       TF_RETURN_IF_ERROR(c->WithRank(out, 4, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradient of the crop_and_resize op wrt the input image tensor.
-
-grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
-boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
-  specifies the coordinates of a box in the `box_ind[i]` image and is specified
-  in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
-  `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
-  `[0, 1]` interval of normalized image height is mapped to
-  `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in
-  which case the sampled crop is an up-down flipped version of the original
-  image. The width dimension is treated similarly. Normalized coordinates
-  outside the `[0, 1]` range are allowed, in which case we use
-  `extrapolation_value` to extrapolate the input image values.
-box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
-  The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
-image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]`
-  containing the original image size. Both `image_height` and `image_width` need
-  to be positive.
-output: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
-method: A string specifying the interpolation method. Only 'bilinear' is
-  supported for now.
-)doc");
+    });
 
 REGISTER_OP("CropAndResizeGradBoxes")
     .Input("grads: float")
@@ -1209,29 +609,7 @@ REGISTER_OP("CropAndResizeGradBoxes")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->input(2));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradient of the crop_and_resize op wrt the input boxes tensor.
-
-grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
-image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
-  Both `image_height` and `image_width` need to be positive.
-boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
-  specifies the coordinates of a box in the `box_ind[i]` image and is specified
-  in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
-  `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
-  `[0, 1]` interval of normalized image height is mapped to
-  `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in
-  which case the sampled crop is an up-down flipped version of the original
-  image. The width dimension is treated similarly. Normalized coordinates
-  outside the `[0, 1]` range are allowed, in which case we use
-  `extrapolation_value` to extrapolate the input image values.
-box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
-  The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
-output: A 2-D tensor of shape `[num_boxes, 4]`.
-method: A string specifying the interpolation method. Only 'bilinear' is
-  supported for now.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1244,35 +622,7 @@ REGISTER_OP("NonMaxSuppression")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->Vector(c->UnknownDim()));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Greedily selects a subset of bounding boxes in descending order of score,
-pruning away boxes that have high intersection-over-union (IOU) overlap
-with previously selected boxes.  Bounding boxes are supplied as
-[y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
-diagonal pair of box corners and the coordinates can be provided as normalized
-(i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
-is agnostic to where the origin is in the coordinate system.  Note that this
-algorithm is invariant to orthogonal transformations and translations
-of the coordinate system; thus translating or reflections of the coordinate
-system result in the same boxes being selected by the algorithm.
-The output of this operation is a set of integers indexing into the input
-collection of bounding boxes representing the selected boxes.  The bounding
-box coordinates corresponding to the selected indices can then be obtained
-using the `tf.gather operation`.  For example:
-  selected_indices = tf.image.non_max_suppression(
-      boxes, scores, max_output_size, iou_threshold)
-  selected_boxes = tf.gather(boxes, selected_indices)
-boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
-scores: A 1-D float tensor of shape `[num_boxes]` representing a single
-  score corresponding to each box (each row of boxes).
-max_output_size: A scalar integer tensor representing the maximum number of
-  boxes to be selected by non max suppression.
-iou_threshold: A float representing the threshold for deciding whether boxes
-  overlap too much with respect to IOU.
-selected_indices: A 1-D integer tensor of shape `[M]` representing the selected
-  indices from the boxes tensor, where `M <= max_output_size`.
-)doc");
+    });
 
 REGISTER_OP("NonMaxSuppressionV2")
     .Input("boxes: float")
@@ -1283,37 +633,6 @@ REGISTER_OP("NonMaxSuppressionV2")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->Vector(c->UnknownDim()));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Greedily selects a subset of bounding boxes in descending order of score,
-pruning away boxes that have high intersection-over-union (IOU) overlap
-with previously selected boxes.  Bounding boxes are supplied as
-[y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
-diagonal pair of box corners and the coordinates can be provided as normalized
-(i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
-is agnostic to where the origin is in the coordinate system.  Note that this
-algorithm is invariant to orthogonal transformations and translations
-of the coordinate system; thus translating or reflections of the coordinate
-system result in the same boxes being selected by the algorithm.
-
-The output of this operation is a set of integers indexing into the input
-collection of bounding boxes representing the selected boxes.  The bounding
-box coordinates corresponding to the selected indices can then be obtained
-using the `tf.gather operation`.  For example:
-
-  selected_indices = tf.image.non_max_suppression_v2(
-      boxes, scores, max_output_size, iou_threshold)
-  selected_boxes = tf.gather(boxes, selected_indices)
-
-boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
-scores: A 1-D float tensor of shape `[num_boxes]` representing a single
-  score corresponding to each box (each row of boxes).
-max_output_size: A scalar integer tensor representing the maximum number of
-  boxes to be selected by non max suppression.
-iou_threshold: A 0-D float tensor representing the threshold for deciding whether
-  boxes overlap too much with respect to IOU.
-selected_indices: A 1-D integer tensor of shape `[M]` representing the selected
-  indices from the boxes tensor, where `M <= max_output_size`.
-)doc");
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/io_ops.cc b/tensorflow/core/ops/io_ops.cc
index 082d18c1d5fef539a144cea5285cdb90d661f62f..21f0d02ff27924c9361eafcbb545e394d47c7308 100644
--- a/tensorflow/core/ops/io_ops.cc
+++ b/tensorflow/core/ops/io_ops.cc
@@ -81,21 +81,7 @@ REGISTER_OP("SaveV2")
       // TODO(mrry): Attempt to parse the shapes_and_slices values and use
       // them to constrain the shape of the remaining inputs.
       return Status::OK();
-    })
-    .Doc(R"doc(
-Saves tensors in V2 checkpoint format.
-
-By default, saves the named tensors in full.  If the caller wishes to save
-specific slices of full tensors, "shape_and_slices" should be non-empty strings
-and correspondingly well-formed.
-
-prefix: Must have a single element. The prefix of the V2 checkpoint to which we
-  write the tensors.
-tensor_names: shape {N}. The names of the tensors to be saved.
-shape_and_slices: shape {N}.  The slice specs of the tensors to be saved.
-  Empty strings indicate that they are non-partitioned tensors.
-tensors: `N` tensors to save.
-)doc");
+    });
 
 REGISTER_OP("RestoreV2")
     .Input("prefix: string")
@@ -141,33 +127,7 @@ REGISTER_OP("RestoreV2")
       } else {
         return UnknownShape(c);
       }
-    })
-    .Doc(R"doc(
-Restores tensors from a V2 checkpoint.
-
-For backward compatibility with the V1 format, this Op currently allows
-restoring from a V1 checkpoint as well:
-  - This Op first attempts to find the V2 index file pointed to by "prefix", and
-    if found proceed to read it as a V2 checkpoint;
-  - Otherwise the V1 read path is invoked.
-Relying on this behavior is not recommended, as the ability to fall back to read
-V1 might be deprecated and eventually removed.
-
-By default, restores the named tensors in full.  If the caller wishes to restore
-specific slices of stored tensors, "shape_and_slices" should be non-empty
-strings and correspondingly well-formed.
-
-Callers must ensure all the named tensors are indeed stored in the checkpoint.
-
-prefix: Must have a single element.  The prefix of a V2 checkpoint.
-tensor_names: shape {N}.  The names of the tensors to be restored.
-shape_and_slices: shape {N}.  The slice specs of the tensors to be restored.
-  Empty strings indicate that they are non-partitioned tensors.
-dtypes: shape {N}.  The list of expected dtype for the tensors.  Must match
-  those stored in the checkpoint.
-tensors: shape {N}.  The restored tensors, whose shapes are read from the
-  checkpoint directly.
-)doc");
+    });
 
 REGISTER_OP("MergeV2Checkpoints")
     .Input("checkpoint_prefixes: string")
@@ -179,23 +139,7 @@ REGISTER_OP("MergeV2Checkpoints")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-V2 format specific: merges the metadata files of sharded checkpoints.  The
-result is one logical checkpoint, with one physical metadata file and renamed
-data files.
-
-Intended for "grouping" multiple checkpoints in a sharded checkpoint setup.
-
-If delete_old_dirs is true, attempts to delete recursively the dirname of each
-path in the input checkpoint_prefixes.  This is useful when those paths are non
-user-facing temporary locations.
-
-checkpoint_prefixes: prefixes of V2 checkpoints to merge.
-destination_prefix: scalar.  The desired final prefix.  Allowed to be the same
-  as one of the checkpoint_prefixes.
-delete_old_dirs: see above.
-)doc");
+    });
 
 REGISTER_OP("Save")
     .Input("filename: string")
@@ -217,20 +161,7 @@ REGISTER_OP("Save")
           c->WithValue(c->Dim(s, 0), c->num_inputs() - 2, &unused_dim));
 
       return Status::OK();
-    })
-    .Doc(R"doc(
-Saves the input tensors to disk.
-
-The size of `tensor_names` must match the number of tensors in `data`. `data[i]`
-is written to `filename` with name `tensor_names[i]`.
-
-See also `SaveSlices`.
-
-filename: Must have a single element. The name of the file to which we write
-  the tensor.
-tensor_names: Shape `[N]`. The names of the tensors to be saved.
-data: `N` tensors to save.
-)doc");
+    });
 
 REGISTER_OP("SaveSlices")
     .Input("filename: string")
@@ -256,39 +187,7 @@ REGISTER_OP("SaveSlices")
       // TODO(mrry): Attempt to parse the shapes_and_slices values and use
       // them to constrain the shape of the remaining inputs.
       return Status::OK();
-    })
-    .Doc(R"doc(
-Saves input tensors slices to disk.
-
-This is like `Save` except that tensors can be listed in the saved file as being
-a slice of a larger tensor.  `shapes_and_slices` specifies the shape of the
-larger tensor and the slice that this tensor covers. `shapes_and_slices` must
-have as many elements as `tensor_names`.
-
-Elements of the `shapes_and_slices` input must either be:
-
-*  The empty string, in which case the corresponding tensor is
-   saved normally.
-*  A string of the form `dim0 dim1 ... dimN-1 slice-spec` where the
-   `dimI` are the dimensions of the larger tensor and `slice-spec`
-   specifies what part is covered by the tensor to save.
-
-`slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1`
-where each `sliceI` is either:
-
-*  The string `-` meaning that the slice covers all indices of this dimension
-*  `start,length` where `start` and `length` are integers.  In that
-   case the slice covers `length` indices starting at `start`.
-
-See also `Save`.
-
-filename: Must have a single element. The name of the file to which we write the
-  tensor.
-tensor_names: Shape `[N]`. The names of the tensors to be saved.
-shapes_and_slices: Shape `[N]`.  The shapes and slice specifications to use when
-  saving the tensors.
-data: `N` tensors to save.
-)doc");
+    });
 
 REGISTER_OP("Restore")
     .Input("file_pattern: string")
@@ -303,36 +202,7 @@ REGISTER_OP("Restore")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       c->set_output(0, c->UnknownShape());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Restores a tensor from checkpoint files.
-
-Reads a tensor stored in one or several files. If there are several files (for
-instance because a tensor was saved as slices), `file_pattern` may contain
-wildcard symbols (`*` and `?`) in the filename portion only, not in the
-directory portion.
-
-If a `file_pattern` matches several files, `preferred_shard` can be used to hint
-in which file the requested tensor is likely to be found. This op will first
-open the file at index `preferred_shard` in the list of matching files and try
-to restore tensors from that file.  Only if some tensors or tensor slices are
-not found in that first file, then the Op opens all the files. Setting
-`preferred_shard` to match the value passed as the `shard` input
-of a matching `Save` Op may speed up Restore.  This attribute only affects
-performance, not correctness.  The default value -1 means files are processed in
-order.
-
-See also `RestoreSlice`.
-
-file_pattern: Must have a single element. The pattern of the files from
-  which we read the tensor.
-tensor_name: Must have a single element. The name of the tensor to be
-  restored.
-tensor: The restored tensor.
-dt: The type of the tensor to be restored.
-preferred_shard: Index of file to open first if multiple files match
-  `file_pattern`.
-)doc");
+    });
 
 REGISTER_OP("RestoreSlice")
     .Input("file_pattern: string")
@@ -371,48 +241,20 @@ REGISTER_OP("RestoreSlice")
         c->set_output(0, c->UnknownShape());
       }
       return Status::OK();
-    })
-    .Doc(R"doc(
-Restores a tensor from checkpoint files.
-
-This is like `Restore` except that restored tensor can be listed as filling
-only a slice of a larger tensor.  `shape_and_slice` specifies the shape of the
-larger tensor and the slice that the restored tensor covers.
-
-The `shape_and_slice` input has the same format as the
-elements of the `shapes_and_slices` input of the `SaveSlices` op.
-
-file_pattern: Must have a single element. The pattern of the files from
-  which we read the tensor.
-tensor_name: Must have a single element. The name of the tensor to be
-  restored.
-shape_and_slice: Scalar. The shapes and slice specifications to use when
-  restoring a tensors.
-tensor: The restored tensor.
-dt: The type of the tensor to be restored.
-preferred_shard: Index of file to open first if multiple files match
-  `file_pattern`. See the documentation for `Restore`.
-)doc");
+    });
 
 REGISTER_OP("ShardedFilename")
     .Input("basename: string")
     .Input("shard: int32")
     .Input("num_shards: int32")
     .Output("filename: string")
-    .SetShapeFn(ScalarInputsAndOutputs)
-    .Doc(R"doc(
-Generate a sharded filename. The filename is printf formatted as
-   %s-%05d-of-%05d, basename, shard, num_shards.
-)doc");
+    .SetShapeFn(ScalarInputsAndOutputs);
 
 REGISTER_OP("ShardedFilespec")
     .Input("basename: string")
     .Input("num_shards: int32")
     .Output("filename: string")
-    .SetShapeFn(ScalarInputsAndOutputs)
-    .Doc(R"doc(
-Generate a glob pattern matching all sharded file names.
-)doc");
+    .SetShapeFn(ScalarInputsAndOutputs);
 
 // Reader source ops ----------------------------------------------------------
 
@@ -421,38 +263,14 @@ REGISTER_OP("WholeFileReader")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-A Reader that outputs the entire contents of a file as a value.
-
-To use, enqueue filenames in a Queue.  The output of ReaderRead will
-be a filename (key) and the contents of that file (value).
-
-reader_handle: The handle to reference the Reader.
-container: If non-empty, this reader is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this reader is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("WholeFileReaderV2")
     .Output("reader_handle: resource")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-A Reader that outputs the entire contents of a file as a value.
-
-To use, enqueue filenames in a Queue.  The output of ReaderRead will
-be a filename (key) and the contents of that file (value).
-
-reader_handle: The handle to reference the Reader.
-container: If non-empty, this reader is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this reader is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 // TODO(cwhipkey): mark this deprecated in favor of V2.
 REGISTER_OP("TextLineReader")
@@ -461,17 +279,7 @@ REGISTER_OP("TextLineReader")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-A Reader that outputs the lines of a file delimited by '\n'.
-
-reader_handle: The handle to reference the Reader.
-skip_header_lines: Number of lines to skip from the beginning of every file.
-container: If non-empty, this reader is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this reader is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("TextLineReaderV2")
     .Output("reader_handle: resource")
@@ -479,17 +287,7 @@ REGISTER_OP("TextLineReaderV2")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-A Reader that outputs the lines of a file delimited by '\n'.
-
-reader_handle: The handle to reference the Reader.
-skip_header_lines: Number of lines to skip from the beginning of every file.
-container: If non-empty, this reader is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this reader is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 // TODO(cwhipkey): mark this deprecated in favor of V2.
 REGISTER_OP("FixedLengthRecordReader")
@@ -501,21 +299,7 @@ REGISTER_OP("FixedLengthRecordReader")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-A Reader that outputs fixed-length records from a file.
-
-reader_handle: The handle to reference the Reader.
-header_bytes: Number of bytes in the header, defaults to 0.
-record_bytes: Number of bytes in the record.
-footer_bytes: Number of bytes in the footer, defaults to 0.
-hop_bytes: Number of bytes to hop before each read. Default of 0 means using
-        record_bytes.
-container: If non-empty, this reader is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this reader is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("FixedLengthRecordReaderV2")
     .Output("reader_handle: resource")
@@ -527,23 +311,7 @@ REGISTER_OP("FixedLengthRecordReaderV2")
     .Attr("shared_name: string = ''")
     .Attr("encoding: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-A Reader that outputs fixed-length records from a file.
-
-reader_handle: The handle to reference the Reader.
-header_bytes: Number of bytes in the header, defaults to 0.
-record_bytes: Number of bytes in the record.
-footer_bytes: Number of bytes in the footer, defaults to 0.
-hop_bytes: Number of bytes to hop before each read. Default of 0 means using
-        record_bytes.
-container: If non-empty, this reader is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this reader is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-encoding: The type of encoding for the file. Currently ZLIB and GZIP
-        are supported. Defaults to none.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 // TODO(cwhipkey): mark this deprecated in favor of V2.
 REGISTER_OP("TFRecordReader")
@@ -552,16 +320,7 @@ REGISTER_OP("TFRecordReader")
     .Attr("shared_name: string = ''")
     .Attr("compression_type: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-A Reader that outputs the records from a TensorFlow Records file.
-
-reader_handle: The handle to reference the Reader.
-container: If non-empty, this reader is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this reader is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("TFRecordReaderV2")
     .Output("reader_handle: resource")
@@ -569,31 +328,14 @@ REGISTER_OP("TFRecordReaderV2")
     .Attr("shared_name: string = ''")
     .Attr("compression_type: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-A Reader that outputs the records from a TensorFlow Records file.
-
-reader_handle: The handle to reference the Reader.
-container: If non-empty, this reader is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this reader is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("LMDBReader")
     .Output("reader_handle: Ref(string)")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-A Reader that outputs the records from a LMDB file.
-reader_handle: The handle to reference the Reader.
-container: If non-empty, this reader is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this reader is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 // TODO(cwhipkey): mark this deprecated in favor of V2.
 REGISTER_OP("IdentityReader")
@@ -601,38 +343,14 @@ REGISTER_OP("IdentityReader")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-A Reader that outputs the queued work as both the key and value.
-
-To use, enqueue strings in a Queue.  ReaderRead will take the front
-work string and output (work, work).
-
-reader_handle: The handle to reference the Reader.
-container: If non-empty, this reader is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this reader is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("IdentityReaderV2")
     .Output("reader_handle: resource")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-A Reader that outputs the queued work as both the key and value.
-
-To use, enqueue strings in a Queue.  ReaderRead will take the front
-work string and output (work, work).
-
-reader_handle: The handle to reference the Reader.
-container: If non-empty, this reader is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this reader is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 // Ops that operate on Readers ------------------------------------------------
 
@@ -641,38 +359,14 @@ REGISTER_OP("ReaderRead")
     .Input("queue_handle: Ref(string)")
     .Output("key: string")
     .Output("value: string")
-    .SetShapeFn(TwoElementVectorAndScalarOutputs)
-    .Doc(R"doc(
-Returns the next record (key, value pair) produced by a Reader.
-
-Will dequeue from the input queue if necessary (e.g. when the
-Reader needs to start reading from a new file since it has finished
-with the previous file).
-
-reader_handle: Handle to a Reader.
-queue_handle: Handle to a Queue, with string work items.
-key: A scalar.
-value: A scalar.
-)doc");
+    .SetShapeFn(TwoElementVectorAndScalarOutputs);
 
 REGISTER_OP("ReaderReadV2")
     .Input("reader_handle: resource")
     .Input("queue_handle: resource")
     .Output("key: string")
     .Output("value: string")
-    .SetShapeFn(ScalarInputsAndOutputs)
-    .Doc(R"doc(
-Returns the next record (key, value pair) produced by a Reader.
-
-Will dequeue from the input queue if necessary (e.g. when the
-Reader needs to start reading from a new file since it has finished
-with the previous file).
-
-reader_handle: Handle to a Reader.
-queue_handle: Handle to a Queue, with string work items.
-key: A scalar.
-value: A scalar.
-)doc");
+    .SetShapeFn(ScalarInputsAndOutputs);
 
 REGISTER_OP("ReaderReadUpTo")
     .Input("reader_handle: Ref(string)")
@@ -689,21 +383,7 @@ REGISTER_OP("ReaderReadUpTo")
       c->set_output(0, out);
       c->set_output(1, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns up to `num_records` (key, value) pairs produced by a Reader.
-
-Will dequeue from the input queue if necessary (e.g. when the
-Reader needs to start reading from a new file since it has finished
-with the previous file).
-It may return less than `num_records` even before the last batch.
-
-reader_handle: Handle to a `Reader`.
-queue_handle: Handle to a `Queue`, with string work items.
-num_records: number of records to read from `Reader`.
-keys: A 1-D tensor.
-values: A 1-D tensor.
-)doc");
+    });
 
 REGISTER_OP("ReaderReadUpToV2")
     .Input("reader_handle: resource")
@@ -720,93 +400,37 @@ REGISTER_OP("ReaderReadUpToV2")
       c->set_output(0, out);
       c->set_output(1, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns up to `num_records` (key, value) pairs produced by a Reader.
-
-Will dequeue from the input queue if necessary (e.g. when the
-Reader needs to start reading from a new file since it has finished
-with the previous file).
-It may return less than `num_records` even before the last batch.
-
-reader_handle: Handle to a `Reader`.
-queue_handle: Handle to a `Queue`, with string work items.
-num_records: number of records to read from `Reader`.
-keys: A 1-D tensor.
-values: A 1-D tensor.
-)doc");
+    });
 
 REGISTER_OP("ReaderNumRecordsProduced")
     .Input("reader_handle: Ref(string)")
     .Output("records_produced: int64")
-    .SetShapeFn(TwoElementVectorAndScalarOutputs)
-    .Doc(R"doc(
-Returns the number of records this Reader has produced.
-
-This is the same as the number of ReaderRead executions that have
-succeeded.
-
-reader_handle: Handle to a Reader.
-)doc");
+    .SetShapeFn(TwoElementVectorAndScalarOutputs);
 
 REGISTER_OP("ReaderNumRecordsProducedV2")
     .Input("reader_handle: resource")
     .Output("records_produced: int64")
-    .SetShapeFn(ScalarInputsAndOutputs)
-    .Doc(R"doc(
-Returns the number of records this Reader has produced.
-
-This is the same as the number of ReaderRead executions that have
-succeeded.
-
-reader_handle: Handle to a Reader.
-)doc");
+    .SetShapeFn(ScalarInputsAndOutputs);
 
 REGISTER_OP("ReaderNumWorkUnitsCompleted")
     .Input("reader_handle: Ref(string)")
     .Output("units_completed: int64")
-    .SetShapeFn(TwoElementVectorAndScalarOutputs)
-    .Doc(R"doc(
-Returns the number of work units this Reader has finished processing.
-
-reader_handle: Handle to a Reader.
-)doc");
+    .SetShapeFn(TwoElementVectorAndScalarOutputs);
 
 REGISTER_OP("ReaderNumWorkUnitsCompletedV2")
     .Input("reader_handle: resource")
     .Output("units_completed: int64")
-    .SetShapeFn(ScalarInputsAndOutputs)
-    .Doc(R"doc(
-Returns the number of work units this Reader has finished processing.
-
-reader_handle: Handle to a Reader.
-)doc");
+    .SetShapeFn(ScalarInputsAndOutputs);
 
 REGISTER_OP("ReaderSerializeState")
     .Input("reader_handle: Ref(string)")
     .Output("state: string")
-    .SetShapeFn(TwoElementVectorAndScalarOutputs)
-    .Doc(R"doc(
-Produce a string tensor that encodes the state of a Reader.
-
-Not all Readers support being serialized, so this can produce an
-Unimplemented error.
-
-reader_handle: Handle to a Reader.
-)doc");
+    .SetShapeFn(TwoElementVectorAndScalarOutputs);
 
 REGISTER_OP("ReaderSerializeStateV2")
     .Input("reader_handle: resource")
     .Output("state: string")
-    .SetShapeFn(ScalarInputsAndOutputs)
-    .Doc(R"doc(
-Produce a string tensor that encodes the state of a Reader.
-
-Not all Readers support being serialized, so this can produce an
-Unimplemented error.
-
-reader_handle: Handle to a Reader.
-)doc");
+    .SetShapeFn(ScalarInputsAndOutputs);
 
 REGISTER_OP("ReaderRestoreState")
     .Input("reader_handle: Ref(string)")
@@ -820,17 +444,7 @@ REGISTER_OP("ReaderRestoreState")
 
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Restore a reader to a previously saved state.
-
-Not all Readers support being restored, so this can produce an
-Unimplemented error.
-
-reader_handle: Handle to a Reader.
-state: Result of a ReaderSerializeState of a Reader with type
-  matching reader_handle.
-)doc");
+    });
 
 REGISTER_OP("ReaderRestoreStateV2")
     .Input("reader_handle: resource")
@@ -840,45 +454,22 @@ REGISTER_OP("ReaderRestoreStateV2")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Restore a reader to a previously saved state.
-
-Not all Readers support being restored, so this can produce an
-Unimplemented error.
-
-reader_handle: Handle to a Reader.
-state: Result of a ReaderSerializeState of a Reader with type
-  matching reader_handle.
-)doc");
+    });
 
 REGISTER_OP("ReaderReset")
     .Input("reader_handle: Ref(string)")
-    .SetShapeFn(TwoElementVectorAndScalarOutputs)
-    .Doc(R"doc(
-Restore a Reader to its initial clean state.
-
-reader_handle: Handle to a Reader.
-)doc");
+    .SetShapeFn(TwoElementVectorAndScalarOutputs);
 
 REGISTER_OP("ReaderResetV2")
     .Input("reader_handle: resource")
-    .SetShapeFn(ScalarInputsAndOutputs)
-    .Doc(R"doc(
-Restore a Reader to its initial clean state.
-
-reader_handle: Handle to a Reader.
-)doc");
+    .SetShapeFn(ScalarInputsAndOutputs);
 
 // Other input Ops ----------------------------------------------------------
 
 REGISTER_OP("ReadFile")
     .Input("filename: string")
     .Output("contents: string")
-    .SetShapeFn(ScalarInputsAndOutputs)
-    .Doc(R"doc(
-Reads and outputs the entire contents of the input filename.
-)doc");
+    .SetShapeFn(ScalarInputsAndOutputs);
 
 REGISTER_OP("WriteFile")
     .Input("filename: string")
@@ -888,14 +479,7 @@ REGISTER_OP("WriteFile")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Writes contents to the file at input filename. Creates file and recursively
-creates directory if not existing.
-
-filename: scalar. The name of the file to which we write the contents.
-contents: scalar. The content to be written to the output file.
-)doc");
+    });
 
 REGISTER_OP("MatchingFiles")
     .Input("pattern: string")
@@ -905,15 +489,6 @@ REGISTER_OP("MatchingFiles")
       TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(0), 1, &unused));
       c->set_output(0, c->Vector(InferenceContext::kUnknownDim));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns the set of files matching one or more glob patterns.
-
-Note that this routine only supports wildcard characters in the
-basename portion of the pattern, not in the directory portion.
-
-pattern: Shell wildcard pattern(s). Scalar or vector of type string.
-filenames: A vector of matching filenames.
-)doc");
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/linalg_ops.cc b/tensorflow/core/ops/linalg_ops.cc
index 53e2360d2321a21c658f5abb87bfbc78e2564f26..f37f79ddbf9614e9fcd128e8d23f71c0f354add2 100644
--- a/tensorflow/core/ops/linalg_ops.cc
+++ b/tensorflow/core/ops/linalg_ops.cc
@@ -202,17 +202,7 @@ REGISTER_OP("MatrixDeterminant")
       TF_RETURN_IF_ERROR(c->Subshape(input, 0, -2, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the determinant of one or more square matrices.
-
-The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-form square matrices. The output is a tensor containing the determinants
-for all input submatrices `[..., :, :]`.
-
-input: Shape is `[..., M, M]`.
-output: Shape is `[...]`.
-)doc");
+    });
 
 REGISTER_OP("LogMatrixDeterminant")
     .Input("input: T")
@@ -235,126 +225,39 @@ REGISTER_OP("LogMatrixDeterminant")
       TF_RETURN_IF_ERROR(c->Subshape(input, 0, -2, &out));
       c->set_output(1, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the sign and the log of the absolute value of the determinant of
-one or more square matrices.
-
-The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions
-form square matrices. The outputs are two tensors containing the signs and
-absolute values of the log determinants for all N input submatrices
-`[..., :, :]` such that the determinant = sign*exp(log_abs_determinant).
-The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU
-is the LU decomposition of the input and P is the corresponding
-permutation matrix.
-
-input: Shape is `[N, M, M]`.
-sign: The signs of the log determinants of the inputs. Shape is `[N]`.
-log_abs_determinant: The logs of the absolute values of the determinants
-of the N input matrices.  Shape is `[N]`.
-)doc");
+    });
 
 REGISTER_OP("MatrixInverse")
     .Input("input: T")
     .Output("output: T")
     .Attr("adjoint: bool = False")
     .Attr("T: {double, float, complex64, complex128}")
-    .SetShapeFn(BatchUnchangedSquareShapeFn)
-    .Doc(R"doc(
-Computes the inverse of one or more square invertible matrices or their
-adjoints (conjugate transposes).
-
-The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-form square matrices. The output is a tensor of the same shape as the input
-containing the inverse for all input submatrices `[..., :, :]`.
-
-The op uses LU decomposition with partial pivoting to compute the inverses.
-
-If a matrix is not invertible there is no guarantee what the op does. It
-may detect the condition and raise an exception or it may simply return a
-garbage result.
-
-input: Shape is `[..., M, M]`.
-output: Shape is `[..., M, M]`.
-
-@compatibility(numpy)
-Equivalent to np.linalg.inv
-@end_compatibility
-)doc");
+    .SetShapeFn(BatchUnchangedSquareShapeFn);
 
 REGISTER_OP("MatrixExponential")
     .Input("input: T")
     .Output("output: T")
     .Attr("T: {double, float, complex64, complex128}")
-    .SetShapeFn(BatchUnchangedSquareShapeFn)
-    .Doc(R"doc(
-Computes the matrix exponential of one or more square matrices:
-
-exp(A) = \sum_{n=0}^\infty A^n/n!
-
-The exponential is computed using a combination of the scaling and squaring
-method and the Pade approximation. Details can be founds in:
-Nicholas J. Higham, "The scaling and squaring method for the matrix exponential
-revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005.
-
-The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-form square matrices. The output is a tensor of the same shape as the input
-containing the exponential for all input submatrices `[..., :, :]`.
-
-input: Shape is `[..., M, M]`.
-output: Shape is `[..., M, M]`.
+    .SetShapeFn(BatchUnchangedSquareShapeFn);
 
-@compatibility(scipy)
-Equivalent to scipy.linalg.expm
-@end_compatibility
-)doc");
+REGISTER_OP("MatrixLogarithm")
+    .Input("input: T")
+    .Output("output: T")
+    .Attr("T: {complex64, complex128}")
+    .SetShapeFn(BatchUnchangedSquareShapeFn);
 
 REGISTER_OP("Cholesky")
     .Input("input: T")
     .Output("output: T")
     .Attr("T: {double, float, complex64, complex128}")
-    .SetShapeFn(BatchUnchangedSquareShapeFn)
-    .Doc(R"doc(
-Computes the Cholesky decomposition of one or more square matrices.
-
-The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-form square matrices.
-
-The input has to be symmetric and positive definite. Only the lower-triangular
-part of the input will be used for this operation. The upper-triangular part
-will not be read.
-
-The output is a tensor of the same shape as the input
-containing the Cholesky decompositions for all input submatrices `[..., :, :]`.
-
-**Note**: The gradient computation on GPU is faster for large matrices but
-not for large batch dimensions when the submatrices are small. In this
-case it might be faster to use the CPU.
-
-input: Shape is `[..., M, M]`.
-output: Shape is `[..., M, M]`.
-)doc");
+    .SetShapeFn(BatchUnchangedSquareShapeFn);
 
 REGISTER_OP("CholeskyGrad")
     .Input("l: T")
     .Input("grad: T")
     .Output("output: T")
     .Attr("T: {float, double}")
-    .SetShapeFn(BatchUnchangedSquareShapeFn)
-    .Doc(R"doc(
-Computes the reverse mode backpropagated gradient of the Cholesky algorithm.
-
-For an explanation see "Differentiation of the Cholesky algorithm" by
-Iain Murray http://arxiv.org/abs/1602.07527.
-
-l: Output of batch Cholesky algorithm l = cholesky(A). Shape is `[..., M, M]`.
-  Algorithm depends only on lower triangular part of the innermost matrices of
-  this tensor.
-grad: df/dl where f is some scalar function. Shape is `[..., M, M]`.
-  Algorithm depends only on lower triangular part of the innermost matrices of
-  this tensor.
-output: Symmetrized version of df/dA . Shape is `[..., M, M]`
-)doc");
+    .SetShapeFn(BatchUnchangedSquareShapeFn);
 
 REGISTER_OP("SelfAdjointEig")
     .Input("input: T")
@@ -374,20 +277,7 @@ REGISTER_OP("SelfAdjointEig")
       TF_RETURN_IF_ERROR(c->Concatenate(s, c->Matrix(d_plus_1, d), &s));
       c->set_output(0, s);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the Eigen Decomposition of a batch of square self-adjoint matrices.
-
-The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-form square matrices, with the same constraints as the single matrix
-SelfAdjointEig.
-
-The result is a [..., M+1, M] matrix with [..., 0,:] containing the
-eigenvalues, and subsequent [...,1:, :] containing the eigenvectors.
-
-input: Shape is `[..., M, M]`.
-output: Shape is `[..., M+1, M]`.
-)doc");
+    });
 
 REGISTER_OP("SelfAdjointEigV2")
     .Input("input: T")
@@ -395,27 +285,7 @@ REGISTER_OP("SelfAdjointEigV2")
     .Output("v: T")
     .Attr("compute_v: bool = True")
     .Attr("T: {double, float, complex64, complex128}")
-    .SetShapeFn(SelfAdjointEigV2ShapeFn)
-    .Doc(R"doc(
-Computes the eigen decomposition of one or more square self-adjoint matrices.
-
-Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in
-`input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`.
-
-```python
-# a is a tensor.
-# e is a tensor of eigenvalues.
-# v is a tensor of eigenvectors.
-e, v = self_adjoint_eig(a)
-e = self_adjoint_eig(a, compute_v=False)
-```
-
-input: `Tensor` input of shape `[N, N]`.
-compute_v: If `True` then eigenvectors will be computed and returned in `v`.
-  Otherwise, only the eigenvalues will be computed.
-e: Eigenvalues. Shape is `[N]`.
-v: Eigenvectors. Shape is `[N, N]`.
-)doc");
+    .SetShapeFn(SelfAdjointEigV2ShapeFn);
 
 REGISTER_OP("MatrixSolve")
     .Input("matrix: T")
@@ -425,23 +295,7 @@ REGISTER_OP("MatrixSolve")
     .Attr("T: {double, float, complex64, complex128}")
     .SetShapeFn([](InferenceContext* c) {
       return MatrixSolveShapeFn(c, true /* square (*/);
-    })
-    .Doc(R"doc(
-Solves systems of linear equations.
-
-`Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is
-a tensor shape `[..., M, K]`.  If `adjoint` is `False` then each output matrix
-satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`.
-If `adjoint` is `True` then each output matrix satisfies
-`adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`.
-
-matrix: Shape is `[..., M, M]`.
-rhs: Shape is `[..., M, K]`.
-output: Shape is `[..., M, K]`.
-adjoint: Boolean indicating whether to solve with `matrix` or its (block-wise)
-         adjoint.
-)doc");
+    });
 
 REGISTER_OP("MatrixTriangularSolve")
     .Input("matrix: T")
@@ -452,37 +306,7 @@ REGISTER_OP("MatrixTriangularSolve")
     .Attr("T: {double, float, complex64, complex128}")
     .SetShapeFn([](InferenceContext* c) {
       return MatrixSolveShapeFn(c, true /* square (*/);
-    })
-    .Doc(R"doc(
-Solves systems of linear equations with upper or lower triangular matrices by
-backsubstitution.
-
-`matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form
-square matrices. If `lower` is `True` then the strictly upper triangular part
-of each inner-most matrix is assumed to be zero and not accessed.
-If `lower` is False then the strictly lower triangular part of each inner-most
-matrix is assumed to be zero and not accessed.
-`rhs` is a tensor of shape `[..., M, K]`.
-
-The output is a tensor of shape `[..., M, K]`. If `adjoint` is
-`True` then the innermost matrices in `output` satisfy matrix equations
-`matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`.
-If `adjoint` is `False` then the strictly then the  innermost matrices in
-`output` satisfy matrix equations
-`adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`.
-
-matrix: Shape is `[..., M, M]`.
-rhs: Shape is `[..., M, K]`.
-output: Shape is `[..., M, K]`.
-lower: Boolean indicating whether the innermost matrices in `matrix` are
-       lower or upper triangular.
-adjoint: Boolean indicating whether to solve with `matrix` or its (block-wise)
-         adjoint.
-
-@compatibility(numpy)
-Equivalent to np.linalg.triangular_solve
-@end_compatibility
-)doc");
+    });
 
 REGISTER_OP("MatrixSolveLs")
     .Input("matrix: T")
@@ -495,54 +319,7 @@ REGISTER_OP("MatrixSolveLs")
       ShapeHandle l2_regularizer;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &l2_regularizer));
       return MatrixSolveShapeFn(c, false /* square */);
-    })
-    .Doc(R"doc(
-Solves one or more linear least-squares problems.
-
-`matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions
-form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same
-type as `matrix` and shape `[..., M, K]`.
-The output is a tensor shape `[..., N, K]` where each output matrix solves
-each of the equations
-`matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]`
-in the least squares sense.
-
-We use the following notation for (complex) matrix and right-hand sides
-in the batch:
-
-`matrix`=\\(A \in \mathbb{C}^{m \times n}\\),
-`rhs`=\\(B  \in \mathbb{C}^{m \times k}\\),
-`output`=\\(X  \in \mathbb{C}^{n \times k}\\),
-`l2_regularizer`=\\(\lambda \in \mathbb{R}\\).
-
-If `fast` is `True`, then the solution is computed by solving the normal
-equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then
-\\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares
-problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 +
-\lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as
-\\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the
-minimum-norm solution to the under-determined linear system, i.e.
-\\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\),
-subject to \\(A Z = B\\). Notice that the fast path is only numerically stable
-when \\(A\\) is numerically full rank and has a condition number
-\\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or\\(\lambda\\) is
-sufficiently large.
-
-If `fast` is `False` an algorithm based on the numerically robust complete
-orthogonal decomposition is used. This computes the minimum-norm
-least-squares solution, even when \\(A\\) is rank deficient. This path is
-typically 6-7 times slower than the fast path. If `fast` is `False` then
-`l2_regularizer` is ignored.
-
-matrix: Shape is `[..., M, N]`.
-rhs: Shape is `[..., M, K]`.
-output: Shape is `[..., N, K]`.
-l2_regularizer: Scalar tensor.
-
-@compatibility(numpy)
-Equivalent to np.linalg.lstsq
-@end_compatibility
-)doc");
+    });
 
 REGISTER_OP("Qr")
     .Input("input: T")
@@ -550,31 +327,7 @@ REGISTER_OP("Qr")
     .Output("r: T")
     .Attr("full_matrices: bool = False")
     .Attr("T: {double, float, complex64, complex128}")
-    .SetShapeFn(QrShapeFn)
-    .Doc(R"doc(
-Computes the QR decompositions of one or more matrices.
-
-Computes the QR decomposition of each inner matrix in `tensor` such that
-`tensor[..., :, :] = q[..., :, :] * r[..., :,:])`
-
-```python
-# a is a tensor.
-# q is a tensor of orthonormal matrices.
-# r is a tensor of upper triangular matrices.
-q, r = qr(a)
-q_full, r_full = qr(a, full_matrices=True)
-```
-
-input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions
-  form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`.
-q: Orthonormal basis for range of `a`. If `full_matrices` is `False` then
-  shape is `[..., M, P]`; if `full_matrices` is `True` then shape is
-  `[..., M, M]`.
-r: Triangular factor. If `full_matrices` is `False` then shape is
-  `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`.
-full_matrices: If true, compute full-sized `q` and `r`. If false
-  (the default), compute only the leading `P` columns of `q`.
-)doc");
+    .SetShapeFn(QrShapeFn);
 
 REGISTER_OP("Svd")
     .Input("input: T")
@@ -584,38 +337,7 @@ REGISTER_OP("Svd")
     .Attr("compute_uv: bool = True")
     .Attr("full_matrices: bool = False")
     .Attr("T: {double, float, complex64, complex128}")
-    .SetShapeFn(SvdShapeFn)
-    .Doc(R"doc(
-Computes the singular value decompositions of one or more matrices.
-
-Computes the SVD of each inner matrix in `input` such that
-`input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])`
-
-```python
-# a is a tensor containing a batch of matrices.
-# s is a tensor of singular values for each matrix.
-# u is the tensor containing of left singular vectors for each matrix.
-# v is the tensor containing of right singular vectors for each matrix.
-s, u, v = svd(a)
-s, _, _ = svd(a, compute_uv=False)
-```
-
-input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions
-  form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`.
-s: Singular values. Shape is `[..., P]`.
-u: Left singular vectors. If `full_matrices` is `False` then shape is
-  `[..., M, P]`; if `full_matrices` is `True` then shape is
-  `[..., M, M]`. Undefined if `compute_uv` is `False`.
-v: Left singular vectors. If `full_matrices` is `False` then shape is
-  `[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`.
-  Undefined if `compute_uv` is false.
-compute_uv: If true, left and right singular vectors will be
-  computed and returned in `u` and `v`, respectively.
-  If false, `u` and `v` are not set and should never referenced.
-full_matrices: If true, compute full-sized `u` and `v`. If false
-  (the default), compute only the leading `P` singular vectors.
-  Ignored if `compute_uv` is `False`.
-)doc");
+    .SetShapeFn(SvdShapeFn);
 
 // Deprecated op registrations:
 
diff --git a/tensorflow/core/ops/list_ops.cc b/tensorflow/core/ops/list_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..db534857720f5cc611a9b092a628a89b649d3783
--- /dev/null
+++ b/tensorflow/core/ops/list_ops.cc
@@ -0,0 +1,180 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+namespace {
+
+REGISTER_OP("EmptyTensorList")
+    .Input("element_shape: shape_type")
+    .Output("handle: variant")
+    .Attr("element_dtype: type")
+    .Attr("shape_type: {int32, int64}")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->Scalar());
+      DataType t;
+      TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &t));
+      shape_inference::ShapeHandle s;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
+      c->set_output_handle_shapes_and_types(
+          0, std::vector<shape_inference::ShapeAndType>{{s, t}});
+      return Status::OK();
+    });
+
+REGISTER_OP("TensorListPushBack")
+    .Input("input_handle: variant")
+    .Input("tensor: element_dtype")
+    .Output("output_handle: variant")
+    .Attr("element_dtype: type")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->Scalar());
+      DataType t;
+      TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &t));
+      shape_inference::ShapeHandle s = c->UnknownShape();
+
+      auto* handle_data = c->input_handle_shapes_and_types(0);
+      if (handle_data != nullptr && handle_data->size() != 1) {
+        return errors::InvalidArgument(
+            "Trying to push to list with wrong variant data.");
+      }
+      if (handle_data != nullptr) {
+        const shape_inference::ShapeAndType& list_shape_type =
+            (*handle_data)[0];
+        if (list_shape_type.dtype != t) {
+          return errors::InvalidArgument(
+              "Trying to push to list with wrong element dtype. List has type ",
+              DataTypeString(list_shape_type.dtype),
+              " but trying to push element with type ", DataTypeString(t));
+        }
+        shape_inference::ShapeHandle ignored;
+        TF_RETURN_IF_ERROR(c->Merge(s, list_shape_type.shape, &ignored));
+        s = list_shape_type.shape;
+      }
+      c->set_output_handle_shapes_and_types(
+          0, std::vector<shape_inference::ShapeAndType>{{s, t}});
+      return Status::OK();
+    });
+
+REGISTER_OP("TensorListLength")
+    .Input("input_handle: variant")
+    .Output("length: int32")
+    .SetShapeFn(shape_inference::ScalarShape);
+
+REGISTER_OP("TensorListPopBack")
+    .Input("input_handle: variant")
+    .Output("output_handle: variant")
+    .Output("tensor: element_dtype")
+    .Attr("element_dtype: type")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      DataType t;
+      TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &t));
+      shape_inference::ShapeHandle s = c->UnknownShape();
+      auto* handle_data = c->input_handle_shapes_and_types(0);
+      if (handle_data != nullptr && handle_data->size() != 1) {
+        return errors::InvalidArgument(
+            "Trying to read from list with invalid variant data.");
+      }
+      if (handle_data != nullptr) {
+        const shape_inference::ShapeAndType& list_shape_type =
+            (*handle_data)[0];
+        if (list_shape_type.dtype != t) {
+          return errors::InvalidArgument(
+              "Trying to read from list with wrong element dtype. List has "
+              "type ",
+              DataTypeString(list_shape_type.dtype),
+              " but trying to push element with type ", DataTypeString(t));
+        }
+        shape_inference::ShapeHandle ignored;
+        TF_RETURN_IF_ERROR(c->Merge(s, list_shape_type.shape, &ignored));
+        c->set_output_handle_shapes_and_types(0, *handle_data);
+        s = list_shape_type.shape;
+      }
+      c->set_output(1, s);
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    });
+
+REGISTER_OP("TensorListStack")
+    .Input("input_handle: variant")
+    .Output("tensor: element_dtype")
+    .Attr("element_dtype: type")
+    .Attr("num_elements: int = -1")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      DataType t;
+      TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &t));
+      shape_inference::ShapeHandle s = c->UnknownShape();
+      auto* handle_data = c->input_handle_shapes_and_types(0);
+      if (handle_data != nullptr && handle_data->size() != 1) {
+        return errors::InvalidArgument(
+            "Trying to read from list with wrong variant data.");
+      }
+      if (handle_data != nullptr) {
+        const shape_inference::ShapeAndType& list_shape_type =
+            (*handle_data)[0];
+        if (list_shape_type.dtype != t) {
+          return errors::InvalidArgument(
+              "Trying to read from list with wrong element dtype. List has "
+              "type ",
+              DataTypeString(list_shape_type.dtype), " but expectec type ",
+              DataTypeString(t));
+        }
+        shape_inference::ShapeHandle ignored;
+        TF_RETURN_IF_ERROR(c->Merge(s, list_shape_type.shape, &ignored));
+        if (!c->FullyDefined(s) || !c->FullyDefined(list_shape_type.shape)) {
+          return errors::InvalidArgument(
+              "Can only gather from a list with fully defined shapes.");
+        }
+        s = list_shape_type.shape;
+      }
+      int expected_num_elements = -1;
+      TF_RETURN_IF_ERROR(c->GetAttr("num_elements", &expected_num_elements));
+      shape_inference::ShapeHandle num_elements;
+      if (expected_num_elements == -1) {
+        num_elements = c->MakeShape({c->UnknownDim()});
+      } else {
+        num_elements = c->MakeShape({expected_num_elements});
+      }
+      shape_inference::ShapeHandle result;
+      TF_RETURN_IF_ERROR(c->Concatenate(num_elements, s, &result));
+      c->set_output(0, result);
+      return Status::OK();
+    });
+
+REGISTER_OP("TensorListFromTensor")
+    .Input("tensor: element_dtype")
+    .Input("element_shape: shape_type")
+    .Output("output_handle: variant")
+    .Attr("element_dtype: type")
+    .Attr("shape_type: {int32, int64}")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->Scalar());
+      DataType t;
+      TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &t));
+      shape_inference::ShapeHandle s = c->input(0);
+      shape_inference::ShapeHandle o;
+      TF_RETURN_IF_ERROR(c->Subshape(s, 1, &o));
+      shape_inference::ShapeHandle element_shape;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &element_shape));
+      TF_RETURN_IF_ERROR(c->Merge(o, element_shape, &o));
+      c->set_output_handle_shapes_and_types(
+          0, std::vector<shape_inference::ShapeAndType>{{element_shape, t}});
+      return Status::OK();
+    });
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/logging_ops.cc b/tensorflow/core/ops/logging_ops.cc
index e6995821df700ef6d6a736645e4d18c961b089a8..d263dc25b29d5c867a10ef20ea1b39fa9b9662f1 100644
--- a/tensorflow/core/ops/logging_ops.cc
+++ b/tensorflow/core/ops/logging_ops.cc
@@ -25,17 +25,7 @@ REGISTER_OP("Assert")
     .SetIsStateful()
     .Attr("T: list(type)")
     .Attr("summarize: int = 3")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R"doc(
-Asserts that the given condition is true.
-
-If `condition` evaluates to false, print the list of tensors in `data`.
-`summarize` determines how many entries of the tensors to print.
-
-condition: The condition to evaluate.
-data: The tensors to print out when condition is false.
-summarize: Print this many entries of each tensor.
-)doc");
+    .SetShapeFn(shape_inference::NoOutputs);
 
 REGISTER_OP("Print")
     .Input("input: T")
@@ -47,19 +37,7 @@ REGISTER_OP("Print")
     .Attr("message: string = ''")
     .Attr("first_n: int = -1")
     .Attr("summarize: int = 3")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Prints a list of tensors.
-
-Passes `input` through to `output` and prints `data` when evaluating.
-
-input: The tensor passed to `output`
-data: A list of tensors to print out when op is evaluated.
-output:= The unmodified `input` tensor
-message: A string, prefix of the error message.
-first_n: Only log `first_n` number of times. -1 disables logging.
-summarize: Only print this many entries of each tensor.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // ----------------------------------------------------------------------------
 // Operators that deal with SummaryProtos (encoded as DT_STRING tensors) as
@@ -73,15 +51,7 @@ REGISTER_OP("TensorSummaryV2")
     .Input("serialized_summary_metadata: string")
     .Output("summary: string")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Outputs a `Summary` protocol buffer with a tensor and per-plugin data.
-
-tag: A string attached to this summary. Used for organization in TensorBoard.
-tensor: A tensor to serialize.
-serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin
-  data.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("TensorSummary")
     .Input("tensor: T")
@@ -90,56 +60,21 @@ REGISTER_OP("TensorSummary")
     .Attr("description: string = ''")
     .Attr("labels: list(string) = []")
     .Attr("display_name: string = ''")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Outputs a `Summary` protocol buffer with a tensor.
-
-This op is being phased out in favor of TensorSummaryV2, which lets callers pass
-a tag as well as a serialized SummaryMetadata proto string that contains
-plugin-specific data. We will keep this op to maintain backwards compatibility.
-
-tensor: A tensor to serialize.
-description: A json-encoded SummaryDescription proto.
-labels: An unused list of strings.
-display_name: An unused string.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("ScalarSummary")
     .Input("tags: string")
     .Input("values: T")
     .Output("summary: string")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Outputs a `Summary` protocol buffer with scalar values.
-
-The input `tags` and `values` must have the same shape.  The generated summary
-has a summary value for each tag-value pair in `tags` and `values`.
-
-tags: Tags for the summary.
-values: Same shape as `tags.  Values for the summary.
-summary: Scalar.  Serialized `Summary` protocol buffer.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("HistogramSummary")
     .Input("tag: string")
     .Input("values: T")
     .Output("summary: string")
     .Attr("T: realnumbertype = DT_FLOAT")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Outputs a `Summary` protocol buffer with a histogram.
-
-The generated
-[`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
-has one summary value containing a histogram for `values`.
-
-This op reports an `InvalidArgument` error if any value is not finite.
-
-tag: Scalar.  Tag to use for the `Summary.Value`.
-values: Any shape. Values to use to build the histogram.
-summary: Scalar. Serialized `Summary` protocol buffer.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("ImageSummary")
     .Input("tag: string")
@@ -151,51 +86,7 @@ REGISTER_OP("ImageSummary")
         "bad_color: tensor = { dtype: DT_UINT8 "
         "tensor_shape: { dim { size: 4 } } "
         "int_val: 255 int_val: 0 int_val: 0 int_val: 255 }")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Outputs a `Summary` protocol buffer with images.
-
-The summary has up to `max_images` summary values containing images. The
-images are built from `tensor` which must be 4-D with shape `[batch_size,
-height, width, channels]` and where `channels` can be:
-
-*  1: `tensor` is interpreted as Grayscale.
-*  3: `tensor` is interpreted as RGB.
-*  4: `tensor` is interpreted as RGBA.
-
-The images have the same number of channels as the input tensor. For float
-input, the values are normalized one image at a time to fit in the range
-`[0, 255]`.  `uint8` values are unchanged.  The op uses two different
-normalization algorithms:
-
-*  If the input values are all positive, they are rescaled so the largest one
-   is 255.
-
-*  If any input value is negative, the values are shifted so input value 0.0
-   is at 127.  They are then rescaled so that either the smallest value is 0,
-   or the largest one is 255.
-
-The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-build the `tag` of the summary values:
-
-*  If `max_images` is 1, the summary value tag is '*tag*/image'.
-*  If `max_images` is greater than 1, the summary value tags are
-   generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
-
-The `bad_color` argument is the color to use in the generated images for
-non-finite input values.  It is a `unit8` 1-D tensor of length `channels`.
-Each element must be in the range `[0, 255]` (It represents the value of a
-pixel in the output image).  Non-finite values in the input tensor are
-replaced by this tensor in the output image.  The default value is the color
-red.
-
-tag: Scalar. Used to build the `tag` attribute of the summary values.
-tensor: 4-D of shape `[batch_size, height, width, channels]` where
-  `channels` is 1, 3, or 4.
-max_images: Max number of batch elements to generate images for.
-bad_color: Color to use for pixels with non-finite values.
-summary: Scalar. Serialized `Summary` protocol buffer.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("AudioSummaryV2")
     .Input("tag: string")
@@ -203,28 +94,7 @@ REGISTER_OP("AudioSummaryV2")
     .Input("sample_rate: float")
     .Output("summary: string")
     .Attr("max_outputs: int >= 1 = 3")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Outputs a `Summary` protocol buffer with audio.
-
-The summary has up to `max_outputs` summary values containing audio. The
-audio is built from `tensor` which must be 3-D with shape `[batch_size,
-frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
-assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
-
-The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-build the `tag` of the summary values:
-
-*  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
-*  If `max_outputs` is greater than 1, the summary value tags are
-   generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
-
-tag: Scalar. Used to build the `tag` attribute of the summary values.
-tensor: 2-D of shape `[batch_size, frames]`.
-sample_rate: The sample rate of the signal in hertz.
-max_outputs: Max number of batch elements to generate audio for.
-summary: Scalar. Serialized `Summary` protocol buffer.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("AudioSummary")
     .Input("tag: string")
@@ -233,48 +103,12 @@ REGISTER_OP("AudioSummary")
     .Attr("sample_rate: float")
     .Attr("max_outputs: int >= 1 = 3")
     .SetShapeFn(shape_inference::ScalarShape)
-    .Deprecated(15, "Use AudioSummaryV2.")
-    .Doc(R"doc(
-Outputs a `Summary` protocol buffer with audio.
-
-The summary has up to `max_outputs` summary values containing audio. The
-audio is built from `tensor` which must be 3-D with shape `[batch_size,
-frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
-assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
-
-The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-build the `tag` of the summary values:
-
-*  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
-*  If `max_outputs` is greater than 1, the summary value tags are
-   generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
-
-tag: Scalar. Used to build the `tag` attribute of the summary values.
-tensor: 2-D of shape `[batch_size, frames]`.
-sample_rate: The sample rate of the signal in hertz.
-max_outputs: Max number of batch elements to generate audio for.
-summary: Scalar. Serialized `Summary` protocol buffer.
-)doc");
+    .Deprecated(15, "Use AudioSummaryV2.");
 
 REGISTER_OP("MergeSummary")
     .Input("inputs: N * string")
     .Output("summary: string")
     .Attr("N : int >= 1")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Merges summaries.
-
-This op creates a
-[`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
-protocol buffer that contains the union of all the values in the input
-summaries.
-
-When the Op is run, it reports an `InvalidArgument` error if multiple values
-in the summaries to merge use the same tag.
-
-inputs: Can be of any shape.  Each must contain serialized `Summary` protocol
-  buffers.
-summary: Scalar. Serialized `Summary` protocol buffer.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/ops/lookup_ops.cc b/tensorflow/core/ops/lookup_ops.cc
index dac02dad8bb861fee0e16e0acb0c8e17688e05fb..a67267418d608e7c824030225f906b010794a160 100644
--- a/tensorflow/core/ops/lookup_ops.cc
+++ b/tensorflow/core/ops/lookup_ops.cc
@@ -83,21 +83,7 @@ REGISTER_OP("LookupTableFind")
       TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(2), 1, &unused));
       c->set_output(0, c->UnknownShape());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Looks up keys in a table, outputs the corresponding values.
-
-The tensor `keys` must of the same type as the keys of the table.
-The output `values` is of the type of the table values.
-
-The scalar `default_value` is the value output for keys not present in the
-table. It must also be of the same type as the table values.
-
-table_handle: Handle to the table.
-keys:  Any shape.  Keys to look up.
-values: Same shape as `keys`.  Values found in the table, or `default_values`
-   for missing keys.
-)doc");
+    });
 
 REGISTER_OP("LookupTableFindV2")
     .Input("table_handle: resource")
@@ -115,21 +101,7 @@ REGISTER_OP("LookupTableFindV2")
       TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(2), 1, &unused));
       c->set_output(0, c->UnknownShape());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Looks up keys in a table, outputs the corresponding values.
-
-The tensor `keys` must of the same type as the keys of the table.
-The output `values` is of the type of the table values.
-
-The scalar `default_value` is the value output for keys not present in the
-table. It must also be of the same type as the table values.
-
-table_handle: Handle to the table.
-keys:  Any shape.  Keys to look up.
-values: Same shape as `keys`.  Values found in the table, or `default_values`
-   for missing keys.
-)doc");
+    });
 
 REGISTER_OP("LookupTableInsert")
     .Input("table_handle: Ref(string)")
@@ -145,17 +117,7 @@ REGISTER_OP("LookupTableInsert")
 
       // TODO(ebrevdo): Validate keys and values shape.
       return Status::OK();
-    })
-    .Doc(R"doc(
-Updates the table to associates keys with values.
-
-The tensor `keys` must be of the same type as the keys of the table.
-The tensor `values` must be of the type of the table values.
-
-table_handle: Handle to the table.
-keys:  Any shape.  Keys to look up.
-values: Values to associate with keys.
-)doc");
+    });
 
 REGISTER_OP("LookupTableInsertV2")
     .Input("table_handle: resource")
@@ -169,39 +131,17 @@ REGISTER_OP("LookupTableInsertV2")
 
       // TODO: Validate keys and values shape.
       return Status::OK();
-    })
-    .Doc(R"doc(
-Updates the table to associates keys with values.
-
-The tensor `keys` must be of the same type as the keys of the table.
-The tensor `values` must be of the type of the table values.
-
-table_handle: Handle to the table.
-keys:  Any shape.  Keys to look up.
-values: Values to associate with keys.
-)doc");
+    });
 
 REGISTER_OP("LookupTableSize")
     .Input("table_handle: Ref(string)")
     .Output("size: int64")
-    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs)
-    .Doc(R"doc(
-Computes the number of elements in the given table.
-
-table_handle: Handle to the table.
-size: Scalar that contains number of elements in the table.
-)doc");
+    .SetShapeFn(TwoElementVectorInputsAndScalarOutputs);
 
 REGISTER_OP("LookupTableSizeV2")
     .Input("table_handle: resource")
     .Output("size: int64")
-    .SetShapeFn(ScalarAndTwoElementVectorInputsAndScalarOutputs)
-    .Doc(R"doc(
-Computes the number of elements in the given table.
-
-table_handle: Handle to the table.
-size: Scalar that contains number of elements in the table.
-)doc");
+    .SetShapeFn(ScalarAndTwoElementVectorInputsAndScalarOutputs);
 
 REGISTER_OP("LookupTableExport")
     .Input("table_handle: Ref(string)")
@@ -221,14 +161,7 @@ REGISTER_OP("LookupTableExport")
       c->set_output(0, keys);
       c->set_output(1, values);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Outputs all keys and values in the table.
-
-table_handle: Handle to the table.
-keys: Vector of all keys present in the table.
-values: Tensor of all values in the table. Indexed in parallel with `keys`.
-)doc");
+    });
 
 REGISTER_OP("LookupTableExportV2")
     .Input("table_handle: resource")
@@ -246,14 +179,7 @@ REGISTER_OP("LookupTableExportV2")
       c->set_output(0, keys);
       c->set_output(1, values);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Outputs all keys and values in the table.
-
-table_handle: Handle to the table.
-keys: Vector of all keys present in the table.
-values: Tensor of all values in the table. Indexed in parallel with `keys`.
-)doc");
+    });
 
 REGISTER_OP("LookupTableImport")
     .Input("table_handle: Ref(string)")
@@ -269,17 +195,7 @@ REGISTER_OP("LookupTableImport")
 
       // TODO(ebrevdo): Validate keys and values shape.
       return Status::OK();
-    })
-    .Doc(R"doc(
-Replaces the contents of the table with the specified keys and values.
-
-The tensor `keys` must be of the same type as the keys of the table.
-The tensor `values` must be of the type of the table values.
-
-table_handle: Handle to the table.
-keys:  Any shape.  Keys to look up.
-values: Values to associate with keys.
-)doc");
+    });
 
 REGISTER_OP("LookupTableImportV2")
     .Input("table_handle: resource")
@@ -293,17 +209,7 @@ REGISTER_OP("LookupTableImportV2")
 
       // TODO: Validate keys and values shape.
       return Status::OK();
-    })
-    .Doc(R"doc(
-Replaces the contents of the table with the specified keys and values.
-
-The tensor `keys` must be of the same type as the keys of the table.
-The tensor `values` must be of the type of the table values.
-
-table_handle: Handle to the table.
-keys:  Any shape.  Keys to look up.
-values: Values to associate with keys.
-)doc");
+    });
 
 REGISTER_OP("HashTable")
     .Output("table_handle: Ref(string)")
@@ -313,24 +219,7 @@ REGISTER_OP("HashTable")
     .Attr("key_dtype: type")
     .Attr("value_dtype: type")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-Creates a non-initialized hash table.
-
-This op creates a hash table, specifying the type of its keys and values.
-Before using the table you will have to initialize it.  After initialization the
-table will be immutable.
-
-table_handle: Handle to a table.
-container: If non-empty, this table is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this table is shared under the given name across
-  multiple sessions.
-use_node_name_sharing: If true and shared_name is empty, the table is shared
-  using the node name.
-key_dtype: Type of the table keys.
-value_dtype: Type of the table values.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("HashTableV2")
     .Output("table_handle: resource")
@@ -340,24 +229,7 @@ REGISTER_OP("HashTableV2")
     .Attr("key_dtype: type")
     .Attr("value_dtype: type")
     .SetIsStateful()
-    .SetShapeFn(ScalarOutput)
-    .Doc(R"doc(
-Creates a non-initialized hash table.
-
-This op creates a hash table, specifying the type of its keys and values.
-Before using the table you will have to initialize it.  After initialization the
-table will be immutable.
-
-table_handle: Handle to a table.
-container: If non-empty, this table is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this table is shared under the given name across
-  multiple sessions.
-use_node_name_sharing: If true and shared_name is empty, the table is shared
-  using the node name.
-key_dtype: Type of the table keys.
-value_dtype: Type of the table values.
-)doc");
+    .SetShapeFn(ScalarOutput);
 
 REGISTER_OP("MutableHashTable")
     .Output("table_handle: Ref(string)")
@@ -367,24 +239,7 @@ REGISTER_OP("MutableHashTable")
     .Attr("key_dtype: type")
     .Attr("value_dtype: type")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-Creates an empty hash table.
-
-This op creates a mutable hash table, specifying the type of its keys and
-values. Each value must be a scalar. Data can be inserted into the table using
-the insert operations. It does not support the initialization operation.
-
-table_handle: Handle to a table.
-container: If non-empty, this table is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this table is shared under the given name across
-  multiple sessions.
-use_node_name_sharing: If true and shared_name is empty, the table is shared
-  using the node name.
-key_dtype: Type of the table keys.
-value_dtype: Type of the table values.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("MutableHashTableV2")
     .Output("table_handle: resource")
@@ -394,24 +249,7 @@ REGISTER_OP("MutableHashTableV2")
     .Attr("key_dtype: type")
     .Attr("value_dtype: type")
     .SetIsStateful()
-    .SetShapeFn(ScalarOutput)
-    .Doc(R"doc(
-Creates an empty hash table.
-
-This op creates a mutable hash table, specifying the type of its keys and
-values. Each value must be a scalar. Data can be inserted into the table using
-the insert operations. It does not support the initialization operation.
-
-table_handle: Handle to a table.
-container: If non-empty, this table is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this table is shared under the given name across
-  multiple sessions.
-use_node_name_sharing: If true and shared_name is empty, the table is shared
-  using the node name.
-key_dtype: Type of the table keys.
-value_dtype: Type of the table values.
-)doc");
+    .SetShapeFn(ScalarOutput);
 
 REGISTER_OP("MutableHashTableOfTensors")
     .Output("table_handle: Ref(string)")
@@ -422,22 +260,7 @@ REGISTER_OP("MutableHashTableOfTensors")
     .Attr("value_dtype: type")
     .Attr("value_shape: shape = {}")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-Creates an empty hash table.
-
-This op creates a mutable hash table, specifying the type of its keys and
-values. Each value must be a vector. Data can be inserted into the table using
-the insert operations. It does not support the initialization operation.
-
-table_handle: Handle to a table.
-container: If non-empty, this table is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this table is shared under the given name across
-  multiple sessions.
-key_dtype: Type of the table keys.
-value_dtype: Type of the table values.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("MutableHashTableOfTensorsV2")
     .Output("table_handle: resource")
@@ -448,22 +271,7 @@ REGISTER_OP("MutableHashTableOfTensorsV2")
     .Attr("value_dtype: type")
     .Attr("value_shape: shape = {}")
     .SetIsStateful()
-    .SetShapeFn(ScalarOutput)
-    .Doc(R"doc(
-Creates an empty hash table.
-
-This op creates a mutable hash table, specifying the type of its keys and
-values. Each value must be a vector. Data can be inserted into the table using
-the insert operations. It does not support the initialization operation.
-
-table_handle: Handle to a table.
-container: If non-empty, this table is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this table is shared under the given name across
-  multiple sessions.
-key_dtype: Type of the table keys.
-value_dtype: Type of the table values.
-)doc");
+    .SetShapeFn(ScalarOutput);
 
 REGISTER_OP("MutableDenseHashTable")
     .Input("empty_key: key_dtype")
@@ -477,32 +285,7 @@ REGISTER_OP("MutableDenseHashTable")
     .Attr("initial_num_buckets: int = 131072")  // 2^17
     .Attr("max_load_factor: float = 0.8")
     .SetIsStateful()
-    .SetShapeFn(TwoElementOutput)
-    .Doc(R"doc(
-Creates an empty hash table that uses tensors as the backing store.
-
-It uses "open addressing" with quadratic reprobing to resolve
-collisions.
-
-This op creates a mutable hash table, specifying the type of its keys and
-values. Each value must be a scalar. Data can be inserted into the table using
-the insert operations. It does not support the initialization operation.
-
-empty_key: The key used to represent empty key buckets internally. Must not
-  be used in insert or lookup operations.
-table_handle: Handle to a table.
-container: If non-empty, this table is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this table is shared under the given name across
-  multiple sessions.
-key_dtype: Type of the table keys.
-value_dtype: Type of the table values.
-value_shape: The shape of each value.
-initial_num_buckets: The initial number of hash table buckets. Must be a power
-  to 2.
-max_load_factor: The maximum ratio between number of entries and number of
-  buckets before growing the table. Must be between 0 and 1.
-)doc");
+    .SetShapeFn(TwoElementOutput);
 
 REGISTER_OP("MutableDenseHashTableV2")
     .Input("empty_key: key_dtype")
@@ -516,32 +299,7 @@ REGISTER_OP("MutableDenseHashTableV2")
     .Attr("initial_num_buckets: int = 131072")  // 2^17
     .Attr("max_load_factor: float = 0.8")
     .SetIsStateful()
-    .SetShapeFn(ScalarOutput)
-    .Doc(R"doc(
-Creates an empty hash table that uses tensors as the backing store.
-
-It uses "open addressing" with quadratic reprobing to resolve
-collisions.
-
-This op creates a mutable hash table, specifying the type of its keys and
-values. Each value must be a scalar. Data can be inserted into the table using
-the insert operations. It does not support the initialization operation.
-
-empty_key: The key used to represent empty key buckets internally. Must not
-  be used in insert or lookup operations.
-table_handle: Handle to a table.
-container: If non-empty, this table is placed in the given container.
-  Otherwise, a default container is used.
-shared_name: If non-empty, this table is shared under the given name across
-  multiple sessions.
-key_dtype: Type of the table keys.
-value_dtype: Type of the table values.
-value_shape: The shape of each value.
-initial_num_buckets: The initial number of hash table buckets. Must be a power
-  to 2.
-max_load_factor: The maximum ratio between number of entries and number of
-  buckets before growing the table. Must be between 0 and 1.
-)doc");
+    .SetShapeFn(ScalarOutput);
 
 REGISTER_OP("InitializeTable")
     .Input("table_handle: Ref(string)")
@@ -559,14 +317,7 @@ REGISTER_OP("InitializeTable")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &keys));
       TF_RETURN_IF_ERROR(c->Merge(keys, c->input(2), &keys));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Table initializer that takes two tensors for keys and values respectively.
-
-table_handle: Handle to a table which will be initialized.
-keys: Keys of type Tkey.
-values: Values of type Tval.
-)doc");
+    });
 
 REGISTER_OP("InitializeTableV2")
     .Input("table_handle: resource")
@@ -582,14 +333,7 @@ REGISTER_OP("InitializeTableV2")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &keys));
       TF_RETURN_IF_ERROR(c->Merge(keys, c->input(2), &keys));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Table initializer that takes two tensors for keys and values respectively.
-
-table_handle: Handle to a table which will be initialized.
-keys: Keys of type Tkey.
-values: Values of type Tval.
-)doc");
+    });
 
 REGISTER_OP("InitializeTableFromTextFile")
     .Input("table_handle: Ref(string)")
@@ -606,29 +350,7 @@ REGISTER_OP("InitializeTableFromTextFile")
 
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &handle));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Initializes a table from a text file.
-
-It inserts one key-value pair into the table for each line of the file.
-The key and value is extracted from the whole line content, elements from the
-split line based on `delimiter` or the line number (starting from zero).
-Where to extract the key and value from a line is specified by `key_index` and
-`value_index`.
-
-- A value of -1 means use the line number(starting from zero), expects `int64`.
-- A value of -2 means use the whole line content, expects `string`.
-- A value >= 0 means use the index (starting at zero) of the split line based
-  on `delimiter`.
-
-table_handle: Handle to a table which will be initialized.
-filename: Filename of a vocabulary text file.
-key_index: Column index in a line to get the table `key` values from.
-value_index: Column index that represents information of a line to get the table
-  `value` values from.
-vocab_size: Number of elements of the file, use -1 if unknown.
-delimiter: Delimiter to separate fields in a line.
-)doc");
+    });
 
 REGISTER_OP("InitializeTableFromTextFileV2")
     .Input("table_handle: resource")
@@ -643,28 +365,6 @@ REGISTER_OP("InitializeTableFromTextFileV2")
 
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &handle));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Initializes a table from a text file.
-
-It inserts one key-value pair into the table for each line of the file.
-The key and value is extracted from the whole line content, elements from the
-split line based on `delimiter` or the line number (starting from zero).
-Where to extract the key and value from a line is specified by `key_index` and
-`value_index`.
-
-- A value of -1 means use the line number(starting from zero), expects `int64`.
-- A value of -2 means use the whole line content, expects `string`.
-- A value >= 0 means use the index (starting at zero) of the split line based
-  on `delimiter`.
-
-table_handle: Handle to a table which will be initialized.
-filename: Filename of a vocabulary text file.
-key_index: Column index in a line to get the table `key` values from.
-value_index: Column index that represents information of a line to get the table
-  `value` values from.
-vocab_size: Number of elements of the file, use -1 if unknown.
-delimiter: Delimiter to separate fields in a line.
-)doc");
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index df75caca37a616f75263e35a0d5e725f36e1307b..dd484c3ee752b47f4a196cd45c6e26984b5ef0bd 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -40,12 +40,7 @@ REGISTER_OP("AddN")
       }
       c->set_output(0, cur);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Add all input tensors element wise.
-
-inputs: Must all be the same size and shape.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -62,22 +57,7 @@ REGISTER_OP("AccumulateNV2")
     .Attr("shape: shape")
     .SetIsCommutative()
     .SetIsAggregate()
-    .SetShapeFn(shape_inference::ExplicitShape)
-    .Doc(R"doc(
-Returns the element-wise sum of a list of tensors.
-
-`tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not
-wait for all of its inputs to be ready before beginning to sum. This can
-save memory if inputs are ready at different times, since minimum temporary
-storage is proportional to the output size rather than the inputs size.
-
-Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.
-
-Returns a `Tensor` of same shape and type as the elements of `inputs`.
-
-inputs: A list of `Tensor` objects, each with same shape and type.
-shape: Shape of elements of `inputs`.
-)doc");
+    .SetShapeFn(shape_inference::ExplicitShape);
 
 // --------------------------------------------------------------------------
 
@@ -85,7 +65,7 @@ REGISTER_OP("BatchMatMul")
     .Input("x: T")
     .Input("y: T")
     .Output("output: T")
-    .Attr("T: {half, float, double, int32, complex64, complex128}")
+    .Attr("T: {half, bfloat16, float, double, int32, complex64, complex128}")
     .Attr("adj_x: bool = false")
     .Attr("adj_y: bool = false")
     .SetShapeFn([](InferenceContext* c) {
@@ -120,35 +100,7 @@ REGISTER_OP("BatchMatMul")
           batch_dims, c->Matrix(output_rows, output_cols), &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Multiplies slices of two tensors in batches.
-
-Multiplies all slices of `Tensor` `x` and `y` (each slice can be
-viewed as an element of a batch), and arranges the individual results
-in a single output tensor of the same batch size. Each of the
-individual slices can optionally be adjointed (to adjoint a matrix
-means to transpose and conjugate it) before multiplication by setting
-the `adj_x` or `adj_y` flag to `True`, which are by default `False`.
-
-The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]`
-and `[..., r_y, c_y]`.
-
-The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where:
-
-    r_o = c_x if adj_x else r_x
-    c_o = r_y if adj_y else c_y
-
-It is computed as:
-
-    output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :])
-
-x: 2-D or higher with shape `[..., r_x, c_x]`.
-y: 2-D or higher with shape `[..., r_y, c_y]`.
-output: 3-D or higher with shape `[..., r_o, c_o]`
-adj_x: If `True`, adjoint the slices of `x`. Defaults to `False`.
-adj_y: If `True`, adjoint the slices of `y`. Defaults to `False`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 // Casting Ops
@@ -162,10 +114,7 @@ REGISTER_OP("Cast")
     .Output("y: DstT")
     .Attr("SrcT: type")
     .Attr("DstT: type")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Cast x of type SrcT to y of DstT.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("_HostCast")
     .Input("x: SrcT")
@@ -184,295 +133,111 @@ _HostCast requires its input and produces its output in host memory.
 REGISTER_OP("Abs")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {half, float, double, int32, int64}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Computes the absolute value of a tensor.
-
-Given a tensor `x`, this operation returns a tensor containing the absolute
-value of each element in `x`. For example, if x is an input element and y is
-an output element, this operation computes \\(y = |x|\\).
-)doc");
+    .Attr("T: {half, bfloat16, float, double, int32, int64}")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("ComplexAbs")
     .Input("x: T")
     .Output("y: Tout")
     .Attr("T: {complex64, complex128} = DT_COMPLEX64")
     .Attr("Tout: {float, double} = DT_FLOAT")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Computes the complex absolute value of a tensor.
-
-Given a tensor `x` of complex numbers, this operation returns a tensor of type
-`float` or `double` that is the absolute value of each element in `x`. All
-elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute
-value is computed as \\( \sqrt{a^2 + b^2}\\).
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // Declares cwise unary operations signature: 't -> 't
-#define UNARY()                                                              \
-  Input("x: T")                                                              \
-      .Output("y: T")                                                        \
-      .Attr("T: {half, float, double, int32, int64, complex64, complex128}") \
+#define UNARY()                                                          \
+  Input("x: T")                                                          \
+      .Output("y: T")                                                    \
+      .Attr(                                                             \
+          "T: {half, bfloat16, float, double, int32, int64, complex64, " \
+          "complex128}")                                                 \
       .SetShapeFn(shape_inference::UnchangedShape)
 
-#define UNARY_REAL()                    \
-  Input("x: T")                         \
-      .Output("y: T")                   \
-      .Attr("T: {half, float, double}") \
+#define UNARY_REAL()                              \
+  Input("x: T")                                   \
+      .Output("y: T")                             \
+      .Attr("T: {half, bfloat16, float, double}") \
       .SetShapeFn(shape_inference::UnchangedShape)
 
-#define UNARY_COMPLEX()                                        \
-  Input("x: T")                                                \
-      .Output("y: T")                                          \
-      .Attr("T: {half, float, double, complex64, complex128}") \
+#define UNARY_COMPLEX()                                                  \
+  Input("x: T")                                                          \
+      .Output("y: T")                                                    \
+      .Attr("T: {half, bfloat16, float, double, complex64, complex128}") \
       .SetShapeFn(shape_inference::UnchangedShape)
 
-#define UNARY_GRADIENT_COMPLEX()                               \
-  Input("y: T")                                                \
-      .Input("dy: T")                                          \
-      .Output("z: T")                                          \
-      .Attr("T: {half, float, double, complex64, complex128}") \
+#define UNARY_GRADIENT_COMPLEX()                                         \
+  Input("y: T")                                                          \
+      .Input("dy: T")                                                    \
+      .Output("z: T")                                                    \
+      .Attr("T: {half, bfloat16, float, double, complex64, complex128}") \
       .SetShapeFn(shape_inference::UnchangedShape)
 
-REGISTER_OP("Neg")
-    .UNARY()
-    .Doc(R"doc(
-Computes numerical negative value element-wise.
-I.e., \\(y = -x\\).
-)doc");
+REGISTER_OP("Neg").UNARY();
 
-REGISTER_OP("Inv")
-    .UNARY()
-    .Doc(R"doc(
-Computes the reciprocal of x element-wise.
-I.e., \\(y = 1 / x\\).
-)doc")
-    .Deprecated(17, "Use Reciprocal");
+REGISTER_OP("Inv").UNARY();
 
-REGISTER_OP("InvGrad")
-    .UNARY_GRADIENT_COMPLEX()
-    .Doc(R"doc(
-Computes the gradient for the inverse of `x` wrt its input.
+REGISTER_OP("InvGrad").UNARY_GRADIENT_COMPLEX();
 
-Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`
-is the corresponding input gradient.
-)doc")
-    .Deprecated(17, "Use ReciprocalGrad");
-
-REGISTER_OP("Reciprocal")
-    .UNARY()
-    .Doc(R"doc(
-Computes the reciprocal of x element-wise.
-I.e., \\(y = 1 / x\\).
-)doc");
-
-REGISTER_OP("ReciprocalGrad")
-    .UNARY_GRADIENT_COMPLEX()
-    .Doc(R"doc(
-Computes the gradient for the inverse of `x` wrt its input.
-
-Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`
-is the corresponding input gradient.
-)doc");
-
-REGISTER_OP("Square")
-    .UNARY()
-    .Doc(R"doc(
-Computes square of x element-wise.
-I.e., \\(y = x * x = x^2\\).
-)doc");
-
-REGISTER_OP("Sqrt")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes square root of x element-wise.
-I.e., \\(y = \sqrt{x} = x^{1/2}\\).
-)doc");
+REGISTER_OP("Reciprocal").UNARY();
 
-REGISTER_OP("SqrtGrad")
-    .UNARY_GRADIENT_COMPLEX()
-    .Doc(R"doc(
-Computes the gradient for the sqrt of `x` wrt its input.
+REGISTER_OP("ReciprocalGrad").UNARY_GRADIENT_COMPLEX();
 
-Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy`
-is the corresponding input gradient.
-)doc");
+REGISTER_OP("Square").UNARY();
 
-REGISTER_OP("Rsqrt")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes reciprocal of square root of x element-wise.
-I.e., \\(y = 1 / \sqrt{x}\\).
-)doc");
+REGISTER_OP("Sqrt").UNARY_COMPLEX();
 
-REGISTER_OP("Round")
-    .UNARY()
-    .Doc(R"doc(
-Rounds the values of a tensor to the nearest integer, element-wise.
+REGISTER_OP("SqrtGrad").UNARY_GRADIENT_COMPLEX();
 
-Rounds half to even.  Also known as bankers rounding. If you want to round
-according to the current system rounding mode use std::cint.
-)doc");
+REGISTER_OP("Rsqrt").UNARY_COMPLEX();
 
-REGISTER_OP("RsqrtGrad")
-    .UNARY_GRADIENT_COMPLEX()
-    .Doc(R"doc(
-Computes the gradient for the rsqrt of `x` wrt its input.
+REGISTER_OP("Round").UNARY();
 
-Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy`
-is the corresponding input gradient.
-)doc");
+REGISTER_OP("RsqrtGrad").UNARY_GRADIENT_COMPLEX();
 
-REGISTER_OP("Exp")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes exponential of x element-wise.  \\(y = e^x\\).
-)doc");
+REGISTER_OP("Exp").UNARY_COMPLEX();
 
-REGISTER_OP("Expm1")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes exponential of x - 1 element-wise.
-I.e., \\(y = (\exp x) - 1\\).
-)doc");
+REGISTER_OP("Expm1").UNARY_COMPLEX();
 
-REGISTER_OP("Log")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes natural logarithm of x element-wise.
-I.e., \\(y = \log_e x\\).
-)doc");
+REGISTER_OP("Log").UNARY_COMPLEX();
 
-REGISTER_OP("Log1p")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes natural logarithm of (1 + x) element-wise.
-I.e., \\(y = \log_e (1 + x)\\).
-)doc");
+REGISTER_OP("Log1p").UNARY_COMPLEX();
 
-REGISTER_OP("Sinh")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes hyperbolic sine of x element-wise.
-)doc");
+REGISTER_OP("Sinh").UNARY_COMPLEX();
 
-REGISTER_OP("Cosh")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes hyperbolic cosine of x element-wise.
-)doc");
+REGISTER_OP("Cosh").UNARY_COMPLEX();
 
-REGISTER_OP("Tanh")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes hyperbolic tangent of `x` element-wise.
-)doc");
+REGISTER_OP("Tanh").UNARY_COMPLEX();
 
-REGISTER_OP("Asinh")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes inverse hyperbolic sine of x element-wise.
-)doc");
+REGISTER_OP("Asinh").UNARY_COMPLEX();
 
-REGISTER_OP("Acosh")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes inverse hyperbolic cosine of x element-wise.
-)doc");
+REGISTER_OP("Acosh").UNARY_COMPLEX();
 
-REGISTER_OP("Atanh")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes inverse hyperbolic tangent of x element-wise.
-)doc");
+REGISTER_OP("Atanh").UNARY_COMPLEX();
 
-REGISTER_OP("TanhGrad")
-    .UNARY_GRADIENT_COMPLEX()
-    .Doc(R"doc(
-Computes the gradient for the tanh of `x` wrt its input.
+REGISTER_OP("TanhGrad").UNARY_GRADIENT_COMPLEX();
 
-Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy`
-is the corresponding input gradient.
-)doc");
+REGISTER_OP("Lgamma").UNARY_REAL();
 
-REGISTER_OP("Lgamma")
-    .UNARY_REAL()
-    .Doc(R"doc(
-Computes the log of the absolute value of `Gamma(x)` element-wise.
-)doc");
+REGISTER_OP("Digamma").UNARY_REAL();
 
-REGISTER_OP("Digamma")
-    .UNARY_REAL()
-    .Doc(R"doc(
-Computes Psi, the derivative of Lgamma (the log of the absolute value of
-`Gamma(x)`), element-wise.
-)doc");
+REGISTER_OP("Erf").UNARY_REAL();
 
-REGISTER_OP("Erf")
-    .UNARY_REAL()
-    .Doc(R"doc(
-Computes the Gauss error function of `x` element-wise.
-)doc");
+REGISTER_OP("Erfc").UNARY_REAL();
 
-REGISTER_OP("Erfc")
-    .UNARY_REAL()
-    .Doc(R"doc(
-Computes the complementary error function of `x` element-wise.
-)doc");
+REGISTER_OP("Sigmoid").UNARY_COMPLEX();
 
-REGISTER_OP("Sigmoid")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes sigmoid of `x` element-wise.
+REGISTER_OP("SigmoidGrad").UNARY_GRADIENT_COMPLEX();
 
-Specifically, `y = 1 / (1 + exp(-x))`.
-)doc");
+REGISTER_OP("Sin").UNARY_COMPLEX();
 
-REGISTER_OP("SigmoidGrad")
-    .UNARY_GRADIENT_COMPLEX()
-    .Doc(R"doc(
-Computes the gradient of the sigmoid of `x` wrt its input.
+REGISTER_OP("Cos").UNARY_COMPLEX();
 
-Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and
-`dy` is the corresponding input gradient.
-)doc");
+REGISTER_OP("Tan").UNARY();
 
-REGISTER_OP("Sin")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes sin of x element-wise.
-)doc");
+REGISTER_OP("Asin").UNARY();
 
-REGISTER_OP("Cos")
-    .UNARY_COMPLEX()
-    .Doc(R"doc(
-Computes cos of x element-wise.
-)doc");
+REGISTER_OP("Acos").UNARY();
 
-REGISTER_OP("Tan")
-    .UNARY()
-    .Doc(R"doc(
-Computes tan of x element-wise.
-)doc");
-
-REGISTER_OP("Asin")
-    .UNARY()
-    .Doc(R"doc(
-Computes asin of x element-wise.
-)doc");
-
-REGISTER_OP("Acos")
-    .UNARY()
-    .Doc(R"doc(
-Computes acos of x element-wise.
-)doc");
-
-REGISTER_OP("Atan")
-    .UNARY()
-    .Doc(R"doc(
-Computes atan of x element-wise.
-)doc");
+REGISTER_OP("Atan").UNARY();
 
 #undef UNARY
 #undef UNARY_REAL
@@ -481,117 +246,67 @@ Computes atan of x element-wise.
 REGISTER_OP("IsNan")
     .Input("x: T")
     .Output("y: bool")
-    .Attr("T: {half, float, double}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns which elements of x are NaN.
-
-@compatibility(numpy)
-Equivalent to np.isnan
-@end_compatibility
-)doc");
+    .Attr("T: {half, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("IsInf")
     .Input("x: T")
     .Output("y: bool")
-    .Attr("T: {half, float, double}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns which elements of x are Inf.
-
-@compatibility(numpy)
-Equivalent to np.isinf
-@end_compatibility
-)doc");
+    .Attr("T: {half, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("IsFinite")
     .Input("x: T")
     .Output("y: bool")
-    .Attr("T: {half, float, double}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns which elements of x are finite.
-
-@compatibility(numpy)
-Equivalent to np.isfinite
-@end_compatibility
-)doc");
+    .Attr("T: {half, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Sign")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {half, float, double, int32, int64, complex64, complex128}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns an element-wise indication of the sign of a number.
-
-`y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`.
-
-For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`.
-)doc");
+    .Attr(
+        "T: {half, bfloat16, float, double, int32, int64, complex64, "
+        "complex128}")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Floor")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {half, float, double}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns element-wise largest integer not greater than x.
-)doc");
+    .Attr("T: {half, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Ceil")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {half, float, double}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns element-wise smallest integer in not less than x.
-)doc");
+    .Attr("T: {half, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Rint")
     .Input("x: T")
     .Output("y: T")
-    .Attr("T: {float, double}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns element-wise integer closest to x.
-
-If the result is midway between two representable values,
-the even representable is chosen.
-For example:
-
-```
-rint(-1.5) ==> -2.0
-rint(0.5000001) ==> 1.0
-rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]
-```
-)doc");
+    .Attr("T: {bfloat16, float, double}")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // Declares cwise binary operations signature: 't, 't -> 't.
 
-#define BINARY_MORE()                                                       \
-  Input("x: T").Input("y: T").Output("z: T").Attr(                          \
-      "T: {half, float, double, uint8, int8, uint16, int16, int32, int64, " \
-      "complex64, complex128}")
+#define BINARY_MORE()                                                          \
+  Input("x: T").Input("y: T").Output("z: T").Attr(                             \
+      "T: {half, bfloat16, float, double, uint8, int8, uint16, int16, int32, " \
+      "int64, complex64, complex128}")
 
-#define BINARY_FEWER()                             \
-  Input("x: T").Input("y: T").Output("z: T").Attr( \
-      "T: {half, float, double, int32, int64, complex64, complex128}")
+#define BINARY_FEWER()                                               \
+  Input("x: T").Input("y: T").Output("z: T").Attr(                   \
+      "T: {half, bfloat16, float, double, int32, int64, complex64, " \
+      "complex128}")
 
 REGISTER_OP("Add")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
     .Attr(
-        "T: {half, float, double, uint8, int8, int16, int32, int64, complex64, "
-        "complex128, string}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns x + y element-wise.
-
-*NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+        "T: {half, bfloat16, float, double, uint8, int8, int16, int32, int64, "
+        "complex64, complex128, string}")
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 // TODO(rmlarsen): Add a Python wrapper that swiches non-string instances to
 // use AddV2 (b/68646025).
@@ -600,17 +315,11 @@ REGISTER_OP("AddV2")
     .Input("y: T")
     .Output("z: T")
     .Attr(
-        "T: {half, float, double, uint8, int8, int16, int32, int64, complex64, "
-        "complex128}")
+        "T: {half, bfloat16, float, double, uint8, int8, int16, int32, int64, "
+        "complex64, complex128}")
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
     .SetIsAggregate()
-    .SetIsCommutative()
-    .Doc(R"doc(
-Returns x + y element-wise.
-
-*NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+    .SetIsCommutative();
 
 REGISTER_OP("_MklAdd")
     .Input("x: T")
@@ -630,15 +339,8 @@ Returns x + y element-wise.
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 )doc");
 
-REGISTER_OP("Sub")
-    .BINARY_MORE()
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns x - y element-wise.
-
-*NOTE*: `Sub` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+REGISTER_OP("Sub").BINARY_MORE().SetShapeFn(
+    shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("_MklSub")
     .BINARY_FEWER()
@@ -653,16 +355,8 @@ Returns x - y element-wise.
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 )doc");
 
-REGISTER_OP("Mul")
-    .BINARY_MORE()
-    .SetIsCommutative()
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns x * y element-wise.
-
-*NOTE*: `Mul` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+REGISTER_OP("Mul").BINARY_MORE().SetIsCommutative().SetShapeFn(
+    shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("_MklMul")
     .BINARY_MORE()
@@ -678,63 +372,24 @@ Returns x * y element-wise.
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 )doc");
 
-REGISTER_OP("Div")
-    .BINARY_MORE()
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns x / y element-wise.
-
-*NOTE*: `Div` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+REGISTER_OP("Div").BINARY_MORE().SetShapeFn(
+    shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("FloorDiv")
     .BINARY_MORE()
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns x // y element-wise.
-
-*NOTE*: `FloorDiv` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("TruncateDiv")
     .BINARY_MORE()
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns x / y element-wise for integer types.
-
-Truncation designates that negative numbers will round fractional quantities
-toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different
-than Python semantics. See `FloorDiv` for a division function that matches
-Python Semantics.
-
-*NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
-
-REGISTER_OP("RealDiv")
-    .BINARY_MORE()
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns x / y element-wise for real types.
-
-If `x` and `y` are reals, this will return the floating-point division.
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
-*NOTE*: `Div` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+REGISTER_OP("RealDiv").BINARY_MORE().SetShapeFn(
+    shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("SquaredDifference")
     .BINARY_FEWER()
     .SetIsCommutative()
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns (x - y)(x - y) element-wise.
-
-*NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("_MklSquaredDifference")
     .BINARY_FEWER()
@@ -757,15 +412,9 @@ REGISTER_OP("Maximum")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {half, float, double, int32, int64}")
+    .Attr("T: {half, bfloat16, float, double, int32, int64}")
     .SetIsCommutative()
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns the max of x and y (i.e. x > y ? x : y) element-wise.
-
-*NOTE*: `Maximum` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("_MklMaximum")
     .Input("x: T")
@@ -788,174 +437,74 @@ REGISTER_OP("Minimum")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {half, float, double, int32, int64}")
+    .Attr("T: {half, bfloat16, float, double, int32, int64}")
     .SetIsCommutative()
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns the min of x and y (i.e. x < y ? x : y) element-wise.
-
-*NOTE*: `Minimum` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("Mod")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {int32, int64, float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns element-wise remainder of division. This emulates C semantics in that
-the result here is consistent with a truncating divide. E.g.
-`tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`.
-
-*NOTE*: `Mod` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+    .Attr("T: {int32, int64, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("FloorMod")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {int32, int64, float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns element-wise remainder of division. When `x < 0` xor `y < 0` is
-true, this follows Python semantics in that the result here is consistent
-with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`.
-
-*NOTE*: `FloorMod` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+    .Attr("T: {int32, int64, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("TruncateMod")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {int32, int64, float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Returns element-wise remainder of division. This emulates C semantics in that
-the result here is consistent with a truncating divide. E.g. `truncate(x / y) *
-y + truncate_mod(x, y) = x`.
-
-*NOTE*: `TruncateMod` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+    .Attr("T: {int32, int64, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("Pow")
     .Input("x: T")
     .Input("y: T")
     .Output("z: T")
-    .Attr("T: {half, float, double, int32, int64, complex64, complex128}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Computes the power of one value to another.
-
-Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for
-corresponding elements in `x` and `y`. For example:
-
-```
-# tensor 'x' is [[2, 2]], [3, 3]]
-# tensor 'y' is [[8, 16], [2, 3]]
-tf.pow(x, y) ==> [[256, 65536], [9, 27]]
-```
-)doc");
+    .Attr(
+        "T: {half, bfloat16, float, double, int32, int64, complex64, "
+        "complex128}")
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("Igammac")
     .Input("a: T")
     .Input("x: T")
     .Output("z: T")
     .Attr("T: {float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Compute the upper regularized incomplete Gamma function `Q(a, x)`.
-
-The upper regularized incomplete Gamma function is defined as:
-
-\\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\)
-
-where
-
-\\(Gamma(a, x) = int_{x}^{\infty} t^{a-1} exp(-t) dt\\)
-
-is the upper incomplete Gama function.
-
-Note, above `P(a, x)` (`Igamma`) is the lower regularized complete
-Gamma function.
-)doc");
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("Igamma")
     .Input("a: T")
     .Input("x: T")
     .Output("z: T")
     .Attr("T: {float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Compute the lower regularized incomplete Gamma function `Q(a, x)`.
-
-The lower regularized incomplete Gamma function is defined as:
-
-
-\\(P(a, x) = gamma(a, x) / Gamma(a) = 1 - Q(a, x)\\)
-
-where
-
-\\(gamma(a, x) = int_{0}^{x} t^{a-1} exp(-t) dt\\)
-
-is the lower incomplete Gamma function.
-
-Note, above `Q(a, x)` (`Igammac`) is the upper regularized complete
-Gamma function.
-)doc");
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("Zeta")
     .Input("x: T")
     .Input("q: T")
     .Output("z: T")
     .Attr("T: {float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Compute the Hurwitz zeta function \\(\zeta(x, q)\\).
-
-The Hurwitz zeta function is defined as:
-
-
-\\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\)
-
-)doc");
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("Polygamma")
     .Input("a: T")
     .Input("x: T")
     .Output("z: T")
     .Attr("T: {float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Compute the polygamma function \\(\psi^{(n)}(x)\\).
-
-The polygamma function is defined as:
-
-
-\\(\psi^{(n)}(x) = \frac{d^n}{dx^n} \psi(x)\\)
-
-where \\(\psi(x)\\) is the digamma function.
-)doc");
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("Atan2")
     .Input("y: T")
     .Input("x: T")
     .Output("z: T")
-    .Attr("T: {float, double}")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Computes arctangent of `y/x` element-wise, respecting signs of the arguments.
-This is the angle \( \theta \in [-\pi, \pi] \) such that
-\[ x = r \cos(\theta) \]
-and
-\[ y = r \sin(\theta) \]
-where \(r = \sqrt(x^2 + y^2) \).
-)doc");
+    .Attr("T: {bfloat16, float, double}")
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("Betainc")
     .Input("a: T")
@@ -994,24 +543,7 @@ REGISTER_OP("Betainc")
 
       c->set_output(0, output);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Compute the regularized incomplete beta integral \\(I_x(a, b)\\).
-
-The regularized incomplete beta integral is defined as:
-
-
-\\(I_x(a, b) = \frac{B(x; a, b)}{B(a, b)}\\)
-
-where
-
-
-\\(B(x; a, b) = \int_0^x t^{a-1} (1 - t)^{b-1} dt\\)
-
-
-is the incomplete beta function and \\(B(a, b)\\) is the *complete*
-beta function.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1024,74 +556,32 @@ beta function.
       .Attr("T: realnumbertype") \
       .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
 
-REGISTER_OP("Less")
-    .COMPARISON()
-    .Doc(R"doc(
-Returns the truth value of (x < y) element-wise.
+REGISTER_OP("Less").COMPARISON();
 
-*NOTE*: `Less` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+REGISTER_OP("LessEqual").COMPARISON();
 
-REGISTER_OP("LessEqual")
-    .COMPARISON()
-    .Doc(R"doc(
-Returns the truth value of (x <= y) element-wise.
+REGISTER_OP("Greater").COMPARISON();
 
-*NOTE*: `LessEqual` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
-
-REGISTER_OP("Greater")
-    .COMPARISON()
-    .Doc(R"doc(
-Returns the truth value of (x > y) element-wise.
-
-*NOTE*: `Greater` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
-
-REGISTER_OP("GreaterEqual")
-    .COMPARISON()
-    .Doc(R"doc(
-Returns the truth value of (x >= y) element-wise.
-
-*NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+REGISTER_OP("GreaterEqual").COMPARISON();
 
 #undef COMPARISON
 
 // --------------------------------------------------------------------------
 
-#define EQUALITY_COMPARISON()                                           \
-  Input("x: T")                                                         \
-      .Input("y: T")                                                    \
-      .Output("z: bool")                                                \
-      .SetIsCommutative()                                               \
-      .Attr(                                                            \
-          "T: {half, float, double, uint8, int8, int16, int32, int64, " \
-          "complex64, "                                                 \
-          "quint8, qint8, qint32, string, bool, complex128}")           \
+#define EQUALITY_COMPARISON()                                              \
+  Input("x: T")                                                            \
+      .Input("y: T")                                                       \
+      .Output("z: bool")                                                   \
+      .SetIsCommutative()                                                  \
+      .Attr(                                                               \
+          "T: {half, bfloat16, float, double, uint8, int8, int16, int32, " \
+          "int64, complex64, quint8, qint8, qint32, string, bool, "        \
+          "complex128}")                                                   \
       .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
 
-REGISTER_OP("Equal")
-    .EQUALITY_COMPARISON()
-    .Doc(R"doc(
-Returns the truth value of (x == y) element-wise.
+REGISTER_OP("Equal").EQUALITY_COMPARISON();
 
-*NOTE*: `Equal` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
-
-REGISTER_OP("NotEqual")
-    .EQUALITY_COMPARISON()
-    .Doc(R"doc(
-Returns the truth value of (x != y) element-wise.
-
-*NOTE*: `NotEqual` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+REGISTER_OP("NotEqual").EQUALITY_COMPARISON();
 
 #undef EQUALITY_COMPARISON
 
@@ -1102,20 +592,14 @@ REGISTER_OP("ApproximateEqual")
     .SetIsCommutative()
     .Attr("T: numbertype")
     .Attr("tolerance: float = 0.00001")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns the truth value of abs(x-y) < tolerance element-wise.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // --------------------------------------------------------------------------
 
 REGISTER_OP("LogicalNot")
     .Input("x: bool")
     .Output("y: bool")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns the truth value of NOT x element-wise.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 #define BINARY_LOGICAL()  \
   Input("x: bool")        \
@@ -1124,23 +608,9 @@ Returns the truth value of NOT x element-wise.
       .SetIsCommutative() \
       .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
 
-REGISTER_OP("LogicalAnd")
-    .BINARY_LOGICAL()
-    .Doc(R"doc(
-Returns the truth value of x AND y element-wise.
-
-*NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+REGISTER_OP("LogicalAnd").BINARY_LOGICAL();
 
-REGISTER_OP("LogicalOr")
-    .BINARY_LOGICAL()
-    .Doc(R"doc(
-Returns the truth value of x OR y element-wise.
-
-*NOTE*: `LogicalOr` supports broadcasting. More about broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+REGISTER_OP("LogicalOr").BINARY_LOGICAL();
 
 #undef BINARY_LOGICAL
 
@@ -1233,55 +703,7 @@ REGISTER_OP("Select")
       c->set_output(0, data);
 
       return Status::OK();
-    })
-    .Doc(R"doc(
-Selects elements from `t` or `e`, depending on `condition`.
-
-The `t`, and `e` tensors must all have the same shape, and the
-output will also have that shape.
-
-The `condition` tensor must be a scalar if `t` and `e` are scalars.
-If `t` and `e` are vectors or higher rank, then `condition` must be either a
-scalar, a vector with size matching the first dimension of `t`, or must have
-the same shape as `t`.
-
-The `condition` tensor acts as a mask that chooses, based on the value at each
-element, whether the corresponding element / row in the output should be
-taken from `t` (if true) or `e` (if false).
-
-If `condition` is a vector and `t` and `e` are higher rank matrices, then
-it chooses which row (outer dimension) to copy from `t` and `e`.
-If `condition` has the same shape as `t` and `e`, then it chooses which
-element to copy from `t` and `e`.
-
-For example:
-
-```python
-# 'condition' tensor is [[True,  False]
-#                        [False, True]]
-# 't' is [[1, 2],
-#         [3, 4]]
-# 'e' is [[5, 6],
-#         [7, 8]]
-select(condition, t, e)  # => [[1, 6], [7, 4]]
-
-
-# 'condition' tensor is [True, False]
-# 't' is [[1, 2],
-#         [3, 4]]
-# 'e' is [[5, 6],
-#         [7, 8]]
-select(condition, t, e) ==> [[1, 2],
-                             [7, 8]]
-
-```
-
-t:= A `Tensor` which may have the same shape as `condition`.
-    If `condition` is rank 1, `t` may have higher rank,
-    but its first dimension must match the size of `condition`.
-e:= A `Tensor` with the same type and shape as `t`.
-output:= A `Tensor` with the same type and shape as `t` and `e`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1291,22 +713,8 @@ REGISTER_OP("MatMul")
     .Output("product: T")
     .Attr("transpose_a: bool = false")
     .Attr("transpose_b: bool = false")
-    .Attr("T: {half, float, double, int32, complex64, complex128}")
-    .SetShapeFn(shape_inference::MatMulShape)
-    .Doc(R"doc(
-Multiply the matrix "a" by the matrix "b".
-
-The inputs must be two-dimensional matrices and the inner dimension of
-"a" (after being transposed if transpose_a is true) must match the
-outer dimension of "b" (after being transposed if transposed_b is
-true).
-
-*Note*: The default kernel implementation for MatMul on GPUs uses
-cublas.
-
-transpose_a: If true, "a" is transposed before multiplication.
-transpose_b: If true, "b" is transposed before multiplication.
-)doc");
+    .Attr("T: {half, bfloat16, float, double, int32, complex64, complex128}")
+    .SetShapeFn(shape_inference::MatMulShape);
 
 REGISTER_OP("SparseMatMul")
     .Input("a: Ta")
@@ -1318,18 +726,7 @@ REGISTER_OP("SparseMatMul")
     .Attr("b_is_sparse: bool = false")
     .Attr("Ta: {float, bfloat16} = DT_FLOAT")
     .Attr("Tb: {float, bfloat16} = DT_FLOAT")
-    .SetShapeFn(shape_inference::MatMulShape)
-    .Doc(R"doc(
-Multiply matrix "a" by matrix "b".
-
-The inputs must be two-dimensional matrices and the inner dimension of "a" must
-match the outer dimension of "b". This op is optimized for the case where at
-least one of "a" or "b" is sparse. The breakeven for using this versus a dense
-matrix multiply on one platform was 30% zero values in the sparse matrix.
-
-The gradient computation of this operation will only take advantage of sparsity
-in the input gradient when that gradient comes from a Relu.
-)doc");
+    .SetShapeFn(shape_inference::MatMulShape);
 
 // --------------------------------------------------------------------------
 
@@ -1342,21 +739,7 @@ REGISTER_OP("Sum")
     .Attr("keep_dims: bool = false")
     .Attr("T: numbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::ReductionShape)
-    .Doc(R"doc(
-Computes the sum of elements across dimensions of a tensor.
-
-Reduces `input` along the dimensions given in `reduction_indices`. Unless
-`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-`reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-retained with length 1.
-
-input: The tensor to reduce.
-reduction_indices: The dimensions to reduce. Must be in the range
-  `[-rank(input), rank(input))`.
-keep_dims: If true, retain reduced dimensions with length 1.
-output: The reduced tensor.
-)doc");
+    .SetShapeFn(shape_inference::ReductionShape);
 
 REGISTER_OP("Mean")
     .Input("input: T")
@@ -1365,21 +748,7 @@ REGISTER_OP("Mean")
     .Attr("keep_dims: bool = false")
     .Attr("T: numbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::ReductionShape)
-    .Doc(R"doc(
-Computes the mean of elements across dimensions of a tensor.
-
-Reduces `input` along the dimensions given in `reduction_indices`. Unless
-`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-`reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-retained with length 1.
-
-input: The tensor to reduce.
-reduction_indices: The dimensions to reduce. Must be in the range
-  `[-rank(input), rank(input))`.
-keep_dims: If true, retain reduced dimensions with length 1.
-output: The reduced tensor.
-)doc");
+    .SetShapeFn(shape_inference::ReductionShape);
 
 REGISTER_OP("Prod")
     .Input("input: T")
@@ -1388,21 +757,7 @@ REGISTER_OP("Prod")
     .Attr("keep_dims: bool = false")
     .Attr("T: numbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::ReductionShape)
-    .Doc(R"doc(
-Computes the product of elements across dimensions of a tensor.
-
-Reduces `input` along the dimensions given in `reduction_indices`. Unless
-`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-`reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-retained with length 1.
-
-input: The tensor to reduce.
-reduction_indices: The dimensions to reduce. Must be in the range
-  `[-rank(input), rank(input))`.
-keep_dims: If true, retain reduced dimensions with length 1.
-output: The reduced tensor.
-)doc");
+    .SetShapeFn(shape_inference::ReductionShape);
 
 REGISTER_OP("Min")
     .Input("input: T")
@@ -1411,21 +766,7 @@ REGISTER_OP("Min")
     .Attr("keep_dims: bool = false")
     .Attr("T: numbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::ReductionShape)
-    .Doc(R"doc(
-Computes the minimum of elements across dimensions of a tensor.
-
-Reduces `input` along the dimensions given in `reduction_indices`. Unless
-`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-`reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-retained with length 1.
-
-input: The tensor to reduce.
-reduction_indices: The dimensions to reduce. Must be in the range
-  `[-rank(input), rank(input))`.
-keep_dims: If true, retain reduced dimensions with length 1.
-output: The reduced tensor.
-)doc");
+    .SetShapeFn(shape_inference::ReductionShape);
 
 REGISTER_OP("Max")
     .Input("input: T")
@@ -1434,21 +775,7 @@ REGISTER_OP("Max")
     .Attr("keep_dims: bool = false")
     .Attr("T: numbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::ReductionShape)
-    .Doc(R"doc(
-Computes the maximum of elements across dimensions of a tensor.
-
-Reduces `input` along the dimensions given in `reduction_indices`. Unless
-`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-`reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-retained with length 1.
-
-input: The tensor to reduce.
-reduction_indices: The dimensions to reduce. Must be in the range
-  `[-rank(input), rank(input))`.
-keep_dims: If true, retain reduced dimensions with length 1.
-output: The reduced tensor.
-)doc");
+    .SetShapeFn(shape_inference::ReductionShape);
 
 namespace {
 
@@ -1516,16 +843,7 @@ REGISTER_OP("ArgMax")
     .Attr("T: numbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
     .Attr("output_type: {int32, int64} = DT_INT64")
-    .SetShapeFn(ArgOpShape)
-    .Doc(R"doc(
-Returns the index with the largest value across dimensions of a tensor.
-
-Note that in case of ties the identity of the return value is not guaranteed.
-
-dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
-  Describes which dimension of the input Tensor to reduce across. For vectors,
-  use dimension = 0.
-)doc");
+    .SetShapeFn(ArgOpShape);
 
 REGISTER_OP("ArgMin")
     .Input("input: T")
@@ -1534,16 +852,7 @@ REGISTER_OP("ArgMin")
     .Attr("T: numbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
     .Attr("output_type: {int32, int64} = DT_INT64")
-    .SetShapeFn(ArgOpShape)
-    .Doc(R"doc(
-Returns the index with the smallest value across dimensions of a tensor.
-
-Note that in case of ties the identity of the return value is not guaranteed.
-
-dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
-  Describes which dimension of the input Tensor to reduce across. For vectors,
-  use dimension = 0.
-)doc");
+    .SetShapeFn(ArgOpShape);
 
 namespace {
 
@@ -1587,40 +896,79 @@ Status SparseSegmentReductionShapeFn(InferenceContext* c) {
   return Status::OK();
 }
 
-Status SparseSegmentReductionGradShapeFn(InferenceContext* c) {
+Status SparseSegmentReductionGradShapeFn(InferenceContext* c) {
+  ShapeHandle data_shape;
+  TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &data_shape));
+
+  ShapeHandle indices_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &indices_shape));
+
+  // indices and segment_ids should merge cleanly.
+  ShapeHandle unused;
+  TF_RETURN_IF_ERROR(c->Merge(c->input(2), indices_shape, &unused));
+
+  // output_dim0 should be a scalar
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+
+  ShapeHandle subshape;
+  TF_RETURN_IF_ERROR(c->Subshape(data_shape, 1, &subshape));
+
+  const Tensor* dim0 = c->input_tensor(3);
+  ShapeHandle dim0_shape;
+  if (dim0 == nullptr) {
+    // We don't have the value at inference time, so the output
+    // shape is unknown.
+    dim0_shape = c->Vector(InferenceContext::kUnknownDim);
+  } else {
+    auto dim0_value = dim0->scalar<int32>()();
+    if (dim0_value < 0) {
+      return errors::InvalidArgument(
+          "Cannot specify a negative value for output_dim0");
+    }
+    dim0_shape = c->Vector(dim0_value);
+  }
+
+  ShapeHandle out;
+  TF_RETURN_IF_ERROR(c->Concatenate(dim0_shape, subshape, &out));
+  c->set_output(0, out);
+  return Status::OK();
+}
+
+Status SparseSegmentReductionWithNumSegmentsShapeFn(InferenceContext* c) {
   ShapeHandle data_shape;
   TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &data_shape));
 
   ShapeHandle indices_shape;
   TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &indices_shape));
 
+  ShapeHandle segment_ids_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &segment_ids_shape));
+
+  ShapeHandle num_segments_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &num_segments_shape));
+
   // indices and segment_ids should merge cleanly.
   ShapeHandle unused;
-  TF_RETURN_IF_ERROR(c->Merge(c->input(2), indices_shape, &unused));
-
-  // output_dim0 should be a scalar
-  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+  TF_RETURN_IF_ERROR(c->Merge(indices_shape, segment_ids_shape, &unused));
 
   ShapeHandle subshape;
   TF_RETURN_IF_ERROR(c->Subshape(data_shape, 1, &subshape));
 
+  ShapeHandle out;
   const Tensor* dim0 = c->input_tensor(3);
-  ShapeHandle dim0_shape;
   if (dim0 == nullptr) {
     // We don't have the value at inference time, so the output
     // shape is unknown.
-    dim0_shape = c->Vector(InferenceContext::kUnknownDim);
+    TF_RETURN_IF_ERROR(c->Concatenate(c->Vector(InferenceContext::kUnknownDim),
+                                      subshape, &out));
   } else {
     auto dim0_value = dim0->scalar<int32>()();
     if (dim0_value < 0) {
       return errors::InvalidArgument(
-          "Cannot specify a negative value for output_dim0");
+          "Cannot specify a negative value for num_segments");
     }
-    dim0_shape = c->Vector(dim0_value);
+    TF_RETURN_IF_ERROR(c->Concatenate(c->Vector(dim0_value), subshape, &out));
   }
-
-  ShapeHandle out;
-  TF_RETURN_IF_ERROR(c->Concatenate(dim0_shape, subshape, &out));
   c->set_output(0, out);
   return Status::OK();
 }
@@ -1663,29 +1011,7 @@ REGISTER_OP("SegmentSum")
     .Output("output: T")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32,int64}")
-    .SetShapeFn(SegmentReductionShapeFn)
-    .Doc(R"doc(
-Computes the sum along segments of a tensor.
-
-Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-segments.
-
-Computes a tensor such that
-\\(output_i = \sum_j data_j\\) where sum is over `j` such
-that `segment_ids[j] == i`.
-
-If the sum is empty for a given segment ID `i`, `output[i] = 0`.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/SegmentSum.png" alt>
-</div>
-
-segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-first dimension.  Values should be sorted and can be repeated.
-
-output: Has same shape as data, except for dimension 0 which
-  has size `k`, the number of segments.
-)doc");
+    .SetShapeFn(SegmentReductionShapeFn);
 
 REGISTER_OP("SegmentMean")
     .Input("data: T")
@@ -1693,30 +1019,7 @@ REGISTER_OP("SegmentMean")
     .Output("output: T")
     .Attr("T: realnumbertype")
     .Attr("Tindices: {int32,int64}")
-    .SetShapeFn(SegmentReductionShapeFn)
-    .Doc(R"doc(
-Computes the mean along segments of a tensor.
-
-Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-segments.
-
-Computes a tensor such that
-\\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is
-over `j` such that `segment_ids[j] == i` and `N` is the total number of
-values summed.
-
-If the mean is empty for a given segment ID `i`, `output[i] = 0`.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/SegmentMean.png" alt>
-</div>
-
-segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-first dimension.  Values should be sorted and can be repeated.
-
-output: Has same shape as data, except for dimension 0 which
-  has size `k`, the number of segments.
-)doc");
+    .SetShapeFn(SegmentReductionShapeFn);
 
 REGISTER_OP("SegmentProd")
     .Input("data: T")
@@ -1724,29 +1027,7 @@ REGISTER_OP("SegmentProd")
     .Output("output: T")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32,int64}")
-    .SetShapeFn(SegmentReductionShapeFn)
-    .Doc(R"doc(
-Computes the product along segments of a tensor.
-
-Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-segments.
-
-Computes a tensor such that
-\\(output_i = \prod_j data_j\\) where the product is over `j` such
-that `segment_ids[j] == i`.
-
-If the product is empty for a given segment ID `i`, `output[i] = 1`.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/SegmentProd.png" alt>
-</div>
-
-segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-first dimension.  Values should be sorted and can be repeated.
-
-output: Has same shape as data, except for dimension 0 which
-  has size `k`, the number of segments.
-)doc");
+    .SetShapeFn(SegmentReductionShapeFn);
 
 REGISTER_OP("SegmentMin")
     .Input("data: T")
@@ -1754,29 +1035,7 @@ REGISTER_OP("SegmentMin")
     .Output("output: T")
     .Attr("T: realnumbertype")
     .Attr("Tindices: {int32,int64}")
-    .SetShapeFn(SegmentReductionShapeFn)
-    .Doc(R"doc(
-Computes the minimum along segments of a tensor.
-
-Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-segments.
-
-Computes a tensor such that
-\\(output_i = \min_j(data_j)\\) where `min` is over `j` such
-that `segment_ids[j] == i`.
-
-If the min is empty for a given segment ID `i`, `output[i] = 0`.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/SegmentMin.png" alt>
-</div>
-
-segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-first dimension.  Values should be sorted and can be repeated.
-
-output: Has same shape as data, except for dimension 0 which
-  has size `k`, the number of segments.
-)doc");
+    .SetShapeFn(SegmentReductionShapeFn);
 
 REGISTER_OP("SegmentMax")
     .Input("data: T")
@@ -1784,103 +1043,28 @@ REGISTER_OP("SegmentMax")
     .Output("output: T")
     .Attr("T: realnumbertype")
     .Attr("Tindices: {int32,int64}")
-    .SetShapeFn(SegmentReductionShapeFn)
-    .Doc(R"doc(
-Computes the maximum along segments of a tensor.
-
-Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-segments.
-
-Computes a tensor such that
-\\(output_i = \max_j(data_j)\\) where `max` is over `j` such
-that `segment_ids[j] == i`.
-
-If the max is empty for a given segment ID `i`, `output[i] = 0`.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/SegmentMax.png" alt>
-</div>
-
-segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-first dimension.  Values should be sorted and can be repeated.
-
-output: Has same shape as data, except for dimension 0 which
-  has size `k`, the number of segments.
-)doc");
+    .SetShapeFn(SegmentReductionShapeFn);
 
 REGISTER_OP("UnsortedSegmentSum")
     .Input("data: T")
     .Input("segment_ids: Tindices")
-    .Input("num_segments: int32")
+    .Input("num_segments: Tnumsegments")
     .Output("output: T")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32,int64}")
-    .SetShapeFn(UnsortedSegmentReductionShapeFn)
-    .Doc(R"doc(
-Computes the sum along segments of a tensor.
-
-Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-segments.
-
-Computes a tensor such that
-`(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such
-that `segment_ids[j...] == i`.  Unlike `SegmentSum`, `segment_ids`
-need not be sorted and need not cover all values in the full
-range of valid values.
-
-If the sum is empty for a given segment ID `i`, `output[i] = 0`.
-If the given segment ID `i` is negative, the value is dropped and will not be
-added to the sum of the segment.
-
-`num_segments` should equal the number of distinct segment IDs.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentSum.png" alt>
-</div>
-
-segment_ids: A tensor whose shape is a prefix of `data.shape`.
-
-output: Has same shape as data, except for the first `segment_ids.rank`
-  dimensions, which are replaced with a single dimension which has size
-  `num_segments`.
-
-)doc");
+    .Attr("Tnumsegments: {int32,int64} = DT_INT32")
+    .SetShapeFn(UnsortedSegmentReductionShapeFn);
 
 REGISTER_OP("UnsortedSegmentMax")
     .Input("data: T")
     .Input("segment_ids: Tindices")
-    .Input("num_segments: int32")
+    .Input("num_segments: Tnumsegments")
     .Output("output: T")
     .Attr("T: realnumbertype")
     .Attr("Tindices: {int32,int64}")
-    .SetShapeFn(UnsortedSegmentReductionShapeFn)
-    .Doc(R"doc(
-Computes the Max along segments of a tensor.
-
-Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-segments.
-
-This operator is similar to the [unsorted segment sum operator](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
-Instead of computing the sum over segments, it computes the maximum
-such that:
-
-\\(output_i = \max_j data_j\\) where max is over `j` such
-that `segment_ids[j] == i`.
+    .Attr("Tnumsegments: {int32,int64} = DT_INT32")
+    .SetShapeFn(UnsortedSegmentReductionShapeFn);
 
-If the maximum is empty for a given segment ID `i`, it outputs the smallest possible value for specific numeric type,
- `output[i] = numeric_limits<T>::min()`.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentMax.png" alt>
-</div>
-
-segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-first dimension.
-
-output: Has same shape as data, except for dimension 0 which
-has size `num_segments`.
-
-)doc");
 REGISTER_OP("SparseSegmentSum")
     .Input("data: T")
     .Input("indices: Tidx")
@@ -1888,46 +1072,18 @@ REGISTER_OP("SparseSegmentSum")
     .Output("output: T")
     .Attr("T: realnumbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionShapeFn)
-    .Doc(R"doc(
-Computes the sum along sparse segments of a tensor.
-
-Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-segments.
-
-Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
-dimension, selecting a subset of dimension 0, specified by `indices`.
+    .SetShapeFn(SparseSegmentReductionShapeFn);
 
-For example:
-
-```python
-c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
-
-# Select two rows, one segment.
-tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
-# => [[0 0 0 0]]
-
-# Select two rows, two segment.
-tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
-# => [[ 1  2  3  4]
-#     [-1 -2 -3 -4]]
-
-# Select all rows, two segments.
-tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
-# => [[0 0 0 0]
-#     [5 6 7 8]]
-
-# Which is equivalent to:
-tf.segment_sum(c, tf.constant([0, 0, 1]))
-```
-
-indices: A 1-D tensor. Has same rank as `segment_ids`.
-
-segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
-
-output: Has same shape as data, except for dimension 0 which
-  has size `k`, the number of segments.
-)doc");
+REGISTER_OP("SparseSegmentSumWithNumSegments")
+    .Input("data: T")
+    .Input("indices: Tidx")
+    .Input("segment_ids: int32")
+    .Input("num_segments: Tnumsegments")
+    .Output("output: T")
+    .Attr("T: realnumbertype")
+    .Attr("Tidx: {int32, int64} = DT_INT32")
+    .Attr("Tnumsegments: {int32,int64} = DT_INT32")
+    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn);
 
 REGISTER_OP("SparseSegmentMean")
     .Input("data: T")
@@ -1936,24 +1092,18 @@ REGISTER_OP("SparseSegmentMean")
     .Output("output: T")
     .Attr("T: {float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionShapeFn)
-    .Doc(R"doc(
-Computes the mean along sparse segments of a tensor.
+    .SetShapeFn(SparseSegmentReductionShapeFn);
 
-Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-segments.
-
-Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first
-dimension, selecting a subset of dimension 0, specified by `indices`.
-
-indices: A 1-D tensor. Has same rank as `segment_ids`.
-
-segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
-
-output: Has same shape as data, except for dimension 0 which
-  has size `k`, the number of segments.
-
-)doc");
+REGISTER_OP("SparseSegmentMeanWithNumSegments")
+    .Input("data: T")
+    .Input("indices: Tidx")
+    .Input("segment_ids: int32")
+    .Input("num_segments: Tnumsegments")
+    .Output("output: T")
+    .Attr("T: {float, double}")
+    .Attr("Tidx: {int32, int64} = DT_INT32")
+    .Attr("Tnumsegments: {int32,int64} = DT_INT32")
+    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn);
 
 REGISTER_OP("SparseSegmentMeanGrad")
     .Input("grad: T")
@@ -1963,18 +1113,7 @@ REGISTER_OP("SparseSegmentMeanGrad")
     .Output("output: T")
     .Attr("T: {float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionGradShapeFn)
-    .Doc(R"doc(
-Computes gradients for SparseSegmentMean.
-
-Returns tensor "output" with same shape as grad, except for dimension 0 whose
-value is output_dim0.
-
-grad: gradient propagated to the SparseSegmentMean op.
-indices: indices passed to the corresponding SparseSegmentMean op.
-segment_ids: segment_ids passed to the corresponding SparseSegmentMean op.
-output_dim0: dimension 0 of "data" passed to SparseSegmentMean op.
-)doc");
+    .SetShapeFn(SparseSegmentReductionGradShapeFn);
 
 REGISTER_OP("SparseSegmentSqrtN")
     .Input("data: T")
@@ -1983,23 +1122,18 @@ REGISTER_OP("SparseSegmentSqrtN")
     .Output("output: T")
     .Attr("T: {float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionShapeFn)
-    .Doc(R"doc(
-Computes the sum along sparse segments of a tensor divided by the sqrt of N.
-
-N is the size of the segment being reduced.
-
-Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-segments.
-
-indices: A 1-D tensor. Has same rank as `segment_ids`.
+    .SetShapeFn(SparseSegmentReductionShapeFn);
 
-segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
-
-output: Has same shape as data, except for dimension 0 which
-  has size `k`, the number of segments.
-
-)doc");
+REGISTER_OP("SparseSegmentSqrtNWithNumSegments")
+    .Input("data: T")
+    .Input("indices: Tidx")
+    .Input("segment_ids: int32")
+    .Input("num_segments: Tnumsegments")
+    .Output("output: T")
+    .Attr("T: {float, double}")
+    .Attr("Tidx: {int32, int64} = DT_INT32")
+    .Attr("Tnumsegments: {int32,int64} = DT_INT32")
+    .SetShapeFn(SparseSegmentReductionWithNumSegmentsShapeFn);
 
 REGISTER_OP("SparseSegmentSqrtNGrad")
     .Input("grad: T")
@@ -2009,18 +1143,7 @@ REGISTER_OP("SparseSegmentSqrtNGrad")
     .Output("output: T")
     .Attr("T: {float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(SparseSegmentReductionGradShapeFn)
-    .Doc(R"doc(
-Computes gradients for SparseSegmentSqrtN.
-
-Returns tensor "output" with same shape as grad, except for dimension 0 whose
-value is output_dim0.
-
-grad: gradient propagated to the SparseSegmentSqrtN op.
-indices: indices passed to the corresponding SparseSegmentSqrtN op.
-segment_ids: segment_ids passed to the corresponding SparseSegmentSqrtN op.
-output_dim0: dimension 0 of "data" passed to SparseSegmentSqrtN op.
-)doc");
+    .SetShapeFn(SparseSegmentReductionGradShapeFn);
 
 REGISTER_OP("All")
     .Input("input: bool")
@@ -2028,21 +1151,7 @@ REGISTER_OP("All")
     .Output("output: bool")
     .Attr("keep_dims: bool = false")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::ReductionShape)
-    .Doc(R"doc(
-Computes the "logical and" of elements across dimensions of a tensor.
-
-Reduces `input` along the dimensions given in `reduction_indices`. Unless
-`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-`reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-retained with length 1.
-
-input: The tensor to reduce.
-reduction_indices: The dimensions to reduce. Must be in the range
-  `[-rank(input), rank(input))`.
-keep_dims: If true, retain reduced dimensions with length 1.
-output: The reduced tensor.
-)doc");
+    .SetShapeFn(shape_inference::ReductionShape);
 
 REGISTER_OP("Any")
     .Input("input: bool")
@@ -2050,21 +1159,7 @@ REGISTER_OP("Any")
     .Attr("keep_dims: bool = false")
     .Output("output: bool")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::ReductionShape)
-    .Doc(R"doc(
-Computes the "logical or" of elements across dimensions of a tensor.
-
-Reduces `input` along the dimensions given in `reduction_indices`. Unless
-`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-`reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-retained with length 1.
-
-input: The tensor to reduce.
-reduction_indices: The dimensions to reduce. Must be in the range
-  `[-rank(input), rank(input))`.
-keep_dims: If true, retain reduced dimensions with length 1.
-output: The reduced tensor.
-)doc");
+    .SetShapeFn(shape_inference::ReductionShape);
 
 // --------------------------------------------------------------------------
 
@@ -2103,7 +1198,7 @@ REGISTER_OP("Range")
     .Input("limit: Tidx")
     .Input("delta: Tidx")
     .Output("output: Tidx")
-    .Attr("Tidx: {float, double, int32, int64} = DT_INT32")
+    .Attr("Tidx: {bfloat16, float, double, int32, int64} = DT_INT32")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_WITH_CONTEXT_IF_ERROR(c->WithRank(c->input(0), 0, &unused),
@@ -2131,34 +1226,14 @@ REGISTER_OP("Range")
         return RangeSize<double>(start_t, limit_t, delta_t, c);
       }
       return Status::OK();
-    })
-    .Doc(R"doc(
-Creates a sequence of numbers.
-
-This operation creates a sequence of numbers that begins at `start` and
-extends by increments of `delta` up to but not including `limit`.
-
-For example:
-
-```
-# 'start' is 3
-# 'limit' is 18
-# 'delta' is 3
-tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15]
-```
-
-start: 0-D (scalar). First entry in the sequence.
-limit: 0-D (scalar). Upper limit of sequence, exclusive.
-delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`.
-output: 1-D.
-)doc");
+    });
 
 REGISTER_OP("LinSpace")
     .Input("start: T")
     .Input("stop: T")
     .Input("num: Tidx")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .Attr("Tidx: {int32, int64} = DT_INT32")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
@@ -2183,25 +1258,7 @@ REGISTER_OP("LinSpace")
       if (num <= 0) return errors::InvalidArgument("Requires num > 0: ", num);
       c->set_output(0, c->Vector(num));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Generates values in an interval.
-
-A sequence of `num` evenly-spaced values are generated beginning at `start`.
-If `num > 1`, the values in the sequence increase by `stop - start / num - 1`,
-so that the last one is exactly `stop`.
-
-For example:
-
-```
-tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0  11.0  12.0]
-```
-
-start: First entry in the range.
-stop: Last entry in the range.
-num: Number of values to generate.
-output: 1-D. The generated values.
-)doc");
+    });
 
 REGISTER_OP("Complex")
     .Input("real: T")
@@ -2209,120 +1266,34 @@ REGISTER_OP("Complex")
     .Output("out: Tout")
     .Attr("T: {float, double} = DT_FLOAT")
     .Attr("Tout: {complex64, complex128} = DT_COMPLEX64")
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
-    .Doc(R"doc(
-Converts two real numbers to a complex number.
-
-Given a tensor `real` representing the real part of a complex number, and a
-tensor `imag` representing the imaginary part of a complex number, this
-operation returns complex numbers elementwise of the form \\(a + bj\\), where
-*a* represents the `real` part and *b* represents the `imag` part.
-
-The input tensors `real` and `imag` must have the same shape.
-
-For example:
-
-```
-# tensor 'real' is [2.25, 3.25]
-# tensor `imag` is [4.75, 5.75]
-tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]]
-```
-)doc");
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);
 
 REGISTER_OP("Real")
     .Input("input: T")
     .Output("output: Tout")
     .Attr("T: {complex64, complex128} = DT_COMPLEX64")
     .Attr("Tout: {float, double} = DT_FLOAT")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns the real part of a complex number.
-
-Given a tensor `input` of complex numbers, this operation returns a tensor of
-type `float` that is the real part of each element in `input`. All elements in
-`input` must be complex numbers of the form \\(a + bj\\), where *a* is the real
- part returned by this operation and *b* is the imaginary part.
-
-For example:
-
-```
-# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-tf.real(input) ==> [-2.25, 3.25]
-```
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Imag")
     .Input("input: T")
     .Output("output: Tout")
     .Attr("T: {complex64, complex128} = DT_COMPLEX64")
     .Attr("Tout: {float, double} = DT_FLOAT")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns the imaginary part of a complex number.
-
-Given a tensor `input` of complex numbers, this operation returns a tensor of
-type `float` that is the imaginary part of each element in `input`. All
-elements in `input` must be complex numbers of the form \\(a + bj\\), where *a*
-is the real part and *b* is the imaginary part returned by this operation.
-
-For example:
-
-```
-# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-tf.imag(input) ==> [4.75, 5.75]
-```
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Angle")
     .Input("input: T")
     .Output("output: Tout")
     .Attr("T: {complex64, complex128} = DT_COMPLEX64")
     .Attr("Tout: {float, double} = DT_FLOAT")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns the argument of a complex number.
-
-Given a tensor `input` of complex numbers, this operation returns a tensor of
-type `float` that is the argument of each element in `input`. All elements in
-`input` must be complex numbers of the form \\(a + bj\\), where *a*
-is the real part and *b* is the imaginary part.
-
-The argument returned by this operation is of the form \\(atan2(b, a)\\).
-
-For example:
-
-```
-# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-tf.angle(input) ==> [2.0132, 1.056]
-```
-
-@compatibility(numpy)
-Equivalent to np.angle.
-@end_compatibility
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Conj")
     .Input("input: T")
     .Output("output: T")
     .Attr("T: {complex64, complex128, variant} = DT_COMPLEX64")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Returns the complex conjugate of a complex number.
-
-Given a tensor `input` of complex numbers, this operation returns a tensor of
-complex numbers that are the complex conjugate of each element in `input`. The
-complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the
-real part and *b* is the imaginary part.
-
-The complex conjugate returned by this operation is of the form \\(a - bj\\).
-
-For example:
-
-```
-# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
-```
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 // --------------------------------------------------------------------------
 
@@ -2349,18 +1320,7 @@ REGISTER_OP("Cross")
       }
       c->set_output(0, a_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Compute the pairwise cross product.
-
-`a` and `b` must be the same shape; they can either be simple 3-element vectors,
-or any shape where the innermost dimension is 3. In the latter case, each pair
-of corresponding 3-element vectors is cross-multiplied independently.
-
-a: A tensor containing 3-element vectors.
-b: Another tensor, of same type and shape as `a`.
-product: Pairwise cross product of the vectors in `a` and `b`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -2381,33 +1341,7 @@ REGISTER_OP("HistogramFixedWidth")
         c->set_output(0, c->UnknownShapeOfRank(1));
       }
       return Status::OK();
-    })
-    .Doc(R"doc(
-Return histogram of values.
-
-Given the tensor `values`, this operation returns a rank 1 histogram counting
-the number of entries in `values` that fall into every bin.  The bins are
-equal width and determined by the arguments `value_range` and `nbins`.
-
-```python
-# Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
-nbins = 5
-value_range = [0.0, 5.0]
-new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
-
-with tf.get_default_session() as sess:
-  hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
-  variables.global_variables_initializer().run()
-  sess.run(hist) => [2, 1, 1, 0, 2]
-```
-
-values:  Numeric `Tensor`.
-value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
-  values <= value_range[0] will be mapped to hist[0],
-  values >= value_range[1] will be mapped to hist[-1].
-nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
-out: A 1-D `Tensor` holding histogram of values.
-)doc");
+    });
 
 REGISTER_OP("Bincount")
     .Input("arr: int32")
@@ -2418,27 +1352,7 @@ REGISTER_OP("Bincount")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->UnknownShapeOfRank(1));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Counts the number of occurrences of each value in an integer array.
-
-Outputs a vector with length `size` and the same dtype as `weights`. If
-`weights` are empty, then index `i` stores the number of times the value `i` is
-counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of
-the value in `weights` at each index where the corresponding value in `arr` is
-`i`.
-
-Values in `arr` outside of the range [0, size) are ignored.
-
-arr: int32 `Tensor`.
-size: non-negative int32 scalar `Tensor`.
-weights: is an int32, int64, float32, or float64 `Tensor` with the same
-    shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights
-    equal to 1.
-
-bins: 1D `Tensor` with length equal to `size`. The counts or summed weights for
-    each value in the range [0, size).
-)doc");
+    });
 
 REGISTER_OP("Cumsum")
     .Input("x: T")
@@ -2448,47 +1362,7 @@ REGISTER_OP("Cumsum")
     .Output("out: T")
     .Attr("T: numbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Compute the cumulative sum of the tensor `x` along `axis`.
-
-By default, this op performs an inclusive cumsum, which means that the first
-element of the input is identical to the first element of the output:
-
-```python
-tf.cumsum([a, b, c])  # => [a, a + b, a + b + c]
-```
-
-By setting the `exclusive` kwarg to `True`, an exclusive cumsum is
-performed instead:
-
-```python
-tf.cumsum([a, b, c], exclusive=True)  # => [0, a, a + b]
-```
-
-By setting the `reverse` kwarg to `True`, the cumsum is performed in the
-opposite direction:
-
-```python
-tf.cumsum([a, b, c], reverse=True)  # => [a + b + c, b + c, c]
-```
-
-This is more efficient than using separate `tf.reverse` ops.
-
-The `reverse` and `exclusive` kwargs can also be combined:
-
-```python
-tf.cumsum([a, b, c], exclusive=True, reverse=True)  # => [b + c, c, 0]
-```
-
-x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
-  `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
-  `complex128`, `qint8`, `quint8`, `qint32`, `half`.
-axis: A `Tensor` of type `int32` (default: 0). Must be in the range
-  `[-rank(x), rank(x))`.
-exclusive: If `True`, perform exclusive cumsum.
-reverse: A `bool` (default: False).
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Cumprod")
     .Input("x: T")
@@ -2498,47 +1372,7 @@ REGISTER_OP("Cumprod")
     .Output("out: T")
     .Attr("T: numbertype")
     .Attr("Tidx: {int32, int64} = DT_INT32")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Compute the cumulative product of the tensor `x` along `axis`.
-
-By default, this op performs an inclusive cumprod, which means that the first
-element of the input is identical to the first element of the output:
-
-```python
-tf.cumprod([a, b, c])  # => [a, a * b, a * b * c]
-```
-
-By setting the `exclusive` kwarg to `True`, an exclusive cumprod is
-performed instead:
-
-```python
-tf.cumprod([a, b, c], exclusive=True)  # => [1, a, a * b]
-```
-
-By setting the `reverse` kwarg to `True`, the cumprod is performed in the
-opposite direction:
-
-```python
-tf.cumprod([a, b, c], reverse=True)  # => [a * b * c, b * c, c]
-```
-
-This is more efficient than using separate `tf.reverse` ops.
-
-The `reverse` and `exclusive` kwargs can also be combined:
-
-```python
-tf.cumprod([a, b, c], exclusive=True, reverse=True)  # => [b * c, c, 1]
-```
-
-x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
-  `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
-  `complex128`, `qint8`, `quint8`, `qint32`, `half`.
-axis: A `Tensor` of type `int32` (default: 0). Must be in the range
-  `[-rank(x), rank(x))`.
-exclusive: If `True`, perform exclusive cumprod.
-reverse: A `bool` (default: False).
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("QuantizedMatMul")
     .Input("a: T1")
@@ -2567,29 +1401,7 @@ REGISTER_OP("QuantizedMatMul")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Perform a quantized matrix multiplication of  `a` by the matrix `b`.
-
-The inputs must be two-dimensional matrices and the inner dimension of
-`a` (after being transposed if `transpose_a` is non-zero) must match the
-outer dimension of `b` (after being transposed if `transposed_b` is
-non-zero).
-
-a: Must be a two-dimensional tensor.
-b: Must be a two-dimensional tensor.
-transpose_a: If true, `a` is transposed before multiplication.
-transpose_b: If true, `b` is transposed before multiplication.
-min_a: The float value that the lowest quantized `a` value represents.
-max_a: The float value that the highest quantized `a` value represents.
-min_b: The float value that the lowest quantized `b` value represents.
-max_b: The float value that the highest quantized `b` value represents.
-min_out: The float value that the lowest quantized output value represents.
-max_out: The float value that the highest quantized output value represents.
-Tactivation: The type of output produced by activation function
-    following this operation.
-
-)doc");
+    });
 
 REGISTER_OP("QuantizedMul")
     .Input("x: T1")
@@ -2610,20 +1422,7 @@ REGISTER_OP("QuantizedMul")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns x * y element-wise, working on quantized buffers.
-
-min_x: The float value that the lowest quantized `x` value represents.
-max_x: The float value that the highest quantized `x` value represents.
-min_y: The float value that the lowest quantized `y` value represents.
-max_y: The float value that the highest quantized `y` value represents.
-min_z: The float value that the lowest quantized output value represents.
-max_z: The float value that the highest quantized output value represents.
-
-*NOTE*: `QuantizedMul` supports limited forms of broadcasting. More about
-broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+    });
 
 REGISTER_OP("QuantizedAdd")
     .Input("x: T1")
@@ -2644,20 +1443,7 @@ REGISTER_OP("QuantizedAdd")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Returns x + y element-wise, working on quantized buffers.
-
-min_x: The float value that the lowest quantized `x` value represents.
-max_x: The float value that the highest quantized `x` value represents.
-min_y: The float value that the lowest quantized `y` value represents.
-max_y: The float value that the highest quantized `y` value represents.
-min_z: The float value that the lowest quantized output value represents.
-max_z: The float value that the highest quantized output value represents.
-
-*NOTE*: `QuantizedAdd` supports limited forms of broadcasting. More about
-broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-)doc");
+    });
 
 REGISTER_OP("QuantizeDownAndShrinkRange")
     .Input("input: Tinput")
@@ -2676,40 +1462,7 @@ REGISTER_OP("QuantizeDownAndShrinkRange")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Convert the quantized 'input' tensor into a lower-precision 'output', using the
-actual distribution of the values to maximize the usage of the lower bit depth
-and adjusting the output min and max ranges accordingly.
-
-[input_min, input_max] are scalar floats that specify the range for the float
-interpretation of the 'input' data. For example, if input_min is -1.0f and
-input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
-value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
-
-This operator tries to squeeze as much precision as possible into an output with
-a lower bit depth by calculating the actual min and max values found in the
-data. For example, maybe that quint16 input has no values lower than 16,384 and
-none higher than 49,152. That means only half the range is actually needed, all
-the float interpretations are between -0.5f and 0.5f, so if we want to compress
-the data into a quint8 output, we can use that range rather than the theoretical
--1.0f to 1.0f that is suggested by the input min and max.
-
-In practice, this is most useful for taking output from operations like
-QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
-may have large potential output ranges, but in practice have a distribution of
-input values that only uses a small fraction of the possible range. By feeding
-that output into this operator, we can reduce it from 32 bits down to 8 with
-minimal loss of accuracy.
-
-input_min: The float value that the minimum quantized input value represents.
-input_max: The float value that the maximum quantized input value represents.
-Tinput: The type of the input.
-output_min: The float value that the minimum quantized output value represents.
-output_max: The float value that the maximum quantized output value represents.
-out_type: The type of the output. Should be a lower bit depth than Tinput.
-
-)doc");
+    });
 
 REGISTER_OP("Requantize")
     .Input("input: Tinput")
@@ -2732,26 +1485,7 @@ REGISTER_OP("Requantize")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Convert the quantized 'input' tensor into a lower-precision 'output', using the
-output range specified with 'requested_output_min' and 'requested_output_max'.
-
-[input_min, input_max] are scalar floats that specify the range for the float
-interpretation of the 'input' data. For example, if input_min is -1.0f and
-input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
-value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
-
-input_min: The float value that the minimum quantized input value represents.
-input_max: The float value that the maximum quantized input value represents.
-Tinput: The type of the input.
-requested_output_min: The float value that the minimum quantized output value represents.
-requested_output_max: The float value that the maximum quantized output value represents.
-output_min: The requested_output_min value is copied into this output.
-output_max: The requested_output_max value is copied into this output.
-out_type: The type of the output. Should be a lower bit depth than Tinput.
-
-)doc");
+    });
 
 REGISTER_OP("CompareAndBitpack")
     .Input("input: T")
@@ -2777,39 +1511,7 @@ REGISTER_OP("CompareAndBitpack")
       c->set_output(0, output);
 
       return Status::OK();
-    })
-    .Doc(R"doc(
-Compare values of `input` to `threshold` and pack resulting bits into a `uint8`.
-
-Each comparison returns a boolean `true` (if `input_value > threshold`)
-or and `false` otherwise.
-
-This operation is useful for Locality-Sensitive-Hashing (LSH) and other
-algorithms that use hashing approximations of cosine and `L2` distances;
-codes can be generated from an input via:
-
-```python
-codebook_size = 50
-codebook_bits = codebook_size * 32
-codebook = tf.get_variable('codebook', [x.shape[-1].value, codebook_bits],
-                           dtype=x.dtype,
-                           initializer=tf.orthogonal_initializer())
-codes = compare_and_threshold(tf.matmul(x, codebook), threshold=0.)
-codes = tf.bitcast(codes, tf.int32)  # go from uint8 to int32
-# now codes has shape x.shape[:-1] + [codebook_size]
-```
-
-**NOTE**: Currently, the innermost dimension of the tensor must be divisible
-by 8.
-
-Given an `input` shaped `[s0, s1, ..., s_n]`, the output is
-a `uint8` tensor shaped `[s0, s1, ..., s_n / 8]`.
-
-input: Values to compare against `threshold` and bitpack.
-threshold: Threshold to compare against.
-T: The type of the input and threshold.
-output: The bitpacked comparisons.
-)doc");
+    });
 
 REGISTER_OP("RequantizationRange")
     .Input("input: Tinput")
@@ -2825,20 +1527,7 @@ REGISTER_OP("RequantizationRange")
       c->set_output(0, c->Scalar());
       c->set_output(1, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Given a quantized tensor described by (input, input_min, input_max), outputs a
-range that covers the actual values present in that tensor.  This op is
-typically used to produce the requested_output_min and requested_output_max for
-Requantize.
-
-input_min: The float value that the minimum quantized input value represents.
-input_max: The float value that the maximum quantized input value represents.
-Tinput: The type of the input.
-output_min: The computed min output.
-output_max: the computed max output.
-
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -2847,29 +1536,7 @@ REGISTER_OP("Bucketize")
     .Output("output: int32")
     .Attr("T: {int32, int64, float, double}")
     .Attr("boundaries: list(float)")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Bucketizes 'input' based on 'boundaries'.
-
-For example, if the inputs are
-    boundaries = [0, 10, 100]
-    input = [[-5, 10000]
-             [150,   10]
-             [5,    100]]
-
-then the output will be
-    output = [[0, 3]
-              [3, 2]
-              [1, 3]]
-
-input: Any shape of Tensor contains with int or float type.
-boundaries: A sorted list of floats gives the boundary of the buckets.
-output: Same shape with 'input', each value of input replaced with bucket index.
-
-@compatibility(numpy)
-Equivalent to np.digitize.
-@end_compatibility
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 #ifdef INTEL_MKL
 REGISTER_OP("_MklAddN")
diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc
index 3dfa776d26f53c5f341332b3a2bdf5fd95067049..ca3772e6f89805b70f05f1c9fd5e36ee99f2d510 100644
--- a/tensorflow/core/ops/math_ops_test.cc
+++ b/tensorflow/core/ops/math_ops_test.cc
@@ -522,7 +522,7 @@ TEST(MathOpsTest, Cross_ShapeFn) {
   INFER_ERROR("Dimension 0 in both shapes must be equal, but", op, "[3];[5]");
   INFER_ERROR("Dimension must be 3 but", op, "[3,5];[3,5]");
 
-  INFER_OK(op, "?;?", "?");
+  INFER_OK(op, "?;?", "in0");
   INFER_OK(op, "[?];[?]", "in0");
   INFER_OK(op, "[1,?,3];[?,?,?]", "in0");
 }
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 654e890b5739e8a4e6f817bb43f697200566e654..3f72b415699562a0d79fc1f41ff1b4a360bfc7db 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -73,25 +73,8 @@ REGISTER_OP("AvgPool")
     .Attr("strides: list(int) >= 4")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
-    .Attr("T: {half, float, double}")
-    .SetShapeFn(shape_inference::AvgPoolShape)
-    .Doc(R"doc(
-Performs average pooling on the input.
-
-Each entry in `output` is the mean of the corresponding size `ksize`
-window in `value`.
-
-value: 4-D with shape `[batch, height, width, channels]`.
-ksize: The size of the sliding window for each dimension of `value`.
-strides: The stride of the sliding window for each dimension of `value`.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-output: The average pooled output tensor.
-)doc");
+    .Attr("T: {half, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::AvgPoolShape);
 
 REGISTER_OP("AvgPoolGrad")
     .Input("orig_input_shape: int32")
@@ -101,30 +84,14 @@ REGISTER_OP("AvgPoolGrad")
     .Attr("strides: list(int) >= 4")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
       TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s));
       c->set_output(0, s);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes gradients of the average pooling function.
-
-orig_input_shape: 1-D.  Shape of the original input to `avg_pool`.
-grad: 4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t.
-  the output of `avg_pool`.
-ksize: The size of the sliding window for each dimension of the input.
-strides: The stride of the sliding window for each dimension of the input.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-output: 4-D.  Gradients w.r.t. the input of `avg_pool`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -154,28 +121,7 @@ REGISTER_OP("BatchNormWithGlobalNormalization")
       TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Batch normalization.
-
-This op is deprecated. Prefer `tf.nn.batch_normalization`.
-
-t: A 4D input Tensor.
-m: A 1D mean Tensor with size matching the last dimension of t.
-  This is the first output from tf.nn.moments,
-  or a saved moving average thereof.
-v: A 1D variance Tensor with size matching the last dimension of t.
-  This is the second output from tf.nn.moments,
-  or a saved moving average thereof.
-beta: A 1D beta Tensor with size matching the last dimension of t.
-  An offset to be added to the normalized tensor.
-gamma: A 1D gamma Tensor with size matching the last dimension of t.
-  If "scale_after_normalization" is true, this tensor will be multiplied
-  with the normalized tensor.
-variance_epsilon: A small float number to avoid dividing by 0.
-scale_after_normalization: A bool indicating whether the resulted tensor
-  needs to be multiplied with gamma.
-)doc");
+    });
 
 REGISTER_OP("BatchNormWithGlobalNormalizationGrad")
     .Input("t: T")
@@ -215,33 +161,7 @@ REGISTER_OP("BatchNormWithGlobalNormalizationGrad")
       c->set_output(3, vector_shape);
       c->set_output(4, vector_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Gradients for batch normalization.
-
-This op is deprecated. See `tf.nn.batch_normalization`.
-
-t: A 4D input Tensor.
-m: A 1D mean Tensor with size matching the last dimension of t.
-  This is the first output from tf.nn.moments,
-  or a saved moving average thereof.
-v: A 1D variance Tensor with size matching the last dimension of t.
-  This is the second output from tf.nn.moments,
-  or a saved moving average thereof.
-gamma: A 1D gamma Tensor with size matching the last dimension of t.
-  If "scale_after_normalization" is true, this Tensor will be multiplied
-  with the normalized Tensor.
-backprop: 4D backprop Tensor.
-variance_epsilon: A small float number to avoid dividing by 0.
-scale_after_normalization: A bool indicating whether the resulted tensor
-  needs to be multiplied with gamma.
-
-dx: 4D backprop tensor for input.
-dm: 1D backprop tensor for mean.
-dv: 1D backprop tensor for variance.
-db: 1D backprop tensor for beta.
-dg: 1D backprop tensor for gamma.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -260,34 +180,7 @@ REGISTER_OP("FusedBatchNorm")
     .Attr("epsilon: float = 0.0001")
     .Attr("data_format: string = 'NHWC'")
     .Attr("is_training: bool = true")
-    .SetShapeFn(shape_inference::FusedBatchNormShape)
-    .Doc(R"doc(
-Batch normalization.
-Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
-The size of 1D Tensors matches the dimension C of the 4D Tensors.
-
-x: A 4D Tensor for input data.
-scale: A 1D Tensor for scaling factor, to scale the normalized x.
-offset: A 1D Tensor for offset, to shift to the normalized x.
-mean: A 1D Tensor for population mean. Used for inference only;
-      must be empty for training.
-variance: A 1D Tensor for population variance. Used for inference only;
-          must be empty for training.
-y: A 4D Tensor for output data.
-batch_mean: A 1D Tensor for the computed batch mean, to be used by TensorFlow
-            to compute the running mean.
-batch_variance: A 1D Tensor for the computed batch variance, to be used by
-                TensorFlow to compute the running variance.
-reserve_space_1: A 1D Tensor for the computed batch mean, to be reused
-                 in the gradient computation.
-reserve_space_2: A 1D Tensor for the computed batch variance (inverted variance
-                 in the cuDNN case), to be reused in the gradient computation.
-T: The data type for the elements of input and output Tensors.
-epsilon: A small float number added to the variance of x.
-data_format: The data format for x and y. Either "NHWC" (default) or "NCHW".
-is_training: A bool value to indicate the operation is for training (default)
-             or inference.
-)doc");
+    .SetShapeFn(shape_inference::FusedBatchNormShape);
 
 REGISTER_OP("FusedBatchNormV2")
     .Input("x: T")
@@ -300,40 +193,12 @@ REGISTER_OP("FusedBatchNormV2")
     .Output("batch_variance: U")
     .Output("reserve_space_1: U")
     .Output("reserve_space_2: U")
-    .Attr("T: {half, float}")
+    .Attr("T: {half, bfloat16, float}")
     .Attr("U: {float}")
     .Attr("epsilon: float = 0.0001")
     .Attr("data_format: string = 'NHWC'")
     .Attr("is_training: bool = true")
-    .SetShapeFn(shape_inference::FusedBatchNormShape)
-    .Doc(R"doc(
-Batch normalization.
-Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
-The size of 1D Tensors matches the dimension C of the 4D Tensors.
-
-x: A 4D Tensor for input data.
-scale: A 1D Tensor for scaling factor, to scale the normalized x.
-offset: A 1D Tensor for offset, to shift to the normalized x.
-mean: A 1D Tensor for population mean. Used for inference only;
-      must be empty for training.
-variance: A 1D Tensor for population variance. Used for inference only;
-          must be empty for training.
-y: A 4D Tensor for output data.
-batch_mean: A 1D Tensor for the computed batch mean, to be used by TensorFlow
-            to compute the running mean.
-batch_variance: A 1D Tensor for the computed batch variance, to be used by
-                TensorFlow to compute the running variance.
-reserve_space_1: A 1D Tensor for the computed batch mean, to be reused
-                 in the gradient computation.
-reserve_space_2: A 1D Tensor for the computed batch variance (inverted variance
-                 in the cuDNN case), to be reused in the gradient computation.
-T: The data type for the elements of input and output Tensors.
-U: The data type for the scale, offset, mean, and variance.
-epsilon: A small float number added to the variance of x.
-data_format: The data format for x and y. Either "NHWC" (default) or "NCHW".
-is_training: A bool value to indicate the operation is for training (default)
-             or inference.
-)doc");
+    .SetShapeFn(shape_inference::FusedBatchNormShape);
 
 REGISTER_OP("FusedBatchNormGrad")
     .Input("y_backprop: T")
@@ -350,37 +215,7 @@ REGISTER_OP("FusedBatchNormGrad")
     .Attr("epsilon: float = 0.0001")
     .Attr("data_format: string = 'NHWC'")
     .Attr("is_training: bool = true")
-    .SetShapeFn(shape_inference::FusedBatchNormGradShape)
-    .Doc(R"doc(
-Gradient for batch normalization.
-Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
-The size of 1D Tensors matches the dimension C of the 4D Tensors.
-
-y_backprop: A 4D Tensor for the gradient with respect to y.
-x: A 4D Tensor for input data.
-scale: A 1D Tensor for scaling factor, to scale the normalized x.
-reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
-                 mean to be reused in gradient computation. When is_training is
-                 False, a 1D Tensor for the population mean to be reused in both
-                 1st and 2nd order gradient computation.
-reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
-                 variance (inverted variance in the cuDNN case) to be reused in
-                 gradient computation. When is_training is False, a 1D Tensor
-                 for the population variance to be reused in both 1st and 2nd
-                 order gradient computation.
-x_backprop: A 4D Tensor for the gradient with respect to x.
-scale_backprop: A 1D Tensor for the gradient with respect to scale.
-offset_backprop: A 1D Tensor for the gradient with respect to offset.
-reserve_space_3: Unused placeholder to match the mean input in FusedBatchNorm.
-reserve_space_4: Unused placeholder to match the variance input
-                 in FusedBatchNorm.
-T: The data type for the elements of input and output Tensors.
-epsilon: A small float number added to the variance of x.
-data_format: The data format for y_backprop, x, x_backprop.
-             Either "NHWC" (default) or "NCHW".
-is_training: A bool value to indicate the operation is for training (default)
-             or inference.
-)doc");
+    .SetShapeFn(shape_inference::FusedBatchNormGradShape);
 
 REGISTER_OP("FusedBatchNormGradV2")
     .Input("y_backprop: T")
@@ -393,43 +228,12 @@ REGISTER_OP("FusedBatchNormGradV2")
     .Output("offset_backprop: U")
     .Output("reserve_space_3: U")
     .Output("reserve_space_4: U")
-    .Attr("T: {half, float}")
+    .Attr("T: {half, bfloat16, float}")
     .Attr("U: {float}")
     .Attr("epsilon: float = 0.0001")
     .Attr("data_format: string = 'NHWC'")
     .Attr("is_training: bool = true")
-    .SetShapeFn(shape_inference::FusedBatchNormGradShape)
-    .Doc(R"doc(
-Gradient for batch normalization.
-Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
-The size of 1D Tensors matches the dimension C of the 4D Tensors.
-
-y_backprop: A 4D Tensor for the gradient with respect to y.
-x: A 4D Tensor for input data.
-scale: A 1D Tensor for scaling factor, to scale the normalized x.
-reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
-                 mean to be reused in gradient computation. When is_training is
-                 False, a 1D Tensor for the population mean to be reused in both
-                 1st and 2nd order gradient computation.
-reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
-                 variance (inverted variance in the cuDNN case) to be reused in
-                 gradient computation. When is_training is False, a 1D Tensor
-                 for the population variance to be reused in both 1st and 2nd
-                 order gradient computation.
-x_backprop: A 4D Tensor for the gradient with respect to x.
-scale_backprop: A 1D Tensor for the gradient with respect to scale.
-offset_backprop: A 1D Tensor for the gradient with respect to offset.
-reserve_space_3: Unused placeholder to match the mean input in FusedBatchNorm.
-reserve_space_4: Unused placeholder to match the variance input
-                 in FusedBatchNorm.
-T: The data type for the elements of input and output Tensors.
-U: The data type for the scale, offset, mean, and variance.
-epsilon: A small float number added to the variance of x.
-data_format: The data format for y_backprop, x, x_backprop.
-             Either "NHWC" (default) or "NCHW".
-is_training: A bool value to indicate the operation is for training (default)
-             or inference.
-)doc");
+    .SetShapeFn(shape_inference::FusedBatchNormGradShape);
 
 // --------------------------------------------------------------------------
 
@@ -439,24 +243,7 @@ REGISTER_OP("BiasAdd")
     .Input("bias: T")
     .Attr(GetConvnetDataFormatAttrString())
     .Output("output: T")
-    .SetShapeFn(shape_inference::BiasAddShape)
-    .Doc(R"doc(
-Adds `bias` to `value`.
-
-This is a special case of `tf.add` where `bias` is restricted to be 1-D.
-Broadcasting is supported, so `value` may have any number of dimensions.
-
-value: Any number of dimensions.
-bias: 1-D with size the last dimension of `value`.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the bias tensor will be added to the last dimension
-    of the value tensor.
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-    The tensor will be added to "in_channels", the third-to-the-last
-        dimension.
-output: Broadcasted sum of `value` and `bias`.
-)doc");
+    .SetShapeFn(shape_inference::BiasAddShape);
 // --------------------------------------------------------------------------
 
 REGISTER_OP("BiasAddGrad")
@@ -464,24 +251,7 @@ REGISTER_OP("BiasAddGrad")
     .Input("out_backprop: T")
     .Attr(GetConvnetDataFormatAttrString())
     .Output("output: T")
-    .SetShapeFn(shape_inference::BiasAddGradShape)
-    .Doc(R"doc(
-The backward operation for "BiasAdd" on the "bias" tensor.
-
-It accumulates all the values from out_backprop into the feature dimension.
-For NHWC data format, the feature dimension is the last. For NCHW data format,
-the feature dimension is the third-to-last.
-
-out_backprop: Any number of dimensions.
-output: 1-D with size the feature dimension of `out_backprop`.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the bias tensor will be added to the last dimension
-    of the value tensor.
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-    The tensor will be added to "in_channels", the third-to-the-last
-        dimension.
-)doc");
+    .SetShapeFn(shape_inference::BiasAddGradShape);
 // --------------------------------------------------------------------------
 
 REGISTER_OP("BiasAddV1")
@@ -489,111 +259,39 @@ REGISTER_OP("BiasAddV1")
     .Input("value: T")
     .Input("bias: T")
     .Output("output: T")
-    .SetShapeFn(shape_inference::BiasAddShape)
-    .Doc(R"doc(
-Adds `bias` to `value`.
-
-This is a deprecated version of BiasAdd and will be soon removed.
-
-This is a special case of `tf.add` where `bias` is restricted to be 1-D.
-Broadcasting is supported, so `value` may have any number of dimensions.
-
-value: Any number of dimensions.
-bias: 1-D with size the last dimension of `value`.
-output: Broadcasted sum of `value` and `bias`.
-)doc");
+    .SetShapeFn(shape_inference::BiasAddShape);
 // --------------------------------------------------------------------------
 
 REGISTER_OP("Conv2D")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {half, float}")
+    .Attr("T: {half, bfloat16, float}")
     .Attr("strides: list(int)")
     .Attr("use_cudnn_on_gpu: bool = true")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
-    .SetShapeFn(shape_inference::Conv2DShape)
-    .Doc(R"doc(
-Computes a 2-D convolution given 4-D `input` and `filter` tensors.
-
-Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
-and a filter / kernel tensor of shape
-`[filter_height, filter_width, in_channels, out_channels]`, this op
-performs the following:
-
-1. Flattens the filter to a 2-D matrix with shape
-   `[filter_height * filter_width * in_channels, output_channels]`.
-2. Extracts image patches from the input tensor to form a *virtual*
-   tensor of shape `[batch, out_height, out_width,
-   filter_height * filter_width * in_channels]`.
-3. For each patch, right-multiplies the filter matrix and the image patch
-   vector.
-
-In detail, with the default NHWC format,
-
-    output[b, i, j, k] =
-        sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] *
-                        filter[di, dj, q, k]
-
-Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
-horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
-
-input: A 4-D tensor. The dimension order is interpreted according to the value
-    of `data_format`, see below for details.
-filter: A 4-D tensor of shape
-    `[filter_height, filter_width, in_channels, out_channels]`
-output: A 4-D tensor. The dimension order is determined by the value of
-    `data_format`, see below for details.
-strides: 1-D tensor of length 4.  The stride of the sliding window for each
-  dimension of `input`. The dimension order is determined by the value of
-    `data_format`, see below for details.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, height, width, channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, channels, height, width].
-)doc");
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
+    .SetShapeFn(shape_inference::Conv2DShape);
 
 REGISTER_OP("Conv2DBackpropInput")
     .Input("input_sizes: int32")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float}")
+    .Attr("T: {half, bfloat16, float}")
     .Attr("strides: list(int)")
     .Attr("use_cudnn_on_gpu: bool = true")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
       TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s));
       c->set_output(0, s);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradients of convolution with respect to the input.
-
-input_sizes: An integer vector representing the shape of `input`,
-  where `input` is a 4-D `[batch, height, width, channels]` tensor.
-filter: 4-D with shape
-  `[filter_height, filter_width, in_channels, out_channels]`.
-out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
-  Gradients w.r.t. the output of the convolution.
-strides: The stride of the sliding window for each dimension of the input
-  of the convolution. Must be in the same order as the dimension specified with
-  format.
-padding: The type of padding algorithm to use.
-output: 4-D with shape `[batch, in_height, in_width, in_channels]`.  Gradient
-  w.r.t. the input of the convolution.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-)doc");
+    });
 
 // TODO(jeff): Instead of 'use_cudnn_for_gpu', maybe we should have a
 // more general string attribute ('kernel_impl'?) that can be used to
@@ -603,40 +301,19 @@ REGISTER_OP("Conv2DBackpropFilter")
     .Input("filter_sizes: int32")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float}")
+    .Attr("T: {half, bfloat16, float}")
     .Attr("strides: list(int)")
     .Attr("use_cudnn_on_gpu: bool = true")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s));
       TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s));
       c->set_output(0, s);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradients of convolution with respect to the filter.
-
-input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
-filter_sizes: An integer vector representing the tensor shape of `filter`,
-  where `filter` is a 4-D
-  `[filter_height, filter_width, in_channels, out_channels]` tensor.
-out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
-  Gradients w.r.t. the output of the convolution.
-strides: The stride of the sliding window for each dimension of the input
-  of the convolution. Must be in the same order as the dimension specified with
-  format.
-padding: The type of padding algorithm to use.
-output: 4-D with shape
-  `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
-  the `filter` input of the convolution.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-)doc");
+    });
 
 namespace {
 
@@ -733,6 +410,22 @@ Status CommonFusedConvCalculations(InferenceContext* c, bool has_resize) {
 
 }  // namespace
 
+REGISTER_OP("DataFormatDimMap")
+    .Input("x: T")
+    .Output("y: T")
+    .Attr("T: {int32, int64} = DT_INT32")
+    .Attr("src_format: string = 'NHWC'")
+    .Attr("dst_format: string = 'NCHW'")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
+REGISTER_OP("DataFormatVecPermute")
+    .Input("x: T")
+    .Output("y: T")
+    .Attr("T: {int32, int64} = DT_INT32")
+    .Attr("src_format: string = 'NHWC'")
+    .Attr("dst_format: string = 'NCHW'")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
 REGISTER_OP("FusedResizeAndPadConv2D")
     .Input("input: T")
     .Input("size: int32")
@@ -746,35 +439,7 @@ REGISTER_OP("FusedResizeAndPadConv2D")
     .Attr(GetPaddingAttrString())
     .SetShapeFn([](InferenceContext* c) {
       return CommonFusedConvCalculations(c, true /* has_resize */);
-    })
-    .Doc(R"doc(
-Performs a resize and padding as a preprocess during a convolution.
-
-It's often possible to do spatial transformations more efficiently as part of
-the packing stage of a convolution, so this op allows for an optimized
-implementation where these stages are fused together. This prevents the need to
-write out the intermediate results as whole tensors, reducing memory pressure,
-and we can get some latency gains by merging the transformation calculations.
-The data_format attribute for Conv2D isn't supported by this op, and defaults to
-'NHWC' order.
-Internally this op uses a single per-graph scratch buffer, which means that it
-will block if multiple versions are being run in parallel. This is because this
-operator is primarily an optimization to minimize memory usage.
-
-input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
-size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-  new size for the images.
-paddings: A two-column matrix specifying the padding sizes. The number of
-  rows must be the same as the rank of `input`.
-filter: 4-D with shape
-  `[filter_height, filter_width, in_channels, out_channels]`.
-resize_align_corners: If true, rescale input by (new_height - 1) / (height - 1),
-  which exactly aligns the 4 corners of images and resized images. If false, rescale
-  by new_height / height. Treat similarly the width dimension.
-strides: 1-D of length 4.  The stride of the sliding window for each dimension
-   of `input`. Must be in the same order as the dimension specified with format.
-padding: The type of padding algorithm to use.
- )doc");
+    });
 
 REGISTER_OP("FusedPadConv2D")
     .Input("input: T")
@@ -787,31 +452,7 @@ REGISTER_OP("FusedPadConv2D")
     .Attr(GetPaddingAttrString())
     .SetShapeFn([](InferenceContext* c) {
       return CommonFusedConvCalculations(c, false /* has_resize */);
-    })
-    .Doc(R"doc(
-Performs a padding as a preprocess during a convolution.
-
-Similar to FusedResizeAndPadConv2d, this op allows for an optimized
-implementation where the spatial padding transformation stage is fused with the
-im2col lookup, but in this case without the bilinear filtering required for
-resizing. Fusing the padding prevents the need to write out the intermediate
-results as whole tensors, reducing memory pressure, and we can get some latency
-gains by merging the transformation calculations.
-The data_format attribute for Conv2D isn't supported by this op, and 'NHWC'
-order is used instead.
-Internally this op uses a single per-graph scratch buffer, which means that it
-will block if multiple versions are being run in parallel. This is because this
-operator is primarily an optimization to minimize memory usage.
-
-input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
-paddings: A two-column matrix specifying the padding sizes. The number of
-  rows must be the same as the rank of `input`.
-filter: 4-D with shape
-  `[filter_height, filter_width, in_channels, out_channels]`.
-strides: 1-D of length 4.  The stride of the sliding window for each dimension
-   of `input`. Must be in the same order as the dimension specified with format.
-padding: The type of padding algorithm to use.
- )doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -819,158 +460,60 @@ REGISTER_OP("DepthwiseConv2dNative")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
-    .SetShapeFn(shape_inference::DepthwiseConv2DNativeShape)
-    .Doc(R"doc(
-Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors.
-
-Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
-and a filter / kernel tensor of shape
-`[filter_height, filter_width, in_channels, channel_multiplier]`, containing
-`in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies
-a different filter to each input channel (expanding from 1 channel to
-`channel_multiplier` channels for each), then concatenates the results
-together. Thus, the output has `in_channels * channel_multiplier` channels.
-
-```
-for k in 0..in_channels-1
-  for q in 0..channel_multiplier-1
-    output[b, i, j, k * channel_multiplier + q] =
-      sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] *
-                        filter[di, dj, k, q]
-```
-
-Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
-horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
-
-strides: 1-D of length 4.  The stride of the sliding window for each dimension
-  of `input`.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, height, width, channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, channels, height, width].
-)doc");
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
+    .SetShapeFn(shape_inference::DepthwiseConv2DNativeShape);
 
 REGISTER_OP("DepthwiseConv2dNativeBackpropInput")
     .Input("input_sizes: int32")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
       TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s));
       c->set_output(0, s);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradients of depthwise convolution with respect to the input.
-
-input_sizes: An integer vector representing the shape of `input`, based
-  on `data_format`.  For example, if `data_format` is 'NHWC' then
-   `input` is a 4-D `[batch, height, width, channels]` tensor.
-filter: 4-D with shape
-  `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
-out_backprop: 4-D with shape  based on `data_format`.
-  For example, if `data_format` is 'NHWC' then
-  out_backprop shape is `[batch, out_height, out_width, out_channels]`.
-  Gradients w.r.t. the output of the convolution.
-strides: The stride of the sliding window for each dimension of the input
-  of the convolution.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, height, width, channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, channels, height, width].
-output: 4-D with shape according to `data_format`.  For example, if
-  `data_format` is 'NHWC', output shape is `[batch, in_height,
-  in_width, in_channels]`.  Gradient w.r.t. the input of the
-  convolution.
-)doc");
+    });
 
 REGISTER_OP("DepthwiseConv2dNativeBackpropFilter")
     .Input("input: T")
     .Input("filter_sizes: int32")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s));
       TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s));
       c->set_output(0, s);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradients of depthwise convolution with respect to the filter.
-
-input: 4-D with shape based on `data_format`.  For example, if
-  `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height,
-  in_width, in_channels]` tensor.
-filter_sizes: An integer vector representing the tensor shape of `filter`,
-  where `filter` is a 4-D
-  `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor.
-out_backprop: 4-D with shape  based on `data_format`.
-  For example, if `data_format` is 'NHWC' then
-  out_backprop shape is `[batch, out_height, out_width, out_channels]`.
-  Gradients w.r.t. the output of the convolution.
-strides: The stride of the sliding window for each dimension of the input
-  of the convolution.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, height, width, channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, channels, height, width].
-output: 4-D with shape
-  `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
-  the `filter` input of the convolution.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 REGISTER_OP("Conv3D")
     .Input("input: T")
     .Input("filter: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
-    .SetShapeFn(shape_inference::Conv3DShape)
-    .Doc(R"doc(
-Computes a 3-D convolution given 5-D `input` and `filter` tensors.
-
-In signal processing, cross-correlation is a measure of similarity of
-two waveforms as a function of a time-lag applied to one of them. This
-is also known as a sliding dot product or sliding inner-product.
-
-Our Conv3D implements a form of cross-correlation.
-
-input: Shape `[batch, in_depth, in_height, in_width, in_channels]`.
-filter: Shape `[filter_depth, filter_height, filter_width, in_channels,
-  out_channels]`. `in_channels` must match between `input` and `filter`.
-strides: 1-D tensor of length 5. The stride of the sliding window for each
-  dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-padding: The type of padding algorithm to use.
-data_format: The data format of the input and output data. With the
-    default format "NDHWC", the data is stored in the order of:
-        [batch, in_depth, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCDHW", the data storage order is:
-        [batch, in_channels, in_depth, in_height, in_width].
-)doc");
+    .Attr("dilations: list(int) = [1, 1, 1, 1, 1]")
+    .SetShapeFn(shape_inference::Conv3DShape);
 
 REGISTER_OP("Conv3DBackpropInput")
     .Input("input: T")
@@ -983,20 +526,7 @@ REGISTER_OP("Conv3DBackpropInput")
     .Deprecated(10, "Use Conv3DBackpropInputV2")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 5);
-    })
-    .Doc(R"doc(
-Computes the gradients of 3-D convolution with respect to the input.
-
-input: Shape `[batch, depth, rows, cols, in_channels]`.
-filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
-  `in_channels` must match between `input` and `filter`.
-out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-  out_channels]`.
-strides: 1-D tensor of length 5. The stride of the sliding window for each
-  dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-padding: The type of padding algorithm to use.
-
-)doc");
+    });
 
 REGISTER_OP("Conv3DBackpropFilter")
     .Input("input: T")
@@ -1012,94 +542,43 @@ REGISTER_OP("Conv3DBackpropFilter")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 5, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradients of 3-D convolution with respect to the filter.
-
-input: Shape `[batch, depth, rows, cols, in_channels]`.
-filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
-  `in_channels` must match between `input` and `filter`.
-out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-  out_channels]`.
-strides: 1-D tensor of length 5. The stride of the sliding window for each
-  dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-padding: The type of padding algorithm to use.
-
-)doc");
+    });
 
 REGISTER_OP("Conv3DBackpropInputV2")
     .Input("input_sizes: int32")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
       TF_RETURN_IF_ERROR(c->WithRank(s, 5, &s));
       c->set_output(0, s);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradients of 3-D convolution with respect to the input.
-
-input_sizes: An integer vector representing the tensor shape of `input`,
-   where `input` is a 5-D
-   `[batch, depth, rows, cols, in_channels]` tensor.
-filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
-  `in_channels` must match between `input` and `filter`.
-out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-  out_channels]`.
-strides: 1-D tensor of length 5. The stride of the sliding window for each
-  dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-padding: The type of padding algorithm to use.
-data_format: The data format of the input and output data. With the
-    default format "NDHWC", the data is stored in the order of:
-        [batch, in_depth, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCDHW", the data storage order is:
-        [batch, in_channels, in_depth, in_height, in_width].
-
-)doc");
+    });
 
 REGISTER_OP("Conv3DBackpropFilterV2")
     .Input("input: T")
     .Input("filter_sizes: int32")
     .Input("out_backprop: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s));
       TF_RETURN_IF_ERROR(c->WithRank(s, 5, &s));
       c->set_output(0, s);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradients of 3-D convolution with respect to the filter.
-
-input: Shape `[batch, depth, rows, cols, in_channels]`.
-filter_sizes: An integer vector representing the tensor shape of `filter`,
-  where `filter` is a 5-D
-  `[filter_depth, filter_height, filter_width, in_channels, out_channels]`
-  tensor.
-out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-  out_channels]`.
-strides: 1-D tensor of length 5. The stride of the sliding window for each
-  dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-padding: The type of padding algorithm to use.
-data_format: The data format of the input and output data. With the
-    default format "NDHWC", the data is stored in the order of:
-        [batch, in_depth, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCDHW", the data storage order is:
-        [batch, in_channels, in_depth, in_height, in_width].
-
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1110,24 +589,8 @@ REGISTER_OP("AvgPool3D")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
-    .Attr("T: {float, double}")
-    .SetShapeFn(shape_inference::Pool3DShape)
-    .Doc(R"doc(
-Performs 3D average pooling on the input.
-
-ksize: 1-D tensor of length 5. The size of the window for each dimension of
-  the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-strides: 1-D tensor of length 5. The stride of the sliding window for each
-  dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-padding: The type of padding algorithm to use.
-input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over.
-output: The average pooled output tensor.
-data_format: The data format of the input and output data. With the
-    default format "NDHWC", the data is stored in the order of:
-        [batch, in_depth, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCDHW", the data storage order is:
-        [batch, in_channels, in_depth, in_height, in_width].
-)doc");
+    .Attr("T: {bfloat16, float, double}")
+    .SetShapeFn(shape_inference::Pool3DShape);
 
 REGISTER_OP("AvgPool3DGrad")
     .Input("orig_input_shape: int32")
@@ -1137,31 +600,14 @@ REGISTER_OP("AvgPool3DGrad")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
-    .Attr("T: {float, double}")
+    .Attr("T: {bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
       TF_RETURN_IF_ERROR(c->WithRank(s, 5, &s));
       c->set_output(0, s);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes gradients of average pooling function.
-
-ksize: 1-D tensor of length 5. The size of the window for each dimension of
-  the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-strides: 1-D tensor of length 5. The stride of the sliding window for each
-  dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-padding: The type of padding algorithm to use.
-orig_input_shape: The original input dimensions.
-grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
-output: The backprop for input.
-data_format: The data format of the input and output data. With the
-    default format "NDHWC", the data is stored in the order of:
-        [batch, in_depth, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCDHW", the data storage order is:
-        [batch, in_channels, in_depth, in_height, in_width].
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1172,24 +618,8 @@ REGISTER_OP("MaxPool3D")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
-    .Attr("T: {float}")
-    .SetShapeFn(shape_inference::Pool3DShape)
-    .Doc(R"doc(
-Performs 3D max pooling on the input.
-
-ksize: 1-D tensor of length 5. The size of the window for each dimension of
-  the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-strides: 1-D tensor of length 5. The stride of the sliding window for each
-  dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-padding: The type of padding algorithm to use.
-input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over.
-output: The max pooled output tensor.
-data_format: The data format of the input and output data. With the
-    default format "NDHWC", the data is stored in the order of:
-        [batch, in_depth, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCDHW", the data storage order is:
-        [batch, in_channels, in_depth, in_height, in_width].
-)doc");
+    .Attr("T: {bfloat16, float}")
+    .SetShapeFn(shape_inference::Pool3DShape);
 
 REGISTER_OP("MaxPool3DGrad")
     .Input("orig_input: TInput")
@@ -1200,28 +630,11 @@ REGISTER_OP("MaxPool3DGrad")
     .Attr("strides: list(int) >= 5")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
-    .Attr("T: {float} = DT_FLOAT")
-    .Attr("TInput: {float} = DT_FLOAT")
+    .Attr("T: {bfloat16, float} = DT_FLOAT")
+    .Attr("TInput: {bfloat16, float} = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 5);
-    })
-    .Doc(R"doc(
-Computes gradients of max pooling function.
-
-ksize: 1-D tensor of length 5. The size of the window for each dimension of
-  the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-strides: 1-D tensor of length 5. The stride of the sliding window for each
-  dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-padding: The type of padding algorithm to use.
-orig_input: The original input tensor.
-orig_output: The original output tensor.
-grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
-data_format: The data format of the input and output data. With the
-    default format "NDHWC", the data is stored in the order of:
-        [batch, in_depth, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCDHW", the data storage order is:
-        [batch, in_channels, in_depth, in_height, in_width].
-)doc");
+    });
 
 REGISTER_OP("MaxPool3DGradGrad")
     .Input("orig_input: T")
@@ -1241,43 +654,15 @@ REGISTER_OP("MaxPool3DGradGrad")
       // Validate 'orig_output' is same shape as 'output'
       TF_RETURN_IF_ERROR(c->Merge(c->input(1), c->output(0), &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes second-order gradients of the maxpooling function.
-
-ksize: 1-D tensor of length 5. The size of the window for each dimension of
-  the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-strides: 1-D tensor of length 5. The stride of the sliding window for each
-  dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-padding: The type of padding algorithm to use.
-orig_input: The original input tensor.
-orig_output: The original output tensor.
-grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
-output: Gradients of gradients w.r.t. the input to `max_pool`.
-data_format: The data format of the input and output data. With the
-    default format "NDHWC", the data is stored in the order of:
-        [batch, in_depth, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCDHW", the data storage order is:
-        [batch, in_channels, in_depth, in_height, in_width].
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
 REGISTER_OP("L2Loss")
     .Input("t: T")
     .Output("output: T")
-    .Attr("T: {half, float, double}")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-L2 Loss.
-
-Computes half the L2 norm of a tensor without the `sqrt`:
-
-    output = sum(t ** 2) / 2
-
-t: Typically 2-D, but may have any dimensions.
-output: 0-D.
-)doc");
+    .Attr("T: {half, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::ScalarShape);
 
 // --------------------------------------------------------------------------
 
@@ -1288,31 +673,10 @@ REGISTER_OP("LRN")
     .Attr("bias: float = 1.0")
     .Attr("alpha: float = 1.0")
     .Attr("beta: float = 0.5")
-    .Attr("T: {float, half} = DT_FLOAT")
+    .Attr("T: {half, bfloat16, float} = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 4);
-    })
-    .Doc(R"doc(
-Local Response Normalization.
-
-The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last
-dimension), and each vector is normalized independently.  Within a given vector,
-each component is divided by the weighted, squared sum of inputs within
-`depth_radius`.  In detail,
-
-    sqr_sum[a, b, c, d] =
-        sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
-    output = input / (bias + alpha * sqr_sum) ** beta
-
-For details, see [Krizhevsky et al., ImageNet classification with deep
-convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
-
-input: 4-D.
-depth_radius: 0-D.  Half-width of the 1-D normalization window.
-bias: An offset (usually positive to avoid dividing by 0).
-alpha: A scale factor, usually positive.
-beta: An exponent.
-)doc");
+    });
 
 REGISTER_OP("LRNGrad")
     .Input("input_grads: T")
@@ -1323,7 +687,7 @@ REGISTER_OP("LRNGrad")
     .Attr("bias: float = 1.0")
     .Attr("alpha: float = 1.0")
     .Attr("beta: float = 0.5")
-    .Attr("T: {float, half} = DT_FLOAT")
+    .Attr("T: {half, bfloat16, float} = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &s));  // input_grads
@@ -1331,53 +695,26 @@ REGISTER_OP("LRNGrad")
       TF_RETURN_IF_ERROR(c->Merge(s, c->input(2), &s));     // output_image
       c->set_output(0, s);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Gradients for Local Response Normalization.
-
-input_grads: 4-D with shape `[batch, height, width, channels]`.
-input_image: 4-D with shape `[batch, height, width, channels]`.
-output_image: 4-D with shape `[batch, height, width, channels]`.
-depth_radius: A depth radius.
-bias: An offset (usually > 0 to avoid dividing by 0).
-alpha: A scale factor, usually positive.
-beta: An exponent.
-output: The gradients for LRN.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
 REGISTER_OP("MaxPool")
     .Attr(
-        "T: {float, double, int32, int64, uint8, int16, int8, uint16, "
-        "half, qint8} = DT_FLOAT")
+        "T: {half, bfloat16, float, double, int32, int64, uint8, int16, int8, "
+        "uint16, qint8} = DT_FLOAT")
     .Attr("ksize: list(int) >= 4")
     .Attr("strides: list(int) >= 4")
     .Attr(GetPaddingAttrString())
     .Attr("data_format: {'NHWC', 'NCHW', 'NCHW_VECT_C'} = 'NHWC'")
     .Input("input: T")
     .Output("output: T")
-    .SetShapeFn(shape_inference::MaxPoolShape)
-    .Doc(R"doc(
-Performs max pooling on the input.
-
-ksize: The size of the window for each dimension of the input tensor.
-strides: The stride of the sliding window for each dimension of the
-  input tensor.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-input: 4-D input to pool over.
-output: The max pooled output tensor.
-)doc");
+    .SetShapeFn(shape_inference::MaxPoolShape);
 
 REGISTER_OP("MaxPoolV2")
     .Attr(
-        "T: {float, double, int32, int64, uint8, int16, int8, uint16, "
-        "half, qint8} = DT_FLOAT")
+        "T: {half, bfloat16, float, double, int32, int64, uint8, int16, int8, "
+        "uint16, qint8} = DT_FLOAT")
     .Attr(GetPaddingAttrString())
     .Attr("data_format: {'NHWC', 'NCHW', 'NCHW_VECT_C'} = 'NHWC'")
     .Input("input: T")
@@ -1387,22 +724,7 @@ REGISTER_OP("MaxPoolV2")
     .SetShapeFn([](InferenceContext* c) {
       TF_RETURN_IF_ERROR(shape_inference::MaxPoolV2Shape(c, 3));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Performs max pooling on the input.
-
-ksize: The size of the window for each dimension of the input tensor.
-strides: The stride of the sliding window for each dimension of the
-  input tensor.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-input: 4-D input to pool over.
-output: The max pooled output tensor.
-)doc");
+    });
 
 REGISTER_OP("MaxPoolGrad")
     .Attr("ksize: list(int) >= 4")
@@ -1416,24 +738,7 @@ REGISTER_OP("MaxPoolGrad")
     .Attr("T: realnumbertype = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 4);
-    })
-    .Doc(R"doc(
-Computes gradients of the maxpooling function.
-
-ksize: The size of the window for each dimension of the input tensor.
-strides: The stride of the sliding window for each dimension of the
-  input tensor.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-orig_input: The original input tensor.
-orig_output: The original output tensor.
-grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
-output: Gradients w.r.t. the input to `max_pool`.
-)doc");
+    });
 
 REGISTER_OP("MaxPoolGradV2")
     .Attr(GetPaddingAttrString())
@@ -1447,24 +752,7 @@ REGISTER_OP("MaxPoolGradV2")
     .Attr("T: realnumbertype = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 4);
-    })
-    .Doc(R"doc(
-Computes gradients of the maxpooling function.
-
-ksize: The size of the window for each dimension of the input tensor.
-strides: The stride of the sliding window for each dimension of the
-  input tensor.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-orig_input: The original input tensor.
-orig_output: The original output tensor.
-grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
-output: Gradients w.r.t. the input to `max_pool`.
-)doc");
+    });
 
 REGISTER_OP("MaxPoolGradGrad")
     .Attr("ksize: list(int) >= 4")
@@ -1484,24 +772,7 @@ REGISTER_OP("MaxPoolGradGrad")
       // Validate 'orig_output' is same shape as 'output'
       TF_RETURN_IF_ERROR(c->Merge(c->input(1), c->output(0), &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes second-order gradients of the maxpooling function.
-
-ksize: The size of the window for each dimension of the input tensor.
-strides: The stride of the sliding window for each dimension of the
-  input tensor.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-orig_input: The original input tensor.
-orig_output: The original output tensor.
-grad: 4-D.  Gradients of gradients w.r.t. the input of `max_pool`.
-output: Gradients of gradients w.r.t. the input to `max_pool`.
-)doc");
+    });
 
 REGISTER_OP("MaxPoolGradGradV2")
     .Attr(GetPaddingAttrString())
@@ -1521,24 +792,7 @@ REGISTER_OP("MaxPoolGradGradV2")
       // Validate 'orig_output' is same shape as 'output'
       TF_RETURN_IF_ERROR(c->Merge(c->input(1), c->output(0), &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes second-order gradients of the maxpooling function.
-
-ksize: The size of the window for each dimension of the input tensor.
-strides: The stride of the sliding window for each dimension of the
-  input tensor.
-padding: The type of padding algorithm to use.
-data_format: Specify the data format of the input and output data. With the
-    default format "NHWC", the data is stored in the order of:
-        [batch, in_height, in_width, in_channels].
-    Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, in_channels, in_height, in_width].
-orig_input: The original input tensor.
-orig_output: The original output tensor.
-grad: 4-D.  Gradients of gradients w.r.t. the input of `max_pool`.
-output: Gradients of gradients w.r.t. the input to `max_pool`.
-)doc");
+    });
 
 REGISTER_OP("MaxPoolWithArgmax")
     .Attr("ksize: list(int) >= 4")
@@ -1553,27 +807,7 @@ REGISTER_OP("MaxPoolWithArgmax")
       TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c));
       c->set_output(1, c->output(0));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Performs max pooling on the input and outputs both max values and indices.
-
-The indices in `argmax` are flattened, so that a maximum value at position
-`[b, y, x, c]` becomes flattened index
-`((b * height + y) * width + x) * channels + c`.
-
-The indices returned are always in `[0, height) x [0, width)` before flattening,
-even if padding is involved and the mathematically correct answer is outside
-(either negative or too large).  This is a bug, but fixing it is difficult to do
-in a safe backwards compatible way, especially due to flattening.
-
-ksize: The size of the window for each dimension of the input tensor.
-strides: The stride of the sliding window for each dimension of the
-  input tensor.
-padding: The type of padding algorithm to use.
-input: 4-D with shape `[batch, height, width, channels]`.  Input to pool over.
-output: The max pooled output tensor.
-argmax: 4-D.  The flattened indices of the max values chosen for each output.
-)doc");
+    });
 
 REGISTER_OP("MaxPoolGradWithArgmax")
     .Attr("ksize: list(int) >= 4")
@@ -1587,20 +821,7 @@ REGISTER_OP("MaxPoolGradWithArgmax")
     .Attr("T: realnumbertype")
     .SetShapeFn([](InferenceContext* c) {
       return UnchangedShapeWithRank(c, 4);
-    })
-    .Doc(R"doc(
-Computes gradients of the maxpooling function.
-
-ksize: The size of the window for each dimension of the input tensor.
-strides: The stride of the sliding window for each dimension of the
-  input tensor.
-padding: The type of padding algorithm to use.
-input: The original input.
-grad: 4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t. the
-  output of `max_pool`.
-argmax: The indices of the maximum values chosen for each output of `max_pool`.
-output: Gradients w.r.t. the input of `max_pool`.
-)doc");
+    });
 
 REGISTER_OP("MaxPoolGradGradWithArgmax")
     .Attr("ksize: list(int) >= 4")
@@ -1620,20 +841,7 @@ REGISTER_OP("MaxPoolGradGradWithArgmax")
       // Validate 'argmax' is same shape as 'output'
       TF_RETURN_IF_ERROR(c->Merge(c->input(2), c->output(0), &unused));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes second-order gradients of the maxpooling function.
-
-ksize: The size of the window for each dimension of the input tensor.
-strides: The stride of the sliding window for each dimension of the
-  input tensor.
-padding: The type of padding algorithm to use.
-input: The original input.
-grad: 4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t. the
-  input of `max_pool`.
-argmax: The indices of the maximum values chosen for each output of `max_pool`.
-output: Gradients of gradients w.r.t. the input of `max_pool`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1717,43 +925,7 @@ REGISTER_OP("Dilation2D")
           {batch_size_dim, output_rows, output_cols, output_depth_dim});
       c->set_output(0, output_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the grayscale dilation of 4-D `input` and 3-D `filter` tensors.
-
-The `input` tensor has shape `[batch, in_height, in_width, depth]` and the
-`filter` tensor has shape `[filter_height, filter_width, depth]`, i.e., each
-input channel is processed independently of the others with its own structuring
-function. The `output` tensor has shape
-`[batch, out_height, out_width, depth]`. The spatial dimensions of the output
-tensor depend on the `padding` algorithm. We currently only support the default
-"NHWC" `data_format`.
-
-In detail, the grayscale morphological 2-D dilation is the max-sum correlation
-(for consistency with `conv2d`, we use unmirrored filters):
-
-    output[b, y, x, c] =
-       max_{dy, dx} input[b,
-                          strides[1] * y + rates[1] * dy,
-                          strides[2] * x + rates[2] * dx,
-                          c] +
-                    filter[dy, dx, c]
-
-Max-pooling is a special case when the filter has size equal to the pooling
-kernel size and contains all zeros.
-
-Note on duality: The dilation of `input` by the `filter` is equal to the
-negation of the erosion of `-input` by the reflected `filter`.
-
-input: 4-D with shape `[batch, in_height, in_width, depth]`.
-filter: 3-D with shape `[filter_height, filter_width, depth]`.
-strides: The stride of the sliding window for each dimension of the input
- tensor. Must be: `[1, stride_height, stride_width, 1]`.
-rates: The input stride for atrous morphological dilation. Must be:
- `[1, rate_height, rate_width, 1]`.
-padding: The type of padding algorithm to use.
-output: 4-D with shape `[batch, out_height, out_width, depth]`.
-)doc");
+    });
 
 REGISTER_OP("Dilation2DBackpropInput")
     .Input("input: T")
@@ -1764,20 +936,7 @@ REGISTER_OP("Dilation2DBackpropInput")
     .Attr("strides: list(int) >= 4")
     .Attr("rates: list(int) >= 4")
     .Attr(GetPaddingAttrString())
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Computes the gradient of morphological 2-D dilation with respect to the input.
-
-input: 4-D with shape `[batch, in_height, in_width, depth]`.
-filter: 3-D with shape `[filter_height, filter_width, depth]`.
-out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
-in_backprop: 4-D with shape `[batch, in_height, in_width, depth]`.
-strides: 1-D of length 4. The stride of the sliding window for each dimension of
-  the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
-rates: 1-D of length 4. The input stride for atrous morphological dilation.
-  Must be: `[1, rate_height, rate_width, 1]`.
-padding: The type of padding algorithm to use.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Dilation2DBackpropFilter")
     .Input("input: T")
@@ -1791,20 +950,7 @@ REGISTER_OP("Dilation2DBackpropFilter")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->input(1));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes the gradient of morphological 2-D dilation with respect to the filter.
-
-input: 4-D with shape `[batch, in_height, in_width, depth]`.
-filter: 3-D with shape `[filter_height, filter_width, depth]`.
-out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
-filter_backprop: 3-D with shape `[filter_height, filter_width, depth]`.
-strides: 1-D of length 4. The stride of the sliding window for each dimension of
-  the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
-rates: 1-D of length 4. The input stride for atrous morphological dilation.
-  Must be: `[1, rate_height, rate_width, 1]`.
-padding: The type of padding algorithm to use.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -1812,190 +958,99 @@ REGISTER_OP("Relu")
     .Input("features: T")
     .Output("activations: T")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Computes rectified linear: `max(features, 0)`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("ReluGrad")
     .Input("gradients: T")
     .Input("features: T")
     .Output("backprops: T")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
-    .Doc(R"doc(
-Computes rectified linear gradients for a Relu operation.
-
-gradients: The backpropagated gradients to the corresponding Relu operation.
-features: The features passed as input to the corresponding Relu operation, OR
-  the outputs of that operation (both work equivalently).
-backprops: `gradients * (features > 0)`.
-)doc");
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
 REGISTER_OP("Relu6")
     .Input("features: T")
     .Output("activations: T")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Computes rectified linear 6: `min(max(features, 0), 6)`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Relu6Grad")
     .Input("gradients: T")
     .Input("features: T")
     .Output("backprops: T")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
-    .Doc(R"doc(
-Computes rectified linear 6 gradients for a Relu6 operation.
-
-gradients: The backpropagated gradients to the corresponding Relu6 operation.
-features: The features passed as input to the corresponding Relu6 operation, or
-  its output; using either one produces the same result.
-backprops: The gradients:
-  `gradients * (features > 0) * (features < 6)`.
-)doc");
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
 REGISTER_OP("Elu")
     .Input("features: T")
     .Output("activations: T")
-    .Attr("T: {half, float, double}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise.
-
-See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)
-](http://arxiv.org/abs/1511.07289)
-)doc");
+    .Attr("T: {half, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("EluGrad")
     .Input("gradients: T")
     .Input("outputs: T")
     .Output("backprops: T")
-    .Attr("T: {half, float, double}")
-    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
-    .Doc(R"doc(
-Computes gradients for the exponential linear (Elu) operation.
-
-gradients: The backpropagated gradients to the corresponding Elu operation.
-outputs: The outputs of the corresponding Elu operation.
-backprops: The gradients: `gradients * (outputs + 1)` if outputs < 0,
-`gradients` otherwise.
-)doc");
+    .Attr("T: {half, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
 REGISTER_OP("Selu")
     .Input("features: T")
     .Output("activations: T")
-    .Attr("T: {half, float, double}")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)`
-if < 0, `scale * features` otherwise.
-
-See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
-)doc");
+    .Attr("T: {half, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("SeluGrad")
     .Input("gradients: T")
     .Input("outputs: T")
     .Output("backprops: T")
-    .Attr("T: {half, float, double}")
-    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
-    .Doc(R"doc(
-Computes gradients for the scaled exponential linear (Selu) operation.
-
-gradients: The backpropagated gradients to the corresponding Selu operation.
-outputs: The outputs of the corresponding Selu operation.
-backprops: The gradients: `gradients * (outputs + scale * alpha)`
-if outputs < 0, `scale * gradients` otherwise.
-)doc");
+    .Attr("T: {half, bfloat16, float, double}")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
 REGISTER_OP("Softplus")
     .Input("features: T")
     .Output("activations: T")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Computes softplus: `log(exp(features) + 1)`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("SoftplusGrad")
     .Input("gradients: T")
     .Input("features: T")
     .Output("backprops: T")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
-    .Doc(R"doc(
-Computes softplus gradients for a softplus operation.
-
-gradients: The backpropagated gradients to the corresponding softplus operation.
-features: The features passed as input to the corresponding softplus operation.
-backprops: The gradients: `gradients / (1 + exp(-features))`.
-)doc");
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
 REGISTER_OP("Softsign")
     .Input("features: T")
     .Output("activations: T")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Computes softsign: `features / (abs(features) + 1)`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("SoftsignGrad")
     .Input("gradients: T")
     .Input("features: T")
     .Output("backprops: T")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
-    .Doc(R"doc(
-Computes softsign gradients for a softsign operation.
-
-gradients: The backpropagated gradients to the corresponding softsign operation.
-features: The features passed as input to the corresponding softsign operation.
-backprops: The gradients: `gradients / (1 + abs(features)) ** 2`.
-)doc");
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
 // --------------------------------------------------------------------------
 
 REGISTER_OP("Softmax")
     .Input("logits: T")
     .Output("softmax: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
-    })
-    .Doc(R"doc(
-Computes softmax activations.
-
-For each batch `i` and class `j` we have
-
-    softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j]))
-
-logits: 2-D with shape `[batch_size, num_classes]`.
-softmax: Same shape as `logits`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
 REGISTER_OP("LogSoftmax")
     .Input("logits: T")
     .Output("logsoftmax: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
-    })
-    .Doc(R"doc(
-Computes log softmax activations.
-
-For each batch `i` and class `j` we have
-
-    logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i])))
-
-logits: 2-D with shape `[batch_size, num_classes]`.
-logsoftmax: Same shape as `logits`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -2004,7 +1059,7 @@ REGISTER_OP("SoftmaxCrossEntropyWithLogits")
     .Input("labels: T")
     .Output("loss: T")
     .Output("backprop: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle input;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input));
@@ -2014,26 +1069,14 @@ REGISTER_OP("SoftmaxCrossEntropyWithLogits")
       c->set_output(0, c->Vector(batch_size));
       c->set_output(1, input);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes softmax cross entropy cost and gradients to backpropagate.
-
-Inputs are the logits, not probabilities.
-
-features: batch_size x num_classes matrix
-labels: batch_size x num_classes matrix
-  The caller must ensure that each batch of labels represents a valid
-  probability distribution.
-loss: Per example loss (batch_size vector).
-backprop: backpropagated gradients (batch_size x num_classes matrix).
-)doc");
+    });
 
 REGISTER_OP("SparseSoftmaxCrossEntropyWithLogits")
     .Input("features: T")
     .Input("labels: Tlabels")
     .Output("loss: T")
     .Output("backprop: T")
-    .Attr("T: {half, float, double}")
+    .Attr("T: {half, bfloat16, float, double}")
     .Attr("Tlabels: {int32, int64} = DT_INT64")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle features;
@@ -2049,23 +1092,7 @@ REGISTER_OP("SparseSoftmaxCrossEntropyWithLogits")
       c->set_output(0, c->Vector(batch_size));
       c->set_output(1, features);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes softmax cross entropy cost and gradients to backpropagate.
-
-Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept
-a matrix of label probabilities, but rather a single label per row
-of features.  This label is considered to have probability 1.0 for the
-given row.
-
-Inputs are the logits, not probabilities.
-
-features: batch_size x num_classes matrix
-labels: batch_size vector with values in [0, num_classes).
-  This is the label for the given minibatch entry.
-loss: Per example loss (batch_size vector).
-backprop: backpropagated gradients (batch_size x num_classes matrix).
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -2085,31 +1112,7 @@ REGISTER_OP("InTopK")
           c->Merge(c->Dim(predictions, 0), c->Dim(targets, 0), &batch_size));
       c->set_output(0, c->Vector(batch_size));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Says whether the targets are in the top `K` predictions.
-
-This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the
-prediction for the target class is among the top `k` predictions among
-all predictions for example `i`. Note that the behavior of `InTopK` differs
-from the `TopK` op in its handling of ties; if multiple classes have the
-same prediction value and straddle the top-`k` boundary, all of those
-classes are considered to be in the top `k`.
-
-More formally, let
-
-  \\(predictions_i\\) be the predictions for all classes for example `i`,
-  \\(targets_i\\) be the target class for example `i`,
-  \\(out_i\\) be the output for example `i`,
-
-$$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$
-
-predictions: A `batch_size` x `classes` tensor.
-targets: A `batch_size` vector of class ids.
-k: Number of top elements to look at for computing precision.
-precision: Computed Precision at `k` as a `bool Tensor`.
-
-)doc");
+    });
 
 // This is the same as `InTopK`, but takes `k` as in input rather than an attr.
 REGISTER_OP("InTopKV2")
@@ -2128,31 +1131,7 @@ REGISTER_OP("InTopKV2")
           c->Merge(c->Dim(predictions, 0), c->Dim(targets, 0), &batch_size));
       c->set_output(0, c->Vector(batch_size));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Says whether the targets are in the top `K` predictions.
-
-This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the
-prediction for the target class is among the top `k` predictions among
-all predictions for example `i`. Note that the behavior of `InTopK` differs
-from the `TopK` op in its handling of ties; if multiple classes have the
-same prediction value and straddle the top-`k` boundary, all of those
-classes are considered to be in the top `k`.
-
-More formally, let
-
-  \\(predictions_i\\) be the predictions for all classes for example `i`,
-  \\(targets_i\\) be the target class for example `i`,
-  \\(out_i\\) be the output for example `i`,
-
-$$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$
-
-predictions: A `batch_size` x `classes` tensor.
-targets: A `batch_size` vector of class ids.
-k: Number of top elements to look at for computing precision.
-precision: Computed precision at `k` as a `bool Tensor`.
-
-)doc");
+    });
 
 namespace {
 
@@ -2200,31 +1179,7 @@ REGISTER_OP("TopK")
     .Attr("sorted: bool = true")
     .Attr("T: realnumbertype")
     .Deprecated(7, "Use TopKV2 instead")
-    .SetShapeFn(TopKShapeFn)
-    .Doc(R"doc(
-Finds values and indices of the `k` largest elements for the last dimension.
-
-If the input is a vector (rank-1), finds the `k` largest entries in the vector
-and outputs their values and indices as vectors.  Thus `values[j]` is the
-`j`-th largest entry in `input`, and its index is `indices[j]`.
-
-For matrices (resp. higher rank input), computes the top `k` entries in each
-row (resp. vector along the last dimension).  Thus,
-
-    values.shape = indices.shape = input.shape[:-1] + [k]
-
-If two elements are equal, the lower-index element appears first.
-
-If `k` varies dynamically, use `TopKV2` below.
-
-input: 1-D or higher with last dimension at least `k`.
-k: Number of top elements to look for along the last dimension (along each
-  row for matrices).
-sorted: If true the resulting `k` elements will be sorted by the values in
-  descending order.
-values: The `k` largest elements along each last dimensional slice.
-indices: The indices of `values` within the last dimension of `input`.
-)doc");
+    .SetShapeFn(TopKShapeFn);
 
 // This is the same as `TopK`, but takes `k` as in input rather than an attr.
 REGISTER_OP("TopKV2")
@@ -2234,29 +1189,7 @@ REGISTER_OP("TopKV2")
     .Output("indices: int32")
     .Attr("sorted: bool = true")
     .Attr("T: realnumbertype")
-    .SetShapeFn(TopKShapeFn)
-    .Doc(R"doc(
-Finds values and indices of the `k` largest elements for the last dimension.
-
-If the input is a vector (rank-1), finds the `k` largest entries in the vector
-and outputs their values and indices as vectors.  Thus `values[j]` is the
-`j`-th largest entry in `input`, and its index is `indices[j]`.
-
-For matrices (resp. higher rank input), computes the top `k` entries in each
-row (resp. vector along the last dimension).  Thus,
-
-    values.shape = indices.shape = input.shape[:-1] + [k]
-
-If two elements are equal, the lower-index element appears first.
-
-input: 1-D or higher with last dimension at least `k`.
-k: 0-D.  Number of top elements to look for along the last dimension (along each
-  row for matrices).
-sorted: If true the resulting `k` elements will be sorted by the values in
-  descending order.
-values: The `k` largest elements along each last dimensional slice.
-indices: The indices of `values` within the last dimension of `input`.
-)doc");
+    .SetShapeFn(TopKShapeFn);
 
 // --------------------------------------------------------------------------
 
@@ -2288,25 +1221,7 @@ REGISTER_OP("NthElement")
       TF_RETURN_IF_ERROR(c->Subshape(input, 0, -1, &s));
       c->set_output(0, s);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Finds values of the `n`-th order statistic for the last dimension.
-
-If the input is a vector (rank-1), finds the entries which is the nth-smallest
-value in the vector and outputs their values as scalar tensor.
-
-For matrices (resp. higher rank input), computes the entries which is the
-nth-smallest value in each row (resp. vector along the last dimension). Thus,
-
-    values.shape = input.shape[:-1]
-
-input: 1-D or higher with last dimension at least `n+1`.
-n: 0-D. Position of sorted vector to select along the last dimension (along
-  each row for matrices). Valid range of n is `[0, input.shape[:-1])`
-reverse: When set to True, find the nth-largest value in the vector and vice
-  versa.
-values: The `n`-th order statistic along each last dimensional slice.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -2322,70 +1237,7 @@ REGISTER_OP("FractionalMaxPool")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .Attr("T: {float, double, int32, int64}")
-    .SetShapeFn(FractionalPoolShapeFn)
-    .Doc(R"doc(
-Performs fractional max pooling on the input.
-
-Fractional max pooling is slightly different than regular max pooling.  In
-regular max pooling, you downsize an input set by taking the maximum value of
-smaller N x N subsections of the set (often 2x2), and try to reduce the set by
-a factor of N, where N is an integer.  Fractional max pooling, as you might
-expect from the word "fractional", means that the overall reduction ratio N
-does not have to be an integer.
-
-The sizes of the pooling regions are generated randomly but are fairly uniform.
-For example, let's look at the height dimension, and the constraints on the
-list of rows that will be pool boundaries.
-
-First we define the following:
-
-1.  input_row_length : the number of rows from the input set
-2.  output_row_length : which will be smaller than the input
-3.  alpha = input_row_length / output_row_length : our reduction ratio
-4.  K = floor(alpha)
-5.  row_pooling_sequence : this is the result list of pool boundary rows
-
-Then, row_pooling_sequence should satisfy:
-
-1.  a[0] = 0 : the first value of the sequence is 0
-2.  a[end] = input_row_length : the last value of the sequence is the size
-3.  K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size
-4.  length(row_pooling_sequence) = output_row_length+1
-
-For more details on fractional max pooling, see this paper:
-[Benjamin Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071)
-
-value: 4-D with shape `[batch, height, width, channels]`.
-pooling_ratio: Pooling ratio for each dimension of `value`, currently only
-  supports row and col dimension and should be >= 1.0. For example, a valid
-  pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
-  must be 1.0 because we don't allow pooling on batch and channels
-  dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
-  respectively.
-pseudo_random: When set to True, generates the pooling sequence in a
-  pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
-  Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for
-  difference between pseudorandom and random.
-overlapping: When set to True, it means when pooling, the values at the boundary
-  of adjacent pooling cells are used by both cells. For example:
-
-  `index  0  1  2  3  4`
-
-  `value  20 5  16 3  7`
-
-  If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
-  The result would be [20, 16] for fractional max pooling.
-deterministic: When set to True, a fixed pooling region will be used when
-  iterating over a FractionalMaxPool node in the computation graph. Mainly used
-  in unit test to make FractionalMaxPool deterministic.
-seed: If either seed or seed2 are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: An second seed to avoid seed collision.
-output: output tensor after fractional max pooling.
-row_pooling_sequence: row pooling sequence, needed to calculate gradient.
-col_pooling_sequence: column pooling sequence, needed to calculate gradient.
-)doc");
+    .SetShapeFn(FractionalPoolShapeFn);
 
 REGISTER_OP("FractionalMaxPoolGrad")
     .Input("orig_input: T")
@@ -2398,29 +1250,7 @@ REGISTER_OP("FractionalMaxPoolGrad")
     .Attr("T: {float, double, int32, int64}")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRank(c, 4);
-    })
-    .Doc(R"doc(
-Computes gradient of the FractionalMaxPool function.
-
-orig_input: Original input for `fractional_max_pool`
-orig_output: Original output for `fractional_max_pool`
-out_backprop: 4-D with shape `[batch, height, width, channels]`.  Gradients
-  w.r.t. the output of `fractional_max_pool`.
-row_pooling_sequence: row pooling sequence, form pooling region with
-  col_pooling_sequence.
-col_pooling_sequence: column pooling sequence, form pooling region with
-  row_pooling sequence.
-overlapping: When set to True, it means when pooling, the values at the boundary
-  of adjacent pooling cells are used by both cells. For example:
-
-  `index  0  1  2  3  4`
-
-  `value  20 5  16 3  7`
-
-  If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
-  The result would be [20, 16] for fractional max pooling.
-output: 4-D.  Gradients w.r.t. the input of `fractional_max_pool`.
-)doc");
+    });
 
 // --------------------------------------------------------------------------
 
@@ -2436,46 +1266,7 @@ REGISTER_OP("FractionalAvgPool")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .Attr("T: {float, double, int32, int64}")
-    .SetShapeFn(FractionalPoolShapeFn)
-    .Doc(R"doc(
-Performs fractional average pooling on the input.
-
-Fractional average pooling is similar to Fractional max pooling in the pooling
-region generation step. The only difference is that after pooling regions are
-generated, a mean operation is performed instead of a max operation in each
-pooling region.
-
-value: 4-D with shape `[batch, height, width, channels]`.
-pooling_ratio: Pooling ratio for each dimension of `value`, currently only
-  supports row and col dimension and should be >= 1.0. For example, a valid
-  pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
-  must be 1.0 because we don't allow pooling on batch and channels
-  dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
-  respectively.
-pseudo_random: When set to True, generates the pooling sequence in a
-  pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
-  Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for
-  difference between pseudorandom and random.
-overlapping: When set to True, it means when pooling, the values at the boundary
-  of adjacent pooling cells are used by both cells. For example:
-
-  `index  0  1  2  3  4`
-
-  `value  20 5  16 3  7`
-
-  If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
-  The result would be [41/3, 26/3] for fractional avg pooling.
-deterministic: When set to True, a fixed pooling region will be used when
-  iterating over a FractionalAvgPool node in the computation graph. Mainly used
-  in unit test to make FractionalAvgPool deterministic.
-seed: If either seed or seed2 are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: An second seed to avoid seed collision.
-output: output tensor after fractional avg pooling.
-row_pooling_sequence: row pooling sequence, needed to calculate gradient.
-col_pooling_sequence: column pooling sequence, needed to calculate gradient.
-)doc");
+    .SetShapeFn(FractionalPoolShapeFn);
 
 REGISTER_OP("FractionalAvgPoolGrad")
     .Input("orig_input_tensor_shape: int64")
@@ -2494,34 +1285,7 @@ REGISTER_OP("FractionalAvgPoolGrad")
         c->set_output(0, c->UnknownShapeOfRank(4));
       }
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes gradient of the FractionalAvgPool function.
-
-Unlike FractionalMaxPoolGrad, we don't need to find arg_max for
-FractionalAvgPoolGrad, we just need to evenly back-propagate each element of
-out_backprop to those indices that form the same pooling cell. Therefore, we
-just need to know the shape of original input tensor, instead of the whole
-tensor.
-
-orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool`
-out_backprop: 4-D with shape `[batch, height, width, channels]`.  Gradients
-  w.r.t. the output of `fractional_avg_pool`.
-row_pooling_sequence: row pooling sequence, form pooling region with
-  col_pooling_sequence.
-col_pooling_sequence: column pooling sequence, form pooling region with
-  row_pooling sequence.
-overlapping: When set to True, it means when pooling, the values at the boundary
-  of adjacent pooling cells are used by both cells. For example:
-
-  `index  0  1  2  3  4`
-
-  `value  20 5  16 3  7`
-
-  If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
-  The result would be [41/3, 26/3] for fractional avg pooling.
-output: 4-D.  Gradients w.r.t. the input of `fractional_avg_pool`.
-)doc");
+    });
 
 REGISTER_OP("QuantizedAvgPool")
     .Input("input: T")
@@ -2542,22 +1306,7 @@ REGISTER_OP("QuantizedAvgPool")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Produces the average pool of the input tensor for quantized types.
-
-input: 4-D with shape `[batch, height, width, channels]`.
-ksize: The size of the window for each dimension of the input tensor.
-  The length must be 4 to match the number of dimensions of the input.
-strides: The stride of the sliding window for each dimension of the input
-  tensor.  The length must be 4 to match the number of dimensions of the input.
-padding: The type of padding algorithm to use.
-min_input: The float value that the lowest quantized input value represents.
-max_input: The float value that the highest quantized input value represents.
-min_output: The float value that the lowest quantized output value represents.
-max_output: The float value that the highest quantized output value represents.
-
-)doc");
+    });
 
 REGISTER_OP("QuantizedBiasAdd")
     .Input("input: T1")
@@ -2582,21 +1331,7 @@ REGISTER_OP("QuantizedBiasAdd")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Adds Tensor 'bias' to Tensor 'input' for Quantized types.
-
-Broadcasts the values of bias on dimensions 0..N-2 of 'input'.
-
-bias: A 1D bias Tensor with size matching the last dimension of 'input'.
-min_input: The float value that the lowest quantized input value represents.
-max_input: The float value that the highest quantized input value represents.
-min_bias: The float value that the lowest quantized bias value represents.
-max_bias: The float value that the highest quantized bias value represents.
-min_out: The float value that the lowest quantized output value represents.
-max_out: The float value that the highest quantized output value represents.
-
-)doc");
+    });
 
 REGISTER_OP("QuantizedConv2D")
     .Input("input: Tinput")
@@ -2613,6 +1348,7 @@ REGISTER_OP("QuantizedConv2D")
     .Attr("out_type: quantizedtype = DT_QINT32")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .SetShapeFn([](InferenceContext* c) {
       TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c));
       ShapeHandle unused;
@@ -2623,26 +1359,7 @@ REGISTER_OP("QuantizedConv2D")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes a 2D convolution given quantized 4D input and filter tensors.
-The inputs are quantized tensors where the lowest value represents the real
-number of the associated minimum, and the highest represents the maximum.
-This means that you can only interpret the quantized output in the same way, by
-taking the returned minimum and maximum values into account.
-
-filter: filter's input_depth dimension must match input's depth dimensions.
-strides: The stride of the sliding window for each dimension of the input
-  tensor.
-padding: The type of padding algorithm to use.
-min_input: The float value that the lowest quantized input value represents.
-max_input: The float value that the highest quantized input value represents.
-min_filter: The float value that the lowest quantized filter value represents.
-max_filter: The float value that the highest quantized filter value represents.
-min_output: The float value that the lowest quantized output value represents.
-max_output: The float value that the highest quantized output value represents.
-
-)doc");
+    });
 
 REGISTER_OP("QuantizedMaxPool")
     .Input("input: T")
@@ -2663,22 +1380,7 @@ REGISTER_OP("QuantizedMaxPool")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Produces the max pool of the input tensor for quantized types.
-
-input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
-ksize: The size of the window for each dimension of the input tensor.
-  The length must be 4 to match the number of dimensions of the input.
-strides: The stride of the sliding window for each dimension of the input
-  tensor. The length must be 4 to match the number of dimensions of the input.
-padding: The type of padding algorithm to use.
-min_input: The float value that the lowest quantized input value represents.
-max_input: The float value that the highest quantized input value represents.
-min_output: The float value that the lowest quantized output value represents.
-max_output: The float value that the highest quantized output value represents.
-
-)doc");
+    });
 
 REGISTER_OP("QuantizedRelu")
     .Input("features: Tinput")
@@ -2697,17 +1399,7 @@ REGISTER_OP("QuantizedRelu")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes Quantized Rectified Linear: `max(features, 0)`
-
-activations: Has the same output shape as "features".
-min_features: The float value that the lowest quantized value represents.
-max_features: The float value that the highest quantized value represents.
-min_activations: The float value that the lowest quantized value represents.
-max_activations: The float value that the highest quantized value represents.
-
-)doc");
+    });
 
 REGISTER_OP("QuantizedRelu6")
     .Input("features: Tinput")
@@ -2726,17 +1418,7 @@ REGISTER_OP("QuantizedRelu6")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`
-
-activations: Has the same output shape as "features".
-min_features: The float value that the lowest quantized value represents.
-max_features: The float value that the highest quantized value represents.
-min_activations: The float value that the lowest quantized value represents.
-max_activations: The float value that the highest quantized value represents.
-
-)doc");
+    });
 
 REGISTER_OP("QuantizedReluX")
     .Input("features: Tinput")
@@ -2756,17 +1438,7 @@ REGISTER_OP("QuantizedReluX")
       c->set_output(1, c->Scalar());
       c->set_output(2, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`
-
-activations: Has the same output shape as "features".
-min_features: The float value that the lowest quantized value represents.
-max_features: The float value that the highest quantized value represents.
-min_activations: The float value that the lowest quantized value represents.
-max_activations: The float value that the highest quantized value represents.
-
-)doc");
+    });
 
 REGISTER_OP("QuantizedBatchNormWithGlobalNormalization")
     .Input("t: Tinput")
@@ -2809,39 +1481,7 @@ REGISTER_OP("QuantizedBatchNormWithGlobalNormalization")
       c->set_output(2, c->Scalar());
 
       return Status::OK();
-    })
-    .Doc(R"doc(
-Quantized Batch normalization.
-
-This op is deprecated and will be removed in the future. Prefer
-`tf.nn.batch_normalization`.
-
-t: A 4D input Tensor.
-t_min: The value represented by the lowest quantized input.
-t_max: The value represented by the highest quantized input.
-m: A 1D mean Tensor with size matching the last dimension of t.
-  This is the first output from tf.nn.moments,
-  or a saved moving average thereof.
-m_min: The value represented by the lowest quantized mean.
-m_max: The value represented by the highest quantized mean.
-v: A 1D variance Tensor with size matching the last dimension of t.
-  This is the second output from tf.nn.moments,
-  or a saved moving average thereof.
-v_min: The value represented by the lowest quantized variance.
-v_max: The value represented by the highest quantized variance.
-beta: A 1D beta Tensor with size matching the last dimension of t.
-  An offset to be added to the normalized tensor.
-beta_min: The value represented by the lowest quantized offset.
-beta_max: The value represented by the highest quantized offset.
-gamma: A 1D gamma Tensor with size matching the last dimension of t.
-  If "scale_after_normalization" is true, this tensor will be multiplied
-  with the normalized tensor.
-gamma_min: The value represented by the lowest quantized gamma.
-gamma_max: The value represented by the highest quantized gamma.
-variance_epsilon: A small float number to avoid dividing by 0.
-scale_after_normalization: A bool indicating whether the resulted tensor
-  needs to be multiplied with gamma.
-)doc");
+    });
 
 #ifdef INTEL_MKL
 REGISTER_OP("_MklConv2D")
@@ -2866,6 +1506,25 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("__MklDummyConv2DWithBias")
+    .Input("input: T")
+    .Input("filter: T")
+    .Input("bias: T")
+    .Output("output: T")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .Doc(R"doc(
+Dummy node that enables fusing Conv2D and BiasAdd operator for MKL. This node
+does not perform anything. It is just created as an intermediate output of
+merging Conv2D and BiasAdd.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklConv2DWithBias")
     .Input("input: T")
     .Input("filter: T")
@@ -2919,6 +1578,88 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("__MklDummyConv2DBackpropFilterWithBias")
+    .Input("input: T")
+    .Input("filter_sizes: int32")
+    .Input("out_backprop: T")
+    .Output("output: T")
+    .Output("bias_grad: T")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input_shape;
+      // Fetch the data_format attribute, which may not exist.
+      string data_format;
+      Status s = c->GetAttr("data_format", &data_format);
+
+      if (s.ok() && data_format == "NCHW") {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -3)));
+      } else {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -1)));
+      }
+      ShapeHandle sh;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &sh));
+      TF_RETURN_IF_ERROR(c->WithRank(sh, 4, &sh));
+      c->set_output(0, sh);
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Dummy node that enables fusing Conv2DBackpropFilter and BiasAddGrad operator
+for MKL. This node does not perform anything. It is just created as an
+intermediate output of merging Conv2DBackpropFilter and BiasAddGrad.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklConv2DBackpropFilterWithBias")
+    .Input("input: T")
+    .Input("filter_sizes: int32")
+    .Input("out_backprop: T")
+    .Input("mkl_input: uint8")
+    .Input("mkl_filter_size: uint8")
+    .Input("mkl_out_backprop: uint8")
+    .Output("output: T")
+    .Output("bias_grad: T")
+    .Output("mkl_output: uint8")
+    .Output("mkl_bias_grad: uint8")
+    .Attr("T: {half, float, double}")
+    .Attr("strides: list(int)")
+    .Attr("use_cudnn_on_gpu: bool = true")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input_shape;
+      // Fetch the data_format attribute, which may not exist.
+      string data_format;
+      Status s = c->GetAttr("data_format", &data_format);
+
+      if (s.ok() && data_format == "NCHW") {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -3)));
+      } else {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+        c->set_output(1, c->Vector(c->Dim(input_shape, -1)));
+      }
+      ShapeHandle sh;
+      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &sh));
+      TF_RETURN_IF_ERROR(c->WithRank(sh, 4, &sh));
+      c->set_output(0, sh);
+      return Status::OK();
+    })
+    .Doc(R"doc(
+MKL version of Conv2DBackpropFilterWithBias. Uses MKL DNN APIs to compute the
+gradients of convolution with respect to the filter.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklConv2DWithBiasBackpropBias")
     .Input("out_backprop: T")
     .Input("mkl_out_backprop: uint8")
@@ -2995,6 +1736,78 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("_MklElu")
+    .Input("features: T")
+    .Input("mkl_features: uint8")
+    .Output("activations: T")
+    .Output("mkl_activations: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+MKL version of Elu operator. Uses MKL DNN APIs to implement Elu operator.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklEluGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Input("mkl_gradients: uint8")
+    .Input("mkl_features: uint8")
+    .Output("backprops: T")
+    .Output("mkl_backprops: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
+    .Doc(R"doc(
+MKL version of EluGrad operator. Uses MKL DNN APIs to compute Elu
+gradients for Elu operation.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklSoftmax")
+    .Input("logits: T")
+    .Input("mkl_logits: uint8")
+    .Output("softmax: T")
+    .Output("mkl_softmax: uint8")
+    .Attr("T: {half, float, double}")
+    .SetShapeFn([](InferenceContext* c) {
+      return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
+    })
+    .Doc(R"doc(
+MKL version of ReluGrad operator. Uses MKL DNN APIs to compute rectified
+linear gradients for Relu operation.
+)doc");
+
+REGISTER_OP("_MklTanh")
+    .Input("features: T")
+    .Input("mkl_features: uint8")
+    .Output("activations: T")
+    .Output("mkl_activations: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+MKL version of Tanh operator. Uses MKL DNN APIs to implement Tanh operator.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklTanhGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Input("mkl_gradients: uint8")
+    .Input("mkl_features: uint8")
+    .Output("backprops: T")
+    .Output("mkl_backprops: uint8")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
+    .Doc(R"doc(
+MKL version of TanhGrad operator. Uses MKL DNN APIs to compute tanh
+gradients for Tanh operation.
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklMaxPool")
     .Attr("T: {float, half} = DT_FLOAT")
     .Attr("ksize: list(int) >= 4")
@@ -3005,7 +1818,11 @@ REGISTER_OP("_MklMaxPool")
     .Input("input: T")
     .Input("mkl_input: uint8")
     .Output("output: T")
+#ifndef INTEL_MKL_DNN
     .Output("workspace: T")
+#else
+    .Output("workspace: uint8")
+#endif
     .Output("mkl_output: uint8")
     .Output("mkl_workspace: uint8")
     .SetShapeFn(shape_inference::MaxPoolShape)
@@ -3027,7 +1844,11 @@ REGISTER_OP("_MklMaxPoolGrad")
     .Input("orig_input: T")
     .Input("orig_output: T")
     .Input("grad: T")
+#ifndef INTEL_MKL_DNN
     .Input("workspace: T")
+#else
+    .Input("workspace: uint8")
+#endif
     .Input("mkl_orig_input: uint8")
     .Input("mkl_orig_output: uint8")
     .Input("mkl_grad: uint8")
@@ -3095,7 +1916,11 @@ REGISTER_OP("_MklLRN")
     .Input("input: T")
     .Input("mkl_input: uint8")
     .Output("output: T")
+#ifndef INTEL_MKL_DNN
     .Output("workspace: T")
+#else
+    .Output("workspace: uint8")
+#endif
     .Output("mkl_output: uint8")
     .Output("mkl_workspace: uint8")
     .Attr("depth_radius: int = 5")
@@ -3119,7 +1944,11 @@ REGISTER_OP("_MklLRNGrad")
     .Input("input_grads: T")
     .Input("input_image: T")
     .Input("output_image: T")
+#ifndef INTEL_MKL_DNN
     .Input("workspace: T")
+#else
+    .Input("workspace: uint8")
+#endif
     .Input("mkl_input_grads: uint8")
     .Input("mkl_input_image: uint8")
     .Input("mkl_output_image: uint8")
diff --git a/tensorflow/core/ops/no_op.cc b/tensorflow/core/ops/no_op.cc
index e62353bb7f9e0c8b7ec753027a9da274bb6497e7..560e9e8daec0d6556e274f9fa4b12762847093e3 100644
--- a/tensorflow/core/ops/no_op.cc
+++ b/tensorflow/core/ops/no_op.cc
@@ -18,8 +18,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-REGISTER_OP("NoOp")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc("Does nothing. Only useful as a placeholder for control edges.");
+REGISTER_OP("NoOp").SetShapeFn(shape_inference::NoOutputs);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 9c41957ae6aa4ae1a893f09b6e5282a123831e38..82a895a98b7467358c54b620c9af034486fa98f6 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -6,7 +6,6 @@ op {
     default_value {
       s: ""
     }
-    description: "A string which is the message associated with the exception."
   }
   attr {
     name: "exit_without_error"
@@ -15,8 +14,6 @@ op {
       b: false
     }
   }
-  summary: "Raise a exception to abort the process when called."
-  description: "If exit_without_error is true, the process will exit normally,\notherwise it will exit with a SIGABORT signal.\n\nReturns nothing but an exception."
 }
 op {
   name: "Abs"
@@ -34,6 +31,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -41,14 +39,11 @@ op {
       }
     }
   }
-  summary: "Computes the absolute value of a tensor."
-  description: "Given a tensor `x`, this operation returns a tensor containing the absolute\nvalue of each element in `x`. For example, if x is an input element and y is\nan output element, this operation computes \\\\(y = |x|\\\\)."
 }
 op {
   name: "AccumulateNV2"
   input_arg {
     name: "inputs"
-    description: "A list of `Tensor` objects, each with same shape and type."
     type_attr: "T"
     number_attr: "N"
   }
@@ -69,17 +64,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -89,10 +85,7 @@ op {
   attr {
     name: "shape"
     type: "shape"
-    description: "Shape of elements of `inputs`."
   }
-  summary: "Returns the element-wise sum of a list of tensors."
-  description: "`tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not\nwait for all of its inputs to be ready before beginning to sum. This can\nsave memory if inputs are ready at different times, since minimum temporary\nstorage is proportional to the output size rather than the inputs size.\n\nUnlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.\n\nReturns a `Tensor` of same shape and type as the elements of `inputs`."
   is_aggregate: true
   is_commutative: true
 }
@@ -100,124 +93,107 @@ op {
   name: "AccumulatorApplyGradient"
   input_arg {
     name: "handle"
-    description: "The handle to a accumulator."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "local_step"
-    description: "The local_step value at which the gradient was computed."
     type: DT_INT64
   }
   input_arg {
     name: "gradient"
-    description: "A tensor of the gradient to be accumulated."
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The data type of accumulated gradients. Needs to correspond to the type\nof the accumulator."
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  summary: "Applies a gradient to a given accumulator."
-  description: "Does not add if local_step is lesser than the accumulator\'s global_step."
 }
 op {
   name: "AccumulatorNumAccumulated"
   input_arg {
     name: "handle"
-    description: "The handle to an accumulator."
     type: DT_STRING
     is_ref: true
   }
   output_arg {
     name: "num_accumulated"
-    description: "The number of gradients aggregated in the given accumulator."
     type: DT_INT32
   }
-  summary: "Returns the number of gradients aggregated in the given accumulators."
 }
 op {
   name: "AccumulatorSetGlobalStep"
   input_arg {
     name: "handle"
-    description: "The handle to an accumulator."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "new_global_step"
-    description: "The new global_step value to set."
     type: DT_INT64
   }
-  summary: "Updates the accumulator with a new value for global_step."
-  description: "Logs warning if the accumulator\'s value is already higher than\nnew_global_step."
 }
 op {
   name: "AccumulatorTakeGradient"
   input_arg {
     name: "handle"
-    description: "The handle to an accumulator."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "num_required"
-    description: "Number of gradients required before we return an aggregate."
     type: DT_INT32
   }
   output_arg {
     name: "average"
-    description: "The average of the accumulated gradients."
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The data type of accumulated gradients. Needs to correspond to the type\nof the accumulator."
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  summary: "Extracts the average gradient in the given ConditionalAccumulator."
-  description: "The op blocks until sufficient (i.e., more than num_required)\ngradients have been accumulated.  If the accumulator has already\naggregated more than num_required gradients, it returns the average of\nthe accumulated gradients.  Also automatically increments the recorded\nglobal_step in the accumulator by 1, and resets the aggregate to 0."
 }
 op {
   name: "Acos"
@@ -235,6 +211,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -244,7 +221,6 @@ op {
       }
     }
   }
-  summary: "Computes acos of x element-wise."
 }
 op {
   name: "Acosh"
@@ -262,6 +238,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -269,7 +246,6 @@ op {
       }
     }
   }
-  summary: "Computes inverse hyperbolic cosine of x element-wise."
 }
 op {
   name: "Add"
@@ -291,6 +267,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -304,29 +281,23 @@ op {
       }
     }
   }
-  summary: "Returns x + y element-wise."
-  description: "*NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "AddManySparseToTensorsMap"
   input_arg {
     name: "sparse_indices"
-    description: "2-D.  The `indices` of the minibatch `SparseTensor`.\n`sparse_indices[:, 0]` must be ordered values in `[0, N)`."
     type: DT_INT64
   }
   input_arg {
     name: "sparse_values"
-    description: "1-D.  The `values` of the minibatch `SparseTensor`."
     type_attr: "T"
   }
   input_arg {
     name: "sparse_shape"
-    description: "1-D.  The `shape` of the minibatch `SparseTensor`.\nThe minibatch size `N == sparse_shape[0]`."
     type: DT_INT64
   }
   output_arg {
     name: "sparse_handles"
-    description: "1-D.  The handles of the `SparseTensor` now stored in the\n`SparseTensorsMap`.  Shape: `[N]`."
     type: DT_INT64
   }
   attr {
@@ -339,7 +310,6 @@ op {
     default_value {
       s: ""
     }
-    description: "The container name for the `SparseTensorsMap` created by this op."
   }
   attr {
     name: "shared_name"
@@ -347,17 +317,13 @@ op {
     default_value {
       s: ""
     }
-    description: "The shared name for the `SparseTensorsMap` created by this op.\nIf blank, the new Operation\'s unique name is used."
   }
-  summary: "Add an `N`-minibatch `SparseTensor` to a `SparseTensorsMap`, return `N` handles."
-  description: "A `SparseTensor` of rank `R` is represented by three tensors: `sparse_indices`,\n`sparse_values`, and `sparse_shape`, where\n\n```sparse_indices.shape[1] == sparse_shape.shape[0] == R```\n\nAn `N`-minibatch of `SparseTensor` objects is represented as a `SparseTensor`\nhaving a first `sparse_indices` column taking values between `[0, N)`, where\nthe minibatch size `N == sparse_shape[0]`.\n\nThe input `SparseTensor` must have rank `R` greater than 1, and the first\ndimension is treated as the minibatch dimension.  Elements of the `SparseTensor`\nmust be sorted in increasing order of this first dimension.  The stored\n`SparseTensor` objects pointed to by each row of the output `sparse_handles`\nwill have rank `R-1`.\n\nThe `SparseTensor` values can then be read out as part of a minibatch by passing\nthe given keys as vector elements to `TakeManySparseFromTensorsMap`.  To ensure\nthe correct `SparseTensorsMap` is accessed, ensure that the same\n`container` and `shared_name` are passed to that Op.  If no `shared_name`\nis provided here, instead use the *name* of the Operation created by calling\n`AddManySparseToTensorsMap` as the `shared_name` passed to\n`TakeManySparseFromTensorsMap`.  Ensure the Operations are colocated."
   is_stateful: true
 }
 op {
   name: "AddN"
   input_arg {
     name: "inputs"
-    description: "Must all be the same size and shape."
     type_attr: "T"
     number_attr: "N"
   }
@@ -378,17 +344,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -396,7 +363,6 @@ op {
       }
     }
   }
-  summary: "Add all input tensors element wise."
   is_aggregate: true
   is_commutative: true
 }
@@ -404,22 +370,18 @@ op {
   name: "AddSparseToTensorsMap"
   input_arg {
     name: "sparse_indices"
-    description: "2-D.  The `indices` of the `SparseTensor`."
     type: DT_INT64
   }
   input_arg {
     name: "sparse_values"
-    description: "1-D.  The `values` of the `SparseTensor`."
     type_attr: "T"
   }
   input_arg {
     name: "sparse_shape"
-    description: "1-D.  The `shape` of the `SparseTensor`."
     type: DT_INT64
   }
   output_arg {
     name: "sparse_handle"
-    description: "0-D.  The handle of the `SparseTensor` now stored in the\n`SparseTensorsMap`."
     type: DT_INT64
   }
   attr {
@@ -432,7 +394,6 @@ op {
     default_value {
       s: ""
     }
-    description: "The container name for the `SparseTensorsMap` created by this op."
   }
   attr {
     name: "shared_name"
@@ -440,10 +401,7 @@ op {
     default_value {
       s: ""
     }
-    description: "The shared name for the `SparseTensorsMap` created by this op.\nIf blank, the new Operation\'s unique name is used."
   }
-  summary: "Add a `SparseTensor` to a `SparseTensorsMap` return its handle."
-  description: "A `SparseTensor` is represented by three tensors: `sparse_indices`,\n`sparse_values`, and `sparse_shape`.\n\nThis operator takes the given `SparseTensor` and adds it to a container\nobject (a `SparseTensorsMap`).  A unique key within this container is generated\nin the form of an `int64`, and this is the value that is returned.\n\nThe `SparseTensor` can then be read out as part of a minibatch by passing\nthe key as a vector element to `TakeManySparseFromTensorsMap`.  To ensure\nthe correct `SparseTensorsMap` is accessed, ensure that the same\n`container` and `shared_name` are passed to that Op.  If no `shared_name`\nis provided here, instead use the *name* of the Operation created by calling\n`AddSparseToTensorsMap` as the `shared_name` passed to\n`TakeManySparseFromTensorsMap`.  Ensure the Operations are colocated."
   is_stateful: true
 }
 op {
@@ -466,6 +424,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -478,8 +437,6 @@ op {
       }
     }
   }
-  summary: "Returns x + y element-wise."
-  description: "*NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
   is_aggregate: true
   is_commutative: true
 }
@@ -520,7 +477,6 @@ op {
       }
     }
   }
-  summary: "Deprecated. Disallowed in GraphDef version >= 2."
   deprecation {
     version: 2
     explanation: "Use AdjustContrastv2 instead"
@@ -530,77 +486,59 @@ op {
   name: "AdjustContrastv2"
   input_arg {
     name: "images"
-    description: "Images to adjust.  At least 3-D."
     type: DT_FLOAT
   }
   input_arg {
     name: "contrast_factor"
-    description: "A float multiplier for adjusting contrast."
     type: DT_FLOAT
   }
   output_arg {
     name: "output"
-    description: "The contrast-adjusted image or images."
     type: DT_FLOAT
   }
-  summary: "Adjust the contrast of one or more images."
-  description: "`images` is a tensor of at least 3 dimensions.  The last 3 dimensions are\ninterpreted as `[height, width, channels]`.  The other dimensions only\nrepresent a collection of images, such as `[batch, height, width, channels].`\n\nContrast is adjusted independently for each channel of each image.\n\nFor each channel, the Op first computes the mean of the image pixels in the\nchannel and then adjusts each component of each pixel to\n`(x - mean) * contrast_factor + mean`."
 }
 op {
   name: "AdjustHue"
   input_arg {
     name: "images"
-    description: "Images to adjust.  At least 3-D."
     type: DT_FLOAT
   }
   input_arg {
     name: "delta"
-    description: "A float delta to add to the hue."
     type: DT_FLOAT
   }
   output_arg {
     name: "output"
-    description: "The hue-adjusted image or images."
     type: DT_FLOAT
   }
-  summary: "Adjust the hue of one or more images."
-  description: "`images` is a tensor of at least 3 dimensions.  The last dimension is\ninterpretted as channels, and must be three.\n\nThe input image is considered in the RGB colorspace. Conceptually, the RGB\ncolors are first mapped into HSV. A delta is then applied all the hue values,\nand then remapped back to RGB colorspace."
 }
 op {
   name: "AdjustSaturation"
   input_arg {
     name: "images"
-    description: "Images to adjust.  At least 3-D."
     type: DT_FLOAT
   }
   input_arg {
     name: "scale"
-    description: "A float scale to add to the saturation."
     type: DT_FLOAT
   }
   output_arg {
     name: "output"
-    description: "The hue-adjusted image or images."
     type: DT_FLOAT
   }
-  summary: "Adjust the saturation of one or more images."
-  description: "`images` is a tensor of at least 3 dimensions.  The last dimension is\ninterpretted as channels, and must be three.\n\nThe input image is considered in the RGB colorspace. Conceptually, the RGB\ncolors are first mapped into HSV. A scale is then applied all the saturation\nvalues, and then remapped back to RGB colorspace."
 }
 op {
   name: "All"
   input_arg {
     name: "input"
-    description: "The tensor to reduce."
     type: DT_BOOL
   }
   input_arg {
     name: "reduction_indices"
-    description: "The dimensions to reduce. Must be in the range\n`[-rank(input), rank(input))`."
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "The reduced tensor."
     type: DT_BOOL
   }
   attr {
@@ -609,7 +547,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, retain reduced dimensions with length 1."
   }
   attr {
     name: "Tidx"
@@ -624,49 +561,40 @@ op {
       }
     }
   }
-  summary: "Computes the \"logical and\" of elements across dimensions of a tensor."
-  description: "Reduces `input` along the dimensions given in `reduction_indices`. Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_indices`. If `keep_dims` is true, the reduced dimensions are\nretained with length 1."
 }
 op {
   name: "AllCandidateSampler"
   input_arg {
     name: "true_classes"
-    description: "A batch_size * num_true matrix, in which each row contains the\nIDs of the num_true target_classes in the corresponding original label."
     type: DT_INT64
   }
   output_arg {
     name: "sampled_candidates"
-    description: "A vector of length num_sampled, in which each element is\nthe ID of a sampled candidate."
     type: DT_INT64
   }
   output_arg {
     name: "true_expected_count"
-    description: "A batch_size * num_true matrix, representing\nthe number of times each candidate is expected to occur in a batch\nof sampled candidates. If unique=true, then this is a probability."
     type: DT_FLOAT
   }
   output_arg {
     name: "sampled_expected_count"
-    description: "A vector of length num_sampled, for each sampled\ncandidate representing the number of times the candidate is expected\nto occur in a batch of sampled candidates.  If unique=true, then this is a\nprobability."
     type: DT_FLOAT
   }
   attr {
     name: "num_true"
     type: "int"
-    description: "Number of true labels per context."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "num_sampled"
     type: "int"
-    description: "Number of candidates to produce."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "unique"
     type: "bool"
-    description: "If unique is true, we sample with rejection, so that all sampled\ncandidates in a batch are unique. This requires some approximation to\nestimate the post-rejection sampling probabilities."
   }
   attr {
     name: "seed"
@@ -674,7 +602,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -682,10 +609,7 @@ op {
     default_value {
       i: 0
     }
-    description: "An second seed to avoid seed collision."
   }
-  summary: "Generates labels for candidate sampling with a learned unigram distribution."
-  description: "See explanations of candidate sampling and the data formats at\ngo/candidate-sampling.\n\nFor each batch, this op picks a single set of sampled candidate labels.\n\nThe advantages of sampling candidates per-batch are simplicity and the\npossibility of efficient dense matrix multiplication. The disadvantage is that\nthe sampled candidates must be chosen independently of the context and of the\ntrue labels."
   is_stateful: true
 }
 op {
@@ -724,24 +648,19 @@ op {
       }
     }
   }
-  summary: "Returns the argument of a complex number."
-  description: "Given a tensor `input` of complex numbers, this operation returns a tensor of\ntype `float` that is the argument of each element in `input`. All elements in\n`input` must be complex numbers of the form \\\\(a + bj\\\\), where *a*\nis the real part and *b* is the imaginary part.\n\nThe argument returned by this operation is of the form \\\\(atan2(b, a)\\\\).\n\nFor example:\n\n```\n# tensor \'input\' is [-2.25 + 4.75j, 3.25 + 5.75j]\ntf.angle(input) ==> [2.0132, 1.056]\n```\n\n@compatibility(numpy)\nEquivalent to np.angle.\n@end_compatibility"
 }
 op {
   name: "Any"
   input_arg {
     name: "input"
-    description: "The tensor to reduce."
     type: DT_BOOL
   }
   input_arg {
     name: "reduction_indices"
-    description: "The dimensions to reduce. Must be in the range\n`[-rank(input), rank(input))`."
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "The reduced tensor."
     type: DT_BOOL
   }
   attr {
@@ -750,7 +669,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, retain reduced dimensions with length 1."
   }
   attr {
     name: "Tidx"
@@ -765,52 +683,42 @@ op {
       }
     }
   }
-  summary: "Computes the \"logical or\" of elements across dimensions of a tensor."
-  description: "Reduces `input` along the dimensions given in `reduction_indices`. Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_indices`. If `keep_dims` is true, the reduced dimensions are\nretained with length 1."
 }
 op {
   name: "ApplyAdadelta"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum_update"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "epsilon"
-    description: "Constant factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -821,17 +729,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -844,38 +753,30 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var, accum and update_accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'*var\' according to the adadelta scheme."
-  description: "accum = rho() * accum + (1 - rho()) * grad.square();\nupdate = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;\nupdate_accum = rho() * update_accum + (1 - rho()) * update.square();\nvar -= update;"
 }
 op {
   name: "ApplyAdagrad"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -886,17 +787,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -909,59 +811,47 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the adagrad scheme."
-  description: "accum += grad * grad\nvar -= lr * grad * (1 / sqrt(accum))"
 }
 op {
   name: "ApplyAdagradDA"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "gradient_accumulator"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "gradient_squared_accumulator"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "global_step"
-    description: "Training step number. Must be a scalar."
     type: DT_INT64
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -972,17 +862,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -995,68 +886,55 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'*var\' according to the proximal adagrad scheme."
 }
 op {
   name: "ApplyAdam"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "m"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "v"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "beta1_power"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "beta2_power"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "beta1"
-    description: "Momentum factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "beta2"
-    description: "Momentum factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "epsilon"
-    description: "Ridge term. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -1067,17 +945,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1090,7 +969,6 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var, m, and v tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
   attr {
     name: "use_nesterov"
@@ -1098,53 +976,42 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, uses the nesterov update."
   }
-  summary: "Update \'*var\' according to the Adam algorithm."
-  description: "lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\nm_t <- beta1 * m_{t-1} + (1 - beta1) * g_t\nv_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t\nvariable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)"
 }
 op {
   name: "ApplyAddSign"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "m"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "alpha"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "sign_decay"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "beta"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -1155,17 +1022,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1178,45 +1046,36 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the AddSign update."
-  description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- (alpha + sign_decay * sign(g) *sign(m)) * g\nvariable <- variable - lr_t * update"
 }
 op {
   name: "ApplyCenteredRMSProp"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "mg"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "ms"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "mom"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -1225,17 +1084,14 @@ op {
   }
   input_arg {
     name: "epsilon"
-    description: "Ridge term. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -1246,17 +1102,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1269,59 +1126,47 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var, mg, ms, and mom tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the centered RMSProp algorithm."
-  description: "The centered RMSProp algorithm uses an estimate of the centered second moment\n(i.e., the variance) for normalization, as opposed to regular RMSProp, which\nuses the (uncentered) second moment. This often helps with training, but is\nslightly more expensive in terms of computation and memory.\n\nNote that in dense implementation of this algorithm, mg, ms, and mom will\nupdate even if the grad is zero, but in this sparse implementation, mg, ms,\nand mom will not update in iterations during which the grad is zero.\n\nmean_square = decay * mean_square + (1-decay) * gradient ** 2\nmean_grad = decay * mean_grad + (1-decay) * gradient\n\nDelta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)\n\nmg <- rho * mg_{t-1} + (1-rho) * grad\nms <- rho * ms_{t-1} + (1-rho) * grad * grad\nmom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\nvar <- var - mom"
 }
 op {
   name: "ApplyFtrl"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "linear"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regulariation. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regulariation. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "lr_power"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -1332,17 +1177,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1355,49 +1201,39 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the Ftrl-proximal scheme."
-  description: "accum_new = accum + grad * grad\nlinear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var\nquadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2\nvar = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0\naccum = accum_new"
 }
 op {
   name: "ApplyFtrlV2"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "linear"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regulariation. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 shrinkage regulariation. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -1406,12 +1242,10 @@ op {
   }
   input_arg {
     name: "lr_power"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -1422,17 +1256,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1445,32 +1280,25 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the Ftrl-proximal scheme."
-  description: "grad_with_shrinkage = grad + 2 * l2_shrinkage * var\naccum_new = accum + grad_with_shrinkage * grad_with_shrinkage\nlinear += grad_with_shrinkage +\n    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var\nquadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2\nvar = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0\naccum = accum_new"
 }
 op {
   name: "ApplyGradientDescent"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "alpha"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "delta"
-    description: "The change."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -1481,17 +1309,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1504,42 +1333,34 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, the subtraction will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'*var\' by subtracting \'alpha\' * \'delta\' from it."
 }
 op {
   name: "ApplyMomentum"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "momentum"
-    description: "Momentum. Must be a scalar."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -1550,17 +1371,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1573,7 +1395,6 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
   attr {
     name: "use_nesterov"
@@ -1581,53 +1402,42 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, the tensor passed to compute grad will be\nvar - lr * momentum * accum, so in the end, the var you get is actually\nvar - lr * momentum * accum."
   }
-  summary: "Update \'*var\' according to the momentum scheme. Set use_nesterov = True if you"
-  description: "want to use Nesterov momentum.\n\naccum = accum * momentum + grad\nvar -= lr * accum"
 }
 op {
   name: "ApplyPowerSign"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "m"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "logbase"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "sign_decay"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "beta"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -1638,17 +1448,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1661,48 +1472,38 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the AddSign update."
-  description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g\nvariable <- variable - lr_t * update"
 }
 op {
   name: "ApplyProximalAdagrad"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -1713,17 +1514,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1736,42 +1538,33 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'*var\' and \'*accum\' according to FOBOS with Adagrad learning rate."
-  description: "accum += grad * grad\nprox_v = var - lr * grad * (1 / sqrt(accum))\nvar = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}"
 }
 op {
   name: "ApplyProximalGradientDescent"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "alpha"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "delta"
-    description: "The change."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -1782,17 +1575,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1805,39 +1599,31 @@ op {
     default_value {
       b: false
     }
-    description: "If True, the subtraction will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'*var\' as FOBOS algorithm with fixed learning rate."
-  description: "prox_v = var - alpha * delta\nvar = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}"
 }
 op {
   name: "ApplyRMSProp"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "ms"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "mom"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -1846,17 +1632,14 @@ op {
   }
   input_arg {
     name: "epsilon"
-    description: "Ridge term. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -1867,17 +1650,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1890,10 +1674,7 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var, ms, and mom tensors is protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the RMSProp algorithm."
-  description: "Note that in dense implementation of this algorithm, ms and mom will\nupdate even if the grad is zero, but in this sparse implementation, ms\nand mom will not update in iterations during which the grad is zero.\n\nmean_square = decay * mean_square + (1-decay) * gradient ** 2\nDelta = learning_rate * gradient / sqrt(mean_square + epsilon)\n\nms <- rho * ms_{t-1} + (1-rho) * grad * grad\nmom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\nvar <- var - mom"
 }
 op {
   name: "ApproximateEqual"
@@ -1916,17 +1697,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -1940,7 +1722,6 @@ op {
       f: 1e-05
     }
   }
-  summary: "Returns the truth value of abs(x-y) < tolerance element-wise."
   is_commutative: true
 }
 op {
@@ -1951,7 +1732,6 @@ op {
   }
   input_arg {
     name: "dimension"
-    description: "int32 or int64, must be in the range `[-rank(input), rank(input))`.\nDescribes which dimension of the input Tensor to reduce across. For vectors,\nuse dimension = 0."
     type_attr: "Tidx"
   }
   output_arg {
@@ -1965,17 +1745,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -2008,8 +1789,6 @@ op {
       }
     }
   }
-  summary: "Returns the index with the largest value across dimensions of a tensor."
-  description: "Note that in case of ties the identity of the return value is not guaranteed."
 }
 op {
   name: "ArgMin"
@@ -2019,7 +1798,6 @@ op {
   }
   input_arg {
     name: "dimension"
-    description: "int32 or int64, must be in the range `[-rank(input), rank(input))`.\nDescribes which dimension of the input Tensor to reduce across. For vectors,\nuse dimension = 0."
     type_attr: "Tidx"
   }
   output_arg {
@@ -2033,17 +1811,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -2076,8 +1855,6 @@ op {
       }
     }
   }
-  summary: "Returns the index with the smallest value across dimensions of a tensor."
-  description: "Note that in case of ties the identity of the return value is not guaranteed."
 }
 op {
   name: "AsString"
@@ -2110,7 +1887,6 @@ op {
     default_value {
       i: -1
     }
-    description: "The post-decimal precision to use for floating point numbers.\nOnly used if precision > -1."
   }
   attr {
     name: "scientific"
@@ -2118,7 +1894,6 @@ op {
     default_value {
       b: false
     }
-    description: "Use scientific notation for floating point numbers."
   }
   attr {
     name: "shortest"
@@ -2126,7 +1901,6 @@ op {
     default_value {
       b: false
     }
-    description: "Use shortest representation (either scientific or standard) for\nfloating point numbers."
   }
   attr {
     name: "width"
@@ -2134,7 +1908,6 @@ op {
     default_value {
       i: -1
     }
-    description: "Pad pre-decimal numbers to this width.\nApplies to both floating point and integer numbers.\nOnly used if width > -1."
   }
   attr {
     name: "fill"
@@ -2142,10 +1915,7 @@ op {
     default_value {
       s: ""
     }
-    description: "The value to pad if width > -1.  If empty, pads with spaces.\nAnother typical value is \'0\'.  String cannot be longer than 1 character."
   }
-  summary: "Converts each entry in the given tensor to strings.  Supports many numeric"
-  description: "types and boolean."
 }
 op {
   name: "Asin"
@@ -2163,6 +1933,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -2172,7 +1943,6 @@ op {
       }
     }
   }
-  summary: "Computes asin of x element-wise."
 }
 op {
   name: "Asinh"
@@ -2190,6 +1960,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -2197,18 +1968,15 @@ op {
       }
     }
   }
-  summary: "Computes inverse hyperbolic sine of x element-wise."
 }
 op {
   name: "Assert"
   input_arg {
     name: "condition"
-    description: "The condition to evaluate."
     type: DT_BOOL
   }
   input_arg {
     name: "data"
-    description: "The tensors to print out when condition is false."
     type_list_attr: "T"
   }
   attr {
@@ -2223,28 +1991,22 @@ op {
     default_value {
       i: 3
     }
-    description: "Print this many entries of each tensor."
   }
-  summary: "Asserts that the given condition is true."
-  description: "If `condition` evaluates to false, print the list of tensors in `data`.\n`summarize` determines how many entries of the tensors to print."
   is_stateful: true
 }
 op {
   name: "Assign"
   input_arg {
     name: "ref"
-    description: "Should be from a `Variable` node. May be uninitialized."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "value"
-    description: "The value to be assigned to the variable."
     type_attr: "T"
   }
   output_arg {
     name: "output_ref"
-    description: "= Same as \"ref\".  Returned as a convenience for operations that want\nto use the new value after the variable has been reset."
     type_attr: "T"
     is_ref: true
   }
@@ -2258,7 +2020,6 @@ op {
     default_value {
       b: true
     }
-    description: "If true, the operation will validate that the shape\nof \'value\' matches the shape of the Tensor being assigned to.  If false,\n\'ref\' will take on the shape of \'value\'."
   }
   attr {
     name: "use_locking"
@@ -2266,28 +2027,22 @@ op {
     default_value {
       b: true
     }
-    description: "If True, the assignment will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'ref\' by assigning \'value\' to it."
-  description: "This operation outputs \"ref\" after the assignment is done.\nThis makes it easier to chain operations that need to use the reset value."
   allows_uninitialized_input: true
 }
 op {
   name: "AssignAdd"
   input_arg {
     name: "ref"
-    description: "Should be from a `Variable` node."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "value"
-    description: "The value to be added to the variable."
     type_attr: "T"
   }
   output_arg {
     name: "output_ref"
-    description: "= Same as \"ref\".  Returned as a convenience for operations that want\nto use the new value after the variable has been updated."
     type_attr: "T"
     is_ref: true
   }
@@ -2298,17 +2053,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -2321,48 +2077,37 @@ op {
     default_value {
       b: false
     }
-    description: "If True, the addition will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'ref\' by adding \'value\' to it."
-  description: "This operation outputs \"ref\" after the update is done.\nThis makes it easier to chain operations that need to use the reset value."
 }
 op {
   name: "AssignAddVariableOp"
   input_arg {
     name: "resource"
-    description: "handle to the resource in which to store the variable."
     type: DT_RESOURCE
   }
   input_arg {
     name: "value"
-    description: "the value by which the variable will be incremented."
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "the dtype of the value."
   }
-  summary: "Adds a value to the current value of a variable."
-  description: "Any ReadVariableOp which depends directly or indirectly on this assign is\nguaranteed to see the incremented value or a subsequent newer one.\n\nOutputs the incremented value, which can be used to totally order the\nincrements to this variable."
   is_stateful: true
 }
 op {
   name: "AssignSub"
   input_arg {
     name: "ref"
-    description: "Should be from a `Variable` node."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "value"
-    description: "The value to be subtracted to the variable."
     type_attr: "T"
   }
   output_arg {
     name: "output_ref"
-    description: "= Same as \"ref\".  Returned as a convenience for operations that want\nto use the new value after the variable has been updated."
     type_attr: "T"
     is_ref: true
   }
@@ -2373,17 +2118,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -2396,51 +2142,38 @@ op {
     default_value {
       b: false
     }
-    description: "If True, the subtraction will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'ref\' by subtracting \'value\' from it."
-  description: "This operation outputs \"ref\" after the update is done.\nThis makes it easier to chain operations that need to use the reset value."
 }
 op {
   name: "AssignSubVariableOp"
   input_arg {
     name: "resource"
-    description: "handle to the resource in which to store the variable."
     type: DT_RESOURCE
   }
   input_arg {
     name: "value"
-    description: "the value by which the variable will be incremented."
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "the dtype of the value."
   }
-  summary: "Subtracts a value from the current value of a variable."
-  description: "Any ReadVariableOp which depends directly or indirectly on this assign is\nguaranteed to see the incremented value or a subsequent newer one.\n\nOutputs the incremented value, which can be used to totally order the\nincrements to this variable."
   is_stateful: true
 }
 op {
   name: "AssignVariableOp"
   input_arg {
     name: "resource"
-    description: "handle to the resource in which to store the variable."
     type: DT_RESOURCE
   }
   input_arg {
     name: "value"
-    description: "the value to set the new tensor to use."
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "the dtype of the value."
   }
-  summary: "Assigns a new value to a variable."
-  description: "Any ReadVariableOp with a control dependency on this op is guaranteed to return\nthis value or a subsequent newer value of the variable."
   is_stateful: true
 }
 op {
@@ -2459,6 +2192,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -2468,7 +2202,6 @@ op {
       }
     }
   }
-  summary: "Computes atan of x element-wise."
 }
 op {
   name: "Atan2"
@@ -2489,13 +2222,12 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes arctangent of `y/x` element-wise, respecting signs of the arguments."
-  description: "This is the angle \\( \\theta \\in [-\\pi, \\pi] \\) such that\n\\[ x = r \\cos(\\theta) \\]\nand\n\\[ y = r \\sin(\\theta) \\]\nwhere \\(r = \\sqrt(x^2 + y^2) \\)."
 }
 op {
   name: "Atanh"
@@ -2513,6 +2245,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -2520,29 +2253,24 @@ op {
       }
     }
   }
-  summary: "Computes inverse hyperbolic tangent of x element-wise."
 }
 op {
   name: "AudioSpectrogram"
   input_arg {
     name: "input"
-    description: "Float representation of audio data."
     type: DT_FLOAT
   }
   output_arg {
     name: "spectrogram"
-    description: "3D representation of the audio frequencies as an image."
     type: DT_FLOAT
   }
   attr {
     name: "window_size"
     type: "int"
-    description: "How wide the input window is in samples. For the highest efficiency\nthis should be a power of two, but other values are accepted."
   }
   attr {
     name: "stride"
     type: "int"
-    description: "How widely apart the center of adjacent sample windows should be."
   }
   attr {
     name: "magnitude_squared"
@@ -2550,32 +2278,25 @@ op {
     default_value {
       b: false
     }
-    description: "Whether to return the squared magnitude or just the\nmagnitude. Using squared magnitude can avoid extra calculations."
   }
-  summary: "Produces a visualization of audio data over time."
-  description: "Spectrograms are a standard way of representing audio information as a series of\nslices of frequency information, one slice for each window of time. By joining\nthese together into a sequence, they form a distinctive fingerprint of the sound\nover time.\n\nThis op expects to receive audio data as an input, stored as floats in the range\n-1 to 1, together with a window width in samples, and a stride specifying how\nfar to move the window between slices. From this it generates a three\ndimensional output. The lowest dimension has an amplitude value for each\nfrequency during that time slice. The next dimension is time, with successive\nfrequency slices. The final dimension is for the channels in the input, so a\nstereo audio input would have two here for example.\n\nThis means the layout when converted and saved as an image is rotated 90 degrees\nclockwise from a typical spectrogram. Time is descending down the Y axis, and\nthe frequency decreases from left to right.\n\nEach value in the result represents the square root of the sum of the real and\nimaginary parts of an FFT on the current window of samples. In this way, the\nlowest dimension represents the power of each frequency in the current window,\nand adjacent windows are concatenated in the next dimension.\n\nTo get a more intuitive and visual look at what this operation does, you can run\ntensorflow/examples/wav_to_spectrogram to read in an audio file and save out the\nresulting spectrogram as a PNG image."
 }
 op {
   name: "AudioSummary"
   input_arg {
     name: "tag"
-    description: "Scalar. Used to build the `tag` attribute of the summary values."
     type: DT_STRING
   }
   input_arg {
     name: "tensor"
-    description: "2-D of shape `[batch_size, frames]`."
     type: DT_FLOAT
   }
   output_arg {
     name: "summary"
-    description: "Scalar. Serialized `Summary` protocol buffer."
     type: DT_STRING
   }
   attr {
     name: "sample_rate"
     type: "float"
-    description: "The sample rate of the signal in hertz."
   }
   attr {
     name: "max_outputs"
@@ -2583,12 +2304,9 @@ op {
     default_value {
       i: 3
     }
-    description: "Max number of batch elements to generate audio for."
     has_minimum: true
     minimum: 1
   }
-  summary: "Outputs a `Summary` protocol buffer with audio."
-  description: "The summary has up to `max_outputs` summary values containing audio. The\naudio is built from `tensor` which must be 3-D with shape `[batch_size,\nframes, channels]` or 2-D with shape `[batch_size, frames]`. The values are\nassumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.\n\nThe `tag` argument is a scalar `Tensor` of type `string`.  It is used to\nbuild the `tag` of the summary values:\n\n*  If `max_outputs` is 1, the summary value tag is \'*tag*/audio\'.\n*  If `max_outputs` is greater than 1, the summary value tags are\n   generated sequentially as \'*tag*/audio/0\', \'*tag*/audio/1\', etc."
   deprecation {
     version: 15
     explanation: "Use AudioSummaryV2."
@@ -2598,22 +2316,18 @@ op {
   name: "AudioSummaryV2"
   input_arg {
     name: "tag"
-    description: "Scalar. Used to build the `tag` attribute of the summary values."
     type: DT_STRING
   }
   input_arg {
     name: "tensor"
-    description: "2-D of shape `[batch_size, frames]`."
     type: DT_FLOAT
   }
   input_arg {
     name: "sample_rate"
-    description: "The sample rate of the signal in hertz."
     type: DT_FLOAT
   }
   output_arg {
     name: "summary"
-    description: "Scalar. Serialized `Summary` protocol buffer."
     type: DT_STRING
   }
   attr {
@@ -2622,43 +2336,35 @@ op {
     default_value {
       i: 3
     }
-    description: "Max number of batch elements to generate audio for."
     has_minimum: true
     minimum: 1
   }
-  summary: "Outputs a `Summary` protocol buffer with audio."
-  description: "The summary has up to `max_outputs` summary values containing audio. The\naudio is built from `tensor` which must be 3-D with shape `[batch_size,\nframes, channels]` or 2-D with shape `[batch_size, frames]`. The values are\nassumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.\n\nThe `tag` argument is a scalar `Tensor` of type `string`.  It is used to\nbuild the `tag` of the summary values:\n\n*  If `max_outputs` is 1, the summary value tag is \'*tag*/audio\'.\n*  If `max_outputs` is greater than 1, the summary value tags are\n   generated sequentially as \'*tag*/audio/0\', \'*tag*/audio/1\', etc."
 }
 op {
   name: "AvgPool"
   input_arg {
     name: "value"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The average pooled output tensor."
     type_attr: "T"
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "The size of the sliding window for each dimension of `value`."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of `value`."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -2672,7 +2378,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -2686,44 +2391,38 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Performs average pooling on the input."
-  description: "Each entry in `output` is the mean of the corresponding size `ksize`\nwindow in `value`."
 }
 op {
   name: "AvgPool3D"
   input_arg {
     name: "input"
-    description: "Shape `[batch, depth, rows, cols, channels]` tensor to pool over."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The average pooled output tensor."
     type_attr: "T"
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "1-D tensor of length 5. The size of the window for each dimension of\nthe input tensor. Must have `ksize[0] = ksize[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 5. The stride of the sliding window for each\ndimension of `input`. Must have `strides[0] = strides[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -2737,7 +2436,6 @@ op {
     default_value {
       s: "NDHWC"
     }
-    description: "The data format of the input and output data. With the\ndefault format \"NDHWC\", the data is stored in the order of:\n    [batch, in_depth, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCDHW\", the data storage order is:\n    [batch, in_channels, in_depth, in_height, in_width]."
     allowed_values {
       list {
         s: "NDHWC"
@@ -2750,48 +2448,42 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Performs 3D average pooling on the input."
 }
 op {
   name: "AvgPool3DGrad"
   input_arg {
     name: "orig_input_shape"
-    description: "The original input dimensions."
     type: DT_INT32
   }
   input_arg {
     name: "grad"
-    description: "Output backprop of shape `[batch, depth, rows, cols, channels]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The backprop for input."
     type_attr: "T"
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "1-D tensor of length 5. The size of the window for each dimension of\nthe input tensor. Must have `ksize[0] = ksize[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 5. The stride of the sliding window for each\ndimension of `input`. Must have `strides[0] = strides[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -2805,7 +2497,6 @@ op {
     default_value {
       s: "NDHWC"
     }
-    description: "The data format of the input and output data. With the\ndefault format \"NDHWC\", the data is stored in the order of:\n    [batch, in_depth, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCDHW\", the data storage order is:\n    [batch, in_channels, in_depth, in_height, in_width]."
     allowed_values {
       list {
         s: "NDHWC"
@@ -2818,48 +2509,42 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes gradients of average pooling function."
 }
 op {
   name: "AvgPoolGrad"
   input_arg {
     name: "orig_input_shape"
-    description: "1-D.  Shape of the original input to `avg_pool`."
     type: DT_INT32
   }
   input_arg {
     name: "grad"
-    description: "4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t.\nthe output of `avg_pool`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "4-D.  Gradients w.r.t. the input of `avg_pool`."
     type_attr: "T"
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "The size of the sliding window for each dimension of the input."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the input."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -2873,7 +2558,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -2887,25 +2571,23 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes gradients of the average pooling function."
 }
 op {
   name: "Barrier"
   output_arg {
     name: "handle"
-    description: "The handle to the barrier."
     type: DT_STRING
     is_ref: true
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a value."
     has_minimum: true
     minimum: 1
   }
@@ -2916,7 +2598,6 @@ op {
       list {
       }
     }
-    description: "The shape of each component in a value. Each shape must be 1 in the\nfirst dimension. The length of this attr must be the same as the length of\ncomponent_types."
     has_minimum: true
   }
   attr {
@@ -2925,7 +2606,6 @@ op {
     default_value {
       i: -1
     }
-    description: "The capacity of the barrier.  The default capacity is MAX_INT32,\nwhich is the largest capacity of the underlying queue."
   }
   attr {
     name: "container"
@@ -2933,7 +2613,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this barrier is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -2941,17 +2620,13 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this barrier will be shared under the given name\nacross multiple sessions."
   }
-  summary: "Defines a barrier that persists across different graph executions."
-  description: "A barrier represents a key-value map, where each key is a string, and\neach value is a tuple of tensors.\n\nAt runtime, the barrier contains \'complete\' and \'incomplete\'\nelements. A complete element has defined tensors for all components of\nits value tuple, and may be accessed using BarrierTakeMany. An\nincomplete element has some undefined components in its value tuple,\nand may be updated using BarrierInsertMany."
   is_stateful: true
 }
 op {
   name: "BarrierClose"
   input_arg {
     name: "handle"
-    description: "The handle to a barrier."
     type: DT_STRING
     is_ref: true
   }
@@ -2961,42 +2636,33 @@ op {
     default_value {
       b: false
     }
-    description: "If true, all pending enqueue requests that are\nblocked on the barrier\'s queue will be canceled. InsertMany will fail, even\nif no new key is introduced."
   }
-  summary: "Closes the given barrier."
-  description: "This operation signals that no more new elements will be inserted in the\ngiven barrier. Subsequent InsertMany that try to introduce a new key will fail.\nSubsequent InsertMany operations that just add missing components to already\nexisting elements will continue to succeed. Subsequent TakeMany operations will\ncontinue to succeed if sufficient completed elements remain in the barrier.\nSubsequent TakeMany operations that would block will fail immediately."
 }
 op {
   name: "BarrierIncompleteSize"
   input_arg {
     name: "handle"
-    description: "The handle to a barrier."
     type: DT_STRING
     is_ref: true
   }
   output_arg {
     name: "size"
-    description: "The number of incomplete elements (i.e. those with some of their value\ncomponents not set) in the barrier."
     type: DT_INT32
   }
-  summary: "Computes the number of incomplete elements in the given barrier."
 }
 op {
   name: "BarrierInsertMany"
   input_arg {
     name: "handle"
-    description: "The handle to a barrier."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "keys"
-    description: "A one-dimensional tensor of keys, with length n."
     type: DT_STRING
   }
   input_arg {
     name: "values"
-    description: "An any-dimensional tensor of values, which are associated with the\nrespective keys. The 0th dimension must have length n."
     type_attr: "T"
   }
   attr {
@@ -3006,58 +2672,46 @@ op {
   attr {
     name: "component_index"
     type: "int"
-    description: "The component of the barrier elements that is being assigned."
   }
-  summary: "For each key, assigns the respective value to the specified component."
-  description: "If a key is not found in the barrier, this operation will create a new\nincomplete element. If a key is found in the barrier, and the element\nalready has a value at component_index, this operation will fail with\nINVALID_ARGUMENT, and leave the barrier in an undefined state."
 }
 op {
   name: "BarrierReadySize"
   input_arg {
     name: "handle"
-    description: "The handle to a barrier."
     type: DT_STRING
     is_ref: true
   }
   output_arg {
     name: "size"
-    description: "The number of complete elements (i.e. those with all of their value\ncomponents set) in the barrier."
     type: DT_INT32
   }
-  summary: "Computes the number of complete elements in the given barrier."
 }
 op {
   name: "BarrierTakeMany"
   input_arg {
     name: "handle"
-    description: "The handle to a barrier."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "num_elements"
-    description: "A single-element tensor containing the number of elements to\ntake."
     type: DT_INT32
   }
   output_arg {
     name: "indices"
-    description: "A one-dimensional tensor of indices, with length num_elems.\nThese indices refer to the batch in which the values were placed into the\nbarrier (starting with MIN_LONG and increasing with each BarrierInsertMany)."
     type: DT_INT64
   }
   output_arg {
     name: "keys"
-    description: "A one-dimensional tensor of keys, with length num_elements."
     type: DT_STRING
   }
   output_arg {
     name: "values"
-    description: "One any-dimensional tensor per component in a barrier element. All\nvalues have length num_elements in the 0th dimension."
     type_list_attr: "component_types"
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a value."
     has_minimum: true
     minimum: 1
   }
@@ -3067,7 +2721,6 @@ op {
     default_value {
       b: false
     }
-    description: "Allow to return less than num_elements items if barrier is\nalready closed."
   }
   attr {
     name: "wait_for_incomplete"
@@ -3082,10 +2735,7 @@ op {
     default_value {
       i: -1
     }
-    description: "If the queue is empty, this operation will block for up to\ntimeout_ms milliseconds.\nNote: This option is not supported yet."
   }
-  summary: "Takes the given number of completed elements from a barrier."
-  description: "This operation concatenates completed-element component tensors along\nthe 0th dimension to make a single component tensor.\n\nElements come out of the barrier when they are complete, and in the order\nin which they were placed into the barrier.  The indices output provides\ninformation about the batch in which each element was originally inserted\ninto the barrier."
 }
 op {
   name: "BatchCholesky"
@@ -3149,7 +2799,6 @@ op {
   }
   input_arg {
     name: "batch_size"
-    description: "A scalar representing the number of elements to accumulate in a\nbatch."
     type: DT_INT64
   }
   output_arg {
@@ -3168,7 +2817,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that batches `batch_size` elements from `input_dataset`."
 }
 op {
   name: "BatchFFT"
@@ -3264,17 +2912,14 @@ op {
   name: "BatchMatMul"
   input_arg {
     name: "x"
-    description: "2-D or higher with shape `[..., r_x, c_x]`."
     type_attr: "T"
   }
   input_arg {
     name: "y"
-    description: "2-D or higher with shape `[..., r_y, c_y]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "3-D or higher with shape `[..., r_o, c_o]`"
     type_attr: "T"
   }
   attr {
@@ -3283,6 +2928,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -3297,7 +2943,6 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, adjoint the slices of `x`. Defaults to `False`."
   }
   attr {
     name: "adj_y"
@@ -3305,10 +2950,7 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, adjoint the slices of `y`. Defaults to `False`."
   }
-  summary: "Multiplies slices of two tensors in batches."
-  description: "Multiplies all slices of `Tensor` `x` and `y` (each slice can be\nviewed as an element of a batch), and arranges the individual results\nin a single output tensor of the same batch size. Each of the\nindividual slices can optionally be adjointed (to adjoint a matrix\nmeans to transpose and conjugate it) before multiplication by setting\nthe `adj_x` or `adj_y` flag to `True`, which are by default `False`.\n\nThe input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]`\nand `[..., r_y, c_y]`.\n\nThe output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where:\n\n    r_o = c_x if adj_x else r_x\n    c_o = r_y if adj_y else c_y\n\nIt is computed as:\n\n    output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :])"
 }
 op {
   name: "BatchMatrixBandPart"
@@ -3580,27 +3222,22 @@ op {
   name: "BatchNormWithGlobalNormalization"
   input_arg {
     name: "t"
-    description: "A 4D input Tensor."
     type_attr: "T"
   }
   input_arg {
     name: "m"
-    description: "A 1D mean Tensor with size matching the last dimension of t.\nThis is the first output from tf.nn.moments,\nor a saved moving average thereof."
     type_attr: "T"
   }
   input_arg {
     name: "v"
-    description: "A 1D variance Tensor with size matching the last dimension of t.\nThis is the second output from tf.nn.moments,\nor a saved moving average thereof."
     type_attr: "T"
   }
   input_arg {
     name: "beta"
-    description: "A 1D beta Tensor with size matching the last dimension of t.\nAn offset to be added to the normalized tensor."
     type_attr: "T"
   }
   input_arg {
     name: "gamma"
-    description: "A 1D gamma Tensor with size matching the last dimension of t.\nIf \"scale_after_normalization\" is true, this tensor will be multiplied\nwith the normalized tensor."
     type_attr: "T"
   }
   output_arg {
@@ -3614,17 +3251,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -3634,15 +3272,11 @@ op {
   attr {
     name: "variance_epsilon"
     type: "float"
-    description: "A small float number to avoid dividing by 0."
   }
   attr {
     name: "scale_after_normalization"
     type: "bool"
-    description: "A bool indicating whether the resulted tensor\nneeds to be multiplied with gamma."
   }
-  summary: "Batch normalization."
-  description: "This op is deprecated. Prefer `tf.nn.batch_normalization`."
   deprecation {
     version: 9
     explanation: "Use tf.nn.batch_normalization()"
@@ -3652,52 +3286,42 @@ op {
   name: "BatchNormWithGlobalNormalizationGrad"
   input_arg {
     name: "t"
-    description: "A 4D input Tensor."
     type_attr: "T"
   }
   input_arg {
     name: "m"
-    description: "A 1D mean Tensor with size matching the last dimension of t.\nThis is the first output from tf.nn.moments,\nor a saved moving average thereof."
     type_attr: "T"
   }
   input_arg {
     name: "v"
-    description: "A 1D variance Tensor with size matching the last dimension of t.\nThis is the second output from tf.nn.moments,\nor a saved moving average thereof."
     type_attr: "T"
   }
   input_arg {
     name: "gamma"
-    description: "A 1D gamma Tensor with size matching the last dimension of t.\nIf \"scale_after_normalization\" is true, this Tensor will be multiplied\nwith the normalized Tensor."
     type_attr: "T"
   }
   input_arg {
     name: "backprop"
-    description: "4D backprop Tensor."
     type_attr: "T"
   }
   output_arg {
     name: "dx"
-    description: "4D backprop tensor for input."
     type_attr: "T"
   }
   output_arg {
     name: "dm"
-    description: "1D backprop tensor for mean."
     type_attr: "T"
   }
   output_arg {
     name: "dv"
-    description: "1D backprop tensor for variance."
     type_attr: "T"
   }
   output_arg {
     name: "db"
-    description: "1D backprop tensor for beta."
     type_attr: "T"
   }
   output_arg {
     name: "dg"
-    description: "1D backprop tensor for gamma."
     type_attr: "T"
   }
   attr {
@@ -3707,17 +3331,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -3727,15 +3352,11 @@ op {
   attr {
     name: "variance_epsilon"
     type: "float"
-    description: "A small float number to avoid dividing by 0."
   }
   attr {
     name: "scale_after_normalization"
     type: "bool"
-    description: "A bool indicating whether the resulted tensor\nneeds to be multiplied with gamma."
   }
-  summary: "Gradients for batch normalization."
-  description: "This op is deprecated. See `tf.nn.batch_normalization`."
   deprecation {
     version: 9
     explanation: "Use tf.nn.batch_normalization()"
@@ -3855,17 +3476,14 @@ op {
   name: "BatchToSpace"
   input_arg {
     name: "input"
-    description: "4-D tensor with shape\n`[batch*block_size*block_size, height_pad/block_size, width_pad/block_size,\n  depth]`. Note that the batch size of the input tensor must be divisible by\n`block_size * block_size`."
     type_attr: "T"
   }
   input_arg {
     name: "crops"
-    description: "2-D tensor of non-negative integers with shape `[2, 2]`. It specifies\nhow many elements to crop from the intermediate result across the spatial\ndimensions as follows:\n\n    crops = [[crop_top, crop_bottom], [crop_left, crop_right]]"
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "4-D with shape `[batch, height, width, depth]`, where:\n\n      height = height_pad - crop_top - crop_bottom\n      width = width_pad - crop_left - crop_right\n\nThe attr `block_size` must be greater than one. It indicates the block size.\n\nSome examples:\n\n(1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2:\n\n```\n[[[[1]]], [[[2]]], [[[3]]], [[[4]]]]\n```\n\nThe output tensor has shape `[1, 2, 2, 1]` and value:\n\n```\nx = [[[[1], [2]], [[3], [4]]]]\n```\n\n(2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2:\n\n```\n[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]\n```\n\nThe output tensor has shape `[1, 2, 2, 3]` and value:\n\n```\nx = [[[[1, 2, 3], [4, 5, 6]],\n      [[7, 8, 9], [10, 11, 12]]]]\n```\n\n(3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2:\n\n```\nx = [[[[1], [3]], [[9], [11]]],\n     [[[2], [4]], [[10], [12]]],\n     [[[5], [7]], [[13], [15]]],\n     [[[6], [8]], [[14], [16]]]]\n```\n\nThe output tensor has shape `[1, 4, 4, 1]` and value:\n\n```\nx = [[[1],   [2],  [3],  [4]],\n     [[5],   [6],  [7],  [8]],\n     [[9],  [10], [11],  [12]],\n     [[13], [14], [15],  [16]]]\n```\n\n(4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2:\n\n```\nx = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],\n     [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]\n```\n\nThe output tensor has shape `[2, 2, 4, 1]` and value:\n\n```\nx = [[[[1], [3]], [[5], [7]]],\n     [[[2], [4]], [[10], [12]]],\n     [[[5], [7]], [[13], [15]]],\n     [[[6], [8]], [[14], [16]]]]\n```"
     type_attr: "T"
   }
   attr {
@@ -3891,24 +3509,19 @@ op {
       }
     }
   }
-  summary: "BatchToSpace for 4-D tensors of type T."
-  description: "This is a legacy version of the more general BatchToSpaceND.\n\nRearranges (permutes) data from batch into blocks of spatial data, followed by\ncropping. This is the reverse transformation of SpaceToBatch. More specifically,\nthis op outputs a copy of the input tensor where values from the `batch`\ndimension are moved in spatial blocks to the `height` and `width` dimensions,\nfollowed by cropping along the `height` and `width` dimensions."
 }
 op {
   name: "BatchToSpaceND"
   input_arg {
     name: "input"
-    description: "N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`,\nwhere spatial_shape has M dimensions."
     type_attr: "T"
   }
   input_arg {
     name: "block_shape"
-    description: "1-D with shape `[M]`, all values must be >= 1."
     type_attr: "Tblock_shape"
   }
   input_arg {
     name: "crops"
-    description: "2-D with shape `[M, 2]`, all values must be >= 0.\n  `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input\n  dimension `i + 1`, which corresponds to spatial dimension `i`.  It is\n  required that\n  `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`.\n\nThis operation is equivalent to the following steps:\n\n1. Reshape `input` to `reshaped` of shape:\n     [block_shape[0], ..., block_shape[M-1],\n      batch / prod(block_shape),\n      input_shape[1], ..., input_shape[N-1]]\n\n2. Permute dimensions of `reshaped` to produce `permuted` of shape\n     [batch / prod(block_shape),\n\n      input_shape[1], block_shape[0],\n      ...,\n      input_shape[M], block_shape[M-1],\n\n      input_shape[M+1], ..., input_shape[N-1]]\n\n3. Reshape `permuted` to produce `reshaped_permuted` of shape\n     [batch / prod(block_shape),\n\n      input_shape[1] * block_shape[0],\n      ...,\n      input_shape[M] * block_shape[M-1],\n\n      input_shape[M+1],\n      ...,\n      input_shape[N-1]]\n\n4. Crop the start and end of dimensions `[1, ..., M]` of\n   `reshaped_permuted` according to `crops` to produce the output of shape:\n     [batch / prod(block_shape),\n\n      input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1],\n      ...,\n      input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1],\n\n      input_shape[M+1], ..., input_shape[N-1]]\n\nSome examples:\n\n(1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and\n    `crops = [[0, 0], [0, 0]]`:\n\n```\n[[[[1]]], [[[2]]], [[[3]]], [[[4]]]]\n```\n\nThe output tensor has shape `[1, 2, 2, 1]` and value:\n\n```\nx = [[[[1], [2]], [[3], [4]]]]\n```\n\n(2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and\n    `crops = [[0, 0], [0, 0]]`:\n\n```\n[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]\n```\n\nThe output tensor has shape `[1, 2, 2, 3]` and value:\n\n```\nx = [[[[1, 2, 3], [4, 5, 6]],\n      [[7, 8, 9], [10, 11, 12]]]]\n```\n\n(3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and\n    `crops = [[0, 0], [0, 0]]`:\n\n```\nx = [[[[1], [3]], [[9], [11]]],\n     [[[2], [4]], [[10], [12]]],\n     [[[5], [7]], [[13], [15]]],\n     [[[6], [8]], [[14], [16]]]]\n```\n\nThe output tensor has shape `[1, 4, 4, 1]` and value:\n\n```\nx = [[[1],   [2],  [3],  [4]],\n     [[5],   [6],  [7],  [8]],\n     [[9],  [10], [11],  [12]],\n     [[13], [14], [15],  [16]]]\n```\n\n(4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and\n    `crops = [[0, 0], [2, 0]]`:\n\n```\nx = [[[[0], [1], [3]]], [[[0], [9], [11]]],\n     [[[0], [2], [4]]], [[[0], [10], [12]]],\n     [[[0], [5], [7]]], [[[0], [13], [15]]],\n     [[[0], [6], [8]]], [[[0], [14], [16]]]]\n```\n\nThe output tensor has shape `[2, 2, 4, 1]` and value:\n\n```\nx = [[[[1],   [2],  [3],  [4]],\n      [[5],   [6],  [7],  [8]]],\n     [[[9],  [10], [11],  [12]],\n      [[13], [14], [15],  [16]]]]\n```"
     type_attr: "Tcrops"
   }
   output_arg {
@@ -3945,8 +3558,6 @@ op {
       }
     }
   }
-  summary: "BatchToSpace for N-D tensors of type T."
-  description: "This operation reshapes the \"batch\" dimension 0 into `M + 1` dimensions of shape\n`block_shape + [batch]`, interleaves these blocks back into the grid defined by\nthe spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as\nthe input.  The spatial dimensions of this intermediate result are then\noptionally cropped according to `crops` to produce the output.  This is the\nreverse of SpaceToBatch.  See below for a precise description."
 }
 op {
   name: "Betainc"
@@ -3976,24 +3587,19 @@ op {
       }
     }
   }
-  summary: "Compute the regularized incomplete beta integral \\\\(I_x(a, b)\\\\)."
-  description: "The regularized incomplete beta integral is defined as:\n\n\n\\\\(I_x(a, b) = \\frac{B(x; a, b)}{B(a, b)}\\\\)\n\nwhere\n\n\n\\\\(B(x; a, b) = \\int_0^x t^{a-1} (1 - t)^{b-1} dt\\\\)\n\n\nis the incomplete beta function and \\\\(B(a, b)\\\\) is the *complete*\nbeta function."
 }
 op {
   name: "BiasAdd"
   input_arg {
     name: "value"
-    description: "Any number of dimensions."
     type_attr: "T"
   }
   input_arg {
     name: "bias"
-    description: "1-D with size the last dimension of `value`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Broadcasted sum of `value` and `bias`."
     type_attr: "T"
   }
   attr {
@@ -4003,17 +3609,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -4026,7 +3633,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the bias tensor will be added to the last dimension\nof the value tensor.\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width].\nThe tensor will be added to \"in_channels\", the third-to-the-last\n    dimension."
     allowed_values {
       list {
         s: "NHWC"
@@ -4034,19 +3640,15 @@ op {
       }
     }
   }
-  summary: "Adds `bias` to `value`."
-  description: "This is a special case of `tf.add` where `bias` is restricted to be 1-D.\nBroadcasting is supported, so `value` may have any number of dimensions."
 }
 op {
   name: "BiasAddGrad"
   input_arg {
     name: "out_backprop"
-    description: "Any number of dimensions."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "1-D with size the feature dimension of `out_backprop`."
     type_attr: "T"
   }
   attr {
@@ -4056,17 +3658,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -4079,7 +3682,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the bias tensor will be added to the last dimension\nof the value tensor.\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width].\nThe tensor will be added to \"in_channels\", the third-to-the-last\n    dimension."
     allowed_values {
       list {
         s: "NHWC"
@@ -4087,24 +3689,19 @@ op {
       }
     }
   }
-  summary: "The backward operation for \"BiasAdd\" on the \"bias\" tensor."
-  description: "It accumulates all the values from out_backprop into the feature dimension.\nFor NHWC data format, the feature dimension is the last. For NCHW data format,\nthe feature dimension is the third-to-last."
 }
 op {
   name: "BiasAddV1"
   input_arg {
     name: "value"
-    description: "Any number of dimensions."
     type_attr: "T"
   }
   input_arg {
     name: "bias"
-    description: "1-D with size the last dimension of `value`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Broadcasted sum of `value` and `bias`."
     type_attr: "T"
   }
   attr {
@@ -4114,46 +3711,41 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  summary: "Adds `bias` to `value`."
-  description: "This is a deprecated version of BiasAdd and will be soon removed.\n\nThis is a special case of `tf.add` where `bias` is restricted to be 1-D.\nBroadcasting is supported, so `value` may have any number of dimensions."
 }
 op {
   name: "Bincount"
   input_arg {
     name: "arr"
-    description: "int32 `Tensor`."
     type: DT_INT32
   }
   input_arg {
     name: "size"
-    description: "non-negative int32 scalar `Tensor`."
     type: DT_INT32
   }
   input_arg {
     name: "weights"
-    description: "is an int32, int64, float32, or float64 `Tensor` with the same\nshape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights\nequal to 1."
     type_attr: "T"
   }
   output_arg {
     name: "bins"
-    description: "1D `Tensor` with length equal to `size`. The counts or summed weights for\neach value in the range [0, size)."
     type_attr: "T"
   }
   attr {
@@ -4168,8 +3760,6 @@ op {
       }
     }
   }
-  summary: "Counts the number of occurrences of each value in an integer array."
-  description: "Outputs a vector with length `size` and the same dtype as `weights`. If\n`weights` are empty, then index `i` stores the number of times the value `i` is\ncounted in `arr`. If `weights` are non-empty, then index `i` stores the sum of\nthe value in `weights` at each index where the corresponding value in `arr` is\n`i`.\n\nValues in `arr` outside of the range [0, size) are ignored."
 }
 op {
   name: "Bitcast"
@@ -4186,6 +3776,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT64
@@ -4210,6 +3801,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT64
@@ -4229,8 +3821,6 @@ op {
       }
     }
   }
-  summary: "Bitcasts a tensor from one type to another without copying data."
-  description: "Given a tensor `input`, this operation returns a tensor that has the same buffer\ndata as `input` with datatype `type`.\n\nIf the input datatype `T` is larger than the output datatype `type` then the\nshape changes from [...] to [..., sizeof(`T`)/sizeof(`type`)].\n\nIf `T` is smaller than `type`, the operator requires that the rightmost\ndimension be equal to sizeof(`type`)/sizeof(`T`). The shape then goes from\n[..., sizeof(`type`)/sizeof(`T`)] to [...].\n\n*NOTE*: Bitcast is implemented as a low-level cast, so machines with different\nendian orderings will give different results."
 }
 op {
   name: "BitwiseAnd"
@@ -4262,8 +3852,6 @@ op {
       }
     }
   }
-  summary: "Elementwise computes the bitwise AND of `x` and `y`."
-  description: "The result will have those bits set, that are set in both `x` and `y`. The\ncomputation is performed on the underlying representations of `x` and `y`."
   is_commutative: true
 }
 op {
@@ -4296,8 +3884,6 @@ op {
       }
     }
   }
-  summary: "Elementwise computes the bitwise OR of `x` and `y`."
-  description: "The result will have those bits set, that are set in `x`, `y` or both. The\ncomputation is performed on the underlying representations of `x` and `y`."
   is_commutative: true
 }
 op {
@@ -4330,8 +3916,6 @@ op {
       }
     }
   }
-  summary: "Elementwise computes the bitwise XOR of `x` and `y`."
-  description: "The result will have those bits set, that are different in `x` and `y`. The\ncomputation is performed on the underlying representations of `x` and `y`."
   is_commutative: true
 }
 op {
@@ -4361,8 +3945,6 @@ op {
       }
     }
   }
-  summary: "Return the shape of s0 op s1 with broadcast."
-  description: "Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the\nbroadcasted shape. `s0`, `s1` and `r0` are all integer vectors."
 }
 op {
   name: "BroadcastGradientArgs"
@@ -4395,19 +3977,15 @@ op {
       }
     }
   }
-  summary: "Return the reduction indices for computing gradients of s0 op s1 with broadcast."
-  description: "This is typically used by gradient computations for a broadcasting operation."
 }
 op {
   name: "Bucketize"
   input_arg {
     name: "input"
-    description: "Any shape of Tensor contains with int or float type."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Same shape with \'input\', each value of input replaced with bucket index.\n\n@compatibility(numpy)\nEquivalent to np.digitize.\n@end_compatibility"
     type: DT_INT32
   }
   attr {
@@ -4425,10 +4003,7 @@ op {
   attr {
     name: "boundaries"
     type: "list(float)"
-    description: "A sorted list of floats gives the boundary of the buckets."
   }
-  summary: "Bucketizes \'input\' based on \'boundaries\'."
-  description: "For example, if the inputs are\n    boundaries = [0, 10, 100]\n    input = [[-5, 10000]\n             [150,   10]\n             [5,    100]]\n\nthen the output will be\n    output = [[0, 3]\n              [3, 2]\n              [1, 3]]"
 }
 op {
   name: "BytesProducedStatsDataset"
@@ -4456,54 +4031,45 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Records the bytes size of each element of `input_dataset` in a StatsAggregator."
 }
 op {
   name: "CTCBeamSearchDecoder"
   input_arg {
     name: "inputs"
-    description: "3-D, shape: `(max_time x batch_size x num_classes)`, the logits."
     type: DT_FLOAT
   }
   input_arg {
     name: "sequence_length"
-    description: "A vector containing sequence lengths, size `(batch)`."
     type: DT_INT32
   }
   output_arg {
     name: "decoded_indices"
-    description: "A list (length: top_paths) of indices matrices.  Matrix j,\nsize `(total_decoded_outputs[j] x 2)`, has indices of a\n`SparseTensor<int64, 2>`.  The rows store: [batch, time]."
     type: DT_INT64
     number_attr: "top_paths"
   }
   output_arg {
     name: "decoded_values"
-    description: "A list (length: top_paths) of values vectors.  Vector j,\nsize `(length total_decoded_outputs[j])`, has the values of a\n`SparseTensor<int64, 2>`.  The vector stores the decoded classes for beam j."
     type: DT_INT64
     number_attr: "top_paths"
   }
   output_arg {
     name: "decoded_shape"
-    description: "A list (length: top_paths) of shape vector.  Vector j,\nsize `(2)`, stores the shape of the decoded `SparseTensor[j]`.\nIts values are: `[batch_size, max_decoded_length[j]]`."
     type: DT_INT64
     number_attr: "top_paths"
   }
   output_arg {
     name: "log_probability"
-    description: "A matrix, shaped: `(batch_size x top_paths)`.  The\nsequence log-probabilities."
     type: DT_FLOAT
   }
   attr {
     name: "beam_width"
     type: "int"
-    description: "A scalar >= 0 (beam search beam width)."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "top_paths"
     type: "int"
-    description: "A scalar >= 0, <= beam_width (controls output size)."
     has_minimum: true
     minimum: 1
   }
@@ -4513,41 +4079,32 @@ op {
     default_value {
       b: true
     }
-    description: "If true, merge repeated classes in output."
   }
-  summary: "Performs beam search decoding on the logits given in input."
-  description: "A note about the attribute merge_repeated: For the beam search decoder,\nthis means that if consecutive entries in a beam are the same, only\nthe first of these is emitted.  That is, when the top path is \"A B B B B\",\n\"A B\" is returned if merge_repeated = True but \"A B B B B\" is\nreturned if merge_repeated = False."
 }
 op {
   name: "CTCGreedyDecoder"
   input_arg {
     name: "inputs"
-    description: "3-D, shape: `(max_time x batch_size x num_classes)`, the logits."
     type: DT_FLOAT
   }
   input_arg {
     name: "sequence_length"
-    description: "A vector containing sequence lengths, size `(batch_size)`."
     type: DT_INT32
   }
   output_arg {
     name: "decoded_indices"
-    description: "Indices matrix, size `(total_decoded_outputs x 2)`,\nof a `SparseTensor<int64, 2>`.  The rows store: [batch, time]."
     type: DT_INT64
   }
   output_arg {
     name: "decoded_values"
-    description: "Values vector, size: `(total_decoded_outputs)`,\nof a `SparseTensor<int64, 2>`.  The vector stores the decoded classes."
     type: DT_INT64
   }
   output_arg {
     name: "decoded_shape"
-    description: "Shape vector, size `(2)`, of the decoded SparseTensor.\nValues are: `[batch_size, max_decoded_length]`."
     type: DT_INT64
   }
   output_arg {
     name: "log_probability"
-    description: "Matrix, size `(batch_size x 1)`, containing sequence\nlog-probabilities."
     type: DT_FLOAT
   }
   attr {
@@ -4556,41 +4113,32 @@ op {
     default_value {
       b: false
     }
-    description: "If True, merge repeated classes in output."
   }
-  summary: "Performs greedy decoding on the logits given in inputs."
-  description: "A note about the attribute merge_repeated: if enabled, when\nconsecutive logits\' maximum indices are the same, only the first of\nthese is emitted.  Labeling the blank \'*\', the sequence \"A B B * B B\"\nbecomes \"A B B\" if merge_repeated = True and \"A B B B B\" if\nmerge_repeated = False.\n\nRegardless of the value of merge_repeated, if the maximum index of a given\ntime and batch corresponds to the blank, index `(num_classes - 1)`, no new\nelement is emitted."
 }
 op {
   name: "CTCLoss"
   input_arg {
     name: "inputs"
-    description: "3-D, shape: `(max_time x batch_size x num_classes)`, the logits."
     type: DT_FLOAT
   }
   input_arg {
     name: "labels_indices"
-    description: "The indices of a `SparseTensor<int32, 2>`.\n`labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for\n`(batch b, time t)`."
     type: DT_INT64
   }
   input_arg {
     name: "labels_values"
-    description: "The values (labels) associated with the given batch and time."
     type: DT_INT32
   }
   input_arg {
     name: "sequence_length"
-    description: "A vector containing sequence lengths (batch)."
     type: DT_INT32
   }
   output_arg {
     name: "loss"
-    description: "A vector (batch) containing log-probabilities."
     type: DT_FLOAT
   }
   output_arg {
     name: "gradient"
-    description: "The gradient of `loss`.  3-D, shape:\n`(max_time x batch_size x num_classes)`."
     type: DT_FLOAT
   }
   attr {
@@ -4599,7 +4147,6 @@ op {
     default_value {
       b: false
     }
-    description: "Scalar, if true then repeated labels are\ncollapsed prior to the CTC calculation."
   }
   attr {
     name: "ctc_merge_repeated"
@@ -4607,7 +4154,6 @@ op {
     default_value {
       b: true
     }
-    description: "Scalar.  If set to false, *during* CTC calculation\nrepeated non-blank labels will not be merged and are interpreted as\nindividual labels.  This is a simplified version of CTC."
   }
   attr {
     name: "ignore_longer_outputs_than_inputs"
@@ -4615,10 +4161,7 @@ op {
     default_value {
       b: false
     }
-    description: "Scalar. If set to true, during CTC\ncalculation, items that have longer output sequences than input sequences\nare skipped: they don\'t contribute to the loss term and have zero-gradient."
   }
-  summary: "Calculates the CTC Loss (log probability) for each batch entry.  Also calculates"
-  description: "the gradient.  This class performs the softmax operation for you, so inputs\nshould be e.g. linear projections of outputs by an LSTM."
 }
 op {
   name: "CacheDataset"
@@ -4628,7 +4171,6 @@ op {
   }
   input_arg {
     name: "filename"
-    description: "A path on the filesystem where we should cache the dataset. Note: this\nwill be a directory."
     type: DT_STRING
   }
   output_arg {
@@ -4647,8 +4189,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that caches elements from `input_dataset`."
-  description: "A CacheDataset will iterate over the input_dataset, and store tensors. If the\ncache already exists, the cache will be used. If the cache is inappropriate\n(e.g. cannot be opened, contains tensors of the wrong shape / size), an error\nwill the returned when used."
 }
 op {
   name: "Cast"
@@ -4668,7 +4208,6 @@ op {
     name: "DstT"
     type: "type"
   }
-  summary: "Cast x of type SrcT to y of DstT."
 }
 op {
   name: "Ceil"
@@ -4686,12 +4225,12 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Returns element-wise smallest integer in not less than x."
 }
 op {
   name: "CheckNumerics"
@@ -4709,6 +4248,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -4717,21 +4257,16 @@ op {
   attr {
     name: "message"
     type: "string"
-    description: "Prefix of the error message."
   }
-  summary: "Checks a tensor for NaN and Inf values."
-  description: "When run, reports an `InvalidArgument` error if `tensor` has any values\nthat are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is."
 }
 op {
   name: "Cholesky"
   input_arg {
     name: "input"
-    description: "Shape is `[..., M, M]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Shape is `[..., M, M]`."
     type_attr: "T"
   }
   attr {
@@ -4746,24 +4281,19 @@ op {
       }
     }
   }
-  summary: "Computes the Cholesky decomposition of one or more square matrices."
-  description: "The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices.\n\nThe input has to be symmetric and positive definite. Only the lower-triangular\npart of the input will be used for this operation. The upper-triangular part\nwill not be read.\n\nThe output is a tensor of the same shape as the input\ncontaining the Cholesky decompositions for all input submatrices `[..., :, :]`.\n\n**Note**: The gradient computation on GPU is faster for large matrices but\nnot for large batch dimensions when the submatrices are small. In this\ncase it might be faster to use the CPU."
 }
 op {
   name: "CholeskyGrad"
   input_arg {
     name: "l"
-    description: "Output of batch Cholesky algorithm l = cholesky(A). Shape is `[..., M, M]`.\nAlgorithm depends only on lower triangular part of the innermost matrices of\nthis tensor."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "df/dl where f is some scalar function. Shape is `[..., M, M]`.\nAlgorithm depends only on lower triangular part of the innermost matrices of\nthis tensor."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Symmetrized version of df/dA . Shape is `[..., M, M]`"
     type_attr: "T"
   }
   attr {
@@ -4776,30 +4306,24 @@ op {
       }
     }
   }
-  summary: "Computes the reverse mode backpropagated gradient of the Cholesky algorithm."
-  description: "For an explanation see \"Differentiation of the Cholesky algorithm\" by\nIain Murray http://arxiv.org/abs/1602.07527."
 }
 op {
   name: "CompareAndBitpack"
   input_arg {
     name: "input"
-    description: "Values to compare against `threshold` and bitpack."
     type_attr: "T"
   }
   input_arg {
     name: "threshold"
-    description: "Threshold to compare against."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The bitpacked comparisons."
     type: DT_UINT8
   }
   attr {
     name: "T"
     type: "type"
-    description: "The type of the input and threshold."
     allowed_values {
       list {
         type: DT_BOOL
@@ -4813,8 +4337,6 @@ op {
       }
     }
   }
-  summary: "Compare values of `input` to `threshold` and pack resulting bits into a `uint8`."
-  description: "Each comparison returns a boolean `true` (if `input_value > threshold`)\nor and `false` otherwise.\n\nThis operation is useful for Locality-Sensitive-Hashing (LSH) and other\nalgorithms that use hashing approximations of cosine and `L2` distances;\ncodes can be generated from an input via:\n\n```python\ncodebook_size = 50\ncodebook_bits = codebook_size * 32\ncodebook = tf.get_variable(\'codebook\', [x.shape[-1].value, codebook_bits],\n                           dtype=x.dtype,\n                           initializer=tf.orthogonal_initializer())\ncodes = compare_and_threshold(tf.matmul(x, codebook), threshold=0.)\ncodes = tf.bitcast(codes, tf.int32)  # go from uint8 to int32\n# now codes has shape x.shape[:-1] + [codebook_size]\n```\n\n**NOTE**: Currently, the innermost dimension of the tensor must be divisible\nby 8.\n\nGiven an `input` shaped `[s0, s1, ..., s_n]`, the output is\na `uint8` tensor shaped `[s0, s1, ..., s_n / 8]`."
 }
 op {
   name: "Complex"
@@ -4856,8 +4378,6 @@ op {
       }
     }
   }
-  summary: "Converts two real numbers to a complex number."
-  description: "Given a tensor `real` representing the real part of a complex number, and a\ntensor `imag` representing the imaginary part of a complex number, this\noperation returns complex numbers elementwise of the form \\\\(a + bj\\\\), where\n*a* represents the `real` part and *b* represents the `imag` part.\n\nThe input tensors `real` and `imag` must have the same shape.\n\nFor example:\n\n```\n# tensor \'real\' is [2.25, 3.25]\n# tensor `imag` is [4.75, 5.75]\ntf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]]\n```"
 }
 op {
   name: "ComplexAbs"
@@ -4895,40 +4415,32 @@ op {
       }
     }
   }
-  summary: "Computes the complex absolute value of a tensor."
-  description: "Given a tensor `x` of complex numbers, this operation returns a tensor of type\n`float` or `double` that is the absolute value of each element in `x`. All\nelements in `x` must be complex numbers of the form \\\\(a + bj\\\\). The absolute\nvalue is computed as \\\\( \\sqrt{a^2 + b^2}\\\\)."
 }
 op {
   name: "ComputeAccidentalHits"
   input_arg {
     name: "true_classes"
-    description: "The true_classes output of UnpackSparseLabels."
     type: DT_INT64
   }
   input_arg {
     name: "sampled_candidates"
-    description: "The sampled_candidates output of CandidateSampler."
     type: DT_INT64
   }
   output_arg {
     name: "indices"
-    description: "A vector of indices corresponding to rows of true_candidates."
     type: DT_INT32
   }
   output_arg {
     name: "ids"
-    description: "A vector of IDs of positions in sampled_candidates that match a true_label\nfor the row with the corresponding index in indices."
     type: DT_INT64
   }
   output_arg {
     name: "weights"
-    description: "A vector of the same length as indices and ids, in which each element\nis -FLOAT_MAX."
     type: DT_FLOAT
   }
   attr {
     name: "num_true"
     type: "int"
-    description: "Number of true labels per context."
   }
   attr {
     name: "seed"
@@ -4936,7 +4448,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -4944,27 +4455,21 @@ op {
     default_value {
       i: 0
     }
-    description: "An second seed to avoid seed collision."
   }
-  summary: "Computes the ids of the positions in sampled_candidates that match true_labels."
-  description: "When doing log-odds NCE, the result of this op should be passed through a\nSparseToDense op, then added to the logits of the sampled candidates. This has\nthe effect of \'removing\' the sampled labels that match the true labels by\nmaking the classifier sure that they are sampled labels."
 }
 op {
   name: "Concat"
   input_arg {
     name: "concat_dim"
-    description: "0-D.  The dimension along which to concatenate.  Must be in the\nrange [0, rank(values))."
     type: DT_INT32
   }
   input_arg {
     name: "values"
-    description: "The `N` Tensors to concatenate. Their ranks and types must match,\nand their sizes must match in all dimensions except `concat_dim`."
     type_attr: "T"
     number_attr: "N"
   }
   output_arg {
     name: "output"
-    description: "A `Tensor` with the concatenation of values stacked along the\n`concat_dim` dimension.  This tensor\'s shape matches that of `values` except\nin `concat_dim` where it has the sum of the sizes."
     type_attr: "T"
   }
   attr {
@@ -4977,24 +4482,20 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Concatenates tensors along one dimension."
 }
 op {
   name: "ConcatOffset"
   input_arg {
     name: "concat_dim"
-    description: "The dimension along which to concatenate."
     type: DT_INT32
   }
   input_arg {
     name: "shape"
-    description: "The `N` int32 vectors representing shape of tensors being concatenated."
     type: DT_INT32
     number_attr: "N"
   }
   output_arg {
     name: "offset"
-    description: "The `N` int32 vectors representing the starting offset\nof input tensors within the concatenated output."
     type: DT_INT32
     number_attr: "N"
   }
@@ -5004,25 +4505,20 @@ op {
     has_minimum: true
     minimum: 2
   }
-  summary: "Computes offsets of concat inputs within its output."
-  description: "For example:\n\n```\n# \'x\' is [2, 2, 7]\n# \'y\' is [2, 3, 7]\n# \'z\' is [2, 5, 7]\nconcat_offset(2, [x, y, z]) => [0, 0, 0], [0, 2, 0], [0, 5, 0]\n```\n\nThis is typically used by gradient computations for a concat operation."
 }
 op {
   name: "ConcatV2"
   input_arg {
     name: "values"
-    description: "List of `N` Tensors to concatenate. Their ranks and types must match,\nand their sizes must match in all dimensions except `concat_dim`."
     type_attr: "T"
     number_attr: "N"
   }
   input_arg {
     name: "axis"
-    description: "0-D.  The dimension along which to concatenate.  Must be in the\nrange [-rank(values), rank(values))."
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "A `Tensor` with the concatenation of values stacked along the\n`concat_dim` dimension.  This tensor\'s shape matches that of `values` except\nin `concat_dim` where it has the sum of the sizes."
     type_attr: "T"
   }
   attr {
@@ -5048,7 +4544,6 @@ op {
       }
     }
   }
-  summary: "Concatenates tensors along one dimension."
 }
 op {
   name: "ConcatenateDataset"
@@ -5076,35 +4571,33 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that concatenates `input_dataset` with `another_dataset`."
 }
 op {
   name: "ConditionalAccumulator"
   output_arg {
     name: "handle"
-    description: "The handle to the accumulator."
     type: DT_STRING
     is_ref: true
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of the value being accumulated."
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -5114,7 +4607,6 @@ op {
   attr {
     name: "shape"
     type: "shape"
-    description: "The shape of the values, can be [], in which case shape is unknown."
   }
   attr {
     name: "container"
@@ -5122,7 +4614,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this accumulator is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -5130,10 +4621,7 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this accumulator will be shared under the\ngiven name across multiple sessions."
   }
-  summary: "A conditional accumulator for aggregating gradients."
-  description: "The accumulator accepts gradients marked with local_step greater or\nequal to the most recent global_step known to the accumulator. The\naverage can be extracted from the accumulator, provided sufficient\ngradients have been accumulated. Extracting the average automatically\nresets the aggregate to 0, and increments the global_step recorded by\nthe accumulator."
   is_stateful: true
 }
 op {
@@ -5160,8 +4648,6 @@ op {
       }
     }
   }
-  summary: "Returns the complex conjugate of a complex number."
-  description: "Given a tensor `input` of complex numbers, this operation returns a tensor of\ncomplex numbers that are the complex conjugate of each element in `input`. The\ncomplex numbers in `input` must be of the form \\\\(a + bj\\\\), where *a* is the\nreal part and *b* is the imaginary part.\n\nThe complex conjugate returned by this operation is of the form \\\\(a - bj\\\\).\n\nFor example:\n\n```\n# tensor \'input\' is [-2.25 + 4.75j, 3.25 + 5.75j]\ntf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]\n```"
 }
 op {
   name: "ConjugateTranspose"
@@ -5194,8 +4680,6 @@ op {
       }
     }
   }
-  summary: "Shuffle dimensions of x according to a permutation and conjugate the result."
-  description: "The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:\n  `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]`\n  `y[i,j,k,...,s,t,u] == conj(x[perm[i], perm[j], perm[k],...,perm[s], perm[t], perm[u]])`"
 }
 op {
   name: "Const"
@@ -5206,34 +4690,27 @@ op {
   attr {
     name: "value"
     type: "tensor"
-    description: "Attr `value` is the tensor to return."
   }
   attr {
     name: "dtype"
     type: "type"
   }
-  summary: "Returns a constant tensor."
 }
 op {
   name: "ControlTrigger"
-  summary: "Does nothing. Serves as a control trigger for scheduling."
-  description: "Only useful as a placeholder for control edges."
 }
 op {
   name: "Conv2D"
   input_arg {
     name: "input"
-    description: "A 4-D tensor. The dimension order is interpreted according to the value\nof `data_format`, see below for details."
     type_attr: "T"
   }
   input_arg {
     name: "filter"
-    description: "A 4-D tensor of shape\n`[filter_height, filter_width, in_channels, out_channels]`"
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "A 4-D tensor. The dimension order is determined by the value of\n`data_format`, see below for details."
     type_attr: "T"
   }
   attr {
@@ -5242,6 +4719,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -5249,7 +4727,6 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 4.  The stride of the sliding window for each\ndimension of `input`. The dimension order is determined by the value of\n  `data_format`, see below for details."
   }
   attr {
     name: "use_cudnn_on_gpu"
@@ -5261,7 +4738,6 @@ op {
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -5275,7 +4751,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, height, width, channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, channels, height, width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -5283,29 +4758,35 @@ op {
       }
     }
   }
-  summary: "Computes a 2-D convolution given 4-D `input` and `filter` tensors."
-  description: "Given an input tensor of shape `[batch, in_height, in_width, in_channels]`\nand a filter / kernel tensor of shape\n`[filter_height, filter_width, in_channels, out_channels]`, this op\nperforms the following:\n\n1. Flattens the filter to a 2-D matrix with shape\n   `[filter_height * filter_width * in_channels, output_channels]`.\n2. Extracts image patches from the input tensor to form a *virtual*\n   tensor of shape `[batch, out_height, out_width,\n   filter_height * filter_width * in_channels]`.\n3. For each patch, right-multiplies the filter matrix and the image patch\n   vector.\n\nIn detail, with the default NHWC format,\n\n    output[b, i, j, k] =\n        sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] *\n                        filter[di, dj, q, k]\n\nMust have `strides[0] = strides[3] = 1`.  For the most common case of the same\nhorizontal and vertices strides, `strides = [1, stride, stride, 1]`."
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
   name: "Conv2DBackpropFilter"
   input_arg {
     name: "input"
-    description: "4-D with shape `[batch, in_height, in_width, in_channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "filter_sizes"
-    description: "An integer vector representing the tensor shape of `filter`,\nwhere `filter` is a 4-D\n`[filter_height, filter_width, in_channels, out_channels]` tensor."
     type: DT_INT32
   }
   input_arg {
     name: "out_backprop"
-    description: "4-D with shape `[batch, out_height, out_width, out_channels]`.\nGradients w.r.t. the output of the convolution."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "4-D with shape\n`[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.\nthe `filter` input of the convolution."
     type_attr: "T"
   }
   attr {
@@ -5314,6 +4795,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -5321,7 +4803,6 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the input\nof the convolution. Must be in the same order as the dimension specified with\nformat."
   }
   attr {
     name: "use_cudnn_on_gpu"
@@ -5333,7 +4814,6 @@ op {
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -5347,7 +4827,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -5355,28 +4834,35 @@ op {
       }
     }
   }
-  summary: "Computes the gradients of convolution with respect to the filter."
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
   name: "Conv2DBackpropInput"
   input_arg {
     name: "input_sizes"
-    description: "An integer vector representing the shape of `input`,\nwhere `input` is a 4-D `[batch, height, width, channels]` tensor."
     type: DT_INT32
   }
   input_arg {
     name: "filter"
-    description: "4-D with shape\n`[filter_height, filter_width, in_channels, out_channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "out_backprop"
-    description: "4-D with shape `[batch, out_height, out_width, out_channels]`.\nGradients w.r.t. the output of the convolution."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "4-D with shape `[batch, in_height, in_width, in_channels]`.  Gradient\nw.r.t. the input of the convolution."
     type_attr: "T"
   }
   attr {
@@ -5385,6 +4871,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -5392,7 +4879,6 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the input\nof the convolution. Must be in the same order as the dimension specified with\nformat."
   }
   attr {
     name: "use_cudnn_on_gpu"
@@ -5404,7 +4890,6 @@ op {
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -5418,7 +4903,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -5426,18 +4910,27 @@ op {
       }
     }
   }
-  summary: "Computes the gradients of convolution with respect to the input."
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
   name: "Conv3D"
   input_arg {
     name: "input"
-    description: "Shape `[batch, in_depth, in_height, in_width, in_channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "filter"
-    description: "Shape `[filter_depth, filter_height, filter_width, in_channels,\nout_channels]`. `in_channels` must match between `input` and `filter`."
     type_attr: "T"
   }
   output_arg {
@@ -5450,6 +4943,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5458,14 +4952,12 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 5. The stride of the sliding window for each\ndimension of `input`. Must have `strides[0] = strides[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -5479,7 +4971,6 @@ op {
     default_value {
       s: "NDHWC"
     }
-    description: "The data format of the input and output data. With the\ndefault format \"NDHWC\", the data is stored in the order of:\n    [batch, in_depth, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCDHW\", the data storage order is:\n    [batch, in_channels, in_depth, in_height, in_width]."
     allowed_values {
       list {
         s: "NDHWC"
@@ -5487,24 +4978,32 @@ op {
       }
     }
   }
-  summary: "Computes a 3-D convolution given 5-D `input` and `filter` tensors."
-  description: "In signal processing, cross-correlation is a measure of similarity of\ntwo waveforms as a function of a time-lag applied to one of them. This\nis also known as a sliding dot product or sliding inner-product.\n\nOur Conv3D implements a form of cross-correlation."
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
   name: "Conv3DBackpropFilter"
   input_arg {
     name: "input"
-    description: "Shape `[batch, depth, rows, cols, in_channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "filter"
-    description: "Shape `[depth, rows, cols, in_channels, out_channels]`.\n`in_channels` must match between `input` and `filter`."
     type_attr: "T"
   }
   input_arg {
     name: "out_backprop"
-    description: "Backprop signal of shape `[batch, out_depth, out_rows, out_cols,\nout_channels]`."
     type_attr: "T"
   }
   output_arg {
@@ -5525,14 +5024,12 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 5. The stride of the sliding window for each\ndimension of `input`. Must have `strides[0] = strides[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -5540,7 +5037,6 @@ op {
       }
     }
   }
-  summary: "Computes the gradients of 3-D convolution with respect to the filter."
   deprecation {
     version: 10
     explanation: "Use Conv3DBackpropFilterV2"
@@ -5550,17 +5046,14 @@ op {
   name: "Conv3DBackpropFilterV2"
   input_arg {
     name: "input"
-    description: "Shape `[batch, depth, rows, cols, in_channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "filter_sizes"
-    description: "An integer vector representing the tensor shape of `filter`,\nwhere `filter` is a 5-D\n`[filter_depth, filter_height, filter_width, in_channels, out_channels]`\ntensor."
     type: DT_INT32
   }
   input_arg {
     name: "out_backprop"
-    description: "Backprop signal of shape `[batch, out_depth, out_rows, out_cols,\nout_channels]`."
     type_attr: "T"
   }
   output_arg {
@@ -5573,6 +5066,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5581,14 +5075,12 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 5. The stride of the sliding window for each\ndimension of `input`. Must have `strides[0] = strides[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -5602,7 +5094,6 @@ op {
     default_value {
       s: "NDHWC"
     }
-    description: "The data format of the input and output data. With the\ndefault format \"NDHWC\", the data is stored in the order of:\n    [batch, in_depth, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCDHW\", the data storage order is:\n    [batch, in_channels, in_depth, in_height, in_width]."
     allowed_values {
       list {
         s: "NDHWC"
@@ -5610,23 +5101,32 @@ op {
       }
     }
   }
-  summary: "Computes the gradients of 3-D convolution with respect to the filter."
-}
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
+}
 op {
   name: "Conv3DBackpropInput"
   input_arg {
     name: "input"
-    description: "Shape `[batch, depth, rows, cols, in_channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "filter"
-    description: "Shape `[depth, rows, cols, in_channels, out_channels]`.\n`in_channels` must match between `input` and `filter`."
     type_attr: "T"
   }
   input_arg {
     name: "out_backprop"
-    description: "Backprop signal of shape `[batch, out_depth, out_rows, out_cols,\nout_channels]`."
     type_attr: "T"
   }
   output_arg {
@@ -5647,14 +5147,12 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 5. The stride of the sliding window for each\ndimension of `input`. Must have `strides[0] = strides[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -5662,7 +5160,6 @@ op {
       }
     }
   }
-  summary: "Computes the gradients of 3-D convolution with respect to the input."
   deprecation {
     version: 10
     explanation: "Use Conv3DBackpropInputV2"
@@ -5672,17 +5169,14 @@ op {
   name: "Conv3DBackpropInputV2"
   input_arg {
     name: "input_sizes"
-    description: "An integer vector representing the tensor shape of `input`,\nwhere `input` is a 5-D\n`[batch, depth, rows, cols, in_channels]` tensor."
     type: DT_INT32
   }
   input_arg {
     name: "filter"
-    description: "Shape `[depth, rows, cols, in_channels, out_channels]`.\n`in_channels` must match between `input` and `filter`."
     type_attr: "T"
   }
   input_arg {
     name: "out_backprop"
-    description: "Backprop signal of shape `[batch, out_depth, out_rows, out_cols,\nout_channels]`."
     type_attr: "T"
   }
   output_arg {
@@ -5695,6 +5189,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -5703,14 +5198,12 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 5. The stride of the sliding window for each\ndimension of `input`. Must have `strides[0] = strides[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -5724,7 +5217,6 @@ op {
     default_value {
       s: "NDHWC"
     }
-    description: "The data format of the input and output data. With the\ndefault format \"NDHWC\", the data is stored in the order of:\n    [batch, in_depth, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCDHW\", the data storage order is:\n    [batch, in_channels, in_depth, in_height, in_width]."
     allowed_values {
       list {
         s: "NDHWC"
@@ -5732,7 +5224,19 @@ op {
       }
     }
   }
-  summary: "Computes the gradients of 3-D convolution with respect to the input."
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
   name: "Copy"
@@ -5824,6 +5328,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -5831,7 +5336,6 @@ op {
       }
     }
   }
-  summary: "Computes cos of x element-wise."
 }
 op {
   name: "Cosh"
@@ -5849,6 +5353,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -5856,25 +5361,21 @@ op {
       }
     }
   }
-  summary: "Computes hyperbolic cosine of x element-wise."
 }
 op {
   name: "CountUpTo"
   input_arg {
     name: "ref"
-    description: "Should be from a scalar `Variable` node."
     type_attr: "T"
     is_ref: true
   }
   output_arg {
     name: "output"
-    description: "A copy of the input before increment. If nothing else modifies the\ninput, the values produced will all be distinct."
     type_attr: "T"
   }
   attr {
     name: "limit"
     type: "int"
-    description: "If incrementing ref would bring it above limit, instead generates an\n\'OutOfRange\' error."
   }
   attr {
     name: "T"
@@ -5886,33 +5387,49 @@ op {
       }
     }
   }
-  summary: "Increments \'ref\' until it reaches \'limit\'."
+}
+op {
+  name: "CriticalSectionOp"
+  output_arg {
+    name: "resource"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "container"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  attr {
+    name: "shared_name"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
 }
 op {
   name: "CropAndResize"
   input_arg {
     name: "image"
-    description: "A 4-D tensor of shape `[batch, image_height, image_width, depth]`.\nBoth `image_height` and `image_width` need to be positive."
     type_attr: "T"
   }
   input_arg {
     name: "boxes"
-    description: "A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor\nspecifies the coordinates of a box in the `box_ind[i]` image and is specified\nin normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of\n`y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the\n`[0, 1]` interval of normalized image height is mapped to\n`[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in\nwhich case the sampled crop is an up-down flipped version of the original\nimage. The width dimension is treated similarly. Normalized coordinates\noutside the `[0, 1]` range are allowed, in which case we use\n`extrapolation_value` to extrapolate the input image values."
     type: DT_FLOAT
   }
   input_arg {
     name: "box_ind"
-    description: "A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.\nThe value of `box_ind[i]` specifies the image that the `i`-th box refers to."
     type: DT_INT32
   }
   input_arg {
     name: "crop_size"
-    description: "A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All\ncropped image patches are resized to this size. The aspect ratio of the image\ncontent is not preserved. Both `crop_height` and `crop_width` need to be\npositive."
     type: DT_INT32
   }
   output_arg {
     name: "crops"
-    description: "A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`."
     type: DT_FLOAT
   }
   attr {
@@ -5938,7 +5455,6 @@ op {
     default_value {
       s: "bilinear"
     }
-    description: "A string specifying the interpolation method. Only \'bilinear\' is\nsupported for now."
     allowed_values {
       list {
         s: "bilinear"
@@ -5951,36 +5467,28 @@ op {
     default_value {
       f: 0
     }
-    description: "Value used for extrapolation, when applicable."
   }
-  summary: "Extracts crops from the input image tensor and bilinearly resizes them (possibly"
-  description: "with aspect ratio change) to a common output size specified by `crop_size`. This\nis more general than the `crop_to_bounding_box` op which extracts a fixed size\nslice from the input image and does not allow resizing or aspect ratio change.\n\nReturns a tensor with `crops` from the input `image` at positions defined at the\nbounding box locations in `boxes`. The cropped boxes are all resized (with\nbilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The\nresult is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The\nresizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the\nmethod will give identical results to using `tf.image.resize_bilinear()`\nwith `align_corners=True`."
 }
 op {
   name: "CropAndResizeGradBoxes"
   input_arg {
     name: "grads"
-    description: "A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`."
     type: DT_FLOAT
   }
   input_arg {
     name: "image"
-    description: "A 4-D tensor of shape `[batch, image_height, image_width, depth]`.\nBoth `image_height` and `image_width` need to be positive."
     type_attr: "T"
   }
   input_arg {
     name: "boxes"
-    description: "A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor\nspecifies the coordinates of a box in the `box_ind[i]` image and is specified\nin normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of\n`y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the\n`[0, 1]` interval of normalized image height is mapped to\n`[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in\nwhich case the sampled crop is an up-down flipped version of the original\nimage. The width dimension is treated similarly. Normalized coordinates\noutside the `[0, 1]` range are allowed, in which case we use\n`extrapolation_value` to extrapolate the input image values."
     type: DT_FLOAT
   }
   input_arg {
     name: "box_ind"
-    description: "A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.\nThe value of `box_ind[i]` specifies the image that the `i`-th box refers to."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "A 2-D tensor of shape `[num_boxes, 4]`."
     type: DT_FLOAT
   }
   attr {
@@ -6006,40 +5514,33 @@ op {
     default_value {
       s: "bilinear"
     }
-    description: "A string specifying the interpolation method. Only \'bilinear\' is\nsupported for now."
     allowed_values {
       list {
         s: "bilinear"
       }
     }
   }
-  summary: "Computes the gradient of the crop_and_resize op wrt the input boxes tensor."
 }
 op {
   name: "CropAndResizeGradImage"
   input_arg {
     name: "grads"
-    description: "A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`."
     type: DT_FLOAT
   }
   input_arg {
     name: "boxes"
-    description: "A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor\nspecifies the coordinates of a box in the `box_ind[i]` image and is specified\nin normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of\n`y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the\n`[0, 1]` interval of normalized image height is mapped to\n`[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in\nwhich case the sampled crop is an up-down flipped version of the original\nimage. The width dimension is treated similarly. Normalized coordinates\noutside the `[0, 1]` range are allowed, in which case we use\n`extrapolation_value` to extrapolate the input image values."
     type: DT_FLOAT
   }
   input_arg {
     name: "box_ind"
-    description: "A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.\nThe value of `box_ind[i]` specifies the image that the `i`-th box refers to."
     type: DT_INT32
   }
   input_arg {
     name: "image_size"
-    description: "A 1-D tensor with value `[batch, image_height, image_width, depth]`\ncontaining the original image size. Both `image_height` and `image_width` need\nto be positive."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "A 4-D tensor of shape `[batch, image_height, image_width, depth]`."
     type_attr: "T"
   }
   attr {
@@ -6059,30 +5560,25 @@ op {
     default_value {
       s: "bilinear"
     }
-    description: "A string specifying the interpolation method. Only \'bilinear\' is\nsupported for now."
     allowed_values {
       list {
         s: "bilinear"
       }
     }
   }
-  summary: "Computes the gradient of the crop_and_resize op wrt the input image tensor."
 }
 op {
   name: "Cross"
   input_arg {
     name: "a"
-    description: "A tensor containing 3-element vectors."
     type_attr: "T"
   }
   input_arg {
     name: "b"
-    description: "Another tensor, of same type and shape as `a`."
     type_attr: "T"
   }
   output_arg {
     name: "product"
-    description: "Pairwise cross product of the vectors in `a` and `b`."
     type_attr: "T"
   }
   attr {
@@ -6093,10 +5589,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -6104,19 +5601,15 @@ op {
       }
     }
   }
-  summary: "Compute the pairwise cross product."
-  description: "`a` and `b` must be the same shape; they can either be simple 3-element vectors,\nor any shape where the innermost dimension is 3. In the latter case, each pair\nof corresponding 3-element vectors is cross-multiplied independently."
 }
 op {
   name: "Cumprod"
   input_arg {
     name: "x"
-    description: "A `Tensor`. Must be one of the following types: `float32`, `float64`,\n`int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,\n`complex128`, `qint8`, `quint8`, `qint32`, `half`."
     type_attr: "T"
   }
   input_arg {
     name: "axis"
-    description: "A `Tensor` of type `int32` (default: 0). Must be in the range\n`[-rank(x), rank(x))`."
     type_attr: "Tidx"
   }
   output_arg {
@@ -6129,7 +5622,6 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, perform exclusive cumprod."
   }
   attr {
     name: "reverse"
@@ -6137,7 +5629,6 @@ op {
     default_value {
       b: false
     }
-    description: "A `bool` (default: False)."
   }
   attr {
     name: "T"
@@ -6146,17 +5637,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -6176,19 +5668,15 @@ op {
       }
     }
   }
-  summary: "Compute the cumulative product of the tensor `x` along `axis`."
-  description: "By default, this op performs an inclusive cumprod, which means that the first\nelement of the input is identical to the first element of the output:\n\n```python\ntf.cumprod([a, b, c])  # => [a, a * b, a * b * c]\n```\n\nBy setting the `exclusive` kwarg to `True`, an exclusive cumprod is\nperformed instead:\n\n```python\ntf.cumprod([a, b, c], exclusive=True)  # => [1, a, a * b]\n```\n\nBy setting the `reverse` kwarg to `True`, the cumprod is performed in the\nopposite direction:\n\n```python\ntf.cumprod([a, b, c], reverse=True)  # => [a * b * c, b * c, c]\n```\n\nThis is more efficient than using separate `tf.reverse` ops.\n\nThe `reverse` and `exclusive` kwargs can also be combined:\n\n```python\ntf.cumprod([a, b, c], exclusive=True, reverse=True)  # => [b * c, c, 1]\n```"
 }
 op {
   name: "Cumsum"
   input_arg {
     name: "x"
-    description: "A `Tensor`. Must be one of the following types: `float32`, `float64`,\n`int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,\n`complex128`, `qint8`, `quint8`, `qint32`, `half`."
     type_attr: "T"
   }
   input_arg {
     name: "axis"
-    description: "A `Tensor` of type `int32` (default: 0). Must be in the range\n`[-rank(x), rank(x))`."
     type_attr: "Tidx"
   }
   output_arg {
@@ -6201,7 +5689,6 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, perform exclusive cumsum."
   }
   attr {
     name: "reverse"
@@ -6209,7 +5696,6 @@ op {
     default_value {
       b: false
     }
-    description: "A `bool` (default: False)."
   }
   attr {
     name: "T"
@@ -6218,17 +5704,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -6248,19 +5735,91 @@ op {
       }
     }
   }
-  summary: "Compute the cumulative sum of the tensor `x` along `axis`."
-  description: "By default, this op performs an inclusive cumsum, which means that the first\nelement of the input is identical to the first element of the output:\n\n```python\ntf.cumsum([a, b, c])  # => [a, a + b, a + b + c]\n```\n\nBy setting the `exclusive` kwarg to `True`, an exclusive cumsum is\nperformed instead:\n\n```python\ntf.cumsum([a, b, c], exclusive=True)  # => [0, a, a + b]\n```\n\nBy setting the `reverse` kwarg to `True`, the cumsum is performed in the\nopposite direction:\n\n```python\ntf.cumsum([a, b, c], reverse=True)  # => [a + b + c, b + c, c]\n```\n\nThis is more efficient than using separate `tf.reverse` ops.\n\nThe `reverse` and `exclusive` kwargs can also be combined:\n\n```python\ntf.cumsum([a, b, c], exclusive=True, reverse=True)  # => [b + c, c, 0]\n```"
+}
+op {
+  name: "DataFormatDimMap"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "src_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    name: "dst_format"
+    type: "string"
+    default_value {
+      s: "NCHW"
+    }
+  }
+}
+op {
+  name: "DataFormatVecPermute"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "src_format"
+    type: "string"
+    default_value {
+      s: "NHWC"
+    }
+  }
+  attr {
+    name: "dst_format"
+    type: "string"
+    default_value {
+      s: "NCHW"
+    }
+  }
 }
 op {
   name: "DatasetToSingleElement"
   input_arg {
     name: "dataset"
-    description: "A handle to a dataset that contains a single element."
     type: DT_VARIANT
   }
   output_arg {
     name: "components"
-    description: "The components of the single element of `input`."
     type_list_attr: "output_types"
   }
   attr {
@@ -6275,7 +5834,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Outputs the single element from the given dataset."
 }
 op {
   name: "DebugGradientIdentity"
@@ -6291,8 +5849,24 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Identity op for gradient debugging."
-  description: "This op is hidden from public in Python. It is used by TensorFlow Debugger to\nregister gradient tensors for gradient debugging."
+  allows_uninitialized_input: true
+}
+op {
+  name: "DebugGradientRefIdentity"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+    is_ref: true
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+    is_ref: true
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
   allows_uninitialized_input: true
 }
 op {
@@ -6479,17 +6053,14 @@ op {
   name: "DecodeAndCropJpeg"
   input_arg {
     name: "contents"
-    description: "0-D.  The JPEG-encoded image."
     type: DT_STRING
   }
   input_arg {
     name: "crop_window"
-    description: "1-D.  The crop window: [crop_y, crop_x, crop_height, crop_width]."
     type: DT_INT32
   }
   output_arg {
     name: "image"
-    description: "3-D with shape `[height, width, channels]`.."
     type: DT_UINT8
   }
   attr {
@@ -6498,7 +6069,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Number of color channels for the decoded image."
   }
   attr {
     name: "ratio"
@@ -6506,7 +6076,6 @@ op {
     default_value {
       i: 1
     }
-    description: "Downscaling ratio."
   }
   attr {
     name: "fancy_upscaling"
@@ -6514,7 +6083,6 @@ op {
     default_value {
       b: true
     }
-    description: "If true use a slower but nicer upscaling of the\nchroma planes (yuv420/422 only)."
   }
   attr {
     name: "try_recover_truncated"
@@ -6522,7 +6090,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true try to recover an image from truncated input."
   }
   attr {
     name: "acceptable_fraction"
@@ -6530,7 +6097,6 @@ op {
     default_value {
       f: 1
     }
-    description: "The minimum required fraction of lines before a truncated\ninput is accepted."
   }
   attr {
     name: "dct_method"
@@ -6538,36 +6104,27 @@ op {
     default_value {
       s: ""
     }
-    description: "string specifying a hint about the algorithm used for\ndecompression.  Defaults to \"\" which maps to a system-specific\ndefault.  Currently valid values are [\"INTEGER_FAST\",\n\"INTEGER_ACCURATE\"].  The hint may be ignored (e.g., the internal\njpeg library changes to a version that does not have that specific\noption.)"
   }
-  summary: "Decode and Crop a JPEG-encoded image to a uint8 tensor."
-  description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n*   0: Use the number of channels in the JPEG-encoded image.\n*   1: output a grayscale image.\n*   3: output an RGB image.\n\nIf needed, the JPEG-encoded image is transformed to match the requested number\nof color channels.\n\nThe attr `ratio` allows downscaling the image by an integer factor during\ndecoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than\ndownscaling the image later.\n\n\nIt is equivalent to a combination of decode and crop, but much faster by only\ndecoding partial jpeg image."
 }
 op {
   name: "DecodeBase64"
   input_arg {
     name: "input"
-    description: "Base64 strings to decode."
     type: DT_STRING
   }
   output_arg {
     name: "output"
-    description: "Decoded strings."
     type: DT_STRING
   }
-  summary: "Decode web-safe base64-encoded strings."
-  description: "Input may or may not have padding at the end. See EncodeBase64 for padding.\nWeb-safe means that input must use - and _ instead of + and /."
 }
 op {
   name: "DecodeBmp"
   input_arg {
     name: "contents"
-    description: "0-D.  The BMP-encoded image."
     type: DT_STRING
   }
   output_arg {
     name: "image"
-    description: "3-D with shape `[height, width, channels]`. RGB order"
     type: DT_UINT8
   }
   attr {
@@ -6577,24 +6134,19 @@ op {
       i: 0
     }
   }
-  summary: "Decode the first frame of a BMP-encoded image to a uint8 tensor."
-  description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n*   0: Use the number of channels in the BMP-encoded image.\n*   3: output an RGB image.\n*   4: output an RGBA image."
 }
 op {
   name: "DecodeCSV"
   input_arg {
     name: "records"
-    description: "Each string is a record/row in the csv and all records should have\nthe same format."
     type: DT_STRING
   }
   input_arg {
     name: "record_defaults"
-    description: "One tensor per column of the input record, with either a\nscalar default value for that column or empty if the column is required."
     type_list_attr: "OUT_TYPE"
   }
   output_arg {
     name: "output"
-    description: "Each tensor will have the same shape as records."
     type_list_attr: "OUT_TYPE"
   }
   attr {
@@ -6618,7 +6170,6 @@ op {
     default_value {
       s: ","
     }
-    description: "char delimiter to separate fields in a record."
   }
   attr {
     name: "use_quote_delim"
@@ -6626,7 +6177,6 @@ op {
     default_value {
       b: true
     }
-    description: "If false, treats double quotation marks as regular\ncharacters inside of the string fields (ignoring RFC 4180, Section 2,\nBullet 5)."
   }
   attr {
     name: "na_value"
@@ -6634,51 +6184,56 @@ op {
     default_value {
       s: ""
     }
-    description: "Additional string to recognize as NA/NaN."
   }
-  summary: "Convert CSV records to tensors. Each column maps to one tensor."
-  description: "RFC 4180 format is expected for the CSV records.\n(https://tools.ietf.org/html/rfc4180)\nNote that we allow leading and trailing spaces with int or float field."
+}
+op {
+  name: "DecodeCompressed"
+  input_arg {
+    name: "bytes"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "output"
+    type: DT_STRING
+  }
+  attr {
+    name: "compression_type"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
 }
 op {
   name: "DecodeGif"
   input_arg {
     name: "contents"
-    description: "0-D.  The GIF-encoded image."
     type: DT_STRING
   }
   output_arg {
     name: "image"
-    description: "4-D with shape `[num_frames, height, width, 3]`. RGB order"
     type: DT_UINT8
   }
-  summary: "Decode the first frame of a GIF-encoded image to a uint8 tensor."
-  description: "GIF with frame or transparency compression are not supported\nconvert animated GIF from compressed to uncompressed by:\n\n    convert $src.gif -coalesce $dst.gif\n\nThis op also supports decoding JPEGs and PNGs, though it is cleaner to use\n`tf.image.decode_image`."
 }
 op {
   name: "DecodeJSONExample"
   input_arg {
     name: "json_examples"
-    description: "Each string is a JSON object serialized according to the JSON\nmapping of the Example proto."
     type: DT_STRING
   }
   output_arg {
     name: "binary_examples"
-    description: "Each string is a binary Example protocol buffer corresponding\nto the respective element of `json_examples`."
     type: DT_STRING
   }
-  summary: "Convert JSON-encoded Example records to binary protocol buffer strings."
-  description: "This op translates a tensor containing Example records, encoded using\nthe [standard JSON\nmapping](https://developers.google.com/protocol-buffers/docs/proto3#json),\ninto a tensor containing the same records encoded as binary protocol\nbuffers. The resulting tensor can then be fed to any of the other\nExample-parsing ops."
 }
 op {
   name: "DecodeJpeg"
   input_arg {
     name: "contents"
-    description: "0-D.  The JPEG-encoded image."
     type: DT_STRING
   }
   output_arg {
     name: "image"
-    description: "3-D with shape `[height, width, channels]`.."
     type: DT_UINT8
   }
   attr {
@@ -6687,7 +6242,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Number of color channels for the decoded image."
   }
   attr {
     name: "ratio"
@@ -6695,7 +6249,6 @@ op {
     default_value {
       i: 1
     }
-    description: "Downscaling ratio."
   }
   attr {
     name: "fancy_upscaling"
@@ -6703,7 +6256,6 @@ op {
     default_value {
       b: true
     }
-    description: "If true use a slower but nicer upscaling of the\nchroma planes (yuv420/422 only)."
   }
   attr {
     name: "try_recover_truncated"
@@ -6711,7 +6263,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true try to recover an image from truncated input."
   }
   attr {
     name: "acceptable_fraction"
@@ -6719,7 +6270,6 @@ op {
     default_value {
       f: 1
     }
-    description: "The minimum required fraction of lines before a truncated\ninput is accepted."
   }
   attr {
     name: "dct_method"
@@ -6727,21 +6277,16 @@ op {
     default_value {
       s: ""
     }
-    description: "string specifying a hint about the algorithm used for\ndecompression.  Defaults to \"\" which maps to a system-specific\ndefault.  Currently valid values are [\"INTEGER_FAST\",\n\"INTEGER_ACCURATE\"].  The hint may be ignored (e.g., the internal\njpeg library changes to a version that does not have that specific\noption.)"
   }
-  summary: "Decode a JPEG-encoded image to a uint8 tensor."
-  description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n*   0: Use the number of channels in the JPEG-encoded image.\n*   1: output a grayscale image.\n*   3: output an RGB image.\n\nIf needed, the JPEG-encoded image is transformed to match the requested number\nof color channels.\n\nThe attr `ratio` allows downscaling the image by an integer factor during\ndecoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than\ndownscaling the image later.\n\n\nThis op also supports decoding PNGs and non-animated GIFs since the interface is\nthe same, though it is cleaner to use `tf.image.decode_image`."
 }
 op {
   name: "DecodePng"
   input_arg {
     name: "contents"
-    description: "0-D.  The PNG-encoded image."
     type: DT_STRING
   }
   output_arg {
     name: "image"
-    description: "3-D with shape `[height, width, channels]`."
     type_attr: "dtype"
   }
   attr {
@@ -6750,7 +6295,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Number of color channels for the decoded image."
   }
   attr {
     name: "dtype"
@@ -6765,19 +6309,15 @@ op {
       }
     }
   }
-  summary: "Decode a PNG-encoded image to a uint8 or uint16 tensor."
-  description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n*   0: Use the number of channels in the PNG-encoded image.\n*   1: output a grayscale image.\n*   3: output an RGB image.\n*   4: output an RGBA image.\n\nIf needed, the PNG-encoded image is transformed to match the requested number\nof color channels.\n\nThis op also supports decoding JPEGs and non-animated GIFs since the interface\nis the same, though it is cleaner to use `tf.image.decode_image`."
 }
 op {
   name: "DecodeRaw"
   input_arg {
     name: "bytes"
-    description: "All the elements must have the same length."
     type: DT_STRING
   }
   output_arg {
     name: "output"
-    description: "A Tensor with one more dimension than the input `bytes`.  The\nadded dimension will have size equal to the length of the elements\nof `bytes` divided by the number of bytes to represent `out_type`."
     type_attr: "out_type"
   }
   attr {
@@ -6803,25 +6343,20 @@ op {
     default_value {
       b: true
     }
-    description: "Whether the input `bytes` are in little-endian order.\nIgnored for `out_type` values that are stored in a single byte like\n`uint8`."
   }
-  summary: "Reinterpret the bytes of a string as a vector of numbers."
 }
 op {
   name: "DecodeWav"
   input_arg {
     name: "contents"
-    description: "The WAV-encoded audio, usually from a file."
     type: DT_STRING
   }
   output_arg {
     name: "audio"
-    description: "2-D with shape `[length, channels]`."
     type: DT_FLOAT
   }
   output_arg {
     name: "sample_rate"
-    description: "Scalar holding the sample rate found in the WAV header."
     type: DT_INT32
   }
   attr {
@@ -6830,7 +6365,6 @@ op {
     default_value {
       i: -1
     }
-    description: "Number of sample channels wanted."
   }
   attr {
     name: "desired_samples"
@@ -6838,45 +6372,36 @@ op {
     default_value {
       i: -1
     }
-    description: "Length of audio requested."
   }
-  summary: "Decode a 16-bit PCM WAV file to a float tensor."
-  description: "The -32768 to 32767 signed 16-bit values will be scaled to -1.0 to 1.0 in float.\n\nWhen desired_channels is set, if the input contains fewer channels than this\nthen the last channel will be duplicated to give the requested number, else if\nthe input has more channels than requested then the additional channels will be\nignored.\n\nIf desired_samples is set, then the audio will be cropped or padded with zeroes\nto the requested length.\n\nThe first output contains a Tensor with the content of the audio samples. The\nlowest dimension will be the number of channels, and the second will be the\nnumber of samples. For example, a ten-sample-long stereo WAV file should give an\noutput shape of [10, 2]."
 }
 op {
   name: "DeleteSessionTensor"
   input_arg {
     name: "handle"
-    description: "The handle for a tensor stored in the session state."
     type: DT_STRING
   }
-  summary: "Delete the tensor specified by its handle in the session."
+  is_stateful: true
 }
 op {
   name: "DenseToDenseSetOperation"
   input_arg {
     name: "set1"
-    description: "`Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.\nDimension `n` contains values in a set, duplicates are allowed but ignored."
     type_attr: "T"
   }
   input_arg {
     name: "set2"
-    description: "`Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set1`.\nDimension `n` contains values in a set, duplicates are allowed but ignored."
     type_attr: "T"
   }
   output_arg {
     name: "result_indices"
-    description: "2D indices of a `SparseTensor`."
     type: DT_INT64
   }
   output_arg {
     name: "result_values"
-    description: "1D values of a `SparseTensor`."
     type_attr: "T"
   }
   output_arg {
     name: "result_shape"
-    description: "1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is\nthe same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`\nis the max result set size across all `0...n-1` dimensions."
     type: DT_INT64
   }
   attr {
@@ -6905,24 +6430,19 @@ op {
       }
     }
   }
-  summary: "Applies set operation along last dimension of 2 `Tensor` inputs."
-  description: "See SetOperationOp::SetOperationFromContext for values of `set_operation`.\n\nOutput `result` is a `SparseTensor` represented by `result_indices`,\n`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this\nhas rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`\ndimension contains the result of `set_operation` applied to the corresponding\n`[0...n-1]` dimension of `set`."
 }
 op {
   name: "DenseToSparseBatchDataset"
   input_arg {
     name: "input_dataset"
-    description: "A handle to an input dataset. Must have a single component."
     type: DT_VARIANT
   }
   input_arg {
     name: "batch_size"
-    description: "A scalar representing the number of elements to accumulate in a\nbatch."
     type: DT_INT64
   }
   input_arg {
     name: "row_shape"
-    description: "A vector representing the dense shape of each row in the produced\nSparseTensor. The shape may be partially specified, using `-1` to indicate\nthat a particular dimension should use the maximum size of all batch elements."
     type: DT_INT64
   }
   output_arg {
@@ -6941,43 +6461,35 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that yields a SparseTensor for each element of the input."
 }
 op {
   name: "DenseToSparseSetOperation"
   input_arg {
     name: "set1"
-    description: "`Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.\nDimension `n` contains values in a set, duplicates are allowed but ignored."
     type_attr: "T"
   }
   input_arg {
     name: "set2_indices"
-    description: "2D `Tensor`, indices of a `SparseTensor`. Must be in row-major\norder."
     type: DT_INT64
   }
   input_arg {
     name: "set2_values"
-    description: "1D `Tensor`, values of a `SparseTensor`. Must be in row-major\norder."
     type_attr: "T"
   }
   input_arg {
     name: "set2_shape"
-    description: "1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must\nbe the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the\nmax set size across `n-1` dimensions."
     type: DT_INT64
   }
   output_arg {
     name: "result_indices"
-    description: "2D indices of a `SparseTensor`."
     type: DT_INT64
   }
   output_arg {
     name: "result_values"
-    description: "1D values of a `SparseTensor`."
     type_attr: "T"
   }
   output_arg {
     name: "result_shape"
-    description: "1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is\nthe same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`\nis the max result set size across all `0...n-1` dimensions."
     type: DT_INT64
   }
   attr {
@@ -7006,8 +6518,6 @@ op {
       }
     }
   }
-  summary: "Applies set operation along last dimension of `Tensor` and `SparseTensor`."
-  description: "See SetOperationOp::SetOperationFromContext for values of `set_operation`.\n\nInput `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,\nand `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same\nas `set1`. Dimension `n` contains values in a set, duplicates are allowed but\nignored.\n\nIf `validate_indices` is `True`, this op validates the order and range of `set2`\nindices.\n\nOutput `result` is a `SparseTensor` represented by `result_indices`,\n`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this\nhas rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`\ndimension contains the result of `set_operation` applied to the corresponding\n`[0...n-1]` dimension of `set`."
 }
 op {
   name: "DepthToSpace"
@@ -7026,7 +6536,6 @@ op {
   attr {
     name: "block_size"
     type: "int"
-    description: "The size of the spatial block, same as in Space2Depth."
     has_minimum: true
     minimum: 2
   }
@@ -7044,8 +6553,6 @@ op {
       }
     }
   }
-  summary: "DepthToSpace for tensors of type T."
-  description: "Rearranges data from depth into blocks of spatial data.\nThis is the reverse transformation of SpaceToDepth. More specifically,\nthis op outputs a copy of the input tensor where values from the `depth`\ndimension are moved in spatial blocks to the `height` and `width` dimensions.\nThe attr `block_size` indicates the input block size and how the data is moved.\n\n  * Chunks of data of size `block_size * block_size` from depth are rearranged\n    into non-overlapping blocks of size `block_size x block_size`\n  * The width the output tensor is `input_depth * block_size`, whereas the\n    height is `input_height * block_size`.\n  * The Y, X coordinates within each block of the output image are determined\n    by the high order component of the input channel index.\n  * The depth of the input tensor must be divisible by\n    `block_size * block_size`.\n\nThe `data_format` attr specifies the layout of the input and output tensors\nwith the following options:\n  \"NHWC\": `[ batch, height, width, channels ]`\n  \"NCHW\": `[ batch, channels, height, width ]`\n  \"NCHW_VECT_C\":\n      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`\n\nIt is useful to consider the operation as transforming a 6-D Tensor.\ne.g. for data_format = NHWC,\n     Each element in the input tensor can be specified via 6 coordinates,\n     ordered by decreasing memory layout significance as:\n     n,iY,iX,bY,bX,oC  (where n=batch index, iX, iY means X or Y coordinates\n                        within the input image, bX, bY means coordinates\n                        within the output block, oC means output channels).\n     The output would be the input transposed to the following layout:\n     n,iY,bY,iX,bX,oC\n\nThis operation is useful for resizing the activations between convolutions\n(but keeping all data), e.g. instead of pooling. It is also useful for training\npurely convolutional models.\n\nFor example, given an input of shape `[1, 1, 1, 4]`, data_format = \"NHWC\" and\nblock_size = 2:\n\n```\nx = [[[[1, 2, 3, 4]]]]\n\n```\n\nThis operation will output a tensor of shape `[1, 2, 2, 1]`:\n\n```\n   [[[[1], [2]],\n     [[3], [4]]]]\n```\n\nHere, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`,\nthe corresponding output will have 2x2 elements and will have a depth of\n1 channel (1 = `4 / (block_size * block_size)`).\nThe output element shape is `[2, 2, 1]`.\n\nFor an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g.\n\n```\nx = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]\n```\n\nThis operation, for block size of 2, will return the following tensor of shape\n`[1, 2, 2, 3]`\n\n```\n   [[[[1, 2, 3], [4, 5, 6]],\n     [[7, 8, 9], [10, 11, 12]]]]\n\n```\n\nSimilarly, for the following input of shape `[1 2 2 4]`, and a block size of 2:\n\n```\nx =  [[[[1, 2, 3, 4],\n       [5, 6, 7, 8]],\n      [[9, 10, 11, 12],\n       [13, 14, 15, 16]]]]\n```\n\nthe operator will return the following tensor of shape `[1 4 4 1]`:\n\n```\nx = [[[ [1],   [2],  [5],  [6]],\n      [ [3],   [4],  [7],  [8]],\n      [ [9],  [10], [13],  [14]],\n      [ [11], [12], [15],  [16]]]]\n\n```"
 }
 op {
   name: "DepthwiseConv2dNative"
@@ -7066,6 +6573,8 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -7074,12 +6583,10 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D of length 4.  The stride of the sliding window for each dimension\nof `input`."
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -7093,7 +6600,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, height, width, channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, channels, height, width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -7101,29 +6607,35 @@ op {
       }
     }
   }
-  summary: "Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors."
-  description: "Given an input tensor of shape `[batch, in_height, in_width, in_channels]`\nand a filter / kernel tensor of shape\n`[filter_height, filter_width, in_channels, channel_multiplier]`, containing\n`in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies\na different filter to each input channel (expanding from 1 channel to\n`channel_multiplier` channels for each), then concatenates the results\ntogether. Thus, the output has `in_channels * channel_multiplier` channels.\n\n```\nfor k in 0..in_channels-1\n  for q in 0..channel_multiplier-1\n    output[b, i, j, k * channel_multiplier + q] =\n      sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] *\n                        filter[di, dj, k, q]\n```\n\nMust have `strides[0] = strides[3] = 1`.  For the most common case of the same\nhorizontal and vertices strides, `strides = [1, stride, stride, 1]`."
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
   name: "DepthwiseConv2dNativeBackpropFilter"
   input_arg {
     name: "input"
-    description: "4-D with shape based on `data_format`.  For example, if\n`data_format` is \'NHWC\' then `input` is a 4-D `[batch, in_height,\nin_width, in_channels]` tensor."
     type_attr: "T"
   }
   input_arg {
     name: "filter_sizes"
-    description: "An integer vector representing the tensor shape of `filter`,\nwhere `filter` is a 4-D\n`[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor."
     type: DT_INT32
   }
   input_arg {
     name: "out_backprop"
-    description: "4-D with shape  based on `data_format`.\nFor example, if `data_format` is \'NHWC\' then\nout_backprop shape is `[batch, out_height, out_width, out_channels]`.\nGradients w.r.t. the output of the convolution."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "4-D with shape\n`[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.\nthe `filter` input of the convolution."
     type_attr: "T"
   }
   attr {
@@ -7131,6 +6643,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -7139,12 +6652,10 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the input\nof the convolution."
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -7158,7 +6669,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, height, width, channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, channels, height, width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -7166,28 +6676,35 @@ op {
       }
     }
   }
-  summary: "Computes the gradients of depthwise convolution with respect to the filter."
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
   name: "DepthwiseConv2dNativeBackpropInput"
   input_arg {
     name: "input_sizes"
-    description: "An integer vector representing the shape of `input`, based\non `data_format`.  For example, if `data_format` is \'NHWC\' then\n `input` is a 4-D `[batch, height, width, channels]` tensor."
     type: DT_INT32
   }
   input_arg {
     name: "filter"
-    description: "4-D with shape\n`[filter_height, filter_width, in_channels, depthwise_multiplier]`."
     type_attr: "T"
   }
   input_arg {
     name: "out_backprop"
-    description: "4-D with shape  based on `data_format`.\nFor example, if `data_format` is \'NHWC\' then\nout_backprop shape is `[batch, out_height, out_width, out_channels]`.\nGradients w.r.t. the output of the convolution."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "4-D with shape according to `data_format`.  For example, if\n`data_format` is \'NHWC\', output shape is `[batch, in_height,\nin_width, in_channels]`.  Gradient w.r.t. the input of the\nconvolution."
     type_attr: "T"
   }
   attr {
@@ -7195,6 +6712,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -7203,12 +6721,10 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the input\nof the convolution."
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -7222,7 +6738,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, height, width, channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, channels, height, width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -7230,7 +6745,18 @@ op {
       }
     }
   }
-  summary: "Computes the gradients of depthwise convolution with respect to the input."
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
   name: "Dequantize"
@@ -7240,12 +6766,10 @@ op {
   }
   input_arg {
     name: "min_range"
-    description: "The minimum scalar value possibly produced for the input."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_range"
-    description: "The maximum scalar value possibly produced for the input."
     type: DT_FLOAT
   }
   output_arg {
@@ -7259,9 +6783,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -7279,29 +6803,23 @@ op {
       }
     }
   }
-  summary: "Dequantize the \'input\' tensor into a float Tensor."
-  description: "[min_range, max_range] are scalar floats that specify the range for\nthe \'input\' data. The \'mode\' attribute controls exactly which calculations are\nused to convert the float values to their quantized equivalents.\n\nIn \'MIN_COMBINED\' mode, each value of the tensor will undergo the following:\n\n```\nif T == qint8, in[i] += (range(T) + 1)/ 2.0\nout[i] = min_range + (in[i]* (max_range - min_range) / range(T))\n```\nhere `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`\n\n*MIN_COMBINED Mode Example*\n\nIf the input comes from a QuantizedRelu6, the output type is\nquint8 (range of 0-255) but the possible range of QuantizedRelu6 is\n0-6.  The min_range and max_range values are therefore 0.0 and 6.0.\nDequantize on quint8 will take each value, cast to float, and multiply\nby 6 / 255.\nNote that if quantizedtype is qint8, the operation will additionally add\neach value by 128 prior to casting.\n\nIf the mode is \'MIN_FIRST\', then this approach is used:\n\n```c++\nnum_discrete_values = 1 << (# of bits in T)\nrange_adjust = num_discrete_values / (num_discrete_values - 1)\nrange = (range_max - range_min) * range_adjust\nrange_scale = range / num_discrete_values\nconst double offset_input = static_cast<double>(input) - lowest_quantized;\nresult = range_min + ((input - numeric_limits<T>::min()) * range_scale)\n```\n\n*SCALED mode Example*\n\n`SCALED` mode matches the quantization approach used in\n`QuantizeAndDequantize{V2|V3}`.\n\nIf the mode is `SCALED`, we do not use the full range of the output type,\nchoosing to elide the lowest possible value for symmetry (e.g., output range is\n-127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to\n0.\n\nWe first find the range of values in our tensor. The\nrange we use is always centered on 0, so we find m such that\n```c++\n  m = max(abs(input_min), abs(input_max))\n```\n\nOur input tensor range is then `[-m, m]`.\n\nNext, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.\nIf T is signed, this is\n```\n  num_bits = sizeof(T) * 8\n  [min_fixed, max_fixed] =\n      [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]\n```\n\nOtherwise, if T is unsigned, the fixed-point range is\n```\n  [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]\n```\n\nFrom this we compute our scaling factor, s:\n```c++\n  s = (2 * m) / (max_fixed - min_fixed)\n```\n\nNow we can dequantize the elements of our tensor:\n```c++\nresult = input * s\n```"
 }
 op {
   name: "DeserializeIterator"
   input_arg {
     name: "resource_handle"
-    description: "A handle to an iterator resource."
     type: DT_RESOURCE
   }
   input_arg {
     name: "serialized"
-    description: "A variant tensor storing the state of the iterator contained in the\nresource."
     type: DT_VARIANT
   }
-  summary: "Converts the given variant tensor to an iterator and stores it in the given resource."
   is_stateful: true
 }
 op {
   name: "DeserializeManySparse"
   input_arg {
     name: "serialized_sparse"
-    description: "2-D, The `N` serialized `SparseTensor` objects.\nMust have 3 columns."
     type: DT_STRING
   }
   output_arg {
@@ -7319,17 +6837,13 @@ op {
   attr {
     name: "dtype"
     type: "type"
-    description: "The `dtype` of the serialized `SparseTensor` objects."
   }
-  summary: "Deserialize and concatenate `SparseTensors` from a serialized minibatch."
-  description: "The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where\n`N` is the minibatch size and the rows correspond to packed outputs of\n`SerializeSparse`.  The ranks of the original `SparseTensor` objects\nmust all match.  When the final `SparseTensor` is created, it has rank one\nhigher than the ranks of the incoming `SparseTensor` objects\n(they have been concatenated along a new row dimension).\n\nThe output `SparseTensor` object\'s shape values for all dimensions but the\nfirst are the max across the input `SparseTensor` objects\' shape values\nfor the corresponding dimensions.  Its first shape value is `N`, the minibatch\nsize.\n\nThe input `SparseTensor` objects\' indices are assumed ordered in\nstandard lexicographic order.  If this is not the case, after this\nstep run `SparseReorder` to restore index ordering.\n\nFor example, if the serialized input is a `[2 x 3]` matrix representing two\noriginal `SparseTensor` objects:\n\n    index = [ 0]\n            [10]\n            [20]\n    values = [1, 2, 3]\n    shape = [50]\n\nand\n\n    index = [ 2]\n            [10]\n    values = [4, 5]\n    shape = [30]\n\nthen the final deserialized `SparseTensor` will be:\n\n    index = [0  0]\n            [0 10]\n            [0 20]\n            [1  2]\n            [1 10]\n    values = [1, 2, 3, 4, 5]\n    shape = [2 50]"
 }
 op {
   name: "DeserializeSparse"
   input_arg {
     name: "serialized_sparse"
-    description: "The serialized `SparseTensor` objects. The last dimension\nmust have 3 columns."
-    type: DT_STRING
+    type_attr: "Tserialized"
   }
   output_arg {
     name: "sparse_indices"
@@ -7346,15 +6860,25 @@ op {
   attr {
     name: "dtype"
     type: "type"
-    description: "The `dtype` of the serialized `SparseTensor` objects."
   }
-  summary: "Deserialize `SparseTensor` objects."
+  attr {
+    name: "Tserialized"
+    type: "type"
+    default_value {
+      type: DT_STRING
+    }
+    allowed_values {
+      list {
+        type: DT_STRING
+        type: DT_VARIANT
+      }
+    }
+  }
 }
 op {
   name: "DestroyResourceOp"
   input_arg {
     name: "resource"
-    description: "handle to the resource to delete."
     type: DT_RESOURCE
   }
   attr {
@@ -7363,17 +6887,13 @@ op {
     default_value {
       b: true
     }
-    description: "whether to ignore the error when the resource\ndoesn\'t exist."
   }
-  summary: "Deletes the resource specified by the handle."
-  description: "All subsequent operations using the resource will result in a NotFound\nerror status."
   is_stateful: true
 }
 op {
   name: "DestroyTemporaryVariable"
   input_arg {
     name: "ref"
-    description: "A reference to the temporary variable tensor."
     type_attr: "T"
     is_ref: true
   }
@@ -7388,16 +6908,12 @@ op {
   attr {
     name: "var_name"
     type: "string"
-    description: "Name of the temporary variable, usually the name of the matching\n\'TemporaryVariable\' op."
   }
-  summary: "Destroys the temporary variable and returns its final value."
-  description: "Sets output to the value of the Tensor pointed to by \'ref\', then destroys\nthe temporary variable called \'var_name\'.\nAll other uses of \'ref\' *must* have executed before this op.\nThis is typically achieved by chaining the ref through each assign op, or by\nusing control dependencies.\n\nOutputs the final value of the tensor pointed to by \'ref\'."
 }
 op {
   name: "Diag"
   input_arg {
     name: "diagonal"
-    description: "Rank k tensor where k is at most 1."
     type_attr: "T"
   }
   output_arg {
@@ -7409,6 +6925,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -7418,19 +6935,15 @@ op {
       }
     }
   }
-  summary: "Returns a diagonal tensor with a given diagonal values."
-  description: "Given a `diagonal`, this operation returns a tensor with the `diagonal` and\neverything else padded with zeros. The diagonal is computed as follows:\n\nAssume `diagonal` has dimensions [D1,..., Dk], then the output is a tensor of\nrank 2k with dimensions [D1,..., Dk, D1,..., Dk] where:\n\n`output[i1,..., ik, i1,..., ik] = diagonal[i1, ..., ik]` and 0 everywhere else.\n\nFor example:\n\n```\n# \'diagonal\' is [1, 2, 3, 4]\ntf.diag(diagonal) ==> [[1, 0, 0, 0]\n                       [0, 2, 0, 0]\n                       [0, 0, 3, 0]\n                       [0, 0, 0, 4]]\n```"
 }
 op {
   name: "DiagPart"
   input_arg {
     name: "input"
-    description: "Rank k tensor where k is even and not zero."
     type_attr: "T"
   }
   output_arg {
     name: "diagonal"
-    description: "The extracted diagonal."
     type_attr: "T"
   }
   attr {
@@ -7438,6 +6951,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -7447,8 +6961,6 @@ op {
       }
     }
   }
-  summary: "Returns the diagonal part of the tensor."
-  description: "This operation returns a tensor with the `diagonal` part\nof the `input`. The `diagonal` part is computed as follows:\n\nAssume `input` has dimensions `[D1,..., Dk, D1,..., Dk]`, then the output is a\ntensor of rank `k` with dimensions `[D1,..., Dk]` where:\n\n`diagonal[i1,..., ik] = input[i1, ..., ik, i1,..., ik]`.\n\nFor example:\n\n```\n# \'input\' is [[1, 0, 0, 0]\n              [0, 2, 0, 0]\n              [0, 0, 3, 0]\n              [0, 0, 0, 4]]\n\ntf.diag_part(input) ==> [1, 2, 3, 4]\n```"
 }
 op {
   name: "Digamma"
@@ -7466,29 +6978,25 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes Psi, the derivative of Lgamma (the log of the absolute value of"
-  description: "`Gamma(x)`), element-wise."
 }
 op {
   name: "Dilation2D"
   input_arg {
     name: "input"
-    description: "4-D with shape `[batch, in_height, in_width, depth]`."
     type_attr: "T"
   }
   input_arg {
     name: "filter"
-    description: "3-D with shape `[filter_height, filter_width, depth]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "4-D with shape `[batch, out_height, out_width, depth]`."
     type_attr: "T"
   }
   attr {
@@ -7499,10 +7007,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -7513,21 +7022,18 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the input\ntensor. Must be: `[1, stride_height, stride_width, 1]`."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "rates"
     type: "list(int)"
-    description: "The input stride for atrous morphological dilation. Must be:\n`[1, rate_height, rate_width, 1]`."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -7535,29 +7041,23 @@ op {
       }
     }
   }
-  summary: "Computes the grayscale dilation of 4-D `input` and 3-D `filter` tensors."
-  description: "The `input` tensor has shape `[batch, in_height, in_width, depth]` and the\n`filter` tensor has shape `[filter_height, filter_width, depth]`, i.e., each\ninput channel is processed independently of the others with its own structuring\nfunction. The `output` tensor has shape\n`[batch, out_height, out_width, depth]`. The spatial dimensions of the output\ntensor depend on the `padding` algorithm. We currently only support the default\n\"NHWC\" `data_format`.\n\nIn detail, the grayscale morphological 2-D dilation is the max-sum correlation\n(for consistency with `conv2d`, we use unmirrored filters):\n\n    output[b, y, x, c] =\n       max_{dy, dx} input[b,\n                          strides[1] * y + rates[1] * dy,\n                          strides[2] * x + rates[2] * dx,\n                          c] +\n                    filter[dy, dx, c]\n\nMax-pooling is a special case when the filter has size equal to the pooling\nkernel size and contains all zeros.\n\nNote on duality: The dilation of `input` by the `filter` is equal to the\nnegation of the erosion of `-input` by the reflected `filter`."
 }
 op {
   name: "Dilation2DBackpropFilter"
   input_arg {
     name: "input"
-    description: "4-D with shape `[batch, in_height, in_width, depth]`."
     type_attr: "T"
   }
   input_arg {
     name: "filter"
-    description: "3-D with shape `[filter_height, filter_width, depth]`."
     type_attr: "T"
   }
   input_arg {
     name: "out_backprop"
-    description: "4-D with shape `[batch, out_height, out_width, depth]`."
     type_attr: "T"
   }
   output_arg {
     name: "filter_backprop"
-    description: "3-D with shape `[filter_height, filter_width, depth]`."
     type_attr: "T"
   }
   attr {
@@ -7568,10 +7068,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -7582,21 +7083,18 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D of length 4. The stride of the sliding window for each dimension of\nthe input tensor. Must be: `[1, stride_height, stride_width, 1]`."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "rates"
     type: "list(int)"
-    description: "1-D of length 4. The input stride for atrous morphological dilation.\nMust be: `[1, rate_height, rate_width, 1]`."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -7604,28 +7102,23 @@ op {
       }
     }
   }
-  summary: "Computes the gradient of morphological 2-D dilation with respect to the filter."
 }
 op {
   name: "Dilation2DBackpropInput"
   input_arg {
     name: "input"
-    description: "4-D with shape `[batch, in_height, in_width, depth]`."
     type_attr: "T"
   }
   input_arg {
     name: "filter"
-    description: "3-D with shape `[filter_height, filter_width, depth]`."
     type_attr: "T"
   }
   input_arg {
     name: "out_backprop"
-    description: "4-D with shape `[batch, out_height, out_width, depth]`."
     type_attr: "T"
   }
   output_arg {
     name: "in_backprop"
-    description: "4-D with shape `[batch, in_height, in_width, depth]`."
     type_attr: "T"
   }
   attr {
@@ -7636,10 +7129,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -7650,21 +7144,18 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D of length 4. The stride of the sliding window for each dimension of\nthe input tensor. Must be: `[1, stride_height, stride_width, 1]`."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "rates"
     type: "list(int)"
-    description: "1-D of length 4. The input stride for atrous morphological dilation.\nMust be: `[1, rate_height, rate_width, 1]`."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -7672,7 +7163,6 @@ op {
       }
     }
   }
-  summary: "Computes the gradient of morphological 2-D dilation with respect to the input."
 }
 op {
   name: "Div"
@@ -7694,6 +7184,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -7707,24 +7198,19 @@ op {
       }
     }
   }
-  summary: "Returns x / y element-wise."
-  description: "*NOTE*: `Div` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "DrawBoundingBoxes"
   input_arg {
     name: "images"
-    description: "4-D with shape `[batch, height, width, depth]`. A batch of images."
     type_attr: "T"
   }
   input_arg {
     name: "boxes"
-    description: "3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding\nboxes."
     type: DT_FLOAT
   }
   output_arg {
     name: "output"
-    description: "4-D with the same shape as `images`. The batch of input images with\nbounding boxes drawn on the images."
     type_attr: "T"
   }
   attr {
@@ -7740,8 +7226,6 @@ op {
       }
     }
   }
-  summary: "Draw bounding boxes on a batch of images."
-  description: "Outputs a copy of `images` but draws on top of the pixels zero or more bounding\nboxes specified by the locations in `boxes`. The coordinates of the each\nbounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The\nbounding box coordinates are floats in `[0.0, 1.0]` relative to the width and\nheight of the underlying image.\n\nFor example, if an image is 100 x 200 pixels (height x width) and the bounding\nbox is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of\nthe bounding box will be `(40, 10)` to `(100, 50)` (in (x,y) coordinates).\n\nParts of the bounding box may fall outside the image."
 }
 op {
   name: "DynamicPartition"
@@ -7751,7 +7235,6 @@ op {
   }
   input_arg {
     name: "partitions"
-    description: "Any shape.  Indices in the range `[0, num_partitions)`."
     type: DT_INT32
   }
   output_arg {
@@ -7762,7 +7245,6 @@ op {
   attr {
     name: "num_partitions"
     type: "int"
-    description: "The number of partitions to output."
     has_minimum: true
     minimum: 1
   }
@@ -7770,8 +7252,6 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Partitions `data` into `num_partitions` tensors using indices from `partitions`."
-  description: "For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]`\nbecomes part of `outputs[partitions[js]]`.  The slices with `partitions[js] = i`\nare placed in `outputs[i]` in lexicographic order of `js`, and the first\ndimension of `outputs[i]` is the number of entries in `partitions` equal to `i`.\nIn detail,\n\n```python\n    outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:]\n\n    outputs[i] = pack([data[js, ...] for js if partitions[js] == i])\n```\n\n`data.shape` must start with `partitions.shape`.\n\nFor example:\n\n```python\n    # Scalar partitions.\n    partitions = 1\n    num_partitions = 2\n    data = [10, 20]\n    outputs[0] = []  # Empty with shape [0, 2]\n    outputs[1] = [[10, 20]]\n\n    # Vector partitions.\n    partitions = [0, 0, 1, 1, 0]\n    num_partitions = 2\n    data = [10, 20, 30, 40, 50]\n    outputs[0] = [10, 20, 50]\n    outputs[1] = [30, 40]\n```\n\nSee `dynamic_stitch` for an example on how to merge partitions back.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/DynamicPartition.png\" alt>\n</div>"
 }
 op {
   name: "DynamicStitch"
@@ -7799,44 +7279,61 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Interleave the values from the `data` tensors into a single tensor."
-  description: "Builds a merged tensor such that\n\n```python\n    merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]\n```\n\nFor example, if each `indices[m]` is scalar or vector, we have\n\n```python\n    # Scalar indices:\n    merged[indices[m], ...] = data[m][...]\n\n    # Vector indices:\n    merged[indices[m][i], ...] = data[m][i, ...]\n```\n\nEach `data[i].shape` must start with the corresponding `indices[i].shape`,\nand the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we\nmust have `data[i].shape = indices[i].shape + constant`.  In terms of this\n`constant`, the output shape is\n\n    merged.shape = [max(indices)] + constant\n\nValues are merged in order, so if an index appears in both `indices[m][i]` and\n`indices[n][j]` for `(m,i) < (n,j)` the slice `data[n][j]` will appear in the\nmerged result. If you do not need this guarantee, ParallelDynamicStitch might\nperform better on some devices.\n\nFor example:\n\n```python\n    indices[0] = 6\n    indices[1] = [4, 1]\n    indices[2] = [[5, 2], [0, 3]]\n    data[0] = [61, 62]\n    data[1] = [[41, 42], [11, 12]]\n    data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]\n    merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],\n              [51, 52], [61, 62]]\n```\n\nThis method can be used to merge partitions created by `dynamic_partition`\nas illustrated on the following example:\n\n```python\n    # Apply function (increments x_i) on elements for which a certain condition\n    # apply (x_i != -1 in this example).\n    x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])\n    condition_mask=tf.not_equal(x,tf.constant(-1.))\n    partitioned_data = tf.dynamic_partition(\n        x, tf.cast(condition_mask, tf.int32) , 2)\n    partitioned_data[1] = partitioned_data[1] + 1.0\n    condition_indices = tf.dynamic_partition(\n        tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)\n    x = tf.dynamic_stitch(condition_indices, partitioned_data)\n    # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain\n    # unchanged.\n```\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/DynamicStitch.png\" alt>\n</div>"
+}
+op {
+  name: "EagerPyFunc"
+  input_arg {
+    name: "input"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "token"
+    type: "string"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  is_stateful: true
 }
 op {
   name: "EditDistance"
   input_arg {
     name: "hypothesis_indices"
-    description: "The indices of the hypothesis list SparseTensor.\nThis is an N x R int64 matrix."
     type: DT_INT64
   }
   input_arg {
     name: "hypothesis_values"
-    description: "The values of the hypothesis list SparseTensor.\nThis is an N-length vector."
     type_attr: "T"
   }
   input_arg {
     name: "hypothesis_shape"
-    description: "The shape of the hypothesis list SparseTensor.\nThis is an R-length vector."
     type: DT_INT64
   }
   input_arg {
     name: "truth_indices"
-    description: "The indices of the truth list SparseTensor.\nThis is an M x R int64 matrix."
     type: DT_INT64
   }
   input_arg {
     name: "truth_values"
-    description: "The values of the truth list SparseTensor.\nThis is an M-length vector."
     type_attr: "T"
   }
   input_arg {
     name: "truth_shape"
-    description: "truth indices, vector."
     type: DT_INT64
   }
   output_arg {
     name: "output"
-    description: "A dense float tensor with rank R - 1.\n\nFor the example input:\n\n    // hypothesis represents a 2x1 matrix with variable-length values:\n    //   (0,0) = [\"a\"]\n    //   (1,0) = [\"b\"]\n    hypothesis_indices = [[0, 0, 0],\n                          [1, 0, 0]]\n    hypothesis_values = [\"a\", \"b\"]\n    hypothesis_shape = [2, 1, 1]\n\n    // truth represents a 2x2 matrix with variable-length values:\n    //   (0,0) = []\n    //   (0,1) = [\"a\"]\n    //   (1,0) = [\"b\", \"c\"]\n    //   (1,1) = [\"a\"]\n    truth_indices = [[0, 1, 0],\n                     [1, 0, 0],\n                     [1, 0, 1],\n                     [1, 1, 0]]\n    truth_values = [\"a\", \"b\", \"c\", \"a\"]\n    truth_shape = [2, 2, 2]\n    normalize = true\n\nThe output will be:\n\n    // output is a 2x2 matrix with edit distances normalized by truth lengths.\n    output = [[inf, 1.0],  // (0,0): no truth, (0,1): no hypothesis\n              [0.5, 1.0]]  // (1,0): addition, (1,1): no hypothesis"
     type: DT_FLOAT
   }
   attr {
@@ -7845,14 +7342,11 @@ op {
     default_value {
       b: true
     }
-    description: "boolean (if true, edit distances are normalized by length of truth).\n\nThe output is:"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Computes the (possibly normalized) Levenshtein Edit Distance."
-  description: "The inputs are variable-length sequences provided by SparseTensors\n  (hypothesis_indices, hypothesis_values, hypothesis_shape)\nand\n  (truth_indices, truth_values, truth_shape).\n\nThe inputs are:"
 }
 op {
   name: "Elu"
@@ -7870,29 +7364,25 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise."
-  description: "See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)\n](http://arxiv.org/abs/1511.07289)"
 }
 op {
   name: "EluGrad"
   input_arg {
     name: "gradients"
-    description: "The backpropagated gradients to the corresponding Elu operation."
     type_attr: "T"
   }
   input_arg {
     name: "outputs"
-    description: "The outputs of the corresponding Elu operation."
     type_attr: "T"
   }
   output_arg {
     name: "backprops"
-    description: "The gradients: `gradients * (outputs + 1)` if outputs < 0,\n`gradients` otherwise."
     type_attr: "T"
   }
   attr {
@@ -7901,46 +7391,64 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes gradients for the exponential linear (Elu) operation."
 }
 op {
-  name: "EncodeBase64"
+  name: "EmptyTensorList"
   input_arg {
-    name: "input"
-    description: "Strings to be encoded."
-    type: DT_STRING
+    name: "element_shape"
+    type_attr: "shape_type"
   }
   output_arg {
-    name: "output"
-    description: "Input strings encoded in base64."
-    type: DT_STRING
+    name: "handle"
+    type: DT_VARIANT
   }
   attr {
-    name: "pad"
-    type: "bool"
-    default_value {
-      b: false
-    }
-    description: "Bool whether padding is applied at the ends."
+    name: "element_dtype"
+    type: "type"
   }
-  summary: "Encode strings into web-safe base64 format."
-  description: "Refer to the following article for more information on base64 format:\nen.wikipedia.org/wiki/Base64. Base64 strings may have padding with \'=\' at the\nend so that the encoded has length multiple of 4. See Padding section of the\nlink above.\n\nWeb-safe means that the encoder uses - and _ instead of + and /."
-}
-op {
-  name: "EncodeJpeg"
-  input_arg {
+  attr {
+    name: "shape_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "EncodeBase64"
+  input_arg {
+    name: "input"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "output"
+    type: DT_STRING
+  }
+  attr {
+    name: "pad"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
+op {
+  name: "EncodeJpeg"
+  input_arg {
     name: "image"
-    description: "3-D with shape `[height, width, channels]`."
     type: DT_UINT8
   }
   output_arg {
     name: "contents"
-    description: "0-D. JPEG-encoded image."
     type: DT_STRING
   }
   attr {
@@ -7949,7 +7457,6 @@ op {
     default_value {
       s: ""
     }
-    description: "Per pixel image format."
     allowed_values {
       list {
         s: ""
@@ -7964,7 +7471,6 @@ op {
     default_value {
       i: 95
     }
-    description: "Quality of the compression from 0 to 100 (higher is better and slower)."
   }
   attr {
     name: "progressive"
@@ -7972,7 +7478,6 @@ op {
     default_value {
       b: false
     }
-    description: "If True, create a JPEG that loads progressively (coarse to fine)."
   }
   attr {
     name: "optimize_size"
@@ -7980,7 +7485,6 @@ op {
     default_value {
       b: false
     }
-    description: "If True, spend CPU/RAM to reduce size with no quality change."
   }
   attr {
     name: "chroma_downsampling"
@@ -7988,7 +7492,6 @@ op {
     default_value {
       b: true
     }
-    description: "See http://en.wikipedia.org/wiki/Chroma_subsampling."
   }
   attr {
     name: "density_unit"
@@ -7996,7 +7499,6 @@ op {
     default_value {
       s: "in"
     }
-    description: "Unit used to specify `x_density` and `y_density`:\npixels per inch (`\'in\'`) or centimeter (`\'cm\'`)."
     allowed_values {
       list {
         s: "in"
@@ -8010,7 +7512,6 @@ op {
     default_value {
       i: 300
     }
-    description: "Horizontal pixels per density unit."
   }
   attr {
     name: "y_density"
@@ -8018,7 +7519,6 @@ op {
     default_value {
       i: 300
     }
-    description: "Vertical pixels per density unit."
   }
   attr {
     name: "xmp_metadata"
@@ -8026,21 +7526,16 @@ op {
     default_value {
       s: ""
     }
-    description: "If not empty, embed this XMP metadata in the image header."
   }
-  summary: "JPEG-encode an image."
-  description: "`image` is a 3-D uint8 Tensor of shape `[height, width, channels]`.\n\nThe attr `format` can be used to override the color format of the encoded\noutput.  Values can be:\n\n*   `\'\'`: Use a default format based on the number of channels in the image.\n*   `grayscale`: Output a grayscale JPEG image.  The `channels` dimension\n    of `image` must be 1.\n*   `rgb`: Output an RGB JPEG image. The `channels` dimension\n    of `image` must be 3.\n\nIf `format` is not specified or is the empty string, a default format is picked\nin function of the number of channels in `image`:\n\n*   1: Output a grayscale image.\n*   3: Output an RGB image."
 }
 op {
   name: "EncodePng"
   input_arg {
     name: "image"
-    description: "3-D with shape `[height, width, channels]`."
     type_attr: "T"
   }
   output_arg {
     name: "contents"
-    description: "0-D. PNG-encoded image."
     type: DT_STRING
   }
   attr {
@@ -8049,7 +7544,6 @@ op {
     default_value {
       i: -1
     }
-    description: "Compression level."
   }
   attr {
     name: "T"
@@ -8064,39 +7558,30 @@ op {
       }
     }
   }
-  summary: "PNG-encode an image."
-  description: "`image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`\nwhere `channels` is:\n\n*   1: for grayscale.\n*   2: for grayscale + alpha.\n*   3: for RGB.\n*   4: for RGBA.\n\nThe ZLIB compression level, `compression`, can be -1 for the PNG-encoder\ndefault or a value from 0 to 9.  9 is the highest compression level, generating\nthe smallest output, but is slower."
 }
 op {
   name: "EncodeWav"
   input_arg {
     name: "audio"
-    description: "2-D with shape `[length, channels]`."
     type: DT_FLOAT
   }
   input_arg {
     name: "sample_rate"
-    description: "Scalar containing the sample frequency."
     type: DT_INT32
   }
   output_arg {
     name: "contents"
-    description: "0-D. WAV-encoded file contents."
     type: DT_STRING
   }
-  summary: "Encode audio data using the WAV file format."
-  description: "This operation will generate a string suitable to be saved out to create a .wav\naudio file. It will be encoded in the 16-bit PCM format. It takes in float\nvalues in the range -1.0f to 1.0f, and any outside that value will be clamped to\nthat range.\n\n`audio` is a 2-D float Tensor of shape `[length, channels]`.\n`sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100)."
 }
 op {
   name: "Enter"
   input_arg {
     name: "data"
-    description: "The tensor to be made available to the child frame."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The same tensor as `data`."
     type_attr: "T"
   }
   attr {
@@ -8106,7 +7591,6 @@ op {
   attr {
     name: "frame_name"
     type: "string"
-    description: "The name of the child frame."
   }
   attr {
     name: "is_constant"
@@ -8114,7 +7598,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, the output is constant within the child frame."
   }
   attr {
     name: "parallel_iterations"
@@ -8122,10 +7605,7 @@ op {
     default_value {
       i: 10
     }
-    description: "The number of iterations allowed to run in parallel."
   }
-  summary: "Creates or finds a child frame, and makes `data` available to the child frame."
-  description: "This op is used together with `Exit` to create loops in the graph.\nThe unique `frame_name` is used by the `Executor` to identify frames. If\n`is_constant` is true, `output` is a constant in the child frame; otherwise\nit may be changed in the child frame. At most `parallel_iterations` iterations\nare run in parallel in the child frame."
 }
 op {
   name: "Equal"
@@ -8147,6 +7627,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -8164,8 +7645,6 @@ op {
       }
     }
   }
-  summary: "Returns the truth value of (x == y) element-wise."
-  description: "*NOTE*: `Equal` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
   is_commutative: true
 }
 op {
@@ -8184,12 +7663,12 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes the Gauss error function of `x` element-wise."
 }
 op {
   name: "Erfc"
@@ -8207,31 +7686,62 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes the complementary error function of `x` element-wise."
+}
+op {
+  name: "ExecuteInCriticalSection"
+  input_arg {
+    name: "critical_section"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "arguments"
+    type_list_attr: "Targuments"
+  }
+  output_arg {
+    name: "outputs"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+  }
+  is_stateful: true
 }
 op {
   name: "Exit"
   input_arg {
     name: "data"
-    description: "The tensor to be made available to the parent frame."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The same tensor as `data`."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Exits the current frame to its parent frame."
-  description: "Exit makes its input `data` available to the parent frame."
 }
 op {
   name: "Exp"
@@ -8249,6 +7759,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -8256,7 +7767,6 @@ op {
       }
     }
   }
-  summary: "Computes exponential of x element-wise.  \\\\(y = e^x\\\\)."
 }
 op {
   name: "ExpandDims"
@@ -8266,12 +7776,10 @@ op {
   }
   input_arg {
     name: "dim"
-    description: "0-D (scalar). Specifies the dimension index at which to\nexpand the shape of `input`. Must be in the range\n`[-rank(input) - 1, rank(input)]`."
     type_attr: "Tdim"
   }
   output_arg {
     name: "output"
-    description: "Contains the same data as `input`, but its shape has an additional\ndimension of size 1 added."
     type_attr: "T"
   }
   attr {
@@ -8291,8 +7799,6 @@ op {
       }
     }
   }
-  summary: "Inserts a dimension of 1 into a tensor\'s shape."
-  description: "Given a tensor `input`, this operation inserts a dimension of 1 at the\ndimension index `dim` of `input`\'s shape. The dimension index `dim` starts at\nzero; if you specify a negative number for `dim` it is counted backward from\nthe end.\n\nThis operation is useful if you want to add a batch dimension to a single\nelement. For example, if you have a single image of shape `[height, width,\nchannels]`, you can make it a batch of 1 image with `expand_dims(image, 0)`,\nwhich will make the shape `[1, height, width, channels]`.\n\nOther examples:\n\n```\n# \'t\' is a tensor of shape [2]\nshape(expand_dims(t, 0)) ==> [1, 2]\nshape(expand_dims(t, 1)) ==> [2, 1]\nshape(expand_dims(t, -1)) ==> [2, 1]\n\n# \'t2\' is a tensor of shape [2, 3, 5]\nshape(expand_dims(t2, 0)) ==> [1, 2, 3, 5]\nshape(expand_dims(t2, 2)) ==> [2, 3, 1, 5]\nshape(expand_dims(t2, 3)) ==> [2, 3, 5, 1]\n```\n\nThis operation requires that:\n\n`-1-input.dims() <= dim <= input.dims()`\n\nThis operation is related to `squeeze()`, which removes dimensions of\nsize 1."
 }
 op {
   name: "Expm1"
@@ -8310,6 +7816,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -8317,29 +7824,23 @@ op {
       }
     }
   }
-  summary: "Computes exponential of x - 1 element-wise."
-  description: "I.e., \\\\(y = (\\exp x) - 1\\\\)."
 }
 op {
   name: "ExtractGlimpse"
   input_arg {
     name: "input"
-    description: "A 4-D float tensor of shape `[batch_size, height, width, channels]`."
     type: DT_FLOAT
   }
   input_arg {
     name: "size"
-    description: "A 1-D tensor of 2 elements containing the size of the glimpses\nto extract.  The glimpse height must be specified first, following\nby the glimpse width."
     type: DT_INT32
   }
   input_arg {
     name: "offsets"
-    description: "A 2-D integer tensor of shape `[batch_size, 2]` containing\nthe y, x locations of the center of each window."
     type: DT_FLOAT
   }
   output_arg {
     name: "glimpse"
-    description: "A tensor representing the glimpses `[batch_size,\nglimpse_height, glimpse_width, channels]`."
     type: DT_FLOAT
   }
   attr {
@@ -8348,7 +7849,6 @@ op {
     default_value {
       b: true
     }
-    description: "indicates if the offset coordinates are centered relative to\nthe image, in which case the (0, 0) offset is relative to the center\nof the input images. If false, the (0,0) offset corresponds to the\nupper left corner of the input images."
   }
   attr {
     name: "normalized"
@@ -8356,7 +7856,6 @@ op {
     default_value {
       b: true
     }
-    description: "indicates if the offset coordinates are normalized."
   }
   attr {
     name: "uniform_noise"
@@ -8364,41 +7863,33 @@ op {
     default_value {
       b: true
     }
-    description: "indicates if the noise should be generated using a\nuniform distribution or a Gaussian distribution."
   }
-  summary: "Extracts a glimpse from the input tensor."
-  description: "Returns a set of windows called glimpses extracted at location\n`offsets` from the input tensor. If the windows only partially\noverlaps the inputs, the non overlapping areas will be filled with\nrandom noise.\n\nThe result is a 4-D tensor of shape `[batch_size, glimpse_height,\nglimpse_width, channels]`. The channels and batch dimensions are the\nsame as that of the input tensor. The height and width of the output\nwindows are specified in the `size` parameter.\n\nThe argument `normalized` and `centered` controls how the windows are built:\n\n* If the coordinates are normalized but not centered, 0.0 and 1.0\n  correspond to the minimum and maximum of each height and width\n  dimension.\n* If the coordinates are both normalized and centered, they range from\n  -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper\n  left corner, the lower right corner is located at (1.0, 1.0) and the\n  center is at (0, 0).\n* If the coordinates are not normalized they are interpreted as\n  numbers of pixels."
 }
 op {
   name: "ExtractImagePatches"
   input_arg {
     name: "images"
-    description: "4-D Tensor with shape `[batch, in_rows, in_cols, depth]`."
     type_attr: "T"
   }
   output_arg {
     name: "patches"
-    description: "4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows *\nksize_cols * depth]` containing image patches with size\n`ksize_rows x ksize_cols x depth` vectorized in the \"depth\" dimension. Note\n`out_rows` and `out_cols` are the dimensions of the output patches."
     type_attr: "T"
   }
   attr {
     name: "ksizes"
     type: "list(int)"
-    description: "The size of the sliding window for each dimension of `images`."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D of length 4. How far the centers of two consecutive patches are in\nthe images. Must be: `[1, stride_rows, stride_cols, 1]`."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "rates"
     type: "list(int)"
-    description: "1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the\ninput stride, specifying how far two consecutive patch samples are in the\ninput. Equivalent to extracting patches with\n`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by\nsubsampling them spatially by a factor of `rates`. This is equivalent to\n`rate` in dilated (a.k.a. Atrous) convolutions."
     has_minimum: true
     minimum: 4
   }
@@ -8410,10 +7901,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -8424,7 +7916,6 @@ op {
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use.\n\nWe specify the size-related attributes as:\n\n```python\n      ksizes = [1, ksize_rows, ksize_cols, 1]\n      strides = [1, strides_rows, strides_cols, 1]\n      rates = [1, rates_rows, rates_cols, 1]\n```"
     allowed_values {
       list {
         s: "SAME"
@@ -8432,18 +7923,15 @@ op {
       }
     }
   }
-  summary: "Extract `patches` from `images` and put them in the \"depth\" output dimension."
 }
 op {
   name: "ExtractJpegShape"
   input_arg {
     name: "contents"
-    description: "0-D. The JPEG-encoded image."
     type: DT_STRING
   }
   output_arg {
     name: "image_shape"
-    description: "1-D. The image shape with format [height, width, channels]."
     type_attr: "output_type"
   }
   attr {
@@ -8452,7 +7940,6 @@ op {
     default_value {
       type: DT_INT32
     }
-    description: "(Optional) The output type of the operation (int32 or int64).\nDefaults to int32."
     allowed_values {
       list {
         type: DT_INT32
@@ -8460,66 +7947,50 @@ op {
       }
     }
   }
-  summary: "Extract the shape information of a JPEG-encoded image."
-  description: "This op only parses the image header, so it is much faster than DecodeJpeg."
 }
 op {
   name: "FFT"
   input_arg {
     name: "input"
-    description: "A complex64 tensor."
     type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    description: "A complex64 tensor of the same shape as `input`. The inner-most\n  dimension of `input` is replaced with its 1D Fourier transform.\n\n@compatibility(numpy)\nEquivalent to np.fft.fft\n@end_compatibility"
     type: DT_COMPLEX64
   }
-  summary: "Fast Fourier transform."
-  description: "Computes the 1-dimensional discrete Fourier transform over the inner-most\ndimension of `input`."
 }
 op {
   name: "FFT2D"
   input_arg {
     name: "input"
-    description: "A complex64 tensor."
     type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    description: "A complex64 tensor of the same shape as `input`. The inner-most 2\n  dimensions of `input` are replaced with their 2D Fourier transform.\n\n@compatibility(numpy)\nEquivalent to np.fft.fft2\n@end_compatibility"
     type: DT_COMPLEX64
   }
-  summary: "2D fast Fourier transform."
-  description: "Computes the 2-dimensional discrete Fourier transform over the inner-most\n2 dimensions of `input`."
 }
 op {
   name: "FFT3D"
   input_arg {
     name: "input"
-    description: "A complex64 tensor."
     type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    description: "A complex64 tensor of the same shape as `input`. The inner-most 3\n  dimensions of `input` are replaced with their 3D Fourier transform.\n\n@compatibility(numpy)\nEquivalent to np.fft.fftn with 3 dimensions.\n@end_compatibility"
     type: DT_COMPLEX64
   }
-  summary: "3D fast Fourier transform."
-  description: "Computes the 3-dimensional discrete Fourier transform over the inner-most 3\ndimensions of `input`."
 }
 op {
   name: "FIFOQueue"
   output_arg {
     name: "handle"
-    description: "The handle to the queue."
     type: DT_STRING
     is_ref: true
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a value."
     has_minimum: true
     minimum: 1
   }
@@ -8530,7 +8001,6 @@ op {
       list {
       }
     }
-    description: "The shape of each component in a value. The length of this attr must\nbe either 0 or the same as the length of component_types. If the length of\nthis attr is 0, the shapes of queue elements are not constrained, and\nonly one element may be dequeued at a time."
     has_minimum: true
   }
   attr {
@@ -8539,7 +8009,6 @@ op {
     default_value {
       i: -1
     }
-    description: "The upper bound on the number of elements in this queue.\nNegative numbers mean no limit."
   }
   attr {
     name: "container"
@@ -8547,7 +8016,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -8555,22 +8023,18 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue will be shared under the given name\nacross multiple sessions."
   }
-  summary: "A queue that produces elements in first-in first-out order."
   is_stateful: true
 }
 op {
   name: "FIFOQueueV2"
   output_arg {
     name: "handle"
-    description: "The handle to the queue."
     type: DT_RESOURCE
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a value."
     has_minimum: true
     minimum: 1
   }
@@ -8581,7 +8045,6 @@ op {
       list {
       }
     }
-    description: "The shape of each component in a value. The length of this attr must\nbe either 0 or the same as the length of component_types. If the length of\nthis attr is 0, the shapes of queue elements are not constrained, and\nonly one element may be dequeued at a time."
     has_minimum: true
   }
   attr {
@@ -8590,7 +8053,6 @@ op {
     default_value {
       i: -1
     }
-    description: "The upper bound on the number of elements in this queue.\nNegative numbers mean no limit."
   }
   attr {
     name: "container"
@@ -8598,7 +8060,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -8606,9 +8067,7 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue will be shared under the given name\nacross multiple sessions."
   }
-  summary: "A queue that produces elements in first-in first-out order."
   is_stateful: true
 }
 op {
@@ -8617,7 +8076,6 @@ op {
     name: "fact"
     type: DT_STRING
   }
-  summary: "Output a fact about factorials."
 }
 op {
   name: "FakeQuantWithMinMaxArgs"
@@ -8657,24 +8115,19 @@ op {
       b: false
     }
   }
-  summary: "Fake-quantize the \'inputs\' tensor, type float to \'outputs\' tensor of same type."
-  description: "Attributes `[min; max]` define the clamping range for the `inputs` data.\n`inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`\nwhen `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and\nthen de-quantized and output as floats in `[min; max]` interval.\n`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.\n\nQuantization is called fake since the output is still in floating point."
 }
 op {
   name: "FakeQuantWithMinMaxArgsGradient"
   input_arg {
     name: "gradients"
-    description: "Backpropagated gradients above the FakeQuantWithMinMaxArgs operation."
     type: DT_FLOAT
   }
   input_arg {
     name: "inputs"
-    description: "Values passed as inputs to the FakeQuantWithMinMaxArgs operation."
     type: DT_FLOAT
   }
   output_arg {
     name: "backprops"
-    description: "Backpropagated gradients below the FakeQuantWithMinMaxArgs operation:\n`gradients * (inputs >= min && inputs <= max)`."
     type: DT_FLOAT
   }
   attr {
@@ -8705,7 +8158,6 @@ op {
       b: false
     }
   }
-  summary: "Compute gradients for a FakeQuantWithMinMaxArgs operation."
 }
 op {
   name: "FakeQuantWithMinMaxVars"
@@ -8739,19 +8191,15 @@ op {
       b: false
     }
   }
-  summary: "Fake-quantize the \'inputs\' tensor of type float via global float scalars `min`"
-  description: "and `max` to \'outputs\' tensor of same shape as `inputs`.\n\n`[min; max]` define the clamping range for the `inputs` data.\n`inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`\nwhen `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and\nthen de-quantized and output as floats in `[min; max]` interval.\n`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.\n\nThis operation has a gradient and thus allows for training `min` and `max`\nvalues."
 }
 op {
   name: "FakeQuantWithMinMaxVarsGradient"
   input_arg {
     name: "gradients"
-    description: "Backpropagated gradients above the FakeQuantWithMinMaxVars operation."
     type: DT_FLOAT
   }
   input_arg {
     name: "inputs"
-    description: "Values passed as inputs to the FakeQuantWithMinMaxVars operation.\nmin, max: Quantization interval, scalar floats."
     type: DT_FLOAT
   }
   input_arg {
@@ -8764,17 +8212,14 @@ op {
   }
   output_arg {
     name: "backprops_wrt_input"
-    description: "Backpropagated gradients w.r.t. inputs:\n`gradients * (inputs >= min && inputs <= max)`."
     type: DT_FLOAT
   }
   output_arg {
     name: "backprop_wrt_min"
-    description: "Backpropagated gradients w.r.t. min parameter:\n`sum(gradients * (inputs < min))`."
     type: DT_FLOAT
   }
   output_arg {
     name: "backprop_wrt_max"
-    description: "Backpropagated gradients w.r.t. max parameter:\n`sum(gradients * (inputs > max))`."
     type: DT_FLOAT
   }
   attr {
@@ -8783,7 +8228,6 @@ op {
     default_value {
       i: 8
     }
-    description: "The bitwidth of the quantization; between 2 and 8, inclusive."
   }
   attr {
     name: "narrow_range"
@@ -8791,9 +8235,7 @@ op {
     default_value {
       b: false
     }
-    description: "Whether to quantize into 2^num_bits - 1 distinct values."
   }
-  summary: "Compute gradients for a FakeQuantWithMinMaxVars operation."
 }
 op {
   name: "FakeQuantWithMinMaxVarsPerChannel"
@@ -8827,19 +8269,15 @@ op {
       b: false
     }
   }
-  summary: "Fake-quantize the \'inputs\' tensor of type float and one of the shapes: `[d]`,"
-  description: "`[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]`\nto \'outputs\' tensor of same shape as `inputs`.\n\n`[min; max]` define the clamping range for the `inputs` data.\n`inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`\nwhen `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and\nthen de-quantized and output as floats in `[min; max]` interval.\n`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.\n\nThis operation has a gradient and thus allows for training `min` and `max`\nvalues."
 }
 op {
   name: "FakeQuantWithMinMaxVarsPerChannelGradient"
   input_arg {
     name: "gradients"
-    description: "Backpropagated gradients above the FakeQuantWithMinMaxVars operation,\nshape one of: `[d]`, `[b, d]`,  `[b, h, w, d]`."
     type: DT_FLOAT
   }
   input_arg {
     name: "inputs"
-    description: "Values passed as inputs to the FakeQuantWithMinMaxVars operation, shape\n  same as `gradients`.\nmin, max: Quantization interval, floats of shape `[d]`."
     type: DT_FLOAT
   }
   input_arg {
@@ -8852,17 +8290,14 @@ op {
   }
   output_arg {
     name: "backprops_wrt_input"
-    description: "Backpropagated gradients w.r.t. inputs, shape same as\n`inputs`:\n  `gradients * (inputs >= min && inputs <= max)`."
     type: DT_FLOAT
   }
   output_arg {
     name: "backprop_wrt_min"
-    description: "Backpropagated gradients w.r.t. min parameter, shape `[d]`:\n`sum_per_d(gradients * (inputs < min))`."
     type: DT_FLOAT
   }
   output_arg {
     name: "backprop_wrt_max"
-    description: "Backpropagated gradients w.r.t. max parameter, shape `[d]`:\n`sum_per_d(gradients * (inputs > max))`."
     type: DT_FLOAT
   }
   attr {
@@ -8871,7 +8306,6 @@ op {
     default_value {
       i: 8
     }
-    description: "The bitwidth of the quantization; between 2 and 8, inclusive."
   }
   attr {
     name: "narrow_range"
@@ -8879,9 +8313,7 @@ op {
     default_value {
       b: false
     }
-    description: "Whether to quantize into 2^num_bits - 1 distinct values."
   }
-  summary: "Compute gradients for a FakeQuantWithMinMaxVarsPerChannel operation."
 }
 op {
   name: "FakeQueue"
@@ -8894,19 +8326,16 @@ op {
     type: DT_STRING
     is_ref: true
   }
-  summary: "Deprecated. Do not use."
   is_stateful: true
 }
 op {
   name: "Fill"
   input_arg {
     name: "dims"
-    description: "1-D. Represents the shape of the output tensor."
-    type: DT_INT32
+    type_attr: "index_type"
   }
   input_arg {
     name: "value"
-    description: "0-D (scalar). Value to fill the returned tensor.\n\n@compatibility(numpy)\nEquivalent to np.full\n@end_compatibility"
     type_attr: "T"
   }
   output_arg {
@@ -8917,8 +8346,19 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Creates a tensor filled with a scalar value."
-  description: "This operation creates a tensor of shape `dims` and fills it with `value`.\n\nFor example:\n\n```\n# Output tensor has shape [2, 3].\nfill([2, 3], 9) ==> [[9, 9, 9]\n                     [9, 9, 9]]\n```"
+  attr {
+    name: "index_type"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "FilterDataset"
@@ -8928,7 +8368,6 @@ op {
   }
   input_arg {
     name: "other_arguments"
-    description: "A list of tensors, typically values that were captured when\nbuilding a closure for `predicate`."
     type_list_attr: "Targuments"
   }
   output_arg {
@@ -8938,7 +8377,6 @@ op {
   attr {
     name: "predicate"
     type: "func"
-    description: "A function returning a scalar boolean."
   }
   attr {
     name: "Targuments"
@@ -8957,48 +8395,39 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset containing elements of `input_dataset` matching `predicate`."
-  description: "The `predicate` function must return a scalar boolean and accept the\nfollowing arguments:\n\n* One tensor for each component of an element of `input_dataset`.\n* One tensor for each value in `other_arguments`."
 }
 op {
   name: "FixedLengthRecordDataset"
   input_arg {
     name: "filenames"
-    description: "A scalar or a vector containing the name(s) of the file(s) to be\nread."
     type: DT_STRING
   }
   input_arg {
     name: "header_bytes"
-    description: "A scalar representing the number of bytes to skip at the\nbeginning of a file."
     type: DT_INT64
   }
   input_arg {
     name: "record_bytes"
-    description: "A scalar representing the number of bytes in each record."
     type: DT_INT64
   }
   input_arg {
     name: "footer_bytes"
-    description: "A scalar representing the number of bytes to skip at the end\nof a file."
     type: DT_INT64
   }
   input_arg {
     name: "buffer_size"
-    description: "A scalar representing the number of bytes to buffer. Must be > 0."
     type: DT_INT64
   }
   output_arg {
     name: "handle"
     type: DT_VARIANT
   }
-  summary: "Creates a dataset that emits the records from one or more binary files."
   is_stateful: true
 }
 op {
   name: "FixedLengthRecordReader"
   output_arg {
     name: "reader_handle"
-    description: "The handle to reference the Reader."
     type: DT_STRING
     is_ref: true
   }
@@ -9008,12 +8437,10 @@ op {
     default_value {
       i: 0
     }
-    description: "Number of bytes in the header, defaults to 0."
   }
   attr {
     name: "record_bytes"
     type: "int"
-    description: "Number of bytes in the record."
   }
   attr {
     name: "footer_bytes"
@@ -9021,7 +8448,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Number of bytes in the footer, defaults to 0."
   }
   attr {
     name: "hop_bytes"
@@ -9029,7 +8455,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Number of bytes to hop before each read. Default of 0 means using\nrecord_bytes."
   }
   attr {
     name: "container"
@@ -9037,7 +8462,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -9045,16 +8469,13 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
-  summary: "A Reader that outputs fixed-length records from a file."
   is_stateful: true
 }
 op {
   name: "FixedLengthRecordReaderV2"
   output_arg {
     name: "reader_handle"
-    description: "The handle to reference the Reader."
     type: DT_RESOURCE
   }
   attr {
@@ -9063,12 +8484,10 @@ op {
     default_value {
       i: 0
     }
-    description: "Number of bytes in the header, defaults to 0."
   }
   attr {
     name: "record_bytes"
     type: "int"
-    description: "Number of bytes in the record."
   }
   attr {
     name: "footer_bytes"
@@ -9076,7 +8495,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Number of bytes in the footer, defaults to 0."
   }
   attr {
     name: "hop_bytes"
@@ -9084,7 +8502,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Number of bytes to hop before each read. Default of 0 means using\nrecord_bytes."
   }
   attr {
     name: "container"
@@ -9092,7 +8509,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -9100,7 +8516,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
   attr {
     name: "encoding"
@@ -9108,56 +8523,46 @@ op {
     default_value {
       s: ""
     }
-    description: "The type of encoding for the file. Currently ZLIB and GZIP\nare supported. Defaults to none."
   }
-  summary: "A Reader that outputs fixed-length records from a file."
   is_stateful: true
 }
 op {
   name: "FixedUnigramCandidateSampler"
   input_arg {
     name: "true_classes"
-    description: "A batch_size * num_true matrix, in which each row contains the\nIDs of the num_true target_classes in the corresponding original label."
     type: DT_INT64
   }
   output_arg {
     name: "sampled_candidates"
-    description: "A vector of length num_sampled, in which each element is\nthe ID of a sampled candidate."
     type: DT_INT64
   }
   output_arg {
     name: "true_expected_count"
-    description: "A batch_size * num_true matrix, representing\nthe number of times each candidate is expected to occur in a batch\nof sampled candidates. If unique=true, then this is a probability."
     type: DT_FLOAT
   }
   output_arg {
     name: "sampled_expected_count"
-    description: "A vector of length num_sampled, for each sampled\ncandidate representing the number of times the candidate is expected\nto occur in a batch of sampled candidates.  If unique=true, then this is a\nprobability."
     type: DT_FLOAT
   }
   attr {
     name: "num_true"
     type: "int"
-    description: "Number of true labels per context."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "num_sampled"
     type: "int"
-    description: "Number of candidates to randomly sample."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "unique"
     type: "bool"
-    description: "If unique is true, we sample with rejection, so that all sampled\ncandidates in a batch are unique. This requires some approximation to\nestimate the post-rejection sampling probabilities."
   }
   attr {
     name: "range_max"
     type: "int"
-    description: "The sampler will sample integers from the interval [0, range_max)."
     has_minimum: true
     minimum: 1
   }
@@ -9167,7 +8572,6 @@ op {
     default_value {
       s: ""
     }
-    description: "Each valid line in this file (which should have a CSV-like format)\ncorresponds to a valid word ID. IDs are in sequential order, starting from\nnum_reserved_ids. The last entry in each line is expected to be a value\ncorresponding to the count or relative probability. Exactly one of vocab_file\nand unigrams needs to be passed to this op."
   }
   attr {
     name: "distortion"
@@ -9175,7 +8579,6 @@ op {
     default_value {
       f: 1
     }
-    description: "The distortion is used to skew the unigram probability distribution.\nEach weight is first raised to the distortion\'s power before adding to the\ninternal unigram distribution. As a result, distortion = 1.0 gives regular\nunigram sampling (as defined by the vocab file), and distortion = 0.0 gives\na uniform distribution."
   }
   attr {
     name: "num_reserved_ids"
@@ -9183,7 +8586,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Optionally some reserved IDs can be added in the range [0,\n..., num_reserved_ids) by the users. One use case is that a special unknown\nword token is used as ID 0. These IDs will have a sampling probability of 0."
   }
   attr {
     name: "num_shards"
@@ -9191,7 +8593,6 @@ op {
     default_value {
       i: 1
     }
-    description: "A sampler can be used to sample from a subset of the original range\nin order to speed up the whole computation through parallelism. This parameter\n(together with \'shard\') indicates the number of partitions that are being\nused in the overall computation."
     has_minimum: true
     minimum: 1
   }
@@ -9201,7 +8602,6 @@ op {
     default_value {
       i: 0
     }
-    description: "A sampler can be used to sample from a subset of the original range\nin order to speed up the whole computation through parallelism. This parameter\n(together with \'num_shards\') indicates the particular partition number of a\nsampler op, when partitioning is being used."
     has_minimum: true
   }
   attr {
@@ -9211,7 +8611,6 @@ op {
       list {
       }
     }
-    description: "A list of unigram counts or probabilities, one per ID in sequential\norder. Exactly one of vocab_file and unigrams should be passed to this op."
   }
   attr {
     name: "seed"
@@ -9219,7 +8618,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -9227,10 +8625,7 @@ op {
     default_value {
       i: 0
     }
-    description: "An second seed to avoid seed collision."
   }
-  summary: "Generates labels for candidate sampling with a learned unigram distribution."
-  description: "A unigram sampler could use a fixed unigram distribution read from a\nfile or passed in as an in-memory array instead of building up the distribution\nfrom data on the fly. There is also an option to skew the distribution by\napplying a distortion power to the weights.\n\nThe vocabulary file should be in CSV-like format, with the last field\nbeing the weight associated with the word.\n\nFor each batch, this op picks a single set of sampled candidate labels.\n\nThe advantages of sampling candidates per-batch are simplicity and the\npossibility of efficient dense matrix multiplication. The disadvantage is that\nthe sampled candidates must be chosen independently of the context and of the\ntrue labels."
   is_stateful: true
 }
 op {
@@ -9250,7 +8645,6 @@ op {
   attr {
     name: "f"
     type: "func"
-    description: "A function mapping elements of `input_dataset`, concatenated with\n`other_arguments`, to a Dataset variant that contains elements matching\n`output_types` and `output_shapes`."
   }
   attr {
     name: "Targuments"
@@ -9269,8 +8663,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that applies `f` to the outputs of `input_dataset`."
-  description: "Unlike MapDataset, the `f` in FlatMapDataset is expected to return a\nDataset variant, and FlatMapDataset will flatten successive results\ninto a single Dataset."
 }
 op {
   name: "Floor"
@@ -9288,12 +8680,12 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Returns element-wise largest integer not greater than x."
 }
 op {
   name: "FloorDiv"
@@ -9315,6 +8707,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -9328,8 +8721,6 @@ op {
       }
     }
   }
-  summary: "Returns x // y element-wise."
-  description: "*NOTE*: `FloorDiv` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "FloorMod"
@@ -9352,40 +8743,34 @@ op {
       list {
         type: DT_INT32
         type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Returns element-wise remainder of division. When `x < 0` xor `y < 0` is"
-  description: "true, this follows Python semantics in that the result here is consistent\nwith a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`.\n\n*NOTE*: `FloorMod` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "FractionalAvgPool"
   input_arg {
     name: "value"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "output tensor after fractional avg pooling."
     type_attr: "T"
   }
   output_arg {
     name: "row_pooling_sequence"
-    description: "row pooling sequence, needed to calculate gradient."
     type: DT_INT64
   }
   output_arg {
     name: "col_pooling_sequence"
-    description: "column pooling sequence, needed to calculate gradient."
     type: DT_INT64
   }
   attr {
     name: "pooling_ratio"
     type: "list(float)"
-    description: "Pooling ratio for each dimension of `value`, currently only\nsupports row and col dimension and should be >= 1.0. For example, a valid\npooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements\nmust be 1.0 because we don\'t allow pooling on batch and channels\ndimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions\nrespectively."
     has_minimum: true
     minimum: 4
   }
@@ -9395,7 +8780,6 @@ op {
     default_value {
       b: false
     }
-    description: "When set to True, generates the pooling sequence in a\npseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin\nGraham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for\ndifference between pseudorandom and random."
   }
   attr {
     name: "overlapping"
@@ -9403,7 +8787,6 @@ op {
     default_value {
       b: false
     }
-    description: "When set to True, it means when pooling, the values at the boundary\nof adjacent pooling cells are used by both cells. For example:\n\n`index  0  1  2  3  4`\n\n`value  20 5  16 3  7`\n\nIf the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.\nThe result would be [41/3, 26/3] for fractional avg pooling."
   }
   attr {
     name: "deterministic"
@@ -9411,7 +8794,6 @@ op {
     default_value {
       b: false
     }
-    description: "When set to True, a fixed pooling region will be used when\niterating over a FractionalAvgPool node in the computation graph. Mainly used\nin unit test to make FractionalAvgPool deterministic."
   }
   attr {
     name: "seed"
@@ -9419,7 +8801,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -9427,7 +8808,6 @@ op {
     default_value {
       i: 0
     }
-    description: "An second seed to avoid seed collision."
   }
   attr {
     name: "T"
@@ -9441,34 +8821,27 @@ op {
       }
     }
   }
-  summary: "Performs fractional average pooling on the input."
-  description: "Fractional average pooling is similar to Fractional max pooling in the pooling\nregion generation step. The only difference is that after pooling regions are\ngenerated, a mean operation is performed instead of a max operation in each\npooling region."
 }
 op {
   name: "FractionalAvgPoolGrad"
   input_arg {
     name: "orig_input_tensor_shape"
-    description: "Original input tensor shape for `fractional_avg_pool`"
     type: DT_INT64
   }
   input_arg {
     name: "out_backprop"
-    description: "4-D with shape `[batch, height, width, channels]`.  Gradients\nw.r.t. the output of `fractional_avg_pool`."
     type_attr: "T"
   }
   input_arg {
     name: "row_pooling_sequence"
-    description: "row pooling sequence, form pooling region with\ncol_pooling_sequence."
     type: DT_INT64
   }
   input_arg {
     name: "col_pooling_sequence"
-    description: "column pooling sequence, form pooling region with\nrow_pooling sequence."
     type: DT_INT64
   }
   output_arg {
     name: "output"
-    description: "4-D.  Gradients w.r.t. the input of `fractional_avg_pool`."
     type_attr: "T"
   }
   attr {
@@ -9477,7 +8850,6 @@ op {
     default_value {
       b: false
     }
-    description: "When set to True, it means when pooling, the values at the boundary\nof adjacent pooling cells are used by both cells. For example:\n\n`index  0  1  2  3  4`\n\n`value  20 5  16 3  7`\n\nIf the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.\nThe result would be [41/3, 26/3] for fractional avg pooling."
   }
   attr {
     name: "T"
@@ -9491,35 +8863,28 @@ op {
       }
     }
   }
-  summary: "Computes gradient of the FractionalAvgPool function."
-  description: "Unlike FractionalMaxPoolGrad, we don\'t need to find arg_max for\nFractionalAvgPoolGrad, we just need to evenly back-propagate each element of\nout_backprop to those indices that form the same pooling cell. Therefore, we\njust need to know the shape of original input tensor, instead of the whole\ntensor."
 }
 op {
   name: "FractionalMaxPool"
   input_arg {
     name: "value"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "output tensor after fractional max pooling."
     type_attr: "T"
   }
   output_arg {
     name: "row_pooling_sequence"
-    description: "row pooling sequence, needed to calculate gradient."
     type: DT_INT64
   }
   output_arg {
     name: "col_pooling_sequence"
-    description: "column pooling sequence, needed to calculate gradient."
     type: DT_INT64
   }
   attr {
     name: "pooling_ratio"
     type: "list(float)"
-    description: "Pooling ratio for each dimension of `value`, currently only\nsupports row and col dimension and should be >= 1.0. For example, a valid\npooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements\nmust be 1.0 because we don\'t allow pooling on batch and channels\ndimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions\nrespectively."
     has_minimum: true
     minimum: 4
   }
@@ -9529,7 +8894,6 @@ op {
     default_value {
       b: false
     }
-    description: "When set to True, generates the pooling sequence in a\npseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin\nGraham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for\ndifference between pseudorandom and random."
   }
   attr {
     name: "overlapping"
@@ -9537,7 +8901,6 @@ op {
     default_value {
       b: false
     }
-    description: "When set to True, it means when pooling, the values at the boundary\nof adjacent pooling cells are used by both cells. For example:\n\n`index  0  1  2  3  4`\n\n`value  20 5  16 3  7`\n\nIf the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.\nThe result would be [20, 16] for fractional max pooling."
   }
   attr {
     name: "deterministic"
@@ -9545,7 +8908,6 @@ op {
     default_value {
       b: false
     }
-    description: "When set to True, a fixed pooling region will be used when\niterating over a FractionalMaxPool node in the computation graph. Mainly used\nin unit test to make FractionalMaxPool deterministic."
   }
   attr {
     name: "seed"
@@ -9553,7 +8915,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -9561,7 +8922,6 @@ op {
     default_value {
       i: 0
     }
-    description: "An second seed to avoid seed collision."
   }
   attr {
     name: "T"
@@ -9575,39 +8935,31 @@ op {
       }
     }
   }
-  summary: "Performs fractional max pooling on the input."
-  description: "Fractional max pooling is slightly different than regular max pooling.  In\nregular max pooling, you downsize an input set by taking the maximum value of\nsmaller N x N subsections of the set (often 2x2), and try to reduce the set by\na factor of N, where N is an integer.  Fractional max pooling, as you might\nexpect from the word \"fractional\", means that the overall reduction ratio N\ndoes not have to be an integer.\n\nThe sizes of the pooling regions are generated randomly but are fairly uniform.\nFor example, let\'s look at the height dimension, and the constraints on the\nlist of rows that will be pool boundaries.\n\nFirst we define the following:\n\n1.  input_row_length : the number of rows from the input set\n2.  output_row_length : which will be smaller than the input\n3.  alpha = input_row_length / output_row_length : our reduction ratio\n4.  K = floor(alpha)\n5.  row_pooling_sequence : this is the result list of pool boundary rows\n\nThen, row_pooling_sequence should satisfy:\n\n1.  a[0] = 0 : the first value of the sequence is 0\n2.  a[end] = input_row_length : the last value of the sequence is the size\n3.  K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size\n4.  length(row_pooling_sequence) = output_row_length+1\n\nFor more details on fractional max pooling, see this paper:\n[Benjamin Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071)"
 }
 op {
   name: "FractionalMaxPoolGrad"
   input_arg {
     name: "orig_input"
-    description: "Original input for `fractional_max_pool`"
     type_attr: "T"
   }
   input_arg {
     name: "orig_output"
-    description: "Original output for `fractional_max_pool`"
     type_attr: "T"
   }
   input_arg {
     name: "out_backprop"
-    description: "4-D with shape `[batch, height, width, channels]`.  Gradients\nw.r.t. the output of `fractional_max_pool`."
     type_attr: "T"
   }
   input_arg {
     name: "row_pooling_sequence"
-    description: "row pooling sequence, form pooling region with\ncol_pooling_sequence."
     type: DT_INT64
   }
   input_arg {
     name: "col_pooling_sequence"
-    description: "column pooling sequence, form pooling region with\nrow_pooling sequence."
     type: DT_INT64
   }
   output_arg {
     name: "output"
-    description: "4-D.  Gradients w.r.t. the input of `fractional_max_pool`."
     type_attr: "T"
   }
   attr {
@@ -9616,7 +8968,6 @@ op {
     default_value {
       b: false
     }
-    description: "When set to True, it means when pooling, the values at the boundary\nof adjacent pooling cells are used by both cells. For example:\n\n`index  0  1  2  3  4`\n\n`value  20 5  16 3  7`\n\nIf the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.\nThe result would be [20, 16] for fractional max pooling."
   }
   attr {
     name: "T"
@@ -9630,64 +8981,52 @@ op {
       }
     }
   }
-  summary: "Computes gradient of the FractionalMaxPool function."
 }
 op {
   name: "FusedBatchNorm"
   input_arg {
     name: "x"
-    description: "A 4D Tensor for input data."
     type_attr: "T"
   }
   input_arg {
     name: "scale"
-    description: "A 1D Tensor for scaling factor, to scale the normalized x."
     type_attr: "T"
   }
   input_arg {
     name: "offset"
-    description: "A 1D Tensor for offset, to shift to the normalized x."
     type_attr: "T"
   }
   input_arg {
     name: "mean"
-    description: "A 1D Tensor for population mean. Used for inference only;\nmust be empty for training."
     type_attr: "T"
   }
   input_arg {
     name: "variance"
-    description: "A 1D Tensor for population variance. Used for inference only;\nmust be empty for training."
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    description: "A 4D Tensor for output data."
     type_attr: "T"
   }
   output_arg {
     name: "batch_mean"
-    description: "A 1D Tensor for the computed batch mean, to be used by TensorFlow\nto compute the running mean."
     type_attr: "T"
   }
   output_arg {
     name: "batch_variance"
-    description: "A 1D Tensor for the computed batch variance, to be used by\nTensorFlow to compute the running variance."
     type_attr: "T"
   }
   output_arg {
     name: "reserve_space_1"
-    description: "A 1D Tensor for the computed batch mean, to be reused\nin the gradient computation."
     type_attr: "T"
   }
   output_arg {
     name: "reserve_space_2"
-    description: "A 1D Tensor for the computed batch variance (inverted variance\nin the cuDNN case), to be reused in the gradient computation."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    description: "The data type for the elements of input and output Tensors."
     allowed_values {
       list {
         type: DT_FLOAT
@@ -9700,7 +9039,6 @@ op {
     default_value {
       f: 0.0001
     }
-    description: "A small float number added to the variance of x."
   }
   attr {
     name: "data_format"
@@ -9708,7 +9046,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "The data format for x and y. Either \"NHWC\" (default) or \"NCHW\"."
   }
   attr {
     name: "is_training"
@@ -9716,67 +9053,53 @@ op {
     default_value {
       b: true
     }
-    description: "A bool value to indicate the operation is for training (default)\nor inference."
   }
-  summary: "Batch normalization."
-  description: "Note that the size of 4D Tensors are defined by either \"NHWC\" or \"NCHW\".\nThe size of 1D Tensors matches the dimension C of the 4D Tensors."
 }
 op {
   name: "FusedBatchNormGrad"
   input_arg {
     name: "y_backprop"
-    description: "A 4D Tensor for the gradient with respect to y."
     type_attr: "T"
   }
   input_arg {
     name: "x"
-    description: "A 4D Tensor for input data."
     type_attr: "T"
   }
   input_arg {
     name: "scale"
-    description: "A 1D Tensor for scaling factor, to scale the normalized x."
     type_attr: "T"
   }
   input_arg {
     name: "reserve_space_1"
-    description: "When is_training is True, a 1D Tensor for the computed batch\nmean to be reused in gradient computation. When is_training is\nFalse, a 1D Tensor for the population mean to be reused in both\n1st and 2nd order gradient computation."
     type_attr: "T"
   }
   input_arg {
     name: "reserve_space_2"
-    description: "When is_training is True, a 1D Tensor for the computed batch\nvariance (inverted variance in the cuDNN case) to be reused in\ngradient computation. When is_training is False, a 1D Tensor\nfor the population variance to be reused in both 1st and 2nd\norder gradient computation."
     type_attr: "T"
   }
   output_arg {
     name: "x_backprop"
-    description: "A 4D Tensor for the gradient with respect to x."
     type_attr: "T"
   }
   output_arg {
     name: "scale_backprop"
-    description: "A 1D Tensor for the gradient with respect to scale."
     type_attr: "T"
   }
   output_arg {
     name: "offset_backprop"
-    description: "A 1D Tensor for the gradient with respect to offset."
     type_attr: "T"
   }
   output_arg {
     name: "reserve_space_3"
-    description: "Unused placeholder to match the mean input in FusedBatchNorm."
     type_attr: "T"
   }
   output_arg {
     name: "reserve_space_4"
-    description: "Unused placeholder to match the variance input\nin FusedBatchNorm."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
-    description: "The data type for the elements of input and output Tensors."
     allowed_values {
       list {
         type: DT_FLOAT
@@ -9789,7 +9112,6 @@ op {
     default_value {
       f: 0.0001
     }
-    description: "A small float number added to the variance of x."
   }
   attr {
     name: "data_format"
@@ -9797,7 +9119,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "The data format for y_backprop, x, x_backprop.\nEither \"NHWC\" (default) or \"NCHW\"."
   }
   attr {
     name: "is_training"
@@ -9805,70 +9126,57 @@ op {
     default_value {
       b: true
     }
-    description: "A bool value to indicate the operation is for training (default)\nor inference."
   }
-  summary: "Gradient for batch normalization."
-  description: "Note that the size of 4D Tensors are defined by either \"NHWC\" or \"NCHW\".\nThe size of 1D Tensors matches the dimension C of the 4D Tensors."
 }
 op {
   name: "FusedBatchNormGradV2"
   input_arg {
     name: "y_backprop"
-    description: "A 4D Tensor for the gradient with respect to y."
     type_attr: "T"
   }
   input_arg {
     name: "x"
-    description: "A 4D Tensor for input data."
     type_attr: "T"
   }
   input_arg {
     name: "scale"
-    description: "A 1D Tensor for scaling factor, to scale the normalized x."
     type: DT_FLOAT
   }
   input_arg {
     name: "reserve_space_1"
-    description: "When is_training is True, a 1D Tensor for the computed batch\nmean to be reused in gradient computation. When is_training is\nFalse, a 1D Tensor for the population mean to be reused in both\n1st and 2nd order gradient computation."
     type_attr: "U"
   }
   input_arg {
     name: "reserve_space_2"
-    description: "When is_training is True, a 1D Tensor for the computed batch\nvariance (inverted variance in the cuDNN case) to be reused in\ngradient computation. When is_training is False, a 1D Tensor\nfor the population variance to be reused in both 1st and 2nd\norder gradient computation."
     type_attr: "U"
   }
   output_arg {
     name: "x_backprop"
-    description: "A 4D Tensor for the gradient with respect to x."
     type_attr: "T"
   }
   output_arg {
     name: "scale_backprop"
-    description: "A 1D Tensor for the gradient with respect to scale."
     type_attr: "U"
   }
   output_arg {
     name: "offset_backprop"
-    description: "A 1D Tensor for the gradient with respect to offset."
     type_attr: "U"
   }
   output_arg {
     name: "reserve_space_3"
-    description: "Unused placeholder to match the mean input in FusedBatchNorm."
     type_attr: "U"
   }
   output_arg {
     name: "reserve_space_4"
-    description: "Unused placeholder to match the variance input\nin FusedBatchNorm."
     type_attr: "U"
   }
   attr {
     name: "T"
     type: "type"
-    description: "The data type for the elements of input and output Tensors."
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -9876,7 +9184,6 @@ op {
   attr {
     name: "U"
     type: "type"
-    description: "The data type for the scale, offset, mean, and variance."
     allowed_values {
       list {
         type: DT_FLOAT
@@ -9889,7 +9196,6 @@ op {
     default_value {
       f: 0.0001
     }
-    description: "A small float number added to the variance of x."
   }
   attr {
     name: "data_format"
@@ -9897,7 +9203,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "The data format for y_backprop, x, x_backprop.\nEither \"NHWC\" (default) or \"NCHW\"."
   }
   attr {
     name: "is_training"
@@ -9905,70 +9210,57 @@ op {
     default_value {
       b: true
     }
-    description: "A bool value to indicate the operation is for training (default)\nor inference."
   }
-  summary: "Gradient for batch normalization."
-  description: "Note that the size of 4D Tensors are defined by either \"NHWC\" or \"NCHW\".\nThe size of 1D Tensors matches the dimension C of the 4D Tensors."
 }
 op {
   name: "FusedBatchNormV2"
   input_arg {
     name: "x"
-    description: "A 4D Tensor for input data."
     type_attr: "T"
   }
   input_arg {
     name: "scale"
-    description: "A 1D Tensor for scaling factor, to scale the normalized x."
     type_attr: "U"
   }
   input_arg {
     name: "offset"
-    description: "A 1D Tensor for offset, to shift to the normalized x."
     type_attr: "U"
   }
   input_arg {
     name: "mean"
-    description: "A 1D Tensor for population mean. Used for inference only;\nmust be empty for training."
     type_attr: "U"
   }
   input_arg {
     name: "variance"
-    description: "A 1D Tensor for population variance. Used for inference only;\nmust be empty for training."
     type_attr: "U"
   }
   output_arg {
     name: "y"
-    description: "A 4D Tensor for output data."
     type_attr: "T"
   }
   output_arg {
     name: "batch_mean"
-    description: "A 1D Tensor for the computed batch mean, to be used by TensorFlow\nto compute the running mean."
     type_attr: "U"
   }
   output_arg {
     name: "batch_variance"
-    description: "A 1D Tensor for the computed batch variance, to be used by\nTensorFlow to compute the running variance."
     type_attr: "U"
   }
   output_arg {
     name: "reserve_space_1"
-    description: "A 1D Tensor for the computed batch mean, to be reused\nin the gradient computation."
     type_attr: "U"
   }
   output_arg {
     name: "reserve_space_2"
-    description: "A 1D Tensor for the computed batch variance (inverted variance\nin the cuDNN case), to be reused in the gradient computation."
     type_attr: "U"
   }
   attr {
     name: "T"
     type: "type"
-    description: "The data type for the elements of input and output Tensors."
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -9976,7 +9268,6 @@ op {
   attr {
     name: "U"
     type: "type"
-    description: "The data type for the scale, offset, mean, and variance."
     allowed_values {
       list {
         type: DT_FLOAT
@@ -9989,7 +9280,6 @@ op {
     default_value {
       f: 0.0001
     }
-    description: "A small float number added to the variance of x."
   }
   attr {
     name: "data_format"
@@ -9997,7 +9287,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "The data format for x and y. Either \"NHWC\" (default) or \"NCHW\"."
   }
   attr {
     name: "is_training"
@@ -10005,26 +9294,20 @@ op {
     default_value {
       b: true
     }
-    description: "A bool value to indicate the operation is for training (default)\nor inference."
   }
-  summary: "Batch normalization."
-  description: "Note that the size of 4D Tensors are defined by either \"NHWC\" or \"NCHW\".\nThe size of 1D Tensors matches the dimension C of the 4D Tensors."
 }
 op {
   name: "FusedPadConv2D"
   input_arg {
     name: "input"
-    description: "4-D with shape `[batch, in_height, in_width, in_channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "paddings"
-    description: "A two-column matrix specifying the padding sizes. The number of\nrows must be the same as the rank of `input`."
     type: DT_INT32
   }
   input_arg {
     name: "filter"
-    description: "4-D with shape\n`[filter_height, filter_width, in_channels, out_channels]`."
     type_attr: "T"
   }
   output_arg {
@@ -10053,12 +9336,10 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D of length 4.  The stride of the sliding window for each dimension\nof `input`. Must be in the same order as the dimension specified with format."
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -10066,29 +9347,23 @@ op {
       }
     }
   }
-  summary: "Performs a padding as a preprocess during a convolution."
-  description: "Similar to FusedResizeAndPadConv2d, this op allows for an optimized\nimplementation where the spatial padding transformation stage is fused with the\nim2col lookup, but in this case without the bilinear filtering required for\nresizing. Fusing the padding prevents the need to write out the intermediate\nresults as whole tensors, reducing memory pressure, and we can get some latency\ngains by merging the transformation calculations.\nThe data_format attribute for Conv2D isn\'t supported by this op, and \'NHWC\'\norder is used instead.\nInternally this op uses a single per-graph scratch buffer, which means that it\nwill block if multiple versions are being run in parallel. This is because this\noperator is primarily an optimization to minimize memory usage."
 }
 op {
   name: "FusedResizeAndPadConv2D"
   input_arg {
     name: "input"
-    description: "4-D with shape `[batch, in_height, in_width, in_channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "size"
-    description: "A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The\nnew size for the images."
     type: DT_INT32
   }
   input_arg {
     name: "paddings"
-    description: "A two-column matrix specifying the padding sizes. The number of\nrows must be the same as the rank of `input`."
     type: DT_INT32
   }
   input_arg {
     name: "filter"
-    description: "4-D with shape\n`[filter_height, filter_width, in_channels, out_channels]`."
     type_attr: "T"
   }
   output_arg {
@@ -10110,7 +9385,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, rescale input by (new_height - 1) / (height - 1),\nwhich exactly aligns the 4 corners of images and resized images. If false, rescale\nby new_height / height. Treat similarly the width dimension."
   }
   attr {
     name: "mode"
@@ -10125,12 +9399,10 @@ op {
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D of length 4.  The stride of the sliding window for each dimension\nof `input`. Must be in the same order as the dimension specified with format."
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -10138,8 +9410,6 @@ op {
       }
     }
   }
-  summary: "Performs a resize and padding as a preprocess during a convolution."
-  description: "It\'s often possible to do spatial transformations more efficiently as part of\nthe packing stage of a convolution, so this op allows for an optimized\nimplementation where these stages are fused together. This prevents the need to\nwrite out the intermediate results as whole tensors, reducing memory pressure,\nand we can get some latency gains by merging the transformation calculations.\nThe data_format attribute for Conv2D isn\'t supported by this op, and defaults to\n\'NHWC\' order.\nInternally this op uses a single per-graph scratch buffer, which means that it\nwill block if multiple versions are being run in parallel. This is because this\noperator is primarily an optimization to minimize memory usage."
 }
 op {
   name: "Gather"
@@ -10176,24 +9446,19 @@ op {
       }
     }
   }
-  summary: "Gather slices from `params` according to `indices`."
-  description: "`indices` must be an integer tensor of any dimension (usually 0-D or 1-D).\nProduces an output tensor with shape `indices.shape + params.shape[1:]` where:\n\n```python\n    # Scalar indices\n    output[:, ..., :] = params[indices, :, ... :]\n\n    # Vector indices\n    output[i, :, ..., :] = params[indices[i], :, ... :]\n\n    # Higher rank indices\n    output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]\n```\n\nIf `indices` is a permutation and `len(indices) == params.shape[0]` then\nthis operation will permute `params` accordingly.\n\n`validate_indices`: DEPRECATED. If this operation is assigned to CPU, values in\n`indices` are always validated to be within range. If assigned to GPU,\nout-of-bound indices result in safe but unspecified behavior, which may include\nraising an error.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/Gather.png\" alt>\n</div>"
 }
 op {
   name: "GatherNd"
   input_arg {
     name: "params"
-    description: "The tensor from which to gather values."
     type_attr: "Tparams"
   }
   input_arg {
     name: "indices"
-    description: "Index tensor."
     type_attr: "Tindices"
   }
   output_arg {
     name: "output"
-    description: "Values from `params` gathered from indices given by `indices`, with\nshape `indices.shape[:-1] + params.shape[indices.shape[-1]:]`."
     type_attr: "Tparams"
   }
   attr {
@@ -10210,29 +9475,23 @@ op {
       }
     }
   }
-  summary: "Gather slices from `params` into a Tensor with shape specified by `indices`."
-  description: "`indices` is an K-dimensional integer tensor, best thought of as a\n(K-1)-dimensional tensor of indices into `params`, where each element defines a\nslice of `params`:\n\n    output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]]\n\nWhereas in @{tf.gather} `indices` defines slices into the first\ndimension of `params`, in `tf.gather_nd`, `indices` defines slices into the\nfirst `N` dimensions of `params`, where `N = indices.shape[-1]`.\n\nThe last dimension of `indices` can be at most the rank of\n`params`:\n\n    indices.shape[-1] <= params.rank\n\nThe last dimension of `indices` corresponds to elements\n(if `indices.shape[-1] == params.rank`) or slices\n(if `indices.shape[-1] < params.rank`) along dimension `indices.shape[-1]`\nof `params`.  The output tensor has shape\n\n    indices.shape[:-1] + params.shape[indices.shape[-1]:]\n\nSome examples below.\n\nSimple indexing into a matrix:\n\n```python\n    indices = [[0, 0], [1, 1]]\n    params = [[\'a\', \'b\'], [\'c\', \'d\']]\n    output = [\'a\', \'d\']\n```\n\nSlice indexing into a matrix:\n\n```python\n    indices = [[1], [0]]\n    params = [[\'a\', \'b\'], [\'c\', \'d\']]\n    output = [[\'c\', \'d\'], [\'a\', \'b\']]\n```\n\nIndexing into a 3-tensor:\n\n```python\n    indices = [[1]]\n    params = [[[\'a0\', \'b0\'], [\'c0\', \'d0\']],\n              [[\'a1\', \'b1\'], [\'c1\', \'d1\']]]\n    output = [[[\'a1\', \'b1\'], [\'c1\', \'d1\']]]\n\n\n    indices = [[0, 1], [1, 0]]\n    params = [[[\'a0\', \'b0\'], [\'c0\', \'d0\']],\n              [[\'a1\', \'b1\'], [\'c1\', \'d1\']]]\n    output = [[\'c0\', \'d0\'], [\'a1\', \'b1\']]\n\n\n    indices = [[0, 0, 1], [1, 0, 1]]\n    params = [[[\'a0\', \'b0\'], [\'c0\', \'d0\']],\n              [[\'a1\', \'b1\'], [\'c1\', \'d1\']]]\n    output = [\'b0\', \'b1\']\n```\n\nBatched indexing into a matrix:\n\n```python\n    indices = [[[0, 0]], [[0, 1]]]\n    params = [[\'a\', \'b\'], [\'c\', \'d\']]\n    output = [[\'a\'], [\'b\']]\n```\n\nBatched slice indexing into a matrix:\n\n```python\n    indices = [[[1]], [[0]]]\n    params = [[\'a\', \'b\'], [\'c\', \'d\']]\n    output = [[[\'c\', \'d\']], [[\'a\', \'b\']]]\n```\n\nBatched indexing into a 3-tensor:\n\n```python\n    indices = [[[1]], [[0]]]\n    params = [[[\'a0\', \'b0\'], [\'c0\', \'d0\']],\n              [[\'a1\', \'b1\'], [\'c1\', \'d1\']]]\n    output = [[[[\'a1\', \'b1\'], [\'c1\', \'d1\']]],\n              [[[\'a0\', \'b0\'], [\'c0\', \'d0\']]]]\n\n    indices = [[[0, 1], [1, 0]], [[0, 0], [1, 1]]]\n    params = [[[\'a0\', \'b0\'], [\'c0\', \'d0\']],\n              [[\'a1\', \'b1\'], [\'c1\', \'d1\']]]\n    output = [[[\'c0\', \'d0\'], [\'a1\', \'b1\']],\n              [[\'a0\', \'b0\'], [\'c1\', \'d1\']]]\n\n\n    indices = [[[0, 0, 1], [1, 0, 1]], [[0, 1, 1], [1, 1, 0]]]\n    params = [[[\'a0\', \'b0\'], [\'c0\', \'d0\']],\n              [[\'a1\', \'b1\'], [\'c1\', \'d1\']]]\n    output = [[\'b0\', \'b1\'], [\'d0\', \'c1\']]\n```"
 }
 op {
   name: "GatherV2"
   input_arg {
     name: "params"
-    description: "The tensor from which to gather values. Must be at least rank\n`axis + 1`."
     type_attr: "Tparams"
   }
   input_arg {
     name: "indices"
-    description: "Index tensor. Must be in range `[0, params.shape[axis])`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "axis"
-    description: "The axis in `params` to gather `indices` from. Defaults to the first\ndimension. Supports negative indexes."
     type_attr: "Taxis"
   }
   output_arg {
     name: "output"
-    description: "Values from `params` gathered from indices given by `indices`, with\nshape `params.shape[:axis] + indices.shape + params.shape[axis + 1:]`."
     type_attr: "Tparams"
   }
   attr {
@@ -10259,41 +9518,33 @@ op {
       }
     }
   }
-  summary: "Gather slices from `params` axis `axis` according to `indices`."
-  description: "`indices` must be an integer tensor of any dimension (usually 0-D or 1-D).\nProduces an output tensor with shape `params.shape[:axis] + indices.shape +\nparams.shape[axis + 1:]` where:\n\n```python\n    # Scalar indices (output is rank(params) - 1).\n    output[a_0, ..., a_n, b_0, ..., b_n] =\n      params[a_0, ..., a_n, indices, b_0, ..., b_n]\n\n    # Vector indices (output is rank(params)).\n    output[a_0, ..., a_n, i, b_0, ..., b_n] =\n      params[a_0, ..., a_n, indices[i], b_0, ..., b_n]\n\n    # Higher rank indices (output is rank(params) + rank(indices) - 1).\n    output[a_0, ..., a_n, i, ..., j, b_0, ... b_n] =\n      params[a_0, ..., a_n, indices[i, ..., j], b_0, ..., b_n]\n```\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/Gather.png\" alt>\n</div>"
 }
 op {
   name: "GenerateVocabRemapping"
   input_arg {
     name: "new_vocab_file"
-    description: "Path to the new vocab file."
     type: DT_STRING
   }
   input_arg {
     name: "old_vocab_file"
-    description: "Path to the old vocab file."
     type: DT_STRING
   }
   output_arg {
     name: "remapping"
-    description: "A Tensor of length num_new_vocab where the element at index i\nis equal to the old ID that maps to the new ID i.  This element is -1 for any\nnew ID that is not found in the old vocabulary."
     type: DT_INT64
   }
   output_arg {
     name: "num_present"
-    description: "Number of new vocab entries found in old vocab."
     type: DT_INT32
   }
   attr {
     name: "new_vocab_offset"
     type: "int"
-    description: "How many entries into the new vocab file to start reading."
     has_minimum: true
   }
   attr {
     name: "num_new_vocab"
     type: "int"
-    description: "Number of entries in the new vocab file to remap."
     has_minimum: true
   }
   attr {
@@ -10302,68 +9553,57 @@ op {
     default_value {
       i: -1
     }
-    description: "Number of entries in the old vocab file to consider.  If -1,\nuse the entire old vocabulary."
     has_minimum: true
     minimum: -1
   }
-  summary: "Given a path to new and old vocabulary files, returns a remapping Tensor of"
-  description: "length `num_new_vocab`, where `remapping[i]` contains the row number in the old\nvocabulary that corresponds to row `i` in the new vocabulary (starting at line\n`new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i`\nin the new vocabulary is not in the old vocabulary.  The old vocabulary is\nconstrained to the first `old_vocab_size` entries if `old_vocab_size` is not the\ndefault value of -1.\n\n`num_vocab_offset` enables\nuse in the partitioned variable case, and should generally be set through\nexamining partitioning info.  The format of the files should be a text file,\nwith each line containing a single entity within the vocabulary.\n\nFor example, with `new_vocab_file` a text file containing each of the following\nelements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3],\n`num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be\n`[0, -1, 2]`.\n\nThe op also returns a count of how many entries in the new vocabulary\nwere present in the old vocabulary, which is used to calculate the number of\nvalues to initialize in a weight matrix remapping\n\nThis functionality can be used to remap both row vocabularies (typically,\nfeatures) and column vocabularies (typically, classes) from TensorFlow\ncheckpoints.  Note that the partitioning logic relies on contiguous vocabularies\ncorresponding to div-partitioned variables.  Moreover, the underlying remapping\nuses an IndexTable (as opposed to an inexact CuckooTable), so client code should\nuse the corresponding index_table_from_file() as the FeatureColumn framework\ndoes (as opposed to tf.feature_to_id(), which uses a CuckooTable)."
 }
 op {
   name: "GetSessionHandle"
   input_arg {
     name: "value"
-    description: "The tensor to be stored."
     type_attr: "T"
   }
   output_arg {
     name: "handle"
-    description: "The handle for the tensor stored in the session state, represented\nas a string."
     type: DT_STRING
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Store the input tensor in the state of the current session."
+  is_stateful: true
 }
 op {
   name: "GetSessionHandleV2"
   input_arg {
     name: "value"
-    description: "The tensor to be stored."
     type_attr: "T"
   }
   output_arg {
     name: "handle"
-    description: "The handle for the tensor stored in the session state, represented\nas a ResourceHandle object."
     type: DT_RESOURCE
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Store the input tensor in the state of the current session."
   is_stateful: true
 }
 op {
   name: "GetSessionTensor"
   input_arg {
     name: "handle"
-    description: "The handle for a tensor stored in the session state."
     type: DT_STRING
   }
   output_arg {
     name: "value"
-    description: "The tensor for the given handle."
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of the output value."
   }
-  summary: "Get the value of the tensor specified by its handle."
+  is_stateful: true
 }
 op {
   name: "Greater"
@@ -10387,10 +9627,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -10398,8 +9639,6 @@ op {
       }
     }
   }
-  summary: "Returns the truth value of (x > y) element-wise."
-  description: "*NOTE*: `Greater` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "GreaterEqual"
@@ -10423,10 +9662,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -10434,8 +9674,6 @@ op {
       }
     }
   }
-  summary: "Returns the truth value of (x >= y) element-wise."
-  description: "*NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "GroupByWindowDataset"
@@ -10462,7 +9700,6 @@ op {
   attr {
     name: "key_func"
     type: "func"
-    description: "A function mapping an element of `input_dataset`, concatenated\nwith `key_func_other_arguments` to a scalar value of type DT_INT64."
   }
   attr {
     name: "reduce_func"
@@ -10499,19 +9736,31 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that computes a windowed group-by on `input_dataset`."
-  description: "// TODO(mrry): Support non-int64 keys."
+}
+op {
+  name: "GuaranteeConst"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  is_stateful: true
 }
 op {
   name: "HSVToRGB"
   input_arg {
     name: "images"
-    description: "1-D or higher rank. HSV data to convert. Last dimension must be size 3."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "`images` converted to RGB."
     type_attr: "T"
   }
   attr {
@@ -10522,19 +9771,18 @@ op {
     }
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Convert one or more images from HSV to RGB."
-  description: "Outputs a tensor of the same shape as the `images` tensor, containing the RGB\nvalue of the pixels. The output is only well defined if the value in `images`\nare in `[0,1]`.\n\nSee `rgb_to_hsv` for a description of the HSV encoding."
 }
 op {
   name: "HashTable"
   output_arg {
     name: "table_handle"
-    description: "Handle to a table."
     type: DT_STRING
     is_ref: true
   }
@@ -10544,7 +9792,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -10552,7 +9799,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is shared under the given name across\nmultiple sessions."
   }
   attr {
     name: "use_node_name_sharing"
@@ -10560,27 +9806,21 @@ op {
     default_value {
       b: false
     }
-    description: "If true and shared_name is empty, the table is shared\nusing the node name."
   }
   attr {
     name: "key_dtype"
     type: "type"
-    description: "Type of the table keys."
   }
   attr {
     name: "value_dtype"
     type: "type"
-    description: "Type of the table values."
   }
-  summary: "Creates a non-initialized hash table."
-  description: "This op creates a hash table, specifying the type of its keys and values.\nBefore using the table you will have to initialize it.  After initialization the\ntable will be immutable."
   is_stateful: true
 }
 op {
   name: "HashTableV2"
   output_arg {
     name: "table_handle"
-    description: "Handle to a table."
     type: DT_RESOURCE
   }
   attr {
@@ -10589,7 +9829,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -10597,7 +9836,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is shared under the given name across\nmultiple sessions."
   }
   attr {
     name: "use_node_name_sharing"
@@ -10605,42 +9843,33 @@ op {
     default_value {
       b: false
     }
-    description: "If true and shared_name is empty, the table is shared\nusing the node name."
   }
   attr {
     name: "key_dtype"
     type: "type"
-    description: "Type of the table keys."
   }
   attr {
     name: "value_dtype"
     type: "type"
-    description: "Type of the table values."
   }
-  summary: "Creates a non-initialized hash table."
-  description: "This op creates a hash table, specifying the type of its keys and values.\nBefore using the table you will have to initialize it.  After initialization the\ntable will be immutable."
   is_stateful: true
 }
 op {
   name: "HistogramFixedWidth"
   input_arg {
     name: "values"
-    description: "Numeric `Tensor`."
     type_attr: "T"
   }
   input_arg {
     name: "value_range"
-    description: "Shape [2] `Tensor` of same `dtype` as `values`.\nvalues <= value_range[0] will be mapped to hist[0],\nvalues >= value_range[1] will be mapped to hist[-1]."
     type_attr: "T"
   }
   input_arg {
     name: "nbins"
-    description: "Scalar `int32 Tensor`.  Number of histogram bins."
     type: DT_INT32
   }
   output_arg {
     name: "out"
-    description: "A 1-D `Tensor` holding histogram of values."
     type_attr: "dtype"
   }
   attr {
@@ -10668,24 +9897,19 @@ op {
       }
     }
   }
-  summary: "Return histogram of values."
-  description: "Given the tensor `values`, this operation returns a rank 1 histogram counting\nthe number of entries in `values` that fall into every bin.  The bins are\nequal width and determined by the arguments `value_range` and `nbins`.\n\n```python\n# Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)\nnbins = 5\nvalue_range = [0.0, 5.0]\nnew_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]\n\nwith tf.get_default_session() as sess:\n  hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)\n  variables.global_variables_initializer().run()\n  sess.run(hist) => [2, 1, 1, 0, 2]\n```"
 }
 op {
   name: "HistogramSummary"
   input_arg {
     name: "tag"
-    description: "Scalar.  Tag to use for the `Summary.Value`."
     type: DT_STRING
   }
   input_arg {
     name: "values"
-    description: "Any shape. Values to use to build the histogram."
     type_attr: "T"
   }
   output_arg {
     name: "summary"
-    description: "Scalar. Serialized `Summary` protocol buffer."
     type: DT_STRING
   }
   attr {
@@ -10699,10 +9923,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -10710,113 +9935,84 @@ op {
       }
     }
   }
-  summary: "Outputs a `Summary` protocol buffer with a histogram."
-  description: "The generated\n[`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)\nhas one summary value containing a histogram for `values`.\n\nThis op reports an `InvalidArgument` error if any value is not finite."
 }
 op {
   name: "IFFT"
   input_arg {
     name: "input"
-    description: "A complex64 tensor."
     type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    description: "A complex64 tensor of the same shape as `input`. The inner-most\n  dimension of `input` is replaced with its inverse 1D Fourier transform.\n\n@compatibility(numpy)\nEquivalent to np.fft.ifft\n@end_compatibility"
     type: DT_COMPLEX64
   }
-  summary: "Inverse fast Fourier transform."
-  description: "Computes the inverse 1-dimensional discrete Fourier transform over the\ninner-most dimension of `input`."
 }
 op {
   name: "IFFT2D"
   input_arg {
     name: "input"
-    description: "A complex64 tensor."
     type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    description: "A complex64 tensor of the same shape as `input`. The inner-most 2\n  dimensions of `input` are replaced with their inverse 2D Fourier transform.\n\n@compatibility(numpy)\nEquivalent to np.fft.ifft2\n@end_compatibility"
     type: DT_COMPLEX64
   }
-  summary: "Inverse 2D fast Fourier transform."
-  description: "Computes the inverse 2-dimensional discrete Fourier transform over the\ninner-most 2 dimensions of `input`."
 }
 op {
   name: "IFFT3D"
   input_arg {
     name: "input"
-    description: "A complex64 tensor."
     type: DT_COMPLEX64
   }
   output_arg {
     name: "output"
-    description: "A complex64 tensor of the same shape as `input`. The inner-most 3\n  dimensions of `input` are replaced with their inverse 3D Fourier transform.\n\n@compatibility(numpy)\nEquivalent to np.fft.ifftn with 3 dimensions.\n@end_compatibility"
     type: DT_COMPLEX64
   }
-  summary: "Inverse 3D fast Fourier transform."
-  description: "Computes the inverse 3-dimensional discrete Fourier transform over the\ninner-most 3 dimensions of `input`."
 }
 op {
   name: "IRFFT"
   input_arg {
     name: "input"
-    description: "A complex64 tensor."
     type: DT_COMPLEX64
   }
   input_arg {
     name: "fft_length"
-    description: "An int32 tensor of shape [1]. The FFT length."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "A float32 tensor of the same rank as `input`. The inner-most\n  dimension of `input` is replaced with the `fft_length` samples of its inverse\n  1D Fourier transform.\n\n@compatibility(numpy)\nEquivalent to np.fft.irfft\n@end_compatibility"
     type: DT_FLOAT
   }
-  summary: "Inverse real-valued fast Fourier transform."
-  description: "Computes the inverse 1-dimensional discrete Fourier transform of a real-valued\nsignal over the inner-most dimension of `input`.\n\nThe inner-most dimension of `input` is assumed to be the result of `RFFT`: the\n`fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If\n`fft_length` is not provided, it is computed from the size of the inner-most\ndimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to\ncompute `input` is odd, it should be provided since it cannot be inferred\nproperly.\n\nAlong the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller\nthan the corresponding dimension of `input`, the dimension is cropped. If it is\nlarger, the dimension is padded with zeros."
 }
 op {
   name: "IRFFT2D"
   input_arg {
     name: "input"
-    description: "A complex64 tensor."
     type: DT_COMPLEX64
   }
   input_arg {
     name: "fft_length"
-    description: "An int32 tensor of shape [2]. The FFT length for each dimension."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "A float32 tensor of the same rank as `input`. The inner-most 2\n  dimensions of `input` are replaced with the `fft_length` samples of their\n  inverse 2D Fourier transform.\n\n@compatibility(numpy)\nEquivalent to np.fft.irfft2\n@end_compatibility"
     type: DT_FLOAT
   }
-  summary: "Inverse 2D real-valued fast Fourier transform."
-  description: "Computes the inverse 2-dimensional discrete Fourier transform of a real-valued\nsignal over the inner-most 2 dimensions of `input`.\n\nThe inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`:\nThe inner-most dimension contains the `fft_length / 2 + 1` unique components of\nthe DFT of a real-valued signal. If `fft_length` is not provided, it is computed\nfrom the size of the inner-most 2 dimensions of `input`. If the FFT length used\nto compute `input` is odd, it should be provided since it cannot be inferred\nproperly.\n\nAlong each axis `IRFFT2D` is computed on, if `fft_length` (or\n`fft_length / 2 + 1` for the inner-most dimension) is smaller than the\ncorresponding dimension of `input`, the dimension is cropped. If it is larger,\nthe dimension is padded with zeros."
 }
 op {
   name: "IRFFT3D"
   input_arg {
     name: "input"
-    description: "A complex64 tensor."
     type: DT_COMPLEX64
   }
   input_arg {
     name: "fft_length"
-    description: "An int32 tensor of shape [3]. The FFT length for each dimension."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "A float32 tensor of the same rank as `input`. The inner-most 3\n  dimensions of `input` are replaced with the `fft_length` samples of their\n  inverse 3D real Fourier transform.\n\n@compatibility(numpy)\nEquivalent to np.irfftn with 3 dimensions.\n@end_compatibility"
     type: DT_FLOAT
   }
-  summary: "Inverse 3D real-valued fast Fourier transform."
-  description: "Computes the inverse 3-dimensional discrete Fourier transform of a real-valued\nsignal over the inner-most 3 dimensions of `input`.\n\nThe inner-most 3 dimensions of `input` are assumed to be the result of `RFFT3D`:\nThe inner-most dimension contains the `fft_length / 2 + 1` unique components of\nthe DFT of a real-valued signal. If `fft_length` is not provided, it is computed\nfrom the size of the inner-most 3 dimensions of `input`. If the FFT length used\nto compute `input` is odd, it should be provided since it cannot be inferred\nproperly.\n\nAlong each axis `IRFFT3D` is computed on, if `fft_length` (or\n`fft_length / 2 + 1` for the inner-most dimension) is smaller than the\ncorresponding dimension of `input`, the dimension is cropped. If it is larger,\nthe dimension is padded with zeros."
 }
 op {
   name: "Identity"
@@ -10832,7 +10028,6 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Return a tensor with the same shape and contents as the input tensor or value."
 }
 op {
   name: "IdentityN"
@@ -10850,14 +10045,11 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Returns a list of tensors with the same shapes and contents as the input"
-  description: "tensors.\n\nThis op can be used to override the gradient for complicated functions. For\nexample, suppose y = f(x) and we wish to apply a custom function g for backprop\nsuch that dx = g(dy). In Python,\n\n```python\nwith tf.get_default_graph().gradient_override_map(\n    {\'IdentityN\': \'OverrideGradientWithG\'}):\n  y, _ = identity_n([f(x), x])\n\n@tf.RegisterGradient(\'OverrideGradientWithG\')\ndef ApplyG(op, dy, _):\n  return [None, g(dy)]  # Do not backprop to f(x).\n```"
 }
 op {
   name: "IdentityReader"
   output_arg {
     name: "reader_handle"
-    description: "The handle to reference the Reader."
     type: DT_STRING
     is_ref: true
   }
@@ -10867,7 +10059,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -10875,17 +10066,13 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
-  summary: "A Reader that outputs the queued work as both the key and value."
-  description: "To use, enqueue strings in a Queue.  ReaderRead will take the front\nwork string and output (work, work)."
   is_stateful: true
 }
 op {
   name: "IdentityReaderV2"
   output_arg {
     name: "reader_handle"
-    description: "The handle to reference the Reader."
     type: DT_RESOURCE
   }
   attr {
@@ -10894,7 +10081,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -10902,10 +10088,7 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
-  summary: "A Reader that outputs the queued work as both the key and value."
-  description: "To use, enqueue strings in a Queue.  ReaderRead will take the front\nwork string and output (work, work)."
   is_stateful: true
 }
 op {
@@ -10932,8 +10115,6 @@ op {
       }
     }
   }
-  summary: "Compute the lower regularized incomplete Gamma function `Q(a, x)`."
-  description: "The lower regularized incomplete Gamma function is defined as:\n\n\n\\\\(P(a, x) = gamma(a, x) / Gamma(a) = 1 - Q(a, x)\\\\)\n\nwhere\n\n\\\\(gamma(a, x) = int_{0}^{x} t^{a-1} exp(-t) dt\\\\)\n\nis the lower incomplete Gamma function.\n\nNote, above `Q(a, x)` (`Igammac`) is the upper regularized complete\nGamma function."
 }
 op {
   name: "Igammac"
@@ -10959,8 +10140,6 @@ op {
       }
     }
   }
-  summary: "Compute the upper regularized incomplete Gamma function `Q(a, x)`."
-  description: "The upper regularized incomplete Gamma function is defined as:\n\n\\\\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\\\)\n\nwhere\n\n\\\\(Gamma(a, x) = int_{x}^{\\infty} t^{a-1} exp(-t) dt\\\\)\n\nis the upper incomplete Gama function.\n\nNote, above `P(a, x)` (`Igamma`) is the lower regularized complete\nGamma function."
 }
 op {
   name: "IgnoreErrorsDataset"
@@ -10984,7 +10163,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that contains the elements of `input_dataset` ignoring errors."
 }
 op {
   name: "Imag"
@@ -11022,24 +10200,19 @@ op {
       }
     }
   }
-  summary: "Returns the imaginary part of a complex number."
-  description: "Given a tensor `input` of complex numbers, this operation returns a tensor of\ntype `float` that is the imaginary part of each element in `input`. All\nelements in `input` must be complex numbers of the form \\\\(a + bj\\\\), where *a*\nis the real part and *b* is the imaginary part returned by this operation.\n\nFor example:\n\n```\n# tensor \'input\' is [-2.25 + 4.75j, 3.25 + 5.75j]\ntf.imag(input) ==> [4.75, 5.75]\n```"
 }
 op {
   name: "ImageSummary"
   input_arg {
     name: "tag"
-    description: "Scalar. Used to build the `tag` attribute of the summary values."
     type: DT_STRING
   }
   input_arg {
     name: "tensor"
-    description: "4-D of shape `[batch_size, height, width, channels]` where\n`channels` is 1, 3, or 4."
     type_attr: "T"
   }
   output_arg {
     name: "summary"
-    description: "Scalar. Serialized `Summary` protocol buffer."
     type: DT_STRING
   }
   attr {
@@ -11048,7 +10221,6 @@ op {
     default_value {
       i: 3
     }
-    description: "Max number of batch elements to generate images for."
     has_minimum: true
     minimum: 1
   }
@@ -11084,10 +10256,7 @@ op {
         int_val: 255
       }
     }
-    description: "Color to use for pixels with non-finite values."
   }
-  summary: "Outputs a `Summary` protocol buffer with images."
-  description: "The summary has up to `max_images` summary values containing images. The\nimages are built from `tensor` which must be 4-D with shape `[batch_size,\nheight, width, channels]` and where `channels` can be:\n\n*  1: `tensor` is interpreted as Grayscale.\n*  3: `tensor` is interpreted as RGB.\n*  4: `tensor` is interpreted as RGBA.\n\nThe images have the same number of channels as the input tensor. For float\ninput, the values are normalized one image at a time to fit in the range\n`[0, 255]`.  `uint8` values are unchanged.  The op uses two different\nnormalization algorithms:\n\n*  If the input values are all positive, they are rescaled so the largest one\n   is 255.\n\n*  If any input value is negative, the values are shifted so input value 0.0\n   is at 127.  They are then rescaled so that either the smallest value is 0,\n   or the largest one is 255.\n\nThe `tag` argument is a scalar `Tensor` of type `string`.  It is used to\nbuild the `tag` of the summary values:\n\n*  If `max_images` is 1, the summary value tag is \'*tag*/image\'.\n*  If `max_images` is greater than 1, the summary value tags are\n   generated sequentially as \'*tag*/image/0\', \'*tag*/image/1\', etc.\n\nThe `bad_color` argument is the color to use in the generated images for\nnon-finite input values.  It is a `unit8` 1-D tensor of length `channels`.\nEach element must be in the range `[0, 255]` (It represents the value of a\npixel in the output image).  Non-finite values in the input tensor are\nreplaced by this tensor in the output image.  The default value is the color\nred."
 }
 op {
   name: "ImmutableConst"
@@ -11098,42 +10267,33 @@ op {
   attr {
     name: "dtype"
     type: "type"
-    description: "Type of the returned tensor."
   }
   attr {
     name: "shape"
     type: "shape"
-    description: "Shape of the returned tensor."
   }
   attr {
     name: "memory_region_name"
     type: "string"
-    description: "Name of readonly memory region used by the tensor, see\nNewReadOnlyMemoryRegionFromFile in tensorflow::Env."
   }
-  summary: "Returns immutable tensor from memory region."
-  description: "The current implementation memmaps the tensor from a file."
 }
 op {
   name: "InTopK"
   input_arg {
     name: "predictions"
-    description: "A `batch_size` x `classes` tensor."
     type: DT_FLOAT
   }
   input_arg {
     name: "targets"
-    description: "A `batch_size` vector of class ids."
     type_attr: "T"
   }
   output_arg {
     name: "precision"
-    description: "Computed Precision at `k` as a `bool Tensor`."
     type: DT_BOOL
   }
   attr {
     name: "k"
     type: "int"
-    description: "Number of top elements to look at for computing precision."
   }
   attr {
     name: "T"
@@ -11148,29 +10308,23 @@ op {
       }
     }
   }
-  summary: "Says whether the targets are in the top `K` predictions."
-  description: "This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the\nprediction for the target class is among the top `k` predictions among\nall predictions for example `i`. Note that the behavior of `InTopK` differs\nfrom the `TopK` op in its handling of ties; if multiple classes have the\nsame prediction value and straddle the top-`k` boundary, all of those\nclasses are considered to be in the top `k`.\n\nMore formally, let\n\n  \\\\(predictions_i\\\\) be the predictions for all classes for example `i`,\n  \\\\(targets_i\\\\) be the target class for example `i`,\n  \\\\(out_i\\\\) be the output for example `i`,\n\n$$out_i = predictions_{i, targets_i} \\in TopKIncludingTies(predictions_i)$$"
 }
 op {
   name: "InTopKV2"
   input_arg {
     name: "predictions"
-    description: "A `batch_size` x `classes` tensor."
     type: DT_FLOAT
   }
   input_arg {
     name: "targets"
-    description: "A `batch_size` vector of class ids."
     type_attr: "T"
   }
   input_arg {
     name: "k"
-    description: "Number of top elements to look at for computing precision."
     type_attr: "T"
   }
   output_arg {
     name: "precision"
-    description: "Computed precision at `k` as a `bool Tensor`."
     type: DT_BOOL
   }
   attr {
@@ -11186,25 +10340,20 @@ op {
       }
     }
   }
-  summary: "Says whether the targets are in the top `K` predictions."
-  description: "This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the\nprediction for the target class is among the top `k` predictions among\nall predictions for example `i`. Note that the behavior of `InTopK` differs\nfrom the `TopK` op in its handling of ties; if multiple classes have the\nsame prediction value and straddle the top-`k` boundary, all of those\nclasses are considered to be in the top `k`.\n\nMore formally, let\n\n  \\\\(predictions_i\\\\) be the predictions for all classes for example `i`,\n  \\\\(targets_i\\\\) be the target class for example `i`,\n  \\\\(out_i\\\\) be the output for example `i`,\n\n$$out_i = predictions_{i, targets_i} \\in TopKIncludingTies(predictions_i)$$"
 }
 op {
   name: "InitializeTable"
   input_arg {
     name: "table_handle"
-    description: "Handle to a table which will be initialized."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "keys"
-    description: "Keys of type Tkey."
     type_attr: "Tkey"
   }
   input_arg {
     name: "values"
-    description: "Values of type Tval."
     type_attr: "Tval"
   }
   attr {
@@ -11215,32 +10364,27 @@ op {
     name: "Tval"
     type: "type"
   }
-  summary: "Table initializer that takes two tensors for keys and values respectively."
 }
 op {
   name: "InitializeTableFromTextFile"
   input_arg {
     name: "table_handle"
-    description: "Handle to a table which will be initialized."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "filename"
-    description: "Filename of a vocabulary text file."
     type: DT_STRING
   }
   attr {
     name: "key_index"
     type: "int"
-    description: "Column index in a line to get the table `key` values from."
     has_minimum: true
     minimum: -2
   }
   attr {
     name: "value_index"
     type: "int"
-    description: "Column index that represents information of a line to get the table\n`value` values from."
     has_minimum: true
     minimum: -2
   }
@@ -11250,7 +10394,6 @@ op {
     default_value {
       i: -1
     }
-    description: "Number of elements of the file, use -1 if unknown."
     has_minimum: true
     minimum: -1
   }
@@ -11260,34 +10403,27 @@ op {
     default_value {
       s: "\t"
     }
-    description: "Delimiter to separate fields in a line."
   }
-  summary: "Initializes a table from a text file."
-  description: "It inserts one key-value pair into the table for each line of the file.\nThe key and value is extracted from the whole line content, elements from the\nsplit line based on `delimiter` or the line number (starting from zero).\nWhere to extract the key and value from a line is specified by `key_index` and\n`value_index`.\n\n- A value of -1 means use the line number(starting from zero), expects `int64`.\n- A value of -2 means use the whole line content, expects `string`.\n- A value >= 0 means use the index (starting at zero) of the split line based\n  on `delimiter`."
 }
 op {
   name: "InitializeTableFromTextFileV2"
   input_arg {
     name: "table_handle"
-    description: "Handle to a table which will be initialized."
     type: DT_RESOURCE
   }
   input_arg {
     name: "filename"
-    description: "Filename of a vocabulary text file."
     type: DT_STRING
   }
   attr {
     name: "key_index"
     type: "int"
-    description: "Column index in a line to get the table `key` values from."
     has_minimum: true
     minimum: -2
   }
   attr {
     name: "value_index"
     type: "int"
-    description: "Column index that represents information of a line to get the table\n`value` values from."
     has_minimum: true
     minimum: -2
   }
@@ -11297,7 +10433,6 @@ op {
     default_value {
       i: -1
     }
-    description: "Number of elements of the file, use -1 if unknown."
     has_minimum: true
     minimum: -1
   }
@@ -11307,27 +10442,21 @@ op {
     default_value {
       s: "\t"
     }
-    description: "Delimiter to separate fields in a line."
   }
-  summary: "Initializes a table from a text file."
-  description: "It inserts one key-value pair into the table for each line of the file.\nThe key and value is extracted from the whole line content, elements from the\nsplit line based on `delimiter` or the line number (starting from zero).\nWhere to extract the key and value from a line is specified by `key_index` and\n`value_index`.\n\n- A value of -1 means use the line number(starting from zero), expects `int64`.\n- A value of -2 means use the whole line content, expects `string`.\n- A value >= 0 means use the index (starting at zero) of the split line based\n  on `delimiter`."
   is_stateful: true
 }
 op {
   name: "InitializeTableV2"
   input_arg {
     name: "table_handle"
-    description: "Handle to a table which will be initialized."
     type: DT_RESOURCE
   }
   input_arg {
     name: "keys"
-    description: "Keys of type Tkey."
     type_attr: "Tkey"
   }
   input_arg {
     name: "values"
-    description: "Values of type Tval."
     type_attr: "Tval"
   }
   attr {
@@ -11338,7 +10467,6 @@ op {
     name: "Tval"
     type: "type"
   }
-  summary: "Table initializer that takes two tensors for keys and values respectively."
   is_stateful: true
 }
 op {
@@ -11366,7 +10494,6 @@ op {
   attr {
     name: "f"
     type: "func"
-    description: "A function mapping elements of `input_dataset`, concatenated with\n`other_arguments`, to a Dataset variant that contains elements matching\n`output_types` and `output_shapes`."
   }
   attr {
     name: "Targuments"
@@ -11385,8 +10512,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that applies `f` to the outputs of `input_dataset`."
-  description: "Unlike MapDataset, the `f` in InterleaveDataset is expected to return\na Dataset variant, and InterleaveDataset will flatten successive\nresults into a single Dataset. Unlike FlatMapDataset,\nInterleaveDataset will interleave sequences of up to `block_length`\nconsecutive elements from `cycle_length` input elements."
 }
 op {
   name: "Inv"
@@ -11404,6 +10529,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -11413,12 +10539,6 @@ op {
       }
     }
   }
-  summary: "Computes the reciprocal of x element-wise."
-  description: "I.e., \\\\(y = 1 / x\\\\)."
-  deprecation {
-    version: 17
-    explanation: "Use Reciprocal"
-  }
 }
 op {
   name: "InvGrad"
@@ -11440,6 +10560,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -11447,12 +10568,6 @@ op {
       }
     }
   }
-  summary: "Computes the gradient for the inverse of `x` wrt its input."
-  description: "Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`\nis the corresponding input gradient."
-  deprecation {
-    version: 17
-    explanation: "Use ReciprocalGrad"
-  }
 }
 op {
   name: "Invert"
@@ -11480,19 +10595,15 @@ op {
       }
     }
   }
-  summary: "Flips all bits elementwise."
-  description: "The result will have exactly those bits set, that are not set in `x`. The\ncomputation is performed on the underlying representation of x."
 }
 op {
   name: "InvertPermutation"
   input_arg {
     name: "x"
-    description: "1-D."
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    description: "1-D."
     type_attr: "T"
   }
   attr {
@@ -11508,8 +10619,6 @@ op {
       }
     }
   }
-  summary: "Computes the inverse permutation of a tensor."
-  description: "This operation computes the inverse of an index permutation. It takes a 1-D\ninteger tensor `x`, which represents the indices of a zero-based array, and\nswaps each value with its index position. In other words, for an output tensor\n`y` and an input tensor `x`, this operation computes the following:\n\n`y[x[i]] = i for i in [0, 1, ..., len(x) - 1]`\n\nThe values must include 0. There can be no duplicate values or negative values.\n\nFor example:\n\n```\n# tensor `x` is [3, 4, 0, 2, 1]\ninvert_permutation(x) ==> [2, 4, 3, 0, 1]\n```"
 }
 op {
   name: "IsFinite"
@@ -11527,13 +10636,12 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Returns which elements of x are finite."
-  description: "@compatibility(numpy)\nEquivalent to np.isfinite\n@end_compatibility"
 }
 op {
   name: "IsInf"
@@ -11551,13 +10659,12 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Returns which elements of x are Inf."
-  description: "@compatibility(numpy)\nEquivalent to np.isinf\n@end_compatibility"
 }
 op {
   name: "IsNan"
@@ -11575,19 +10682,17 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Returns which elements of x are NaN."
-  description: "@compatibility(numpy)\nEquivalent to np.isnan\n@end_compatibility"
 }
 op {
   name: "IsVariableInitialized"
   input_arg {
     name: "ref"
-    description: "Should be from a `Variable` node. May be uninitialized."
     type_attr: "dtype"
     is_ref: true
   }
@@ -11598,17 +10703,13 @@ op {
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of elements in the variable tensor."
   }
-  summary: "Checks whether a tensor has been initialized."
-  description: "Outputs boolean scalar indicating whether the tensor has been initialized."
   allows_uninitialized_input: true
 }
 op {
   name: "Iterator"
   output_arg {
     name: "handle"
-    description: "A handle to the iterator that can be passed to a \"MakeIterator\"\nor \"IteratorGetNext\" op."
     type: DT_RESOURCE
   }
   attr {
@@ -11631,19 +10732,16 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "A container for an iterator resource."
   is_stateful: true
 }
 op {
   name: "IteratorFromStringHandle"
   input_arg {
     name: "string_handle"
-    description: "A string representation of the given handle."
     type: DT_STRING
   }
   output_arg {
     name: "resource_handle"
-    description: "A handle to an iterator resource."
     type: DT_RESOURCE
   }
   attr {
@@ -11653,7 +10751,6 @@ op {
       list {
       }
     }
-    description: "If specified, defines the type of each tuple component in an\nelement produced by the resulting iterator."
     has_minimum: true
   }
   attr {
@@ -11663,10 +10760,8 @@ op {
       list {
       }
     }
-    description: "If specified, defines the shape of each tuple component in an\nelement produced by the resulting iterator."
     has_minimum: true
   }
-  summary: "Converts the given string representing a handle to an iterator to a resource."
   is_stateful: true
 }
 op {
@@ -11691,7 +10786,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Gets the next output from the given iterator."
   is_stateful: true
 }
 op {
@@ -11704,34 +10798,28 @@ op {
     name: "stats_aggregator_handle"
     type: DT_RESOURCE
   }
-  summary: "Associates the given iterator with the given statistics aggregator."
   is_stateful: true
 }
 op {
   name: "IteratorToStringHandle"
   input_arg {
     name: "resource_handle"
-    description: "A handle to an iterator resource."
     type: DT_RESOURCE
   }
   output_arg {
     name: "string_handle"
-    description: "A string representation of the given handle."
     type: DT_STRING
   }
-  summary: "Converts the given `resource_handle` representing an iterator to a string."
   is_stateful: true
 }
 op {
   name: "L2Loss"
   input_arg {
     name: "t"
-    description: "Typically 2-D, but may have any dimensions."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "0-D."
     type_attr: "T"
   }
   attr {
@@ -11740,19 +10828,17 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "L2 Loss."
-  description: "Computes half the L2 norm of a tensor without the `sqrt`:\n\n    output = sum(t ** 2) / 2"
 }
 op {
   name: "LMDBReader"
   output_arg {
     name: "reader_handle"
-    description: "The handle to reference the Reader."
     type: DT_STRING
     is_ref: true
   }
@@ -11762,7 +10848,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -11770,16 +10855,13 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
-  summary: "A Reader that outputs the records from a LMDB file."
   is_stateful: true
 }
 op {
   name: "LRN"
   input_arg {
     name: "input"
-    description: "4-D."
     type_attr: "T"
   }
   output_arg {
@@ -11792,7 +10874,6 @@ op {
     default_value {
       i: 5
     }
-    description: "0-D.  Half-width of the 1-D normalization window."
   }
   attr {
     name: "bias"
@@ -11800,7 +10881,6 @@ op {
     default_value {
       f: 1
     }
-    description: "An offset (usually positive to avoid dividing by 0)."
   }
   attr {
     name: "alpha"
@@ -11808,7 +10888,6 @@ op {
     default_value {
       f: 1
     }
-    description: "A scale factor, usually positive."
   }
   attr {
     name: "beta"
@@ -11816,7 +10895,6 @@ op {
     default_value {
       f: 0.5
     }
-    description: "An exponent."
   }
   attr {
     name: "T"
@@ -11826,34 +10904,29 @@ op {
     }
     allowed_values {
       list {
-        type: DT_FLOAT
         type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
       }
     }
   }
-  summary: "Local Response Normalization."
-  description: "The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last\ndimension), and each vector is normalized independently.  Within a given vector,\neach component is divided by the weighted, squared sum of inputs within\n`depth_radius`.  In detail,\n\n    sqr_sum[a, b, c, d] =\n        sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)\n    output = input / (bias + alpha * sqr_sum) ** beta\n\nFor details, see [Krizhevsky et al., ImageNet classification with deep\nconvolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks)."
 }
 op {
   name: "LRNGrad"
   input_arg {
     name: "input_grads"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "input_image"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "output_image"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The gradients for LRN."
     type_attr: "T"
   }
   attr {
@@ -11862,7 +10935,6 @@ op {
     default_value {
       i: 5
     }
-    description: "A depth radius."
   }
   attr {
     name: "bias"
@@ -11870,7 +10942,6 @@ op {
     default_value {
       f: 1
     }
-    description: "An offset (usually > 0 to avoid dividing by 0)."
   }
   attr {
     name: "alpha"
@@ -11878,7 +10949,6 @@ op {
     default_value {
       f: 1
     }
-    description: "A scale factor, usually positive."
   }
   attr {
     name: "beta"
@@ -11886,7 +10956,6 @@ op {
     default_value {
       f: 0.5
     }
-    description: "An exponent."
   }
   attr {
     name: "T"
@@ -11896,12 +10965,12 @@ op {
     }
     allowed_values {
       list {
-        type: DT_FLOAT
         type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
       }
     }
   }
-  summary: "Gradients for Local Response Normalization."
 }
 op {
   name: "LatencyStatsDataset"
@@ -11929,53 +10998,44 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Records the latency of producing `input_dataset` elements in a StatsAggregator."
 }
 op {
   name: "LearnedUnigramCandidateSampler"
   input_arg {
     name: "true_classes"
-    description: "A batch_size * num_true matrix, in which each row contains the\nIDs of the num_true target_classes in the corresponding original label."
     type: DT_INT64
   }
   output_arg {
     name: "sampled_candidates"
-    description: "A vector of length num_sampled, in which each element is\nthe ID of a sampled candidate."
     type: DT_INT64
   }
   output_arg {
     name: "true_expected_count"
-    description: "A batch_size * num_true matrix, representing\nthe number of times each candidate is expected to occur in a batch\nof sampled candidates. If unique=true, then this is a probability."
     type: DT_FLOAT
   }
   output_arg {
     name: "sampled_expected_count"
-    description: "A vector of length num_sampled, for each sampled\ncandidate representing the number of times the candidate is expected\nto occur in a batch of sampled candidates.  If unique=true, then this is a\nprobability."
     type: DT_FLOAT
   }
   attr {
     name: "num_true"
     type: "int"
-    description: "Number of true labels per context."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "num_sampled"
     type: "int"
-    description: "Number of candidates to randomly sample."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "unique"
     type: "bool"
-    description: "If unique is true, we sample with rejection, so that all sampled\ncandidates in a batch are unique. This requires some approximation to\nestimate the post-rejection sampling probabilities."
   }
   attr {
     name: "range_max"
     type: "int"
-    description: "The sampler will sample integers from the interval [0, range_max)."
     has_minimum: true
     minimum: 1
   }
@@ -11985,7 +11045,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -11993,10 +11052,7 @@ op {
     default_value {
       i: 0
     }
-    description: "An second seed to avoid seed collision."
   }
-  summary: "Generates labels for candidate sampling with a learned unigram distribution."
-  description: "See explanations of candidate sampling and the data formats at\ngo/candidate-sampling.\n\nFor each batch, this op picks a single set of sampled candidate labels.\n\nThe advantages of sampling candidates per-batch are simplicity and the\npossibility of efficient dense matrix multiplication. The disadvantage is that\nthe sampled candidates must be chosen independently of the context and of the\ntrue labels."
   is_stateful: true
 }
 op {
@@ -12029,8 +11085,6 @@ op {
       }
     }
   }
-  summary: "Elementwise computes the bitwise left-shift of `x` and `y`."
-  description: "If `y` is negative, or greater than or equal to the width of `x` in bits the\nresult is implementation defined."
   is_commutative: true
 }
 op {
@@ -12055,10 +11109,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -12066,8 +11121,6 @@ op {
       }
     }
   }
-  summary: "Returns the truth value of (x < y) element-wise."
-  description: "*NOTE*: `Less` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "LessEqual"
@@ -12091,10 +11144,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -12102,8 +11156,6 @@ op {
       }
     }
   }
-  summary: "Returns the truth value of (x <= y) element-wise."
-  description: "*NOTE*: `LessEqual` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "Lgamma"
@@ -12121,33 +11173,29 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes the log of the absolute value of `Gamma(x)` element-wise."
 }
 op {
   name: "LinSpace"
   input_arg {
     name: "start"
-    description: "First entry in the range."
     type_attr: "T"
   }
   input_arg {
     name: "stop"
-    description: "Last entry in the range."
     type_attr: "T"
   }
   input_arg {
     name: "num"
-    description: "Number of values to generate."
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "1-D. The generated values."
     type_attr: "T"
   }
   attr {
@@ -12155,6 +11203,7 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -12173,29 +11222,23 @@ op {
       }
     }
   }
-  summary: "Generates values in an interval."
-  description: "A sequence of `num` evenly-spaced values are generated beginning at `start`.\nIf `num > 1`, the values in the sequence increase by `stop - start / num - 1`,\nso that the last one is exactly `stop`.\n\nFor example:\n\n```\ntf.linspace(10.0, 12.0, 3, name=\"linspace\") => [ 10.0  11.0  12.0]\n```"
 }
 op {
   name: "ListDiff"
   input_arg {
     name: "x"
-    description: "1-D. Values to keep."
     type_attr: "T"
   }
   input_arg {
     name: "y"
-    description: "1-D. Values to remove."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "1-D. Values present in `x` but not in `y`."
     type_attr: "T"
   }
   output_arg {
     name: "idx"
-    description: "1-D. Positions of `x` values preserved in `out`."
     type_attr: "out_idx"
   }
   attr {
@@ -12215,51 +11258,41 @@ op {
       }
     }
   }
-  summary: "Computes the difference between two lists of numbers or strings."
-  description: "Given a list `x` and a list `y`, this operation returns a list `out` that\nrepresents all values that are in `x` but not in `y`. The returned list `out`\nis sorted in the same order that the numbers appear in `x` (duplicates are\npreserved). This operation also returns a list `idx` that represents the\nposition of each `out` element in `x`. In other words:\n\n`out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]`\n\nFor example, given this input:\n\n```\nx = [1, 2, 3, 4, 5, 6]\ny = [1, 3, 5]\n```\n\nThis operation would return:\n\n```\nout ==> [2, 4, 6]\nidx ==> [1, 3, 5]\n```"
 }
 op {
   name: "LoadAndRemapMatrix"
   input_arg {
     name: "ckpt_path"
-    description: "Path to the TensorFlow checkpoint (version 2, `TensorBundle`) from\nwhich the old matrix `Tensor` will be loaded."
     type: DT_STRING
   }
   input_arg {
     name: "old_tensor_name"
-    description: "Name of the 2-D `Tensor` to load from checkpoint."
     type: DT_STRING
   }
   input_arg {
     name: "row_remapping"
-    description: "An int `Tensor` of row remappings (generally created by\n`generate_vocab_remapping`).  Even if no row remapping is needed, this must\nstill be an index-valued Tensor (e.g. [0, 1, 2, ...]), or a shifted\nindex-valued `Tensor` (e.g. [8, 9, 10, ...], for partitioned `Variables`)."
     type: DT_INT64
   }
   input_arg {
     name: "col_remapping"
-    description: "An int `Tensor` of column remappings (generally created by\n`generate_vocab_remapping`).  May be a size-0 `Tensor` if only row remapping\nis to be done (e.g. column ordering is the same)."
     type: DT_INT64
   }
   input_arg {
     name: "initializing_values"
-    description: "A float `Tensor` containing  values to fill in for cells\nin the output matrix that are not loaded from the checkpoint. Length must be\nexactly the same as the number of missing / new cells."
     type: DT_FLOAT
   }
   output_arg {
     name: "output_matrix"
-    description: "Output matrix containing existing values loaded from the\ncheckpoint, and with any missing values filled in from initializing_values."
     type: DT_FLOAT
   }
   attr {
     name: "num_rows"
     type: "int"
-    description: "Number of rows (length of the 1st dimension) in the output matrix."
     has_minimum: true
   }
   attr {
     name: "num_cols"
     type: "int"
-    description: "Number of columns (length of the 2nd dimension) in the output matrix."
     has_minimum: true
     minimum: 1
   }
@@ -12269,10 +11302,7 @@ op {
     default_value {
       i: -1
     }
-    description: "The maximum number of rows to load from the checkpoint at\nonce. If less than or equal to 0, the entire matrix will be loaded into\nmemory. Setting this arg trades increased disk reads for lower memory usage."
   }
-  summary: "Loads a 2-D (matrix) `Tensor` with name `old_tensor_name` from the checkpoint"
-  description: "at `ckpt_path` and potentially reorders its rows and columns using the\nspecified remappings.\n\nMost users should use one of the wrapper initializers (such as\n`tf.contrib.framework.load_and_remap_matrix_initializer`) instead of this\nfunction directly.\n\nThe remappings are 1-D tensors with the following properties:\n\n* `row_remapping` must have exactly `num_rows` entries. Row `i` of the output\n  matrix will be initialized from the row corresponding to index\n  `row_remapping[i]` in the old `Tensor` from the checkpoint.\n* `col_remapping` must have either 0 entries (indicating that no column\n  reordering is needed) or `num_cols` entries. If specified, column `j` of the\n  output matrix will be initialized from the column corresponding to index\n  `col_remapping[j]` in the old `Tensor` from the checkpoint.\n* A value of -1 in either of the remappings signifies a \"missing\" entry. In that\n  case, values from the `initializing_values` tensor will be used to fill that\n  missing row or column. If `row_remapping` has `r` missing entries and\n  `col_remapping` has `c` missing entries, then the following condition must be\n  true:\n\n`(r * num_cols) + (c * num_rows) - (r * c) == len(initializing_values)`\n\nThe remapping tensors can be generated using the GenerateVocabRemapping op.\n\nAs an example, with row_remapping = [1, 0, -1], col_remapping = [0, 2, -1],\ninitializing_values = [0.5, -0.5, 0.25, -0.25, 42], and w(i, j) representing\nthe value from row i, column j of the old tensor in the checkpoint, the output\nmatrix will look like the following:\n\n[[w(1, 0),  w(1, 2),  0.5],\n [w(0, 0),  w(0, 2), -0.5],\n [0.25,    -0.25,      42]]"
   is_stateful: true
 }
 op {
@@ -12291,6 +11321,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -12298,8 +11329,6 @@ op {
       }
     }
   }
-  summary: "Computes natural logarithm of x element-wise."
-  description: "I.e., \\\\(y = \\log_e x\\\\)."
 }
 op {
   name: "Log1p"
@@ -12317,6 +11346,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -12324,24 +11354,19 @@ op {
       }
     }
   }
-  summary: "Computes natural logarithm of (1 + x) element-wise."
-  description: "I.e., \\\\(y = \\log_e (1 + x)\\\\)."
 }
 op {
   name: "LogMatrixDeterminant"
   input_arg {
     name: "input"
-    description: "Shape is `[N, M, M]`."
     type_attr: "T"
   }
   output_arg {
     name: "sign"
-    description: "The signs of the log determinants of the inputs. Shape is `[N]`."
     type_attr: "T"
   }
   output_arg {
     name: "log_abs_determinant"
-    description: "The logs of the absolute values of the determinants\nof the N input matrices.  Shape is `[N]`."
     type_attr: "T"
   }
   attr {
@@ -12356,19 +11381,15 @@ op {
       }
     }
   }
-  summary: "Computes the sign and the log of the absolute value of the determinant of"
-  description: "one or more square matrices.\n\nThe input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions\nform square matrices. The outputs are two tensors containing the signs and\nabsolute values of the log determinants for all N input submatrices\n`[..., :, :]` such that the determinant = sign*exp(log_abs_determinant).\nThe log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU\nis the LU decomposition of the input and P is the corresponding\npermutation matrix."
 }
 op {
   name: "LogSoftmax"
   input_arg {
     name: "logits"
-    description: "2-D with shape `[batch_size, num_classes]`."
     type_attr: "T"
   }
   output_arg {
     name: "logsoftmax"
-    description: "Same shape as `logits`."
     type_attr: "T"
   }
   attr {
@@ -12377,59 +11398,50 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes log softmax activations."
-  description: "For each batch `i` and class `j` we have\n\n    logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i])))"
 }
 op {
   name: "LogUniformCandidateSampler"
   input_arg {
     name: "true_classes"
-    description: "A batch_size * num_true matrix, in which each row contains the\nIDs of the num_true target_classes in the corresponding original label."
     type: DT_INT64
   }
   output_arg {
     name: "sampled_candidates"
-    description: "A vector of length num_sampled, in which each element is\nthe ID of a sampled candidate."
     type: DT_INT64
   }
   output_arg {
     name: "true_expected_count"
-    description: "A batch_size * num_true matrix, representing\nthe number of times each candidate is expected to occur in a batch\nof sampled candidates. If unique=true, then this is a probability."
     type: DT_FLOAT
   }
   output_arg {
     name: "sampled_expected_count"
-    description: "A vector of length num_sampled, for each sampled\ncandidate representing the number of times the candidate is expected\nto occur in a batch of sampled candidates.  If unique=true, then this is a\nprobability."
     type: DT_FLOAT
   }
   attr {
     name: "num_true"
     type: "int"
-    description: "Number of true labels per context."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "num_sampled"
     type: "int"
-    description: "Number of candidates to randomly sample."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "unique"
     type: "bool"
-    description: "If unique is true, we sample with rejection, so that all sampled\ncandidates in a batch are unique. This requires some approximation to\nestimate the post-rejection sampling probabilities."
   }
   attr {
     name: "range_max"
     type: "int"
-    description: "The sampler will sample integers from the interval [0, range_max)."
     has_minimum: true
     minimum: 1
   }
@@ -12439,7 +11451,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -12447,10 +11458,7 @@ op {
     default_value {
       i: 0
     }
-    description: "An second seed to avoid seed collision."
   }
-  summary: "Generates labels for candidate sampling with a log-uniform distribution."
-  description: "See explanations of candidate sampling and the data formats at\ngo/candidate-sampling.\n\nFor each batch, this op picks a single set of sampled candidate labels.\n\nThe advantages of sampling candidates per-batch are simplicity and the\npossibility of efficient dense matrix multiplication. The disadvantage is that\nthe sampled candidates must be chosen independently of the context and of the\ntrue labels."
   is_stateful: true
 }
 op {
@@ -12467,8 +11475,6 @@ op {
     name: "z"
     type: DT_BOOL
   }
-  summary: "Returns the truth value of x AND y element-wise."
-  description: "*NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
   is_commutative: true
 }
 op {
@@ -12481,7 +11487,6 @@ op {
     name: "y"
     type: DT_BOOL
   }
-  summary: "Returns the truth value of NOT x element-wise."
 }
 op {
   name: "LogicalOr"
@@ -12497,26 +11502,21 @@ op {
     name: "z"
     type: DT_BOOL
   }
-  summary: "Returns the truth value of x OR y element-wise."
-  description: "*NOTE*: `LogicalOr` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
   is_commutative: true
 }
 op {
   name: "LookupTableExport"
   input_arg {
     name: "table_handle"
-    description: "Handle to the table."
     type: DT_STRING
     is_ref: true
   }
   output_arg {
     name: "keys"
-    description: "Vector of all keys present in the table."
     type_attr: "Tkeys"
   }
   output_arg {
     name: "values"
-    description: "Tensor of all values in the table. Indexed in parallel with `keys`."
     type_attr: "Tvalues"
   }
   attr {
@@ -12527,23 +11527,19 @@ op {
     name: "Tvalues"
     type: "type"
   }
-  summary: "Outputs all keys and values in the table."
 }
 op {
   name: "LookupTableExportV2"
   input_arg {
     name: "table_handle"
-    description: "Handle to the table."
     type: DT_RESOURCE
   }
   output_arg {
     name: "keys"
-    description: "Vector of all keys present in the table."
     type_attr: "Tkeys"
   }
   output_arg {
     name: "values"
-    description: "Tensor of all values in the table. Indexed in parallel with `keys`."
     type_attr: "Tvalues"
   }
   attr {
@@ -12554,20 +11550,17 @@ op {
     name: "Tvalues"
     type: "type"
   }
-  summary: "Outputs all keys and values in the table."
   is_stateful: true
 }
 op {
   name: "LookupTableFind"
   input_arg {
     name: "table_handle"
-    description: "Handle to the table."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "keys"
-    description: "Any shape.  Keys to look up."
     type_attr: "Tin"
   }
   input_arg {
@@ -12576,7 +11569,6 @@ op {
   }
   output_arg {
     name: "values"
-    description: "Same shape as `keys`.  Values found in the table, or `default_values`\nfor missing keys."
     type_attr: "Tout"
   }
   attr {
@@ -12587,19 +11579,15 @@ op {
     name: "Tout"
     type: "type"
   }
-  summary: "Looks up keys in a table, outputs the corresponding values."
-  description: "The tensor `keys` must of the same type as the keys of the table.\nThe output `values` is of the type of the table values.\n\nThe scalar `default_value` is the value output for keys not present in the\ntable. It must also be of the same type as the table values."
 }
 op {
   name: "LookupTableFindV2"
   input_arg {
     name: "table_handle"
-    description: "Handle to the table."
     type: DT_RESOURCE
   }
   input_arg {
     name: "keys"
-    description: "Any shape.  Keys to look up."
     type_attr: "Tin"
   }
   input_arg {
@@ -12608,7 +11596,6 @@ op {
   }
   output_arg {
     name: "values"
-    description: "Same shape as `keys`.  Values found in the table, or `default_values`\nfor missing keys."
     type_attr: "Tout"
   }
   attr {
@@ -12619,26 +11606,21 @@ op {
     name: "Tout"
     type: "type"
   }
-  summary: "Looks up keys in a table, outputs the corresponding values."
-  description: "The tensor `keys` must of the same type as the keys of the table.\nThe output `values` is of the type of the table values.\n\nThe scalar `default_value` is the value output for keys not present in the\ntable. It must also be of the same type as the table values."
   is_stateful: true
 }
 op {
   name: "LookupTableImport"
   input_arg {
     name: "table_handle"
-    description: "Handle to the table."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "keys"
-    description: "Any shape.  Keys to look up."
     type_attr: "Tin"
   }
   input_arg {
     name: "values"
-    description: "Values to associate with keys."
     type_attr: "Tout"
   }
   attr {
@@ -12649,24 +11631,19 @@ op {
     name: "Tout"
     type: "type"
   }
-  summary: "Replaces the contents of the table with the specified keys and values."
-  description: "The tensor `keys` must be of the same type as the keys of the table.\nThe tensor `values` must be of the type of the table values."
 }
 op {
   name: "LookupTableImportV2"
   input_arg {
     name: "table_handle"
-    description: "Handle to the table."
     type: DT_RESOURCE
   }
   input_arg {
     name: "keys"
-    description: "Any shape.  Keys to look up."
     type_attr: "Tin"
   }
   input_arg {
     name: "values"
-    description: "Values to associate with keys."
     type_attr: "Tout"
   }
   attr {
@@ -12677,26 +11654,21 @@ op {
     name: "Tout"
     type: "type"
   }
-  summary: "Replaces the contents of the table with the specified keys and values."
-  description: "The tensor `keys` must be of the same type as the keys of the table.\nThe tensor `values` must be of the type of the table values."
   is_stateful: true
 }
 op {
   name: "LookupTableInsert"
   input_arg {
     name: "table_handle"
-    description: "Handle to the table."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "keys"
-    description: "Any shape.  Keys to look up."
     type_attr: "Tin"
   }
   input_arg {
     name: "values"
-    description: "Values to associate with keys."
     type_attr: "Tout"
   }
   attr {
@@ -12707,24 +11679,19 @@ op {
     name: "Tout"
     type: "type"
   }
-  summary: "Updates the table to associates keys with values."
-  description: "The tensor `keys` must be of the same type as the keys of the table.\nThe tensor `values` must be of the type of the table values."
 }
 op {
   name: "LookupTableInsertV2"
   input_arg {
     name: "table_handle"
-    description: "Handle to the table."
     type: DT_RESOURCE
   }
   input_arg {
     name: "keys"
-    description: "Any shape.  Keys to look up."
     type_attr: "Tin"
   }
   input_arg {
     name: "values"
-    description: "Values to associate with keys."
     type_attr: "Tout"
   }
   attr {
@@ -12735,54 +11702,42 @@ op {
     name: "Tout"
     type: "type"
   }
-  summary: "Updates the table to associates keys with values."
-  description: "The tensor `keys` must be of the same type as the keys of the table.\nThe tensor `values` must be of the type of the table values."
   is_stateful: true
 }
 op {
   name: "LookupTableSize"
   input_arg {
     name: "table_handle"
-    description: "Handle to the table."
     type: DT_STRING
     is_ref: true
   }
   output_arg {
     name: "size"
-    description: "Scalar that contains number of elements in the table."
     type: DT_INT64
   }
-  summary: "Computes the number of elements in the given table."
 }
 op {
   name: "LookupTableSizeV2"
   input_arg {
     name: "table_handle"
-    description: "Handle to the table."
     type: DT_RESOURCE
   }
   output_arg {
     name: "size"
-    description: "Scalar that contains number of elements in the table."
     type: DT_INT64
   }
-  summary: "Computes the number of elements in the given table."
   is_stateful: true
 }
 op {
   name: "LoopCond"
   input_arg {
     name: "input"
-    description: "A boolean scalar, representing the branch predicate of the Switch op."
     type: DT_BOOL
   }
   output_arg {
     name: "output"
-    description: "The same tensor as `input`."
     type: DT_BOOL
   }
-  summary: "Forwards the input to the output."
-  description: "This operator represents the loop termination condition used by the\n\"pivot\" switches of a loop."
 }
 op {
   name: "MakeIterator"
@@ -12794,8 +11749,6 @@ op {
     name: "iterator"
     type: DT_RESOURCE
   }
-  summary: "Makes a new iterator from the given `dataset` and stores it in `iterator`."
-  description: "This operation may be executed multiple times. Each execution will reset the\niterator in `iterator` to the first element of `dataset`."
   is_stateful: true
 }
 op {
@@ -12810,12 +11763,10 @@ op {
   }
   input_arg {
     name: "batch_size"
-    description: "A scalar representing the number of elements to accumulate in a\nbatch. It determines the number of concurrent invocations of `f` that process\nelements from `input_dataset` in parallel."
     type: DT_INT64
   }
   input_arg {
     name: "num_parallel_batches"
-    description: "A scalar representing the number of batches to create in\nparallel. Processing multiple batches in parallel benefits workloads prone to\nstragglers."
     type: DT_INT64
   }
   output_arg {
@@ -12843,8 +11794,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that applies `f` to the outputs of `input_dataset` and then"
-  description: "batches `batch_size` of them.\n\nUnlike a \"MapDataset\", which applies `f` sequentially, this dataset invokes up\nto `batch_size * num_parallel_batches` copies of `f` in parallel."
 }
 op {
   name: "MapClear"
@@ -12882,7 +11831,6 @@ op {
       s: ""
     }
   }
-  summary: "Op removes all elements in the underlying container."
   is_stateful: true
 }
 op {
@@ -12920,7 +11868,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that applies `f` to the outputs of `input_dataset`."
 }
 op {
   name: "MapIncompleteSize"
@@ -12962,7 +11909,6 @@ op {
       s: ""
     }
   }
-  summary: "Op returns the number of incomplete elements in the underlying container."
   is_stateful: true
 }
 op {
@@ -13015,8 +11961,6 @@ op {
       s: ""
     }
   }
-  summary: "Op peeks at the values at the specified key.  If the"
-  description: "underlying container does not contain this key\nthis op will block until it does."
   is_stateful: true
 }
 op {
@@ -13059,14 +12003,12 @@ op {
       s: ""
     }
   }
-  summary: "Op returns the number of elements in the underlying container."
   is_stateful: true
 }
 op {
   name: "MapStage"
   input_arg {
     name: "key"
-    description: "int64"
     type: DT_INT64
   }
   input_arg {
@@ -13075,7 +12017,6 @@ op {
   }
   input_arg {
     name: "values"
-    description: "a list of tensors\ndtypes A list of data types that inserted values should adhere to."
     type_list_attr: "fake_dtypes"
   }
   attr {
@@ -13084,7 +12025,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Maximum number of elements in the Staging Area. If > 0, inserts\non the container will block when the capacity is reached."
     has_minimum: true
   }
   attr {
@@ -13111,7 +12051,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue is placed in the given container. Otherwise,\na default container is used."
   }
   attr {
     name: "shared_name"
@@ -13119,9 +12058,7 @@ op {
     default_value {
       s: ""
     }
-    description: "It is necessary to match this name to the matching Unstage Op."
   }
-  summary: "Stage (key, values) in the underlying container which behaves like a hashtable."
   is_stateful: true
 }
 op {
@@ -13174,8 +12111,6 @@ op {
       s: ""
     }
   }
-  summary: "Op removes and returns the values associated with the key"
-  description: "from the underlying container.   If the underlying container\ndoes not contain this key, the op will block until it does."
   is_stateful: true
 }
 op {
@@ -13228,8 +12163,6 @@ op {
       s: ""
     }
   }
-  summary: "Op removes and returns a random (key, value)"
-  description: "from the underlying container.   If the underlying container\ndoes not contain elements, the op will block until it does."
   is_stateful: true
 }
 op {
@@ -13252,7 +12185,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, \"a\" is transposed before multiplication."
   }
   attr {
     name: "transpose_b"
@@ -13260,7 +12192,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, \"b\" is transposed before multiplication."
   }
   attr {
     name: "T"
@@ -13268,6 +12199,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -13276,63 +12208,49 @@ op {
       }
     }
   }
-  summary: "Multiply the matrix \"a\" by the matrix \"b\"."
-  description: "The inputs must be two-dimensional matrices and the inner dimension of\n\"a\" (after being transposed if transpose_a is true) must match the\nouter dimension of \"b\" (after being transposed if transposed_b is\ntrue).\n\n*Note*: The default kernel implementation for MatMul on GPUs uses\ncublas."
 }
 op {
   name: "MatchingFiles"
   input_arg {
     name: "pattern"
-    description: "Shell wildcard pattern(s). Scalar or vector of type string."
     type: DT_STRING
   }
   output_arg {
     name: "filenames"
-    description: "A vector of matching filenames."
     type: DT_STRING
   }
-  summary: "Returns the set of files matching one or more glob patterns."
-  description: "Note that this routine only supports wildcard characters in the\nbasename portion of the pattern, not in the directory portion."
 }
 op {
   name: "MatrixBandPart"
   input_arg {
     name: "input"
-    description: "Rank `k` tensor."
     type_attr: "T"
   }
   input_arg {
     name: "num_lower"
-    description: "0-D tensor. Number of subdiagonals to keep. If negative, keep entire\nlower triangle."
     type: DT_INT64
   }
   input_arg {
     name: "num_upper"
-    description: "0-D tensor. Number of superdiagonals to keep. If negative, keep\nentire upper triangle."
     type: DT_INT64
   }
   output_arg {
     name: "band"
-    description: "Rank `k` tensor of the same shape as input. The extracted banded tensor."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Copy a tensor setting everything outside a central band in each innermost matrix"
-  description: "to zero.\n\nThe `band` part is computed as follows:\nAssume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a\ntensor with the same shape where\n\n`band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`.\n\nThe indicator function\n\n`in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) &&\n                 (num_upper < 0 || (n-m) <= num_upper)`.\n\nFor example:\n\n```\n# if \'input\' is [[ 0,  1,  2, 3]\n                 [-1,  0,  1, 2]\n                 [-2, -1,  0, 1]\n                 [-3, -2, -1, 0]],\n\ntf.matrix_band_part(input, 1, -1) ==> [[ 0,  1,  2, 3]\n                                       [-1,  0,  1, 2]\n                                       [ 0, -1,  0, 1]\n                                       [ 0,  0, -1, 0]],\n\ntf.matrix_band_part(input, 2, 1) ==> [[ 0,  1,  0, 0]\n                                      [-1,  0,  1, 0]\n                                      [-2, -1,  0, 1]\n                                      [ 0, -2, -1, 0]]\n```\n\nUseful special cases:\n\n```\n tf.matrix_band_part(input, 0, -1) ==> Upper triangular part.\n tf.matrix_band_part(input, -1, 0) ==> Lower triangular part.\n tf.matrix_band_part(input, 0, 0) ==> Diagonal.\n```"
 }
 op {
   name: "MatrixDeterminant"
   input_arg {
     name: "input"
-    description: "Shape is `[..., M, M]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Shape is `[...]`."
     type_attr: "T"
   }
   attr {
@@ -13347,57 +12265,45 @@ op {
       }
     }
   }
-  summary: "Computes the determinant of one or more square matrices."
-  description: "The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices. The output is a tensor containing the determinants\nfor all input submatrices `[..., :, :]`."
 }
 op {
   name: "MatrixDiag"
   input_arg {
     name: "diagonal"
-    description: "Rank `k`, where `k >= 1`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Rank `k+1`, with `output.shape = diagonal.shape + [diagonal.shape[-1]]`."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Returns a batched diagonal tensor with a given batched diagonal values."
-  description: "Given a `diagonal`, this operation returns a tensor with the `diagonal` and\neverything else padded with zeros. The diagonal is computed as follows:\n\nAssume `diagonal` has `k` dimensions `[I, J, K, ..., N]`, then the output is a\ntensor of rank `k+1` with dimensions [I, J, K, ..., N, N]` where:\n\n`output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n]`.\n\nFor example:\n\n```\n# \'diagonal\' is [[1, 2, 3, 4], [5, 6, 7, 8]]\n\nand diagonal.shape = (2, 4)\n\ntf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0]\n                                     [0, 2, 0, 0]\n                                     [0, 0, 3, 0]\n                                     [0, 0, 0, 4]],\n                                    [[5, 0, 0, 0]\n                                     [0, 6, 0, 0]\n                                     [0, 0, 7, 0]\n                                     [0, 0, 0, 8]]]\n\nwhich has shape (2, 4, 4)\n```"
 }
 op {
   name: "MatrixDiagPart"
   input_arg {
     name: "input"
-    description: "Rank `k` tensor where `k >= 2`."
     type_attr: "T"
   }
   output_arg {
     name: "diagonal"
-    description: "The extracted diagonal(s) having shape\n`diagonal.shape = input.shape[:-2] + [min(input.shape[-2:])]`."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Returns the batched diagonal part of a batched tensor."
-  description: "This operation returns a tensor with the `diagonal` part\nof the batched `input`. The `diagonal` part is computed as follows:\n\nAssume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a\ntensor of rank `k - 1` with dimensions `[I, J, K, ..., min(M, N)]` where:\n\n`diagonal[i, j, k, ..., n] = input[i, j, k, ..., n, n]`.\n\nThe input must be at least a matrix.\n\nFor example:\n\n```\n# \'input\' is [[[1, 0, 0, 0]\n               [0, 2, 0, 0]\n               [0, 0, 3, 0]\n               [0, 0, 0, 4]],\n              [[5, 0, 0, 0]\n               [0, 6, 0, 0]\n               [0, 0, 7, 0]\n               [0, 0, 0, 8]]]\n\nand input.shape = (2, 4, 4)\n\ntf.matrix_diag_part(input) ==> [[1, 2, 3, 4], [5, 6, 7, 8]]\n\nwhich has shape (2, 4)\n```"
 }
 op {
   name: "MatrixExponential"
   input_arg {
     name: "input"
-    description: "Shape is `[..., M, M]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Shape is `[..., M, M]`.\n\n@compatibility(scipy)\nEquivalent to scipy.linalg.expm\n@end_compatibility"
     type_attr: "T"
   }
   attr {
@@ -13412,19 +12318,15 @@ op {
       }
     }
   }
-  summary: "Computes the matrix exponential of one or more square matrices:"
-  description: "exp(A) = \\sum_{n=0}^\\infty A^n/n!\n\nThe exponential is computed using a combination of the scaling and squaring\nmethod and the Pade approximation. Details can be founds in:\nNicholas J. Higham, \"The scaling and squaring method for the matrix exponential\nrevisited,\" SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005.\n\nThe input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices. The output is a tensor of the same shape as the input\ncontaining the exponential for all input submatrices `[..., :, :]`."
 }
 op {
   name: "MatrixInverse"
   input_arg {
     name: "input"
-    description: "Shape is `[..., M, M]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Shape is `[..., M, M]`.\n\n@compatibility(numpy)\nEquivalent to np.linalg.inv\n@end_compatibility"
     type_attr: "T"
   }
   attr {
@@ -13446,48 +12348,59 @@ op {
       }
     }
   }
-  summary: "Computes the inverse of one or more square invertible matrices or their"
-  description: "adjoints (conjugate transposes).\n\nThe input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices. The output is a tensor of the same shape as the input\ncontaining the inverse for all input submatrices `[..., :, :]`.\n\nThe op uses LU decomposition with partial pivoting to compute the inverses.\n\nIf a matrix is not invertible there is no guarantee what the op does. It\nmay detect the condition and raise an exception or it may simply return a\ngarbage result."
+}
+op {
+  name: "MatrixLogarithm"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
 }
 op {
   name: "MatrixSetDiag"
   input_arg {
     name: "input"
-    description: "Rank `k+1`, where `k >= 1`."
     type_attr: "T"
   }
   input_arg {
     name: "diagonal"
-    description: "Rank `k`, where `k >= 1`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Rank `k+1`, with `output.shape = input.shape`."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Returns a batched matrix tensor with new batched diagonal values."
-  description: "Given `input` and `diagonal`, this operation returns a tensor with the\nsame shape and values as `input`, except for the main diagonal of the\ninnermost matrices.  These will be overwritten by the values in `diagonal`.\n\nThe output is computed as follows:\n\nAssume `input` has `k+1` dimensions `[I, J, K, ..., M, N]` and `diagonal` has\n`k` dimensions `[I, J, K, ..., min(M, N)]`.  Then the output is a\ntensor of rank `k+1` with dimensions `[I, J, K, ..., M, N]` where:\n\n  * `output[i, j, k, ..., m, n] = diagonal[i, j, k, ..., n]` for `m == n`.\n  * `output[i, j, k, ..., m, n] = input[i, j, k, ..., m, n]` for `m != n`."
 }
 op {
   name: "MatrixSolve"
   input_arg {
     name: "matrix"
-    description: "Shape is `[..., M, M]`."
     type_attr: "T"
   }
   input_arg {
     name: "rhs"
-    description: "Shape is `[..., M, K]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Shape is `[..., M, K]`."
     type_attr: "T"
   }
   attr {
@@ -13496,7 +12409,6 @@ op {
     default_value {
       b: false
     }
-    description: "Boolean indicating whether to solve with `matrix` or its (block-wise)\nadjoint."
   }
   attr {
     name: "T"
@@ -13510,29 +12422,23 @@ op {
       }
     }
   }
-  summary: "Solves systems of linear equations."
-  description: "`Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is\na tensor shape `[..., M, K]`.  If `adjoint` is `False` then each output matrix\nsatisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`.\nIf `adjoint` is `True` then each output matrix satisfies\n`adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`."
 }
 op {
   name: "MatrixSolveLs"
   input_arg {
     name: "matrix"
-    description: "Shape is `[..., M, N]`."
     type_attr: "T"
   }
   input_arg {
     name: "rhs"
-    description: "Shape is `[..., M, K]`."
     type_attr: "T"
   }
   input_arg {
     name: "l2_regularizer"
-    description: "Scalar tensor.\n\n@compatibility(numpy)\nEquivalent to np.linalg.lstsq\n@end_compatibility"
     type: DT_DOUBLE
   }
   output_arg {
     name: "output"
-    description: "Shape is `[..., N, K]`."
     type_attr: "T"
   }
   attr {
@@ -13554,24 +12460,19 @@ op {
       b: true
     }
   }
-  summary: "Solves one or more linear least-squares problems."
-  description: "`matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions\nform real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same\ntype as `matrix` and shape `[..., M, K]`.\nThe output is a tensor shape `[..., N, K]` where each output matrix solves\neach of the equations\n`matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]`\nin the least squares sense.\n\nWe use the following notation for (complex) matrix and right-hand sides\nin the batch:\n\n`matrix`=\\\\(A \\in \\mathbb{C}^{m \\times n}\\\\),\n`rhs`=\\\\(B  \\in \\mathbb{C}^{m \\times k}\\\\),\n`output`=\\\\(X  \\in \\mathbb{C}^{n \\times k}\\\\),\n`l2_regularizer`=\\\\(\\lambda \\in \\mathbb{R}\\\\).\n\nIf `fast` is `True`, then the solution is computed by solving the normal\nequations using Cholesky decomposition. Specifically, if \\\\(m \\ge n\\\\) then\n\\\\(X = (A^H A + \\lambda I)^{-1} A^H B\\\\), which solves the least-squares\nproblem \\\\(X = \\mathrm{argmin}_{Z \\in \\Re^{n \\times k} } ||A Z - B||_F^2 +\n\\lambda ||Z||_F^2\\\\). If \\\\(m \\lt n\\\\) then `output` is computed as\n\\\\(X = A^H (A A^H + \\lambda I)^{-1} B\\\\), which (for \\\\(\\lambda = 0\\\\)) is the\nminimum-norm solution to the under-determined linear system, i.e.\n\\\\(X = \\mathrm{argmin}_{Z \\in \\mathbb{C}^{n \\times k} } ||Z||_F^2 \\\\),\nsubject to \\\\(A Z = B\\\\). Notice that the fast path is only numerically stable\nwhen \\\\(A\\\\) is numerically full rank and has a condition number\n\\\\(\\mathrm{cond}(A) \\lt \\frac{1}{\\sqrt{\\epsilon_{mach} } }\\\\) or\\\\(\\lambda\\\\) is\nsufficiently large.\n\nIf `fast` is `False` an algorithm based on the numerically robust complete\northogonal decomposition is used. This computes the minimum-norm\nleast-squares solution, even when \\\\(A\\\\) is rank deficient. This path is\ntypically 6-7 times slower than the fast path. If `fast` is `False` then\n`l2_regularizer` is ignored."
 }
 op {
   name: "MatrixTriangularSolve"
   input_arg {
     name: "matrix"
-    description: "Shape is `[..., M, M]`."
     type_attr: "T"
   }
   input_arg {
     name: "rhs"
-    description: "Shape is `[..., M, K]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Shape is `[..., M, K]`."
     type_attr: "T"
   }
   attr {
@@ -13580,7 +12481,6 @@ op {
     default_value {
       b: true
     }
-    description: "Boolean indicating whether the innermost matrices in `matrix` are\nlower or upper triangular."
   }
   attr {
     name: "adjoint"
@@ -13588,7 +12488,6 @@ op {
     default_value {
       b: false
     }
-    description: "Boolean indicating whether to solve with `matrix` or its (block-wise)\n         adjoint.\n\n@compatibility(numpy)\nEquivalent to np.linalg.triangular_solve\n@end_compatibility"
   }
   attr {
     name: "T"
@@ -13602,24 +12501,19 @@ op {
       }
     }
   }
-  summary: "Solves systems of linear equations with upper or lower triangular matrices by"
-  description: "backsubstitution.\n\n`matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form\nsquare matrices. If `lower` is `True` then the strictly upper triangular part\nof each inner-most matrix is assumed to be zero and not accessed.\nIf `lower` is False then the strictly lower triangular part of each inner-most\nmatrix is assumed to be zero and not accessed.\n`rhs` is a tensor of shape `[..., M, K]`.\n\nThe output is a tensor of shape `[..., M, K]`. If `adjoint` is\n`True` then the innermost matrices in `output` satisfy matrix equations\n`matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`.\nIf `adjoint` is `False` then the strictly then the  innermost matrices in\n`output` satisfy matrix equations\n`adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`."
 }
 op {
   name: "Max"
   input_arg {
     name: "input"
-    description: "The tensor to reduce."
     type_attr: "T"
   }
   input_arg {
     name: "reduction_indices"
-    description: "The dimensions to reduce. Must be in the range\n`[-rank(input), rank(input))`."
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "The reduced tensor."
     type_attr: "T"
   }
   attr {
@@ -13628,7 +12522,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, retain reduced dimensions with length 1."
   }
   attr {
     name: "T"
@@ -13637,17 +12530,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -13667,19 +12561,15 @@ op {
       }
     }
   }
-  summary: "Computes the maximum of elements across dimensions of a tensor."
-  description: "Reduces `input` along the dimensions given in `reduction_indices`. Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_indices`. If `keep_dims` is true, the reduced dimensions are\nretained with length 1."
 }
 op {
   name: "MaxPool"
   input_arg {
     name: "input"
-    description: "4-D input to pool over."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The max pooled output tensor."
     type_attr: "T"
   }
   attr {
@@ -13690,6 +12580,8 @@ op {
     }
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -13698,7 +12590,6 @@ op {
         type: DT_INT16
         type: DT_INT8
         type: DT_UINT16
-        type: DT_HALF
         type: DT_QINT8
       }
     }
@@ -13706,21 +12597,18 @@ op {
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "The size of the window for each dimension of the input tensor."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the\ninput tensor."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -13734,7 +12622,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -13743,38 +12630,32 @@ op {
       }
     }
   }
-  summary: "Performs max pooling on the input."
 }
 op {
   name: "MaxPool3D"
   input_arg {
     name: "input"
-    description: "Shape `[batch, depth, rows, cols, channels]` tensor to pool over."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The max pooled output tensor."
     type_attr: "T"
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "1-D tensor of length 5. The size of the window for each dimension of\nthe input tensor. Must have `ksize[0] = ksize[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 5. The stride of the sliding window for each\ndimension of `input`. Must have `strides[0] = strides[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -13788,7 +12669,6 @@ op {
     default_value {
       s: "NDHWC"
     }
-    description: "The data format of the input and output data. With the\ndefault format \"NDHWC\", the data is stored in the order of:\n    [batch, in_depth, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCDHW\", the data storage order is:\n    [batch, in_channels, in_depth, in_height, in_width]."
     allowed_values {
       list {
         s: "NDHWC"
@@ -13801,27 +12681,24 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
   }
-  summary: "Performs 3D max pooling on the input."
 }
 op {
   name: "MaxPool3DGrad"
   input_arg {
     name: "orig_input"
-    description: "The original input tensor."
     type_attr: "TInput"
   }
   input_arg {
     name: "orig_output"
-    description: "The original output tensor."
     type_attr: "TInput"
   }
   input_arg {
     name: "grad"
-    description: "Output backprop of shape `[batch, depth, rows, cols, channels]`."
     type_attr: "T"
   }
   output_arg {
@@ -13831,21 +12708,18 @@ op {
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "1-D tensor of length 5. The size of the window for each dimension of\nthe input tensor. Must have `ksize[0] = ksize[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 5. The stride of the sliding window for each\ndimension of `input`. Must have `strides[0] = strides[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -13859,7 +12733,6 @@ op {
     default_value {
       s: "NDHWC"
     }
-    description: "The data format of the input and output data. With the\ndefault format \"NDHWC\", the data is stored in the order of:\n    [batch, in_depth, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCDHW\", the data storage order is:\n    [batch, in_channels, in_depth, in_height, in_width]."
     allowed_values {
       list {
         s: "NDHWC"
@@ -13875,6 +12748,7 @@ op {
     }
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
@@ -13887,52 +12761,45 @@ op {
     }
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
       }
     }
   }
-  summary: "Computes gradients of max pooling function."
 }
 op {
   name: "MaxPool3DGradGrad"
   input_arg {
     name: "orig_input"
-    description: "The original input tensor."
     type_attr: "T"
   }
   input_arg {
     name: "orig_output"
-    description: "The original output tensor."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "Output backprop of shape `[batch, depth, rows, cols, channels]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Gradients of gradients w.r.t. the input to `max_pool`."
     type_attr: "T"
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "1-D tensor of length 5. The size of the window for each dimension of\nthe input tensor. Must have `ksize[0] = ksize[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "1-D tensor of length 5. The stride of the sliding window for each\ndimension of `input`. Must have `strides[0] = strides[4] = 1`."
     has_minimum: true
     minimum: 5
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -13946,7 +12813,6 @@ op {
     default_value {
       s: "NDHWC"
     }
-    description: "The data format of the input and output data. With the\ndefault format \"NDHWC\", the data is stored in the order of:\n    [batch, in_depth, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCDHW\", the data storage order is:\n    [batch, in_channels, in_depth, in_height, in_width]."
     allowed_values {
       list {
         s: "NDHWC"
@@ -13963,48 +12829,40 @@ op {
       }
     }
   }
-  summary: "Computes second-order gradients of the maxpooling function."
 }
 op {
   name: "MaxPoolGrad"
   input_arg {
     name: "orig_input"
-    description: "The original input tensor."
     type_attr: "T"
   }
   input_arg {
     name: "orig_output"
-    description: "The original output tensor."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "4-D.  Gradients w.r.t. the output of `max_pool`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Gradients w.r.t. the input to `max_pool`."
     type_attr: "T"
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "The size of the window for each dimension of the input tensor."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the\ninput tensor."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -14018,7 +12876,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -14037,10 +12894,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -14048,48 +12906,40 @@ op {
       }
     }
   }
-  summary: "Computes gradients of the maxpooling function."
 }
 op {
   name: "MaxPoolGradGrad"
   input_arg {
     name: "orig_input"
-    description: "The original input tensor."
     type_attr: "T"
   }
   input_arg {
     name: "orig_output"
-    description: "The original output tensor."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "4-D.  Gradients of gradients w.r.t. the input of `max_pool`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Gradients of gradients w.r.t. the input to `max_pool`."
     type_attr: "T"
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "The size of the window for each dimension of the input tensor."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the\ninput tensor."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -14103,7 +12953,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -14119,10 +12968,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -14130,44 +12980,36 @@ op {
       }
     }
   }
-  summary: "Computes second-order gradients of the maxpooling function."
 }
 op {
   name: "MaxPoolGradGradV2"
   input_arg {
     name: "orig_input"
-    description: "The original input tensor."
     type_attr: "T"
   }
   input_arg {
     name: "orig_output"
-    description: "The original output tensor."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "4-D.  Gradients of gradients w.r.t. the input of `max_pool`."
     type_attr: "T"
   }
   input_arg {
     name: "ksize"
-    description: "The size of the window for each dimension of the input tensor."
     type: DT_INT32
   }
   input_arg {
     name: "strides"
-    description: "The stride of the sliding window for each dimension of the\ninput tensor."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "Gradients of gradients w.r.t. the input to `max_pool`."
     type_attr: "T"
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -14181,7 +13023,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -14197,10 +13038,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -14208,48 +13050,40 @@ op {
       }
     }
   }
-  summary: "Computes second-order gradients of the maxpooling function."
 }
 op {
   name: "MaxPoolGradGradWithArgmax"
   input_arg {
     name: "input"
-    description: "The original input."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t. the\ninput of `max_pool`."
     type_attr: "T"
   }
   input_arg {
     name: "argmax"
-    description: "The indices of the maximum values chosen for each output of `max_pool`."
     type_attr: "Targmax"
   }
   output_arg {
     name: "output"
-    description: "Gradients of gradients w.r.t. the input of `max_pool`."
     type_attr: "T"
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "The size of the window for each dimension of the input tensor."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the\ninput tensor."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -14275,10 +13109,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -14286,44 +13121,36 @@ op {
       }
     }
   }
-  summary: "Computes second-order gradients of the maxpooling function."
 }
 op {
   name: "MaxPoolGradV2"
   input_arg {
     name: "orig_input"
-    description: "The original input tensor."
     type_attr: "T"
   }
   input_arg {
     name: "orig_output"
-    description: "The original output tensor."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "4-D.  Gradients w.r.t. the output of `max_pool`."
     type_attr: "T"
   }
   input_arg {
     name: "ksize"
-    description: "The size of the window for each dimension of the input tensor."
     type: DT_INT32
   }
   input_arg {
     name: "strides"
-    description: "The stride of the sliding window for each dimension of the\ninput tensor."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "Gradients w.r.t. the input to `max_pool`."
     type_attr: "T"
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -14337,7 +13164,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -14356,10 +13182,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -14367,48 +13194,40 @@ op {
       }
     }
   }
-  summary: "Computes gradients of the maxpooling function."
 }
 op {
   name: "MaxPoolGradWithArgmax"
   input_arg {
     name: "input"
-    description: "The original input."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t. the\noutput of `max_pool`."
     type_attr: "T"
   }
   input_arg {
     name: "argmax"
-    description: "The indices of the maximum values chosen for each output of `max_pool`."
     type_attr: "Targmax"
   }
   output_arg {
     name: "output"
-    description: "Gradients w.r.t. the input of `max_pool`."
     type_attr: "T"
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "The size of the window for each dimension of the input tensor."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the\ninput tensor."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -14434,10 +13253,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -14445,28 +13265,23 @@ op {
       }
     }
   }
-  summary: "Computes gradients of the maxpooling function."
 }
 op {
   name: "MaxPoolV2"
   input_arg {
     name: "input"
-    description: "4-D input to pool over."
     type_attr: "T"
   }
   input_arg {
     name: "ksize"
-    description: "The size of the window for each dimension of the input tensor."
     type: DT_INT32
   }
   input_arg {
     name: "strides"
-    description: "The stride of the sliding window for each dimension of the\ninput tensor."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "The max pooled output tensor."
     type_attr: "T"
   }
   attr {
@@ -14477,6 +13292,8 @@ op {
     }
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -14485,7 +13302,6 @@ op {
         type: DT_INT16
         type: DT_INT8
         type: DT_UINT16
-        type: DT_HALF
         type: DT_QINT8
       }
     }
@@ -14493,7 +13309,6 @@ op {
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -14507,7 +13322,6 @@ op {
     default_value {
       s: "NHWC"
     }
-    description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n    [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n    [batch, in_channels, in_height, in_width]."
     allowed_values {
       list {
         s: "NHWC"
@@ -14516,36 +13330,30 @@ op {
       }
     }
   }
-  summary: "Performs max pooling on the input."
 }
 op {
   name: "MaxPoolWithArgmax"
   input_arg {
     name: "input"
-    description: "4-D with shape `[batch, height, width, channels]`.  Input to pool over."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The max pooled output tensor."
     type_attr: "T"
   }
   output_arg {
     name: "argmax"
-    description: "4-D.  The flattened indices of the max values chosen for each output."
     type_attr: "Targmax"
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "The size of the window for each dimension of the input tensor."
     has_minimum: true
     minimum: 4
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the\ninput tensor."
     has_minimum: true
     minimum: 4
   }
@@ -14565,7 +13373,6 @@ op {
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -14581,10 +13388,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -14592,8 +13400,6 @@ op {
       }
     }
   }
-  summary: "Performs max pooling on the input and outputs both max values and indices."
-  description: "The indices in `argmax` are flattened, so that a maximum value at position\n`[b, y, x, c]` becomes flattened index\n`((b * height + y) * width + x) * channels + c`.\n\nThe indices returned are always in `[0, height) x [0, width)` before flattening,\neven if padding is involved and the mathematically correct answer is outside\n(either negative or too large).  This is a bug, but fixing it is difficult to do\nin a safe backwards compatible way, especially due to flattening."
 }
 op {
   name: "Maximum"
@@ -14615,6 +13421,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -14622,25 +13429,20 @@ op {
       }
     }
   }
-  summary: "Returns the max of x and y (i.e. x > y ? x : y) element-wise."
-  description: "*NOTE*: `Maximum` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
   is_commutative: true
 }
 op {
   name: "Mean"
   input_arg {
     name: "input"
-    description: "The tensor to reduce."
     type_attr: "T"
   }
   input_arg {
     name: "reduction_indices"
-    description: "The dimensions to reduce. Must be in the range\n`[-rank(input), rank(input))`."
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "The reduced tensor."
     type_attr: "T"
   }
   attr {
@@ -14649,7 +13451,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, retain reduced dimensions with length 1."
   }
   attr {
     name: "T"
@@ -14658,17 +13459,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -14688,25 +13490,20 @@ op {
       }
     }
   }
-  summary: "Computes the mean of elements across dimensions of a tensor."
-  description: "Reduces `input` along the dimensions given in `reduction_indices`. Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_indices`. If `keep_dims` is true, the reduced dimensions are\nretained with length 1."
 }
 op {
   name: "Merge"
   input_arg {
     name: "inputs"
-    description: "The input tensors, exactly one of which will become available."
     type_attr: "T"
     number_attr: "N"
   }
   output_arg {
     name: "output"
-    description: "Will be set to the available input tensor."
     type_attr: "T"
   }
   output_arg {
     name: "value_index"
-    description: "The index of the chosen input tensor in `inputs`."
     type: DT_INT32
   }
   attr {
@@ -14719,20 +13516,16 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Forwards the value of an available tensor from `inputs` to `output`."
-  description: "`Merge` waits for at least one of the tensors in `inputs` to become available.\nIt is usually combined with `Switch` to implement branching.\n\n`Merge` forwards the first tensor to become available to `output`, and sets\n`value_index` to its index in `inputs`."
 }
 op {
   name: "MergeSummary"
   input_arg {
     name: "inputs"
-    description: "Can be of any shape.  Each must contain serialized `Summary` protocol\nbuffers."
     type: DT_STRING
     number_attr: "N"
   }
   output_arg {
     name: "summary"
-    description: "Scalar. Serialized `Summary` protocol buffer."
     type: DT_STRING
   }
   attr {
@@ -14741,19 +13534,15 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Merges summaries."
-  description: "This op creates a\n[`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)\nprotocol buffer that contains the union of all the values in the input\nsummaries.\n\nWhen the Op is run, it reports an `InvalidArgument` error if multiple values\nin the summaries to merge use the same tag."
 }
 op {
   name: "MergeV2Checkpoints"
   input_arg {
     name: "checkpoint_prefixes"
-    description: "prefixes of V2 checkpoints to merge."
     type: DT_STRING
   }
   input_arg {
     name: "destination_prefix"
-    description: "scalar.  The desired final prefix.  Allowed to be the same\nas one of the checkpoint_prefixes."
     type: DT_STRING
   }
   attr {
@@ -14762,22 +13551,17 @@ op {
     default_value {
       b: true
     }
-    description: "see above."
   }
-  summary: "V2 format specific: merges the metadata files of sharded checkpoints.  The"
-  description: "result is one logical checkpoint, with one physical metadata file and renamed\ndata files.\n\nIntended for \"grouping\" multiple checkpoints in a sharded checkpoint setup.\n\nIf delete_old_dirs is true, attempts to delete recursively the dirname of each\npath in the input checkpoint_prefixes.  This is useful when those paths are non\nuser-facing temporary locations."
   is_stateful: true
 }
 op {
   name: "Mfcc"
   input_arg {
     name: "spectrogram"
-    description: "Typically produced by the Spectrogram op, with magnitude_squared\nset to true."
     type: DT_FLOAT
   }
   input_arg {
     name: "sample_rate"
-    description: "How many samples per second the source audio used."
     type: DT_INT32
   }
   output_arg {
@@ -14790,7 +13574,6 @@ op {
     default_value {
       f: 4000
     }
-    description: "The highest frequency to use when calculating the\nceptstrum."
   }
   attr {
     name: "lower_frequency_limit"
@@ -14798,7 +13581,6 @@ op {
     default_value {
       f: 20
     }
-    description: "The lowest frequency to use when calculating the\nceptstrum."
   }
   attr {
     name: "filterbank_channel_count"
@@ -14806,7 +13588,6 @@ op {
     default_value {
       i: 40
     }
-    description: "Resolution of the Mel bank used internally."
   }
   attr {
     name: "dct_coefficient_count"
@@ -14814,26 +13595,20 @@ op {
     default_value {
       i: 13
     }
-    description: "How many output channels to produce per time slice."
   }
-  summary: "Transforms a spectrogram into a form that\'s useful for speech recognition."
-  description: "Mel Frequency Cepstral Coefficients are a way of representing audio data that\'s\nbeen effective as an input feature for machine learning. They are created by\ntaking the spectrum of a spectrogram (a \'cepstrum\'), and discarding some of the\nhigher frequencies that are less significant to the human ear. They have a long\nhistory in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum\nis a good resource to learn more."
 }
 op {
   name: "Min"
   input_arg {
     name: "input"
-    description: "The tensor to reduce."
     type_attr: "T"
   }
   input_arg {
     name: "reduction_indices"
-    description: "The dimensions to reduce. Must be in the range\n`[-rank(input), rank(input))`."
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "The reduced tensor."
     type_attr: "T"
   }
   attr {
@@ -14842,7 +13617,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, retain reduced dimensions with length 1."
   }
   attr {
     name: "T"
@@ -14851,17 +13625,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -14881,8 +13656,6 @@ op {
       }
     }
   }
-  summary: "Computes the minimum of elements across dimensions of a tensor."
-  description: "Reduces `input` along the dimensions given in `reduction_indices`. Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_indices`. If `keep_dims` is true, the reduced dimensions are\nretained with length 1."
 }
 op {
   name: "Minimum"
@@ -14904,6 +13677,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -14911,25 +13685,20 @@ op {
       }
     }
   }
-  summary: "Returns the min of x and y (i.e. x < y ? x : y) element-wise."
-  description: "*NOTE*: `Minimum` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
   is_commutative: true
 }
 op {
   name: "MirrorPad"
   input_arg {
     name: "input"
-    description: "The input tensor to be padded."
     type_attr: "T"
   }
   input_arg {
     name: "paddings"
-    description: "A two-column matrix specifying the padding sizes. The number of\nrows must be the same as the rank of `input`."
     type_attr: "Tpaddings"
   }
   output_arg {
     name: "output"
-    description: "The padded tensor."
     type_attr: "T"
   }
   attr {
@@ -14952,7 +13721,6 @@ op {
   attr {
     name: "mode"
     type: "string"
-    description: "Either `REFLECT` or `SYMMETRIC`. In reflect mode the padded regions\ndo not include the borders, while in symmetric mode the padded regions\ndo include the borders. For example, if `input` is `[1, 2, 3]` and `paddings`\nis `[0, 2]`, then the output is `[1, 2, 3, 2, 1]` in reflect mode, and\nit is `[1, 2, 3, 3, 2]` in symmetric mode."
     allowed_values {
       list {
         s: "REFLECT"
@@ -14960,24 +13728,19 @@ op {
       }
     }
   }
-  summary: "Pads a tensor with mirrored values."
-  description: "This operation pads a `input` with mirrored values according to the `paddings`\nyou specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is\nthe rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates\nhow many values to add before the contents of `input` in that dimension, and\n`paddings[D, 1]` indicates how many values to add after the contents of `input`\nin that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater\nthan `input.dim_size(D)` (or `input.dim_size(D) - 1`) if `copy_border` is true\n(if false, respectively).\n\nThe padded size of each dimension D of the output is:\n\n`paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`\n\nFor example:\n\n```\n# \'t\' is [[1, 2, 3], [4, 5, 6]].\n# \'paddings\' is [[1, 1]], [2, 2]].\n# \'mode\' is SYMMETRIC.\n# rank of \'t\' is 2.\npad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2]\n                      [2, 1, 1, 2, 3, 3, 2]\n                      [5, 4, 4, 5, 6, 6, 5]\n                      [5, 4, 4, 5, 6, 6, 5]]\n```"
 }
 op {
   name: "MirrorPadGrad"
   input_arg {
     name: "input"
-    description: "The input tensor to be folded."
     type_attr: "T"
   }
   input_arg {
     name: "paddings"
-    description: "A two-column matrix specifying the padding sizes. The number of\nrows must be the same as the rank of `input`."
     type_attr: "Tpaddings"
   }
   output_arg {
     name: "output"
-    description: "The folded tensor."
     type_attr: "T"
   }
   attr {
@@ -15000,7 +13763,6 @@ op {
   attr {
     name: "mode"
     type: "string"
-    description: "The mode used in the `MirrorPad` op."
     allowed_values {
       list {
         s: "REFLECT"
@@ -15008,8 +13770,6 @@ op {
       }
     }
   }
-  summary: "Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor."
-  description: "This operation folds the padded areas of `input` by `MirrorPad` according to the\n`paddings` you specify. `paddings` must be the same as `paddings` argument\ngiven to the corresponding `MirrorPad` op.\n\nThe folded size of each dimension D of the output is:\n\n`input.dim_size(D) - paddings(D, 0) - paddings(D, 1)`\n\nFor example:\n\n```\n# \'t\' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]].\n# \'paddings\' is [[0, 1]], [0, 1]].\n# \'mode\' is SYMMETRIC.\n# rank of \'t\' is 2.\npad(t, paddings) ==> [[ 1,  5]\n                      [11, 28]]\n```"
 }
 op {
   name: "Mod"
@@ -15032,13 +13792,12 @@ op {
       list {
         type: DT_INT32
         type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Returns element-wise remainder of division. This emulates C semantics in that"
-  description: "the result here is consistent with a truncating divide. E.g.\n`tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`.\n\n*NOTE*: `Mod` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "Mul"
@@ -15060,6 +13819,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -15073,26 +13833,21 @@ op {
       }
     }
   }
-  summary: "Returns x * y element-wise."
-  description: "*NOTE*: `Mul` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
   is_commutative: true
 }
 op {
   name: "Multinomial"
   input_arg {
     name: "logits"
-    description: "2-D Tensor with shape `[batch_size, num_classes]`.  Each slice `[i, :]`\nrepresents the unnormalized log probabilities for all classes."
     type_attr: "T"
   }
   input_arg {
     name: "num_samples"
-    description: "0-D.  Number of independent samples to draw for each row slice."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "2-D Tensor with shape `[batch_size, num_samples]`.  Each slice `[i, :]`\ncontains the drawn class labels with range `[0, num_classes)`."
-    type: DT_INT64
+    type_attr: "output_dtype"
   }
   attr {
     name: "seed"
@@ -15100,7 +13855,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 is set to be non-zero, the internal random number\ngenerator is seeded by the given seed.  Otherwise, a random seed is used."
   }
   attr {
     name: "seed2"
@@ -15108,7 +13862,6 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "T"
@@ -15118,10 +13871,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -15129,19 +13883,29 @@ op {
       }
     }
   }
-  summary: "Draws samples from a multinomial distribution."
+  attr {
+    name: "output_dtype"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
   is_stateful: true
 }
 op {
   name: "MutableDenseHashTable"
   input_arg {
     name: "empty_key"
-    description: "The key used to represent empty key buckets internally. Must not\nbe used in insert or lookup operations."
     type_attr: "key_dtype"
   }
   output_arg {
     name: "table_handle"
-    description: "Handle to a table."
     type: DT_STRING
     is_ref: true
   }
@@ -15151,7 +13915,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -15159,7 +13922,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is shared under the given name across\nmultiple sessions."
   }
   attr {
     name: "use_node_name_sharing"
@@ -15171,12 +13933,10 @@ op {
   attr {
     name: "key_dtype"
     type: "type"
-    description: "Type of the table keys."
   }
   attr {
     name: "value_dtype"
     type: "type"
-    description: "Type of the table values."
   }
   attr {
     name: "value_shape"
@@ -15185,7 +13945,6 @@ op {
       shape {
       }
     }
-    description: "The shape of each value."
   }
   attr {
     name: "initial_num_buckets"
@@ -15193,7 +13952,6 @@ op {
     default_value {
       i: 131072
     }
-    description: "The initial number of hash table buckets. Must be a power\nto 2."
   }
   attr {
     name: "max_load_factor"
@@ -15201,22 +13959,17 @@ op {
     default_value {
       f: 0.8
     }
-    description: "The maximum ratio between number of entries and number of\nbuckets before growing the table. Must be between 0 and 1."
   }
-  summary: "Creates an empty hash table that uses tensors as the backing store."
-  description: "It uses \"open addressing\" with quadratic reprobing to resolve\ncollisions.\n\nThis op creates a mutable hash table, specifying the type of its keys and\nvalues. Each value must be a scalar. Data can be inserted into the table using\nthe insert operations. It does not support the initialization operation."
   is_stateful: true
 }
 op {
   name: "MutableDenseHashTableV2"
   input_arg {
     name: "empty_key"
-    description: "The key used to represent empty key buckets internally. Must not\nbe used in insert or lookup operations."
     type_attr: "key_dtype"
   }
   output_arg {
     name: "table_handle"
-    description: "Handle to a table."
     type: DT_RESOURCE
   }
   attr {
@@ -15225,7 +13978,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -15233,7 +13985,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is shared under the given name across\nmultiple sessions."
   }
   attr {
     name: "use_node_name_sharing"
@@ -15245,12 +13996,10 @@ op {
   attr {
     name: "key_dtype"
     type: "type"
-    description: "Type of the table keys."
   }
   attr {
     name: "value_dtype"
     type: "type"
-    description: "Type of the table values."
   }
   attr {
     name: "value_shape"
@@ -15259,7 +14008,6 @@ op {
       shape {
       }
     }
-    description: "The shape of each value."
   }
   attr {
     name: "initial_num_buckets"
@@ -15267,7 +14015,6 @@ op {
     default_value {
       i: 131072
     }
-    description: "The initial number of hash table buckets. Must be a power\nto 2."
   }
   attr {
     name: "max_load_factor"
@@ -15275,17 +14022,13 @@ op {
     default_value {
       f: 0.8
     }
-    description: "The maximum ratio between number of entries and number of\nbuckets before growing the table. Must be between 0 and 1."
   }
-  summary: "Creates an empty hash table that uses tensors as the backing store."
-  description: "It uses \"open addressing\" with quadratic reprobing to resolve\ncollisions.\n\nThis op creates a mutable hash table, specifying the type of its keys and\nvalues. Each value must be a scalar. Data can be inserted into the table using\nthe insert operations. It does not support the initialization operation."
   is_stateful: true
 }
 op {
   name: "MutableHashTable"
   output_arg {
     name: "table_handle"
-    description: "Handle to a table."
     type: DT_STRING
     is_ref: true
   }
@@ -15295,7 +14038,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -15303,7 +14045,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is shared under the given name across\nmultiple sessions."
   }
   attr {
     name: "use_node_name_sharing"
@@ -15311,27 +14052,21 @@ op {
     default_value {
       b: false
     }
-    description: "If true and shared_name is empty, the table is shared\nusing the node name."
   }
   attr {
     name: "key_dtype"
     type: "type"
-    description: "Type of the table keys."
   }
   attr {
     name: "value_dtype"
     type: "type"
-    description: "Type of the table values."
   }
-  summary: "Creates an empty hash table."
-  description: "This op creates a mutable hash table, specifying the type of its keys and\nvalues. Each value must be a scalar. Data can be inserted into the table using\nthe insert operations. It does not support the initialization operation."
   is_stateful: true
 }
 op {
   name: "MutableHashTableOfTensors"
   output_arg {
     name: "table_handle"
-    description: "Handle to a table."
     type: DT_STRING
     is_ref: true
   }
@@ -15341,7 +14076,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -15349,7 +14083,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is shared under the given name across\nmultiple sessions."
   }
   attr {
     name: "use_node_name_sharing"
@@ -15361,12 +14094,10 @@ op {
   attr {
     name: "key_dtype"
     type: "type"
-    description: "Type of the table keys."
   }
   attr {
     name: "value_dtype"
     type: "type"
-    description: "Type of the table values."
   }
   attr {
     name: "value_shape"
@@ -15376,15 +14107,12 @@ op {
       }
     }
   }
-  summary: "Creates an empty hash table."
-  description: "This op creates a mutable hash table, specifying the type of its keys and\nvalues. Each value must be a vector. Data can be inserted into the table using\nthe insert operations. It does not support the initialization operation."
   is_stateful: true
 }
 op {
   name: "MutableHashTableOfTensorsV2"
   output_arg {
     name: "table_handle"
-    description: "Handle to a table."
     type: DT_RESOURCE
   }
   attr {
@@ -15393,7 +14121,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -15401,7 +14128,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is shared under the given name across\nmultiple sessions."
   }
   attr {
     name: "use_node_name_sharing"
@@ -15413,12 +14139,10 @@ op {
   attr {
     name: "key_dtype"
     type: "type"
-    description: "Type of the table keys."
   }
   attr {
     name: "value_dtype"
     type: "type"
-    description: "Type of the table values."
   }
   attr {
     name: "value_shape"
@@ -15428,15 +14152,12 @@ op {
       }
     }
   }
-  summary: "Creates an empty hash table."
-  description: "This op creates a mutable hash table, specifying the type of its keys and\nvalues. Each value must be a vector. Data can be inserted into the table using\nthe insert operations. It does not support the initialization operation."
   is_stateful: true
 }
 op {
   name: "MutableHashTableV2"
   output_arg {
     name: "table_handle"
-    description: "Handle to a table."
     type: DT_RESOURCE
   }
   attr {
@@ -15445,7 +14166,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -15453,7 +14173,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this table is shared under the given name across\nmultiple sessions."
   }
   attr {
     name: "use_node_name_sharing"
@@ -15461,20 +14180,15 @@ op {
     default_value {
       b: false
     }
-    description: "If true and shared_name is empty, the table is shared\nusing the node name."
   }
   attr {
     name: "key_dtype"
     type: "type"
-    description: "Type of the table keys."
   }
   attr {
     name: "value_dtype"
     type: "type"
-    description: "Type of the table values."
   }
-  summary: "Creates an empty hash table."
-  description: "This op creates a mutable hash table, specifying the type of its keys and\nvalues. Each value must be a scalar. Data can be inserted into the table using\nthe insert operations. It does not support the initialization operation."
   is_stateful: true
 }
 op {
@@ -15493,6 +14207,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -15502,31 +14217,25 @@ op {
       }
     }
   }
-  summary: "Computes numerical negative value element-wise."
-  description: "I.e., \\\\(y = -x\\\\)."
 }
 op {
   name: "NegTrain"
   input_arg {
     name: "w_in"
-    description: "input word embedding."
     type: DT_FLOAT
     is_ref: true
   }
   input_arg {
     name: "w_out"
-    description: "output word embedding."
     type: DT_FLOAT
     is_ref: true
   }
   input_arg {
     name: "examples"
-    description: "A vector of word ids."
     type: DT_INT32
   }
   input_arg {
     name: "labels"
-    description: "A vector of word ids."
     type: DT_INT32
   }
   input_arg {
@@ -15536,14 +14245,11 @@ op {
   attr {
     name: "vocab_count"
     type: "list(int)"
-    description: "Count of words in the vocabulary."
   }
   attr {
     name: "num_negative_samples"
     type: "int"
-    description: "Number of negative samples per example."
   }
-  summary: "Training via negative sampling."
   deprecation {
     version: 19
     explanation: "Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result"
@@ -15554,44 +14260,36 @@ op {
   name: "NextIteration"
   input_arg {
     name: "data"
-    description: "The tensor to be made available to the next iteration."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The same tensor as `data`."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Makes its input available to the next iteration."
 }
 op {
   name: "NoOp"
-  summary: "Does nothing. Only useful as a placeholder for control edges."
 }
 op {
   name: "NonMaxSuppression"
   input_arg {
     name: "boxes"
-    description: "A 2-D float tensor of shape `[num_boxes, 4]`."
     type: DT_FLOAT
   }
   input_arg {
     name: "scores"
-    description: "A 1-D float tensor of shape `[num_boxes]` representing a single\nscore corresponding to each box (each row of boxes)."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_output_size"
-    description: "A scalar integer tensor representing the maximum number of\nboxes to be selected by non max suppression."
     type: DT_INT32
   }
   output_arg {
     name: "selected_indices"
-    description: "A 1-D integer tensor of shape `[M]` representing the selected\nindices from the boxes tensor, where `M <= max_output_size`."
     type: DT_INT32
   }
   attr {
@@ -15600,40 +14298,30 @@ op {
     default_value {
       f: 0.5
     }
-    description: "A float representing the threshold for deciding whether boxes\noverlap too much with respect to IOU."
   }
-  summary: "Greedily selects a subset of bounding boxes in descending order of score,"
-  description: "pruning away boxes that have high intersection-over-union (IOU) overlap\nwith previously selected boxes.  Bounding boxes are supplied as\n[y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any\ndiagonal pair of box corners and the coordinates can be provided as normalized\n(i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm\nis agnostic to where the origin is in the coordinate system.  Note that this\nalgorithm is invariant to orthogonal transformations and translations\nof the coordinate system; thus translating or reflections of the coordinate\nsystem result in the same boxes being selected by the algorithm.\nThe output of this operation is a set of integers indexing into the input\ncollection of bounding boxes representing the selected boxes.  The bounding\nbox coordinates corresponding to the selected indices can then be obtained\nusing the `tf.gather operation`.  For example:\n  selected_indices = tf.image.non_max_suppression(\n      boxes, scores, max_output_size, iou_threshold)\n  selected_boxes = tf.gather(boxes, selected_indices)"
 }
 op {
   name: "NonMaxSuppressionV2"
   input_arg {
     name: "boxes"
-    description: "A 2-D float tensor of shape `[num_boxes, 4]`."
     type: DT_FLOAT
   }
   input_arg {
     name: "scores"
-    description: "A 1-D float tensor of shape `[num_boxes]` representing a single\nscore corresponding to each box (each row of boxes)."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_output_size"
-    description: "A scalar integer tensor representing the maximum number of\nboxes to be selected by non max suppression."
     type: DT_INT32
   }
   input_arg {
     name: "iou_threshold"
-    description: "A 0-D float tensor representing the threshold for deciding whether\nboxes overlap too much with respect to IOU."
     type: DT_FLOAT
   }
   output_arg {
     name: "selected_indices"
-    description: "A 1-D integer tensor of shape `[M]` representing the selected\nindices from the boxes tensor, where `M <= max_output_size`."
     type: DT_INT32
   }
-  summary: "Greedily selects a subset of bounding boxes in descending order of score,"
-  description: "pruning away boxes that have high intersection-over-union (IOU) overlap\nwith previously selected boxes.  Bounding boxes are supplied as\n[y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any\ndiagonal pair of box corners and the coordinates can be provided as normalized\n(i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm\nis agnostic to where the origin is in the coordinate system.  Note that this\nalgorithm is invariant to orthogonal transformations and translations\nof the coordinate system; thus translating or reflections of the coordinate\nsystem result in the same boxes being selected by the algorithm.\n\nThe output of this operation is a set of integers indexing into the input\ncollection of bounding boxes representing the selected boxes.  The bounding\nbox coordinates corresponding to the selected indices can then be obtained\nusing the `tf.gather operation`.  For example:\n\n  selected_indices = tf.image.non_max_suppression_v2(\n      boxes, scores, max_output_size, iou_threshold)\n  selected_boxes = tf.gather(boxes, selected_indices)"
 }
 op {
   name: "NotEqual"
@@ -15655,6 +14343,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -15672,25 +14361,20 @@ op {
       }
     }
   }
-  summary: "Returns the truth value of (x != y) element-wise."
-  description: "*NOTE*: `NotEqual` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
   is_commutative: true
 }
 op {
   name: "NthElement"
   input_arg {
     name: "input"
-    description: "1-D or higher with last dimension at least `n+1`."
     type_attr: "T"
   }
   input_arg {
     name: "n"
-    description: "0-D. Position of sorted vector to select along the last dimension (along\neach row for matrices). Valid range of n is `[0, input.shape[:-1])`"
     type: DT_INT32
   }
   output_arg {
     name: "values"
-    description: "The `n`-th order statistic along each last dimensional slice."
     type_attr: "T"
   }
   attr {
@@ -15699,7 +14383,6 @@ op {
     default_value {
       b: false
     }
-    description: "When set to True, find the nth-largest value in the vector and vice\nversa."
   }
   attr {
     name: "T"
@@ -15709,10 +14392,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -15720,34 +14404,27 @@ op {
       }
     }
   }
-  summary: "Finds values of the `n`-th order statistic for the last dimension."
-  description: "If the input is a vector (rank-1), finds the entries which is the nth-smallest\nvalue in the vector and outputs their values as scalar tensor.\n\nFor matrices (resp. higher rank input), computes the entries which is the\nnth-smallest value in each row (resp. vector along the last dimension). Thus,\n\n    values.shape = input.shape[:-1]"
 }
 op {
   name: "OneHot"
   input_arg {
     name: "indices"
-    description: "A tensor of indices."
     type_attr: "TI"
   }
   input_arg {
     name: "depth"
-    description: "A scalar defining the depth of the one hot dimension."
     type: DT_INT32
   }
   input_arg {
     name: "on_value"
-    description: "A scalar defining the value to fill in output when `indices[j] = i`."
     type_attr: "T"
   }
   input_arg {
     name: "off_value"
-    description: "A scalar defining the value to fill in output when `indices[j] != i`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The one-hot tensor."
     type_attr: "T"
   }
   attr {
@@ -15756,7 +14433,6 @@ op {
     default_value {
       i: -1
     }
-    description: "The axis to fill (default: -1, a new inner-most axis)."
   }
   attr {
     name: "T"
@@ -15776,20 +14452,16 @@ op {
       }
     }
   }
-  summary: "Returns a one-hot tensor."
-  description: "The locations represented by indices in `indices` take value `on_value`,\nwhile all other locations take value `off_value`.\n\nIf the input `indices` is rank `N`, the output will have rank `N+1`,\nThe new axis is created at dimension `axis` (default: the new axis is\nappended at the end).\n\nIf `indices` is a scalar the output shape will be a vector of length `depth`.\n\nIf `indices` is a vector of length `features`, the output shape will be:\n```\n  features x depth if axis == -1\n  depth x features if axis == 0\n```\n\nIf `indices` is a matrix (batch) with shape `[batch, features]`,\nthe output shape will be:\n```\n  batch x features x depth if axis == -1\n  batch x depth x features if axis == 1\n  depth x batch x features if axis == 0\n```\n\n\nExamples\n=========\n\nSuppose that\n\n```\n  indices = [0, 2, -1, 1]\n  depth = 3\n  on_value = 5.0\n  off_value = 0.0\n  axis = -1\n```\n\nThen output is `[4 x 3]`:\n\n    ```output =\n      [5.0 0.0 0.0]  // one_hot(0)\n      [0.0 0.0 5.0]  // one_hot(2)\n      [0.0 0.0 0.0]  // one_hot(-1)\n      [0.0 5.0 0.0]  // one_hot(1)\n    ```\n\nSuppose that\n\n```\n  indices = [0, 2, -1, 1]\n  depth = 3\n  on_value = 0.0\n  off_value = 3.0\n  axis = 0\n```\n\nThen output is `[3 x 4]`:\n\n    ```output =\n      [0.0 3.0 3.0 3.0]\n      [3.0 3.0 3.0 0.0]\n      [3.0 3.0 3.0 3.0]\n      [3.0 0.0 3.0 3.0]\n    //  ^                one_hot(0)\n    //      ^            one_hot(2)\n    //          ^        one_hot(-1)\n    //              ^    one_hot(1)\n    ```\nSuppose that\n\n```\n  indices = [[0, 2], [1, -1]]\n  depth = 3\n  on_value = 1.0\n  off_value = 0.0\n  axis = -1\n```\n\nThen output is `[2 x 2 x 3]`:\n\n    ```output =\n      [\n        [1.0, 0.0, 0.0]  // one_hot(0)\n        [0.0, 0.0, 1.0]  // one_hot(2)\n      ][\n        [0.0, 1.0, 0.0]  // one_hot(1)\n        [0.0, 0.0, 0.0]  // one_hot(-1)\n      ]```"
 }
 op {
   name: "OneShotIterator"
   output_arg {
     name: "handle"
-    description: "A handle to the iterator that can be passed to an \"IteratorGetNext\"\nop."
     type: DT_RESOURCE
   }
   attr {
     name: "dataset_factory"
     type: "func"
-    description: "A function of type `() -> DT_VARIANT`, where the returned\nDT_VARIANT is a dataset."
   }
   attr {
     name: "output_types"
@@ -15817,20 +14489,16 @@ op {
       s: ""
     }
   }
-  summary: "Makes a \"one-shot\" iterator that can be iterated only once."
-  description: "A one-shot iterator bundles the logic for defining the dataset and\nthe state of the iterator in a single op, which allows simple input\npipelines to be defined without an additional initialization\n(\"MakeIterator\") step.\n\nOne-shot iterators have the following limitations:\n\n* They do not support parameterization: all logic for creating the underlying\n  dataset must be bundled in the `dataset_factory` function.\n* They are not resettable. Once a one-shot iterator reaches the end of its\n  underlying dataset, subsequent \"IteratorGetNext\" operations on that\n  iterator will always produce an `OutOfRange` error.\n\nFor greater flexibility, use \"Iterator\" and \"MakeIterator\" to define\nan iterator using an arbitrary subgraph, which may capture tensors\n(including fed values) as parameters, and which may be reset multiple\ntimes by rerunning \"MakeIterator\"."
   is_stateful: true
 }
 op {
   name: "OnesLike"
   input_arg {
     name: "x"
-    description: "a tensor of type T."
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    description: "a tensor of the same shape and type as x but filled with ones."
     type_attr: "T"
   }
   attr {
@@ -15838,16 +14506,21 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
         type: DT_INT32
         type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
+        type: DT_BOOL
       }
     }
   }
-  summary: "Returns a tensor of ones with the same shape and type as x."
 }
 op {
   name: "OrderedMapClear"
@@ -15885,7 +14558,6 @@ op {
       s: ""
     }
   }
-  summary: "Op removes all elements in the underlying container."
   is_stateful: true
 }
 op {
@@ -15928,7 +14600,6 @@ op {
       s: ""
     }
   }
-  summary: "Op returns the number of incomplete elements in the underlying container."
   is_stateful: true
 }
 op {
@@ -15981,8 +14652,6 @@ op {
       s: ""
     }
   }
-  summary: "Op peeks at the values at the specified key.  If the"
-  description: "underlying container does not contain this key\nthis op will block until it does.   This Op is optimized for\nperformance."
   is_stateful: true
 }
 op {
@@ -16025,14 +14694,12 @@ op {
       s: ""
     }
   }
-  summary: "Op returns the number of elements in the underlying container."
   is_stateful: true
 }
 op {
   name: "OrderedMapStage"
   input_arg {
     name: "key"
-    description: "int64"
     type: DT_INT64
   }
   input_arg {
@@ -16041,7 +14708,6 @@ op {
   }
   input_arg {
     name: "values"
-    description: "a list of tensors\ndtypes A list of data types that inserted values should adhere to."
     type_list_attr: "fake_dtypes"
   }
   attr {
@@ -16050,7 +14716,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Maximum number of elements in the Staging Area. If > 0, inserts\non the container will block when the capacity is reached."
     has_minimum: true
   }
   attr {
@@ -16077,7 +14742,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue is placed in the given container. Otherwise,\na default container is used."
   }
   attr {
     name: "shared_name"
@@ -16085,10 +14749,7 @@ op {
     default_value {
       s: ""
     }
-    description: "It is necessary to match this name to the matching Unstage Op."
   }
-  summary: "Stage (key, values) in the underlying container which behaves like a ordered"
-  description: "associative container.   Elements are ordered by key."
   is_stateful: true
 }
 op {
@@ -16141,8 +14802,6 @@ op {
       s: ""
     }
   }
-  summary: "Op removes and returns the values associated with the key"
-  description: "from the underlying container.   If the underlying container\ndoes not contain this key, the op will block until it does."
   is_stateful: true
 }
 op {
@@ -16195,21 +14854,17 @@ op {
       s: ""
     }
   }
-  summary: "Op removes and returns the (key, value) element with the smallest"
-  description: "key from the underlying container.   If the underlying container\ndoes not contain elements, the op will block until it does."
   is_stateful: true
 }
 op {
   name: "Pack"
   input_arg {
     name: "values"
-    description: "Must be of same shape and type."
     type_attr: "T"
     number_attr: "N"
   }
   output_arg {
     name: "output"
-    description: "The packed tensor."
     type_attr: "T"
   }
   attr {
@@ -16228,10 +14883,7 @@ op {
     default_value {
       i: 0
     }
-    description: "Dimension along which to pack.  Negative values wrap around, so the\nvalid range is `[-(R+1), R+1)`."
   }
-  summary: "Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor."
-  description: "Packs the `N` tensors in `values` into a tensor with rank one higher than each\ntensor in `values`, by packing them along the `axis` dimension.\nGiven a list of tensors of shape `(A, B, C)`;\n\nif `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`.\nif `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`.\nEtc.\n\nFor example:\n\n```\n# \'x\' is [1, 4]\n# \'y\' is [2, 5]\n# \'z\' is [3, 6]\npack([x, y, z]) => [[1, 4], [2, 5], [3, 6]]  # Pack along first dim.\npack([x, y, z], axis=1) => [[1, 2, 3], [4, 5, 6]]\n```\n\nThis is the opposite of `unpack`."
 }
 op {
   name: "Pad"
@@ -16264,8 +14916,6 @@ op {
       }
     }
   }
-  summary: "Pads a tensor with zeros."
-  description: "This operation pads a `input` with zeros according to the `paddings` you\nspecify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the\nrank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates\nhow many zeros to add before the contents of `input` in that dimension, and\n`paddings[D, 1]` indicates how many zeros to add after the contents of `input`\nin that dimension.\n\nThe padded size of each dimension D of the output is:\n\n`paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`\n\nFor example:\n\n```\n# \'t\' is [[1, 1], [2, 2]]\n# \'paddings\' is [[1, 1], [2, 2]]\n# rank of \'t\' is 2\npad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]\n                      [0, 0, 1, 1, 0, 0]\n                      [0, 0, 2, 2, 0, 0]\n                      [0, 0, 0, 0, 0, 0]]\n```"
 }
 op {
   name: "PadV2"
@@ -16302,8 +14952,6 @@ op {
       }
     }
   }
-  summary: "Pads a tensor."
-  description: "This operation pads `input` according to the `paddings` and `constant_values`\nyou specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is\nthe rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates\nhow many padding values to add before the contents of `input` in that dimension,\nand `paddings[D, 1]` indicates how many padding values to add after the contents\nof `input` in that dimension. `constant_values` is a scalar tensor of the same\ntype as `input` that indicates the value to use for padding `input`.\n\nThe padded size of each dimension D of the output is:\n\n`paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`\n\nFor example:\n\n```\n# \'t\' is [[1, 1], [2, 2]]\n# \'paddings\' is [[1, 1], [2, 2]]\n# \'constant_values\' is 0\n# rank of \'t\' is 2\npad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]\n                      [0, 0, 1, 1, 0, 0]\n                      [0, 0, 2, 2, 0, 0]\n                      [0, 0, 0, 0, 0, 0]]\n```"
 }
 op {
   name: "PaddedBatchDataset"
@@ -16313,18 +14961,15 @@ op {
   }
   input_arg {
     name: "batch_size"
-    description: "A scalar representing the number of elements to accumulate in a\nbatch."
     type: DT_INT64
   }
   input_arg {
     name: "padded_shapes"
-    description: "A list of int64 tensors representing the desired padded shapes\nof the corresponding output components. These shapes may be partially\nspecified, using `-1` to indicate that a particular dimension should be\npadded to the maximum size of all batch elements."
     type: DT_INT64
     number_attr: "N"
   }
   input_arg {
     name: "padding_values"
-    description: "A list of scalars containing the padding value to use for\neach of the outputs."
     type_list_attr: "Toutput_types"
   }
   output_arg {
@@ -16349,20 +14994,17 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that batches and pads `batch_size` elements from the input."
 }
 op {
   name: "PaddingFIFOQueue"
   output_arg {
     name: "handle"
-    description: "The handle to the queue."
     type: DT_STRING
     is_ref: true
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a value."
     has_minimum: true
     minimum: 1
   }
@@ -16373,7 +15015,6 @@ op {
       list {
       }
     }
-    description: "The shape of each component in a value. The length of this attr must\nbe either 0 or the same as the length of component_types.\nShapes of fixed rank but variable size are allowed by setting\nany shape dimension to -1.  In this case, the inputs\' shape may vary along\nthe given dimension, and DequeueMany will pad the given dimension with\nzeros up to the maximum shape of all elements in the given batch.\nIf the length of this attr is 0, different queue elements may have\ndifferent ranks and shapes, but only one element may be dequeued at a time."
     has_minimum: true
   }
   attr {
@@ -16382,7 +15023,6 @@ op {
     default_value {
       i: -1
     }
-    description: "The upper bound on the number of elements in this queue.\nNegative numbers mean no limit."
   }
   attr {
     name: "container"
@@ -16390,7 +15030,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -16398,23 +15037,18 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue will be shared under the given name\nacross multiple sessions."
   }
-  summary: "A queue that produces elements in first-in first-out order."
-  description: "Variable-size shapes are allowed by setting the corresponding shape dimensions\nto 0 in the shape attr.  In this case DequeueMany will pad up to the maximum\nsize of any given element in the minibatch.  See below for details."
   is_stateful: true
 }
 op {
   name: "PaddingFIFOQueueV2"
   output_arg {
     name: "handle"
-    description: "The handle to the queue."
     type: DT_RESOURCE
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a value."
     has_minimum: true
     minimum: 1
   }
@@ -16425,7 +15059,6 @@ op {
       list {
       }
     }
-    description: "The shape of each component in a value. The length of this attr must\nbe either 0 or the same as the length of component_types.\nShapes of fixed rank but variable size are allowed by setting\nany shape dimension to -1.  In this case, the inputs\' shape may vary along\nthe given dimension, and DequeueMany will pad the given dimension with\nzeros up to the maximum shape of all elements in the given batch.\nIf the length of this attr is 0, different queue elements may have\ndifferent ranks and shapes, but only one element may be dequeued at a time."
     has_minimum: true
   }
   attr {
@@ -16434,7 +15067,6 @@ op {
     default_value {
       i: -1
     }
-    description: "The upper bound on the number of elements in this queue.\nNegative numbers mean no limit."
   }
   attr {
     name: "container"
@@ -16442,7 +15074,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -16450,23 +15081,18 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue will be shared under the given name\nacross multiple sessions."
   }
-  summary: "A queue that produces elements in first-in first-out order."
-  description: "Variable-size shapes are allowed by setting the corresponding shape dimensions\nto 0 in the shape attr.  In this case DequeueMany will pad up to the maximum\nsize of any given element in the minibatch.  See below for details."
   is_stateful: true
 }
 op {
   name: "ParallelConcat"
   input_arg {
     name: "values"
-    description: "Tensors to be concatenated. All must have size 1 in the first dimension\nand same shape."
     type_attr: "T"
     number_attr: "N"
   }
   output_arg {
     name: "output"
-    description: "The concatenated tensor."
     type_attr: "T"
   }
   attr {
@@ -16482,10 +15108,7 @@ op {
   attr {
     name: "shape"
     type: "shape"
-    description: "the final shape of the result; should be equal to the shapes of any input\nbut with the number of input values in the first dimension."
   }
-  summary: "Concatenates a list of `N` tensors along the first dimension."
-  description: "The input tensors are all required to have size 1 in the first dimension.\n\nFor example:\n\n```\n# \'x\' is [[1, 4]]\n# \'y\' is [[2, 5]]\n# \'z\' is [[3, 6]]\nparallel_concat([x, y, z]) => [[1, 4], [2, 5], [3, 6]]  # Pack along first dim.\n```\n\nThe difference between concat and parallel_concat is that concat requires all\nof the inputs be computed before the operation will begin but doesn\'t require\nthat the input shapes be known during graph construction.  Parallel concat\nwill copy pieces of the input into the output as they become available, in\nsome situations this can provide a performance benefit."
 }
 op {
   name: "ParallelDynamicStitch"
@@ -16513,8 +15136,6 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Interleave the values from the `data` tensors into a single tensor."
-  description: "Builds a merged tensor such that\n\n```python\n    merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]\n```\n\nFor example, if each `indices[m]` is scalar or vector, we have\n\n```python\n    # Scalar indices:\n    merged[indices[m], ...] = data[m][...]\n\n    # Vector indices:\n    merged[indices[m][i], ...] = data[m][i, ...]\n```\n\nEach `data[i].shape` must start with the corresponding `indices[i].shape`,\nand the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we\nmust have `data[i].shape = indices[i].shape + constant`.  In terms of this\n`constant`, the output shape is\n\n    merged.shape = [max(indices)] + constant\n\nValues may be merged in parallel, so if an index appears in both `indices[m][i]`\nand `indices[n][j]`, the result may be invalid. This differs from the normal\nDynamicStitch operator that defines the behavior in that case.\n\nFor example:\n\n```python\n    indices[0] = 6\n    indices[1] = [4, 1]\n    indices[2] = [[5, 2], [0, 3]]\n    data[0] = [61, 62]\n    data[1] = [[41, 42], [11, 12]]\n    data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]\n    merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],\n              [51, 52], [61, 62]]\n```\n\nThis method can be used to merge partitions created by `dynamic_partition`\nas illustrated on the following example:\n\n```python\n    # Apply function (increments x_i) on elements for which a certain condition\n    # apply (x_i != -1 in this example).\n    x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])\n    condition_mask=tf.not_equal(x,tf.constant(-1.))\n    partitioned_data = tf.dynamic_partition(\n        x, tf.cast(condition_mask, tf.int32) , 2)\n    partitioned_data[1] = partitioned_data[1] + 1.0\n    condition_indices = tf.dynamic_partition(\n        tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)\n    x = tf.dynamic_stitch(condition_indices, partitioned_data)\n    # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain\n    # unchanged.\n```\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/DynamicStitch.png\" alt>\n</div>"
 }
 op {
   name: "ParallelInterleaveDataset"
@@ -16538,6 +15159,14 @@ op {
     name: "sloppy"
     type: DT_BOOL
   }
+  input_arg {
+    name: "buffer_output_elements"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "prefetch_input_elements"
+    type: DT_INT64
+  }
   output_arg {
     name: "handle"
     type: DT_VARIANT
@@ -16545,7 +15174,6 @@ op {
   attr {
     name: "f"
     type: "func"
-    description: "A function mapping elements of `input_dataset`, concatenated with\n`other_arguments`, to a Dataset variant that contains elements matching\n`output_types` and `output_shapes`."
   }
   attr {
     name: "Targuments"
@@ -16564,8 +15192,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that applies `f` to the outputs of `input_dataset`."
-  description: "The resulting dataset is similar to the `InterleaveDataset`, with the exception\nthat if retrieving the next value from a dataset would cause the requester to\nblock, it will skip that input dataset. This dataset is especially useful\nwhen loading data from a variable-latency datastores (e.g. HDFS, GCS), as it\nallows the training step to proceed so long as some data is available.\n\n!! WARNING !! This dataset is not deterministic!"
 }
 op {
   name: "ParallelMapDataset"
@@ -16579,7 +15205,6 @@ op {
   }
   input_arg {
     name: "num_parallel_calls"
-    description: "The number of concurrent invocations of `f` that process\nelements from `input_dataset` in parallel."
     type: DT_INT32
   }
   output_arg {
@@ -16607,39 +15232,31 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that applies `f` to the outputs of `input_dataset`."
-  description: "Unlike a \"MapDataset\", which applies `f` sequentially, this dataset invokes up\nto `num_parallel_calls` copies of `f` in parallel."
 }
 op {
   name: "ParameterizedTruncatedNormal"
   input_arg {
     name: "shape"
-    description: "The shape of the output tensor. Batches are indexed by the 0th dimension."
     type_attr: "T"
   }
   input_arg {
     name: "means"
-    description: "The mean parameter of each batch."
     type_attr: "dtype"
   }
   input_arg {
     name: "stdevs"
-    description: "The standard deviation parameter of each batch. Must be greater than 0."
     type_attr: "dtype"
   }
   input_arg {
     name: "minvals"
-    description: "The minimum cutoff. May be -infinity."
     type_attr: "dtype"
   }
   input_arg {
     name: "maxvals"
-    description: "The maximum cutoff. May be +infinity, and must be more than the minval\nfor each batch."
     type_attr: "dtype"
   }
   output_arg {
     name: "output"
-    description: "A matrix of shape num_batches x samples_per_batch, filled with random\ntruncated normal values using the parameters for each row."
     type_attr: "dtype"
   }
   attr {
@@ -16648,7 +15265,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either `seed` or `seed2` are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -16656,15 +15272,14 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of the output."
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -16680,37 +15295,30 @@ op {
       }
     }
   }
-  summary: "Outputs random values from a normal distribution. The parameters may each be a"
-  description: "scalar which applies to the entire output, or a vector of length shape[0] which\nstores the parameters for each batch."
   is_stateful: true
 }
 op {
   name: "ParseExample"
   input_arg {
     name: "serialized"
-    description: "A vector containing a batch of binary serialized Example protos."
     type: DT_STRING
   }
   input_arg {
     name: "names"
-    description: "A vector containing the names of the serialized protos.\nMay contain, for example, table key (descriptive) names for the\ncorresponding serialized protos.  These are purely useful for debugging\npurposes, and the presence of values here has no effect on the output.\nMay also be an empty vector if no names are available.\nIf non-empty, this vector must be the same length as \"serialized\"."
     type: DT_STRING
   }
   input_arg {
     name: "sparse_keys"
-    description: "A list of Nsparse string Tensors (scalars).\nThe keys expected in the Examples\' features associated with sparse values."
     type: DT_STRING
     number_attr: "Nsparse"
   }
   input_arg {
     name: "dense_keys"
-    description: "A list of Ndense string Tensors (scalars).\nThe keys expected in the Examples\' features associated with dense values."
     type: DT_STRING
     number_attr: "Ndense"
   }
   input_arg {
     name: "dense_defaults"
-    description: "A list of Ndense Tensors (some may be empty).\ndense_defaults[j] provides default values\nwhen the example\'s feature_map lacks dense_key[j].  If an empty Tensor is\nprovided for dense_defaults[j], then the Feature dense_keys[j] is required.\nThe input type is inferred from dense_defaults[j], even when it\'s empty.\nIf dense_defaults[j] is not empty, and dense_shapes[j] is fully defined,\nthen the shape of dense_defaults[j] must match that of dense_shapes[j].\nIf dense_shapes[j] has an undefined major dimension (variable strides dense\nfeature), dense_defaults[j] must contain a single element:\nthe padding element."
     type_list_attr: "Tdense"
   }
   output_arg {
@@ -16744,7 +15352,6 @@ op {
   attr {
     name: "sparse_types"
     type: "list(type)"
-    description: "A list of Nsparse types; the data types of data in each Feature\ngiven in sparse_keys.\nCurrently the ParseExample supports DT_FLOAT (FloatList),\nDT_INT64 (Int64List), and DT_STRING (BytesList)."
     has_minimum: true
     allowed_values {
       list {
@@ -16769,55 +15376,118 @@ op {
   attr {
     name: "dense_shapes"
     type: "list(shape)"
-    description: "A list of Ndense shapes; the shapes of data in each Feature\ngiven in dense_keys.\nThe number of elements in the Feature corresponding to dense_key[j]\nmust always equal dense_shapes[j].NumEntries().\nIf dense_shapes[j] == (D0, D1, ..., DN) then the shape of output\nTensor dense_values[j] will be (|serialized|, D0, D1, ..., DN):\nThe dense outputs are just the inputs row-stacked by batch.\nThis works for dense_shapes[j] = (-1, D1, ..., DN).  In this case\nthe shape of the output Tensor dense_values[j] will be\n(|serialized|, M, D1, .., DN), where M is the maximum number of blocks\nof elements of length D1 * .... * DN, across all minibatch entries\nin the input.  Any minibatch entry with less than M blocks of elements of\nlength D1 * ... * DN will be padded with the corresponding default_value\nscalar element along the second dimension."
     has_minimum: true
   }
-  summary: "Transforms a vector of brain.Example protos (as strings) into typed tensors."
 }
 op {
-  name: "ParseSingleSequenceExample"
+  name: "ParseSingleExample"
   input_arg {
     name: "serialized"
-    description: "A scalar containing a binary serialized SequenceExample proto."
     type: DT_STRING
   }
   input_arg {
-    name: "feature_list_dense_missing_assumed_empty"
-    description: "A vector listing the\nFeatureList keys which may be missing from the SequenceExample.  If the\nassociated FeatureList is missing, it is treated as empty.  By default,\nany FeatureList not listed in this vector must exist in the SequenceExample."
+    name: "dense_defaults"
+    type_list_attr: "Tdense"
+  }
+  output_arg {
+    name: "sparse_indices"
+    type: DT_INT64
+    number_attr: "num_sparse"
+  }
+  output_arg {
+    name: "sparse_values"
+    type_list_attr: "sparse_types"
+  }
+  output_arg {
+    name: "sparse_shapes"
+    type: DT_INT64
+    number_attr: "num_sparse"
+  }
+  output_arg {
+    name: "dense_values"
+    type_list_attr: "Tdense"
+  }
+  attr {
+    name: "num_sparse"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "sparse_keys"
+    type: "list(string)"
+    has_minimum: true
+  }
+  attr {
+    name: "dense_keys"
+    type: "list(string)"
+    has_minimum: true
+  }
+  attr {
+    name: "sparse_types"
+    type: "list(type)"
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "Tdense"
+    type: "list(type)"
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "dense_shapes"
+    type: "list(shape)"
+    has_minimum: true
+  }
+}
+op {
+  name: "ParseSingleSequenceExample"
+  input_arg {
+    name: "serialized"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "feature_list_dense_missing_assumed_empty"
     type: DT_STRING
   }
   input_arg {
     name: "context_sparse_keys"
-    description: "A list of Ncontext_sparse string Tensors (scalars).\nThe keys expected in the Examples\' features associated with context_sparse\nvalues."
     type: DT_STRING
     number_attr: "Ncontext_sparse"
   }
   input_arg {
     name: "context_dense_keys"
-    description: "A list of Ncontext_dense string Tensors (scalars).\nThe keys expected in the SequenceExamples\' context features associated with\ndense values."
     type: DT_STRING
     number_attr: "Ncontext_dense"
   }
   input_arg {
     name: "feature_list_sparse_keys"
-    description: "A list of Nfeature_list_sparse string Tensors\n(scalars).  The keys expected in the FeatureLists associated with sparse\nvalues."
     type: DT_STRING
     number_attr: "Nfeature_list_sparse"
   }
   input_arg {
     name: "feature_list_dense_keys"
-    description: "A list of Nfeature_list_dense string Tensors (scalars).\nThe keys expected in the SequenceExamples\' feature_lists associated\nwith lists of dense values."
     type: DT_STRING
     number_attr: "Nfeature_list_dense"
   }
   input_arg {
     name: "context_dense_defaults"
-    description: "A list of Ncontext_dense Tensors (some may be empty).\ncontext_dense_defaults[j] provides default values\nwhen the SequenceExample\'s context map lacks context_dense_key[j].\nIf an empty Tensor is provided for context_dense_defaults[j],\nthen the Feature context_dense_keys[j] is required.\nThe input type is inferred from context_dense_defaults[j], even when it\'s\nempty.  If context_dense_defaults[j] is not empty, its shape must match\ncontext_dense_shapes[j]."
     type_list_attr: "Tcontext_dense"
   }
   input_arg {
     name: "debug_name"
-    description: "A scalar containing the name of the serialized proto.\nMay contain, for example, table key (descriptive) name for the\ncorresponding serialized proto.  This is purely useful for debugging\npurposes, and the presence of values here has no effect on the output.\nMay also be an empty scalar if no name is available."
     type: DT_STRING
   }
   output_arg {
@@ -16895,7 +15565,6 @@ op {
       list {
       }
     }
-    description: "A list of Ncontext_sparse types; the data types of data in\neach context Feature given in context_sparse_keys.\nCurrently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),\nDT_INT64 (Int64List), and DT_STRING (BytesList)."
     has_minimum: true
     allowed_values {
       list {
@@ -16944,7 +15613,6 @@ op {
       list {
       }
     }
-    description: "A list of Ncontext_dense shapes; the shapes of data in\neach context Feature given in context_dense_keys.\nThe number of elements in the Feature corresponding to context_dense_key[j]\nmust always equal context_dense_shapes[j].NumEntries().\nThe shape of context_dense_values[j] will match context_dense_shapes[j]."
     has_minimum: true
   }
   attr {
@@ -16954,7 +15622,6 @@ op {
       list {
       }
     }
-    description: "A list of Nfeature_list_sparse types; the data types\nof data in each FeatureList given in feature_list_sparse_keys.\nCurrently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),\nDT_INT64 (Int64List), and DT_STRING (BytesList)."
     has_minimum: true
     allowed_values {
       list {
@@ -16971,41 +15638,33 @@ op {
       list {
       }
     }
-    description: "A list of Nfeature_list_dense shapes; the shapes of\ndata in each FeatureList given in feature_list_dense_keys.\nThe shape of each Feature in the FeatureList corresponding to\nfeature_list_dense_key[j] must always equal\nfeature_list_dense_shapes[j].NumEntries()."
     has_minimum: true
   }
-  summary: "Transforms a scalar brain.SequenceExample proto (as strings) into typed tensors."
 }
 op {
   name: "ParseTensor"
   input_arg {
     name: "serialized"
-    description: "A scalar string containing a serialized TensorProto proto."
     type: DT_STRING
   }
   output_arg {
     name: "output"
-    description: "A Tensor of type `out_type`."
     type_attr: "out_type"
   }
   attr {
     name: "out_type"
     type: "type"
-    description: "The type of the serialized tensor.  The provided type must match the\ntype of the serialized tensor and no implicit conversion will take place."
   }
-  summary: "Transforms a serialized tensorflow.TensorProto proto into a Tensor."
 }
 op {
   name: "Placeholder"
   output_arg {
     name: "output"
-    description: "A placeholder tensor that must be replaced using the feed mechanism."
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of elements in the tensor."
   }
   attr {
     name: "shape"
@@ -17015,30 +15674,22 @@ op {
         unknown_rank: true
       }
     }
-    description: "(Optional) The shape of the tensor. If the shape has 0 dimensions, the\nshape is unconstrained."
   }
-  summary: "A placeholder op for a value that will be fed into the computation."
-  description: "N.B. This operation will fail with an error if it is executed. It is\nintended as a way to represent a value that will always be fed, and to\nprovide attrs that enable the fed value to be checked at runtime."
 }
 op {
   name: "PlaceholderV2"
   output_arg {
     name: "output"
-    description: "A placeholder tensor that must be replaced using the feed mechanism."
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of elements in the tensor."
   }
   attr {
     name: "shape"
     type: "shape"
-    description: "The shape of the tensor. The shape can be any partially-specified\nshape.  To be unconstrained, pass in a shape with unknown rank."
   }
-  summary: "A placeholder op for a value that will be fed into the computation."
-  description: "N.B. This operation will fail with an error if it is executed. It is\nintended as a way to represent a value that will always be fed, and to\nprovide attrs that enable the fed value to be checked at runtime."
   deprecation {
     version: 23
     explanation: "Placeholder now behaves the same as PlaceholderV2."
@@ -17048,25 +15699,20 @@ op {
   name: "PlaceholderWithDefault"
   input_arg {
     name: "input"
-    description: "The default value to produce when `output` is not fed."
     type_attr: "dtype"
   }
   output_arg {
     name: "output"
-    description: "A placeholder tensor that defaults to `input` if it is not fed."
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of elements in the tensor."
   }
   attr {
     name: "shape"
     type: "shape"
-    description: "The (possibly partial) shape of the tensor."
   }
-  summary: "A placeholder op that passes through `input` when its output is not fed."
 }
 op {
   name: "Polygamma"
@@ -17092,8 +15738,6 @@ op {
       }
     }
   }
-  summary: "Compute the polygamma function \\\\(\\psi^{(n)}(x)\\\\)."
-  description: "The polygamma function is defined as:\n\n\n\\\\(\\psi^{(n)}(x) = \\frac{d^n}{dx^n} \\psi(x)\\\\)\n\nwhere \\\\(\\psi(x)\\\\) is the digamma function."
 }
 op {
   name: "PopulationCount"
@@ -17121,8 +15765,6 @@ op {
       }
     }
   }
-  summary: "Computes element-wise population count (a.k.a. popcount, bitsum, bitcount)."
-  description: "For each entry in `x`, calculates the number of `1` (on) bits in the binary\nrepresentation of that entry.\n\n**NOTE**: It is more efficient to first `tf.bitcast` your tensors into\n`int32` or `int64` and perform the bitcount on the result, than to feed in\n8- or 16-bit inputs and then aggregate the resulting counts."
 }
 op {
   name: "Pow"
@@ -17144,6 +15786,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -17153,8 +15796,6 @@ op {
       }
     }
   }
-  summary: "Computes the power of one value to another."
-  description: "Given a tensor `x` and a tensor `y`, this operation computes \\\\(x^y\\\\) for\ncorresponding elements in `x` and `y`. For example:\n\n```\n# tensor \'x\' is [[2, 2]], [3, 3]]\n# tensor \'y\' is [[8, 16], [2, 3]]\ntf.pow(x, y) ==> [[256, 65536], [9, 27]]\n```"
 }
 op {
   name: "PrefetchDataset"
@@ -17164,7 +15805,6 @@ op {
   }
   input_arg {
     name: "buffer_size"
-    description: "The maximum number of elements to buffer in an iterator over\nthis dataset."
     type: DT_INT64
   }
   output_arg {
@@ -17183,18 +15823,15 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that asynchronously prefetches elements from `input_dataset`."
 }
 op {
   name: "PreventGradient"
   input_arg {
     name: "input"
-    description: "any tensor."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "the same input tensor."
     type_attr: "T"
   }
   attr {
@@ -17207,26 +15844,20 @@ op {
     default_value {
       s: ""
     }
-    description: "Will be printed in the error when anyone tries to differentiate\nthis operation."
   }
-  summary: "An identity op that triggers an error if a gradient is requested."
-  description: "When executed in a graph, this op outputs its input tensor as-is.\n\nWhen building ops to compute gradients, the TensorFlow gradient system\nwill return an error when trying to lookup the gradient of this op,\nbecause no gradient must ever be registered for this function.  This\nop exists to prevent subtle bugs from silently returning unimplemented\ngradients in some corner cases."
 }
 op {
   name: "Print"
   input_arg {
     name: "input"
-    description: "The tensor passed to `output`"
     type_attr: "T"
   }
   input_arg {
     name: "data"
-    description: "A list of tensors to print out when op is evaluated."
     type_list_attr: "U"
   }
   output_arg {
     name: "output"
-    description: "= The unmodified `input` tensor"
     type_attr: "T"
   }
   attr {
@@ -17244,7 +15875,6 @@ op {
     default_value {
       s: ""
     }
-    description: "A string, prefix of the error message."
   }
   attr {
     name: "first_n"
@@ -17252,7 +15882,6 @@ op {
     default_value {
       i: -1
     }
-    description: "Only log `first_n` number of times. -1 disables logging."
   }
   attr {
     name: "summarize"
@@ -17260,17 +15889,13 @@ op {
     default_value {
       i: 3
     }
-    description: "Only print this many entries of each tensor."
   }
-  summary: "Prints a list of tensors."
-  description: "Passes `input` through to `output` and prints `data` when evaluating."
   is_stateful: true
 }
 op {
   name: "PriorityQueue"
   output_arg {
     name: "handle"
-    description: "The handle to the queue."
     type: DT_STRING
     is_ref: true
   }
@@ -17281,13 +15906,11 @@ op {
       list {
       }
     }
-    description: "The type of each component in a value."
     has_minimum: true
   }
   attr {
     name: "shapes"
     type: "list(shape)"
-    description: "The shape of each component in a value. The length of this attr must\nbe either 0 or the same as the length of component_types. If the length of\nthis attr is 0, the shapes of queue elements are not constrained, and\nonly one element may be dequeued at a time."
     has_minimum: true
   }
   attr {
@@ -17296,7 +15919,6 @@ op {
     default_value {
       i: -1
     }
-    description: "The upper bound on the number of elements in this queue.\nNegative numbers mean no limit."
   }
   attr {
     name: "container"
@@ -17304,7 +15926,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -17312,17 +15933,13 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue will be shared under the given name\nacross multiple sessions."
   }
-  summary: "A queue that produces elements sorted by the first component value."
-  description: "Note that the PriorityQueue requires the first component of any element\nto be a scalar int64, in addition to the other elements declared by\ncomponent_types.  Therefore calls to Enqueue and EnqueueMany (resp. Dequeue\nand DequeueMany) on a PriorityQueue will all require (resp. output) one extra\nentry in their input (resp. output) lists."
   is_stateful: true
 }
 op {
   name: "PriorityQueueV2"
   output_arg {
     name: "handle"
-    description: "The handle to the queue."
     type: DT_RESOURCE
   }
   attr {
@@ -17332,13 +15949,11 @@ op {
       list {
       }
     }
-    description: "The type of each component in a value."
     has_minimum: true
   }
   attr {
     name: "shapes"
     type: "list(shape)"
-    description: "The shape of each component in a value. The length of this attr must\nbe either 0 or the same as the length of component_types. If the length of\nthis attr is 0, the shapes of queue elements are not constrained, and\nonly one element may be dequeued at a time."
     has_minimum: true
   }
   attr {
@@ -17347,7 +15962,6 @@ op {
     default_value {
       i: -1
     }
-    description: "The upper bound on the number of elements in this queue.\nNegative numbers mean no limit."
   }
   attr {
     name: "container"
@@ -17355,7 +15969,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -17363,27 +15976,21 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue will be shared under the given name\nacross multiple sessions."
   }
-  summary: "A queue that produces elements sorted by the first component value."
-  description: "Note that the PriorityQueue requires the first component of any element\nto be a scalar int64, in addition to the other elements declared by\ncomponent_types.  Therefore calls to Enqueue and EnqueueMany (resp. Dequeue\nand DequeueMany) on a PriorityQueue will all require (resp. output) one extra\nentry in their input (resp. output) lists."
   is_stateful: true
 }
 op {
   name: "Prod"
   input_arg {
     name: "input"
-    description: "The tensor to reduce."
     type_attr: "T"
   }
   input_arg {
     name: "reduction_indices"
-    description: "The dimensions to reduce. Must be in the range\n`[-rank(input), rank(input))`."
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "The reduced tensor."
     type_attr: "T"
   }
   attr {
@@ -17392,7 +15999,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, retain reduced dimensions with length 1."
   }
   attr {
     name: "T"
@@ -17401,17 +16007,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -17431,40 +16038,31 @@ op {
       }
     }
   }
-  summary: "Computes the product of elements across dimensions of a tensor."
-  description: "Reduces `input` along the dimensions given in `reduction_indices`. Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_indices`. If `keep_dims` is true, the reduced dimensions are\nretained with length 1."
 }
 op {
   name: "PyFunc"
   input_arg {
     name: "input"
-    description: "List of Tensors that will provide input to the Op."
     type_list_attr: "Tin"
   }
   output_arg {
     name: "output"
-    description: "The outputs from the Op."
     type_list_attr: "Tout"
   }
   attr {
     name: "token"
     type: "string"
-    description: "A token representing a registered python function in this address space."
   }
   attr {
     name: "Tin"
     type: "list(type)"
-    description: "Data types of the inputs to the op."
     has_minimum: true
   }
   attr {
     name: "Tout"
     type: "list(type)"
-    description: "Data types of the outputs from the op.\nThe length of the list specifies the number of outputs."
     has_minimum: true
   }
-  summary: "Invokes a python function to compute func(input)->output."
-  description: "This operation is considered stateful. For a stateless version, see\nPyFuncStateless."
   is_stateful: true
 }
 op {
@@ -17491,23 +16089,19 @@ op {
     type: "list(type)"
     has_minimum: true
   }
-  summary: "A stateless version of PyFunc."
 }
 op {
   name: "Qr"
   input_arg {
     name: "input"
-    description: "A tensor of shape `[..., M, N]` whose inner-most 2 dimensions\nform matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`."
     type_attr: "T"
   }
   output_arg {
     name: "q"
-    description: "Orthonormal basis for range of `a`. If `full_matrices` is `False` then\nshape is `[..., M, P]`; if `full_matrices` is `True` then shape is\n`[..., M, M]`."
     type_attr: "T"
   }
   output_arg {
     name: "r"
-    description: "Triangular factor. If `full_matrices` is `False` then shape is\n`[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`."
     type_attr: "T"
   }
   attr {
@@ -17516,7 +16110,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, compute full-sized `q` and `r`. If false\n(the default), compute only the leading `P` columns of `q`."
   }
   attr {
     name: "T"
@@ -17530,8 +16123,6 @@ op {
       }
     }
   }
-  summary: "Computes the QR decompositions of one or more matrices."
-  description: "Computes the QR decomposition of each inner matrix in `tensor` such that\n`tensor[..., :, :] = q[..., :, :] * r[..., :,:])`\n\n```python\n# a is a tensor.\n# q is a tensor of orthonormal matrices.\n# r is a tensor of upper triangular matrices.\nq, r = qr(a)\nq_full, r_full = qr(a, full_matrices=True)\n```"
 }
 op {
   name: "QuantizeAndDequantize"
@@ -17583,12 +16174,12 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Use QuantizeAndDequantizeV2 instead."
   deprecation {
     version: 22
     explanation: "Replaced by QuantizeAndDequantizeV2"
@@ -17598,17 +16189,14 @@ op {
   name: "QuantizeAndDequantizeV2"
   input_arg {
     name: "input"
-    description: "Tensor to quantize and then dequantize."
     type_attr: "T"
   }
   input_arg {
     name: "input_min"
-    description: "If range_given, this is the min of the range, otherwise this input\nwill be ignored."
     type_attr: "T"
   }
   input_arg {
     name: "input_max"
-    description: "If range_given, this is the max of the range, otherwise this input\nwill be ignored."
     type_attr: "T"
   }
   output_arg {
@@ -17621,7 +16209,6 @@ op {
     default_value {
       b: true
     }
-    description: "If the quantization is signed or unsigned."
   }
   attr {
     name: "num_bits"
@@ -17629,7 +16216,6 @@ op {
     default_value {
       i: 8
     }
-    description: "The bitwidth of the quantization."
   }
   attr {
     name: "range_given"
@@ -17637,20 +16223,18 @@ op {
     default_value {
       b: false
     }
-    description: "If the range is given or should be computed from the tensor."
   }
   attr {
     name: "T"
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Quantizes then dequantizes a tensor."
-  description: "This op simulates the precision loss from the quantized forward pass by:\n1. Quantizing the tensor to fixed point numbers, which should match the target\n   quantization method when it is used in inference.\n2. Dequantizing it back to floating point numbers for the following ops, most\n   likely matmul.\n\nThere are different ways to quantize. This version does not use the full range\nof the output type, choosing to elide the lowest possible value for symmetry\n(e.g., output range is -127 to 127, not -128 to 127 for signed 8 bit\nquantization), so that 0.0 maps to 0.\n\nTo perform this op, we first find the range of values in our tensor. The range\nwe use is always centered on 0, so we find m such that\n\n1. m = max(abs(input_min), abs(input_max)) if range_given is true,\n2. m = max(abs(min_elem(input)), abs(max_elem(input))) otherwise.\n\nOur input tensor range is then [-m, m].\n\nNext, we choose our fixed-point quantization buckets, [min_fixed, max_fixed].\nIf signed_input is true, this is\n\n  [min_fixed, max_fixed ] =\n      [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1].\n\nOtherwise, if signed_input is false, the fixed-point range is\n\n  [min_fixed, max_fixed] = [0, (1 << num_bits) - 1].\n\nFrom this we compute our scaling factor, s:\n\n  s = (max_fixed - min_fixed) / (2 * m).\n\nNow we can quantize and dequantize the elements of our tensor.  An element e\nis transformed into e\':\n\n  e\' = (e * s).round_to_nearest() / s.\n\nNote that we have a different number of buckets in the signed vs. unsigned\ncases.  For example, if num_bits == 8, we get 254 buckets in the signed case\nvs. 255 in the unsigned case.\n\nFor example, suppose num_bits = 8 and m = 1.  Then\n\n  [min_fixed, max_fixed] = [-127, 127], and\n  s = (127 + 127) / 2 = 127.\n\nGiven the vector {-1, -0.5, 0, 0.3}, this is quantized to\n{-127, -63, 0, 38}, and dequantized to {-1, -63.0/127, 0, 38.0/127}."
 }
 op {
   name: "QuantizeAndDequantizeV3"
@@ -17693,13 +16277,12 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Quantizes then dequantizes a tensor."
-  description: "This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a\ntensor, so its value can change during training."
 }
 op {
   name: "QuantizeDownAndShrinkRange"
@@ -17709,12 +16292,10 @@ op {
   }
   input_arg {
     name: "input_min"
-    description: "The float value that the minimum quantized input value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "input_max"
-    description: "The float value that the maximum quantized input value represents."
     type: DT_FLOAT
   }
   output_arg {
@@ -17723,44 +16304,38 @@ op {
   }
   output_arg {
     name: "output_min"
-    description: "The float value that the minimum quantized output value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "output_max"
-    description: "The float value that the maximum quantized output value represents."
     type: DT_FLOAT
   }
   attr {
     name: "Tinput"
     type: "type"
-    description: "The type of the input."
     allowed_values {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
   attr {
     name: "out_type"
     type: "type"
-    description: "The type of the output. Should be a lower bit depth than Tinput."
     allowed_values {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
-  summary: "Convert the quantized \'input\' tensor into a lower-precision \'output\', using the"
-  description: "actual distribution of the values to maximize the usage of the lower bit depth\nand adjusting the output min and max ranges accordingly.\n\n[input_min, input_max] are scalar floats that specify the range for the float\ninterpretation of the \'input\' data. For example, if input_min is -1.0f and\ninput_max is 1.0f, and we are dealing with quint16 quantized data, then a 0\nvalue in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.\n\nThis operator tries to squeeze as much precision as possible into an output with\na lower bit depth by calculating the actual min and max values found in the\ndata. For example, maybe that quint16 input has no values lower than 16,384 and\nnone higher than 49,152. That means only half the range is actually needed, all\nthe float interpretations are between -0.5f and 0.5f, so if we want to compress\nthe data into a quint8 output, we can use that range rather than the theoretical\n-1.0f to 1.0f that is suggested by the input min and max.\n\nIn practice, this is most useful for taking output from operations like\nQuantizedMatMul that can produce higher bit-depth outputs than their inputs and\nmay have large potential output ranges, but in practice have a distribution of\ninput values that only uses a small fraction of the possible range. By feeding\nthat output into this operator, we can reduce it from 32 bits down to 8 with\nminimal loss of accuracy."
 }
 op {
   name: "QuantizeV2"
@@ -17770,27 +16345,22 @@ op {
   }
   input_arg {
     name: "min_range"
-    description: "The minimum scalar value possibly produced for the input."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_range"
-    description: "The maximum scalar value possibly produced for the input."
     type: DT_FLOAT
   }
   output_arg {
     name: "output"
-    description: "The quantized data produced from the float input."
     type_attr: "T"
   }
   output_arg {
     name: "output_min"
-    description: "The actual minimum scalar value used for the output."
     type: DT_FLOAT
   }
   output_arg {
     name: "output_max"
-    description: "The actual maximum scalar value used for the output."
     type: DT_FLOAT
   }
   attr {
@@ -17800,9 +16370,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -17833,8 +16403,6 @@ op {
       }
     }
   }
-  summary: "Quantize the \'input\' tensor of type float to \'output\' tensor of type \'T\'."
-  description: "[min_range, max_range] are scalar floats that specify the range for\nthe \'input\' data. The \'mode\' attribute controls exactly which calculations are\nused to convert the float values to their quantized equivalents.  The\n\'round_mode\' attribute controls which rounding tie-breaking algorithm is used\nwhen rounding float values to their quantized equivalents.\n\nIn \'MIN_COMBINED\' mode, each value of the tensor will undergo the following:\n\n```\nout[i] = (in[i] - min_range) * range(T) / (max_range - min_range)\nif T == qint8, out[i] -= (range(T) + 1) / 2.0\n```\nhere `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`\n\n*MIN_COMBINED Mode Example*\n\nAssume the input is type float and has a possible range of [0.0, 6.0] and the\noutput type is quint8 ([0, 255]). The min_range and max_range values should be\nspecified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each\nvalue of the input by 255/6 and cast to quint8.\n\nIf the output type was qint8 ([-128, 127]), the operation will additionally\nsubtract each value by 128 prior to casting, so that the range of values aligns\nwith the range of qint8.\n\nIf the mode is \'MIN_FIRST\', then this approach is used:\n\n```\nnum_discrete_values = 1 << (# of bits in T)\nrange_adjust = num_discrete_values / (num_discrete_values - 1)\nrange = (range_max - range_min) * range_adjust\nrange_scale = num_discrete_values / range\nquantized = round(input * range_scale) - round(range_min * range_scale) +\n  numeric_limits<T>::min()\nquantized = max(quantized, numeric_limits<T>::min())\nquantized = min(quantized, numeric_limits<T>::max())\n```\n\nThe biggest difference between this and MIN_COMBINED is that the minimum range\nis rounded first, before it\'s subtracted from the rounded value. With\nMIN_COMBINED, a small bias is introduced where repeated iterations of quantizing\nand dequantizing will introduce a larger and larger error.\n\n*SCALED mode Example*\n\n`SCALED` mode matches the quantization approach used in\n`QuantizeAndDequantize{V2|V3}`.\n\nIf the mode is `SCALED`, we do not use the full range of the output type,\nchoosing to elide the lowest possible value for symmetry (e.g., output range is\n-127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to\n0.\n\nWe first find the range of values in our tensor. The\nrange we use is always centered on 0, so we find m such that\n```c++\n  m = max(abs(input_min), abs(input_max))\n```\n\nOur input tensor range is then `[-m, m]`.\n\nNext, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.\nIf T is signed, this is\n```\n  num_bits = sizeof(T) * 8\n  [min_fixed, max_fixed] =\n      [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]\n```\n\nOtherwise, if T is unsigned, the fixed-point range is\n```\n  [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]\n```\n\nFrom this we compute our scaling factor, s:\n```c++\n  s = (max_fixed - min_fixed) / (2 * m)\n```\n\nNow we can quantize the elements of our tensor:\n```c++\nresult = round(input * s)\n```\n\nOne thing to watch out for is that the operator may choose to adjust the\nrequested minimum and maximum values slightly during the quantization process,\nso you should always use the output ports as the range for further calculations.\nFor example, if the requested minimum and maximum values are close to equal,\nthey will be separated by a small epsilon value to prevent ill-formed quantized\nbuffers from being created. Otherwise, you can end up with buffers where all the\nquantized values map to the same float value, which causes problems for\noperations that have to perform further calculations on them."
 }
 op {
   name: "QuantizedAdd"
@@ -17848,22 +16416,18 @@ op {
   }
   input_arg {
     name: "min_x"
-    description: "The float value that the lowest quantized `x` value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_x"
-    description: "The float value that the highest quantized `x` value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "min_y"
-    description: "The float value that the lowest quantized `y` value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_y"
-    description: "The float value that the highest quantized `y` value represents."
     type: DT_FLOAT
   }
   output_arg {
@@ -17872,12 +16436,10 @@ op {
   }
   output_arg {
     name: "min_z"
-    description: "The float value that the lowest quantized output value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "max_z"
-    description: "The float value that the highest quantized output value represents.\n\n*NOTE*: `QuantizedAdd` supports limited forms of broadcasting. More about\nbroadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
     type: DT_FLOAT
   }
   attr {
@@ -17887,9 +16449,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -17900,9 +16462,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -17916,30 +16478,26 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
-  summary: "Returns x + y element-wise, working on quantized buffers."
   is_commutative: true
 }
 op {
   name: "QuantizedAvgPool"
   input_arg {
     name: "input"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "min_input"
-    description: "The float value that the lowest quantized input value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_input"
-    description: "The float value that the highest quantized input value represents."
     type: DT_FLOAT
   }
   output_arg {
@@ -17948,12 +16506,10 @@ op {
   }
   output_arg {
     name: "min_output"
-    description: "The float value that the lowest quantized output value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "max_output"
-    description: "The float value that the highest quantized output value represents."
     type: DT_FLOAT
   }
   attr {
@@ -17963,26 +16519,23 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "The size of the window for each dimension of the input tensor.\nThe length must be 4 to match the number of dimensions of the input."
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the input\ntensor.  The length must be 4 to match the number of dimensions of the input."
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -17990,83 +16543,67 @@ op {
       }
     }
   }
-  summary: "Produces the average pool of the input tensor for quantized types."
 }
 op {
   name: "QuantizedBatchNormWithGlobalNormalization"
   input_arg {
     name: "t"
-    description: "A 4D input Tensor."
     type_attr: "Tinput"
   }
   input_arg {
     name: "t_min"
-    description: "The value represented by the lowest quantized input."
     type: DT_FLOAT
   }
   input_arg {
     name: "t_max"
-    description: "The value represented by the highest quantized input."
     type: DT_FLOAT
   }
   input_arg {
     name: "m"
-    description: "A 1D mean Tensor with size matching the last dimension of t.\nThis is the first output from tf.nn.moments,\nor a saved moving average thereof."
     type_attr: "Tinput"
   }
   input_arg {
     name: "m_min"
-    description: "The value represented by the lowest quantized mean."
     type: DT_FLOAT
   }
   input_arg {
     name: "m_max"
-    description: "The value represented by the highest quantized mean."
     type: DT_FLOAT
   }
   input_arg {
     name: "v"
-    description: "A 1D variance Tensor with size matching the last dimension of t.\nThis is the second output from tf.nn.moments,\nor a saved moving average thereof."
     type_attr: "Tinput"
   }
   input_arg {
     name: "v_min"
-    description: "The value represented by the lowest quantized variance."
     type: DT_FLOAT
   }
   input_arg {
     name: "v_max"
-    description: "The value represented by the highest quantized variance."
     type: DT_FLOAT
   }
   input_arg {
     name: "beta"
-    description: "A 1D beta Tensor with size matching the last dimension of t.\nAn offset to be added to the normalized tensor."
     type_attr: "Tinput"
   }
   input_arg {
     name: "beta_min"
-    description: "The value represented by the lowest quantized offset."
     type: DT_FLOAT
   }
   input_arg {
     name: "beta_max"
-    description: "The value represented by the highest quantized offset."
     type: DT_FLOAT
   }
   input_arg {
     name: "gamma"
-    description: "A 1D gamma Tensor with size matching the last dimension of t.\nIf \"scale_after_normalization\" is true, this tensor will be multiplied\nwith the normalized tensor."
     type_attr: "Tinput"
   }
   input_arg {
     name: "gamma_min"
-    description: "The value represented by the lowest quantized gamma."
     type: DT_FLOAT
   }
   input_arg {
     name: "gamma_max"
-    description: "The value represented by the highest quantized gamma."
     type: DT_FLOAT
   }
   output_arg {
@@ -18088,9 +16625,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18101,24 +16638,20 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
   attr {
     name: "variance_epsilon"
     type: "float"
-    description: "A small float number to avoid dividing by 0."
   }
   attr {
     name: "scale_after_normalization"
     type: "bool"
-    description: "A bool indicating whether the resulted tensor\nneeds to be multiplied with gamma."
   }
-  summary: "Quantized Batch normalization."
-  description: "This op is deprecated and will be removed in the future. Prefer\n`tf.nn.batch_normalization`."
 }
 op {
   name: "QuantizedBiasAdd"
@@ -18128,27 +16661,22 @@ op {
   }
   input_arg {
     name: "bias"
-    description: "A 1D bias Tensor with size matching the last dimension of \'input\'."
     type_attr: "T2"
   }
   input_arg {
     name: "min_input"
-    description: "The float value that the lowest quantized input value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_input"
-    description: "The float value that the highest quantized input value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "min_bias"
-    description: "The float value that the lowest quantized bias value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_bias"
-    description: "The float value that the highest quantized bias value represents."
     type: DT_FLOAT
   }
   output_arg {
@@ -18157,12 +16685,10 @@ op {
   }
   output_arg {
     name: "min_out"
-    description: "The float value that the lowest quantized output value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "max_out"
-    description: "The float value that the highest quantized output value represents."
     type: DT_FLOAT
   }
   attr {
@@ -18172,9 +16698,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18185,9 +16711,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18198,53 +16724,44 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
-  summary: "Adds Tensor \'bias\' to Tensor \'input\' for Quantized types."
-  description: "Broadcasts the values of bias on dimensions 0..N-2 of \'input\'."
 }
 op {
   name: "QuantizedConcat"
   input_arg {
     name: "concat_dim"
-    description: "0-D.  The dimension along which to concatenate.  Must be in the\nrange [0, rank(values))."
     type: DT_INT32
   }
   input_arg {
     name: "values"
-    description: "The `N` Tensors to concatenate. Their ranks and types must match,\nand their sizes must match in all dimensions except `concat_dim`."
     type_attr: "T"
     number_attr: "N"
   }
   input_arg {
     name: "input_mins"
-    description: "The minimum scalar values for each of the input tensors."
     type: DT_FLOAT
     number_attr: "N"
   }
   input_arg {
     name: "input_maxes"
-    description: "The maximum scalar values for each of the input tensors."
     type: DT_FLOAT
     number_attr: "N"
   }
   output_arg {
     name: "output"
-    description: "A `Tensor` with the concatenation of values stacked along the\n`concat_dim` dimension.  This tensor\'s shape matches that of `values` except\nin `concat_dim` where it has the sum of the sizes."
     type_attr: "T"
   }
   output_arg {
     name: "output_min"
-    description: "The float value that the minimum quantized output value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "output_max"
-    description: "The float value that the maximum quantized output value represents."
     type: DT_FLOAT
   }
   attr {
@@ -18257,7 +16774,6 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Concatenates quantized tensors along one dimension."
 }
 op {
   name: "QuantizedConv2D"
@@ -18267,27 +16783,22 @@ op {
   }
   input_arg {
     name: "filter"
-    description: "filter\'s input_depth dimension must match input\'s depth dimensions."
     type_attr: "Tfilter"
   }
   input_arg {
     name: "min_input"
-    description: "The float value that the lowest quantized input value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_input"
-    description: "The float value that the highest quantized input value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "min_filter"
-    description: "The float value that the lowest quantized filter value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_filter"
-    description: "The float value that the highest quantized filter value represents."
     type: DT_FLOAT
   }
   output_arg {
@@ -18296,12 +16807,10 @@ op {
   }
   output_arg {
     name: "min_output"
-    description: "The float value that the lowest quantized output value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "max_output"
-    description: "The float value that the highest quantized output value represents."
     type: DT_FLOAT
   }
   attr {
@@ -18311,9 +16820,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18324,9 +16833,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18340,21 +16849,19 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the input\ntensor."
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -18362,39 +16869,43 @@ op {
       }
     }
   }
-  summary: "Computes a 2D convolution given quantized 4D input and filter tensors."
-  description: "The inputs are quantized tensors where the lowest value represents the real\nnumber of the associated minimum, and the highest represents the maximum.\nThis means that you can only interpret the quantized output in the same way, by\ntaking the returned minimum and maximum values into account."
+  attr {
+    name: "dilations"
+    type: "list(int)"
+    default_value {
+      list {
+        i: 1
+        i: 1
+        i: 1
+        i: 1
+      }
+    }
+  }
 }
 op {
   name: "QuantizedInstanceNorm"
   input_arg {
     name: "x"
-    description: "A 4D input Tensor."
     type_attr: "T"
   }
   input_arg {
     name: "x_min"
-    description: "The value represented by the lowest quantized input."
     type: DT_FLOAT
   }
   input_arg {
     name: "x_max"
-    description: "The value represented by the highest quantized input."
     type: DT_FLOAT
   }
   output_arg {
     name: "y"
-    description: "A 4D Tensor."
     type_attr: "T"
   }
   output_arg {
     name: "y_min"
-    description: "The value represented by the lowest quantized output."
     type: DT_FLOAT
   }
   output_arg {
     name: "y_max"
-    description: "The value represented by the highest quantized output."
     type: DT_FLOAT
   }
   attr {
@@ -18404,9 +16915,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18416,7 +16927,6 @@ op {
     default_value {
       b: false
     }
-    description: "If True, `given_y_min` and `given_y_min`\nand `given_y_max` are used as the output range. Otherwise,\nthe implementation computes the output range."
   }
   attr {
     name: "given_y_min"
@@ -18424,7 +16934,6 @@ op {
     default_value {
       f: 0
     }
-    description: "Output in `y_min` if `output_range_given` is True."
   }
   attr {
     name: "given_y_max"
@@ -18432,7 +16941,6 @@ op {
     default_value {
       f: 0
     }
-    description: "Output in `y_max` if `output_range_given` is True."
   }
   attr {
     name: "variance_epsilon"
@@ -18440,7 +16948,6 @@ op {
     default_value {
       f: 1e-05
     }
-    description: "A small float number to avoid dividing by 0."
   }
   attr {
     name: "min_separation"
@@ -18448,40 +16955,32 @@ op {
     default_value {
       f: 0.001
     }
-    description: "Minimum value of `y_max - y_min`"
   }
-  summary: "Quantized Instance normalization."
 }
 op {
   name: "QuantizedMatMul"
   input_arg {
     name: "a"
-    description: "Must be a two-dimensional tensor."
     type_attr: "T1"
   }
   input_arg {
     name: "b"
-    description: "Must be a two-dimensional tensor."
     type_attr: "T2"
   }
   input_arg {
     name: "min_a"
-    description: "The float value that the lowest quantized `a` value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_a"
-    description: "The float value that the highest quantized `a` value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "min_b"
-    description: "The float value that the lowest quantized `b` value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_b"
-    description: "The float value that the highest quantized `b` value represents."
     type: DT_FLOAT
   }
   output_arg {
@@ -18490,12 +16989,10 @@ op {
   }
   output_arg {
     name: "min_out"
-    description: "The float value that the lowest quantized output value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "max_out"
-    description: "The float value that the highest quantized output value represents."
     type: DT_FLOAT
   }
   attr {
@@ -18505,9 +17002,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18518,9 +17015,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18534,9 +17031,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18546,7 +17043,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, `a` is transposed before multiplication."
   }
   attr {
     name: "transpose_b"
@@ -18554,7 +17050,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, `b` is transposed before multiplication."
   }
   attr {
     name: "Tactivation"
@@ -18562,35 +17057,29 @@ op {
     default_value {
       type: DT_QUINT8
     }
-    description: "The type of output produced by activation function\nfollowing this operation."
     allowed_values {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
-  summary: "Perform a quantized matrix multiplication of  `a` by the matrix `b`."
-  description: "The inputs must be two-dimensional matrices and the inner dimension of\n`a` (after being transposed if `transpose_a` is non-zero) must match the\nouter dimension of `b` (after being transposed if `transposed_b` is\nnon-zero)."
 }
 op {
   name: "QuantizedMaxPool"
   input_arg {
     name: "input"
-    description: "The 4D (batch x rows x cols x depth) Tensor to MaxReduce over."
     type_attr: "T"
   }
   input_arg {
     name: "min_input"
-    description: "The float value that the lowest quantized input value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_input"
-    description: "The float value that the highest quantized input value represents."
     type: DT_FLOAT
   }
   output_arg {
@@ -18599,12 +17088,10 @@ op {
   }
   output_arg {
     name: "min_output"
-    description: "The float value that the lowest quantized output value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "max_output"
-    description: "The float value that the highest quantized output value represents."
     type: DT_FLOAT
   }
   attr {
@@ -18614,26 +17101,23 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
   attr {
     name: "ksize"
     type: "list(int)"
-    description: "The size of the window for each dimension of the input tensor.\nThe length must be 4 to match the number of dimensions of the input."
   }
   attr {
     name: "strides"
     type: "list(int)"
-    description: "The stride of the sliding window for each dimension of the input\ntensor. The length must be 4 to match the number of dimensions of the input."
   }
   attr {
     name: "padding"
     type: "string"
-    description: "The type of padding algorithm to use."
     allowed_values {
       list {
         s: "SAME"
@@ -18641,7 +17125,6 @@ op {
       }
     }
   }
-  summary: "Produces the max pool of the input tensor for quantized types."
 }
 op {
   name: "QuantizedMul"
@@ -18655,22 +17138,18 @@ op {
   }
   input_arg {
     name: "min_x"
-    description: "The float value that the lowest quantized `x` value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_x"
-    description: "The float value that the highest quantized `x` value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "min_y"
-    description: "The float value that the lowest quantized `y` value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_y"
-    description: "The float value that the highest quantized `y` value represents."
     type: DT_FLOAT
   }
   output_arg {
@@ -18679,12 +17158,10 @@ op {
   }
   output_arg {
     name: "min_z"
-    description: "The float value that the lowest quantized output value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "max_z"
-    description: "The float value that the highest quantized output value represents.\n\n*NOTE*: `QuantizedMul` supports limited forms of broadcasting. More about\nbroadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
     type: DT_FLOAT
   }
   attr {
@@ -18694,9 +17171,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18707,9 +17184,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18723,13 +17200,12 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
-  summary: "Returns x * y element-wise, working on quantized buffers."
   is_commutative: true
 }
 op {
@@ -18740,27 +17216,22 @@ op {
   }
   input_arg {
     name: "min_features"
-    description: "The float value that the lowest quantized value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_features"
-    description: "The float value that the highest quantized value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "activations"
-    description: "Has the same output shape as \"features\"."
     type_attr: "out_type"
   }
   output_arg {
     name: "min_activations"
-    description: "The float value that the lowest quantized value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "max_activations"
-    description: "The float value that the highest quantized value represents."
     type: DT_FLOAT
   }
   attr {
@@ -18770,9 +17241,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18786,13 +17257,12 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
-  summary: "Computes Quantized Rectified Linear: `max(features, 0)`"
 }
 op {
   name: "QuantizedRelu6"
@@ -18802,27 +17272,22 @@ op {
   }
   input_arg {
     name: "min_features"
-    description: "The float value that the lowest quantized value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_features"
-    description: "The float value that the highest quantized value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "activations"
-    description: "Has the same output shape as \"features\"."
     type_attr: "out_type"
   }
   output_arg {
     name: "min_activations"
-    description: "The float value that the lowest quantized value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "max_activations"
-    description: "The float value that the highest quantized value represents."
     type: DT_FLOAT
   }
   attr {
@@ -18832,9 +17297,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18848,13 +17313,12 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
-  summary: "Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`"
 }
 op {
   name: "QuantizedReluX"
@@ -18868,27 +17332,22 @@ op {
   }
   input_arg {
     name: "min_features"
-    description: "The float value that the lowest quantized value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "max_features"
-    description: "The float value that the highest quantized value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "activations"
-    description: "Has the same output shape as \"features\"."
     type_attr: "out_type"
   }
   output_arg {
     name: "min_activations"
-    description: "The float value that the lowest quantized value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "max_activations"
-    description: "The float value that the highest quantized value represents."
     type: DT_FLOAT
   }
   attr {
@@ -18898,9 +17357,9 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
@@ -18914,13 +17373,12 @@ op {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
-  summary: "Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`"
 }
 op {
   name: "QuantizedReshape"
@@ -18930,17 +17388,14 @@ op {
   }
   input_arg {
     name: "shape"
-    description: "Defines the shape of the output tensor."
     type_attr: "Tshape"
   }
   input_arg {
     name: "input_min"
-    description: "The minimum value of the input."
     type: DT_FLOAT
   }
   input_arg {
     name: "input_max"
-    description: "The maximum value of the input."
     type: DT_FLOAT
   }
   output_arg {
@@ -18949,12 +17404,10 @@ op {
   }
   output_arg {
     name: "output_min"
-    description: "This value is copied from input_min."
     type: DT_FLOAT
   }
   output_arg {
     name: "output_max"
-    description: "This value is copied from input_max."
     type: DT_FLOAT
   }
   attr {
@@ -18974,19 +17427,15 @@ op {
       }
     }
   }
-  summary: "Reshapes a quantized tensor as per the Reshape op."
-  description: "```"
 }
 op {
   name: "QuantizedResizeBilinear"
   input_arg {
     name: "images"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "size"
-    description: "= A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The\nnew size for the images."
     type: DT_INT32
   }
   input_arg {
@@ -18999,7 +17448,6 @@ op {
   }
   output_arg {
     name: "resized_images"
-    description: "4-D with shape\n`[batch, new_height, new_width, channels]`."
     type_attr: "T"
   }
   output_arg {
@@ -19027,16 +17475,12 @@ op {
     default_value {
       b: false
     }
-    description: "If true, rescale input by (new_height - 1) / (height - 1), which\nexactly aligns the 4 corners of images and resized images. If false, rescale\nby new_height / height. Treat similarly the width dimension."
   }
-  summary: "Resize quantized `images` to `size` using quantized bilinear interpolation."
-  description: "Input images and output images must be quantized types."
 }
 op {
   name: "QueueClose"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_STRING
     is_ref: true
   }
@@ -19046,16 +17490,12 @@ op {
     default_value {
       b: false
     }
-    description: "If true, all pending enqueue requests that are\nblocked on the given queue will be canceled."
   }
-  summary: "Closes the given queue."
-  description: "This operation signals that no more elements will be enqueued in the\ngiven queue. Subsequent Enqueue(Many) operations will fail.\nSubsequent Dequeue(Many) operations will continue to succeed if\nsufficient elements remain in the queue. Subsequent Dequeue(Many)\noperations that would block will fail immediately."
 }
 op {
   name: "QueueCloseV2"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_RESOURCE
   }
   attr {
@@ -19064,29 +17504,23 @@ op {
     default_value {
       b: false
     }
-    description: "If true, all pending enqueue requests that are\nblocked on the given queue will be canceled."
   }
-  summary: "Closes the given queue."
-  description: "This operation signals that no more elements will be enqueued in the\ngiven queue. Subsequent Enqueue(Many) operations will fail.\nSubsequent Dequeue(Many) operations will continue to succeed if\nsufficient elements remain in the queue. Subsequent Dequeue(Many)\noperations that would block will fail immediately."
   is_stateful: true
 }
 op {
   name: "QueueDequeue"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_STRING
     is_ref: true
   }
   output_arg {
     name: "components"
-    description: "One or more tensors that were dequeued as a tuple."
     type_list_attr: "component_types"
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a tuple."
     has_minimum: true
     minimum: 1
   }
@@ -19096,33 +17530,26 @@ op {
     default_value {
       i: -1
     }
-    description: "If the queue is empty, this operation will block for up to\ntimeout_ms milliseconds.\nNote: This option is not supported yet."
   }
-  summary: "Dequeues a tuple of one or more tensors from the given queue."
-  description: "This operation has k outputs, where k is the number of components\nin the tuples stored in the given queue, and output i is the ith\ncomponent of the dequeued tuple.\n\nN.B. If the queue is empty, this operation will block until an element\nhas been dequeued (or \'timeout_ms\' elapses, if specified)."
 }
 op {
   name: "QueueDequeueMany"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "n"
-    description: "The number of tuples to dequeue."
     type: DT_INT32
   }
   output_arg {
     name: "components"
-    description: "One or more tensors that were dequeued as a tuple."
     type_list_attr: "component_types"
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a tuple."
     has_minimum: true
     minimum: 1
   }
@@ -19132,32 +17559,25 @@ op {
     default_value {
       i: -1
     }
-    description: "If the queue has fewer than n elements, this operation\nwill block for up to timeout_ms milliseconds.\nNote: This option is not supported yet."
   }
-  summary: "Dequeues `n` tuples of one or more tensors from the given queue."
-  description: "If the queue is closed and there are fewer than `n` elements, then an\nOutOfRange error is returned.\n\nThis operation concatenates queue-element component tensors along the\n0th dimension to make a single component tensor.  All of the components\nin the dequeued tuple will have size `n` in the 0th dimension.\n\nThis operation has `k` outputs, where `k` is the number of components in\nthe tuples stored in the given queue, and output `i` is the ith\ncomponent of the dequeued tuple.\n\nN.B. If the queue is empty, this operation will block until `n` elements\nhave been dequeued (or \'timeout_ms\' elapses, if specified)."
 }
 op {
   name: "QueueDequeueManyV2"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_RESOURCE
   }
   input_arg {
     name: "n"
-    description: "The number of tuples to dequeue."
     type: DT_INT32
   }
   output_arg {
     name: "components"
-    description: "One or more tensors that were dequeued as a tuple."
     type_list_attr: "component_types"
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a tuple."
     has_minimum: true
     minimum: 1
   }
@@ -19167,34 +17587,27 @@ op {
     default_value {
       i: -1
     }
-    description: "If the queue has fewer than n elements, this operation\nwill block for up to timeout_ms milliseconds.\nNote: This option is not supported yet."
   }
-  summary: "Dequeues `n` tuples of one or more tensors from the given queue."
-  description: "If the queue is closed and there are fewer than `n` elements, then an\nOutOfRange error is returned.\n\nThis operation concatenates queue-element component tensors along the\n0th dimension to make a single component tensor.  All of the components\nin the dequeued tuple will have size `n` in the 0th dimension.\n\nThis operation has `k` outputs, where `k` is the number of components in\nthe tuples stored in the given queue, and output `i` is the ith\ncomponent of the dequeued tuple.\n\nN.B. If the queue is empty, this operation will block until `n` elements\nhave been dequeued (or \'timeout_ms\' elapses, if specified)."
   is_stateful: true
 }
 op {
   name: "QueueDequeueUpTo"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "n"
-    description: "The number of tuples to dequeue."
     type: DT_INT32
   }
   output_arg {
     name: "components"
-    description: "One or more tensors that were dequeued as a tuple."
     type_list_attr: "component_types"
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a tuple."
     has_minimum: true
     minimum: 1
   }
@@ -19204,32 +17617,25 @@ op {
     default_value {
       i: -1
     }
-    description: "If the queue has fewer than n elements, this operation\nwill block for up to timeout_ms milliseconds.\nNote: This option is not supported yet."
   }
-  summary: "Dequeues `n` tuples of one or more tensors from the given queue."
-  description: "This operation is not supported by all queues.  If a queue does not support\nDequeueUpTo, then an Unimplemented error is returned.\n\nIf the queue is closed and there are more than 0 but less than `n`\nelements remaining, then instead of returning an OutOfRange error like\nQueueDequeueMany, less than `n` elements are returned immediately.  If\nthe queue is closed and there are 0 elements left in the queue, then\nan OutOfRange error is returned just like in QueueDequeueMany.\nOtherwise the behavior is identical to QueueDequeueMany:\n\nThis operation concatenates queue-element component tensors along the\n0th dimension to make a single component tensor.  All of the components\nin the dequeued tuple will have size `n` in the 0th dimension.\n\nThis operation has k outputs, where `k` is the number of components in\nthe tuples stored in the given queue, and output `i` is the ith\ncomponent of the dequeued tuple."
 }
 op {
   name: "QueueDequeueUpToV2"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_RESOURCE
   }
   input_arg {
     name: "n"
-    description: "The number of tuples to dequeue."
     type: DT_INT32
   }
   output_arg {
     name: "components"
-    description: "One or more tensors that were dequeued as a tuple."
     type_list_attr: "component_types"
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a tuple."
     has_minimum: true
     minimum: 1
   }
@@ -19239,28 +17645,22 @@ op {
     default_value {
       i: -1
     }
-    description: "If the queue has fewer than n elements, this operation\nwill block for up to timeout_ms milliseconds.\nNote: This option is not supported yet."
   }
-  summary: "Dequeues `n` tuples of one or more tensors from the given queue."
-  description: "This operation is not supported by all queues.  If a queue does not support\nDequeueUpTo, then an Unimplemented error is returned.\n\nIf the queue is closed and there are more than 0 but less than `n`\nelements remaining, then instead of returning an OutOfRange error like\nQueueDequeueMany, less than `n` elements are returned immediately.  If\nthe queue is closed and there are 0 elements left in the queue, then\nan OutOfRange error is returned just like in QueueDequeueMany.\nOtherwise the behavior is identical to QueueDequeueMany:\n\nThis operation concatenates queue-element component tensors along the\n0th dimension to make a single component tensor.  All of the components\nin the dequeued tuple will have size n in the 0th dimension.\n\nThis operation has `k` outputs, where `k` is the number of components in\nthe tuples stored in the given queue, and output `i` is the ith\ncomponent of the dequeued tuple."
   is_stateful: true
 }
 op {
   name: "QueueDequeueV2"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_RESOURCE
   }
   output_arg {
     name: "components"
-    description: "One or more tensors that were dequeued as a tuple."
     type_list_attr: "component_types"
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a tuple."
     has_minimum: true
     minimum: 1
   }
@@ -19270,23 +17670,18 @@ op {
     default_value {
       i: -1
     }
-    description: "If the queue is empty, this operation will block for up to\ntimeout_ms milliseconds.\nNote: This option is not supported yet."
   }
-  summary: "Dequeues a tuple of one or more tensors from the given queue."
-  description: "This operation has k outputs, where k is the number of components\nin the tuples stored in the given queue, and output i is the ith\ncomponent of the dequeued tuple.\n\nN.B. If the queue is empty, this operation will block until an element\nhas been dequeued (or \'timeout_ms\' elapses, if specified)."
   is_stateful: true
 }
 op {
   name: "QueueEnqueue"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "components"
-    description: "One or more tensors from which the enqueued tensors should be taken."
     type_list_attr: "Tcomponents"
   }
   attr {
@@ -19301,22 +17696,17 @@ op {
     default_value {
       i: -1
     }
-    description: "If the queue is full, this operation will block for up to\ntimeout_ms milliseconds.\nNote: This option is not supported yet."
   }
-  summary: "Enqueues a tuple of one or more tensors in the given queue."
-  description: "The components input has k elements, which correspond to the components of\ntuples stored in the given queue.\n\nN.B. If the queue is full, this operation will block until the given\nelement has been enqueued (or \'timeout_ms\' elapses, if specified)."
 }
 op {
   name: "QueueEnqueueMany"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "components"
-    description: "One or more tensors from which the enqueued tensors should\nbe taken."
     type_list_attr: "Tcomponents"
   }
   attr {
@@ -19331,21 +17721,16 @@ op {
     default_value {
       i: -1
     }
-    description: "If the queue is too full, this operation will block for up\nto timeout_ms milliseconds.\nNote: This option is not supported yet."
   }
-  summary: "Enqueues zero or more tuples of one or more tensors in the given queue."
-  description: "This operation slices each component tensor along the 0th dimension to\nmake multiple queue elements. All of the tuple components must have the\nsame size in the 0th dimension.\n\nThe components input has k elements, which correspond to the components of\ntuples stored in the given queue.\n\nN.B. If the queue is full, this operation will block until the given\nelements have been enqueued (or \'timeout_ms\' elapses, if specified)."
 }
 op {
   name: "QueueEnqueueManyV2"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_RESOURCE
   }
   input_arg {
     name: "components"
-    description: "One or more tensors from which the enqueued tensors should\nbe taken."
     type_list_attr: "Tcomponents"
   }
   attr {
@@ -19360,22 +17745,17 @@ op {
     default_value {
       i: -1
     }
-    description: "If the queue is too full, this operation will block for up\nto timeout_ms milliseconds.\nNote: This option is not supported yet."
   }
-  summary: "Enqueues zero or more tuples of one or more tensors in the given queue."
-  description: "This operation slices each component tensor along the 0th dimension to\nmake multiple queue elements. All of the tuple components must have the\nsame size in the 0th dimension.\n\nThe components input has k elements, which correspond to the components of\ntuples stored in the given queue.\n\nN.B. If the queue is full, this operation will block until the given\nelements have been enqueued (or \'timeout_ms\' elapses, if specified)."
   is_stateful: true
 }
 op {
   name: "QueueEnqueueV2"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_RESOURCE
   }
   input_arg {
     name: "components"
-    description: "One or more tensors from which the enqueued tensors should be taken."
     type_list_attr: "Tcomponents"
   }
   attr {
@@ -19390,17 +17770,13 @@ op {
     default_value {
       i: -1
     }
-    description: "If the queue is full, this operation will block for up to\ntimeout_ms milliseconds.\nNote: This option is not supported yet."
   }
-  summary: "Enqueues a tuple of one or more tensors in the given queue."
-  description: "The components input has k elements, which correspond to the components of\ntuples stored in the given queue.\n\nN.B. If the queue is full, this operation will block until the given\nelement has been enqueued (or \'timeout_ms\' elapses, if specified)."
   is_stateful: true
 }
 op {
   name: "QueueIsClosed"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_STRING
     is_ref: true
   }
@@ -19408,124 +17784,96 @@ op {
     name: "is_closed"
     type: DT_BOOL
   }
-  summary: "Returns true if queue is closed."
-  description: "This operation returns true if the queue is closed and false if the queue\nis open."
 }
 op {
   name: "QueueIsClosedV2"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_RESOURCE
   }
   output_arg {
     name: "is_closed"
     type: DT_BOOL
   }
-  summary: "Returns true if queue is closed."
-  description: "This operation returns true if the queue is closed and false if the queue\nis open."
   is_stateful: true
 }
 op {
   name: "QueueSize"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_STRING
     is_ref: true
   }
   output_arg {
     name: "size"
-    description: "The number of elements in the given queue."
     type: DT_INT32
   }
-  summary: "Computes the number of elements in the given queue."
 }
 op {
   name: "QueueSizeV2"
   input_arg {
     name: "handle"
-    description: "The handle to a queue."
     type: DT_RESOURCE
   }
   output_arg {
     name: "size"
-    description: "The number of elements in the given queue."
     type: DT_INT32
   }
-  summary: "Computes the number of elements in the given queue."
   is_stateful: true
 }
 op {
   name: "RFFT"
   input_arg {
     name: "input"
-    description: "A float32 tensor."
     type: DT_FLOAT
   }
   input_arg {
     name: "fft_length"
-    description: "An int32 tensor of shape [1]. The FFT length."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "A complex64 tensor of the same rank as `input`. The inner-most\n  dimension of `input` is replaced with the `fft_length / 2 + 1` unique\n  frequency components of its 1D Fourier transform.\n\n@compatibility(numpy)\nEquivalent to np.fft.rfft\n@end_compatibility"
     type: DT_COMPLEX64
   }
-  summary: "Real-valued fast Fourier transform."
-  description: "Computes the 1-dimensional discrete Fourier transform of a real-valued signal\nover the inner-most dimension of `input`.\n\nSince the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the\n`fft_length / 2 + 1` unique components of the FFT: the zero-frequency term,\nfollowed by the `fft_length / 2` positive-frequency terms.\n\nAlong the axis `RFFT` is computed on, if `fft_length` is smaller than the\ncorresponding dimension of `input`, the dimension is cropped. If it is larger,\nthe dimension is padded with zeros."
 }
 op {
   name: "RFFT2D"
   input_arg {
     name: "input"
-    description: "A float32 tensor."
     type: DT_FLOAT
   }
   input_arg {
     name: "fft_length"
-    description: "An int32 tensor of shape [2]. The FFT length for each dimension."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "A complex64 tensor of the same rank as `input`. The inner-most 2\n  dimensions of `input` are replaced with their 2D Fourier transform. The\n  inner-most dimension contains `fft_length / 2 + 1` unique frequency\n  components.\n\n@compatibility(numpy)\nEquivalent to np.fft.rfft2\n@end_compatibility"
     type: DT_COMPLEX64
   }
-  summary: "2D real-valued fast Fourier transform."
-  description: "Computes the 2-dimensional discrete Fourier transform of a real-valued signal\nover the inner-most 2 dimensions of `input`.\n\nSince the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the\n`fft_length / 2 + 1` unique components of the FFT for the inner-most dimension\nof `output`: the zero-frequency term, followed by the `fft_length / 2`\npositive-frequency terms.\n\nAlong each axis `RFFT2D` is computed on, if `fft_length` is smaller than the\ncorresponding dimension of `input`, the dimension is cropped. If it is larger,\nthe dimension is padded with zeros."
 }
 op {
   name: "RFFT3D"
   input_arg {
     name: "input"
-    description: "A float32 tensor."
     type: DT_FLOAT
   }
   input_arg {
     name: "fft_length"
-    description: "An int32 tensor of shape [3]. The FFT length for each dimension."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "A complex64 tensor of the same rank as `input`. The inner-most 3\n  dimensions of `input` are replaced with the their 3D Fourier transform. The\n  inner-most dimension contains `fft_length / 2 + 1` unique frequency\n  components.\n\n@compatibility(numpy)\nEquivalent to np.fft.rfftn with 3 dimensions.\n@end_compatibility"
     type: DT_COMPLEX64
   }
-  summary: "3D real-valued fast Fourier transform."
-  description: "Computes the 3-dimensional discrete Fourier transform of a real-valued signal\nover the inner-most 3 dimensions of `input`.\n\nSince the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the\n`fft_length / 2 + 1` unique components of the FFT for the inner-most dimension\nof `output`: the zero-frequency term, followed by the `fft_length / 2`\npositive-frequency terms.\n\nAlong each axis `RFFT3D` is computed on, if `fft_length` is smaller than the\ncorresponding dimension of `input`, the dimension is cropped. If it is larger,\nthe dimension is padded with zeros."
 }
 op {
   name: "RGBToHSV"
   input_arg {
     name: "images"
-    description: "1-D or higher rank. RGB data to convert. Last dimension must be size 3."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "`images` converted to HSV."
     type_attr: "T"
   }
   attr {
@@ -19536,29 +17884,26 @@ op {
     }
     allowed_values {
       list {
+        type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Converts one or more images from RGB to HSV."
-  description: "Outputs a tensor of the same shape as the `images` tensor, containing the HSV\nvalue of the pixels. The output is only well defined if the value in `images`\nare in `[0,1]`.\n\n`output[..., 0]` contains hue, `output[..., 1]` contains saturation, and\n`output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0\ncorresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue."
 }
 op {
   name: "RandomCrop"
   input_arg {
     name: "image"
-    description: "3-D of shape `[height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "size"
-    description: "1-D of length 2 containing: `crop_height`, `crop_width`.."
     type: DT_INT64
   }
   output_arg {
     name: "output"
-    description: "3-D of shape `[crop_height, crop_width, channels].`"
     type_attr: "T"
   }
   attr {
@@ -19582,7 +17927,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -19590,31 +17934,53 @@ op {
     default_value {
       i: 0
     }
-    description: "An second seed to avoid seed collision."
   }
-  summary: "Randomly crop `image`."
-  description: "`size` is a 1-D int64 tensor with 2 elements representing the crop height and\nwidth.  The values must be non negative.\n\nThis Op picks a random location in `image` and crops a `height` by `width`\nrectangle from that location.  The random location is picked so the cropped\narea will fit inside the original image."
   deprecation {
     version: 8
     explanation: "Random crop is now pure Python"
   }
   is_stateful: true
 }
+op {
+  name: "RandomDataset"
+  input_arg {
+    name: "seed"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed2"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
 op {
   name: "RandomGamma"
   input_arg {
     name: "shape"
-    description: "1-D integer tensor. Shape of independent samples to draw from each\ndistribution described by the shape parameters given in alpha."
     type_attr: "S"
   }
   input_arg {
     name: "alpha"
-    description: "A tensor in which each scalar is a \"shape\" parameter describing the\nassociated gamma distribution."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "A tensor with shape `shape + shape(alpha)`. Each slice\n`[:, ..., :, i0, i1, ...iN]` contains the samples drawn for\n`alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha."
     type_attr: "T"
   }
   attr {
@@ -19623,7 +17989,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either `seed` or `seed2` are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -19631,7 +17996,6 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "S"
@@ -19654,8 +18018,6 @@ op {
       }
     }
   }
-  summary: "Outputs random values from the Gamma distribution(s) described by alpha."
-  description: "This op uses the algorithm by Marsaglia et al. to acquire samples via\ntransformation-rejection from pairs of uniform and normal random variables.\nSee http://dl.acm.org/citation.cfm?id=358414"
   is_stateful: true
 }
 op {
@@ -19707,7 +18069,6 @@ op {
       }
     }
   }
-  summary: "Use RandomPoissonV2 instead."
   deprecation {
     version: 25
     explanation: "Replaced by RandomPoissonV2"
@@ -19718,17 +18079,14 @@ op {
   name: "RandomPoissonV2"
   input_arg {
     name: "shape"
-    description: "1-D integer tensor. Shape of independent samples to draw from each\ndistribution described by the shape parameters given in rate."
     type_attr: "S"
   }
   input_arg {
     name: "rate"
-    description: "A tensor in which each scalar is a \"rate\" parameter describing the\nassociated poisson distribution."
     type_attr: "R"
   }
   output_arg {
     name: "output"
-    description: "A tensor with shape `shape + shape(rate)`. Each slice\n`[:, ..., :, i0, i1, ...iN]` contains the samples drawn for\n`rate[i0, i1, ...iN]`."
     type_attr: "dtype"
   }
   attr {
@@ -19737,7 +18095,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either `seed` or `seed2` are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -19745,7 +18102,6 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "S"
@@ -19789,20 +18145,16 @@ op {
       }
     }
   }
-  summary: "Outputs random values from the Poisson distribution(s) described by rate."
-  description: "This op uses two algorithms, depending on rate. If rate >= 10, then\nthe algorithm by Hormann is used to acquire samples via\ntransformation-rejection.\nSee http://www.sciencedirect.com/science/article/pii/0167668793909974.\n\nOtherwise, Knuth\'s algorithm is used to acquire samples via multiplying uniform\nrandom variables.\nSee Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer\nProgramming, Volume 2. Addison Wesley"
   is_stateful: true
 }
 op {
   name: "RandomShuffle"
   input_arg {
     name: "value"
-    description: "The tensor to be shuffled."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "A tensor of same shape and type as `value`, shuffled along its first\ndimension."
     type_attr: "T"
   }
   attr {
@@ -19811,7 +18163,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either `seed` or `seed2` are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -19819,28 +18170,23 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Randomly shuffles a tensor along its first dimension."
-  description: "  The tensor is shuffled along dimension 0, such that each `value[j]` is mapped\n  to one and only one `output[i]`. For example, a mapping that might occur for a\n  3x2 tensor is:\n\n```\n[[1, 2],       [[5, 6],\n [3, 4],  ==>   [1, 2],\n [5, 6]]        [3, 4]]\n```"
   is_stateful: true
 }
 op {
   name: "RandomShuffleQueue"
   output_arg {
     name: "handle"
-    description: "The handle to the queue."
     type: DT_STRING
     is_ref: true
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a value."
     has_minimum: true
     minimum: 1
   }
@@ -19851,7 +18197,6 @@ op {
       list {
       }
     }
-    description: "The shape of each component in a value. The length of this attr must\nbe either 0 or the same as the length of component_types. If the length of\nthis attr is 0, the shapes of queue elements are not constrained, and\nonly one element may be dequeued at a time."
     has_minimum: true
   }
   attr {
@@ -19860,7 +18205,6 @@ op {
     default_value {
       i: -1
     }
-    description: "The upper bound on the number of elements in this queue.\nNegative numbers mean no limit."
   }
   attr {
     name: "min_after_dequeue"
@@ -19868,7 +18212,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Dequeue will block unless there would be this\nmany elements after the dequeue or the queue is closed. This\nensures a minimum level of mixing of elements."
   }
   attr {
     name: "seed"
@@ -19876,7 +18219,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 is set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, a random seed is used."
   }
   attr {
     name: "seed2"
@@ -19884,7 +18226,6 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "container"
@@ -19892,7 +18233,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -19900,22 +18240,18 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue will be shared under the given name\nacross multiple sessions."
   }
-  summary: "A queue that randomizes the order of elements."
   is_stateful: true
 }
 op {
   name: "RandomShuffleQueueV2"
   output_arg {
     name: "handle"
-    description: "The handle to the queue."
     type: DT_RESOURCE
   }
   attr {
     name: "component_types"
     type: "list(type)"
-    description: "The type of each component in a value."
     has_minimum: true
     minimum: 1
   }
@@ -19926,7 +18262,6 @@ op {
       list {
       }
     }
-    description: "The shape of each component in a value. The length of this attr must\nbe either 0 or the same as the length of component_types. If the length of\nthis attr is 0, the shapes of queue elements are not constrained, and\nonly one element may be dequeued at a time."
     has_minimum: true
   }
   attr {
@@ -19935,7 +18270,6 @@ op {
     default_value {
       i: -1
     }
-    description: "The upper bound on the number of elements in this queue.\nNegative numbers mean no limit."
   }
   attr {
     name: "min_after_dequeue"
@@ -19943,7 +18277,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Dequeue will block unless there would be this\nmany elements after the dequeue or the queue is closed. This\nensures a minimum level of mixing of elements."
   }
   attr {
     name: "seed"
@@ -19951,7 +18284,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 is set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, a random seed is used."
   }
   attr {
     name: "seed2"
@@ -19959,7 +18291,6 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "container"
@@ -19967,7 +18298,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -19975,21 +18305,17 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue will be shared under the given name\nacross multiple sessions."
   }
-  summary: "A queue that randomizes the order of elements."
   is_stateful: true
 }
 op {
   name: "RandomStandardNormal"
   input_arg {
     name: "shape"
-    description: "The shape of the output tensor."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "A tensor of the specified shape filled with random normal values."
     type_attr: "dtype"
   }
   attr {
@@ -19998,7 +18324,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either `seed` or `seed2` are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -20006,15 +18331,14 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of the output."
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -20030,20 +18354,16 @@ op {
       }
     }
   }
-  summary: "Outputs random values from a normal distribution."
-  description: "The generated values will have mean 0 and standard deviation 1."
   is_stateful: true
 }
 op {
   name: "RandomUniform"
   input_arg {
     name: "shape"
-    description: "The shape of the output tensor."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "A tensor of the specified shape filled with uniform random values."
     type_attr: "dtype"
   }
   attr {
@@ -20052,7 +18372,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either `seed` or `seed2` are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -20060,15 +18379,14 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of the output."
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -20084,30 +18402,24 @@ op {
       }
     }
   }
-  summary: "Outputs random values from a uniform distribution."
-  description: "The generated values follow a uniform distribution in the range `[0, 1)`. The\nlower bound 0 is included in the range, while the upper bound 1 is excluded."
   is_stateful: true
 }
 op {
   name: "RandomUniformInt"
   input_arg {
     name: "shape"
-    description: "The shape of the output tensor."
     type_attr: "T"
   }
   input_arg {
     name: "minval"
-    description: "0-D.  Inclusive lower bound on the generated integers."
     type_attr: "Tout"
   }
   input_arg {
     name: "maxval"
-    description: "0-D.  Exclusive upper bound on the generated integers."
     type_attr: "Tout"
   }
   output_arg {
     name: "output"
-    description: "A tensor of the specified shape filled with uniform random integers."
     type_attr: "Tout"
   }
   attr {
@@ -20116,7 +18428,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either `seed` or `seed2` are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -20124,7 +18435,6 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "Tout"
@@ -20146,30 +18456,24 @@ op {
       }
     }
   }
-  summary: "Outputs random integers from a uniform distribution."
-  description: "The generated values are uniform integers in the range `[minval, maxval)`.\nThe lower bound `minval` is included in the range, while the upper bound\n`maxval` is excluded.\n\nThe random integers are slightly biased unless `maxval - minval` is an exact\npower of two.  The bias is small for values of `maxval - minval` significantly\nsmaller than the range of the output (either `2^32` or `2^64`)."
   is_stateful: true
 }
 op {
   name: "Range"
   input_arg {
     name: "start"
-    description: "0-D (scalar). First entry in the sequence."
     type_attr: "Tidx"
   }
   input_arg {
     name: "limit"
-    description: "0-D (scalar). Upper limit of sequence, exclusive."
     type_attr: "Tidx"
   }
   input_arg {
     name: "delta"
-    description: "0-D (scalar). Optional. Default is 1. Number that increments `start`."
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "1-D."
     type_attr: "Tidx"
   }
   attr {
@@ -20180,6 +18484,7 @@ op {
     }
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -20187,24 +18492,19 @@ op {
       }
     }
   }
-  summary: "Creates a sequence of numbers."
-  description: "This operation creates a sequence of numbers that begins at `start` and\nextends by increments of `delta` up to but not including `limit`.\n\nFor example:\n\n```\n# \'start\' is 3\n# \'limit\' is 18\n# \'delta\' is 3\ntf.range(start, limit, delta) ==> [3, 6, 9, 12, 15]\n```"
 }
 op {
   name: "RangeDataset"
   input_arg {
     name: "start"
-    description: "corresponds to start in python\'s xrange()."
     type: DT_INT64
   }
   input_arg {
     name: "stop"
-    description: "corresponds to stop in python\'s xrange()."
     type: DT_INT64
   }
   input_arg {
     name: "step"
-    description: "corresponds to step in python\'s xrange()."
     type: DT_INT64
   }
   output_arg {
@@ -20223,7 +18523,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset with a range of values. Corresponds to python\'s xrange."
   is_stateful: true
 }
 op {
@@ -20240,8 +18539,6 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Returns the rank of a tensor."
-  description: "This operation returns an integer representing the rank of `input`.\n\nFor example:\n\n```\n# \'t\' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]\n# shape of tensor \'t\' is [2, 2, 3]\nrank(t) ==> 3\n```\n\n**Note**: The rank of a tensor is not the same as the rank of a matrix. The rank\nof a tensor is the number of indices required to uniquely select each element\nof the tensor. Rank is also known as \"order\", \"degree\", or \"ndims.\""
 }
 op {
   name: "ReadFile"
@@ -20253,13 +18550,11 @@ op {
     name: "contents"
     type: DT_STRING
   }
-  summary: "Reads and outputs the entire contents of the input filename."
 }
 op {
   name: "ReadVariableOp"
   input_arg {
     name: "resource"
-    description: "handle to the resource in which to store the variable."
     type: DT_RESOURCE
   }
   output_arg {
@@ -20269,17 +18564,13 @@ op {
   attr {
     name: "dtype"
     type: "type"
-    description: "the dtype of the value."
   }
-  summary: "Reads the value of a variable."
-  description: "The tensor returned by this operation is immutable.\n\nThe value returned by this operation is guaranteed to be influenced by all the\nwrites on which this operation depends directly or indirectly, and to not be\ninfluenced by any of the writes which depend directly or indirectly on this\noperation."
   is_stateful: true
 }
 op {
   name: "ReaderNumRecordsProduced"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_STRING
     is_ref: true
   }
@@ -20287,29 +18578,23 @@ op {
     name: "records_produced"
     type: DT_INT64
   }
-  summary: "Returns the number of records this Reader has produced."
-  description: "This is the same as the number of ReaderRead executions that have\nsucceeded."
 }
 op {
   name: "ReaderNumRecordsProducedV2"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_RESOURCE
   }
   output_arg {
     name: "records_produced"
     type: DT_INT64
   }
-  summary: "Returns the number of records this Reader has produced."
-  description: "This is the same as the number of ReaderRead executions that have\nsucceeded."
   is_stateful: true
 }
 op {
   name: "ReaderNumWorkUnitsCompleted"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_STRING
     is_ref: true
   }
@@ -20317,195 +18602,153 @@ op {
     name: "units_completed"
     type: DT_INT64
   }
-  summary: "Returns the number of work units this Reader has finished processing."
 }
 op {
   name: "ReaderNumWorkUnitsCompletedV2"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_RESOURCE
   }
   output_arg {
     name: "units_completed"
     type: DT_INT64
   }
-  summary: "Returns the number of work units this Reader has finished processing."
   is_stateful: true
 }
 op {
   name: "ReaderRead"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "queue_handle"
-    description: "Handle to a Queue, with string work items."
     type: DT_STRING
     is_ref: true
   }
   output_arg {
     name: "key"
-    description: "A scalar."
     type: DT_STRING
   }
   output_arg {
     name: "value"
-    description: "A scalar."
     type: DT_STRING
   }
-  summary: "Returns the next record (key, value pair) produced by a Reader."
-  description: "Will dequeue from the input queue if necessary (e.g. when the\nReader needs to start reading from a new file since it has finished\nwith the previous file)."
 }
 op {
   name: "ReaderReadUpTo"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a `Reader`."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "queue_handle"
-    description: "Handle to a `Queue`, with string work items."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "num_records"
-    description: "number of records to read from `Reader`."
     type: DT_INT64
   }
   output_arg {
     name: "keys"
-    description: "A 1-D tensor."
     type: DT_STRING
   }
   output_arg {
     name: "values"
-    description: "A 1-D tensor."
     type: DT_STRING
   }
-  summary: "Returns up to `num_records` (key, value) pairs produced by a Reader."
-  description: "Will dequeue from the input queue if necessary (e.g. when the\nReader needs to start reading from a new file since it has finished\nwith the previous file).\nIt may return less than `num_records` even before the last batch."
 }
 op {
   name: "ReaderReadUpToV2"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a `Reader`."
     type: DT_RESOURCE
   }
   input_arg {
     name: "queue_handle"
-    description: "Handle to a `Queue`, with string work items."
     type: DT_RESOURCE
   }
   input_arg {
     name: "num_records"
-    description: "number of records to read from `Reader`."
     type: DT_INT64
   }
   output_arg {
     name: "keys"
-    description: "A 1-D tensor."
     type: DT_STRING
   }
   output_arg {
     name: "values"
-    description: "A 1-D tensor."
     type: DT_STRING
   }
-  summary: "Returns up to `num_records` (key, value) pairs produced by a Reader."
-  description: "Will dequeue from the input queue if necessary (e.g. when the\nReader needs to start reading from a new file since it has finished\nwith the previous file).\nIt may return less than `num_records` even before the last batch."
   is_stateful: true
 }
 op {
   name: "ReaderReadV2"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_RESOURCE
   }
   input_arg {
     name: "queue_handle"
-    description: "Handle to a Queue, with string work items."
     type: DT_RESOURCE
   }
   output_arg {
     name: "key"
-    description: "A scalar."
     type: DT_STRING
   }
   output_arg {
     name: "value"
-    description: "A scalar."
     type: DT_STRING
   }
-  summary: "Returns the next record (key, value pair) produced by a Reader."
-  description: "Will dequeue from the input queue if necessary (e.g. when the\nReader needs to start reading from a new file since it has finished\nwith the previous file)."
   is_stateful: true
 }
 op {
   name: "ReaderReset"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_STRING
     is_ref: true
   }
-  summary: "Restore a Reader to its initial clean state."
 }
 op {
   name: "ReaderResetV2"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_RESOURCE
   }
-  summary: "Restore a Reader to its initial clean state."
   is_stateful: true
 }
 op {
   name: "ReaderRestoreState"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "state"
-    description: "Result of a ReaderSerializeState of a Reader with type\nmatching reader_handle."
     type: DT_STRING
   }
-  summary: "Restore a reader to a previously saved state."
-  description: "Not all Readers support being restored, so this can produce an\nUnimplemented error."
 }
 op {
   name: "ReaderRestoreStateV2"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_RESOURCE
   }
   input_arg {
     name: "state"
-    description: "Result of a ReaderSerializeState of a Reader with type\nmatching reader_handle."
     type: DT_STRING
   }
-  summary: "Restore a reader to a previously saved state."
-  description: "Not all Readers support being restored, so this can produce an\nUnimplemented error."
   is_stateful: true
 }
 op {
   name: "ReaderSerializeState"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_STRING
     is_ref: true
   }
@@ -20513,22 +18756,17 @@ op {
     name: "state"
     type: DT_STRING
   }
-  summary: "Produce a string tensor that encodes the state of a Reader."
-  description: "Not all Readers support being serialized, so this can produce an\nUnimplemented error."
 }
 op {
   name: "ReaderSerializeStateV2"
   input_arg {
     name: "reader_handle"
-    description: "Handle to a Reader."
     type: DT_RESOURCE
   }
   output_arg {
     name: "state"
     type: DT_STRING
   }
-  summary: "Produce a string tensor that encodes the state of a Reader."
-  description: "Not all Readers support being serialized, so this can produce an\nUnimplemented error."
   is_stateful: true
 }
 op {
@@ -20567,8 +18805,6 @@ op {
       }
     }
   }
-  summary: "Returns the real part of a complex number."
-  description: "Given a tensor `input` of complex numbers, this operation returns a tensor of\ntype `float` that is the real part of each element in `input`. All elements in\n`input` must be complex numbers of the form \\\\(a + bj\\\\), where *a* is the real\n part returned by this operation and *b* is the imaginary part.\n\nFor example:\n\n```\n# tensor \'input\' is [-2.25 + 4.75j, 3.25 + 5.75j]\ntf.real(input) ==> [-2.25, 3.25]\n```"
 }
 op {
   name: "RealDiv"
@@ -20590,6 +18826,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -20603,8 +18840,6 @@ op {
       }
     }
   }
-  summary: "Returns x / y element-wise for real types."
-  description: "If `x` and `y` are reals, this will return the floating-point division.\n\n*NOTE*: `Div` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "Reciprocal"
@@ -20622,6 +18857,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -20631,8 +18867,6 @@ op {
       }
     }
   }
-  summary: "Computes the reciprocal of x element-wise."
-  description: "I.e., \\\\(y = 1 / x\\\\)."
 }
 op {
   name: "ReciprocalGrad"
@@ -20654,6 +18888,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -20661,20 +18896,16 @@ op {
       }
     }
   }
-  summary: "Computes the gradient for the inverse of `x` wrt its input."
-  description: "Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`\nis the corresponding input gradient."
 }
 op {
   name: "RecordInput"
   output_arg {
     name: "records"
-    description: "A tensor of shape [batch_size]."
     type: DT_STRING
   }
   attr {
     name: "file_pattern"
     type: "string"
-    description: "Glob pattern for the data files."
   }
   attr {
     name: "file_random_seed"
@@ -20682,7 +18913,6 @@ op {
     default_value {
       i: 301
     }
-    description: "Random seeds used to produce randomized records."
   }
   attr {
     name: "file_shuffle_shift_ratio"
@@ -20690,7 +18920,6 @@ op {
     default_value {
       f: 0
     }
-    description: "Shifts the list of files after the list is randomly\nshuffled."
   }
   attr {
     name: "file_buffer_size"
@@ -20698,7 +18927,6 @@ op {
     default_value {
       i: 10000
     }
-    description: "The randomization shuffling buffer."
   }
   attr {
     name: "file_parallelism"
@@ -20706,7 +18934,6 @@ op {
     default_value {
       i: 16
     }
-    description: "How many sstables are opened and concurrently iterated over."
   }
   attr {
     name: "batch_size"
@@ -20714,26 +18941,28 @@ op {
     default_value {
       i: 32
     }
-    description: "The batch size."
   }
-  summary: "Emits randomized records."
+  attr {
+    name: "compression_type"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
   is_stateful: true
 }
 op {
   name: "ReduceJoin"
   input_arg {
     name: "inputs"
-    description: "The input to be joined.  All reduced indices must have non-zero size."
     type: DT_STRING
   }
   input_arg {
     name: "reduction_indices"
-    description: "The dimensions to reduce over.  Dimensions are reduced in the\norder specified.  Omitting `reduction_indices` is equivalent to passing\n`[n-1, n-2, ..., 0]`.  Negative indices from `-n` to `-1` are supported."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "Has shape equal to that of the input with reduced dimensions removed or\nset to `1` depending on `keep_dims`."
     type: DT_STRING
   }
   attr {
@@ -20742,7 +18971,6 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, retain reduced dimensions with length `1`."
   }
   attr {
     name: "separator"
@@ -20750,22 +18978,17 @@ op {
     default_value {
       s: ""
     }
-    description: "The separator to use when joining."
   }
-  summary: "Joins a string Tensor across the given dimensions."
-  description: "Computes the string join across dimensions in the given string Tensor of shape\n`[d_0, d_1, ..., d_n-1]`.  Returns a new Tensor created by joining the input\nstrings with the given separator (default: empty string).  Negative indices are\ncounted backwards from the end, with `-1` being equivalent to `n - 1`.\n\nFor example:\n\n```python\n# tensor `a` is [[\"a\", \"b\"], [\"c\", \"d\"]]\ntf.reduce_join(a, 0) ==> [\"ac\", \"bd\"]\ntf.reduce_join(a, 1) ==> [\"ab\", \"cd\"]\ntf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> [\"ac\", \"bd\"]\ntf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> [\"ab\", \"cd\"]\ntf.reduce_join(a, 0, keep_dims=True) ==> [[\"ac\", \"bd\"]]\ntf.reduce_join(a, 1, keep_dims=True) ==> [[\"ab\"], [\"cd\"]]\ntf.reduce_join(a, 0, separator=\".\") ==> [\"a.c\", \"b.d\"]\ntf.reduce_join(a, [0, 1]) ==> [\"acbd\"]\ntf.reduce_join(a, [1, 0]) ==> [\"abcd\"]\ntf.reduce_join(a, []) ==> [\"abcd\"]\n```"
 }
 op {
   name: "RefEnter"
   input_arg {
     name: "data"
-    description: "The tensor to be made available to the child frame."
     type_attr: "T"
     is_ref: true
   }
   output_arg {
     name: "output"
-    description: "The same tensor as `data`."
     type_attr: "T"
     is_ref: true
   }
@@ -20776,7 +18999,6 @@ op {
   attr {
     name: "frame_name"
     type: "string"
-    description: "The name of the child frame."
   }
   attr {
     name: "is_constant"
@@ -20784,7 +19006,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, the output is constant within the child frame."
   }
   attr {
     name: "parallel_iterations"
@@ -20792,22 +19013,17 @@ op {
     default_value {
       i: 10
     }
-    description: "The number of iterations allowed to run in parallel."
   }
-  summary: "Creates or finds a child frame, and makes `data` available to the child frame."
-  description: "The unique `frame_name` is used by the `Executor` to identify frames. If\n`is_constant` is true, `output` is a constant in the child frame; otherwise\nit may be changed in the child frame. At most `parallel_iterations` iterations\nare run in parallel in the child frame."
 }
 op {
   name: "RefExit"
   input_arg {
     name: "data"
-    description: "The tensor to be made available to the parent frame."
     type_attr: "T"
     is_ref: true
   }
   output_arg {
     name: "output"
-    description: "The same tensor as `data`."
     type_attr: "T"
     is_ref: true
   }
@@ -20815,8 +19031,6 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Exits the current frame to its parent frame."
-  description: "Exit makes its input `data` available to the parent frame."
 }
 op {
   name: "RefIdentity"
@@ -20834,27 +19048,23 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Return the same ref tensor as the input ref tensor."
   allows_uninitialized_input: true
 }
 op {
   name: "RefMerge"
   input_arg {
     name: "inputs"
-    description: "The input tensors, exactly one of which will become available."
     type_attr: "T"
     number_attr: "N"
     is_ref: true
   }
   output_arg {
     name: "output"
-    description: "Will be set to the available input tensor."
     type_attr: "T"
     is_ref: true
   }
   output_arg {
     name: "value_index"
-    description: "The index of the chosen input tensor in `inputs`."
     type: DT_INT32
   }
   attr {
@@ -20867,20 +19077,16 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Forwards the value of an available tensor from `inputs` to `output`."
-  description: "`Merge` waits for at least one of the tensors in `inputs` to become available.\nIt is usually combined with `Switch` to implement branching.\n\n`Merge` forwards the first tensor for become available to `output`, and sets\n`value_index` to its index in `inputs`."
 }
 op {
   name: "RefNextIteration"
   input_arg {
     name: "data"
-    description: "The tensor to be made available to the next iteration."
     type_attr: "T"
     is_ref: true
   }
   output_arg {
     name: "output"
-    description: "The same tensor as `data`."
     type_attr: "T"
     is_ref: true
   }
@@ -20888,25 +19094,21 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Makes its input available to the next iteration."
 }
 op {
   name: "RefSelect"
   input_arg {
     name: "index"
-    description: "A scalar that determines the input that gets selected."
     type: DT_INT32
   }
   input_arg {
     name: "inputs"
-    description: "A list of ref tensors, one of which will be forwarded to `output`."
     type_attr: "T"
     number_attr: "N"
     is_ref: true
   }
   output_arg {
     name: "output"
-    description: "The forwarded tensor."
     type_attr: "T"
     is_ref: true
   }
@@ -20920,30 +19122,25 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Forwards the `index`th element of `inputs` to `output`."
 }
 op {
   name: "RefSwitch"
   input_arg {
     name: "data"
-    description: "The ref tensor to be forwarded to the appropriate output."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "pred"
-    description: "A scalar that specifies which output port will receive data."
     type: DT_BOOL
   }
   output_arg {
     name: "output_false"
-    description: "If `pred` is false, data will be forwarded to this output."
     type_attr: "T"
     is_ref: true
   }
   output_arg {
     name: "output_true"
-    description: "If `pred` is true, data will be forwarded to this output."
     type_attr: "T"
     is_ref: true
   }
@@ -20951,8 +19148,6 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Forwards the ref tensor `data` to the output port determined by `pred`."
-  description: "If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,\nthe data goes to `output_false`.\n\nSee also `Switch` and `Merge`."
   allows_uninitialized_input: true
 }
 op {
@@ -20973,10 +19168,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -20984,7 +19180,6 @@ op {
       }
     }
   }
-  summary: "Computes rectified linear: `max(features, 0)`."
 }
 op {
   name: "Relu6"
@@ -21004,10 +19199,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -21015,23 +19211,19 @@ op {
       }
     }
   }
-  summary: "Computes rectified linear 6: `min(max(features, 0), 6)`."
 }
 op {
   name: "Relu6Grad"
   input_arg {
     name: "gradients"
-    description: "The backpropagated gradients to the corresponding Relu6 operation."
     type_attr: "T"
   }
   input_arg {
     name: "features"
-    description: "The features passed as input to the corresponding Relu6 operation, or\nits output; using either one produces the same result."
     type_attr: "T"
   }
   output_arg {
     name: "backprops"
-    description: "The gradients:\n`gradients * (features > 0) * (features < 6)`."
     type_attr: "T"
   }
   attr {
@@ -21042,10 +19234,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -21053,23 +19246,19 @@ op {
       }
     }
   }
-  summary: "Computes rectified linear 6 gradients for a Relu6 operation."
 }
 op {
   name: "ReluGrad"
   input_arg {
     name: "gradients"
-    description: "The backpropagated gradients to the corresponding Relu operation."
     type_attr: "T"
   }
   input_arg {
     name: "features"
-    description: "The features passed as input to the corresponding Relu operation, OR\nthe outputs of that operation (both work equivalently)."
     type_attr: "T"
   }
   output_arg {
     name: "backprops"
-    description: "`gradients * (features > 0)`."
     type_attr: "T"
   }
   attr {
@@ -21080,10 +19269,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -21091,56 +19281,46 @@ op {
       }
     }
   }
-  summary: "Computes rectified linear gradients for a Relu operation."
 }
 op {
   name: "RemoteCall"
   input_arg {
     name: "target"
-    description: "A fully specified device name where we want to run the function."
     type: DT_STRING
   }
   input_arg {
     name: "args"
-    description: "A list of arguments for the function."
     type_list_attr: "Tin"
   }
   output_arg {
     name: "output"
-    description: "A list of return values."
     type_list_attr: "Tout"
   }
   attr {
     name: "Tin"
     type: "list(type)"
-    description: "The type list for the arguments."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "Tout"
     type: "list(type)"
-    description: "The type list for the return values."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "f"
     type: "func"
-    description: "The function to run remotely."
   }
-  summary: "Runs function `f` on a remote device indicated by `target`."
 }
 op {
   name: "RemoteFusedGraphExecute"
   input_arg {
     name: "inputs"
-    description: "Arbitrary number of tensors with arbitrary data types"
     type_list_attr: "Tinputs"
   }
   output_arg {
     name: "outputs"
-    description: "Arbitrary number of tensors with arbitrary data types"
     type_list_attr: "Toutputs"
   }
   attr {
@@ -21156,10 +19336,7 @@ op {
   attr {
     name: "serialized_remote_fused_graph_execute_info"
     type: "string"
-    description: "Serialized protocol buffer\nof RemoteFusedGraphExecuteInfo which contains graph specifications."
   }
-  summary: "Execute a sub graph on a remote processor."
-  description: "The graph specifications(such as graph itself, input tensors and output names)\nare stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo\nas serialized_remote_fused_graph_execute_info.\nThe specifications will be passed to a dedicated registered\nremote fused graph executor.  The executor will send the graph specifications\nto a remote processor and execute that graph.  The execution results\nwill be passed to consumer nodes as outputs of this node."
 }
 op {
   name: "RepeatDataset"
@@ -21169,7 +19346,6 @@ op {
   }
   input_arg {
     name: "count"
-    description: "A scalar representing the number of times that `input_dataset` should\nbe repeated. A value of `-1` indicates that it should be repeated infinitely."
     type: DT_INT64
   }
   output_arg {
@@ -21188,7 +19364,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that emits the outputs of `input_dataset` `count` times."
 }
 op {
   name: "RequantizationRange"
@@ -21198,40 +19373,33 @@ op {
   }
   input_arg {
     name: "input_min"
-    description: "The float value that the minimum quantized input value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "input_max"
-    description: "The float value that the maximum quantized input value represents."
     type: DT_FLOAT
   }
   output_arg {
     name: "output_min"
-    description: "The computed min output."
     type: DT_FLOAT
   }
   output_arg {
     name: "output_max"
-    description: "the computed max output."
     type: DT_FLOAT
   }
   attr {
     name: "Tinput"
     type: "type"
-    description: "The type of the input."
     allowed_values {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
-  summary: "Given a quantized tensor described by (input, input_min, input_max), outputs a"
-  description: "range that covers the actual values present in that tensor.  This op is\ntypically used to produce the requested_output_min and requested_output_max for\nRequantize."
 }
 op {
   name: "Requantize"
@@ -21241,22 +19409,18 @@ op {
   }
   input_arg {
     name: "input_min"
-    description: "The float value that the minimum quantized input value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "input_max"
-    description: "The float value that the maximum quantized input value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "requested_output_min"
-    description: "The float value that the minimum quantized output value represents."
     type: DT_FLOAT
   }
   input_arg {
     name: "requested_output_max"
-    description: "The float value that the maximum quantized output value represents."
     type: DT_FLOAT
   }
   output_arg {
@@ -21265,44 +19429,38 @@ op {
   }
   output_arg {
     name: "output_min"
-    description: "The requested_output_min value is copied into this output."
     type: DT_FLOAT
   }
   output_arg {
     name: "output_max"
-    description: "The requested_output_max value is copied into this output."
     type: DT_FLOAT
   }
   attr {
     name: "Tinput"
     type: "type"
-    description: "The type of the input."
     allowed_values {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
   attr {
     name: "out_type"
     type: "type"
-    description: "The type of the output. Should be a lower bit depth than Tinput."
     allowed_values {
       list {
         type: DT_QINT8
         type: DT_QUINT8
+        type: DT_QINT32
         type: DT_QINT16
         type: DT_QUINT16
-        type: DT_QINT32
       }
     }
   }
-  summary: "Convert the quantized \'input\' tensor into a lower-precision \'output\', using the"
-  description: "output range specified with \'requested_output_min\' and \'requested_output_max\'.\n\n[input_min, input_max] are scalar floats that specify the range for the float\ninterpretation of the \'input\' data. For example, if input_min is -1.0f and\ninput_max is 1.0f, and we are dealing with quint16 quantized data, then a 0\nvalue in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f."
 }
 op {
   name: "Reshape"
@@ -21312,7 +19470,6 @@ op {
   }
   input_arg {
     name: "shape"
-    description: "Defines the shape of the output tensor."
     type_attr: "Tshape"
   }
   output_arg {
@@ -21336,24 +19493,19 @@ op {
       }
     }
   }
-  summary: "Reshapes a tensor."
-  description: "Given `tensor`, this operation returns a tensor that has the same values\nas `tensor` with shape `shape`.\n\nIf one component of `shape` is the special value -1, the size of that dimension\nis computed so that the total size remains constant.  In particular, a `shape`\nof `[-1]` flattens into 1-D.  At most one component of `shape` can be -1.\n\nIf `shape` is 1-D or higher, then the operation returns a tensor with shape\n`shape` filled with the values of `tensor`. In this case, the number of elements\nimplied by `shape` must be the same as the number of elements in `tensor`.\n\nFor example:\n\n```\n# tensor \'t\' is [1, 2, 3, 4, 5, 6, 7, 8, 9]\n# tensor \'t\' has shape [9]\nreshape(t, [3, 3]) ==> [[1, 2, 3],\n                        [4, 5, 6],\n                        [7, 8, 9]]\n\n# tensor \'t\' is [[[1, 1], [2, 2]],\n#                [[3, 3], [4, 4]]]\n# tensor \'t\' has shape [2, 2, 2]\nreshape(t, [2, 4]) ==> [[1, 1, 2, 2],\n                        [3, 3, 4, 4]]\n\n# tensor \'t\' is [[[1, 1, 1],\n#                 [2, 2, 2]],\n#                [[3, 3, 3],\n#                 [4, 4, 4]],\n#                [[5, 5, 5],\n#                 [6, 6, 6]]]\n# tensor \'t\' has shape [3, 2, 3]\n# pass \'[-1]\' to flatten \'t\'\nreshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6]\n\n# -1 can also be used to infer the shape\n\n# -1 is inferred to be 9:\nreshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],\n                         [4, 4, 4, 5, 5, 5, 6, 6, 6]]\n# -1 is inferred to be 2:\nreshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],\n                         [4, 4, 4, 5, 5, 5, 6, 6, 6]]\n# -1 is inferred to be 3:\nreshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1],\n                              [2, 2, 2],\n                              [3, 3, 3]],\n                             [[4, 4, 4],\n                              [5, 5, 5],\n                              [6, 6, 6]]]\n\n# tensor \'t\' is [7]\n# shape `[]` reshapes to a scalar\nreshape(t, []) ==> 7\n```"
 }
 op {
   name: "ResizeArea"
   input_arg {
     name: "images"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "size"
-    description: "= A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The\nnew size for the images."
     type: DT_INT32
   }
   output_arg {
     name: "resized_images"
-    description: "4-D with shape\n`[batch, new_height, new_width, channels]`."
     type: DT_FLOAT
   }
   attr {
@@ -21379,26 +19531,20 @@ op {
     default_value {
       b: false
     }
-    description: "If true, rescale input by (new_height - 1) / (height - 1), which\nexactly aligns the 4 corners of images and resized images. If false, rescale\nby new_height / height. Treat similarly the width dimension."
   }
-  summary: "Resize `images` to `size` using area interpolation."
-  description: "Input images can be of different types but output images are always float.\n\nEach output pixel is computed by first transforming the pixel\'s footprint into\nthe input tensor and then averaging the pixels that intersect the footprint. An\ninput pixel\'s contribution to the average is weighted by the fraction of its\narea that intersects the footprint.  This is the same as OpenCV\'s INTER_AREA."
 }
 op {
   name: "ResizeBicubic"
   input_arg {
     name: "images"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "size"
-    description: "= A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The\nnew size for the images."
     type: DT_INT32
   }
   output_arg {
     name: "resized_images"
-    description: "4-D with shape\n`[batch, new_height, new_width, channels]`."
     type: DT_FLOAT
   }
   attr {
@@ -21424,26 +19570,20 @@ op {
     default_value {
       b: false
     }
-    description: "If true, rescale input by (new_height - 1) / (height - 1), which\nexactly aligns the 4 corners of images and resized images. If false, rescale\nby new_height / height. Treat similarly the width dimension."
   }
-  summary: "Resize `images` to `size` using bicubic interpolation."
-  description: "Input images can be of different types but output images are always float."
 }
 op {
   name: "ResizeBicubicGrad"
   input_arg {
     name: "grads"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type: DT_FLOAT
   }
   input_arg {
     name: "original_image"
-    description: "4-D with shape `[batch, orig_height, orig_width, channels]`,\nThe image tensor that was resized."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "4-D with shape `[batch, orig_height, orig_width, channels]`.\nGradients with respect to the input image. Input image must have been\nfloat or double."
     type_attr: "T"
   }
   attr {
@@ -21462,25 +19602,20 @@ op {
     default_value {
       b: false
     }
-    description: "If true, rescale grads by (orig_height - 1) / (height - 1), which\nexactly aligns the 4 corners of grads and original_image. If false, rescale by\norig_height / height. Treat similarly the width dimension."
   }
-  summary: "Computes the gradient of bicubic interpolation."
 }
 op {
   name: "ResizeBilinear"
   input_arg {
     name: "images"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "size"
-    description: "= A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The\nnew size for the images."
     type: DT_INT32
   }
   output_arg {
     name: "resized_images"
-    description: "4-D with shape\n`[batch, new_height, new_width, channels]`."
     type: DT_FLOAT
   }
   attr {
@@ -21506,26 +19641,20 @@ op {
     default_value {
       b: false
     }
-    description: "If true, rescale input by (new_height - 1) / (height - 1), which\nexactly aligns the 4 corners of images and resized images. If false, rescale\nby new_height / height. Treat similarly the width dimension."
   }
-  summary: "Resize `images` to `size` using bilinear interpolation."
-  description: "Input images can be of different types but output images are always float."
 }
 op {
   name: "ResizeBilinearGrad"
   input_arg {
     name: "grads"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type: DT_FLOAT
   }
   input_arg {
     name: "original_image"
-    description: "4-D with shape `[batch, orig_height, orig_width, channels]`,\nThe image tensor that was resized."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "4-D with shape `[batch, orig_height, orig_width, channels]`.\nGradients with respect to the input image. Input image must have been\nfloat or double."
     type_attr: "T"
   }
   attr {
@@ -21545,25 +19674,20 @@ op {
     default_value {
       b: false
     }
-    description: "If true, rescale grads by (orig_height - 1) / (height - 1), which\nexactly aligns the 4 corners of grads and original_image. If false, rescale by\norig_height / height. Treat similarly the width dimension."
   }
-  summary: "Computes the gradient of bilinear interpolation."
 }
 op {
   name: "ResizeNearestNeighbor"
   input_arg {
     name: "images"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "size"
-    description: "= A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The\nnew size for the images."
     type: DT_INT32
   }
   output_arg {
     name: "resized_images"
-    description: "4-D with shape\n`[batch, new_height, new_width, channels]`."
     type_attr: "T"
   }
   attr {
@@ -21589,25 +19713,20 @@ op {
     default_value {
       b: false
     }
-    description: "If true, rescale input by (new_height - 1) / (height - 1), which\nexactly aligns the 4 corners of images and resized images. If false, rescale\nby new_height / height. Treat similarly the width dimension."
   }
-  summary: "Resize `images` to `size` using nearest neighbor interpolation."
 }
 op {
   name: "ResizeNearestNeighborGrad"
   input_arg {
     name: "grads"
-    description: "4-D with shape `[batch, height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "size"
-    description: "= A 1-D int32 Tensor of 2 elements: `orig_height, orig_width`. The\noriginal input size."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients\nwith respect to the input image."
     type_attr: "T"
   }
   attr {
@@ -21630,45 +19749,36 @@ op {
     default_value {
       b: false
     }
-    description: "If true, rescale grads by (orig_height - 1) / (height - 1), which\nexactly aligns the 4 corners of grads and original_image. If false, rescale by\norig_height / height. Treat similarly the width dimension."
   }
-  summary: "Computes the gradient of nearest neighbor interpolation."
 }
 op {
   name: "ResourceApplyAdadelta"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum_update"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "epsilon"
-    description: "Constant factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   attr {
@@ -21678,17 +19788,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -21701,32 +19812,25 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var, accum and update_accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'*var\' according to the adadelta scheme."
-  description: "accum = rho() * accum + (1 - rho()) * grad.square();\nupdate = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;\nupdate_accum = rho() * update_accum + (1 - rho()) * update.square();\nvar -= update;"
   is_stateful: true
 }
 op {
   name: "ResourceApplyAdagrad"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   attr {
@@ -21736,17 +19840,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -21759,52 +19864,41 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the adagrad scheme."
-  description: "accum += grad * grad\nvar -= lr * grad * (1 / sqrt(accum))"
   is_stateful: true
 }
 op {
   name: "ResourceApplyAdagradDA"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "gradient_accumulator"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "gradient_squared_accumulator"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "global_step"
-    description: "Training step number. Must be a scalar."
     type: DT_INT64
   }
   attr {
@@ -21814,17 +19908,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -21837,61 +19932,49 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'*var\' according to the proximal adagrad scheme."
   is_stateful: true
 }
 op {
   name: "ResourceApplyAdam"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "m"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "v"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "beta1_power"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "beta2_power"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "beta1"
-    description: "Momentum factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "beta2"
-    description: "Momentum factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "epsilon"
-    description: "Ridge term. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   attr {
@@ -21901,17 +19984,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -21924,7 +20008,6 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var, m, and v tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
   attr {
     name: "use_nesterov"
@@ -21932,47 +20015,37 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, uses the nesterov update."
   }
-  summary: "Update \'*var\' according to the Adam algorithm."
-  description: "lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\nm_t <- beta1 * m_{t-1} + (1 - beta1) * g_t\nv_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t\nvariable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)"
   is_stateful: true
 }
 op {
   name: "ResourceApplyAddSign"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "m"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "alpha"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "sign_decay"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "beta"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   attr {
@@ -21982,17 +20055,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22005,42 +20079,33 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the AddSign update."
-  description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- (alpha + sign_decay * sign(g) *sign(m)) * g\nvariable <- variable - lr_t * update"
   is_stateful: true
 }
 op {
   name: "ResourceApplyCenteredRMSProp"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "mg"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "ms"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "mom"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -22049,12 +20114,10 @@ op {
   }
   input_arg {
     name: "epsilon"
-    description: "Ridge term. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   attr {
@@ -22064,17 +20127,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22087,52 +20151,41 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var, mg, ms, and mom tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the centered RMSProp algorithm."
-  description: "The centered RMSProp algorithm uses an estimate of the centered second moment\n(i.e., the variance) for normalization, as opposed to regular RMSProp, which\nuses the (uncentered) second moment. This often helps with training, but is\nslightly more expensive in terms of computation and memory.\n\nNote that in dense implementation of this algorithm, mg, ms, and mom will\nupdate even if the grad is zero, but in this sparse implementation, mg, ms,\nand mom will not update in iterations during which the grad is zero.\n\nmean_square = decay * mean_square + (1-decay) * gradient ** 2\nmean_grad = decay * mean_grad + (1-decay) * gradient\n\nDelta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)\n\nmg <- rho * mg_{t-1} + (1-rho) * grad\nms <- rho * ms_{t-1} + (1-rho) * grad * grad\nmom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\nvar <- var - mom"
   is_stateful: true
 }
 op {
   name: "ResourceApplyFtrl"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "linear"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regulariation. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regulariation. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "lr_power"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   attr {
@@ -22142,18 +20195,19 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
-        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
@@ -22165,47 +20219,37 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the Ftrl-proximal scheme."
-  description: "accum_new = accum + grad * grad\nlinear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var\nquadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2\nvar = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0\naccum = accum_new"
   is_stateful: true
 }
 op {
   name: "ResourceApplyFtrlV2"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "linear"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regulariation. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 shrinkage regulariation. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -22214,7 +20258,6 @@ op {
   }
   input_arg {
     name: "lr_power"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   attr {
@@ -22224,17 +20267,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22247,27 +20291,21 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the Ftrl-proximal scheme."
-  description: "grad_with_shrinkage = grad + 2 * l2_shrinkage * var\naccum_new = accum + grad_with_shrinkage * grad_with_shrinkage\nlinear += grad_with_shrinkage +\n    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var\nquadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2\nvar = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0\naccum = accum_new"
   is_stateful: true
 }
 op {
   name: "ResourceApplyGradientDescent"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "alpha"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "delta"
-    description: "The change."
     type_attr: "T"
   }
   attr {
@@ -22277,17 +20315,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22300,36 +20339,29 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, the subtraction will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'*var\' by subtracting \'alpha\' * \'delta\' from it."
   is_stateful: true
 }
 op {
   name: "ResourceApplyMomentum"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "momentum"
-    description: "Momentum. Must be a scalar."
     type_attr: "T"
   }
   attr {
@@ -22339,17 +20371,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22362,7 +20395,6 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
   attr {
     name: "use_nesterov"
@@ -22370,47 +20402,37 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, the tensor passed to compute grad will be\nvar - lr * momentum * accum, so in the end, the var you get is actually\nvar - lr * momentum * accum."
   }
-  summary: "Update \'*var\' according to the momentum scheme. Set use_nesterov = True if you"
-  description: "want to use Nesterov momentum.\n\naccum = accum * momentum + grad\nvar -= lr * accum"
   is_stateful: true
 }
 op {
   name: "ResourceApplyPowerSign"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "m"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "logbase"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "sign_decay"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "beta"
-    description: "Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   attr {
@@ -22420,17 +20442,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22443,42 +20466,33 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the AddSign update."
-  description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g\nvariable <- variable - lr_t * update"
   is_stateful: true
 }
 op {
   name: "ResourceApplyProximalAdagrad"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   attr {
@@ -22488,17 +20502,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22511,37 +20526,29 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'*var\' and \'*accum\' according to FOBOS with Adagrad learning rate."
-  description: "accum += grad * grad\nprox_v = var - lr * grad * (1 / sqrt(accum))\nvar = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}"
   is_stateful: true
 }
 op {
   name: "ResourceApplyProximalGradientDescent"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "alpha"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "delta"
-    description: "The change."
     type_attr: "T"
   }
   attr {
@@ -22551,17 +20558,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22574,37 +20582,29 @@ op {
     default_value {
       b: false
     }
-    description: "If True, the subtraction will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update \'*var\' as FOBOS algorithm with fixed learning rate."
-  description: "prox_v = var - alpha * delta\nvar = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}"
   is_stateful: true
 }
 op {
   name: "ResourceApplyRMSProp"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "ms"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "mom"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -22613,12 +20613,10 @@ op {
   }
   input_arg {
     name: "epsilon"
-    description: "Ridge term. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   attr {
@@ -22628,17 +20626,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22651,28 +20650,22 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var, ms, and mom tensors is protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the RMSProp algorithm."
-  description: "Note that in dense implementation of this algorithm, ms and mom will\nupdate even if the grad is zero, but in this sparse implementation, ms\nand mom will not update in iterations during which the grad is zero.\n\nmean_square = decay * mean_square + (1-decay) * gradient ** 2\nDelta = learning_rate * gradient / sqrt(mean_square + epsilon)\n\nms <- rho * ms_{t-1} + (1-rho) * grad * grad\nmom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\nvar <- var - mom"
   is_stateful: true
 }
 op {
   name: "ResourceCountUpTo"
   input_arg {
     name: "resource"
-    description: "Should be from a scalar `Variable` node."
     type: DT_RESOURCE
   }
   output_arg {
     name: "output"
-    description: "A copy of the input before increment. If nothing else modifies the\ninput, the values produced will all be distinct."
     type_attr: "T"
   }
   attr {
     name: "limit"
     type: "int"
-    description: "If incrementing ref would bring it above limit, instead generates an\n\'OutOfRange\' error."
   }
   attr {
     name: "T"
@@ -22684,7 +20677,6 @@ op {
       }
     }
   }
-  summary: "Increments variable pointed to by \'resource\' until it reaches \'limit\'."
   is_stateful: true
 }
 op {
@@ -22722,25 +20714,20 @@ op {
       }
     }
   }
-  summary: "Gather slices from the variable pointed to by `resource` according to `indices`."
-  description: "`indices` must be an integer tensor of any dimension (usually 0-D or 1-D).\nProduces an output tensor with shape `indices.shape + params.shape[1:]` where:\n\n```python\n    # Scalar indices\n    output[:, ..., :] = params[indices, :, ... :]\n\n    # Vector indices\n    output[i, :, ..., :] = params[indices[i], :, ... :]\n\n    # Higher rank indices\n    output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]\n```"
   is_stateful: true
 }
 op {
   name: "ResourceScatterAdd"
   input_arg {
     name: "resource"
-    description: "Should be from a `Variable` node."
     type: DT_RESOURCE
   }
   input_arg {
     name: "indices"
-    description: "A tensor of indices into the first dimension of `ref`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "A tensor of updated values to add to `ref`."
     type_attr: "dtype"
   }
   attr {
@@ -22750,17 +20737,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22777,25 +20765,57 @@ op {
       }
     }
   }
-  summary: "Adds sparse updates to the variable referenced by `resource`."
-  description: "This operation computes\n\n    # Scalar indices\n    ref[indices, ...] += updates[...]\n\n    # Vector indices (for each i)\n    ref[indices[i], ...] += updates[i, ...]\n\n    # High rank indices (for each i, ..., j)\n    ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]\n\nDuplicate entries are handled correctly: if multiple `indices` reference\nthe same location, their contributions add.\n\nRequires `updates.shape = indices.shape + ref.shape[1:]`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\'https://www.tensorflow.org/images/ScatterAdd.png\' alt>\n</div>"
+  is_stateful: true
+}
+op {
+  name: "ResourceScatterNdUpdate"
+  input_arg {
+    name: "ref"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "updates"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "use_locking"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
   is_stateful: true
 }
 op {
   name: "ResourceScatterUpdate"
   input_arg {
     name: "resource"
-    description: "Should be from a `Variable` node."
     type: DT_RESOURCE
   }
   input_arg {
     name: "indices"
-    description: "A tensor of indices into the first dimension of `ref`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "A tensor of updated values to add to `ref`."
     type_attr: "dtype"
   }
   attr {
@@ -22805,17 +20825,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22832,8 +20853,6 @@ op {
       }
     }
   }
-  summary: "Assigns sparse updates to the variable referenced by `resource`."
-  description: "This operation computes\n\n    # Scalar indices\n    ref[indices, ...] = updates[...]\n\n    # Vector indices (for each i)\n    ref[indices[i], ...] = updates[i, ...]\n\n    # High rank indices (for each i, ..., j)\n    ref[indices[i, ..., j], ...] = updates[i, ..., j, ...]"
   is_stateful: true
 }
 op {
@@ -22844,37 +20863,30 @@ op {
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum_update"
-    description: ": Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Learning rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "epsilon"
-    description: "Constant factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   attr {
@@ -22884,17 +20896,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22917,36 +20930,29 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "var: Should be from a Variable()."
   is_stateful: true
 }
 op {
   name: "ResourceSparseApplyAdagrad"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Learning rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   attr {
@@ -22956,17 +20962,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -22989,57 +20996,45 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update relevant entries in \'*var\' and \'*accum\' according to the adagrad scheme."
-  description: "That is for rows we have grad for, we update var and accum as follows:\naccum += grad * grad\nvar -= lr * grad * (1 / sqrt(accum))"
   is_stateful: true
 }
 op {
   name: "ResourceSparseApplyAdagradDA"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "gradient_accumulator"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "gradient_squared_accumulator"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   input_arg {
     name: "lr"
-    description: "Learning rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "global_step"
-    description: "Training step number. Must be a scalar."
     type: DT_INT64
   }
   attr {
@@ -23049,17 +21044,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -23082,41 +21078,33 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update entries in \'*var\' and \'*accum\' according to the proximal adagrad scheme."
   is_stateful: true
 }
 op {
   name: "ResourceSparseApplyCenteredRMSProp"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "mg"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "ms"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "mom"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -23125,17 +21113,14 @@ op {
   }
   input_arg {
     name: "epsilon"
-    description: "Ridge term. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var, ms and mom."
     type_attr: "Tindices"
   }
   attr {
@@ -23145,17 +21130,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -23178,57 +21164,45 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var, mg, ms, and mom tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the centered RMSProp algorithm."
-  description: "The centered RMSProp algorithm uses an estimate of the centered second moment\n(i.e., the variance) for normalization, as opposed to regular RMSProp, which\nuses the (uncentered) second moment. This often helps with training, but is\nslightly more expensive in terms of computation and memory.\n\nNote that in dense implementation of this algorithm, mg, ms, and mom will\nupdate even if the grad is zero, but in this sparse implementation, mg, ms,\nand mom will not update in iterations during which the grad is zero.\n\nmean_square = decay * mean_square + (1-decay) * gradient ** 2\nmean_grad = decay * mean_grad + (1-decay) * gradient\nDelta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)\n\nms <- rho * ms_{t-1} + (1-rho) * grad * grad\nmom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\nvar <- var - mom"
   is_stateful: true
 }
 op {
   name: "ResourceSparseApplyFtrl"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "linear"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "lr_power"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   attr {
@@ -23238,17 +21212,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -23271,52 +21246,41 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update relevant entries in \'*var\' according to the Ftrl-proximal scheme."
-  description: "That is for rows we have grad for, we update var, accum and linear as follows:\naccum_new = accum + grad * grad\nlinear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var\nquadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2\nvar = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0\naccum = accum_new"
   is_stateful: true
 }
 op {
   name: "ResourceSparseApplyFtrlV2"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "linear"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 shrinkage regulariation. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -23325,7 +21289,6 @@ op {
   }
   input_arg {
     name: "lr_power"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   attr {
@@ -23335,17 +21298,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -23368,42 +21332,33 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update relevant entries in \'*var\' according to the Ftrl-proximal scheme."
-  description: "That is for rows we have grad for, we update var, accum and linear as follows:\ngrad_with_shrinkage = grad + 2 * l2_shrinkage * var\naccum_new = accum + grad_with_shrinkage * grad_with_shrinkage\nlinear += grad_with_shrinkage +\n    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var\nquadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2\nvar = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0\naccum = accum_new"
   is_stateful: true
 }
 op {
   name: "ResourceSparseApplyMomentum"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Learning rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   input_arg {
     name: "momentum"
-    description: "Momentum. Must be a scalar."
     type_attr: "T"
   }
   attr {
@@ -23413,17 +21368,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -23446,7 +21402,6 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
   attr {
     name: "use_nesterov"
@@ -23454,47 +21409,37 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, the tensor passed to compute grad will be\nvar - lr * momentum * accum, so in the end, the var you get is actually\nvar - lr * momentum * accum."
   }
-  summary: "Update relevant entries in \'*var\' and \'*accum\' according to the momentum scheme."
-  description: "Set use_nesterov = True if you want to use Nesterov momentum.\n\nThat is for rows we have grad for, we update var and accum as follows:\n\naccum = accum * momentum + grad\nvar -= lr * accum"
   is_stateful: true
 }
 op {
   name: "ResourceSparseApplyProximalAdagrad"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Learning rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   attr {
@@ -23504,17 +21449,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -23537,42 +21483,33 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Sparse update entries in \'*var\' and \'*accum\' according to FOBOS algorithm."
-  description: "That is for rows we have grad for, we update var and accum as follows:\naccum += grad * grad\nprox_v = var\nprox_v -= lr * grad * (1 / sqrt(accum))\nvar = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}"
   is_stateful: true
 }
 op {
   name: "ResourceSparseApplyProximalGradientDescent"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "alpha"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   attr {
@@ -23582,17 +21519,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -23615,37 +21553,29 @@ op {
     default_value {
       b: false
     }
-    description: "If True, the subtraction will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Sparse update \'*var\' as FOBOS algorithm with fixed learning rate."
-  description: "That is for rows we have grad for, we update var as follows:\nprox_v = var - alpha * grad\nvar = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}"
   is_stateful: true
 }
 op {
   name: "ResourceSparseApplyRMSProp"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "ms"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "mom"
-    description: "Should be from a Variable()."
     type: DT_RESOURCE
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -23654,17 +21584,14 @@ op {
   }
   input_arg {
     name: "epsilon"
-    description: "Ridge term. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var, ms and mom."
     type_attr: "Tindices"
   }
   attr {
@@ -23674,17 +21601,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -23707,10 +21635,7 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var, ms, and mom tensors is protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the RMSProp algorithm."
-  description: "Note that in dense implementation of this algorithm, ms and mom will\nupdate even if the grad is zero, but in this sparse implementation, ms\nand mom will not update in iterations during which the grad is zero.\n\nmean_square = decay * mean_square + (1-decay) * gradient ** 2\nDelta = learning_rate * gradient / sqrt(mean_square + epsilon)\n\nms <- rho * ms_{t-1} + (1-rho) * grad * grad\nmom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\nvar <- var - mom"
   is_stateful: true
 }
 op {
@@ -23784,31 +21709,25 @@ op {
       i: 0
     }
   }
-  summary: "Assign `value` to the sliced l-value reference of `ref`."
-  description: "The values of `value` are assigned to the positions in the variable\n`ref` that are selected by the slice parameters. The slice parameters\n`begin, `end`, `strides`, etc. work exactly as in `StridedSlice`.\n\nNOTE this op currently does not support broadcasting and so `value`\'s\nshape must be exactly the shape produced by the slice of `ref`."
   is_stateful: true
 }
 op {
   name: "Restore"
   input_arg {
     name: "file_pattern"
-    description: "Must have a single element. The pattern of the files from\nwhich we read the tensor."
     type: DT_STRING
   }
   input_arg {
     name: "tensor_name"
-    description: "Must have a single element. The name of the tensor to be\nrestored."
     type: DT_STRING
   }
   output_arg {
     name: "tensor"
-    description: "The restored tensor."
     type_attr: "dt"
   }
   attr {
     name: "dt"
     type: "type"
-    description: "The type of the tensor to be restored."
   }
   attr {
     name: "preferred_shard"
@@ -23816,38 +21735,30 @@ op {
     default_value {
       i: -1
     }
-    description: "Index of file to open first if multiple files match\n`file_pattern`."
   }
-  summary: "Restores a tensor from checkpoint files."
-  description: "Reads a tensor stored in one or several files. If there are several files (for\ninstance because a tensor was saved as slices), `file_pattern` may contain\nwildcard symbols (`*` and `?`) in the filename portion only, not in the\ndirectory portion.\n\nIf a `file_pattern` matches several files, `preferred_shard` can be used to hint\nin which file the requested tensor is likely to be found. This op will first\nopen the file at index `preferred_shard` in the list of matching files and try\nto restore tensors from that file.  Only if some tensors or tensor slices are\nnot found in that first file, then the Op opens all the files. Setting\n`preferred_shard` to match the value passed as the `shard` input\nof a matching `Save` Op may speed up Restore.  This attribute only affects\nperformance, not correctness.  The default value -1 means files are processed in\norder.\n\nSee also `RestoreSlice`."
   is_stateful: true
 }
 op {
   name: "RestoreSlice"
   input_arg {
     name: "file_pattern"
-    description: "Must have a single element. The pattern of the files from\nwhich we read the tensor."
     type: DT_STRING
   }
   input_arg {
     name: "tensor_name"
-    description: "Must have a single element. The name of the tensor to be\nrestored."
     type: DT_STRING
   }
   input_arg {
     name: "shape_and_slice"
-    description: "Scalar. The shapes and slice specifications to use when\nrestoring a tensors."
     type: DT_STRING
   }
   output_arg {
     name: "tensor"
-    description: "The restored tensor."
     type_attr: "dt"
   }
   attr {
     name: "dt"
     type: "type"
-    description: "The type of the tensor to be restored."
   }
   attr {
     name: "preferred_shard"
@@ -23855,60 +21766,47 @@ op {
     default_value {
       i: -1
     }
-    description: "Index of file to open first if multiple files match\n`file_pattern`. See the documentation for `Restore`."
   }
-  summary: "Restores a tensor from checkpoint files."
-  description: "This is like `Restore` except that restored tensor can be listed as filling\nonly a slice of a larger tensor.  `shape_and_slice` specifies the shape of the\nlarger tensor and the slice that the restored tensor covers.\n\nThe `shape_and_slice` input has the same format as the\nelements of the `shapes_and_slices` input of the `SaveSlices` op."
   is_stateful: true
 }
 op {
   name: "RestoreV2"
   input_arg {
     name: "prefix"
-    description: "Must have a single element.  The prefix of a V2 checkpoint."
     type: DT_STRING
   }
   input_arg {
     name: "tensor_names"
-    description: "shape {N}.  The names of the tensors to be restored."
     type: DT_STRING
   }
   input_arg {
     name: "shape_and_slices"
-    description: "shape {N}.  The slice specs of the tensors to be restored.\nEmpty strings indicate that they are non-partitioned tensors."
     type: DT_STRING
   }
   output_arg {
     name: "tensors"
-    description: "shape {N}.  The restored tensors, whose shapes are read from the\ncheckpoint directly."
     type_list_attr: "dtypes"
   }
   attr {
     name: "dtypes"
     type: "list(type)"
-    description: "shape {N}.  The list of expected dtype for the tensors.  Must match\nthose stored in the checkpoint."
     has_minimum: true
     minimum: 1
   }
-  summary: "Restores tensors from a V2 checkpoint."
-  description: "For backward compatibility with the V1 format, this Op currently allows\nrestoring from a V1 checkpoint as well:\n  - This Op first attempts to find the V2 index file pointed to by \"prefix\", and\n    if found proceed to read it as a V2 checkpoint;\n  - Otherwise the V1 read path is invoked.\nRelying on this behavior is not recommended, as the ability to fall back to read\nV1 might be deprecated and eventually removed.\n\nBy default, restores the named tensors in full.  If the caller wishes to restore\nspecific slices of stored tensors, \"shape_and_slices\" should be non-empty\nstrings and correspondingly well-formed.\n\nCallers must ensure all the named tensors are indeed stored in the checkpoint."
   is_stateful: true
 }
 op {
   name: "Reverse"
   input_arg {
     name: "tensor"
-    description: "Up to 8-D."
     type_attr: "T"
   }
   input_arg {
     name: "dims"
-    description: "1-D. The dimensions to reverse."
     type: DT_BOOL
   }
   output_arg {
     name: "output"
-    description: "The same shape as `tensor`."
     type_attr: "T"
   }
   attr {
@@ -23932,30 +21830,24 @@ op {
       }
     }
   }
-  summary: "Reverses specific dimensions of a tensor."
-  description: "Given a `tensor`, and a `bool` tensor `dims` representing the dimensions\nof `tensor`, this operation reverses each dimension i of `tensor` where\n`dims[i]` is `True`.\n\n`tensor` can have up to 8 dimensions. The number of dimensions\nof `tensor` must equal the number of elements in `dims`. In other words:\n\n`rank(tensor) = size(dims)`\n\nFor example:\n\n```\n# tensor \'t\' is [[[[ 0,  1,  2,  3],\n#                  [ 4,  5,  6,  7],\n#                  [ 8,  9, 10, 11]],\n#                 [[12, 13, 14, 15],\n#                  [16, 17, 18, 19],\n#                  [20, 21, 22, 23]]]]\n# tensor \'t\' shape is [1, 2, 3, 4]\n\n# \'dims\' is [False, False, False, True]\nreverse(t, dims) ==> [[[[ 3,  2,  1,  0],\n                        [ 7,  6,  5,  4],\n                        [ 11, 10, 9, 8]],\n                       [[15, 14, 13, 12],\n                        [19, 18, 17, 16],\n                        [23, 22, 21, 20]]]]\n\n# \'dims\' is [False, True, False, False]\nreverse(t, dims) ==> [[[[12, 13, 14, 15],\n                        [16, 17, 18, 19],\n                        [20, 21, 22, 23]\n                       [[ 0,  1,  2,  3],\n                        [ 4,  5,  6,  7],\n                        [ 8,  9, 10, 11]]]]\n\n# \'dims\' is [False, False, True, False]\nreverse(t, dims) ==> [[[[8, 9, 10, 11],\n                        [4, 5, 6, 7],\n                        [0, 1, 2, 3]]\n                       [[20, 21, 22, 23],\n                        [16, 17, 18, 19],\n                        [12, 13, 14, 15]]]]\n```"
 }
 op {
   name: "ReverseSequence"
   input_arg {
     name: "input"
-    description: "The input to reverse."
     type_attr: "T"
   }
   input_arg {
     name: "seq_lengths"
-    description: "1-D with length `input.dims(batch_dim)` and\n`max(seq_lengths) <= input.dims(seq_dim)`"
     type_attr: "Tlen"
   }
   output_arg {
     name: "output"
-    description: "The partially reversed input. It has the same shape as `input`."
     type_attr: "T"
   }
   attr {
     name: "seq_dim"
     type: "int"
-    description: "The dimension which is partially reversed."
   }
   attr {
     name: "batch_dim"
@@ -23963,7 +21855,6 @@ op {
     default_value {
       i: 0
     }
-    description: "The dimension along which reversal is performed."
   }
   attr {
     name: "T"
@@ -23982,24 +21873,19 @@ op {
       }
     }
   }
-  summary: "Reverses variable length slices."
-  description: "This op first slices `input` along the dimension `batch_dim`, and for each\nslice `i`, reverses the first `seq_lengths[i]` elements along\nthe dimension `seq_dim`.\n\nThe elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,\nand `seq_lengths` must be a vector of length `input.dims[batch_dim]`.\n\nThe output slice `i` along dimension `batch_dim` is then given by input\nslice `i`, with the first `seq_lengths[i]` slices along dimension\n`seq_dim` reversed.\n\nFor example:\n\n```\n# Given this:\nbatch_dim = 0\nseq_dim = 1\ninput.dims = (4, 8, ...)\nseq_lengths = [7, 2, 3, 5]\n\n# then slices of input are reversed on seq_dim, but only up to seq_lengths:\noutput[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...]\noutput[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...]\noutput[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...]\noutput[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...]\n\n# while entries past seq_lens are copied through:\noutput[0, 7:, :, ...] = input[0, 7:, :, ...]\noutput[1, 2:, :, ...] = input[1, 2:, :, ...]\noutput[2, 3:, :, ...] = input[2, 3:, :, ...]\noutput[3, 2:, :, ...] = input[3, 2:, :, ...]\n```\n\nIn contrast, if:\n\n```\n# Given this:\nbatch_dim = 2\nseq_dim = 0\ninput.dims = (8, ?, 4, ...)\nseq_lengths = [7, 2, 3, 5]\n\n# then slices of input are reversed on seq_dim, but only up to seq_lengths:\noutput[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...]\noutput[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...]\noutput[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...]\noutput[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...]\n\n# while entries past seq_lens are copied through:\noutput[7:, :, 0, :, ...] = input[7:, :, 0, :, ...]\noutput[2:, :, 1, :, ...] = input[2:, :, 1, :, ...]\noutput[3:, :, 2, :, ...] = input[3:, :, 2, :, ...]\noutput[2:, :, 3, :, ...] = input[2:, :, 3, :, ...]\n```"
 }
 op {
   name: "ReverseV2"
   input_arg {
     name: "tensor"
-    description: "Up to 8-D."
     type_attr: "T"
   }
   input_arg {
     name: "axis"
-    description: "1-D. The indices of the dimensions to reverse. Must be in the range\n`[-rank(tensor), rank(tensor))`."
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "The same shape as `tensor`."
     type_attr: "T"
   }
   attr {
@@ -24028,6 +21914,7 @@ op {
         type: DT_INT64
         type: DT_BOOL
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -24036,8 +21923,6 @@ op {
       }
     }
   }
-  summary: "Reverses specific dimensions of a tensor."
-  description: "NOTE `tf.reverse` has now changed behavior in preparation for 1.0.\n`tf.reverse_v2` is currently an alias that will be deprecated before TF 1.0.\n\nGiven a `tensor`, and a `int32` tensor `axis` representing the set of\ndimensions of `tensor` to reverse. This operation reverses each dimension\n`i` for which there exists `j` s.t. `axis[j] == i`.\n\n`tensor` can have up to 8 dimensions. The number of dimensions specified\nin `axis` may be 0 or more entries. If an index is specified more than\nonce, a InvalidArgument error is raised.\n\nFor example:\n\n```\n# tensor \'t\' is [[[[ 0,  1,  2,  3],\n#                  [ 4,  5,  6,  7],\n#                  [ 8,  9, 10, 11]],\n#                 [[12, 13, 14, 15],\n#                  [16, 17, 18, 19],\n#                  [20, 21, 22, 23]]]]\n# tensor \'t\' shape is [1, 2, 3, 4]\n\n# \'dims\' is [3] or \'dims\' is [-1]\nreverse(t, dims) ==> [[[[ 3,  2,  1,  0],\n                        [ 7,  6,  5,  4],\n                        [ 11, 10, 9, 8]],\n                       [[15, 14, 13, 12],\n                        [19, 18, 17, 16],\n                        [23, 22, 21, 20]]]]\n\n# \'dims\' is \'[1]\' (or \'dims\' is \'[-3]\')\nreverse(t, dims) ==> [[[[12, 13, 14, 15],\n                        [16, 17, 18, 19],\n                        [20, 21, 22, 23]\n                       [[ 0,  1,  2,  3],\n                        [ 4,  5,  6,  7],\n                        [ 8,  9, 10, 11]]]]\n\n# \'dims\' is \'[2]\' (or \'dims\' is \'[-2]\')\nreverse(t, dims) ==> [[[[8, 9, 10, 11],\n                        [4, 5, 6, 7],\n                        [0, 1, 2, 3]]\n                       [[20, 21, 22, 23],\n                        [16, 17, 18, 19],\n                        [12, 13, 14, 15]]]]\n```"
 }
 op {
   name: "RightShift"
@@ -24069,8 +21954,6 @@ op {
       }
     }
   }
-  summary: "Elementwise computes the bitwise right-shift of `x` and `y`."
-  description: "Performs a logical shift for unsigned integer types, and an arithmetic shift\nfor signed integer types.\n\nIf `y` is negative, or greater than or equal to than the width of `x` in bits\nthe result is implementation defined."
   is_commutative: true
 }
 op {
@@ -24088,13 +21971,12 @@ op {
     type: "type"
     allowed_values {
       list {
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Returns element-wise integer closest to x."
-  description: "If the result is midway between two representable values,\nthe even representable is chosen.\nFor example:\n\n```\nrint(-1.5) ==> -2.0\nrint(0.5000001) ==> 1.0\nrint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]\n```"
 }
 op {
   name: "Round"
@@ -24112,6 +21994,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -24121,8 +22004,6 @@ op {
       }
     }
   }
-  summary: "Rounds the values of a tensor to the nearest integer, element-wise."
-  description: "Rounds half to even.  Also known as bankers rounding. If you want to round\naccording to the current system rounding mode use std::cint."
 }
 op {
   name: "Rsqrt"
@@ -24140,6 +22021,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -24147,8 +22029,6 @@ op {
       }
     }
   }
-  summary: "Computes reciprocal of square root of x element-wise."
-  description: "I.e., \\\\(y = 1 / \\sqrt{x}\\\\)."
 }
 op {
   name: "RsqrtGrad"
@@ -24170,6 +22050,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -24177,34 +22058,27 @@ op {
       }
     }
   }
-  summary: "Computes the gradient for the rsqrt of `x` wrt its input."
-  description: "Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy`\nis the corresponding input gradient."
 }
 op {
   name: "SampleDistortedBoundingBox"
   input_arg {
     name: "image_size"
-    description: "1-D, containing `[height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "bounding_boxes"
-    description: "3-D with shape `[batch, N, 4]` describing the N bounding boxes\nassociated with the image."
     type: DT_FLOAT
   }
   output_arg {
     name: "begin"
-    description: "1-D, containing `[offset_height, offset_width, 0]`. Provide as input to\n`tf.slice`."
     type_attr: "T"
   }
   output_arg {
     name: "size"
-    description: "1-D, containing `[target_height, target_width, -1]`. Provide as input to\n`tf.slice`."
     type_attr: "T"
   }
   output_arg {
     name: "bboxes"
-    description: "3-D with shape `[1, 1, 4]` containing the distorted bounding box.\nProvide as input to `tf.image.draw_bounding_boxes`."
     type: DT_FLOAT
   }
   attr {
@@ -24226,7 +22100,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either `seed` or `seed2` are set to non-zero, the random number\ngenerator is seeded by the given `seed`.  Otherwise, it is seeded by a random\nseed."
   }
   attr {
     name: "seed2"
@@ -24234,7 +22107,6 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "min_object_covered"
@@ -24242,7 +22114,6 @@ op {
     default_value {
       f: 0.1
     }
-    description: "The cropped area of the image must contain at least this\nfraction of any bounding box supplied. The value of this parameter should be\nnon-negative. In the case of 0, the cropped area does not need to overlap\nany of the bounding boxes supplied."
   }
   attr {
     name: "aspect_ratio_range"
@@ -24253,7 +22124,6 @@ op {
         f: 1.33
       }
     }
-    description: "The cropped area of the image must have an aspect ratio =\nwidth / height within this range."
   }
   attr {
     name: "area_range"
@@ -24264,7 +22134,6 @@ op {
         f: 1
       }
     }
-    description: "The cropped area of the image must contain a fraction of the\nsupplied image within in this range."
   }
   attr {
     name: "max_attempts"
@@ -24272,7 +22141,6 @@ op {
     default_value {
       i: 100
     }
-    description: "Number of attempts at generating a cropped region of the image\nof the specified constraints. After `max_attempts` failures, return the entire\nimage."
   }
   attr {
     name: "use_image_if_no_bounding_boxes"
@@ -24280,42 +22148,33 @@ op {
     default_value {
       b: false
     }
-    description: "Controls behavior if no bounding boxes supplied.\nIf true, assume an implicit bounding box covering the whole input. If false,\nraise an error."
   }
-  summary: "Generate a single randomly distorted bounding box for an image."
-  description: "Bounding box annotations are often supplied in addition to ground-truth labels\nin image recognition or object localization tasks. A common technique for\ntraining such a system is to randomly distort an image while preserving\nits content, i.e. *data augmentation*. This Op outputs a randomly distorted\nlocalization of an object, i.e. bounding box, given an `image_size`,\n`bounding_boxes` and a series of constraints.\n\nThe output of this Op is a single bounding box that may be used to crop the\noriginal image. The output is returned as 3 tensors: `begin`, `size` and\n`bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the\nimage. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize\nwhat the bounding box looks like.\n\nBounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The\nbounding box coordinates are floats in `[0.0, 1.0]` relative to the width and\nheight of the underlying image.\n\nFor example,\n\n```python\n    # Generate a single distorted bounding box.\n    begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(\n        tf.shape(image),\n        bounding_boxes=bounding_boxes)\n\n    # Draw the bounding box in an image summary.\n    image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),\n                                                  bbox_for_draw)\n    tf.image_summary(\'images_with_box\', image_with_box)\n\n    # Employ the bounding box to distort the image.\n    distorted_image = tf.slice(image, begin, size)\n```\n\nNote that if no bounding box information is available, setting\n`use_image_if_no_bounding_boxes = true` will assume there is a single implicit\nbounding box covering the whole image. If `use_image_if_no_bounding_boxes` is\nfalse and no bounding boxes are supplied, an error is raised."
   is_stateful: true
 }
 op {
   name: "SampleDistortedBoundingBoxV2"
   input_arg {
     name: "image_size"
-    description: "1-D, containing `[height, width, channels]`."
     type_attr: "T"
   }
   input_arg {
     name: "bounding_boxes"
-    description: "3-D with shape `[batch, N, 4]` describing the N bounding boxes\nassociated with the image."
     type: DT_FLOAT
   }
   input_arg {
     name: "min_object_covered"
-    description: "The cropped area of the image must contain at least this\nfraction of any bounding box supplied. The value of this parameter should be\nnon-negative. In the case of 0, the cropped area does not need to overlap\nany of the bounding boxes supplied."
     type: DT_FLOAT
   }
   output_arg {
     name: "begin"
-    description: "1-D, containing `[offset_height, offset_width, 0]`. Provide as input to\n`tf.slice`."
     type_attr: "T"
   }
   output_arg {
     name: "size"
-    description: "1-D, containing `[target_height, target_width, -1]`. Provide as input to\n`tf.slice`."
     type_attr: "T"
   }
   output_arg {
     name: "bboxes"
-    description: "3-D with shape `[1, 1, 4]` containing the distorted bounding box.\nProvide as input to `tf.image.draw_bounding_boxes`."
     type: DT_FLOAT
   }
   attr {
@@ -24337,7 +22196,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either `seed` or `seed2` are set to non-zero, the random number\ngenerator is seeded by the given `seed`.  Otherwise, it is seeded by a random\nseed."
   }
   attr {
     name: "seed2"
@@ -24345,7 +22203,6 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "aspect_ratio_range"
@@ -24356,7 +22213,6 @@ op {
         f: 1.33
       }
     }
-    description: "The cropped area of the image must have an aspect ratio =\nwidth / height within this range."
   }
   attr {
     name: "area_range"
@@ -24367,7 +22223,6 @@ op {
         f: 1
       }
     }
-    description: "The cropped area of the image must contain a fraction of the\nsupplied image within in this range."
   }
   attr {
     name: "max_attempts"
@@ -24375,7 +22230,6 @@ op {
     default_value {
       i: 100
     }
-    description: "Number of attempts at generating a cropped region of the image\nof the specified constraints. After `max_attempts` failures, return the entire\nimage."
   }
   attr {
     name: "use_image_if_no_bounding_boxes"
@@ -24383,27 +22237,21 @@ op {
     default_value {
       b: false
     }
-    description: "Controls behavior if no bounding boxes supplied.\nIf true, assume an implicit bounding box covering the whole input. If false,\nraise an error."
   }
-  summary: "Generate a single randomly distorted bounding box for an image."
-  description: "Bounding box annotations are often supplied in addition to ground-truth labels\nin image recognition or object localization tasks. A common technique for\ntraining such a system is to randomly distort an image while preserving\nits content, i.e. *data augmentation*. This Op outputs a randomly distorted\nlocalization of an object, i.e. bounding box, given an `image_size`,\n`bounding_boxes` and a series of constraints.\n\nThe output of this Op is a single bounding box that may be used to crop the\noriginal image. The output is returned as 3 tensors: `begin`, `size` and\n`bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the\nimage. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize\nwhat the bounding box looks like.\n\nBounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The\nbounding box coordinates are floats in `[0.0, 1.0]` relative to the width and\nheight of the underlying image.\n\nFor example,\n\n```python\n    # Generate a single distorted bounding box.\n    begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(\n        tf.shape(image),\n        bounding_boxes=bounding_boxes)\n\n    # Draw the bounding box in an image summary.\n    image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),\n                                                  bbox_for_draw)\n    tf.image_summary(\'images_with_box\', image_with_box)\n\n    # Employ the bounding box to distort the image.\n    distorted_image = tf.slice(image, begin, size)\n```\n\nNote that if no bounding box information is available, setting\n`use_image_if_no_bounding_boxes = true` will assume there is a single implicit\nbounding box covering the whole image. If `use_image_if_no_bounding_boxes` is\nfalse and no bounding boxes are supplied, an error is raised."
   is_stateful: true
 }
 op {
   name: "Save"
   input_arg {
     name: "filename"
-    description: "Must have a single element. The name of the file to which we write\nthe tensor."
     type: DT_STRING
   }
   input_arg {
     name: "tensor_names"
-    description: "Shape `[N]`. The names of the tensors to be saved."
     type: DT_STRING
   }
   input_arg {
     name: "data"
-    description: "`N` tensors to save."
     type_list_attr: "T"
   }
   attr {
@@ -24412,30 +22260,24 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Saves the input tensors to disk."
-  description: "The size of `tensor_names` must match the number of tensors in `data`. `data[i]`\nis written to `filename` with name `tensor_names[i]`.\n\nSee also `SaveSlices`."
   is_stateful: true
 }
 op {
   name: "SaveSlices"
   input_arg {
     name: "filename"
-    description: "Must have a single element. The name of the file to which we write the\ntensor."
     type: DT_STRING
   }
   input_arg {
     name: "tensor_names"
-    description: "Shape `[N]`. The names of the tensors to be saved."
     type: DT_STRING
   }
   input_arg {
     name: "shapes_and_slices"
-    description: "Shape `[N]`.  The shapes and slice specifications to use when\nsaving the tensors."
     type: DT_STRING
   }
   input_arg {
     name: "data"
-    description: "`N` tensors to save."
     type_list_attr: "T"
   }
   attr {
@@ -24444,30 +22286,24 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Saves input tensors slices to disk."
-  description: "This is like `Save` except that tensors can be listed in the saved file as being\na slice of a larger tensor.  `shapes_and_slices` specifies the shape of the\nlarger tensor and the slice that this tensor covers. `shapes_and_slices` must\nhave as many elements as `tensor_names`.\n\nElements of the `shapes_and_slices` input must either be:\n\n*  The empty string, in which case the corresponding tensor is\n   saved normally.\n*  A string of the form `dim0 dim1 ... dimN-1 slice-spec` where the\n   `dimI` are the dimensions of the larger tensor and `slice-spec`\n   specifies what part is covered by the tensor to save.\n\n`slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1`\nwhere each `sliceI` is either:\n\n*  The string `-` meaning that the slice covers all indices of this dimension\n*  `start,length` where `start` and `length` are integers.  In that\n   case the slice covers `length` indices starting at `start`.\n\nSee also `Save`."
   is_stateful: true
 }
 op {
   name: "SaveV2"
   input_arg {
     name: "prefix"
-    description: "Must have a single element. The prefix of the V2 checkpoint to which we\nwrite the tensors."
     type: DT_STRING
   }
   input_arg {
     name: "tensor_names"
-    description: "shape {N}. The names of the tensors to be saved."
     type: DT_STRING
   }
   input_arg {
     name: "shape_and_slices"
-    description: "shape {N}.  The slice specs of the tensors to be saved.\nEmpty strings indicate that they are non-partitioned tensors."
     type: DT_STRING
   }
   input_arg {
     name: "tensors"
-    description: "`N` tensors to save."
     type_list_attr: "dtypes"
   }
   attr {
@@ -24476,25 +22312,20 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Saves tensors in V2 checkpoint format."
-  description: "By default, saves the named tensors in full.  If the caller wishes to save\nspecific slices of full tensors, \"shape_and_slices\" should be non-empty strings\nand correspondingly well-formed."
   is_stateful: true
 }
 op {
   name: "ScalarSummary"
   input_arg {
     name: "tags"
-    description: "Tags for the summary."
     type: DT_STRING
   }
   input_arg {
     name: "values"
-    description: "Same shape as `tags.  Values for the summary."
     type_attr: "T"
   }
   output_arg {
     name: "summary"
-    description: "Scalar.  Serialized `Summary` protocol buffer."
     type: DT_STRING
   }
   attr {
@@ -24505,10 +22336,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -24516,8 +22348,6 @@ op {
       }
     }
   }
-  summary: "Outputs a `Summary` protocol buffer with scalar values."
-  description: "The input `tags` and `values` must have the same shape.  The generated summary\nhas a summary value for each tag-value pair in `tags` and `values`."
 }
 op {
   name: "ScanDataset"
@@ -24564,29 +22394,24 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset successively reduces `f` over the elements of `input_dataset`."
 }
 op {
   name: "ScatterAdd"
   input_arg {
     name: "ref"
-    description: "Should be from a `Variable` node."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "indices"
-    description: "A tensor of indices into the first dimension of `ref`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "A tensor of updated values to add to `ref`."
     type_attr: "T"
   }
   output_arg {
     name: "output_ref"
-    description: "= Same as `ref`.  Returned as a convenience for operations that want\nto use the updated values after the update is done."
     type_attr: "T"
     is_ref: true
   }
@@ -24597,17 +22422,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -24630,32 +22456,25 @@ op {
     default_value {
       b: false
     }
-    description: "If True, the addition will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Adds sparse updates to a variable reference."
-  description: "This operation computes\n\n    # Scalar indices\n    ref[indices, ...] += updates[...]\n\n    # Vector indices (for each i)\n    ref[indices[i], ...] += updates[i, ...]\n\n    # High rank indices (for each i, ..., j)\n    ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]\n\nThis operation outputs `ref` after the update is done.\nThis makes it easier to chain operations that need to use the reset value.\n\nDuplicate entries are handled correctly: if multiple `indices` reference\nthe same location, their contributions add.\n\nRequires `updates.shape = indices.shape + ref.shape[1:]`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/ScatterAdd.png\" alt>\n</div>"
 }
 op {
   name: "ScatterDiv"
   input_arg {
     name: "ref"
-    description: "Should be from a `Variable` node."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "indices"
-    description: "A tensor of indices into the first dimension of `ref`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "A tensor of values that `ref` is divided by."
     type_attr: "T"
   }
   output_arg {
     name: "output_ref"
-    description: "= Same as `ref`.  Returned as a convenience for operations that want\nto use the updated values after the update is done."
     type_attr: "T"
     is_ref: true
   }
@@ -24666,17 +22485,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -24699,32 +22519,25 @@ op {
     default_value {
       b: false
     }
-    description: "If True, the operation will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Divides a variable reference by sparse updates."
-  description: "This operation computes\n\n```python\n    # Scalar indices\n    ref[indices, ...] /= updates[...]\n\n    # Vector indices (for each i)\n    ref[indices[i], ...] /= updates[i, ...]\n\n    # High rank indices (for each i, ..., j)\n    ref[indices[i, ..., j], ...] /= updates[i, ..., j, ...]\n```\n\nThis operation outputs `ref` after the update is done.\nThis makes it easier to chain operations that need to use the reset value.\n\nDuplicate entries are handled correctly: if multiple `indices` reference\nthe same location, their contributions divide.\n\nRequires `updates.shape = indices.shape + ref.shape[1:]`."
 }
 op {
   name: "ScatterMul"
   input_arg {
     name: "ref"
-    description: "Should be from a `Variable` node."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "indices"
-    description: "A tensor of indices into the first dimension of `ref`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "A tensor of updated values to multiply to `ref`."
     type_attr: "T"
   }
   output_arg {
     name: "output_ref"
-    description: "= Same as `ref`.  Returned as a convenience for operations that want\nto use the updated values after the update is done."
     type_attr: "T"
     is_ref: true
   }
@@ -24735,17 +22548,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -24768,31 +22582,24 @@ op {
     default_value {
       b: false
     }
-    description: "If True, the operation will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Multiplies sparse updates into a variable reference."
-  description: "This operation computes\n\n```python\n    # Scalar indices\n    ref[indices, ...] *= updates[...]\n\n    # Vector indices (for each i)\n    ref[indices[i], ...] *= updates[i, ...]\n\n    # High rank indices (for each i, ..., j)\n    ref[indices[i, ..., j], ...] *= updates[i, ..., j, ...]\n```\n\nThis operation outputs `ref` after the update is done.\nThis makes it easier to chain operations that need to use the reset value.\n\nDuplicate entries are handled correctly: if multiple `indices` reference\nthe same location, their contributions multiply.\n\nRequires `updates.shape = indices.shape + ref.shape[1:]`."
 }
 op {
   name: "ScatterNd"
   input_arg {
     name: "indices"
-    description: "Index tensor."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "Updates to scatter into output."
     type_attr: "T"
   }
   input_arg {
     name: "shape"
-    description: "1-D. The shape of the resulting tensor."
     type_attr: "Tindices"
   }
   output_arg {
     name: "output"
-    description: "A new tensor with the given shape and updates applied according\nto the indices."
     type_attr: "T"
   }
   attr {
@@ -24809,30 +22616,24 @@ op {
       }
     }
   }
-  summary: "Scatter `updates` into a new (initially zero) tensor according to `indices`."
-  description: "Creates a new tensor by applying sparse `updates` to individual\nvalues or slices within a zero tensor of the given `shape` according to\nindices.  This operator is the inverse of the @{tf.gather_nd} operator which\nextracts values or slices from a given tensor.\n\n**WARNING**: The order in which updates are applied is nondeterministic, so the\noutput will be nondeterministic if `indices` contains duplicates.\n\n`indices` is an integer tensor containing indices into a new tensor of shape\n`shape`.  The last dimension of `indices` can be at most the rank of `shape`:\n\n    indices.shape[-1] <= shape.rank\n\nThe last dimension of `indices` corresponds to indices into elements\n(if `indices.shape[-1] = shape.rank`) or slices\n(if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of\n`shape`.  `updates` is a tensor with shape\n\n    indices.shape[:-1] + shape[indices.shape[-1]:]\n\nThe simplest form of scatter is to insert individual elements in a tensor by\nindex. For example, say we want to insert 4 scattered elements in a rank-1\ntensor with 8 elements.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/ScatterNd1.png\" alt>\n</div>\n\nIn Python, this scatter operation would look like this:\n\n```python\n    indices = tf.constant([[4], [3], [1], [7]])\n    updates = tf.constant([9, 10, 11, 12])\n    shape = tf.constant([8])\n    scatter = tf.scatter_nd(indices, updates, shape)\n    with tf.Session() as sess:\n      print(sess.run(scatter))\n```\n\nThe resulting tensor would look like this:\n\n    [0, 11, 0, 10, 9, 0, 0, 12]\n\nWe can also, insert entire slices of a higher rank tensor all at once. For\nexample, if we wanted to insert two slices in the first dimension of a\nrank-3 tensor with two matrices of new values.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/ScatterNd2.png\" alt>\n</div>\n\nIn Python, this scatter operation would look like this:\n\n```python\n    indices = tf.constant([[0], [2]])\n    updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6],\n                            [7, 7, 7, 7], [8, 8, 8, 8]],\n                           [[5, 5, 5, 5], [6, 6, 6, 6],\n                            [7, 7, 7, 7], [8, 8, 8, 8]]])\n    shape = tf.constant([4, 4, 4])\n    scatter = tf.scatter_nd(indices, updates, shape)\n    with tf.Session() as sess:\n      print(sess.run(scatter))\n```\n\nThe resulting tensor would look like this:\n\n    [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],\n     [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],\n     [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],\n     [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]"
 }
 op {
   name: "ScatterNdAdd"
   input_arg {
     name: "ref"
-    description: "A mutable Tensor. Should be from a Variable node."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "indices"
-    description: "A Tensor. Must be one of the following types: int32, int64.\nA tensor of indices into ref."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "A Tensor. Must have the same type as ref. A tensor of updated values\nto add to ref."
     type_attr: "T"
   }
   output_arg {
     name: "output_ref"
-    description: "Same as ref. Returned as a convenience for operations that want\nto use the updated values after the update is done."
     type_attr: "T"
     is_ref: true
   }
@@ -24843,17 +22644,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -24876,31 +22678,24 @@ op {
     default_value {
       b: false
     }
-    description: "An optional bool. Defaults to True. If True, the assignment will\nbe protected by a lock; otherwise the behavior is undefined,\nbut may exhibit less contention."
   }
-  summary: "Applies sparse addition between `updates` and individual values or slices"
-  description: "within a given variable according to `indices`.\n\n`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.\n\n`indices` must be integer tensor, containing indices into `ref`.\nIt must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.\n\nThe innermost dimension of `indices` (with length `K`) corresponds to\nindices into elements (if `K = P`) or slices (if `K < P`) along the `K`th\ndimension of `ref`.\n\n`updates` is `Tensor` of rank `Q-1+P-K` with shape:\n\n```\n[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].\n```\n\nFor example, say we want to add 4 scattered elements to a rank-1 tensor to 8\nelements. In Python, that addition would look like this:\n\n    ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])\n    indices = tf.constant([[4], [3], [1], [7]])\n    updates = tf.constant([9, 10, 11, 12])\n    add = tf.scatter_nd_add(ref, indices, updates)\n    with tf.Session() as sess:\n      print sess.run(add)\n\nThe resulting update to ref would look like this:\n\n    [1, 13, 3, 14, 14, 6, 7, 20]\n\nSee @{tf.scatter_nd} for more details about how to make updates to\nslices."
 }
 op {
   name: "ScatterNdNonAliasingAdd"
   input_arg {
     name: "input"
-    description: "A Tensor."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A Tensor. Must be one of the following types: `int32`, `int64`.\nA tensor of indices into `input`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "A Tensor. Must have the same type as ref. A tensor of updated values\nto add to `input`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "A `Tensor` with the same shape as `input`, containing values of `input`\nupdated with `updates`."
     type_attr: "T"
   }
   attr {
@@ -24910,17 +22705,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -24937,30 +22733,24 @@ op {
       }
     }
   }
-  summary: "Applies sparse addition to `input` using individual values or slices"
-  description: "from `updates` according to indices `indices`.  The updates are non-aliasing:\n`input` is only modified in-place if no other operations will use it.\nOtherwise, a copy of `input` is made.  This operation has a gradient with\nrespect to both `input` and `updates`.\n\n`input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.\n\n`indices` must be integer tensor, containing indices into `input`.\nIt must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.\n\nThe innermost dimension of `indices` (with length `K`) corresponds to\nindices into elements (if `K = P`) or `(P-K)`-dimensional slices\n(if `K < P`) along the `K`th dimension of `input`.\n\n`updates` is `Tensor` of rank `Q-1+P-K` with shape:\n\n```\n[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].\n```\n\nFor example, say we want to add 4 scattered elements to a rank-1 tensor to 8\nelements. In Python, that addition would look like this:\n\n    input = tf.constant([1, 2, 3, 4, 5, 6, 7, 8])\n    indices = tf.constant([[4], [3], [1], [7]])\n    updates = tf.constant([9, 10, 11, 12])\n    output = tf.scatter_nd_non_aliasing_add(input, indices, updates)\n    with tf.Session() as sess:\n      print(sess.run(output))\n\nThe resulting value `output` would look like this:\n\n    [1, 13, 3, 14, 14, 6, 7, 20]\n\nSee @{tf.scatter_nd} for more details about how to make updates to slices."
 }
 op {
   name: "ScatterNdSub"
   input_arg {
     name: "ref"
-    description: "A mutable Tensor. Should be from a Variable node."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "indices"
-    description: "A Tensor. Must be one of the following types: int32, int64.\nA tensor of indices into ref."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "A Tensor. Must have the same type as ref. A tensor of updated values\nto subtract from ref."
     type_attr: "T"
   }
   output_arg {
     name: "output_ref"
-    description: "Same as ref. Returned as a convenience for operations that want\nto use the updated values after the update is done."
     type_attr: "T"
     is_ref: true
   }
@@ -24971,17 +22761,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -25004,32 +22795,25 @@ op {
     default_value {
       b: false
     }
-    description: "An optional bool. Defaults to True. If True, the assignment will\nbe protected by a lock; otherwise the behavior is undefined,\nbut may exhibit less contention."
   }
-  summary: "Applies sparse subtraction between `updates` and individual values or slices"
-  description: "within a given variable according to `indices`.\n\n`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.\n\n`indices` must be integer tensor, containing indices into `ref`.\nIt must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.\n\nThe innermost dimension of `indices` (with length `K`) corresponds to\nindices into elements (if `K = P`) or slices (if `K < P`) along the `K`th\ndimension of `ref`.\n\n`updates` is `Tensor` of rank `Q-1+P-K` with shape:\n\n```\n[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].\n```\n\nFor example, say we want to subtract 4 scattered elements from a rank-1 tensor\nwith 8 elements. In Python, that subtraction would look like this:\n\n    ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])\n    indices = tf.constant([[4], [3], [1], [7]])\n    updates = tf.constant([9, 10, 11, 12])\n    sub = tf.scatter_nd_sub(ref, indices, updates)\n    with tf.Session() as sess:\n      print sess.run(sub)\n\nThe resulting update to ref would look like this:\n\n    [1, -9, 3, -6, -4, 6, 7, -4]\n\nSee @{tf.scatter_nd} for more details about how to make updates to\nslices."
 }
 op {
   name: "ScatterNdUpdate"
   input_arg {
     name: "ref"
-    description: "A mutable Tensor. Should be from a Variable node."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "indices"
-    description: "A Tensor. Must be one of the following types: int32, int64.\nA tensor of indices into ref."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "A Tensor. Must have the same type as ref. A tensor of updated\nvalues to add to ref."
     type_attr: "T"
   }
   output_arg {
     name: "output_ref"
-    description: "Same as ref. Returned as a convenience for operations that want to\nuse the updated values after the update is done."
     type_attr: "T"
     is_ref: true
   }
@@ -25053,32 +22837,25 @@ op {
     default_value {
       b: true
     }
-    description: "An optional bool. Defaults to True. If True, the assignment will\nbe protected by a lock; otherwise the behavior is undefined,\nbut may exhibit less contention."
   }
-  summary: "Applies sparse `updates` to individual values or slices within a given"
-  description: "variable according to `indices`.\n\n`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.\n\n`indices` must be integer tensor, containing indices into `ref`.\nIt must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.\n\nThe innermost dimension of `indices` (with length `K`) corresponds to\nindices into elements (if `K = P`) or slices (if `K < P`) along the `K`th\ndimension of `ref`.\n\n`updates` is `Tensor` of rank `Q-1+P-K` with shape:\n\n```\n[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].\n```\n\nFor example, say we want to update 4 scattered elements to a rank-1 tensor to\n8 elements. In Python, that update would look like this:\n\n```python\n    ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])\n    indices = tf.constant([[4], [3], [1] ,[7]])\n    updates = tf.constant([9, 10, 11, 12])\n    update = tf.scatter_nd_update(ref, indices, updates)\n    with tf.Session() as sess:\n      print sess.run(update)\n```\n\nThe resulting update to ref would look like this:\n\n    [1, 11, 3, 10, 9, 6, 7, 12]\n\nSee @{tf.scatter_nd} for more details about how to make updates to\nslices."
 }
 op {
   name: "ScatterSub"
   input_arg {
     name: "ref"
-    description: "Should be from a `Variable` node."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "indices"
-    description: "A tensor of indices into the first dimension of `ref`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "A tensor of updated values to subtract from `ref`."
     type_attr: "T"
   }
   output_arg {
     name: "output_ref"
-    description: "= Same as `ref`.  Returned as a convenience for operations that want\nto use the updated values after the update is done."
     type_attr: "T"
     is_ref: true
   }
@@ -25089,17 +22866,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -25122,32 +22900,25 @@ op {
     default_value {
       b: false
     }
-    description: "If True, the subtraction will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Subtracts sparse updates to a variable reference."
-  description: "```python\n    # Scalar indices\n    ref[indices, ...] -= updates[...]\n\n    # Vector indices (for each i)\n    ref[indices[i], ...] -= updates[i, ...]\n\n    # High rank indices (for each i, ..., j)\n    ref[indices[i, ..., j], ...] -= updates[i, ..., j, ...]\n```\n\nThis operation outputs `ref` after the update is done.\nThis makes it easier to chain operations that need to use the reset value.\n\nDuplicate entries are handled correctly: if multiple `indices` reference\nthe same location, their (negated) contributions add.\n\nRequires `updates.shape = indices.shape + ref.shape[1:]`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/ScatterSub.png\" alt>\n</div>"
 }
 op {
   name: "ScatterUpdate"
   input_arg {
     name: "ref"
-    description: "Should be from a `Variable` node."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "indices"
-    description: "A tensor of indices into the first dimension of `ref`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "updates"
-    description: "A tensor of updated values to store in `ref`."
     type_attr: "T"
   }
   output_arg {
     name: "output_ref"
-    description: "= Same as `ref`.  Returned as a convenience for operations that want\nto use the updated values after the update is done."
     type_attr: "T"
     is_ref: true
   }
@@ -25171,105 +22942,85 @@ op {
     default_value {
       b: true
     }
-    description: "If True, the assignment will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Applies sparse updates to a variable reference."
-  description: "This operation computes\n\n```python\n    # Scalar indices\n    ref[indices, ...] = updates[...]\n\n    # Vector indices (for each i)\n    ref[indices[i], ...] = updates[i, ...]\n\n    # High rank indices (for each i, ..., j)\n    ref[indices[i, ..., j], ...] = updates[i, ..., j, ...]\n```\n\nThis operation outputs `ref` after the update is done.\nThis makes it easier to chain operations that need to use the reset value.\n\nIf values in `ref` is to be updated more than once, because there are\nduplicate entries in `indices`, the order at which the updates happen\nfor each value is undefined.\n\nRequires `updates.shape = indices.shape + ref.shape[1:]`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/ScatterUpdate.png\" alt>\n</div>"
 }
 op {
   name: "SdcaFprint"
   input_arg {
     name: "input"
-    description: "vector of strings to compute fingerprints on."
     type: DT_STRING
   }
   output_arg {
     name: "output"
-    description: "a (N,2) shaped matrix where N is the number of elements in the input\nvector. Each row contains the low and high parts of the fingerprint."
     type: DT_INT64
   }
-  summary: "Computes fingerprints of the input strings."
 }
 op {
   name: "SdcaOptimizer"
   input_arg {
     name: "sparse_example_indices"
-    description: "a list of vectors which contain example indices."
     type: DT_INT64
     number_attr: "num_sparse_features"
   }
   input_arg {
     name: "sparse_feature_indices"
-    description: "a list of vectors which contain feature indices."
     type: DT_INT64
     number_attr: "num_sparse_features"
   }
   input_arg {
     name: "sparse_feature_values"
-    description: "a list of vectors which contains feature value\nassociated with each feature group."
     type: DT_FLOAT
     number_attr: "num_sparse_features_with_values"
   }
   input_arg {
     name: "dense_features"
-    description: "a list of matrices which contains the dense feature values."
     type: DT_FLOAT
     number_attr: "num_dense_features"
   }
   input_arg {
     name: "example_weights"
-    description: "a vector which contains the weight associated with each\nexample."
     type: DT_FLOAT
   }
   input_arg {
     name: "example_labels"
-    description: "a vector which contains the label/target associated with each\nexample."
     type: DT_FLOAT
   }
   input_arg {
     name: "sparse_indices"
-    description: "a list of vectors where each value is the indices which has\ncorresponding weights in sparse_weights. This field maybe omitted for the\ndense approach."
     type: DT_INT64
     number_attr: "num_sparse_features"
   }
   input_arg {
     name: "sparse_weights"
-    description: "a list of vectors where each value is the weight associated with\na sparse feature group."
     type: DT_FLOAT
     number_attr: "num_sparse_features"
   }
   input_arg {
     name: "dense_weights"
-    description: "a list of vectors where the values are the weights associated\nwith a dense feature group."
     type: DT_FLOAT
     number_attr: "num_dense_features"
   }
   input_arg {
     name: "example_state_data"
-    description: "a list of vectors containing the example state data."
     type: DT_FLOAT
   }
   output_arg {
     name: "out_example_state_data"
-    description: "a list of vectors containing the updated example state\ndata."
     type: DT_FLOAT
   }
   output_arg {
     name: "out_delta_sparse_weights"
-    description: "a list of vectors where each value is the delta\nweights associated with a sparse feature group."
     type: DT_FLOAT
     number_attr: "num_sparse_features"
   }
   output_arg {
     name: "out_delta_dense_weights"
-    description: "a list of vectors where the values are the delta\nweights associated with a dense feature group."
     type: DT_FLOAT
     number_attr: "num_dense_features"
   }
   attr {
     name: "loss_type"
     type: "string"
-    description: "Type of the primal loss. Currently SdcaSolver supports logistic,\nsquared and hinge losses."
     allowed_values {
       list {
         s: "logistic_loss"
@@ -25285,58 +23036,47 @@ op {
     default_value {
       b: false
     }
-    description: "Whether to use Adapative SDCA for the inner loop."
   }
   attr {
     name: "num_sparse_features"
     type: "int"
-    description: "Number of sparse feature groups to train on."
     has_minimum: true
   }
   attr {
     name: "num_sparse_features_with_values"
     type: "int"
-    description: "Number of sparse feature groups with values\nassociated with it, otherwise implicitly treats values as 1.0."
     has_minimum: true
   }
   attr {
     name: "num_dense_features"
     type: "int"
-    description: "Number of dense feature groups to train on."
     has_minimum: true
   }
   attr {
     name: "l1"
     type: "float"
-    description: "Symmetric l1 regularization strength."
   }
   attr {
     name: "l2"
     type: "float"
-    description: "Symmetric l2 regularization strength."
   }
   attr {
     name: "num_loss_partitions"
     type: "int"
-    description: "Number of partitions of the global loss function."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "num_inner_iterations"
     type: "int"
-    description: "Number of iterations per mini-batch."
     has_minimum: true
     minimum: 1
   }
-  summary: "Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for"
-  description: "linear models with L1 + L2 regularization. As global optimization objective is\nstrongly-convex, the optimizer optimizes the dual objective at each step. The\noptimizer applies each update one example at a time. Examples are sampled\nuniformly, and the optimizer is learning rate free and enjoys linear convergence\nrate.\n\n[Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>\nShai Shalev-Shwartz, Tong Zhang. 2012\n\n$$Loss Objective = \\sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$\n\n[Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>\nChenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,\nPeter Richtarik, Martin Takac. 2015\n\n[Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>\nDominik Csiba, Zheng Qu, Peter Richtarik. 2015"
 }
 op {
   name: "SdcaShrinkL1"
   input_arg {
     name: "weights"
-    description: "a list of vectors where each value is the weight associated with a\nfeature group."
     type: DT_FLOAT
     number_attr: "num_features"
     is_ref: true
@@ -25344,20 +23084,16 @@ op {
   attr {
     name: "num_features"
     type: "int"
-    description: "Number of feature groups to apply shrinking step."
     has_minimum: true
   }
   attr {
     name: "l1"
     type: "float"
-    description: "Symmetric l1 regularization strength."
   }
   attr {
     name: "l2"
     type: "float"
-    description: "Symmetric l2 regularization strength. Should be a positive float."
   }
-  summary: "Applies L1 regularization shrink step on the parameters."
 }
 op {
   name: "SegmentMax"
@@ -25367,12 +23103,10 @@ op {
   }
   input_arg {
     name: "segment_ids"
-    description: "A 1-D tensor whose rank is equal to the rank of `data`\'s\nfirst dimension.  Values should be sorted and can be repeated."
     type_attr: "Tindices"
   }
   output_arg {
     name: "output"
-    description: "Has same shape as data, except for dimension 0 which\nhas size `k`, the number of segments."
     type_attr: "T"
   }
   attr {
@@ -25383,10 +23117,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -25404,8 +23139,6 @@ op {
       }
     }
   }
-  summary: "Computes the maximum along segments of a tensor."
-  description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nComputes a tensor such that\n\\\\(output_i = \\max_j(data_j)\\\\) where `max` is over `j` such\nthat `segment_ids[j] == i`.\n\nIf the max is empty for a given segment ID `i`, `output[i] = 0`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/SegmentMax.png\" alt>\n</div>"
 }
 op {
   name: "SegmentMean"
@@ -25415,12 +23148,10 @@ op {
   }
   input_arg {
     name: "segment_ids"
-    description: "A 1-D tensor whose rank is equal to the rank of `data`\'s\nfirst dimension.  Values should be sorted and can be repeated."
     type_attr: "Tindices"
   }
   output_arg {
     name: "output"
-    description: "Has same shape as data, except for dimension 0 which\nhas size `k`, the number of segments."
     type_attr: "T"
   }
   attr {
@@ -25431,10 +23162,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -25452,8 +23184,6 @@ op {
       }
     }
   }
-  summary: "Computes the mean along segments of a tensor."
-  description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nComputes a tensor such that\n\\\\(output_i = \\frac{\\sum_j data_j}{N}\\\\) where `mean` is\nover `j` such that `segment_ids[j] == i` and `N` is the total number of\nvalues summed.\n\nIf the mean is empty for a given segment ID `i`, `output[i] = 0`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/SegmentMean.png\" alt>\n</div>"
 }
 op {
   name: "SegmentMin"
@@ -25463,12 +23193,10 @@ op {
   }
   input_arg {
     name: "segment_ids"
-    description: "A 1-D tensor whose rank is equal to the rank of `data`\'s\nfirst dimension.  Values should be sorted and can be repeated."
     type_attr: "Tindices"
   }
   output_arg {
     name: "output"
-    description: "Has same shape as data, except for dimension 0 which\nhas size `k`, the number of segments."
     type_attr: "T"
   }
   attr {
@@ -25479,10 +23207,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -25500,8 +23229,6 @@ op {
       }
     }
   }
-  summary: "Computes the minimum along segments of a tensor."
-  description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nComputes a tensor such that\n\\\\(output_i = \\min_j(data_j)\\\\) where `min` is over `j` such\nthat `segment_ids[j] == i`.\n\nIf the min is empty for a given segment ID `i`, `output[i] = 0`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/SegmentMin.png\" alt>\n</div>"
 }
 op {
   name: "SegmentProd"
@@ -25511,12 +23238,10 @@ op {
   }
   input_arg {
     name: "segment_ids"
-    description: "A 1-D tensor whose rank is equal to the rank of `data`\'s\nfirst dimension.  Values should be sorted and can be repeated."
     type_attr: "Tindices"
   }
   output_arg {
     name: "output"
-    description: "Has same shape as data, except for dimension 0 which\nhas size `k`, the number of segments."
     type_attr: "T"
   }
   attr {
@@ -25526,17 +23251,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -25553,8 +23279,6 @@ op {
       }
     }
   }
-  summary: "Computes the product along segments of a tensor."
-  description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nComputes a tensor such that\n\\\\(output_i = \\prod_j data_j\\\\) where the product is over `j` such\nthat `segment_ids[j] == i`.\n\nIf the product is empty for a given segment ID `i`, `output[i] = 1`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/SegmentProd.png\" alt>\n</div>"
 }
 op {
   name: "SegmentSum"
@@ -25564,12 +23288,10 @@ op {
   }
   input_arg {
     name: "segment_ids"
-    description: "A 1-D tensor whose rank is equal to the rank of `data`\'s\nfirst dimension.  Values should be sorted and can be repeated."
     type_attr: "Tindices"
   }
   output_arg {
     name: "output"
-    description: "Has same shape as data, except for dimension 0 which\nhas size `k`, the number of segments."
     type_attr: "T"
   }
   attr {
@@ -25579,17 +23301,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -25606,8 +23329,6 @@ op {
       }
     }
   }
-  summary: "Computes the sum along segments of a tensor."
-  description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nComputes a tensor such that\n\\\\(output_i = \\sum_j data_j\\\\) where sum is over `j` such\nthat `segment_ids[j] == i`.\n\nIf the sum is empty for a given segment ID `i`, `output[i] = 0`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/SegmentSum.png\" alt>\n</div>"
 }
 op {
   name: "Select"
@@ -25617,36 +23338,29 @@ op {
   }
   input_arg {
     name: "t"
-    description: "= A `Tensor` which may have the same shape as `condition`.\nIf `condition` is rank 1, `t` may have higher rank,\nbut its first dimension must match the size of `condition`."
     type_attr: "T"
   }
   input_arg {
     name: "e"
-    description: "= A `Tensor` with the same type and shape as `t`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "= A `Tensor` with the same type and shape as `t` and `e`."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Selects elements from `t` or `e`, depending on `condition`."
-  description: "The `t`, and `e` tensors must all have the same shape, and the\noutput will also have that shape.\n\nThe `condition` tensor must be a scalar if `t` and `e` are scalars.\nIf `t` and `e` are vectors or higher rank, then `condition` must be either a\nscalar, a vector with size matching the first dimension of `t`, or must have\nthe same shape as `t`.\n\nThe `condition` tensor acts as a mask that chooses, based on the value at each\nelement, whether the corresponding element / row in the output should be\ntaken from `t` (if true) or `e` (if false).\n\nIf `condition` is a vector and `t` and `e` are higher rank matrices, then\nit chooses which row (outer dimension) to copy from `t` and `e`.\nIf `condition` has the same shape as `t` and `e`, then it chooses which\nelement to copy from `t` and `e`.\n\nFor example:\n\n```python\n# \'condition\' tensor is [[True,  False]\n#                        [False, True]]\n# \'t\' is [[1, 2],\n#         [3, 4]]\n# \'e\' is [[5, 6],\n#         [7, 8]]\nselect(condition, t, e)  # => [[1, 6], [7, 4]]\n\n\n# \'condition\' tensor is [True, False]\n# \'t\' is [[1, 2],\n#         [3, 4]]\n# \'e\' is [[5, 6],\n#         [7, 8]]\nselect(condition, t, e) ==> [[1, 2],\n                             [7, 8]]\n\n```"
 }
 op {
   name: "SelfAdjointEig"
   input_arg {
     name: "input"
-    description: "Shape is `[..., M, M]`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Shape is `[..., M+1, M]`."
     type_attr: "T"
   }
   attr {
@@ -25659,8 +23373,6 @@ op {
       }
     }
   }
-  summary: "Computes the Eigen Decomposition of a batch of square self-adjoint matrices."
-  description: "The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices, with the same constraints as the single matrix\nSelfAdjointEig.\n\nThe result is a [..., M+1, M] matrix with [..., 0,:] containing the\neigenvalues, and subsequent [...,1:, :] containing the eigenvectors."
   deprecation {
     version: 11
     explanation: "Use SelfAdjointEigV2 instead."
@@ -25670,17 +23382,14 @@ op {
   name: "SelfAdjointEigV2"
   input_arg {
     name: "input"
-    description: "`Tensor` input of shape `[N, N]`."
     type_attr: "T"
   }
   output_arg {
     name: "e"
-    description: "Eigenvalues. Shape is `[N]`."
     type_attr: "T"
   }
   output_arg {
     name: "v"
-    description: "Eigenvectors. Shape is `[N, N]`."
     type_attr: "T"
   }
   attr {
@@ -25689,7 +23398,6 @@ op {
     default_value {
       b: true
     }
-    description: "If `True` then eigenvectors will be computed and returned in `v`.\nOtherwise, only the eigenvalues will be computed."
   }
   attr {
     name: "T"
@@ -25703,8 +23411,6 @@ op {
       }
     }
   }
-  summary: "Computes the eigen decomposition of one or more square self-adjoint matrices."
-  description: "Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in\n`input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`.\n\n```python\n# a is a tensor.\n# e is a tensor of eigenvalues.\n# v is a tensor of eigenvectors.\ne, v = self_adjoint_eig(a)\ne = self_adjoint_eig(a, compute_v=False)\n```"
 }
 op {
   name: "Selu"
@@ -25722,29 +23428,25 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)`"
-  description: "if < 0, `scale * features` otherwise.\n\nSee [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)"
 }
 op {
   name: "SeluGrad"
   input_arg {
     name: "gradients"
-    description: "The backpropagated gradients to the corresponding Selu operation."
     type_attr: "T"
   }
   input_arg {
     name: "outputs"
-    description: "The outputs of the corresponding Selu operation."
     type_attr: "T"
   }
   output_arg {
     name: "backprops"
-    description: "The gradients: `gradients * (outputs + scale * alpha)`\nif outputs < 0, `scale * gradients` otherwise."
     type_attr: "T"
   }
   attr {
@@ -25753,122 +23455,128 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes gradients for the scaled exponential linear (Selu) operation."
 }
 op {
   name: "SerializeIterator"
   input_arg {
     name: "resource_handle"
-    description: "A handle to an iterator resource."
     type: DT_RESOURCE
   }
   output_arg {
     name: "serialized"
-    description: "A variant tensor storing the state of the iterator contained in the\nresource."
     type: DT_VARIANT
   }
-  summary: "Converts the given `resource_handle` representing an iterator to a variant tensor."
   is_stateful: true
 }
 op {
   name: "SerializeManySparse"
   input_arg {
     name: "sparse_indices"
-    description: "2-D.  The `indices` of the minibatch `SparseTensor`."
     type: DT_INT64
   }
   input_arg {
     name: "sparse_values"
-    description: "1-D.  The `values` of the minibatch `SparseTensor`."
     type_attr: "T"
   }
   input_arg {
     name: "sparse_shape"
-    description: "1-D.  The `shape` of the minibatch `SparseTensor`."
     type: DT_INT64
   }
   output_arg {
     name: "serialized_sparse"
-    type: DT_STRING
+    type_attr: "out_type"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` string `Tensor`."
-  description: "The `SparseTensor` must have rank `R` greater than 1, and the first dimension\nis treated as the minibatch dimension.  Elements of the `SparseTensor`\nmust be sorted in increasing order of this first dimension.  The serialized\n`SparseTensor` objects going into each row of `serialized_sparse` will have\nrank `R-1`.\n\nThe minibatch size `N` is extracted from `sparse_shape[0]`."
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_STRING
+    }
+    allowed_values {
+      list {
+        type: DT_STRING
+        type: DT_VARIANT
+      }
+    }
+  }
 }
 op {
   name: "SerializeSparse"
   input_arg {
     name: "sparse_indices"
-    description: "2-D.  The `indices` of the `SparseTensor`."
     type: DT_INT64
   }
   input_arg {
     name: "sparse_values"
-    description: "1-D.  The `values` of the `SparseTensor`."
     type_attr: "T"
   }
   input_arg {
     name: "sparse_shape"
-    description: "1-D.  The `shape` of the `SparseTensor`."
     type: DT_INT64
   }
   output_arg {
     name: "serialized_sparse"
-    type: DT_STRING
+    type_attr: "out_type"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object."
+  attr {
+    name: "out_type"
+    type: "type"
+    default_value {
+      type: DT_STRING
+    }
+    allowed_values {
+      list {
+        type: DT_STRING
+        type: DT_VARIANT
+      }
+    }
+  }
 }
 op {
   name: "SerializeTensor"
   input_arg {
     name: "tensor"
-    description: "A Tensor of type `T`."
     type_attr: "T"
   }
   output_arg {
     name: "serialized"
-    description: "A serialized TensorProto proto of the input tensor."
     type: DT_STRING
   }
   attr {
     name: "T"
     type: "type"
-    description: "The type of the input tensor."
   }
-  summary: "Transforms a Tensor into a serialized TensorProto proto."
 }
 op {
   name: "SetSize"
   input_arg {
     name: "set_indices"
-    description: "2D `Tensor`, indices of a `SparseTensor`."
     type: DT_INT64
   }
   input_arg {
     name: "set_values"
-    description: "1D `Tensor`, values of a `SparseTensor`."
     type_attr: "T"
   }
   input_arg {
     name: "set_shape"
-    description: "1D `Tensor`, shape of a `SparseTensor`."
     type: DT_INT64
   }
   output_arg {
     name: "size"
-    description: "For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st\n`n-1` dimensions as `set`. Each value is the number of unique elements in\nthe corresponding `[0...n-1]` dimension of `set`."
     type: DT_INT32
   }
   attr {
@@ -25893,8 +23601,6 @@ op {
       }
     }
   }
-  summary: "Number of unique elements along last dimension of input `set`."
-  description: "Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`,\nand `set_shape`. The last dimension contains values in a set, duplicates are\nallowed but ignored.\n\nIf `validate_indices` is `True`, this op validates the order and range of `set`\nindices."
 }
 op {
   name: "Shape"
@@ -25923,8 +23629,6 @@ op {
       }
     }
   }
-  summary: "Returns the shape of a tensor."
-  description: "This operation returns a 1-D integer tensor representing the shape of `input`.\n\nFor example:\n\n```\n# \'t\' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]\nshape(t) ==> [2, 2, 3]\n```"
 }
 op {
   name: "ShapeN"
@@ -25961,8 +23665,6 @@ op {
       }
     }
   }
-  summary: "Returns shape of tensors."
-  description: "This operation returns N 1-D integer tensors representing shape of `input[i]s`."
 }
 op {
   name: "ShardedFilename"
@@ -25982,8 +23684,6 @@ op {
     name: "filename"
     type: DT_STRING
   }
-  summary: "Generate a sharded filename. The filename is printf formatted as"
-  description: "   %s-%05d-of-%05d, basename, shard, num_shards."
 }
 op {
   name: "ShardedFilespec"
@@ -25999,7 +23699,45 @@ op {
     name: "filename"
     type: DT_STRING
   }
-  summary: "Generate a glob pattern matching all sharded file names."
+}
+op {
+  name: "ShuffleAndRepeatDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "buffer_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "seed2"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "count"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
 }
 op {
   name: "ShuffleDataset"
@@ -26009,17 +23747,14 @@ op {
   }
   input_arg {
     name: "buffer_size"
-    description: "The number of output elements to buffer in an iterator over\nthis dataset. Compare with the `min_after_dequeue` attr when creating a\n`RandomShuffleQueue`."
     type: DT_INT64
   }
   input_arg {
     name: "seed"
-    description: "A scalar seed for the random number generator. If either seed or\nseed2 is set to be non-zero, the random number generator is seeded\nby the given seed.  Otherwise, a random seed is used."
     type: DT_INT64
   }
   input_arg {
     name: "seed2"
-    description: "A second scalar seed to avoid seed collision."
     type: DT_INT64
   }
   output_arg {
@@ -26032,7 +23767,6 @@ op {
     default_value {
       b: true
     }
-    description: "If true, each iterator over this dataset will be given\na different pseudorandomly generated seed, based on a sequence seeded by the\n`seed` and `seed2` inputs. If false, each iterator will be given the same\nseed, and repeated iteration over this dataset will yield the exact same\nsequence of results."
   }
   attr {
     name: "output_types"
@@ -26046,7 +23780,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that shuffles elements from `input_dataset` pseudorandomly."
 }
 op {
   name: "Sigmoid"
@@ -26064,6 +23797,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -26071,8 +23805,6 @@ op {
       }
     }
   }
-  summary: "Computes sigmoid of `x` element-wise."
-  description: "Specifically, `y = 1 / (1 + exp(-x))`."
 }
 op {
   name: "SigmoidGrad"
@@ -26094,6 +23826,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -26101,8 +23834,6 @@ op {
       }
     }
   }
-  summary: "Computes the gradient of the sigmoid of `x` wrt its input."
-  description: "Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and\n`dy` is the corresponding input gradient."
 }
 op {
   name: "Sign"
@@ -26120,6 +23851,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -26129,8 +23861,6 @@ op {
       }
     }
   }
-  summary: "Returns an element-wise indication of the sign of a number."
-  description: "`y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`.\n\nFor complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`."
 }
 op {
   name: "Sin"
@@ -26148,6 +23878,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -26155,7 +23886,6 @@ op {
       }
     }
   }
-  summary: "Computes sin of x element-wise."
 }
 op {
   name: "Sinh"
@@ -26173,6 +23903,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -26180,7 +23911,6 @@ op {
       }
     }
   }
-  summary: "Computes hyperbolic sine of x element-wise."
 }
 op {
   name: "Size"
@@ -26209,8 +23939,6 @@ op {
       }
     }
   }
-  summary: "Returns the size of a tensor."
-  description: "This operation returns an integer representing the number of elements in\n`input`.\n\nFor example:\n\n```\n# \'t\' is [[[1, 1,, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]]\nsize(t) ==> 12\n```"
 }
 op {
   name: "SkipDataset"
@@ -26220,7 +23948,6 @@ op {
   }
   input_arg {
     name: "count"
-    description: "A scalar representing the number of elements from the `input_dataset`\nthat should be skipped.  If count is -1, skips everything."
     type: DT_INT64
   }
   output_arg {
@@ -26239,54 +23966,44 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that skips `count` elements from the `input_dataset`."
 }
 op {
   name: "Skipgram"
   output_arg {
     name: "vocab_word"
-    description: "A vector of words in the corpus."
     type: DT_STRING
   }
   output_arg {
     name: "vocab_freq"
-    description: "Frequencies of words. Sorted in the non-ascending order."
     type: DT_INT32
   }
   output_arg {
     name: "words_per_epoch"
-    description: "Number of words per epoch in the data file."
     type: DT_INT64
   }
   output_arg {
     name: "current_epoch"
-    description: "The current epoch number."
     type: DT_INT32
   }
   output_arg {
     name: "total_words_processed"
-    description: "The total number of words processed so far."
     type: DT_INT64
   }
   output_arg {
     name: "examples"
-    description: "A vector of word ids."
     type: DT_INT32
   }
   output_arg {
     name: "labels"
-    description: "A vector of word ids."
     type: DT_INT32
   }
   attr {
     name: "filename"
     type: "string"
-    description: "The corpus\'s text file name."
   }
   attr {
     name: "batch_size"
     type: "int"
-    description: "The size of produced batch."
   }
   attr {
     name: "window_size"
@@ -26294,7 +24011,6 @@ op {
     default_value {
       i: 5
     }
-    description: "The number of words to predict to the left and right of the target."
   }
   attr {
     name: "min_count"
@@ -26302,7 +24018,6 @@ op {
     default_value {
       i: 5
     }
-    description: "The minimum number of word occurrences for it to be included in the\nvocabulary."
   }
   attr {
     name: "subsample"
@@ -26310,9 +24025,7 @@ op {
     default_value {
       f: 0.001
     }
-    description: "Threshold for word occurrence. Words that appear with higher\nfrequency will be randomly down-sampled. Set to 0 to disable."
   }
-  summary: "Parses a text file and creates a batch of examples."
   deprecation {
     version: 19
     explanation: "Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result"
@@ -26327,12 +24040,10 @@ op {
   }
   input_arg {
     name: "begin"
-    description: "begin[i] specifies the offset into the \'i\'th dimension of\n\'input\' to slice from."
     type_attr: "Index"
   }
   input_arg {
     name: "size"
-    description: "size[i] specifies the number of elements of the \'i\'th dimension\nof \'input\' to slice. If size[i] is -1, all remaining elements in dimension\ni are included in the slice (i.e. this is equivalent to setting\nsize[i] = input.dim_size(i) - begin[i])."
     type_attr: "Index"
   }
   output_arg {
@@ -26353,19 +24064,30 @@ op {
       }
     }
   }
-  summary: "Return a slice from \'input\'."
-  description: "The output tensor is a tensor with dimensions described by \'size\'\nwhose values are extracted from \'input\' starting at the offsets in\n\'begin\'.\n\n*Requirements*:\n  0 <= begin[i] <= begin[i] + size[i] <= Di  for i in [0, n)"
+}
+op {
+  name: "Snapshot"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
 }
 op {
   name: "Softmax"
   input_arg {
     name: "logits"
-    description: "2-D with shape `[batch_size, num_classes]`."
     type_attr: "T"
   }
   output_arg {
     name: "softmax"
-    description: "Same shape as `logits`."
     type_attr: "T"
   }
   attr {
@@ -26374,34 +24096,29 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes softmax activations."
-  description: "For each batch `i` and class `j` we have\n\n    softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j]))"
 }
 op {
   name: "SoftmaxCrossEntropyWithLogits"
   input_arg {
     name: "features"
-    description: "batch_size x num_classes matrix"
     type_attr: "T"
   }
   input_arg {
     name: "labels"
-    description: "batch_size x num_classes matrix\nThe caller must ensure that each batch of labels represents a valid\nprobability distribution."
     type_attr: "T"
   }
   output_arg {
     name: "loss"
-    description: "Per example loss (batch_size vector)."
     type_attr: "T"
   }
   output_arg {
     name: "backprop"
-    description: "backpropagated gradients (batch_size x num_classes matrix)."
     type_attr: "T"
   }
   attr {
@@ -26410,13 +24127,12 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Computes softmax cross entropy cost and gradients to backpropagate."
-  description: "Inputs are the logits, not probabilities."
 }
 op {
   name: "Softplus"
@@ -26436,10 +24152,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -26447,23 +24164,19 @@ op {
       }
     }
   }
-  summary: "Computes softplus: `log(exp(features) + 1)`."
 }
 op {
   name: "SoftplusGrad"
   input_arg {
     name: "gradients"
-    description: "The backpropagated gradients to the corresponding softplus operation."
     type_attr: "T"
   }
   input_arg {
     name: "features"
-    description: "The features passed as input to the corresponding softplus operation."
     type_attr: "T"
   }
   output_arg {
     name: "backprops"
-    description: "The gradients: `gradients / (1 + exp(-features))`."
     type_attr: "T"
   }
   attr {
@@ -26474,10 +24187,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -26485,7 +24199,6 @@ op {
       }
     }
   }
-  summary: "Computes softplus gradients for a softplus operation."
 }
 op {
   name: "Softsign"
@@ -26505,10 +24218,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -26516,23 +24230,19 @@ op {
       }
     }
   }
-  summary: "Computes softsign: `features / (abs(features) + 1)`."
 }
 op {
   name: "SoftsignGrad"
   input_arg {
     name: "gradients"
-    description: "The backpropagated gradients to the corresponding softsign operation."
     type_attr: "T"
   }
   input_arg {
     name: "features"
-    description: "The features passed as input to the corresponding softsign operation."
     type_attr: "T"
   }
   output_arg {
     name: "backprops"
-    description: "The gradients: `gradients / (1 + abs(features)) ** 2`."
     type_attr: "T"
   }
   attr {
@@ -26543,10 +24253,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -26554,18 +24265,15 @@ op {
       }
     }
   }
-  summary: "Computes softsign gradients for a softsign operation."
 }
 op {
   name: "SpaceToBatch"
   input_arg {
     name: "input"
-    description: "4-D with shape `[batch, height, width, depth]`."
     type_attr: "T"
   }
   input_arg {
     name: "paddings"
-    description: "2-D tensor of non-negative integers with shape `[2, 2]`. It specifies\n  the padding of the input with zeros across the spatial dimensions as follows:\n\n      paddings = [[pad_top, pad_bottom], [pad_left, pad_right]]\n\n  The effective spatial dimensions of the zero-padded input tensor will be:\n\n      height_pad = pad_top + height + pad_bottom\n      width_pad = pad_left + width + pad_right\n\nThe attr `block_size` must be greater than one. It indicates the block size.\n\n  * Non-overlapping blocks of size `block_size x block size` in the height and\n    width dimensions are rearranged into the batch dimension at each location.\n  * The batch of the output tensor is `batch * block_size * block_size`.\n  * Both height_pad and width_pad must be divisible by block_size.\n\nThe shape of the output will be:\n\n    [batch*block_size*block_size, height_pad/block_size, width_pad/block_size,\n     depth]\n\nSome examples:\n\n(1) For the following input of shape `[1, 2, 2, 1]` and block_size of 2:\n\n```\nx = [[[[1], [2]], [[3], [4]]]]\n```\n\nThe output tensor has shape `[4, 1, 1, 1]` and value:\n\n```\n[[[[1]]], [[[2]]], [[[3]]], [[[4]]]]\n```\n\n(2) For the following input of shape `[1, 2, 2, 3]` and block_size of 2:\n\n```\nx = [[[[1, 2, 3], [4, 5, 6]],\n      [[7, 8, 9], [10, 11, 12]]]]\n```\n\nThe output tensor has shape `[4, 1, 1, 3]` and value:\n\n```\n[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]\n```\n\n(3) For the following input of shape `[1, 4, 4, 1]` and block_size of 2:\n\n```\nx = [[[[1],   [2],  [3],  [4]],\n      [[5],   [6],  [7],  [8]],\n      [[9],  [10], [11],  [12]],\n      [[13], [14], [15],  [16]]]]\n```\n\nThe output tensor has shape `[4, 2, 2, 1]` and value:\n\n```\nx = [[[[1], [3]], [[9], [11]]],\n     [[[2], [4]], [[10], [12]]],\n     [[[5], [7]], [[13], [15]]],\n     [[[6], [8]], [[14], [16]]]]\n```\n\n(4) For the following input of shape `[2, 2, 4, 1]` and block_size of 2:\n\n```\nx = [[[[1],   [2],  [3],  [4]],\n      [[5],   [6],  [7],  [8]]],\n     [[[9],  [10], [11],  [12]],\n      [[13], [14], [15],  [16]]]]\n```\n\nThe output tensor has shape `[8, 1, 2, 1]` and value:\n\n```\nx = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],\n     [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]\n```\n\nAmong others, this operation is useful for reducing atrous convolution into\nregular convolution."
     type_attr: "Tpaddings"
   }
   output_arg {
@@ -26595,24 +24303,19 @@ op {
     has_minimum: true
     minimum: 2
   }
-  summary: "SpaceToBatch for 4-D tensors of type T."
-  description: "This is a legacy version of the more general SpaceToBatchND.\n\nZero-pads and then rearranges (permutes) blocks of spatial data into batch.\nMore specifically, this op outputs a copy of the input tensor where values from\nthe `height` and `width` dimensions are moved to the `batch` dimension. After\nthe zero-padding, both `height` and `width` of the input must be divisible by the\nblock size."
 }
 op {
   name: "SpaceToBatchND"
   input_arg {
     name: "input"
-    description: "N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`,\nwhere spatial_shape has `M` dimensions."
     type_attr: "T"
   }
   input_arg {
     name: "block_shape"
-    description: "1-D with shape `[M]`, all values must be >= 1."
     type_attr: "Tblock_shape"
   }
   input_arg {
     name: "paddings"
-    description: "2-D with shape `[M, 2]`, all values must be >= 0.\n  `paddings[i] = [pad_start, pad_end]` specifies the padding for input dimension\n  `i + 1`, which corresponds to spatial dimension `i`.  It is required that\n  `block_shape[i]` divides `input_shape[i + 1] + pad_start + pad_end`.\n\nThis operation is equivalent to the following steps:\n\n1. Zero-pad the start and end of dimensions `[1, ..., M]` of the\n   input according to `paddings` to produce `padded` of shape `padded_shape`.\n\n2. Reshape `padded` to `reshaped_padded` of shape:\n\n     [batch] +\n     [padded_shape[1] / block_shape[0],\n       block_shape[0],\n      ...,\n      padded_shape[M] / block_shape[M-1],\n      block_shape[M-1]] +\n     remaining_shape\n\n3. Permute dimensions of `reshaped_padded` to produce\n   `permuted_reshaped_padded` of shape:\n\n     block_shape +\n     [batch] +\n     [padded_shape[1] / block_shape[0],\n      ...,\n      padded_shape[M] / block_shape[M-1]] +\n     remaining_shape\n\n4. Reshape `permuted_reshaped_padded` to flatten `block_shape` into the batch\n   dimension, producing an output tensor of shape:\n\n     [batch * prod(block_shape)] +\n     [padded_shape[1] / block_shape[0],\n      ...,\n      padded_shape[M] / block_shape[M-1]] +\n     remaining_shape\n\nSome examples:\n\n(1) For the following input of shape `[1, 2, 2, 1]`, `block_shape = [2, 2]`, and\n    `paddings = [[0, 0], [0, 0]]`:\n\n```\nx = [[[[1], [2]], [[3], [4]]]]\n```\n\nThe output tensor has shape `[4, 1, 1, 1]` and value:\n\n```\n[[[[1]]], [[[2]]], [[[3]]], [[[4]]]]\n```\n\n(2) For the following input of shape `[1, 2, 2, 3]`, `block_shape = [2, 2]`, and\n    `paddings = [[0, 0], [0, 0]]`:\n\n```\nx = [[[[1, 2, 3], [4, 5, 6]],\n      [[7, 8, 9], [10, 11, 12]]]]\n```\n\nThe output tensor has shape `[4, 1, 1, 3]` and value:\n\n```\n[[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]\n```\n\n(3) For the following input of shape `[1, 4, 4, 1]`, `block_shape = [2, 2]`, and\n    `paddings = [[0, 0], [0, 0]]`:\n\n```\nx = [[[[1],   [2],  [3],  [4]],\n      [[5],   [6],  [7],  [8]],\n      [[9],  [10], [11],  [12]],\n      [[13], [14], [15],  [16]]]]\n```\n\nThe output tensor has shape `[4, 2, 2, 1]` and value:\n\n```\nx = [[[[1], [3]], [[9], [11]]],\n     [[[2], [4]], [[10], [12]]],\n     [[[5], [7]], [[13], [15]]],\n     [[[6], [8]], [[14], [16]]]]\n```\n\n(4) For the following input of shape `[2, 2, 4, 1]`, block_shape = `[2, 2]`, and\n    paddings = `[[0, 0], [2, 0]]`:\n\n```\nx = [[[[1],   [2],  [3],  [4]],\n      [[5],   [6],  [7],  [8]]],\n     [[[9],  [10], [11],  [12]],\n      [[13], [14], [15],  [16]]]]\n```\n\nThe output tensor has shape `[8, 1, 3, 1]` and value:\n\n```\nx = [[[[0], [1], [3]]], [[[0], [9], [11]]],\n     [[[0], [2], [4]]], [[[0], [10], [12]]],\n     [[[0], [5], [7]]], [[[0], [13], [15]]],\n     [[[0], [6], [8]]], [[[0], [14], [16]]]]\n```\n\nAmong others, this operation is useful for reducing atrous convolution into\nregular convolution."
     type_attr: "Tpaddings"
   }
   output_arg {
@@ -26649,8 +24352,6 @@ op {
       }
     }
   }
-  summary: "SpaceToBatch for N-D tensors of type T."
-  description: "This operation divides \"spatial\" dimensions `[1, ..., M]` of the input into a\ngrid of blocks of shape `block_shape`, and interleaves these blocks with the\n\"batch\" dimension (0) such that in the output, the spatial dimensions\n`[1, ..., M]` correspond to the position within the grid, and the batch\ndimension combines both the position within a spatial block and the original\nbatch position.  Prior to division into blocks, the spatial dimensions of the\ninput are optionally zero padded according to `paddings`.  See below for a\nprecise description."
 }
 op {
   name: "SpaceToDepth"
@@ -26669,7 +24370,6 @@ op {
   attr {
     name: "block_size"
     type: "int"
-    description: "The size of the spatial block."
     has_minimum: true
     minimum: 2
   }
@@ -26687,56 +24387,49 @@ op {
       }
     }
   }
-  summary: "SpaceToDepth for tensors of type T."
-  description: "Rearranges blocks of spatial data, into depth. More specifically,\nthis op outputs a copy of the input tensor where values from the `height`\nand `width` dimensions are moved to the `depth` dimension.\nThe attr `block_size` indicates the input block size.\n\n  * Non-overlapping blocks of size `block_size x block size` are rearranged\n    into depth at each location.\n  * The depth of the output tensor is `block_size * block_size * input_depth`.\n  * The Y, X coordinates within each block of the input become the high order\n    component of the output channel index.\n  * The input tensor\'s height and width must be divisible by block_size.\n\nThe `data_format` attr specifies the layout of the input and output tensors\nwith the following options:\n  \"NHWC\": `[ batch, height, width, channels ]`\n  \"NCHW\": `[ batch, channels, height, width ]`\n  \"NCHW_VECT_C\":\n      `qint8 [ batch, channels / 4, height, width, channels % 4 ]`\n\nIt is useful to consider the operation as transforming a 6-D Tensor.\ne.g. for data_format = NHWC,\n     Each element in the input tensor can be specified via 6 coordinates,\n     ordered by decreasing memory layout significance as:\n     n,oY,bY,oX,bX,iC  (where n=batch index, oX, oY means X or Y coordinates\n                        within the output image, bX, bY means coordinates\n                        within the input block, iC means input channels).\n     The output would be a transpose to the following layout:\n     n,oY,oX,bY,bX,iC\n\nThis operation is useful for resizing the activations between convolutions\n(but keeping all data), e.g. instead of pooling. It is also useful for training\npurely convolutional models.\n\nFor example, given an input of shape `[1, 2, 2, 1]`, data_format = \"NHWC\" and\nblock_size = 2:\n\n```\nx = [[[[1], [2]],\n      [[3], [4]]]]\n```\n\nThis operation will output a tensor of shape `[1, 1, 1, 4]`:\n\n```\n[[[[1, 2, 3, 4]]]]\n```\n\nHere, the input has a batch of 1 and each batch element has shape `[2, 2, 1]`,\nthe corresponding output will have a single element (i.e. width and height are\nboth 1) and will have a depth of 4 channels (1 * block_size * block_size).\nThe output element shape is `[1, 1, 4]`.\n\nFor an input tensor with larger depth, here of shape `[1, 2, 2, 3]`, e.g.\n\n```\nx = [[[[1, 2, 3], [4, 5, 6]],\n      [[7, 8, 9], [10, 11, 12]]]]\n```\n\nThis operation, for block_size of 2, will return the following tensor of shape\n`[1, 1, 1, 12]`\n\n```\n[[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]\n```\n\nSimilarly, for the following input of shape `[1 4 4 1]`, and a block size of 2:\n\n```\nx = [[[[1],   [2],  [5],  [6]],\n      [[3],   [4],  [7],  [8]],\n      [[9],  [10], [13],  [14]],\n      [[11], [12], [15],  [16]]]]\n```\n\nthe operator will return the following tensor of shape `[1 2 2 4]`:\n\n```\nx = [[[[1, 2, 3, 4],\n       [5, 6, 7, 8]],\n      [[9, 10, 11, 12],\n       [13, 14, 15, 16]]]]\n```"
 }
 op {
   name: "SparseAccumulatorApplyGradient"
   input_arg {
     name: "handle"
-    description: "The handle to a accumulator."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "local_step"
-    description: "The local_step value at which the sparse gradient was computed."
     type: DT_INT64
   }
   input_arg {
     name: "gradient_indices"
-    description: "Indices of the sparse gradient to be accumulated. Must be a\nvector."
     type: DT_INT64
   }
   input_arg {
     name: "gradient_values"
-    description: "Values are the non-zero slices of the gradient, and must have\nthe same first dimension as indices, i.e., the nnz represented by indices and\nvalues must be consistent."
     type_attr: "dtype"
   }
   input_arg {
     name: "gradient_shape"
-    description: "Shape of the sparse gradient to be accumulated."
     type: DT_INT64
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The data type of accumulated gradients. Needs to correspond to the type\nof the accumulator."
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -26746,102 +24439,85 @@ op {
   attr {
     name: "has_known_shape"
     type: "bool"
-    description: "Boolean indicating whether gradient_shape is unknown, in which\ncase the input is ignored during validation."
   }
-  summary: "Applies a sparse gradient to a given accumulator."
-  description: "Does not add if local_step is smaller than the accumulator\'s\nglobal_step."
 }
 op {
   name: "SparseAccumulatorTakeGradient"
   input_arg {
     name: "handle"
-    description: "The handle to a SparseConditionalAccumulator."
     type: DT_STRING
     is_ref: true
   }
   input_arg {
     name: "num_required"
-    description: "Number of gradients required before we return an aggregate."
     type: DT_INT32
   }
   output_arg {
     name: "indices"
-    description: "Indices of the average of the accumulated sparse gradients."
     type: DT_INT64
   }
   output_arg {
     name: "values"
-    description: "Values of the average of the accumulated sparse gradients."
     type_attr: "dtype"
   }
   output_arg {
     name: "shape"
-    description: "Shape of the average of the accumulated sparse gradients."
     type: DT_INT64
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The data type of accumulated gradients. Needs to correspond to the type\nof the accumulator."
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  summary: "Extracts the average sparse gradient in a SparseConditionalAccumulator."
-  description: "The op will blocks until sufficient (i.e., more than num_required)\ngradients have been accumulated. If the accumulator has already\naggregated more than num_required gradients, it will return its\naverage of the accumulated gradients.  Also automatically increments\nthe recorded global_step in the accumulator by 1, and resets the\naggregate to 0."
 }
 op {
   name: "SparseAdd"
   input_arg {
     name: "a_indices"
-    description: "2-D.  The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix."
     type: DT_INT64
   }
   input_arg {
     name: "a_values"
-    description: "1-D.  The `values` of the first `SparseTensor`, size `[nnz]` Vector."
     type_attr: "T"
   }
   input_arg {
     name: "a_shape"
-    description: "1-D.  The `shape` of the first `SparseTensor`, size `[ndims]` Vector."
     type: DT_INT64
   }
   input_arg {
     name: "b_indices"
-    description: "2-D.  The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix."
     type: DT_INT64
   }
   input_arg {
     name: "b_values"
-    description: "1-D.  The `values` of the second `SparseTensor`, size `[nnz]` Vector."
     type_attr: "T"
   }
   input_arg {
     name: "b_shape"
-    description: "1-D.  The `shape` of the second `SparseTensor`, size `[ndims]` Vector."
     type: DT_INT64
   }
   input_arg {
     name: "thresh"
-    description: "0-D.  The magnitude threshold that determines if an output value/index\npair takes space."
     type_attr: "Treal"
   }
   output_arg {
@@ -26863,17 +24539,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -26888,10 +24565,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -26899,39 +24577,31 @@ op {
       }
     }
   }
-  summary: "Adds two `SparseTensor` objects to produce another `SparseTensor`."
-  description: "The input `SparseTensor` objects\' indices are assumed ordered in standard\nlexicographic order.  If this is not the case, before this step run\n`SparseReorder` to restore index ordering.\n\nBy default, if two values sum to zero at some index, the output `SparseTensor`\nwould still include that particular location in its index, storing a zero in the\ncorresponding value slot.  To override this, callers can specify `thresh`,\nindicating that if the sum has a magnitude strictly smaller than `thresh`, its\ncorresponding value and index would then not be included.  In particular,\n`thresh == 0` (default) means everything is kept and actual thresholding happens\nonly for a positive value.\n\nIn the following shapes, `nnz` is the count after taking `thresh` into account."
 }
 op {
   name: "SparseAddGrad"
   input_arg {
     name: "backprop_val_grad"
-    description: "1-D with shape `[nnz(sum)]`.  The gradient with respect to\nthe non-empty values of the sum."
     type_attr: "T"
   }
   input_arg {
     name: "a_indices"
-    description: "2-D.  The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`."
     type: DT_INT64
   }
   input_arg {
     name: "b_indices"
-    description: "2-D.  The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`."
     type: DT_INT64
   }
   input_arg {
     name: "sum_indices"
-    description: "2-D.  The `indices` of the sum `SparseTensor`, size\n`[nnz(sum), ndims]`."
     type: DT_INT64
   }
   output_arg {
     name: "a_val_grad"
-    description: "1-D with shape `[nnz(A)]`. The gradient with respect to the\nnon-empty values of A."
     type_attr: "T"
   }
   output_arg {
     name: "b_val_grad"
-    description: "1-D with shape `[nnz(B)]`. The gradient with respect to the\nnon-empty values of B."
     type_attr: "T"
   }
   attr {
@@ -26941,25 +24611,24 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  summary: "The gradient operator for the SparseAdd op."
-  description: "The SparseAdd op calculates A + B, where A, B, and the sum are all represented\nas `SparseTensor` objects.  This op takes in the upstream gradient w.r.t.\nnon-empty values of the sum, and outputs the gradients w.r.t. the non-empty\nvalues of A and B."
 }
 op {
   name: "SparseApplyAdadelta"
@@ -26970,44 +24639,36 @@ op {
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum_update"
-    description: ": Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Learning rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "epsilon"
-    description: "Constant factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -27018,17 +24679,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27051,42 +24713,34 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "var: Should be from a Variable()."
 }
 op {
   name: "SparseApplyAdagrad"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Learning rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -27097,17 +24751,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27130,64 +24785,51 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update relevant entries in \'*var\' and \'*accum\' according to the adagrad scheme."
-  description: "That is for rows we have grad for, we update var and accum as follows:\naccum += grad * grad\nvar -= lr * grad * (1 / sqrt(accum))"
 }
 op {
   name: "SparseApplyAdagradDA"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "gradient_accumulator"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "gradient_squared_accumulator"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   input_arg {
     name: "lr"
-    description: "Learning rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "global_step"
-    description: "Training step number. Must be a scalar."
     type: DT_INT64
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -27198,17 +24840,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27231,44 +24874,36 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Update entries in \'*var\' and \'*accum\' according to the proximal adagrad scheme."
 }
 op {
   name: "SparseApplyCenteredRMSProp"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "mg"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "ms"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "mom"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -27277,22 +24912,18 @@ op {
   }
   input_arg {
     name: "epsilon"
-    description: "Ridge term. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var, ms and mom."
     type_attr: "Tindices"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -27303,17 +24934,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27336,64 +24968,51 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var, mg, ms, and mom tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the centered RMSProp algorithm."
-  description: "The centered RMSProp algorithm uses an estimate of the centered second moment\n(i.e., the variance) for normalization, as opposed to regular RMSProp, which\nuses the (uncentered) second moment. This often helps with training, but is\nslightly more expensive in terms of computation and memory.\n\nNote that in dense implementation of this algorithm, mg, ms, and mom will\nupdate even if the grad is zero, but in this sparse implementation, mg, ms,\nand mom will not update in iterations during which the grad is zero.\n\nmean_square = decay * mean_square + (1-decay) * gradient ** 2\nmean_grad = decay * mean_grad + (1-decay) * gradient\nDelta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)\n\nms <- rho * ms_{t-1} + (1-rho) * grad * grad\nmom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\nvar <- var - mom"
 }
 op {
   name: "SparseApplyFtrl"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "linear"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "lr_power"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -27404,17 +25023,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27437,54 +25057,43 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update relevant entries in \'*var\' according to the Ftrl-proximal scheme."
-  description: "That is for rows we have grad for, we update var, accum and linear as follows:\naccum_new = accum + grad * grad\nlinear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var\nquadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2\nvar = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0\naccum = accum_new"
 }
 op {
   name: "SparseApplyFtrlV2"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "linear"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 shrinkage regulariation. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -27493,12 +25102,10 @@ op {
   }
   input_arg {
     name: "lr_power"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -27509,17 +25116,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27542,48 +25150,38 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update relevant entries in \'*var\' according to the Ftrl-proximal scheme."
-  description: "That is for rows we have grad for, we update var, accum and linear as follows:\ngrad_with_shrinkage = grad + 2 * l2_shrinkage * var\naccum_new = accum + grad_with_shrinkage * grad_with_shrinkage\nlinear += grad_with_shrinkage +\n    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var\nquadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2\nvar = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0\naccum = accum_new"
 }
 op {
   name: "SparseApplyMomentum"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Learning rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   input_arg {
     name: "momentum"
-    description: "Momentum. Must be a scalar."
     type_attr: "T"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -27594,17 +25192,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27627,7 +25226,6 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var and accum tensors will be protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
   attr {
     name: "use_nesterov"
@@ -27635,53 +25233,42 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, the tensor passed to compute grad will be\nvar - lr * momentum * accum, so in the end, the var you get is actually\nvar - lr * momentum * accum."
   }
-  summary: "Update relevant entries in \'*var\' and \'*accum\' according to the momentum scheme."
-  description: "Set use_nesterov = True if you want to use Nesterov momentum.\n\nThat is for rows we have grad for, we update var and accum as follows:\n\naccum = accum * momentum + grad\nvar -= lr * accum"
 }
 op {
   name: "SparseApplyProximalAdagrad"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "accum"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Learning rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -27692,17 +25279,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27725,47 +25313,37 @@ op {
     default_value {
       b: false
     }
-    description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Sparse update entries in \'*var\' and \'*accum\' according to FOBOS algorithm."
-  description: "That is for rows we have grad for, we update var and accum as follows:\naccum += grad * grad\nprox_v = var\nprox_v -= lr * grad * (1 / sqrt(accum))\nvar = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}"
 }
 op {
   name: "SparseApplyProximalGradientDescent"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "alpha"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l1"
-    description: "L1 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "l2"
-    description: "L2 regularization. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var and accum."
     type_attr: "Tindices"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -27776,17 +25354,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27809,39 +25388,31 @@ op {
     default_value {
       b: false
     }
-    description: "If True, the subtraction will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
   }
-  summary: "Sparse update \'*var\' as FOBOS algorithm with fixed learning rate."
-  description: "That is for rows we have grad for, we update var as follows:\nprox_v = var - alpha * grad\nvar = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}"
 }
 op {
   name: "SparseApplyRMSProp"
   input_arg {
     name: "var"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "ms"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "mom"
-    description: "Should be from a Variable()."
     type_attr: "T"
     is_ref: true
   }
   input_arg {
     name: "lr"
-    description: "Scaling factor. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "rho"
-    description: "Decay rate. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
@@ -27850,22 +25421,18 @@ op {
   }
   input_arg {
     name: "epsilon"
-    description: "Ridge term. Must be a scalar."
     type_attr: "T"
   }
   input_arg {
     name: "grad"
-    description: "The gradient."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "A vector of indices into the first dimension of var, ms and mom."
     type_attr: "Tindices"
   }
   output_arg {
     name: "out"
-    description: "Same as \"var\"."
     type_attr: "T"
     is_ref: true
   }
@@ -27876,17 +25443,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -27909,50 +25477,40 @@ op {
     default_value {
       b: false
     }
-    description: "If `True`, updating of the var, ms, and mom tensors is protected\nby a lock; otherwise the behavior is undefined, but may exhibit less\ncontention."
   }
-  summary: "Update \'*var\' according to the RMSProp algorithm."
-  description: "Note that in dense implementation of this algorithm, ms and mom will\nupdate even if the grad is zero, but in this sparse implementation, ms\nand mom will not update in iterations during which the grad is zero.\n\nmean_square = decay * mean_square + (1-decay) * gradient ** 2\nDelta = learning_rate * gradient / sqrt(mean_square + epsilon)\n\nms <- rho * ms_{t-1} + (1-rho) * grad * grad\nmom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\nvar <- var - mom"
 }
 op {
   name: "SparseConcat"
   input_arg {
     name: "indices"
-    description: "2-D.  Indices of each input `SparseTensor`."
     type: DT_INT64
     number_attr: "N"
   }
   input_arg {
     name: "values"
-    description: "1-D.  Non-empty values of each `SparseTensor`."
     type_attr: "T"
     number_attr: "N"
   }
   input_arg {
     name: "shapes"
-    description: "1-D.  Shapes of each `SparseTensor`."
     type: DT_INT64
     number_attr: "N"
   }
   output_arg {
     name: "output_indices"
-    description: "2-D.  Indices of the concatenated `SparseTensor`."
     type: DT_INT64
   }
   output_arg {
     name: "output_values"
-    description: "1-D.  Non-empty values of the concatenated `SparseTensor`."
     type_attr: "T"
   }
   output_arg {
     name: "output_shape"
-    description: "1-D.  Shape of the concatenated `SparseTensor`."
     type: DT_INT64
   }
   attr {
     name: "concat_dim"
     type: "int"
-    description: "Dimension to concatenate along. Must be in range [-rank, rank),\nwhere rank is the number of dimensions in each input `SparseTensor`."
   }
   attr {
     name: "N"
@@ -27964,36 +25522,33 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Concatenates a list of `SparseTensor` along the specified dimension."
-  description: "Concatenation is with respect to the dense versions of these sparse tensors.\nIt is assumed that each input is a `SparseTensor` whose elements are ordered\nalong increasing dimension number.\n\nAll inputs\' shapes must match, except for the concat dimension.  The\n`indices`, `values`, and `shapes` lists must have the same length.\n\nThe output shape is identical to the inputs\', except along the concat\ndimension, where it is the sum of the inputs\' sizes along that dimension.\n\nThe output elements will be resorted to preserve the sort order along\nincreasing dimension number.\n\nThis op runs in `O(M log M)` time, where `M` is the total number of non-empty\nvalues across all inputs. This is due to the need for an internal sort in\norder to concatenate efficiently across an arbitrary dimension.\n\nFor example, if `concat_dim = 1` and the inputs are\n\n    sp_inputs[0]: shape = [2, 3]\n    [0, 2]: \"a\"\n    [1, 0]: \"b\"\n    [1, 1]: \"c\"\n\n    sp_inputs[1]: shape = [2, 4]\n    [0, 1]: \"d\"\n    [0, 2]: \"e\"\n\nthen the output will be\n\n    shape = [2, 7]\n    [0, 2]: \"a\"\n    [0, 4]: \"d\"\n    [0, 5]: \"e\"\n    [1, 0]: \"b\"\n    [1, 1]: \"c\"\n\nGraphically this is equivalent to doing\n\n    [    a] concat [  d e  ] = [    a   d e  ]\n    [b c  ]        [       ]   [b c          ]"
 }
 op {
   name: "SparseConditionalAccumulator"
   output_arg {
     name: "handle"
-    description: "The handle to the accumulator."
     type: DT_STRING
     is_ref: true
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of the value being accumulated."
     allowed_values {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -28003,7 +25558,6 @@ op {
   attr {
     name: "shape"
     type: "shape"
-    description: "The shape of the values."
   }
   attr {
     name: "container"
@@ -28011,7 +25565,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this accumulator is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -28019,49 +25572,39 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this accumulator will be shared under the given name\nacross multiple sessions."
   }
-  summary: "A conditional accumulator for aggregating sparse gradients."
-  description: "The accumulator accepts gradients marked with local_step greater or\nequal to the most recent global_step known to the accumulator. The\naverage can be extracted from the accumulator, provided sufficient\ngradients have been accumulated. Extracting the average automatically\nresets the aggregate to 0, and increments the global_step recorded by\nthe accumulator."
   is_stateful: true
 }
 op {
   name: "SparseCross"
   input_arg {
     name: "indices"
-    description: "2-D.  Indices of each input `SparseTensor`."
     type: DT_INT64
     number_attr: "N"
   }
   input_arg {
     name: "values"
-    description: "1-D.   values of each `SparseTensor`."
     type_list_attr: "sparse_types"
   }
   input_arg {
     name: "shapes"
-    description: "1-D.   Shapes of each `SparseTensor`."
     type: DT_INT64
     number_attr: "N"
   }
   input_arg {
     name: "dense_inputs"
-    description: "2-D.    Columns represented by dense `Tensor`."
     type_list_attr: "dense_types"
   }
   output_arg {
     name: "output_indices"
-    description: "2-D.  Indices of the concatenated `SparseTensor`."
     type: DT_INT64
   }
   output_arg {
     name: "output_values"
-    description: "1-D.  Non-empty values of the concatenated or hashed\n`SparseTensor`."
     type_attr: "out_type"
   }
   output_arg {
     name: "output_shape"
-    description: "1-D.  Shape of the concatenated `SparseTensor`."
     type: DT_INT64
   }
   attr {
@@ -28072,18 +25615,15 @@ op {
   attr {
     name: "hashed_output"
     type: "bool"
-    description: "If true, returns the hash of the cross instead of the string.\nThis will allow us avoiding string manipulations."
   }
   attr {
     name: "num_buckets"
     type: "int"
-    description: "It is used if hashed_output is true.\noutput = hashed_value%num_buckets if num_buckets > 0 else hashed_value."
     has_minimum: true
   }
   attr {
     name: "hash_key"
     type: "int"
-    description: "Specify the hash_key that will be used by the `FingerprintCat64`\nfunction to combine the crosses fingerprints."
   }
   attr {
     name: "sparse_types"
@@ -28127,34 +25667,27 @@ op {
       }
     }
   }
-  summary: "Generates sparse cross from a list of sparse and dense tensors."
-  description: "The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each\nrepresenting features of one feature column. It outputs a 2D `SparseTensor` with\nthe batchwise crosses of these features.\n\nFor example, if the inputs are\n\n    inputs[0]: SparseTensor with shape = [2, 2]\n    [0, 0]: \"a\"\n    [1, 0]: \"b\"\n    [1, 1]: \"c\"\n\n    inputs[1]: SparseTensor with shape = [2, 1]\n    [0, 0]: \"d\"\n    [1, 0]: \"e\"\n\n    inputs[2]: Tensor [[\"f\"], [\"g\"]]\n\nthen the output will be\n\n    shape = [2, 2]\n    [0, 0]: \"a_X_d_X_f\"\n    [1, 0]: \"b_X_e_X_g\"\n    [1, 1]: \"c_X_e_X_g\"\n\nif hashed_output=true then the output will be\n\n    shape = [2, 2]\n    [0, 0]: FingerprintCat64(\n                Fingerprint64(\"f\"), FingerprintCat64(\n                    Fingerprint64(\"d\"), Fingerprint64(\"a\")))\n    [1, 0]: FingerprintCat64(\n                Fingerprint64(\"g\"), FingerprintCat64(\n                    Fingerprint64(\"e\"), Fingerprint64(\"b\")))\n    [1, 1]: FingerprintCat64(\n                Fingerprint64(\"g\"), FingerprintCat64(\n                    Fingerprint64(\"e\"), Fingerprint64(\"c\")))"
 }
 op {
   name: "SparseDenseCwiseAdd"
   input_arg {
     name: "sp_indices"
-    description: "2-D.  `N x R` matrix with the indices of non-empty values in a\nSparseTensor, possibly not in canonical ordering."
     type: DT_INT64
   }
   input_arg {
     name: "sp_values"
-    description: "1-D.  `N` non-empty values corresponding to `sp_indices`."
     type_attr: "T"
   }
   input_arg {
     name: "sp_shape"
-    description: "1-D.  Shape of the input SparseTensor."
     type: DT_INT64
   }
   input_arg {
     name: "dense"
-    description: "`R`-D.  The dense Tensor operand."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "1-D.  The `N` values that are operated on."
     type_attr: "T"
   }
   attr {
@@ -28164,51 +25697,45 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  summary: "Adds up a SparseTensor and a dense Tensor, using these special rules:"
-  description: "(1) Broadcasts the dense side to have the same shape as the sparse side, if\n    eligible;\n(2) Then, only the dense values pointed to by the indices of the SparseTensor\n    participate in the cwise addition.\n\nBy these rules, the result is a logical SparseTensor with exactly the same\nindices and shape, but possibly with different non-zero values.  The output of\nthis Op is the resultant non-zero values."
 }
 op {
   name: "SparseDenseCwiseDiv"
   input_arg {
     name: "sp_indices"
-    description: "2-D.  `N x R` matrix with the indices of non-empty values in a\nSparseTensor, possibly not in canonical ordering."
     type: DT_INT64
   }
   input_arg {
     name: "sp_values"
-    description: "1-D.  `N` non-empty values corresponding to `sp_indices`."
     type_attr: "T"
   }
   input_arg {
     name: "sp_shape"
-    description: "1-D.  Shape of the input SparseTensor."
     type: DT_INT64
   }
   input_arg {
     name: "dense"
-    description: "`R`-D.  The dense Tensor operand."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "1-D.  The `N` values that are operated on."
     type_attr: "T"
   }
   attr {
@@ -28218,51 +25745,45 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  summary: "Component-wise divides a SparseTensor by a dense Tensor."
-  description: "*Limitation*: this Op only broadcasts the dense side to the sparse side, but not\nthe other direction."
 }
 op {
   name: "SparseDenseCwiseMul"
   input_arg {
     name: "sp_indices"
-    description: "2-D.  `N x R` matrix with the indices of non-empty values in a\nSparseTensor, possibly not in canonical ordering."
     type: DT_INT64
   }
   input_arg {
     name: "sp_values"
-    description: "1-D.  `N` non-empty values corresponding to `sp_indices`."
     type_attr: "T"
   }
   input_arg {
     name: "sp_shape"
-    description: "1-D.  Shape of the input SparseTensor."
     type: DT_INT64
   }
   input_arg {
     name: "dense"
-    description: "`R`-D.  The dense Tensor operand."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "1-D.  The `N` values that are operated on."
     type_attr: "T"
   }
   attr {
@@ -28272,46 +25793,41 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  summary: "Component-wise multiplies a SparseTensor by a dense Tensor."
-  description: "The output locations corresponding to the implicitly zero elements in the sparse\ntensor will be zero (i.e., will not take up storage space), regardless of the\ncontents of the dense tensor (even if it\'s +/-INF and that INF*0 == NaN).\n\n*Limitation*: this Op only broadcasts the dense side to the sparse side, but not\nthe other direction."
 }
 op {
   name: "SparseFillEmptyRows"
   input_arg {
     name: "indices"
-    description: "2-D. the indices of the sparse tensor."
     type: DT_INT64
   }
   input_arg {
     name: "values"
-    description: "1-D. the values of the sparse tensor."
     type_attr: "T"
   }
   input_arg {
     name: "dense_shape"
-    description: "1-D. the shape of the sparse tensor."
     type: DT_INT64
   }
   input_arg {
     name: "default_value"
-    description: "0-D. default value to insert into location `[row, 0, ..., 0]`\n  for rows missing from the input sparse tensor.\noutput indices: 2-D. the indices of the filled sparse tensor."
     type_attr: "T"
   }
   output_arg {
@@ -28320,54 +25836,43 @@ op {
   }
   output_arg {
     name: "output_values"
-    description: "1-D. the values of the filled sparse tensor."
     type_attr: "T"
   }
   output_arg {
     name: "empty_row_indicator"
-    description: "1-D. whether the dense row was missing in the\ninput sparse tensor."
     type: DT_BOOL
   }
   output_arg {
     name: "reverse_index_map"
-    description: "1-D. a map from the input indices to the output indices."
     type: DT_INT64
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Fills empty rows in the input 2-D `SparseTensor` with a default value."
-  description: "The input `SparseTensor` is represented via the tuple of inputs\n(`indices`, `values`, `dense_shape`).  The output `SparseTensor` has the\nsame `dense_shape` but with indices `output_indices` and values\n`output_values`.\n\nThis op inserts a single entry for every row that doesn\'t have any values.\nThe index is created as `[row, 0, ..., 0]` and the inserted value\nis `default_value`.\n\nFor example, suppose `sp_input` has shape `[5, 6]` and non-empty values:\n\n    [0, 1]: a\n    [0, 3]: b\n    [2, 0]: c\n    [3, 1]: d\n\nRows 1 and 4 are empty, so the output will be of shape `[5, 6]` with values:\n\n    [0, 1]: a\n    [0, 3]: b\n    [1, 0]: default_value\n    [2, 0]: c\n    [3, 1]: d\n    [4, 0]: default_value\n\nThe output `SparseTensor` will be in row-major order and will have the\nsame shape as the input.\n\nThis op also returns an indicator vector shaped `[dense_shape[0]]` such that\n\n    empty_row_indicator[i] = True iff row i was an empty row.\n\nAnd a reverse index map vector shaped `[indices.shape[0]]` that is used during\nbackpropagation,\n\n    reverse_index_map[j] = out_j s.t. indices[j, :] == output_indices[out_j, :]"
 }
 op {
   name: "SparseFillEmptyRowsGrad"
   input_arg {
     name: "reverse_index_map"
-    description: "1-D.  The reverse index map from SparseFillEmptyRows."
     type: DT_INT64
   }
   input_arg {
     name: "grad_values"
-    description: "1-D.  The gradients from backprop."
     type_attr: "T"
   }
   output_arg {
     name: "d_values"
-    description: "1-D.  The backprop into values."
     type_attr: "T"
   }
   output_arg {
     name: "d_default_value"
-    description: "0-D.  The backprop into default_value."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "The gradient of SparseFillEmptyRows."
-  description: "Takes vectors reverse_index_map, shaped `[N]`, and grad_values,\nshaped `[N_full]`, where `N_full >= N` and copies data into either\n`d_values` or `d_default_value`.  Here `d_values` is shaped `[N]` and\n`d_default_value` is a scalar.\n\n  d_values[j] = grad_values[reverse_index_map[j]]\n  d_default_value = sum_{k : 0 .. N_full - 1} (\n     grad_values[k] * 1{k not in reverse_index_map})"
 }
 op {
   name: "SparseMatMul"
@@ -28437,34 +25942,27 @@ op {
       }
     }
   }
-  summary: "Multiply matrix \"a\" by matrix \"b\"."
-  description: "The inputs must be two-dimensional matrices and the inner dimension of \"a\" must\nmatch the outer dimension of \"b\". This op is optimized for the case where at\nleast one of \"a\" or \"b\" is sparse. The breakeven for using this versus a dense\nmatrix multiply on one platform was 30% zero values in the sparse matrix.\n\nThe gradient computation of this operation will only take advantage of sparsity\nin the input gradient when that gradient comes from a Relu."
 }
 op {
   name: "SparseReduceMax"
   input_arg {
     name: "input_indices"
-    description: "2-D.  `N x R` matrix with the indices of non-empty values in a\nSparseTensor, possibly not in canonical ordering."
     type: DT_INT64
   }
   input_arg {
     name: "input_values"
-    description: "1-D.  `N` non-empty values corresponding to `input_indices`."
     type_attr: "T"
   }
   input_arg {
     name: "input_shape"
-    description: "1-D.  Shape of the input SparseTensor."
     type: DT_INT64
   }
   input_arg {
     name: "reduction_axes"
-    description: "1-D.  Length-`K` vector containing the reduction axes."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "`R-K`-D.  The reduced Tensor."
     type_attr: "T"
   }
   attr {
@@ -28473,7 +25971,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, retain reduced dimensions with length 1."
   }
   attr {
     name: "T"
@@ -28483,10 +25980,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -28494,29 +25992,23 @@ op {
       }
     }
   }
-  summary: "Computes the max of elements across dimensions of a SparseTensor."
-  description: "This Op takes a SparseTensor and is the sparse counterpart to\n`tf.reduce_max()`.  In particular, this Op also returns a dense `Tensor`\ninstead of a sparse one.\n\nReduces `sp_input` along the dimensions given in `reduction_axes`.  Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained\nwith length 1.\n\nIf `reduction_axes` has no entries, all dimensions are reduced, and a tensor\nwith a single element is returned.  Additionally, the axes can be negative,\nwhich are interpreted according to the indexing rules in Python."
 }
 op {
   name: "SparseReduceMaxSparse"
   input_arg {
     name: "input_indices"
-    description: "2-D.  `N x R` matrix with the indices of non-empty values in a\nSparseTensor, possibly not in canonical ordering."
     type: DT_INT64
   }
   input_arg {
     name: "input_values"
-    description: "1-D.  `N` non-empty values corresponding to `input_indices`."
     type_attr: "T"
   }
   input_arg {
     name: "input_shape"
-    description: "1-D.  Shape of the input SparseTensor."
     type: DT_INT64
   }
   input_arg {
     name: "reduction_axes"
-    description: "1-D.  Length-`K` vector containing the reduction axes."
     type: DT_INT32
   }
   output_arg {
@@ -28537,7 +26029,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, retain reduced dimensions with length 1."
   }
   attr {
     name: "T"
@@ -28547,10 +26038,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -28558,34 +26050,27 @@ op {
       }
     }
   }
-  summary: "Computes the max of elements across dimensions of a SparseTensor."
-  description: "This Op takes a SparseTensor and is the sparse counterpart to\n`tf.reduce_max()`.  In contrast to SparseReduceMax, this Op returns a\nSparseTensor.\n\nReduces `sp_input` along the dimensions given in `reduction_axes`.  Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained\nwith length 1.\n\nIf `reduction_axes` has no entries, all dimensions are reduced, and a tensor\nwith a single element is returned.  Additionally, the axes can be negative,\nwhich are interpreted according to the indexing rules in Python."
 }
 op {
   name: "SparseReduceSum"
   input_arg {
     name: "input_indices"
-    description: "2-D.  `N x R` matrix with the indices of non-empty values in a\nSparseTensor, possibly not in canonical ordering."
     type: DT_INT64
   }
   input_arg {
     name: "input_values"
-    description: "1-D.  `N` non-empty values corresponding to `input_indices`."
     type_attr: "T"
   }
   input_arg {
     name: "input_shape"
-    description: "1-D.  Shape of the input SparseTensor."
     type: DT_INT64
   }
   input_arg {
     name: "reduction_axes"
-    description: "1-D.  Length-`K` vector containing the reduction axes."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "`R-K`-D.  The reduced Tensor."
     type_attr: "T"
   }
   attr {
@@ -28594,7 +26079,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, retain reduced dimensions with length 1."
   }
   attr {
     name: "T"
@@ -28603,46 +26087,41 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  summary: "Computes the sum of elements across dimensions of a SparseTensor."
-  description: "This Op takes a SparseTensor and is the sparse counterpart to\n`tf.reduce_sum()`.  In particular, this Op also returns a dense `Tensor`\ninstead of a sparse one.\n\nReduces `sp_input` along the dimensions given in `reduction_axes`.  Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained\nwith length 1.\n\nIf `reduction_axes` has no entries, all dimensions are reduced, and a tensor\nwith a single element is returned.  Additionally, the axes can be negative,\nwhich are interpreted according to the indexing rules in Python."
 }
 op {
   name: "SparseReduceSumSparse"
   input_arg {
     name: "input_indices"
-    description: "2-D.  `N x R` matrix with the indices of non-empty values in a\nSparseTensor, possibly not in canonical ordering."
     type: DT_INT64
   }
   input_arg {
     name: "input_values"
-    description: "1-D.  `N` non-empty values corresponding to `input_indices`."
     type_attr: "T"
   }
   input_arg {
     name: "input_shape"
-    description: "1-D.  Shape of the input SparseTensor."
     type: DT_INT64
   }
   input_arg {
     name: "reduction_axes"
-    description: "1-D.  Length-`K` vector containing the reduction axes."
     type: DT_INT32
   }
   output_arg {
@@ -28663,7 +26142,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, retain reduced dimensions with length 1."
   }
   attr {
     name: "T"
@@ -28672,89 +26150,74 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  summary: "Computes the sum of elements across dimensions of a SparseTensor."
-  description: "This Op takes a SparseTensor and is the sparse counterpart to\n`tf.reduce_sum()`.  In contrast to SparseReduceSum, this Op returns a\nSparseTensor.\n\nReduces `sp_input` along the dimensions given in `reduction_axes`.  Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained\nwith length 1.\n\nIf `reduction_axes` has no entries, all dimensions are reduced, and a tensor\nwith a single element is returned.  Additionally, the axes can be negative,\nwhich are interpreted according to the indexing rules in Python."
 }
 op {
   name: "SparseReorder"
   input_arg {
     name: "input_indices"
-    description: "2-D.  `N x R` matrix with the indices of non-empty values in a\nSparseTensor, possibly not in canonical ordering."
     type: DT_INT64
   }
   input_arg {
     name: "input_values"
-    description: "1-D.  `N` non-empty values corresponding to `input_indices`."
     type_attr: "T"
   }
   input_arg {
     name: "input_shape"
-    description: "1-D.  Shape of the input SparseTensor."
     type: DT_INT64
   }
   output_arg {
     name: "output_indices"
-    description: "2-D.  `N x R` matrix with the same indices as input_indices, but\nin canonical row-major ordering."
     type: DT_INT64
   }
   output_arg {
     name: "output_values"
-    description: "1-D.  `N` non-empty values corresponding to `output_indices`."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Reorders a SparseTensor into the canonical, row-major ordering."
-  description: "Note that by convention, all sparse ops preserve the canonical ordering along\nincreasing dimension number. The only time ordering can be violated is during\nmanual manipulation of the indices and values vectors to add entries.\n\nReordering does not affect the shape of the SparseTensor.\n\nIf the tensor has rank `R` and `N` non-empty values, `input_indices` has\nshape `[N, R]`, input_values has length `N`, and input_shape has length `R`."
 }
 op {
   name: "SparseReshape"
   input_arg {
     name: "input_indices"
-    description: "2-D.  `N x R_in` matrix with the indices of non-empty values in a\nSparseTensor."
     type: DT_INT64
   }
   input_arg {
     name: "input_shape"
-    description: "1-D.  `R_in` vector with the input SparseTensor\'s dense shape."
     type: DT_INT64
   }
   input_arg {
     name: "new_shape"
-    description: "1-D.  `R_out` vector with the requested new dense shape."
     type: DT_INT64
   }
   output_arg {
     name: "output_indices"
-    description: "2-D.  `N x R_out` matrix with the updated indices of non-empty\nvalues in the output SparseTensor."
     type: DT_INT64
   }
   output_arg {
     name: "output_shape"
-    description: "1-D.  `R_out` vector with the full dense shape of the output\nSparseTensor.  This is the same as `new_shape` but with any -1 dimensions\nfilled in."
     type: DT_INT64
   }
-  summary: "Reshapes a SparseTensor to represent values in a new dense shape."
-  description: "This operation has the same semantics as reshape on the represented dense\ntensor.  The `input_indices` are recomputed based on the requested `new_shape`.\n\nIf one component of `new_shape` is the special value -1, the size of that\ndimension is computed so that the total dense size remains constant.  At\nmost one component of `new_shape` can be -1.  The number of dense elements\nimplied by `new_shape` must be the same as the number of dense elements\noriginally implied by `input_shape`.\n\nReshaping does not affect the order of values in the SparseTensor.\n\nIf the input tensor has rank `R_in` and `N` non-empty values, and `new_shape`\nhas length `R_out`, then `input_indices` has shape `[N, R_in]`,\n`input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and\n`output_shape` has length `R_out`."
 }
 op {
   name: "SparseSegmentMean"
@@ -28764,17 +26227,14 @@ op {
   }
   input_arg {
     name: "indices"
-    description: "A 1-D tensor. Has same rank as `segment_ids`."
     type_attr: "Tidx"
   }
   input_arg {
     name: "segment_ids"
-    description: "A 1-D tensor. Values should be sorted and can be repeated."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "Has same shape as data, except for dimension 0 which\nhas size `k`, the number of segments."
     type_attr: "T"
   }
   attr {
@@ -28800,29 +26260,23 @@ op {
       }
     }
   }
-  summary: "Computes the mean along sparse segments of a tensor."
-  description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nLike `SegmentMean`, but `segment_ids` can have rank less than `data`\'s first\ndimension, selecting a subset of dimension 0, specified by `indices`."
 }
 op {
   name: "SparseSegmentMeanGrad"
   input_arg {
     name: "grad"
-    description: "gradient propagated to the SparseSegmentMean op."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "indices passed to the corresponding SparseSegmentMean op."
     type_attr: "Tidx"
   }
   input_arg {
     name: "segment_ids"
-    description: "segment_ids passed to the corresponding SparseSegmentMean op."
     type: DT_INT32
   }
   input_arg {
     name: "output_dim0"
-    description: "dimension 0 of \"data\" passed to SparseSegmentMean op."
     type: DT_INT32
   }
   output_arg {
@@ -28852,8 +26306,65 @@ op {
       }
     }
   }
-  summary: "Computes gradients for SparseSegmentMean."
-  description: "Returns tensor \"output\" with same shape as grad, except for dimension 0 whose\nvalue is output_dim0."
+}
+op {
+  name: "SparseSegmentMeanWithNumSegments"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "SparseSegmentSqrtN"
@@ -28863,17 +26374,14 @@ op {
   }
   input_arg {
     name: "indices"
-    description: "A 1-D tensor. Has same rank as `segment_ids`."
     type_attr: "Tidx"
   }
   input_arg {
     name: "segment_ids"
-    description: "A 1-D tensor. Values should be sorted and can be repeated."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "Has same shape as data, except for dimension 0 which\nhas size `k`, the number of segments."
     type_attr: "T"
   }
   attr {
@@ -28899,29 +26407,23 @@ op {
       }
     }
   }
-  summary: "Computes the sum along sparse segments of a tensor divided by the sqrt of N."
-  description: "N is the size of the segment being reduced.\n\nRead @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments."
 }
 op {
   name: "SparseSegmentSqrtNGrad"
   input_arg {
     name: "grad"
-    description: "gradient propagated to the SparseSegmentSqrtN op."
     type_attr: "T"
   }
   input_arg {
     name: "indices"
-    description: "indices passed to the corresponding SparseSegmentSqrtN op."
     type_attr: "Tidx"
   }
   input_arg {
     name: "segment_ids"
-    description: "segment_ids passed to the corresponding SparseSegmentSqrtN op."
     type: DT_INT32
   }
   input_arg {
     name: "output_dim0"
-    description: "dimension 0 of \"data\" passed to SparseSegmentSqrtN op."
     type: DT_INT32
   }
   output_arg {
@@ -28951,8 +26453,65 @@ op {
       }
     }
   }
-  summary: "Computes gradients for SparseSegmentSqrtN."
-  description: "Returns tensor \"output\" with same shape as grad, except for dimension 0 whose\nvalue is output_dim0."
+}
+op {
+  name: "SparseSegmentSqrtNWithNumSegments"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "SparseSegmentSum"
@@ -28962,17 +26521,14 @@ op {
   }
   input_arg {
     name: "indices"
-    description: "A 1-D tensor. Has same rank as `segment_ids`."
     type_attr: "Tidx"
   }
   input_arg {
     name: "segment_ids"
-    description: "A 1-D tensor. Values should be sorted and can be repeated."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "Has same shape as data, except for dimension 0 which\nhas size `k`, the number of segments."
     type_attr: "T"
   }
   attr {
@@ -28983,10 +26539,67 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
         type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tidx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "SparseSegmentSumWithNumSegments"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tidx"
+  }
+  input_arg {
+    name: "segment_ids"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -29007,34 +26620,40 @@ op {
       }
     }
   }
-  summary: "Computes the sum along sparse segments of a tensor."
-  description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nLike `SegmentSum`, but `segment_ids` can have rank less than `data`\'s first\ndimension, selecting a subset of dimension 0, specified by `indices`.\n\nFor example:\n\n```python\nc = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])\n\n# Select two rows, one segment.\ntf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))\n# => [[0 0 0 0]]\n\n# Select two rows, two segment.\ntf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))\n# => [[ 1  2  3  4]\n#     [-1 -2 -3 -4]]\n\n# Select all rows, two segments.\ntf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))\n# => [[0 0 0 0]\n#     [5 6 7 8]]\n\n# Which is equivalent to:\ntf.segment_sum(c, tf.constant([0, 0, 1]))\n```"
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "SparseSlice"
   input_arg {
     name: "indices"
-    description: "2-D tensor represents the indices of the sparse tensor."
     type: DT_INT64
   }
   input_arg {
     name: "values"
-    description: "1-D tensor represents the values of the sparse tensor."
     type_attr: "T"
   }
   input_arg {
     name: "shape"
-    description: "1-D. tensor represents the shape of the sparse tensor."
     type: DT_INT64
   }
   input_arg {
     name: "start"
-    description: "1-D. tensor represents the start of the slice."
     type: DT_INT64
   }
   input_arg {
     name: "size"
-    description: "1-D. tensor represents the size of the slice.\noutput indices: A list of 1-D tensors represents the indices of the output\nsparse tensors."
     type: DT_INT64
   }
   output_arg {
@@ -29043,41 +26662,33 @@ op {
   }
   output_arg {
     name: "output_values"
-    description: "A list of 1-D tensors represents the values of the output sparse\ntensors."
     type_attr: "T"
   }
   output_arg {
     name: "output_shape"
-    description: "A list of 1-D tensors represents the shape of the output sparse\ntensors."
     type: DT_INT64
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Slice a `SparseTensor` based on the `start` and `size`."
-  description: "For example, if the input is\n\n    input_tensor = shape = [2, 7]\n    [    a   d e  ]\n    [b c          ]\n\nGraphically the output tensors are:\n\n    sparse_slice([0, 0], [2, 4]) = shape = [2, 4]\n    [    a  ]\n    [b c    ]\n\n    sparse_slice([0, 4], [2, 3]) = shape = [2, 3]\n    [ d e  ]\n    [      ]"
 }
 op {
   name: "SparseSoftmax"
   input_arg {
     name: "sp_indices"
-    description: "2-D.  `NNZ x R` matrix with the indices of non-empty values in a\nSparseTensor, in canonical ordering."
     type: DT_INT64
   }
   input_arg {
     name: "sp_values"
-    description: "1-D.  `NNZ` non-empty values corresponding to `sp_indices`."
     type_attr: "T"
   }
   input_arg {
     name: "sp_shape"
-    description: "1-D.  Shape of the input SparseTensor."
     type: DT_INT64
   }
   output_arg {
     name: "output"
-    description: "1-D.  The `NNZ` values for the result `SparseTensor`."
     type_attr: "T"
   }
   attr {
@@ -29090,29 +26701,23 @@ op {
       }
     }
   }
-  summary: "Applies softmax to a batched N-D `SparseTensor`."
-  description: "The inputs represent an N-D SparseTensor  with logical shape `[..., B, C]`\n(where `N >= 2`), and with indices sorted in the canonical lexicographic order.\n\nThis op is equivalent to applying the normal `tf.nn.softmax()` to each innermost\nlogical submatrix with shape `[B, C]`, but with the catch that *the implicitly\nzero elements do not participate*.  Specifically, the algorithm is equivalent\nto the following:\n\n  (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix\n      with shape `[B, C]`, along the size-C dimension;\n  (2) Masks out the original implicitly-zero locations;\n  (3) Renormalizes the remaining elements.\n\nHence, the `SparseTensor` result has exactly the same non-zero indices and\nshape."
 }
 op {
   name: "SparseSoftmaxCrossEntropyWithLogits"
   input_arg {
     name: "features"
-    description: "batch_size x num_classes matrix"
     type_attr: "T"
   }
   input_arg {
     name: "labels"
-    description: "batch_size vector with values in [0, num_classes).\nThis is the label for the given minibatch entry."
     type_attr: "Tlabels"
   }
   output_arg {
     name: "loss"
-    description: "Per example loss (batch_size vector)."
     type_attr: "T"
   }
   output_arg {
     name: "backprop"
-    description: "backpropagated gradients (batch_size x num_classes matrix)."
     type_attr: "T"
   }
   attr {
@@ -29121,6 +26726,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -29139,49 +26745,39 @@ op {
       }
     }
   }
-  summary: "Computes softmax cross entropy cost and gradients to backpropagate."
-  description: "Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept\na matrix of label probabilities, but rather a single label per row\nof features.  This label is considered to have probability 1.0 for the\ngiven row.\n\nInputs are the logits, not probabilities."
 }
 op {
   name: "SparseSparseMaximum"
   input_arg {
     name: "a_indices"
-    description: "2-D.  `N x R` matrix with the indices of non-empty values in a\nSparseTensor, in the canonical lexicographic ordering."
     type: DT_INT64
   }
   input_arg {
     name: "a_values"
-    description: "1-D.  `N` non-empty values corresponding to `a_indices`."
     type_attr: "T"
   }
   input_arg {
     name: "a_shape"
-    description: "1-D.  Shape of the input SparseTensor."
     type: DT_INT64
   }
   input_arg {
     name: "b_indices"
-    description: "counterpart to `a_indices` for the other operand."
     type: DT_INT64
   }
   input_arg {
     name: "b_values"
-    description: "counterpart to `a_values` for the other operand; must be of the same dtype."
     type_attr: "T"
   }
   input_arg {
     name: "b_shape"
-    description: "counterpart to `a_shape` for the other operand; the two shapes must be equal."
     type: DT_INT64
   }
   output_arg {
     name: "output_indices"
-    description: "2-D.  The indices of the output SparseTensor."
     type: DT_INT64
   }
   output_arg {
     name: "output_values"
-    description: "1-D.  The values of the output SparseTensor."
     type_attr: "T"
   }
   attr {
@@ -29192,10 +26788,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -29203,49 +26800,39 @@ op {
       }
     }
   }
-  summary: "Returns the element-wise max of two SparseTensors."
-  description: "Assumes the two SparseTensors have the same shape, i.e., no broadcasting."
 }
 op {
   name: "SparseSparseMinimum"
   input_arg {
     name: "a_indices"
-    description: "2-D.  `N x R` matrix with the indices of non-empty values in a\nSparseTensor, in the canonical lexicographic ordering."
     type: DT_INT64
   }
   input_arg {
     name: "a_values"
-    description: "1-D.  `N` non-empty values corresponding to `a_indices`."
     type_attr: "T"
   }
   input_arg {
     name: "a_shape"
-    description: "1-D.  Shape of the input SparseTensor."
     type: DT_INT64
   }
   input_arg {
     name: "b_indices"
-    description: "counterpart to `a_indices` for the other operand."
     type: DT_INT64
   }
   input_arg {
     name: "b_values"
-    description: "counterpart to `a_values` for the other operand; must be of the same dtype."
     type_attr: "T"
   }
   input_arg {
     name: "b_shape"
-    description: "counterpart to `a_shape` for the other operand; the two shapes must be equal."
     type: DT_INT64
   }
   output_arg {
     name: "output_indices"
-    description: "2-D.  The indices of the output SparseTensor."
     type: DT_INT64
   }
   output_arg {
     name: "output_values"
-    description: "1-D.  The values of the output SparseTensor."
     type_attr: "T"
   }
   attr {
@@ -29255,46 +26842,41 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
       }
     }
   }
-  summary: "Returns the element-wise min of two SparseTensors."
-  description: "Assumes the two SparseTensors have the same shape, i.e., no broadcasting."
 }
 op {
   name: "SparseSplit"
   input_arg {
     name: "split_dim"
-    description: "0-D.  The dimension along which to split.  Must be in the range\n`[0, rank(shape))`."
     type: DT_INT64
   }
   input_arg {
     name: "indices"
-    description: "2-D tensor represents the indices of the sparse tensor."
     type: DT_INT64
   }
   input_arg {
     name: "values"
-    description: "1-D tensor represents the values of the sparse tensor."
     type_attr: "T"
   }
   input_arg {
     name: "shape"
-    description: "1-D. tensor represents the shape of the sparse tensor.\noutput indices: A list of 1-D tensors represents the indices of the output\nsparse tensors."
     type: DT_INT64
   }
   output_arg {
@@ -29304,20 +26886,17 @@ op {
   }
   output_arg {
     name: "output_values"
-    description: "A list of 1-D tensors represents the values of the output sparse\ntensors."
     type_attr: "T"
     number_attr: "num_split"
   }
   output_arg {
     name: "output_shape"
-    description: "A list of 1-D tensors represents the shape of the output sparse\ntensors."
     type: DT_INT64
     number_attr: "num_split"
   }
   attr {
     name: "num_split"
     type: "int"
-    description: "The number of ways to split."
     has_minimum: true
     minimum: 1
   }
@@ -29325,29 +26904,23 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Split a `SparseTensor` into `num_split` tensors along one dimension."
-  description: "If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices\n`[0 : shape[split_dim] % num_split]` gets one extra dimension.\nFor example, if `split_dim = 1` and `num_split = 2` and the input is\n\n    input_tensor = shape = [2, 7]\n    [    a   d e  ]\n    [b c          ]\n\nGraphically the output tensors are:\n\n    output_tensor[0] = shape = [2, 4]\n    [    a  ]\n    [b c    ]\n\n    output_tensor[1] = shape = [2, 3]\n    [ d e  ]\n    [      ]"
 }
 op {
   name: "SparseTensorDenseAdd"
   input_arg {
     name: "a_indices"
-    description: "2-D.  The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "a_values"
-    description: "1-D.  The `values` of the `SparseTensor`, with shape `[nnz]`."
     type_attr: "T"
   }
   input_arg {
     name: "a_shape"
-    description: "1-D.  The `shape` of the `SparseTensor`, with shape `[ndims]`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "b"
-    description: "`ndims`-D Tensor.  With shape `a_shape`."
     type_attr: "T"
   }
   output_arg {
@@ -29361,17 +26934,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -29388,29 +26962,23 @@ op {
       }
     }
   }
-  summary: "Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`."
-  description: "This Op does not require `a_indices` be sorted in standard lexicographic order."
 }
 op {
   name: "SparseTensorDenseMatMul"
   input_arg {
     name: "a_indices"
-    description: "2-D.  The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix."
     type_attr: "Tindices"
   }
   input_arg {
     name: "a_values"
-    description: "1-D.  The `values` of the `SparseTensor`, size `[nnz]` Vector."
     type_attr: "T"
   }
   input_arg {
     name: "a_shape"
-    description: "1-D.  The `shape` of the `SparseTensor`, size `[2]` Vector."
     type: DT_INT64
   }
   input_arg {
     name: "b"
-    description: "2-D.  A dense Matrix."
     type_attr: "T"
   }
   output_arg {
@@ -29440,7 +27008,6 @@ op {
     default_value {
       b: false
     }
-    description: "Use the adjoint of A in the matrix multiply.  If A is complex, this\nis transpose(conj(A)).  Otherwise it\'s transpose(A)."
   }
   attr {
     name: "adjoint_b"
@@ -29448,10 +27015,7 @@ op {
     default_value {
       b: false
     }
-    description: "Use the adjoint of B in the matrix multiply.  If B is complex, this\nis transpose(conj(B)).  Otherwise it\'s transpose(B)."
   }
-  summary: "Multiply SparseTensor (of rank 2) \"A\" by dense matrix \"B\"."
-  description: "No validity checking is performed on the indices of A.  However, the following\ninput format is recommended for optimal behavior:\n\nif adjoint_a == false:\n  A should be sorted in lexicographically increasing order.  Use SparseReorder\n  if you\'re not sure.\nif adjoint_a == true:\n  A should be sorted in order of increasing dimension 1 (i.e., \"column major\"\n  order instead of \"row major\" order)."
 }
 op {
   name: "SparseTensorSliceDataset"
@@ -29475,34 +27039,28 @@ op {
     name: "Tvalues"
     type: "type"
   }
-  summary: "Creates a dataset that splits a SparseTensor into elements row-wise."
   is_stateful: true
 }
 op {
   name: "SparseToDense"
   input_arg {
     name: "sparse_indices"
-    description: "0-D, 1-D, or 2-D.  `sparse_indices[i]` contains the complete\nindex where `sparse_values[i]` will be placed."
     type_attr: "Tindices"
   }
   input_arg {
     name: "output_shape"
-    description: "1-D.  Shape of the dense output tensor."
     type_attr: "Tindices"
   }
   input_arg {
     name: "sparse_values"
-    description: "1-D.  Values corresponding to each row of `sparse_indices`,\nor a scalar value to be used for all sparse indices."
     type_attr: "T"
   }
   input_arg {
     name: "default_value"
-    description: "Scalar value to set for indices not specified in\n`sparse_indices`."
     type_attr: "T"
   }
   output_arg {
     name: "dense"
-    description: "Dense output tensor of shape `output_shape`."
     type_attr: "T"
   }
   attr {
@@ -29511,7 +27069,6 @@ op {
     default_value {
       b: true
     }
-    description: "If true, indices are checked to make sure they are sorted in\nlexicographic order and that there are no repeats."
   }
   attr {
     name: "T"
@@ -29527,54 +27084,43 @@ op {
       }
     }
   }
-  summary: "Converts a sparse representation into a dense tensor."
-  description: "Builds an array `dense` with shape `output_shape` such that\n\n```\n# If sparse_indices is scalar\ndense[i] = (i == sparse_indices ? sparse_values : default_value)\n\n# If sparse_indices is a vector, then for each i\ndense[sparse_indices[i]] = sparse_values[i]\n\n# If sparse_indices is an n by d matrix, then for each i in [0, n)\ndense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i]\n```\n\nAll other values in `dense` are set to `default_value`.  If `sparse_values` is a\nscalar, all sparse indices are set to this single value.\n\nIndices should be sorted in lexicographic order, and indices must not\ncontain any repeats. If `validate_indices` is true, these properties\nare checked during execution."
 }
 op {
   name: "SparseToSparseSetOperation"
   input_arg {
     name: "set1_indices"
-    description: "2D `Tensor`, indices of a `SparseTensor`. Must be in row-major\norder."
     type: DT_INT64
   }
   input_arg {
     name: "set1_values"
-    description: "1D `Tensor`, values of a `SparseTensor`. Must be in row-major\norder."
     type_attr: "T"
   }
   input_arg {
     name: "set1_shape"
-    description: "1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must\nbe the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the\nmax set size across `0...n-1` dimensions."
     type: DT_INT64
   }
   input_arg {
     name: "set2_indices"
-    description: "2D `Tensor`, indices of a `SparseTensor`. Must be in row-major\norder."
     type: DT_INT64
   }
   input_arg {
     name: "set2_values"
-    description: "1D `Tensor`, values of a `SparseTensor`. Must be in row-major\norder."
     type_attr: "T"
   }
   input_arg {
     name: "set2_shape"
-    description: "1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must\nbe the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the\nmax set size across `0...n-1` dimensions."
     type: DT_INT64
   }
   output_arg {
     name: "result_indices"
-    description: "2D indices of a `SparseTensor`."
     type: DT_INT64
   }
   output_arg {
     name: "result_values"
-    description: "1D values of a `SparseTensor`."
     type_attr: "T"
   }
   output_arg {
     name: "result_shape"
-    description: "1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is\nthe same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`\nis the max result set size across all `0...n-1` dimensions."
     type: DT_INT64
   }
   attr {
@@ -29603,31 +27149,25 @@ op {
       }
     }
   }
-  summary: "Applies set operation along last dimension of 2 `SparseTensor` inputs."
-  description: "See SetOperationOp::SetOperationFromContext for values of `set_operation`.\n\nIf `validate_indices` is `True`, `SparseToSparseSetOperation` validates the\norder and range of `set1` and `set2` indices.\n\nInput `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`,\nand `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same\nas `set2`. Dimension `n` contains values in a set, duplicates are allowed but\nignored.\n\nInput `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,\nand `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same\nas `set1`. Dimension `n` contains values in a set, duplicates are allowed but\nignored.\n\nIf `validate_indices` is `True`, this op validates the order and range of `set1`\nand `set2` indices.\n\nOutput `result` is a `SparseTensor` represented by `result_indices`,\n`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this\nhas rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`\ndimension contains the result of `set_operation` applied to the corresponding\n`[0...n-1]` dimension of `set`."
 }
 op {
   name: "Split"
   input_arg {
     name: "split_dim"
-    description: "0-D.  The dimension along which to split.  Must be in the range\n`[-rank(value), rank(value))`."
     type: DT_INT32
   }
   input_arg {
     name: "value"
-    description: "The tensor to split."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "They are identically shaped tensors, whose shape matches that of `value`\nexcept along `split_dim`, where their sizes are\n`values.shape[split_dim] / num_split`."
     type_attr: "T"
     number_attr: "num_split"
   }
   attr {
     name: "num_split"
     type: "int"
-    description: "The number of ways to split.  Must evenly divide\n`value.shape[split_dim]`."
     has_minimum: true
     minimum: 1
   }
@@ -29635,28 +27175,23 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Splits a tensor into `num_split` tensors along one dimension."
 }
 op {
   name: "SplitV"
   input_arg {
     name: "value"
-    description: "The tensor to split."
     type_attr: "T"
   }
   input_arg {
     name: "size_splits"
-    description: "list containing the sizes of each output tensor along the split\ndimension. Must sum to the dimension of value along split_dim.\nCan contain one -1 indicating that dimension is to be inferred."
     type_attr: "Tlen"
   }
   input_arg {
     name: "split_dim"
-    description: "0-D.  The dimension along which to split.  Must be in the range\n`[-rank(value), rank(value))`."
     type: DT_INT32
   }
   output_arg {
     name: "output"
-    description: "Tensors whose shape matches that of `value`\nexcept along `split_dim`, where their sizes are\n`size_splits[i]`."
     type_attr: "T"
     number_attr: "num_split"
   }
@@ -29683,23 +27218,19 @@ op {
       }
     }
   }
-  summary: "Splits a tensor into `num_split` tensors along one dimension."
 }
 op {
   name: "SqlDataset"
   input_arg {
     name: "driver_name"
-    description: "The database type. Currently, the only supported type is \'sqlite\'."
     type: DT_STRING
   }
   input_arg {
     name: "data_source_name"
-    description: "A connection string to connect to the database."
     type: DT_STRING
   }
   input_arg {
     name: "query"
-    description: "A SQL query to execute."
     type: DT_STRING
   }
   output_arg {
@@ -29718,7 +27249,6 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that executes a SQL query and emits rows of the result set."
   is_stateful: true
 }
 op {
@@ -29737,6 +27267,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -29744,8 +27275,6 @@ op {
       }
     }
   }
-  summary: "Computes square root of x element-wise."
-  description: "I.e., \\\\(y = \\sqrt{x} = x^{1/2}\\\\)."
 }
 op {
   name: "SqrtGrad"
@@ -29767,6 +27296,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -29774,8 +27304,6 @@ op {
       }
     }
   }
-  summary: "Computes the gradient for the sqrt of `x` wrt its input."
-  description: "Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy`\nis the corresponding input gradient."
 }
 op {
   name: "Square"
@@ -29793,6 +27321,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -29802,8 +27331,6 @@ op {
       }
     }
   }
-  summary: "Computes square of x element-wise."
-  description: "I.e., \\\\(y = x * x = x^2\\\\)."
 }
 op {
   name: "SquaredDifference"
@@ -29825,6 +27352,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -29834,20 +27362,16 @@ op {
       }
     }
   }
-  summary: "Returns (x - y)(x - y) element-wise."
-  description: "*NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
   is_commutative: true
 }
 op {
   name: "Squeeze"
   input_arg {
     name: "input"
-    description: "The `input` to squeeze."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Contains the same data as `input`, but has one or more dimensions of\nsize 1 removed."
     type_attr: "T"
   }
   attr {
@@ -29861,11 +27385,8 @@ op {
       list {
       }
     }
-    description: "If specified, only squeezes the dimensions listed. The dimension\nindex starts at 0. It is an error to squeeze a dimension that is not 1. Must\nbe in the range `[-rank(input), rank(input))`."
     has_minimum: true
   }
-  summary: "Removes dimensions of size 1 from the shape of a tensor."
-  description: "Given a tensor `input`, this operation returns a tensor of the same type with\nall dimensions of size 1 removed. If you don\'t want to remove all size 1\ndimensions, you can remove specific size 1 dimensions by specifying\n`squeeze_dims`.\n\nFor example:\n\n```\n# \'t\' is a tensor of shape [1, 2, 1, 3, 1, 1]\nshape(squeeze(t)) ==> [2, 3]\n```\n\nOr, to remove specific size 1 dimensions:\n\n```\n# \'t\' is a tensor of shape [1, 2, 1, 3, 1, 1]\nshape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1]\n```"
 }
 op {
   name: "Stack"
@@ -29885,7 +27406,6 @@ op {
       s: ""
     }
   }
-  summary: "Deprecated, use StackV2."
   is_stateful: true
 }
 op {
@@ -29895,16 +27415,13 @@ op {
     type: DT_STRING
     is_ref: true
   }
-  summary: "Deprecated, use StackCloseV2."
 }
 op {
   name: "StackCloseV2"
   input_arg {
     name: "handle"
-    description: "The handle to a stack."
     type: DT_RESOURCE
   }
-  summary: "Delete the stack from its resource container."
   is_stateful: true
 }
 op {
@@ -29922,26 +27439,21 @@ op {
     name: "elem_type"
     type: "type"
   }
-  summary: "Deprecated, use StackPopV2."
 }
 op {
   name: "StackPopV2"
   input_arg {
     name: "handle"
-    description: "The handle to a stack."
     type: DT_RESOURCE
   }
   output_arg {
     name: "elem"
-    description: "The tensor that is popped from the top of the stack."
     type_attr: "elem_type"
   }
   attr {
     name: "elem_type"
     type: "type"
-    description: "The type of the elem that is popped."
   }
-  summary: "Pop the element at the top of the stack."
   is_stateful: true
 }
 op {
@@ -29970,23 +27482,19 @@ op {
       b: false
     }
   }
-  summary: "Deprecated, use StackPushV2."
 }
 op {
   name: "StackPushV2"
   input_arg {
     name: "handle"
-    description: "The handle to a stack."
     type: DT_RESOURCE
   }
   input_arg {
     name: "elem"
-    description: "The tensor to be pushed onto the stack."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The same tensor as the input \'elem\'."
     type_attr: "T"
   }
   attr {
@@ -29999,27 +27507,22 @@ op {
     default_value {
       b: false
     }
-    description: "Swap `elem` to CPU. Default to false."
   }
-  summary: "Push an element onto the stack."
   is_stateful: true
 }
 op {
   name: "StackV2"
   input_arg {
     name: "max_size"
-    description: "The maximum size of the stack if non-negative. If negative, the stack\nsize is unlimited."
     type: DT_INT32
   }
   output_arg {
     name: "handle"
-    description: "The handle to the stack."
     type: DT_RESOURCE
   }
   attr {
     name: "elem_type"
     type: "type"
-    description: "The type of the elements on the stack."
   }
   attr {
     name: "stack_name"
@@ -30027,16 +27530,13 @@ op {
     default_value {
       s: ""
     }
-    description: "Overrides the name used for the temporary stack resource. Default\nvalue is the name of the \'Stack\' op (which is guaranteed unique)."
   }
-  summary: "A stack that produces elements in first-in last-out order."
   is_stateful: true
 }
 op {
   name: "Stage"
   input_arg {
     name: "values"
-    description: "a list of tensors\ndtypes A list of data types that inserted values should adhere to."
     type_list_attr: "dtypes"
   }
   attr {
@@ -30045,7 +27545,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Maximum number of elements in the Staging Area. If > 0, inserts\non the container will block when the capacity is reached."
     has_minimum: true
   }
   attr {
@@ -30054,7 +27553,6 @@ op {
     default_value {
       i: 0
     }
-    description: "The maximum number of bytes allowed for Tensors in the Staging Area.\nIf > 0, inserts will block until sufficient space is available."
     has_minimum: true
   }
   attr {
@@ -30069,7 +27567,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this queue is placed in the given container. Otherwise,\na default container is used."
   }
   attr {
     name: "shared_name"
@@ -30077,10 +27574,7 @@ op {
     default_value {
       s: ""
     }
-    description: "It is necessary to match this name to the matching Unstage Op."
   }
-  summary: "Stage values similar to a lightweight Enqueue."
-  description: "The basic functionality of this Op is similar to a queue with many\nfewer capabilities and options.  This Op is optimized for performance."
   is_stateful: true
 }
 op {
@@ -30119,7 +27613,6 @@ op {
       s: ""
     }
   }
-  summary: "Op removes all elements in the underlying container."
   is_stateful: true
 }
 op {
@@ -30168,8 +27661,6 @@ op {
       s: ""
     }
   }
-  summary: "Op peeks at the values at the specified index.  If the"
-  description: "underlying container does not contain sufficient elements\nthis op will block until it does.   This Op is optimized for\nperformance."
   is_stateful: true
 }
 op {
@@ -30212,24 +27703,20 @@ op {
       s: ""
     }
   }
-  summary: "Op returns the number of elements in the underlying container."
   is_stateful: true
 }
 op {
   name: "StatelessRandomNormal"
   input_arg {
     name: "shape"
-    description: "The shape of the output tensor."
     type_attr: "T"
   }
   input_arg {
     name: "seed"
-    description: "2 seeds (shape [2])."
-    type: DT_INT64
+    type_attr: "Tseed"
   }
   output_arg {
     name: "output"
-    description: "Random values with specified shape."
     type_attr: "dtype"
   }
   attr {
@@ -30238,7 +27725,6 @@ op {
     default_value {
       type: DT_FLOAT
     }
-    description: "The type of the output."
     allowed_values {
       list {
         type: DT_HALF
@@ -30260,24 +27746,32 @@ op {
       }
     }
   }
-  summary: "Outputs deterministic pseudorandom values from a normal distribution."
-  description: "The generated values will have mean 0 and standard deviation 1.\n\nThe outputs are a deterministic function of `shape` and `seed`."
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "StatelessRandomUniform"
   input_arg {
     name: "shape"
-    description: "The shape of the output tensor."
     type_attr: "T"
   }
   input_arg {
     name: "seed"
-    description: "2 seeds (shape [2])."
-    type: DT_INT64
+    type_attr: "Tseed"
   }
   output_arg {
     name: "output"
-    description: "Random values with specified shape."
     type_attr: "dtype"
   }
   attr {
@@ -30286,7 +27780,6 @@ op {
     default_value {
       type: DT_FLOAT
     }
-    description: "The type of the output."
     allowed_values {
       list {
         type: DT_HALF
@@ -30308,24 +27801,32 @@ op {
       }
     }
   }
-  summary: "Outputs deterministic pseudorandom random values from a uniform distribution."
-  description: "The generated values follow a uniform distribution in the range `[0, 1)`. The\nlower bound 0 is included in the range, while the upper bound 1 is excluded.\n\nThe outputs are a deterministic function of `shape` and `seed`."
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "StatelessTruncatedNormal"
   input_arg {
     name: "shape"
-    description: "The shape of the output tensor."
     type_attr: "T"
   }
   input_arg {
     name: "seed"
-    description: "2 seeds (shape [2])."
-    type: DT_INT64
+    type_attr: "Tseed"
   }
   output_arg {
     name: "output"
-    description: "Random values with specified shape."
     type_attr: "dtype"
   }
   attr {
@@ -30334,7 +27835,6 @@ op {
     default_value {
       type: DT_FLOAT
     }
-    description: "The type of the output."
     allowed_values {
       list {
         type: DT_HALF
@@ -30356,8 +27856,19 @@ op {
       }
     }
   }
-  summary: "Outputs deterministic pseudorandom values from a truncated normal distribution."
-  description: "The generated values follow a normal distribution with mean 0 and standard\ndeviation 1, except that values whose magnitude is more than 2 standard\ndeviations from the mean are dropped and re-picked.\n\nThe outputs are a deterministic function of `shape` and `seed`."
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "StatsAggregatorHandle"
@@ -30379,7 +27890,6 @@ op {
       s: ""
     }
   }
-  summary: "Creates a statistics manager resource."
   is_stateful: true
 }
 op {
@@ -30392,7 +27902,6 @@ op {
     name: "summary"
     type: DT_STRING
   }
-  summary: "Produces a summary of any statistics recorded by the given statistics manager."
   is_stateful: true
 }
 op {
@@ -30409,8 +27918,6 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Stops gradient computation."
-  description: "When executed in a graph, this op outputs its input tensor as-is.\n\nWhen building ops to compute gradients, this op prevents the contribution of\nits inputs to be taken into account.  Normally, the gradient generator adds ops\nto a graph to compute the derivatives of a specified \'loss\' by recursively\nfinding out inputs that contributed to its computation.  If you insert this op\nin the graph it inputs are masked from the gradient generator.  They are not\ntaken into account for computing gradients.\n\nThis is useful any time you want to compute a value with TensorFlow but need\nto pretend that the value was a constant. Some examples include:\n\n*  The *EM* algorithm where the *M-step* should not involve backpropagation\n   through the output of the *E-step*.\n*  Contrastive divergence training of Boltzmann machines where, when\n   differentiating the energy function, the training must not backpropagate\n   through the graph that generated the samples from the model.\n*  Adversarial training, where no backprop should happen through the adversarial\n   example generation process."
 }
 op {
   name: "StridedSlice"
@@ -30420,17 +27927,14 @@ op {
   }
   input_arg {
     name: "begin"
-    description: "`begin[k]` specifies the offset into the `k`th range specification.\nThe exact dimension this corresponds to will be determined by context.\nOut-of-bounds values will be silently clamped. If the `k`th bit of\n`begin_mask` then `begin[k]` is ignored and the full range of the\nappropriate dimension is used instead. Negative values causes indexing\nto start from the highest element e.g. If `foo==[1,2,3]` then `foo[-1]==3`."
     type_attr: "Index"
   }
   input_arg {
     name: "end"
-    description: "`end[i]` is like `begin` with the exception that `end_mask` is\nused to determine full ranges."
     type_attr: "Index"
   }
   input_arg {
     name: "strides"
-    description: "`strides[i]` specifies the increment in the `i`th specification\nafter extracting a given element. Negative indices will reverse\nthe original order. Out or range values are\nclamped to `[0,dim[i]) if slice[i]>0` or `[-1,dim[i]-1] if slice[i] < 0`"
     type_attr: "Index"
   }
   output_arg {
@@ -30457,7 +27961,6 @@ op {
     default_value {
       i: 0
     }
-    description: "a bitmask where a bit i being 1 means to ignore the begin\nvalue and instead use the largest interval possible. At runtime\nbegin[i] will be replaced with `[0, n-1) if `stride[i] > 0` or\n`[-1, n-1]` if `stride[i] < 0`"
   }
   attr {
     name: "end_mask"
@@ -30465,7 +27968,6 @@ op {
     default_value {
       i: 0
     }
-    description: "analogous to `begin_mask`"
   }
   attr {
     name: "ellipsis_mask"
@@ -30473,7 +27975,6 @@ op {
     default_value {
       i: 0
     }
-    description: "a bitmask where bit `i` being 1 means the `i`th\nposition is actually an ellipsis. One bit at most can be 1.\nIf `ellipsis_mask == 0`, then an implicit ellipsis mask of `1 << (m+1)`\nis provided. This means that `foo[3:5] == foo[3:5, ...]`. An ellipsis\nimplicitly creates as many range specifications as necessary to fully\nspecify the sliced range for every dimension. For example for a 4-dimensional\ntensor `foo` the slice `foo[2, ..., 5:8]` implies `foo[2, :, :, 5:8]`."
   }
   attr {
     name: "new_axis_mask"
@@ -30481,7 +27982,6 @@ op {
     default_value {
       i: 0
     }
-    description: "a bitmask where bit `i` being 1 means the `i`th\nspecification creates a new shape 1 dimension. For example\n`foo[:4, tf.newaxis, :2]` would produce a shape `(4, 1, 2)` tensor."
   }
   attr {
     name: "shrink_axis_mask"
@@ -30489,10 +27989,7 @@ op {
     default_value {
       i: 0
     }
-    description: "a bitmask where bit `i` implies that the `i`th\nspecification should shrink the dimensionality. begin and end\nmust imply a slice of size 1 in the dimension. For example in\npython one might do `foo[:, 3, :]` which would result in\n`shrink_axis_mask` being 2."
   }
-  summary: "Return a strided slice from `input`."
-  description: "Note, most python users will want to use the Python `Tensor.__getitem__`\nor `Variable.__getitem__` rather than this op directly.\n\nThe goal of this op is to produce a new tensor with a subset of\nthe elements from the `n` dimensional `input` tensor. The subset is chosen using\na sequence of `m` sparse range specifications encoded into the arguments\nof this function. Note, in some cases\n`m` could be equal to `n`, but this need not be the case. Each\nrange specification entry can be one of the following:\n\n- An ellipsis (...). Ellipses are used to imply zero or more\n  dimensions of full-dimension selection and are produced using\n  `ellipsis_mask`. For example, `foo[...]` is the identity slice.\n\n- A new axis. This is used to insert a new shape=1 dimension and is\n  produced using `new_axis_mask`. For example, `foo[:, ...]` where\n  `foo` is shape `(3, 4)` produces a `(1, 3, 4)` tensor.\n\n\n- A range `begin:end:stride`. This is used to specify how much to choose from\n  a given dimension. `stride` can be any integer but 0.  `begin` is an integer\n  which represents the index of the first value to select while `end` represents\n  the index of the last value to select. The number of values selected in each\n  dimension is `end - begin` if `stride > 0` and `begin - end` if `stride < 0`.\n  `begin` and `end` can be negative where `-1` is the last element, `-2` is\n  the second to last. `begin_mask` controls whether to replace the explicitly\n  given `begin` with an implicit effective value of `0` if `stride > 0` and\n  `-1` if `stride < 0`. `end_mask` is analogous but produces the number\n  required to create the largest open interval. For example, given a shape\n  `(3,)` tensor `foo[:]`, the effective `begin` and `end` are `0` and `3`. Do\n  not assume this is equivalent to `foo[0:-1]` which has an effective `begin`\n  and `end` of `0` and `2`. Another example is `foo[-2::-1]` which reverses the\n  first dimension of a tensor while dropping the last two (in the original\n  order elements). For example `foo = [1,2,3,4]; foo[-2::-1]` is `[4,3]`.\n\n- A single index. This is used to keep only elements that have a given\n  index. For example (`foo[2, :]` on a shape `(5,6)` tensor produces a\n  shape `(6,)` tensor. This is encoded in `begin` and `end` and\n  `shrink_axis_mask`.\n\nEach conceptual range specification is encoded in the op\'s argument. This\nencoding is best understand by considering a non-trivial example. In\nparticular,\n`foo[1, 2:4, None, ..., :-3:-1, :]` will be encoded as\n\n```\nbegin = [1, 2, x, x, 0, x] # x denotes don\'t care (usually 0)\nend = [2, 4, x, x, -3, x]\nstrides = [1, 1, x, x, -1, 1]\nbegin_mask = 1<<4 | 1 << 5 = 48\nend_mask = 1<<5 = 32\nellipsis_mask = 1<<3 = 8\nnew_axis_mask = 1<<2 4\nshrink_axis_mask = 1<<0\n```\n\nIn this case if `foo.shape` is (5, 5, 5, 5, 5, 5) the final shape of\nthe slice becomes (2, 1, 5, 5, 2, 5).\nLet us walk step by step through each argument specification.\n\n1.  The first argument in the example slice is turned into `begin = 1` and\n`end = begin + 1 = 2`. To disambiguate from the original spec `2:4` we\nalso set the appropriate bit in `shrink_axis_mask`.\n\n2. `2:4` is contributes 2, 4, 1 to begin, end, and stride. All masks have\nzero bits contributed.\n\n3. None is a synonym for `tf.newaxis`. This means insert a dimension of size 1\ndimension in the final shape. Dummy values are contributed to begin,\nend and stride, while the new_axis_mask bit is set.\n\n4. `...` grab the full ranges from as many dimensions as needed to\nfully specify a slice for every dimension of the input shape.\n\n5. `:-3:-1` shows the use of negative indices. A negative index `i` associated\nwith a dimension that has shape `s` is converted to a positive index\n`s + i`. So `-1` becomes `s-1` (i.e. the last element). This conversion\nis done internally so begin, end and strides receive x, -3, and -1.\nThe appropriate begin_mask bit is set to indicate the start range is the\nfull range (ignoring the x).\n\n6. `:` indicates that the entire contents of the corresponding dimension\nis selected. This is equivalent to `::` or `0::1`. begin, end, and strides\nreceive 0, 0, and 1, respectively. The appropriate bits in `begin_mask` and\n`end_mask` are also set.\n\n*Requirements*:\n  `0 != strides[i] for i in [0, m)`\n  `ellipsis_mask must be a power of two (only one ellipsis)`"
 }
 op {
   name: "StridedSliceAssign"
@@ -30571,8 +28068,6 @@ op {
       i: 0
     }
   }
-  summary: "Assign `value` to the sliced l-value reference of `ref`."
-  description: "The values of `value` are assigned to the positions in the variable\n`ref` that are selected by the slice parameters. The slice parameters\n`begin, `end`, `strides`, etc. work exactly as in `StridedSlice`.\n\nNOTE this op currently does not support broadcasting and so `value`\'s\nshape must be exactly the shape produced by the slice of `ref`."
 }
 op {
   name: "StridedSliceGrad"
@@ -30649,14 +28144,11 @@ op {
       i: 0
     }
   }
-  summary: "Returns the gradient of `StridedSlice`."
-  description: "Since `StridedSlice` cuts out pieces of its `input` which is size\n`shape`, its gradient will have the same shape (which is passed here\nas `shape`). The gradient will be zero in any element that the slice\ndoes not select.\n\nArguments are the same as StridedSliceGrad with the exception that\n`dy` is the input gradient to be propagated and `shape` is the\nshape of `StridedSlice`\'s `input`."
 }
 op {
   name: "StringJoin"
   input_arg {
     name: "inputs"
-    description: "A list of string tensors.  The tensors must all have the same shape,\nor be scalars.  Scalars may be mixed in; these will be broadcast to the shape\nof non-scalar inputs."
     type: DT_STRING
     number_attr: "N"
   }
@@ -30676,36 +28168,28 @@ op {
     default_value {
       s: ""
     }
-    description: "string, an optional join separator."
   }
-  summary: "Joins the strings in the given list of string tensors into one tensor;"
-  description: "with the given separator (default is an empty separator)."
 }
 op {
   name: "StringSplit"
   input_arg {
     name: "input"
-    description: "1-D. Strings to split."
     type: DT_STRING
   }
   input_arg {
     name: "delimiter"
-    description: "0-D. Delimiter characters (bytes), or empty string."
     type: DT_STRING
   }
   output_arg {
     name: "indices"
-    description: "A dense matrix of int64 representing the indices of the sparse tensor."
     type: DT_INT64
   }
   output_arg {
     name: "values"
-    description: "A vector of strings corresponding to the splited values."
     type: DT_STRING
   }
   output_arg {
     name: "shape"
-    description: "a length-2 vector of int64 representing the shape of the sparse\ntensor, where the first value is N and the second value is the maximum number\nof tokens in a single input entry."
     type: DT_INT64
   }
   attr {
@@ -30714,10 +28198,7 @@ op {
     default_value {
       b: true
     }
-    description: "A `bool`. If `True`, skip the empty strings from the result."
   }
-  summary: "Split elements of `input` based on `delimiter` into a `SparseTensor`."
-  description: "Let N be the size of source (typically N will be the batch size). Split each\nelement of `input` based on `delimiter` and return a `SparseTensor`\ncontaining the splitted tokens. Empty tokens are ignored.\n\n`delimiter` can be empty, or a string of split characters. If `delimiter` is an\n empty string, each element of `input` is split into individual single-byte\n character strings, including splitting of UTF-8 multibyte sequences. Otherwise\n every character of `delimiter` is a potential split point.\n\nFor example:\n  N = 2, input[0] is \'hello world\' and input[1] is \'a b c\', then the output\n  will be\n\n  indices = [0, 0;\n             0, 1;\n             1, 0;\n             1, 1;\n             1, 2]\n  shape = [2, 3]\n  values = [\'hello\', \'world\', \'a\', \'b\', \'c\']"
 }
 op {
   name: "StringToHashBucket"
@@ -30727,67 +28208,52 @@ op {
   }
   output_arg {
     name: "output"
-    description: "A Tensor of the same shape as the input `string_tensor`."
     type: DT_INT64
   }
   attr {
     name: "num_buckets"
     type: "int"
-    description: "The number of buckets."
     has_minimum: true
     minimum: 1
   }
-  summary: "Converts each string in the input Tensor to its hash mod by a number of buckets."
-  description: "The hash function is deterministic on the content of the string within the\nprocess.\n\nNote that the hash function may change from time to time.\nThis functionality will be deprecated and it\'s recommended to use\n`tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`."
 }
 op {
   name: "StringToHashBucketFast"
   input_arg {
     name: "input"
-    description: "The strings to assign a hash bucket."
     type: DT_STRING
   }
   output_arg {
     name: "output"
-    description: "A Tensor of the same shape as the input `string_tensor`."
     type: DT_INT64
   }
   attr {
     name: "num_buckets"
     type: "int"
-    description: "The number of buckets."
     has_minimum: true
     minimum: 1
   }
-  summary: "Converts each string in the input Tensor to its hash mod by a number of buckets."
-  description: "The hash function is deterministic on the content of the string within the\nprocess and will never change. However, it is not suitable for cryptography.\nThis function may be used when CPU time is scarce and inputs are trusted or\nunimportant. There is a risk of adversaries constructing inputs that all hash\nto the same bucket. To prevent this problem, use a strong hash function with\n`tf.string_to_hash_bucket_strong`."
 }
 op {
   name: "StringToHashBucketStrong"
   input_arg {
     name: "input"
-    description: "The strings to assign a hash bucket."
     type: DT_STRING
   }
   output_arg {
     name: "output"
-    description: "A Tensor of the same shape as the input `string_tensor`."
     type: DT_INT64
   }
   attr {
     name: "num_buckets"
     type: "int"
-    description: "The number of buckets."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "key"
     type: "list(int)"
-    description: "The key for the keyed hash function passed as a list of two uint64\nelements."
   }
-  summary: "Converts each string in the input Tensor to its hash mod by a number of buckets."
-  description: "The hash function is deterministic on the content of the string within the\nprocess. The hash function is a keyed hash function, where attribute `key`\ndefines the key of the hash function. `key` is an array of 2 elements.\n\nA strong hash is important when inputs may be malicious, e.g. URLs with\nadditional components. Adversaries could try to make their inputs hash to the\nsame bucket for a denial-of-service attack or to skew the results. A strong\nhash prevents this by making it difficult, if not infeasible, to compute inputs\nthat hash to the same bucket. This comes at a cost of roughly 4x higher compute\ntime than `tf.string_to_hash_bucket_fast`."
 }
 op {
   name: "StringToNumber"
@@ -30797,7 +28263,6 @@ op {
   }
   output_arg {
     name: "output"
-    description: "A Tensor of the same shape as the input `string_tensor`."
     type_attr: "out_type"
   }
   attr {
@@ -30806,7 +28271,6 @@ op {
     default_value {
       type: DT_FLOAT
     }
-    description: "The numeric type to interpret each string in `string_tensor` as."
     allowed_values {
       list {
         type: DT_FLOAT
@@ -30816,8 +28280,6 @@ op {
       }
     }
   }
-  summary: "Converts each string in the input Tensor to the specified numeric type."
-  description: "(Note that int32 overflow results in an error while float overflow\nresults in a rounded value.)"
 }
 op {
   name: "Sub"
@@ -30839,6 +28301,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -30852,29 +28315,23 @@ op {
       }
     }
   }
-  summary: "Returns x - y element-wise."
-  description: "*NOTE*: `Sub` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "Substr"
   input_arg {
     name: "input"
-    description: "Tensor of strings"
     type: DT_STRING
   }
   input_arg {
     name: "pos"
-    description: "Scalar defining the position of first character in each substring"
     type_attr: "T"
   }
   input_arg {
     name: "len"
-    description: "Scalar defining the number of characters to include in each substring"
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "Tensor of substrings"
     type: DT_STRING
   }
   attr {
@@ -30887,24 +28344,19 @@ op {
       }
     }
   }
-  summary: "Return substrings from `Tensor` of strings."
-  description: "For each string in the input `Tensor`, creates a substring starting at index\n`pos` with a total length of `len`.\n\nIf `len` defines a substring that would extend beyond the length of the input\nstring, then as many characters as possible are used.\n\nIf `pos` is negative or specifies a character index larger than any of the input\nstrings, then an `InvalidArgumentError` is thrown.\n\n`pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on\nOp creation.\n\n*NOTE*: `Substr` supports broadcasting up to two dimensions. More about\nbroadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)\n\n---\n\nExamples\n\nUsing scalar `pos` and `len`:\n\n```python\ninput = [b\'Hello\', b\'World\']\nposition = 1\nlength = 3\n\noutput = [b\'ell\', b\'orl\']\n```\n\nUsing `pos` and `len` with same shape as `input`:\n\n```python\ninput = [[b\'ten\', b\'eleven\', b\'twelve\'],\n         [b\'thirteen\', b\'fourteen\', b\'fifteen\'],\n         [b\'sixteen\', b\'seventeen\', b\'eighteen\']]\nposition = [[1, 2, 3],\n            [1, 2, 3],\n            [1, 2, 3]]\nlength =   [[2, 3, 4],\n            [4, 3, 2],\n            [5, 5, 5]]\n\noutput = [[b\'en\', b\'eve\', b\'lve\'],\n          [b\'hirt\', b\'urt\', b\'te\'],\n          [b\'ixtee\', b\'vente\', b\'hteen\']]\n```\n\nBroadcasting `pos` and `len` onto `input`:\n\n```\ninput = [[b\'ten\', b\'eleven\', b\'twelve\'],\n         [b\'thirteen\', b\'fourteen\', b\'fifteen\'],\n         [b\'sixteen\', b\'seventeen\', b\'eighteen\'],\n         [b\'nineteen\', b\'twenty\', b\'twentyone\']]\nposition = [1, 2, 3]\nlength =   [1, 2, 3]\n\noutput = [[b\'e\', b\'ev\', b\'lve\'],\n          [b\'h\', b\'ur\', b\'tee\'],\n          [b\'i\', b\'ve\', b\'hte\'],\n          [b\'i\', b\'en\', b\'nty\']]\n```\n\nBroadcasting `input` onto `pos` and `len`:\n\n```\ninput = b\'thirteen\'\nposition = [1, 5, 7]\nlength =   [3, 2, 1]\n\noutput = [b\'hir\', b\'ee\', b\'n\']\n```"
 }
 op {
   name: "Sum"
   input_arg {
     name: "input"
-    description: "The tensor to reduce."
     type_attr: "T"
   }
   input_arg {
     name: "reduction_indices"
-    description: "The dimensions to reduce. Must be in the range\n`[-rank(input), rank(input))`."
     type_attr: "Tidx"
   }
   output_arg {
     name: "output"
-    description: "The reduced tensor."
     type_attr: "T"
   }
   attr {
@@ -30913,7 +28365,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, retain reduced dimensions with length 1."
   }
   attr {
     name: "T"
@@ -30922,17 +28373,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -30952,29 +28404,23 @@ op {
       }
     }
   }
-  summary: "Computes the sum of elements across dimensions of a tensor."
-  description: "Reduces `input` along the dimensions given in `reduction_indices`. Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_indices`. If `keep_dims` is true, the reduced dimensions are\nretained with length 1."
 }
 op {
   name: "Svd"
   input_arg {
     name: "input"
-    description: "A tensor of shape `[..., M, N]` whose inner-most 2 dimensions\nform matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`."
     type_attr: "T"
   }
   output_arg {
     name: "s"
-    description: "Singular values. Shape is `[..., P]`."
     type_attr: "T"
   }
   output_arg {
     name: "u"
-    description: "Left singular vectors. If `full_matrices` is `False` then shape is\n`[..., M, P]`; if `full_matrices` is `True` then shape is\n`[..., M, M]`. Undefined if `compute_uv` is `False`."
     type_attr: "T"
   }
   output_arg {
     name: "v"
-    description: "Left singular vectors. If `full_matrices` is `False` then shape is\n`[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`.\nUndefined if `compute_uv` is false."
     type_attr: "T"
   }
   attr {
@@ -30983,7 +28429,6 @@ op {
     default_value {
       b: true
     }
-    description: "If true, left and right singular vectors will be\ncomputed and returned in `u` and `v`, respectively.\nIf false, `u` and `v` are not set and should never referenced."
   }
   attr {
     name: "full_matrices"
@@ -30991,7 +28436,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true, compute full-sized `u` and `v`. If false\n(the default), compute only the leading `P` singular vectors.\nIgnored if `compute_uv` is `False`."
   }
   attr {
     name: "T"
@@ -31005,100 +28449,81 @@ op {
       }
     }
   }
-  summary: "Computes the singular value decompositions of one or more matrices."
-  description: "Computes the SVD of each inner matrix in `input` such that\n`input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])`\n\n```python\n# a is a tensor containing a batch of matrices.\n# s is a tensor of singular values for each matrix.\n# u is the tensor containing of left singular vectors for each matrix.\n# v is the tensor containing of right singular vectors for each matrix.\ns, u, v = svd(a)\ns, _, _ = svd(a, compute_uv=False)\n```"
 }
 op {
   name: "Switch"
   input_arg {
     name: "data"
-    description: "The tensor to be forwarded to the appropriate output."
     type_attr: "T"
   }
   input_arg {
     name: "pred"
-    description: "A scalar that specifies which output port will receive data."
     type: DT_BOOL
   }
   output_arg {
     name: "output_false"
-    description: "If `pred` is false, data will be forwarded to this output."
     type_attr: "T"
   }
   output_arg {
     name: "output_true"
-    description: "If `pred` is true, data will be forwarded to this output."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Forwards `data` to the output port determined by `pred`."
-  description: "If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,\nthe data goes to `output_false`.\n\nSee also `RefSwitch` and `Merge`."
 }
 op {
   name: "SymbolicGradient"
   input_arg {
     name: "input"
-    description: "a list of input tensors of size N + M;"
     type_list_attr: "Tin"
   }
   output_arg {
     name: "output"
-    description: "a list of output tensors of size N;"
     type_list_attr: "Tout"
   }
   attr {
     name: "Tin"
     type: "list(type)"
-    description: "the type list for the input list."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "Tout"
     type: "list(type)"
-    description: "the type list for the input list."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "f"
     type: "func"
-    description: "The function we want to compute the gradient for.\n\nThe function \'f\' must be a numerical function which takes N inputs and\nproduces M outputs. Its gradient function \'g\', which is computed by\nthis SymbolicGradient op is a function taking N + M inputs and\nproduces N outputs.\n\nI.e. if we have\n   (y1, y2, ..., y_M) = f(x1, x2, ..., x_N),\nthen, g is\n   (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N,\n                                     dL/dy1, dL/dy2, ..., dL/dy_M),\n\nwhere L is a scalar-value function of (x1, x2, ..., xN) (e.g., the\nloss function). dL/dx_i is the partial derivative of L with respect\nto x_i.\n\n(Needs some math expert to say the comment above better.)"
   }
-  summary: "Computes the gradient function for function f via backpropagation."
 }
 op {
   name: "TFRecordDataset"
   input_arg {
     name: "filenames"
-    description: "A scalar or vector containing the name(s) of the file(s) to be\nread."
     type: DT_STRING
   }
   input_arg {
     name: "compression_type"
-    description: "A scalar containing either (i) the empty string (no\ncompression), (ii) \"ZLIB\", or (iii) \"GZIP\"."
     type: DT_STRING
   }
   input_arg {
     name: "buffer_size"
-    description: "A scalar representing the number of bytes to buffer. A value of\n0 means no buffering will be performed."
     type: DT_INT64
   }
   output_arg {
     name: "handle"
     type: DT_VARIANT
   }
-  summary: "Creates a dataset that emits the records from one or more TFRecord files."
   is_stateful: true
 }
 op {
   name: "TFRecordReader"
   output_arg {
     name: "reader_handle"
-    description: "The handle to reference the Reader."
     type: DT_STRING
     is_ref: true
   }
@@ -31108,7 +28533,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -31116,7 +28540,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
   attr {
     name: "compression_type"
@@ -31125,14 +28548,12 @@ op {
       s: ""
     }
   }
-  summary: "A Reader that outputs the records from a TensorFlow Records file."
   is_stateful: true
 }
 op {
   name: "TFRecordReaderV2"
   output_arg {
     name: "reader_handle"
-    description: "The handle to reference the Reader."
     type: DT_RESOURCE
   }
   attr {
@@ -31141,7 +28562,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -31149,7 +28569,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
   attr {
     name: "compression_type"
@@ -31158,7 +28577,6 @@ op {
       s: ""
     }
   }
-  summary: "A Reader that outputs the records from a TensorFlow Records file."
   is_stateful: true
 }
 op {
@@ -31169,7 +28587,6 @@ op {
   }
   input_arg {
     name: "count"
-    description: "A scalar representing the number of elements from the `input_dataset`\nthat should be taken. A value of `-1` indicates that all of `input_dataset`\nis taken."
     type: DT_INT64
   }
   output_arg {
@@ -31188,34 +28605,28 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that contains `count` elements from the `input_dataset`."
 }
 op {
   name: "TakeManySparseFromTensorsMap"
   input_arg {
     name: "sparse_handles"
-    description: "1-D, The `N` serialized `SparseTensor` objects.\nShape: `[N]`."
     type: DT_INT64
   }
   output_arg {
     name: "sparse_indices"
-    description: "2-D.  The `indices` of the minibatch `SparseTensor`."
     type: DT_INT64
   }
   output_arg {
     name: "sparse_values"
-    description: "1-D.  The `values` of the minibatch `SparseTensor`."
     type_attr: "dtype"
   }
   output_arg {
     name: "sparse_shape"
-    description: "1-D.  The `shape` of the minibatch `SparseTensor`."
     type: DT_INT64
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The `dtype` of the `SparseTensor` objects stored in the\n`SparseTensorsMap`."
   }
   attr {
     name: "container"
@@ -31223,7 +28634,6 @@ op {
     default_value {
       s: ""
     }
-    description: "The container name for the `SparseTensorsMap` read by this op."
   }
   attr {
     name: "shared_name"
@@ -31231,10 +28641,7 @@ op {
     default_value {
       s: ""
     }
-    description: "The shared name for the `SparseTensorsMap` read by this op.\nIt should not be blank; rather the `shared_name` or unique Operation name\nof the Op that created the original `SparseTensorsMap` should be used."
   }
-  summary: "Read `SparseTensors` from a `SparseTensorsMap` and concatenate them."
-  description: "The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where\n`N` is the minibatch size and the rows correspond to the output handles of\n`AddSparseToTensorsMap` or `AddManySparseToTensorsMap`.  The ranks of the\noriginal `SparseTensor` objects that went into the given input ops must all\nmatch.  When the final `SparseTensor` is created, it has rank one\nhigher than the ranks of the incoming `SparseTensor` objects\n(they have been concatenated along a new row dimension on the left).\n\nThe output `SparseTensor` object\'s shape values for all dimensions but the\nfirst are the max across the input `SparseTensor` objects\' shape values\nfor the corresponding dimensions.  Its first shape value is `N`, the minibatch\nsize.\n\nThe input `SparseTensor` objects\' indices are assumed ordered in\nstandard lexicographic order.  If this is not the case, after this\nstep run `SparseReorder` to restore index ordering.\n\nFor example, if the handles represent an input, which is a `[2, 3]` matrix\nrepresenting two original `SparseTensor` objects:\n\n```\n    index = [ 0]\n            [10]\n            [20]\n    values = [1, 2, 3]\n    shape = [50]\n```\n\nand\n\n```\n    index = [ 2]\n            [10]\n    values = [4, 5]\n    shape = [30]\n```\n\nthen the final `SparseTensor` will be:\n\n```\n    index = [0  0]\n            [0 10]\n            [0 20]\n            [1  2]\n            [1 10]\n    values = [1, 2, 3, 4, 5]\n    shape = [2 50]\n```"
   is_stateful: true
 }
 op {
@@ -31253,6 +28660,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
@@ -31262,7 +28670,6 @@ op {
       }
     }
   }
-  summary: "Computes tan of x element-wise."
 }
 op {
   name: "Tanh"
@@ -31280,6 +28687,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -31287,7 +28695,6 @@ op {
       }
     }
   }
-  summary: "Computes hyperbolic tangent of `x` element-wise."
 }
 op {
   name: "TanhGrad"
@@ -31309,6 +28716,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_COMPLEX64
@@ -31316,26 +28724,21 @@ op {
       }
     }
   }
-  summary: "Computes the gradient for the tanh of `x` wrt its input."
-  description: "Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy`\nis the corresponding input gradient."
 }
 op {
   name: "TemporaryVariable"
   output_arg {
     name: "ref"
-    description: "A reference to the variable tensor."
     type_attr: "dtype"
     is_ref: true
   }
   attr {
     name: "shape"
     type: "shape"
-    description: "The shape of the variable tensor."
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of elements in the variable tensor."
   }
   attr {
     name: "var_name"
@@ -31343,10 +28746,7 @@ op {
     default_value {
       s: ""
     }
-    description: "Overrides the name used for the temporary variable resource. Default\nvalue is the name of the \'TemporaryVariable\' op (which is guaranteed unique)."
   }
-  summary: "Returns a tensor that may be mutated, but only persists within a single step."
-  description: "This is an experimental op for internal use only and it is possible to use this\nop in unsafe ways.  DO NOT USE unless you fully understand the risks.\n\nIt is the caller\'s responsibility to ensure that \'ref\' is eventually passed to a\nmatching \'DestroyTemporaryVariable\' op after all other uses have completed.\n\nOutputs a ref to the tensor state so it may be read or modified.\n\n  E.g.\n      var = state_ops._temporary_variable([1, 2], types.float_)\n      var_name = var.op.name\n      var = state_ops.assign(var, [[4.0, 5.0]])\n      var = state_ops.assign_add(var, [[6.0, 7.0]])\n      final = state_ops._destroy_temporary_variable(var, var_name=var_name)"
   is_stateful: true
 }
 op {
@@ -31418,17 +28818,13 @@ op {
     name: "handle"
     type: DT_STRING
   }
-  summary: "Deprecated. Use TensorArrayCloseV3"
 }
 op {
   name: "TensorArrayCloseV3"
   input_arg {
     name: "handle"
-    description: "The handle to a TensorArray (output of TensorArray or TensorArrayGrad)."
     type: DT_RESOURCE
   }
-  summary: "Delete the TensorArray from its resource container."
-  description: "This enables the user to close and release the resource in the middle\nof a step/run."
   is_stateful: true
 }
 op {
@@ -31499,34 +28895,28 @@ op {
       }
     }
   }
-  summary: "Deprecated. Use TensorArrayConcatV3"
 }
 op {
   name: "TensorArrayConcatV3"
   input_arg {
     name: "handle"
-    description: "The handle to a TensorArray."
     type: DT_RESOURCE
   }
   input_arg {
     name: "flow_in"
-    description: "A float scalar that enforces proper chaining of operations."
     type: DT_FLOAT
   }
   output_arg {
     name: "value"
-    description: "All of the elements in the TensorArray, concatenated along the first\naxis."
     type_attr: "dtype"
   }
   output_arg {
     name: "lengths"
-    description: "A vector of the row sizes of the original T elements in the\nvalue output.  In the example above, this would be the values:\n`(n1, n2, ..., n(T-1))`."
     type: DT_INT64
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of the elem that is returned."
   }
   attr {
     name: "element_shape_except0"
@@ -31536,10 +28926,7 @@ op {
         unknown_rank: true
       }
     }
-    description: "The expected shape of an element, if known,\nexcluding the first dimension. Used to validate the shapes of\nTensorArray elements. If this shape is not fully specified, concatenating\nzero-size TensorArrays is an error."
   }
-  summary: "Concat the elements from the TensorArray into value `value`."
-  description: "Takes `T` elements of shapes\n\n  ```\n  (n0 x d0 x d1 x ...), (n1 x d0 x d1 x ...), ..., (n(T-1) x d0 x d1 x ...)\n  ```\n\nand concatenates them into a Tensor of shape:\n\n  ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```\n\nAll elements must have the same shape (excepting the first dimension)."
   is_stateful: true
 }
 op {
@@ -31610,34 +28997,28 @@ op {
       }
     }
   }
-  summary: "Deprecated. Use TensorArrayGatherV3"
 }
 op {
   name: "TensorArrayGatherV3"
   input_arg {
     name: "handle"
-    description: "The handle to a TensorArray."
     type: DT_RESOURCE
   }
   input_arg {
     name: "indices"
-    description: "The locations in the TensorArray from which to read tensor elements."
     type: DT_INT32
   }
   input_arg {
     name: "flow_in"
-    description: "A float scalar that enforces proper chaining of operations."
     type: DT_FLOAT
   }
   output_arg {
     name: "value"
-    description: "All of the elements in the TensorArray, concatenated along a new\naxis (the new dimension 0)."
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of the elem that is returned."
   }
   attr {
     name: "element_shape"
@@ -31647,10 +29028,7 @@ op {
         unknown_rank: true
       }
     }
-    description: "The expected shape of an element, if known. Used to\nvalidate the shapes of TensorArray elements. If this shape is not\nfully specified, gathering zero-size TensorArrays is an error."
   }
-  summary: "Gather specific elements from the TensorArray into output `value`."
-  description: "All elements selected by `indices` must have the same shape."
   is_stateful: true
 }
 op {
@@ -31696,19 +29074,16 @@ op {
     name: "source"
     type: "string"
   }
-  summary: "Deprecated. Use TensorArrayGradV3"
   is_stateful: true
 }
 op {
   name: "TensorArrayGradV3"
   input_arg {
     name: "handle"
-    description: "The handle to the forward TensorArray."
     type: DT_RESOURCE
   }
   input_arg {
     name: "flow_in"
-    description: "A float scalar that enforces proper chaining of operations."
     type: DT_FLOAT
   }
   output_arg {
@@ -31722,10 +29097,7 @@ op {
   attr {
     name: "source"
     type: "string"
-    description: "The gradient source string, used to decide which gradient TensorArray\nto return."
   }
-  summary: "Creates a TensorArray for storing the gradients of values in the given handle."
-  description: "If the given TensorArray gradient already exists, returns a reference to it.\n\nLocks the size of the original TensorArray by disabling its dynamic size flag.\n\n**A note about the input flow_in:**\n\nThe handle flow_in forces the execution of the gradient lookup to occur\nonly after certain other operations have occurred.  For example, when\nthe forward TensorArray is dynamically sized, writes to this TensorArray\nmay resize the object.  The gradient TensorArray is statically sized based\non the size of the forward TensorArray when this operation executes.\nFurthermore, the size of the forward TensorArray is frozen by this call.\nAs a result, the flow is used to ensure that the call to generate the gradient\nTensorArray only happens after all writes are executed.\n\nIn the case of dynamically sized TensorArrays, gradient computation should\nonly be performed on read operations that have themselves been chained via\nflow to occur only after all writes have executed. That way the final size\nof the forward TensorArray is known when this operation is called.\n\n**A note about the source attribute:**\n\nTensorArray gradient calls use an accumulator TensorArray object.  If\nmultiple gradients are calculated and run in the same session, the multiple\ngradient nodes may accidentally flow through the same accumulator TensorArray.\nThis double counts and generally breaks the TensorArray gradient flow.\n\nThe solution is to identify which gradient call this particular\nTensorArray gradient is being called in.  This is performed by identifying\na unique string (e.g. \"gradients\", \"gradients_1\", ...) from the input\ngradient Tensor\'s name.  This string is used as a suffix when creating\nthe TensorArray gradient object here (the attribute `source`).\n\nThe attribute `source` is added as a suffix to the forward TensorArray\'s\nname when performing the creation / lookup, so that each separate gradient\ncalculation gets its own TensorArray accumulator."
   is_stateful: true
 }
 op {
@@ -31811,13 +29183,11 @@ op {
     name: "dtype"
     type: "type"
   }
-  summary: "Deprecated. Use TensorArrayReadV3"
 }
 op {
   name: "TensorArrayReadV3"
   input_arg {
     name: "handle"
-    description: "The handle to a TensorArray."
     type: DT_RESOURCE
   }
   input_arg {
@@ -31826,20 +29196,16 @@ op {
   }
   input_arg {
     name: "flow_in"
-    description: "A float scalar that enforces proper chaining of operations."
     type: DT_FLOAT
   }
   output_arg {
     name: "value"
-    description: "The tensor that is read from the TensorArray."
     type_attr: "dtype"
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of the elem that is returned."
   }
-  summary: "Read an element from the TensorArray into output `value`."
   is_stateful: true
 }
 op {
@@ -31900,41 +29266,33 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Deprecated. Use TensorArrayScatterV3"
 }
 op {
   name: "TensorArrayScatterV3"
   input_arg {
     name: "handle"
-    description: "The handle to a TensorArray."
     type: DT_RESOURCE
   }
   input_arg {
     name: "indices"
-    description: "The locations at which to write the tensor elements."
     type: DT_INT32
   }
   input_arg {
     name: "value"
-    description: "The concatenated tensor to write to the TensorArray."
     type_attr: "T"
   }
   input_arg {
     name: "flow_in"
-    description: "A float scalar that enforces proper chaining of operations."
     type: DT_FLOAT
   }
   output_arg {
     name: "flow_out"
-    description: "A float scalar that enforces proper chaining of operations."
     type: DT_FLOAT
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Scatter the data from the input value into specific TensorArray elements."
-  description: "`indices` must be a vector, its length must match the first dim of `value`."
   is_stateful: true
 }
 op {
@@ -31971,26 +29329,21 @@ op {
     name: "size"
     type: DT_INT32
   }
-  summary: "Deprecated. Use TensorArraySizeV3"
 }
 op {
   name: "TensorArraySizeV3"
   input_arg {
     name: "handle"
-    description: "The handle to a TensorArray (output of TensorArray or TensorArrayGrad)."
     type: DT_RESOURCE
   }
   input_arg {
     name: "flow_in"
-    description: "A float scalar that enforces proper chaining of operations."
     type: DT_FLOAT
   }
   output_arg {
     name: "size"
-    description: "The current size of the TensorArray."
     type: DT_INT32
   }
-  summary: "Get the current size of the TensorArray."
   is_stateful: true
 }
 op {
@@ -32051,41 +29404,33 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Deprecated. Use TensorArraySplitV3"
 }
 op {
   name: "TensorArraySplitV3"
   input_arg {
     name: "handle"
-    description: "The handle to a TensorArray."
     type: DT_RESOURCE
   }
   input_arg {
     name: "value"
-    description: "The concatenated tensor to write to the TensorArray."
     type_attr: "T"
   }
   input_arg {
     name: "lengths"
-    description: "The vector of lengths, how to split the rows of value into the\nTensorArray."
     type: DT_INT64
   }
   input_arg {
     name: "flow_in"
-    description: "A float scalar that enforces proper chaining of operations."
     type: DT_FLOAT
   }
   output_arg {
     name: "flow_out"
-    description: "A float scalar that enforces proper chaining of operations."
     type: DT_FLOAT
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Split the data from the input value into TensorArray elements."
-  description: "Assuming that `lengths` takes on values\n\n  ```(n0, n1, ..., n(T-1))```\n\nand that `value` has shape\n\n  ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```,\n\nthis splits values into a TensorArray with T tensors.\n\nTensorArray index t will be the subtensor of values with starting position\n\n  ```(n0 + n1 + ... + n(t-1), 0, 0, ...)```\n\nand having size\n\n  ```nt x d0 x d1 x ...```"
   is_stateful: true
 }
 op {
@@ -32160,30 +29505,25 @@ op {
       s: ""
     }
   }
-  summary: "Deprecated. Use TensorArrayV3"
   is_stateful: true
 }
 op {
   name: "TensorArrayV3"
   input_arg {
     name: "size"
-    description: "The size of the array."
     type: DT_INT32
   }
   output_arg {
     name: "handle"
-    description: "The handle to the TensorArray."
     type: DT_RESOURCE
   }
   output_arg {
     name: "flow"
-    description: "A scalar used to control gradient flow."
     type: DT_FLOAT
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of the elements on the tensor_array."
   }
   attr {
     name: "element_shape"
@@ -32193,7 +29533,6 @@ op {
         unknown_rank: true
       }
     }
-    description: "The expected shape of an element, if known. Used to\nvalidate the shapes of TensorArray elements. If this shape is not\nfully specified, gathering zero-size TensorArrays is an error."
   }
   attr {
     name: "dynamic_size"
@@ -32201,7 +29540,6 @@ op {
     default_value {
       b: false
     }
-    description: "A boolean that determines whether writes to the TensorArray\nare allowed to grow the size.  By default, this is not allowed."
   }
   attr {
     name: "clear_after_read"
@@ -32209,7 +29547,6 @@ op {
     default_value {
       b: true
     }
-    description: "If true (default), Tensors in the TensorArray are cleared\nafter being read.  This disables multiple read semantics but allows early\nrelease of memory."
   }
   attr {
     name: "identical_element_shapes"
@@ -32217,7 +29554,6 @@ op {
     default_value {
       b: false
     }
-    description: "If true (default is false), then all\nelements in the TensorArray will be expected to have have identical shapes.\nThis allows certain behaviors, like dynamically checking for\nconsistent shapes on write, and being able to fill in properly\nshaped zero tensors on stack -- even if the element_shape attribute\nis not fully defined."
   }
   attr {
     name: "tensor_array_name"
@@ -32225,10 +29561,7 @@ op {
     default_value {
       s: ""
     }
-    description: "Overrides the name used for the temporary tensor_array\nresource. Default value is the name of the \'TensorArray\' op (which\nis guaranteed unique)."
   }
-  summary: "An array of Tensors of given size."
-  description: "Write data via Write and read via Read or Pack."
   is_stateful: true
 }
 op {
@@ -32289,40 +29622,33 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Deprecated. Use TensorArrayGradV3"
 }
 op {
   name: "TensorArrayWriteV3"
   input_arg {
     name: "handle"
-    description: "The handle to a TensorArray."
     type: DT_RESOURCE
   }
   input_arg {
     name: "index"
-    description: "The position to write to inside the TensorArray."
     type: DT_INT32
   }
   input_arg {
     name: "value"
-    description: "The tensor to write to the TensorArray."
     type_attr: "T"
   }
   input_arg {
     name: "flow_in"
-    description: "A float scalar that enforces proper chaining of operations."
     type: DT_FLOAT
   }
   output_arg {
     name: "flow_out"
-    description: "A float scalar that enforces proper chaining of operations."
     type: DT_FLOAT
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Push an element onto the tensor_array."
   is_stateful: true
 }
 op {
@@ -32347,9 +29673,108 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that emits `components` as a tuple of tensors once."
   is_stateful: true
 }
+op {
+  name: "TensorListFromTensor"
+  input_arg {
+    name: "tensor"
+    type_attr: "element_dtype"
+  }
+  input_arg {
+    name: "element_shape"
+    type_attr: "shape_type"
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "element_dtype"
+    type: "type"
+  }
+  attr {
+    name: "shape_type"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "TensorListLength"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "length"
+    type: DT_INT32
+  }
+}
+op {
+  name: "TensorListPopBack"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "tensor"
+    type_attr: "element_dtype"
+  }
+  attr {
+    name: "element_dtype"
+    type: "type"
+  }
+}
+op {
+  name: "TensorListPushBack"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "tensor"
+    type_attr: "element_dtype"
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "element_dtype"
+    type: "type"
+  }
+}
+op {
+  name: "TensorListStack"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "tensor"
+    type_attr: "element_dtype"
+  }
+  attr {
+    name: "element_dtype"
+    type: "type"
+  }
+  attr {
+    name: "num_elements"
+    type: "int"
+    default_value {
+      i: -1
+    }
+  }
+}
 op {
   name: "TensorSliceDataset"
   input_arg {
@@ -32372,14 +29797,12 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that emits each dim-0 slice of `components` once."
   is_stateful: true
 }
 op {
   name: "TensorSummary"
   input_arg {
     name: "tensor"
-    description: "A tensor to serialize."
     type_attr: "T"
   }
   output_arg {
@@ -32396,7 +29819,6 @@ op {
     default_value {
       s: ""
     }
-    description: "A json-encoded SummaryDescription proto."
   }
   attr {
     name: "labels"
@@ -32405,7 +29827,6 @@ op {
       list {
       }
     }
-    description: "An unused list of strings."
   }
   attr {
     name: "display_name"
@@ -32413,26 +29834,20 @@ op {
     default_value {
       s: ""
     }
-    description: "An unused string."
   }
-  summary: "Outputs a `Summary` protocol buffer with a tensor."
-  description: "This op is being phased out in favor of TensorSummaryV2, which lets callers pass\na tag as well as a serialized SummaryMetadata proto string that contains\nplugin-specific data. We will keep this op to maintain backwards compatibility."
 }
 op {
   name: "TensorSummaryV2"
   input_arg {
     name: "tag"
-    description: "A string attached to this summary. Used for organization in TensorBoard."
     type: DT_STRING
   }
   input_arg {
     name: "tensor"
-    description: "A tensor to serialize."
     type_attr: "T"
   }
   input_arg {
     name: "serialized_summary_metadata"
-    description: "A serialized SummaryMetadata proto. Contains plugin\ndata."
     type: DT_STRING
   }
   output_arg {
@@ -32443,37 +29858,31 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Outputs a `Summary` protocol buffer with a tensor and per-plugin data."
 }
 op {
   name: "TextLineDataset"
   input_arg {
     name: "filenames"
-    description: "A scalar or a vector containing the name(s) of the file(s) to be\nread."
     type: DT_STRING
   }
   input_arg {
     name: "compression_type"
-    description: "A scalar containing either (i) the empty string (no\ncompression), (ii) \"ZLIB\", or (iii) \"GZIP\"."
     type: DT_STRING
   }
   input_arg {
     name: "buffer_size"
-    description: "A scalar containing the number of bytes to buffer."
     type: DT_INT64
   }
   output_arg {
     name: "handle"
     type: DT_VARIANT
   }
-  summary: "Creates a dataset that emits the lines of one or more text files."
   is_stateful: true
 }
 op {
   name: "TextLineReader"
   output_arg {
     name: "reader_handle"
-    description: "The handle to reference the Reader."
     type: DT_STRING
     is_ref: true
   }
@@ -32483,7 +29892,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Number of lines to skip from the beginning of every file."
   }
   attr {
     name: "container"
@@ -32491,7 +29899,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -32499,16 +29906,13 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
-  summary: "A Reader that outputs the lines of a file delimited by \'\\n\'."
   is_stateful: true
 }
 op {
   name: "TextLineReaderV2"
   output_arg {
     name: "reader_handle"
-    description: "The handle to reference the Reader."
     type: DT_RESOURCE
   }
   attr {
@@ -32517,7 +29921,6 @@ op {
     default_value {
       i: 0
     }
-    description: "Number of lines to skip from the beginning of every file."
   }
   attr {
     name: "container"
@@ -32525,7 +29928,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -32533,56 +29935,46 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
-  summary: "A Reader that outputs the lines of a file delimited by \'\\n\'."
   is_stateful: true
 }
 op {
   name: "ThreadUnsafeUnigramCandidateSampler"
   input_arg {
     name: "true_classes"
-    description: "A batch_size * num_true matrix, in which each row contains the\nIDs of the num_true target_classes in the corresponding original label."
     type: DT_INT64
   }
   output_arg {
     name: "sampled_candidates"
-    description: "A vector of length num_sampled, in which each element is\nthe ID of a sampled candidate."
     type: DT_INT64
   }
   output_arg {
     name: "true_expected_count"
-    description: "A batch_size * num_true matrix, representing\nthe number of times each candidate is expected to occur in a batch\nof sampled candidates. If unique=true, then this is a probability."
     type: DT_FLOAT
   }
   output_arg {
     name: "sampled_expected_count"
-    description: "A vector of length num_sampled, for each sampled\ncandidate representing the number of times the candidate is expected\nto occur in a batch of sampled candidates.  If unique=true, then this is a\nprobability."
     type: DT_FLOAT
   }
   attr {
     name: "num_true"
     type: "int"
-    description: "Number of true labels per context."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "num_sampled"
     type: "int"
-    description: "Number of candidates to randomly sample."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "unique"
     type: "bool"
-    description: "If unique is true, we sample with rejection, so that all sampled\ncandidates in a batch are unique. This requires some approximation to\nestimate the post-rejection sampling probabilities."
   }
   attr {
     name: "range_max"
     type: "int"
-    description: "The sampler will sample integers from the interval [0, range_max)."
     has_minimum: true
     minimum: 1
   }
@@ -32592,7 +29984,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -32600,22 +29991,17 @@ op {
     default_value {
       i: 0
     }
-    description: "An second seed to avoid seed collision."
   }
-  summary: "Generates labels for candidate sampling with a learned unigram distribution."
-  description: "See explanations of candidate sampling and the data formats at\ngo/candidate-sampling.\n\nFor each batch, this op picks a single set of sampled candidate labels.\n\nThe advantages of sampling candidates per-batch are simplicity and the\npossibility of efficient dense matrix multiplication. The disadvantage is that\nthe sampled candidates must be chosen independently of the context and of the\ntrue labels."
   is_stateful: true
 }
 op {
   name: "Tile"
   input_arg {
     name: "input"
-    description: "1-D or higher."
     type_attr: "T"
   }
   input_arg {
     name: "multiples"
-    description: "1-D. Length must be the same as the number of dimensions in `input`"
     type_attr: "Tmultiples"
   }
   output_arg {
@@ -32639,8 +30025,6 @@ op {
       }
     }
   }
-  summary: "Constructs a tensor by tiling a given tensor."
-  description: "This operation creates a new tensor by replicating `input` `multiples` times.\nThe output tensor\'s i\'th dimension has `input.dims(i) * multiples[i]` elements,\nand the values of `input` are replicated `multiples[i]` times along the \'i\'th\ndimension. For example, tiling `[a b c d]` by `[2]` produces\n`[a b c d a b c d]`."
 }
 op {
   name: "TileGrad"
@@ -32660,8 +30044,6 @@ op {
     name: "T"
     type: "type"
   }
-  summary: "Returns the gradient of `Tile`."
-  description: "Since `Tile` takes an input and repeats the input `multiples` times\nalong each dimension, `TileGrad` takes in `multiples` and aggregates\neach repeated tile of `input` into `output`."
   deprecation {
     version: 3
     explanation: "TileGrad has been replaced with reduce_sum"
@@ -32671,23 +30053,19 @@ op {
   name: "TopK"
   input_arg {
     name: "input"
-    description: "1-D or higher with last dimension at least `k`."
     type_attr: "T"
   }
   output_arg {
     name: "values"
-    description: "The `k` largest elements along each last dimensional slice."
     type_attr: "T"
   }
   output_arg {
     name: "indices"
-    description: "The indices of `values` within the last dimension of `input`."
     type: DT_INT32
   }
   attr {
     name: "k"
     type: "int"
-    description: "Number of top elements to look for along the last dimension (along each\nrow for matrices)."
     has_minimum: true
   }
   attr {
@@ -32696,7 +30074,6 @@ op {
     default_value {
       b: true
     }
-    description: "If true the resulting `k` elements will be sorted by the values in\ndescending order."
   }
   attr {
     name: "T"
@@ -32706,10 +30083,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -32717,8 +30095,6 @@ op {
       }
     }
   }
-  summary: "Finds values and indices of the `k` largest elements for the last dimension."
-  description: "If the input is a vector (rank-1), finds the `k` largest entries in the vector\nand outputs their values and indices as vectors.  Thus `values[j]` is the\n`j`-th largest entry in `input`, and its index is `indices[j]`.\n\nFor matrices (resp. higher rank input), computes the top `k` entries in each\nrow (resp. vector along the last dimension).  Thus,\n\n    values.shape = indices.shape = input.shape[:-1] + [k]\n\nIf two elements are equal, the lower-index element appears first.\n\nIf `k` varies dynamically, use `TopKV2` below."
   deprecation {
     version: 7
     explanation: "Use TopKV2 instead"
@@ -32728,22 +30104,18 @@ op {
   name: "TopKV2"
   input_arg {
     name: "input"
-    description: "1-D or higher with last dimension at least `k`."
     type_attr: "T"
   }
   input_arg {
     name: "k"
-    description: "0-D.  Number of top elements to look for along the last dimension (along each\nrow for matrices)."
     type: DT_INT32
   }
   output_arg {
     name: "values"
-    description: "The `k` largest elements along each last dimensional slice."
     type_attr: "T"
   }
   output_arg {
     name: "indices"
-    description: "The indices of `values` within the last dimension of `input`."
     type: DT_INT32
   }
   attr {
@@ -32752,7 +30124,6 @@ op {
     default_value {
       b: true
     }
-    description: "If true the resulting `k` elements will be sorted by the values in\ndescending order."
   }
   attr {
     name: "T"
@@ -32762,10 +30133,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -32773,8 +30145,6 @@ op {
       }
     }
   }
-  summary: "Finds values and indices of the `k` largest elements for the last dimension."
-  description: "If the input is a vector (rank-1), finds the `k` largest entries in the vector\nand outputs their values and indices as vectors.  Thus `values[j]` is the\n`j`-th largest entry in `input`, and its index is `indices[j]`.\n\nFor matrices (resp. higher rank input), computes the top `k` entries in each\nrow (resp. vector along the last dimension).  Thus,\n\n    values.shape = indices.shape = input.shape[:-1] + [k]\n\nIf two elements are equal, the lower-index element appears first."
 }
 op {
   name: "Transpose"
@@ -32807,8 +30177,6 @@ op {
       }
     }
   }
-  summary: "Shuffle dimensions of x according to a permutation."
-  description: "The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:\n  `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]`"
 }
 op {
   name: "TruncateDiv"
@@ -32830,6 +30198,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_UINT8
@@ -32843,8 +30212,6 @@ op {
       }
     }
   }
-  summary: "Returns x / y element-wise for integer types."
-  description: "Truncation designates that negative numbers will round fractional quantities\ntoward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different\nthan Python semantics. See `FloorDiv` for a division function that matches\nPython Semantics.\n\n*NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "TruncateMod"
@@ -32867,24 +30234,21 @@ op {
       list {
         type: DT_INT32
         type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
     }
   }
-  summary: "Returns element-wise remainder of division. This emulates C semantics in that"
-  description: "the result here is consistent with a truncating divide. E.g. `truncate(x / y) *\ny + truncate_mod(x, y) = x`.\n\n*NOTE*: `TruncateMod` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)"
 }
 op {
   name: "TruncatedNormal"
   input_arg {
     name: "shape"
-    description: "The shape of the output tensor."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "A tensor of the specified shape filled with random truncated normal\nvalues."
     type_attr: "dtype"
   }
   attr {
@@ -32893,7 +30257,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either `seed` or `seed2` are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -32901,15 +30264,14 @@ op {
     default_value {
       i: 0
     }
-    description: "A second seed to avoid seed collision."
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of the output."
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -32925,55 +30287,45 @@ op {
       }
     }
   }
-  summary: "Outputs random values from a truncated normal distribution."
-  description: "The generated values follow a normal distribution with mean 0 and standard\ndeviation 1, except that values whose magnitude is more than 2 standard\ndeviations from the mean are dropped and re-picked."
   is_stateful: true
 }
 op {
   name: "UniformCandidateSampler"
   input_arg {
     name: "true_classes"
-    description: "A batch_size * num_true matrix, in which each row contains the\nIDs of the num_true target_classes in the corresponding original label."
     type: DT_INT64
   }
   output_arg {
     name: "sampled_candidates"
-    description: "A vector of length num_sampled, in which each element is\nthe ID of a sampled candidate."
     type: DT_INT64
   }
   output_arg {
     name: "true_expected_count"
-    description: "A batch_size * num_true matrix, representing\nthe number of times each candidate is expected to occur in a batch\nof sampled candidates. If unique=true, then this is a probability."
     type: DT_FLOAT
   }
   output_arg {
     name: "sampled_expected_count"
-    description: "A vector of length num_sampled, for each sampled\ncandidate representing the number of times the candidate is expected\nto occur in a batch of sampled candidates.  If unique=true, then this is a\nprobability."
     type: DT_FLOAT
   }
   attr {
     name: "num_true"
     type: "int"
-    description: "Number of true labels per context."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "num_sampled"
     type: "int"
-    description: "Number of candidates to randomly sample."
     has_minimum: true
     minimum: 1
   }
   attr {
     name: "unique"
     type: "bool"
-    description: "If unique is true, we sample with rejection, so that all sampled\ncandidates in a batch are unique. This requires some approximation to\nestimate the post-rejection sampling probabilities."
   }
   attr {
     name: "range_max"
     type: "int"
-    description: "The sampler will sample integers from the interval [0, range_max)."
     has_minimum: true
     minimum: 1
   }
@@ -32983,7 +30335,6 @@ op {
     default_value {
       i: 0
     }
-    description: "If either seed or seed2 are set to be non-zero, the random number\ngenerator is seeded by the given seed.  Otherwise, it is seeded by a\nrandom seed."
   }
   attr {
     name: "seed2"
@@ -32991,27 +30342,21 @@ op {
     default_value {
       i: 0
     }
-    description: "An second seed to avoid seed collision."
   }
-  summary: "Generates labels for candidate sampling with a uniform distribution."
-  description: "See explanations of candidate sampling and the data formats at\ngo/candidate-sampling.\n\nFor each batch, this op picks a single set of sampled candidate labels.\n\nThe advantages of sampling candidates per-batch are simplicity and the\npossibility of efficient dense matrix multiplication. The disadvantage is that\nthe sampled candidates must be chosen independently of the context and of the\ntrue labels."
   is_stateful: true
 }
 op {
   name: "Unique"
   input_arg {
     name: "x"
-    description: "1-D."
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    description: "1-D."
     type_attr: "T"
   }
   output_arg {
     name: "idx"
-    description: "1-D."
     type_attr: "out_idx"
   }
   attr {
@@ -33031,29 +30376,95 @@ op {
       }
     }
   }
-  summary: "Finds unique elements in a 1-D tensor."
-  description: "This operation returns a tensor `y` containing all of the unique elements of `x`\nsorted in the same order that they occur in `x`. This operation also returns a\ntensor `idx` the same size as `x` that contains the index of each value of `x`\nin the unique output `y`. In other words:\n\n`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`\n\nFor example:\n\n```\n# tensor \'x\' is [1, 1, 2, 4, 4, 4, 7, 8, 8]\ny, idx = unique(x)\ny ==> [1, 2, 4, 7, 8]\nidx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]\n```"
+}
+op {
+  name: "UniqueDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
+op {
+  name: "UniqueV2"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "axis"
+    type_attr: "Taxis"
+  }
+  output_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "idx"
+    type_attr: "out_idx"
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+  attr {
+    name: "Taxis"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "out_idx"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "UniqueWithCounts"
   input_arg {
     name: "x"
-    description: "1-D."
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    description: "1-D."
     type_attr: "T"
   }
   output_arg {
     name: "idx"
-    description: "1-D."
     type_attr: "out_idx"
   }
   output_arg {
     name: "count"
-    description: "1-D."
     type_attr: "out_idx"
   }
   attr {
@@ -33073,19 +30484,15 @@ op {
       }
     }
   }
-  summary: "Finds unique elements in a 1-D tensor."
-  description: "This operation returns a tensor `y` containing all of the unique elements of `x`\nsorted in the same order that they occur in `x`. This operation also returns a\ntensor `idx` the same size as `x` that contains the index of each value of `x`\nin the unique output `y`. Finally, it returns a third tensor `count` that\ncontains the count of each element of `y` in `x`. In other words:\n\n`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`\n\nFor example:\n\n```\n# tensor \'x\' is [1, 1, 2, 4, 4, 4, 7, 8, 8]\ny, idx, count = unique_with_counts(x)\ny ==> [1, 2, 4, 7, 8]\nidx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]\ncount ==> [2, 1, 3, 1, 2]\n```"
 }
 op {
   name: "Unpack"
   input_arg {
     name: "value"
-    description: "1-D or higher, with `axis` dimension size equal to `num`."
     type_attr: "T"
   }
   output_arg {
     name: "output"
-    description: "The list of tensors unpacked from `value`."
     type_attr: "T"
     number_attr: "num"
   }
@@ -33104,10 +30511,7 @@ op {
     default_value {
       i: 0
     }
-    description: "Dimension along which to unpack.  Negative values wrap around, so the\nvalid range is `[-R, R)`."
   }
-  summary: "Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors."
-  description: "Unpacks `num` tensors from `value` by chipping it along the `axis` dimension.\nFor example, given a tensor of shape `(A, B, C, D)`;\n\nIf `axis == 0` then the i\'th tensor in `output` is the slice `value[i, :, :, :]`\n  and each tensor in `output` will have shape `(B, C, D)`. (Note that the\n  dimension unpacked along is gone, unlike `split`).\n\nIf `axis == 1` then the i\'th tensor in `output` is the slice `value[:, i, :, :]`\n  and each tensor in `output` will have shape `(A, C, D)`.\nEtc.\n\nThis is the opposite of `pack`."
 }
 op {
   name: "UnsortedSegmentMax"
@@ -33117,16 +30521,14 @@ op {
   }
   input_arg {
     name: "segment_ids"
-    description: "A 1-D tensor whose rank is equal to the rank of `data`\'s\nfirst dimension."
     type_attr: "Tindices"
   }
   input_arg {
     name: "num_segments"
-    type: DT_INT32
+    type_attr: "Tnumsegments"
   }
   output_arg {
     name: "output"
-    description: "Has same shape as data, except for dimension 0 which\nhas size `num_segments`."
     type_attr: "T"
   }
   attr {
@@ -33137,10 +30539,11 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
-        type: DT_INT64
         type: DT_UINT8
         type: DT_INT16
         type: DT_INT8
+        type: DT_INT64
+        type: DT_BFLOAT16
         type: DT_UINT16
         type: DT_HALF
         type: DT_UINT32
@@ -33158,8 +30561,19 @@ op {
       }
     }
   }
-  summary: "Computes the Max along segments of a tensor."
-  description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nThis operator is similar to the [unsorted segment sum operator](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).\nInstead of computing the sum over segments, it computes the maximum\nsuch that:\n\n\\\\(output_i = \\max_j data_j\\\\) where max is over `j` such\nthat `segment_ids[j] == i`.\n\nIf the maximum is empty for a given segment ID `i`, it outputs the smallest possible value for specific numeric type,\n `output[i] = numeric_limits<T>::min()`.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/UnsortedSegmentMax.png\" alt>\n</div>"
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "UnsortedSegmentSum"
@@ -33169,16 +30583,14 @@ op {
   }
   input_arg {
     name: "segment_ids"
-    description: "A tensor whose shape is a prefix of `data.shape`."
     type_attr: "Tindices"
   }
   input_arg {
     name: "num_segments"
-    type: DT_INT32
+    type_attr: "Tnumsegments"
   }
   output_arg {
     name: "output"
-    description: "Has same shape as data, except for the first `segment_ids.rank`\ndimensions, which are replaced with a single dimension which has size\n`num_segments`."
     type_attr: "T"
   }
   attr {
@@ -33188,17 +30600,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -33215,8 +30628,19 @@ op {
       }
     }
   }
-  summary: "Computes the sum along segments of a tensor."
-  description: "Read @{$math_ops#segmentation$the section on segmentation} for an explanation of\nsegments.\n\nComputes a tensor such that\n`(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such\nthat `segment_ids[j...] == i`.  Unlike `SegmentSum`, `segment_ids`\nneed not be sorted and need not cover all values in the full\nrange of valid values.\n\nIf the sum is empty for a given segment ID `i`, `output[i] = 0`.\n\n`num_segments` should equal the number of distinct segment IDs.\n\n<div style=\"width:70%; margin:auto; margin-bottom:10px; margin-top:20px;\">\n<img style=\"width:100%\" src=\"https://www.tensorflow.org/images/UnsortedSegmentSum.png\" alt>\n</div>"
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
 }
 op {
   name: "Unstage"
@@ -33260,8 +30684,6 @@ op {
       s: ""
     }
   }
-  summary: "Op is similar to a lightweight Dequeue."
-  description: "The basic functionality is similar to dequeue with many fewer\ncapabilities and options.  This Op is optimized for performance."
   is_stateful: true
 }
 op {
@@ -33276,7 +30698,6 @@ op {
     default_value {
       s: ""
     }
-    description: "the container this variable is placed in."
   }
   attr {
     name: "shared_name"
@@ -33284,34 +30705,27 @@ op {
     default_value {
       s: ""
     }
-    description: "the name by which this variable is referred to."
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "the type of this variable. Must agree with the dtypes\nof all ops using this variable."
   }
   attr {
     name: "shape"
     type: "shape"
-    description: "The (possibly partially specified) shape of this variable."
   }
-  summary: "Creates a handle to a Variable resource."
   is_stateful: true
 }
 op {
   name: "VarIsInitializedOp"
   input_arg {
     name: "resource"
-    description: "the input resource handle."
     type: DT_RESOURCE
   }
   output_arg {
     name: "is_initialized"
-    description: "a scalar boolean which is true if the variable has been\ninitialized."
     type: DT_BOOL
   }
-  summary: "Checks whether a resource handle-based variable has been initialized."
   is_stateful: true
 }
 op {
@@ -33343,7 +30757,6 @@ op {
       s: ""
     }
   }
-  summary: "Use VariableV2 instead."
   is_stateful: true
 }
 op {
@@ -33369,27 +30782,22 @@ op {
       }
     }
   }
-  summary: "Returns the shape of the variable pointed to by `resource`."
-  description: "This operation returns a 1-D integer tensor representing the shape of `input`.\n\nFor example:\n\n```\n# \'t\' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]\nshape(t) ==> [2, 2, 3]\n```"
   is_stateful: true
 }
 op {
   name: "VariableV2"
   output_arg {
     name: "ref"
-    description: "A reference to the variable tensor."
     type_attr: "dtype"
     is_ref: true
   }
   attr {
     name: "shape"
     type: "shape"
-    description: "The shape of the variable tensor."
   }
   attr {
     name: "dtype"
     type: "type"
-    description: "The type of elements in the variable tensor."
   }
   attr {
     name: "container"
@@ -33397,7 +30805,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this variable is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -33405,10 +30812,7 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this variable is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
-  summary: "Holds state in the form of a tensor that persists across steps."
-  description: "Outputs a ref to the tensor state so it may be read or modified.\nTODO(zhifengc/mrry): Adds a pointer to a more detail document\nabout sharing states in tensorflow."
   is_stateful: true
 }
 op {
@@ -33431,17 +30835,18 @@ op {
       list {
         type: DT_FLOAT
         type: DT_DOUBLE
-        type: DT_INT64
         type: DT_INT32
         type: DT_UINT8
-        type: DT_UINT16
         type: DT_INT16
         type: DT_INT8
         type: DT_COMPLEX64
-        type: DT_COMPLEX128
+        type: DT_INT64
         type: DT_QINT8
         type: DT_QUINT8
         type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
         type: DT_HALF
         type: DT_UINT32
         type: DT_UINT64
@@ -33449,14 +30854,11 @@ op {
       }
     }
   }
-  summary: "Returns locations of nonzero / true values in a tensor."
-  description: "This operation returns the coordinates of true elements in `input`. The\ncoordinates are returned in a 2-D tensor where the first dimension (rows)\nrepresents the number of true elements, and the second dimension (columns)\nrepresents the coordinates of the true elements. Keep in mind, the shape of\nthe output tensor can vary depending on how many true values there are in\n`input`. Indices are output in row-major order.\n\nFor example:\n\n```\n# \'input\' tensor is [[True, False]\n#                    [True, False]]\n# \'input\' has two true values, so output has two coordinates.\n# \'input\' has rank of 2, so coordinates have two indices.\nwhere(input) ==> [[0, 0],\n                  [1, 0]]\n\n# `input` tensor is [[[True, False]\n#                     [True, False]]\n#                    [[False, True]\n#                     [False, True]]\n#                    [[False, False]\n#                     [False, True]]]\n# \'input\' has 5 true values, so output has 5 coordinates.\n# \'input\' has rank of 3, so coordinates have three indices.\nwhere(input) ==> [[0, 0, 0],\n                  [0, 1, 0],\n                  [1, 0, 1],\n                  [1, 1, 1],\n                  [2, 1, 1]]\n\n# `input` tensor is [[[1.5,  0.0]\n#                     [-0.5, 0.0]]\n#                    [[0.0,  0.25]\n#                     [0.0,  0.75]]\n#                    [[0.0,  0.0]\n#                     [0.0,  0.01]]]\n# \'input\' has 5 nonzero values, so output has 5 coordinates.\n# \'input\' has rank of 3, so coordinates have three indices.\nwhere(input) ==> [[0, 0, 0],\n                  [0, 1, 0],\n                  [1, 0, 1],\n                  [1, 1, 1],\n                  [2, 1, 1]]\n\n# `input` tensor is [[[1.5 + 0.0j, 0.0  + 0.0j]\n#                     [0.0 + 0.5j, 0.0  + 0.0j]]\n#                    [[0.0 + 0.0j, 0.25 + 1.5j]\n#                     [0.0 + 0.0j, 0.75 + 0.0j]]\n#                    [[0.0 + 0.0j, 0.0  + 0.0j]\n#                     [0.0 + 0.0j, 0.01 + 0.0j]]]\n# \'input\' has 5 nonzero magnitude values, so output has 5 coordinates.\n# \'input\' has rank of 3, so coordinates have three indices.\nwhere(input) ==> [[0, 0, 0],\n                  [0, 1, 0],\n                  [1, 0, 1],\n                  [1, 1, 1],\n                  [2, 1, 1]]\n```"
 }
 op {
   name: "WholeFileReader"
   output_arg {
     name: "reader_handle"
-    description: "The handle to reference the Reader."
     type: DT_STRING
     is_ref: true
   }
@@ -33466,7 +30868,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -33474,17 +30875,13 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
-  summary: "A Reader that outputs the entire contents of a file as a value."
-  description: "To use, enqueue filenames in a Queue.  The output of ReaderRead will\nbe a filename (key) and the contents of that file (value)."
   is_stateful: true
 }
 op {
   name: "WholeFileReaderV2"
   output_arg {
     name: "reader_handle"
-    description: "The handle to reference the Reader."
     type: DT_RESOURCE
   }
   attr {
@@ -33493,7 +30890,6 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is placed in the given container.\nOtherwise, a default container is used."
   }
   attr {
     name: "shared_name"
@@ -33501,44 +30897,34 @@ op {
     default_value {
       s: ""
     }
-    description: "If non-empty, this reader is named in the given bucket\nwith this shared_name. Otherwise, the node name is used instead."
   }
-  summary: "A Reader that outputs the entire contents of a file as a value."
-  description: "To use, enqueue filenames in a Queue.  The output of ReaderRead will\nbe a filename (key) and the contents of that file (value)."
   is_stateful: true
 }
 op {
   name: "WriteFile"
   input_arg {
     name: "filename"
-    description: "scalar. The name of the file to which we write the contents."
     type: DT_STRING
   }
   input_arg {
     name: "contents"
-    description: "scalar. The content to be written to the output file."
     type: DT_STRING
   }
-  summary: "Writes contents to the file at input filename. Creates file and recursively"
-  description: "creates directory if not existing."
 }
 op {
   name: "ZerosLike"
   input_arg {
     name: "x"
-    description: "a tensor of type T."
     type_attr: "T"
   }
   output_arg {
     name: "y"
-    description: "a tensor of the same shape and type as x but filled with zeros."
     type_attr: "T"
   }
   attr {
     name: "T"
     type: "type"
   }
-  summary: "Returns a tensor of zeros with the same shape and type as x."
 }
 op {
   name: "Zeta"
@@ -33564,8 +30950,6 @@ op {
       }
     }
   }
-  summary: "Compute the Hurwitz zeta function \\\\(\\zeta(x, q)\\\\)."
-  description: "The Hurwitz zeta function is defined as:\n\n\n\\\\(\\zeta(x, q) = \\sum_{n=0}^{\\infty} (q + n)^{-x}\\\\)"
 }
 op {
   name: "ZipDataset"
@@ -33596,5 +30980,4 @@ op {
     has_minimum: true
     minimum: 1
   }
-  summary: "Creates a dataset that zips together `input_datasets`."
 }
diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc
index 40ec792ef82ff5e0bdf6d0c4e35bf18f5560c5a7..ddd2aa92748f244c2d132f00780a0d6424f1e595 100644
--- a/tensorflow/core/ops/parsing_ops.cc
+++ b/tensorflow/core/ops/parsing_ops.cc
@@ -35,18 +35,13 @@ REGISTER_OP("DecodeRaw")
           c->input(0), c->Vector(InferenceContext::kUnknownDim), &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Reinterpret the bytes of a string as a vector of numbers.
+    });
 
-bytes: All the elements must have the same length.
-little_endian: Whether the input `bytes` are in little-endian order.
-  Ignored for `out_type` values that are stored in a single byte like
-  `uint8`.
-output: A Tensor with one more dimension than the input `bytes`.  The
-  added dimension will have size equal to the length of the elements
-  of `bytes` divided by the number of bytes to represent `out_type`.
-)doc");
+REGISTER_OP("DecodeCompressed")
+    .Input("bytes: string")
+    .Output("output: string")
+    .Attr("compression_type: string = ''")
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("ParseExample")
     .Input("serialized: string")
@@ -64,7 +59,7 @@ REGISTER_OP("ParseExample")
     .Attr("Tdense: list({float,int64,string}) >= 0")
     .Attr("dense_shapes: list(shape) >= 0")
     .SetShapeFn([](InferenceContext* c) {
-      ParseSingleExampleAttrs attrs;
+      ParseExampleAttrs attrs;
       TF_RETURN_IF_ERROR(attrs.Init(c));
 
       ShapeHandle input;
@@ -93,50 +88,49 @@ REGISTER_OP("ParseExample")
         c->set_output(output_idx++, dense);
       }
       return Status::OK();
-    })
-    .Doc(R"doc(
-Transforms a vector of brain.Example protos (as strings) into typed tensors.
+    });
 
-serialized: A vector containing a batch of binary serialized Example protos.
-names: A vector containing the names of the serialized protos.
-  May contain, for example, table key (descriptive) names for the
-  corresponding serialized protos.  These are purely useful for debugging
-  purposes, and the presence of values here has no effect on the output.
-  May also be an empty vector if no names are available.
-  If non-empty, this vector must be the same length as "serialized".
-dense_keys: A list of Ndense string Tensors (scalars).
-  The keys expected in the Examples' features associated with dense values.
-dense_defaults: A list of Ndense Tensors (some may be empty).
-  dense_defaults[j] provides default values
-  when the example's feature_map lacks dense_key[j].  If an empty Tensor is
-  provided for dense_defaults[j], then the Feature dense_keys[j] is required.
-  The input type is inferred from dense_defaults[j], even when it's empty.
-  If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined,
-  then the shape of dense_defaults[j] must match that of dense_shapes[j].
-  If dense_shapes[j] has an undefined major dimension (variable strides dense
-  feature), dense_defaults[j] must contain a single element:
-  the padding element.
-dense_shapes: A list of Ndense shapes; the shapes of data in each Feature
-  given in dense_keys.
-  The number of elements in the Feature corresponding to dense_key[j]
-  must always equal dense_shapes[j].NumEntries().
-  If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output
-  Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN):
-  The dense outputs are just the inputs row-stacked by batch.
-  This works for dense_shapes[j] = (-1, D1, ..., DN).  In this case
-  the shape of the output Tensor dense_values[j] will be
-  (|serialized|, M, D1, .., DN), where M is the maximum number of blocks
-  of elements of length D1 * .... * DN, across all minibatch entries
-  in the input.  Any minibatch entry with less than M blocks of elements of
-  length D1 * ... * DN will be padded with the corresponding default_value
-  scalar element along the second dimension.
-sparse_keys: A list of Nsparse string Tensors (scalars).
-  The keys expected in the Examples' features associated with sparse values.
-sparse_types: A list of Nsparse types; the data types of data in each Feature
-  given in sparse_keys.
-  Currently the ParseExample supports DT_FLOAT (FloatList),
-  DT_INT64 (Int64List), and DT_STRING (BytesList).
-)doc");
+REGISTER_OP("ParseSingleExample")
+    .Input("serialized: string")
+    .Input("dense_defaults: Tdense")
+    .Output("sparse_indices: num_sparse * int64")
+    .Output("sparse_values: sparse_types")
+    .Output("sparse_shapes: num_sparse * int64")
+    .Output("dense_values: Tdense")
+    .Attr("num_sparse: int >= 0")
+    .Attr("sparse_keys: list(string) >= 0")
+    .Attr("dense_keys: list(string) >= 0")
+    .Attr("sparse_types: list({float,int64,string}) >= 0")
+    .Attr("Tdense: list({float,int64,string}) >= 0")
+    .Attr("dense_shapes: list(shape) >= 0")
+    .SetShapeFn([](InferenceContext* c) {
+      ParseSingleExampleAttrs attrs;
+      TF_RETURN_IF_ERROR(attrs.Init(c));
+
+      ShapeHandle input;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &input));
+
+      // Output sparse_indices, sparse_values, and sparse_shapes.
+      int output_idx = 0;
+      for (int i = 0; i < attrs.sparse_keys.size(); ++i) {
+        c->set_output(output_idx++, c->Matrix(c->UnknownDim(), 1));
+      }
+      for (int i = 0; i < attrs.sparse_keys.size(); ++i) {
+        c->set_output(output_idx++, c->Vector(c->UnknownDim()));
+      }
+      for (int i = 0; i < attrs.sparse_keys.size(); ++i) {
+        c->set_output(output_idx++, c->Vector(1));
+      }
+
+      // Output dense_shapes.
+      for (int i = 0; i < attrs.dense_keys.size(); ++i) {
+        ShapeHandle dense;
+        TF_RETURN_IF_ERROR(
+            c->MakeShapeFromPartialTensorShape(attrs.dense_shapes[i], &dense));
+        c->set_output(output_idx++, dense);
+      }
+      return Status::OK();
+    });
 
 REGISTER_OP("ParseSingleSequenceExample")
     .Input("serialized: string")
@@ -224,106 +218,24 @@ REGISTER_OP("ParseSingleSequenceExample")
         c->set_output(output_idx++, s);
       }
       return Status::OK();
-    })
-    .Doc(R"doc(
-Transforms a scalar brain.SequenceExample proto (as strings) into typed tensors.
-
-serialized: A scalar containing a binary serialized SequenceExample proto.
-feature_list_dense_missing_assumed_empty: A vector listing the
-  FeatureList keys which may be missing from the SequenceExample.  If the
-  associated FeatureList is missing, it is treated as empty.  By default,
-  any FeatureList not listed in this vector must exist in the SequenceExample.
-context_dense_keys: A list of Ncontext_dense string Tensors (scalars).
-  The keys expected in the SequenceExamples' context features associated with
-  dense values.
-feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars).
-  The keys expected in the SequenceExamples' feature_lists associated
-  with lists of dense values.
-context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty).
-  context_dense_defaults[j] provides default values
-  when the SequenceExample's context map lacks context_dense_key[j].
-  If an empty Tensor is provided for context_dense_defaults[j],
-  then the Feature context_dense_keys[j] is required.
-  The input type is inferred from context_dense_defaults[j], even when it's
-  empty.  If context_dense_defaults[j] is not empty, its shape must match
-  context_dense_shapes[j].
-debug_name: A scalar containing the name of the serialized proto.
-  May contain, for example, table key (descriptive) name for the
-  corresponding serialized proto.  This is purely useful for debugging
-  purposes, and the presence of values here has no effect on the output.
-  May also be an empty scalar if no name is available.
-context_dense_shapes: A list of Ncontext_dense shapes; the shapes of data in
-  each context Feature given in context_dense_keys.
-  The number of elements in the Feature corresponding to context_dense_key[j]
-  must always equal context_dense_shapes[j].NumEntries().
-  The shape of context_dense_values[j] will match context_dense_shapes[j].
-feature_list_dense_shapes: A list of Nfeature_list_dense shapes; the shapes of
-  data in each FeatureList given in feature_list_dense_keys.
-  The shape of each Feature in the FeatureList corresponding to
-  feature_list_dense_key[j] must always equal
-  feature_list_dense_shapes[j].NumEntries().
-context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars).
-  The keys expected in the Examples' features associated with context_sparse
-  values.
-context_sparse_types: A list of Ncontext_sparse types; the data types of data in
-  each context Feature given in context_sparse_keys.
-  Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
-  DT_INT64 (Int64List), and DT_STRING (BytesList).
-feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors
-  (scalars).  The keys expected in the FeatureLists associated with sparse
-  values.
-feature_list_sparse_types: A list of Nfeature_list_sparse types; the data types
-  of data in each FeatureList given in feature_list_sparse_keys.
-  Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
-  DT_INT64 (Int64List), and DT_STRING (BytesList).
-)doc");
+    });
 
 REGISTER_OP("ParseTensor")
     .Input("serialized: string")
     .Output("output: out_type")
     .Attr("out_type: type")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Transforms a serialized tensorflow.TensorProto proto into a Tensor.
-
-serialized: A scalar string containing a serialized TensorProto proto.
-out_type: The type of the serialized tensor.  The provided type must match the
-  type of the serialized tensor and no implicit conversion will take place.
-output: A Tensor of type `out_type`.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("SerializeTensor")
     .Input("tensor: T")
     .Output("serialized: string")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Transforms a Tensor into a serialized TensorProto proto.
-
-tensor: A Tensor of type `T`.
-T: The type of the input tensor.
-serialized: A serialized TensorProto proto of the input tensor.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("DecodeJSONExample")
     .Input("json_examples: string")
     .Output("binary_examples: string")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Convert JSON-encoded Example records to binary protocol buffer strings.
-
-This op translates a tensor containing Example records, encoded using
-the [standard JSON
-mapping](https://developers.google.com/protocol-buffers/docs/proto3#json),
-into a tensor containing the same records encoded as binary protocol
-buffers. The resulting tensor can then be fed to any of the other
-Example-parsing ops.
-
-json_examples: Each string is a JSON object serialized according to the JSON
-  mapping of the Example proto.
-binary_examples: Each string is a binary Example protocol buffer corresponding
-  to the respective element of `json_examples`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("DecodeCSV")
     .Input("records: string")
@@ -347,39 +259,12 @@ REGISTER_OP("DecodeCSV")
       // Propagate shape of the records input.
       for (int i = 0; i < c->num_outputs(); ++i) c->set_output(i, c->input(0));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Convert CSV records to tensors. Each column maps to one tensor.
-
-RFC 4180 format is expected for the CSV records.
-(https://tools.ietf.org/html/rfc4180)
-Note that we allow leading and trailing spaces with int or float field.
-
-records: Each string is a record/row in the csv and all records should have
-  the same format.
-record_defaults: One tensor per column of the input record, with either a
-  scalar default value for that column or empty if the column is required.
-field_delim: char delimiter to separate fields in a record.
-use_quote_delim: If false, treats double quotation marks as regular
-  characters inside of the string fields (ignoring RFC 4180, Section 2,
-  Bullet 5).
-na_value: Additional string to recognize as NA/NaN.
-output: Each tensor will have the same shape as records.
-)doc");
+    });
 
 REGISTER_OP("StringToNumber")
     .Input("string_tensor: string")
     .Output("output: out_type")
     .Attr("out_type: {float, double, int32, int64} = DT_FLOAT")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Converts each string in the input Tensor to the specified numeric type.
-
-(Note that int32 overflow results in an error while float overflow
-results in a rounded value.)
-
-out_type: The numeric type to interpret each string in `string_tensor` as.
-output: A Tensor of the same shape as the input `string_tensor`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/parsing_ops_test.cc b/tensorflow/core/ops/parsing_ops_test.cc
index c6e521e33e98017ee7cfd96c88ee82d3d338967f..9121d7ae924fc161ca07017d0057fbf876a9ed12 100644
--- a/tensorflow/core/ops/parsing_ops_test.cc
+++ b/tensorflow/core/ops/parsing_ops_test.cc
@@ -119,7 +119,7 @@ TEST(ParsingOpsTest, ParseExample_ShapeFn) {
            ("[?,2];[?,2];[?];[?];[2];[2];"         // sparse outputs
             "[d0_0,1];[d0_0,1,2];[d0_0,1,2,3]"));  // dense outputs
 
-  // Confirm an error from ParseSingleExampleAttrs.Init().
+  // Confirm an error from ParseExampleAttrs.Init().
   set_outputs(2, 3, true /* add_extra_shape */);
   INFER_ERROR("len(dense_keys) != len(dense_shapes)", op,
               "?;?;?;?;?;?;?;?;?;?");
diff --git a/tensorflow/core/ops/random_ops.cc b/tensorflow/core/ops/random_ops.cc
index 2429171fa93093362510601c5167d63a62caec54..f6c668f5c98efff07a49be15b1187f1858800110 100644
--- a/tensorflow/core/ops/random_ops.cc
+++ b/tensorflow/core/ops/random_ops.cc
@@ -29,24 +29,9 @@ REGISTER_OP("RandomUniform")
     .Output("output: dtype")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
-    .Attr("dtype: {half,float,double}")
+    .Attr("dtype: {half,bfloat16,float,double}")
     .Attr("T: {int32, int64}")
-    .SetShapeFn(shape_inference::RandomShape)
-    .Doc(R"doc(
-Outputs random values from a uniform distribution.
-
-The generated values follow a uniform distribution in the range `[0, 1)`. The
-lower bound 0 is included in the range, while the upper bound 1 is excluded.
-
-shape: The shape of the output tensor.
-dtype: The type of the output.
-seed: If either `seed` or `seed2` are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: A second seed to avoid seed collision.
-
-output: A tensor of the specified shape filled with uniform random values.
-)doc");
+    .SetShapeFn(shape_inference::RandomShape);
 
 REGISTER_OP("RandomUniformInt")
     .Input("shape: T")
@@ -58,28 +43,7 @@ REGISTER_OP("RandomUniformInt")
     .Attr("seed2: int = 0")
     .Attr("Tout: {int32, int64}")
     .Attr("T: {int32, int64}")
-    .SetShapeFn(shape_inference::RandomShape)
-    .Doc(R"doc(
-Outputs random integers from a uniform distribution.
-
-The generated values are uniform integers in the range `[minval, maxval)`.
-The lower bound `minval` is included in the range, while the upper bound
-`maxval` is excluded.
-
-The random integers are slightly biased unless `maxval - minval` is an exact
-power of two.  The bias is small for values of `maxval - minval` significantly
-smaller than the range of the output (either `2^32` or `2^64`).
-
-shape: The shape of the output tensor.
-minval: 0-D.  Inclusive lower bound on the generated integers.
-maxval: 0-D.  Exclusive upper bound on the generated integers.
-seed: If either `seed` or `seed2` are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: A second seed to avoid seed collision.
-
-output: A tensor of the specified shape filled with uniform random integers.
-)doc");
+    .SetShapeFn(shape_inference::RandomShape);
 
 REGISTER_OP("RandomStandardNormal")
     .Input("shape: T")
@@ -87,23 +51,9 @@ REGISTER_OP("RandomStandardNormal")
     .Output("output: dtype")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
-    .Attr("dtype: {half,float,double}")
+    .Attr("dtype: {half,bfloat16,float,double}")
     .Attr("T: {int32, int64}")
-    .SetShapeFn(shape_inference::RandomShape)
-    .Doc(R"doc(
-Outputs random values from a normal distribution.
-
-The generated values will have mean 0 and standard deviation 1.
-
-shape: The shape of the output tensor.
-dtype: The type of the output.
-seed: If either `seed` or `seed2` are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: A second seed to avoid seed collision.
-
-output: A tensor of the specified shape filled with random normal values.
-)doc");
+    .SetShapeFn(shape_inference::RandomShape);
 
 REGISTER_OP("ParameterizedTruncatedNormal")
     .Input("shape: T")
@@ -115,29 +65,9 @@ REGISTER_OP("ParameterizedTruncatedNormal")
     .Output("output: dtype")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
-    .Attr("dtype: {half,float,double}")
+    .Attr("dtype: {half,bfloat16,float,double}")
     .Attr("T: {int32, int64}")
-    .SetShapeFn(shape_inference::RandomShape)
-    .Doc(R"doc(
-Outputs random values from a normal distribution. The parameters may each be a
-scalar which applies to the entire output, or a vector of length shape[0] which
-stores the parameters for each batch.
-
-shape: The shape of the output tensor. Batches are indexed by the 0th dimension.
-means: The mean parameter of each batch.
-stdevs: The standard deviation parameter of each batch. Must be greater than 0.
-minvals: The minimum cutoff. May be -infinity.
-maxvals: The maximum cutoff. May be +infinity, and must be more than the minval
-  for each batch.
-dtype: The type of the output.
-seed: If either `seed` or `seed2` are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: A second seed to avoid seed collision.
-
-output: A matrix of shape num_batches x samples_per_batch, filled with random
-  truncated normal values using the parameters for each row.
-)doc");
+    .SetShapeFn(shape_inference::RandomShape);
 
 REGISTER_OP("TruncatedNormal")
     .Input("shape: T")
@@ -145,26 +75,9 @@ REGISTER_OP("TruncatedNormal")
     .Output("output: dtype")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
-    .Attr("dtype: {half,float,double}")
+    .Attr("dtype: {half,bfloat16,float,double}")
     .Attr("T: {int32, int64}")
-    .SetShapeFn(shape_inference::RandomShape)
-    .Doc(R"doc(
-Outputs random values from a truncated normal distribution.
-
-The generated values follow a normal distribution with mean 0 and standard
-deviation 1, except that values whose magnitude is more than 2 standard
-deviations from the mean are dropped and re-picked.
-
-shape: The shape of the output tensor.
-dtype: The type of the output.
-seed: If either `seed` or `seed2` are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: A second seed to avoid seed collision.
-
-output: A tensor of the specified shape filled with random truncated normal
-  values.
-)doc");
+    .SetShapeFn(shape_inference::RandomShape);
 
 REGISTER_OP("RandomShuffle")
     .Input("value: T")
@@ -173,38 +86,17 @@ REGISTER_OP("RandomShuffle")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .Attr("T: type")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Randomly shuffles a tensor along its first dimension.
-
-  The tensor is shuffled along dimension 0, such that each `value[j]` is mapped
-  to one and only one `output[i]`. For example, a mapping that might occur for a
-  3x2 tensor is:
-
-```
-[[1, 2],       [[5, 6],
- [3, 4],  ==>   [1, 2],
- [5, 6]]        [3, 4]]
-```
-
-value: The tensor to be shuffled.
-seed: If either `seed` or `seed2` are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: A second seed to avoid seed collision.
-
-output: A tensor of same shape and type as `value`, shuffled along its first
-  dimension.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Multinomial")
     .SetIsStateful()
     .Input("logits: T")
     .Input("num_samples: int32")
-    .Output("output: int64")
+    .Output("output: output_dtype")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .Attr("T: realnumbertype")
+    .Attr("output_dtype: {int32, int64} = DT_INT64")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle logits_shape;
       ShapeHandle unused;
@@ -214,19 +106,7 @@ REGISTER_OP("Multinomial")
       TF_RETURN_IF_ERROR(c->MakeDimForScalarInput(1, &num_samples));
       c->set_output(0, c->Matrix(c->Dim(logits_shape, 0), num_samples));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Draws samples from a multinomial distribution.
-
-logits: 2-D Tensor with shape `[batch_size, num_classes]`.  Each slice `[i, :]`
-  represents the unnormalized log probabilities for all classes.
-num_samples: 0-D.  Number of independent samples to draw for each row slice.
-seed: If either seed or seed2 is set to be non-zero, the internal random number
-  generator is seeded by the given seed.  Otherwise, a random seed is used.
-seed2: A second seed to avoid seed collision.
-output: 2-D Tensor with shape `[batch_size, num_samples]`.  Each slice `[i, :]`
-  contains the drawn class labels with range `[0, num_classes)`.
-)doc");
+    });
 
 REGISTER_OP("RandomGamma")
     .SetIsStateful()
@@ -243,27 +123,7 @@ REGISTER_OP("RandomGamma")
       TF_RETURN_IF_ERROR(c->Concatenate(out, c->input(1), &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Outputs random values from the Gamma distribution(s) described by alpha.
-
-This op uses the algorithm by Marsaglia et al. to acquire samples via
-transformation-rejection from pairs of uniform and normal random variables.
-See http://dl.acm.org/citation.cfm?id=358414
-
-shape: 1-D integer tensor. Shape of independent samples to draw from each
-  distribution described by the shape parameters given in alpha.
-alpha: A tensor in which each scalar is a "shape" parameter describing the
-  associated gamma distribution.
-seed: If either `seed` or `seed2` are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: A second seed to avoid seed collision.
-
-output: A tensor with shape `shape + shape(alpha)`. Each slice
-  `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
-  `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha.
-)doc");
+    });
 
 REGISTER_OP("RandomPoisson")
     .SetIsStateful()
@@ -281,10 +141,7 @@ REGISTER_OP("RandomPoisson")
       c->set_output(0, out);
       return Status::OK();
     })
-    .Deprecated(25, "Replaced by RandomPoissonV2")
-    .Doc(R"doc(
-Use RandomPoissonV2 instead.
-)doc");
+    .Deprecated(25, "Replaced by RandomPoissonV2");
 
 REGISTER_OP("RandomPoissonV2")
     .SetIsStateful()
@@ -302,32 +159,6 @@ REGISTER_OP("RandomPoissonV2")
       TF_RETURN_IF_ERROR(c->Concatenate(out, c->input(1), &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Outputs random values from the Poisson distribution(s) described by rate.
-
-This op uses two algorithms, depending on rate. If rate >= 10, then
-the algorithm by Hormann is used to acquire samples via
-transformation-rejection.
-See http://www.sciencedirect.com/science/article/pii/0167668793909974.
-
-Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform
-random variables.
-See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer
-Programming, Volume 2. Addison Wesley
-
-shape: 1-D integer tensor. Shape of independent samples to draw from each
-  distribution described by the shape parameters given in rate.
-rate: A tensor in which each scalar is a "rate" parameter describing the
-  associated poisson distribution.
-seed: If either `seed` or `seed2` are set to be non-zero, the random number
-  generator is seeded by the given seed.  Otherwise, it is seeded by a
-  random seed.
-seed2: A second seed to avoid seed collision.
-
-output: A tensor with shape `shape + shape(rate)`. Each slice
-  `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
-  `rate[i0, i1, ...iN]`.
-)doc");
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/remote_fused_graph_ops.cc b/tensorflow/core/ops/remote_fused_graph_ops.cc
index 85370e648c4d43e9595ac16402eb99aa851382d1..d90466673349fb2e75ba2cc81b181520d2bd52b2 100644
--- a/tensorflow/core/ops/remote_fused_graph_ops.cc
+++ b/tensorflow/core/ops/remote_fused_graph_ops.cc
@@ -36,23 +36,6 @@ REGISTER_OP("RemoteFusedGraphExecute")
     .Attr("Tinputs: list(type) >= 0")
     .Attr("Toutputs: list(type) >= 0")
     .Attr("serialized_remote_fused_graph_execute_info: string")
-    .SetShapeFn(RemoteFusedGraphExecuteShapeFn)
-    .Doc(R"doc(
-Execute a sub graph on a remote processor.
-
-The graph specifications(such as graph itself, input tensors and output names)
-are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo
-as serialized_remote_fused_graph_execute_info.
-The specifications will be passed to a dedicated registered
-remote fused graph executor.  The executor will send the graph specifications
-to a remote processor and execute that graph.  The execution results
-will be passed to consumer nodes as outputs of this node.
-
-inputs: Arbitrary number of tensors with arbitrary data types
-outputs: Arbitrary number of tensors with arbitrary data types
-serialized_remote_fused_graph_execute_info: Serialized protocol buffer
-of RemoteFusedGraphExecuteInfo which contains graph specifications.
-
-)doc");
+    .SetShapeFn(RemoteFusedGraphExecuteShapeFn);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/resource_variable_ops.cc b/tensorflow/core/ops/resource_variable_ops.cc
index cdfbec85cf1194d02c81cb4a3d66563dc85dfa57..f6cfbf873a024e3a035842468fc5ccca2d341ce7 100644
--- a/tensorflow/core/ops/resource_variable_ops.cc
+++ b/tensorflow/core/ops/resource_variable_ops.cc
@@ -76,51 +76,19 @@ REGISTER_OP("VarHandleOp")
                                             std::vector<ShapeAndType>{{s, t}});
 
       return Status::OK();
-    })
-    .Doc(R"(
-Creates a handle to a Variable resource.
-
-container: the container this variable is placed in.
-shared_name: the name by which this variable is referred to.
-dtype: the type of this variable. Must agree with the dtypes
-  of all ops using this variable.
-shape: The (possibly partially specified) shape of this variable.
-)");
+    });
 
 REGISTER_OP("ReadVariableOp")
     .Input("resource: resource")
     .Output("value: dtype")
     .Attr("dtype: type")
-    .SetShapeFn(ReadVariableShapeFn)
-    .Doc(R"(
-Reads the value of a variable.
-
-The tensor returned by this operation is immutable.
-
-The value returned by this operation is guaranteed to be influenced by all the
-writes on which this operation depends directly or indirectly, and to not be
-influenced by any of the writes which depend directly or indirectly on this
-operation.
-
-resource: handle to the resource in which to store the variable.
-dtype: the dtype of the value.
-)");
+    .SetShapeFn(ReadVariableShapeFn);
 
 REGISTER_OP("DestroyResourceOp")
     .Input("resource: resource")
     .Attr("ignore_lookup_error: bool = true")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R"(
-Deletes the resource specified by the handle.
-
-All subsequent operations using the resource will result in a NotFound
-error status.
-
-resource: handle to the resource to delete.
-ignore_lookup_error: whether to ignore the error when the resource
-  doesn't exist.
-)");
+    .SetShapeFn(shape_inference::NoOutputs);
 
 Status CreateAssignShapeFn(InferenceContext* c) {
   ShapeAndType handle_shape_and_type;
@@ -137,74 +105,34 @@ REGISTER_OP("AssignVariableOp")
     .Input("resource: resource")
     .Input("value: dtype")
     .Attr("dtype: type")
-    .SetShapeFn(CreateAssignShapeFn)
-    .Doc(R"(
-Assigns a new value to a variable.
-
-Any ReadVariableOp with a control dependency on this op is guaranteed to return
-this value or a subsequent newer value of the variable.
-
-resource: handle to the resource in which to store the variable.
-value: the value to set the new tensor to use.
-dtype: the dtype of the value.
-)");
+    .SetShapeFn(CreateAssignShapeFn);
 
 REGISTER_OP("AssignAddVariableOp")
     .Input("resource: resource")
     .Input("value: dtype")
     .Attr("dtype: type")
-    .SetShapeFn(CreateAssignShapeFn)
-    .Doc(R"(
-Adds a value to the current value of a variable.
-
-Any ReadVariableOp which depends directly or indirectly on this assign is
-guaranteed to see the incremented value or a subsequent newer one.
-
-Outputs the incremented value, which can be used to totally order the
-increments to this variable.
-
-resource: handle to the resource in which to store the variable.
-value: the value by which the variable will be incremented.
-dtype: the dtype of the value.
-)");
+    .SetShapeFn(CreateAssignShapeFn);
 
 REGISTER_OP("AssignSubVariableOp")
     .Input("resource: resource")
     .Input("value: dtype")
     .Attr("dtype: type")
-    .SetShapeFn(CreateAssignShapeFn)
-    .Doc(R"(
-Subtracts a value from the current value of a variable.
-
-Any ReadVariableOp which depends directly or indirectly on this assign is
-guaranteed to see the incremented value or a subsequent newer one.
-
-Outputs the incremented value, which can be used to totally order the
-increments to this variable.
-
-resource: handle to the resource in which to store the variable.
-value: the value by which the variable will be incremented.
-dtype: the dtype of the value.
-)");
+    .SetShapeFn(CreateAssignShapeFn);
 
 REGISTER_OP("VarIsInitializedOp")
     .Input("resource: resource")
     .Output("is_initialized: bool")
-    .SetShapeFn(tensorflow::shape_inference::ScalarShape)
-    .Doc(R"doc(
-Checks whether a resource handle-based variable has been initialized.
-
-resource: the input resource handle.
-is_initialized: a scalar boolean which is true if the variable has been
-initialized.
-)doc");
+    .SetShapeFn(tensorflow::shape_inference::ScalarShape);
 
 Status VariableShapeShapeFn(InferenceContext* c) {
   auto* handle_data = c->input_handle_shapes_and_types(0);
   if (handle_data == nullptr || handle_data->empty()) {
     return errors::InvalidArgument("Handle doesn't have shape information.");
   }
-  c->set_output(0, (*handle_data)[0].shape);
+  ShapeHandle var_shape = (*handle_data)[0].shape;
+  int64 rank = c->RankKnown(var_shape) ? c->Rank(var_shape)
+                                       : InferenceContext::kUnknownDim;
+  c->set_output(0, c->Vector(rank));
   return Status::OK();
 }
 
@@ -212,20 +140,7 @@ REGISTER_OP("VariableShape")
     .Input("input: resource")
     .Output("output: out_type")
     .Attr("out_type: {int32, int64} = DT_INT32")
-    .SetShapeFn(VariableShapeShapeFn)
-    .Doc(R"doc(
-Returns the shape of the variable pointed to by `resource`.
-
-This operation returns a 1-D integer tensor representing the shape of `input`.
-
-For example:
-
-```
-# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
-shape(t) ==> [2, 2, 3]
-```
-
-)doc");
+    .SetShapeFn(VariableShapeShapeFn);
 
 REGISTER_OP("ResourceGather")
     .Input("resource: resource")
@@ -250,25 +165,7 @@ REGISTER_OP("ResourceGather")
       TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, params_subshape, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Gather slices from the variable pointed to by `resource` according to `indices`.
-
-`indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
-Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
-
-```python
-    # Scalar indices
-    output[:, ..., :] = params[indices, :, ... :]
-
-    # Vector indices
-    output[i, :, ..., :] = params[indices[i], :, ... :]
-
-    # Higher rank indices
-    output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]
-```
-
-)doc");
+    });
 
 REGISTER_OP("ResourceScatterAdd")
     .Input("resource: resource")
@@ -290,34 +187,7 @@ REGISTER_OP("ResourceScatterAdd")
       TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, var_subshape, &concat));
       TF_RETURN_IF_ERROR(c->Merge(c->input(2), concat, &unused_updates_shape));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Adds sparse updates to the variable referenced by `resource`.
-
-This operation computes
-
-    # Scalar indices
-    ref[indices, ...] += updates[...]
-
-    # Vector indices (for each i)
-    ref[indices[i], ...] += updates[i, ...]
-
-    # High rank indices (for each i, ..., j)
-    ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]
-
-Duplicate entries are handled correctly: if multiple `indices` reference
-the same location, their contributions add.
-
-Requires `updates.shape = indices.shape + ref.shape[1:]`.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
-</div>
-
-resource: Should be from a `Variable` node.
-indices: A tensor of indices into the first dimension of `ref`.
-updates: A tensor of updated values to add to `ref`.
-)doc");
+    });
 
 REGISTER_OP("ResourceScatterUpdate")
     .Input("resource: resource")
@@ -339,24 +209,36 @@ REGISTER_OP("ResourceScatterUpdate")
       TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, var_subshape, &concat));
       TF_RETURN_IF_ERROR(c->Merge(c->input(2), concat, &unused_updates_shape));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Assigns sparse updates to the variable referenced by `resource`.
-
-This operation computes
-
-    # Scalar indices
-    ref[indices, ...] = updates[...]
+    });
 
-    # Vector indices (for each i)
-    ref[indices[i], ...] = updates[i, ...]
-
-    # High rank indices (for each i, ..., j)
-    ref[indices[i, ..., j], ...] = updates[i, ..., j, ...]
-
-resource: Should be from a `Variable` node.
-indices: A tensor of indices into the first dimension of `ref`.
-updates: A tensor of updated values to add to `ref`.
-)doc");
+REGISTER_OP("CriticalSectionOp")
+    .Attr("container: string = ''")
+    .Attr("shared_name: string = ''")
+    .Output("resource: resource")
+    .SetIsStateful()
+    .SetShapeFn([](InferenceContext* c) {
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    });
+
+REGISTER_OP("ExecuteInCriticalSection")
+    .Input("critical_section: resource")
+    .Input("arguments: Targuments")
+    .Output("outputs: output_types")
+    .Attr("f: func")
+    .Attr("Targuments: list(type) >= 0")
+    .Attr("output_types: list(type) >= 0")
+    .Attr("output_shapes: list(shape) >= 0")
+    .SetShapeFn([](InferenceContext* c) {
+      std::vector<PartialTensorShape> output_shapes;
+      TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes));
+      for (int i = 0; i < output_shapes.size(); ++i) {
+        ShapeHandle s;
+        TF_RETURN_IF_ERROR(
+            c->MakeShapeFromPartialTensorShape(output_shapes[i], &s));
+        c->set_output(i, s);
+      }
+      return Status::OK();
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/script_ops.cc b/tensorflow/core/ops/script_ops.cc
index 8197327b562c5296e4bcbe43ce9ca81696dedf8b..d8716f0389a3bbb9fce88860fd136df04b702475 100644
--- a/tensorflow/core/ops/script_ops.cc
+++ b/tensorflow/core/ops/script_ops.cc
@@ -25,20 +25,7 @@ REGISTER_OP("PyFunc")
     .Attr("Tin: list(type) >= 0")
     .Attr("Tout: list(type) >=0")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Invokes a python function to compute func(input)->output.
-
-This operation is considered stateful. For a stateless version, see
-PyFuncStateless.
-
-token: A token representing a registered python function in this address space.
-input: List of Tensors that will provide input to the Op.
-output: The outputs from the Op.
-Tin: Data types of the inputs to the op.
-Tout: Data types of the outputs from the op.
-      The length of the list specifies the number of outputs.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("PyFuncStateless")
     .Input("input: Tin")
@@ -46,9 +33,15 @@ REGISTER_OP("PyFuncStateless")
     .Attr("token: string")
     .Attr("Tin: list(type) >= 0")
     .Attr("Tout: list(type) >= 0")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-A stateless version of PyFunc.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("EagerPyFunc")
+    .Input("input: Tin")
+    .Output("output: Tout")
+    .Attr("token: string")
+    .Attr("Tin: list(type) >= 0")
+    .Attr("Tout: list(type) >=0")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/sdca_ops.cc b/tensorflow/core/ops/sdca_ops.cc
index dea75a1af83456f730a6c98cc40fd26d02ca2fda..e67d95fa8cb8466365bf12a46a123de174103d0f 100644
--- a/tensorflow/core/ops/sdca_ops.cc
+++ b/tensorflow/core/ops/sdca_ops.cc
@@ -63,78 +63,14 @@ REGISTER_OP("SdcaOptimizer")
     .Output("out_example_state_data: float")
     .Output("out_delta_sparse_weights: num_sparse_features * float")
     .Output("out_delta_dense_weights: num_dense_features * float")
-    .SetShapeFn(ApplySdcaOptimizerShapeFn)
-    .Doc(R"doc(
-Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for
-linear models with L1 + L2 regularization. As global optimization objective is
-strongly-convex, the optimizer optimizes the dual objective at each step. The
-optimizer applies each update one example at a time. Examples are sampled
-uniformly, and the optimizer is learning rate free and enjoys linear convergence
-rate.
-
-[Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
-Shai Shalev-Shwartz, Tong Zhang. 2012
-
-$$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$
-
-[Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
-Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
-Peter Richtarik, Martin Takac. 2015
-
-[Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
-Dominik Csiba, Zheng Qu, Peter Richtarik. 2015
-
-loss_type: Type of the primal loss. Currently SdcaSolver supports logistic,
-  squared and hinge losses.
-adaptative: Whether to use Adapative SDCA for the inner loop.
-num_sparse_features: Number of sparse feature groups to train on.
-num_sparse_features_with_values: Number of sparse feature groups with values
-  associated with it, otherwise implicitly treats values as 1.0.
-num_dense_features: Number of dense feature groups to train on.
-l1: Symmetric l1 regularization strength.
-l2: Symmetric l2 regularization strength.
-num_loss_partitions: Number of partitions of the global loss function.
-num_inner_iterations: Number of iterations per mini-batch.
-sparse_example_indices: a list of vectors which contain example indices.
-sparse_feature_indices: a list of vectors which contain feature indices.
-sparse_feature_values: a list of vectors which contains feature value
-  associated with each feature group.
-dense_features: a list of matrices which contains the dense feature values.
-example_weights: a vector which contains the weight associated with each
-  example.
-example_labels: a vector which contains the label/target associated with each
-  example.
-sparse_indices: a list of vectors where each value is the indices which has
-  corresponding weights in sparse_weights. This field maybe omitted for the
-  dense approach.
-sparse_weights: a list of vectors where each value is the weight associated with
-  a sparse feature group.
-dense_weights: a list of vectors where the values are the weights associated
- with a dense feature group.
-example_state_data: a list of vectors containing the example state data.
-out_example_state_data: a list of vectors containing the updated example state
-  data.
-out_delta_sparse_weights: a list of vectors where each value is the delta
-  weights associated with a sparse feature group.
-out_delta_dense_weights: a list of vectors where the values are the delta
-  weights associated with a dense feature group.
-)doc");
+    .SetShapeFn(ApplySdcaOptimizerShapeFn);
 
 REGISTER_OP("SdcaShrinkL1")
     .Attr("num_features: int >= 0")
     .Attr("l1: float")
     .Attr("l2: float")
     .Input("weights: Ref(num_features * float)")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Applies L1 regularization shrink step on the parameters.
-
-num_features: Number of feature groups to apply shrinking step.
-l1: Symmetric l1 regularization strength.
-l2: Symmetric l2 regularization strength. Should be a positive float.
-weights: a list of vectors where each value is the weight associated with a
-  feature group.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("SdcaFprint")
     .Input("input: string")
@@ -146,13 +82,6 @@ REGISTER_OP("SdcaFprint")
       TF_RETURN_IF_ERROR(c->Concatenate(handle, c->Vector(2), &output_shape));
       c->set_output(0, output_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Computes fingerprints of the input strings.
-
-input: vector of strings to compute fingerprints on.
-output: a (N,2) shaped matrix where N is the number of elements in the input
-  vector. Each row contains the low and high parts of the fingerprint.
-)doc");
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/set_ops.cc b/tensorflow/core/ops/set_ops.cc
index 85d1335dcf9b362a856f058758ebe7b130302357..5eb1c4d87d4443532b1ce2ecbe3baad304d98f4e 100644
--- a/tensorflow/core/ops/set_ops.cc
+++ b/tensorflow/core/ops/set_ops.cc
@@ -30,24 +30,7 @@ REGISTER_OP("SetSize")
     .Attr("validate_indices: bool = true")
     .Attr("T: {int8, int16, int32, int64, uint8, uint16, string}")
     .Output("size: int32")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Number of unique elements along last dimension of input `set`.
-
-Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`,
-and `set_shape`. The last dimension contains values in a set, duplicates are
-allowed but ignored.
-
-If `validate_indices` is `True`, this op validates the order and range of `set`
-indices.
-
-set_indices: 2D `Tensor`, indices of a `SparseTensor`.
-set_values: 1D `Tensor`, values of a `SparseTensor`.
-set_shape: 1D `Tensor`, shape of a `SparseTensor`.
-size: For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st
-    `n-1` dimensions as `set`. Each value is the number of unique elements in
-    the corresponding `[0...n-1]` dimension of `set`.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("DenseToDenseSetOperation")
     .Input("set1: T")
@@ -103,28 +86,7 @@ REGISTER_OP("DenseToDenseSetOperation")
       c->set_output(1, c->Vector(c->UnknownDim()));
       c->set_output(2, c->Vector(output_rank));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Applies set operation along last dimension of 2 `Tensor` inputs.
-
-See SetOperationOp::SetOperationFromContext for values of `set_operation`.
-
-Output `result` is a `SparseTensor` represented by `result_indices`,
-`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
-has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
-dimension contains the result of `set_operation` applied to the corresponding
-`[0...n-1]` dimension of `set`.
-
-set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.
-    Dimension `n` contains values in a set, duplicates are allowed but ignored.
-set2: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set1`.
-    Dimension `n` contains values in a set, duplicates are allowed but ignored.
-result_indices: 2D indices of a `SparseTensor`.
-result_values: 1D values of a `SparseTensor`.
-result_shape: 1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
-    the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
-    is the max result set size across all `0...n-1` dimensions.
-)doc");
+    });
 
 REGISTER_OP("DenseToSparseSetOperation")
     .Input("set1: T")
@@ -168,41 +130,7 @@ REGISTER_OP("DenseToSparseSetOperation")
       c->set_output(1, c->Vector(c->UnknownDim()));
       c->set_output(2, c->Vector(output_rank_dim));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Applies set operation along last dimension of `Tensor` and `SparseTensor`.
-
-See SetOperationOp::SetOperationFromContext for values of `set_operation`.
-
-Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
-and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
-as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
-ignored.
-
-If `validate_indices` is `True`, this op validates the order and range of `set2`
-indices.
-
-Output `result` is a `SparseTensor` represented by `result_indices`,
-`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
-has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
-dimension contains the result of `set_operation` applied to the corresponding
-`[0...n-1]` dimension of `set`.
-
-set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.
-    Dimension `n` contains values in a set, duplicates are allowed but ignored.
-set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
-    order.
-set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
-    order.
-set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
-    be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the
-    max set size across `n-1` dimensions.
-result_indices: 2D indices of a `SparseTensor`.
-result_values: 1D values of a `SparseTensor`.
-result_shape: 1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
-    the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
-    is the max result set size across all `0...n-1` dimensions.
-)doc");
+    });
 
 REGISTER_OP("SparseToSparseSetOperation")
     .Input("set1_indices: int64")
@@ -258,53 +186,6 @@ REGISTER_OP("SparseToSparseSetOperation")
       c->set_output(1, c->Vector(c->UnknownDim()));
       c->set_output(2, c->Vector(output_rank_dim));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Applies set operation along last dimension of 2 `SparseTensor` inputs.
-
-See SetOperationOp::SetOperationFromContext for values of `set_operation`.
-
-If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the
-order and range of `set1` and `set2` indices.
-
-Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`,
-and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same
-as `set2`. Dimension `n` contains values in a set, duplicates are allowed but
-ignored.
-
-Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
-and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
-as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
-ignored.
-
-If `validate_indices` is `True`, this op validates the order and range of `set1`
-and `set2` indices.
-
-Output `result` is a `SparseTensor` represented by `result_indices`,
-`result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
-has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
-dimension contains the result of `set_operation` applied to the corresponding
-`[0...n-1]` dimension of `set`.
-
-set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
-    order.
-set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
-    order.
-set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must
-    be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the
-    max set size across `0...n-1` dimensions.
-set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
-    order.
-set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
-    order.
-set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
-    be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the
-    max set size across `0...n-1` dimensions.
-result_indices: 2D indices of a `SparseTensor`.
-result_values: 1D values of a `SparseTensor`.
-result_shape: 1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
-    the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
-    is the max result set size across all `0...n-1` dimensions.
-)doc");
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc
index 8414519f0b34c7e8adcef25371257c6b2e7538c1..acc8c782efe7371a42adf8fe587168fd978732a6 100644
--- a/tensorflow/core/ops/sparse_ops.cc
+++ b/tensorflow/core/ops/sparse_ops.cc
@@ -57,26 +57,7 @@ REGISTER_OP("SparseAddGrad")
       c->set_output(0, c->Vector(c->Dim(a_indices, 0)));
       c->set_output(1, c->Vector(c->Dim(b_indices, 0)));
       return Status::OK();
-    })
-    .Doc(R"doc(
-The gradient operator for the SparseAdd op.
-
-The SparseAdd op calculates A + B, where A, B, and the sum are all represented
-as `SparseTensor` objects.  This op takes in the upstream gradient w.r.t.
-non-empty values of the sum, and outputs the gradients w.r.t. the non-empty
-values of A and B.
-
-backprop_val_grad: 1-D with shape `[nnz(sum)]`.  The gradient with respect to
-  the non-empty values of the sum.
-a_indices: 2-D.  The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`.
-b_indices: 2-D.  The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`.
-sum_indices: 2-D.  The `indices` of the sum `SparseTensor`, size
-  `[nnz(sum), ndims]`.
-a_val_grad: 1-D with shape `[nnz(A)]`. The gradient with respect to the
-  non-empty values of A.
-b_val_grad: 1-D with shape `[nnz(B)]`. The gradient with respect to the
-  non-empty values of B.
-)doc");
+    });
 
 REGISTER_OP("SparseAdd")
     .Input("a_indices: int64")
@@ -99,33 +80,7 @@ REGISTER_OP("SparseAdd")
       c->set_output(1, c->Vector(InferenceContext::kUnknownDim));
       c->set_output(2, a_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Adds two `SparseTensor` objects to produce another `SparseTensor`.
-
-The input `SparseTensor` objects' indices are assumed ordered in standard
-lexicographic order.  If this is not the case, before this step run
-`SparseReorder` to restore index ordering.
-
-By default, if two values sum to zero at some index, the output `SparseTensor`
-would still include that particular location in its index, storing a zero in the
-corresponding value slot.  To override this, callers can specify `thresh`,
-indicating that if the sum has a magnitude strictly smaller than `thresh`, its
-corresponding value and index would then not be included.  In particular,
-`thresh == 0` (default) means everything is kept and actual thresholding happens
-only for a positive value.
-
-In the following shapes, `nnz` is the count after taking `thresh` into account.
-
-a_indices: 2-D.  The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix.
-a_values: 1-D.  The `values` of the first `SparseTensor`, size `[nnz]` Vector.
-a_shape: 1-D.  The `shape` of the first `SparseTensor`, size `[ndims]` Vector.
-b_indices: 2-D.  The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix.
-b_values: 1-D.  The `values` of the second `SparseTensor`, size `[nnz]` Vector.
-b_shape: 1-D.  The `shape` of the second `SparseTensor`, size `[ndims]` Vector.
-thresh: 0-D.  The magnitude threshold that determines if an output value/index
-pair takes space.
-)doc");
+    });
 
 REGISTER_OP("SparseTensorDenseMatMul")
     .Input("a_indices: Tindices")
@@ -161,36 +116,15 @@ REGISTER_OP("SparseTensorDenseMatMul")
       TF_RETURN_IF_ERROR(c->Merge(inner_left, inner_right, &unused_dim));
       c->set_output(0, c->Matrix(output_left, output_right));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Multiply SparseTensor (of rank 2) "A" by dense matrix "B".
-
-No validity checking is performed on the indices of A.  However, the following
-input format is recommended for optimal behavior:
-
-if adjoint_a == false:
-  A should be sorted in lexicographically increasing order.  Use SparseReorder
-  if you're not sure.
-if adjoint_a == true:
-  A should be sorted in order of increasing dimension 1 (i.e., "column major"
-  order instead of "row major" order).
-
-a_indices: 2-D.  The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix.
-a_values: 1-D.  The `values` of the `SparseTensor`, size `[nnz]` Vector.
-a_shape: 1-D.  The `shape` of the `SparseTensor`, size `[2]` Vector.
-b: 2-D.  A dense Matrix.
-adjoint_a: Use the adjoint of A in the matrix multiply.  If A is complex, this
-  is transpose(conj(A)).  Otherwise it's transpose(A).
-adjoint_b: Use the adjoint of B in the matrix multiply.  If B is complex, this
-  is transpose(conj(B)).  Otherwise it's transpose(B).
-)doc");
+    });
 
 REGISTER_OP("SerializeSparse")
     .Input("sparse_indices: int64")
     .Input("sparse_values: T")
     .Input("sparse_shape: int64")
     .Attr("T: type")
-    .Output("serialized_sparse: string")
+    .Output("serialized_sparse: out_type")
+    .Attr("out_type: {string, variant} = DT_STRING")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &unused));
@@ -198,21 +132,15 @@ REGISTER_OP("SerializeSparse")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused));
       c->set_output(0, c->Vector(3));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object.
-
-sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
-sparse_values: 1-D.  The `values` of the `SparseTensor`.
-sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
-)doc");
+    });
 
 REGISTER_OP("SerializeManySparse")
     .Input("sparse_indices: int64")
     .Input("sparse_values: T")
     .Input("sparse_shape: int64")
     .Attr("T: type")
-    .Output("serialized_sparse: string")
+    .Output("serialized_sparse: out_type")
+    .Attr("out_type: {string, variant} = DT_STRING")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle unused;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &unused));
@@ -220,29 +148,15 @@ REGISTER_OP("SerializeManySparse")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused));
       c->set_output(0, c->Matrix(InferenceContext::kUnknownDim, 3));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` string `Tensor`.
-
-The `SparseTensor` must have rank `R` greater than 1, and the first dimension
-is treated as the minibatch dimension.  Elements of the `SparseTensor`
-must be sorted in increasing order of this first dimension.  The serialized
-`SparseTensor` objects going into each row of `serialized_sparse` will have
-rank `R-1`.
-
-The minibatch size `N` is extracted from `sparse_shape[0]`.
-
-sparse_indices: 2-D.  The `indices` of the minibatch `SparseTensor`.
-sparse_values: 1-D.  The `values` of the minibatch `SparseTensor`.
-sparse_shape: 1-D.  The `shape` of the minibatch `SparseTensor`.
-)doc");
+    });
 
 REGISTER_OP("DeserializeSparse")
-    .Input("serialized_sparse: string")
-    .Attr("dtype: type")
+    .Input("serialized_sparse: Tserialized")
     .Output("sparse_indices: int64")
     .Output("sparse_values: dtype")
     .Output("sparse_shape: int64")
+    .Attr("dtype: type")
+    .Attr("Tserialized: {string, variant} = DT_STRING")
     .SetShapeFn([](InferenceContext* c) {
       // serialized sparse is [?, ..., ?, 3] vector.
       DimensionHandle unused;
@@ -252,21 +166,14 @@ REGISTER_OP("DeserializeSparse")
       c->set_output(1, c->Vector(InferenceContext::kUnknownDim));
       c->set_output(2, c->Vector(InferenceContext::kUnknownDim));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Deserialize `SparseTensor` objects.
-
-serialized_sparse: The serialized `SparseTensor` objects. The last dimension
-  must have 3 columns.
-dtype: The `dtype` of the serialized `SparseTensor` objects.
-)doc");
+    });
 
 REGISTER_OP("DeserializeManySparse")
     .Input("serialized_sparse: string")
-    .Attr("dtype: type")
     .Output("sparse_indices: int64")
     .Output("sparse_values: dtype")
     .Output("sparse_shape: int64")
+    .Attr("dtype: type")
     .SetShapeFn([](InferenceContext* c) {
       // serialized sparse is [?,3] matrix.
       ShapeHandle serialized_sparse;
@@ -280,56 +187,7 @@ REGISTER_OP("DeserializeManySparse")
       c->set_output(1, c->Vector(InferenceContext::kUnknownDim));
       c->set_output(2, c->Vector(InferenceContext::kUnknownDim));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Deserialize and concatenate `SparseTensors` from a serialized minibatch.
-
-The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where
-`N` is the minibatch size and the rows correspond to packed outputs of
-`SerializeSparse`.  The ranks of the original `SparseTensor` objects
-must all match.  When the final `SparseTensor` is created, it has rank one
-higher than the ranks of the incoming `SparseTensor` objects
-(they have been concatenated along a new row dimension).
-
-The output `SparseTensor` object's shape values for all dimensions but the
-first are the max across the input `SparseTensor` objects' shape values
-for the corresponding dimensions.  Its first shape value is `N`, the minibatch
-size.
-
-The input `SparseTensor` objects' indices are assumed ordered in
-standard lexicographic order.  If this is not the case, after this
-step run `SparseReorder` to restore index ordering.
-
-For example, if the serialized input is a `[2 x 3]` matrix representing two
-original `SparseTensor` objects:
-
-    index = [ 0]
-            [10]
-            [20]
-    values = [1, 2, 3]
-    shape = [50]
-
-and
-
-    index = [ 2]
-            [10]
-    values = [4, 5]
-    shape = [30]
-
-then the final deserialized `SparseTensor` will be:
-
-    index = [0  0]
-            [0 10]
-            [0 20]
-            [1  2]
-            [1 10]
-    values = [1, 2, 3, 4, 5]
-    shape = [2 50]
-
-serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects.
-  Must have 3 columns.
-dtype: The `dtype` of the serialized `SparseTensor` objects.
-)doc");
+    });
 
 REGISTER_OP("SparseToDense")
     .Input("sparse_indices: Tindices")
@@ -345,41 +203,7 @@ REGISTER_OP("SparseToDense")
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Converts a sparse representation into a dense tensor.
-
-Builds an array `dense` with shape `output_shape` such that
-
-```
-# If sparse_indices is scalar
-dense[i] = (i == sparse_indices ? sparse_values : default_value)
-
-# If sparse_indices is a vector, then for each i
-dense[sparse_indices[i]] = sparse_values[i]
-
-# If sparse_indices is an n by d matrix, then for each i in [0, n)
-dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i]
-```
-
-All other values in `dense` are set to `default_value`.  If `sparse_values` is a
-scalar, all sparse indices are set to this single value.
-
-Indices should be sorted in lexicographic order, and indices must not
-contain any repeats. If `validate_indices` is true, these properties
-are checked during execution.
-
-sparse_indices: 0-D, 1-D, or 2-D.  `sparse_indices[i]` contains the complete
-  index where `sparse_values[i]` will be placed.
-output_shape: 1-D.  Shape of the dense output tensor.
-sparse_values: 1-D.  Values corresponding to each row of `sparse_indices`,
-  or a scalar value to be used for all sparse indices.
-default_value: Scalar value to set for indices not specified in
-  `sparse_indices`.
-validate_indices: If true, indices are checked to make sure they are sorted in
-  lexicographic order and that there are no repeats.
-dense: Dense output tensor of shape `output_shape`.
-)doc");
+    });
 
 REGISTER_OP("SparseConcat")
     .Input("indices: N * int64")
@@ -424,61 +248,7 @@ REGISTER_OP("SparseConcat")
       c->set_output(1, c->Vector(output_row_count));
       c->set_output(2, output_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Concatenates a list of `SparseTensor` along the specified dimension.
-
-Concatenation is with respect to the dense versions of these sparse tensors.
-It is assumed that each input is a `SparseTensor` whose elements are ordered
-along increasing dimension number.
-
-All inputs' shapes must match, except for the concat dimension.  The
-`indices`, `values`, and `shapes` lists must have the same length.
-
-The output shape is identical to the inputs', except along the concat
-dimension, where it is the sum of the inputs' sizes along that dimension.
-
-The output elements will be resorted to preserve the sort order along
-increasing dimension number.
-
-This op runs in `O(M log M)` time, where `M` is the total number of non-empty
-values across all inputs. This is due to the need for an internal sort in
-order to concatenate efficiently across an arbitrary dimension.
-
-For example, if `concat_dim = 1` and the inputs are
-
-    sp_inputs[0]: shape = [2, 3]
-    [0, 2]: "a"
-    [1, 0]: "b"
-    [1, 1]: "c"
-
-    sp_inputs[1]: shape = [2, 4]
-    [0, 1]: "d"
-    [0, 2]: "e"
-
-then the output will be
-
-    shape = [2, 7]
-    [0, 2]: "a"
-    [0, 4]: "d"
-    [0, 5]: "e"
-    [1, 0]: "b"
-    [1, 1]: "c"
-
-Graphically this is equivalent to doing
-
-    [    a] concat [  d e  ] = [    a   d e  ]
-    [b c  ]        [       ]   [b c          ]
-
-indices: 2-D.  Indices of each input `SparseTensor`.
-values: 1-D.  Non-empty values of each `SparseTensor`.
-shapes: 1-D.  Shapes of each `SparseTensor`.
-output_indices: 2-D.  Indices of the concatenated `SparseTensor`.
-output_values: 1-D.  Non-empty values of the concatenated `SparseTensor`.
-output_shape: 1-D.  Shape of the concatenated `SparseTensor`.
-concat_dim: Dimension to concatenate along. Must be in range [-rank, rank),
-    where rank is the number of dimensions in each input `SparseTensor`.
-)doc");
+    });
 
 REGISTER_OP("SparseCross")
     .Input("indices: N * int64")
@@ -501,62 +271,7 @@ REGISTER_OP("SparseCross")
       c->set_output(1, c->Vector(c->UnknownDim()));
       c->set_output(2, c->Vector(2));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Generates sparse cross from a list of sparse and dense tensors.
-
-The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each
-representing features of one feature column. It outputs a 2D `SparseTensor` with
-the batchwise crosses of these features.
-
-For example, if the inputs are
-
-    inputs[0]: SparseTensor with shape = [2, 2]
-    [0, 0]: "a"
-    [1, 0]: "b"
-    [1, 1]: "c"
-
-    inputs[1]: SparseTensor with shape = [2, 1]
-    [0, 0]: "d"
-    [1, 0]: "e"
-
-    inputs[2]: Tensor [["f"], ["g"]]
-
-then the output will be
-
-    shape = [2, 2]
-    [0, 0]: "a_X_d_X_f"
-    [1, 0]: "b_X_e_X_g"
-    [1, 1]: "c_X_e_X_g"
-
-if hashed_output=true then the output will be
-
-    shape = [2, 2]
-    [0, 0]: FingerprintCat64(
-                Fingerprint64("f"), FingerprintCat64(
-                    Fingerprint64("d"), Fingerprint64("a")))
-    [1, 0]: FingerprintCat64(
-                Fingerprint64("g"), FingerprintCat64(
-                    Fingerprint64("e"), Fingerprint64("b")))
-    [1, 1]: FingerprintCat64(
-                Fingerprint64("g"), FingerprintCat64(
-                    Fingerprint64("e"), Fingerprint64("c")))
-
-indices: 2-D.  Indices of each input `SparseTensor`.
-values: 1-D.   values of each `SparseTensor`.
-shapes: 1-D.   Shapes of each `SparseTensor`.
-dense_inputs: 2-D.    Columns represented by dense `Tensor`.
-hashed_output: If true, returns the hash of the cross instead of the string.
-  This will allow us avoiding string manipulations.
-num_buckets: It is used if hashed_output is true.
-  output = hashed_value%num_buckets if num_buckets > 0 else hashed_value.
-hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
-  function to combine the crosses fingerprints.
-output_indices: 2-D.  Indices of the concatenated `SparseTensor`.
-output_values: 1-D.  Non-empty values of the concatenated or hashed
-  `SparseTensor`.
-output_shape: 1-D.  Shape of the concatenated `SparseTensor`.
-)doc");
+    });
 
 REGISTER_OP("SparseSplit")
     .Input("split_dim: int64")
@@ -585,41 +300,7 @@ REGISTER_OP("SparseSplit")
       for (int i = 0; i < num_splits; ++i)
         c->set_output(out_idx++, output_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Split a `SparseTensor` into `num_split` tensors along one dimension.
-
-If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices
-`[0 : shape[split_dim] % num_split]` gets one extra dimension.
-For example, if `split_dim = 1` and `num_split = 2` and the input is
-
-    input_tensor = shape = [2, 7]
-    [    a   d e  ]
-    [b c          ]
-
-Graphically the output tensors are:
-
-    output_tensor[0] = shape = [2, 4]
-    [    a  ]
-    [b c    ]
-
-    output_tensor[1] = shape = [2, 3]
-    [ d e  ]
-    [      ]
-
-split_dim: 0-D.  The dimension along which to split.  Must be in the range
-  `[0, rank(shape))`.
-num_split: The number of ways to split.
-indices: 2-D tensor represents the indices of the sparse tensor.
-values: 1-D tensor represents the values of the sparse tensor.
-shape: 1-D. tensor represents the shape of the sparse tensor.
-output indices: A list of 1-D tensors represents the indices of the output
-sparse tensors.
-output_values: A list of 1-D tensors represents the values of the output sparse
-  tensors.
-output_shape: A list of 1-D tensors represents the shape of the output sparse
-  tensors.
-)doc");
+    });
 
 REGISTER_OP("SparseSlice")
     .Input("indices: int64")
@@ -642,38 +323,7 @@ REGISTER_OP("SparseSlice")
       c->set_output(1, output_values);
       c->set_output(2, output_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Slice a `SparseTensor` based on the `start` and `size`.
-
-For example, if the input is
-
-    input_tensor = shape = [2, 7]
-    [    a   d e  ]
-    [b c          ]
-
-Graphically the output tensors are:
-
-    sparse_slice([0, 0], [2, 4]) = shape = [2, 4]
-    [    a  ]
-    [b c    ]
-
-    sparse_slice([0, 4], [2, 3]) = shape = [2, 3]
-    [ d e  ]
-    [      ]
-
-indices: 2-D tensor represents the indices of the sparse tensor.
-values: 1-D tensor represents the values of the sparse tensor.
-shape: 1-D. tensor represents the shape of the sparse tensor.
-start: 1-D. tensor represents the start of the slice.
-size: 1-D. tensor represents the size of the slice.
-output indices: A list of 1-D tensors represents the indices of the output
-sparse tensors.
-output_values: A list of 1-D tensors represents the values of the output sparse
-  tensors.
-output_shape: A list of 1-D tensors represents the shape of the output sparse
-  tensors.
-)doc");
+    });
 
 REGISTER_OP("SparseReorder")
     .Input("input_indices: int64")
@@ -694,27 +344,7 @@ REGISTER_OP("SparseReorder")
       c->set_output(0, indices);
       c->set_output(1, values);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Reorders a SparseTensor into the canonical, row-major ordering.
-
-Note that by convention, all sparse ops preserve the canonical ordering along
-increasing dimension number. The only time ordering can be violated is during
-manual manipulation of the indices and values vectors to add entries.
-
-Reordering does not affect the shape of the SparseTensor.
-
-If the tensor has rank `R` and `N` non-empty values, `input_indices` has
-shape `[N, R]`, input_values has length `N`, and input_shape has length `R`.
-
-input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-  SparseTensor, possibly not in canonical ordering.
-input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-input_shape: 1-D.  Shape of the input SparseTensor.
-output_indices: 2-D.  `N x R` matrix with the same indices as input_indices, but
-  in canonical row-major ordering.
-output_values: 1-D.  `N` non-empty values corresponding to `output_indices`.
-)doc");
+    });
 
 REGISTER_OP("SparseReshape")
     .Input("input_indices: int64")
@@ -734,36 +364,7 @@ REGISTER_OP("SparseReshape")
       c->set_output(0, c->Matrix(c->Dim(indices, 0), c->Dim(new_shape, 0)));
       c->set_output(1, new_shape);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Reshapes a SparseTensor to represent values in a new dense shape.
-
-This operation has the same semantics as reshape on the represented dense
-tensor.  The `input_indices` are recomputed based on the requested `new_shape`.
-
-If one component of `new_shape` is the special value -1, the size of that
-dimension is computed so that the total dense size remains constant.  At
-most one component of `new_shape` can be -1.  The number of dense elements
-implied by `new_shape` must be the same as the number of dense elements
-originally implied by `input_shape`.
-
-Reshaping does not affect the order of values in the SparseTensor.
-
-If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape`
-has length `R_out`, then `input_indices` has shape `[N, R_in]`,
-`input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and
-`output_shape` has length `R_out`.
-
-input_indices: 2-D.  `N x R_in` matrix with the indices of non-empty values in a
-  SparseTensor.
-input_shape: 1-D.  `R_in` vector with the input SparseTensor's dense shape.
-new_shape: 1-D.  `R_out` vector with the requested new dense shape.
-output_indices: 2-D.  `N x R_out` matrix with the updated indices of non-empty
-  values in the output SparseTensor.
-output_shape: 1-D.  `R_out` vector with the full dense shape of the output
-  SparseTensor.  This is the same as `new_shape` but with any -1 dimensions
-  filled in.
-)doc");
+    });
 
 REGISTER_OP("SparseTensorDenseAdd")
     .Input("a_indices: Tindices")
@@ -776,17 +377,7 @@ REGISTER_OP("SparseTensorDenseAdd")
     .SetShapeFn([](InferenceContext* c) {
       c->set_output(0, c->input(3));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`.
-
-This Op does not require `a_indices` be sorted in standard lexicographic order.
-
-a_indices: 2-D.  The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`.
-a_values: 1-D.  The `values` of the `SparseTensor`, with shape `[nnz]`.
-a_shape: 1-D.  The `shape` of the `SparseTensor`, with shape `[ndims]`.
-b: `ndims`-D Tensor.  With shape `a_shape`.
-)doc");
+    });
 
 REGISTER_OP("SparseReduceMax")
     .Input("input_indices: int64")
@@ -796,31 +387,7 @@ REGISTER_OP("SparseReduceMax")
     .Attr("keep_dims: bool = False")
     .Output("output: T")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Computes the max of elements across dimensions of a SparseTensor.
-
-This Op takes a SparseTensor and is the sparse counterpart to
-`tf.reduce_max()`.  In particular, this Op also returns a dense `Tensor`
-instead of a sparse one.
-
-Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-with length 1.
-
-If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-with a single element is returned.  Additionally, the axes can be negative,
-which are interpreted according to the indexing rules in Python.
-
-input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-  SparseTensor, possibly not in canonical ordering.
-input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-input_shape: 1-D.  Shape of the input SparseTensor.
-reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
-keep_dims: If true, retain reduced dimensions with length 1.
-output: `R-K`-D.  The reduced Tensor.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("SparseReduceMaxSparse")
     .Input("input_indices: int64")
@@ -832,30 +399,7 @@ REGISTER_OP("SparseReduceMaxSparse")
     .Output("output_values: T")
     .Output("output_shape: int64")
     .Attr("T: realnumbertype")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Computes the max of elements across dimensions of a SparseTensor.
-
-This Op takes a SparseTensor and is the sparse counterpart to
-`tf.reduce_max()`.  In contrast to SparseReduceMax, this Op returns a
-SparseTensor.
-
-Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-with length 1.
-
-If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-with a single element is returned.  Additionally, the axes can be negative,
-which are interpreted according to the indexing rules in Python.
-
-input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-  SparseTensor, possibly not in canonical ordering.
-input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-input_shape: 1-D.  Shape of the input SparseTensor.
-reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
-keep_dims: If true, retain reduced dimensions with length 1.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("SparseReduceSum")
     .Input("input_indices: int64")
@@ -865,31 +409,7 @@ REGISTER_OP("SparseReduceSum")
     .Attr("keep_dims: bool = False")
     .Output("output: T")
     .Attr("T: numbertype")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Computes the sum of elements across dimensions of a SparseTensor.
-
-This Op takes a SparseTensor and is the sparse counterpart to
-`tf.reduce_sum()`.  In particular, this Op also returns a dense `Tensor`
-instead of a sparse one.
-
-Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-with length 1.
-
-If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-with a single element is returned.  Additionally, the axes can be negative,
-which are interpreted according to the indexing rules in Python.
-
-input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-  SparseTensor, possibly not in canonical ordering.
-input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-input_shape: 1-D.  Shape of the input SparseTensor.
-reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
-keep_dims: If true, retain reduced dimensions with length 1.
-output: `R-K`-D.  The reduced Tensor.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("SparseReduceSumSparse")
     .Input("input_indices: int64")
@@ -901,30 +421,7 @@ REGISTER_OP("SparseReduceSumSparse")
     .Output("output_values: T")
     .Output("output_shape: int64")
     .Attr("T: numbertype")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"doc(
-Computes the sum of elements across dimensions of a SparseTensor.
-
-This Op takes a SparseTensor and is the sparse counterpart to
-`tf.reduce_sum()`.  In contrast to SparseReduceSum, this Op returns a
-SparseTensor.
-
-Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-with length 1.
-
-If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-with a single element is returned.  Additionally, the axes can be negative,
-which are interpreted according to the indexing rules in Python.
-
-input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-  SparseTensor, possibly not in canonical ordering.
-input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-input_shape: 1-D.  Shape of the input SparseTensor.
-reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
-keep_dims: If true, retain reduced dimensions with length 1.
-)doc");
+    .SetShapeFn(shape_inference::UnknownShape);
 
 #define SPARSE_DENSE_CWISE_SIGNATURE()                           \
   Input("sp_indices: int64")                                     \
@@ -940,63 +437,11 @@ keep_dims: If true, retain reduced dimensions with length 1.
         return Status::OK();                                     \
       })
 
-REGISTER_OP("SparseDenseCwiseMul")
-    .SPARSE_DENSE_CWISE_SIGNATURE()
-    .Doc(R"doc(
-Component-wise multiplies a SparseTensor by a dense Tensor.
-
-The output locations corresponding to the implicitly zero elements in the sparse
-tensor will be zero (i.e., will not take up storage space), regardless of the
-contents of the dense tensor (even if it's +/-INF and that INF*0 == NaN).
-
-*Limitation*: this Op only broadcasts the dense side to the sparse side, but not
-the other direction.
-
-sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-  SparseTensor, possibly not in canonical ordering.
-sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
-sp_shape: 1-D.  Shape of the input SparseTensor.
-dense: `R`-D.  The dense Tensor operand.
-output: 1-D.  The `N` values that are operated on.
-)doc");
-
-REGISTER_OP("SparseDenseCwiseDiv")
-    .SPARSE_DENSE_CWISE_SIGNATURE()
-    .Doc(R"doc(
-Component-wise divides a SparseTensor by a dense Tensor.
-
-*Limitation*: this Op only broadcasts the dense side to the sparse side, but not
-the other direction.
-
-sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-  SparseTensor, possibly not in canonical ordering.
-sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
-sp_shape: 1-D.  Shape of the input SparseTensor.
-dense: `R`-D.  The dense Tensor operand.
-output: 1-D.  The `N` values that are operated on.
-)doc");
-
-REGISTER_OP("SparseDenseCwiseAdd")
-    .SPARSE_DENSE_CWISE_SIGNATURE()
-    .Doc(R"doc(
-Adds up a SparseTensor and a dense Tensor, using these special rules:
-
-(1) Broadcasts the dense side to have the same shape as the sparse side, if
-    eligible;
-(2) Then, only the dense values pointed to by the indices of the SparseTensor
-    participate in the cwise addition.
+REGISTER_OP("SparseDenseCwiseMul").SPARSE_DENSE_CWISE_SIGNATURE();
 
-By these rules, the result is a logical SparseTensor with exactly the same
-indices and shape, but possibly with different non-zero values.  The output of
-this Op is the resultant non-zero values.
+REGISTER_OP("SparseDenseCwiseDiv").SPARSE_DENSE_CWISE_SIGNATURE();
 
-sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-  SparseTensor, possibly not in canonical ordering.
-sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
-sp_shape: 1-D.  Shape of the input SparseTensor.
-dense: `R`-D.  The dense Tensor operand.
-output: 1-D.  The `N` values that are operated on.
-)doc");
+REGISTER_OP("SparseDenseCwiseAdd").SPARSE_DENSE_CWISE_SIGNATURE();
 
 #undef SPARSE_DENSE_CWISE_SIGNATURE
 
@@ -1014,32 +459,7 @@ REGISTER_OP("SparseSoftmax")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused));
       c->set_output(0, values);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Applies softmax to a batched N-D `SparseTensor`.
-
-The inputs represent an N-D SparseTensor  with logical shape `[..., B, C]`
-(where `N >= 2`), and with indices sorted in the canonical lexicographic order.
-
-This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost
-logical submatrix with shape `[B, C]`, but with the catch that *the implicitly
-zero elements do not participate*.  Specifically, the algorithm is equivalent
-to the following:
-
-  (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix
-      with shape `[B, C]`, along the size-C dimension;
-  (2) Masks out the original implicitly-zero locations;
-  (3) Renormalizes the remaining elements.
-
-Hence, the `SparseTensor` result has exactly the same non-zero indices and
-shape.
-
-sp_indices: 2-D.  `NNZ x R` matrix with the indices of non-empty values in a
-  SparseTensor, in canonical ordering.
-sp_values: 1-D.  `NNZ` non-empty values corresponding to `sp_indices`.
-sp_shape: 1-D.  Shape of the input SparseTensor.
-output: 1-D.  The `NNZ` values for the result `SparseTensor`.
-)doc");
+    });
 
 REGISTER_OP("SparseSparseMaximum")
     .Input("a_indices: int64")
@@ -1051,23 +471,7 @@ REGISTER_OP("SparseSparseMaximum")
     .Output("output_indices: int64")
     .Output("output_values: T")
     .Attr("T: realnumbertype")
-    .SetShapeFn(SparseSparseMinOrMaxShapeFn)
-    .Doc(R"doc(
-Returns the element-wise max of two SparseTensors.
-
-Assumes the two SparseTensors have the same shape, i.e., no broadcasting.
-
-a_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-  SparseTensor, in the canonical lexicographic ordering.
-a_values: 1-D.  `N` non-empty values corresponding to `a_indices`.
-a_shape: 1-D.  Shape of the input SparseTensor.
-b_indices: counterpart to `a_indices` for the other operand.
-b_values: counterpart to `a_values` for the other operand; must be of the same dtype.
-b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal.
-
-output_indices: 2-D.  The indices of the output SparseTensor.
-output_values: 1-D.  The values of the output SparseTensor.
-)doc");
+    .SetShapeFn(SparseSparseMinOrMaxShapeFn);
 
 REGISTER_OP("SparseSparseMinimum")
     .Input("a_indices: int64")
@@ -1079,23 +483,7 @@ REGISTER_OP("SparseSparseMinimum")
     .Output("output_indices: int64")
     .Output("output_values: T")
     .Attr("T: numbertype")
-    .SetShapeFn(SparseSparseMinOrMaxShapeFn)
-    .Doc(R"doc(
-Returns the element-wise min of two SparseTensors.
-
-Assumes the two SparseTensors have the same shape, i.e., no broadcasting.
-
-a_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-  SparseTensor, in the canonical lexicographic ordering.
-a_values: 1-D.  `N` non-empty values corresponding to `a_indices`.
-a_shape: 1-D.  Shape of the input SparseTensor.
-b_indices: counterpart to `a_indices` for the other operand.
-b_values: counterpart to `a_values` for the other operand; must be of the same dtype.
-b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal.
-
-output_indices: 2-D.  The indices of the output SparseTensor.
-output_values: 1-D.  The values of the output SparseTensor.
-)doc");
+    .SetShapeFn(SparseSparseMinOrMaxShapeFn);
 
 REGISTER_OP("AddSparseToTensorsMap")
     .Input("sparse_indices: int64")
@@ -1113,34 +501,7 @@ REGISTER_OP("AddSparseToTensorsMap")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused));
       c->set_output(0, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-Add a `SparseTensor` to a `SparseTensorsMap` return its handle.
-
-A `SparseTensor` is represented by three tensors: `sparse_indices`,
-`sparse_values`, and `sparse_shape`.
-
-This operator takes the given `SparseTensor` and adds it to a container
-object (a `SparseTensorsMap`).  A unique key within this container is generated
-in the form of an `int64`, and this is the value that is returned.
-
-The `SparseTensor` can then be read out as part of a minibatch by passing
-the key as a vector element to `TakeManySparseFromTensorsMap`.  To ensure
-the correct `SparseTensorsMap` is accessed, ensure that the same
-`container` and `shared_name` are passed to that Op.  If no `shared_name`
-is provided here, instead use the *name* of the Operation created by calling
-`AddSparseToTensorsMap` as the `shared_name` passed to
-`TakeManySparseFromTensorsMap`.  Ensure the Operations are colocated.
-
-sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
-sparse_values: 1-D.  The `values` of the `SparseTensor`.
-sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
-sparse_handle: 0-D.  The handle of the `SparseTensor` now stored in the
-  `SparseTensorsMap`.
-container: The container name for the `SparseTensorsMap` created by this op.
-shared_name: The shared name for the `SparseTensorsMap` created by this op.
-  If blank, the new Operation's unique name is used.
-)doc");
+    });
 
 REGISTER_OP("AddManySparseToTensorsMap")
     .Input("sparse_indices: int64")
@@ -1158,44 +519,7 @@ REGISTER_OP("AddManySparseToTensorsMap")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused));
       c->set_output(0, c->Vector(InferenceContext::kUnknownDim));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Add an `N`-minibatch `SparseTensor` to a `SparseTensorsMap`, return `N` handles.
-
-A `SparseTensor` of rank `R` is represented by three tensors: `sparse_indices`,
-`sparse_values`, and `sparse_shape`, where
-
-```sparse_indices.shape[1] == sparse_shape.shape[0] == R```
-
-An `N`-minibatch of `SparseTensor` objects is represented as a `SparseTensor`
-having a first `sparse_indices` column taking values between `[0, N)`, where
-the minibatch size `N == sparse_shape[0]`.
-
-The input `SparseTensor` must have rank `R` greater than 1, and the first
-dimension is treated as the minibatch dimension.  Elements of the `SparseTensor`
-must be sorted in increasing order of this first dimension.  The stored
-`SparseTensor` objects pointed to by each row of the output `sparse_handles`
-will have rank `R-1`.
-
-The `SparseTensor` values can then be read out as part of a minibatch by passing
-the given keys as vector elements to `TakeManySparseFromTensorsMap`.  To ensure
-the correct `SparseTensorsMap` is accessed, ensure that the same
-`container` and `shared_name` are passed to that Op.  If no `shared_name`
-is provided here, instead use the *name* of the Operation created by calling
-`AddManySparseToTensorsMap` as the `shared_name` passed to
-`TakeManySparseFromTensorsMap`.  Ensure the Operations are colocated.
-
-sparse_indices: 2-D.  The `indices` of the minibatch `SparseTensor`.
-  `sparse_indices[:, 0]` must be ordered values in `[0, N)`.
-sparse_values: 1-D.  The `values` of the minibatch `SparseTensor`.
-sparse_shape: 1-D.  The `shape` of the minibatch `SparseTensor`.
-  The minibatch size `N == sparse_shape[0]`.
-sparse_handles: 1-D.  The handles of the `SparseTensor` now stored in the
-  `SparseTensorsMap`.  Shape: `[N]`.
-container: The container name for the `SparseTensorsMap` created by this op.
-shared_name: The shared name for the `SparseTensorsMap` created by this op.
-  If blank, the new Operation's unique name is used.
-)doc");
+    });
 
 REGISTER_OP("TakeManySparseFromTensorsMap")
     .Input("sparse_handles: int64")
@@ -1216,71 +540,7 @@ REGISTER_OP("TakeManySparseFromTensorsMap")
       c->set_output(1, c->Vector(InferenceContext::kUnknownDim));
       c->set_output(2, c->Vector(InferenceContext::kUnknownDim));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Read `SparseTensors` from a `SparseTensorsMap` and concatenate them.
-
-The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where
-`N` is the minibatch size and the rows correspond to the output handles of
-`AddSparseToTensorsMap` or `AddManySparseToTensorsMap`.  The ranks of the
-original `SparseTensor` objects that went into the given input ops must all
-match.  When the final `SparseTensor` is created, it has rank one
-higher than the ranks of the incoming `SparseTensor` objects
-(they have been concatenated along a new row dimension on the left).
-
-The output `SparseTensor` object's shape values for all dimensions but the
-first are the max across the input `SparseTensor` objects' shape values
-for the corresponding dimensions.  Its first shape value is `N`, the minibatch
-size.
-
-The input `SparseTensor` objects' indices are assumed ordered in
-standard lexicographic order.  If this is not the case, after this
-step run `SparseReorder` to restore index ordering.
-
-For example, if the handles represent an input, which is a `[2, 3]` matrix
-representing two original `SparseTensor` objects:
-
-```
-    index = [ 0]
-            [10]
-            [20]
-    values = [1, 2, 3]
-    shape = [50]
-```
-
-and
-
-```
-    index = [ 2]
-            [10]
-    values = [4, 5]
-    shape = [30]
-```
-
-then the final `SparseTensor` will be:
-
-```
-    index = [0  0]
-            [0 10]
-            [0 20]
-            [1  2]
-            [1 10]
-    values = [1, 2, 3, 4, 5]
-    shape = [2 50]
-```
-
-sparse_handles: 1-D, The `N` serialized `SparseTensor` objects.
-  Shape: `[N]`.
-sparse_indices: 2-D.  The `indices` of the minibatch `SparseTensor`.
-sparse_values: 1-D.  The `values` of the minibatch `SparseTensor`.
-sparse_shape: 1-D.  The `shape` of the minibatch `SparseTensor`.
-dtype: The `dtype` of the `SparseTensor` objects stored in the
-  `SparseTensorsMap`.
-container: The container name for the `SparseTensorsMap` read by this op.
-shared_name: The shared name for the `SparseTensorsMap` read by this op.
-  It should not be blank; rather the `shared_name` or unique Operation name
-  of the Op that created the original `SparseTensorsMap` should be used.
-)doc");
+    });
 
 REGISTER_OP("SparseFillEmptyRows")
     .Input("indices: int64")
@@ -1319,59 +579,7 @@ REGISTER_OP("SparseFillEmptyRows")
       c->set_output(2, empty_row_indicator);
       c->set_output(3, reverse_index_map);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Fills empty rows in the input 2-D `SparseTensor` with a default value.
-
-The input `SparseTensor` is represented via the tuple of inputs
-(`indices`, `values`, `dense_shape`).  The output `SparseTensor` has the
-same `dense_shape` but with indices `output_indices` and values
-`output_values`.
-
-This op inserts a single entry for every row that doesn't have any values.
-The index is created as `[row, 0, ..., 0]` and the inserted value
-is `default_value`.
-
-For example, suppose `sp_input` has shape `[5, 6]` and non-empty values:
-
-    [0, 1]: a
-    [0, 3]: b
-    [2, 0]: c
-    [3, 1]: d
-
-Rows 1 and 4 are empty, so the output will be of shape `[5, 6]` with values:
-
-    [0, 1]: a
-    [0, 3]: b
-    [1, 0]: default_value
-    [2, 0]: c
-    [3, 1]: d
-    [4, 0]: default_value
-
-The output `SparseTensor` will be in row-major order and will have the
-same shape as the input.
-
-This op also returns an indicator vector shaped `[dense_shape[0]]` such that
-
-    empty_row_indicator[i] = True iff row i was an empty row.
-
-And a reverse index map vector shaped `[indices.shape[0]]` that is used during
-backpropagation,
-
-    reverse_index_map[j] = out_j s.t. indices[j, :] == output_indices[out_j, :]
-
-
-indices: 2-D. the indices of the sparse tensor.
-values: 1-D. the values of the sparse tensor.
-dense_shape: 1-D. the shape of the sparse tensor.
-default_value: 0-D. default value to insert into location `[row, 0, ..., 0]`
-  for rows missing from the input sparse tensor.
-output indices: 2-D. the indices of the filled sparse tensor.
-output_values: 1-D. the values of the filled sparse tensor.
-empty_row_indicator: 1-D. whether the dense row was missing in the
-  input sparse tensor.
-reverse_index_map: 1-D. a map from the input indices to the output indices.
-)doc");
+    });
 
 REGISTER_OP("SparseFillEmptyRowsGrad")
     .Input("reverse_index_map: int64")
@@ -1387,23 +595,6 @@ REGISTER_OP("SparseFillEmptyRowsGrad")
       c->set_output(0, reverse_index_map);
       c->set_output(1, c->Scalar());
       return Status::OK();
-    })
-    .Doc(R"doc(
-The gradient of SparseFillEmptyRows.
-
-Takes vectors reverse_index_map, shaped `[N]`, and grad_values,
-shaped `[N_full]`, where `N_full >= N` and copies data into either
-`d_values` or `d_default_value`.  Here `d_values` is shaped `[N]` and
-`d_default_value` is a scalar.
-
-  d_values[j] = grad_values[reverse_index_map[j]]
-  d_default_value = sum_{k : 0 .. N_full - 1} (
-     grad_values[k] * 1{k not in reverse_index_map})
-
-reverse_index_map: 1-D.  The reverse index map from SparseFillEmptyRows.
-grad_values: 1-D.  The gradients from backprop.
-d_values: 1-D.  The backprop into values.
-d_default_value: 0-D.  The backprop into default_value.
-)doc");
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/spectral_ops.cc b/tensorflow/core/ops/spectral_ops.cc
index 592aaa25c3e76186a2dfcf720f62a7a97e10fbf1..508cea3495a9e811d4d12bf022b0ddfdcb33d718 100644
--- a/tensorflow/core/ops/spectral_ops.cc
+++ b/tensorflow/core/ops/spectral_ops.cc
@@ -29,126 +29,42 @@ REGISTER_OP("FFT")
     .Output("output: complex64")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
-    })
-    .Doc(R"doc(
-Fast Fourier transform.
-
-Computes the 1-dimensional discrete Fourier transform over the inner-most
-dimension of `input`.
-
-input: A complex64 tensor.
-output: A complex64 tensor of the same shape as `input`. The inner-most
-  dimension of `input` is replaced with its 1D Fourier transform.
-
-@compatibility(numpy)
-Equivalent to np.fft.fft
-@end_compatibility
-)doc");
+    });
 
 REGISTER_OP("IFFT")
     .Input("input: complex64")
     .Output("output: complex64")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 1);
-    })
-    .Doc(R"doc(
-Inverse fast Fourier transform.
-
-Computes the inverse 1-dimensional discrete Fourier transform over the
-inner-most dimension of `input`.
-
-input: A complex64 tensor.
-output: A complex64 tensor of the same shape as `input`. The inner-most
-  dimension of `input` is replaced with its inverse 1D Fourier transform.
-
-@compatibility(numpy)
-Equivalent to np.fft.ifft
-@end_compatibility
-)doc");
+    });
 
 REGISTER_OP("FFT2D")
     .Input("input: complex64")
     .Output("output: complex64")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 2);
-    })
-    .Doc(R"doc(
-2D fast Fourier transform.
-
-Computes the 2-dimensional discrete Fourier transform over the inner-most
-2 dimensions of `input`.
-
-input: A complex64 tensor.
-output: A complex64 tensor of the same shape as `input`. The inner-most 2
-  dimensions of `input` are replaced with their 2D Fourier transform.
-
-@compatibility(numpy)
-Equivalent to np.fft.fft2
-@end_compatibility
-)doc");
+    });
 
 REGISTER_OP("IFFT2D")
     .Input("input: complex64")
     .Output("output: complex64")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 2);
-    })
-    .Doc(R"doc(
-Inverse 2D fast Fourier transform.
-
-Computes the inverse 2-dimensional discrete Fourier transform over the
-inner-most 2 dimensions of `input`.
-
-input: A complex64 tensor.
-output: A complex64 tensor of the same shape as `input`. The inner-most 2
-  dimensions of `input` are replaced with their inverse 2D Fourier transform.
-
-@compatibility(numpy)
-Equivalent to np.fft.ifft2
-@end_compatibility
-)doc");
+    });
 
 REGISTER_OP("FFT3D")
     .Input("input: complex64")
     .Output("output: complex64")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
-    })
-    .Doc(R"doc(
-3D fast Fourier transform.
-
-Computes the 3-dimensional discrete Fourier transform over the inner-most 3
-dimensions of `input`.
-
-input: A complex64 tensor.
-output: A complex64 tensor of the same shape as `input`. The inner-most 3
-  dimensions of `input` are replaced with their 3D Fourier transform.
-
-@compatibility(numpy)
-Equivalent to np.fft.fftn with 3 dimensions.
-@end_compatibility
-)doc");
+    });
 
 REGISTER_OP("IFFT3D")
     .Input("input: complex64")
     .Output("output: complex64")
     .SetShapeFn([](InferenceContext* c) {
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
-    })
-    .Doc(R"doc(
-Inverse 3D fast Fourier transform.
-
-Computes the inverse 3-dimensional discrete Fourier transform over the
-inner-most 3 dimensions of `input`.
-
-input: A complex64 tensor.
-output: A complex64 tensor of the same shape as `input`. The inner-most 3
-  dimensions of `input` are replaced with their inverse 3D Fourier transform.
-
-@compatibility(numpy)
-Equivalent to np.fft.ifftn with 3 dimensions.
-@end_compatibility
-)doc");
+    });
 
 Status RFFTShape(InferenceContext* c, const bool forward, const int rank) {
   ShapeHandle out;
@@ -190,196 +106,37 @@ REGISTER_OP("RFFT")
     .Input("input: float")
     .Input("fft_length: int32")
     .Output("output: complex64")
-    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 1); })
-    .Doc(R"doc(
-Real-valued fast Fourier transform.
-
-Computes the 1-dimensional discrete Fourier transform of a real-valued signal
-over the inner-most dimension of `input`.
-
-Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the
-`fft_length / 2 + 1` unique components of the FFT: the zero-frequency term,
-followed by the `fft_length / 2` positive-frequency terms.
-
-Along the axis `RFFT` is computed on, if `fft_length` is smaller than the
-corresponding dimension of `input`, the dimension is cropped. If it is larger,
-the dimension is padded with zeros.
-
-input: A float32 tensor.
-fft_length: An int32 tensor of shape [1]. The FFT length.
-output: A complex64 tensor of the same rank as `input`. The inner-most
-  dimension of `input` is replaced with the `fft_length / 2 + 1` unique
-  frequency components of its 1D Fourier transform.
-
-@compatibility(numpy)
-Equivalent to np.fft.rfft
-@end_compatibility
-)doc");
+    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 1); });
 
 REGISTER_OP("IRFFT")
     .Input("input: complex64")
     .Input("fft_length: int32")
     .Output("output: float")
-    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 1); })
-    .Doc(R"doc(
-Inverse real-valued fast Fourier transform.
-
-Computes the inverse 1-dimensional discrete Fourier transform of a real-valued
-signal over the inner-most dimension of `input`.
-
-The inner-most dimension of `input` is assumed to be the result of `RFFT`: the
-`fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If
-`fft_length` is not provided, it is computed from the size of the inner-most
-dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to
-compute `input` is odd, it should be provided since it cannot be inferred
-properly.
-
-Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller
-than the corresponding dimension of `input`, the dimension is cropped. If it is
-larger, the dimension is padded with zeros.
-
-input: A complex64 tensor.
-fft_length: An int32 tensor of shape [1]. The FFT length.
-output: A float32 tensor of the same rank as `input`. The inner-most
-  dimension of `input` is replaced with the `fft_length` samples of its inverse
-  1D Fourier transform.
-
-@compatibility(numpy)
-Equivalent to np.fft.irfft
-@end_compatibility
-)doc");
+    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 1); });
 
 REGISTER_OP("RFFT2D")
     .Input("input: float")
     .Input("fft_length: int32")
     .Output("output: complex64")
-    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 2); })
-    .Doc(R"doc(
-2D real-valued fast Fourier transform.
-
-Computes the 2-dimensional discrete Fourier transform of a real-valued signal
-over the inner-most 2 dimensions of `input`.
-
-Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the
-`fft_length / 2 + 1` unique components of the FFT for the inner-most dimension
-of `output`: the zero-frequency term, followed by the `fft_length / 2`
-positive-frequency terms.
-
-Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the
-corresponding dimension of `input`, the dimension is cropped. If it is larger,
-the dimension is padded with zeros.
-
-input: A float32 tensor.
-fft_length: An int32 tensor of shape [2]. The FFT length for each dimension.
-output: A complex64 tensor of the same rank as `input`. The inner-most 2
-  dimensions of `input` are replaced with their 2D Fourier transform. The
-  inner-most dimension contains `fft_length / 2 + 1` unique frequency
-  components.
-
-@compatibility(numpy)
-Equivalent to np.fft.rfft2
-@end_compatibility
-)doc");
+    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 2); });
 
 REGISTER_OP("IRFFT2D")
     .Input("input: complex64")
     .Input("fft_length: int32")
     .Output("output: float")
-    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 2); })
-    .Doc(R"doc(
-Inverse 2D real-valued fast Fourier transform.
-
-Computes the inverse 2-dimensional discrete Fourier transform of a real-valued
-signal over the inner-most 2 dimensions of `input`.
-
-The inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`:
-The inner-most dimension contains the `fft_length / 2 + 1` unique components of
-the DFT of a real-valued signal. If `fft_length` is not provided, it is computed
-from the size of the inner-most 2 dimensions of `input`. If the FFT length used
-to compute `input` is odd, it should be provided since it cannot be inferred
-properly.
-
-Along each axis `IRFFT2D` is computed on, if `fft_length` (or
-`fft_length / 2 + 1` for the inner-most dimension) is smaller than the
-corresponding dimension of `input`, the dimension is cropped. If it is larger,
-the dimension is padded with zeros.
-
-input: A complex64 tensor.
-fft_length: An int32 tensor of shape [2]. The FFT length for each dimension.
-output: A float32 tensor of the same rank as `input`. The inner-most 2
-  dimensions of `input` are replaced with the `fft_length` samples of their
-  inverse 2D Fourier transform.
-
-@compatibility(numpy)
-Equivalent to np.fft.irfft2
-@end_compatibility
-)doc");
+    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 2); });
 
 REGISTER_OP("RFFT3D")
     .Input("input: float")
     .Input("fft_length: int32")
     .Output("output: complex64")
-    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 3); })
-    .Doc(R"doc(
-3D real-valued fast Fourier transform.
-
-Computes the 3-dimensional discrete Fourier transform of a real-valued signal
-over the inner-most 3 dimensions of `input`.
-
-Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the
-`fft_length / 2 + 1` unique components of the FFT for the inner-most dimension
-of `output`: the zero-frequency term, followed by the `fft_length / 2`
-positive-frequency terms.
-
-Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the
-corresponding dimension of `input`, the dimension is cropped. If it is larger,
-the dimension is padded with zeros.
-
-input: A float32 tensor.
-fft_length: An int32 tensor of shape [3]. The FFT length for each dimension.
-output: A complex64 tensor of the same rank as `input`. The inner-most 3
-  dimensions of `input` are replaced with the their 3D Fourier transform. The
-  inner-most dimension contains `fft_length / 2 + 1` unique frequency
-  components.
-
-@compatibility(numpy)
-Equivalent to np.fft.rfftn with 3 dimensions.
-@end_compatibility
-)doc");
+    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, true, 3); });
 
 REGISTER_OP("IRFFT3D")
     .Input("input: complex64")
     .Input("fft_length: int32")
     .Output("output: float")
-    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 3); })
-    .Doc(R"doc(
-Inverse 3D real-valued fast Fourier transform.
-
-Computes the inverse 3-dimensional discrete Fourier transform of a real-valued
-signal over the inner-most 3 dimensions of `input`.
-
-The inner-most 3 dimensions of `input` are assumed to be the result of `RFFT3D`:
-The inner-most dimension contains the `fft_length / 2 + 1` unique components of
-the DFT of a real-valued signal. If `fft_length` is not provided, it is computed
-from the size of the inner-most 3 dimensions of `input`. If the FFT length used
-to compute `input` is odd, it should be provided since it cannot be inferred
-properly.
-
-Along each axis `IRFFT3D` is computed on, if `fft_length` (or
-`fft_length / 2 + 1` for the inner-most dimension) is smaller than the
-corresponding dimension of `input`, the dimension is cropped. If it is larger,
-the dimension is padded with zeros.
-
-input: A complex64 tensor.
-fft_length: An int32 tensor of shape [3]. The FFT length for each dimension.
-output: A float32 tensor of the same rank as `input`. The inner-most 3
-  dimensions of `input` are replaced with the `fft_length` samples of their
-  inverse 3D real Fourier transform.
-
-@compatibility(numpy)
-Equivalent to np.irfftn with 3 dimensions.
-@end_compatibility
-)doc");
+    .SetShapeFn([](InferenceContext* c) { return RFFTShape(c, false, 3); });
 
 // Deprecated ops:
 REGISTER_OP("BatchFFT")
diff --git a/tensorflow/core/ops/spectral_ops_test.cc b/tensorflow/core/ops/spectral_ops_test.cc
index 0f8a3e6ef1366b2de08ee352bc54d1bf874a6bed..b1c5e95fc5ce25496d18202182cc418496349bb6 100644
--- a/tensorflow/core/ops/spectral_ops_test.cc
+++ b/tensorflow/core/ops/spectral_ops_test.cc
@@ -22,7 +22,7 @@ namespace tensorflow {
 TEST(MathOpsTest, FFT_ShapeFn) {
   for (const auto* op_name : {"FFT", "IFFT"}) {
     ShapeInferenceTestOp op(op_name);
-    INFER_OK(op, "?", "?");
+    INFER_OK(op, "?", "in0");
     INFER_ERROR("Shape must be at least rank 1 but is rank 0", op, "[]");
     INFER_OK(op, "[?]", "in0");
     INFER_OK(op, "[1]", "in0");
@@ -31,7 +31,7 @@ TEST(MathOpsTest, FFT_ShapeFn) {
 
   for (const auto* op_name : {"FFT2D", "IFFT2D"}) {
     ShapeInferenceTestOp op(op_name);
-    INFER_OK(op, "?", "?");
+    INFER_OK(op, "?", "in0");
     INFER_ERROR("Shape must be at least rank 2 but is rank 1", op, "[1]");
     INFER_OK(op, "[?,1]", "in0");
     INFER_OK(op, "[1,2]", "in0");
@@ -40,7 +40,7 @@ TEST(MathOpsTest, FFT_ShapeFn) {
 
   for (const auto* op_name : {"FFT3D", "IFFT3D"}) {
     ShapeInferenceTestOp op(op_name);
-    INFER_OK(op, "?", "?");
+    INFER_OK(op, "?", "in0");
     INFER_ERROR("Shape must be at least rank 3 but is rank 2", op, "[1,2]");
     INFER_OK(op, "[?,1,?]", "in0");
     INFER_OK(op, "[1,2,3]", "in0");
diff --git a/tensorflow/core/ops/state_ops.cc b/tensorflow/core/ops/state_ops.cc
index da5f091e9f1988721b1947ad812851e0322efa9e..7a524b60c0aa711f36158b73b93fa91606266592 100644
--- a/tensorflow/core/ops/state_ops.cc
+++ b/tensorflow/core/ops/state_ops.cc
@@ -28,22 +28,7 @@ REGISTER_OP("VariableV2")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ExplicitShape)
-    .Doc(R"doc(
-Holds state in the form of a tensor that persists across steps.
-
-Outputs a ref to the tensor state so it may be read or modified.
-TODO(zhifengc/mrry): Adds a pointer to a more detail document
-about sharing states in tensorflow.
-
-ref: A reference to the variable tensor.
-shape: The shape of the variable tensor.
-dtype: The type of elements in the variable tensor.
-container: If non-empty, this variable is placed in the given container.
-        Otherwise, a default container is used.
-shared_name: If non-empty, this variable is named in the given bucket
-             with this shared_name. Otherwise, the node name is used instead.
-)doc");
+    .SetShapeFn(shape_inference::ExplicitShape);
 
 REGISTER_OP("Variable")
     .Output("ref: Ref(dtype)")
@@ -67,23 +52,14 @@ REGISTER_OP("Variable")
       TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(shape, &out));
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc("Use VariableV2 instead.");
+    });
 
 REGISTER_OP("IsVariableInitialized")
     .Input("ref: Ref(dtype)")
     .Output("is_initialized: bool")
     .Attr("dtype: type")
     .SetAllowsUninitializedInput()
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"doc(
-Checks whether a tensor has been initialized.
-
-Outputs boolean scalar indicating whether the tensor has been initialized.
-
-ref: Should be from a `Variable` node. May be uninitialized.
-dtype: The type of elements in the variable tensor.
-)doc");
+    .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("TemporaryVariable")
     .Output("ref: Ref(dtype)")
@@ -91,53 +67,14 @@ REGISTER_OP("TemporaryVariable")
     .Attr("dtype: type")
     .Attr("var_name: string = ''")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::ExplicitShape)
-    .Doc(R"doc(
-Returns a tensor that may be mutated, but only persists within a single step.
-
-This is an experimental op for internal use only and it is possible to use this
-op in unsafe ways.  DO NOT USE unless you fully understand the risks.
-
-It is the caller's responsibility to ensure that 'ref' is eventually passed to a
-matching 'DestroyTemporaryVariable' op after all other uses have completed.
-
-Outputs a ref to the tensor state so it may be read or modified.
-
-  E.g.
-      var = state_ops._temporary_variable([1, 2], types.float_)
-      var_name = var.op.name
-      var = state_ops.assign(var, [[4.0, 5.0]])
-      var = state_ops.assign_add(var, [[6.0, 7.0]])
-      final = state_ops._destroy_temporary_variable(var, var_name=var_name)
-
-ref: A reference to the variable tensor.
-shape: The shape of the variable tensor.
-dtype: The type of elements in the variable tensor.
-var_name: Overrides the name used for the temporary variable resource. Default
-value is the name of the 'TemporaryVariable' op (which is guaranteed unique).
-)doc");
+    .SetShapeFn(shape_inference::ExplicitShape);
 
 REGISTER_OP("DestroyTemporaryVariable")
     .Input("ref: Ref(T)")
     .Output("value: T")
     .Attr("T: type")
     .Attr("var_name: string")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Destroys the temporary variable and returns its final value.
-
-Sets output to the value of the Tensor pointed to by 'ref', then destroys
-the temporary variable called 'var_name'.
-All other uses of 'ref' *must* have executed before this op.
-This is typically achieved by chaining the ref through each assign op, or by
-using control dependencies.
-
-Outputs the final value of the tensor pointed to by 'ref'.
-
-ref: A reference to the temporary variable tensor.
-var_name: Name of the temporary variable, usually the name of the matching
-'TemporaryVariable' op.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Assign")
     .Input("ref: Ref(T)")
@@ -156,23 +93,7 @@ REGISTER_OP("Assign")
 
       c->set_output(0, c->input(1));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Update 'ref' by assigning 'value' to it.
-
-This operation outputs "ref" after the assignment is done.
-This makes it easier to chain operations that need to use the reset value.
-
-ref: Should be from a `Variable` node. May be uninitialized.
-value: The value to be assigned to the variable.
-validate_shape: If true, the operation will validate that the shape
-  of 'value' matches the shape of the Tensor being assigned to.  If false,
-  'ref' will take on the shape of 'value'.
-use_locking: If True, the assignment will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-output_ref:= Same as "ref".  Returned as a convenience for operations that want
-  to use the new value after the variable has been reset.
-)doc");
+    });
 
 REGISTER_OP("AssignAdd")
     .Input("ref: Ref(T)")
@@ -180,20 +101,7 @@ REGISTER_OP("AssignAdd")
     .Output("output_ref: Ref(T)")
     .Attr("T: numbertype")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
-    .Doc(R"doc(
-Update 'ref' by adding 'value' to it.
-
-This operation outputs "ref" after the update is done.
-This makes it easier to chain operations that need to use the reset value.
-
-ref: Should be from a `Variable` node.
-value: The value to be added to the variable.
-use_locking: If True, the addition will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-output_ref:= Same as "ref".  Returned as a convenience for operations that want
-  to use the new value after the variable has been updated.
-)doc");
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
 REGISTER_OP("AssignSub")
     .Input("ref: Ref(T)")
@@ -201,20 +109,7 @@ REGISTER_OP("AssignSub")
     .Output("output_ref: Ref(T)")
     .Attr("T: numbertype")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
-    .Doc(R"doc(
-Update 'ref' by subtracting 'value' from it.
-
-This operation outputs "ref" after the update is done.
-This makes it easier to chain operations that need to use the reset value.
-
-ref: Should be from a `Variable` node.
-value: The value to be subtracted to the variable.
-use_locking: If True, the subtraction will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-output_ref:= Same as "ref".  Returned as a convenience for operations that want
-  to use the new value after the variable has been updated.
-)doc");
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
 namespace {
 
@@ -243,44 +138,7 @@ REGISTER_OP("ScatterUpdate")
     .Attr("T: type")
     .Attr("Tindices: {int32, int64}")
     .Attr("use_locking: bool = true")
-    .SetShapeFn(ScatterUpdateShape)
-    .Doc(R"doc(
-Applies sparse updates to a variable reference.
-
-This operation computes
-
-```python
-    # Scalar indices
-    ref[indices, ...] = updates[...]
-
-    # Vector indices (for each i)
-    ref[indices[i], ...] = updates[i, ...]
-
-    # High rank indices (for each i, ..., j)
-    ref[indices[i, ..., j], ...] = updates[i, ..., j, ...]
-```
-
-This operation outputs `ref` after the update is done.
-This makes it easier to chain operations that need to use the reset value.
-
-If values in `ref` is to be updated more than once, because there are
-duplicate entries in `indices`, the order at which the updates happen
-for each value is undefined.
-
-Requires `updates.shape = indices.shape + ref.shape[1:]`.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/ScatterUpdate.png" alt>
-</div>
-
-ref: Should be from a `Variable` node.
-indices: A tensor of indices into the first dimension of `ref`.
-updates: A tensor of updated values to store in `ref`.
-output_ref:= Same as `ref`.  Returned as a convenience for operations that want
-  to use the updated values after the update is done.
-use_locking: If True, the assignment will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    .SetShapeFn(ScatterUpdateShape);
 
 REGISTER_OP("ScatterAdd")
     .Input("ref: Ref(T)")
@@ -290,41 +148,7 @@ REGISTER_OP("ScatterAdd")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(ScatterUpdateShape)
-    .Doc(R"doc(
-Adds sparse updates to a variable reference.
-
-This operation computes
-
-    # Scalar indices
-    ref[indices, ...] += updates[...]
-
-    # Vector indices (for each i)
-    ref[indices[i], ...] += updates[i, ...]
-
-    # High rank indices (for each i, ..., j)
-    ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]
-
-This operation outputs `ref` after the update is done.
-This makes it easier to chain operations that need to use the reset value.
-
-Duplicate entries are handled correctly: if multiple `indices` reference
-the same location, their contributions add.
-
-Requires `updates.shape = indices.shape + ref.shape[1:]`.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/ScatterAdd.png" alt>
-</div>
-
-ref: Should be from a `Variable` node.
-indices: A tensor of indices into the first dimension of `ref`.
-updates: A tensor of updated values to add to `ref`.
-output_ref:= Same as `ref`.  Returned as a convenience for operations that want
-  to use the updated values after the update is done.
-use_locking: If True, the addition will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    .SetShapeFn(ScatterUpdateShape);
 
 REGISTER_OP("ScatterSub")
     .Input("ref: Ref(T)")
@@ -334,41 +158,7 @@ REGISTER_OP("ScatterSub")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(ScatterUpdateShape)
-    .Doc(R"doc(
-Subtracts sparse updates to a variable reference.
-
-```python
-    # Scalar indices
-    ref[indices, ...] -= updates[...]
-
-    # Vector indices (for each i)
-    ref[indices[i], ...] -= updates[i, ...]
-
-    # High rank indices (for each i, ..., j)
-    ref[indices[i, ..., j], ...] -= updates[i, ..., j, ...]
-```
-
-This operation outputs `ref` after the update is done.
-This makes it easier to chain operations that need to use the reset value.
-
-Duplicate entries are handled correctly: if multiple `indices` reference
-the same location, their (negated) contributions add.
-
-Requires `updates.shape = indices.shape + ref.shape[1:]`.
-
-<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/ScatterSub.png" alt>
-</div>
-
-ref: Should be from a `Variable` node.
-indices: A tensor of indices into the first dimension of `ref`.
-updates: A tensor of updated values to subtract from `ref`.
-output_ref:= Same as `ref`.  Returned as a convenience for operations that want
-  to use the updated values after the update is done.
-use_locking: If True, the subtraction will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    .SetShapeFn(ScatterUpdateShape);
 
 REGISTER_OP("ScatterMul")
     .Input("ref: Ref(T)")
@@ -378,39 +168,7 @@ REGISTER_OP("ScatterMul")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(ScatterUpdateShape)
-    .Doc(R"doc(
-Multiplies sparse updates into a variable reference.
-
-This operation computes
-
-```python
-    # Scalar indices
-    ref[indices, ...] *= updates[...]
-
-    # Vector indices (for each i)
-    ref[indices[i], ...] *= updates[i, ...]
-
-    # High rank indices (for each i, ..., j)
-    ref[indices[i, ..., j], ...] *= updates[i, ..., j, ...]
-```
-
-This operation outputs `ref` after the update is done.
-This makes it easier to chain operations that need to use the reset value.
-
-Duplicate entries are handled correctly: if multiple `indices` reference
-the same location, their contributions multiply.
-
-Requires `updates.shape = indices.shape + ref.shape[1:]`.
-
-ref: Should be from a `Variable` node.
-indices: A tensor of indices into the first dimension of `ref`.
-updates: A tensor of updated values to multiply to `ref`.
-output_ref:= Same as `ref`.  Returned as a convenience for operations that want
-  to use the updated values after the update is done.
-use_locking: If True, the operation will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    .SetShapeFn(ScatterUpdateShape);
 
 REGISTER_OP("ScatterDiv")
     .Input("ref: Ref(T)")
@@ -420,39 +178,7 @@ REGISTER_OP("ScatterDiv")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(ScatterUpdateShape)
-    .Doc(R"doc(
-Divides a variable reference by sparse updates.
-
-This operation computes
-
-```python
-    # Scalar indices
-    ref[indices, ...] /= updates[...]
-
-    # Vector indices (for each i)
-    ref[indices[i], ...] /= updates[i, ...]
-
-    # High rank indices (for each i, ..., j)
-    ref[indices[i, ..., j], ...] /= updates[i, ..., j, ...]
-```
-
-This operation outputs `ref` after the update is done.
-This makes it easier to chain operations that need to use the reset value.
-
-Duplicate entries are handled correctly: if multiple `indices` reference
-the same location, their contributions divide.
-
-Requires `updates.shape = indices.shape + ref.shape[1:]`.
-
-ref: Should be from a `Variable` node.
-indices: A tensor of indices into the first dimension of `ref`.
-updates: A tensor of values that `ref` is divided by.
-output_ref:= Same as `ref`.  Returned as a convenience for operations that want
-  to use the updated values after the update is done.
-use_locking: If True, the operation will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    .SetShapeFn(ScatterUpdateShape);
 
 REGISTER_OP("ScatterNdUpdate")
     .Input("ref: Ref(T)")
@@ -462,56 +188,16 @@ REGISTER_OP("ScatterNdUpdate")
     .Attr("T: type")
     .Attr("Tindices: {int32, int64}")
     .Attr("use_locking: bool = true")
-    .SetShapeFn(shape_inference::ScatterNdUpdateShape)
-    .Doc(R"doc(
-Applies sparse `updates` to individual values or slices within a given
-variable according to `indices`.
-
-`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
-
-`indices` must be integer tensor, containing indices into `ref`.
-It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
-
-The innermost dimension of `indices` (with length `K`) corresponds to
-indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
-dimension of `ref`.
+    .SetShapeFn(shape_inference::ScatterNdUpdateShape);
 
-`updates` is `Tensor` of rank `Q-1+P-K` with shape:
-
-```
-[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
-```
-
-For example, say we want to update 4 scattered elements to a rank-1 tensor to
-8 elements. In Python, that update would look like this:
-
-```python
-    ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
-    indices = tf.constant([[4], [3], [1] ,[7]])
-    updates = tf.constant([9, 10, 11, 12])
-    update = tf.scatter_nd_update(ref, indices, updates)
-    with tf.Session() as sess:
-      print sess.run(update)
-```
-
-The resulting update to ref would look like this:
-
-    [1, 11, 3, 10, 9, 6, 7, 12]
-
-See @{tf.scatter_nd} for more details about how to make updates to
-slices.
-
-ref: A mutable Tensor. Should be from a Variable node.
-indices: A Tensor. Must be one of the following types: int32, int64.
-  A tensor of indices into ref.
-updates: A Tensor. Must have the same type as ref. A tensor of updated
-  values to add to ref.
-use_locking: An optional bool. Defaults to True. If True, the assignment will
-  be protected by a lock; otherwise the behavior is undefined,
-  but may exhibit less contention.
-output_ref: Same as ref. Returned as a convenience for operations that want to
-  use the updated values after the update is done.
-)doc");
+REGISTER_OP("ResourceScatterNdUpdate")
+    .Input("ref: resource")
+    .Input("indices: Tindices")
+    .Input("updates: T")
+    .Attr("T: type")
+    .Attr("Tindices: {int32, int64}")
+    .Attr("use_locking: bool = true")
+    .SetShapeFn(shape_inference::ScatterNdUpdateShape);
 
 REGISTER_OP("ScatterNdAdd")
     .Input("ref: Ref(T)")
@@ -521,54 +207,7 @@ REGISTER_OP("ScatterNdAdd")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(shape_inference::ScatterNdUpdateShape)
-    .Doc(R"doc(
-Applies sparse addition between `updates` and individual values or slices
-within a given variable according to `indices`.
-
-`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
-
-`indices` must be integer tensor, containing indices into `ref`.
-It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
-
-The innermost dimension of `indices` (with length `K`) corresponds to
-indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
-dimension of `ref`.
-
-`updates` is `Tensor` of rank `Q-1+P-K` with shape:
-
-```
-[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
-```
-
-For example, say we want to add 4 scattered elements to a rank-1 tensor to 8
-elements. In Python, that addition would look like this:
-
-    ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
-    indices = tf.constant([[4], [3], [1], [7]])
-    updates = tf.constant([9, 10, 11, 12])
-    add = tf.scatter_nd_add(ref, indices, updates)
-    with tf.Session() as sess:
-      print sess.run(add)
-
-The resulting update to ref would look like this:
-
-    [1, 13, 3, 14, 14, 6, 7, 20]
-
-See @{tf.scatter_nd} for more details about how to make updates to
-slices.
-
-ref: A mutable Tensor. Should be from a Variable node.
-indices: A Tensor. Must be one of the following types: int32, int64.
-  A tensor of indices into ref.
-updates: A Tensor. Must have the same type as ref. A tensor of updated values
-  to add to ref.
-use_locking: An optional bool. Defaults to True. If True, the assignment will
-  be protected by a lock; otherwise the behavior is undefined,
-  but may exhibit less contention.
-output_ref: Same as ref. Returned as a convenience for operations that want
-  to use the updated values after the update is done.
-)doc");
+    .SetShapeFn(shape_inference::ScatterNdUpdateShape);
 
 REGISTER_OP("ScatterNdSub")
     .Input("ref: Ref(T)")
@@ -578,54 +217,7 @@ REGISTER_OP("ScatterNdSub")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(shape_inference::ScatterNdUpdateShape)
-    .Doc(R"doc(
-Applies sparse subtraction between `updates` and individual values or slices
-within a given variable according to `indices`.
-
-`ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
-
-`indices` must be integer tensor, containing indices into `ref`.
-It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
-
-The innermost dimension of `indices` (with length `K`) corresponds to
-indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
-dimension of `ref`.
-
-`updates` is `Tensor` of rank `Q-1+P-K` with shape:
-
-```
-[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
-```
-
-For example, say we want to subtract 4 scattered elements from a rank-1 tensor
-with 8 elements. In Python, that subtraction would look like this:
-
-    ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
-    indices = tf.constant([[4], [3], [1], [7]])
-    updates = tf.constant([9, 10, 11, 12])
-    sub = tf.scatter_nd_sub(ref, indices, updates)
-    with tf.Session() as sess:
-      print sess.run(sub)
-
-The resulting update to ref would look like this:
-
-    [1, -9, 3, -6, -4, 6, 7, -4]
-
-See @{tf.scatter_nd} for more details about how to make updates to
-slices.
-
-ref: A mutable Tensor. Should be from a Variable node.
-indices: A Tensor. Must be one of the following types: int32, int64.
-  A tensor of indices into ref.
-updates: A Tensor. Must have the same type as ref. A tensor of updated values
-  to subtract from ref.
-use_locking: An optional bool. Defaults to True. If True, the assignment will
-  be protected by a lock; otherwise the behavior is undefined,
-  but may exhibit less contention.
-output_ref: Same as ref. Returned as a convenience for operations that want
-  to use the updated values after the update is done.
-)doc");
+    .SetShapeFn(shape_inference::ScatterNdUpdateShape);
 
 REGISTER_OP("CountUpTo")
     .Input("ref: Ref(T)")
@@ -637,16 +229,7 @@ REGISTER_OP("CountUpTo")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &output));
       c->set_output(0, output);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Increments 'ref' until it reaches 'limit'.
-
-ref: Should be from a scalar `Variable` node.
-limit: If incrementing ref would bring it above limit, instead generates an
-  'OutOfRange' error.
-output: A copy of the input before increment. If nothing else modifies the
-  input, the values produced will all be distinct.
-)doc");
+    });
 
 REGISTER_OP("ResourceCountUpTo")
     .Input("resource: resource")
@@ -670,15 +253,6 @@ REGISTER_OP("ResourceCountUpTo")
       TF_RETURN_IF_ERROR(c->WithRank(shape_and_type.shape, 0, &output));
       c->set_output(0, output);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Increments variable pointed to by 'resource' until it reaches 'limit'.
-
-resource: Should be from a scalar `Variable` node.
-limit: If incrementing ref would bring it above limit, instead generates an
-  'OutOfRange' error.
-output: A copy of the input before increment. If nothing else modifies the
-  input, the values produced will all be distinct.
-)doc");
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/stateless_random_ops.cc b/tensorflow/core/ops/stateless_random_ops.cc
index 3e1f8781fcd7718e3443b0b4bee5ea5d33980524..553850610a3c51986664fee52e04809626de22c1 100644
--- a/tensorflow/core/ops/stateless_random_ops.cc
+++ b/tensorflow/core/ops/stateless_random_ops.cc
@@ -46,52 +46,13 @@ static Status StatelessShape(shape_inference::InferenceContext* context) {
       .SetShapeFn(StatelessShape)
 
 // This op is exposed through contrib/stateless only.  The interface may change.
-REGISTER_STATELESS_OP("StatelessRandomUniform")
-    .Doc(R"doc(
-Outputs deterministic pseudorandom random values from a uniform distribution.
-
-The generated values follow a uniform distribution in the range `[0, 1)`. The
-lower bound 0 is included in the range, while the upper bound 1 is excluded.
-
-The outputs are a deterministic function of `shape` and `seed`.
-
-shape: The shape of the output tensor.
-dtype: The type of the output.
-seed: 2 seeds (shape [2]).
-output: Random values with specified shape.
-)doc");
+REGISTER_STATELESS_OP("StatelessRandomUniform");
 
 // This op is exposed through contrib/stateless only.  The interface may change.
-REGISTER_STATELESS_OP("StatelessRandomNormal")
-    .Doc(R"doc(
-Outputs deterministic pseudorandom values from a normal distribution.
-
-The generated values will have mean 0 and standard deviation 1.
-
-The outputs are a deterministic function of `shape` and `seed`.
-
-shape: The shape of the output tensor.
-dtype: The type of the output.
-seed: 2 seeds (shape [2]).
-output: Random values with specified shape.
-)doc");
+REGISTER_STATELESS_OP("StatelessRandomNormal");
 
 // This op is exposed through contrib/stateless only.  The interface may change.
-REGISTER_STATELESS_OP("StatelessTruncatedNormal")
-    .Doc(R"doc(
-Outputs deterministic pseudorandom values from a truncated normal distribution.
-
-The generated values follow a normal distribution with mean 0 and standard
-deviation 1, except that values whose magnitude is more than 2 standard
-deviations from the mean are dropped and re-picked.
-
-The outputs are a deterministic function of `shape` and `seed`.
-
-shape: The shape of the output tensor.
-dtype: The type of the output.
-seed: 2 seeds (shape [2]).
-output: Random values with specified shape.
-)doc");
+REGISTER_STATELESS_OP("StatelessTruncatedNormal");
 
 #undef REGISTER_STATELESS_OP
 
diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc
index aebd14c7e55c6bd794e85061de275407a82f71c5..8beb28de0a2e32832b2db60eeb8272a88536e91f 100644
--- a/tensorflow/core/ops/string_ops.cc
+++ b/tensorflow/core/ops/string_ops.cc
@@ -27,67 +27,20 @@ REGISTER_OP("StringToHashBucketFast")
     .Input("input: string")
     .Output("output: int64")
     .Attr("num_buckets: int >= 1")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Converts each string in the input Tensor to its hash mod by a number of buckets.
-
-The hash function is deterministic on the content of the string within the
-process and will never change. However, it is not suitable for cryptography.
-This function may be used when CPU time is scarce and inputs are trusted or
-unimportant. There is a risk of adversaries constructing inputs that all hash
-to the same bucket. To prevent this problem, use a strong hash function with
-`tf.string_to_hash_bucket_strong`.
-
-input: The strings to assign a hash bucket.
-num_buckets: The number of buckets.
-output: A Tensor of the same shape as the input `string_tensor`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("StringToHashBucketStrong")
     .Input("input: string")
     .Output("output: int64")
     .Attr("num_buckets: int >= 1")
     .Attr("key: list(int)")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Converts each string in the input Tensor to its hash mod by a number of buckets.
-
-The hash function is deterministic on the content of the string within the
-process. The hash function is a keyed hash function, where attribute `key`
-defines the key of the hash function. `key` is an array of 2 elements.
-
-A strong hash is important when inputs may be malicious, e.g. URLs with
-additional components. Adversaries could try to make their inputs hash to the
-same bucket for a denial-of-service attack or to skew the results. A strong
-hash prevents this by making it difficult, if not infeasible, to compute inputs
-that hash to the same bucket. This comes at a cost of roughly 4x higher compute
-time than `tf.string_to_hash_bucket_fast`.
-
-input: The strings to assign a hash bucket.
-num_buckets: The number of buckets.
-key: The key for the keyed hash function passed as a list of two uint64
-  elements.
-output: A Tensor of the same shape as the input `string_tensor`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("StringToHashBucket")
     .Input("string_tensor: string")
     .Output("output: int64")
     .Attr("num_buckets: int >= 1")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Converts each string in the input Tensor to its hash mod by a number of buckets.
-
-The hash function is deterministic on the content of the string within the
-process.
-
-Note that the hash function may change from time to time.
-This functionality will be deprecated and it's recommended to use
-`tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`.
-
-num_buckets: The number of buckets.
-output: A Tensor of the same shape as the input `string_tensor`.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("ReduceJoin")
     .Input("inputs: string")
@@ -95,41 +48,7 @@ REGISTER_OP("ReduceJoin")
     .Attr("keep_dims: bool = false")
     .Attr("separator: string = ''")
     .Output("output: string")
-    .SetShapeFn(shape_inference::ReductionShape)
-    .Doc(R"doc(
-Joins a string Tensor across the given dimensions.
-
-Computes the string join across dimensions in the given string Tensor of shape
-`[d_0, d_1, ..., d_n-1]`.  Returns a new Tensor created by joining the input
-strings with the given separator (default: empty string).  Negative indices are
-counted backwards from the end, with `-1` being equivalent to `n - 1`.
-
-For example:
-
-```python
-# tensor `a` is [["a", "b"], ["c", "d"]]
-tf.reduce_join(a, 0) ==> ["ac", "bd"]
-tf.reduce_join(a, 1) ==> ["ab", "cd"]
-tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"]
-tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"]
-tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]]
-tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]]
-tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"]
-tf.reduce_join(a, [0, 1]) ==> ["acbd"]
-tf.reduce_join(a, [1, 0]) ==> ["abcd"]
-tf.reduce_join(a, []) ==> ["abcd"]
-```
-
-inputs: The input to be joined.  All reduced indices must have non-zero size.
-reduction_indices: The dimensions to reduce over.  Dimensions are reduced in the
-  order specified.  Omitting `reduction_indices` is equivalent to passing
-  `[n-1, n-2, ..., 0]`.  Negative indices from `-n` to `-1` are supported.
-keep_dims: If `True`, retain reduced dimensions with length `1`.
-separator: The separator to use when joining.
-
-output: Has shape equal to that of the input with reduced dimensions removed or
-  set to `1` depending on `keep_dims`.
-)doc");
+    .SetShapeFn(shape_inference::ReductionShape);
 
 REGISTER_OP("AsString")
     .Input("input: T")
@@ -140,22 +59,7 @@ REGISTER_OP("AsString")
     .Attr("shortest: bool = false")
     .Attr("width: int = -1")
     .Attr("fill: string = ''")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Converts each entry in the given tensor to strings.  Supports many numeric
-types and boolean.
-
-precision: The post-decimal precision to use for floating point numbers.
-  Only used if precision > -1.
-scientific: Use scientific notation for floating point numbers.
-shortest: Use shortest representation (either scientific or standard) for
-  floating point numbers.
-width: Pad pre-decimal numbers to this width.
-  Applies to both floating point and integer numbers.
-  Only used if width > -1.
-fill: The value to pad if width > -1.  If empty, pads with spaces.
-  Another typical value is '0'.  String cannot be longer than 1 character.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("StringJoin")
     .Input("inputs: N * string")
@@ -185,16 +89,7 @@ REGISTER_OP("StringJoin")
       }
       c->set_output(0, out);
       return Status::OK();
-    })
-    .Doc(R"doc(
-Joins the strings in the given list of string tensors into one tensor;
-with the given separator (default is an empty separator).
-
-inputs: A list of string tensors.  The tensors must all have the same shape,
-  or be scalars.  Scalars may be mixed in; these will be broadcast to the shape
-  of non-scalar inputs.
-separator: string, an optional join separator.
-)doc");
+    });
 
 REGISTER_OP("StringSplit")
     .Input("input: string")
@@ -212,74 +107,18 @@ REGISTER_OP("StringSplit")
       c->set_output(1, c->Vector(InferenceContext::kUnknownDim));
       c->set_output(2, c->Vector(2));
       return Status::OK();
-    })
-    .Doc(R"doc(
-Split elements of `input` based on `delimiter` into a `SparseTensor`.
-
-Let N be the size of source (typically N will be the batch size). Split each
-element of `input` based on `delimiter` and return a `SparseTensor`
-containing the splitted tokens. Empty tokens are ignored.
-
-`delimiter` can be empty, or a string of split characters. If `delimiter` is an
- empty string, each element of `input` is split into individual single-byte
- character strings, including splitting of UTF-8 multibyte sequences. Otherwise
- every character of `delimiter` is a potential split point.
-
-For example:
-  N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output
-  will be
-
-  indices = [0, 0;
-             0, 1;
-             1, 0;
-             1, 1;
-             1, 2]
-  shape = [2, 3]
-  values = ['hello', 'world', 'a', 'b', 'c']
-
-input: 1-D. Strings to split.
-delimiter: 0-D. Delimiter characters (bytes), or empty string.
-skip_empty: A `bool`. If `True`, skip the empty strings from the result.
-indices: A dense matrix of int64 representing the indices of the sparse tensor.
-values: A vector of strings corresponding to the splited values.
-shape: a length-2 vector of int64 representing the shape of the sparse
-  tensor, where the first value is N and the second value is the maximum number
-  of tokens in a single input entry.
-)doc");
+    });
 
 REGISTER_OP("EncodeBase64")
     .Input("input: string")
     .Output("output: string")
     .Attr("pad: bool = false")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Encode strings into web-safe base64 format.
-
-Refer to the following article for more information on base64 format:
-en.wikipedia.org/wiki/Base64. Base64 strings may have padding with '=' at the
-end so that the encoded has length multiple of 4. See Padding section of the
-link above.
-
-Web-safe means that the encoder uses - and _ instead of + and /.
-
-input: Strings to be encoded.
-output: Input strings encoded in base64.
-pad: Bool whether padding is applied at the ends.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("DecodeBase64")
     .Input("input: string")
     .Output("output: string")
-    .SetShapeFn(shape_inference::UnchangedShape)
-    .Doc(R"doc(
-Decode web-safe base64-encoded strings.
-
-Input may or may not have padding at the end. See EncodeBase64 for padding.
-Web-safe means that input must use - and _ instead of + and /.
-
-input: Base64 strings to decode.
-output: Decoded strings.
-)doc");
+    .SetShapeFn(shape_inference::UnchangedShape);
 
 REGISTER_OP("Substr")
     .Input("input: string")
@@ -306,88 +145,6 @@ REGISTER_OP("Substr")
       // c->input(0) is the ShapeHandle to input strings
       // BroadcastBinaryOpShapeFn infers shape from c->input(0) and c->input(1).
       return shape_inference::BroadcastBinaryOpShapeFn(c);
-    })
-    .Doc(R"doc(
-Return substrings from `Tensor` of strings.
-
-For each string in the input `Tensor`, creates a substring starting at index
-`pos` with a total length of `len`.
-
-If `len` defines a substring that would extend beyond the length of the input
-string, then as many characters as possible are used.
-
-If `pos` is negative or specifies a character index larger than any of the input
-strings, then an `InvalidArgumentError` is thrown.
-
-`pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on
-Op creation.
-
-*NOTE*: `Substr` supports broadcasting up to two dimensions. More about
-broadcasting
-[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-
----
-
-Examples
-
-Using scalar `pos` and `len`:
-
-```python
-input = [b'Hello', b'World']
-position = 1
-length = 3
-
-output = [b'ell', b'orl']
-```
-
-Using `pos` and `len` with same shape as `input`:
-
-```python
-input = [[b'ten', b'eleven', b'twelve'],
-         [b'thirteen', b'fourteen', b'fifteen'],
-         [b'sixteen', b'seventeen', b'eighteen']]
-position = [[1, 2, 3],
-            [1, 2, 3],
-            [1, 2, 3]]
-length =   [[2, 3, 4],
-            [4, 3, 2],
-            [5, 5, 5]]
-
-output = [[b'en', b'eve', b'lve'],
-          [b'hirt', b'urt', b'te'],
-          [b'ixtee', b'vente', b'hteen']]
-```
-
-Broadcasting `pos` and `len` onto `input`:
-
-```
-input = [[b'ten', b'eleven', b'twelve'],
-         [b'thirteen', b'fourteen', b'fifteen'],
-         [b'sixteen', b'seventeen', b'eighteen'],
-         [b'nineteen', b'twenty', b'twentyone']]
-position = [1, 2, 3]
-length =   [1, 2, 3]
-
-output = [[b'e', b'ev', b'lve'],
-          [b'h', b'ur', b'tee'],
-          [b'i', b've', b'hte'],
-          [b'i', b'en', b'nty']]
-```
-
-Broadcasting `input` onto `pos` and `len`:
-
-```
-input = b'thirteen'
-position = [1, 5, 7]
-length =   [3, 2, 1]
-
-output = [b'hir', b'ee', b'n']
-```
-
-input: Tensor of strings
-pos: Scalar defining the position of first character in each substring
-len: Scalar defining the number of characters to include in each substring
-output: Tensor of substrings
-)doc");
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/training_ops.cc b/tensorflow/core/ops/training_ops.cc
index 405318caf20183ce267e84cd2554ed8c77a5b409..e8d03877c91402394567b05df8b738de1c15c8c6 100644
--- a/tensorflow/core/ops/training_ops.cc
+++ b/tensorflow/core/ops/training_ops.cc
@@ -116,17 +116,7 @@ REGISTER_OP("ApplyGradientDescent")
     .Output("out: Ref(T)")
     .Attr("T: numbertype")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(ApplyGradientDescentShapeFn)
-    .Doc(R"doc(
-Update '*var' by subtracting 'alpha' * 'delta' from it.
-
-var: Should be from a Variable().
-alpha: Scaling factor. Must be a scalar.
-delta: The change.
-out: Same as "var".
-use_locking: If `True`, the subtraction will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    .SetShapeFn(ApplyGradientDescentShapeFn);
 
 REGISTER_OP("ResourceApplyGradientDescent")
     .Input("var: resource")
@@ -134,16 +124,7 @@ REGISTER_OP("ResourceApplyGradientDescent")
     .Input("delta: T")
     .Attr("T: numbertype")
     .Attr("use_locking: bool = false")
-    .SetShapeFn(ApplyGradientDescentShapeFn)
-    .Doc(R"doc(
-Update '*var' by subtracting 'alpha' * 'delta' from it.
-
-var: Should be from a Variable().
-alpha: Scaling factor. Must be a scalar.
-delta: The change.
-use_locking: If `True`, the subtraction will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    .SetShapeFn(ApplyGradientDescentShapeFn);
 
 static Status ApplyProximalGradientDescentShapeFn(InferenceContext* c,
                                                   bool sparse) {
@@ -171,21 +152,7 @@ REGISTER_OP("ApplyProximalGradientDescent")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalGradientDescentShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' as FOBOS algorithm with fixed learning rate.
-prox_v = var - alpha * delta
-var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
-
-var: Should be from a Variable().
-alpha: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-delta: The change.
-out: Same as "var".
-use_locking: If True, the subtraction will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("SparseApplyProximalGradientDescent")
     .Input("var: Ref(T)")
@@ -200,24 +167,7 @@ REGISTER_OP("SparseApplyProximalGradientDescent")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalGradientDescentShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Sparse update '*var' as FOBOS algorithm with fixed learning rate.
-
-That is for rows we have grad for, we update var as follows:
-prox_v = var - alpha * grad
-var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
-
-var: Should be from a Variable().
-alpha: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-out: Same as "var".
-use_locking: If True, the subtraction will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceApplyProximalGradientDescent")
     .Input("var: resource")
@@ -229,20 +179,7 @@ REGISTER_OP("ResourceApplyProximalGradientDescent")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalGradientDescentShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' as FOBOS algorithm with fixed learning rate.
-prox_v = var - alpha * delta
-var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
-
-var: Should be from a Variable().
-alpha: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-delta: The change.
-use_locking: If True, the subtraction will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceSparseApplyProximalGradientDescent")
     .Input("var: resource")
@@ -256,23 +193,7 @@ REGISTER_OP("ResourceSparseApplyProximalGradientDescent")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalGradientDescentShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Sparse update '*var' as FOBOS algorithm with fixed learning rate.
-
-That is for rows we have grad for, we update var as follows:
-prox_v = var - alpha * grad
-var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
-
-var: Should be from a Variable().
-alpha: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-use_locking: If True, the subtraction will be protected by a lock;
-  otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 static Status ApplyAdadeltaShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -304,26 +225,7 @@ REGISTER_OP("ApplyAdadelta")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdadeltaShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the adadelta scheme.
-
-accum = rho() * accum + (1 - rho()) * grad.square();
-update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
-update_accum = rho() * update_accum + (1 - rho()) * update.square();
-var -= update;
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-accum_update: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-rho: Decay factor. Must be a scalar.
-epsilon: Constant factor. Must be a scalar.
-grad: The gradient.
-out: Same as "var".
-use_locking: If True, updating of the var, accum and update_accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("SparseApplyAdadelta")
     .Input("var: Ref(T)")
@@ -340,20 +242,7 @@ REGISTER_OP("SparseApplyAdadelta")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdadeltaShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-var: Should be from a Variable().
-accum: Should be from a Variable().
-accum_update:: Should be from a Variable().
-lr: Learning rate. Must be a scalar.
-rho: Decay factor. Must be a scalar.
-epsilon: Constant factor. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-out: Same as "var".
-use_locking: If True, updating of the var and accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceApplyAdadelta")
     .Input("var: resource")
@@ -367,25 +256,7 @@ REGISTER_OP("ResourceApplyAdadelta")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdadeltaShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the adadelta scheme.
-
-accum = rho() * accum + (1 - rho()) * grad.square();
-update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
-update_accum = rho() * update_accum + (1 - rho()) * update.square();
-var -= update;
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-accum_update: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-rho: Decay factor. Must be a scalar.
-epsilon: Constant factor. Must be a scalar.
-grad: The gradient.
-use_locking: If True, updating of the var, accum and update_accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceSparseApplyAdadelta")
     .Input("var: resource")
@@ -401,19 +272,7 @@ REGISTER_OP("ResourceSparseApplyAdadelta")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdadeltaShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-var: Should be from a Variable().
-accum: Should be from a Variable().
-accum_update:: Should be from a Variable().
-lr: Learning rate. Must be a scalar.
-rho: Decay factor. Must be a scalar.
-epsilon: Constant factor. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-use_locking: If True, updating of the var and accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 static Status ApplyAdagradShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -438,22 +297,7 @@ REGISTER_OP("ApplyAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the adagrad scheme.
-
-accum += grad * grad
-var -= lr * grad * (1 / sqrt(accum))
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-grad: The gradient.
-out: Same as "var".
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceApplyAdagrad")
     .Input("var: resource")
@@ -464,21 +308,7 @@ REGISTER_OP("ResourceApplyAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the adagrad scheme.
-
-accum += grad * grad
-var -= lr * grad * (1 / sqrt(accum))
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-grad: The gradient.
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 static Status ApplyProximalAdagradShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -507,23 +337,7 @@ REGISTER_OP("ApplyProximalAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalAdagradShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
-accum += grad * grad
-prox_v = var - lr * grad * (1 / sqrt(accum))
-var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-grad: The gradient.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-out: Same as "var".
-use_locking: If True, updating of the var and accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceApplyProximalAdagrad")
     .Input("var: resource")
@@ -536,22 +350,7 @@ REGISTER_OP("ResourceApplyProximalAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalAdagradShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
-accum += grad * grad
-prox_v = var - lr * grad * (1 / sqrt(accum))
-var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-grad: The gradient.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-use_locking: If True, updating of the var and accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("SparseApplyAdagrad")
     .Input("var: Ref(T)")
@@ -565,24 +364,7 @@ REGISTER_OP("SparseApplyAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update relevant entries in '*var' and '*accum' according to the adagrad scheme.
-
-That is for rows we have grad for, we update var and accum as follows:
-accum += grad * grad
-var -= lr * grad * (1 / sqrt(accum))
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-lr: Learning rate. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-out: Same as "var".
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceSparseApplyAdagrad")
     .Input("var: resource")
@@ -595,23 +377,7 @@ REGISTER_OP("ResourceSparseApplyAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update relevant entries in '*var' and '*accum' according to the adagrad scheme.
-
-That is for rows we have grad for, we update var and accum as follows:
-accum += grad * grad
-var -= lr * grad * (1 / sqrt(accum))
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-lr: Learning rate. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 static Status ApplyAdagradDAShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -647,22 +413,7 @@ REGISTER_OP("ApplyAdagradDA")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradDAShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the proximal adagrad scheme.
-
-var: Should be from a Variable().
-gradient_accumulator: Should be from a Variable().
-gradient_squared_accumulator: Should be from a Variable().
-grad: The gradient.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-global_step: Training step number. Must be a scalar.
-out: Same as "var".
-use_locking: If True, updating of the var and accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("SparseApplyAdagradDA")
     .Input("var: Ref(T)")
@@ -680,23 +431,7 @@ REGISTER_OP("SparseApplyAdagradDA")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradDAShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update entries in '*var' and '*accum' according to the proximal adagrad scheme.
-
-var: Should be from a Variable().
-gradient_accumulator: Should be from a Variable().
-gradient_squared_accumulator: Should be from a Variable().
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-lr: Learning rate. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-global_step: Training step number. Must be a scalar.
-out: Same as "var".
-use_locking: If True, updating of the var and accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("SparseApplyProximalAdagrad")
     .Input("var: Ref(T)")
@@ -712,27 +447,7 @@ REGISTER_OP("SparseApplyProximalAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalAdagradShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.
-
-That is for rows we have grad for, we update var and accum as follows:
-accum += grad * grad
-prox_v = var
-prox_v -= lr * grad * (1 / sqrt(accum))
-var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-lr: Learning rate. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-out: Same as "var".
-use_locking: If True, updating of the var and accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceApplyAdagradDA")
     .Input("var: resource")
@@ -747,21 +462,7 @@ REGISTER_OP("ResourceApplyAdagradDA")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradDAShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the proximal adagrad scheme.
-
-var: Should be from a Variable().
-gradient_accumulator: Should be from a Variable().
-gradient_squared_accumulator: Should be from a Variable().
-grad: The gradient.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-global_step: Training step number. Must be a scalar.
-use_locking: If True, updating of the var and accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceSparseApplyAdagradDA")
     .Input("var: resource")
@@ -778,22 +479,7 @@ REGISTER_OP("ResourceSparseApplyAdagradDA")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdagradDAShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update entries in '*var' and '*accum' according to the proximal adagrad scheme.
-
-var: Should be from a Variable().
-gradient_accumulator: Should be from a Variable().
-gradient_squared_accumulator: Should be from a Variable().
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-lr: Learning rate. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-global_step: Training step number. Must be a scalar.
-use_locking: If True, updating of the var and accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceSparseApplyProximalAdagrad")
     .Input("var: resource")
@@ -808,26 +494,7 @@ REGISTER_OP("ResourceSparseApplyProximalAdagrad")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyProximalAdagradShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.
-
-That is for rows we have grad for, we update var and accum as follows:
-accum += grad * grad
-prox_v = var
-prox_v -= lr * grad * (1 / sqrt(accum))
-var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-lr: Learning rate. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-use_locking: If True, updating of the var and accum tensors will be protected by
-a lock; otherwise the behavior is undefined, but may exhibit less contention.
-)doc");
+    });
 
 static Status ApplyFtrlShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -861,29 +528,7 @@ REGISTER_OP("ApplyFtrl")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the Ftrl-proximal scheme.
-
-accum_new = accum + grad * grad
-linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-accum = accum_new
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-linear: Should be from a Variable().
-grad: The gradient.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regulariation. Must be a scalar.
-l2: L2 regulariation. Must be a scalar.
-lr_power: Scaling factor. Must be a scalar.
-out: Same as "var".
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("SparseApplyFtrl")
     .Input("var: Ref(T)")
@@ -901,31 +546,7 @@ REGISTER_OP("SparseApplyFtrl")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update relevant entries in '*var' according to the Ftrl-proximal scheme.
-
-That is for rows we have grad for, we update var, accum and linear as follows:
-accum_new = accum + grad * grad
-linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-accum = accum_new
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-linear: Should be from a Variable().
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-lr_power: Scaling factor. Must be a scalar.
-out: Same as "var".
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceApplyFtrl")
     .Input("var: resource")
@@ -940,28 +561,7 @@ REGISTER_OP("ResourceApplyFtrl")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the Ftrl-proximal scheme.
-
-accum_new = accum + grad * grad
-linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-accum = accum_new
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-linear: Should be from a Variable().
-grad: The gradient.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regulariation. Must be a scalar.
-l2: L2 regulariation. Must be a scalar.
-lr_power: Scaling factor. Must be a scalar.
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceSparseApplyFtrl")
     .Input("var: resource")
@@ -978,30 +578,7 @@ REGISTER_OP("ResourceSparseApplyFtrl")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update relevant entries in '*var' according to the Ftrl-proximal scheme.
-
-That is for rows we have grad for, we update var, accum and linear as follows:
-accum_new = accum + grad * grad
-linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-accum = accum_new
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-linear: Should be from a Variable().
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: L2 regularization. Must be a scalar.
-lr_power: Scaling factor. Must be a scalar.
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ApplyFtrlV2")
     .Input("var: Ref(T)")
@@ -1018,32 +595,7 @@ REGISTER_OP("ApplyFtrlV2")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the Ftrl-proximal scheme.
-
-grad_with_shrinkage = grad + 2 * l2_shrinkage * var
-accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
-linear += grad_with_shrinkage +
-    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-accum = accum_new
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-linear: Should be from a Variable().
-grad: The gradient.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regulariation. Must be a scalar.
-l2: online L2 regulariation. Must be a scalar.
-l2: L2 shrinkage regulariation. Must be a scalar.
-lr_power: Scaling factor. Must be a scalar.
-out: Same as "var".
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("SparseApplyFtrlV2")
     .Input("var: Ref(T)")
@@ -1062,34 +614,7 @@ REGISTER_OP("SparseApplyFtrlV2")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update relevant entries in '*var' according to the Ftrl-proximal scheme.
-
-That is for rows we have grad for, we update var, accum and linear as follows:
-grad_with_shrinkage = grad + 2 * l2_shrinkage * var
-accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
-linear += grad_with_shrinkage +
-    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-accum = accum_new
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-linear: Should be from a Variable().
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: onine L2 regularization. Must be a scalar.
-l2: L2 shrinkage regulariation. Must be a scalar.
-lr_power: Scaling factor. Must be a scalar.
-out: Same as "var".
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceApplyFtrlV2")
     .Input("var: resource")
@@ -1105,31 +630,7 @@ REGISTER_OP("ResourceApplyFtrlV2")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the Ftrl-proximal scheme.
-
-grad_with_shrinkage = grad + 2 * l2_shrinkage * var
-accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
-linear += grad_with_shrinkage +
-    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-accum = accum_new
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-linear: Should be from a Variable().
-grad: The gradient.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regulariation. Must be a scalar.
-l2: onine L2 regularization. Must be a scalar.
-l2: L2 shrinkage regulariation. Must be a scalar.
-lr_power: Scaling factor. Must be a scalar.
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceSparseApplyFtrlV2")
     .Input("var: resource")
@@ -1147,33 +648,7 @@ REGISTER_OP("ResourceSparseApplyFtrlV2")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyFtrlShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update relevant entries in '*var' according to the Ftrl-proximal scheme.
-
-That is for rows we have grad for, we update var, accum and linear as follows:
-grad_with_shrinkage = grad + 2 * l2_shrinkage * var
-accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
-linear += grad_with_shrinkage +
-    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-accum = accum_new
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-linear: Should be from a Variable().
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-lr: Scaling factor. Must be a scalar.
-l1: L1 regularization. Must be a scalar.
-l2: onine L2 regularization. Must be a scalar.
-l2: L2 shrinkage regulariation. Must be a scalar.
-lr_power: Scaling factor. Must be a scalar.
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 static Status ApplyMomentumShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -1202,27 +677,7 @@ REGISTER_OP("ApplyMomentum")
     .Attr("use_nesterov: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyMomentumShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the momentum scheme. Set use_nesterov = True if you
-want to use Nesterov momentum.
-
-accum = accum * momentum + grad
-var -= lr * accum
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-grad: The gradient.
-momentum: Momentum. Must be a scalar.
-out: Same as "var".
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-use_nesterov: If `True`, the tensor passed to compute grad will be
-var - lr * momentum * accum, so in the end, the var you get is actually
-var - lr * momentum * accum.
-)doc");
+    });
 
 REGISTER_OP("SparseApplyMomentum")
     .Input("var: Ref(T)")
@@ -1238,30 +693,7 @@ REGISTER_OP("SparseApplyMomentum")
     .Attr("use_nesterov: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyMomentumShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update relevant entries in '*var' and '*accum' according to the momentum scheme.
-Set use_nesterov = True if you want to use Nesterov momentum.
-
-That is for rows we have grad for, we update var and accum as follows:
-
-accum = accum * momentum + grad
-var -= lr * accum
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-lr: Learning rate. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-momentum: Momentum. Must be a scalar.
-out: Same as "var".
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-use_nesterov: If `True`, the tensor passed to compute grad will be
-var - lr * momentum * accum, so in the end, the var you get is actually
-var - lr * momentum * accum.
-)doc");
+    });
 
 REGISTER_OP("ResourceApplyMomentum")
     .Input("var: resource")
@@ -1274,26 +706,7 @@ REGISTER_OP("ResourceApplyMomentum")
     .Attr("use_nesterov: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyMomentumShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the momentum scheme. Set use_nesterov = True if you
-want to use Nesterov momentum.
-
-accum = accum * momentum + grad
-var -= lr * accum
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-grad: The gradient.
-momentum: Momentum. Must be a scalar.
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-use_nesterov: If `True`, the tensor passed to compute grad will be
-var - lr * momentum * accum, so in the end, the var you get is actually
-var - lr * momentum * accum.
-)doc");
+    });
 
 REGISTER_OP("ResourceSparseApplyMomentum")
     .Input("var: resource")
@@ -1308,29 +721,7 @@ REGISTER_OP("ResourceSparseApplyMomentum")
     .Attr("use_nesterov: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyMomentumShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update relevant entries in '*var' and '*accum' according to the momentum scheme.
-Set use_nesterov = True if you want to use Nesterov momentum.
-
-That is for rows we have grad for, we update var and accum as follows:
-
-accum = accum * momentum + grad
-var -= lr * accum
-
-var: Should be from a Variable().
-accum: Should be from a Variable().
-lr: Learning rate. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var and accum.
-momentum: Momentum. Must be a scalar.
-use_locking: If `True`, updating of the var and accum tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-use_nesterov: If `True`, the tensor passed to compute grad will be
-var - lr * momentum * accum, so in the end, the var you get is actually
-var - lr * momentum * accum.
-)doc");
+    });
 
 static Status ApplyAdamShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -1368,31 +759,7 @@ REGISTER_OP("ApplyAdam")
     .Attr("use_nesterov: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdamShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the Adam algorithm.
-
-lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
-m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
-v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
-variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
-
-var: Should be from a Variable().
-m: Should be from a Variable().
-v: Should be from a Variable().
-beta1_power: Must be a scalar.
-beta2_power: Must be a scalar.
-lr: Scaling factor. Must be a scalar.
-beta1: Momentum factor. Must be a scalar.
-beta2: Momentum factor. Must be a scalar.
-epsilon: Ridge term. Must be a scalar.
-grad: The gradient.
-out: Same as "var".
-use_locking: If `True`, updating of the var, m, and v tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-use_nesterov: If `True`, uses the nesterov update.
-)doc");
+    });
 
 REGISTER_OP("ResourceApplyAdam")
     .Input("var: resource")
@@ -1410,30 +777,7 @@ REGISTER_OP("ResourceApplyAdam")
     .Attr("use_nesterov: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAdamShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the Adam algorithm.
-
-lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
-m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
-v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
-variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
-
-var: Should be from a Variable().
-m: Should be from a Variable().
-v: Should be from a Variable().
-beta1_power: Must be a scalar.
-beta2_power: Must be a scalar.
-lr: Scaling factor. Must be a scalar.
-beta1: Momentum factor. Must be a scalar.
-beta2: Momentum factor. Must be a scalar.
-epsilon: Ridge term. Must be a scalar.
-grad: The gradient.
-use_locking: If `True`, updating of the var, m, and v tensors will be protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-use_nesterov: If `True`, uses the nesterov update.
-)doc");
+    });
 
 static Status ApplyRMSPropShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -1484,32 +828,7 @@ REGISTER_OP("ApplyRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyRMSPropShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the RMSProp algorithm.
-Note that in dense implementation of this algorithm, ms and mom will
-update even if the grad is zero, but in this sparse implementation, ms
-and mom will not update in iterations during which the grad is zero.
-
-mean_square = decay * mean_square + (1-decay) * gradient ** 2
-Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
-
-ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-var <- var - mom
-
-var: Should be from a Variable().
-ms: Should be from a Variable().
-mom: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-epsilon: Ridge term. Must be a scalar.
-rho: Decay rate. Must be a scalar.
-grad: The gradient.
-out: Same as "var".
-use_locking: If `True`, updating of the var, ms, and mom tensors is protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ApplyCenteredRMSProp")
     .Input("var: Ref(T)")
@@ -1526,41 +845,7 @@ REGISTER_OP("ApplyCenteredRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyCenteredRMSPropShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the centered RMSProp algorithm.
-The centered RMSProp algorithm uses an estimate of the centered second moment
-(i.e., the variance) for normalization, as opposed to regular RMSProp, which
-uses the (uncentered) second moment. This often helps with training, but is
-slightly more expensive in terms of computation and memory.
-
-Note that in dense implementation of this algorithm, mg, ms, and mom will
-update even if the grad is zero, but in this sparse implementation, mg, ms,
-and mom will not update in iterations during which the grad is zero.
-
-mean_square = decay * mean_square + (1-decay) * gradient ** 2
-mean_grad = decay * mean_grad + (1-decay) * gradient
-
-Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
-
-mg <- rho * mg_{t-1} + (1-rho) * grad
-ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
-var <- var - mom
-
-var: Should be from a Variable().
-mg: Should be from a Variable().
-ms: Should be from a Variable().
-mom: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-epsilon: Ridge term. Must be a scalar.
-rho: Decay rate. Must be a scalar.
-grad: The gradient.
-out: Same as "var".
-use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
-  protected by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("SparseApplyRMSProp")
     .Input("var: Ref(T)")
@@ -1578,33 +863,7 @@ REGISTER_OP("SparseApplyRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyRMSPropShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the RMSProp algorithm.
-Note that in dense implementation of this algorithm, ms and mom will
-update even if the grad is zero, but in this sparse implementation, ms
-and mom will not update in iterations during which the grad is zero.
-
-mean_square = decay * mean_square + (1-decay) * gradient ** 2
-Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
-
-ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-var <- var - mom
-
-var: Should be from a Variable().
-ms: Should be from a Variable().
-mom: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-epsilon: Ridge term. Must be a scalar.
-rho: Decay rate. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var, ms and mom.
-out: Same as "var".
-use_locking: If `True`, updating of the var, ms, and mom tensors is protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("SparseApplyCenteredRMSProp")
     .Input("var: Ref(T)")
@@ -1623,40 +882,7 @@ REGISTER_OP("SparseApplyCenteredRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyCenteredRMSPropShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the centered RMSProp algorithm.
-The centered RMSProp algorithm uses an estimate of the centered second moment
-(i.e., the variance) for normalization, as opposed to regular RMSProp, which
-uses the (uncentered) second moment. This often helps with training, but is
-slightly more expensive in terms of computation and memory.
-
-Note that in dense implementation of this algorithm, mg, ms, and mom will
-update even if the grad is zero, but in this sparse implementation, mg, ms,
-and mom will not update in iterations during which the grad is zero.
-
-mean_square = decay * mean_square + (1-decay) * gradient ** 2
-mean_grad = decay * mean_grad + (1-decay) * gradient
-Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
-
-ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-var <- var - mom
-
-var: Should be from a Variable().
-mg: Should be from a Variable().
-ms: Should be from a Variable().
-mom: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-epsilon: Ridge term. Must be a scalar.
-rho: Decay rate. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var, ms and mom.
-out: Same as "var".
-use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
-  protected by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceApplyRMSProp")
     .Input("var: resource")
@@ -1671,31 +897,7 @@ REGISTER_OP("ResourceApplyRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyRMSPropShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the RMSProp algorithm.
-Note that in dense implementation of this algorithm, ms and mom will
-update even if the grad is zero, but in this sparse implementation, ms
-and mom will not update in iterations during which the grad is zero.
-
-mean_square = decay * mean_square + (1-decay) * gradient ** 2
-Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
-
-ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-var <- var - mom
-
-var: Should be from a Variable().
-ms: Should be from a Variable().
-mom: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-epsilon: Ridge term. Must be a scalar.
-rho: Decay rate. Must be a scalar.
-grad: The gradient.
-use_locking: If `True`, updating of the var, ms, and mom tensors is protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceApplyCenteredRMSProp")
     .Input("var: resource")
@@ -1711,40 +913,7 @@ REGISTER_OP("ResourceApplyCenteredRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyCenteredRMSPropShapeFn(c, false /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the centered RMSProp algorithm.
-The centered RMSProp algorithm uses an estimate of the centered second moment
-(i.e., the variance) for normalization, as opposed to regular RMSProp, which
-uses the (uncentered) second moment. This often helps with training, but is
-slightly more expensive in terms of computation and memory.
-
-Note that in dense implementation of this algorithm, mg, ms, and mom will
-update even if the grad is zero, but in this sparse implementation, mg, ms,
-and mom will not update in iterations during which the grad is zero.
-
-mean_square = decay * mean_square + (1-decay) * gradient ** 2
-mean_grad = decay * mean_grad + (1-decay) * gradient
-
-Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
-
-mg <- rho * mg_{t-1} + (1-rho) * grad
-ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
-var <- var - mom
-
-var: Should be from a Variable().
-mg: Should be from a Variable().
-ms: Should be from a Variable().
-mom: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-epsilon: Ridge term. Must be a scalar.
-rho: Decay rate. Must be a scalar.
-grad: The gradient.
-use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
-  protected by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceSparseApplyRMSProp")
     .Input("var: resource")
@@ -1761,32 +930,7 @@ REGISTER_OP("ResourceSparseApplyRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyRMSPropShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the RMSProp algorithm.
-Note that in dense implementation of this algorithm, ms and mom will
-update even if the grad is zero, but in this sparse implementation, ms
-and mom will not update in iterations during which the grad is zero.
-
-mean_square = decay * mean_square + (1-decay) * gradient ** 2
-Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
-
-ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-var <- var - mom
-
-var: Should be from a Variable().
-ms: Should be from a Variable().
-mom: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-epsilon: Ridge term. Must be a scalar.
-rho: Decay rate. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var, ms and mom.
-use_locking: If `True`, updating of the var, ms, and mom tensors is protected
-  by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 REGISTER_OP("ResourceSparseApplyCenteredRMSProp")
     .Input("var: resource")
@@ -1804,39 +948,7 @@ REGISTER_OP("ResourceSparseApplyCenteredRMSProp")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyCenteredRMSPropShapeFn(c, true /* sparse */);
-    })
-    .Doc(R"doc(
-Update '*var' according to the centered RMSProp algorithm.
-The centered RMSProp algorithm uses an estimate of the centered second moment
-(i.e., the variance) for normalization, as opposed to regular RMSProp, which
-uses the (uncentered) second moment. This often helps with training, but is
-slightly more expensive in terms of computation and memory.
-
-Note that in dense implementation of this algorithm, mg, ms, and mom will
-update even if the grad is zero, but in this sparse implementation, mg, ms,
-and mom will not update in iterations during which the grad is zero.
-
-mean_square = decay * mean_square + (1-decay) * gradient ** 2
-mean_grad = decay * mean_grad + (1-decay) * gradient
-Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
-
-ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-var <- var - mom
-
-var: Should be from a Variable().
-mg: Should be from a Variable().
-ms: Should be from a Variable().
-mom: Should be from a Variable().
-lr: Scaling factor. Must be a scalar.
-epsilon: Ridge term. Must be a scalar.
-rho: Decay rate. Must be a scalar.
-grad: The gradient.
-indices: A vector of indices into the first dimension of var, ms and mom.
-use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
-  protected by a lock; otherwise the behavior is undefined, but may exhibit less
-  contention.
-)doc");
+    });
 
 static Status ApplyAddSignShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -1867,8 +979,7 @@ REGISTER_OP("ApplyAddSign")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAddSignShapeFn(c, /*sparse=*/false);
-    })
-    .Doc(strings::StrCat(kAddSignCommonDocStr, kOutDocStr, kLockDocStr));
+    });
 
 REGISTER_OP("ResourceApplyAddSign")
     .Input("var: resource")
@@ -1882,8 +993,7 @@ REGISTER_OP("ResourceApplyAddSign")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyAddSignShapeFn(c, /*sparse=*/false);
-    })
-    .Doc(strings::StrCat(kAddSignCommonDocStr, kLockDocStr));
+    });
 
 static Status ApplyPowerSignShapeFn(InferenceContext* c, bool sparse) {
   ShapeHandle unused;
@@ -1914,8 +1024,7 @@ REGISTER_OP("ApplyPowerSign")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyPowerSignShapeFn(c, /*sparse=*/false);
-    })
-    .Doc(strings::StrCat(kPowerSignCommonDocStr, kOutDocStr, kLockDocStr));
+    });
 
 REGISTER_OP("ResourceApplyPowerSign")
     .Input("var: resource")
@@ -1929,8 +1038,6 @@ REGISTER_OP("ResourceApplyPowerSign")
     .Attr("use_locking: bool = false")
     .SetShapeFn([](InferenceContext* c) {
       return ApplyPowerSignShapeFn(c, /*sparse=*/false);
-    })
-    .Doc(strings::StrCat(kPowerSignCommonDocStr, kLockDocStr));
-
+    });
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/word2vec_ops.cc b/tensorflow/core/ops/word2vec_ops.cc
index b6acc2213c3a2ca1669d6f055c6acefc78de79c3..ed685dcf0ae9a3c61a1db491751f7de4e981300d 100644
--- a/tensorflow/core/ops/word2vec_ops.cc
+++ b/tensorflow/core/ops/word2vec_ops.cc
@@ -33,25 +33,7 @@ REGISTER_OP("Skipgram")
     .Attr("batch_size: int")
     .Attr("window_size: int = 5")
     .Attr("min_count: int = 5")
-    .Attr("subsample: float = 1e-3")
-    .Doc(R"doc(
-Parses a text file and creates a batch of examples.
-
-vocab_word: A vector of words in the corpus.
-vocab_freq: Frequencies of words. Sorted in the non-ascending order.
-words_per_epoch: Number of words per epoch in the data file.
-current_epoch: The current epoch number.
-total_words_processed: The total number of words processed so far.
-examples: A vector of word ids.
-labels: A vector of word ids.
-filename: The corpus's text file name.
-batch_size: The size of produced batch.
-window_size: The number of words to predict to the left and right of the target.
-min_count: The minimum number of word occurrences for it to be included in the
-    vocabulary.
-subsample: Threshold for word occurrence. Words that appear with higher
-    frequency will be randomly down-sampled. Set to 0 to disable.
-)doc");
+    .Attr("subsample: float = 1e-3");
 
 REGISTER_OP("NegTrain")
     .Deprecated(19,
@@ -64,16 +46,6 @@ REGISTER_OP("NegTrain")
     .Input("lr: float")
     .SetIsStateful()
     .Attr("vocab_count: list(int)")
-    .Attr("num_negative_samples: int")
-    .Doc(R"doc(
-Training via negative sampling.
-
-w_in: input word embedding.
-w_out: output word embedding.
-examples: A vector of word ids.
-labels: A vector of word ids.
-vocab_count: Count of words in the vocabulary.
-num_negative_samples: Number of negative samples per example.
-)doc");
+    .Attr("num_negative_samples: int");
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD
index 624145da75194fac7f859d4df0f6f51fe7ac5eff..6b6be757f6e825ef15e918f0dac9f7bcb0ed22fa 100644
--- a/tensorflow/core/platform/cloud/BUILD
+++ b/tensorflow/core/platform/cloud/BUILD
@@ -10,6 +10,8 @@ licenses(["notice"])  # Apache 2.0
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_cc_test",
+    "tf_copts",
+    "if_windows",
 )
 
 filegroup(
@@ -29,6 +31,7 @@ filegroup(
 cc_library(
     name = "expiring_lru_cache",
     hdrs = ["expiring_lru_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = ["//tensorflow/core:lib"],
 )
@@ -37,6 +40,7 @@ cc_library(
     name = "file_block_cache",
     srcs = ["file_block_cache.cc"],
     hdrs = ["file_block_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = ["//tensorflow/core:lib"],
 )
@@ -45,6 +49,7 @@ cc_library(
     name = "gcs_dns_cache",
     srcs = ["gcs_dns_cache.cc"],
     hdrs = ["gcs_dns_cache.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":http_request",
@@ -56,6 +61,7 @@ cc_library(
     name = "gcs_file_system",
     srcs = ["gcs_file_system.cc"],
     hdrs = ["gcs_file_system.h"],
+    copts = tf_copts(),
     linkstatic = 1,  # Needed since alwayslink is broken in bazel b/27630669
     visibility = ["//visibility:public"],
     deps = [
@@ -78,6 +84,7 @@ cc_library(
 cc_library(
     name = "http_request",
     hdrs = ["http_request.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         "//tensorflow/core:framework_headers_lib",
@@ -89,6 +96,7 @@ cc_library(
     name = "curl_http_request",
     srcs = ["curl_http_request.cc"],
     hdrs = ["curl_http_request.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":http_request",
@@ -104,6 +112,7 @@ cc_library(
     hdrs = [
         "http_request_fake.h",
     ],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":curl_http_request",
@@ -121,6 +130,7 @@ cc_library(
         "auth_provider.h",
         "google_auth_provider.h",
     ],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         ":curl_http_request",
@@ -136,6 +146,7 @@ cc_library(
     name = "now_seconds_env",
     testonly = 1,
     hdrs = ["now_seconds_env.h"],
+    copts = tf_copts(),
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
         "//tensorflow/core:lib",
@@ -151,6 +162,7 @@ cc_library(
     hdrs = [
         "oauth_client.h",
     ],
+    copts = tf_copts(),
     deps = [
         ":curl_http_request",
         ":http_request",
@@ -169,6 +181,7 @@ cc_library(
     hdrs = [
         "retrying_utils.h",
     ],
+    copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
@@ -183,6 +196,7 @@ cc_library(
     hdrs = [
         "retrying_file_system.h",
     ],
+    copts = tf_copts(),
     deps = [
         ":retrying_utils",
         "//tensorflow/core:framework_headers_lib",
@@ -198,6 +212,7 @@ cc_library(
     hdrs = [
         "time_util.h",
     ],
+    copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
@@ -247,6 +262,7 @@ tf_cc_test(
     name = "gcs_dns_cache_test",
     size = "small",
     srcs = ["gcs_dns_cache_test.cc"],
+    linkopts = if_windows(["-DEFAULTLIB:ws2_32.lib"]),
     deps = [
         ":gcs_dns_cache",
         "//tensorflow/core:lib",
diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc
index d01734ba3a649afa73a5fc8ad59a01a7cc6c3088..88a5d1e96dc2fcb7d12e2c0891d2f04d64bac594 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <algorithm>
+
 #include "tensorflow/core/platform/cloud/curl_http_request.h"
 
 #include "tensorflow/core/lib/core/errors.h"
@@ -29,16 +31,6 @@ namespace {
 // Set to 1 to enable verbose debug output from curl.
 constexpr uint64 kVerboseOutput = 0;
 
-// Timeout for the whole request. Set only to prevent hanging indefinitely.
-constexpr uint32 kRequestTimeoutSeconds = 3600;  // 1 hour
-
-// Timeout for the connection phase.
-constexpr uint32 kConnectTimeoutSeconds = 120;  // 2 minutes
-
-// The maximum period of request inactivity, after which the request
-// is terminated.
-constexpr uint64 kInactivityTimeoutSeconds = 60;  // 1 minute
-
 // Proxy to the real libcurl implementation.
 class LibCurlProxy : public LibCurl {
  public:
@@ -117,6 +109,10 @@ class LibCurlProxy : public LibCurl {
   }
 
   void curl_free(void* p) override { ::curl_free(p); }
+
+  const char* curl_easy_strerror(CURLcode errornum) override {
+    return ::curl_easy_strerror(errornum);
+  }
 };
 }  // namespace
 
@@ -125,31 +121,9 @@ CurlHttpRequest::CurlHttpRequest() : CurlHttpRequest(LibCurlProxy::Load()) {}
 CurlHttpRequest::CurlHttpRequest(LibCurl* libcurl, Env* env)
     : libcurl_(libcurl), env_(env) {
   default_response_buffer_.reserve(CURL_MAX_WRITE_SIZE);
-}
-
-CurlHttpRequest::~CurlHttpRequest() {
-  if (curl_headers_) {
-    libcurl_->curl_slist_free_all(curl_headers_);
-  }
-  if (resolve_list_) {
-    libcurl_->curl_slist_free_all(resolve_list_);
-  }
-  if (put_body_) {
-    fclose(put_body_);
-  }
-  if (curl_) {
-    libcurl_->curl_easy_cleanup(curl_);
-  }
-}
 
-Status CurlHttpRequest::Init() {
-  if (is_initialized_) {
-    return errors::FailedPrecondition("Already initialized.");
-  }
   curl_ = libcurl_->curl_easy_init();
-  if (!curl_) {
-    return errors::Internal("Couldn't initialize a curl session.");
-  }
+  CHECK(curl_ != nullptr) << "Couldn't initialize a curl session.";
 
   // NOTE: CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt is configured by
   //       default in //third_party:curl.BUILD and can be customized via an
@@ -161,9 +135,6 @@ Status CurlHttpRequest::Init() {
       strings::StrCat("TensorFlow/", TF_VERSION_STRING).c_str());
   // Do not use signals for timeouts - does not work in multi-threaded programs.
   libcurl_->curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1L);
-  libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, kRequestTimeoutSeconds);
-  libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT,
-                             kConnectTimeoutSeconds);
   libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION,
                              CURL_HTTP_VERSION_2_0);
 
@@ -175,13 +146,22 @@ Status CurlHttpRequest::Init() {
 
   // If response buffer is not set, libcurl will print results to stdout,
   // so we always set it.
-  is_initialized_ = true;
-  auto s = SetResultBuffer(&default_response_buffer_);
-  if (!s.ok()) {
-    is_initialized_ = false;
-    return s;
+  SetResultBuffer(&default_response_buffer_);
+}
+
+CurlHttpRequest::~CurlHttpRequest() {
+  if (curl_headers_) {
+    libcurl_->curl_slist_free_all(curl_headers_);
+  }
+  if (resolve_list_) {
+    libcurl_->curl_slist_free_all(resolve_list_);
+  }
+  if (put_body_) {
+    fclose(put_body_);
+  }
+  if (curl_) {
+    libcurl_->curl_easy_cleanup(curl_);
   }
-  return Status::OK();
 }
 
 string CurlHttpRequest::EscapeString(const string& str) {
@@ -191,64 +171,52 @@ string CurlHttpRequest::EscapeString(const string& str) {
   return out_str;
 }
 
-Status CurlHttpRequest::SetUri(const string& uri) {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
+void CurlHttpRequest::SetUri(const string& uri) {
+  CheckNotSent();
   is_uri_set_ = true;
+  uri_ = uri;
   libcurl_->curl_easy_setopt(curl_, CURLOPT_URL, uri.c_str());
-  return Status::OK();
 }
 
-Status CurlHttpRequest::SetRange(uint64 start, uint64 end) {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
+void CurlHttpRequest::SetRange(uint64 start, uint64 end) {
+  CheckNotSent();
   libcurl_->curl_easy_setopt(curl_, CURLOPT_RANGE,
                              strings::StrCat(start, "-", end).c_str());
-  return Status::OK();
 }
 
-Status CurlHttpRequest::AddHeader(const string& name, const string& value) {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
+void CurlHttpRequest::AddHeader(const string& name, const string& value) {
+  CheckNotSent();
   curl_headers_ = libcurl_->curl_slist_append(
       curl_headers_, strings::StrCat(name, ": ", value).c_str());
-  return Status::OK();
 }
 
-Status CurlHttpRequest::AddResolveOverride(const string& hostname, int64 port,
-                                           const string& ip_addr) {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
+void CurlHttpRequest::AddResolveOverride(const string& hostname, int64 port,
+                                         const string& ip_addr) {
+  CheckNotSent();
   // Resolve values are hostname:port:IP.add.ress
   resolve_list_ = libcurl_->curl_slist_append(
       resolve_list_,
       strings::StrCat(hostname, ":", port, ":", ip_addr).c_str());
-  return Status::OK();
 }
 
-Status CurlHttpRequest::AddAuthBearerHeader(const string& auth_token) {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
+void CurlHttpRequest::AddAuthBearerHeader(const string& auth_token) {
+  CheckNotSent();
   if (!auth_token.empty()) {
-    return AddHeader("Authorization", strings::StrCat("Bearer ", auth_token));
+    AddHeader("Authorization", strings::StrCat("Bearer ", auth_token));
   }
-  return Status::OK();
 }
 
-Status CurlHttpRequest::SetDeleteRequest() {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
-  TF_RETURN_IF_ERROR(CheckMethodNotSet());
+void CurlHttpRequest::SetDeleteRequest() {
+  CheckNotSent();
+  CheckMethodNotSet();
   is_method_set_ = true;
   libcurl_->curl_easy_setopt(curl_, CURLOPT_CUSTOMREQUEST, "DELETE");
-  return Status::OK();
 }
 
 Status CurlHttpRequest::SetPutFromFile(const string& body_filepath,
                                        size_t offset) {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
-  TF_RETURN_IF_ERROR(CheckMethodNotSet());
+  CheckNotSent();
+  CheckMethodNotSet();
   is_method_set_ = true;
   if (put_body_) {
     fclose(put_body_);
@@ -272,10 +240,9 @@ Status CurlHttpRequest::SetPutFromFile(const string& body_filepath,
   return Status::OK();
 }
 
-Status CurlHttpRequest::SetPutEmptyBody() {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
-  TF_RETURN_IF_ERROR(CheckMethodNotSet());
+void CurlHttpRequest::SetPutEmptyBody() {
+  CheckNotSent();
+  CheckMethodNotSet();
   is_method_set_ = true;
   libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1);
   curl_headers_ =
@@ -284,13 +251,11 @@ Status CurlHttpRequest::SetPutEmptyBody() {
                              reinterpret_cast<void*>(this));
   libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION,
                              &CurlHttpRequest::ReadCallback);
-  return Status::OK();
 }
 
-Status CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
-  TF_RETURN_IF_ERROR(CheckMethodNotSet());
+void CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) {
+  CheckNotSent();
+  CheckMethodNotSet();
   is_method_set_ = true;
   curl_headers_ = libcurl_->curl_slist_append(
       curl_headers_, strings::StrCat("Content-Length: ", size).c_str());
@@ -300,13 +265,11 @@ Status CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) {
   libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION,
                              &CurlHttpRequest::ReadCallback);
   post_body_buffer_ = StringPiece(buffer, size);
-  return Status::OK();
 }
 
-Status CurlHttpRequest::SetPostEmptyBody() {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
-  TF_RETURN_IF_ERROR(CheckMethodNotSet());
+void CurlHttpRequest::SetPostEmptyBody() {
+  CheckNotSent();
+  CheckMethodNotSet();
   is_method_set_ = true;
   libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1);
   curl_headers_ =
@@ -315,15 +278,11 @@ Status CurlHttpRequest::SetPostEmptyBody() {
                              reinterpret_cast<void*>(this));
   libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION,
                              &CurlHttpRequest::ReadCallback);
-  return Status::OK();
 }
 
-Status CurlHttpRequest::SetResultBuffer(std::vector<char>* out_buffer) {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
-  if (!out_buffer) {
-    return errors::InvalidArgument("out_buffer cannot be null");
-  }
+void CurlHttpRequest::SetResultBuffer(std::vector<char>* out_buffer) {
+  CheckNotSent();
+  CHECK(out_buffer != nullptr);
 
   out_buffer->clear();
   response_buffer_ = out_buffer;
@@ -332,7 +291,67 @@ Status CurlHttpRequest::SetResultBuffer(std::vector<char>* out_buffer) {
                              reinterpret_cast<void*>(this));
   libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION,
                              &CurlHttpRequest::WriteCallback);
-  return Status::OK();
+}
+
+void CurlHttpRequest::SetResultBufferDirect(char* buffer, size_t size) {
+  CHECK(buffer != nullptr);
+  CheckNotSent();
+
+  direct_response_ = DirectResponseState{buffer, size, 0};
+
+  libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA,
+                             reinterpret_cast<void*>(this));
+  libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION,
+                             &CurlHttpRequest::WriteCallbackDirect);
+}
+
+bool CurlHttpRequest::IsDirectResponse() const {
+  return direct_response_.buffer_ != nullptr;
+}
+
+size_t CurlHttpRequest::WriteCallbackDirect(const void* ptr, size_t size,
+                                            size_t nmemb, void* userdata) {
+  CHECK(ptr != nullptr);
+  auto that = reinterpret_cast<CurlHttpRequest*>(userdata);
+  DirectResponseState* state = &that->direct_response_;
+  CHECK(state->buffer_ != nullptr);
+  CHECK(state->bytes_transferred_ <= state->buffer_size_);
+
+  size_t curl_bytes_received = size * nmemb;
+  size_t user_buffer_bytes_available =
+      state->buffer_size_ - state->bytes_transferred_;
+
+  // The HTTP server may send a response body that is longer than what we
+  // expected. We must not use CHECK() for this situation, because that would
+  // imply a code bug (in this client code) where none exists; the violation of
+  // expectations would have been caused by the server, not the client. So we
+  // report a log warning, if an HTTP server is misbehaving.
+  if (curl_bytes_received > user_buffer_bytes_available) {
+    LOG(WARNING) << "The HTTP response body that we received is longer than we "
+                    "requested or expected. "
+                 << "Total bytes requested: " << state->buffer_size_
+                 << " Bytes received (so far) in HTTP response body: "
+                 << (state->bytes_transferred_ + curl_bytes_received);
+  }
+
+  size_t bytes_to_copy =
+      std::min<size_t>(curl_bytes_received, user_buffer_bytes_available);
+  memcpy(&state->buffer_[state->bytes_transferred_], ptr, bytes_to_copy);
+  state->bytes_transferred_ += bytes_to_copy;
+  return bytes_to_copy;
+}
+
+size_t CurlHttpRequest::GetResultBufferDirectBytesTransferred() {
+  CHECK(direct_response_.buffer_ != nullptr);
+  return direct_response_.bytes_transferred_;
+}
+
+void CurlHttpRequest::SetTimeouts(uint32 connection, uint32 inactivity,
+                                  uint32 total) {
+  CheckNotSent();
+  connect_timeout_secs_ = connection;
+  inactivity_timeout_secs_ = inactivity;
+  request_timeout_secs_ = total;
 }
 
 size_t CurlHttpRequest::WriteCallback(const void* ptr, size_t size,
@@ -381,12 +400,11 @@ size_t CurlHttpRequest::HeaderCallback(const void* ptr, size_t size,
 }
 
 Status CurlHttpRequest::Send() {
-  TF_RETURN_IF_ERROR(CheckInitialized());
-  TF_RETURN_IF_ERROR(CheckNotSent());
+  CheckNotSent();
+  CHECK(is_uri_set_) << "URI has not been set.";
+
   is_sent_ = true;
-  if (!is_uri_set_) {
-    return errors::FailedPrecondition("URI has not been set.");
-  }
+
   if (curl_headers_) {
     libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, curl_headers_);
   }
@@ -398,6 +416,10 @@ Status CurlHttpRequest::Send() {
   libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERFUNCTION,
                              &CurlHttpRequest::HeaderCallback);
 
+  libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, request_timeout_secs_);
+  libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT,
+                             connect_timeout_secs_);
+
   char error_buffer[CURL_ERROR_SIZE] = {0};
   libcurl_->curl_easy_setopt(curl_, CURLOPT_ERRORBUFFER, error_buffer);
 
@@ -413,6 +435,8 @@ Status CurlHttpRequest::Send() {
       ", error code ", curl_result, ", error message '", error_buffer, "')");
 
   Status result;
+  StringPiece response = GetResponse();
+  string extended_error_message;
   switch (response_code_) {
     // The group of response codes indicating that the request achieved
     // the expected goal.
@@ -445,7 +469,15 @@ Status CurlHttpRequest::Send() {
     // PERMISSION_DENIED indicates an authentication or an authorization issue.
     case 401:  // Unauthorized
     case 403:  // Forbidden
-      result = errors::PermissionDenied(error_message);
+      if (!response.empty()) {
+        extended_error_message = strings::StrCat(
+            error_message, ", response ",
+            response.substr(
+                0, std::min(response.size(), response_to_error_limit_)));
+        result = errors::PermissionDenied(extended_error_message);
+      } else {
+        result = errors::PermissionDenied(error_message);
+      }
       break;
 
     // NOT_FOUND indicates that the requested resource does not exist.
@@ -484,25 +516,23 @@ Status CurlHttpRequest::Send() {
   return result;
 }
 
-Status CurlHttpRequest::CheckInitialized() const {
-  if (!is_initialized_) {
-    return errors::FailedPrecondition("The object has not been initialized.");
-  }
-  return Status::OK();
+void CurlHttpRequest::CheckMethodNotSet() const {
+  CHECK(!is_method_set_) << "HTTP method has been already set.";
 }
 
-Status CurlHttpRequest::CheckMethodNotSet() const {
-  if (is_method_set_) {
-    return errors::FailedPrecondition("HTTP method has been already set.");
-  }
-  return Status::OK();
+void CurlHttpRequest::CheckNotSent() const {
+  CHECK(!is_sent_) << "The request has already been sent.";
 }
 
-Status CurlHttpRequest::CheckNotSent() const {
-  if (is_sent_) {
-    return errors::FailedPrecondition("The request has already been sent.");
+StringPiece CurlHttpRequest::GetResponse() const {
+  StringPiece response;
+  if (IsDirectResponse()) {
+    response = StringPiece(direct_response_.buffer_,
+                           direct_response_.bytes_transferred_);
+  } else {
+    response = StringPiece(response_buffer_->data(), response_buffer_->size());
   }
-  return Status::OK();
+  return response;
 }
 
 string CurlHttpRequest::GetResponseHeader(const string& name) const {
@@ -528,12 +558,37 @@ int CurlHttpRequest::ProgressCallback(void* this_object, curl_off_t dltotal,
     return 0;
   }
 
-  if (now - that->last_progress_timestamp_ > kInactivityTimeoutSeconds) {
+  if (now - that->last_progress_timestamp_ > that->inactivity_timeout_secs_) {
+    double lookup_time = -1;
+    const auto lookup_time_status = that->libcurl_->curl_easy_getinfo(
+        that->curl_, CURLINFO_NAMELOOKUP_TIME, &lookup_time);
+
+    double connect_time = -1;
+    const auto connect_time_status = that->libcurl_->curl_easy_getinfo(
+        that->curl_, CURLINFO_CONNECT_TIME, &connect_time);
+
+    double pretransfer_time = -1;
+    const auto pretransfer_time_status = that->libcurl_->curl_easy_getinfo(
+        that->curl_, CURLINFO_PRETRANSFER_TIME, &pretransfer_time);
+
+    double starttransfer_time = -1;
+    const auto starttransfer_time_status = that->libcurl_->curl_easy_getinfo(
+        that->curl_, CURLINFO_PRETRANSFER_TIME, &starttransfer_time);
+
     LOG(ERROR) << "The transmission  of request " << this_object
-               << " has been stuck at " << current_progress << " of "
-               << dltotal + ultotal << " bytes for "
-               << now - that->last_progress_timestamp_
-               << " seconds and will be aborted.";
+               << " (URI: " << that->uri_ << ") has been stuck at "
+               << current_progress << " of " << dltotal + ultotal
+               << " bytes for " << now - that->last_progress_timestamp_
+               << " seconds and will be aborted. CURL timing information: "
+               << "lookup time: " << lookup_time << " ("
+               << that->libcurl_->curl_easy_strerror(lookup_time_status)
+               << "), connect time: " << connect_time << " ("
+               << that->libcurl_->curl_easy_strerror(connect_time_status)
+               << "), pre-transfer time: " << pretransfer_time << " ("
+               << that->libcurl_->curl_easy_strerror(pretransfer_time_status)
+               << "), start-transfer time: " << starttransfer_time << " ("
+               << that->libcurl_->curl_easy_strerror(starttransfer_time_status)
+               << ")";
     return 1;  // Will abort the request.
   }
 
diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h
index 2396593d6de015d7e002cc59a5ca12a092ab6e86..cfa26f2b795a6cc33aba308597c77088362f1e1b 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.h
+++ b/tensorflow/core/platform/cloud/curl_http_request.h
@@ -57,28 +57,26 @@ class CurlHttpRequest : public HttpRequest {
   CurlHttpRequest(LibCurl* libcurl, Env* env);
   ~CurlHttpRequest() override;
 
-  Status Init() override;
-
   /// Sets the request URI.
-  Status SetUri(const string& uri) override;
+  void SetUri(const string& uri) override;
 
   /// \brief Sets the Range header.
   ///
   /// Used for random seeks, for example "0-999" returns the first 1000 bytes
   /// (note that the right border is included).
-  Status SetRange(uint64 start, uint64 end) override;
+  void SetRange(uint64 start, uint64 end) override;
 
   /// Sets a request header.
-  Status AddHeader(const string& name, const string& value) override;
+  void AddHeader(const string& name, const string& value) override;
 
-  Status AddResolveOverride(const string& hostname, int64 port,
-                            const string& ip_addr) override;
+  void AddResolveOverride(const string& hostname, int64 port,
+                          const string& ip_addr) override;
 
   /// Sets the 'Authorization' header to the value of 'Bearer ' + auth_token.
-  Status AddAuthBearerHeader(const string& auth_token) override;
+  void AddAuthBearerHeader(const string& auth_token) override;
 
   /// Makes the request a DELETE request.
-  Status SetDeleteRequest() override;
+  void SetDeleteRequest() override;
 
   /// \brief Makes the request a PUT request.
   ///
@@ -87,21 +85,44 @@ class CurlHttpRequest : public HttpRequest {
   Status SetPutFromFile(const string& body_filepath, size_t offset) override;
 
   /// Makes the request a PUT request with an empty body.
-  Status SetPutEmptyBody() override;
+  void SetPutEmptyBody() override;
 
   /// \brief Makes the request a POST request.
   ///
   /// The request body will be taken from the specified buffer.
-  Status SetPostFromBuffer(const char* buffer, size_t size) override;
+  void SetPostFromBuffer(const char* buffer, size_t size) override;
 
   /// Makes the request a POST request with an empty body.
-  Status SetPostEmptyBody() override;
+  void SetPostEmptyBody() override;
 
   /// \brief Specifies the buffer for receiving the response body.
   ///
   /// Size of out_buffer after an access will be exactly the number of bytes
   /// read. Existing content of the vector will be cleared.
-  Status SetResultBuffer(std::vector<char>* out_buffer) override;
+  void SetResultBuffer(std::vector<char>* out_buffer) override;
+
+  /// \brief Specifies the buffer for receiving the response body, when the
+  /// caller knows the maximum size of the response body.
+  ///
+  /// This method allows the caller to receive the response body without an
+  /// additional intermediate buffer allocation and copy.  This method should
+  /// be called before calling Send(). After Send() has succeeded, the caller
+  /// should use the GetResultBufferDirectBytesTransferred() method in order
+  /// to learn how many bytes were transferred.
+  ///
+  /// Using this method is mutually exclusive with using SetResultBuffer().
+  void SetResultBufferDirect(char* buffer, size_t size) override;
+
+  /// \brief Distinguish response type (direct vs. implicit).
+  bool IsDirectResponse() const;
+
+  /// \brief Returns the number of bytes (of the response body) that were
+  /// transferred, when using the SetResultBufferDirect() method. The returned
+  /// value will always be less than or equal to the 'size' parameter that
+  /// was passed to SetResultBufferDirect(). If the actual HTTP response body
+  /// was greater than 'size' bytes, then this transfer method will only copy
+  /// the first 'size' bytes, and the rest will be ignored.
+  size_t GetResultBufferDirectBytesTransferred() override;
 
   /// \brief Returns the response headers of a completed request.
   ///
@@ -120,10 +141,16 @@ class CurlHttpRequest : public HttpRequest {
   // Url encodes str and returns a new string.
   string EscapeString(const string& str) override;
 
+  void SetTimeouts(uint32 connection, uint32 inactivity, uint32 total) override;
+
  private:
   /// A write callback in the form which can be accepted by libcurl.
   static size_t WriteCallback(const void* ptr, size_t size, size_t nmemb,
                               void* userdata);
+
+  /// Processes response body content received when using SetResultBufferDirect.
+  static size_t WriteCallbackDirect(const void* ptr, size_t size, size_t nmemb,
+                                    void* userdata);
   /// A read callback in the form which can be accepted by libcurl.
   static size_t ReadCallback(void* ptr, size_t size, size_t nmemb,
                              FILE* userdata);
@@ -134,9 +161,9 @@ class CurlHttpRequest : public HttpRequest {
   static int ProgressCallback(void* this_object, curl_off_t dltotal,
                               curl_off_t dlnow, curl_off_t ultotal,
                               curl_off_t ulnow);
-  Status CheckInitialized() const;
-  Status CheckMethodNotSet() const;
-  Status CheckNotSent() const;
+  void CheckMethodNotSet() const;
+  void CheckNotSent() const;
+  StringPiece GetResponse() const;
 
   LibCurl* libcurl_;
   Env* env_;
@@ -147,6 +174,14 @@ class CurlHttpRequest : public HttpRequest {
   size_t post_body_read_ = 0;
 
   std::vector<char>* response_buffer_ = nullptr;
+
+  struct DirectResponseState {
+    char* buffer_;
+    size_t buffer_size_;
+    size_t bytes_transferred_;
+  };
+  DirectResponseState direct_response_ = {};
+
   CURL* curl_ = nullptr;
   curl_slist* curl_headers_ = nullptr;
   curl_slist* resolve_list_ = nullptr;
@@ -162,12 +197,26 @@ class CurlHttpRequest : public HttpRequest {
   // The last progress in terms of bytes transmitted.
   curl_off_t last_progress_bytes_ = 0;
 
+  // The maximum period of request inactivity.
+  uint32 inactivity_timeout_secs_ = 60;  // 1 minute
+
+  // Timeout for the connection phase.
+  uint32 connect_timeout_secs_ = 120;  // 2 minutes
+
+  // Tiemout for the whole request. Set only to prevent hanging indefinitely.
+  uint32 request_timeout_secs_ = 3600;  // 1 hour
+
   // Members to enforce the usage flow.
-  bool is_initialized_ = false;
   bool is_uri_set_ = false;
   bool is_method_set_ = false;
   bool is_sent_ = false;
 
+  // Store the URI to help disambiguate requests when errors occur.
+  string uri_;
+
+  // Limit the size of a http response that is copied into an error message.
+  const size_t response_to_error_limit_ = 500;
+
   TF_DISALLOW_COPY_AND_ASSIGN(CurlHttpRequest);
 };
 
@@ -205,6 +254,8 @@ class LibCurl {
   virtual void curl_slist_free_all(curl_slist* list) = 0;
   virtual char* curl_easy_escape(CURL* curl, const char* str, int length) = 0;
   virtual void curl_free(void* p) = 0;
+
+  virtual const char* curl_easy_strerror(CURLcode errornum) = 0;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc
index 6c0f0818527fdc2610d2f54a965db23a636a98c7..86d26a028733c303b85390b0be8fb8808c6e082a 100644
--- a/tensorflow/core/platform/cloud/curl_http_request_test.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc
@@ -219,6 +219,10 @@ class FakeLibCurl : public LibCurl {
   }
   void curl_free(void* p) override { port::Free(p); }
 
+  const char* curl_easy_strerror(CURLcode errornum) override {
+    return "<unimplemented>";
+  }
+
   // Variables defining the behavior of this fake.
   string response_content_;
   uint64 response_code_;
@@ -259,17 +263,15 @@ class FakeLibCurl : public LibCurl {
 TEST(CurlHttpRequestTest, GetRequest) {
   FakeLibCurl libcurl("get response", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
 
   std::vector<char> scratch;
   scratch.insert(scratch.begin(), kTestContent.begin(), kTestContent.end());
-  StringPiece result;
   scratch.reserve(100);
 
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-  TF_EXPECT_OK(http_request.AddAuthBearerHeader("fake-bearer"));
-  TF_EXPECT_OK(http_request.SetRange(100, 199));
-  TF_EXPECT_OK(http_request.SetResultBuffer(&scratch));
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
+  http_request.SetRange(100, 199);
+  http_request.SetResultBuffer(&scratch);
   TF_EXPECT_OK(http_request.Send());
 
   EXPECT_EQ("get response", string(scratch.begin(), scratch.end()));
@@ -285,18 +287,48 @@ TEST(CurlHttpRequestTest, GetRequest) {
   EXPECT_EQ(200, http_request.GetResponseCode());
 }
 
+TEST(CurlHttpRequestTest, GetRequest_Direct) {
+  FakeLibCurl libcurl("get response", 200);
+  CurlHttpRequest http_request(&libcurl);
+
+  std::vector<char> scratch(100, 0);
+
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
+  http_request.SetRange(100, 199);
+  http_request.SetResultBufferDirect(scratch.data(), scratch.capacity());
+  TF_EXPECT_OK(http_request.Send());
+
+  string expected_response = "get response";
+  size_t response_bytes_transferred =
+      http_request.GetResultBufferDirectBytesTransferred();
+  EXPECT_EQ(response_bytes_transferred, expected_response.size());
+  EXPECT_EQ(
+      "get response",
+      string(scratch.begin(), scratch.begin() + response_bytes_transferred));
+
+  // Check interactions with libcurl.
+  EXPECT_TRUE(libcurl.is_initialized_);
+  EXPECT_EQ("http://www.testuri.com", libcurl.url_);
+  EXPECT_EQ("100-199", libcurl.range_);
+  EXPECT_EQ("", libcurl.custom_request_);
+  EXPECT_EQ(1, libcurl.headers_->size());
+  EXPECT_EQ("Authorization: Bearer fake-bearer", (*libcurl.headers_)[0]);
+  EXPECT_FALSE(libcurl.is_post_);
+  EXPECT_EQ(200, http_request.GetResponseCode());
+}
+
 TEST(CurlHttpRequestTest, GetRequest_Empty) {
   FakeLibCurl libcurl("", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
 
   std::vector<char> scratch;
   scratch.resize(0);
 
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-  TF_EXPECT_OK(http_request.AddAuthBearerHeader("fake-bearer"));
-  TF_EXPECT_OK(http_request.SetRange(100, 199));
-  TF_EXPECT_OK(http_request.SetResultBuffer(&scratch));
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
+  http_request.SetRange(100, 199);
+  http_request.SetResultBuffer(&scratch);
   TF_EXPECT_OK(http_request.Send());
 
   EXPECT_TRUE(scratch.empty());
@@ -316,15 +348,14 @@ TEST(CurlHttpRequestTest, GetRequest_RangeOutOfBound) {
   FakeLibCurl libcurl("get response", 416);
   libcurl.curl_easy_perform_result_ = CURLE_WRITE_ERROR;
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
 
   std::vector<char> scratch;
   scratch.insert(scratch.end(), kTestContent.begin(), kTestContent.end());
 
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-  TF_EXPECT_OK(http_request.AddAuthBearerHeader("fake-bearer"));
-  TF_EXPECT_OK(http_request.SetRange(100, 199));
-  TF_EXPECT_OK(http_request.SetResultBuffer(&scratch));
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
+  http_request.SetRange(100, 199);
+  http_request.SetResultBuffer(&scratch);
   TF_EXPECT_OK(http_request.Send());
 
   EXPECT_TRUE(scratch.empty());
@@ -335,15 +366,14 @@ TEST(CurlHttpRequestTest, GetRequest_503) {
   FakeLibCurl libcurl("get response", 503);
   libcurl.curl_easy_perform_result_ = CURLE_WRITE_ERROR;
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
 
   std::vector<char> scratch;
   scratch.insert(scratch.end(), kTestContent.begin(), kTestContent.end());
 
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-  TF_EXPECT_OK(http_request.AddAuthBearerHeader("fake-bearer"));
-  TF_EXPECT_OK(http_request.SetRange(100, 199));
-  TF_EXPECT_OK(http_request.SetResultBuffer(&scratch));
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
+  http_request.SetRange(100, 199);
+  http_request.SetResultBuffer(&scratch);
   const auto& status = http_request.Send();
   EXPECT_EQ(error::UNAVAILABLE, status.code());
   EXPECT_EQ(
@@ -358,12 +388,11 @@ TEST(CurlHttpRequestTest, GetRequest_HttpCode0) {
   libcurl.curl_easy_perform_result_ = CURLE_OPERATION_TIMEDOUT;
   libcurl.curl_easy_perform_error_message_ = "Operation timed out";
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
 
   std::vector<char> scratch;
   scratch.insert(scratch.end(), kTestContent.begin(), kTestContent.end());
 
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
+  http_request.SetUri("http://www.testuri.com");
   const auto& status = http_request.Send();
   EXPECT_EQ(error::UNAVAILABLE, status.code());
   EXPECT_EQ(
@@ -378,9 +407,8 @@ TEST(CurlHttpRequestTest, ResponseHeaders) {
       "get response", 200,
       {"Location: abcd", "Content-Type: text", "unparsable header"});
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
 
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
+  http_request.SetUri("http://www.testuri.com");
   TF_EXPECT_OK(http_request.Send());
 
   EXPECT_EQ("abcd", http_request.GetResponseHeader("Location"));
@@ -391,15 +419,14 @@ TEST(CurlHttpRequestTest, ResponseHeaders) {
 TEST(CurlHttpRequestTest, PutRequest_WithBody_FromFile) {
   FakeLibCurl libcurl("", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
 
   auto content_filename = io::JoinPath(testing::TmpDir(), "content");
   std::ofstream content(content_filename, std::ofstream::binary);
   content << "post body content";
   content.close();
 
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-  TF_EXPECT_OK(http_request.AddAuthBearerHeader("fake-bearer"));
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
   TF_EXPECT_OK(http_request.SetPutFromFile(content_filename, 0));
   TF_EXPECT_OK(http_request.Send());
 
@@ -419,15 +446,14 @@ TEST(CurlHttpRequestTest, PutRequest_WithBody_FromFile) {
 TEST(CurlHttpRequestTest, PutRequest_WithBody_FromFile_NonZeroOffset) {
   FakeLibCurl libcurl("", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
 
   auto content_filename = io::JoinPath(testing::TmpDir(), "content");
   std::ofstream content(content_filename, std::ofstream::binary);
   content << "post body content";
   content.close();
 
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-  TF_EXPECT_OK(http_request.AddAuthBearerHeader("fake-bearer"));
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
   TF_EXPECT_OK(http_request.SetPutFromFile(content_filename, 7));
   TF_EXPECT_OK(http_request.Send());
 
@@ -440,11 +466,10 @@ TEST(CurlHttpRequestTest, PutRequest_WithBody_FromFile_NonZeroOffset) {
 TEST(CurlHttpRequestTest, PutRequest_WithoutBody) {
   FakeLibCurl libcurl("", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
 
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-  TF_EXPECT_OK(http_request.AddAuthBearerHeader("fake-bearer"));
-  TF_EXPECT_OK(http_request.SetPutEmptyBody());
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
+  http_request.SetPutEmptyBody();
   TF_EXPECT_OK(http_request.Send());
 
   // Check interactions with libcurl.
@@ -461,13 +486,12 @@ TEST(CurlHttpRequestTest, PutRequest_WithoutBody) {
 TEST(CurlHttpRequestTest, PostRequest_WithBody_FromMemory) {
   FakeLibCurl libcurl("", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
 
   string content = "post body content";
 
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-  TF_EXPECT_OK(http_request.AddAuthBearerHeader("fake-bearer"));
-  TF_EXPECT_OK(http_request.SetPostFromBuffer(content.c_str(), content.size()));
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
+  http_request.SetPostFromBuffer(content.c_str(), content.size());
   TF_EXPECT_OK(http_request.Send());
 
   // Check interactions with libcurl.
@@ -484,11 +508,9 @@ TEST(CurlHttpRequestTest, PostRequest_WithBody_FromMemory) {
 TEST(CurlHttpRequestTest, PostRequest_WithoutBody) {
   FakeLibCurl libcurl("", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
-
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-  TF_EXPECT_OK(http_request.AddAuthBearerHeader("fake-bearer"));
-  TF_EXPECT_OK(http_request.SetPostEmptyBody());
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
+  http_request.SetPostEmptyBody();
   TF_EXPECT_OK(http_request.Send());
 
   // Check interactions with libcurl.
@@ -505,11 +527,9 @@ TEST(CurlHttpRequestTest, PostRequest_WithoutBody) {
 TEST(CurlHttpRequestTest, DeleteRequest) {
   FakeLibCurl libcurl("", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
-
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-  TF_EXPECT_OK(http_request.AddAuthBearerHeader("fake-bearer"));
-  TF_EXPECT_OK(http_request.SetDeleteRequest());
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
+  http_request.SetDeleteRequest();
   TF_EXPECT_OK(http_request.Send());
 
   // Check interactions with libcurl.
@@ -524,65 +544,37 @@ TEST(CurlHttpRequestTest, DeleteRequest) {
 TEST(CurlHttpRequestTest, WrongSequenceOfCalls_NoUri) {
   FakeLibCurl libcurl("", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
-
-  auto s = http_request.Send();
-  ASSERT_TRUE(errors::IsFailedPrecondition(s));
-  EXPECT_TRUE(StringPiece(s.error_message()).contains("URI has not been set"));
+  ASSERT_DEATH((void)http_request.Send(), "URI has not been set");
 }
 
 TEST(CurlHttpRequestTest, WrongSequenceOfCalls_TwoSends) {
   FakeLibCurl libcurl("", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
-
-  TF_EXPECT_OK(http_request.SetUri("http://www.google.com"));
+  http_request.SetUri("http://www.google.com");
   TF_EXPECT_OK(http_request.Send());
-  auto s = http_request.Send();
-  ASSERT_TRUE(errors::IsFailedPrecondition(s));
-  EXPECT_TRUE(StringPiece(s.error_message())
-                  .contains("The request has already been sent"));
+  ASSERT_DEATH((void)http_request.Send(), "The request has already been sent");
 }
 
 TEST(CurlHttpRequestTest, WrongSequenceOfCalls_ReusingAfterSend) {
   FakeLibCurl libcurl("", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
-
-  TF_EXPECT_OK(http_request.SetUri("http://www.google.com"));
+  http_request.SetUri("http://www.google.com");
   TF_EXPECT_OK(http_request.Send());
-  auto s = http_request.SetUri("http://mail.google.com");
-  ASSERT_TRUE(errors::IsFailedPrecondition(s));
-  EXPECT_TRUE(StringPiece(s.error_message())
-                  .contains("The request has already been sent"));
+  ASSERT_DEATH(http_request.SetUri("http://mail.google.com"),
+               "The request has already been sent");
 }
 
 TEST(CurlHttpRequestTest, WrongSequenceOfCalls_SettingMethodTwice) {
   FakeLibCurl libcurl("", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
-
-  TF_EXPECT_OK(http_request.SetDeleteRequest());
-  auto s = http_request.SetPostEmptyBody();
-  ASSERT_TRUE(errors::IsFailedPrecondition(s));
-  EXPECT_TRUE(StringPiece(s.error_message())
-                  .contains("HTTP method has been already set"));
-}
-
-TEST(CurlHttpRequestTest, WrongSequenceOfCalls_NotInitialized) {
-  FakeLibCurl libcurl("", 200);
-  CurlHttpRequest http_request(&libcurl);
-
-  auto s = http_request.SetPostEmptyBody();
-  ASSERT_TRUE(errors::IsFailedPrecondition(s));
-  EXPECT_TRUE(StringPiece(s.error_message())
-                  .contains("The object has not been initialized"));
+  http_request.SetDeleteRequest();
+  ASSERT_DEATH(http_request.SetPostEmptyBody(),
+               "HTTP method has been already set");
 }
 
 TEST(CurlHttpRequestTest, EscapeString) {
   FakeLibCurl libcurl("get response", 200);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
   const string test_string = "a/b/c";
   EXPECT_EQ("a%2Fb%2Fc", http_request.EscapeString(test_string));
 }
@@ -590,17 +582,15 @@ TEST(CurlHttpRequestTest, EscapeString) {
 TEST(CurlHttpRequestTest, ErrorReturnsNoResponse) {
   FakeLibCurl libcurl("get response", 500);
   CurlHttpRequest http_request(&libcurl);
-  TF_EXPECT_OK(http_request.Init());
 
   std::vector<char> scratch;
   scratch.insert(scratch.begin(), kTestContent.begin(), kTestContent.end());
-  StringPiece result;
   scratch.reserve(100);
 
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
-  TF_EXPECT_OK(http_request.AddAuthBearerHeader("fake-bearer"));
-  TF_EXPECT_OK(http_request.SetRange(100, 199));
-  TF_EXPECT_OK(http_request.SetResultBuffer(&scratch));
+  http_request.SetUri("http://www.testuri.com");
+  http_request.AddAuthBearerHeader("fake-bearer");
+  http_request.SetRange(100, 199);
+  http_request.SetResultBuffer(&scratch);
   EXPECT_EQ(error::UNAVAILABLE, http_request.Send().code());
 
   EXPECT_EQ("", string(scratch.begin(), scratch.end()));
@@ -618,8 +608,7 @@ TEST(CurlHttpRequestTest, ProgressIsOk) {
       },
       &env);
   CurlHttpRequest http_request(&libcurl, &env);
-  TF_EXPECT_OK(http_request.Init());
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
+  http_request.SetUri("http://www.testuri.com");
   TF_EXPECT_OK(http_request.Send());
 }
 
@@ -635,8 +624,7 @@ TEST(CurlHttpRequestTest, ProgressIsStuck) {
       },
       &env);
   CurlHttpRequest http_request(&libcurl, &env);
-  TF_EXPECT_OK(http_request.Init());
-  TF_EXPECT_OK(http_request.SetUri("http://www.testuri.com"));
+  http_request.SetUri("http://www.testuri.com");
   auto status = http_request.Send();
   EXPECT_EQ(error::UNAVAILABLE, status.code());
   EXPECT_EQ(
diff --git a/tensorflow/core/platform/cloud/expiring_lru_cache.h b/tensorflow/core/platform/cloud/expiring_lru_cache.h
index 3fc23a4306eb96e85099bd63c9c83c6663fe7e3c..c738497ddd533b5b9a8339e51a21ac204acf68b5 100644
--- a/tensorflow/core/platform/cloud/expiring_lru_cache.h
+++ b/tensorflow/core/platform/cloud/expiring_lru_cache.h
@@ -88,6 +88,13 @@ class ExpiringLRUCache {
     return s;
   }
 
+  /// Clear the cache.
+  void Clear() {
+    mutex_lock lock(mu_);
+    cache_.clear();
+    lru_list_.clear();
+  }
+
   /// Accessors for cache parameters.
   uint64 max_age() const { return max_age_; }
   size_t max_entries() const { return max_entries_; }
diff --git a/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc b/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc
index 8f8d5744a4576991c0056bfefeb30c4bc58549e0..3bc6db38429155ca61732b44da3815422b480c92 100644
--- a/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc
+++ b/tensorflow/core/platform/cloud/expiring_lru_cache_test.cc
@@ -152,5 +152,27 @@ TEST(ExpiringLRUCacheTest, LookupOrCompute) {
   EXPECT_EQ(num_compute_calls, 6);
 }
 
+TEST(ExpiringLRUCacheTest, Clear) {
+  ExpiringLRUCache<int> cache(1, 4);
+  cache.Insert("a", 1);
+  cache.Insert("b", 2);
+  cache.Insert("c", 3);
+  cache.Insert("d", 4);
+  int value = 0;
+  EXPECT_TRUE(cache.Lookup("a", &value));
+  EXPECT_EQ(value, 1);
+  EXPECT_TRUE(cache.Lookup("b", &value));
+  EXPECT_EQ(value, 2);
+  EXPECT_TRUE(cache.Lookup("c", &value));
+  EXPECT_EQ(value, 3);
+  EXPECT_TRUE(cache.Lookup("d", &value));
+  EXPECT_EQ(value, 4);
+  cache.Clear();
+  EXPECT_FALSE(cache.Lookup("a", &value));
+  EXPECT_FALSE(cache.Lookup("b", &value));
+  EXPECT_FALSE(cache.Lookup("c", &value));
+  EXPECT_FALSE(cache.Lookup("d", &value));
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/file_block_cache.cc b/tensorflow/core/platform/cloud/file_block_cache.cc
index a472ae52fcde114aa5660ee2f6fc2e9323b2ad68..0375af516b0504e8b527409ba22da0caa149ad9d 100644
--- a/tensorflow/core/platform/cloud/file_block_cache.cc
+++ b/tensorflow/core/platform/cloud/file_block_cache.cc
@@ -123,10 +123,14 @@ Status FileBlockCache::MaybeFetch(const Key& key,
       case FetchState::CREATED:
         block->state = FetchState::FETCHING;
         block->mu.unlock();  // Release the lock while making the API call.
-        status.Update(
-            block_fetcher_(key.first, key.second, block_size_, &block->data));
+        block->data.clear();
+        block->data.resize(block_size_, 0);
+        size_t bytes_transferred;
+        status.Update(block_fetcher_(key.first, key.second, block_size_,
+                                     block->data.data(), &bytes_transferred));
         block->mu.lock();  // Reacquire the lock immediately afterwards
         if (status.ok()) {
+          block->data.resize(bytes_transferred, 0);
           downloaded_block = true;
           block->state = FetchState::FINISHED;
         } else {
@@ -150,15 +154,15 @@ Status FileBlockCache::MaybeFetch(const Key& key,
 }
 
 Status FileBlockCache::Read(const string& filename, size_t offset, size_t n,
-                            std::vector<char>* out) {
-  out->clear();
+                            char* buffer, size_t* bytes_transferred) {
+  *bytes_transferred = 0;
   if (n == 0) {
     return Status::OK();
   }
   if (block_size_ == 0 || max_bytes_ == 0) {
     // The cache is effectively disabled, so we pass the read through to the
     // fetcher without breaking it up into blocks.
-    return block_fetcher_(filename, offset, n, out);
+    return block_fetcher_(filename, offset, n, buffer, bytes_transferred);
   }
   // Calculate the block-aligned start and end of the read.
   size_t start = block_size_ * (offset / block_size_);
@@ -166,6 +170,7 @@ Status FileBlockCache::Read(const string& filename, size_t offset, size_t n,
   if (finish < offset + n) {
     finish += block_size_;
   }
+  size_t total_bytes_transferred = 0;
   // Now iterate through the blocks, reading them one at a time.
   for (size_t pos = start; pos < finish; pos += block_size_) {
     Key key = std::make_pair(filename, pos);
@@ -181,7 +186,10 @@ Status FileBlockCache::Read(const string& filename, size_t offset, size_t n,
       // The requested offset is at or beyond the end of the file. This can
       // happen if `offset` is not block-aligned, and the read returns the last
       // block in the file, which does not extend all the way out to `offset`.
-      return errors::OutOfRange("EOF at offset ", offset);
+      *bytes_transferred = total_bytes_transferred;
+      return errors::OutOfRange("EOF at offset ", offset, " in file ", filename,
+                                " at position ", pos, "with data size ",
+                                data.size());
     }
     auto begin = data.begin();
     if (offset > pos) {
@@ -194,13 +202,16 @@ Status FileBlockCache::Read(const string& filename, size_t offset, size_t n,
       end -= (pos + data.size()) - (offset + n);
     }
     if (begin < end) {
-      out->insert(out->end(), begin, end);
+      size_t bytes_to_copy = end - begin;
+      memcpy(&buffer[total_bytes_transferred], &*begin, bytes_to_copy);
+      total_bytes_transferred += bytes_to_copy;
     }
     if (data.size() < block_size_) {
       // The block was a partial block and thus signals EOF at its upper bound.
       break;
     }
   }
+  *bytes_transferred = total_bytes_transferred;
   return Status::OK();
 }
 
@@ -226,6 +237,14 @@ void FileBlockCache::Prune() {
   }
 }
 
+void FileBlockCache::Flush() {
+  mutex_lock lock(mu_);
+  block_map_.clear();
+  lru_list_.clear();
+  lra_list_.clear();
+  cache_size_ = 0;
+}
+
 void FileBlockCache::RemoveFile(const string& filename) {
   mutex_lock lock(mu_);
   RemoveFile_Locked(filename);
diff --git a/tensorflow/core/platform/cloud/file_block_cache.h b/tensorflow/core/platform/cloud/file_block_cache.h
index 36dbf9db83238fa05e3b010c2a73cb823623f54b..5c180e2332042af3ae938c2685ac416952b00187 100644
--- a/tensorflow/core/platform/cloud/file_block_cache.h
+++ b/tensorflow/core/platform/cloud/file_block_cache.h
@@ -43,8 +43,9 @@ class FileBlockCache {
   /// cache is constructed. The returned Status should be OK as long as the
   /// read from the remote filesystem succeeded (similar to the semantics of the
   /// read(2) system call).
-  typedef std::function<Status(const string&, size_t, size_t,
-                               std::vector<char>*)>
+  typedef std::function<Status(const string& filename, size_t offset,
+                               size_t buffer_size, char* buffer,
+                               size_t* bytes_transferred)>
       BlockFetcher;
 
   FileBlockCache(size_t block_size, size_t max_bytes, uint64 max_staleness,
@@ -83,12 +84,15 @@ class FileBlockCache {
   ///    placed in `out`.
   /// 4) OK otherwise (i.e. the read succeeded, and at least one byte was placed
   ///    in `out`).
-  Status Read(const string& filename, size_t offset, size_t n,
-              std::vector<char>* out);
+  Status Read(const string& filename, size_t offset, size_t n, char* buffer,
+              size_t* bytes_transferred);
 
   /// Remove all cached blocks for `filename`.
   void RemoveFile(const string& filename) LOCKS_EXCLUDED(mu_);
 
+  /// Remove all cached data.
+  void Flush() LOCKS_EXCLUDED(mu_);
+
   /// Accessors for cache parameters.
   size_t block_size() const { return block_size_; }
   size_t max_bytes() const { return max_bytes_; }
diff --git a/tensorflow/core/platform/cloud/file_block_cache_test.cc b/tensorflow/core/platform/cloud/file_block_cache_test.cc
index 081b32af64636105925240da70bf050cdec2c4b9..596fdbf19eb03a70c5659d392db368b3cdb791fe 100644
--- a/tensorflow/core/platform/cloud/file_block_cache_test.cc
+++ b/tensorflow/core/platform/cloud/file_block_cache_test.cc
@@ -25,6 +25,18 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+Status ReadCache(FileBlockCache* cache, const string& filename, size_t offset,
+                 size_t n, std::vector<char>* out) {
+  out->clear();
+  out->resize(n, 0);
+  size_t bytes_transferred = 0;
+  Status status =
+      cache->Read(filename, offset, n, out->data(), &bytes_transferred);
+  EXPECT_LE(bytes_transferred, n);
+  out->resize(bytes_transferred, n);
+  return status;
+}
+
 TEST(FileBlockCacheTest, PassThrough) {
   const string want_filename = "foo/bar";
   const size_t want_offset = 42;
@@ -32,12 +44,13 @@ TEST(FileBlockCacheTest, PassThrough) {
   int calls = 0;
   auto fetcher = [&calls, want_filename, want_offset, want_n](
                      const string& got_filename, size_t got_offset,
-                     size_t got_n, std::vector<char>* out) {
+                     size_t got_n, char* buffer, size_t* bytes_transferred) {
     EXPECT_EQ(got_filename, want_filename);
     EXPECT_EQ(got_offset, want_offset);
     EXPECT_EQ(got_n, want_n);
     calls++;
-    out->resize(got_n, 'x');
+    memset(buffer, 'x', got_n);
+    *bytes_transferred = got_n;
     return Status::OK();
   };
   // If block_size, max_bytes, or both are zero, the cache is a pass-through.
@@ -45,11 +58,11 @@ TEST(FileBlockCacheTest, PassThrough) {
   FileBlockCache cache2(0, 1, 0, fetcher);
   FileBlockCache cache3(0, 0, 0, fetcher);
   std::vector<char> out;
-  TF_EXPECT_OK(cache1.Read(want_filename, want_offset, want_n, &out));
+  TF_EXPECT_OK(ReadCache(&cache1, want_filename, want_offset, want_n, &out));
   EXPECT_EQ(calls, 1);
-  TF_EXPECT_OK(cache2.Read(want_filename, want_offset, want_n, &out));
+  TF_EXPECT_OK(ReadCache(&cache2, want_filename, want_offset, want_n, &out));
   EXPECT_EQ(calls, 2);
-  TF_EXPECT_OK(cache3.Read(want_filename, want_offset, want_n, &out));
+  TF_EXPECT_OK(ReadCache(&cache3, want_filename, want_offset, want_n, &out));
   EXPECT_EQ(calls, 3);
 }
 
@@ -63,13 +76,13 @@ TEST(FileBlockCacheTest, BlockAlignment) {
   }
   // The fetcher just fetches slices of the buffer.
   auto fetcher = [&buf](const string& filename, size_t offset, size_t n,
-                        std::vector<char>* out) {
+                        char* buffer, size_t* bytes_transferred) {
     if (offset < buf.size()) {
-      if (offset + n > buf.size()) {
-        out->insert(out->end(), buf.begin() + offset, buf.end());
-      } else {
-        out->insert(out->end(), buf.begin() + offset, buf.begin() + offset + n);
-      }
+      size_t bytes_to_copy = std::min<size_t>(buf.size() - offset, n);
+      memcpy(buffer, buf.data() + offset, bytes_to_copy);
+      *bytes_transferred = bytes_to_copy;
+    } else {
+      *bytes_transferred = 0;
     }
     return Status::OK();
   };
@@ -80,7 +93,7 @@ TEST(FileBlockCacheTest, BlockAlignment) {
     for (size_t offset = 0; offset < 10; offset++) {
       for (size_t n = block_size - 2; n <= block_size + 2; n++) {
         std::vector<char> got;
-        TF_EXPECT_OK(cache.Read("", offset, n, &got));
+        TF_EXPECT_OK(ReadCache(&cache, "", offset, n, &got));
         // Verify the size of the read.
         if (offset + n <= size) {
           // Expect a full read.
@@ -108,24 +121,27 @@ TEST(FileBlockCacheTest, CacheHits) {
   const size_t block_size = 16;
   std::set<size_t> calls;
   auto fetcher = [&calls, block_size](const string& filename, size_t offset,
-                                      size_t n, std::vector<char>* out) {
+                                      size_t n, char* buffer,
+                                      size_t* bytes_transferred) {
     EXPECT_EQ(n, block_size);
     EXPECT_EQ(offset % block_size, 0);
     EXPECT_EQ(calls.find(offset), calls.end()) << "at offset " << offset;
     calls.insert(offset);
-    out->resize(n, 'x');
+    memset(buffer, 'x', n);
+    *bytes_transferred = n;
     return Status::OK();
   };
   const uint32 block_count = 256;
   FileBlockCache cache(block_size, block_count * block_size, 0, fetcher);
   std::vector<char> out;
+  out.resize(block_count, 0);
   // The cache has space for `block_count` blocks. The loop with i = 0 should
   // fill the cache, and the loop with i = 1 should be all cache hits. The
   // fetcher checks that it is called once and only once for each offset (to
   // fetch the corresponding block).
   for (int i = 0; i < 2; i++) {
     for (int j = 0; j < block_count; j++) {
-      TF_EXPECT_OK(cache.Read("", block_size * j, block_size, &out));
+      TF_EXPECT_OK(ReadCache(&cache, "", block_size * j, block_size, &out));
     }
   }
 }
@@ -138,36 +154,39 @@ TEST(FileBlockCacheTest, OutOfRange) {
   bool second_block = false;
   auto fetcher = [block_size, file_size, &first_block, &second_block](
                      const string& filename, size_t offset, size_t n,
-                     std::vector<char>* out) {
+                     char* buffer, size_t* bytes_transferred) {
     EXPECT_EQ(n, block_size);
     EXPECT_EQ(offset % block_size, 0);
+    size_t bytes_to_copy = 0;
     if (offset == 0) {
       // The first block (16 bytes) of the file.
-      out->resize(n, 'x');
+      memset(buffer, 'x', n);
+      bytes_to_copy = n;
       first_block = true;
     } else if (offset == block_size) {
       // The second block (8 bytes) of the file.
-      out->resize(file_size - block_size, 'x');
+      bytes_to_copy = file_size - block_size;
+      memset(buffer, 'x', bytes_to_copy);
       second_block = true;
     }
+    *bytes_transferred = bytes_to_copy;
     return Status::OK();
   };
   FileBlockCache cache(block_size, block_size, 0, fetcher);
   std::vector<char> out;
   // Reading the first 16 bytes should be fine.
-  TF_EXPECT_OK(cache.Read("", 0, block_size, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", 0, block_size, &out));
   EXPECT_TRUE(first_block);
   EXPECT_EQ(out.size(), block_size);
   // Reading at offset file_size + 4 will read the second block (since the read
   // at file_size + 4 = 28 will be aligned to an offset of 16) but will return
   // OutOfRange because the offset is past the end of the 24-byte file.
-  Status status = cache.Read("", file_size + 4, 4, &out);
+  Status status = ReadCache(&cache, "", file_size + 4, 4, &out);
   EXPECT_EQ(status.code(), error::OUT_OF_RANGE);
   EXPECT_TRUE(second_block);
-  EXPECT_EQ(out.size(), 0);
   // Reading the second full block will return 8 bytes, from a cache hit.
   second_block = false;
-  TF_EXPECT_OK(cache.Read("", block_size, block_size, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", block_size, block_size, &out));
   EXPECT_FALSE(second_block);
   EXPECT_EQ(out.size(), file_size - block_size);
 }
@@ -178,20 +197,22 @@ TEST(FileBlockCacheTest, Inconsistent) {
   const size_t block_size = 16;
   // This fetcher returns OK but only fills in one byte for any offset.
   auto fetcher = [block_size](const string& filename, size_t offset, size_t n,
-                              std::vector<char>* out) {
+                              char* buffer, size_t* bytes_transferred) {
     EXPECT_EQ(n, block_size);
     EXPECT_EQ(offset % block_size, 0);
-    out->resize(1, 'x');
+    EXPECT_GE(n, 1);
+    memset(buffer, 'x', 1);
+    *bytes_transferred = 1;
     return Status::OK();
   };
   FileBlockCache cache(block_size, 2 * block_size, 0, fetcher);
   std::vector<char> out;
   // Read the second block; this should yield an OK status and a single byte.
-  TF_EXPECT_OK(cache.Read("", block_size, block_size, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", block_size, block_size, &out));
   EXPECT_EQ(out.size(), 1);
   // Now read the first block; this should yield an INTERNAL error because we
   // had already cached a partial block at a later position.
-  Status status = cache.Read("", 0, block_size, &out);
+  Status status = ReadCache(&cache, "", 0, block_size, &out);
   EXPECT_EQ(status.code(), error::INTERNAL);
 }
 
@@ -199,14 +220,16 @@ TEST(FileBlockCacheTest, LRU) {
   const size_t block_size = 16;
   std::list<size_t> calls;
   auto fetcher = [&calls, block_size](const string& filename, size_t offset,
-                                      size_t n, std::vector<char>* out) {
+                                      size_t n, char* buffer,
+                                      size_t* bytes_transferred) {
     EXPECT_EQ(n, block_size);
     EXPECT_FALSE(calls.empty()) << "at offset = " << offset;
     if (!calls.empty()) {
       EXPECT_EQ(offset, calls.front());
       calls.pop_front();
     }
-    out->resize(n, 'x');
+    memset(buffer, 'x', n);
+    *bytes_transferred = n;
     return Status::OK();
   };
   const uint32 block_count = 2;
@@ -216,38 +239,39 @@ TEST(FileBlockCacheTest, LRU) {
   // fetcher calls that the cache makes.
   calls.push_back(0);
   // Cache miss - drains an element from `calls`.
-  TF_EXPECT_OK(cache.Read("", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", 0, 1, &out));
   // Cache hit - does not drain an element from `calls`.
-  TF_EXPECT_OK(cache.Read("", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", 0, 1, &out));
   calls.push_back(block_size);
   // Cache miss followed by cache hit.
-  TF_EXPECT_OK(cache.Read("", block_size, 1, &out));
-  TF_EXPECT_OK(cache.Read("", block_size, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", block_size, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", block_size, 1, &out));
   calls.push_back(2 * block_size);
   // Cache miss followed by cache hit.  Causes eviction of LRU element.
-  TF_EXPECT_OK(cache.Read("", 2 * block_size, 1, &out));
-  TF_EXPECT_OK(cache.Read("", 2 * block_size, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", 2 * block_size, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", 2 * block_size, 1, &out));
   // LRU element was at offset 0.  Cache miss.
   calls.push_back(0);
-  TF_EXPECT_OK(cache.Read("", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", 0, 1, &out));
   // Element at 2 * block_size is still in cache, and this read should update
   // its position in the LRU list so it doesn't get evicted by the next read.
-  TF_EXPECT_OK(cache.Read("", 2 * block_size, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", 2 * block_size, 1, &out));
   // Element at block_size was evicted.  Reading this element will also cause
   // the LRU element (at 0) to be evicted.
   calls.push_back(block_size);
-  TF_EXPECT_OK(cache.Read("", block_size, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", block_size, 1, &out));
   // Element at 0 was evicted again.
   calls.push_back(0);
-  TF_EXPECT_OK(cache.Read("", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", 0, 1, &out));
 }
 
 TEST(FileBlockCacheTest, MaxStaleness) {
   int calls = 0;
   auto fetcher = [&calls](const string& filename, size_t offset, size_t n,
-                          std::vector<char>* out) {
+                          char* buffer, size_t* bytes_transferred) {
     calls++;
-    out->resize(n, 'x');
+    memset(buffer, 'x', n);
+    *bytes_transferred = n;
     return Status::OK();
   };
   std::vector<char> out;
@@ -256,14 +280,14 @@ TEST(FileBlockCacheTest, MaxStaleness) {
   // expected.
   FileBlockCache cache1(8, 16, 2 /* max staleness */, fetcher, env.get());
   // Execute the first read to load the block.
-  TF_EXPECT_OK(cache1.Read("", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache1, "", 0, 1, &out));
   EXPECT_EQ(calls, 1);
   // Now advance the clock one second at a time and redo the read. The call
   // count should advance every 3 seconds (i.e. every time the staleness is
   // greater than 2).
   for (int i = 1; i <= 10; i++) {
     env->SetNowSeconds(i + 1);
-    TF_EXPECT_OK(cache1.Read("", 0, 1, &out));
+    TF_EXPECT_OK(ReadCache(&cache1, "", 0, 1, &out));
     EXPECT_EQ(calls, 1 + i / 3);
   }
   // Now create a cache with max staleness of 0, and verify that it also works
@@ -272,27 +296,27 @@ TEST(FileBlockCacheTest, MaxStaleness) {
   env->SetNowSeconds(0);
   FileBlockCache cache2(8, 16, 0 /* max staleness */, fetcher, env.get());
   // Execute the first read to load the block.
-  TF_EXPECT_OK(cache2.Read("", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache2, "", 0, 1, &out));
   EXPECT_EQ(calls, 1);
   // Advance the clock by a huge amount and verify that the cached block is
   // used to satisfy the read.
   env->SetNowSeconds(365 * 24 * 60 * 60);  // ~1 year, just for fun.
-  TF_EXPECT_OK(cache2.Read("", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache2, "", 0, 1, &out));
   EXPECT_EQ(calls, 1);
 }
 
 TEST(FileBlockCacheTest, RemoveFile) {
   int calls = 0;
   auto fetcher = [&calls](const string& filename, size_t offset, size_t n,
-                          std::vector<char>* out) {
+                          char* buffer, size_t* bytes_transferred) {
     calls++;
     char c = (filename == "a") ? 'a' : (filename == "b") ? 'b' : 'x';
     if (offset > 0) {
       // The first block is lower case and all subsequent blocks are upper case.
       c = toupper(c);
     }
-    out->clear();
-    out->resize(n, c);
+    memset(buffer, c, n);
+    *bytes_transferred = n;
     return Status::OK();
   };
   // This cache has space for 4 blocks; we'll read from two files.
@@ -304,41 +328,41 @@ TEST(FileBlockCacheTest, RemoveFile) {
   std::vector<char> A(n, 'A');
   std::vector<char> B(n, 'B');
   // Fill the cache.
-  TF_EXPECT_OK(cache.Read("a", 0, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "a", 0, n, &out));
   EXPECT_EQ(out, a);
   EXPECT_EQ(calls, 1);
-  TF_EXPECT_OK(cache.Read("a", 8, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "a", 8, n, &out));
   EXPECT_EQ(out, A);
   EXPECT_EQ(calls, 2);
-  TF_EXPECT_OK(cache.Read("b", 0, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "b", 0, n, &out));
   EXPECT_EQ(out, b);
   EXPECT_EQ(calls, 3);
-  TF_EXPECT_OK(cache.Read("b", 8, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "b", 8, n, &out));
   EXPECT_EQ(out, B);
   EXPECT_EQ(calls, 4);
   // All four blocks should be in the cache now.
-  TF_EXPECT_OK(cache.Read("a", 0, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "a", 0, n, &out));
   EXPECT_EQ(out, a);
-  TF_EXPECT_OK(cache.Read("a", 8, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "a", 8, n, &out));
   EXPECT_EQ(out, A);
-  TF_EXPECT_OK(cache.Read("b", 0, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "b", 0, n, &out));
   EXPECT_EQ(out, b);
-  TF_EXPECT_OK(cache.Read("b", 8, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "b", 8, n, &out));
   EXPECT_EQ(out, B);
   EXPECT_EQ(calls, 4);
   // Remove the blocks from "a".
   cache.RemoveFile("a");
   // Both blocks from "b" should still be there.
-  TF_EXPECT_OK(cache.Read("b", 0, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "b", 0, n, &out));
   EXPECT_EQ(out, b);
-  TF_EXPECT_OK(cache.Read("b", 8, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "b", 8, n, &out));
   EXPECT_EQ(out, B);
   EXPECT_EQ(calls, 4);
   // The blocks from "a" should not be there.
-  TF_EXPECT_OK(cache.Read("a", 0, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "a", 0, n, &out));
   EXPECT_EQ(out, a);
   EXPECT_EQ(calls, 5);
-  TF_EXPECT_OK(cache.Read("a", 8, n, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "a", 8, n, &out));
   EXPECT_EQ(out, A);
   EXPECT_EQ(calls, 6);
 }
@@ -346,10 +370,10 @@ TEST(FileBlockCacheTest, RemoveFile) {
 TEST(FileBlockCacheTest, Prune) {
   int calls = 0;
   auto fetcher = [&calls](const string& filename, size_t offset, size_t n,
-                          std::vector<char>* out) {
+                          char* buffer, size_t* bytes_transferred) {
     calls++;
-    out->clear();
-    out->resize(n, 'x');
+    memset(buffer, 'x', n);
+    *bytes_transferred = n;
     return Status::OK();
   };
   std::vector<char> out;
@@ -360,20 +384,20 @@ TEST(FileBlockCacheTest, Prune) {
   FileBlockCache cache(8, 32, 1 /* max staleness */, fetcher, env.get());
   // Read three blocks into the cache, and advance the timestamp by one second
   // with each read. Start with a block of "a" at the current timestamp `now`.
-  TF_EXPECT_OK(cache.Read("a", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "a", 0, 1, &out));
   // Now load a block of a different file "b" at timestamp `now` + 1
   env->SetNowSeconds(now + 1);
-  TF_EXPECT_OK(cache.Read("b", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "b", 0, 1, &out));
   // Now load a different block of file "a" at timestamp `now` + 1. When the
   // first block of "a" expires, this block should also be removed because it
   // also belongs to file "a".
-  TF_EXPECT_OK(cache.Read("a", 8, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "a", 8, 1, &out));
   // Ensure that all blocks are in the cache (i.e. reads are cache hits).
   EXPECT_EQ(cache.CacheSize(), 24);
   EXPECT_EQ(calls, 3);
-  TF_EXPECT_OK(cache.Read("a", 0, 1, &out));
-  TF_EXPECT_OK(cache.Read("b", 0, 1, &out));
-  TF_EXPECT_OK(cache.Read("a", 8, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "a", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "b", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "a", 8, 1, &out));
   EXPECT_EQ(calls, 3);
   // Advance the fake timestamp so that "a" becomes stale via its first block.
   env->SetNowSeconds(now + 2);
@@ -389,7 +413,7 @@ TEST(FileBlockCacheTest, Prune) {
   // There should be one block left in the cache, and it should be the first
   // block of "b".
   EXPECT_EQ(cache.CacheSize(), 8);
-  TF_EXPECT_OK(cache.Read("b", 0, 1, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "b", 0, 1, &out));
   EXPECT_EQ(calls, 3);
   // Advance the fake time to `now` + 3, at which point "b" becomes stale.
   env->SetNowSeconds(now + 3);
@@ -409,14 +433,14 @@ TEST(FileBlockCacheTest, ParallelReads) {
   const int callers = 4;
   BlockingCounter counter(callers);
   auto fetcher = [&counter](const string& filename, size_t offset, size_t n,
-                            std::vector<char>* out) {
+                            char* buffer, size_t* bytes_transferred) {
     counter.DecrementCount();
     if (!counter.WaitFor(std::chrono::seconds(10))) {
       // This avoids having the test time out, which is harder to debug.
       return errors::FailedPrecondition("desired concurrency not reached");
     }
-    out->clear();
-    out->resize(n, 'x');
+    memset(buffer, 'x', n);
+    *bytes_transferred = n;
     return Status::OK();
   };
   const int block_size = 8;
@@ -426,7 +450,8 @@ TEST(FileBlockCacheTest, ParallelReads) {
     threads.emplace_back(
         Env::Default()->StartThread({}, "caller", [&cache, i, block_size]() {
           std::vector<char> out;
-          TF_EXPECT_OK(cache.Read("a", i * block_size, block_size, &out));
+          TF_EXPECT_OK(
+              ReadCache(&cache, "a", i * block_size, block_size, &out));
           std::vector<char> x(block_size, 'x');
           EXPECT_EQ(out, x);
         }));
@@ -443,11 +468,12 @@ TEST(FileBlockCacheTest, CoalesceConcurrentReads) {
   Notification notification;
   auto fetcher = [&num_requests, &notification, block_size](
                      const string& filename, size_t offset, size_t n,
-                     std::vector<char>* out) {
+                     char* buffer, size_t* bytes_transferred) {
     EXPECT_EQ(n, block_size);
     EXPECT_EQ(offset, 0);
     num_requests++;
-    out->resize(n, 'x');
+    memset(buffer, 'x', n);
+    *bytes_transferred = n;
     notification.Notify();
     // Wait for other thread to issue read.
     Env::Default()->SleepForMicroseconds(100000);  // 0.1 secs
@@ -456,19 +482,38 @@ TEST(FileBlockCacheTest, CoalesceConcurrentReads) {
   FileBlockCache cache(block_size, block_size, 0, fetcher);
   // Fork off thread for parallel read.
   std::unique_ptr<Thread> concurrent(
-      Env::Default()->StartThread({}, "concurrent", [&cache] {
+      Env::Default()->StartThread({}, "concurrent", [&cache, block_size] {
         std::vector<char> out;
-        TF_EXPECT_OK(cache.Read("", 0, block_size / 2, &out));
+        TF_EXPECT_OK(ReadCache(&cache, "", 0, block_size / 2, &out));
         EXPECT_EQ(out.size(), block_size / 2);
       }));
   EXPECT_TRUE(WaitForNotificationWithTimeout(&notification, 10000))
       << "Timeout waiting for concurrent thread to start.";
   std::vector<char> out;
-  TF_EXPECT_OK(cache.Read("", block_size / 2, block_size / 2, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", block_size / 2, block_size / 2, &out));
   EXPECT_EQ(out.size(), block_size / 2);
 
   EXPECT_EQ(1, num_requests);
 }
 
+TEST(FileBlockCacheTest, Flush) {
+  int calls = 0;
+  auto fetcher = [&calls](const string& filename, size_t offset, size_t n,
+                          char* buffer, size_t* bytes_transferred) {
+    calls++;
+    memset(buffer, 'x', n);
+    *bytes_transferred = n;
+    return Status::OK();
+  };
+  FileBlockCache cache(16, 32, 0, fetcher);
+  std::vector<char> out;
+  TF_EXPECT_OK(ReadCache(&cache, "", 0, 16, &out));
+  TF_EXPECT_OK(ReadCache(&cache, "", 0, 16, &out));
+  EXPECT_EQ(calls, 1);
+  cache.Flush();
+  TF_EXPECT_OK(ReadCache(&cache, "", 0, 16, &out));
+  EXPECT_EQ(calls, 2);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.cc b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
index 63f2da065db9c85eaac0f6ae1f64a079440a9eaf..2b0e55bf371da9660f1422cef97e3ec1a25a9b61 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache.cc
@@ -14,60 +14,86 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/gcs_dns_cache.h"
-
+#ifndef _WIN32
 #include <arpa/inet.h>
 #include <netdb.h>
+#else
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <Windows.h>
+#endif
 #include <sys/types.h>
 
 namespace tensorflow {
 
 namespace {
 
-constexpr char kStorageHost[] = "storage.googleapis.com";
-constexpr char kWwwHost[] = "www.googleapis.com";
+const std::vector<string>& kCachedDomainNames =
+    *new std::vector<string>{"www.googleapis.com", "storage.googleapis.com"};
+
+inline void print_getaddrinfo_error(const string& name, int error_code) {
+#ifndef _WIN32
+  if (error_code == EAI_SYSTEM) {
+    LOG(ERROR) << "Error resolving " << name
+               << " (EAI_SYSTEM): " << strerror(errno);
+  } else {
+    LOG(ERROR) << "Error resolving " << name << ": "
+               << gai_strerror(error_code);
+  }
+#else
+  // TODO:WSAGetLastError is better than gai_strerror
+  LOG(ERROR) << "Error resolving " << name << ": " << gai_strerror(error_code);
+#endif
+}
 
+// Selects one item at random from a vector of items, using a uniform
+// distribution.
+template <typename T>
+const T& SelectRandomItemUniform(std::default_random_engine* random,
+                                 const std::vector<T>& items) {
+  CHECK_GT(items.size(), 0);
+  std::uniform_int_distribution<size_t> distribution(0u, items.size() - 1u);
+  size_t choice_index = distribution(*random);
+  return items[choice_index];
+}
 }  // namespace
 
 GcsDnsCache::GcsDnsCache(Env* env, int64 refresh_rate_secs)
     : env_(env), refresh_rate_secs_(refresh_rate_secs) {}
 
-Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
+void GcsDnsCache::AnnotateRequest(HttpRequest* request) {
   // TODO(saeta): Blacklist failing IP addresses.
   mutex_lock l(mu_);
   if (!started_) {
+    VLOG(1) << "Starting GCS DNS cache.";
     DCHECK(!worker_) << "Worker thread already exists!";
     // Perform DNS resolutions to warm the cache.
-    std::vector<string> www_addresses = ResolveName(kWwwHost);
-    std::vector<string> storage_addresses = ResolveName(kStorageHost);
-    www_addresses.swap(www_addresses_);
-    storage_addresses.swap(storage_addresses_);
+    addresses_ = ResolveNames(kCachedDomainNames);
 
     // Note: we opt to use a thread instead of a delayed closure.
     worker_.reset(env_->StartThread(
         {}, "gcs_dns_worker", std::bind(&GcsDnsCache::WorkerThread, this)));
     started_ = true;
   }
-  if (!storage_addresses_.empty()) {
-    std::uniform_int_distribution<> storage_dist(0,
-                                                 storage_addresses_.size() - 1);
-    size_t index = storage_dist(random_);
-    TF_RETURN_IF_ERROR(request->AddResolveOverride(kStorageHost, 443,
-                                                   storage_addresses_[index]));
-  } else {
-    LOG(WARNING) << "No IP addresses available for " << kStorageHost;
-  }
-  if (!www_addresses_.empty()) {
-    std::uniform_int_distribution<> www_dist(0, www_addresses_.size() - 1);
-    size_t index = www_dist(random_);
-    TF_RETURN_IF_ERROR(
-        request->AddResolveOverride(kWwwHost, 443, www_addresses_[index]));
-  } else {
-    LOG(WARNING) << "No IP addresses available for " << kWwwHost;
+
+  CHECK_EQ(kCachedDomainNames.size(), addresses_.size());
+  for (size_t i = 0; i < kCachedDomainNames.size(); ++i) {
+    const string& name = kCachedDomainNames[i];
+    const std::vector<string>& addresses = addresses_[i];
+    if (!addresses.empty()) {
+      const string& chosen_address =
+          SelectRandomItemUniform(&random_, addresses);
+      request->AddResolveOverride(name, 443, chosen_address);
+      VLOG(1) << "Annotated DNS mapping: " << name << " --> " << chosen_address;
+    } else {
+      LOG(WARNING) << "No IP addresses available for " << name;
+    }
   }
-  return Status::OK();
 }
 
 /* static */ std::vector<string> GcsDnsCache::ResolveName(const string& name) {
+  VLOG(1) << "Resolving DNS name: " << name;
+
   addrinfo hints;
   memset(&hints, 0, sizeof(hints));
   hints.ai_family = AF_INET;  // Only use IPv4 for now.
@@ -77,7 +103,7 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
 
   std::vector<string> output;
   if (return_code == 0) {
-    for (addrinfo* i = result; i != nullptr; i = i->ai_next) {
+    for (const addrinfo* i = result; i != nullptr; i = i->ai_next) {
       if (i->ai_family != AF_INET || i->ai_addr->sa_family != AF_INET) {
         LOG(WARNING) << "Non-IPv4 address returned. ai_family: " << i->ai_family
                      << ". sa_family: " << i->ai_addr->sa_family << ".";
@@ -93,16 +119,11 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
                    << ": " << strerror(errno);
       } else {
         output.emplace_back(buf);
+        VLOG(1) << "... address: " << buf;
       }
     }
   } else {
-    if (return_code == EAI_SYSTEM) {
-      LOG(ERROR) << "Error resolving " << name
-                 << " (EAI_SYSTEM): " << strerror(errno);
-    } else {
-      LOG(ERROR) << "Error resolving " << name << ": "
-                 << gai_strerror(return_code);
-    }
+    print_getaddrinfo_error(name, return_code);
   }
   if (result != nullptr) {
     freeaddrinfo(result);
@@ -110,6 +131,25 @@ Status GcsDnsCache::AnnotateRequest(HttpRequest* request) {
   return output;
 }
 
+// Performs DNS resolution for a set of DNS names. The return vector contains
+// one element for each element in 'names', and each element is itself a
+// vector of IP addresses (in textual form).
+//
+// If DNS resolution fails for any name, then that slot in the return vector
+// will still be present, but will be an empty vector.
+//
+// Ensures: names.size() == return_value.size()
+
+std::vector<std::vector<string>> GcsDnsCache::ResolveNames(
+    const std::vector<string>& names) {
+  std::vector<std::vector<string>> all_addresses;
+  all_addresses.reserve(names.size());
+  for (const string& name : names) {
+    all_addresses.push_back(ResolveName(name));
+  }
+  return all_addresses;
+}
+
 void GcsDnsCache::WorkerThread() {
   while (true) {
     {
@@ -119,15 +159,14 @@ void GcsDnsCache::WorkerThread() {
       cond_var_.wait_for(l, std::chrono::seconds(refresh_rate_secs_));
       if (cancelled_) return;
     }
+
     // Resolve DNS values
-    std::vector<string> www_addresses = ResolveName(kWwwHost);
-    std::vector<string> storage_addresses = ResolveName(kStorageHost);
+    auto new_addresses = ResolveNames(kCachedDomainNames);
 
     {
       mutex_lock l(mu_);
       // Update instance variables.
-      www_addresses.swap(www_addresses_);
-      storage_addresses.swap(storage_addresses_);
+      addresses_.swap(new_addresses);
     }
   }
 }
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.h b/tensorflow/core/platform/cloud/gcs_dns_cache.h
index 7a4d3847a5ac82b1ced742a20ca18ba84bf6fa7c..dd95c18f35053faa500c78da8362fd7691694f84 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache.h
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache.h
@@ -48,10 +48,12 @@ class GcsDnsCache {
   }
 
   // Annotate the given HttpRequest with resolve overrides from the cache.
-  Status AnnotateRequest(HttpRequest* request);
+  void AnnotateRequest(HttpRequest* request);
 
  private:
   static std::vector<string> ResolveName(const string& name);
+  static std::vector<std::vector<string>> ResolveNames(
+      const std::vector<string>& names);
   void WorkerThread();
 
   // Define a friend class for testing.
@@ -63,10 +65,11 @@ class GcsDnsCache {
   std::default_random_engine random_ GUARDED_BY(mu_);
   bool started_ GUARDED_BY(mu_) = false;
   bool cancelled_ GUARDED_BY(mu_) = false;
-  std::vector<string> www_addresses_ GUARDED_BY(mu_);
-  std::vector<string> storage_addresses_ GUARDED_BY(mu_);
   std::unique_ptr<Thread> worker_ GUARDED_BY(mu_);  // After mutable vars.
   const int64 refresh_rate_secs_;
+
+  // Entries in this vector correspond to entries in kCachedDomainNames.
+  std::vector<std::vector<string>> addresses_ GUARDED_BY(mu_);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
index 8d1a108f30dd0461a1cd08dd217badbdf24fc400..8be452ff44d03bf3a8a66b99b0e65f98da537d5f 100644
--- a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc
@@ -21,14 +21,11 @@ namespace tensorflow {
 
 class TestHttpRequest : public HttpRequest {
  public:
-  Status Init() override { return Status::OK(); }
-  Status SetUri(const string& uri) override { return Status::OK(); }
-  Status SetRange(uint64 start, uint64 end) override { return Status::OK(); }
-  Status AddHeader(const string& name, const string& value) override {
-    return Status::OK();
-  }
-  Status AddResolveOverride(const string& hostname, int64 port,
-                            const string& ip_addr) override {
+  void SetUri(const string& uri) override {}
+  void SetRange(uint64 start, uint64 end) override {}
+  void AddHeader(const string& name, const string& value) override {}
+  void AddResolveOverride(const string& hostname, int64 port,
+                          const string& ip_addr) override {
     EXPECT_EQ(port, 443) << "Unexpected port set for hostname: " << hostname;
     auto itr = resolve_overrides_.find(hostname);
     EXPECT_EQ(itr, resolve_overrides_.end())
@@ -36,34 +33,30 @@ class TestHttpRequest : public HttpRequest {
 
     resolve_overrides_.insert(
         std::map<string, string>::value_type(hostname, ip_addr));
-    return Status::OK();
   }
 
-  Status AddAuthBearerHeader(const string& auth_token) override {
-    return Status::OK();
-  }
+  void AddAuthBearerHeader(const string& auth_token) override {}
 
-  Status SetDeleteRequest() override { return Status::OK(); }
+  void SetDeleteRequest() override {}
 
   Status SetPutFromFile(const string& body_filepath, size_t offset) override {
     return Status::OK();
   }
-  Status SetPutEmptyBody() override { return Status::OK(); }
-
-  Status SetPostFromBuffer(const char* buffer, size_t size) override {
-    return Status::OK();
-  }
-  Status SetPostEmptyBody() override { return Status::OK(); }
-
-  Status SetResultBuffer(std::vector<char>* out_buffer) override {
-    return Status::OK();
-  }
+  void SetPutEmptyBody() override {}
+  void SetPostFromBuffer(const char* buffer, size_t size) override {}
+  void SetPostEmptyBody() override {}
+  void SetResultBuffer(std::vector<char>* out_buffer) override {}
+  void SetResultBufferDirect(char* buffer, size_t size) override {}
+  size_t GetResultBufferDirectBytesTransferred() override { return 0; }
 
   string GetResponseHeader(const string& name) const override { return ""; }
   uint64 GetResponseCode() const override { return 0; }
   Status Send() override { return Status::OK(); }
   string EscapeString(const string& str) override { return ""; }
 
+  void SetTimeouts(uint32 connection, uint32 inactivity,
+                   uint32 total) override {}
+
   std::map<string, string> resolve_overrides_;
 };
 
@@ -83,13 +76,11 @@ class GcsDnsCacheTest : public ::testing::Test {
     {
       mutex_lock l(d.mu_);
       d.started_ = true;  // Avoid creating a thread.
-      d.www_addresses_ = {"192.168.1.1"};
-      d.storage_addresses_ = {"172.134.1.1"};
+      d.addresses_ = {{"192.168.1.1"}, {"172.134.1.1"}};
     }
 
     TestHttpRequest req;
-    Status s = d.AnnotateRequest(&req);
-    EXPECT_TRUE(s.ok()) << s;
+    d.AnnotateRequest(&req);
     EXPECT_EQ("192.168.1.1", req.resolve_overrides_["www.googleapis.com"]);
     EXPECT_EQ("172.134.1.1", req.resolve_overrides_["storage.googleapis.com"]);
   }
@@ -99,8 +90,7 @@ class GcsDnsCacheTest : public ::testing::Test {
     // a timely manner.
     GcsDnsCache d;
     TestHttpRequest req;
-    Status s = d.AnnotateRequest(&req);
-    EXPECT_TRUE(s.ok()) << s;
+    d.AnnotateRequest(&req);
   }
 };
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 54d38fe962bf90e55d8b138e2e734b994b642395..4b30291076d722973bb12a26a12f60ab2c1d40f7 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -22,6 +22,9 @@ limitations under the License.
 #include <cstring>
 #include <fstream>
 #include <vector>
+#ifdef _WIN32
+#include <io.h>  // for _mktemp
+#endif
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -40,6 +43,12 @@ limitations under the License.
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 
+#ifdef _WIN32
+#ifdef DeleteFile
+#undef DeleteFile
+#endif
+#endif
+
 namespace tensorflow {
 
 namespace {
@@ -94,17 +103,40 @@ const FileStatistics DIRECTORY_STAT(0, 0, true);
 // variable to a positive integer describing the frequency used to refresh the
 // userspace DNS cache.
 constexpr char kResolveCacheSecs[] = "GCS_RESOLVE_REFRESH_SECS";
-
+// The environment variable to configure the http request's connection timeout.
+constexpr char kRequestConnectionTimeout[] =
+    "GCS_REQUEST_CONNECTION_TIMEOUT_SECS";
+// The environment varaible to configure the http request's idle timeout.
+constexpr char kRequestIdleTimeout[] = "GCS_REQUEST_IDLE_TIMEOUT_SECS";
+// The environment variable to configure the overall request timeout for
+// metadata requests.
+constexpr char kMetadataRequestTimeout[] = "GCS_METADATA_REQUEST_TIMEOUT_SECS";
+// The environment variable to configure the overall request timeout for
+// block reads requests.
+constexpr char kReadRequestTimeout[] = "GCS_READ_REQUEST_TIMEOUT_SECS";
+// The environment variable to configure the overall request timeout for
+// upload requests.
+constexpr char kWriteRequestTimeout[] = "GCS_WRITE_REQUEST_TIMEOUT_SECS";
+
+// TODO: DO NOT use a hardcoded path
 Status GetTmpFilename(string* filename) {
   if (!filename) {
     return errors::Internal("'filename' cannot be nullptr.");
   }
+#ifndef _WIN32
   char buffer[] = "/tmp/gcs_filesystem_XXXXXX";
   int fd = mkstemp(buffer);
   if (fd < 0) {
     return errors::Internal("Failed to create a temporary file.");
   }
   close(fd);
+#else
+  char buffer[] = "/tmp/gcs_filesystem_XXXXXX";
+  char* ret = _mktemp(buffer);
+  if (ret == nullptr) {
+    return errors::Internal("Failed to create a temporary file.");
+  }
+#endif
   *filename = buffer;
   return Status::OK();
 }
@@ -180,17 +212,21 @@ std::set<string> AddAllSubpaths(const std::vector<string>& paths) {
 
 Status ParseJson(StringPiece json, Json::Value* result) {
   Json::Reader reader;
-  if (!reader.parse(json.ToString(), *result)) {
+  if (!reader.parse(json.data(), json.data() + json.size(), *result)) {
     return errors::Internal("Couldn't parse JSON response from GCS.");
   }
   return Status::OK();
 }
 
+Status ParseJson(const std::vector<char>& json, Json::Value* result) {
+  return ParseJson(StringPiece{json.data(), json.size()}, result);
+}
+
 /// Reads a JSON value with the given name from a parent JSON value.
-Status GetValue(const Json::Value& parent, const string& name,
+Status GetValue(const Json::Value& parent, const char* name,
                 Json::Value* result) {
   *result = parent.get(name, Json::Value::null);
-  if (*result == Json::Value::null) {
+  if (result->isNull()) {
     return errors::Internal("The field '", name,
                             "' was expected in the JSON response.");
   }
@@ -198,7 +234,7 @@ Status GetValue(const Json::Value& parent, const string& name,
 }
 
 /// Reads a string JSON value with the given name from a parent JSON value.
-Status GetStringValue(const Json::Value& parent, const string& name,
+Status GetStringValue(const Json::Value& parent, const char* name,
                       string* result) {
   Json::Value result_value;
   TF_RETURN_IF_ERROR(GetValue(parent, name, &result_value));
@@ -212,7 +248,7 @@ Status GetStringValue(const Json::Value& parent, const string& name,
 }
 
 /// Reads a long JSON value with the given name from a parent JSON value.
-Status GetInt64Value(const Json::Value& parent, const string& name,
+Status GetInt64Value(const Json::Value& parent, const char* name,
                      int64* result) {
   Json::Value result_value;
   TF_RETURN_IF_ERROR(GetValue(parent, name, &result_value));
@@ -221,7 +257,7 @@ Status GetInt64Value(const Json::Value& parent, const string& name,
     return Status::OK();
   }
   if (result_value.isString() &&
-      strings::safe_strto64(result_value.asString().c_str(), result)) {
+      strings::safe_strto64(result_value.asCString(), result)) {
     return Status::OK();
   }
   return errors::Internal(
@@ -230,8 +266,7 @@ Status GetInt64Value(const Json::Value& parent, const string& name,
 }
 
 /// Reads a boolean JSON value with the given name from a parent JSON value.
-Status GetBoolValue(const Json::Value& parent, const string& name,
-                    bool* result) {
+Status GetBoolValue(const Json::Value& parent, const char* name, bool* result) {
   Json::Value result_value;
   TF_RETURN_IF_ERROR(GetValue(parent, name, &result_value));
   if (!result_value.isBool()) {
@@ -253,11 +288,11 @@ class GcsRandomAccessFile : public RandomAccessFile {
   Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
     *result = StringPiece();
-    std::vector<char> out;
-    TF_RETURN_IF_ERROR(file_block_cache_->Read(filename_, offset, n, &out));
-    std::memcpy(scratch, out.data(), std::min(out.size(), n));
-    *result = StringPiece(scratch, std::min(out.size(), n));
-    if (result->size() < n) {
+    size_t bytes_transferred;
+    TF_RETURN_IF_ERROR(file_block_cache_->Read(filename_, offset, n, scratch,
+                                               &bytes_transferred));
+    *result = StringPiece(scratch, bytes_transferred);
+    if (bytes_transferred < n) {
       // This is not an error per se. The RandomAccessFile interface expects
       // that Read returns OutOfRange if fewer bytes were read than requested.
       return errors::OutOfRange("EOF reached, ", result->size(),
@@ -281,17 +316,18 @@ class GcsRandomAccessFile : public RandomAccessFile {
 class GcsWritableFile : public WritableFile {
  public:
   GcsWritableFile(const string& bucket, const string& object,
-                  AuthProvider* auth_provider,
-                  HttpRequest::Factory* http_request_factory,
+                  GcsFileSystem* filesystem,
+                  GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
                   int64 initial_retry_delay_usec)
       : bucket_(bucket),
         object_(object),
-        auth_provider_(auth_provider),
-        http_request_factory_(http_request_factory),
+        filesystem_(filesystem),
+        timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
+    // TODO: to make it safer, outfile_ should be constructed from an FD
     if (GetTmpFilename(&tmp_content_filename_).ok()) {
       outfile_.open(tmp_content_filename_,
                     std::ofstream::binary | std::ofstream::app);
@@ -304,15 +340,14 @@ class GcsWritableFile : public WritableFile {
   /// with the content to be appended. The class takes onwnership of the
   /// specified tmp file and deletes it on close.
   GcsWritableFile(const string& bucket, const string& object,
-                  AuthProvider* auth_provider,
-                  const string& tmp_content_filename,
-                  HttpRequest::Factory* http_request_factory,
+                  GcsFileSystem* filesystem, const string& tmp_content_filename,
+                  GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
                   int64 initial_retry_delay_usec)
       : bucket_(bucket),
         object_(object),
-        auth_provider_(auth_provider),
-        http_request_factory_(http_request_factory),
+        filesystem_(filesystem),
+        timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
         initial_retry_delay_usec_(initial_retry_delay_usec) {
@@ -416,7 +451,7 @@ class GcsWritableFile : public WritableFile {
       return errors::Internal("'size' cannot be nullptr");
     }
     const auto tellp = outfile_.tellp();
-    if (tellp == -1) {
+    if (tellp == static_cast<std::streampos>(-1)) {
       return errors::Internal(
           "Could not get the size of the internal temporary file.");
     }
@@ -432,20 +467,18 @@ class GcsWritableFile : public WritableFile {
     uint64 file_size;
     TF_RETURN_IF_ERROR(GetCurrentFileSize(&file_size));
 
-    string auth_token;
-    TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_, &auth_token));
-
     std::vector<char> output_buffer;
-    std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-    TF_RETURN_IF_ERROR(request->Init());
-    TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
+    std::unique_ptr<HttpRequest> request;
+    TF_RETURN_IF_ERROR(filesystem_->CreateHttpRequest(&request));
+
+    request->SetUri(strings::StrCat(
         kGcsUploadUriBase, "b/", bucket_,
-        "/o?uploadType=resumable&name=", request->EscapeString(object_))));
-    TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-    TF_RETURN_IF_ERROR(request->AddHeader("X-Upload-Content-Length",
-                                          std::to_string(file_size)));
-    TF_RETURN_IF_ERROR(request->SetPostEmptyBody());
-    TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
+        "/o?uploadType=resumable&name=", request->EscapeString(object_)));
+    request->AddHeader("X-Upload-Content-Length", std::to_string(file_size));
+    request->SetPostEmptyBody();
+    request->SetResultBuffer(&output_buffer);
+    request->SetTimeouts(timeouts_->connect, timeouts_->idle,
+                         timeouts_->metadata);
     TF_RETURN_WITH_CONTEXT_IF_ERROR(
         request->Send(), " when initiating an upload to ", GetGcsPath());
     *session_uri = request->GetResponseHeader("Location");
@@ -470,16 +503,13 @@ class GcsWritableFile : public WritableFile {
     uint64 file_size;
     TF_RETURN_IF_ERROR(GetCurrentFileSize(&file_size));
 
-    string auth_token;
-    TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_, &auth_token));
-
-    std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-    TF_RETURN_IF_ERROR(request->Init());
-    TF_RETURN_IF_ERROR(request->SetUri(session_uri));
-    TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-    TF_RETURN_IF_ERROR(request->AddHeader(
-        "Content-Range", strings::StrCat("bytes */", file_size)));
-    TF_RETURN_IF_ERROR(request->SetPutEmptyBody());
+    std::unique_ptr<HttpRequest> request;
+    TF_RETURN_IF_ERROR(filesystem_->CreateHttpRequest(&request));
+    request->SetUri(session_uri);
+    request->SetTimeouts(timeouts_->connect, timeouts_->idle,
+                         timeouts_->metadata);
+    request->AddHeader("Content-Range", strings::StrCat("bytes */", file_size));
+    request->SetPutEmptyBody();
     const Status& status = request->Send();
     if (status.ok()) {
       *completed = true;
@@ -519,18 +549,16 @@ class GcsWritableFile : public WritableFile {
     uint64 file_size;
     TF_RETURN_IF_ERROR(GetCurrentFileSize(&file_size));
 
-    string auth_token;
-    TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_, &auth_token));
-
-    std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-    TF_RETURN_IF_ERROR(request->Init());
-    TF_RETURN_IF_ERROR(request->SetUri(session_uri));
-    TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
+    std::unique_ptr<HttpRequest> request;
+    TF_RETURN_IF_ERROR(filesystem_->CreateHttpRequest(&request));
+    request->SetUri(session_uri);
     if (file_size > 0) {
-      TF_RETURN_IF_ERROR(request->AddHeader(
-          "Content-Range", strings::StrCat("bytes ", start_offset, "-",
-                                           file_size - 1, "/", file_size)));
+      request->AddHeader("Content-Range",
+                         strings::StrCat("bytes ", start_offset, "-",
+                                         file_size - 1, "/", file_size));
     }
+    request->SetTimeouts(timeouts_->connect, timeouts_->idle, timeouts_->write);
+
     TF_RETURN_IF_ERROR(
         request->SetPutFromFile(tmp_content_filename_, start_offset));
     TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when uploading ",
@@ -546,10 +574,10 @@ class GcsWritableFile : public WritableFile {
 
   string bucket_;
   string object_;
-  AuthProvider* auth_provider_;
+  GcsFileSystem* const filesystem_;  // Not owned.
   string tmp_content_filename_;
   std::ofstream outfile_;
-  HttpRequest::Factory* http_request_factory_;
+  GcsFileSystem::TimeoutConfig* timeouts_;
   std::function<void()> file_cache_erase_;
   bool sync_needed_;  // whether there is buffered data that needs to be synced
   int64 initial_retry_delay_usec_;
@@ -634,6 +662,30 @@ GcsFileSystem::GcsFileSystem()
   if (GetEnvVar(kResolveCacheSecs, strings::safe_strto64,
                 &resolve_frequency_secs)) {
     dns_cache_.reset(new GcsDnsCache(resolve_frequency_secs));
+    VLOG(1) << "GCS DNS cache is enabled.  " << kResolveCacheSecs << " = "
+            << resolve_frequency_secs;
+  } else {
+    VLOG(1) << "GCS DNS cache is disabled, because " << kResolveCacheSecs
+            << " = 0 (or is not set)";
+  }
+  // Apply the overrides for request timeouts
+  uint32 timeout_value;
+  if (GetEnvVar(kRequestConnectionTimeout, strings::safe_strtou32,
+                &timeout_value)) {
+    timeouts_.connect = timeout_value;
+  }
+  if (GetEnvVar(kRequestIdleTimeout, strings::safe_strtou32, &timeout_value)) {
+    timeouts_.idle = timeout_value;
+  }
+  if (GetEnvVar(kMetadataRequestTimeout, strings::safe_strtou32,
+                &timeout_value)) {
+    timeouts_.metadata = timeout_value;
+  }
+  if (GetEnvVar(kReadRequestTimeout, strings::safe_strtou32, &timeout_value)) {
+    timeouts_.read = timeout_value;
+  }
+  if (GetEnvVar(kWriteRequestTimeout, strings::safe_strtou32, &timeout_value)) {
+    timeouts_.write = timeout_value;
   }
 }
 
@@ -643,7 +695,8 @@ GcsFileSystem::GcsFileSystem(
     size_t block_size, size_t max_bytes, uint64 max_staleness,
     uint64 stat_cache_max_age, size_t stat_cache_max_entries,
     uint64 matching_paths_cache_max_age,
-    size_t matching_paths_cache_max_entries, int64 initial_retry_delay_usec)
+    size_t matching_paths_cache_max_entries, int64 initial_retry_delay_usec,
+    TimeoutConfig timeouts)
     : auth_provider_(std::move(auth_provider)),
       http_request_factory_(std::move(http_request_factory)),
       file_block_cache_(
@@ -651,6 +704,7 @@ GcsFileSystem::GcsFileSystem(
       stat_cache_(new StatCache(stat_cache_max_age, stat_cache_max_entries)),
       matching_paths_cache_(new MatchingPathsCache(
           matching_paths_cache_max_age, matching_paths_cache_max_entries)),
+      timeouts_(timeouts),
       initial_retry_delay_usec_(initial_retry_delay_usec) {}
 
 Status GcsFileSystem::NewRandomAccessFile(
@@ -667,45 +721,49 @@ std::unique_ptr<FileBlockCache> GcsFileSystem::MakeFileBlockCache(
   std::unique_ptr<FileBlockCache> file_block_cache(
       new FileBlockCache(block_size, max_bytes, max_staleness,
                          [this](const string& filename, size_t offset, size_t n,
-                                std::vector<char>* out) {
-                           return LoadBufferFromGCS(filename, offset, n, out);
+                                char* buffer, size_t* bytes_transferred) {
+                           return LoadBufferFromGCS(filename, offset, n, buffer,
+                                                    bytes_transferred);
                          }));
   return file_block_cache;
 }
 
 // A helper function to actually read the data from GCS.
 Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset,
-                                        size_t n, std::vector<char>* out) {
+                                        size_t n, char* buffer,
+                                        size_t* bytes_transferred) {
+  *bytes_transferred = 0;
+
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(filename, false, &bucket, &object));
-  string auth_token;
-  TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
 
-  std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-  TF_RETURN_IF_ERROR(request->Init());
-  TF_RETURN_IF_ERROR(
-      request->SetUri(strings::StrCat("https://", kStorageHost, "/", bucket,
-                                      "/", request->EscapeString(object))));
-  TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-  TF_RETURN_IF_ERROR(request->SetRange(offset, offset + n - 1));
-  TF_RETURN_IF_ERROR(request->SetResultBuffer(out));
-
-  if (dns_cache_) {
-    TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
-  }
+  std::unique_ptr<HttpRequest> request;
+  TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
+  request->SetUri(strings::StrCat("https://", kStorageHost, "/", bucket, "/",
+                                  request->EscapeString(object)));
+  request->SetRange(offset, offset + n - 1);
+  request->SetResultBufferDirect(buffer, n);
+  request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.read);
 
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading gs://",
                                   bucket, "/", object);
 
-  if (out->size() < block_size()) {
+  size_t bytes_read = request->GetResultBufferDirectBytesTransferred();
+  *bytes_transferred = bytes_read;
+  VLOG(1) << "Successful read of gs://" << bucket << "/" << object << " @ "
+          << offset << " of size: " << bytes_read;
+
+  if (bytes_read < block_size()) {
     // Check stat cache to see if we encountered an interrupted read.
     FileStatistics stat;
     if (stat_cache_->Lookup(filename, &stat)) {
-      if (offset + out->size() < stat.length) {
+      if (offset + bytes_read < stat.length) {
         return errors::Internal(strings::Printf(
             "File contents are inconsistent for file: %s @ %lu.",
             filename.c_str(), offset));
       }
+      VLOG(2) << "Successful integrity check for: gs://" << bucket << "/"
+              << object << " @ " << offset;
     }
   }
 
@@ -717,7 +775,7 @@ Status GcsFileSystem::NewWritableFile(const string& fname,
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
-      bucket, object, auth_provider_.get(), http_request_factory_.get(),
+      bucket, object, this, &timeouts_,
       [this, fname]() { file_block_cache_->RemoveFile(fname); },
       initial_retry_delay_usec_));
   return Status::OK();
@@ -758,8 +816,7 @@ Status GcsFileSystem::NewAppendableFile(const string& fname,
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
-      bucket, object, auth_provider_.get(), old_content_filename,
-      http_request_factory_.get(),
+      bucket, object, this, old_content_filename, &timeouts_,
       [this, fname]() { file_block_cache_->RemoveFile(fname); },
       initial_retry_delay_usec_));
   return Status::OK();
@@ -835,39 +892,36 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket,
 
   StatCache::ComputeFunc compute_func =
       [this, &bucket, &object](const string& fname, FileStatistics* stat) {
-        string auth_token;
-        TF_RETURN_IF_ERROR(
-            AuthProvider::GetToken(auth_provider_.get(), &auth_token));
-
         std::vector<char> output_buffer;
-        std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-        TF_RETURN_IF_ERROR(request->Init());
-        TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
-            kGcsUriBase, "b/", bucket, "/o/", request->EscapeString(object),
-            "?fields=size%2Cupdated")));
-        TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-        TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
-
-        if (dns_cache_) {
-          TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
-        }
+        std::unique_ptr<HttpRequest> request;
+        TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
+        request->SetUri(strings::StrCat(kGcsUriBase, "b/", bucket, "/o/",
+                                        request->EscapeString(object),
+                                        "?fields=size%2Cupdated"));
+        request->SetResultBuffer(&output_buffer);
+        request->SetTimeouts(timeouts_.connect, timeouts_.idle,
+                             timeouts_.metadata);
+
         TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(),
                                         " when reading metadata of gs://",
                                         bucket, "/", object);
 
-        StringPiece response_piece =
-            StringPiece(output_buffer.data(), output_buffer.size());
         Json::Value root;
-        TF_RETURN_IF_ERROR(ParseJson(response_piece, &root));
+        TF_RETURN_IF_ERROR(ParseJson(output_buffer, &root));
 
         // Parse file size.
-        TF_RETURN_IF_ERROR(GetInt64Value(root, "size", &(stat->length)));
+        TF_RETURN_IF_ERROR(GetInt64Value(root, "size", &stat->length));
 
         // Parse file modification time.
         string updated;
         TF_RETURN_IF_ERROR(GetStringValue(root, "updated", &updated));
         TF_RETURN_IF_ERROR(ParseRfc3339Time(updated, &(stat->mtime_nsec)));
 
+        VLOG(1) << "Stat of: gs://" << bucket << "/" << object << " -- "
+                << " length: " << stat->length
+                << "; mtime_nsec: " << stat->mtime_nsec
+                << "; updated: " << updated;
+
         stat->is_directory = false;
         return Status::OK();
       };
@@ -884,14 +938,11 @@ Status GcsFileSystem::BucketExists(const string& bucket, bool* result) {
   if (!result) {
     return errors::Internal("'result' cannot be nullptr.");
   }
-  string auth_token;
-  TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
 
-  std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-  TF_RETURN_IF_ERROR(request->Init());
-  TF_RETURN_IF_ERROR(
-      request->SetUri(strings::StrCat(kGcsUriBase, "b/", bucket)));
-  TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
+  std::unique_ptr<HttpRequest> request;
+  TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
+  request->SetUri(strings::StrCat(kGcsUriBase, "b/", bucket));
+  request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.metadata);
   const Status status = request->Send();
   switch (status.code()) {
     case errors::Code::OK:
@@ -991,13 +1042,9 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
   string nextPageToken;
   uint64 retrieved_results = 0;
   while (true) {  // A loop over multiple result pages.
-    string auth_token;
-    TF_RETURN_IF_ERROR(
-        AuthProvider::GetToken(auth_provider_.get(), &auth_token));
-
     std::vector<char> output_buffer;
-    std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-    TF_RETURN_IF_ERROR(request->Init());
+    std::unique_ptr<HttpRequest> request;
+    TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
     auto uri = strings::StrCat(kGcsUriBase, "b/", bucket, "/o");
     if (recursive) {
       uri = strings::StrCat(uri, "?fields=items%2Fname%2CnextPageToken");
@@ -1020,21 +1067,15 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
       uri =
           strings::StrCat(uri, "&maxResults=", max_results - retrieved_results);
     }
-    TF_RETURN_IF_ERROR(request->SetUri(uri));
-    TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-    TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
-
-    if (dns_cache_) {
-      TF_RETURN_IF_ERROR(dns_cache_->AnnotateRequest(request.get()));
-    }
+    request->SetUri(uri);
+    request->SetResultBuffer(&output_buffer);
+    request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.metadata);
 
     TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading ", dirname);
     Json::Value root;
-    StringPiece response_piece =
-        StringPiece(output_buffer.data(), output_buffer.size());
-    TF_RETURN_IF_ERROR(ParseJson(response_piece, &root));
+    TF_RETURN_IF_ERROR(ParseJson(output_buffer, &root));
     const auto items = root.get("items", Json::Value::null);
-    if (items != Json::Value::null) {
+    if (!items.isNull()) {
       if (!items.isArray()) {
         return errors::Internal(
             "Expected an array 'items' in the GCS response.");
@@ -1065,7 +1106,7 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
       }
     }
     const auto prefixes = root.get("prefixes", Json::Value::null);
-    if (prefixes != Json::Value::null) {
+    if (!prefixes.isNull()) {
       // Subfolders are returned for the non-recursive mode.
       if (!prefixes.isArray()) {
         return errors::Internal(
@@ -1073,7 +1114,7 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
       }
       for (size_t i = 0; i < prefixes.size(); i++) {
         const auto prefix = prefixes.get(i, Json::Value::null);
-        if (prefix == Json::Value::null || !prefix.isString()) {
+        if (prefix.isNull() || !prefix.isString()) {
           return errors::Internal(
               "'prefixes' was expected to be an array of strings in the GCS "
               "response.");
@@ -1092,7 +1133,7 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
       }
     }
     const auto token = root.get("nextPageToken", Json::Value::null);
-    if (token == Json::Value::null) {
+    if (token.isNull()) {
       return Status::OK();
     }
     if (!token.isString()) {
@@ -1139,15 +1180,13 @@ Status GcsFileSystem::DeleteFile(const string& fname) {
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
 
-  string auth_token;
-  TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
+  std::unique_ptr<HttpRequest> request;
+  TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
+  request->SetUri(strings::StrCat(kGcsUriBase, "b/", bucket, "/o/",
+                                  request->EscapeString(object)));
+  request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.metadata);
+  request->SetDeleteRequest();
 
-  std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-  TF_RETURN_IF_ERROR(request->Init());
-  TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
-      kGcsUriBase, "b/", bucket, "/o/", request->EscapeString(object))));
-  TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-  TF_RETURN_IF_ERROR(request->SetDeleteRequest());
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when deleting ", fname);
   file_block_cache_->RemoveFile(fname);
   return Status::OK();
@@ -1230,28 +1269,23 @@ Status GcsFileSystem::RenameObject(const string& src, const string& target) {
   TF_RETURN_IF_ERROR(
       ParseGcsPath(target, false, &target_bucket, &target_object));
 
-  string auth_token;
-  TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
-
-  std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
-  TF_RETURN_IF_ERROR(request->Init());
-  TF_RETURN_IF_ERROR(request->SetUri(strings::StrCat(
-      kGcsUriBase, "b/", src_bucket, "/o/", request->EscapeString(src_object),
-      "/rewriteTo/b/", target_bucket, "/o/",
-      request->EscapeString(target_object))));
-  TF_RETURN_IF_ERROR(request->AddAuthBearerHeader(auth_token));
-  TF_RETURN_IF_ERROR(request->SetPostEmptyBody());
+  std::unique_ptr<HttpRequest> request;
+  TF_RETURN_IF_ERROR(CreateHttpRequest(&request));
+  request->SetUri(strings::StrCat(kGcsUriBase, "b/", src_bucket, "/o/",
+                                  request->EscapeString(src_object),
+                                  "/rewriteTo/b/", target_bucket, "/o/",
+                                  request->EscapeString(target_object)));
+  request->SetPostEmptyBody();
+  request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.metadata);
   std::vector<char> output_buffer;
-  TF_RETURN_IF_ERROR(request->SetResultBuffer(&output_buffer));
+  request->SetResultBuffer(&output_buffer);
   TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when renaming ", src,
                                   " to ", target);
   // Flush the target from the block cache.  The source will be flushed in the
   // DeleteFile call below.
   file_block_cache_->RemoveFile(target);
   Json::Value root;
-  StringPiece response_piece =
-      StringPiece(output_buffer.data(), output_buffer.size());
-  TF_RETURN_IF_ERROR(ParseJson(response_piece, &root));
+  TF_RETURN_IF_ERROR(ParseJson(output_buffer, &root));
   bool done;
   TF_RETURN_IF_ERROR(GetBoolValue(root, "done", &done));
   if (!done) {
@@ -1340,6 +1374,33 @@ Status GcsFileSystem::DeleteRecursively(const string& dirname,
   return Status::OK();
 }
 
+// Flushes all caches for filesystem metadata and file contents. Useful for
+// reclaiming memory once filesystem operations are done (e.g. model is loaded),
+// or for resetting the filesystem to a consistent state.
+void GcsFileSystem::FlushCaches() {
+  file_block_cache_->Flush();
+  stat_cache_->Clear();
+  matching_paths_cache_->Clear();
+}
+
+// Creates an HttpRequest and sets several parameters that are common to all
+// requests.  All code (in GcsFileSystem) that creates an HttpRequest should
+// go through this method, rather than directly using http_request_factory_.
+Status GcsFileSystem::CreateHttpRequest(std::unique_ptr<HttpRequest>* request) {
+  std::unique_ptr<HttpRequest> new_request{http_request_factory_->Create()};
+  if (dns_cache_) {
+    dns_cache_->AnnotateRequest(new_request.get());
+  }
+
+  string auth_token;
+  TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token));
+
+  new_request->AddAuthBearerHeader(auth_token);
+
+  *request = std::move(new_request);
+  return Status::OK();
+}
+
 REGISTER_FILE_SYSTEM("gs", RetryingGcsFileSystem);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h
index 4b4853c838abb2d2cc1a6cf68877a0dedcbcc15c..adde161a9340da61791e5c781c608caabc75d996 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.h
+++ b/tensorflow/core/platform/cloud/gcs_file_system.h
@@ -35,6 +35,8 @@ namespace tensorflow {
 /// which adds retry logic to GCS operations.
 class GcsFileSystem : public FileSystem {
  public:
+  struct TimeoutConfig;
+
   GcsFileSystem();
   GcsFileSystem(std::unique_ptr<AuthProvider> auth_provider,
                 std::unique_ptr<HttpRequest::Factory> http_request_factory,
@@ -42,7 +44,7 @@ class GcsFileSystem : public FileSystem {
                 uint64 stat_cache_max_age, size_t stat_cache_max_entries,
                 uint64 matching_paths_cache_max_age,
                 size_t matching_paths_cache_max_entries,
-                int64 initial_retry_delay_usec);
+                int64 initial_retry_delay_usec, TimeoutConfig timeouts);
 
   Status NewRandomAccessFile(
       const string& filename,
@@ -82,11 +84,14 @@ class GcsFileSystem : public FileSystem {
   Status DeleteRecursively(const string& dirname, int64* undeleted_files,
                            int64* undeleted_dirs) override;
 
+  void FlushCaches() override;
+
   /// These accessors are mainly for testing purposes, to verify that the
   /// environment variables that control these parameters are handled correctly.
   size_t block_size() const { return file_block_cache_->block_size(); }
   size_t max_bytes() const { return file_block_cache_->max_bytes(); }
   uint64 max_staleness() const { return file_block_cache_->max_staleness(); }
+  TimeoutConfig timeouts() const { return timeouts_; }
 
   uint64 stat_cache_max_age() const { return stat_cache_->max_age(); }
   size_t stat_cache_max_entries() const { return stat_cache_->max_entries(); }
@@ -98,6 +103,43 @@ class GcsFileSystem : public FileSystem {
     return matching_paths_cache_->max_entries();
   }
 
+  /// Structure containing the information for timeouts related to accessing the
+  /// GCS APIs.
+  ///
+  /// All values are in seconds.
+  struct TimeoutConfig {
+    // The request connection timeout. If a connection cannot be established
+    // within `connect` seconds, abort the request.
+    uint32 connect = 120;  // 2 minutes
+
+    // The request idle timeout. If a request has seen no activity in `idle`
+    // seconds, abort the request.
+    uint32 idle = 60;  // 1 minute
+
+    // The maximum total time a metadata request can take. If a request has not
+    // completed within `metadata` seconds, the request is aborted.
+    uint32 metadata = 3600;  // 1 hour
+
+    // The maximum total time a block read request can take. If a request has
+    // not completed within `read` seconds, the request is aborted.
+    uint32 read = 3600;  // 1 hour
+
+    // The maximum total time an upload request can take. If a request has not
+    // completed within `write` seconds, the request is aborted.
+    uint32 write = 3600;  // 1 hour
+
+    TimeoutConfig() {}
+    TimeoutConfig(uint32 connect, uint32 idle, uint32 metadata, uint32 read,
+                  uint32 write)
+        : connect(connect),
+          idle(idle),
+          metadata(metadata),
+          read(read),
+          write(write) {}
+  };
+
+  Status CreateHttpRequest(std::unique_ptr<HttpRequest>* request);
+
  private:
   /// \brief Checks if the bucket exists. Returns OK if the check succeeded.
   ///
@@ -137,7 +179,7 @@ class GcsFileSystem : public FileSystem {
 
   /// Loads file contents from GCS for a given filename, offset, and length.
   Status LoadBufferFromGCS(const string& filename, size_t offset, size_t n,
-                           std::vector<char>* out);
+                           char* buffer, size_t* bytes_transferred);
 
   std::unique_ptr<AuthProvider> auth_provider_;
   std::unique_ptr<HttpRequest::Factory> http_request_factory_;
@@ -150,6 +192,8 @@ class GcsFileSystem : public FileSystem {
   using MatchingPathsCache = ExpiringLRUCache<std::vector<string>>;
   std::unique_ptr<MatchingPathsCache> matching_paths_cache_;
 
+  TimeoutConfig timeouts_;
+
   /// The initial delay for exponential backoffs when retrying failed calls.
   const int64 initial_retry_delay_usec_ = 1000000L;
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index 7614ec4d7f01369eff1b21141818c673154b7542..772aec527313fc43fef40983d22e313e338bbe02 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -22,6 +22,8 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+static GcsFileSystem::TimeoutConfig kTestTimeoutConfig(5, 1, 10, 20, 30);
+
 class FakeAuthProvider : public AuthProvider {
  public:
   Status GetToken(string* token) override {
@@ -35,12 +37,14 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-5\n",
+           "Range: 0-5\n"
+           "Timeouts: 5 1 20\n",
            "012345"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 6-11\n",
+           "Range: 6-11\n"
+           "Timeouts: 5 1 20\n",
            "6789")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -49,7 +53,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -73,12 +77,14 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_differentN) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-2\n",
+           "Range: 0-2\n"
+           "Timeouts: 5 1 20\n",
            "012"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 3-12\n",
+           "Range: 3-12\n"
+           "Timeouts: 5 1 20\n",
            "3456789")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -87,7 +93,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_differentN) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -116,26 +122,30 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-8\n",
+           "Range: 0-8\n"
+           "Timeouts: 5 1 20\n",
            "012345678"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 9-17\n",
+           "Range: 9-17\n"
+           "Timeouts: 5 1 20\n",
            "9abcde"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 18-26\n",
+           "Range: 18-26\n"
+           "Timeouts: 5 1 20\n",
            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      9 /* block size */, 18 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   9 /* block size */, 18 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   char scratch[100];
   StringPiece result;
@@ -185,26 +195,72 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) {
   EXPECT_EQ("0123", result);
 }
 
+TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_Flush) {
+  // Our underlying file in this test is a 15 byte file with contents
+  // "0123456789abcde".
+  std::vector<HttpRequest*> requests(
+      {new FakeHttpRequest(
+           "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
+           "Auth Token: fake_token\n"
+           "Range: 0-8\n"
+           "Timeouts: 5 1 20\n",
+           "012345678"),
+       new FakeHttpRequest(
+           "Uri: https://storage.googleapis.com/bucket/random_access.txt\n"
+           "Auth Token: fake_token\n"
+           "Range: 0-8\n"
+           "Timeouts: 5 1 20\n",
+           "012345678")});
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   9 /* block size */, 18 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
+
+  char scratch[100];
+  StringPiece result;
+  std::unique_ptr<RandomAccessFile> file;
+  TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
+  // Read the first chunk. The cache will be populated with the first block of
+  // 9 bytes.
+  scratch[5] = 'x';
+  TF_EXPECT_OK(file->Read(0, 4, &result, scratch));
+  EXPECT_EQ("0123", result);
+  EXPECT_EQ(scratch[5], 'x');  // Make sure we only copied 4 bytes.
+  // Flush caches and read the second chunk. This will be a cache miss, and
+  // the same block will be fetched again.
+  fs.FlushCaches();
+  TF_EXPECT_OK(file->Read(4, 4, &result, scratch));
+  EXPECT_EQ("4567", result);
+}
+
 TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
   // Our underlying file in this test is a 16 byte file with contents
   // "0123456789abcdef".
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest("Uri: https://storage.googleapis.com/bucket/object\n"
                            "Auth Token: fake_token\n"
-                           "Range: 0-7\n",
+                           "Range: 0-7\n"
+                           "Timeouts: 5 1 20\n",
                            "01234567"),
        new FakeHttpRequest("Uri: https://storage.googleapis.com/bucket/object\n"
                            "Auth Token: fake_token\n"
-                           "Range: 8-15\n",
+                           "Range: 8-15\n"
+                           "Timeouts: 5 1 20\n",
                            "89abcdef")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      8 /* block size */, 16 /* max bytes */, 3600 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   8 /* block size */, 16 /* max bytes */,
+                   3600 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
   char scratch[100];
   StringPiece result;
   // There should only be two HTTP requests issued to GCS even though we iterate
@@ -238,14 +294,15 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
 
 TEST(GcsFileSystemTest, NewRandomAccessFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      0 /* read ahead bytes */, 0 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */, 0 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<RandomAccessFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -257,24 +314,28 @@ TEST(GcsFileSystemTest, NewWritableFile) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fwriteable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -283,7 +344,7 @@ TEST(GcsFileSystemTest, NewWritableFile) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   // Read from the file first, to fill the block cache.
   std::unique_ptr<RandomAccessFile> rfile;
@@ -315,15 +376,18 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
                            "", errors::FailedPrecondition("308"), nullptr,
@@ -331,10 +395,12 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 11-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: ntent2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
                            "", errors::FailedPrecondition("308"), nullptr,
@@ -342,6 +408,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 13-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: ent2\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -351,7 +418,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -369,38 +436,44 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceedsOnGetStatus) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fwriteable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Header Content-Range: bytes */17\n"
                            "Put: yes\n",
                            "", Status::OK(), nullptr, {}, 201),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fwriteable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-7\n",
+           "Range: 0-7\n"
+           "Timeouts: 5 1 20\n",
            "01234567")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      8 /* block size */, 8 /* max bytes */, 3600 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   8 /* block size */, 8 /* max bytes */,
+                   3600 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
   // Pull the file's first block into the cache. This will trigger the first
   // HTTP request to GCS.
   std::unique_ptr<RandomAccessFile> rfile;
@@ -434,17 +507,20 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::Unavailable("503"), 503)});
   for (int i = 0; i < 10; i++) {
     requests.emplace_back(new FakeHttpRequest(
         "Uri: https://custom/upload/location\n"
         "Auth Token: fake_token\n"
+        "Timeouts: 5 1 10\n"
         "Header Content-Range: bytes */17\n"
         "Put: yes\n",
         "", errors::FailedPrecondition("important HTTP error 308"), nullptr,
@@ -453,6 +529,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
         "Uri: https://custom/upload/location\n"
         "Auth Token: fake_token\n"
         "Header Content-Range: bytes 11-16/17\n"
+        "Timeouts: 5 1 30\n"
         "Put body: ntent2\n",
         "", errors::Unavailable("important HTTP error 503"), 503));
   }
@@ -463,12 +540,14 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
       "uploadType=resumable&name=path%2Fwriteable.txt\n"
       "Auth Token: fake_token\n"
       "Header X-Upload-Content-Length: 17\n"
-      "Post: yes\n",
+      "Post: yes\n"
+      "Timeouts: 5 1 10\n",
       "", {{"Location", "https://custom/upload/location"}}));
   requests.emplace_back(
       new FakeHttpRequest("Uri: https://custom/upload/location\n"
                           "Auth Token: fake_token\n"
                           "Header Content-Range: bytes 0-16/17\n"
+                          "Timeouts: 5 1 30\n"
                           "Put body: content1,content2\n",
                           ""));
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -478,7 +557,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   2 /* initial retry delay */);
+                   2 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -500,11 +579,13 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "", errors::NotFound("important HTTP error 410"),
                            410),
@@ -515,11 +596,13 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
            "uploadType=resumable&name=path%2Fwriteable.txt\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -529,7 +612,7 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -558,7 +641,7 @@ TEST(GcsFileSystemTest, NewWritableFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -570,33 +653,38 @@ TEST(GcsFileSystemTest, NewAppendableFile) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fappendable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-31\n",
+           "Range: 0-31\n"
+           "Timeouts: 5 1 20\n",
            "content1,"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=path%2Fappendable\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 17\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
                            "Header Content-Range: bytes 0-16/17\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fappendable\n"
            "Auth Token: fake_token\n"
-           "Range: 0-31\n",
+           "Range: 0-31\n"
+           "Timeouts: 5 1 20\n",
            "01234567")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      32 /* block size */, 32 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   32 /* block size */, 32 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   // Create an appendable file. This should read the file from GCS, and pull its
   // contents into the block cache.
@@ -629,7 +717,7 @@ TEST(GcsFileSystemTest, NewAppendableFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -642,7 +730,8 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Frandom_access.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"", content.size(),
                            "\", \"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
@@ -650,7 +739,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
                            "path%2Frandom_access.txt\n"
                            "Auth Token: fake_token\n"
                            "Range: 0-",
-                           content.size() - 1, "\n"),
+                           content.size() - 1, "\n", "Timeouts: 5 1 20\n"),
            content)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -659,7 +748,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile(
@@ -678,7 +767,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -689,7 +778,8 @@ TEST(GcsFileSystemTest, FileExists_YesAsObject) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "path%2Ffile1.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -699,7 +789,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsObject) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/file1.txt"));
 }
@@ -709,13 +799,15 @@ TEST(GcsFileSystemTest, FileExists_YesAsFolder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsubfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subfolder/\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -725,7 +817,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder"));
 }
@@ -734,11 +826,13 @@ TEST(GcsFileSystemTest, FileExists_YesAsBucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"size\": \"100\"}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"size\": \"100\"}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -747,7 +841,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsBucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket1"));
   TF_EXPECT_OK(fs.FileExists("gs://bucket1/"));
@@ -758,13 +852,15 @@ TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile1.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Ffile1.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": []}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -773,7 +869,7 @@ TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   EXPECT_EQ(errors::Code::NOT_FOUND,
             fs.FileExists("gs://bucket/path/file1.txt").code());
@@ -783,11 +879,13 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket2\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket2\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -796,7 +894,7 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.FileExists("gs://bucket2/").code());
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -808,29 +906,33 @@ TEST(GcsFileSystemTest, FileExists_StatCache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile1.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsubfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subfolder/\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
-      3600 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   3600 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   // The stat cache will ensure that repeated lookups don't trigger additional
   // HTTP requests.
@@ -845,7 +947,8 @@ TEST(GcsFileSystemTest, GetChildren_NoItems) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"prefixes\": [\"path/subpath/\"]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -854,7 +957,7 @@ TEST(GcsFileSystemTest, GetChildren_NoItems) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -867,7 +970,8 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -879,7 +983,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -893,7 +997,8 @@ TEST(GcsFileSystemTest, GetChildren_SelfDirectoryMarker) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -905,7 +1010,7 @@ TEST(GcsFileSystemTest, GetChildren_SelfDirectoryMarker) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay */, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -918,7 +1023,8 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
@@ -930,7 +1036,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -943,7 +1049,8 @@ TEST(GcsFileSystemTest, GetChildren_Root) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket-a-b-c/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -952,7 +1059,7 @@ TEST(GcsFileSystemTest, GetChildren_Root) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket-a-b-c", &children));
@@ -965,7 +1072,8 @@ TEST(GcsFileSystemTest, GetChildren_Empty) {
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
       "path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -974,7 +1082,7 @@ TEST(GcsFileSystemTest, GetChildren_Empty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -988,7 +1096,8 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&"
            "prefix=path%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"nextPageToken\": \"ABCD==\", "
            "\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
@@ -999,7 +1108,8 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
            "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&"
            "prefix=path%2F"
            "&pageToken=ABCD==\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file4.txt\" },"
            "  { \"name\": \"path/file5.txt\" }]}")});
@@ -1011,7 +1121,7 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -1025,7 +1135,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_NoWildcard) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/subpath/file2.txt\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1035,7 +1146,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_NoWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(
@@ -1048,7 +1159,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1060,7 +1172,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/*/*", &result));
@@ -1074,7 +1186,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_Matches) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1086,7 +1199,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_Matches) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file2.txt", &result));
@@ -1098,7 +1211,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SelfDirectoryMarker) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
@@ -1109,7 +1223,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SelfDirectoryMarker) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*", &result));
@@ -1120,7 +1234,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1132,7 +1247,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file3.txt", &result));
@@ -1148,7 +1263,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_OnlyWildcard) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   std::vector<string> result;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1160,13 +1275,15 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subpath/file2.txt\" }]}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
            "  { \"name\": \"path/subpath/file2.txt\" },"
@@ -1178,7 +1295,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    3600 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   // Repeated calls to fs.GetMatchingPaths on these patterns should not lead to
   // any additional HTTP requests to GCS.
@@ -1196,31 +1313,79 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) {
   }
 }
 
+TEST(GcsFileSystemTest, GetMatchingPaths_Cache_Flush) {
+  std::vector<HttpRequest*> requests(
+      {new FakeHttpRequest(
+           "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
+           "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F\n"
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
+           "{\"items\": [ "
+           "  { \"name\": \"path/subpath/file2.txt\" }]}"),
+       new FakeHttpRequest(
+           "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
+           "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F\n"
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
+           "{\"items\": [ "
+           "  { \"name\": \"path/subpath/file2.txt\" }]}")});
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   3600 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
+
+  // This loop should trigger the first HTTP request to GCS.
+  for (int i = 0; i < 10; i++) {
+    std::vector<string> result;
+    TF_EXPECT_OK(
+        fs.GetMatchingPaths("gs://bucket/path/subpath/file2.txt", &result));
+    EXPECT_EQ(std::vector<string>({"gs://bucket/path/subpath/file2.txt"}),
+              result);
+  }
+  // After flushing caches, there should be another (identical) request to GCS.
+  fs.FlushCaches();
+  for (int i = 0; i < 10; i++) {
+    std::vector<string> result;
+    TF_EXPECT_OK(
+        fs.GetMatchingPaths("gs://bucket/path/subpath/file2.txt", &result));
+    EXPECT_EQ(std::vector<string>({"gs://bucket/path/subpath/file2.txt"}),
+              result);
+  }
+}
+
 TEST(GcsFileSystemTest, DeleteFile) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "76543210")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      16 /* block size */, 16 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   16 /* block size */, 16 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   // Do an initial read of the file to load its contents into the block cache.
   char scratch[100];
@@ -1246,7 +1411,7 @@ TEST(GcsFileSystemTest, DeleteFile_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.DeleteFile("gs://bucket/").code());
@@ -1256,7 +1421,8 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1265,7 +1431,7 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1275,12 +1441,14 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/\" }]}"),
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2F\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1290,7 +1458,7 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1298,7 +1466,8 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
 TEST(GcsFileSystemTest, DeleteDir_BucketOnly) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?fields=items%2F"
-      "name%2CnextPageToken&maxResults=2\nAuth Token: fake_token\n",
+      "name%2CnextPageToken&maxResults=2\nAuth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1307,7 +1476,7 @@ TEST(GcsFileSystemTest, DeleteDir_BucketOnly) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket"));
 }
@@ -1316,7 +1485,8 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
       "fields=items%2Fname%2CnextPageToken&prefix=path%2F&maxResults=2\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1326,7 +1496,7 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.DeleteDir("gs://bucket/path/").code());
@@ -1336,7 +1506,8 @@ TEST(GcsFileSystemTest, GetFileSize) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "file.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1346,7 +1517,7 @@ TEST(GcsFileSystemTest, GetFileSize) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   uint64 size;
   TF_EXPECT_OK(fs.GetFileSize("gs://bucket/file.txt", &size));
@@ -1362,7 +1533,7 @@ TEST(GcsFileSystemTest, GetFileSize_NoObjectName) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   uint64 size;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1376,14 +1547,16 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path1%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path1/subfolder/file1.txt\" }]}"),
        // Requesting the full list of files in the folder.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path1%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path1/\" },"  // A directory marker.
            "  { \"name\": \"path1/subfolder/file1.txt\" },"
@@ -1393,13 +1566,15 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2F/rewriteTo/b/bucket/o/path2%2F\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the original directory marker.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2F\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        // Copying the first file.
@@ -1408,13 +1583,15 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "path1%2Fsubfolder%2Ffile1.txt/rewriteTo/b/bucket/o/"
            "path2%2Fsubfolder%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the first original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Fsubfolder%2Ffile1.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        // Copying the second file.
@@ -1422,13 +1599,15 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Ffile2.txt/rewriteTo/b/bucket/o/path2%2Ffile2.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the second original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path1%2Ffile2.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1438,7 +1617,7 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.RenameFile("gs://bucket/path1", "gs://bucket/path2/"));
 }
@@ -1448,25 +1627,29 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
       {new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "01234567"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "76543210"),
        // IsDirectory is checking whether there are children objects.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1474,33 +1657,38 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the original file.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            ""),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "89abcdef"),
        new FakeHttpRequest(
            "Uri: https://storage.googleapis.com/bucket/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Range: 0-15\n",
+           "Range: 0-15\n"
+           "Timeouts: 5 1 20\n",
            "fedcba98")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      16 /* block size */, 64 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   16 /* block size */, 64 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
   // Do an initial read of the source and destination files to load their
   // contents into the block cache.
   char scratch[100];
@@ -1531,13 +1719,15 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1545,13 +1735,15 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": true}"),
        // Deleting the original file - the deletion returns a failure.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "", errors::Unavailable("503"), 503),
        // Deleting the original file again - the deletion returns NOT_FOUND.
@@ -1559,6 +1751,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt\n"
            "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1568,7 +1761,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(
       fs.RenameFile("gs://bucket/path/src.txt", "gs://bucket/path/dst.txt"));
@@ -1582,13 +1775,15 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        // Copying to the new location.
@@ -1596,7 +1791,8 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Fsrc.txt/rewriteTo/b/bucket/o/path%2Fdst.txt\n"
            "Auth Token: fake_token\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "{\"done\": false}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1605,7 +1801,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(
       errors::Code::UNIMPLEMENTED,
@@ -1617,7 +1813,8 @@ TEST(GcsFileSystemTest, Stat_Object) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
       "file.txt?fields=size%2Cupdated\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1627,7 +1824,7 @@ TEST(GcsFileSystemTest, Stat_Object) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat));
@@ -1641,13 +1838,15 @@ TEST(GcsFileSystemTest, Stat_Folder) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "subfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"subfolder/\" }]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1657,7 +1856,7 @@ TEST(GcsFileSystemTest, Stat_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder", &stat));
@@ -1671,13 +1870,15 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1686,7 +1887,7 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/path", &stat).code());
@@ -1695,7 +1896,8 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
 TEST(GcsFileSystemTest, Stat_Bucket) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1704,7 +1906,7 @@ TEST(GcsFileSystemTest, Stat_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/", &stat));
@@ -1716,7 +1918,8 @@ TEST(GcsFileSystemTest, Stat_Bucket) {
 TEST(GcsFileSystemTest, Stat_BucketNotFound) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1725,7 +1928,7 @@ TEST(GcsFileSystemTest, Stat_BucketNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/", &stat).code());
@@ -1736,29 +1939,33 @@ TEST(GcsFileSystemTest, Stat_Cache) {
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "subfolder?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"subfolder/\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
-      3600 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   3600 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   // Repeated calls to fs.Stat on these paths should not lead to any additional
   // HTTP requests to GCS.
@@ -1775,18 +1982,64 @@ TEST(GcsFileSystemTest, Stat_Cache) {
   }
 }
 
+TEST(GcsFileSystemTest, Stat_Cache_Flush) {
+  std::vector<HttpRequest*> requests(
+      {new FakeHttpRequest(
+           "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
+           "file.txt?fields=size%2Cupdated\n"
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
+           strings::StrCat("{\"size\": \"1010\","
+                           "\"updated\": \"2016-04-29T23:15:24.896Z\"}")),
+       new FakeHttpRequest(
+           "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
+           "file.txt?fields=size%2Cupdated\n"
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
+           strings::StrCat("{\"size\": \"1010\","
+                           "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   3600 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
+  // There should be a single HTTP request to GCS for fs.Stat in this loop.
+  for (int i = 0; i < 10; i++) {
+    FileStatistics stat;
+    TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat));
+    EXPECT_EQ(1010, stat.length);
+    EXPECT_NEAR(1461971724896, stat.mtime_nsec / 1000 / 1000, 1);
+    EXPECT_FALSE(stat.is_directory);
+  }
+  // After flushing caches, there should be a second request to GCS for fs.Stat.
+  fs.FlushCaches();
+  for (int i = 0; i < 10; i++) {
+    FileStatistics stat;
+    TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat));
+    EXPECT_EQ(1010, stat.length);
+    EXPECT_NEAR(1461971724896, stat.mtime_nsec / 1000 / 1000, 1);
+    EXPECT_FALSE(stat.is_directory);
+  }
+}
+
 TEST(GcsFileSystemTest, IsDirectory_NotFound) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=file.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1795,7 +2048,7 @@ TEST(GcsFileSystemTest, IsDirectory_NotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::NOT_FOUND,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -1807,12 +2060,14 @@ TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=file.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "file.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1822,7 +2077,7 @@ TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -1834,13 +2089,15 @@ TEST(GcsFileSystemTest, IsDirectory_Yes) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [{\"name\": \"subfolder/\"}]}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=subfolder%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [{\"name\": \"subfolder/\"}]}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1849,7 +2106,7 @@ TEST(GcsFileSystemTest, IsDirectory_Yes) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder/"));
@@ -1859,11 +2116,13 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1872,7 +2131,7 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/"));
@@ -1881,7 +2140,8 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
 TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-      "Auth Token: fake_token\n",
+      "Auth Token: fake_token\n"
+      "Timeouts: 5 1 10\n",
       "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1890,7 +2150,7 @@ TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   EXPECT_EQ(error::Code::NOT_FOUND, fs.IsDirectory("gs://bucket/").code());
 }
@@ -1902,10 +2162,12 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
            "uploadType=resumable&name=subpath%2F\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 0\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: \n",
                            ""),
        new FakeHttpRequest(
@@ -1913,10 +2175,12 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
            "uploadType=resumable&name=subpath%2F\n"
            "Auth Token: fake_token\n"
            "Header X-Upload-Content-Length: 0\n"
-           "Post: yes\n",
+           "Post: yes\n"
+           "Timeouts: 5 1 10\n",
            "", {{"Location", "https://custom/upload/location"}}),
        new FakeHttpRequest("Uri: https://custom/upload/location\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 30\n"
                            "Put body: \n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -1926,7 +2190,7 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath/"));
@@ -1936,11 +2200,13 @@ TEST(GcsFileSystemTest, CreateDir_Bucket) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            ""),
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -1949,7 +2215,7 @@ TEST(GcsFileSystemTest, CreateDir_Bucket) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket"));
@@ -1962,14 +2228,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" }]}"),
        // GetChildren recursively.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/\" },"  // The current directory's marker.
            "  { \"name\": \"path/file1.txt\" },"
@@ -1979,30 +2247,35 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2F\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object - fails and will be retried.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::Unavailable("500"), 500),
        // Delete the object again.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2Ffile2.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Delete the object.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile3.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -2012,7 +2285,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -2028,14 +2301,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" }]}"),
        // Calling GetChildren recursively.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/file1.txt\" },"
            "  { \"name\": \"path/subpath/\" },"
@@ -2045,12 +2320,14 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile1.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Deleting the directory marker gs://bucket/path/ - fails with 404.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2F\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::NotFound("404"), 404),
        // Checking if gs://bucket/path/subpath/ is a folder - it is.
@@ -2058,19 +2335,22 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsubpath%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            strings::StrCat("{\"items\": [ "
                            "    { \"name\": \"path/subpath/\" }]}")),
        // Deleting the object gs://bucket/path/subpath/file2.txt
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Fsubpath%2Ffile2.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            ""),
        // Deleting the object s://bucket/path/file3.txt - fails with 404.
        new FakeHttpRequest("Uri: https://www.googleapis.com/storage/v1/b"
                            "/bucket/o/path%2Ffile3.txt\n"
                            "Auth Token: fake_token\n"
+                           "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "", errors::NotFound("404"), 404),
        // Checking if gs://bucket/path/file3.txt/ is a folder - it's not.
@@ -2078,13 +2358,15 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2Ffile3.txt%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // Checking if gs://bucket/path/file3.txt is an object - fails with 404.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path%2Ffile3.txt?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
 
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
@@ -2094,7 +2376,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -2110,13 +2392,15 @@ TEST(GcsFileSystemTest, DeleteRecursively_NotAFolder) {
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
            "fields=items%2Fname%2CnextPageToken&prefix=path%2F"
            "&maxResults=1\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "{}"),
        // IsDirectory is checking if the path exists as an object.
        new FakeHttpRequest(
            "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
            "path?fields=size%2Cupdated\n"
-           "Auth Token: fake_token\n",
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
@@ -2125,7 +2409,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_NotAFolder) {
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
                    0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */);
+                   0 /* initial retry delay*/, kTestTimeoutConfig);
 
   int64 undeleted_files, undeleted_dirs;
   EXPECT_EQ(error::Code::NOT_FOUND,
@@ -2142,6 +2426,11 @@ TEST(GcsFileSystemTest, OverrideCacheParameters) {
   EXPECT_EQ(128 * 1024 * 1024, fs1.block_size());
   EXPECT_EQ(2 * fs1.block_size(), fs1.max_bytes());
   EXPECT_EQ(0, fs1.max_staleness());
+  EXPECT_EQ(120, fs1.timeouts().connect);
+  EXPECT_EQ(60, fs1.timeouts().idle);
+  EXPECT_EQ(3600, fs1.timeouts().metadata);
+  EXPECT_EQ(3600, fs1.timeouts().read);
+  EXPECT_EQ(3600, fs1.timeouts().write);
 
   // Verify legacy readahead buffer override sets block size.
   setenv("GCS_READAHEAD_BUFFER_SIZE_BYTES", "123456789", 1);
@@ -2167,6 +2456,42 @@ TEST(GcsFileSystemTest, OverrideCacheParameters) {
   EXPECT_EQ(32, fs4.stat_cache_max_entries());
   EXPECT_EQ(30, fs4.matching_paths_cache_max_age());
   EXPECT_EQ(64, fs4.matching_paths_cache_max_entries());
+
+  // Verify timeout overrides.
+  setenv("GCS_REQUEST_CONNECTION_TIMEOUT_SECS", "10", 1);
+  setenv("GCS_REQUEST_IDLE_TIMEOUT_SECS", "5", 1);
+  setenv("GCS_METADATA_REQUEST_TIMEOUT_SECS", "20", 1);
+  setenv("GCS_READ_REQUEST_TIMEOUT_SECS", "30", 1);
+  setenv("GCS_WRITE_REQUEST_TIMEOUT_SECS", "40", 1);
+  GcsFileSystem fs5;
+  EXPECT_EQ(10, fs5.timeouts().connect);
+  EXPECT_EQ(5, fs5.timeouts().idle);
+  EXPECT_EQ(20, fs5.timeouts().metadata);
+  EXPECT_EQ(30, fs5.timeouts().read);
+  EXPECT_EQ(40, fs5.timeouts().write);
+}
+
+TEST(GcsFileSystemTest, CreateHttpRequest) {
+  std::vector<HttpRequest*> requests(
+      {// IsDirectory is checking whether there are children objects.
+       new FakeHttpRequest("Uri: https://www.googleapis.com/fake\n"
+                           "Auth Token: fake_token\n"
+                           "Header Hello: world\n",
+                           "{}")});
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */,
+                   0 /* initial retry delay */, kTestTimeoutConfig);
+
+  std::unique_ptr<HttpRequest> request;
+  TF_EXPECT_OK(fs.CreateHttpRequest(&request));
+  request->SetUri("https://www.googleapis.com/fake");
+  request->AddHeader("Hello", "world");
+  TF_EXPECT_OK(request->Send());
 }
 
 }  // namespace
diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc
index f6fd8373cd593da3afdb159640b9cd29fcb795b5..7e39b63e3e8e19b3ed9e05e5c49422b42774567c 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider.cc
@@ -14,9 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/google_auth_provider.h"
+#ifndef _WIN32
 #include <pwd.h>
-#include <sys/types.h>
 #include <unistd.h>
+#else
+#include <sys/types.h>
+#endif
 #include <fstream>
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -208,10 +211,9 @@ Status GoogleAuthProvider::GetTokenFromGce() {
     std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
     std::vector<char> response_buffer;
     const uint64 request_timestamp_sec = env_->NowSeconds();
-    TF_RETURN_IF_ERROR(request->Init());
-    TF_RETURN_IF_ERROR(request->SetUri(kGceTokenUrl));
-    TF_RETURN_IF_ERROR(request->AddHeader("Metadata-Flavor", "Google"));
-    TF_RETURN_IF_ERROR(request->SetResultBuffer(&response_buffer));
+    request->SetUri(kGceTokenUrl);
+    request->AddHeader("Metadata-Flavor", "Google");
+    request->SetResultBuffer(&response_buffer);
     TF_RETURN_IF_ERROR(request->Send());
     StringPiece response =
         StringPiece(&response_buffer[0], response_buffer.size());
diff --git a/tensorflow/core/platform/cloud/http_request.h b/tensorflow/core/platform/cloud/http_request.h
index 02d9e9054ad3b22f3cd15cf7b24d917184db264b..df8a5b86a0b9b3354514be69cb03dd6472e51e86 100644
--- a/tensorflow/core/platform/cloud/http_request.h
+++ b/tensorflow/core/platform/cloud/http_request.h
@@ -50,33 +50,31 @@ class HttpRequest {
   HttpRequest() {}
   virtual ~HttpRequest() {}
 
-  virtual Status Init() = 0;
-
   /// Sets the request URI.
-  virtual Status SetUri(const string& uri) = 0;
+  virtual void SetUri(const string& uri) = 0;
 
   /// \brief Sets the Range header.
   ///
   /// Used for random seeks, for example "0-999" returns the first 1000 bytes
   /// (note that the right border is included).
-  virtual Status SetRange(uint64 start, uint64 end) = 0;
+  virtual void SetRange(uint64 start, uint64 end) = 0;
 
   /// Sets a request header.
-  virtual Status AddHeader(const string& name, const string& value) = 0;
+  virtual void AddHeader(const string& name, const string& value) = 0;
 
   /// Sets a DNS resolve mapping (to skip DNS resolution).
   ///
   /// Note: because GCS is available over HTTPS, we cannot replace the hostname
   /// in the URI with an IP address, as that will cause the certificate check
   /// to fail.
-  virtual Status AddResolveOverride(const string& hostname, int64 port,
-                                    const string& ip_addr) = 0;
+  virtual void AddResolveOverride(const string& hostname, int64 port,
+                                  const string& ip_addr) = 0;
 
   /// Sets the 'Authorization' header to the value of 'Bearer ' + auth_token.
-  virtual Status AddAuthBearerHeader(const string& auth_token) = 0;
+  virtual void AddAuthBearerHeader(const string& auth_token) = 0;
 
   /// Makes the request a DELETE request.
-  virtual Status SetDeleteRequest() = 0;
+  virtual void SetDeleteRequest() = 0;
 
   /// \brief Makes the request a PUT request.
   ///
@@ -85,21 +83,35 @@ class HttpRequest {
   virtual Status SetPutFromFile(const string& body_filepath, size_t offset) = 0;
 
   /// Makes the request a PUT request with an empty body.
-  virtual Status SetPutEmptyBody() = 0;
+  virtual void SetPutEmptyBody() = 0;
 
   /// \brief Makes the request a POST request.
   ///
   /// The request body will be taken from the specified buffer.
-  virtual Status SetPostFromBuffer(const char* buffer, size_t size) = 0;
+  virtual void SetPostFromBuffer(const char* buffer, size_t size) = 0;
 
   /// Makes the request a POST request with an empty body.
-  virtual Status SetPostEmptyBody() = 0;
+  virtual void SetPostEmptyBody() = 0;
 
   /// \brief Specifies the buffer for receiving the response body.
   ///
   /// Size of out_buffer after an access will be exactly the number of bytes
   /// read. Existing content of the vector will be cleared.
-  virtual Status SetResultBuffer(std::vector<char>* out_buffer) = 0;
+  virtual void SetResultBuffer(std::vector<char>* out_buffer) = 0;
+
+  /// \brief Specifies the buffer for receiving the response body.
+  ///
+  /// This method should be used when a caller knows the upper bound of the
+  /// size of the response data.  The caller provides a pre-allocated buffer
+  /// and its size. After the Send() method is called, the
+  /// GetResultBufferDirectBytesTransferred() method may be used to learn to the
+  /// number of bytes that were transferred using this method.
+  virtual void SetResultBufferDirect(char* buffer, size_t size) = 0;
+
+  /// \brief Returns the number of bytes transferred, when using
+  /// SetResultBufferDirect(). This method may only be used when using
+  /// SetResultBufferDirect().
+  virtual size_t GetResultBufferDirectBytesTransferred() = 0;
 
   /// \brief Returns the response headers of a completed request.
   ///
@@ -118,6 +130,16 @@ class HttpRequest {
   // Url encodes str and returns a new string.
   virtual string EscapeString(const string& str) = 0;
 
+  /// \brief Set timeouts for this request.
+  ///
+  /// The connection parameter controls how long we should wait for the
+  /// connection to be established. The inactivity parameter controls how long
+  /// we should wait between additional responses from the server. Finally the
+  /// total parameter controls the maximum total connection time to prevent
+  /// hanging indefinitely.
+  virtual void SetTimeouts(uint32 connection, uint32 inactivity,
+                           uint32 total) = 0;
+
   TF_DISALLOW_COPY_AND_ASSIGN(HttpRequest);
 };
 
diff --git a/tensorflow/core/platform/cloud/http_request_fake.h b/tensorflow/core/platform/cloud/http_request_fake.h
index bfe04f6363b6cde227f73333f2351b550be1dde1..682b97f6ec6d697bef2ef6301a39be35c95c5861 100644
--- a/tensorflow/core/platform/cloud/http_request_fake.h
+++ b/tensorflow/core/platform/cloud/http_request_fake.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_HTTP_REQUEST_FAKE_H_
 #define TENSORFLOW_CORE_PLATFORM_HTTP_REQUEST_FAKE_H_
 
+#include <algorithm>
 #include <fstream>
 #include <string>
 #include <vector>
@@ -37,7 +38,8 @@ class FakeHttpRequest : public CurlHttpRequest {
  public:
   /// Return the response for the given request.
   FakeHttpRequest(const string& request, const string& response)
-      : FakeHttpRequest(request, response, Status::OK(), nullptr, {}, 200) {}
+      : FakeHttpRequest(request, response, Status::OK(), nullptr, {}, 200) {
+  }
 
   /// Return the response with headers for the given request.
   FakeHttpRequest(const string& request, const string& response,
@@ -74,27 +76,19 @@ class FakeHttpRequest : public CurlHttpRequest {
         response_headers_(response_headers),
         response_code_(response_code) {}
 
-  Status Init() override { return Status::OK(); }
-  Status SetUri(const string& uri) override {
-    actual_request_ += "Uri: " + uri + "\n";
-    return Status::OK();
+  void SetUri(const string& uri) override {
+    actual_uri_ += "Uri: " + uri + "\n";
   }
-  Status SetRange(uint64 start, uint64 end) override {
+  void SetRange(uint64 start, uint64 end) override {
     actual_request_ += strings::StrCat("Range: ", start, "-", end, "\n");
-    return Status::OK();
   }
-  Status AddHeader(const string& name, const string& value) override {
+  void AddHeader(const string& name, const string& value) override {
     actual_request_ += "Header " + name + ": " + value + "\n";
-    return Status::OK();
   }
-  Status AddAuthBearerHeader(const string& auth_token) override {
+  void AddAuthBearerHeader(const string& auth_token) override {
     actual_request_ += "Auth Token: " + auth_token + "\n";
-    return Status::OK();
-  }
-  Status SetDeleteRequest() override {
-    actual_request_ += "Delete: yes\n";
-    return Status::OK();
   }
+  void SetDeleteRequest() override { actual_request_ += "Delete: yes\n"; }
   Status SetPutFromFile(const string& body_filepath, size_t offset) override {
     std::ifstream stream(body_filepath);
     const string& content = string(std::istreambuf_iterator<char>(stream),
@@ -103,37 +97,44 @@ class FakeHttpRequest : public CurlHttpRequest {
     actual_request_ += "Put body: " + content + "\n";
     return Status::OK();
   }
-  Status SetPostFromBuffer(const char* buffer, size_t size) override {
+  void SetPostFromBuffer(const char* buffer, size_t size) override {
     if (captured_post_body_) {
       *captured_post_body_ = string(buffer, size);
     } else {
       actual_request_ +=
           strings::StrCat("Post body: ", StringPiece(buffer, size), "\n");
     }
-    return Status::OK();
-  }
-  Status SetPutEmptyBody() override {
-    actual_request_ += "Put: yes\n";
-    return Status::OK();
   }
-  Status SetPostEmptyBody() override {
+  void SetPutEmptyBody() override { actual_request_ += "Put: yes\n"; }
+  void SetPostEmptyBody() override {
     if (captured_post_body_) {
       *captured_post_body_ = "<empty>";
     } else {
       actual_request_ += "Post: yes\n";
     }
-    return Status::OK();
   }
-  Status SetResultBuffer(std::vector<char>* buffer) override {
+  void SetResultBuffer(std::vector<char>* buffer) override {
     buffer->clear();
     buffer_ = buffer;
-    return Status::OK();
+  }
+  void SetResultBufferDirect(char* buffer, size_t size) override {
+    direct_result_buffer_ = buffer;
+    direct_result_buffer_size_ = size;
+  }
+  size_t GetResultBufferDirectBytesTransferred() override {
+    return direct_result_bytes_transferred_;
   }
   Status Send() override {
-    EXPECT_EQ(expected_request_, actual_request_) << "Unexpected HTTP request.";
+    EXPECT_EQ(expected_request_, actual_request())
+        << "Unexpected HTTP request.";
     if (buffer_) {
-      buffer_->insert(buffer_->begin(), response_.c_str(),
-                      response_.c_str() + response_.size());
+      buffer_->insert(buffer_->begin(), response_.data(),
+                      response_.data() + response_.size());
+    } else if (direct_result_buffer_ != nullptr) {
+      size_t bytes_to_copy =
+          std::min<size_t>(direct_result_buffer_size_, response_.size());
+      memcpy(direct_result_buffer_, response_.data(), bytes_to_copy);
+      direct_result_bytes_transferred_ += bytes_to_copy;
     }
     return response_status_;
   }
@@ -160,9 +161,26 @@ class FakeHttpRequest : public CurlHttpRequest {
 
   virtual uint64 GetResponseCode() const override { return response_code_; }
 
+  void SetTimeouts(uint32 connection, uint32 inactivity,
+                   uint32 total) override {
+    actual_request_ += strings::StrCat("Timeouts: ", connection, " ",
+                                       inactivity, " ", total, "\n");
+  }
+
  private:
+  string actual_request() const {
+    string s;
+    s.append(actual_uri_);
+    s.append(actual_request_);
+    return s;
+  }
+
   std::vector<char>* buffer_ = nullptr;
+  char* direct_result_buffer_ = nullptr;
+  size_t direct_result_buffer_size_ = 0;
+  size_t direct_result_bytes_transferred_ = 0;
   string expected_request_;
+  string actual_uri_;
   string actual_request_;
   string response_;
   Status response_status_;
diff --git a/tensorflow/core/platform/cloud/oauth_client.cc b/tensorflow/core/platform/cloud/oauth_client.cc
index c700b97dc95f85400f9a8c214ea1ccc2b1a3e436..06849f9093099b23c8e60350fe5cd9d8282a2836 100644
--- a/tensorflow/core/platform/cloud/oauth_client.cc
+++ b/tensorflow/core/platform/cloud/oauth_client.cc
@@ -14,9 +14,13 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/oauth_client.h"
+#ifndef _WIN32
 #include <pwd.h>
 #include <sys/types.h>
 #include <unistd.h>
+#else
+#include <sys/types.h>
+#endif
 #include <fstream>
 #include <openssl/bio.h>
 #include <openssl/evp.h>
@@ -212,11 +216,9 @@ Status OAuthClient::GetTokenFromServiceAccountJson(
   // Send the request to the Google OAuth 2.0 server to get the token.
   std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
   std::vector<char> response_buffer;
-  TF_RETURN_IF_ERROR(request->Init());
-  TF_RETURN_IF_ERROR(request->SetUri(oauth_server_uri.ToString()));
-  TF_RETURN_IF_ERROR(
-      request->SetPostFromBuffer(request_body.c_str(), request_body.size()));
-  TF_RETURN_IF_ERROR(request->SetResultBuffer(&response_buffer));
+  request->SetUri(oauth_server_uri.ToString());
+  request->SetPostFromBuffer(request_body.c_str(), request_body.size());
+  request->SetResultBuffer(&response_buffer);
   TF_RETURN_IF_ERROR(request->Send());
 
   StringPiece response =
@@ -246,11 +248,9 @@ Status OAuthClient::GetTokenFromRefreshTokenJson(
 
   std::unique_ptr<HttpRequest> request(http_request_factory_->Create());
   std::vector<char> response_buffer;
-  TF_RETURN_IF_ERROR(request->Init());
-  TF_RETURN_IF_ERROR(request->SetUri(oauth_server_uri.ToString()));
-  TF_RETURN_IF_ERROR(
-      request->SetPostFromBuffer(request_body.c_str(), request_body.size()));
-  TF_RETURN_IF_ERROR(request->SetResultBuffer(&response_buffer));
+  request->SetUri(oauth_server_uri.ToString());
+  request->SetPostFromBuffer(request_body.c_str(), request_body.size());
+  request->SetResultBuffer(&response_buffer);
   TF_RETURN_IF_ERROR(request->Send());
 
   StringPiece response =
diff --git a/tensorflow/core/platform/cloud/time_util.cc b/tensorflow/core/platform/cloud/time_util.cc
index 2f8643f3c7f39c53566d481c078d8f71b44bbedd..0587a65c299778b95ccdec86e03c9f5dca8ec878 100644
--- a/tensorflow/core/platform/cloud/time_util.cc
+++ b/tensorflow/core/platform/cloud/time_util.cc
@@ -18,6 +18,9 @@ limitations under the License.
 #include <cmath>
 #include <cstdio>
 #include <ctime>
+#ifdef _WIN32
+#define timegm _mkgmtime
+#endif
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 0f8cf8f122355651b8793366677de6b7fc9584aa..e9c510c93c67a338df67c0882aef0fcf6ef5e393 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -3,6 +3,7 @@
 load("@protobuf_archive//:protobuf.bzl", "proto_gen")
 load("@protobuf_archive//:protobuf.bzl", "py_proto_library")
 load("//tensorflow:tensorflow.bzl", "if_not_mobile")
+load("//tensorflow:tensorflow.bzl", "if_windows")
 load("//tensorflow:tensorflow.bzl", "if_not_windows")
 load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static")
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
@@ -66,16 +67,14 @@ def pyx_library(
       pxd_srcs.append(src)
 
   # Invoke cython to produce the shared object libraries.
-  cpp_outs = [src.split(".")[0] + ".cpp" for src in pyx_srcs]
-  native.genrule(
-      name = name + "_cython_translation",
-      srcs = pyx_srcs,
-      outs = cpp_outs,
-      cmd = ("PYTHONHASHSEED=0 $(location @cython//:cython_binary) --cplus $(SRCS)"
-             # Rename outputs to expected location.
-             + """ && python -c 'import shutil, sys; n = len(sys.argv); [shutil.copyfile(src.split(".")[0] + ".cpp", dst) for src, dst in zip(sys.argv[1:], sys.argv[1+n//2:])]' $(SRCS) $(OUTS)"""),
-      tools = ["@cython//:cython_binary"] + pxd_srcs,
-  )
+  for filename in pyx_srcs:
+    native.genrule(
+        name = filename + "_cython_translation",
+        srcs = [filename],
+        outs = [filename.split(".")[0] + ".cpp"],
+        cmd = "PYTHONHASHSEED=0 $(location @cython//:cython_binary) --cplus $(SRCS) --output-file $(OUTS)",
+        tools = ["@cython//:cython_binary"] + pxd_srcs,
+    )
 
   shared_objects = []
   for src in pyx_srcs:
@@ -358,7 +357,9 @@ def tf_additional_proto_hdrs():
       "platform/default/integral_types.h",
       "platform/default/logging.h",
       "platform/default/protobuf.h"
-  ]
+  ] + if_windows([
+      "platform/windows/integral_types.h",
+  ])
 
 def tf_additional_proto_srcs():
   return [
@@ -458,7 +459,6 @@ def tf_additional_lib_deps():
 
 def tf_additional_core_deps():
   return select({
-      "//tensorflow:with_gcp_support_windows_override": [],
       "//tensorflow:with_gcp_support_android_override": [],
       "//tensorflow:with_gcp_support_ios_override": [],
       "//tensorflow:with_gcp_support": [
@@ -510,6 +510,7 @@ def tf_additional_cloud_kernel_deps():
 def tf_lib_proto_parsing_deps():
   return [
       ":protos_all_cc",
+      "//third_party/eigen3",
       "//tensorflow/core/platform/default/build_config:proto_parsing",
   ]
 
@@ -531,6 +532,9 @@ def tf_additional_gdr_lib_defines():
       "//conditions:default": [],
   })
 
+def tf_py_clif_cc(name, visibility=None, **kwargs):
+  pass
+
 def tf_pyclif_proto_library(name, proto_lib, proto_srcfile="", visibility=None,
                             **kwargs):
   pass
diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl
index 6e98f12114ec6bf715ca8ddcc02dbe8ff8aa8812..09029a4b256beceeb69c735c15bb1587cb1e06ac 100644
--- a/tensorflow/core/platform/default/build_config_root.bzl
+++ b/tensorflow/core/platform/default/build_config_root.bzl
@@ -19,6 +19,9 @@ def tf_additional_plugin_deps():
 def tf_additional_xla_deps_py():
   return []
 
+def tf_additional_grpc_deps_py():
+  return []
+
 def tf_additional_license_deps():
   return select({
       str(Label("//tensorflow:with_xla_support")): ["@llvm//:LICENSE.TXT"],
diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc
index ebdd4b624aa423983cdeb2d31c0bf27ff30c89e2..82bd69f9ca46eb1b8dd586d18ed852a2e8c5084e 100644
--- a/tensorflow/core/platform/default/logging.cc
+++ b/tensorflow/core/platform/default/logging.cc
@@ -114,6 +114,8 @@ int64 LogLevelStrToInt(const char* tf_env_var_val) {
   return level;
 }
 
+}  // namespace
+
 int64 MinLogLevelFromEnv() {
   const char* tf_env_var_val = getenv("TF_CPP_MIN_LOG_LEVEL");
   return LogLevelStrToInt(tf_env_var_val);
@@ -124,8 +126,6 @@ int64 MinVLogLevelFromEnv() {
   return LogLevelStrToInt(tf_env_var_val);
 }
 
-}  // namespace
-
 LogMessage::~LogMessage() {
   // Read the min log level once during the first call to logging.
   static int64 min_log_level = MinLogLevelFromEnv();
diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h
index d5f7350cdd805eb71edab0fde72db8383c32addb..40c260f236613e533e30dc006e77b02f393bdd48 100644
--- a/tensorflow/core/platform/default/logging.h
+++ b/tensorflow/core/platform/default/logging.h
@@ -305,6 +305,10 @@ T&& CheckNotNull(const char* file, int line, const char* exprtext, T&& t) {
   return std::forward<T>(t);
 }
 
+int64 MinLogLevelFromEnv();
+
+int64 MinVLogLevelFromEnv();
+
 }  // namespace internal
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/platform/default/mutex.h b/tensorflow/core/platform/default/mutex.h
index c3e44c42d942326af210e1038da20bf655d14a10..044c754e80bd0dee04c73e969c325a2aa4a89c31 100644
--- a/tensorflow/core/platform/default/mutex.h
+++ b/tensorflow/core/platform/default/mutex.h
@@ -31,6 +31,8 @@ namespace tensorflow {
 
 enum LinkerInitialized { LINKER_INITIALIZED };
 
+class condition_variable;
+
 // Mimic std::mutex + C++17's shared_mutex, adding a LinkerInitialized
 // constructor interface.  This type is as fast as mutex, but is also a shared
 // lock.
diff --git a/tensorflow/core/platform/default/stacktrace.h b/tensorflow/core/platform/default/stacktrace.h
index 5f3073262ab9d86b3ee922195f1b5bf28d47414e..c8e297fa8d8c1ee48b060e6e2c7ee89eb0d23b39 100644
--- a/tensorflow/core/platform/default/stacktrace.h
+++ b/tensorflow/core/platform/default/stacktrace.h
@@ -17,12 +17,63 @@ limitations under the License.
 #define TENSORFLOW_CORE_PLATFORM_DEFAULT_STACKTRACE_H_
 
 #include "tensorflow/core/platform/platform.h"
+#if !defined(IS_MOBILE_PLATFORM) && defined(PLATFORM_POSIX) && \
+    (defined(__clang__) || defined(__GNUC__))
+#define TF_GENERATE_BACKTRACE
+#endif
+
+#if defined(TF_GENERATE_BACKTRACE)
+#include <dlfcn.h>
+#include <execinfo.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#endif  // defined(TF_GENERATE_BACKTRACE)
+
+#include <sstream>
+#include <string>
+#include "tensorflow/core/platform/abi.h"
 
 namespace tensorflow {
 
-inline string CurrentStackTrace() { return "No stack trace available"; }
-
-inline void DebugWriteToString(const char* data, void* arg) {}
+// Function to create a pretty stacktrace.
+inline std::string CurrentStackTrace() {
+#if defined(TF_GENERATE_BACKTRACE)
+  std::stringstream ss("");
+  ss << "*** Begin stack trace ***" << std::endl;
+
+  // Get the mangled stack trace.
+  int buffer_size = 128;
+  void* trace[128];
+  buffer_size = backtrace(trace, buffer_size);
+
+  for (int i = 0; i < buffer_size; ++i) {
+    const char* symbol = "";
+    Dl_info info;
+    if (dladdr(trace[i], &info)) {
+      if (info.dli_sname != nullptr) {
+        symbol = info.dli_sname;
+      }
+    }
+
+    std::string demangled = tensorflow::port::MaybeAbiDemangle(symbol);
+    if (demangled.length()) {
+      ss << "\t" << demangled << std::endl;
+    } else {
+      ss << "\t" << symbol << std::endl;
+    }
+  }
+
+  ss << "*** End stack trace ***" << std::endl;
+  return ss.str();
+#else
+  return std::string();
+#endif  // defined(TF_GENERATE_BACKTRACE)
+}
+
+inline void DebugWriteToString(const char* data, void* arg) {
+  reinterpret_cast<std::string*>(arg)->append(data);
+}
 
 // A dummy class that does nothing.  Someday, add real support.
 class SavedStackTrace {
diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
index 12ef55ec26e3355f08235cce557b9c7ae0618f04..1bcca1243fb636b6cd75f2ec796f1f6c7ac364bb 100644
--- a/tensorflow/core/platform/env.cc
+++ b/tensorflow/core/platform/env.cc
@@ -20,6 +20,10 @@ limitations under the License.
 #if defined(__APPLE__)
 #include <mach-o/dyld.h>
 #endif
+#if defined(__FreeBSD__)
+#include <sys/sysctl.h>
+#include <sys/types.h>
+#endif
 #if defined(PLATFORM_WINDOWS)
 #include <windows.h>
 #include "tensorflow/core/platform/windows/windows_file_system.h"
@@ -88,8 +92,12 @@ Status Env::GetFileSystemForFile(const string& fname, FileSystem** result) {
   io::ParseURI(fname, &scheme, &host, &path);
   FileSystem* file_system = file_system_registry_->Lookup(scheme.ToString());
   if (!file_system) {
-    return errors::Unimplemented("File system scheme ", scheme,
-                                 " not implemented");
+    if (scheme.empty()) {
+      scheme = "[local]";
+    }
+
+    return errors::Unimplemented("File system scheme '", scheme,
+                                 "' not implemented (file: '", fname, "')");
   }
   *result = file_system;
   return Status::OK();
@@ -104,6 +112,18 @@ Status Env::RegisterFileSystem(const string& scheme,
   return file_system_registry_->Register(scheme, std::move(factory));
 }
 
+Status Env::FlushFileSystemCaches() {
+  std::vector<string> schemes;
+  TF_RETURN_IF_ERROR(GetRegisteredFileSystemSchemes(&schemes));
+  for (const string& scheme : schemes) {
+    FileSystem* fs = nullptr;
+    TF_RETURN_IF_ERROR(
+        GetFileSystemForFile(io::CreateURI(scheme, "", ""), &fs));
+    fs->FlushCaches();
+  }
+  return Status::OK();
+}
+
 Status Env::NewRandomAccessFile(const string& fname,
                                 std::unique_ptr<RandomAccessFile>* result) {
   FileSystem* fs;
@@ -157,8 +177,8 @@ bool Env::FilesExist(const std::vector<string>& files,
     if (!file_system) {
       fs_result = false;
       if (fs_status) {
-        Status s = errors::Unimplemented("File system scheme ", itr.first,
-                                         " not implemented");
+        Status s = errors::Unimplemented("File system scheme '", itr.first,
+                                         "' not implemented");
         local_status.resize(itr.second.size(), s);
       }
     } else {
@@ -266,6 +286,14 @@ string Env::GetExecutablePath() {
   char unresolved_path[buffer_size];
   _NSGetExecutablePath(unresolved_path, &buffer_size);
   CHECK(realpath(unresolved_path, exe_path));
+#elif defined(__FreeBSD__)
+  int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+  size_t exe_path_size = PATH_MAX;
+
+  if (sysctl(mib, 4, exe_path, &exe_path_size, NULL, 0) != 0) {
+    // Resolution of path failed
+    return "";
+  }
 #elif defined(PLATFORM_WINDOWS)
   HMODULE hModule = GetModuleHandleW(NULL);
   WCHAR wc_file_path[MAX_PATH] = {0};
@@ -288,30 +316,47 @@ bool Env::LocalTempFilename(string* filename) {
   // Try each directory, as they might be full, have inappropriate
   // permissions or have different problems at times.
   for (const string& dir : dirs) {
+    *filename = io::JoinPath(dir, "tempfile-");
+    if (CreateUniqueFileName(filename, "")) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Env::CreateUniqueFileName(string* prefix, const string& suffix) {
 #ifdef __APPLE__
-    uint64_t tid64;
-    pthread_threadid_np(nullptr, &tid64);
-    int32 tid = static_cast<int32>(tid64);
-    int32 pid = static_cast<int32>(getpid());
+  uint64_t tid64;
+  pthread_threadid_np(nullptr, &tid64);
+  int32 tid = static_cast<int32>(tid64);
+  int32 pid = static_cast<int32>(getpid());
+#elif defined(__FreeBSD__)
+  // Has to be casted to long first, else this error appears:
+  // static_cast from 'pthread_t' (aka 'pthread *') to 'int32' (aka 'int')
+  // is not allowed
+  int32 tid = static_cast<int32>(static_cast<int64>(pthread_self()));
+  int32 pid = static_cast<int32>(getpid());
 #elif defined(PLATFORM_WINDOWS)
-    int32 tid = static_cast<int32>(GetCurrentThreadId());
-    int32 pid = static_cast<int32>(GetCurrentProcessId());
+  int32 tid = static_cast<int32>(GetCurrentThreadId());
+  int32 pid = static_cast<int32>(GetCurrentProcessId());
 #else
-    int32 tid = static_cast<int32>(pthread_self());
-    int32 pid = static_cast<int32>(getpid());
+  int32 tid = static_cast<int32>(pthread_self());
+  int32 pid = static_cast<int32>(getpid());
 #endif
-    uint64 now_microsec = NowMicros();
+  uint64 now_microsec = NowMicros();
 
-    *filename = io::JoinPath(
-        dir, strings::Printf("tempfile-%s-%x-%d-%llx", port::Hostname().c_str(),
-                             tid, pid, now_microsec));
-    if (FileExists(*filename).ok()) {
-      filename->clear();
-    } else {
-      return true;
-    }
+  *prefix += strings::Printf("%s-%x-%d-%llx", port::Hostname().c_str(), tid,
+                             pid, now_microsec);
+
+  if (!suffix.empty()) {
+    *prefix += suffix;
+  }
+  if (FileExists(*prefix).ok()) {
+    prefix->clear();
+    return false;
+  } else {
+    return true;
   }
-  return false;
 }
 
 Thread::~Thread() {}
diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index da8c3e2d7e8a50c9d441cd371078fa86aae13179..557bfa87e50a85a6f9de86548931ea215d8ac7ff 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -68,10 +68,13 @@ class Env {
   /// \brief Returns the file system schemes registered for this Env.
   virtual Status GetRegisteredFileSystemSchemes(std::vector<string>* schemes);
 
-  // \brief Register a file system for a scheme.
+  /// \brief Register a file system for a scheme.
   virtual Status RegisterFileSystem(const string& scheme,
                                     FileSystemRegistry::Factory factory);
 
+  /// \brief Flush filesystem caches for all registered filesystems.
+  Status FlushFileSystemCaches();
+
   /// \brief Creates a brand new random access read-only file with the
   /// specified name.
 
@@ -218,6 +221,10 @@ class Env {
   /// Creates a local unique temporary file name. Returns true if success.
   bool LocalTempFilename(string* filename);
 
+  /// Creates a local unique file name that starts with |prefix| and ends with
+  /// |suffix|. Returns true if success.
+  bool CreateUniqueFileName(string* prefix, const string& suffix);
+
   // TODO(jeff,sanjay): Add back thread/thread-pool support if needed.
   // TODO(jeff,sanjay): if needed, tighten spec so relative to epoch, or
   // provide a routine to get the absolute time.
diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc
index c9b362f18235f8ddec0994bc1110aaec950eef72..47ddf0ccb93e827d410e87050d6802747fb84fbf 100644
--- a/tensorflow/core/platform/env_test.cc
+++ b/tensorflow/core/platform/env_test.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -280,6 +281,15 @@ class TmpDirFileSystem : public NullFileSystem {
     StringPiece scheme, host, path;
     io::ParseURI(dir, &scheme, &host, &path);
     if (path.empty()) return errors::NotFound(dir, " not found");
+    // The special "flushed" file exists only if the filesystem's caches have
+    // been flushed.
+    if (path == "/flushed") {
+      if (flushed_) {
+        return Status::OK();
+      } else {
+        return errors::NotFound("FlushCaches() not called yet");
+      }
+    }
     return Env::Default()->FileExists(io::JoinPath(BaseDir(), path));
   }
 
@@ -294,10 +304,23 @@ class TmpDirFileSystem : public NullFileSystem {
     }
     return Env::Default()->CreateDir(io::JoinPath(BaseDir(), path));
   }
+
+  void FlushCaches() override { flushed_ = true; }
+
+ private:
+  bool flushed_ = false;
 };
 
 REGISTER_FILE_SYSTEM("tmpdirfs", TmpDirFileSystem);
 
+TEST_F(DefaultEnvTest, FlushFileSystemCaches) {
+  Env* env = Env::Default();
+  const string flushed = "tmpdirfs://testhost/flushed";
+  EXPECT_EQ(error::Code::NOT_FOUND, env->FileExists(flushed).code());
+  TF_EXPECT_OK(env->FlushFileSystemCaches());
+  TF_EXPECT_OK(env->FileExists(flushed));
+}
+
 TEST_F(DefaultEnvTest, RecursivelyCreateDirWithUri) {
   Env* env = Env::Default();
   const string create_path = "tmpdirfs://testhost/a/b/c/d";
@@ -340,4 +363,18 @@ TEST_F(DefaultEnvTest, LocalTempFilename) {
   EXPECT_FALSE(env->FileExists(filename).ok());
 }
 
+TEST_F(DefaultEnvTest, CreateUniqueFileName) {
+  Env* env = Env::Default();
+
+  string prefix = "tempfile-prefix-";
+  string suffix = ".tmp";
+  string filename = prefix;
+
+  EXPECT_TRUE(env->CreateUniqueFileName(&filename, suffix));
+
+  StringPiece str(filename);
+  EXPECT_TRUE(str.starts_with(prefix));
+  EXPECT_TRUE(str.ends_with(suffix));
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/file_system.cc b/tensorflow/core/platform/file_system.cc
index 938f5af487ab05182cea30996f19c1e40ab1b535..14755891fa2d3b916396c75c9647acafe66ec524 100644
--- a/tensorflow/core/platform/file_system.cc
+++ b/tensorflow/core/platform/file_system.cc
@@ -73,6 +73,8 @@ Status FileSystem::IsDirectory(const string& name) {
   return Status(tensorflow::error::FAILED_PRECONDITION, "Not a directory");
 }
 
+void FileSystem::FlushCaches() {}
+
 RandomAccessFile::~RandomAccessFile() {}
 
 WritableFile::~WritableFile() {}
diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h
index 903df96b58a7304f04e618613c7fa9561fe798a2..d32efcea0967eea321d512f4d0f3218128f3d59b 100644
--- a/tensorflow/core/platform/file_system.h
+++ b/tensorflow/core/platform/file_system.h
@@ -206,6 +206,9 @@ class FileSystem {
   ///  * UNIMPLEMENTED - The file factory doesn't support directories.
   virtual Status IsDirectory(const string& fname);
 
+  /// \brief Flushes any cached filesystem objects from memory.
+  virtual void FlushCaches();
+
   FileSystem() {}
 
   virtual ~FileSystem();
diff --git a/tensorflow/core/platform/macros.h b/tensorflow/core/platform/macros.h
index 47523c7d2b09275be3747e684df1b656534ed6ea..6119edfd5a63d1aa4e81bb91d95736ed2835c478 100644
--- a/tensorflow/core/platform/macros.h
+++ b/tensorflow/core/platform/macros.h
@@ -93,7 +93,8 @@ limitations under the License.
   ((sizeof(a) / sizeof(*(a))) / \
    static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
 
-#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \
+    (defined(_MSC_VER) && _MSC_VER >= 1900)
 // Define this to 1 if the code is compiled in C++11 mode; leave it
 // undefined otherwise.  Do NOT define it to 0 -- that causes
 // '#ifdef LANG_CXX11' to behave differently from '#if LANG_CXX11'.
diff --git a/tensorflow/core/platform/posix/env.cc b/tensorflow/core/platform/posix/env.cc
index ba3c4e709078adf8c60cf49ab06c7194cf887cc1..8097624e09f81364071895ad114f26f93f4aab14 100644
--- a/tensorflow/core/platform/posix/env.cc
+++ b/tensorflow/core/platform/posix/env.cc
@@ -136,15 +136,19 @@ void Env::GetLocalTempDirectories(std::vector<string>* list) {
   // Directories, in order of preference. If we find a dir that
   // exists, we stop adding other less-preferred dirs
   const char* candidates[] = {
-      // Non-null only during unittest/regtest
-      getenv("TEST_TMPDIR"),
+    // Non-null only during unittest/regtest
+    getenv("TEST_TMPDIR"),
 
-      // Explicitly-supplied temp dirs
-      getenv("TMPDIR"),
-      getenv("TMP"),
+    // Explicitly-supplied temp dirs
+    getenv("TMPDIR"),
+    getenv("TMP"),
 
-      // If all else fails
-      "/tmp",
+#if defined(__ANDROID__)
+    "/data/local/tmp",
+#endif
+
+    // If all else fails
+    "/tmp",
   };
 
   for (const char* d : candidates) {
diff --git a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
index fb1955edde2abfd3fe5267e1319ea128138ee092..12dc9c58b38d01f6efc5644193fbf38b0e70c8d1 100644
--- a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
+++ b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
@@ -118,9 +118,10 @@ int64 AndroidArmV7ACpuUtilsHelper::ReadCpuFrequencyFile(
   const int retval = fscanf(fp, "%lld", &freq_in_khz);
   if (retval < 0) {
     LOG(WARNING) << "Failed to \"" << file_path << "\"";
+    fclose(fp);
     return INVALID_CPU_FREQUENCY;
   }
-  pclose(fp);
+  fclose(fp);
   return freq_in_khz * 1000;  // The file contains cpu frequency in khz
 }
 
diff --git a/tensorflow/core/platform/s3/BUILD b/tensorflow/core/platform/s3/BUILD
index b7bc1a11d6583787e2c0fb07d004dc2badc5bcca..2cd5f877c9fcc998b6a727e3ae0a92f17a233c9f 100644
--- a/tensorflow/core/platform/s3/BUILD
+++ b/tensorflow/core/platform/s3/BUILD
@@ -28,6 +28,8 @@ filegroup(
 tf_cc_binary(
     name = "s3_file_system.so",
     srcs = [
+        "aws_logging.cc",
+        "aws_logging.h",
         "s3_crypto.cc",
         "s3_crypto.h",
         "s3_file_system.cc",
@@ -66,6 +68,22 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "aws_logging",
+    srcs = [
+        "aws_logging.cc",
+    ],
+    hdrs = [
+        "aws_logging.h",
+    ],
+    deps = [
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "@aws//:aws",
+    ],
+    alwayslink = 1,
+)
+
 cc_library(
     name = "s3_file_system",
     srcs = [
@@ -75,6 +93,7 @@ cc_library(
         "s3_file_system.h",
     ],
     deps = [
+        ":aws_logging",
         ":s3_crypto",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
diff --git a/tensorflow/core/platform/s3/aws_logging.cc b/tensorflow/core/platform/s3/aws_logging.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fbca0acc36b01fa91dece4bdd0d19b7059dc114e
--- /dev/null
+++ b/tensorflow/core/platform/s3/aws_logging.cc
@@ -0,0 +1,122 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/platform/s3/aws_logging.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/mutex.h"
+
+#include <aws/core/Aws.h>
+#include <aws/core/utils/logging/AWSLogging.h>
+#include <aws/core/utils/logging/LogSystemInterface.h>
+
+#include <cstdarg>
+
+namespace tensorflow {
+
+AWSLogSystem::AWSLogSystem(Aws::Utils::Logging::LogLevel log_level)
+    : log_level_(log_level) {}
+
+void AWSLogSystem::Log(Aws::Utils::Logging::LogLevel log_level, const char* tag,
+                       const char* format, ...) {
+  std::va_list args;
+  va_start(args, format);
+
+  const string s = strings::Printf(format, args);
+
+  va_end(args);
+
+  LogMessage(log_level, s);
+}
+
+void AWSLogSystem::LogStream(Aws::Utils::Logging::LogLevel log_level,
+                             const char* tag,
+                             const Aws::OStringStream& message_stream) {
+  LogMessage(log_level, message_stream.rdbuf()->str().c_str());
+}
+
+void AWSLogSystem::LogMessage(Aws::Utils::Logging::LogLevel log_level,
+                              const std::string& message) {
+  if (message == "Initializing Curl library") return;
+  switch (log_level) {
+    case Aws::Utils::Logging::LogLevel::Info:
+      LOG(INFO) << message;
+      break;
+    case Aws::Utils::Logging::LogLevel::Warn:
+      LOG(WARNING) << message;
+      break;
+    case Aws::Utils::Logging::LogLevel::Error:
+      LOG(ERROR) << message;
+      break;
+    case Aws::Utils::Logging::LogLevel::Fatal:
+      LOG(FATAL) << message;
+      break;
+    default:
+      LOG(ERROR) << message;
+      break;
+  }
+}
+
+namespace {
+static const char* kAWSLoggingTag = "AWSLogging";
+
+Aws::Utils::Logging::LogLevel ParseLogLevelFromEnv() {
+  Aws::Utils::Logging::LogLevel log_level = Aws::Utils::Logging::LogLevel::Info;
+
+  const int64_t level = tensorflow::internal::MinLogLevelFromEnv();
+
+  switch (level) {
+    case INFO:
+      log_level = Aws::Utils::Logging::LogLevel::Info;
+      break;
+    case WARNING:
+      log_level = Aws::Utils::Logging::LogLevel::Warn;
+      break;
+    case ERROR:
+      log_level = Aws::Utils::Logging::LogLevel::Error;
+      break;
+    case FATAL:
+      log_level = Aws::Utils::Logging::LogLevel::Fatal;
+      break;
+    default:
+      log_level = Aws::Utils::Logging::LogLevel::Info;
+      break;
+  }
+
+  return log_level;
+}
+}
+
+static bool initialized = false;
+static mutex s3_logging_mutex(LINKER_INITIALIZED);
+void AWSLogSystem::InitializeAWSLogging() {
+  std::lock_guard<mutex> s3_logging_lock(s3_logging_mutex);
+  if (!initialized) {
+    Aws::Utils::Logging::InitializeAWSLogging(
+        Aws::MakeShared<AWSLogSystem>(kAWSLoggingTag, ParseLogLevelFromEnv()));
+    initialized = true;
+    return;
+  }
+}
+
+void AWSLogSystem::ShutdownAWSLogging() {
+  std::lock_guard<mutex> s3_logging_lock(s3_logging_mutex);
+  if (initialized) {
+    Aws::Utils::Logging::ShutdownAWSLogging();
+    initialized = false;
+    return;
+  }
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/platform/s3/aws_logging.h b/tensorflow/core/platform/s3/aws_logging.h
new file mode 100644
index 0000000000000000000000000000000000000000..b0da8f3c83524df682e65878f39a2f500aa64a6b
--- /dev/null
+++ b/tensorflow/core/platform/s3/aws_logging.h
@@ -0,0 +1,68 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_S3_S3_LOGGING_H_
+#define TENSORFLOW_CONTRIB_S3_S3_LOGGING_H_
+
+#include <atomic>
+#include <string>
+
+#include <aws/core/utils/logging/LogLevel.h>
+#include <aws/core/utils/logging/LogSystemInterface.h>
+#include "tensorflow/core/platform/default/logging.h"
+
+namespace tensorflow {
+
+class AWSLogSystem : public Aws::Utils::Logging::LogSystemInterface {
+ public:
+  static void InitializeAWSLogging();
+  static void ShutdownAWSLogging();
+
+  explicit AWSLogSystem(Aws::Utils::Logging::LogLevel log_level);
+  virtual ~AWSLogSystem() = default;
+
+  // Gets the currently configured log level.
+  virtual Aws::Utils::Logging::LogLevel GetLogLevel(void) const override {
+    return log_level_;
+  }
+
+  // Set a new log level. This has the immediate effect of changing the log.
+  void SetLogLevel(Aws::Utils::Logging::LogLevel log_level) {
+    log_level_.store(log_level);
+  }
+
+  // Does a printf style output to ProcessFormattedStatement. Don't use this,
+  // it's unsafe. See LogStream.
+  // Since non-static C++ methods have an implicit this argument,
+  // TF_PRINTF_ATTRIBUTE should be counted from two (vs. one).
+  virtual void Log(Aws::Utils::Logging::LogLevel log_level, const char* tag,
+                   const char* format, ...) override TF_PRINTF_ATTRIBUTE(4, 5);
+
+  // Writes the stream to ProcessFormattedStatement.
+  virtual void LogStream(Aws::Utils::Logging::LogLevel log_level,
+                         const char* tag,
+                         const Aws::OStringStream& messageStream) override;
+
+ private:
+  void LogMessage(Aws::Utils::Logging::LogLevel log_level,
+                  const string& message);
+  std::atomic<Aws::Utils::Logging::LogLevel> log_level_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(AWSLogSystem);
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_S3_S3_LOGGING_H_
diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc
index 234f3c3aed7f036892227dd2ba96a3e1393517b4..58ea3156701268b758f52383d123fd3a16d3fd86 100644
--- a/tensorflow/core/platform/s3/s3_file_system.cc
+++ b/tensorflow/core/platform/s3/s3_file_system.cc
@@ -12,13 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "tensorflow/core/platform/s3/s3_file_system.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/s3/s3_file_system.h"
+#include "tensorflow/core/platform/s3/aws_logging.h"
 #include "tensorflow/core/platform/s3/s3_crypto.h"
 
 #include <aws/core/Aws.h>
 #include <aws/core/utils/FileSystemUtils.h>
+#include <aws/core/utils/logging/AWSLogging.h>
+#include <aws/core/utils/logging/LogSystemInterface.h>
 #include <aws/s3/S3Client.h>
 #include <aws/s3/S3Errors.h>
 #include <aws/s3/model/CopyObjectRequest.h>
@@ -33,6 +36,7 @@ limitations under the License.
 
 namespace tensorflow {
 
+namespace {
 static const char* kS3FileSystemAllocationTag = "S3FileSystemAllocation";
 static const size_t kS3ReadAppendableFileBufferSize = 1024 * 1024;
 static const int kS3GetChildrenMaxKeys = 100;
@@ -49,9 +53,15 @@ Aws::Client::ClientConfiguration& GetDefaultClientConfig() {
     if (endpoint) {
       cfg.endpointOverride = Aws::String(endpoint);
     }
-    const char* region = getenv("S3_REGION");
+    const char* region = getenv("AWS_REGION");
     if (region) {
       cfg.region = Aws::String(region);
+    } else {
+      // TODO (yongtang): `S3_REGION` should be deprecated after 2.0.
+      const char* region = getenv("S3_REGION");
+      if (region) {
+        cfg.region = Aws::String(region);
+      }
     }
     const char* use_https = getenv("S3_USE_HTTPS");
     if (use_https) {
@@ -69,6 +79,22 @@ Aws::Client::ClientConfiguration& GetDefaultClientConfig() {
         cfg.verifySSL = true;
       }
     }
+    const char* connect_timeout = getenv("S3_CONNECT_TIMEOUT_MSEC");
+    if (connect_timeout) {
+      int64 timeout;
+
+      if (strings::safe_strto64(connect_timeout, &timeout)) {
+        cfg.connectTimeoutMs = timeout;
+      }
+    }
+    const char* request_timeout = getenv("S3_REQUEST_TIMEOUT_MSEC");
+    if (request_timeout) {
+      int64 timeout;
+
+      if (strings::safe_strto64(request_timeout, &timeout)) {
+        cfg.requestTimeoutMs = timeout;
+      }
+    }
 
     init = true;
   }
@@ -220,7 +246,11 @@ class S3ReadOnlyMemoryRegion : public ReadOnlyMemoryRegion {
   uint64 length_;
 };
 
+}  // namespace
+
 S3FileSystem::S3FileSystem() {
+  AWSLogSystem::InitializeAWSLogging();
+
   Aws::SDKOptions options;
   options.cryptoOptions.sha256Factory_create_fn = []() {
     return Aws::MakeShared<S3SHA256Factory>(S3CryptoAllocationTag);
@@ -234,6 +264,8 @@ S3FileSystem::S3FileSystem() {
 S3FileSystem::~S3FileSystem() {
   Aws::SDKOptions options;
   Aws::ShutdownAPI(options);
+
+  AWSLogSystem::ShutdownAWSLogging();
 }
 
 Status S3FileSystem::NewRandomAccessFile(
diff --git a/tensorflow/core/platform/stacktrace_handler.cc b/tensorflow/core/platform/stacktrace_handler.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ff31c97be0a76b425503120c326a79f5a62d3377
--- /dev/null
+++ b/tensorflow/core/platform/stacktrace_handler.cc
@@ -0,0 +1,135 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/platform.h"
+
+#if !defined(PLATFORM_GOOGLE) && !defined(IS_MOBILE_PLATFORM) && \
+    defined(PLATFORM_POSIX) && (defined(__clang__) || defined(__GNUC__))
+#define TF_GENERATE_STACKTRACE
+#endif
+
+#if defined(TF_GENERATE_STACKTRACE)
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <string>
+
+#include "tensorflow/core/platform/abi.h"
+#include "tensorflow/core/platform/stacktrace.h"
+
+#endif  // defined(TF_GENERATE_STACKTRACE)
+
+namespace tensorflow {
+namespace testing {
+
+#if defined(TF_GENERATE_STACKTRACE)
+// This function will print stacktrace to STDERR.
+// It avoids using malloc, so it makes sure to dump the stack even when the heap
+// is corrupted. However, it can dump mangled symbols.
+inline void SafePrintStackTrace() {
+  static const char begin_msg[] = "*** BEGIN MANGLED STACK TRACE ***\n";
+  (void)write(STDERR_FILENO, begin_msg, strlen(begin_msg));
+
+  int buffer_size = 128;
+  void *trace[128];
+  // Run backtrace to get the size of the stacktrace
+  buffer_size = backtrace(trace, buffer_size);
+
+  // Print a mangled stacktrace to STDERR as safely as possible.
+  backtrace_symbols_fd(trace, buffer_size, STDERR_FILENO);
+
+  static const char end_msg[] = "*** END MANGLED STACK TRACE ***\n\n";
+  (void)write(STDERR_FILENO, end_msg, strlen(end_msg));
+}
+
+static void StacktraceHandler(int sig, siginfo_t *si, void *v) {
+  // Make sure our handler does not deadlock. And this should be the last thing
+  // our program does. Therefore, set a timer to kill the program in 60
+  // seconds.
+  struct itimerval timer;
+  timer.it_value.tv_sec = 60;
+  timer.it_value.tv_usec = 0;
+  timer.it_interval.tv_sec = 0;
+  timer.it_interval.tv_usec = 0;
+  setitimer(ITIMER_REAL, &timer, 0);
+
+  struct sigaction sa_timeout;
+  memset(&sa_timeout, 0, sizeof(sa_timeout));
+  sa_timeout.sa_handler = SIG_DFL;
+  sigaction(SIGALRM, &sa_timeout, 0);
+
+  char buf[128];
+
+  snprintf(buf, sizeof(buf), "*** Received signal %d ***\n", sig);
+  (void)write(STDERR_FILENO, buf, strlen(buf));
+
+  // Print "a" stack trace, as safely as possible.
+  SafePrintStackTrace();
+
+  // Up until this line, we made sure not to allocate memory, to be able to dump
+  // a stack trace even in the event of heap corruption. After this line, we
+  // will try to print more human readable things to the terminal.
+  // But these have a higher probability to fail.
+  std::string stacktrace = CurrentStackTrace();
+  (void)write(STDERR_FILENO, stacktrace.c_str(), stacktrace.length());
+
+  // Abort the program.
+  struct sigaction sa;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = 0;
+  sa.sa_handler = SIG_DFL;
+  sigaction(SIGABRT, &sa, NULL);
+  abort();
+}
+
+void InstallStacktraceHandler() {
+  int handled_signals[] = {SIGSEGV, SIGABRT, SIGBUS, SIGILL, SIGFPE};
+
+  for (int i = 0; i < sizeof(handled_signals) / sizeof(int); i++) {
+    int sig = handled_signals[i];
+    struct sigaction sa;
+    struct sigaction osa;
+
+    sigemptyset(&sa.sa_mask);
+    sa.sa_flags = SA_SIGINFO | SA_RESETHAND;
+    sa.sa_sigaction = &StacktraceHandler;
+    if (sigaction(sig, &sa, &osa) != 0) {
+      char buf[128];
+      snprintf(buf, sizeof(buf),
+               "Warning, can't install backtrace signal handler for signal %d, "
+               "errno:%d \n",
+               sig, errno);
+      (void)write(STDERR_FILENO, buf, strlen(buf));
+    } else if (osa.sa_handler != SIG_DFL) {
+      char buf[128];
+      snprintf(buf, sizeof(buf),
+               "Warning, backtrace signal handler for signal %d overwrote "
+               "previous handler.\n",
+               sig);
+      (void)write(STDERR_FILENO, buf, strlen(buf));
+    }
+  }
+}
+
+#else
+void InstallStacktraceHandler() {}
+#endif  // defined(TF_GENERATE_STACKTRACE)
+
+}  // namespace testing
+}  // namespace tensorflow
diff --git a/tensorflow/core/platform/stacktrace_handler.h b/tensorflow/core/platform/stacktrace_handler.h
new file mode 100644
index 0000000000000000000000000000000000000000..d36c82c9ba893b4438c21662156291aa71df77ee
--- /dev/null
+++ b/tensorflow/core/platform/stacktrace_handler.h
@@ -0,0 +1,28 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_PLATFORM_BACKTRACE_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_PLATFORM_BACKTRACE_H_
+
+namespace tensorflow {
+namespace testing {
+
+// Installs signal handlers to print out stack trace.
+void InstallStacktraceHandler();
+
+}  // namespace testing
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_PLATFORM_BACKTRACE_H_
diff --git a/tensorflow/core/platform/stacktrace_handler_test.cc b/tensorflow/core/platform/stacktrace_handler_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..958c7de232ed4d11a72d6a245c83afb8f62574cd
--- /dev/null
+++ b/tensorflow/core/platform/stacktrace_handler_test.cc
@@ -0,0 +1,82 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// Testing proper operation of the stacktrace handler.
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <string>
+
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+#define READ_BUFFER_SIZE 1024
+
+TEST(StacktraceHandlerTest, GeneratesStacktrace) {
+  // Create a pipe to write/read the child stdout.
+  int test_pipe[2];
+  EXPECT_EQ(pipe(test_pipe), 0);
+
+  // Fork the process.
+  int test_pid = fork();
+
+  if (test_pid == 0) {
+    // Child process.
+    // Close the read end of the pipe, redirect stdout and sleep.
+    close(test_pipe[0]);
+    dup2(test_pipe[1], STDOUT_FILENO);
+    dup2(test_pipe[1], STDERR_FILENO);
+    sleep(10);
+  } else {
+    // Parent process.
+    // Close the write end of the pipe, wait a little and send SIGABRT to the
+    // child process. Then watch the pipe.
+    close(test_pipe[1]);
+    sleep(1);
+
+    // Send the signal.
+    kill(test_pid, SIGABRT);
+
+    // Read from the pipe.
+    char buffer[READ_BUFFER_SIZE];
+    std::string child_output = "";
+    while (true) {
+      int read_length = read(test_pipe[0], buffer, READ_BUFFER_SIZE);
+      if (read_length > 0) {
+        child_output += std::string(buffer, read_length);
+      } else {
+        break;
+      }
+    }
+    close(test_pipe[0]);
+
+    // Just make sure we can detect one of the calls in testing stack.
+    string test_stack_frame = "testing::internal::UnitTestImpl::RunAllTests()";
+
+    // Print the stack trace detected for information.
+    LOG(INFO) << "Output from the child process:";
+    LOG(INFO) << child_output;
+
+    EXPECT_NE(child_output.find(test_stack_frame), std::string::npos);
+  }
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/platform/test_main.cc b/tensorflow/core/platform/test_main.cc
index 96c88afcc4b48be97682e06f2b728fd35b79e0da..677114f5f22b4fe70c6f006e536a2da5f17977d6 100644
--- a/tensorflow/core/platform/test_main.cc
+++ b/tensorflow/core/platform/test_main.cc
@@ -27,12 +27,14 @@ limitations under the License.
 #include <iostream>
 
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/stacktrace_handler.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
 
 GTEST_API_ int main(int argc, char** argv) {
   std::cout << "Running main() from test_main.cc\n";
 
+  tensorflow::testing::InstallStacktraceHandler();
   testing::InitGoogleTest(&argc, argv);
   for (int i = 1; i < argc; i++) {
     if (tensorflow::StringPiece(argv[i]).starts_with("--benchmarks=")) {
diff --git a/tensorflow/core/platform/types.h b/tensorflow/core/platform/types.h
index 93b82ecb7a7e668d5ea3d428861776388ddef9bb..e2dd5b003f291b6ce88ebabe2d66114762bd2c57 100644
--- a/tensorflow/core/platform/types.h
+++ b/tensorflow/core/platform/types.h
@@ -22,13 +22,21 @@ limitations under the License.
 // Include appropriate platform-dependent implementations
 #if defined(PLATFORM_GOOGLE) || defined(GOOGLE_INTEGRAL_TYPES)
 #include "tensorflow/core/platform/google/integral_types.h"
+#elif defined(PLATFORM_WINDOWS)
+#include "tensorflow/core/platform/windows/integral_types.h"
 #elif defined(PLATFORM_POSIX) || defined(PLATFORM_POSIX_ANDROID) || \
-    defined(PLATFORM_GOOGLE_ANDROID) || defined(PLATFORM_WINDOWS)
+    defined(PLATFORM_GOOGLE_ANDROID)
 #include "tensorflow/core/platform/default/integral_types.h"
 #else
 #error Define the appropriate PLATFORM_<foo> macro for this platform
 #endif
 
+#if defined(PLATFORM_WINDOWS)
+#include "tensorflow/core/platform/windows/cpu_info.h"
+#endif
+
+#include "tensorflow/core/lib/bfloat16/bfloat16.h"
+
 namespace tensorflow {
 
 // Define tensorflow::string to refer to appropriate platform specific type.
diff --git a/tensorflow/core/platform/windows/integral_types.h b/tensorflow/core/platform/windows/integral_types.h
new file mode 100644
index 0000000000000000000000000000000000000000..4970b8ca6a1673dd24d2d445348fe5b337ae13be
--- /dev/null
+++ b/tensorflow/core/platform/windows/integral_types.h
@@ -0,0 +1,25 @@
+ /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+     http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ==============================================================================*/
+ 
+#ifndef TENSORFLOW_PLATFORM_WINDOWS_INTEGRAL_TYPES_H_
+#define TENSORFLOW_PLATFORM_WINDOWS_INTEGRAL_TYPES_H_
+
+#include "tensorflow/core/platform/default/integral_types.h"
+
+#include <cstddef>
+
+typedef std::ptrdiff_t ssize_t;
+
+#endif  // TENSORFLOW_PLATFORM_WINDOWS_INTEGRAL_TYPES_H_
diff --git a/tensorflow/core/profiler/BUILD b/tensorflow/core/profiler/BUILD
index 9c2e7a61deb93b3ecdd06ef1b15457e8d49470fc..5fbfc62e74c4ba5e8821eb12eb2cabd1b1c99068 100644
--- a/tensorflow/core/profiler/BUILD
+++ b/tensorflow/core/profiler/BUILD
@@ -34,7 +34,7 @@ tf_cc_binary(
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core/profiler/internal:tfprof_options",
+        "//tensorflow/core/profiler:tfprof_options",
         "//tensorflow/core/profiler/internal:tfprof_stats",
         "//tensorflow/core/profiler/internal:tfprof_utils",
         "//tensorflow/core/profiler/internal/advisor:tfprof_advisor",
@@ -42,6 +42,17 @@ tf_cc_binary(
     ],
 )
 
+cc_library(
+    name = "tfprof_options",
+    srcs = ["tfprof_options.cc"],
+    hdrs = ["tfprof_options.h"],
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/profiler:protos_all_cc",
+    ],
+)
+
 tf_proto_library(
     name = "protos_all",
     srcs = glob(["**/*.proto"]),
diff --git a/tensorflow/core/profiler/g3doc/advise.md b/tensorflow/core/profiler/g3doc/advise.md
index d0de8317f6950a89567b6d3c5705c42fcc8f4653..379c3f1ef69a79564669178016f916312eca7d4c 100644
--- a/tensorflow/core/profiler/g3doc/advise.md
+++ b/tensorflow/core/profiler/g3doc/advise.md
@@ -1,6 +1,6 @@
 ## Auto Detect and Advise
 
-tfprof analyzes profiles and generates advises for common issues.
+tfprof analyzes profiles and generates advice for common issues.
 
 ### Run Advise.
 
diff --git a/tensorflow/core/profiler/g3doc/command_line.md b/tensorflow/core/profiler/g3doc/command_line.md
index d41ac7290db66854faa2178e95f9ce00d8c825b6..bbaf55e613f6f30af5c27e6bdf61156859415c29 100644
--- a/tensorflow/core/profiler/g3doc/command_line.md
+++ b/tensorflow/core/profiler/g3doc/command_line.md
@@ -21,7 +21,8 @@ See QuickStart on generating the file.
 
 <b>THE OLD WAY BELOW IS DEPRECATED:</b>
 
-<b>--graph_path:</b> GraphDef proto file (required). Used to build in-memory
+<b>--graph_path:</b> GraphDef proto file (optional in eager execution).
+Used to build in-memory
 data structure of the model. For example, graph.pbtxt written by tf.Supervisor
 can be passed to --graph_path. You can also easily get GraphDef using
 tf.get_default_graph().as_graph_def(add_shapes=True) or other API.
@@ -72,6 +73,15 @@ bazel-bin/tensorflow/core/profiler/profiler help
 ```shell
 # The following commands will start tfprof interactive mode.
 #
+# Recommended:
+#
+# The file contains the binary string of ProfileProto.
+# It contains all needed information in one file.
+bazel-bin/tensorflow/core/profiler/profiler \
+    --profile_path=profile_xxx
+#
+# Alternatively, user can pass separate files.
+#
 # --graph_path contains the model architecutre and tensor shapes.
 # --run_meta_path contains the memory and time information.
 # --op_log_path contains float operation and code traces.
@@ -80,6 +90,11 @@ bazel-bin/tensorflow/core/profiler/profiler help
 # Only includes model architecture, parameters and shapes.
 bazel-bin/tensorflow/core/profiler/profiler \
     --graph_path=graph.pbtxt
+
+# For profiling eager execution, user can only specify run_meta_path
+# and profile execution info of each operation.
+bazel-bin/tensorflow/core/profiler/profiler \
+    --run_meta_path=run_meta
 #
 # Additionally profile ops memory and timing.
 bazel-bin/tensorflow/core/profiler/profiler \
diff --git a/tensorflow/core/profiler/g3doc/options.md b/tensorflow/core/profiler/g3doc/options.md
index 4c73e372e3bd9f24c83bdc0d3b8d98b5f8b03f11..7f2cd3f698c860f16cd7b027b5ff7c8e24338cf0 100644
--- a/tensorflow/core/profiler/g3doc/options.md
+++ b/tensorflow/core/profiler/g3doc/options.md
@@ -60,11 +60,14 @@ Currently, profiler only tracks the allocation of memory. As a result, the
 accumulated memory request is uaually larger than the peak memory of the overall
 model.
 
-bytes: The memory allocations requested by the operation.
-peak_bytes: The peak requested memory (not de-allocated) by the operation.
-residual_bytes: The memory requested by the operation and not de-allocated
+It's recommended to generate timeline to see the allocator memory usage over
+time.
+
+`bytes`: The memory allocations requested by the operation.
+`peak_bytes`: The peak requested memory (not de-allocated) by the operation.
+`residual_bytes`: The memory requested by the operation and not de-allocated
                 when Compute finishes.
-output_bytes: The memory output by the operation. It's not necessarily requested
+`output_bytes`: The memory output by the operation. It's not necessarily requested
               by the current operation. For example, it can be a tensor
               forwarded from input to output, with in-place mutation.
 
@@ -109,8 +112,8 @@ accelerator_micros and cpu_micros. Note: cpu and accelerator can run in parallel
 
 `-account_displayed_op_only`: If True, only account the statistics of ops eventually displayed. If False, account all op statistics matching -account_type_regexes recursively.
 
-
-Notes: See <b>overview</b> sesion on how does above options play with each other to decide the output and counting.
+Notes: See <b>overview</b> session on how does above options play with each
+other to decide the output and counting.
 
 `-select`: Comma-separated list of attributes to show. Supported attributes:
 [bytes|peak_bytes|residual_bytes|output_bytes|micros|accelerator_micros|cpu_micros|params|float_ops|occurrence|tensor_value|device|op_types|input_shapes].
diff --git a/tensorflow/core/profiler/internal/BUILD b/tensorflow/core/profiler/internal/BUILD
index edf6b32cfa4b9cd4831ac447e8384ada17d7fd8a..05a798bff80a0775e5170bf8f428d9e88d8060b3 100644
--- a/tensorflow/core/profiler/internal/BUILD
+++ b/tensorflow/core/profiler/internal/BUILD
@@ -16,7 +16,6 @@ cc_library(
         ":tfprof_graph",
         ":tfprof_node",
         ":tfprof_op",
-        ":tfprof_options",
         ":tfprof_scope",
         ":tfprof_show",
         ":tfprof_timeline",
@@ -26,6 +25,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -47,12 +47,12 @@ cc_library(
     srcs = ["tfprof_node.cc"],
     hdrs = ["tfprof_node.h"],
     deps = [
-        ":tfprof_options",
         ":tfprof_utils",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -63,7 +63,6 @@ cc_library(
     deps = [
         ":tfprof_constants",
         ":tfprof_node",
-        ":tfprof_options",
         ":tfprof_show",
         ":tfprof_tensor",
         ":tfprof_utils",
@@ -74,6 +73,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -84,7 +84,6 @@ cc_library(
     deps = [
         ":tfprof_constants",
         ":tfprof_node",
-        ":tfprof_options",
         ":tfprof_show_multi",
         ":tfprof_tensor",
         ":tfprof_utils",
@@ -94,6 +93,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -104,7 +104,6 @@ cc_library(
     deps = [
         ":tfprof_constants",
         ":tfprof_node",
-        ":tfprof_options",
         ":tfprof_show_multi",
         ":tfprof_timeline",
         ":tfprof_utils",
@@ -116,6 +115,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -126,7 +126,6 @@ cc_library(
     deps = [
         ":tfprof_constants",
         ":tfprof_node",
-        ":tfprof_options",
         ":tfprof_show",
         ":tfprof_tensor",
         ":tfprof_utils",
@@ -135,6 +134,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -145,11 +145,11 @@ cc_library(
     deps = [
         ":tfprof_constants",
         ":tfprof_node",
-        ":tfprof_options",
         ":tfprof_utils",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -161,7 +161,6 @@ cc_library(
         ":tfprof_constants",
         ":tfprof_node",
         ":tfprof_node_show",
-        ":tfprof_options",
         ":tfprof_tensor",
         ":tfprof_timeline",
         ":tfprof_utils",
@@ -170,6 +169,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -181,7 +181,6 @@ cc_library(
         ":tfprof_constants",
         ":tfprof_node",
         ":tfprof_node_show",
-        ":tfprof_options",
         ":tfprof_scope",
         ":tfprof_show",
         ":tfprof_tensor",
@@ -192,6 +191,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -209,7 +209,6 @@ tf_cc_test(
     ],
     deps = [
         ":tfprof_constants",
-        ":tfprof_options",
         ":tfprof_stats",
         ":tfprof_tf_testlib",
         ":tfprof_utils",
@@ -218,6 +217,7 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -231,7 +231,6 @@ tf_cc_test(
     ],
     deps = [
         ":tfprof_constants",
-        ":tfprof_options",
         ":tfprof_stats",
         ":tfprof_tf_testlib",
         ":tfprof_utils",
@@ -241,6 +240,7 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -250,21 +250,10 @@ cc_library(
     hdrs = ["tfprof_utils.h"],
     copts = if_not_windows(["-Wno-sign-compare"]),
     deps = [
-        ":tfprof_options",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
-    ],
-)
-
-cc_library(
-    name = "tfprof_options",
-    srcs = ["tfprof_options.cc"],
-    hdrs = ["tfprof_options.h"],
-    deps = [
-        "//tensorflow/core:framework_headers_lib",
-        "//tensorflow/core:lib",
-        "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -279,13 +268,13 @@ cc_library(
     srcs = ["print_model_analysis.cc"],
     hdrs = ["print_model_analysis.h"],
     deps = [
-        ":tfprof_options",
         ":tfprof_stats",
         "//tensorflow/c:checkpoint_reader",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
         "//tensorflow/core/profiler/internal/advisor:tfprof_advisor",
     ],
     alwayslink = 1,
@@ -305,7 +294,6 @@ tf_cc_test(
     ],
     deps = [
         ":tfprof_constants",
-        ":tfprof_options",
         ":tfprof_stats",
         ":tfprof_tf_testlib",
         ":tfprof_utils",
@@ -314,6 +302,7 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
@@ -340,7 +329,6 @@ tf_cc_test(
         "testdata/graph.pbtxt",
     ],
     deps = [
-        ":tfprof_options",
         ":tfprof_stats",
         ":tfprof_tf_testlib",
         ":tfprof_utils",
@@ -349,6 +337,7 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core/profiler:protos_all_cc",
+        "//tensorflow/core/profiler:tfprof_options",
     ],
 )
 
diff --git a/tensorflow/core/profiler/internal/print_model_analysis.cc b/tensorflow/core/profiler/internal/print_model_analysis.cc
index 7a0d590262fe623f701e21c979e53f2abc103305..5a31c7d789e70530586efc1fdfed158d5d19cabb 100644
--- a/tensorflow/core/profiler/internal/print_model_analysis.cc
+++ b/tensorflow/core/profiler/internal/print_model_analysis.cc
@@ -22,13 +22,13 @@ limitations under the License.
 #include "tensorflow/c/checkpoint_reader.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/core/profiler/internal/advisor/tfprof_advisor.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_stats.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_options.pb.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
+#include "tensorflow/core/protobuf/config.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -84,12 +84,13 @@ string RunProfile(const string& command, const string& options,
 }  // namespace
 
 bool NewProfiler(const string* graph, const string* op_log) {
-  CHECK(graph) << "graph mustn't be null";
   std::unique_ptr<GraphDef> graph_ptr(new GraphDef());
-  if (!graph_ptr->ParseFromString(*graph)) {
-    if (!protobuf::TextFormat::ParseFromString(*graph, graph_ptr.get())) {
-      fprintf(stderr, "Failed to parse graph\n");
-      return false;
+  if (graph && !graph->empty()) {
+    if (!graph_ptr->ParseFromString(*graph)) {
+      if (!protobuf::TextFormat::ParseFromString(*graph, graph_ptr.get())) {
+        fprintf(stderr, "Failed to parse graph\n");
+        return false;
+      }
     }
   }
 
@@ -123,14 +124,15 @@ double AddStep(int64 step, const string* graph, const string* run_meta,
                const string* op_log) {
   CHECK(tf_stat);
 
-  CHECK(graph && !graph->empty());
-  std::unique_ptr<GraphDef> graph_ptr(new GraphDef());
-  if (!graph_ptr->ParseFromString(*graph)) {
-    if (!protobuf::TextFormat::ParseFromString(*graph, graph_ptr.get())) {
-      fprintf(stderr, "Failed to parse graph\n");
+  if (graph && !graph->empty()) {
+    std::unique_ptr<GraphDef> graph_ptr(new GraphDef());
+    if (!graph_ptr->ParseFromString(*graph)) {
+      if (!protobuf::TextFormat::ParseFromString(*graph, graph_ptr.get())) {
+        fprintf(stderr, "Failed to parse graph\n");
+      }
     }
+    tf_stat->AddGraph(std::move(graph_ptr));
   }
-  tf_stat->AddGraph(std::move(graph_ptr));
 
   CHECK(run_meta && !run_meta->empty());
   // TODO(xpan): Better error handling.
@@ -154,6 +156,13 @@ string Profile(const string* command, const string* options) {
   return RunProfile(*command, *options, tf_stat);
 }
 
+string SerializeToString() {
+  CHECK(tf_stat);
+  string content;
+  tf_stat->SerializeToString(&content);
+  return content;
+}
+
 void WriteProfile(const string* filename) {
   CHECK(tf_stat);
   CHECK(filename) << "empty file name when asking to write profile.";
@@ -163,11 +172,12 @@ void WriteProfile(const string* filename) {
 string PrintModelAnalysis(const string* graph, const string* run_meta,
                           const string* op_log, const string* command,
                           const string* options) {
-  CHECK(graph) << "graph mustn't be null";
   CHECK(command) << "command mustn't be null";
   CHECK(options) << "options mustn't be null";
   std::unique_ptr<GraphDef> graph_ptr(new GraphDef());
-  graph_ptr->ParseFromString(*graph);
+  if (graph && !graph->empty()) {
+    graph_ptr->ParseFromString(*graph);
+  }
 
   std::unique_ptr<RunMetadata> run_meta_ptr;
   if (run_meta && !run_meta->empty()) {
diff --git a/tensorflow/core/profiler/internal/print_model_analysis.h b/tensorflow/core/profiler/internal/print_model_analysis.h
index 31ff5b07b060b43fab6c0b458f6f43c4dcc0576b..90166aa7d5fc16efa1b7d405af4b15491872ad54 100644
--- a/tensorflow/core/profiler/internal/print_model_analysis.h
+++ b/tensorflow/core/profiler/internal/print_model_analysis.h
@@ -44,6 +44,9 @@ void WriteProfile(const string* filename);
 // Load the profile to profiler from a proto buffer file.
 void ProfilerFromFile(const string* filename);
 
+// Returns a binary string that represents the serialized ProfileProto.
+string SerializeToString();
+
 string Profile(const string* command, const string* options);
 
 // Single-step Profiler.
diff --git a/tensorflow/core/profiler/internal/testdata/run_meta b/tensorflow/core/profiler/internal/testdata/run_meta
index ae76acb743fc517239206228369b175c00c1c248..eaea62b06c8f1b7a968948614fee208a7b81c9b2 100644
Binary files a/tensorflow/core/profiler/internal/testdata/run_meta and b/tensorflow/core/profiler/internal/testdata/run_meta differ
diff --git a/tensorflow/core/profiler/internal/tfprof_code.h b/tensorflow/core/profiler/internal/tfprof_code.h
index a118752fce59006f1992ef78380920f52024f9a2..bcbdc1b48c490b40d7fbf460c7f57a3eefef2a0a 100644
--- a/tensorflow/core/profiler/internal/tfprof_code.h
+++ b/tensorflow/core/profiler/internal/tfprof_code.h
@@ -28,12 +28,12 @@ limitations under the License.
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/profiler/internal/tfprof_node.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_show_multi.h"
 #include "tensorflow/core/profiler/internal/tfprof_timeline.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
 #include "tensorflow/core/profiler/profile.pb.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/profiler/internal/tfprof_graph.h b/tensorflow/core/profiler/internal/tfprof_graph.h
index 8dac4aee77a456f9bb43d1fea255d8d4655c255b..f7eef9c835b1985ccb8436691a35cfd779d94a8d 100644
--- a/tensorflow/core/profiler/internal/tfprof_graph.h
+++ b/tensorflow/core/profiler/internal/tfprof_graph.h
@@ -30,9 +30,9 @@ limitations under the License.
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/profiler/internal/tfprof_node.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_show.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc
index 671b65d708f57713d984331de73ddf305675b792..86cb20de7bbb4f36bfaa431bc2b81a00dace84df 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.cc
+++ b/tensorflow/core/profiler/internal/tfprof_node.cc
@@ -80,10 +80,15 @@ void ExecStep::AddTimeStats(const string& dev, const NodeExecStats& step_stat) {
 
 void ExecStep::AddMemoryStats(const string& dev,
                               const NodeExecStats& step_stat) {
-  if (exec_.memory_intialized()) {
+  ExecMemory exec_mem;
+  if (step_stat.all_start_micros() > 0) {
+    exec_mem.set_memory_micros(step_stat.all_start_micros() +
+                               step_stat.op_end_rel_micros());
+  } else {
+    fprintf(stderr, "%s has no start time, skipping\n",
+            step_stat.node_name().c_str());
     return;
   }
-  exec_.set_memory_intialized(true);
 
   int accelerator_allocator_cnt = 0;
   for (const auto& mem : step_stat.memory()) {
@@ -93,14 +98,12 @@ void ExecStep::AddMemoryStats(const string& dev,
       continue;
     }
     ++accelerator_allocator_cnt;
-    exec_.set_allocator_bytes_in_use(
-        std::max(static_cast<int64>(exec_.allocator_bytes_in_use()),
+    exec_mem.set_allocator_bytes_in_use(
+        std::max(static_cast<int64>(exec_mem.allocator_bytes_in_use()),
                  static_cast<int64>(mem.allocator_bytes_in_use())));
-    Allocation allocation;
     for (const auto& alloc : mem.allocation_records()) {
-      allocation.add_allocation_records()->MergeFrom(alloc);
+      allocations_.push_back(alloc);
     }
-    allocations_.push_back(allocation);
   }
   if (accelerator_allocator_cnt > 1) {
     fprintf(stderr, "found %d gpu allocator for 1 node\n",
@@ -121,24 +124,50 @@ void ExecStep::AddMemoryStats(const string& dev,
       uint64 output_ptr =
           output.tensor_description().allocation_description().ptr();
       total_output_bytes += output_bytes;
-      output_memory_[output.slot()] = std::make_pair(output_bytes, output_ptr);
+
+      auto& mem = (*exec_mem.mutable_output_memory())[output.slot()];
+      mem.set_ptr(output_ptr);
+      mem.set_bytes(output_bytes);
     }
   }
-  exec_.set_output_bytes(total_output_bytes);
+  exec_mem.set_output_bytes(total_output_bytes);
 
   if (step_stat.has_memory_stats()) {
-    exec_.set_host_temp_bytes(exec_.host_temp_bytes() +
-                              step_stat.memory_stats().host_temp_memory_size());
-    exec_.set_host_persistent_bytes(
-        exec_.host_persistent_bytes() +
-        step_stat.memory_stats().host_persistent_memory_size());
-    exec_.set_accelerator_temp_bytes(
-        exec_.accelerator_temp_bytes() +
-        step_stat.memory_stats().device_temp_memory_size());
-    exec_.set_accelerator_persistent_bytes(
-        exec_.accelerator_persistent_bytes() +
-        step_stat.memory_stats().device_persistent_memory_size());
+    if (IsPlacedOnCPU(dev)) {
+      // Currently we assume ops placed on gpu only allocate memory on gpu.
+      exec_mem.set_host_temp_bytes(exec_mem.host_temp_bytes() +
+                                   step_stat.memory_stats().temp_memory_size());
+      exec_mem.set_host_persistent_bytes(
+          exec_mem.host_persistent_bytes() +
+          step_stat.memory_stats().persistent_memory_size());
+    } else {
+      exec_mem.set_accelerator_temp_bytes(
+          exec_mem.accelerator_temp_bytes() +
+          step_stat.memory_stats().temp_memory_size());
+      exec_mem.set_accelerator_persistent_bytes(
+          exec_mem.accelerator_persistent_bytes() +
+          step_stat.memory_stats().persistent_memory_size());
+    }
   }
+
+  // TODO(xpan): Make this more accurate:
+  // High level: Memory tracking is suspicous and requires large scale
+  // clean up.
+  // Investigte the memory usage difference between CPU/GPU with OpViewTest.
+  //
+  // 1. OpKernelConstruction::allocate_xxx is not traced. Below, we only
+  //    discuss OpKernelContext-related allocations.
+  // 2. allocate_output calls allocate_tensor, which is properly tracked in
+  //    'NodeExecStats.memory'.
+  // 3. allocate_temp is only tracked through record_xxx_temp. It appears
+  //    in 'NodeExecStats.memory_stats'.
+  // 4. allocate_persistent calls allocate_tensor, which is properly tracked
+  //    in 'NodeExecStats.memory'. However, there is no way to count it as
+  //    persistent now.
+  // 5. record_xxx_persistent is called when allocate_persistent
+  //    is not used and hence tracks some complementary bytes. It appears in
+  //    'NodeExecStats.memory_stats'. It's suspicious. But we should
+  //    use it now since it covers constant op.
   int64 residual_bytes = 0;
   int64 requested_bytes = 0;
   int64 peak_bytes = 0;
@@ -147,9 +176,20 @@ void ExecStep::AddMemoryStats(const string& dev,
     requested_bytes += mem.total_bytes();
     peak_bytes += mem.peak_bytes();
   }
-  exec_.set_requested_bytes(requested_bytes);
-  exec_.set_residual_bytes(residual_bytes);
-  exec_.set_peak_bytes(peak_bytes);
+  residual_bytes += exec_mem.host_persistent_bytes() +
+                    exec_mem.accelerator_persistent_bytes();
+  requested_bytes += exec_mem.host_persistent_bytes() +
+                     exec_mem.accelerator_persistent_bytes() +
+                     exec_mem.host_temp_bytes() +
+                     exec_mem.accelerator_temp_bytes();
+  peak_bytes += exec_mem.host_persistent_bytes() +
+                exec_mem.accelerator_persistent_bytes() +
+                exec_mem.host_temp_bytes() + exec_mem.accelerator_temp_bytes();
+
+  exec_mem.set_requested_bytes(requested_bytes);
+  exec_mem.set_residual_bytes(residual_bytes);
+  exec_mem.set_peak_bytes(peak_bytes);
+  memory_execs_.emplace_back(exec_mem);
 }
 
 void TFGraphNode::AddStepStat(int64 step, const string& device,
@@ -251,5 +291,8 @@ bool IsPlacedOnAccelerator(const string& device) {
   return device.find("gpu") != device.npos ||
          device.find("sycl") != device.npos;
 }
+bool IsPlacedOnCPU(const string& device) {
+  return device.find("cpu") != device.npos;
+}
 }  // namespace tfprof
 }  // namespace tensorflow
diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h
index e2d0563a0747d7bec74ce3aeb9d5995f47cff915..255a0987e68400badeb24457e834646c3306f11a 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.h
+++ b/tensorflow/core/profiler/internal/tfprof_node.h
@@ -31,8 +31,8 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/regexp.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -109,7 +109,6 @@ class ExecStep {
       const {
     return cpu_execs_;
   }
-
   int64 all_start_micros() const { return exec_.all_start_micros(); }
   int64 latest_end_micros() const { return exec_.latest_end_micros(); }
   int64 lastest_schedule_end_micros() const {
@@ -121,27 +120,73 @@ class ExecStep {
     }
     return ret;
   }
-
-  int64 requested_bytes() const { return exec_.requested_bytes(); }
-  int64 peak_bytes() const { return exec_.peak_bytes(); }
-  int64 residual_bytes() const { return exec_.residual_bytes(); }
-  int64 output_bytes() const { return exec_.output_bytes(); }
+  int64 requested_bytes() const {
+    int64 requested_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      requested_bytes += exec.requested_bytes();
+    }
+    return requested_bytes;
+  }
+  int64 peak_bytes() const {
+    int64 peak_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      peak_bytes += exec.peak_bytes();
+    }
+    return peak_bytes;
+  }
+  int64 residual_bytes() const {
+    int64 residual_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      residual_bytes += exec.residual_bytes();
+    }
+    return residual_bytes;
+  }
+  int64 output_bytes() const {
+    int64 output_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      output_bytes += exec.output_bytes();
+    }
+    return output_bytes;
+  }
   int64 accelerator_temp_bytes() const {
-    return exec_.accelerator_temp_bytes();
+    int64 accelerator_temp_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      accelerator_temp_bytes += exec.accelerator_temp_bytes();
+    }
+    return accelerator_temp_bytes;
+  }
+  int64 host_temp_bytes() const {
+    int64 host_temp_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      host_temp_bytes += exec.host_temp_bytes();
+    }
+    return host_temp_bytes;
   }
-  int64 host_temp_bytes() const { return exec_.host_temp_bytes(); }
   int64 accelerator_persistent_bytes() const {
-    return exec_.accelerator_persistent_bytes();
+    int64 accelerator_persistent_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      accelerator_persistent_bytes += exec.accelerator_persistent_bytes();
+    }
+    return accelerator_persistent_bytes;
   }
-  int64 host_persistent_bytes() const { return exec_.host_persistent_bytes(); }
-  const std::map<int32, std::pair<int64, uint64>>& output_memory() const {
-    return output_memory_;
+  int64 host_persistent_bytes() const {
+    int64 host_persistent_bytes = 0;
+    for (const ExecMemory& exec : memory_execs_) {
+      host_persistent_bytes += exec.host_persistent_bytes();
+    }
+    return host_persistent_bytes;
   }
-  int64 allocator_bytes_in_use() const {
-    return exec_.allocator_bytes_in_use();
+  std::map<int64, int64> allocator_bytes_in_use() const {
+    std::map<int64, int64> bytes_in_use;
+    for (const ExecMemory& exec : memory_execs_) {
+      bytes_in_use[exec.memory_micros()] = exec.allocator_bytes_in_use();
+    }
+    return bytes_in_use;
   }
 
-  const std::vector<Allocation>& allocations() const { return allocations_; }
+  const std::vector<AllocationRecord>& allocations() const {
+    return allocations_;
+  }
 
   const ExecProfile& ToProto() {
     exec_.mutable_accelerator_execs()->clear();
@@ -169,19 +214,15 @@ class ExecStep {
     for (const string& d : devices_) {
       exec_.add_devices(d);
     }
-
-    exec_.mutable_output_memory()->clear();
-    for (const auto& mem : output_memory_) {
-      auto& mem_pb = (*exec_.mutable_output_memory())[mem.first];
-      mem_pb.set_bytes(mem.second.first);
-      mem_pb.set_ptr(mem.second.second);
-    }
-
     exec_.mutable_allocations()->Clear();
     for (const auto& r : allocations_) {
       exec_.add_allocations()->MergeFrom(r);
     }
 
+    exec_.mutable_memory_execs()->Clear();
+    for (const auto& m : memory_execs_) {
+      exec_.add_memory_execs()->MergeFrom(m);
+    }
     return exec_;
   }
 
@@ -197,6 +238,7 @@ class ExecStep {
     op_execs_.clear();
 
     allocations_.clear();
+    memory_execs_.clear();
 
     for (const auto& exec_time : exec_.accelerator_execs()) {
       auto& exec = accelerator_execs_[exec_time.first];
@@ -214,15 +256,12 @@ class ExecStep {
         op_exec.push_back(std::make_pair(p.int64_values(0), p.int64_values(1)));
       }
     }
-    for (const auto& output_mem : exec_.output_memory()) {
-      auto& mem = output_memory_[output_mem.first];
-      mem.first = output_mem.second.bytes();
-      mem.second = output_mem.second.ptr();
-    }
-
     for (const auto& r : exec_.allocations()) {
       allocations_.push_back(r);
     }
+    for (const auto& m : exec_.memory_execs()) {
+      memory_execs_.push_back(m);
+    }
   }
 
  private:
@@ -237,14 +276,15 @@ class ExecStep {
   std::map<string, std::vector<std::pair<int64, int64>>> cpu_execs_;
   // combines accelerator_execs_ and cpu_execs_.
   std::map<string, std::vector<std::pair<int64, int64>>> op_execs_;
+  // Each ExecMemory corresponds to one scheduling of the op. Normally,
+  // there are multiple schedulings in while_loop.
+  std::vector<ExecMemory> memory_execs_;
   // All devices the op is associated with (e.g. gpu:0 (scheduling),
   // gpu:0:stream:xx (kernel exec), cpu:0 host)
   std::set<string> devices_;
-  // output_idx -> {output_bytes, memory_ptr}
-  std::map<int32, std::pair<int64, uint64>> output_memory_;
 
   // The history of accelerator allocations and deallocations of this step.
-  std::vector<Allocation> allocations_;
+  std::vector<AllocationRecord> allocations_;
 };
 
 #define GRAPH_NODE_BYTES(type)             \
@@ -593,34 +633,20 @@ class TFGraphNode {
   int64 accelerator_persistent_bytes() const {
     int64 persistent_bytes = 0;
     for (const auto& exec : execs_) {
-      persistent_bytes += exec.second.accelerator_persistent_bytes();
+      persistent_bytes = std::max(persistent_bytes,
+                                  exec.second.accelerator_persistent_bytes());
     }
     return persistent_bytes;
   }
-  int64 host_persistent_bytes(int64 step) const {
+  const std::map<int64, int64> allocator_bytes_in_use(int64 step) const {
     auto exec = execs_.find(step);
     if (exec == execs_.end()) {
-      return 0;
-    }
-    return exec->second.host_persistent_bytes();
-  }
-  const std::map<int32, std::pair<int64, uint64>>& output_memory(
-      int64 step) const {
-    auto exec = execs_.find(step);
-    if (exec == execs_.end()) {
-      return empty_output_memory_;
-    }
-    return exec->second.output_memory();
-  }
-  int64 allocator_bytes_in_use(int64 step) const {
-    auto exec = execs_.find(step);
-    if (exec == execs_.end()) {
-      return 0;
+      return empty_bytes_in_use_;
     }
     return exec->second.allocator_bytes_in_use();
   }
 
-  const std::vector<Allocation>& allocations(int64 step) const {
+  const std::vector<AllocationRecord>& allocations(int64 step) const {
     auto exec = execs_.find(step);
     if (exec == execs_.end()) {
       return empty_allocations_;
@@ -725,9 +751,9 @@ class TFGraphNode {
   std::map<int64, ExecStep> execs_;
 
   // Placeholder for empty cases.
-  std::map<int32, std::pair<int64, uint64>> empty_output_memory_;
+  std::map<int64, int64> empty_bytes_in_use_;
   std::map<string, std::vector<std::pair<int64, int64>>> empty_execs_;
-  std::vector<Allocation> empty_allocations_;
+  std::vector<AllocationRecord> empty_allocations_;
 };
 
 class TFMultiGraphNode {
@@ -880,6 +906,7 @@ class TFMultiGraphNode {
   std::map<string, const TFGraphNode*> nodes_;
 };
 
+bool IsPlacedOnCPU(const string& device);
 bool IsPlacedOnAccelerator(const string& device);
 bool CountAsAcceleratorTime(const string& device);
 bool CountAsCPUTime(const string& device);
diff --git a/tensorflow/core/profiler/internal/tfprof_node_show.h b/tensorflow/core/profiler/internal/tfprof_node_show.h
index 3788bf3e80dd891d0ff6d71fd029b347c89f999a..ca6f9bca5e8fcf1a1e8d39d66b28de7e2fcc3f79 100644
--- a/tensorflow/core/profiler/internal/tfprof_node_show.h
+++ b/tensorflow/core/profiler/internal/tfprof_node_show.h
@@ -32,8 +32,8 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/profiler/internal/tfprof_constants.h"
 #include "tensorflow/core/profiler/internal/tfprof_node.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/profiler/internal/tfprof_op.h b/tensorflow/core/profiler/internal/tfprof_op.h
index 55a346c7e8d64ab139ab565ded39a745621d361a..fcc5e68f474e643a6e23dc9fc17dce7eca6f04b1 100644
--- a/tensorflow/core/profiler/internal/tfprof_op.h
+++ b/tensorflow/core/profiler/internal/tfprof_op.h
@@ -29,9 +29,9 @@ limitations under the License.
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/profiler/internal/tfprof_node.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_show_multi.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/profiler/internal/tfprof_scope.h b/tensorflow/core/profiler/internal/tfprof_scope.h
index 710991dde6bcda4b10c69124991aa5ba32026f16..bb847c08666df232a472aca8c882decb630c736d 100644
--- a/tensorflow/core/profiler/internal/tfprof_scope.h
+++ b/tensorflow/core/profiler/internal/tfprof_scope.h
@@ -29,9 +29,9 @@ limitations under the License.
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/profiler/internal/tfprof_node.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_show.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/profiler/internal/tfprof_show.cc b/tensorflow/core/profiler/internal/tfprof_show.cc
index cf28876089d21f0f8118fbbe0cd51a616e97cbc8..f09cd1dad99de1075d045afc5d413dc33080c70c 100644
--- a/tensorflow/core/profiler/internal/tfprof_show.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show.cc
@@ -25,19 +25,19 @@ limitations under the License.
 namespace tensorflow {
 namespace tfprof {
 
-const GraphNodeProto& TFShow::Show(const Options& opts) {
+const GraphNodeProto& TFShow::Show(const string& prefix, const Options& opts) {
   if (opts.output_type == kOutput[0]) {
     Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
     return ShowInternal(opts, &timeline)->proto();
   } else {
     const ShowNode* ret = ShowInternal(opts, nullptr);
     if (opts.output_type == kOutput[1]) {
-      printf("%s", ret->formatted_str.c_str());
+      printf("%s", (prefix + ret->formatted_str).c_str());
       fflush(stdout);
     } else if (opts.output_type == kOutput[2]) {
       Status s = WriteStringToFile(Env::Default(),
                                    opts.output_options.at(kFileOpts[0]),
-                                   ret->formatted_str);
+                                   prefix + ret->formatted_str);
       if (!s.ok()) {
         fprintf(stderr, "%s\n", s.ToString().c_str());
       }
diff --git a/tensorflow/core/profiler/internal/tfprof_show.h b/tensorflow/core/profiler/internal/tfprof_show.h
index 08c231bad7f216e0e00322c095017b3f0356f64a..2067ea3b735a07922168f8b557e6cd8be534b408 100644
--- a/tensorflow/core/profiler/internal/tfprof_show.h
+++ b/tensorflow/core/profiler/internal/tfprof_show.h
@@ -29,10 +29,10 @@ limitations under the License.
 #include "tensorflow/core/profiler/internal/tfprof_constants.h"
 #include "tensorflow/core/profiler/internal/tfprof_node.h"
 #include "tensorflow/core/profiler/internal/tfprof_node_show.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_tensor.h"
 #include "tensorflow/core/profiler/internal/tfprof_timeline.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
 
 namespace tensorflow {
@@ -44,7 +44,8 @@ class TFShow {
   virtual ~TFShow() {}
   virtual void AddNode(TFGraphNode* node) = 0;
   virtual void Build() = 0;
-  const GraphNodeProto& Show(const Options& opts);
+  virtual const GraphNodeProto& Show(const string& prefix,
+                                     const Options& opts) final;
 
  protected:
   virtual const ShowNode* ShowInternal(const Options& opts,
diff --git a/tensorflow/core/profiler/internal/tfprof_show_multi.cc b/tensorflow/core/profiler/internal/tfprof_show_multi.cc
index eb826a7137618ba964f6b58f225e0921ea7f5c33..7c65d48d4a148399a1f9b4b6ec1a9058166d9cf5 100644
--- a/tensorflow/core/profiler/internal/tfprof_show_multi.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show_multi.cc
@@ -27,19 +27,20 @@ limitations under the License.
 namespace tensorflow {
 namespace tfprof {
 
-const MultiGraphNodeProto& TFMultiShow::Show(const Options& opts) {
+const MultiGraphNodeProto& TFMultiShow::Show(const string& prefix,
+                                             const Options& opts) {
   if (opts.output_type == kOutput[0]) {
     Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
     return ShowInternal(opts, &timeline)->proto();
   } else {
     const ShowMultiNode* ret = ShowInternal(opts, nullptr);
     if (opts.output_type == kOutput[1]) {
-      printf("%s", ret->formatted_str.c_str());
+      printf("%s", (prefix + ret->formatted_str).c_str());
       fflush(stdout);
     } else if (opts.output_type == kOutput[2]) {
       Status s = WriteStringToFile(Env::Default(),
                                    opts.output_options.at(kFileOpts[0]),
-                                   ret->formatted_str);
+                                   prefix + ret->formatted_str);
       if (!s.ok()) {
         fprintf(stderr, "%s\n", s.ToString().c_str());
       }
diff --git a/tensorflow/core/profiler/internal/tfprof_show_multi.h b/tensorflow/core/profiler/internal/tfprof_show_multi.h
index a632c669336b02106c0c2883c22157b05040f189..ac0ada04490a10330f0596e60f103fbfbe75fe4c 100644
--- a/tensorflow/core/profiler/internal/tfprof_show_multi.h
+++ b/tensorflow/core/profiler/internal/tfprof_show_multi.h
@@ -29,11 +29,11 @@ limitations under the License.
 #include "tensorflow/core/profiler/internal/tfprof_constants.h"
 #include "tensorflow/core/profiler/internal/tfprof_node.h"
 #include "tensorflow/core/profiler/internal/tfprof_node_show.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_show.h"
 #include "tensorflow/core/profiler/internal/tfprof_tensor.h"
 #include "tensorflow/core/profiler/internal/tfprof_timeline.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
 
 namespace tensorflow {
@@ -45,7 +45,8 @@ class TFMultiShow {
   virtual ~TFMultiShow() {}
   virtual void AddNode(TFGraphNode* node) = 0;
   virtual void Build() = 0;
-  const MultiGraphNodeProto& Show(const Options& opts);
+  virtual const MultiGraphNodeProto& Show(const string& prefix,
+                                          const Options& opts) final;
 
  protected:
   virtual const ShowMultiNode* ShowInternal(const Options& opts,
diff --git a/tensorflow/core/profiler/internal/tfprof_show_test.cc b/tensorflow/core/profiler/internal/tfprof_show_test.cc
index 1f19f8c322a15a726ce354ecf991ea902788d97b..625f64cae5e0040d93ac0bf1c5b5d0788af74ba1 100644
--- a/tensorflow/core/profiler/internal/tfprof_show_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show_test.cc
@@ -23,14 +23,21 @@ limitations under the License.
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/profiler/internal/tfprof_constants.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
+
+string CheckAndRemoveDoc(const string& doc) {
+  auto pos = doc.find("Profile:");
+  CHECK(pos != doc.npos);
+  return doc.substr(pos + 9);
+}
+
 class TFProfShowTest : public ::testing::Test {
  protected:
   TFProfShowTest() {
@@ -105,13 +112,14 @@ TEST_F(TFProfShowTest, DumpScopeMode) {
       "node name | # parameters | # float_ops | requested bytes | peak bytes | "
       "residual bytes | output bytes | total execution time | accelerator "
       "execution time | cpu execution time\n_TFProfRoot (--/451 params, --/0 "
-      "flops, --/0B, --/0B, --/0B, --/2.56KB, --/13us, --/0us, --/13us)\n  DW "
-      "(3x3x3x6, 162/162 params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, "
-      "1.28KB/1.28KB, 2us/2us, 0us/0us, 2us/2us)\n  DW2 (2x2x6x12, 288/288 "
-      "params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, 1.28KB/1.28KB, 11us/11us, "
-      "0us/0us, 11us/11us)\n  ScalarW (1, 1/1 params, 0/0 flops, 0B/0B, 0B/0B, "
-      "0B/0B, 0B/0B, 0us/0us, 0us/0us, 0us/0us)\n",
-      dump_str);
+      "flops, --/2.56KB, --/2.56KB, --/2.56KB, --/2.56KB, --/13us, --/0us, "
+      "--/13us)\n  DW (3x3x3x6, 162/162 params, 0/0 flops, 1.28KB/1.28KB, "
+      "1.28KB/1.28KB, 1.28KB/1.28KB, 1.28KB/1.28KB, 2us/2us, 0us/0us, "
+      "2us/2us)\n  DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.28KB/1.28KB, "
+      "1.28KB/1.28KB, 1.28KB/1.28KB, 1.28KB/1.28KB, 11us/11us, 0us/0us, "
+      "11us/11us)\n  ScalarW (1, 1/1 params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, "
+      "0B/0B, 0us/0us, 0us/0us, 0us/0us)\n",
+      CheckAndRemoveDoc(dump_str));
 
   EXPECT_EQ(dump_str, TestToFromProto("scope", opts));
 }
@@ -158,7 +166,7 @@ TEST_F(TFProfShowTest, DumpAcceleratorAndCPUMicros) {
       "0us/0us)\n        ScalarW/Initializer/random_normal/stddev (0us/0us, "
       "0us/0us)\n    ScalarW/read (0us/0us, 0us/0us)\n  init (0us/0us, "
       "0us/0us)\n",
-      dump_str);
+      CheckAndRemoveDoc(dump_str));
 
   EXPECT_EQ(dump_str, TestToFromProto("scope", opts));
 }
@@ -178,22 +186,22 @@ TEST_F(TFProfShowTest, DumpOpMode) {
   EXPECT_EQ(
       "nodename|requestedbytes|totalexecutiontime|acceleratorexecutiontime|"
       "cpuexecutiontime|#parameters|#float_ops|opoccurrence(run|defined)|"
-      "inputshapes\nVariableV20B(0.00%,0.00%),13us(100.00%,0.26%),0us(100.00%,"
-      "0.00%),13us(100.00%,0.29%),451params(100.00%,100.00%),0float_ops(100.00%"
-      ",0.00%),2|3\n\ninput_type:\t(run*2|defined*3)\texec_time:13us\n\nAdd0B("
-      "0.00%,0.00%),0us(99.74%,0.00%),0us(100.00%,0.00%),0us(99.71%,0.00%),"
-      "0params(0.00%,0.00%),0float_ops(100.00%,0.00%),0|3\n\ninput_type:0:1,"
-      "\t1:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:2x2x6x12,\t1:1\t("
-      "run*0|defined*1)\texec_time:0us\ninput_type:0:3x3x3x6,\t1:1\t(run*0|"
-      "defined*1)\texec_time:0us\n\nAssign0B(0.00%,0.00%),0us(99.74%,0.00%),"
-      "0us(100.00%,0.00%),0us(99.71%,0.00%),0params(0.00%,0.00%),0float_ops("
-      "100.00%,0.00%),0|3\n\ninput_type:0:1,\t1:1\t(run*0|defined*1)\texec_"
+      "inputshapes\nVariableV22.56KB(100.00%,8.40%),13us(100.00%,0.26%),0us("
+      "100.00%,0.00%),13us(100.00%,0.29%),451params(100.00%,100.00%),0float_"
+      "ops(100.00%,0.00%),2|3\n\ninput_type:\t(run*2|defined*3)\texec_time:"
+      "13us\n\nAdd0B(0.00%,0.00%),0us(99.74%,0.00%),0us(100.00%,0.00%),0us(99."
+      "71%,0.00%),0params(0.00%,0.00%),0float_ops(100.00%,0.00%),0|3\n\ninput_"
+      "type:0:1,\t1:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:2x2x6x12,"
+      "\t1:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:3x3x3x6,\t1:1\t("
+      "run*0|defined*1)\texec_time:0us\n\nAssign0B(0.00%,0.00%),0us(99.74%,0."
+      "00%),0us(100.00%,0.00%),0us(99.71%,0.00%),0params(0.00%,0.00%),0float_"
+      "ops(100.00%,0.00%),0|3\n\ninput_type:0:1,\t1:1\t(run*0|defined*1)\texec_"
       "time:0us\ninput_type:0:2x2x6x12,\t1:2x2x6x12\t(run*0|defined*1)\texec_"
       "time:0us\ninput_type:0:3x3x3x6,\t1:3x3x3x6\t(run*0|defined*1)\texec_"
       "time:0us\n\nConst0B(0.00%,0.00%),2us(99.74%,0.04%),0us(100.00%,0.00%),"
       "2us(99.71%,0.04%),0params(0.00%,0.00%),0float_ops(100.00%,0.00%),1|"
-      "10\n\ninput_type:\t(run*1|defined*10)\texec_time:2us\n\nConv2D14.59KB("
-      "100.00%,100.00%),4.89ms(99.70%,98.87%),404us(100.00%,100.00%),4.49ms(99."
+      "10\n\ninput_type:\t(run*1|defined*10)\texec_time:2us\n\nConv2D27.90KB("
+      "91.60%,91.60%),4.89ms(99.70%,98.87%),404us(100.00%,100.00%),4.49ms(99."
       "67%,98.77%),0params(0.00%,0.00%),10.44kfloat_ops(100.00%,100.00%),2|"
       "2\n\ninput_type:0:2x3x3x6,\t1:2x2x6x12\t(run*1|defined*1)\texec_time:"
       "597us\ninput_type:0:2x6x6x3,\t1:3x3x3x6\t(run*1|defined*1)\texec_time:4."
@@ -202,7 +210,7 @@ TEST_F(TFProfShowTest, DumpOpMode) {
       "type:0:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:2x2x6x12\t(run*"
       "0|defined*1)\texec_time:0us\ninput_type:0:3x3x3x6\t(run*0|defined*1)"
       "\texec_time:0us\n\n",
-      StringReplace(dump_str, " ", ""));
+      StringReplace(CheckAndRemoveDoc(dump_str), " ", ""));
 
   EXPECT_EQ(dump_str, TestToFromProto("op", opts, true));
 }
diff --git a/tensorflow/core/profiler/internal/tfprof_stats.cc b/tensorflow/core/profiler/internal/tfprof_stats.cc
index 7943c075e0243e652cb19125dae95b04dc709f97..5b91309c800fe877ddd45413be4b32125cf7980d 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats.cc
+++ b/tensorflow/core/profiler/internal/tfprof_stats.cc
@@ -26,6 +26,9 @@ limitations under the License.
 namespace tensorflow {
 namespace tfprof {
 namespace {
+
+const char* const kProfilePrefix = "Profile:\n";
+
 bool CreateRunMetadataNode(const string& name, NodeDef* def) {
   // TODO(xpan): Better solution than blacklisting this 2 nodes. They
   // actually cost some resources, maybe include them. Some nodes, such
@@ -48,6 +51,7 @@ TFStats::TFStats(std::unique_ptr<GraphDef> graph,
                  std::unique_ptr<OpLogProto> op_log,
                  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
     : has_code_traces_(false),
+      miss_accelerator_stream_(false),
       ckpt_reader_(std::move(ckpt_reader)) {
   CHECK(graph) << "Must at least have GraphDef";
 
@@ -70,7 +74,9 @@ TFStats::TFStats(std::unique_ptr<GraphDef> graph,
 
 TFStats::TFStats(const string& filename,
                  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
-    : has_code_traces_(false), ckpt_reader_(std::move(ckpt_reader)) {
+    : has_code_traces_(false),
+      miss_accelerator_stream_(false),
+      ckpt_reader_(std::move(ckpt_reader)) {
   string str;
   Status s = ReadFileToString(Env::Default(), filename, &str);
   if (!s.ok()) {
@@ -141,18 +147,21 @@ const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd,
   if (!Validate(opts)) {
     return empty_graph_node_;
   }
+  string prefix = MaybeReportMissingTrace();
+  prefix += QueryDoc(cmd, opts) + kProfilePrefix;
+
   if (cmd == kCmds[0]) {
-    return scope_view_->Show(opts);
+    return scope_view_->Show(prefix, opts);
   } else if (cmd == kCmds[1]) {
     if (opts.step < 0 && opts.output_type == kOutput[0]) {
       for (int64 step : steps_) {
         Options nopts = opts;
         nopts.step = step;
-        graph_view_->Show(nopts);
+        graph_view_->Show(prefix, nopts);
       }
       return empty_graph_node_;
     }
-    return graph_view_->Show(opts);
+    return graph_view_->Show(prefix, opts);
   } else {
     fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
     return empty_graph_node_;
@@ -164,14 +173,17 @@ const MultiGraphNodeProto& TFStats::ShowMultiGraphNode(
   if (!Validate(opts)) {
     return empty_multi_graph_node_;
   }
+  string prefix = MaybeReportMissingTrace();
+  prefix += QueryDoc(cmd, opts) + kProfilePrefix;
+
   if (cmd == kCmds[2]) {
     if (!has_code_traces()) {
       fprintf(stderr, "No code trace information\n");
       return empty_multi_graph_node_;
     }
-    return code_view_->Show(opts);
+    return code_view_->Show(prefix, opts);
   } else if (cmd == kCmds[3]) {
-    return op_view_->Show(opts);
+    return op_view_->Show(prefix, opts);
   } else {
     fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
     return empty_multi_graph_node_;
@@ -258,7 +270,17 @@ void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) {
   }
   steps_.insert(step);
 
+  bool has_gpu_scheduling = false;
+  bool has_gpu_stream = false;
+
   for (const auto& dev_stat : run_meta->step_stats().dev_stats()) {
+    string dev = str_util::Lowercase(dev_stat.device());
+    if (IsPlacedOnAccelerator(dev)) {
+      has_gpu_scheduling = true;
+      if (CountAsAcceleratorTime(dev)) {
+        has_gpu_stream = true;
+      }
+    }
     for (const NodeExecStats& node_stat : dev_stat.node_stats()) {
       string name = node_stat.node_name();
       // Sometimes the node_name is suffixed with unnecessary information.
@@ -280,9 +302,26 @@ void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) {
       }
     }
   }
+
+  if (has_gpu_scheduling && !has_gpu_stream) {
+    miss_accelerator_stream_ = true;
+  }
 }
 
-void TFStats::WriteProfile(const string& filename) {
+string TFStats::MaybeReportMissingTrace() const {
+  string report = "";
+  if (miss_accelerator_stream_) {
+    report +=
+        "\n\nFound accelerator operation but misses accelerator "
+        "stream stats!\n\n"
+        "It's likely a gpu tracing issue rather than tf-profiler issue.\n"
+        "If you found your operation missing accelerator time, "
+        "consider filing a bug to xprof-dev@!\n\n";
+  }
+  return report;
+}
+
+void TFStats::SerializeToString(string* content) {
   ProfileProto profile;
   for (const auto& entry : id_to_string_) {
     (*profile.mutable_id_to_string())[entry.first] = entry.second;
@@ -296,11 +335,17 @@ void TFStats::WriteProfile(const string& filename) {
   }
 
   profile.set_has_trace(has_code_traces_);
+  profile.set_miss_accelerator_stream(miss_accelerator_stream_);
   for (int64 s : steps_) {
     profile.add_steps(s);
   }
-  Status s =
-      WriteStringToFile(Env::Default(), filename, profile.SerializeAsString());
+  *content = profile.SerializeAsString();
+}
+
+void TFStats::WriteProfile(const string& filename) {
+  string content;
+  SerializeToString(&content);
+  Status s = WriteStringToFile(Env::Default(), filename, content);
   if (!s.ok()) {
     fprintf(stderr, "%s\n", s.ToString().c_str());
   }
diff --git a/tensorflow/core/profiler/internal/tfprof_stats.h b/tensorflow/core/profiler/internal/tfprof_stats.h
index d46d9235560c673323d243a40f21bbd06aa9416d..d78abda588b7df5239547a6a3519ce7304c32be1 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats.h
+++ b/tensorflow/core/profiler/internal/tfprof_stats.h
@@ -34,17 +34,17 @@ limitations under the License.
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
-#include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/core/profiler/internal/tfprof_code.h"
 #include "tensorflow/core/profiler/internal/tfprof_graph.h"
 #include "tensorflow/core/profiler/internal/tfprof_node.h"
 #include "tensorflow/core/profiler/internal/tfprof_op.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_scope.h"
 #include "tensorflow/core/profiler/internal/tfprof_show.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
+#include "tensorflow/core/protobuf/config.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -92,6 +92,7 @@ class TFStats {
   // and code traces.
   void AddOpLogProto(std::unique_ptr<OpLogProto> op_log);
 
+  void SerializeToString(string* content);
   void WriteProfile(const string& filename);
 
   // For test purpose only.
@@ -99,9 +100,11 @@ class TFStats {
 
  private:
   bool Validate(const Options& opts) const;
+  string MaybeReportMissingTrace() const;
 
   std::set<int64> steps_;
   bool has_code_traces_;
+  bool miss_accelerator_stream_;
   std::unique_ptr<TFScope> scope_view_;
   std::unique_ptr<TFGraph> graph_view_;
   std::unique_ptr<TFCode> code_view_;
diff --git a/tensorflow/core/profiler/internal/tfprof_stats_test.cc b/tensorflow/core/profiler/internal/tfprof_stats_test.cc
index 2f2101d76bfd4c0741fff0eb9762444cd8b6fd92..564278c9963836f1e8486cbbdc0901b782ae2f61 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_stats_test.cc
@@ -24,9 +24,9 @@ limitations under the License.
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/profiler/internal/tfprof_constants.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 
@@ -89,21 +89,27 @@ TEST_F(TFProfStatsTest, CustomOpType) {
 
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
-      "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_parameters: "
-      "451\nchildren {\n  name: \"DW\"\n  exec_micros: 2\n  parameters: 162\n  "
-      "total_exec_micros: 2\n  total_parameters: 162\n  devices: "
+      "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_requested_bytes: "
+      "2560\ntotal_parameters: 451\nchildren {\n  name: \"DW\"\n  exec_micros: "
+      "2\n  requested_bytes: 1280\n  parameters: 162\n  total_exec_micros: 2\n "
+      " total_requested_bytes: 1280\n  total_parameters: 162\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 2\n  "
       "total_cpu_exec_micros: 2\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
-      "1280\n}\nchildren {\n  name: \"DW2\"\n  exec_micros: 11\n  parameters: "
-      "288\n  total_exec_micros: 11\n  total_parameters: 288\n  devices: "
+      "total_definition_count: 1\n  peak_bytes: 1280\n  residual_bytes: 1280\n "
+      " output_bytes: 1280\n  total_peak_bytes: 1280\n  total_residual_bytes: "
+      "1280\n  total_output_bytes: 1280\n}\nchildren {\n  name: \"DW2\"\n  "
+      "exec_micros: 11\n  requested_bytes: 1280\n  parameters: 288\n  "
+      "total_exec_micros: 11\n  total_requested_bytes: 1280\n  "
+      "total_parameters: 288\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 11\n  "
       "total_cpu_exec_micros: 11\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
-      "1280\n}\nchildren {\n  name: \"ScalarW\"\n  parameters: 1\n  "
-      "total_parameters: 1\n  total_definition_count: "
+      "total_definition_count: 1\n  peak_bytes: 1280\n  residual_bytes: 1280\n "
+      " output_bytes: 1280\n  total_peak_bytes: 1280\n  total_residual_bytes: "
+      "1280\n  total_output_bytes: 1280\n}\nchildren {\n  name: \"ScalarW\"\n  "
+      "parameters: 1\n  total_parameters: 1\n  total_definition_count: "
       "1\n}\ntotal_cpu_exec_micros: 13\ntotal_run_count: "
-      "2\ntotal_definition_count: 3\ntotal_output_bytes: 2560\n",
+      "2\ntotal_definition_count: 3\ntotal_peak_bytes: "
+      "2560\ntotal_residual_bytes: 2560\ntotal_output_bytes: 2560\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 
@@ -119,21 +125,27 @@ TEST_F(TFProfStatsTest, CheckPointOpType) {
 
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
-      "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_parameters: "
-      "451\nchildren {\n  name: \"DW\"\n  exec_micros: 2\n  parameters: 162\n  "
-      "total_exec_micros: 2\n  total_parameters: 162\n  devices: "
+      "name: \"_TFProfRoot\"\ntotal_exec_micros: 13\ntotal_requested_bytes: "
+      "2560\ntotal_parameters: 451\nchildren {\n  name: \"DW\"\n  exec_micros: "
+      "2\n  requested_bytes: 1280\n  parameters: 162\n  total_exec_micros: 2\n "
+      " total_requested_bytes: 1280\n  total_parameters: 162\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 2\n  "
       "total_cpu_exec_micros: 2\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
-      "1280\n}\nchildren {\n  name: \"DW2\"\n  exec_micros: 11\n  parameters: "
-      "288\n  total_exec_micros: 11\n  total_parameters: 288\n  devices: "
+      "total_definition_count: 1\n  peak_bytes: 1280\n  residual_bytes: 1280\n "
+      " output_bytes: 1280\n  total_peak_bytes: 1280\n  total_residual_bytes: "
+      "1280\n  total_output_bytes: 1280\n}\nchildren {\n  name: \"DW2\"\n  "
+      "exec_micros: 11\n  requested_bytes: 1280\n  parameters: 288\n  "
+      "total_exec_micros: 11\n  total_requested_bytes: 1280\n  "
+      "total_parameters: 288\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  cpu_exec_micros: 11\n  "
       "total_cpu_exec_micros: 11\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  output_bytes: 1280\n  total_output_bytes: "
-      "1280\n}\nchildren {\n  name: \"ScalarW\"\n  parameters: 1\n  "
-      "total_parameters: 1\n  total_definition_count: "
+      "total_definition_count: 1\n  peak_bytes: 1280\n  residual_bytes: 1280\n "
+      " output_bytes: 1280\n  total_peak_bytes: 1280\n  total_residual_bytes: "
+      "1280\n  total_output_bytes: 1280\n}\nchildren {\n  name: \"ScalarW\"\n  "
+      "parameters: 1\n  total_parameters: 1\n  total_definition_count: "
       "1\n}\ntotal_cpu_exec_micros: 13\ntotal_run_count: "
-      "2\ntotal_definition_count: 3\ntotal_output_bytes: 2560\n",
+      "2\ntotal_definition_count: 3\ntotal_peak_bytes: "
+      "2560\ntotal_residual_bytes: 2560\ntotal_output_bytes: 2560\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 
@@ -150,7 +162,7 @@ TEST_F(TFProfStatsTest, TestGraph) {
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
       "name: \"_TFProfRoot\"\ntotal_exec_micros: 4945\ntotal_requested_bytes: "
-      "14592\ntotal_parameters: 451\nchildren {\n  name: "
+      "30464\ntotal_parameters: 451\nchildren {\n  name: "
       "\"DW/Initializer/random_normal/mul\"\n  children {\n    name: "
       "\"DW/Initializer/random_normal/RandomStandardNormal\"\n    children {\n "
       "     name: \"DW/Initializer/random_normal/shape\"\n      "
@@ -166,7 +178,7 @@ TEST_F(TFProfStatsTest, TestGraph) {
       "4\n}\ntotal_float_ops: 10440\ntotal_accelerator_exec_micros: "
       "404\ntotal_cpu_exec_micros: 4541\ntotal_run_count: "
       "6\ntotal_definition_count: 32\ntotal_peak_bytes: "
-      "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n",
+      "25856\ntotal_residual_bytes: 3840\ntotal_output_bytes: 4864\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 
@@ -181,9 +193,9 @@ TEST_F(TFProfStatsTest, TestFloatOps) {
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
       "name: \"_TFProfRoot\"\ntotal_exec_micros: 4945\ntotal_requested_bytes: "
-      "14592\ntotal_parameters: 451\nchildren {\n  name: \"Conv2D\"\n  "
-      "exec_micros: 4292\n  requested_bytes: 9472\n  total_exec_micros: 4292\n "
-      " total_requested_bytes: 9472\n  devices: "
+      "30464\ntotal_parameters: 451\nchildren {\n  name: \"Conv2D\"\n  "
+      "exec_micros: 4292\n  requested_bytes: 18176\n  total_exec_micros: "
+      "4292\n  total_requested_bytes: 18176\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  float_ops: 5832\n  "
       "total_float_ops: 5832\n  input_shapes {\n    key: 0\n    value {\n      "
       "dim {\n        size: 2\n      }\n      dim {\n        size: 6\n      "
@@ -194,11 +206,11 @@ TEST_F(TFProfStatsTest, TestFloatOps) {
       "6\n      }\n    }\n  }\n  accelerator_exec_micros: 226\n  "
       "cpu_exec_micros: 4066\n  total_accelerator_exec_micros: 226\n  "
       "total_cpu_exec_micros: 4066\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  peak_bytes: 5888\n  residual_bytes: 768\n  "
-      "output_bytes: 768\n  total_peak_bytes: 5888\n  total_residual_bytes: "
+      "total_definition_count: 1\n  peak_bytes: 14592\n  residual_bytes: 768\n "
+      " output_bytes: 768\n  total_peak_bytes: 14592\n  total_residual_bytes: "
       "768\n  total_output_bytes: 768\n}\nchildren {\n  name: \"Conv2D_1\"\n  "
-      "exec_micros: 597\n  requested_bytes: 5120\n  total_exec_micros: 597\n  "
-      "total_requested_bytes: 5120\n  devices: "
+      "exec_micros: 597\n  requested_bytes: 9728\n  total_exec_micros: 597\n  "
+      "total_requested_bytes: 9728\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  float_ops: 4608\n  "
       "total_float_ops: 4608\n  input_shapes {\n    key: 0\n    value {\n      "
       "dim {\n        size: 2\n      }\n      dim {\n        size: 3\n      "
@@ -209,12 +221,12 @@ TEST_F(TFProfStatsTest, TestFloatOps) {
       "12\n      }\n    }\n  }\n  accelerator_exec_micros: 178\n  "
       "cpu_exec_micros: 419\n  total_accelerator_exec_micros: 178\n  "
       "total_cpu_exec_micros: 419\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  peak_bytes: 4096\n  residual_bytes: 512\n  "
-      "output_bytes: 512\n  total_peak_bytes: 4096\n  total_residual_bytes: "
+      "total_definition_count: 1\n  peak_bytes: 8704\n  residual_bytes: 512\n  "
+      "output_bytes: 512\n  total_peak_bytes: 8704\n  total_residual_bytes: "
       "512\n  total_output_bytes: 512\n}\ntotal_float_ops: "
       "10440\ntotal_accelerator_exec_micros: 404\ntotal_cpu_exec_micros: "
       "4541\ntotal_run_count: 6\ntotal_definition_count: 35\ntotal_peak_bytes: "
-      "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n",
+      "25856\ntotal_residual_bytes: 3840\ntotal_output_bytes: 4864\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 
@@ -231,9 +243,9 @@ TEST_F(TFProfStatsTest, TestAccountShownNameOnly) {
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
       "name: \"_TFProfRoot\"\ntotal_exec_micros: 597\ntotal_requested_bytes: "
-      "5120\nchildren {\n  name: \"Conv2D_1\"\n  exec_micros: 597\n  "
-      "requested_bytes: 5120\n  total_exec_micros: 597\n  "
-      "total_requested_bytes: 5120\n  devices: "
+      "9728\nchildren {\n  name: \"Conv2D_1\"\n  exec_micros: 597\n  "
+      "requested_bytes: 9728\n  total_exec_micros: 597\n  "
+      "total_requested_bytes: 9728\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  float_ops: 4608\n  "
       "total_float_ops: 4608\n  input_shapes {\n    key: 0\n    value {\n      "
       "dim {\n        size: 2\n      }\n      dim {\n        size: 3\n      "
@@ -244,12 +256,12 @@ TEST_F(TFProfStatsTest, TestAccountShownNameOnly) {
       "12\n      }\n    }\n  }\n  accelerator_exec_micros: 178\n  "
       "cpu_exec_micros: 419\n  total_accelerator_exec_micros: 178\n  "
       "total_cpu_exec_micros: 419\n  run_count: 1\n  total_run_count: 1\n  "
-      "total_definition_count: 1\n  peak_bytes: 4096\n  residual_bytes: 512\n  "
-      "output_bytes: 512\n  total_peak_bytes: 4096\n  total_residual_bytes: "
+      "total_definition_count: 1\n  peak_bytes: 8704\n  residual_bytes: 512\n  "
+      "output_bytes: 512\n  total_peak_bytes: 8704\n  total_residual_bytes: "
       "512\n  total_output_bytes: 512\n}\ntotal_float_ops: "
       "4608\ntotal_accelerator_exec_micros: 178\ntotal_cpu_exec_micros: "
       "419\ntotal_run_count: 1\ntotal_definition_count: 2\ntotal_peak_bytes: "
-      "4096\ntotal_residual_bytes: 512\ntotal_output_bytes: 512\n",
+      "8704\ntotal_residual_bytes: 512\ntotal_output_bytes: 512\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 
@@ -265,8 +277,9 @@ TEST_F(TFProfStatsTest, TestShowTensorValue) {
   GraphNodeProto expected;
   CHECK(protobuf::TextFormat::ParseFromString(
       "name: \"_TFProfRoot\"\ntotal_exec_micros: 4945\ntotal_requested_bytes: "
-      "14592\ntotal_parameters: 451\nchildren {\n  name: \"DW\"\n  "
-      "exec_micros: 2\n  parameters: 162\n  total_exec_micros: 2\n  "
+      "30464\ntotal_parameters: 451\nchildren {\n  name: \"DW\"\n  "
+      "exec_micros: 2\n  requested_bytes: 1280\n  parameters: 162\n  "
+      "total_exec_micros: 2\n  total_requested_bytes: 1280\n  "
       "total_parameters: 162\n  devices: "
       "\"/job:localhost/replica:0/task:0/gpu:0\"\n  tensor_value {\n    dtype: "
       "DT_FLOAT\n    value_double: -0.000534315\n    value_double: "
@@ -351,11 +364,13 @@ TEST_F(TFProfStatsTest, TestShowTensorValue) {
       "value_double: 0.000374641\n    value_double: -0.00149603\n    "
       "value_double: -0.000317367\n    value_double: -0.000417829\n  }\n  "
       "cpu_exec_micros: 2\n  total_cpu_exec_micros: 2\n  run_count: 1\n  "
-      "total_run_count: 1\n  total_definition_count: 10\n  output_bytes: "
-      "1280\n  total_output_bytes: 1280\n}\ntotal_float_ops: "
-      "10440\ntotal_accelerator_exec_micros: 404\ntotal_cpu_exec_micros: "
-      "4541\ntotal_run_count: 6\ntotal_definition_count: 35\ntotal_peak_bytes: "
-      "9984\ntotal_residual_bytes: 1280\ntotal_output_bytes: 4864\n",
+      "total_run_count: 1\n  total_definition_count: 10\n  peak_bytes: 1280\n  "
+      "residual_bytes: 1280\n  output_bytes: 1280\n  total_peak_bytes: 1280\n  "
+      "total_residual_bytes: 1280\n  total_output_bytes: "
+      "1280\n}\ntotal_float_ops: 10440\ntotal_accelerator_exec_micros: "
+      "404\ntotal_cpu_exec_micros: 4541\ntotal_run_count: "
+      "6\ntotal_definition_count: 35\ntotal_peak_bytes: "
+      "25856\ntotal_residual_bytes: 3840\ntotal_output_bytes: 4864\n",
       &expected));
   EXPECT_EQ(expected.DebugString(), root.DebugString());
 }
diff --git a/tensorflow/core/profiler/internal/tfprof_tensor_test.cc b/tensorflow/core/profiler/internal/tfprof_tensor_test.cc
index c68888e88fcedc174a9d28bb43408a9a95d50a6b..7fa79d23d853229b32ebd93ddb0640d9c75b323d 100644
--- a/tensorflow/core/profiler/internal/tfprof_tensor_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_tensor_test.cc
@@ -18,10 +18,10 @@ limitations under the License.
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_stats.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 
diff --git a/tensorflow/core/profiler/internal/tfprof_timeline.cc b/tensorflow/core/profiler/internal/tfprof_timeline.cc
index bdb000747db72900d748c22140ca38e571db6691..b0dd8ce5e0f046325a309060b19467b7c1494568 100644
--- a/tensorflow/core/profiler/internal/tfprof_timeline.cc
+++ b/tensorflow/core/profiler/internal/tfprof_timeline.cc
@@ -153,10 +153,8 @@ void MemoryTracker::TrackNode(int64 step, const GraphNode* node) {
 
   std::map<int64, int64> allocs;
   for (const auto& alloc : node->node->allocations(step)) {
-    for (const auto& r : alloc.allocation_records()) {
-      allocs[r.alloc_micros()] += r.alloc_bytes();
-      dev.tracked_allocations[r.alloc_micros()] += r.alloc_bytes();
-    }
+    allocs[alloc.alloc_micros()] += alloc.alloc_bytes();
+    dev.tracked_allocations[alloc.alloc_micros()] += alloc.alloc_bytes();
   }
   dev.tracked_allocations[0] += node->node->accelerator_persistent_bytes();
   allocs[0] += node->node->accelerator_persistent_bytes();
@@ -167,9 +165,9 @@ void MemoryTracker::TrackNode(int64 step, const GraphNode* node) {
     last += it->second;
     aggregate_allocs[it->first] = last;
   }
-  int64 end_micros = node->node->lastest_schedule_end_micros(step);
-  if (end_micros > 0 && node->node->allocator_bytes_in_use(step) > 0) {
-    dev.allocations[end_micros] = node->node->allocator_bytes_in_use(step);
+  for (const auto& bytes_in_use : node->node->allocator_bytes_in_use(step)) {
+    if (bytes_in_use.first <= 0) continue;
+    dev.allocations[bytes_in_use.first] = bytes_in_use.second;
   }
 }
 
@@ -265,6 +263,10 @@ void Timeline::GenerateGraphTimeline(const std::vector<GraphNode*>& gnodes) {
     }
   }
   for (const auto& dev : mem_tracker_.devices()) {
+    if (IsPlacedOnCPU(dev.first)) {
+      // TODO(xpan): Maybe also support CPU allocator memory tracking.
+      continue;
+    }
     int64 pid = AllocatePID();
     chrome_formatter_.EmitPID(GetMemoryLaneName(dev.first), pid);
     int64 pid2 = AllocatePID();
diff --git a/tensorflow/core/profiler/internal/tfprof_timeline_test.cc b/tensorflow/core/profiler/internal/tfprof_timeline_test.cc
index 91eac0cf7617eba54f6938fb893192d2a8fe2eaf..e8bd326aa256acf0cc5c2c87abdc8e9662300603 100644
--- a/tensorflow/core/profiler/internal/tfprof_timeline_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_timeline_test.cc
@@ -23,12 +23,12 @@ limitations under the License.
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/test.h"
-#include "tensorflow/core/protobuf/config.pb.h"
 #include "tensorflow/core/profiler/internal/tfprof_constants.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"
+#include "tensorflow/core/protobuf/config.pb.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -71,7 +71,7 @@ TEST_F(TFProfTimelineTest, GraphView) {
 
   string dump_str;
   TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file + "_0", &dump_str));
-  EXPECT_EQ(7932146665024565912ull, Hash64(dump_str));
+  EXPECT_EQ(16556121177519539380ull, Hash64(dump_str));
 }
 
 TEST_F(TFProfTimelineTest, ScopeView) {
diff --git a/tensorflow/core/profiler/internal/tfprof_utils.cc b/tensorflow/core/profiler/internal/tfprof_utils.cc
index 1ce59ad7552179a6bec387763960d7311958f594..2813bb46fa44bc1ed04e7e8f5cd02737a81abad4 100644
--- a/tensorflow/core/profiler/internal/tfprof_utils.cc
+++ b/tensorflow/core/profiler/internal/tfprof_utils.cc
@@ -297,21 +297,137 @@ void PrintHelp() {
       "See https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/"
       "g3doc/command_line.md for command line tool tutorial.\n");
   printf(
-      "profiler --profile_path=<ProfileProto binary file> # required\nor:\n"
-      "profiler --graph_path=<GraphDef proto file>  # required\n"
-      "         --run_meta_patn=<RunMetadata proto file>  # optional\n"
-      "         --run_log_path=<OpLogProto proto file>  # optional\n\n");
+      "profiler --profile_path=<ProfileProto binary file> # required\n"
+      "\nOr:\n\n"
+      "profiler --graph_path=<GraphDef proto file>  "
+      "# Contains model graph info (no needed for eager execution)\n"
+      "         --run_meta_path=<RunMetadata proto file>  "
+      "# Contains runtime info. Optional.\n"
+      "         --run_log_path=<OpLogProto proto file>  "
+      "# Contains extra source code, flops, custom type info. Optional\n\n");
   printf(
-      "\nCommands:\n"
+      "\nTo skip interactive mode, append one of the following commands:\n"
       "  scope: Organize profiles based on name scopes.\n"
       "  graph: Organize profiles based on graph node input/output.\n"
       "  op: Organize profiles based on operation type.\n"
       "  code: Organize profiles based on python codes (need op_log_path).\n"
-      "  advise: Auto-profile and advise.\n"
+      "  advise: Auto-profile and advise. (experimental)\n"
       "  set: Set options that will be default for follow up commands.\n"
       "  help: Show helps.\n");
   fflush(stdout);
 }
 
+static const char* const kTotalMicrosHelp =
+    "total execution time: Sum of accelerator execution time and cpu execution "
+    "time.";
+static const char* const kAccMicrosHelp =
+    "accelerator execution time: Time spent executing on the accelerator. "
+    "This is normally measured by the actual hardware library.";
+static const char* const kCPUHelp =
+    "cpu execution time: The time from the start to the end of the operation. "
+    "It's the sum of actual cpu run time plus the time that it spends waiting "
+    "if part of computation is launched asynchronously.";
+static const char* const kBytes =
+    "requested bytes: The memory requested by the operation, accumulatively.";
+static const char* const kPeakBytes =
+    "peak bytes: The peak amount of memory that the operation is holding at "
+    "some point.";
+static const char* const kResidualBytes =
+    "residual bytes: The memory not de-allocated after the operation finishes.";
+static const char* const kOutputBytes =
+    "output bytes: The memory that is output from the operation (not "
+    "necessarilty allocated by the operation)";
+static const char* const kOccurrence =
+    "occurrence: The number of times it occurs";
+static const char* const kInputShapes =
+    "input shape: The shape of input tensors";
+static const char* const kDevice = "device: which device is placed on.";
+static const char* const kFloatOps =
+    "flops: Number of float operations. Note: Please read the implementation "
+    "for the math behind it.";
+static const char* const kParams =
+    "param: Number of parameters (in the Variable).";
+static const char* const kTensorValue = "tensor_value: Not supported now.";
+static const char* const kOpTypes =
+    "op_types: The attributes of the operation, includes the Kernel name "
+    "device placed on and user-defined strings.";
+
+static const char* const kScope =
+    "scope: The nodes in the model graph are organized by their names, which "
+    "is hierarchical like filesystem.";
+static const char* const kGraph =
+    "graph: The nodes in the model graph are organized by their operation "
+    "input and output.";
+static const char* const kCode =
+    "code: When python trace is available, the nodes are python lines and "
+    "their are organized by the python call stack.";
+static const char* const kOp =
+    "op: The nodes are operation kernel type, such as MatMul, Conv2D. Graph "
+    "nodes belonging to the same type are aggregated together.";
+static const char* const kAdvise =
+    "advise: Automatically profile and discover issues. (Experimental)";
+static const char* const kSet =
+    "set: Set a value for an option for future use.";
+static const char* const kHelp = "help: Print helping messages.";
+
+string QueryDoc(const string& cmd, const Options& opts) {
+  string cmd_help = "";
+  if (cmd == kCmds[0]) {
+    cmd_help = kScope;
+  } else if (cmd == kCmds[1]) {
+    cmd_help = kScope;
+  } else if (cmd == kCmds[2]) {
+    cmd_help = kCode;
+  } else if (cmd == kCmds[3]) {
+    cmd_help = kOp;
+  } else if (cmd == kCmds[4]) {
+    cmd_help = kAdvise;
+  } else if (cmd == kCmds[5]) {
+    cmd_help = kSet;
+  } else if (cmd == kCmds[6]) {
+    cmd_help = kHelp;
+  } else {
+    cmd_help = "Unknown command: " + cmd;
+  }
+
+  std::vector<string> helps;
+  for (const string& s : opts.select) {
+    if (s == kShown[0]) {
+      helps.push_back(kBytes);
+    } else if (s == kShown[1]) {
+      helps.push_back(strings::StrCat(kTotalMicrosHelp, "\n", kCPUHelp, "\n",
+                                      kAccMicrosHelp));
+    } else if (s == kShown[2]) {
+      helps.push_back(kParams);
+    } else if (s == kShown[3]) {
+      helps.push_back(kFloatOps);
+    } else if (s == kShown[4]) {
+      helps.push_back(kTensorValue);
+    } else if (s == kShown[5]) {
+      helps.push_back(kDevice);
+    } else if (s == kShown[6]) {
+      helps.push_back(kOpTypes);
+    } else if (s == kShown[7]) {
+      helps.push_back(kOccurrence);
+    } else if (s == kShown[8]) {
+      helps.push_back(kInputShapes);
+    } else if (s == kShown[9]) {
+      helps.push_back(kAccMicrosHelp);
+    } else if (s == kShown[10]) {
+      helps.push_back(kCPUHelp);
+    } else if (s == kShown[11]) {
+      helps.push_back(kPeakBytes);
+    } else if (s == kShown[12]) {
+      helps.push_back(kResidualBytes);
+    } else if (s == kShown[13]) {
+      helps.push_back(kOutputBytes);
+    } else {
+      helps.push_back("Unknown select: " + s);
+    }
+  }
+  return strings::StrCat("\nDoc:\n", cmd_help, "\n",
+                         str_util::Join(helps, "\n"), "\n\n");
+}
+
 }  // namespace tfprof
 }  // namespace tensorflow
diff --git a/tensorflow/core/profiler/internal/tfprof_utils.h b/tensorflow/core/profiler/internal/tfprof_utils.h
index 3407517ce01bbccd5fd82b03f9251fef5015c461..afca3df7f8cb4d15a4abcecdbf2163fbf4ee8945 100644
--- a/tensorflow/core/profiler/internal/tfprof_utils.h
+++ b/tensorflow/core/profiler/internal/tfprof_utils.h
@@ -22,8 +22,8 @@ limitations under the License.
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 #include "tensorflow/core/protobuf/config.pb.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -66,6 +66,9 @@ Status ReadProtoFile(Env* env, const string& fname, T* proto,
 
 void PrintHelp();
 
+// Generate helper message based on the command and options.
+string QueryDoc(const string& cmd, const Options& opts);
+
 }  // namespace tfprof
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/profiler/profiler.cc b/tensorflow/core/profiler/profiler.cc
index b280242df18272b63c7b6a683e70db6c2e315c4d..2cc212d5898c15c0d066a477068f7c68fa244b54 100644
--- a/tensorflow/core/profiler/profiler.cc
+++ b/tensorflow/core/profiler/profiler.cc
@@ -31,13 +31,13 @@ limitations under the License.
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/platform/protobuf.h"
-#include "tensorflow/core/protobuf/config.pb.h"
-#include "tensorflow/core/util/command_line_flags.h"
 #include "tensorflow/core/profiler/internal/advisor/tfprof_advisor.h"
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
 #include "tensorflow/core/profiler/internal/tfprof_stats.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/core/util/command_line_flags.h"
 
 namespace tensorflow {
 namespace tfprof {
@@ -140,10 +140,12 @@ int Run(int argc, char** argv) {
   }
   port::InitMain(argv[0], &argc, &argv);
 
-  if (!FLAGS_profile_path.empty() && !FLAGS_graph_path.empty()) {
+  if (!FLAGS_profile_path.empty() &&
+      (!FLAGS_graph_path.empty() || !FLAGS_run_meta_path.empty())) {
     fprintf(stderr,
-            "both --graph_path and --profile_path are set. "
-            "Ignore graph_path\n");
+            "--profile_path is set, do not set --graph_path or "
+            "--run_meta_path\n");
+    return 1;
   }
 
   std::vector<string> account_type_regexes =
@@ -165,7 +167,8 @@ int Run(int argc, char** argv) {
   CHECK(s.ok()) << s.ToString();
 
   string cmd = "";
-  if (argc == 1 && FLAGS_graph_path.empty() && FLAGS_profile_path.empty()) {
+  if (argc == 1 && FLAGS_graph_path.empty() && FLAGS_profile_path.empty() &&
+      FLAGS_run_meta_path.empty()) {
     PrintHelp();
     return 0;
   } else if (argc > 1) {
@@ -202,8 +205,10 @@ int Run(int argc, char** argv) {
         "Try to use a single --profile_path instead of "
         "graph_path,op_log_path,run_meta_path\n");
     std::unique_ptr<GraphDef> graph(new GraphDef());
-    TF_CHECK_OK(
-        ReadProtoFile(Env::Default(), FLAGS_graph_path, graph.get(), false));
+    if (!FLAGS_graph_path.empty()) {
+      TF_CHECK_OK(
+          ReadProtoFile(Env::Default(), FLAGS_graph_path, graph.get(), false));
+    }
 
     std::unique_ptr<OpLogProto> op_log(new OpLogProto());
     if (!FLAGS_op_log_path.empty()) {
diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto
index f92301133a3102a2e4233326dd811169e1ecd105..90b9e293ec7851ef58be195db2b76175bf5bd74a 100644
--- a/tensorflow/core/profiler/tfprof_log.proto
+++ b/tensorflow/core/profiler/tfprof_log.proto
@@ -54,6 +54,9 @@ message ProfileProto {
   map<int64, ProfileNode> nodes = 1;
   // Whether or not has code traces.
   bool has_trace = 2;
+  // Whether or not the TF device tracer fails to return accelerator
+  // information (which could lead to 0 accelerator execution time).
+  bool miss_accelerator_stream = 5;
   // Traced steps.
   repeated int64 steps = 3;
 
@@ -90,10 +93,6 @@ message ProfileNode {
   map<int64, ExecProfile> execs = 12;
 }
 
-message Allocation {
-  repeated AllocationRecord allocation_records = 1;
-}
-
 message ExecProfile {
   // Can be larger than 1 if run multiple times in loop.
   int64 run_count = 1;
@@ -110,34 +109,42 @@ message ExecProfile {
   // For cpu, vector size can be larger than 1 if in tf.while_loop.
   map<string, ExecTime> cpu_execs = 5;
 
-  map<int32, Memory> output_memory = 17;
+  // Each entry to memory information of a scheduling of the node.
+  // Normally, there will be multiple entries in while_loop.
+  repeated ExecMemory memory_execs = 7;
+  // The allocation and deallocation times and sizes throughout execution.
+  repeated AllocationRecord allocations = 11;
+  // The devices related to this execution.
+  repeated string devices = 6;
+}
 
-  repeated Allocation allocations = 18;
+message ExecTime {
+  repeated Tuple times = 1;
+}
 
-  repeated string devices = 6;
+message ExecMemory {
+  // This is the timestamp when the memory information was tracked.
+  int64 memory_micros = 1;
+  // NOTE: Please don't depend on the following 4 fields yet. Due to
+  // TensorFlow internal tracing issues, the numbers can be quite wrong.
+  // TODO(xpan): Fix the TensorFlow internal tracing.
+  int64 host_temp_bytes = 2;
+  int64 host_persistent_bytes = 3;
+  int64 accelerator_temp_bytes = 4;
+  int64 accelerator_persistent_bytes = 5;
 
   // Total bytes requested by the op.
-  int64 requested_bytes = 7;
+  int64 requested_bytes = 6;
   // Total bytes requested by the op and released before op end.
-  int64 peak_bytes = 8;
+  int64 peak_bytes = 7;
   // Total bytes requested by the op and not released after op end.
-  int64 residual_bytes = 9;
+  int64 residual_bytes = 8;
   // Total bytes output by the op (not necessarily requested by the op).
-  int64 output_bytes = 10;
-  // Total temporary bytes allocated and released by the op.
-  int64 host_temp_bytes = 11;
-  // Total persistent bytes (e.g. variable) allocated by the op.
-  int64 host_persistent_bytes = 12;
-  int64 accelerator_temp_bytes = 13;
-  int64 accelerator_persistent_bytes = 14;
+  int64 output_bytes = 9;
   // The total number of bytes currently allocated by the allocator if >0.
-  int64 allocator_bytes_in_use = 15;
-
-  bool memory_intialized = 16;
-}
-
-message ExecTime {
-  repeated Tuple times = 1;
+  int64 allocator_bytes_in_use = 10;
+  // The memory of each output of the operation.
+  map<int32, Memory> output_memory = 11;
 }
 
 message Tuple {
diff --git a/tensorflow/core/profiler/internal/tfprof_options.cc b/tensorflow/core/profiler/tfprof_options.cc
similarity index 99%
rename from tensorflow/core/profiler/internal/tfprof_options.cc
rename to tensorflow/core/profiler/tfprof_options.cc
index 663427254182ba57bfba75efa5be82464e5c44f8..9e5ef0a0a31600e12e76cb8f5f3e5a1c6f62a3d5 100644
--- a/tensorflow/core/profiler/internal/tfprof_options.cc
+++ b/tensorflow/core/profiler/tfprof_options.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/profiler/internal/tfprof_options.h"
+#include "tensorflow/core/profiler/tfprof_options.h"
 
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/str_util.h"
diff --git a/tensorflow/core/profiler/internal/tfprof_options.h b/tensorflow/core/profiler/tfprof_options.h
similarity index 100%
rename from tensorflow/core/profiler/internal/tfprof_options.h
rename to tensorflow/core/profiler/tfprof_options.h
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index 1916316245063bd6e8903573a961295f3b79bcf6..ccab69b9c04cad1fdd95f7ff4304fc60e2f459da 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -42,18 +42,24 @@ message GPUOptions {
   // A comma-separated list of GPU ids that determines the 'visible'
   // to 'virtual' mapping of GPU devices.  For example, if TensorFlow
   // can see 8 GPU devices in the process, and one wanted to map
-  // visible GPU devices 5 and 3 as "/device:GPU:0", and "/device:GPU:1", then one
-  // would specify this field as "5,3".  This field is similar in
+  // visible GPU devices 5 and 3 as "/device:GPU:0", and "/device:GPU:1",
+  // then one would specify this field as "5,3".  This field is similar in
   // spirit to the CUDA_VISIBLE_DEVICES environment variable, except
   // it applies to the visible GPU devices in the process.
   //
-  // NOTE: The GPU driver provides the process with the visible GPUs
-  // in an order which is not guaranteed to have any correlation to
-  // the *physical* GPU id in the machine.  This field is used for
-  // remapping "visible" to "virtual", which means this operates only
-  // after the process starts.  Users are required to use vendor
-  // specific mechanisms (e.g., CUDA_VISIBLE_DEVICES) to control the
-  // physical to visible device mapping prior to invoking TensorFlow.
+  // NOTE:
+  // 1. The GPU driver provides the process with the visible GPUs
+  //    in an order which is not guaranteed to have any correlation to
+  //    the *physical* GPU id in the machine.  This field is used for
+  //    remapping "visible" to "virtual", which means this operates only
+  //    after the process starts.  Users are required to use vendor
+  //    specific mechanisms (e.g., CUDA_VISIBLE_DEVICES) to control the
+  //    physical to visible device mapping prior to invoking TensorFlow.
+  // 2. In the code, the ids in this list are also called "CUDA GPU id"s,
+  //    and the 'virtual' ids of GPU devices (i.e. the ids in the device
+  //    name "/device:GPU:<id>") are also called "TF GPU id"s. Please
+  //    refer to third_party/tensorflow/core/common_runtime/gpu/gpu_id.h
+  //    for more information.
   string visible_device_list = 5;
 
   // In the event polling loop sleep this many microseconds between
@@ -77,6 +83,52 @@ message GPUOptions {
   // memory is unpageable, having too much pinned memory might negatively impact
   // the overall host system performance.
   bool force_gpu_compatible = 8;
+
+  // Everything inside Experimental is subject to change and is not subject
+  // to API stability guarantees in
+  // https://www.tensorflow.org/programmers_guide/version_compat.
+  message Experimental {
+    // Configuration for breaking down a visible GPU into multiple "virtual"
+    // devices.
+    message VirtualDevices {
+      // Per "virtual" device memory limit, in MB. The number of elements in
+      // the list is the number of virtual devices to create on the
+      // corresponding visible GPU (see "virtual_devices" below).
+      // If empty, it will create single virtual device taking all available
+      // memory from the device.
+      //
+      // For the concept of "visible" and "virtual" GPU, see the comments for
+      // "visible_device_list" above for more information.
+      repeated float memory_limit_mb = 1;
+    }
+
+    // The multi virtual device settings. If empty (not set), it will create
+    // single virtual device on each visible GPU, according to the settings
+    // in "visible_device_list" above. Otherwise, the number of elements in the
+    // list must be the same as the number of visible GPUs (after
+    // "visible_device_list" filtering if it is set), and the string represented
+    // device names (e.g. /device:GPU:<id>) will refer to the virtual
+    // devices and have the <id> field assigned sequentially starting from 0,
+    // according to the order they appear in this list and the "memory_limit"
+    // list inside each element. For example,
+    //   visible_device_list = "1,0"
+    //   virtual_devices { memory_limit: 1GB memory_limit: 2GB }
+    //   virtual_devices {}
+    // will create three virtual devices as:
+    //   /device:GPU:0 -> visible GPU 1 with 1GB memory
+    //   /device:GPU:1 -> visible GPU 1 with 2GB memory
+    //   /device:GPU:2 -> visible GPU 0 with all available memory
+    //
+    // NOTE:
+    // 1. It's invalid to set both this and "per_process_gpu_memory_fraction"
+    //    at the same time.
+    // 2. Currently this setting is per-process, not per-session. Using
+    //    different settings in different sessions within same process will
+    //    result in undefined behavior.
+    repeated VirtualDevices virtual_devices = 1;
+  }
+
+  Experimental experimental = 9;
 };
 
 // Options passed to the graph optimizer
diff --git a/tensorflow/core/protobuf/control_flow.proto b/tensorflow/core/protobuf/control_flow.proto
index 48f503225447c26f8959ba379656361292052b44..2c9476a08ad946e7f019475055397fcd6cfbbc5a 100644
--- a/tensorflow/core/protobuf/control_flow.proto
+++ b/tensorflow/core/protobuf/control_flow.proto
@@ -66,4 +66,9 @@ message WhileContextDef {
 
   // Values and external values in control flow context.
   ValuesDef values_def = 9;
+
+  // Optional name of the maximum_iterations tensor.
+  string maximum_iterations_name = 11;
+
+  // Next available id: 12.
 }
diff --git a/tensorflow/core/protobuf/critical_section.proto b/tensorflow/core/protobuf/critical_section.proto
new file mode 100644
index 0000000000000000000000000000000000000000..0b3f531e6d9f59f05dfc0b7b36beda334f9f5101
--- /dev/null
+++ b/tensorflow/core/protobuf/critical_section.proto
@@ -0,0 +1,22 @@
+syntax = "proto3";
+
+package tensorflow;
+option cc_enable_arenas = true;
+option java_outer_classname = "CriticalSectionProtos";
+option java_multiple_files = true;
+option java_package = "org.tensorflow.framework";
+
+// Protocol buffer representing a CriticalSection.
+message CriticalSectionDef {
+  // Name of the critical section handle.
+  string critical_section_name = 1;
+}
+
+// Protocol buffer representing a CriticalSection execution.
+message CriticalSectionExecutionDef {
+  // Name of the critical section handle.
+  string execute_in_critical_section_name = 1;
+  // Whether this operation requires exclusive access to its resources,
+  // (i.e., no other CriticalSections may request the same resources).
+  bool exclusive_resource_access = 2;
+}
diff --git a/tensorflow/core/protobuf/debug.proto b/tensorflow/core/protobuf/debug.proto
index 136c627e25f33cb9b4ff2de7725406c0f800a5b1..56983f3b7d464f88cebe608ac15882f04f27b003 100644
--- a/tensorflow/core/protobuf/debug.proto
+++ b/tensorflow/core/protobuf/debug.proto
@@ -60,3 +60,25 @@ message DebugOptions {
   // step count.
   int64 global_step = 10;
 }
+
+message DebuggedSourceFile {
+  // The host name on which a source code file is located.
+  string host = 1;
+
+  // Path to the source code file.
+  string file_path = 2;
+
+  // The timestamp at which the source code file is last modified.
+  int64 last_modified = 3;
+
+  // Byte size of the file.
+  int64 bytes = 4;
+
+  // Line-by-line content of the source code file.
+  repeated string lines = 5;
+}
+
+message DebuggedSourceFiles {
+  // A collection of source code files.
+  repeated DebuggedSourceFile source_files = 1;
+}
diff --git a/tensorflow/core/protobuf/master.proto b/tensorflow/core/protobuf/master.proto
index 6b25a86ba46b9285100f7d91ebade711f0425874..0437cb1b83e12d83bf3b8713e2940a6d45173fb5 100644
--- a/tensorflow/core/protobuf/master.proto
+++ b/tensorflow/core/protobuf/master.proto
@@ -23,6 +23,7 @@ option java_package = "org.tensorflow.distruntime";
 
 import "tensorflow/core/framework/device_attributes.proto";
 import "tensorflow/core/framework/graph.proto";
+import "tensorflow/core/lib/core/error_codes.proto";
 import "tensorflow/core/protobuf/config.proto";
 import "tensorflow/core/protobuf/named_tensor.proto";
 
@@ -129,6 +130,13 @@ message RunStepRequest {
   // Partial run handle (optional). If specified, this will be a partial run
   // execution, run up to the specified fetches.
   string partial_run_handle = 6;
+
+  // If true then some errors, e.g., execution errors that have long
+  // error messages, may return an OK RunStepResponse with the actual
+  // error saved in the status_code/status_error_message fields of the
+  // response body. This is a workaround since the RPC subsystem may
+  // truncate long metadata messages.
+  bool store_errors_in_response_body = 7;
 }
 
 message RunStepResponse {
@@ -138,6 +146,13 @@ message RunStepResponse {
 
   // Returned metadata if requested in the options.
   RunMetadata metadata = 2;
+
+  // If store_errors_in_response_body is true in the request, then
+  // optionally the server may return an OK status for the RPC and
+  // fill the true status into the fields below, to allow for messages
+  // that are too long to fit in metadata.
+  error.Code status_code = 3;
+  string status_error_message = 4;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/tensorflow/core/protobuf/meta_graph.proto b/tensorflow/core/protobuf/meta_graph.proto
index 47ec2aa1efeb11135b95b3b2c4342b77f0a9866b..fd86c0da12b26cf5ed8a7846d159dd6feb4ddc4e 100644
--- a/tensorflow/core/protobuf/meta_graph.proto
+++ b/tensorflow/core/protobuf/meta_graph.proto
@@ -61,6 +61,10 @@ message MetaGraphDef {
     // graph. This will be populated by the framework, which will overwrite any
     // user supplied value.
     string tensorflow_git_version = 6;
+
+    // A flag to denote whether default-valued attrs have been stripped from
+    // the nodes in this graph_def.
+    bool stripped_default_attrs = 7;
   }
   MetaInfoDef meta_info_def = 1;
 
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 3b5d1563a2695c4b33d596f0493e38ff044b3c38..d3c3d432a3739a28a5703f3c1a2aa4cbc95f461c 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -35,7 +35,7 @@ message RewriterConfig {
   Toggle constant_folding = 3;
   // Arithmetic optimizations (default is ON)
   Toggle arithmetic_optimization = 7;
-  // Control dependency optimizations (default is OFF).
+  // Control dependency optimizations (default is ON).
   Toggle dependency_optimization = 8;
   // If true, don't remove unnecessary ops from the graph
   bool disable_model_pruning = 2;
@@ -53,6 +53,7 @@ message RewriterConfig {
     // selected automatically.
     SWAPPING_HEURISTICS = 4;
     RECOMPUTATION_HEURISTICS = 5;
+    SCHEDULING_HEURISTICS = 6;
     // Use any combination of swapping and recomputation heuristics.
     HEURISTICS = 3;
   }
diff --git a/tensorflow/core/protobuf/worker.proto b/tensorflow/core/protobuf/worker.proto
index 385e2dd163b8c668357ea9fabd1dee7d9a675729..9b51db1362124bb1db2645711684bd1cbf3e61b5 100644
--- a/tensorflow/core/protobuf/worker.proto
+++ b/tensorflow/core/protobuf/worker.proto
@@ -27,6 +27,7 @@ import "tensorflow/core/framework/step_stats.proto";
 import "tensorflow/core/framework/device_attributes.proto";
 import "tensorflow/core/framework/graph.proto";
 import "tensorflow/core/framework/tensor.proto";
+import "tensorflow/core/lib/core/error_codes.proto";
 import "tensorflow/core/protobuf/config.proto";
 import "tensorflow/core/protobuf/debug.proto";
 import "tensorflow/core/protobuf/named_tensor.proto";
@@ -226,7 +227,14 @@ message RunGraphRequest {
   // True if this is the last partial run request in a sequence of requests.
   bool is_last_partial_run = 7;
 
-  // Next: 9
+  // If true then some errors, e.g., execution errors that have long
+  // error messages, may return an OK RunGraphResponse with the actual
+  // error saved in the status_code/status_error_message fields of the
+  // response body. This is a workaround since the RPC subsystem may
+  // truncate long metadata messages.
+  bool store_errors_in_response_body = 9;
+
+  // Next: 10
 }
 
 message RunGraphResponse {
@@ -240,6 +248,13 @@ message RunGraphResponse {
   StepStats step_stats = 2;
   CostGraphDef cost_graph = 3;
   repeated GraphDef partition_graph = 4;
+
+  // If store_errors_in_response_body is true in the request, then
+  // optionally the server may return an OK status for the RPC and
+  // fill the true status into the fields below, to allow for messages
+  // that are too long to fit in metadata.
+  error.Code status_code = 5;
+  string status_error_message = 6;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/tensorflow/core/public/session.h b/tensorflow/core/public/session.h
index bca384e59fe9412a77398a81f0c8abbfd512e51a..75ad50f6f2d59a8f4b8282d8e7b395e2323d62e1 100644
--- a/tensorflow/core/public/session.h
+++ b/tensorflow/core/public/session.h
@@ -186,7 +186,7 @@ class Session {
   /// the `SessionOptions::target` field).
   virtual Status Close() = 0;
 
-  // NOTE(ashankar): As of July 2017, this method was added to faciliate some
+  // NOTE(ashankar): As of July 2017, this method was added to facilitate some
   // experimentation. Reconsider/re-evaluate after September 2017.
   //
   // Sets `*output` to the `DeviceMgr` that owns accessible devices in the
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index ec077c42837e517f94955956ed75430b7a3d0a30..adeb080ddef004c55c97a489a21c207362cf2e27 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -19,12 +19,12 @@ limitations under the License.
 // TensorFlow uses semantic versioning, see http://semver.org/.
 
 #define TF_MAJOR_VERSION 1
-#define TF_MINOR_VERSION 4
+#define TF_MINOR_VERSION 5
 #define TF_PATCH_VERSION 0
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX ""
+#define TF_VERSION_SUFFIX "-rc1"
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
@@ -91,10 +91,13 @@ limitations under the License.
 // 24. Deprecate lookup ops (v1) ops in favor of v2 (30may2017)
 // 25. Deprecate stack (v1) ops in favor of v2 (2017/6/15).
 // 25. Deprecate RandomPoisson (v1) ops in favor of v2 (2017/10/25).
+// 26. Add a bool 'stripped_default_attrs' to MetaInfoDef indicating
+//     whether default-valued attrs have been stripped from the nodes in the
+//     GraphDef. (7dec2017)
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 24
+#define TF_GRAPH_DEF_VERSION 25
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
@@ -119,5 +122,7 @@ extern const char* tf_compiler_version();
 extern const char* tf_git_version();
 // Value of the _GLIBCXX_USE_CXX11_ABI flag, or 0 if it's not set.
 extern const int tf_cxx11_abi_flag();
+// Returns 1 if build is monolithic, or 0 otherwise.
+extern const int tf_monolithic_build();
 
 #endif  // TENSORFLOW_CORE_PUBLIC_VERSION_H_
diff --git a/tensorflow/core/user_ops/fact.cc b/tensorflow/core/user_ops/fact.cc
index c512275506436d54829b355dbbd9711115d364b3..3a4fc8115a7f91badfeda369a599b3dba3057c63 100644
--- a/tensorflow/core/user_ops/fact.cc
+++ b/tensorflow/core/user_ops/fact.cc
@@ -18,27 +18,23 @@ limitations under the License.
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 
-using namespace tensorflow;
+REGISTER_OP("Fact").Output("fact: string");
 
-REGISTER_OP("Fact")
-    .Output("fact: string")
-    .Doc(R"doc(
-Output a fact about factorials.
-)doc");
-
-class FactOp : public OpKernel {
+class FactOp : public tensorflow::OpKernel {
  public:
-  explicit FactOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit FactOp(tensorflow::OpKernelConstruction* context)
+      : OpKernel(context) {}
 
-  void Compute(OpKernelContext* context) override {
+  void Compute(tensorflow::OpKernelContext* context) override {
     // Output a scalar string.
-    Tensor* output_tensor = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(0, TensorShape(), &output_tensor));
+    tensorflow::Tensor* output_tensor = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(
+                                0, tensorflow::TensorShape(), &output_tensor));
+    using tensorflow::string;
     auto output = output_tensor->template scalar<string>();
 
     output() = "0! == 1";
   }
 };
 
-REGISTER_KERNEL_BUILDER(Name("Fact").Device(DEVICE_CPU), FactOp);
+REGISTER_KERNEL_BUILDER(Name("Fact").Device(tensorflow::DEVICE_CPU), FactOp);
diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h
index cf11f419a4effd868fa9c933240acb9a05bfa355..3e32ec79731e1529affb49cf6e1aff3f23b84262 100644
--- a/tensorflow/core/util/cuda_kernel_helper.h
+++ b/tensorflow/core/util/cuda_kernel_helper.h
@@ -374,6 +374,20 @@ __device__ __host__ inline Eigen::half ldg(const Eigen::half* address) {
 #endif
 }
 
+template <>
+__device__ __host__ inline tensorflow::bfloat16 ldg(
+    const tensorflow::bfloat16* address) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  tensorflow::bfloat16 return_value;
+  asm volatile("ld.global.nc.u16 %0, [%1];"
+               : "=h"(return_value.value)
+               : "l"(address));
+  return return_value;
+#else
+  return *address;
+#endif
+}
+
 template <>
 __device__ __host__ inline bool ldg(const bool* address) {
 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
diff --git a/tensorflow/core/util/equal_graph_def.cc b/tensorflow/core/util/equal_graph_def.cc
index a3b7db98cc00eff703bbce95cb3fae7e83be35b5..f1ec497a6772c84d599a76169515ef417c11f430 100644
--- a/tensorflow/core/util/equal_graph_def.cc
+++ b/tensorflow/core/util/equal_graph_def.cc
@@ -148,7 +148,10 @@ bool EqualNodeDef(const NodeDef& actual, const NodeDef& expected, string* diff,
       first_control_input = i;
       break;
     }
-    if (actual.input(i) != expected.input(i)) {
+    // Special case for inputs: "tensor" is equivalent to "tensor:0"
+    if (actual.input(i) != expected.input(i) &&
+        actual.input(i) != strings::StrCat(expected.input(i), ":0") &&
+        strings::StrCat(actual.input(i), ":0") != expected.input(i)) {
       if (diff != nullptr) {
         *diff = strings::StrCat("Node named '", actual.name(), "' has input ",
                                 i, " '", actual.input(i),
diff --git a/tensorflow/core/util/example_proto_fast_parsing.cc b/tensorflow/core/util/example_proto_fast_parsing.cc
index b9cf97195be2ed9ddf526842b3f2c3b59f4cb5b6..7946fa1782ab3ebb225adfc2a139f5a755ddbe8b 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.cc
+++ b/tensorflow/core/util/example_proto_fast_parsing.cc
@@ -94,9 +94,29 @@ class Feature {
     return Status::OK();
   }
 
+  bool GetNumElementsInBytesList(int* num_elements) {
+    protobuf::io::CodedInputStream stream(
+        reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size());
+    EnableAliasing(&stream);
+    uint32 length = 0;
+    if (!stream.ReadVarint32(&length)) return false;
+    auto limit = stream.PushLimit(length);
+    *num_elements = 0;
+    while (!stream.ExpectAtEnd()) {
+      if (!stream.ExpectTag(kDelimitedTag(1))) return false;
+      uint32 bytes_length = 0;
+      if (!stream.ReadVarint32(&bytes_length)) return false;
+      if (!stream.Skip(bytes_length)) return false;
+      ++*num_elements;
+    }
+    stream.PopLimit(limit);
+    return true;
+  }
+
   template <typename Result>
   bool ParseBytesList(Result* bytes_list) {
     DCHECK(bytes_list != nullptr);
+
     protobuf::io::CodedInputStream stream(
         reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size());
 
@@ -447,6 +467,28 @@ class LimitedArraySlice {
   T* end_;
 };
 
+void LogDenseFeatureDataLoss(StringPiece feature_name) {
+  LOG(WARNING) << "Data loss! Feature '" << feature_name
+               << "' is present in multiple concatenated "
+                  "tf.Examples. Ignoring all but last one.";
+  static auto* duplicated_dense_feature = monitoring::Counter<0>::New(
+      "/tensorflow/core/util/example_proto_fast_parsing/"
+      "duplicated_dense_feature",
+      "Dense feature appears twice in a tf.Example");
+  duplicated_dense_feature->GetCell()->IncrementBy(1);
+}
+
+void LogSparseFeatureDataLoss(StringPiece feature_name) {
+  LOG(WARNING) << "Data loss! Feature '" << feature_name
+               << "' is present in multiple concatenated "
+                  "tf.Examples. Ignoring all but last one.";
+  static auto* duplicated_sparse_feature = monitoring::Counter<0>::New(
+      "/tensorflow/core/util/example_proto_fast_parsing/"
+      "duplicated_sparse_feature",
+      "Sparse feature appears twice in a tf.Example");
+  duplicated_sparse_feature->GetCell()->IncrementBy(1);
+}
+
 Status FastParseSerializedExample(
     const string& serialized_example, const string& example_name,
     const size_t example_index, const Config& config,
@@ -510,14 +552,7 @@ Status FastParseSerializedExample(
       // If feature was already visited, skip.
       // Compare comment at the beginning of the loop.
       if (dense_feature_last_example[d] == example_index) {
-        LOG(WARNING) << "Data loss! Feature '" << feature_name
-                     << "' in present in multiple concatenated "
-                        "tf.Examples. Ignoring all but last one.";
-        static auto* duplicated_dense_feature = monitoring::Counter<0>::New(
-            "/tensorflow/core/util/example_proto_fast_parsing/"
-            "duplicated_dense_feature",
-            "Dense feature appears twice in a tf.Example");
-        duplicated_dense_feature->GetCell()->IncrementBy(1);
+        LogDenseFeatureDataLoss(feature_name);
         continue;
       }
       dense_feature_last_example[d] = example_index;
@@ -639,14 +674,7 @@ Status FastParseSerializedExample(
       // If feature was already visited, skip.
       // Compare comment at the beginning of the loop.
       if (sparse_feature_last_example[d] == example_index) {
-        LOG(WARNING) << "Data loss! Feature '" << feature_name
-                     << "' in present in multiple concatenated "
-                        "tf.Examples. Ignoring all but last one.";
-        static auto* duplicated_sparse_feature = monitoring::Counter<0>::New(
-            "/tensorflow/core/util/example_proto_fast_parsing/"
-            "duplicated_sparse_feature",
-            "sparse feature appears twice in a tf.Example");
-        duplicated_sparse_feature->GetCell()->IncrementBy(1);
+        LogSparseFeatureDataLoss(feature_name);
         continue;
       }
       sparse_feature_last_example[d] = example_index;
@@ -1099,5 +1127,333 @@ Status FastParseExample(const Config& config,
   return Status::OK();
 }
 
+Status FastParseSingleExample(const Config& config, const string& serialized,
+                              Result* result) {
+  DCHECK(result != nullptr);
+  // Check config so we can safely CHECK(false) in switches on config.*.dtype
+  for (auto& c : config.sparse) {
+    TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
+  }
+  for (auto& c : config.dense) {
+    TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
+  }
+
+  // TODO(mrry): Cache the construction of this map at Op construction time.
+  size_t config_size = config.dense.size() + config.sparse.size();
+  SeededHasher hasher;
+  // Build config index.
+  PresizedCuckooMap<std::pair<size_t, Type>> config_index(config_size);
+  bool ok = true;
+  for (size_t i = 0; i < 1000; ++i) {
+    for (size_t d = 0; d < config.dense.size(); ++d) {
+      ok &= config_index.InsertUnique(hasher(config.dense[d].feature_name),
+                                      {d, Type::Dense});
+    }
+    for (size_t d = 0; d < config.sparse.size(); ++d) {
+      ok &= config_index.InsertUnique(hasher(config.sparse[d].feature_name),
+                                      {d, Type::Sparse});
+    }
+    if (ok) break;
+    LOG(WARNING) << "Collision found. This should happen only if you have "
+                    "around 2^32 entries in your config.";
+    hasher.seed++;
+    config_index.Clear(config_size);
+  }
+  if (!ok) {
+    return errors::Internal(
+        "Could not avoid collision. This should not happen.");
+  }
+
+  // Allocate dense output tensors.
+  for (size_t d = 0; d < config.dense.size(); ++d) {
+    if (!config.dense[d].variable_length) {
+      TensorShape values_shape;
+      if (!config.dense[d].shape.AsTensorShape(&values_shape)) {
+        return errors::Internal(
+            "Fixed-length shape was not a statically defined shape.");
+      }
+      result->dense_values.emplace_back(config.dense[d].dtype, values_shape);
+    } else {
+      // Variable-length tensor will be allocated later.
+      result->dense_values.emplace_back();
+    }
+  }
+
+  // Allocate sparse output tensors.
+  for (size_t d = 0; d < config.sparse.size(); ++d) {
+    // The dense_shape is always a vector of length 1.
+    result->sparse_shapes.emplace_back(DT_INT64, TensorShape({1}));
+    // Variable-length tensors will be allocated later.
+    result->sparse_indices.emplace_back();
+    result->sparse_values.emplace_back();
+  }
+
+  parsed::Example parsed_example;
+  if (!ParseExample(serialized, &parsed_example)) {
+    return errors::InvalidArgument("Could not parse example input, value: '",
+                                   serialized, "'");
+  }
+  std::vector<bool> sparse_feature_already_seen(config.sparse.size(), false);
+  std::vector<bool> dense_feature_already_seen(config.dense.size(), false);
+
+  // Handle features present in the example.
+  const size_t parsed_example_size = parsed_example.size();
+  for (size_t i = 0; i < parsed_example_size; ++i) {
+    // This is a logic that standard protobuf parsing is implementing.
+    // I.e. last entry in the map overwrites all the previous ones.
+    parsed::FeatureMapEntry& name_and_feature =
+        parsed_example[parsed_example_size - i - 1];
+
+    const StringPiece feature_name = name_and_feature.first;
+    parsed::Feature& feature = name_and_feature.second;
+
+    std::pair<size_t, Type> d_and_type;
+    uint64 h = hasher(feature_name);
+    if (!config_index.Find(h, &d_and_type)) continue;
+
+    size_t d = d_and_type.first;
+    bool is_dense = d_and_type.second == Type::Dense;
+
+    {
+      // Testing for PresizedCuckooMap collision.
+      // TODO(lew): Use dense_hash_map and avoid this and hasher creation.
+      const string& config_feature_name = is_dense
+                                              ? config.dense[d].feature_name
+                                              : config.sparse[d].feature_name;
+      if (feature_name != config_feature_name) continue;
+    }
+
+    auto example_error = [feature_name](StringPiece suffix) {
+      return errors::InvalidArgument("Key: ", feature_name, ".  ", suffix);
+    };
+
+    auto parse_error = [feature_name] {
+      return errors::InvalidArgument("Key: ", feature_name,
+                                     ".  Can't parse serialized Example.");
+    };
+
+    DataType example_dtype;
+    TF_RETURN_IF_ERROR(feature.ParseDataType(&example_dtype));
+    if (example_dtype == DT_INVALID) continue;
+
+    if (is_dense && !config.dense[d].variable_length) {
+      // If feature was already visited, skip.
+      // Compare comment at the beginning of the loop.
+      if (dense_feature_already_seen[d]) {
+        LogDenseFeatureDataLoss(feature_name);
+        continue;
+      }
+      dense_feature_already_seen[d] = true;
+
+      if (example_dtype != config.dense[d].dtype) {
+        return example_error(strings::StrCat(
+            "Data types don't match. Data type: ",
+            DataTypeString(example_dtype),
+            " but expected type: ", DataTypeString(config.dense[d].dtype)));
+      }
+
+      Tensor* out = &result->dense_values[d];
+      const std::size_t num_elements = config.dense[d].elements_per_stride;
+
+      switch (example_dtype) {
+        case DT_INT64: {
+          auto out_p = out->flat<int64>().data();
+          LimitedArraySlice<int64> slice(out_p, num_elements);
+          if (!feature.ParseInt64List(&slice)) return parse_error();
+          if (slice.EndDistance() != 0) {
+            return parse_error();
+          }
+          break;
+        }
+        case DT_FLOAT: {
+          auto out_p = out->flat<float>().data();
+          LimitedArraySlice<float> slice(out_p, num_elements);
+          if (!feature.ParseFloatList(&slice)) return parse_error();
+          if (slice.EndDistance() != 0) {
+            return parse_error();
+          }
+          break;
+        }
+        case DT_STRING: {
+          auto out_p = out->flat<string>().data();
+          LimitedArraySlice<string> slice(out_p, num_elements);
+          if (!feature.ParseBytesList(&slice)) return parse_error();
+          if (slice.EndDistance() != 0) {
+            return parse_error();
+          }
+          break;
+        }
+        default:
+          LOG(FATAL) << "Should not happen.";
+      }
+
+    } else {  // if variable length
+      SparseBuffer out_temp;
+      const size_t num_elements_divisor =
+          is_dense ? config.dense[d].elements_per_stride : 1;
+      size_t num_elements;
+
+      if (is_dense) {
+        // If feature was already visited, skip.
+        // Compare comment at the beginning of the loop.
+        if (dense_feature_already_seen[d]) {
+          LogDenseFeatureDataLoss(feature_name);
+          continue;
+        }
+        dense_feature_already_seen[d] = true;
+        if (example_dtype != config.dense[d].dtype) {
+          return example_error(strings::StrCat(
+              "Data types don't match. Data type: ",
+              DataTypeString(example_dtype),
+              " but expected type: ", DataTypeString(config.dense[d].dtype)));
+        }
+      } else {
+        // If feature was already visited, skip.
+        // Compare comment at the beginning of the loop.
+        if (sparse_feature_already_seen[d]) {
+          LogSparseFeatureDataLoss(feature_name);
+          continue;
+        }
+        sparse_feature_already_seen[d] = true;
+
+        // Handle sparse features.
+        if (example_dtype != DT_INVALID &&
+            example_dtype != config.sparse[d].dtype) {
+          return example_error(strings::StrCat(
+              "Data types don't match. ",
+              "Expected type: ", DataTypeString(config.sparse[d].dtype),
+              ", Actual type: ", DataTypeString(example_dtype)));
+        }
+      }
+
+      switch (example_dtype) {
+        case DT_INT64: {
+          // TODO(mrry): Use the fact that the `int64_list` is packed to read
+          // out the length and pre-allocate the output tensor.
+          if (!feature.ParseInt64List(&out_temp.int64_list))
+            return parse_error();
+          num_elements = out_temp.int64_list.size();
+          break;
+        }
+        case DT_FLOAT: {
+          // TODO(mrry): Use the fact that the `float_list` is packed to read
+          // out the length and pre-allocate the output tensor.
+          if (!feature.ParseFloatList(&out_temp.float_list))
+            return parse_error();
+          num_elements = out_temp.float_list.size();
+          break;
+        }
+        case DT_STRING: {
+          int actual_num_elements = 0;
+          if (!feature.GetNumElementsInBytesList(&actual_num_elements)) {
+            return parse_error();
+          }
+          out_temp.bytes_list.reserve(actual_num_elements);
+          if (!feature.ParseBytesList(&out_temp.bytes_list))
+            return parse_error();
+          num_elements = out_temp.bytes_list.size();
+          break;
+        }
+        default:
+          LOG(FATAL) << "Should not happen. " << DataTypeString(example_dtype);
+      }
+
+      if (num_elements % num_elements_divisor != 0) {
+        return parse_error();
+      }
+
+      Tensor* out;
+      if (is_dense) {
+        TensorShape values_shape;
+        values_shape.AddDim(num_elements / num_elements_divisor);
+        for (int i = 1; i < config.dense[d].shape.dims(); ++i) {
+          values_shape.AddDim(config.dense[d].shape.dim_size(i));
+        }
+
+        out = &result->dense_values[d];
+        *out = Tensor(config.dense[d].dtype, values_shape);
+
+      } else {
+        Tensor* out_indices = &result->sparse_indices[d];
+        Tensor* out_dense_shape = &result->sparse_shapes[d];
+        out = &result->sparse_values[d];
+
+        // TODO(mrry): Investigate the possibility of not materializing
+        // the indices (and perhaps dense_shape) until they are needed.
+        *out_indices = Tensor(
+            DT_INT64, TensorShape({static_cast<int64>(num_elements), 1}));
+        auto indices_flat = out_indices->flat<int64>();
+        for (size_t i = 0; i < num_elements; ++i) {
+          indices_flat(i) = static_cast<int64>(i);
+        }
+
+        *out_dense_shape = Tensor(DT_INT64, TensorShape({1}));
+        auto shapes_shape_t = out_dense_shape->vec<int64>();
+        shapes_shape_t(0) = num_elements;
+
+        *out = Tensor(config.sparse[d].dtype,
+                      TensorShape({static_cast<int64>(num_elements)}));
+      }
+
+      switch (example_dtype) {
+        case DT_INT64: {
+          CopyOrMoveBlock(out_temp.int64_list.begin(),
+                          out_temp.int64_list.end(), out->flat<int64>().data());
+          break;
+        }
+        case DT_FLOAT: {
+          CopyOrMoveBlock(out_temp.float_list.begin(),
+                          out_temp.float_list.end(), out->flat<float>().data());
+          break;
+        }
+        case DT_STRING: {
+          CopyOrMoveBlock(out_temp.bytes_list.begin(),
+                          out_temp.bytes_list.end(),
+                          out->flat<string>().data());
+          break;
+        }
+        default:
+          LOG(FATAL) << "Should not happen.";
+      }
+    }
+  }
+
+  // Handle missing dense features.
+  for (size_t d = 0; d < config.dense.size(); ++d) {
+    if (!dense_feature_already_seen[d]) {
+      if (!config.dense[d].variable_length) {
+        // Handle missing fixed-length dense feature.
+        if (config.dense[d].default_value.NumElements() == 0) {
+          return errors::InvalidArgument(
+              "Feature: ", config.dense[d].feature_name,
+              " (data type: ", DataTypeString(config.dense[d].dtype), ")",
+              " is required but could not be found.");
+        }
+        result->dense_values[d] = config.dense[d].default_value;
+      } else {
+        // Handle missing varlen dense feature.
+        TensorShape empty_shape;
+        empty_shape.AddDim(0);
+        for (int i = 1; i < config.dense[d].shape.dims(); ++i) {
+          empty_shape.AddDim(config.dense[d].shape.dim_size(i));
+        }
+        result->dense_values[d] = Tensor(config.dense[d].dtype, empty_shape);
+      }
+    }
+  }
+
+  // Handle missing sparse features.
+  for (size_t d = 0; d < config.sparse.size(); ++d) {
+    if (!sparse_feature_already_seen[d]) {
+      result->sparse_indices[d] = Tensor(DT_INT64, TensorShape({0, 1}));
+      result->sparse_values[d] =
+          Tensor(config.sparse[d].dtype, TensorShape({0}));
+      result->sparse_shapes[d].vec<int64>()(0) = 0;
+    }
+  }
+
+  return Status::OK();
+}
+
 }  // namespace example
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/example_proto_fast_parsing.h b/tensorflow/core/util/example_proto_fast_parsing.h
index 20536cee163ba926a16f78e5014c5abd2958f5f2..fe59ec77ca9872ada865a27075c733e30a003c21 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.h
+++ b/tensorflow/core/util/example_proto_fast_parsing.h
@@ -79,6 +79,12 @@ Status FastParseExample(const FastParseExampleConfig& config,
                         gtl::ArraySlice<string> example_names,
                         thread::ThreadPool* thread_pool, Result* result);
 
+// TODO(mrry): Move the hash table construction into the config object.
+typedef FastParseExampleConfig FastParseSingleExampleConfig;
+
+Status FastParseSingleExample(const FastParseSingleExampleConfig& config,
+                              const string& serialized, Result* result);
+
 // This function parses serialized Example and populates given example.
 // It uses the same specialized parser as FastParseExample which is efficient.
 // But then constructs Example which is relatively slow.
diff --git a/tensorflow/core/util/example_proto_helper.cc b/tensorflow/core/util/example_proto_helper.cc
index 4b5bf6311233a57914d624a5b77707d02c5bec37..41f56d2daa48e651f5ac4051deae9c05ef1ed859 100644
--- a/tensorflow/core/util/example_proto_helper.cc
+++ b/tensorflow/core/util/example_proto_helper.cc
@@ -400,7 +400,7 @@ Status BatchExampleProtoToTensors(
   return Status::OK();
 }
 
-Status ParseSingleExampleAttrs::FinishInit() {
+Status ParseExampleAttrs::FinishInit() {
   if (static_cast<size_t>(num_sparse) != sparse_types.size()) {
     return errors::InvalidArgument("len(sparse_keys) != len(sparse_types)");
   }
@@ -422,6 +422,25 @@ Status ParseSingleExampleAttrs::FinishInit() {
   return Status::OK();
 }
 
+Status ParseSingleExampleAttrs::FinishInit() {
+  if (sparse_keys.size() != sparse_types.size()) {
+    return errors::InvalidArgument("len(sparse_keys) != len(sparse_types)");
+  }
+  if (dense_keys.size() != dense_types.size()) {
+    return errors::InvalidArgument("len(dense_keys) != len(dense_types)");
+  }
+  if (dense_keys.size() != dense_shapes.size()) {
+    return errors::InvalidArgument("len(dense_keys) != len(dense_shapes)");
+  }
+  for (const DataType& type : dense_types) {
+    TF_RETURN_IF_ERROR(CheckValidType(type));
+  }
+  for (const DataType& type : sparse_types) {
+    TF_RETURN_IF_ERROR(CheckValidType(type));
+  }
+  return Status::OK();
+}
+
 Status ParseSingleSequenceExampleAttrs::FinishInit() {
   if (static_cast<size_t>(num_context_sparse) != context_sparse_types.size()) {
     return errors::InvalidArgument(
diff --git a/tensorflow/core/util/example_proto_helper.h b/tensorflow/core/util/example_proto_helper.h
index 7414d61e8bd850863c8e59c1262121e11559fcff..8b3c6c5a3fa20967377fcf5d9f14a5f1562e73dd 100644
--- a/tensorflow/core/util/example_proto_helper.h
+++ b/tensorflow/core/util/example_proto_helper.h
@@ -148,9 +148,9 @@ Tensor FeatureSparseCopy(const std::size_t batch, const string& key,
 int64 CopyIntoSparseTensor(const Tensor& in, const int batch,
                            const int64 offset, Tensor* indices, Tensor* values);
 
-// Parses the attributes passed to ParseSingleExample.
+// Parses the attributes passed to ParseExample.
 // REQUIRES: Init must be called after construction.
-class ParseSingleExampleAttrs {
+class ParseExampleAttrs {
  public:
   template <typename ContextType>
   Status Init(ContextType* ctx) {
@@ -205,6 +205,72 @@ class ParseSingleExampleAttrs {
   Status FinishInit();  // for context-independent parts of Init.
 };
 
+// Parses the attributes passed to ParseSingleExample.
+// REQUIRES: Init must be called after construction.
+class ParseSingleExampleAttrs {
+ public:
+  template <typename ContextType>
+  Status Init(ContextType* ctx) {
+    TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_keys", &sparse_keys));
+    TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_types", &sparse_types));
+    TF_RETURN_IF_ERROR(ctx->GetAttr("dense_keys", &dense_keys));
+    TF_RETURN_IF_ERROR(ctx->GetAttr("Tdense", &dense_types));
+    TF_RETURN_IF_ERROR(ctx->GetAttr("dense_shapes", &dense_shapes));
+
+    int num_sparse;
+    TF_RETURN_IF_ERROR(ctx->GetAttr("num_sparse", &num_sparse));
+    if (num_sparse != sparse_keys.size() || num_sparse != sparse_types.size()) {
+      return errors::InvalidArgument(
+          "num_sparse (", num_sparse, ") must match the size of sparse_keys (",
+          sparse_keys.size(), ") and sparse_types (", sparse_types.size(), ")");
+    }
+
+    // Temporary check until we start allowing a variable length outer
+    // dimension.
+    for (int i = 0; i < dense_shapes.size(); ++i) {
+      bool shape_ok = true;
+      if (dense_shapes[i].dims() == -1) {
+        shape_ok = false;
+      } else {
+        for (int d = 1; d < dense_shapes[i].dims(); ++d) {
+          if (dense_shapes[i].dim_size(d) == -1) {
+            shape_ok = false;
+          }
+        }
+      }
+      if (!shape_ok) {
+        return errors::InvalidArgument(
+            "dense_shapes[", i,
+            "] has unknown rank or unknown inner dimensions: ",
+            dense_shapes[i].DebugString());
+      }
+      TensorShape dense_shape;
+      if (dense_shapes[i].dims() > 0 && dense_shapes[i].dim_size(0) == -1) {
+        variable_length.push_back(true);
+        for (int d = 1; d < dense_shapes[i].dims(); ++d) {
+          dense_shape.AddDim(dense_shapes[i].dim_size(d));
+        }
+      } else {
+        variable_length.push_back(false);
+        dense_shapes[i].AsTensorShape(&dense_shape);
+      }
+      elements_per_stride.push_back(dense_shape.num_elements());
+    }
+    return FinishInit();
+  }
+
+  std::vector<string> sparse_keys;
+  std::vector<DataType> sparse_types;
+  std::vector<string> dense_keys;
+  std::vector<DataType> dense_types;
+  std::vector<PartialTensorShape> dense_shapes;
+  std::vector<bool> variable_length;
+  std::vector<std::size_t> elements_per_stride;
+
+ private:
+  Status FinishInit();  // for context-independent parts of Init.
+};
+
 // Parses the attributes passed to ParseSingleSequenceExample.
 // REQUIRES: Init must be called after construction.
 class ParseSingleSequenceExampleAttrs {
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 148c7851bd448a0af754644f09a4d1e0511efe44..2caf5fc56dafb5a8879db8026a78bc7bf46346a4 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -328,6 +328,10 @@ class MklShape {
 
 // Forward decl
 TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format);
+memory::dims CalculateTFStrides(const memory::dims& dims_tf_order);
+memory::desc CreateBlockedMemDescHelper(const memory::dims& dim,
+                                        const memory::dims& strides,
+                                        memory::data_type dtype);
 
 class MklDnnShape {
  private:
@@ -364,6 +368,52 @@ class MklDnnShape {
   ~MklDnnShape() {}
   TF_DISALLOW_COPY_AND_ASSIGN(MklDnnShape);  // Cannot copy
 
+  /// Helper function to compare memory::desc objects for MklDnn.
+  /// May be this should go into MklDnn directly.
+  inline bool CompareMklDnnLayouts(const memory::desc& md1,
+                                   const memory::desc& md2) const {
+    mkldnn_memory_desc_t mdd1 = md1.data;
+    mkldnn_memory_desc_t mdd2 = md2.data;
+    const char* d1 = reinterpret_cast<const char*>(&mdd1);
+    const char* d2 = reinterpret_cast<const char*>(&mdd2);
+
+    size_t md_size = sizeof(mdd1);
+    for (size_t i = 0; i < md_size; i++) {
+      if (*d1++ != *d2++) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /// Equality function for MklDnnShape objects
+  /// @return true if both are equal; false otherwise.
+  inline bool operator == (const MklDnnShape& input_shape) const {
+    if (this->IsMklTensor() != input_shape.IsMklTensor()) {
+      return false;
+    }
+
+    // If input tensors are in Mkl layout, then we check for dimensions and
+    // sizes.
+    if (this->IsMklTensor()) {
+      return this->GetTfShape() == input_shape.GetTfShape() &&
+             CompareMklDnnLayouts(this->GetMklLayout(),
+                                  input_shape.GetMklLayout());
+    }
+
+    return true;
+  }
+
+  /// Equality operator for MklDnnShape and TFShape.
+  /// Returns: true if TF shapes for both are the same, false otherwise
+  inline bool operator == (const TensorShape& input_shape) const {
+    if (!this->IsMklTensor()) {
+      return false;
+    }
+
+    return this->GetTfShape() == input_shape;
+  }
+
   inline const bool IsMklTensor() const { return data_.is_mkl_tensor_; }
   inline void SetMklTensor(bool is_mkl_tensor) {
     data_.is_mkl_tensor_ = is_mkl_tensor;
@@ -375,7 +425,7 @@ class MklDnnShape {
   inline size_t GetDimension(char dimension) const {
     int index = GetMklDnnTensorDimIndex(dimension);
     CHECK(index >= 0 && index < this->GetDimension())
-        << "Invalid index from the dimension: " << index << ", " << dimension;
+      << "Invalid index from the dimension: " << index << ", " << dimension;
     return this->DimSize(index);
   }
 
@@ -405,7 +455,7 @@ class MklDnnShape {
   inline memory::dims GetSizesAsMklDnnDims() const {
     memory::dims retVal;
     if (data_.is_mkl_tensor_) {
-      int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
+      size_t dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
       for (size_t i = 0; i < dimensions; i++) {
         if (data_.sizes_[i] != INVALID_DIM_SIZE)
           retVal.push_back(data_.sizes_[i]);
@@ -423,12 +473,21 @@ class MklDnnShape {
 
   /// Return TensorShape that describes the Tensorflow shape of the tensor
   /// represented by this MklShape.
-  inline TensorShape GetTfShape() {
+  inline TensorShape GetTfShape() const {
     CHECK_EQ(data_.is_mkl_tensor_, true);
 
     std::vector<int32> shape(data_.dimension_, -1);
-    for (size_t idx = 0; idx < data_.dimension_; ++idx) {
-      shape[idx] = data_.sizes_[TfDimIdx(idx)];
+    if (data_.tf_data_format_ != memory::format::blocked) {
+      for (size_t idx = 0; idx < data_.dimension_; ++idx) {
+        shape[idx] = data_.sizes_[TfDimIdx(idx)];
+      }
+    } else {
+      // If Tensorflow shape is in Blocked format, then we don't have dimension
+      // map for it. So we just create Tensorflow shape from sizes in the
+      // specified order.
+      for (size_t idx = 0; idx < data_.dimension_; ++idx) {
+        shape[idx] = data_.sizes_[idx];
+      }
     }
 
     TensorShape ts;
@@ -444,6 +503,12 @@ class MklDnnShape {
     CHECK_NOTNULL(pd);
     data_.mkl_md_ = pd->desc().data;
   }
+
+  inline void SetMklLayout(memory::desc* md) {
+    CHECK_NOTNULL(md);
+    data_.mkl_md_ = md->data;
+  }
+
   inline const memory::desc GetMklLayout() const {
     return memory::desc(data_.mkl_md_);
   }
@@ -452,7 +517,8 @@ class MklDnnShape {
     return data_.tf_data_format_;
   }
   /// We don't create primitive_descriptor for TensorFlow layout now.
-  /// We use lazy evaluation and create it only when needed.
+  /// We use lazy evaluation and create it only when needed. Input format can
+  /// also be Blocked format.
   inline void SetTfLayout(size_t dims, const memory::dims& sizes,
                           memory::format format) {
     CHECK_EQ(dims, sizes.size());
@@ -461,15 +527,26 @@ class MklDnnShape {
       data_.sizes_[ii] = sizes[ii];
     }
     data_.tf_data_format_ = format;
-    SetTfDimOrder(dims, format);
+    if (format != memory::format::blocked) {
+      SetTfDimOrder(dims, format);
+    }
   }
+
   inline const memory::desc GetTfLayout() const {
     memory::dims dims;
     for (size_t ii = 0; ii < data_.dimension_; ii++) {
       dims.push_back(data_.sizes_[ii]);
     }
-    return memory::desc(dims, data_.T_, data_.tf_data_format_);
+
+    // Create Blocked memory desc if input TF format was set like that.
+    if (data_.tf_data_format_ == memory::format::blocked) {
+      auto strides = CalculateTFStrides(dims);
+      return CreateBlockedMemDescHelper(dims, strides, data_.T_);
+    } else {
+      return memory::desc(dims, data_.T_, data_.tf_data_format_);
+    }
   }
+
   inline const memory::desc GetCurLayout() const {
     return IsMklTensor() ? GetMklLayout() : GetTfLayout();
   }
@@ -579,8 +656,13 @@ class MklDnnShape {
 #endif
 
 // List of MklShape objects. Used in Concat/Split layers.
+
 typedef std::vector<MklShape> MklShapeList;
 
+#ifdef INTEL_MKL_DNN
+typedef std::vector<MklDnnShape> MklDnnShapeList;
+#endif
+
 // Check if all tensors specified by MklShapes are MKL tensors.
 inline bool AreAllMklTensors(const MklShapeList& shapes) {
   for (auto& s : shapes) {
@@ -591,6 +673,7 @@ inline bool AreAllMklTensors(const MklShapeList& shapes) {
   return true;
 }
 
+#ifndef INTEL_MKL_DNN
 template <typename T>
 inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
                              const MklShape& mkl_shape) {
@@ -615,32 +698,15 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
 
   return output_tensor;
 }
-
-#ifdef INTEL_MKL_DNN
+#else
 template <typename T>
 inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor,
                              const MklDnnShape& mkl_shape) {
   Tensor output_tensor;
   TensorShape output_shape;
 
-#if 0
-  // TODO(nhasabni): need to implement
-  for (size_t j = 0; j < mkl_shape.GetDimension(); j++) {
-    // Outermost to innermost dimension
-    output_shape.AddDim(mkl_shape.GetSizes()[mkl_shape.tf_dim_idx(j)]);
-  }
-
-  // Allocate output tensor.
-  context->allocate_temp(DataTypeToEnum<T>::v(), output_shape, &output_tensor);
-
-  dnnLayout_t output_layout = static_cast<dnnLayout_t>(mkl_shape.GetTfLayout());
-  void* input_buffer = const_cast<T*>(mkl_tensor.flat<T>().data());
-  void* output_buffer = const_cast<T*>(output_tensor.flat<T>().data());
-
-  if (mkl_tensor.NumElements() != 0) {
-    mkl_shape.GetConvertedFlatData(output_layout, input_buffer, output_buffer);
-  }
-#endif
+  TF_CHECK_OK(Status(error::Code::UNIMPLEMENTED,
+                     "Unimplemented conversion function"));
 
   return output_tensor;
 }
@@ -682,6 +748,9 @@ inline void GetMklInputList(OpKernelContext* ctext, StringPiece name,
   ctext->input_list(name, input_tensors);
 }
 
+
+#ifndef INTEL_MKL_DNN
+
 inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
                             MklShapeList* mkl_shapes) {
   OpInputList input_mkl_tensors;
@@ -694,6 +763,22 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
   }
 }
 
+#else
+
+inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
+                            MklDnnShapeList* mkl_shapes) {
+  OpInputList input_mkl_tensors;
+  GetMklInputList(ctext, strings::StrCat("mkl_", name), &input_mkl_tensors);
+
+  for (int i = 0; i < input_mkl_tensors.size(); i++) {
+    (*mkl_shapes)[i].DeSerializeMklDnnShape(
+        input_mkl_tensors[i].flat<uint8>().data(),
+        input_mkl_tensors[i].flat<uint8>().size() * sizeof(uint8));
+  }
+}
+
+#endif
+
 #ifdef INTEL_MKL_DNN
 /// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
 /// If the input tensor is in MKL layout, then obtains TensorShape from
@@ -909,6 +994,7 @@ inline void CopyMklTensorInToOut(OpKernelContext* context,
   context->set_output(idx_meta_out, meta_output);
 }
 
+#ifndef INTEL_MKL_DNN
 inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
                                          int idx_in, int idx_out,
                                          const TensorShape& shape) {
@@ -926,6 +1012,27 @@ inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
   CHECK(output.CopyFrom(data, shape));
   context->set_output(idx_data_out, output);
 }
+#else
+inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
+                                         int idx_in, int idx_out,
+                                         const TensorShape& shape) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  const Tensor& data = context->input(idx_data_in);
+  MklDnnShape mkl_shape_output;
+  mkl_shape_output.SetMklTensor(false);
+  AllocateOutputSetMklShape(context, idx_out, mkl_shape_output);
+  Tensor output(data.dtype());
+  // TODO(intel_tf): alternatively, call forward_input_to_output_with_shape(...)
+  CHECK(output.CopyFrom(data, shape));
+  context->set_output(idx_data_out, output);
+}
+#endif
+
+#ifndef INTEL_MKL_DNN
 
 inline void ForwardTfTensorInToOut(OpKernelContext* context,
                                   int idx_in, int idx_out) {
@@ -944,6 +1051,27 @@ inline void ForwardTfTensorInToOut(OpKernelContext* context,
   }
 }
 
+#else
+
+inline void ForwardTfTensorInToOut(OpKernelContext* context,
+                                  int idx_in, int idx_out) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  MklDnnShape dnn_shape_output;
+  dnn_shape_output.SetMklTensor(false);
+  AllocateOutputSetMklShape(context, idx_out, dnn_shape_output);
+  if (IsRefType(context->input_dtype(idx_data_in))) {
+    context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);
+  } else {
+    context->set_output(idx_data_out, context->input(idx_data_in));
+  }
+}
+
+#endif
+
 inline void ForwardMklTensorInToOut(OpKernelContext* context,
                                    int idx_in, int idx_out) {
   int num_inputs = context->num_inputs();
@@ -962,6 +1090,25 @@ inline void ForwardMklTensorInToOut(OpKernelContext* context,
   }
 }
 
+#ifdef INTEL_MKL_DNN
+inline void ForwardMklTensorInToOutWithMklShape(OpKernelContext* context,
+                                             int idx_in, int idx_out,
+                                             const MklDnnShape& mkl_shape) {
+  int num_inputs = context->num_inputs();
+  int num_outputs = context->num_outputs();
+  int idx_data_in = GetTensorDataIndex(idx_in, num_inputs);
+  int idx_data_out = GetTensorDataIndex(idx_out, num_outputs);
+
+  AllocateOutputSetMklShape(context, idx_out, mkl_shape);
+
+  if (IsRefType(context->input_dtype(idx_data_in))) {
+    context->forward_ref_input_to_ref_output(idx_data_in, idx_data_out);
+  } else {
+    context->set_output(idx_data_out, context->input(idx_data_in));
+  }
+}
+#endif
+
 // Forward the MKL shape ONLY (used in elementwise and other ops where
 // we call the eigen implementation and MKL shape is not used)
 inline void ForwardMklMetaDataInToOut(OpKernelContext* context,
@@ -985,6 +1132,10 @@ inline void SetDummyMklShapeOutput(OpKernelContext* context,
   AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output);
 }
 
+#ifndef INTEL_MKL_DNN
+// We don't need these functions in MKLDNN. We have defined equality operator
+// on MklDnnShape class directly.
+
 // Checks if the TF shape for both MKL tensors is the same or not
 // Returns: true if both TF shapes are the same, false otherwise
 inline bool MklCompareShapes(const MklShape* input_shape_0,
@@ -1051,6 +1202,7 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0,
 
   return true;
 }
+#endif
 
 // These functions do not compile with MKL-DNN since mkl.h is missing.
 // We may need to remove them later.
@@ -1127,11 +1279,14 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
 /// @return: Tensorflow data format corresponding to memory::format
 ///          Fails with an error if invalid data format.
 inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
-  if (format == memory::format::nhwc)
-    return FORMAT_NHWC;
-  else if (format == memory::format::nchw)
-    return FORMAT_NCHW;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
+  if (format == memory::format::nhwc) return FORMAT_NHWC;
+  else if (format == memory::format::nchw) return FORMAT_NCHW;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
+                     "Unsupported data format"));
+
+  // Return to prevent compiler warnings, otherwise TF_CHECK_OK will ensure
+  // that we don't come here.
+  return FORMAT_NHWC;
 }
 
 /// Map TensorShape object into memory::dims required by MKL-DNN
@@ -1175,6 +1330,23 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
   return memory::dims({n, c, h, w});
 }
 
+/// Overloaded version of function above. Input parameters are
+/// self-explanatory.
+inline memory::dims MklDnnDimsInNCHW(const memory::dims& in_dims,
+                                     TensorFormat format) {
+  // Check validity of format.
+  CHECK_NE(TFDataFormatToMklDnnDataFormat(format),
+           memory::format::format_undef);
+
+  int n = in_dims[GetTensorDimIndex(format, 'N')];
+  int c = in_dims[GetTensorDimIndex(format, 'C')];
+  int h = in_dims[GetTensorDimIndex(format, 'H')];
+  int w = in_dims[GetTensorDimIndex(format, 'W')];
+
+  // MKL-DNN requires dimensions in NCHW format.
+  return memory::dims({n, c, h, w});
+}
+
 /// Map MklDnn memory::dims object into TensorShape object.
 ///
 /// This function will simply map input shape in MKL-DNN memory::dims format
@@ -1217,6 +1389,43 @@ inline padding_kind TFPaddingToMklDnnPadding(Padding pad) {
   return padding_kind::zero;
 }
 
+/// Helper function to create memory descriptor in Blocked format
+///
+/// @input: Tensor dimensions
+/// @input: strides corresponding to dimensions. One can use utility
+///         function such as CalculateTFStrides to compute strides
+///         for given dimensions.
+/// @return: memory::desc object corresponding to blocked memory format
+///          for given dimensions and strides.
+inline memory::desc CreateBlockedMemDescHelper(const memory::dims& dim,
+                                               const memory::dims& strides,
+                                               memory::data_type dtype) {
+  CHECK_EQ(dim.size(), strides.size());
+
+  // We have to construct memory descriptor in a C style. This is not at all
+  // ideal but MKLDNN does not offer any API to construct descriptor in
+  // blocked format except a copy constructor that accepts
+  // mkldnn_memory_desc_t.
+  mkldnn_memory_desc_t md;
+  md.primitive_kind = mkldnn_memory;
+  md.ndims = dim.size();
+  md.format = mkldnn_blocked;
+  md.data_type = memory::convert_to_c(dtype);
+
+  for (size_t i = 0; i < dim.size(); i++) {
+    md.layout_desc.blocking.block_dims[i] = 1;
+    md.layout_desc.blocking.strides[1][i] = 1;
+    md.layout_desc.blocking.strides[0][i] = strides[i];
+    md.layout_desc.blocking.padding_dims[i] = dim[i];
+    md.layout_desc.blocking.offset_padding_to_data[i] = 0;
+    md.dims[i] = dim[i];
+  }
+  md.layout_desc.blocking.offset_padding = 0;
+
+  return memory::desc(md);
+}
+
+
 /*
  * Class to represent all the resources corresponding to a tensor in TensorFlow
  * that are required to execute an operation (such as Convolution).
@@ -1285,30 +1494,8 @@ class MklDnnData {
   /// @return: memory::desc object corresponding to blocked memory format
   ///          for given dimensions and strides.
   static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim,
-                                                  const memory::dims& strides) {
-    CHECK_EQ(dim.size(), strides.size());
-
-    // We have to construct memory descriptor in a C style. This is not at all
-    // ideal but MKLDNN does not offer any API to construct descriptor in
-    // blocked format except a copy constructor that accepts
-    // mkldnn_memory_desc_t.
-    mkldnn_memory_desc_t md;
-    md.primitive_kind = mkldnn_memory;
-    md.ndims = dim.size();
-    md.format = mkldnn_blocked;
-    md.data_type = memory::convert_to_c(MklDnnType<T>());
-
-    for (size_t i = 0; i < dim.size(); i++) {
-      md.layout_desc.blocking.block_dims[i] = 1;
-      md.layout_desc.blocking.strides[1][i] = 1;
-      md.layout_desc.blocking.strides[0][i] = strides[i];
-      md.layout_desc.blocking.padding_dims[i] = dim[i];
-      md.layout_desc.blocking.offset_padding_to_data[i] = 0;
-      md.dims[i] = dim[i];
-    }
-    md.layout_desc.blocking.offset_padding = 0;
-
-    return memory::desc(md);
+                                                 const memory::dims& strides) {
+    return CreateBlockedMemDescHelper(dim, strides, MklDnnType<T>());
   }
 
   /// A version of SetUsrMem call that allows user to create memory in blocked
@@ -1376,6 +1563,7 @@ class MklDnnData {
     return user_memory_->get_primitive_desc();
   }
 
+
   /// Get function for descriptor of user memory.
   inline memory::desc GetUsrMemDesc() {
     // This is ugly. Why MKL-DNN does not provide desc() method of const type??
@@ -1438,6 +1626,17 @@ class MklDnnData {
     return op_pd != user_memory_->get_primitive_desc();
   }
 
+  /// Predicate that checks if we need to reorder user's memory into memory
+  /// based on the provided format.
+  ///
+  /// @input: target_format - memory format of the given input of an
+  ///               operation
+  /// @return: true in case reorder of input is needed; false, otherwise.
+  inline bool IsReorderNeeded(const memory::format& target_format) const {
+    CHECK_NOTNULL(user_memory_);
+    return target_format != user_memory_->get_primitive_desc().desc().data.format;
+  }
+
   /// Function to create a reorder from memory pointed by from to memory pointed
   /// by to. Returns created primitive.
   inline primitive CreateReorder(const memory* from, const memory* to) const {
diff --git a/tensorflow/core/util/sparse/sparse_tensor.h b/tensorflow/core/util/sparse/sparse_tensor.h
index 0ea74c38b1916f777eaaf7b0907b614e680ea6e7..f2401a0af4e60f66c606e86e90a37bcf09eb6308 100644
--- a/tensorflow/core/util/sparse/sparse_tensor.h
+++ b/tensorflow/core/util/sparse/sparse_tensor.h
@@ -69,6 +69,21 @@ class SparseTensor {
     CHECK_EQ(shape.size(), dims_) << "Shape rank must be SparseTensor rank.";
   }
 
+  SparseTensor(const SparseTensor& other)
+      : SparseTensor(other.ix_, other.vals_, other.shape_, other.order_) {}
+
+  SparseTensor(SparseTensor&& other)
+      : SparseTensor(std::move(other.ix_), std::move(other.vals_),
+                     std::move(other.shape_), std::move(other.order_)) {}
+
+  SparseTensor& operator=(const SparseTensor& other) {
+    ix_ = other.ix_;
+    vals_ = other.vals_;
+    shape_ = other.shape_;
+    order_ = other.order_;
+    return *this;
+  }
+
   std::size_t num_entries() const { return ix_.dim_size(0); }
 
   int dims() const { return shape_.size(); }
@@ -601,7 +616,7 @@ SparseTensor SparseTensor::Slice(const SparseTensor& input_tensor,
   int index = 0;
   for (int i = 0; i < input_tensor.indices().dim_size(0) && index < count;
        i++) {
-    // The logic here is similiar as the above except that the above
+    // The logic here is similar as the above except that the above
     // only count the number of indices while here we actually generate
     // the output.
     bool hit = true;
diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc
index cfe9275a09189b0d72e57a79cd860de9ab5d82b8..aca60b942d15841438329c922a8aaaded7b08430 100644
--- a/tensorflow/core/util/strided_slice_op.cc
+++ b/tensorflow/core/util/strided_slice_op.cc
@@ -218,8 +218,8 @@ Status ValidateStridedSliceOp(
 
   // Step 2: Make a sparse spec into a full index spec
   //
-  // The sparse spec does not corresopnds to the number of dimensions
-  // Make a dense spec that corresponds to thte number of dimensions
+  // The sparse spec does not correspond to the number of dimensions
+  // Make a dense spec that corresponds to the number of dimensions
   //
   // For example suppose foo[...,3:] on foo.shape=(2,2,3) then
   // we need to produce the missing begin_mask for the first two
diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
index d0e54b7e4774e8cd2b2295df4f3fa4c724acbfac..579b70ab5149f05749205f24a0c6e64c95f12dfd 100644
--- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
+++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
@@ -51,6 +51,9 @@ const int kTensorBundleMinProducer = 0;
 const int kTensorBundleMinConsumer = 0;
 const int kTensorBundleVersion = 1;
 
+// Size of our input buffer for streaming reads
+static const int kBufferSize = 1024 * 1024;
+
 // Key to the special BundleHeaderProto entry.  Do not change this, as clients
 // can make the assumption that the header is always the first entry in the
 // bundle.
@@ -141,7 +144,11 @@ Status ReadVariantTensor(io::InputBuffer* buffered_file, Tensor* ret,
         buffered_file->ReadNBytes(string_length, &buffer[0], &bytes_read));
     *actual_crc32c = crc32c::Extend(*actual_crc32c, buffer.data(), bytes_read);
     VariantTensorDataProto proto;
-    proto.ParseFromString(buffer);
+    if (!proto.ParseFromString(buffer)) {
+      return errors::DataLoss("Unable to parse VariantTensorDataProto from ",
+                              "buffer of size ", string_length, ". ",
+                              "Bundle entry offset: ", offset, " size: ", size);
+    }
     Variant v = proto;
     if (!DecodeUnaryVariant(&v)) {
       return errors::Internal("Could not decode variant with type_name: \"",
@@ -299,43 +306,6 @@ Status WriteVariantTensor(const Tensor& val, FileOutputBuffer* out,
   return Status::OK();
 }
 
-// Reads file[offset:offset+size) into destination[0:size).  Each Read() copies
-// at most "buffer_size" bytes.
-//
-// REQUIRES: "file" contains at least "offset + size" bytes.
-// REQUIRES: "destination" contains at least "size" bytes.
-// On error, "destination" may contain garbage.
-Status ReadInputByChunk(const RandomAccessFile* file, size_t offset,
-                        size_t size, size_t buffer_size, char* destination) {
-  if (size == 0) return Status::OK();
-  CHECK_GT(size, 0);
-  CHECK_GT(buffer_size, 0);
-  size_t bytes_read = 0;
-  StringPiece result;
-
-  while (bytes_read < size) {
-    const size_t desired_bytes = std::min(buffer_size, size - bytes_read);
-    Status status = file->Read(offset + bytes_read, desired_bytes, &result,
-                               destination + bytes_read);
-
-    if (!status.ok()) {
-      return status;
-    } else if (result.size() != desired_bytes) {
-      return errors::DataLoss("Requested ", desired_bytes, " bytes but read ",
-                              result.size(), " bytes.");
-    } else if (result.data() == destination + bytes_read) {
-      // Data is already in the correct location.
-    } else {
-      // memmove is guaranteed to handle overlaps safely (although the src and
-      // dst buffers should not overlap for this function).
-      memmove(destination + bytes_read, result.data(), result.size());
-    }
-    bytes_read += result.size();
-  }
-  CHECK_EQ(bytes_read, size);
-  return Status::OK();
-}
-
 // Returns whether "slice_spec" is a full slice, with respect to the full shape.
 //
 // This can happen say, when "slice_spec" is
@@ -379,10 +349,27 @@ table::Options TableBuilderOptions() {
   return o;
 }
 
+// Writes zeros to output buffer to align the next write to the requested
+// alignment. "size" is the current size of the buffer and is updated to the
+// new size.
+Status PadAlignment(FileOutputBuffer* out, int alignment, int64* size) {
+  int bytes_over = *size % alignment;
+  if (bytes_over == 0) {
+    return Status::OK();
+  }
+  int bytes_to_write = alignment - bytes_over;
+  Status status = out->Append(string(bytes_to_write, '\0'));
+  if (status.ok()) {
+    *size += bytes_to_write;
+  }
+  return status;
+}
+
 }  // namespace
 
-BundleWriter::BundleWriter(Env* env, StringPiece prefix)
+BundleWriter::BundleWriter(Env* env, StringPiece prefix, const Options& options)
     : env_(env),
+      options_(options),
       prefix_(prefix.ToString()),
       tmp_metadata_path_(strings::StrCat(MetaFilename(prefix_), ".tempstate",
                                          random::New64())),
@@ -436,6 +423,7 @@ Status BundleWriter::Add(StringPiece key, const Tensor& val) {
     entry->set_size(data_bytes_written);
     entry->set_crc32c(crc32c::Mask(crc32c));
     size_ += data_bytes_written;
+    status_ = PadAlignment(out_.get(), options_.data_alignment, &size_);
   }
   return status_;
 }
@@ -705,13 +693,6 @@ Status MergeBundles(Env* env, gtl::ArraySlice<string> prefixes,
   return status;
 }
 
-// TODO(b/64763924): Remove after Jan 1st 2018.
-bool GetLenientNames() {
-  const char* lenient_names_str = std::getenv("TF_SAVER_LENIENT_NAMES");
-  return lenient_names_str != nullptr &&
-         std::strcmp(lenient_names_str, "") != 0;
-}
-
 // Interface for reading a tensor bundle.
 
 BundleReader::BundleReader(Env* env, StringPiece prefix)
@@ -757,7 +738,6 @@ BundleReader::BundleReader(Env* env, StringPiece prefix)
   }
   status_ = CheckVersions(header.version(), kTensorBundleVersion,
                           kTensorBundleMinProducer, "Checkpoint", "checkpoint");
-  lenient_names_ = GetLenientNames();
 }
 
 BundleReader::~BundleReader() {
@@ -780,23 +760,6 @@ Status BundleReader::GetBundleEntryProto(StringPiece key,
   TF_CHECK_OK(status_);
   Seek(key);
   if (!iter_->Valid() || iter_->key() != key) {
-    if (lenient_names_ && !key.ends_with(":0")) {
-      // TODO(b/64763924): Remove after Jan 1st 2018.
-      // Try appending ":0" to the key.
-      const string key_with_colon_zero = key.ToString() + ":0";
-      Status status = GetBundleEntryProto(key_with_colon_zero, entry);
-      if (status.ok()) {
-        LOG(WARNING) << "Key " << key << " was not found; using key "
-                     << key_with_colon_zero << " instead. This lenient naming "
-                     << "behavior will be removed on Jan 1st 2018, so please "
-                     << "update your checkpoint file.";
-        return status;
-      } else if (status.code() != error::NOT_FOUND) {
-        return status;
-      }
-      LOG(INFO) << "Looked for both " << key << " and " << key_with_colon_zero
-                << " in checkpoint.";
-    }
     return errors::NotFound("Key ", key, " not found in checkpoint");
   }
 
@@ -847,8 +810,7 @@ Status BundleReader::GetValue(const BundleEntryProto& entry, Tensor* val) {
     std::unique_ptr<RandomAccessFile> file = nullptr;
     TF_RETURN_IF_ERROR(env_->NewRandomAccessFile(
         DataFilename(prefix_, entry.shard_id(), num_shards_), &file));
-    buffered_file =
-        new io::InputBuffer(file.release(), 256 << 10 /* 256KB buffer */);
+    buffered_file = new io::InputBuffer(file.release(), kBufferSize);
     // The InputBuffer and RandomAccessFile objects are both released in dtor.
     data_[entry.shard_id()] = buffered_file;
   }
@@ -856,14 +818,21 @@ Status BundleReader::GetValue(const BundleEntryProto& entry, Tensor* val) {
 
   TF_RETURN_IF_ERROR(buffered_file->Seek(entry.offset()));
   uint32 actual_crc32c = 0;
+
   if (DataTypeCanUseMemcpy(entry.dtype())) {
-    // Important: ReadInputByChunk() bounds the readahead as min(buffer, actual
-    // bytes needed).  This is critical when reading small tensors, so we don't
-    // rely on io::InputBuffer's blind buffering here.
     char* backing_buffer = const_cast<char*>((ret->tensor_data().data()));
-    TF_RETURN_IF_ERROR(ReadInputByChunk(buffered_file->file(), entry.offset(),
-                                        entry.size(), 8 << 20 /* 8MB buffer */,
-                                        backing_buffer));
+    size_t unused_bytes_read;
+    if (entry.size() > kBufferSize) {
+      StringPiece sp;
+      TF_RETURN_IF_ERROR(buffered_file->file()->Read(
+          entry.offset(), entry.size(), &sp, backing_buffer));
+      if (sp.data() != backing_buffer) {
+        memmove(backing_buffer, sp.data(), entry.size());
+      }
+    } else {
+      TF_RETURN_IF_ERROR(buffered_file->ReadNBytes(entry.size(), backing_buffer,
+                                                   &unused_bytes_read));
+    }
     actual_crc32c = crc32c::Value(backing_buffer, entry.size());
   } else if (entry.dtype() == DT_VARIANT) {
     // Relies on io::InputBuffer's buffering, because we issue many neighboring
diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.h b/tensorflow/core/util/tensor_bundle/tensor_bundle.h
index 129646cb6935dfa16eecd7c5bd880544c8545366..d30ce3f0cf1df2f622994a47164fa91dbfea3e5c 100644
--- a/tensorflow/core/util/tensor_bundle/tensor_bundle.h
+++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.h
@@ -107,7 +107,14 @@ extern const char* const kHeaderEntryKey;
 // All threads accessing the same BundleWriter must synchronize.
 class BundleWriter {
  public:
-  BundleWriter(Env* env, StringPiece prefix);
+  struct Options {
+    Options() {}
+    // Alignment, in bytes, for tensor data.
+    // Must be >= 1. The default size of 1 densely packs tensors.
+    int data_alignment{1};
+  };
+  BundleWriter(Env* env, StringPiece prefix,
+               const Options& options = Options());
 
   // Adds the tensor "val" under key "key".
   // Across calls "key" must be unique but can be added in any order.
@@ -140,6 +147,7 @@ class BundleWriter {
 
  private:
   Env* const env_;  // Not owned.
+  const Options options_;
   const string prefix_;
   const string tmp_metadata_path_;
   const string tmp_data_path_;
@@ -292,10 +300,7 @@ class BundleReader {
   // the header entry in the metadata table.
   int num_shards_;
 
-  // If set to true, try reading key + ":0" whenever key is not found in the
-  // bundle. This is a temporary measure that will be removed on Jan 1st 2018.
-  // TODO(b/64763924): Remove after Jan 1st 2018.
-  bool lenient_names_;
+  friend class TensorBundleAlignmentTest;  // For testing data alignment.
 
   TF_DISALLOW_COPY_AND_ASSIGN(BundleReader);
 };
diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
index 341aae36f4165767d56f28bcf733146f473c897b..08f1aa7125bc47421e0db24a9db6f6e2b2f1e365 100644
--- a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
+++ b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/lib/io/table_builder.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
 
@@ -770,4 +771,91 @@ TEST(TensorBundleTest, VersionTest) {
   }
 }
 
+class TensorBundleAlignmentTest : public ::testing::Test {
+ protected:
+  template <typename T>
+  void ExpectAlignment(BundleReader* reader, const string& key, int alignment) {
+    BundleEntryProto full_tensor_entry;
+    TF_ASSERT_OK(reader->GetBundleEntryProto(key, &full_tensor_entry));
+    EXPECT_EQ(0, full_tensor_entry.offset() % alignment);
+  }
+};
+
+TEST_F(TensorBundleAlignmentTest, AlignmentTest) {
+  {
+    BundleWriter::Options opts;
+    opts.data_alignment = 42;
+    BundleWriter writer(Env::Default(), Prefix("foo"), opts);
+    TF_EXPECT_OK(writer.Add("foo_003", Constant_2x3<float>(3)));
+    TF_EXPECT_OK(writer.Add("foo_000", Constant_2x3<float>(0)));
+    TF_EXPECT_OK(writer.Add("foo_002", Constant_2x3<float>(2)));
+    TF_EXPECT_OK(writer.Add("foo_001", Constant_2x3<float>(1)));
+    TF_ASSERT_OK(writer.Finish());
+  }
+  {
+    BundleReader reader(Env::Default(), Prefix("foo"));
+    TF_ASSERT_OK(reader.status());
+    EXPECT_EQ(
+        AllTensorKeys(&reader),
+        std::vector<string>({"foo_000", "foo_001", "foo_002", "foo_003"}));
+    Expect<float>(&reader, "foo_000", Constant_2x3<float>(0));
+    Expect<float>(&reader, "foo_001", Constant_2x3<float>(1));
+    Expect<float>(&reader, "foo_002", Constant_2x3<float>(2));
+    Expect<float>(&reader, "foo_003", Constant_2x3<float>(3));
+  }
+  {
+    BundleReader reader(Env::Default(), Prefix("foo"));
+    TF_ASSERT_OK(reader.status());
+    ExpectNext<float>(&reader, Constant_2x3<float>(0));
+    ExpectNext<float>(&reader, Constant_2x3<float>(1));
+    ExpectNext<float>(&reader, Constant_2x3<float>(2));
+    ExpectNext<float>(&reader, Constant_2x3<float>(3));
+    EXPECT_TRUE(reader.Valid());
+    reader.Next();
+    EXPECT_FALSE(reader.Valid());
+  }
+  {
+    BundleReader reader(Env::Default(), Prefix("foo"));
+    TF_ASSERT_OK(reader.status());
+    ExpectAlignment<float>(&reader, "foo_000", 42);
+    ExpectAlignment<float>(&reader, "foo_001", 42);
+    ExpectAlignment<float>(&reader, "foo_002", 42);
+    ExpectAlignment<float>(&reader, "foo_003", 42);
+  }
+}
+
+static void BM_BundleAlignmentByteOff(int iters, int alignment,
+                                      int tensor_size) {
+  testing::StopTiming();
+  {
+    BundleWriter::Options opts;
+    opts.data_alignment = alignment;
+    BundleWriter writer(Env::Default(), Prefix("foo"), opts);
+    TF_CHECK_OK(writer.Add("small", Constant(true, TensorShape({1}))));
+    TF_CHECK_OK(writer.Add("big", Constant(32.1, TensorShape({tensor_size}))));
+    TF_CHECK_OK(writer.Finish());
+  }
+  BundleReader reader(Env::Default(), Prefix("foo"));
+  TF_CHECK_OK(reader.status());
+  testing::StartTiming();
+  for (int i = 0; i < iters; ++i) {
+    Tensor t;
+    TF_CHECK_OK(reader.Lookup("big", &t));
+  }
+  testing::StopTiming();
+}
+
+#define BM_BundleAlignment(ALIGN, SIZE)                        \
+  static void BM_BundleAlignment_##ALIGN##_##SIZE(int iters) { \
+    BM_BundleAlignmentByteOff(iters, ALIGN, SIZE);             \
+  }                                                            \
+  BENCHMARK(BM_BundleAlignment_##ALIGN##_##SIZE)
+
+BM_BundleAlignment(1, 512);
+BM_BundleAlignment(1, 4096);
+BM_BundleAlignment(1, 1048576);
+BM_BundleAlignment(4096, 512);
+BM_BundleAlignment(4096, 4096);
+BM_BundleAlignment(4096, 1048576);
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/tensor_slice_reader.h b/tensorflow/core/util/tensor_slice_reader.h
index 4bb2b246158cb2c3387467d0cd89408a6dee9608..263f56c7fcb2fa822de2e0adb5e346feddc71cc2 100644
--- a/tensorflow/core/util/tensor_slice_reader.h
+++ b/tensorflow/core/util/tensor_slice_reader.h
@@ -15,7 +15,6 @@ limitations under the License.
 
 // The utility to read checkpoints for google brain tensor ops and v3
 // checkpoints for dist_belief.
-//
 
 #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_READER_H_
 #define TENSORFLOW_UTIL_TENSOR_SLICE_READER_H_
diff --git a/tensorflow/core/util/tensor_slice_reader_cache.h b/tensorflow/core/util/tensor_slice_reader_cache.h
index bdd36a2791db690824032f25e339354d23f59441..63a8d0b068d21c8e178f3dd344b15db6484a8453 100644
--- a/tensorflow/core/util/tensor_slice_reader_cache.h
+++ b/tensorflow/core/util/tensor_slice_reader_cache.h
@@ -15,7 +15,6 @@ limitations under the License.
 
 // The utility to read checkpoints for google brain tensor ops and v3
 // checkpoints for dist_belief.
-//
 
 #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_READER_CACHE_H_
 #define TENSORFLOW_UTIL_TENSOR_SLICE_READER_CACHE_H_
diff --git a/tensorflow/core/util/tensor_slice_writer.h b/tensorflow/core/util/tensor_slice_writer.h
index 95d6384afecd28025cc5e14c6f525caeafe1f0a5..bdb4921e1bbf8611d84420c1e52d01fa39c25264 100644
--- a/tensorflow/core/util/tensor_slice_writer.h
+++ b/tensorflow/core/util/tensor_slice_writer.h
@@ -15,7 +15,6 @@ limitations under the License.
 
 // The utility to write checkpoints for google brain tensor ops and v3
 // checkpoints for dist_belief.
-//
 
 #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_WRITER_H_
 #define TENSORFLOW_UTIL_TENSOR_SLICE_WRITER_H_
diff --git a/tensorflow/docs_src/api_guides/cc/guide.md b/tensorflow/docs_src/api_guides/cc/guide.md
index 81fb1e1fda277e8035ada5a410b966fe2de35a09..4e51ada58a3f85e4b21f1c1aec036116d37a72cf 100644
--- a/tensorflow/docs_src/api_guides/cc/guide.md
+++ b/tensorflow/docs_src/api_guides/cc/guide.md
@@ -1,6 +1,6 @@
 # C++ API
 
-Note: By default [tensorflow.org](http://tensorflow.org) shows docs for the
+Note: By default [tensorflow.org](https://www.tensorflow.org) shows docs for the
 most recent stable version. The instructions in this doc require building from
 source. You will probably want to build from the `master` version of tensorflow.
 You should, as a result, be sure you are following the
diff --git a/tensorflow/docs_src/api_guides/python/client.md b/tensorflow/docs_src/api_guides/python/client.md
index 97c19863600a4b67c7af966d3fd2ef8def36fa20..eef23696db27e187124d2c0921c055c2da6f5613 100644
--- a/tensorflow/docs_src/api_guides/python/client.md
+++ b/tensorflow/docs_src/api_guides/python/client.md
@@ -3,8 +3,8 @@
 
 This library contains classes for launching graphs and executing operations.
 
-The @{$get_started/get_started} guide has
-examples of how a graph is launched in a @{tf.Session}.
+@{$programmers_guide/low_level_intro$This guide} has examples of how a graph
+is launched in a @{tf.Session}.
 
 ## Session management
 
diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.entropy.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.entropy.md
deleted file mode 100644
index fc5d5d70d7ebf42c16294c84c2cc3f8381dae236..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.entropy.md
+++ /dev/null
@@ -1 +0,0 @@
-# BayesFlow Entropy (contrib)
diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_graph.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_graph.md
deleted file mode 100644
index d855787ae695f115368ab76671182f3a6e490411..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_graph.md
+++ /dev/null
@@ -1 +0,0 @@
-# BayesFlow Stochastic Graph (contrib)
diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_tensor.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_tensor.md
deleted file mode 100644
index 1cc1ac5d7e670a243f1dcda6ef8c59b6c6d8de2d..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.stochastic_tensor.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# BayesFlow Stochastic Tensors (contrib)
-[TOC]
-
diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.variational_inference.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.variational_inference.md
deleted file mode 100644
index 8f08c09c8fbbc9b5b6ab8612f140f4b7ca7d8b73..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.variational_inference.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# BayesFlow Variational Inference (contrib)
-[TOC]
-
-Variational inference.
diff --git a/tensorflow/docs_src/api_guides/python/contrib.copy_graph.md b/tensorflow/docs_src/api_guides/python/contrib.copy_graph.md
deleted file mode 100644
index f61f4c764d289814439bb8c5d33bdfb46d208866..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.copy_graph.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copying Graph Elements (contrib)
-[TOC]
-
-Functions for copying elements from one graph to another.
diff --git a/tensorflow/docs_src/api_guides/python/contrib.opt.md b/tensorflow/docs_src/api_guides/python/contrib.opt.md
deleted file mode 100644
index 944a80a5ccb0201b5b5a0cf3b57ca31dfc7ce01a..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/api_guides/python/contrib.opt.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Optimization (contrib)
-[TOC]
-
-opt: A module containing optimization routines.
diff --git a/tensorflow/docs_src/api_guides/python/histogram_ops.md b/tensorflow/docs_src/api_guides/python/histogram_ops.md
deleted file mode 100644
index dbd4555429b2a09bdf32e2e421b2d55fac0c0fd0..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/api_guides/python/histogram_ops.md
+++ /dev/null
@@ -1,6 +0,0 @@
-# Histograms
-[TOC]
-
-## Histograms
-
-*   @{tf.histogram_fixed_width}
diff --git a/tensorflow/docs_src/api_guides/python/image.md b/tensorflow/docs_src/api_guides/python/image.md
index a2c8c3c3c92e2acf177da104304746fb34281de7..051e4547ee6900ded85ae18fb80b51db1eacb009 100644
--- a/tensorflow/docs_src/api_guides/python/image.md
+++ b/tensorflow/docs_src/api_guides/python/image.md
@@ -19,6 +19,7 @@ Note: The PNG encode and decode Ops support RGBA, but the conversions Ops
 presently only support RGB, HSV, and GrayScale. Presently, the alpha channel has
 to be stripped from the image and re-attached using slicing ops.
 
+*   @{tf.image.decode_bmp}
 *   @{tf.image.decode_gif}
 *   @{tf.image.decode_jpeg}
 *   @{tf.image.encode_jpeg}
diff --git a/tensorflow/docs_src/api_guides/python/input_dataset.md b/tensorflow/docs_src/api_guides/python/input_dataset.md
index 94c89c37d520fd1c1ec65fedc813a7b348120913..a6e2fc48e0020ff130f034f747d9ca48b4830c2e 100644
--- a/tensorflow/docs_src/api_guides/python/input_dataset.md
+++ b/tensorflow/docs_src/api_guides/python/input_dataset.md
@@ -18,7 +18,6 @@ Classes that create a dataset from input files.
 Static methods in `Dataset` that create new datasets.
 
 *   @{tf.data.Dataset.from_generator}
-*   @{tf.data.Dataset.from_sparse_tensor_slices}
 *   @{tf.data.Dataset.from_tensor_slices}
 *   @{tf.data.Dataset.from_tensors}
 *   @{tf.data.Dataset.list_files}
@@ -59,8 +58,12 @@ Custom transformation functions can be applied to a `Dataset` using @{tf.data.Da
 *   @{tf.contrib.data.enumerate_dataset}
 *   @{tf.contrib.data.group_by_window}
 *   @{tf.contrib.data.ignore_errors}
+*   @{tf.contrib.data.map_and_batch}
+*   @{tf.contrib.data.padded_batch_and_drop_remainder}
+*   @{tf.contrib.data.parallel_interleave}
 *   @{tf.contrib.data.rejection_resample}
-*   @{tf.contrib.data.sloppy_interleave}
+*   @{tf.contrib.data.scan}
+*   @{tf.contrib.data.shuffle_and_repeat}
 *   @{tf.contrib.data.unbatch}
 
 ## Iterating over datasets
@@ -77,5 +80,7 @@ The `Iterator` class also contains static methods that create a @{tf.data.Iterat
 
 ## Extra functions from `tf.contrib.data`
 
+*   @{tf.contrib.data.get_single_element}
+*   @{tf.contrib.data.make_saveable_from_iterator}
 *   @{tf.contrib.data.read_batch_features}
 
diff --git a/tensorflow/docs_src/api_guides/python/meta_graph.md b/tensorflow/docs_src/api_guides/python/meta_graph.md
index fa4cee87007cfd77663e74956fcfe0f15c55c52c..0eff9000931666dce742358a290f25bb2b5a7b16 100644
--- a/tensorflow/docs_src/api_guides/python/meta_graph.md
+++ b/tensorflow/docs_src/api_guides/python/meta_graph.md
@@ -221,15 +221,9 @@ Here are some of the typical usage models:
     # Addes loss and train.
     labels = tf.constant(0, tf.int32, shape=[100], name="labels")
     batch_size = tf.size(labels)
-    labels = tf.expand_dims(labels, 1)
-    indices = tf.expand_dims(tf.range(0, batch_size), 1)
-    concated = tf.concat([indices, labels], 1)
-    onehot_labels = tf.sparse_to_dense(
-        concated, tf.stack([batch_size, 10]), 1.0, 0.0)
     logits = tf.get_collection("logits")[0]
-    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
-        labels=onehot_labels, logits=logits, name="xentropy")
-    loss = tf.reduce_mean(cross_entropy, name="xentropy_mean")
+    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
+                                                  logits=logits)
 
     tf.summary.scalar('loss', loss)
     # Creates the gradient descent optimizer with the given learning rate.
diff --git a/tensorflow/docs_src/api_guides/python/nn.md b/tensorflow/docs_src/api_guides/python/nn.md
index eb3b251099d320244bc212698c45647038df44ae..8e6fd1cff93332b84f552c18f627ba05dc67103e 100644
--- a/tensorflow/docs_src/api_guides/python/nn.md
+++ b/tensorflow/docs_src/api_guides/python/nn.md
@@ -226,6 +226,8 @@ TensorFlow provides several operations that help you perform classification.
 *   @{tf.nn.softmax}
 *   @{tf.nn.log_softmax}
 *   @{tf.nn.softmax_cross_entropy_with_logits}
+*   @{tf.nn.softmax_cross_entropy_with_logits_v2} - identical to the base
+    version, except it allows gradient propagation into the labels.
 *   @{tf.nn.sparse_softmax_cross_entropy_with_logits}
 *   @{tf.nn.weighted_cross_entropy_with_logits}
 
diff --git a/tensorflow/docs_src/api_guides/python/reading_data.md b/tensorflow/docs_src/api_guides/python/reading_data.md
index b3ebaa0f0a3645256d4e92632a10a53e4eb243cb..b3ca9583704eb30e097bb4d7c438ea8c3662df40 100644
--- a/tensorflow/docs_src/api_guides/python/reading_data.md
+++ b/tensorflow/docs_src/api_guides/python/reading_data.md
@@ -1,11 +1,11 @@
 # Reading data
 
 Note: The preferred way to feed data into a tensorflow program is using the
-@{$datasets$Datasets API}.
+@{$datasets$`tf.data` API}.
 
 There are four methods of getting data into a TensorFlow program:
 
-*   `Dataset` API: Easily construct a complex input pipeline. (preferred method)
+*   `tf.data` API: Easily construct a complex input pipeline. (preferred method)
 *   Feeding: Python code provides the data when running each step.
 *   `QueueRunner`: a queue-based input pipeline reads the data from files
     at the beginning of a TensorFlow graph.
@@ -14,26 +14,27 @@ There are four methods of getting data into a TensorFlow program:
 
 [TOC]
 
-## Dataset API
+## `tf.data` API
 
 See the @{$datasets$programmer's guide} for an in-depth explanation of
-@{tf.data.Dataset}. The `Dataset` API allows you to extract and preprocess data
-from different input/file formats, and apply transformations such as batch,
-shuffle, and map to the dataset. This is an improved version of the old input
-methods, feeding and `QueueRunner`.
+@{tf.data.Dataset}. The `tf.data` API enables you to extract and preprocess data
+from different input/file formats, and apply transformations such as batching,
+shuffling, and mapping functions over the dataset. This is an improved version
+of the old input methods---feeding and `QueueRunner`---which are described
+below for historical purposes.
 
 ## Feeding
 
+Warning: "Feeding" is the least efficient way to feed data into a TensorFlow
+program and should only be used for small experiments and debugging.
+
 TensorFlow's feed mechanism lets you inject data into any Tensor in a
-computation graph. A python computation can thus feed data directly into the
+computation graph. A Python computation can thus feed data directly into the
 graph.
 
 Supply feed data through the `feed_dict` argument to a run() or eval() call
 that initiates computation.
 
-Warning: "Feeding" is the least efficient way to feed data into a tensorflow
-program and should only be used for small experiments and debugging.
-
 ```python
 with tf.Session():
   input = tf.placeholder(tf.float32)
@@ -50,11 +51,14 @@ it is executed without a feed, so you won't forget to feed it.
 
 An example using `placeholder` and feeding to train on MNIST data can be found
 in
-[`tensorflow/examples/tutorials/mnist/fully_connected_feed.py`](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/fully_connected_feed.py),
-and is described in the @{$mechanics$MNIST tutorial}.
+[`tensorflow/examples/tutorials/mnist/fully_connected_feed.py`](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/fully_connected_feed.py).
 
 ## `QueueRunner`
 
+Warning: This section discusses implementing input pipelines using the
+queue-based APIs which can be cleanly replaced by the @{$datasets$`tf.data`
+API}.
+
 A typical queue-based pipeline for reading records from files has the following stages:
 
 1.  The list of filenames
@@ -66,9 +70,6 @@ A typical queue-based pipeline for reading records from files has the following
 7.  *Optional* preprocessing
 8.  Example queue
 
-Warning: This section discusses implementing input pipelines using the
-queue-based APIs which can be cleanly replaced by the @{$datasets$Datasets API}.
-
 ### Filenames, shuffling, and epoch limits
 
 For the list of filenames, use either a constant string Tensor (like
@@ -173,14 +174,25 @@ For example,
 [`tensorflow/examples/how_tos/reading_data/convert_to_records.py`](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/convert_to_records.py)
 converts MNIST data to this format.
 
-To read a file of TFRecords, use
-@{tf.TFRecordReader} with
-the @{tf.parse_single_example}
-decoder. The `parse_single_example` op decodes the example protocol buffers into
-tensors. An MNIST example using the data produced by `convert_to_records` can be
-found in
-[`tensorflow/examples/how_tos/reading_data/fully_connected_reader.py`](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py),
-which you can compare with the `fully_connected_feed` version.
+The recommended way to read a TFRecord file is with a @{tf.data.TFRecordDataset}, [as in this example](https://www.tensorflow.org/code/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py):
+
+``` python
+    dataset = tf.data.TFRecordDataset(filename)
+    dataset = dataset.repeat(num_epochs)
+
+    # map takes a python function and applies it to every sample
+    dataset = dataset.map(decode)
+```
+
+To acomplish the same task with a queue based input pipeline requires the following code 
+(using the same `decode` function from the above example): 
+
+``` python
+  filename_queue = tf.train.string_input_producer([filename], num_epochs=num_epochs)
+  reader = tf.TFRecordReader()
+  _, serialized_example = reader.read(filename_queue)
+  image,label = decode(serialized_example)
+```
 
 ### Preprocessing
 
@@ -499,7 +511,7 @@ You can have the train and eval in the same graph in the same process, and share
 their trained variables or layers. See @{$variables$the shared variables tutorial}.
 
 To support the single-graph approach
-@{$programmers_guide/datasets$Datasets} also supplies
+@{$programmers_guide/datasets$`tf.data`} also supplies
 @{$programmers_guide/datasets#creating_an_iterator$advanced iterator types} that
 that allow the user to change the input pipeline without rebuilding the graph or
 session.
diff --git a/tensorflow/docs_src/get_started/linear_regression.md b/tensorflow/docs_src/api_guides/python/regression_examples.md
similarity index 100%
rename from tensorflow/docs_src/get_started/linear_regression.md
rename to tensorflow/docs_src/api_guides/python/regression_examples.md
diff --git a/tensorflow/docs_src/api_guides/python/script_ops.md b/tensorflow/docs_src/api_guides/python/script_ops.md
deleted file mode 100644
index ab49a570c135fefdcb3f4c7d4e4d35df38092b98..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/api_guides/python/script_ops.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Wraps python functions
-
-Note: Functions taking `Tensor` arguments can also take anything accepted by
-@{tf.convert_to_tensor}.
-
-[TOC]
-
-## Script Language Operators
-
-TensorFlow provides allows you to wrap python/numpy functions as
-TensorFlow operators.
-
-*   @{tf.py_func}
diff --git a/tensorflow/docs_src/api_guides/python/train.md b/tensorflow/docs_src/api_guides/python/train.md
index 943394f4ae05b9b5b379a58e2fc86341fbbfb6c4..80fe9784de64c3b3f1843cad07bb02507f682eaf 100644
--- a/tensorflow/docs_src/api_guides/python/train.md
+++ b/tensorflow/docs_src/api_guides/python/train.md
@@ -24,6 +24,8 @@ of the subclasses.
 *   @{tf.train.ProximalAdagradOptimizer}
 *   @{tf.train.RMSPropOptimizer}
 
+See @{tf.contrib.opt} for more optimizers.
+
 ## Gradient Computation
 
 TensorFlow provides functions to compute the derivatives for a given
@@ -57,6 +59,9 @@ gradients.
 *   @{tf.train.natural_exp_decay}
 *   @{tf.train.piecewise_constant}
 *   @{tf.train.polynomial_decay}
+*   @{tf.train.cosine_decay}
+*   @{tf.train.linear_cosine_decay}
+*   @{tf.train.noisy_linear_cosine_decay}
 
 ## Moving Averages
 
diff --git a/tensorflow/docs_src/community/benchmarks.md b/tensorflow/docs_src/community/benchmarks.md
index 3bdbabf4bbc7a9ebb1992619cb3c51a95429a0b1..67856ce8698aec0cecf6718d8d4580c67a9eb321 100644
--- a/tensorflow/docs_src/community/benchmarks.md
+++ b/tensorflow/docs_src/community/benchmarks.md
@@ -1,4 +1,4 @@
-# Benchmarks
+# Defining and Running Benchmarks
 
 This guide contains instructions for defining and running a TensorFlow benchmark. These benchmarks store output in [TestResults](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/test_log.proto) format. If these benchmarks are added to TensorFlow github repo, then we will run them daily with our continuous build and display a graph on our dashboard: https://benchmarks-dot-tensorflow-testing.appspot.com/.
 
@@ -52,6 +52,19 @@ Key points to note in the example above:
 * Benchmark method calls `report_benchmark` to report the metric value.
 
 
+## Running with Python
+
+Use the `--benchmarks` flag to run the benchmark with python. A [BenchmarkEntries](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/util/test_log.proto) proto will be printed.
+
+```
+python sample_benchmark.py --benchmarks=SampleBenchmark
+```
+
+Setting the flag as `--benchmarks=.` or `--benchmarks=all` would work as well.
+
+(Please ensure that Tensorflow is installed to successfully import the package in the line `import tensorflow as tf`. For installation instructions, see [Installing TensorFlow](https://www.tensorflow.org/install/). This step is not necessary when running with bazel.)
+
+
 ## Adding a `bazel` Target
 
 We have a special target called `tf_py_logged_benchmark` for benchmarks defined under TensorFlow github repo. `tf_py_logged_benchmark` should wrap around a regular `py_test` target. Running a `tf_py_logged_benchmark` would print a [TestResults](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/test_log.proto) proto. Defining a `tf_py_logged_benchmark` also lets us run it with TensorFlow continuous build.
@@ -84,7 +97,7 @@ load("//tensorflow/tools/test:performance.bzl", "tf_py_logged_benchmark")
 
 tf_py_logged_benchmark(
     name = "sample_logged_benchmark",
-    target = "//tensorflow/tools/test:sample_benchmark",
+    target = "//tensorflow/examples/benchmark:sample_benchmark",
 )
 ```
 
diff --git a/tensorflow/docs_src/community/style_guide.md b/tensorflow/docs_src/community/style_guide.md
index a4c4e2674ee78b2248323a0275a737d6417c5f99..c9268790a71fad9328f60f6a889c19c32117497e 100644
--- a/tensorflow/docs_src/community/style_guide.md
+++ b/tensorflow/docs_src/community/style_guide.md
@@ -59,14 +59,14 @@ filegroup(
             "**/OWNERS",
         ],
     ),
-    visibility = ["//third_party/tensorflow:__subpackages__"],
+    visibility = ["//tensorflow:__subpackages__"],
 )
 ```
 
 * When adding new BUILD file, add this line to `tensorflow/BUILD` file into `all_opensource_files` target.
 
 ```
-"//third_party/tensorflow/<directory>:all_files",
+"//tensorflow/<directory>:all_files",
 ```
 
 * For all Python BUILD targets (libraries and tests) add next line:
diff --git a/tensorflow/docs_src/deploy/distributed.md b/tensorflow/docs_src/deploy/distributed.md
index f3e2fac49f21e0777bc2c9f46c8a5d5c12f9bed4..d7ed6b1debdf256a800aed7304152acf5972bf72 100644
--- a/tensorflow/docs_src/deploy/distributed.md
+++ b/tensorflow/docs_src/deploy/distributed.md
@@ -2,8 +2,8 @@
 
 This document shows how to create a cluster of TensorFlow servers, and how to
 distribute a computation graph across that cluster. We assume that you are
-familiar with the @{$get_started/get_started$basic concepts} of
-writing TensorFlow programs.
+familiar with the @{$programmers_guide/low_level_intro$basic concepts} of
+writing low level TensorFlow programs.
 
 ## Hello distributed TensorFlow!
 
diff --git a/tensorflow/docs_src/extend/add_filesys.md b/tensorflow/docs_src/extend/add_filesys.md
index 44ba198998c7103d9a45e3ce6e6b9235a0b8bfa0..f0591b7b7d8af478db067ecd3bdd949e75d813c9 100644
--- a/tensorflow/docs_src/extend/add_filesys.md
+++ b/tensorflow/docs_src/extend/add_filesys.md
@@ -35,6 +35,7 @@ Note that TensorFlow already includes many filesystem implementations, such as:
 
 *   HDFS - the Hadoop File System
 *   GCS - Google Cloud Storage filesystem
+*   S3 - Amazon Simple Storage Service filesystem
 *   A "memory-mapped-file" filesystem
 
 The rest of this guide describes how to implement a custom filesystem.
diff --git a/tensorflow/docs_src/extend/adding_an_op.md b/tensorflow/docs_src/extend/adding_an_op.md
index c52279b212f46215125a20815f97b07b012a5513..15075e1df8e703415b4acb8e53f76dc9a4a41b50 100644
--- a/tensorflow/docs_src/extend/adding_an_op.md
+++ b/tensorflow/docs_src/extend/adding_an_op.md
@@ -1,6 +1,6 @@
 # Adding a New Op
 
-Note: By default [tensorflow.org](http://tensorflow.org) shows docs for the
+Note: By default [www.tensorflow.org](https://www.tensorflow.org) shows docs for the
 most recent stable version. The instructions in this doc require building from
 source. You will probably want to build from the `master` version of tensorflow.
 You should, as a result, be sure you are following the
diff --git a/tensorflow/docs_src/extend/architecture.md b/tensorflow/docs_src/extend/architecture.md
index 21816502acec7abfca670cac1bceda3e29144b53..c0fc714a4405d6189d187f1552ab96ea2d37dd24 100644
--- a/tensorflow/docs_src/extend/architecture.md
+++ b/tensorflow/docs_src/extend/architecture.md
@@ -7,7 +7,7 @@ learning models and system-level optimizations.
 This document describes the system architecture that makes possible this
 combination of scale and flexibility. It assumes that you have basic familiarity
 with TensorFlow programming concepts such as the computation graph, operations,
-and sessions. See @{$get_started/get_started$Getting Started}
+and sessions. See @{$programmers_guide/low_level_intro$this document}
 for an introduction to these topics. Some familiarity
 with @{$distributed$distributed TensorFlow}
 will also be helpful.
diff --git a/tensorflow/docs_src/extend/estimators.md b/tensorflow/docs_src/extend/estimators.md
deleted file mode 100644
index 7e6507c5840fe621aeb91842c9a83554e568db99..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/extend/estimators.md
+++ /dev/null
@@ -1,698 +0,0 @@
-# Creating Estimators in tf.estimator
-
-The tf.estimator framework makes it easy to construct and train machine
-learning models via its high-level Estimator API. `Estimator`
-offers classes you can instantiate to quickly configure common model types such
-as regressors and classifiers:
-
-*   @{tf.estimator.LinearClassifier}:
-    Constructs a linear classification model.
-*   @{tf.estimator.LinearRegressor}:
-    Constructs a linear regression model.
-*   @{tf.estimator.DNNClassifier}:
-    Construct a neural network classification model.
-*   @{tf.estimator.DNNRegressor}:
-    Construct a neural network regression model.
-*   @{tf.estimator.DNNLinearCombinedClassifier}:
-    Construct a neural network and linear combined classification model.
-*   @{tf.estimator.DNNLinearCombinedRegressor}:
-    Construct a neural network and linear combined regression model.
-
-But what if none of `tf.estimator`'s predefined model types meets your needs?
-Perhaps you need more granular control over model configuration, such as
-the ability to customize the loss function used for optimization, or specify
-different activation functions for each neural network layer. Or maybe you're
-implementing a ranking or recommendation system, and neither a classifier nor a
-regressor is appropriate for generating predictions.
-
-This tutorial covers how to create your own `Estimator` using the building
-blocks provided in `tf.estimator`, which will predict the ages of
-[abalones](https://en.wikipedia.org/wiki/Abalone) based on their physical
-measurements. You'll learn how to do the following:
-
-*   Instantiate an `Estimator`
-*   Construct a custom model function
-*   Configure a neural network using `tf.feature_column` and `tf.layers`
-*   Choose an appropriate loss function from `tf.losses`
-*   Define a training op for your model
-*   Generate and return predictions
-
-## Prerequisites
-
-This tutorial assumes you already know tf.estimator API basics, such as
-feature columns, input functions, and `train()`/`evaluate()`/`predict()`
-operations. If you've never used tf.estimator before, or need a refresher,
-you should first review the following tutorials:
-
-*   @{$get_started/estimator$tf.estimator Quickstart}: Quick introduction to
-    training a neural network using tf.estimator.
-*   @{$wide$TensorFlow Linear Model Tutorial}: Introduction to
-    feature columns, and an overview on building a linear classifier in
-    tf.estimator.
-*   @{$input_fn$Building Input Functions with tf.estimator}: Overview of how
-    to construct an input_fn to preprocess and feed data into your models.
-
-## An Abalone Age Predictor {#abalone-predictor}
-
-It's possible to estimate the age of an
-[abalone](https://en.wikipedia.org/wiki/Abalone) (sea snail) by the number of
-rings on its shell. However, because this task requires cutting, staining, and
-viewing the shell under a microscope, it's desirable to find other measurements
-that can predict age.
-
-The [Abalone Data Set](https://archive.ics.uci.edu/ml/datasets/Abalone) contains
-the following
-[feature data](https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.names)
-for abalone:
-
-| Feature        | Description                                               |
-| -------------- | --------------------------------------------------------- |
-| Length         | Length of abalone (in longest direction; in mm)           |
-| Diameter       | Diameter of abalone (measurement perpendicular to length; in mm)|
-| Height         | Height of abalone (with its meat inside shell; in mm)     |
-| Whole Weight   | Weight of entire abalone (in grams)                       |
-| Shucked Weight | Weight of abalone meat only (in grams)                    |
-| Viscera Weight | Gut weight of abalone (in grams), after bleeding          |
-| Shell Weight   | Weight of dried abalone shell (in grams)                  |
-
-The label to predict is number of rings, as a proxy for abalone age.
-
-![Abalone shell](https://www.tensorflow.org/images/abalone_shell.jpg)
-**[“Abalone shell”](https://www.flickr.com/photos/thenickster/16641048623/) (by [Nicki Dugan
-Pogue](https://www.flickr.com/photos/thenickster/), CC BY-SA 2.0)**
-
-## Setup
-
-This tutorial uses three data sets.
-[`abalone_train.csv`](http://download.tensorflow.org/data/abalone_train.csv)
-contains labeled training data comprising 3,320 examples.
-[`abalone_test.csv`](http://download.tensorflow.org/data/abalone_test.csv)
-contains labeled test data for 850 examples.
-[`abalone_predict`](http://download.tensorflow.org/data/abalone_predict.csv)
-contains 7 examples on which to make predictions.
-
-The following sections walk through writing the `Estimator` code step by step;
-the [full, final code is available
-here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/estimators/abalone.py).
-
-## Loading Abalone CSV Data into TensorFlow Datasets
-
-To feed the abalone dataset into the model, you'll need to download and load the
-CSVs into TensorFlow `Dataset`s. First, add some standard Python and TensorFlow
-imports, and set up FLAGS:
-
-```python
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import argparse
-import sys
-import tempfile
-
-# Import urllib
-from six.moves import urllib
-
-import numpy as np
-import tensorflow as tf
-
-FLAGS = None
-```
-
-Enable logging:
-
-```python
-tf.logging.set_verbosity(tf.logging.INFO)
-```
-
-Then define a function to load the CSVs (either from files specified in
-command-line options, or downloaded from
-[tensorflow.org](https://www.tensorflow.org/)):
-
-```python
-def maybe_download(train_data, test_data, predict_data):
-  """Maybe downloads training data and returns train and test file names."""
-  if train_data:
-    train_file_name = train_data
-  else:
-    train_file = tempfile.NamedTemporaryFile(delete=False)
-    urllib.request.urlretrieve(
-        "http://download.tensorflow.org/data/abalone_train.csv",
-        train_file.name)
-    train_file_name = train_file.name
-    train_file.close()
-    print("Training data is downloaded to %s" % train_file_name)
-
-  if test_data:
-    test_file_name = test_data
-  else:
-    test_file = tempfile.NamedTemporaryFile(delete=False)
-    urllib.request.urlretrieve(
-        "http://download.tensorflow.org/data/abalone_test.csv", test_file.name)
-    test_file_name = test_file.name
-    test_file.close()
-    print("Test data is downloaded to %s" % test_file_name)
-
-  if predict_data:
-    predict_file_name = predict_data
-  else:
-    predict_file = tempfile.NamedTemporaryFile(delete=False)
-    urllib.request.urlretrieve(
-        "http://download.tensorflow.org/data/abalone_predict.csv",
-        predict_file.name)
-    predict_file_name = predict_file.name
-    predict_file.close()
-    print("Prediction data is downloaded to %s" % predict_file_name)
-
-  return train_file_name, test_file_name, predict_file_name
-```
-
-Finally, create `main()` and load the abalone CSVs into `Datasets`, defining
-flags to allow users to optionally specify CSV files for training, test, and
-prediction datasets via the command line (by default, files will be downloaded
-from [tensorflow.org](https://www.tensorflow.org/)):
-
-```python
-def main(unused_argv):
-  # Load datasets
-  abalone_train, abalone_test, abalone_predict = maybe_download(
-    FLAGS.train_data, FLAGS.test_data, FLAGS.predict_data)
-
-  # Training examples
-  training_set = tf.contrib.learn.datasets.base.load_csv_without_header(
-      filename=abalone_train, target_dtype=np.int, features_dtype=np.float64)
-
-  # Test examples
-  test_set = tf.contrib.learn.datasets.base.load_csv_without_header(
-      filename=abalone_test, target_dtype=np.int, features_dtype=np.float64)
-
-  # Set of 7 examples for which to predict abalone ages
-  prediction_set = tf.contrib.learn.datasets.base.load_csv_without_header(
-      filename=abalone_predict, target_dtype=np.int, features_dtype=np.float64)
-
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.register("type", "bool", lambda v: v.lower() == "true")
-  parser.add_argument(
-      "--train_data", type=str, default="", help="Path to the training data.")
-  parser.add_argument(
-      "--test_data", type=str, default="", help="Path to the test data.")
-  parser.add_argument(
-      "--predict_data",
-      type=str,
-      default="",
-      help="Path to the prediction data.")
-  FLAGS, unparsed = parser.parse_known_args()
-  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
-```
-
-## Instantiating an Estimator
-
-When defining a model using one of tf.estimator's provided classes, such as
-`DNNClassifier`, you supply all the configuration parameters right in the
-constructor, e.g.:
-
-```python
-my_nn = tf.estimator.DNNClassifier(feature_columns=[age, height, weight],
-                                   hidden_units=[10, 10, 10],
-                                   activation_fn=tf.nn.relu,
-                                   dropout=0.2,
-                                   n_classes=3,
-                                   optimizer="Adam")
-```
-
-You don't need to write any further code to instruct TensorFlow how to train the
-model, calculate loss, or return predictions; that logic is already baked into
-the `DNNClassifier`.
-
-By contrast, when you're creating your own estimator from scratch, the
-constructor accepts just two high-level parameters for model configuration,
-`model_fn` and `params`:
-
-```python
-nn = tf.estimator.Estimator(model_fn=model_fn, params=model_params)
-```
-
-*   `model_fn`: A function object that contains all the aforementioned logic to
-    support training, evaluation, and prediction. You are responsible for
-    implementing that functionality. The next section, [Constructing the
-    `model_fn`](#constructing-modelfn) covers creating a model function in
-    detail.
-
-*   `params`: An optional dict of hyperparameters (e.g., learning rate, dropout)
-    that will be passed into the `model_fn`.
-
-Note: Just like `tf.estimator`'s predefined regressors and classifiers, the
-`Estimator` initializer also accepts the general configuration arguments
-`model_dir` and `config`.
-
-For the abalone age predictor, the model will accept one hyperparameter:
-learning rate. Define `LEARNING_RATE` as a constant at the beginning of your
-code (highlighted in bold below), right after the logging configuration:
-
-<pre class="prettyprint"><code class="lang-python">tf.logging.set_verbosity(tf.logging.INFO)
-
-<strong># Learning rate for the model
-LEARNING_RATE = 0.001</strong></code></pre>
-
-Note: Here, `LEARNING_RATE` is set to `0.001`, but you can tune this value as
-needed to achieve the best results during model training.
-
-Then, add the following code to `main()`, which creates the dict `model_params`
-containing the learning rate and instantiates the `Estimator`:
-
-```python
-# Set model params
-model_params = {"learning_rate": LEARNING_RATE}
-
-# Instantiate Estimator
-nn = tf.estimator.Estimator(model_fn=model_fn, params=model_params)
-```
-
-## Constructing the `model_fn` {#constructing-modelfn}
-
-The basic skeleton for an `Estimator` API model function looks like this:
-
-```python
-def model_fn(features, labels, mode, params):
-   # Logic to do the following:
-   # 1. Configure the model via TensorFlow operations
-   # 2. Define the loss function for training/evaluation
-   # 3. Define the training operation/optimizer
-   # 4. Generate predictions
-   # 5. Return predictions/loss/train_op/eval_metric_ops in EstimatorSpec object
-   return EstimatorSpec(mode, predictions, loss, train_op, eval_metric_ops)
-```
-
-The `model_fn` must accept three arguments:
-
-*   `features`: A dict containing the features passed to the model via
-    `input_fn`.
-*   `labels`: A `Tensor` containing the labels passed to the model via
-    `input_fn`. Will be empty for `predict()` calls, as these are the values the
-    model will infer.
-*   `mode`: One of the following @{tf.estimator.ModeKeys} string values
-    indicating the context in which the model_fn was invoked:
-    *   `tf.estimator.ModeKeys.TRAIN` The `model_fn` was invoked in training
-        mode, namely via a `train()` call.
-    *   `tf.estimator.ModeKeys.EVAL`. The `model_fn` was invoked in
-        evaluation mode, namely via an `evaluate()` call.
-    *   `tf.estimator.ModeKeys.PREDICT`. The `model_fn` was invoked in
-        predict mode, namely via a `predict()` call.
-
-`model_fn` may also accept a `params` argument containing a dict of
-hyperparameters used for training (as shown in the skeleton above).
-
-The body of the function performs the following tasks (described in detail in the
-sections that follow):
-
-*   Configuring the model—here, for the abalone predictor, this will be a neural
-    network.
-*   Defining the loss function used to calculate how closely the model's
-    predictions match the target values.
-*   Defining the training operation that specifies the `optimizer` algorithm to
-    minimize the loss values calculated by the loss function.
-
-The `model_fn` must return a @{tf.estimator.EstimatorSpec}
-object, which contains the following values:
-
-*   `mode` (required). The mode in which the model was run. Typically, you will
-    return the `mode` argument of the `model_fn` here.
-
-*   `predictions` (required in `PREDICT` mode). A dict that maps key names of
-    your choice to `Tensor`s containing the predictions from the model, e.g.:
-
-    ```python
-    predictions = {"results": tensor_of_predictions}
-    ```
-
-    In `PREDICT` mode, the dict that you return in `EstimatorSpec` will then be
-    returned by `predict()`, so you can construct it in the format in which
-    you'd like to consume it.
-
-
-*   `loss` (required in `EVAL` and `TRAIN` mode). A `Tensor` containing a scalar
-    loss value: the output of the model's loss function (discussed in more depth
-    later in [Defining loss for the model](#defining-loss)) calculated over all
-    the input examples. This is used in `TRAIN` mode for error handling and
-    logging, and is automatically included as a metric in `EVAL` mode.
-
-*   `train_op` (required only in `TRAIN` mode). An Op that runs one step of
-    training.
-
-*   `eval_metric_ops` (optional). A dict of name/value pairs specifying the
-    metrics that will be calculated when the model runs in `EVAL` mode. The name
-    is a label of your choice for the metric, and the value is the result of
-    your metric calculation. The @{tf.metrics}
-    module provides predefined functions for a variety of common metrics. The
-    following `eval_metric_ops` contains an `"accuracy"` metric calculated using
-    `tf.metrics.accuracy`:
-
-    ```python
-    eval_metric_ops = {
-        "accuracy": tf.metrics.accuracy(labels, predictions)
-    }
-    ```
-
-    If you do not specify `eval_metric_ops`, only `loss` will be calculated
-    during evaluation.
-
-### Configuring a neural network with `tf.feature_column` and `tf.layers`
-
-Constructing a [neural
-network](https://en.wikipedia.org/wiki/Artificial_neural_network) entails
-creating and connecting the input layer, the hidden layers, and the output
-layer.
-
-The input layer is a series of nodes (one for each feature in the model) that
-will accept the feature data that is passed to the `model_fn` in the `features`
-argument. If `features` contains an n-dimensional `Tensor` with all your feature
-data, then it can serve as the input layer.
-If `features` contains a dict of @{$linear#feature-columns-and-transformations$feature columns} passed to
-the model via an input function, you can convert it to an input-layer `Tensor`
-with the @{tf.feature_column.input_layer} function.
-
-```python
-input_layer = tf.feature_column.input_layer(
-    features=features, feature_columns=[age, height, weight])
-```
-
-As shown above, `input_layer()` takes two required arguments:
-
-*   `features`. A mapping from string keys to the `Tensors` containing the
-    corresponding feature data. This is exactly what is passed to the `model_fn`
-    in the `features` argument.
-*   `feature_columns`. A list of all the `FeatureColumns` in the model—`age`,
-    `height`, and `weight` in the above example.
-
-The input layer of the neural network then must be connected to one or more
-hidden layers via an [activation
-function](https://en.wikipedia.org/wiki/Activation_function) that performs a
-nonlinear transformation on the data from the previous layer. The last hidden
-layer is then connected to the output layer, the final layer in the model.
-`tf.layers` provides the `tf.layers.dense` function for constructing fully
-connected layers. The activation is controlled by the `activation` argument.
-Some options to pass to the `activation` argument are:
-
-*   `tf.nn.relu`. The following code creates a layer of `units` nodes fully
-    connected to the previous layer `input_layer` with a
-    [ReLU activation function](https://en.wikipedia.org/wiki/Rectifier_\(neural_networks\))
-    (@{tf.nn.relu}):
-
-    ```python
-    hidden_layer = tf.layers.dense(
-        inputs=input_layer, units=10, activation=tf.nn.relu)
-    ```
-
-*   `tf.nn.relu6`. The following code creates a layer of `units` nodes fully
-    connected to the previous layer `hidden_layer` with a ReLU 6 activation
-    function (@{tf.nn.relu6}):
-
-    ```python
-    second_hidden_layer = tf.layers.dense(
-        inputs=hidden_layer, units=20, activation=tf.nn.relu)
-    ```
-
-*   `None`. The following code creates a layer of `units` nodes fully connected
-    to the previous layer `second_hidden_layer` with *no* activation function,
-    just a linear transformation:
-
-    ```python
-    output_layer = tf.layers.dense(
-        inputs=second_hidden_layer, units=3, activation=None)
-    ```
-
-Other activation functions are possible, e.g.:
-
-```python
-output_layer = tf.layers.dense(inputs=second_hidden_layer,
-                               units=10,
-                               activation_fn=tf.sigmoid)
-```
-
-The above code creates the neural network layer `output_layer`, which is fully
-connected to `second_hidden_layer` with a sigmoid activation function
-(@{tf.sigmoid}). For a list of predefined
-activation functions available in TensorFlow, see the @{$python/nn#activation_functions$API docs}.
-
-Putting it all together, the following code constructs a full neural network for
-the abalone predictor, and captures its predictions:
-
-```python
-def model_fn(features, labels, mode, params):
-  """Model function for Estimator."""
-
-  # Connect the first hidden layer to input layer
-  # (features["x"]) with relu activation
-  first_hidden_layer = tf.layers.dense(features["x"], 10, activation=tf.nn.relu)
-
-  # Connect the second hidden layer to first hidden layer with relu
-  second_hidden_layer = tf.layers.dense(
-      first_hidden_layer, 10, activation=tf.nn.relu)
-
-  # Connect the output layer to second hidden layer (no activation fn)
-  output_layer = tf.layers.dense(second_hidden_layer, 1)
-
-  # Reshape output layer to 1-dim Tensor to return predictions
-  predictions = tf.reshape(output_layer, [-1])
-  predictions_dict = {"ages": predictions}
-  ...
-```
-
-Here, because you'll be passing the abalone `Datasets` using `numpy_input_fn`
-as shown below, `features` is a dict `{"x": data_tensor}`, so
-`features["x"]` is the input layer. The network contains two hidden
-layers, each with 10 nodes and a ReLU activation function. The output layer
-contains no activation function, and is
-@{tf.reshape} to a one-dimensional
-tensor to capture the model's predictions, which are stored in
-`predictions_dict`.
-
-### Defining loss for the model {#defining-loss}
-
-The `EstimatorSpec` returned by the `model_fn` must contain `loss`: a `Tensor`
-representing the loss value, which quantifies how well the model's predictions
-reflect the label values during training and evaluation runs. The @{tf.losses}
-module provides convenience functions for calculating loss using a variety of
-metrics, including:
-
-*   `absolute_difference(labels, predictions)`. Calculates loss using the
-    [absolute-difference
-    formula](https://en.wikipedia.org/wiki/Deviation_\(statistics\)#Unsigned_or_absolute_deviation)
-    (also known as L<sub>1</sub> loss).
-
-*   `log_loss(labels, predictions)`. Calculates loss using the [logistic loss
-    forumula](https://en.wikipedia.org/wiki/Loss_functions_for_classification#Logistic_loss)
-    (typically used in logistic regression).
-
-*   `mean_squared_error(labels, predictions)`. Calculates loss using the [mean
-    squared error](https://en.wikipedia.org/wiki/Mean_squared_error) (MSE; also
-    known as L<sub>2</sub> loss).
-
-The following example adds a definition for `loss` to the abalone `model_fn`
-using `mean_squared_error()` (in bold):
-
-<pre class="prettyprint"><code class="lang-python">def model_fn(features, labels, mode, params):
-  """Model function for Estimator."""
-
-  # Connect the first hidden layer to input layer
-  # (features["x"]) with relu activation
-  first_hidden_layer = tf.layers.dense(features["x"], 10, activation=tf.nn.relu)
-
-  # Connect the second hidden layer to first hidden layer with relu
-  second_hidden_layer = tf.layers.dense(
-      first_hidden_layer, 10, activation=tf.nn.relu)
-
-  # Connect the output layer to second hidden layer (no activation fn)
-  output_layer = tf.layers.dense(second_hidden_layer, 1)
-
-  # Reshape output layer to 1-dim Tensor to return predictions
-  predictions = tf.reshape(output_layer, [-1])
-  predictions_dict = {"ages": predictions}
-
-
-  <strong># Calculate loss using mean squared error
-  loss = tf.losses.mean_squared_error(labels, predictions)</strong>
-  ...</code></pre>
-
-See the @{$python/contrib.losses$API guide} for a
-full list of loss functions and more details on supported arguments and usage.
-
-Supplementary metrics for evaluation can be added to an `eval_metric_ops` dict.
-The following code defines an `rmse` metric, which calculates the root mean
-squared error for the model predictions. Note that the `labels` tensor is cast
-to a `float64` type to match the data type of the `predictions` tensor, which
-will contain real values:
-
-```python
-eval_metric_ops = {
-    "rmse": tf.metrics.root_mean_squared_error(
-        tf.cast(labels, tf.float64), predictions)
-}
-```
-
-### Defining the training op for the model
-
-The training op defines the optimization algorithm TensorFlow will use when
-fitting the model to the training data. Typically when training, the goal is to
-minimize loss. A simple way to create the training op is to instantiate a
-`tf.train.Optimizer` subclass and call the `minimize` method.
-
-The following code defines a training op for the abalone `model_fn` using the
-loss value calculated in [Defining Loss for the Model](#defining-loss), the
-learning rate passed to the function in `params`, and the gradient descent
-optimizer. For `global_step`, the convenience function
-@{tf.train.get_global_step} takes care of generating an integer variable:
-
-```python
-optimizer = tf.train.GradientDescentOptimizer(
-    learning_rate=params["learning_rate"])
-train_op = optimizer.minimize(
-    loss=loss, global_step=tf.train.get_global_step())
-```
-
-For a full list of optimizers, and other details, see the
-@{$python/train#optimizers$API guide}.
-
-### The complete abalone `model_fn`
-
-Here's the final, complete `model_fn` for the abalone age predictor. The
-following code configures the neural network; defines loss and the training op;
-and returns a `EstimatorSpec` object containing `mode`, `predictions_dict`, `loss`,
-and `train_op`:
-
-```python
-def model_fn(features, labels, mode, params):
-  """Model function for Estimator."""
-
-  # Connect the first hidden layer to input layer
-  # (features["x"]) with relu activation
-  first_hidden_layer = tf.layers.dense(features["x"], 10, activation=tf.nn.relu)
-
-  # Connect the second hidden layer to first hidden layer with relu
-  second_hidden_layer = tf.layers.dense(
-      first_hidden_layer, 10, activation=tf.nn.relu)
-
-  # Connect the output layer to second hidden layer (no activation fn)
-  output_layer = tf.layers.dense(second_hidden_layer, 1)
-
-  # Reshape output layer to 1-dim Tensor to return predictions
-  predictions = tf.reshape(output_layer, [-1])
-
-  # Provide an estimator spec for `ModeKeys.PREDICT`.
-  if mode == tf.estimator.ModeKeys.PREDICT:
-    return tf.estimator.EstimatorSpec(
-        mode=mode,
-        predictions={"ages": predictions})
-
-  # Calculate loss using mean squared error
-  loss = tf.losses.mean_squared_error(labels, predictions)
-
-  # Calculate root mean squared error as additional eval metric
-  eval_metric_ops = {
-      "rmse": tf.metrics.root_mean_squared_error(
-          tf.cast(labels, tf.float64), predictions)
-  }
-
-  optimizer = tf.train.GradientDescentOptimizer(
-      learning_rate=params["learning_rate"])
-  train_op = optimizer.minimize(
-      loss=loss, global_step=tf.train.get_global_step())
-
-  # Provide an estimator spec for `ModeKeys.EVAL` and `ModeKeys.TRAIN` modes.
-  return tf.estimator.EstimatorSpec(
-      mode=mode,
-      loss=loss,
-      train_op=train_op,
-      eval_metric_ops=eval_metric_ops)
-```
-
-## Running the Abalone Model
-
-You've instantiated an `Estimator` for the abalone predictor and defined its
-behavior in `model_fn`; all that's left to do is train, evaluate, and make
-predictions.
-
-Add the following code to the end of `main()` to fit the neural network to the
-training data and evaluate accuracy:
-
-```python
-train_input_fn = tf.estimator.inputs.numpy_input_fn(
-    x={"x": np.array(training_set.data)},
-    y=np.array(training_set.target),
-    num_epochs=None,
-    shuffle=True)
-
-# Train
-nn.train(input_fn=train_input_fn, steps=5000)
-
-# Score accuracy
-test_input_fn = tf.estimator.inputs.numpy_input_fn(
-    x={"x": np.array(test_set.data)},
-    y=np.array(test_set.target),
-    num_epochs=1,
-    shuffle=False)
-
-ev = nn.evaluate(input_fn=test_input_fn)
-print("Loss: %s" % ev["loss"])
-print("Root Mean Squared Error: %s" % ev["rmse"])
-```
-
-Note: The above code uses input functions to feed feature (`x`) and label (`y`)
-`Tensor`s into the model for both training (`train_input_fn`) and evaluation
-(`test_input_fn`). To learn more about input functions, see the tutorial
-@{$input_fn$Building Input Functions with tf.estimator}.
-
-Then run the code. You should see output like the following:
-
-```none
-...
-INFO:tensorflow:loss = 4.86658, step = 4701
-INFO:tensorflow:loss = 4.86191, step = 4801
-INFO:tensorflow:loss = 4.85788, step = 4901
-...
-INFO:tensorflow:Saving evaluation summary for 5000 step: loss = 5.581
-Loss: 5.581
-```
-
-The loss score reported is the mean squared error returned from the `model_fn`
-when run on the `ABALONE_TEST` data set.
-
-To predict ages for the `ABALONE_PREDICT` data set, add the following to
-`main()`:
-
-```python
-# Print out predictions
-predict_input_fn = tf.estimator.inputs.numpy_input_fn(
-    x={"x": prediction_set.data},
-    num_epochs=1,
-    shuffle=False)
-predictions = nn.predict(input_fn=predict_input_fn)
-for i, p in enumerate(predictions):
-  print("Prediction %s: %s" % (i + 1, p["ages"]))
-```
-
-Here, the `predict()` function returns results in `predictions` as an iterable.
-The `for` loop enumerates and prints out the results. Rerun the code, and you
-should see output similar to the following:
-
-```python
-...
-Prediction 1: 4.92229
-Prediction 2: 10.3225
-Prediction 3: 7.384
-Prediction 4: 10.6264
-Prediction 5: 11.0862
-Prediction 6: 9.39239
-Prediction 7: 11.1289
-```
-
-## Additional Resources
-
-Congrats! You've successfully built a tf.estimator `Estimator` from scratch.
-For additional reference materials on building `Estimator`s, see the following
-sections of the API guides:
-
-*   @{$python/contrib.layers$Layers}
-*   @{$python/contrib.losses$Losses}
-*   @{$python/contrib.layers#optimization$Optimization}
diff --git a/tensorflow/docs_src/extend/index.md b/tensorflow/docs_src/extend/index.md
index 00b168c6be96a158c3be69fbcefbf941c0fbbe4d..bdff60b39ec6fe939273a529ec4e46407cface8a 100644
--- a/tensorflow/docs_src/extend/index.md
+++ b/tensorflow/docs_src/extend/index.md
@@ -14,9 +14,6 @@ TensorFlow:
     add support for your own shared or distributed filesystem.
   * @{$new_data_formats$Custom Data Readers}, which details how to add support
     for your own file and record formats.
-  * @{$extend/estimators$Creating Estimators in tf.contrib.learn}, which explains how
-    to write your own custom Estimator.  For example, you could build your
-    own Estimator to implement some variation on standard linear regression.
 
 Python is currently the only language supported by TensorFlow's API stability
 promises.  However, TensorFlow also provides functionality in C++, Java, and Go,
diff --git a/tensorflow/docs_src/extend/leftnav_files b/tensorflow/docs_src/extend/leftnav_files
index 8dbb54f6f63e26e3af725fe55a4d7b2b5ba3cd5d..12315b711b6d1c74bd3b5a5195f6c5c995d2d63f 100644
--- a/tensorflow/docs_src/extend/leftnav_files
+++ b/tensorflow/docs_src/extend/leftnav_files
@@ -3,6 +3,5 @@ architecture.md
 adding_an_op.md
 add_filesys.md
 new_data_formats.md
-estimators.md
 language_bindings.md
 tool_developers/index.md
diff --git a/tensorflow/docs_src/get_started/checkpoints.md b/tensorflow/docs_src/get_started/checkpoints.md
new file mode 100644
index 0000000000000000000000000000000000000000..680e1c0d3f58166a4f6b352816914f5220d84996
--- /dev/null
+++ b/tensorflow/docs_src/get_started/checkpoints.md
@@ -0,0 +1,238 @@
+# Checkpoints
+
+This document examines how to save and restore TensorFlow models built with
+Estimators. TensorFlow provides two model formats:
+
+*   checkpoints, which is a format dependent on the code that created
+    the model.
+*   SavedModel, which is a format independent of the code that created
+    the model.
+
+This document focuses on checkpoints. For details on SavedModel, see the
+@{$saved_model$Saving and Restoring} chapter of the
+*TensorFlow Programmer's Guide*.
+
+
+## Sample code
+
+This document relies on the same
+[https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py](Iris classification example) detailed in @{$premade_estimators$Getting Started with TensorFlow}.
+To download and access the example, invoke the following two commands:
+
+```shell
+git clone https://github.com/tensorflow/models/
+cd models/samples/core/get_started
+```
+
+Most of the code snippets in this document are minor variations
+on `premade_estimator.py`.
+
+
+## Saving partially-trained models
+
+Estimators automatically write the following to disk:
+
+*   **checkpoints**, which are versions of the model created during training.
+*   **event files**, which contain information that
+    [TensorBoard](https://developers.google.com/machine-learning/glossary/#TensorBoard)
+    uses to create visualizations.
+
+To specify the top-level directory in which the Estimator stores its
+information, assign a value to the optional `model_dir` argument of any
+Estimator's constructor.  For example, the following code sets the `model_dir`
+argument to the `models/iris` directory:
+
+```python
+classifier = tf.estimator.DNNClassifier(
+    feature_columns=my_feature_columns,
+    hidden_units=[10, 10],
+    n_classes=3,
+    model_dir='models/iris')
+```
+
+Suppose you call the Estimator's `train` method. For example:
+
+
+```python
+classifier.train(
+        input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100),
+                steps=200)
+```
+
+As suggested by the following diagrams, the first call to `train`
+adds checkpoints and other files to the `model_dir` directory:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/first_train_calls.png">
+</div>
+<div style="text-align: center">
+The first call to train().
+</div>
+
+
+To see the objects in the created `model_dir` directory on a
+UNIX-based system, just call `ls` as follows:
+
+```none
+$ ls -1 models/iris
+checkpoint
+events.out.tfevents.timestamp.hostname
+graph.pbtxt
+model.ckpt-1.data-00000-of-00001
+model.ckpt-1.index
+model.ckpt-1.meta
+model.ckpt-200.data-00000-of-00001
+model.ckpt-200.index
+model.ckpt-200.meta
+```
+
+The preceding `ls` command shows that the Estimator created checkpoints
+at steps 1 (the start of training) and 200 (the end of training).
+
+
+### Default checkpoint directory
+
+If you don't specify `model_dir` in an Estimator's constructor, the Estimator
+writes checkpoint files to a temporary directory chosen by Python's
+[tempfile.mkdtemp](https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp)
+function. For example, the following Estimator constructor does *not* specify
+the `model_dir` argument:
+
+```python
+classifier = tf.estimator.DNNClassifier(
+    feature_columns=my_feature_columns,
+    hidden_units=[10, 10],
+    n_classes=3)
+
+print(classifier.model_dir)
+```
+
+The `tempfile.mkdtemp` function picks a secure, temporary directory
+appropriate for your operating system. For example, a typical temporary
+directory on macOS might be something like the following:
+
+```None
+/var/folders/0s/5q9kfzfj3gx2knj0vj8p68yc00dhcr/T/tmpYm1Rwa
+```
+
+### Checkpointing Frequency
+
+By default, the Estimator saves
+[checkpoints](https://developers.google.com/machine-learning/glossary/#checkpoint)
+in the `model_dir` according to the following schedule:
+
+*   Writes a checkpoint every 10 minutes (600 seconds).
+*   Writes a checkpoint when the `train` method starts (first iteration)
+    and completes (final iteration).
+*   Retains only the 5 most recent checkpoints in the directory.
+
+You may alter the default schedule by taking the following steps:
+
+1.  Create a @{tf.estimator.RunConfig$`RunConfig`} object that defines the
+    desired schedule.
+2.  When instantiating the Estimator, pass that `RunConfig` object to the
+    Estimator's `config` argument.
+
+For example, the following code changes the checkpointing schedule to every
+20 minutes and retains the 10 most recent checkpoints:
+
+```python
+my_checkpointing_config = tf.estimator.RunConfig(
+    save_checkpoints_secs = 20*60,  # Save checkpoints every 20 minutes.
+    keep_checkpoint_max = 10,       # Retain the 10 most recent checkpoints.
+)
+
+classifier = tf.estimator.DNNClassifier(
+    feature_columns=my_feature_columns,
+    hidden_units=[10, 10],
+    n_classes=3,
+    model_dir='models/iris',
+    config=my_checkpointing_config)
+```
+
+## Restoring your model
+
+The first time you call an Estimator's `train` method, TensorFlow saves a
+checkpoint to the `model_dir`. Each subsequent call to the Estimator's
+`train`, `eval`, or `predict` method causes the following:
+
+1.  The Estimator builds the model's
+    [graph](https://developers.google.com/machine-learning/glossary/#graph)
+    by running the `model_fn()`.  (For details on the `model_fn()`, see
+    @{$custom_estimators$Creating Custom Estimators.})
+2.  The Estimator initializes the weights of the new model from the data
+    stored in the most recent checkpoint.
+
+In other words, as the following illustration suggests, once checkpoints
+exist, TensorFlow rebuilds the model each time you call `train()`,
+`evaluate()`, or `predict()`.
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/subsequent_calls.png">
+</div>
+<div style="text-align: center">
+Subsequent calls to train(), evaluate(), or predict()
+</div>
+
+
+### Avoiding a bad restoration
+
+Restoring a model's state from a checkpoint only works if the model
+and checkpoint are compatible.  For example, suppose you trained a
+`DNNClassifier` Estimator containing two hidden layers,
+each having 10 nodes:
+
+```python
+classifier = tf.estimator.DNNClassifier(
+    feature_columns=feature_columns,
+    hidden_units=[10, 10],
+    n_classes=3,
+    model_dir='models/iris')
+
+classifier.train(
+    input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100),
+        steps=200)
+```
+
+After training (and, therefore, after creating checkpoints in `models/iris`),
+imagine that you changed the number of neurons in each hidden layer from 10 to
+20 and then attempted to retrain the model:
+
+``` python
+classifier2 = tf.estimator.DNNClassifier(
+    feature_columns=my_feature_columns,
+    hidden_units=[20, 20],  # Change the number of neurons in the model.
+    n_classes=3,
+    model_dir='models/iris')
+
+classifier.train(
+    input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100),
+        steps=200)
+```
+
+Since the state in the checkpoint is incompatible with the model described
+in `classifier2`, retraining fails with the following error:
+
+```None
+...
+InvalidArgumentError (see above for traceback): tensor_name =
+dnn/hiddenlayer_1/bias/t_0/Adagrad; shape in shape_and_slice spec [10]
+does not match the shape stored in checkpoint: [20]
+```
+
+To run experiments in which you train and compare slightly different
+versions of a model, save a copy of the code that created each
+`model-dir`, possibly by creating a separate git branch for each version.
+This separation will keep your checkpoints recoverable.
+
+## Summary
+
+Checkpoints provide an easy automatic mechanism for saving and restoring
+models created by Estimators.
+
+See the @{$saved_model$Saving and Restoring}
+chapter of the *TensorFlow Programmer's Guide* for details on:
+
+*   Saving and restoring models using low-level TensorFlow APIs.
+*   Exporting and importing models in the SavedModel format, which is a
+    language-neutral, recoverable, serialization format.
diff --git a/tensorflow/docs_src/get_started/custom_estimators.md b/tensorflow/docs_src/get_started/custom_estimators.md
new file mode 100644
index 0000000000000000000000000000000000000000..6343cc4ee454c7242b98497a37e9852b4e9873ae
--- /dev/null
+++ b/tensorflow/docs_src/get_started/custom_estimators.md
@@ -0,0 +1,602 @@
+
+# Creating Custom Estimators
+
+This document introduces custom Estimators. In particular, this document
+demonstrates how to create a custom @{tf.estimator.Estimator$Estimator} that
+mimics the behavior of the pre-made Estimator
+@{tf.estimator.DNNClassifier$`DNNClassifier`} in solving the Iris problem. See
+the @{$get_started/premade_estimators$Pre-Made Estimators chapter} for details
+on the Iris problem.
+
+To download and access the example code invoke the following two commands:
+
+```shell
+git clone https://github.com/tensorflow/models/
+cd models/samples/core/get_started
+```
+
+In this document we wil be looking at
+[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py).
+You can run it with the following command:
+
+```bsh
+python custom_estimator.py
+```
+
+If you are feeling impatient, feel free to compare and contrast
+[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py)
+with
+[`premade_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py).
+(which is in the same directory).
+
+
+
+## Pre-made vs. custom
+
+As the following figure shows, pre-made Estimators are subclasses of the
+@{tf.estimator.Estimator} base class, while custom Estimators are an instance
+of tf.estimator.Estimator:
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="display:block; margin: 0 auto"
+  alt="Premade estimators are sub-classes of `Estimator`. Custom Estimators are usually (direct) instances of `Estimator`"
+  src="../images/custom_estimators/estimator_types.png">
+</div>
+<div style="text-align: center">
+Pre-made and custom Estimators are all Estimators.
+</div>
+
+Pre-made Estimators are fully baked. Sometimes though, you need more control
+over an Estimator's behavior.  That's where custom Estimators come in. You can
+create a custom Estimator to do just about anything. If you want hidden layers
+connected in some unusual fashion, write a custom Estimator. If you want to
+calculate a unique
+[metric](https://developers.google.com/machine-learning/glossary/#metric)
+for your model, write a custom Estimator.  Basically, if you want an Estimator
+optimized for your specific problem, write a custom Estimator.
+
+A model function (or `model_fn`) implements the ML algorithm. The
+only difference between working with pre-made Estimators and custom Estimators
+is:
+
+* With pre-made Estimators, someone already wrote the model function for you.
+* With custom Estimators, you must write the model function.
+
+Your model function could implement a wide range of algorithms, defining all
+sorts of hidden layers and metrics.  Like input functions, all model functions
+must accept a standard group of input parameters and return a standard group of
+output values. Just as input functions can leverage the Dataset API, model
+functions can leverage the Layers API and the Metrics API.
+
+Let's see how to solve the Iris problem with a custom Estimator. A quick
+reminder--here's the organization of the Iris model that we're trying to mimic:
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="display:block; margin: 0 auto"
+  alt="A diagram of the network architecture: Inputs, 2 hidden layers, and outputs"
+  src="../images/custom_estimators/full_network.png">
+</div>
+<div style="text-align: center">
+Our implementation of Iris contains four features, two hidden layers,
+and a logits output layer.
+</div>
+
+## Write an Input function
+
+Our custom Estimator implementation uses the same input function as our
+@{$get_started/premade_estimators$pre-made Estimator implementation}, from
+[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py).
+Namely:
+
+```python
+def train_input_fn(features, labels, batch_size):
+    """An input function for training"""
+    # Convert the inputs to a Dataset.
+    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
+
+    # Shuffle, repeat, and batch the examples.
+    dataset = dataset.shuffle(1000).repeat().batch(batch_size)
+
+    # Return the read end of the pipeline.
+    return dataset.make_one_shot_iterator().get_next()
+```
+
+This input function builds an input pipeline that yields batches of
+`(features, labels)` pairs, where `features` is a dictionary features.
+
+## Create feature columns
+
+As detailed in the @{$get_started/premade_estimators$Premade Estimators} and
+@{$get_started/feature_columns$Feature Columns} chapters, you must define
+your model's feature columns to specify how the model should use each feature.
+Whether working with pre-made Estimators or custom Estimators, you define
+feature columns in the same fashion.
+
+The following code creates a simple `numeric_column` for each input feature,
+indicating that the value of the input feature should be used directly as an
+input to the model:
+
+```python
+# Feature columns describe how to use the input.
+my_feature_columns = []
+for key in train_x.keys():
+    my_feature_columns.append(tf.feature_column.numeric_column(key=key))
+```
+
+## Write a model function
+
+The model function we'll use has the following call signature:
+
+```python
+def my_model_fn(
+   features, # This is batch_features from input_fn
+   labels,   # This is batch_labels from input_fn
+   mode,     # An instance of tf.estimator.ModeKeys
+   params):  # Additional configuration
+```
+
+The first two arguments are the batches of features and labels returned from
+the input function; that is, `features` and `labels` are the handles to the
+data your model will use. The `mode` argument indicates whether the caller is
+requesting training, predicting, or evaluation.
+
+The caller may pass `params` to an Estimator's constructor. Any `params` passed
+to the constructor are in turn passed on to the `model_fn`. In
+[`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py)
+the following lines create the estimator and set the params to configure the
+model. This configuration step is similar to how we configured the @{tf.estimator.DNNClassifier} in
+@{$get_started/premade_estimators}.
+
+```python
+classifier = tf.estimator.Estimator(
+    model_fn=my_model,
+    params={
+        'feature_columns': my_feature_columns,
+        # Two hidden layers of 10 nodes each.
+        'hidden_units': [10, 10],
+        # The model must choose between 3 classes.
+        'n_classes': 3,
+    })
+```
+
+To implement a typical model function, you must do the following:
+
+* (Define the model)[#define_the_model].
+* Specify additional calculations for each of
+  the [three different modes](#modes):
+  * [Predict](#predict)
+  * [Evaluate](#evaluate)
+  * [Train](#train)
+
+## Define the model
+
+The basic deep neural network model must define the following three sections:
+
+* An [input layer](https://developers.google.com/machine-learning/glossary/#input_layer)
+* One or more [hidden layers](https://developers.google.com/machine-learning/glossary/#hidden_layer)
+* An [output layer](https://developers.google.com/machine-learning/glossary/#output_layer)
+
+### Define the input layer
+
+The first line of the `model_fn` calls @{tf.feature_column.input_layer} to
+convert the feature dictionary and `feature_columns` into input for your model,
+as follows:
+
+```python
+    # Use `input_layer` to apply the feature columns.
+    net = tf.feature_column.input_layer(features, params['feature_columns'])
+```
+
+The preceding line applies the transformations defined by your feature columns,
+creating the model's input layer.
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="display:block; margin: 0 auto"
+  alt="A diagram of the input layer, in this case a 1:1 mapping from raw-inputs to features."
+  src="../images/custom_estimators/input_layer.png">
+</div>
+
+
+### Hidden Layers
+
+If you are creating a deep neural network, you must define one or more hidden
+layers. The Layers API provides a rich set of functions to define all types of
+hidden layers, including convolutional, pooling, and dropout layers. For Iris,
+we're simply going to call @{tf.layers.dense} to create hidden layers, with
+dimensions defined by `params['hidden_layers']`. In a `dense` layer each node
+is connected to every node in the preceding layer.  Here's the relevant code:
+
+``` python
+    # Build the hidden layers, sized according to the 'hidden_units' param.
+    for units in params['hidden_units']:
+        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
+```
+
+* The `units` parameter defines the number of output neurons in a given layer.
+* The `activation` parameter defines the [activation function](https://developers.google.com/machine-learning/glossary/#a) —
+  [Relu](https://developers.google.com/machine-learning/glossary/#ReLU) in this
+  case.
+
+The variable `net` here signifies the current top layer of the network. During
+the first iteration, `net` signifies the input layer. On each loop iteration
+`tf.layers.dense` creates a new layer, which takes the previous layer's output
+as its input, using the variable `net`.
+
+After creating two hidden layers, our network looks as follows. For
+simplicity, the figure does not show all the units in each layer.
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="display:block; margin: 0 auto"
+  alt="The input layer with two hidden layers added."
+  src="../images/custom_estimators/add_hidden_layer.png">
+</div>
+
+Note that @{tf.layers.dense} provides many additional capabilities, including
+the ability to set a multitude of regularization parameters. For the sake of
+simplicity, though, we're going to simply accept the default values of the
+other parameters.
+
+### Output Layer
+
+We'll define the output layer by calling @{tf.layers.dense} yet again, this
+time without an activation function:
+
+```python
+    # Compute logits (1 per class).
+    logits = tf.layers.dense(net, params['n_classes'], activation=None)
+```
+
+Here, `net` signifies the final hidden layer. Therefore, the full set of layers
+is now connected as follows:
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="display:block; margin: 0 auto"
+  alt="A logit output layer connected to the top hidden layer"
+  src="../images/custom_estimators/add_logits.png">
+</div>
+<div style="text-align: center">
+The final hidden layer feeds into the output layer.
+</div>
+
+When defining an output layer, the `units` parameter specifies the number of
+outputs. So, by setting `units` to `params['n_classes']`, the model produces
+one output value per class. Each element of the output vector will contain the
+score, or "logit", calculated for the associated class of Iris: Setosa,
+Versicolor, or Virginica, respectively.
+
+Later on, these logits will be transformed into probabilities by the
+@{tf.nn.softmax} function.
+
+## Implement training, evaluation, and prediction {#modes}
+
+The final step in creating a model function is to write branching code that
+implements prediction, evaluation, and training.
+
+The model function gets invoked whenever someone calls the Estimator's `train`,
+`evaluate`, or `predict` methods. Recall that the signature for the model
+function looks like this:
+
+``` python
+def my_model_fn(
+   features, # This is batch_features from input_fn
+   labels,   # This is batch_labels from input_fn
+   mode,     # An instance of tf.estimator.ModeKeys, see below
+   params):  # Additional configuration
+```
+
+Focus on that third argument, mode. As the following table shows, when someone
+calls `train`, `evaluate`, or `predict`, the Estimator framework invokes your model
+function with the mode parameter set as follows:
+
+| Estimator method                 |    Estimator Mode |
+|:---------------------------------|:------------------|
+|@{tf.estimator.Estimator.train$`train()`} |@{tf.estimator.ModeKeys.TRAIN$`ModeKeys.TRAIN`} |
+|@{tf.estimator.Estimator.evaluate$`evaluate()`}  |@{tf.estimator.ModeKeys.EVAL$`ModeKeys.EVAL`}      |
+|@{tf.estimator.Estimator.predict$`predict()`}|@{tf.estimator.ModeKeys.PREDICT$`ModeKeys.PREDICT`} |
+
+For example, suppose you instantiate a custom Estimator to generate an object
+named `classifier`. Then, you make the following call:
+
+``` python
+classifier = tf.estimator.Estimator(...)
+classifier.train(input_fn=lambda: my_input_fn(FILE_TRAIN, True, 500))
+```
+The Estimator framework then calls your model function with mode set to
+`ModeKeys.TRAIN`.
+
+Your model function must provide code to handle all three of the mode values.
+For each mode value, your code must return an instance of
+`tf.estimator.EstimatorSpec`, which contains the information the caller
+requires. Let's examine each mode.
+
+### Predict
+
+When the Estimator's `predict` method is called, the `model_fn` receives
+`mode = ModeKeys.PREDICT`. In this case, the model function must return a
+`tf.estimator.EstimatorSpec` containing the prediction.
+
+The model must have been trained prior to making a prediction. The trained model
+is stored on disk in the `model_dir` directory established when you
+instantiated the Estimator.
+
+The code to generate the prediction for this model looks as follows:
+
+```python
+# Compute predictions.
+predicted_classes = tf.argmax(logits, 1)
+if mode == tf.estimator.ModeKeys.PREDICT:
+    predictions = {
+        'class_ids': predicted_classes[:, tf.newaxis],
+        'probabilities': tf.nn.softmax(logits),
+        'logits': logits,
+    }
+    return tf.estimator.EstimatorSpec(mode, predictions=predictions)
+```
+The prediction dictionary contains everything that your model returns when run
+in prediction mode.
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="display:block; margin: 0 auto"
+  alt="Additional outputs added to the output layer."
+  src="../images/custom_estimators/add_predictions.png">
+</div>
+
+The `predictions` holds the following three key/value pairs:
+
+*   `class_ids` holds the class id (0, 1, or 2) representing the model's
+    prediction of the most likely species for this example.
+*   `probabilities` holds the three probabilities (in this example, 0.02, 0.95,
+    and 0.03)
+*   `logit` holds the raw logit values (in this example, -1.3, 2.6, and -0.9)
+
+We return that dictionary to the caller via the `predictions` parameter of the
+@{tf.estimator.EstimatorSpec}. The Estimator's
+@{tf.estimator.Estimator.predict$`predict`} method will yield these
+dictionaries.
+
+### Calculate the loss
+
+For both [training](#train) and [evaluation](#evaluate) we need to calculate the
+model's loss. This is the
+[objective](https://developers.google.com/machine-learning/glossary/#objective)
+that will be optimized.
+
+We can calculate the loss by calling @{tf.losses.sparse_softmax_cross_entropy}.
+The value returned by this function will be lowest, approximately 0,
+probability of the correct class (at index `label`) is near 1.0. The loss value
+returned is progressively larger as the probability of the correct class
+decreases.
+
+This function returns the average over the whole batch.
+
+```python
+# Compute loss.
+loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
+```
+
+### Evaluate
+
+When the Estimator's `evaluate` method is called, the `model_fn` receives
+`mode = ModeKeys.EVAL`. In this case, the model function must return a
+`tf.estimator.EstimatorSpec` containing the model's loss and optionally one
+or more metrics.
+
+Although returning metrics is optional, most custom Estimators do return at
+least one metric. TensorFlow provides a Metrics module @{tf.metrics} to
+calculate common metrics.  For brevity's sake, we'll only return accuracy. The
+@{tf.metrics.accuracy} function compares our predictions against the
+true values, that is, against the labels provided by the input function. The
+@{tf.metrics.accuracy} function requires the labels and predictions to have the
+same shape. Here's the call to @{tf.metrics.accuracy}:
+
+``` python
+# Compute evaluation metrics.
+accuracy = tf.metrics.accuracy(labels=labels,
+                               predictions=predicted_classes,
+                               name='acc_op')
+```
+
+The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for evaluation
+typically contains the following information:
+
+* `loss`, which is the model's loss
+* `eval_metric_ops`, which is an optional dictionary of metrics.
+
+So, we'll create a dictionary containing our sole metric. If we had calculated
+other metrics, we would have added them as additional key/value pairs to that
+same dictionary.  Then, we'll pass that dictionary in the `eval_metric_ops`
+argument of `tf.estimator.EstimatorSpec`. Here's the code:
+
+```python
+metrics = {'accuracy': accuracy}
+tf.summary.scalar('accuracy', accuracy[1])
+
+if mode == tf.estimator.ModeKeys.EVAL:
+    return tf.estimator.EstimatorSpec(
+        mode, loss=loss, eval_metric_ops=metrics)
+```
+
+The @{tf.summary.scalar} will make accuracy available to TensorBoard
+in both `TRAIN` and `EVAL` modes. (More on this later).
+
+### Train
+
+When the Estimator's `train` method is called, the `model_fn` is called
+with `mode = ModeKeys.TRAIN`. In this case, the model function must return an
+`EstimatorSpec` that contains the loss and a training operation.
+
+Building the training operation will require an optimizer. We will use
+@{tf.train.AdagradOptimizer} because we're mimicking the `DNNClassifier`, which
+also uses `Adagrad` by default. The `tf.train` package provides many other
+optimizers—feel free to experiment with them.
+
+Here is the code that builds the optimizer:
+
+``` python
+optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
+```
+
+Next, we build the training operation using the optimizer's
+@{tf.train.Optimizer.minimize$`minimize`} method on the loss we calculated
+earlier.
+
+The `minimize` method also takes a `global_step` parameter. TensorFlow uses this
+parameter to count the number of training steps that have been processed
+(to know when to end a training run). Furthermore, the `global_step` is
+essential for TensorBoard graphs to work correctly. Simply call
+@{tf.train.get_global_step} and pass the result to the `global_step`
+argument of `minimize`.
+
+Here's the code to train the model:
+
+``` python
+train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
+```
+
+The @{tf.estimator.EstimatorSpec$`EstimatorSpec`} returned for training
+must have the following fields set:
+
+* `loss`, which contains the value of the loss function.
+* `train_op`, which executes a training step.
+
+Here's our code to call `EstimatorSpec`:
+
+```python
+return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
+```
+
+The model function is now complete.
+
+## The custom Estimator
+
+Instantiate the custom Estimator through the Estimator base class as follows:
+
+```python
+    # Build 2 hidden layer DNN with 10, 10 units respectively.
+    classifier = tf.estimator.Estimator(
+        model_fn=my_model,
+        params={
+            'feature_columns': my_feature_columns,
+            # Two hidden layers of 10 nodes each.
+            'hidden_units': [10, 10],
+            # The model must choose between 3 classes.
+            'n_classes': 3,
+        })
+```
+Here the `params` dictionary serves the same purpose as the key-word
+arguments of `DNNClassifier`; that is, the `params` dictionary lets you
+configure your Estimator without modifying the code in the `model_fn`.
+
+The rest of the code to train, evaluate, and generate predictions using our
+Estimator is the same as in the
+@{$get_started/premade_estimators$Premade Estimators} chapter. For
+example, the following line will train the model:
+
+```python
+# Train the Model.
+classifier.train(
+    input_fn=lambda:iris_data.train_input_fn(train_x, train_y, args.batch_size),
+    steps=args.train_steps)
+```
+
+## TensorBoard
+
+You can view training results for your custom Estimator in TensorBoard. To see
+this reporting, start TensorBoard from your command line as follows:
+
+```bsh
+# Replace PATH with the actual path passed as model_dir
+tensorboard --logdir=PATH
+```
+
+Then, open TensorBoard by browsing to: [http://localhost:6006](http://localhost:6006)
+
+All the pre-made Estimators automatically log a lot of information to
+TensorBoard. With custom Estimators, however, TensorBoard only provides one
+default log (a graph of the loss) plus the information you explicitly tell
+TensorBoard to log. For the custom Estimator you just created, TensorBoard
+generates the following:
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+
+<img style="display:block; margin: 0 auto"
+  alt="Accuracy, 'scalar' graph from tensorboard"
+  src="../images/custom_estimators/accuracy.png">
+
+<img style="display:block; margin: 0 auto"
+  alt="loss 'scalar' graph from tensorboard"
+  src="../images/custom_estimators/loss.png">
+
+<img style="display:block; margin: 0 auto"
+  alt="steps/second 'scalar' graph from tensorboard"
+  src="../images/custom_estimators/steps_per_second.png">
+</div>
+
+<div style="text-align: center">
+TensorBoard displays three graphs.
+</div>
+
+
+In brief, here's what the three graphs tell you:
+
+* global_step/sec: A performance indicator showing how many batches (gradient
+  updates) we processed per second as the model trains.
+
+* loss: The loss reported.
+
+* accuracy: The accuracy is recorded by the following two lines:
+
+  * `eval_metric_ops={'my_accuracy': accuracy})`, during evaluation.
+  * `tf.summary.scalar('accuracy', accuracy[1])`, during training.
+
+These tensorboard graphs are one of the main reasons it's important to pass a
+`global_step` to your optimizer's `minimize` method. The model can't record
+the x-coordinate for these graphs without it.
+
+Note the following in the `my_accuracy` and `loss` graphs:
+
+* The orange line represents training.
+* The blue dot represents evaluation.
+
+During training, summaries (the orange line) are recorded periodically as
+batches are processed, which is why it becomes a graph spanning x-axis range.
+
+By contrast, evaluation produces only a single point on the graph for each call
+to `evaluate`. This point contains the average over the entire evaluation call.
+This has no width on the graph as it is evaluated entirely from the model state
+at a particular training step (from a single checkpoint).
+
+As suggested in the following figure, you may see and also selectively
+disable/enable the reporting using the controls on the left side.
+
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="display:block; margin: 0 auto"
+  alt="Check-boxes allowing the user to select which runs are shown."
+  src="../images/custom_estimators/select_run.jpg">
+</div>
+<div style="text-align: center">
+Enable or disable reporting.
+</div>
+
+
+## Summary
+
+Although pre-made Estimators can be an effective way to quickly create new
+models, you will often need the additional flexibility that custom Estimators
+provide. Fortunately, pre-made and custom Estimators follow the same
+programming model. The only practical difference is that you must write a model
+function for custom Estimators; everything else is the same.
+
+For more details, be sure to check out:
+
+* The
+  [official TensorFlow implementation of MNIST](https://github.com/tensorflow/models/tree/master/official/mnist),
+  which uses a custom estimator.
+* The TensorFlow
+  [official models repository](https://github.com/tensorflow/models/tree/master/official),
+  which contains more curated examples using custom estimators.
+* This [TensorBoard video](https://youtu.be/eBbEDRsCmv4), which introduces
+  TensorBoard.
+* The @{$low_level_intro$Low Level Introduction}, which demonstrates
+  how to experiment directly with TensorFlow's low level APIs, making debugging
+  easier.
diff --git a/tensorflow/docs_src/get_started/datasets_quickstart.md b/tensorflow/docs_src/get_started/datasets_quickstart.md
new file mode 100644
index 0000000000000000000000000000000000000000..ecfbf160f0de2414f6cffa07d159a3e26733e3a6
--- /dev/null
+++ b/tensorflow/docs_src/get_started/datasets_quickstart.md
@@ -0,0 +1,402 @@
+# Datasets Quick Start
+
+The @{tf.data} module contains a collection of classes that allows you to
+easily load data, manipulate it, and pipe it into your model. This document
+introduces the API by walking through two simple examples:
+
+* Reading in-memory data from numpy arrays.
+* Reading lines from a csv file.
+
+<!-- TODO(markdaoust): Add links to an example reading from multiple-files
+(image_retraining), and a from_generator example. -->
+
+## Basic input
+
+Taking slices from an array is the simplest way to get started with `tf.data`.
+
+The @{$get_started/premade_estimators$Premade Estimators} chapter describes
+the following `train_input_fn`, from
+[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py),
+to pipe the data into the Estimator:
+
+``` python
+def train_input_fn(features, labels, batch_size):
+    """An input function for training"""
+    # Convert the inputs to a Dataset.
+    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
+
+    # Shuffle, repeat, and batch the examples.
+    dataset = dataset.shuffle(1000).repeat().batch(batch_size)
+
+    # Build the Iterator, and return the read end of the pipeline.
+    return dataset.make_one_shot_iterator().get_next()
+```
+
+Let's look at this more closely.
+
+### Arguments
+
+This function expects three arguments. Arguments expecting an "array" can
+accept nearly anything that can be converted to an array with `numpy.array`.
+One exception is
+[`tuple`](https://docs.python.org/3/tutorial/datastructures.html#tuples-and-sequences)
+which has special meaning for `Datasets`.
+
+* `features`: A `{'feature_name':array}` dictionary (or
+  [`DataFrame`](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html))
+  containing the raw input features.
+* `labels` : An array containing the
+  [label](https://developers.google.com/machine-learning/glossary/#label)
+  for each example.
+* `batch_size` : An integer indicating the desired batch size.
+
+In [`premade_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py)
+we retrieved the Iris data using the `iris_data.load_data()` function.
+You can run it, and unpack the results as follows:
+
+``` python
+import iris_data
+
+# Fetch the data
+train, test = iris_data.load_data()
+features, labels = train
+```
+
+Then we passed this data to the input function, with a line similar to this:
+
+``` python
+batch_size=100
+iris_data.train_input_fn(features, labels, batch_size)
+```
+
+Let's walk through the `train_input_fn()`.
+
+### Slices
+
+In the simplest cases, @{tf.data.Dataset.from_tensor_slices} function takes an
+array and returns a @{tf.data.Dataset} representing slices of the array. For
+example, an array containing the @{$tutorials/layers$mnist training data}
+has a shape of `(60000, 28, 28)`. Passing this to `from_tensor_slices` returns
+a `Dataset` object containing 60000 slices, each one a 28x28 image.
+
+The code that returns this `Dataset` is as follows:
+
+``` python
+train, test = tf.keras.datasets.mnist.load_data()
+mnist_x, mnist_y = train
+
+mnist_ds = tf.data.Dataset.from_tensor_slices(mnist_x)
+print(mnist_ds)
+```
+
+This will print the following line, showing the @{$programmers_guide/tensors#shapes$shapes} and @{$programmers_guide/tensors#data_types$types} of the items in
+the dataset. Note that the dataset does not know how many items it contains.
+
+``` None
+<TensorSliceDataset shapes: (28,28), types: tf.uint8>
+```
+
+The dataset above represents a collection of simple arrays, but datasets are
+much more powerful than this. Datasets transparently handle any nested
+combination of dictionaries or tuples. For example, ensuring that `features`
+is a standard dictionary, you can then convert the dictionary of arrays to
+a `Dataset` of dictionaries as follows:
+
+``` python
+dataset = tf.data.Dataset.from_tensor_slices(dict(features))
+print(dataset)
+```
+``` None
+<TensorSliceDataset
+
+  shapes: {
+    SepalLength: (), PetalWidth: (),
+    PetalLength: (), SepalWidth: ()},
+
+  types: {
+      SepalLength: tf.float64, PetalWidth: tf.float64,
+      PetalLength: tf.float64, SepalWidth: tf.float64}
+>
+```
+
+Here we see that when a `Dataset` contains structured elements, the `shapes`
+and `types` of the `Dataset` take on the same structure. This dataset contains
+dictionaries of @{$programmers_guide/tensors#rank$scalars}, all of type
+`tf.float64`.
+
+The first line of `train_input_fn` uses the same functionality, but adds
+another level of structure. It creates a dataset containing
+`(features, labels)` pairs.
+
+The following code shows that the label is a scalar with type `int64`:
+
+``` python
+# Convert the inputs to a Dataset.
+dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
+print(dataset)
+```
+```
+<TensorSliceDataset
+    shapes: (
+        {
+          SepalLength: (), PetalWidth: (),
+          PetalLength: (), SepalWidth: ()},
+        ()),
+
+    types: (
+        {
+          SepalLength: tf.float64, PetalWidth: tf.float64,
+          PetalLength: tf.float64, SepalWidth: tf.float64},
+        tf.int64)>
+```
+
+### Manipulation
+
+Currently the `Dataset` would iterate over the data once, in a fixed order, and
+only produce a single element at a time. It needs further processing before it
+can be used for training. Fortunately, the `tf.data.Dataset` class provides
+methods to better prepare the data for training. The next line of the input
+function takes advantage of several of these methods:
+
+``` python
+# Shuffle, repeat, and batch the examples.
+dataset = dataset.shuffle(1000).repeat().batch(batch_size)
+```
+
+The @{tf.data.Dataset.shuffle$`shuffle`} method uses a fixed-size buffer to
+shuffle the items as they pass through. Setting a `buffer_size` greater than
+the number of examples in the `Dataset` ensures that the data is completely
+shuffled. The Iris data set only contains 150 examples.
+
+The @{tf.data.Dataset.repeat$`repeat`} method has the `Dataset` restart when
+it reaches the end. To limit the number of epochss, set the `count` argument.
+
+The @{tf.data.Dataset.repeat$`batch`} method collects a number of examples and
+stacks them, to create batches. This adds a dimension to their shape. The new
+dimension is added as the first dimension. The following code uses
+the `batch` method on the MNIST `Dataset`, from earlier. This results in a
+`Dataset` containing 3D arrays representing stacks of `(28,28)` images:
+
+``` python
+print(mnist_ds.batch(100))
+```
+
+``` none
+<BatchDataset
+  shapes: (?, 28, 28),
+  types: tf.uint8>
+```
+Note that the dataset has an unknown batch size because the last batch will
+have fewer elements.
+
+In `train_input_fn`, after batching the `Dataset` contains 1D vectors of
+elements where each scalar was previously:
+
+```python
+print(dataset)
+```
+```
+<TensorSliceDataset
+    shapes: (
+        {
+          SepalLength: (?,), PetalWidth: (?,),
+          PetalLength: (?,), SepalWidth: (?,)},
+        (?,)),
+
+    types: (
+        {
+          SepalLength: tf.float64, PetalWidth: tf.float64,
+          PetalLength: tf.float64, SepalWidth: tf.float64},
+        tf.int64)>
+```
+
+
+### Return
+
+<!-- TODO(markdaoust) This line can be simplified to "return dataset" -->
+
+The `train`, `evaluate`, and `predict` methods of every Estimator require
+input functions to return a `(features, label)` pair containing
+@{$programmers_guide/tensors$tensorflow tensors}. The `train_input_fn` uses
+the following line to convert the Dataset into the expected format:
+
+```python
+# Build the Iterator, and return the read end of the pipeline.
+features_result, labels_result = dataset.make_one_shot_iterator().get_next()
+```
+
+The result is a structure of @{$programmers_guide/tensors$TensorFlow tensors},
+matching the layout of the items in the `Dataset`.
+For an introduction to what these objects are and how to work with them,
+see @{$programmers_guide/low_level_intro}.
+
+``` python
+print((features_result, labels_result))
+```
+
+```None
+({
+    'SepalLength': <tf.Tensor 'IteratorGetNext:2' shape=(?,) dtype=float64>,
+    'PetalWidth': <tf.Tensor 'IteratorGetNext:1' shape=(?,) dtype=float64>,
+    'PetalLength': <tf.Tensor 'IteratorGetNext:0' shape=(?,) dtype=float64>,
+    'SepalWidth': <tf.Tensor 'IteratorGetNext:3' shape=(?,) dtype=float64>},
+Tensor("IteratorGetNext_1:4", shape=(?,), dtype=int64))
+```
+
+## Reading a CSV File
+
+The most common real-world use case for the `Dataset` class is to stream data
+from files on disk. The @{tf.data} module includes a variety of
+file readers. Let's see how parsing the Iris dataset from the csv file looks
+using a `Dataset`.
+
+The following call to the `iris_data.maybe_download` function downloads the
+data if necessary, and returns the pathnames of the resulting files:
+
+``` python
+import iris_data
+train_path, test_path = iris_data.maybe_download()
+```
+
+The [`iris_data.csv_input_fn`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py)
+function contains an alternative implementation that parses the csv files using
+a `Dataset`.
+
+Let's look at how to build an Estimator-compatible input function that reads
+from the local files.
+
+### Build the `Dataset`
+
+We start by building a @{tf.data.TextLineDataset$`TextLineDataset`} object to
+read the file one line at a time. Then, we call the
+@{tf.data.Dataset.skip$`skip`} method to skip over the first line of the file, which contains a header, not an example:
+
+``` python
+ds = tf.data.TextLineDataset(train_path).skip(1)
+```
+
+### Build a csv line parser
+
+Ultimately we will need to parse each of the lines in the dataset, to
+produce the necessary `(features, label)` pairs.
+
+We will start by building a function to parse a single line.
+
+The following `iris_data.parse_line` function acomplishes this taks using the
+@{tf.decode_csv} function, and some simple python code:
+
+We must parse each of the lines in the dataset in order to generate the
+necessary `(features, label)` pairs. The following `_parse_line` function
+calls @{tf.decode_csv} to parse a single line into its features
+and the label. Since Estimators require that features be represented as a
+dictionary, we rely on Python's built-in `dict` and `zip` functions to build
+that dictionary.  The feature names are the keys of that dictionary.
+We then call the dictionary's `pop` method to remove the label field from
+the features dictionary:
+
+``` python
+# Metadata describing the text columns
+COLUMNS = ['SepalLength', 'SepalWidth',
+           'PetalLength', 'PetalWidth',
+           'label']
+FIELD_DEFAULTS = [[0.0], [0.0], [0.0], [0.0], [0]]
+def _parse_line(line):
+    # Decode the line into its fields
+    fields = tf.decode_csv(line, FIELD_DEFAULTS)
+
+    # Pack the result into a dictionary
+    features = dict(zip(COLUMNS,fields))
+
+    # Separate the label from the features
+    label = features.pop('label')
+
+    return features, label
+```
+
+### Parse the lines
+
+Datasets have many methods for manipulating the data while it is being piped
+to a model. The most heavily-used method is @{tf.data.Dataset.map$`map`}, which
+applies a transformation to each element of the `Dataset`.
+
+The `map` method takes a `map_func` argument that describes how each item in the
+`Dataset` should be transformed.
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/datasets/map.png">
+</div>
+<div style="text-align: center">
+The @{tf.data.Dataset.map$`map`} method applies the `map_func` to
+transform each item in the <code>Dataset</code>.
+</div>
+
+So to parse the lines as they are streamed out of the csv file, we pass our
+`_parse_line` function to the `map` method:
+
+``` python
+ds = ds.map(_parse_line)
+print(ds)
+```
+``` None
+<MapDataset
+shapes: (
+    {SepalLength: (), PetalWidth: (), ...},
+    ()),
+types: (
+    {SepalLength: tf.float32, PetalWidth: tf.float32, ...},
+    tf.int32)>
+```
+
+Now instead of simple scalar strings, the dataset contains `(features, label)`
+pairs.
+
+the remainder of the `iris_data.csv_input_fn` function is identical
+to `iris_data.train_input_fn` which was covered in the in the
+[Basic input](#basic_input) section.
+
+### Try it out
+
+This function can be used as a replacement for
+`iris_data.train_input_fn`. It can be used to feed an estimator as follows:
+
+``` python
+train_path, test_path = iris_data.maybe_download()
+
+# All the inputs are numeric
+feature_columns = [
+    tf.feature_column.numeric_column(name)
+    for name in iris_data.CSV_COLUMN_NAMES[:-1]]
+
+# Build the estimator
+est = tf.estimator.LinearClassifier(feature_columns,
+                                    n_classes=3)
+# Train the estimator
+batch_size = 100
+est.train(
+    steps=1000,
+    input_fn=lambda : iris_data.csv_input_fn(train_path, batch_size))
+```
+
+Estimators expect an `input_fn` to take no arguments. To work around this
+restriction, we use `lambda` to capture the arguments and provide the expected
+interface.
+
+## Summary
+
+The `tf.data` module provides a collection of classes and functions for easily
+reading data from a variety of sources. Furthermore, `tf.data` has simple
+powerful methods for applying a wide variety of standard and custom
+transformations.
+
+Now you have the basic idea of how to efficiently load data into an
+Estimator. Consider the following documents next:
+
+
+* @{$get_started/custom_estimators}, which demonstrates how to build your own
+  custom `Estimator` model.
+* The @{$low_level_intro#datasets$Low Level Introduction}, which demonstrates
+  how to experiment directly with `tf.data.Datasets` using TensorFlow's low
+  level APIs.
+* @{$programmers_guide/datasets} which goes into great detail about additional
+  functionality of `Datasets`.
+
diff --git a/tensorflow/docs_src/get_started/estimator.md b/tensorflow/docs_src/get_started/estimator.md
deleted file mode 100644
index 790de6679b0bdbe5f91fd03e3ebfedc278b5b3c8..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/get_started/estimator.md
+++ /dev/null
@@ -1,410 +0,0 @@
-# tf.estimator Quickstart
-
-TensorFlow’s high-level machine learning API (tf.estimator) makes it easy to
-configure, train, and evaluate a variety of machine learning models. In this
-tutorial, you’ll use tf.estimator to construct a
-[neural network](https://en.wikipedia.org/wiki/Artificial_neural_network)
-classifier and train it on the
-[Iris data set](https://en.wikipedia.org/wiki/Iris_flower_data_set) to
-predict flower species based on sepal/petal geometry. You'll write code to
-perform the following five steps:
-
-1.  Load CSVs containing Iris training/test data into a TensorFlow `Dataset`
-2.  Construct a @{tf.estimator.DNNClassifier$neural network classifier}
-3.  Train the model using the training data
-4.  Evaluate the accuracy of the model
-5.  Classify new samples
-
-NOTE: Remember to @{$install$install TensorFlow on your machine}
-before getting started with this tutorial.
-
-## Complete Neural Network Source Code
-
-Here is the full code for the neural network classifier:
-
-```python
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-from six.moves.urllib.request import urlopen
-
-import numpy as np
-import tensorflow as tf
-
-# Data sets
-IRIS_TRAINING = "iris_training.csv"
-IRIS_TRAINING_URL = "http://download.tensorflow.org/data/iris_training.csv"
-
-IRIS_TEST = "iris_test.csv"
-IRIS_TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
-
-
-def main():
-  # If the training and test sets aren't stored locally, download them.
-  if not os.path.exists(IRIS_TRAINING):
-    raw = urlopen(IRIS_TRAINING_URL).read()
-    with open(IRIS_TRAINING, "wb") as f:
-      f.write(raw)
-
-  if not os.path.exists(IRIS_TEST):
-    raw = urlopen(IRIS_TEST_URL).read()
-    with open(IRIS_TEST, "wb") as f:
-      f.write(raw)
-
-  # Load datasets.
-  training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
-      filename=IRIS_TRAINING,
-      target_dtype=np.int,
-      features_dtype=np.float32)
-  test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
-      filename=IRIS_TEST,
-      target_dtype=np.int,
-      features_dtype=np.float32)
-
-  # Specify that all features have real-value data
-  feature_columns = [tf.feature_column.numeric_column("x", shape=[4])]
-
-  # Build 3 layer DNN with 10, 20, 10 units respectively.
-  classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns,
-                                          hidden_units=[10, 20, 10],
-                                          n_classes=3,
-                                          model_dir="/tmp/iris_model")
-  # Define the training inputs
-  train_input_fn = tf.estimator.inputs.numpy_input_fn(
-      x={"x": np.array(training_set.data)},
-      y=np.array(training_set.target),
-      num_epochs=None,
-      shuffle=True)
-
-  # Train model.
-  classifier.train(input_fn=train_input_fn, steps=2000)
-
-  # Define the test inputs
-  test_input_fn = tf.estimator.inputs.numpy_input_fn(
-      x={"x": np.array(test_set.data)},
-      y=np.array(test_set.target),
-      num_epochs=1,
-      shuffle=False)
-
-  # Evaluate accuracy.
-  accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]
-
-  print("\nTest Accuracy: {0:f}\n".format(accuracy_score))
-
-  # Classify two new flower samples.
-  new_samples = np.array(
-      [[6.4, 3.2, 4.5, 1.5],
-       [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)
-  predict_input_fn = tf.estimator.inputs.numpy_input_fn(
-      x={"x": new_samples},
-      num_epochs=1,
-      shuffle=False)
-
-  predictions = list(classifier.predict(input_fn=predict_input_fn))
-  predicted_classes = [p["classes"] for p in predictions]
-
-  print(
-      "New Samples, Class Predictions:    {}\n"
-      .format(predicted_classes))
-
-if __name__ == "__main__":
-    main()
-```
-
-The following sections walk through the code in detail.
-
-## Load the Iris CSV data to TensorFlow
-
-The [Iris data set](https://en.wikipedia.org/wiki/Iris_flower_data_set) contains
-150 rows of data, comprising 50 samples from each of three related Iris species:
-*Iris setosa*, *Iris virginica*, and *Iris versicolor*.
-
-![Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor](https://www.tensorflow.org/images/iris_three_species.jpg) **From left to right,
-[*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by
-[Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0),
-[*Iris versicolor*](https://commons.wikimedia.org/w/index.php?curid=248095) (by
-[Dlanglois](https://commons.wikimedia.org/wiki/User:Dlanglois), CC BY-SA 3.0),
-and [*Iris virginica*](https://www.flickr.com/photos/33397993@N05/3352169862)
-(by [Frank Mayfield](https://www.flickr.com/photos/33397993@N05), CC BY-SA
-2.0).**
-
-Each row contains the following data for each flower sample:
-[sepal](https://en.wikipedia.org/wiki/Sepal) length, sepal width,
-[petal](https://en.wikipedia.org/wiki/Petal) length, petal width, and flower
-species. Flower species are represented as integers, with 0 denoting *Iris
-setosa*, 1 denoting *Iris versicolor*, and 2 denoting *Iris virginica*.
-
-Sepal Length | Sepal Width | Petal Length | Petal Width | Species
-:----------- | :---------- | :----------- | :---------- | :-------
-5.1          | 3.5         | 1.4          | 0.2         | 0
-4.9          | 3.0         | 1.4          | 0.2         | 0
-4.7          | 3.2         | 1.3          | 0.2         | 0
-&hellip;     | &hellip;    | &hellip;     | &hellip;    | &hellip;
-7.0          | 3.2         | 4.7          | 1.4         | 1
-6.4          | 3.2         | 4.5          | 1.5         | 1
-6.9          | 3.1         | 4.9          | 1.5         | 1
-&hellip;     | &hellip;    | &hellip;     | &hellip;    | &hellip;
-6.5          | 3.0         | 5.2          | 2.0         | 2
-6.2          | 3.4         | 5.4          | 2.3         | 2
-5.9          | 3.0         | 5.1          | 1.8         | 2
-
-For this tutorial, the Iris data has been randomized and split into two separate
-CSVs:
-
-*   A training set of 120 samples
-    ([iris_training.csv](http://download.tensorflow.org/data/iris_training.csv))
-*   A test set of 30 samples
-    ([iris_test.csv](http://download.tensorflow.org/data/iris_test.csv)).
-
-To get started, first import all the necessary modules, and define where to
-download and store the dataset:
-
-```python
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-from six.moves.urllib.request import urlopen
-
-import tensorflow as tf
-import numpy as np
-
-IRIS_TRAINING = "iris_training.csv"
-IRIS_TRAINING_URL = "http://download.tensorflow.org/data/iris_training.csv"
-
-IRIS_TEST = "iris_test.csv"
-IRIS_TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
-```
-
-Then, if the training and test sets aren't already stored locally, download
-them.
-
-```python
-if not os.path.exists(IRIS_TRAINING):
-  raw = urlopen(IRIS_TRAINING_URL).read()
-  with open(IRIS_TRAINING,'wb') as f:
-    f.write(raw)
-
-if not os.path.exists(IRIS_TEST):
-  raw = urlopen(IRIS_TEST_URL).read()
-  with open(IRIS_TEST,'wb') as f:
-    f.write(raw)
-```
-
-Next, load the training and test sets into `Dataset`s using the
-[`load_csv_with_header()`](https://www.tensorflow.org/code/tensorflow/contrib/learn/python/learn/datasets/base.py)
-method in `learn.datasets.base`. The `load_csv_with_header()` method takes three
-required arguments:
-
-*   `filename`, which takes the filepath to the CSV file
-*   `target_dtype`, which takes the
-    [`numpy` datatype](http://docs.scipy.org/doc/numpy/user/basics.types.html)
-    of the dataset's target value.
-*   `features_dtype`, which takes the
-    [`numpy` datatype](http://docs.scipy.org/doc/numpy/user/basics.types.html)
-    of the dataset's feature values.
-
-
-Here, the target (the value you're training the model to predict) is flower
-species, which is an integer from 0&ndash;2, so the appropriate `numpy` datatype
-is `np.int`:
-
-```python
-# Load datasets.
-training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
-    filename=IRIS_TRAINING,
-    target_dtype=np.int,
-    features_dtype=np.float32)
-test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
-    filename=IRIS_TEST,
-    target_dtype=np.int,
-    features_dtype=np.float32)
-```
-
-`Dataset`s in tf.contrib.learn are
-[named tuples](https://docs.python.org/2/library/collections.html#collections.namedtuple);
-you can access feature data and target values via the `data` and `target`
-fields. Here, `training_set.data` and `training_set.target` contain the feature
-data and target values for the training set, respectively, and `test_set.data`
-and `test_set.target` contain feature data and target values for the test set.
-
-Later on, in
-["Fit the DNNClassifier to the Iris Training Data,"](#fit-dnnclassifier)
-you'll use `training_set.data` and
-`training_set.target` to train your model, and in
-["Evaluate Model Accuracy,"](#evaluate-accuracy) you'll use `test_set.data` and
-`test_set.target`. But first, you'll construct your model in the next section.
-
-## Construct a Deep Neural Network Classifier
-
-tf.estimator offers a variety of predefined models, called `Estimator`s, which
-you can use "out of the box" to run training and evaluation operations on your
-data.
-Here, you'll configure a Deep Neural Network Classifier model to fit the Iris
-data. Using tf.estimator, you can instantiate your
-@{tf.estimator.DNNClassifier} with just a couple lines of code:
-
-```python
-# Specify that all features have real-value data
-feature_columns = [tf.feature_column.numeric_column("x", shape=[4])]
-
-# Build 3 layer DNN with 10, 20, 10 units respectively.
-classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns,
-                                        hidden_units=[10, 20, 10],
-                                        n_classes=3,
-                                        model_dir="/tmp/iris_model")
-```
-
-The code above first defines the model's feature columns, which specify the data
-type for the features in the data set. All the feature data is continuous, so
-`tf.feature_column.numeric_column` is the appropriate function to use to
-construct the feature columns. There are four features in the data set (sepal
-width, sepal height, petal width, and petal height), so accordingly `shape`
-must be set to `[4]` to hold all the data.
-
-Then, the code creates a `DNNClassifier` model using the following arguments:
-
-*   `feature_columns=feature_columns`. The set of feature columns defined above.
-*   `hidden_units=[10, 20, 10]`. Three
-    [hidden layers](http://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw),
-    containing 10, 20, and 10 neurons, respectively.
-*   `n_classes=3`. Three target classes, representing the three Iris species.
-*   `model_dir=/tmp/iris_model`. The directory in which TensorFlow will save
-    checkpoint data and TensorBoard summaries during model training.
-
-## Describe the training input pipeline {#train-input}
-
-The `tf.estimator` API uses input functions, which create the TensorFlow
-operations that generate data for the model.
-We can use `tf.estimator.inputs.numpy_input_fn` to produce the input pipeline:
-
-```python
-# Define the training inputs
-train_input_fn = tf.estimator.inputs.numpy_input_fn(
-    x={"x": np.array(training_set.data)},
-    y=np.array(training_set.target),
-    num_epochs=None,
-    shuffle=True)
-```
-
-## Fit the DNNClassifier to the Iris Training Data {#fit-dnnclassifier}
-
-Now that you've configured your DNN `classifier` model, you can fit it to the
-Iris training data using the @{tf.estimator.Estimator.train$`train`} method.
-Pass `train_input_fn` as the `input_fn`, and the number of steps to train
-(here, 2000):
-
-```python
-# Train model.
-classifier.train(input_fn=train_input_fn, steps=2000)
-```
-
-The state of the model is preserved in the `classifier`, which means you can
-train iteratively if you like. For example, the above is equivalent to the
-following:
-
-```python
-classifier.train(input_fn=train_input_fn, steps=1000)
-classifier.train(input_fn=train_input_fn, steps=1000)
-```
-
-However, if you're looking to track the model while it trains, you'll likely
-want to instead use a TensorFlow @{tf.train.SessionRunHook$`SessionRunHook`}
-to perform logging operations.
-
-## Evaluate Model Accuracy {#evaluate-accuracy}
-
-You've trained your `DNNClassifier` model on the Iris training data; now, you
-can check its accuracy on the Iris test data using the
-@{tf.estimator.Estimator.evaluate$`evaluate`} method. Like `train`,
-`evaluate` takes an input function that builds its input pipeline. `evaluate`
-returns a `dict`s with the evaluation results. The following code passes the
-Iris test data&mdash;`test_set.data` and `test_set.target`&mdash;to `evaluate`
-and prints the `accuracy` from the results:
-
-```python
-# Define the test inputs
-test_input_fn = tf.estimator.inputs.numpy_input_fn(
-    x={"x": np.array(test_set.data)},
-    y=np.array(test_set.target),
-    num_epochs=1,
-    shuffle=False)
-
-# Evaluate accuracy.
-accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]
-
-print("\nTest Accuracy: {0:f}\n".format(accuracy_score))
-```
-
-Note: The `num_epochs=1` argument to `numpy_input_fn` is important here.
-`test_input_fn` will iterate over the data once, and then raise
-`OutOfRangeError`. This error signals the classifier to stop evaluating, so it
-will evaluate over the input once.
-
-When you run the full script, it will print something close to:
-
-```
-Test Accuracy: 0.966667
-```
-
-Your accuracy result may vary a bit, but should be higher than 90%. Not bad for
-a relatively small data set!
-
-## Classify New Samples
-
-Use the estimator's `predict()` method to classify new samples. For example, say
-you have these two new flower samples:
-
-Sepal Length | Sepal Width | Petal Length | Petal Width
-:----------- | :---------- | :----------- | :----------
-6.4          | 3.2         | 4.5          | 1.5
-5.8          | 3.1         | 5.0          | 1.7
-
-You can predict their species using the `predict()` method. `predict` returns a
-generator of dicts, which can easily be converted to a list. The following code
-retrieves and prints the class predictions:
-
-```python
-# Classify two new flower samples.
-new_samples = np.array(
-    [[6.4, 3.2, 4.5, 1.5],
-     [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)
-predict_input_fn = tf.estimator.inputs.numpy_input_fn(
-    x={"x": new_samples},
-    num_epochs=1,
-    shuffle=False)
-
-predictions = list(classifier.predict(input_fn=predict_input_fn))
-predicted_classes = [p["classes"] for p in predictions]
-
-print(
-    "New Samples, Class Predictions:    {}\n"
-    .format(predicted_classes))
-```
-
-Your results should look as follows:
-
-```
-New Samples, Class Predictions:    [1 2]
-```
-
-The model thus predicts that the first sample is *Iris versicolor*, and the
-second sample is *Iris virginica*.
-
-## Additional Resources
-
-*   To learn more about using tf.estimator to create linear models, see
-    @{$linear$Large-scale Linear Models with TensorFlow}.
-
-*   To build your own Estimator using tf.estimator APIs, check out
-    @{$extend/estimators$Creating Estimators}.
-
-*   To experiment with neural network modeling and visualization in the browser,
-    check out [Deep Playground](http://playground.tensorflow.org/).
-
-*   For more advanced tutorials on neural networks, see
-    @{$deep_cnn$Convolutional Neural Networks} and @{$recurrent$Recurrent Neural
-    Networks}.
diff --git a/tensorflow/docs_src/get_started/feature_columns.md b/tensorflow/docs_src/get_started/feature_columns.md
new file mode 100644
index 0000000000000000000000000000000000000000..e3308ed716d63f10bf0e9dda858c23eef30709a6
--- /dev/null
+++ b/tensorflow/docs_src/get_started/feature_columns.md
@@ -0,0 +1,572 @@
+# Feature Columns
+
+This document details feature columns. Think of **feature columns** as the
+intermediaries between raw data and Estimators. Feature columns are very rich,
+enabling you to transform a diverse range of raw data into formats that
+Estimators can use, allowing easy experimentation.
+
+In @{$get_started/premade_estimators$Premade Estimators}, we used the premade
+Estimator, @{tf.estimator.DNNClassifier$`DNNClassifier`} to train a model to
+predict different types of Iris flowers from four input features. That example
+created only numerical feature columns (of type
+@{tf.feature_column.numeric_column}). Although numerical feature columns model
+the lengths of petals and sepals effectively, real world data sets contain all
+kinds of features, many of which are non-numerical.
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/feature_cloud.jpg">
+</div>
+<div style="text-align: center">
+Some real-world features (such as, longitude) are numerical, but many are not.
+</div>
+
+## Input to a Deep Neural Network
+
+What kind of data can a deep neural network operate on? The answer
+is, of course, numbers (for example, `tf.float32`). After all, every neuron in
+a neural network performs multiplication and addition operations on weights and
+input data. Real-life input data, however, often contains non-numerical
+(categorical) data. For example, consider a `product_class` feature that can
+contain the following three non-numerical values:
+
+* `kitchenware`
+* `electronics`
+* `sports`
+
+ML models generally represent categorical values as simple vectors in which a
+1 represents the presence of a value and a 0 represents the absence of a value.
+For example, when `product_class` is set to `sports`, an ML model would usually
+represent `product_class` as  `[0, 0, 1]`, meaning:
+
+* `0`: `kitchenware` is absent
+* `0`: `electronics` is absent
+* `1`: `sports` is present
+
+So, although raw data can be numerical or categorical, an ML model represents
+all features as numbers.
+
+## Feature Columns
+
+As the following figure suggests, you specify the input to a model through the
+`feature_columns` argument of an Estimator (`DNNClassifier` for Iris).
+Feature Columns bridge input data (as returned by `input_fn`) with your model.
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/inputs_to_model_bridge.jpg">
+</div>
+<div style="text-align: center">
+Feature columns bridge raw data with the data your model needs.
+</div>
+
+To create feature columns, call functions from the
+@{tf.feature_column} module. This document explains nine of the functions in
+that module. As the following figure shows, all nine functions return either a
+Categorical-Column or a Dense-Column object, except `bucketized_column`, which
+inherits from both classes:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/some_constructors.jpg">
+</div>
+<div style="text-align: center">
+Feature column methods fall into two main categories and one hybrid category.
+</div>
+
+Let's look at these functions in more detail.
+
+### Numeric column
+
+The Iris classifier calls the @{tf.feature_column.numeric_column} function for
+all input features:
+
+  * `SepalLength`
+  * `SepalWidth`
+  * `PetalLength`
+  * `PetalWidth`
+
+Although `tf.numeric_column` provides optional arguments, calling
+`tf.numeric_column` without any arguments, as follows, is a fine way to specify
+a numerical value with the default data type (`tf.float32`) as input to your
+model:
+
+```python
+# Defaults to a tf.float32 scalar.
+numeric_feature_column = tf.feature_column.numeric_column(key="SepalLength")
+```
+
+To specify a non-default numerical data type, use the `dtype` argument. For
+example:
+
+``` python
+# Represent a tf.float64 scalar.
+numeric_feature_column = tf.feature_column.numeric_column(key="SepalLength",
+                                                          dtype=tf.float64)
+```
+
+By default, a numeric column creates a single value (scalar). Use the shape
+argument to specify another shape. For example:
+
+<!--TODO(markdaoust) link to full example-->
+```python
+# Represent a 10-element vector in which each cell contains a tf.float32.
+vector_feature_column = tf.feature_column.numeric_column(key="Bowling",
+                                                         shape=10)
+
+# Represent a 10x5 matrix in which each cell contains a tf.float32.
+matrix_feature_column = tf.feature_column.numeric_column(key="MyMatrix",
+                                                         shape=[10,5])
+```
+### Bucketized column
+
+Often, you don't want to feed a number directly into the model, but instead
+split its value into different categories based on numerical ranges.  To do so,
+create a @{tf.feature_column.bucketized_column$bucketized column}. For
+example, consider raw data that represents the year a house was built. Instead
+of representing that year as a scalar numeric column, we could split the year
+into the following four buckets:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/bucketized_column.jpg">
+</div>
+<div style="text-align: center">
+Dividing year data into four buckets.
+</div>
+
+The model will represent the buckets as follows:
+
+|Date Range |Represented as... |
+|:----------|:-----------------|
+|< 1960               | [1, 0, 0, 0] |
+|>= 1960 but < 1980   | [0, 1, 0, 0] |
+|>= 1980 but < 2000   | [0, 0, 1, 0] |
+|> 2000               | [0, 0, 0, 1] |
+
+Why would you want to split a number—a perfectly valid input to your
+model—into a categorical value? Well, notice that the categorization splits a
+single input number into a four-element vector. Therefore, the model now can
+learn _four individual weights_ rather than just one; four weights creates a
+richer model than one weight. More importantly, bucketizing enables the model
+to clearly distinguish between different year categories since only one of the
+elements is set (1) and the other three elements are cleared (0). When we just
+use a single number (a year) as input, the model can only learn a linear
+relationship. So, bucketing provides the model with additional flexibility that
+the model can use to learn.
+
+The following code demonstrates how to create a bucketized feature:
+
+<!--TODO(markdaoust) link to full example - housing price grid?-->
+```python
+# First, convert the raw input to a numeric column.
+numeric_feature_column = tf.feature_column.numeric_column("Year")
+
+# Then, bucketize the numeric column on the years 1960, 1980, and 2000.
+bucketized_feature_column = tf.feature_column.bucketized_column(
+    source_column = numeric_feature_column,
+    boundaries = [1960, 1980, 2000])
+```
+Note that specifying a _three_-element boundaries vector creates a
+_four_-element bucketized vector.
+
+
+### Categorical identity column
+
+**Categorical identity columns** can be seen as a special case of bucketized
+columns. In traditional bucketized columns, each bucket represents a range of
+values (for example, from 1960 to 1979). In a categorical identity column, each
+bucket represents a single, unique integer. For example, let's say you want to
+represent the integer range `[0, 4)`.  That is, you want to represent the
+integers 0, 1, 2, or 3. In this case, the categorical identity mapping looks
+like this:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/categorical_column_with_identity.jpg">
+</div>
+<div style="text-align: center">
+A categorical identity column mapping. Note that this is a one-hot
+encoding, not a binary numerical encoding.
+</div>
+
+As with bucketized columns, a model can learn a separate weight for each class
+in a categorical identity column. For example, instead of using a string to
+represent the `product_class`, let's represent each class with a unique integer
+value. That is:
+
+* `0="kitchenware"`
+* `1="electronics"`
+* `2="sport"`
+
+Call @{tf.feature_column.categorical_column_with_identity} to implement a
+categorical identity column. For example:
+
+``` python
+# Create categorical output for an integer feature named "my_feature_b",
+# The values of my_feature_b must be >= 0 and < num_buckets
+identity_feature_column = tf.feature_column.categorical_column_with_identity(
+    key='my_feature_b',
+    num_buckets=4) # Values [0, 4)
+
+# In order for the preceding call to work, the input_fn() must return
+# a dictionary containing 'my_feature_b' as a key. Furthermore, the values
+# assigned to 'my_feature_b' must belong to the set [0, 4).
+def input_fn():
+    ...
+    return ({ 'my_feature_a':[7, 9, 5, 2], 'my_feature_b':[3, 1, 2, 2] },
+            [Label_values])
+```
+
+### Categorical vocabulary column
+
+We cannot input strings directly to a model. Instead, we must first map strings
+to numeric or categorical values. Categorical vocabulary columns provide a good
+way to represent strings as a one-hot vector. For example:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/categorical_column_with_vocabulary.jpg">
+</div>
+<div style="text-align: center">
+Mapping string values to vocabulary columns.
+</div>
+
+As you can see, categorical vocabulary columns are kind of an enum version of
+categorical identity columns. TensorFlow provides two different functions to
+create categorical vocabulary columns:
+
+* @{tf.feature_column.categorical_column_with_vocabulary_list}
+* @{tf.feature_column.categorical_column_with_vocabulary_file}
+
+`categorical_column_with_vocabulary_list` maps each string to an integer based
+on an explicit vocabulary list. For example:
+
+```python
+# Given input "feature_name_from_input_fn" which is a string,
+# create a categorical feature by mapping the input to one of
+# the elements in the vocabulary list.
+vocabulary_feature_column =
+    tf.feature_column.categorical_column_with_vocabulary_list(
+        key="a feature returned by input_fn()",
+        vocabulary_list=["kitchenware", "electronics", "sports"])
+```
+
+The preceding function is pretty straightforward, but it has a significant
+drawback. Namely, there's way too much typing when the vocabulary list is long.
+For these cases, call
+`tf.feature_column.categorical_column_with_vocabulary_file` instead, which lets
+you place the vocabulary words in a separate file. For example:
+
+```python
+
+# Given input "feature_name_from_input_fn" which is a string,
+# create a categorical feature to our model by mapping the input to one of
+# the elements in the vocabulary file
+vocabulary_feature_column =
+    tf.feature_column.categorical_column_with_vocabulary_file(
+        key="a feature returned by input_fn()",
+        vocabulary_file="product_class.txt",
+        vocabulary_size=3)
+```
+
+`product_class.txt` should contain one line for each vocabulary element. In our
+case:
+
+```None
+kitchenware
+electronics
+sports
+```
+
+### Hashed Column
+
+So far, we've worked with a naively small number of categories. For example,
+our product_class example has only 3 categories. Often though, the number of
+categories can be so big that it's not possible to have individual categories
+for each vocabulary word or integer because that would consume too much memory.
+For these cases, we can instead turn the question around and ask, "How many
+categories am I willing to have for my input?"  In fact, the
+@{tf.feature_column.categorical_column_with_hash_bucket} function enables you
+to specify the number of categories. For this type of feature column the model
+calculates a hash value of the input, then puts it into one of
+the `hash_bucket_size` categories using the modulo operator, as in the following
+pseudocode:
+
+```python
+# pseudocode
+feature_id = hash(raw_feature) % hash_buckets_size
+```
+
+The code to create the `feature_column` might look something like this:
+
+``` python
+hashed_feature_column =
+    tf.feature_column.categorical_column_with_hash_bucket(
+        key = "some_feature",
+        hash_buckets_size = 100) # The number of categories
+```
+At this point, you might rightfully think: "This is crazy!" After all, we are
+forcing the different input values to a smaller set of categories. This means
+that two probably unrelated inputs will be mapped to the same
+category, and consequently mean the same thing to the neural network. The
+following figure illustrates this dilemma, showing that kitchenware and sports
+both get assigned to category (hash bucket) 12:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/hashed_column.jpg">
+</div>
+<div style="text-align: center">
+Representing data with hash buckets.
+</div>
+
+As with many counterintuitive phenomena in machine learning, it turns out that
+hashing often works well in practice. That's because hash categories provide
+the model with some separation. The model can use additional features to further
+separate kitchenware from sports.
+
+### Crossed column
+
+Combining features into a single feature, better known as
+[feature crosses](https://developers.google.com/machine-learning/glossary/#feature_cross),
+enables the model to learn separate weights for each combination of
+features.
+
+More concretely, suppose we want our model to calculate real estate prices in
+Atlanta, GA. Real-estate prices within this city vary greatly depending on
+location. Representing latitude and longitude as separate features isn't very
+useful in identifying real-estate location dependencies; however, crossing
+latitude and longitude into a single feature can pinpoint locations. Suppose we
+represent Atlanta as a grid of 100x100 rectangular sections, identifying each
+of the 10,000 sections by a feature cross of latitude and longitude. This
+feature cross enables the model to train on pricing conditions related to each
+individual section, which is a much stronger signal than latitude and longitude
+alone.
+
+The following figure shows our plan, with the latitude & longitude values for
+the corners of the city in red text:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/Atlanta.jpg">
+</div>
+<div style="text-align: center">
+Map of Atlanta. Imagine this map divided into 10,000 sections of
+equal size.
+</div>
+
+For the solution, we used a combination of the `bucketized_column` we looked at
+earlier, with the @{tf.feature_column.crossed_column} function.
+
+<!--TODO(markdaoust) link to full example-->
+
+``` python
+def make_dataset(latitude, longitude, labels):
+    assert latitude.shape == longitude.shape == labels.shape
+
+    features = {'latitude': latitude.flatten(),
+                'longitude': longitude.flatten()}
+    labels=labels.flatten()
+
+    return tf.data.Dataset.from_tensor_slices((features, labels))
+
+
+# Bucketize the latitude and longitude usig the `edges`
+latitude_bucket_fc = tf.feature_column.bucketized_column(
+    tf.feature_column.numeric_column('latitude'),
+    list(atlanta.latitude.edges))
+
+longitude_bucket_fc = tf.feature_column.bucketized_column(
+    tf.feature_column.numeric_column('longitude'),
+    list(atlanta.longitude.edges))
+
+# Cross the bucketized columns, using 5000 hash bins.
+crossed_lat_lon_fc = tf.feature_column.crossed_column(
+    [latitude_bucket_fc, longitude_bucket_fc], 5000)
+
+fc = [
+    latitude_bucket_fc,
+    longitude_bucket_fc,
+    crossed_lat_lon_fc]
+
+# Build and train the Estimator.
+est = tf.estimator.LinearRegressor(fc, ...)
+```
+
+You may create a feature cross from either of the following:
+
+* Feature names; that is, names from the `dict` returned from `input_fn`.
+* Any categorical column, except `categorical_column_with_hash_bucket`
+  (since `crossed_column` hashes the input).
+
+When the feature columns `latitude_bucket_fc` and `longitude_bucket_fc` are
+crossed, TensorFlow will create `(latitude_fc, longitude_fc)` pairs for each
+example. This would produce a full grid of possibilities as follows:
+
+``` None
+ (0,0),  (0,1)...  (0,99)
+ (1,0),  (1,1)...  (1,99)
+   ...     ...       ...
+(99,0), (99,1)...(99, 99)
+```
+
+Except that a full grid would only be tractable for inputs with limited
+vocabularies. Instead of building this, potentially huge, table of inputs,
+the `crossed_column` only builds the number requested by the `hash_bucket_size`
+argument. The feature column assigns an example to a index by running a hash
+function on the tuple of inputs, followed by a modulo operation with
+`hash_bucket_size`.
+
+As discussed earlier, performing the
+hash and modulo function limits the number of categories, but can cause category
+collisions; that is, multiple (latitude, longitude) feature crosses will end
+up in the same hash bucket. In practice though, performing feature crosses
+still adds significant value to the learning capability of your models.
+
+Somewhat counterintuitively, when creating feature crosses, you typically still
+should include the original (uncrossed) features in your model (as in the
+preceding code snippet). The independent latitude and longitude features help the
+model distinguish between examples where a hash collision has occurred in the
+crossed feature.
+
+## Indicator and embedding columns
+
+Indicator columns and embedding columns never work on features directly, but
+instead take categorical columns as input.
+
+When using an indicator column, we're telling TensorFlow to do exactly what
+we've seen in our categorical product_class example. That is, an
+**indicator column** treats each category as an element in a one-hot vector,
+where the matching category has value 1 and the rest have 0s:
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/categorical_column_with_identity.jpg">
+</div>
+<div style="text-align: center">
+Representing data in indicator columns.
+</div>
+
+Here's how you create an indicator column by calling
+@{tf.feature_column.indicator_column}:
+
+``` python
+categorical_column = ... # Create any type of categorical column.
+
+# Represent the categorical column as an indicator column.
+indicator_column = tf.feature_column.indicator_column(categorical_column)
+```
+
+Now, suppose instead of having just three possible classes, we have a million.
+Or maybe a billion. For a number of reasons, as the number of categories grow
+large, it becomes infeasible to train a neural network using indicator columns.
+
+We can use an embedding column to overcome this limitation. Instead of
+representing the data as a one-hot vector of many dimensions, an
+**embedding column** represents that data as a lower-dimensional, ordinary
+vector in which each cell can contain any number, not just 0 or 1. By
+permitting a richer palette of numbers for every cell, an embedding column
+contains far fewer cells than an indicator column.
+
+Let's look at an example comparing indicator and embedding columns. Suppose our
+input examples consists of different words from a limited palette of only 81
+words. Further suppose that the data set provides provides the following input
+words in 4 separate examples:
+
+* `"dog"`
+* `"spoon"`
+* `"scissors"`
+* `"guitar"`
+
+In that case, the following figure illustrates the processing path for
+embedding columns or indicator columns.
+
+<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/feature_columns/embedding_vs_indicator.jpg">
+</div>
+<div style="text-align: center">
+An embedding column stores categorical data in a lower-dimensional
+vector than an indicator column. (We just placed random numbers into the
+embedding vectors; training determines the actual numbers.)
+</div>
+
+When an example is processed, one of the `categorical_column_with...` functions
+maps the example string to a numerical categorical value. For example, a
+function maps "spoon" to `[32]`. (The 32 comes from our imagination—the actual
+values depend on the mapping function.) You may then represent these numerical
+categorical values in either of the following two ways:
+
+* As an indicator column. A function converts each numeric categorical value
+  into an 81-element vector (because our palette consists of 81 words), placing
+  a 1 in the index of the categorical value (0, 32, 79, 80) and a 0 in all the
+  other positions.
+
+* As an embedding column. A function uses the numerical categorical values
+  `(0, 32, 79, 80)` as indices to a lookup table. Each slot in that lookup table
+  contains a 3-element vector.
+
+How do the values in the embeddings vectors magically get assigned? Actually,
+the assignments happen during training. That is, the model learns the best way
+to map your input numeric categorical values to the embeddings vector value in
+order to solve your problem. Embedding columns increase your model's
+capabilities, since an embeddings vector learns new relationships between
+categories from the training data.
+
+Why is the embedding vector size 3 in our example? Well, the following "formula"
+provides a general rule of thumb about the number of embedding dimensions:
+
+```python
+embedding_dimensions =  number_of_categories**0.25
+```
+
+That is, the embedding vector dimension should be the 4th root of the number of
+categories. Since our vocabulary size in this example is 81, the recommended
+number of dimensions is 3:
+
+``` python
+3 =  81**0.25
+```
+Note that this is just a general guideline; you can set the number of embedding
+dimensions as you please.
+
+Call @{tf.feature_column.embedding_column} to create an `embedding_column` as
+suggested by the following snippet:
+
+``` python
+categorical_column = ... # Create any categorical column
+
+# Represent the categorical column as an embedding column.
+# This means creating a one-hot vector with one element for each category.
+embedding_column = tf.feature_column.embedding_column(
+    categorical_column=categorical_column,
+    dimension=dimension_of_embedding_vector)
+```
+
+@{$programmers_guide/embedding$Embeddings} is a significant topic within machine
+learning. This information was just to get you started using them as feature
+columns.
+
+## Passing feature columns to Estimators
+
+As the following list indicates, not all Estimators permit all types of
+`feature_columns` argument(s):
+
+* @{tf.estimator.LinearClassifier$`LinearClassifier`} and
+  @{tf.estimator.LinearRegressor$`LinearRegressor`}: Accept all types of
+  feature column.
+* @{tf.estimator.DNNClassifier$`DNNClassifier`} and
+  @{tf.estimator.DNNRegressor$`DNNRegressor`}: Only accept dense columns. Other
+  column types must be wrapped in either an `indicator_column` or
+  `embedding_column`.
+* @{tf.estimator.DNNLinearCombinedClassifier$`DNNLinearCombinedClassifier`} and
+  @{tf.estimator.DNNLinearCombinedRegressor$`DNNLinearCombinedRegressor`}:
+    * The `linear_feature_columns` argument accepts any feature column type.
+    * The `dnn_feature_columns` argument only accepts dense columns.
+
+## Other Sources
+
+For more examples on feature columns, view the following:
+
+* The @{$low_level_intro#feature_columns$Low Level Introduction} demonstrates how
+  experiment directly with `feature_columns` using TensorFlow's low level APIs.
+* The @{$wide$wide} and @{$wide_and_deep$Wide & Deep} Tutorials solve a
+  binary classification problem using `feature_columns` on a variety of input
+  data types.
+
+To learn more about embeddings, see the following:
+
+* [Deep Learning, NLP, and representations](http://colah.github.io/posts/2014-07-NLP-RNNs-Representations/)
+  (Chris Olah's blog)
+* The TensorFlow [Embedding Projector](http://projector.tensorflow.org)
diff --git a/tensorflow/docs_src/get_started/get_started.md b/tensorflow/docs_src/get_started/get_started.md
deleted file mode 100644
index 231108215ac73bc9ab87a896b3441a7da5f2b507..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/get_started/get_started.md
+++ /dev/null
@@ -1,480 +0,0 @@
-# Getting Started With TensorFlow
-
-This guide gets you started programming in TensorFlow. Before using this guide,
-@{$install$install TensorFlow}. To get the most out of
-this guide, you should know the following:
-
-*   How to program in Python.
-*   At least a little bit about arrays.
-*   Ideally, something about machine learning. However, if you know little or
-    nothing about machine learning, then this is still the first guide you
-    should read.
-
-TensorFlow provides multiple APIs. The lowest level API--TensorFlow Core--
-provides you with complete programming control. We recommend TensorFlow Core for
-machine learning researchers and others who require fine levels of control over
-their models. The higher level APIs are built on top of TensorFlow Core. These
-higher level APIs are typically easier to learn and use than TensorFlow Core. In
-addition, the higher level APIs make repetitive tasks easier and more consistent
-between different users. A high-level API like tf.estimator helps you manage
-data sets, estimators, training and inference.
-
-This guide begins with a tutorial on TensorFlow Core. Later, we
-demonstrate how to implement the same model in tf.estimator. Knowing
-TensorFlow Core principles will give you a great mental model of how things are
-working internally when you use the more compact higher level API.
-
-# Tensors
-
-The central unit of data in TensorFlow is the **tensor**. A tensor consists of a
-set of primitive values shaped into an array of any number of dimensions. A
-tensor's **rank** is its number of dimensions. Here are some examples of
-tensors:
-
-```python
-3 # a rank 0 tensor; a scalar with shape []
-[1., 2., 3.] # a rank 1 tensor; a vector with shape [3]
-[[1., 2., 3.], [4., 5., 6.]] # a rank 2 tensor; a matrix with shape [2, 3]
-[[[1., 2., 3.]], [[7., 8., 9.]]] # a rank 3 tensor with shape [2, 1, 3]
-```
-
-## TensorFlow Core tutorial
-
-### Importing TensorFlow
-
-The canonical import statement for TensorFlow programs is as follows:
-
-```python
-import tensorflow as tf
-```
-This gives Python access to all of TensorFlow's classes, methods, and symbols.
-Most of the documentation assumes you have already done this.
-
-### The Computational Graph
-
-You might think of TensorFlow Core programs as consisting of two discrete
-sections:
-
-1.  Building the computational graph.
-2.  Running the computational graph.
-
-A **computational graph** is a series of TensorFlow operations arranged into a
-graph of nodes.
-Let's build a simple computational graph. Each node takes zero
-or more tensors as inputs and produces a tensor as an output. One type of node
-is a constant. Like all TensorFlow constants, it takes no inputs, and it outputs
-a value it stores internally. We can create two floating point Tensors `node1`
-and `node2` as follows:
-
-```python
-node1 = tf.constant(3.0, dtype=tf.float32)
-node2 = tf.constant(4.0) # also tf.float32 implicitly
-print(node1, node2)
-```
-
-The final print statement produces
-
-```
-Tensor("Const:0", shape=(), dtype=float32) Tensor("Const_1:0", shape=(), dtype=float32)
-```
-
-Notice that printing the nodes does not output the values `3.0` and `4.0` as you
-might expect. Instead, they are nodes that, when evaluated, would produce 3.0
-and 4.0, respectively. To actually evaluate the nodes, we must run the
-computational graph within a **session**. A session encapsulates the control and
-state of the TensorFlow runtime.
-
-The following code creates a `Session` object and then invokes its `run` method
-to run enough of the computational graph to evaluate `node1` and `node2`. By
-running the computational graph in a session as follows:
-
-```python
-sess = tf.Session()
-print(sess.run([node1, node2]))
-```
-
-we see the expected values of 3.0 and 4.0:
-
-```
-[3.0, 4.0]
-```
-
-We can build more complicated computations by combining `Tensor` nodes with
-operations (Operations are also nodes). For example, we can add our two
-constant nodes and produce a new graph as follows:
-
-```python
-from __future__ import print_function
-node3 = tf.add(node1, node2)
-print("node3:", node3)
-print("sess.run(node3):", sess.run(node3))
-```
-
-The last two print statements produce
-
-```
-node3: Tensor("Add:0", shape=(), dtype=float32)
-sess.run(node3): 7.0
-```
-
-TensorFlow provides a utility called TensorBoard that can display a picture of
-the computational graph. Here is a screenshot showing how TensorBoard
-visualizes the graph:
-
-![TensorBoard screenshot](https://www.tensorflow.org/images/getting_started_add.png)
-
-As it stands, this graph is not especially interesting because it always
-produces a constant result. A graph can be parameterized to accept external
-inputs, known as **placeholders**. A **placeholder** is a promise to provide a
-value later.
-
-```python
-a = tf.placeholder(tf.float32)
-b = tf.placeholder(tf.float32)
-adder_node = a + b  # + provides a shortcut for tf.add(a, b)
-```
-
-The preceding three lines are a bit like a function or a lambda in which we
-define two input parameters (a and b) and then an operation on them. We can
-evaluate this graph with multiple inputs by using the feed_dict argument to
-the [run method](https://www.tensorflow.org/api_docs/python/tf/Session#run)
-to feed concrete values to the placeholders:
-
-```python
-print(sess.run(adder_node, {a: 3, b: 4.5}))
-print(sess.run(adder_node, {a: [1, 3], b: [2, 4]}))
-```
-resulting in the output
-
-```
-7.5
-[ 3.  7.]
-```
-
-In TensorBoard, the graph looks like this:
-
-![TensorBoard screenshot](https://www.tensorflow.org/images/getting_started_adder.png)
-
-We can make the computational graph more complex by adding another operation.
-For example,
-
-```python
-add_and_triple = adder_node * 3.
-print(sess.run(add_and_triple, {a: 3, b: 4.5}))
-```
-produces the output
-```
-22.5
-```
-
-The preceding computational graph would look as follows in TensorBoard:
-
-![TensorBoard screenshot](https://www.tensorflow.org/images/getting_started_triple.png)
-
-In machine learning we will typically want a model that can take arbitrary
-inputs, such as the one above.  To make the model trainable, we need to be able
-to modify the graph to get new outputs with the same input.  **Variables** allow
-us to add trainable parameters to a graph.  They are constructed with a type and
-initial value:
-
-
-```python
-W = tf.Variable([.3], dtype=tf.float32)
-b = tf.Variable([-.3], dtype=tf.float32)
-x = tf.placeholder(tf.float32)
-linear_model = W*x + b
-```
-
-Constants are initialized when you call `tf.constant`, and their value can never
-change. By contrast, variables are not initialized when you call `tf.Variable`.
-To initialize all the variables in a TensorFlow program, you must explicitly
-call a special operation as follows:
-
-```python
-init = tf.global_variables_initializer()
-sess.run(init)
-```
-It is important to realize `init` is a handle to the TensorFlow sub-graph that
-initializes all the global variables. Until we call `sess.run`, the variables
-are uninitialized.
-
-
-Since `x` is a placeholder, we can evaluate `linear_model` for several values of
-`x` simultaneously as follows:
-
-```python
-print(sess.run(linear_model, {x: [1, 2, 3, 4]}))
-```
-to produce the output
-```
-[ 0.          0.30000001  0.60000002  0.90000004]
-```
-
-We've created a model, but we don't know how good it is yet. To evaluate the
-model on training data, we need a `y` placeholder to provide the desired values,
-and we need to write a loss function.
-
-A loss function measures how far apart the
-current model is from the provided data. We'll use a standard loss model for
-linear regression, which sums the squares of the deltas between the current
-model and the provided data. `linear_model - y` creates a vector where each
-element is the corresponding example's error delta. We call `tf.square` to
-square that error. Then, we sum all the squared errors to create a single scalar
-that abstracts the error of all examples using `tf.reduce_sum`:
-
-```python
-y = tf.placeholder(tf.float32)
-squared_deltas = tf.square(linear_model - y)
-loss = tf.reduce_sum(squared_deltas)
-print(sess.run(loss, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]}))
-```
-producing the loss value
-```
-23.66
-```
-
-We could improve this manually by reassigning the values of `W` and `b` to the
-perfect values of -1 and 1. A variable is initialized to the value provided to
-`tf.Variable` but can be changed using operations like `tf.assign`. For example,
-`W=-1` and `b=1` are the optimal parameters for our model. We can change `W` and
-`b` accordingly:
-
-```python
-fixW = tf.assign(W, [-1.])
-fixb = tf.assign(b, [1.])
-sess.run([fixW, fixb])
-print(sess.run(loss, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]}))
-```
-The final print shows the loss now is zero.
-```
-0.0
-```
-
-We guessed the "perfect" values of `W` and `b`, but the whole point of machine
-learning is to find the correct model parameters automatically.  We will show
-how to accomplish this in the next section.
-
-## tf.train API
-
-A complete discussion of machine learning is out of the scope of this tutorial.
-However, TensorFlow provides **optimizers** that slowly change each variable in
-order to minimize the loss function. The simplest optimizer is **gradient
-descent**. It modifies each variable according to the magnitude of the
-derivative of loss with respect to that variable. In general, computing symbolic
-derivatives manually is tedious and error-prone. Consequently, TensorFlow can
-automatically produce derivatives given only a description of the model using
-the function `tf.gradients`. For simplicity, optimizers typically do this
-for you. For example,
-
-```python
-optimizer = tf.train.GradientDescentOptimizer(0.01)
-train = optimizer.minimize(loss)
-```
-
-```python
-sess.run(init) # reset variables to incorrect defaults.
-for i in range(1000):
-  sess.run(train, {x: [1, 2, 3, 4], y: [0, -1, -2, -3]})
-
-print(sess.run([W, b]))
-```
-results in the final model parameters:
-```
-[array([-0.9999969], dtype=float32), array([ 0.99999082], dtype=float32)]
-```
-
-Now we have done actual machine learning!  Although this simple linear
-regression model does not require much TensorFlow core code, more complicated
-models and methods to feed data into your models necessitate more code. Thus,
-TensorFlow provides higher level abstractions for common patterns, structures,
-and functionality. We will learn how to use some of these abstractions in the
-next section.
-
-### Complete program
-
-The completed trainable linear regression model is shown here:
-
-```python
-import tensorflow as tf
-
-# Model parameters
-W = tf.Variable([.3], dtype=tf.float32)
-b = tf.Variable([-.3], dtype=tf.float32)
-# Model input and output
-x = tf.placeholder(tf.float32)
-linear_model = W*x + b
-y = tf.placeholder(tf.float32)
-
-# loss
-loss = tf.reduce_sum(tf.square(linear_model - y)) # sum of the squares
-# optimizer
-optimizer = tf.train.GradientDescentOptimizer(0.01)
-train = optimizer.minimize(loss)
-
-# training data
-x_train = [1, 2, 3, 4]
-y_train = [0, -1, -2, -3]
-# training loop
-init = tf.global_variables_initializer()
-sess = tf.Session()
-sess.run(init) # initialize variables with incorrect defaults.
-for i in range(1000):
-  sess.run(train, {x: x_train, y: y_train})
-
-# evaluate training accuracy
-curr_W, curr_b, curr_loss = sess.run([W, b, loss], {x: x_train, y: y_train})
-print("W: %s b: %s loss: %s"%(curr_W, curr_b, curr_loss))
-```
-When run, it produces
-```
-W: [-0.9999969] b: [ 0.99999082] loss: 5.69997e-11
-```
-
-Notice that the loss is a very small number (very close to zero). If you run
-this program, your loss may not be exactly the same as the aforementioned loss
-because the model is initialized with pseudorandom values.
-
-This more complicated program can still be visualized in TensorBoard
-![TensorBoard final model visualization](https://www.tensorflow.org/images/getting_started_final.png)
-
-## `tf.estimator`
-
-`tf.estimator` is a high-level TensorFlow library that simplifies the
-mechanics of machine learning, including the following:
-
-*   running training loops
-*   running evaluation loops
-*   managing data sets
-
-tf.estimator defines many common models.
-
-### Basic usage
-
-Notice how much simpler the linear regression program becomes with
-`tf.estimator`:
-
-```python
-# NumPy is often used to load, manipulate and preprocess data.
-import numpy as np
-import tensorflow as tf
-
-# Declare list of features. We only have one numeric feature. There are many
-# other types of columns that are more complicated and useful.
-feature_columns = [tf.feature_column.numeric_column("x", shape=[1])]
-
-# An estimator is the front end to invoke training (fitting) and evaluation
-# (inference). There are many predefined types like linear regression,
-# linear classification, and many neural network classifiers and regressors.
-# The following code provides an estimator that does linear regression.
-estimator = tf.estimator.LinearRegressor(feature_columns=feature_columns)
-
-# TensorFlow provides many helper methods to read and set up data sets.
-# Here we use two data sets: one for training and one for evaluation
-# We have to tell the function how many batches
-# of data (num_epochs) we want and how big each batch should be.
-x_train = np.array([1., 2., 3., 4.])
-y_train = np.array([0., -1., -2., -3.])
-x_eval = np.array([2., 5., 8., 1.])
-y_eval = np.array([-1.01, -4.1, -7, 0.])
-input_fn = tf.estimator.inputs.numpy_input_fn(
-    {"x": x_train}, y_train, batch_size=4, num_epochs=None, shuffle=True)
-train_input_fn = tf.estimator.inputs.numpy_input_fn(
-    {"x": x_train}, y_train, batch_size=4, num_epochs=1000, shuffle=False)
-eval_input_fn = tf.estimator.inputs.numpy_input_fn(
-    {"x": x_eval}, y_eval, batch_size=4, num_epochs=1000, shuffle=False)
-
-# We can invoke 1000 training steps by invoking the method and passing the
-# training data set.
-estimator.train(input_fn=input_fn, steps=1000)
-
-# Here we evaluate how well our model did.
-train_metrics = estimator.evaluate(input_fn=train_input_fn)
-eval_metrics = estimator.evaluate(input_fn=eval_input_fn)
-print("train metrics: %r"% train_metrics)
-print("eval metrics: %r"% eval_metrics)
-```
-When run, it produces something like
-```
-train metrics: {'average_loss': 1.4833182e-08, 'global_step': 1000, 'loss': 5.9332727e-08}
-eval metrics: {'average_loss': 0.0025353201, 'global_step': 1000, 'loss': 0.01014128}
-```
-Notice how our eval data has a higher loss, but it is still close to zero.
-That means we are learning properly.
-
-### A custom model
-
-`tf.estimator` does not lock you into its predefined models. Suppose we
-wanted to create a custom model that is not built into TensorFlow. We can still
-retain the high level abstraction of data set, feeding, training, etc. of
-`tf.estimator`. For illustration, we will show how to implement our own
-equivalent model to `LinearRegressor` using our knowledge of the lower level
-TensorFlow API.
-
-To define a custom model that works with `tf.estimator`, we need to use
-`tf.estimator.Estimator`. `tf.estimator.LinearRegressor` is actually
-a sub-class of `tf.estimator.Estimator`. Instead of sub-classing
-`Estimator`, we simply provide `Estimator` a function `model_fn` that tells
-`tf.estimator` how it can evaluate predictions, training steps, and
-loss. The code is as follows:
-
-```python
-import numpy as np
-import tensorflow as tf
-
-# Declare list of features, we only have one real-valued feature
-def model_fn(features, labels, mode):
-  # Build a linear model and predict values
-  W = tf.get_variable("W", [1], dtype=tf.float64)
-  b = tf.get_variable("b", [1], dtype=tf.float64)
-  y = W*features['x'] + b
-  # Loss sub-graph
-  loss = tf.reduce_sum(tf.square(y - labels))
-  # Training sub-graph
-  global_step = tf.train.get_global_step()
-  optimizer = tf.train.GradientDescentOptimizer(0.01)
-  train = tf.group(optimizer.minimize(loss),
-                   tf.assign_add(global_step, 1))
-  # EstimatorSpec connects subgraphs we built to the
-  # appropriate functionality.
-  return tf.estimator.EstimatorSpec(
-      mode=mode,
-      predictions=y,
-      loss=loss,
-      train_op=train)
-
-estimator = tf.estimator.Estimator(model_fn=model_fn)
-# define our data sets
-x_train = np.array([1., 2., 3., 4.])
-y_train = np.array([0., -1., -2., -3.])
-x_eval = np.array([2., 5., 8., 1.])
-y_eval = np.array([-1.01, -4.1, -7., 0.])
-input_fn = tf.estimator.inputs.numpy_input_fn(
-    {"x": x_train}, y_train, batch_size=4, num_epochs=None, shuffle=True)
-train_input_fn = tf.estimator.inputs.numpy_input_fn(
-    {"x": x_train}, y_train, batch_size=4, num_epochs=1000, shuffle=False)
-eval_input_fn = tf.estimator.inputs.numpy_input_fn(
-    {"x": x_eval}, y_eval, batch_size=4, num_epochs=1, shuffle=False)
-
-# train
-estimator.train(input_fn=input_fn, steps=1000)
-# Here we evaluate how well our model did.
-train_metrics = estimator.evaluate(input_fn=train_input_fn)
-eval_metrics = estimator.evaluate(input_fn=eval_input_fn)
-print("train metrics: %r"% train_metrics)
-print("eval metrics: %r"% eval_metrics)
-```
-When run, it produces
-```
-train metrics: {'loss': 1.227995e-11, 'global_step': 1000}
-eval metrics: {'loss': 0.01010036, 'global_step': 1000}
-```
-
-Notice how the contents of the custom `model_fn()` function are very similar
-to our manual model training loop from the lower level API.
-
-## Next steps
-
-Now you have a working knowledge of the basics of TensorFlow. We have several
-more tutorials that you can look at to learn more. If you are a beginner in
-machine learning see @{$beginners$MNIST for beginners},
-otherwise see @{$pros$Deep MNIST for experts}.
diff --git a/tensorflow/docs_src/get_started/get_started_for_beginners.md b/tensorflow/docs_src/get_started/get_started_for_beginners.md
new file mode 100644
index 0000000000000000000000000000000000000000..ea1c2fb3f473b9e39567c7607d3b3ad10d2de6b5
--- /dev/null
+++ b/tensorflow/docs_src/get_started/get_started_for_beginners.md
@@ -0,0 +1,732 @@
+# Getting Started for ML Beginners
+
+This document explains how to use machine learning to classify (categorize)
+Iris flowers by species.  This document dives deeply into the TensorFlow
+code to do exactly that, explaining ML fundamentals along the way.
+
+If the following list describes you, then you are in the right place:
+
+*   You know little to nothing about machine learning.
+*   You want to learn how to write TensorFlow programs.
+*   You can code (at least a little) in Python.
+
+If you are already familiar with basic machine learning concepts
+but are new to TensorFlow, read
+@{$premade_estimators$Getting Started with TensorFlow: for ML Experts}.
+
+## The Iris classification problem
+
+Imagine you are a botanist seeking an automated way to classify each
+Iris flower you find.  Machine learning provides many ways to classify flowers.
+For instance, a sophisticated machine learning program could classify flowers
+based on photographs.  Our ambitions are more modest--we're going to classify
+Iris flowers based solely on the length and width of their
+[sepals](https://en.wikipedia.org/wiki/Sepal) and
+[petals](https://en.wikipedia.org/wiki/Petal).
+
+The Iris genus entails about 300 species, but our program will classify only
+the following three:
+
+*   Iris setosa
+*   Iris virginica
+*   Iris versicolor
+
+<div style="margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%"
+  alt="Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor"
+  src="../images/iris_three_species.jpg">
+</div>
+**From left to right,
+[*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by
+[Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0),
+[*Iris versicolor*](https://commons.wikimedia.org/w/index.php?curid=248095) (by
+[Dlanglois](https://commons.wikimedia.org/wiki/User:Dlanglois), CC BY-SA 3.0),
+and [*Iris virginica*](https://www.flickr.com/photos/33397993@N05/3352169862)
+(by [Frank Mayfield](https://www.flickr.com/photos/33397993@N05), CC BY-SA
+2.0).**
+<p>&nbsp;</p>
+
+Fortunately, someone has already created [a data set of 120 Iris
+flowers](https://en.wikipedia.org/wiki/Iris_flower_data_set)
+with the sepal and petal measurements.  This data set has become
+one of the canonical introductions to machine learning classification problems.
+(The [MNIST database](https://en.wikipedia.org/wiki/MNIST_database),
+which contains handwritten digits, is another popular classification
+problem.) The first 5 entries of the Iris data set
+look as follows:
+
+| Sepal length | sepal width | petal length | petal width | species
+| ---          | ---         | ---          | ---         | ---
+|6.4           | 2.8         | 5.6          | 2.2         | 2
+|5.0           | 2.3         | 3.3          | 1.0         | 1
+|4.9           | 2.5         | 4.5          | 1.7         | 2
+|4.9           | 3.1         | 1.5          | 0.1         | 0
+|5.7           | 3.8         | 1.7          | 0.3         | 0
+
+Let's introduce some terms:
+
+*   The last column (species) is called the
+    [**label**](https://developers.google.com/machine-learning/glossary/#label);
+    the first four columns are called
+    [**features**](https://developers.google.com/machine-learning/glossary/#feature).
+    Features are characteristics of an example, while the label is
+    the thing we're trying to predict.
+
+*   An [**example**](https://developers.google.com/machine-learning/glossary/#example)
+    consists of the set of features and the label for one sample
+    flower. The preceding table shows 5 examples from a data set of
+    120 examples.
+
+Each label is naturally a string (for example, "setosa"), but machine learning
+typically relies on numeric values. Therefore, someone mapped each string to
+a number.  Here's the representation scheme:
+
+* 0 represents setosa
+* 1 represents versicolor
+* 2 represents virginica
+
+
+## Models and training
+
+A **model** is the relationship between features
+and the label.  For the Iris problem, the model defines the relationship
+between the sepal and petal measurements and the Iris species.
+Some simple models can be described with a few lines of algebra;
+more complex machine learning models
+contain such a large number of interlacing mathematical functions and
+parameters that they become hard to summarize mathematically.
+
+Could you determine the relationship between the four features and the
+Iris species *without* using machine learning?  That is, could you use
+traditional programming techniques (for example, a lot of conditional
+statements) to create a model?  Maybe. You could play with the data set
+long enough to determine the right relationships of petal and sepal
+measurements to particular species.  However, a good machine learning
+approach *determines the model for you*.  That is, if you feed enough
+representative examples into the right machine learning model type, the program
+will determine the relationship between sepals, petals, and species.
+
+**Training** is the stage of machine learning in which the model is
+gradually optimized (learned).  The Iris problem is an example
+of [**supervised machine
+learning**](https://developers.google.com/machine-learning/glossary/#supervised_machine_learning)
+in which a model is trained from examples that contain labels.  (In
+[**unsupervised machine
+learning**](https://developers.google.com/machine-learning/glossary/#unsupervised_machine_learning),
+the examples don't contain labels. Instead, the model typically finds
+patterns among the features.)
+
+
+
+
+## Get the sample program
+
+Prior to playing with the sample code in this document, do the following:
+
+1.  @{$install$Install TensorFlow}.
+2.  If you installed TensorFlow with virtualenv or Anaconda, activate your
+    TensorFlow environment.
+3.  Install or upgrade pandas by issuing the following command:
+
+     `pip install pandas`
+
+
+Take the following steps to get the sample program:
+
+1. Clone the TensorFlow Models repository from github by entering the following
+   command:
+
+       `git clone https://github.com/tensorflow/models`
+
+2. Change directory within that branch to the location containing the examples
+   used in this document:
+
+       `cd models/samples/core/get_started/`
+
+In that `get_started` directory, you'll find a program
+named `premade_estimator.py`.
+
+
+## Run the sample program
+
+You run TensorFlow programs as you would run any Python program. Therefore,
+issue the following command from a command line to
+run `premade_estimators.py`:
+
+``` bash
+python premade_estimator.py
+```
+
+Running the program should output a whole bunch of information ending with
+three prediction lines like the following:
+
+```None
+...
+Prediction is "Setosa" (99.6%), expected "Setosa"
+
+Prediction is "Versicolor" (99.8%), expected "Versicolor"
+
+Prediction is "Virginica" (97.9%), expected "Virginica"
+```
+
+If the program generates errors instead of predictions, ask yourself the
+following questions:
+
+* Did you install TensorFlow properly?
+* Are you using the correct version of TensorFlow?  The `premade_estimators.py`
+  program requires at least TensorFlow v1.4.
+* If you installed TensorFlow with virtualenv or Anaconda, did you activate
+  the environment?
+
+
+
+## The TensorFlow programming stack
+
+As the following illustration shows, TensorFlow
+provides a programming stack consisting of multiple API layers:
+
+<div style="margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/tensorflow_programming_environment.png">
+</div>
+**The TensorFlow Programming Environment.**
+<p>&nbsp;</p>
+
+As you start writing TensorFlow programs, we strongly recommend focusing on
+the following two high-level APIs:
+
+*   Estimators
+*   Datasets
+
+Although we'll grab an occasional convenience function from other APIs,
+this document focuses on the preceding two APIs.
+
+
+## The program itself
+
+Thanks for your patience; let's dig into the code.
+The general outline of `premade_estimator.py`--and many other TensorFlow
+programs--is as follows:
+
+*   Import and parse the data sets.
+*   Create feature columns to describe the data.
+*   Select the type of model
+*   Train the model.
+*   Evaluate the model's effectiveness.
+*   Let the trained model make predictions.
+
+The following subsections detail each part.
+
+
+### Import and parse the data sets
+
+The Iris program requires the data from the following two .csv files:
+
+*   `http://download.tensorflow.org/data/iris_training.csv`, which contains
+    the training set.
+*   `http://download.tensorflow.org/data/iris_test.csv`, which contains the
+    the test set.
+
+The **training set** contains the examples that we'll use to train the model;
+the **test set** contains the examples that we'll use to evaluate the trained
+model's effectiveness.
+
+The training set and test set started out as a
+single data set.  Then, someone split the examples, with the majority going into
+the training set and the remainder going into the test set.  Adding
+examples to the training set usually builds a better model; however, adding
+more examples to the test set enables us to better gauge the model's
+effectiveness. Regardless of the split, the examples in the test set
+must be separate from the examples in the training set.  Otherwise, you can't
+accurately determine the model's effectiveness.
+
+The `premade_estimators.py` program relies on the `load_data` function
+in the adjacent [`iris_data.py`](
+https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py)
+file to read in and parse the training set and test set.
+Here is a heavily commented version of the function:
+
+```python
+TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
+TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
+
+CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',
+                    'PetalLength', 'PetalWidth', 'Species']
+
+...
+
+def load_data(label_name='Species'):
+    """Parses the csv file in TRAIN_URL and TEST_URL."""
+
+    # Create a local copy of the training set.
+    train_path = tf.keras.utils.get_file(fname=TRAIN_URL.split('/')[-1],
+                                         origin=TRAIN_URL)
+    # train_path now holds the pathname: ~/.keras/datasets/iris_training.csv
+
+    # Parse the local CSV file.
+    train = pd.read_csv(filepath_or_buffer=train_path,
+                        names=CSV_COLUMN_NAMES,  # list of column names
+                        header=0  # ignore the first row of the CSV file.
+                       )
+    # train now holds a pandas DataFrame, which is data structure
+    # analogous to a table.
+
+    # 1. Assign the DataFrame's labels (the right-most column) to train_label.
+    # 2. Delete (pop) the labels from the DataFrame.
+    # 3. Assign the remainder of the DataFrame to train_features
+    train_features, train_label = train, train.pop(label_name)
+
+    # Apply the preceding logic to the test set.
+    test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)
+    test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
+    test_features, test_label = test, test.pop(label_name)
+
+    # Return four DataFrames.
+    return (train_features, train_label), (test_features, test_label)
+```
+
+Keras is an open-sourced machine learning library; `tf.keras` is a TensorFlow
+implementation of Keras.  The `premade_estimator.py` program only accesses
+one `tf.keras` function; namely, the `tf.keras.utils.get_file` convenience
+function, which copies a remote CSV file to a local file system.
+
+The call to `load_data` returns two `(feature,label)` pairs, for the training
+and test sets respectively:
+
+```python
+    # Call load_data() to parse the CSV file.
+    (train_feature, train_label), (test_feature, test_label) = load_data()
+```
+
+Pandas is an open-source Python library leveraged by several
+TensorFlow functions.  A pandas
+[**DataFrame**](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)
+is a table with named columns headers and numbered rows.
+The features returned by `load_data` are packed in `DataFrames`.
+For example, the `test_feature` DataFrame looks as follows:
+
+```none
+    SepalLength  SepalWidth  PetalLength  PetalWidth
+0           5.9         3.0          4.2         1.5
+1           6.9         3.1          5.4         2.1
+2           5.1         3.3          1.7         0.5
+...
+27          6.7         3.1          4.7         1.5
+28          6.7         3.3          5.7         2.5
+29          6.4         2.9          4.3         1.3
+```
+
+
+### Describe the data
+
+A **feature column** is a data structure that tells your model
+how to interpret the data in each feature.  In the Iris problem,
+we want the model to interpret the data in each
+feature as its literal floating-point value; that is, we want the
+model to interpret an input value like 5.4 as, well, 5.4.  However,
+in other machine learning problems, it is often desirable to interpret
+data less literally.  Using feature columns to
+interpret data is such a rich topic that we devote an entire
+@{$feature_columns$document} to it.
+
+From a code perspective, you build a list of `feature_column` objects by calling
+functions from the @{tf.feature_column} module. Each object describes an input
+to the model. To tell the model to interpret data as a floating-point value,
+call @{tf.feature_column.numeric_column).  In `premade_estimator.py`, all
+four features should be interpreted as literal floating-point values, so
+the code to create a feature column looks as follows:
+
+```python
+# Create feature columns for all features.
+my_feature_columns = []
+for key in train_x.keys():
+    my_feature_columns.append(tf.feature_column.numeric_column(key=key))
+```
+
+Here is a less elegant, but possibly clearer, alternative way to
+encode the preceding block:
+
+```python
+my_feature_columns = [
+    tf.feature_column.numeric_column(key='SepalLength'),
+    tf.feature_column.numeric_column(key='SepalWidth'),
+    tf.feature_column.numeric_column(key='PetalLength'),
+    tf.feature_column.numeric_column(key='PetalWidth')
+]
+```
+
+
+### Select the type of model
+
+We need the select the kind of model that will be trained.
+Lots of model types exist; picking the ideal type takes experience.
+We've selected a neural network to solve the Iris problem.  [**Neural
+networks**](https://developers.google.com/machine-learning/glossary/#neural_network)
+can find complex relationships between features and the label.
+A neural network is a highly-structured graph, organized into one or more
+[**hidden layers**](https://developers.google.com/machine-learning/glossary/#hidden_layer).
+Each hidden layer consists of one or more
+[**neurons**](https://developers.google.com/machine-learning/glossary/#neuron).
+There are several categories of neural networks.
+We'll be using a [**fully connected neural
+network**](https://developers.google.com/machine-learning/glossary/#fully_connected_layer),
+which means that the neurons in one layer take inputs from *every* neuron in
+the previous layer.  For example, the following figure illustrates a 
+fully connected neural network consisting of three hidden layers:
+
+*   The first hidden layer contains four neurons.
+*   The second hidden layer contains three neurons.
+*   The third hidden layer contains two neurons.
+
+<div style="margin:auto; margin-bottom:10px; margin-top:20px;">
+<img style="width:100%" src="../images/simple_dnn.svg">
+</div>
+**A neural network with three hidden layers.**
+<p>&nbsp;</p>
+
+To specify a model type, instantiate an
+[**Estimator**](https://developers.google.com/machine-learning/glossary/#Estimators)
+class.  TensorFlow provides two categories of Estimators:
+
+*   [**pre-made
+    Estimators**](https://developers.google.com/machine-learning/glossary/#pre-made_Estimator),
+    which someone else has already written for you.
+*   [**custom
+    Estimators**](https://developers.google.com/machine-learning/glossary/#custom_estimator),
+    which you must code yourself, at least partially.
+
+To implement a neural network, the `premade_estimators.py` program uses
+a pre-made Estimator named @{tf.estimator.DNNClassifier}.  This Estimator
+builds a neural network that classifies examples.  The following call
+instantiates `DNNClassifier`:
+
+```python
+    classifier = tf.estimator.DNNClassifier(
+        feature_columns=my_feature_columns,
+        hidden_units=[10, 10],
+        n_classes=3)
+```
+
+Use the `hidden_units` parameter to define the number of neurons
+in each hidden layer of the neural network.  Assign this parameter
+a list. For example:
+
+```python
+        hidden_units=[10, 10],
+```
+
+The length of the list assigned to `hidden_units` identifies the number of
+hidden layers (2, in this case).
+Each value in the list represents the number of neurons in a particular
+hidden layer (10 in the first hidden layer and 10 in the second hidden layer).
+To change the number of hidden layers or neurons, simply assign a different
+list to the `hidden_units` parameter.
+
+The ideal number of hidden layers and neurons depends on the problem
+and the data set. Like many aspects of machine learning,
+picking the ideal shape of the neural network requires some mixture
+of knowledge and experimentation.
+As a rule of thumb, increasing the number of hidden layers and neurons
+*typically* creates a more powerful model, which requires more data to
+train effectively.
+
+The `n_classes` parameter specifies the number of possible values that the
+neural network can predict.  Since the Iris problem classifies 3 Iris species,
+we set `n_classes` to 3.
+
+The constructor for `tf.Estimator.DNNClassifier` takes an optional argument
+named `optimizer`, which our sample code chose not to specify.  The
+[**optimizer**](https://developers.google.com/machine-learning/glossary/#optimizer)
+controls how the model will train.  As you develop more expertise in machine
+learning, optimizers and
+[**learning
+rate**](https://developers.google.com/machine-learning/glossary/#learning_rate)
+will become very important.
+
+
+
+### Train the model
+
+Instantiating a `tf.Estimator.DNNClassifier` creates a framework for learning 
+the model. Basically, we've wired a network but haven't yet let data flow 
+through it. To train the neural network, call the Estimator object's `train` 
+method. For example:
+
+```python
+    classifier.train(
+        input_fn=lambda:train_input_fn(train_feature, train_label, args.batch_size),
+        steps=args.train_steps)
+```
+
+The `steps` argument tells `train` to stop training after the specified
+number of iterations.  Increasing `steps` increases the amount of time
+the model will train.  Counter-intuitively, training a model longer
+does not guarantee a better model.  The default value of `args.train_steps`
+is 1000.  The number of steps to train is a
+[**hyperparameter**](https://developers.google.com/machine-learning/glossary/#hyperparameter)
+you can tune. Choosing the right number of steps usually
+requires both experience and experimentation.
+
+The `input_fn` parameter identifies the function that supplies the
+training data.  The call to the `train` method indicates that the
+`train_input_fn` function will supply the training data.  Here's that
+method's signature:
+
+```python
+def train_input_fn(features, labels, batch_size):
+```
+
+We're passing the following arguments to `train_input_fn`:
+
+* `train_feature` is a Python dictionary in which:
+    * Each key is the name of a feature.
+    * Each value is an array containing the values for each example in the
+      training set.
+* `train_label` is an array containing the values of the label for every
+  example in the training set.
+* `args.batch_size` is an integer defining the [**batch
+  size**](https://developers.google.com/machine-learning/glossary/#batch_size).
+
+The `train_input_fn` function relies on the **Dataset API**. This is a
+high-level TensorFlow API for reading data and transforming it into a form
+that the `train` method requires.  The following call converts the
+input features and labels into a `tf.data.Dataset` object, which is the base
+class of the Dataset API:
+
+```python
+    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
+```
+
+The `tf.dataset` class provides many useful functions for preparing examples
+for training. The following line calls three of those functions:
+
+```python
+    dataset = dataset.shuffle(buffer_size=1000).repeat(count=None).batch(batch_size)
+```
+
+Training works best if the training examples are in
+random order.  To randomize the examples, call
+`tf.data.Dataset.shuffle`.  Setting the `buffer_size` to a value
+larger than the number of examples (120) ensures that the data will
+be well shuffled.
+
+During training, the `train` method typically processes the
+examples multiple times.  Calling the
+`tf.data.Dataset.repeat` method without any arguments ensures
+that the `train` method has an infinite supply of (now shuffled)
+training set examples.
+
+The `train` method processes a
+[**batch**](https://developers.google.com/machine-learning/glossary/#batch)
+of examples at a time.
+The `tf.data.Dataset.batch` method creates a batch by
+concatenating multiple examples.
+This program sets the default [**batch
+size**](https://developers.google.com/machine-learning/glossary/#batch_size)
+to 100, meaning that the `batch` method will concatenate groups of
+100 examples.  The ideal batch size depends on the problem.  As a rule
+of thumb, smaller batch sizes usually enable the `train` method to train
+the model faster at the expense (sometimes) of accuracy.
+
+The following `return` statement passes a batch of examples back to
+the caller (the `train` method).
+
+```python
+   return dataset.make_one_shot_iterator().get_next()
+```
+
+
+### Evaluate the model
+
+**Evaluating** means determining how effectively the model makes
+predictions.  To determine the Iris classification model's effectiveness,
+pass some sepal and petal measurements to the model and ask the model
+to predict what Iris species they represent. Then compare the model's
+prediction against the actual label.  For example, a model that picked
+the correct species on half the input examples would have an
+[accuracy](https://developers.google.com/machine-learning/glossary/#accuracy)
+of 0.5.  The following suggests a more effective model:
+
+
+<table>
+  <tr>
+    <th style="background-color:darkblue" colspan="5">
+       Test Set</th>
+  </tr>
+  <tr>
+    <th colspan="4">Features</th>
+    <th colspan="1">Label</th>
+    <th colspan="1">Prediction</th>
+  </tr>
+  <tr> <td>5.9</td> <td>3.0</td> <td>4.3</td> <td>1.5</td> <td>1</td> 
+          <td style="background-color:green">1</td></tr>
+  <tr> <td>6.9</td> <td>3.1</td> <td>5.4</td> <td>2.1</td> <td>2</td> 
+          <td style="background-color:green">2</td></tr>
+  <tr> <td>5.1</td> <td>3.3</td> <td>1.7</td> <td>0.5</td> <td>0</td> 
+          <td style="background-color:green">0</td></tr>
+  <tr> <td>6.0</td> <td>3.4</td> <td>4.5</td> <td>1.6</td> <td>1</td> 
+          <td style="background-color:red">2</td></tr>
+  <tr> <td>5.5</td> <td>2.5</td> <td>4.0</td> <td>1.3</td> <td>1</td> 
+          <td style="background-color:green">1</td></tr>
+</table>
+**A model that is 80% accurate.**
+<p>&nbsp;</p>
+
+To evaluate a model's effectiveness, each Estimator provides an `evaluate`
+method.  The `premade_estimator.py` program calls `evaluate` as follows:
+
+```python
+# Evaluate the model.
+eval_result = classifier.evaluate(
+    input_fn=lambda:eval_input_fn(test_x, test_y, args.batch_size))
+
+print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))
+```
+
+The call to `classifier.evaluate` is similar to the call to `classifier.train`.
+The biggest difference is that `classifier.evaluate` must get its examples
+from the test set rather than the training set.  In other words, to
+fairly assess a model's effectiveness, the examples used to
+*evaluate* a model must be different from the examples used to *train*
+the model.  The `eval_input_fn` function serves a batch of examples from
+the test set.  Here's the `eval_input_fn` method:
+
+```python
+def eval_input_fn(features, labels=None, batch_size=None):
+    """An input function for evaluation or prediction"""
+    if labels is None:
+        # No labels, use only features.
+        inputs = features
+    else:
+        inputs = (features, labels)
+
+    # Convert inputs to a tf.dataset object.
+    dataset = tf.data.Dataset.from_tensor_slices(inputs)
+
+    # Batch the examples
+    assert batch_size is not None, "batch_size must not be None"
+    dataset = dataset.batch(batch_size)
+
+    # Return the read end of the pipeline.
+    return dataset.make_one_shot_iterator().get_next()
+```
+
+In brief, `eval_input_fn` does the following when called by
+`classifier.evaluate`:
+
+1.  Converts the features and labels from the test set to a `tf.dataset`
+    object.
+2.  Creates a batch of test set examples.  (There's no need to shuffle
+    or repeat the test set examples.)
+3.  Returns that batch of test set examples to `classifier.evaluate`.
+
+Running this code yields the following output (or something close to it):
+
+```none
+Test set accuracy: 0.967
+```
+
+An accuracy of 0.967 implies that our trained model correctly classified 29
+out of the 30 Iris species in the test set.
+
+
+### Predicting
+
+We've now trained a model and "proven" that it is good--but not
+perfect--at classifying Iris species.  Now let's use the trained
+model to make some predictions on [**unlabeled
+examples**](https://developers.google.com/machine-learning/glossary/#unlabeled_example);
+that is, on examples that contain features but not a label.
+
+In real-life, the unlabeled examples could come from lots of different
+sources including apps, CSV files, and data feeds.  For now, we're simply
+going to manually provide the following three unlabeled examples:
+
+```python
+    predict_x = {
+        'SepalLength': [5.1, 5.9, 6.9],
+        'SepalWidth': [3.3, 3.0, 3.1],
+        'PetalLength': [1.7, 4.2, 5.4],
+        'PetalWidth': [0.5, 1.5, 2.1],
+    }
+```
+
+Every Estimator provides a `predict` method, which `premade_estimator.py`
+calls as follows:
+
+```python
+predictions = classifier.predict(
+    input_fn=lambda:eval_input_fn(predict_x, batch_size=args.batch_size))
+```
+
+As with the `evaluate` method, our `predict` method also gathers examples
+from the `eval_input_fn` method.
+
+When doing predictions, we're *not* passing labels to `eval_input_fn`.
+Therefore, `eval_input_fn` does the following:
+
+1.  Converts the features from the 3-element manual set we just created.
+2.  Creates a batch of 3 examples from that manual set.
+3.  Returns that batch of examples to `classifier.predict`.
+
+The `predict` method returns a python iterable, yielding a dictionary of
+prediction results for each example.  This dictionary contains several keys.
+The `probabilities` key holds a list of three floating-point values,
+each representing the probability that the input example is a particular
+Iris species.  For example, consider the following `probabilities` list:
+
+```none
+'probabilities': array([  1.19127117e-08,   3.97069454e-02,   9.60292995e-01])
+```
+
+The preceding list indicates:
+
+*   A negligible chance of the Iris being Setosa.
+*   A 3.97% chance of the Iris being Versicolor.
+*   A 96.0% chance of the Iris being Virginica.
+
+The `class_ids` key holds a one-element array that identifies the most
+probable species.  For example:
+
+```none
+'class_ids': array([2])
+```
+
+The number `2` corresponds to Virginica.  The following code iterates
+through the returned `predictions` to report on each prediction:
+
+``` python
+for pred_dict, expec in zip(predictions, expected):
+    template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')
+
+    class_id = pred_dict['class_ids'][0]
+    probability = pred_dict['probabilities'][class_id]
+    print(template.format(SPECIES[class_id], 100 * probability, expec))
+```
+
+Running the program yields the following output:
+
+
+``` None
+...
+Prediction is "Setosa" (99.6%), expected "Setosa"
+
+Prediction is "Versicolor" (99.8%), expected "Versicolor"
+
+Prediction is "Virginica" (97.9%), expected "Virginica"
+```
+
+
+## Summary
+
+<!--TODO(barryr): When MLCC is released, add pointers to relevant sections.-->
+This document provides a short introduction to machine learning.
+
+Because `premade_estimators.py` relies on high-level APIs, much of the
+mathematical complexity in machine learning is hidden.
+If you intend to become more proficient in machine learning, we recommend
+ultimately learning more about [**gradient
+descent**](https://developers.google.com/machine-learning/glossary/#gradient_descent),
+batching, and neural networks.
+
+We recommend reading the @{$feature_columns$Feature Columns} document next,
+which explains how to represent different kinds of data in machine learning.
diff --git a/tensorflow/docs_src/get_started/index.md b/tensorflow/docs_src/get_started/index.md
index 003fac1a287688e1d1d343b1dcc834500fd20856..b7bd1286e3ce9026df49718d94cf53cf784a3be8 100644
--- a/tensorflow/docs_src/get_started/index.md
+++ b/tensorflow/docs_src/get_started/index.md
@@ -1,36 +1,35 @@
 # Getting Started
 
-For a brief overview of TensorFlow programming fundamentals, see the following
-guide:
-
-  * @{$get_started/get_started$Getting Started with TensorFlow}
-
-MNIST has become the canonical dataset for trying out a new machine learning
-toolkit.  We offer three guides that each demonstrate a different approach
-to training an MNIST model on TensorFlow:
-
-  * @{$mnist/beginners$MNIST for ML Beginners}, which introduces MNIST through
-    the high-level API.
-  * @{$mnist/pros$Deep MNIST for Experts}, which is more-in depth than
-    "MNIST for ML Beginners," and assumes some familiarity with machine
-    learning concepts.
-  * @{$mnist/mechanics$TensorFlow Mechanics 101}, which introduces MNIST through
-    the low-level API.
-
-For developers new to TensorFlow, the high-level API is a good place to start.
-To learn about the high-level API, read the following guides:
-
-  * @{$get_started/estimator$tf.estimator Quickstart}, which introduces this
-    API.
-  * @{$get_started/input_fn$Building Input Functions},
-    which takes you into a somewhat more sophisticated use of this API.
-
-TensorBoard is a utility to visualize different aspects of machine learning.
-The following guides explain how to use TensorBoard:
-
-  * @{$get_started/summaries_and_tensorboard$TensorBoard: Visualizing Learning},
-    which gets you started.
-  * @{$get_started/graph_viz$TensorBoard: Graph Visualization}, which explains
-    how to visualize the computational graph.  Graph visualization is typically
-    more useful for programmers using the low-level API.
-
+TensorFlow is a tool for machine learning. While it contains a wide range of
+functionality, TensorFlow is mainly designed for deep neural network models.
+
+TensorFlow provides many APIs. This section focuses on the high-level APIs.
+If you are new to TensorFlow, begin by reading one of the following documents:
+
+  * @{$get_started/get_started_for_beginners}, which is aimed at readers
+    new to machine learning.
+  * @{$get_started/premade_estimators}, which is aimed at readers who have
+    experience in machine learning.
+
+Then, read the following documents, which demonstrate the key features
+in the high-level APIs:
+
+  * @{$get_started/checkpoints}, which explains how to save training progress
+    and resume where you left off.
+  * @{$get_started/feature_columns}, which shows how an
+    Estimator can handle a variety of input data types without changes to the
+    model.
+  * @{$get_started/datasets_quickstart}, which introduces TensorFlow's
+    input pipelines.
+  * @{$get_started/custom_estimators}, which demonstrates how
+    to build and train models you design yourself.
+
+For more advanced users:
+
+  * The @{$low_level_intro$Low Level Introduction} demonstrates how to use
+    TensorFlow outside of the Estimator framework, for debugging and
+    experimentation.
+  * The @{$programmers_guide$Programmer's Guide} details major
+    TensorFlow components.
+  * The @{$tutorials$Tutorials} provide walkthroughs of a variety of
+    TensorFlow models.
diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md
deleted file mode 100644
index 24bfdbdd2e91a6d87a5ab1ec2ba264d90ef8e148..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/get_started/input_fn.md
+++ /dev/null
@@ -1,438 +0,0 @@
-# Building Input Functions with tf.estimator
-
-This tutorial introduces you to creating input functions in tf.estimator.
-You'll get an overview of how to construct an `input_fn` to preprocess and feed
-data into your models. Then, you'll implement an `input_fn` that feeds training,
-evaluation, and prediction data into a neural network regressor for predicting
-median house values.
-
-## Custom Input Pipelines with input_fn
-
-The `input_fn` is used to pass feature and target data to the `train`,
-`evaluate`, and `predict` methods of the `Estimator`.
-The user can do feature engineering or pre-processing inside the `input_fn`.
-Here's an example taken from the @{$get_started/estimator$tf.estimator Quickstart tutorial}:
-
-```python
-import numpy as np
-
-training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
-    filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32)
-
-train_input_fn = tf.estimator.inputs.numpy_input_fn(
-    x={"x": np.array(training_set.data)},
-    y=np.array(training_set.target),
-    num_epochs=None,
-    shuffle=True)
-
-classifier.train(input_fn=train_input_fn, steps=2000)
-```
-
-### Anatomy of an input_fn
-
-The following code illustrates the basic skeleton for an input function:
-
-```python
-def my_input_fn():
-
-    # Preprocess your data here...
-
-    # ...then return 1) a mapping of feature columns to Tensors with
-    # the corresponding feature data, and 2) a Tensor containing labels
-    return feature_cols, labels
-```
-
-The body of the input function contains the specific logic for preprocessing
-your input data, such as scrubbing out bad examples or
-[feature scaling](https://en.wikipedia.org/wiki/Feature_scaling).
-
-Input functions must return the following two values containing the final
-feature and label data to be fed into your model (as shown in the above code
-skeleton):
-
-<dl>
-  <dt><code>feature_cols</code></dt>
-  <dd>A dict containing key/value pairs that map feature column
-names to <code>Tensor</code>s (or <code>SparseTensor</code>s) containing the corresponding feature
-data.</dd>
-  <dt><code>labels</code></dt>
-  <dd>A <code>Tensor</code> containing your label (target) values: the values your model aims to predict.</dd>
-</dl>
-
-### Converting Feature Data to Tensors
-
-If your feature/label data is a python array or stored in
-[_pandas_](http://pandas.pydata.org/) dataframes or
-[numpy](http://www.numpy.org/) arrays, you can use the following methods to
-construct `input_fn`:
-
-```python
-import numpy as np
-# numpy input_fn.
-my_input_fn = tf.estimator.inputs.numpy_input_fn(
-    x={"x": np.array(x_data)},
-    y=np.array(y_data),
-    ...)
-```
-
-```python
-import pandas as pd
-# pandas input_fn.
-my_input_fn = tf.estimator.inputs.pandas_input_fn(
-    x=pd.DataFrame({"x": x_data}),
-    y=pd.Series(y_data),
-    ...)
-```
-
-For [sparse, categorical data](https://en.wikipedia.org/wiki/Sparse_matrix)
-(data where the majority of values are 0), you'll instead want to populate a
-`SparseTensor`, which is instantiated with three arguments:
-
-<dl>
-  <dt><code>dense_shape</code></dt>
-  <dd>The shape of the tensor. Takes a list indicating the number of elements in each dimension. For example, <code>dense_shape=[3,6]</code> specifies a two-dimensional 3x6 tensor, <code>dense_shape=[2,3,4]</code> specifies a three-dimensional 2x3x4 tensor, and <code>dense_shape=[9]</code> specifies a one-dimensional tensor with 9 elements.</dd>
-  <dt><code>indices</code></dt>
-  <dd>The indices of the elements in your tensor that contain nonzero values. Takes a list of terms, where each term is itself a list containing the index of a nonzero element. (Elements are zero-indexed—i.e., [0,0] is the index value for the element in the first column of the first row in a two-dimensional tensor.) For example, <code>indices=[[1,3], [2,4]]</code> specifies that the elements with indexes of [1,3] and [2,4] have nonzero values.</dd>
-  <dt><code>values</code></dt>
-  <dd>A one-dimensional tensor of values. Term <code>i</code> in <code>values</code> corresponds to term <code>i</code> in <code>indices</code> and specifies its value. For example, given <code>indices=[[1,3], [2,4]]</code>, the parameter <code>values=[18, 3.6]</code> specifies that element [1,3] of the tensor has a value of 18, and element [2,4] of the tensor has a value of 3.6.</dd>
-</dl>
-
-The following code defines a two-dimensional `SparseTensor` with 3 rows and 5
-columns. The element with index [0,1] has a value of 6, and the element with
-index [2,4] has a value of 0.5 (all other values are 0):
-
-```python
-sparse_tensor = tf.SparseTensor(indices=[[0,1], [2,4]],
-                                values=[6, 0.5],
-                                dense_shape=[3, 5])
-```
-
-This corresponds to the following dense tensor:
-
-```none
-[[0, 6, 0, 0, 0]
- [0, 0, 0, 0, 0]
- [0, 0, 0, 0, 0.5]]
-```
-
-For more on `SparseTensor`, see @{tf.SparseTensor}.
-
-### Passing input_fn Data to Your Model
-
-To feed data to your model for training, you simply pass the input function
-you've created to your `train` operation as the value of the `input_fn`
-parameter, e.g.:
-
-```python
-classifier.train(input_fn=my_input_fn, steps=2000)
-```
-
-Note that the `input_fn` parameter must receive a function object (i.e.,
-`input_fn=my_input_fn`), not the return value of a function call
-(`input_fn=my_input_fn()`). This means that if you try to pass parameters to the
-`input_fn` in your `train` call, as in the following code, it will result in a
-`TypeError`:
-
-```python
-classifier.train(input_fn=my_input_fn(training_set), steps=2000)
-```
-
-However, if you'd like to be able to parameterize your input function, there are
-other methods for doing so. You can employ a wrapper function that takes no
-arguments as your `input_fn` and use it to invoke your input function
-with the desired parameters. For example:
-
-```python
-def my_input_fn(data_set):
-  ...
-
-def my_input_fn_training_set():
-  return my_input_fn(training_set)
-
-classifier.train(input_fn=my_input_fn_training_set, steps=2000)
-```
-
-Alternatively, you can use Python's [`functools.partial`](https://docs.python.org/2/library/functools.html#functools.partial)
-function to construct a new function object with all parameter values fixed:
-
-```python
-classifier.train(
-    input_fn=functools.partial(my_input_fn, data_set=training_set),
-    steps=2000)
-```
-
-A third option is to wrap your `input_fn` invocation in a
-[`lambda`](https://docs.python.org/3/tutorial/controlflow.html#lambda-expressions)
-and pass it to the `input_fn` parameter:
-
-```python
-classifier.train(input_fn=lambda: my_input_fn(training_set), steps=2000)
-```
-
-One big advantage of designing your input pipeline as shown above—to accept a
-parameter for data set—is that you can pass the same `input_fn` to `evaluate`
-and `predict` operations by just changing the data set argument, e.g.:
-
-```python
-classifier.evaluate(input_fn=lambda: my_input_fn(test_set), steps=2000)
-```
-
-This approach enhances code maintainability: no need to define multiple
-`input_fn` (e.g. `input_fn_train`, `input_fn_test`, `input_fn_predict`) for each
-type of operation.
-
-Finally, you can use the methods in `tf.estimator.inputs` to create `input_fn`
-from numpy or pandas data sets. The additional benefit is that you can use
-more arguments, such as `num_epochs` and `shuffle` to control how the `input_fn`
-iterates over the data:
-
-```python
-import pandas as pd
-
-def get_input_fn_from_pandas(data_set, num_epochs=None, shuffle=True):
-  return tf.estimator.inputs.pandas_input_fn(
-      x=pd.DataFrame(...),
-      y=pd.Series(...),
-      num_epochs=num_epochs,
-      shuffle=shuffle)
-```
-
-```python
-import numpy as np
-
-def get_input_fn_from_numpy(data_set, num_epochs=None, shuffle=True):
-  return tf.estimator.inputs.numpy_input_fn(
-      x={...},
-      y=np.array(...),
-      num_epochs=num_epochs,
-      shuffle=shuffle)
-```
-
-### A Neural Network Model for Boston House Values
-
-In the remainder of this tutorial, you'll write an input function for
-preprocessing a subset of Boston housing data pulled from the UCI Housing Data
-Set and use it to feed data to
-a neural network regressor for predicting median house values.
-
-The [Boston CSV data sets](#setup) you'll use to train your neural network
-contain the following
-[feature data](https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.names)
-for Boston suburbs:
-
-Feature | Description
-------- | ---------------------------------------------------------------
-CRIM    | Crime rate per capita
-ZN      | Fraction of residential land zoned to permit 25,000+ sq ft lots
-INDUS   | Fraction of land that is non-retail business
-NOX     | Concentration of nitric oxides in parts per 10 million
-RM      | Average Rooms per dwelling
-AGE     | Fraction of owner-occupied residences built before 1940
-DIS     | Distance to Boston-area employment centers
-TAX     | Property tax rate per $10,000
-PTRATIO | Student-teacher ratio
-
-And the label your model will predict is MEDV, the median value of
-owner-occupied residences in thousands of dollars.
-
-## Setup {#setup}
-
-Download the following data sets:
-[boston_train.csv](http://download.tensorflow.org/data/boston_train.csv),
-[boston_test.csv](http://download.tensorflow.org/data/boston_test.csv), and
-[boston_predict.csv](http://download.tensorflow.org/data/boston_predict.csv).
-
-The following sections provide a step-by-step walkthrough of how to create an
-input function, feed these data sets into a neural network regressor, train and
-evaluate the model, and make house value predictions. The full, final code is [available
-here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/input_fn/boston.py).
-
-### Importing the Housing Data
-
-To start, set up your imports (including `pandas` and `tensorflow`) and set logging verbosity to
-`INFO` for more detailed log output:
-
-```python
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import itertools
-
-import pandas as pd
-import tensorflow as tf
-
-tf.logging.set_verbosity(tf.logging.INFO)
-```
-
-Define the column names for the data set in `COLUMNS`. To distinguish features
-from the label, also define `FEATURES` and `LABEL`. Then read the three CSVs
-([train](http://download.tensorflow.org/data/boston_train.csv),
-[test](http://download.tensorflow.org/data/boston_test.csv), and
-[predict](http://download.tensorflow.org/data/boston_predict.csv)) into _pandas_
-`DataFrame`s:
-
-```python
-COLUMNS = ["crim", "zn", "indus", "nox", "rm", "age",
-           "dis", "tax", "ptratio", "medv"]
-FEATURES = ["crim", "zn", "indus", "nox", "rm",
-            "age", "dis", "tax", "ptratio"]
-LABEL = "medv"
-
-training_set = pd.read_csv("boston_train.csv", skipinitialspace=True,
-                           skiprows=1, names=COLUMNS)
-test_set = pd.read_csv("boston_test.csv", skipinitialspace=True,
-                       skiprows=1, names=COLUMNS)
-prediction_set = pd.read_csv("boston_predict.csv", skipinitialspace=True,
-                             skiprows=1, names=COLUMNS)
-```
-
-### Defining FeatureColumns and Creating the Regressor
-
-Next, create a list of `FeatureColumn`s for the input data, which formally
-specify the set of features to use for training. Because all features in the
-housing data set contain continuous values, you can create their
-`FeatureColumn`s using the `tf.feature_column.numeric_column()` function:
-
-```python
-feature_cols = [tf.feature_column.numeric_column(k) for k in FEATURES]
-```
-
-NOTE: For a more in-depth overview of feature columns, see
-@{$linear#feature-columns-and-transformations$this introduction},
-and for an example that illustrates how to define `FeatureColumns` for
-categorical data, see the @{$wide$Linear Model Tutorial}.
-
-Now, instantiate a `DNNRegressor` for the neural network regression model.
-You'll need to provide two arguments here: `hidden_units`, a hyperparameter
-specifying the number of nodes in each hidden layer (here, two hidden layers
-with 10 nodes each), and `feature_columns`, containing the list of
-`FeatureColumns` you just defined:
-
-```python
-regressor = tf.estimator.DNNRegressor(feature_columns=feature_cols,
-                                      hidden_units=[10, 10],
-                                      model_dir="/tmp/boston_model")
-```
-
-### Building the input_fn
-
-To pass input data into the `regressor`, write a factory method that accepts a
-_pandas_ `Dataframe` and returns an `input_fn`:
-
-```python
-def get_input_fn(data_set, num_epochs=None, shuffle=True):
-  return tf.estimator.inputs.pandas_input_fn(
-      x=pd.DataFrame({k: data_set[k].values for k in FEATURES}),
-      y = pd.Series(data_set[LABEL].values),
-      num_epochs=num_epochs,
-      shuffle=shuffle)
-```
-
-Note that the input data is passed into `input_fn` in the `data_set` argument,
-which means the function can process any of the `DataFrame`s you've imported:
-`training_set`, `test_set`, and `prediction_set`.
-
-Two additional arguments are provided:
-* `num_epochs`: controls the number of
-  epochs to iterate over data. For training, set this to `None`, so the
-  `input_fn` keeps returning data until the required number of train steps is
-  reached. For evaluate and predict, set this to 1, so the `input_fn` will
-  iterate over the data once and then raise `OutOfRangeError`. That error will
-  signal the `Estimator` to stop evaluate or predict.
-* `shuffle`: Whether to shuffle the data. For evaluate and predict, set this to
-  `False`, so the `input_fn` iterates over the data sequentially. For train,
-  set this to `True`.
-
-### Training the Regressor
-
-To train the neural network regressor, run `train` with the `training_set`
-passed to the `input_fn` as follows:
-
-```python
-regressor.train(input_fn=get_input_fn(training_set), steps=5000)
-```
-
-You should see log output similar to the following, which reports training loss
-for every 100 steps:
-
-```none
-INFO:tensorflow:Step 1: loss = 483.179
-INFO:tensorflow:Step 101: loss = 81.2072
-INFO:tensorflow:Step 201: loss = 72.4354
-...
-INFO:tensorflow:Step 1801: loss = 33.4454
-INFO:tensorflow:Step 1901: loss = 32.3397
-INFO:tensorflow:Step 2001: loss = 32.0053
-INFO:tensorflow:Step 4801: loss = 27.2791
-INFO:tensorflow:Step 4901: loss = 27.2251
-INFO:tensorflow:Saving checkpoints for 5000 into /tmp/boston_model/model.ckpt.
-INFO:tensorflow:Loss for final step: 27.1674.
-```
-
-### Evaluating the Model
-
-Next, see how the trained model performs against the test data set. Run
-`evaluate`, and this time pass the `test_set` to the `input_fn`:
-
-```python
-ev = regressor.evaluate(
-    input_fn=get_input_fn(test_set, num_epochs=1, shuffle=False))
-```
-
-Retrieve the loss from the `ev` results and print it to output:
-
-```python
-loss_score = ev["loss"]
-print("Loss: {0:f}".format(loss_score))
-```
-
-You should see results similar to the following:
-
-```none
-INFO:tensorflow:Eval steps [0,1) for training step 5000.
-INFO:tensorflow:Saving evaluation summary for 5000 step: loss = 11.9221
-Loss: 11.922098
-```
-
-### Making Predictions
-
-Finally, you can use the model to predict median house values for the
-`prediction_set`, which contains feature data but no labels for six examples:
-
-```python
-y = regressor.predict(
-    input_fn=get_input_fn(prediction_set, num_epochs=1, shuffle=False))
-# .predict() returns an iterator of dicts; convert to a list and print
-# predictions
-predictions = list(p["predictions"] for p in itertools.islice(y, 6))
-print("Predictions: {}".format(str(predictions)))
-```
-
-Your results should contain six house-value predictions in thousands of dollars,
-e.g:
-
-```none
-Predictions: [ 33.30348587  17.04452896  22.56370163  34.74345398  14.55953979
-  19.58005714]
-```
-
-## Additional Resources
-
-This tutorial focused on creating an `input_fn` for a neural network regressor.
-To learn more about using `input_fn`s for other types of models, check out the
-following resources:
-
-*   @{$linear$Large-scale Linear Models with TensorFlow}: This
-    introduction to linear models in TensorFlow provides a high-level overview
-    of feature columns and techniques for transforming input data.
-
-*   @{$wide$TensorFlow Linear Model Tutorial}: This tutorial covers
-    creating `FeatureColumn`s and an `input_fn` for a linear classification
-    model that predicts income range based on census data.
-
-*   @{$wide_and_deep$TensorFlow Wide & Deep Learning Tutorial}: Building on
-    the @{$wide$Linear Model Tutorial}, this tutorial covers
-    `FeatureColumn` and `input_fn` creation for a "wide and deep" model that
-    combines a linear model and a neural network using
-    `DNNLinearCombinedClassifier`.
diff --git a/tensorflow/docs_src/get_started/leftnav_files b/tensorflow/docs_src/get_started/leftnav_files
index bb67eaddda369c0271c4fdb17a686016ffa80a2e..437791d6a32db3e43415e381a034424ae8225f6f 100644
--- a/tensorflow/docs_src/get_started/leftnav_files
+++ b/tensorflow/docs_src/get_started/leftnav_files
@@ -1,10 +1,11 @@
 index.md
-get_started.md
-mnist/beginners.md
-mnist/pros.md
-mnist/mechanics.md
-estimator.md
-input_fn.md
-summaries_and_tensorboard.md
-graph_viz.md
-tensorboard_histograms.md
+
+### Getting Started
+get_started_for_beginners.md
+premade_estimators.md
+
+### Details
+checkpoints.md
+feature_columns.md
+datasets_quickstart.md
+custom_estimators.md
diff --git a/tensorflow/docs_src/get_started/mnist/beginners.md b/tensorflow/docs_src/get_started/mnist/beginners.md
deleted file mode 100644
index 38c467ddc32c9ca21432cc7fe74a594446804293..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/get_started/mnist/beginners.md
+++ /dev/null
@@ -1,455 +0,0 @@
-# MNIST For ML Beginners
-
-*This tutorial is intended for readers who are new to both machine learning and
-TensorFlow. If you already know what MNIST is, and what softmax (multinomial
-logistic) regression is, you might prefer this
-@{$pros$faster paced tutorial}.  Be sure to
-@{$install$install TensorFlow} before starting either
-tutorial.*
-
-When one learns how to program, there's a tradition that the first thing you do
-is print "Hello World." Just like programming has Hello World, machine learning
-has MNIST.
-
-MNIST is a simple computer vision dataset. It consists of images of handwritten
-digits like these:
-
-<div style="width:40%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/MNIST.png">
-</div>
-
-It also includes labels for each image, telling us which digit it is. For
-example, the labels for the above images are 5, 0, 4, and 1.
-
-In this tutorial, we're going to train a model to look at images and predict
-what digits they are. Our goal isn't to train a really elaborate model that
-achieves state-of-the-art performance -- although we'll give you code to do that
-later! -- but rather to dip a toe into using TensorFlow. As such, we're going
-to start with a very simple model, called a Softmax Regression.
-
-The actual code for this tutorial is very short, and all the interesting
-stuff happens in just three lines. However, it is very
-important to understand the ideas behind it: both how TensorFlow works and the
-core machine learning concepts. Because of this, we are going to very carefully
-work through the code.
-
-## About this tutorial
-
-This tutorial is an explanation, line by line, of what is happening in the
-[mnist_softmax.py](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/mnist_softmax.py) code.
-
-You can use this tutorial in a few different ways, including:
-
-- Copy and paste each code snippet, line by line, into a Python environment as
-  you read through the explanations of each line.
-
-- Run the entire `mnist_softmax.py` Python file either before or after reading
-  through the explanations, and use this tutorial to understand the lines of
-  code that aren't clear to you.
-
-What we will accomplish in this tutorial:
-
-- Learn about the MNIST data and softmax regressions
-
-- Create a function that is a model for recognizing digits, based on looking at
-  every pixel in the image
-
-- Use TensorFlow to train the model to recognize digits by having it "look" at
-  thousands of examples (and run our first TensorFlow session to do so)
-
-- Check the model's accuracy with our test data
-
-## The MNIST Data
-
-The MNIST data is hosted on
-[Yann LeCun's website](http://yann.lecun.com/exdb/mnist/).  If you are copying and
-pasting in the code from this tutorial, start here with these two lines of code
-which will download and read in the data automatically:
-
-```python
-from tensorflow.examples.tutorials.mnist import input_data
-mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
-```
-
-The MNIST data is split into three parts: 55,000 data points of training
-data (`mnist.train`), 10,000 points of test data (`mnist.test`), and 5,000
-points of validation data (`mnist.validation`). This split is very important:
-it's essential in machine learning that we have separate data which we don't
-learn from so that we can make sure that what we've learned actually
-generalizes!
-
-As mentioned earlier, every MNIST data point has two parts: an image of a
-handwritten digit and a corresponding label. We'll call the images "x"
-and the labels "y". Both the training set and test set contain images and their
-corresponding labels; for example the training images are `mnist.train.images`
-and the training labels are `mnist.train.labels`.
-
-Each image is 28 pixels by 28 pixels. We can interpret this as a big array of
-numbers:
-
-<div style="width:50%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/MNIST-Matrix.png">
-</div>
-
-We can flatten this array into a vector of 28x28 = 784 numbers. It doesn't
-matter how we flatten the array, as long as we're consistent between images.
-From this perspective, the MNIST images are just a bunch of points in a
-784-dimensional vector space, with a
-[very rich structure](https://colah.github.io/posts/2014-10-Visualizing-MNIST/)
-(warning: computationally intensive visualizations).
-
-Flattening the data throws away information about the 2D structure of the image.
-Isn't that bad? Well, the best computer vision methods do exploit this
-structure, and we will in later tutorials. But the simple method we will be
-using here, a softmax regression (defined below), won't.
-
-The result is that `mnist.train.images` is a tensor (an n-dimensional array)
-with a shape of `[55000, 784]`. The first dimension is an index into the list
-of images and the second dimension is the index for each pixel in each image.
-Each entry in the tensor is a pixel intensity between 0 and 1, for a particular
-pixel in a particular image.
-
-<div style="width:40%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/mnist-train-xs.png">
-</div>
-
-Each image in MNIST has a corresponding label, a number between 0 and 9
-representing the digit drawn in the image.
-
-For the purposes of this tutorial, we're going to want our labels as "one-hot
-vectors". A one-hot vector is a vector which is 0 in most dimensions, and 1 in a
-single dimension. In this case, the \\(n\\)th digit will be represented as a
-vector which is 1 in the \\(n\\)th dimension. For example, 3 would be
-\\([0,0,0,1,0,0,0,0,0,0]\\).  Consequently, `mnist.train.labels` is a
-`[55000, 10]` array of floats.
-
-<div style="width:40%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/mnist-train-ys.png">
-</div>
-
-We're now ready to actually make our model!
-
-## Softmax Regressions
-
-We know that every image in MNIST is of a handwritten digit between zero and
-nine.  So there are only ten possible things that a given image can be. We want
-to be able to look at an image and give the probabilities for it being each
-digit. For example, our model might look at a picture of a nine and be 80% sure
-it's a nine, but give a 5% chance to it being an eight (because of the top loop)
-and a bit of probability to all the others because it isn't 100% sure.
-
-This is a classic case where a softmax regression is a natural, simple model.
-If you want to assign probabilities to an object being one of several different
-things, softmax is the thing to do, because softmax gives us a list of values
-between 0 and 1 that add up to 1. Even later on, when we train more sophisticated
-models, the final step will be a layer of softmax.
-
-A softmax regression has two steps: first we add up the evidence of our input
-being in certain classes, and then we convert that evidence into probabilities.
-
-To tally up the evidence that a given image is in a particular class, we do a
-weighted sum of the pixel intensities. The weight is negative if that pixel
-having a high intensity is evidence against the image being in that class, and
-positive if it is evidence in favor.
-
-The following diagram shows the weights one model learned for each of these
-classes. Red represents negative weights, while blue represents positive
-weights.
-
-<div style="width:40%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/softmax-weights.png">
-</div>
-
-We also add some extra evidence called a bias. Basically, we want to be able
-to say that some things are more likely independent of the input. The result is
-that the evidence for a class \\(i\\) given an input \\(x\\) is:
-
-$$\text{evidence}_i = \sum_j W_{i,~ j} x_j + b_i$$
-
-where \\(W_i\\) is the weights and \\(b_i\\) is the bias for class \\(i\\),
-and \\(j\\) is an index for summing over the pixels in our input image \\(x\\).
-We then convert the evidence tallies into our predicted probabilities
-\\(y\\) using the "softmax" function:
-
-$$y = \text{softmax}(\text{evidence})$$
-
-Here softmax is serving as an "activation" or "link" function, shaping
-the output of our linear function into the form we want -- in this case, a
-probability distribution over 10 cases.
-You can think of it as converting tallies
-of evidence into probabilities of our input being in each class.
-It's defined as:
-
-$$\text{softmax}(evidence) = \text{normalize}(\exp(evidence))$$
-
-If you expand that equation out, you get:
-
-$$\text{softmax}(evidence)_i = \frac{\exp(evidence_i)}{\sum_j \exp(evidence_j)}$$
-
-But it's often more helpful to think of softmax the first way: exponentiating
-its inputs and then normalizing them.  The exponentiation means that one more
-unit of evidence increases the weight given to any hypothesis multiplicatively.
-And conversely, having one less unit of evidence means that a hypothesis gets a
-fraction of its earlier weight. No hypothesis ever has zero or negative
-weight. Softmax then normalizes these weights, so that they add up to one,
-forming a valid probability distribution. (To get more intuition about the
-softmax function, check out the
-[section](http://neuralnetworksanddeeplearning.com/chap3.html#softmax) on it in
-Michael Nielsen's book, complete with an interactive visualization.)
-
-You can picture our softmax regression as looking something like the following,
-although with a lot more \\(x\\)s. For each output, we compute a weighted sum of
-the \\(x\\)s, add a bias, and then apply softmax.
-
-<div style="width:55%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/softmax-regression-scalargraph.png">
-</div>
-
-If we write that out as equations, we get:
-
-<div style="width:52%; margin-left:25%; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/softmax-regression-scalarequation.png"
-   alt="[y1, y2, y3] = softmax(W11*x1 + W12*x2 + W13*x3 + b1,  W21*x1 + W22*x2 + W23*x3 + b2,  W31*x1 + W32*x2 + W33*x3 + b3)">
-</div>
-
-We can "vectorize" this procedure, turning it into a matrix multiplication
-and vector addition. This is helpful for computational efficiency. (It's also
-a useful way to think.)
-
-<div style="width:50%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img style="width:100%" src="https://www.tensorflow.org/images/softmax-regression-vectorequation.png"
- alt="[y1, y2, y3] = softmax([[W11, W12, W13], [W21, W22, W23], [W31, W32, W33]]*[x1, x2, x3] + [b1, b2, b3])">
-</div>
-
-More compactly, we can just write:
-
-$$y = \text{softmax}(Wx + b)$$
-
-Now let's turn that into something that TensorFlow can use.
-
-## Implementing the Regression
-
-
-To do efficient numerical computing in Python, we typically use libraries like
-[NumPy](http://www.numpy.org) that do expensive operations such as matrix
-multiplication outside Python, using highly efficient code implemented in
-another language.  Unfortunately, there can still be a lot of overhead from
-switching back to Python every operation. This overhead is especially bad if you
-want to run computations on GPUs or in a distributed manner, where there can be
-a high cost to transferring data.
-
-TensorFlow also does its heavy lifting outside Python, but it takes things a
-step further to avoid this overhead.  Instead of running a single expensive
-operation independently from Python, TensorFlow lets us describe a graph of
-interacting operations that run entirely outside Python. (Approaches like this
-can be seen in a few machine learning libraries.)
-
-To use TensorFlow, first we need to import it.
-
-```python
-import tensorflow as tf
-```
-
-We describe these interacting operations by manipulating symbolic variables.
-Let's create one:
-
-```python
-x = tf.placeholder(tf.float32, [None, 784])
-```
-
-`x` isn't a specific value. It's a `placeholder`, a value that we'll input when
-we ask TensorFlow to run a computation. We want to be able to input any number
-of MNIST images, each flattened into a 784-dimensional vector. We represent
-this as a 2-D tensor of floating-point numbers, with a shape `[None, 784]`.
-(Here `None` means that a dimension can be of any length.)
-
-We also need the weights and biases for our model. We could imagine treating
-these like additional inputs, but TensorFlow has an even better way to handle
-it: `Variable`.  A `Variable` is a modifiable tensor that lives in TensorFlow's
-graph of interacting operations. It can be used and even modified by the
-computation. For machine learning applications, one generally has the model
-parameters be `Variable`s.
-
-```python
-W = tf.Variable(tf.zeros([784, 10]))
-b = tf.Variable(tf.zeros([10]))
-```
-
-We create these `Variable`s by giving `tf.Variable` the initial value of the
-`Variable`: in this case, we initialize both `W` and `b` as tensors full of
-zeros. Since we are going to learn `W` and `b`, it doesn't matter very much
-what they initially are.
-
-Notice that `W` has a shape of [784, 10] because we want to multiply the
-784-dimensional image vectors by it to produce 10-dimensional vectors of
-evidence for the difference classes. `b` has a shape of [10] so we can add it
-to the output.
-
-We can now implement our model. It only takes one line to define it!
-
-```python
-y = tf.nn.softmax(tf.matmul(x, W) + b)
-```
-
-First, we multiply `x` by `W` with the expression `tf.matmul(x, W)`. This is
-flipped from when we multiplied them in our equation, where we had \\(Wx\\), as
-a small trick to deal with `x` being a 2D tensor with multiple inputs. We then
-add `b`, and finally apply `tf.nn.softmax`.
-
-That's it. It only took us one line to define our model, after a couple short
-lines of setup. That isn't because TensorFlow is designed to make a softmax
-regression particularly easy: it's just a very flexible way to describe many
-kinds of numerical computations, from machine learning models to physics
-simulations. And once defined, our model can be run on different devices:
-your computer's CPU, GPUs, and even phones!
-
-
-## Training
-
-In order to train our model, we need to define what it means for the model to be
-good. Well, actually, in machine learning we typically define what it means for
-a model to be bad. We call this the cost, or the loss, and it represents how far
-off our model is from our desired outcome. We try to minimize that error, and
-the smaller the error margin, the better our model is.
-
-One very common, very nice function to determine the loss of a model is called
-"cross-entropy." Cross-entropy arises from thinking about information
-compressing codes in information theory but it winds up being an important idea
-in lots of areas, from gambling to machine learning. It's defined as:
-
-$$H_{y'}(y) = -\sum_i y'_i \log(y_i)$$
-
-Where \\(y\\) is our predicted probability distribution, and \\(y'\\) is the true
-distribution (the one-hot vector with the digit labels).  In some rough sense, the
-cross-entropy is measuring how inefficient our predictions are for describing
-the truth. Going into more detail about cross-entropy is beyond the scope of
-this tutorial, but it's well worth
-[understanding](https://colah.github.io/posts/2015-09-Visual-Information).
-
-To implement cross-entropy we need to first add a new placeholder to input the
-correct answers:
-
-```python
-y_ = tf.placeholder(tf.float32, [None, 10])
-```
-
-Then we can implement the cross-entropy function, \\(-\sum y'\log(y)\\):
-
-```python
-cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
-```
-
-First, `tf.log` computes the logarithm of each element of `y`. Next, we multiply
-each element of `y_` with the corresponding element of `tf.log(y)`. Then
-`tf.reduce_sum` adds the elements in the second dimension of y, due to the
-`reduction_indices=[1]` parameter. Finally, `tf.reduce_mean` computes the mean
-over all the examples in the batch.
-
-Note that in the source code, we don't use this formulation, because it is
-numerically unstable.  Instead, we apply
-`tf.nn.softmax_cross_entropy_with_logits` on the unnormalized logits (e.g., we
-call `softmax_cross_entropy_with_logits` on `tf.matmul(x, W) + b`), because this
-more numerically stable function internally computes the softmax activation.  In
-your code, consider using `tf.nn.softmax_cross_entropy_with_logits`
-instead.
-
-Now that we know what we want our model to do, it's very easy to have TensorFlow
-train it to do so.  Because TensorFlow knows the entire graph of your
-computations, it can automatically use the
-[backpropagation algorithm](https://colah.github.io/posts/2015-08-Backprop) to
-efficiently determine how your variables affect the loss you ask it to
-minimize. Then it can apply your choice of optimization algorithm to modify the
-variables and reduce the loss.
-
-```python
-train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
-```
-
-In this case, we ask TensorFlow to minimize `cross_entropy` using the
-[gradient descent algorithm](https://en.wikipedia.org/wiki/Gradient_descent)
-with a learning rate of 0.5. Gradient descent is a simple procedure, where
-TensorFlow simply shifts each variable a little bit in the direction that
-reduces the cost. But TensorFlow also provides
-@{$python/train#Optimizers$many other optimization algorithms}:
-using one is as simple as tweaking one line.
-
-What TensorFlow actually does here, behind the scenes, is to add new operations
-to your graph which implement backpropagation and gradient descent. Then it
-gives you back a single operation which, when run, does a step of gradient
-descent training, slightly tweaking your variables to reduce the loss.
-
-
-We can now launch the model in an `InteractiveSession`:
-
-```python
-sess = tf.InteractiveSession()
-```
-
-We first have to create an operation to initialize the variables we created:
-
-```python
-tf.global_variables_initializer().run()
-```
-
-
-Let's train -- we'll run the training step 1000 times!
-
-```python
-for _ in range(1000):
-  batch_xs, batch_ys = mnist.train.next_batch(100)
-  sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
-```
-
-Each step of the loop, we get a "batch" of one hundred random data points from
-our training set. We run `train_step` feeding in the batches data to replace
-the `placeholder`s.
-
-Using small batches of random data is called stochastic training -- in this
-case, stochastic gradient descent. Ideally, we'd like to use all our data for
-every step of training because that would give us a better sense of what we
-should be doing, but that's expensive. So, instead, we use a different subset
-every time. Doing this is cheap and has much of the same benefit.
-
-
-
-## Evaluating Our Model
-
-How well does our model do?
-
-Well, first let's figure out where we predicted the correct label. `tf.argmax`
-is an extremely useful function which gives you the index of the highest entry
-in a tensor along some axis. For example, `tf.argmax(y,1)` is the label our
-model thinks is most likely for each input, while `tf.argmax(y_,1)` is the
-correct label. We can use `tf.equal` to check if our prediction matches the
-truth.
-
-```python
-correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
-```
-
-That gives us a list of booleans. To determine what fraction are correct, we
-cast to floating point numbers and then take the mean. For example,
-`[True, False, True, True]` would become `[1,0,1,1]` which would become `0.75`.
-
-```python
-accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
-```
-
-Finally, we ask for our accuracy on our test data.
-
-```python
-print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
-```
-
-This should be about 92%.
-
-Is that good? Well, not really. In fact, it's pretty bad. This is because we're
-using a very simple model. With some small changes, we can get to 97%. The best
-models can get to over 99.7% accuracy! (For more information, have a look at
-this
-[list of results](https://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results).)
-
-What matters is that we learned from this model. Still, if you're feeling a bit
-down about these results, check out
-@{$pros$the next tutorial} where we do a lot
-better, and learn how to build more sophisticated models using TensorFlow!
diff --git a/tensorflow/docs_src/get_started/mnist/mechanics.md b/tensorflow/docs_src/get_started/mnist/mechanics.md
deleted file mode 100644
index 27fae45b5b0b4126132556cfac312fbb3c4f515a..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/get_started/mnist/mechanics.md
+++ /dev/null
@@ -1,489 +0,0 @@
-# TensorFlow Mechanics 101
-
-Code: [tensorflow/examples/tutorials/mnist/](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/)
-
-The goal of this tutorial is to show how to use TensorFlow to train and
-evaluate a simple feed-forward neural network for handwritten digit
-classification using the (classic) MNIST data set.  The intended audience for
-this tutorial is experienced machine learning users interested in using
-TensorFlow.
-
-These tutorials are not intended for teaching Machine Learning in general.
-
-Please ensure you have followed the instructions to
-@{$install$install TensorFlow}.
-
-## Tutorial Files
-
-This tutorial references the following files:
-
-File | Purpose
---- | ---
-[`mnist.py`](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/mnist.py) | The code to build a fully-connected MNIST model.
-[`fully_connected_feed.py`](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/fully_connected_feed.py) | The main code to train the built MNIST model against the downloaded dataset using a feed dictionary.
-
-Simply run the `fully_connected_feed.py` file directly to start training:
-
-```bash
-python fully_connected_feed.py
-```
-
-## Prepare the Data
-
-MNIST is a classic problem in machine learning. The problem is to look at
-greyscale 28x28 pixel images of handwritten digits and determine which digit
-the image represents, for all the digits from zero to nine.
-
-![MNIST Digits](https://www.tensorflow.org/images/mnist_digits.png "MNIST Digits")
-
-For more information, refer to [Yann LeCun's MNIST page](http://yann.lecun.com/exdb/mnist/)
-or [Chris Olah's visualizations of MNIST](http://colah.github.io/posts/2014-10-Visualizing-MNIST/).
-
-### Download
-
-At the top of the `run_training()` method, the `input_data.read_data_sets()`
-function will ensure that the correct data has been downloaded to your local
-training folder and then unpack that data to return a dictionary of `DataSet`
-instances.
-
-```python
-data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
-```
-
-**NOTE**: The `fake_data` flag is used for unit-testing purposes and may be
-safely ignored by the reader.
-
-Dataset | Purpose
---- | ---
-`data_sets.train` | 55000 images and labels, for primary training.
-`data_sets.validation` | 5000 images and labels, for iterative validation of training accuracy.
-`data_sets.test` | 10000 images and labels, for final testing of trained accuracy.
-
-### Inputs and Placeholders
-
-The `placeholder_inputs()` function creates two @{tf.placeholder}
-ops that define the shape of the inputs, including the `batch_size`, to the
-rest of the graph and into which the actual training examples will be fed.
-
-```python
-images_placeholder = tf.placeholder(tf.float32, shape=(batch_size,
-                                                       mnist.IMAGE_PIXELS))
-labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size))
-```
-
-Further down, in the training loop, the full image and label datasets are
-sliced to fit the `batch_size` for each step, matched with these placeholder
-ops, and then passed into the `sess.run()` function using the `feed_dict`
-parameter.
-
-## Build the Graph
-
-After creating placeholders for the data, the graph is built from the
-`mnist.py` file according to a 3-stage pattern: `inference()`, `loss()`, and
-`training()`.
-
-1.  `inference()` - Builds the graph as far as required for running
-the network forward to make predictions.
-1.  `loss()` - Adds to the inference graph the ops required to generate
-loss.
-1.  `training()` - Adds to the loss graph the ops required to compute
-and apply gradients.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="https://www.tensorflow.org/images/mnist_subgraph.png">
-</div>
-
-### Inference
-
-The `inference()` function builds the graph as far as needed to
-return the tensor that would contain the output predictions.
-
-It takes the images placeholder as input and builds on top
-of it a pair of fully connected layers with [ReLU](https://en.wikipedia.org/wiki/Rectifier_(neural_networks)) activation followed by a ten
-node linear layer specifying the output logits.
-
-Each layer is created beneath a unique @{tf.name_scope}
-that acts as a prefix to the items created within that scope.
-
-```python
-with tf.name_scope('hidden1'):
-```
-
-Within the defined scope, the weights and biases to be used by each of these
-layers are generated into @{tf.Variable}
-instances, with their desired shapes:
-
-```python
-weights = tf.Variable(
-    tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
-                        stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
-    name='weights')
-biases = tf.Variable(tf.zeros([hidden1_units]),
-                     name='biases')
-```
-
-When, for instance, these are created under the `hidden1` scope, the unique
-name given to the weights variable would be "`hidden1/weights`".
-
-Each variable is given initializer ops as part of their construction.
-
-In this most common case, the weights are initialized with the
-@{tf.truncated_normal}
-and given their shape of a 2-D tensor with
-the first dim representing the number of units in the layer from which the
-weights connect and the second dim representing the number of
-units in the layer to which the weights connect.  For the first layer, named
-`hidden1`, the dimensions are `[IMAGE_PIXELS, hidden1_units]` because the
-weights are connecting the image inputs to the hidden1 layer.  The
-`tf.truncated_normal` initializer generates a random distribution with a given
-mean and standard deviation.
-
-Then the biases are initialized with @{tf.zeros}
-to ensure they start with all zero values, and their shape is simply the number
-of units in the layer to which they connect.
-
-The graph's three primary ops -- two @{tf.nn.relu}
-ops wrapping @{tf.matmul}
-for the hidden layers and one extra `tf.matmul` for the logits -- are then
-created, each in turn, with separate `tf.Variable` instances connected to each
-of the input placeholders or the output tensors of the previous layer.
-
-```python
-hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
-```
-
-```python
-hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
-```
-
-```python
-logits = tf.matmul(hidden2, weights) + biases
-```
-
-Finally, the `logits` tensor that will contain the output is returned.
-
-### Loss
-
-The `loss()` function further builds the graph by adding the required loss
-ops.
-
-First, the values from the `labels_placeholder` are converted to 64-bit integers. Then, a @{tf.nn.sparse_softmax_cross_entropy_with_logits} op is added to automatically produce 1-hot labels from the `labels_placeholder` and compare the output logits from the `inference()` function with those 1-hot labels.
-
-```python
-labels = tf.to_int64(labels)
-cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
-    labels=labels, logits=logits, name='xentropy')
-```
-
-It then uses @{tf.reduce_mean}
-to average the cross entropy values across the batch dimension (the first
-dimension) as the total loss.
-
-```python
-loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
-```
-
-And the tensor that will then contain the loss value is returned.
-
-> Note: Cross-entropy is an idea from information theory that allows us
-> to describe how bad it is to believe the predictions of the neural network,
-> given what is actually true. For more information, read the blog post Visual
-> Information Theory (http://colah.github.io/posts/2015-09-Visual-Information/)
-
-### Training
-
-The `training()` function adds the operations needed to minimize the loss via
-[Gradient Descent](https://en.wikipedia.org/wiki/Gradient_descent).
-
-Firstly, it takes the loss tensor from the `loss()` function and hands it to a
-@{tf.summary.scalar},
-an op for generating summary values into the events file when used with a
-@{tf.summary.FileWriter} (see below).  In this case, it will emit the snapshot value of
-the loss every time the summaries are written out.
-
-```python
-tf.summary.scalar('loss', loss)
-```
-
-Next, we instantiate a @{tf.train.GradientDescentOptimizer}
-responsible for applying gradients with the requested learning rate.
-
-```python
-optimizer = tf.train.GradientDescentOptimizer(learning_rate)
-```
-
-We then generate a single variable to contain a counter for the global
-training step and the @{tf.train.Optimizer.minimize}
-op is used to both update the trainable weights in the system and increment the
-global step.  This op is, by convention, known as the `train_op` and is what must
-be run by a TensorFlow session in order to induce one full step of training
-(see below).
-
-```python
-global_step = tf.Variable(0, name='global_step', trainable=False)
-train_op = optimizer.minimize(loss, global_step=global_step)
-```
-
-## Train the Model
-
-Once the graph is built, it can be iteratively trained and evaluated in a loop
-controlled by the user code in `fully_connected_feed.py`.
-
-### The Graph
-
-At the top of the `run_training()` function is a python `with` command that
-indicates all of the built ops are to be associated with the default
-global @{tf.Graph}
-instance.
-
-```python
-with tf.Graph().as_default():
-```
-
-A `tf.Graph` is a collection of ops that may be executed together as a group.
-Most TensorFlow uses will only need to rely on the single default graph.
-
-More complicated uses with multiple graphs are possible, but beyond the scope of
-this simple tutorial.
-
-### The Session
-
-Once all of the build preparation has been completed and all of the necessary
-ops generated, a @{tf.Session}
-is created for running the graph.
-
-```python
-sess = tf.Session()
-```
-
-Alternately, a `Session` may be generated into a `with` block for scoping:
-
-```python
-with tf.Session() as sess:
-```
-
-The empty parameter to session indicates that this code will attach to
-(or create if not yet created) the default local session.
-
-Immediately after creating the session, all of the `tf.Variable`
-instances are initialized by calling @{tf.Session.run}
-on their initialization op.
-
-```python
-init = tf.global_variables_initializer()
-sess.run(init)
-```
-
-The @{tf.Session.run}
-method will run the complete subset of the graph that
-corresponds to the op(s) passed as parameters.  In this first call, the `init`
-op is a @{tf.group}
-that contains only the initializers for the variables.  None of the rest of the
-graph is run here; that happens in the training loop below.
-
-### Train Loop
-
-After initializing the variables with the session, training may begin.
-
-The user code controls the training per step, and the simplest loop that
-can do useful training is:
-
-```python
-for step in xrange(FLAGS.max_steps):
-    sess.run(train_op)
-```
-
-However, this tutorial is slightly more complicated in that it must also slice
-up the input data for each step to match the previously generated placeholders.
-
-#### Feed the Graph
-
-For each step, the code will generate a feed dictionary that will contain the
-set of examples on which to train for the step, keyed by the placeholder
-ops they represent.
-
-In the `fill_feed_dict()` function, the given `DataSet` is queried for its next
-`batch_size` set of images and labels, and tensors matching the placeholders are
-filled containing the next images and labels.
-
-```python
-images_feed, labels_feed = data_set.next_batch(FLAGS.batch_size,
-                                               FLAGS.fake_data)
-```
-
-A python dictionary object is then generated with the placeholders as keys and
-the representative feed tensors as values.
-
-```python
-feed_dict = {
-    images_placeholder: images_feed,
-    labels_placeholder: labels_feed,
-}
-```
-
-This is passed into the `sess.run()` function's `feed_dict` parameter to provide
-the input examples for this step of training.
-
-#### Check the Status
-
-The code specifies two values to fetch in its run call: `[train_op, loss]`.
-
-```python
-for step in xrange(FLAGS.max_steps):
-    feed_dict = fill_feed_dict(data_sets.train,
-                               images_placeholder,
-                               labels_placeholder)
-    _, loss_value = sess.run([train_op, loss],
-                             feed_dict=feed_dict)
-```
-
-Because there are two values to fetch, `sess.run()` returns a tuple with two
-items.  Each `Tensor` in the list of values to fetch corresponds to a numpy
-array in the returned tuple, filled with the value of that tensor during this
-step of training. Since `train_op` is an `Operation` with no output value, the
-corresponding element in the returned tuple is `None` and, thus,
-discarded. However, the value of the `loss` tensor may become NaN if the model
-diverges during training, so we capture this value for logging.
-
-Assuming that the training runs fine without NaNs, the training loop also
-prints a simple status text every 100 steps to let the user know the state of
-training.
-
-```python
-if step % 100 == 0:
-    print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
-```
-
-#### Visualize the Status
-
-In order to emit the events files used by @{$summaries_and_tensorboard$TensorBoard},
-all of the summaries (in this case, only one) are collected into a single Tensor
-during the graph building phase.
-
-```python
-summary = tf.summary.merge_all()
-```
-
-And then after the session is created, a @{tf.summary.FileWriter}
-may be instantiated to write the events files, which
-contain both the graph itself and the values of the summaries.
-
-```python
-summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
-```
-
-Lastly, the events file will be updated with new summary values every time the
-`summary` is evaluated and the output passed to the writer's `add_summary()`
-function.
-
-```python
-summary_str = sess.run(summary, feed_dict=feed_dict)
-summary_writer.add_summary(summary_str, step)
-```
-
-When the events files are written, TensorBoard may be run against the training
-folder to display the values from the summaries.
-
-![MNIST TensorBoard](https://www.tensorflow.org/images/mnist_tensorboard.png "MNIST TensorBoard")
-
-**NOTE**: For more info about how to build and run Tensorboard, please see the accompanying tutorial @{$summaries_and_tensorboard$Tensorboard: Visualizing Learning}.
-
-#### Save a Checkpoint
-
-In order to emit a checkpoint file that may be used to later restore a model
-for further training or evaluation, we instantiate a
-@{tf.train.Saver}.
-
-```python
-saver = tf.train.Saver()
-```
-
-In the training loop, the @{tf.train.Saver.save}
-method will periodically be called to write a checkpoint file to the training
-directory with the current values of all the trainable variables.
-
-```python
-saver.save(sess, FLAGS.train_dir, global_step=step)
-```
-
-At some later point in the future, training might be resumed by using the
-@{tf.train.Saver.restore}
-method to reload the model parameters.
-
-```python
-saver.restore(sess, FLAGS.train_dir)
-```
-
-## Evaluate the Model
-
-Every thousand steps, the code will attempt to evaluate the model against both
-the training and test datasets.  The `do_eval()` function is called thrice, for
-the training, validation, and test datasets.
-
-```python
-print('Training Data Eval:')
-do_eval(sess,
-        eval_correct,
-        images_placeholder,
-        labels_placeholder,
-        data_sets.train)
-print('Validation Data Eval:')
-do_eval(sess,
-        eval_correct,
-        images_placeholder,
-        labels_placeholder,
-        data_sets.validation)
-print('Test Data Eval:')
-do_eval(sess,
-        eval_correct,
-        images_placeholder,
-        labels_placeholder,
-        data_sets.test)
-```
-
-> Note that more complicated usage would usually sequester the `data_sets.test`
-> to only be checked after significant amounts of hyperparameter tuning.  For
-> the sake of a simple little MNIST problem, however, we evaluate against all of
-> the data.
-
-### Build the Eval Graph
-
-Before entering the training loop, the Eval op should have been built
-by calling the `evaluation()` function from `mnist.py` with the same
-logits/labels parameters as the `loss()` function.
-
-```python
-eval_correct = mnist.evaluation(logits, labels_placeholder)
-```
-
-The `evaluation()` function simply generates a @{tf.nn.in_top_k}
-op that can automatically score each model output as correct if the true label
-can be found in the K most-likely predictions.  In this case, we set the value
-of K to 1 to only consider a prediction correct if it is for the true label.
-
-```python
-eval_correct = tf.nn.in_top_k(logits, labels, 1)
-```
-
-### Eval Output
-
-One can then create a loop for filling a `feed_dict` and calling `sess.run()`
-against the `eval_correct` op to evaluate the model on the given dataset.
-
-```python
-for step in xrange(steps_per_epoch):
-    feed_dict = fill_feed_dict(data_set,
-                               images_placeholder,
-                               labels_placeholder)
-    true_count += sess.run(eval_correct, feed_dict=feed_dict)
-```
-
-The `true_count` variable simply accumulates all of the predictions that the
-`in_top_k` op has determined to be correct.  From there, the precision may be
-calculated from simply dividing by the total number of examples.
-
-```python
-precision = true_count / num_examples
-print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
-      (num_examples, true_count, precision))
-```
diff --git a/tensorflow/docs_src/get_started/mnist/pros.md b/tensorflow/docs_src/get_started/mnist/pros.md
deleted file mode 100644
index 4933dd28cd37e695a10ab28832f26a613589d01a..0000000000000000000000000000000000000000
--- a/tensorflow/docs_src/get_started/mnist/pros.md
+++ /dev/null
@@ -1,435 +0,0 @@
-# Deep MNIST for Experts
-
-TensorFlow is a powerful library for doing large-scale numerical computation.
-One of the tasks at which it excels is implementing and training deep neural
-networks.  In this tutorial we will learn the basic building blocks of a
-TensorFlow model while constructing a deep convolutional MNIST classifier.
-
-*This introduction assumes familiarity with neural networks and the MNIST
-dataset. If you don't have
-a background with them, check out the
-@{$beginners$introduction for beginners}. Be sure to
-@{$install$install TensorFlow} before starting.*
-
-
-## About this tutorial
-
-The first part of this tutorial explains what is happening in the
-[mnist_softmax.py](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/mnist_softmax.py)
-code, which is a basic implementation of a Tensorflow model.  The second part
-shows some ways to improve the accuracy.
-
-You can copy and paste each code snippet from this tutorial into a Python
-environment to follow along, or you can download the fully implemented deep net
-from [mnist_deep.py](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/mnist_deep.py)
-.
-
-What we will accomplish in this tutorial:
-
-- Create a softmax regression function that is a model for recognizing MNIST
-  digits, based on looking at every pixel in the image
-
-- Use Tensorflow to train the model to recognize digits by having it "look" at
-  thousands of examples (and run our first Tensorflow session to do so)
-
-- Check the model's accuracy with our test data
-
-- Build, train, and test a multilayer convolutional neural network to improve
-  the results
-
-## Setup
-
-Before we create our model, we will first load the MNIST dataset, and start a
-TensorFlow session.
-
-### Load MNIST Data
-
-If you are copying and pasting in the code from this tutorial, start here with
-these two lines of code which will download and read in the data automatically:
-
-```python
-from tensorflow.examples.tutorials.mnist import input_data
-mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
-```
-
-Here `mnist` is a lightweight class which stores the training, validation, and
-testing sets as NumPy arrays.  It also provides a function for iterating through
-data minibatches, which we will use below.
-
-### Start TensorFlow InteractiveSession
-
-TensorFlow relies on a highly efficient C++ backend to do its computation. The
-connection to this backend is called a session.  The common usage for TensorFlow
-programs is to first create a graph and then launch it in a session.
-
-Here we instead use the convenient `InteractiveSession` class, which makes
-TensorFlow more flexible about how you structure your code.  It allows you to
-interleave operations which build a
-@{$get_started/get_started#the_computational_graph$computation graph}
-with ones that run the graph.  This is particularly convenient when working in
-interactive contexts like IPython.  If you are not using an
-`InteractiveSession`, then you should build the entire computation graph before
-starting a session and
-@{$get_started/get_started#the_computational_graph$launching the graph}.
-
-```python
-import tensorflow as tf
-sess = tf.InteractiveSession()
-```
-
-#### Computation Graph
-
-To do efficient numerical computing in Python, we typically use libraries like
-[NumPy](http://www.numpy.org/) that do expensive operations such as matrix
-multiplication outside Python, using highly efficient code implemented in
-another language.  Unfortunately, there can still be a lot of overhead from
-switching back to Python every operation. This overhead is especially bad if you
-want to run computations on GPUs or in a distributed manner, where there can be
-a high cost to transferring data.
-
-TensorFlow also does its heavy lifting outside Python, but it takes things a
-step further to avoid this overhead.  Instead of running a single expensive
-operation independently from Python, TensorFlow lets us describe a graph of
-interacting operations that run entirely outside Python.  This approach is
-similar to that used in Theano or Torch.
-
-The role of the Python code is therefore to build this external computation
-graph, and to dictate which parts of the computation graph should be run. See
-the @{$get_started/get_started#the_computational_graph$Computation Graph}
-section of @{$get_started/get_started} for more detail.
-
-## Build a Softmax Regression Model
-
-In this section we will build a softmax regression model with a single linear
-layer. In the next section, we will extend this to the case of softmax
-regression with a multilayer convolutional network.
-
-### Placeholders
-
-We start building the computation graph by creating nodes for the
-input images and target output classes.
-
-```python
-x = tf.placeholder(tf.float32, shape=[None, 784])
-y_ = tf.placeholder(tf.float32, shape=[None, 10])
-```
-
-Here `x` and `y_` aren't specific values. Rather, they are each a `placeholder`
--- a value that we'll input when we ask TensorFlow to run a computation.
-
-The input images `x` will consist of a 2d tensor of floating point numbers.
-Here we assign it a `shape` of `[None, 784]`, where `784` is the dimensionality
-of a single flattened 28 by 28 pixel MNIST image, and `None` indicates that the
-first dimension, corresponding to the batch size, can be of any size.  The
-target output classes `y_` will also consist of a 2d tensor, where each row is a
-one-hot 10-dimensional vector indicating which digit class (zero through nine)
-the corresponding MNIST image belongs to.
-
-The `shape` argument to `placeholder` is optional, but it allows TensorFlow
-to automatically catch bugs stemming from inconsistent tensor shapes.
-
-### Variables
-
-We now define the weights `W` and biases `b` for our model. We could imagine
-treating these like additional inputs, but TensorFlow has an even better way to
-handle them: `Variable`.  A `Variable` is a value that lives in TensorFlow's
-computation graph.  It can be used and even modified by the computation. In
-machine learning applications, one generally has the model parameters be
-`Variable`s.
-
-```python
-W = tf.Variable(tf.zeros([784,10]))
-b = tf.Variable(tf.zeros([10]))
-```
-
-We pass the initial value for each parameter in the call to `tf.Variable`.  In
-this case, we initialize both `W` and `b` as tensors full of zeros. `W` is a
-784x10 matrix (because we have 784 input features and 10 outputs) and `b` is a
-10-dimensional vector (because we have 10 classes).
-
-Before `Variable`s can be used within a session, they must be initialized using
-that session.  This step takes the initial values (in this case tensors full of
-zeros) that have already been specified, and assigns them to each
-`Variable`. This can be done for all `Variables` at once:
-
-```python
-sess.run(tf.global_variables_initializer())
-```
-
-### Predicted Class and Loss Function
-
-We can now implement our regression model. It only takes one line!  We multiply
-the vectorized input images `x` by the weight matrix `W`, add the bias `b`.
-
-```python
-y = tf.matmul(x,W) + b
-```
-
-We can specify a loss function just as easily. Loss indicates how bad the
-model's prediction was on a single example; we try to minimize that while
-training across all the examples. Here, our loss function is the cross-entropy
-between the target and the softmax activation function applied to the model's
-prediction.  As in the beginners tutorial, we use the stable formulation:
-
-```python
-cross_entropy = tf.reduce_mean(
-    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
-```
-
-Note that `tf.nn.softmax_cross_entropy_with_logits` internally applies the
-softmax on the model's unnormalized model prediction and sums across all
-classes, and `tf.reduce_mean` takes the average over these sums.
-
-## Train the Model
-
-Now that we have defined our model and training loss function, it is
-straightforward to train using TensorFlow.  Because TensorFlow knows the entire
-computation graph, it can use automatic differentiation to find the gradients of
-the loss with respect to each of the variables.  TensorFlow has a variety of
-@{$python/train#optimizers$built-in optimization algorithms}.
-For this example, we will use steepest gradient descent, with a step length of
-0.5, to descend the cross entropy.
-
-```python
-train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
-```
-
-What TensorFlow actually did in that single line was to add new operations to
-the computation graph. These operations included ones to compute gradients,
-compute parameter update steps, and apply update steps to the parameters.
-
-The returned operation `train_step`, when run, will apply the gradient descent
-updates to the parameters. Training the model can therefore be accomplished by
-repeatedly running `train_step`.
-
-```python
-for _ in range(1000):
-  batch = mnist.train.next_batch(100)
-  train_step.run(feed_dict={x: batch[0], y_: batch[1]})
-```
-
-We load 100 training examples in each training iteration. We then run the
-`train_step` operation, using `feed_dict` to replace the `placeholder` tensors
-`x` and `y_` with the training examples.  Note that you can replace any tensor
-in your computation graph using `feed_dict` -- it's not restricted to just
-`placeholder`s.
-
-### Evaluate the Model
-
-How well did our model do?
-
-First we'll figure out where we predicted the correct label. `tf.argmax` is an
-extremely useful function which gives you the index of the highest entry in a
-tensor along some axis. For example, `tf.argmax(y,1)` is the label our model
-thinks is most likely for each input, while `tf.argmax(y_,1)` is the true
-label. We can use `tf.equal` to check if our prediction matches the truth.
-
-```python
-correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
-```
-
-That gives us a list of booleans. To determine what fraction are correct, we
-cast to floating point numbers and then take the mean. For example,
-`[True, False, True, True]` would become `[1,0,1,1]` which would become `0.75`.
-
-```python
-accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
-```
-
-Finally, we can evaluate our accuracy on the test data. This should be about
-92% correct.
-
-```python
-print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
-```
-
-## Build a Multilayer Convolutional Network
-
-Getting 92% accuracy on MNIST is bad. It's almost embarrassingly bad. In this
-section, we'll fix that, jumping from a very simple model to something
-moderately sophisticated: a small convolutional neural network. This will get us
-to around 99.2% accuracy -- not state of the art, but respectable.
-
-Here is a diagram, created with TensorBoard, of the model we will build:
-
-<div style="width:40%; margin:auto; margin-bottom:10px; margin-top:20px;">
-<img src="https://www.tensorflow.org/images/mnist_deep.png">
-</div>
-
-### Weight Initialization
-
-To create this model, we're going to need to create a lot of weights and biases.
-One should generally initialize weights with a small amount of noise for
-symmetry breaking, and to prevent 0 gradients. Since we're using
-[ReLU](https://en.wikipedia.org/wiki/Rectifier_(neural_networks)) neurons, it is
-also good practice to initialize them with a slightly positive initial bias to
-avoid "dead neurons". Instead of doing this repeatedly while we build the model,
-let's create two handy functions to do it for us.
-
-```python
-def weight_variable(shape):
-  initial = tf.truncated_normal(shape, stddev=0.1)
-  return tf.Variable(initial)
-
-def bias_variable(shape):
-  initial = tf.constant(0.1, shape=shape)
-  return tf.Variable(initial)
-```
-
-### Convolution and Pooling
-
-TensorFlow also gives us a lot of flexibility in convolution and pooling
-operations. How do we handle the boundaries? What is our stride size?
-In this example, we're always going to choose the vanilla version.
-Our convolutions uses a stride of one and are zero padded so that the
-output is the same size as the input. Our pooling is plain old max pooling
-over 2x2 blocks. To keep our code cleaner, let's also abstract those operations
-into functions.
-
-```python
-def conv2d(x, W):
-  return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
-
-def max_pool_2x2(x):
-  return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
-                        strides=[1, 2, 2, 1], padding='SAME')
-```
-
-### First Convolutional Layer
-
-We can now implement our first layer. It will consist of convolution, followed
-by max pooling. The convolution will compute 32 features for each 5x5 patch.
-Its weight tensor will have a shape of `[5, 5, 1, 32]`. The first two
-dimensions are the patch size, the next is the number of input channels, and
-the last is the number of output channels. We will also have a bias vector with
-a component for each output channel.
-
-```python
-W_conv1 = weight_variable([5, 5, 1, 32])
-b_conv1 = bias_variable([32])
-```
-
-To apply the layer, we first reshape `x` to a 4d tensor, with the second and
-third dimensions corresponding to image width and height, and the final
-dimension corresponding to the number of color channels.
-
-```python
-x_image = tf.reshape(x, [-1, 28, 28, 1])
-```
-
-We then convolve `x_image` with the weight tensor, add the
-bias, apply the ReLU function, and finally max pool. The `max_pool_2x2` method will
-reduce the image size to 14x14.
-
-```python
-h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
-h_pool1 = max_pool_2x2(h_conv1)
-```
-
-### Second Convolutional Layer
-
-In order to build a deep network, we stack several layers of this type. The
-second layer will have 64 features for each 5x5 patch.
-
-```python
-W_conv2 = weight_variable([5, 5, 32, 64])
-b_conv2 = bias_variable([64])
-
-h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
-h_pool2 = max_pool_2x2(h_conv2)
-```
-
-### Densely Connected Layer
-
-Now that the image size has been reduced to 7x7, we add a fully-connected layer
-with 1024 neurons to allow processing on the entire image. We reshape the tensor
-from the pooling layer into a batch of vectors,
-multiply by a weight matrix, add a bias, and apply a ReLU.
-
-```python
-W_fc1 = weight_variable([7 * 7 * 64, 1024])
-b_fc1 = bias_variable([1024])
-
-h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
-h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
-```
-
-#### Dropout
-
-To reduce overfitting, we will apply [dropout](
-https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf) before the readout layer.
-We create a `placeholder` for the probability that a neuron's output is kept
-during dropout. This allows us to turn dropout on during training, and turn it
-off during testing.
-TensorFlow's `tf.nn.dropout` op automatically handles scaling neuron outputs in
-addition to masking them, so dropout just works without any additional
-scaling.<sup id="a1">[1](#f1)</sup>
-
-```python
-keep_prob = tf.placeholder(tf.float32)
-h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
-```
-
-### Readout Layer
-
-Finally, we add a layer, just like for the one layer softmax regression
-above.
-
-```python
-W_fc2 = weight_variable([1024, 10])
-b_fc2 = bias_variable([10])
-
-y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
-```
-
-### Train and Evaluate the Model
-
-How well does this model do? To train and evaluate it we will use code that is
-nearly identical to that for the simple one layer SoftMax network above.
-
-The differences are that:
-
-- We will replace the steepest gradient descent optimizer with the more
-  sophisticated ADAM optimizer.
-
-- We will include the additional parameter `keep_prob` in `feed_dict` to control
-  the dropout rate.
-
-- We will add logging to every 100th iteration in the training process.
-
-We will also use tf.Session rather than tf.InteractiveSession. This better
-separates the process of creating the graph (model specification) and the
-process of evaluating the graph (model fitting). It generally makes for cleaner
-code. The tf.Session is created within a [`with` block](https://docs.python.org/3/whatsnew/2.6.html#pep-343-the-with-statement)
-so that it is automatically destroyed once the block is exited.
-
-Feel free to run this code. Be aware that it does 20,000 training iterations
-and may take a while (possibly up to half an hour), depending on your processor.
-
-```python
-cross_entropy = tf.reduce_mean(
-    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
-train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
-correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
-accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
-
-with tf.Session() as sess:
-  sess.run(tf.global_variables_initializer())
-  for i in range(20000):
-    batch = mnist.train.next_batch(50)
-    if i % 100 == 0:
-      train_accuracy = accuracy.eval(feed_dict={
-          x: batch[0], y_: batch[1], keep_prob: 1.0})
-      print('step %d, training accuracy %g' % (i, train_accuracy))
-    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
-
-  print('test accuracy %g' % accuracy.eval(feed_dict={
-      x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
-```
-
-The final test set accuracy after running this code should be approximately 99.2%.
-
-We have learned how to quickly and easily build, train, and evaluate a
-fairly sophisticated deep learning model using TensorFlow.
-
-<b id="f1">1</b>: For this small convolutional network, performance is actually nearly identical with and without dropout. Dropout is often very effective at reducing overfitting, but it is most useful when training very large neural networks. [↩](#a1)
diff --git a/tensorflow/docs_src/get_started/premade_estimators.md b/tensorflow/docs_src/get_started/premade_estimators.md
index ff839fd040167dc16087311666ff25da2088c519..dbc35065abf22c88c325c4edc370b6da91c4df5b 100644
--- a/tensorflow/docs_src/get_started/premade_estimators.md
+++ b/tensorflow/docs_src/get_started/premade_estimators.md
@@ -6,7 +6,7 @@ how to write the Iris classification problem in TensorFlow.
 
 Prior to reading this document, do the following:
 
-* [Install TensorFlow](install/index.md).
+* @{$install$Install TensorFlow}.
 * If you installed TensorFlow with virtualenv or Anaconda, activate your
   TensorFlow environment.
 * To keep the data import simple, our Iris example uses Pandas. You can
@@ -28,7 +28,11 @@ Take the following steps to get the sample code for this program:
 
        `cd models/samples/core/get_started/`
 
-The program described in this document is called `premade_estimator.py`.
+The program described in this document is
+[`premade_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/premade_estimator.py).
+This program uses
+[`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py)
+To fetch its training data.
 
 ### Running the program
 
@@ -38,15 +42,15 @@ You run TensorFlow programs as you would run any Python program. For example:
 python premade_estimator.py
 ```
 
-The program should output training logs and some predictions against a test
-set. For example, the first line in the following output shows that the model
-thinks there is a 99.6% chance that the first example in the test set is a
-Sentosa. Since the test set `expected "Setosa"`, this appears to be a good
-prediction.
+The program should output training logs followed by some predictions against
+the test set. For example, the first line in the following output shows that
+the model thinks there is a 99.6% chance that the first example in the test
+set is a Setosa. Since the test set `expected "Setosa"`, this appears to be
+a good prediction.
 
 ``` None
 ...
-Prediction is "Sentosa" (99.6%), expected "Setosa"
+Prediction is "Setosa" (99.6%), expected "Setosa"
 
 Prediction is "Versicolor" (99.8%), expected "Versicolor"
 
@@ -67,7 +71,7 @@ Before getting into the details of the program itself, let's investigate the
 programming environment. As the following illustration shows, TensorFlow
 provides a programming stack consisting of multiple API layers:
 
-<div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
+<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
 <img style="width:100%" src="../images/tensorflow_programming_environment.png">
 </div>
 <div style="text-align: center">
@@ -76,12 +80,12 @@ The TensorFlow Programming Environment
 
 We strongly recommend writing TensorFlow programs with the following APIs:
 
-* Estimators, which represent a complete model. The Estimator API provides
-  methods to train the model, to judge the model's accuracy, and to generate
-  predictions.
-* Datasets, which build a data input pipeline. The Dataset API has methods to
-  load and manipulate data, and feed it into your model. The Datasets API meshes
-  well with the Estimators API.
+* @{tf.estimator$Estimators}, which represent a complete model.
+  The Estimator API provides methods to train the model, to judge the model's
+  accuracy, and to generate predictions.
+* @{$get_started/datasets_quickstart$Datasets}, which build a data input
+  pipeline. The Dataset API has methods to load and manipulate data, and feed
+  it into your model. The Datasets API meshes well with the Estimators API.
 
 ## Classifying irises: an overview
 
@@ -106,8 +110,10 @@ and [*Iris virginica*](https://www.flickr.com/photos/33397993@N05/3352169862)
 
 ### The data set
 
-The Iris data set contains four features and one label.  The four features
-identify the following botanical characteristics of individual Iris flowers:
+The Iris data set contains four features and one
+[label](https://developers.google.com/machine-learning/glossary/#label).
+The four features identify the following botanical characteristics of
+individual Iris flowers:
 
 * sepal length
 * sepal width
@@ -128,7 +134,7 @@ The following table shows three examples in the data set:
 
 |sepal length | sepal width | petal length | petal width| species (label) |
 |------------:|------------:|-------------:|-----------:|:---------------:|
-|         5.1 |         3.3 |          1.7 |        0.5 |   0 (Sentosa)   |
+|         5.1 |         3.3 |          1.7 |        0.5 |   0 (Setosa)   |
 |         5.0 |         2.3 |          3.3 |        1.0 |   1 (versicolor)|
 |         6.4 |         2.8 |          5.6 |        2.2 |   2 (virginica) |
 
@@ -143,11 +149,10 @@ topology:
 The following figure illustrates the features, hidden layers, and predictions
 (not all of the nodes in the hidden layers are shown):
 
-
 <div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
 <img style="width:100%"
   alt="A diagram of the network architecture: Inputs, 2 hidden layers, and outputs"
-  src="../images/iris_model.png">
+  src="../images/custom_estimators/full_network.png">
 </div>
 <div style="text-align: center">
 The Model.
@@ -201,31 +206,20 @@ Let's see how those tasks are implemented in Iris.
 You must create input functions to supply data for training,
 evaluating, and prediction.
 
-An **input function** is a function that returns the following two-element
-tuple:
+An **input function** is a function that returns a @{tf.data.Dataset} object
+which outputs the following two-element tuple:
 
 * "features" - A Python dictionary in which:
     * Each key is the name of a feature.
     * Each value is an array containing all of that feature's values.
-* "label" - An array containing the values of the label for every example.
-
-Just to demonstrate the format of the input function here's a simple
-implementation:
-
-```python
-def input_evaluation_set():
-    features = {'SepalLength': np.array([6.4, 5.0]),
-                'SepalWidth':  np.array([2.8, 2.3]),
-                'PetalLength': np.array([5.6, 3.3]),
-                'PetalWidth':  np.array([2.2, 1.0])}
-    labels = np.array([2, 1])
-    return features, labels
-```
+* "label" - An array containing the values of the
+  [label](https://developers.google.com/machine-learning/glossary/#label) for
+  every example.
 
 Your input function may generate the "features" dictionary and "label" list any
-way you like. However, we recommend using TensorFlow's Dataset API, which can
-deftly parse all sorts of data. At a high-level, the Datasets API consists of
-the following classes:
+way you like. However, we recommend using TensorFlow's @{tf.data.Dataset} API,
+which can deftly parse all sorts of data. At a high-level,
+the @{tf.data.Dataset} API consists of the following classes:
 
 <div style="width:80%; margin:auto; margin-bottom:10px; margin-top:20px;">
 <img style="width:100%"
@@ -248,34 +242,33 @@ The Dataset API can handle a lot of common cases for you. For example,
 using the Dataset API, you can easily read in records from a large collection
 of files in parallel and join them into a single stream.
 
-To keep things simple in this example we are going to load the data with pandas, and build our input pipeline from this in-memory data.
+To keep things simple in this example we are going to load the data with pandas,
+and build our input pipeline from this in-memory data.
 
-Here is the input function used for training in this program:
+Here is the input function used for training in this program, which is available
+in [`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py):
 
 ``` python
 def train_input_fn(features, labels, batch_size):
     """An input function for training"""
     # Convert the inputs to a Dataset.
-    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
+    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
 
     # Shuffle, repeat, and batch the examples.
-    dataset = dataset.shuffle(1000).repeat().batch(batch_size)
-
-    # Build the Iterator, and return the read end of the pipeline.
-    return dataset.make_one_shot_iterator().get_next()
+    return dataset.shuffle(1000).repeat().batch(batch_size)
 ```
 
 ## Define the Feature Columns
 
 A [**Feature Column**](https://developers.google.com/machine-learning/glossary/#feature_columns)
-is an object describing how the model should use raw input features from the
+is an object describing how the model should use raw input data from the
 features dictionary. When you build an Estimator model, you pass it a list of
 feature columns that describes each of the features you want the model to use.
-
-These objects are created by functions in the @{tf.feature_column} module. `tf.feature_column` methods provide many different ways to represent data.
+The @{tf.feature_column} module provides many options for representing data
+to the model.
 
 For Iris, the 4 raw features are numeric values, so we'll build a list of
-feature columns, to tell the Estimator model to represent each of the four
+feature columns to tell the Estimator model to represent each of the four
 features as 32-bit floating-point values. Therefore, the code to create the
 Feature Column is simply:
 
@@ -287,7 +280,8 @@ for key in train_x.keys():
 ```
 
 Feature Columns can be far more sophisticated than those we're showing here.
-<!--TODO(markdaoust) add link to feature_columns doc when it exists.-->
+We detail feature columns @{$get_started/feature_columns$later on} in
+getting started.
 
 Now that we have the description of how we want the model to represent the raw
 features, we can build the estimator.
@@ -295,14 +289,13 @@ features, we can build the estimator.
 
 ## Instantiate an Estimator
 
-The Iris problem is a classic classifier problem. Fortunately, TensorFlow
+The Iris problem is a classic classification problem. Fortunately, TensorFlow
 provides several pre-made classifier Estimators, including:
 
 * @{tf.estimator.DNNClassifier}—for deep models that perform multi-class
   classification.
 * @{tf.estimator.DNNLinearCombinedClassifier}—for wide-n-deep models.
-* @{tf.estimator.LinearClassifier}—for linear models that feed results into
-  binary classifiers.
+* @{tf.estimator.LinearClassifier}— for classifiers based on linear models.
 
 For the Iris problem, `tf.estimator.DNNClassifier` seems like the best choice.
 Here's how we instantiated this Estimator:
@@ -332,14 +325,15 @@ Train the model by calling the Estimator's `train` method as follows:
 ```python
 # Train the Model.
 classifier.train(
-    input_fn=lambda:train_input_fn(train_x, train_y, args.batch_size),
+    input_fn=lambda:iris_data.train_input_fn(train_x, train_y, args.batch_size),
     steps=args.train_steps)
 ```
 
-Here we wrap up our `input_fn` call in a [`lambda`](https://docs.python.org/3/tutorial/controlflow.html)
-to allow the Estimator to call it, at the correct time, with no arguments.
-The `steps` argument tells the method to stop training after a number of
-training steps.
+Here we wrap up our `input_fn` call in a
+[`lambda`](https://docs.python.org/3/tutorial/controlflow.html)
+to capture the arguments while providing an input function that takes no
+arguments, as expected by the Estimator. The `steps` argument tells the method
+to stop training after a number of training steps.
 
 ### Evaluate the trained model
 
@@ -350,14 +344,14 @@ model on the test data:
 ```python
 # Evaluate the model.
 eval_result = classifier.evaluate(
-    input_fn=lambda:eval_input_fn(test_x, test_y, args.batch_size))
+    input_fn=lambda:iris_data.eval_input_fn(test_x, test_y, args.batch_size))
 
 print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))
 ```
 
-Note how unlike our call to the `train` method, we did not pass the `steps`
-argument to evaluate. Our `eval_input_fn` doesn't use the `repeat` method on
-the dataset, so evaluation just runs to the end of the data.
+Unlike our call to the `train` method, we did not pass the `steps`
+argument to evaluate. Our `eval_input_fn` only yields a single
+[epoch](https://developers.google.com/machine-learning/glossary/#epoch) of data.
 
 Running this code yields the following output (or something similar):
 
@@ -383,7 +377,8 @@ predict_x = {
 }
 
 predictions = classifier.predict(
-    input_fn=lambda:eval_input_fn(predict_x, batch_size=args.batch_size))
+    input_fn=lambda:iris_data.eval_input_fn(predict_x,
+                                            batch_size=args.batch_size))
 ```
 
 The `predict` method returns a Python iterable, yielding a dictionary of
@@ -397,29 +392,35 @@ for pred_dict, expec in zip(predictions, expected):
 
     class_id = pred_dict['class_ids'][0]
     probability = pred_dict['probabilities'][class_id]
-    print(template.format(SPECIES[class_id], 100 * probability, expec))
+
+    print(template.format(iris_data.SPECIES[class_id],
+                          100 * probability, expec))
 ```
 
 Running the preceding code yields the following output:
 
 ``` None
 ...
-Prediction is "Sentosa" (99.6%), expected "Setosa"
+Prediction is "Setosa" (99.6%), expected "Setosa"
 
 Prediction is "Versicolor" (99.8%), expected "Versicolor"
 
 Prediction is "Virginica" (97.9%), expected "Virginica"
 ```
 
-## Next
 
-Now that you've gotten started writing TensorFlow programs.
+## Summary
+
+Pre-made Estimators are an effective way to quickly create standard models.
+
+Now that you've gotten started writing TensorFlow programs, consider the
+following material:
 
-* For more on Datasets, see the
-  @{$programmers_guide/datasets$Programmer's guide} and
-  @{tf.data$reference documentation}.
-* For more on Estimators, see the
-  @{$programmers_guide/estimators$Programmer's guide} and
-  @{tf.estimator$reference documentation}.
-<!--TODO(markdaoust) add links to next get_started section when it exists.-->
+* @{$get_started/checkpoints$Checkpoints} to learn how to save and restore
+  models.
+* @{$get_started/datasets_quickstart$Datasets} to learn more about importing
+  data into your
+  model.
+* @{$get_started/custom_estimators$Creating Custom Estimators} to learn how to
+  write your own Estimator, customized for a particular problem.
 
diff --git a/tensorflow/docs_src/install/index.md b/tensorflow/docs_src/install/index.md
index c4fc882ddd43eed8fd1c8562f6ac89a7dd68535d..3c8488643f071c147dfbc4e0b4b4760b0a817718 100644
--- a/tensorflow/docs_src/install/index.md
+++ b/tensorflow/docs_src/install/index.md
@@ -4,7 +4,7 @@ We've built and tested TensorFlow on the following 64-bit laptop/desktop
 operating systems:
 
   * MacOS X 10.11 (El Capitan) or later.
-  * Ubuntu 14.04 or later
+  * Ubuntu 16.04 or later
   * Windows 7 or later.
 
 Although you might be able to install TensorFlow on other laptop or desktop
diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md
index df622c6ac57907122e4d236e3623d947dc35ac58..ba1a4118aece1f42822f7cd084feed50c5cf6ebb 100644
--- a/tensorflow/docs_src/install/install_c.md
+++ b/tensorflow/docs_src/install/install_c.md
@@ -38,7 +38,7 @@ enable TensorFlow for C:
          OS="linux" # Change to "darwin" for macOS
          TARGET_DIRECTORY="/usr/local"
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.5.0-rc1.tar.gz" |
            sudo tar -C $TARGET_DIRECTORY -xz
 
      The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md
index 8b3da49a0d4bca1b2bc2293520e0b946a7727c88..87cc647317a11fab0d9d0219dd5764af3dcb2ecc 100644
--- a/tensorflow/docs_src/install/install_go.md
+++ b/tensorflow/docs_src/install/install_go.md
@@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go:
          TF_TYPE="cpu" # Change to "gpu" for GPU support
          TARGET_DIRECTORY='/usr/local'
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.5.0-rc1.tar.gz" |
          sudo tar -C $TARGET_DIRECTORY -xz
 
      The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md
index 6eb81582491899c9c278c41fb39ae21d7fc3f4a9..37e109a6e4bdee97ad02bc7aceb2c0c24e1ec7ec 100644
--- a/tensorflow/docs_src/install/install_java.md
+++ b/tensorflow/docs_src/install/install_java.md
@@ -17,7 +17,7 @@ instructions might also work on other variants, we have only tested
 (and we only support) these instructions on machines meeting the
 following requirements:
 
-  * Ubuntu 14.04 or higher; 64-bit, x86
+  * Ubuntu 16.04 or higher; 64-bit, x86
   * macOS X 10.11 (El Capitan) or higher
   * Windows 7 or higher; 64-bit, x86
 
@@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs:
 <dependency>
   <groupId>org.tensorflow</groupId>
   <artifactId>tensorflow</artifactId>
-  <version>1.4.0</version>
+  <version>1.5.0-rc1</version>
 </dependency>
 ```
 
@@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow:
                <dependency>
                  <groupId>org.tensorflow</groupId>
                  <artifactId>tensorflow</artifactId>
-                 <version>1.4.0</version>
+                 <version>1.5.0-rc1</version>
                </dependency>
              </dependencies>
          </project>
@@ -113,6 +113,29 @@ Maven projects. If not, check
 [Stack Overflow](http://stackoverflow.com/questions/tagged/tensorflow)
 for possible solutions.  You can skip reading the rest of this document.
 
+### GPU support
+
+If your Linux system has an NVIDIA® GPU and your TensorFlow Java program
+requires GPU acceleration, then add the following to the project's `pom.xml`
+instead:
+
+```xml
+<dependency>
+  <groupId>org.tensorflow</groupId>
+  <artifactId>libtensorflow</artifactId>
+  <version>1.5.0-rc1</version>
+</dependency>
+<dependency>
+  <groupId>org.tensorflow</groupId>
+  <artifactId>libtensorflow_jni_gpu</artifactId>
+  <version>1.5.0-rc1</version>
+</dependency>
+```
+
+GPU acceleration is available via Maven only for Linux and only if your system
+meets the
+@{$install_linux#determine_which_tensorflow_to_install$requirements for GPU}.
+
 ## Using TensorFlow with JDK
 
 This section describes how to use TensorFlow using the `java` and `javac`
@@ -124,7 +147,7 @@ refer to the simpler instructions above instead.
 Take the following steps to install TensorFlow for Java on Linux or macOS:
 
   1. Download
-     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar),
+     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0-rc1.jar),
      which is the TensorFlow Java Archive (JAR).
 
   2. Decide whether you will run TensorFlow for Java on CPU(s) only or with
@@ -143,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
          OS=$(uname -s | tr '[:upper:]' '[:lower:]')
          mkdir -p ./jni
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.5.0-rc1.tar.gz" |
            tar -xz -C ./jni
 
 ### Install on Windows
@@ -151,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
 Take the following steps to install TensorFlow for Java on Windows:
 
   1. Download
-     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar),
+     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0-rc1.jar),
      which is the TensorFlow Java Archive (JAR).
   2. Download the following Java Native Interface (JNI) file appropriate for
-     [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0.zip).
+     [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.5.0-rc1.zip).
   3. Extract this .zip file.
 
 
@@ -202,7 +225,7 @@ must be part of your `classpath`. For example, you can include the
 downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
 as follows:
 
-<pre><b>javac -cp libtensorflow-1.4.0.jar HelloTF.java</b></pre>
+<pre><b>javac -cp libtensorflow-1.5.0-rc1.jar HelloTF.java</b></pre>
 
 
 ### Running
@@ -216,11 +239,11 @@ two files are available to the JVM:
 For example, the following command line executes the `HelloTF` program on Linux
 and macOS X:
 
-<pre><b>java -cp libtensorflow-1.4.0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.5.0-rc1.jar:. -Djava.library.path=./jni HelloTF</b></pre>
 
 And the following command line executes the `HelloTF` program on Windows:
 
-<pre><b>java -cp libtensorflow-1.4.0.jar;. -Djava.library.path=jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.5.0-rc1.jar;. -Djava.library.path=jni HelloTF</b></pre>
 
 If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
 installed TensorFlow for Java and are ready to use the API.  If the program
diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md
index 28b04bab9561a050aee2acb4bb8b472a86c12b95..03f12dff08cb3483666df4b8553b97fc1c4f34f9 100644
--- a/tensorflow/docs_src/install/install_linux.md
+++ b/tensorflow/docs_src/install/install_linux.md
@@ -6,7 +6,7 @@ tested (and we only support) these instructions on machines meeting the
 following requirements:
 
   * 64-bit desktops or laptops
-  * Ubuntu 14.04 or higher
+  * Ubuntu 16.04 or higher
 
 
 ## Determine which TensorFlow to install
@@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv:
      Virtualenv environment:
 
      <pre>(tensorflow)$ <b>pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
 
 If you encounter installation problems, see
 [Common Installation Problems](#common_installation_problems).
@@ -293,7 +293,7 @@ take the following steps:
 
      <pre>
      $ <b>sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp34-cp34m-linux_x86_64.whl</b>
      </pre>
 
      If this step fails, see
@@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
 
      <pre>
      (tensorflow)$ <b>pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
 
 
 <a name="ValidateYourInstallation"></a>
@@ -531,7 +531,7 @@ TensorFlow programs:
 
 <pre>Hello, TensorFlow!</pre>
 
-If you are new to TensorFlow, see @{$get_started/get_started$Getting Started with TensorFlow}.
+If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with TensorFlow}.
 
 If the system outputs an error message instead of a greeting, see [Common
 installation problems](#common_installation_problems).
@@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations.
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp27-none-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc1-cp27-none-linux_x86_64.whl
 </pre>
 
 Note that GPU support requires the NVIDIA hardware and software described in
@@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp34-cp34m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc1-cp34-cp34m-linux_x86_64.whl
 </pre>
 
 Note that GPU support requires the NVIDIA hardware and software described in
@@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp35-cp35m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc1-cp35-cp35m-linux_x86_64.whl
 </pre>
 
 
@@ -705,57 +705,16 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc1-cp36-cp36m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc1-cp36-cp36m-linux_x86_64.whl
 </pre>
 
 
 Note that GPU support requires the NVIDIA hardware and software described in
 [NVIDIA requirements to run TensorFlow with GPU support](#NVIDIARequirements).
-
-<a name="Protobuf31"></a>
-## Protobuf pip package 3.1
-
-You can skip this section unless you are seeing problems related
-to the protobuf pip package.
-
-**NOTE:** If your TensorFlow programs are running slowly, you might
-have a problem related to the protobuf pip package.
-
-The TensorFlow pip package depends on protobuf pip package version 3.1. The
-protobuf pip package downloaded from PyPI (when invoking
-<tt>pip install protobuf</tt>) is a Python-only library containing
-Python implementations of proto serialization/deserialization that can run
-**10x-50x slower** than the C++ implementation. Protobuf also supports a
-binary extension for the Python package that contains fast
-C++ based proto parsing.  This extension is not available in the
-standard Python-only pip package.  We have created a custom binary
-pip package for protobuf that contains the binary extension. To install
-the custom binary protobuf pip package, invoke one of the following commands:
-
-  * for Python 2.7:
-
-  <pre>
-  $ <b>pip install --upgrade \
-  https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.1.0-cp27-none-linux_x86_64.whl</b></pre>
-
-  * for Python 3.5:
-
-  <pre>
-  $ <b>pip3 install --upgrade \
-  https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.1.0-cp35-none-linux_x86_64.whl</b></pre>
-
-Installing this protobuf package will overwrite the existing protobuf package.
-Note that the binary pip package already has support for protobufs
-larger than 64MB, which should fix errors such as these:
-
-<pre>[libprotobuf ERROR google/protobuf/src/google/protobuf/io/coded_stream.cc:207]
-A protocol message was rejected because it was too big (more than 67108864 bytes).
-To increase the limit (or to disable these warnings), see
-CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.</pre>
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md
index 79b383817b4865dab20232b453d522c2613f9e9d..e13ddadab7b0e2ed96bdaf5600b3479a4b5eec55 100644
--- a/tensorflow/docs_src/install/install_mac.md
+++ b/tensorflow/docs_src/install/install_mac.md
@@ -79,22 +79,23 @@ Take the following steps to install TensorFlow with Virtualenv:
   4. Activate the Virtualenv environment by issuing one of the
      following commands:
 
-     <pre>$ <b>source ~/tensorflow/bin/activate</b>      # If using bash, sh, ksh, or zsh
-    $ <b>source ~/tensorflow/bin/activate.csh</b>  # If using csh or tcsh </pre>
+     <pre>$ <b>cd <i>targetDirectory</i></b>
+    $ <b>source ./bin/activate</b>      # If using bash, sh, ksh, or zsh
+    $ <b>source ./bin/activate.csh</b>  # If using csh or tcsh </pre>
 
      The preceding `source` command should change your prompt to the following:
 
-     <pre> (tensorflow)$ </pre>
+     <pre> (<i>targetDirectory</i>)$ </pre>
 
   5. Ensure pip ≥8.1 is installed:
 
-     <pre> (tensorflow)$ <b>easy_install -U pip</b></pre>
+     <pre> (<i>targetDirectory</i>)$ <b>easy_install -U pip</b></pre>
 
   6. Issue one of the following commands to install TensorFlow and all the
      packages that TensorFlow requires into the active Virtualenv environment:
 
-     <pre> (tensorflow)$ <b>pip install --upgrade tensorflow</b>      # for Python 2.7
-     (tensorflow)$ <b>pip3 install --upgrade tensorflow</b>     # for Python 3.n
+     <pre> (<i>targetDirectory</i>)$ <b>pip install --upgrade tensorflow</b>      # for Python 2.7
+     (<i>targetDirectory</i>)$ <b>pip3 install --upgrade tensorflow</b>     # for Python 3.n
 
   7. Optional. If Step 6 failed (typically because you invoked a pip version
      lower than 8.1), install TensorFlow in the active
@@ -114,7 +115,7 @@ Take the following steps to install TensorFlow with Virtualenv:
      TensorFlow in the active Virtualenv is as follows:
 
      <pre> $ <b>pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc1-py2-none-any.whl</b></pre>
 
 If you encounter installation problems, see
 [Common Installation Problems](#common-installation-problems).
@@ -128,16 +129,18 @@ to confirm that the installation worked properly.
 
 Note that you must activate the Virtualenv environment each time you
 use TensorFlow in a new shell.  If the Virtualenv environment is not
-currently active (that is, the prompt is not `(tensorflow)`, invoke
+currently active (that is, the prompt is not `(<i>targetDirectory</i>)`, invoke
 one of the following commands:
 
-<pre>$ <b>source ~/tensorflow/bin/activate</b>      # bash, sh, ksh, or zsh
-$ <b>source ~/tensorflow/bin/activate.csh</b>  # csh or tcsh </pre>
+<pre>$ <b>cd <i>targetDirectory</i></b>
+$ <b>source ./bin/activate</b>      # If using bash, sh, ksh, or zsh
+$ <b>source ./bin/activate.csh</b>  # If using csh or tcsh </pre>
+
 
 Your prompt will transform to the following to indicate that your
 tensorflow environment is active:
 
-<pre> (tensorflow)$ </pre>
+<pre> (<i>targetDirectory</i>)$ </pre>
 
 When the Virtualenv environment is active, you may run
 TensorFlow programs from this shell.
@@ -145,7 +148,7 @@ TensorFlow programs from this shell.
 When you are done using TensorFlow, you may deactivate the
 environment by issuing the following command:
 
-<pre> (tensorflow)$ <b>deactivate</b> </pre>
+<pre> (<i>targetDirectory</i>)$ <b>deactivate</b> </pre>
 
 The prompt will revert back to your default prompt (as defined by `PS1`).
 
@@ -235,7 +238,7 @@ take the following steps:
      issue the following command:
 
      <pre> $ <b>sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b> </pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc1-py2-none-any.whl</b> </pre>
 
      If the preceding command fails, see
      [installation problems](#common-installation-problems).
@@ -331,20 +334,20 @@ Take the following steps to install TensorFlow in an Anaconda environment:
   3. Activate the conda environment by issuing the following command:
 
      <pre>$ <b>source activate tensorflow</b>
-     (tensorflow)$  # Your prompt should change</pre>
+     (<i>targetDirectory</i>)$  # Your prompt should change</pre>
 
   4. Issue a command of the following format to install
      TensorFlow inside your conda environment:
 
-     <pre>(tensorflow)<b>$ pip install --ignore-installed --upgrade</b> <i>TF_PYTHON_URL</i></pre>
+     <pre>(<i>targetDirectory</i>)<b>$ pip install --ignore-installed --upgrade</b> <i>TF_PYTHON_URL</i></pre>
 
      where <i>TF_PYTHON_URL</i> is the
      [URL of the TensorFlow Python package](#the_url_of_the_tensorflow_python_package).
      For example, the following command installs the CPU-only version of
      TensorFlow for Python 2.7:
 
-     <pre> (tensorflow)$ <b>pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
+     <pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc1-py2-none-any.whl</b></pre>
 
 
 <a name="ValidateYourInstallation"></a>
@@ -395,7 +398,7 @@ writing TensorFlow programs:
 <pre>Hello, TensorFlow!</pre>
 
 If you are new to TensorFlow, see
-@{$get_started/get_started$Getting Started with TensorFlow}.
+@{$get_started/premade_estimators$Getting Started with TensorFlow}.
 
 If the system outputs an error message instead of a greeting, see
 [Common installation problems](#common_installation_problems).
@@ -517,7 +520,7 @@ This section documents the relevant values for Mac OS installations.
 
 
 <pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc1-py2-none-any.whl
 </pre>
 
 
@@ -525,46 +528,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.
 
 
 <pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc1-py3-none-any.whl
 </pre>
-
-
-
-<a name="Protobuf31"></a>
-## Protobuf pip package 3.1
-
-You can skip this section unless you are seeing problems related
-to the protobuf pip package.
-
-**NOTE:** If your TensorFlow programs are running slowly, you might
-have a problem related to the protobuf pip package.
-
-The TensorFlow pip package depends on protobuf pip package version 3.1. The
-protobuf pip package downloaded from PyPI (when invoking
-<tt>pip install protobuf</tt>) is a Python-only library containing
-Python implementations of proto serialization/deserialization that can run
-**10x-50x slower** than the C++ implementation. Protobuf also supports a
-binary extension for the Python package that contains fast
-C++ based proto parsing.  This extension is not available in the
-standard Python-only pip package.  We have created a custom binary
-pip package for protobuf that contains the binary extension. To install
-the custom binary protobuf pip package, invoke one of the following commands:
-
-  * for Python 2.7:
-
-    <pre>$ <b>pip install --upgrade \
-    https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.1.0-cp27-none-macosx_10_11_x86_64.whl</b></pre>
-
-  * for Python 3.n:
-
-    <pre>$ <b>pip3 install --upgrade \
-    https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.1.0-cp35-none-macosx_10_11_x86_64.whl</b></pre>
-
-Installing this protobuf package will overwrite the existing protobuf package.
-Note that the binary pip package already has support for protobufs
-larger than 64MB, which should fix errors such as these:
-
-<pre>[libprotobuf ERROR google/protobuf/src/google/protobuf/io/coded_stream.cc:207]
-A protocol message was rejected because it was too big (more than 67108864 bytes).
-To increase the limit (or to disable these warnings), see
-CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.</pre>
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index c01aa907a37cca3e1ef976ddd64ab2d50a6f5d33..f494cc7a7c0575fd7950b6fe28d7671e1f25725f 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -25,8 +25,10 @@ like to try to build TensorFlow on Windows anyway, use either of the
 following:
 
 *   [Bazel on Windows](https://bazel.build/versions/master/docs/windows.html)
-*   [TensorFlow CMake build](https://github.com/tensorflow/tensorflow/tree/r0.12/tensorflow/contrib/cmake)
+*   [TensorFlow CMake build](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/cmake)
 
+Note: Starting from 1.6 release, our prebuilt binaries will use AVX
+instructions. Older CPUs may not be able to execute these binaries.
 
 ## Determine which TensorFlow to install
 
@@ -180,7 +182,7 @@ If bazel is not installed on your system, install it now by following
 
 ### Install python dependencies
 
-To install TensorFlow, you must install the following packages:
+To build TensorFlow, you must install the following packages:
 
   * six
   * numpy, which is a numerical processing package that TensorFlow requires.
@@ -196,7 +198,11 @@ After installing pip, invoke the following commands:
 
 <pre> $ <b>sudo pip install six numpy wheel</b> </pre>
 
-
+Note: These are just the minimum requirements to _build_ tensorflow. Installing
+the pip package will download additional packages required to _run_ it. If you
+plan on executing tasks directly with `bazel` , without the pip installation,
+you may need to install additional python packages. For example, you should
+`pip install mock enum34` before running TensorFlow's tests with bazel.
 
 ### Optional: install TensorFlow for GPU prerequisites
 
@@ -355,10 +361,10 @@ Invoke `pip install` to install that pip package.
 The filename of the `.whl` file depends on your platform.
 For example, the following command will install the pip package
 
-for TensorFlow 1.4.0 on Linux:
+for TensorFlow 1.5.0rc1 on Linux:
 
 <pre>
-$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0-py2-none-any.whl</b>
+$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.5.0rc1-py2-none-any.whl</b>
 </pre>
 
 ## Validate your installation
@@ -441,15 +447,27 @@ Stack Overflow and specify the `tensorflow` tag.
   <td>Invoking `python` or `ipython` generates the following error:
   <pre>ImportError: cannot import name pywrap_tensorflow</pre></td>
 </tr>
+
+<tr>
+  <td><a href="https://stackoverflow.com/questions/45276830">45276830</a></td>
+  <td><pre>external/local_config_cc/BUILD:50:5: in apple_cc_toolchain rule
+  @local_config_cc//:cc-compiler-darwin_x86_64: Xcode version must be specified
+  to use an Apple CROSSTOOL.</pre>
+  </td>
+</tr>
+
 </table>
 
 ## Tested source configurations
 **Linux**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
+
+<tr><td>tensorflow-1.5.0-rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.8.0</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.5.0-rc1</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.8.0</td><td>7</td><td>9</td></tr>
 <tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.4.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.5.4</td><td>6</td><td>8</td></tr>
- <tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.3.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>6</td><td>8</td></tr>
 <tr><td>tensorflow-1.2.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.2.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.4.5</td><td>5.1</td><td>8</td></tr>
@@ -462,8 +480,9 @@ Stack Overflow and specify the `tensorflow` tag.
 **Mac**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
+<tr><td>tensorflow-1.5.0-rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.8.1</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
- <tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow-1.2.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow-1.1.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.2</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.1.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.2</td><td>5.1</td><td>8</td></tr>
@@ -474,6 +493,8 @@ Stack Overflow and specify the `tensorflow` tag.
 **Windows**
 <table>
 <tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
+<tr><td>tensorflow-1.5.0-rc1</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
+<tr><td>tensorflow_gpu-1.5.0-rc1</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>7</td><td>9</td></tr>
 <tr><td>tensorflow-1.4.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
 <tr><td>tensorflow_gpu-1.4.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>6</td><td>8</td></tr>
 <tr><td>tensorflow-1.3.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
diff --git a/tensorflow/docs_src/install/leftnav_files b/tensorflow/docs_src/install/leftnav_files
index bc30d37bd08863d52e6ada370ac98e49b0aca54d..0e8b5ae7a17eb43cffc76d40692c4f0042de44af 100644
--- a/tensorflow/docs_src/install/leftnav_files
+++ b/tensorflow/docs_src/install/leftnav_files
@@ -1,10 +1,16 @@
+index.md
+
+### Python
 install_linux.md
 install_mac.md
 install_windows.md
 install_sources.md
 >>>
 migration.md
->>>
+
+### Other Languages
 install_java.md
 install_go.md
 install_c.md
+
+
diff --git a/tensorflow/docs_src/mobile/leftnav_files b/tensorflow/docs_src/mobile/leftnav_files
index 4d2c3b62341717d90d6e4afabd105d7fd7a7866d..ac50f528ba468d8a830c059539d3399f413f39c8 100644
--- a/tensorflow/docs_src/mobile/leftnav_files
+++ b/tensorflow/docs_src/mobile/leftnav_files
@@ -1,6 +1,7 @@
 index.md
 ### TensorFlow Lite
 tflite/index.md
+tflite/demo_android.md
 >>>
 ### TensorFlow Mobile
 mobile_intro.md
diff --git a/tensorflow/docs_src/mobile/tflite/demo_android.md b/tensorflow/docs_src/mobile/tflite/demo_android.md
new file mode 100644
index 0000000000000000000000000000000000000000..79b567897cb8a38ed2e27e73aa7e8fee95f718b8
--- /dev/null
+++ b/tensorflow/docs_src/mobile/tflite/demo_android.md
@@ -0,0 +1,39 @@
+# TensorFlow Lite Demo for Android
+
+The TensorFlow Lite demo is a camera app that continuously classifies whatever
+it sees from your device's back camera, using a quantized MobileNet model.
+
+You'll need an Android device running Android 5.0 or higher to run the demo.
+
+To get you started working with TensorFlow Lite on Android, we'll walk you
+through building and deploying our TensorFlow demo app in Android Studio.
+
+It's also possible to build the demo app with Bazel, but we only recommend
+this for advanced users who are very familiar with the Bazel build
+environment. For more information on that, see our page [on Github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite#building-tensorflow-lite-and-the-demo-app-from-source).
+
+## Build and deploy with Android Studio
+
+1. Clone the TensorFlow repository from GitHub if you haven't already:
+
+        git clone https://github.com/tensorflow/tensorflow
+
+2. Install the latest version of Android Studio from [here](https://developer.android.com/studio/index.html).
+
+3. From the **Welcome to Android Studio** screen, use the **Import Project
+   (Gradle, Eclipse ADT, etc)** option to import the
+   `tensorflow/contrib/lite/java/demo` directory as an existing Android Studio
+   Project.
+
+    Android Studio may prompt you to install Gradle upgrades and other tool
+    versions; you should accept these upgrades.
+
+4. Download the TensorFlow Lite MobileNet model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip).
+
+    Unzip this and copy the `mobilenet_quant_v1_224.tflite` file to the assets
+    directory: `tensorflow/contrib/lite/java/demo/app/src/main/assets/`
+
+5. Build and run the app in Android Studio.
+
+You'll have to grant permissions for the app to use the device's camera. Point
+the camera at various objects and enjoy seeing how the model classifies things!
diff --git a/tensorflow/docs_src/mobile/tflite/index.md b/tensorflow/docs_src/mobile/tflite/index.md
index 49d93669a2808159a87538ab1191def5ed9ab9d4..beb24794fc98724e2423e02a71028f79be45cf75 100644
--- a/tensorflow/docs_src/mobile/tflite/index.md
+++ b/tensorflow/docs_src/mobile/tflite/index.md
@@ -95,7 +95,7 @@ following:
 
     All of the following models are guaranteed to work out of the box:
 
-    - Inception V3, a popular model for detecting the the dominant objects
+    - Inception V3, a popular model for detecting the dominant objects
       present in an image.
 
     - [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md),
@@ -155,7 +155,7 @@ retraining for both floating point and quantized inference.
 
 The following diagram shows the architectural design of TensorFlow Lite:
 
-<img src = "/images/tflite-architecture.jpg">
+![tensorflow lite architecture](https://www.tensorflow.org/images/tflite-architecture.jpg)
 
 Starting with a trained TensorFlow model on disk, you'll convert that model to
 the TensorFlow Lite file format (`.tflite`) using the TensorFlow Lite
diff --git a/tensorflow/docs_src/performance/datasets_performance.md b/tensorflow/docs_src/performance/datasets_performance.md
new file mode 100644
index 0000000000000000000000000000000000000000..4f95e17c3598c23645fad07441c267266e5ef34e
--- /dev/null
+++ b/tensorflow/docs_src/performance/datasets_performance.md
@@ -0,0 +1,331 @@
+# Input Pipeline Performance Guide
+
+GPUs and TPUs can radically reduce the time required to execute a single
+training step. Achieving peak performance requires an efficient input pipeline
+that delivers data for the next step before the current step has finished. The
+`tf.data` API helps to build flexible and efficient input pipelines. This
+document explains the `tf.data` API's features and best practices for building
+high performance TensorFlow input pipelines across a variety of models and
+accelerators.
+
+This guide does the following:
+
+*   Illustrates that TensorFlow input pipelines are essentially an
+    [ETL](https://en.wikipedia.org/wiki/Extract,_transform,_load) process.
+*   Describes common performance optimizations in the context of the `tf.data`
+    API.
+*   Discusses the performance implications of the order in which you apply
+    transformations.
+*   Summarizes the best practices for designing performant TensorFlow input
+    pipelines.
+
+
+## Input Pipeline Structure
+
+A typical TensorFlow training input pipeline can be framed as an ETL process:
+
+1.  **Extract**: Read data from persistent storage -- either local (e.g. HDD or
+    SSD) or remote (e.g. [GCS](https://cloud.google.com/storage/) or
+    [HDFS](https://en.wikipedia.org/wiki/Apache_Hadoop#Hadoop_distributed_file_system)).
+2.  **Transform**: Use CPU cores to parse and perform preprocessing operations
+    on the data such as image decompression, data augmentation transformations
+    (such as random crop, flips, and color distortions), shuffling, and batching.
+3.  **Load**: Load the transformed data onto the accelerator device(s) (for
+    example, GPU(s) or TPU(s)) that execute the machine learning model.
+
+This pattern effectively utilizes the CPU, while reserving the accelerator for
+the heavy lifting of training your model. In addition, viewing input pipelines
+as an ETL process provides structure that facilitates the application of
+performance optimizations.
+
+When using the @{tf.estimator.Estimator} API, the first two phases (Extract and
+Transform) are captured in the `input_fn` passed to
+@{tf.estimator.Estimator.train}. In code, this might look like the following
+(naive, sequential) implementation:
+
+```
+def parse_fn(example):
+  "Parse TFExample records and perform simple data augmentation."
+  example_fmt = {
+    "image": tf.FixedLengthFeature((), tf.string, ""),
+    "label": tf.FixedLengthFeature((), tf.int64, -1)
+  }
+  parsed = tf.parse_single_example(example, example_fmt)
+  image = tf.image.decode_image(parsed["image"])
+  image = _augment_helper(image)  # augments image using slice, reshape, resize_bilinear
+  return image, parsed["label"]
+
+def input_fn():
+  files = tf.data.Dataset.list_files("/path/to/dataset/train-*.tfrecord")
+  dataset = files.interleave(tf.data.TFRecordDataset)
+  dataset = dataset.shuffle(buffer_size=FLAGS.shuffle_buffer_size)
+  dataset = dataset.map(map_func=parse_fn)
+  dataset = dataset.batch(batch_size=FLAGS.batch_size)
+  return dataset
+```
+
+The next section builds on this input pipeline, adding performance
+optimizations.
+
+## Optimizing Performance
+
+As new computing devices (such as GPUs and TPUs) make it possible to train
+neural networks at an increasingly fast rate, the CPU processing is prone to
+becoming the bottleneck. The `tf.data` API provides users with building blocks
+to design input pipelines that effectively utilize the CPU, optimizing each step
+of the ETL process.
+
+### Pipelining
+
+To perform a training step, you must first extract and transform the training
+data and then feed it to a model running on an accelerator. However, in a naive
+synchronous implementation, while the CPU is preparing the data, the accelerator
+is sitting idle. Conversely, while the accelerator is training the model, the
+CPU is sitting idle. The training step time is thus the sum of both CPU
+pre-processing time and the accelerator training time.
+
+**Pipelining** overlaps the preprocessing and model execution of a training
+step. While the accelerator is performing training step `N`, the CPU is
+preparing the data for step `N+1`. Doing so reduces the step time to the maximum
+(as opposed to the sum) of the training and the time it takes to extract and
+transform the data.
+
+Without pipelining, the CPU and the GPU/TPU sit idle much of the time:
+
+![without pipelining](https://www.tensorflow.org/images/datasets_without_pipelining.png)
+
+With pipelining, idle time diminishes significantly:
+
+![with pipelining](https://www.tensorflow.org/images/datasets_with_pipelining.png)
+
+The `tf.data` API provides a software pipelining mechanism through the
+@{tf.data.Dataset.prefetch} transformation, which can be used to decouple the
+time data is produced from the time it is consumed. In particular, the
+transformation uses a background thread and an internal buffer to prefetch
+elements from the input dataset ahead of the time they are requested. Thus, to
+achieve the pipelining effect illustrated above, you can add `prefetch(1)` as
+the final transformation to your dataset pipeline (or `prefetch(n)` if a single
+training step consumes n elements).
+
+To apply this change to our running example, change:
+
+```
+dataset = dataset.batch(batch_size=FLAGS.batch_size)
+return dataset
+```
+
+to:
+
+
+```
+dataset = dataset.batch(batch_size=FLAGS.batch_size)
+dataset = dataset.prefetch(buffer_size=FLAGS.prefetch_buffer_size)
+return dataset
+```
+
+Note that the prefetch transformation will yield benefits any time there is an
+opportunity to overlap the work of a "producer" with the work of a "consumer."
+The preceding recommendation is simply the most common application.
+
+### Parallelize Data Transformation
+
+When preparing a batch, input elements may need to be pre-processed. To this
+end, the `tf.data` API offers the @{tf.data.Dataset.map} transformation, which
+applies a user-defined function (for example, `parse_fn` from the running
+example) to each element of the input dataset. Because input elements are
+independent of one another, the pre-processing can be parallelized across
+multiple CPU cores. To make this possible, the `map` transformation provides the
+`num_parallel_calls` argument to specify the level of parallelism. For example,
+the following diagram illustrates the effect of setting `num_parallel_calls=2`
+to the `map` transformation:
+
+![parallel map](https://www.tensorflow.org/images/datasets_parallel_map.png)
+
+Choosing the best value for the `num_parallel_calls` argument depends on your
+hardware, characteristics of your training data (such as its size and shape),
+the cost of your map function, and what other processing is happening on the
+CPU at the same time; a simple heuristic is to use the number of available CPU
+cores. For instance, if the machine executing the example above had 4 cores, it
+would have been more efficient to set `num_parallel_calls=4`. On the other hand,
+setting `num_parallel_calls` to a value much greater than the number of
+available CPUs can lead to inefficient scheduling, resulting in a slowdown.
+
+To apply this change to our running example, change:
+
+```
+dataset = dataset.map(map_func=parse_fn)
+```
+
+to:
+
+```
+dataset = dataset.map(map_func=parse_fn, num_parallel_calls=FLAGS.num_parallel_calls)
+```
+
+Furthermore, if your batch size is in the hundreds or thousands, your pipeline
+will likely additionally benefit from parallelizing the batch creation. To this
+end, the `tf.data` API provides the @{tf.contrib.data.map_and_batch}
+transformation, which effectively "fuses" the map and batch transformations.
+
+To apply this change to our running example, change:
+
+```
+dataset = dataset.map(map_func=parse_fn, num_parallel_calls=FLAGS.num_parallel_calls)
+dataset = dataset.batch(batch_size=FLAGS.batch_size)
+```
+
+to:
+
+```
+dataset = dataset.apply(tf.contrib.data.map_and_batch(
+    map_func=parse_fn, batch_size=FLAGS.batch_size))
+```
+
+### Parallelize Data Extraction
+
+In a real-world setting, the input data may be stored remotely (for example,
+GCS or HDFS), either because the input data would not fit locally or because the
+training is distributed and it would not make sense to replicate the input data
+on every machine. A dataset pipeline that works well when reading data locally
+might become bottlenecked on I/O when reading data remotely because of the
+following differences between local and remote storage:
+
+
+*   **Time-to-first-byte:** Reading the first byte of a file from remote storage
+    can take orders of magnitude longer than from local storage.
+*   **Read throughput:** While remote storage typically offers large aggregate
+    bandwidth, reading a single file might only be able to utilize a small
+    fraction of this bandwidth.
+
+In addition, once the raw bytes are read into memory, it may also be necessary
+to deserialize or decrypt the data
+(e.g. [protobuf](https://developers.google.com/protocol-buffers/)), which adds
+additional overhead. This overhead is present irrespective of whether the data
+is stored locally or remotely, but can be worse in the remote case if data is
+not prefetched effectively.
+
+To mitigate the impact of the various data extraction overheads, the `tf.data`
+API offers the @{tf.contrib.data.parallel_interleave} transformation. Use this
+transformation to parallelize the execution of and interleave the contents of
+other datasets (such as data file readers). The
+number of datasets to overlap can be specified by the `cycle_length` argument.
+
+The following diagram illustrates the effect of supplying `cycle_length=2` to
+the `parallel_interleave` transformation:
+
+![parallel io](https://www.tensorflow.org/images/datasets_parallel_io.png)
+
+To apply this change to our running example, change:
+
+```
+dataset = files.interleave(tf.data.TFRecordDataset)
+```
+
+to:
+
+```
+dataset = files.apply(tf.contrib.data.parallel_interleave(
+    tf.data.TFRecordDataset, cycle_length=FLAGS.num_parallel_readers))
+```
+
+
+The throughput of remote storage systems can vary over time due to load or
+network events. To account for this variance, the `parallel_interleave`
+transformation can optionally use prefetching. (See
+@{tf.contrib.data.parallel_interleave} for details).
+
+By default, the `parallel_interleave` transformation provides a deterministic
+ordering of elements to aid reproducibility. As an alternative to prefetching
+(which may be ineffective in some cases), the `parallel_interleave`
+transformation also provides an option that can boost performance at the expense
+of ordering guarantees. In particular, if the `sloppy` argument is set to true,
+the transformation may depart from its otherwise deterministic ordering, by
+temporarily skipping over files whose elements are not available when the next
+element is requested.
+
+## Performance Considerations
+
+The `tf.data` API is designed around composable transformations to provide its
+users with flexibility. Although many of these transformations are commutative,
+the ordering of certain transformations has performance implications.
+
+### Map and Batch
+
+Invoking the user-defined function passed into the `map` transformation has
+overhead related to scheduling and executing the user-defined function.
+Normally, this overhead is small compared to the amount of computation performed
+by the function. However, if `map` does little work, this overhead can dominate
+the total cost. In such cases, we recommend vectorizing the user-defined
+function (that is, have it operate over a batch of inputs at once) and apply the
+`batch` transformation _before_ the `map` transformation.
+
+### Map and Cache
+
+The @{tf.data.Dataset.cache} transformation can cache a dataset, either in
+memory or on local storage. If the user-defined function passed into the `map`
+transformation is expensive, apply the cache transformation after the map
+transformation as long as the resulting dataset can still fit into memory or
+local storage. If the user-defined function increases the space required to
+store the dataset beyond the cache capacity, consider pre-processing your data
+before your training job to reduce resource usage.
+
+### Map and Interleave / Prefetch / Shuffle
+
+A number of transformations, including `interleave`, `prefetch`, and `shuffle`,
+maintain an internal buffer of elements. If the user-defined function passed
+into the `map` transformation changes the size of the elements, then the
+ordering of the map transformation and the transformations that buffer elements
+affects the memory usage. In general, we recommend choosing the order that
+results in lower memory footprint, unless different ordering is desirable for
+performance (for example, to enable fusing of the map and batch transformations).
+
+### Repeat and Shuffle
+
+The @{tf.data.Dataset.repeat} transformation repeats the input data a finite (or
+infinite) number of times; each repetition of the data is typically referred to
+as an _epoch_. The @{tf.data.Dataset.shuffle} transformation randomizes the
+order of the dataset's examples.
+
+If the `repeat` transformation is applied before the `shuffle` transformation,
+then the epoch boundaries are blurred. That is, certain elements can be repeated
+before other elements appear even once. On the other hand, if the `shuffle`
+transformation is applied before the repeat transformation, then performance
+might slow down at the beginning of each epoch related to initialization of the
+internal state of the `shuffle` transformation. In other words, the former
+(`repeat` before `shuffle`) provides better performance, while the latter
+(`shuffle` before `repeat`) provides stronger ordering guarantees.
+
+When possible, we recommend using the fused
+@{tf.contrib.data.shuffle_and_repeat} transformation, which combines the best of
+both worlds (good performance and strong ordering guarantees). Otherwise, we
+recommend shuffling before repeating.
+
+## Summary of Best Practices
+
+Here is a summary of the best practices for designing input pipelines:
+
+*   Use the `prefetch` transformation to overlap the work of a producer and
+    consumer. In particular, we recommend adding prefetch(n) (where n is the
+    number of elements / batches consumed by a training step) to the end of your
+    input pipeline to overlap the transformations performed on the CPU with the
+    training done on the accelerator.
+*   Parallelize the `map` transformation by setting the `num_parallel_calls`
+    argument. We recommend using the number of available CPU cores for its value.
+*   If you are combining pre-processed elements into a batch using the `batch`
+    transformation, we recommend using the fused `map_and_batch` transformation;
+    especially if you are using large batch sizes.
+*   If you are working with data stored remotely and / or requiring
+    deserialization, we recommend using the `parallel_interleave`
+    transformation to overlap the reading (and deserialization) of data from
+    different files.
+*   Vectorize cheap user-defined functions passed in to the `map` transformation
+    to amortize the overhead associated with scheduling and executing the
+    function.
+*   If your data can fit into memory, use the `cache` transformation to cache it
+    in memory during the first epoch, so that subsequent epochs can avoid the
+    overhead associated with reading, parsing, and transforming it.
+*   If your pre-processing increases the size of your data, we recommend
+    applying the `interleave`, `prefetch`, and `shuffle` first (if possible) to
+    reduce memory usage.
+*   We recommend applying the `shuffle` transformation _before_ the `repeat`
+    transformation, ideally using the fused `shuffle_and_repeat` transformation.
diff --git a/tensorflow/docs_src/performance/leftnav_files b/tensorflow/docs_src/performance/leftnav_files
index d22847322084d584a4ddc713486109ede838fee8..316f023f43dcfe781c7819d1681335267ddd5f76 100644
--- a/tensorflow/docs_src/performance/leftnav_files
+++ b/tensorflow/docs_src/performance/leftnav_files
@@ -1,8 +1,9 @@
 performance_guide.md
+datasets_performance.md
 performance_models.md
 benchmarks.md
-quantization.md
->>>
+
+### XLA
 xla/index.md
 xla/broadcasting.md
 xla/developing_new_backend.md
@@ -10,3 +11,6 @@ xla/jit.md
 xla/operation_semantics.md
 xla/shapes.md
 xla/tfcompile.md
+
+### Quantization
+quantization.md
diff --git a/tensorflow/docs_src/performance/performance_guide.md b/tensorflow/docs_src/performance/performance_guide.md
index 17f71a6d7705c75e7322932cc652ec6728c8c626..10e7ad7ada533c8da5e5b871b38809b90604685e 100644
--- a/tensorflow/docs_src/performance/performance_guide.md
+++ b/tensorflow/docs_src/performance/performance_guide.md
@@ -18,6 +18,7 @@ following sections:
 *   [Input pipeline optimizations](#input-pipeline-optimization)
 *   [Data formats](#data-formats)
 *   [Common fused Ops](#common-fused-ops)
+*   [RNN Performance](#rnn-performance)
 *   [Building and installing from source](#building-and-installing-from-source)
 
 ### Input pipeline optimization
@@ -65,22 +66,25 @@ with tf.device('/cpu:0'):
 If using `tf.estimator.Estimator` the input function is automatically placed on
 the CPU.
 
-#### Using the Dataset API
+#### Using the tf.data API
 
-The @{$datasets$Dataset API} is replacing `queue_runner` as the recommended API
-for building input pipelines. The API was added to contrib as part of TensorFlow
-1.2 and will move to core in the near future. This
+The @{$datasets$tf.data API} is replacing `queue_runner` as the recommended API
+for building input pipelines. This
 [ResNet example](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10_estimator/cifar10_main.py)
 ([arXiv:1512.03385](https://arxiv.org/abs/1512.03385))
-training CIFAR-10 illustrates the use of the Dataset API along with
-`tf.estimator.Estimator`. The Dataset API utilizes C++ multi-threading and has a
-much lower overhead than the Python-based `queue_runner` that is limited by
-Python's multi-threading performance.
+training CIFAR-10 illustrates the use of the `tf.data` API along with
+`tf.estimator.Estimator`.
+
+The `tf.data` API utilizes C++ multi-threading and has a much lower overhead
+than the Python-based `queue_runner` that is limited by Python's multi-threading
+performance. A detailed performance guide for the `tf.data` API can be found
+[here](#datasets_performance).
 
 While feeding data using a `feed_dict` offers a high level of flexibility, in
-most instances using `feed_dict` does not scale optimally. However, in instances
-where only a single GPU is being used the difference can be negligible. Using
-the Dataset API is still strongly recommended. Try to avoid the following:
+general `feed_dict` does not provide a scalable solution. If only a single GPU
+is used, the difference between the `tf.data` API and `feed_dict` performance
+may be negligible. Our recommendation is to avoid using `feed_dict` for all but
+trivial examples. In particular, avoid using `feed_dict` with large inputs:
 
 ```python
 # feed_dict often results in suboptimal performance when using large inputs.
@@ -197,6 +201,53 @@ since before TensorFlow 1.0.
 bn = tf.contrib.layers.batch_norm(input_layer, fused=True, data_format='NCHW')
 ```
 
+### RNN Performance
+
+There are many ways to specify an RNN computation in TensorFlow and they have
+trade-offs with respect to model flexibility and performance. The
+@{tf.nn.rnn_cell.BasicLSTMCell} should be considered a reference implementation
+and used only as a last resort when no other options will work.
+
+When using one of the cells, rather than the fully fused RNN layers, you have a
+choice of whether to use @{tf.nn.static_rnn} or @{tf.nn.dynamic_rnn}.  There
+shouldn't generally be a performance difference at runtime, but large unroll
+amounts can increase the graph size of the @{tf.nn.static_rnn} and cause long
+compile times.  An additional advantage of @{tf.nn.dynamic_rnn} is that it can
+optionally swap memory from the GPU to the CPU to enable training of very long
+sequences.  Depending on the model and hardware configuration, this can come at
+a performance cost.  It is also possible to run multiple iterations of
+@{tf.nn.dynamic_rnn} and the underlying @{tf.while_loop} construct in parallel,
+although this is rarely useful with RNN models as they are inherently
+sequential.
+
+On NVIDIA GPUs, the use of @{tf.contrib.cudnn_rnn} should always be preferred
+unless you want layer normalization, which it doesn't support.  It is often at
+least an order of magnitude faster than @{tf.contrib.rnn.BasicLSTMCell} and
+@{tf.contrib.rnn.LSTMBlockCell} and uses 3-4x less memory than
+@{tf.contrib.rnn.BasicLSTMCell}.
+
+If you need to run one step of the RNN at a time, as might be the case in
+reinforcement learning with a recurrent policy, then you should use the
+@{tf.contrib.rnn.LSTMBlockCell} with your own environment interaction loop
+inside a @{tf.while_loop} construct. Running one step of the RNN at a time and
+returning to Python is possible, but it will be slower.
+
+On CPUs, mobile devices, and if @{tf.contrib.cudnn_rnn} is not available on
+your GPU, the fastest and most memory efficient option is
+@{tf.contrib.rnn.LSTMBlockFusedCell}.
+
+For all of the less common cell types like @{tf.contrib.rnn.NASCell},
+@{tf.contrib.rnn.PhasedLSTMCell}, @{tf.contrib.rnn.UGRNNCell},
+@{tf.contrib.rnn.GLSTMCell}, @{tf.contrib.rnn.Conv1DLSTMCell},
+@{tf.contrib.rnn.Conv2DLSTMCell}, @{tf.contrib.rnn.LayerNormBasicLSTMCell},
+etc., one should be aware that they are implemented in the graph like
+@{tf.contrib.rnn.BasicLSTMCell} and as such will suffer from the same poor
+performance and high memory usage.  One should consider whether or not those
+trade-offs are worth it before using these cells. For example, while layer
+normalization can speed up convergence, because cuDNN is 20x faster the fastest
+wall clock time to convergence is usually obtained without it.
+
+
 ### Building and installing from source
 
 The default TensorFlow binaries target the broadest range of hardware to make
diff --git a/tensorflow/docs_src/performance/xla/broadcasting.md b/tensorflow/docs_src/performance/xla/broadcasting.md
index 8dbf0d0446f41b26489912734bc11704e61efeab..ca3bddf758cf64e7c580f9babfe559ae23708705 100644
--- a/tensorflow/docs_src/performance/xla/broadcasting.md
+++ b/tensorflow/docs_src/performance/xla/broadcasting.md
@@ -33,11 +33,11 @@ In Numpy, this is called [broadcasting]
 
 ## Principles
 
-XLA is a low-level infrastructure with a XLA language this is as strict and
-explicit as possible, avoiding implicit and "magical" features that may make
-some computations slightly easier to define, at the cost of more assumptions
-baked into user code that will be difficult to change in the long term. If
-necessary, implicit and magical features can be added in client-level wrappers.
+The XLA language is as strict and explicit as possible, avoiding implicit and
+"magical" features. Such features may make some computations slightly easier to
+define, at the cost of more assumptions baked into user code that will be
+difficult to change in the long term. If necessary, implicit and magical
+features can be added in client-level wrappers.
 
 In regards to broadcasting, explicit broadcasting specifications on operations
 between arrays of different ranks is required. This is different from Numpy,
diff --git a/tensorflow/docs_src/performance/xla/developing_new_backend.md b/tensorflow/docs_src/performance/xla/developing_new_backend.md
index 28010ff1b785813e15c56d4bb5c26b0bcedce3d9..74ea15bb2bac2014257f0b1719820f7ee313b66b 100644
--- a/tensorflow/docs_src/performance/xla/developing_new_backend.md
+++ b/tensorflow/docs_src/performance/xla/developing_new_backend.md
@@ -62,11 +62,11 @@ If it is not possible to utilize LLVM, then the best option is to implement a
 new backend for XLA for the desired hardware. This option requires the most
 effort. The classes that need to be implemented are as follows:
 
-*   [StreamExecutor](https://www.tensorflow.org/code/tensorflow/stream_executor/stream_executor.h):
+*   [`StreamExecutor`](https://www.tensorflow.org/code/tensorflow/stream_executor/stream_executor.h):
     For many devices not all methods of `StreamExecutor` are needed. See
     existing `StreamExecutor` implementations for details.
-*   [xla::Compiler](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/compiler.h):
-    This class encapsulates the compilation of a HLO computation into an
+*   [`xla::Compiler`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/compiler.h):
+    This class encapsulates the compilation of an HLO computation into an
     `xla::Executable`.
 *   [`xla::Executable`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/executable.h):
     This class is used to launch a compiled computation on the platform.
diff --git a/tensorflow/docs_src/performance/xla/index.md b/tensorflow/docs_src/performance/xla/index.md
index 19045b45d92a2ca42c3943bc0662ca42bd0c2c24..a8847830740302a0de6f57cb3b7a0d6c7e096d32 100644
--- a/tensorflow/docs_src/performance/xla/index.md
+++ b/tensorflow/docs_src/performance/xla/index.md
@@ -65,18 +65,19 @@ The following diagram shows the compilation process in XLA:
   <img src="https://www.tensorflow.org/images/how-does-xla-work.png">
 </div>
 
-XLA comes with several optimizations and analyzes that are target-independent,
-such as [CSE](https://en.wikipedia.org/wiki/Common_subexpression_elimination),
+XLA comes with several optimizations and analysis passes that are
+target-independent, such as
+[CSE](https://en.wikipedia.org/wiki/Common_subexpression_elimination),
 target-independent operation fusion, and buffer analysis for allocating runtime
 memory for the computation.
 
 After the target-independent step, XLA sends the HLO computation to a backend.
-The backend can perform further HLO-level analyzes and optimizations, this time
-with target specific information and needs in mind. For example, the XLA GPU
-backend may perform operation fusion beneficial specifically for the GPU
-programming model and determine how to partition the computation into streams.
-At this stage, backends may also pattern-match certain operations or
-combinations thereof to optimized library calls.
+The backend can perform further HLO-level optimizations, this time with target
+specific information and needs in mind. For example, the XLA GPU backend may
+perform operation fusion beneficial specifically for the GPU programming model
+and determine how to partition the computation into streams. At this stage,
+backends may also pattern-match certain operations or combinations thereof to
+optimized library calls.
 
 The next step is target-specific code generation. The CPU and GPU backends
 included with XLA use [LLVM](http://llvm.org) for low-level IR, optimization,
diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
index 217f542caa64a5fafb61536a3b9591cae42b517b..1e9b8b35db65ef19a4bcb607b98af1e1de4e6d5b 100644
--- a/tensorflow/docs_src/performance/xla/operation_semantics.md
+++ b/tensorflow/docs_src/performance/xla/operation_semantics.md
@@ -13,6 +13,154 @@ arbitrary-dimensional array. For convenience, special cases have more specific
 and familiar names; for example a *vector* is a 1-dimensional array and a
 *matrix* is a 2-dimensional array.
 
+## BatchNormGrad
+
+See also
+[`ComputationBuilder::BatchNormGrad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h)
+and [the original batch normalization paper](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
+
+Calculates gradients of batch norm.
+
+<b> `BatchNormGrad(operand, scale, mean, variance, grad_output, epsilon, feature_index)` </b>
+
+| Arguments       | Type                    | Semantics                        |
+| --------------  | ----------------------- | -------------------------------- |
+| `operand`       | `ComputationDataHandle` | n dimensional array to be        |
+:                 :                         : normalized (x)                   :
+| `scale`         | `ComputationDataHandle` | 1 dimensional array              |
+:                 :                         : (\\(\gamma\\))                   :
+| `mean`          | `ComputationDataHandle` | 1 dimensional array (\\(\mu\\))  |
+| `variance`      | `ComputationDataHandle` | 1 dimensional array              |
+:                 :                         : (\\(\sigma^2\\))                 :
+| `grad_output`   | `ComputationDataHandle` | Gradients passed to              |
+:                 :                         : `BatchNormTraining`              :
+:                 :                         : (\\( \nabla y\\))                :
+| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
+| `feature_index` | `int64`                 | Index to feature dimension in    |
+:                 :                         : `operand`                        :
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the gradients with
+respect to `operand`, `offset` and `scale` across all the other dimensions. The
+`feature_index` must be a valid index for the feature dimension in `operand`.
+
+The three gradients are defined by the following formulas:
+
+\\( \nabla x = \nabla y * \gamma * \sqrt{\sigma^2+\epsilon} \\)
+
+\\( \nabla \gamma = sum(\nabla y * (x - \mu) * \sqrt{\sigma^2 + \epsilon}) \\)
+
+\\( \nabla \beta = sum(\nabla y) \\)
+
+The inputs `mean` and `variance` represents moments value
+across batch and spatial dimensions.
+
+The output type is a tuple of three handles:
+
+|Outputs       | Type                    | Semantics                           |
+|------------- | ----------------------- | ------------------------------------|
+|`grad_operand`| `ComputationDataHandle` | gradient with respect to input      |
+:              :                         : `operand`                           :
+|`grad_scale`  | `ComputationDataHandle` | gradient with respect to input      |
+:              :                         : `scale`                             :
+|`grad_offset` | `ComputationDataHandle` | gradient with respect to input      |
+:              :                         : `offset`                            :
+
+
+## BatchNormInference
+
+See also
+[`ComputationBuilder::BatchNormInference`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h) and
+[the original batch normalization paper](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
+
+Normalizes an array across batch and spatial dimensions.
+
+<b> `BatchNormInference(operand, scale, offset, mean, variance, epsilon, feature_index)` </b>
+
+| Arguments       | Type                    | Semantics                       |
+| --------------  | ----------------------- | ------------------------------- |
+| `operand`       | `ComputationDataHandle` | n dimensional array to be       |
+:                 :                         : normalized                      :
+| `scale`         | `ComputationDataHandle` | 1 dimensional array             |
+| `offset`        | `ComputationDataHandle` | 1 dimensional array             |
+| `mean`          | `ComputationDataHandle` | 1 dimensional array             |
+| `variance`      | `ComputationDataHandle` | 1 dimensional array             |
+| `epsilon`       | `float`                 | Epsilon value                   |
+| `feature_index` | `int64`                 | Index to feature dimension in   |
+:                 :                         : `operand`                       :
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the mean and variance
+across all the other dimensions and uses the mean and variance to normalize each
+element in `operand`. The `feature_index` must be a valid index for the feature
+dimension in `operand`.
+
+`BatchNormInference`  is equivalent to calling `BatchNormTraining` without
+computing `mean` and `variance` for each batch. It uses the input `mean` and
+`variance` instead as estimated values. The purpose of this op is to reduce
+latency in inference, hence the name `BatchNormInference`.
+
+The output is an n-dimensional, normalized array with the same shape as input
+`operand`.
+
+## BatchNormTraining
+
+See also
+[`ComputationBuilder::BatchNormTraining`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h) and
+[`the original batch normalization paper`](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
+
+Normalizes an array across batch and spatial dimensions.
+
+<b> `BatchNormTraining(operand, scale, offset, epsilon, feature_index)` </b>
+
+| Arguments       | Type                    | Semantics                        |
+| --------------- | ----------------------- | -------------------------------- |
+| `operand`       | `ComputationDataHandle` | n dimensional array to be        |
+:                 :                         : normalized                       :
+| `scale`         | `ComputationDataHandle` | 1 dimensional array              |
+:                 :                         : (\\(\gamma\\))                   :
+| `offset`        | `ComputationDataHandle` | 1 dimensional array              |
+:                 :                         : (\\(\beta\\ )                    :
+| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
+| `feature_index` | `int64`                 | Index to feature dimension       |
+:                 :                         : in `operand`                     :
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the mean and variance
+across all the other dimensions and uses the mean and variance to normalize each
+element in `operand`. The `feature_index` must be a valid index for the feature
+dimension in `operand`.
+
+The algorithm goes as follows for each batch in `operand` \\(x\\) that
+contains `m` elements with `w` and `h` as the size of spatial dimensions (
+assuming `operand` is an 4 dimensional array):
+
+- Calculates batch mean \\(\mu_l\\) for each feature `l` in feature dimension:
+\\(\mu_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h x_{ijkl}\\)
+
+- Calculates batch variance \\(\sigma^2_l\\):
+\\(\sigma^2_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h (x_{ijkl} - \mu_l)^2\\)
+
+- Normalizes, scales and shifts:
+\\(y_{ijkl}=\frac{\gamma_l(x_{ijkl}-\mu_l)}{\sqrt[2]{\sigma^2_l+\epsilon}}+\beta_l\\)
+
+The epsilon value, usually a small number, is added to avoid divide-by-zero errors.
+
+The output type is a tuple of three `ComputationDataHandle`s:
+
+| Outputs      | Type                    | Semantics                            |
+| ------------ | ----------------------- | -------------------------------------|
+| `output`     | `ComputationDataHandle` | n dimensional array with the same    |
+:              :                         : shape as input `operand` (y)         :
+| `batch_mean` | `ComputationDataHandle` | 1 dimensional array (\\(\mu\\))      |
+| `batch_var`  | `ComputationDataHandle` | 1 dimensional array (\\(\sigma^2\\)) |
+
+The `batch_mean` and `batch_var` are moments calculated across the batch and
+spatial dimensions using the formulas above.
+
 ## BitcastConvertType
 
 See also
@@ -239,39 +387,34 @@ Diagram:
   <img style="width:100%" src="https://www.tensorflow.org/images/ops_concatenate.png">
 </div>
 
-## ConvertElementType
-
-See also
-[`ComputationBuilder::ConvertElementType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-Similar to an element-wise `static_cast` in C++, performs an element-wise
-conversion operation from a data shape to a target shape. The dimensions must
-match, and the conversion is an element-wise one; e.g. `s32` elements become
-`f32` elements via an `s32`-to-`f32` conversion routine.
+## Conditional
 
-<b> `ConvertElementType(operand, new_element_type)` </b>
+See also [`ComputationBuilder::Conditional`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
 
-Arguments          | Type                    | Semantics
------------------- | ----------------------- | ---------------------------
-`operand`          | `ComputationDataHandle` | array of type T with dims D
-`new_element_type` | `PrimitiveType`         | type U
+<b> `Conditional(pred, true_operand, true_computation, false_operand,
+    false_computation)` </b>
 
-The dimensions of the operand and the target shape must match. The source and
-destination element types must not be tuples.
+| Arguments           | Type                    | Semantics                   |
+| ------------------- | ----------------------- | --------------------------- |
+| `pred`              | `ComputationDataHandle` | Scalar of type `PRED`       |
+| `true_operand`      | `ComputationDataHandle` | Argument of type `T_0`      |
+| `true_computation`  | `Computation`           | Computation of type `T_0 -> |
+:                     :                         : S`                          :
+| `false_operand`     | `ComputationDataHandle` | Argument of type `T_1`      |
+| `false_computation` | `Computation`           | Computation of type `T_1 -> |
+:                     :                         : S`                          :
 
-A conversion such as `T=s32` to `U=f32` will perform a normalizing int-to-float
-conversion routine such as round-to-nearest-even.
+Executes `true_computation` if `pred` is `true`, `false_computation` if `pred`
+is `false`, and returns the result.
 
-> Note: The precise float-to-int and visa-versa conversions are currently
-> unspecified, but may become additional arguments to the convert operation in
-> the future.  Not all possible conversions have been implemented for all
->targets.
+The `true_computation` must take in a single argument of type `T_0` and will be
+invoked with `true_operand` which must be of the same type. The
+`false_computation` must take in a single argument of type `T_1` and will be
+invoked with `false_operand` which must be of the same type. The type of the
+returned value of `true_computation` and `false_computation` must be the same.
 
-```
-let a: s32[3] = {0, 1, 2};
-let b: f32[3] = convert(a, f32);
-then b == f32[3]{0.0, 1.0, 2.0}
-```
+Note that only one of `true_computation` and `false_computation` will be
+executed depending on the value of `pred`.
 
 ## Conv (convolution)
 
@@ -395,6 +538,40 @@ for (b, oz, oy, ox) {  // output coordinates
 }
 ```
 
+## ConvertElementType
+
+See also
+[`ComputationBuilder::ConvertElementType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+Similar to an element-wise `static_cast` in C++, performs an element-wise
+conversion operation from a data shape to a target shape. The dimensions must
+match, and the conversion is an element-wise one; e.g. `s32` elements become
+`f32` elements via an `s32`-to-`f32` conversion routine.
+
+<b> `ConvertElementType(operand, new_element_type)` </b>
+
+Arguments          | Type                    | Semantics
+------------------ | ----------------------- | ---------------------------
+`operand`          | `ComputationDataHandle` | array of type T with dims D
+`new_element_type` | `PrimitiveType`         | type U
+
+The dimensions of the operand and the target shape must match. The source and
+destination element types must not be tuples.
+
+A conversion such as `T=s32` to `U=f32` will perform a normalizing int-to-float
+conversion routine such as round-to-nearest-even.
+
+> Note: The precise float-to-int and visa-versa conversions are currently
+> unspecified, but may become additional arguments to the convert operation in
+> the future.  Not all possible conversions have been implemented for all
+>targets.
+
+```
+let a: s32[3] = {0, 1, 2};
+let b: f32[3] = convert(a, f32);
+then b == f32[3]{0.0, 1.0, 2.0}
+```
+
 ## CrossReplicaSum
 
 See also
@@ -409,9 +586,9 @@ Computes a sum across replicas.
 | `operand`    | `ComputationDataHandle` | Array to sum across replicas.      |
 
 The output shape is the same as the input shape. For example, if there are two
-replicas and the operand has the value `(1.0, 2.5)` and `(3.0, 5.1)`
+replicas and the operand has the value `(1.0, 2.5)` and `(3.0, 5.25)`
 respectively on the two replicas, then the output value from this op will be
-`(4.0, 7.6)` on both replicas.
+`(4.0, 7.75)` on both replicas.
 
 Computing the result of CrossReplicaSum requires having one input from each
 replica, so if one replica executes a CrossReplicaSum node more times than
@@ -511,282 +688,338 @@ contracted dimensions of `lhs` and `rhs` must be of the same size. In practice,
 it can be used to perform dot products between vectors, vector/matrix
 multiplications or matrix/matrix multiplications.
 
-## Element-wise binary arithmetic operations
+## DotGeneral
 
 See also
-[`ComputationBuilder::Add`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+[`ComputationBuilder::DotGeneral`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
 
-A set of element-wise binary arithmetic operations is supported.
+<b> `DotGeneral(lhs, rhs, dimension_numbers)` </b>
 
-<b> `Op(lhs, rhs)` </b>
+| Arguments | Type                    | Semantics
+| --------- | ----------------------- | ---------------
+| `lhs`     | `ComputationDataHandle` | array of type T
+| `rhs`     | `ComputationDataHandle` | array of type T
+| `dimension_numbers` | `DotDimensionNumbers` | array of type T
 
-Where `Op` is one of `Add` (addition), `Sub` (subtraction), `Mul`
-(multiplication), `Div` (division), `Rem` (remainder), `Max` (maximum), `Min`
-(minimum), `LogicalAnd` (logical AND), or `LogicalOr` (logical OR).
+As Dot, but allows contracting and batch dimension numbers to be specified for
+both the 'lhs' and 'rhs'.
 
-Arguments | Type                    | Semantics
---------- | ----------------------- | ----------------------------------------
-`lhs`     | `ComputationDataHandle` | left-hand-side operand: array of type T
-`rhs`     | `ComputationDataHandle` | right-hand-side operand: array of type T
+| DotDimensionNumbers Fields | Type                    | Semantics
+| --------- | ----------------------- | ---------------
+| 'lhs_contracting_dimensions' | repeated int64 | 'lhs' contracting dimension numbers |
+| 'rhs_contracting_dimensions' | repeated int64 | 'rhs' contracting dimension numbers |
+| 'lhs_batch_dimensions' | repeated int64 | 'lhs' batch dimension numbers |
+| 'rhs_batch_dimensions' | repeated int64 | 'rhs' batch dimension numbers |
 
-The arguments' shapes have to be either similar or compatible. See the
-@{$broadcasting$broadcasting} documentation about what it means for shapes to
-be compatible. The result of an operation has a shape which is the result of
-broadcasting the two input arrays. In this variant, operations between arrays of
-different ranks are *not* supported, unless one of the operands is a scalar.
+DotGeneral performs the sum of products over contracting dimensions specified
+in 'dimension_numbers'.
 
-When `Op` is `Rem`, the sign of the result is taken from the dividend, and the
-absolute value of the result is always less than the divisor's absolute value.
+Associated contracting dimension numbers from the 'lhs' and 'rhs' do not need
+to be the same, but must be listed in the same order in both
+'lhs/rhs_contracting_dimensions' arrays and have the same dimension sizes.
 
-An alternative variant with different-rank broadcasting support exists for these
-operations:
+Example with contracting dimension numbers:
 
-<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
+```
+lhs = { {1.0, 2.0, 3.0},
+        {4.0, 5.0, 6.0} }
 
-Where `Op` is the same as above. This variant of the operation should be used
-for arithmetic operations between arrays of different ranks (such as adding a
-matrix to a vector).
+rhs = { {1.0, 1.0, 1.0},
+        {2.0, 2.0, 2.0} }
 
-The additional `broadcast_dimensions` operand is a slice of integers used to
-expand the rank of the lower-rank operand up to the rank of the higher-rank
-operand. `broadcast_dimensions` maps the dimensions of the lower-rank shape to
-the dimensions of the higher-rank shape. The unmapped dimensions of the expanded
-shape are filled with dimensions of size one. Degenerate-dimension broadcasting
-then broadcasts the shapes along these degenerate dimension to equalize the
-shapes of both operands. The semantics are described in detail on the
-@{$broadcasting$broadcasting page}.
+DotDimensionNumbers dnums;
+dnums.add_lhs_contracting_dimensions(1);
+dnums.add_rhs_contracting_dimensions(1);
 
-## Element-wise comparison operations
+DotGeneral(lhs, rhs, dnums) -> { {6.0, 12.0},
+                                 {15.0, 30.0} }
+```
 
-See also
-[`ComputationBuilder::Eq`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+Associated batch dimension numbers from the 'lhs' and 'rhs' must have the same
+dimension number, must be listed in the same order in both arrays, and must
+have the same dimension sizes.
 
-A set of standard element-wise binary comparison operations is supported. Note
-that standard IEEE 754 floating-point comparison semantics apply when comparing
-floating-point types.
+Example with batch dimension numbers (batch size 2, 2x2 matrices):
 
-<b> `Op(lhs, rhs)` </b>
+```
+lhs = { { {1.0, 2.0},
+          {3.0, 4.0} },
+        { {5.0, 6.0},
+          {7.0, 8.0} } }
+
+rhs = { { {1.0, 0.0},
+          {0.0, 1.0} },
+        { {1.0, 0.0},
+          {0.0, 1.0} } }
+
+DotDimensionNumbers dnums;
+dnums.add_lhs_contracting_dimensions(2);
+dnums.add_rhs_contracting_dimensions(1);
+dnums.add_lhs_batch_dimensions(0);
+dnums.add_rhs_batch_dimensions(0);
+
+DotGeneral(lhs, rhs, dnums) -> { { {1.0, 2.0},
+                                   {3.0, 4.0} },
+                                 { {5.0, 6.0},
+                                   {7.0, 8.0} } }
+```
 
-Where `Op` is one of `Eq` (equal-to), `Ne` (not equal-to), `Ge`
-(greater-or-equal-than), `Gt` (greater-than), `Le` (less-or-equal-than), `Lt`
-(less-than).
+| Input                               | Output            | Semantics        |
+| ----------------------------------- | ----------------- | ---------------- |
+| [b0, m, k] `dot` [b0, k, n]         | [b0, m, n]        |  batch matmul    |
+| [b0, b1, m, k] `dot` [b0, b1, k, n] | [b0, b1, m, n]    |  batch matmul    |
 
-Arguments | Type                    | Semantics
---------- | ----------------------- | ----------------------------------------
-`lhs`     | `ComputationDataHandle` | left-hand-side operand: array of type T
-`rhs`     | `ComputationDataHandle` | right-hand-side operand: array of type T
+## DynamicSlice
 
-The arguments' shapes have to be either similar or compatible. See the
-@{$broadcasting$broadcasting} documentation about what it means for shapes to
-be compatible. The result of an operation has a shape which is the result of
-broadcasting the two input arrays with the element type `PRED`. In this variant,
-operations between arrays of different ranks are *not* supported, unless one of
-the operands is a scalar.
-
-An alternative variant with different-rank broadcasting support exists for these
-operations:
+See also
+[`ComputationBuilder::DynamicSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
 
-<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
+DynamicSlice extracts a sub-array from the input array at dynamic
+`start_indices`. The size of the slice in each dimension is passed in
+`size_indices`, which specify the end point of exclusive slice intervals in each
+dimension: [start, start + size). The shape of `start_indices` must be rank ==
+1, with dimension size equal to the rank of `operand`.
+Note: handling of out-of-bounds slice indices (generated by incorrect runtime
+calculation of 'start_indices') is currently implementation-defined. Currently,
+slice indices are computed modulo input dimension sizes to prevent out-of-bound
+array accesses, but this behavior may change in future implementations.
 
-Where `Op` is the same as above. This variant of the operation should be used
-for comparison operations between arrays of different ranks (such as adding a
-matrix to a vector).
+<b> `DynamicSlice(operand, start_indices, size_indices)` </b>
 
-The additional `broadcast_dimensions` operand is a slice of integers specifying
-the dimensions to use for broadcasting the operands. The semantics are described
-in detail on the @{$broadcasting$broadcasting page}.
+| Arguments       | Type                    | Semantics                        |
+| --------------- | ----------------------- | -------------------------------- |
+| `operand`       | `ComputationDataHandle` | N dimensional array of type T    |
+| `start_indices` | `ComputationDataHandle` | Rank 1 array of N integers       |
+:                 :                         : containing the starting indices  :
+:                 :                         : of the slice for each dimension. :
+:                 :                         : Value must be greater than or    :
+:                 :                         : equal to zero.                   :
+| `size_indices`  | `ArraySlice<int64>`     | List of N integers containing    |
+:                 :                         : the slice size for each          :
+:                 :                         : dimension. Each value must be    :
+:                 :                         : strictly greater than zero, and  :
+:                 :                         : start + size must be less than   :
+:                 :                         : or equal to the size of the      :
+:                 :                         : dimension to avoid wrapping      :
+:                 :                         : modulo dimension size.           :
 
-## Element-wise unary functions
+1-dimensional example:
 
-ComputationBuilder supports these element-wise unary functions:
+```
+let a = {0.0, 1.0, 2.0, 3.0, 4.0}
+let s = {2}
 
-<b>`Abs(operand)`</b> Element-wise abs `x -> |x|`.
+DynamicSlice(a, s, {2}) produces:
+  {2.0, 3.0}
+```
 
-<b>`Ceil(operand)`</b> Element-wise ceil `x -> ⌈x⌉`.
+2-dimensional example:
 
-<b>`Cos(operand)`</b> Element-wise cosine `x -> cos(x)`.
+```
+let b =
+ { {0.0,  1.0,  2.0},
+   {3.0,  4.0,  5.0},
+   {6.0,  7.0,  8.0},
+   {9.0, 10.0, 11.0} }
+let s = {2, 1}
 
-<b>`Exp(operand)`</b> Element-wise natural exponential `x -> e^x`.
+DynamicSlice(b, s, {2, 2}) produces:
+  { { 7.0,  8.0},
+    {10.0, 11.0} }
+```
+## DynamicUpdateSlice
 
-<b>`Floor(operand)`</b> Element-wise floor `x -> ⌊x⌋`.
+See also
+[`ComputationBuilder::DynamicUpdateSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
 
-<b>`IsFinite(operand)`</b> Tests whether each element of `operand` is finite,
-i.e., is not positive or negative infinity, and is not `NaN`. Returns an array
-of `PRED` values with the same shape as the input, where each element is `true`
-if and only if the corresponding input element is finite.
+DynamicUpdateSlice generates a result which is the value of the input array
+`operand`, with a slice `update` overwritten at `start_indices`.
+The shape of `update` determines the shape of the sub-array of the result which
+is updated.
+The shape of `start_indices` must be rank == 1, with dimension size equal to
+the rank of `operand`.
+Note: handling of out-of-bounds slice indices (generated by incorrect runtime
+calculation of 'start_indices') is currently implementation-defined. Currently,
+slice indices are computed modulo update dimension sizes to prevent out-of-bound
+array accesses, but this behavior may change in future implementations.
 
-<b>`Log(operand)`</b> Element-wise natural logarithm `x -> ln(x)`.
+<b> `DynamicUpdateSlice(operand, update, start_indices)` </b>
 
-<b>`LogicalNot(operand)`</b> Element-wise logical not `x -> !(x)`.
+| Arguments       | Type                    | Semantics                        |
+| --------------- | ----------------------- | -------------------------------- |
+| `operand`       | `ComputationDataHandle` | N dimensional array of type T    |
+| `update`        | `ComputationDataHandle` | N dimensional array of type T    |
+:                 :                         : containing the slice update.     :
+:                 :                         : Each dimension of update shape    :
+:                 :                         : must be strictly greater than    :
+:                 :                         : zero, and start + update must be :
+:                 :                         : less than operand size for each  :
+:                 :                         : dimension to avoid generating    :
+:                 :                         : out-of-bounds update indices.    :
+| `start_indices` | `ComputationDataHandle` | Rank 1 array of N integers       |
+:                 :                         : containing the starting indices  :
+:                 :                         : of the slice for each dimension. :
+:                 :                         : Value must be greater than or    :
+:                 :                         : equal to zero.                   :
 
-<b>`Neg(operand)`</b> Element-wise negation `x -> -x`.
+1-dimensional example:
 
-<b>`Sign(operand)`</b> Element-wise sign operation `x -> sgn(x)` where
+```
+let a = {0.0, 1.0, 2.0, 3.0, 4.0}
+let u = {5.0, 6.0}
+let s = {2}
 
-$$\text{sgn}(x) = \begin{cases} -1 & x < 0\\ 0 & x = 0\\ 1 & x > 0 \end{cases}$$
+DynamicUpdateSlice(a, u, s) produces:
+  {0.0, 1.0, 5.0, 6.0, 4.0}
+```
 
-using the comparison operator of the element type of `operand`.
+2-dimensional example:
 
-<b>`Tanh(operand)`</b> Element-wise hyperbolic tangent `x -> tanh(x)`.
+```
+let b =
+ { {0.0,  1.0,  2.0},
+   {3.0,  4.0,  5.0},
+   {6.0,  7.0,  8.0},
+   {9.0, 10.0, 11.0} }
+let u =
+ { {12.0,  13.0},
+   {14.0,  15.0},
+   {16.0,  17.0} }
 
+let s = {1, 1}
 
-Arguments | Type                    | Semantics
---------- | ----------------------- | ---------------------------
-`operand` | `ComputationDataHandle` | The operand to the function
+DynamicUpdateSlice(b, u, s) produces:
+ { {0.0,  1.0,  2.0},
+   {3.0, 12.0, 13.0},
+   {6.0, 14.0, 15.0},
+   {9.0, 16.0, 17.0} }
+```
 
-The function is applied to each element in the `operand` array, resulting in an
-array with the same shape. It is allowed for `operand` to be a scalar (rank 0).
+## Element-wise binary arithmetic operations
 
+See also
+[`ComputationBuilder::Add`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
 
-## BatchNormTraining
+A set of element-wise binary arithmetic operations is supported.
 
-See also
-[`ComputationBuilder::BatchNormTraining`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h) and
-[`the original batch normalization paper`](https://arxiv.org/abs/1502.03167)
-for a detailed description of the algorithm.
+<b> `Op(lhs, rhs)` </b>
 
-<b> Warning: Not implemented on GPU backend yet. </b>
+Where `Op` is one of `Add` (addition), `Sub` (subtraction), `Mul`
+(multiplication), `Div` (division), `Rem` (remainder), `Max` (maximum), `Min`
+(minimum), `LogicalAnd` (logical AND), or `LogicalOr` (logical OR).
 
-Normalizes an array across batch and spatial dimensions.
+Arguments | Type                    | Semantics
+--------- | ----------------------- | ----------------------------------------
+`lhs`     | `ComputationDataHandle` | left-hand-side operand: array of type T
+`rhs`     | `ComputationDataHandle` | right-hand-side operand: array of type T
 
-<b> `BatchNormTraining(operand, scale, offset, epsilon, feature_index)` </b>
+The arguments' shapes have to be either similar or compatible. See the
+@{$broadcasting$broadcasting} documentation about what it means for shapes to
+be compatible. The result of an operation has a shape which is the result of
+broadcasting the two input arrays. In this variant, operations between arrays of
+different ranks are *not* supported, unless one of the operands is a scalar.
 
-| Arguments       | Type                    | Semantics                        |
-| --------------- | ----------------------- | -------------------------------- |
-| `operand`       | `ComputationDataHandle` | n dimensional array to be        |
-:                 :                         : normalized                       :
-| `scale`         | `ComputationDataHandle` | 1 dimensional array              |
-:                 :                         : (\\(\gamma\\))                   :
-| `offset`        | `ComputationDataHandle` | 1 dimensional array              |
-:                 :                         : (\\(\beta\\ )                    :
-| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
-| `feature_index` | `int64`                 | Index to feature dimension       |
-:                 :                         : in `operand`                     :
+When `Op` is `Rem`, the sign of the result is taken from the dividend, and the
+absolute value of the result is always less than the divisor's absolute value.
 
+An alternative variant with different-rank broadcasting support exists for these
+operations:
 
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the mean and variance
-across all the other dimensions and use the mean and variance to normalize each
-element in `operand`. The `feature_index` must be a valid index for the feature
-dimension in `operand`.
+<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
 
-The algorithm goes as follows for each batch in `operand` \\(x\\) that
-contains `m` elements with `w` and `h` as the size of spatial dimensions (
-assuming `operand` is an 4 dimensional array):
+Where `Op` is the same as above. This variant of the operation should be used
+for arithmetic operations between arrays of different ranks (such as adding a
+matrix to a vector).
 
-- Calculates batch mean \\(\mu_l\\) for each feature `l` in feature dimension:
-\\(\mu_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h x_{ijkl}\\)
+The additional `broadcast_dimensions` operand is a slice of integers used to
+expand the rank of the lower-rank operand up to the rank of the higher-rank
+operand. `broadcast_dimensions` maps the dimensions of the lower-rank shape to
+the dimensions of the higher-rank shape. The unmapped dimensions of the expanded
+shape are filled with dimensions of size one. Degenerate-dimension broadcasting
+then broadcasts the shapes along these degenerate dimension to equalize the
+shapes of both operands. The semantics are described in detail on the
+@{$broadcasting$broadcasting page}.
 
-- Calculates batch variance \\(\sigma^2_l\\):
-\\(\sigma^2_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h (x_{ijkl} - \mu_l)^2\\)
+## Element-wise comparison operations
 
-- Normalizes, scales and shifts:
-\\(y_{ijkl}=\frac{\gamma_l(x_{ijkl}-\mu_l)}{\sqrt[2]{\sigma^2_l+\epsilon}}+\beta_l\\)
+See also
+[`ComputationBuilder::Eq`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
 
-The epsilon value, usually a small number, is added to avoid divide-by-zero errors.
+A set of standard element-wise binary comparison operations is supported. Note
+that standard IEEE 754 floating-point comparison semantics apply when comparing
+floating-point types.
 
-The output type is a tuple of three ComputationDataHandles:
+<b> `Op(lhs, rhs)` </b>
 
-| Outputs      | Type                    | Semantics                            |
-| ------------ | ----------------------- | -------------------------------------|
-| `output`     | `ComputationDataHandle` | n dimensional array with the same    |
-:              :                         : shape as input `operand` (y)         :
-| `batch_mean` | `ComputationDataHandle` | 1 dimensional array (\\(\mu\\))      |
-| `batch_var`  | `ComputationDataHandle` | 1 dimensional array (\\(\sigma^2\\)) |
+Where `Op` is one of `Eq` (equal-to), `Ne` (not equal-to), `Ge`
+(greater-or-equal-than), `Gt` (greater-than), `Le` (less-or-equal-than), `Lt`
+(less-than).
 
-The `batch_mean` and `batch_var` are moments calculated across the batch and
-spatial dimensions using the formulas above.
+Arguments | Type                    | Semantics
+--------- | ----------------------- | ----------------------------------------
+`lhs`     | `ComputationDataHandle` | left-hand-side operand: array of type T
+`rhs`     | `ComputationDataHandle` | right-hand-side operand: array of type T
 
-## BatchNormInference
+The arguments' shapes have to be either similar or compatible. See the
+@{$broadcasting$broadcasting} documentation about what it means for shapes to
+be compatible. The result of an operation has a shape which is the result of
+broadcasting the two input arrays with the element type `PRED`. In this variant,
+operations between arrays of different ranks are *not* supported, unless one of
+the operands is a scalar.
 
-See also
-[`ComputationBuilder::BatchNormInference`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+An alternative variant with different-rank broadcasting support exists for these
+operations:
 
-<b> Warning: Not implemented yet. </b>
+<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
 
-Normalizes an array across batch and spatial dimensions.
+Where `Op` is the same as above. This variant of the operation should be used
+for comparison operations between arrays of different ranks (such as adding a
+matrix to a vector).
 
-<b> `BatchNormInference(operand, scale, offset, mean, variance, epsilon, feature_index)` </b>
+The additional `broadcast_dimensions` operand is a slice of integers specifying
+the dimensions to use for broadcasting the operands. The semantics are described
+in detail on the @{$broadcasting$broadcasting page}.
 
-| Arguments       | Type                    | Semantics                       |
-| --------------  | ----------------------- | ------------------------------- |
-| `operand`       | `ComputationDataHandle` | n dimensional array to be       |
-:                 :                         : normalized                      :
-| `scale`         | `ComputationDataHandle` | 1 dimensional array             |
-| `offset`        | `ComputationDataHandle` | 1 dimensional array             |
-| `mean`          | `ComputationDataHandle` | 1 dimensional array             |
-| `variance`      | `ComputationDataHandle` | 1 dimensional array             |
-| `epsilon`       | `float`                 | Epsilon value                   |
-| `feature_index` | `int64`                 | Index to feature dimension in   |
-:                 :                         : `operand`                       :
+## Element-wise unary functions
 
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the mean and variance
-across all the other dimensions and use the mean and variance to normalize each
-element in `operand`. The `feature_index` must be a valid index for the feature
-dimension in `operand`.
+ComputationBuilder supports these element-wise unary functions:
 
-`BatchNormInference`  is equivalent to calling `BatchNormTraining` without
-computing `mean` and `variance` for each batch. It uses the input `mean` and
-`variance` instead as estimated values. The purpose of this op is to reduce
-latency in inference, hence the name `BatchNormInference`.
+<b>`Abs(operand)`</b> Element-wise abs `x -> |x|`.
 
-The output is a n dimensional, normalized array with the same shape as input
-`operand`.
+<b>`Ceil(operand)`</b> Element-wise ceil `x -> ⌈x⌉`.
 
-## BatchNormGrad
+<b>`Cos(operand)`</b> Element-wise cosine `x -> cos(x)`.
 
-See also
-[`ComputationBuilder::BatchNormGrad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+<b>`Exp(operand)`</b> Element-wise natural exponential `x -> e^x`.
 
-<b> Warning: Not implemented yet. </b>
+<b>`Floor(operand)`</b> Element-wise floor `x -> ⌊x⌋`.
 
-Calculates gradients of batch norm.
+<b>`IsFinite(operand)`</b> Tests whether each element of `operand` is finite,
+i.e., is not positive or negative infinity, and is not `NaN`. Returns an array
+of `PRED` values with the same shape as the input, where each element is `true`
+if and only if the corresponding input element is finite.
 
-<b> `BatchNormGrad(operand, scale, mean, variance, grad_output, epsilon, feature_index)` </b>
+<b>`Log(operand)`</b> Element-wise natural logarithm `x -> ln(x)`.
 
-| Arguments       | Type                    | Semantics                        |
-| --------------  | ----------------------- | -------------------------------- |
-| `operand`       | `ComputationDataHandle` | n dimensional array to be        |
-:                 :                         : normalized (x)                   :
-| `scale`         | `ComputationDataHandle` | 1 dimensional array              |
-:                 :                         : (\\(\gamma\\))                   :
-| `mean`          | `ComputationDataHandle` | 1 dimensional array (\\(\mu\\))  |
-| `variance`      | `ComputationDataHandle` | 1 dimensional array              |
-:                 :                         : (\\(\sigma^2\\))                 :
-| `grad_output`   | `ComputationDataHandle` | Gradients passed to              |
-:                 :                         : `BatchNormTraining`              :
-:                 :                         : (\\( \nabla y\\))                :
-| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
-| `feature_index` | `int64`                 | Index to feature dimension in    |
-:                 :                         : `operand`                        :
+<b>`LogicalNot(operand)`</b> Element-wise logical not `x -> !(x)`.
 
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the gradients with
-respect to `operand`, `offset` and `scale` across all the other dimensions. The
-`feature_index` must be a valid index for the feature dimension in `operand`.
+<b>`Neg(operand)`</b> Element-wise negation `x -> -x`.
 
-The three gradients are defined by the following formulas:
+<b>`Sign(operand)`</b> Element-wise sign operation `x -> sgn(x)` where
 
-\\( \nabla x = \nabla y * \gamma * \sqrt{\sigma^2+\epsilon} \\)
+$$\text{sgn}(x) = \begin{cases} -1 & x < 0\\ 0 & x = 0\\ 1 & x > 0 \end{cases}$$
 
-\\( \nabla \gamma = sum(\nabla y * (x - \mu) * \sqrt{\sigma^2 + \epsilon}) \\)
+using the comparison operator of the element type of `operand`.
 
-\\( \nabla \beta = sum(\nabla y) \\)
+<b>`Tanh(operand)`</b> Element-wise hyperbolic tangent `x -> tanh(x)`.
 
-The inputs `mean` and `variance` represents moments value
-across batch and spatial dimensions.
 
-The output type is a tuple of three ComputationDataHandles:
+Arguments | Type                    | Semantics
+--------- | ----------------------- | ---------------------------
+`operand` | `ComputationDataHandle` | The operand to the function
 
-|Outputs       | Type                    | Semantics                           |
-|------------- | ----------------------- | ------------------------------------|
-|`grad_operand`| `ComputationDataHandle` | gradient with respect to input      |
-:              :                         : `operand`                           :
-|`grad_offset` | `ComputationDataHandle` | gradient with respect to input      |
-:              :                         : `offset`                            :
-|`grad_scale`  | `ComputationDataHandle` | gradient with respect to input      |
-:              :                         : `scale`                             :
+The function is applied to each element in the `operand` array, resulting in an
+array with the same shape. It is allowed for `operand` to be a scalar (rank 0).
 
 
 ## GetTupleElement
@@ -955,61 +1188,6 @@ transfer. The context is a tuple of {receive buffer (shape), request identifier
 Given a context created by a `Recv` instruction, waits for the data transfer to
 complete and returns the received data.
 
-## Send
-
-See also
-[`ComputationBuilder::Send`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-<b> `Send(operand, channel_handle)` </b>
-
-| Arguments        | Type                    | Semantics                        |
-| ---------------- | ----------------------- | -------------------------------- |
-| `operand`        | `ComputationDataHandle` | data to send (array of type T)   |
-| `channel_handle` | `ChannelHandle`         | unique identifier for each send/recv pair |
-
-Sends the given operand data to a `Recv` instruction in another computation
-that shares the same channel handle. Does not return any data.
-
-Similar to the `Recv` operation, the client API of `Send` operation represents
-synchronous communication, and is internally decomposed into 2 HLO instructions
-(`Send` and `SendDone`) to enable asynchronous data transfers. See also
-[`HloInstruction::CreateSend` and `HloInstruction::CreateSendDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
-
-<b>`Send(HloInstruction operand, int64 channel_id)`</b>
-
-Initiates an asynchronous transfer of the operand to the resources allocated by
-the `Recv` instruction with the same channel id. Returns a context, which is
-used by a following `SendDone` instruction to wait for the completion of the
-data transfer. The context is a tuple of {operand (shape), request identifier
-(U32)} and it can only be used by a `SendDone` instruction.
-
-<b> `SendDone(HloInstruction context)` </b>
-
-Given a context created by a `Send` instruction, waits for the data transfer to
-complete.  The instruction does not return any data.
-
-<b> Scheduling of channel instructions </b>
-
-The execution order of the 4 instructions for each channel (`Recv`, `RecvDone`,
-`Send`, `SendDone`) is as below.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:70%" src="../../images/send_recv_order.png">
-</div>
-
-* `Recv` happens before `Send`
-* `Send` happens before `RecvDone`
-* `Recv` happens before `RecvDone`
-* `Send` happens before `SendDone`
-
-When the backend compilers generate a linear schedule for each computation that
-communicates via channel instructions, there must not be cycles across the
-computations. For example, below schedules lead to deadlocks.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/send_recv_schedule.png">
-</div>
-
 ## Reduce
 
 See also
@@ -1163,7 +1341,6 @@ must have a non-negative number of mantissa bits.  The number of exponent or
 mantissa bits may exceed the corresponding value for type `T`; the corresponding
 portion of the conversion is then simply a no-op.
 
-
 ## ReduceWindow
 
 See also
@@ -1348,63 +1525,97 @@ the reversing dimensions, its index i is transformed into N - 1 - i).
 One use for the `Rev` operation is to reverse the convolution weight array along
 the two window dimensions during the gradient computation in neural networks.
 
-## RngBernoulli
+## RngNormal
 
 See also
-[`ComputationBuilder::RngBernoulli`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+[`ComputationBuilder::RngNormal`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
 
 Constructs an output of a given shape with random numbers generated following
-the Bernoulli distribution. The parameter needs to be a scalar valued F32
-operand while the output shape needs to have elemental type U32.
+the $$N(\mu, \sigma)$$ normal distribution. The parameters `mu` and `sigma`, and
+output shape have to have elemental type F32. The parameters furthermore have to
+be scalar valued.
 
-<b>`RngBernoulli(mean, shape)`</b>
+<b>`RngNormal(mean, sigma, shape)`</b>
 
-| Arguments | Type                    | Semantics                             |
-| --------- | ----------------------- | ------------------------------------- |
-| `mean`    | `ComputationDataHandle` | Scalar of type F32 specifying mean of |
-:           :                         : generated numbers                     :
-| `shape`   | `Shape`                 | Output shape of type U32              |
+| Arguments | Type                    | Semantics                              |
+| --------- | ----------------------- | -------------------------------------- |
+| `mu`      | `ComputationDataHandle` | Scalar of type F32 specifying mean of  |
+:           :                         : generated numbers                      :
+| `sigma`   | `ComputationDataHandle` | Scalar of type F32 specifying standard |
+:           :                         : deviation of generated numbers         :
+| `shape`   | `Shape`                 | Output shape of type F32               |
 
-## RngNormal
+## RngUniform
 
 See also
-[`ComputationBuilder::RngNormal`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+[`ComputationBuilder::RngUniform`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
 
 Constructs an output of a given shape with random numbers generated following
-the $$N(\mu, \sigma)$$ normal distribution. The parameters `mu` and `sigma`, and
-output shape have to have elemental type F32. The parameters furthermore have to
-be scalar valued.
+the uniform distribution over the interval $$[a,b)$$. The parameters and output
+shape may be either F32, S32 or U32, but the types have to be consistent.
+Furthermore, the parameters need to be scalar valued. If $$b <= a$$ the result
+is implementation-defined.
 
-<b>`RngNormal(mean, sigma, shape)`</b>
+<b>`RngUniform(a, b, shape)`</b>
+
+| Arguments | Type                    | Semantics                         |
+| --------- | ----------------------- | --------------------------------- |
+| `a`       | `ComputationDataHandle` | Scalar of type T specifying lower |
+:           :                         : limit of interval                 :
+| `b`       | `ComputationDataHandle` | Scalar of type T specifying upper |
+:           :                         : limit of interval                 :
+| `shape`   | `Shape`                 | Output shape of type T            |
+
+## Select
+
+See also
+[`ComputationBuilder::Select`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+
+Constructs an output array from elements of two input arrays, based on the
+values of a predicate array.
+
+<b> `Select(pred, on_true, on_false)` </b>
+
+Arguments  | Type                    | Semantics
+---------- | ----------------------- | ------------------
+`pred`     | `ComputationDataHandle` | array of type PRED
+`on_true`  | `ComputationDataHandle` | array of type T
+`on_false` | `ComputationDataHandle` | array of type T
+
+The arrays `on_true` and `on_false` must have the same shape. This is also the
+shape of the output array. The array `pred` must have the same dimensionality as
+`on_true` and `on_false`, with the `PRED` element type.
 
-| Arguments | Type                    | Semantics                              |
-| --------- | ----------------------- | -------------------------------------- |
-| `mu`      | `ComputationDataHandle` | Scalar of type F32 specifying mean of  |
-:           :                         : generated numbers                      :
-| `sigma`   | `ComputationDataHandle` | Scalar of type F32 specifying standard |
-:           :                         : deviation of generated numbers         :
-| `shape`   | `Shape`                 | Output shape of type F32               |
+For each element `P` of `pred`, the corresponding element of the output array is
+taken from `on_true` if the value of `P` is `true`, and from `on_false` if the
+value of `P` is `false`. As a restricted form of [broadcasting]
+(broadcasting.md), `pred` can be a scalar of type `PRED`. In this case, the
+output array is taken wholly from `on_true` if `pred` is `true`, and from
+`on_false` if `pred` is `false`.
 
-## RngUniform
+Example with non-scalar `pred`:
 
-See also
-[`ComputationBuilder::RngUniform`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+```
+let pred: PRED[4] = {true, false, false, true};
+let v1: s32[4] = {1, 2, 3, 4};
+let v2: s32[4] = {100, 200, 300, 400};
+==>
+Select(pred, v1, v2) = s32[4]{1, 200, 300, 4};
+```
 
-Constructs an output of a given shape with random numbers generated following
-the uniform distribution over the interval $$[a,b)$$. The parameters and output
-shape may be either F32, S32 or U32, but the types have to be consistent.
-Furthermore, the parameters need to be scalar valued. If $$b <= a$$ the result
-is implementation-defined.
+Example with scalar `pred`:
 
-<b>`RngUniform(a, b, shape)`</b>
+```
+let pred: PRED = true;
+let v1: s32[4] = {1, 2, 3, 4};
+let v2: s32[4] = {100, 200, 300, 400};
+==>
+Select(pred, v1, v2) = s32[4]{1, 2, 3, 4};
+```
 
-| Arguments | Type                    | Semantics                         |
-| --------- | ----------------------- | --------------------------------- |
-| `a`       | `ComputationDataHandle` | Scalar of type T specifying lower |
-:           :                         : limit of interval                 :
-| `b`       | `ComputationDataHandle` | Scalar of type T specifying upper |
-:           :                         : limit of interval                 :
-| `shape`   | `Shape`                 | Output shape of type T            |
+Selections between tuples are supported. Tuples are considered to be scalar
+types for this purpose. If `on_true` and `on_false` are tuples (which must have
+the same shape!) then `pred` has to be a scalar of type `PRED`.
 
 ## SelectAndScatter
 
@@ -1487,56 +1698,60 @@ non-deterministic. Therefore, the `scatter` function should not be overly
 sensitive to reassociation. See the discussion about associativity in the
 context of [`Reduce`](#reduce) for more details.
 
-## Select
+## Send
 
 See also
-[`ComputationBuilder::Select`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
+[`ComputationBuilder::Send`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
 
-Constructs an output array from elements of two input arrays, based on the
-values of a predicate array.
+<b> `Send(operand, channel_handle)` </b>
 
-<b> `Select(pred, on_true, on_false)` </b>
+| Arguments        | Type                    | Semantics                        |
+| ---------------- | ----------------------- | -------------------------------- |
+| `operand`        | `ComputationDataHandle` | data to send (array of type T)   |
+| `channel_handle` | `ChannelHandle`         | unique identifier for each send/recv pair |
 
-Arguments  | Type                    | Semantics
----------- | ----------------------- | ------------------
-`pred`     | `ComputationDataHandle` | array of type PRED
-`on_true`  | `ComputationDataHandle` | array of type T
-`on_false` | `ComputationDataHandle` | array of type T
+Sends the given operand data to a `Recv` instruction in another computation
+that shares the same channel handle. Does not return any data.
 
-The arrays `on_true` and `on_false` must have the same shape. This is also the
-shape of the output array. The array `pred` must have the same dimensionality as
-`on_true` and `on_false`, with the `PRED` element type.
+Similar to the `Recv` operation, the client API of `Send` operation represents
+synchronous communication, and is internally decomposed into 2 HLO instructions
+(`Send` and `SendDone`) to enable asynchronous data transfers. See also
+[`HloInstruction::CreateSend` and `HloInstruction::CreateSendDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
 
-For each element `P` of `pred`, the corresponding element of the output array is
-taken from `on_true` if the value of `P` is `true`, and from `on_false` if the
-value of `P` is `false`. As a restricted form of [broadcasting]
-(broadcasting.md), `pred` can be a scalar of type `PRED`. In this case, the
-output array is taken wholly from `on_true` if `pred` is `true`, and from
-`on_false` if `pred` is `false`.
+<b>`Send(HloInstruction operand, int64 channel_id)`</b>
 
-Example with non-scalar `pred`:
+Initiates an asynchronous transfer of the operand to the resources allocated by
+the `Recv` instruction with the same channel id. Returns a context, which is
+used by a following `SendDone` instruction to wait for the completion of the
+data transfer. The context is a tuple of {operand (shape), request identifier
+(U32)} and it can only be used by a `SendDone` instruction.
 
-```
-let pred: PRED[4] = {true, false, false, true};
-let v1: s32[4] = {1, 2, 3, 4};
-let v2: s32[4] = {100, 200, 300, 400};
-==>
-Select(pred, v1, v2) = s32[4]{1, 200, 300, 4};
-```
+<b> `SendDone(HloInstruction context)` </b>
 
-Example with scalar `pred`:
+Given a context created by a `Send` instruction, waits for the data transfer to
+complete.  The instruction does not return any data.
 
-```
-let pred: PRED = true;
-let v1: s32[4] = {1, 2, 3, 4};
-let v2: s32[4] = {100, 200, 300, 400};
-==>
-Select(pred, v1, v2) = s32[4]{1, 2, 3, 4};
-```
+<b> Scheduling of channel instructions </b>
 
-Selections between tuples are supported. Tuples are considered to be scalar
-types for this purpose. If `on_true` and `on_false` are tuples (which must have
-the same shape!) then `pred` has to be a scalar of type `PRED`.
+The execution order of the 4 instructions for each channel (`Recv`, `RecvDone`,
+`Send`, `SendDone`) is as below.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:70%" src="../../images/send_recv_order.png">
+</div>
+
+* `Recv` happens before `Send`
+* `Send` happens before `RecvDone`
+* `Recv` happens before `RecvDone`
+* `Send` happens before `SendDone`
+
+When the backend compilers generate a linear schedule for each computation that
+communicates via channel instructions, there must not be cycles across the
+computations. For example, below schedules lead to deadlocks.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/send_recv_schedule.png">
+</div>
 
 ## Slice
 
@@ -1590,132 +1805,6 @@ Slice(b, {2, 1}, {4, 3}) produces:
     {10.0, 11.0} }
 ```
 
-## DynamicSlice
-
-See also
-[`ComputationBuilder::DynamicSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-DynamicSlice extracts a sub-array from the input array at dynamic
-`start_indices`. The size of the slice in each dimension is passed in
-`size_indices`, which specify the end point of exclusive slice intervals in each
-dimension: [start, start + size). The shape of `start_indices` must be rank ==
-1, with dimension size equal to the rank of `operand`.
-Note: handling of out-of-bounds slice indices (generated by incorrect runtime
-calculation of 'start_indices') is currently implementation-defined. Currently,
-slice indices are computed modulo input dimension sizes to prevent out-of-bound
-array accesses, but this behavior may change in future implementations.
-
-<b> `DynamicSlice(operand, start_indices, size_indices)` </b>
-
-| Arguments       | Type                    | Semantics                        |
-| --------------- | ----------------------- | -------------------------------- |
-| `operand`       | `ComputationDataHandle` | N dimensional array of type T    |
-| `start_indices` | `ComputationDataHandle` | Rank 1 array of N integers       |
-:                 :                         : containing the starting indices  :
-:                 :                         : of the slice for each dimension. :
-:                 :                         : Value must be greater than or    :
-:                 :                         : equal to zero.                   :
-| `size_indices`  | `ArraySlice<int64>`     | List of N integers containing    |
-:                 :                         : the slice size for each          :
-:                 :                         : dimension. Each value must be    :
-:                 :                         : strictly greater than zero, and  :
-:                 :                         : start + size must be less than   :
-:                 :                         : or equal to the size of the      :
-:                 :                         : dimension to avoid wrapping      :
-:                 :                         : modulo dimension size.           :
-
-1-dimensional example:
-
-```
-let a = {0.0, 1.0, 2.0, 3.0, 4.0}
-let s = {2}
-
-DynamicSlice(a, s, {2}) produces:
-  {2.0, 3.0}
-```
-
-2-dimensional example:
-
-```
-let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
-let s = {2, 1}
-
-DynamicSlice(b, s, {2, 2}) produces:
-  { { 7.0,  8.0},
-    {10.0, 11.0} }
-```
-## DynamicUpdateSlice
-
-See also
-[`ComputationBuilder::DynamicUpdateSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/computation_builder.h).
-
-DynamicUpdateSlice generates a result which is the value of the input array
-`operand`, with a slice `update` overwritten at `start_indices`.
-The shape of `update` determines the shape of the sub-array of the result which
-is updated.
-The shape of `start_indices` must be rank == 1, with dimension size equal to
-the rank of `operand`.
-Note: handling of out-of-bounds slice indices (generated by incorrect runtime
-calculation of 'start_indices') is currently implementation-defined. Currently,
-slice indices are computed modulo update dimension sizes to prevent out-of-bound
-array accesses, but this behavior may change in future implementations.
-
-<b> `DynamicUpdateSlice(operand, update, start_indices)` </b>
-
-| Arguments       | Type                    | Semantics                        |
-| --------------- | ----------------------- | -------------------------------- |
-| `operand`       | `ComputationDataHandle` | N dimensional array of type T    |
-| `update`        | `ComputationDataHandle` | N dimensional array of type T    |
-:                 :                         : containing the slice update.     :
-:                 :                         : Each dimension of update shape    :
-:                 :                         : must be strictly greater than    :
-:                 :                         : zero, and start + update must be :
-:                 :                         : less than operand size for each  :
-:                 :                         : dimension to avoid generating    :
-:                 :                         : out-of-bounds update indices.    :
-| `start_indices` | `ComputationDataHandle` | Rank 1 array of N integers       |
-:                 :                         : containing the starting indices  :
-:                 :                         : of the slice for each dimension. :
-:                 :                         : Value must be greater than or    :
-:                 :                         : equal to zero.                   :
-
-1-dimensional example:
-
-```
-let a = {0.0, 1.0, 2.0, 3.0, 4.0}
-let u = {5.0, 6.0}
-let s = {2}
-
-DynamicUpdateSlice(a, u, s) produces:
-  {0.0, 1.0, 5.0, 6.0, 4.0}
-```
-
-2-dimensional example:
-
-```
-let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
-let u =
- { {12.0,  13.0},
-   {14.0,  15.0},
-   {16.0,  17.0} }
-
-let s = {1, 1}
-
-DynamicUpdateSlice(b, u, s) produces:
- { {0.0,  1.0,  2.0},
-   {3.0, 12.0, 13.0},
-   {6.0, 14.0, 15.0},
-   {9.0, 16.0, 17.0} }
-```
-
 ## Sort
 
 See also
diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md
index 9ced56f0f5b7de7c60dd1393fce95667b0c5303d..9ede4ab83c1dcdb7370e83dfb9227fbb235d0689 100644
--- a/tensorflow/docs_src/programmers_guide/datasets.md
+++ b/tensorflow/docs_src/programmers_guide/datasets.md
@@ -1,16 +1,16 @@
 # Importing Data
 
-The @{tf.data.Dataset$`Dataset`} API enables you to build complex input pipelines from
+The @{tf.data} API enables you to build complex input pipelines from
 simple, reusable pieces. For example, the pipeline for an image model might
 aggregate data from files in a distributed file system, apply random
 perturbations to each image, and merge randomly selected images into a batch
 for training. The pipeline for a text model might involve extracting symbols
 from raw text data, converting them to embedding identifiers with a lookup
-table, and batching together sequences of different lengths. The `Dataset` API
+table, and batching together sequences of different lengths. The `tf.data` API
 makes it easy to deal with large amounts of data, different data formats, and
 complicated transformations.
 
-The `Dataset` API introduces two new abstractions to TensorFlow:
+The `tf.data` API introduces two new abstractions to TensorFlow:
 
 * A `tf.data.Dataset` represents a sequence of elements, in which
   each element contains one or more `Tensor` objects. For example, in an image
@@ -121,7 +121,7 @@ dataset3 = dataset3.filter(lambda x, (y, z): ...)
 ### Creating an iterator
 
 Once you have built a `Dataset` to represent your input data, the next step is to
-create an `Iterator` to access elements from that dataset.  The `Dataset` API
+create an `Iterator` to access elements from that dataset.  The `tf.data` API
 currently supports the following iterators, in increasing level of
 sophistication:
 
@@ -379,7 +379,7 @@ sess.run(iterator.initializer, feed_dict={features_placeholder: features,
 
 ### Consuming TFRecord data
 
-The `Dataset` API supports a variety of file formats so that you can process
+The `tf.data` API supports a variety of file formats so that you can process
 large datasets that do not fit in memory. For example, the TFRecord file format
 is a simple record-oriented binary format that many TensorFlow applications use
 for training data. The `tf.data.TFRecordDataset` class enables you to
@@ -455,9 +455,6 @@ dataset = dataset.flat_map(
         .filter(lambda line: tf.not_equal(tf.substr(line, 0, 1), "#"))))
 ```
 
-For a full example of parsing a CSV file using datasets, see [`imports85.py`](https://www.tensorflow.org/code/tensorflow/examples/get_started/regression/imports85.py)
-in @{$get_started/linear_regression}.
-
 <!--
 TODO(mrry): Add these sections.
 
@@ -540,7 +537,7 @@ import cv2
 # Use a custom OpenCV function to read the image, instead of the standard
 # TensorFlow `tf.read_file()` operation.
 def _read_py_function(filename, label):
-  image_decoded = cv2.imread(image_string, cv2.IMREAD_GRAYSCALE)
+  image_decoded = cv2.imread(filename.decode(), cv2.IMREAD_GRAYSCALE)
   return image_decoded, label
 
 # Use standard TensorFlow operations to resize the image to a fixed shape.
@@ -628,7 +625,7 @@ TODO(mrry): Add this section.
 
 ### Processing multiple epochs
 
-The `Dataset` API offers two main ways to process multiple epochs of the same
+The `tf.data` API offers two main ways to process multiple epochs of the same
 data.
 
 The simplest way to iterate over a dataset in multiple epochs is to use the
@@ -693,7 +690,7 @@ dataset = dataset.repeat()
 The @{tf.train.MonitoredTrainingSession} API simplifies many aspects of running
 TensorFlow in a distributed setting. `MonitoredTrainingSession` uses the
 @{tf.errors.OutOfRangeError} to signal that training has completed, so to use it
-with the `Dataset` API, we recommend using
+with the `tf.data` API, we recommend using
 `Dataset.make_one_shot_iterator()`. For example:
 
 ```python
@@ -735,7 +732,7 @@ def dataset_input_fn():
     parsed = tf.parse_single_example(record, keys_to_features)
 
     # Perform additional preprocessing on the parsed data.
-    image = tf.decode_jpeg(parsed["image_data"])
+    image = tf.image.decode_jpeg(parsed["image_data"])
     image = tf.reshape(image, [299, 299, 1])
     label = tf.cast(parsed["label"], tf.int32)
 
diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md
index 25cb72008d5a5418f46aa543871e97cee996ecb5..9eaee2702829cbfd96cd56e832003724eba5bb1b 100644
--- a/tensorflow/docs_src/programmers_guide/debugger.md
+++ b/tensorflow/docs_src/programmers_guide/debugger.md
@@ -159,6 +159,7 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at
 | | `-r <range>` | Highlight elements falling into specified numerical range. Multiple ranges can be used in conjunction. | `pt hidden/Relu:0 -a -r [[-inf,-1],[1,inf]]` |
 | | `-n <number>` | Print dump corresponding to specified 0-based dump number. Required for tensors with multiple dumps. | `pt -n 0 hidden/Relu:0` |
 | | `-s` | Include a summary of the numeric values of the tensor (applicable only to non-empty tensors with Boolean and numeric types such as `int*` and `float*`.) | `pt -s hidden/Relu:0[0:50,:]` |
+| | `-w` | Write the value of the tensor (possibly sliced) to a Numpy file using [`numpy.save()`](https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.save.html) | `pt -s hidden/Relu:0 -w /tmp/relu.npy` |
 | **`@[coordinates]`** | | Navigate to specified element in `pt` output. | `@[10,0]` or `@10,0` |
 | **`/regex`** | |  [less](https://linux.die.net/man/1/less)-style search for given regular expression. | `/inf` |
 | **`/`** | | Scroll to the next line with matches to the searched regex (if any). | `/` |
@@ -167,6 +168,7 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at
 | **eval** | | **Evaluate arbitrary Python and numpy expression.** | |
 | | `eval <expression>` | Evaluate a Python / numpy expression, with numpy available as `np` and debug tensor names enclosed in backticks. | ``eval "np.matmul((`output/Identity:0` / `Softmax:0`).T, `Softmax:0`)"`` |
 | | `-a` | Print a large-sized evaluation result in its entirety, i.e., without using ellipses. | ``eval -a 'np.sum(`Softmax:0`, axis=1)'`` |
+| | `-w` | Write the result of the evaluation to a Numpy file using [`numpy.save()`](https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.save.html) | ``eval -a 'np.sum(`Softmax:0`, axis=1)' -w /tmp/softmax_sum.npy`` |
 | **`ni`** | | **Display node information.** | |
 | | `-a` | Include node attributes in the output. | `ni -a hidden/Relu` |
 | | `-d` | List the debug dumps available from the node. | `ni -d hidden/Relu` |
@@ -338,11 +340,11 @@ tfdbg> ni cross_entropy/Log
 ![tfdbg run-end UI: infs and nans](https://www.tensorflow.org/images/tfdbg_screenshot_run_end_node_info.png)
 
 You can see that this node has the op type `Log`
-and that its input is the node `softmax/Softmax`. Run the following command to
+and that its input is the node `Softmax`. Run the following command to
 take a closer look at the input tensor:
 
 ```none
-tfdbg> pt softmax/Softmax:0
+tfdbg> pt Softmax:0
 ```
 
 Examine the values in the input tensor, searching for zeros:
@@ -392,7 +394,7 @@ diff = -(y_ * tf.log(y))
 to the built-in, numerically-stable implementation of softmax cross-entropy:
 
 ```python
-diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits)
+diff = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=logits)
 ```
 
 Rerun with the `--debug` flag as follows:
diff --git a/tensorflow/docs_src/programmers_guide/embedding.md b/tensorflow/docs_src/programmers_guide/embedding.md
index 4095c6c97a4703bdf16e8feceaacdefaa50488b3..e8027fc12b368ddfbc51cc47441478901d7caec7 100644
--- a/tensorflow/docs_src/programmers_guide/embedding.md
+++ b/tensorflow/docs_src/programmers_guide/embedding.md
@@ -2,9 +2,10 @@
 
 This document introduces the concept of embeddings, gives a simple example of
 how to train an embedding in TensorFlow, and explains how to view embeddings
-with the TensorBoard Embedding Projector. The first two parts target newcomers
-to machine learning or TensorFlow, and the Embedding Projector how-to is for
-users at all levels.
+with the TensorBoard Embedding Projector
+([live example](http://projector.tensorflow.org)). The first two parts target
+newcomers to machine learning or TensorFlow, and the Embedding Projector how-to
+is for users at all levels.
 
 [TOC]
 
@@ -119,7 +120,7 @@ data set.
   text patterns.
 
 Further useful articles are
-[How to Use t-SNE Effectively](distill.pub/2016/misread-tsne/) and
+[How to Use t-SNE Effectively](https://distill.pub/2016/misread-tsne/) and
 [Principal Component Analysis Explained Visually](http://setosa.io/ev/principal-component-analysis/).
 
 ### Exploration
diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md
index 6544a16f2bcb1ebbbe33489bd1a0974aa30f6a17..ffadf29ad7710de860a56253f279204d17cc318a 100644
--- a/tensorflow/docs_src/programmers_guide/estimators.md
+++ b/tensorflow/docs_src/programmers_guide/estimators.md
@@ -134,7 +134,7 @@ The heart of every Estimator--whether pre-made or custom--is its
 evaluation, and prediction. When you are using a pre-made Estimator,
 someone else has already implemented the model function. When relying
 on a custom Estimator, you must write the model function yourself. A
-@{$extend/estimators$companion document}
+@{$get_started/custom_estimators$companion document}
 explains how to write the model function.
 
 
@@ -186,9 +186,9 @@ est_inception_v3.train(input_fn=train_input_fn, steps=2000)
 ```
 Note that the names of feature columns and labels of a keras estimator come from
 the corresponding compiled keras model. For example, the input key names for
-@{$get_started/input_fn} in above `est_inception_v3` estimator can be obtained
-from `keras_inception_v3.input_names`, and similarily, the predicted output
-names can be obtained from `keras_inception_v3.output_names`.
+`train_input_fn` above can be obtained from `keras_inception_v3.input_names`,
+and similarly, the predicted output names can be obtained from
+`keras_inception_v3.output_names`.
 
 For more details, please refer to the documentation for
 @{tf.keras.estimator.model_to_estimator}.
diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md
index 67ed0a9a607677242838199d346393439b48545d..70931f2862de98cb1e934f85919d558a3b36304a 100644
--- a/tensorflow/docs_src/programmers_guide/faq.md
+++ b/tensorflow/docs_src/programmers_guide/faq.md
@@ -68,14 +68,6 @@ dictionary that maps @{tf.Tensor} objects to
 numpy arrays (and some other types), which will be used as the values of those
 tensors in the execution of a step.
 
-Often, you have certain tensors, such as inputs, that will always be fed. The
-@{tf.placeholder} op allows you
-to define tensors that *must* be fed, and optionally allows you to constrain
-their shape as well. See the
-@{$beginners$beginners' MNIST tutorial} for an
-example of how placeholders and feeding can be used to provide the training data
-for a neural network.
-
 #### What is the difference between `Session.run()` and `Tensor.eval()`?
 
 If `t` is a @{tf.Tensor} object,
@@ -300,7 +292,7 @@ functions, methods, and properties. We also adhere to the
 [Google Python style guide](https://google.github.io/styleguide/pyguide.html).
 
 The TensorFlow C++ code base adheres to the
-[Google C++ style guide](http://google.github.io/styleguide/cppguide.html).
+[Google C++ style guide](https://google.github.io/styleguide/cppguide.html).
 
 (<sup>*</sup> With one exception: we use 2-space indentation instead of 4-space
 indentation.)
diff --git a/tensorflow/docs_src/get_started/graph_viz.md b/tensorflow/docs_src/programmers_guide/graph_viz.md
similarity index 98%
rename from tensorflow/docs_src/get_started/graph_viz.md
rename to tensorflow/docs_src/programmers_guide/graph_viz.md
index 06ec427b757d6a34270b646341786bc8925473d5..f581ae56dae45238d697196e8ad56c86f7309604 100644
--- a/tensorflow/docs_src/get_started/graph_viz.md
+++ b/tensorflow/docs_src/programmers_guide/graph_viz.md
@@ -248,8 +248,9 @@ The images below show the CIFAR-10 model with tensor shape information:
 Often it is useful to collect runtime metadata for a run, such as total memory
 usage, total compute time, and tensor shapes for nodes. The code example below
 is a snippet from the train and test section of a modification of the
-@{$beginners$simple MNIST tutorial},
-in which we have recorded summaries and runtime statistics. See the @{$summaries_and_tensorboard#serializing-the-data$Summaries Tutorial}
+@{$layers$simple MNIST tutorial}, in which we have recorded summaries and
+runtime statistics. See the
+@{$summaries_and_tensorboard#serializing-the-data$Summaries Tutorial}
 for details on how to record summaries.
 Full source is [here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py).
 
diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md
index 984058297f9ae1ad25ea4c0ef036f0477a6ac024..2b4896c381052b5a3fb97385a18dbff82c2c0d89 100644
--- a/tensorflow/docs_src/programmers_guide/graphs.md
+++ b/tensorflow/docs_src/programmers_guide/graphs.md
@@ -487,7 +487,7 @@ subgraph inside.
 ![](../images/mnist_deep.png)
 
 For more information about visualizing your TensorFlow application with
-TensorBoard, see the [TensorBoard tutorial](TODO).
+TensorBoard, see the [TensorBoard tutorial](../get_started/summaries_and_tensorboard.md).
 
 ## Programming with multiple graphs
 
diff --git a/tensorflow/docs_src/programmers_guide/index.md b/tensorflow/docs_src/programmers_guide/index.md
index 2e2cf7c0818bb4854675726b1660f31fb73cb3d4..d45e666ce7b440bae20ba32d894526372af7e17b 100644
--- a/tensorflow/docs_src/programmers_guide/index.md
+++ b/tensorflow/docs_src/programmers_guide/index.md
@@ -1,16 +1,24 @@
 # Programmer's Guide
 
-The documents in this unit dive into the details of writing TensorFlow
-code.  For TensorFlow 1.3, we revised this document extensively.
-The units are now as follows:
+The documents in this unit dive into the details of how TensorFlow
+works. The units are as follows:
 
-  * @{$programmers_guide/estimators$Estimators}, which introduces a high-level
+## High Level APIs
+
+  * @{$programmers_guide/estimators}, which introduces a high-level
     TensorFlow API that greatly simplifies ML programming.
-  * @{$programmers_guide/tensors$Tensors}, which explains how to create,
+  * @{$programmers_guide/datasets}, which explains how to
+    set up data pipelines to read data sets into your TensorFlow program.
+
+## Low Level APIs
+
+  * @{$programmers_guide/low_level_intro}, which introduces the
+    basics of how you can to use TensorFlow outside of the high Level APIs.
+  * @{$programmers_guide/tensors}, which explains how to create,
     manipulate, and access Tensors--the fundamental object in TensorFlow.
-  * @{$programmers_guide/variables$Variables}, which details how
+  * @{$programmers_guide/variables}, which details how
     to represent shared, persistent state in your program.
-  * @{$programmers_guide/graphs$Graphs and Sessions}, which explains:
+  * @{$programmers_guide/graphs}, which explains:
       * dataflow graphs, which are TensorFlow's representation of computations
         as dependencies between operations.
       * sessions, which are TensorFlow's mechanism for running dataflow graphs
@@ -20,18 +28,40 @@ The units are now as follows:
     such as Estimators or Keras, the high-level API creates and manages
     graphs and sessions for you, but understanding graphs and sessions
     can still be helpful.
-  * @{$programmers_guide/saved_model$Saving and Restoring}, which
+  * @{$programmers_guide/saved_model}, which
     explains how to save and restore variables and models.
-  * @{$programmers_guide/datasets$Input Pipelines}, which explains how to
-    set up data pipelines to read data sets into your TensorFlow program.
-  * @{$programmers_guide/embedding$Embeddings}, which introduces the concept
+  * @{$using_gpu} explains how TensorFlow assigns operations to
+    devices and how you can change the arrangement manually.
+
+
+## ML Concepts
+
+  * @{$programmers_guide/embedding}, which introduces the concept
     of embeddings, provides a simple example of training an embedding in
     TensorFlow, and explains how to view embeddings with the TensorBoard
     Embedding Projector.
-  * @{$programmers_guide/debugger$Debugging TensorFlow Programs}, which
+
+## Debugging
+
+  * @{$programmers_guide/debugger}, which
     explains how to use the TensorFlow debugger (tfdbg).
-  * @{$programmers_guide/version_compat$TensorFlow Version Compatibility},
+
+## TensorBoard
+
+TensorBoard is a utility to visualize different aspects of machine learning.
+The following guides explain how to use TensorBoard:
+
+  * @{$programmers_guide/summaries_and_tensorboard},
+    which introduces TensorBoard.
+  * @{$programmers_guide/graph_viz}, which
+    explains how to visualize the computational graph.
+  * @{$programmers_guide/tensorboard_histograms} which demonstrates the how to
+    use TensorBoard's histogram dashboard.
+
+
+## Misc
+
+  * @{$programmers_guide/version_compat},
     which explains backward compatibility guarantees and non-guarantees.
-  * @{$programmers_guide/faq$FAQ}, which contains frequently asked
-    questions about TensorFlow. (We have not revised this document for v1.3,
-    except to remove some obsolete information.)
+  * @{$programmers_guide/faq}, which contains frequently asked
+    questions about TensorFlow.
diff --git a/tensorflow/docs_src/programmers_guide/leftnav_files b/tensorflow/docs_src/programmers_guide/leftnav_files
index 5adc7fad6ce2200d52e79d35234a209d87ad3d58..38de3ccc3e474e6051976c810519212da8f5051e 100644
--- a/tensorflow/docs_src/programmers_guide/leftnav_files
+++ b/tensorflow/docs_src/programmers_guide/leftnav_files
@@ -1,12 +1,28 @@
 index.md
+
+### High Level APIs
 estimators.md
+datasets.md
+
+### Low Level APIs
+low_level_intro.md
 tensors.md
 variables.md
 graphs.md
 saved_model.md
-datasets.md
+using_gpu.md
+
+### ML Concepts
 embedding.md
+
+### Debugging
 debugger.md
-supervisor.md
+
+### TensorBoard
+summaries_and_tensorboard.md
+graph_viz.md
+tensorboard_histograms.md
+
+### Misc
 version_compat.md
 faq.md
diff --git a/tensorflow/docs_src/programmers_guide/low_level_intro.md b/tensorflow/docs_src/programmers_guide/low_level_intro.md
new file mode 100644
index 0000000000000000000000000000000000000000..8f6d3fbd46d8b76d6033d95fd51c1df45733f5a3
--- /dev/null
+++ b/tensorflow/docs_src/programmers_guide/low_level_intro.md
@@ -0,0 +1,587 @@
+# Introduction
+
+This guide gets you started programming in the low-level TensorFlow APIs
+(TensorFlow Core), showing you how to:
+
+  * Manage your own TensorFlow program (a `tf.Graph`) and TensorFlow
+    runtime (a `tf.Session`), instead of relying on Estimators to manage them.
+  * Run TensorFlow operations, using a `tf.Session`.
+  * Use high level components ([datasets](#datasets), [layers](#layers), and
+    [feature_columns](#feature_columns)) in this low level environment.
+  * Build your own training loop, instead of using the one
+    @{$get_started/premade_estimators$provided by Estimators}.
+
+We recommend using the higher level APIs to build models when possible.
+Knowing TensorFlow Core is valuable for the following reasons:
+
+  * Experimentation and debugging are both more straight forward
+    when you can use low level TensorFlow operations directly.
+  * It gives you a mental model of how things work internally when
+    using the higher level APIs.
+
+## Setup
+
+Before using this guide, @{$install$install TensorFlow}.
+
+To get the most out of this guide, you should know the following:
+
+*   How to program in Python.
+*   At least a little bit about arrays.
+*   Ideally, something about machine learning.
+
+Feel free to launch `python` and follow along with this walkthrough.
+Run the following lines to set up your Python environment:
+
+```python
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+```
+
+## Tensor Values
+
+The central unit of data in TensorFlow is the **tensor**. A tensor consists of a
+set of primitive values shaped into an array of any number of dimensions. A
+tensor's **rank** is its number of dimensions, while its **shape** is a tuple
+of integers specifying the array's length along each dimension. Here are some
+examples of tensor values:
+
+```python
+3. # a rank 0 tensor; a scalar with shape [],
+[1., 2., 3.] # a rank 1 tensor; a vector with shape [3]
+[[1., 2., 3.], [4., 5., 6.]] # a rank 2 tensor; a matrix with shape [2, 3]
+[[[1., 2., 3.]], [[7., 8., 9.]]] # a rank 3 tensor with shape [2, 1, 3]
+```
+
+TensorFlow uses numpy arrays to represent tensor **values**.
+
+## TensorFlow Core Walkthrough
+
+You might think of TensorFlow Core programs as consisting of two discrete
+sections:
+
+1.  Building the computational graph (a @{tf.Graph}).
+2.  Running the computational graph (using a @{tf.Session}).
+
+### Graph
+
+A **computational graph** is a series of TensorFlow operations arranged into a
+graph. The graph is composed of two types of objects.
+
+  * @{tf.Operation$Operations} (or "ops"): The nodes of the graph.
+    Operations describe calculations that consume and produce tensors.
+  * @{tf.Tensor$Tensors}: The edges in the graph. These represent the values
+    that will flow through the graph. Most TensorFlow functions return
+    `tf.Tensors`.
+
+Important: `tf.Tensors` do not have values, they are just handles to elements
+in the computation graph.
+
+Let's build a simple computational graph. The most basic operation is a
+constant. The Python function that builds the operation takes a tensor value as
+input. The resulting operation takes no inputs. When run, it outputs the
+value that was passed to the constructor. We can create two floating point
+constants `a` and `b` as follows:
+
+```python
+a = tf.constant(3.0, dtype=tf.float32)
+b = tf.constant(4.0) # also tf.float32 implicitly
+total = a + b
+print(a)
+print(b)
+print(total)
+```
+
+The print statements produce:
+
+```
+Tensor("Const:0", shape=(), dtype=float32)
+Tensor("Const_1:0", shape=(), dtype=float32)
+Tensor("add:0", shape=(), dtype=float32)
+```
+
+Notice that printing the tensors does not output the values `3.0`, `4.0`, and
+`7.0` as you might expect. The above statements only build the computation
+graph. These `tf.Tensor` objects just represent the results of the operations
+that will be run.
+
+Each operation in a graph is given a unique name. This name is independent of
+the names the objects are assigned to in Python. Tensors are named after the
+operation that produces them followed by an output index, as in
+`"add:0"` above.
+
+### TensorBoard
+
+TensorFlow provides a utility called TensorBoard. One of TensorBoard's many
+capabilities is visualizing a computation graph. You can easily do this with
+a few simple commands.
+
+First you save the computation graph to a TensorBoard summary file as
+follows:
+
+```
+writer = tf.summary.FileWriter('.')
+writer.add_graph(tf.get_default_graph())
+```
+
+This will produce an `event` file in the current directory with a name in the
+following format:
+
+```
+events.out.tfevents.{timestamp}.{hostname}
+```
+
+Now, in a new terminal, launch TensorBoard with the following shell command:
+
+```bsh
+tensorboard --logdir .
+```
+
+Then open TensorBoard's [graphs page](http://localhost:6006/#graphs) in your
+browser, and you should see a graph similar to the following:
+
+![TensorBoard screenshot](https://www.tensorflow.org/images/getting_started_add.png)
+
+For more about TensorBoard's graph visualization tools see @{$graph_viz}.
+
+### Session
+
+To evaluate tensors, instantiate a @{tf.Session} object, informally known as a
+**session**. A session encapsulates the state of the TensorFlow runtime, and
+runs TensorFlow operations. If a `tf.Graph` is like a `.py` file, a `tf.Session`
+is like the `python` executable.
+
+The following code creates a `tf.Session` object and then invokes its `run`
+method to evaluate the `total` tensor we created above:
+
+```python
+sess = tf.Session()
+print(sess.run(total))
+```
+
+When you request the output of a node with `Session.run` TensorFlow backtracks
+through the graph and runs all the nodes that provide input to the requested
+output node. So this prints the expected value of 7.0:
+
+```
+7.0
+```
+
+You can pass multiple tensors to `tf.Session.run`. The `run` method
+transparently handles any combination of tuples or dictionaries, as in the
+following example:
+
+```python
+print(sess.run({'ab':(a, b), 'total':total}))
+```
+
+which returns the results in a structure of the same layout:
+
+``` None
+{'total': 7.0, 'ab': (3.0, 4.0)}
+```
+
+During a call to `tf.Session.run` any `tf.Tensor` only has a single value.
+For example, the following code calls `tf.random_uniform` to produce a
+`tf.Tensor` that generates a random 3-element vector (with values in `[0,1)`):
+
+```python
+vec = tf.random_uniform(shape=(3,))
+out1 = vec + 1
+out2 = vec + 2
+print(sess.run(vec))
+print(sess.run(vec))
+print(sess.run((out1, out2)))
+```
+
+The result shows a different random value on each call to `run`, but
+a consistent value during a single `run` (`out1` and `out2` receive the same
+random input):
+
+```
+[ 0.52917576  0.64076328  0.68353939]
+[ 0.66192627  0.89126778  0.06254101]
+(
+  array([ 1.88408756,  1.87149239,  1.84057522], dtype=float32),
+  array([ 2.88408756,  2.87149239,  2.84057522], dtype=float32)
+)
+```
+
+Some TensorFlow functions return `tf.Operations` instead of `tf.Tensors`.
+The result of calling `run` on an Operation is `None`. You run an operation
+to cause a side-effect, not to retrieve a value. Examples of this include the
+[initialization](#Initializing Layers), and [training](#Training) ops
+demonstrated later.
+
+### Feeding
+
+As it stands, this graph is not especially interesting because it always
+produces a constant result. A graph can be parameterized to accept external
+inputs, known as **placeholders**. A **placeholder** is a promise to provide a
+value later, like a function argument.
+
+```python
+x = tf.placeholder(tf.float32)
+y = tf.placeholder(tf.float32)
+z = x + y
+```
+
+The preceding three lines are a bit like a function in which we
+define two input parameters (`x` and `y`) and then an operation on them. We can
+evaluate this graph with multiple inputs by using the `feed_dict` argument of
+the @{tf.Session.run$run method} to feed concrete values to the placeholders:
+
+```python
+print(sess.run(z, feed_dict={x: 3, y: 4.5}))
+print(sess.run(z, feed_dict={x: [1, 3], y: [2, 4]}))
+```
+This results in the following output:
+
+```
+7.5
+[ 3.  7.]
+```
+
+Also note that the `feed_dict` argument can be used to overwrite any tensor in
+the graph. The only difference between placeholders and other `tf.Tensors` is
+that placeholders throw an error if no value is fed to them.
+
+## Datasets
+
+Placeholders work for simple experiments, but @{tf.data$Datasets} are the
+preferred method of streaming data into a model.
+
+To get a runnable `tf.Tensor` from a Dataset you must first convert it to a
+@{tf.data.Iterator}, and then call the Iterator's
+@{tf.data.Iterator.get_next$`get_next`} method.
+
+The simplest way to create an Iterator is with the
+@{tf.data.Dataset.make_one_shot_iterator$`make_one_shot_iterator`} method.
+For example, in the following code the `next_item` tensor will return a row from
+the `my_data` array on each `run` call:
+
+``` python
+my_data = [
+    [0, 1,],
+    [2, 3,],
+    [4, 5,],
+    [6, 7,],
+]
+slices = tf.data.Dataset.from_tensor_slices(my_data)
+next_item = slices.make_one_shot_iterator().get_next()
+```
+
+Reaching the end of the data stream causes `Dataset` to throw an
+@{tf.errors.OutOfRangeError$`OutOfRangeError`}. For example, the following code
+reads the `next_item` until there is no more data to read:
+
+``` python
+while True:
+  try:
+    print(sess.run(next_item))
+  except tf.errors.OutOfRangeError:
+    break
+```
+
+For more details on Datasets and Iterators see: @{$programmers_guide/datasets}.
+
+## Layers
+
+A trainable model must modify the values in the graph to get new outputs with
+the same input.  @{tf.layers$Layers} are the preferred way to add trainable
+parameters to a graph.
+
+Layers package together both the variables and the operations that act
+on them, . For example a
+[densely-connected layer](https://developers.google.com/machine-learning/glossary/#fully_connected_layer)
+performs a weighted sum across all inputs
+for each output and applies an optional
+[activation function](https://developers.google.com/machine-learning/glossary/#activation_function).
+The connection weights and biases are managed by the layer object.
+
+### Creating Layers
+
+The following code creates a @{tf.layers.Dense$`Dense`} layer that takes a
+batch of input vectors, and produces a single output value for each. To apply a
+layer to an input, call the layer as if it were a function. For example:
+
+```python
+x = tf.placeholder(tf.float32, shape=[None, 3])
+linear_model = tf.layers.Dense(units=1)
+y = linear_model(x)
+```
+
+The layer inspects its input to determine sizes for its internal variables. So
+here we must set the shape of the `x` placeholder so that the layer can
+build a weight matrix of the correct size.
+
+Now that we have defined the calculation of the output, `y`, there is one more
+detail we need to take care of before we run the calculation.
+
+### Initializing Layers
+
+The layer contains variables that must be **initialized** before they can be
+used. While it is possible to initialize variables individually, you can easily
+initialize all the variables in a TensorFlow graph as follows:
+
+```python
+init = tf.global_variables_initializer()
+sess.run(init)
+```
+
+Important: Calling `tf.global_variables_initializer` only
+creates and returns a handle to a TensorFlow operation. That op
+will initialize all the global variables when we run it with `tf.Session.run`.
+
+Also note that this `global_variables_initializer` only initializes variables
+that existed in the graph when the  initializer was created. So the initializer
+should be one of the last things added during graph construction.
+
+### Executing Layers
+
+Now that the layer is initialized, we can evaluate the `linear_model`'s output
+tensor as we would any other tensor. For example, the following code:
+
+```python
+print(sess.run(y, {x: [[1, 2, 3],[4, 5, 6]]}))
+```
+
+will generate a two-element output vector such as the following:
+
+```
+[[-3.41378999]
+ [-9.14999008]]
+```
+
+### Layer Function shortcuts
+
+For each layer class (like @{tf.layers.Dense}) TensorFlow also supplies a
+shortcut function (like @{tf.layers.dense}). The only difference is that the
+shortcut function versions create and run the layer in a single call. For
+example, the following code is equivalent to the earlier version:
+
+```python
+x = tf.placeholder(tf.float32, shape=[None, 3])
+y = tf.layers.dense(x, units=1)
+
+init = tf.global_variables_initializer()
+sess.run(init)
+
+print(sess.run(y, {x: [[1, 2, 3], [4, 5, 6]]}))
+```
+
+While convenient, this approach allows no access to the @{tf.layers.Layer}
+object. This makes introspection and debugging more difficult,
+and layer reuse impossible.
+
+## Feature columns
+
+The easiest way to experiment with feature columns is using the
+@{tf.feature_column.input_layer} function. This function only accepts
+@{$get_started/feature_columns$dense columns} as inputs, so to view the result
+of a categorical column you must wrap it in an
+@{tf.feature_column.indicator_column}. For example:
+
+``` python
+features = {
+    'sales' : [[5], [10], [8], [9]],
+    'department': ['sports', 'sports', 'gardening', 'gardening']}
+
+department_column = tf.feature_column.categorical_column_with_vocabulary_list(
+        'department', ['sports', 'gardening'])
+department_column = tf.feature_column.indicator_column(department_column)
+
+columns = [
+    tf.feature_column.numeric_column('sales'),
+    department_column
+]
+
+inputs = tf.feature_column.input_layer(features, columns)
+```
+
+Running the `inputs` tensor will parse the `features` into a batch of vectors.
+
+Feature columns can have internal state, like layers, so they often need to be
+initialized. Categorical columns use @{tf.contrib.lookup$lookup tables}
+internally and these require a separate initialization op,
+@{tf.tables_initializer}.
+
+``` python
+var_init = tf.global_variables_initializer()
+table_init = tf.tables_initializer()
+sess = tf.Session()
+sess.run((var_init, table_init))
+```
+
+Once the internal state has been initialized you can run `inputs` like any
+other `tf.Tensor`:
+
+```python
+print(sess.run(inputs))
+```
+
+This shows how the feature columns have packed the input vectors, with the
+one-hot "department" as the first two indices and "sales" as the third.
+
+```None
+[[  1.   0.   5.]
+ [  1.   0.  10.]
+ [  0.   1.   8.]
+ [  0.   1.   9.]]
+```
+
+## Training
+
+Now that you're familiar with the basics of core TensorFlow, let's train a
+small regression model manually.
+
+### Define the data
+
+First let's define some inputs, `x`, and the expected output for each input,
+`y_true`:
+
+```python
+x = tf.constant([[1], [2], [3], [4]], dtype=tf.float32)
+y_true = tf.constant([[0], [-1], [-2], [-3]], dtype=tf.float32)
+```
+
+### Define the model
+
+Next, build a simple linear model, with 1 output:
+
+``` python
+linear_model = tf.layers.Dense(units=1)
+
+y_pred = linear_model(x)
+```
+
+You can evaluate the predictions as follows:
+
+``` python
+sess = tf.Session()
+init = tf.global_variables_initializer()
+sess.run(init)
+
+print(sess.run(y_pred))
+```
+
+The model hasn't yet been trained, so the four "predicted" values aren't very
+good. Here's what we got; your own output will almost certainly differ:
+
+``` None
+[[ 0.02631879]
+ [ 0.05263758]
+ [ 0.07895637]
+ [ 0.10527515]]
+```
+
+### loss
+
+To optimize a model, you first need to define the loss. We'll use the mean
+square error, a standard loss for regression problems.
+
+While you could do this manually with lower level math operations,
+the @{tf.losses} module provides a set of common loss functions. You can use it
+to calculate the mean square error as follows:
+
+``` python
+loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)
+
+print(sess.run(loss))
+```
+This will produce a loss value, something like:
+
+``` None
+2.23962
+```
+
+### Training
+
+TensorFlow provides
+[**optimizers**](https://developers.google.com/machine-learning/glossary/#optimizer)
+implementing standard optimization algorithms. These are implemented as
+sub-classes of @{tf.train.Optimizer}. They incrementally change each
+variable in order to minimizethe loss. The simplest optimization algorithm is
+[**gradient descent**](https://developers.google.com/machine-learning/glossary/#gradient_descent),
+implemented by @{tf.train.GradientDescentOptimizer}. It modifies each
+variable according to the magnitude of the derivative of loss with respect to
+that variable. For example:
+
+```python
+optimizer = tf.train.GradientDescentOptimizer(0.01)
+train = optimizer.minimize(loss)
+```
+
+This code builds all the graph components necessary for the optimization, and
+returns a training operation. When run, the training op will update variables
+in the graph. You might run it as follows:
+
+```python
+for i in range(100):
+  _, loss_value = sess.run((train, loss))
+  print(loss_value)
+```
+
+Since `train` is an op, not a tensor, it doesn't return a value when run.
+To see the progression of the loss during training, we run the loss tensor at
+the same time, producing output like the following:
+
+``` None
+1.35659
+1.00412
+0.759167
+0.588829
+0.470264
+0.387626
+0.329918
+0.289511
+0.261112
+0.241046
+...
+```
+
+### Complete program
+
+```python
+x = tf.constant([[1], [2], [3], [4]], dtype=tf.float32)
+y_true = tf.constant([[0], [-1], [-2], [-3]], dtype=tf.float32)
+
+linear_model = tf.layers.Dense(units=1)
+
+y_pred = linear_model(x)
+loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)
+
+optimizer = tf.train.GradientDescentOptimizer(0.01)
+train = optimizer.minimize(loss)
+
+init = tf.global_variables_initializer()
+
+sess = tf.Session()
+sess.run(init)
+for i in range(100):
+  _, loss_value = sess.run((train, loss))
+  print(loss_value)
+
+print(sess.run(y_pred))
+```
+
+## Next steps
+
+To learn more about building models with TensorFlow consider the following:
+
+* @{$get_started/custom_estimators$Custom Estimators}, to learn how to build
+  customized models with TensorFlow. Your knowledge of TensorFlow Core will
+  help you understand and debug your own models.
+
+If you want to learn more about the inner workings of TensorFlow consider the
+following documents, which go into more depth on many of the topics discussed
+here:
+
+* @{$graphs}
+* @{$tensors}
+* @{$variables}
+
+
diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md
index 54693f3d4d356da93e6e31595d04ed58e173e061..fa7a94cc0686bb86c8b7033589a4b2da0e02c87c 100644
--- a/tensorflow/docs_src/programmers_guide/saved_model.md
+++ b/tensorflow/docs_src/programmers_guide/saved_model.md
@@ -349,10 +349,10 @@ SavedModel format. This section explains how to:
 
 ### Preparing serving inputs
 
-During training, an @{$input_fn$`input_fn()`} ingests data and prepares it for
-use by the model.  At serving time, similarly, a `serving_input_receiver_fn()`
-accepts inference requests and prepares them for the model.  This function
-has the following purposes:
+During training, an @{$premade_estimators#input_fn$`input_fn()`} ingests data
+and prepares it for use by the model.  At serving time, similarly, a
+`serving_input_receiver_fn()` accepts inference requests and prepares them for
+the model.  This function has the following purposes:
 
 *  To add placeholders to the graph that the serving system will feed
    with inference requests.
@@ -479,10 +479,10 @@ does not specify one.
 ### Serving the exported model locally
 
 For local deployment, you can serve your model using
-[TensorFlow Serving](http://github.com/tensorflow/serving), an open-source project that loads a
-SavedModel and exposes it as a [gRPC](http://www.grpc.io/) service.
+[TensorFlow Serving](https://github.com/tensorflow/serving), an open-source project that loads a
+SavedModel and exposes it as a [gRPC](https://www.grpc.io/) service.
 
-First, [install TensorFlow Serving](http://github.com/tensorflow/serving).
+First, [install TensorFlow Serving](https://github.com/tensorflow/serving).
 
 Then build and run the local model server, substituting `$export_dir_base` with
 the path to the SavedModel you exported above:
diff --git a/tensorflow/docs_src/get_started/summaries_and_tensorboard.md b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md
similarity index 96%
rename from tensorflow/docs_src/get_started/summaries_and_tensorboard.md
rename to tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md
index ce5db079ba3a502ffdec96191b03a8b951ac3db6..05dfdfdc4d2257fc680e7fa99b666ef86e3bef09 100644
--- a/tensorflow/docs_src/get_started/summaries_and_tensorboard.md
+++ b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md
@@ -76,7 +76,7 @@ data than you need, though. Instead, consider running the merged summary op
 every `n` steps.
 
 The code example below is a modification of the
-@{$beginners$simple MNIST tutorial},
+@{$layers$simple MNIST tutorial},
 in which we have added some summary ops, and run them every ten steps. If you
 run this and then launch `tensorboard --logdir=/tmp/tensorflow/mnist`, you'll be able
 to visualize statistics, such as how the weights or accuracy varied during
@@ -137,12 +137,10 @@ with tf.name_scope('cross_entropy'):
   #
   # can be numerically unstable.
   #
-  # So here we use tf.nn.softmax_cross_entropy_with_logits on the
-  # raw outputs of the nn_layer above, and then average across
-  # the batch.
-  diff = tf.nn.softmax_cross_entropy_with_logits(targets=y_, logits=y)
+  # So here we use tf.losses.sparse_softmax_cross_entropy on the
+  # raw logit outputs of the nn_layer above.
   with tf.name_scope('total'):
-    cross_entropy = tf.reduce_mean(diff)
+    cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y)
 tf.summary.scalar('cross_entropy', cross_entropy)
 
 with tf.name_scope('train'):
diff --git a/tensorflow/docs_src/get_started/tensorboard_histograms.md b/tensorflow/docs_src/programmers_guide/tensorboard_histograms.md
similarity index 100%
rename from tensorflow/docs_src/get_started/tensorboard_histograms.md
rename to tensorflow/docs_src/programmers_guide/tensorboard_histograms.md
diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md
index 47d4db2a568c9f8009982e44a85e44f0250860c1..58a80d533927e4f0d1458f87406914c1efa00605 100644
--- a/tensorflow/docs_src/programmers_guide/tensors.md
+++ b/tensorflow/docs_src/programmers_guide/tensors.md
@@ -112,8 +112,8 @@ For example, the following method programmatically determines the rank
 of the `tf.Tensor` defined in the previous section:
 
 ```python
-r = tf.rank(my3d)
-# After the graph runs, r will hold the value 3.
+r = tf.rank(my_image)
+# After the graph runs, r will hold the value 4.
 ```
 
 ### Referring to `tf.Tensor` slices
diff --git a/tensorflow/docs_src/tutorials/using_gpu.md b/tensorflow/docs_src/programmers_guide/using_gpu.md
similarity index 99%
rename from tensorflow/docs_src/tutorials/using_gpu.md
rename to tensorflow/docs_src/programmers_guide/using_gpu.md
index de8d88ce766cb1314cadd62e3f3e26f8cd36d1b9..c429ca4750753278e4736650a08fd0c71e0d9fad 100644
--- a/tensorflow/docs_src/tutorials/using_gpu.md
+++ b/tensorflow/docs_src/programmers_guide/using_gpu.md
@@ -172,7 +172,7 @@ If you would like to run TensorFlow on multiple GPUs, you can construct your
 model in a multi-tower fashion where each tower is assigned to a different GPU.
 For example:
 
-```
+``` python
 # Creates a graph.
 c = []
 for d in ['/device:GPU:2', '/device:GPU:3']:
diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md
index 16753c931f151ea6d3ce7cd465bf98d23cde78ae..64250738056043e236b5eb236bcbf29375655260 100644
--- a/tensorflow/docs_src/programmers_guide/variables.md
+++ b/tensorflow/docs_src/programmers_guide/variables.md
@@ -205,7 +205,7 @@ methods:
 v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer())
 assignment = v.assign_add(1)
 tf.global_variables_initializer().run()
-assignment.run()
+sess.run(assignment)  # or assignment.op.run(), or assignment.eval()
 ```
 
 Most TensorFlow optimizers have specialized ops that efficiently update the
diff --git a/tensorflow/docs_src/programmers_guide/version_compat.md b/tensorflow/docs_src/programmers_guide/version_compat.md
index d3e8e425091aac52b435479c4086bf7a4043dd19..a28f1385c87c7a083ee96977c5ab268c6977e17e 100644
--- a/tensorflow/docs_src/programmers_guide/version_compat.md
+++ b/tensorflow/docs_src/programmers_guide/version_compat.md
@@ -60,7 +60,7 @@ patch versions.  The public APIs consist of
     * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto)
     * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto)
 
-## What is *not* covered
+## What is *not* covered {not_covered}
 
 Some API functions are explicitly marked as "experimental" and can change in
 backward incompatible ways between minor releases. These include:
diff --git a/tensorflow/docs_src/tutorials/audio_recognition.md b/tensorflow/docs_src/tutorials/audio_recognition.md
index 336f4d9c18b45cda2441bc7a83e9698bbd618d22..7d79f433c41b42a268816d8277ea69b0d62a04f3 100644
--- a/tensorflow/docs_src/tutorials/audio_recognition.md
+++ b/tensorflow/docs_src/tutorials/audio_recognition.md
@@ -246,7 +246,7 @@ results as in your server testing.
 The demo app updates its UI list of results automatically based on the labels
 text file you copy into assets alongside your frozen graph, which means you can
 easily try out different models without needing to make any code changes. You
-will need to updaye `LABEL_FILENAME` and `MODEL_FILENAME` to point to the files
+will need to update `LABEL_FILENAME` and `MODEL_FILENAME` to point to the files
 you've added if you change the paths though.
 
 ## How does this Model Work?
diff --git a/tensorflow/docs_src/tutorials/deep_cnn.md b/tensorflow/docs_src/tutorials/deep_cnn.md
index 6f802fd106d0e7cc8b2049af2548c51803b43195..679754020470dddfcffa76e62ca8f55a439ec4f5 100644
--- a/tensorflow/docs_src/tutorials/deep_cnn.md
+++ b/tensorflow/docs_src/tutorials/deep_cnn.md
@@ -195,9 +195,8 @@ The usual method for training a network to perform N-way classification is
 aka. *softmax regression*. Softmax regression applies a
 @{tf.nn.softmax$softmax} nonlinearity to the
 output of the network and calculates the
-@{tf.nn.softmax_cross_entropy_with_logits$cross-entropy}
-between the normalized predictions and a
-@{tf.sparse_to_dense$1-hot encoding} of the label.
+@{tf.nn.sparse_softmax_cross_entropy_with_logits$cross-entropy}
+between the normalized predictions and the label index.
 For regularization, we also apply the usual
 @{tf.nn.l2_loss$weight decay} losses to all learned
 variables.  The objective function for the model is the sum of the cross entropy
diff --git a/tensorflow/docs_src/tutorials/image_recognition.md b/tensorflow/docs_src/tutorials/image_recognition.md
index 32257f87d6662f44536f45510b6a7c82628de2ff..332bcf54f02e6e3c7d805746011dfab642943cfe 100644
--- a/tensorflow/docs_src/tutorials/image_recognition.md
+++ b/tensorflow/docs_src/tutorials/image_recognition.md
@@ -450,9 +450,7 @@ covering them.
 
 To find out more about implementing convolutional neural networks, you can jump
 to the TensorFlow @{$deep_cnn$deep convolutional networks tutorial},
-or start a bit more gently with our
-@{$beginners$ML beginner} or @{$pros$ML expert}
-MNIST starter tutorials. Finally, if you want to get up to speed on research
-in this area, you can
+or start a bit more gently with our @{$layers$MNIST starter tutorial}.
+Finally, if you want to get up to speed on research in this area, you can
 read the recent work of all the papers referenced in this tutorial.
 
diff --git a/tensorflow/docs_src/tutorials/image_retraining.md b/tensorflow/docs_src/tutorials/image_retraining.md
index ad565e6d8be5e1e1c0efe5993608a4c1083e562b..df15bc0a9c3763aa51c2fc8cf36ce9fc3544ae68 100644
--- a/tensorflow/docs_src/tutorials/image_retraining.md
+++ b/tensorflow/docs_src/tutorials/image_retraining.md
@@ -44,8 +44,14 @@ following command (these examples are not included in the installation):
 
 ```sh
 git clone https://github.com/tensorflow/tensorflow
+```
+
+Then checkout the version of the tensorflow repository matching your
+installation and this tutorial as follows:
 
+``` sh
 cd tensorflow
+git checkout {version}
 ```
 
 In the simplest cases the retrainer can then be run like this:
@@ -384,7 +390,7 @@ image size that your model expects, as follows:
 python tensorflow/examples/label_image/label_image.py \
 --graph=/tmp/output_graph.pb --labels=/tmp/output_labels.txt \
 --input_layer=input \
---output_layer=final_result:0 \
+--output_layer=final_result \
 --input_height=224 --input_width=224 \
 --input_mean=128 --input_std=128 \
 --image=$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg
diff --git a/tensorflow/docs_src/tutorials/index.md b/tensorflow/docs_src/tutorials/index.md
index a34dbd69569be9cd234e98009ed148080fbbdb70..8c697e48e550c4e425db33bab7257532d209ac7a 100644
--- a/tensorflow/docs_src/tutorials/index.md
+++ b/tensorflow/docs_src/tutorials/index.md
@@ -1,53 +1,60 @@
 # Tutorials
 
+
 This section contains tutorials demonstrating how to do specific tasks
 in TensorFlow.  If you are new to TensorFlow, we recommend reading the
-documents in the "Get Started" section before reading these tutorials.
+documents in the "@{$get_started$Get Started}" section before reading
+these tutorials.
 
-The following tutorial explains the interaction of CPUs and GPUs on a
-TensorFlow system:
+## Images
 
-  * @{$using_gpu$Using GPUs}
+These tutorials cover different aspects of image recognition:
 
-The following tutorials cover different aspects of image recognition:
+  * @{$layers}, which introduces convolutional neural networks (CNNs) and
+    demonstrates how to build a CNN in TensorFlow.
+  * @{$image_recognition}, which introduces the field of image recognition and
+    uses a pre-trained model (Inception) for recognizing images.
+  * @{$image_retraining}, which has a wonderfully self-explanatory title.
+  * @{$deep_cnn}, which demonstrates how to build a small CNN for recognizing
+    images.  This tutorial is aimed at advanced TensorFlow users.
 
-  * @{$image_recognition$Image Recognition}, which introduces the field of
-    image recognition and a model (Inception) for recognizing images.
-  * @{$image_retraining$How to Retrain Inception's Final Layer for New Categories},
-    which has a wonderfully self-explanatory title.
-  * @{$layers$A Guide to TF Layers: Building a Convolutional Neural Network},
-    which introduces convolutional neural networks (CNNs) and demonstrates how
-    to build a CNN in TensorFlow.
-  * @{$deep_cnn$Convolutional Neural Networks}, which demonstrates how to
-    build a small CNN for recognizing images.  This tutorial is aimed at
-    advanced TensorFlow users.
 
-The following tutorials focus on machine learning problems in human language:
+## Sequences
 
-  * @{$word2vec$Vector Representations of Words}, which demonstrates how to
-    create an embedding for words.
-  * @{$recurrent$Recurrent Neural Networks}, which demonstrates how to use a
+These tutorials focus on machine learning problems dealing with sequence data.
+
+  * @{$recurrent}, which demonstrates how to use a
     recurrent neural network to predict the next word in a sentence.
-  * @{$seq2seq$Sequence-to-Sequence Models}, which demonstrates how to use a
+  * @{$seq2seq}, which demonstrates how to use a
     sequence-to-sequence model to translate text from English to French.
+  * @{$recurrent_quickdraw}
+    builds a classification model for drawings, directly from the sequence of
+    pen strokes.
+  * @{$audio_recognition}, which shows how to
+    build a basic speech recognition network.
 
-The following tutorials focus on linear models:
+## Data representation
 
-  * @{$linear$Large-Scale Linear Models with TensorFlow}, which introduces
-    linear models and demonstrates how to build them with the high-level API.
-  * @{$wide$TensorFlow Linear Model Tutorial}, which demonstrates how to solve
-    a binary classification problem in TensorFlow.
-  * @{$wide_and_deep$TensorFlow Wide & Deep Learning Tutorial}, which explains
-    how to use the high-level API to jointly train both a wide linear model
-    and a deep feed-forward neural network.
-  * @{$kernel_methods$Improving Linear Models Using Explicit Kernel Methods},
+These tutorials demonstrate various data representations that can be used in
+TensorFlow.
+
+  * @{$wide}, uses
+    @{tf.feature_column$feature columns} to feed a variety of data types
+    to linear model, to solve a classification problem.
+  * @{$wide_and_deep}, builds on the
+    above linear model tutorial, adding a deep feed-forward neural network
+    component and a DNN-compatible data representation.
+  * @{$word2vec}, which demonstrates how to
+    create an embedding for words.
+  * @{$kernel_methods},
     which shows how to improve the quality of a linear model by using explicit
     kernel mappings.
-  * @{$audio_recognition$Simple Audio Recognition}, which shows how to
-    build a basic speech recognition network.
 
-Although TensorFlow specializes in machine learning, you may also use
-TensorFlow to solve other kinds of math problems.  For example:
+## Non Machine Learning
+
+Although TensorFlow specializes in machine learning, the core of TensorFlow is
+a powerful numeric computation system which you can also use to solve other
+kinds of math problems.  For example:
 
-  * @{$mandelbrot$Mandelbrot Set}
-  * @{$pdes$Partial Differential Equations}
+  * @{$mandelbrot}
+  * @{$pdes}
diff --git a/tensorflow/docs_src/tutorials/kernel_methods.md b/tensorflow/docs_src/tutorials/kernel_methods.md
index 324c34fdfa84d922f298d87d77e8e1d635f876ae..63f408c2ca304d6345ffff459b799b011f8d8035 100644
--- a/tensorflow/docs_src/tutorials/kernel_methods.md
+++ b/tensorflow/docs_src/tutorials/kernel_methods.md
@@ -1,5 +1,10 @@
 # Improving Linear Models Using Explicit Kernel Methods
 
+Note: This document uses a deprecated version of ${tf.estimator},
+which has a ${tf.contrib.learn.estimator$different interface}.
+It also uses other `contrib` methods whose
+${$version_compat#not_covered$API may not be stable}.
+
 In this tutorial, we demonstrate how combining (explicit) kernel methods with
 linear models can drastically increase the latters' quality of predictions
 without significantly increasing training and inference times. Unlike dual
@@ -44,18 +49,18 @@ respectively. Each split contains one numpy array for images (with shape
 tutorial, we only use the train and validation splits to train and evaluate our
 models respectively.
 
-In order to feed data to a tf.contrib.learn Estimator, it is helpful to convert
+In order to feed data to a `tf.contrib.learn Estimator`, it is helpful to convert
 it to Tensors. For this, we will use an `input function` which adds Ops to the
 TensorFlow graph that, when executed, create mini-batches of Tensors to be used
 downstream. For more background on input functions, check
-@{$get_started/input_fn$Building Input Functions with tf.contrib.learn}. In this
-example, we will use the `tf.train.shuffle_batch` Op which, besides converting
-numpy arrays to Tensors, allows us to specify the batch_size and whether to
-randomize the input every time the input_fn Ops are executed (randomization
-typically expedites convergence during training). The full code for loading and
-preparing the data is shown in the snippet below. In this example, we use
-mini-batches of size 256 for training and the entire sample (5K entries) for
-evaluation. Feel free to experiment with different batch sizes.
+@{$get_started/premade_estimators#input_fn$this section on input functions}.
+In this example, we will use the `tf.train.shuffle_batch` Op which, besides
+converting numpy arrays to Tensors, allows us to specify the batch_size and
+whether to randomize the input every time the input_fn Ops are executed
+(randomization typically expedites convergence during training). The full code
+for loading and preparing the data is shown in the snippet below. In this
+example, we use mini-batches of size 256 for training and the entire sample
+(5K entries) for evaluation. Feel free to experiment with different batch sizes.
 
 ```python
 import numpy as np
diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md
index e808a3677f2a3e89597ef82cc86dd3646775d693..b898cbe29c2bac9ade341fe3b3566e42e133fc5b 100644
--- a/tensorflow/docs_src/tutorials/layers.md
+++ b/tensorflow/docs_src/tutorials/layers.md
@@ -169,9 +169,7 @@ def cnn_model_fn(features, labels, mode):
     return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
 
   # Calculate Loss (for both TRAIN and EVAL modes)
-  onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Configure the Training Op (for TRAIN mode)
   if mode == tf.estimator.ModeKeys.TRAIN:
@@ -192,7 +190,7 @@ def cnn_model_fn(features, labels, mode):
 The following sections (with headings corresponding to each code block above)
 dive deeper into the `tf.layers` code used to create each layer, as well as how
 to calculate loss, configure the training op, and generate predictions. If
-you're already experienced with CNNs and @{$extend/estimators$TensorFlow `Estimator`s},
+you're already experienced with CNNs and @{$get_started/custom_estimators$TensorFlow `Estimator`s},
 and find the above code intuitive, you may want to skim these sections or just
 skip ahead to ["Training and Evaluating the CNN MNIST
 Classifier"](#training-and-evaluating-the-cnn-mnist-classifier).
@@ -536,8 +534,8 @@ if mode == tf.estimator.ModeKeys.TRAIN:
 ```
 
 > Note: For a more in-depth look at configuring training ops for Estimator model
-> functions, see @{$extend/estimators#defining-the-training-op-for-the-model$"Defining
-> the training op for the model"} in the @{$extend/estimators$"Creating Estimations in
+> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining
+> the training op for the model"} in the @{$get_started/custom_estimators$"Creating Estimations in
 > tf.estimator"} tutorial.
 
 ### Add evaluation metrics
@@ -601,7 +599,7 @@ be saved (here, we specify the temp directory `/tmp/mnist_convnet_model`, but
 feel free to change to another directory of your choice).
 
 > Note: For an in-depth walkthrough of the TensorFlow `Estimator` API, see the
-> tutorial @{$extend/estimators$"Creating Estimators in tf.estimator."}
+> tutorial @{$get_started/custom_estimators$"Creating Estimators in tf.estimator."}
 
 ### Set Up a Logging Hook {#set_up_a_logging_hook}
 
@@ -720,10 +718,9 @@ Here, we've achieved an accuracy of 97.3% on our test data set.
 To learn more about TensorFlow Estimators and CNNs in TensorFlow, see the
 following resources:
 
-*   @{$extend/estimators$Creating Estimators in tf.estimator}. An
-    introduction to the TensorFlow Estimator API, which walks through
+*   @{$get_started/custom_estimators$Creating Estimators in tf.estimator}
+    provides an introduction to the TensorFlow Estimator API. It walks through
     configuring an Estimator, writing a model function, calculating loss, and
     defining a training op.
-*   @{$pros#build-a-multilayer-convolutional-network$Deep MNIST for Experts: Building a Multilayer CNN}. Walks
-    through how to build a MNIST CNN classification model *without layers* using
-    lower-level TensorFlow operations.
+*   @{$deep_cnn} walks through how to build a MNIST CNN classification model
+    *without estimators* using lower-level TensorFlow operations.
diff --git a/tensorflow/docs_src/tutorials/leftnav_files b/tensorflow/docs_src/tutorials/leftnav_files
index 5a5d6ca558867e1c8f3dca221a98ca7c0a7ee986..41ffdc86010fb8407889df26eefa5fa59952c5da 100644
--- a/tensorflow/docs_src/tutorials/leftnav_files
+++ b/tensorflow/docs_src/tutorials/leftnav_files
@@ -1,16 +1,23 @@
 index.md
-using_gpu.md
+
+### Images
+layers.md
 image_recognition.md
 image_retraining.md
-layers.md
 deep_cnn.md
-word2vec.md
+
+### Sequences
 recurrent.md
 seq2seq.md
-linear.md
+recurrent_quickdraw.md
+audio_recognition.md
+
+### Data Representation
 wide.md
 wide_and_deep.md
+word2vec.md
 kernel_methods.md
-audio_recognition.md
+
+### Non-ML
 mandelbrot.md
 pdes.md
diff --git a/tensorflow/docs_src/tutorials/linear.md b/tensorflow/docs_src/tutorials/linear.md
index d333d01279067de47819410795505f731e14fed3..265ded877d1ff9fb0b1cc2ad678729a3b7247aa8 100644
--- a/tensorflow/docs_src/tutorials/linear.md
+++ b/tensorflow/docs_src/tutorials/linear.md
@@ -1,36 +1,37 @@
 # Large-scale Linear Models with TensorFlow
 
-The tf.estimator API provides (among other things) a rich set of tools for
+@{tf.estimator$Estimators} provides (among other things) a rich set of tools for
 working with linear models in TensorFlow. This document provides an overview of
 those tools. It explains:
 
-   * what a linear model is.
-   * why you might want to use a linear model.
-   * how tf.estimator makes it easy to build linear models in TensorFlow.
-   * how you can use tf.estimator to combine linear models with
-   deep learning to get the advantages of both.
+   * What a linear model is.
+   * Why you might want to use a linear model.
+   * How Estimators make it easy to build linear models in TensorFlow.
+   * How you can use Estimators to combine linear models with.
+     deep learning to get the advantages of both.
 
-Read this overview to decide whether the tf.estimator linear model tools might
+Read this overview to decide whether the Estimator's linear model tools  might
 be useful to you. Then do the @{$wide$Linear Models tutorial} to
 give it a try. This overview uses code samples from the tutorial, but the
 tutorial walks through the code in greater detail.
 
 To understand this overview it will help to have some familiarity
-with basic machine learning concepts, and also with @{$get_started/estimator$`tf.estimator`}.
+with basic machine learning concepts, and also with
+@{$get_started/premade_estimators$Estimators}.
 
 [TOC]
 
 ## What is a linear model?
 
-A *linear model* uses a single weighted sum of features to make a prediction.
+A **linear model** uses a single weighted sum of features to make a prediction.
 For example, if you have [data](https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names)
 on age, years of education, and weekly hours of
-work for a population, you can learn weights for each of those numbers so that
+work for a population, a model can learn weights for each of those numbers so that
 their weighted sum estimates a person's salary. You can also use linear models
 for classification.
 
 Some linear models transform the weighted sum into a more convenient form. For
-example, *logistic regression* plugs the weighted sum into the logistic
+example, [**logistic regression**](https://developers.google.com/machine-learning/glossary/#logistic_regression) plugs the weighted sum into the logistic
 function to turn the output into a value between 0 and 1. But you still just
 have one weight for each input feature.
 
@@ -51,10 +52,10 @@ Linear models:
    * provide an excellent starting point for learning about machine learning.
    * are widely used in industry.
 
-## How does tf.estimator help you build linear models?
+## How do Estimators help you build linear models?
 
 You can build a linear model from scratch in TensorFlow without the help of a
-special API. But tf.estimator provides some tools that make it easier to build
+special API. But Estimators provides some tools that make it easier to build
 effective large-scale linear models.
 
 ### Feature columns and transformations
@@ -86,10 +87,10 @@ become [0, 1, 0] and 'green' would become [0, 0, 1]. These vectors are called
 "sparse" because they may be very long, with many zeros, when the set of
 possible values is very large (such as all English words).
 
-While you don't need to use categorical columns to use tf.estimator linear
-models, one of the strengths of linear models is their ability to deal with
-large sparse vectors. Sparse features are a primary use case for the
-tf.estimator linear model tools.
+While you don't need to use categorical columns to use the linear model tools
+provided by Estimators, one of the strengths of linear models is their ability
+to deal with large sparse vectors. Sparse features are a primary use case for
+the linear model tools provided by Estimators.
 
 ##### Encoding sparse columns
 
@@ -173,7 +174,7 @@ the data itself. You provide the data through an input function.
 The input function must return a dictionary of tensors. Each key corresponds to
 the name of a `FeatureColumn`. Each key's value is a tensor containing the
 values of that feature for all data instances. See
-@{$input_fn$Building Input Functions with tf.estimator} for a
+@{$premade_estimators#input_fn} for a
 more comprehensive look at input functions, and `input_fn` in the
 [linear models tutorial code](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py)
 for an example implementation of an input function.
@@ -220,7 +221,7 @@ for key in sorted(results):
 
 ### Wide and deep learning
 
-The tf.estimator API also provides an estimator class that lets you jointly
+The `tf.estimator` module also provides an estimator class that lets you jointly
 train a linear model and a deep neural network. This novel approach combines the
 ability of linear models to "memorize" key features with the generalization
 ability of neural nets. Use `tf.estimator.DNNLinearCombinedClassifier` to
diff --git a/tensorflow/docs_src/tutorials/recurrent.md b/tensorflow/docs_src/tutorials/recurrent.md
index 3bae9bb457a0696722d239e664207a4d8021f0d8..14da2c8785276abb34d6959d738f5b39e6c6a2e8 100644
--- a/tensorflow/docs_src/tutorials/recurrent.md
+++ b/tensorflow/docs_src/tutorials/recurrent.md
@@ -57,6 +57,7 @@ important to note that `current_batch_of_words` does not correspond to a
 TensorFlow will automatically sum the gradients of each batch for you.
 
 For example:
+
 ```
  t=0  t=1    t=2  t=3     t=4
 [The, brown, fox, is,     quick]
diff --git a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md
new file mode 100644
index 0000000000000000000000000000000000000000..e22536adb6f0b893602ff79612cfb01e10586a18
--- /dev/null
+++ b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md
@@ -0,0 +1,410 @@
+# Recurrent Neural Networks for Drawing Classification
+
+[Quick, Draw!]: http://quickdraw.withgoogle.com
+
+[Quick, Draw!] is a game where a player is challenged to draw a number of
+objects and see if a computer can recognize the drawing.
+
+The recognition in [Quick, Draw!] is performed by a classifier that takes the
+user input, given as a sequence of strokes of points in x and y, and recognizes
+the object category that the user tried to draw.
+
+In this tutorial we'll show how to build an RNN-based recognizer for this
+problem. The model will use a combination of convolutional layers, LSTM layers,
+and a softmax output layer to classify the drawings:
+
+<center> ![RNN model structure](../images/quickdraw_model.png) </center>
+
+The figure above shows the structure of the model that we will build in this
+tutorial. The input is a drawing that is encoded as a sequence of strokes of
+points in x, y, and n, where n indicates whether a the point is the first point
+in a new stroke.
+
+Then, a series of 1-dimensional convolutions is applied. Then LSTM layers are
+applied and the sum of the outputs of all LSTM steps is fed into a softmax layer
+to make a classification decision among the classes of drawings that we know.
+
+This tutorial uses the data from actual [Quick, Draw!] games [that is publicly
+available](https://quickdraw.withgoogle.com/data). This dataset contains of 50M
+drawings in 345 categories.
+
+## Run the tutorial code
+
+To try the code for this tutorial:
+
+1.  @{$install$Install TensorFlow} if you haven't already.
+1.  Download the [tutorial code]
+(https://github.com/tensorflow/models/tree/master/tutorials/rnn/quickdraw/train_model.py).
+1.  [Download the data](#download-the-data) in `TFRecord` format from
+    [here](http://download.tensorflow.org/data/quickdraw_tutorial_dataset_v1.tar.gz) and unzip it. More details about [how to
+    obtain the original Quick, Draw!
+    data](#optional-download-the-full-quick-draw-data) and [how to convert that
+    to `TFRecord` files](#optional-converting-the-data) is available below.
+
+1.  Execute the tutorial code with the following command to train the RNN-based
+    model described in this tutorial. Make sure to adjust the paths to point to
+    the unzipped data from the download in step 3.
+
+```shell
+  python train_model.py \
+    --training_data=rnn_tutorial_data/training.tfrecord-?????-of-????? \
+    --eval_data=rnn_tutorial_data/eval.tfrecord-?????-of-????? \
+    --classes_file=rnn_tutorial_data/training.tfrecord.classes
+```
+
+## Tutorial details
+
+### Download the data
+
+We make the data that we use in this tutorial available as `TFRecord` files
+containing `TFExamples`. You can download the data from here:
+
+http://download.tensorflow.org/data/quickdraw_tutorial_dataset_v1.tar.gz
+
+Alternatively you can download the original data in `ndjson` format from the
+Google cloud and convert it to the `TFRecord` files containing `TFExamples`
+yourself as described in the next section.
+
+### Optional: Download the full Quick Draw Data
+
+The full [Quick, Draw!](https://quickdraw.withgoogle.com)
+[dataset](https://quickdraw.withgoogle.com/data) is available on Google Cloud
+Storage as [ndjson](http://ndjson.org/) files separated by category. You can
+[browse the list of files in Cloud
+Console](https://console.cloud.google.com/storage/quickdraw_dataset).
+
+To download the data we recommend using
+[gsutil](https://cloud.google.com/storage/docs/gsutil_install#install) to
+download the entire dataset. Note that the original .ndjson files require
+downloading ~22GB.
+
+Then use the following command to check that your gsutil installation works and
+that you can access the data bucket:
+
+```shell
+gsutil ls -r "gs://quickdraw_dataset/full/simplified/*"
+```
+
+which will output a long list of files like the following:
+
+```shell
+gs://quickdraw_dataset/full/simplified/The Eiffel Tower.ndjson
+gs://quickdraw_dataset/full/simplified/The Great Wall of China.ndjson
+gs://quickdraw_dataset/full/simplified/The Mona Lisa.ndjson
+gs://quickdraw_dataset/full/simplified/aircraft carrier.ndjson
+...
+```
+
+Then create a folder and download the dataset there.
+
+```shell
+mkdir rnn_tutorial_data
+cd rnn_tutorial_data
+gsutil -m cp "gs://quickdraw_dataset/full/simplified/*" .
+```
+
+This download will take a while and download a bit more than 23GB of data.
+
+### Optional: Converting the data
+
+To convert the `ndjson` files to
+@{$python/python_io#tfrecords_format_details$TFRecord} files containing
+${tf.train.Example} protos run the following command.
+
+```shell
+   python create_dataset.py --ndjson_path rnn_tutorial_data \
+      --output_path rnn_tutorial_data
+```
+
+This will store the data in 10 shards of
+@{$python/python_io#tfrecords_format_details$TFRecord} files with 10000 items
+per class for the training data and 1000 items per class as eval data.
+
+This conversion process is described in more detail in the following.
+
+The original QuickDraw data is formatted as `ndjson` files where each line
+contains a JSON object like the following:
+
+```json
+{"word":"cat",
+ "countrycode":"VE",
+ "timestamp":"2017-03-02 23:25:10.07453 UTC",
+ "recognized":true,
+ "key_id":"5201136883597312",
+ "drawing":[
+   [
+     [130,113,99,109,76,64,55,48,48,51,59,86,133,154,170,203,214,217,215,208,186,176,162,157,132],
+     [72,40,27,79,82,88,100,120,134,152,165,184,189,186,179,152,131,114,100,89,76,0,31,65,70]
+   ],[
+     [76,28,7],
+     [136,128,128]
+   ],[
+     [76,23,0],
+     [160,164,175]
+   ],[
+     [87,52,37],
+     [175,191,204]
+   ],[
+     [174,220,246,251],
+     [134,132,136,139]
+   ],[
+     [175,255],
+     [147,168]
+   ],[
+     [171,208,215],
+     [164,198,210]
+   ],[
+     [130,110,108,111,130,139,139,119],
+     [129,134,137,144,148,144,136,130]
+   ],[
+     [107,106],
+     [96,113]
+   ]
+ ]
+}
+```
+
+For our purpose of building a classifier we only care about the fields "`word`"
+and "`drawing`". While parsing the ndjson files, we process them line by line
+using a function that converts the strokes from the `drawing` field into a
+tensor of size `[number of points, 3]` containing the differences of consecutive
+points. This function also returns the class name as a string.
+
+```python
+def parse_line(ndjson_line):
+  """Parse an ndjson line and return ink (as np array) and classname."""
+  sample = json.loads(ndjson_line)
+  class_name = sample["word"]
+  inkarray = sample["drawing"]
+  stroke_lengths = [len(stroke[0]) for stroke in inkarray]
+  total_points = sum(stroke_lengths)
+  np_ink = np.zeros((total_points, 3), dtype=np.float32)
+  current_t = 0
+  for stroke in inkarray:
+    for i in [0, 1]:
+      np_ink[current_t:(current_t + len(stroke[0])), i] = stroke[i]
+    current_t += len(stroke[0])
+    np_ink[current_t - 1, 2] = 1  # stroke_end
+  # Preprocessing.
+  # 1. Size normalization.
+  lower = np.min(np_ink[:, 0:2], axis=0)
+  upper = np.max(np_ink[:, 0:2], axis=0)
+  scale = upper - lower
+  scale[scale == 0] = 1
+  np_ink[:, 0:2] = (np_ink[:, 0:2] - lower) / scale
+  # 2. Compute deltas.
+  np_ink = np_ink[1:, 0:2] - np_ink[0:-1, 0:2]
+  return np_ink, class_name
+```
+
+Since we want the data to be shuffled for writing we read from each of the
+category files in random order and write to a random shard.
+
+For the training data we read the first 10000 items for each class and for the
+eval data we read the next 1000 items for each class.
+
+This data is then reformatted into a tensor of shape `[num_training_samples,
+max_length, 3]`. Then we determine the bounding box of the original drawing in
+screen coordinates and normalize the size such that the drawing has unit height.
+
+<center> ![Size normalization](../images/quickdraw_sizenormalization.png) </center>
+
+Finally, we compute the differences between consecutive points and store these
+as a `VarLenFeature` in a
+[tensorflow.Example](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
+under the key `ink`. In addition we store the `class_index` as a single entry
+`FixedLengthFeature` and the `shape` of the `ink` as a `FixedLengthFeature` of
+length 2.
+
+### Defining the model
+
+To define the model we create a new `Estimator`. If you want to read more about
+estimators, we recommend @{$get_started/custom_estimators$this tutorial}.
+
+To build the model, we:
+
+1.  reshape the input back into the original shape - where the mini batch is
+    padded to the maximal length of its contents. In addition to the ink data we
+    also have the lengths for each example and the target class. This happens in
+    the function [`_get_input_tensors`](#-get-input-tensors).
+
+1.  pass the input through to a series of convolution layers in
+    [`_add_conv_layers`](#-add-conv-layers).
+
+1.  pass the output of the convolutions into a series of bidirectional LSTM
+    layers in [`_add_rnn_layers`](#-add-rnn-layers). At the end of that, the
+    outputs for each time step are summed up to have a compact, fixed length
+    embedding of the input.
+
+1.  classify this embedding using a softmax layer in
+    [`_add_fc_layers`](#-add-fc-layers).
+
+In code this looks like:
+
+```python
+inks, lengths, targets = _get_input_tensors(features, targets)
+convolved = _add_conv_layers(inks)
+final_state = _add_rnn_layers(convolved, lengths)
+logits =_add_fc_layers(final_state)
+```
+
+### _get_input_tensors
+
+To obtain the input features we first obtain the shape from the features dict
+and then create a 1D tensor of size `[batch_size]` containing the lengths of the
+input sequences. The ink is stored as a SparseTensor in the features dict which
+we convert into a dense tensor and then reshape to be `[batch_size, ?, 3]`. And
+finally, if targets were passed in we make sure they are stored as a 1D tensor
+of size `[batch_size]`
+
+In code this looks like this:
+
+```python
+shapes = features["shape"]
+lengths = tf.squeeze(
+    tf.slice(shapes, begin=[0, 0], size=[params["batch_size"], 1]))
+inks = tf.reshape(
+    tf.sparse_tensor_to_dense(features["ink"]),
+    [params["batch_size"], -1, 3])
+if targets is not None:
+  targets = tf.squeeze(targets)
+```
+
+### _add_conv_layers
+
+The desired number of convolution layers and the lengths of the filters is
+configured through the parameters `num_conv` and `conv_len` in the `params`
+dict.
+
+The input is a sequence where each point has dimensionality 3. We are going to
+use 1D convolutions where we treat the 3 input features as channels. That means
+that the input is a `[batch_size, length, 3]` tensor and the output will be a
+`[batch_size, length, number_of_filters]` tensor.
+
+```python
+convolved = inks
+for i in range(len(params.num_conv)):
+  convolved_input = convolved
+  if params.batch_norm:
+    convolved_input = tf.layers.batch_normalization(
+        convolved_input,
+        training=(mode == tf.estimator.ModeKeys.TRAIN))
+  # Add dropout layer if enabled and not first convolution layer.
+  if i > 0 and params.dropout:
+    convolved_input = tf.layers.dropout(
+        convolved_input,
+        rate=params.dropout,
+        training=(mode == tf.estimator.ModeKeys.TRAIN))
+  convolved = tf.layers.conv1d(
+      convolved_input,
+      filters=params.num_conv[i],
+      kernel_size=params.conv_len[i],
+      activation=None,
+      strides=1,
+      padding="same",
+      name="conv1d_%d" % i)
+return convolved, lengths
+```
+
+### _add_rnn_layers
+
+We pass the output from the convolutions into bidirectional LSTM layers for
+which we use a helper function from contrib.
+
+```python
+outputs, _, _ = contrib_rnn.stack_bidirectional_dynamic_rnn(
+    cells_fw=[cell(params.num_nodes) for _ in range(params.num_layers)],
+    cells_bw=[cell(params.num_nodes) for _ in range(params.num_layers)],
+    inputs=convolved,
+    sequence_length=lengths,
+    dtype=tf.float32,
+    scope="rnn_classification")
+```
+
+see the code for more details and how to use `CUDA` accelerated implementations.
+
+To create a compact, fixed-length embedding, we sum up the output of the LSTMs.
+We first zero out the regions of the batch where the sequences have no data.
+
+```python
+mask = tf.tile(
+    tf.expand_dims(tf.sequence_mask(lengths, tf.shape(outputs)[1]), 2),
+    [1, 1, tf.shape(outputs)[2]])
+zero_outside = tf.where(mask, outputs, tf.zeros_like(outputs))
+outputs = tf.reduce_sum(zero_outside, axis=1)
+```
+
+### _add_fc_layers
+
+The embedding of the input is passed into a fully connected layer which we then
+use as a softmax layer.
+
+```python
+tf.layers.dense(final_state, params.num_classes)
+```
+
+### Loss, predictions, and optimizer
+
+Finally, we need to add a loss, a training op, and predictions to create the
+`ModelFn`:
+
+```python
+cross_entropy = tf.reduce_mean(
+    tf.nn.sparse_softmax_cross_entropy_with_logits(
+        labels=targets, logits=logits))
+# Add the optimizer.
+train_op = tf.contrib.layers.optimize_loss(
+    loss=cross_entropy,
+    global_step=tf.train.get_global_step(),
+    learning_rate=params.learning_rate,
+    optimizer="Adam",
+    # some gradient clipping stabilizes training in the beginning.
+    clip_gradients=params.gradient_clipping_norm,
+    summaries=["learning_rate", "loss", "gradients", "gradient_norm"])
+predictions = tf.argmax(logits, axis=1)
+return model_fn_lib.ModelFnOps(
+    mode=mode,
+    predictions={"logits": logits,
+                 "predictions": predictions},
+    loss=cross_entropy,
+    train_op=train_op,
+    eval_metric_ops={"accuracy": tf.metrics.accuracy(targets, predictions)})
+```
+
+### Training and evaluating the model
+
+To train and evaluate the model we can rely on the functionalities of the
+`Estimator` APIs and easily run training and evaluation with the `Experiment`
+APIs:
+
+```python
+  estimator = tf.estimator.Estimator(
+      model_fn=model_fn,
+      model_dir=output_dir,
+      config=config,
+      params=model_params)
+  # Train the model.
+  tf.contrib.learn.Experiment(
+      estimator=estimator,
+      train_input_fn=get_input_fn(
+          mode=tf.contrib.learn.ModeKeys.TRAIN,
+          tfrecord_pattern=FLAGS.training_data,
+          batch_size=FLAGS.batch_size),
+      train_steps=FLAGS.steps,
+      eval_input_fn=get_input_fn(
+          mode=tf.contrib.learn.ModeKeys.EVAL,
+          tfrecord_pattern=FLAGS.eval_data,
+          batch_size=FLAGS.batch_size),
+      min_eval_frequency=1000)
+```
+
+Note that this tutorial is just a quick example on a relatively small dataset to
+get you familiar with the APIs of recurrent neural networks and estimators. Such
+models can be even more powerful if you try them on a large dataset.
+
+When training the model for 1M steps you can expect to get an accuracy of
+approximately of approximately 70% on the top-1 candidate. Note that this
+accuracy is sufficient to build the quickdraw game because of the game dynamics
+the user will be able to adjust their drawing until it is ready. Also, the game
+does not use the top-1 candidate only but accepts a drawing as correct if the
+target category shows up with a score better than a fixed threshold.
diff --git a/tensorflow/docs_src/tutorials/wide.md b/tensorflow/docs_src/tutorials/wide.md
index 68dda1f2222b4175cd891d727065c93da6a5e68f..dba6f54c52ca5bf2569c66ad055329708de3991c 100644
--- a/tensorflow/docs_src/tutorials/wide.md
+++ b/tensorflow/docs_src/tutorials/wide.md
@@ -55,7 +55,7 @@ and continuous columns:
 
 Here's a list of columns available in the Census Income dataset:
 
-| Column Name    | Type        | Description                       | {.sortable}
+| Column Name    | Type        | Description                       |
 | -------------- | ----------- | --------------------------------- |
 | age            | Continuous  | The age of the individual         |
 | workclass      | Categorical | The type of employer the          |
diff --git a/tensorflow/examples/android/README.md b/tensorflow/examples/android/README.md
index 79202a38d7199033a9fefa8c6ba71e383aa0bf19..30a26d13c5734c5cf4a3b565c793db3e093c8271 100644
--- a/tensorflow/examples/android/README.md
+++ b/tensorflow/examples/android/README.md
@@ -126,6 +126,10 @@ the Android NDK and SDK must be installed on your system.
 2.  The Android NDK is required to build the native (C/C++) TensorFlow code. The
     current recommended version is 14b, which may be found
     [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-14b-downloads).
+
+      * NDK 16, the revision released in November 2017, is **incompatible** with
+        Bazel. See [here](https://github.com/tensorflow/tensorflow/issues/14918).
+
 3.  The Android SDK and build tools may be obtained
     [here](https://developer.android.com/tools/revisions/build-tools.html), or
     alternatively as part of [Android
@@ -133,8 +137,16 @@ the Android NDK and SDK must be installed on your system.
     23 is required to build the TF Android demo (though it will run on API >= 21
     devices).
 
+      - The Android Studio SDK Manager's NDK installer will install the latest
+        revision of the NDK, which is **incompatible** with Bazel. You'll need
+        to download an older version manually, as (2) suggests.
+
 ##### Edit WORKSPACE
 
+NOTE: As long as you have the SDK and NDK installed, the `./configure` script
+will create these rules for you. Answer "Yes" when the script asks to
+automatically configure the `./WORKSPACE`.
+
 The Android entries in
 [`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36) must be uncommented
 with the paths filled in appropriately depending on where you installed the NDK
@@ -156,7 +168,7 @@ download-models.gradle.
 
 **Optional**: If you wish to place the models in your assets manually, remove
 all of the `model_files` entries from the `assets` list in `tensorflow_demo`
-found in the `[BUILD](BUILD)` file. Then download and extract the archives
+found in the [`BUILD`](BUILD#L92) file. Then download and extract the archives
 yourself to the `assets` directory in the source tree:
 
 ```bash
diff --git a/tensorflow/examples/android/build.gradle b/tensorflow/examples/android/build.gradle
index 48f566f825d2714fe5970531e3d9c9f0f7ca940e..f7bdf8b816a8191770bc1ad59b890041b8e39912 100644
--- a/tensorflow/examples/android/build.gradle
+++ b/tensorflow/examples/android/build.gradle
@@ -28,8 +28,8 @@ buildscript {
     }
 
     dependencies {
-        classpath 'com.android.tools.build:gradle:2.3.0'
-        classpath 'org.apache.httpcomponents:httpclient:4.5.2'
+        classpath 'com.android.tools.build:gradle:3.0.1'
+        classpath 'org.apache.httpcomponents:httpclient:4.5.4'
     }
 }
 
@@ -75,7 +75,7 @@ apply plugin: 'com.android.application'
 
 android {
     compileSdkVersion 23
-    buildToolsVersion "25.0.2"
+    buildToolsVersion '26.0.2'
 
     if (nativeBuildSystem == 'cmake') {
         defaultConfig {
diff --git a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000000000000000000000000000000000..13372aef5e24af05341d49695ee84e5f9b594659
Binary files /dev/null and b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000000000000000000000000000000000..bd9ee87db3742e9f8c62df2ec9a7852550d9bbc9
--- /dev/null
+++ b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Sat Nov 18 15:06:47 CET 2017
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-all.zip
diff --git a/tensorflow/examples/android/gradlew b/tensorflow/examples/android/gradlew
new file mode 100644
index 0000000000000000000000000000000000000000..9d82f78915133e1c35a6ea51252590fb38efac2f
--- /dev/null
+++ b/tensorflow/examples/android/gradlew
@@ -0,0 +1,160 @@
+#!/usr/bin/env bash
+
+##############################################################################
+##
+##  Gradle start up script for UN*X
+##
+##############################################################################
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS=""
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn ( ) {
+    echo "$*"
+}
+
+die ( ) {
+    echo
+    echo "$*"
+    echo
+    exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+case "`uname`" in
+  CYGWIN* )
+    cygwin=true
+    ;;
+  Darwin* )
+    darwin=true
+    ;;
+  MINGW* )
+    msys=true
+    ;;
+esac
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+    ls=`ls -ld "$PRG"`
+    link=`expr "$ls" : '.*-> \(.*\)$'`
+    if expr "$link" : '/.*' > /dev/null; then
+        PRG="$link"
+    else
+        PRG=`dirname "$PRG"`"/$link"
+    fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+        # IBM's JDK on AIX uses strange locations for the executables
+        JAVACMD="$JAVA_HOME/jre/sh/java"
+    else
+        JAVACMD="$JAVA_HOME/bin/java"
+    fi
+    if [ ! -x "$JAVACMD" ] ; then
+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+else
+    JAVACMD="java"
+    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
+    MAX_FD_LIMIT=`ulimit -H -n`
+    if [ $? -eq 0 ] ; then
+        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+            MAX_FD="$MAX_FD_LIMIT"
+        fi
+        ulimit -n $MAX_FD
+        if [ $? -ne 0 ] ; then
+            warn "Could not set maximum file descriptor limit: $MAX_FD"
+        fi
+    else
+        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+    fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin ; then
+    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+    JAVACMD=`cygpath --unix "$JAVACMD"`
+
+    # We build the pattern for arguments to be converted via cygpath
+    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+    SEP=""
+    for dir in $ROOTDIRSRAW ; do
+        ROOTDIRS="$ROOTDIRS$SEP$dir"
+        SEP="|"
+    done
+    OURCYGPATTERN="(^($ROOTDIRS))"
+    # Add a user-defined pattern to the cygpath arguments
+    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+    fi
+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
+    i=0
+    for arg in "$@" ; do
+        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option
+
+        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
+            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+        else
+            eval `echo args$i`="\"$arg\""
+        fi
+        i=$((i+1))
+    done
+    case $i in
+        (0) set -- ;;
+        (1) set -- "$args0" ;;
+        (2) set -- "$args0" "$args1" ;;
+        (3) set -- "$args0" "$args1" "$args2" ;;
+        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+    esac
+fi
+
+# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
+function splitJvmOpts() {
+    JVM_OPTS=("$@")
+}
+eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
+JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
+
+exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
diff --git a/tensorflow/examples/android/gradlew.bat b/tensorflow/examples/android/gradlew.bat
new file mode 100644
index 0000000000000000000000000000000000000000..8a0b282aa6885fb573c106b3551f7275c5f17e8e
--- /dev/null
+++ b/tensorflow/examples/android/gradlew.bat
@@ -0,0 +1,90 @@
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem  Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS=
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto init
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto init
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:init
+@rem Get command-line arguments, handling Windowz variants
+
+if not "%OS%" == "Windows_NT" goto win9xME_args
+if "%@eval[2+2]" == "4" goto 4NT_args
+
+:win9xME_args
+@rem Slurp the command line arguments.
+set CMD_LINE_ARGS=
+set _SKIP=2
+
+:win9xME_args_slurp
+if "x%~1" == "x" goto execute
+
+set CMD_LINE_ARGS=%*
+goto execute
+
+:4NT_args
+@rem Get arguments from the 4NT Shell from JP Software
+set CMD_LINE_ARGS=%$
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
index 4e45f42d0c97ed9dad9f9702adc3c1efe658699f..8bd4abb154a8f8c74f2195d4acbb99d3d5d498ea 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
@@ -333,8 +333,12 @@ public abstract class CameraActivity extends Activity
           continue;
         }
 
-        useCamera2API = isHardwareLevelSupported(characteristics,
-            CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
+        // Fallback to camera1 API for internal cameras that don't have full support.
+        // This should help with legacy situations where using the camera2 API causes
+        // distorted or otherwise broken previews.
+        useCamera2API = (facing == CameraCharacteristics.LENS_FACING_EXTERNAL)
+            || isHardwareLevelSupported(characteristics, 
+                                        CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL);
         LOGGER.i("Camera API lv2?: %s", useCamera2API);
         return cameraId;
       }
diff --git a/tensorflow/examples/how_tos/reading_data/convert_to_records.py b/tensorflow/examples/how_tos/reading_data/convert_to_records.py
index a402eac053cb474db0fd90876501a9c13906ea82..c89e83956322cb87a4cf41c6b7172f03d941b429 100644
--- a/tensorflow/examples/how_tos/reading_data/convert_to_records.py
+++ b/tensorflow/examples/how_tos/reading_data/convert_to_records.py
@@ -55,12 +55,15 @@ def convert_to(data_set, name):
   with tf.python_io.TFRecordWriter(filename) as writer:
     for index in range(num_examples):
       image_raw = images[index].tostring()
-      example = tf.train.Example(features=tf.train.Features(feature={
-          'height': _int64_feature(rows),
-          'width': _int64_feature(cols),
-          'depth': _int64_feature(depth),
-          'label': _int64_feature(int(labels[index])),
-          'image_raw': _bytes_feature(image_raw)}))
+      example = tf.train.Example(
+          features=tf.train.Features(
+              feature={
+                  'height': _int64_feature(rows),
+                  'width': _int64_feature(cols),
+                  'depth': _int64_feature(depth),
+                  'label': _int64_feature(int(labels[index])),
+                  'image_raw': _bytes_feature(image_raw)
+              }))
       writer.write(example.SerializeToString())
 
 
diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
index a9ed02dd1a60ad79c2943212155bad864a750a99..fa4c1c0da5f31863aa4d99b6ec84e1e50e1a1551 100644
--- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
+++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py
@@ -45,9 +45,7 @@ TRAIN_FILE = 'train.tfrecords'
 VALIDATION_FILE = 'validation.tfrecords'
 
 
-def read_and_decode(filename_queue):
-  reader = tf.TFRecordReader()
-  _, serialized_example = reader.read(filename_queue)
+def decode(serialized_example):
   features = tf.parse_single_example(
       serialized_example,
       # Defaults are not specified since both keys are required.
@@ -60,22 +58,26 @@ def read_and_decode(filename_queue):
   # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape
   # [mnist.IMAGE_PIXELS].
   image = tf.decode_raw(features['image_raw'], tf.uint8)
-  image.set_shape([mnist.IMAGE_PIXELS])
+  image.set_shape((mnist.IMAGE_PIXELS))
 
+  # Convert label from a scalar uint8 tensor to an int32 scalar.
+  label = tf.cast(features['label'], tf.int32)
+
+  return image, label
+
+def augment(image, label):
   # OPTIONAL: Could reshape into a 28x28 image and apply distortions
   # here.  Since we are not applying any distortions in this
   # example, and the next step expects the image to be flattened
   # into a vector, we don't bother.
+  return image, label
 
+def normalize(image, label):
   # Convert from [0, 255] -> [-0.5, 0.5] floats.
   image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
 
-  # Convert label from a scalar uint8 tensor to an int32 scalar.
-  label = tf.cast(features['label'], tf.int32)
-
   return image, label
 
-
 def inputs(train, batch_size, num_epochs):
   """Reads input data num_epochs times.
 
@@ -91,31 +93,32 @@ def inputs(train, batch_size, num_epochs):
       in the range [-0.5, 0.5].
     * labels is an int32 tensor with shape [batch_size] with the true label,
       a number in the range [0, mnist.NUM_CLASSES).
-    Note that an tf.train.QueueRunner is added to the graph, which
-    must be run using e.g. tf.train.start_queue_runners().
+
+    This function creates a one_shot_iterator, meaning that it will only iterate
+    over the dataset once. On the other hand there is no special initialization
+    required.
   """
   if not num_epochs: num_epochs = None
   filename = os.path.join(FLAGS.train_dir,
                           TRAIN_FILE if train else VALIDATION_FILE)
 
   with tf.name_scope('input'):
-    filename_queue = tf.train.string_input_producer(
-        [filename], num_epochs=num_epochs)
+    # TFRecordDataset opens a protobuf and reads entries line by line
+    # could also be [list, of, filenames]
+    dataset = tf.data.TFRecordDataset(filename)
+    dataset = dataset.repeat(num_epochs)
 
-    # Even when reading in multiple threads, share the filename
-    # queue.
-    image, label = read_and_decode(filename_queue)
+    # map takes a python function and applies it to every sample
+    dataset = dataset.map(decode)
+    dataset = dataset.map(augment)
+    dataset = dataset.map(normalize)
 
-    # Shuffle the examples and collect them into batch_size batches.
-    # (Internally uses a RandomShuffleQueue.)
-    # We run this in two threads to avoid being a bottleneck.
-    images, sparse_labels = tf.train.shuffle_batch(
-        [image, label], batch_size=batch_size, num_threads=2,
-        capacity=1000 + 3 * batch_size,
-        # Ensures a minimum amount of shuffling of examples.
-        min_after_dequeue=1000)
+    #the parameter is the queue size
+    dataset = dataset.shuffle(1000 + 3 * batch_size)
+    dataset = dataset.batch(batch_size)
 
-    return images, sparse_labels
+    iterator = dataset.make_one_shot_iterator()
+  return iterator.get_next()
 
 
 def run_training():
@@ -124,16 +127,16 @@ def run_training():
   # Tell TensorFlow that the model will be built into the default Graph.
   with tf.Graph().as_default():
     # Input images and labels.
-    images, labels = inputs(train=True, batch_size=FLAGS.batch_size,
-                            num_epochs=FLAGS.num_epochs)
+    image_batch, label_batch = inputs(train=True, batch_size=FLAGS.batch_size,
+                               num_epochs=FLAGS.num_epochs)
 
     # Build a Graph that computes predictions from the inference model.
-    logits = mnist.inference(images,
+    logits = mnist.inference(image_batch,
                              FLAGS.hidden1,
                              FLAGS.hidden2)
 
     # Add to the Graph the loss calculation.
-    loss = mnist.loss(logits, labels)
+    loss = mnist.loss(logits, label_batch)
 
     # Add to the Graph operations that train the model.
     train_op = mnist.training(loss, FLAGS.learning_rate)
@@ -143,46 +146,32 @@ def run_training():
                        tf.local_variables_initializer())
 
     # Create a session for running operations in the Graph.
-    sess = tf.Session()
-
-    # Initialize the variables (the trained variables and the
-    # epoch counter).
-    sess.run(init_op)
-
-    # Start input enqueue threads.
-    coord = tf.train.Coordinator()
-    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
-
-    try:
-      step = 0
-      while not coord.should_stop():
-        start_time = time.time()
-
-        # Run one step of the model.  The return values are
-        # the activations from the `train_op` (which is
-        # discarded) and the `loss` op.  To inspect the values
-        # of your ops or variables, you may include them in
-        # the list passed to sess.run() and the value tensors
-        # will be returned in the tuple from the call.
-        _, loss_value = sess.run([train_op, loss])
-
-        duration = time.time() - start_time
-
-        # Print an overview fairly often.
-        if step % 100 == 0:
-          print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
+    with tf.Session() as sess:
+      # Initialize the variables (the trained variables and the
+      # epoch counter).
+      sess.run(init_op)
+      try:
+        step = 0
+        while True: #train until OutOfRangeError
+          start_time = time.time()
+
+          # Run one step of the model.  The return values are
+          # the activations from the `train_op` (which is
+          # discarded) and the `loss` op.  To inspect the values
+          # of your ops or variables, you may include them in
+          # the list passed to sess.run() and the value tensors
+          # will be returned in the tuple from the call.
+          _, loss_value = sess.run([train_op, loss])
+
+          duration = time.time() - start_time
+
+          # Print an overview fairly often.
+          if step % 100 == 0:
+            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
                                                      duration))
-        step += 1
-    except tf.errors.OutOfRangeError:
-      print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
-    finally:
-      # When done, ask the threads to stop.
-      coord.request_stop()
-
-    # Wait for threads to finish.
-    coord.join(threads)
-    sess.close()
-
+          step += 1
+      except tf.errors.OutOfRangeError:
+        print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
 
 def main(_):
   run_training()
diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py
index ebddfb20f4b60986fba1cdbfe3fcb184149b0a99..ec22684eaf63700c608c6ce45f22941555246b99 100644
--- a/tensorflow/examples/image_retraining/retrain.py
+++ b/tensorflow/examples/image_retraining/retrain.py
@@ -539,10 +539,8 @@ def get_random_cached_bottlenecks(sess, image_lists, how_many, category,
           sess, image_lists, label_name, image_index, image_dir, category,
           bottleneck_dir, jpeg_data_tensor, decoded_image_tensor,
           resized_input_tensor, bottleneck_tensor, architecture)
-      ground_truth = np.zeros(class_count, dtype=np.float32)
-      ground_truth[label_index] = 1.0
       bottlenecks.append(bottleneck)
-      ground_truths.append(ground_truth)
+      ground_truths.append(label_index)
       filenames.append(image_name)
   else:
     # Retrieve all bottlenecks.
@@ -555,10 +553,8 @@ def get_random_cached_bottlenecks(sess, image_lists, how_many, category,
             sess, image_lists, label_name, image_index, image_dir, category,
             bottleneck_dir, jpeg_data_tensor, decoded_image_tensor,
             resized_input_tensor, bottleneck_tensor, architecture)
-        ground_truth = np.zeros(class_count, dtype=np.float32)
-        ground_truth[label_index] = 1.0
         bottlenecks.append(bottleneck)
-        ground_truths.append(ground_truth)
+        ground_truths.append(label_index)
         filenames.append(image_name)
   return bottlenecks, ground_truths, filenames
 
@@ -610,10 +606,8 @@ def get_random_distorted_bottlenecks(
     bottleneck_values = sess.run(bottleneck_tensor,
                                  {resized_input_tensor: distorted_image_data})
     bottleneck_values = np.squeeze(bottleneck_values)
-    ground_truth = np.zeros(class_count, dtype=np.float32)
-    ground_truth[label_index] = 1.0
     bottlenecks.append(bottleneck_values)
-    ground_truths.append(ground_truth)
+    ground_truths.append(label_index)
   return bottlenecks, ground_truths
 
 
@@ -774,9 +768,8 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor,
         shape=[None, bottleneck_tensor_size],
         name='BottleneckInputPlaceholder')
 
-    ground_truth_input = tf.placeholder(tf.float32,
-                                        [None, class_count],
-                                        name='GroundTruthInput')
+    ground_truth_input = tf.placeholder(
+        tf.int64, [None], name='GroundTruthInput')
 
   # Organizing the following ops as `final_training_ops` so they're easier
   # to see in TensorBoard
@@ -823,10 +816,8 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor,
   tf.summary.histogram('activations', final_tensor)
 
   with tf.name_scope('cross_entropy'):
-    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+    cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
         labels=ground_truth_input, logits=logits)
-    with tf.name_scope('total'):
-      cross_entropy_mean = tf.reduce_mean(cross_entropy)
 
   tf.summary.scalar('cross_entropy', cross_entropy_mean)
 
@@ -852,8 +843,7 @@ def add_evaluation_step(result_tensor, ground_truth_tensor):
   with tf.name_scope('accuracy'):
     with tf.name_scope('correct_prediction'):
       prediction = tf.argmax(result_tensor, 1)
-      correct_prediction = tf.equal(
-          prediction, tf.argmax(ground_truth_tensor, 1))
+      correct_prediction = tf.equal(prediction, ground_truth_tensor)
     with tf.name_scope('accuracy'):
       evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   tf.summary.scalar('accuracy', evaluation_step)
@@ -1178,7 +1168,7 @@ def main(_):
     if FLAGS.print_misclassified_test_images:
       tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===')
       for i, test_filename in enumerate(test_filenames):
-        if predictions[i] != test_ground_truth[i].argmax():
+        if predictions[i] != test_ground_truth[i]:
           tf.logging.info('%70s  %s' %
                           (test_filename,
                            list(image_lists.keys())[predictions[i]]))
diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py
index 2de4c4ec99f87544bfda9d0fe5977f60742d82a0..8b8dd45fd72e3d29bdb7f6291cc53b912adf3644 100644
--- a/tensorflow/examples/image_retraining/retrain_test.py
+++ b/tensorflow/examples/image_retraining/retrain_test.py
@@ -87,7 +87,7 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase):
   def testAddEvaluationStep(self):
     with tf.Graph().as_default():
       final = tf.placeholder(tf.float32, [1], name='final')
-      gt = tf.placeholder(tf.float32, [1], name='gt')
+      gt = tf.placeholder(tf.int64, [1], name='gt')
       self.assertIsNotNone(retrain.add_evaluation_step(final, gt))
 
   def testAddJpegDecoding(self):
diff --git a/tensorflow/examples/ios/.gitignore b/tensorflow/examples/ios/.gitignore
index e572b3012c600ab856ac8e5bd71e4291b1ba7bcf..dbabfb33bf11e0436d8900ba9f2d1ba6195a9a47 100644
--- a/tensorflow/examples/ios/.gitignore
+++ b/tensorflow/examples/ios/.gitignore
@@ -2,3 +2,6 @@ project.xcworkspace
 xcuserdata
 imagenet_comp_graph_label_strings.txt
 tensorflow_inception_graph.pb
+simple/data/LICENSE
+camera/data/LICENSE
+benchmark/data/LICENSE
diff --git a/tensorflow/examples/label_image/BUILD b/tensorflow/examples/label_image/BUILD
index 9207fc6332db9870fbb5e2b4bd6b77a5a24fbb23..2abbe9dacca79b8d6e516550e28a9b203b18f123 100644
--- a/tensorflow/examples/label_image/BUILD
+++ b/tensorflow/examples/label_image/BUILD
@@ -51,6 +51,16 @@ tf_cc_binary(
     }),
 )
 
+py_binary(
+    name = "label_image_py",
+    srcs = ["label_image.py"],
+    main = "label_image.py",
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow:tensorflow_py",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/examples/label_image/README.md b/tensorflow/examples/label_image/README.md
index a9e44745e5cfa673c19d2c2fb434251b12d7aad6..cfd0132a7ae385d1a79c4b8f26b54244dcd3a087 100644
--- a/tensorflow/examples/label_image/README.md
+++ b/tensorflow/examples/label_image/README.md
@@ -73,10 +73,23 @@ Python than the Python code mentioned in the
 [Inception tutorial](https://www.tensorflow.org/tutorials/image_recognition/).
 and could be easier to add visualization or debug code.
 
-With tensorflow python package installed, you can run it like:
+
+`bazel-bin/tensorflow/examples/label_image/label_image_py` should be there after
+```bash
+$ bazel build tensorflow/examples/label_image/...
+```
+
+Run
+
+```bash
+$ bazel-bin/tensorflow/examples/label_image/label_image_py
+```
+
+Or, with tensorflow python package installed, you can run it like:
 ```bash
 $ python3 tensorflow/examples/label_image/label_image.py
 ```
+
 And get result similar to this:
 ```
 military uniform 0.834305
diff --git a/tensorflow/examples/label_image/label_image.py b/tensorflow/examples/label_image/label_image.py
index 39d09813375687fc954cab3d55ce997f8684da17..d62b73384c4969dc56a2f91d89719ba02a8f9431 100644
--- a/tensorflow/examples/label_image/label_image.py
+++ b/tensorflow/examples/label_image/label_image.py
@@ -51,7 +51,7 @@ def read_tensor_from_image_file(file_name, input_height=299, input_width=299,
     image_reader = tf.image.decode_jpeg(file_reader, channels = 3,
                                         name='jpeg_reader')
   float_caster = tf.cast(image_reader, tf.float32)
-  dims_expander = tf.expand_dims(float_caster, 0);
+  dims_expander = tf.expand_dims(float_caster, 0)
   resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
   normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
   sess = tf.Session()
@@ -118,8 +118,8 @@ if __name__ == "__main__":
 
   input_name = "import/" + input_layer
   output_name = "import/" + output_layer
-  input_operation = graph.get_operation_by_name(input_name);
-  output_operation = graph.get_operation_by_name(output_name);
+  input_operation = graph.get_operation_by_name(input_name)
+  output_operation = graph.get_operation_by_name(output_name)
 
   with tf.Session(graph=graph) as sess:
     results = sess.run(output_operation.outputs[0],
diff --git a/tensorflow/examples/learn/iris_custom_decay_dnn.py b/tensorflow/examples/learn/iris_custom_decay_dnn.py
index 072357e51c418ae1163debe29516c31ccc367386..4a219694d10ef075e0e0403cdd7ed100c39ddadd 100644
--- a/tensorflow/examples/learn/iris_custom_decay_dnn.py
+++ b/tensorflow/examples/learn/iris_custom_decay_dnn.py
@@ -46,12 +46,8 @@ def my_model(features, labels, mode):
     }
     return tf.estimator.EstimatorSpec(mode, predictions=predictions)
 
-  # Convert the labels to a one-hot tensor of shape (length of features, 3) and
-  # with a on-value of 1 for each one-hot vector of length 3.
-  onehot_labels = tf.one_hot(labels, 3, 1, 0)
   # Compute loss.
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Create training op with exponentially decaying learning rate.
   if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/learn/iris_custom_model.py b/tensorflow/examples/learn/iris_custom_model.py
index 471a99ba76dd8012ba3b1a519d5d07fb378f89e7..c6bdb86ba52b9715b977909d9b7d0fbc59161a53 100644
--- a/tensorflow/examples/learn/iris_custom_model.py
+++ b/tensorflow/examples/learn/iris_custom_model.py
@@ -47,12 +47,8 @@ def my_model(features, labels, mode):
     }
     return tf.estimator.EstimatorSpec(mode, predictions=predictions)
 
-  # Convert the labels to a one-hot tensor of shape (length of features, 3) and
-  # with a on-value of 1 for each one-hot vector of length 3.
-  onehot_labels = tf.one_hot(labels, 3, 1, 0)
   # Compute loss.
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Create training op.
   if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/learn/mnist.py b/tensorflow/examples/learn/mnist.py
index 88425ea0d0bf72fb7e7d9cbab27da023f3ade122..98819b20bfea5021d52e2c50b004bccdaf1f25e7 100644
--- a/tensorflow/examples/learn/mnist.py
+++ b/tensorflow/examples/learn/mnist.py
@@ -77,9 +77,7 @@ def conv_model(features, labels, mode):
     return tf.estimator.EstimatorSpec(mode, predictions=predictions)
 
   # Compute loss.
-  onehot_labels = tf.one_hot(tf.cast(labels, tf.int32), N_DIGITS, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Create training op.
   if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/learn/multiple_gpu.py b/tensorflow/examples/learn/multiple_gpu.py
index a294950a386a7207858bbcff345f14de44ffb9ca..3bad22ddf66b7981930637d64cc8653e3fb29cdf 100644
--- a/tensorflow/examples/learn/multiple_gpu.py
+++ b/tensorflow/examples/learn/multiple_gpu.py
@@ -65,12 +65,8 @@ def my_model(features, labels, mode):
       }
       return tf.estimator.EstimatorSpec(mode, predictions=predictions)
 
-    # Convert the labels to a one-hot tensor of shape (length of features, 3)
-    # and with a on-value of 1 for each one-hot vector of length 3.
-    onehot_labels = tf.one_hot(labels, 3, 1, 0)
     # Compute loss.
-    loss = tf.losses.softmax_cross_entropy(
-        onehot_labels=onehot_labels, logits=logits)
+    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
     # Create training op.
     if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/learn/resnet.py b/tensorflow/examples/learn/resnet.py
index 1e0966475b01d067330dc4797032d561857fd208..9542e552504580a6614f8bd2f43c38dfa795750f 100755
--- a/tensorflow/examples/learn/resnet.py
+++ b/tensorflow/examples/learn/resnet.py
@@ -151,9 +151,7 @@ def res_net_model(features, labels, mode):
     return tf.estimator.EstimatorSpec(mode, predictions=predictions)
 
   # Compute loss.
-  onehot_labels = tf.one_hot(tf.cast(labels, tf.int32), N_DIGITS, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Create training op.
   if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/learn/text_classification.py b/tensorflow/examples/learn/text_classification.py
index ba89c532be5fa0e13a2dcb1f7894be4c631507d7..eb117c39a122f4f6c108dd18f8f8035edf05eaa1 100644
--- a/tensorflow/examples/learn/text_classification.py
+++ b/tensorflow/examples/learn/text_classification.py
@@ -46,9 +46,7 @@ def estimator_spec_for_softmax_classification(
             'prob': tf.nn.softmax(logits)
         })
 
-  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
   if mode == tf.estimator.ModeKeys.TRAIN:
     optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
     train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
diff --git a/tensorflow/examples/learn/text_classification_character_cnn.py b/tensorflow/examples/learn/text_classification_character_cnn.py
index 363ff003628e03be40c1be6b7b32e12a07533047..afda170e2a9c1b0281fdd3d7ed210a1bfcd4481b 100644
--- a/tensorflow/examples/learn/text_classification_character_cnn.py
+++ b/tensorflow/examples/learn/text_classification_character_cnn.py
@@ -88,9 +88,7 @@ def char_cnn_model(features, labels, mode):
             'prob': tf.nn.softmax(logits)
         })
 
-  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
   if mode == tf.estimator.ModeKeys.TRAIN:
     optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
     train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
diff --git a/tensorflow/examples/learn/text_classification_character_rnn.py b/tensorflow/examples/learn/text_classification_character_rnn.py
index 86adc056add508c309b3a5b93e58e9c195995642..15733821fb17eb17269fea295020f6690bb62854 100644
--- a/tensorflow/examples/learn/text_classification_character_rnn.py
+++ b/tensorflow/examples/learn/text_classification_character_rnn.py
@@ -59,9 +59,7 @@ def char_rnn_model(features, labels, mode):
             'prob': tf.nn.softmax(logits)
         })
 
-  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
   if mode == tf.estimator.ModeKeys.TRAIN:
     optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
     train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
diff --git a/tensorflow/examples/learn/text_classification_cnn.py b/tensorflow/examples/learn/text_classification_cnn.py
index be262285a3a7aa0d6b9430a2226b448fe674cd7f..9e21aee87f629835222ab367dc3ed55863f553e4 100644
--- a/tensorflow/examples/learn/text_classification_cnn.py
+++ b/tensorflow/examples/learn/text_classification_cnn.py
@@ -87,9 +87,7 @@ def cnn_model(features, labels, mode):
             'prob': tf.nn.softmax(logits)
         })
 
-  onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
   if mode == tf.estimator.ModeKeys.TRAIN:
     optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
     train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
diff --git a/tensorflow/examples/speech_commands/input_data.py b/tensorflow/examples/speech_commands/input_data.py
index 751652b330cd203efe216567172fd3dbb4a5b401..e7db9cddf02daf9a32d3ed859ee9bd35b2cae838 100644
--- a/tensorflow/examples/speech_commands/input_data.py
+++ b/tensorflow/examples/speech_commands/input_data.py
@@ -417,8 +417,7 @@ class AudioProcessor(object):
       sess: TensorFlow session that was active when processor was created.
 
     Returns:
-      List of sample data for the transformed samples, and list of labels in
-      one-hot form.
+      List of sample data for the transformed samples, and list of label indexes
     """
     # Pick one of the partitions to choose samples from.
     candidates = self.data_index[mode]
@@ -428,7 +427,7 @@ class AudioProcessor(object):
       sample_count = max(0, min(how_many, len(candidates) - offset))
     # Data and labels will be populated and returned.
     data = np.zeros((sample_count, model_settings['fingerprint_size']))
-    labels = np.zeros((sample_count, model_settings['label_count']))
+    labels = np.zeros(sample_count)
     desired_samples = model_settings['desired_samples']
     use_background = self.background_data and (mode == 'training')
     pick_deterministically = (mode != 'training')
@@ -483,7 +482,7 @@ class AudioProcessor(object):
       # Run the graph to produce the output audio.
       data[i - offset, :] = sess.run(self.mfcc_, feed_dict=input_dict).flatten()
       label_index = self.word_to_index[sample['label']]
-      labels[i - offset, label_index] = 1
+      labels[i - offset] = label_index
     return data, labels
 
   def get_unprocessed_data(self, how_many, model_settings, mode):
diff --git a/tensorflow/examples/speech_commands/train.py b/tensorflow/examples/speech_commands/train.py
index a4141b49178f088a6130c768f31ddd14933b2877..a4e80041f82191d7c58a3e52c929340eb604ec9d 100644
--- a/tensorflow/examples/speech_commands/train.py
+++ b/tensorflow/examples/speech_commands/train.py
@@ -133,7 +133,7 @@ def main(_):
 
   # Define loss and optimizer
   ground_truth_input = tf.placeholder(
-      tf.float32, [None, label_count], name='groundtruth_input')
+      tf.int64, [None], name='groundtruth_input')
 
   # Optionally we can add runtime checks to spot when NaNs or other symptoms of
   # numerical errors start occurring during training.
@@ -144,9 +144,8 @@ def main(_):
 
   # Create the back propagation and training evaluation machinery in the graph.
   with tf.name_scope('cross_entropy'):
-    cross_entropy_mean = tf.reduce_mean(
-        tf.nn.softmax_cross_entropy_with_logits(
-            labels=ground_truth_input, logits=logits))
+    cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
+        labels=ground_truth_input, logits=logits)
   tf.summary.scalar('cross_entropy', cross_entropy_mean)
   with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
     learning_rate_input = tf.placeholder(
@@ -154,9 +153,9 @@ def main(_):
     train_step = tf.train.GradientDescentOptimizer(
         learning_rate_input).minimize(cross_entropy_mean)
   predicted_indices = tf.argmax(logits, 1)
-  expected_indices = tf.argmax(ground_truth_input, 1)
-  correct_prediction = tf.equal(predicted_indices, expected_indices)
-  confusion_matrix = tf.confusion_matrix(expected_indices, predicted_indices, num_classes=label_count)
+  correct_prediction = tf.equal(predicted_indices, ground_truth_input)
+  confusion_matrix = tf.confusion_matrix(
+      ground_truth_input, predicted_indices, num_classes=label_count)
   evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   tf.summary.scalar('accuracy', evaluation_step)
 
diff --git a/tensorflow/examples/tutorials/layers/cnn_mnist.py b/tensorflow/examples/tutorials/layers/cnn_mnist.py
index 2124843fcb21d0c4a28ef9a11aba012a5a116e84..1e8d7d05e1c6af08d788857e74c04134333d019c 100644
--- a/tensorflow/examples/tutorials/layers/cnn_mnist.py
+++ b/tensorflow/examples/tutorials/layers/cnn_mnist.py
@@ -97,9 +97,7 @@ def cnn_model_fn(features, labels, mode):
     return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
 
   # Calculate Loss (for both TRAIN and EVAL modes)
-  onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
-  loss = tf.losses.softmax_cross_entropy(
-      onehot_labels=onehot_labels, logits=logits)
+  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
   # Configure the Training Op (for TRAIN mode)
   if mode == tf.estimator.ModeKeys.TRAIN:
diff --git a/tensorflow/examples/tutorials/mnist/mnist.py b/tensorflow/examples/tutorials/mnist/mnist.py
index 3585043a2a9f1920422c50cd60ce18fcfa646419..7cedd0e264f35ac4ab924c93032b019e2aae78cf 100644
--- a/tensorflow/examples/tutorials/mnist/mnist.py
+++ b/tensorflow/examples/tutorials/mnist/mnist.py
@@ -94,9 +94,7 @@ def loss(logits, labels):
     loss: Loss tensor of type float.
   """
   labels = tf.to_int64(labels)
-  cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
-      labels=labels, logits=logits, name='xentropy')
-  return tf.reduce_mean(cross_entropy, name='xentropy_mean')
+  return tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
 
 
 def training(loss, learning_rate):
diff --git a/tensorflow/examples/tutorials/mnist/mnist_deep.py b/tensorflow/examples/tutorials/mnist/mnist_deep.py
index a4dbab5123d49ee97445a5921a14bd1764593025..1e0294db27bc675870afceca77a2cdcd4b3f5ad3 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_deep.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_deep.py
@@ -125,27 +125,27 @@ def bias_variable(shape):
 
 def main(_):
   # Import data
-  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
+  mnist = input_data.read_data_sets(FLAGS.data_dir)
 
   # Create the model
   x = tf.placeholder(tf.float32, [None, 784])
 
   # Define loss and optimizer
-  y_ = tf.placeholder(tf.float32, [None, 10])
+  y_ = tf.placeholder(tf.int64, [None])
 
   # Build the graph for the deep net
   y_conv, keep_prob = deepnn(x)
 
   with tf.name_scope('loss'):
-    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
-                                                            logits=y_conv)
+    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
+        labels=y_, logits=y_conv)
   cross_entropy = tf.reduce_mean(cross_entropy)
 
   with tf.name_scope('adam_optimizer'):
     train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
 
   with tf.name_scope('accuracy'):
-    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
+    correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
     correct_prediction = tf.cast(correct_prediction, tf.float32)
   accuracy = tf.reduce_mean(correct_prediction)
 
diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax.py b/tensorflow/examples/tutorials/mnist/mnist_softmax.py
index addd2d3810219f70ffb5f7c919f01de35dd816d9..fb3ac942039e670fb5ca975c5d9835ba065190a2 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_softmax.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_softmax.py
@@ -34,7 +34,7 @@ FLAGS = None
 
 def main(_):
   # Import data
-  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
+  mnist = input_data.read_data_sets(FLAGS.data_dir)
 
   # Create the model
   x = tf.placeholder(tf.float32, [None, 784])
@@ -43,7 +43,7 @@ def main(_):
   y = tf.matmul(x, W) + b
 
   # Define loss and optimizer
-  y_ = tf.placeholder(tf.float32, [None, 10])
+  y_ = tf.placeholder(tf.int64, [None])
 
   # The raw formulation of cross-entropy,
   #
@@ -52,10 +52,9 @@ def main(_):
   #
   # can be numerically unstable.
   #
-  # So here we use tf.nn.softmax_cross_entropy_with_logits on the raw
+  # So here we use tf.losses.sparse_softmax_cross_entropy on the raw
   # outputs of 'y', and then average across the batch.
-  cross_entropy = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
+  cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y)
   train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
 
   sess = tf.InteractiveSession()
@@ -66,7 +65,7 @@ def main(_):
     sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
 
   # Test trained model
-  correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
+  correct_prediction = tf.equal(tf.argmax(y, 1), y_)
   accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   print(sess.run(accuracy, feed_dict={x: mnist.test.images,
                                       y_: mnist.test.labels}))
diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py b/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py
index eaff05913af756c6ab0bf80e8f0893b1d239d60d..e89317494f9b7171a93b2706d9d612d456ddf937 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_softmax_xla.py
@@ -32,7 +32,7 @@ FLAGS = None
 
 def main(_):
   # Import data
-  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
+  mnist = input_data.read_data_sets(FLAGS.data_dir)
 
   # Create the model
   x = tf.placeholder(tf.float32, [None, 784])
@@ -41,7 +41,7 @@ def main(_):
   y = tf.matmul(x, w) + b
 
   # Define loss and optimizer
-  y_ = tf.placeholder(tf.float32, [None, 10])
+  y_ = tf.placeholder(tf.int64, [None])
 
   # The raw formulation of cross-entropy,
   #
@@ -50,10 +50,9 @@ def main(_):
   #
   # can be numerically unstable.
   #
-  # So here we use tf.nn.softmax_cross_entropy_with_logits on the raw
-  # outputs of 'y', and then average across the batch.
-  cross_entropy = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
+  # So here we use tf.losses.sparse_softmax_cross_entropy on the raw
+  # logit outputs of 'y', and then average across the batch.
+  cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y)
   train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
 
   config = tf.ConfigProto()
@@ -86,7 +85,7 @@ def main(_):
       sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
 
   # Test trained model
-  correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
+  correct_prediction = tf.equal(tf.argmax(y, 1), y_)
   accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   print(sess.run(accuracy,
                  feed_dict={x: mnist.test.images,
diff --git a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
index c401d09df8ca5132178ab31e3b14b3a5cf98e70d..7967e22d6a0319a530cb2f00e54872f022ac0095 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
@@ -38,7 +38,6 @@ FLAGS = None
 def train():
   # Import data
   mnist = input_data.read_data_sets(FLAGS.data_dir,
-                                    one_hot=True,
                                     fake_data=FLAGS.fake_data)
 
   sess = tf.InteractiveSession()
@@ -47,7 +46,7 @@ def train():
   # Input placeholders
   with tf.name_scope('input'):
     x = tf.placeholder(tf.float32, [None, 784], name='x-input')
-    y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')
+    y_ = tf.placeholder(tf.int64, [None], name='y-input')
 
   with tf.name_scope('input_reshape'):
     image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])
@@ -117,12 +116,12 @@ def train():
     #
     # can be numerically unstable.
     #
-    # So here we use tf.nn.softmax_cross_entropy_with_logits on the
-    # raw outputs of the nn_layer above, and then average across
+    # So here we use tf.losses.sparse_softmax_cross_entropy on the
+    # raw logit outputs of the nn_layer above, and then average across
     # the batch.
-    diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
     with tf.name_scope('total'):
-      cross_entropy = tf.reduce_mean(diff)
+      cross_entropy = tf.losses.sparse_softmax_cross_entropy(
+          labels=y_, logits=y)
   tf.summary.scalar('cross_entropy', cross_entropy)
 
   with tf.name_scope('train'):
@@ -131,7 +130,7 @@ def train():
 
   with tf.name_scope('accuracy'):
     with tf.name_scope('correct_prediction'):
-      correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
+      correct_prediction = tf.equal(tf.argmax(y, 1), y_)
     with tf.name_scope('accuracy'):
       accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
   tf.summary.scalar('accuracy', accuracy)
diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
index 142e45a2e8cb244bf1c7015b9001a463bf54b434..87cd95165e99f3fa7d8911112865a33570186533 100644
--- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
+++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
@@ -120,7 +120,7 @@ def generate_batch(batch_size, num_skips, skip_window):
       batch[i * num_skips + j] = buffer[skip_window]
       labels[i * num_skips + j, 0] = buffer[context_word]
     if data_index == len(data):
-      buffer[:] = data[:span]
+      buffer.extend(data[0:span])
       data_index = span
     else:
       buffer.append(data[data_index])
diff --git a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
index 1e375ed48edcc779509179d7eae0ff93bbc87b16..4a429837b7b997f0f6571060280a9a15543b9f54 100644
--- a/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
+++ b/tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.cc
@@ -53,7 +53,8 @@ tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
   //  - Scales, clamps, and converts that spectrogram to 0 to 255 uint8's.
   //  - Reshapes the tensor so that it's [height, width, 1] for imaging.
   //  - Encodes it as a PNG stream and saves it out to a file.
-  Output file_reader = ReadFile(root.WithOpName("input_wav"), input_wav);
+  Output file_reader =
+      tensorflow::ops::ReadFile(root.WithOpName("input_wav"), input_wav);
   DecodeWav wav_decoder =
       DecodeWav(root.WithOpName("wav_decoder"), file_reader);
   Output spectrogram = AudioSpectrogram(root.WithOpName("spectrogram"),
@@ -71,8 +72,8 @@ tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
   Output squeeze = Squeeze(root.WithOpName("squeeze"), expand_dims,
                            Squeeze::Attrs().Axis({0}));
   Output png_encoder = EncodePng(root.WithOpName("png_encoder"), squeeze);
-  WriteFile file_writer =
-      WriteFile(root.WithOpName("output_image"), output_image, png_encoder);
+  tensorflow::ops::WriteFile file_writer = tensorflow::ops::WriteFile(
+      root.WithOpName("output_image"), output_image, png_encoder);
   tensorflow::GraphDef graph;
   TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));
 
diff --git a/tensorflow/go/README.md b/tensorflow/go/README.md
index 376e22b38082f7ebeacf49edd44e85c12be2d95f..b1bd87eb0c3b3a498a1db45f11d9a48552e08079 100644
--- a/tensorflow/go/README.md
+++ b/tensorflow/go/README.md
@@ -26,9 +26,12 @@ from source.
     ([Linux](https://www.tensorflow.org/install/install_sources#PrepareLinux)
     or [OS
     X](https://www.tensorflow.org/install/install_sources#PrepareMac)).
-    If you don't need GPU support, then try the following: `sh # Linux sudo
-    apt-get install python swig python-numpy # OS X with homebrew brew install
-    swig`
+    If you don't need GPU support, then try the following:
+
+    ```sh
+    sudo apt-get install python swig python-numpy # Linux
+    brew install swig                             # OS X with homebrew
+    ```
 
 ### Build
 
diff --git a/tensorflow/go/genop/generate.sh b/tensorflow/go/genop/generate.sh
index 01fcfb9058378b49d1315ddbbcc08e6a5de09d7d..a894c87c2765d01d2310159b19092904ad50a8b3 100644
--- a/tensorflow/go/genop/generate.sh
+++ b/tensorflow/go/genop/generate.sh
@@ -19,6 +19,11 @@ set -e
 go get github.com/golang/protobuf/proto
 go get github.com/golang/protobuf/protoc-gen-go
 
+if [ -z "${GOPATH}" ]
+then
+  GOPATH=$(go env GOPATH)
+fi
+
 cd $(dirname $0)
 for g in $(echo "${GOPATH//:/ }"); do
     TF_DIR="${g}/src/github.com/tensorflow/tensorflow"
diff --git a/tensorflow/go/genop/internal/api_def_map.go b/tensorflow/go/genop/internal/api_def_map.go
new file mode 100644
index 0000000000000000000000000000000000000000..07b689dbba23a3aa991983f3b373fa8445c673e1
--- /dev/null
+++ b/tensorflow/go/genop/internal/api_def_map.go
@@ -0,0 +1,127 @@
+/*
+Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package internal
+
+/*
+#include <stdlib.h>
+#include <string.h>
+
+#include "tensorflow/c/c_api.h"
+*/
+import "C"
+
+import (
+	"errors"
+	"fmt"
+	"runtime"
+	"unsafe"
+
+	"github.com/golang/protobuf/proto"
+	pb "github.com/tensorflow/tensorflow/tensorflow/go/genop/internal/proto/tensorflow/core/framework"
+)
+
+// Encapsulates a collection of API definitions.
+//
+// apiDefMap represents a map from operation name to corresponding
+// ApiDef proto (see
+// https://www.tensorflow.org/code/tensorflow/core/framework/api_def.proto
+// for ApiDef proto definition).
+type apiDefMap struct {
+	c *C.TF_ApiDefMap
+}
+
+// Creates and returns a new apiDefMap instance.
+//
+// oplist is and OpList proto instance (see
+// https://www.tensorflow.org/code/tensorflow/core/framework/op_def.proto
+// for OpList proto definition).
+
+func newAPIDefMap(oplist *pb.OpList) (*apiDefMap, error) {
+	// Create a buffer containing the serialized OpList.
+	opdefSerialized, err := proto.Marshal(oplist)
+	if err != nil {
+		return nil, fmt.Errorf("could not serialize OpDef for %s", oplist.String())
+	}
+	data := C.CBytes(opdefSerialized)
+	defer C.free(data)
+
+	opbuf := C.TF_NewBuffer()
+	defer C.TF_DeleteBuffer(opbuf)
+	opbuf.data = data
+	opbuf.length = C.size_t(len(opdefSerialized))
+
+	// Create ApiDefMap.
+	status := C.TF_NewStatus()
+	defer C.TF_DeleteStatus(status)
+	capimap := C.TF_NewApiDefMap(opbuf, status)
+	if C.TF_GetCode(status) != C.TF_OK {
+		return nil, errors.New(C.GoString(C.TF_Message(status)))
+	}
+	apimap := &apiDefMap{capimap}
+	runtime.SetFinalizer(
+		apimap,
+		func(a *apiDefMap) {
+			C.TF_DeleteApiDefMap(a.c)
+		})
+	return apimap, nil
+}
+
+// Updates apiDefMap with the overrides specified in `data`.
+//
+// data - ApiDef text proto.
+func (m *apiDefMap) Put(data string) error {
+	cdata := C.CString(data)
+	defer C.free(unsafe.Pointer(cdata))
+	status := C.TF_NewStatus()
+	defer C.TF_DeleteStatus(status)
+	C.TF_ApiDefMapPut(m.c, cdata, C.size_t(len(data)), status)
+	if C.TF_GetCode(status) != C.TF_OK {
+		return errors.New(C.GoString(C.TF_Message(status)))
+	}
+	return nil
+}
+
+// Returns ApiDef proto instance for the TensorFlow operation
+// named `opname`.
+func (m *apiDefMap) Get(opname string) (*pb.ApiDef, error) {
+	cname := C.CString(opname)
+	defer C.free(unsafe.Pointer(cname))
+	status := C.TF_NewStatus()
+	defer C.TF_DeleteStatus(status)
+	apidefBuf := C.TF_ApiDefMapGet(
+		m.c, cname, C.size_t(len(opname)), status)
+	defer C.TF_DeleteBuffer(apidefBuf)
+	if C.TF_GetCode(status) != C.TF_OK {
+		return nil, errors.New(C.GoString(C.TF_Message(status)))
+	}
+	if apidefBuf == nil {
+		return nil, fmt.Errorf("could not find ApiDef for %s", opname)
+	}
+
+	var (
+		apidef = new(pb.ApiDef)
+		size   = int(apidefBuf.length)
+		// A []byte backed by C memory.
+		// See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
+		data = (*[1 << 30]byte)(unsafe.Pointer(apidefBuf.data))[:size:size]
+		err  = proto.Unmarshal(data, apidef)
+	)
+	if err != nil {
+		return nil, err
+	}
+	return apidef, nil
+}
diff --git a/tensorflow/go/genop/internal/genop.go b/tensorflow/go/genop/internal/genop.go
index dec08dee1ca4f2d85f9bac834323889adad178d3..82f7510f2ed947e0a87e4d88cfce1ecaaa6362f8 100644
--- a/tensorflow/go/genop/internal/genop.go
+++ b/tensorflow/go/genop/internal/genop.go
@@ -29,12 +29,18 @@ limitations under the License.
 // encountered.
 package internal
 
-// #include "tensorflow/c/c_api.h"
+/*
+#include <stdlib.h>
+
+#include "tensorflow/c/c_api.h"
+*/
 import "C"
 
 import (
 	"fmt"
 	"io"
+	"io/ioutil"
+	"path"
 	"reflect"
 	"strings"
 	"text/template"
@@ -47,15 +53,23 @@ import (
 // GenerateFunctionsForRegisteredOps writes a Go source code file to w
 // containing functions for each TensorFlow operation registered in the address
 // space of the calling process.
-func GenerateFunctionsForRegisteredOps(w io.Writer) error {
-	ops, err := registeredOps()
+// apidefDirs should be a contain of directories containing api_def_*.pbtxt
+// files to load.
+func GenerateFunctionsForRegisteredOps(
+	w io.Writer, apidefDirs []string) error {
+	ops, apimap, err := registeredOps()
 	if err != nil {
 		return err
 	}
-	return generateFunctionsForOps(w, ops)
+	for _, dir := range apidefDirs {
+		if err = updateAPIDefs(apimap, dir); err != nil {
+			return err
+		}
+	}
+	return generateFunctionsForOps(w, ops, apimap)
 }
 
-func registeredOps() (*pb.OpList, error) {
+func registeredOps() (*pb.OpList, *apiDefMap, error) {
 	buf := C.TF_GetAllOpList()
 	defer C.TF_DeleteBuffer(buf)
 	var (
@@ -66,10 +80,31 @@ func registeredOps() (*pb.OpList, error) {
 		data = (*[1 << 30]byte)(unsafe.Pointer(buf.data))[:size:size]
 		err  = proto.Unmarshal(data, list)
 	)
-	return list, err
+	if err != nil {
+		return nil, nil, err
+	}
+	apimap, err := newAPIDefMap(list)
+	return list, apimap, err
+}
+
+func updateAPIDefs(m *apiDefMap, dir string) error {
+	files, err := ioutil.ReadDir(dir)
+	if err != nil {
+		return err
+	}
+	for _, file := range files {
+		data, err := ioutil.ReadFile(path.Join(dir, file.Name()))
+		if err != nil {
+			return fmt.Errorf("failed to read %q: %v", file.Name(), err)
+		}
+		if err = m.Put(string(data)); err != nil {
+			return fmt.Errorf("failed to process %q: %v", file.Name(), err)
+		}
+	}
+	return nil
 }
 
-func generateFunctionsForOps(w io.Writer, ops *pb.OpList) error {
+func generateFunctionsForOps(w io.Writer, ops *pb.OpList, apimap *apiDefMap) error {
 	thisPackage := reflect.TypeOf(tmplArgs{}).PkgPath()
 	if err := tmplHeader.Execute(w, thisPackage); err != nil {
 		return err
@@ -83,14 +118,18 @@ func generateFunctionsForOps(w io.Writer, ops *pb.OpList) error {
 		if blacklist[op.Name] {
 			continue
 		}
-		if err := generateFunctionForOp(w, op); err != nil {
+		apidef, err := apimap.Get(op.Name)
+		if err != nil {
+			return err
+		}
+		if err := generateFunctionForOp(w, op, apidef); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 
-func generateFunctionForOp(w io.Writer, op *pb.OpDef) error {
+func generateFunctionForOp(w io.Writer, op *pb.OpDef, apidef *pb.ApiDef) error {
 	if strings.HasPrefix(op.Name, "_") { // Internal operation
 		return nil
 	}
@@ -112,12 +151,16 @@ func generateFunctionForOp(w io.Writer, op *pb.OpDef) error {
 			return nil
 		}
 	}
-	if op.Summary == "" {
+	if apidef.Summary == "" {
 		// Undocumented operation, perhaps a sign of not being ready to
 		// export.
 		return nil
 	}
-	return tmplOp.Execute(w, newTmplArgs(op))
+	tmplArgs, err := newTmplArgs(op, apidef)
+	if err != nil {
+		return err
+	}
+	return tmplOp.Execute(w, tmplArgs)
 }
 
 var (
@@ -172,7 +215,7 @@ func makeOutputList(op *tf.Operation, start int, output string) ([]tf.Output, in
 type {{.Op.Name}}Attr func(optionalAttr)
 
 {{range .OptionalAttrs}}
-// {{$.Op.Name}}{{CamelCase .Name}} sets the optional {{.Name}} attribute to value.
+// {{$.Op.Name}}{{CamelCase .RenameTo}} sets the optional {{.RenameTo}} attribute to value.
 {{- if .Description}}
 //
 // value: {{MakeComment .Description}}
@@ -180,9 +223,9 @@ type {{.Op.Name}}Attr func(optionalAttr)
 // If not specified, defaults to {{StripLeadingColon .DefaultValue}}
 {{- if .HasMinimum}}
 //
-// {{if IsListAttr .}}REQUIRES: len(value) >= {{.Minimum}}{{else}}REQUIRES: value >= {{.Minimum}}{{end}}
+// {{if .IsListAttr }}REQUIRES: len(value) >= {{.Minimum}}{{else}}REQUIRES: value >= {{.Minimum}}{{end}}
 {{- end}}
-func {{$.Op.Name}}{{CamelCase .Name}}(value {{GoType .Type}}) {{$.Op.Name}}Attr {
+func {{$.Op.Name}}{{CamelCase .RenameTo}}(value {{GoType .Type}}) {{$.Op.Name}}Attr {
 	return func(m optionalAttr) {
 		m[{{printf "%q" .Name}}] = value
 	}
@@ -192,14 +235,14 @@ func {{$.Op.Name}}{{CamelCase .Name}}(value {{GoType .Type}}) {{$.Op.Name}}Attr
 
 {{- /* Create a godoc friendly comment. */ -}}
 
-// {{MakeComment .Op.Summary}}
+// {{MakeComment .APIDef.Summary}}
 
 {{- with .Op.Deprecation}}
 //
 // DEPRECATED at GraphDef version {{.Version}}: {{.Explanation}}
 {{- end -}}
 
-{{- with .Op.Description}}
+{{- with .APIDef.Description}}
 //
 // {{MakeComment .}}
 {{- end -}}
@@ -207,11 +250,11 @@ func {{$.Op.Name}}{{CamelCase .Name}}(value {{GoType .Type}}) {{$.Op.Name}}Attr
 {{- if .DescribeArguments}}
 //
 // Arguments:
-{{- range .Op.InputArg}}
-//	{{if .Description}}{{Identifier .Name}}: {{MakeComment .Description}}{{end}}
+{{- range .InArgsReordered}}
+//	{{if .Description}}{{Identifier .RenameTo}}: {{MakeComment .Description}}{{end}}
 {{- end -}}
 {{- range .RequiredAttrs}}
-//	{{if .Description}}{{Identifier .Name}}: {{MakeComment .Description}}{{end}}
+//	{{if .Description}}{{Identifier .RenameTo}}: {{MakeComment .Description}}{{end}}
 {{- end -}}
 {{- end -}}
 
@@ -221,12 +264,12 @@ func {{$.Op.Name}}{{CamelCase .Name}}(value {{GoType .Type}}) {{$.Op.Name}}Attr
 {{- else }}
 {{- if .DescribeOutputs}}
 //
-{{- if ((len .Op.OutputArg) eq 1) }}
-// Returns {{range .Op.OutputArg}}{{MakeComment .Description}}{{end}}
+{{- if ((len .OutArgs) eq 1) }}
+// Returns {{range .OutArgs}}{{MakeComment .Description}}{{end}}
 {{- else }}
 // Returns:
-{{- range .Op.OutputArg}}
-//	{{Identifier .Name}}{{if .Description}}: {{MakeComment .Description}}{{end}}
+{{- range .OutArgs}}
+//	{{Identifier .RenameTo}}{{if .Description}}: {{MakeComment .Description}}{{end}}
 {{- end -}}
 {{- end -}}
 {{- end -}}
@@ -247,15 +290,15 @@ func {{.Op.Name}}
 */ -}}
 
 (scope *Scope
-{{- range $i, $a := .Op.InputArg}}, {{Identifier $a.Name}} {{if IsListArg $a}}[]{{end}}tf.Output{{end -}}
-{{range $i, $a := .RequiredAttrs}}, {{Identifier $a.Name}} {{GoType $a.Type}}{{end -}}
+{{- range $i, $a := .InArgsReordered}}, {{Identifier $a.RenameTo}} {{if $a.IsListArg}}[]{{end}}tf.Output{{end -}}
+{{range $i, $a := .RequiredAttrs}}, {{Identifier $a.RenameTo}} {{GoType $a.Type}}{{end -}}
 {{if .OptionalAttrs}}, optional ...{{.Op.Name}}Attr{{end -}}
 )
 
-{{- /* Construct outputs: len(OpDef.OutputArg) or a *tf.Operation */ -}}
+{{- /* Construct outputs: len(.OutArgs) or a *tf.Operation */ -}}
 
-{{if .Op.OutputArg -}}
-({{range $i,$a := .Op.OutputArg}}{{if $i}}, {{end}}{{Identifier $a.Name}} {{if IsListArg $a}}[]{{end}}tf.Output{{end -}})
+{{if .OutArgs -}}
+({{range $i,$a := .OutArgs}}{{if $i}}, {{end}}{{Identifier $a.RenameTo}} {{if $a.IsListArg}}[]{{end}}tf.Output{{end -}})
 {{- else -}}
 (o *tf.Operation)
 {{- end }} {
@@ -263,7 +306,7 @@ func {{.Op.Name}}
 		return
 	}
 	{{if .HasAttrs -}}
-	attrs := map[string]interface{}{ {{- range .RequiredAttrs}}{{printf "%q" .Name}}: {{Identifier .Name}},{{end}}}
+	attrs := map[string]interface{}{ {{- range .RequiredAttrs}}{{printf "%q" .Name}}: {{Identifier .RenameTo}},{{end}}}
 	{{if .OptionalAttrs -}}
 	for _, a := range optional {
 		a(attrs)
@@ -272,16 +315,16 @@ func {{.Op.Name}}
 	{{end -}}
 	opspec := tf.OpSpec{
 		Type: {{printf "%q" .Op.Name}},
-		{{if .Op.InputArg -}}
+		{{if .InArgs -}}
 		Input: []tf.Input{
-			{{range .Op.InputArg}}{{if IsListArg .}}tf.OutputList({{Identifier .Name}}){{else}}{{Identifier .Name}}{{end}}, {{end}}
+			{{range $i,$a := .InArgs}}{{if $a.IsListArg}}tf.OutputList({{Identifier $a.RenameTo}}){{else}}{{Identifier $a.RenameTo}}{{end}}, {{end}}
 		},
 		{{- end}}
 		{{- if .HasAttrs}}
 		Attrs: attrs,
 		{{- end}}
 	}
-	{{- if .Op.OutputArg}}
+	{{- if .OutArgs}}
 	{{- if .HasListOutput}}
 	op := scope.AddOperation(opspec)
 	if scope.Err() != nil {
@@ -289,43 +332,105 @@ func {{.Op.Name}}
 	}
 	var idx int
 	var err error
-	{{- range $i, $a := .Op.OutputArg}}
-	{{- if IsListArg $a}}
-	if {{Identifier .Name}}, idx, err = makeOutputList(op, idx, {{printf "%q" .Name}}); err != nil {
+	{{- range $i, $a := .OutArgs}}
+	{{- if $a.IsListArg}}
+	if {{Identifier .RenameTo}}, idx, err = makeOutputList(op, idx, {{printf "%q" .Name}}); err != nil {
 		scope.UpdateErr({{printf "%q" $.Op.Name}}, err)
 		return
 	}
 	{{- else }}
-	{{Identifier .Name}} = op.Output(idx)
+	{{Identifier .RenameTo}} = op.Output(idx)
 	{{- end }}{{- /* if IsListArg */}}
-	{{- end }}{{- /* range .Op.OutputArg */}}
-	return {{range $i, $a := .Op.OutputArg}}{{if $i}}, {{end}}{{Identifier .Name}}{{end}}
+	{{- end }}{{- /* range .OutArgs */}}
+	return {{range $i, $a := .OutArgs}}{{if $i}}, {{end}}{{Identifier .RenameTo}}{{end}}
 	{{- else }}
 	op := scope.AddOperation(opspec)
-	return {{range $i, $a := .Op.OutputArg}}{{if $i}}, {{end}}op.Output({{$i}}){{end}}
+	return {{range $i, $a := .OutArgs}}{{if $i}}, {{end}}op.Output({{$i}}){{end}}
 	{{- end }}{{- /* if .HasListOutput */}}
 	{{- else }}
 	return scope.AddOperation(opspec)
-	{{- end }}{{- /* if .Op.OutputArg */}}
+	{{- end }}{{- /* if .OutArgs */}}
 }
 `))
 )
 
+type attrWrapper struct {
+	op  *pb.OpDef_AttrDef
+	api *pb.ApiDef_Attr
+}
+
+func (a *attrWrapper) Name() string             { return a.api.Name }
+func (a *attrWrapper) RenameTo() string         { return a.api.RenameTo }
+func (a *attrWrapper) Description() string      { return a.api.Description }
+func (a *attrWrapper) Type() string             { return a.op.Type }
+func (a *attrWrapper) IsListAttr() bool         { return isListAttr(a.op) }
+func (a *attrWrapper) HasMinimum() bool         { return a.op.HasMinimum }
+func (a *attrWrapper) Minimum() int64           { return a.op.Minimum }
+func (a *attrWrapper) DefaultValue() interface{} { return a.api.DefaultValue }
+
+type argWrapper struct {
+	op  *pb.OpDef_ArgDef
+	api *pb.ApiDef_Arg
+}
+
+func (a *argWrapper) Name() string        { return a.api.Name }
+func (a *argWrapper) RenameTo() string    { return a.api.RenameTo }
+func (a *argWrapper) Description() string { return a.api.Description }
+func (a *argWrapper) IsListArg() bool     { return isListArg(a.op) }
+
 type tmplArgs struct {
-	Op *pb.OpDef
+	Op     *pb.OpDef
+	APIDef *pb.ApiDef
 	// Op.Attr is split into two categories
 	// (1) Required: These must be specified by the client and are thus
 	//     included in the function signature.
 	// (2) Optional: These need not be specified (as they have default
 	//     values) and thus do not appear in the function signature.
-	RequiredAttrs []*pb.OpDef_AttrDef
-	OptionalAttrs []*pb.OpDef_AttrDef
+	RequiredAttrs []*attrWrapper
+	OptionalAttrs []*attrWrapper
+	InArgs        []*argWrapper
+	// Input arguments ordered based on arg_order field of ApiDef.
+	InArgsReordered []*argWrapper
+	OutArgs         []*argWrapper
 }
 
-func newTmplArgs(op *pb.OpDef) *tmplArgs {
-	ret := tmplArgs{Op: op}
+func newTmplArgs(op *pb.OpDef, apidef *pb.ApiDef) (*tmplArgs, error) {
+	ret := tmplArgs{Op: op, APIDef: apidef}
+
+	// Setup InArgs field
+	for i, in := range op.InputArg {
+		argCombined := argWrapper{op: in, api: apidef.InArg[i]}
+		ret.InArgs = append(ret.InArgs, &argCombined)
+	}
+
+	// Setup OutArgs field
+	for i, out := range op.OutputArg {
+		argCombined := argWrapper{op: out, api: apidef.OutArg[i]}
+		ret.OutArgs = append(ret.OutArgs, &argCombined)
+	}
+
+	// Setup InArgsReordered field
+	for _, argName := range apidef.ArgOrder {
+		// Find the argument in op.InputArg
+		argIndex := -1
+		for i, in := range op.InputArg {
+			if in.Name == argName {
+				argIndex = i
+				break
+			}
+		}
+		if argIndex == -1 {
+			return nil, fmt.Errorf(
+				"couldn't find argument %s in ApiDef for op %s",
+				argName, op.Name)
+		}
+		argCombined := argWrapper{
+			op: op.InputArg[argIndex], api: apidef.InArg[argIndex]}
+		ret.InArgsReordered = append(ret.InArgsReordered, &argCombined)
+	}
+
 	if len(op.Attr) == 0 {
-		return &ret
+		return &ret, nil
 	}
 	// Attributes related to the InputArg's type are inferred automatically
 	// and are not exposed to the client.
@@ -341,28 +446,29 @@ func newTmplArgs(op *pb.OpDef) *tmplArgs {
 			inferred[in.NumberAttr] = true
 		}
 	}
-	for _, attr := range op.Attr {
+	for i, attr := range op.Attr {
 		if inferred[attr.Name] {
 			continue
 		}
+		attrCombined := attrWrapper{op: attr, api: apidef.Attr[i]}
 		if attr.DefaultValue == nil {
-			ret.RequiredAttrs = append(ret.RequiredAttrs, attr)
+			ret.RequiredAttrs = append(ret.RequiredAttrs, &attrCombined)
 		} else {
-			ret.OptionalAttrs = append(ret.OptionalAttrs, attr)
+			ret.OptionalAttrs = append(ret.OptionalAttrs, &attrCombined)
 		}
 	}
-	return &ret
+	return &ret, nil
 }
 
 func (a *tmplArgs) HasAttrs() bool { return len(a.RequiredAttrs)+len(a.OptionalAttrs) > 0 }
 func (a *tmplArgs) DescribeArguments() bool {
-	for _, arg := range a.Op.InputArg {
-		if arg.Description != "" {
+	for _, arg := range a.InArgs {
+		if arg.Description() != "" {
 			return true
 		}
 	}
 	for _, attr := range a.RequiredAttrs {
-		if attr.Description != "" {
+		if attr.Description() != "" {
 			return true
 		}
 	}
@@ -370,16 +476,16 @@ func (a *tmplArgs) DescribeArguments() bool {
 
 }
 func (a *tmplArgs) DescribeOutputs() bool {
-	for _, arg := range a.Op.OutputArg {
-		if arg.Description != "" {
+	for _, arg := range a.OutArgs {
+		if arg.Description() != "" {
 			return true
 		}
 	}
 	return false
 }
 func (a *tmplArgs) HasListOutput() bool {
-	for _, arg := range a.Op.OutputArg {
-		if isListArg(arg) {
+	for _, arg := range a.OutArgs {
+		if arg.IsListArg() {
 			return true
 		}
 	}
diff --git a/tensorflow/go/genop/internal/genop_test.go b/tensorflow/go/genop/internal/genop_test.go
index c984c0063a9f663d82dbb797a5acec1becb79e5f..b3a23dff102a690b1f7f08b675219929355f139f 100644
--- a/tensorflow/go/genop/internal/genop_test.go
+++ b/tensorflow/go/genop/internal/genop_test.go
@@ -25,19 +25,44 @@ import (
 	pb "github.com/tensorflow/tensorflow/tensorflow/go/genop/internal/proto/tensorflow/core/framework"
 )
 
+// Creates an ApiDef based on opdef and applies overrides
+// from apidefText (ApiDef text proto).
+func GetAPIDef(t *testing.T, opdef *pb.OpDef, apidefText string) *pb.ApiDef {
+	opdefList := &pb.OpList{Op: []*pb.OpDef{opdef}}
+	apimap, err := newAPIDefMap(opdefList)
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = apimap.Put(apidefText)
+	if err != nil {
+		t.Fatal(err)
+	}
+	apidef, err := apimap.Get(opdef.Name)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return apidef
+}
+
 func TestGenerateOp(t *testing.T) {
 	// TestGenerateOp validates the generated source code for an op.
 	// The OpDef for the test cases are simplified forms of real ops.
 	testdata := []struct {
 		tag    string
 		opdef  string
+		apidef string
 		wanted string
 	}{
 		{
 			tag: "NoOp",
 			opdef: `
 name: "NoOp"
+`,
+			apidef: `
+op: <
+graph_op_name: "NoOp"
 summary: "No. Op."
+>
 `,
 			wanted: `
 // No. Op.
@@ -80,8 +105,13 @@ attr: <
     >
   >
 >
+`,
+			apidef: `
+op: <
+graph_op_name: "Add"
 summary: "Returns x + y element-wise."
 description: "Blah blah",
+>
 `,
 			wanted: `
 // Returns x + y element-wise.
@@ -122,7 +152,12 @@ attr: <
   name: "DstT"
   type: "type"
 >
+`,
+			apidef: `
+op: <
+graph_op_name: "Cast"
 summary: "Cast x of type SrcT to y of DstT."
+>
 `,
 			wanted: `
 // Cast x of type SrcT to y of DstT.
@@ -149,12 +184,10 @@ func Cast(scope *Scope, x tf.Output, DstT tf.DataType) (y tf.Output) {
 name: "DecodeJpeg"
 input_arg: <
   name: "contents"
-  description: "0-D.  The JPEG-encoded image."
   type: DT_STRING
 >
 output_arg: <
   name: "image"
-  description: "3-D with shape [height, width, channels]"
   type: DT_UINT8
 >
 attr: <
@@ -163,7 +196,6 @@ attr: <
   default_value: <
     i: 0
   >
-  description: "Number of color channels for the decoded image."
 >
 attr: <
   name: "fancy_upscaling"
@@ -171,7 +203,6 @@ attr: <
   default_value: <
     b: true
   >
-  description: "If true use a slower but nicer upscaling of the\nchroma planes (yuv420/422 only)."
 >
 attr: <
   name: "acceptable_fraction"
@@ -179,10 +210,34 @@ attr: <
   default_value: <
     f: 1
   >
+>
+`,
+			apidef: `
+op: <
+graph_op_name: "DecodeJpeg"
+in_arg: <
+  name: "contents"
+  description: "0-D.  The JPEG-encoded image."
+>
+out_arg: <
+  name: "image"
+  description: "3-D with shape [height, width, channels]"
+>
+attr: <
+  name: "channels"
+  description: "Number of color channels for the decoded image."
+>
+attr: <
+  name: "fancy_upscaling"
+  description: "If true use a slower but nicer upscaling of the\nchroma planes (yuv420/422 only)."
+>
+attr: <
+  name: "acceptable_fraction"
   description: "The minimum required fraction of lines before a truncated\ninput is accepted."
 >
 summary: "Decode a JPEG-encoded image to a uint8 tensor."
 description: "Norna dorna fjord\nkajorna\nhahaha"
+>
 `,
 			wanted: `
 // DecodeJpegAttr is an optional argument to DecodeJpeg.
@@ -270,7 +325,12 @@ attr: <
   name: "T"
   type: "type"
 >
+`,
+			apidef: `
+op: <
+graph_op_name: "TwoOutputs"
 summary: "Op that produces multiple outputs"
+>
 `,
 			wanted: `
 // Op that produces multiple outputs
@@ -326,8 +386,13 @@ attr: <
     >
   >
 >
+`,
+			apidef: `
+op: <
+graph_op_name: "ShapeN"
 summary: "Returns shape of tensors."
 description: "Some description here."
+>
 `,
 			wanted: `
 // ShapeNAttr is an optional argument to ShapeN.
@@ -371,6 +436,102 @@ func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []t
 	}
 	return output
 }
+`,
+		},
+		{
+			tag: "ApiDefOverrides",
+			opdef: `
+name: "TestOp"
+input_arg: <
+  name: "a"
+  type: DT_STRING
+>
+input_arg: <
+  name: "b"
+  type: DT_STRING
+>
+output_arg: <
+  name: "c"
+  type: DT_UINT8
+>
+attr: <
+  name: "d"
+  type: "int"
+  default_value: <
+    i: 0
+  >
+>
+`,
+			apidef: `
+op: <
+graph_op_name: "TestOp"
+in_arg: <
+  name: "a"
+  rename_to: "aa"
+  description: "Description for aa."
+>
+in_arg: <
+  name: "b"
+  rename_to: "bb"
+  description: "Description for bb."
+>
+arg_order: "b"
+arg_order: "a"
+out_arg: <
+  name: "c"
+  rename_to: "cc"
+  description: "Description for cc."
+>
+attr: <
+  name: "d"
+  rename_to: "dd"
+  description: "Description for dd."
+>
+summary: "Summary for TestOp."
+description: "Description for TestOp."
+>
+`,
+			wanted: `
+// TestOpAttr is an optional argument to TestOp.
+type TestOpAttr func(optionalAttr)
+
+// TestOpDd sets the optional dd attribute to value.
+//
+// value: Description for dd.
+// If not specified, defaults to 0
+func TestOpDd(value int64) TestOpAttr {
+	return func(m optionalAttr) {
+		m["d"] = value
+	}
+}
+
+// Summary for TestOp.
+//
+// Description for TestOp.
+//
+// Arguments:
+//	bb: Description for bb.
+//	aa: Description for aa.
+//
+// Returns Description for cc.
+func TestOp(scope *Scope, bb tf.Output, aa tf.Output, optional ...TestOpAttr) (cc tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "TestOp",
+		Input: []tf.Input{
+			aa, bb,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 `,
 		},
 	}
@@ -378,11 +539,13 @@ func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []t
 	for _, test := range testdata {
 		t.Run(test.tag, func(t *testing.T) {
 			var opdef pb.OpDef
+			var apidef *pb.ApiDef
 			var buf bytes.Buffer
 			if err := proto.UnmarshalText(test.opdef, &opdef); err != nil {
 				t.Fatal(err)
 			}
-			if err := generateFunctionForOp(&buf, &opdef); err != nil {
+			apidef = GetAPIDef(t, &opdef, test.apidef)
+			if err := generateFunctionForOp(&buf, &opdef, apidef); err != nil {
 				t.Fatal(err)
 			}
 			got, err := format.Source(buf.Bytes())
diff --git a/tensorflow/go/genop/main.go b/tensorflow/go/genop/main.go
index b6f8e2d5a8e30c4721b5c49f64b15f72cc70a794..4a53084ed13b39938ea9ee8b9479d2dd2481e706 100644
--- a/tensorflow/go/genop/main.go
+++ b/tensorflow/go/genop/main.go
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-//go:generate sh generate.sh
+//go:generate bash generate.sh
 
 // Command genop generates a Go source file with functions for TensorFlow ops.
 package main
@@ -27,15 +27,17 @@ import (
 	"log"
 	"os"
 	"path/filepath"
+	"strings"
 
 	"github.com/tensorflow/tensorflow/tensorflow/go/genop/internal"
 )
 
 func main() {
 	var (
-		filename = flag.String("outfile", "", "File to write generated source code to.")
-		header   = flag.String("header", "", "Path to a file whose contents will be copied into the generated file. Can be empty")
-		buf      bytes.Buffer
+		filename   = flag.String("outfile", "", "File to write generated source code to.")
+		header     = flag.String("header", "", "Path to a file whose contents will be copied into the generated file. Can be empty")
+		apiDefDirs = flag.String("api_def_dirs", "", "Comma-separated directories containing api_def_*.pbtxt files.")
+		buf        bytes.Buffer
 	)
 	flag.Parse()
 	if *filename == "" {
@@ -51,7 +53,13 @@ func main() {
 	}
 	os.MkdirAll(filepath.Dir(*filename), 0755)
 
-	if err := internal.GenerateFunctionsForRegisteredOps(&buf); err != nil {
+	apiDefDirsList := []string{}
+	if len(*apiDefDirs) > 0 {
+		apiDefDirsList = strings.Split(*apiDefDirs, ",")
+	}
+
+	if err := internal.GenerateFunctionsForRegisteredOps(
+		&buf, apiDefDirsList); err != nil {
 		log.Fatal(err)
 	}
 	formatted, err := format.Source(buf.Bytes())
diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go
index 46c600eab17c6c467d0b3a3312f848541f382e80..fc087d9d995dfe031e61fd0fa15d649c2ee35cc9 100644
--- a/tensorflow/go/graph.go
+++ b/tensorflow/go/graph.go
@@ -20,6 +20,25 @@ package tensorflow
 //
 // #include <stdlib.h>
 // #include <string.h>
+//
+// void TF_SetAttrShapeList_Helper(TF_OperationDescription* desc,
+//                                 const char* attr_name,
+//                                 const int64_t* flat_dims,
+//                                 const int* num_dims,
+//                                 int num_shapes) {
+//  const int64_t** dims =
+//    (const int64_t**)malloc(sizeof(const int64_t*) * num_shapes);
+//  int i = 0;
+//  for (i = 0; i < num_shapes; i++) {
+//    dims[i] = flat_dims;
+//    if (num_dims[i] > 0) {
+//      // flat_dims will be NULL iff num_shapes is 0 or all elements in num_dims are <= 0.
+//      flat_dims += num_dims[i];
+//    }
+//  }
+//  TF_SetAttrShapeList(desc, attr_name, dims, num_dims, num_shapes);
+//  free(dims);
+// }
 import "C"
 
 import (
@@ -114,6 +133,20 @@ func (g *Graph) Operation(name string) *Operation {
 	return &Operation{cop, g}
 }
 
+// Operations returns a list of all operations in the graph
+func (g *Graph) Operations() []Operation {
+	var pos C.size_t = 0
+	ops := []Operation{}
+	for {
+		cop := C.TF_GraphNextOperation(g.c, &pos)
+		if cop == nil {
+			break
+		}
+		ops = append(ops, Operation{cop, g})
+	}
+	return ops
+}
+
 // OpSpec is the specification of an Operation to be added to a Graph
 // (using Graph.AddOperation).
 type OpSpec struct {
@@ -289,41 +322,37 @@ func setAttr(cdesc *C.TF_OperationDescription, status *status, name string, valu
 			return fmt.Errorf("bad value for attribute %q: %v", name, err)
 		}
 	case Shape:
-		ndims, dims := cshape(value)
+		ndims := C.int(value.NumDimensions())
 		var dimsp *C.int64_t
 		if ndims > 0 {
+			dims := make([]C.int64_t, ndims)
+			for i, d := range value.dims {
+				dims[i] = C.int64_t(d)
+			}
 			dimsp = &dims[0]
 		}
 		C.TF_SetAttrShape(cdesc, cAttrName, dimsp, ndims)
 	case []Shape:
-		ndims := make([]C.int, len(value))
-		dims := make([][]C.int64_t, len(value))
-		dimsp := make([]*C.int64_t, len(value))
-		for i, s := range value {
-			ndims[i], dims[i] = cshape(s)
-			if ndims[i] > 0 {
-				dimsp[i] = &dims[i][0]
-			}
-		}
-		if len(value) > 0 {
-			C.TF_SetAttrShapeList(cdesc, cAttrName, &dimsp[0], &ndims[0], C.int(len(value)))
-		} else {
+		if len(value) == 0 {
 			C.TF_SetAttrShapeList(cdesc, cAttrName, nil, nil, 0)
+		} else {
+			var flatDims []C.int64_t
+			ndims := make([]C.int, len(value))
+			for i, s := range value {
+				nd := s.NumDimensions()
+				ndims[i] = C.int(nd)
+				for _, d := range s.dims {
+					flatDims = append(flatDims, C.int64_t(d))
+				}
+			}
+			var flatDimsp *C.int64_t
+			if len(flatDims) > 0 {
+				flatDimsp = &flatDims[0]
+			}
+			C.TF_SetAttrShapeList_Helper(cdesc, cAttrName, flatDimsp, &ndims[0], C.int(len(value)))
 		}
 	default:
 		return fmt.Errorf("attribute %q has a type (%T) which is not valid for operation attributes", name, value)
 	}
 	return nil
 }
-
-func cshape(s Shape) (C.int, []C.int64_t) {
-	ndims := C.int(s.NumDimensions())
-	if ndims < 0 {
-		return -1, nil
-	}
-	dims := make([]C.int64_t, ndims)
-	for i, s := range s.dims {
-		dims[i] = C.int64_t(s)
-	}
-	return ndims, dims
-}
diff --git a/tensorflow/go/graph_test.go b/tensorflow/go/graph_test.go
index c3120bc720308402b22884f29b7ff87ef035874b..b8d65c54f697153ad236f5e27d9f27d048c3a22e 100644
--- a/tensorflow/go/graph_test.go
+++ b/tensorflow/go/graph_test.go
@@ -29,10 +29,26 @@ func hasOperations(g *Graph, ops ...string) error {
 			missing = append(missing, op)
 		}
 	}
-	if len(missing) == 0 {
-		return nil
+	if len(missing) != 0 {
+		return fmt.Errorf("Graph does not have the operations %v", missing)
 	}
-	return fmt.Errorf("Graph does not have the operations %v", missing)
+
+	inList := map[string]bool{}
+	for _, op := range g.Operations() {
+		inList[op.Name()] = true
+	}
+
+	for _, op := range ops {
+		if !inList[op] {
+			missing = append(missing, op)
+		}
+	}
+
+	if len(missing) != 0 {
+		return fmt.Errorf("Operations %v are missing from graph.Operations()", missing)
+	}
+
+	return nil
 }
 
 func TestGraphWriteToAndImport(t *testing.T) {
diff --git a/tensorflow/go/op/generate.go b/tensorflow/go/op/generate.go
index 17ece1c7a2547ee872bf9b79c99f3ef1f9be1b2c..e5a9bea77091e438d572a2863216744b446095de 100644
--- a/tensorflow/go/op/generate.go
+++ b/tensorflow/go/op/generate.go
@@ -15,6 +15,6 @@ limitations under the License.
 */
 
 //go:generate go generate ../genop
-//go:generate go run ../genop/main.go -outfile wrappers.go
+//go:generate go run ../genop/main.go -outfile wrappers.go -api_def_dirs ../../core/api_def/base_api/
 
 package op
diff --git a/tensorflow/go/op/op_test.go b/tensorflow/go/op/op_test.go
index 2451ba360699a7ac24f64209339e7b4f92ffb548..842dee9ffe396c44cfa26bbc7fd34a598e62bf89 100644
--- a/tensorflow/go/op/op_test.go
+++ b/tensorflow/go/op/op_test.go
@@ -58,3 +58,76 @@ func TestAddOperationFailure(t *testing.T) {
 	_ = resize.Shape()
 	t.Errorf("resize.Shape() should have paniced since the underlying Operation was not created")
 }
+
+func TestShapeAttribute(t *testing.T) {
+	s := NewScope()
+	x := Placeholder(s.SubScope("x"), tf.Int32, PlaceholderShape(tf.MakeShape(1)))
+	y := Placeholder(s.SubScope("y"), tf.Int32, PlaceholderShape(tf.Shape{}))
+	z := Add(s, x, y)
+	graph, err := s.Finalize()
+	if err != nil {
+		t.Fatal(err)
+	}
+	sess, err := tf.NewSession(graph, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	value, err := tf.NewTensor([]int32{7})
+	if err != nil {
+		t.Fatal(err)
+	}
+	feeds := map[tf.Output]*tf.Tensor{
+		x: value,
+		y: value,
+	}
+	fetched, err := sess.Run(feeds, []tf.Output{z}, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got, want := len(fetched), 1; got != want {
+		t.Fatalf("Fetched %d tensors, expected %d", got, want)
+	}
+	if got, want := fetched[0].Value().([]int32), []int32{14}; len(got) != len(want) || len(got) != 1 || got[0] != want[0] {
+		t.Fatalf("Got %v, want %v", got, want)
+	}
+}
+
+func TestDataset(t *testing.T) {
+	var (
+		s = NewScope()
+
+		// The use of a non-scalar here is inspired by
+		// https://github.com/tensorflow/tensorflow/issues/14891
+		c       = Const(s, []int32{21718, 31415})
+		types   = []tf.DataType{c.DataType()}
+		shapes  = []tf.Shape{c.Shape()}
+		dataset = TensorDataset(s, []tf.Output{c}, shapes)
+
+		iterator = Iterator(s, "", "", types, shapes)
+		next     = IteratorGetNext(s, iterator, types, shapes)
+		init     = MakeIterator(s, dataset, iterator)
+	)
+	graph, err := s.Finalize()
+	if err != nil {
+		t.Fatal(err)
+	}
+	sess, err := tf.NewSession(graph, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err := sess.Run(nil, nil, []*tf.Operation{init}); err != nil {
+		t.Fatal(err)
+	}
+	results, err := sess.Run(nil, next, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	got := results[0].Value().([]int32)
+	if len(got) != 2 || got[0] != 21718 || got[1] != 31415 {
+		t.Errorf("Got %v, want {21718, 31415}", got)
+	}
+	if _, err := sess.Run(nil, next, nil); err == nil {
+		t.Errorf("Expected sess.Run() to fail since the iterator should have reached the end of the dataset")
+	}
+}
diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 664e37d3a15ef250e3ef90b3201504c108c5c55b..7bcc55959cd6822fdcd52ad00d12f29fb17c33ef 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -38,208 +38,252 @@ func makeOutputList(op *tf.Operation, start int, output string) ([]tf.Output, in
 	return list, start + size, nil
 }
 
-// Writes a `Summary` protocol buffer with scalar values.
-//
-// The input `tag` and `value` must have the scalars.
+// WriteImageSummaryAttr is an optional argument to WriteImageSummary.
+type WriteImageSummaryAttr func(optionalAttr)
+
+// WriteImageSummaryMaxImages sets the optional max_images attribute to value.
 //
-// Arguments:
-//	writer: A handle to a summary writer.
-//	step: The step to write the summary for.
-//	tag: Tag for the summary.
-//	value: Value for the summary.
+// value: Max number of batch elements to generate images for.
+// If not specified, defaults to 3
 //
-// Returns the created operation.
-func WriteScalarSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, value tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "WriteScalarSummary",
-		Input: []tf.Input{
-			writer, step, tag, value,
-		},
+// REQUIRES: value >= 1
+func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr {
+	return func(m optionalAttr) {
+		m["max_images"] = value
 	}
-	return scope.AddOperation(opspec)
 }
 
-// Outputs a `tf.Event` protocol buffer.
+// Writes a `Summary` protocol buffer with images.
 //
-// When CreateSummaryDbWriter is being used, this op can be useful for
-// importing data from event logs.
+// The summary has up to `max_images` summary values containing images. The
+// images are built from `tensor` which must be 4-D with shape `[batch_size,
+// height, width, channels]` and where `channels` can be:
 //
-// Arguments:
-//	writer: A handle to a summary writer.
-//	event: A string containing a binary-encoded tf.Event proto.
+// *  1: `tensor` is interpreted as Grayscale.
+// *  3: `tensor` is interpreted as RGB.
+// *  4: `tensor` is interpreted as RGBA.
 //
-// Returns the created operation.
-func ImportEvent(scope *Scope, writer tf.Output, event tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ImportEvent",
-		Input: []tf.Input{
-			writer, event,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Outputs a `Summary` protocol buffer with a tensor.
+// The images have the same number of channels as the input tensor. For float
+// input, the values are normalized one image at a time to fit in the range
+// `[0, 255]`.  `uint8` values are unchanged.  The op uses two different
+// normalization algorithms:
+//
+// *  If the input values are all positive, they are rescaled so the largest one
+//    is 255.
+//
+// *  If any input value is negative, the values are shifted so input value 0.0
+//    is at 127.  They are then rescaled so that either the smallest value is 0,
+//    or the largest one is 255.
+//
+// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
+// build the `tag` of the summary values:
+//
+// *  If `max_images` is 1, the summary value tag is '*tag*/image'.
+// *  If `max_images` is greater than 1, the summary value tags are
+//    generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
+//
+// The `bad_color` argument is the color to use in the generated images for
+// non-finite input values.  It is a `unit8` 1-D tensor of length `channels`.
+// Each element must be in the range `[0, 255]` (It represents the value of a
+// pixel in the output image).  Non-finite values in the input tensor are
+// replaced by this tensor in the output image.  The default value is the color
+// red.
 //
 // Arguments:
 //	writer: A handle to a summary writer.
 //	step: The step to write the summary for.
-//	tensor: A tensor to serialize.
-//	tag: The summary's tag.
-//	summary_metadata: Serialized SummaryMetadata protocol buffer containing
-// plugin-related metadata for this summary.
+//	tag: Scalar. Used to build the `tag` attribute of the summary values.
+//	tensor: 4-D of shape `[batch_size, height, width, channels]` where
+// `channels` is 1, 3, or 4.
+//	bad_color: Color to use for pixels with non-finite values.
 //
 // Returns the created operation.
-func WriteSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output, tag tf.Output, summary_metadata tf.Output) (o *tf.Operation) {
+func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "WriteSummary",
+		Type: "WriteImageSummary",
 		Input: []tf.Input{
-			writer, step, tensor, tag, summary_metadata,
+			writer, step, tag, tensor, bad_color,
 		},
+		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// Flushes and closes the summary writer.
+// Partitions `data` into `num_partitions` tensors using indices from `partitions`.
 //
-// Also removes it from the resource manager. To reopen, use another
-// CreateSummaryFileWriter op.
+// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]`
+// becomes part of `outputs[partitions[js]]`.  The slices with `partitions[js] = i`
+// are placed in `outputs[i]` in lexicographic order of `js`, and the first
+// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`.
+// In detail,
+//
+// ```python
+//     outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:]
+//
+//     outputs[i] = pack([data[js, ...] for js if partitions[js] == i])
+// ```
+//
+// `data.shape` must start with `partitions.shape`.
+//
+// For example:
+//
+// ```python
+//     # Scalar partitions.
+//     partitions = 1
+//     num_partitions = 2
+//     data = [10, 20]
+//     outputs[0] = []  # Empty with shape [0, 2]
+//     outputs[1] = [[10, 20]]
+//
+//     # Vector partitions.
+//     partitions = [0, 0, 1, 1, 0]
+//     num_partitions = 2
+//     data = [10, 20, 30, 40, 50]
+//     outputs[0] = [10, 20, 50]
+//     outputs[1] = [30, 40]
+// ```
+//
+// See `dynamic_stitch` for an example on how to merge partitions back.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/DynamicPartition.png" alt>
+// </div>
 //
 // Arguments:
-//	writer: A handle to the summary writer resource.
 //
-// Returns the created operation.
-func CloseSummaryWriter(scope *Scope, writer tf.Output) (o *tf.Operation) {
+//	partitions: Any shape.  Indices in the range `[0, num_partitions)`.
+//	num_partitions: The number of partitions to output.
+func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"num_partitions": num_partitions}
 	opspec := tf.OpSpec{
-		Type: "CloseSummaryWriter",
+		Type: "DynamicPartition",
 		Input: []tf.Input{
-			writer,
+			data, partitions,
 		},
+		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
-}
-
-// Flushes the writer's unwritten events.
-//
-// Arguments:
-//	writer: A handle to the summary writer resource.
-//
-// Returns the created operation.
-func FlushSummaryWriter(scope *Scope, writer tf.Output) (o *tf.Operation) {
+	op := scope.AddOperation(opspec)
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "FlushSummaryWriter",
-		Input: []tf.Input{
-			writer,
-		},
+	var idx int
+	var err error
+	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
+		scope.UpdateErr("DynamicPartition", err)
+		return
 	}
-	return scope.AddOperation(opspec)
+	return outputs
 }
 
-// FakeQuantWithMinMaxVarsPerChannelGradientAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannelGradient.
-type FakeQuantWithMinMaxVarsPerChannelGradientAttr func(optionalAttr)
+// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2.
+type MutableHashTableOfTensorsV2Attr func(optionalAttr)
 
-// FakeQuantWithMinMaxVarsPerChannelGradientNumBits sets the optional num_bits attribute to value.
+// MutableHashTableOfTensorsV2Container sets the optional container attribute to value.
 //
-// value: The bitwidth of the quantization; between 2 and 8, inclusive.
-// If not specified, defaults to 8
-func FakeQuantWithMinMaxVarsPerChannelGradientNumBits(value int64) FakeQuantWithMinMaxVarsPerChannelGradientAttr {
+// value: If non-empty, this table is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr {
 	return func(m optionalAttr) {
-		m["num_bits"] = value
+		m["container"] = value
 	}
 }
 
-// FakeQuantWithMinMaxVarsPerChannelGradientNarrowRange sets the optional narrow_range attribute to value.
+// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value.
 //
-// value: Whether to quantize into 2^num_bits - 1 distinct values.
+// value: If non-empty, this table is shared under the given name across
+// multiple sessions.
+// If not specified, defaults to ""
+func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
 // If not specified, defaults to false
-func FakeQuantWithMinMaxVarsPerChannelGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsPerChannelGradientAttr {
+func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr {
 	return func(m optionalAttr) {
-		m["narrow_range"] = value
+		m["use_node_name_sharing"] = value
 	}
 }
 
-// Compute gradients for a FakeQuantWithMinMaxVarsPerChannel operation.
-//
-// Arguments:
-//	gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation,
-// shape one of: `[d]`, `[b, d]`,  `[b, h, w, d]`.
-//	inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation, shape
-//   same as `gradients`.
-// min, max: Quantization interval, floats of shape `[d]`.
+// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value.
+// If not specified, defaults to <>
+func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr {
+	return func(m optionalAttr) {
+		m["value_shape"] = value
+	}
+}
+
+// Creates an empty hash table.
 //
+// This op creates a mutable hash table, specifying the type of its keys and
+// values. Each value must be a vector. Data can be inserted into the table using
+// the insert operations. It does not support the initialization operation.
 //
+// Arguments:
+//	key_dtype: Type of the table keys.
+//	value_dtype: Type of the table values.
 //
-// Returns Backpropagated gradients w.r.t. inputs, shape same as
-// `inputs`:
-//   `gradients * (inputs >= min && inputs <= max)`.Backpropagated gradients w.r.t. min parameter, shape `[d]`:
-// `sum_per_d(gradients * (inputs < min))`.Backpropagated gradients w.r.t. max parameter, shape `[d]`:
-// `sum_per_d(gradients * (inputs > max))`.
-func FakeQuantWithMinMaxVarsPerChannelGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsPerChannelGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) {
+// Returns Handle to a table.
+func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FakeQuantWithMinMaxVarsPerChannelGradient",
-		Input: []tf.Input{
-			gradients, inputs, min, max,
-		},
+		Type: "MutableHashTableOfTensorsV2",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars.
-type FakeQuantWithMinMaxVarsAttr func(optionalAttr)
+// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad.
+type ResourceApplyProximalAdagradAttr func(optionalAttr)
 
-// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value.
-// If not specified, defaults to 8
-func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr {
-	return func(m optionalAttr) {
-		m["num_bits"] = value
-	}
-}
-
-// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value.
+// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value.
+//
+// value: If True, updating of the var and accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
 // If not specified, defaults to false
-func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr {
+func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr {
 	return func(m optionalAttr) {
-		m["narrow_range"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Fake-quantize the 'inputs' tensor of type float via global float scalars `min`
+// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
 //
-// and `max` to 'outputs' tensor of same shape as `inputs`.
+// accum += grad * grad
+// prox_v = var - lr * grad * (1 / sqrt(accum))
+// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
 //
-// `[min; max]` define the clamping range for the `inputs` data.
-// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
-// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
-// then de-quantized and output as floats in `[min; max]` interval.
-// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	grad: The gradient.
 //
-// This operation has a gradient and thus allows for training `min` and `max`
-// values.
-func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) {
+// Returns the created operation.
+func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -248,907 +292,499 @@ func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FakeQuantWithMinMaxVars",
+		Type: "ResourceApplyProximalAdagrad",
 		Input: []tf.Input{
-			inputs, min, max,
+			var_, accum, lr, l1, l2, grad,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// QuantizedInstanceNormAttr is an optional argument to QuantizedInstanceNorm.
-type QuantizedInstanceNormAttr func(optionalAttr)
-
-// QuantizedInstanceNormOutputRangeGiven sets the optional output_range_given attribute to value.
-//
-// value: If True, `given_y_min` and `given_y_min`
-// and `given_y_max` are used as the output range. Otherwise,
-// the implementation computes the output range.
-// If not specified, defaults to false
-func QuantizedInstanceNormOutputRangeGiven(value bool) QuantizedInstanceNormAttr {
-	return func(m optionalAttr) {
-		m["output_range_given"] = value
-	}
-}
+// MutableHashTableV2Attr is an optional argument to MutableHashTableV2.
+type MutableHashTableV2Attr func(optionalAttr)
 
-// QuantizedInstanceNormGivenYMin sets the optional given_y_min attribute to value.
+// MutableHashTableV2Container sets the optional container attribute to value.
 //
-// value: Output in `y_min` if `output_range_given` is True.
-// If not specified, defaults to 0
-func QuantizedInstanceNormGivenYMin(value float32) QuantizedInstanceNormAttr {
+// value: If non-empty, this table is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func MutableHashTableV2Container(value string) MutableHashTableV2Attr {
 	return func(m optionalAttr) {
-		m["given_y_min"] = value
+		m["container"] = value
 	}
 }
 
-// QuantizedInstanceNormGivenYMax sets the optional given_y_max attribute to value.
+// MutableHashTableV2SharedName sets the optional shared_name attribute to value.
 //
-// value: Output in `y_max` if `output_range_given` is True.
-// If not specified, defaults to 0
-func QuantizedInstanceNormGivenYMax(value float32) QuantizedInstanceNormAttr {
+// value: If non-empty, this table is shared under the given name across
+// multiple sessions.
+// If not specified, defaults to ""
+func MutableHashTableV2SharedName(value string) MutableHashTableV2Attr {
 	return func(m optionalAttr) {
-		m["given_y_max"] = value
+		m["shared_name"] = value
 	}
 }
 
-// QuantizedInstanceNormVarianceEpsilon sets the optional variance_epsilon attribute to value.
+// MutableHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
 //
-// value: A small float number to avoid dividing by 0.
-// If not specified, defaults to 1e-05
-func QuantizedInstanceNormVarianceEpsilon(value float32) QuantizedInstanceNormAttr {
+// value: If true and shared_name is empty, the table is shared
+// using the node name.
+// If not specified, defaults to false
+func MutableHashTableV2UseNodeNameSharing(value bool) MutableHashTableV2Attr {
 	return func(m optionalAttr) {
-		m["variance_epsilon"] = value
+		m["use_node_name_sharing"] = value
 	}
 }
 
-// QuantizedInstanceNormMinSeparation sets the optional min_separation attribute to value.
+// Creates an empty hash table.
 //
-// value: Minimum value of `y_max - y_min`
-// If not specified, defaults to 0.001
-func QuantizedInstanceNormMinSeparation(value float32) QuantizedInstanceNormAttr {
-	return func(m optionalAttr) {
-		m["min_separation"] = value
-	}
-}
-
-// Quantized Instance normalization.
+// This op creates a mutable hash table, specifying the type of its keys and
+// values. Each value must be a scalar. Data can be inserted into the table using
+// the insert operations. It does not support the initialization operation.
 //
 // Arguments:
-//	x: A 4D input Tensor.
-//	x_min: The value represented by the lowest quantized input.
-//	x_max: The value represented by the highest quantized input.
+//	key_dtype: Type of the table keys.
+//	value_dtype: Type of the table values.
 //
-// Returns A 4D Tensor.The value represented by the lowest quantized output.The value represented by the highest quantized output.
-func QuantizedInstanceNorm(scope *Scope, x tf.Output, x_min tf.Output, x_max tf.Output, optional ...QuantizedInstanceNormAttr) (y tf.Output, y_min tf.Output, y_max tf.Output) {
+// Returns Handle to a table.
+func MutableHashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableV2Attr) (table_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QuantizedInstanceNorm",
-		Input: []tf.Input{
-			x, x_min, x_max,
-		},
+		Type: "MutableHashTableV2",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// QuantizeAndDequantizeAttr is an optional argument to QuantizeAndDequantize.
-type QuantizeAndDequantizeAttr func(optionalAttr)
-
-// QuantizeAndDequantizeSignedInput sets the optional signed_input attribute to value.
-// If not specified, defaults to true
-func QuantizeAndDequantizeSignedInput(value bool) QuantizeAndDequantizeAttr {
-	return func(m optionalAttr) {
-		m["signed_input"] = value
-	}
-}
+// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey.
+type MapUnstageNoKeyAttr func(optionalAttr)
 
-// QuantizeAndDequantizeNumBits sets the optional num_bits attribute to value.
-// If not specified, defaults to 8
-func QuantizeAndDequantizeNumBits(value int64) QuantizeAndDequantizeAttr {
+// MapUnstageNoKeyCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr {
 	return func(m optionalAttr) {
-		m["num_bits"] = value
+		m["capacity"] = value
 	}
 }
 
-// QuantizeAndDequantizeRangeGiven sets the optional range_given attribute to value.
-// If not specified, defaults to false
-func QuantizeAndDequantizeRangeGiven(value bool) QuantizeAndDequantizeAttr {
+// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr {
 	return func(m optionalAttr) {
-		m["range_given"] = value
+		m["memory_limit"] = value
 	}
 }
 
-// QuantizeAndDequantizeInputMin sets the optional input_min attribute to value.
-// If not specified, defaults to 0
-func QuantizeAndDequantizeInputMin(value float32) QuantizeAndDequantizeAttr {
+// MapUnstageNoKeyContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr {
 	return func(m optionalAttr) {
-		m["input_min"] = value
+		m["container"] = value
 	}
 }
 
-// QuantizeAndDequantizeInputMax sets the optional input_max attribute to value.
-// If not specified, defaults to 0
-func QuantizeAndDequantizeInputMax(value float32) QuantizeAndDequantizeAttr {
+// MapUnstageNoKeySharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr {
 	return func(m optionalAttr) {
-		m["input_max"] = value
+		m["shared_name"] = value
 	}
 }
 
-// Use QuantizeAndDequantizeV2 instead.
+// Op removes and returns a random (key, value)
 //
-// DEPRECATED at GraphDef version 22: Replaced by QuantizeAndDequantizeV2
-func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAndDequantizeAttr) (output tf.Output) {
+// from the underlying container.   If the underlying container
+// does not contain elements, the op will block until it does.
+func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"dtypes": dtypes}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QuantizeAndDequantize",
+		Type: "MapUnstageNoKey",
 		Input: []tf.Input{
-			input,
+			indices,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	key = op.Output(idx)
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("MapUnstageNoKey", err)
+		return
+	}
+	return key, values
 }
 
-// OneHotAttr is an optional argument to OneHot.
-type OneHotAttr func(optionalAttr)
+// HashTableV2Attr is an optional argument to HashTableV2.
+type HashTableV2Attr func(optionalAttr)
 
-// OneHotAxis sets the optional axis attribute to value.
+// HashTableV2Container sets the optional container attribute to value.
 //
-// value: The axis to fill (default: -1, a new inner-most axis).
-// If not specified, defaults to -1
-func OneHotAxis(value int64) OneHotAttr {
+// value: If non-empty, this table is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func HashTableV2Container(value string) HashTableV2Attr {
 	return func(m optionalAttr) {
-		m["axis"] = value
+		m["container"] = value
 	}
 }
 
-// Returns a one-hot tensor.
+// HashTableV2SharedName sets the optional shared_name attribute to value.
 //
-// The locations represented by indices in `indices` take value `on_value`,
-// while all other locations take value `off_value`.
+// value: If non-empty, this table is shared under the given name across
+// multiple sessions.
+// If not specified, defaults to ""
+func HashTableV2SharedName(value string) HashTableV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
 //
-// If the input `indices` is rank `N`, the output will have rank `N+1`,
-// The new axis is created at dimension `axis` (default: the new axis is
-// appended at the end).
+// value: If true and shared_name is empty, the table is shared
+// using the node name.
+// If not specified, defaults to false
+func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr {
+	return func(m optionalAttr) {
+		m["use_node_name_sharing"] = value
+	}
+}
+
+// Creates a non-initialized hash table.
 //
-// If `indices` is a scalar the output shape will be a vector of length `depth`.
+// This op creates a hash table, specifying the type of its keys and values.
+// Before using the table you will have to initialize it.  After initialization the
+// table will be immutable.
 //
-// If `indices` is a vector of length `features`, the output shape will be:
-// ```
-//   features x depth if axis == -1
-//   depth x features if axis == 0
-// ```
+// Arguments:
+//	key_dtype: Type of the table keys.
+//	value_dtype: Type of the table values.
 //
-// If `indices` is a matrix (batch) with shape `[batch, features]`,
-// the output shape will be:
-// ```
-//   batch x features x depth if axis == -1
-//   batch x depth x features if axis == 1
-//   depth x batch x features if axis == 0
-// ```
-//
-//
-// Examples
-// =========
-//
-// Suppose that
-//
-// ```
-//   indices = [0, 2, -1, 1]
-//   depth = 3
-//   on_value = 5.0
-//   off_value = 0.0
-//   axis = -1
-// ```
-//
-// Then output is `[4 x 3]`:
-//
-//     ```output =
-//       [5.0 0.0 0.0]  // one_hot(0)
-//       [0.0 0.0 5.0]  // one_hot(2)
-//       [0.0 0.0 0.0]  // one_hot(-1)
-//       [0.0 5.0 0.0]  // one_hot(1)
-//     ```
-//
-// Suppose that
-//
-// ```
-//   indices = [0, 2, -1, 1]
-//   depth = 3
-//   on_value = 0.0
-//   off_value = 3.0
-//   axis = 0
-// ```
-//
-// Then output is `[3 x 4]`:
-//
-//     ```output =
-//       [0.0 3.0 3.0 3.0]
-//       [3.0 3.0 3.0 0.0]
-//       [3.0 3.0 3.0 3.0]
-//       [3.0 0.0 3.0 3.0]
-//     //  ^                one_hot(0)
-//     //      ^            one_hot(2)
-//     //          ^        one_hot(-1)
-//     //              ^    one_hot(1)
-//     ```
-// Suppose that
-//
-// ```
-//   indices = [[0, 2], [1, -1]]
-//   depth = 3
-//   on_value = 1.0
-//   off_value = 0.0
-//   axis = -1
-// ```
-//
-// Then output is `[2 x 2 x 3]`:
-//
-//     ```output =
-//       [
-//         [1.0, 0.0, 0.0]  // one_hot(0)
-//         [0.0, 0.0, 1.0]  // one_hot(2)
-//       ][
-//         [0.0, 1.0, 0.0]  // one_hot(1)
-//         [0.0, 0.0, 0.0]  // one_hot(-1)
-//       ]```
-//
-// Arguments:
-//	indices: A tensor of indices.
-//	depth: A scalar defining the depth of the one hot dimension.
-//	on_value: A scalar defining the value to fill in output when `indices[j] = i`.
-//	off_value: A scalar defining the value to fill in output when `indices[j] != i`.
-//
-// Returns The one-hot tensor.
-func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) {
+// Returns Handle to a table.
+func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "OneHot",
-		Input: []tf.Input{
-			indices, depth, on_value, off_value,
-		},
+		Type: "HashTableV2",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Bitcasts a tensor from one type to another without copying data.
+// Replaces the contents of the table with the specified keys and values.
 //
-// Given a tensor `input`, this operation returns a tensor that has the same buffer
-// data as `input` with datatype `type`.
+// The tensor `keys` must be of the same type as the keys of the table.
+// The tensor `values` must be of the type of the table values.
 //
-// If the input datatype `T` is larger than the output datatype `type` then the
-// shape changes from [...] to [..., sizeof(`T`)/sizeof(`type`)].
+// Arguments:
+//	table_handle: Handle to the table.
+//	keys: Any shape.  Keys to look up.
+//	values: Values to associate with keys.
 //
-// If `T` is smaller than `type`, the operator requires that the rightmost
-// dimension be equal to sizeof(`type`)/sizeof(`T`). The shape then goes from
-// [..., sizeof(`type`)/sizeof(`T`)] to [...].
+// Returns the created operation.
+func LookupTableImportV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "LookupTableImportV2",
+		Input: []tf.Input{
+			table_handle, keys, values,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Returns (x - y)(x - y) element-wise.
 //
-// *NOTE*: Bitcast is implemented as a low-level cast, so machines with different
-// endian orderings will give different results.
-func Bitcast(scope *Scope, input tf.Output, type_ tf.DataType) (output tf.Output) {
+// *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func SquaredDifference(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"type": type_}
 	opspec := tf.OpSpec{
-		Type: "Bitcast",
+		Type: "SquaredDifference",
 		Input: []tf.Input{
-			input,
+			x, y,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Extract `patches` from `images` and put them in the "depth" output dimension.
-//
-// Arguments:
-//	images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`.
-//	ksizes: The size of the sliding window for each dimension of `images`.
-//	strides: 1-D of length 4. How far the centers of two consecutive patches are in
-// the images. Must be: `[1, stride_rows, stride_cols, 1]`.
-//	rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
-// input stride, specifying how far two consecutive patch samples are in the
-// input. Equivalent to extracting patches with
-// `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
-// subsampling them spatially by a factor of `rates`. This is equivalent to
-// `rate` in dilated (a.k.a. Atrous) convolutions.
-//	padding: The type of padding algorithm to use.
+// Forwards the input to the output.
 //
-// We specify the size-related attributes as:
+// This operator represents the loop termination condition used by the
+// "pivot" switches of a loop.
 //
-// ```python
-//       ksizes = [1, ksize_rows, ksize_cols, 1]
-//       strides = [1, strides_rows, strides_cols, 1]
-//       rates = [1, rates_rows, rates_cols, 1]
-// ```
+// Arguments:
+//	input: A boolean scalar, representing the branch predicate of the Switch op.
 //
-// Returns 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows *
-// ksize_cols * depth]` containing image patches with size
-// `ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension. Note
-// `out_rows` and `out_cols` are the dimensions of the output patches.
-func ExtractImagePatches(scope *Scope, images tf.Output, ksizes []int64, strides []int64, rates []int64, padding string) (patches tf.Output) {
+// Returns The same tensor as `input`.
+func LoopCond(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksizes": ksizes, "strides": strides, "rates": rates, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "ExtractImagePatches",
+		Type: "LoopCond",
 		Input: []tf.Input{
-			images,
+			input,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// BatchToSpace for N-D tensors of type T.
-//
-// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape
-// `block_shape + [batch]`, interleaves these blocks back into the grid defined by
-// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as
-// the input.  The spatial dimensions of this intermediate result are then
-// optionally cropped according to `crops` to produce the output.  This is the
-// reverse of SpaceToBatch.  See below for a precise description.
+// QuantizedMulAttr is an optional argument to QuantizedMul.
+type QuantizedMulAttr func(optionalAttr)
+
+// QuantizedMulToutput sets the optional Toutput attribute to value.
+// If not specified, defaults to DT_QINT32
+func QuantizedMulToutput(value tf.DataType) QuantizedMulAttr {
+	return func(m optionalAttr) {
+		m["Toutput"] = value
+	}
+}
+
+// Returns x * y element-wise, working on quantized buffers.
 //
 // Arguments:
-//	input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`,
-// where spatial_shape has M dimensions.
-//	block_shape: 1-D with shape `[M]`, all values must be >= 1.
-//	crops: 2-D with shape `[M, 2]`, all values must be >= 0.
-//   `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input
-//   dimension `i + 1`, which corresponds to spatial dimension `i`.  It is
-//   required that
-//   `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`.
 //
-// This operation is equivalent to the following steps:
 //
-// 1. Reshape `input` to `reshaped` of shape:
-//      [block_shape[0], ..., block_shape[M-1],
-//       batch / prod(block_shape),
-//       input_shape[1], ..., input_shape[N-1]]
+//	min_x: The float value that the lowest quantized `x` value represents.
+//	max_x: The float value that the highest quantized `x` value represents.
+//	min_y: The float value that the lowest quantized `y` value represents.
+//	max_y: The float value that the highest quantized `y` value represents.
 //
-// 2. Permute dimensions of `reshaped` to produce `permuted` of shape
-//      [batch / prod(block_shape),
+// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
 //
-//       input_shape[1], block_shape[0],
-//       ...,
-//       input_shape[M], block_shape[M-1],
+// *NOTE*: `QuantizedMul` supports limited forms of broadcasting. More about
+// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func QuantizedMul(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedMulAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QuantizedMul",
+		Input: []tf.Input{
+			x, y, min_x, max_x, min_y, max_y,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// QuantizedMatMulAttr is an optional argument to QuantizedMatMul.
+type QuantizedMatMulAttr func(optionalAttr)
+
+// QuantizedMatMulToutput sets the optional Toutput attribute to value.
+// If not specified, defaults to DT_QINT32
+func QuantizedMatMulToutput(value tf.DataType) QuantizedMatMulAttr {
+	return func(m optionalAttr) {
+		m["Toutput"] = value
+	}
+}
+
+// QuantizedMatMulTransposeA sets the optional transpose_a attribute to value.
 //
-//       input_shape[M+1], ..., input_shape[N-1]]
+// value: If true, `a` is transposed before multiplication.
+// If not specified, defaults to false
+func QuantizedMatMulTransposeA(value bool) QuantizedMatMulAttr {
+	return func(m optionalAttr) {
+		m["transpose_a"] = value
+	}
+}
+
+// QuantizedMatMulTransposeB sets the optional transpose_b attribute to value.
 //
-// 3. Reshape `permuted` to produce `reshaped_permuted` of shape
-//      [batch / prod(block_shape),
-//
-//       input_shape[1] * block_shape[0],
-//       ...,
-//       input_shape[M] * block_shape[M-1],
-//
-//       input_shape[M+1],
-//       ...,
-//       input_shape[N-1]]
-//
-// 4. Crop the start and end of dimensions `[1, ..., M]` of
-//    `reshaped_permuted` according to `crops` to produce the output of shape:
-//      [batch / prod(block_shape),
-//
-//       input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1],
-//       ...,
-//       input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1],
-//
-//       input_shape[M+1], ..., input_shape[N-1]]
-//
-// Some examples:
-//
-// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and
-//     `crops = [[0, 0], [0, 0]]`:
-//
-// ```
-// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-// ```
-//
-// The output tensor has shape `[1, 2, 2, 1]` and value:
-//
-// ```
-// x = [[[[1], [2]], [[3], [4]]]]
-// ```
-//
-// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and
-//     `crops = [[0, 0], [0, 0]]`:
-//
-// ```
-// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-// ```
-//
-// The output tensor has shape `[1, 2, 2, 3]` and value:
-//
-// ```
-// x = [[[[1, 2, 3], [4, 5, 6]],
-//       [[7, 8, 9], [10, 11, 12]]]]
-// ```
-//
-// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and
-//     `crops = [[0, 0], [0, 0]]`:
-//
-// ```
-// x = [[[[1], [3]], [[9], [11]]],
-//      [[[2], [4]], [[10], [12]]],
-//      [[[5], [7]], [[13], [15]]],
-//      [[[6], [8]], [[14], [16]]]]
-// ```
-//
-// The output tensor has shape `[1, 4, 4, 1]` and value:
-//
-// ```
-// x = [[[1],   [2],  [3],  [4]],
-//      [[5],   [6],  [7],  [8]],
-//      [[9],  [10], [11],  [12]],
-//      [[13], [14], [15],  [16]]]
-// ```
-//
-// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and
-//     `crops = [[0, 0], [2, 0]]`:
-//
-// ```
-// x = [[[[0], [1], [3]]], [[[0], [9], [11]]],
-//      [[[0], [2], [4]]], [[[0], [10], [12]]],
-//      [[[0], [5], [7]]], [[[0], [13], [15]]],
-//      [[[0], [6], [8]]], [[[0], [14], [16]]]]
-// ```
-//
-// The output tensor has shape `[2, 2, 4, 1]` and value:
-//
-// ```
-// x = [[[[1],   [2],  [3],  [4]],
-//       [[5],   [6],  [7],  [8]]],
-//      [[[9],  [10], [11],  [12]],
-//       [[13], [14], [15],  [16]]]]
-// ```
-func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "BatchToSpaceND",
-		Input: []tf.Input{
-			input, block_shape, crops,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// SpaceToBatch for 4-D tensors of type T.
-//
-// This is a legacy version of the more general SpaceToBatchND.
-//
-// Zero-pads and then rearranges (permutes) blocks of spatial data into batch.
-// More specifically, this op outputs a copy of the input tensor where values from
-// the `height` and `width` dimensions are moved to the `batch` dimension. After
-// the zero-padding, both `height` and `width` of the input must be divisible by the
-// block size.
-//
-// Arguments:
-//	input: 4-D with shape `[batch, height, width, depth]`.
-//	paddings: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies
-//   the padding of the input with zeros across the spatial dimensions as follows:
-//
-//       paddings = [[pad_top, pad_bottom], [pad_left, pad_right]]
-//
-//   The effective spatial dimensions of the zero-padded input tensor will be:
-//
-//       height_pad = pad_top + height + pad_bottom
-//       width_pad = pad_left + width + pad_right
-//
-// The attr `block_size` must be greater than one. It indicates the block size.
-//
-//   * Non-overlapping blocks of size `block_size x block size` in the height and
-//     width dimensions are rearranged into the batch dimension at each location.
-//   * The batch of the output tensor is `batch * block_size * block_size`.
-//   * Both height_pad and width_pad must be divisible by block_size.
-//
-// The shape of the output will be:
-//
-//     [batch*block_size*block_size, height_pad/block_size, width_pad/block_size,
-//      depth]
-//
-// Some examples:
-//
-// (1) For the following input of shape `[1, 2, 2, 1]` and block_size of 2:
-//
-// ```
-// x = [[[[1], [2]], [[3], [4]]]]
-// ```
-//
-// The output tensor has shape `[4, 1, 1, 1]` and value:
-//
-// ```
-// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-// ```
-//
-// (2) For the following input of shape `[1, 2, 2, 3]` and block_size of 2:
-//
-// ```
-// x = [[[[1, 2, 3], [4, 5, 6]],
-//       [[7, 8, 9], [10, 11, 12]]]]
-// ```
-//
-// The output tensor has shape `[4, 1, 1, 3]` and value:
-//
-// ```
-// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-// ```
-//
-// (3) For the following input of shape `[1, 4, 4, 1]` and block_size of 2:
-//
-// ```
-// x = [[[[1],   [2],  [3],  [4]],
-//       [[5],   [6],  [7],  [8]],
-//       [[9],  [10], [11],  [12]],
-//       [[13], [14], [15],  [16]]]]
-// ```
-//
-// The output tensor has shape `[4, 2, 2, 1]` and value:
-//
-// ```
-// x = [[[[1], [3]], [[9], [11]]],
-//      [[[2], [4]], [[10], [12]]],
-//      [[[5], [7]], [[13], [15]]],
-//      [[[6], [8]], [[14], [16]]]]
-// ```
-//
-// (4) For the following input of shape `[2, 2, 4, 1]` and block_size of 2:
-//
-// ```
-// x = [[[[1],   [2],  [3],  [4]],
-//       [[5],   [6],  [7],  [8]]],
-//      [[[9],  [10], [11],  [12]],
-//       [[13], [14], [15],  [16]]]]
-// ```
-//
-// The output tensor has shape `[8, 1, 2, 1]` and value:
-//
-// ```
-// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],
-//      [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]
-// ```
-//
-// Among others, this operation is useful for reducing atrous convolution into
-// regular convolution.
-//
-func SpaceToBatch(scope *Scope, input tf.Output, paddings tf.Output, block_size int64) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"block_size": block_size}
-	opspec := tf.OpSpec{
-		Type: "SpaceToBatch",
-		Input: []tf.Input{
-			input, paddings,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// QuantizeAndDequantizeV2Attr is an optional argument to QuantizeAndDequantizeV2.
-type QuantizeAndDequantizeV2Attr func(optionalAttr)
-
-// QuantizeAndDequantizeV2SignedInput sets the optional signed_input attribute to value.
-//
-// value: If the quantization is signed or unsigned.
-// If not specified, defaults to true
-func QuantizeAndDequantizeV2SignedInput(value bool) QuantizeAndDequantizeV2Attr {
-	return func(m optionalAttr) {
-		m["signed_input"] = value
-	}
-}
-
-// QuantizeAndDequantizeV2NumBits sets the optional num_bits attribute to value.
-//
-// value: The bitwidth of the quantization.
-// If not specified, defaults to 8
-func QuantizeAndDequantizeV2NumBits(value int64) QuantizeAndDequantizeV2Attr {
-	return func(m optionalAttr) {
-		m["num_bits"] = value
-	}
-}
-
-// QuantizeAndDequantizeV2RangeGiven sets the optional range_given attribute to value.
-//
-// value: If the range is given or should be computed from the tensor.
+// value: If true, `b` is transposed before multiplication.
 // If not specified, defaults to false
-func QuantizeAndDequantizeV2RangeGiven(value bool) QuantizeAndDequantizeV2Attr {
+func QuantizedMatMulTransposeB(value bool) QuantizedMatMulAttr {
 	return func(m optionalAttr) {
-		m["range_given"] = value
+		m["transpose_b"] = value
 	}
 }
 
-// Quantizes then dequantizes a tensor.
-//
-// This op simulates the precision loss from the quantized forward pass by:
-// 1. Quantizing the tensor to fixed point numbers, which should match the target
-//    quantization method when it is used in inference.
-// 2. Dequantizing it back to floating point numbers for the following ops, most
-//    likely matmul.
-//
-// There are different ways to quantize. This version does not use the full range
-// of the output type, choosing to elide the lowest possible value for symmetry
-// (e.g., output range is -127 to 127, not -128 to 127 for signed 8 bit
-// quantization), so that 0.0 maps to 0.
-//
-// To perform this op, we first find the range of values in our tensor. The range
-// we use is always centered on 0, so we find m such that
-//
-// 1. m = max(abs(input_min), abs(input_max)) if range_given is true,
-// 2. m = max(abs(min_elem(input)), abs(max_elem(input))) otherwise.
-//
-// Our input tensor range is then [-m, m].
-//
-// Next, we choose our fixed-point quantization buckets, [min_fixed, max_fixed].
-// If signed_input is true, this is
-//
-//   [min_fixed, max_fixed ] =
-//       [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1].
-//
-// Otherwise, if signed_input is false, the fixed-point range is
-//
-//   [min_fixed, max_fixed] = [0, (1 << num_bits) - 1].
-//
-// From this we compute our scaling factor, s:
-//
-//   s = (max_fixed - min_fixed) / (2 * m).
-//
-// Now we can quantize and dequantize the elements of our tensor.  An element e
-// is transformed into e':
-//
-//   e' = (e * s).round_to_nearest() / s.
-//
-// Note that we have a different number of buckets in the signed vs. unsigned
-// cases.  For example, if num_bits == 8, we get 254 buckets in the signed case
-// vs. 255 in the unsigned case.
-//
-// For example, suppose num_bits = 8 and m = 1.  Then
-//
-//   [min_fixed, max_fixed] = [-127, 127], and
-//   s = (127 + 127) / 2 = 127.
-//
-// Given the vector {-1, -0.5, 0, 0.3}, this is quantized to
-// {-127, -63, 0, 38}, and dequantized to {-1, -63.0/127, 0, 38.0/127}.
+// QuantizedMatMulTactivation sets the optional Tactivation attribute to value.
 //
-// Arguments:
-//	input: Tensor to quantize and then dequantize.
-//	input_min: If range_given, this is the min of the range, otherwise this input
-// will be ignored.
-//	input_max: If range_given, this is the max of the range, otherwise this input
-// will be ignored.
-func QuantizeAndDequantizeV2(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, optional ...QuantizeAndDequantizeV2Attr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QuantizeAndDequantizeV2",
-		Input: []tf.Input{
-			input, input_min, input_max,
-		},
-		Attrs: attrs,
+// value: The type of output produced by activation function
+// following this operation.
+// If not specified, defaults to DT_QUINT8
+func QuantizedMatMulTactivation(value tf.DataType) QuantizedMatMulAttr {
+	return func(m optionalAttr) {
+		m["Tactivation"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// SpaceToBatch for N-D tensors of type T.
+// Perform a quantized matrix multiplication of  `a` by the matrix `b`.
 //
-// This operation divides "spatial" dimensions `[1, ..., M]` of the input into a
-// grid of blocks of shape `block_shape`, and interleaves these blocks with the
-// "batch" dimension (0) such that in the output, the spatial dimensions
-// `[1, ..., M]` correspond to the position within the grid, and the batch
-// dimension combines both the position within a spatial block and the original
-// batch position.  Prior to division into blocks, the spatial dimensions of the
-// input are optionally zero padded according to `paddings`.  See below for a
-// precise description.
+// The inputs must be two-dimensional matrices and the inner dimension of
+// `a` (after being transposed if `transpose_a` is non-zero) must match the
+// outer dimension of `b` (after being transposed if `transposed_b` is
+// non-zero).
 //
 // Arguments:
-//	input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`,
-// where spatial_shape has `M` dimensions.
-//	block_shape: 1-D with shape `[M]`, all values must be >= 1.
-//	paddings: 2-D with shape `[M, 2]`, all values must be >= 0.
-//   `paddings[i] = [pad_start, pad_end]` specifies the padding for input dimension
-//   `i + 1`, which corresponds to spatial dimension `i`.  It is required that
-//   `block_shape[i]` divides `input_shape[i + 1] + pad_start + pad_end`.
-//
-// This operation is equivalent to the following steps:
-//
-// 1. Zero-pad the start and end of dimensions `[1, ..., M]` of the
-//    input according to `paddings` to produce `padded` of shape `padded_shape`.
-//
-// 2. Reshape `padded` to `reshaped_padded` of shape:
-//
-//      [batch] +
-//      [padded_shape[1] / block_shape[0],
-//        block_shape[0],
-//       ...,
-//       padded_shape[M] / block_shape[M-1],
-//       block_shape[M-1]] +
-//      remaining_shape
-//
-// 3. Permute dimensions of `reshaped_padded` to produce
-//    `permuted_reshaped_padded` of shape:
-//
-//      block_shape +
-//      [batch] +
-//      [padded_shape[1] / block_shape[0],
-//       ...,
-//       padded_shape[M] / block_shape[M-1]] +
-//      remaining_shape
-//
-// 4. Reshape `permuted_reshaped_padded` to flatten `block_shape` into the batch
-//    dimension, producing an output tensor of shape:
-//
-//      [batch * prod(block_shape)] +
-//      [padded_shape[1] / block_shape[0],
-//       ...,
-//       padded_shape[M] / block_shape[M-1]] +
-//      remaining_shape
-//
-// Some examples:
-//
-// (1) For the following input of shape `[1, 2, 2, 1]`, `block_shape = [2, 2]`, and
-//     `paddings = [[0, 0], [0, 0]]`:
-//
-// ```
-// x = [[[[1], [2]], [[3], [4]]]]
-// ```
-//
-// The output tensor has shape `[4, 1, 1, 1]` and value:
-//
-// ```
-// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-// ```
-//
-// (2) For the following input of shape `[1, 2, 2, 3]`, `block_shape = [2, 2]`, and
-//     `paddings = [[0, 0], [0, 0]]`:
-//
-// ```
-// x = [[[[1, 2, 3], [4, 5, 6]],
-//       [[7, 8, 9], [10, 11, 12]]]]
-// ```
-//
-// The output tensor has shape `[4, 1, 1, 3]` and value:
-//
-// ```
-// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-// ```
-//
-// (3) For the following input of shape `[1, 4, 4, 1]`, `block_shape = [2, 2]`, and
-//     `paddings = [[0, 0], [0, 0]]`:
+//	a: Must be a two-dimensional tensor.
+//	b: Must be a two-dimensional tensor.
+//	min_a: The float value that the lowest quantized `a` value represents.
+//	max_a: The float value that the highest quantized `a` value represents.
+//	min_b: The float value that the lowest quantized `b` value represents.
+//	max_b: The float value that the highest quantized `b` value represents.
 //
-// ```
-// x = [[[[1],   [2],  [3],  [4]],
-//       [[5],   [6],  [7],  [8]],
-//       [[9],  [10], [11],  [12]],
-//       [[13], [14], [15],  [16]]]]
-// ```
+// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
+func QuantizedMatMul(scope *Scope, a tf.Output, b tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QuantizedMatMul",
+		Input: []tf.Input{
+			a, b, min_a, max_a, min_b, max_b,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// A placeholder op that passes through `input` when its output is not fed.
 //
-// The output tensor has shape `[4, 2, 2, 1]` and value:
+// Arguments:
+//	input: The default value to produce when `output` is not fed.
+//	shape: The (possibly partial) shape of the tensor.
 //
-// ```
-// x = [[[[1], [3]], [[9], [11]]],
-//      [[[2], [4]], [[10], [12]]],
-//      [[[5], [7]], [[13], [15]]],
-//      [[[6], [8]], [[14], [16]]]]
-// ```
+// Returns A placeholder tensor that defaults to `input` if it is not fed.
+func PlaceholderWithDefault(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"shape": shape}
+	opspec := tf.OpSpec{
+		Type: "PlaceholderWithDefault",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the complex conjugate of a complex number.
 //
-// (4) For the following input of shape `[2, 2, 4, 1]`, block_shape = `[2, 2]`, and
-//     paddings = `[[0, 0], [2, 0]]`:
+// Given a tensor `input` of complex numbers, this operation returns a tensor of
+// complex numbers that are the complex conjugate of each element in `input`. The
+// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the
+// real part and *b* is the imaginary part.
 //
-// ```
-// x = [[[[1],   [2],  [3],  [4]],
-//       [[5],   [6],  [7],  [8]]],
-//      [[[9],  [10], [11],  [12]],
-//       [[13], [14], [15],  [16]]]]
-// ```
+// The complex conjugate returned by this operation is of the form \\(a - bj\\).
 //
-// The output tensor has shape `[8, 1, 3, 1]` and value:
+// For example:
 //
 // ```
-// x = [[[[0], [1], [3]]], [[[0], [9], [11]]],
-//      [[[0], [2], [4]]], [[[0], [10], [12]]],
-//      [[[0], [5], [7]]], [[[0], [13], [15]]],
-//      [[[0], [6], [8]]], [[[0], [14], [16]]]]
+// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
+// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
 // ```
-//
-// Among others, this operation is useful for reducing atrous convolution into
-// regular convolution.
-func SpaceToBatchND(scope *Scope, input tf.Output, block_shape tf.Output, paddings tf.Output) (output tf.Output) {
+func Conj(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SpaceToBatchND",
+		Type: "Conj",
 		Input: []tf.Input{
-			input, block_shape, paddings,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// SqueezeAttr is an optional argument to Squeeze.
-type SqueezeAttr func(optionalAttr)
+// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum.
+type ResourceSparseApplyMomentumAttr func(optionalAttr)
 
-// SqueezeSqueezeDims sets the optional squeeze_dims attribute to value.
-//
-// value: If specified, only squeezes the dimensions listed. The dimension
-// index starts at 0. It is an error to squeeze a dimension that is not 1. Must
-// be in the range `[-rank(input), rank(input))`.
-// If not specified, defaults to <>
+// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value.
 //
-// REQUIRES: len(value) >= 0
-func SqueezeSqueezeDims(value []int64) SqueezeAttr {
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr {
 	return func(m optionalAttr) {
-		m["squeeze_dims"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Removes dimensions of size 1 from the shape of a tensor.
-//
-// Given a tensor `input`, this operation returns a tensor of the same type with
-// all dimensions of size 1 removed. If you don't want to remove all size 1
-// dimensions, you can remove specific size 1 dimensions by specifying
-// `squeeze_dims`.
+// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
 //
-// For example:
+// value: If `True`, the tensor passed to compute grad will be
+// var - lr * momentum * accum, so in the end, the var you get is actually
+// var - lr * momentum * accum.
+// If not specified, defaults to false
+func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr {
+	return func(m optionalAttr) {
+		m["use_nesterov"] = value
+	}
+}
+
+// Update relevant entries in '*var' and '*accum' according to the momentum scheme.
 //
-// ```
-// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
-// shape(squeeze(t)) ==> [2, 3]
-// ```
+// Set use_nesterov = True if you want to use Nesterov momentum.
 //
-// Or, to remove specific size 1 dimensions:
+// That is for rows we have grad for, we update var and accum as follows:
 //
-// ```
-// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
-// shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1]
-// ```
+// accum = accum * momentum + grad
+// var -= lr * accum
 //
 // Arguments:
-//	input: The `input` to squeeze.
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Learning rate. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//	momentum: Momentum. Must be a scalar.
 //
-// Returns Contains the same data as `input`, but has one or more dimensions of
-// size 1 removed.
-func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf.Output) {
+// Returns the created operation.
+func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -1157,830 +793,661 @@ func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf.
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Squeeze",
+		Type: "ResourceSparseApplyMomentum",
 		Input: []tf.Input{
-			input,
+			var_, accum, lr, grad, indices, momentum,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// A placeholder op for a value that will be fed into the computation.
+// Creates a sequence of numbers.
 //
-// DEPRECATED at GraphDef version 23: Placeholder now behaves the same as PlaceholderV2.
+// This operation creates a sequence of numbers that begins at `start` and
+// extends by increments of `delta` up to but not including `limit`.
 //
-// N.B. This operation will fail with an error if it is executed. It is
-// intended as a way to represent a value that will always be fed, and to
-// provide attrs that enable the fed value to be checked at runtime.
+// For example:
+//
+// ```
+// # 'start' is 3
+// # 'limit' is 18
+// # 'delta' is 3
+// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15]
+// ```
 //
 // Arguments:
-//	dtype: The type of elements in the tensor.
-//	shape: The shape of the tensor. The shape can be any partially-specified
-// shape.  To be unconstrained, pass in a shape with unknown rank.
+//	start: 0-D (scalar). First entry in the sequence.
+//	limit: 0-D (scalar). Upper limit of sequence, exclusive.
+//	delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`.
 //
-// Returns A placeholder tensor that must be replaced using the feed mechanism.
-func PlaceholderV2(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Output) {
+// Returns 1-D.
+func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype, "shape": shape}
 	opspec := tf.OpSpec{
-		Type: "PlaceholderV2",
-
-		Attrs: attrs,
+		Type: "Range",
+		Input: []tf.Input{
+			start, limit, delta,
+		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Pads a tensor with mirrored values.
-//
-// This operation pads a `input` with mirrored values according to the `paddings`
-// you specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is
-// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
-// how many values to add before the contents of `input` in that dimension, and
-// `paddings[D, 1]` indicates how many values to add after the contents of `input`
-// in that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater
-// than `input.dim_size(D)` (or `input.dim_size(D) - 1`) if `copy_border` is true
-// (if false, respectively).
-//
-// The padded size of each dimension D of the output is:
-//
-// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
-//
-// For example:
+// Computes gradients for SparseSegmentSqrtN.
 //
-// ```
-// # 't' is [[1, 2, 3], [4, 5, 6]].
-// # 'paddings' is [[1, 1]], [2, 2]].
-// # 'mode' is SYMMETRIC.
-// # rank of 't' is 2.
-// pad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2]
-//                       [2, 1, 1, 2, 3, 3, 2]
-//                       [5, 4, 4, 5, 6, 6, 5]
-//                       [5, 4, 4, 5, 6, 6, 5]]
-// ```
+// Returns tensor "output" with same shape as grad, except for dimension 0 whose
+// value is output_dim0.
 //
 // Arguments:
-//	input: The input tensor to be padded.
-//	paddings: A two-column matrix specifying the padding sizes. The number of
-// rows must be the same as the rank of `input`.
-//	mode: Either `REFLECT` or `SYMMETRIC`. In reflect mode the padded regions
-// do not include the borders, while in symmetric mode the padded regions
-// do include the borders. For example, if `input` is `[1, 2, 3]` and `paddings`
-// is `[0, 2]`, then the output is `[1, 2, 3, 2, 1]` in reflect mode, and
-// it is `[1, 2, 3, 3, 2]` in symmetric mode.
-//
-// Returns The padded tensor.
-func MirrorPad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) {
+//	grad: gradient propagated to the SparseSegmentSqrtN op.
+//	indices: indices passed to the corresponding SparseSegmentSqrtN op.
+//	segment_ids: segment_ids passed to the corresponding SparseSegmentSqrtN op.
+//	output_dim0: dimension 0 of "data" passed to SparseSegmentSqrtN op.
+func SparseSegmentSqrtNGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"mode": mode}
 	opspec := tf.OpSpec{
-		Type: "MirrorPad",
+		Type: "SparseSegmentSqrtNGrad",
 		Input: []tf.Input{
-			input, paddings,
+			grad, indices, segment_ids, output_dim0,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Return the reduction indices for computing gradients of s0 op s1 with broadcast.
+// Computes the mean along sparse segments of a tensor.
 //
-// This is typically used by gradient computations for a broadcasting operation.
-func BroadcastGradientArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output, r1 tf.Output) {
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
+//
+// Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first
+// dimension, selecting a subset of dimension 0, specified by `indices`.
+//
+// Arguments:
+//
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SparseSegmentMean(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "BroadcastGradientArgs",
+		Type: "SparseSegmentMean",
 		Input: []tf.Input{
-			s0, s1,
+			data, indices, segment_ids,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// Return the shape of s0 op s1 with broadcast.
+// Pop the element at the top of the stack.
 //
-// Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the
-// broadcasted shape. `s0`, `s1` and `r0` are all integer vectors.
-func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) {
+// Arguments:
+//	handle: The handle to a stack.
+//	elem_type: The type of the elem that is popped.
+//
+// Returns The tensor that is popped from the top of the stack.
+func StackPopV2(scope *Scope, handle tf.Output, elem_type tf.DataType) (elem tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"elem_type": elem_type}
 	opspec := tf.OpSpec{
-		Type: "BroadcastArgs",
+		Type: "StackPopV2",
 		Input: []tf.Input{
-			s0, s1,
+			handle,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns locations of nonzero / true values in a tensor.
+// Computes the sum along sparse segments of a tensor.
 //
-// This operation returns the coordinates of true elements in `input`. The
-// coordinates are returned in a 2-D tensor where the first dimension (rows)
-// represents the number of true elements, and the second dimension (columns)
-// represents the coordinates of the true elements. Keep in mind, the shape of
-// the output tensor can vary depending on how many true values there are in
-// `input`. Indices are output in row-major order.
+// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is
+// misisng, the `output` tensor at that position will be zeroed.
+//
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
 //
 // For example:
 //
+// ```python
+// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+//
+// tf.sparse_segment_sum_with_num_segments(
+//     c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3)
+// # => [[0 0 0 0]
+// #     [0 0 0 0]
+// #     [0 0 0 0]]
+//
+// tf.sparse_segment_sum_with_num_segments(c,
+//                                         tf.constant([0, 1]),
+//                                         tf.constant([0, 2],
+//                                         num_segments=4))
+// # => [[ 1  2  3  4]
+// #     [ 0  0  0  0]
+// #     [-1 -2 -3 -4]
+// #     [ 0  0  0  0]]
 // ```
-// # 'input' tensor is [[True, False]
-// #                    [True, False]]
-// # 'input' has two true values, so output has two coordinates.
-// # 'input' has rank of 2, so coordinates have two indices.
-// where(input) ==> [[0, 0],
-//                   [1, 0]]
 //
-// # `input` tensor is [[[True, False]
-// #                     [True, False]]
-// #                    [[False, True]
-// #                     [False, True]]
-// #                    [[False, False]
-// #                     [False, True]]]
-// # 'input' has 5 true values, so output has 5 coordinates.
-// # 'input' has rank of 3, so coordinates have three indices.
-// where(input) ==> [[0, 0, 0],
-//                   [0, 1, 0],
-//                   [1, 0, 1],
-//                   [1, 1, 1],
-//                   [2, 1, 1]]
+// Arguments:
 //
-// # `input` tensor is [[[1.5,  0.0]
-// #                     [-0.5, 0.0]]
-// #                    [[0.0,  0.25]
-// #                     [0.0,  0.75]]
-// #                    [[0.0,  0.0]
-// #                     [0.0,  0.01]]]
-// # 'input' has 5 nonzero values, so output has 5 coordinates.
-// # 'input' has rank of 3, so coordinates have three indices.
-// where(input) ==> [[0, 0, 0],
-//                   [0, 1, 0],
-//                   [1, 0, 1],
-//                   [1, 1, 1],
-//                   [2, 1, 1]]
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//	num_segments: Should equal the number of distinct segment IDs.
 //
-// # `input` tensor is [[[1.5 + 0.0j, 0.0  + 0.0j]
-// #                     [0.0 + 0.5j, 0.0  + 0.0j]]
-// #                    [[0.0 + 0.0j, 0.25 + 1.5j]
-// #                     [0.0 + 0.0j, 0.75 + 0.0j]]
-// #                    [[0.0 + 0.0j, 0.0  + 0.0j]
-// #                     [0.0 + 0.0j, 0.01 + 0.0j]]]
-// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates.
-// # 'input' has rank of 3, so coordinates have three indices.
-// where(input) ==> [[0, 0, 0],
-//                   [0, 1, 0],
-//                   [1, 0, 1],
-//                   [1, 1, 1],
-//                   [2, 1, 1]]
-// ```
-func Where(scope *Scope, input tf.Output) (index tf.Output) {
+// Returns Has same shape as data, except for dimension 0 which
+// has size `num_segments`.
+func SparseSegmentSumWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Where",
+		Type: "SparseSegmentSumWithNumSegments",
 		Input: []tf.Input{
-			input,
+			data, indices, segment_ids, num_segments,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns the gradient of `Tile`.
+// SparseToDenseAttr is an optional argument to SparseToDense.
+type SparseToDenseAttr func(optionalAttr)
+
+// SparseToDenseValidateIndices sets the optional validate_indices attribute to value.
 //
-// DEPRECATED at GraphDef version 3: TileGrad has been replaced with reduce_sum
+// value: If true, indices are checked to make sure they are sorted in
+// lexicographic order and that there are no repeats.
+// If not specified, defaults to true
+func SparseToDenseValidateIndices(value bool) SparseToDenseAttr {
+	return func(m optionalAttr) {
+		m["validate_indices"] = value
+	}
+}
+
+// Converts a sparse representation into a dense tensor.
 //
-// Since `Tile` takes an input and repeats the input `multiples` times
-// along each dimension, `TileGrad` takes in `multiples` and aggregates
-// each repeated tile of `input` into `output`.
-func TileGrad(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Output) {
+// Builds an array `dense` with shape `output_shape` such that
+//
+// ```
+// # If sparse_indices is scalar
+// dense[i] = (i == sparse_indices ? sparse_values : default_value)
+//
+// # If sparse_indices is a vector, then for each i
+// dense[sparse_indices[i]] = sparse_values[i]
+//
+// # If sparse_indices is an n by d matrix, then for each i in [0, n)
+// dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i]
+// ```
+//
+// All other values in `dense` are set to `default_value`.  If `sparse_values` is a
+// scalar, all sparse indices are set to this single value.
+//
+// Indices should be sorted in lexicographic order, and indices must not
+// contain any repeats. If `validate_indices` is true, these properties
+// are checked during execution.
+//
+// Arguments:
+//	sparse_indices: 0-D, 1-D, or 2-D.  `sparse_indices[i]` contains the complete
+// index where `sparse_values[i]` will be placed.
+//	output_shape: 1-D.  Shape of the dense output tensor.
+//	sparse_values: 1-D.  Values corresponding to each row of `sparse_indices`,
+// or a scalar value to be used for all sparse indices.
+//	default_value: Scalar value to set for indices not specified in
+// `sparse_indices`.
+//
+// Returns Dense output tensor of shape `output_shape`.
+func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Output, sparse_values tf.Output, default_value tf.Output, optional ...SparseToDenseAttr) (dense tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "TileGrad",
+		Type: "SparseToDense",
 		Input: []tf.Input{
-			input, multiples,
+			sparse_indices, output_shape, sparse_values, default_value,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// StridedSliceGradAttr is an optional argument to StridedSliceGrad.
-type StridedSliceGradAttr func(optionalAttr)
-
-// StridedSliceGradBeginMask sets the optional begin_mask attribute to value.
-// If not specified, defaults to 0
-func StridedSliceGradBeginMask(value int64) StridedSliceGradAttr {
-	return func(m optionalAttr) {
-		m["begin_mask"] = value
-	}
-}
-
-// StridedSliceGradEndMask sets the optional end_mask attribute to value.
-// If not specified, defaults to 0
-func StridedSliceGradEndMask(value int64) StridedSliceGradAttr {
-	return func(m optionalAttr) {
-		m["end_mask"] = value
-	}
-}
-
-// StridedSliceGradEllipsisMask sets the optional ellipsis_mask attribute to value.
-// If not specified, defaults to 0
-func StridedSliceGradEllipsisMask(value int64) StridedSliceGradAttr {
-	return func(m optionalAttr) {
-		m["ellipsis_mask"] = value
-	}
-}
-
-// StridedSliceGradNewAxisMask sets the optional new_axis_mask attribute to value.
-// If not specified, defaults to 0
-func StridedSliceGradNewAxisMask(value int64) StridedSliceGradAttr {
-	return func(m optionalAttr) {
-		m["new_axis_mask"] = value
-	}
-}
-
-// StridedSliceGradShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
-// If not specified, defaults to 0
-func StridedSliceGradShrinkAxisMask(value int64) StridedSliceGradAttr {
-	return func(m optionalAttr) {
-		m["shrink_axis_mask"] = value
-	}
-}
-
-// Returns the gradient of `StridedSlice`.
+// Counts the number of occurrences of each value in an integer array.
 //
-// Since `StridedSlice` cuts out pieces of its `input` which is size
-// `shape`, its gradient will have the same shape (which is passed here
-// as `shape`). The gradient will be zero in any element that the slice
-// does not select.
+// Outputs a vector with length `size` and the same dtype as `weights`. If
+// `weights` are empty, then index `i` stores the number of times the value `i` is
+// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of
+// the value in `weights` at each index where the corresponding value in `arr` is
+// `i`.
 //
-// Arguments are the same as StridedSliceGrad with the exception that
-// `dy` is the input gradient to be propagated and `shape` is the
-// shape of `StridedSlice`'s `input`.
-func StridedSliceGrad(scope *Scope, shape tf.Output, begin tf.Output, end tf.Output, strides tf.Output, dy tf.Output, optional ...StridedSliceGradAttr) (output tf.Output) {
+// Values in `arr` outside of the range [0, size) are ignored.
+//
+// Arguments:
+//	arr: int32 `Tensor`.
+//	size: non-negative int32 scalar `Tensor`.
+//	weights: is an int32, int64, float32, or float64 `Tensor` with the same
+// shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights
+// equal to 1.
+//
+// Returns 1D `Tensor` with length equal to `size`. The counts or summed weights for
+// each value in the range [0, size).
+func Bincount(scope *Scope, arr tf.Output, size tf.Output, weights tf.Output) (bins tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "StridedSliceGrad",
+		Type: "Bincount",
 		Input: []tf.Input{
-			shape, begin, end, strides, dy,
+			arr, size, weights,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Return a slice from 'input'.
+// Computes the sum along sparse segments of a tensor.
 //
-// The output tensor is a tensor with dimensions described by 'size'
-// whose values are extracted from 'input' starting at the offsets in
-// 'begin'.
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
 //
-// *Requirements*:
-//   0 <= begin[i] <= begin[i] + size[i] <= Di  for i in [0, n)
+// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
+// dimension, selecting a subset of dimension 0, specified by `indices`.
+//
+// For example:
+//
+// ```python
+// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+//
+// # Select two rows, one segment.
+// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
+// # => [[0 0 0 0]]
+//
+// # Select two rows, two segment.
+// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
+// # => [[ 1  2  3  4]
+// #     [-1 -2 -3 -4]]
+//
+// # Select all rows, two segments.
+// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
+// # => [[0 0 0 0]
+// #     [5 6 7 8]]
+//
+// # Which is equivalent to:
+// tf.segment_sum(c, tf.constant([0, 0, 1]))
+// ```
 //
 // Arguments:
 //
-//	begin: begin[i] specifies the offset into the 'i'th dimension of
-// 'input' to slice from.
-//	size: size[i] specifies the number of elements of the 'i'th dimension
-// of 'input' to slice. If size[i] is -1, all remaining elements in dimension
-// i are included in the slice (i.e. this is equivalent to setting
-// size[i] = input.dim_size(i) - begin[i]).
-func Slice(scope *Scope, input tf.Output, begin tf.Output, size tf.Output) (output tf.Output) {
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Slice",
+		Type: "SparseSegmentSum",
 		Input: []tf.Input{
-			input, begin, size,
+			data, indices, segment_ids,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Shuffle dimensions of x according to a permutation and conjugate the result.
-//
-// The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:
-//   `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]`
-//   `y[i,j,k,...,s,t,u] == conj(x[perm[i], perm[j], perm[k],...,perm[s], perm[t], perm[u]])`
-func ConjugateTranspose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) {
+// Computes hyperbolic sine of x element-wise.
+func Sinh(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ConjugateTranspose",
+		Type: "Sinh",
 		Input: []tf.Input{
-			x, perm,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Checks a tensor for NaN and Inf values.
+// Computes the sum along segments of a tensor.
 //
-// When run, reports an `InvalidArgument` error if `tensor` has any values
-// that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is.
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
+//
+// Computes a tensor such that
+// `(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such
+// that `segment_ids[j...] == i`.  Unlike `SegmentSum`, `segment_ids`
+// need not be sorted and need not cover all values in the full
+// range of valid values.
+//
+// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+// If the given segment ID `i` is negative, the value is dropped and will not be
+// added to the sum of the segment.
+//
+// `num_segments` should equal the number of distinct segment IDs.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentSum.png" alt>
+// </div>
 //
 // Arguments:
 //
-//	message: Prefix of the error message.
-func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Output) {
+//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
+//
+//
+// Returns Has same shape as data, except for the first `segment_ids.rank`
+// dimensions, which are replaced with a single dimension which has size
+// `num_segments`.
+func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"message": message}
 	opspec := tf.OpSpec{
-		Type: "CheckNumerics",
+		Type: "UnsortedSegmentSum",
 		Input: []tf.Input{
-			tensor,
+			data, segment_ids, num_segments,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// PreventGradientAttr is an optional argument to PreventGradient.
-type PreventGradientAttr func(optionalAttr)
-
-// PreventGradientMessage sets the optional message attribute to value.
-//
-// value: Will be printed in the error when anyone tries to differentiate
-// this operation.
-// If not specified, defaults to ""
-func PreventGradientMessage(value string) PreventGradientAttr {
-	return func(m optionalAttr) {
-		m["message"] = value
-	}
-}
-
-// An identity op that triggers an error if a gradient is requested.
-//
-// When executed in a graph, this op outputs its input tensor as-is.
-//
-// When building ops to compute gradients, the TensorFlow gradient system
-// will return an error when trying to lookup the gradient of this op,
-// because no gradient must ever be registered for this function.  This
-// op exists to prevent subtle bugs from silently returning unimplemented
-// gradients in some corner cases.
-//
-// Arguments:
-//	input: any tensor.
+// Returns which elements of x are finite.
 //
-// Returns the same input tensor.
-func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) {
+// @compatibility(numpy)
+// Equivalent to np.isfinite
+// @end_compatibility
+func IsFinite(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "PreventGradient",
+		Type: "IsFinite",
 		Input: []tf.Input{
-			input,
+			x,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Stops gradient computation.
+// MatMulAttr is an optional argument to MatMul.
+type MatMulAttr func(optionalAttr)
+
+// MatMulTransposeA sets the optional transpose_a attribute to value.
 //
-// When executed in a graph, this op outputs its input tensor as-is.
+// value: If true, "a" is transposed before multiplication.
+// If not specified, defaults to false
+func MatMulTransposeA(value bool) MatMulAttr {
+	return func(m optionalAttr) {
+		m["transpose_a"] = value
+	}
+}
+
+// MatMulTransposeB sets the optional transpose_b attribute to value.
 //
-// When building ops to compute gradients, this op prevents the contribution of
-// its inputs to be taken into account.  Normally, the gradient generator adds ops
-// to a graph to compute the derivatives of a specified 'loss' by recursively
-// finding out inputs that contributed to its computation.  If you insert this op
-// in the graph it inputs are masked from the gradient generator.  They are not
-// taken into account for computing gradients.
+// value: If true, "b" is transposed before multiplication.
+// If not specified, defaults to false
+func MatMulTransposeB(value bool) MatMulAttr {
+	return func(m optionalAttr) {
+		m["transpose_b"] = value
+	}
+}
+
+// Multiply the matrix "a" by the matrix "b".
 //
-// This is useful any time you want to compute a value with TensorFlow but need
-// to pretend that the value was a constant. Some examples include:
+// The inputs must be two-dimensional matrices and the inner dimension of
+// "a" (after being transposed if transpose_a is true) must match the
+// outer dimension of "b" (after being transposed if transposed_b is
+// true).
 //
-// *  The *EM* algorithm where the *M-step* should not involve backpropagation
-//    through the output of the *E-step*.
-// *  Contrastive divergence training of Boltzmann machines where, when
-//    differentiating the energy function, the training must not backpropagate
-//    through the graph that generated the samples from the model.
-// *  Adversarial training, where no backprop should happen through the adversarial
-//    example generation process.
-func StopGradient(scope *Scope, input tf.Output) (output tf.Output) {
+// *Note*: The default kernel implementation for MatMul on GPUs uses
+// cublas.
+func MatMul(scope *Scope, a tf.Output, b tf.Output, optional ...MatMulAttr) (product tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "StopGradient",
+		Type: "MatMul",
 		Input: []tf.Input{
-			input,
+			a, b,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Gather slices from `params` into a Tensor with shape specified by `indices`.
-//
-// `indices` is an K-dimensional integer tensor, best thought of as a
-// (K-1)-dimensional tensor of indices into `params`, where each element defines a
-// slice of `params`:
-//
-//     output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]]
-//
-// Whereas in @{tf.gather} `indices` defines slices into the first
-// dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the
-// first `N` dimensions of `params`, where `N = indices.shape[-1]`.
-//
-// The last dimension of `indices` can be at most the rank of
-// `params`:
-//
-//     indices.shape[-1] <= params.rank
-//
-// The last dimension of `indices` corresponds to elements
-// (if `indices.shape[-1] == params.rank`) or slices
-// (if `indices.shape[-1] < params.rank`) along dimension `indices.shape[-1]`
-// of `params`.  The output tensor has shape
-//
-//     indices.shape[:-1] + params.shape[indices.shape[-1]:]
-//
-// Some examples below.
-//
-// Simple indexing into a matrix:
-//
-// ```python
-//     indices = [[0, 0], [1, 1]]
-//     params = [['a', 'b'], ['c', 'd']]
-//     output = ['a', 'd']
-// ```
-//
-// Slice indexing into a matrix:
-//
-// ```python
-//     indices = [[1], [0]]
-//     params = [['a', 'b'], ['c', 'd']]
-//     output = [['c', 'd'], ['a', 'b']]
-// ```
-//
-// Indexing into a 3-tensor:
-//
-// ```python
-//     indices = [[1]]
-//     params = [[['a0', 'b0'], ['c0', 'd0']],
-//               [['a1', 'b1'], ['c1', 'd1']]]
-//     output = [[['a1', 'b1'], ['c1', 'd1']]]
-//
-//
-//     indices = [[0, 1], [1, 0]]
-//     params = [[['a0', 'b0'], ['c0', 'd0']],
-//               [['a1', 'b1'], ['c1', 'd1']]]
-//     output = [['c0', 'd0'], ['a1', 'b1']]
+// Selects elements from `x` or `y`, depending on `condition`.
 //
+// The `x`, and `y` tensors must all have the same shape, and the
+// output will also have that shape.
 //
-//     indices = [[0, 0, 1], [1, 0, 1]]
-//     params = [[['a0', 'b0'], ['c0', 'd0']],
-//               [['a1', 'b1'], ['c1', 'd1']]]
-//     output = ['b0', 'b1']
-// ```
-//
-// Batched indexing into a matrix:
-//
-// ```python
-//     indices = [[[0, 0]], [[0, 1]]]
-//     params = [['a', 'b'], ['c', 'd']]
-//     output = [['a'], ['b']]
-// ```
+// The `condition` tensor must be a scalar if `x` and `y` are scalars.
+// If `x` and `y` are vectors or higher rank, then `condition` must be either a
+// scalar, a vector with size matching the first dimension of `x`, or must have
+// the same shape as `x`.
 //
-// Batched slice indexing into a matrix:
+// The `condition` tensor acts as a mask that chooses, based on the value at each
+// element, whether the corresponding element / row in the output should be
+// taken from `x` (if true) or `y` (if false).
 //
-// ```python
-//     indices = [[[1]], [[0]]]
-//     params = [['a', 'b'], ['c', 'd']]
-//     output = [[['c', 'd']], [['a', 'b']]]
-// ```
+// If `condition` is a vector and `x` and `y` are higher rank matrices, then
+// it chooses which row (outer dimension) to copy from `x` and `y`.
+// If `condition` has the same shape as `x` and `y`, then it chooses which
+// element to copy from `x` and `y`.
 //
-// Batched indexing into a 3-tensor:
+// For example:
 //
 // ```python
-//     indices = [[[1]], [[0]]]
-//     params = [[['a0', 'b0'], ['c0', 'd0']],
-//               [['a1', 'b1'], ['c1', 'd1']]]
-//     output = [[[['a1', 'b1'], ['c1', 'd1']]],
-//               [[['a0', 'b0'], ['c0', 'd0']]]]
-//
-//     indices = [[[0, 1], [1, 0]], [[0, 0], [1, 1]]]
-//     params = [[['a0', 'b0'], ['c0', 'd0']],
-//               [['a1', 'b1'], ['c1', 'd1']]]
-//     output = [[['c0', 'd0'], ['a1', 'b1']],
-//               [['a0', 'b0'], ['c1', 'd1']]]
+// # 'condition' tensor is [[True,  False]
+// #                        [False, True]]
+// # 't' is [[1, 2],
+// #         [3, 4]]
+// # 'e' is [[5, 6],
+// #         [7, 8]]
+// select(condition, t, e)  # => [[1, 6], [7, 4]]
 //
 //
-//     indices = [[[0, 0, 1], [1, 0, 1]], [[0, 1, 1], [1, 1, 0]]]
-//     params = [[['a0', 'b0'], ['c0', 'd0']],
-//               [['a1', 'b1'], ['c1', 'd1']]]
-//     output = [['b0', 'b1'], ['d0', 'c1']]
+// # 'condition' tensor is [True, False]
+// # 't' is [[1, 2],
+// #         [3, 4]]
+// # 'e' is [[5, 6],
+// #         [7, 8]]
+// select(condition, t, e) ==> [[1, 2],
+//                              [7, 8]]
+//
 // ```
 //
 // Arguments:
-//	params: The tensor from which to gather values.
-//	indices: Index tensor.
 //
-// Returns Values from `params` gathered from indices given by `indices`, with
-// shape `indices.shape[:-1] + params.shape[indices.shape[-1]:]`.
-func GatherNd(scope *Scope, params tf.Output, indices tf.Output) (output tf.Output) {
+//	x: = A `Tensor` which may have the same shape as `condition`.
+// If `condition` is rank 1, `x` may have higher rank,
+// but its first dimension must match the size of `condition`.
+//	y: = A `Tensor` with the same type and shape as `x`.
+//
+// Returns = A `Tensor` with the same type and shape as `x` and `y`.
+func Select(scope *Scope, condition tf.Output, x tf.Output, y tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "GatherNd",
+		Type: "Select",
 		Input: []tf.Input{
-			params, indices,
+			condition, x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// EditDistanceAttr is an optional argument to EditDistance.
-type EditDistanceAttr func(optionalAttr)
-
-// EditDistanceNormalize sets the optional normalize attribute to value.
-//
-// value: boolean (if true, edit distances are normalized by length of truth).
+// Returns the truth value of x OR y element-wise.
 //
-// The output is:
-// If not specified, defaults to true
-func EditDistanceNormalize(value bool) EditDistanceAttr {
-	return func(m optionalAttr) {
-		m["normalize"] = value
+// *NOTE*: `LogicalOr` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func LogicalOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "LogicalOr",
+		Input: []tf.Input{
+			x, y,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Computes the (possibly normalized) Levenshtein Edit Distance.
-//
-// The inputs are variable-length sequences provided by SparseTensors
-//   (hypothesis_indices, hypothesis_values, hypothesis_shape)
-// and
-//   (truth_indices, truth_values, truth_shape).
+// Compute the regularized incomplete beta integral \\(I_x(a, b)\\).
 //
-// The inputs are:
+// The regularized incomplete beta integral is defined as:
 //
-// Arguments:
-//	hypothesis_indices: The indices of the hypothesis list SparseTensor.
-// This is an N x R int64 matrix.
-//	hypothesis_values: The values of the hypothesis list SparseTensor.
-// This is an N-length vector.
-//	hypothesis_shape: The shape of the hypothesis list SparseTensor.
-// This is an R-length vector.
-//	truth_indices: The indices of the truth list SparseTensor.
-// This is an M x R int64 matrix.
-//	truth_values: The values of the truth list SparseTensor.
-// This is an M-length vector.
-//	truth_shape: truth indices, vector.
 //
-// Returns A dense float tensor with rank R - 1.
+// \\(I_x(a, b) = \frac{B(x; a, b)}{B(a, b)}\\)
 //
-// For the example input:
+// where
 //
-//     // hypothesis represents a 2x1 matrix with variable-length values:
-//     //   (0,0) = ["a"]
-//     //   (1,0) = ["b"]
-//     hypothesis_indices = [[0, 0, 0],
-//                           [1, 0, 0]]
-//     hypothesis_values = ["a", "b"]
-//     hypothesis_shape = [2, 1, 1]
 //
-//     // truth represents a 2x2 matrix with variable-length values:
-//     //   (0,0) = []
-//     //   (0,1) = ["a"]
-//     //   (1,0) = ["b", "c"]
-//     //   (1,1) = ["a"]
-//     truth_indices = [[0, 1, 0],
-//                      [1, 0, 0],
-//                      [1, 0, 1],
-//                      [1, 1, 0]]
-//     truth_values = ["a", "b", "c", "a"]
-//     truth_shape = [2, 2, 2]
-//     normalize = true
+// \\(B(x; a, b) = \int_0^x t^{a-1} (1 - t)^{b-1} dt\\)
 //
-// The output will be:
 //
-//     // output is a 2x2 matrix with edit distances normalized by truth lengths.
-//     output = [[inf, 1.0],  // (0,0): no truth, (0,1): no hypothesis
-//               [0.5, 1.0]]  // (1,0): addition, (1,1): no hypothesis
-func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values tf.Output, hypothesis_shape tf.Output, truth_indices tf.Output, truth_values tf.Output, truth_shape tf.Output, optional ...EditDistanceAttr) (output tf.Output) {
+// is the incomplete beta function and \\(B(a, b)\\) is the *complete*
+// beta function.
+func Betainc(scope *Scope, a tf.Output, b tf.Output, x tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "EditDistance",
+		Type: "Betainc",
 		Input: []tf.Input{
-			hypothesis_indices, hypothesis_values, hypothesis_shape, truth_indices, truth_values, truth_shape,
+			a, b, x,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns a batched matrix tensor with new batched diagonal values.
-//
-// Given `input` and `diagonal`, this operation returns a tensor with the
-// same shape and values as `input`, except for the main diagonal of the
-// innermost matrices.  These will be overwritten by the values in `diagonal`.
+// Computes the sum along sparse segments of a tensor divided by the sqrt of N.
 //
-// The output is computed as follows:
+// N is the size of the segment being reduced.
 //
-// Assume `input` has `k+1` dimensions `[I, J, K, ..., M, N]` and `diagonal` has
-// `k` dimensions `[I, J, K, ..., min(M, N)]`.  Then the output is a
-// tensor of rank `k+1` with dimensions `[I, J, K, ..., M, N]` where:
+// Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is
+// misisng, the `output` tensor at that position will be zeroed.
 //
-//   * `output[i, j, k, ..., m, n] = diagonal[i, j, k, ..., n]` for `m == n`.
-//   * `output[i, j, k, ..., m, n] = input[i, j, k, ..., m, n]` for `m != n`.
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
 //
 // Arguments:
-//	input: Rank `k+1`, where `k >= 1`.
-//	diagonal: Rank `k`, where `k >= 1`.
 //
-// Returns Rank `k+1`, with `output.shape = input.shape`.
-func MatrixSetDiag(scope *Scope, input tf.Output, diagonal tf.Output) (output tf.Output) {
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//	num_segments: Should equal the number of distinct segment IDs.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SparseSegmentSqrtNWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "MatrixSetDiag",
+		Type: "SparseSegmentSqrtNWithNumSegments",
 		Input: []tf.Input{
-			input, diagonal,
+			data, indices, segment_ids, num_segments,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns the diagonal part of the tensor.
-//
-// This operation returns a tensor with the `diagonal` part
-// of the `input`. The `diagonal` part is computed as follows:
-//
-// Assume `input` has dimensions `[D1,..., Dk, D1,..., Dk]`, then the output is a
-// tensor of rank `k` with dimensions `[D1,..., Dk]` where:
+// Compute the upper regularized incomplete Gamma function `Q(a, x)`.
 //
-// `diagonal[i1,..., ik] = input[i1, ..., ik, i1,..., ik]`.
+// The upper regularized incomplete Gamma function is defined as:
 //
-// For example:
+// \\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\)
 //
-// ```
-// # 'input' is [[1, 0, 0, 0]
-//               [0, 2, 0, 0]
-//               [0, 0, 3, 0]
-//               [0, 0, 0, 4]]
+// where
 //
-// tf.diag_part(input) ==> [1, 2, 3, 4]
-// ```
+// \\(Gamma(a, x) = int_{x}^{\infty} t^{a-1} exp(-t) dt\\)
 //
-// Arguments:
-//	input: Rank k tensor where k is even and not zero.
+// is the upper incomplete Gama function.
 //
-// Returns The extracted diagonal.
-func DiagPart(scope *Scope, input tf.Output) (diagonal tf.Output) {
+// Note, above `P(a, x)` (`Igamma`) is the lower regularized complete
+// Gamma function.
+func Igammac(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "DiagPart",
+		Type: "Igammac",
 		Input: []tf.Input{
-			input,
+			a, x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// DequantizeAttr is an optional argument to Dequantize.
-type DequantizeAttr func(optionalAttr)
-
-// DequantizeMode sets the optional mode attribute to value.
-// If not specified, defaults to "MIN_COMBINED"
-func DequantizeMode(value string) DequantizeAttr {
-	return func(m optionalAttr) {
-		m["mode"] = value
-	}
-}
+// FakeQuantWithMinMaxVarsGradientAttr is an optional argument to FakeQuantWithMinMaxVarsGradient.
+type FakeQuantWithMinMaxVarsGradientAttr func(optionalAttr)
 
-// Dequantize the 'input' tensor into a float Tensor.
-//
-// [min_range, max_range] are scalar floats that specify the range for
-// the 'input' data. The 'mode' attribute controls exactly which calculations are
-// used to convert the float values to their quantized equivalents.
-//
-// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
-//
-// ```
-// if T == qint8, in[i] += (range(T) + 1)/ 2.0
-// out[i] = min_range + (in[i]* (max_range - min_range) / range(T))
-// ```
-// here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
-//
-// *MIN_COMBINED Mode Example*
-//
-// If the input comes from a QuantizedRelu6, the output type is
-// quint8 (range of 0-255) but the possible range of QuantizedRelu6 is
-// 0-6.  The min_range and max_range values are therefore 0.0 and 6.0.
-// Dequantize on quint8 will take each value, cast to float, and multiply
-// by 6 / 255.
-// Note that if quantizedtype is qint8, the operation will additionally add
-// each value by 128 prior to casting.
-//
-// If the mode is 'MIN_FIRST', then this approach is used:
-//
-// ```c++
-// num_discrete_values = 1 << (# of bits in T)
-// range_adjust = num_discrete_values / (num_discrete_values - 1)
-// range = (range_max - range_min) * range_adjust
-// range_scale = range / num_discrete_values
-// const double offset_input = static_cast<double>(input) - lowest_quantized;
-// result = range_min + ((input - numeric_limits<T>::min()) * range_scale)
-// ```
-//
-// *SCALED mode Example*
-//
-// `SCALED` mode matches the quantization approach used in
-// `QuantizeAndDequantize{V2|V3}`.
-//
-// If the mode is `SCALED`, we do not use the full range of the output type,
-// choosing to elide the lowest possible value for symmetry (e.g., output range is
-// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to
-// 0.
-//
-// We first find the range of values in our tensor. The
-// range we use is always centered on 0, so we find m such that
-// ```c++
-//   m = max(abs(input_min), abs(input_max))
-// ```
-//
-// Our input tensor range is then `[-m, m]`.
-//
-// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.
-// If T is signed, this is
-// ```
-//   num_bits = sizeof(T) * 8
-//   [min_fixed, max_fixed] =
-//       [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]
-// ```
-//
-// Otherwise, if T is unsigned, the fixed-point range is
-// ```
-//   [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]
-// ```
+// FakeQuantWithMinMaxVarsGradientNumBits sets the optional num_bits attribute to value.
 //
-// From this we compute our scaling factor, s:
-// ```c++
-//   s = (2 * m) / (max_fixed - min_fixed)
-// ```
+// value: The bitwidth of the quantization; between 2 and 8, inclusive.
+// If not specified, defaults to 8
+func FakeQuantWithMinMaxVarsGradientNumBits(value int64) FakeQuantWithMinMaxVarsGradientAttr {
+	return func(m optionalAttr) {
+		m["num_bits"] = value
+	}
+}
+
+// FakeQuantWithMinMaxVarsGradientNarrowRange sets the optional narrow_range attribute to value.
 //
-// Now we can dequantize the elements of our tensor:
-// ```c++
-// result = input * s
-// ```
+// value: Whether to quantize into 2^num_bits - 1 distinct values.
+// If not specified, defaults to false
+func FakeQuantWithMinMaxVarsGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsGradientAttr {
+	return func(m optionalAttr) {
+		m["narrow_range"] = value
+	}
+}
+
+// Compute gradients for a FakeQuantWithMinMaxVars operation.
 //
 // Arguments:
+//	gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation.
+//	inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation.
+// min, max: Quantization interval, scalar floats.
 //
-//	min_range: The minimum scalar value possibly produced for the input.
-//	max_range: The maximum scalar value possibly produced for the input.
-func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, optional ...DequantizeAttr) (output tf.Output) {
+//
+//
+// Returns Backpropagated gradients w.r.t. inputs:
+// `gradients * (inputs >= min && inputs <= max)`.Backpropagated gradients w.r.t. min parameter:
+// `sum(gradients * (inputs < min))`.Backpropagated gradients w.r.t. max parameter:
+// `sum(gradients * (inputs > max))`.
+func FakeQuantWithMinMaxVarsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -1989,242 +1456,250 @@ func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Dequantize",
+		Type: "FakeQuantWithMinMaxVarsGradient",
 		Input: []tf.Input{
-			input, min_range, max_range,
+			gradients, inputs, min, max,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Returns a tensor of zeros with the same shape and type as x.
-//
-// Arguments:
-//	x: a tensor of type T.
+// LogUniformCandidateSamplerAttr is an optional argument to LogUniformCandidateSampler.
+type LogUniformCandidateSamplerAttr func(optionalAttr)
+
+// LogUniformCandidateSamplerSeed sets the optional seed attribute to value.
 //
-// Returns a tensor of the same shape and type as x but filled with zeros.
-func ZerosLike(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func LogUniformCandidateSamplerSeed(value int64) LogUniformCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "ZerosLike",
-		Input: []tf.Input{
-			x,
-		},
+}
+
+// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+//
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Splits a tensor into `num_split` tensors along one dimension.
+// Generates labels for candidate sampling with a log-uniform distribution.
 //
-// Arguments:
-//	value: The tensor to split.
-//	size_splits: list containing the sizes of each output tensor along the split
-// dimension. Must sum to the dimension of value along split_dim.
-// Can contain one -1 indicating that dimension is to be inferred.
-//	split_dim: 0-D.  The dimension along which to split.  Must be in the range
-// `[-rank(value), rank(value))`.
+// See explanations of candidate sampling and the data formats at
+// go/candidate-sampling.
 //
+// For each batch, this op picks a single set of sampled candidate labels.
 //
-// Returns Tensors whose shape matches that of `value`
-// except along `split_dim`, where their sizes are
-// `size_splits[i]`.
-func SplitV(scope *Scope, value tf.Output, size_splits tf.Output, split_dim tf.Output, num_split int64) (output []tf.Output) {
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
+//
+// Arguments:
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to randomly sample.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
+//	range_max: The sampler will sample integers from the interval [0, range_max).
+//
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_split": num_split}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SplitV",
+		Type: "LogUniformCandidateSampler",
 		Input: []tf.Input{
-			value, size_splits, split_dim,
+			true_classes,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
-		scope.UpdateErr("SplitV", err)
-		return
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// ApproximateEqualAttr is an optional argument to ApproximateEqual.
+type ApproximateEqualAttr func(optionalAttr)
+
+// ApproximateEqualTolerance sets the optional tolerance attribute to value.
+// If not specified, defaults to 1e-05
+func ApproximateEqualTolerance(value float32) ApproximateEqualAttr {
+	return func(m optionalAttr) {
+		m["tolerance"] = value
 	}
-	return output
 }
 
-// Splits a tensor into `num_split` tensors along one dimension.
-//
-// Arguments:
-//	split_dim: 0-D.  The dimension along which to split.  Must be in the range
-// `[-rank(value), rank(value))`.
-//	value: The tensor to split.
-//	num_split: The number of ways to split.  Must evenly divide
-// `value.shape[split_dim]`.
-//
-// Returns They are identically shaped tensors, whose shape matches that of `value`
-// except along `split_dim`, where their sizes are
-// `values.shape[split_dim] / num_split`.
-func Split(scope *Scope, split_dim tf.Output, value tf.Output, num_split int64) (output []tf.Output) {
+// Returns the truth value of abs(x-y) < tolerance element-wise.
+func ApproximateEqual(scope *Scope, x tf.Output, y tf.Output, optional ...ApproximateEqualAttr) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_split": num_split}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Split",
+		Type: "ApproximateEqual",
 		Input: []tf.Input{
-			split_dim, value,
+			x, y,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
-		scope.UpdateErr("Split", err)
-		return
-	}
-	return output
+	return op.Output(0)
 }
 
-// Computes offsets of concat inputs within its output.
-//
-// For example:
-//
-// ```
-// # 'x' is [2, 2, 7]
-// # 'y' is [2, 3, 7]
-// # 'z' is [2, 5, 7]
-// concat_offset(2, [x, y, z]) => [0, 0, 0], [0, 2, 0], [0, 5, 0]
-// ```
-//
-// This is typically used by gradient computations for a concat operation.
-//
-// Arguments:
-//	concat_dim: The dimension along which to concatenate.
-//	shape: The `N` int32 vectors representing shape of tensors being concatenated.
+// Returns x / y element-wise.
 //
-// Returns The `N` int32 vectors representing the starting offset
-// of input tensors within the concatenated output.
-func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset []tf.Output) {
+// *NOTE*: `Div` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ConcatOffset",
+		Type: "Div",
 		Input: []tf.Input{
-			concat_dim, tf.OutputList(shape),
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if offset, idx, err = makeOutputList(op, idx, "offset"); err != nil {
-		scope.UpdateErr("ConcatOffset", err)
-		return
-	}
-	return offset
+	return op.Output(0)
 }
 
-// Writes a `Summary` protocol buffer with a histogram.
-//
-// The generated
-// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
-// has one summary value containing a histogram for `values`.
-//
-// This op reports an `InvalidArgument` error if any value is not finite.
-//
-// Arguments:
-//	writer: A handle to a summary writer.
-//	step: The step to write the summary for.
-//	tag: Scalar.  Tag to use for the `Summary.Value`.
-//	values: Any shape. Values to use to build the histogram.
+// Returns x * y element-wise.
 //
-// Returns the created operation.
-func WriteHistogramSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, values tf.Output) (o *tf.Operation) {
+// *NOTE*: `Multiply` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "WriteHistogramSummary",
+		Type: "Mul",
 		Input: []tf.Input{
-			writer, step, tag, values,
+			x, y,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Concatenates tensors along one dimension.
+// SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse.
+type SparseReduceSumSparseAttr func(optionalAttr)
+
+// SparseReduceSumSparseKeepDims sets the optional keep_dims attribute to value.
+//
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func SparseReduceSumSparseKeepDims(value bool) SparseReduceSumSparseAttr {
+	return func(m optionalAttr) {
+		m["keep_dims"] = value
+	}
+}
+
+// Computes the sum of elements across dimensions of a SparseTensor.
+//
+// This Op takes a SparseTensor and is the sparse counterpart to
+// `tf.reduce_sum()`.  In contrast to SparseReduceSum, this Op returns a
+// SparseTensor.
+//
+// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
+// with length 1.
 //
-// Arguments:
-//	concat_dim: 0-D.  The dimension along which to concatenate.  Must be in the
-// range [0, rank(values)).
-//	values: The `N` Tensors to concatenate. Their ranks and types must match,
-// and their sizes must match in all dimensions except `concat_dim`.
+// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
+// with a single element is returned.  Additionally, the axes can be negative,
+// which are interpreted according to the indexing rules in Python.
 //
-// Returns A `Tensor` with the concatenation of values stacked along the
-// `concat_dim` dimension.  This tensor's shape matches that of `values` except
-// in `concat_dim` where it has the sum of the sizes.
-func Concat(scope *Scope, concat_dim tf.Output, values []tf.Output) (output tf.Output) {
+// Arguments:
+//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
+//	input_shape: 1-D.  Shape of the input SparseTensor.
+//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
+func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Concat",
+		Type: "SparseReduceSumSparse",
 		Input: []tf.Input{
-			concat_dim, tf.OutputList(values),
+			input_indices, input_values, input_shape, reduction_axes,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Concatenates a list of `N` tensors along the first dimension.
-//
-// The input tensors are all required to have size 1 in the first dimension.
-//
-// For example:
+// BiasAddAttr is an optional argument to BiasAdd.
+type BiasAddAttr func(optionalAttr)
+
+// BiasAddDataFormat sets the optional data_format attribute to value.
 //
-// ```
-// # 'x' is [[1, 4]]
-// # 'y' is [[2, 5]]
-// # 'z' is [[3, 6]]
-// parallel_concat([x, y, z]) => [[1, 4], [2, 5], [3, 6]]  # Pack along first dim.
-// ```
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the bias tensor will be added to the last dimension
+// of the value tensor.
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// The tensor will be added to "in_channels", the third-to-the-last
+//     dimension.
+// If not specified, defaults to "NHWC"
+func BiasAddDataFormat(value string) BiasAddAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Adds `bias` to `value`.
 //
-// The difference between concat and parallel_concat is that concat requires all
-// of the inputs be computed before the operation will begin but doesn't require
-// that the input shapes be known during graph construction.  Parallel concat
-// will copy pieces of the input into the output as they become available, in
-// some situations this can provide a performance benefit.
+// This is a special case of `tf.add` where `bias` is restricted to be 1-D.
+// Broadcasting is supported, so `value` may have any number of dimensions.
 //
 // Arguments:
-//	values: Tensors to be concatenated. All must have size 1 in the first dimension
-// and same shape.
-//	shape: the final shape of the result; should be equal to the shapes of any input
-// but with the number of input values in the first dimension.
+//	value: Any number of dimensions.
+//	bias: 1-D with size the last dimension of `value`.
 //
-// Returns The concatenated tensor.
-func ParallelConcat(scope *Scope, values []tf.Output, shape tf.Shape) (output tf.Output) {
+// Returns Broadcasted sum of `value` and `bias`.
+func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"shape": shape}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "ParallelConcat",
+		Type: "BiasAdd",
 		Input: []tf.Input{
-			tf.OutputList(values),
+			value, bias,
 		},
 		Attrs: attrs,
 	}
@@ -2232,40 +1707,36 @@ func ParallelConcat(scope *Scope, values []tf.Output, shape tf.Shape) (output tf
 	return op.Output(0)
 }
 
-// UniqueAttr is an optional argument to Unique.
-type UniqueAttr func(optionalAttr)
+// BiasAddGradAttr is an optional argument to BiasAddGrad.
+type BiasAddGradAttr func(optionalAttr)
 
-// UniqueOutIdx sets the optional out_idx attribute to value.
-// If not specified, defaults to DT_INT32
-func UniqueOutIdx(value tf.DataType) UniqueAttr {
+// BiasAddGradDataFormat sets the optional data_format attribute to value.
+//
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the bias tensor will be added to the last dimension
+// of the value tensor.
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// The tensor will be added to "in_channels", the third-to-the-last
+//     dimension.
+// If not specified, defaults to "NHWC"
+func BiasAddGradDataFormat(value string) BiasAddGradAttr {
 	return func(m optionalAttr) {
-		m["out_idx"] = value
+		m["data_format"] = value
 	}
 }
 
-// Finds unique elements in a 1-D tensor.
-//
-// This operation returns a tensor `y` containing all of the unique elements of `x`
-// sorted in the same order that they occur in `x`. This operation also returns a
-// tensor `idx` the same size as `x` that contains the index of each value of `x`
-// in the unique output `y`. In other words:
-//
-// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
-//
-// For example:
+// The backward operation for "BiasAdd" on the "bias" tensor.
 //
-// ```
-// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
-// y, idx = unique(x)
-// y ==> [1, 2, 4, 7, 8]
-// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
-// ```
+// It accumulates all the values from out_backprop into the feature dimension.
+// For NHWC data format, the feature dimension is the last. For NCHW data format,
+// the feature dimension is the third-to-last.
 //
 // Arguments:
-//	x: 1-D.
+//	out_backprop: Any number of dimensions.
 //
-// Returns 1-D.1-D.
-func Unique(scope *Scope, x tf.Output, optional ...UniqueAttr) (y tf.Output, idx tf.Output) {
+// Returns 1-D with size the feature dimension of `out_backprop`.
+func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -2274,61 +1745,83 @@ func Unique(scope *Scope, x tf.Output, optional ...UniqueAttr) (y tf.Output, idx
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Unique",
+		Type: "BiasAddGrad",
 		Input: []tf.Input{
-			x,
+			out_backprop,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// DecodeWavAttr is an optional argument to DecodeWav.
-type DecodeWavAttr func(optionalAttr)
+// Returns x + y element-wise.
+//
+// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AddV2",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// DecodeWavDesiredChannels sets the optional desired_channels attribute to value.
+// Returns x + y element-wise.
 //
-// value: Number of sample channels wanted.
-// If not specified, defaults to -1
-func DecodeWavDesiredChannels(value int64) DecodeWavAttr {
-	return func(m optionalAttr) {
-		m["desired_channels"] = value
+// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Add",
+		Input: []tf.Input{
+			x, y,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// DecodeWavDesiredSamples sets the optional desired_samples attribute to value.
+// NthElementAttr is an optional argument to NthElement.
+type NthElementAttr func(optionalAttr)
+
+// NthElementReverse sets the optional reverse attribute to value.
 //
-// value: Length of audio requested.
-// If not specified, defaults to -1
-func DecodeWavDesiredSamples(value int64) DecodeWavAttr {
+// value: When set to True, find the nth-largest value in the vector and vice
+// versa.
+// If not specified, defaults to false
+func NthElementReverse(value bool) NthElementAttr {
 	return func(m optionalAttr) {
-		m["desired_samples"] = value
+		m["reverse"] = value
 	}
 }
 
-// Decode a 16-bit PCM WAV file to a float tensor.
-//
-// The -32768 to 32767 signed 16-bit values will be scaled to -1.0 to 1.0 in float.
+// Finds values of the `n`-th order statistic for the last dimension.
 //
-// When desired_channels is set, if the input contains fewer channels than this
-// then the last channel will be duplicated to give the requested number, else if
-// the input has more channels than requested then the additional channels will be
-// ignored.
+// If the input is a vector (rank-1), finds the entries which is the nth-smallest
+// value in the vector and outputs their values as scalar tensor.
 //
-// If desired_samples is set, then the audio will be cropped or padded with zeroes
-// to the requested length.
+// For matrices (resp. higher rank input), computes the entries which is the
+// nth-smallest value in each row (resp. vector along the last dimension). Thus,
 //
-// The first output contains a Tensor with the content of the audio samples. The
-// lowest dimension will be the number of channels, and the second will be the
-// number of samples. For example, a ten-sample-long stereo WAV file should give an
-// output shape of [10, 2].
+//     values.shape = input.shape[:-1]
 //
 // Arguments:
-//	contents: The WAV-encoded audio, usually from a file.
+//	input: 1-D or higher with last dimension at least `n+1`.
+//	n: 0-D. Position of sorted vector to select along the last dimension (along
+// each row for matrices). Valid range of n is `[0, input.shape[:-1])`
 //
-// Returns 2-D with shape `[length, channels]`.Scalar holding the sample rate found in the WAV header.
-func DecodeWav(scope *Scope, contents tf.Output, optional ...DecodeWavAttr) (audio tf.Output, sample_rate tf.Output) {
+// Returns The `n`-th order statistic along each last dimensional slice.
+func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -2337,462 +1830,490 @@ func DecodeWav(scope *Scope, contents tf.Output, optional ...DecodeWavAttr) (aud
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DecodeWav",
+		Type: "NthElement",
 		Input: []tf.Input{
-			contents,
+			input, n,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// Elementwise computes the bitwise right-shift of `x` and `y`.
+// Computes the Max along segments of a tensor.
 //
-// Performs a logical shift for unsigned integer types, and an arithmetic shift
-// for signed integer types.
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
 //
-// If `y` is negative, or greater than or equal to than the width of `x` in bits
-// the result is implementation defined.
-func RightShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// This operator is similar to the [unsorted segment sum operator](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
+// Instead of computing the sum over segments, it computes the maximum
+// such that:
+//
+// \\(output_i = \max_j data_j\\) where max is over `j` such
+// that `segment_ids[j] == i`.
+//
+// If the maximum is empty for a given segment ID `i`, it outputs the smallest possible value for specific numeric type,
+//  `output[i] = numeric_limits<T>::min()`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentMax.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+// first dimension.
+//
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `num_segments`.
+func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "RightShift",
+		Type: "UnsortedSegmentMax",
 		Input: []tf.Input{
-			x, y,
+			data, segment_ids, num_segments,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Elementwise computes the bitwise left-shift of `x` and `y`.
-//
-// If `y` is negative, or greater than or equal to the width of `x` in bits the
-// result is implementation defined.
-func LeftShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Computes exponential of x element-wise.  \\(y = e^x\\).
+func Exp(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "LeftShift",
+		Type: "Exp",
 		Input: []tf.Input{
-			x, y,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Elementwise computes the bitwise AND of `x` and `y`.
+// Returns an element-wise indication of the sign of a number.
 //
-// The result will have those bits set, that are set in both `x` and `y`. The
-// computation is performed on the underlying representations of `x` and `y`.
-func BitwiseAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`.
+//
+// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`.
+func Sign(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "BitwiseAnd",
+		Type: "Sign",
 		Input: []tf.Input{
-			x, y,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// FixedUnigramCandidateSamplerAttr is an optional argument to FixedUnigramCandidateSampler.
-type FixedUnigramCandidateSamplerAttr func(optionalAttr)
+// QuantizedAddAttr is an optional argument to QuantizedAdd.
+type QuantizedAddAttr func(optionalAttr)
 
-// FixedUnigramCandidateSamplerVocabFile sets the optional vocab_file attribute to value.
-//
-// value: Each valid line in this file (which should have a CSV-like format)
-// corresponds to a valid word ID. IDs are in sequential order, starting from
-// num_reserved_ids. The last entry in each line is expected to be a value
-// corresponding to the count or relative probability. Exactly one of vocab_file
-// and unigrams needs to be passed to this op.
-// If not specified, defaults to ""
-func FixedUnigramCandidateSamplerVocabFile(value string) FixedUnigramCandidateSamplerAttr {
+// QuantizedAddToutput sets the optional Toutput attribute to value.
+// If not specified, defaults to DT_QINT32
+func QuantizedAddToutput(value tf.DataType) QuantizedAddAttr {
 	return func(m optionalAttr) {
-		m["vocab_file"] = value
+		m["Toutput"] = value
 	}
 }
 
-// FixedUnigramCandidateSamplerDistortion sets the optional distortion attribute to value.
+// Returns x + y element-wise, working on quantized buffers.
 //
-// value: The distortion is used to skew the unigram probability distribution.
-// Each weight is first raised to the distortion's power before adding to the
-// internal unigram distribution. As a result, distortion = 1.0 gives regular
-// unigram sampling (as defined by the vocab file), and distortion = 0.0 gives
-// a uniform distribution.
-// If not specified, defaults to 1
-func FixedUnigramCandidateSamplerDistortion(value float32) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["distortion"] = value
+// Arguments:
+//
+//
+//	min_x: The float value that the lowest quantized `x` value represents.
+//	max_x: The float value that the highest quantized `x` value represents.
+//	min_y: The float value that the lowest quantized `y` value represents.
+//	max_y: The float value that the highest quantized `y` value represents.
+//
+// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
+//
+// *NOTE*: `QuantizedAdd` supports limited forms of broadcasting. More about
+// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func QuantizedAdd(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedAddAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QuantizedAdd",
+		Input: []tf.Input{
+			x, y, min_x, max_x, min_y, max_y,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// FixedUnigramCandidateSamplerNumReservedIds sets the optional num_reserved_ids attribute to value.
-//
-// value: Optionally some reserved IDs can be added in the range [0,
-// ..., num_reserved_ids) by the users. One use case is that a special unknown
-// word token is used as ID 0. These IDs will have a sampling probability of 0.
-// If not specified, defaults to 0
-func FixedUnigramCandidateSamplerNumReservedIds(value int64) FixedUnigramCandidateSamplerAttr {
+// ArgMinAttr is an optional argument to ArgMin.
+type ArgMinAttr func(optionalAttr)
+
+// ArgMinOutputType sets the optional output_type attribute to value.
+// If not specified, defaults to DT_INT64
+func ArgMinOutputType(value tf.DataType) ArgMinAttr {
 	return func(m optionalAttr) {
-		m["num_reserved_ids"] = value
+		m["output_type"] = value
 	}
 }
 
-// FixedUnigramCandidateSamplerNumShards sets the optional num_shards attribute to value.
+// Returns the index with the smallest value across dimensions of a tensor.
 //
-// value: A sampler can be used to sample from a subset of the original range
-// in order to speed up the whole computation through parallelism. This parameter
-// (together with 'shard') indicates the number of partitions that are being
-// used in the overall computation.
-// If not specified, defaults to 1
+// Note that in case of ties the identity of the return value is not guaranteed.
 //
-// REQUIRES: value >= 1
-func FixedUnigramCandidateSamplerNumShards(value int64) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["num_shards"] = value
+// Arguments:
+//
+//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
+// Describes which dimension of the input Tensor to reduce across. For vectors,
+// use dimension = 0.
+func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ArgMin",
+		Input: []tf.Input{
+			input, dimension,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// FixedUnigramCandidateSamplerShard sets the optional shard attribute to value.
+// Convert the quantized 'input' tensor into a lower-precision 'output', using the
 //
-// value: A sampler can be used to sample from a subset of the original range
-// in order to speed up the whole computation through parallelism. This parameter
-// (together with 'num_shards') indicates the particular partition number of a
-// sampler op, when partitioning is being used.
-// If not specified, defaults to 0
+// output range specified with 'requested_output_min' and 'requested_output_max'.
 //
-// REQUIRES: value >= 0
-func FixedUnigramCandidateSamplerShard(value int64) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["shard"] = value
-	}
-}
-
-// FixedUnigramCandidateSamplerUnigrams sets the optional unigrams attribute to value.
+// [input_min, input_max] are scalar floats that specify the range for the float
+// interpretation of the 'input' data. For example, if input_min is -1.0f and
+// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
+// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
 //
-// value: A list of unigram counts or probabilities, one per ID in sequential
-// order. Exactly one of vocab_file and unigrams should be passed to this op.
-// If not specified, defaults to <>
-func FixedUnigramCandidateSamplerUnigrams(value []float32) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["unigrams"] = value
-	}
-}
-
-// FixedUnigramCandidateSamplerSeed sets the optional seed attribute to value.
+// Arguments:
 //
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func FixedUnigramCandidateSamplerSeed(value int64) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// FixedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+//	input_min: The float value that the minimum quantized input value represents.
+//	input_max: The float value that the maximum quantized input value represents.
+//	requested_output_min: The float value that the minimum quantized output value represents.
+//	requested_output_max: The float value that the maximum quantized output value represents.
+//	out_type: The type of the output. Should be a lower bit depth than Tinput.
 //
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func FixedUnigramCandidateSamplerSeed2(value int64) FixedUnigramCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
+// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
+func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type}
+	opspec := tf.OpSpec{
+		Type: "Requantize",
+		Input: []tf.Input{
+			input, input_min, input_max, requested_output_min, requested_output_max,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Generates labels for candidate sampling with a learned unigram distribution.
-//
-// A unigram sampler could use a fixed unigram distribution read from a
-// file or passed in as an in-memory array instead of building up the distribution
-// from data on the fly. There is also an option to skew the distribution by
-// applying a distortion power to the weights.
-//
-// The vocabulary file should be in CSV-like format, with the last field
-// being the weight associated with the word.
-//
-// For each batch, this op picks a single set of sampled candidate labels.
+// Computes the determinant of one or more square matrices.
 //
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
+// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
+// form square matrices. The output is a tensor containing the determinants
+// for all input submatrices `[..., :, :]`.
 //
 // Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to randomly sample.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
-//	range_max: The sampler will sample integers from the interval [0, range_max).
+//	input: Shape is `[..., M, M]`.
 //
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func FixedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...FixedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+// Returns Shape is `[...]`.
+func MatrixDeterminant(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "MatrixDeterminant",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes sin of x element-wise.
+func Sin(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sin",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the complementary error function of `x` element-wise.
+func Erfc(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
-	for _, a := range optional {
-		a(attrs)
+	opspec := tf.OpSpec{
+		Type: "Erfc",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes Psi, the derivative of Lgamma (the log of the absolute value of
+//
+// `Gamma(x)`), element-wise.
+func Digamma(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
 	opspec := tf.OpSpec{
-		Type: "FixedUnigramCandidateSampler",
+		Type: "Digamma",
 		Input: []tf.Input{
-			true_classes,
+			x,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// UniformCandidateSamplerAttr is an optional argument to UniformCandidateSampler.
-type UniformCandidateSamplerAttr func(optionalAttr)
+// Conv2DBackpropFilterAttr is an optional argument to Conv2DBackpropFilter.
+type Conv2DBackpropFilterAttr func(optionalAttr)
 
-// UniformCandidateSamplerSeed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func UniformCandidateSamplerSeed(value int64) UniformCandidateSamplerAttr {
+// Conv2DBackpropFilterUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value.
+// If not specified, defaults to true
+func Conv2DBackpropFilterUseCudnnOnGpu(value bool) Conv2DBackpropFilterAttr {
 	return func(m optionalAttr) {
-		m["seed"] = value
+		m["use_cudnn_on_gpu"] = value
 	}
 }
 
-// UniformCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+// Conv2DBackpropFilterDataFormat sets the optional data_format attribute to value.
 //
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func UniformCandidateSamplerSeed2(value int64) UniformCandidateSamplerAttr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func Conv2DBackpropFilterDataFormat(value string) Conv2DBackpropFilterAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["data_format"] = value
 	}
 }
 
-// Generates labels for candidate sampling with a uniform distribution.
-//
-// See explanations of candidate sampling and the data formats at
-// go/candidate-sampling.
-//
-// For each batch, this op picks a single set of sampled candidate labels.
+// Conv2DBackpropFilterDilations sets the optional dilations attribute to value.
 //
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func Conv2DBackpropFilterDilations(value []int64) Conv2DBackpropFilterAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of convolution with respect to the filter.
 //
 // Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to randomly sample.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
-//	range_max: The sampler will sample integers from the interval [0, range_max).
+//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
+//	filter_sizes: An integer vector representing the tensor shape of `filter`,
+// where `filter` is a 4-D
+// `[filter_height, filter_width, in_channels, out_channels]` tensor.
+//	out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
+// Gradients w.r.t. the output of the convolution.
+//	strides: The stride of the sliding window for each dimension of the input
+// of the convolution. Must be in the same order as the dimension specified with
+// format.
+//	padding: The type of padding algorithm to use.
 //
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func UniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...UniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+// Returns 4-D with shape
+// `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
+// the `filter` input of the convolution.
+func Conv2DBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropFilterAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "UniformCandidateSampler",
+		Type: "Conv2DBackpropFilter",
 		Input: []tf.Input{
-			true_classes,
+			input, filter_sizes, out_backprop,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// AbortAttr is an optional argument to Abort.
-type AbortAttr func(optionalAttr)
-
-// AbortErrorMsg sets the optional error_msg attribute to value.
+// Returns the number of work units this Reader has finished processing.
 //
-// value: A string which is the message associated with the exception.
-// If not specified, defaults to ""
-func AbortErrorMsg(value string) AbortAttr {
-	return func(m optionalAttr) {
-		m["error_msg"] = value
+// Arguments:
+//	reader_handle: Handle to a Reader.
+func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// AbortExitWithoutError sets the optional exit_without_error attribute to value.
-// If not specified, defaults to false
-func AbortExitWithoutError(value bool) AbortAttr {
-	return func(m optionalAttr) {
-		m["exit_without_error"] = value
+	opspec := tf.OpSpec{
+		Type: "ReaderNumWorkUnitsCompletedV2",
+		Input: []tf.Input{
+			reader_handle,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Raise a exception to abort the process when called.
-//
-// If exit_without_error is true, the process will exit normally,
-// otherwise it will exit with a SIGABORT signal.
+// Returns x / y element-wise for real types.
 //
-// Returns nothing but an exception.
+// If `x` and `y` are reals, this will return the floating-point division.
 //
-// Returns the created operation.
-func Abort(scope *Scope, optional ...AbortAttr) (o *tf.Operation) {
+// *NOTE*: `Div` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func RealDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "Abort",
-
-		Attrs: attrs,
+		Type: "RealDiv",
+		Input: []tf.Input{
+			x, y,
+		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// SpaceToDepthAttr is an optional argument to SpaceToDepth.
-type SpaceToDepthAttr func(optionalAttr)
-
-// SpaceToDepthDataFormat sets the optional data_format attribute to value.
-// If not specified, defaults to "NHWC"
-func SpaceToDepthDataFormat(value string) SpaceToDepthAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
+// Computes the log of the absolute value of `Gamma(x)` element-wise.
+func Lgamma(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// SpaceToDepth for tensors of type T.
-//
-// Rearranges blocks of spatial data, into depth. More specifically,
-// this op outputs a copy of the input tensor where values from the `height`
-// and `width` dimensions are moved to the `depth` dimension.
-// The attr `block_size` indicates the input block size.
-//
-//   * Non-overlapping blocks of size `block_size x block size` are rearranged
-//     into depth at each location.
-//   * The depth of the output tensor is `block_size * block_size * input_depth`.
-//   * The Y, X coordinates within each block of the input become the high order
-//     component of the output channel index.
-//   * The input tensor's height and width must be divisible by block_size.
-//
-// The `data_format` attr specifies the layout of the input and output tensors
-// with the following options:
-//   "NHWC": `[ batch, height, width, channels ]`
-//   "NCHW": `[ batch, channels, height, width ]`
-//   "NCHW_VECT_C":
-//       `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
-//
-// It is useful to consider the operation as transforming a 6-D Tensor.
-// e.g. for data_format = NHWC,
-//      Each element in the input tensor can be specified via 6 coordinates,
-//      ordered by decreasing memory layout significance as:
-//      n,oY,bY,oX,bX,iC  (where n=batch index, oX, oY means X or Y coordinates
-//                         within the output image, bX, bY means coordinates
-//                         within the input block, iC means input channels).
-//      The output would be a transpose to the following layout:
-//      n,oY,oX,bY,bX,iC
-//
-// This operation is useful for resizing the activations between convolutions
-// (but keeping all data), e.g. instead of pooling. It is also useful for training
-// purely convolutional models.
-//
-// For example, given an input of shape `[1, 2, 2, 1]`, data_format = "NHWC" and
-// block_size = 2:
-//
-// ```
-// x = [[[[1], [2]],
-//       [[3], [4]]]]
-// ```
-//
-// This operation will output a tensor of shape `[1, 1, 1, 4]`:
-//
-// ```
-// [[[[1, 2, 3, 4]]]]
-// ```
-//
-// Here, the input has a batch of 1 and each batch element has shape `[2, 2, 1]`,
-// the corresponding output will have a single element (i.e. width and height are
-// both 1) and will have a depth of 4 channels (1 * block_size * block_size).
-// The output element shape is `[1, 1, 4]`.
-//
-// For an input tensor with larger depth, here of shape `[1, 2, 2, 3]`, e.g.
-//
-// ```
-// x = [[[[1, 2, 3], [4, 5, 6]],
-//       [[7, 8, 9], [10, 11, 12]]]]
-// ```
+	opspec := tf.OpSpec{
+		Type: "Lgamma",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the reverse mode backpropagated gradient of the Cholesky algorithm.
 //
-// This operation, for block_size of 2, will return the following tensor of shape
-// `[1, 1, 1, 12]`
+// For an explanation see "Differentiation of the Cholesky algorithm" by
+// Iain Murray http://arxiv.org/abs/1602.07527.
 //
-// ```
-// [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]
-// ```
+// Arguments:
+//	l: Output of batch Cholesky algorithm l = cholesky(A). Shape is `[..., M, M]`.
+// Algorithm depends only on lower triangular part of the innermost matrices of
+// this tensor.
+//	grad: df/dl where f is some scalar function. Shape is `[..., M, M]`.
+// Algorithm depends only on lower triangular part of the innermost matrices of
+// this tensor.
 //
-// Similarly, for the following input of shape `[1 4 4 1]`, and a block size of 2:
+// Returns Symmetrized version of df/dA . Shape is `[..., M, M]`
+func CholeskyGrad(scope *Scope, l tf.Output, grad tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "CholeskyGrad",
+		Input: []tf.Input{
+			l, grad,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes inverse hyperbolic cosine of x element-wise.
+func Acosh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Acosh",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// SerializeManySparseAttr is an optional argument to SerializeManySparse.
+type SerializeManySparseAttr func(optionalAttr)
+
+// SerializeManySparseOutType sets the optional out_type attribute to value.
 //
-// ```
-// x = [[[[1],   [2],  [5],  [6]],
-//       [[3],   [4],  [7],  [8]],
-//       [[9],  [10], [13],  [14]],
-//       [[11], [12], [15],  [16]]]]
-// ```
+// value: The `dtype` to use for serialization; the supported types are `string`
+// (default) and `variant`.
+// If not specified, defaults to DT_STRING
+func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object.
 //
-// the operator will return the following tensor of shape `[1 2 2 4]`:
+// The `SparseTensor` must have rank `R` greater than 1, and the first dimension
+// is treated as the minibatch dimension.  Elements of the `SparseTensor`
+// must be sorted in increasing order of this first dimension.  The serialized
+// `SparseTensor` objects going into each row of `serialized_sparse` will have
+// rank `R-1`.
 //
-// ```
-// x = [[[[1, 2, 3, 4],
-//        [5, 6, 7, 8]],
-//       [[9, 10, 11, 12],
-//        [13, 14, 15, 16]]]]
-// ```
+// The minibatch size `N` is extracted from `sparse_shape[0]`.
 //
 // Arguments:
-//
-//	block_size: The size of the spatial block.
-func SpaceToDepth(scope *Scope, input tf.Output, block_size int64, optional ...SpaceToDepthAttr) (output tf.Output) {
+//	sparse_indices: 2-D.  The `indices` of the minibatch `SparseTensor`.
+//	sparse_values: 1-D.  The `values` of the minibatch `SparseTensor`.
+//	sparse_shape: 1-D.  The `shape` of the minibatch `SparseTensor`.
+func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"block_size": block_size}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SpaceToDepth",
+		Type: "SerializeManySparse",
 		Input: []tf.Input{
-			input,
+			sparse_indices, sparse_values, sparse_shape,
 		},
 		Attrs: attrs,
 	}
@@ -2800,171 +2321,116 @@ func SpaceToDepth(scope *Scope, input tf.Output, block_size int64, optional ...S
 	return op.Output(0)
 }
 
-// Scatter `updates` into a new (initially zero) tensor according to `indices`.
-//
-// Creates a new tensor by applying sparse `updates` to individual
-// values or slices within a zero tensor of the given `shape` according to
-// indices.  This operator is the inverse of the @{tf.gather_nd} operator which
-// extracts values or slices from a given tensor.
-//
-// **WARNING**: The order in which updates are applied is nondeterministic, so the
-// output will be nondeterministic if `indices` contains duplicates.
-//
-// `indices` is an integer tensor containing indices into a new tensor of shape
-// `shape`.  The last dimension of `indices` can be at most the rank of `shape`:
-//
-//     indices.shape[-1] <= shape.rank
-//
-// The last dimension of `indices` corresponds to indices into elements
-// (if `indices.shape[-1] = shape.rank`) or slices
-// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of
-// `shape`.  `updates` is a tensor with shape
-//
-//     indices.shape[:-1] + shape[indices.shape[-1]:]
-//
-// The simplest form of scatter is to insert individual elements in a tensor by
-// index. For example, say we want to insert 4 scattered elements in a rank-1
-// tensor with 8 elements.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/ScatterNd1.png" alt>
-// </div>
-//
-// In Python, this scatter operation would look like this:
-//
-// ```python
-//     indices = tf.constant([[4], [3], [1], [7]])
-//     updates = tf.constant([9, 10, 11, 12])
-//     shape = tf.constant([8])
-//     scatter = tf.scatter_nd(indices, updates, shape)
-//     with tf.Session() as sess:
-//       print(sess.run(scatter))
-// ```
-//
-// The resulting tensor would look like this:
-//
-//     [0, 11, 0, 10, 9, 0, 0, 12]
-//
-// We can also, insert entire slices of a higher rank tensor all at once. For
-// example, if we wanted to insert two slices in the first dimension of a
-// rank-3 tensor with two matrices of new values.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/ScatterNd2.png" alt>
-// </div>
-//
-// In Python, this scatter operation would look like this:
-//
-// ```python
-//     indices = tf.constant([[0], [2]])
-//     updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6],
-//                             [7, 7, 7, 7], [8, 8, 8, 8]],
-//                            [[5, 5, 5, 5], [6, 6, 6, 6],
-//                             [7, 7, 7, 7], [8, 8, 8, 8]]])
-//     shape = tf.constant([4, 4, 4])
-//     scatter = tf.scatter_nd(indices, updates, shape)
-//     with tf.Session() as sess:
-//       print(sess.run(scatter))
-// ```
-//
-// The resulting tensor would look like this:
-//
-//     [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
-//      [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
-//      [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
-//      [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]
-//
-// Arguments:
-//	indices: Index tensor.
-//	updates: Updates to scatter into output.
-//	shape: 1-D. The shape of the resulting tensor.
-//
-// Returns A new tensor with the given shape and updates applied according
-// to the indices.
-func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Output) (output tf.Output) {
+// TensorArrayV2Attr is an optional argument to TensorArrayV2.
+type TensorArrayV2Attr func(optionalAttr)
+
+// TensorArrayV2ElementShape sets the optional element_shape attribute to value.
+// If not specified, defaults to <unknown_rank:true >
+func TensorArrayV2ElementShape(value tf.Shape) TensorArrayV2Attr {
+	return func(m optionalAttr) {
+		m["element_shape"] = value
+	}
+}
+
+// TensorArrayV2DynamicSize sets the optional dynamic_size attribute to value.
+// If not specified, defaults to false
+func TensorArrayV2DynamicSize(value bool) TensorArrayV2Attr {
+	return func(m optionalAttr) {
+		m["dynamic_size"] = value
+	}
+}
+
+// TensorArrayV2ClearAfterRead sets the optional clear_after_read attribute to value.
+// If not specified, defaults to true
+func TensorArrayV2ClearAfterRead(value bool) TensorArrayV2Attr {
+	return func(m optionalAttr) {
+		m["clear_after_read"] = value
+	}
+}
+
+// TensorArrayV2TensorArrayName sets the optional tensor_array_name attribute to value.
+// If not specified, defaults to ""
+func TensorArrayV2TensorArrayName(value string) TensorArrayV2Attr {
+	return func(m optionalAttr) {
+		m["tensor_array_name"] = value
+	}
+}
+
+// Deprecated. Use TensorArrayV3
+func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV2Attr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "ScatterNd",
+		Type: "TensorArrayV2",
 		Input: []tf.Input{
-			indices, updates, shape,
+			size,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Exits the current frame to its parent frame.
+// Computes the mean along sparse segments of a tensor.
 //
-// Exit makes its input `data` available to the parent frame.
+// Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is
+// misisng, the `output` tensor at that position will be zeroed.
+//
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
 //
 // Arguments:
-//	data: The tensor to be made available to the parent frame.
 //
-// Returns The same tensor as `data`.
-func Exit(scope *Scope, data tf.Output) (output tf.Output) {
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//	num_segments: Should equal the number of distinct segment IDs.
+//
+// Returns Has same shape as data, except for dimension 0 which has size
+// `num_segments`.
+func SparseSegmentMeanWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Exit",
+		Type: "SparseSegmentMeanWithNumSegments",
 		Input: []tf.Input{
-			data,
+			data, indices, segment_ids, num_segments,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// EnterAttr is an optional argument to Enter.
-type EnterAttr func(optionalAttr)
-
-// EnterIsConstant sets the optional is_constant attribute to value.
-//
-// value: If true, the output is constant within the child frame.
-// If not specified, defaults to false
-func EnterIsConstant(value bool) EnterAttr {
-	return func(m optionalAttr) {
-		m["is_constant"] = value
+// Computes hyperbolic cosine of x element-wise.
+func Cosh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// EnterParallelIterations sets the optional parallel_iterations attribute to value.
-//
-// value: The number of iterations allowed to run in parallel.
-// If not specified, defaults to 10
-func EnterParallelIterations(value int64) EnterAttr {
-	return func(m optionalAttr) {
-		m["parallel_iterations"] = value
+	opspec := tf.OpSpec{
+		Type: "Cosh",
+		Input: []tf.Input{
+			x,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Creates or finds a child frame, and makes `data` available to the child frame.
-//
-// This op is used together with `Exit` to create loops in the graph.
-// The unique `frame_name` is used by the `Executor` to identify frames. If
-// `is_constant` is true, `output` is a constant in the child frame; otherwise
-// it may be changed in the child frame. At most `parallel_iterations` iterations
-// are run in parallel in the child frame.
-//
-// Arguments:
-//	data: The tensor to be made available to the child frame.
-//	frame_name: The name of the child frame.
-//
-// Returns The same tensor as `data`.
-func Enter(scope *Scope, data tf.Output, frame_name string, optional ...EnterAttr) (output tf.Output) {
+// Creates a dataset that emits each dim-0 slice of `components` once.
+func TensorSliceDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"frame_name": frame_name}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "Enter",
+		Type: "TensorSliceDataset",
 		Input: []tf.Input{
-			data,
+			tf.OutputList(components),
 		},
 		Attrs: attrs,
 	}
@@ -2972,67 +2438,73 @@ func Enter(scope *Scope, data tf.Output, frame_name string, optional ...EnterAtt
 	return op.Output(0)
 }
 
-// Forwards `data` to the output port determined by `pred`.
-//
-// If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,
-// the data goes to `output_false`.
+// Computes natural logarithm of (1 + x) element-wise.
 //
-// See also `RefSwitch` and `Merge`.
+// I.e., \\(y = \log_e (1 + x)\\).
+func Log1p(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Log1p",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes rectified linear 6 gradients for a Relu6 operation.
 //
 // Arguments:
-//	data: The tensor to be forwarded to the appropriate output.
-//	pred: A scalar that specifies which output port will receive data.
+//	gradients: The backpropagated gradients to the corresponding Relu6 operation.
+//	features: The features passed as input to the corresponding Relu6 operation, or
+// its output; using either one produces the same result.
 //
-// Returns If `pred` is false, data will be forwarded to this output.If `pred` is true, data will be forwarded to this output.
-func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Output, output_true tf.Output) {
+// Returns The gradients:
+// `gradients * (features > 0) * (features < 6)`.
+func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Switch",
+		Type: "Relu6Grad",
 		Input: []tf.Input{
-			data, pred,
+			gradients, features,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// CTCGreedyDecoderAttr is an optional argument to CTCGreedyDecoder.
-type CTCGreedyDecoderAttr func(optionalAttr)
+// ResizeBicubicAttr is an optional argument to ResizeBicubic.
+type ResizeBicubicAttr func(optionalAttr)
 
-// CTCGreedyDecoderMergeRepeated sets the optional merge_repeated attribute to value.
+// ResizeBicubicAlignCorners sets the optional align_corners attribute to value.
 //
-// value: If True, merge repeated classes in output.
+// value: If true, rescale input by (new_height - 1) / (height - 1), which
+// exactly aligns the 4 corners of images and resized images. If false, rescale
+// by new_height / height. Treat similarly the width dimension.
 // If not specified, defaults to false
-func CTCGreedyDecoderMergeRepeated(value bool) CTCGreedyDecoderAttr {
+func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr {
 	return func(m optionalAttr) {
-		m["merge_repeated"] = value
+		m["align_corners"] = value
 	}
 }
 
-// Performs greedy decoding on the logits given in inputs.
-//
-// A note about the attribute merge_repeated: if enabled, when
-// consecutive logits' maximum indices are the same, only the first of
-// these is emitted.  Labeling the blank '*', the sequence "A B B * B B"
-// becomes "A B B" if merge_repeated = True and "A B B B B" if
-// merge_repeated = False.
+// Resize `images` to `size` using bicubic interpolation.
 //
-// Regardless of the value of merge_repeated, if the maximum index of a given
-// time and batch corresponds to the blank, index `(num_classes - 1)`, no new
-// element is emitted.
+// Input images can be of different types but output images are always float.
 //
 // Arguments:
-//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-//	sequence_length: A vector containing sequence lengths, size `(batch_size)`.
+//	images: 4-D with shape `[batch, height, width, channels]`.
+//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+// new size for the images.
 //
-// Returns Indices matrix, size `(total_decoded_outputs x 2)`,
-// of a `SparseTensor<int64, 2>`.  The rows store: [batch, time].Values vector, size: `(total_decoded_outputs)`,
-// of a `SparseTensor<int64, 2>`.  The vector stores the decoded classes.Shape vector, size `(2)`, of the decoded SparseTensor.
-// Values are: `[batch_size, max_decoded_length]`.Matrix, size `(batch_size x 1)`, containing sequence
-// log-probabilities.
-func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, optional ...CTCGreedyDecoderAttr) (decoded_indices tf.Output, decoded_values tf.Output, decoded_shape tf.Output, log_probability tf.Output) {
+// Returns 4-D with shape
+// `[batch, new_height, new_width, channels]`.
+func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -3041,374 +2513,358 @@ func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output,
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "CTCGreedyDecoder",
+		Type: "ResizeBicubic",
 		Input: []tf.Input{
-			inputs, sequence_length,
+			images, size,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
-}
-
-// CTCLossAttr is an optional argument to CTCLoss.
-type CTCLossAttr func(optionalAttr)
-
-// CTCLossPreprocessCollapseRepeated sets the optional preprocess_collapse_repeated attribute to value.
-//
-// value: Scalar, if true then repeated labels are
-// collapsed prior to the CTC calculation.
-// If not specified, defaults to false
-func CTCLossPreprocessCollapseRepeated(value bool) CTCLossAttr {
-	return func(m optionalAttr) {
-		m["preprocess_collapse_repeated"] = value
-	}
+	return op.Output(0)
 }
 
-// CTCLossCtcMergeRepeated sets the optional ctc_merge_repeated attribute to value.
+// Computes natural logarithm of x element-wise.
 //
-// value: Scalar.  If set to false, *during* CTC calculation
-// repeated non-blank labels will not be merged and are interpreted as
-// individual labels.  This is a simplified version of CTC.
-// If not specified, defaults to true
-func CTCLossCtcMergeRepeated(value bool) CTCLossAttr {
-	return func(m optionalAttr) {
-		m["ctc_merge_repeated"] = value
+// I.e., \\(y = \log_e x\\).
+func Log(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// CTCLossIgnoreLongerOutputsThanInputs sets the optional ignore_longer_outputs_than_inputs attribute to value.
-//
-// value: Scalar. If set to true, during CTC
-// calculation, items that have longer output sequences than input sequences
-// are skipped: they don't contribute to the loss term and have zero-gradient.
-// If not specified, defaults to false
-func CTCLossIgnoreLongerOutputsThanInputs(value bool) CTCLossAttr {
-	return func(m optionalAttr) {
-		m["ignore_longer_outputs_than_inputs"] = value
+	opspec := tf.OpSpec{
+		Type: "Log",
+		Input: []tf.Input{
+			x,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Calculates the CTC Loss (log probability) for each batch entry.  Also calculates
-//
-// the gradient.  This class performs the softmax operation for you, so inputs
-// should be e.g. linear projections of outputs by an LSTM.
-//
-// Arguments:
-//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-//	labels_indices: The indices of a `SparseTensor<int32, 2>`.
-// `labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for
-// `(batch b, time t)`.
-//	labels_values: The values (labels) associated with the given batch and time.
-//	sequence_length: A vector containing sequence lengths (batch).
+// Rounds the values of a tensor to the nearest integer, element-wise.
 //
-// Returns A vector (batch) containing log-probabilities.The gradient of `loss`.  3-D, shape:
-// `(max_time x batch_size x num_classes)`.
-func CTCLoss(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_values tf.Output, sequence_length tf.Output, optional ...CTCLossAttr) (loss tf.Output, gradient tf.Output) {
+// Rounds half to even.  Also known as bankers rounding. If you want to round
+// according to the current system rounding mode use std::cint.
+func Round(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "CTCLoss",
+		Type: "Round",
 		Input: []tf.Input{
-			inputs, labels_indices, labels_values, sequence_length,
+			x,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// OrderedMapSizeAttr is an optional argument to OrderedMapSize.
-type OrderedMapSizeAttr func(optionalAttr)
+// RecordInputAttr is an optional argument to RecordInput.
+type RecordInputAttr func(optionalAttr)
 
-// OrderedMapSizeCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// RecordInputFileRandomSeed sets the optional file_random_seed attribute to value.
 //
-// REQUIRES: value >= 0
-func OrderedMapSizeCapacity(value int64) OrderedMapSizeAttr {
+// value: Random seeds used to produce randomized records.
+// If not specified, defaults to 301
+func RecordInputFileRandomSeed(value int64) RecordInputAttr {
 	return func(m optionalAttr) {
-		m["capacity"] = value
+		m["file_random_seed"] = value
 	}
 }
 
-// OrderedMapSizeMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// RecordInputFileShuffleShiftRatio sets the optional file_shuffle_shift_ratio attribute to value.
 //
-// REQUIRES: value >= 0
-func OrderedMapSizeMemoryLimit(value int64) OrderedMapSizeAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// OrderedMapSizeContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func OrderedMapSizeContainer(value string) OrderedMapSizeAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// OrderedMapSizeSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func OrderedMapSizeSharedName(value string) OrderedMapSizeAttr {
+// value: Shifts the list of files after the list is randomly
+// shuffled.
+// If not specified, defaults to 0
+func RecordInputFileShuffleShiftRatio(value float32) RecordInputAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op returns the number of elements in the underlying container.
-func OrderedMapSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapSizeAttr) (size tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "OrderedMapSize",
-
-		Attrs: attrs,
+		m["file_shuffle_shift_ratio"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// OrderedMapUnstageAttr is an optional argument to OrderedMapUnstage.
-type OrderedMapUnstageAttr func(optionalAttr)
-
-// OrderedMapUnstageCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// RecordInputFileBufferSize sets the optional file_buffer_size attribute to value.
 //
-// REQUIRES: value >= 0
-func OrderedMapUnstageCapacity(value int64) OrderedMapUnstageAttr {
+// value: The randomization shuffling buffer.
+// If not specified, defaults to 10000
+func RecordInputFileBufferSize(value int64) RecordInputAttr {
 	return func(m optionalAttr) {
-		m["capacity"] = value
+		m["file_buffer_size"] = value
 	}
 }
 
-// OrderedMapUnstageMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// RecordInputFileParallelism sets the optional file_parallelism attribute to value.
 //
-// REQUIRES: value >= 0
-func OrderedMapUnstageMemoryLimit(value int64) OrderedMapUnstageAttr {
+// value: How many sstables are opened and concurrently iterated over.
+// If not specified, defaults to 16
+func RecordInputFileParallelism(value int64) RecordInputAttr {
 	return func(m optionalAttr) {
-		m["memory_limit"] = value
+		m["file_parallelism"] = value
 	}
 }
 
-// OrderedMapUnstageContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func OrderedMapUnstageContainer(value string) OrderedMapUnstageAttr {
+// RecordInputBatchSize sets the optional batch_size attribute to value.
+//
+// value: The batch size.
+// If not specified, defaults to 32
+func RecordInputBatchSize(value int64) RecordInputAttr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["batch_size"] = value
 	}
 }
 
-// OrderedMapUnstageSharedName sets the optional shared_name attribute to value.
+// RecordInputCompressionType sets the optional compression_type attribute to value.
+//
+// value: The type of compression for the file. Currently ZLIB and
+// GZIP are supported. Defaults to none.
 // If not specified, defaults to ""
-func OrderedMapUnstageSharedName(value string) OrderedMapUnstageAttr {
+func RecordInputCompressionType(value string) RecordInputAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["compression_type"] = value
 	}
 }
 
-// Op removes and returns the values associated with the key
+// Emits randomized records.
 //
-// from the underlying container.   If the underlying container
-// does not contain this key, the op will block until it does.
-func OrderedMapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageAttr) (values []tf.Output) {
+// Arguments:
+//	file_pattern: Glob pattern for the data files.
+//
+// Returns A tensor of shape [batch_size].
+func RecordInput(scope *Scope, file_pattern string, optional ...RecordInputAttr) (records tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
+	attrs := map[string]interface{}{"file_pattern": file_pattern}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "OrderedMapUnstage",
-		Input: []tf.Input{
-			key, indices,
-		},
+		Type: "RecordInput",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes reciprocal of square root of x element-wise.
+//
+// I.e., \\(y = 1 / \sqrt{x}\\).
+func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	var idx int
-	var err error
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("OrderedMapUnstage", err)
-		return
+	opspec := tf.OpSpec{
+		Type: "Rsqrt",
+		Input: []tf.Input{
+			x,
+		},
 	}
-	return values
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// MapIncompleteSizeAttr is an optional argument to MapIncompleteSize.
-type MapIncompleteSizeAttr func(optionalAttr)
-
-// MapIncompleteSizeCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// Inserts a dimension of 1 into a tensor's shape.
 //
-// REQUIRES: value >= 0
-func MapIncompleteSizeCapacity(value int64) MapIncompleteSizeAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// MapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// Given a tensor `input`, this operation inserts a dimension of 1 at the
+// dimension index `axis` of `input`'s shape. The dimension index `axis` starts at
+// zero; if you specify a negative number for `axis` it is counted backward from
+// the end.
 //
-// REQUIRES: value >= 0
-func MapIncompleteSizeMemoryLimit(value int64) MapIncompleteSizeAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
+// This operation is useful if you want to add a batch dimension to a single
+// element. For example, if you have a single image of shape `[height, width,
+// channels]`, you can make it a batch of 1 image with `expand_dims(image, 0)`,
+// which will make the shape `[1, height, width, channels]`.
+//
+// Other examples:
+//
+// ```
+// # 't' is a tensor of shape [2]
+// shape(expand_dims(t, 0)) ==> [1, 2]
+// shape(expand_dims(t, 1)) ==> [2, 1]
+// shape(expand_dims(t, -1)) ==> [2, 1]
+//
+// # 't2' is a tensor of shape [2, 3, 5]
+// shape(expand_dims(t2, 0)) ==> [1, 2, 3, 5]
+// shape(expand_dims(t2, 2)) ==> [2, 3, 1, 5]
+// shape(expand_dims(t2, 3)) ==> [2, 3, 5, 1]
+// ```
+//
+// This operation requires that:
+//
+// `-1-input.dims() <= dim <= input.dims()`
+//
+// This operation is related to `squeeze()`, which removes dimensions of
+// size 1.
+//
+// Arguments:
+//
+//	axis: 0-D (scalar). Specifies the dimension index at which to
+// expand the shape of `input`. Must be in the range
+// `[-rank(input) - 1, rank(input)]`.
+//
+// Returns Contains the same data as `input`, but its shape has an additional
+// dimension of size 1 added.
+func ExpandDims(scope *Scope, input tf.Output, axis tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// MapIncompleteSizeContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func MapIncompleteSizeContainer(value string) MapIncompleteSizeAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
+	opspec := tf.OpSpec{
+		Type: "ExpandDims",
+		Input: []tf.Input{
+			input, axis,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// MapIncompleteSizeSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func MapIncompleteSizeSharedName(value string) MapIncompleteSizeAttr {
+// MatrixInverseAttr is an optional argument to MatrixInverse.
+type MatrixInverseAttr func(optionalAttr)
+
+// MatrixInverseAdjoint sets the optional adjoint attribute to value.
+// If not specified, defaults to false
+func MatrixInverseAdjoint(value bool) MatrixInverseAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["adjoint"] = value
 	}
 }
 
-// Op returns the number of incomplete elements in the underlying container.
-func MapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...MapIncompleteSizeAttr) (size tf.Output) {
+// Computes the inverse of one or more square invertible matrices or their
+//
+// adjoints (conjugate transposes).
+//
+// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
+// form square matrices. The output is a tensor of the same shape as the input
+// containing the inverse for all input submatrices `[..., :, :]`.
+//
+// The op uses LU decomposition with partial pivoting to compute the inverses.
+//
+// If a matrix is not invertible there is no guarantee what the op does. It
+// may detect the condition and raise an exception or it may simply return a
+// garbage result.
+//
+// Arguments:
+//	input: Shape is `[..., M, M]`.
+//
+// Returns Shape is `[..., M, M]`.
+//
+// @compatibility(numpy)
+// Equivalent to np.linalg.inv
+// @end_compatibility
+func MatrixInverse(scope *Scope, input tf.Output, optional ...MatrixInverseAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MapIncompleteSize",
-
+		Type: "MatrixInverse",
+		Input: []tf.Input{
+			input,
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MapSizeAttr is an optional argument to MapSize.
-type MapSizeAttr func(optionalAttr)
-
-// MapSizeCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// Computes square of x element-wise.
 //
-// REQUIRES: value >= 0
-func MapSizeCapacity(value int64) MapSizeAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
+// I.e., \\(y = x * x = x^2\\).
+func Square(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// MapSizeMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func MapSizeMemoryLimit(value int64) MapSizeAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
+	opspec := tf.OpSpec{
+		Type: "Square",
+		Input: []tf.Input{
+			x,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// MapSizeContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func MapSizeContainer(value string) MapSizeAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
+// Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise.
+//
+// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)
+// ](http://arxiv.org/abs/1511.07289)
+func Elu(scope *Scope, features tf.Output) (activations tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// MapSizeSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func MapSizeSharedName(value string) MapSizeAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
+	opspec := tf.OpSpec{
+		Type: "Elu",
+		Input: []tf.Input{
+			features,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Op returns the number of elements in the underlying container.
-func MapSize(scope *Scope, dtypes []tf.DataType, optional ...MapSizeAttr) (size tf.Output) {
+// Computes the reciprocal of x element-wise.
+//
+// I.e., \\(y = 1 / x\\).
+func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "MapSize",
-
-		Attrs: attrs,
+		Type: "Reciprocal",
+		Input: []tf.Input{
+			x,
+		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MapUnstageAttr is an optional argument to MapUnstage.
-type MapUnstageAttr func(optionalAttr)
+// OrderedMapClearAttr is an optional argument to OrderedMapClear.
+type OrderedMapClearAttr func(optionalAttr)
 
-// MapUnstageCapacity sets the optional capacity attribute to value.
+// OrderedMapClearCapacity sets the optional capacity attribute to value.
 // If not specified, defaults to 0
 //
 // REQUIRES: value >= 0
-func MapUnstageCapacity(value int64) MapUnstageAttr {
+func OrderedMapClearCapacity(value int64) OrderedMapClearAttr {
 	return func(m optionalAttr) {
 		m["capacity"] = value
 	}
 }
 
-// MapUnstageMemoryLimit sets the optional memory_limit attribute to value.
+// OrderedMapClearMemoryLimit sets the optional memory_limit attribute to value.
 // If not specified, defaults to 0
 //
 // REQUIRES: value >= 0
-func MapUnstageMemoryLimit(value int64) MapUnstageAttr {
+func OrderedMapClearMemoryLimit(value int64) OrderedMapClearAttr {
 	return func(m optionalAttr) {
 		m["memory_limit"] = value
 	}
 }
 
-// MapUnstageContainer sets the optional container attribute to value.
+// OrderedMapClearContainer sets the optional container attribute to value.
 // If not specified, defaults to ""
-func MapUnstageContainer(value string) MapUnstageAttr {
+func OrderedMapClearContainer(value string) OrderedMapClearAttr {
 	return func(m optionalAttr) {
 		m["container"] = value
 	}
 }
 
-// MapUnstageSharedName sets the optional shared_name attribute to value.
+// OrderedMapClearSharedName sets the optional shared_name attribute to value.
 // If not specified, defaults to ""
-func MapUnstageSharedName(value string) MapUnstageAttr {
+func OrderedMapClearSharedName(value string) OrderedMapClearAttr {
 	return func(m optionalAttr) {
 		m["shared_name"] = value
 	}
 }
 
-// Op removes and returns the values associated with the key
+// Op removes all elements in the underlying container.
 //
-// from the underlying container.   If the underlying container
-// does not contain this key, the op will block until it does.
-func MapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageAttr) (values []tf.Output) {
+// Returns the created operation.
+func OrderedMapClear(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapClearAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -3417,950 +2873,1141 @@ func MapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.Data
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MapUnstage",
+		Type: "OrderedMapClear",
+
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Computes the reciprocal of x element-wise.
+//
+// I.e., \\(y = 1 / x\\).
+func Inv(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Inv",
 		Input: []tf.Input{
-			key, indices,
+			x,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ComplexAbsAttr is an optional argument to ComplexAbs.
+type ComplexAbsAttr func(optionalAttr)
+
+// ComplexAbsTout sets the optional Tout attribute to value.
+// If not specified, defaults to DT_FLOAT
+func ComplexAbsTout(value tf.DataType) ComplexAbsAttr {
+	return func(m optionalAttr) {
+		m["Tout"] = value
+	}
+}
+
+// Computes the complex absolute value of a tensor.
+//
+// Given a tensor `x` of complex numbers, this operation returns a tensor of type
+// `float` or `double` that is the absolute value of each element in `x`. All
+// elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute
+// value is computed as \\( \sqrt{a^2 + b^2}\\).
+func ComplexAbs(scope *Scope, x tf.Output, optional ...ComplexAbsAttr) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	var idx int
-	var err error
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("MapUnstage", err)
-		return
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
 	}
-	return values
+	opspec := tf.OpSpec{
+		Type: "ComplexAbs",
+		Input: []tf.Input{
+			x,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Forwards the value of an available tensor from `inputs` to `output`.
-//
-// `Merge` waits for at least one of the tensors in `inputs` to become available.
-// It is usually combined with `Switch` to implement branching.
-//
-// `Merge` forwards the first tensor to become available to `output`, and sets
-// `value_index` to its index in `inputs`.
-//
-// Arguments:
-//	inputs: The input tensors, exactly one of which will become available.
+// Returns the truth value of x AND y element-wise.
 //
-// Returns Will be set to the available input tensor.The index of the chosen input tensor in `inputs`.
-func Merge(scope *Scope, inputs []tf.Output) (output tf.Output, value_index tf.Output) {
+// *NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func LogicalAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Merge",
+		Type: "LogicalAnd",
 		Input: []tf.Input{
-			tf.OutputList(inputs),
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// MapPeekAttr is an optional argument to MapPeek.
-type MapPeekAttr func(optionalAttr)
+// Cast x of type SrcT to y of DstT.
+func Cast(scope *Scope, x tf.Output, DstT tf.DataType) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"DstT": DstT}
+	opspec := tf.OpSpec{
+		Type: "Cast",
+		Input: []tf.Input{
+			x,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// MapPeekCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// MaxAttr is an optional argument to Max.
+type MaxAttr func(optionalAttr)
+
+// MaxKeepDims sets the optional keep_dims attribute to value.
 //
-// REQUIRES: value >= 0
-func MapPeekCapacity(value int64) MapPeekAttr {
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func MaxKeepDims(value bool) MaxAttr {
 	return func(m optionalAttr) {
-		m["capacity"] = value
+		m["keep_dims"] = value
 	}
 }
 
-// MapPeekMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// Computes the maximum of elements across dimensions of a tensor.
 //
-// REQUIRES: value >= 0
-func MapPeekMemoryLimit(value int64) MapPeekAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
+// Reduces `input` along the dimensions given in `axis`. Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `axis`. If `keep_dims` is true, the reduced dimensions are
+// retained with length 1.
+//
+// Arguments:
+//	input: The tensor to reduce.
+//	axis: The dimensions to reduce. Must be in the range
+// `[-rank(input), rank(input))`.
+//
+// Returns The reduced tensor.
+func Max(scope *Scope, input tf.Output, axis tf.Output, optional ...MaxAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Max",
+		Input: []tf.Input{
+			input, axis,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// MapPeekContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func MapPeekContainer(value string) MapPeekAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
+// Quantized Batch normalization.
+//
+// This op is deprecated and will be removed in the future. Prefer
+// `tf.nn.batch_normalization`.
+//
+// Arguments:
+//	t: A 4D input Tensor.
+//	t_min: The value represented by the lowest quantized input.
+//	t_max: The value represented by the highest quantized input.
+//	m: A 1D mean Tensor with size matching the last dimension of t.
+// This is the first output from tf.nn.moments,
+// or a saved moving average thereof.
+//	m_min: The value represented by the lowest quantized mean.
+//	m_max: The value represented by the highest quantized mean.
+//	v: A 1D variance Tensor with size matching the last dimension of t.
+// This is the second output from tf.nn.moments,
+// or a saved moving average thereof.
+//	v_min: The value represented by the lowest quantized variance.
+//	v_max: The value represented by the highest quantized variance.
+//	beta: A 1D beta Tensor with size matching the last dimension of t.
+// An offset to be added to the normalized tensor.
+//	beta_min: The value represented by the lowest quantized offset.
+//	beta_max: The value represented by the highest quantized offset.
+//	gamma: A 1D gamma Tensor with size matching the last dimension of t.
+// If "scale_after_normalization" is true, this tensor will be multiplied
+// with the normalized tensor.
+//	gamma_min: The value represented by the lowest quantized gamma.
+//	gamma_max: The value represented by the highest quantized gamma.
+//
+//	variance_epsilon: A small float number to avoid dividing by 0.
+//	scale_after_normalization: A bool indicating whether the resulted tensor
+// needs to be multiplied with gamma.
+func QuantizedBatchNormWithGlobalNormalization(scope *Scope, t tf.Output, t_min tf.Output, t_max tf.Output, m tf.Output, m_min tf.Output, m_max tf.Output, v tf.Output, v_min tf.Output, v_max tf.Output, beta tf.Output, beta_min tf.Output, beta_max tf.Output, gamma tf.Output, gamma_min tf.Output, gamma_max tf.Output, out_type tf.DataType, variance_epsilon float32, scale_after_normalization bool) (result tf.Output, result_min tf.Output, result_max tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type, "variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization}
+	opspec := tf.OpSpec{
+		Type: "QuantizedBatchNormWithGlobalNormalization",
+		Input: []tf.Input{
+			t, t_min, t_max, m, m_min, m_max, v, v_min, v_max, beta, beta_min, beta_max, gamma, gamma_min, gamma_max,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// MapPeekSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func MapPeekSharedName(value string) MapPeekAttr {
+// HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth.
+type HistogramFixedWidthAttr func(optionalAttr)
+
+// HistogramFixedWidthDtype sets the optional dtype attribute to value.
+// If not specified, defaults to DT_INT32
+func HistogramFixedWidthDtype(value tf.DataType) HistogramFixedWidthAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["dtype"] = value
 	}
 }
 
-// Op peeks at the values at the specified key.  If the
+// Return histogram of values.
 //
-// underlying container does not contain this key
-// this op will block until it does.
-func MapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapPeekAttr) (values []tf.Output) {
+// Given the tensor `values`, this operation returns a rank 1 histogram counting
+// the number of entries in `values` that fall into every bin.  The bins are
+// equal width and determined by the arguments `value_range` and `nbins`.
+//
+// ```python
+// # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
+// nbins = 5
+// value_range = [0.0, 5.0]
+// new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
+//
+// with tf.get_default_session() as sess:
+//   hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
+//   variables.global_variables_initializer().run()
+//   sess.run(hist) => [2, 1, 1, 0, 2]
+// ```
+//
+// Arguments:
+//	values: Numeric `Tensor`.
+//	value_range: Shape [2] `Tensor` of same `dtype` as `values`.
+// values <= value_range[0] will be mapped to hist[0],
+// values >= value_range[1] will be mapped to hist[-1].
+//	nbins: Scalar `int32 Tensor`.  Number of histogram bins.
+//
+// Returns A 1-D `Tensor` holding histogram of values.
+func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, nbins tf.Output, optional ...HistogramFixedWidthAttr) (out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MapPeek",
+		Type: "HistogramFixedWidth",
 		Input: []tf.Input{
-			key, indices,
+			values, value_range, nbins,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates summary database writer accessible by given resource handle.
+//
+// This can be used to write tensors from the execution graph directly
+// to a database. Only SQLite is supported right now. This function
+// will create the schema if it doesn't exist. Entries in the Users,
+// Experiments, and Runs tables will be created automatically if they
+// don't already exist.
+//
+// Arguments:
+//	writer: Handle to SummaryWriter resource to overwrite.
+//	db_uri: For example "file:/tmp/foo.sqlite".
+//	experiment_name: Can't contain ASCII control characters or <>. Case
+// sensitive. If empty, then the Run will not be associated with any
+// Experiment.
+//	run_name: Can't contain ASCII control characters or <>. Case sensitive.
+// If empty, then each Tag will not be associated with any Run.
+//	user_name: Must be valid as both a DNS label and Linux username. If
+// empty, then the Experiment will not be associated with any User.
+//
+// Returns the created operation.
+func CreateSummaryDbWriter(scope *Scope, writer tf.Output, db_uri tf.Output, experiment_name tf.Output, run_name tf.Output, user_name tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	var idx int
-	var err error
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("MapPeek", err)
-		return
+	opspec := tf.OpSpec{
+		Type: "CreateSummaryDbWriter",
+		Input: []tf.Input{
+			writer, db_uri, experiment_name, run_name, user_name,
+		},
 	}
-	return values
+	return scope.AddOperation(opspec)
 }
 
-// MapStageAttr is an optional argument to MapStage.
-type MapStageAttr func(optionalAttr)
-
-// MapStageCapacity sets the optional capacity attribute to value.
+// Adds Tensor 'bias' to Tensor 'input' for Quantized types.
 //
-// value: Maximum number of elements in the Staging Area. If > 0, inserts
-// on the container will block when the capacity is reached.
-// If not specified, defaults to 0
+// Broadcasts the values of bias on dimensions 0..N-2 of 'input'.
 //
-// REQUIRES: value >= 0
-func MapStageCapacity(value int64) MapStageAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// MapStageMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// Arguments:
 //
-// REQUIRES: value >= 0
-func MapStageMemoryLimit(value int64) MapStageAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// MapStageContainer sets the optional container attribute to value.
+//	bias: A 1D bias Tensor with size matching the last dimension of 'input'.
+//	min_input: The float value that the lowest quantized input value represents.
+//	max_input: The float value that the highest quantized input value represents.
+//	min_bias: The float value that the lowest quantized bias value represents.
+//	max_bias: The float value that the highest quantized bias value represents.
 //
-// value: If non-empty, this queue is placed in the given container. Otherwise,
-// a default container is used.
-// If not specified, defaults to ""
-func MapStageContainer(value string) MapStageAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
+//
+// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
+func QuantizedBiasAdd(scope *Scope, input tf.Output, bias tf.Output, min_input tf.Output, max_input tf.Output, min_bias tf.Output, max_bias tf.Output, out_type tf.DataType) (output tf.Output, min_out tf.Output, max_out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type}
+	opspec := tf.OpSpec{
+		Type: "QuantizedBiasAdd",
+		Input: []tf.Input{
+			input, bias, min_input, max_input, min_bias, max_bias,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// MapStageSharedName sets the optional shared_name attribute to value.
+// Produces the average pool of the input tensor for quantized types.
 //
-// value: It is necessary to match this name to the matching Unstage Op.
-// If not specified, defaults to ""
-func MapStageSharedName(value string) MapStageAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
+// Arguments:
+//	input: 4-D with shape `[batch, height, width, channels]`.
+//	min_input: The float value that the lowest quantized input value represents.
+//	max_input: The float value that the highest quantized input value represents.
+//	ksize: The size of the window for each dimension of the input tensor.
+// The length must be 4 to match the number of dimensions of the input.
+//	strides: The stride of the sliding window for each dimension of the input
+// tensor.  The length must be 4 to match the number of dimensions of the input.
+//	padding: The type of padding algorithm to use.
+//
+// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
+func QuantizedAvgPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "QuantizedAvgPool",
+		Input: []tf.Input{
+			input, min_input, max_input,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Stage (key, values) in the underlying container which behaves like a hashtable.
-//
-// Arguments:
-//	key: int64
+// Updates the table to associates keys with values.
 //
-//	values: a list of tensors
-// dtypes A list of data types that inserted values should adhere to.
+// The tensor `keys` must be of the same type as the keys of the table.
+// The tensor `values` must be of the type of the table values.
 //
+// Arguments:
+//	table_handle: Handle to the table.
+//	keys: Any shape.  Keys to look up.
+//	values: Values to associate with keys.
 //
 // Returns the created operation.
-func MapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...MapStageAttr) (o *tf.Operation) {
+func LookupTableInsertV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "MapStage",
+		Type: "LookupTableInsertV2",
 		Input: []tf.Input{
-			key, indices, tf.OutputList(values),
+			table_handle, keys, values,
 		},
-		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// DepthToSpaceAttr is an optional argument to DepthToSpace.
-type DepthToSpaceAttr func(optionalAttr)
+// FractionalAvgPoolAttr is an optional argument to FractionalAvgPool.
+type FractionalAvgPoolAttr func(optionalAttr)
 
-// DepthToSpaceDataFormat sets the optional data_format attribute to value.
-// If not specified, defaults to "NHWC"
-func DepthToSpaceDataFormat(value string) DepthToSpaceAttr {
+// FractionalAvgPoolPseudoRandom sets the optional pseudo_random attribute to value.
+//
+// value: When set to True, generates the pooling sequence in a
+// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
+// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for
+// difference between pseudorandom and random.
+// If not specified, defaults to false
+func FractionalAvgPoolPseudoRandom(value bool) FractionalAvgPoolAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["pseudo_random"] = value
 	}
 }
 
-// DepthToSpace for tensors of type T.
-//
-// Rearranges data from depth into blocks of spatial data.
-// This is the reverse transformation of SpaceToDepth. More specifically,
-// this op outputs a copy of the input tensor where values from the `depth`
-// dimension are moved in spatial blocks to the `height` and `width` dimensions.
-// The attr `block_size` indicates the input block size and how the data is moved.
-//
-//   * Chunks of data of size `block_size * block_size` from depth are rearranged
-//     into non-overlapping blocks of size `block_size x block_size`
-//   * The width the output tensor is `input_depth * block_size`, whereas the
-//     height is `input_height * block_size`.
-//   * The Y, X coordinates within each block of the output image are determined
-//     by the high order component of the input channel index.
-//   * The depth of the input tensor must be divisible by
-//     `block_size * block_size`.
-//
-// The `data_format` attr specifies the layout of the input and output tensors
-// with the following options:
-//   "NHWC": `[ batch, height, width, channels ]`
-//   "NCHW": `[ batch, channels, height, width ]`
-//   "NCHW_VECT_C":
-//       `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
-//
-// It is useful to consider the operation as transforming a 6-D Tensor.
-// e.g. for data_format = NHWC,
-//      Each element in the input tensor can be specified via 6 coordinates,
-//      ordered by decreasing memory layout significance as:
-//      n,iY,iX,bY,bX,oC  (where n=batch index, iX, iY means X or Y coordinates
-//                         within the input image, bX, bY means coordinates
-//                         within the output block, oC means output channels).
-//      The output would be the input transposed to the following layout:
-//      n,iY,bY,iX,bX,oC
-//
-// This operation is useful for resizing the activations between convolutions
-// (but keeping all data), e.g. instead of pooling. It is also useful for training
-// purely convolutional models.
-//
-// For example, given an input of shape `[1, 1, 1, 4]`, data_format = "NHWC" and
-// block_size = 2:
-//
-// ```
-// x = [[[[1, 2, 3, 4]]]]
-//
-// ```
-//
-// This operation will output a tensor of shape `[1, 2, 2, 1]`:
-//
-// ```
-//    [[[[1], [2]],
-//      [[3], [4]]]]
-// ```
-//
-// Here, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`,
-// the corresponding output will have 2x2 elements and will have a depth of
-// 1 channel (1 = `4 / (block_size * block_size)`).
-// The output element shape is `[2, 2, 1]`.
-//
-// For an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g.
-//
-// ```
-// x = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]
-// ```
+// FractionalAvgPoolOverlapping sets the optional overlapping attribute to value.
 //
-// This operation, for block size of 2, will return the following tensor of shape
-// `[1, 2, 2, 3]`
+// value: When set to True, it means when pooling, the values at the boundary
+// of adjacent pooling cells are used by both cells. For example:
 //
-// ```
-//    [[[[1, 2, 3], [4, 5, 6]],
-//      [[7, 8, 9], [10, 11, 12]]]]
+// `index  0  1  2  3  4`
 //
-// ```
+// `value  20 5  16 3  7`
 //
-// Similarly, for the following input of shape `[1 2 2 4]`, and a block size of 2:
+// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
+// The result would be [41/3, 26/3] for fractional avg pooling.
+// If not specified, defaults to false
+func FractionalAvgPoolOverlapping(value bool) FractionalAvgPoolAttr {
+	return func(m optionalAttr) {
+		m["overlapping"] = value
+	}
+}
+
+// FractionalAvgPoolDeterministic sets the optional deterministic attribute to value.
 //
-// ```
-// x =  [[[[1, 2, 3, 4],
-//        [5, 6, 7, 8]],
-//       [[9, 10, 11, 12],
-//        [13, 14, 15, 16]]]]
-// ```
+// value: When set to True, a fixed pooling region will be used when
+// iterating over a FractionalAvgPool node in the computation graph. Mainly used
+// in unit test to make FractionalAvgPool deterministic.
+// If not specified, defaults to false
+func FractionalAvgPoolDeterministic(value bool) FractionalAvgPoolAttr {
+	return func(m optionalAttr) {
+		m["deterministic"] = value
+	}
+}
+
+// FractionalAvgPoolSeed sets the optional seed attribute to value.
 //
-// the operator will return the following tensor of shape `[1 4 4 1]`:
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func FractionalAvgPoolSeed(value int64) FractionalAvgPoolAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// FractionalAvgPoolSeed2 sets the optional seed2 attribute to value.
 //
-// ```
-// x = [[[ [1],   [2],  [5],  [6]],
-//       [ [3],   [4],  [7],  [8]],
-//       [ [9],  [10], [13],  [14]],
-//       [ [11], [12], [15],  [16]]]]
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func FractionalAvgPoolSeed2(value int64) FractionalAvgPoolAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Performs fractional average pooling on the input.
 //
-// ```
+// Fractional average pooling is similar to Fractional max pooling in the pooling
+// region generation step. The only difference is that after pooling regions are
+// generated, a mean operation is performed instead of a max operation in each
+// pooling region.
 //
 // Arguments:
+//	value: 4-D with shape `[batch, height, width, channels]`.
+//	pooling_ratio: Pooling ratio for each dimension of `value`, currently only
+// supports row and col dimension and should be >= 1.0. For example, a valid
+// pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
+// must be 1.0 because we don't allow pooling on batch and channels
+// dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
+// respectively.
 //
-//	block_size: The size of the spatial block, same as in Space2Depth.
-func DepthToSpace(scope *Scope, input tf.Output, block_size int64, optional ...DepthToSpaceAttr) (output tf.Output) {
+// Returns output tensor after fractional avg pooling.row pooling sequence, needed to calculate gradient.column pooling sequence, needed to calculate gradient.
+func FractionalAvgPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalAvgPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"block_size": block_size}
+	attrs := map[string]interface{}{"pooling_ratio": pooling_ratio}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DepthToSpace",
+		Type: "FractionalAvgPool",
 		Input: []tf.Input{
-			input,
+			value,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// StagePeekAttr is an optional argument to StagePeek.
-type StagePeekAttr func(optionalAttr)
+// RandomCropAttr is an optional argument to RandomCrop.
+type RandomCropAttr func(optionalAttr)
 
-// StagePeekCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// RandomCropSeed sets the optional seed attribute to value.
 //
-// REQUIRES: value >= 0
-func StagePeekCapacity(value int64) StagePeekAttr {
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func RandomCropSeed(value int64) RandomCropAttr {
 	return func(m optionalAttr) {
-		m["capacity"] = value
+		m["seed"] = value
 	}
 }
 
-// StagePeekMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// RandomCropSeed2 sets the optional seed2 attribute to value.
 //
-// REQUIRES: value >= 0
-func StagePeekMemoryLimit(value int64) StagePeekAttr {
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomCropSeed2(value int64) RandomCropAttr {
 	return func(m optionalAttr) {
-		m["memory_limit"] = value
+		m["seed2"] = value
 	}
 }
 
-// StagePeekContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func StagePeekContainer(value string) StagePeekAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
+// Randomly crop `image`.
+//
+// DEPRECATED at GraphDef version 8: Random crop is now pure Python
+//
+// `size` is a 1-D int64 tensor with 2 elements representing the crop height and
+// width.  The values must be non negative.
+//
+// This Op picks a random location in `image` and crops a `height` by `width`
+// rectangle from that location.  The random location is picked so the cropped
+// area will fit inside the original image.
+//
+// Arguments:
+//	image: 3-D of shape `[height, width, channels]`.
+//	size: 1-D of length 2 containing: `crop_height`, `crop_width`..
+//
+// Returns 3-D of shape `[crop_height, crop_width, channels].`
+func RandomCrop(scope *Scope, image tf.Output, size tf.Output, optional ...RandomCropAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RandomCrop",
+		Input: []tf.Input{
+			image, size,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// StagePeekSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func StagePeekSharedName(value string) StagePeekAttr {
+// TopKV2Attr is an optional argument to TopKV2.
+type TopKV2Attr func(optionalAttr)
+
+// TopKV2Sorted sets the optional sorted attribute to value.
+//
+// value: If true the resulting `k` elements will be sorted by the values in
+// descending order.
+// If not specified, defaults to true
+func TopKV2Sorted(value bool) TopKV2Attr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["sorted"] = value
 	}
 }
 
-// Op peeks at the values at the specified index.  If the
+// Finds values and indices of the `k` largest elements for the last dimension.
 //
-// underlying container does not contain sufficient elements
-// this op will block until it does.   This Op is optimized for
-// performance.
-func StagePeek(scope *Scope, index tf.Output, dtypes []tf.DataType, optional ...StagePeekAttr) (values []tf.Output) {
+// If the input is a vector (rank-1), finds the `k` largest entries in the vector
+// and outputs their values and indices as vectors.  Thus `values[j]` is the
+// `j`-th largest entry in `input`, and its index is `indices[j]`.
+//
+// For matrices (resp. higher rank input), computes the top `k` entries in each
+// row (resp. vector along the last dimension).  Thus,
+//
+//     values.shape = indices.shape = input.shape[:-1] + [k]
+//
+// If two elements are equal, the lower-index element appears first.
+//
+// Arguments:
+//	input: 1-D or higher with last dimension at least `k`.
+//	k: 0-D.  Number of top elements to look for along the last dimension (along each
+// row for matrices).
+//
+// Returns The `k` largest elements along each last dimensional slice.The indices of `values` within the last dimension of `input`.
+func TopKV2(scope *Scope, input tf.Output, k tf.Output, optional ...TopKV2Attr) (values tf.Output, indices tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StagePeek",
+		Type: "TopKV2",
 		Input: []tf.Input{
-			index,
+			input, k,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Returns x // y element-wise.
+//
+// *NOTE*: `FloorDiv` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func FloorDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	var idx int
-	var err error
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("StagePeek", err)
-		return
+	opspec := tf.OpSpec{
+		Type: "FloorDiv",
+		Input: []tf.Input{
+			x, y,
+		},
 	}
-	return values
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// StageAttr is an optional argument to Stage.
-type StageAttr func(optionalAttr)
-
-// StageCapacity sets the optional capacity attribute to value.
+// Returns a batched diagonal tensor with a given batched diagonal values.
 //
-// value: Maximum number of elements in the Staging Area. If > 0, inserts
-// on the container will block when the capacity is reached.
-// If not specified, defaults to 0
+// Given a `diagonal`, this operation returns a tensor with the `diagonal` and
+// everything else padded with zeros. The diagonal is computed as follows:
 //
-// REQUIRES: value >= 0
-func StageCapacity(value int64) StageAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// StageMemoryLimit sets the optional memory_limit attribute to value.
+// Assume `diagonal` has `k` dimensions `[I, J, K, ..., N]`, then the output is a
+// tensor of rank `k+1` with dimensions [I, J, K, ..., N, N]` where:
 //
-// value: The maximum number of bytes allowed for Tensors in the Staging Area.
-// If > 0, inserts will block until sufficient space is available.
-// If not specified, defaults to 0
+// `output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n]`.
 //
-// REQUIRES: value >= 0
-func StageMemoryLimit(value int64) StageAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// StageContainer sets the optional container attribute to value.
+// For example:
 //
-// value: If non-empty, this queue is placed in the given container. Otherwise,
-// a default container is used.
-// If not specified, defaults to ""
-func StageContainer(value string) StageAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
+// ```
+// # 'diagonal' is [[1, 2, 3, 4], [5, 6, 7, 8]]
+//
+// and diagonal.shape = (2, 4)
+//
+// tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0]
+//                                      [0, 2, 0, 0]
+//                                      [0, 0, 3, 0]
+//                                      [0, 0, 0, 4]],
+//                                     [[5, 0, 0, 0]
+//                                      [0, 6, 0, 0]
+//                                      [0, 0, 7, 0]
+//                                      [0, 0, 0, 8]]]
+//
+// which has shape (2, 4, 4)
+// ```
+//
+// Arguments:
+//	diagonal: Rank `k`, where `k >= 1`.
+//
+// Returns Rank `k+1`, with `output.shape = diagonal.shape + [diagonal.shape[-1]]`.
+func MatrixDiag(scope *Scope, diagonal tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "MatrixDiag",
+		Input: []tf.Input{
+			diagonal,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// StageSharedName sets the optional shared_name attribute to value.
+// Says whether the targets are in the top `K` predictions.
 //
-// value: It is necessary to match this name to the matching Unstage Op.
-// If not specified, defaults to ""
-func StageSharedName(value string) StageAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Stage values similar to a lightweight Enqueue.
+// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the
+// prediction for the target class is among the top `k` predictions among
+// all predictions for example `i`. Note that the behavior of `InTopK` differs
+// from the `TopK` op in its handling of ties; if multiple classes have the
+// same prediction value and straddle the top-`k` boundary, all of those
+// classes are considered to be in the top `k`.
 //
-// The basic functionality of this Op is similar to a queue with many
-// fewer capabilities and options.  This Op is optimized for performance.
+// More formally, let
+//
+//   \\(predictions_i\\) be the predictions for all classes for example `i`,
+//   \\(targets_i\\) be the target class for example `i`,
+//   \\(out_i\\) be the output for example `i`,
+//
+// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$
 //
 // Arguments:
-//	values: a list of tensors
-// dtypes A list of data types that inserted values should adhere to.
+//	predictions: A `batch_size` x `classes` tensor.
+//	targets: A `batch_size` vector of class ids.
+//	k: Number of top elements to look at for computing precision.
 //
-// Returns the created operation.
-func Stage(scope *Scope, values []tf.Output, optional ...StageAttr) (o *tf.Operation) {
+// Returns Computed Precision at `k` as a `bool Tensor`.
+func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"k": k}
 	opspec := tf.OpSpec{
-		Type: "Stage",
+		Type: "InTopK",
 		Input: []tf.Input{
-			tf.OutputList(values),
+			predictions, targets,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
-}
-
-// FakeQuantWithMinMaxArgsAttr is an optional argument to FakeQuantWithMinMaxArgs.
-type FakeQuantWithMinMaxArgsAttr func(optionalAttr)
-
-// FakeQuantWithMinMaxArgsMin sets the optional min attribute to value.
-// If not specified, defaults to -6
-func FakeQuantWithMinMaxArgsMin(value float32) FakeQuantWithMinMaxArgsAttr {
-	return func(m optionalAttr) {
-		m["min"] = value
-	}
-}
-
-// FakeQuantWithMinMaxArgsMax sets the optional max attribute to value.
-// If not specified, defaults to 6
-func FakeQuantWithMinMaxArgsMax(value float32) FakeQuantWithMinMaxArgsAttr {
-	return func(m optionalAttr) {
-		m["max"] = value
-	}
-}
-
-// FakeQuantWithMinMaxArgsNumBits sets the optional num_bits attribute to value.
-// If not specified, defaults to 8
-func FakeQuantWithMinMaxArgsNumBits(value int64) FakeQuantWithMinMaxArgsAttr {
-	return func(m optionalAttr) {
-		m["num_bits"] = value
-	}
-}
-
-// FakeQuantWithMinMaxArgsNarrowRange sets the optional narrow_range attribute to value.
-// If not specified, defaults to false
-func FakeQuantWithMinMaxArgsNarrowRange(value bool) FakeQuantWithMinMaxArgsAttr {
-	return func(m optionalAttr) {
-		m["narrow_range"] = value
-	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Fake-quantize the 'inputs' tensor, type float to 'outputs' tensor of same type.
+// Given a quantized tensor described by (input, input_min, input_max), outputs a
 //
-// Attributes `[min; max]` define the clamping range for the `inputs` data.
-// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
-// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
-// then de-quantized and output as floats in `[min; max]` interval.
-// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.
+// range that covers the actual values present in that tensor.  This op is
+// typically used to produce the requested_output_min and requested_output_max for
+// Requantize.
 //
-// Quantization is called fake since the output is still in floating point.
-func FakeQuantWithMinMaxArgs(scope *Scope, inputs tf.Output, optional ...FakeQuantWithMinMaxArgsAttr) (outputs tf.Output) {
+// Arguments:
+//
+//	input_min: The float value that the minimum quantized input value represents.
+//	input_max: The float value that the maximum quantized input value represents.
+//
+// Returns The computed min output.the computed max output.
+func RequantizationRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output) (output_min tf.Output, output_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "FakeQuantWithMinMaxArgs",
+		Type: "RequantizationRange",
 		Input: []tf.Input{
-			inputs,
+			input, input_min, input_max,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Deprecated. Use TensorArraySizeV3
-func TensorArraySizeV2(scope *Scope, handle tf.Output, flow_in tf.Output) (size tf.Output) {
+// Returns the truth value of (x <= y) element-wise.
+//
+// *NOTE*: `LessEqual` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func LessEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArraySizeV2",
+		Type: "LessEqual",
 		Input: []tf.Input{
-			handle, flow_in,
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Deprecated. Use TensorArrayScatterV3
-func TensorArrayScatterV2(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
+// Computes softmax activations.
+//
+// For each batch `i` and class `j` we have
+//
+//     softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j]))
+//
+// Arguments:
+//	logits: 2-D with shape `[batch_size, num_classes]`.
+//
+// Returns Same shape as `logits`.
+func Softmax(scope *Scope, logits tf.Output) (softmax tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayScatterV2",
+		Type: "Softmax",
 		Input: []tf.Input{
-			handle, indices, value, flow_in,
+			logits,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Deprecated. Use TensorArrayGradV3
-func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
+// DecodeBmpAttr is an optional argument to DecodeBmp.
+type DecodeBmpAttr func(optionalAttr)
+
+// DecodeBmpChannels sets the optional channels attribute to value.
+// If not specified, defaults to 0
+func DecodeBmpChannels(value int64) DecodeBmpAttr {
+	return func(m optionalAttr) {
+		m["channels"] = value
+	}
+}
+
+// Decode the first frame of a BMP-encoded image to a uint8 tensor.
+//
+// The attr `channels` indicates the desired number of color channels for the
+// decoded image.
+//
+// Accepted values are:
+//
+// *   0: Use the number of channels in the BMP-encoded image.
+// *   3: output an RGB image.
+// *   4: output an RGBA image.
+//
+// Arguments:
+//	contents: 0-D.  The BMP-encoded image.
+//
+// Returns 3-D with shape `[height, width, channels]`. RGB order
+func DecodeBmp(scope *Scope, contents tf.Output, optional ...DecodeBmpAttr) (image tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayWriteV2",
+		Type: "DecodeBmp",
 		Input: []tf.Input{
-			handle, index, value, flow_in,
+			contents,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Get the current size of the TensorArray.
+// Computes softsign gradients for a softsign operation.
 //
 // Arguments:
-//	handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
-//	flow_in: A float scalar that enforces proper chaining of operations.
+//	gradients: The backpropagated gradients to the corresponding softsign operation.
+//	features: The features passed as input to the corresponding softsign operation.
 //
-// Returns The current size of the TensorArray.
-func TensorArraySizeV3(scope *Scope, handle tf.Output, flow_in tf.Output) (size tf.Output) {
+// Returns The gradients: `gradients / (1 + abs(features)) ** 2`.
+func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArraySizeV3",
+		Type: "SoftsignGrad",
 		Input: []tf.Input{
-			handle, flow_in,
+			gradients, features,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// LearnedUnigramCandidateSamplerAttr is an optional argument to LearnedUnigramCandidateSampler.
-type LearnedUnigramCandidateSamplerAttr func(optionalAttr)
+// BatchMatMulAttr is an optional argument to BatchMatMul.
+type BatchMatMulAttr func(optionalAttr)
 
-// LearnedUnigramCandidateSamplerSeed sets the optional seed attribute to value.
+// BatchMatMulAdjX sets the optional adj_x attribute to value.
 //
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func LearnedUnigramCandidateSamplerSeed(value int64) LearnedUnigramCandidateSamplerAttr {
+// value: If `True`, adjoint the slices of `x`. Defaults to `False`.
+// If not specified, defaults to false
+func BatchMatMulAdjX(value bool) BatchMatMulAttr {
 	return func(m optionalAttr) {
-		m["seed"] = value
+		m["adj_x"] = value
 	}
 }
 
-// LearnedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+// BatchMatMulAdjY sets the optional adj_y attribute to value.
 //
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func LearnedUnigramCandidateSamplerSeed2(value int64) LearnedUnigramCandidateSamplerAttr {
+// value: If `True`, adjoint the slices of `y`. Defaults to `False`.
+// If not specified, defaults to false
+func BatchMatMulAdjY(value bool) BatchMatMulAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["adj_y"] = value
 	}
 }
 
-// Generates labels for candidate sampling with a learned unigram distribution.
+// Multiplies slices of two tensors in batches.
 //
-// See explanations of candidate sampling and the data formats at
-// go/candidate-sampling.
+// Multiplies all slices of `Tensor` `x` and `y` (each slice can be
+// viewed as an element of a batch), and arranges the individual results
+// in a single output tensor of the same batch size. Each of the
+// individual slices can optionally be adjointed (to adjoint a matrix
+// means to transpose and conjugate it) before multiplication by setting
+// the `adj_x` or `adj_y` flag to `True`, which are by default `False`.
 //
-// For each batch, this op picks a single set of sampled candidate labels.
+// The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]`
+// and `[..., r_y, c_y]`.
 //
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
+// The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where:
+//
+//     r_o = c_x if adj_x else r_x
+//     c_o = r_y if adj_y else c_y
+//
+// It is computed as:
+//
+//     output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :])
 //
 // Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to randomly sample.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
-//	range_max: The sampler will sample integers from the interval [0, range_max).
+//	x: 2-D or higher with shape `[..., r_x, c_x]`.
+//	y: 2-D or higher with shape `[..., r_y, c_y]`.
 //
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func LearnedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LearnedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+// Returns 3-D or higher with shape `[..., r_o, c_o]`
+func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMulAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "LearnedUnigramCandidateSampler",
+		Type: "BatchMatMul",
 		Input: []tf.Input{
-			true_classes,
+			x, y,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Split the data from the input value into TensorArray elements.
-//
-// Assuming that `lengths` takes on values
-//
-//   ```(n0, n1, ..., n(T-1))```
-//
-// and that `value` has shape
-//
-//   ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```,
-//
-// this splits values into a TensorArray with T tensors.
-//
-// TensorArray index t will be the subtensor of values with starting position
+// Pads a tensor.
 //
-//   ```(n0 + n1 + ... + n(t-1), 0, 0, ...)```
+// This operation pads `input` according to the `paddings` and `constant_values`
+// you specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is
+// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
+// how many padding values to add before the contents of `input` in that dimension,
+// and `paddings[D, 1]` indicates how many padding values to add after the contents
+// of `input` in that dimension. `constant_values` is a scalar tensor of the same
+// type as `input` that indicates the value to use for padding `input`.
 //
-// and having size
+// The padded size of each dimension D of the output is:
 //
-//   ```nt x d0 x d1 x ...```
+// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
 //
-// Arguments:
-//	handle: The handle to a TensorArray.
-//	value: The concatenated tensor to write to the TensorArray.
-//	lengths: The vector of lengths, how to split the rows of value into the
-// TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
+// For example:
 //
-// Returns A float scalar that enforces proper chaining of operations.
-func TensorArraySplitV3(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) {
+// ```
+// # 't' is [[1, 1], [2, 2]]
+// # 'paddings' is [[1, 1], [2, 2]]
+// # 'constant_values' is 0
+// # rank of 't' is 2
+// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
+//                       [0, 0, 1, 1, 0, 0]
+//                       [0, 0, 2, 2, 0, 0]
+//                       [0, 0, 0, 0, 0, 0]]
+// ```
+func PadV2(scope *Scope, input tf.Output, paddings tf.Output, constant_values tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArraySplitV3",
+		Type: "PadV2",
 		Input: []tf.Input{
-			handle, value, lengths, flow_in,
+			input, paddings, constant_values,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns a diagonal tensor with a given diagonal values.
-//
-// Given a `diagonal`, this operation returns a tensor with the `diagonal` and
-// everything else padded with zeros. The diagonal is computed as follows:
-//
-// Assume `diagonal` has dimensions [D1,..., Dk], then the output is a tensor of
-// rank 2k with dimensions [D1,..., Dk, D1,..., Dk] where:
-//
-// `output[i1,..., ik, i1,..., ik] = diagonal[i1, ..., ik]` and 0 everywhere else.
-//
-// For example:
-//
-// ```
-// # 'diagonal' is [1, 2, 3, 4]
-// tf.diag(diagonal) ==> [[1, 0, 0, 0]
-//                        [0, 2, 0, 0]
-//                        [0, 0, 3, 0]
-//                        [0, 0, 0, 4]]
-// ```
+// Returns which elements of x are NaN.
 //
-// Arguments:
-//	diagonal: Rank k tensor where k is at most 1.
-func Diag(scope *Scope, diagonal tf.Output) (output tf.Output) {
+// @compatibility(numpy)
+// Equivalent to np.isnan
+// @end_compatibility
+func IsNan(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Diag",
+		Type: "IsNan",
 		Input: []tf.Input{
-			diagonal,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// TensorArrayConcatV3Attr is an optional argument to TensorArrayConcatV3.
-type TensorArrayConcatV3Attr func(optionalAttr)
+// FractionalAvgPoolGradAttr is an optional argument to FractionalAvgPoolGrad.
+type FractionalAvgPoolGradAttr func(optionalAttr)
 
-// TensorArrayConcatV3ElementShapeExcept0 sets the optional element_shape_except0 attribute to value.
+// FractionalAvgPoolGradOverlapping sets the optional overlapping attribute to value.
 //
-// value: The expected shape of an element, if known,
-// excluding the first dimension. Used to validate the shapes of
-// TensorArray elements. If this shape is not fully specified, concatenating
-// zero-size TensorArrays is an error.
-// If not specified, defaults to <unknown_rank:true >
-func TensorArrayConcatV3ElementShapeExcept0(value tf.Shape) TensorArrayConcatV3Attr {
+// value: When set to True, it means when pooling, the values at the boundary
+// of adjacent pooling cells are used by both cells. For example:
+//
+// `index  0  1  2  3  4`
+//
+// `value  20 5  16 3  7`
+//
+// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
+// The result would be [41/3, 26/3] for fractional avg pooling.
+// If not specified, defaults to false
+func FractionalAvgPoolGradOverlapping(value bool) FractionalAvgPoolGradAttr {
 	return func(m optionalAttr) {
-		m["element_shape_except0"] = value
+		m["overlapping"] = value
 	}
 }
 
-// Concat the elements from the TensorArray into value `value`.
-//
-// Takes `T` elements of shapes
-//
-//   ```
-//   (n0 x d0 x d1 x ...), (n1 x d0 x d1 x ...), ..., (n(T-1) x d0 x d1 x ...)
-//   ```
-//
-// and concatenates them into a Tensor of shape:
-//
-//   ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```
+// Computes gradient of the FractionalAvgPool function.
 //
-// All elements must have the same shape (excepting the first dimension).
+// Unlike FractionalMaxPoolGrad, we don't need to find arg_max for
+// FractionalAvgPoolGrad, we just need to evenly back-propagate each element of
+// out_backprop to those indices that form the same pooling cell. Therefore, we
+// just need to know the shape of original input tensor, instead of the whole
+// tensor.
 //
 // Arguments:
-//	handle: The handle to a TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//	dtype: The type of the elem that is returned.
+//	orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool`
+//	out_backprop: 4-D with shape `[batch, height, width, channels]`.  Gradients
+// w.r.t. the output of `fractional_avg_pool`.
+//	row_pooling_sequence: row pooling sequence, form pooling region with
+// col_pooling_sequence.
+//	col_pooling_sequence: column pooling sequence, form pooling region with
+// row_pooling sequence.
 //
-// Returns All of the elements in the TensorArray, concatenated along the first
-// axis.A vector of the row sizes of the original T elements in the
-// value output.  In the example above, this would be the values:
-// `(n1, n2, ..., n(T-1))`.
-func TensorArrayConcatV3(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV3Attr) (value tf.Output, lengths tf.Output) {
+// Returns 4-D.  Gradients w.r.t. the input of `fractional_avg_pool`.
+func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalAvgPoolGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayConcatV3",
+		Type: "FractionalAvgPoolGrad",
 		Input: []tf.Input{
-			handle, flow_in,
+			orig_input_tensor_shape, out_backprop, row_pooling_sequence, col_pooling_sequence,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// Scatter the data from the input value into specific TensorArray elements.
-//
-// `indices` must be a vector, its length must match the first dim of `value`.
+// Computes gradients for the exponential linear (Elu) operation.
 //
 // Arguments:
-//	handle: The handle to a TensorArray.
-//	indices: The locations at which to write the tensor elements.
-//	value: The concatenated tensor to write to the TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
+//	gradients: The backpropagated gradients to the corresponding Elu operation.
+//	outputs: The outputs of the corresponding Elu operation.
 //
-// Returns A float scalar that enforces proper chaining of operations.
-func TensorArrayScatterV3(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
+// Returns The gradients: `gradients * (outputs + 1)` if outputs < 0,
+// `gradients` otherwise.
+func EluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayScatterV3",
+		Type: "EluGrad",
 		Input: []tf.Input{
-			handle, indices, value, flow_in,
+			gradients, outputs,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Push an element onto the tensor_array.
+// Converts each string in the input Tensor to its hash mod by a number of buckets.
+//
+// The hash function is deterministic on the content of the string within the
+// process.
+//
+// Note that the hash function may change from time to time.
+// This functionality will be deprecated and it's recommended to use
+// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`.
 //
 // Arguments:
-//	handle: The handle to a TensorArray.
-//	index: The position to write to inside the TensorArray.
-//	value: The tensor to write to the TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
 //
-// Returns A float scalar that enforces proper chaining of operations.
-func TensorArrayWriteV3(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
+//	num_buckets: The number of buckets.
+//
+// Returns A Tensor of the same shape as the input `string_tensor`.
+func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"num_buckets": num_buckets}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayWriteV3",
+		Type: "StringToHashBucket",
 		Input: []tf.Input{
-			handle, index, value, flow_in,
+			string_tensor,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Creates a TensorArray for storing the gradients of values in the given handle.
-//
-// If the given TensorArray gradient already exists, returns a reference to it.
-//
-// Locks the size of the original TensorArray by disabling its dynamic size flag.
-//
-// **A note about the input flow_in:**
-//
-// The handle flow_in forces the execution of the gradient lookup to occur
-// only after certain other operations have occurred.  For example, when
-// the forward TensorArray is dynamically sized, writes to this TensorArray
-// may resize the object.  The gradient TensorArray is statically sized based
-// on the size of the forward TensorArray when this operation executes.
-// Furthermore, the size of the forward TensorArray is frozen by this call.
-// As a result, the flow is used to ensure that the call to generate the gradient
-// TensorArray only happens after all writes are executed.
-//
-// In the case of dynamically sized TensorArrays, gradient computation should
-// only be performed on read operations that have themselves been chained via
-// flow to occur only after all writes have executed. That way the final size
-// of the forward TensorArray is known when this operation is called.
-//
-// **A note about the source attribute:**
-//
-// TensorArray gradient calls use an accumulator TensorArray object.  If
-// multiple gradients are calculated and run in the same session, the multiple
-// gradient nodes may accidentally flow through the same accumulator TensorArray.
-// This double counts and generally breaks the TensorArray gradient flow.
-//
-// The solution is to identify which gradient call this particular
-// TensorArray gradient is being called in.  This is performed by identifying
-// a unique string (e.g. "gradients", "gradients_1", ...) from the input
-// gradient Tensor's name.  This string is used as a suffix when creating
-// the TensorArray gradient object here (the attribute `source`).
-//
-// The attribute `source` is added as a suffix to the forward TensorArray's
-// name when performing the creation / lookup, so that each separate gradient
-// calculation gets its own TensorArray accumulator.
+// Creates a dataset that contains `count` elements from the `input_dataset`.
 //
 // Arguments:
-//	handle: The handle to the forward TensorArray.
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//	source: The gradient source string, used to decide which gradient TensorArray
-// to return.
-func TensorArrayGradV3(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) {
+//
+//	count: A scalar representing the number of elements from the `input_dataset`
+// that should be taken. A value of `-1` indicates that all of `input_dataset`
+// is taken.
+//
+//
+func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"source": source}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayGradV3",
+		Type: "TakeDataset",
 		Input: []tf.Input{
-			handle, flow_in,
+			input_dataset, count,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// StackPushV2Attr is an optional argument to StackPushV2.
-type StackPushV2Attr func(optionalAttr)
+// Computes rectified linear 6: `min(max(features, 0), 6)`.
+func Relu6(scope *Scope, features tf.Output) (activations tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Relu6",
+		Input: []tf.Input{
+			features,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// StackPushV2SwapMemory sets the optional swap_memory attribute to value.
+// Computes rectified linear gradients for a Relu operation.
 //
-// value: Swap `elem` to CPU. Default to false.
-// If not specified, defaults to false
-func StackPushV2SwapMemory(value bool) StackPushV2Attr {
-	return func(m optionalAttr) {
-		m["swap_memory"] = value
+// Arguments:
+//	gradients: The backpropagated gradients to the corresponding Relu operation.
+//	features: The features passed as input to the corresponding Relu operation, OR
+// the outputs of that operation (both work equivalently).
+//
+// Returns `gradients * (features > 0)`.
+func ReluGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReluGrad",
+		Input: []tf.Input{
+			gradients, features,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Push an element onto the stack.
+// Computes the gradient of morphological 2-D dilation with respect to the input.
 //
 // Arguments:
-//	handle: The handle to a stack.
-//	elem: The tensor to be pushed onto the stack.
+//	input: 4-D with shape `[batch, in_height, in_width, depth]`.
+//	filter: 3-D with shape `[filter_height, filter_width, depth]`.
+//	out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
+//	strides: 1-D of length 4. The stride of the sliding window for each dimension of
+// the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
+//	rates: 1-D of length 4. The input stride for atrous morphological dilation.
+// Must be: `[1, rate_height, rate_width, 1]`.
+//	padding: The type of padding algorithm to use.
 //
-// Returns The same tensor as the input 'elem'.
-func StackPushV2(scope *Scope, handle tf.Output, elem tf.Output, optional ...StackPushV2Attr) (output tf.Output) {
+// Returns 4-D with shape `[batch, in_height, in_width, depth]`.
+func Dilation2DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (in_backprop tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "StackPushV2",
+		Type: "Dilation2DBackpropInput",
 		Input: []tf.Input{
-			handle, elem,
+			input, filter, out_backprop,
 		},
 		Attrs: attrs,
 	}
@@ -4368,764 +4015,771 @@ func StackPushV2(scope *Scope, handle tf.Output, elem tf.Output, optional ...Sta
 	return op.Output(0)
 }
 
-// StackV2Attr is an optional argument to StackV2.
-type StackV2Attr func(optionalAttr)
+// CTCBeamSearchDecoderAttr is an optional argument to CTCBeamSearchDecoder.
+type CTCBeamSearchDecoderAttr func(optionalAttr)
 
-// StackV2StackName sets the optional stack_name attribute to value.
+// CTCBeamSearchDecoderMergeRepeated sets the optional merge_repeated attribute to value.
 //
-// value: Overrides the name used for the temporary stack resource. Default
-// value is the name of the 'Stack' op (which is guaranteed unique).
-// If not specified, defaults to ""
-func StackV2StackName(value string) StackV2Attr {
+// value: If true, merge repeated classes in output.
+// If not specified, defaults to true
+func CTCBeamSearchDecoderMergeRepeated(value bool) CTCBeamSearchDecoderAttr {
 	return func(m optionalAttr) {
-		m["stack_name"] = value
+		m["merge_repeated"] = value
 	}
 }
 
-// A stack that produces elements in first-in last-out order.
+// Performs beam search decoding on the logits given in input.
+//
+// A note about the attribute merge_repeated: For the beam search decoder,
+// this means that if consecutive entries in a beam are the same, only
+// the first of these is emitted.  That is, when the top path is "A B B B B",
+// "A B" is returned if merge_repeated = True but "A B B B B" is
+// returned if merge_repeated = False.
 //
 // Arguments:
-//	max_size: The maximum size of the stack if non-negative. If negative, the stack
-// size is unlimited.
-//	elem_type: The type of the elements on the stack.
+//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
+//	sequence_length: A vector containing sequence lengths, size `(batch)`.
+//	beam_width: A scalar >= 0 (beam search beam width).
+//	top_paths: A scalar >= 0, <= beam_width (controls output size).
 //
-// Returns The handle to the stack.
-func StackV2(scope *Scope, max_size tf.Output, elem_type tf.DataType, optional ...StackV2Attr) (handle tf.Output) {
+// Returns A list (length: top_paths) of indices matrices.  Matrix j,
+// size `(total_decoded_outputs[j] x 2)`, has indices of a
+// `SparseTensor<int64, 2>`.  The rows store: [batch, time].A list (length: top_paths) of values vectors.  Vector j,
+// size `(length total_decoded_outputs[j])`, has the values of a
+// `SparseTensor<int64, 2>`.  The vector stores the decoded classes for beam j.A list (length: top_paths) of shape vector.  Vector j,
+// size `(2)`, stores the shape of the decoded `SparseTensor[j]`.
+// Its values are: `[batch_size, max_decoded_length[j]]`.A matrix, shaped: `(batch_size x top_paths)`.  The
+// sequence log-probabilities.
+func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, beam_width int64, top_paths int64, optional ...CTCBeamSearchDecoderAttr) (decoded_indices []tf.Output, decoded_values []tf.Output, decoded_shape []tf.Output, log_probability tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"elem_type": elem_type}
+	attrs := map[string]interface{}{"beam_width": beam_width, "top_paths": top_paths}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StackV2",
+		Type: "CTCBeamSearchDecoder",
 		Input: []tf.Input{
-			max_size,
+			inputs, sequence_length,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if decoded_indices, idx, err = makeOutputList(op, idx, "decoded_indices"); err != nil {
+		scope.UpdateErr("CTCBeamSearchDecoder", err)
+		return
+	}
+	if decoded_values, idx, err = makeOutputList(op, idx, "decoded_values"); err != nil {
+		scope.UpdateErr("CTCBeamSearchDecoder", err)
+		return
+	}
+	if decoded_shape, idx, err = makeOutputList(op, idx, "decoded_shape"); err != nil {
+		scope.UpdateErr("CTCBeamSearchDecoder", err)
+		return
+	}
+	log_probability = op.Output(idx)
+	return decoded_indices, decoded_values, decoded_shape, log_probability
 }
 
-// Returns the batched diagonal part of a batched tensor.
-//
-// This operation returns a tensor with the `diagonal` part
-// of the batched `input`. The `diagonal` part is computed as follows:
-//
-// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a
-// tensor of rank `k - 1` with dimensions `[I, J, K, ..., min(M, N)]` where:
-//
-// `diagonal[i, j, k, ..., n] = input[i, j, k, ..., n, n]`.
+// AudioSpectrogramAttr is an optional argument to AudioSpectrogram.
+type AudioSpectrogramAttr func(optionalAttr)
+
+// AudioSpectrogramMagnitudeSquared sets the optional magnitude_squared attribute to value.
 //
-// The input must be at least a matrix.
+// value: Whether to return the squared magnitude or just the
+// magnitude. Using squared magnitude can avoid extra calculations.
+// If not specified, defaults to false
+func AudioSpectrogramMagnitudeSquared(value bool) AudioSpectrogramAttr {
+	return func(m optionalAttr) {
+		m["magnitude_squared"] = value
+	}
+}
+
+// Produces a visualization of audio data over time.
 //
-// For example:
+// Spectrograms are a standard way of representing audio information as a series of
+// slices of frequency information, one slice for each window of time. By joining
+// these together into a sequence, they form a distinctive fingerprint of the sound
+// over time.
 //
-// ```
-// # 'input' is [[[1, 0, 0, 0]
-//                [0, 2, 0, 0]
-//                [0, 0, 3, 0]
-//                [0, 0, 0, 4]],
-//               [[5, 0, 0, 0]
-//                [0, 6, 0, 0]
-//                [0, 0, 7, 0]
-//                [0, 0, 0, 8]]]
+// This op expects to receive audio data as an input, stored as floats in the range
+// -1 to 1, together with a window width in samples, and a stride specifying how
+// far to move the window between slices. From this it generates a three
+// dimensional output. The lowest dimension has an amplitude value for each
+// frequency during that time slice. The next dimension is time, with successive
+// frequency slices. The final dimension is for the channels in the input, so a
+// stereo audio input would have two here for example.
 //
-// and input.shape = (2, 4, 4)
+// This means the layout when converted and saved as an image is rotated 90 degrees
+// clockwise from a typical spectrogram. Time is descending down the Y axis, and
+// the frequency decreases from left to right.
 //
-// tf.matrix_diag_part(input) ==> [[1, 2, 3, 4], [5, 6, 7, 8]]
+// Each value in the result represents the square root of the sum of the real and
+// imaginary parts of an FFT on the current window of samples. In this way, the
+// lowest dimension represents the power of each frequency in the current window,
+// and adjacent windows are concatenated in the next dimension.
 //
-// which has shape (2, 4)
-// ```
+// To get a more intuitive and visual look at what this operation does, you can run
+// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
+// resulting spectrogram as a PNG image.
 //
 // Arguments:
-//	input: Rank `k` tensor where `k >= 2`.
+//	input: Float representation of audio data.
+//	window_size: How wide the input window is in samples. For the highest efficiency
+// this should be a power of two, but other values are accepted.
+//	stride: How widely apart the center of adjacent sample windows should be.
 //
-// Returns The extracted diagonal(s) having shape
-// `diagonal.shape = input.shape[:-2] + [min(input.shape[-2:])]`.
-func MatrixDiagPart(scope *Scope, input tf.Output) (diagonal tf.Output) {
+// Returns 3D representation of the audio frequencies as an image.
+func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride int64, optional ...AudioSpectrogramAttr) (spectrogram tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"window_size": window_size, "stride": stride}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "MatrixDiagPart",
+		Type: "AudioSpectrogram",
 		Input: []tf.Input{
 			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns true if queue is closed.
+// Compute the polygamma function \\(\psi^{(n)}(x)\\).
 //
-// This operation returns true if the queue is closed and false if the queue
-// is open.
+// The polygamma function is defined as:
 //
-// Arguments:
-//	handle: The handle to a queue.
-func QueueIsClosedV2(scope *Scope, handle tf.Output) (is_closed tf.Output) {
+//
+// \\(\psi^{(n)}(x) = \frac{d^n}{dx^n} \psi(x)\\)
+//
+// where \\(\psi(x)\\) is the digamma function.
+func Polygamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "QueueIsClosedV2",
-		Input: []tf.Input{
-			handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// QueueCloseV2Attr is an optional argument to QueueCloseV2.
-type QueueCloseV2Attr func(optionalAttr)
-
-// QueueCloseV2CancelPendingEnqueues sets the optional cancel_pending_enqueues attribute to value.
-//
-// value: If true, all pending enqueue requests that are
-// blocked on the given queue will be canceled.
-// If not specified, defaults to false
-func QueueCloseV2CancelPendingEnqueues(value bool) QueueCloseV2Attr {
-	return func(m optionalAttr) {
-		m["cancel_pending_enqueues"] = value
+		Type: "Polygamma",
+		Input: []tf.Input{
+			a, x,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Closes the given queue.
-//
-// This operation signals that no more elements will be enqueued in the
-// given queue. Subsequent Enqueue(Many) operations will fail.
-// Subsequent Dequeue(Many) operations will continue to succeed if
-// sufficient elements remain in the queue. Subsequent Dequeue(Many)
-// operations that would block will fail immediately.
+// Computes second-order gradients of the maxpooling function.
 //
 // Arguments:
-//	handle: The handle to a queue.
+//	input: The original input.
+//	grad: 4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t. the
+// input of `max_pool`.
+//	argmax: The indices of the maximum values chosen for each output of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
 //
-// Returns the created operation.
-func QueueCloseV2(scope *Scope, handle tf.Output, optional ...QueueCloseV2Attr) (o *tf.Operation) {
+// Returns Gradients of gradients w.r.t. the input of `max_pool`.
+func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "QueueCloseV2",
+		Type: "MaxPoolGradGradWithArgmax",
 		Input: []tf.Input{
-			handle,
+			input, grad, argmax,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2.
-type QueueDequeueUpToV2Attr func(optionalAttr)
+// MaxPoolGradGradV2Attr is an optional argument to MaxPoolGradGradV2.
+type MaxPoolGradGradV2Attr func(optionalAttr)
 
-// QueueDequeueUpToV2TimeoutMs sets the optional timeout_ms attribute to value.
+// MaxPoolGradGradV2DataFormat sets the optional data_format attribute to value.
 //
-// value: If the queue has fewer than n elements, this operation
-// will block for up to timeout_ms milliseconds.
-// Note: This option is not supported yet.
-// If not specified, defaults to -1
-func QueueDequeueUpToV2TimeoutMs(value int64) QueueDequeueUpToV2Attr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func MaxPoolGradGradV2DataFormat(value string) MaxPoolGradGradV2Attr {
 	return func(m optionalAttr) {
-		m["timeout_ms"] = value
+		m["data_format"] = value
 	}
 }
 
-// Dequeues `n` tuples of one or more tensors from the given queue.
-//
-// This operation is not supported by all queues.  If a queue does not support
-// DequeueUpTo, then an Unimplemented error is returned.
-//
-// If the queue is closed and there are more than 0 but less than `n`
-// elements remaining, then instead of returning an OutOfRange error like
-// QueueDequeueMany, less than `n` elements are returned immediately.  If
-// the queue is closed and there are 0 elements left in the queue, then
-// an OutOfRange error is returned just like in QueueDequeueMany.
-// Otherwise the behavior is identical to QueueDequeueMany:
-//
-// This operation concatenates queue-element component tensors along the
-// 0th dimension to make a single component tensor.  All of the components
-// in the dequeued tuple will have size n in the 0th dimension.
-//
-// This operation has `k` outputs, where `k` is the number of components in
-// the tuples stored in the given queue, and output `i` is the ith
-// component of the dequeued tuple.
+// Computes second-order gradients of the maxpooling function.
 //
 // Arguments:
-//	handle: The handle to a queue.
-//	n: The number of tuples to dequeue.
-//	component_types: The type of each component in a tuple.
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: 4-D.  Gradients of gradients w.r.t. the input of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
 //
-// Returns One or more tensors that were dequeued as a tuple.
-func QueueDequeueUpToV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueUpToV2Attr) (components []tf.Output) {
+// Returns Gradients of gradients w.r.t. the input to `max_pool`.
+func MaxPoolGradGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradGradV2Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"component_types": component_types}
+	attrs := map[string]interface{}{"padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QueueDequeueUpToV2",
+		Type: "MaxPoolGradGradV2",
 		Input: []tf.Input{
-			handle, n,
+			orig_input, orig_output, grad, ksize, strides,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
-		scope.UpdateErr("QueueDequeueUpToV2", err)
-		return
-	}
-	return components
+	return op.Output(0)
 }
 
-// Deprecated. Use TensorArrayCloseV3
+// Fast Fourier transform.
 //
-// Returns the created operation.
-func TensorArrayCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) {
+// Computes the 1-dimensional discrete Fourier transform over the inner-most
+// dimension of `input`.
+//
+// Arguments:
+//	input: A complex64 tensor.
+//
+// Returns A complex64 tensor of the same shape as `input`. The inner-most
+//   dimension of `input` is replaced with its 1D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.fft
+// @end_compatibility
+func FFT(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayCloseV2",
+		Type: "FFT",
 		Input: []tf.Input{
-			handle,
+			input,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// QueueDequeueManyV2Attr is an optional argument to QueueDequeueManyV2.
-type QueueDequeueManyV2Attr func(optionalAttr)
+// MaxPoolAttr is an optional argument to MaxPool.
+type MaxPoolAttr func(optionalAttr)
 
-// QueueDequeueManyV2TimeoutMs sets the optional timeout_ms attribute to value.
+// MaxPoolDataFormat sets the optional data_format attribute to value.
 //
-// value: If the queue has fewer than n elements, this operation
-// will block for up to timeout_ms milliseconds.
-// Note: This option is not supported yet.
-// If not specified, defaults to -1
-func QueueDequeueManyV2TimeoutMs(value int64) QueueDequeueManyV2Attr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func MaxPoolDataFormat(value string) MaxPoolAttr {
 	return func(m optionalAttr) {
-		m["timeout_ms"] = value
+		m["data_format"] = value
 	}
 }
 
-// Dequeues `n` tuples of one or more tensors from the given queue.
-//
-// If the queue is closed and there are fewer than `n` elements, then an
-// OutOfRange error is returned.
-//
-// This operation concatenates queue-element component tensors along the
-// 0th dimension to make a single component tensor.  All of the components
-// in the dequeued tuple will have size `n` in the 0th dimension.
-//
-// This operation has `k` outputs, where `k` is the number of components in
-// the tuples stored in the given queue, and output `i` is the ith
-// component of the dequeued tuple.
-//
-// N.B. If the queue is empty, this operation will block until `n` elements
-// have been dequeued (or 'timeout_ms' elapses, if specified).
+// Performs max pooling on the input.
 //
 // Arguments:
-//	handle: The handle to a queue.
-//	n: The number of tuples to dequeue.
-//	component_types: The type of each component in a tuple.
+//	input: 4-D input to pool over.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
 //
-// Returns One or more tensors that were dequeued as a tuple.
-func QueueDequeueManyV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueManyV2Attr) (components []tf.Output) {
+// Returns The max pooled output tensor.
+func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"component_types": component_types}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QueueDequeueManyV2",
+		Type: "MaxPool",
 		Input: []tf.Input{
-			handle, n,
+			input,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
-		scope.UpdateErr("QueueDequeueManyV2", err)
-		return
-	}
-	return components
+	return op.Output(0)
 }
 
-// QueueEnqueueV2Attr is an optional argument to QueueEnqueueV2.
-type QueueEnqueueV2Attr func(optionalAttr)
-
-// QueueEnqueueV2TimeoutMs sets the optional timeout_ms attribute to value.
+// Bucketizes 'input' based on 'boundaries'.
 //
-// value: If the queue is full, this operation will block for up to
-// timeout_ms milliseconds.
-// Note: This option is not supported yet.
-// If not specified, defaults to -1
-func QueueEnqueueV2TimeoutMs(value int64) QueueEnqueueV2Attr {
-	return func(m optionalAttr) {
-		m["timeout_ms"] = value
+// For example, if the inputs are
+//     boundaries = [0, 10, 100]
+//     input = [[-5, 10000]
+//              [150,   10]
+//              [5,    100]]
+//
+// then the output will be
+//     output = [[0, 3]
+//               [3, 2]
+//               [1, 3]]
+//
+// Arguments:
+//	input: Any shape of Tensor contains with int or float type.
+//	boundaries: A sorted list of floats gives the boundary of the buckets.
+//
+// Returns Same shape with 'input', each value of input replaced with bucket index.
+//
+// @compatibility(numpy)
+// Equivalent to np.digitize.
+// @end_compatibility
+func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"boundaries": boundaries}
+	opspec := tf.OpSpec{
+		Type: "Bucketize",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Enqueues a tuple of one or more tensors in the given queue.
-//
-// The components input has k elements, which correspond to the components of
-// tuples stored in the given queue.
-//
-// N.B. If the queue is full, this operation will block until the given
-// element has been enqueued (or 'timeout_ms' elapses, if specified).
+// Computes gradients of the maxpooling function.
 //
 // Arguments:
-//	handle: The handle to a queue.
-//	components: One or more tensors from which the enqueued tensors should be taken.
+//	input: The original input.
+//	grad: 4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t. the
+// output of `max_pool`.
+//	argmax: The indices of the maximum values chosen for each output of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
 //
-// Returns the created operation.
-func QueueEnqueueV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueV2Attr) (o *tf.Operation) {
+// Returns Gradients w.r.t. the input of `max_pool`.
+func MaxPoolGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "QueueEnqueueV2",
+		Type: "MaxPoolGradWithArgmax",
 		Input: []tf.Input{
-			handle, tf.OutputList(components),
+			input, grad, argmax,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
-type ResourceStridedSliceAssignAttr func(optionalAttr)
+// CriticalSectionOpAttr is an optional argument to CriticalSectionOp.
+type CriticalSectionOpAttr func(optionalAttr)
 
-// ResourceStridedSliceAssignBeginMask sets the optional begin_mask attribute to value.
-// If not specified, defaults to 0
-func ResourceStridedSliceAssignBeginMask(value int64) ResourceStridedSliceAssignAttr {
+// CriticalSectionOpContainer sets the optional container attribute to value.
+//
+// value: the container this critical section is placed in.
+// If not specified, defaults to ""
+func CriticalSectionOpContainer(value string) CriticalSectionOpAttr {
 	return func(m optionalAttr) {
-		m["begin_mask"] = value
+		m["container"] = value
 	}
 }
 
-// ResourceStridedSliceAssignEndMask sets the optional end_mask attribute to value.
-// If not specified, defaults to 0
-func ResourceStridedSliceAssignEndMask(value int64) ResourceStridedSliceAssignAttr {
+// CriticalSectionOpSharedName sets the optional shared_name attribute to value.
+//
+// value: the name by which this critical section is referred to.
+// If not specified, defaults to ""
+func CriticalSectionOpSharedName(value string) CriticalSectionOpAttr {
 	return func(m optionalAttr) {
-		m["end_mask"] = value
+		m["shared_name"] = value
 	}
 }
 
-// ResourceStridedSliceAssignEllipsisMask sets the optional ellipsis_mask attribute to value.
-// If not specified, defaults to 0
-func ResourceStridedSliceAssignEllipsisMask(value int64) ResourceStridedSliceAssignAttr {
-	return func(m optionalAttr) {
-		m["ellipsis_mask"] = value
+// Creates a handle to a CriticalSection resource.
+func CriticalSectionOp(scope *Scope, optional ...CriticalSectionOpAttr) (resource tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CriticalSectionOp",
 
-// ResourceStridedSliceAssignNewAxisMask sets the optional new_axis_mask attribute to value.
-// If not specified, defaults to 0
-func ResourceStridedSliceAssignNewAxisMask(value int64) ResourceStridedSliceAssignAttr {
-	return func(m optionalAttr) {
-		m["new_axis_mask"] = value
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// ResourceStridedSliceAssignShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
-// If not specified, defaults to 0
-func ResourceStridedSliceAssignShrinkAxisMask(value int64) ResourceStridedSliceAssignAttr {
+// AvgPool3DAttr is an optional argument to AvgPool3D.
+type AvgPool3DAttr func(optionalAttr)
+
+// AvgPool3DDataFormat sets the optional data_format attribute to value.
+//
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func AvgPool3DDataFormat(value string) AvgPool3DAttr {
 	return func(m optionalAttr) {
-		m["shrink_axis_mask"] = value
+		m["data_format"] = value
 	}
 }
 
-// Assign `value` to the sliced l-value reference of `ref`.
-//
-// The values of `value` are assigned to the positions in the variable
-// `ref` that are selected by the slice parameters. The slice parameters
-// `begin, `end`, `strides`, etc. work exactly as in `StridedSlice`.
+// Performs 3D average pooling on the input.
 //
-// NOTE this op currently does not support broadcasting and so `value`'s
-// shape must be exactly the shape produced by the slice of `ref`.
+// Arguments:
+//	input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over.
+//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
+// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
 //
-// Returns the created operation.
-func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...ResourceStridedSliceAssignAttr) (o *tf.Operation) {
+// Returns The average pooled output tensor.
+func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceStridedSliceAssign",
+		Type: "AvgPool3D",
 		Input: []tf.Input{
-			ref, begin, end, strides, value,
+			input,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// UnstageAttr is an optional argument to Unstage.
-type UnstageAttr func(optionalAttr)
-
-// UnstageCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// Returns element-wise remainder of division. This emulates C semantics in that
 //
-// REQUIRES: value >= 0
-func UnstageCapacity(value int64) UnstageAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// UnstageMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// the result here is consistent with a truncating divide. E.g.
+// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`.
 //
-// REQUIRES: value >= 0
-func UnstageMemoryLimit(value int64) UnstageAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// UnstageContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func UnstageContainer(value string) UnstageAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
+// *NOTE*: `Mod` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// UnstageSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func UnstageSharedName(value string) UnstageAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
+	opspec := tf.OpSpec{
+		Type: "Mod",
+		Input: []tf.Input{
+			x, y,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Op is similar to a lightweight Dequeue.
+// Computes square root of x element-wise.
 //
-// The basic functionality is similar to dequeue with many fewer
-// capabilities and options.  This Op is optimized for performance.
-func Unstage(scope *Scope, dtypes []tf.DataType, optional ...UnstageAttr) (values []tf.Output) {
+// I.e., \\(y = \sqrt{x} = x^{1/2}\\).
+func Sqrt(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "Unstage",
-
-		Attrs: attrs,
+		Type: "Sqrt",
+		Input: []tf.Input{
+			x,
+		},
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("Unstage", err)
-		return
-	}
-	return values
+	return op.Output(0)
 }
 
-// PriorityQueueV2Attr is an optional argument to PriorityQueueV2.
-type PriorityQueueV2Attr func(optionalAttr)
-
-// PriorityQueueV2ComponentTypes sets the optional component_types attribute to value.
+// Computes the gradients of 3-D convolution with respect to the filter.
 //
-// value: The type of each component in a value.
-// If not specified, defaults to <>
+// DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2
 //
-// REQUIRES: len(value) >= 0
-func PriorityQueueV2ComponentTypes(value []tf.DataType) PriorityQueueV2Attr {
-	return func(m optionalAttr) {
-		m["component_types"] = value
+// Arguments:
+//	input: Shape `[batch, depth, rows, cols, in_channels]`.
+//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
+// `in_channels` must match between `input` and `filter`.
+//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+// out_channels]`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// PriorityQueueV2Capacity sets the optional capacity attribute to value.
-//
-// value: The upper bound on the number of elements in this queue.
-// Negative numbers mean no limit.
-// If not specified, defaults to -1
-func PriorityQueueV2Capacity(value int64) PriorityQueueV2Attr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "Conv3DBackpropFilter",
+		Input: []tf.Input{
+			input, filter, out_backprop,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// PriorityQueueV2Container sets the optional container attribute to value.
+// Computes the gradient for the rsqrt of `x` wrt its input.
 //
-// value: If non-empty, this queue is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func PriorityQueueV2Container(value string) PriorityQueueV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
+// Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy`
+// is the corresponding input gradient.
+func RsqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RsqrtGrad",
+		Input: []tf.Input{
+			y, dy,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// PriorityQueueV2SharedName sets the optional shared_name attribute to value.
+// ReverseSequenceAttr is an optional argument to ReverseSequence.
+type ReverseSequenceAttr func(optionalAttr)
+
+// ReverseSequenceBatchDim sets the optional batch_dim attribute to value.
 //
-// value: If non-empty, this queue will be shared under the given name
-// across multiple sessions.
-// If not specified, defaults to ""
-func PriorityQueueV2SharedName(value string) PriorityQueueV2Attr {
+// value: The dimension along which reversal is performed.
+// If not specified, defaults to 0
+func ReverseSequenceBatchDim(value int64) ReverseSequenceAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["batch_dim"] = value
 	}
 }
 
-// A queue that produces elements sorted by the first component value.
+// Reverses variable length slices.
 //
-// Note that the PriorityQueue requires the first component of any element
-// to be a scalar int64, in addition to the other elements declared by
-// component_types.  Therefore calls to Enqueue and EnqueueMany (resp. Dequeue
-// and DequeueMany) on a PriorityQueue will all require (resp. output) one extra
-// entry in their input (resp. output) lists.
+// This op first slices `input` along the dimension `batch_dim`, and for each
+// slice `i`, reverses the first `seq_lengths[i]` elements along
+// the dimension `seq_dim`.
+//
+// The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,
+// and `seq_lengths` must be a vector of length `input.dims[batch_dim]`.
+//
+// The output slice `i` along dimension `batch_dim` is then given by input
+// slice `i`, with the first `seq_lengths[i]` slices along dimension
+// `seq_dim` reversed.
+//
+// For example:
+//
+// ```
+// # Given this:
+// batch_dim = 0
+// seq_dim = 1
+// input.dims = (4, 8, ...)
+// seq_lengths = [7, 2, 3, 5]
+//
+// # then slices of input are reversed on seq_dim, but only up to seq_lengths:
+// output[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...]
+// output[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...]
+// output[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...]
+// output[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...]
+//
+// # while entries past seq_lens are copied through:
+// output[0, 7:, :, ...] = input[0, 7:, :, ...]
+// output[1, 2:, :, ...] = input[1, 2:, :, ...]
+// output[2, 3:, :, ...] = input[2, 3:, :, ...]
+// output[3, 2:, :, ...] = input[3, 2:, :, ...]
+// ```
+//
+// In contrast, if:
+//
+// ```
+// # Given this:
+// batch_dim = 2
+// seq_dim = 0
+// input.dims = (8, ?, 4, ...)
+// seq_lengths = [7, 2, 3, 5]
+//
+// # then slices of input are reversed on seq_dim, but only up to seq_lengths:
+// output[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...]
+// output[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...]
+// output[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...]
+// output[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...]
+//
+// # while entries past seq_lens are copied through:
+// output[7:, :, 0, :, ...] = input[7:, :, 0, :, ...]
+// output[2:, :, 1, :, ...] = input[2:, :, 1, :, ...]
+// output[3:, :, 2, :, ...] = input[3:, :, 2, :, ...]
+// output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...]
+// ```
 //
 // Arguments:
-//	shapes: The shape of each component in a value. The length of this attr must
-// be either 0 or the same as the length of component_types. If the length of
-// this attr is 0, the shapes of queue elements are not constrained, and
-// only one element may be dequeued at a time.
+//	input: The input to reverse.
+//	seq_lengths: 1-D with length `input.dims(batch_dim)` and
+// `max(seq_lengths) <= input.dims(seq_dim)`
+//	seq_dim: The dimension which is partially reversed.
 //
-// Returns The handle to the queue.
-func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV2Attr) (handle tf.Output) {
+// Returns The partially reversed input. It has the same shape as `input`.
+func ReverseSequence(scope *Scope, input tf.Output, seq_lengths tf.Output, seq_dim int64, optional ...ReverseSequenceAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"shapes": shapes}
+	attrs := map[string]interface{}{"seq_dim": seq_dim}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "PriorityQueueV2",
-
+		Type: "ReverseSequence",
+		Input: []tf.Input{
+			input, seq_lengths,
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// StridedSliceAttr is an optional argument to StridedSlice.
-type StridedSliceAttr func(optionalAttr)
-
-// StridedSliceBeginMask sets the optional begin_mask attribute to value.
-//
-// value: a bitmask where a bit i being 1 means to ignore the begin
-// value and instead use the largest interval possible. At runtime
-// begin[i] will be replaced with `[0, n-1) if `stride[i] > 0` or
-// `[-1, n-1]` if `stride[i] < 0`
-// If not specified, defaults to 0
-func StridedSliceBeginMask(value int64) StridedSliceAttr {
-	return func(m optionalAttr) {
-		m["begin_mask"] = value
-	}
-}
-
-// StridedSliceEndMask sets the optional end_mask attribute to value.
-//
-// value: analogous to `begin_mask`
-// If not specified, defaults to 0
-func StridedSliceEndMask(value int64) StridedSliceAttr {
-	return func(m optionalAttr) {
-		m["end_mask"] = value
-	}
-}
-
-// StridedSliceEllipsisMask sets the optional ellipsis_mask attribute to value.
-//
-// value: a bitmask where bit `i` being 1 means the `i`th
-// position is actually an ellipsis. One bit at most can be 1.
-// If `ellipsis_mask == 0`, then an implicit ellipsis mask of `1 << (m+1)`
-// is provided. This means that `foo[3:5] == foo[3:5, ...]`. An ellipsis
-// implicitly creates as many range specifications as necessary to fully
-// specify the sliced range for every dimension. For example for a 4-dimensional
-// tensor `foo` the slice `foo[2, ..., 5:8]` implies `foo[2, :, :, 5:8]`.
-// If not specified, defaults to 0
-func StridedSliceEllipsisMask(value int64) StridedSliceAttr {
-	return func(m optionalAttr) {
-		m["ellipsis_mask"] = value
-	}
-}
+// DepthwiseConv2dNativeAttr is an optional argument to DepthwiseConv2dNative.
+type DepthwiseConv2dNativeAttr func(optionalAttr)
 
-// StridedSliceNewAxisMask sets the optional new_axis_mask attribute to value.
+// DepthwiseConv2dNativeDataFormat sets the optional data_format attribute to value.
 //
-// value: a bitmask where bit `i` being 1 means the `i`th
-// specification creates a new shape 1 dimension. For example
-// `foo[:4, tf.newaxis, :2]` would produce a shape `(4, 1, 2)` tensor.
-// If not specified, defaults to 0
-func StridedSliceNewAxisMask(value int64) StridedSliceAttr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, height, width, channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, channels, height, width].
+// If not specified, defaults to "NHWC"
+func DepthwiseConv2dNativeDataFormat(value string) DepthwiseConv2dNativeAttr {
 	return func(m optionalAttr) {
-		m["new_axis_mask"] = value
+		m["data_format"] = value
 	}
 }
 
-// StridedSliceShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
+// DepthwiseConv2dNativeDilations sets the optional dilations attribute to value.
 //
-// value: a bitmask where bit `i` implies that the `i`th
-// specification should shrink the dimensionality. begin and end
-// must imply a slice of size 1 in the dimension. For example in
-// python one might do `foo[:, 3, :]` which would result in
-// `shrink_axis_mask` being 2.
-// If not specified, defaults to 0
-func StridedSliceShrinkAxisMask(value int64) StridedSliceAttr {
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func DepthwiseConv2dNativeDilations(value []int64) DepthwiseConv2dNativeAttr {
 	return func(m optionalAttr) {
-		m["shrink_axis_mask"] = value
+		m["dilations"] = value
 	}
 }
 
-// Return a strided slice from `input`.
-//
-// Note, most python users will want to use the Python `Tensor.__getitem__`
-// or `Variable.__getitem__` rather than this op directly.
-//
-// The goal of this op is to produce a new tensor with a subset of
-// the elements from the `n` dimensional `input` tensor. The subset is chosen using
-// a sequence of `m` sparse range specifications encoded into the arguments
-// of this function. Note, in some cases
-// `m` could be equal to `n`, but this need not be the case. Each
-// range specification entry can be one of the following:
-//
-// - An ellipsis (...). Ellipses are used to imply zero or more
-//   dimensions of full-dimension selection and are produced using
-//   `ellipsis_mask`. For example, `foo[...]` is the identity slice.
-//
-// - A new axis. This is used to insert a new shape=1 dimension and is
-//   produced using `new_axis_mask`. For example, `foo[:, ...]` where
-//   `foo` is shape `(3, 4)` produces a `(1, 3, 4)` tensor.
-//
-//
-// - A range `begin:end:stride`. This is used to specify how much to choose from
-//   a given dimension. `stride` can be any integer but 0.  `begin` is an integer
-//   which represents the index of the first value to select while `end` represents
-//   the index of the last value to select. The number of values selected in each
-//   dimension is `end - begin` if `stride > 0` and `begin - end` if `stride < 0`.
-//   `begin` and `end` can be negative where `-1` is the last element, `-2` is
-//   the second to last. `begin_mask` controls whether to replace the explicitly
-//   given `begin` with an implicit effective value of `0` if `stride > 0` and
-//   `-1` if `stride < 0`. `end_mask` is analogous but produces the number
-//   required to create the largest open interval. For example, given a shape
-//   `(3,)` tensor `foo[:]`, the effective `begin` and `end` are `0` and `3`. Do
-//   not assume this is equivalent to `foo[0:-1]` which has an effective `begin`
-//   and `end` of `0` and `2`. Another example is `foo[-2::-1]` which reverses the
-//   first dimension of a tensor while dropping the last two (in the original
-//   order elements). For example `foo = [1,2,3,4]; foo[-2::-1]` is `[4,3]`.
-//
-// - A single index. This is used to keep only elements that have a given
-//   index. For example (`foo[2, :]` on a shape `(5,6)` tensor produces a
-//   shape `(6,)` tensor. This is encoded in `begin` and `end` and
-//   `shrink_axis_mask`.
+// Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors.
 //
-// Each conceptual range specification is encoded in the op's argument. This
-// encoding is best understand by considering a non-trivial example. In
-// particular,
-// `foo[1, 2:4, None, ..., :-3:-1, :]` will be encoded as
+// Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
+// and a filter / kernel tensor of shape
+// `[filter_height, filter_width, in_channels, channel_multiplier]`, containing
+// `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies
+// a different filter to each input channel (expanding from 1 channel to
+// `channel_multiplier` channels for each), then concatenates the results
+// together. Thus, the output has `in_channels * channel_multiplier` channels.
 //
 // ```
-// begin = [1, 2, x, x, 0, x] # x denotes don't care (usually 0)
-// end = [2, 4, x, x, -3, x]
-// strides = [1, 1, x, x, -1, 1]
-// begin_mask = 1<<4 | 1 << 5 = 48
-// end_mask = 1<<5 = 32
-// ellipsis_mask = 1<<3 = 8
-// new_axis_mask = 1<<2 4
-// shrink_axis_mask = 1<<0
+// for k in 0..in_channels-1
+//   for q in 0..channel_multiplier-1
+//     output[b, i, j, k * channel_multiplier + q] =
+//       sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] *
+//                         filter[di, dj, k, q]
 // ```
 //
-// In this case if `foo.shape` is (5, 5, 5, 5, 5, 5) the final shape of
-// the slice becomes (2, 1, 5, 5, 2, 5).
-// Let us walk step by step through each argument specification.
-//
-// 1.  The first argument in the example slice is turned into `begin = 1` and
-// `end = begin + 1 = 2`. To disambiguate from the original spec `2:4` we
-// also set the appropriate bit in `shrink_axis_mask`.
-//
-// 2. `2:4` is contributes 2, 4, 1 to begin, end, and stride. All masks have
-// zero bits contributed.
+// Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
+// horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
 //
-// 3. None is a synonym for `tf.newaxis`. This means insert a dimension of size 1
-// dimension in the final shape. Dummy values are contributed to begin,
-// end and stride, while the new_axis_mask bit is set.
+// Arguments:
 //
-// 4. `...` grab the full ranges from as many dimensions as needed to
-// fully specify a slice for every dimension of the input shape.
 //
-// 5. `:-3:-1` shows the use of negative indices. A negative index `i` associated
-// with a dimension that has shape `s` is converted to a positive index
-// `s + i`. So `-1` becomes `s-1` (i.e. the last element). This conversion
-// is done internally so begin, end and strides receive x, -3, and -1.
-// The appropriate begin_mask bit is set to indicate the start range is the
-// full range (ignoring the x).
+//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
+// of `input`.
+//	padding: The type of padding algorithm to use.
+func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DepthwiseConv2dNative",
+		Input: []tf.Input{
+			input, filter,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3.
+type TensorArrayGatherV3Attr func(optionalAttr)
+
+// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value.
 //
-// 6. `:` indicates that the entire contents of the corresponding dimension
-// is selected. This is equivalent to `::` or `0::1`. begin, end, and strides
-// receive 0, 0, and 1, respectively. The appropriate bits in `begin_mask` and
-// `end_mask` are also set.
+// value: The expected shape of an element, if known. Used to
+// validate the shapes of TensorArray elements. If this shape is not
+// fully specified, gathering zero-size TensorArrays is an error.
+// If not specified, defaults to <unknown_rank:true >
+func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr {
+	return func(m optionalAttr) {
+		m["element_shape"] = value
+	}
+}
+
+// Gather specific elements from the TensorArray into output `value`.
 //
-// *Requirements*:
-//   `0 != strides[i] for i in [0, m)`
-//   `ellipsis_mask must be a power of two (only one ellipsis)`
+// All elements selected by `indices` must have the same shape.
 //
 // Arguments:
+//	handle: The handle to a TensorArray.
+//	indices: The locations in the TensorArray from which to read tensor elements.
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//	dtype: The type of the elem that is returned.
 //
-//	begin: `begin[k]` specifies the offset into the `k`th range specification.
-// The exact dimension this corresponds to will be determined by context.
-// Out-of-bounds values will be silently clamped. If the `k`th bit of
-// `begin_mask` then `begin[k]` is ignored and the full range of the
-// appropriate dimension is used instead. Negative values causes indexing
-// to start from the highest element e.g. If `foo==[1,2,3]` then `foo[-1]==3`.
-//	end: `end[i]` is like `begin` with the exception that `end_mask` is
-// used to determine full ranges.
-//	strides: `strides[i]` specifies the increment in the `i`th specification
-// after extracting a given element. Negative indices will reverse
-// the original order. Out or range values are
-// clamped to `[0,dim[i]) if slice[i]>0` or `[-1,dim[i]-1] if slice[i] < 0`
-func StridedSlice(scope *Scope, input tf.Output, begin tf.Output, end tf.Output, strides tf.Output, optional ...StridedSliceAttr) (output tf.Output) {
+// Returns All of the elements in the TensorArray, concatenated along a new
+// axis (the new dimension 0).
+func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"dtype": dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StridedSlice",
+		Type: "TensorArrayGatherV3",
 		Input: []tf.Input{
-			input, begin, end, strides,
+			handle, indices, flow_in,
 		},
 		Attrs: attrs,
 	}
@@ -5133,107 +4787,155 @@ func StridedSlice(scope *Scope, input tf.Output, begin tf.Output, end tf.Output,
 	return op.Output(0)
 }
 
-// Interleave the values from the `data` tensors into a single tensor.
+// Converts each string in the input Tensor to its hash mod by a number of buckets.
 //
-// Builds a merged tensor such that
+// The hash function is deterministic on the content of the string within the
+// process and will never change. However, it is not suitable for cryptography.
+// This function may be used when CPU time is scarce and inputs are trusted or
+// unimportant. There is a risk of adversaries constructing inputs that all hash
+// to the same bucket. To prevent this problem, use a strong hash function with
+// `tf.string_to_hash_bucket_strong`.
 //
-// ```python
-//     merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
-// ```
+// Arguments:
+//	input: The strings to assign a hash bucket.
+//	num_buckets: The number of buckets.
 //
-// For example, if each `indices[m]` is scalar or vector, we have
+// Returns A Tensor of the same shape as the input `string_tensor`.
+func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_buckets": num_buckets}
+	opspec := tf.OpSpec{
+		Type: "StringToHashBucketFast",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Returns the max of x and y (i.e. x > y ? x : y) element-wise.
 //
-// ```python
-//     # Scalar indices:
-//     merged[indices[m], ...] = data[m][...]
+// *NOTE*: `Maximum` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Maximum",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Outputs all keys and values in the table.
 //
-//     # Vector indices:
-//     merged[indices[m][i], ...] = data[m][i, ...]
-// ```
+// Arguments:
+//	table_handle: Handle to the table.
 //
-// Each `data[i].shape` must start with the corresponding `indices[i].shape`,
-// and the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we
-// must have `data[i].shape = indices[i].shape + constant`.  In terms of this
-// `constant`, the output shape is
 //
-//     merged.shape = [max(indices)] + constant
 //
-// Values may be merged in parallel, so if an index appears in both `indices[m][i]`
-// and `indices[n][j]`, the result may be invalid. This differs from the normal
-// DynamicStitch operator that defines the behavior in that case.
+// Returns Vector of all keys present in the table.Tensor of all values in the table. Indexed in parallel with `keys`.
+func LookupTableExportV2(scope *Scope, table_handle tf.Output, Tkeys tf.DataType, Tvalues tf.DataType) (keys tf.Output, values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"Tkeys": Tkeys, "Tvalues": Tvalues}
+	opspec := tf.OpSpec{
+		Type: "LookupTableExportV2",
+		Input: []tf.Input{
+			table_handle,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Real-valued fast Fourier transform.
 //
-// For example:
+// Computes the 1-dimensional discrete Fourier transform of a real-valued signal
+// over the inner-most dimension of `input`.
 //
-// ```python
-//     indices[0] = 6
-//     indices[1] = [4, 1]
-//     indices[2] = [[5, 2], [0, 3]]
-//     data[0] = [61, 62]
-//     data[1] = [[41, 42], [11, 12]]
-//     data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
-//     merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
-//               [51, 52], [61, 62]]
-// ```
+// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the
+// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term,
+// followed by the `fft_length / 2` positive-frequency terms.
 //
-// This method can be used to merge partitions created by `dynamic_partition`
-// as illustrated on the following example:
+// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the
+// corresponding dimension of `input`, the dimension is cropped. If it is larger,
+// the dimension is padded with zeros.
 //
-// ```python
-//     # Apply function (increments x_i) on elements for which a certain condition
-//     # apply (x_i != -1 in this example).
-//     x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
-//     condition_mask=tf.not_equal(x,tf.constant(-1.))
-//     partitioned_data = tf.dynamic_partition(
-//         x, tf.cast(condition_mask, tf.int32) , 2)
-//     partitioned_data[1] = partitioned_data[1] + 1.0
-//     condition_indices = tf.dynamic_partition(
-//         tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)
-//     x = tf.dynamic_stitch(condition_indices, partitioned_data)
-//     # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain
-//     # unchanged.
-// ```
+// Arguments:
+//	input: A float32 tensor.
+//	fft_length: An int32 tensor of shape [1]. The FFT length.
 //
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/DynamicStitch.png" alt>
-// </div>
-func ParallelDynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) {
+// Returns A complex64 tensor of the same rank as `input`. The inner-most
+//   dimension of `input` is replaced with the `fft_length / 2 + 1` unique
+//   frequency components of its 1D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.rfft
+// @end_compatibility
+func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ParallelDynamicStitch",
+		Type: "RFFT",
 		Input: []tf.Input{
-			tf.OutputList(indices), tf.OutputList(data),
+			input, fft_length,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// TensorArrayGatherV2Attr is an optional argument to TensorArrayGatherV2.
-type TensorArrayGatherV2Attr func(optionalAttr)
+// ComplexAttr is an optional argument to Complex.
+type ComplexAttr func(optionalAttr)
 
-// TensorArrayGatherV2ElementShape sets the optional element_shape attribute to value.
-// If not specified, defaults to <unknown_rank:true >
-func TensorArrayGatherV2ElementShape(value tf.Shape) TensorArrayGatherV2Attr {
+// ComplexTout sets the optional Tout attribute to value.
+// If not specified, defaults to DT_COMPLEX64
+func ComplexTout(value tf.DataType) ComplexAttr {
 	return func(m optionalAttr) {
-		m["element_shape"] = value
+		m["Tout"] = value
 	}
 }
 
-// Deprecated. Use TensorArrayGatherV3
-func TensorArrayGatherV2(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV2Attr) (value tf.Output) {
+// Converts two real numbers to a complex number.
+//
+// Given a tensor `real` representing the real part of a complex number, and a
+// tensor `imag` representing the imaginary part of a complex number, this
+// operation returns complex numbers elementwise of the form \\(a + bj\\), where
+// *a* represents the `real` part and *b* represents the `imag` part.
+//
+// The input tensors `real` and `imag` must have the same shape.
+//
+// For example:
+//
+// ```
+// # tensor 'real' is [2.25, 3.25]
+// # tensor `imag` is [4.75, 5.75]
+// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]]
+// ```
+func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayGatherV2",
+		Type: "Complex",
 		Input: []tf.Input{
-			handle, indices, flow_in,
+			real, imag,
 		},
 		Attrs: attrs,
 	}
@@ -5241,256 +4943,283 @@ func TensorArrayGatherV2(scope *Scope, handle tf.Output, indices tf.Output, flow
 	return op.Output(0)
 }
 
-// Interleave the values from the `data` tensors into a single tensor.
-//
-// Builds a merged tensor such that
-//
-// ```python
-//     merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
-// ```
-//
-// For example, if each `indices[m]` is scalar or vector, we have
-//
-// ```python
-//     # Scalar indices:
-//     merged[indices[m], ...] = data[m][...]
-//
-//     # Vector indices:
-//     merged[indices[m][i], ...] = data[m][i, ...]
-// ```
-//
-// Each `data[i].shape` must start with the corresponding `indices[i].shape`,
-// and the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we
-// must have `data[i].shape = indices[i].shape + constant`.  In terms of this
-// `constant`, the output shape is
-//
-//     merged.shape = [max(indices)] + constant
+// ImagAttr is an optional argument to Imag.
+type ImagAttr func(optionalAttr)
+
+// ImagTout sets the optional Tout attribute to value.
+// If not specified, defaults to DT_FLOAT
+func ImagTout(value tf.DataType) ImagAttr {
+	return func(m optionalAttr) {
+		m["Tout"] = value
+	}
+}
+
+// Returns the imaginary part of a complex number.
 //
-// Values are merged in order, so if an index appears in both `indices[m][i]` and
-// `indices[n][j]` for `(m,i) < (n,j)` the slice `data[n][j]` will appear in the
-// merged result. If you do not need this guarantee, ParallelDynamicStitch might
-// perform better on some devices.
+// Given a tensor `input` of complex numbers, this operation returns a tensor of
+// type `float` that is the imaginary part of each element in `input`. All
+// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a*
+// is the real part and *b* is the imaginary part returned by this operation.
 //
 // For example:
 //
-// ```python
-//     indices[0] = 6
-//     indices[1] = [4, 1]
-//     indices[2] = [[5, 2], [0, 3]]
-//     data[0] = [61, 62]
-//     data[1] = [[41, 42], [11, 12]]
-//     data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
-//     merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
-//               [51, 52], [61, 62]]
 // ```
-//
-// This method can be used to merge partitions created by `dynamic_partition`
-// as illustrated on the following example:
-//
-// ```python
-//     # Apply function (increments x_i) on elements for which a certain condition
-//     # apply (x_i != -1 in this example).
-//     x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
-//     condition_mask=tf.not_equal(x,tf.constant(-1.))
-//     partitioned_data = tf.dynamic_partition(
-//         x, tf.cast(condition_mask, tf.int32) , 2)
-//     partitioned_data[1] = partitioned_data[1] + 1.0
-//     condition_indices = tf.dynamic_partition(
-//         tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)
-//     x = tf.dynamic_stitch(condition_indices, partitioned_data)
-//     # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain
-//     # unchanged.
+// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
+// tf.imag(input) ==> [4.75, 5.75]
 // ```
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/DynamicStitch.png" alt>
-// </div>
-func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) {
+func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "DynamicStitch",
+		Type: "Imag",
 		Input: []tf.Input{
-			tf.OutputList(indices), tf.OutputList(data),
+			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Produces a summary of any statistics recorded by the given statistics manager.
-func StatsAggregatorSummary(scope *Scope, iterator tf.Output) (summary tf.Output) {
+// Compute the Hurwitz zeta function \\(\zeta(x, q)\\).
+//
+// The Hurwitz zeta function is defined as:
+//
+//
+// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\)
+func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "StatsAggregatorSummary",
+		Type: "Zeta",
 		Input: []tf.Input{
-			iterator,
+			x, q,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// FIFOQueueV2Attr is an optional argument to FIFOQueueV2.
-type FIFOQueueV2Attr func(optionalAttr)
+// LRNGradAttr is an optional argument to LRNGrad.
+type LRNGradAttr func(optionalAttr)
 
-// FIFOQueueV2Shapes sets the optional shapes attribute to value.
-//
-// value: The shape of each component in a value. The length of this attr must
-// be either 0 or the same as the length of component_types. If the length of
-// this attr is 0, the shapes of queue elements are not constrained, and
-// only one element may be dequeued at a time.
-// If not specified, defaults to <>
+// LRNGradDepthRadius sets the optional depth_radius attribute to value.
 //
-// REQUIRES: len(value) >= 0
-func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr {
+// value: A depth radius.
+// If not specified, defaults to 5
+func LRNGradDepthRadius(value int64) LRNGradAttr {
 	return func(m optionalAttr) {
-		m["shapes"] = value
+		m["depth_radius"] = value
 	}
 }
 
-// FIFOQueueV2Capacity sets the optional capacity attribute to value.
+// LRNGradBias sets the optional bias attribute to value.
 //
-// value: The upper bound on the number of elements in this queue.
-// Negative numbers mean no limit.
-// If not specified, defaults to -1
-func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr {
+// value: An offset (usually > 0 to avoid dividing by 0).
+// If not specified, defaults to 1
+func LRNGradBias(value float32) LRNGradAttr {
 	return func(m optionalAttr) {
-		m["capacity"] = value
+		m["bias"] = value
 	}
 }
 
-// FIFOQueueV2Container sets the optional container attribute to value.
+// LRNGradAlpha sets the optional alpha attribute to value.
 //
-// value: If non-empty, this queue is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func FIFOQueueV2Container(value string) FIFOQueueV2Attr {
+// value: A scale factor, usually positive.
+// If not specified, defaults to 1
+func LRNGradAlpha(value float32) LRNGradAttr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["alpha"] = value
 	}
 }
 
-// FIFOQueueV2SharedName sets the optional shared_name attribute to value.
+// LRNGradBeta sets the optional beta attribute to value.
 //
-// value: If non-empty, this queue will be shared under the given name
-// across multiple sessions.
-// If not specified, defaults to ""
-func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr {
+// value: An exponent.
+// If not specified, defaults to 0.5
+func LRNGradBeta(value float32) LRNGradAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["beta"] = value
 	}
 }
 
-// A queue that produces elements in first-in first-out order.
+// Gradients for Local Response Normalization.
 //
 // Arguments:
-//	component_types: The type of each component in a value.
+//	input_grads: 4-D with shape `[batch, height, width, channels]`.
+//	input_image: 4-D with shape `[batch, height, width, channels]`.
+//	output_image: 4-D with shape `[batch, height, width, channels]`.
 //
-// Returns The handle to the queue.
-func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) {
+// Returns The gradients for LRN.
+func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_image tf.Output, optional ...LRNGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"component_types": component_types}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FIFOQueueV2",
-
+		Type: "LRNGrad",
+		Input: []tf.Input{
+			input_grads, input_image, output_image,
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Converts the given `resource_handle` representing an iterator to a variant tensor.
+// AnyAttr is an optional argument to Any.
+type AnyAttr func(optionalAttr)
+
+// AnyKeepDims sets the optional keep_dims attribute to value.
+//
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func AnyKeepDims(value bool) AnyAttr {
+	return func(m optionalAttr) {
+		m["keep_dims"] = value
+	}
+}
+
+// Computes the "logical or" of elements across dimensions of a tensor.
+//
+// Reduces `input` along the dimensions given in `axis`. Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `axis`. If `keep_dims` is true, the reduced dimensions are
+// retained with length 1.
 //
 // Arguments:
-//	resource_handle: A handle to an iterator resource.
+//	input: The tensor to reduce.
+//	axis: The dimensions to reduce. Must be in the range
+// `[-rank(input), rank(input))`.
 //
-// Returns A variant tensor storing the state of the iterator contained in the
-// resource.
-func SerializeIterator(scope *Scope, resource_handle tf.Output) (serialized tf.Output) {
+// Returns The reduced tensor.
+func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SerializeIterator",
+		Type: "Any",
 		Input: []tf.Input{
-			resource_handle,
+			input, axis,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Return a tensor with the same shape and contents as the input tensor or value.
-func Identity(scope *Scope, input tf.Output) (output tf.Output) {
+// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl.
+type ResourceApplyFtrlAttr func(optionalAttr)
+
+// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the Ftrl-proximal scheme.
+//
+// accum_new = accum + grad * grad
+// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+// accum = accum_new
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	linear: Should be from a Variable().
+//	grad: The gradient.
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regulariation. Must be a scalar.
+//	l2: L2 regulariation. Must be a scalar.
+//	lr_power: Scaling factor. Must be a scalar.
+//
+// Returns the created operation.
+func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Identity",
+		Type: "ResourceApplyFtrl",
 		Input: []tf.Input{
-			input,
+			var_, accum, linear, grad, lr, l1, l2, lr_power,
 		},
+		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// IteratorFromStringHandleAttr is an optional argument to IteratorFromStringHandle.
-type IteratorFromStringHandleAttr func(optionalAttr)
+// RandomUniformAttr is an optional argument to RandomUniform.
+type RandomUniformAttr func(optionalAttr)
 
-// IteratorFromStringHandleOutputTypes sets the optional output_types attribute to value.
-//
-// value: If specified, defines the type of each tuple component in an
-// element produced by the resulting iterator.
-// If not specified, defaults to <>
+// RandomUniformSeed sets the optional seed attribute to value.
 //
-// REQUIRES: len(value) >= 0
-func IteratorFromStringHandleOutputTypes(value []tf.DataType) IteratorFromStringHandleAttr {
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func RandomUniformSeed(value int64) RandomUniformAttr {
 	return func(m optionalAttr) {
-		m["output_types"] = value
+		m["seed"] = value
 	}
 }
 
-// IteratorFromStringHandleOutputShapes sets the optional output_shapes attribute to value.
-//
-// value: If specified, defines the shape of each tuple component in an
-// element produced by the resulting iterator.
-// If not specified, defaults to <>
+// RandomUniformSeed2 sets the optional seed2 attribute to value.
 //
-// REQUIRES: len(value) >= 0
-func IteratorFromStringHandleOutputShapes(value []tf.Shape) IteratorFromStringHandleAttr {
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomUniformSeed2(value int64) RandomUniformAttr {
 	return func(m optionalAttr) {
-		m["output_shapes"] = value
+		m["seed2"] = value
 	}
 }
 
-// Converts the given string representing a handle to an iterator to a resource.
+// Outputs random values from a uniform distribution.
+//
+// The generated values follow a uniform distribution in the range `[0, 1)`. The
+// lower bound 0 is included in the range, while the upper bound 1 is excluded.
 //
 // Arguments:
-//	string_handle: A string representation of the given handle.
+//	shape: The shape of the output tensor.
+//	dtype: The type of the output.
 //
-// Returns A handle to an iterator resource.
-func IteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional ...IteratorFromStringHandleAttr) (resource_handle tf.Output) {
+// Returns A tensor of the specified shape filled with uniform random values.
+func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"dtype": dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "IteratorFromStringHandle",
+		Type: "RandomUniform",
 		Input: []tf.Input{
-			string_handle,
+			shape,
 		},
 		Attrs: attrs,
 	}
@@ -5498,21 +5227,30 @@ func IteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional ..
 	return op.Output(0)
 }
 
-// ShapeNAttr is an optional argument to ShapeN.
-type ShapeNAttr func(optionalAttr)
+// AssertAttr is an optional argument to Assert.
+type AssertAttr func(optionalAttr)
 
-// ShapeNOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_INT32
-func ShapeNOutType(value tf.DataType) ShapeNAttr {
+// AssertSummarize sets the optional summarize attribute to value.
+//
+// value: Print this many entries of each tensor.
+// If not specified, defaults to 3
+func AssertSummarize(value int64) AssertAttr {
 	return func(m optionalAttr) {
-		m["out_type"] = value
+		m["summarize"] = value
 	}
 }
 
-// Returns shape of tensors.
+// Asserts that the given condition is true.
 //
-// This operation returns N 1-D integer tensors representing shape of `input[i]s`.
-func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []tf.Output) {
+// If `condition` evaluates to false, print the list of tensors in `data`.
+// `summarize` determines how many entries of the tensors to print.
+//
+// Arguments:
+//	condition: The condition to evaluate.
+//	data: The tensors to print out when condition is false.
+//
+// Returns the created operation.
+func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -5521,62 +5259,79 @@ func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []t
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ShapeN",
+		Type: "Assert",
 		Input: []tf.Input{
-			tf.OutputList(input),
+			condition, tf.OutputList(data),
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
-		scope.UpdateErr("ShapeN", err)
-		return
-	}
-	return output
+	return scope.AddOperation(opspec)
 }
 
-// Converts the given `resource_handle` representing an iterator to a string.
+// Computes element-wise population count (a.k.a. popcount, bitsum, bitcount).
 //
-// Arguments:
-//	resource_handle: A handle to an iterator resource.
+// For each entry in `x`, calculates the number of `1` (on) bits in the binary
+// representation of that entry.
 //
-// Returns A string representation of the given handle.
-func IteratorToStringHandle(scope *Scope, resource_handle tf.Output) (string_handle tf.Output) {
+// **NOTE**: It is more efficient to first `tf.bitcast` your tensors into
+// `int32` or `int64` and perform the bitcount on the result, than to feed in
+// 8- or 16-bit inputs and then aggregate the resulting counts.
+func PopulationCount(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "IteratorToStringHandle",
+		Type: "PopulationCount",
 		Input: []tf.Input{
-			resource_handle,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Outputs the single element from the given dataset.
+// Split a `SparseTensor` into `num_split` tensors along one dimension.
 //
-// Arguments:
-//	dataset: A handle to a dataset that contains a single element.
+// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices
+// `[0 : shape[split_dim] % num_split]` gets one extra dimension.
+// For example, if `split_dim = 1` and `num_split = 2` and the input is
 //
+//     input_tensor = shape = [2, 7]
+//     [    a   d e  ]
+//     [b c          ]
 //
+// Graphically the output tensors are:
 //
-// Returns The components of the single element of `input`.
-func DatasetToSingleElement(scope *Scope, dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) {
+//     output_tensor[0] = shape = [2, 4]
+//     [    a  ]
+//     [b c    ]
+//
+//     output_tensor[1] = shape = [2, 3]
+//     [ d e  ]
+//     [      ]
+//
+// Arguments:
+//	split_dim: 0-D.  The dimension along which to split.  Must be in the range
+// `[0, rank(shape))`.
+//	indices: 2-D tensor represents the indices of the sparse tensor.
+//	values: 1-D tensor represents the values of the sparse tensor.
+//	shape: 1-D. tensor represents the shape of the sparse tensor.
+// output indices: A list of 1-D tensors represents the indices of the output
+// sparse tensors.
+//	num_split: The number of ways to split.
+//
+// Returns A list of 1-D tensors represents the values of the output sparse
+// tensors.A list of 1-D tensors represents the shape of the output sparse
+// tensors.
+func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	attrs := map[string]interface{}{"num_split": num_split}
 	opspec := tf.OpSpec{
-		Type: "DatasetToSingleElement",
+		Type: "SparseSplit",
 		Input: []tf.Input{
-			dataset,
+			split_dim, indices, values, shape,
 		},
 		Attrs: attrs,
 	}
@@ -5586,282 +5341,375 @@ func DatasetToSingleElement(scope *Scope, dataset tf.Output, output_types []tf.D
 	}
 	var idx int
 	var err error
-	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
-		scope.UpdateErr("DatasetToSingleElement", err)
+	if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
 		return
 	}
-	return components
+	if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
+		return
+	}
+	if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
+		return
+	}
+	return output_indices, output_values, output_shape
 }
 
-// Gets the next output from the given iterator.
-func IteratorGetNext(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) {
+// Returns the truth value of (x < y) element-wise.
+//
+// *NOTE*: `Less` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "IteratorGetNext",
+		Type: "Less",
 		Input: []tf.Input{
-			iterator,
+			x, y,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// QuantizedReluXAttr is an optional argument to QuantizedReluX.
+type QuantizedReluXAttr func(optionalAttr)
+
+// QuantizedReluXOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_QUINT8
+func QuantizedReluXOutType(value tf.DataType) QuantizedReluXAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`
+//
+// Arguments:
+//
+//
+//	min_features: The float value that the lowest quantized value represents.
+//	max_features: The float value that the highest quantized value represents.
+//
+// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents.
+func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluXAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	var idx int
-	var err error
-	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
-		scope.UpdateErr("IteratorGetNext", err)
-		return
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
 	}
-	return components
+	opspec := tf.OpSpec{
+		Type: "QuantizedReluX",
+		Input: []tf.Input{
+			features, max_value, min_features, max_features,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Makes a new iterator from the given `dataset` and stores it in `iterator`.
+// Applies softmax to a batched N-D `SparseTensor`.
 //
-// This operation may be executed multiple times. Each execution will reset the
-// iterator in `iterator` to the first element of `dataset`.
+// The inputs represent an N-D SparseTensor  with logical shape `[..., B, C]`
+// (where `N >= 2`), and with indices sorted in the canonical lexicographic order.
 //
-// Returns the created operation.
-func MakeIterator(scope *Scope, dataset tf.Output, iterator tf.Output) (o *tf.Operation) {
+// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost
+// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly
+// zero elements do not participate*.  Specifically, the algorithm is equivalent
+// to the following:
+//
+//   (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix
+//       with shape `[B, C]`, along the size-C dimension;
+//   (2) Masks out the original implicitly-zero locations;
+//   (3) Renormalizes the remaining elements.
+//
+// Hence, the `SparseTensor` result has exactly the same non-zero indices and
+// shape.
+//
+// Arguments:
+//	sp_indices: 2-D.  `NNZ x R` matrix with the indices of non-empty values in a
+// SparseTensor, in canonical ordering.
+//	sp_values: 1-D.  `NNZ` non-empty values corresponding to `sp_indices`.
+//	sp_shape: 1-D.  Shape of the input SparseTensor.
+//
+// Returns 1-D.  The `NNZ` values for the result `SparseTensor`.
+func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "MakeIterator",
+		Type: "SparseSoftmax",
 		Input: []tf.Input{
-			dataset, iterator,
+			sp_indices, sp_values, sp_shape,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Creates a dataset that emits the records from one or more TFRecord files.
-//
-// Arguments:
-//	filenames: A scalar or vector containing the name(s) of the file(s) to be
-// read.
-//	compression_type: A scalar containing either (i) the empty string (no
-// compression), (ii) "ZLIB", or (iii) "GZIP".
-//	buffer_size: A scalar representing the number of bytes to buffer. A value of
-// 0 means no buffering will be performed.
-func TFRecordDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) {
+// RandomPoissonAttr is an optional argument to RandomPoisson.
+type RandomPoissonAttr func(optionalAttr)
+
+// RandomPoissonSeed sets the optional seed attribute to value.
+// If not specified, defaults to 0
+func RandomPoissonSeed(value int64) RandomPoissonAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// RandomPoissonSeed2 sets the optional seed2 attribute to value.
+// If not specified, defaults to 0
+func RandomPoissonSeed2(value int64) RandomPoissonAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Use RandomPoissonV2 instead.
+//
+// DEPRECATED at GraphDef version 25: Replaced by RandomPoissonV2
+func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "TFRecordDataset",
+		Type: "RandomPoisson",
 		Input: []tf.Input{
-			filenames, compression_type, buffer_size,
+			shape, rate,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Concatenates quantized tensors along one dimension.
-//
-// Arguments:
-//	concat_dim: 0-D.  The dimension along which to concatenate.  Must be in the
-// range [0, rank(values)).
-//	values: The `N` Tensors to concatenate. Their ranks and types must match,
-// and their sizes must match in all dimensions except `concat_dim`.
-//	input_mins: The minimum scalar values for each of the input tensors.
-//	input_maxes: The maximum scalar values for each of the input tensors.
+// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2.
+type MaxPoolGradV2Attr func(optionalAttr)
+
+// MaxPoolGradV2DataFormat sets the optional data_format attribute to value.
 //
-// Returns A `Tensor` with the concatenation of values stacked along the
-// `concat_dim` dimension.  This tensor's shape matches that of `values` except
-// in `concat_dim` where it has the sum of the sizes.The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents.
-func QuantizedConcat(scope *Scope, concat_dim tf.Output, values []tf.Output, input_mins []tf.Output, input_maxes []tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "QuantizedConcat",
-		Input: []tf.Input{
-			concat_dim, tf.OutputList(values), tf.OutputList(input_mins), tf.OutputList(input_maxes),
-		},
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Creates a dataset that emits the records from one or more binary files.
+// Computes gradients of the maxpooling function.
 //
 // Arguments:
-//	filenames: A scalar or a vector containing the name(s) of the file(s) to be
-// read.
-//	header_bytes: A scalar representing the number of bytes to skip at the
-// beginning of a file.
-//	record_bytes: A scalar representing the number of bytes in each record.
-//	footer_bytes: A scalar representing the number of bytes to skip at the end
-// of a file.
-//	buffer_size: A scalar representing the number of bytes to buffer. Must be > 0.
-func FixedLengthRecordDataset(scope *Scope, filenames tf.Output, header_bytes tf.Output, record_bytes tf.Output, footer_bytes tf.Output, buffer_size tf.Output) (handle tf.Output) {
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
+//
+// Returns Gradients w.r.t. the input to `max_pool`.
+func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "FixedLengthRecordDataset",
+		Type: "MaxPoolGradV2",
 		Input: []tf.Input{
-			filenames, header_bytes, record_bytes, footer_bytes, buffer_size,
+			orig_input, orig_output, grad, ksize, strides,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Creates a dataset that executes a SQL query and emits rows of the result set.
+// Restore a reader to a previously saved state.
 //
-// Arguments:
-//	driver_name: The database type. Currently, the only supported type is 'sqlite'.
-//	data_source_name: A connection string to connect to the database.
-//	query: A SQL query to execute.
+// Not all Readers support being restored, so this can produce an
+// Unimplemented error.
 //
+// Arguments:
+//	reader_handle: Handle to a Reader.
+//	state: Result of a ReaderSerializeState of a Reader with type
+// matching reader_handle.
 //
-func SqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Returns the created operation.
+func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "SqlDataset",
+		Type: "ReaderRestoreStateV2",
 		Input: []tf.Input{
-			driver_name, data_source_name, query,
+			reader_handle, state,
 		},
-		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// PlaceholderAttr is an optional argument to Placeholder.
-type PlaceholderAttr func(optionalAttr)
+// ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2.
+type ResourceSparseApplyFtrlV2Attr func(optionalAttr)
 
-// PlaceholderShape sets the optional shape attribute to value.
+// ResourceSparseApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
 //
-// value: (Optional) The shape of the tensor. If the shape has 0 dimensions, the
-// shape is unconstrained.
-// If not specified, defaults to <unknown_rank:true >
-func PlaceholderShape(value tf.Shape) PlaceholderAttr {
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2Attr {
 	return func(m optionalAttr) {
-		m["shape"] = value
+		m["use_locking"] = value
 	}
 }
 
-// A placeholder op for a value that will be fed into the computation.
+// Update relevant entries in '*var' according to the Ftrl-proximal scheme.
 //
-// N.B. This operation will fail with an error if it is executed. It is
-// intended as a way to represent a value that will always be fed, and to
-// provide attrs that enable the fed value to be checked at runtime.
+// That is for rows we have grad for, we update var, accum and linear as follows:
+// grad_with_shrinkage = grad + 2 * l2_shrinkage * var
+// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
+// linear += grad_with_shrinkage +
+//     (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+// accum = accum_new
 //
 // Arguments:
-//	dtype: The type of elements in the tensor.
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	linear: Should be from a Variable().
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 shrinkage regulariation. Must be a scalar.
 //
-// Returns A placeholder tensor that must be replaced using the feed mechanism.
-func Placeholder(scope *Scope, dtype tf.DataType, optional ...PlaceholderAttr) (output tf.Output) {
+//	lr_power: Scaling factor. Must be a scalar.
+//
+// Returns the created operation.
+func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlV2Attr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Placeholder",
-
+		Type: "ResourceSparseApplyFtrlV2",
+		Input: []tf.Input{
+			var_, accum, linear, grad, indices, lr, l1, l2, l2_shrinkage, lr_power,
+		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Creates a dataset that caches elements from `input_dataset`.
-//
-// A CacheDataset will iterate over the input_dataset, and store tensors. If the
-// cache already exists, the cache will be used. If the cache is inappropriate
-// (e.g. cannot be opened, contains tensors of the wrong shape / size), an error
-// will the returned when used.
-//
-// Arguments:
-//
-//	filename: A path on the filesystem where we should cache the dataset. Note: this
-// will be a directory.
-//
+// Associates the given iterator with the given statistics aggregator.
 //
-func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Returns the created operation.
+func IteratorSetStatsAggregator(scope *Scope, iterator_handle tf.Output, stats_aggregator_handle tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "CacheDataset",
+		Type: "IteratorSetStatsAggregator",
 		Input: []tf.Input{
-			input_dataset, filename,
+			iterator_handle, stats_aggregator_handle,
 		},
-		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Identity op for gradient debugging.
-//
-// This op is hidden from public in Python. It is used by TensorFlow Debugger to
-// register gradient tensors for gradient debugging.
-func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) {
+// Returns element-wise smallest integer in not less than x.
+func Ceil(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "DebugGradientIdentity",
+		Type: "Ceil",
 		Input: []tf.Input{
-			input,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Deprecated. Use TensorArrayGradV3
-func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) {
+// Computes the number of elements in the given table.
+//
+// Arguments:
+//	table_handle: Handle to the table.
+//
+// Returns Scalar that contains number of elements in the table.
+func LookupTableSizeV2(scope *Scope, table_handle tf.Output) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"source": source}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayGradV2",
+		Type: "LookupTableSizeV2",
 		Input: []tf.Input{
-			handle, flow_in,
+			table_handle,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Creates a dataset that yields a SparseTensor for each element of the input.
+// ResizeBilinearGradAttr is an optional argument to ResizeBilinearGrad.
+type ResizeBilinearGradAttr func(optionalAttr)
+
+// ResizeBilinearGradAlignCorners sets the optional align_corners attribute to value.
 //
-// Arguments:
-//	input_dataset: A handle to an input dataset. Must have a single component.
-//	batch_size: A scalar representing the number of elements to accumulate in a
-// batch.
-//	row_shape: A vector representing the dense shape of each row in the produced
-// SparseTensor. The shape may be partially specified, using `-1` to indicate
-// that a particular dimension should use the maximum size of all batch elements.
+// value: If true, rescale grads by (orig_height - 1) / (height - 1), which
+// exactly aligns the 4 corners of grads and original_image. If false, rescale by
+// orig_height / height. Treat similarly the width dimension.
+// If not specified, defaults to false
+func ResizeBilinearGradAlignCorners(value bool) ResizeBilinearGradAttr {
+	return func(m optionalAttr) {
+		m["align_corners"] = value
+	}
+}
+
+// Computes the gradient of bilinear interpolation.
 //
+// Arguments:
+//	grads: 4-D with shape `[batch, height, width, channels]`.
+//	original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`,
+// The image tensor that was resized.
 //
-func DenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`.
+// Gradients with respect to the input image. Input image must have been
+// float or double.
+func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBilinearGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "DenseToSparseBatchDataset",
+		Type: "ResizeBilinearGrad",
 		Input: []tf.Input{
-			input_dataset, batch_size, row_shape,
+			grads, original_image,
 		},
 		Attrs: attrs,
 	}
@@ -5869,97 +5717,124 @@ func DenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size
 	return op.Output(0)
 }
 
-// Creates a dataset that batches and pads `batch_size` elements from the input.
+// Computes the sum along sparse segments of a tensor divided by the sqrt of N.
+//
+// N is the size of the segment being reduced.
+//
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
 //
 // Arguments:
 //
-//	batch_size: A scalar representing the number of elements to accumulate in a
-// batch.
-//	padded_shapes: A list of int64 tensors representing the desired padded shapes
-// of the corresponding output components. These shapes may be partially
-// specified, using `-1` to indicate that a particular dimension should be
-// padded to the maximum size of all batch elements.
-//	padding_values: A list of scalars containing the padding value to use for
-// each of the outputs.
+//	indices: A 1-D tensor. Has same rank as `segment_ids`.
+//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
 //
-func PaddedBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, padded_shapes []tf.Output, padding_values []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "PaddedBatchDataset",
+		Type: "SparseSegmentSqrtN",
 		Input: []tf.Input{
-			input_dataset, batch_size, tf.OutputList(padded_shapes), tf.OutputList(padding_values),
+			data, indices, segment_ids,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// TensorArrayConcatV2Attr is an optional argument to TensorArrayConcatV2.
-type TensorArrayConcatV2Attr func(optionalAttr)
+// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal.
+type StatelessTruncatedNormalAttr func(optionalAttr)
 
-// TensorArrayConcatV2ElementShapeExcept0 sets the optional element_shape_except0 attribute to value.
-// If not specified, defaults to <unknown_rank:true >
-func TensorArrayConcatV2ElementShapeExcept0(value tf.Shape) TensorArrayConcatV2Attr {
+// StatelessTruncatedNormalDtype sets the optional dtype attribute to value.
+//
+// value: The type of the output.
+// If not specified, defaults to DT_FLOAT
+func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr {
 	return func(m optionalAttr) {
-		m["element_shape_except0"] = value
+		m["dtype"] = value
 	}
 }
 
-// Deprecated. Use TensorArrayConcatV3
-func TensorArrayConcatV2(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV2Attr) (value tf.Output, lengths tf.Output) {
+// Outputs deterministic pseudorandom values from a truncated normal distribution.
+//
+// The generated values follow a normal distribution with mean 0 and standard
+// deviation 1, except that values whose magnitude is more than 2 standard
+// deviations from the mean are dropped and re-picked.
+//
+// The outputs are a deterministic function of `shape` and `seed`.
+//
+// Arguments:
+//	shape: The shape of the output tensor.
+//	seed: 2 seeds (shape [2]).
+//
+// Returns Random values with specified shape.
+func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayConcatV2",
+		Type: "StatelessTruncatedNormal",
 		Input: []tf.Input{
-			handle, flow_in,
+			shape, seed,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// Converts the given variant tensor to an iterator and stores it in the given resource.
-//
-// Arguments:
-//	resource_handle: A handle to an iterator resource.
-//	serialized: A variant tensor storing the state of the iterator contained in the
-// resource.
+// RestoreSliceAttr is an optional argument to RestoreSlice.
+type RestoreSliceAttr func(optionalAttr)
+
+// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value.
 //
-// Returns the created operation.
-func DeserializeIterator(scope *Scope, resource_handle tf.Output, serialized tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "DeserializeIterator",
-		Input: []tf.Input{
-			resource_handle, serialized,
-		},
+// value: Index of file to open first if multiple files match
+// `file_pattern`. See the documentation for `Restore`.
+// If not specified, defaults to -1
+func RestoreSlicePreferredShard(value int64) RestoreSliceAttr {
+	return func(m optionalAttr) {
+		m["preferred_shard"] = value
 	}
-	return scope.AddOperation(opspec)
 }
 
-// Records the latency of producing `input_dataset` elements in a StatsAggregator.
-func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Restores a tensor from checkpoint files.
+//
+// This is like `Restore` except that restored tensor can be listed as filling
+// only a slice of a larger tensor.  `shape_and_slice` specifies the shape of the
+// larger tensor and the slice that the restored tensor covers.
+//
+// The `shape_and_slice` input has the same format as the
+// elements of the `shapes_and_slices` input of the `SaveSlices` op.
+//
+// Arguments:
+//	file_pattern: Must have a single element. The pattern of the files from
+// which we read the tensor.
+//	tensor_name: Must have a single element. The name of the tensor to be
+// restored.
+//	shape_and_slice: Scalar. The shapes and slice specifications to use when
+// restoring a tensors.
+//	dt: The type of the tensor to be restored.
+//
+// Returns The restored tensor.
+func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	attrs := map[string]interface{}{"dt": dt}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "LatencyStatsDataset",
+		Type: "RestoreSlice",
 		Input: []tf.Input{
-			input_dataset, tag,
+			file_pattern, tensor_name, shape_and_slice,
 		},
 		Attrs: attrs,
 	}
@@ -5967,41 +5842,96 @@ func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, o
 	return op.Output(0)
 }
 
-// Concatenates tensors along one dimension.
+// UniqueWithCountsAttr is an optional argument to UniqueWithCounts.
+type UniqueWithCountsAttr func(optionalAttr)
+
+// UniqueWithCountsOutIdx sets the optional out_idx attribute to value.
+// If not specified, defaults to DT_INT32
+func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr {
+	return func(m optionalAttr) {
+		m["out_idx"] = value
+	}
+}
+
+// Finds unique elements in a 1-D tensor.
+//
+// This operation returns a tensor `y` containing all of the unique elements of `x`
+// sorted in the same order that they occur in `x`. This operation also returns a
+// tensor `idx` the same size as `x` that contains the index of each value of `x`
+// in the unique output `y`. Finally, it returns a third tensor `count` that
+// contains the count of each element of `y` in `x`. In other words:
+//
+// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
+//
+// For example:
+//
+// ```
+// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
+// y, idx, count = unique_with_counts(x)
+// y ==> [1, 2, 4, 7, 8]
+// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
+// count ==> [2, 1, 3, 1, 2]
+// ```
 //
 // Arguments:
-//	values: List of `N` Tensors to concatenate. Their ranks and types must match,
-// and their sizes must match in all dimensions except `concat_dim`.
-//	axis: 0-D.  The dimension along which to concatenate.  Must be in the
-// range [-rank(values), rank(values)).
+//	x: 1-D.
 //
-// Returns A `Tensor` with the concatenation of values stacked along the
-// `concat_dim` dimension.  This tensor's shape matches that of `values` except
-// in `concat_dim` where it has the sum of the sizes.
-func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) {
+// Returns 1-D.1-D.1-D.
+func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "ConcatV2",
+		Type: "UniqueWithCounts",
 		Input: []tf.Input{
-			tf.OutputList(values), axis,
+			x,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Creates a dataset that contains the elements of `input_dataset` ignoring errors.
-func IgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal.
+type StatelessRandomNormalAttr func(optionalAttr)
+
+// StatelessRandomNormalDtype sets the optional dtype attribute to value.
+//
+// value: The type of the output.
+// If not specified, defaults to DT_FLOAT
+func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
+	}
+}
+
+// Outputs deterministic pseudorandom values from a normal distribution.
+//
+// The generated values will have mean 0 and standard deviation 1.
+//
+// The outputs are a deterministic function of `shape` and `seed`.
+//
+// Arguments:
+//	shape: The shape of the output tensor.
+//	seed: 2 seeds (shape [2]).
+//
+// Returns Random values with specified shape.
+func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "IgnoreErrorsDataset",
+		Type: "StatelessRandomNormal",
 		Input: []tf.Input{
-			input_dataset,
+			shape, seed,
 		},
 		Attrs: attrs,
 	}
@@ -6009,181 +5939,192 @@ func IgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []t
 	return op.Output(0)
 }
 
-// Creates a dataset that concatenates `input_dataset` with `another_dataset`.
-func ConcatenateDataset(scope *Scope, input_dataset tf.Output, another_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Reshapes a quantized tensor as per the Reshape op.
+//
+// ```
+//
+// Arguments:
+//
+//	shape: Defines the shape of the output tensor.
+//	input_min: The minimum value of the input.
+//	input_max: The maximum value of the input.
+//
+// Returns This value is copied from input_min.This value is copied from input_max.
+func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min tf.Output, input_max tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "ConcatenateDataset",
+		Type: "QuantizedReshape",
 		Input: []tf.Input{
-			input_dataset, another_dataset,
+			tensor, shape, input_min, input_max,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Creates a dataset that splits a SparseTensor into elements row-wise.
-func SparseTensorSliceDataset(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (handle tf.Output) {
+// GatherAttr is an optional argument to Gather.
+type GatherAttr func(optionalAttr)
+
+// GatherValidateIndices sets the optional validate_indices attribute to value.
+// If not specified, defaults to true
+func GatherValidateIndices(value bool) GatherAttr {
+	return func(m optionalAttr) {
+		m["validate_indices"] = value
+	}
+}
+
+// Gather slices from `params` according to `indices`.
+//
+// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
+// Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
+//
+// ```python
+//     # Scalar indices
+//     output[:, ..., :] = params[indices, :, ... :]
+//
+//     # Vector indices
+//     output[i, :, ..., :] = params[indices[i], :, ... :]
+//
+//     # Higher rank indices
+//     output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]
+// ```
+//
+// If `indices` is a permutation and `len(indices) == params.shape[0]` then
+// this operation will permute `params` accordingly.
+//
+// `validate_indices`: DEPRECATED. If this operation is assigned to CPU, values in
+// `indices` are always validated to be within range. If assigned to GPU,
+// out-of-bound indices result in safe but unspecified behavior, which may include
+// raising an error.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt>
+// </div>
+func Gather(scope *Scope, params tf.Output, indices tf.Output, optional ...GatherAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SparseTensorSliceDataset",
+		Type: "Gather",
 		Input: []tf.Input{
-			indices, values, dense_shape,
+			params, indices,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Reshapes a tensor.
-//
-// Given `tensor`, this operation returns a tensor that has the same values
-// as `tensor` with shape `shape`.
-//
-// If one component of `shape` is the special value -1, the size of that dimension
-// is computed so that the total size remains constant.  In particular, a `shape`
-// of `[-1]` flattens into 1-D.  At most one component of `shape` can be -1.
-//
-// If `shape` is 1-D or higher, then the operation returns a tensor with shape
-// `shape` filled with the values of `tensor`. In this case, the number of elements
-// implied by `shape` must be the same as the number of elements in `tensor`.
-//
-// For example:
-//
-// ```
-// # tensor 't' is [1, 2, 3, 4, 5, 6, 7, 8, 9]
-// # tensor 't' has shape [9]
-// reshape(t, [3, 3]) ==> [[1, 2, 3],
-//                         [4, 5, 6],
-//                         [7, 8, 9]]
-//
-// # tensor 't' is [[[1, 1], [2, 2]],
-// #                [[3, 3], [4, 4]]]
-// # tensor 't' has shape [2, 2, 2]
-// reshape(t, [2, 4]) ==> [[1, 1, 2, 2],
-//                         [3, 3, 4, 4]]
-//
-// # tensor 't' is [[[1, 1, 1],
-// #                 [2, 2, 2]],
-// #                [[3, 3, 3],
-// #                 [4, 4, 4]],
-// #                [[5, 5, 5],
-// #                 [6, 6, 6]]]
-// # tensor 't' has shape [3, 2, 3]
-// # pass '[-1]' to flatten 't'
-// reshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6]
-//
-// # -1 can also be used to infer the shape
-//
-// # -1 is inferred to be 9:
-// reshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],
-//                          [4, 4, 4, 5, 5, 5, 6, 6, 6]]
-// # -1 is inferred to be 2:
-// reshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],
-//                          [4, 4, 4, 5, 5, 5, 6, 6, 6]]
-// # -1 is inferred to be 3:
-// reshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1],
-//                               [2, 2, 2],
-//                               [3, 3, 3]],
-//                              [[4, 4, 4],
-//                               [5, 5, 5],
-//                               [6, 6, 6]]]
-//
-// # tensor 't' is [7]
-// # shape `[]` reshapes to a scalar
-// reshape(t, []) ==> 7
-// ```
-//
-// Arguments:
+// Returns the truth value of (x != y) element-wise.
 //
-//	shape: Defines the shape of the output tensor.
-func Reshape(scope *Scope, tensor tf.Output, shape tf.Output) (output tf.Output) {
+// *NOTE*: `NotEqual` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func NotEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Reshape",
+		Type: "NotEqual",
 		Input: []tf.Input{
-			tensor, shape,
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Greedily selects a subset of bounding boxes in descending order of score,
+// Inverse 3D real-valued fast Fourier transform.
 //
-// pruning away boxes that have high intersection-over-union (IOU) overlap
-// with previously selected boxes.  Bounding boxes are supplied as
-// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
-// diagonal pair of box corners and the coordinates can be provided as normalized
-// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
-// is agnostic to where the origin is in the coordinate system.  Note that this
-// algorithm is invariant to orthogonal transformations and translations
-// of the coordinate system; thus translating or reflections of the coordinate
-// system result in the same boxes being selected by the algorithm.
+// Computes the inverse 3-dimensional discrete Fourier transform of a real-valued
+// signal over the inner-most 3 dimensions of `input`.
 //
-// The output of this operation is a set of integers indexing into the input
-// collection of bounding boxes representing the selected boxes.  The bounding
-// box coordinates corresponding to the selected indices can then be obtained
-// using the `tf.gather operation`.  For example:
+// The inner-most 3 dimensions of `input` are assumed to be the result of `RFFT3D`:
+// The inner-most dimension contains the `fft_length / 2 + 1` unique components of
+// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed
+// from the size of the inner-most 3 dimensions of `input`. If the FFT length used
+// to compute `input` is odd, it should be provided since it cannot be inferred
+// properly.
 //
-//   selected_indices = tf.image.non_max_suppression_v2(
-//       boxes, scores, max_output_size, iou_threshold)
-//   selected_boxes = tf.gather(boxes, selected_indices)
+// Along each axis `IRFFT3D` is computed on, if `fft_length` (or
+// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the
+// corresponding dimension of `input`, the dimension is cropped. If it is larger,
+// the dimension is padded with zeros.
 //
 // Arguments:
-//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
-//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
-// score corresponding to each box (each row of boxes).
-//	max_output_size: A scalar integer tensor representing the maximum number of
-// boxes to be selected by non max suppression.
-//	iou_threshold: A 0-D float tensor representing the threshold for deciding whether
-// boxes overlap too much with respect to IOU.
+//	input: A complex64 tensor.
+//	fft_length: An int32 tensor of shape [3]. The FFT length for each dimension.
 //
-// Returns A 1-D integer tensor of shape `[M]` representing the selected
-// indices from the boxes tensor, where `M <= max_output_size`.
-func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) {
+// Returns A float32 tensor of the same rank as `input`. The inner-most 3
+//   dimensions of `input` are replaced with the `fft_length` samples of their
+//   inverse 3D real Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.irfftn with 3 dimensions.
+// @end_compatibility
+func IRFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "NonMaxSuppressionV2",
+		Type: "IRFFT3D",
 		Input: []tf.Input{
-			boxes, scores, max_output_size, iou_threshold,
+			input, fft_length,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// StatsAggregatorHandleAttr is an optional argument to StatsAggregatorHandle.
-type StatsAggregatorHandleAttr func(optionalAttr)
-
-// StatsAggregatorHandleContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func StatsAggregatorHandleContainer(value string) StatsAggregatorHandleAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
+// StringSplitAttr is an optional argument to StringSplit.
+type StringSplitAttr func(optionalAttr)
 
-// StatsAggregatorHandleSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func StatsAggregatorHandleSharedName(value string) StatsAggregatorHandleAttr {
+// StringSplitSkipEmpty sets the optional skip_empty attribute to value.
+//
+// value: A `bool`. If `True`, skip the empty strings from the result.
+// If not specified, defaults to true
+func StringSplitSkipEmpty(value bool) StringSplitAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["skip_empty"] = value
 	}
 }
 
-// Creates a statistics manager resource.
-func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr) (handle tf.Output) {
+// Split elements of `input` based on `delimiter` into a `SparseTensor`.
+//
+// Let N be the size of source (typically N will be the batch size). Split each
+// element of `input` based on `delimiter` and return a `SparseTensor`
+// containing the splitted tokens. Empty tokens are ignored.
+//
+// `delimiter` can be empty, or a string of split characters. If `delimiter` is an
+//  empty string, each element of `input` is split into individual single-byte
+//  character strings, including splitting of UTF-8 multibyte sequences. Otherwise
+//  every character of `delimiter` is a potential split point.
+//
+// For example:
+//   N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output
+//   will be
+//
+//   indices = [0, 0;
+//              0, 1;
+//              1, 0;
+//              1, 1;
+//              1, 2]
+//   shape = [2, 3]
+//   values = ['hello', 'world', 'a', 'b', 'c']
+//
+// Arguments:
+//	input: 1-D. Strings to split.
+//	delimiter: 0-D. Delimiter characters (bytes), or empty string.
+//
+// Returns A dense matrix of int64 representing the indices of the sparse tensor.A vector of strings corresponding to the splited values.a length-2 vector of int64 representing the shape of the sparse
+// tensor, where the first value is N and the second value is the maximum number
+// of tokens in a single input entry.
+func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ...StringSplitAttr) (indices tf.Output, values tf.Output, shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -6192,49 +6133,54 @@ func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr)
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StatsAggregatorHandle",
-
+		Type: "StringSplit",
+		Input: []tf.Input{
+			input, delimiter,
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// CropAndResizeGradBoxesAttr is an optional argument to CropAndResizeGradBoxes.
-type CropAndResizeGradBoxesAttr func(optionalAttr)
+// WriteAudioSummaryAttr is an optional argument to WriteAudioSummary.
+type WriteAudioSummaryAttr func(optionalAttr)
 
-// CropAndResizeGradBoxesMethod sets the optional method attribute to value.
+// WriteAudioSummaryMaxOutputs sets the optional max_outputs attribute to value.
 //
-// value: A string specifying the interpolation method. Only 'bilinear' is
-// supported for now.
-// If not specified, defaults to "bilinear"
-func CropAndResizeGradBoxesMethod(value string) CropAndResizeGradBoxesAttr {
+// value: Max number of batch elements to generate audio for.
+// If not specified, defaults to 3
+//
+// REQUIRES: value >= 1
+func WriteAudioSummaryMaxOutputs(value int64) WriteAudioSummaryAttr {
 	return func(m optionalAttr) {
-		m["method"] = value
+		m["max_outputs"] = value
 	}
 }
 
-// Computes the gradient of the crop_and_resize op wrt the input boxes tensor.
+// Writes a `Summary` protocol buffer with audio.
+//
+// The summary has up to `max_outputs` summary values containing audio. The
+// audio is built from `tensor` which must be 3-D with shape `[batch_size,
+// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
+// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
+//
+// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
+// build the `tag` of the summary values:
+//
+// *  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
+// *  If `max_outputs` is greater than 1, the summary value tags are
+//    generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
 //
 // Arguments:
-//	grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
-//	image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
-// Both `image_height` and `image_width` need to be positive.
-//	boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
-// specifies the coordinates of a box in the `box_ind[i]` image and is specified
-// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
-// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
-// `[0, 1]` interval of normalized image height is mapped to
-// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in
-// which case the sampled crop is an up-down flipped version of the original
-// image. The width dimension is treated similarly. Normalized coordinates
-// outside the `[0, 1]` range are allowed, in which case we use
-// `extrapolation_value` to extrapolate the input image values.
-//	box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
-// The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
+//	writer: A handle to a summary writer.
+//	step: The step to write the summary for.
+//	tag: Scalar. Used to build the `tag` attribute of the summary values.
+//	tensor: 2-D of shape `[batch_size, frames]`.
+//	sample_rate: The sample rate of the signal in hertz.
 //
-// Returns A 2-D tensor of shape `[num_boxes, 4]`.
-func CropAndResizeGradBoxes(scope *Scope, grads tf.Output, image tf.Output, boxes tf.Output, box_ind tf.Output, optional ...CropAndResizeGradBoxesAttr) (output tf.Output) {
+// Returns the created operation.
+func WriteAudioSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...WriteAudioSummaryAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -6243,58 +6189,53 @@ func CropAndResizeGradBoxes(scope *Scope, grads tf.Output, image tf.Output, boxe
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "CropAndResizeGradBoxes",
+		Type: "WriteAudioSummary",
 		Input: []tf.Input{
-			grads, image, boxes, box_ind,
+			writer, step, tag, tensor, sample_rate,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// ShuffleDatasetAttr is an optional argument to ShuffleDataset.
-type ShuffleDatasetAttr func(optionalAttr)
+// ProdAttr is an optional argument to Prod.
+type ProdAttr func(optionalAttr)
 
-// ShuffleDatasetReshuffleEachIteration sets the optional reshuffle_each_iteration attribute to value.
+// ProdKeepDims sets the optional keep_dims attribute to value.
 //
-// value: If true, each iterator over this dataset will be given
-// a different pseudorandomly generated seed, based on a sequence seeded by the
-// `seed` and `seed2` inputs. If false, each iterator will be given the same
-// seed, and repeated iteration over this dataset will yield the exact same
-// sequence of results.
-// If not specified, defaults to true
-func ShuffleDatasetReshuffleEachIteration(value bool) ShuffleDatasetAttr {
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func ProdKeepDims(value bool) ProdAttr {
 	return func(m optionalAttr) {
-		m["reshuffle_each_iteration"] = value
+		m["keep_dims"] = value
 	}
 }
 
-// Creates a dataset that shuffles elements from `input_dataset` pseudorandomly.
-//
-// Arguments:
+// Computes the product of elements across dimensions of a tensor.
 //
-//	buffer_size: The number of output elements to buffer in an iterator over
-// this dataset. Compare with the `min_after_dequeue` attr when creating a
-// `RandomShuffleQueue`.
-//	seed: A scalar seed for the random number generator. If either seed or
-// seed2 is set to be non-zero, the random number generator is seeded
-// by the given seed.  Otherwise, a random seed is used.
-//	seed2: A second scalar seed to avoid seed collision.
+// Reduces `input` along the dimensions given in `axis`. Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `axis`. If `keep_dims` is true, the reduced dimensions are
+// retained with length 1.
 //
+// Arguments:
+//	input: The tensor to reduce.
+//	axis: The dimensions to reduce. Must be in the range
+// `[-rank(input), rank(input))`.
 //
-func ShuffleDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ShuffleDatasetAttr) (handle tf.Output) {
+// Returns The reduced tensor.
+func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ShuffleDataset",
+		Type: "Prod",
 		Input: []tf.Input{
-			input_dataset, buffer_size, seed, seed2,
+			input, axis,
 		},
 		Attrs: attrs,
 	}
@@ -6302,54 +6243,44 @@ func ShuffleDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output
 	return op.Output(0)
 }
 
-// CropAndResizeGradImageAttr is an optional argument to CropAndResizeGradImage.
-type CropAndResizeGradImageAttr func(optionalAttr)
+// ResizeBilinearAttr is an optional argument to ResizeBilinear.
+type ResizeBilinearAttr func(optionalAttr)
 
-// CropAndResizeGradImageMethod sets the optional method attribute to value.
+// ResizeBilinearAlignCorners sets the optional align_corners attribute to value.
 //
-// value: A string specifying the interpolation method. Only 'bilinear' is
-// supported for now.
-// If not specified, defaults to "bilinear"
-func CropAndResizeGradImageMethod(value string) CropAndResizeGradImageAttr {
+// value: If true, rescale input by (new_height - 1) / (height - 1), which
+// exactly aligns the 4 corners of images and resized images. If false, rescale
+// by new_height / height. Treat similarly the width dimension.
+// If not specified, defaults to false
+func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr {
 	return func(m optionalAttr) {
-		m["method"] = value
+		m["align_corners"] = value
 	}
 }
 
-// Computes the gradient of the crop_and_resize op wrt the input image tensor.
+// Resize `images` to `size` using bilinear interpolation.
 //
-// Arguments:
-//	grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
-//	boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
-// specifies the coordinates of a box in the `box_ind[i]` image and is specified
-// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
-// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
-// `[0, 1]` interval of normalized image height is mapped to
-// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in
-// which case the sampled crop is an up-down flipped version of the original
-// image. The width dimension is treated similarly. Normalized coordinates
-// outside the `[0, 1]` range are allowed, in which case we use
-// `extrapolation_value` to extrapolate the input image values.
-//	box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
-// The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
-//	image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]`
-// containing the original image size. Both `image_height` and `image_width` need
-// to be positive.
+// Input images can be of different types but output images are always float.
 //
+// Arguments:
+//	images: 4-D with shape `[batch, height, width, channels]`.
+//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+// new size for the images.
 //
-// Returns A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
-func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_ind tf.Output, image_size tf.Output, T tf.DataType, optional ...CropAndResizeGradImageAttr) (output tf.Output) {
+// Returns 4-D with shape
+// `[batch, new_height, new_width, channels]`.
+func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBilinearAttr) (resized_images tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"T": T}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "CropAndResizeGradImage",
+		Type: "ResizeBilinear",
 		Input: []tf.Input{
-			grads, boxes, box_ind, image_size,
+			images, size,
 		},
 		Attrs: attrs,
 	}
@@ -6357,240 +6288,189 @@ func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_
 	return op.Output(0)
 }
 
-// A container for an iterator resource.
-//
-// Returns A handle to the iterator that can be passed to a "MakeIterator"
-// or "IteratorGetNext" op.
-func Iterator(scope *Scope, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Computes softsign: `features / (abs(features) + 1)`.
+func Softsign(scope *Scope, features tf.Output) (activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "Iterator",
-
-		Attrs: attrs,
+		Type: "Softsign",
+		Input: []tf.Input{
+			features,
+		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ExtractGlimpseAttr is an optional argument to ExtractGlimpse.
-type ExtractGlimpseAttr func(optionalAttr)
+// GenerateVocabRemappingAttr is an optional argument to GenerateVocabRemapping.
+type GenerateVocabRemappingAttr func(optionalAttr)
 
-// ExtractGlimpseCentered sets the optional centered attribute to value.
+// GenerateVocabRemappingOldVocabSize sets the optional old_vocab_size attribute to value.
 //
-// value: indicates if the offset coordinates are centered relative to
-// the image, in which case the (0, 0) offset is relative to the center
-// of the input images. If false, the (0,0) offset corresponds to the
-// upper left corner of the input images.
-// If not specified, defaults to true
-func ExtractGlimpseCentered(value bool) ExtractGlimpseAttr {
-	return func(m optionalAttr) {
-		m["centered"] = value
-	}
-}
-
-// ExtractGlimpseNormalized sets the optional normalized attribute to value.
+// value: Number of entries in the old vocab file to consider.  If -1,
+// use the entire old vocabulary.
+// If not specified, defaults to -1
 //
-// value: indicates if the offset coordinates are normalized.
-// If not specified, defaults to true
-func ExtractGlimpseNormalized(value bool) ExtractGlimpseAttr {
+// REQUIRES: value >= -1
+func GenerateVocabRemappingOldVocabSize(value int64) GenerateVocabRemappingAttr {
 	return func(m optionalAttr) {
-		m["normalized"] = value
+		m["old_vocab_size"] = value
 	}
 }
 
-// ExtractGlimpseUniformNoise sets the optional uniform_noise attribute to value.
+// Given a path to new and old vocabulary files, returns a remapping Tensor of
 //
-// value: indicates if the noise should be generated using a
-// uniform distribution or a Gaussian distribution.
-// If not specified, defaults to true
-func ExtractGlimpseUniformNoise(value bool) ExtractGlimpseAttr {
-	return func(m optionalAttr) {
-		m["uniform_noise"] = value
-	}
-}
-
-// Extracts a glimpse from the input tensor.
+// length `num_new_vocab`, where `remapping[i]` contains the row number in the old
+// vocabulary that corresponds to row `i` in the new vocabulary (starting at line
+// `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i`
+// in the new vocabulary is not in the old vocabulary.  The old vocabulary is
+// constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the
+// default value of -1.
 //
-// Returns a set of windows called glimpses extracted at location
-// `offsets` from the input tensor. If the windows only partially
-// overlaps the inputs, the non overlapping areas will be filled with
-// random noise.
+// `num_vocab_offset` enables
+// use in the partitioned variable case, and should generally be set through
+// examining partitioning info.  The format of the files should be a text file,
+// with each line containing a single entity within the vocabulary.
 //
-// The result is a 4-D tensor of shape `[batch_size, glimpse_height,
-// glimpse_width, channels]`. The channels and batch dimensions are the
-// same as that of the input tensor. The height and width of the output
-// windows are specified in the `size` parameter.
+// For example, with `new_vocab_file` a text file containing each of the following
+// elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3],
+// `num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be
+// `[0, -1, 2]`.
 //
-// The argument `normalized` and `centered` controls how the windows are built:
+// The op also returns a count of how many entries in the new vocabulary
+// were present in the old vocabulary, which is used to calculate the number of
+// values to initialize in a weight matrix remapping
 //
-// * If the coordinates are normalized but not centered, 0.0 and 1.0
-//   correspond to the minimum and maximum of each height and width
-//   dimension.
-// * If the coordinates are both normalized and centered, they range from
-//   -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
-//   left corner, the lower right corner is located at (1.0, 1.0) and the
-//   center is at (0, 0).
-// * If the coordinates are not normalized they are interpreted as
-//   numbers of pixels.
+// This functionality can be used to remap both row vocabularies (typically,
+// features) and column vocabularies (typically, classes) from TensorFlow
+// checkpoints.  Note that the partitioning logic relies on contiguous vocabularies
+// corresponding to div-partitioned variables.  Moreover, the underlying remapping
+// uses an IndexTable (as opposed to an inexact CuckooTable), so client code should
+// use the corresponding index_table_from_file() as the FeatureColumn framework
+// does (as opposed to tf.feature_to_id(), which uses a CuckooTable).
 //
 // Arguments:
-//	input: A 4-D float tensor of shape `[batch_size, height, width, channels]`.
-//	size: A 1-D tensor of 2 elements containing the size of the glimpses
-// to extract.  The glimpse height must be specified first, following
-// by the glimpse width.
-//	offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing
-// the y, x locations of the center of each window.
+//	new_vocab_file: Path to the new vocab file.
+//	old_vocab_file: Path to the old vocab file.
+//	new_vocab_offset: How many entries into the new vocab file to start reading.
+//	num_new_vocab: Number of entries in the new vocab file to remap.
 //
-// Returns A tensor representing the glimpses `[batch_size,
-// glimpse_height, glimpse_width, channels]`.
-func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Output, optional ...ExtractGlimpseAttr) (glimpse tf.Output) {
+// Returns A Tensor of length num_new_vocab where the element at index i
+// is equal to the old ID that maps to the new ID i.  This element is -1 for any
+// new ID that is not found in the old vocabulary.Number of new vocab entries found in old vocab.
+func GenerateVocabRemapping(scope *Scope, new_vocab_file tf.Output, old_vocab_file tf.Output, new_vocab_offset int64, num_new_vocab int64, optional ...GenerateVocabRemappingAttr) (remapping tf.Output, num_present tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"new_vocab_offset": new_vocab_offset, "num_new_vocab": num_new_vocab}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ExtractGlimpse",
+		Type: "GenerateVocabRemapping",
+		Input: []tf.Input{
+			new_vocab_file, old_vocab_file,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Assigns sparse updates to the variable referenced by `resource`.
+//
+// This operation computes
+//
+//     # Scalar indices
+//     ref[indices, ...] = updates[...]
+//
+//     # Vector indices (for each i)
+//     ref[indices[i], ...] = updates[i, ...]
+//
+//     # High rank indices (for each i, ..., j)
+//     ref[indices[i, ..., j], ...] = updates[i, ..., j, ...]
+//
+// Arguments:
+//	resource: Should be from a `Variable` node.
+//	indices: A tensor of indices into the first dimension of `ref`.
+//	updates: A tensor of updated values to add to `ref`.
+//
+// Returns the created operation.
+func ResourceScatterUpdate(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterUpdate",
 		Input: []tf.Input{
-			input, size, offsets,
+			resource, indices, updates,
 		},
-		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// SampleDistortedBoundingBoxV2Attr is an optional argument to SampleDistortedBoundingBoxV2.
-type SampleDistortedBoundingBoxV2Attr func(optionalAttr)
+// CumsumAttr is an optional argument to Cumsum.
+type CumsumAttr func(optionalAttr)
 
-// SampleDistortedBoundingBoxV2Seed sets the optional seed attribute to value.
+// CumsumExclusive sets the optional exclusive attribute to value.
 //
-// value: If either `seed` or `seed2` are set to non-zero, the random number
-// generator is seeded by the given `seed`.  Otherwise, it is seeded by a random
-// seed.
-// If not specified, defaults to 0
-func SampleDistortedBoundingBoxV2Seed(value int64) SampleDistortedBoundingBoxV2Attr {
+// value: If `True`, perform exclusive cumsum.
+// If not specified, defaults to false
+func CumsumExclusive(value bool) CumsumAttr {
 	return func(m optionalAttr) {
-		m["seed"] = value
+		m["exclusive"] = value
 	}
 }
 
-// SampleDistortedBoundingBoxV2Seed2 sets the optional seed2 attribute to value.
+// CumsumReverse sets the optional reverse attribute to value.
 //
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func SampleDistortedBoundingBoxV2Seed2(value int64) SampleDistortedBoundingBoxV2Attr {
+// value: A `bool` (default: False).
+// If not specified, defaults to false
+func CumsumReverse(value bool) CumsumAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["reverse"] = value
 	}
 }
 
-// SampleDistortedBoundingBoxV2AspectRatioRange sets the optional aspect_ratio_range attribute to value.
+// Compute the cumulative sum of the tensor `x` along `axis`.
 //
-// value: The cropped area of the image must have an aspect ratio =
-// width / height within this range.
-// If not specified, defaults to <f:0.75 f:1.33 >
-func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistortedBoundingBoxV2Attr {
-	return func(m optionalAttr) {
-		m["aspect_ratio_range"] = value
-	}
-}
-
-// SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value.
+// By default, this op performs an inclusive cumsum, which means that the first
+// element of the input is identical to the first element of the output:
 //
-// value: The cropped area of the image must contain a fraction of the
-// supplied image within in this range.
-// If not specified, defaults to <f:0.05 f:1 >
-func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr {
-	return func(m optionalAttr) {
-		m["area_range"] = value
-	}
-}
-
-// SampleDistortedBoundingBoxV2MaxAttempts sets the optional max_attempts attribute to value.
+// ```python
+// tf.cumsum([a, b, c])  # => [a, a + b, a + b + c]
+// ```
 //
-// value: Number of attempts at generating a cropped region of the image
-// of the specified constraints. After `max_attempts` failures, return the entire
-// image.
-// If not specified, defaults to 100
-func SampleDistortedBoundingBoxV2MaxAttempts(value int64) SampleDistortedBoundingBoxV2Attr {
-	return func(m optionalAttr) {
-		m["max_attempts"] = value
-	}
-}
-
-// SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value.
+// By setting the `exclusive` kwarg to `True`, an exclusive cumsum is
+// performed instead:
 //
-// value: Controls behavior if no bounding boxes supplied.
-// If true, assume an implicit bounding box covering the whole input. If false,
-// raise an error.
-// If not specified, defaults to false
-func SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxV2Attr {
-	return func(m optionalAttr) {
-		m["use_image_if_no_bounding_boxes"] = value
-	}
-}
-
-// Generate a single randomly distorted bounding box for an image.
+// ```python
+// tf.cumsum([a, b, c], exclusive=True)  # => [0, a, a + b]
+// ```
 //
-// Bounding box annotations are often supplied in addition to ground-truth labels
-// in image recognition or object localization tasks. A common technique for
-// training such a system is to randomly distort an image while preserving
-// its content, i.e. *data augmentation*. This Op outputs a randomly distorted
-// localization of an object, i.e. bounding box, given an `image_size`,
-// `bounding_boxes` and a series of constraints.
+// By setting the `reverse` kwarg to `True`, the cumsum is performed in the
+// opposite direction:
 //
-// The output of this Op is a single bounding box that may be used to crop the
-// original image. The output is returned as 3 tensors: `begin`, `size` and
-// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
-// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize
-// what the bounding box looks like.
+// ```python
+// tf.cumsum([a, b, c], reverse=True)  # => [a + b + c, b + c, c]
+// ```
 //
-// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The
-// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
-// height of the underlying image.
+// This is more efficient than using separate `tf.reverse` ops.
 //
-// For example,
+// The `reverse` and `exclusive` kwargs can also be combined:
 //
 // ```python
-//     # Generate a single distorted bounding box.
-//     begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
-//         tf.shape(image),
-//         bounding_boxes=bounding_boxes)
-//
-//     # Draw the bounding box in an image summary.
-//     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
-//                                                   bbox_for_draw)
-//     tf.image_summary('images_with_box', image_with_box)
-//
-//     # Employ the bounding box to distort the image.
-//     distorted_image = tf.slice(image, begin, size)
+// tf.cumsum([a, b, c], exclusive=True, reverse=True)  # => [b + c, c, 0]
 // ```
 //
-// Note that if no bounding box information is available, setting
-// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
-// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
-// false and no bounding boxes are supplied, an error is raised.
-//
 // Arguments:
-//	image_size: 1-D, containing `[height, width, channels]`.
-//	bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes
-// associated with the image.
-//	min_object_covered: The cropped area of the image must contain at least this
-// fraction of any bounding box supplied. The value of this parameter should be
-// non-negative. In the case of 0, the cropped area does not need to overlap
-// any of the bounding boxes supplied.
-//
-// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to
-// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to
-// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box.
-// Provide as input to `tf.image.draw_bounding_boxes`.
-func SampleDistortedBoundingBoxV2(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, min_object_covered tf.Output, optional ...SampleDistortedBoundingBoxV2Attr) (begin tf.Output, size tf.Output, bboxes tf.Output) {
+//	x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
+// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
+// `complex128`, `qint8`, `quint8`, `qint32`, `half`.
+//	axis: A `Tensor` of type `int32` (default: 0). Must be in the range
+// `[-rank(x), rank(x))`.
+func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) (out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -6599,343 +6479,312 @@ func SampleDistortedBoundingBoxV2(scope *Scope, image_size tf.Output, bounding_b
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SampleDistortedBoundingBoxV2",
+		Type: "Cumsum",
 		Input: []tf.Input{
-			image_size, bounding_boxes, min_object_covered,
+			x, axis,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Draw bounding boxes on a batch of images.
-//
-// Outputs a copy of `images` but draws on top of the pixels zero or more bounding
-// boxes specified by the locations in `boxes`. The coordinates of the each
-// bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The
-// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
-// height of the underlying image.
-//
-// For example, if an image is 100 x 200 pixels (height x width) and the bounding
-// box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
-// the bounding box will be `(40, 10)` to `(100, 50)` (in (x,y) coordinates).
-//
-// Parts of the bounding box may fall outside the image.
+// QuantizedRelu6Attr is an optional argument to QuantizedRelu6.
+type QuantizedRelu6Attr func(optionalAttr)
+
+// QuantizedRelu6OutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_QUINT8
+func QuantizedRelu6OutType(value tf.DataType) QuantizedRelu6Attr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`
 //
 // Arguments:
-//	images: 4-D with shape `[batch, height, width, depth]`. A batch of images.
-//	boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding
-// boxes.
 //
-// Returns 4-D with the same shape as `images`. The batch of input images with
-// bounding boxes drawn on the images.
-func DrawBoundingBoxes(scope *Scope, images tf.Output, boxes tf.Output) (output tf.Output) {
+//	min_features: The float value that the lowest quantized value represents.
+//	max_features: The float value that the highest quantized value represents.
+//
+// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents.
+func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedRelu6Attr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "DrawBoundingBoxes",
+		Type: "QuantizedRelu6",
 		Input: []tf.Input{
-			images, boxes,
+			features, min_features, max_features,
 		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2.
+type FixedLengthRecordReaderV2Attr func(optionalAttr)
+
+// FixedLengthRecordReaderV2HeaderBytes sets the optional header_bytes attribute to value.
+//
+// value: Number of bytes in the header, defaults to 0.
+// If not specified, defaults to 0
+func FixedLengthRecordReaderV2HeaderBytes(value int64) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["header_bytes"] = value
+	}
+}
+
+// FixedLengthRecordReaderV2FooterBytes sets the optional footer_bytes attribute to value.
+//
+// value: Number of bytes in the footer, defaults to 0.
+// If not specified, defaults to 0
+func FixedLengthRecordReaderV2FooterBytes(value int64) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["footer_bytes"] = value
+	}
+}
+
+// FixedLengthRecordReaderV2HopBytes sets the optional hop_bytes attribute to value.
+//
+// value: Number of bytes to hop before each read. Default of 0 means using
+// record_bytes.
+// If not specified, defaults to 0
+func FixedLengthRecordReaderV2HopBytes(value int64) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["hop_bytes"] = value
+	}
+}
+
+// FixedLengthRecordReaderV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this reader is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func FixedLengthRecordReaderV2Container(value string) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// FixedLengthRecordReaderV2SharedName sets the optional shared_name attribute to value.
+//
+// value: If non-empty, this reader is named in the given bucket
+// with this shared_name. Otherwise, the node name is used instead.
+// If not specified, defaults to ""
+func FixedLengthRecordReaderV2SharedName(value string) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Convert one or more images from HSV to RGB.
-//
-// Outputs a tensor of the same shape as the `images` tensor, containing the RGB
-// value of the pixels. The output is only well defined if the value in `images`
-// are in `[0,1]`.
+// FixedLengthRecordReaderV2Encoding sets the optional encoding attribute to value.
 //
-// See `rgb_to_hsv` for a description of the HSV encoding.
+// value: The type of encoding for the file. Currently ZLIB and GZIP
+// are supported. Defaults to none.
+// If not specified, defaults to ""
+func FixedLengthRecordReaderV2Encoding(value string) FixedLengthRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["encoding"] = value
+	}
+}
+
+// A Reader that outputs fixed-length records from a file.
 //
 // Arguments:
-//	images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3.
+//	record_bytes: Number of bytes in the record.
 //
-// Returns `images` converted to RGB.
-func HSVToRGB(scope *Scope, images tf.Output) (output tf.Output) {
+// Returns The handle to reference the Reader.
+func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...FixedLengthRecordReaderV2Attr) (reader_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"record_bytes": record_bytes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "HSVToRGB",
-		Input: []tf.Input{
-			images,
-		},
+		Type: "FixedLengthRecordReaderV2",
+
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns a list of tensors with the same shapes and contents as the input
-//
-// tensors.
+// The gradient operator for the SparseAdd op.
 //
-// This op can be used to override the gradient for complicated functions. For
-// example, suppose y = f(x) and we wish to apply a custom function g for backprop
-// such that dx = g(dy). In Python,
+// The SparseAdd op calculates A + B, where A, B, and the sum are all represented
+// as `SparseTensor` objects.  This op takes in the upstream gradient w.r.t.
+// non-empty values of the sum, and outputs the gradients w.r.t. the non-empty
+// values of A and B.
 //
-// ```python
-// with tf.get_default_graph().gradient_override_map(
-//     {'IdentityN': 'OverrideGradientWithG'}):
-//   y, _ = identity_n([f(x), x])
+// Arguments:
+//	backprop_val_grad: 1-D with shape `[nnz(sum)]`.  The gradient with respect to
+// the non-empty values of the sum.
+//	a_indices: 2-D.  The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`.
+//	b_indices: 2-D.  The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`.
+//	sum_indices: 2-D.  The `indices` of the sum `SparseTensor`, size
+// `[nnz(sum), ndims]`.
 //
-// @tf.RegisterGradient('OverrideGradientWithG')
-// def ApplyG(op, dy, _):
-//   return [None, g(dy)]  # Do not backprop to f(x).
-// ```
-func IdentityN(scope *Scope, input []tf.Output) (output []tf.Output) {
+// Returns 1-D with shape `[nnz(A)]`. The gradient with respect to the
+// non-empty values of A.1-D with shape `[nnz(B)]`. The gradient with respect to the
+// non-empty values of B.
+func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Output, b_indices tf.Output, sum_indices tf.Output) (a_val_grad tf.Output, b_val_grad tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "IdentityN",
+		Type: "SparseAddGrad",
 		Input: []tf.Input{
-			tf.OutputList(input),
+			backprop_val_grad, a_indices, b_indices, sum_indices,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
-		scope.UpdateErr("IdentityN", err)
-		return
-	}
-	return output
+	return op.Output(0), op.Output(1)
 }
 
-// Decode the first frame of a GIF-encoded image to a uint8 tensor.
-//
-// GIF with frame or transparency compression are not supported
-// convert animated GIF from compressed to uncompressed by:
-//
-//     convert $src.gif -coalesce $dst.gif
-//
-// This op also supports decoding JPEGs and PNGs, though it is cleaner to use
-// `tf.image.decode_image`.
-//
-// Arguments:
-//	contents: 0-D.  The GIF-encoded image.
-//
-// Returns 4-D with shape `[num_frames, height, width, 3]`. RGB order
-func DecodeGif(scope *Scope, contents tf.Output) (image tf.Output) {
+// Computes atan of x element-wise.
+func Atan(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "DecodeGif",
+		Type: "Atan",
 		Input: []tf.Input{
-			contents,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// DecodePngAttr is an optional argument to DecodePng.
-type DecodePngAttr func(optionalAttr)
-
-// DecodePngChannels sets the optional channels attribute to value.
-//
-// value: Number of color channels for the decoded image.
-// If not specified, defaults to 0
-func DecodePngChannels(value int64) DecodePngAttr {
-	return func(m optionalAttr) {
-		m["channels"] = value
-	}
-}
-
-// DecodePngDtype sets the optional dtype attribute to value.
-// If not specified, defaults to DT_UINT8
-func DecodePngDtype(value tf.DataType) DecodePngAttr {
-	return func(m optionalAttr) {
-		m["dtype"] = value
-	}
-}
-
-// Decode a PNG-encoded image to a uint8 or uint16 tensor.
-//
-// The attr `channels` indicates the desired number of color channels for the
-// decoded image.
-//
-// Accepted values are:
-//
-// *   0: Use the number of channels in the PNG-encoded image.
-// *   1: output a grayscale image.
-// *   3: output an RGB image.
-// *   4: output an RGBA image.
+// Encode audio data using the WAV file format.
 //
-// If needed, the PNG-encoded image is transformed to match the requested number
-// of color channels.
+// This operation will generate a string suitable to be saved out to create a .wav
+// audio file. It will be encoded in the 16-bit PCM format. It takes in float
+// values in the range -1.0f to 1.0f, and any outside that value will be clamped to
+// that range.
 //
-// This op also supports decoding JPEGs and non-animated GIFs since the interface
-// is the same, though it is cleaner to use `tf.image.decode_image`.
+// `audio` is a 2-D float Tensor of shape `[length, channels]`.
+// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
 //
 // Arguments:
-//	contents: 0-D.  The PNG-encoded image.
+//	audio: 2-D with shape `[length, channels]`.
+//	sample_rate: Scalar containing the sample frequency.
 //
-// Returns 3-D with shape `[height, width, channels]`.
-func DecodePng(scope *Scope, contents tf.Output, optional ...DecodePngAttr) (image tf.Output) {
+// Returns 0-D. WAV-encoded file contents.
+func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "DecodePng",
+		Type: "EncodeWav",
 		Input: []tf.Input{
-			contents,
+			audio, sample_rate,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Adjust the contrast of one or more images.
-//
-// `images` is a tensor of at least 3 dimensions.  The last 3 dimensions are
-// interpreted as `[height, width, channels]`.  The other dimensions only
-// represent a collection of images, such as `[batch, height, width, channels].`
+// Converts each string in the input Tensor to its hash mod by a number of buckets.
 //
-// Contrast is adjusted independently for each channel of each image.
+// The hash function is deterministic on the content of the string within the
+// process. The hash function is a keyed hash function, where attribute `key`
+// defines the key of the hash function. `key` is an array of 2 elements.
 //
-// For each channel, the Op first computes the mean of the image pixels in the
-// channel and then adjusts each component of each pixel to
-// `(x - mean) * contrast_factor + mean`.
+// A strong hash is important when inputs may be malicious, e.g. URLs with
+// additional components. Adversaries could try to make their inputs hash to the
+// same bucket for a denial-of-service attack or to skew the results. A strong
+// hash prevents this by making it difficult, if not infeasible, to compute inputs
+// that hash to the same bucket. This comes at a cost of roughly 4x higher compute
+// time than `tf.string_to_hash_bucket_fast`.
 //
 // Arguments:
-//	images: Images to adjust.  At least 3-D.
-//	contrast_factor: A float multiplier for adjusting contrast.
+//	input: The strings to assign a hash bucket.
+//	num_buckets: The number of buckets.
+//	key: The key for the keyed hash function passed as a list of two uint64
+// elements.
 //
-// Returns The contrast-adjusted image or images.
-func AdjustContrastv2(scope *Scope, images tf.Output, contrast_factor tf.Output) (output tf.Output) {
+// Returns A Tensor of the same shape as the input `string_tensor`.
+func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, key []int64) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"num_buckets": num_buckets, "key": key}
 	opspec := tf.OpSpec{
-		Type: "AdjustContrastv2",
+		Type: "StringToHashBucketStrong",
 		Input: []tf.Input{
-			images, contrast_factor,
+			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2.
-type PaddingFIFOQueueV2Attr func(optionalAttr)
-
-// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value.
-//
-// value: The shape of each component in a value. The length of this attr must
-// be either 0 or the same as the length of component_types.
-// Shapes of fixed rank but variable size are allowed by setting
-// any shape dimension to -1.  In this case, the inputs' shape may vary along
-// the given dimension, and DequeueMany will pad the given dimension with
-// zeros up to the maximum shape of all elements in the given batch.
-// If the length of this attr is 0, different queue elements may have
-// different ranks and shapes, but only one element may be dequeued at a time.
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shapes"] = value
-	}
-}
-
-// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value.
-//
-// value: The upper bound on the number of elements in this queue.
-// Negative numbers mean no limit.
-// If not specified, defaults to -1
-func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// PaddingFIFOQueueV2Container sets the optional container attribute to value.
+// Generates values in an interval.
 //
-// value: If non-empty, this queue is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value.
+// A sequence of `num` evenly-spaced values are generated beginning at `start`.
+// If `num > 1`, the values in the sequence increase by `stop - start / num - 1`,
+// so that the last one is exactly `stop`.
 //
-// value: If non-empty, this queue will be shared under the given name
-// across multiple sessions.
-// If not specified, defaults to ""
-func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// A queue that produces elements in first-in first-out order.
+// For example:
 //
-// Variable-size shapes are allowed by setting the corresponding shape dimensions
-// to 0 in the shape attr.  In this case DequeueMany will pad up to the maximum
-// size of any given element in the minibatch.  See below for details.
+// ```
+// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0  11.0  12.0]
+// ```
 //
 // Arguments:
-//	component_types: The type of each component in a value.
+//	start: First entry in the range.
+//	stop: Last entry in the range.
+//	num: Number of values to generate.
 //
-// Returns The handle to the queue.
-func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) {
+// Returns 1-D. The generated values.
+func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"component_types": component_types}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "PaddingFIFOQueueV2",
-
-		Attrs: attrs,
+		Type: "LinSpace",
+		Input: []tf.Input{
+			start, stop, num,
+		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ExtractJpegShapeAttr is an optional argument to ExtractJpegShape.
-type ExtractJpegShapeAttr func(optionalAttr)
+// DestroyResourceOpAttr is an optional argument to DestroyResourceOp.
+type DestroyResourceOpAttr func(optionalAttr)
 
-// ExtractJpegShapeOutputType sets the optional output_type attribute to value.
+// DestroyResourceOpIgnoreLookupError sets the optional ignore_lookup_error attribute to value.
 //
-// value: (Optional) The output type of the operation (int32 or int64).
-// Defaults to int32.
-// If not specified, defaults to DT_INT32
-func ExtractJpegShapeOutputType(value tf.DataType) ExtractJpegShapeAttr {
+// value: whether to ignore the error when the resource
+// doesn't exist.
+// If not specified, defaults to true
+func DestroyResourceOpIgnoreLookupError(value bool) DestroyResourceOpAttr {
 	return func(m optionalAttr) {
-		m["output_type"] = value
+		m["ignore_lookup_error"] = value
 	}
 }
 
-// Extract the shape information of a JPEG-encoded image.
+// Deletes the resource specified by the handle.
 //
-// This op only parses the image header, so it is much faster than DecodeJpeg.
+// All subsequent operations using the resource will result in a NotFound
+// error status.
 //
 // Arguments:
-//	contents: 0-D. The JPEG-encoded image.
+//	resource: handle to the resource to delete.
 //
-// Returns 1-D. The image shape with format [height, width, channels].
-func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegShapeAttr) (image_shape tf.Output) {
+// Returns the created operation.
+func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyResourceOpAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -6944,113 +6793,76 @@ func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegS
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ExtractJpegShape",
+		Type: "DestroyResourceOp",
 		Input: []tf.Input{
-			contents,
+			resource,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// DecodeJpegAttr is an optional argument to DecodeJpeg.
-type DecodeJpegAttr func(optionalAttr)
-
-// DecodeJpegChannels sets the optional channels attribute to value.
-//
-// value: Number of color channels for the decoded image.
-// If not specified, defaults to 0
-func DecodeJpegChannels(value int64) DecodeJpegAttr {
-	return func(m optionalAttr) {
-		m["channels"] = value
-	}
+	return scope.AddOperation(opspec)
 }
 
-// DecodeJpegRatio sets the optional ratio attribute to value.
-//
-// value: Downscaling ratio.
-// If not specified, defaults to 1
-func DecodeJpegRatio(value int64) DecodeJpegAttr {
-	return func(m optionalAttr) {
-		m["ratio"] = value
-	}
-}
+// CumprodAttr is an optional argument to Cumprod.
+type CumprodAttr func(optionalAttr)
 
-// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value.
+// CumprodExclusive sets the optional exclusive attribute to value.
 //
-// value: If true use a slower but nicer upscaling of the
-// chroma planes (yuv420/422 only).
-// If not specified, defaults to true
-func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr {
+// value: If `True`, perform exclusive cumprod.
+// If not specified, defaults to false
+func CumprodExclusive(value bool) CumprodAttr {
 	return func(m optionalAttr) {
-		m["fancy_upscaling"] = value
+		m["exclusive"] = value
 	}
 }
 
-// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value.
+// CumprodReverse sets the optional reverse attribute to value.
 //
-// value: If true try to recover an image from truncated input.
+// value: A `bool` (default: False).
 // If not specified, defaults to false
-func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr {
+func CumprodReverse(value bool) CumprodAttr {
 	return func(m optionalAttr) {
-		m["try_recover_truncated"] = value
+		m["reverse"] = value
 	}
 }
 
-// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value.
+// Compute the cumulative product of the tensor `x` along `axis`.
 //
-// value: The minimum required fraction of lines before a truncated
-// input is accepted.
-// If not specified, defaults to 1
-func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr {
-	return func(m optionalAttr) {
-		m["acceptable_fraction"] = value
-	}
-}
-
-// DecodeJpegDctMethod sets the optional dct_method attribute to value.
+// By default, this op performs an inclusive cumprod, which means that the first
+// element of the input is identical to the first element of the output:
 //
-// value: string specifying a hint about the algorithm used for
-// decompression.  Defaults to "" which maps to a system-specific
-// default.  Currently valid values are ["INTEGER_FAST",
-// "INTEGER_ACCURATE"].  The hint may be ignored (e.g., the internal
-// jpeg library changes to a version that does not have that specific
-// option.)
-// If not specified, defaults to ""
-func DecodeJpegDctMethod(value string) DecodeJpegAttr {
-	return func(m optionalAttr) {
-		m["dct_method"] = value
-	}
-}
-
-// Decode a JPEG-encoded image to a uint8 tensor.
+// ```python
+// tf.cumprod([a, b, c])  # => [a, a * b, a * b * c]
+// ```
 //
-// The attr `channels` indicates the desired number of color channels for the
-// decoded image.
+// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is
+// performed instead:
 //
-// Accepted values are:
+// ```python
+// tf.cumprod([a, b, c], exclusive=True)  # => [1, a, a * b]
+// ```
 //
-// *   0: Use the number of channels in the JPEG-encoded image.
-// *   1: output a grayscale image.
-// *   3: output an RGB image.
+// By setting the `reverse` kwarg to `True`, the cumprod is performed in the
+// opposite direction:
 //
-// If needed, the JPEG-encoded image is transformed to match the requested number
-// of color channels.
+// ```python
+// tf.cumprod([a, b, c], reverse=True)  # => [a * b * c, b * c, c]
+// ```
 //
-// The attr `ratio` allows downscaling the image by an integer factor during
-// decoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than
-// downscaling the image later.
+// This is more efficient than using separate `tf.reverse` ops.
 //
+// The `reverse` and `exclusive` kwargs can also be combined:
 //
-// This op also supports decoding PNGs and non-animated GIFs since the interface is
-// the same, though it is cleaner to use `tf.image.decode_image`.
+// ```python
+// tf.cumprod([a, b, c], exclusive=True, reverse=True)  # => [b * c, c, 1]
+// ```
 //
 // Arguments:
-//	contents: 0-D.  The JPEG-encoded image.
-//
-// Returns 3-D with shape `[height, width, channels]`..
-func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) {
+//	x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
+// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
+// `complex128`, `qint8`, `quint8`, `qint32`, `half`.
+//	axis: A `Tensor` of type `int32` (default: 0). Must be in the range
+// `[-rank(x), rank(x))`.
+func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -7059,9 +6871,9 @@ func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (i
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DecodeJpeg",
+		Type: "Cumprod",
 		Input: []tf.Input{
-			contents,
+			x, axis,
 		},
 		Attrs: attrs,
 	}
@@ -7069,31 +6881,91 @@ func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (i
 	return op.Output(0)
 }
 
-// ResizeNearestNeighborGradAttr is an optional argument to ResizeNearestNeighborGrad.
-type ResizeNearestNeighborGradAttr func(optionalAttr)
+// Computes the mean along segments of a tensor.
+//
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
+//
+// Computes a tensor such that
+// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is
+// over `j` such that `segment_ids[j] == i` and `N` is the total number of
+// values summed.
+//
+// If the mean is empty for a given segment ID `i`, `output[i] = 0`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMean.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SegmentMean",
+		Input: []tf.Input{
+			data, segment_ids,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// ResizeNearestNeighborGradAlignCorners sets the optional align_corners attribute to value.
+// ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp.
+type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr)
+
+// ResourceSparseApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value.
 //
-// value: If true, rescale grads by (orig_height - 1) / (height - 1), which
-// exactly aligns the 4 corners of grads and original_image. If false, rescale by
-// orig_height / height. Treat similarly the width dimension.
+// value: If `True`, updating of the var, mg, ms, and mom tensors is
+// protected by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
 // If not specified, defaults to false
-func ResizeNearestNeighborGradAlignCorners(value bool) ResizeNearestNeighborGradAttr {
+func ResourceSparseApplyCenteredRMSPropUseLocking(value bool) ResourceSparseApplyCenteredRMSPropAttr {
 	return func(m optionalAttr) {
-		m["align_corners"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Computes the gradient of nearest neighbor interpolation.
+// Update '*var' according to the centered RMSProp algorithm.
+//
+// The centered RMSProp algorithm uses an estimate of the centered second moment
+// (i.e., the variance) for normalization, as opposed to regular RMSProp, which
+// uses the (uncentered) second moment. This often helps with training, but is
+// slightly more expensive in terms of computation and memory.
+//
+// Note that in dense implementation of this algorithm, mg, ms, and mom will
+// update even if the grad is zero, but in this sparse implementation, mg, ms,
+// and mom will not update in iterations during which the grad is zero.
+//
+// mean_square = decay * mean_square + (1-decay) * gradient ** 2
+// mean_grad = decay * mean_grad + (1-decay) * gradient
+// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
+//
+// ms <- rho * ms_{t-1} + (1-rho) * grad * grad
+// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
+// var <- var - mom
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	mg: Should be from a Variable().
+//	ms: Should be from a Variable().
+//	mom: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	rho: Decay rate. Must be a scalar.
 //
-// Arguments:
-//	grads: 4-D with shape `[batch, height, width, channels]`.
-//	size: = A 1-D int32 Tensor of 2 elements: `orig_height, orig_width`. The
-// original input size.
+//	epsilon: Ridge term. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var, ms and mom.
 //
-// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients
-// with respect to the input image.
-func ResizeNearestNeighborGrad(scope *Scope, grads tf.Output, size tf.Output, optional ...ResizeNearestNeighborGradAttr) (output tf.Output) {
+// Returns the created operation.
+func ResourceSparseApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyCenteredRMSPropAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -7102,52 +6974,32 @@ func ResizeNearestNeighborGrad(scope *Scope, grads tf.Output, size tf.Output, op
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResizeNearestNeighborGrad",
+		Type: "ResourceSparseApplyCenteredRMSProp",
 		Input: []tf.Input{
-			grads, size,
+			var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, indices,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResizeNearestNeighborAttr is an optional argument to ResizeNearestNeighbor.
-type ResizeNearestNeighborAttr func(optionalAttr)
-
-// ResizeNearestNeighborAlignCorners sets the optional align_corners attribute to value.
-//
-// value: If true, rescale input by (new_height - 1) / (height - 1), which
-// exactly aligns the 4 corners of images and resized images. If false, rescale
-// by new_height / height. Treat similarly the width dimension.
-// If not specified, defaults to false
-func ResizeNearestNeighborAlignCorners(value bool) ResizeNearestNeighborAttr {
-	return func(m optionalAttr) {
-		m["align_corners"] = value
-	}
+	return scope.AddOperation(opspec)
 }
 
-// Resize `images` to `size` using nearest neighbor interpolation.
+// Creates a dataset that batches `batch_size` elements from `input_dataset`.
 //
 // Arguments:
-//	images: 4-D with shape `[batch, height, width, channels]`.
-//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-// new size for the images.
 //
-// Returns 4-D with shape
-// `[batch, new_height, new_width, channels]`.
-func ResizeNearestNeighbor(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeNearestNeighborAttr) (resized_images tf.Output) {
+//	batch_size: A scalar representing the number of elements to accumulate in a
+// batch.
+//
+//
+func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "ResizeNearestNeighbor",
+		Type: "BatchDataset",
 		Input: []tf.Input{
-			images, size,
+			input_dataset, batch_size,
 		},
 		Attrs: attrs,
 	}
@@ -7155,267 +7007,313 @@ func ResizeNearestNeighbor(scope *Scope, images tf.Output, size tf.Output, optio
 	return op.Output(0)
 }
 
-// ResizeBicubicGradAttr is an optional argument to ResizeBicubicGrad.
-type ResizeBicubicGradAttr func(optionalAttr)
-
-// ResizeBicubicGradAlignCorners sets the optional align_corners attribute to value.
+// Inverse fast Fourier transform.
 //
-// value: If true, rescale grads by (orig_height - 1) / (height - 1), which
-// exactly aligns the 4 corners of grads and original_image. If false, rescale by
-// orig_height / height. Treat similarly the width dimension.
-// If not specified, defaults to false
-func ResizeBicubicGradAlignCorners(value bool) ResizeBicubicGradAttr {
-	return func(m optionalAttr) {
-		m["align_corners"] = value
-	}
-}
-
-// Computes the gradient of bicubic interpolation.
+// Computes the inverse 1-dimensional discrete Fourier transform over the
+// inner-most dimension of `input`.
 //
 // Arguments:
-//	grads: 4-D with shape `[batch, height, width, channels]`.
-//	original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`,
-// The image tensor that was resized.
+//	input: A complex64 tensor.
 //
-// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`.
-// Gradients with respect to the input image. Input image must have been
-// float or double.
-func ResizeBicubicGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBicubicGradAttr) (output tf.Output) {
+// Returns A complex64 tensor of the same shape as `input`. The inner-most
+//   dimension of `input` is replaced with its inverse 1D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.ifft
+// @end_compatibility
+func IFFT(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "ResizeBicubicGrad",
+		Type: "IFFT",
 		Input: []tf.Input{
-			grads, original_image,
+			input,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// SummaryWriterAttr is an optional argument to SummaryWriter.
-type SummaryWriterAttr func(optionalAttr)
+// LRNAttr is an optional argument to LRN.
+type LRNAttr func(optionalAttr)
 
-// SummaryWriterSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func SummaryWriterSharedName(value string) SummaryWriterAttr {
+// LRNDepthRadius sets the optional depth_radius attribute to value.
+//
+// value: 0-D.  Half-width of the 1-D normalization window.
+// If not specified, defaults to 5
+func LRNDepthRadius(value int64) LRNAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["depth_radius"] = value
 	}
 }
 
-// SummaryWriterContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func SummaryWriterContainer(value string) SummaryWriterAttr {
+// LRNBias sets the optional bias attribute to value.
+//
+// value: An offset (usually positive to avoid dividing by 0).
+// If not specified, defaults to 1
+func LRNBias(value float32) LRNAttr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["bias"] = value
 	}
 }
 
-// Returns a handle to be used to access a summary writer.
-//
-// The summary writer is an in-graph resource which can be used by ops to write
-// summaries to event files.
+// LRNAlpha sets the optional alpha attribute to value.
 //
-// Returns the summary writer resource. Scalar handle.
-func SummaryWriter(scope *Scope, optional ...SummaryWriterAttr) (writer tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
+// value: A scale factor, usually positive.
+// If not specified, defaults to 1
+func LRNAlpha(value float32) LRNAttr {
+	return func(m optionalAttr) {
+		m["alpha"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "SummaryWriter",
+}
 
-		Attrs: attrs,
+// LRNBeta sets the optional beta attribute to value.
+//
+// value: An exponent.
+// If not specified, defaults to 0.5
+func LRNBeta(value float32) LRNAttr {
+	return func(m optionalAttr) {
+		m["beta"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Returns the set of files matching one or more glob patterns.
+// Local Response Normalization.
 //
-// Note that this routine only supports wildcard characters in the
-// basename portion of the pattern, not in the directory portion.
+// The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last
+// dimension), and each vector is normalized independently.  Within a given vector,
+// each component is divided by the weighted, squared sum of inputs within
+// `depth_radius`.  In detail,
 //
-// Arguments:
-//	pattern: Shell wildcard pattern(s). Scalar or vector of type string.
+//     sqr_sum[a, b, c, d] =
+//         sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
+//     output = input / (bias + alpha * sqr_sum) ** beta
 //
-// Returns A vector of matching filenames.
-func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) {
+// For details, see [Krizhevsky et al., ImageNet classification with deep
+// convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
+//
+// Arguments:
+//	input: 4-D.
+func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "MatchingFiles",
+		Type: "LRN",
 		Input: []tf.Input{
-			pattern,
+			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Store the input tensor in the state of the current session.
-//
-// Arguments:
-//	value: The tensor to be stored.
-//
-// Returns The handle for the tensor stored in the session state, represented
-// as a ResourceHandle object.
-func GetSessionHandleV2(scope *Scope, value tf.Output) (handle tf.Output) {
+// Creates a dataset that zips together `input_datasets`.
+func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "GetSessionHandleV2",
+		Type: "ZipDataset",
 		Input: []tf.Input{
-			value,
+			tf.OutputList(input_datasets),
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Adjust the hue of one or more images.
-//
-// `images` is a tensor of at least 3 dimensions.  The last dimension is
-// interpretted as channels, and must be three.
-//
-// The input image is considered in the RGB colorspace. Conceptually, the RGB
-// colors are first mapped into HSV. A delta is then applied all the hue values,
-// and then remapped back to RGB colorspace.
+// Writes a `GraphDef` protocol buffer to a `SummaryWriter`.
 //
 // Arguments:
-//	images: Images to adjust.  At least 3-D.
-//	delta: A float delta to add to the hue.
+//	writer: Handle of `SummaryWriter`.
+//	step: The step to write the summary for.
+//	tensor: A scalar string of the serialized tf.GraphDef proto.
 //
-// Returns The hue-adjusted image or images.
-func AdjustHue(scope *Scope, images tf.Output, delta tf.Output) (output tf.Output) {
+// Returns the created operation.
+func WriteGraphSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "AdjustHue",
+		Type: "WriteGraphSummary",
 		Input: []tf.Input{
-			images, delta,
+			writer, step, tensor,
 		},
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Restore a Reader to its initial clean state.
+// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad.
+type ResourceSparseApplyAdagradAttr func(optionalAttr)
+
+// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update relevant entries in '*var' and '*accum' according to the adagrad scheme.
+//
+// That is for rows we have grad for, we update var and accum as follows:
+// accum += grad * grad
+// var -= lr * grad * (1 / sqrt(accum))
 //
 // Arguments:
-//	reader_handle: Handle to a Reader.
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Learning rate. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
 //
 // Returns the created operation.
-func ReaderResetV2(scope *Scope, reader_handle tf.Output) (o *tf.Operation) {
+func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "ReaderResetV2",
+		Type: "ResourceSparseApplyAdagrad",
 		Input: []tf.Input{
-			reader_handle,
+			var_, accum, lr, grad, indices,
 		},
+		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// Returns up to `num_records` (key, value) pairs produced by a Reader.
+// 2D real-valued fast Fourier transform.
 //
-// Will dequeue from the input queue if necessary (e.g. when the
-// Reader needs to start reading from a new file since it has finished
-// with the previous file).
-// It may return less than `num_records` even before the last batch.
+// Computes the 2-dimensional discrete Fourier transform of a real-valued signal
+// over the inner-most 2 dimensions of `input`.
+//
+// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the
+// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension
+// of `output`: the zero-frequency term, followed by the `fft_length / 2`
+// positive-frequency terms.
+//
+// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the
+// corresponding dimension of `input`, the dimension is cropped. If it is larger,
+// the dimension is padded with zeros.
 //
 // Arguments:
-//	reader_handle: Handle to a `Reader`.
-//	queue_handle: Handle to a `Queue`, with string work items.
-//	num_records: number of records to read from `Reader`.
+//	input: A float32 tensor.
+//	fft_length: An int32 tensor of shape [2]. The FFT length for each dimension.
 //
-// Returns A 1-D tensor.A 1-D tensor.
-func ReaderReadUpToV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output, num_records tf.Output) (keys tf.Output, values tf.Output) {
+// Returns A complex64 tensor of the same rank as `input`. The inner-most 2
+//   dimensions of `input` are replaced with their 2D Fourier transform. The
+//   inner-most dimension contains `fft_length / 2 + 1` unique frequency
+//   components.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.rfft2
+// @end_compatibility
+func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ReaderReadUpToV2",
+		Type: "RFFT2D",
 		Input: []tf.Input{
-			reader_handle, queue_handle, num_records,
+			input, fft_length,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// Returns the next record (key, value pair) produced by a Reader.
+// ResizeAreaAttr is an optional argument to ResizeArea.
+type ResizeAreaAttr func(optionalAttr)
+
+// ResizeAreaAlignCorners sets the optional align_corners attribute to value.
 //
-// Will dequeue from the input queue if necessary (e.g. when the
-// Reader needs to start reading from a new file since it has finished
-// with the previous file).
+// value: If true, rescale input by (new_height - 1) / (height - 1), which
+// exactly aligns the 4 corners of images and resized images. If false, rescale
+// by new_height / height. Treat similarly the width dimension.
+// If not specified, defaults to false
+func ResizeAreaAlignCorners(value bool) ResizeAreaAttr {
+	return func(m optionalAttr) {
+		m["align_corners"] = value
+	}
+}
+
+// Resize `images` to `size` using area interpolation.
+//
+// Input images can be of different types but output images are always float.
+//
+// Each output pixel is computed by first transforming the pixel's footprint into
+// the input tensor and then averaging the pixels that intersect the footprint. An
+// input pixel's contribution to the average is weighted by the fraction of its
+// area that intersects the footprint.  This is the same as OpenCV's INTER_AREA.
 //
 // Arguments:
-//	reader_handle: Handle to a Reader.
-//	queue_handle: Handle to a Queue, with string work items.
+//	images: 4-D with shape `[batch, height, width, channels]`.
+//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+// new size for the images.
 //
-// Returns A scalar.A scalar.
-func ReaderReadV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output) (key tf.Output, value tf.Output) {
+// Returns 4-D with shape
+// `[batch, new_height, new_width, channels]`.
+func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeAreaAttr) (resized_images tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "ReaderReadV2",
+		Type: "ResizeArea",
 		Input: []tf.Input{
-			reader_handle, queue_handle,
+			images, size,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// IdentityReaderV2Attr is an optional argument to IdentityReaderV2.
-type IdentityReaderV2Attr func(optionalAttr)
+// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform.
+type StatelessRandomUniformAttr func(optionalAttr)
 
-// IdentityReaderV2Container sets the optional container attribute to value.
+// StatelessRandomUniformDtype sets the optional dtype attribute to value.
 //
-// value: If non-empty, this reader is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func IdentityReaderV2Container(value string) IdentityReaderV2Attr {
+// value: The type of the output.
+// If not specified, defaults to DT_FLOAT
+func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["dtype"] = value
 	}
 }
 
-// IdentityReaderV2SharedName sets the optional shared_name attribute to value.
+// Outputs deterministic pseudorandom random values from a uniform distribution.
 //
-// value: If non-empty, this reader is named in the given bucket
-// with this shared_name. Otherwise, the node name is used instead.
-// If not specified, defaults to ""
-func IdentityReaderV2SharedName(value string) IdentityReaderV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// A Reader that outputs the queued work as both the key and value.
+// The generated values follow a uniform distribution in the range `[0, 1)`. The
+// lower bound 0 is included in the range, while the upper bound 1 is excluded.
 //
-// To use, enqueue strings in a Queue.  ReaderRead will take the front
-// work string and output (work, work).
+// The outputs are a deterministic function of `shape` and `seed`.
 //
-// Returns The handle to reference the Reader.
-func IdentityReaderV2(scope *Scope, optional ...IdentityReaderV2Attr) (reader_handle tf.Output) {
+// Arguments:
+//	shape: The shape of the output tensor.
+//	seed: 2 seeds (shape [2]).
+//
+// Returns Random values with specified shape.
+func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -7424,51 +7322,47 @@ func IdentityReaderV2(scope *Scope, optional ...IdentityReaderV2Attr) (reader_ha
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "IdentityReaderV2",
-
+		Type: "StatelessRandomUniform",
+		Input: []tf.Input{
+			shape, seed,
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// TFRecordReaderV2Attr is an optional argument to TFRecordReaderV2.
-type TFRecordReaderV2Attr func(optionalAttr)
+// AngleAttr is an optional argument to Angle.
+type AngleAttr func(optionalAttr)
 
-// TFRecordReaderV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this reader is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func TFRecordReaderV2Container(value string) TFRecordReaderV2Attr {
+// AngleTout sets the optional Tout attribute to value.
+// If not specified, defaults to DT_FLOAT
+func AngleTout(value tf.DataType) AngleAttr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["Tout"] = value
 	}
 }
 
-// TFRecordReaderV2SharedName sets the optional shared_name attribute to value.
+// Returns the argument of a complex number.
 //
-// value: If non-empty, this reader is named in the given bucket
-// with this shared_name. Otherwise, the node name is used instead.
-// If not specified, defaults to ""
-func TFRecordReaderV2SharedName(value string) TFRecordReaderV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// TFRecordReaderV2CompressionType sets the optional compression_type attribute to value.
-// If not specified, defaults to ""
-func TFRecordReaderV2CompressionType(value string) TFRecordReaderV2Attr {
-	return func(m optionalAttr) {
-		m["compression_type"] = value
-	}
-}
-
-// A Reader that outputs the records from a TensorFlow Records file.
+// Given a tensor `input` of complex numbers, this operation returns a tensor of
+// type `float` that is the argument of each element in `input`. All elements in
+// `input` must be complex numbers of the form \\(a + bj\\), where *a*
+// is the real part and *b* is the imaginary part.
 //
-// Returns The handle to reference the Reader.
-func TFRecordReaderV2(scope *Scope, optional ...TFRecordReaderV2Attr) (reader_handle tf.Output) {
+// The argument returned by this operation is of the form \\(atan2(b, a)\\).
+//
+// For example:
+//
+// ```
+// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
+// tf.angle(input) ==> [2.0132, 1.056]
+// ```
+//
+// @compatibility(numpy)
+// Equivalent to np.angle.
+// @end_compatibility
+func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -7477,62 +7371,55 @@ func TFRecordReaderV2(scope *Scope, optional ...TFRecordReaderV2Attr) (reader_ha
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TFRecordReaderV2",
-
+		Type: "Angle",
+		Input: []tf.Input{
+			input,
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// TextLineReaderV2Attr is an optional argument to TextLineReaderV2.
-type TextLineReaderV2Attr func(optionalAttr)
-
-// TextLineReaderV2SkipHeaderLines sets the optional skip_header_lines attribute to value.
-//
-// value: Number of lines to skip from the beginning of every file.
-// If not specified, defaults to 0
-func TextLineReaderV2SkipHeaderLines(value int64) TextLineReaderV2Attr {
-	return func(m optionalAttr) {
-		m["skip_header_lines"] = value
-	}
-}
+// VarHandleOpAttr is an optional argument to VarHandleOp.
+type VarHandleOpAttr func(optionalAttr)
 
-// TextLineReaderV2Container sets the optional container attribute to value.
+// VarHandleOpContainer sets the optional container attribute to value.
 //
-// value: If non-empty, this reader is placed in the given container.
-// Otherwise, a default container is used.
+// value: the container this variable is placed in.
 // If not specified, defaults to ""
-func TextLineReaderV2Container(value string) TextLineReaderV2Attr {
+func VarHandleOpContainer(value string) VarHandleOpAttr {
 	return func(m optionalAttr) {
 		m["container"] = value
 	}
 }
 
-// TextLineReaderV2SharedName sets the optional shared_name attribute to value.
+// VarHandleOpSharedName sets the optional shared_name attribute to value.
 //
-// value: If non-empty, this reader is named in the given bucket
-// with this shared_name. Otherwise, the node name is used instead.
+// value: the name by which this variable is referred to.
 // If not specified, defaults to ""
-func TextLineReaderV2SharedName(value string) TextLineReaderV2Attr {
+func VarHandleOpSharedName(value string) VarHandleOpAttr {
 	return func(m optionalAttr) {
 		m["shared_name"] = value
 	}
 }
 
-// A Reader that outputs the lines of a file delimited by '\n'.
+// Creates a handle to a Variable resource.
 //
-// Returns The handle to reference the Reader.
-func TextLineReaderV2(scope *Scope, optional ...TextLineReaderV2Attr) (reader_handle tf.Output) {
+// Arguments:
+//	dtype: the type of this variable. Must agree with the dtypes
+// of all ops using this variable.
+//	shape: The (possibly partially specified) shape of this variable.
+func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...VarHandleOpAttr) (resource tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"dtype": dtype, "shape": shape}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TextLineReaderV2",
+		Type: "VarHandleOp",
 
 		Attrs: attrs,
 	}
@@ -7540,185 +7427,152 @@ func TextLineReaderV2(scope *Scope, optional ...TextLineReaderV2Attr) (reader_ha
 	return op.Output(0)
 }
 
-// Generate a glob pattern matching all sharded file names.
-func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ShardedFilespec",
-		Input: []tf.Input{
-			basename, num_shards,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Delete the stack from its resource container.
+// Creates a summary file writer accessible by the given resource handle.
 //
 // Arguments:
-//	handle: The handle to a stack.
+//	writer: A handle to the summary writer resource
+//	logdir: Directory where the event file will be written.
+//	max_queue: Size of the queue of pending events and summaries.
+//	flush_millis: How often, in milliseconds, to flush the pending events and
+// summaries to disk.
+//	filename_suffix: Every event file's name is suffixed with this suffix.
 //
 // Returns the created operation.
-func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) {
+func CreateSummaryFileWriter(scope *Scope, writer tf.Output, logdir tf.Output, max_queue tf.Output, flush_millis tf.Output, filename_suffix tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "StackCloseV2",
+		Type: "CreateSummaryFileWriter",
 		Input: []tf.Input{
-			handle,
+			writer, logdir, max_queue, flush_millis, filename_suffix,
 		},
 	}
 	return scope.AddOperation(opspec)
 }
 
-// Generate a sharded filename. The filename is printf formatted as
+// Elementwise computes the bitwise XOR of `x` and `y`.
 //
-//    %s-%05d-of-%05d, basename, shard, num_shards.
-func ShardedFilename(scope *Scope, basename tf.Output, shard tf.Output, num_shards tf.Output) (filename tf.Output) {
+// The result will have those bits set, that are different in `x` and `y`. The
+// computation is performed on the underlying representations of `x` and `y`.
+func BitwiseXor(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ShardedFilename",
+		Type: "BitwiseXor",
 		Input: []tf.Input{
-			basename, shard, num_shards,
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Saves input tensors slices to disk.
-//
-// This is like `Save` except that tensors can be listed in the saved file as being
-// a slice of a larger tensor.  `shapes_and_slices` specifies the shape of the
-// larger tensor and the slice that this tensor covers. `shapes_and_slices` must
-// have as many elements as `tensor_names`.
-//
-// Elements of the `shapes_and_slices` input must either be:
+// Deserialize `SparseTensor` objects.
 //
-// *  The empty string, in which case the corresponding tensor is
-//    saved normally.
-// *  A string of the form `dim0 dim1 ... dimN-1 slice-spec` where the
-//    `dimI` are the dimensions of the larger tensor and `slice-spec`
-//    specifies what part is covered by the tensor to save.
-//
-// `slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1`
-// where each `sliceI` is either:
+// The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where
+// the last dimension stores serialized `SparseTensor` objects and the other N
+// dimensions (N >= 0) correspond to a batch. The ranks of the original
+// `SparseTensor` objects must all match. When the final `SparseTensor` is
+// created, its rank is the rank of the incoming `SparseTensor` objects plus N;
+// the sparse tensors have been concatenated along new dimensions, one for each
+// batch.
 //
-// *  The string `-` meaning that the slice covers all indices of this dimension
-// *  `start,length` where `start` and `length` are integers.  In that
-//    case the slice covers `length` indices starting at `start`.
+// The output `SparseTensor` object's shape values for the original dimensions
+// are the max across the input `SparseTensor` objects' shape values for the
+// corresponding dimensions. The new dimensions match the size of the batch.
 //
-// See also `Save`.
+// The input `SparseTensor` objects' indices are assumed ordered in
+// standard lexicographic order.  If this is not the case, after this
+// step run `SparseReorder` to restore index ordering.
 //
-// Arguments:
-//	filename: Must have a single element. The name of the file to which we write the
-// tensor.
-//	tensor_names: Shape `[N]`. The names of the tensors to be saved.
-//	shapes_and_slices: Shape `[N]`.  The shapes and slice specifications to use when
-// saving the tensors.
-//	data: `N` tensors to save.
+// For example, if the serialized input is a `[2 x 3]` matrix representing two
+// original `SparseTensor` objects:
 //
-// Returns the created operation.
-func SaveSlices(scope *Scope, filename tf.Output, tensor_names tf.Output, shapes_and_slices tf.Output, data []tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SaveSlices",
-		Input: []tf.Input{
-			filename, tensor_names, shapes_and_slices, tf.OutputList(data),
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// MergeV2CheckpointsAttr is an optional argument to MergeV2Checkpoints.
-type MergeV2CheckpointsAttr func(optionalAttr)
-
-// MergeV2CheckpointsDeleteOldDirs sets the optional delete_old_dirs attribute to value.
+//     index = [ 0]
+//             [10]
+//             [20]
+//     values = [1, 2, 3]
+//     shape = [50]
 //
-// value: see above.
-// If not specified, defaults to true
-func MergeV2CheckpointsDeleteOldDirs(value bool) MergeV2CheckpointsAttr {
-	return func(m optionalAttr) {
-		m["delete_old_dirs"] = value
-	}
-}
-
-// V2 format specific: merges the metadata files of sharded checkpoints.  The
+// and
 //
-// result is one logical checkpoint, with one physical metadata file and renamed
-// data files.
+//     index = [ 2]
+//             [10]
+//     values = [4, 5]
+//     shape = [30]
 //
-// Intended for "grouping" multiple checkpoints in a sharded checkpoint setup.
+// then the final deserialized `SparseTensor` will be:
 //
-// If delete_old_dirs is true, attempts to delete recursively the dirname of each
-// path in the input checkpoint_prefixes.  This is useful when those paths are non
-// user-facing temporary locations.
+//     index = [0  0]
+//             [0 10]
+//             [0 20]
+//             [1  2]
+//             [1 10]
+//     values = [1, 2, 3, 4, 5]
+//     shape = [2 50]
 //
 // Arguments:
-//	checkpoint_prefixes: prefixes of V2 checkpoints to merge.
-//	destination_prefix: scalar.  The desired final prefix.  Allowed to be the same
-// as one of the checkpoint_prefixes.
-//
-// Returns the created operation.
-func MergeV2Checkpoints(scope *Scope, checkpoint_prefixes tf.Output, destination_prefix tf.Output, optional ...MergeV2CheckpointsAttr) (o *tf.Operation) {
+//	serialized_sparse: The serialized `SparseTensor` objects. The last dimension
+// must have 3 columns.
+//	dtype: The `dtype` of the serialized `SparseTensor` objects.
+func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"dtype": dtype}
 	opspec := tf.OpSpec{
-		Type: "MergeV2Checkpoints",
+		Type: "DeserializeSparse",
 		Input: []tf.Input{
-			checkpoint_prefixes, destination_prefix,
+			serialized_sparse,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2.
-type QueueEnqueueManyV2Attr func(optionalAttr)
+// ResourceApplyRMSPropAttr is an optional argument to ResourceApplyRMSProp.
+type ResourceApplyRMSPropAttr func(optionalAttr)
 
-// QueueEnqueueManyV2TimeoutMs sets the optional timeout_ms attribute to value.
+// ResourceApplyRMSPropUseLocking sets the optional use_locking attribute to value.
 //
-// value: If the queue is too full, this operation will block for up
-// to timeout_ms milliseconds.
-// Note: This option is not supported yet.
-// If not specified, defaults to -1
-func QueueEnqueueManyV2TimeoutMs(value int64) QueueEnqueueManyV2Attr {
+// value: If `True`, updating of the var, ms, and mom tensors is protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyRMSPropUseLocking(value bool) ResourceApplyRMSPropAttr {
 	return func(m optionalAttr) {
-		m["timeout_ms"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Enqueues zero or more tuples of one or more tensors in the given queue.
+// Update '*var' according to the RMSProp algorithm.
 //
-// This operation slices each component tensor along the 0th dimension to
-// make multiple queue elements. All of the tuple components must have the
-// same size in the 0th dimension.
+// Note that in dense implementation of this algorithm, ms and mom will
+// update even if the grad is zero, but in this sparse implementation, ms
+// and mom will not update in iterations during which the grad is zero.
 //
-// The components input has k elements, which correspond to the components of
-// tuples stored in the given queue.
+// mean_square = decay * mean_square + (1-decay) * gradient ** 2
+// Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
 //
-// N.B. If the queue is full, this operation will block until the given
-// elements have been enqueued (or 'timeout_ms' elapses, if specified).
+// ms <- rho * ms_{t-1} + (1-rho) * grad * grad
+// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
+// var <- var - mom
 //
 // Arguments:
-//	handle: The handle to a queue.
-//	components: One or more tensors from which the enqueued tensors should
-// be taken.
+//	var_: Should be from a Variable().
+//	ms: Should be from a Variable().
+//	mom: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	rho: Decay rate. Must be a scalar.
+//
+//	epsilon: Ridge term. Must be a scalar.
+//	grad: The gradient.
 //
 // Returns the created operation.
-func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueManyV2Attr) (o *tf.Operation) {
+func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyRMSPropAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -7727,171 +7581,118 @@ func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output,
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QueueEnqueueManyV2",
+		Type: "ResourceApplyRMSProp",
 		Input: []tf.Input{
-			handle, tf.OutputList(components),
+			var_, ms, mom, lr, rho, momentum, epsilon, grad,
 		},
 		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// SvdAttr is an optional argument to Svd.
-type SvdAttr func(optionalAttr)
-
-// SvdComputeUv sets the optional compute_uv attribute to value.
-//
-// value: If true, left and right singular vectors will be
-// computed and returned in `u` and `v`, respectively.
-// If false, `u` and `v` are not set and should never referenced.
-// If not specified, defaults to true
-func SvdComputeUv(value bool) SvdAttr {
-	return func(m optionalAttr) {
-		m["compute_uv"] = value
-	}
-}
+// SizeAttr is an optional argument to Size.
+type SizeAttr func(optionalAttr)
 
-// SvdFullMatrices sets the optional full_matrices attribute to value.
-//
-// value: If true, compute full-sized `u` and `v`. If false
-// (the default), compute only the leading `P` singular vectors.
-// Ignored if `compute_uv` is `False`.
-// If not specified, defaults to false
-func SvdFullMatrices(value bool) SvdAttr {
+// SizeOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_INT32
+func SizeOutType(value tf.DataType) SizeAttr {
 	return func(m optionalAttr) {
-		m["full_matrices"] = value
-	}
-}
-
-// Computes the singular value decompositions of one or more matrices.
-//
-// Computes the SVD of each inner matrix in `input` such that
-// `input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])`
-//
-// ```python
-// # a is a tensor containing a batch of matrices.
-// # s is a tensor of singular values for each matrix.
-// # u is the tensor containing of left singular vectors for each matrix.
-// # v is the tensor containing of right singular vectors for each matrix.
-// s, u, v = svd(a)
-// s, _, _ = svd(a, compute_uv=False)
-// ```
-//
-// Arguments:
-//	input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions
-// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`.
-//
-// Returns Singular values. Shape is `[..., P]`.Left singular vectors. If `full_matrices` is `False` then shape is
-// `[..., M, P]`; if `full_matrices` is `True` then shape is
-// `[..., M, M]`. Undefined if `compute_uv` is `False`.Left singular vectors. If `full_matrices` is `False` then shape is
-// `[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`.
-// Undefined if `compute_uv` is false.
-func Svd(scope *Scope, input tf.Output, optional ...SvdAttr) (s tf.Output, u tf.Output, v tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Svd",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
+		m["out_type"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Converts one or more images from RGB to HSV.
-//
-// Outputs a tensor of the same shape as the `images` tensor, containing the HSV
-// value of the pixels. The output is only well defined if the value in `images`
-// are in `[0,1]`.
+// Returns the size of a tensor.
 //
-// `output[..., 0]` contains hue, `output[..., 1]` contains saturation, and
-// `output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0
-// corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue.
+// This operation returns an integer representing the number of elements in
+// `input`.
 //
-// Arguments:
-//	images: 1-D or higher rank. RGB data to convert. Last dimension must be size 3.
+// For example:
 //
-// Returns `images` converted to HSV.
-func RGBToHSV(scope *Scope, images tf.Output) (output tf.Output) {
+// ```
+// # 't' is [[[1, 1,, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]]
+// size(t) ==> 12
+// ```
+func Size(scope *Scope, input tf.Output, optional ...SizeAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "RGBToHSV",
+		Type: "Size",
 		Input: []tf.Input{
-			images,
+			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MatrixSolveLsAttr is an optional argument to MatrixSolveLs.
-type MatrixSolveLsAttr func(optionalAttr)
+// ResourceScatterNdUpdateAttr is an optional argument to ResourceScatterNdUpdate.
+type ResourceScatterNdUpdateAttr func(optionalAttr)
 
-// MatrixSolveLsFast sets the optional fast attribute to value.
+// ResourceScatterNdUpdateUseLocking sets the optional use_locking attribute to value.
+//
+// value: An optional bool. Defaults to True. If True, the assignment will
+// be protected by a lock; otherwise the behavior is undefined,
+// but may exhibit less contention.
 // If not specified, defaults to true
-func MatrixSolveLsFast(value bool) MatrixSolveLsAttr {
+func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr {
 	return func(m optionalAttr) {
-		m["fast"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Solves one or more linear least-squares problems.
+// Applies sparse `updates` to individual values or slices within a given
 //
-// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions
-// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same
-// type as `matrix` and shape `[..., M, K]`.
-// The output is a tensor shape `[..., N, K]` where each output matrix solves
-// each of the equations
-// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]`
-// in the least squares sense.
+// variable according to `indices`.
 //
-// We use the following notation for (complex) matrix and right-hand sides
-// in the batch:
+// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
 //
-// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\),
-// `rhs`=\\(B  \in \mathbb{C}^{m \times k}\\),
-// `output`=\\(X  \in \mathbb{C}^{n \times k}\\),
-// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\).
+// `indices` must be integer tensor, containing indices into `ref`.
+// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
 //
-// If `fast` is `True`, then the solution is computed by solving the normal
-// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then
-// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares
-// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 +
-// \lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as
-// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the
-// minimum-norm solution to the under-determined linear system, i.e.
-// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\),
-// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable
-// when \\(A\\) is numerically full rank and has a condition number
-// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or\\(\lambda\\) is
-// sufficiently large.
+// The innermost dimension of `indices` (with length `K`) corresponds to
+// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+// dimension of `ref`.
 //
-// If `fast` is `False` an algorithm based on the numerically robust complete
-// orthogonal decomposition is used. This computes the minimum-norm
-// least-squares solution, even when \\(A\\) is rank deficient. This path is
-// typically 6-7 times slower than the fast path. If `fast` is `False` then
-// `l2_regularizer` is ignored.
+// `updates` is `Tensor` of rank `Q-1+P-K` with shape:
 //
-// Arguments:
-//	matrix: Shape is `[..., M, N]`.
-//	rhs: Shape is `[..., M, K]`.
-//	l2_regularizer: Scalar tensor.
+// ```
+// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+// ```
 //
-// @compatibility(numpy)
-// Equivalent to np.linalg.lstsq
-// @end_compatibility
+// For example, say we want to update 4 scattered elements to a rank-1 tensor to
+// 8 elements. In Python, that update would look like this:
 //
-// Returns Shape is `[..., N, K]`.
-func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) {
+// ```python
+//     ref = tfe.Variable([1, 2, 3, 4, 5, 6, 7, 8])
+//     indices = tf.constant([[4], [3], [1] ,[7]])
+//     updates = tf.constant([9, 10, 11, 12])
+//     update = tf.scatter_nd_update(ref, indices, updates)
+//     with tf.Session() as sess:
+//       print sess.run(update)
+// ```
+//
+// The resulting update to ref would look like this:
+//
+//     [1, 11, 3, 10, 9, 6, 7, 12]
+//
+// See @{tf.scatter_nd} for more details about how to make updates to
+// slices.
+//
+// Arguments:
+//	ref: A resource handle. Must be from a VarHandleOp.
+//	indices: A Tensor. Must be one of the following types: int32, int64.
+// A tensor of indices into ref.
+//	updates: A Tensor. Must have the same type as ref. A tensor of updated
+// values to add to ref.
+//
+// Returns the created operation.
+func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdUpdateAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -7900,76 +7701,115 @@ func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MatrixSolveLs",
+		Type: "ResourceScatterNdUpdate",
 		Input: []tf.Input{
-			matrix, rhs, l2_regularizer,
+			ref, indices, updates,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Adjust the saturation of one or more images.
-//
-// `images` is a tensor of at least 3 dimensions.  The last dimension is
-// interpretted as channels, and must be three.
-//
-// The input image is considered in the RGB colorspace. Conceptually, the RGB
-// colors are first mapped into HSV. A scale is then applied all the saturation
-// values, and then remapped back to RGB colorspace.
+// StageSizeAttr is an optional argument to StageSize.
+type StageSizeAttr func(optionalAttr)
+
+// StageSizeCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
 //
-// Arguments:
-//	images: Images to adjust.  At least 3-D.
-//	scale: A float scale to add to the saturation.
+// REQUIRES: value >= 0
+func StageSizeCapacity(value int64) StageSizeAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// StageSizeMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
 //
-// Returns The hue-adjusted image or images.
-func AdjustSaturation(scope *Scope, images tf.Output, scale tf.Output) (output tf.Output) {
+// REQUIRES: value >= 0
+func StageSizeMemoryLimit(value int64) StageSizeAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// StageSizeContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func StageSizeContainer(value string) StageSizeAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// StageSizeSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func StageSizeSharedName(value string) StageSizeAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op returns the number of elements in the underlying container.
+func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "AdjustSaturation",
-		Input: []tf.Input{
-			images, scale,
-		},
+		Type: "StageSize",
+
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2.
-type SelfAdjointEigV2Attr func(optionalAttr)
+// NonMaxSuppressionAttr is an optional argument to NonMaxSuppression.
+type NonMaxSuppressionAttr func(optionalAttr)
 
-// SelfAdjointEigV2ComputeV sets the optional compute_v attribute to value.
+// NonMaxSuppressionIouThreshold sets the optional iou_threshold attribute to value.
 //
-// value: If `True` then eigenvectors will be computed and returned in `v`.
-// Otherwise, only the eigenvalues will be computed.
-// If not specified, defaults to true
-func SelfAdjointEigV2ComputeV(value bool) SelfAdjointEigV2Attr {
+// value: A float representing the threshold for deciding whether boxes
+// overlap too much with respect to IOU.
+// If not specified, defaults to 0.5
+func NonMaxSuppressionIouThreshold(value float32) NonMaxSuppressionAttr {
 	return func(m optionalAttr) {
-		m["compute_v"] = value
+		m["iou_threshold"] = value
 	}
 }
 
-// Computes the eigen decomposition of one or more square self-adjoint matrices.
-//
-// Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in
-// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`.
+// Greedily selects a subset of bounding boxes in descending order of score,
 //
-// ```python
-// # a is a tensor.
-// # e is a tensor of eigenvalues.
-// # v is a tensor of eigenvectors.
-// e, v = self_adjoint_eig(a)
-// e = self_adjoint_eig(a, compute_v=False)
-// ```
+// pruning away boxes that have high intersection-over-union (IOU) overlap
+// with previously selected boxes.  Bounding boxes are supplied as
+// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+// diagonal pair of box corners and the coordinates can be provided as normalized
+// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
+// is agnostic to where the origin is in the coordinate system.  Note that this
+// algorithm is invariant to orthogonal transformations and translations
+// of the coordinate system; thus translating or reflections of the coordinate
+// system result in the same boxes being selected by the algorithm.
+// The output of this operation is a set of integers indexing into the input
+// collection of bounding boxes representing the selected boxes.  The bounding
+// box coordinates corresponding to the selected indices can then be obtained
+// using the `tf.gather operation`.  For example:
+//   selected_indices = tf.image.non_max_suppression(
+//       boxes, scores, max_output_size, iou_threshold)
+//   selected_boxes = tf.gather(boxes, selected_indices)
 //
 // Arguments:
-//	input: `Tensor` input of shape `[N, N]`.
+//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
+//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
+// score corresponding to each box (each row of boxes).
+//	max_output_size: A scalar integer tensor representing the maximum number of
+// boxes to be selected by non max suppression.
 //
-// Returns Eigenvalues. Shape is `[N]`.Eigenvectors. Shape is `[N, N]`.
-func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV2Attr) (e tf.Output, v tf.Output) {
+// Returns A 1-D integer tensor of shape `[M]` representing the selected
+// indices from the boxes tensor, where `M <= max_output_size`.
+func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, optional ...NonMaxSuppressionAttr) (selected_indices tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -7978,301 +7818,381 @@ func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SelfAdjointEigV2",
+		Type: "NonMaxSuppression",
 		Input: []tf.Input{
-			input,
+			boxes, scores, max_output_size,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// Computes the Eigen Decomposition of a batch of square self-adjoint matrices.
-//
-// DEPRECATED at GraphDef version 11: Use SelfAdjointEigV2 instead.
+// Creates a dataset that emits `components` as a tuple of tensors once.
+func TensorDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "TensorDataset",
+		Input: []tf.Input{
+			tf.OutputList(components),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Component-wise multiplies a SparseTensor by a dense Tensor.
 //
-// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-// form square matrices, with the same constraints as the single matrix
-// SelfAdjointEig.
+// The output locations corresponding to the implicitly zero elements in the sparse
+// tensor will be zero (i.e., will not take up storage space), regardless of the
+// contents of the dense tensor (even if it's +/-INF and that INF*0 == NaN).
 //
-// The result is a [..., M+1, M] matrix with [..., 0,:] containing the
-// eigenvalues, and subsequent [...,1:, :] containing the eigenvectors.
+// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not
+// the other direction.
 //
 // Arguments:
-//	input: Shape is `[..., M, M]`.
-//
-// Returns Shape is `[..., M+1, M]`.
-func SelfAdjointEig(scope *Scope, input tf.Output) (output tf.Output) {
+//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
+//	sp_shape: 1-D.  Shape of the input SparseTensor.
+//	dense: `R`-D.  The dense Tensor operand.
+//
+// Returns 1-D.  The `N` values that are operated on.
+func SparseDenseCwiseMul(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SelfAdjointEig",
+		Type: "SparseDenseCwiseMul",
 		Input: []tf.Input{
-			input,
+			sp_indices, sp_values, sp_shape, dense,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Writes contents to the file at input filename. Creates file and recursively
+// ResourceSparseApplyFtrlAttr is an optional argument to ResourceSparseApplyFtrl.
+type ResourceSparseApplyFtrlAttr func(optionalAttr)
+
+// ResourceSparseApplyFtrlUseLocking sets the optional use_locking attribute to value.
 //
-// creates directory if not existing.
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceSparseApplyFtrlUseLocking(value bool) ResourceSparseApplyFtrlAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update relevant entries in '*var' according to the Ftrl-proximal scheme.
+//
+// That is for rows we have grad for, we update var, accum and linear as follows:
+// accum_new = accum + grad * grad
+// linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+// accum = accum_new
 //
 // Arguments:
-//	filename: scalar. The name of the file to which we write the contents.
-//	contents: scalar. The content to be written to the output file.
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	linear: Should be from a Variable().
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	lr_power: Scaling factor. Must be a scalar.
 //
 // Returns the created operation.
-func WriteFile(scope *Scope, filename tf.Output, contents tf.Output) (o *tf.Operation) {
+func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "WriteFile",
+		Type: "ResourceSparseApplyFtrl",
 		Input: []tf.Input{
-			filename, contents,
+			var_, accum, linear, grad, indices, lr, l1, l2, lr_power,
 		},
+		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// Computes the Cholesky decomposition of one or more square matrices.
-//
-// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-// form square matrices.
-//
-// The input has to be symmetric and positive definite. Only the lower-triangular
-// part of the input will be used for this operation. The upper-triangular part
-// will not be read.
-//
-// The output is a tensor of the same shape as the input
-// containing the Cholesky decompositions for all input submatrices `[..., :, :]`.
-//
-// **Note**: The gradient computation on GPU is faster for large matrices but
-// not for large batch dimensions when the submatrices are small. In this
-// case it might be faster to use the CPU.
-//
-// Arguments:
-//	input: Shape is `[..., M, M]`.
+// Returns which elements of x are Inf.
 //
-// Returns Shape is `[..., M, M]`.
-func Cholesky(scope *Scope, input tf.Output) (output tf.Output) {
+// @compatibility(numpy)
+// Equivalent to np.isinf
+// @end_compatibility
+func IsInf(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Cholesky",
+		Type: "IsInf",
 		Input: []tf.Input{
-			input,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the matrix exponential of one or more square matrices:
+// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp.
+type ResourceSparseApplyRMSPropAttr func(optionalAttr)
+
+// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value.
 //
-// exp(A) = \sum_{n=0}^\infty A^n/n!
+// value: If `True`, updating of the var, ms, and mom tensors is protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the RMSProp algorithm.
 //
-// The exponential is computed using a combination of the scaling and squaring
-// method and the Pade approximation. Details can be founds in:
-// Nicholas J. Higham, "The scaling and squaring method for the matrix exponential
-// revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005.
+// Note that in dense implementation of this algorithm, ms and mom will
+// update even if the grad is zero, but in this sparse implementation, ms
+// and mom will not update in iterations during which the grad is zero.
 //
-// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-// form square matrices. The output is a tensor of the same shape as the input
-// containing the exponential for all input submatrices `[..., :, :]`.
+// mean_square = decay * mean_square + (1-decay) * gradient ** 2
+// Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
+//
+// ms <- rho * ms_{t-1} + (1-rho) * grad * grad
+// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
+// var <- var - mom
 //
 // Arguments:
-//	input: Shape is `[..., M, M]`.
+//	var_: Should be from a Variable().
+//	ms: Should be from a Variable().
+//	mom: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	rho: Decay rate. Must be a scalar.
 //
-// Returns Shape is `[..., M, M]`.
+//	epsilon: Ridge term. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var, ms and mom.
 //
-// @compatibility(scipy)
-// Equivalent to scipy.linalg.expm
-// @end_compatibility
-func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) {
+// Returns the created operation.
+func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "MatrixExponential",
+		Type: "ResourceSparseApplyRMSProp",
 		Input: []tf.Input{
-			input,
+			var_, ms, mom, lr, rho, momentum, epsilon, grad, indices,
 		},
+		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Merges summaries.
-//
-// This op creates a
-// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
-// protocol buffer that contains the union of all the values in the input
-// summaries.
-//
-// When the Op is run, it reports an `InvalidArgument` error if multiple values
-// in the summaries to merge use the same tag.
-//
-// Arguments:
-//	inputs: Can be of any shape.  Each must contain serialized `Summary` protocol
-// buffers.
+// Returns the truth value of (x > y) element-wise.
 //
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) {
+// *NOTE*: `Greater` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "MergeSummary",
+		Type: "Greater",
 		Input: []tf.Input{
-			tf.OutputList(inputs),
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// AudioSummaryV2Attr is an optional argument to AudioSummaryV2.
-type AudioSummaryV2Attr func(optionalAttr)
+// SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox.
+type SampleDistortedBoundingBoxAttr func(optionalAttr)
 
-// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value.
-//
-// value: Max number of batch elements to generate audio for.
-// If not specified, defaults to 3
+// SampleDistortedBoundingBoxSeed sets the optional seed attribute to value.
 //
-// REQUIRES: value >= 1
-func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr {
+// value: If either `seed` or `seed2` are set to non-zero, the random number
+// generator is seeded by the given `seed`.  Otherwise, it is seeded by a random
+// seed.
+// If not specified, defaults to 0
+func SampleDistortedBoundingBoxSeed(value int64) SampleDistortedBoundingBoxAttr {
 	return func(m optionalAttr) {
-		m["max_outputs"] = value
+		m["seed"] = value
 	}
 }
 
-// Outputs a `Summary` protocol buffer with audio.
-//
-// The summary has up to `max_outputs` summary values containing audio. The
-// audio is built from `tensor` which must be 3-D with shape `[batch_size,
-// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
-// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
-//
-// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-// build the `tag` of the summary values:
+// SampleDistortedBoundingBoxSeed2 sets the optional seed2 attribute to value.
 //
-// *  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
-// *  If `max_outputs` is greater than 1, the summary value tags are
-//    generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func SampleDistortedBoundingBoxSeed2(value int64) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// SampleDistortedBoundingBoxMinObjectCovered sets the optional min_object_covered attribute to value.
 //
-// Arguments:
-//	tag: Scalar. Used to build the `tag` attribute of the summary values.
-//	tensor: 2-D of shape `[batch_size, frames]`.
-//	sample_rate: The sample rate of the signal in hertz.
+// value: The cropped area of the image must contain at least this
+// fraction of any bounding box supplied. The value of this parameter should be
+// non-negative. In the case of 0, the cropped area does not need to overlap
+// any of the bounding boxes supplied.
+// If not specified, defaults to 0.1
+func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["min_object_covered"] = value
+	}
+}
+
+// SampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value.
 //
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) {
-	if scope.Err() != nil {
-		return
+// value: The cropped area of the image must have an aspect ratio =
+// width / height within this range.
+// If not specified, defaults to <f:0.75 f:1.33 >
+func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["aspect_ratio_range"] = value
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
+}
+
+// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value.
+//
+// value: The cropped area of the image must contain a fraction of the
+// supplied image within in this range.
+// If not specified, defaults to <f:0.05 f:1 >
+func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["area_range"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "AudioSummaryV2",
-		Input: []tf.Input{
-			tag, tensor, sample_rate,
-		},
-		Attrs: attrs,
+}
+
+// SampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value.
+//
+// value: Number of attempts at generating a cropped region of the image
+// of the specified constraints. After `max_attempts` failures, return the entire
+// image.
+// If not specified, defaults to 100
+func SampleDistortedBoundingBoxMaxAttempts(value int64) SampleDistortedBoundingBoxAttr {
+	return func(m optionalAttr) {
+		m["max_attempts"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2.
-type Conv3DBackpropInputV2Attr func(optionalAttr)
-
-// Conv3DBackpropInputV2DataFormat sets the optional data_format attribute to value.
+// SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value.
 //
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func Conv3DBackpropInputV2DataFormat(value string) Conv3DBackpropInputV2Attr {
+// value: Controls behavior if no bounding boxes supplied.
+// If true, assume an implicit bounding box covering the whole input. If false,
+// raise an error.
+// If not specified, defaults to false
+func SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["use_image_if_no_bounding_boxes"] = value
 	}
 }
 
-// Computes the gradients of 3-D convolution with respect to the input.
+// Generate a single randomly distorted bounding box for an image.
+//
+// Bounding box annotations are often supplied in addition to ground-truth labels
+// in image recognition or object localization tasks. A common technique for
+// training such a system is to randomly distort an image while preserving
+// its content, i.e. *data augmentation*. This Op outputs a randomly distorted
+// localization of an object, i.e. bounding box, given an `image_size`,
+// `bounding_boxes` and a series of constraints.
+//
+// The output of this Op is a single bounding box that may be used to crop the
+// original image. The output is returned as 3 tensors: `begin`, `size` and
+// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
+// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize
+// what the bounding box looks like.
+//
+// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The
+// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
+// height of the underlying image.
+//
+// For example,
+//
+// ```python
+//     # Generate a single distorted bounding box.
+//     begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
+//         tf.shape(image),
+//         bounding_boxes=bounding_boxes)
+//
+//     # Draw the bounding box in an image summary.
+//     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
+//                                                   bbox_for_draw)
+//     tf.summary.image('images_with_box', image_with_box)
+//
+//     # Employ the bounding box to distort the image.
+//     distorted_image = tf.slice(image, begin, size)
+// ```
+//
+// Note that if no bounding box information is available, setting
+// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
+// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
+// false and no bounding boxes are supplied, an error is raised.
 //
 // Arguments:
-//	input_sizes: An integer vector representing the tensor shape of `input`,
-// where `input` is a 5-D
-// `[batch, depth, rows, cols, in_channels]` tensor.
-//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
-// `in_channels` must match between `input` and `filter`.
-//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-// out_channels]`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
-func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputV2Attr) (output tf.Output) {
+//	image_size: 1-D, containing `[height, width, channels]`.
+//	bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes
+// associated with the image.
+//
+// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to
+// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to
+// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box.
+// Provide as input to `tf.image.draw_bounding_boxes`.
+func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, optional ...SampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Conv3DBackpropInputV2",
+		Type: "SampleDistortedBoundingBox",
 		Input: []tf.Input{
-			input_sizes, filter, out_backprop,
+			image_size, bounding_boxes,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns a tensor of ones with the same shape and type as x.
-//
-// Arguments:
-//	x: a tensor of type T.
-//
-// Returns a tensor of the same shape and type as x but filled with ones.
-func OnesLike(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "OnesLike",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Returns element-wise remainder of division. This emulates C semantics in that
+// Returns x / y element-wise for integer types.
 //
-// the result here is consistent with a truncating divide. E.g.
-// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`.
+// Truncation designates that negative numbers will round fractional quantities
+// toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different
+// than Python semantics. See `FloorDiv` for a division function that matches
+// Python Semantics.
 //
-// *NOTE*: `Mod` supports broadcasting. More about broadcasting
+// *NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting
 // [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+func TruncateDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Mod",
+		Type: "TruncateDiv",
 		Input: []tf.Input{
 			x, y,
 		},
@@ -8281,267 +8201,249 @@ func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	return op.Output(0)
 }
 
-// Computes the gradients of 3-D convolution with respect to the filter.
+// Restores tensors from a V2 checkpoint.
 //
-// DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2
+// For backward compatibility with the V1 format, this Op currently allows
+// restoring from a V1 checkpoint as well:
+//   - This Op first attempts to find the V2 index file pointed to by "prefix", and
+//     if found proceed to read it as a V2 checkpoint;
+//   - Otherwise the V1 read path is invoked.
+// Relying on this behavior is not recommended, as the ability to fall back to read
+// V1 might be deprecated and eventually removed.
+//
+// By default, restores the named tensors in full.  If the caller wishes to restore
+// specific slices of stored tensors, "shape_and_slices" should be non-empty
+// strings and correspondingly well-formed.
+//
+// Callers must ensure all the named tensors are indeed stored in the checkpoint.
 //
 // Arguments:
-//	input: Shape `[batch, depth, rows, cols, in_channels]`.
-//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
-// `in_channels` must match between `input` and `filter`.
-//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-// out_channels]`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
-func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) {
+//	prefix: Must have a single element.  The prefix of a V2 checkpoint.
+//	tensor_names: shape {N}.  The names of the tensors to be restored.
+//	shape_and_slices: shape {N}.  The slice specs of the tensors to be restored.
+// Empty strings indicate that they are non-partitioned tensors.
+//	dtypes: shape {N}.  The list of expected dtype for the tensors.  Must match
+// those stored in the checkpoint.
+//
+// Returns shape {N}.  The restored tensors, whose shapes are read from the
+// checkpoint directly.
+func RestoreV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, dtypes []tf.DataType) (tensors []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{"dtypes": dtypes}
 	opspec := tf.OpSpec{
-		Type: "Conv3DBackpropFilter",
+		Type: "RestoreV2",
 		Input: []tf.Input{
-			input, filter, out_backprop,
+			prefix, tensor_names, shape_and_slices,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if tensors, idx, err = makeOutputList(op, idx, "tensors"); err != nil {
+		scope.UpdateErr("RestoreV2", err)
+		return
+	}
+	return tensors
 }
 
-// Computes the gradients of 3-D convolution with respect to the input.
+// Decode web-safe base64-encoded strings.
 //
-// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2
+// Input may or may not have padding at the end. See EncodeBase64 for padding.
+// Web-safe means that input must use - and _ instead of + and /.
 //
 // Arguments:
-//	input: Shape `[batch, depth, rows, cols, in_channels]`.
-//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
-// `in_channels` must match between `input` and `filter`.
-//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-// out_channels]`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
-func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) {
+//	input: Base64 strings to decode.
+//
+// Returns Decoded strings.
+func DecodeBase64(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "Conv3DBackpropInput",
+		Type: "DecodeBase64",
 		Input: []tf.Input{
-			input, filter, out_backprop,
+			input,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ReverseSequenceAttr is an optional argument to ReverseSequence.
-type ReverseSequenceAttr func(optionalAttr)
-
-// ReverseSequenceBatchDim sets the optional batch_dim attribute to value.
-//
-// value: The dimension along which reversal is performed.
-// If not specified, defaults to 0
-func ReverseSequenceBatchDim(value int64) ReverseSequenceAttr {
-	return func(m optionalAttr) {
-		m["batch_dim"] = value
-	}
-}
-
-// Reverses variable length slices.
-//
-// This op first slices `input` along the dimension `batch_dim`, and for each
-// slice `i`, reverses the first `seq_lengths[i]` elements along
-// the dimension `seq_dim`.
-//
-// The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,
-// and `seq_lengths` must be a vector of length `input.dims[batch_dim]`.
-//
-// The output slice `i` along dimension `batch_dim` is then given by input
-// slice `i`, with the first `seq_lengths[i]` slices along dimension
-// `seq_dim` reversed.
-//
-// For example:
-//
-// ```
-// # Given this:
-// batch_dim = 0
-// seq_dim = 1
-// input.dims = (4, 8, ...)
-// seq_lengths = [7, 2, 3, 5]
-//
-// # then slices of input are reversed on seq_dim, but only up to seq_lengths:
-// output[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...]
-// output[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...]
-// output[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...]
-// output[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...]
-//
-// # while entries past seq_lens are copied through:
-// output[0, 7:, :, ...] = input[0, 7:, :, ...]
-// output[1, 2:, :, ...] = input[1, 2:, :, ...]
-// output[2, 3:, :, ...] = input[2, 3:, :, ...]
-// output[3, 2:, :, ...] = input[3, 2:, :, ...]
-// ```
+// Store the input tensor in the state of the current session.
 //
-// In contrast, if:
+// Arguments:
+//	value: The tensor to be stored.
 //
-// ```
-// # Given this:
-// batch_dim = 2
-// seq_dim = 0
-// input.dims = (8, ?, 4, ...)
-// seq_lengths = [7, 2, 3, 5]
+// Returns The handle for the tensor stored in the session state, represented
+// as a string.
+func GetSessionHandle(scope *Scope, value tf.Output) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "GetSessionHandle",
+		Input: []tf.Input{
+			value,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceSparseApplyProximalAdagradAttr is an optional argument to ResourceSparseApplyProximalAdagrad.
+type ResourceSparseApplyProximalAdagradAttr func(optionalAttr)
+
+// ResourceSparseApplyProximalAdagradUseLocking sets the optional use_locking attribute to value.
 //
-// # then slices of input are reversed on seq_dim, but only up to seq_lengths:
-// output[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...]
-// output[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...]
-// output[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...]
-// output[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...]
+// value: If True, updating of the var and accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceSparseApplyProximalAdagradUseLocking(value bool) ResourceSparseApplyProximalAdagradAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.
 //
-// # while entries past seq_lens are copied through:
-// output[7:, :, 0, :, ...] = input[7:, :, 0, :, ...]
-// output[2:, :, 1, :, ...] = input[2:, :, 1, :, ...]
-// output[3:, :, 2, :, ...] = input[3:, :, 2, :, ...]
-// output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...]
-// ```
+// That is for rows we have grad for, we update var and accum as follows:
+// accum += grad * grad
+// prox_v = var
+// prox_v -= lr * grad * (1 / sqrt(accum))
+// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
 //
 // Arguments:
-//	input: The input to reverse.
-//	seq_lengths: 1-D with length `input.dims(batch_dim)` and
-// `max(seq_lengths) <= input.dims(seq_dim)`
-//	seq_dim: The dimension which is partially reversed.
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Learning rate. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
 //
-// Returns The partially reversed input. It has the same shape as `input`.
-func ReverseSequence(scope *Scope, input tf.Output, seq_lengths tf.Output, seq_dim int64, optional ...ReverseSequenceAttr) (output tf.Output) {
+// Returns the created operation.
+func ResourceSparseApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalAdagradAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"seq_dim": seq_dim}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ReverseSequence",
+		Type: "ResourceSparseApplyProximalAdagrad",
 		Input: []tf.Input{
-			input, seq_lengths,
+			var_, accum, lr, l1, l2, grad, indices,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Computes the gradient for the rsqrt of `x` wrt its input.
-//
-// Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy`
-// is the corresponding input gradient.
-func RsqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+// Returns element-wise largest integer not greater than x.
+func Floor(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "RsqrtGrad",
+		Type: "Floor",
 		Input: []tf.Input{
-			y, dy,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Makes its input available to the next iteration.
-//
-// Arguments:
-//	data: The tensor to be made available to the next iteration.
-//
-// Returns The same tensor as `data`.
-func NextIteration(scope *Scope, data tf.Output) (output tf.Output) {
+// Computes the Gauss error function of `x` element-wise.
+func Erf(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "NextIteration",
+		Type: "Erf",
 		Input: []tf.Input{
-			data,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Does nothing. Only useful as a placeholder for control edges.
+// Reads the value of a variable.
 //
-// Returns the created operation.
-func NoOp(scope *Scope) (o *tf.Operation) {
+// The tensor returned by this operation is immutable.
+//
+// The value returned by this operation is guaranteed to be influenced by all the
+// writes on which this operation depends directly or indirectly, and to not be
+// influenced by any of the writes which depend directly or indirectly on this
+// operation.
+//
+// Arguments:
+//	resource: handle to the resource in which to store the variable.
+//	dtype: the dtype of the value.
+func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtype": dtype}
 	opspec := tf.OpSpec{
-		Type: "NoOp",
+		Type: "ReadVariableOp",
+		Input: []tf.Input{
+			resource,
+		},
+		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// DepthwiseConv2dNativeAttr is an optional argument to DepthwiseConv2dNative.
-type DepthwiseConv2dNativeAttr func(optionalAttr)
+// MaxPool3DGradAttr is an optional argument to MaxPool3DGrad.
+type MaxPool3DGradAttr func(optionalAttr)
 
-// DepthwiseConv2dNativeDataFormat sets the optional data_format attribute to value.
+// MaxPool3DGradDataFormat sets the optional data_format attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, height, width, channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, channels, height, width].
-// If not specified, defaults to "NHWC"
-func DepthwiseConv2dNativeDataFormat(value string) DepthwiseConv2dNativeAttr {
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func MaxPool3DGradDataFormat(value string) MaxPool3DGradAttr {
 	return func(m optionalAttr) {
 		m["data_format"] = value
 	}
 }
 
-// Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors.
-//
-// Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
-// and a filter / kernel tensor of shape
-// `[filter_height, filter_width, in_channels, channel_multiplier]`, containing
-// `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies
-// a different filter to each input channel (expanding from 1 channel to
-// `channel_multiplier` channels for each), then concatenates the results
-// together. Thus, the output has `in_channels * channel_multiplier` channels.
-//
-// ```
-// for k in 0..in_channels-1
-//   for q in 0..channel_multiplier-1
-//     output[b, i, j, k * channel_multiplier + q] =
-//       sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] *
-//                         filter[di, dj, k, q]
-// ```
-//
-// Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
-// horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
+// Computes gradients of max pooling function.
 //
 // Arguments:
-//
-//
-//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
-// of `input`.
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
+//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
+// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
 //	padding: The type of padding algorithm to use.
-func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeAttr) (output tf.Output) {
+func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DepthwiseConv2dNative",
+		Type: "MaxPool3DGrad",
 		Input: []tf.Input{
-			input, filter,
+			orig_input, orig_output, grad,
 		},
 		Attrs: attrs,
 	}
@@ -8549,66 +8451,43 @@ func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, stri
 	return op.Output(0)
 }
 
-// CropAndResizeAttr is an optional argument to CropAndResize.
-type CropAndResizeAttr func(optionalAttr)
+// SparseReduceSumAttr is an optional argument to SparseReduceSum.
+type SparseReduceSumAttr func(optionalAttr)
 
-// CropAndResizeMethod sets the optional method attribute to value.
+// SparseReduceSumKeepDims sets the optional keep_dims attribute to value.
 //
-// value: A string specifying the interpolation method. Only 'bilinear' is
-// supported for now.
-// If not specified, defaults to "bilinear"
-func CropAndResizeMethod(value string) CropAndResizeAttr {
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func SparseReduceSumKeepDims(value bool) SparseReduceSumAttr {
 	return func(m optionalAttr) {
-		m["method"] = value
+		m["keep_dims"] = value
 	}
 }
 
-// CropAndResizeExtrapolationValue sets the optional extrapolation_value attribute to value.
+// Computes the sum of elements across dimensions of a SparseTensor.
 //
-// value: Value used for extrapolation, when applicable.
-// If not specified, defaults to 0
-func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr {
-	return func(m optionalAttr) {
-		m["extrapolation_value"] = value
-	}
-}
-
-// Extracts crops from the input image tensor and bilinearly resizes them (possibly
+// This Op takes a SparseTensor and is the sparse counterpart to
+// `tf.reduce_sum()`.  In particular, this Op also returns a dense `Tensor`
+// instead of a sparse one.
 //
-// with aspect ratio change) to a common output size specified by `crop_size`. This
-// is more general than the `crop_to_bounding_box` op which extracts a fixed size
-// slice from the input image and does not allow resizing or aspect ratio change.
+// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
+// with length 1.
 //
-// Returns a tensor with `crops` from the input `image` at positions defined at the
-// bounding box locations in `boxes`. The cropped boxes are all resized (with
-// bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The
-// result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The
-// resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the
-// method will give identical results to using `tf.image.resize_bilinear()`
-// with `align_corners=True`.
+// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
+// with a single element is returned.  Additionally, the axes can be negative,
+// which are interpreted according to the indexing rules in Python.
 //
 // Arguments:
-//	image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
-// Both `image_height` and `image_width` need to be positive.
-//	boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
-// specifies the coordinates of a box in the `box_ind[i]` image and is specified
-// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
-// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
-// `[0, 1]` interval of normalized image height is mapped to
-// `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in
-// which case the sampled crop is an up-down flipped version of the original
-// image. The width dimension is treated similarly. Normalized coordinates
-// outside the `[0, 1]` range are allowed, in which case we use
-// `extrapolation_value` to extrapolate the input image values.
-//	box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
-// The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
-//	crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All
-// cropped image patches are resized to this size. The aspect ratio of the image
-// content is not preserved. Both `crop_height` and `crop_width` need to be
-// positive.
+//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
+//	input_shape: 1-D.  Shape of the input SparseTensor.
+//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
 //
-// Returns A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
-func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Output, crop_size tf.Output, optional ...CropAndResizeAttr) (crops tf.Output) {
+// Returns `R-K`-D.  The reduced Tensor.
+func SparseReduceSum(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -8617,9 +8496,9 @@ func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Ou
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "CropAndResize",
+		Type: "SparseReduceSum",
 		Input: []tf.Input{
-			image, boxes, box_ind, crop_size,
+			input_indices, input_values, input_shape, reduction_axes,
 		},
 		Attrs: attrs,
 	}
@@ -8627,283 +8506,240 @@ func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Ou
 	return op.Output(0)
 }
 
-// MaxPoolGradAttr is an optional argument to MaxPoolGrad.
-type MaxPoolGradAttr func(optionalAttr)
+// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad.
+type ResourceApplyAdagradAttr func(optionalAttr)
 
-// MaxPoolGradDataFormat sets the optional data_format attribute to value.
+// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func MaxPoolGradDataFormat(value string) MaxPoolGradAttr {
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Computes gradients of the maxpooling function.
+// Update '*var' according to the adagrad scheme.
+//
+// accum += grad * grad
+// var -= lr * grad * (1 / sqrt(accum))
 //
 // Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	grad: The gradient.
 //
-// Returns Gradients w.r.t. the input to `max_pool`.
-func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradAttr) (output tf.Output) {
+// Returns the created operation.
+func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MaxPoolGrad",
+		Type: "ResourceApplyAdagrad",
 		Input: []tf.Input{
-			orig_input, orig_output, grad,
+			var_, accum, lr, grad,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// EncodeJpegAttr is an optional argument to EncodeJpeg.
-type EncodeJpegAttr func(optionalAttr)
-
-// EncodeJpegFormat sets the optional format attribute to value.
+// Returns element-wise remainder of division. This emulates C semantics in that
 //
-// value: Per pixel image format.
-// If not specified, defaults to ""
-func EncodeJpegFormat(value string) EncodeJpegAttr {
-	return func(m optionalAttr) {
-		m["format"] = value
-	}
-}
-
-// EncodeJpegQuality sets the optional quality attribute to value.
+// the result here is consistent with a truncating divide. E.g. `truncate(x / y) *
+// y + truncate_mod(x, y) = x`.
 //
-// value: Quality of the compression from 0 to 100 (higher is better and slower).
-// If not specified, defaults to 95
-func EncodeJpegQuality(value int64) EncodeJpegAttr {
-	return func(m optionalAttr) {
-		m["quality"] = value
+// *NOTE*: `TruncateMod` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func TruncateMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// EncodeJpegProgressive sets the optional progressive attribute to value.
-//
-// value: If True, create a JPEG that loads progressively (coarse to fine).
-// If not specified, defaults to false
-func EncodeJpegProgressive(value bool) EncodeJpegAttr {
-	return func(m optionalAttr) {
-		m["progressive"] = value
+	opspec := tf.OpSpec{
+		Type: "TruncateMod",
+		Input: []tf.Input{
+			x, y,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value.
+// Inverse 2D real-valued fast Fourier transform.
 //
-// value: If True, spend CPU/RAM to reduce size with no quality change.
-// If not specified, defaults to false
-func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr {
-	return func(m optionalAttr) {
-		m["optimize_size"] = value
-	}
-}
-
-// EncodeJpegChromaDownsampling sets the optional chroma_downsampling attribute to value.
+// Computes the inverse 2-dimensional discrete Fourier transform of a real-valued
+// signal over the inner-most 2 dimensions of `input`.
 //
-// value: See http://en.wikipedia.org/wiki/Chroma_subsampling.
-// If not specified, defaults to true
-func EncodeJpegChromaDownsampling(value bool) EncodeJpegAttr {
-	return func(m optionalAttr) {
-		m["chroma_downsampling"] = value
-	}
-}
-
-// EncodeJpegDensityUnit sets the optional density_unit attribute to value.
+// The inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`:
+// The inner-most dimension contains the `fft_length / 2 + 1` unique components of
+// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed
+// from the size of the inner-most 2 dimensions of `input`. If the FFT length used
+// to compute `input` is odd, it should be provided since it cannot be inferred
+// properly.
 //
-// value: Unit used to specify `x_density` and `y_density`:
-// pixels per inch (`'in'`) or centimeter (`'cm'`).
-// If not specified, defaults to "in"
-func EncodeJpegDensityUnit(value string) EncodeJpegAttr {
-	return func(m optionalAttr) {
-		m["density_unit"] = value
-	}
-}
-
-// EncodeJpegXDensity sets the optional x_density attribute to value.
+// Along each axis `IRFFT2D` is computed on, if `fft_length` (or
+// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the
+// corresponding dimension of `input`, the dimension is cropped. If it is larger,
+// the dimension is padded with zeros.
 //
-// value: Horizontal pixels per density unit.
-// If not specified, defaults to 300
-func EncodeJpegXDensity(value int64) EncodeJpegAttr {
-	return func(m optionalAttr) {
-		m["x_density"] = value
-	}
-}
-
-// EncodeJpegYDensity sets the optional y_density attribute to value.
+// Arguments:
+//	input: A complex64 tensor.
+//	fft_length: An int32 tensor of shape [2]. The FFT length for each dimension.
 //
-// value: Vertical pixels per density unit.
-// If not specified, defaults to 300
-func EncodeJpegYDensity(value int64) EncodeJpegAttr {
-	return func(m optionalAttr) {
-		m["y_density"] = value
-	}
-}
-
-// EncodeJpegXmpMetadata sets the optional xmp_metadata attribute to value.
+// Returns A float32 tensor of the same rank as `input`. The inner-most 2
+//   dimensions of `input` are replaced with the `fft_length` samples of their
+//   inverse 2D Fourier transform.
 //
-// value: If not empty, embed this XMP metadata in the image header.
-// If not specified, defaults to ""
-func EncodeJpegXmpMetadata(value string) EncodeJpegAttr {
-	return func(m optionalAttr) {
-		m["xmp_metadata"] = value
+// @compatibility(numpy)
+// Equivalent to np.fft.irfft2
+// @end_compatibility
+func IRFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "IRFFT2D",
+		Input: []tf.Input{
+			input, fft_length,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// JPEG-encode an image.
-//
-// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`.
-//
-// The attr `format` can be used to override the color format of the encoded
-// output.  Values can be:
-//
-// *   `''`: Use a default format based on the number of channels in the image.
-// *   `grayscale`: Output a grayscale JPEG image.  The `channels` dimension
-//     of `image` must be 1.
-// *   `rgb`: Output an RGB JPEG image. The `channels` dimension
-//     of `image` must be 3.
-//
-// If `format` is not specified or is the empty string, a default format is picked
-// in function of the number of channels in `image`:
+// Compute the pairwise cross product.
 //
-// *   1: Output a grayscale image.
-// *   3: Output an RGB image.
+// `a` and `b` must be the same shape; they can either be simple 3-element vectors,
+// or any shape where the innermost dimension is 3. In the latter case, each pair
+// of corresponding 3-element vectors is cross-multiplied independently.
 //
 // Arguments:
-//	image: 3-D with shape `[height, width, channels]`.
+//	a: A tensor containing 3-element vectors.
+//	b: Another tensor, of same type and shape as `a`.
 //
-// Returns 0-D. JPEG-encoded image.
-func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (contents tf.Output) {
+// Returns Pairwise cross product of the vectors in `a` and `b`.
+func Cross(scope *Scope, a tf.Output, b tf.Output) (product tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "EncodeJpeg",
+		Type: "Cross",
 		Input: []tf.Input{
-			image,
+			a, b,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Gradients for batch normalization.
-//
-// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization()
-//
-// This op is deprecated. See `tf.nn.batch_normalization`.
+// Transforms a vector of brain.Example protos (as strings) into typed tensors.
 //
 // Arguments:
-//	t: A 4D input Tensor.
-//	m: A 1D mean Tensor with size matching the last dimension of t.
-// This is the first output from tf.nn.moments,
-// or a saved moving average thereof.
-//	v: A 1D variance Tensor with size matching the last dimension of t.
-// This is the second output from tf.nn.moments,
-// or a saved moving average thereof.
-//	gamma: A 1D gamma Tensor with size matching the last dimension of t.
-// If "scale_after_normalization" is true, this Tensor will be multiplied
-// with the normalized Tensor.
-//	backprop: 4D backprop Tensor.
-//	variance_epsilon: A small float number to avoid dividing by 0.
-//	scale_after_normalization: A bool indicating whether the resulted tensor
-// needs to be multiplied with gamma.
-//
-// Returns 4D backprop tensor for input.1D backprop tensor for mean.1D backprop tensor for variance.1D backprop tensor for beta.1D backprop tensor for gamma.
-func BatchNormWithGlobalNormalizationGrad(scope *Scope, t tf.Output, m tf.Output, v tf.Output, gamma tf.Output, backprop tf.Output, variance_epsilon float32, scale_after_normalization bool) (dx tf.Output, dm tf.Output, dv tf.Output, db tf.Output, dg tf.Output) {
+//	serialized: A vector containing a batch of binary serialized Example protos.
+//	names: A vector containing the names of the serialized protos.
+// May contain, for example, table key (descriptive) names for the
+// corresponding serialized protos.  These are purely useful for debugging
+// purposes, and the presence of values here has no effect on the output.
+// May also be an empty vector if no names are available.
+// If non-empty, this vector must be the same length as "serialized".
+//	sparse_keys: A list of Nsparse string Tensors (scalars).
+// The keys expected in the Examples' features associated with sparse values.
+//	dense_keys: A list of Ndense string Tensors (scalars).
+// The keys expected in the Examples' features associated with dense values.
+//	dense_defaults: A list of Ndense Tensors (some may be empty).
+// dense_defaults[j] provides default values
+// when the example's feature_map lacks dense_key[j].  If an empty Tensor is
+// provided for dense_defaults[j], then the Feature dense_keys[j] is required.
+// The input type is inferred from dense_defaults[j], even when it's empty.
+// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined,
+// then the shape of dense_defaults[j] must match that of dense_shapes[j].
+// If dense_shapes[j] has an undefined major dimension (variable strides dense
+// feature), dense_defaults[j] must contain a single element:
+// the padding element.
+//	sparse_types: A list of Nsparse types; the data types of data in each Feature
+// given in sparse_keys.
+// Currently the ParseExample supports DT_FLOAT (FloatList),
+// DT_INT64 (Int64List), and DT_STRING (BytesList).
+//	dense_shapes: A list of Ndense shapes; the shapes of data in each Feature
+// given in dense_keys.
+// The number of elements in the Feature corresponding to dense_key[j]
+// must always equal dense_shapes[j].NumEntries().
+// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output
+// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN):
+// The dense outputs are just the inputs row-stacked by batch.
+// This works for dense_shapes[j] = (-1, D1, ..., DN).  In this case
+// the shape of the output Tensor dense_values[j] will be
+// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks
+// of elements of length D1 * .... * DN, across all minibatch entries
+// in the input.  Any minibatch entry with less than M blocks of elements of
+// length D1 * ... * DN will be padded with the corresponding default_value
+// scalar element along the second dimension.
+func ParseExample(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys []tf.Output, dense_keys []tf.Output, dense_defaults []tf.Output, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization}
+	attrs := map[string]interface{}{"sparse_types": sparse_types, "dense_shapes": dense_shapes}
 	opspec := tf.OpSpec{
-		Type: "BatchNormWithGlobalNormalizationGrad",
+		Type: "ParseExample",
 		Input: []tf.Input{
-			t, m, v, gamma, backprop,
+			serialized, names, tf.OutputList(sparse_keys), tf.OutputList(dense_keys), tf.OutputList(dense_defaults),
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
-}
-
-// FusedBatchNormV2Attr is an optional argument to FusedBatchNormV2.
-type FusedBatchNormV2Attr func(optionalAttr)
-
-// FusedBatchNormV2Epsilon sets the optional epsilon attribute to value.
-//
-// value: A small float number added to the variance of x.
-// If not specified, defaults to 0.0001
-func FusedBatchNormV2Epsilon(value float32) FusedBatchNormV2Attr {
-	return func(m optionalAttr) {
-		m["epsilon"] = value
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// FusedBatchNormV2DataFormat sets the optional data_format attribute to value.
-//
-// value: The data format for x and y. Either "NHWC" (default) or "NCHW".
-// If not specified, defaults to "NHWC"
-func FusedBatchNormV2DataFormat(value string) FusedBatchNormV2Attr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
+	var idx int
+	var err error
+	if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil {
+		scope.UpdateErr("ParseExample", err)
+		return
+	}
+	if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil {
+		scope.UpdateErr("ParseExample", err)
+		return
+	}
+	if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil {
+		scope.UpdateErr("ParseExample", err)
+		return
+	}
+	if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil {
+		scope.UpdateErr("ParseExample", err)
+		return
 	}
+	return sparse_indices, sparse_values, sparse_shapes, dense_values
 }
 
-// FusedBatchNormV2IsTraining sets the optional is_training attribute to value.
-//
-// value: A bool value to indicate the operation is for training (default)
-// or inference.
-// If not specified, defaults to true
-func FusedBatchNormV2IsTraining(value bool) FusedBatchNormV2Attr {
+// VariableShapeAttr is an optional argument to VariableShape.
+type VariableShapeAttr func(optionalAttr)
+
+// VariableShapeOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_INT32
+func VariableShapeOutType(value tf.DataType) VariableShapeAttr {
 	return func(m optionalAttr) {
-		m["is_training"] = value
+		m["out_type"] = value
 	}
 }
 
-// Batch normalization.
+// Returns the shape of the variable pointed to by `resource`.
 //
-// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
-// The size of 1D Tensors matches the dimension C of the 4D Tensors.
+// This operation returns a 1-D integer tensor representing the shape of `input`.
 //
-// Arguments:
-//	x: A 4D Tensor for input data.
-//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
-//	offset: A 1D Tensor for offset, to shift to the normalized x.
-//	mean: A 1D Tensor for population mean. Used for inference only;
-// must be empty for training.
-//	variance: A 1D Tensor for population variance. Used for inference only;
-// must be empty for training.
+// For example:
 //
-// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow
-// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by
-// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused
-// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance
-// in the cuDNN case), to be reused in the gradient computation.
-func FusedBatchNormV2(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormV2Attr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) {
+// ```
+// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
+// shape(t) ==> [2, 2, 3]
+// ```
+func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -8912,210 +8748,197 @@ func FusedBatchNormV2(scope *Scope, x tf.Output, scale tf.Output, offset tf.Outp
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FusedBatchNormV2",
+		Type: "VariableShape",
 		Input: []tf.Input{
-			x, scale, offset, mean, variance,
+			input,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
-}
-
-// Conv2DBackpropInputAttr is an optional argument to Conv2DBackpropInput.
-type Conv2DBackpropInputAttr func(optionalAttr)
-
-// Conv2DBackpropInputUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value.
-// If not specified, defaults to true
-func Conv2DBackpropInputUseCudnnOnGpu(value bool) Conv2DBackpropInputAttr {
-	return func(m optionalAttr) {
-		m["use_cudnn_on_gpu"] = value
-	}
+	return op.Output(0)
 }
 
-// Conv2DBackpropInputDataFormat sets the optional data_format attribute to value.
+// Fills empty rows in the input 2-D `SparseTensor` with a default value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func Conv2DBackpropInputDataFormat(value string) Conv2DBackpropInputAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Computes the gradients of convolution with respect to the input.
+// The input `SparseTensor` is represented via the tuple of inputs
+// (`indices`, `values`, `dense_shape`).  The output `SparseTensor` has the
+// same `dense_shape` but with indices `output_indices` and values
+// `output_values`.
+//
+// This op inserts a single entry for every row that doesn't have any values.
+// The index is created as `[row, 0, ..., 0]` and the inserted value
+// is `default_value`.
+//
+// For example, suppose `sp_input` has shape `[5, 6]` and non-empty values:
+//
+//     [0, 1]: a
+//     [0, 3]: b
+//     [2, 0]: c
+//     [3, 1]: d
+//
+// Rows 1 and 4 are empty, so the output will be of shape `[5, 6]` with values:
+//
+//     [0, 1]: a
+//     [0, 3]: b
+//     [1, 0]: default_value
+//     [2, 0]: c
+//     [3, 1]: d
+//     [4, 0]: default_value
+//
+// The output `SparseTensor` will be in row-major order and will have the
+// same shape as the input.
+//
+// This op also returns an indicator vector shaped `[dense_shape[0]]` such that
+//
+//     empty_row_indicator[i] = True iff row i was an empty row.
+//
+// And a reverse index map vector shaped `[indices.shape[0]]` that is used during
+// backpropagation,
+//
+//     reverse_index_map[j] = out_j s.t. indices[j, :] == output_indices[out_j, :]
 //
 // Arguments:
-//	input_sizes: An integer vector representing the shape of `input`,
-// where `input` is a 4-D `[batch, height, width, channels]` tensor.
-//	filter: 4-D with shape
-// `[filter_height, filter_width, in_channels, out_channels]`.
-//	out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
-// Gradients w.r.t. the output of the convolution.
-//	strides: The stride of the sliding window for each dimension of the input
-// of the convolution. Must be in the same order as the dimension specified with
-// format.
-//	padding: The type of padding algorithm to use.
+//	indices: 2-D. the indices of the sparse tensor.
+//	values: 1-D. the values of the sparse tensor.
+//	dense_shape: 1-D. the shape of the sparse tensor.
+//	default_value: 0-D. default value to insert into location `[row, 0, ..., 0]`
+//   for rows missing from the input sparse tensor.
+// output indices: 2-D. the indices of the filled sparse tensor.
 //
-// Returns 4-D with shape `[batch, in_height, in_width, in_channels]`.  Gradient
-// w.r.t. the input of the convolution.
-func Conv2DBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropInputAttr) (output tf.Output) {
+// Returns 1-D. the values of the filled sparse tensor.1-D. whether the dense row was missing in the
+// input sparse tensor.1-D. a map from the input indices to the output indices.
+func SparseFillEmptyRows(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output, default_value tf.Output) (output_indices tf.Output, output_values tf.Output, empty_row_indicator tf.Output, reverse_index_map tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "Conv2DBackpropInput",
+		Type: "SparseFillEmptyRows",
 		Input: []tf.Input{
-			input_sizes, filter, out_backprop,
+			indices, values, dense_shape, default_value,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
 }
 
-// FusedBatchNormAttr is an optional argument to FusedBatchNorm.
-type FusedBatchNormAttr func(optionalAttr)
-
-// FusedBatchNormEpsilon sets the optional epsilon attribute to value.
+// Reverses specific dimensions of a tensor.
 //
-// value: A small float number added to the variance of x.
-// If not specified, defaults to 0.0001
-func FusedBatchNormEpsilon(value float32) FusedBatchNormAttr {
-	return func(m optionalAttr) {
-		m["epsilon"] = value
-	}
-}
-
-// FusedBatchNormDataFormat sets the optional data_format attribute to value.
+// Given a `tensor`, and a `bool` tensor `dims` representing the dimensions
+// of `tensor`, this operation reverses each dimension i of `tensor` where
+// `dims[i]` is `True`.
 //
-// value: The data format for x and y. Either "NHWC" (default) or "NCHW".
-// If not specified, defaults to "NHWC"
-func FusedBatchNormDataFormat(value string) FusedBatchNormAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// FusedBatchNormIsTraining sets the optional is_training attribute to value.
+// `tensor` can have up to 8 dimensions. The number of dimensions
+// of `tensor` must equal the number of elements in `dims`. In other words:
+//
+// `rank(tensor) = size(dims)`
+//
+// For example:
+//
+// ```
+// # tensor 't' is [[[[ 0,  1,  2,  3],
+// #                  [ 4,  5,  6,  7],
+// #                  [ 8,  9, 10, 11]],
+// #                 [[12, 13, 14, 15],
+// #                  [16, 17, 18, 19],
+// #                  [20, 21, 22, 23]]]]
+// # tensor 't' shape is [1, 2, 3, 4]
+//
+// # 'dims' is [False, False, False, True]
+// reverse(t, dims) ==> [[[[ 3,  2,  1,  0],
+//                         [ 7,  6,  5,  4],
+//                         [ 11, 10, 9, 8]],
+//                        [[15, 14, 13, 12],
+//                         [19, 18, 17, 16],
+//                         [23, 22, 21, 20]]]]
 //
-// value: A bool value to indicate the operation is for training (default)
-// or inference.
-// If not specified, defaults to true
-func FusedBatchNormIsTraining(value bool) FusedBatchNormAttr {
-	return func(m optionalAttr) {
-		m["is_training"] = value
-	}
-}
-
-// Batch normalization.
+// # 'dims' is [False, True, False, False]
+// reverse(t, dims) ==> [[[[12, 13, 14, 15],
+//                         [16, 17, 18, 19],
+//                         [20, 21, 22, 23]
+//                        [[ 0,  1,  2,  3],
+//                         [ 4,  5,  6,  7],
+//                         [ 8,  9, 10, 11]]]]
 //
-// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
-// The size of 1D Tensors matches the dimension C of the 4D Tensors.
+// # 'dims' is [False, False, True, False]
+// reverse(t, dims) ==> [[[[8, 9, 10, 11],
+//                         [4, 5, 6, 7],
+//                         [0, 1, 2, 3]]
+//                        [[20, 21, 22, 23],
+//                         [16, 17, 18, 19],
+//                         [12, 13, 14, 15]]]]
+// ```
 //
 // Arguments:
-//	x: A 4D Tensor for input data.
-//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
-//	offset: A 1D Tensor for offset, to shift to the normalized x.
-//	mean: A 1D Tensor for population mean. Used for inference only;
-// must be empty for training.
-//	variance: A 1D Tensor for population variance. Used for inference only;
-// must be empty for training.
+//	tensor: Up to 8-D.
+//	dims: 1-D. The dimensions to reverse.
 //
-// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow
-// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by
-// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused
-// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance
-// in the cuDNN case), to be reused in the gradient computation.
-func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormAttr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) {
+// Returns The same shape as `tensor`.
+func Reverse(scope *Scope, tensor tf.Output, dims tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "FusedBatchNorm",
+		Type: "Reverse",
 		Input: []tf.Input{
-			x, scale, offset, mean, variance,
+			tensor, dims,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
-}
-
-// RandomStandardNormalAttr is an optional argument to RandomStandardNormal.
-type RandomStandardNormalAttr func(optionalAttr)
-
-// RandomStandardNormalSeed sets the optional seed attribute to value.
-//
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomStandardNormalSeed(value int64) RandomStandardNormalAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
+	return op.Output(0)
 }
 
-// RandomStandardNormalSeed2 sets the optional seed2 attribute to value.
+// Computes log softmax activations.
 //
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomStandardNormalSeed2(value int64) RandomStandardNormalAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Outputs random values from a normal distribution.
+// For each batch `i` and class `j` we have
 //
-// The generated values will have mean 0 and standard deviation 1.
+//     logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i])))
 //
 // Arguments:
-//	shape: The shape of the output tensor.
-//	dtype: The type of the output.
+//	logits: 2-D with shape `[batch_size, num_classes]`.
 //
-// Returns A tensor of the specified shape filled with random normal values.
-func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomStandardNormalAttr) (output tf.Output) {
+// Returns Same shape as `logits`.
+func LogSoftmax(scope *Scope, logits tf.Output) (logsoftmax tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "RandomStandardNormal",
+		Type: "LogSoftmax",
 		Input: []tf.Input{
-			shape,
+			logits,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes sigmoid of `x` element-wise.
+// Computes the inverse permutation of a tensor.
 //
-// Specifically, `y = 1 / (1 + exp(-x))`.
-func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) {
+// This operation computes the inverse of an index permutation. It takes a 1-D
+// integer tensor `x`, which represents the indices of a zero-based array, and
+// swaps each value with its index position. In other words, for an output tensor
+// `y` and an input tensor `x`, this operation computes the following:
+//
+// `y[x[i]] = i for i in [0, 1, ..., len(x) - 1]`
+//
+// The values must include 0. There can be no duplicate values or negative values.
+//
+// For example:
+//
+// ```
+// # tensor `x` is [3, 4, 0, 2, 1]
+// invert_permutation(x) ==> [2, 4, 3, 0, 1]
+// ```
+//
+// Arguments:
+//	x: 1-D.
+//
+// Returns 1-D.
+func InvertPermutation(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Sigmoid",
+		Type: "InvertPermutation",
 		Input: []tf.Input{
 			x,
 		},
@@ -9124,328 +8947,342 @@ func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) {
 	return op.Output(0)
 }
 
-// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits.
-type ComputeAccidentalHitsAttr func(optionalAttr)
-
-// ComputeAccidentalHitsSeed sets the optional seed attribute to value.
+// Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor.
 //
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value.
+// This operation folds the padded areas of `input` by `MirrorPad` according to the
+// `paddings` you specify. `paddings` must be the same as `paddings` argument
+// given to the corresponding `MirrorPad` op.
 //
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Computes the ids of the positions in sampled_candidates that match true_labels.
+// The folded size of each dimension D of the output is:
 //
-// When doing log-odds NCE, the result of this op should be passed through a
-// SparseToDense op, then added to the logits of the sampled candidates. This has
-// the effect of 'removing' the sampled labels that match the true labels by
-// making the classifier sure that they are sampled labels.
+// `input.dim_size(D) - paddings(D, 0) - paddings(D, 1)`
+//
+// For example:
+//
+// ```
+// # 't' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]].
+// # 'paddings' is [[0, 1]], [0, 1]].
+// # 'mode' is SYMMETRIC.
+// # rank of 't' is 2.
+// pad(t, paddings) ==> [[ 1,  5]
+//                       [11, 28]]
+// ```
 //
 // Arguments:
-//	true_classes: The true_classes output of UnpackSparseLabels.
-//	sampled_candidates: The sampled_candidates output of CandidateSampler.
-//	num_true: Number of true labels per context.
+//	input: The input tensor to be folded.
+//	paddings: A two-column matrix specifying the padding sizes. The number of
+// rows must be the same as the rank of `input`.
+//	mode: The mode used in the `MirrorPad` op.
 //
-// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label
-// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element
-// is -FLOAT_MAX.
-func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) {
+// Returns The folded tensor.
+func MirrorPadGrad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_true": num_true}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"mode": mode}
 	opspec := tf.OpSpec{
-		Type: "ComputeAccidentalHits",
+		Type: "MirrorPadGrad",
 		Input: []tf.Input{
-			true_classes, sampled_candidates,
+			input, paddings,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// StageClearAttr is an optional argument to StageClear.
-type StageClearAttr func(optionalAttr)
-
-// StageClearCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// Computes softmax cross entropy cost and gradients to backpropagate.
 //
-// REQUIRES: value >= 0
-func StageClearCapacity(value int64) StageClearAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// StageClearMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept
+// a matrix of label probabilities, but rather a single label per row
+// of features.  This label is considered to have probability 1.0 for the
+// given row.
 //
-// REQUIRES: value >= 0
-func StageClearMemoryLimit(value int64) StageClearAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// StageClearContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func StageClearContainer(value string) StageClearAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// StageClearSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func StageClearSharedName(value string) StageClearAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op removes all elements in the underlying container.
+// Inputs are the logits, not probabilities.
 //
-// Returns the created operation.
-func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) {
+// Arguments:
+//	features: batch_size x num_classes matrix
+//	labels: batch_size vector with values in [0, num_classes).
+// This is the label for the given minibatch entry.
+//
+// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix).
+func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "StageClear",
-
-		Attrs: attrs,
+		Type: "SparseSoftmaxCrossEntropyWithLogits",
+		Input: []tf.Input{
+			features, labels,
+		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
 }
 
-// AvgPoolGradAttr is an optional argument to AvgPoolGrad.
-type AvgPoolGradAttr func(optionalAttr)
+// ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA.
+type ResourceSparseApplyAdagradDAAttr func(optionalAttr)
 
-// AvgPoolGradDataFormat sets the optional data_format attribute to value.
+// ResourceSparseApplyAdagradDAUseLocking sets the optional use_locking attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func AvgPoolGradDataFormat(value string) AvgPoolGradAttr {
+// value: If True, updating of the var and accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceSparseApplyAdagradDAUseLocking(value bool) ResourceSparseApplyAdagradDAAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Computes gradients of the average pooling function.
+// Update entries in '*var' and '*accum' according to the proximal adagrad scheme.
 //
 // Arguments:
-//	orig_input_shape: 1-D.  Shape of the original input to `avg_pool`.
-//	grad: 4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t.
-// the output of `avg_pool`.
-//	ksize: The size of the sliding window for each dimension of the input.
-//	strides: The stride of the sliding window for each dimension of the input.
-//	padding: The type of padding algorithm to use.
+//	var_: Should be from a Variable().
+//	gradient_accumulator: Should be from a Variable().
+//	gradient_squared_accumulator: Should be from a Variable().
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//	lr: Learning rate. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	global_step: Training step number. Must be a scalar.
 //
-// Returns 4-D.  Gradients w.r.t. the input of `avg_pool`.
-func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) {
+// Returns the created operation.
+func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceSparseApplyAdagradDAAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AvgPoolGrad",
+		Type: "ResourceSparseApplyAdagradDA",
 		Input: []tf.Input{
-			orig_input_shape, grad,
+			var_, gradient_accumulator, gradient_squared_accumulator, grad, indices, lr, l1, l2, global_step,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Computes the maximum along segments of a tensor.
-//
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
-//
-// Computes a tensor such that
-// \\(output_i = \max_j(data_j)\\) where `max` is over `j` such
-// that `segment_ids[j] == i`.
-//
-// If the max is empty for a given segment ID `i`, `output[i] = 0`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMax.png" alt>
-// </div>
-//
-// Arguments:
-//
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+// Returns the truth value of NOT x element-wise.
+func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SegmentMax",
+		Type: "LogicalNot",
 		Input: []tf.Input{
-			data, segment_ids,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns the rank of a tensor.
+// 3D real-valued fast Fourier transform.
 //
-// This operation returns an integer representing the rank of `input`.
+// Computes the 3-dimensional discrete Fourier transform of a real-valued signal
+// over the inner-most 3 dimensions of `input`.
 //
-// For example:
+// Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the
+// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension
+// of `output`: the zero-frequency term, followed by the `fft_length / 2`
+// positive-frequency terms.
 //
-// ```
-// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
-// # shape of tensor 't' is [2, 2, 3]
-// rank(t) ==> 3
-// ```
+// Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the
+// corresponding dimension of `input`, the dimension is cropped. If it is larger,
+// the dimension is padded with zeros.
 //
-// **Note**: The rank of a tensor is not the same as the rank of a matrix. The rank
-// of a tensor is the number of indices required to uniquely select each element
-// of the tensor. Rank is also known as "order", "degree", or "ndims."
-func Rank(scope *Scope, input tf.Output) (output tf.Output) {
+// Arguments:
+//	input: A float32 tensor.
+//	fft_length: An int32 tensor of shape [3]. The FFT length for each dimension.
+//
+// Returns A complex64 tensor of the same rank as `input`. The inner-most 3
+//   dimensions of `input` are replaced with the their 3D Fourier transform. The
+//   inner-most dimension contains `fft_length / 2 + 1` unique frequency
+//   components.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.rfftn with 3 dimensions.
+// @end_compatibility
+func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Rank",
+		Type: "RFFT3D",
 		Input: []tf.Input{
-			input,
+			input, fft_length,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// DecodeCSVAttr is an optional argument to DecodeCSV.
-type DecodeCSVAttr func(optionalAttr)
+// TensorArrayV3Attr is an optional argument to TensorArrayV3.
+type TensorArrayV3Attr func(optionalAttr)
 
-// DecodeCSVFieldDelim sets the optional field_delim attribute to value.
+// TensorArrayV3ElementShape sets the optional element_shape attribute to value.
 //
-// value: char delimiter to separate fields in a record.
-// If not specified, defaults to ","
-func DecodeCSVFieldDelim(value string) DecodeCSVAttr {
+// value: The expected shape of an element, if known. Used to
+// validate the shapes of TensorArray elements. If this shape is not
+// fully specified, gathering zero-size TensorArrays is an error.
+// If not specified, defaults to <unknown_rank:true >
+func TensorArrayV3ElementShape(value tf.Shape) TensorArrayV3Attr {
 	return func(m optionalAttr) {
-		m["field_delim"] = value
+		m["element_shape"] = value
 	}
 }
 
-// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value.
+// TensorArrayV3DynamicSize sets the optional dynamic_size attribute to value.
 //
-// value: If false, treats double quotation marks as regular
-// characters inside of the string fields (ignoring RFC 4180, Section 2,
-// Bullet 5).
+// value: A boolean that determines whether writes to the TensorArray
+// are allowed to grow the size.  By default, this is not allowed.
+// If not specified, defaults to false
+func TensorArrayV3DynamicSize(value bool) TensorArrayV3Attr {
+	return func(m optionalAttr) {
+		m["dynamic_size"] = value
+	}
+}
+
+// TensorArrayV3ClearAfterRead sets the optional clear_after_read attribute to value.
+//
+// value: If true (default), Tensors in the TensorArray are cleared
+// after being read.  This disables multiple read semantics but allows early
+// release of memory.
 // If not specified, defaults to true
-func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr {
+func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr {
 	return func(m optionalAttr) {
-		m["use_quote_delim"] = value
+		m["clear_after_read"] = value
 	}
 }
 
-// DecodeCSVNaValue sets the optional na_value attribute to value.
+// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value.
 //
-// value: Additional string to recognize as NA/NaN.
+// value: If true (default is false), then all
+// elements in the TensorArray will be expected to have have identical shapes.
+// This allows certain behaviors, like dynamically checking for
+// consistent shapes on write, and being able to fill in properly
+// shaped zero tensors on stack -- even if the element_shape attribute
+// is not fully defined.
+// If not specified, defaults to false
+func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr {
+	return func(m optionalAttr) {
+		m["identical_element_shapes"] = value
+	}
+}
+
+// TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value.
+//
+// value: Overrides the name used for the temporary tensor_array
+// resource. Default value is the name of the 'TensorArray' op (which
+// is guaranteed unique).
 // If not specified, defaults to ""
-func DecodeCSVNaValue(value string) DecodeCSVAttr {
+func TensorArrayV3TensorArrayName(value string) TensorArrayV3Attr {
 	return func(m optionalAttr) {
-		m["na_value"] = value
+		m["tensor_array_name"] = value
 	}
 }
 
-// Convert CSV records to tensors. Each column maps to one tensor.
+// An array of Tensors of given size.
 //
-// RFC 4180 format is expected for the CSV records.
-// (https://tools.ietf.org/html/rfc4180)
-// Note that we allow leading and trailing spaces with int or float field.
+// Write data via Write and read via Read or Pack.
 //
 // Arguments:
-//	records: Each string is a record/row in the csv and all records should have
-// the same format.
-//	record_defaults: One tensor per column of the input record, with either a
-// scalar default value for that column or empty if the column is required.
+//	size: The size of the array.
+//	dtype: The type of the elements on the tensor_array.
 //
-// Returns Each tensor will have the same shape as records.
-func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) {
+// Returns The handle to the TensorArray.A scalar used to control gradient flow.
+func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV3Attr) (handle tf.Output, flow tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayV3",
+		Input: []tf.Input{
+			size,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// MaxPool3DAttr is an optional argument to MaxPool3D.
+type MaxPool3DAttr func(optionalAttr)
+
+// MaxPool3DDataFormat sets the optional data_format attribute to value.
+//
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func MaxPool3DDataFormat(value string) MaxPool3DAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Performs 3D max pooling on the input.
+//
+// Arguments:
+//	input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over.
+//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
+// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+//
+// Returns The max pooled output tensor.
+func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DecodeCSV",
+		Type: "MaxPool3D",
 		Input: []tf.Input{
-			records, tf.OutputList(record_defaults),
+			input,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
-		scope.UpdateErr("DecodeCSV", err)
-		return
-	}
-	return output
+	return op.Output(0)
 }
 
-// Transforms a serialized tensorflow.TensorProto proto into a Tensor.
+// Computes the gradients of 3-D convolution with respect to the input.
 //
-// Arguments:
-//	serialized: A scalar string containing a serialized TensorProto proto.
-//	out_type: The type of the serialized tensor.  The provided type must match the
-// type of the serialized tensor and no implicit conversion will take place.
+// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2
 //
-// Returns A Tensor of type `out_type`.
-func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) {
+// Arguments:
+//	input: Shape `[batch, depth, rows, cols, in_channels]`.
+//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
+// `in_channels` must match between `input` and `filter`.
+//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+// out_channels]`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"out_type": out_type}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "ParseTensor",
+		Type: "Conv3DBackpropInput",
 		Input: []tf.Input{
-			serialized,
+			input, filter, out_backprop,
 		},
 		Attrs: attrs,
 	}
@@ -9453,228 +9290,279 @@ func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (outp
 	return op.Output(0)
 }
 
-// Computes acos of x element-wise.
-func Acos(scope *Scope, x tf.Output) (y tf.Output) {
+// Inverse 2D fast Fourier transform.
+//
+// Computes the inverse 2-dimensional discrete Fourier transform over the
+// inner-most 2 dimensions of `input`.
+//
+// Arguments:
+//	input: A complex64 tensor.
+//
+// Returns A complex64 tensor of the same shape as `input`. The inner-most 2
+//   dimensions of `input` are replaced with their inverse 2D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.ifft2
+// @end_compatibility
+func IFFT2D(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Acos",
+		Type: "IFFT2D",
 		Input: []tf.Input{
-			x,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Copy a tensor setting everything outside a central band in each innermost matrix
-//
-// to zero.
-//
-// The `band` part is computed as follows:
-// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a
-// tensor with the same shape where
-//
-// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`.
-//
-// The indicator function
+// Creates a tensor filled with a scalar value.
 //
-// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) &&
-//                  (num_upper < 0 || (n-m) <= num_upper)`.
+// This operation creates a tensor of shape `dims` and fills it with `value`.
 //
 // For example:
 //
 // ```
-// # if 'input' is [[ 0,  1,  2, 3]
-//                  [-1,  0,  1, 2]
-//                  [-2, -1,  0, 1]
-//                  [-3, -2, -1, 0]],
-//
-// tf.matrix_band_part(input, 1, -1) ==> [[ 0,  1,  2, 3]
-//                                        [-1,  0,  1, 2]
-//                                        [ 0, -1,  0, 1]
-//                                        [ 0,  0, -1, 0]],
-//
-// tf.matrix_band_part(input, 2, 1) ==> [[ 0,  1,  0, 0]
-//                                       [-1,  0,  1, 0]
-//                                       [-2, -1,  0, 1]
-//                                       [ 0, -2, -1, 0]]
+// # Output tensor has shape [2, 3].
+// fill([2, 3], 9) ==> [[9, 9, 9]
+//                      [9, 9, 9]]
 // ```
 //
-// Useful special cases:
+// Arguments:
+//	dims: 1-D. Represents the shape of the output tensor.
+//	value: 0-D (scalar). Value to fill the returned tensor.
 //
-// ```
-//  tf.matrix_band_part(input, 0, -1) ==> Upper triangular part.
-//  tf.matrix_band_part(input, -1, 0) ==> Lower triangular part.
-//  tf.matrix_band_part(input, 0, 0) ==> Diagonal.
-// ```
+// @compatibility(numpy)
+// Equivalent to np.full
+// @end_compatibility
+func Fill(scope *Scope, dims tf.Output, value tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Fill",
+		Input: []tf.Input{
+			dims, value,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// 2D fast Fourier transform.
+//
+// Computes the 2-dimensional discrete Fourier transform over the inner-most
+// 2 dimensions of `input`.
 //
 // Arguments:
-//	input: Rank `k` tensor.
-//	num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire
-// lower triangle.
-//	num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep
-// entire upper triangle.
+//	input: A complex64 tensor.
 //
-// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor.
-func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) {
+// Returns A complex64 tensor of the same shape as `input`. The inner-most 2
+//   dimensions of `input` are replaced with their 2D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.fft2
+// @end_compatibility
+func FFT2D(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "MatrixBandPart",
+		Type: "FFT2D",
 		Input: []tf.Input{
-			input, num_lower, num_upper,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// DecodeRawAttr is an optional argument to DecodeRaw.
-type DecodeRawAttr func(optionalAttr)
+// ResourceApplyProximalGradientDescentAttr is an optional argument to ResourceApplyProximalGradientDescent.
+type ResourceApplyProximalGradientDescentAttr func(optionalAttr)
 
-// DecodeRawLittleEndian sets the optional little_endian attribute to value.
+// ResourceApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value.
 //
-// value: Whether the input `bytes` are in little-endian order.
-// Ignored for `out_type` values that are stored in a single byte like
-// `uint8`.
-// If not specified, defaults to true
-func DecodeRawLittleEndian(value bool) DecodeRawAttr {
+// value: If True, the subtraction will be protected by a lock;
+// otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceApplyProximalGradientDescentUseLocking(value bool) ResourceApplyProximalGradientDescentAttr {
 	return func(m optionalAttr) {
-		m["little_endian"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Reinterpret the bytes of a string as a vector of numbers.
+// Update '*var' as FOBOS algorithm with fixed learning rate.
 //
-// Arguments:
-//	bytes: All the elements must have the same length.
+// prox_v = var - alpha * delta
+// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
 //
+// Arguments:
+//	var_: Should be from a Variable().
+//	alpha: Scaling factor. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	delta: The change.
 //
-// Returns A Tensor with one more dimension than the input `bytes`.  The
-// added dimension will have size equal to the length of the elements
-// of `bytes` divided by the number of bytes to represent `out_type`.
-func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) {
+// Returns the created operation.
+func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, delta tf.Output, optional ...ResourceApplyProximalGradientDescentAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"out_type": out_type}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DecodeRaw",
+		Type: "ResourceApplyProximalGradientDescent",
 		Input: []tf.Input{
-			bytes,
+			var_, alpha, l1, l2, delta,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// OrderedMapIncompleteSizeAttr is an optional argument to OrderedMapIncompleteSize.
-type OrderedMapIncompleteSizeAttr func(optionalAttr)
-
-// OrderedMapIncompleteSizeCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// Computes the gradient for the sqrt of `x` wrt its input.
 //
-// REQUIRES: value >= 0
-func OrderedMapIncompleteSizeCapacity(value int64) OrderedMapIncompleteSizeAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
+// Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy`
+// is the corresponding input gradient.
+func SqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SqrtGrad",
+		Input: []tf.Input{
+			y, dy,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// OrderedMapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// Get the value of the tensor specified by its handle.
 //
-// REQUIRES: value >= 0
-func OrderedMapIncompleteSizeMemoryLimit(value int64) OrderedMapIncompleteSizeAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
+// Arguments:
+//	handle: The handle for a tensor stored in the session state.
+//	dtype: The type of the output value.
+//
+// Returns The tensor for the given handle.
+func GetSessionTensor(scope *Scope, handle tf.Output, dtype tf.DataType) (value tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// OrderedMapIncompleteSizeContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func OrderedMapIncompleteSizeContainer(value string) OrderedMapIncompleteSizeAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
+	attrs := map[string]interface{}{"dtype": dtype}
+	opspec := tf.OpSpec{
+		Type: "GetSessionTensor",
+		Input: []tf.Input{
+			handle,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// OrderedMapIncompleteSizeSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func OrderedMapIncompleteSizeSharedName(value string) OrderedMapIncompleteSizeAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
+// Returns x - y element-wise.
+//
+// *NOTE*: `Subtract` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Sub",
+		Input: []tf.Input{
+			x, y,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Op returns the number of incomplete elements in the underlying container.
-func OrderedMapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapIncompleteSizeAttr) (size tf.Output) {
+// Computes softmax cross entropy cost and gradients to backpropagate.
+//
+// Inputs are the logits, not probabilities.
+//
+// Arguments:
+//	features: batch_size x num_classes matrix
+//	labels: batch_size x num_classes matrix
+// The caller must ensure that each batch of labels represents a valid
+// probability distribution.
+//
+// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix).
+func SoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "OrderedMapIncompleteSize",
-
-		Attrs: attrs,
+		Type: "SoftmaxCrossEntropyWithLogits",
+		Input: []tf.Input{
+			features, labels,
+		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// RandomShuffleAttr is an optional argument to RandomShuffle.
-type RandomShuffleAttr func(optionalAttr)
+// ReduceJoinAttr is an optional argument to ReduceJoin.
+type ReduceJoinAttr func(optionalAttr)
 
-// RandomShuffleSeed sets the optional seed attribute to value.
+// ReduceJoinKeepDims sets the optional keep_dims attribute to value.
 //
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomShuffleSeed(value int64) RandomShuffleAttr {
+// value: If `True`, retain reduced dimensions with length `1`.
+// If not specified, defaults to false
+func ReduceJoinKeepDims(value bool) ReduceJoinAttr {
 	return func(m optionalAttr) {
-		m["seed"] = value
+		m["keep_dims"] = value
 	}
 }
 
-// RandomShuffleSeed2 sets the optional seed2 attribute to value.
+// ReduceJoinSeparator sets the optional separator attribute to value.
 //
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomShuffleSeed2(value int64) RandomShuffleAttr {
+// value: The separator to use when joining.
+// If not specified, defaults to ""
+func ReduceJoinSeparator(value string) ReduceJoinAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["separator"] = value
 	}
 }
 
-// Randomly shuffles a tensor along its first dimension.
+// Joins a string Tensor across the given dimensions.
 //
-//   The tensor is shuffled along dimension 0, such that each `value[j]` is mapped
-//   to one and only one `output[i]`. For example, a mapping that might occur for a
-//   3x2 tensor is:
+// Computes the string join across dimensions in the given string Tensor of shape
+// `[d_0, d_1, ..., d_n-1]`.  Returns a new Tensor created by joining the input
+// strings with the given separator (default: empty string).  Negative indices are
+// counted backwards from the end, with `-1` being equivalent to `n - 1`.
 //
-// ```
-// [[1, 2],       [[5, 6],
-//  [3, 4],  ==>   [1, 2],
-//  [5, 6]]        [3, 4]]
+// For example:
+//
+// ```python
+// # tensor `a` is [["a", "b"], ["c", "d"]]
+// tf.reduce_join(a, 0) ==> ["ac", "bd"]
+// tf.reduce_join(a, 1) ==> ["ab", "cd"]
+// tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"]
+// tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"]
+// tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]]
+// tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]]
+// tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"]
+// tf.reduce_join(a, [0, 1]) ==> ["acbd"]
+// tf.reduce_join(a, [1, 0]) ==> ["abcd"]
+// tf.reduce_join(a, []) ==> ["abcd"]
 // ```
 //
 // Arguments:
-//	value: The tensor to be shuffled.
+//	inputs: The input to be joined.  All reduced indices must have non-zero size.
+//	reduction_indices: The dimensions to reduce over.  Dimensions are reduced in the
+// order specified.  Omitting `reduction_indices` is equivalent to passing
+// `[n-1, n-2, ..., 0]`.  Negative indices from `-n` to `-1` are supported.
 //
-// Returns A tensor of same shape and type as `value`, shuffled along its first
-// dimension.
-func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) {
+// Returns Has shape equal to that of the input with reduced dimensions removed or
+// set to `1` depending on `keep_dims`.
+func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, optional ...ReduceJoinAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -9683,9 +9571,9 @@ func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr)
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "RandomShuffle",
+		Type: "ReduceJoin",
 		Input: []tf.Input{
-			value,
+			inputs, reduction_indices,
 		},
 		Attrs: attrs,
 	}
@@ -9693,39 +9581,78 @@ func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr)
 	return op.Output(0)
 }
 
-// FakeQuantWithMinMaxVarsPerChannelAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannel.
-type FakeQuantWithMinMaxVarsPerChannelAttr func(optionalAttr)
+// Computes cos of x element-wise.
+func Cos(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Cos",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// FakeQuantWithMinMaxVarsPerChannelNumBits sets the optional num_bits attribute to value.
-// If not specified, defaults to 8
-func FakeQuantWithMinMaxVarsPerChannelNumBits(value int64) FakeQuantWithMinMaxVarsPerChannelAttr {
+// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad.
+type FusedBatchNormGradAttr func(optionalAttr)
+
+// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value.
+//
+// value: A small float number added to the variance of x.
+// If not specified, defaults to 0.0001
+func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr {
 	return func(m optionalAttr) {
-		m["num_bits"] = value
+		m["epsilon"] = value
 	}
 }
 
-// FakeQuantWithMinMaxVarsPerChannelNarrowRange sets the optional narrow_range attribute to value.
-// If not specified, defaults to false
-func FakeQuantWithMinMaxVarsPerChannelNarrowRange(value bool) FakeQuantWithMinMaxVarsPerChannelAttr {
+// FusedBatchNormGradDataFormat sets the optional data_format attribute to value.
+//
+// value: The data format for y_backprop, x, x_backprop.
+// Either "NHWC" (default) or "NCHW".
+// If not specified, defaults to "NHWC"
+func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr {
 	return func(m optionalAttr) {
-		m["narrow_range"] = value
+		m["data_format"] = value
 	}
 }
 
-// Fake-quantize the 'inputs' tensor of type float and one of the shapes: `[d]`,
+// FusedBatchNormGradIsTraining sets the optional is_training attribute to value.
 //
-// `[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]`
-// to 'outputs' tensor of same shape as `inputs`.
+// value: A bool value to indicate the operation is for training (default)
+// or inference.
+// If not specified, defaults to true
+func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr {
+	return func(m optionalAttr) {
+		m["is_training"] = value
+	}
+}
+
+// Gradient for batch normalization.
 //
-// `[min; max]` define the clamping range for the `inputs` data.
-// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
-// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
-// then de-quantized and output as floats in `[min; max]` interval.
-// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.
+// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
+// The size of 1D Tensors matches the dimension C of the 4D Tensors.
 //
-// This operation has a gradient and thus allows for training `min` and `max`
-// values.
-func FakeQuantWithMinMaxVarsPerChannel(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsPerChannelAttr) (outputs tf.Output) {
+// Arguments:
+//	y_backprop: A 4D Tensor for the gradient with respect to y.
+//	x: A 4D Tensor for input data.
+//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
+//	reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
+// mean to be reused in gradient computation. When is_training is
+// False, a 1D Tensor for the population mean to be reused in both
+// 1st and 2nd order gradient computation.
+//	reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
+// variance (inverted variance in the cuDNN case) to be reused in
+// gradient computation. When is_training is False, a 1D Tensor
+// for the population variance to be reused in both 1st and 2nd
+// order gradient computation.
+//
+// Returns A 4D Tensor for the gradient with respect to x.A 1D Tensor for the gradient with respect to scale.A 1D Tensor for the gradient with respect to offset.Unused placeholder to match the mean input in FusedBatchNorm.Unused placeholder to match the variance input
+// in FusedBatchNorm.
+func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradAttr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -9734,110 +9661,121 @@ func FakeQuantWithMinMaxVarsPerChannel(scope *Scope, inputs tf.Output, min tf.Ou
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FakeQuantWithMinMaxVarsPerChannel",
+		Type: "FusedBatchNormGrad",
 		Input: []tf.Input{
-			inputs, min, max,
+			y_backprop, x, scale, reserve_space_1, reserve_space_2,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
 }
 
-// TruncatedNormalAttr is an optional argument to TruncatedNormal.
-type TruncatedNormalAttr func(optionalAttr)
+// TopKAttr is an optional argument to TopK.
+type TopKAttr func(optionalAttr)
 
-// TruncatedNormalSeed sets the optional seed attribute to value.
+// TopKSorted sets the optional sorted attribute to value.
 //
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func TruncatedNormalSeed(value int64) TruncatedNormalAttr {
+// value: If true the resulting `k` elements will be sorted by the values in
+// descending order.
+// If not specified, defaults to true
+func TopKSorted(value bool) TopKAttr {
 	return func(m optionalAttr) {
-		m["seed"] = value
+		m["sorted"] = value
 	}
 }
 
-// TruncatedNormalSeed2 sets the optional seed2 attribute to value.
+// Finds values and indices of the `k` largest elements for the last dimension.
 //
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func TruncatedNormalSeed2(value int64) TruncatedNormalAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
+// DEPRECATED at GraphDef version 7: Use TopKV2 instead
+//
+// If the input is a vector (rank-1), finds the `k` largest entries in the vector
+// and outputs their values and indices as vectors.  Thus `values[j]` is the
+// `j`-th largest entry in `input`, and its index is `indices[j]`.
+//
+// For matrices (resp. higher rank input), computes the top `k` entries in each
+// row (resp. vector along the last dimension).  Thus,
+//
+//     values.shape = indices.shape = input.shape[:-1] + [k]
+//
+// If two elements are equal, the lower-index element appears first.
+//
+// If `k` varies dynamically, use `TopKV2` below.
+//
+// Arguments:
+//	input: 1-D or higher with last dimension at least `k`.
+//	k: Number of top elements to look for along the last dimension (along each
+// row for matrices).
+//
+// Returns The `k` largest elements along each last dimensional slice.The indices of `values` within the last dimension of `input`.
+func TopK(scope *Scope, input tf.Output, k int64, optional ...TopKAttr) (values tf.Output, indices tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"k": k}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "TopK",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
 }
 
-// Outputs random values from a truncated normal distribution.
-//
-// The generated values follow a normal distribution with mean 0 and standard
-// deviation 1, except that values whose magnitude is more than 2 standard
-// deviations from the mean are dropped and re-picked.
+// Transforms a Tensor into a serialized TensorProto proto.
 //
 // Arguments:
-//	shape: The shape of the output tensor.
-//	dtype: The type of the output.
+//	tensor: A Tensor of type `T`.
 //
-// Returns A tensor of the specified shape filled with random truncated normal
-// values.
-func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) {
+// Returns A serialized TensorProto proto of the input tensor.
+func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "TruncatedNormal",
+		Type: "SerializeTensor",
 		Input: []tf.Input{
-			shape,
+			tensor,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2.
-type ResourceApplyFtrlV2Attr func(optionalAttr)
+// MatrixSolveAttr is an optional argument to MatrixSolve.
+type MatrixSolveAttr func(optionalAttr)
 
-// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
+// MatrixSolveAdjoint sets the optional adjoint attribute to value.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
+// value: Boolean indicating whether to solve with `matrix` or its (block-wise)
+// adjoint.
 // If not specified, defaults to false
-func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr {
+func MatrixSolveAdjoint(value bool) MatrixSolveAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["adjoint"] = value
 	}
 }
 
-// Update '*var' according to the Ftrl-proximal scheme.
+// Solves systems of linear equations.
 //
-// grad_with_shrinkage = grad + 2 * l2_shrinkage * var
-// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
-// linear += grad_with_shrinkage +
-//     (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-// accum = accum_new
+// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
+// form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is
+// a tensor shape `[..., M, K]`.  If `adjoint` is `False` then each output matrix
+// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`.
+// If `adjoint` is `True` then each output matrix satisfies
+// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	linear: Should be from a Variable().
-//	grad: The gradient.
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regulariation. Must be a scalar.
-//	l2: L2 shrinkage regulariation. Must be a scalar.
-//
-//	lr_power: Scaling factor. Must be a scalar.
+//	matrix: Shape is `[..., M, M]`.
+//	rhs: Shape is `[..., M, K]`.
 //
-// Returns the created operation.
-func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) {
+// Returns Shape is `[..., M, K]`.
+func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -9846,177 +9784,193 @@ func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear t
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyFtrlV2",
+		Type: "MatrixSolve",
 		Input: []tf.Input{
-			var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power,
+			matrix, rhs,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// SkipgramAttr is an optional argument to Skipgram.
-type SkipgramAttr func(optionalAttr)
-
-// SkipgramWindowSize sets the optional window_size attribute to value.
+// Looks up keys in a table, outputs the corresponding values.
 //
-// value: The number of words to predict to the left and right of the target.
-// If not specified, defaults to 5
-func SkipgramWindowSize(value int64) SkipgramAttr {
-	return func(m optionalAttr) {
-		m["window_size"] = value
-	}
-}
-
-// SkipgramMinCount sets the optional min_count attribute to value.
+// The tensor `keys` must of the same type as the keys of the table.
+// The output `values` is of the type of the table values.
 //
-// value: The minimum number of word occurrences for it to be included in the
-// vocabulary.
-// If not specified, defaults to 5
-func SkipgramMinCount(value int64) SkipgramAttr {
-	return func(m optionalAttr) {
-		m["min_count"] = value
-	}
-}
-
-// SkipgramSubsample sets the optional subsample attribute to value.
+// The scalar `default_value` is the value output for keys not present in the
+// table. It must also be of the same type as the table values.
 //
-// value: Threshold for word occurrence. Words that appear with higher
-// frequency will be randomly down-sampled. Set to 0 to disable.
-// If not specified, defaults to 0.001
-func SkipgramSubsample(value float32) SkipgramAttr {
-	return func(m optionalAttr) {
-		m["subsample"] = value
+// Arguments:
+//	table_handle: Handle to the table.
+//	keys: Any shape.  Keys to look up.
+//
+//
+// Returns Same shape as `keys`.  Values found in the table, or `default_values`
+// for missing keys.
+func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "LookupTableFindV2",
+		Input: []tf.Input{
+			table_handle, keys, default_value,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Parses a text file and creates a batch of examples.
+// Inverse 3D fast Fourier transform.
 //
-// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result
+// Computes the inverse 3-dimensional discrete Fourier transform over the
+// inner-most 3 dimensions of `input`.
 //
 // Arguments:
-//	filename: The corpus's text file name.
-//	batch_size: The size of produced batch.
+//	input: A complex64 tensor.
 //
-// Returns A vector of words in the corpus.Frequencies of words. Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids.
-func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) {
+// Returns A complex64 tensor of the same shape as `input`. The inner-most 3
+//   dimensions of `input` are replaced with their inverse 3D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.ifftn with 3 dimensions.
+// @end_compatibility
+func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "Skipgram",
-
-		Attrs: attrs,
+		Type: "IFFT3D",
+		Input: []tf.Input{
+			input,
+		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6)
+	return op.Output(0)
 }
 
-// ParameterizedTruncatedNormalAttr is an optional argument to ParameterizedTruncatedNormal.
-type ParameterizedTruncatedNormalAttr func(optionalAttr)
-
-// ParameterizedTruncatedNormalSeed sets the optional seed attribute to value.
+// Adds `bias` to `value`.
 //
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func ParameterizedTruncatedNormalSeed(value int64) ParameterizedTruncatedNormalAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// ParameterizedTruncatedNormalSeed2 sets the optional seed2 attribute to value.
+// This is a deprecated version of BiasAdd and will be soon removed.
 //
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func ParameterizedTruncatedNormalSeed2(value int64) ParameterizedTruncatedNormalAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
+// This is a special case of `tf.add` where `bias` is restricted to be 1-D.
+// Broadcasting is supported, so `value` may have any number of dimensions.
+//
+// Arguments:
+//	value: Any number of dimensions.
+//	bias: 1-D with size the last dimension of `value`.
+//
+// Returns Broadcasted sum of `value` and `bias`.
+func BiasAddV1(scope *Scope, value tf.Output, bias tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
+	opspec := tf.OpSpec{
+		Type: "BiasAddV1",
+		Input: []tf.Input{
+			value, bias,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Outputs random values from a normal distribution. The parameters may each be a
+// Reverses specific dimensions of a tensor.
 //
-// scalar which applies to the entire output, or a vector of length shape[0] which
-// stores the parameters for each batch.
+// NOTE `tf.reverse` has now changed behavior in preparation for 1.0.
+// `tf.reverse_v2` is currently an alias that will be deprecated before TF 1.0.
+//
+// Given a `tensor`, and a `int32` tensor `axis` representing the set of
+// dimensions of `tensor` to reverse. This operation reverses each dimension
+// `i` for which there exists `j` s.t. `axis[j] == i`.
+//
+// `tensor` can have up to 8 dimensions. The number of dimensions specified
+// in `axis` may be 0 or more entries. If an index is specified more than
+// once, a InvalidArgument error is raised.
+//
+// For example:
+//
+// ```
+// # tensor 't' is [[[[ 0,  1,  2,  3],
+// #                  [ 4,  5,  6,  7],
+// #                  [ 8,  9, 10, 11]],
+// #                 [[12, 13, 14, 15],
+// #                  [16, 17, 18, 19],
+// #                  [20, 21, 22, 23]]]]
+// # tensor 't' shape is [1, 2, 3, 4]
+//
+// # 'dims' is [3] or 'dims' is [-1]
+// reverse(t, dims) ==> [[[[ 3,  2,  1,  0],
+//                         [ 7,  6,  5,  4],
+//                         [ 11, 10, 9, 8]],
+//                        [[15, 14, 13, 12],
+//                         [19, 18, 17, 16],
+//                         [23, 22, 21, 20]]]]
+//
+// # 'dims' is '[1]' (or 'dims' is '[-3]')
+// reverse(t, dims) ==> [[[[12, 13, 14, 15],
+//                         [16, 17, 18, 19],
+//                         [20, 21, 22, 23]
+//                        [[ 0,  1,  2,  3],
+//                         [ 4,  5,  6,  7],
+//                         [ 8,  9, 10, 11]]]]
+//
+// # 'dims' is '[2]' (or 'dims' is '[-2]')
+// reverse(t, dims) ==> [[[[8, 9, 10, 11],
+//                         [4, 5, 6, 7],
+//                         [0, 1, 2, 3]]
+//                        [[20, 21, 22, 23],
+//                         [16, 17, 18, 19],
+//                         [12, 13, 14, 15]]]]
+// ```
 //
 // Arguments:
-//	shape: The shape of the output tensor. Batches are indexed by the 0th dimension.
-//	means: The mean parameter of each batch.
-//	stdevs: The standard deviation parameter of each batch. Must be greater than 0.
-//	minvals: The minimum cutoff. May be -infinity.
-//	maxvals: The maximum cutoff. May be +infinity, and must be more than the minval
-// for each batch.
+//	tensor: Up to 8-D.
+//	axis: 1-D. The indices of the dimensions to reverse. Must be in the range
+// `[-rank(tensor), rank(tensor))`.
 //
-// Returns A matrix of shape num_batches x samples_per_batch, filled with random
-// truncated normal values using the parameters for each row.
-func ParameterizedTruncatedNormal(scope *Scope, shape tf.Output, means tf.Output, stdevs tf.Output, minvals tf.Output, maxvals tf.Output, optional ...ParameterizedTruncatedNormalAttr) (output tf.Output) {
+// Returns The same shape as `tensor`.
+func ReverseV2(scope *Scope, tensor tf.Output, axis tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "ParameterizedTruncatedNormal",
+		Type: "ReverseV2",
 		Input: []tf.Input{
-			shape, means, stdevs, minvals, maxvals,
+			tensor, axis,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// RandomUniformIntAttr is an optional argument to RandomUniformInt.
-type RandomUniformIntAttr func(optionalAttr)
-
-// RandomUniformIntSeed sets the optional seed attribute to value.
-//
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomUniformIntSeed(value int64) RandomUniformIntAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
+// RealAttr is an optional argument to Real.
+type RealAttr func(optionalAttr)
 
-// RandomUniformIntSeed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomUniformIntSeed2(value int64) RandomUniformIntAttr {
+// RealTout sets the optional Tout attribute to value.
+// If not specified, defaults to DT_FLOAT
+func RealTout(value tf.DataType) RealAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["Tout"] = value
 	}
 }
 
-// Outputs random integers from a uniform distribution.
-//
-// The generated values are uniform integers in the range `[minval, maxval)`.
-// The lower bound `minval` is included in the range, while the upper bound
-// `maxval` is excluded.
+// Returns the real part of a complex number.
 //
-// The random integers are slightly biased unless `maxval - minval` is an exact
-// power of two.  The bias is small for values of `maxval - minval` significantly
-// smaller than the range of the output (either `2^32` or `2^64`).
+// Given a tensor `input` of complex numbers, this operation returns a tensor of
+// type `float` that is the real part of each element in `input`. All elements in
+// `input` must be complex numbers of the form \\(a + bj\\), where *a* is the real
+//  part returned by this operation and *b* is the imaginary part.
 //
-// Arguments:
-//	shape: The shape of the output tensor.
-//	minval: 0-D.  Inclusive lower bound on the generated integers.
-//	maxval: 0-D.  Exclusive upper bound on the generated integers.
+// For example:
 //
-// Returns A tensor of the specified shape filled with uniform random integers.
-func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) {
+// ```
+// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
+// tf.real(input) ==> [-2.25, 3.25]
+// ```
+func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -10025,9 +9979,9 @@ func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "RandomUniformInt",
+		Type: "Real",
 		Input: []tf.Input{
-			shape, minval, maxval,
+			input,
 		},
 		Attrs: attrs,
 	}
@@ -10035,136 +9989,126 @@ func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf
 	return op.Output(0)
 }
 
-// Convert JSON-encoded Example records to binary protocol buffer strings.
-//
-// This op translates a tensor containing Example records, encoded using
-// the [standard JSON
-// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json),
-// into a tensor containing the same records encoded as binary protocol
-// buffers. The resulting tensor can then be fed to any of the other
-// Example-parsing ops.
+// AudioSummaryAttr is an optional argument to AudioSummary.
+type AudioSummaryAttr func(optionalAttr)
+
+// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value.
 //
-// Arguments:
-//	json_examples: Each string is a JSON object serialized according to the JSON
-// mapping of the Example proto.
+// value: Max number of batch elements to generate audio for.
+// If not specified, defaults to 3
 //
-// Returns Each string is a binary Example protocol buffer corresponding
-// to the respective element of `json_examples`.
-func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "DecodeJSONExample",
-		Input: []tf.Input{
-			json_examples,
-		},
+// REQUIRES: value >= 1
+func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr {
+	return func(m optionalAttr) {
+		m["max_outputs"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Adds sparse updates to the variable referenced by `resource`.
-//
-// This operation computes
-//
-//     # Scalar indices
-//     ref[indices, ...] += updates[...]
-//
-//     # Vector indices (for each i)
-//     ref[indices[i], ...] += updates[i, ...]
+// Outputs a `Summary` protocol buffer with audio.
 //
-//     # High rank indices (for each i, ..., j)
-//     ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]
+// DEPRECATED at GraphDef version 15: Use AudioSummaryV2.
 //
-// Duplicate entries are handled correctly: if multiple `indices` reference
-// the same location, their contributions add.
+// The summary has up to `max_outputs` summary values containing audio. The
+// audio is built from `tensor` which must be 3-D with shape `[batch_size,
+// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
+// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
 //
-// Requires `updates.shape = indices.shape + ref.shape[1:]`.
+// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
+// build the `tag` of the summary values:
 //
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
-// </div>
+// *  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
+// *  If `max_outputs` is greater than 1, the summary value tags are
+//    generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
 //
 // Arguments:
-//	resource: Should be from a `Variable` node.
-//	indices: A tensor of indices into the first dimension of `ref`.
-//	updates: A tensor of updated values to add to `ref`.
+//	tag: Scalar. Used to build the `tag` attribute of the summary values.
+//	tensor: 2-D of shape `[batch_size, frames]`.
+//	sample_rate: The sample rate of the signal in hertz.
 //
-// Returns the created operation.
-func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "ResourceScatterAdd",
-		Input: []tf.Input{
-			resource, indices, updates,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Delete the TensorArray from its resource container.
-//
-// This enables the user to close and release the resource in the middle
-// of a step/run.
-//
-// Arguments:
-//	handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
-//
-// Returns the created operation.
-func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
+	attrs := map[string]interface{}{"sample_rate": sample_rate}
+	for _, a := range optional {
+		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayCloseV3",
+		Type: "AudioSummary",
 		Input: []tf.Input{
-			handle,
+			tag, tensor,
 		},
+		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// ResourceGatherAttr is an optional argument to ResourceGather.
-type ResourceGatherAttr func(optionalAttr)
+// QrAttr is an optional argument to Qr.
+type QrAttr func(optionalAttr)
 
-// ResourceGatherValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func ResourceGatherValidateIndices(value bool) ResourceGatherAttr {
+// QrFullMatrices sets the optional full_matrices attribute to value.
+//
+// value: If true, compute full-sized `q` and `r`. If false
+// (the default), compute only the leading `P` columns of `q`.
+// If not specified, defaults to false
+func QrFullMatrices(value bool) QrAttr {
 	return func(m optionalAttr) {
-		m["validate_indices"] = value
+		m["full_matrices"] = value
 	}
 }
 
-// Gather slices from the variable pointed to by `resource` according to `indices`.
+// Computes the QR decompositions of one or more matrices.
 //
-// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
-// Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
+// Computes the QR decomposition of each inner matrix in `tensor` such that
+// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])`
 //
 // ```python
-//     # Scalar indices
-//     output[:, ..., :] = params[indices, :, ... :]
+// # a is a tensor.
+// # q is a tensor of orthonormal matrices.
+// # r is a tensor of upper triangular matrices.
+// q, r = qr(a)
+// q_full, r_full = qr(a, full_matrices=True)
+// ```
 //
-//     # Vector indices
-//     output[i, :, ..., :] = params[indices[i], :, ... :]
+// Arguments:
+//	input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions
+// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`.
 //
-//     # Higher rank indices
-//     output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]
-// ```
-func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) {
+// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then
+// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is
+// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is
+// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`.
+func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceGather",
+		Type: "Qr",
 		Input: []tf.Input{
-			resource, indices,
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Records the bytes size of each element of `input_dataset` in a StatsAggregator.
+func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "BytesProducedStatsDataset",
+		Input: []tf.Input{
+			input_dataset, tag,
 		},
 		Attrs: attrs,
 	}
@@ -10172,317 +10116,182 @@ func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype t
 	return op.Output(0)
 }
 
-// QuantizedConv2DAttr is an optional argument to QuantizedConv2D.
-type QuantizedConv2DAttr func(optionalAttr)
+// ResourceSparseApplyProximalGradientDescentAttr is an optional argument to ResourceSparseApplyProximalGradientDescent.
+type ResourceSparseApplyProximalGradientDescentAttr func(optionalAttr)
 
-// QuantizedConv2DOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_QINT32
-func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr {
+// ResourceSparseApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value.
+//
+// value: If True, the subtraction will be protected by a lock;
+// otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceSparseApplyProximalGradientDescentUseLocking(value bool) ResourceSparseApplyProximalGradientDescentAttr {
 	return func(m optionalAttr) {
-		m["out_type"] = value
+		m["use_locking"] = value
 	}
 }
-
-// Computes a 2D convolution given quantized 4D input and filter tensors.
-//
-// The inputs are quantized tensors where the lowest value represents the real
-// number of the associated minimum, and the highest represents the maximum.
-// This means that you can only interpret the quantized output in the same way, by
-// taking the returned minimum and maximum values into account.
+
+// Sparse update '*var' as FOBOS algorithm with fixed learning rate.
 //
-// Arguments:
+// That is for rows we have grad for, we update var as follows:
+// prox_v = var - alpha * grad
+// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
 //
-//	filter: filter's input_depth dimension must match input's depth dimensions.
-//	min_input: The float value that the lowest quantized input value represents.
-//	max_input: The float value that the highest quantized input value represents.
-//	min_filter: The float value that the lowest quantized filter value represents.
-//	max_filter: The float value that the highest quantized filter value represents.
-//	strides: The stride of the sliding window for each dimension of the input
-// tensor.
-//	padding: The type of padding algorithm to use.
+// Arguments:
+//	var_: Should be from a Variable().
+//	alpha: Scaling factor. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
 //
-// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
-func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) {
+// Returns the created operation.
+func ResourceSparseApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalGradientDescentAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QuantizedConv2D",
+		Type: "ResourceSparseApplyProximalGradientDescent",
 		Input: []tf.Input{
-			input, filter, min_input, max_input, min_filter, max_filter,
+			var_, alpha, l1, l2, grad, indices,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return scope.AddOperation(opspec)
 }
 
-// QueueDequeueV2Attr is an optional argument to QueueDequeueV2.
-type QueueDequeueV2Attr func(optionalAttr)
+// MeanAttr is an optional argument to Mean.
+type MeanAttr func(optionalAttr)
 
-// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value.
+// MeanKeepDims sets the optional keep_dims attribute to value.
 //
-// value: If the queue is empty, this operation will block for up to
-// timeout_ms milliseconds.
-// Note: This option is not supported yet.
-// If not specified, defaults to -1
-func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr {
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func MeanKeepDims(value bool) MeanAttr {
 	return func(m optionalAttr) {
-		m["timeout_ms"] = value
+		m["keep_dims"] = value
 	}
 }
 
-// Dequeues a tuple of one or more tensors from the given queue.
-//
-// This operation has k outputs, where k is the number of components
-// in the tuples stored in the given queue, and output i is the ith
-// component of the dequeued tuple.
+// Computes the mean of elements across dimensions of a tensor.
 //
-// N.B. If the queue is empty, this operation will block until an element
-// has been dequeued (or 'timeout_ms' elapses, if specified).
+// Reduces `input` along the dimensions given in `axis`. Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `axis`. If `keep_dims` is true, the reduced dimensions are
+// retained with length 1.
 //
 // Arguments:
-//	handle: The handle to a queue.
-//	component_types: The type of each component in a tuple.
+//	input: The tensor to reduce.
+//	axis: The dimensions to reduce. Must be in the range
+// `[-rank(input), rank(input))`.
 //
-// Returns One or more tensors that were dequeued as a tuple.
-func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) {
+// Returns The reduced tensor.
+func Mean(scope *Scope, input tf.Output, axis tf.Output, optional ...MeanAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"component_types": component_types}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QueueDequeueV2",
+		Type: "Mean",
 		Input: []tf.Input{
-			handle,
+			input, axis,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
-		scope.UpdateErr("QueueDequeueV2", err)
-		return
-	}
-	return components
-}
-
-// ParseSingleSequenceExampleAttr is an optional argument to ParseSingleSequenceExample.
-type ParseSingleSequenceExampleAttr func(optionalAttr)
-
-// ParseSingleSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value.
-//
-// value: A list of Ncontext_sparse types; the data types of data in
-// each context Feature given in context_sparse_keys.
-// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
-// DT_INT64 (Int64List), and DT_STRING (BytesList).
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func ParseSingleSequenceExampleContextSparseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["context_sparse_types"] = value
-	}
+	return op.Output(0)
 }
 
-// ParseSingleSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value.
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func ParseSingleSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["feature_list_dense_types"] = value
-	}
-}
+// InitializeTableFromTextFileV2Attr is an optional argument to InitializeTableFromTextFileV2.
+type InitializeTableFromTextFileV2Attr func(optionalAttr)
 
-// ParseSingleSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value.
+// InitializeTableFromTextFileV2VocabSize sets the optional vocab_size attribute to value.
 //
-// value: A list of Ncontext_dense shapes; the shapes of data in
-// each context Feature given in context_dense_keys.
-// The number of elements in the Feature corresponding to context_dense_key[j]
-// must always equal context_dense_shapes[j].NumEntries().
-// The shape of context_dense_values[j] will match context_dense_shapes[j].
-// If not specified, defaults to <>
+// value: Number of elements of the file, use -1 if unknown.
+// If not specified, defaults to -1
 //
-// REQUIRES: len(value) >= 0
-func ParseSingleSequenceExampleContextDenseShapes(value []tf.Shape) ParseSingleSequenceExampleAttr {
+// REQUIRES: value >= -1
+func InitializeTableFromTextFileV2VocabSize(value int64) InitializeTableFromTextFileV2Attr {
 	return func(m optionalAttr) {
-		m["context_dense_shapes"] = value
+		m["vocab_size"] = value
 	}
 }
 
-// ParseSingleSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value.
-//
-// value: A list of Nfeature_list_sparse types; the data types
-// of data in each FeatureList given in feature_list_sparse_keys.
-// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
-// DT_INT64 (Int64List), and DT_STRING (BytesList).
-// If not specified, defaults to <>
+// InitializeTableFromTextFileV2Delimiter sets the optional delimiter attribute to value.
 //
-// REQUIRES: len(value) >= 0
-func ParseSingleSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr {
+// value: Delimiter to separate fields in a line.
+// If not specified, defaults to "\t"
+func InitializeTableFromTextFileV2Delimiter(value string) InitializeTableFromTextFileV2Attr {
 	return func(m optionalAttr) {
-		m["feature_list_sparse_types"] = value
+		m["delimiter"] = value
 	}
 }
 
-// ParseSingleSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value.
+// Initializes a table from a text file.
 //
-// value: A list of Nfeature_list_dense shapes; the shapes of
-// data in each FeatureList given in feature_list_dense_keys.
-// The shape of each Feature in the FeatureList corresponding to
-// feature_list_dense_key[j] must always equal
-// feature_list_dense_shapes[j].NumEntries().
-// If not specified, defaults to <>
+// It inserts one key-value pair into the table for each line of the file.
+// The key and value is extracted from the whole line content, elements from the
+// split line based on `delimiter` or the line number (starting from zero).
+// Where to extract the key and value from a line is specified by `key_index` and
+// `value_index`.
 //
-// REQUIRES: len(value) >= 0
-func ParseSingleSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSingleSequenceExampleAttr {
-	return func(m optionalAttr) {
-		m["feature_list_dense_shapes"] = value
-	}
-}
-
-// Transforms a scalar brain.SequenceExample proto (as strings) into typed tensors.
+// - A value of -1 means use the line number(starting from zero), expects `int64`.
+// - A value of -2 means use the whole line content, expects `string`.
+// - A value >= 0 means use the index (starting at zero) of the split line based
+//   on `delimiter`.
 //
 // Arguments:
-//	serialized: A scalar containing a binary serialized SequenceExample proto.
-//	feature_list_dense_missing_assumed_empty: A vector listing the
-// FeatureList keys which may be missing from the SequenceExample.  If the
-// associated FeatureList is missing, it is treated as empty.  By default,
-// any FeatureList not listed in this vector must exist in the SequenceExample.
-//	context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars).
-// The keys expected in the Examples' features associated with context_sparse
-// values.
-//	context_dense_keys: A list of Ncontext_dense string Tensors (scalars).
-// The keys expected in the SequenceExamples' context features associated with
-// dense values.
-//	feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors
-// (scalars).  The keys expected in the FeatureLists associated with sparse
-// values.
-//	feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars).
-// The keys expected in the SequenceExamples' feature_lists associated
-// with lists of dense values.
-//	context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty).
-// context_dense_defaults[j] provides default values
-// when the SequenceExample's context map lacks context_dense_key[j].
-// If an empty Tensor is provided for context_dense_defaults[j],
-// then the Feature context_dense_keys[j] is required.
-// The input type is inferred from context_dense_defaults[j], even when it's
-// empty.  If context_dense_defaults[j] is not empty, its shape must match
-// context_dense_shapes[j].
-//	debug_name: A scalar containing the name of the serialized proto.
-// May contain, for example, table key (descriptive) name for the
-// corresponding serialized proto.  This is purely useful for debugging
-// purposes, and the presence of values here has no effect on the output.
-// May also be an empty scalar if no name is available.
-func ParseSingleSequenceExample(scope *Scope, serialized tf.Output, feature_list_dense_missing_assumed_empty tf.Output, context_sparse_keys []tf.Output, context_dense_keys []tf.Output, feature_list_sparse_keys []tf.Output, feature_list_dense_keys []tf.Output, context_dense_defaults []tf.Output, debug_name tf.Output, optional ...ParseSingleSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ParseSingleSequenceExample",
-		Input: []tf.Input{
-			serialized, feature_list_dense_missing_assumed_empty, tf.OutputList(context_sparse_keys), tf.OutputList(context_dense_keys), tf.OutputList(feature_list_sparse_keys), tf.OutputList(feature_list_dense_keys), tf.OutputList(context_dense_defaults), debug_name,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
+//	table_handle: Handle to a table which will be initialized.
+//	filename: Filename of a vocabulary text file.
+//	key_index: Column index in a line to get the table `key` values from.
+//	value_index: Column index that represents information of a line to get the table
+// `value` values from.
+//
+// Returns the created operation.
+func InitializeTableFromTextFileV2(scope *Scope, table_handle tf.Output, filename tf.Output, key_index int64, value_index int64, optional ...InitializeTableFromTextFileV2Attr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	var idx int
-	var err error
-	if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil {
-		scope.UpdateErr("ParseSingleSequenceExample", err)
-		return
-	}
-	if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil {
-		scope.UpdateErr("ParseSingleSequenceExample", err)
-		return
-	}
-	if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil {
-		scope.UpdateErr("ParseSingleSequenceExample", err)
-		return
-	}
-	if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil {
-		scope.UpdateErr("ParseSingleSequenceExample", err)
-		return
-	}
-	if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil {
-		scope.UpdateErr("ParseSingleSequenceExample", err)
-		return
-	}
-	if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil {
-		scope.UpdateErr("ParseSingleSequenceExample", err)
-		return
-	}
-	if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil {
-		scope.UpdateErr("ParseSingleSequenceExample", err)
-		return
-	}
-	if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil {
-		scope.UpdateErr("ParseSingleSequenceExample", err)
-		return
-	}
-	return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values
-}
-
-// RandomGammaAttr is an optional argument to RandomGamma.
-type RandomGammaAttr func(optionalAttr)
-
-// RandomGammaSeed sets the optional seed attribute to value.
-//
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomGammaSeed(value int64) RandomGammaAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
+	attrs := map[string]interface{}{"key_index": key_index, "value_index": value_index}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "InitializeTableFromTextFileV2",
+		Input: []tf.Input{
+			table_handle, filename,
+		},
+		Attrs: attrs,
 	}
+	return scope.AddOperation(opspec)
 }
 
-// RandomGammaSeed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomGammaSeed2(value int64) RandomGammaAttr {
+// QuantizedReluAttr is an optional argument to QuantizedRelu.
+type QuantizedReluAttr func(optionalAttr)
+
+// QuantizedReluOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_QUINT8
+func QuantizedReluOutType(value tf.DataType) QuantizedReluAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["out_type"] = value
 	}
 }
 
-// Outputs random values from the Gamma distribution(s) described by alpha.
-//
-// This op uses the algorithm by Marsaglia et al. to acquire samples via
-// transformation-rejection from pairs of uniform and normal random variables.
-// See http://dl.acm.org/citation.cfm?id=358414
+// Computes Quantized Rectified Linear: `max(features, 0)`
 //
 // Arguments:
-//	shape: 1-D integer tensor. Shape of independent samples to draw from each
-// distribution described by the shape parameters given in alpha.
-//	alpha: A tensor in which each scalar is a "shape" parameter describing the
-// associated gamma distribution.
 //
-// Returns A tensor with shape `shape + shape(alpha)`. Each slice
-// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
-// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha.
-func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) {
+//	min_features: The float value that the lowest quantized value represents.
+//	max_features: The float value that the highest quantized value represents.
+//
+// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents.
+func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -10491,81 +10300,125 @@ func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...Ran
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "RandomGamma",
+		Type: "QuantizedRelu",
 		Input: []tf.Input{
-			shape, alpha,
+			features, min_features, max_features,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Returns the element-wise sum of a list of tensors.
+// Reshapes a SparseTensor to represent values in a new dense shape.
 //
-// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not
-// wait for all of its inputs to be ready before beginning to sum. This can
-// save memory if inputs are ready at different times, since minimum temporary
-// storage is proportional to the output size rather than the inputs size.
+// This operation has the same semantics as reshape on the represented dense
+// tensor.  The `input_indices` are recomputed based on the requested `new_shape`.
 //
-// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.
+// If one component of `new_shape` is the special value -1, the size of that
+// dimension is computed so that the total dense size remains constant.  At
+// most one component of `new_shape` can be -1.  The number of dense elements
+// implied by `new_shape` must be the same as the number of dense elements
+// originally implied by `input_shape`.
 //
-// Returns a `Tensor` of same shape and type as the elements of `inputs`.
+// Reshaping does not affect the order of values in the SparseTensor.
+//
+// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape`
+// has length `R_out`, then `input_indices` has shape `[N, R_in]`,
+// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and
+// `output_shape` has length `R_out`.
 //
 // Arguments:
-//	inputs: A list of `Tensor` objects, each with same shape and type.
-//	shape: Shape of elements of `inputs`.
-func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) {
+//	input_indices: 2-D.  `N x R_in` matrix with the indices of non-empty values in a
+// SparseTensor.
+//	input_shape: 1-D.  `R_in` vector with the input SparseTensor's dense shape.
+//	new_shape: 1-D.  `R_out` vector with the requested new dense shape.
+//
+// Returns 2-D.  `N x R_out` matrix with the updated indices of non-empty
+// values in the output SparseTensor.1-D.  `R_out` vector with the full dense shape of the output
+// SparseTensor.  This is the same as `new_shape` but with any -1 dimensions
+// filled in.
+func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"shape": shape}
 	opspec := tf.OpSpec{
-		Type: "AccumulateNV2",
+		Type: "SparseReshape",
 		Input: []tf.Input{
-			tf.OutputList(inputs),
+			input_indices, input_shape, new_shape,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Computes the gradient for the inverse of `x` wrt its input.
-//
-// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`
-// is the corresponding input gradient.
-func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+// Deprecated. Use TensorArraySplitV3
+func TensorArraySplitV2(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ReciprocalGrad",
+		Type: "TensorArraySplitV2",
 		Input: []tf.Input{
-			y, dy,
+			handle, value, lengths, flow_in,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Creates a dataset with a range of values. Corresponds to python's xrange.
+// PackAttr is an optional argument to Pack.
+type PackAttr func(optionalAttr)
+
+// PackAxis sets the optional axis attribute to value.
 //
-// Arguments:
-//	start: corresponds to start in python's xrange().
-//	stop: corresponds to stop in python's xrange().
-//	step: corresponds to step in python's xrange().
+// value: Dimension along which to pack.  Negative values wrap around, so the
+// valid range is `[-(R+1), R+1)`.
+// If not specified, defaults to 0
+func PackAxis(value int64) PackAttr {
+	return func(m optionalAttr) {
+		m["axis"] = value
+	}
+}
+
+// Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor.
 //
+// Packs the `N` tensors in `values` into a tensor with rank one higher than each
+// tensor in `values`, by packing them along the `axis` dimension.
+// Given a list of tensors of shape `(A, B, C)`;
 //
-func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// if `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`.
+// if `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`.
+// Etc.
+//
+// For example:
+//
+// ```
+// # 'x' is [1, 4]
+// # 'y' is [2, 5]
+// # 'z' is [3, 6]
+// pack([x, y, z]) => [[1, 4], [2, 5], [3, 6]]  # Pack along first dim.
+// pack([x, y, z], axis=1) => [[1, 2, 3], [4, 5, 6]]
+// ```
+//
+// This is the opposite of `unpack`.
+//
+// Arguments:
+//	values: Must be of same shape and type.
+//
+// Returns The packed tensor.
+func Pack(scope *Scope, values []tf.Output, optional ...PackAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "RangeDataset",
+		Type: "Pack",
 		Input: []tf.Input{
-			start, stop, step,
+			tf.OutputList(values),
 		},
 		Attrs: attrs,
 	}
@@ -10573,87 +10426,86 @@ func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output,
 	return op.Output(0)
 }
 
-// Saves tensors in V2 checkpoint format.
+// Reorders a SparseTensor into the canonical, row-major ordering.
 //
-// By default, saves the named tensors in full.  If the caller wishes to save
-// specific slices of full tensors, "shape_and_slices" should be non-empty strings
-// and correspondingly well-formed.
+// Note that by convention, all sparse ops preserve the canonical ordering along
+// increasing dimension number. The only time ordering can be violated is during
+// manual manipulation of the indices and values vectors to add entries.
+//
+// Reordering does not affect the shape of the SparseTensor.
+//
+// If the tensor has rank `R` and `N` non-empty values, `input_indices` has
+// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`.
 //
 // Arguments:
-//	prefix: Must have a single element. The prefix of the V2 checkpoint to which we
-// write the tensors.
-//	tensor_names: shape {N}. The names of the tensors to be saved.
-//	shape_and_slices: shape {N}.  The slice specs of the tensors to be saved.
-// Empty strings indicate that they are non-partitioned tensors.
-//	tensors: `N` tensors to save.
+//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
+//	input_shape: 1-D.  Shape of the input SparseTensor.
 //
-// Returns the created operation.
-func SaveV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, tensors []tf.Output) (o *tf.Operation) {
+// Returns 2-D.  `N x R` matrix with the same indices as input_indices, but
+// in canonical row-major ordering.1-D.  `N` non-empty values corresponding to `output_indices`.
+func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SaveV2",
+		Type: "SparseReorder",
 		Input: []tf.Input{
-			prefix, tensor_names, shape_and_slices, tf.OutputList(tensors),
+			input_indices, input_values, input_shape,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
 }
 
-// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve.
-type MatrixTriangularSolveAttr func(optionalAttr)
-
-// MatrixTriangularSolveLower sets the optional lower attribute to value.
-//
-// value: Boolean indicating whether the innermost matrices in `matrix` are
-// lower or upper triangular.
-// If not specified, defaults to true
-func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr {
-	return func(m optionalAttr) {
-		m["lower"] = value
+// Computes rectified linear: `max(features, 0)`.
+func Relu(scope *Scope, features tf.Output) (activations tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Relu",
+		Input: []tf.Input{
+			features,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value.
-//
-// value: Boolean indicating whether to solve with `matrix` or its (block-wise)
-//          adjoint.
+// ResourceApplyAddSignAttr is an optional argument to ResourceApplyAddSign.
+type ResourceApplyAddSignAttr func(optionalAttr)
+
+// ResourceApplyAddSignUseLocking sets the optional use_locking attribute to value.
 //
-// @compatibility(numpy)
-// Equivalent to np.linalg.triangular_solve
-// @end_compatibility
+// value: If `True`, updating of the var and m tensors is
+// protected by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
 // If not specified, defaults to false
-func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr {
+func ResourceApplyAddSignUseLocking(value bool) ResourceApplyAddSignAttr {
 	return func(m optionalAttr) {
-		m["adjoint"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Solves systems of linear equations with upper or lower triangular matrices by
-//
-// backsubstitution.
-//
-// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form
-// square matrices. If `lower` is `True` then the strictly upper triangular part
-// of each inner-most matrix is assumed to be zero and not accessed.
-// If `lower` is False then the strictly lower triangular part of each inner-most
-// matrix is assumed to be zero and not accessed.
-// `rhs` is a tensor of shape `[..., M, K]`.
+// Update '*var' according to the AddSign update.
 //
-// The output is a tensor of shape `[..., M, K]`. If `adjoint` is
-// `True` then the innermost matrices in `output` satisfy matrix equations
-// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`.
-// If `adjoint` is `False` then the strictly then the  innermost matrices in
-// `output` satisfy matrix equations
-// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`.
+// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+// update <- (alpha + sign_decay * sign(g) *sign(m)) * g
+// variable <- variable - lr_t * update
 //
 // Arguments:
-//	matrix: Shape is `[..., M, M]`.
-//	rhs: Shape is `[..., M, K]`.
+//	var_: Should be from a Variable().
+//	m: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	alpha: Must be a scalar.
+//	sign_decay: Must be a scalar.
+//	beta: Must be a scalar.
+//	grad: The gradient.
 //
-// Returns Shape is `[..., M, K]`.
-func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) {
+// Returns the created operation.
+func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, alpha tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyAddSignAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -10662,82 +10514,96 @@ func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, option
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MatrixTriangularSolve",
+		Type: "ResourceApplyAddSign",
 		Input: []tf.Input{
-			matrix, rhs,
+			var_, m, lr, alpha, sign_decay, beta, grad,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Computes fingerprints of the input strings.
+// FractionalMaxPoolGradAttr is an optional argument to FractionalMaxPoolGrad.
+type FractionalMaxPoolGradAttr func(optionalAttr)
+
+// FractionalMaxPoolGradOverlapping sets the optional overlapping attribute to value.
+//
+// value: When set to True, it means when pooling, the values at the boundary
+// of adjacent pooling cells are used by both cells. For example:
+//
+// `index  0  1  2  3  4`
+//
+// `value  20 5  16 3  7`
+//
+// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
+// The result would be [20, 16] for fractional max pooling.
+// If not specified, defaults to false
+func FractionalMaxPoolGradOverlapping(value bool) FractionalMaxPoolGradAttr {
+	return func(m optionalAttr) {
+		m["overlapping"] = value
+	}
+}
+
+// Computes gradient of the FractionalMaxPool function.
 //
 // Arguments:
-//	input: vector of strings to compute fingerprints on.
+//	orig_input: Original input for `fractional_max_pool`
+//	orig_output: Original output for `fractional_max_pool`
+//	out_backprop: 4-D with shape `[batch, height, width, channels]`.  Gradients
+// w.r.t. the output of `fractional_max_pool`.
+//	row_pooling_sequence: row pooling sequence, form pooling region with
+// col_pooling_sequence.
+//	col_pooling_sequence: column pooling sequence, form pooling region with
+// row_pooling sequence.
 //
-// Returns a (N,2) shaped matrix where N is the number of elements in the input
-// vector. Each row contains the low and high parts of the fingerprint.
-func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) {
+// Returns 4-D.  Gradients w.r.t. the input of `fractional_max_pool`.
+func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalMaxPoolGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SdcaFprint",
+		Type: "FractionalMaxPoolGrad",
 		Input: []tf.Input{
-			input,
+			orig_input, orig_output, out_backprop, row_pooling_sequence, col_pooling_sequence,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// SparseMatMulAttr is an optional argument to SparseMatMul.
-type SparseMatMulAttr func(optionalAttr)
-
-// SparseMatMulTransposeA sets the optional transpose_a attribute to value.
-// If not specified, defaults to false
-func SparseMatMulTransposeA(value bool) SparseMatMulAttr {
-	return func(m optionalAttr) {
-		m["transpose_a"] = value
-	}
-}
-
-// SparseMatMulTransposeB sets the optional transpose_b attribute to value.
-// If not specified, defaults to false
-func SparseMatMulTransposeB(value bool) SparseMatMulAttr {
-	return func(m optionalAttr) {
-		m["transpose_b"] = value
-	}
-}
-
-// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value.
-// If not specified, defaults to false
-func SparseMatMulAIsSparse(value bool) SparseMatMulAttr {
-	return func(m optionalAttr) {
-		m["a_is_sparse"] = value
-	}
-}
+// ResourceApplyAdagradDAAttr is an optional argument to ResourceApplyAdagradDA.
+type ResourceApplyAdagradDAAttr func(optionalAttr)
 
-// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value.
+// ResourceApplyAdagradDAUseLocking sets the optional use_locking attribute to value.
+//
+// value: If True, updating of the var and accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
 // If not specified, defaults to false
-func SparseMatMulBIsSparse(value bool) SparseMatMulAttr {
+func ResourceApplyAdagradDAUseLocking(value bool) ResourceApplyAdagradDAAttr {
 	return func(m optionalAttr) {
-		m["b_is_sparse"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Multiply matrix "a" by matrix "b".
+// Update '*var' according to the proximal adagrad scheme.
 //
-// The inputs must be two-dimensional matrices and the inner dimension of "a" must
-// match the outer dimension of "b". This op is optimized for the case where at
-// least one of "a" or "b" is sparse. The breakeven for using this versus a dense
-// matrix multiply on one platform was 30% zero values in the sparse matrix.
+// Arguments:
+//	var_: Should be from a Variable().
+//	gradient_accumulator: Should be from a Variable().
+//	gradient_squared_accumulator: Should be from a Variable().
+//	grad: The gradient.
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regularization. Must be a scalar.
+//	l2: L2 regularization. Must be a scalar.
+//	global_step: Training step number. Must be a scalar.
 //
-// The gradient computation of this operation will only take advantage of sparsity
-// in the input gradient when that gradient comes from a Relu.
-func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) {
+// Returns the created operation.
+func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceApplyAdagradDAAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -10746,176 +10612,151 @@ func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatM
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseMatMul",
+		Type: "ResourceApplyAdagradDA",
 		Input: []tf.Input{
-			a, b,
+			var_, gradient_accumulator, gradient_squared_accumulator, grad, lr, l1, l2, global_step,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// SdcaOptimizerAttr is an optional argument to SdcaOptimizer.
-type SdcaOptimizerAttr func(optionalAttr)
+// SparseReduceMaxSparseAttr is an optional argument to SparseReduceMaxSparse.
+type SparseReduceMaxSparseAttr func(optionalAttr)
 
-// SdcaOptimizerAdaptative sets the optional adaptative attribute to value.
+// SparseReduceMaxSparseKeepDims sets the optional keep_dims attribute to value.
 //
-// value: Whether to use Adapative SDCA for the inner loop.
+// value: If true, retain reduced dimensions with length 1.
 // If not specified, defaults to false
-func SdcaOptimizerAdaptative(value bool) SdcaOptimizerAttr {
+func SparseReduceMaxSparseKeepDims(value bool) SparseReduceMaxSparseAttr {
 	return func(m optionalAttr) {
-		m["adaptative"] = value
+		m["keep_dims"] = value
 	}
 }
 
-// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for
-//
-// linear models with L1 + L2 regularization. As global optimization objective is
-// strongly-convex, the optimizer optimizes the dual objective at each step. The
-// optimizer applies each update one example at a time. Examples are sampled
-// uniformly, and the optimizer is learning rate free and enjoys linear convergence
-// rate.
-//
-// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
-// Shai Shalev-Shwartz, Tong Zhang. 2012
+// Computes the max of elements across dimensions of a SparseTensor.
 //
-// $$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$
+// This Op takes a SparseTensor and is the sparse counterpart to
+// `tf.reduce_max()`.  In contrast to SparseReduceMax, this Op returns a
+// SparseTensor.
 //
-// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
-// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
-// Peter Richtarik, Martin Takac. 2015
+// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
+// with length 1.
 //
-// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
-// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015
+// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
+// with a single element is returned.  Additionally, the axes can be negative,
+// which are interpreted according to the indexing rules in Python.
 //
 // Arguments:
-//	sparse_example_indices: a list of vectors which contain example indices.
-//	sparse_feature_indices: a list of vectors which contain feature indices.
-//	sparse_feature_values: a list of vectors which contains feature value
-// associated with each feature group.
-//	dense_features: a list of matrices which contains the dense feature values.
-//	example_weights: a vector which contains the weight associated with each
-// example.
-//	example_labels: a vector which contains the label/target associated with each
-// example.
-//	sparse_indices: a list of vectors where each value is the indices which has
-// corresponding weights in sparse_weights. This field maybe omitted for the
-// dense approach.
-//	sparse_weights: a list of vectors where each value is the weight associated with
-// a sparse feature group.
-//	dense_weights: a list of vectors where the values are the weights associated
-// with a dense feature group.
-//	example_state_data: a list of vectors containing the example state data.
-//	loss_type: Type of the primal loss. Currently SdcaSolver supports logistic,
-// squared and hinge losses.
-//	l1: Symmetric l1 regularization strength.
-//	l2: Symmetric l2 regularization strength.
-//	num_loss_partitions: Number of partitions of the global loss function.
-//	num_inner_iterations: Number of iterations per mini-batch.
-//
-// Returns a list of vectors containing the updated example state
-// data.a list of vectors where each value is the delta
-// weights associated with a sparse feature group.a list of vectors where the values are the delta
-// weights associated with a dense feature group.
-func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerAttr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) {
+//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
+//	input_shape: 1-D.  Shape of the input SparseTensor.
+//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
+func SparseReduceMaxSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SdcaOptimizer",
+		Type: "SparseReduceMaxSparse",
 		Input: []tf.Input{
-			tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data,
+			input_indices, input_values, input_shape, reduction_axes,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	out_example_state_data = op.Output(idx)
-	if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, "out_delta_sparse_weights"); err != nil {
-		scope.UpdateErr("SdcaOptimizer", err)
-		return
-	}
-	if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil {
-		scope.UpdateErr("SdcaOptimizer", err)
-		return
-	}
-	return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Computes the minimum along segments of a tensor.
-//
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
-//
-// Computes a tensor such that
-// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such
-// that `segment_ids[j] == i`.
-//
-// If the min is empty for a given segment ID `i`, `output[i] = 0`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMin.png" alt>
-// </div>
+// Creates a dataset that emits the outputs of `input_dataset` `count` times.
 //
 // Arguments:
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
+//	count: A scalar representing the number of times that `input_dataset` should
+// be repeated. A value of `-1` indicates that it should be repeated infinitely.
+//
 //
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+func RepeatDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "SegmentMin",
+		Type: "RepeatDataset",
 		Input: []tf.Input{
-			data, segment_ids,
+			input_dataset, count,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear.
-type QuantizedResizeBilinearAttr func(optionalAttr)
+// AddManySparseToTensorsMapAttr is an optional argument to AddManySparseToTensorsMap.
+type AddManySparseToTensorsMapAttr func(optionalAttr)
 
-// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value.
+// AddManySparseToTensorsMapContainer sets the optional container attribute to value.
 //
-// value: If true, rescale input by (new_height - 1) / (height - 1), which
-// exactly aligns the 4 corners of images and resized images. If false, rescale
-// by new_height / height. Treat similarly the width dimension.
-// If not specified, defaults to false
-func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr {
+// value: The container name for the `SparseTensorsMap` created by this op.
+// If not specified, defaults to ""
+func AddManySparseToTensorsMapContainer(value string) AddManySparseToTensorsMapAttr {
 	return func(m optionalAttr) {
-		m["align_corners"] = value
+		m["container"] = value
 	}
 }
 
-// Resize quantized `images` to `size` using quantized bilinear interpolation.
+// AddManySparseToTensorsMapSharedName sets the optional shared_name attribute to value.
 //
-// Input images and output images must be quantized types.
+// value: The shared name for the `SparseTensorsMap` created by this op.
+// If blank, the new Operation's unique name is used.
+// If not specified, defaults to ""
+func AddManySparseToTensorsMapSharedName(value string) AddManySparseToTensorsMapAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Add an `N`-minibatch `SparseTensor` to a `SparseTensorsMap`, return `N` handles.
 //
-// Arguments:
-//	images: 4-D with shape `[batch, height, width, channels]`.
-//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-// new size for the images.
+// A `SparseTensor` of rank `R` is represented by three tensors: `sparse_indices`,
+// `sparse_values`, and `sparse_shape`, where
+//
+// ```sparse_indices.shape[1] == sparse_shape.shape[0] == R```
 //
+// An `N`-minibatch of `SparseTensor` objects is represented as a `SparseTensor`
+// having a first `sparse_indices` column taking values between `[0, N)`, where
+// the minibatch size `N == sparse_shape[0]`.
 //
+// The input `SparseTensor` must have rank `R` greater than 1, and the first
+// dimension is treated as the minibatch dimension.  Elements of the `SparseTensor`
+// must be sorted in increasing order of this first dimension.  The stored
+// `SparseTensor` objects pointed to by each row of the output `sparse_handles`
+// will have rank `R-1`.
 //
-// Returns 4-D with shape
-// `[batch, new_height, new_width, channels]`.
-func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) {
+// The `SparseTensor` values can then be read out as part of a minibatch by passing
+// the given keys as vector elements to `TakeManySparseFromTensorsMap`.  To ensure
+// the correct `SparseTensorsMap` is accessed, ensure that the same
+// `container` and `shared_name` are passed to that Op.  If no `shared_name`
+// is provided here, instead use the *name* of the Operation created by calling
+// `AddManySparseToTensorsMap` as the `shared_name` passed to
+// `TakeManySparseFromTensorsMap`.  Ensure the Operations are colocated.
+//
+// Arguments:
+//	sparse_indices: 2-D.  The `indices` of the minibatch `SparseTensor`.
+// `sparse_indices[:, 0]` must be ordered values in `[0, N)`.
+//	sparse_values: 1-D.  The `values` of the minibatch `SparseTensor`.
+//	sparse_shape: 1-D.  The `shape` of the minibatch `SparseTensor`.
+// The minibatch size `N == sparse_shape[0]`.
+//
+// Returns 1-D.  The handles of the `SparseTensor` now stored in the
+// `SparseTensorsMap`.  Shape: `[N]`.
+func AddManySparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddManySparseToTensorsMapAttr) (sparse_handles tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -10924,69 +10765,54 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QuantizedResizeBilinear",
+		Type: "AddManySparseToTensorsMap",
 		Input: []tf.Input{
-			images, size, min, max,
+			sparse_indices, sparse_values, sparse_shape,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// RestoreAttr is an optional argument to Restore.
-type RestoreAttr func(optionalAttr)
+// MinAttr is an optional argument to Min.
+type MinAttr func(optionalAttr)
 
-// RestorePreferredShard sets the optional preferred_shard attribute to value.
+// MinKeepDims sets the optional keep_dims attribute to value.
 //
-// value: Index of file to open first if multiple files match
-// `file_pattern`.
-// If not specified, defaults to -1
-func RestorePreferredShard(value int64) RestoreAttr {
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func MinKeepDims(value bool) MinAttr {
 	return func(m optionalAttr) {
-		m["preferred_shard"] = value
+		m["keep_dims"] = value
 	}
 }
 
-// Restores a tensor from checkpoint files.
-//
-// Reads a tensor stored in one or several files. If there are several files (for
-// instance because a tensor was saved as slices), `file_pattern` may contain
-// wildcard symbols (`*` and `?`) in the filename portion only, not in the
-// directory portion.
-//
-// If a `file_pattern` matches several files, `preferred_shard` can be used to hint
-// in which file the requested tensor is likely to be found. This op will first
-// open the file at index `preferred_shard` in the list of matching files and try
-// to restore tensors from that file.  Only if some tensors or tensor slices are
-// not found in that first file, then the Op opens all the files. Setting
-// `preferred_shard` to match the value passed as the `shard` input
-// of a matching `Save` Op may speed up Restore.  This attribute only affects
-// performance, not correctness.  The default value -1 means files are processed in
-// order.
+// Computes the minimum of elements across dimensions of a tensor.
 //
-// See also `RestoreSlice`.
+// Reduces `input` along the dimensions given in `axis`. Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `axis`. If `keep_dims` is true, the reduced dimensions are
+// retained with length 1.
 //
 // Arguments:
-//	file_pattern: Must have a single element. The pattern of the files from
-// which we read the tensor.
-//	tensor_name: Must have a single element. The name of the tensor to be
-// restored.
-//	dt: The type of the tensor to be restored.
+//	input: The tensor to reduce.
+//	axis: The dimensions to reduce. Must be in the range
+// `[-rank(input), rank(input))`.
 //
-// Returns The restored tensor.
-func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) {
+// Returns The reduced tensor.
+func Min(scope *Scope, input tf.Output, axis tf.Output, optional ...MinAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dt": dt}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Restore",
+		Type: "Min",
 		Input: []tf.Input{
-			file_pattern, tensor_name,
+			input, axis,
 		},
 		Attrs: attrs,
 	}
@@ -10994,357 +10820,354 @@ func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.
 	return op.Output(0)
 }
 
-// WriteAudioSummaryAttr is an optional argument to WriteAudioSummary.
-type WriteAudioSummaryAttr func(optionalAttr)
-
-// WriteAudioSummaryMaxOutputs sets the optional max_outputs attribute to value.
+// Shuffle dimensions of x according to a permutation.
 //
-// value: Max number of batch elements to generate audio for.
-// If not specified, defaults to 3
+// The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:
+//   `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]`
+func Transpose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Transpose",
+		Input: []tf.Input{
+			x, perm,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter.
+type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr)
+
+// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value.
 //
-// REQUIRES: value >= 1
-func WriteAudioSummaryMaxOutputs(value int64) WriteAudioSummaryAttr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, height, width, channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, channels, height, width].
+// If not specified, defaults to "NHWC"
+func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr {
 	return func(m optionalAttr) {
-		m["max_outputs"] = value
+		m["data_format"] = value
 	}
 }
 
-// Writes a `Summary` protocol buffer with audio.
-//
-// The summary has up to `max_outputs` summary values containing audio. The
-// audio is built from `tensor` which must be 3-D with shape `[batch_size,
-// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
-// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
+// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value.
 //
-// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-// build the `tag` of the summary values:
-//
-// *  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
-// *  If `max_outputs` is greater than 1, the summary value tags are
-//    generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of depthwise convolution with respect to the filter.
 //
 // Arguments:
-//	writer: A handle to a summary writer.
-//	step: The step to write the summary for.
-//	tag: Scalar. Used to build the `tag` attribute of the summary values.
-//	tensor: 2-D of shape `[batch_size, frames]`.
-//	sample_rate: The sample rate of the signal in hertz.
+//	input: 4-D with shape based on `data_format`.  For example, if
+// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height,
+// in_width, in_channels]` tensor.
+//	filter_sizes: An integer vector representing the tensor shape of `filter`,
+// where `filter` is a 4-D
+// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor.
+//	out_backprop: 4-D with shape  based on `data_format`.
+// For example, if `data_format` is 'NHWC' then
+// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
+// Gradients w.r.t. the output of the convolution.
+//	strides: The stride of the sliding window for each dimension of the input
+// of the convolution.
+//	padding: The type of padding algorithm to use.
 //
-// Returns the created operation.
-func WriteAudioSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...WriteAudioSummaryAttr) (o *tf.Operation) {
+// Returns 4-D with shape
+// `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
+// the `filter` input of the convolution.
+func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "WriteAudioSummary",
+		Type: "DepthwiseConv2dNativeBackpropFilter",
 		Input: []tf.Input{
-			writer, step, tag, tensor, sample_rate,
+			input, filter_sizes, out_backprop,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
-}
-
-// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D.
-type FusedResizeAndPadConv2DAttr func(optionalAttr)
-
-// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value.
-//
-// value: If true, rescale input by (new_height - 1) / (height - 1),
-// which exactly aligns the 4 corners of images and resized images. If false, rescale
-// by new_height / height. Treat similarly the width dimension.
-// If not specified, defaults to false
-func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr {
-	return func(m optionalAttr) {
-		m["resize_align_corners"] = value
-	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Performs a resize and padding as a preprocess during a convolution.
-//
-// It's often possible to do spatial transformations more efficiently as part of
-// the packing stage of a convolution, so this op allows for an optimized
-// implementation where these stages are fused together. This prevents the need to
-// write out the intermediate results as whole tensors, reducing memory pressure,
-// and we can get some latency gains by merging the transformation calculations.
-// The data_format attribute for Conv2D isn't supported by this op, and defaults to
-// 'NHWC' order.
-// Internally this op uses a single per-graph scratch buffer, which means that it
-// will block if multiple versions are being run in parallel. This is because this
-// operator is primarily an optimization to minimize memory usage.
-//
-// Arguments:
-//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
-//	size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-// new size for the images.
-//	paddings: A two-column matrix specifying the padding sizes. The number of
-// rows must be the same as the rank of `input`.
-//	filter: 4-D with shape
-// `[filter_height, filter_width, in_channels, out_channels]`.
+// Component-wise divides a SparseTensor by a dense Tensor.
 //
-//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
-// of `input`. Must be in the same order as the dimension specified with format.
-//	padding: The type of padding algorithm to use.
-func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) {
+// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not
+// the other direction.
+//
+// Arguments:
+//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
+//	sp_shape: 1-D.  Shape of the input SparseTensor.
+//	dense: `R`-D.  The dense Tensor operand.
+//
+// Returns 1-D.  The `N` values that are operated on.
+func SparseDenseCwiseDiv(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "FusedResizeAndPadConv2D",
+		Type: "SparseDenseCwiseDiv",
 		Input: []tf.Input{
-			input, size, paddings, filter,
+			sp_indices, sp_values, sp_shape, dense,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation.
-type DenseToSparseSetOperationAttr func(optionalAttr)
+// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum.
+type ResourceApplyMomentumAttr func(optionalAttr)
 
-// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr {
+// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr {
 	return func(m optionalAttr) {
-		m["validate_indices"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Applies set operation along last dimension of `Tensor` and `SparseTensor`.
-//
-// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
+// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
 //
-// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
-// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
-// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
-// ignored.
+// value: If `True`, the tensor passed to compute grad will be
+// var - lr * momentum * accum, so in the end, the var you get is actually
+// var - lr * momentum * accum.
+// If not specified, defaults to false
+func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr {
+	return func(m optionalAttr) {
+		m["use_nesterov"] = value
+	}
+}
+
+// Update '*var' according to the momentum scheme. Set use_nesterov = True if you
 //
-// If `validate_indices` is `True`, this op validates the order and range of `set2`
-// indices.
+// want to use Nesterov momentum.
 //
-// Output `result` is a `SparseTensor` represented by `result_indices`,
-// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
-// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
-// dimension contains the result of `set_operation` applied to the corresponding
-// `[0...n-1]` dimension of `set`.
+// accum = accum * momentum + grad
+// var -= lr * accum
 //
 // Arguments:
-//	set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.
-// Dimension `n` contains values in a set, duplicates are allowed but ignored.
-//	set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
-// order.
-//	set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
-// order.
-//	set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
-// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the
-// max set size across `n-1` dimensions.
-//
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	grad: The gradient.
+//	momentum: Momentum. Must be a scalar.
 //
-// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
-// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
-// is the max result set size across all `0...n-1` dimensions.
-func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
+// Returns the created operation.
+func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"set_operation": set_operation}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DenseToSparseSetOperation",
+		Type: "ResourceApplyMomentum",
 		Input: []tf.Input{
-			set1, set2_indices, set2_values, set2_shape,
+			var_, accum, lr, grad, momentum,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return scope.AddOperation(opspec)
 }
 
-// Delete the tensor specified by its handle in the session.
-//
-// Arguments:
-//	handle: The handle for a tensor stored in the session state.
+// Returns the truth value of (x >= y) element-wise.
 //
-// Returns the created operation.
-func DeleteSessionTensor(scope *Scope, handle tf.Output) (o *tf.Operation) {
+// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "DeleteSessionTensor",
+		Type: "GreaterEqual",
 		Input: []tf.Input{
-			handle,
+			x, y,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// DenseToDenseSetOperationAttr is an optional argument to DenseToDenseSetOperation.
-type DenseToDenseSetOperationAttr func(optionalAttr)
+// Conv3DAttr is an optional argument to Conv3D.
+type Conv3DAttr func(optionalAttr)
 
-// DenseToDenseSetOperationValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func DenseToDenseSetOperationValidateIndices(value bool) DenseToDenseSetOperationAttr {
+// Conv3DDataFormat sets the optional data_format attribute to value.
+//
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func Conv3DDataFormat(value string) Conv3DAttr {
 	return func(m optionalAttr) {
-		m["validate_indices"] = value
+		m["data_format"] = value
 	}
 }
 
-// Applies set operation along last dimension of 2 `Tensor` inputs.
-//
-// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
+// Conv3DDilations sets the optional dilations attribute to value.
 //
-// Output `result` is a `SparseTensor` represented by `result_indices`,
-// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
-// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
-// dimension contains the result of `set_operation` applied to the corresponding
-// `[0...n-1]` dimension of `set`.
+// value: 1-D tensor of length 5.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each
+// filter element on that dimension. The dimension order is determined by the
+// value of `data_format`, see above for details. Dilations in the batch and
+// depth dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 i:1 >
+func Conv3DDilations(value []int64) Conv3DAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes a 3-D convolution given 5-D `input` and `filter` tensors.
 //
-// Arguments:
-//	set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.
-// Dimension `n` contains values in a set, duplicates are allowed but ignored.
-//	set2: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set1`.
-// Dimension `n` contains values in a set, duplicates are allowed but ignored.
+// In signal processing, cross-correlation is a measure of similarity of
+// two waveforms as a function of a time-lag applied to one of them. This
+// is also known as a sliding dot product or sliding inner-product.
 //
+// Our Conv3D implements a form of cross-correlation.
 //
-// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
-// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
-// is the max result set size across all `0...n-1` dimensions.
-func DenseToDenseSetOperation(scope *Scope, set1 tf.Output, set2 tf.Output, set_operation string, optional ...DenseToDenseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
+// Arguments:
+//	input: Shape `[batch, in_depth, in_height, in_width, in_channels]`.
+//	filter: Shape `[filter_depth, filter_height, filter_width, in_channels,
+// out_channels]`. `in_channels` must match between `input` and `filter`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv3DAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"set_operation": set_operation}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DenseToDenseSetOperation",
+		Type: "Conv3D",
 		Input: []tf.Input{
-			set1, set2,
+			input, filter,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// SumAttr is an optional argument to Sum.
-type SumAttr func(optionalAttr)
-
-// SumKeepDims sets the optional keep_dims attribute to value.
+// Adds up a SparseTensor and a dense Tensor, using these special rules:
 //
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func SumKeepDims(value bool) SumAttr {
-	return func(m optionalAttr) {
-		m["keep_dims"] = value
-	}
-}
-
-// Computes the sum of elements across dimensions of a tensor.
+// (1) Broadcasts the dense side to have the same shape as the sparse side, if
+//     eligible;
+// (2) Then, only the dense values pointed to by the indices of the SparseTensor
+//     participate in the cwise addition.
 //
-// Reduces `input` along the dimensions given in `reduction_indices`. Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-// retained with length 1.
+// By these rules, the result is a logical SparseTensor with exactly the same
+// indices and shape, but possibly with different non-zero values.  The output of
+// this Op is the resultant non-zero values.
 //
 // Arguments:
-//	input: The tensor to reduce.
-//	reduction_indices: The dimensions to reduce. Must be in the range
-// `[-rank(input), rank(input))`.
+//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
+//	sp_shape: 1-D.  Shape of the input SparseTensor.
+//	dense: `R`-D.  The dense Tensor operand.
 //
-// Returns The reduced tensor.
-func Sum(scope *Scope, input tf.Output, reduction_indices tf.Output, optional ...SumAttr) (output tf.Output) {
+// Returns 1-D.  The `N` values that are operated on.
+func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "Sum",
+		Type: "SparseDenseCwiseAdd",
 		Input: []tf.Input{
-			input, reduction_indices,
+			sp_indices, sp_values, sp_shape, dense,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the sign and the log of the absolute value of the determinant of
-//
-// one or more square matrices.
-//
-// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions
-// form square matrices. The outputs are two tensors containing the signs and
-// absolute values of the log determinants for all N input submatrices
-// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant).
-// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU
-// is the LU decomposition of the input and P is the corresponding
-// permutation matrix.
+// Read an element from the TensorArray into output `value`.
 //
 // Arguments:
-//	input: Shape is `[N, M, M]`.
+//	handle: The handle to a TensorArray.
 //
-// Returns The signs of the log determinants of the inputs. Shape is `[N]`.The logs of the absolute values of the determinants
-// of the N input matrices.  Shape is `[N]`.
-func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) {
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//	dtype: The type of the elem that is returned.
+//
+// Returns The tensor that is read from the TensorArray.
+func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtype": dtype}
 	opspec := tf.OpSpec{
-		Type: "LogMatrixDeterminant",
+		Type: "TensorArrayReadV3",
 		Input: []tf.Input{
-			input,
+			handle, index, flow_in,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// SetSizeAttr is an optional argument to SetSize.
-type SetSizeAttr func(optionalAttr)
+// EncodePngAttr is an optional argument to EncodePng.
+type EncodePngAttr func(optionalAttr)
 
-// SetSizeValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func SetSizeValidateIndices(value bool) SetSizeAttr {
+// EncodePngCompression sets the optional compression attribute to value.
+//
+// value: Compression level.
+// If not specified, defaults to -1
+func EncodePngCompression(value int64) EncodePngAttr {
 	return func(m optionalAttr) {
-		m["validate_indices"] = value
+		m["compression"] = value
 	}
 }
 
-// Number of unique elements along last dimension of input `set`.
+// PNG-encode an image.
 //
-// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`,
-// and `set_shape`. The last dimension contains values in a set, duplicates are
-// allowed but ignored.
+// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
+// where `channels` is:
 //
-// If `validate_indices` is `True`, this op validates the order and range of `set`
-// indices.
+// *   1: for grayscale.
+// *   2: for grayscale + alpha.
+// *   3: for RGB.
+// *   4: for RGBA.
+//
+// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
+// default or a value from 0 to 9.  9 is the highest compression level, generating
+// the smallest output, but is slower.
 //
 // Arguments:
-//	set_indices: 2D `Tensor`, indices of a `SparseTensor`.
-//	set_values: 1D `Tensor`, values of a `SparseTensor`.
-//	set_shape: 1D `Tensor`, shape of a `SparseTensor`.
+//	image: 3-D with shape `[height, width, channels]`.
 //
-// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st
-// `n-1` dimensions as `set`. Each value is the number of unique elements in
-// the corresponding `[0...n-1]` dimension of `set`.
-func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) {
+// Returns 0-D. PNG-encoded image.
+func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -11353,9 +11176,9 @@ func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shap
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SetSize",
+		Type: "EncodePng",
 		Input: []tf.Input{
-			set_indices, set_values, set_shape,
+			image,
 		},
 		Attrs: attrs,
 	}
@@ -11363,258 +11186,195 @@ func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shap
 	return op.Output(0)
 }
 
-// The gradient of SparseFillEmptyRows.
-//
-// Takes vectors reverse_index_map, shaped `[N]`, and grad_values,
-// shaped `[N_full]`, where `N_full >= N` and copies data into either
-// `d_values` or `d_default_value`.  Here `d_values` is shaped `[N]` and
-// `d_default_value` is a scalar.
-//
-//   d_values[j] = grad_values[reverse_index_map[j]]
-//   d_default_value = sum_{k : 0 .. N_full - 1} (
-//      grad_values[k] * 1{k not in reverse_index_map})
-//
-// Arguments:
-//	reverse_index_map: 1-D.  The reverse index map from SparseFillEmptyRows.
-//	grad_values: 1-D.  The gradients from backprop.
+// DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute.
+type DataFormatVecPermuteAttr func(optionalAttr)
+
+// DataFormatVecPermuteSrcFormat sets the optional src_format attribute to value.
 //
-// Returns 1-D.  The backprop into values.0-D.  The backprop into default_value.
-func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) {
-	if scope.Err() != nil {
-		return
+// value: source data format.
+// If not specified, defaults to "NHWC"
+func DataFormatVecPermuteSrcFormat(value string) DataFormatVecPermuteAttr {
+	return func(m optionalAttr) {
+		m["src_format"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "SparseFillEmptyRowsGrad",
-		Input: []tf.Input{
-			reverse_index_map, grad_values,
-		},
+}
+
+// DataFormatVecPermuteDstFormat sets the optional dst_format attribute to value.
+//
+// value: destination data format.
+// If not specified, defaults to "NCHW"
+func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr {
+	return func(m optionalAttr) {
+		m["dst_format"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
 }
 
-// Assigns a new value to a variable.
+// Returns the permuted vector/tensor in the destination data format given the
 //
-// Any ReadVariableOp with a control dependency on this op is guaranteed to return
-// this value or a subsequent newer value of the variable.
+// one in the source data format.
 //
 // Arguments:
-//	resource: handle to the resource in which to store the variable.
-//	value: the value to set the new tensor to use.
+//	x: Vector of size 4 or Tensor of shape (4, 2) in source data format.
 //
-// Returns the created operation.
-func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
+// Returns Vector of size 4 or Tensor of shape (4, 2) in destination data format.
+func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "AssignVariableOp",
+		Type: "DataFormatVecPermute",
 		Input: []tf.Input{
-			resource, value,
+			x,
 		},
+		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Says whether the targets are in the top `K` predictions.
-//
-// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the
-// prediction for the target class is among the top `k` predictions among
-// all predictions for example `i`. Note that the behavior of `InTopK` differs
-// from the `TopK` op in its handling of ties; if multiple classes have the
-// same prediction value and straddle the top-`k` boundary, all of those
-// classes are considered to be in the top `k`.
-//
-// More formally, let
-//
-//   \\(predictions_i\\) be the predictions for all classes for example `i`,
-//   \\(targets_i\\) be the target class for example `i`,
-//   \\(out_i\\) be the output for example `i`,
-//
-// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$
+// Returns element-wise integer closest to x.
 //
-// Arguments:
-//	predictions: A `batch_size` x `classes` tensor.
-//	targets: A `batch_size` vector of class ids.
-//	k: Number of top elements to look at for computing precision.
+// If the result is midway between two representable values,
+// the even representable is chosen.
+// For example:
 //
-// Returns Computed precision at `k` as a `bool Tensor`.
-func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) {
+// ```
+// rint(-1.5) ==> -2.0
+// rint(0.5000001) ==> 1.0
+// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]
+// ```
+func Rint(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "InTopKV2",
+		Type: "Rint",
 		Input: []tf.Input{
-			predictions, targets, k,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// TakeManySparseFromTensorsMapAttr is an optional argument to TakeManySparseFromTensorsMap.
-type TakeManySparseFromTensorsMapAttr func(optionalAttr)
+// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey.
+type OrderedMapUnstageNoKeyAttr func(optionalAttr)
 
-// TakeManySparseFromTensorsMapContainer sets the optional container attribute to value.
+// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
 //
-// value: The container name for the `SparseTensorsMap` read by this op.
+// REQUIRES: value >= 0
+func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value.
 // If not specified, defaults to ""
-func TakeManySparseFromTensorsMapContainer(value string) TakeManySparseFromTensorsMapAttr {
+func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr {
 	return func(m optionalAttr) {
 		m["container"] = value
 	}
 }
 
-// TakeManySparseFromTensorsMapSharedName sets the optional shared_name attribute to value.
-//
-// value: The shared name for the `SparseTensorsMap` read by this op.
-// It should not be blank; rather the `shared_name` or unique Operation name
-// of the Op that created the original `SparseTensorsMap` should be used.
+// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value.
 // If not specified, defaults to ""
-func TakeManySparseFromTensorsMapSharedName(value string) TakeManySparseFromTensorsMapAttr {
+func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr {
 	return func(m optionalAttr) {
 		m["shared_name"] = value
 	}
 }
 
-// Read `SparseTensors` from a `SparseTensorsMap` and concatenate them.
-//
-// The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where
-// `N` is the minibatch size and the rows correspond to the output handles of
-// `AddSparseToTensorsMap` or `AddManySparseToTensorsMap`.  The ranks of the
-// original `SparseTensor` objects that went into the given input ops must all
-// match.  When the final `SparseTensor` is created, it has rank one
-// higher than the ranks of the incoming `SparseTensor` objects
-// (they have been concatenated along a new row dimension on the left).
-//
-// The output `SparseTensor` object's shape values for all dimensions but the
-// first are the max across the input `SparseTensor` objects' shape values
-// for the corresponding dimensions.  Its first shape value is `N`, the minibatch
-// size.
-//
-// The input `SparseTensor` objects' indices are assumed ordered in
-// standard lexicographic order.  If this is not the case, after this
-// step run `SparseReorder` to restore index ordering.
-//
-// For example, if the handles represent an input, which is a `[2, 3]` matrix
-// representing two original `SparseTensor` objects:
-//
-// ```
-//     index = [ 0]
-//             [10]
-//             [20]
-//     values = [1, 2, 3]
-//     shape = [50]
-// ```
-//
-// and
-//
-// ```
-//     index = [ 2]
-//             [10]
-//     values = [4, 5]
-//     shape = [30]
-// ```
-//
-// then the final `SparseTensor` will be:
-//
-// ```
-//     index = [0  0]
-//             [0 10]
-//             [0 20]
-//             [1  2]
-//             [1 10]
-//     values = [1, 2, 3, 4, 5]
-//     shape = [2 50]
-// ```
-//
-// Arguments:
-//	sparse_handles: 1-D, The `N` serialized `SparseTensor` objects.
-// Shape: `[N]`.
-//	dtype: The `dtype` of the `SparseTensor` objects stored in the
-// `SparseTensorsMap`.
+// Op removes and returns the (key, value) element with the smallest
 //
-// Returns 2-D.  The `indices` of the minibatch `SparseTensor`.1-D.  The `values` of the minibatch `SparseTensor`.1-D.  The `shape` of the minibatch `SparseTensor`.
-func TakeManySparseFromTensorsMap(scope *Scope, sparse_handles tf.Output, dtype tf.DataType, optional ...TakeManySparseFromTensorsMapAttr) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
+// key from the underlying container.   If the underlying container
+// does not contain elements, the op will block until it does.
+func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{"dtypes": dtypes}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TakeManySparseFromTensorsMap",
+		Type: "OrderedMapUnstageNoKey",
 		Input: []tf.Input{
-			sparse_handles,
+			indices,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// AddSparseToTensorsMapAttr is an optional argument to AddSparseToTensorsMap.
-type AddSparseToTensorsMapAttr func(optionalAttr)
-
-// AddSparseToTensorsMapContainer sets the optional container attribute to value.
-//
-// value: The container name for the `SparseTensorsMap` created by this op.
-// If not specified, defaults to ""
-func AddSparseToTensorsMapContainer(value string) AddSparseToTensorsMapAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	key = op.Output(idx)
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("OrderedMapUnstageNoKey", err)
+		return
 	}
+	return key, values
 }
 
-// AddSparseToTensorsMapSharedName sets the optional shared_name attribute to value.
+// MaxPool3DGradGradAttr is an optional argument to MaxPool3DGradGrad.
+type MaxPool3DGradGradAttr func(optionalAttr)
+
+// MaxPool3DGradGradDataFormat sets the optional data_format attribute to value.
 //
-// value: The shared name for the `SparseTensorsMap` created by this op.
-// If blank, the new Operation's unique name is used.
-// If not specified, defaults to ""
-func AddSparseToTensorsMapSharedName(value string) AddSparseToTensorsMapAttr {
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func MaxPool3DGradGradDataFormat(value string) MaxPool3DGradGradAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["data_format"] = value
 	}
 }
 
-// Add a `SparseTensor` to a `SparseTensorsMap` return its handle.
-//
-// A `SparseTensor` is represented by three tensors: `sparse_indices`,
-// `sparse_values`, and `sparse_shape`.
-//
-// This operator takes the given `SparseTensor` and adds it to a container
-// object (a `SparseTensorsMap`).  A unique key within this container is generated
-// in the form of an `int64`, and this is the value that is returned.
-//
-// The `SparseTensor` can then be read out as part of a minibatch by passing
-// the key as a vector element to `TakeManySparseFromTensorsMap`.  To ensure
-// the correct `SparseTensorsMap` is accessed, ensure that the same
-// `container` and `shared_name` are passed to that Op.  If no `shared_name`
-// is provided here, instead use the *name* of the Operation created by calling
-// `AddSparseToTensorsMap` as the `shared_name` passed to
-// `TakeManySparseFromTensorsMap`.  Ensure the Operations are colocated.
+// Computes second-order gradients of the maxpooling function.
 //
 // Arguments:
-//	sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
-//	sparse_values: 1-D.  The `values` of the `SparseTensor`.
-//	sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
+//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
+// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
 //
-// Returns 0-D.  The handle of the `SparseTensor` now stored in the
-// `SparseTensorsMap`.
-func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddSparseToTensorsMapAttr) (sparse_handle tf.Output) {
+// Returns Gradients of gradients w.r.t. the input to `max_pool`.
+func MaxPool3DGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AddSparseToTensorsMap",
+		Type: "MaxPool3DGradGrad",
 		Input: []tf.Input{
-			sparse_indices, sparse_values, sparse_shape,
+			orig_input, orig_output, grad,
 		},
 		Attrs: attrs,
 	}
@@ -11622,154 +11382,131 @@ func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values
 	return op.Output(0)
 }
 
-// FusedBatchNormGradV2Attr is an optional argument to FusedBatchNormGradV2.
-type FusedBatchNormGradV2Attr func(optionalAttr)
-
-// FusedBatchNormGradV2Epsilon sets the optional epsilon attribute to value.
-//
-// value: A small float number added to the variance of x.
-// If not specified, defaults to 0.0001
-func FusedBatchNormGradV2Epsilon(value float32) FusedBatchNormGradV2Attr {
-	return func(m optionalAttr) {
-		m["epsilon"] = value
-	}
-}
+// Conv3DBackpropFilterV2Attr is an optional argument to Conv3DBackpropFilterV2.
+type Conv3DBackpropFilterV2Attr func(optionalAttr)
 
-// FusedBatchNormGradV2DataFormat sets the optional data_format attribute to value.
+// Conv3DBackpropFilterV2DataFormat sets the optional data_format attribute to value.
 //
-// value: The data format for y_backprop, x, x_backprop.
-// Either "NHWC" (default) or "NCHW".
-// If not specified, defaults to "NHWC"
-func FusedBatchNormGradV2DataFormat(value string) FusedBatchNormGradV2Attr {
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func Conv3DBackpropFilterV2DataFormat(value string) Conv3DBackpropFilterV2Attr {
 	return func(m optionalAttr) {
 		m["data_format"] = value
 	}
 }
 
-// FusedBatchNormGradV2IsTraining sets the optional is_training attribute to value.
+// Conv3DBackpropFilterV2Dilations sets the optional dilations attribute to value.
 //
-// value: A bool value to indicate the operation is for training (default)
-// or inference.
-// If not specified, defaults to true
-func FusedBatchNormGradV2IsTraining(value bool) FusedBatchNormGradV2Attr {
+// value: 1-D tensor of length 5.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each
+// filter element on that dimension. The dimension order is determined by the
+// value of `data_format`, see above for details. Dilations in the batch and
+// depth dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 i:1 >
+func Conv3DBackpropFilterV2Dilations(value []int64) Conv3DBackpropFilterV2Attr {
 	return func(m optionalAttr) {
-		m["is_training"] = value
+		m["dilations"] = value
 	}
 }
 
-// Gradient for batch normalization.
-//
-// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
-// The size of 1D Tensors matches the dimension C of the 4D Tensors.
+// Computes the gradients of 3-D convolution with respect to the filter.
 //
 // Arguments:
-//	y_backprop: A 4D Tensor for the gradient with respect to y.
-//	x: A 4D Tensor for input data.
-//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
-//	reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
-// mean to be reused in gradient computation. When is_training is
-// False, a 1D Tensor for the population mean to be reused in both
-// 1st and 2nd order gradient computation.
-//	reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
-// variance (inverted variance in the cuDNN case) to be reused in
-// gradient computation. When is_training is False, a 1D Tensor
-// for the population variance to be reused in both 1st and 2nd
-// order gradient computation.
-//
-// Returns A 4D Tensor for the gradient with respect to x.A 1D Tensor for the gradient with respect to scale.A 1D Tensor for the gradient with respect to offset.Unused placeholder to match the mean input in FusedBatchNorm.Unused placeholder to match the variance input
-// in FusedBatchNorm.
-func FusedBatchNormGradV2(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradV2Attr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) {
+//	input: Shape `[batch, depth, rows, cols, in_channels]`.
+//	filter_sizes: An integer vector representing the tensor shape of `filter`,
+// where `filter` is a 5-D
+// `[filter_depth, filter_height, filter_width, in_channels, out_channels]`
+// tensor.
+//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+// out_channels]`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+func Conv3DBackpropFilterV2(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterV2Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FusedBatchNormGradV2",
+		Type: "Conv3DBackpropFilterV2",
 		Input: []tf.Input{
-			y_backprop, x, scale, reserve_space_1, reserve_space_2,
+			input, filter_sizes, out_backprop,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
+	return op.Output(0)
 }
 
-// Constructs a tensor by tiling a given tensor.
+// Execute a sub graph on a remote processor.
 //
-// This operation creates a new tensor by replicating `input` `multiples` times.
-// The output tensor's i'th dimension has `input.dims(i) * multiples[i]` elements,
-// and the values of `input` are replicated `multiples[i]` times along the 'i'th
-// dimension. For example, tiling `[a b c d]` by `[2]` produces
-// `[a b c d a b c d]`.
+// The graph specifications(such as graph itself, input tensors and output names)
+// are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo
+// as serialized_remote_fused_graph_execute_info.
+// The specifications will be passed to a dedicated registered
+// remote fused graph executor.  The executor will send the graph specifications
+// to a remote processor and execute that graph.  The execution results
+// will be passed to consumer nodes as outputs of this node.
 //
 // Arguments:
-//	input: 1-D or higher.
-//	multiples: 1-D. Length must be the same as the number of dimensions in `input`
-func Tile(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Output) {
+//	inputs: Arbitrary number of tensors with arbitrary data types
+//
+//	serialized_remote_fused_graph_execute_info: Serialized protocol buffer
+// of RemoteFusedGraphExecuteInfo which contains graph specifications.
+//
+// Returns Arbitrary number of tensors with arbitrary data types
+func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.DataType, serialized_remote_fused_graph_execute_info string) (outputs []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"Toutputs": Toutputs, "serialized_remote_fused_graph_execute_info": serialized_remote_fused_graph_execute_info}
 	opspec := tf.OpSpec{
-		Type: "Tile",
+		Type: "RemoteFusedGraphExecute",
 		Input: []tf.Input{
-			input, multiples,
+			tf.OutputList(inputs),
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the element-wise min of two SparseTensors.
-//
-// Assumes the two SparseTensors have the same shape, i.e., no broadcasting.
-//
-// Arguments:
-//	a_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, in the canonical lexicographic ordering.
-//	a_values: 1-D.  `N` non-empty values corresponding to `a_indices`.
-//	a_shape: 1-D.  Shape of the input SparseTensor.
-//	b_indices: counterpart to `a_indices` for the other operand.
-//	b_values: counterpart to `a_values` for the other operand; must be of the same dtype.
-//	b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal.
-//
-// Returns 2-D.  The indices of the output SparseTensor.1-D.  The values of the output SparseTensor.
-func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "SparseSparseMinimum",
-		Input: []tf.Input{
-			a_indices, a_values, a_shape, b_indices, b_values, b_shape,
-		},
+	var idx int
+	var err error
+	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
+		scope.UpdateErr("RemoteFusedGraphExecute", err)
+		return
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return outputs
 }
 
-// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler.
-type AllCandidateSamplerAttr func(optionalAttr)
+// ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler.
+type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr)
 
-// AllCandidateSamplerSeed sets the optional seed attribute to value.
+// ThreadUnsafeUnigramCandidateSamplerSeed sets the optional seed attribute to value.
 //
 // value: If either seed or seed2 are set to be non-zero, the random number
 // generator is seeded by the given seed.  Otherwise, it is seeded by a
 // random seed.
 // If not specified, defaults to 0
-func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr {
+func ThreadUnsafeUnigramCandidateSamplerSeed(value int64) ThreadUnsafeUnigramCandidateSamplerAttr {
 	return func(m optionalAttr) {
 		m["seed"] = value
 	}
 }
 
-// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+// ThreadUnsafeUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value.
 //
 // value: An second seed to avoid seed collision.
 // If not specified, defaults to 0
-func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr {
+func ThreadUnsafeUnigramCandidateSamplerSeed2(value int64) ThreadUnsafeUnigramCandidateSamplerAttr {
 	return func(m optionalAttr) {
 		m["seed2"] = value
 	}
@@ -11791,10 +11528,11 @@ func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr {
 //	true_classes: A batch_size * num_true matrix, in which each row contains the
 // IDs of the num_true target_classes in the corresponding original label.
 //	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to produce.
+//	num_sampled: Number of candidates to randomly sample.
 //	unique: If unique is true, we sample with rejection, so that all sampled
 // candidates in a batch are unique. This requires some approximation to
 // estimate the post-rejection sampling probabilities.
+//	range_max: The sampler will sample integers from the interval [0, range_max).
 //
 // Returns A vector of length num_sampled, in which each element is
 // the ID of a sampled candidate.A batch_size * num_true matrix, representing
@@ -11803,134 +11541,64 @@ func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr {
 // candidate representing the number of times the candidate is expected
 // to occur in a batch of sampled candidates.  If unique=true, then this is a
 // probability.
-func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+func ThreadUnsafeUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...ThreadUnsafeUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AllCandidateSampler",
+		Type: "ThreadUnsafeUnigramCandidateSampler",
 		Input: []tf.Input{
-			true_classes,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg.
-type DecodeAndCropJpegAttr func(optionalAttr)
-
-// DecodeAndCropJpegChannels sets the optional channels attribute to value.
-//
-// value: Number of color channels for the decoded image.
-// If not specified, defaults to 0
-func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr {
-	return func(m optionalAttr) {
-		m["channels"] = value
-	}
-}
-
-// DecodeAndCropJpegRatio sets the optional ratio attribute to value.
-//
-// value: Downscaling ratio.
-// If not specified, defaults to 1
-func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr {
-	return func(m optionalAttr) {
-		m["ratio"] = value
-	}
-}
-
-// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value.
-//
-// value: If true use a slower but nicer upscaling of the
-// chroma planes (yuv420/422 only).
-// If not specified, defaults to true
-func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr {
-	return func(m optionalAttr) {
-		m["fancy_upscaling"] = value
-	}
-}
-
-// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value.
-//
-// value: If true try to recover an image from truncated input.
-// If not specified, defaults to false
-func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr {
-	return func(m optionalAttr) {
-		m["try_recover_truncated"] = value
+			true_classes,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value.
-//
-// value: The minimum required fraction of lines before a truncated
-// input is accepted.
-// If not specified, defaults to 1
-func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr {
-	return func(m optionalAttr) {
-		m["acceptable_fraction"] = value
-	}
-}
+// MaxPoolV2Attr is an optional argument to MaxPoolV2.
+type MaxPoolV2Attr func(optionalAttr)
 
-// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value.
+// MaxPoolV2DataFormat sets the optional data_format attribute to value.
 //
-// value: string specifying a hint about the algorithm used for
-// decompression.  Defaults to "" which maps to a system-specific
-// default.  Currently valid values are ["INTEGER_FAST",
-// "INTEGER_ACCURATE"].  The hint may be ignored (e.g., the internal
-// jpeg library changes to a version that does not have that specific
-// option.)
-// If not specified, defaults to ""
-func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func MaxPoolV2DataFormat(value string) MaxPoolV2Attr {
 	return func(m optionalAttr) {
-		m["dct_method"] = value
+		m["data_format"] = value
 	}
 }
 
-// Decode and Crop a JPEG-encoded image to a uint8 tensor.
-//
-// The attr `channels` indicates the desired number of color channels for the
-// decoded image.
-//
-// Accepted values are:
-//
-// *   0: Use the number of channels in the JPEG-encoded image.
-// *   1: output a grayscale image.
-// *   3: output an RGB image.
-//
-// If needed, the JPEG-encoded image is transformed to match the requested number
-// of color channels.
-//
-// The attr `ratio` allows downscaling the image by an integer factor during
-// decoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than
-// downscaling the image later.
-//
-//
-// It is equivalent to a combination of decode and crop, but much faster by only
-// decoding partial jpeg image.
+// Performs max pooling on the input.
 //
 // Arguments:
-//	contents: 0-D.  The JPEG-encoded image.
-//	crop_window: 1-D.  The crop window: [crop_y, crop_x, crop_height, crop_width].
+//	input: 4-D input to pool over.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
 //
-// Returns 3-D with shape `[height, width, channels]`..
-func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) {
+// Returns The max pooled output tensor.
+func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DecodeAndCropJpeg",
+		Type: "MaxPoolV2",
 		Input: []tf.Input{
-			contents, crop_window,
+			input, ksize, strides,
 		},
 		Attrs: attrs,
 	}
@@ -11938,72 +11606,69 @@ func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output,
 	return op.Output(0)
 }
 
-// RandomPoissonV2Attr is an optional argument to RandomPoissonV2.
-type RandomPoissonV2Attr func(optionalAttr)
-
-// RandomPoissonV2Seed sets the optional seed attribute to value.
-//
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr {
-	return func(m optionalAttr) {
-		m["seed"] = value
+// Deprecated. Use TensorArrayReadV3
+func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayReadV2",
+		Input: []tf.Input{
+			handle, index, flow_in,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// RandomPoissonV2Seed2 sets the optional seed2 attribute to value.
+// Does nothing. Serves as a control trigger for scheduling.
 //
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
+// Only useful as a placeholder for control edges.
+//
+// Returns the created operation.
+func ControlTrigger(scope *Scope) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// RandomPoissonV2Dtype sets the optional dtype attribute to value.
-// If not specified, defaults to DT_INT64
-func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr {
-	return func(m optionalAttr) {
-		m["dtype"] = value
+	opspec := tf.OpSpec{
+		Type: "ControlTrigger",
 	}
+	return scope.AddOperation(opspec)
 }
 
-// Outputs random values from the Poisson distribution(s) described by rate.
+// Batch normalization.
 //
-// This op uses two algorithms, depending on rate. If rate >= 10, then
-// the algorithm by Hormann is used to acquire samples via
-// transformation-rejection.
-// See http://www.sciencedirect.com/science/article/pii/0167668793909974.
+// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization()
 //
-// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform
-// random variables.
-// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer
-// Programming, Volume 2. Addison Wesley
+// This op is deprecated. Prefer `tf.nn.batch_normalization`.
 //
 // Arguments:
-//	shape: 1-D integer tensor. Shape of independent samples to draw from each
-// distribution described by the shape parameters given in rate.
-//	rate: A tensor in which each scalar is a "rate" parameter describing the
-// associated poisson distribution.
-//
-// Returns A tensor with shape `shape + shape(rate)`. Each slice
-// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
-// `rate[i0, i1, ...iN]`.
-func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) {
+//	t: A 4D input Tensor.
+//	m: A 1D mean Tensor with size matching the last dimension of t.
+// This is the first output from tf.nn.moments,
+// or a saved moving average thereof.
+//	v: A 1D variance Tensor with size matching the last dimension of t.
+// This is the second output from tf.nn.moments,
+// or a saved moving average thereof.
+//	beta: A 1D beta Tensor with size matching the last dimension of t.
+// An offset to be added to the normalized tensor.
+//	gamma: A 1D gamma Tensor with size matching the last dimension of t.
+// If "scale_after_normalization" is true, this tensor will be multiplied
+// with the normalized tensor.
+//	variance_epsilon: A small float number to avoid dividing by 0.
+//	scale_after_normalization: A bool indicating whether the resulted tensor
+// needs to be multiplied with gamma.
+func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization}
 	opspec := tf.OpSpec{
-		Type: "RandomPoissonV2",
+		Type: "BatchNormWithGlobalNormalization",
 		Input: []tf.Input{
-			shape, rate,
+			t, m, v, beta, gamma,
 		},
 		Attrs: attrs,
 	}
@@ -12011,393 +11676,422 @@ func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...
 	return op.Output(0)
 }
 
-// OrderedMapPeekAttr is an optional argument to OrderedMapPeek.
-type OrderedMapPeekAttr func(optionalAttr)
+// MutableDenseHashTableV2Attr is an optional argument to MutableDenseHashTableV2.
+type MutableDenseHashTableV2Attr func(optionalAttr)
 
-// OrderedMapPeekCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// MutableDenseHashTableV2Container sets the optional container attribute to value.
 //
-// REQUIRES: value >= 0
-func OrderedMapPeekCapacity(value int64) OrderedMapPeekAttr {
+// value: If non-empty, this table is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func MutableDenseHashTableV2Container(value string) MutableDenseHashTableV2Attr {
 	return func(m optionalAttr) {
-		m["capacity"] = value
+		m["container"] = value
 	}
 }
 
-// OrderedMapPeekMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// MutableDenseHashTableV2SharedName sets the optional shared_name attribute to value.
 //
-// REQUIRES: value >= 0
-func OrderedMapPeekMemoryLimit(value int64) OrderedMapPeekAttr {
+// value: If non-empty, this table is shared under the given name across
+// multiple sessions.
+// If not specified, defaults to ""
+func MutableDenseHashTableV2SharedName(value string) MutableDenseHashTableV2Attr {
 	return func(m optionalAttr) {
-		m["memory_limit"] = value
+		m["shared_name"] = value
 	}
 }
 
-// OrderedMapPeekContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func OrderedMapPeekContainer(value string) OrderedMapPeekAttr {
+// MutableDenseHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
+// If not specified, defaults to false
+func MutableDenseHashTableV2UseNodeNameSharing(value bool) MutableDenseHashTableV2Attr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["use_node_name_sharing"] = value
 	}
 }
 
-// OrderedMapPeekSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func OrderedMapPeekSharedName(value string) OrderedMapPeekAttr {
+// MutableDenseHashTableV2ValueShape sets the optional value_shape attribute to value.
+//
+// value: The shape of each value.
+// If not specified, defaults to <>
+func MutableDenseHashTableV2ValueShape(value tf.Shape) MutableDenseHashTableV2Attr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["value_shape"] = value
 	}
 }
 
-// Op peeks at the values at the specified key.  If the
+// MutableDenseHashTableV2InitialNumBuckets sets the optional initial_num_buckets attribute to value.
 //
-// underlying container does not contain this key
-// this op will block until it does.   This Op is optimized for
-// performance.
-func OrderedMapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapPeekAttr) (values []tf.Output) {
+// value: The initial number of hash table buckets. Must be a power
+// to 2.
+// If not specified, defaults to 131072
+func MutableDenseHashTableV2InitialNumBuckets(value int64) MutableDenseHashTableV2Attr {
+	return func(m optionalAttr) {
+		m["initial_num_buckets"] = value
+	}
+}
+
+// MutableDenseHashTableV2MaxLoadFactor sets the optional max_load_factor attribute to value.
+//
+// value: The maximum ratio between number of entries and number of
+// buckets before growing the table. Must be between 0 and 1.
+// If not specified, defaults to 0.8
+func MutableDenseHashTableV2MaxLoadFactor(value float32) MutableDenseHashTableV2Attr {
+	return func(m optionalAttr) {
+		m["max_load_factor"] = value
+	}
+}
+
+// Creates an empty hash table that uses tensors as the backing store.
+//
+// It uses "open addressing" with quadratic reprobing to resolve
+// collisions.
+//
+// This op creates a mutable hash table, specifying the type of its keys and
+// values. Each value must be a scalar. Data can be inserted into the table using
+// the insert operations. It does not support the initialization operation.
+//
+// Arguments:
+//	empty_key: The key used to represent empty key buckets internally. Must not
+// be used in insert or lookup operations.
+//	value_dtype: Type of the table values.
+//
+// Returns Handle to a table.
+func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
+	attrs := map[string]interface{}{"value_dtype": value_dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "OrderedMapPeek",
+		Type: "MutableDenseHashTableV2",
 		Input: []tf.Input{
-			key, indices,
+			empty_key,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Produces the max pool of the input tensor for quantized types.
+//
+// Arguments:
+//	input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
+//	min_input: The float value that the lowest quantized input value represents.
+//	max_input: The float value that the highest quantized input value represents.
+//	ksize: The size of the window for each dimension of the input tensor.
+// The length must be 4 to match the number of dimensions of the input.
+//	strides: The stride of the sliding window for each dimension of the input
+// tensor. The length must be 4 to match the number of dimensions of the input.
+//	padding: The type of padding algorithm to use.
+//
+// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
+func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	var idx int
-	var err error
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("OrderedMapPeek", err)
-		return
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "QuantizedMaxPool",
+		Input: []tf.Input{
+			input, min_input, max_input,
+		},
+		Attrs: attrs,
 	}
-	return values
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Adds two `SparseTensor` objects to produce another `SparseTensor`.
-//
-// The input `SparseTensor` objects' indices are assumed ordered in standard
-// lexicographic order.  If this is not the case, before this step run
-// `SparseReorder` to restore index ordering.
-//
-// By default, if two values sum to zero at some index, the output `SparseTensor`
-// would still include that particular location in its index, storing a zero in the
-// corresponding value slot.  To override this, callers can specify `thresh`,
-// indicating that if the sum has a magnitude strictly smaller than `thresh`, its
-// corresponding value and index would then not be included.  In particular,
-// `thresh == 0` (default) means everything is kept and actual thresholding happens
-// only for a positive value.
-//
-// In the following shapes, `nnz` is the count after taking `thresh` into account.
-//
-// Arguments:
-//	a_indices: 2-D.  The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix.
-//	a_values: 1-D.  The `values` of the first `SparseTensor`, size `[nnz]` Vector.
-//	a_shape: 1-D.  The `shape` of the first `SparseTensor`, size `[ndims]` Vector.
-//	b_indices: 2-D.  The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix.
-//	b_values: 1-D.  The `values` of the second `SparseTensor`, size `[nnz]` Vector.
-//	b_shape: 1-D.  The `shape` of the second `SparseTensor`, size `[ndims]` Vector.
-//	thresh: 0-D.  The magnitude threshold that determines if an output value/index
-// pair takes space.
-func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) {
+// Computes softplus: `log(exp(features) + 1)`.
+func Softplus(scope *Scope, features tf.Output) (activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseAdd",
+		Type: "Softplus",
 		Input: []tf.Input{
-			a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh,
+			features,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Computes the gradient of the sigmoid of `x` wrt its input.
+// Computes exponential of x - 1 element-wise.
 //
-// Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and
-// `dy` is the corresponding input gradient.
-func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+// I.e., \\(y = (\exp x) - 1\\).
+func Expm1(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SigmoidGrad",
+		Type: "Expm1",
 		Input: []tf.Input{
-			y, dy,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Subtracts a value from the current value of a variable.
-//
-// Any ReadVariableOp which depends directly or indirectly on this assign is
-// guaranteed to see the incremented value or a subsequent newer one.
+// Returns the number of records this Reader has produced.
 //
-// Outputs the incremented value, which can be used to totally order the
-// increments to this variable.
+// This is the same as the number of ReaderRead executions that have
+// succeeded.
 //
 // Arguments:
-//	resource: handle to the resource in which to store the variable.
-//	value: the value by which the variable will be incremented.
-//
-// Returns the created operation.
-func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
+//	reader_handle: Handle to a Reader.
+func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "AssignSubVariableOp",
+		Type: "ReaderNumRecordsProducedV2",
 		Input: []tf.Input{
-			resource, value,
+			reader_handle,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// SparseReduceMaxAttr is an optional argument to SparseReduceMax.
-type SparseReduceMaxAttr func(optionalAttr)
-
-// SparseReduceMaxKeepDims sets the optional keep_dims attribute to value.
+// Computes the sum along segments of a tensor.
 //
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func SparseReduceMaxKeepDims(value bool) SparseReduceMaxAttr {
-	return func(m optionalAttr) {
-		m["keep_dims"] = value
-	}
-}
-
-// Computes the max of elements across dimensions of a SparseTensor.
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
 //
-// This Op takes a SparseTensor and is the sparse counterpart to
-// `tf.reduce_max()`.  In particular, this Op also returns a dense `Tensor`
-// instead of a sparse one.
+// Computes a tensor such that
+// \\(output_i = \sum_j data_j\\) where sum is over `j` such
+// that `segment_ids[j] == i`.
 //
-// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-// with length 1.
+// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
 //
-// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-// with a single element is returned.  Additionally, the axes can be negative,
-// which are interpreted according to the indexing rules in Python.
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentSum.png" alt>
+// </div>
 //
 // Arguments:
-//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-//	input_shape: 1-D.  Shape of the input SparseTensor.
-//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
 //
-// Returns `R-K`-D.  The reduced Tensor.
-func SparseReduceMax(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxAttr) (output tf.Output) {
+//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "SparseReduceMax",
+		Type: "SegmentSum",
 		Input: []tf.Input{
-			input_indices, input_values, input_shape, reduction_axes,
+			data, segment_ids,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Conv3DBackpropFilterV2Attr is an optional argument to Conv3DBackpropFilterV2.
-type Conv3DBackpropFilterV2Attr func(optionalAttr)
-
-// Conv3DBackpropFilterV2DataFormat sets the optional data_format attribute to value.
+// Creates a dataset that emits the lines of one or more text files.
 //
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func Conv3DBackpropFilterV2DataFormat(value string) Conv3DBackpropFilterV2Attr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
+// Arguments:
+//	filenames: A scalar or a vector containing the name(s) of the file(s) to be
+// read.
+//	compression_type: A scalar containing either (i) the empty string (no
+// compression), (ii) "ZLIB", or (iii) "GZIP".
+//	buffer_size: A scalar containing the number of bytes to buffer.
+func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
+	opspec := tf.OpSpec{
+		Type: "TextLineDataset",
+		Input: []tf.Input{
+			filenames, compression_type, buffer_size,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Computes the gradients of 3-D convolution with respect to the filter.
+// Checks whether a resource handle-based variable has been initialized.
 //
 // Arguments:
-//	input: Shape `[batch, depth, rows, cols, in_channels]`.
-//	filter_sizes: An integer vector representing the tensor shape of `filter`,
-// where `filter` is a 5-D
-// `[filter_depth, filter_height, filter_width, in_channels, out_channels]`
-// tensor.
-//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
-// out_channels]`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
-func Conv3DBackpropFilterV2(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterV2Attr) (output tf.Output) {
+//	resource: the input resource handle.
+//
+// Returns a scalar boolean which is true if the variable has been
+// initialized.
+func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "Conv3DBackpropFilterV2",
+		Type: "VarIsInitializedOp",
 		Input: []tf.Input{
-			input, filter_sizes, out_backprop,
+			resource,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Execute a sub graph on a remote processor.
+// Pads a tensor with zeros.
 //
-// The graph specifications(such as graph itself, input tensors and output names)
-// are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo
-// as serialized_remote_fused_graph_execute_info.
-// The specifications will be passed to a dedicated registered
-// remote fused graph executor.  The executor will send the graph specifications
-// to a remote processor and execute that graph.  The execution results
-// will be passed to consumer nodes as outputs of this node.
+// This operation pads a `input` with zeros according to the `paddings` you
+// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the
+// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
+// how many zeros to add before the contents of `input` in that dimension, and
+// `paddings[D, 1]` indicates how many zeros to add after the contents of `input`
+// in that dimension.
 //
-// Arguments:
-//	inputs: Arbitrary number of tensors with arbitrary data types
+// The padded size of each dimension D of the output is:
 //
-//	serialized_remote_fused_graph_execute_info: Serialized protocol buffer
-// of RemoteFusedGraphExecuteInfo which contains graph specifications.
+// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
 //
-// Returns Arbitrary number of tensors with arbitrary data types
-func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.DataType, serialized_remote_fused_graph_execute_info string) (outputs []tf.Output) {
+// For example:
+//
+// ```
+// # 't' is [[1, 1], [2, 2]]
+// # 'paddings' is [[1, 1], [2, 2]]
+// # rank of 't' is 2
+// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
+//                       [0, 0, 1, 1, 0, 0]
+//                       [0, 0, 2, 2, 0, 0]
+//                       [0, 0, 0, 0, 0, 0]]
+// ```
+func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"Toutputs": Toutputs, "serialized_remote_fused_graph_execute_info": serialized_remote_fused_graph_execute_info}
 	opspec := tf.OpSpec{
-		Type: "RemoteFusedGraphExecute",
+		Type: "Pad",
 		Input: []tf.Input{
-			tf.OutputList(inputs),
+			input, paddings,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
+	return op.Output(0)
+}
+
+// SparseTensorDenseMatMulAttr is an optional argument to SparseTensorDenseMatMul.
+type SparseTensorDenseMatMulAttr func(optionalAttr)
+
+// SparseTensorDenseMatMulAdjointA sets the optional adjoint_a attribute to value.
+//
+// value: Use the adjoint of A in the matrix multiply.  If A is complex, this
+// is transpose(conj(A)).  Otherwise it's transpose(A).
+// If not specified, defaults to false
+func SparseTensorDenseMatMulAdjointA(value bool) SparseTensorDenseMatMulAttr {
+	return func(m optionalAttr) {
+		m["adjoint_a"] = value
 	}
-	var idx int
-	var err error
-	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
-		scope.UpdateErr("RemoteFusedGraphExecute", err)
-		return
+}
+
+// SparseTensorDenseMatMulAdjointB sets the optional adjoint_b attribute to value.
+//
+// value: Use the adjoint of B in the matrix multiply.  If B is complex, this
+// is transpose(conj(B)).  Otherwise it's transpose(B).
+// If not specified, defaults to false
+func SparseTensorDenseMatMulAdjointB(value bool) SparseTensorDenseMatMulAttr {
+	return func(m optionalAttr) {
+		m["adjoint_b"] = value
 	}
-	return outputs
 }
 
-// Computes numerical negative value element-wise.
+// Multiply SparseTensor (of rank 2) "A" by dense matrix "B".
 //
-// I.e., \\(y = -x\\).
-func Neg(scope *Scope, x tf.Output) (y tf.Output) {
+// No validity checking is performed on the indices of A.  However, the following
+// input format is recommended for optimal behavior:
+//
+// if adjoint_a == false:
+//   A should be sorted in lexicographically increasing order.  Use SparseReorder
+//   if you're not sure.
+// if adjoint_a == true:
+//   A should be sorted in order of increasing dimension 1 (i.e., "column major"
+//   order instead of "row major" order).
+//
+// Arguments:
+//	a_indices: 2-D.  The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix.
+//	a_values: 1-D.  The `values` of the `SparseTensor`, size `[nnz]` Vector.
+//	a_shape: 1-D.  The `shape` of the `SparseTensor`, size `[2]` Vector.
+//	b: 2-D.  A dense Matrix.
+func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output, optional ...SparseTensorDenseMatMulAttr) (product tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Neg",
+		Type: "SparseTensorDenseMatMul",
 		Input: []tf.Input{
-			x,
+			a_indices, a_values, a_shape, b,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation.
-type SparseToSparseSetOperationAttr func(optionalAttr)
-
-// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr {
-	return func(m optionalAttr) {
-		m["validate_indices"] = value
-	}
-}
-
-// Applies set operation along last dimension of 2 `SparseTensor` inputs.
+// Deserialize and concatenate `SparseTensors` from a serialized minibatch.
 //
-// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
+// The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where
+// `N` is the minibatch size and the rows correspond to packed outputs of
+// `SerializeSparse`.  The ranks of the original `SparseTensor` objects
+// must all match.  When the final `SparseTensor` is created, it has rank one
+// higher than the ranks of the incoming `SparseTensor` objects
+// (they have been concatenated along a new row dimension).
 //
-// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the
-// order and range of `set1` and `set2` indices.
+// The output `SparseTensor` object's shape values for all dimensions but the
+// first are the max across the input `SparseTensor` objects' shape values
+// for the corresponding dimensions.  Its first shape value is `N`, the minibatch
+// size.
 //
-// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`,
-// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same
-// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but
-// ignored.
+// The input `SparseTensor` objects' indices are assumed ordered in
+// standard lexicographic order.  If this is not the case, after this
+// step run `SparseReorder` to restore index ordering.
 //
-// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
-// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
-// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
-// ignored.
+// For example, if the serialized input is a `[2 x 3]` matrix representing two
+// original `SparseTensor` objects:
 //
-// If `validate_indices` is `True`, this op validates the order and range of `set1`
-// and `set2` indices.
+//     index = [ 0]
+//             [10]
+//             [20]
+//     values = [1, 2, 3]
+//     shape = [50]
 //
-// Output `result` is a `SparseTensor` represented by `result_indices`,
-// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
-// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
-// dimension contains the result of `set_operation` applied to the corresponding
-// `[0...n-1]` dimension of `set`.
+// and
 //
-// Arguments:
-//	set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
-// order.
-//	set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
-// order.
-//	set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must
-// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the
-// max set size across `0...n-1` dimensions.
-//	set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
-// order.
-//	set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
-// order.
-//	set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
-// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the
-// max set size across `0...n-1` dimensions.
+//     index = [ 2]
+//             [10]
+//     values = [4, 5]
+//     shape = [30]
 //
+// then the final deserialized `SparseTensor` will be:
 //
-// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
-// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
-// is the max result set size across all `0...n-1` dimensions.
-func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
+//     index = [0  0]
+//             [0 10]
+//             [0 20]
+//             [1  2]
+//             [1 10]
+//     values = [1, 2, 3, 4, 5]
+//     shape = [2 50]
+//
+// Arguments:
+//	serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects.
+// Must have 3 columns.
+//	dtype: The `dtype` of the serialized `SparseTensor` objects.
+func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"set_operation": set_operation}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"dtype": dtype}
 	opspec := tf.OpSpec{
-		Type: "SparseToSparseSetOperation",
+		Type: "DeserializeManySparse",
 		Input: []tf.Input{
-			set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape,
+			serialized_sparse,
 		},
 		Attrs: attrs,
 	}
@@ -12405,196 +12099,176 @@ func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_value
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Elementwise computes the bitwise OR of `x` and `y`.
+// StringJoinAttr is an optional argument to StringJoin.
+type StringJoinAttr func(optionalAttr)
+
+// StringJoinSeparator sets the optional separator attribute to value.
 //
-// The result will have those bits set, that are set in `x`, `y` or both. The
-// computation is performed on the underlying representations of `x` and `y`.
-func BitwiseOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// value: string, an optional join separator.
+// If not specified, defaults to ""
+func StringJoinSeparator(value string) StringJoinAttr {
+	return func(m optionalAttr) {
+		m["separator"] = value
+	}
+}
+
+// Joins the strings in the given list of string tensors into one tensor;
+//
+// with the given separator (default is an empty separator).
+//
+// Arguments:
+//	inputs: A list of string tensors.  The tensors must all have the same shape,
+// or be scalars.  Scalars may be mixed in; these will be broadcast to the shape
+// of non-scalar inputs.
+func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "BitwiseOr",
+		Type: "StringJoin",
 		Input: []tf.Input{
-			x, y,
+			tf.OutputList(inputs),
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`.
+// Returns immutable tensor from memory region.
 //
-// This Op does not require `a_indices` be sorted in standard lexicographic order.
+// The current implementation memmaps the tensor from a file.
 //
 // Arguments:
-//	a_indices: 2-D.  The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`.
-//	a_values: 1-D.  The `values` of the `SparseTensor`, with shape `[nnz]`.
-//	a_shape: 1-D.  The `shape` of the `SparseTensor`, with shape `[ndims]`.
-//	b: `ndims`-D Tensor.  With shape `a_shape`.
-func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) {
+//	dtype: Type of the returned tensor.
+//	shape: Shape of the returned tensor.
+//	memory_region_name: Name of readonly memory region used by the tensor, see
+// NewReadOnlyMemoryRegionFromFile in tensorflow::Env.
+func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name}
 	opspec := tf.OpSpec{
-		Type: "SparseTensorDenseAdd",
-		Input: []tf.Input{
-			a_indices, a_values, a_shape, b,
-		},
+		Type: "ImmutableConst",
+
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// AvgPoolAttr is an optional argument to AvgPool.
-type AvgPoolAttr func(optionalAttr)
-
-// AvgPoolDataFormat sets the optional data_format attribute to value.
+// Inverse real-valued fast Fourier transform.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func AvgPoolDataFormat(value string) AvgPoolAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Performs average pooling on the input.
+// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued
+// signal over the inner-most dimension of `input`.
 //
-// Each entry in `output` is the mean of the corresponding size `ksize`
-// window in `value`.
+// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the
+// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If
+// `fft_length` is not provided, it is computed from the size of the inner-most
+// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to
+// compute `input` is odd, it should be provided since it cannot be inferred
+// properly.
+//
+// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller
+// than the corresponding dimension of `input`, the dimension is cropped. If it is
+// larger, the dimension is padded with zeros.
 //
 // Arguments:
-//	value: 4-D with shape `[batch, height, width, channels]`.
-//	ksize: The size of the sliding window for each dimension of `value`.
-//	strides: The stride of the sliding window for each dimension of `value`.
-//	padding: The type of padding algorithm to use.
+//	input: A complex64 tensor.
+//	fft_length: An int32 tensor of shape [1]. The FFT length.
 //
-// Returns The average pooled output tensor.
-func AvgPool(scope *Scope, value tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolAttr) (output tf.Output) {
+// Returns A float32 tensor of the same rank as `input`. The inner-most
+//   dimension of `input` is replaced with the `fft_length` samples of its inverse
+//   1D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.irfft
+// @end_compatibility
+func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "AvgPool",
+		Type: "IRFFT",
 		Input: []tf.Input{
-			value,
+			input, fft_length,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Slice a `SparseTensor` based on the `start` and `size`.
-//
-// For example, if the input is
+// Concatenates a list of `SparseTensor` along the specified dimension.
 //
-//     input_tensor = shape = [2, 7]
-//     [    a   d e  ]
-//     [b c          ]
+// Concatenation is with respect to the dense versions of these sparse tensors.
+// It is assumed that each input is a `SparseTensor` whose elements are ordered
+// along increasing dimension number.
 //
-// Graphically the output tensors are:
+// All inputs' shapes must match, except for the concat dimension.  The
+// `indices`, `values`, and `shapes` lists must have the same length.
 //
-//     sparse_slice([0, 0], [2, 4]) = shape = [2, 4]
-//     [    a  ]
-//     [b c    ]
+// The output shape is identical to the inputs', except along the concat
+// dimension, where it is the sum of the inputs' sizes along that dimension.
 //
-//     sparse_slice([0, 4], [2, 3]) = shape = [2, 3]
-//     [ d e  ]
-//     [      ]
+// The output elements will be resorted to preserve the sort order along
+// increasing dimension number.
 //
-// Arguments:
-//	indices: 2-D tensor represents the indices of the sparse tensor.
-//	values: 1-D tensor represents the values of the sparse tensor.
-//	shape: 1-D. tensor represents the shape of the sparse tensor.
-//	start: 1-D. tensor represents the start of the slice.
-//	size: 1-D. tensor represents the size of the slice.
-// output indices: A list of 1-D tensors represents the indices of the output
-// sparse tensors.
+// This op runs in `O(M log M)` time, where `M` is the total number of non-empty
+// values across all inputs. This is due to the need for an internal sort in
+// order to concatenate efficiently across an arbitrary dimension.
 //
-// Returns A list of 1-D tensors represents the values of the output sparse
-// tensors.A list of 1-D tensors represents the shape of the output sparse
-// tensors.
-func SparseSlice(scope *Scope, indices tf.Output, values tf.Output, shape tf.Output, start tf.Output, size tf.Output) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSlice",
-		Input: []tf.Input{
-			indices, values, shape, start, size,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// ListDiffAttr is an optional argument to ListDiff.
-type ListDiffAttr func(optionalAttr)
-
-// ListDiffOutIdx sets the optional out_idx attribute to value.
-// If not specified, defaults to DT_INT32
-func ListDiffOutIdx(value tf.DataType) ListDiffAttr {
-	return func(m optionalAttr) {
-		m["out_idx"] = value
-	}
-}
-
-// Computes the difference between two lists of numbers or strings.
+// For example, if `concat_dim = 1` and the inputs are
 //
-// Given a list `x` and a list `y`, this operation returns a list `out` that
-// represents all values that are in `x` but not in `y`. The returned list `out`
-// is sorted in the same order that the numbers appear in `x` (duplicates are
-// preserved). This operation also returns a list `idx` that represents the
-// position of each `out` element in `x`. In other words:
+//     sp_inputs[0]: shape = [2, 3]
+//     [0, 2]: "a"
+//     [1, 0]: "b"
+//     [1, 1]: "c"
 //
-// `out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]`
+//     sp_inputs[1]: shape = [2, 4]
+//     [0, 1]: "d"
+//     [0, 2]: "e"
 //
-// For example, given this input:
+// then the output will be
 //
-// ```
-// x = [1, 2, 3, 4, 5, 6]
-// y = [1, 3, 5]
-// ```
+//     shape = [2, 7]
+//     [0, 2]: "a"
+//     [0, 4]: "d"
+//     [0, 5]: "e"
+//     [1, 0]: "b"
+//     [1, 1]: "c"
 //
-// This operation would return:
+// Graphically this is equivalent to doing
 //
-// ```
-// out ==> [2, 4, 6]
-// idx ==> [1, 3, 5]
-// ```
+//     [    a] concat [  d e  ] = [    a   d e  ]
+//     [b c  ]        [       ]   [b c          ]
 //
 // Arguments:
-//	x: 1-D. Values to keep.
-//	y: 1-D. Values to remove.
+//	indices: 2-D.  Indices of each input `SparseTensor`.
+//	values: 1-D.  Non-empty values of each `SparseTensor`.
+//	shapes: 1-D.  Shapes of each `SparseTensor`.
+//	concat_dim: Dimension to concatenate along. Must be in range [-rank, rank),
+// where rank is the number of dimensions in each input `SparseTensor`.
 //
-// Returns 1-D. Values present in `x` but not in `y`.1-D. Positions of `x` values preserved in `out`.
-func ListDiff(scope *Scope, x tf.Output, y tf.Output, optional ...ListDiffAttr) (out tf.Output, idx tf.Output) {
+// Returns 2-D.  Indices of the concatenated `SparseTensor`.1-D.  Non-empty values of the concatenated `SparseTensor`.1-D.  Shape of the concatenated `SparseTensor`.
+func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, concat_dim int64) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"concat_dim": concat_dim}
 	opspec := tf.OpSpec{
-		Type: "ListDiff",
+		Type: "SparseConcat",
 		Input: []tf.Input{
-			x, y,
+			tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes),
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
 // Generates sparse cross from a list of sparse and dense tensors.
@@ -12668,127 +12342,157 @@ func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes [
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// FractionalMaxPoolAttr is an optional argument to FractionalMaxPool.
-type FractionalMaxPoolAttr func(optionalAttr)
+// ListDiffAttr is an optional argument to ListDiff.
+type ListDiffAttr func(optionalAttr)
 
-// FractionalMaxPoolPseudoRandom sets the optional pseudo_random attribute to value.
-//
-// value: When set to True, generates the pooling sequence in a
-// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
-// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for
-// difference between pseudorandom and random.
-// If not specified, defaults to false
-func FractionalMaxPoolPseudoRandom(value bool) FractionalMaxPoolAttr {
+// ListDiffOutIdx sets the optional out_idx attribute to value.
+// If not specified, defaults to DT_INT32
+func ListDiffOutIdx(value tf.DataType) ListDiffAttr {
 	return func(m optionalAttr) {
-		m["pseudo_random"] = value
+		m["out_idx"] = value
 	}
 }
 
-// FractionalMaxPoolOverlapping sets the optional overlapping attribute to value.
+// Computes the difference between two lists of numbers or strings.
 //
-// value: When set to True, it means when pooling, the values at the boundary
-// of adjacent pooling cells are used by both cells. For example:
+// Given a list `x` and a list `y`, this operation returns a list `out` that
+// represents all values that are in `x` but not in `y`. The returned list `out`
+// is sorted in the same order that the numbers appear in `x` (duplicates are
+// preserved). This operation also returns a list `idx` that represents the
+// position of each `out` element in `x`. In other words:
 //
-// `index  0  1  2  3  4`
+// `out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]`
 //
-// `value  20 5  16 3  7`
+// For example, given this input:
 //
-// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
-// The result would be [20, 16] for fractional max pooling.
-// If not specified, defaults to false
-func FractionalMaxPoolOverlapping(value bool) FractionalMaxPoolAttr {
-	return func(m optionalAttr) {
-		m["overlapping"] = value
-	}
-}
-
-// FractionalMaxPoolDeterministic sets the optional deterministic attribute to value.
+// ```
+// x = [1, 2, 3, 4, 5, 6]
+// y = [1, 3, 5]
+// ```
 //
-// value: When set to True, a fixed pooling region will be used when
-// iterating over a FractionalMaxPool node in the computation graph. Mainly used
-// in unit test to make FractionalMaxPool deterministic.
-// If not specified, defaults to false
-func FractionalMaxPoolDeterministic(value bool) FractionalMaxPoolAttr {
-	return func(m optionalAttr) {
-		m["deterministic"] = value
+// This operation would return:
+//
+// ```
+// out ==> [2, 4, 6]
+// idx ==> [1, 3, 5]
+// ```
+//
+// Arguments:
+//	x: 1-D. Values to keep.
+//	y: 1-D. Values to remove.
+//
+// Returns 1-D. Values present in `x` but not in `y`.1-D. Positions of `x` values preserved in `out`.
+func ListDiff(scope *Scope, x tf.Output, y tf.Output, optional ...ListDiffAttr) (out tf.Output, idx tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ListDiff",
+		Input: []tf.Input{
+			x, y,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
 }
 
-// FractionalMaxPoolSeed sets the optional seed attribute to value.
+// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`.
 //
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func FractionalMaxPoolSeed(value int64) FractionalMaxPoolAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
+// This Op does not require `a_indices` be sorted in standard lexicographic order.
+//
+// Arguments:
+//	a_indices: 2-D.  The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`.
+//	a_values: 1-D.  The `values` of the `SparseTensor`, with shape `[nnz]`.
+//	a_shape: 1-D.  The `shape` of the `SparseTensor`, with shape `[ndims]`.
+//	b: `ndims`-D Tensor.  With shape `a_shape`.
+func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseTensorDenseAdd",
+		Input: []tf.Input{
+			a_indices, a_values, a_shape, b,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// FractionalMaxPoolSeed2 sets the optional seed2 attribute to value.
-//
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func FractionalMaxPoolSeed2(value int64) FractionalMaxPoolAttr {
+// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation.
+type SparseToSparseSetOperationAttr func(optionalAttr)
+
+// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value.
+// If not specified, defaults to true
+func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["validate_indices"] = value
 	}
 }
 
-// Performs fractional max pooling on the input.
-//
-// Fractional max pooling is slightly different than regular max pooling.  In
-// regular max pooling, you downsize an input set by taking the maximum value of
-// smaller N x N subsections of the set (often 2x2), and try to reduce the set by
-// a factor of N, where N is an integer.  Fractional max pooling, as you might
-// expect from the word "fractional", means that the overall reduction ratio N
-// does not have to be an integer.
+// Applies set operation along last dimension of 2 `SparseTensor` inputs.
 //
-// The sizes of the pooling regions are generated randomly but are fairly uniform.
-// For example, let's look at the height dimension, and the constraints on the
-// list of rows that will be pool boundaries.
+// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
 //
-// First we define the following:
+// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the
+// order and range of `set1` and `set2` indices.
 //
-// 1.  input_row_length : the number of rows from the input set
-// 2.  output_row_length : which will be smaller than the input
-// 3.  alpha = input_row_length / output_row_length : our reduction ratio
-// 4.  K = floor(alpha)
-// 5.  row_pooling_sequence : this is the result list of pool boundary rows
+// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`,
+// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same
+// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but
+// ignored.
 //
-// Then, row_pooling_sequence should satisfy:
+// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
+// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
+// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
+// ignored.
 //
-// 1.  a[0] = 0 : the first value of the sequence is 0
-// 2.  a[end] = input_row_length : the last value of the sequence is the size
-// 3.  K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size
-// 4.  length(row_pooling_sequence) = output_row_length+1
+// If `validate_indices` is `True`, this op validates the order and range of `set1`
+// and `set2` indices.
 //
-// For more details on fractional max pooling, see this paper:
-// [Benjamin Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071)
+// Output `result` is a `SparseTensor` represented by `result_indices`,
+// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
+// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
+// dimension contains the result of `set_operation` applied to the corresponding
+// `[0...n-1]` dimension of `set`.
 //
 // Arguments:
-//	value: 4-D with shape `[batch, height, width, channels]`.
-//	pooling_ratio: Pooling ratio for each dimension of `value`, currently only
-// supports row and col dimension and should be >= 1.0. For example, a valid
-// pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
-// must be 1.0 because we don't allow pooling on batch and channels
-// dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
-// respectively.
+//	set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
+// order.
+//	set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
+// order.
+//	set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must
+// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the
+// max set size across `0...n-1` dimensions.
+//	set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
+// order.
+//	set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
+// order.
+//	set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
+// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the
+// max set size across `0...n-1` dimensions.
 //
-// Returns output tensor after fractional max pooling.row pooling sequence, needed to calculate gradient.column pooling sequence, needed to calculate gradient.
-func FractionalMaxPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalMaxPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) {
+//
+// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
+// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
+// is the max result set size across all `0...n-1` dimensions.
+func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"pooling_ratio": pooling_ratio}
+	attrs := map[string]interface{}{"set_operation": set_operation}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FractionalMaxPool",
+		Type: "SparseToSparseSetOperation",
 		Input: []tf.Input{
-			value,
+			set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape,
 		},
 		Attrs: attrs,
 	}
@@ -12796,244 +12500,260 @@ func FractionalMaxPool(scope *Scope, value tf.Output, pooling_ratio []float32, o
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Concatenates a list of `SparseTensor` along the specified dimension.
-//
-// Concatenation is with respect to the dense versions of these sparse tensors.
-// It is assumed that each input is a `SparseTensor` whose elements are ordered
-// along increasing dimension number.
-//
-// All inputs' shapes must match, except for the concat dimension.  The
-// `indices`, `values`, and `shapes` lists must have the same length.
-//
-// The output shape is identical to the inputs', except along the concat
-// dimension, where it is the sum of the inputs' sizes along that dimension.
-//
-// The output elements will be resorted to preserve the sort order along
-// increasing dimension number.
-//
-// This op runs in `O(M log M)` time, where `M` is the total number of non-empty
-// values across all inputs. This is due to the need for an internal sort in
-// order to concatenate efficiently across an arbitrary dimension.
-//
-// For example, if `concat_dim = 1` and the inputs are
-//
-//     sp_inputs[0]: shape = [2, 3]
-//     [0, 2]: "a"
-//     [1, 0]: "b"
-//     [1, 1]: "c"
-//
-//     sp_inputs[1]: shape = [2, 4]
-//     [0, 1]: "d"
-//     [0, 2]: "e"
-//
-// then the output will be
-//
-//     shape = [2, 7]
-//     [0, 2]: "a"
-//     [0, 4]: "d"
-//     [0, 5]: "e"
-//     [1, 0]: "b"
-//     [1, 1]: "c"
-//
-// Graphically this is equivalent to doing
-//
-//     [    a] concat [  d e  ] = [    a   d e  ]
-//     [b c  ]        [       ]   [b c          ]
-//
-// Arguments:
-//	indices: 2-D.  Indices of each input `SparseTensor`.
-//	values: 1-D.  Non-empty values of each `SparseTensor`.
-//	shapes: 1-D.  Shapes of each `SparseTensor`.
-//	concat_dim: Dimension to concatenate along. Must be in range [-rank, rank),
-// where rank is the number of dimensions in each input `SparseTensor`.
+// Computes numerical negative value element-wise.
 //
-// Returns 2-D.  Indices of the concatenated `SparseTensor`.1-D.  Non-empty values of the concatenated `SparseTensor`.1-D.  Shape of the concatenated `SparseTensor`.
-func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, concat_dim int64) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
+// I.e., \\(y = -x\\).
+func Neg(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"concat_dim": concat_dim}
 	opspec := tf.OpSpec{
-		Type: "SparseConcat",
+		Type: "Neg",
 		Input: []tf.Input{
-			tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes),
+			x,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Performs a padding as a preprocess during a convolution.
+// Writes a `Summary` protocol buffer with a histogram.
 //
-// Similar to FusedResizeAndPadConv2d, this op allows for an optimized
-// implementation where the spatial padding transformation stage is fused with the
-// im2col lookup, but in this case without the bilinear filtering required for
-// resizing. Fusing the padding prevents the need to write out the intermediate
-// results as whole tensors, reducing memory pressure, and we can get some latency
-// gains by merging the transformation calculations.
-// The data_format attribute for Conv2D isn't supported by this op, and 'NHWC'
-// order is used instead.
-// Internally this op uses a single per-graph scratch buffer, which means that it
-// will block if multiple versions are being run in parallel. This is because this
-// operator is primarily an optimization to minimize memory usage.
+// The generated
+// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
+// has one summary value containing a histogram for `values`.
+//
+// This op reports an `InvalidArgument` error if any value is not finite.
 //
 // Arguments:
-//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
-//	paddings: A two-column matrix specifying the padding sizes. The number of
-// rows must be the same as the rank of `input`.
-//	filter: 4-D with shape
-// `[filter_height, filter_width, in_channels, out_channels]`.
+//	writer: A handle to a summary writer.
+//	step: The step to write the summary for.
+//	tag: Scalar.  Tag to use for the `Summary.Value`.
+//	values: Any shape. Values to use to build the histogram.
 //
-//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
-// of `input`. Must be in the same order as the dimension specified with format.
-//	padding: The type of padding algorithm to use.
-func FusedPadConv2D(scope *Scope, input tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string) (output tf.Output) {
+// Returns the created operation.
+func WriteHistogramSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, values tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "FusedPadConv2D",
+		Type: "WriteHistogramSummary",
 		Input: []tf.Input{
-			input, paddings, filter,
+			writer, step, tag, values,
 		},
-		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Returns immutable tensor from memory region.
+// Adds two `SparseTensor` objects to produce another `SparseTensor`.
 //
-// The current implementation memmaps the tensor from a file.
+// The input `SparseTensor` objects' indices are assumed ordered in standard
+// lexicographic order.  If this is not the case, before this step run
+// `SparseReorder` to restore index ordering.
+//
+// By default, if two values sum to zero at some index, the output `SparseTensor`
+// would still include that particular location in its index, storing a zero in the
+// corresponding value slot.  To override this, callers can specify `thresh`,
+// indicating that if the sum has a magnitude strictly smaller than `thresh`, its
+// corresponding value and index would then not be included.  In particular,
+// `thresh == 0` (default) means everything is kept and actual thresholding happens
+// only for a positive value.
+//
+// In the following shapes, `nnz` is the count after taking `thresh` into account.
 //
 // Arguments:
-//	dtype: Type of the returned tensor.
-//	shape: Shape of the returned tensor.
-//	memory_region_name: Name of readonly memory region used by the tensor, see
-// NewReadOnlyMemoryRegionFromFile in tensorflow::Env.
-func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) {
+//	a_indices: 2-D.  The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix.
+//	a_values: 1-D.  The `values` of the first `SparseTensor`, size `[nnz]` Vector.
+//	a_shape: 1-D.  The `shape` of the first `SparseTensor`, size `[ndims]` Vector.
+//	b_indices: 2-D.  The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix.
+//	b_values: 1-D.  The `values` of the second `SparseTensor`, size `[nnz]` Vector.
+//	b_shape: 1-D.  The `shape` of the second `SparseTensor`, size `[ndims]` Vector.
+//	thresh: 0-D.  The magnitude threshold that determines if an output value/index
+// pair takes space.
+func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name}
 	opspec := tf.OpSpec{
-		Type: "ImmutableConst",
+		Type: "SparseAdd",
+		Input: []tf.Input{
+			a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
 
-		Attrs: attrs,
+// OrderedMapPeekAttr is an optional argument to OrderedMapPeek.
+type OrderedMapPeekAttr func(optionalAttr)
+
+// OrderedMapPeekCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapPeekCapacity(value int64) OrderedMapPeekAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// OrderedMapPeekMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapPeekMemoryLimit(value int64) OrderedMapPeekAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// OrderedMapPeekContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func OrderedMapPeekContainer(value string) OrderedMapPeekAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// OrderedMapPeekSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func OrderedMapPeekSharedName(value string) OrderedMapPeekAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Deserialize and concatenate `SparseTensors` from a serialized minibatch.
-//
-// The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where
-// `N` is the minibatch size and the rows correspond to packed outputs of
-// `SerializeSparse`.  The ranks of the original `SparseTensor` objects
-// must all match.  When the final `SparseTensor` is created, it has rank one
-// higher than the ranks of the incoming `SparseTensor` objects
-// (they have been concatenated along a new row dimension).
-//
-// The output `SparseTensor` object's shape values for all dimensions but the
-// first are the max across the input `SparseTensor` objects' shape values
-// for the corresponding dimensions.  Its first shape value is `N`, the minibatch
-// size.
-//
-// The input `SparseTensor` objects' indices are assumed ordered in
-// standard lexicographic order.  If this is not the case, after this
-// step run `SparseReorder` to restore index ordering.
-//
-// For example, if the serialized input is a `[2 x 3]` matrix representing two
-// original `SparseTensor` objects:
-//
-//     index = [ 0]
-//             [10]
-//             [20]
-//     values = [1, 2, 3]
-//     shape = [50]
-//
-// and
-//
-//     index = [ 2]
-//             [10]
-//     values = [4, 5]
-//     shape = [30]
-//
-// then the final deserialized `SparseTensor` will be:
-//
-//     index = [0  0]
-//             [0 10]
-//             [0 20]
-//             [1  2]
-//             [1 10]
-//     values = [1, 2, 3, 4, 5]
-//     shape = [2 50]
+// Op peeks at the values at the specified key.  If the
 //
-// Arguments:
-//	serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects.
-// Must have 3 columns.
-//	dtype: The `dtype` of the serialized `SparseTensor` objects.
-func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
+// underlying container does not contain this key
+// this op will block until it does.   This Op is optimized for
+// performance.
+func OrderedMapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapPeekAttr) (values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "DeserializeManySparse",
+		Type: "OrderedMapPeek",
 		Input: []tf.Input{
-			serialized_sparse,
+			key, indices,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("OrderedMapPeek", err)
+		return
+	}
+	return values
 }
 
-// SparseTensorDenseMatMulAttr is an optional argument to SparseTensorDenseMatMul.
-type SparseTensorDenseMatMulAttr func(optionalAttr)
+// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg.
+type DecodeAndCropJpegAttr func(optionalAttr)
 
-// SparseTensorDenseMatMulAdjointA sets the optional adjoint_a attribute to value.
+// DecodeAndCropJpegChannels sets the optional channels attribute to value.
 //
-// value: Use the adjoint of A in the matrix multiply.  If A is complex, this
-// is transpose(conj(A)).  Otherwise it's transpose(A).
-// If not specified, defaults to false
-func SparseTensorDenseMatMulAdjointA(value bool) SparseTensorDenseMatMulAttr {
+// value: Number of color channels for the decoded image.
+// If not specified, defaults to 0
+func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr {
 	return func(m optionalAttr) {
-		m["adjoint_a"] = value
+		m["channels"] = value
 	}
 }
 
-// SparseTensorDenseMatMulAdjointB sets the optional adjoint_b attribute to value.
+// DecodeAndCropJpegRatio sets the optional ratio attribute to value.
 //
-// value: Use the adjoint of B in the matrix multiply.  If B is complex, this
-// is transpose(conj(B)).  Otherwise it's transpose(B).
+// value: Downscaling ratio.
+// If not specified, defaults to 1
+func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr {
+	return func(m optionalAttr) {
+		m["ratio"] = value
+	}
+}
+
+// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value.
+//
+// value: If true use a slower but nicer upscaling of the
+// chroma planes (yuv420/422 only).
+// If not specified, defaults to true
+func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr {
+	return func(m optionalAttr) {
+		m["fancy_upscaling"] = value
+	}
+}
+
+// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value.
+//
+// value: If true try to recover an image from truncated input.
 // If not specified, defaults to false
-func SparseTensorDenseMatMulAdjointB(value bool) SparseTensorDenseMatMulAttr {
+func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr {
 	return func(m optionalAttr) {
-		m["adjoint_b"] = value
+		m["try_recover_truncated"] = value
 	}
 }
 
-// Multiply SparseTensor (of rank 2) "A" by dense matrix "B".
+// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value.
 //
-// No validity checking is performed on the indices of A.  However, the following
-// input format is recommended for optimal behavior:
+// value: The minimum required fraction of lines before a truncated
+// input is accepted.
+// If not specified, defaults to 1
+func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr {
+	return func(m optionalAttr) {
+		m["acceptable_fraction"] = value
+	}
+}
+
+// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value.
+//
+// value: string specifying a hint about the algorithm used for
+// decompression.  Defaults to "" which maps to a system-specific
+// default.  Currently valid values are ["INTEGER_FAST",
+// "INTEGER_ACCURATE"].  The hint may be ignored (e.g., the internal
+// jpeg library changes to a version that does not have that specific
+// option.)
+// If not specified, defaults to ""
+func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr {
+	return func(m optionalAttr) {
+		m["dct_method"] = value
+	}
+}
+
+// Decode and Crop a JPEG-encoded image to a uint8 tensor.
+//
+// The attr `channels` indicates the desired number of color channels for the
+// decoded image.
+//
+// Accepted values are:
+//
+// *   0: Use the number of channels in the JPEG-encoded image.
+// *   1: output a grayscale image.
+// *   3: output an RGB image.
+//
+// If needed, the JPEG-encoded image is transformed to match the requested number
+// of color channels.
+//
+// The attr `ratio` allows downscaling the image by an integer factor during
+// decoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than
+// downscaling the image later.
 //
-// if adjoint_a == false:
-//   A should be sorted in lexicographically increasing order.  Use SparseReorder
-//   if you're not sure.
-// if adjoint_a == true:
-//   A should be sorted in order of increasing dimension 1 (i.e., "column major"
-//   order instead of "row major" order).
+//
+// It is equivalent to a combination of decode and crop, but much faster by only
+// decoding partial jpeg image.
 //
 // Arguments:
-//	a_indices: 2-D.  The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix.
-//	a_values: 1-D.  The `values` of the `SparseTensor`, size `[nnz]` Vector.
-//	a_shape: 1-D.  The `shape` of the `SparseTensor`, size `[2]` Vector.
-//	b: 2-D.  A dense Matrix.
-func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output, optional ...SparseTensorDenseMatMulAttr) (product tf.Output) {
+//	contents: 0-D.  The JPEG-encoded image.
+//	crop_window: 1-D.  The crop window: [crop_y, crop_x, crop_height, crop_width].
+//
+// Returns 3-D with shape `[height, width, channels]`..
+func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -13042,9 +12762,9 @@ func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Outp
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseTensorDenseMatMul",
+		Type: "DecodeAndCropJpeg",
 		Input: []tf.Input{
-			a_indices, a_values, a_shape, b,
+			contents, crop_window,
 		},
 		Attrs: attrs,
 	}
@@ -13052,487 +12772,450 @@ func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Outp
 	return op.Output(0)
 }
 
-// WriteImageSummaryAttr is an optional argument to WriteImageSummary.
-type WriteImageSummaryAttr func(optionalAttr)
+// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler.
+type AllCandidateSamplerAttr func(optionalAttr)
 
-// WriteImageSummaryMaxImages sets the optional max_images attribute to value.
-//
-// value: Max number of batch elements to generate images for.
-// If not specified, defaults to 3
+// AllCandidateSamplerSeed sets the optional seed attribute to value.
 //
-// REQUIRES: value >= 1
-func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr {
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["max_images"] = value
+		m["seed"] = value
 	}
 }
 
-// Writes a `Summary` protocol buffer with images.
-//
-// The summary has up to `max_images` summary values containing images. The
-// images are built from `tensor` which must be 4-D with shape `[batch_size,
-// height, width, channels]` and where `channels` can be:
-//
-// *  1: `tensor` is interpreted as Grayscale.
-// *  3: `tensor` is interpreted as RGB.
-// *  4: `tensor` is interpreted as RGBA.
-//
-// The images have the same number of channels as the input tensor. For float
-// input, the values are normalized one image at a time to fit in the range
-// `[0, 255]`.  `uint8` values are unchanged.  The op uses two different
-// normalization algorithms:
-//
-// *  If the input values are all positive, they are rescaled so the largest one
-//    is 255.
-//
-// *  If any input value is negative, the values are shifted so input value 0.0
-//    is at 127.  They are then rescaled so that either the smallest value is 0,
-//    or the largest one is 255.
-//
-// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-// build the `tag` of the summary values:
+// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value.
 //
-// *  If `max_images` is 1, the summary value tag is '*tag*/image'.
-// *  If `max_images` is greater than 1, the summary value tags are
-//    generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Generates labels for candidate sampling with a learned unigram distribution.
 //
-// The `bad_color` argument is the color to use in the generated images for
-// non-finite input values.  It is a `unit8` 1-D tensor of length `channels`.
-// Each element must be in the range `[0, 255]` (It represents the value of a
-// pixel in the output image).  Non-finite values in the input tensor are
-// replaced by this tensor in the output image.  The default value is the color
-// red.
+// See explanations of candidate sampling and the data formats at
+// go/candidate-sampling.
+//
+// For each batch, this op picks a single set of sampled candidate labels.
+//
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
 //
 // Arguments:
-//	writer: A handle to a summary writer.
-//	step: The step to write the summary for.
-//	tag: Scalar. Used to build the `tag` attribute of the summary values.
-//	tensor: 4-D of shape `[batch_size, height, width, channels]` where
-// `channels` is 1, 3, or 4.
-//	bad_color: Color to use for pixels with non-finite values.
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to produce.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
 //
-// Returns the created operation.
-func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) {
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "WriteImageSummary",
+		Type: "AllCandidateSampler",
 		Input: []tf.Input{
-			writer, step, tag, tensor, bad_color,
+			true_classes,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Pads a tensor with zeros.
-//
-// This operation pads a `input` with zeros according to the `paddings` you
-// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the
-// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
-// how many zeros to add before the contents of `input` in that dimension, and
-// `paddings[D, 1]` indicates how many zeros to add after the contents of `input`
-// in that dimension.
-//
-// The padded size of each dimension D of the output is:
+// Returns the element-wise min of two SparseTensors.
 //
-// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
+// Assumes the two SparseTensors have the same shape, i.e., no broadcasting.
 //
-// For example:
+// Arguments:
+//	a_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, in the canonical lexicographic ordering.
+//	a_values: 1-D.  `N` non-empty values corresponding to `a_indices`.
+//	a_shape: 1-D.  Shape of the input SparseTensor.
+//	b_indices: counterpart to `a_indices` for the other operand.
+//	b_values: counterpart to `a_values` for the other operand; must be of the same dtype.
+//	b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal.
 //
-// ```
-// # 't' is [[1, 1], [2, 2]]
-// # 'paddings' is [[1, 1], [2, 2]]
-// # rank of 't' is 2
-// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
-//                       [0, 0, 1, 1, 0, 0]
-//                       [0, 0, 2, 2, 0, 0]
-//                       [0, 0, 0, 0, 0, 0]]
-// ```
-func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) {
+// Returns 2-D.  The indices of the output SparseTensor.1-D.  The values of the output SparseTensor.
+func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Pad",
+		Type: "SparseSparseMinimum",
 		Input: []tf.Input{
-			input, paddings,
+			a_indices, a_values, a_shape, b_indices, b_values, b_shape,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Computes the number of elements in the given queue.
+// Constructs a tensor by tiling a given tensor.
 //
-// Arguments:
-//	handle: The handle to a queue.
+// This operation creates a new tensor by replicating `input` `multiples` times.
+// The output tensor's i'th dimension has `input.dims(i) * multiples[i]` elements,
+// and the values of `input` are replicated `multiples[i]` times along the 'i'th
+// dimension. For example, tiling `[a b c d]` by `[2]` produces
+// `[a b c d a b c d]`.
 //
-// Returns The number of elements in the given queue.
-func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) {
+// Arguments:
+//	input: 1-D or higher.
+//	multiples: 1-D. Length must be the same as the number of dimensions in `input`
+func Tile(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "QueueSizeV2",
+		Type: "Tile",
 		Input: []tf.Input{
-			handle,
+			input, multiples,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Outputs a `Summary` protocol buffer with a histogram.
+// Saves the input tensors to disk.
 //
-// The generated
-// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
-// has one summary value containing a histogram for `values`.
+// The size of `tensor_names` must match the number of tensors in `data`. `data[i]`
+// is written to `filename` with name `tensor_names[i]`.
 //
-// This op reports an `InvalidArgument` error if any value is not finite.
+// See also `SaveSlices`.
 //
 // Arguments:
-//	tag: Scalar.  Tag to use for the `Summary.Value`.
-//	values: Any shape. Values to use to build the histogram.
+//	filename: Must have a single element. The name of the file to which we write
+// the tensor.
+//	tensor_names: Shape `[N]`. The names of the tensors to be saved.
+//	data: `N` tensors to save.
 //
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) {
+// Returns the created operation.
+func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "HistogramSummary",
+		Type: "Save",
 		Input: []tf.Input{
-			tag, values,
+			filename, tensor_names, tf.OutputList(data),
 		},
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Creates a dataset that emits the lines of one or more text files.
+// Returns element-wise remainder of division. When `x < 0` xor `y < 0` is
 //
-// Arguments:
-//	filenames: A scalar or a vector containing the name(s) of the file(s) to be
-// read.
-//	compression_type: A scalar containing either (i) the empty string (no
-// compression), (ii) "ZLIB", or (iii) "GZIP".
-//	buffer_size: A scalar containing the number of bytes to buffer.
-func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) {
+// true, this follows Python semantics in that the result here is consistent
+// with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`.
+//
+// *NOTE*: `FloorMod` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func FloorMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "TextLineDataset",
+		Type: "FloorMod",
 		Input: []tf.Input{
-			filenames, compression_type, buffer_size,
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns the number of records this Reader has produced.
-//
-// This is the same as the number of ReaderRead executions that have
-// succeeded.
+// TakeManySparseFromTensorsMapAttr is an optional argument to TakeManySparseFromTensorsMap.
+type TakeManySparseFromTensorsMapAttr func(optionalAttr)
+
+// TakeManySparseFromTensorsMapContainer sets the optional container attribute to value.
 //
-// Arguments:
-//	reader_handle: Handle to a Reader.
-func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ReaderNumRecordsProducedV2",
-		Input: []tf.Input{
-			reader_handle,
-		},
+// value: The container name for the `SparseTensorsMap` read by this op.
+// If not specified, defaults to ""
+func TakeManySparseFromTensorsMapContainer(value string) TakeManySparseFromTensorsMapAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes exponential of x - 1 element-wise.
+// TakeManySparseFromTensorsMapSharedName sets the optional shared_name attribute to value.
 //
-// I.e., \\(y = (\exp x) - 1\\).
-func Expm1(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Expm1",
-		Input: []tf.Input{
-			x,
-		},
+// value: The shared name for the `SparseTensorsMap` read by this op.
+// It should not be blank; rather the `shared_name` or unique Operation name
+// of the Op that created the original `SparseTensorsMap` should be used.
+// If not specified, defaults to ""
+func TakeManySparseFromTensorsMapSharedName(value string) TakeManySparseFromTensorsMapAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Batch normalization.
+// Read `SparseTensors` from a `SparseTensorsMap` and concatenate them.
 //
-// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization()
+// The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where
+// `N` is the minibatch size and the rows correspond to the output handles of
+// `AddSparseToTensorsMap` or `AddManySparseToTensorsMap`.  The ranks of the
+// original `SparseTensor` objects that went into the given input ops must all
+// match.  When the final `SparseTensor` is created, it has rank one
+// higher than the ranks of the incoming `SparseTensor` objects
+// (they have been concatenated along a new row dimension on the left).
 //
-// This op is deprecated. Prefer `tf.nn.batch_normalization`.
+// The output `SparseTensor` object's shape values for all dimensions but the
+// first are the max across the input `SparseTensor` objects' shape values
+// for the corresponding dimensions.  Its first shape value is `N`, the minibatch
+// size.
 //
-// Arguments:
-//	t: A 4D input Tensor.
-//	m: A 1D mean Tensor with size matching the last dimension of t.
-// This is the first output from tf.nn.moments,
-// or a saved moving average thereof.
-//	v: A 1D variance Tensor with size matching the last dimension of t.
-// This is the second output from tf.nn.moments,
-// or a saved moving average thereof.
-//	beta: A 1D beta Tensor with size matching the last dimension of t.
-// An offset to be added to the normalized tensor.
-//	gamma: A 1D gamma Tensor with size matching the last dimension of t.
-// If "scale_after_normalization" is true, this tensor will be multiplied
-// with the normalized tensor.
-//	variance_epsilon: A small float number to avoid dividing by 0.
-//	scale_after_normalization: A bool indicating whether the resulted tensor
-// needs to be multiplied with gamma.
-func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) {
+// The input `SparseTensor` objects' indices are assumed ordered in
+// standard lexicographic order.  If this is not the case, after this
+// step run `SparseReorder` to restore index ordering.
+//
+// For example, if the handles represent an input, which is a `[2, 3]` matrix
+// representing two original `SparseTensor` objects:
+//
+// ```
+//     index = [ 0]
+//             [10]
+//             [20]
+//     values = [1, 2, 3]
+//     shape = [50]
+// ```
+//
+// and
+//
+// ```
+//     index = [ 2]
+//             [10]
+//     values = [4, 5]
+//     shape = [30]
+// ```
+//
+// then the final `SparseTensor` will be:
+//
+// ```
+//     index = [0  0]
+//             [0 10]
+//             [0 20]
+//             [1  2]
+//             [1 10]
+//     values = [1, 2, 3, 4, 5]
+//     shape = [2 50]
+// ```
+//
+// Arguments:
+//	sparse_handles: 1-D, The `N` serialized `SparseTensor` objects.
+// Shape: `[N]`.
+//	dtype: The `dtype` of the `SparseTensor` objects stored in the
+// `SparseTensorsMap`.
+//
+// Returns 2-D.  The `indices` of the minibatch `SparseTensor`.1-D.  The `values` of the minibatch `SparseTensor`.1-D.  The `shape` of the minibatch `SparseTensor`.
+func TakeManySparseFromTensorsMap(scope *Scope, sparse_handles tf.Output, dtype tf.DataType, optional ...TakeManySparseFromTensorsMapAttr) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization}
+	attrs := map[string]interface{}{"dtype": dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "BatchNormWithGlobalNormalization",
+		Type: "TakeManySparseFromTensorsMap",
 		Input: []tf.Input{
-			t, m, v, beta, gamma,
+			sparse_handles,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// MaxPoolV2Attr is an optional argument to MaxPoolV2.
-type MaxPoolV2Attr func(optionalAttr)
-
-// MaxPoolV2DataFormat sets the optional data_format attribute to value.
+// Says whether the targets are in the top `K` predictions.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func MaxPoolV2DataFormat(value string) MaxPoolV2Attr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Performs max pooling on the input.
+// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the
+// prediction for the target class is among the top `k` predictions among
+// all predictions for example `i`. Note that the behavior of `InTopK` differs
+// from the `TopK` op in its handling of ties; if multiple classes have the
+// same prediction value and straddle the top-`k` boundary, all of those
+// classes are considered to be in the top `k`.
+//
+// More formally, let
+//
+//   \\(predictions_i\\) be the predictions for all classes for example `i`,
+//   \\(targets_i\\) be the target class for example `i`,
+//   \\(out_i\\) be the output for example `i`,
+//
+// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$
 //
 // Arguments:
-//	input: 4-D input to pool over.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
+//	predictions: A `batch_size` x `classes` tensor.
+//	targets: A `batch_size` vector of class ids.
+//	k: Number of top elements to look at for computing precision.
 //
-// Returns The max pooled output tensor.
-func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) {
+// Returns Computed precision at `k` as a `bool Tensor`.
+func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "MaxPoolV2",
+		Type: "InTopKV2",
 		Input: []tf.Input{
-			input, ksize, strides,
+			predictions, targets, k,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey.
-type OrderedMapUnstageNoKeyAttr func(optionalAttr)
-
-// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// Assigns a new value to a variable.
 //
-// REQUIRES: value >= 0
-func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// Any ReadVariableOp with a control dependency on this op is guaranteed to return
+// this value or a subsequent newer value of the variable.
 //
-// REQUIRES: value >= 0
-func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
+// Arguments:
+//	resource: handle to the resource in which to store the variable.
+//	value: the value to set the new tensor to use.
+//
+// Returns the created operation.
+func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
+	opspec := tf.OpSpec{
+		Type: "AssignVariableOp",
+		Input: []tf.Input{
+			resource, value,
+		},
 	}
+	return scope.AddOperation(opspec)
 }
 
-// Op removes and returns the (key, value) element with the smallest
+// Returns a tensor of ones with the same shape and type as x.
 //
-// key from the underlying container.   If the underlying container
-// does not contain elements, the op will block until it does.
-func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) {
+// Arguments:
+//	x: a tensor of type T.
+//
+// Returns a tensor of the same shape and type as x but filled with ones.
+func OnesLike(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "OrderedMapUnstageNoKey",
+		Type: "OnesLike",
 		Input: []tf.Input{
-			indices,
+			x,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	key = op.Output(idx)
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("OrderedMapUnstageNoKey", err)
-		return
-	}
-	return key, values
+	return op.Output(0)
 }
 
-// Read an element from the TensorArray into output `value`.
+// The gradient of SparseFillEmptyRows.
 //
-// Arguments:
-//	handle: The handle to a TensorArray.
+// Takes vectors reverse_index_map, shaped `[N]`, and grad_values,
+// shaped `[N_full]`, where `N_full >= N` and copies data into either
+// `d_values` or `d_default_value`.  Here `d_values` is shaped `[N]` and
+// `d_default_value` is a scalar.
 //
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//	dtype: The type of the elem that is returned.
+//   d_values[j] = grad_values[reverse_index_map[j]]
+//   d_default_value = sum_{k : 0 .. N_full - 1} (
+//      grad_values[k] * 1{k not in reverse_index_map})
 //
-// Returns The tensor that is read from the TensorArray.
-func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) {
+// Arguments:
+//	reverse_index_map: 1-D.  The reverse index map from SparseFillEmptyRows.
+//	grad_values: 1-D.  The gradients from backprop.
+//
+// Returns 1-D.  The backprop into values.0-D.  The backprop into default_value.
+func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayReadV3",
+		Type: "SparseFillEmptyRowsGrad",
 		Input: []tf.Input{
-			handle, index, flow_in,
+			reverse_index_map, grad_values,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Adds up a SparseTensor and a dense Tensor, using these special rules:
-//
-// (1) Broadcasts the dense side to have the same shape as the sparse side, if
-//     eligible;
-// (2) Then, only the dense values pointed to by the indices of the SparseTensor
-//     participate in the cwise addition.
-//
-// By these rules, the result is a logical SparseTensor with exactly the same
-// indices and shape, but possibly with different non-zero values.  The output of
-// this Op is the resultant non-zero values.
+// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)`
 //
-// Arguments:
-//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
-//	sp_shape: 1-D.  Shape of the input SparseTensor.
-//	dense: `R`-D.  The dense Tensor operand.
+// if < 0, `scale * features` otherwise.
 //
-// Returns 1-D.  The `N` values that are operated on.
-func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
+// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
+func Selu(scope *Scope, features tf.Output) (activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseDenseCwiseAdd",
+		Type: "Selu",
 		Input: []tf.Input{
-			sp_indices, sp_values, sp_shape, dense,
+			features,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Conv3DAttr is an optional argument to Conv3D.
-type Conv3DAttr func(optionalAttr)
+// SetSizeAttr is an optional argument to SetSize.
+type SetSizeAttr func(optionalAttr)
 
-// Conv3DDataFormat sets the optional data_format attribute to value.
-//
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func Conv3DDataFormat(value string) Conv3DAttr {
+// SetSizeValidateIndices sets the optional validate_indices attribute to value.
+// If not specified, defaults to true
+func SetSizeValidateIndices(value bool) SetSizeAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["validate_indices"] = value
 	}
 }
 
-// Computes a 3-D convolution given 5-D `input` and `filter` tensors.
+// Number of unique elements along last dimension of input `set`.
 //
-// In signal processing, cross-correlation is a measure of similarity of
-// two waveforms as a function of a time-lag applied to one of them. This
-// is also known as a sliding dot product or sliding inner-product.
+// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`,
+// and `set_shape`. The last dimension contains values in a set, duplicates are
+// allowed but ignored.
 //
-// Our Conv3D implements a form of cross-correlation.
+// If `validate_indices` is `True`, this op validates the order and range of `set`
+// indices.
 //
 // Arguments:
-//	input: Shape `[batch, in_depth, in_height, in_width, in_channels]`.
-//	filter: Shape `[filter_depth, filter_height, filter_width, in_channels,
-// out_channels]`. `in_channels` must match between `input` and `filter`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
-func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv3DAttr) (output tf.Output) {
+//	set_indices: 2D `Tensor`, indices of a `SparseTensor`.
+//	set_values: 1D `Tensor`, values of a `SparseTensor`.
+//	set_shape: 1D `Tensor`, shape of a `SparseTensor`.
+//
+// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st
+// `n-1` dimensions as `set`. Each value is the number of unique elements in
+// the corresponding `[0...n-1]` dimension of `set`.
+func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Conv3D",
+		Type: "SetSize",
 		Input: []tf.Input{
-			input, filter,
+			set_indices, set_values, set_shape,
 		},
 		Attrs: attrs,
 	}
@@ -13540,67 +13223,64 @@ func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, pa
 	return op.Output(0)
 }
 
-// Returns the truth value of (x >= y) element-wise.
+// Computes the sign and the log of the absolute value of the determinant of
 //
-// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// one or more square matrices.
+//
+// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions
+// form square matrices. The outputs are two tensors containing the signs and
+// absolute values of the log determinants for all N input submatrices
+// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant).
+// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU
+// is the LU decomposition of the input and P is the corresponding
+// permutation matrix.
+//
+// Arguments:
+//	input: Shape is `[N, M, M]`.
+//
+// Returns The signs of the log determinants of the inputs. Shape is `[N]`.The logs of the absolute values of the determinants
+// of the N input matrices.  Shape is `[N]`.
+func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "GreaterEqual",
+		Type: "LogMatrixDeterminant",
 		Input: []tf.Input{
-			x, y,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum.
-type ResourceApplyMomentumAttr func(optionalAttr)
-
-// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
+// SumAttr is an optional argument to Sum.
+type SumAttr func(optionalAttr)
 
-// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
+// SumKeepDims sets the optional keep_dims attribute to value.
 //
-// value: If `True`, the tensor passed to compute grad will be
-// var - lr * momentum * accum, so in the end, the var you get is actually
-// var - lr * momentum * accum.
+// value: If true, retain reduced dimensions with length 1.
 // If not specified, defaults to false
-func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr {
+func SumKeepDims(value bool) SumAttr {
 	return func(m optionalAttr) {
-		m["use_nesterov"] = value
+		m["keep_dims"] = value
 	}
 }
 
-// Update '*var' according to the momentum scheme. Set use_nesterov = True if you
-//
-// want to use Nesterov momentum.
+// Computes the sum of elements across dimensions of a tensor.
 //
-// accum = accum * momentum + grad
-// var -= lr * accum
+// Reduces `input` along the dimensions given in `axis`. Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `axis`. If `keep_dims` is true, the reduced dimensions are
+// retained with length 1.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	grad: The gradient.
-//	momentum: Momentum. Must be a scalar.
+//	input: The tensor to reduce.
+//	axis: The dimensions to reduce. Must be in the range
+// `[-rank(input), rank(input))`.
 //
-// Returns the created operation.
-func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) {
+// Returns The reduced tensor.
+func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -13609,173 +13289,115 @@ func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyMomentum",
+		Type: "Sum",
 		Input: []tf.Input{
-			var_, accum, lr, grad, momentum,
+			input, axis,
 		},
 		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Delete the tensor specified by its handle in the session.
+//
+// Arguments:
+//	handle: The handle for a tensor stored in the session state.
+//
+// Returns the created operation.
+func DeleteSessionTensor(scope *Scope, handle tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "DeleteSessionTensor",
+		Input: []tf.Input{
+			handle,
+		},
+	}
 	return scope.AddOperation(opspec)
 }
 
-// Returns element-wise integer closest to x.
+// L2 Loss.
 //
-// If the result is midway between two representable values,
-// the even representable is chosen.
-// For example:
+// Computes half the L2 norm of a tensor without the `sqrt`:
 //
-// ```
-// rint(-1.5) ==> -2.0
-// rint(0.5000001) ==> 1.0
-// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.]
-// ```
-func Rint(scope *Scope, x tf.Output) (y tf.Output) {
+//     output = sum(t ** 2) / 2
+//
+// Arguments:
+//	t: Typically 2-D, but may have any dimensions.
+//
+// Returns 0-D.
+func L2Loss(scope *Scope, t tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Rint",
+		Type: "L2Loss",
 		Input: []tf.Input{
-			x,
+			t,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// QuantizeV2Attr is an optional argument to QuantizeV2.
-type QuantizeV2Attr func(optionalAttr)
-
-// QuantizeV2Mode sets the optional mode attribute to value.
-// If not specified, defaults to "MIN_COMBINED"
-func QuantizeV2Mode(value string) QuantizeV2Attr {
-	return func(m optionalAttr) {
-		m["mode"] = value
-	}
-}
+// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation.
+type DenseToSparseSetOperationAttr func(optionalAttr)
 
-// QuantizeV2RoundMode sets the optional round_mode attribute to value.
-// If not specified, defaults to "HALF_AWAY_FROM_ZERO"
-func QuantizeV2RoundMode(value string) QuantizeV2Attr {
+// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value.
+// If not specified, defaults to true
+func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr {
 	return func(m optionalAttr) {
-		m["round_mode"] = value
+		m["validate_indices"] = value
 	}
 }
 
-// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'.
-//
-// [min_range, max_range] are scalar floats that specify the range for
-// the 'input' data. The 'mode' attribute controls exactly which calculations are
-// used to convert the float values to their quantized equivalents.  The
-// 'round_mode' attribute controls which rounding tie-breaking algorithm is used
-// when rounding float values to their quantized equivalents.
-//
-// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
-//
-// ```
-// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
-// if T == qint8, out[i] -= (range(T) + 1) / 2.0
-// ```
-// here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
-//
-// *MIN_COMBINED Mode Example*
-//
-// Assume the input is type float and has a possible range of [0.0, 6.0] and the
-// output type is quint8 ([0, 255]). The min_range and max_range values should be
-// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each
-// value of the input by 255/6 and cast to quint8.
-//
-// If the output type was qint8 ([-128, 127]), the operation will additionally
-// subtract each value by 128 prior to casting, so that the range of values aligns
-// with the range of qint8.
-//
-// If the mode is 'MIN_FIRST', then this approach is used:
-//
-// ```
-// num_discrete_values = 1 << (# of bits in T)
-// range_adjust = num_discrete_values / (num_discrete_values - 1)
-// range = (range_max - range_min) * range_adjust
-// range_scale = num_discrete_values / range
-// quantized = round(input * range_scale) - round(range_min * range_scale) +
-//   numeric_limits<T>::min()
-// quantized = max(quantized, numeric_limits<T>::min())
-// quantized = min(quantized, numeric_limits<T>::max())
-// ```
-//
-// The biggest difference between this and MIN_COMBINED is that the minimum range
-// is rounded first, before it's subtracted from the rounded value. With
-// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing
-// and dequantizing will introduce a larger and larger error.
-//
-// *SCALED mode Example*
-//
-// `SCALED` mode matches the quantization approach used in
-// `QuantizeAndDequantize{V2|V3}`.
-//
-// If the mode is `SCALED`, we do not use the full range of the output type,
-// choosing to elide the lowest possible value for symmetry (e.g., output range is
-// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to
-// 0.
-//
-// We first find the range of values in our tensor. The
-// range we use is always centered on 0, so we find m such that
-// ```c++
-//   m = max(abs(input_min), abs(input_max))
-// ```
-//
-// Our input tensor range is then `[-m, m]`.
-//
-// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.
-// If T is signed, this is
-// ```
-//   num_bits = sizeof(T) * 8
-//   [min_fixed, max_fixed] =
-//       [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]
-// ```
+// Applies set operation along last dimension of `Tensor` and `SparseTensor`.
 //
-// Otherwise, if T is unsigned, the fixed-point range is
-// ```
-//   [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]
-// ```
+// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
 //
-// From this we compute our scaling factor, s:
-// ```c++
-//   s = (max_fixed - min_fixed) / (2 * m)
-// ```
+// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
+// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
+// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
+// ignored.
 //
-// Now we can quantize the elements of our tensor:
-// ```c++
-// result = round(input * s)
-// ```
+// If `validate_indices` is `True`, this op validates the order and range of `set2`
+// indices.
 //
-// One thing to watch out for is that the operator may choose to adjust the
-// requested minimum and maximum values slightly during the quantization process,
-// so you should always use the output ports as the range for further calculations.
-// For example, if the requested minimum and maximum values are close to equal,
-// they will be separated by a small epsilon value to prevent ill-formed quantized
-// buffers from being created. Otherwise, you can end up with buffers where all the
-// quantized values map to the same float value, which causes problems for
-// operations that have to perform further calculations on them.
+// Output `result` is a `SparseTensor` represented by `result_indices`,
+// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
+// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
+// dimension contains the result of `set_operation` applied to the corresponding
+// `[0...n-1]` dimension of `set`.
 //
 // Arguments:
-//
-//	min_range: The minimum scalar value possibly produced for the input.
-//	max_range: The maximum scalar value possibly produced for the input.
+//	set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.
+// Dimension `n` contains values in a set, duplicates are allowed but ignored.
+//	set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
+// order.
+//	set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
+// order.
+//	set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
+// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the
+// max set size across `n-1` dimensions.
 //
 //
-// Returns The quantized data produced from the float input.The actual minimum scalar value used for the output.The actual maximum scalar value used for the output.
-func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
+// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
+// is the max result set size across all `0...n-1` dimensions.
+func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"T": T}
+	attrs := map[string]interface{}{"set_operation": set_operation}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QuantizeV2",
+		Type: "DenseToSparseSetOperation",
 		Input: []tf.Input{
-			input, min_range, max_range,
+			set1, set2_indices, set2_values, set2_shape,
 		},
 		Attrs: attrs,
 	}
@@ -13783,55 +13405,58 @@ func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter.
-type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr)
+// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D.
+type FusedResizeAndPadConv2DAttr func(optionalAttr)
 
-// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value.
+// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, height, width, channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, channels, height, width].
-// If not specified, defaults to "NHWC"
-func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr {
+// value: If true, rescale input by (new_height - 1) / (height - 1),
+// which exactly aligns the 4 corners of images and resized images. If false, rescale
+// by new_height / height. Treat similarly the width dimension.
+// If not specified, defaults to false
+func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["resize_align_corners"] = value
 	}
 }
 
-// Computes the gradients of depthwise convolution with respect to the filter.
+// Performs a resize and padding as a preprocess during a convolution.
+//
+// It's often possible to do spatial transformations more efficiently as part of
+// the packing stage of a convolution, so this op allows for an optimized
+// implementation where these stages are fused together. This prevents the need to
+// write out the intermediate results as whole tensors, reducing memory pressure,
+// and we can get some latency gains by merging the transformation calculations.
+// The data_format attribute for Conv2D isn't supported by this op, and defaults to
+// 'NHWC' order.
+// Internally this op uses a single per-graph scratch buffer, which means that it
+// will block if multiple versions are being run in parallel. This is because this
+// operator is primarily an optimization to minimize memory usage.
 //
 // Arguments:
-//	input: 4-D with shape based on `data_format`.  For example, if
-// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height,
-// in_width, in_channels]` tensor.
-//	filter_sizes: An integer vector representing the tensor shape of `filter`,
-// where `filter` is a 4-D
-// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor.
-//	out_backprop: 4-D with shape  based on `data_format`.
-// For example, if `data_format` is 'NHWC' then
-// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
-// Gradients w.r.t. the output of the convolution.
-//	strides: The stride of the sliding window for each dimension of the input
-// of the convolution.
-//	padding: The type of padding algorithm to use.
+//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
+//	size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+// new size for the images.
+//	paddings: A two-column matrix specifying the padding sizes. The number of
+// rows must be the same as the rank of `input`.
+//	filter: 4-D with shape
+// `[filter_height, filter_width, in_channels, out_channels]`.
 //
-// Returns 4-D with shape
-// `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
-// the `filter` input of the convolution.
-func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) {
+//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
+// of `input`. Must be in the same order as the dimension specified with format.
+//	padding: The type of padding algorithm to use.
+func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DepthwiseConv2dNativeBackpropFilter",
+		Type: "FusedResizeAndPadConv2D",
 		Input: []tf.Input{
-			input, filter_sizes, out_backprop,
+			input, size, paddings, filter,
 		},
 		Attrs: attrs,
 	}
@@ -13839,98 +13464,121 @@ func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_s
 	return op.Output(0)
 }
 
-// Shuffle dimensions of x according to a permutation.
+// Subtracts a value from the current value of a variable.
 //
-// The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:
-//   `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]`
-func Transpose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) {
+// Any ReadVariableOp which depends directly or indirectly on this assign is
+// guaranteed to see the incremented value or a subsequent newer one.
+//
+// Outputs the incremented value, which can be used to totally order the
+// increments to this variable.
+//
+// Arguments:
+//	resource: handle to the resource in which to store the variable.
+//	value: the value by which the variable will be incremented.
+//
+// Returns the created operation.
+func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Transpose",
+		Type: "AssignSubVariableOp",
 		Input: []tf.Input{
-			x, perm,
+			resource, value,
 		},
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Reads and outputs the entire contents of the input filename.
-func ReadFile(scope *Scope, filename tf.Output) (contents tf.Output) {
+// RestoreAttr is an optional argument to Restore.
+type RestoreAttr func(optionalAttr)
+
+// RestorePreferredShard sets the optional preferred_shard attribute to value.
+//
+// value: Index of file to open first if multiple files match
+// `file_pattern`.
+// If not specified, defaults to -1
+func RestorePreferredShard(value int64) RestoreAttr {
+	return func(m optionalAttr) {
+		m["preferred_shard"] = value
+	}
+}
+
+// Restores a tensor from checkpoint files.
+//
+// Reads a tensor stored in one or several files. If there are several files (for
+// instance because a tensor was saved as slices), `file_pattern` may contain
+// wildcard symbols (`*` and `?`) in the filename portion only, not in the
+// directory portion.
+//
+// If a `file_pattern` matches several files, `preferred_shard` can be used to hint
+// in which file the requested tensor is likely to be found. This op will first
+// open the file at index `preferred_shard` in the list of matching files and try
+// to restore tensors from that file.  Only if some tensors or tensor slices are
+// not found in that first file, then the Op opens all the files. Setting
+// `preferred_shard` to match the value passed as the `shard` input
+// of a matching `Save` Op may speed up Restore.  This attribute only affects
+// performance, not correctness.  The default value -1 means files are processed in
+// order.
+//
+// See also `RestoreSlice`.
+//
+// Arguments:
+//	file_pattern: Must have a single element. The pattern of the files from
+// which we read the tensor.
+//	tensor_name: Must have a single element. The name of the tensor to be
+// restored.
+//	dt: The type of the tensor to be restored.
+//
+// Returns The restored tensor.
+func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dt": dt}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "ReadFile",
+		Type: "Restore",
 		Input: []tf.Input{
-			filename,
+			file_pattern, tensor_name,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// AddManySparseToTensorsMapAttr is an optional argument to AddManySparseToTensorsMap.
-type AddManySparseToTensorsMapAttr func(optionalAttr)
-
-// AddManySparseToTensorsMapContainer sets the optional container attribute to value.
-//
-// value: The container name for the `SparseTensorsMap` created by this op.
-// If not specified, defaults to ""
-func AddManySparseToTensorsMapContainer(value string) AddManySparseToTensorsMapAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
+// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear.
+type QuantizedResizeBilinearAttr func(optionalAttr)
 
-// AddManySparseToTensorsMapSharedName sets the optional shared_name attribute to value.
+// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value.
 //
-// value: The shared name for the `SparseTensorsMap` created by this op.
-// If blank, the new Operation's unique name is used.
-// If not specified, defaults to ""
-func AddManySparseToTensorsMapSharedName(value string) AddManySparseToTensorsMapAttr {
+// value: If true, rescale input by (new_height - 1) / (height - 1), which
+// exactly aligns the 4 corners of images and resized images. If false, rescale
+// by new_height / height. Treat similarly the width dimension.
+// If not specified, defaults to false
+func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["align_corners"] = value
 	}
 }
 
-// Add an `N`-minibatch `SparseTensor` to a `SparseTensorsMap`, return `N` handles.
-//
-// A `SparseTensor` of rank `R` is represented by three tensors: `sparse_indices`,
-// `sparse_values`, and `sparse_shape`, where
-//
-// ```sparse_indices.shape[1] == sparse_shape.shape[0] == R```
+// Resize quantized `images` to `size` using quantized bilinear interpolation.
 //
-// An `N`-minibatch of `SparseTensor` objects is represented as a `SparseTensor`
-// having a first `sparse_indices` column taking values between `[0, N)`, where
-// the minibatch size `N == sparse_shape[0]`.
+// Input images and output images must be quantized types.
 //
-// The input `SparseTensor` must have rank `R` greater than 1, and the first
-// dimension is treated as the minibatch dimension.  Elements of the `SparseTensor`
-// must be sorted in increasing order of this first dimension.  The stored
-// `SparseTensor` objects pointed to by each row of the output `sparse_handles`
-// will have rank `R-1`.
+// Arguments:
+//	images: 4-D with shape `[batch, height, width, channels]`.
+//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+// new size for the images.
 //
-// The `SparseTensor` values can then be read out as part of a minibatch by passing
-// the given keys as vector elements to `TakeManySparseFromTensorsMap`.  To ensure
-// the correct `SparseTensorsMap` is accessed, ensure that the same
-// `container` and `shared_name` are passed to that Op.  If no `shared_name`
-// is provided here, instead use the *name* of the Operation created by calling
-// `AddManySparseToTensorsMap` as the `shared_name` passed to
-// `TakeManySparseFromTensorsMap`.  Ensure the Operations are colocated.
 //
-// Arguments:
-//	sparse_indices: 2-D.  The `indices` of the minibatch `SparseTensor`.
-// `sparse_indices[:, 0]` must be ordered values in `[0, N)`.
-//	sparse_values: 1-D.  The `values` of the minibatch `SparseTensor`.
-//	sparse_shape: 1-D.  The `shape` of the minibatch `SparseTensor`.
-// The minibatch size `N == sparse_shape[0]`.
 //
-// Returns 1-D.  The handles of the `SparseTensor` now stored in the
-// `SparseTensorsMap`.  Shape: `[N]`.
-func AddManySparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddManySparseToTensorsMapAttr) (sparse_handles tf.Output) {
+// Returns 4-D with shape
+// `[batch, new_height, new_width, channels]`.
+func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -13939,173 +13587,192 @@ func AddManySparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_va
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AddManySparseToTensorsMap",
+		Type: "QuantizedResizeBilinear",
 		Input: []tf.Input{
-			sparse_indices, sparse_values, sparse_shape,
+			images, size, min, max,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Creates a dataset that emits the outputs of `input_dataset` `count` times.
+// Computes the minimum along segments of a tensor.
 //
-// Arguments:
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
 //
-//	count: A scalar representing the number of times that `input_dataset` should
-// be repeated. A value of `-1` indicates that it should be repeated infinitely.
+// Computes a tensor such that
+// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such
+// that `segment_ids[j] == i`.
 //
+// If the min is empty for a given segment ID `i`, `output[i] = 0`.
 //
-func RepeatDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMin.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "RepeatDataset",
+		Type: "SegmentMin",
 		Input: []tf.Input{
-			input_dataset, count,
+			data, segment_ids,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// SparseReduceMaxSparseAttr is an optional argument to SparseReduceMaxSparse.
-type SparseReduceMaxSparseAttr func(optionalAttr)
+// SdcaOptimizerAttr is an optional argument to SdcaOptimizer.
+type SdcaOptimizerAttr func(optionalAttr)
 
-// SparseReduceMaxSparseKeepDims sets the optional keep_dims attribute to value.
+// SdcaOptimizerAdaptative sets the optional adaptative attribute to value.
 //
-// value: If true, retain reduced dimensions with length 1.
+// value: Whether to use Adapative SDCA for the inner loop.
 // If not specified, defaults to false
-func SparseReduceMaxSparseKeepDims(value bool) SparseReduceMaxSparseAttr {
+func SdcaOptimizerAdaptative(value bool) SdcaOptimizerAttr {
 	return func(m optionalAttr) {
-		m["keep_dims"] = value
+		m["adaptative"] = value
 	}
 }
 
-// Computes the max of elements across dimensions of a SparseTensor.
+// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for
 //
-// This Op takes a SparseTensor and is the sparse counterpart to
-// `tf.reduce_max()`.  In contrast to SparseReduceMax, this Op returns a
-// SparseTensor.
+// linear models with L1 + L2 regularization. As global optimization objective is
+// strongly-convex, the optimizer optimizes the dual objective at each step. The
+// optimizer applies each update one example at a time. Examples are sampled
+// uniformly, and the optimizer is learning rate free and enjoys linear convergence
+// rate.
 //
-// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-// with length 1.
+// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
+// Shai Shalev-Shwartz, Tong Zhang. 2012
 //
-// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-// with a single element is returned.  Additionally, the axes can be negative,
-// which are interpreted according to the indexing rules in Python.
+// $$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$
+//
+// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
+// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
+// Peter Richtarik, Martin Takac. 2015
+//
+// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
+// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015
 //
 // Arguments:
-//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-//	input_shape: 1-D.  Shape of the input SparseTensor.
-//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
-func SparseReduceMaxSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
+//	sparse_example_indices: a list of vectors which contain example indices.
+//	sparse_feature_indices: a list of vectors which contain feature indices.
+//	sparse_feature_values: a list of vectors which contains feature value
+// associated with each feature group.
+//	dense_features: a list of matrices which contains the dense feature values.
+//	example_weights: a vector which contains the weight associated with each
+// example.
+//	example_labels: a vector which contains the label/target associated with each
+// example.
+//	sparse_indices: a list of vectors where each value is the indices which has
+// corresponding weights in sparse_weights. This field maybe omitted for the
+// dense approach.
+//	sparse_weights: a list of vectors where each value is the weight associated with
+// a sparse feature group.
+//	dense_weights: a list of vectors where the values are the weights associated
+// with a dense feature group.
+//	example_state_data: a list of vectors containing the example state data.
+//	loss_type: Type of the primal loss. Currently SdcaSolver supports logistic,
+// squared and hinge losses.
+//	l1: Symmetric l1 regularization strength.
+//	l2: Symmetric l2 regularization strength.
+//	num_loss_partitions: Number of partitions of the global loss function.
+//	num_inner_iterations: Number of iterations per mini-batch.
+//
+// Returns a list of vectors containing the updated example state
+// data.a list of vectors where each value is the delta
+// weights associated with a sparse feature group.a list of vectors where the values are the delta
+// weights associated with a dense feature group.
+func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerAttr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseReduceMaxSparse",
+		Type: "SdcaOptimizer",
 		Input: []tf.Input{
-			input_indices, input_values, input_shape, reduction_axes,
+			tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	out_example_state_data = op.Output(idx)
+	if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, "out_delta_sparse_weights"); err != nil {
+		scope.UpdateErr("SdcaOptimizer", err)
+		return
+	}
+	if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil {
+		scope.UpdateErr("SdcaOptimizer", err)
+		return
+	}
+	return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights
 }
 
-// ResourceApplyAdagradDAAttr is an optional argument to ResourceApplyAdagradDA.
-type ResourceApplyAdagradDAAttr func(optionalAttr)
+// SparseMatMulAttr is an optional argument to SparseMatMul.
+type SparseMatMulAttr func(optionalAttr)
 
-// ResourceApplyAdagradDAUseLocking sets the optional use_locking attribute to value.
-//
-// value: If True, updating of the var and accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// SparseMatMulTransposeA sets the optional transpose_a attribute to value.
 // If not specified, defaults to false
-func ResourceApplyAdagradDAUseLocking(value bool) ResourceApplyAdagradDAAttr {
+func SparseMatMulTransposeA(value bool) SparseMatMulAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["transpose_a"] = value
 	}
 }
 
-// Update '*var' according to the proximal adagrad scheme.
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	gradient_accumulator: Should be from a Variable().
-//	gradient_squared_accumulator: Should be from a Variable().
-//	grad: The gradient.
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	global_step: Training step number. Must be a scalar.
-//
-// Returns the created operation.
-func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceApplyAdagradDAAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdagradDA",
-		Input: []tf.Input{
-			var_, gradient_accumulator, gradient_squared_accumulator, grad, lr, l1, l2, global_step,
-		},
-		Attrs: attrs,
+// SparseMatMulTransposeB sets the optional transpose_b attribute to value.
+// If not specified, defaults to false
+func SparseMatMulTransposeB(value bool) SparseMatMulAttr {
+	return func(m optionalAttr) {
+		m["transpose_b"] = value
 	}
-	return scope.AddOperation(opspec)
 }
 
-// FractionalMaxPoolGradAttr is an optional argument to FractionalMaxPoolGrad.
-type FractionalMaxPoolGradAttr func(optionalAttr)
+// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value.
+// If not specified, defaults to false
+func SparseMatMulAIsSparse(value bool) SparseMatMulAttr {
+	return func(m optionalAttr) {
+		m["a_is_sparse"] = value
+	}
+}
 
-// FractionalMaxPoolGradOverlapping sets the optional overlapping attribute to value.
-//
-// value: When set to True, it means when pooling, the values at the boundary
-// of adjacent pooling cells are used by both cells. For example:
-//
-// `index  0  1  2  3  4`
-//
-// `value  20 5  16 3  7`
-//
-// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
-// The result would be [20, 16] for fractional max pooling.
+// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value.
 // If not specified, defaults to false
-func FractionalMaxPoolGradOverlapping(value bool) FractionalMaxPoolGradAttr {
+func SparseMatMulBIsSparse(value bool) SparseMatMulAttr {
 	return func(m optionalAttr) {
-		m["overlapping"] = value
+		m["b_is_sparse"] = value
 	}
 }
 
-// Computes gradient of the FractionalMaxPool function.
+// Multiply matrix "a" by matrix "b".
 //
-// Arguments:
-//	orig_input: Original input for `fractional_max_pool`
-//	orig_output: Original output for `fractional_max_pool`
-//	out_backprop: 4-D with shape `[batch, height, width, channels]`.  Gradients
-// w.r.t. the output of `fractional_max_pool`.
-//	row_pooling_sequence: row pooling sequence, form pooling region with
-// col_pooling_sequence.
-//	col_pooling_sequence: column pooling sequence, form pooling region with
-// row_pooling sequence.
+// The inputs must be two-dimensional matrices and the inner dimension of "a" must
+// match the outer dimension of "b". This op is optimized for the case where at
+// least one of "a" or "b" is sparse. The breakeven for using this versus a dense
+// matrix multiply on one platform was 30% zero values in the sparse matrix.
 //
-// Returns 4-D.  Gradients w.r.t. the input of `fractional_max_pool`.
-func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalMaxPoolGradAttr) (output tf.Output) {
+// The gradient computation of this operation will only take advantage of sparsity
+// in the input gradient when that gradient comes from a Relu.
+func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -14114,9 +13781,9 @@ func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Ou
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FractionalMaxPoolGrad",
+		Type: "SparseMatMul",
 		Input: []tf.Input{
-			orig_input, orig_output, out_backprop, row_pooling_sequence, col_pooling_sequence,
+			a, b,
 		},
 		Attrs: attrs,
 	}
@@ -14124,53 +13791,52 @@ func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Ou
 	return op.Output(0)
 }
 
-// Does nothing. Serves as a control trigger for scheduling.
+// Computes the power of one value to another.
 //
-// Only useful as a placeholder for control edges.
+// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for
+// corresponding elements in `x` and `y`. For example:
 //
-// Returns the created operation.
-func ControlTrigger(scope *Scope) (o *tf.Operation) {
+// ```
+// # tensor 'x' is [[2, 2]], [3, 3]]
+// # tensor 'y' is [[8, 16], [2, 3]]
+// tf.pow(x, y) ==> [[256, 65536], [9, 27]]
+// ```
+func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ControlTrigger",
+		Type: "Pow",
+		Input: []tf.Input{
+			x, y,
+		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// ResourceApplyAddSignAttr is an optional argument to ResourceApplyAddSign.
-type ResourceApplyAddSignAttr func(optionalAttr)
+// ShapeAttr is an optional argument to Shape.
+type ShapeAttr func(optionalAttr)
 
-// ResourceApplyAddSignUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var and m tensors is
-// protected by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyAddSignUseLocking(value bool) ResourceApplyAddSignAttr {
+// ShapeOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_INT32
+func ShapeOutType(value tf.DataType) ShapeAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["out_type"] = value
 	}
 }
 
-// Update '*var' according to the AddSign update.
+// Returns the shape of a tensor.
 //
-// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
-// update <- (alpha + sign_decay * sign(g) *sign(m)) * g
-// variable <- variable - lr_t * update
+// This operation returns a 1-D integer tensor representing the shape of `input`.
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	m: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	alpha: Must be a scalar.
-//	sign_decay: Must be a scalar.
-//	beta: Must be a scalar.
-//	grad: The gradient.
+// For example:
 //
-// Returns the created operation.
-func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, alpha tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyAddSignAttr) (o *tf.Operation) {
+// ```
+// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
+// shape(t) ==> [2, 2, 3]
+// ```
+func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -14179,89 +13845,92 @@ func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Outpu
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyAddSign",
+		Type: "Shape",
 		Input: []tf.Input{
-			var_, m, lr, alpha, sign_decay, beta, grad,
+			input,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Reorders a SparseTensor into the canonical, row-major ordering.
-//
-// Note that by convention, all sparse ops preserve the canonical ordering along
-// increasing dimension number. The only time ordering can be violated is during
-// manual manipulation of the indices and values vectors to add entries.
-//
-// Reordering does not affect the shape of the SparseTensor.
-//
-// If the tensor has rank `R` and `N` non-empty values, `input_indices` has
-// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`.
+// Computes fingerprints of the input strings.
 //
 // Arguments:
-//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-//	input_shape: 1-D.  Shape of the input SparseTensor.
+//	input: vector of strings to compute fingerprints on.
 //
-// Returns 2-D.  `N x R` matrix with the same indices as input_indices, but
-// in canonical row-major ordering.1-D.  `N` non-empty values corresponding to `output_indices`.
-func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
+// Returns a (N,2) shaped matrix where N is the number of elements in the input
+// vector. Each row contains the low and high parts of the fingerprint.
+func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseReorder",
+		Type: "SdcaFprint",
 		Input: []tf.Input{
-			input_indices, input_values, input_shape,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// PackAttr is an optional argument to Pack.
-type PackAttr func(optionalAttr)
+// RandomPoissonV2Attr is an optional argument to RandomPoissonV2.
+type RandomPoissonV2Attr func(optionalAttr)
 
-// PackAxis sets the optional axis attribute to value.
+// RandomPoissonV2Seed sets the optional seed attribute to value.
 //
-// value: Dimension along which to pack.  Negative values wrap around, so the
-// valid range is `[-(R+1), R+1)`.
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
 // If not specified, defaults to 0
-func PackAxis(value int64) PackAttr {
+func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr {
 	return func(m optionalAttr) {
-		m["axis"] = value
+		m["seed"] = value
 	}
 }
 
-// Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor.
-//
-// Packs the `N` tensors in `values` into a tensor with rank one higher than each
-// tensor in `values`, by packing them along the `axis` dimension.
-// Given a list of tensors of shape `(A, B, C)`;
-//
-// if `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`.
-// if `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`.
-// Etc.
+// RandomPoissonV2Seed2 sets the optional seed2 attribute to value.
 //
-// For example:
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// RandomPoissonV2Dtype sets the optional dtype attribute to value.
+// If not specified, defaults to DT_INT64
+func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
+	}
+}
+
+// Outputs random values from the Poisson distribution(s) described by rate.
 //
-// ```
-// # 'x' is [1, 4]
-// # 'y' is [2, 5]
-// # 'z' is [3, 6]
-// pack([x, y, z]) => [[1, 4], [2, 5], [3, 6]]  # Pack along first dim.
-// pack([x, y, z], axis=1) => [[1, 2, 3], [4, 5, 6]]
-// ```
+// This op uses two algorithms, depending on rate. If rate >= 10, then
+// the algorithm by Hormann is used to acquire samples via
+// transformation-rejection.
+// See http://www.sciencedirect.com/science/article/pii/0167668793909974.
 //
-// This is the opposite of `unpack`.
+// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform
+// random variables.
+// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer
+// Programming, Volume 2. Addison Wesley
 //
 // Arguments:
-//	values: Must be of same shape and type.
+//	shape: 1-D integer tensor. Shape of independent samples to draw from each
+// distribution described by the shape parameters given in rate.
+//	rate: A tensor in which each scalar is a "rate" parameter describing the
+// associated poisson distribution.
 //
-// Returns The packed tensor.
-func Pack(scope *Scope, values []tf.Output, optional ...PackAttr) (output tf.Output) {
+// Returns A tensor with shape `shape + shape(rate)`. Each slice
+// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
+// `rate[i0, i1, ...iN]`.
+func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -14270,9 +13939,9 @@ func Pack(scope *Scope, values []tf.Output, optional ...PackAttr) (output tf.Out
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Pack",
+		Type: "RandomPoissonV2",
 		Input: []tf.Input{
-			tf.OutputList(values),
+			shape, rate,
 		},
 		Attrs: attrs,
 	}
@@ -14280,41 +13949,59 @@ func Pack(scope *Scope, values []tf.Output, optional ...PackAttr) (output tf.Out
 	return op.Output(0)
 }
 
-// Deprecated. Use TensorArraySplitV3
-func TensorArraySplitV2(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArraySplitV2",
-		Input: []tf.Input{
-			handle, value, lengths, flow_in,
-		},
+// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve.
+type MatrixTriangularSolveAttr func(optionalAttr)
+
+// MatrixTriangularSolveLower sets the optional lower attribute to value.
+//
+// value: Boolean indicating whether the innermost matrices in `matrix` are
+// lower or upper triangular.
+// If not specified, defaults to true
+func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr {
+	return func(m optionalAttr) {
+		m["lower"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// QuantizedReluAttr is an optional argument to QuantizedRelu.
-type QuantizedReluAttr func(optionalAttr)
-
-// QuantizedReluOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_QUINT8
-func QuantizedReluOutType(value tf.DataType) QuantizedReluAttr {
+// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value.
+//
+// value: Boolean indicating whether to solve with `matrix` or its (block-wise)
+//          adjoint.
+//
+// @compatibility(numpy)
+// Equivalent to np.linalg.triangular_solve
+// @end_compatibility
+// If not specified, defaults to false
+func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr {
 	return func(m optionalAttr) {
-		m["out_type"] = value
+		m["adjoint"] = value
 	}
 }
 
-// Computes Quantized Rectified Linear: `max(features, 0)`
+// Solves systems of linear equations with upper or lower triangular matrices by
 //
-// Arguments:
+// backsubstitution.
 //
-//	min_features: The float value that the lowest quantized value represents.
-//	max_features: The float value that the highest quantized value represents.
+// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form
+// square matrices. If `lower` is `True` then the strictly upper triangular part
+// of each inner-most matrix is assumed to be zero and not accessed.
+// If `lower` is False then the strictly lower triangular part of each inner-most
+// matrix is assumed to be zero and not accessed.
+// `rhs` is a tensor of shape `[..., M, K]`.
 //
-// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents.
-func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) {
+// The output is a tensor of shape `[..., M, K]`. If `adjoint` is
+// `True` then the innermost matrices in `output` satisfy matrix equations
+// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`.
+// If `adjoint` is `False` then the strictly then the  innermost matrices in
+// `output` satisfy matrix equations
+// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`.
+//
+// Arguments:
+//	matrix: Shape is `[..., M, M]`.
+//	rhs: Shape is `[..., M, K]`.
+//
+// Returns Shape is `[..., M, K]`.
+func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -14323,136 +14010,118 @@ func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QuantizedRelu",
+		Type: "MatrixTriangularSolve",
 		Input: []tf.Input{
-			features, min_features, max_features,
+			matrix, rhs,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Records the bytes size of each element of `input_dataset` in a StatsAggregator.
-func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Computes inverse hyperbolic sine of x element-wise.
+func Asinh(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "BytesProducedStatsDataset",
+		Type: "Asinh",
 		Input: []tf.Input{
-			input_dataset, tag,
+			x,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// QrAttr is an optional argument to Qr.
-type QrAttr func(optionalAttr)
-
-// QrFullMatrices sets the optional full_matrices attribute to value.
-//
-// value: If true, compute full-sized `q` and `r`. If false
-// (the default), compute only the leading `P` columns of `q`.
-// If not specified, defaults to false
-func QrFullMatrices(value bool) QrAttr {
-	return func(m optionalAttr) {
-		m["full_matrices"] = value
-	}
-}
-
-// Computes the QR decompositions of one or more matrices.
-//
-// Computes the QR decomposition of each inner matrix in `tensor` such that
-// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])`
-//
-// ```python
-// # a is a tensor.
-// # q is a tensor of orthonormal matrices.
-// # r is a tensor of upper triangular matrices.
-// q, r = qr(a)
-// q_full, r_full = qr(a, full_matrices=True)
-// ```
+// Creates a dataset with a range of values. Corresponds to python's xrange.
 //
 // Arguments:
-//	input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions
-// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`.
+//	start: corresponds to start in python's xrange().
+//	stop: corresponds to stop in python's xrange().
+//	step: corresponds to step in python's xrange().
 //
-// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then
-// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is
-// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is
-// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`.
-func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) {
+//
+func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "Qr",
+		Type: "RangeDataset",
 		Input: []tf.Input{
-			input,
+			start, stop, step,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// AudioSummaryAttr is an optional argument to AudioSummary.
-type AudioSummaryAttr func(optionalAttr)
+// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput.
+type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr)
 
-// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value.
-//
-// value: Max number of batch elements to generate audio for.
-// If not specified, defaults to 3
+// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value.
 //
-// REQUIRES: value >= 1
-func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, height, width, channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, channels, height, width].
+// If not specified, defaults to "NHWC"
+func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr {
 	return func(m optionalAttr) {
-		m["max_outputs"] = value
+		m["data_format"] = value
 	}
 }
 
-// Outputs a `Summary` protocol buffer with audio.
-//
-// DEPRECATED at GraphDef version 15: Use AudioSummaryV2.
-//
-// The summary has up to `max_outputs` summary values containing audio. The
-// audio is built from `tensor` which must be 3-D with shape `[batch_size,
-// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
-// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
-//
-// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-// build the `tag` of the summary values:
+// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value.
 //
-// *  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
-// *  If `max_outputs` is greater than 1, the summary value tags are
-//    generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes the gradients of depthwise convolution with respect to the input.
 //
 // Arguments:
-//	tag: Scalar. Used to build the `tag` attribute of the summary values.
-//	tensor: 2-D of shape `[batch_size, frames]`.
-//	sample_rate: The sample rate of the signal in hertz.
+//	input_sizes: An integer vector representing the shape of `input`, based
+// on `data_format`.  For example, if `data_format` is 'NHWC' then
+//  `input` is a 4-D `[batch, height, width, channels]` tensor.
+//	filter: 4-D with shape
+// `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
+//	out_backprop: 4-D with shape  based on `data_format`.
+// For example, if `data_format` is 'NHWC' then
+// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
+// Gradients w.r.t. the output of the convolution.
+//	strides: The stride of the sliding window for each dimension of the input
+// of the convolution.
+//	padding: The type of padding algorithm to use.
 //
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) {
+// Returns 4-D with shape according to `data_format`.  For example, if
+// `data_format` is 'NHWC', output shape is `[batch, in_height,
+// in_width, in_channels]`.  Gradient w.r.t. the input of the
+// convolution.
+func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"sample_rate": sample_rate}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AudioSummary",
+		Type: "DepthwiseConv2dNativeBackpropInput",
 		Input: []tf.Input{
-			tag, tensor,
+			input_sizes, filter, out_backprop,
 		},
 		Attrs: attrs,
 	}
@@ -14460,188 +14129,183 @@ func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate flo
 	return op.Output(0)
 }
 
-// Reverses specific dimensions of a tensor.
-//
-// NOTE `tf.reverse` has now changed behavior in preparation for 1.0.
-// `tf.reverse_v2` is currently an alias that will be deprecated before TF 1.0.
+// Adds sparse updates to the variable referenced by `resource`.
 //
-// Given a `tensor`, and a `int32` tensor `axis` representing the set of
-// dimensions of `tensor` to reverse. This operation reverses each dimension
-// `i` for which there exists `j` s.t. `axis[j] == i`.
+// This operation computes
 //
-// `tensor` can have up to 8 dimensions. The number of dimensions specified
-// in `axis` may be 0 or more entries. If an index is specified more than
-// once, a InvalidArgument error is raised.
+//     # Scalar indices
+//     ref[indices, ...] += updates[...]
 //
-// For example:
+//     # Vector indices (for each i)
+//     ref[indices[i], ...] += updates[i, ...]
 //
-// ```
-// # tensor 't' is [[[[ 0,  1,  2,  3],
-// #                  [ 4,  5,  6,  7],
-// #                  [ 8,  9, 10, 11]],
-// #                 [[12, 13, 14, 15],
-// #                  [16, 17, 18, 19],
-// #                  [20, 21, 22, 23]]]]
-// # tensor 't' shape is [1, 2, 3, 4]
+//     # High rank indices (for each i, ..., j)
+//     ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]
 //
-// # 'dims' is [3] or 'dims' is [-1]
-// reverse(t, dims) ==> [[[[ 3,  2,  1,  0],
-//                         [ 7,  6,  5,  4],
-//                         [ 11, 10, 9, 8]],
-//                        [[15, 14, 13, 12],
-//                         [19, 18, 17, 16],
-//                         [23, 22, 21, 20]]]]
+// Duplicate entries are handled correctly: if multiple `indices` reference
+// the same location, their contributions add.
 //
-// # 'dims' is '[1]' (or 'dims' is '[-3]')
-// reverse(t, dims) ==> [[[[12, 13, 14, 15],
-//                         [16, 17, 18, 19],
-//                         [20, 21, 22, 23]
-//                        [[ 0,  1,  2,  3],
-//                         [ 4,  5,  6,  7],
-//                         [ 8,  9, 10, 11]]]]
+// Requires `updates.shape = indices.shape + ref.shape[1:]`.
 //
-// # 'dims' is '[2]' (or 'dims' is '[-2]')
-// reverse(t, dims) ==> [[[[8, 9, 10, 11],
-//                         [4, 5, 6, 7],
-//                         [0, 1, 2, 3]]
-//                        [[20, 21, 22, 23],
-//                         [16, 17, 18, 19],
-//                         [12, 13, 14, 15]]]]
-// ```
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
+// </div>
 //
 // Arguments:
-//	tensor: Up to 8-D.
-//	axis: 1-D. The indices of the dimensions to reverse. Must be in the range
-// `[-rank(tensor), rank(tensor))`.
+//	resource: Should be from a `Variable` node.
+//	indices: A tensor of indices into the first dimension of `ref`.
+//	updates: A tensor of updated values to add to `ref`.
 //
-// Returns The same shape as `tensor`.
-func ReverseV2(scope *Scope, tensor tf.Output, axis tf.Output) (output tf.Output) {
+// Returns the created operation.
+func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ReverseV2",
+		Type: "ResourceScatterAdd",
 		Input: []tf.Input{
-			tensor, axis,
+			resource, indices, updates,
 		},
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// ResourceApplyCenteredRMSPropAttr is an optional argument to ResourceApplyCenteredRMSProp.
-type ResourceApplyCenteredRMSPropAttr func(optionalAttr)
-
-// ResourceApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value.
+// Computes the gradient for the inverse of `x` wrt its input.
 //
-// value: If `True`, updating of the var, mg, ms, and mom tensors is
-// protected by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyCenteredRMSPropUseLocking(value bool) ResourceApplyCenteredRMSPropAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
+// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`
+// is the corresponding input gradient.
+func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReciprocalGrad",
+		Input: []tf.Input{
+			y, dy,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Update '*var' according to the centered RMSProp algorithm.
-//
-// The centered RMSProp algorithm uses an estimate of the centered second moment
-// (i.e., the variance) for normalization, as opposed to regular RMSProp, which
-// uses the (uncentered) second moment. This often helps with training, but is
-// slightly more expensive in terms of computation and memory.
-//
-// Note that in dense implementation of this algorithm, mg, ms, and mom will
-// update even if the grad is zero, but in this sparse implementation, mg, ms,
-// and mom will not update in iterations during which the grad is zero.
-//
-// mean_square = decay * mean_square + (1-decay) * gradient ** 2
-// mean_grad = decay * mean_grad + (1-decay) * gradient
-//
-// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
-//
-// mg <- rho * mg_{t-1} + (1-rho) * grad
-// ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
-// var <- var - mom
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	mg: Should be from a Variable().
-//	ms: Should be from a Variable().
-//	mom: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	rho: Decay rate. Must be a scalar.
-//
-//	epsilon: Ridge term. Must be a scalar.
-//	grad: The gradient.
+// Returns the min of x and y (i.e. x < y ? x : y) element-wise.
 //
-// Returns the created operation.
-func ResourceApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyCenteredRMSPropAttr) (o *tf.Operation) {
+// *NOTE*: `Minimum` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyCenteredRMSProp",
+		Type: "Minimum",
 		Input: []tf.Input{
-			var_, mg, ms, mom, lr, rho, momentum, epsilon, grad,
+			x, y,
 		},
-		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Inverse 3D fast Fourier transform.
+// MfccAttr is an optional argument to Mfcc.
+type MfccAttr func(optionalAttr)
+
+// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value.
 //
-// Computes the inverse 3-dimensional discrete Fourier transform over the
-// inner-most 3 dimensions of `input`.
+// value: The highest frequency to use when calculating the
+// ceptstrum.
+// If not specified, defaults to 4000
+func MfccUpperFrequencyLimit(value float32) MfccAttr {
+	return func(m optionalAttr) {
+		m["upper_frequency_limit"] = value
+	}
+}
+
+// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value.
 //
-// Arguments:
-//	input: A complex64 tensor.
+// value: The lowest frequency to use when calculating the
+// ceptstrum.
+// If not specified, defaults to 20
+func MfccLowerFrequencyLimit(value float32) MfccAttr {
+	return func(m optionalAttr) {
+		m["lower_frequency_limit"] = value
+	}
+}
+
+// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value.
 //
-// Returns A complex64 tensor of the same shape as `input`. The inner-most 3
-//   dimensions of `input` are replaced with their inverse 3D Fourier transform.
+// value: Resolution of the Mel bank used internally.
+// If not specified, defaults to 40
+func MfccFilterbankChannelCount(value int64) MfccAttr {
+	return func(m optionalAttr) {
+		m["filterbank_channel_count"] = value
+	}
+}
+
+// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value.
 //
-// @compatibility(numpy)
-// Equivalent to np.fft.ifftn with 3 dimensions.
-// @end_compatibility
-func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) {
+// value: How many output channels to produce per time slice.
+// If not specified, defaults to 13
+func MfccDctCoefficientCount(value int64) MfccAttr {
+	return func(m optionalAttr) {
+		m["dct_coefficient_count"] = value
+	}
+}
+
+// Transforms a spectrogram into a form that's useful for speech recognition.
+//
+// Mel Frequency Cepstral Coefficients are a way of representing audio data that's
+// been effective as an input feature for machine learning. They are created by
+// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the
+// higher frequencies that are less significant to the human ear. They have a long
+// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
+// is a good resource to learn more.
+//
+// Arguments:
+//	spectrogram: Typically produced by the Spectrogram op, with magnitude_squared
+// set to true.
+//	sample_rate: How many samples per second the source audio used.
+func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "IFFT3D",
+		Type: "Mfcc",
 		Input: []tf.Input{
-			input,
+			spectrogram, sample_rate,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Increments variable pointed to by 'resource' until it reaches 'limit'.
+// Returns the element-wise sum of a list of tensors.
 //
-// Arguments:
-//	resource: Should be from a scalar `Variable` node.
-//	limit: If incrementing ref would bring it above limit, instead generates an
-// 'OutOfRange' error.
+// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not
+// wait for all of its inputs to be ready before beginning to sum. This can
+// save memory if inputs are ready at different times, since minimum temporary
+// storage is proportional to the output size rather than the inputs size.
 //
+// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.
 //
-// Returns A copy of the input before increment. If nothing else modifies the
-// input, the values produced will all be distinct.
-func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) {
+// Returns a `Tensor` of same shape and type as the elements of `inputs`.
+//
+// Arguments:
+//	inputs: A list of `Tensor` objects, each with same shape and type.
+//	shape: Shape of elements of `inputs`.
+func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"limit": limit, "T": T}
+	attrs := map[string]interface{}{"shape": shape}
 	opspec := tf.OpSpec{
-		Type: "ResourceCountUpTo",
+		Type: "AccumulateNV2",
 		Input: []tf.Input{
-			resource,
+			tf.OutputList(inputs),
 		},
 		Attrs: attrs,
 	}
@@ -14649,84 +14313,106 @@ func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataT
 	return op.Output(0)
 }
 
-// Looks up keys in a table, outputs the corresponding values.
+// Convert the quantized 'input' tensor into a lower-precision 'output', using the
 //
-// The tensor `keys` must of the same type as the keys of the table.
-// The output `values` is of the type of the table values.
+// actual distribution of the values to maximize the usage of the lower bit depth
+// and adjusting the output min and max ranges accordingly.
 //
-// The scalar `default_value` is the value output for keys not present in the
-// table. It must also be of the same type as the table values.
+// [input_min, input_max] are scalar floats that specify the range for the float
+// interpretation of the 'input' data. For example, if input_min is -1.0f and
+// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
+// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+//
+// This operator tries to squeeze as much precision as possible into an output with
+// a lower bit depth by calculating the actual min and max values found in the
+// data. For example, maybe that quint16 input has no values lower than 16,384 and
+// none higher than 49,152. That means only half the range is actually needed, all
+// the float interpretations are between -0.5f and 0.5f, so if we want to compress
+// the data into a quint8 output, we can use that range rather than the theoretical
+// -1.0f to 1.0f that is suggested by the input min and max.
+//
+// In practice, this is most useful for taking output from operations like
+// QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
+// may have large potential output ranges, but in practice have a distribution of
+// input values that only uses a small fraction of the possible range. By feeding
+// that output into this operator, we can reduce it from 32 bits down to 8 with
+// minimal loss of accuracy.
 //
 // Arguments:
-//	table_handle: Handle to the table.
-//	keys: Any shape.  Keys to look up.
 //
+//	input_min: The float value that the minimum quantized input value represents.
+//	input_max: The float value that the maximum quantized input value represents.
+//	out_type: The type of the output. Should be a lower bit depth than Tinput.
 //
-// Returns Same shape as `keys`.  Values found in the table, or `default_values`
-// for missing keys.
-func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) {
+// Returns The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents.
+func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"out_type": out_type}
 	opspec := tf.OpSpec{
-		Type: "LookupTableFindV2",
+		Type: "QuantizeDownAndShrinkRange",
 		Input: []tf.Input{
-			table_handle, keys, default_value,
+			input, input_min, input_max,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput.
-type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr)
+// RandomGammaAttr is an optional argument to RandomGamma.
+type RandomGammaAttr func(optionalAttr)
 
-// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value.
+// RandomGammaSeed sets the optional seed attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, height, width, channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, channels, height, width].
-// If not specified, defaults to "NHWC"
-func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr {
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func RandomGammaSeed(value int64) RandomGammaAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["seed"] = value
 	}
 }
 
-// Computes the gradients of depthwise convolution with respect to the input.
+// RandomGammaSeed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomGammaSeed2(value int64) RandomGammaAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Outputs random values from the Gamma distribution(s) described by alpha.
+//
+// This op uses the algorithm by Marsaglia et al. to acquire samples via
+// transformation-rejection from pairs of uniform and normal random variables.
+// See http://dl.acm.org/citation.cfm?id=358414
 //
 // Arguments:
-//	input_sizes: An integer vector representing the shape of `input`, based
-// on `data_format`.  For example, if `data_format` is 'NHWC' then
-//  `input` is a 4-D `[batch, height, width, channels]` tensor.
-//	filter: 4-D with shape
-// `[filter_height, filter_width, in_channels, depthwise_multiplier]`.
-//	out_backprop: 4-D with shape  based on `data_format`.
-// For example, if `data_format` is 'NHWC' then
-// out_backprop shape is `[batch, out_height, out_width, out_channels]`.
-// Gradients w.r.t. the output of the convolution.
-//	strides: The stride of the sliding window for each dimension of the input
-// of the convolution.
-//	padding: The type of padding algorithm to use.
+//	shape: 1-D integer tensor. Shape of independent samples to draw from each
+// distribution described by the shape parameters given in alpha.
+//	alpha: A tensor in which each scalar is a "shape" parameter describing the
+// associated gamma distribution.
 //
-// Returns 4-D with shape according to `data_format`.  For example, if
-// `data_format` is 'NHWC', output shape is `[batch, in_height,
-// in_width, in_channels]`.  Gradient w.r.t. the input of the
-// convolution.
-func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) {
+// Returns A tensor with shape `shape + shape(alpha)`. Each slice
+// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for
+// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha.
+func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DepthwiseConv2dNativeBackpropInput",
+		Type: "RandomGamma",
 		Input: []tf.Input{
-			input_sizes, filter, out_backprop,
+			shape, alpha,
 		},
 		Attrs: attrs,
 	}
@@ -14734,46 +14420,47 @@ func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, fil
 	return op.Output(0)
 }
 
-// MatrixSolveAttr is an optional argument to MatrixSolve.
-type MatrixSolveAttr func(optionalAttr)
+// AvgPool3DGradAttr is an optional argument to AvgPool3DGrad.
+type AvgPool3DGradAttr func(optionalAttr)
 
-// MatrixSolveAdjoint sets the optional adjoint attribute to value.
+// AvgPool3DGradDataFormat sets the optional data_format attribute to value.
 //
-// value: Boolean indicating whether to solve with `matrix` or its (block-wise)
-// adjoint.
-// If not specified, defaults to false
-func MatrixSolveAdjoint(value bool) MatrixSolveAttr {
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func AvgPool3DGradDataFormat(value string) AvgPool3DGradAttr {
 	return func(m optionalAttr) {
-		m["adjoint"] = value
+		m["data_format"] = value
 	}
 }
 
-// Solves systems of linear equations.
-//
-// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-// form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is
-// a tensor shape `[..., M, K]`.  If `adjoint` is `False` then each output matrix
-// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`.
-// If `adjoint` is `True` then each output matrix satisfies
-// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`.
+// Computes gradients of average pooling function.
 //
 // Arguments:
-//	matrix: Shape is `[..., M, M]`.
-//	rhs: Shape is `[..., M, K]`.
+//	orig_input_shape: The original input dimensions.
+//	grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
+//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
+// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
 //
-// Returns Shape is `[..., M, K]`.
-func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) {
+// Returns The backprop for input.
+func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MatrixSolve",
+		Type: "AvgPool3DGrad",
 		Input: []tf.Input{
-			matrix, rhs,
+			orig_input_shape, grad,
 		},
 		Attrs: attrs,
 	}
@@ -14781,83 +14468,115 @@ func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...Matr
 	return op.Output(0)
 }
 
-// Transforms a Tensor into a serialized TensorProto proto.
+// ParseSingleSequenceExampleAttr is an optional argument to ParseSingleSequenceExample.
+type ParseSingleSequenceExampleAttr func(optionalAttr)
+
+// ParseSingleSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value.
 //
-// Arguments:
-//	tensor: A Tensor of type `T`.
+// value: A list of Ncontext_sparse types; the data types of data in
+// each context Feature given in context_sparse_keys.
+// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
+// DT_INT64 (Int64List), and DT_STRING (BytesList).
+// If not specified, defaults to <>
 //
-// Returns A serialized TensorProto proto of the input tensor.
-func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SerializeTensor",
-		Input: []tf.Input{
-			tensor,
-		},
+// REQUIRES: len(value) >= 0
+func ParseSingleSequenceExampleContextSparseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["context_sparse_types"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad.
-type FusedBatchNormGradAttr func(optionalAttr)
-
-// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value.
+// ParseSingleSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value.
+// If not specified, defaults to <>
 //
-// value: A small float number added to the variance of x.
-// If not specified, defaults to 0.0001
-func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr {
+// REQUIRES: len(value) >= 0
+func ParseSingleSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr {
 	return func(m optionalAttr) {
-		m["epsilon"] = value
+		m["feature_list_dense_types"] = value
 	}
 }
 
-// FusedBatchNormGradDataFormat sets the optional data_format attribute to value.
+// ParseSingleSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value.
 //
-// value: The data format for y_backprop, x, x_backprop.
-// Either "NHWC" (default) or "NCHW".
-// If not specified, defaults to "NHWC"
-func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr {
+// value: A list of Ncontext_dense shapes; the shapes of data in
+// each context Feature given in context_dense_keys.
+// The number of elements in the Feature corresponding to context_dense_key[j]
+// must always equal context_dense_shapes[j].NumEntries().
+// The shape of context_dense_values[j] will match context_dense_shapes[j].
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSingleSequenceExampleContextDenseShapes(value []tf.Shape) ParseSingleSequenceExampleAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["context_dense_shapes"] = value
 	}
 }
 
-// FusedBatchNormGradIsTraining sets the optional is_training attribute to value.
+// ParseSingleSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value.
 //
-// value: A bool value to indicate the operation is for training (default)
-// or inference.
-// If not specified, defaults to true
-func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr {
+// value: A list of Nfeature_list_sparse types; the data types
+// of data in each FeatureList given in feature_list_sparse_keys.
+// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList),
+// DT_INT64 (Int64List), and DT_STRING (BytesList).
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func ParseSingleSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr {
 	return func(m optionalAttr) {
-		m["is_training"] = value
+		m["feature_list_sparse_types"] = value
 	}
 }
 
-// Gradient for batch normalization.
+// ParseSingleSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value.
 //
-// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
-// The size of 1D Tensors matches the dimension C of the 4D Tensors.
+// value: A list of Nfeature_list_dense shapes; the shapes of
+// data in each FeatureList given in feature_list_dense_keys.
+// The shape of each Feature in the FeatureList corresponding to
+// feature_list_dense_key[j] must always equal
+// feature_list_dense_shapes[j].NumEntries().
+// If not specified, defaults to <>
 //
-// Arguments:
-//	y_backprop: A 4D Tensor for the gradient with respect to y.
-//	x: A 4D Tensor for input data.
-//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
-//	reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
-// mean to be reused in gradient computation. When is_training is
-// False, a 1D Tensor for the population mean to be reused in both
-// 1st and 2nd order gradient computation.
-//	reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
-// variance (inverted variance in the cuDNN case) to be reused in
-// gradient computation. When is_training is False, a 1D Tensor
-// for the population variance to be reused in both 1st and 2nd
-// order gradient computation.
+// REQUIRES: len(value) >= 0
+func ParseSingleSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSingleSequenceExampleAttr {
+	return func(m optionalAttr) {
+		m["feature_list_dense_shapes"] = value
+	}
+}
+
+// Transforms a scalar brain.SequenceExample proto (as strings) into typed tensors.
 //
-// Returns A 4D Tensor for the gradient with respect to x.A 1D Tensor for the gradient with respect to scale.A 1D Tensor for the gradient with respect to offset.Unused placeholder to match the mean input in FusedBatchNorm.Unused placeholder to match the variance input
-// in FusedBatchNorm.
-func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradAttr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) {
+// Arguments:
+//	serialized: A scalar containing a binary serialized SequenceExample proto.
+//	feature_list_dense_missing_assumed_empty: A vector listing the
+// FeatureList keys which may be missing from the SequenceExample.  If the
+// associated FeatureList is missing, it is treated as empty.  By default,
+// any FeatureList not listed in this vector must exist in the SequenceExample.
+//	context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars).
+// The keys expected in the Examples' features associated with context_sparse
+// values.
+//	context_dense_keys: A list of Ncontext_dense string Tensors (scalars).
+// The keys expected in the SequenceExamples' context features associated with
+// dense values.
+//	feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors
+// (scalars).  The keys expected in the FeatureLists associated with sparse
+// values.
+//	feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars).
+// The keys expected in the SequenceExamples' feature_lists associated
+// with lists of dense values.
+//	context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty).
+// context_dense_defaults[j] provides default values
+// when the SequenceExample's context map lacks context_dense_key[j].
+// If an empty Tensor is provided for context_dense_defaults[j],
+// then the Feature context_dense_keys[j] is required.
+// The input type is inferred from context_dense_defaults[j], even when it's
+// empty.  If context_dense_defaults[j] is not empty, its shape must match
+// context_dense_shapes[j].
+//	debug_name: A scalar containing the name of the serialized proto.
+// May contain, for example, table key (descriptive) name for the
+// corresponding serialized proto.  This is purely useful for debugging
+// purposes, and the presence of values here has no effect on the output.
+// May also be an empty scalar if no name is available.
+func ParseSingleSequenceExample(scope *Scope, serialized tf.Output, feature_list_dense_missing_assumed_empty tf.Output, context_sparse_keys []tf.Output, context_dense_keys []tf.Output, feature_list_sparse_keys []tf.Output, feature_list_dense_keys []tf.Output, context_dense_defaults []tf.Output, debug_name tf.Output, optional ...ParseSingleSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -14866,411 +14585,428 @@ func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale t
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FusedBatchNormGrad",
+		Type: "ParseSingleSequenceExample",
 		Input: []tf.Input{
-			y_backprop, x, scale, reserve_space_1, reserve_space_2,
+			serialized, feature_list_dense_missing_assumed_empty, tf.OutputList(context_sparse_keys), tf.OutputList(context_dense_keys), tf.OutputList(feature_list_sparse_keys), tf.OutputList(feature_list_dense_keys), tf.OutputList(context_dense_defaults), debug_name,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
-}
-
-// Computes rectified linear: `max(features, 0)`.
-func Relu(scope *Scope, features tf.Output) (activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "Relu",
-		Input: []tf.Input{
-			features,
-		},
+	var idx int
+	var err error
+	if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil {
+		scope.UpdateErr("ParseSingleSequenceExample", err)
+		return
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// L2 Loss.
-//
-// Computes half the L2 norm of a tensor without the `sqrt`:
-//
-//     output = sum(t ** 2) / 2
-//
-// Arguments:
-//	t: Typically 2-D, but may have any dimensions.
-//
-// Returns 0-D.
-func L2Loss(scope *Scope, t tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
+	if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil {
+		scope.UpdateErr("ParseSingleSequenceExample", err)
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "L2Loss",
-		Input: []tf.Input{
-			t,
-		},
+	if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil {
+		scope.UpdateErr("ParseSingleSequenceExample", err)
+		return
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil {
+		scope.UpdateErr("ParseSingleSequenceExample", err)
+		return
+	}
+	if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil {
+		scope.UpdateErr("ParseSingleSequenceExample", err)
+		return
+	}
+	if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil {
+		scope.UpdateErr("ParseSingleSequenceExample", err)
+		return
+	}
+	if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil {
+		scope.UpdateErr("ParseSingleSequenceExample", err)
+		return
+	}
+	if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil {
+		scope.UpdateErr("ParseSingleSequenceExample", err)
+		return
+	}
+	return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values
 }
 
-// ShapeAttr is an optional argument to Shape.
-type ShapeAttr func(optionalAttr)
+// QuantizedConv2DAttr is an optional argument to QuantizedConv2D.
+type QuantizedConv2DAttr func(optionalAttr)
 
-// ShapeOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_INT32
-func ShapeOutType(value tf.DataType) ShapeAttr {
+// QuantizedConv2DOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_QINT32
+func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr {
 	return func(m optionalAttr) {
 		m["out_type"] = value
 	}
 }
 
-// Returns the shape of a tensor.
+// QuantizedConv2DDilations sets the optional dilations attribute to value.
 //
-// This operation returns a 1-D integer tensor representing the shape of `input`.
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each
+// filter element on that dimension. The dimension order is determined by the
+// value of `data_format`, see above for details. Dilations in the batch and
+// depth dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes a 2D convolution given quantized 4D input and filter tensors.
 //
-// For example:
+// The inputs are quantized tensors where the lowest value represents the real
+// number of the associated minimum, and the highest represents the maximum.
+// This means that you can only interpret the quantized output in the same way, by
+// taking the returned minimum and maximum values into account.
 //
-// ```
-// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
-// shape(t) ==> [2, 2, 3]
-// ```
-func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Output) {
+// Arguments:
+//
+//	filter: filter's input_depth dimension must match input's depth dimensions.
+//	min_input: The float value that the lowest quantized input value represents.
+//	max_input: The float value that the highest quantized input value represents.
+//	min_filter: The float value that the lowest quantized filter value represents.
+//	max_filter: The float value that the highest quantized filter value represents.
+//	strides: The stride of the sliding window for each dimension of the input
+// tensor.
+//	padding: The type of padding algorithm to use.
+//
+// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
+func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Shape",
+		Type: "QuantizedConv2D",
 		Input: []tf.Input{
-			input,
+			input, filter, min_input, max_input, min_filter, max_filter,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Computes softmax cross entropy cost and gradients to backpropagate.
-//
-// Inputs are the logits, not probabilities.
-//
-// Arguments:
-//	features: batch_size x num_classes matrix
-//	labels: batch_size x num_classes matrix
-// The caller must ensure that each batch of labels represents a valid
-// probability distribution.
-//
-// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix).
-func SoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SoftmaxCrossEntropyWithLogits",
-		Input: []tf.Input{
-			features, labels,
-		},
+// ResourceGatherAttr is an optional argument to ResourceGather.
+type ResourceGatherAttr func(optionalAttr)
+
+// ResourceGatherValidateIndices sets the optional validate_indices attribute to value.
+// If not specified, defaults to true
+func ResourceGatherValidateIndices(value bool) ResourceGatherAttr {
+	return func(m optionalAttr) {
+		m["validate_indices"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
 }
 
-// Returns x - y element-wise.
+// Gather slices from the variable pointed to by `resource` according to `indices`.
 //
-// *NOTE*: `Sub` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
+// Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
+//
+// ```python
+//     # Scalar indices
+//     output[:, ..., :] = params[indices, :, ... :]
+//
+//     # Vector indices
+//     output[i, :, ..., :] = params[indices[i], :, ... :]
+//
+//     # Higher rank indices
+//     output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]
+// ```
+func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Sub",
+		Type: "ResourceGather",
 		Input: []tf.Input{
-			x, y,
+			resource, indices,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Get the value of the tensor specified by its handle.
+// Delete the TensorArray from its resource container.
+//
+// This enables the user to close and release the resource in the middle
+// of a step/run.
 //
 // Arguments:
-//	handle: The handle for a tensor stored in the session state.
-//	dtype: The type of the output value.
+//	handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
 //
-// Returns The tensor for the given handle.
-func GetSessionTensor(scope *Scope, handle tf.Output, dtype tf.DataType) (value tf.Output) {
+// Returns the created operation.
+func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
 	opspec := tf.OpSpec{
-		Type: "GetSessionTensor",
+		Type: "TensorArrayCloseV3",
 		Input: []tf.Input{
 			handle,
 		},
-		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// ResourceApplyProximalGradientDescentAttr is an optional argument to ResourceApplyProximalGradientDescent.
-type ResourceApplyProximalGradientDescentAttr func(optionalAttr)
+// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad.
+type MaxPoolGradGradAttr func(optionalAttr)
 
-// ResourceApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value.
+// MaxPoolGradGradDataFormat sets the optional data_format attribute to value.
 //
-// value: If True, the subtraction will be protected by a lock;
-// otherwise the behavior is undefined, but may exhibit less contention.
-// If not specified, defaults to false
-func ResourceApplyProximalGradientDescentUseLocking(value bool) ResourceApplyProximalGradientDescentAttr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["data_format"] = value
 	}
 }
 
-// Update '*var' as FOBOS algorithm with fixed learning rate.
-//
-// prox_v = var - alpha * delta
-// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
+// Computes second-order gradients of the maxpooling function.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	alpha: Scaling factor. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	delta: The change.
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: 4-D.  Gradients of gradients w.r.t. the input of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
 //
-// Returns the created operation.
-func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, delta tf.Output, optional ...ResourceApplyProximalGradientDescentAttr) (o *tf.Operation) {
+// Returns Gradients of gradients w.r.t. the input to `max_pool`.
+func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyProximalGradientDescent",
+		Type: "MaxPoolGradGrad",
 		Input: []tf.Input{
-			var_, alpha, l1, l2, delta,
+			orig_input, orig_output, grad,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// 2D fast Fourier transform.
-//
-// Computes the 2-dimensional discrete Fourier transform over the inner-most
-// 2 dimensions of `input`.
-//
-// Arguments:
-//	input: A complex64 tensor.
-//
-// Returns A complex64 tensor of the same shape as `input`. The inner-most 2
-//   dimensions of `input` are replaced with their 2D Fourier transform.
+// RandomUniformIntAttr is an optional argument to RandomUniformInt.
+type RandomUniformIntAttr func(optionalAttr)
+
+// RandomUniformIntSeed sets the optional seed attribute to value.
 //
-// @compatibility(numpy)
-// Equivalent to np.fft.fft2
-// @end_compatibility
-func FFT2D(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "FFT2D",
-		Input: []tf.Input{
-			input,
-		},
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func RandomUniformIntSeed(value int64) RandomUniformIntAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Creates a tensor filled with a scalar value.
-//
-// This operation creates a tensor of shape `dims` and fills it with `value`.
-//
-// For example:
-//
-// ```
-// # Output tensor has shape [2, 3].
-// fill([2, 3], 9) ==> [[9, 9, 9]
-//                      [9, 9, 9]]
-// ```
-//
-// Arguments:
-//	dims: 1-D. Represents the shape of the output tensor.
-//	value: 0-D (scalar). Value to fill the returned tensor.
+// RandomUniformIntSeed2 sets the optional seed2 attribute to value.
 //
-// @compatibility(numpy)
-// Equivalent to np.full
-// @end_compatibility
-func Fill(scope *Scope, dims tf.Output, value tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Fill",
-		Input: []tf.Input{
-			dims, value,
-		},
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomUniformIntSeed2(value int64) RandomUniformIntAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Inverse 2D fast Fourier transform.
+// Outputs random integers from a uniform distribution.
 //
-// Computes the inverse 2-dimensional discrete Fourier transform over the
-// inner-most 2 dimensions of `input`.
+// The generated values are uniform integers in the range `[minval, maxval)`.
+// The lower bound `minval` is included in the range, while the upper bound
+// `maxval` is excluded.
 //
-// Arguments:
-//	input: A complex64 tensor.
+// The random integers are slightly biased unless `maxval - minval` is an exact
+// power of two.  The bias is small for values of `maxval - minval` significantly
+// smaller than the range of the output (either `2^32` or `2^64`).
 //
-// Returns A complex64 tensor of the same shape as `input`. The inner-most 2
-//   dimensions of `input` are replaced with their inverse 2D Fourier transform.
+// Arguments:
+//	shape: The shape of the output tensor.
+//	minval: 0-D.  Inclusive lower bound on the generated integers.
+//	maxval: 0-D.  Exclusive upper bound on the generated integers.
 //
-// @compatibility(numpy)
-// Equivalent to np.fft.ifft2
-// @end_compatibility
-func IFFT2D(scope *Scope, input tf.Output) (output tf.Output) {
+// Returns A tensor of the specified shape filled with uniform random integers.
+func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "IFFT2D",
+		Type: "RandomUniformInt",
 		Input: []tf.Input{
-			input,
+			shape, minval, maxval,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// TensorArrayV3Attr is an optional argument to TensorArrayV3.
-type TensorArrayV3Attr func(optionalAttr)
+// SkipgramAttr is an optional argument to Skipgram.
+type SkipgramAttr func(optionalAttr)
 
-// TensorArrayV3ElementShape sets the optional element_shape attribute to value.
+// SkipgramWindowSize sets the optional window_size attribute to value.
 //
-// value: The expected shape of an element, if known. Used to
-// validate the shapes of TensorArray elements. If this shape is not
-// fully specified, gathering zero-size TensorArrays is an error.
-// If not specified, defaults to <unknown_rank:true >
-func TensorArrayV3ElementShape(value tf.Shape) TensorArrayV3Attr {
+// value: The number of words to predict to the left and right of the target.
+// If not specified, defaults to 5
+func SkipgramWindowSize(value int64) SkipgramAttr {
 	return func(m optionalAttr) {
-		m["element_shape"] = value
+		m["window_size"] = value
 	}
 }
 
-// TensorArrayV3DynamicSize sets the optional dynamic_size attribute to value.
+// SkipgramMinCount sets the optional min_count attribute to value.
 //
-// value: A boolean that determines whether writes to the TensorArray
-// are allowed to grow the size.  By default, this is not allowed.
-// If not specified, defaults to false
-func TensorArrayV3DynamicSize(value bool) TensorArrayV3Attr {
+// value: The minimum number of word occurrences for it to be included in the
+// vocabulary.
+// If not specified, defaults to 5
+func SkipgramMinCount(value int64) SkipgramAttr {
 	return func(m optionalAttr) {
-		m["dynamic_size"] = value
+		m["min_count"] = value
 	}
 }
 
-// TensorArrayV3ClearAfterRead sets the optional clear_after_read attribute to value.
+// SkipgramSubsample sets the optional subsample attribute to value.
 //
-// value: If true (default), Tensors in the TensorArray are cleared
-// after being read.  This disables multiple read semantics but allows early
-// release of memory.
-// If not specified, defaults to true
-func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr {
+// value: Threshold for word occurrence. Words that appear with higher
+// frequency will be randomly down-sampled. Set to 0 to disable.
+// If not specified, defaults to 0.001
+func SkipgramSubsample(value float32) SkipgramAttr {
 	return func(m optionalAttr) {
-		m["clear_after_read"] = value
+		m["subsample"] = value
 	}
 }
 
-// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value.
+// Parses a text file and creates a batch of examples.
 //
-// value: If true (default is false), then all
-// elements in the TensorArray will be expected to have have identical shapes.
-// This allows certain behaviors, like dynamically checking for
-// consistent shapes on write, and being able to fill in properly
-// shaped zero tensors on stack -- even if the element_shape attribute
-// is not fully defined.
-// If not specified, defaults to false
-func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr {
-	return func(m optionalAttr) {
-		m["identical_element_shapes"] = value
+// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result
+//
+// Arguments:
+//	filename: The corpus's text file name.
+//	batch_size: The size of produced batch.
+//
+// Returns A vector of words in the corpus.Frequencies of words. Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids.
+func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Skipgram",
+
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6)
 }
 
-// TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value.
+// StringToNumberAttr is an optional argument to StringToNumber.
+type StringToNumberAttr func(optionalAttr)
+
+// StringToNumberOutType sets the optional out_type attribute to value.
 //
-// value: Overrides the name used for the temporary tensor_array
-// resource. Default value is the name of the 'TensorArray' op (which
-// is guaranteed unique).
-// If not specified, defaults to ""
-func TensorArrayV3TensorArrayName(value string) TensorArrayV3Attr {
+// value: The numeric type to interpret each string in `string_tensor` as.
+// If not specified, defaults to DT_FLOAT
+func StringToNumberOutType(value tf.DataType) StringToNumberAttr {
 	return func(m optionalAttr) {
-		m["tensor_array_name"] = value
+		m["out_type"] = value
 	}
 }
 
-// An array of Tensors of given size.
-//
-// Write data via Write and read via Read or Pack.
+// Converts each string in the input Tensor to the specified numeric type.
 //
-// Arguments:
-//	size: The size of the array.
-//	dtype: The type of the elements on the tensor_array.
+// (Note that int32 overflow results in an error while float overflow
+// results in a rounded value.)
 //
-// Returns The handle to the TensorArray.A scalar used to control gradient flow.
-func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV3Attr) (handle tf.Output, flow tf.Output) {
+// Returns A Tensor of the same shape as the input `string_tensor`.
+func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayV3",
+		Type: "StringToNumber",
 		Input: []tf.Input{
-			size,
+			string_tensor,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// ResourceApplyGradientDescentAttr is an optional argument to ResourceApplyGradientDescent.
-type ResourceApplyGradientDescentAttr func(optionalAttr)
+// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2.
+type ResourceApplyFtrlV2Attr func(optionalAttr)
 
-// ResourceApplyGradientDescentUseLocking sets the optional use_locking attribute to value.
+// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
 //
-// value: If `True`, the subtraction will be protected by a lock;
-// otherwise the behavior is undefined, but may exhibit less contention.
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
 // If not specified, defaults to false
-func ResourceApplyGradientDescentUseLocking(value bool) ResourceApplyGradientDescentAttr {
+func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr {
 	return func(m optionalAttr) {
 		m["use_locking"] = value
 	}
 }
 
-// Update '*var' by subtracting 'alpha' * 'delta' from it.
+// Update '*var' according to the Ftrl-proximal scheme.
+//
+// grad_with_shrinkage = grad + 2 * l2_shrinkage * var
+// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
+// linear += grad_with_shrinkage +
+//     (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+// accum = accum_new
 //
 // Arguments:
 //	var_: Should be from a Variable().
-//	alpha: Scaling factor. Must be a scalar.
-//	delta: The change.
+//	accum: Should be from a Variable().
+//	linear: Should be from a Variable().
+//	grad: The gradient.
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regulariation. Must be a scalar.
+//	l2: L2 shrinkage regulariation. Must be a scalar.
+//
+//	lr_power: Scaling factor. Must be a scalar.
 //
 // Returns the created operation.
-func ResourceApplyGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, delta tf.Output, optional ...ResourceApplyGradientDescentAttr) (o *tf.Operation) {
+func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -15279,60 +15015,64 @@ func ResourceApplyGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output,
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyGradientDescent",
+		Type: "ResourceApplyFtrlV2",
 		Input: []tf.Input{
-			var_, alpha, delta,
+			var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power,
 		},
 		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// MultinomialAttr is an optional argument to Multinomial.
-type MultinomialAttr func(optionalAttr)
+// TruncatedNormalAttr is an optional argument to TruncatedNormal.
+type TruncatedNormalAttr func(optionalAttr)
 
-// MultinomialSeed sets the optional seed attribute to value.
+// TruncatedNormalSeed sets the optional seed attribute to value.
 //
-// value: If either seed or seed2 is set to be non-zero, the internal random number
-// generator is seeded by the given seed.  Otherwise, a random seed is used.
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
 // If not specified, defaults to 0
-func MultinomialSeed(value int64) MultinomialAttr {
+func TruncatedNormalSeed(value int64) TruncatedNormalAttr {
 	return func(m optionalAttr) {
 		m["seed"] = value
 	}
 }
 
-// MultinomialSeed2 sets the optional seed2 attribute to value.
+// TruncatedNormalSeed2 sets the optional seed2 attribute to value.
 //
 // value: A second seed to avoid seed collision.
 // If not specified, defaults to 0
-func MultinomialSeed2(value int64) MultinomialAttr {
+func TruncatedNormalSeed2(value int64) TruncatedNormalAttr {
 	return func(m optionalAttr) {
 		m["seed2"] = value
 	}
 }
 
-// Draws samples from a multinomial distribution.
+// Outputs random values from a truncated normal distribution.
+//
+// The generated values follow a normal distribution with mean 0 and standard
+// deviation 1, except that values whose magnitude is more than 2 standard
+// deviations from the mean are dropped and re-picked.
 //
 // Arguments:
-//	logits: 2-D Tensor with shape `[batch_size, num_classes]`.  Each slice `[i, :]`
-// represents the unnormalized log probabilities for all classes.
-//	num_samples: 0-D.  Number of independent samples to draw for each row slice.
+//	shape: The shape of the output tensor.
+//	dtype: The type of the output.
 //
-// Returns 2-D Tensor with shape `[batch_size, num_samples]`.  Each slice `[i, :]`
-// contains the drawn class labels with range `[0, num_classes)`.
-func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) {
+// Returns A tensor of the specified shape filled with random truncated normal
+// values.
+func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"dtype": dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Multinomial",
+		Type: "TruncatedNormal",
 		Input: []tf.Input{
-			logits, num_samples,
+			shape,
 		},
 		Attrs: attrs,
 	}
@@ -15340,35 +15080,39 @@ func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional
 	return op.Output(0)
 }
 
-// ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA.
-type ResourceSparseApplyAdagradDAAttr func(optionalAttr)
+// FakeQuantWithMinMaxVarsPerChannelAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannel.
+type FakeQuantWithMinMaxVarsPerChannelAttr func(optionalAttr)
 
-// ResourceSparseApplyAdagradDAUseLocking sets the optional use_locking attribute to value.
-//
-// value: If True, updating of the var and accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// FakeQuantWithMinMaxVarsPerChannelNumBits sets the optional num_bits attribute to value.
+// If not specified, defaults to 8
+func FakeQuantWithMinMaxVarsPerChannelNumBits(value int64) FakeQuantWithMinMaxVarsPerChannelAttr {
+	return func(m optionalAttr) {
+		m["num_bits"] = value
+	}
+}
+
+// FakeQuantWithMinMaxVarsPerChannelNarrowRange sets the optional narrow_range attribute to value.
 // If not specified, defaults to false
-func ResourceSparseApplyAdagradDAUseLocking(value bool) ResourceSparseApplyAdagradDAAttr {
+func FakeQuantWithMinMaxVarsPerChannelNarrowRange(value bool) FakeQuantWithMinMaxVarsPerChannelAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["narrow_range"] = value
 	}
 }
 
-// Update entries in '*var' and '*accum' according to the proximal adagrad scheme.
+// Fake-quantize the 'inputs' tensor of type float and one of the shapes: `[d]`,
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	gradient_accumulator: Should be from a Variable().
-//	gradient_squared_accumulator: Should be from a Variable().
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//	lr: Learning rate. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	global_step: Training step number. Must be a scalar.
+// `[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]`
+// to 'outputs' tensor of same shape as `inputs`.
 //
-// Returns the created operation.
-func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceSparseApplyAdagradDAAttr) (o *tf.Operation) {
+// `[min; max]` define the clamping range for the `inputs` data.
+// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
+// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
+// then de-quantized and output as floats in `[min; max]` interval.
+// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.
+//
+// This operation has a gradient and thus allows for training `min` and `max`
+// values.
+func FakeQuantWithMinMaxVarsPerChannel(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsPerChannelAttr) (outputs tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -15377,97 +15121,170 @@ func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumul
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyAdagradDA",
+		Type: "FakeQuantWithMinMaxVarsPerChannel",
 		Input: []tf.Input{
-			var_, gradient_accumulator, gradient_squared_accumulator, grad, indices, lr, l1, l2, global_step,
+			inputs, min, max,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Computes softmax cross entropy cost and gradients to backpropagate.
+// RandomShuffleAttr is an optional argument to RandomShuffle.
+type RandomShuffleAttr func(optionalAttr)
+
+// RandomShuffleSeed sets the optional seed attribute to value.
 //
-// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept
-// a matrix of label probabilities, but rather a single label per row
-// of features.  This label is considered to have probability 1.0 for the
-// given row.
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func RandomShuffleSeed(value int64) RandomShuffleAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// RandomShuffleSeed2 sets the optional seed2 attribute to value.
 //
-// Inputs are the logits, not probabilities.
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomShuffleSeed2(value int64) RandomShuffleAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Randomly shuffles a tensor along its first dimension.
+//
+//   The tensor is shuffled along dimension 0, such that each `value[j]` is mapped
+//   to one and only one `output[i]`. For example, a mapping that might occur for a
+//   3x2 tensor is:
+//
+// ```
+// [[1, 2],       [[5, 6],
+//  [3, 4],  ==>   [1, 2],
+//  [5, 6]]        [3, 4]]
+// ```
 //
 // Arguments:
-//	features: batch_size x num_classes matrix
-//	labels: batch_size vector with values in [0, num_classes).
-// This is the label for the given minibatch entry.
+//	value: The tensor to be shuffled.
 //
-// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix).
-func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) {
+// Returns A tensor of same shape and type as `value`, shuffled along its first
+// dimension.
+func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SparseSoftmaxCrossEntropyWithLogits",
+		Type: "RandomShuffle",
 		Input: []tf.Input{
-			features, labels,
+			value,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// TensorSummaryAttr is an optional argument to TensorSummary.
-type TensorSummaryAttr func(optionalAttr)
+// OrderedMapIncompleteSizeAttr is an optional argument to OrderedMapIncompleteSize.
+type OrderedMapIncompleteSizeAttr func(optionalAttr)
 
-// TensorSummaryDescription sets the optional description attribute to value.
+// OrderedMapIncompleteSizeCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
 //
-// value: A json-encoded SummaryDescription proto.
+// REQUIRES: value >= 0
+func OrderedMapIncompleteSizeCapacity(value int64) OrderedMapIncompleteSizeAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// OrderedMapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapIncompleteSizeMemoryLimit(value int64) OrderedMapIncompleteSizeAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// OrderedMapIncompleteSizeContainer sets the optional container attribute to value.
 // If not specified, defaults to ""
-func TensorSummaryDescription(value string) TensorSummaryAttr {
+func OrderedMapIncompleteSizeContainer(value string) OrderedMapIncompleteSizeAttr {
 	return func(m optionalAttr) {
-		m["description"] = value
+		m["container"] = value
+	}
+}
+
+// OrderedMapIncompleteSizeSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func OrderedMapIncompleteSizeSharedName(value string) OrderedMapIncompleteSizeAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
 	}
 }
 
-// TensorSummaryLabels sets the optional labels attribute to value.
-//
-// value: An unused list of strings.
-// If not specified, defaults to <>
-func TensorSummaryLabels(value []string) TensorSummaryAttr {
-	return func(m optionalAttr) {
-		m["labels"] = value
+// Op returns the number of incomplete elements in the underlying container.
+func OrderedMapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapIncompleteSizeAttr) (size tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "OrderedMapIncompleteSize",
+
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// TensorSummaryDisplayName sets the optional display_name attribute to value.
+// DecodeRawAttr is an optional argument to DecodeRaw.
+type DecodeRawAttr func(optionalAttr)
+
+// DecodeRawLittleEndian sets the optional little_endian attribute to value.
 //
-// value: An unused string.
-// If not specified, defaults to ""
-func TensorSummaryDisplayName(value string) TensorSummaryAttr {
+// value: Whether the input `bytes` are in little-endian order.
+// Ignored for `out_type` values that are stored in a single byte like
+// `uint8`.
+// If not specified, defaults to true
+func DecodeRawLittleEndian(value bool) DecodeRawAttr {
 	return func(m optionalAttr) {
-		m["display_name"] = value
+		m["little_endian"] = value
 	}
 }
 
-// Outputs a `Summary` protocol buffer with a tensor.
-//
-// This op is being phased out in favor of TensorSummaryV2, which lets callers pass
-// a tag as well as a serialized SummaryMetadata proto string that contains
-// plugin-specific data. We will keep this op to maintain backwards compatibility.
+// Reinterpret the bytes of a string as a vector of numbers.
 //
 // Arguments:
-//	tensor: A tensor to serialize.
-func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr) (summary tf.Output) {
+//	bytes: All the elements must have the same length.
+//
+//
+// Returns A Tensor with one more dimension than the input `bytes`.  The
+// added dimension will have size equal to the length of the elements
+// of `bytes` divided by the number of bytes to represent `out_type`.
+func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"out_type": out_type}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorSummary",
+		Type: "DecodeRaw",
 		Input: []tf.Input{
-			tensor,
+			bytes,
 		},
 		Attrs: attrs,
 	}
@@ -15475,288 +15292,316 @@ func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr
 	return op.Output(0)
 }
 
-// Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor.
+// Copy a tensor setting everything outside a central band in each innermost matrix
 //
-// This operation folds the padded areas of `input` by `MirrorPad` according to the
-// `paddings` you specify. `paddings` must be the same as `paddings` argument
-// given to the corresponding `MirrorPad` op.
+// to zero.
 //
-// The folded size of each dimension D of the output is:
+// The `band` part is computed as follows:
+// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a
+// tensor with the same shape where
 //
-// `input.dim_size(D) - paddings(D, 0) - paddings(D, 1)`
+// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`.
+//
+// The indicator function
+//
+// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) &&
+//                  (num_upper < 0 || (n-m) <= num_upper)`.
 //
 // For example:
 //
 // ```
-// # 't' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]].
-// # 'paddings' is [[0, 1]], [0, 1]].
-// # 'mode' is SYMMETRIC.
-// # rank of 't' is 2.
-// pad(t, paddings) ==> [[ 1,  5]
-//                       [11, 28]]
+// # if 'input' is [[ 0,  1,  2, 3]
+//                  [-1,  0,  1, 2]
+//                  [-2, -1,  0, 1]
+//                  [-3, -2, -1, 0]],
+//
+// tf.matrix_band_part(input, 1, -1) ==> [[ 0,  1,  2, 3]
+//                                        [-1,  0,  1, 2]
+//                                        [ 0, -1,  0, 1]
+//                                        [ 0,  0, -1, 0]],
+//
+// tf.matrix_band_part(input, 2, 1) ==> [[ 0,  1,  0, 0]
+//                                       [-1,  0,  1, 0]
+//                                       [-2, -1,  0, 1]
+//                                       [ 0, -2, -1, 0]]
+// ```
+//
+// Useful special cases:
+//
+// ```
+//  tf.matrix_band_part(input, 0, -1) ==> Upper triangular part.
+//  tf.matrix_band_part(input, -1, 0) ==> Lower triangular part.
+//  tf.matrix_band_part(input, 0, 0) ==> Diagonal.
 // ```
 //
 // Arguments:
-//	input: The input tensor to be folded.
-//	paddings: A two-column matrix specifying the padding sizes. The number of
-// rows must be the same as the rank of `input`.
-//	mode: The mode used in the `MirrorPad` op.
+//	input: Rank `k` tensor.
+//	num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire
+// lower triangle.
+//	num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep
+// entire upper triangle.
 //
-// Returns The folded tensor.
-func MirrorPadGrad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) {
+// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor.
+func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"mode": mode}
 	opspec := tf.OpSpec{
-		Type: "MirrorPadGrad",
+		Type: "MatrixBandPart",
 		Input: []tf.Input{
-			input, paddings,
+			input, num_lower, num_upper,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the inverse permutation of a tensor.
-//
-// This operation computes the inverse of an index permutation. It takes a 1-D
-// integer tensor `x`, which represents the indices of a zero-based array, and
-// swaps each value with its index position. In other words, for an output tensor
-// `y` and an input tensor `x`, this operation computes the following:
-//
-// `y[x[i]] = i for i in [0, 1, ..., len(x) - 1]`
+// DecodeCompressedAttr is an optional argument to DecodeCompressed.
+type DecodeCompressedAttr func(optionalAttr)
+
+// DecodeCompressedCompressionType sets the optional compression_type attribute to value.
 //
-// The values must include 0. There can be no duplicate values or negative values.
+// value: A scalar containing either (i) the empty string (no
+// compression), (ii) "ZLIB", or (iii) "GZIP".
+// If not specified, defaults to ""
+func DecodeCompressedCompressionType(value string) DecodeCompressedAttr {
+	return func(m optionalAttr) {
+		m["compression_type"] = value
+	}
+}
+
+// Decompress strings.
 //
-// For example:
+// This op decompresses each element of the `bytes` input `Tensor`, which
+// is assumed to be compressed using the given `compression_type`.
 //
-// ```
-// # tensor `x` is [3, 4, 0, 2, 1]
-// invert_permutation(x) ==> [2, 4, 3, 0, 1]
-// ```
+// The `output` is a string `Tensor` of the same shape as `bytes`,
+// each element containing the decompressed data from the corresponding
+// element in `bytes`.
 //
 // Arguments:
-//	x: 1-D.
+//	bytes: A Tensor of string which is compressed.
 //
-// Returns 1-D.
-func InvertPermutation(scope *Scope, x tf.Output) (y tf.Output) {
+// Returns A Tensor with the same shape as input `bytes`, uncompressed
+// from bytes.
+func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompressedAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "InvertPermutation",
+		Type: "DecodeCompressed",
 		Input: []tf.Input{
-			x,
+			bytes,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Reverses specific dimensions of a tensor.
-//
-// Given a `tensor`, and a `bool` tensor `dims` representing the dimensions
-// of `tensor`, this operation reverses each dimension i of `tensor` where
-// `dims[i]` is `True`.
-//
-// `tensor` can have up to 8 dimensions. The number of dimensions
-// of `tensor` must equal the number of elements in `dims`. In other words:
-//
-// `rank(tensor) = size(dims)`
-//
-// For example:
+// WholeFileReaderV2Attr is an optional argument to WholeFileReaderV2.
+type WholeFileReaderV2Attr func(optionalAttr)
+
+// WholeFileReaderV2Container sets the optional container attribute to value.
 //
-// ```
-// # tensor 't' is [[[[ 0,  1,  2,  3],
-// #                  [ 4,  5,  6,  7],
-// #                  [ 8,  9, 10, 11]],
-// #                 [[12, 13, 14, 15],
-// #                  [16, 17, 18, 19],
-// #                  [20, 21, 22, 23]]]]
-// # tensor 't' shape is [1, 2, 3, 4]
+// value: If non-empty, this reader is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func WholeFileReaderV2Container(value string) WholeFileReaderV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// WholeFileReaderV2SharedName sets the optional shared_name attribute to value.
 //
-// # 'dims' is [False, False, False, True]
-// reverse(t, dims) ==> [[[[ 3,  2,  1,  0],
-//                         [ 7,  6,  5,  4],
-//                         [ 11, 10, 9, 8]],
-//                        [[15, 14, 13, 12],
-//                         [19, 18, 17, 16],
-//                         [23, 22, 21, 20]]]]
+// value: If non-empty, this reader is named in the given bucket
+// with this shared_name. Otherwise, the node name is used instead.
+// If not specified, defaults to ""
+func WholeFileReaderV2SharedName(value string) WholeFileReaderV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// A Reader that outputs the entire contents of a file as a value.
 //
-// # 'dims' is [False, True, False, False]
-// reverse(t, dims) ==> [[[[12, 13, 14, 15],
-//                         [16, 17, 18, 19],
-//                         [20, 21, 22, 23]
-//                        [[ 0,  1,  2,  3],
-//                         [ 4,  5,  6,  7],
-//                         [ 8,  9, 10, 11]]]]
+// To use, enqueue filenames in a Queue.  The output of ReaderRead will
+// be a filename (key) and the contents of that file (value).
 //
-// # 'dims' is [False, False, True, False]
-// reverse(t, dims) ==> [[[[8, 9, 10, 11],
-//                         [4, 5, 6, 7],
-//                         [0, 1, 2, 3]]
-//                        [[20, 21, 22, 23],
-//                         [16, 17, 18, 19],
-//                         [12, 13, 14, 15]]]]
-// ```
+// Returns The handle to reference the Reader.
+func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "WholeFileReaderV2",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Transforms a tf.Example proto (as a string) into typed tensors.
 //
 // Arguments:
-//	tensor: Up to 8-D.
-//	dims: 1-D. The dimensions to reverse.
-//
-// Returns The same shape as `tensor`.
-func Reverse(scope *Scope, tensor tf.Output, dims tf.Output) (output tf.Output) {
+//	serialized: A vector containing a batch of binary serialized Example protos.
+//	dense_defaults: A list of Tensors (some may be empty), whose length matches
+// the length of `dense_keys`. dense_defaults[j] provides default values
+// when the example's feature_map lacks dense_key[j].  If an empty Tensor is
+// provided for dense_defaults[j], then the Feature dense_keys[j] is required.
+// The input type is inferred from dense_defaults[j], even when it's empty.
+// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined,
+// then the shape of dense_defaults[j] must match that of dense_shapes[j].
+// If dense_shapes[j] has an undefined major dimension (variable strides dense
+// feature), dense_defaults[j] must contain a single element:
+// the padding element.
+//	num_sparse: The number of sparse features to be parsed from the example. This
+// must match the lengths of `sparse_keys` and `sparse_types`.
+//	sparse_keys: A list of `num_sparse` strings.
+// The keys expected in the Examples' features associated with sparse values.
+//	dense_keys: The keys expected in the Examples' features associated with dense
+// values.
+//	sparse_types: A list of `num_sparse` types; the data types of data in each
+// Feature given in sparse_keys.
+// Currently the ParseSingleExample op supports DT_FLOAT (FloatList),
+// DT_INT64 (Int64List), and DT_STRING (BytesList).
+//	dense_shapes: The shapes of data in each Feature given in dense_keys.
+// The length of this list must match the length of `dense_keys`.  The
+// number of elements in the Feature corresponding to dense_key[j] must
+// always equal dense_shapes[j].NumEntries().  If dense_shapes[j] ==
+// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j]
+// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1,
+// ..., DN), the shape of the output Tensor dense_values[j] will be (M,
+// D1, .., DN), where M is the number of blocks of elements of length
+// D1 * .... * DN, in the input.
+func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes}
 	opspec := tf.OpSpec{
-		Type: "Reverse",
+		Type: "ParseSingleExample",
 		Input: []tf.Input{
-			tensor, dims,
+			serialized, tf.OutputList(dense_defaults),
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
+		return
+	}
+	if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
+		return
+	}
+	if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
+		return
+	}
+	if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil {
+		scope.UpdateErr("ParseSingleExample", err)
+		return
+	}
+	return sparse_indices, sparse_values, sparse_shapes, dense_values
 }
 
-// Fills empty rows in the input 2-D `SparseTensor` with a default value.
-//
-// The input `SparseTensor` is represented via the tuple of inputs
-// (`indices`, `values`, `dense_shape`).  The output `SparseTensor` has the
-// same `dense_shape` but with indices `output_indices` and values
-// `output_values`.
-//
-// This op inserts a single entry for every row that doesn't have any values.
-// The index is created as `[row, 0, ..., 0]` and the inserted value
-// is `default_value`.
-//
-// For example, suppose `sp_input` has shape `[5, 6]` and non-empty values:
-//
-//     [0, 1]: a
-//     [0, 3]: b
-//     [2, 0]: c
-//     [3, 1]: d
-//
-// Rows 1 and 4 are empty, so the output will be of shape `[5, 6]` with values:
-//
-//     [0, 1]: a
-//     [0, 3]: b
-//     [1, 0]: default_value
-//     [2, 0]: c
-//     [3, 1]: d
-//     [4, 0]: default_value
-//
-// The output `SparseTensor` will be in row-major order and will have the
-// same shape as the input.
-//
-// This op also returns an indicator vector shaped `[dense_shape[0]]` such that
-//
-//     empty_row_indicator[i] = True iff row i was an empty row.
-//
-// And a reverse index map vector shaped `[indices.shape[0]]` that is used during
-// backpropagation,
-//
-//     reverse_index_map[j] = out_j s.t. indices[j, :] == output_indices[out_j, :]
-//
-// Arguments:
-//	indices: 2-D. the indices of the sparse tensor.
-//	values: 1-D. the values of the sparse tensor.
-//	dense_shape: 1-D. the shape of the sparse tensor.
-//	default_value: 0-D. default value to insert into location `[row, 0, ..., 0]`
-//   for rows missing from the input sparse tensor.
-// output indices: 2-D. the indices of the filled sparse tensor.
-//
-// Returns 1-D. the values of the filled sparse tensor.1-D. whether the dense row was missing in the
-// input sparse tensor.1-D. a map from the input indices to the output indices.
-func SparseFillEmptyRows(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output, default_value tf.Output) (output_indices tf.Output, output_values tf.Output, empty_row_indicator tf.Output, reverse_index_map tf.Output) {
+// Computes acos of x element-wise.
+func Acos(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseFillEmptyRows",
+		Type: "Acos",
 		Input: []tf.Input{
-			indices, values, dense_shape, default_value,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+	return op.Output(0)
 }
 
-// Conv2DAttr is an optional argument to Conv2D.
-type Conv2DAttr func(optionalAttr)
-
-// Conv2DUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value.
-// If not specified, defaults to true
-func Conv2DUseCudnnOnGpu(value bool) Conv2DAttr {
-	return func(m optionalAttr) {
-		m["use_cudnn_on_gpu"] = value
-	}
-}
+// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax.
+type MaxPoolWithArgmaxAttr func(optionalAttr)
 
-// Conv2DDataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, height, width, channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, channels, height, width].
-// If not specified, defaults to "NHWC"
-func Conv2DDataFormat(value string) Conv2DAttr {
+// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value.
+// If not specified, defaults to DT_INT64
+func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["Targmax"] = value
 	}
 }
 
-// Computes a 2-D convolution given 4-D `input` and `filter` tensors.
-//
-// Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
-// and a filter / kernel tensor of shape
-// `[filter_height, filter_width, in_channels, out_channels]`, this op
-// performs the following:
-//
-// 1. Flattens the filter to a 2-D matrix with shape
-//    `[filter_height * filter_width * in_channels, output_channels]`.
-// 2. Extracts image patches from the input tensor to form a *virtual*
-//    tensor of shape `[batch, out_height, out_width,
-//    filter_height * filter_width * in_channels]`.
-// 3. For each patch, right-multiplies the filter matrix and the image patch
-//    vector.
-//
-// In detail, with the default NHWC format,
+// Performs max pooling on the input and outputs both max values and indices.
 //
-//     output[b, i, j, k] =
-//         sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] *
-//                         filter[di, dj, q, k]
+// The indices in `argmax` are flattened, so that a maximum value at position
+// `[b, y, x, c]` becomes flattened index
+// `((b * height + y) * width + x) * channels + c`.
 //
-// Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
-// horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
+// The indices returned are always in `[0, height) x [0, width)` before flattening,
+// even if padding is involved and the mathematically correct answer is outside
+// (either negative or too large).  This is a bug, but fixing it is difficult to do
+// in a safe backwards compatible way, especially due to flattening.
 //
 // Arguments:
-//	input: A 4-D tensor. The dimension order is interpreted according to the value
-// of `data_format`, see below for details.
-//	filter: A 4-D tensor of shape
-// `[filter_height, filter_width, in_channels, out_channels]`
-//	strides: 1-D tensor of length 4.  The stride of the sliding window for each
-// dimension of `input`. The dimension order is determined by the value of
-//   `data_format`, see below for details.
+//	input: 4-D with shape `[batch, height, width, channels]`.  Input to pool over.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
 //	padding: The type of padding algorithm to use.
 //
-// Returns A 4-D tensor. The dimension order is determined by the value of
-// `data_format`, see below for details.
-func Conv2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv2DAttr) (output tf.Output) {
+// Returns The max pooled output tensor.4-D.  The flattened indices of the max values chosen for each output.
+func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Conv2D",
+		Type: "MaxPoolWithArgmax",
 		Input: []tf.Input{
-			input, filter,
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Transforms a serialized tensorflow.TensorProto proto into a Tensor.
+//
+// Arguments:
+//	serialized: A scalar string containing a serialized TensorProto proto.
+//	out_type: The type of the serialized tensor.  The provided type must match the
+// type of the serialized tensor and no implicit conversion will take place.
+//
+// Returns A Tensor of type `out_type`.
+func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type}
+	opspec := tf.OpSpec{
+		Type: "ParseTensor",
+		Input: []tf.Input{
+			serialized,
 		},
 		Attrs: attrs,
 	}
@@ -15764,68 +15609,113 @@ func Conv2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, pa
 	return op.Output(0)
 }
 
-// VariableShapeAttr is an optional argument to VariableShape.
-type VariableShapeAttr func(optionalAttr)
+// MapClearAttr is an optional argument to MapClear.
+type MapClearAttr func(optionalAttr)
 
-// VariableShapeOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_INT32
-func VariableShapeOutType(value tf.DataType) VariableShapeAttr {
+// MapClearCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapClearCapacity(value int64) MapClearAttr {
 	return func(m optionalAttr) {
-		m["out_type"] = value
+		m["capacity"] = value
 	}
 }
 
-// Returns the shape of the variable pointed to by `resource`.
-//
-// This operation returns a 1-D integer tensor representing the shape of `input`.
+// MapClearMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
 //
-// For example:
+// REQUIRES: value >= 0
+func MapClearMemoryLimit(value int64) MapClearAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// MapClearContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func MapClearContainer(value string) MapClearAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// MapClearSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func MapClearSharedName(value string) MapClearAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op removes all elements in the underlying container.
 //
-// ```
-// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
-// shape(t) ==> [2, 2, 3]
-// ```
-func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) (output tf.Output) {
+// Returns the created operation.
+func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"dtypes": dtypes}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "VariableShape",
-		Input: []tf.Input{
-			input,
-		},
+		Type: "MapClear",
+
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
+}
+
+// DecodeCSVAttr is an optional argument to DecodeCSV.
+type DecodeCSVAttr func(optionalAttr)
+
+// DecodeCSVFieldDelim sets the optional field_delim attribute to value.
+//
+// value: char delimiter to separate fields in a record.
+// If not specified, defaults to ","
+func DecodeCSVFieldDelim(value string) DecodeCSVAttr {
+	return func(m optionalAttr) {
+		m["field_delim"] = value
+	}
 }
 
-// StringJoinAttr is an optional argument to StringJoin.
-type StringJoinAttr func(optionalAttr)
+// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value.
+//
+// value: If false, treats double quotation marks as regular
+// characters inside of the string fields (ignoring RFC 4180, Section 2,
+// Bullet 5).
+// If not specified, defaults to true
+func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr {
+	return func(m optionalAttr) {
+		m["use_quote_delim"] = value
+	}
+}
 
-// StringJoinSeparator sets the optional separator attribute to value.
+// DecodeCSVNaValue sets the optional na_value attribute to value.
 //
-// value: string, an optional join separator.
+// value: Additional string to recognize as NA/NaN.
 // If not specified, defaults to ""
-func StringJoinSeparator(value string) StringJoinAttr {
+func DecodeCSVNaValue(value string) DecodeCSVAttr {
 	return func(m optionalAttr) {
-		m["separator"] = value
+		m["na_value"] = value
 	}
 }
 
-// Joins the strings in the given list of string tensors into one tensor;
+// Convert CSV records to tensors. Each column maps to one tensor.
 //
-// with the given separator (default is an empty separator).
+// RFC 4180 format is expected for the CSV records.
+// (https://tools.ietf.org/html/rfc4180)
+// Note that we allow leading and trailing spaces with int or float field.
 //
 // Arguments:
-//	inputs: A list of string tensors.  The tensors must all have the same shape,
-// or be scalars.  Scalars may be mixed in; these will be broadcast to the shape
-// of non-scalar inputs.
-func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) {
+//	records: Each string is a record/row in the csv and all records should have
+// the same format.
+//	record_defaults: One tensor per column of the input record, with either a
+// scalar default value for that column or empty if the column is required.
+//
+// Returns Each tensor will have the same shape as records.
+func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -15834,67 +15724,9 @@ func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (o
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StringJoin",
-		Input: []tf.Input{
-			tf.OutputList(inputs),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Transforms a vector of brain.Example protos (as strings) into typed tensors.
-//
-// Arguments:
-//	serialized: A vector containing a batch of binary serialized Example protos.
-//	names: A vector containing the names of the serialized protos.
-// May contain, for example, table key (descriptive) names for the
-// corresponding serialized protos.  These are purely useful for debugging
-// purposes, and the presence of values here has no effect on the output.
-// May also be an empty vector if no names are available.
-// If non-empty, this vector must be the same length as "serialized".
-//	sparse_keys: A list of Nsparse string Tensors (scalars).
-// The keys expected in the Examples' features associated with sparse values.
-//	dense_keys: A list of Ndense string Tensors (scalars).
-// The keys expected in the Examples' features associated with dense values.
-//	dense_defaults: A list of Ndense Tensors (some may be empty).
-// dense_defaults[j] provides default values
-// when the example's feature_map lacks dense_key[j].  If an empty Tensor is
-// provided for dense_defaults[j], then the Feature dense_keys[j] is required.
-// The input type is inferred from dense_defaults[j], even when it's empty.
-// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined,
-// then the shape of dense_defaults[j] must match that of dense_shapes[j].
-// If dense_shapes[j] has an undefined major dimension (variable strides dense
-// feature), dense_defaults[j] must contain a single element:
-// the padding element.
-//	sparse_types: A list of Nsparse types; the data types of data in each Feature
-// given in sparse_keys.
-// Currently the ParseExample supports DT_FLOAT (FloatList),
-// DT_INT64 (Int64List), and DT_STRING (BytesList).
-//	dense_shapes: A list of Ndense shapes; the shapes of data in each Feature
-// given in dense_keys.
-// The number of elements in the Feature corresponding to dense_key[j]
-// must always equal dense_shapes[j].NumEntries().
-// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output
-// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN):
-// The dense outputs are just the inputs row-stacked by batch.
-// This works for dense_shapes[j] = (-1, D1, ..., DN).  In this case
-// the shape of the output Tensor dense_values[j] will be
-// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks
-// of elements of length D1 * .... * DN, across all minibatch entries
-// in the input.  Any minibatch entry with less than M blocks of elements of
-// length D1 * ... * DN will be padded with the corresponding default_value
-// scalar element along the second dimension.
-func ParseExample(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys []tf.Output, dense_keys []tf.Output, dense_defaults []tf.Output, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"sparse_types": sparse_types, "dense_shapes": dense_shapes}
-	opspec := tf.OpSpec{
-		Type: "ParseExample",
+		Type: "DecodeCSV",
 		Input: []tf.Input{
-			serialized, names, tf.OutputList(sparse_keys), tf.OutputList(dense_keys), tf.OutputList(dense_defaults),
+			records, tf.OutputList(record_defaults),
 		},
 		Attrs: attrs,
 	}
@@ -15904,242 +15736,178 @@ func ParseExample(scope *Scope, serialized tf.Output, names tf.Output, sparse_ke
 	}
 	var idx int
 	var err error
-	if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil {
-		scope.UpdateErr("ParseExample", err)
-		return
-	}
-	if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil {
-		scope.UpdateErr("ParseExample", err)
-		return
-	}
-	if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil {
-		scope.UpdateErr("ParseExample", err)
-		return
-	}
-	if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil {
-		scope.UpdateErr("ParseExample", err)
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("DecodeCSV", err)
 		return
 	}
-	return sparse_indices, sparse_values, sparse_shapes, dense_values
+	return output
 }
 
-// Compute the pairwise cross product.
+// Returns the rank of a tensor.
 //
-// `a` and `b` must be the same shape; they can either be simple 3-element vectors,
-// or any shape where the innermost dimension is 3. In the latter case, each pair
-// of corresponding 3-element vectors is cross-multiplied independently.
+// This operation returns an integer representing the rank of `input`.
 //
-// Arguments:
-//	a: A tensor containing 3-element vectors.
-//	b: Another tensor, of same type and shape as `a`.
+// For example:
 //
-// Returns Pairwise cross product of the vectors in `a` and `b`.
-func Cross(scope *Scope, a tf.Output, b tf.Output) (product tf.Output) {
+// ```
+// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
+// # shape of tensor 't' is [2, 2, 3]
+// rank(t) ==> 3
+// ```
+//
+// **Note**: The rank of a tensor is not the same as the rank of a matrix. The rank
+// of a tensor is the number of indices required to uniquely select each element
+// of the tensor. Rank is also known as "order", "degree", or "ndims."
+func Rank(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Cross",
+		Type: "Rank",
 		Input: []tf.Input{
-			a, b,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Inverse 2D real-valued fast Fourier transform.
-//
-// Computes the inverse 2-dimensional discrete Fourier transform of a real-valued
-// signal over the inner-most 2 dimensions of `input`.
-//
-// The inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`:
-// The inner-most dimension contains the `fft_length / 2 + 1` unique components of
-// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed
-// from the size of the inner-most 2 dimensions of `input`. If the FFT length used
-// to compute `input` is odd, it should be provided since it cannot be inferred
-// properly.
-//
-// Along each axis `IRFFT2D` is computed on, if `fft_length` (or
-// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the
-// corresponding dimension of `input`, the dimension is cropped. If it is larger,
-// the dimension is padded with zeros.
-//
-// Arguments:
-//	input: A complex64 tensor.
-//	fft_length: An int32 tensor of shape [2]. The FFT length for each dimension.
-//
-// Returns A float32 tensor of the same rank as `input`. The inner-most 2
-//   dimensions of `input` are replaced with the `fft_length` samples of their
-//   inverse 2D Fourier transform.
-//
-// @compatibility(numpy)
-// Equivalent to np.fft.irfft2
-// @end_compatibility
-func IRFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+// Output a fact about factorials.
+func Fact(scope *Scope) (fact tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "IRFFT2D",
-		Input: []tf.Input{
-			input, fft_length,
-		},
+		Type: "Fact",
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns element-wise remainder of division. This emulates C semantics in that
+// Makes its input available to the next iteration.
 //
-// the result here is consistent with a truncating divide. E.g. `truncate(x / y) *
-// y + truncate_mod(x, y) = x`.
+// Arguments:
+//	data: The tensor to be made available to the next iteration.
 //
-// *NOTE*: `TruncateMod` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func TruncateMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Returns The same tensor as `data`.
+func NextIteration(scope *Scope, data tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "TruncateMod",
+		Type: "NextIteration",
 		Input: []tf.Input{
-			x, y,
+			data,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad.
-type ResourceApplyAdagradAttr func(optionalAttr)
-
-// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value.
+// Creates a dataset that skips `count` elements from the `input_dataset`.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the adagrad scheme.
+// Arguments:
 //
-// accum += grad * grad
-// var -= lr * grad * (1 / sqrt(accum))
+//	count: A scalar representing the number of elements from the `input_dataset`
+// that should be skipped.  If count is -1, skips everything.
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	grad: The gradient.
 //
-// Returns the created operation.
-func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) {
+func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdagrad",
+		Type: "SkipDataset",
 		Input: []tf.Input{
-			var_, accum, lr, grad,
+			input_dataset, count,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// SparseReduceSumAttr is an optional argument to SparseReduceSum.
-type SparseReduceSumAttr func(optionalAttr)
-
-// SparseReduceSumKeepDims sets the optional keep_dims attribute to value.
-//
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func SparseReduceSumKeepDims(value bool) SparseReduceSumAttr {
-	return func(m optionalAttr) {
-		m["keep_dims"] = value
+// Computes hyperbolic tangent of `x` element-wise.
+func Tanh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Tanh",
+		Input: []tf.Input{
+			x,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Computes the sum of elements across dimensions of a SparseTensor.
+// Computes the maximum along segments of a tensor.
 //
-// This Op takes a SparseTensor and is the sparse counterpart to
-// `tf.reduce_sum()`.  In particular, this Op also returns a dense `Tensor`
-// instead of a sparse one.
+// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
+// segments.
 //
-// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-// with length 1.
+// Computes a tensor such that
+// \\(output_i = \max_j(data_j)\\) where `max` is over `j` such
+// that `segment_ids[j] == i`.
 //
-// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-// with a single element is returned.  Additionally, the axes can be negative,
-// which are interpreted according to the indexing rules in Python.
+// If the max is empty for a given segment ID `i`, `output[i] = 0`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMax.png" alt>
+// </div>
 //
 // Arguments:
-//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-//	input_shape: 1-D.  Shape of the input SparseTensor.
-//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
 //
-// Returns `R-K`-D.  The reduced Tensor.
-func SparseReduceSum(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumAttr) (output tf.Output) {
+//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "SparseReduceSum",
+		Type: "SegmentMax",
 		Input: []tf.Input{
-			input_indices, input_values, input_shape, reduction_axes,
+			data, segment_ids,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MaxPool3DGradAttr is an optional argument to MaxPool3DGrad.
-type MaxPool3DGradAttr func(optionalAttr)
+// AvgPoolGradAttr is an optional argument to AvgPoolGrad.
+type AvgPoolGradAttr func(optionalAttr)
 
-// MaxPool3DGradDataFormat sets the optional data_format attribute to value.
+// AvgPoolGradDataFormat sets the optional data_format attribute to value.
 //
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func MaxPool3DGradDataFormat(value string) MaxPool3DGradAttr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func AvgPoolGradDataFormat(value string) AvgPoolGradAttr {
 	return func(m optionalAttr) {
 		m["data_format"] = value
 	}
 }
 
-// Computes gradients of max pooling function.
+// Computes gradients of the average pooling function.
 //
 // Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
-//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
-// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	orig_input_shape: 1-D.  Shape of the original input to `avg_pool`.
+//	grad: 4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t.
+// the output of `avg_pool`.
+//	ksize: The size of the sliding window for each dimension of the input.
+//	strides: The stride of the sliding window for each dimension of the input.
 //	padding: The type of padding algorithm to use.
-func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradAttr) (output tf.Output) {
+//
+// Returns 4-D.  Gradients w.r.t. the input of `avg_pool`.
+func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -16148,9 +15916,9 @@ func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, gr
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MaxPool3DGrad",
+		Type: "AvgPoolGrad",
 		Input: []tf.Input{
-			orig_input, orig_output, grad,
+			orig_input_shape, grad,
 		},
 		Attrs: attrs,
 	}
@@ -16158,397 +15926,247 @@ func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, gr
 	return op.Output(0)
 }
 
-// ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler.
-type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr)
+// StageClearAttr is an optional argument to StageClear.
+type StageClearAttr func(optionalAttr)
 
-// ThreadUnsafeUnigramCandidateSamplerSeed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
+// StageClearCapacity sets the optional capacity attribute to value.
 // If not specified, defaults to 0
-func ThreadUnsafeUnigramCandidateSamplerSeed(value int64) ThreadUnsafeUnigramCandidateSamplerAttr {
+//
+// REQUIRES: value >= 0
+func StageClearCapacity(value int64) StageClearAttr {
 	return func(m optionalAttr) {
-		m["seed"] = value
+		m["capacity"] = value
 	}
 }
 
-// ThreadUnsafeUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value.
-//
-// value: An second seed to avoid seed collision.
+// StageClearMemoryLimit sets the optional memory_limit attribute to value.
 // If not specified, defaults to 0
-func ThreadUnsafeUnigramCandidateSamplerSeed2(value int64) ThreadUnsafeUnigramCandidateSamplerAttr {
+//
+// REQUIRES: value >= 0
+func StageClearMemoryLimit(value int64) StageClearAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["memory_limit"] = value
 	}
 }
 
-// Generates labels for candidate sampling with a learned unigram distribution.
-//
-// See explanations of candidate sampling and the data formats at
-// go/candidate-sampling.
-//
-// For each batch, this op picks a single set of sampled candidate labels.
-//
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
-//
-// Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to randomly sample.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
-//	range_max: The sampler will sample integers from the interval [0, range_max).
-//
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func ThreadUnsafeUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...ThreadUnsafeUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ThreadUnsafeUnigramCandidateSampler",
-		Input: []tf.Input{
-			true_classes,
-		},
-		Attrs: attrs,
+// StageClearContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func StageClearContainer(value string) StageClearAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// ResourceSparseApplyProximalAdagradAttr is an optional argument to ResourceSparseApplyProximalAdagrad.
-type ResourceSparseApplyProximalAdagradAttr func(optionalAttr)
-
-// ResourceSparseApplyProximalAdagradUseLocking sets the optional use_locking attribute to value.
-//
-// value: If True, updating of the var and accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
-// If not specified, defaults to false
-func ResourceSparseApplyProximalAdagradUseLocking(value bool) ResourceSparseApplyProximalAdagradAttr {
+// StageClearSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func StageClearSharedName(value string) StageClearAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["shared_name"] = value
 	}
 }
 
-// Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.
-//
-// That is for rows we have grad for, we update var and accum as follows:
-// accum += grad * grad
-// prox_v = var
-// prox_v -= lr * grad * (1 / sqrt(accum))
-// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
-//
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Learning rate. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
+// Op removes all elements in the underlying container.
 //
 // Returns the created operation.
-func ResourceSparseApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalAdagradAttr) (o *tf.Operation) {
+func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"dtypes": dtypes}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyProximalAdagrad",
-		Input: []tf.Input{
-			var_, accum, lr, l1, l2, grad, indices,
-		},
+		Type: "StageClear",
+
 		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// Store the input tensor in the state of the current session.
-//
-// Arguments:
-//	value: The tensor to be stored.
-//
-// Returns The handle for the tensor stored in the session state, represented
-// as a string.
-func GetSessionHandle(scope *Scope, value tf.Output) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "GetSessionHandle",
-		Input: []tf.Input{
-			value,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
+// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits.
+type ComputeAccidentalHitsAttr func(optionalAttr)
 
-// Decode web-safe base64-encoded strings.
-//
-// Input may or may not have padding at the end. See EncodeBase64 for padding.
-// Web-safe means that input must use - and _ instead of + and /.
-//
-// Arguments:
-//	input: Base64 strings to decode.
+// ComputeAccidentalHitsSeed sets the optional seed attribute to value.
 //
-// Returns Decoded strings.
-func DecodeBase64(scope *Scope, input tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "DecodeBase64",
-		Input: []tf.Input{
-			input,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
 }
 
-// Computes hyperbolic tangent of `x` element-wise.
-func Tanh(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Tanh",
-		Input: []tf.Input{
-			x,
-		},
+// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value.
+//
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Restores tensors from a V2 checkpoint.
-//
-// For backward compatibility with the V1 format, this Op currently allows
-// restoring from a V1 checkpoint as well:
-//   - This Op first attempts to find the V2 index file pointed to by "prefix", and
-//     if found proceed to read it as a V2 checkpoint;
-//   - Otherwise the V1 read path is invoked.
-// Relying on this behavior is not recommended, as the ability to fall back to read
-// V1 might be deprecated and eventually removed.
-//
-// By default, restores the named tensors in full.  If the caller wishes to restore
-// specific slices of stored tensors, "shape_and_slices" should be non-empty
-// strings and correspondingly well-formed.
+// Computes the ids of the positions in sampled_candidates that match true_labels.
 //
-// Callers must ensure all the named tensors are indeed stored in the checkpoint.
+// When doing log-odds NCE, the result of this op should be passed through a
+// SparseToDense op, then added to the logits of the sampled candidates. This has
+// the effect of 'removing' the sampled labels that match the true labels by
+// making the classifier sure that they are sampled labels.
 //
 // Arguments:
-//	prefix: Must have a single element.  The prefix of a V2 checkpoint.
-//	tensor_names: shape {N}.  The names of the tensors to be restored.
-//	shape_and_slices: shape {N}.  The slice specs of the tensors to be restored.
-// Empty strings indicate that they are non-partitioned tensors.
-//	dtypes: shape {N}.  The list of expected dtype for the tensors.  Must match
-// those stored in the checkpoint.
+//	true_classes: The true_classes output of UnpackSparseLabels.
+//	sampled_candidates: The sampled_candidates output of CandidateSampler.
+//	num_true: Number of true labels per context.
 //
-// Returns shape {N}.  The restored tensors, whose shapes are read from the
-// checkpoint directly.
-func RestoreV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, dtypes []tf.DataType) (tensors []tf.Output) {
+// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label
+// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element
+// is -FLOAT_MAX.
+func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
+	attrs := map[string]interface{}{"num_true": num_true}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "RestoreV2",
+		Type: "ComputeAccidentalHits",
 		Input: []tf.Input{
-			prefix, tensor_names, shape_and_slices,
+			true_classes, sampled_candidates,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if tensors, idx, err = makeOutputList(op, idx, "tensors"); err != nil {
-		scope.UpdateErr("RestoreV2", err)
-		return
-	}
-	return tensors
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Returns x / y element-wise for integer types.
-//
-// Truncation designates that negative numbers will round fractional quantities
-// toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different
-// than Python semantics. See `FloorDiv` for a division function that matches
-// Python Semantics.
+// Computes sigmoid of `x` element-wise.
 //
-// *NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func TruncateDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Specifically, `y = 1 / (1 + exp(-x))`.
+func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "TruncateDiv",
+		Type: "Sigmoid",
 		Input: []tf.Input{
-			x, y,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox.
-type SampleDistortedBoundingBoxAttr func(optionalAttr)
+// RandomStandardNormalAttr is an optional argument to RandomStandardNormal.
+type RandomStandardNormalAttr func(optionalAttr)
 
-// SampleDistortedBoundingBoxSeed sets the optional seed attribute to value.
+// RandomStandardNormalSeed sets the optional seed attribute to value.
 //
-// value: If either `seed` or `seed2` are set to non-zero, the random number
-// generator is seeded by the given `seed`.  Otherwise, it is seeded by a random
-// seed.
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
 // If not specified, defaults to 0
-func SampleDistortedBoundingBoxSeed(value int64) SampleDistortedBoundingBoxAttr {
+func RandomStandardNormalSeed(value int64) RandomStandardNormalAttr {
 	return func(m optionalAttr) {
 		m["seed"] = value
 	}
 }
 
-// SampleDistortedBoundingBoxSeed2 sets the optional seed2 attribute to value.
+// RandomStandardNormalSeed2 sets the optional seed2 attribute to value.
 //
 // value: A second seed to avoid seed collision.
 // If not specified, defaults to 0
-func SampleDistortedBoundingBoxSeed2(value int64) SampleDistortedBoundingBoxAttr {
+func RandomStandardNormalSeed2(value int64) RandomStandardNormalAttr {
 	return func(m optionalAttr) {
 		m["seed2"] = value
 	}
 }
 
-// SampleDistortedBoundingBoxMinObjectCovered sets the optional min_object_covered attribute to value.
+// Outputs random values from a normal distribution.
 //
-// value: The cropped area of the image must contain at least this
-// fraction of any bounding box supplied. The value of this parameter should be
-// non-negative. In the case of 0, the cropped area does not need to overlap
-// any of the bounding boxes supplied.
-// If not specified, defaults to 0.1
-func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBoundingBoxAttr {
-	return func(m optionalAttr) {
-		m["min_object_covered"] = value
-	}
-}
-
-// SampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value.
+// The generated values will have mean 0 and standard deviation 1.
 //
-// value: The cropped area of the image must have an aspect ratio =
-// width / height within this range.
-// If not specified, defaults to <f:0.75 f:1.33 >
-func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr {
-	return func(m optionalAttr) {
-		m["aspect_ratio_range"] = value
+// Arguments:
+//	shape: The shape of the output tensor.
+//	dtype: The type of the output.
+//
+// Returns A tensor of the specified shape filled with random normal values.
+func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomStandardNormalAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtype": dtype}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "RandomStandardNormal",
+		Input: []tf.Input{
+			shape,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value.
+// FusedBatchNormAttr is an optional argument to FusedBatchNorm.
+type FusedBatchNormAttr func(optionalAttr)
+
+// FusedBatchNormEpsilon sets the optional epsilon attribute to value.
 //
-// value: The cropped area of the image must contain a fraction of the
-// supplied image within in this range.
-// If not specified, defaults to <f:0.05 f:1 >
-func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr {
+// value: A small float number added to the variance of x.
+// If not specified, defaults to 0.0001
+func FusedBatchNormEpsilon(value float32) FusedBatchNormAttr {
 	return func(m optionalAttr) {
-		m["area_range"] = value
+		m["epsilon"] = value
 	}
 }
 
-// SampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value.
+// FusedBatchNormDataFormat sets the optional data_format attribute to value.
 //
-// value: Number of attempts at generating a cropped region of the image
-// of the specified constraints. After `max_attempts` failures, return the entire
-// image.
-// If not specified, defaults to 100
-func SampleDistortedBoundingBoxMaxAttempts(value int64) SampleDistortedBoundingBoxAttr {
+// value: The data format for x and y. Either "NHWC" (default) or "NCHW".
+// If not specified, defaults to "NHWC"
+func FusedBatchNormDataFormat(value string) FusedBatchNormAttr {
 	return func(m optionalAttr) {
-		m["max_attempts"] = value
+		m["data_format"] = value
 	}
 }
 
-// SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value.
+// FusedBatchNormIsTraining sets the optional is_training attribute to value.
 //
-// value: Controls behavior if no bounding boxes supplied.
-// If true, assume an implicit bounding box covering the whole input. If false,
-// raise an error.
-// If not specified, defaults to false
-func SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxAttr {
+// value: A bool value to indicate the operation is for training (default)
+// or inference.
+// If not specified, defaults to true
+func FusedBatchNormIsTraining(value bool) FusedBatchNormAttr {
 	return func(m optionalAttr) {
-		m["use_image_if_no_bounding_boxes"] = value
+		m["is_training"] = value
 	}
 }
 
-// Generate a single randomly distorted bounding box for an image.
-//
-// Bounding box annotations are often supplied in addition to ground-truth labels
-// in image recognition or object localization tasks. A common technique for
-// training such a system is to randomly distort an image while preserving
-// its content, i.e. *data augmentation*. This Op outputs a randomly distorted
-// localization of an object, i.e. bounding box, given an `image_size`,
-// `bounding_boxes` and a series of constraints.
-//
-// The output of this Op is a single bounding box that may be used to crop the
-// original image. The output is returned as 3 tensors: `begin`, `size` and
-// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
-// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize
-// what the bounding box looks like.
-//
-// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The
-// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
-// height of the underlying image.
-//
-// For example,
-//
-// ```python
-//     # Generate a single distorted bounding box.
-//     begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
-//         tf.shape(image),
-//         bounding_boxes=bounding_boxes)
-//
-//     # Draw the bounding box in an image summary.
-//     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
-//                                                   bbox_for_draw)
-//     tf.image_summary('images_with_box', image_with_box)
-//
-//     # Employ the bounding box to distort the image.
-//     distorted_image = tf.slice(image, begin, size)
-// ```
+// Batch normalization.
 //
-// Note that if no bounding box information is available, setting
-// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
-// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
-// false and no bounding boxes are supplied, an error is raised.
+// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
+// The size of 1D Tensors matches the dimension C of the 4D Tensors.
 //
 // Arguments:
-//	image_size: 1-D, containing `[height, width, channels]`.
-//	bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes
-// associated with the image.
+//	x: A 4D Tensor for input data.
+//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
+//	offset: A 1D Tensor for offset, to shift to the normalized x.
+//	mean: A 1D Tensor for population mean. Used for inference only;
+// must be empty for training.
+//	variance: A 1D Tensor for population variance. Used for inference only;
+// must be empty for training.
 //
-// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to
-// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to
-// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box.
-// Provide as input to `tf.image.draw_bounding_boxes`.
-func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, optional ...SampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) {
+// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow
+// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by
+// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused
+// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance
+// in the cuDNN case), to be reused in the gradient computation.
+func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormAttr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -16557,75 +16175,85 @@ func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_box
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SampleDistortedBoundingBox",
+		Type: "FusedBatchNorm",
 		Input: []tf.Input{
-			image_size, bounding_boxes,
+			x, scale, offset, mean, variance,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
 }
 
-// Returns the truth value of (x > y) element-wise.
-//
-// *NOTE*: `Greater` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Computes tan of x element-wise.
+func Tan(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Greater",
+		Type: "Tan",
 		Input: []tf.Input{
-			x, y,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp.
-type ResourceSparseApplyRMSPropAttr func(optionalAttr)
+// FusedBatchNormV2Attr is an optional argument to FusedBatchNormV2.
+type FusedBatchNormV2Attr func(optionalAttr)
 
-// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value.
+// FusedBatchNormV2Epsilon sets the optional epsilon attribute to value.
 //
-// value: If `True`, updating of the var, ms, and mom tensors is protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr {
+// value: A small float number added to the variance of x.
+// If not specified, defaults to 0.0001
+func FusedBatchNormV2Epsilon(value float32) FusedBatchNormV2Attr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["epsilon"] = value
 	}
 }
 
-// Update '*var' according to the RMSProp algorithm.
+// FusedBatchNormV2DataFormat sets the optional data_format attribute to value.
 //
-// Note that in dense implementation of this algorithm, ms and mom will
-// update even if the grad is zero, but in this sparse implementation, ms
-// and mom will not update in iterations during which the grad is zero.
+// value: The data format for x and y. Either "NHWC" (default) or "NCHW".
+// If not specified, defaults to "NHWC"
+func FusedBatchNormV2DataFormat(value string) FusedBatchNormV2Attr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// FusedBatchNormV2IsTraining sets the optional is_training attribute to value.
 //
-// mean_square = decay * mean_square + (1-decay) * gradient ** 2
-// Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
+// value: A bool value to indicate the operation is for training (default)
+// or inference.
+// If not specified, defaults to true
+func FusedBatchNormV2IsTraining(value bool) FusedBatchNormV2Attr {
+	return func(m optionalAttr) {
+		m["is_training"] = value
+	}
+}
+
+// Batch normalization.
 //
-// ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-// var <- var - mom
+// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
+// The size of 1D Tensors matches the dimension C of the 4D Tensors.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	ms: Should be from a Variable().
-//	mom: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	rho: Decay rate. Must be a scalar.
-//
-//	epsilon: Ridge term. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var, ms and mom.
+//	x: A 4D Tensor for input data.
+//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
+//	offset: A 1D Tensor for offset, to shift to the normalized x.
+//	mean: A 1D Tensor for population mean. Used for inference only;
+// must be empty for training.
+//	variance: A 1D Tensor for population variance. Used for inference only;
+// must be empty for training.
 //
-// Returns the created operation.
-func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) {
+// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow
+// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by
+// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused
+// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance
+// in the cuDNN case), to be reused in the gradient computation.
+func FusedBatchNormV2(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormV2Attr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -16634,71 +16262,194 @@ func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyRMSProp",
+		Type: "FusedBatchNormV2",
 		Input: []tf.Input{
-			var_, ms, mom, lr, rho, momentum, epsilon, grad, indices,
+			x, scale, offset, mean, variance,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
 }
 
-// Returns which elements of x are Inf.
+// MultinomialAttr is an optional argument to Multinomial.
+type MultinomialAttr func(optionalAttr)
+
+// MultinomialSeed sets the optional seed attribute to value.
 //
-// @compatibility(numpy)
-// Equivalent to np.isinf
-// @end_compatibility
-func IsInf(scope *Scope, x tf.Output) (y tf.Output) {
+// value: If either seed or seed2 is set to be non-zero, the internal random number
+// generator is seeded by the given seed.  Otherwise, a random seed is used.
+// If not specified, defaults to 0
+func MultinomialSeed(value int64) MultinomialAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// MultinomialSeed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func MultinomialSeed2(value int64) MultinomialAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// MultinomialOutputDtype sets the optional output_dtype attribute to value.
+// If not specified, defaults to DT_INT64
+func MultinomialOutputDtype(value tf.DataType) MultinomialAttr {
+	return func(m optionalAttr) {
+		m["output_dtype"] = value
+	}
+}
+
+// Draws samples from a multinomial distribution.
+//
+// Arguments:
+//	logits: 2-D Tensor with shape `[batch_size, num_classes]`.  Each slice `[i, :]`
+// represents the unnormalized log probabilities for all classes.
+//	num_samples: 0-D.  Number of independent samples to draw for each row slice.
+//
+// Returns 2-D Tensor with shape `[batch_size, num_samples]`.  Each slice `[i, :]`
+// contains the drawn class labels with range `[0, num_classes)`.
+func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "IsInf",
+		Type: "Multinomial",
 		Input: []tf.Input{
-			x,
+			logits, num_samples,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ResourceSparseApplyFtrlAttr is an optional argument to ResourceSparseApplyFtrl.
-type ResourceSparseApplyFtrlAttr func(optionalAttr)
+// EncodeJpegAttr is an optional argument to EncodeJpeg.
+type EncodeJpegAttr func(optionalAttr)
 
-// ResourceSparseApplyFtrlUseLocking sets the optional use_locking attribute to value.
+// EncodeJpegFormat sets the optional format attribute to value.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
+// value: Per pixel image format.
+// If not specified, defaults to ""
+func EncodeJpegFormat(value string) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["format"] = value
+	}
+}
+
+// EncodeJpegQuality sets the optional quality attribute to value.
+//
+// value: Quality of the compression from 0 to 100 (higher is better and slower).
+// If not specified, defaults to 95
+func EncodeJpegQuality(value int64) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["quality"] = value
+	}
+}
+
+// EncodeJpegProgressive sets the optional progressive attribute to value.
+//
+// value: If True, create a JPEG that loads progressively (coarse to fine).
 // If not specified, defaults to false
-func ResourceSparseApplyFtrlUseLocking(value bool) ResourceSparseApplyFtrlAttr {
+func EncodeJpegProgressive(value bool) EncodeJpegAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["progressive"] = value
 	}
 }
 
-// Update relevant entries in '*var' according to the Ftrl-proximal scheme.
+// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value.
 //
-// That is for rows we have grad for, we update var, accum and linear as follows:
-// accum_new = accum + grad * grad
-// linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-// accum = accum_new
+// value: If True, spend CPU/RAM to reduce size with no quality change.
+// If not specified, defaults to false
+func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["optimize_size"] = value
+	}
+}
+
+// EncodeJpegChromaDownsampling sets the optional chroma_downsampling attribute to value.
+//
+// value: See http://en.wikipedia.org/wiki/Chroma_subsampling.
+// If not specified, defaults to true
+func EncodeJpegChromaDownsampling(value bool) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["chroma_downsampling"] = value
+	}
+}
+
+// EncodeJpegDensityUnit sets the optional density_unit attribute to value.
+//
+// value: Unit used to specify `x_density` and `y_density`:
+// pixels per inch (`'in'`) or centimeter (`'cm'`).
+// If not specified, defaults to "in"
+func EncodeJpegDensityUnit(value string) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["density_unit"] = value
+	}
+}
+
+// EncodeJpegXDensity sets the optional x_density attribute to value.
+//
+// value: Horizontal pixels per density unit.
+// If not specified, defaults to 300
+func EncodeJpegXDensity(value int64) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["x_density"] = value
+	}
+}
+
+// EncodeJpegYDensity sets the optional y_density attribute to value.
+//
+// value: Vertical pixels per density unit.
+// If not specified, defaults to 300
+func EncodeJpegYDensity(value int64) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["y_density"] = value
+	}
+}
+
+// EncodeJpegXmpMetadata sets the optional xmp_metadata attribute to value.
+//
+// value: If not empty, embed this XMP metadata in the image header.
+// If not specified, defaults to ""
+func EncodeJpegXmpMetadata(value string) EncodeJpegAttr {
+	return func(m optionalAttr) {
+		m["xmp_metadata"] = value
+	}
+}
+
+// JPEG-encode an image.
+//
+// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`.
+//
+// The attr `format` can be used to override the color format of the encoded
+// output.  Values can be:
+//
+// *   `''`: Use a default format based on the number of channels in the image.
+// *   `grayscale`: Output a grayscale JPEG image.  The `channels` dimension
+//     of `image` must be 1.
+// *   `rgb`: Output an RGB JPEG image. The `channels` dimension
+//     of `image` must be 3.
+//
+// If `format` is not specified or is the empty string, a default format is picked
+// in function of the number of channels in `image`:
+//
+// *   1: Output a grayscale image.
+// *   3: Output an RGB image.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	linear: Should be from a Variable().
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	lr_power: Scaling factor. Must be a scalar.
+//	image: 3-D with shape `[height, width, channels]`.
 //
-// Returns the created operation.
-func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlAttr) (o *tf.Operation) {
+// Returns 0-D. JPEG-encoded image.
+func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (contents tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -16707,56 +16458,57 @@ func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, line
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyFtrl",
+		Type: "EncodeJpeg",
 		Input: []tf.Input{
-			var_, accum, linear, grad, indices, lr, l1, l2, lr_power,
+			image,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Component-wise multiplies a SparseTensor by a dense Tensor.
-//
-// The output locations corresponding to the implicitly zero elements in the sparse
-// tensor will be zero (i.e., will not take up storage space), regardless of the
-// contents of the dense tensor (even if it's +/-INF and that INF*0 == NaN).
+// MaxPoolGradAttr is an optional argument to MaxPoolGrad.
+type MaxPoolGradAttr func(optionalAttr)
+
+// MaxPoolGradDataFormat sets the optional data_format attribute to value.
 //
-// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not
-// the other direction.
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func MaxPoolGradDataFormat(value string) MaxPoolGradAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Computes gradients of the maxpooling function.
 //
 // Arguments:
-//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
-//	sp_shape: 1-D.  Shape of the input SparseTensor.
-//	dense: `R`-D.  The dense Tensor operand.
+//	orig_input: The original input tensor.
+//	orig_output: The original output tensor.
+//	grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
 //
-// Returns 1-D.  The `N` values that are operated on.
-func SparseDenseCwiseMul(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
+// Returns Gradients w.r.t. the input to `max_pool`.
+func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "SparseDenseCwiseMul",
-		Input: []tf.Input{
-			sp_indices, sp_values, sp_shape, dense,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset that emits `components` as a tuple of tensors once.
-func TensorDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
 	}
-	attrs := map[string]interface{}{"output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "TensorDataset",
+		Type: "MaxPoolGrad",
 		Input: []tf.Input{
-			tf.OutputList(components),
+			orig_input, orig_output, grad,
 		},
 		Attrs: attrs,
 	}
@@ -16764,49 +16516,66 @@ func TensorDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shap
 	return op.Output(0)
 }
 
-// NonMaxSuppressionAttr is an optional argument to NonMaxSuppression.
-type NonMaxSuppressionAttr func(optionalAttr)
+// CropAndResizeAttr is an optional argument to CropAndResize.
+type CropAndResizeAttr func(optionalAttr)
 
-// NonMaxSuppressionIouThreshold sets the optional iou_threshold attribute to value.
+// CropAndResizeMethod sets the optional method attribute to value.
 //
-// value: A float representing the threshold for deciding whether boxes
-// overlap too much with respect to IOU.
-// If not specified, defaults to 0.5
-func NonMaxSuppressionIouThreshold(value float32) NonMaxSuppressionAttr {
+// value: A string specifying the interpolation method. Only 'bilinear' is
+// supported for now.
+// If not specified, defaults to "bilinear"
+func CropAndResizeMethod(value string) CropAndResizeAttr {
 	return func(m optionalAttr) {
-		m["iou_threshold"] = value
+		m["method"] = value
 	}
 }
 
-// Greedily selects a subset of bounding boxes in descending order of score,
+// CropAndResizeExtrapolationValue sets the optional extrapolation_value attribute to value.
 //
-// pruning away boxes that have high intersection-over-union (IOU) overlap
-// with previously selected boxes.  Bounding boxes are supplied as
-// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
-// diagonal pair of box corners and the coordinates can be provided as normalized
-// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
-// is agnostic to where the origin is in the coordinate system.  Note that this
-// algorithm is invariant to orthogonal transformations and translations
-// of the coordinate system; thus translating or reflections of the coordinate
-// system result in the same boxes being selected by the algorithm.
-// The output of this operation is a set of integers indexing into the input
-// collection of bounding boxes representing the selected boxes.  The bounding
-// box coordinates corresponding to the selected indices can then be obtained
-// using the `tf.gather operation`.  For example:
-//   selected_indices = tf.image.non_max_suppression(
-//       boxes, scores, max_output_size, iou_threshold)
-//   selected_boxes = tf.gather(boxes, selected_indices)
+// value: Value used for extrapolation, when applicable.
+// If not specified, defaults to 0
+func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr {
+	return func(m optionalAttr) {
+		m["extrapolation_value"] = value
+	}
+}
+
+// Extracts crops from the input image tensor and bilinearly resizes them (possibly
+//
+// with aspect ratio change) to a common output size specified by `crop_size`. This
+// is more general than the `crop_to_bounding_box` op which extracts a fixed size
+// slice from the input image and does not allow resizing or aspect ratio change.
+//
+// Returns a tensor with `crops` from the input `image` at positions defined at the
+// bounding box locations in `boxes`. The cropped boxes are all resized (with
+// bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The
+// result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The
+// resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the
+// method will give identical results to using `tf.image.resize_bilinear()`
+// with `align_corners=True`.
 //
 // Arguments:
-//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
-//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
-// score corresponding to each box (each row of boxes).
-//	max_output_size: A scalar integer tensor representing the maximum number of
-// boxes to be selected by non max suppression.
+//	image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
+// Both `image_height` and `image_width` need to be positive.
+//	boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
+// specifies the coordinates of a box in the `box_ind[i]` image and is specified
+// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
+// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
+// `[0, 1]` interval of normalized image height is mapped to
+// `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in
+// which case the sampled crop is an up-down flipped version of the original
+// image. The width dimension is treated similarly. Normalized coordinates
+// outside the `[0, 1]` range are allowed, in which case we use
+// `extrapolation_value` to extrapolate the input image values.
+//	box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
+// The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
+//	crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All
+// cropped image patches are resized to this size. The aspect ratio of the image
+// content is not preserved. Both `crop_height` and `crop_width` need to be
+// positive.
 //
-// Returns A 1-D integer tensor of shape `[M]` representing the selected
-// indices from the boxes tensor, where `M <= max_output_size`.
-func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, optional ...NonMaxSuppressionAttr) (selected_indices tf.Output) {
+// Returns A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
+func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Output, crop_size tf.Output, optional ...CropAndResizeAttr) (crops tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -16815,9 +16584,9 @@ func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_outp
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "NonMaxSuppression",
+		Type: "CropAndResize",
 		Input: []tf.Input{
-			boxes, scores, max_output_size,
+			image, boxes, box_ind, crop_size,
 		},
 		Attrs: attrs,
 	}
@@ -16825,38 +16594,38 @@ func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_outp
 	return op.Output(0)
 }
 
-// ResourceApplyAdadeltaAttr is an optional argument to ResourceApplyAdadelta.
-type ResourceApplyAdadeltaAttr func(optionalAttr)
+// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign.
+type ResourceApplyPowerSignAttr func(optionalAttr)
 
-// ResourceApplyAdadeltaUseLocking sets the optional use_locking attribute to value.
+// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value.
 //
-// value: If True, updating of the var, accum and update_accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// value: If `True`, updating of the var and m tensors is
+// protected by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
 // If not specified, defaults to false
-func ResourceApplyAdadeltaUseLocking(value bool) ResourceApplyAdadeltaAttr {
+func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr {
 	return func(m optionalAttr) {
 		m["use_locking"] = value
 	}
 }
 
-// Update '*var' according to the adadelta scheme.
+// Update '*var' according to the AddSign update.
 //
-// accum = rho() * accum + (1 - rho()) * grad.square();
-// update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
-// update_accum = rho() * update_accum + (1 - rho()) * update.square();
-// var -= update;
+// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g
+// variable <- variable - lr_t * update
 //
 // Arguments:
 //	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	accum_update: Should be from a Variable().
+//	m: Should be from a Variable().
 //	lr: Scaling factor. Must be a scalar.
-//	rho: Decay factor. Must be a scalar.
-//	epsilon: Constant factor. Must be a scalar.
+//	logbase: Must be a scalar.
+//	sign_decay: Must be a scalar.
+//	beta: Must be a scalar.
 //	grad: The gradient.
 //
 // Returns the created operation.
-func ResourceApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdadeltaAttr) (o *tf.Operation) {
+func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -16865,106 +16634,152 @@ func ResourceApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdadelta",
+		Type: "ResourceApplyPowerSign",
 		Input: []tf.Input{
-			var_, accum, accum_update, lr, rho, epsilon, grad,
+			var_, m, lr, logbase, sign_decay, beta, grad,
 		},
 		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// StageSizeAttr is an optional argument to StageSize.
-type StageSizeAttr func(optionalAttr)
+// Deprecated. Disallowed in GraphDef version >= 2.
+//
+// DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead
+func AdjustContrast(scope *Scope, images tf.Output, contrast_factor tf.Output, min_value tf.Output, max_value tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AdjustContrast",
+		Input: []tf.Input{
+			images, contrast_factor, min_value, max_value,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// StageSizeCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// Table initializer that takes two tensors for keys and values respectively.
 //
-// REQUIRES: value >= 0
-func StageSizeCapacity(value int64) StageSizeAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
+// Arguments:
+//	table_handle: Handle to a table which will be initialized.
+//	keys: Keys of type Tkey.
+//	values: Values of type Tval.
+//
+// Returns the created operation.
+func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "InitializeTableV2",
+		Input: []tf.Input{
+			table_handle, keys, values,
+		},
 	}
+	return scope.AddOperation(opspec)
 }
 
-// StageSizeMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// PrintAttr is an optional argument to Print.
+type PrintAttr func(optionalAttr)
+
+// PrintMessage sets the optional message attribute to value.
 //
-// REQUIRES: value >= 0
-func StageSizeMemoryLimit(value int64) StageSizeAttr {
+// value: A string, prefix of the error message.
+// If not specified, defaults to ""
+func PrintMessage(value string) PrintAttr {
 	return func(m optionalAttr) {
-		m["memory_limit"] = value
+		m["message"] = value
 	}
 }
 
-// StageSizeContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func StageSizeContainer(value string) StageSizeAttr {
+// PrintFirstN sets the optional first_n attribute to value.
+//
+// value: Only log `first_n` number of times. -1 disables logging.
+// If not specified, defaults to -1
+func PrintFirstN(value int64) PrintAttr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["first_n"] = value
 	}
 }
 
-// StageSizeSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func StageSizeSharedName(value string) StageSizeAttr {
+// PrintSummarize sets the optional summarize attribute to value.
+//
+// value: Only print this many entries of each tensor.
+// If not specified, defaults to 3
+func PrintSummarize(value int64) PrintAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["summarize"] = value
 	}
 }
 
-// Op returns the number of elements in the underlying container.
-func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (size tf.Output) {
+// Prints a list of tensors.
+//
+// Passes `input` through to `output` and prints `data` when evaluating.
+//
+// Arguments:
+//	input: The tensor passed to `output`
+//	data: A list of tensors to print out when op is evaluated.
+//
+// Returns = The unmodified `input` tensor
+func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StageSize",
-
+		Type: "Print",
+		Input: []tf.Input{
+			input, tf.OutputList(data),
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// SizeAttr is an optional argument to Size.
-type SizeAttr func(optionalAttr)
-
-// SizeOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_INT32
-func SizeOutType(value tf.DataType) SizeAttr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
+// Outputs a `Summary` protocol buffer with a tensor and per-plugin data.
+//
+// Arguments:
+//	tag: A string attached to this summary. Used for organization in TensorBoard.
+//	tensor: A tensor to serialize.
+//	serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin
+// data.
+func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorSummaryV2",
+		Input: []tf.Input{
+			tag, tensor, serialized_summary_metadata,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Returns the size of a tensor.
+// Creates a dataset that asynchronously prefetches elements from `input_dataset`.
 //
-// This operation returns an integer representing the number of elements in
-// `input`.
+// Arguments:
 //
-// For example:
+//	buffer_size: The maximum number of elements to buffer in an iterator over
+// this dataset.
 //
-// ```
-// # 't' is [[[1, 1,, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]]
-// size(t) ==> 12
-// ```
-func Size(scope *Scope, input tf.Output, optional ...SizeAttr) (output tf.Output) {
+//
+func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "Size",
+		Type: "PrefetchDataset",
 		Input: []tf.Input{
-			input,
+			input_dataset, buffer_size,
 		},
 		Attrs: attrs,
 	}
@@ -16972,46 +16787,48 @@ func Size(scope *Scope, input tf.Output, optional ...SizeAttr) (output tf.Output
 	return op.Output(0)
 }
 
-// ResourceApplyRMSPropAttr is an optional argument to ResourceApplyRMSProp.
-type ResourceApplyRMSPropAttr func(optionalAttr)
+// TensorSummaryAttr is an optional argument to TensorSummary.
+type TensorSummaryAttr func(optionalAttr)
 
-// ResourceApplyRMSPropUseLocking sets the optional use_locking attribute to value.
+// TensorSummaryDescription sets the optional description attribute to value.
 //
-// value: If `True`, updating of the var, ms, and mom tensors is protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyRMSPropUseLocking(value bool) ResourceApplyRMSPropAttr {
+// value: A json-encoded SummaryDescription proto.
+// If not specified, defaults to ""
+func TensorSummaryDescription(value string) TensorSummaryAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["description"] = value
 	}
 }
 
-// Update '*var' according to the RMSProp algorithm.
+// TensorSummaryLabels sets the optional labels attribute to value.
 //
-// Note that in dense implementation of this algorithm, ms and mom will
-// update even if the grad is zero, but in this sparse implementation, ms
-// and mom will not update in iterations during which the grad is zero.
+// value: An unused list of strings.
+// If not specified, defaults to <>
+func TensorSummaryLabels(value []string) TensorSummaryAttr {
+	return func(m optionalAttr) {
+		m["labels"] = value
+	}
+}
+
+// TensorSummaryDisplayName sets the optional display_name attribute to value.
 //
-// mean_square = decay * mean_square + (1-decay) * gradient ** 2
-// Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
+// value: An unused string.
+// If not specified, defaults to ""
+func TensorSummaryDisplayName(value string) TensorSummaryAttr {
+	return func(m optionalAttr) {
+		m["display_name"] = value
+	}
+}
+
+// Outputs a `Summary` protocol buffer with a tensor.
 //
-// ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-// var <- var - mom
+// This op is being phased out in favor of TensorSummaryV2, which lets callers pass
+// a tag as well as a serialized SummaryMetadata proto string that contains
+// plugin-specific data. We will keep this op to maintain backwards compatibility.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	ms: Should be from a Variable().
-//	mom: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	rho: Decay rate. Must be a scalar.
-//
-//	epsilon: Ridge term. Must be a scalar.
-//	grad: The gradient.
-//
-// Returns the created operation.
-func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyRMSPropAttr) (o *tf.Operation) {
+//	tensor: A tensor to serialize.
+func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17020,198 +16837,227 @@ func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Out
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyRMSProp",
+		Type: "TensorSummary",
 		Input: []tf.Input{
-			var_, ms, mom, lr, rho, momentum, epsilon, grad,
+			tensor,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// ResourceApplyAdamAttr is an optional argument to ResourceApplyAdam.
-type ResourceApplyAdamAttr func(optionalAttr)
-
-// ResourceApplyAdamUseLocking sets the optional use_locking attribute to value.
+// Computes the gradient for the tanh of `x` wrt its input.
 //
-// value: If `True`, updating of the var, m, and v tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyAdamUseLocking(value bool) ResourceApplyAdamAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
+// Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy`
+// is the corresponding input gradient.
+func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// ResourceApplyAdamUseNesterov sets the optional use_nesterov attribute to value.
-//
-// value: If `True`, uses the nesterov update.
-// If not specified, defaults to false
-func ResourceApplyAdamUseNesterov(value bool) ResourceApplyAdamAttr {
-	return func(m optionalAttr) {
-		m["use_nesterov"] = value
+	opspec := tf.OpSpec{
+		Type: "TanhGrad",
+		Input: []tf.Input{
+			y, dy,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Update '*var' according to the Adam algorithm.
+// Outputs a `Summary` protocol buffer with scalar values.
 //
-// lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
-// m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
-// v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
-// variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
+// The input `tags` and `values` must have the same shape.  The generated summary
+// has a summary value for each tag-value pair in `tags` and `values`.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	m: Should be from a Variable().
-//	v: Should be from a Variable().
-//	beta1_power: Must be a scalar.
-//	beta2_power: Must be a scalar.
-//	lr: Scaling factor. Must be a scalar.
-//	beta1: Momentum factor. Must be a scalar.
-//	beta2: Momentum factor. Must be a scalar.
-//	epsilon: Ridge term. Must be a scalar.
-//	grad: The gradient.
+//	tags: Tags for the summary.
+//	values: Same shape as `tags.  Values for the summary.
 //
-// Returns the created operation.
-func ResourceApplyAdam(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamAttr) (o *tf.Operation) {
+// Returns Scalar.  Serialized `Summary` protocol buffer.
+func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdam",
+		Type: "ScalarSummary",
 		Input: []tf.Input{
-			var_, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad,
+			tags, values,
 		},
-		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// 3D fast Fourier transform.
-//
-// Computes the 3-dimensional discrete Fourier transform over the inner-most 3
-// dimensions of `input`.
+// Outputs a `Summary` protocol buffer with a histogram.
 //
-// Arguments:
-//	input: A complex64 tensor.
+// The generated
+// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
+// has one summary value containing a histogram for `values`.
 //
-// Returns A complex64 tensor of the same shape as `input`. The inner-most 3
-//   dimensions of `input` are replaced with their 3D Fourier transform.
+// This op reports an `InvalidArgument` error if any value is not finite.
 //
-// @compatibility(numpy)
-// Equivalent to np.fft.fftn with 3 dimensions.
-// @end_compatibility
-func FFT3D(scope *Scope, input tf.Output) (output tf.Output) {
+// Arguments:
+//	tag: Scalar.  Tag to use for the `Summary.Value`.
+//	values: Any shape. Values to use to build the histogram.
+//
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "FFT3D",
+		Type: "HistogramSummary",
 		Input: []tf.Input{
-			input,
+			tag, values,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Deserialize `SparseTensor` from a (serialized) string 3-vector (1-D `Tensor`)
-//
-// object.
+// Computes the number of elements in the given queue.
 //
 // Arguments:
-//	serialized_sparse: 1-D, The serialized `SparseTensor` object. Must have 3 columns.
-//	dtype: The `dtype` of the serialized `SparseTensor` object.
-func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) {
+//	handle: The handle to a queue.
+//
+// Returns The number of elements in the given queue.
+func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
 	opspec := tf.OpSpec{
-		Type: "DeserializeSparse",
+		Type: "QueueSizeV2",
 		Input: []tf.Input{
-			serialized_sparse,
+			handle,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Elementwise computes the bitwise XOR of `x` and `y`.
+// ImageSummaryAttr is an optional argument to ImageSummary.
+type ImageSummaryAttr func(optionalAttr)
+
+// ImageSummaryMaxImages sets the optional max_images attribute to value.
 //
-// The result will have those bits set, that are different in `x` and `y`. The
-// computation is performed on the underlying representations of `x` and `y`.
-func BitwiseXor(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
+// value: Max number of batch elements to generate images for.
+// If not specified, defaults to 3
+//
+// REQUIRES: value >= 1
+func ImageSummaryMaxImages(value int64) ImageSummaryAttr {
+	return func(m optionalAttr) {
+		m["max_images"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "BitwiseXor",
-		Input: []tf.Input{
-			x, y,
-		},
+}
+
+// ImageSummaryBadColor sets the optional bad_color attribute to value.
+//
+// value: Color to use for pixels with non-finite values.
+// If not specified, defaults to <dtype:DT_UINT8 tensor_shape:<dim:<size:4 > > int_val:255 int_val:0 int_val:0 int_val:255 >
+func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr {
+	return func(m optionalAttr) {
+		m["bad_color"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Creates a summary file writer accessible by the given resource handle.
+// Outputs a `Summary` protocol buffer with images.
+//
+// The summary has up to `max_images` summary values containing images. The
+// images are built from `tensor` which must be 4-D with shape `[batch_size,
+// height, width, channels]` and where `channels` can be:
+//
+// *  1: `tensor` is interpreted as Grayscale.
+// *  3: `tensor` is interpreted as RGB.
+// *  4: `tensor` is interpreted as RGBA.
+//
+// The images have the same number of channels as the input tensor. For float
+// input, the values are normalized one image at a time to fit in the range
+// `[0, 255]`.  `uint8` values are unchanged.  The op uses two different
+// normalization algorithms:
+//
+// *  If the input values are all positive, they are rescaled so the largest one
+//    is 255.
+//
+// *  If any input value is negative, the values are shifted so input value 0.0
+//    is at 127.  They are then rescaled so that either the smallest value is 0,
+//    or the largest one is 255.
+//
+// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
+// build the `tag` of the summary values:
+//
+// *  If `max_images` is 1, the summary value tag is '*tag*/image'.
+// *  If `max_images` is greater than 1, the summary value tags are
+//    generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
+//
+// The `bad_color` argument is the color to use in the generated images for
+// non-finite input values.  It is a `unit8` 1-D tensor of length `channels`.
+// Each element must be in the range `[0, 255]` (It represents the value of a
+// pixel in the output image).  Non-finite values in the input tensor are
+// replaced by this tensor in the output image.  The default value is the color
+// red.
 //
 // Arguments:
-//	writer: A handle to the summary writer resource
-//	logdir: Directory where the event file will be written.
-//	max_queue: Size of the queue of pending events and summaries.
-//	flush_millis: How often, in milliseconds, to flush the pending events and
-// summaries to disk.
-//	filename_suffix: Every event file's name is suffixed with this suffix.
+//	tag: Scalar. Used to build the `tag` attribute of the summary values.
+//	tensor: 4-D of shape `[batch_size, height, width, channels]` where
+// `channels` is 1, 3, or 4.
 //
-// Returns the created operation.
-func CreateSummaryFileWriter(scope *Scope, writer tf.Output, logdir tf.Output, max_queue tf.Output, flush_millis tf.Output, filename_suffix tf.Output) (o *tf.Operation) {
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func ImageSummary(scope *Scope, tag tf.Output, tensor tf.Output, optional ...ImageSummaryAttr) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "CreateSummaryFileWriter",
+		Type: "ImageSummary",
 		Input: []tf.Input{
-			writer, logdir, max_queue, flush_millis, filename_suffix,
+			tag, tensor,
 		},
+		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// EncodeBase64Attr is an optional argument to EncodeBase64.
-type EncodeBase64Attr func(optionalAttr)
+// AudioSummaryV2Attr is an optional argument to AudioSummaryV2.
+type AudioSummaryV2Attr func(optionalAttr)
 
-// EncodeBase64Pad sets the optional pad attribute to value.
+// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value.
 //
-// value: Bool whether padding is applied at the ends.
-// If not specified, defaults to false
-func EncodeBase64Pad(value bool) EncodeBase64Attr {
+// value: Max number of batch elements to generate audio for.
+// If not specified, defaults to 3
+//
+// REQUIRES: value >= 1
+func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr {
 	return func(m optionalAttr) {
-		m["pad"] = value
+		m["max_outputs"] = value
 	}
 }
 
-// Encode strings into web-safe base64 format.
+// Outputs a `Summary` protocol buffer with audio.
 //
-// Refer to the following article for more information on base64 format:
-// en.wikipedia.org/wiki/Base64. Base64 strings may have padding with '=' at the
-// end so that the encoded has length multiple of 4. See Padding section of the
-// link above.
+// The summary has up to `max_outputs` summary values containing audio. The
+// audio is built from `tensor` which must be 3-D with shape `[batch_size,
+// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are
+// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`.
 //
-// Web-safe means that the encoder uses - and _ instead of + and /.
+// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
+// build the `tag` of the summary values:
+//
+// *  If `max_outputs` is 1, the summary value tag is '*tag*/audio'.
+// *  If `max_outputs` is greater than 1, the summary value tags are
+//    generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.
 //
 // Arguments:
-//	input: Strings to be encoded.
+//	tag: Scalar. Used to build the `tag` attribute of the summary values.
+//	tensor: 2-D of shape `[batch_size, frames]`.
+//	sample_rate: The sample rate of the signal in hertz.
 //
-// Returns Input strings encoded in base64.
-func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) (output tf.Output) {
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17220,9 +17066,9 @@ func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) (
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "EncodeBase64",
+		Type: "AudioSummaryV2",
 		Input: []tf.Input{
-			input,
+			tag, tensor, sample_rate,
 		},
 		Attrs: attrs,
 	}
@@ -17230,101 +17076,105 @@ func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) (
 	return op.Output(0)
 }
 
-// VarHandleOpAttr is an optional argument to VarHandleOp.
-type VarHandleOpAttr func(optionalAttr)
+// AvgPoolAttr is an optional argument to AvgPool.
+type AvgPoolAttr func(optionalAttr)
 
-// VarHandleOpContainer sets the optional container attribute to value.
+// AvgPoolDataFormat sets the optional data_format attribute to value.
 //
-// value: the container this variable is placed in.
-// If not specified, defaults to ""
-func VarHandleOpContainer(value string) VarHandleOpAttr {
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, in_channels, in_height, in_width].
+// If not specified, defaults to "NHWC"
+func AvgPoolDataFormat(value string) AvgPoolAttr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["data_format"] = value
 	}
 }
 
-// VarHandleOpSharedName sets the optional shared_name attribute to value.
+// Performs average pooling on the input.
 //
-// value: the name by which this variable is referred to.
-// If not specified, defaults to ""
-func VarHandleOpSharedName(value string) VarHandleOpAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Creates a handle to a Variable resource.
+// Each entry in `output` is the mean of the corresponding size `ksize`
+// window in `value`.
 //
 // Arguments:
-//	dtype: the type of this variable. Must agree with the dtypes
-// of all ops using this variable.
-//	shape: The (possibly partially specified) shape of this variable.
-func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...VarHandleOpAttr) (resource tf.Output) {
+//	value: 4-D with shape `[batch, height, width, channels]`.
+//	ksize: The size of the sliding window for each dimension of `value`.
+//	strides: The stride of the sliding window for each dimension of `value`.
+//	padding: The type of padding algorithm to use.
+//
+// Returns The average pooled output tensor.
+func AvgPool(scope *Scope, value tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype, "shape": shape}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "VarHandleOp",
-
+		Type: "AvgPool",
+		Input: []tf.Input{
+			value,
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Output a fact about factorials.
-func Fact(scope *Scope) (fact tf.Output) {
+// Merges summaries.
+//
+// This op creates a
+// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
+// protocol buffer that contains the union of all the values in the input
+// summaries.
+//
+// When the Op is run, it reports an `InvalidArgument` error if multiple values
+// in the summaries to merge use the same tag.
+//
+// Arguments:
+//	inputs: Can be of any shape.  Each must contain serialized `Summary` protocol
+// buffers.
+//
+// Returns Scalar. Serialized `Summary` protocol buffer.
+func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Fact",
+		Type: "MergeSummary",
+		Input: []tf.Input{
+			tf.OutputList(inputs),
+		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform.
-type StatelessRandomUniformAttr func(optionalAttr)
-
-// StatelessRandomUniformDtype sets the optional dtype attribute to value.
-//
-// value: The type of the output.
-// If not specified, defaults to DT_FLOAT
-func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr {
-	return func(m optionalAttr) {
-		m["dtype"] = value
-	}
-}
-
-// Outputs deterministic pseudorandom random values from a uniform distribution.
-//
-// The generated values follow a uniform distribution in the range `[0, 1)`. The
-// lower bound 0 is included in the range, while the upper bound 1 is excluded.
-//
-// The outputs are a deterministic function of `shape` and `seed`.
+	return op.Output(0)
+}
+
+// Computes the gradient of morphological 2-D dilation with respect to the filter.
 //
 // Arguments:
-//	shape: The shape of the output tensor.
-//	seed: 2 seeds (shape [2]).
+//	input: 4-D with shape `[batch, in_height, in_width, depth]`.
+//	filter: 3-D with shape `[filter_height, filter_width, depth]`.
+//	out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
+//	strides: 1-D of length 4. The stride of the sliding window for each dimension of
+// the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
+//	rates: 1-D of length 4. The input stride for atrous morphological dilation.
+// Must be: `[1, rate_height, rate_width, 1]`.
+//	padding: The type of padding algorithm to use.
 //
-// Returns Random values with specified shape.
-func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformAttr) (output tf.Output) {
+// Returns 3-D with shape `[filter_height, filter_width, depth]`.
+func Dilation2DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (filter_backprop tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "StatelessRandomUniform",
+		Type: "Dilation2DBackpropFilter",
 		Input: []tf.Input{
-			shape, seed,
+			input, filter, out_backprop,
 		},
 		Attrs: attrs,
 	}
@@ -17332,49 +17182,55 @@ func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optio
 	return op.Output(0)
 }
 
-// PrintAttr is an optional argument to Print.
-type PrintAttr func(optionalAttr)
+// AddSparseToTensorsMapAttr is an optional argument to AddSparseToTensorsMap.
+type AddSparseToTensorsMapAttr func(optionalAttr)
 
-// PrintMessage sets the optional message attribute to value.
+// AddSparseToTensorsMapContainer sets the optional container attribute to value.
 //
-// value: A string, prefix of the error message.
+// value: The container name for the `SparseTensorsMap` created by this op.
 // If not specified, defaults to ""
-func PrintMessage(value string) PrintAttr {
+func AddSparseToTensorsMapContainer(value string) AddSparseToTensorsMapAttr {
 	return func(m optionalAttr) {
-		m["message"] = value
+		m["container"] = value
 	}
 }
 
-// PrintFirstN sets the optional first_n attribute to value.
+// AddSparseToTensorsMapSharedName sets the optional shared_name attribute to value.
 //
-// value: Only log `first_n` number of times. -1 disables logging.
-// If not specified, defaults to -1
-func PrintFirstN(value int64) PrintAttr {
+// value: The shared name for the `SparseTensorsMap` created by this op.
+// If blank, the new Operation's unique name is used.
+// If not specified, defaults to ""
+func AddSparseToTensorsMapSharedName(value string) AddSparseToTensorsMapAttr {
 	return func(m optionalAttr) {
-		m["first_n"] = value
+		m["shared_name"] = value
 	}
 }
 
-// PrintSummarize sets the optional summarize attribute to value.
+// Add a `SparseTensor` to a `SparseTensorsMap` return its handle.
 //
-// value: Only print this many entries of each tensor.
-// If not specified, defaults to 3
-func PrintSummarize(value int64) PrintAttr {
-	return func(m optionalAttr) {
-		m["summarize"] = value
-	}
-}
-
-// Prints a list of tensors.
+// A `SparseTensor` is represented by three tensors: `sparse_indices`,
+// `sparse_values`, and `sparse_shape`.
 //
-// Passes `input` through to `output` and prints `data` when evaluating.
+// This operator takes the given `SparseTensor` and adds it to a container
+// object (a `SparseTensorsMap`).  A unique key within this container is generated
+// in the form of an `int64`, and this is the value that is returned.
+//
+// The `SparseTensor` can then be read out as part of a minibatch by passing
+// the key as a vector element to `TakeManySparseFromTensorsMap`.  To ensure
+// the correct `SparseTensorsMap` is accessed, ensure that the same
+// `container` and `shared_name` are passed to that Op.  If no `shared_name`
+// is provided here, instead use the *name* of the Operation created by calling
+// `AddSparseToTensorsMap` as the `shared_name` passed to
+// `TakeManySparseFromTensorsMap`.  Ensure the Operations are colocated.
 //
 // Arguments:
-//	input: The tensor passed to `output`
-//	data: A list of tensors to print out when op is evaluated.
+//	sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
+//	sparse_values: 1-D.  The `values` of the `SparseTensor`.
+//	sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
 //
-// Returns = The unmodified `input` tensor
-func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAttr) (output tf.Output) {
+// Returns 0-D.  The handle of the `SparseTensor` now stored in the
+// `SparseTensorsMap`.
+func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddSparseToTensorsMapAttr) (sparse_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17383,9 +17239,9 @@ func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAtt
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Print",
+		Type: "AddSparseToTensorsMap",
 		Input: []tf.Input{
-			input, tf.OutputList(data),
+			sparse_indices, sparse_values, sparse_shape,
 		},
 		Attrs: attrs,
 	}
@@ -17393,280 +17249,217 @@ func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAtt
 	return op.Output(0)
 }
 
-// LoadAndRemapMatrixAttr is an optional argument to LoadAndRemapMatrix.
-type LoadAndRemapMatrixAttr func(optionalAttr)
-
-// LoadAndRemapMatrixMaxRowsInMemory sets the optional max_rows_in_memory attribute to value.
-//
-// value: The maximum number of rows to load from the checkpoint at
-// once. If less than or equal to 0, the entire matrix will be loaded into
-// memory. Setting this arg trades increased disk reads for lower memory usage.
-// If not specified, defaults to -1
-func LoadAndRemapMatrixMaxRowsInMemory(value int64) LoadAndRemapMatrixAttr {
-	return func(m optionalAttr) {
-		m["max_rows_in_memory"] = value
-	}
-}
-
-// Loads a 2-D (matrix) `Tensor` with name `old_tensor_name` from the checkpoint
-//
-// at `ckpt_path` and potentially reorders its rows and columns using the
-// specified remappings.
-//
-// Most users should use one of the wrapper initializers (such as
-// `tf.contrib.framework.load_and_remap_matrix_initializer`) instead of this
-// function directly.
-//
-// The remappings are 1-D tensors with the following properties:
-//
-// * `row_remapping` must have exactly `num_rows` entries. Row `i` of the output
-//   matrix will be initialized from the row corresponding to index
-//   `row_remapping[i]` in the old `Tensor` from the checkpoint.
-// * `col_remapping` must have either 0 entries (indicating that no column
-//   reordering is needed) or `num_cols` entries. If specified, column `j` of the
-//   output matrix will be initialized from the column corresponding to index
-//   `col_remapping[j]` in the old `Tensor` from the checkpoint.
-// * A value of -1 in either of the remappings signifies a "missing" entry. In that
-//   case, values from the `initializing_values` tensor will be used to fill that
-//   missing row or column. If `row_remapping` has `r` missing entries and
-//   `col_remapping` has `c` missing entries, then the following condition must be
-//   true:
-//
-// `(r * num_cols) + (c * num_rows) - (r * c) == len(initializing_values)`
-//
-// The remapping tensors can be generated using the GenerateVocabRemapping op.
-//
-// As an example, with row_remapping = [1, 0, -1], col_remapping = [0, 2, -1],
-// initializing_values = [0.5, -0.5, 0.25, -0.25, 42], and w(i, j) representing
-// the value from row i, column j of the old tensor in the checkpoint, the output
-// matrix will look like the following:
+// Writes a `Summary` protocol buffer with scalar values.
 //
-// [[w(1, 0),  w(1, 2),  0.5],
-//  [w(0, 0),  w(0, 2), -0.5],
-//  [0.25,    -0.25,      42]]
+// The input `tag` and `value` must have the scalars.
 //
 // Arguments:
-//	ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) from
-// which the old matrix `Tensor` will be loaded.
-//	old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint.
-//	row_remapping: An int `Tensor` of row remappings (generally created by
-// `generate_vocab_remapping`).  Even if no row remapping is needed, this must
-// still be an index-valued Tensor (e.g. [0, 1, 2, ...]), or a shifted
-// index-valued `Tensor` (e.g. [8, 9, 10, ...], for partitioned `Variables`).
-//	col_remapping: An int `Tensor` of column remappings (generally created by
-// `generate_vocab_remapping`).  May be a size-0 `Tensor` if only row remapping
-// is to be done (e.g. column ordering is the same).
-//	initializing_values: A float `Tensor` containing  values to fill in for cells
-// in the output matrix that are not loaded from the checkpoint. Length must be
-// exactly the same as the number of missing / new cells.
-//	num_rows: Number of rows (length of the 1st dimension) in the output matrix.
-//	num_cols: Number of columns (length of the 2nd dimension) in the output matrix.
+//	writer: A handle to a summary writer.
+//	step: The step to write the summary for.
+//	tag: Tag for the summary.
+//	value: Value for the summary.
 //
-// Returns Output matrix containing existing values loaded from the
-// checkpoint, and with any missing values filled in from initializing_values.
-func LoadAndRemapMatrix(scope *Scope, ckpt_path tf.Output, old_tensor_name tf.Output, row_remapping tf.Output, col_remapping tf.Output, initializing_values tf.Output, num_rows int64, num_cols int64, optional ...LoadAndRemapMatrixAttr) (output_matrix tf.Output) {
+// Returns the created operation.
+func WriteScalarSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, value tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_rows": num_rows, "num_cols": num_cols}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "LoadAndRemapMatrix",
+		Type: "WriteScalarSummary",
 		Input: []tf.Input{
-			ckpt_path, old_tensor_name, row_remapping, col_remapping, initializing_values,
+			writer, step, tag, value,
 		},
-		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Checks whether a resource handle-based variable has been initialized.
+// Computes the matrix exponential of one or more square matrices:
+//
+// exp(A) = \sum_{n=0}^\infty A^n/n!
+//
+// The exponential is computed using a combination of the scaling and squaring
+// method and the Pade approximation. Details can be founds in:
+// Nicholas J. Higham, "The scaling and squaring method for the matrix exponential
+// revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005.
+//
+// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
+// form square matrices. The output is a tensor of the same shape as the input
+// containing the exponential for all input submatrices `[..., :, :]`.
 //
 // Arguments:
-//	resource: the input resource handle.
+//	input: Shape is `[..., M, M]`.
 //
-// Returns a scalar boolean which is true if the variable has been
-// initialized.
-func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) {
+// Returns Shape is `[..., M, M]`.
+//
+// @compatibility(scipy)
+// Equivalent to scipy.linalg.expm
+// @end_compatibility
+func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "VarIsInitializedOp",
+		Type: "MatrixExponential",
 		Input: []tf.Input{
-			resource,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ResizeAreaAttr is an optional argument to ResizeArea.
-type ResizeAreaAttr func(optionalAttr)
+// QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2.
+type QueueDequeueUpToV2Attr func(optionalAttr)
 
-// ResizeAreaAlignCorners sets the optional align_corners attribute to value.
+// QueueDequeueUpToV2TimeoutMs sets the optional timeout_ms attribute to value.
 //
-// value: If true, rescale input by (new_height - 1) / (height - 1), which
-// exactly aligns the 4 corners of images and resized images. If false, rescale
-// by new_height / height. Treat similarly the width dimension.
-// If not specified, defaults to false
-func ResizeAreaAlignCorners(value bool) ResizeAreaAttr {
+// value: If the queue has fewer than n elements, this operation
+// will block for up to timeout_ms milliseconds.
+// Note: This option is not supported yet.
+// If not specified, defaults to -1
+func QueueDequeueUpToV2TimeoutMs(value int64) QueueDequeueUpToV2Attr {
 	return func(m optionalAttr) {
-		m["align_corners"] = value
+		m["timeout_ms"] = value
 	}
 }
 
-// Resize `images` to `size` using area interpolation.
+// Dequeues `n` tuples of one or more tensors from the given queue.
 //
-// Input images can be of different types but output images are always float.
+// This operation is not supported by all queues.  If a queue does not support
+// DequeueUpTo, then an Unimplemented error is returned.
 //
-// Each output pixel is computed by first transforming the pixel's footprint into
-// the input tensor and then averaging the pixels that intersect the footprint. An
-// input pixel's contribution to the average is weighted by the fraction of its
-// area that intersects the footprint.  This is the same as OpenCV's INTER_AREA.
+// If the queue is closed and there are more than 0 but less than `n`
+// elements remaining, then instead of returning an OutOfRange error like
+// QueueDequeueMany, less than `n` elements are returned immediately.  If
+// the queue is closed and there are 0 elements left in the queue, then
+// an OutOfRange error is returned just like in QueueDequeueMany.
+// Otherwise the behavior is identical to QueueDequeueMany:
+//
+// This operation concatenates queue-element component tensors along the
+// 0th dimension to make a single component tensor.  All of the components
+// in the dequeued tuple will have size n in the 0th dimension.
+//
+// This operation has `k` outputs, where `k` is the number of components in
+// the tuples stored in the given queue, and output `i` is the ith
+// component of the dequeued tuple.
 //
 // Arguments:
-//	images: 4-D with shape `[batch, height, width, channels]`.
-//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-// new size for the images.
+//	handle: The handle to a queue.
+//	n: The number of tuples to dequeue.
+//	component_types: The type of each component in a tuple.
 //
-// Returns 4-D with shape
-// `[batch, new_height, new_width, channels]`.
-func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeAreaAttr) (resized_images tf.Output) {
+// Returns One or more tensors that were dequeued as a tuple.
+func QueueDequeueUpToV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueUpToV2Attr) (components []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"component_types": component_types}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResizeArea",
+		Type: "QueueDequeueUpToV2",
 		Input: []tf.Input{
-			images, size,
+			handle, n,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// RealAttr is an optional argument to Real.
-type RealAttr func(optionalAttr)
-
-// RealTout sets the optional Tout attribute to value.
-// If not specified, defaults to DT_FLOAT
-func RealTout(value tf.DataType) RealAttr {
-	return func(m optionalAttr) {
-		m["Tout"] = value
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("QueueDequeueUpToV2", err)
+		return
 	}
+	return components
 }
 
-// Returns the real part of a complex number.
+// Computes the Cholesky decomposition of one or more square matrices.
 //
-// Given a tensor `input` of complex numbers, this operation returns a tensor of
-// type `float` that is the real part of each element in `input`. All elements in
-// `input` must be complex numbers of the form \\(a + bj\\), where *a* is the real
-//  part returned by this operation and *b* is the imaginary part.
+// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
+// form square matrices.
 //
-// For example:
+// The input has to be symmetric and positive definite. Only the lower-triangular
+// part of the input will be used for this operation. The upper-triangular part
+// will not be read.
 //
-// ```
-// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-// tf.real(input) ==> [-2.25, 3.25]
-// ```
-func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output) {
+// The output is a tensor of the same shape as the input
+// containing the Cholesky decompositions for all input submatrices `[..., :, :]`.
+//
+// **Note**: The gradient computation on GPU is faster for large matrices but
+// not for large batch dimensions when the submatrices are small. In this
+// case it might be faster to use the CPU.
+//
+// Arguments:
+//	input: Shape is `[..., M, M]`.
+//
+// Returns Shape is `[..., M, M]`.
+func Cholesky(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "Real",
+		Type: "Cholesky",
 		Input: []tf.Input{
 			input,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// 2D real-valued fast Fourier transform.
-//
-// Computes the 2-dimensional discrete Fourier transform of a real-valued signal
-// over the inner-most 2 dimensions of `input`.
-//
-// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the
-// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension
-// of `output`: the zero-frequency term, followed by the `fft_length / 2`
-// positive-frequency terms.
+// Writes contents to the file at input filename. Creates file and recursively
 //
-// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the
-// corresponding dimension of `input`, the dimension is cropped. If it is larger,
-// the dimension is padded with zeros.
+// creates directory if not existing.
 //
 // Arguments:
-//	input: A float32 tensor.
-//	fft_length: An int32 tensor of shape [2]. The FFT length for each dimension.
-//
-// Returns A complex64 tensor of the same rank as `input`. The inner-most 2
-//   dimensions of `input` are replaced with their 2D Fourier transform. The
-//   inner-most dimension contains `fft_length / 2 + 1` unique frequency
-//   components.
+//	filename: scalar. The name of the file to which we write the contents.
+//	contents: scalar. The content to be written to the output file.
 //
-// @compatibility(numpy)
-// Equivalent to np.fft.rfft2
-// @end_compatibility
-func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+// Returns the created operation.
+func WriteFile(scope *Scope, filename tf.Output, contents tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "RFFT2D",
+		Type: "WriteFile",
 		Input: []tf.Input{
-			input, fft_length,
+			filename, contents,
 		},
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad.
-type ResourceSparseApplyAdagradAttr func(optionalAttr)
+// AllAttr is an optional argument to All.
+type AllAttr func(optionalAttr)
 
-// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value.
+// AllKeepDims sets the optional keep_dims attribute to value.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
+// value: If true, retain reduced dimensions with length 1.
 // If not specified, defaults to false
-func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr {
+func AllKeepDims(value bool) AllAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["keep_dims"] = value
 	}
 }
 
-// Update relevant entries in '*var' and '*accum' according to the adagrad scheme.
+// Computes the "logical and" of elements across dimensions of a tensor.
 //
-// That is for rows we have grad for, we update var and accum as follows:
-// accum += grad * grad
-// var -= lr * grad * (1 / sqrt(accum))
+// Reduces `input` along the dimensions given in `axis`. Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `axis`. If `keep_dims` is true, the reduced dimensions are
+// retained with length 1.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Learning rate. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
+//	input: The tensor to reduce.
+//	axis: The dimensions to reduce. Must be in the range
+// `[-rank(input), rank(input))`.
 //
-// Returns the created operation.
-func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) {
+// Returns The reduced tensor.
+func All(scope *Scope, input tf.Output, axis tf.Output, optional ...AllAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17675,124 +17468,76 @@ func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, l
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyAdagrad",
+		Type: "All",
 		Input: []tf.Input{
-			var_, accum, lr, grad, indices,
+			input, axis,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Creates a dataset that zips together `input_datasets`.
-func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Computes the Eigen Decomposition of a batch of square self-adjoint matrices.
+//
+// DEPRECATED at GraphDef version 11: Use SelfAdjointEigV2 instead.
+//
+// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
+// form square matrices, with the same constraints as the single matrix
+// SelfAdjointEig.
+//
+// The result is a [..., M+1, M] matrix with [..., 0,:] containing the
+// eigenvalues, and subsequent [...,1:, :] containing the eigenvectors.
+//
+// Arguments:
+//	input: Shape is `[..., M, M]`.
+//
+// Returns Shape is `[..., M+1, M]`.
+func SelfAdjointEig(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "ZipDataset",
+		Type: "SelfAdjointEig",
 		Input: []tf.Input{
-			tf.OutputList(input_datasets),
+			input,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MutableDenseHashTableV2Attr is an optional argument to MutableDenseHashTableV2.
-type MutableDenseHashTableV2Attr func(optionalAttr)
-
-// MutableDenseHashTableV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this table is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func MutableDenseHashTableV2Container(value string) MutableDenseHashTableV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// MutableDenseHashTableV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this table is shared under the given name across
-// multiple sessions.
-// If not specified, defaults to ""
-func MutableDenseHashTableV2SharedName(value string) MutableDenseHashTableV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// MutableDenseHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
-// If not specified, defaults to false
-func MutableDenseHashTableV2UseNodeNameSharing(value bool) MutableDenseHashTableV2Attr {
-	return func(m optionalAttr) {
-		m["use_node_name_sharing"] = value
-	}
-}
-
-// MutableDenseHashTableV2ValueShape sets the optional value_shape attribute to value.
-//
-// value: The shape of each value.
-// If not specified, defaults to <>
-func MutableDenseHashTableV2ValueShape(value tf.Shape) MutableDenseHashTableV2Attr {
-	return func(m optionalAttr) {
-		m["value_shape"] = value
-	}
-}
-
-// MutableDenseHashTableV2InitialNumBuckets sets the optional initial_num_buckets attribute to value.
-//
-// value: The initial number of hash table buckets. Must be a power
-// to 2.
-// If not specified, defaults to 131072
-func MutableDenseHashTableV2InitialNumBuckets(value int64) MutableDenseHashTableV2Attr {
-	return func(m optionalAttr) {
-		m["initial_num_buckets"] = value
-	}
-}
-
-// MutableDenseHashTableV2MaxLoadFactor sets the optional max_load_factor attribute to value.
-//
-// value: The maximum ratio between number of entries and number of
-// buckets before growing the table. Must be between 0 and 1.
-// If not specified, defaults to 0.8
-func MutableDenseHashTableV2MaxLoadFactor(value float32) MutableDenseHashTableV2Attr {
-	return func(m optionalAttr) {
-		m["max_load_factor"] = value
-	}
-}
-
-// Creates an empty hash table that uses tensors as the backing store.
-//
-// It uses "open addressing" with quadratic reprobing to resolve
-// collisions.
-//
-// This op creates a mutable hash table, specifying the type of its keys and
-// values. Each value must be a scalar. Data can be inserted into the table using
-// the insert operations. It does not support the initialization operation.
+// Computes softplus gradients for a softplus operation.
 //
 // Arguments:
-//	empty_key: The key used to represent empty key buckets internally. Must not
-// be used in insert or lookup operations.
-//	value_dtype: Type of the table values.
+//	gradients: The backpropagated gradients to the corresponding softplus operation.
+//	features: The features passed as input to the corresponding softplus operation.
 //
-// Returns Handle to a table.
-func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) {
+// Returns The gradients: `gradients / (1 + exp(-features))`.
+func SoftplusGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"value_dtype": value_dtype}
-	for _, a := range optional {
-		a(attrs)
+	opspec := tf.OpSpec{
+		Type: "SoftplusGrad",
+		Input: []tf.Input{
+			gradients, features,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset that contains the unique elements of `input_dataset`.
+func UniqueDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "MutableDenseHashTableV2",
+		Type: "UniqueDataset",
 		Input: []tf.Input{
-			empty_key,
+			input_dataset,
 		},
 		Attrs: attrs,
 	}
@@ -17800,66 +17545,38 @@ func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.D
 	return op.Output(0)
 }
 
-// LRNAttr is an optional argument to LRN.
-type LRNAttr func(optionalAttr)
-
-// LRNDepthRadius sets the optional depth_radius attribute to value.
-//
-// value: 0-D.  Half-width of the 1-D normalization window.
-// If not specified, defaults to 5
-func LRNDepthRadius(value int64) LRNAttr {
-	return func(m optionalAttr) {
-		m["depth_radius"] = value
-	}
-}
-
-// LRNBias sets the optional bias attribute to value.
-//
-// value: An offset (usually positive to avoid dividing by 0).
-// If not specified, defaults to 1
-func LRNBias(value float32) LRNAttr {
-	return func(m optionalAttr) {
-		m["bias"] = value
-	}
-}
-
-// LRNAlpha sets the optional alpha attribute to value.
-//
-// value: A scale factor, usually positive.
-// If not specified, defaults to 1
-func LRNAlpha(value float32) LRNAttr {
-	return func(m optionalAttr) {
-		m["alpha"] = value
-	}
-}
+// SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2.
+type SelfAdjointEigV2Attr func(optionalAttr)
 
-// LRNBeta sets the optional beta attribute to value.
+// SelfAdjointEigV2ComputeV sets the optional compute_v attribute to value.
 //
-// value: An exponent.
-// If not specified, defaults to 0.5
-func LRNBeta(value float32) LRNAttr {
+// value: If `True` then eigenvectors will be computed and returned in `v`.
+// Otherwise, only the eigenvalues will be computed.
+// If not specified, defaults to true
+func SelfAdjointEigV2ComputeV(value bool) SelfAdjointEigV2Attr {
 	return func(m optionalAttr) {
-		m["beta"] = value
+		m["compute_v"] = value
 	}
 }
 
-// Local Response Normalization.
-//
-// The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last
-// dimension), and each vector is normalized independently.  Within a given vector,
-// each component is divided by the weighted, squared sum of inputs within
-// `depth_radius`.  In detail,
+// Computes the eigen decomposition of one or more square self-adjoint matrices.
 //
-//     sqr_sum[a, b, c, d] =
-//         sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
-//     output = input / (bias + alpha * sqr_sum) ** beta
+// Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in
+// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`.
 //
-// For details, see [Krizhevsky et al., ImageNet classification with deep
-// convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
+// ```python
+// # a is a tensor.
+// # e is a tensor of eigenvalues.
+// # v is a tensor of eigenvectors.
+// e, v = self_adjoint_eig(a)
+// e = self_adjoint_eig(a, compute_v=False)
+// ```
 //
 // Arguments:
-//	input: 4-D.
-func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) {
+//	input: `Tensor` input of shape `[N, N]`.
+//
+// Returns Eigenvalues. Shape is `[N]`.Eigenvectors. Shape is `[N, N]`.
+func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV2Attr) (e tf.Output, v tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17868,61 +17585,132 @@ func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output)
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "LRN",
+		Type: "SelfAdjointEigV2",
 		Input: []tf.Input{
 			input,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Inverse fast Fourier transform.
+// Adjust the saturation of one or more images.
 //
-// Computes the inverse 1-dimensional discrete Fourier transform over the
-// inner-most dimension of `input`.
+// `images` is a tensor of at least 3 dimensions.  The last dimension is
+// interpretted as channels, and must be three.
+//
+// The input image is considered in the RGB colorspace. Conceptually, the RGB
+// colors are first mapped into HSV. A scale is then applied all the saturation
+// values, and then remapped back to RGB colorspace.
 //
 // Arguments:
-//	input: A complex64 tensor.
+//	images: Images to adjust.  At least 3-D.
+//	scale: A float scale to add to the saturation.
 //
-// Returns A complex64 tensor of the same shape as `input`. The inner-most
-//   dimension of `input` is replaced with its inverse 1D Fourier transform.
+// Returns The hue-adjusted image or images.
+func AdjustSaturation(scope *Scope, images tf.Output, scale tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AdjustSaturation",
+		Input: []tf.Input{
+			images, scale,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Elementwise computes the bitwise OR of `x` and `y`.
 //
-// @compatibility(numpy)
-// Equivalent to np.fft.ifft
-// @end_compatibility
-func IFFT(scope *Scope, input tf.Output) (output tf.Output) {
+// The result will have those bits set, that are set in `x`, `y` or both. The
+// computation is performed on the underlying representations of `x` and `y`.
+func BitwiseOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "IFFT",
+		Type: "BitwiseOr",
 		Input: []tf.Input{
-			input,
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Creates a dataset that batches `batch_size` elements from `input_dataset`.
+// MatrixSolveLsAttr is an optional argument to MatrixSolveLs.
+type MatrixSolveLsAttr func(optionalAttr)
+
+// MatrixSolveLsFast sets the optional fast attribute to value.
+// If not specified, defaults to true
+func MatrixSolveLsFast(value bool) MatrixSolveLsAttr {
+	return func(m optionalAttr) {
+		m["fast"] = value
+	}
+}
+
+// Solves one or more linear least-squares problems.
 //
-// Arguments:
+// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions
+// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same
+// type as `matrix` and shape `[..., M, K]`.
+// The output is a tensor shape `[..., N, K]` where each output matrix solves
+// each of the equations
+// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]`
+// in the least squares sense.
 //
-//	batch_size: A scalar representing the number of elements to accumulate in a
-// batch.
+// We use the following notation for (complex) matrix and right-hand sides
+// in the batch:
 //
+// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\),
+// `rhs`=\\(B  \in \mathbb{C}^{m \times k}\\),
+// `output`=\\(X  \in \mathbb{C}^{n \times k}\\),
+// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\).
 //
-func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// If `fast` is `True`, then the solution is computed by solving the normal
+// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then
+// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares
+// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 +
+// \lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as
+// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the
+// minimum-norm solution to the under-determined linear system, i.e.
+// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\),
+// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable
+// when \\(A\\) is numerically full rank and has a condition number
+// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or\\(\lambda\\) is
+// sufficiently large.
+//
+// If `fast` is `False` an algorithm based on the numerically robust complete
+// orthogonal decomposition is used. This computes the minimum-norm
+// least-squares solution, even when \\(A\\) is rank deficient. This path is
+// typically 6-7 times slower than the fast path. If `fast` is `False` then
+// `l2_regularizer` is ignored.
+//
+// Arguments:
+//	matrix: Shape is `[..., M, N]`.
+//	rhs: Shape is `[..., M, K]`.
+//	l2_regularizer: Scalar tensor.
+//
+// @compatibility(numpy)
+// Equivalent to np.linalg.lstsq
+// @end_compatibility
+//
+// Returns Shape is `[..., N, K]`.
+func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "BatchDataset",
+		Type: "MatrixSolveLs",
 		Input: []tf.Input{
-			input_dataset, batch_size,
+			matrix, rhs, l2_regularizer,
 		},
 		Attrs: attrs,
 	}
@@ -17930,54 +17718,57 @@ func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, o
 	return op.Output(0)
 }
 
-// ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp.
-type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr)
+// SvdAttr is an optional argument to Svd.
+type SvdAttr func(optionalAttr)
 
-// ResourceSparseApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value.
+// SvdComputeUv sets the optional compute_uv attribute to value.
 //
-// value: If `True`, updating of the var, mg, ms, and mom tensors is
-// protected by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceSparseApplyCenteredRMSPropUseLocking(value bool) ResourceSparseApplyCenteredRMSPropAttr {
+// value: If true, left and right singular vectors will be
+// computed and returned in `u` and `v`, respectively.
+// If false, `u` and `v` are not set and should never referenced.
+// If not specified, defaults to true
+func SvdComputeUv(value bool) SvdAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["compute_uv"] = value
 	}
 }
 
-// Update '*var' according to the centered RMSProp algorithm.
-//
-// The centered RMSProp algorithm uses an estimate of the centered second moment
-// (i.e., the variance) for normalization, as opposed to regular RMSProp, which
-// uses the (uncentered) second moment. This often helps with training, but is
-// slightly more expensive in terms of computation and memory.
-//
-// Note that in dense implementation of this algorithm, mg, ms, and mom will
-// update even if the grad is zero, but in this sparse implementation, mg, ms,
-// and mom will not update in iterations during which the grad is zero.
+// SvdFullMatrices sets the optional full_matrices attribute to value.
 //
-// mean_square = decay * mean_square + (1-decay) * gradient ** 2
-// mean_grad = decay * mean_grad + (1-decay) * gradient
-// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
+// value: If true, compute full-sized `u` and `v`. If false
+// (the default), compute only the leading `P` singular vectors.
+// Ignored if `compute_uv` is `False`.
+// If not specified, defaults to false
+func SvdFullMatrices(value bool) SvdAttr {
+	return func(m optionalAttr) {
+		m["full_matrices"] = value
+	}
+}
+
+// Computes the singular value decompositions of one or more matrices.
 //
-// ms <- rho * ms_{t-1} + (1-rho) * grad * grad
-// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
-// var <- var - mom
+// Computes the SVD of each inner matrix in `input` such that
+// `input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])`
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	mg: Should be from a Variable().
-//	ms: Should be from a Variable().
-//	mom: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	rho: Decay rate. Must be a scalar.
+// ```python
+// # a is a tensor containing a batch of matrices.
+// # s is a tensor of singular values for each matrix.
+// # u is the tensor containing of left singular vectors for each matrix.
+// # v is the tensor containing of right singular vectors for each matrix.
+// s, u, v = svd(a)
+// s, _, _ = svd(a, compute_uv=False)
+// ```
 //
-//	epsilon: Ridge term. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var, ms and mom.
+// Arguments:
+//	input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions
+// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`.
 //
-// Returns the created operation.
-func ResourceSparseApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyCenteredRMSPropAttr) (o *tf.Operation) {
+// Returns Singular values. Shape is `[..., P]`.Left singular vectors. If `full_matrices` is `False` then shape is
+// `[..., M, P]`; if `full_matrices` is `True` then shape is
+// `[..., M, M]`. Undefined if `compute_uv` is `False`.Left singular vectors. If `full_matrices` is `False` then shape is
+// `[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`.
+// Undefined if `compute_uv` is false.
+func Svd(scope *Scope, input tf.Output, optional ...SvdAttr) (s tf.Output, u tf.Output, v tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -17986,47 +17777,80 @@ func ResourceSparseApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Outp
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyCenteredRMSProp",
+		Type: "Svd",
 		Input: []tf.Input{
-			var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, indices,
+			input,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Flips all bits elementwise.
+// QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2.
+type QueueEnqueueManyV2Attr func(optionalAttr)
+
+// QueueEnqueueManyV2TimeoutMs sets the optional timeout_ms attribute to value.
 //
-// The result will have exactly those bits set, that are not set in `x`. The
-// computation is performed on the underlying representation of x.
-func Invert(scope *Scope, x tf.Output) (y tf.Output) {
+// value: If the queue is too full, this operation will block for up
+// to timeout_ms milliseconds.
+// Note: This option is not supported yet.
+// If not specified, defaults to -1
+func QueueEnqueueManyV2TimeoutMs(value int64) QueueEnqueueManyV2Attr {
+	return func(m optionalAttr) {
+		m["timeout_ms"] = value
+	}
+}
+
+// Enqueues zero or more tuples of one or more tensors in the given queue.
+//
+// This operation slices each component tensor along the 0th dimension to
+// make multiple queue elements. All of the tuple components must have the
+// same size in the 0th dimension.
+//
+// The components input has k elements, which correspond to the components of
+// tuples stored in the given queue.
+//
+// N.B. If the queue is full, this operation will block until the given
+// elements have been enqueued (or 'timeout_ms' elapses, if specified).
+//
+// Arguments:
+//	handle: The handle to a queue.
+//	components: One or more tensors from which the enqueued tensors should
+// be taken.
+//
+// Returns the created operation.
+func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueManyV2Attr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Invert",
+		Type: "QueueEnqueueManyV2",
 		Input: []tf.Input{
-			x,
+			handle, tf.OutputList(components),
 		},
+		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Computes the mean along segments of a tensor.
+// Computes the product along segments of a tensor.
 //
 // Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
 // segments.
 //
 // Computes a tensor such that
-// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is
-// over `j` such that `segment_ids[j] == i` and `N` is the total number of
-// values summed.
+// \\(output_i = \prod_j data_j\\) where the product is over `j` such
+// that `segment_ids[j] == i`.
 //
-// If the mean is empty for a given segment ID `i`, `output[i] = 0`.
+// If the product is empty for a given segment ID `i`, `output[i] = 1`.
 //
 // <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMean.png" alt>
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentProd.png" alt>
 // </div>
 //
 // Arguments:
@@ -18036,12 +17860,12 @@ func Invert(scope *Scope, x tf.Output) (y tf.Output) {
 //
 // Returns Has same shape as data, except for dimension 0 which
 // has size `k`, the number of segments.
-func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+func SegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SegmentMean",
+		Type: "SegmentProd",
 		Input: []tf.Input{
 			data, segment_ids,
 		},
@@ -18050,67 +17874,78 @@ func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf
 	return op.Output(0)
 }
 
-// CumprodAttr is an optional argument to Cumprod.
-type CumprodAttr func(optionalAttr)
+// Converts one or more images from RGB to HSV.
+//
+// Outputs a tensor of the same shape as the `images` tensor, containing the HSV
+// value of the pixels. The output is only well defined if the value in `images`
+// are in `[0,1]`.
+//
+// `output[..., 0]` contains hue, `output[..., 1]` contains saturation, and
+// `output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0
+// corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue.
+//
+// Arguments:
+//	images: 1-D or higher rank. RGB data to convert. Last dimension must be size 3.
+//
+// Returns `images` converted to HSV.
+func RGBToHSV(scope *Scope, images tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RGBToHSV",
+		Input: []tf.Input{
+			images,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// CumprodExclusive sets the optional exclusive attribute to value.
+// Does nothing. Only useful as a placeholder for control edges.
 //
-// value: If `True`, perform exclusive cumprod.
-// If not specified, defaults to false
-func CumprodExclusive(value bool) CumprodAttr {
-	return func(m optionalAttr) {
-		m["exclusive"] = value
+// Returns the created operation.
+func NoOp(scope *Scope) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "NoOp",
 	}
+	return scope.AddOperation(opspec)
 }
 
-// CumprodReverse sets the optional reverse attribute to value.
+// MergeV2CheckpointsAttr is an optional argument to MergeV2Checkpoints.
+type MergeV2CheckpointsAttr func(optionalAttr)
+
+// MergeV2CheckpointsDeleteOldDirs sets the optional delete_old_dirs attribute to value.
 //
-// value: A `bool` (default: False).
-// If not specified, defaults to false
-func CumprodReverse(value bool) CumprodAttr {
+// value: see above.
+// If not specified, defaults to true
+func MergeV2CheckpointsDeleteOldDirs(value bool) MergeV2CheckpointsAttr {
 	return func(m optionalAttr) {
-		m["reverse"] = value
+		m["delete_old_dirs"] = value
 	}
 }
 
-// Compute the cumulative product of the tensor `x` along `axis`.
-//
-// By default, this op performs an inclusive cumprod, which means that the first
-// element of the input is identical to the first element of the output:
-//
-// ```python
-// tf.cumprod([a, b, c])  # => [a, a * b, a * b * c]
-// ```
-//
-// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is
-// performed instead:
-//
-// ```python
-// tf.cumprod([a, b, c], exclusive=True)  # => [1, a, a * b]
-// ```
-//
-// By setting the `reverse` kwarg to `True`, the cumprod is performed in the
-// opposite direction:
-//
-// ```python
-// tf.cumprod([a, b, c], reverse=True)  # => [a * b * c, b * c, c]
-// ```
+// V2 format specific: merges the metadata files of sharded checkpoints.  The
 //
-// This is more efficient than using separate `tf.reverse` ops.
+// result is one logical checkpoint, with one physical metadata file and renamed
+// data files.
 //
-// The `reverse` and `exclusive` kwargs can also be combined:
+// Intended for "grouping" multiple checkpoints in a sharded checkpoint setup.
 //
-// ```python
-// tf.cumprod([a, b, c], exclusive=True, reverse=True)  # => [b * c, c, 1]
-// ```
+// If delete_old_dirs is true, attempts to delete recursively the dirname of each
+// path in the input checkpoint_prefixes.  This is useful when those paths are non
+// user-facing temporary locations.
 //
 // Arguments:
-//	x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
-// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
-// `complex128`, `qint8`, `quint8`, `qint32`, `half`.
-//	axis: A `Tensor` of type `int32` (default: 0). Must be in the range
-// `[-rank(x), rank(x))`.
-func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) {
+//	checkpoint_prefixes: prefixes of V2 checkpoints to merge.
+//	destination_prefix: scalar.  The desired final prefix.  Allowed to be the same
+// as one of the checkpoint_prefixes.
+//
+// Returns the created operation.
+func MergeV2Checkpoints(scope *Scope, checkpoint_prefixes tf.Output, destination_prefix tf.Output, optional ...MergeV2CheckpointsAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -18119,312 +17954,424 @@ func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr)
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Cumprod",
+		Type: "MergeV2Checkpoints",
 		Input: []tf.Input{
-			x, axis,
+			checkpoint_prefixes, destination_prefix,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign.
-type ResourceApplyPowerSignAttr func(optionalAttr)
-
-// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value.
+// Saves input tensors slices to disk.
 //
-// value: If `True`, updating of the var and m tensors is
-// protected by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the AddSign update.
+// This is like `Save` except that tensors can be listed in the saved file as being
+// a slice of a larger tensor.  `shapes_and_slices` specifies the shape of the
+// larger tensor and the slice that this tensor covers. `shapes_and_slices` must
+// have as many elements as `tensor_names`.
 //
-// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
-// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g
-// variable <- variable - lr_t * update
+// Elements of the `shapes_and_slices` input must either be:
+//
+// *  The empty string, in which case the corresponding tensor is
+//    saved normally.
+// *  A string of the form `dim0 dim1 ... dimN-1 slice-spec` where the
+//    `dimI` are the dimensions of the larger tensor and `slice-spec`
+//    specifies what part is covered by the tensor to save.
+//
+// `slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1`
+// where each `sliceI` is either:
+//
+// *  The string `-` meaning that the slice covers all indices of this dimension
+// *  `start,length` where `start` and `length` are integers.  In that
+//    case the slice covers `length` indices starting at `start`.
+//
+// See also `Save`.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	m: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	logbase: Must be a scalar.
-//	sign_decay: Must be a scalar.
-//	beta: Must be a scalar.
-//	grad: The gradient.
+//	filename: Must have a single element. The name of the file to which we write the
+// tensor.
+//	tensor_names: Shape `[N]`. The names of the tensors to be saved.
+//	shapes_and_slices: Shape `[N]`.  The shapes and slice specifications to use when
+// saving the tensors.
+//	data: `N` tensors to save.
 //
 // Returns the created operation.
-func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) {
+func SaveSlices(scope *Scope, filename tf.Output, tensor_names tf.Output, shapes_and_slices tf.Output, data []tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyPowerSign",
+		Type: "SaveSlices",
 		Input: []tf.Input{
-			var_, m, lr, logbase, sign_decay, beta, grad,
+			filename, tensor_names, shapes_and_slices, tf.OutputList(data),
 		},
-		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// DestroyResourceOpAttr is an optional argument to DestroyResourceOp.
-type DestroyResourceOpAttr func(optionalAttr)
+// DenseToDenseSetOperationAttr is an optional argument to DenseToDenseSetOperation.
+type DenseToDenseSetOperationAttr func(optionalAttr)
 
-// DestroyResourceOpIgnoreLookupError sets the optional ignore_lookup_error attribute to value.
-//
-// value: whether to ignore the error when the resource
-// doesn't exist.
+// DenseToDenseSetOperationValidateIndices sets the optional validate_indices attribute to value.
 // If not specified, defaults to true
-func DestroyResourceOpIgnoreLookupError(value bool) DestroyResourceOpAttr {
+func DenseToDenseSetOperationValidateIndices(value bool) DenseToDenseSetOperationAttr {
 	return func(m optionalAttr) {
-		m["ignore_lookup_error"] = value
+		m["validate_indices"] = value
 	}
 }
 
-// Deletes the resource specified by the handle.
+// Applies set operation along last dimension of 2 `Tensor` inputs.
 //
-// All subsequent operations using the resource will result in a NotFound
-// error status.
+// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
+//
+// Output `result` is a `SparseTensor` represented by `result_indices`,
+// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
+// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
+// dimension contains the result of `set_operation` applied to the corresponding
+// `[0...n-1]` dimension of `set`.
 //
 // Arguments:
-//	resource: handle to the resource to delete.
+//	set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.
+// Dimension `n` contains values in a set, duplicates are allowed but ignored.
+//	set2: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set1`.
+// Dimension `n` contains values in a set, duplicates are allowed but ignored.
 //
-// Returns the created operation.
-func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyResourceOpAttr) (o *tf.Operation) {
+//
+// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
+// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
+// is the max result set size across all `0...n-1` dimensions.
+func DenseToDenseSetOperation(scope *Scope, set1 tf.Output, set2 tf.Output, set_operation string, optional ...DenseToDenseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"set_operation": set_operation}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DestroyResourceOp",
+		Type: "DenseToDenseSetOperation",
 		Input: []tf.Input{
-			resource,
+			set1, set2,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Converts each string in the input Tensor to its hash mod by a number of buckets.
-//
-// The hash function is deterministic on the content of the string within the
-// process. The hash function is a keyed hash function, where attribute `key`
-// defines the key of the hash function. `key` is an array of 2 elements.
-//
-// A strong hash is important when inputs may be malicious, e.g. URLs with
-// additional components. Adversaries could try to make their inputs hash to the
-// same bucket for a denial-of-service attack or to skew the results. A strong
-// hash prevents this by making it difficult, if not infeasible, to compute inputs
-// that hash to the same bucket. This comes at a cost of roughly 4x higher compute
-// time than `tf.string_to_hash_bucket_fast`.
-//
-// Arguments:
-//	input: The strings to assign a hash bucket.
-//	num_buckets: The number of buckets.
-//	key: The key for the keyed hash function passed as a list of two uint64
-// elements.
+// Generate a sharded filename. The filename is printf formatted as
 //
-// Returns A Tensor of the same shape as the input `string_tensor`.
-func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, key []int64) (output tf.Output) {
+//    %s-%05d-of-%05d, basename, shard, num_shards.
+func ShardedFilename(scope *Scope, basename tf.Output, shard tf.Output, num_shards tf.Output) (filename tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_buckets": num_buckets, "key": key}
 	opspec := tf.OpSpec{
-		Type: "StringToHashBucketStrong",
+		Type: "ShardedFilename",
 		Input: []tf.Input{
-			input,
+			basename, shard, num_shards,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Encode audio data using the WAV file format.
+// Generate a glob pattern matching all sharded file names.
+func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ShardedFilespec",
+		Input: []tf.Input{
+			basename, num_shards,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// TextLineReaderV2Attr is an optional argument to TextLineReaderV2.
+type TextLineReaderV2Attr func(optionalAttr)
+
+// TextLineReaderV2SkipHeaderLines sets the optional skip_header_lines attribute to value.
 //
-// This operation will generate a string suitable to be saved out to create a .wav
-// audio file. It will be encoded in the 16-bit PCM format. It takes in float
-// values in the range -1.0f to 1.0f, and any outside that value will be clamped to
-// that range.
+// value: Number of lines to skip from the beginning of every file.
+// If not specified, defaults to 0
+func TextLineReaderV2SkipHeaderLines(value int64) TextLineReaderV2Attr {
+	return func(m optionalAttr) {
+		m["skip_header_lines"] = value
+	}
+}
+
+// TextLineReaderV2Container sets the optional container attribute to value.
 //
-// `audio` is a 2-D float Tensor of shape `[length, channels]`.
-// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
+// value: If non-empty, this reader is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func TextLineReaderV2Container(value string) TextLineReaderV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// TextLineReaderV2SharedName sets the optional shared_name attribute to value.
 //
-// Arguments:
-//	audio: 2-D with shape `[length, channels]`.
-//	sample_rate: Scalar containing the sample frequency.
+// value: If non-empty, this reader is named in the given bucket
+// with this shared_name. Otherwise, the node name is used instead.
+// If not specified, defaults to ""
+func TextLineReaderV2SharedName(value string) TextLineReaderV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// A Reader that outputs the lines of a file delimited by '\n'.
 //
-// Returns 0-D. WAV-encoded file contents.
-func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) {
+// Returns The handle to reference the Reader.
+func TextLineReaderV2(scope *Scope, optional ...TextLineReaderV2Attr) (reader_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "EncodeWav",
-		Input: []tf.Input{
-			audio, sample_rate,
-		},
+		Type: "TextLineReaderV2",
+
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// The gradient operator for the SparseAdd op.
+// LoadAndRemapMatrixAttr is an optional argument to LoadAndRemapMatrix.
+type LoadAndRemapMatrixAttr func(optionalAttr)
+
+// LoadAndRemapMatrixMaxRowsInMemory sets the optional max_rows_in_memory attribute to value.
 //
-// The SparseAdd op calculates A + B, where A, B, and the sum are all represented
-// as `SparseTensor` objects.  This op takes in the upstream gradient w.r.t.
-// non-empty values of the sum, and outputs the gradients w.r.t. the non-empty
-// values of A and B.
+// value: The maximum number of rows to load from the checkpoint at
+// once. If less than or equal to 0, the entire matrix will be loaded into
+// memory. Setting this arg trades increased disk reads for lower memory usage.
+// If not specified, defaults to -1
+func LoadAndRemapMatrixMaxRowsInMemory(value int64) LoadAndRemapMatrixAttr {
+	return func(m optionalAttr) {
+		m["max_rows_in_memory"] = value
+	}
+}
+
+// Loads a 2-D (matrix) `Tensor` with name `old_tensor_name` from the checkpoint
+//
+// at `ckpt_path` and potentially reorders its rows and columns using the
+// specified remappings.
+//
+// Most users should use one of the wrapper initializers (such as
+// `tf.contrib.framework.load_and_remap_matrix_initializer`) instead of this
+// function directly.
+//
+// The remappings are 1-D tensors with the following properties:
+//
+// * `row_remapping` must have exactly `num_rows` entries. Row `i` of the output
+//   matrix will be initialized from the row corresponding to index
+//   `row_remapping[i]` in the old `Tensor` from the checkpoint.
+// * `col_remapping` must have either 0 entries (indicating that no column
+//   reordering is needed) or `num_cols` entries. If specified, column `j` of the
+//   output matrix will be initialized from the column corresponding to index
+//   `col_remapping[j]` in the old `Tensor` from the checkpoint.
+// * A value of -1 in either of the remappings signifies a "missing" entry. In that
+//   case, values from the `initializing_values` tensor will be used to fill that
+//   missing row or column. If `row_remapping` has `r` missing entries and
+//   `col_remapping` has `c` missing entries, then the following condition must be
+//   true:
+//
+// `(r * num_cols) + (c * num_rows) - (r * c) == len(initializing_values)`
+//
+// The remapping tensors can be generated using the GenerateVocabRemapping op.
+//
+// As an example, with row_remapping = [1, 0, -1], col_remapping = [0, 2, -1],
+// initializing_values = [0.5, -0.5, 0.25, -0.25, 42], and w(i, j) representing
+// the value from row i, column j of the old tensor in the checkpoint, the output
+// matrix will look like the following:
+//
+// [[w(1, 0),  w(1, 2),  0.5],
+//  [w(0, 0),  w(0, 2), -0.5],
+//  [0.25,    -0.25,      42]]
 //
 // Arguments:
-//	backprop_val_grad: 1-D with shape `[nnz(sum)]`.  The gradient with respect to
-// the non-empty values of the sum.
-//	a_indices: 2-D.  The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`.
-//	b_indices: 2-D.  The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`.
-//	sum_indices: 2-D.  The `indices` of the sum `SparseTensor`, size
-// `[nnz(sum), ndims]`.
+//	ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) from
+// which the old matrix `Tensor` will be loaded.
+//	old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint.
+//	row_remapping: An int `Tensor` of row remappings (generally created by
+// `generate_vocab_remapping`).  Even if no row remapping is needed, this must
+// still be an index-valued Tensor (e.g. [0, 1, 2, ...]), or a shifted
+// index-valued `Tensor` (e.g. [8, 9, 10, ...], for partitioned `Variables`).
+//	col_remapping: An int `Tensor` of column remappings (generally created by
+// `generate_vocab_remapping`).  May be a size-0 `Tensor` if only row remapping
+// is to be done (e.g. column ordering is the same).
+//	initializing_values: A float `Tensor` containing  values to fill in for cells
+// in the output matrix that are not loaded from the checkpoint. Length must be
+// exactly the same as the number of missing / new cells.
+//	num_rows: Number of rows (length of the 1st dimension) in the output matrix.
+//	num_cols: Number of columns (length of the 2nd dimension) in the output matrix.
 //
-// Returns 1-D with shape `[nnz(A)]`. The gradient with respect to the
-// non-empty values of A.1-D with shape `[nnz(B)]`. The gradient with respect to the
-// non-empty values of B.
-func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Output, b_indices tf.Output, sum_indices tf.Output) (a_val_grad tf.Output, b_val_grad tf.Output) {
+// Returns Output matrix containing existing values loaded from the
+// checkpoint, and with any missing values filled in from initializing_values.
+func LoadAndRemapMatrix(scope *Scope, ckpt_path tf.Output, old_tensor_name tf.Output, row_remapping tf.Output, col_remapping tf.Output, initializing_values tf.Output, num_rows int64, num_cols int64, optional ...LoadAndRemapMatrixAttr) (output_matrix tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"num_rows": num_rows, "num_cols": num_cols}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SparseAddGrad",
+		Type: "LoadAndRemapMatrix",
 		Input: []tf.Input{
-			backprop_val_grad, a_indices, b_indices, sum_indices,
+			ckpt_path, old_tensor_name, row_remapping, col_remapping, initializing_values,
 		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// TFRecordReaderV2Attr is an optional argument to TFRecordReaderV2.
+type TFRecordReaderV2Attr func(optionalAttr)
+
+// TFRecordReaderV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this reader is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func TFRecordReaderV2Container(value string) TFRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
 }
 
-// Adds `bias` to `value`.
-//
-// This is a deprecated version of BiasAdd and will be soon removed.
-//
-// This is a special case of `tf.add` where `bias` is restricted to be 1-D.
-// Broadcasting is supported, so `value` may have any number of dimensions.
+// TFRecordReaderV2SharedName sets the optional shared_name attribute to value.
 //
-// Arguments:
-//	value: Any number of dimensions.
-//	bias: 1-D with size the last dimension of `value`.
+// value: If non-empty, this reader is named in the given bucket
+// with this shared_name. Otherwise, the node name is used instead.
+// If not specified, defaults to ""
+func TFRecordReaderV2SharedName(value string) TFRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// TFRecordReaderV2CompressionType sets the optional compression_type attribute to value.
+// If not specified, defaults to ""
+func TFRecordReaderV2CompressionType(value string) TFRecordReaderV2Attr {
+	return func(m optionalAttr) {
+		m["compression_type"] = value
+	}
+}
+
+// A Reader that outputs the records from a TensorFlow Records file.
 //
-// Returns Broadcasted sum of `value` and `bias`.
-func BiasAddV1(scope *Scope, value tf.Output, bias tf.Output) (output tf.Output) {
+// Returns The handle to reference the Reader.
+func TFRecordReaderV2(scope *Scope, optional ...TFRecordReaderV2Attr) (reader_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "BiasAddV1",
-		Input: []tf.Input{
-			value, bias,
-		},
+		Type: "TFRecordReaderV2",
+
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2.
-type FixedLengthRecordReaderV2Attr func(optionalAttr)
+// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3.
+type QuantizeAndDequantizeV3Attr func(optionalAttr)
 
-// FixedLengthRecordReaderV2HeaderBytes sets the optional header_bytes attribute to value.
-//
-// value: Number of bytes in the header, defaults to 0.
-// If not specified, defaults to 0
-func FixedLengthRecordReaderV2HeaderBytes(value int64) FixedLengthRecordReaderV2Attr {
+// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value.
+// If not specified, defaults to true
+func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr {
 	return func(m optionalAttr) {
-		m["header_bytes"] = value
+		m["signed_input"] = value
 	}
 }
 
-// FixedLengthRecordReaderV2FooterBytes sets the optional footer_bytes attribute to value.
-//
-// value: Number of bytes in the footer, defaults to 0.
-// If not specified, defaults to 0
-func FixedLengthRecordReaderV2FooterBytes(value int64) FixedLengthRecordReaderV2Attr {
+// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value.
+// If not specified, defaults to true
+func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr {
 	return func(m optionalAttr) {
-		m["footer_bytes"] = value
+		m["range_given"] = value
 	}
 }
 
-// FixedLengthRecordReaderV2HopBytes sets the optional hop_bytes attribute to value.
+// Quantizes then dequantizes a tensor.
 //
-// value: Number of bytes to hop before each read. Default of 0 means using
-// record_bytes.
-// If not specified, defaults to 0
-func FixedLengthRecordReaderV2HopBytes(value int64) FixedLengthRecordReaderV2Attr {
-	return func(m optionalAttr) {
-		m["hop_bytes"] = value
+// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a
+// tensor, so its value can change during training.
+func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QuantizeAndDequantizeV3",
+		Input: []tf.Input{
+			input, input_min, input_max, num_bits,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// FixedLengthRecordReaderV2Container sets the optional container attribute to value.
+// IdentityReaderV2Attr is an optional argument to IdentityReaderV2.
+type IdentityReaderV2Attr func(optionalAttr)
+
+// IdentityReaderV2Container sets the optional container attribute to value.
 //
 // value: If non-empty, this reader is placed in the given container.
 // Otherwise, a default container is used.
 // If not specified, defaults to ""
-func FixedLengthRecordReaderV2Container(value string) FixedLengthRecordReaderV2Attr {
+func IdentityReaderV2Container(value string) IdentityReaderV2Attr {
 	return func(m optionalAttr) {
 		m["container"] = value
 	}
 }
 
-// FixedLengthRecordReaderV2SharedName sets the optional shared_name attribute to value.
+// IdentityReaderV2SharedName sets the optional shared_name attribute to value.
 //
 // value: If non-empty, this reader is named in the given bucket
 // with this shared_name. Otherwise, the node name is used instead.
 // If not specified, defaults to ""
-func FixedLengthRecordReaderV2SharedName(value string) FixedLengthRecordReaderV2Attr {
+func IdentityReaderV2SharedName(value string) IdentityReaderV2Attr {
 	return func(m optionalAttr) {
 		m["shared_name"] = value
 	}
 }
 
-// FixedLengthRecordReaderV2Encoding sets the optional encoding attribute to value.
-//
-// value: The type of encoding for the file. Currently ZLIB and GZIP
-// are supported. Defaults to none.
-// If not specified, defaults to ""
-func FixedLengthRecordReaderV2Encoding(value string) FixedLengthRecordReaderV2Attr {
-	return func(m optionalAttr) {
-		m["encoding"] = value
-	}
-}
-
-// A Reader that outputs fixed-length records from a file.
+// A Reader that outputs the queued work as both the key and value.
 //
-// Arguments:
-//	record_bytes: Number of bytes in the record.
+// To use, enqueue strings in a Queue.  ReaderRead will take the front
+// work string and output (work, work).
 //
 // Returns The handle to reference the Reader.
-func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...FixedLengthRecordReaderV2Attr) (reader_handle tf.Output) {
+func IdentityReaderV2(scope *Scope, optional ...IdentityReaderV2Attr) (reader_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"record_bytes": record_bytes}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FixedLengthRecordReaderV2",
+		Type: "IdentityReaderV2",
 
 		Attrs: attrs,
 	}
@@ -18432,26 +18379,29 @@ func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...Fix
 	return op.Output(0)
 }
 
-// QuantizedRelu6Attr is an optional argument to QuantizedRelu6.
-type QuantizedRelu6Attr func(optionalAttr)
+// ResourceApplyGradientDescentAttr is an optional argument to ResourceApplyGradientDescent.
+type ResourceApplyGradientDescentAttr func(optionalAttr)
 
-// QuantizedRelu6OutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_QUINT8
-func QuantizedRelu6OutType(value tf.DataType) QuantizedRelu6Attr {
+// ResourceApplyGradientDescentUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, the subtraction will be protected by a lock;
+// otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceApplyGradientDescentUseLocking(value bool) ResourceApplyGradientDescentAttr {
 	return func(m optionalAttr) {
-		m["out_type"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`
+// Update '*var' by subtracting 'alpha' * 'delta' from it.
 //
 // Arguments:
+//	var_: Should be from a Variable().
+//	alpha: Scaling factor. Must be a scalar.
+//	delta: The change.
 //
-//	min_features: The float value that the lowest quantized value represents.
-//	max_features: The float value that the highest quantized value represents.
-//
-// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents.
-func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedRelu6Attr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) {
+// Returns the created operation.
+func ResourceApplyGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, delta tf.Output, optional ...ResourceApplyGradientDescentAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -18460,77 +18410,160 @@ func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, ma
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "QuantizedRelu6",
+		Type: "ResourceApplyGradientDescent",
+		Input: []tf.Input{
+			var_, alpha, delta,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Returns the next record (key, value pair) produced by a Reader.
+//
+// Will dequeue from the input queue if necessary (e.g. when the
+// Reader needs to start reading from a new file since it has finished
+// with the previous file).
+//
+// Arguments:
+//	reader_handle: Handle to a Reader.
+//	queue_handle: Handle to a Queue, with string work items.
+//
+// Returns A scalar.A scalar.
+func ReaderReadV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output) (key tf.Output, value tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReaderReadV2",
+		Input: []tf.Input{
+			reader_handle, queue_handle,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Returns up to `num_records` (key, value) pairs produced by a Reader.
+//
+// Will dequeue from the input queue if necessary (e.g. when the
+// Reader needs to start reading from a new file since it has finished
+// with the previous file).
+// It may return less than `num_records` even before the last batch.
+//
+// Arguments:
+//	reader_handle: Handle to a `Reader`.
+//	queue_handle: Handle to a `Queue`, with string work items.
+//	num_records: number of records to read from `Reader`.
+//
+// Returns A 1-D tensor.A 1-D tensor.
+func ReaderReadUpToV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output, num_records tf.Output) (keys tf.Output, values tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReaderReadUpToV2",
+		Input: []tf.Input{
+			reader_handle, queue_handle, num_records,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Restore a Reader to its initial clean state.
+//
+// Arguments:
+//	reader_handle: Handle to a Reader.
+//
+// Returns the created operation.
+func ReaderResetV2(scope *Scope, reader_handle tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReaderResetV2",
+		Input: []tf.Input{
+			reader_handle,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Adjust the hue of one or more images.
+//
+// `images` is a tensor of at least 3 dimensions.  The last dimension is
+// interpretted as channels, and must be three.
+//
+// The input image is considered in the RGB colorspace. Conceptually, the RGB
+// colors are first mapped into HSV. A delta is then applied all the hue values,
+// and then remapped back to RGB colorspace.
+//
+// Arguments:
+//	images: Images to adjust.  At least 3-D.
+//	delta: A float delta to add to the hue.
+//
+// Returns The hue-adjusted image or images.
+func AdjustHue(scope *Scope, images tf.Output, delta tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AdjustHue",
 		Input: []tf.Input{
-			features, min_features, max_features,
+			images, delta,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// CumsumAttr is an optional argument to Cumsum.
-type CumsumAttr func(optionalAttr)
+// ResourceApplyAdamAttr is an optional argument to ResourceApplyAdam.
+type ResourceApplyAdamAttr func(optionalAttr)
 
-// CumsumExclusive sets the optional exclusive attribute to value.
+// ResourceApplyAdamUseLocking sets the optional use_locking attribute to value.
 //
-// value: If `True`, perform exclusive cumsum.
+// value: If `True`, updating of the var, m, and v tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
 // If not specified, defaults to false
-func CumsumExclusive(value bool) CumsumAttr {
+func ResourceApplyAdamUseLocking(value bool) ResourceApplyAdamAttr {
 	return func(m optionalAttr) {
-		m["exclusive"] = value
+		m["use_locking"] = value
 	}
 }
 
-// CumsumReverse sets the optional reverse attribute to value.
+// ResourceApplyAdamUseNesterov sets the optional use_nesterov attribute to value.
 //
-// value: A `bool` (default: False).
+// value: If `True`, uses the nesterov update.
 // If not specified, defaults to false
-func CumsumReverse(value bool) CumsumAttr {
+func ResourceApplyAdamUseNesterov(value bool) ResourceApplyAdamAttr {
 	return func(m optionalAttr) {
-		m["reverse"] = value
+		m["use_nesterov"] = value
 	}
 }
 
-// Compute the cumulative sum of the tensor `x` along `axis`.
-//
-// By default, this op performs an inclusive cumsum, which means that the first
-// element of the input is identical to the first element of the output:
-//
-// ```python
-// tf.cumsum([a, b, c])  # => [a, a + b, a + b + c]
-// ```
-//
-// By setting the `exclusive` kwarg to `True`, an exclusive cumsum is
-// performed instead:
-//
-// ```python
-// tf.cumsum([a, b, c], exclusive=True)  # => [0, a, a + b]
-// ```
-//
-// By setting the `reverse` kwarg to `True`, the cumsum is performed in the
-// opposite direction:
-//
-// ```python
-// tf.cumsum([a, b, c], reverse=True)  # => [a + b + c, b + c, c]
-// ```
-//
-// This is more efficient than using separate `tf.reverse` ops.
-//
-// The `reverse` and `exclusive` kwargs can also be combined:
+// Update '*var' according to the Adam algorithm.
 //
-// ```python
-// tf.cumsum([a, b, c], exclusive=True, reverse=True)  # => [b + c, c, 0]
-// ```
+// lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
+// m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
+// v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
+// variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
 //
 // Arguments:
-//	x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
-// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
-// `complex128`, `qint8`, `quint8`, `qint32`, `half`.
-//	axis: A `Tensor` of type `int32` (default: 0). Must be in the range
-// `[-rank(x), rank(x))`.
-func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) (out tf.Output) {
+//	var_: Should be from a Variable().
+//	m: Should be from a Variable().
+//	v: Should be from a Variable().
+//	beta1_power: Must be a scalar.
+//	beta2_power: Must be a scalar.
+//	lr: Scaling factor. Must be a scalar.
+//	beta1: Momentum factor. Must be a scalar.
+//	beta2: Momentum factor. Must be a scalar.
+//	epsilon: Ridge term. Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyAdam(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -18539,236 +18572,185 @@ func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) (
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Cumsum",
+		Type: "ResourceApplyAdam",
 		Input: []tf.Input{
-			x, axis,
+			var_, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// AsStringAttr is an optional argument to AsString.
-type AsStringAttr func(optionalAttr)
-
-// AsStringPrecision sets the optional precision attribute to value.
+// Store the input tensor in the state of the current session.
 //
-// value: The post-decimal precision to use for floating point numbers.
-// Only used if precision > -1.
-// If not specified, defaults to -1
-func AsStringPrecision(value int64) AsStringAttr {
-	return func(m optionalAttr) {
-		m["precision"] = value
-	}
-}
-
-// AsStringScientific sets the optional scientific attribute to value.
+// Arguments:
+//	value: The tensor to be stored.
 //
-// value: Use scientific notation for floating point numbers.
-// If not specified, defaults to false
-func AsStringScientific(value bool) AsStringAttr {
-	return func(m optionalAttr) {
-		m["scientific"] = value
+// Returns The handle for the tensor stored in the session state, represented
+// as a ResourceHandle object.
+func GetSessionHandleV2(scope *Scope, value tf.Output) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// AsStringShortest sets the optional shortest attribute to value.
-//
-// value: Use shortest representation (either scientific or standard) for
-// floating point numbers.
-// If not specified, defaults to false
-func AsStringShortest(value bool) AsStringAttr {
-	return func(m optionalAttr) {
-		m["shortest"] = value
+	opspec := tf.OpSpec{
+		Type: "GetSessionHandleV2",
+		Input: []tf.Input{
+			value,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// AsStringWidth sets the optional width attribute to value.
+// Returns the set of files matching one or more glob patterns.
 //
-// value: Pad pre-decimal numbers to this width.
-// Applies to both floating point and integer numbers.
-// Only used if width > -1.
-// If not specified, defaults to -1
-func AsStringWidth(value int64) AsStringAttr {
-	return func(m optionalAttr) {
-		m["width"] = value
-	}
-}
-
-// AsStringFill sets the optional fill attribute to value.
+// Note that this routine only supports wildcard characters in the
+// basename portion of the pattern, not in the directory portion.
 //
-// value: The value to pad if width > -1.  If empty, pads with spaces.
-// Another typical value is '0'.  String cannot be longer than 1 character.
-// If not specified, defaults to ""
-func AsStringFill(value string) AsStringAttr {
-	return func(m optionalAttr) {
-		m["fill"] = value
-	}
-}
-
-// Converts each entry in the given tensor to strings.  Supports many numeric
+// Arguments:
+//	pattern: Shell wildcard pattern(s). Scalar or vector of type string.
 //
-// types and boolean.
-func AsString(scope *Scope, input tf.Output, optional ...AsStringAttr) (output tf.Output) {
+// Returns A vector of matching filenames.
+func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "AsString",
+		Type: "MatchingFiles",
 		Input: []tf.Input{
-			input,
+			pattern,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Assigns sparse updates to the variable referenced by `resource`.
-//
-// This operation computes
-//
-//     # Scalar indices
-//     ref[indices, ...] = updates[...]
-//
-//     # Vector indices (for each i)
-//     ref[indices[i], ...] = updates[i, ...]
+// Computes gradients for SparseSegmentMean.
 //
-//     # High rank indices (for each i, ..., j)
-//     ref[indices[i, ..., j], ...] = updates[i, ..., j, ...]
+// Returns tensor "output" with same shape as grad, except for dimension 0 whose
+// value is output_dim0.
 //
 // Arguments:
-//	resource: Should be from a `Variable` node.
-//	indices: A tensor of indices into the first dimension of `ref`.
-//	updates: A tensor of updated values to add to `ref`.
-//
-// Returns the created operation.
-func ResourceScatterUpdate(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+//	grad: gradient propagated to the SparseSegmentMean op.
+//	indices: indices passed to the corresponding SparseSegmentMean op.
+//	segment_ids: segment_ids passed to the corresponding SparseSegmentMean op.
+//	output_dim0: dimension 0 of "data" passed to SparseSegmentMean op.
+func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceScatterUpdate",
+		Type: "SparseSegmentMeanGrad",
 		Input: []tf.Input{
-			resource, indices, updates,
+			grad, indices, segment_ids, output_dim0,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// GenerateVocabRemappingAttr is an optional argument to GenerateVocabRemapping.
-type GenerateVocabRemappingAttr func(optionalAttr)
+// SummaryWriterAttr is an optional argument to SummaryWriter.
+type SummaryWriterAttr func(optionalAttr)
 
-// GenerateVocabRemappingOldVocabSize sets the optional old_vocab_size attribute to value.
-//
-// value: Number of entries in the old vocab file to consider.  If -1,
-// use the entire old vocabulary.
-// If not specified, defaults to -1
-//
-// REQUIRES: value >= -1
-func GenerateVocabRemappingOldVocabSize(value int64) GenerateVocabRemappingAttr {
+// SummaryWriterSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func SummaryWriterSharedName(value string) SummaryWriterAttr {
 	return func(m optionalAttr) {
-		m["old_vocab_size"] = value
+		m["shared_name"] = value
 	}
 }
 
-// Given a path to new and old vocabulary files, returns a remapping Tensor of
-//
-// length `num_new_vocab`, where `remapping[i]` contains the row number in the old
-// vocabulary that corresponds to row `i` in the new vocabulary (starting at line
-// `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i`
-// in the new vocabulary is not in the old vocabulary.  The old vocabulary is
-// constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the
-// default value of -1.
-//
-// `num_vocab_offset` enables
-// use in the partitioned variable case, and should generally be set through
-// examining partitioning info.  The format of the files should be a text file,
-// with each line containing a single entity within the vocabulary.
-//
-// For example, with `new_vocab_file` a text file containing each of the following
-// elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3],
-// `num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be
-// `[0, -1, 2]`.
-//
-// The op also returns a count of how many entries in the new vocabulary
-// were present in the old vocabulary, which is used to calculate the number of
-// values to initialize in a weight matrix remapping
-//
-// This functionality can be used to remap both row vocabularies (typically,
-// features) and column vocabularies (typically, classes) from TensorFlow
-// checkpoints.  Note that the partitioning logic relies on contiguous vocabularies
-// corresponding to div-partitioned variables.  Moreover, the underlying remapping
-// uses an IndexTable (as opposed to an inexact CuckooTable), so client code should
-// use the corresponding index_table_from_file() as the FeatureColumn framework
-// does (as opposed to tf.feature_to_id(), which uses a CuckooTable).
+// SummaryWriterContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func SummaryWriterContainer(value string) SummaryWriterAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// Returns a handle to be used to access a summary writer.
 //
-// Arguments:
-//	new_vocab_file: Path to the new vocab file.
-//	old_vocab_file: Path to the old vocab file.
-//	new_vocab_offset: How many entries into the new vocab file to start reading.
-//	num_new_vocab: Number of entries in the new vocab file to remap.
+// The summary writer is an in-graph resource which can be used by ops to write
+// summaries to event files.
 //
-// Returns A Tensor of length num_new_vocab where the element at index i
-// is equal to the old ID that maps to the new ID i.  This element is -1 for any
-// new ID that is not found in the old vocabulary.Number of new vocab entries found in old vocab.
-func GenerateVocabRemapping(scope *Scope, new_vocab_file tf.Output, old_vocab_file tf.Output, new_vocab_offset int64, num_new_vocab int64, optional ...GenerateVocabRemappingAttr) (remapping tf.Output, num_present tf.Output) {
+// Returns the summary writer resource. Scalar handle.
+func SummaryWriter(scope *Scope, optional ...SummaryWriterAttr) (writer tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"new_vocab_offset": new_vocab_offset, "num_new_vocab": num_new_vocab}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "GenerateVocabRemapping",
-		Input: []tf.Input{
-			new_vocab_file, old_vocab_file,
-		},
+		Type: "SummaryWriter",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// Computes softsign: `features / (abs(features) + 1)`.
-func Softsign(scope *Scope, features tf.Output) (activations tf.Output) {
+// ResizeBicubicGradAttr is an optional argument to ResizeBicubicGrad.
+type ResizeBicubicGradAttr func(optionalAttr)
+
+// ResizeBicubicGradAlignCorners sets the optional align_corners attribute to value.
+//
+// value: If true, rescale grads by (orig_height - 1) / (height - 1), which
+// exactly aligns the 4 corners of grads and original_image. If false, rescale by
+// orig_height / height. Treat similarly the width dimension.
+// If not specified, defaults to false
+func ResizeBicubicGradAlignCorners(value bool) ResizeBicubicGradAttr {
+	return func(m optionalAttr) {
+		m["align_corners"] = value
+	}
+}
+
+// Computes the gradient of bicubic interpolation.
+//
+// Arguments:
+//	grads: 4-D with shape `[batch, height, width, channels]`.
+//	original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`,
+// The image tensor that was resized.
+//
+// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`.
+// Gradients with respect to the input image. Input image must have been
+// float or double.
+func ResizeBicubicGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBicubicGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Softsign",
+		Type: "ResizeBicubicGrad",
 		Input: []tf.Input{
-			features,
+			grads, original_image,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ResizeBilinearAttr is an optional argument to ResizeBilinear.
-type ResizeBilinearAttr func(optionalAttr)
+// ResizeNearestNeighborAttr is an optional argument to ResizeNearestNeighbor.
+type ResizeNearestNeighborAttr func(optionalAttr)
 
-// ResizeBilinearAlignCorners sets the optional align_corners attribute to value.
+// ResizeNearestNeighborAlignCorners sets the optional align_corners attribute to value.
 //
 // value: If true, rescale input by (new_height - 1) / (height - 1), which
 // exactly aligns the 4 corners of images and resized images. If false, rescale
 // by new_height / height. Treat similarly the width dimension.
 // If not specified, defaults to false
-func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr {
+func ResizeNearestNeighborAlignCorners(value bool) ResizeNearestNeighborAttr {
 	return func(m optionalAttr) {
 		m["align_corners"] = value
 	}
 }
 
-// Resize `images` to `size` using bilinear interpolation.
-//
-// Input images can be of different types but output images are always float.
+// Resize `images` to `size` using nearest neighbor interpolation.
 //
 // Arguments:
 //	images: 4-D with shape `[batch, height, width, channels]`.
@@ -18777,7 +18759,7 @@ func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr {
 //
 // Returns 4-D with shape
 // `[batch, new_height, new_width, channels]`.
-func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBilinearAttr) (resized_images tf.Output) {
+func ResizeNearestNeighbor(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeNearestNeighborAttr) (resized_images tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -18786,7 +18768,7 @@ func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResizeBilinear",
+		Type: "ResizeNearestNeighbor",
 		Input: []tf.Input{
 			images, size,
 		},
@@ -18796,33 +18778,31 @@ func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...
 	return op.Output(0)
 }
 
-// ProdAttr is an optional argument to Prod.
-type ProdAttr func(optionalAttr)
+// ResizeNearestNeighborGradAttr is an optional argument to ResizeNearestNeighborGrad.
+type ResizeNearestNeighborGradAttr func(optionalAttr)
 
-// ProdKeepDims sets the optional keep_dims attribute to value.
+// ResizeNearestNeighborGradAlignCorners sets the optional align_corners attribute to value.
 //
-// value: If true, retain reduced dimensions with length 1.
+// value: If true, rescale grads by (orig_height - 1) / (height - 1), which
+// exactly aligns the 4 corners of grads and original_image. If false, rescale by
+// orig_height / height. Treat similarly the width dimension.
 // If not specified, defaults to false
-func ProdKeepDims(value bool) ProdAttr {
+func ResizeNearestNeighborGradAlignCorners(value bool) ResizeNearestNeighborGradAttr {
 	return func(m optionalAttr) {
-		m["keep_dims"] = value
+		m["align_corners"] = value
 	}
 }
 
-// Computes the product of elements across dimensions of a tensor.
-//
-// Reduces `input` along the dimensions given in `reduction_indices`. Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-// retained with length 1.
+// Computes the gradient of nearest neighbor interpolation.
 //
 // Arguments:
-//	input: The tensor to reduce.
-//	reduction_indices: The dimensions to reduce. Must be in the range
-// `[-rank(input), rank(input))`.
+//	grads: 4-D with shape `[batch, height, width, channels]`.
+//	size: = A 1-D int32 Tensor of 2 elements: `orig_height, orig_width`. The
+// original input size.
 //
-// Returns The reduced tensor.
-func Prod(scope *Scope, input tf.Output, reduction_indices tf.Output, optional ...ProdAttr) (output tf.Output) {
+// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients
+// with respect to the input image.
+func ResizeNearestNeighborGrad(scope *Scope, grads tf.Output, size tf.Output, optional ...ResizeNearestNeighborGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -18831,9 +18811,9 @@ func Prod(scope *Scope, input tf.Output, reduction_indices tf.Output, optional .
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Prod",
+		Type: "ResizeNearestNeighborGrad",
 		Input: []tf.Input{
-			input, reduction_indices,
+			grads, size,
 		},
 		Attrs: attrs,
 	}
@@ -18841,327 +18821,383 @@ func Prod(scope *Scope, input tf.Output, reduction_indices tf.Output, optional .
 	return op.Output(0)
 }
 
-// StringSplitAttr is an optional argument to StringSplit.
-type StringSplitAttr func(optionalAttr)
+// DecodeJpegAttr is an optional argument to DecodeJpeg.
+type DecodeJpegAttr func(optionalAttr)
 
-// StringSplitSkipEmpty sets the optional skip_empty attribute to value.
+// DecodeJpegChannels sets the optional channels attribute to value.
 //
-// value: A `bool`. If `True`, skip the empty strings from the result.
-// If not specified, defaults to true
-func StringSplitSkipEmpty(value bool) StringSplitAttr {
+// value: Number of color channels for the decoded image.
+// If not specified, defaults to 0
+func DecodeJpegChannels(value int64) DecodeJpegAttr {
 	return func(m optionalAttr) {
-		m["skip_empty"] = value
+		m["channels"] = value
 	}
 }
 
-// Split elements of `input` based on `delimiter` into a `SparseTensor`.
-//
-// Let N be the size of source (typically N will be the batch size). Split each
-// element of `input` based on `delimiter` and return a `SparseTensor`
-// containing the splitted tokens. Empty tokens are ignored.
-//
-// `delimiter` can be empty, or a string of split characters. If `delimiter` is an
-//  empty string, each element of `input` is split into individual single-byte
-//  character strings, including splitting of UTF-8 multibyte sequences. Otherwise
-//  every character of `delimiter` is a potential split point.
-//
-// For example:
-//   N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output
-//   will be
+// DecodeJpegRatio sets the optional ratio attribute to value.
 //
-//   indices = [0, 0;
-//              0, 1;
-//              1, 0;
-//              1, 1;
-//              1, 2]
-//   shape = [2, 3]
-//   values = ['hello', 'world', 'a', 'b', 'c']
+// value: Downscaling ratio.
+// If not specified, defaults to 1
+func DecodeJpegRatio(value int64) DecodeJpegAttr {
+	return func(m optionalAttr) {
+		m["ratio"] = value
+	}
+}
+
+// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value.
 //
-// Arguments:
-//	input: 1-D. Strings to split.
-//	delimiter: 0-D. Delimiter characters (bytes), or empty string.
+// value: If true use a slower but nicer upscaling of the
+// chroma planes (yuv420/422 only).
+// If not specified, defaults to true
+func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr {
+	return func(m optionalAttr) {
+		m["fancy_upscaling"] = value
+	}
+}
+
+// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value.
 //
-// Returns A dense matrix of int64 representing the indices of the sparse tensor.A vector of strings corresponding to the splited values.a length-2 vector of int64 representing the shape of the sparse
-// tensor, where the first value is N and the second value is the maximum number
-// of tokens in a single input entry.
-func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ...StringSplitAttr) (indices tf.Output, values tf.Output, shape tf.Output) {
-	if scope.Err() != nil {
-		return
+// value: If true try to recover an image from truncated input.
+// If not specified, defaults to false
+func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr {
+	return func(m optionalAttr) {
+		m["try_recover_truncated"] = value
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
+}
+
+// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value.
+//
+// value: The minimum required fraction of lines before a truncated
+// input is accepted.
+// If not specified, defaults to 1
+func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr {
+	return func(m optionalAttr) {
+		m["acceptable_fraction"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "StringSplit",
-		Input: []tf.Input{
-			input, delimiter,
-		},
-		Attrs: attrs,
+}
+
+// DecodeJpegDctMethod sets the optional dct_method attribute to value.
+//
+// value: string specifying a hint about the algorithm used for
+// decompression.  Defaults to "" which maps to a system-specific
+// default.  Currently valid values are ["INTEGER_FAST",
+// "INTEGER_ACCURATE"].  The hint may be ignored (e.g., the internal
+// jpeg library changes to a version that does not have that specific
+// option.)
+// If not specified, defaults to ""
+func DecodeJpegDctMethod(value string) DecodeJpegAttr {
+	return func(m optionalAttr) {
+		m["dct_method"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Inverse 3D real-valued fast Fourier transform.
+// Decode a JPEG-encoded image to a uint8 tensor.
 //
-// Computes the inverse 3-dimensional discrete Fourier transform of a real-valued
-// signal over the inner-most 3 dimensions of `input`.
+// The attr `channels` indicates the desired number of color channels for the
+// decoded image.
 //
-// The inner-most 3 dimensions of `input` are assumed to be the result of `RFFT3D`:
-// The inner-most dimension contains the `fft_length / 2 + 1` unique components of
-// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed
-// from the size of the inner-most 3 dimensions of `input`. If the FFT length used
-// to compute `input` is odd, it should be provided since it cannot be inferred
-// properly.
+// Accepted values are:
 //
-// Along each axis `IRFFT3D` is computed on, if `fft_length` (or
-// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the
-// corresponding dimension of `input`, the dimension is cropped. If it is larger,
-// the dimension is padded with zeros.
+// *   0: Use the number of channels in the JPEG-encoded image.
+// *   1: output a grayscale image.
+// *   3: output an RGB image.
 //
-// Arguments:
-//	input: A complex64 tensor.
-//	fft_length: An int32 tensor of shape [3]. The FFT length for each dimension.
+// If needed, the JPEG-encoded image is transformed to match the requested number
+// of color channels.
 //
-// Returns A float32 tensor of the same rank as `input`. The inner-most 3
-//   dimensions of `input` are replaced with the `fft_length` samples of their
-//   inverse 3D real Fourier transform.
+// The attr `ratio` allows downscaling the image by an integer factor during
+// decoding.  Allowed values are: 1, 2, 4, and 8.  This is much faster than
+// downscaling the image later.
 //
-// @compatibility(numpy)
-// Equivalent to np.irfftn with 3 dimensions.
-// @end_compatibility
-func IRFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+//
+// This op also supports decoding PNGs and non-animated GIFs since the interface is
+// the same, though it is cleaner to use `tf.image.decode_image`.
+//
+// Arguments:
+//	contents: 0-D.  The JPEG-encoded image.
+//
+// Returns 3-D with shape `[height, width, channels]`..
+func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "IRFFT3D",
+		Type: "DecodeJpeg",
 		Input: []tf.Input{
-			input, fft_length,
+			contents,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns the truth value of (x != y) element-wise.
+// ExtractJpegShapeAttr is an optional argument to ExtractJpegShape.
+type ExtractJpegShapeAttr func(optionalAttr)
+
+// ExtractJpegShapeOutputType sets the optional output_type attribute to value.
 //
-// *NOTE*: `NotEqual` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func NotEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// value: (Optional) The output type of the operation (int32 or int64).
+// Defaults to int32.
+// If not specified, defaults to DT_INT32
+func ExtractJpegShapeOutputType(value tf.DataType) ExtractJpegShapeAttr {
+	return func(m optionalAttr) {
+		m["output_type"] = value
+	}
+}
+
+// Extract the shape information of a JPEG-encoded image.
+//
+// This op only parses the image header, so it is much faster than DecodeJpeg.
+//
+// Arguments:
+//	contents: 0-D. The JPEG-encoded image.
+//
+// Returns 1-D. The image shape with format [height, width, channels].
+func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegShapeAttr) (image_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "NotEqual",
+		Type: "ExtractJpegShape",
 		Input: []tf.Input{
-			x, y,
+			contents,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// GatherAttr is an optional argument to Gather.
-type GatherAttr func(optionalAttr)
+// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2.
+type PaddingFIFOQueueV2Attr func(optionalAttr)
 
-// GatherValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func GatherValidateIndices(value bool) GatherAttr {
+// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value.
+//
+// value: The shape of each component in a value. The length of this attr must
+// be either 0 or the same as the length of component_types.
+// Shapes of fixed rank but variable size are allowed by setting
+// any shape dimension to -1.  In this case, the inputs' shape may vary along
+// the given dimension, and DequeueMany will pad the given dimension with
+// zeros up to the maximum shape of all elements in the given batch.
+// If the length of this attr is 0, different queue elements may have
+// different ranks and shapes, but only one element may be dequeued at a time.
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr {
 	return func(m optionalAttr) {
-		m["validate_indices"] = value
+		m["shapes"] = value
 	}
 }
 
-// Gather slices from `params` according to `indices`.
-//
-// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
-// Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
+// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value.
 //
-// ```python
-//     # Scalar indices
-//     output[:, ..., :] = params[indices, :, ... :]
+// value: The upper bound on the number of elements in this queue.
+// Negative numbers mean no limit.
+// If not specified, defaults to -1
+func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// PaddingFIFOQueueV2Container sets the optional container attribute to value.
 //
-//     # Vector indices
-//     output[i, :, ..., :] = params[indices[i], :, ... :]
+// value: If non-empty, this queue is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value.
 //
-//     # Higher rank indices
-//     output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]
-// ```
+// value: If non-empty, this queue will be shared under the given name
+// across multiple sessions.
+// If not specified, defaults to ""
+func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// A queue that produces elements in first-in first-out order.
 //
-// If `indices` is a permutation and `len(indices) == params.shape[0]` then
-// this operation will permute `params` accordingly.
+// Variable-size shapes are allowed by setting the corresponding shape dimensions
+// to 0 in the shape attr.  In this case DequeueMany will pad up to the maximum
+// size of any given element in the minibatch.  See below for details.
 //
-// `validate_indices`: DEPRECATED. If this operation is assigned to CPU, values in
-// `indices` are always validated to be within range. If assigned to GPU,
-// out-of-bound indices result in safe but unspecified behavior, which may include
-// raising an error.
+// Arguments:
+//	component_types: The type of each component in a value.
 //
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt>
-// </div>
-func Gather(scope *Scope, params tf.Output, indices tf.Output, optional ...GatherAttr) (output tf.Output) {
+// Returns The handle to the queue.
+func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"component_types": component_types}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Gather",
-		Input: []tf.Input{
-			params, indices,
-		},
+		Type: "PaddingFIFOQueueV2",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Produce a string tensor that encodes the state of a Reader.
-//
-// Not all Readers support being serialized, so this can produce an
-// Unimplemented error.
+// DecodePngAttr is an optional argument to DecodePng.
+type DecodePngAttr func(optionalAttr)
+
+// DecodePngChannels sets the optional channels attribute to value.
 //
-// Arguments:
-//	reader_handle: Handle to a Reader.
-func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Output) {
-	if scope.Err() != nil {
-		return
+// value: Number of color channels for the decoded image.
+// If not specified, defaults to 0
+func DecodePngChannels(value int64) DecodePngAttr {
+	return func(m optionalAttr) {
+		m["channels"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "ReaderSerializeStateV2",
-		Input: []tf.Input{
-			reader_handle,
-		},
+}
+
+// DecodePngDtype sets the optional dtype attribute to value.
+// If not specified, defaults to DT_UINT8
+func DecodePngDtype(value tf.DataType) DecodePngAttr {
+	return func(m optionalAttr) {
+		m["dtype"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Return substrings from `Tensor` of strings.
-//
-// For each string in the input `Tensor`, creates a substring starting at index
-// `pos` with a total length of `len`.
-//
-// If `len` defines a substring that would extend beyond the length of the input
-// string, then as many characters as possible are used.
-//
-// If `pos` is negative or specifies a character index larger than any of the input
-// strings, then an `InvalidArgumentError` is thrown.
-//
-// `pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on
-// Op creation.
-//
-// *NOTE*: `Substr` supports broadcasting up to two dimensions. More about
-// broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-//
-// ---
-//
-// Examples
-//
-// Using scalar `pos` and `len`:
-//
-// ```python
-// input = [b'Hello', b'World']
-// position = 1
-// length = 3
+// Decode a PNG-encoded image to a uint8 or uint16 tensor.
 //
-// output = [b'ell', b'orl']
-// ```
+// The attr `channels` indicates the desired number of color channels for the
+// decoded image.
 //
-// Using `pos` and `len` with same shape as `input`:
+// Accepted values are:
 //
-// ```python
-// input = [[b'ten', b'eleven', b'twelve'],
-//          [b'thirteen', b'fourteen', b'fifteen'],
-//          [b'sixteen', b'seventeen', b'eighteen']]
-// position = [[1, 2, 3],
-//             [1, 2, 3],
-//             [1, 2, 3]]
-// length =   [[2, 3, 4],
-//             [4, 3, 2],
-//             [5, 5, 5]]
+// *   0: Use the number of channels in the PNG-encoded image.
+// *   1: output a grayscale image.
+// *   3: output an RGB image.
+// *   4: output an RGBA image.
 //
-// output = [[b'en', b'eve', b'lve'],
-//           [b'hirt', b'urt', b'te'],
-//           [b'ixtee', b'vente', b'hteen']]
-// ```
+// If needed, the PNG-encoded image is transformed to match the requested number
+// of color channels.
 //
-// Broadcasting `pos` and `len` onto `input`:
+// This op also supports decoding JPEGs and non-animated GIFs since the interface
+// is the same, though it is cleaner to use `tf.image.decode_image`.
 //
-// ```
-// input = [[b'ten', b'eleven', b'twelve'],
-//          [b'thirteen', b'fourteen', b'fifteen'],
-//          [b'sixteen', b'seventeen', b'eighteen'],
-//          [b'nineteen', b'twenty', b'twentyone']]
-// position = [1, 2, 3]
-// length =   [1, 2, 3]
+// Arguments:
+//	contents: 0-D.  The PNG-encoded image.
 //
-// output = [[b'e', b'ev', b'lve'],
-//           [b'h', b'ur', b'tee'],
-//           [b'i', b've', b'hte'],
-//           [b'i', b'en', b'nty']]
-// ```
+// Returns 3-D with shape `[height, width, channels]`.
+func DecodePng(scope *Scope, contents tf.Output, optional ...DecodePngAttr) (image tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DecodePng",
+		Input: []tf.Input{
+			contents,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Decode the first frame of a GIF-encoded image to a uint8 tensor.
 //
-// Broadcasting `input` onto `pos` and `len`:
+// GIF with frame or transparency compression are not supported
+// convert animated GIF from compressed to uncompressed by:
 //
-// ```
-// input = b'thirteen'
-// position = [1, 5, 7]
-// length =   [3, 2, 1]
+//     convert $src.gif -coalesce $dst.gif
 //
-// output = [b'hir', b'ee', b'n']
-// ```
+// This op also supports decoding JPEGs and PNGs, though it is cleaner to use
+// `tf.image.decode_image`.
 //
 // Arguments:
-//	input: Tensor of strings
-//	pos: Scalar defining the position of first character in each substring
-//	len: Scalar defining the number of characters to include in each substring
+//	contents: 0-D.  The GIF-encoded image.
 //
-// Returns Tensor of substrings
-func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output) (output tf.Output) {
+// Returns 4-D with shape `[num_frames, height, width, 3]`. RGB order
+func DecodeGif(scope *Scope, contents tf.Output) (image tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Substr",
+		Type: "DecodeGif",
 		Input: []tf.Input{
-			input, pos, len,
+			contents,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal.
-type StatelessRandomNormalAttr func(optionalAttr)
+// ResourceApplyCenteredRMSPropAttr is an optional argument to ResourceApplyCenteredRMSProp.
+type ResourceApplyCenteredRMSPropAttr func(optionalAttr)
 
-// StatelessRandomNormalDtype sets the optional dtype attribute to value.
+// ResourceApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value.
 //
-// value: The type of the output.
-// If not specified, defaults to DT_FLOAT
-func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr {
+// value: If `True`, updating of the var, mg, ms, and mom tensors is
+// protected by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyCenteredRMSPropUseLocking(value bool) ResourceApplyCenteredRMSPropAttr {
 	return func(m optionalAttr) {
-		m["dtype"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Outputs deterministic pseudorandom values from a normal distribution.
+// Update '*var' according to the centered RMSProp algorithm.
 //
-// The generated values will have mean 0 and standard deviation 1.
+// The centered RMSProp algorithm uses an estimate of the centered second moment
+// (i.e., the variance) for normalization, as opposed to regular RMSProp, which
+// uses the (uncentered) second moment. This often helps with training, but is
+// slightly more expensive in terms of computation and memory.
 //
-// The outputs are a deterministic function of `shape` and `seed`.
+// Note that in dense implementation of this algorithm, mg, ms, and mom will
+// update even if the grad is zero, but in this sparse implementation, mg, ms,
+// and mom will not update in iterations during which the grad is zero.
+//
+// mean_square = decay * mean_square + (1-decay) * gradient ** 2
+// mean_grad = decay * mean_grad + (1-decay) * gradient
+//
+// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
+//
+// mg <- rho * mg_{t-1} + (1-rho) * grad
+// ms <- rho * ms_{t-1} + (1-rho) * grad * grad
+// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
+// var <- var - mom
 //
 // Arguments:
-//	shape: The shape of the output tensor.
-//	seed: 2 seeds (shape [2]).
+//	var_: Should be from a Variable().
+//	mg: Should be from a Variable().
+//	ms: Should be from a Variable().
+//	mom: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	rho: Decay rate. Must be a scalar.
 //
-// Returns Random values with specified shape.
-func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) {
+//	epsilon: Ridge term. Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyCenteredRMSPropAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -19170,149 +19206,225 @@ func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, option
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StatelessRandomNormal",
+		Type: "ResourceApplyCenteredRMSProp",
 		Input: []tf.Input{
-			shape, seed,
+			var_, mg, ms, mom, lr, rho, momentum, epsilon, grad,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// UniqueWithCountsAttr is an optional argument to UniqueWithCounts.
-type UniqueWithCountsAttr func(optionalAttr)
-
-// UniqueWithCountsOutIdx sets the optional out_idx attribute to value.
-// If not specified, defaults to DT_INT32
-func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr {
-	return func(m optionalAttr) {
-		m["out_idx"] = value
-	}
+	return scope.AddOperation(opspec)
 }
 
-// Finds unique elements in a 1-D tensor.
+// Returns a list of tensors with the same shapes and contents as the input
 //
-// This operation returns a tensor `y` containing all of the unique elements of `x`
-// sorted in the same order that they occur in `x`. This operation also returns a
-// tensor `idx` the same size as `x` that contains the index of each value of `x`
-// in the unique output `y`. Finally, it returns a third tensor `count` that
-// contains the count of each element of `y` in `x`. In other words:
+// tensors.
 //
-// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
+// This op can be used to override the gradient for complicated functions. For
+// example, suppose y = f(x) and we wish to apply a custom function g for backprop
+// such that dx = g(dy). In Python,
 //
-// For example:
+// ```python
+// with tf.get_default_graph().gradient_override_map(
+//     {'IdentityN': 'OverrideGradientWithG'}):
+//   y, _ = identity_n([f(x), x])
 //
+// @tf.RegisterGradient('OverrideGradientWithG')
+// def ApplyG(op, dy, _):
+//   return [None, g(dy)]  # Do not backprop to f(x).
 // ```
-// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
-// y, idx, count = unique_with_counts(x)
-// y ==> [1, 2, 4, 7, 8]
-// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
-// count ==> [2, 1, 3, 1, 2]
-// ```
-//
-// Arguments:
-//	x: 1-D.
-//
-// Returns 1-D.1-D.1-D.
-func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) {
+func IdentityN(scope *Scope, input []tf.Output) (output []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "UniqueWithCounts",
+		Type: "IdentityN",
 		Input: []tf.Input{
-			x,
+			tf.OutputList(input),
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("IdentityN", err)
+		return
+	}
+	return output
 }
 
-// RestoreSliceAttr is an optional argument to RestoreSlice.
-type RestoreSliceAttr func(optionalAttr)
-
-// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value.
+// Computes the gradient of the sigmoid of `x` wrt its input.
 //
-// value: Index of file to open first if multiple files match
-// `file_pattern`. See the documentation for `Restore`.
-// If not specified, defaults to -1
-func RestoreSlicePreferredShard(value int64) RestoreSliceAttr {
-	return func(m optionalAttr) {
-		m["preferred_shard"] = value
+// Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and
+// `dy` is the corresponding input gradient.
+func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SigmoidGrad",
+		Input: []tf.Input{
+			y, dy,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Restores a tensor from checkpoint files.
+// Convert one or more images from HSV to RGB.
 //
-// This is like `Restore` except that restored tensor can be listed as filling
-// only a slice of a larger tensor.  `shape_and_slice` specifies the shape of the
-// larger tensor and the slice that the restored tensor covers.
+// Outputs a tensor of the same shape as the `images` tensor, containing the RGB
+// value of the pixels. The output is only well defined if the value in `images`
+// are in `[0,1]`.
 //
-// The `shape_and_slice` input has the same format as the
-// elements of the `shapes_and_slices` input of the `SaveSlices` op.
+// See `rgb_to_hsv` for a description of the HSV encoding.
 //
 // Arguments:
-//	file_pattern: Must have a single element. The pattern of the files from
-// which we read the tensor.
-//	tensor_name: Must have a single element. The name of the tensor to be
-// restored.
-//	shape_and_slice: Scalar. The shapes and slice specifications to use when
-// restoring a tensors.
-//	dt: The type of the tensor to be restored.
+//	images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3.
 //
-// Returns The restored tensor.
-func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) {
+// Returns `images` converted to RGB.
+func HSVToRGB(scope *Scope, images tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dt": dt}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "RestoreSlice",
+		Type: "HSVToRGB",
 		Input: []tf.Input{
-			file_pattern, tensor_name, shape_and_slice,
+			images,
 		},
-		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// SampleDistortedBoundingBoxV2Attr is an optional argument to SampleDistortedBoundingBoxV2.
+type SampleDistortedBoundingBoxV2Attr func(optionalAttr)
+
+// SampleDistortedBoundingBoxV2Seed sets the optional seed attribute to value.
+//
+// value: If either `seed` or `seed2` are set to non-zero, the random number
+// generator is seeded by the given `seed`.  Otherwise, it is seeded by a random
+// seed.
+// If not specified, defaults to 0
+func SampleDistortedBoundingBoxV2Seed(value int64) SampleDistortedBoundingBoxV2Attr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// SampleDistortedBoundingBoxV2Seed2 sets the optional seed2 attribute to value.
+//
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func SampleDistortedBoundingBoxV2Seed2(value int64) SampleDistortedBoundingBoxV2Attr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// SampleDistortedBoundingBoxV2AspectRatioRange sets the optional aspect_ratio_range attribute to value.
+//
+// value: The cropped area of the image must have an aspect ratio =
+// width / height within this range.
+// If not specified, defaults to <f:0.75 f:1.33 >
+func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistortedBoundingBoxV2Attr {
+	return func(m optionalAttr) {
+		m["aspect_ratio_range"] = value
+	}
+}
+
+// SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value.
+//
+// value: The cropped area of the image must contain a fraction of the
+// supplied image within in this range.
+// If not specified, defaults to <f:0.05 f:1 >
+func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr {
+	return func(m optionalAttr) {
+		m["area_range"] = value
+	}
 }
 
-// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal.
-type StatelessTruncatedNormalAttr func(optionalAttr)
+// SampleDistortedBoundingBoxV2MaxAttempts sets the optional max_attempts attribute to value.
+//
+// value: Number of attempts at generating a cropped region of the image
+// of the specified constraints. After `max_attempts` failures, return the entire
+// image.
+// If not specified, defaults to 100
+func SampleDistortedBoundingBoxV2MaxAttempts(value int64) SampleDistortedBoundingBoxV2Attr {
+	return func(m optionalAttr) {
+		m["max_attempts"] = value
+	}
+}
 
-// StatelessTruncatedNormalDtype sets the optional dtype attribute to value.
+// SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value.
 //
-// value: The type of the output.
-// If not specified, defaults to DT_FLOAT
-func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr {
+// value: Controls behavior if no bounding boxes supplied.
+// If true, assume an implicit bounding box covering the whole input. If false,
+// raise an error.
+// If not specified, defaults to false
+func SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxV2Attr {
 	return func(m optionalAttr) {
-		m["dtype"] = value
+		m["use_image_if_no_bounding_boxes"] = value
 	}
 }
 
-// Outputs deterministic pseudorandom values from a truncated normal distribution.
+// Generate a single randomly distorted bounding box for an image.
 //
-// The generated values follow a normal distribution with mean 0 and standard
-// deviation 1, except that values whose magnitude is more than 2 standard
-// deviations from the mean are dropped and re-picked.
+// Bounding box annotations are often supplied in addition to ground-truth labels
+// in image recognition or object localization tasks. A common technique for
+// training such a system is to randomly distort an image while preserving
+// its content, i.e. *data augmentation*. This Op outputs a randomly distorted
+// localization of an object, i.e. bounding box, given an `image_size`,
+// `bounding_boxes` and a series of constraints.
 //
-// The outputs are a deterministic function of `shape` and `seed`.
+// The output of this Op is a single bounding box that may be used to crop the
+// original image. The output is returned as 3 tensors: `begin`, `size` and
+// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
+// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize
+// what the bounding box looks like.
+//
+// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The
+// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
+// height of the underlying image.
+//
+// For example,
+//
+// ```python
+//     # Generate a single distorted bounding box.
+//     begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
+//         tf.shape(image),
+//         bounding_boxes=bounding_boxes)
+//
+//     # Draw the bounding box in an image summary.
+//     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
+//                                                   bbox_for_draw)
+//     tf.summary.image('images_with_box', image_with_box)
+//
+//     # Employ the bounding box to distort the image.
+//     distorted_image = tf.slice(image, begin, size)
+// ```
+//
+// Note that if no bounding box information is available, setting
+// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
+// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
+// false and no bounding boxes are supplied, an error is raised.
 //
 // Arguments:
-//	shape: The shape of the output tensor.
-//	seed: 2 seeds (shape [2]).
+//	image_size: 1-D, containing `[height, width, channels]`.
+//	bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes
+// associated with the image.
+//	min_object_covered: The cropped area of the image must contain at least this
+// fraction of any bounding box supplied. The value of this parameter should be
+// non-negative. In the case of 0, the cropped area does not need to overlap
+// any of the bounding boxes supplied.
 //
-// Returns Random values with specified shape.
-func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) {
+// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to
+// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to
+// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box.
+// Provide as input to `tf.image.draw_bounding_boxes`.
+func SampleDistortedBoundingBoxV2(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, min_object_covered tf.Output, optional ...SampleDistortedBoundingBoxV2Attr) (begin tf.Output, size tf.Output, bboxes tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -19321,70 +19433,88 @@ func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, opt
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StatelessTruncatedNormal",
+		Type: "SampleDistortedBoundingBoxV2",
 		Input: []tf.Input{
-			shape, seed,
+			image_size, bounding_boxes, min_object_covered,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Computes the sum along sparse segments of a tensor divided by the sqrt of N.
-//
-// N is the size of the segment being reduced.
-//
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
-//
-// Arguments:
-//
-//	indices: A 1-D tensor. Has same rank as `segment_ids`.
-//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+// ExtractGlimpseAttr is an optional argument to ExtractGlimpse.
+type ExtractGlimpseAttr func(optionalAttr)
+
+// ExtractGlimpseCentered sets the optional centered attribute to value.
 //
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSegmentSqrtN",
-		Input: []tf.Input{
-			data, indices, segment_ids,
-		},
+// value: indicates if the offset coordinates are centered relative to
+// the image, in which case the (0, 0) offset is relative to the center
+// of the input images. If false, the (0,0) offset corresponds to the
+// upper left corner of the input images.
+// If not specified, defaults to true
+func ExtractGlimpseCentered(value bool) ExtractGlimpseAttr {
+	return func(m optionalAttr) {
+		m["centered"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// ResizeBilinearGradAttr is an optional argument to ResizeBilinearGrad.
-type ResizeBilinearGradAttr func(optionalAttr)
+// ExtractGlimpseNormalized sets the optional normalized attribute to value.
+//
+// value: indicates if the offset coordinates are normalized.
+// If not specified, defaults to true
+func ExtractGlimpseNormalized(value bool) ExtractGlimpseAttr {
+	return func(m optionalAttr) {
+		m["normalized"] = value
+	}
+}
 
-// ResizeBilinearGradAlignCorners sets the optional align_corners attribute to value.
+// ExtractGlimpseUniformNoise sets the optional uniform_noise attribute to value.
 //
-// value: If true, rescale grads by (orig_height - 1) / (height - 1), which
-// exactly aligns the 4 corners of grads and original_image. If false, rescale by
-// orig_height / height. Treat similarly the width dimension.
-// If not specified, defaults to false
-func ResizeBilinearGradAlignCorners(value bool) ResizeBilinearGradAttr {
+// value: indicates if the noise should be generated using a
+// uniform distribution or a Gaussian distribution.
+// If not specified, defaults to true
+func ExtractGlimpseUniformNoise(value bool) ExtractGlimpseAttr {
 	return func(m optionalAttr) {
-		m["align_corners"] = value
+		m["uniform_noise"] = value
 	}
 }
 
-// Computes the gradient of bilinear interpolation.
+// Extracts a glimpse from the input tensor.
+//
+// Returns a set of windows called glimpses extracted at location
+// `offsets` from the input tensor. If the windows only partially
+// overlaps the inputs, the non overlapping areas will be filled with
+// random noise.
+//
+// The result is a 4-D tensor of shape `[batch_size, glimpse_height,
+// glimpse_width, channels]`. The channels and batch dimensions are the
+// same as that of the input tensor. The height and width of the output
+// windows are specified in the `size` parameter.
+//
+// The argument `normalized` and `centered` controls how the windows are built:
+//
+// * If the coordinates are normalized but not centered, 0.0 and 1.0
+//   correspond to the minimum and maximum of each height and width
+//   dimension.
+// * If the coordinates are both normalized and centered, they range from
+//   -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
+//   left corner, the lower right corner is located at (1.0, 1.0) and the
+//   center is at (0, 0).
+// * If the coordinates are not normalized they are interpreted as
+//   numbers of pixels.
 //
 // Arguments:
-//	grads: 4-D with shape `[batch, height, width, channels]`.
-//	original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`,
-// The image tensor that was resized.
+//	input: A 4-D float tensor of shape `[batch_size, height, width, channels]`.
+//	size: A 1-D tensor of 2 elements containing the size of the glimpses
+// to extract.  The glimpse height must be specified first, following
+// by the glimpse width.
+//	offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing
+// the y, x locations of the center of each window.
 //
-// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`.
-// Gradients with respect to the input image. Input image must have been
-// float or double.
-func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBilinearGradAttr) (output tf.Output) {
+// Returns A tensor representing the glimpses `[batch_size,
+// glimpse_height, glimpse_width, channels]`.
+func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Output, optional ...ExtractGlimpseAttr) (glimpse tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -19393,9 +19523,9 @@ func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output,
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResizeBilinearGrad",
+		Type: "ExtractGlimpse",
 		Input: []tf.Input{
-			grads, original_image,
+			input, size, offsets,
 		},
 		Attrs: attrs,
 	}
@@ -19403,74 +19533,66 @@ func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output,
 	return op.Output(0)
 }
 
-// Computes the number of elements in the given table.
-//
-// Arguments:
-//	table_handle: Handle to the table.
+// A container for an iterator resource.
 //
-// Returns Scalar that contains number of elements in the table.
-func LookupTableSizeV2(scope *Scope, table_handle tf.Output) (size tf.Output) {
+// Returns A handle to the iterator that can be passed to a "MakeIterator"
+// or "IteratorGetNext" op.
+func Iterator(scope *Scope, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "LookupTableSizeV2",
-		Input: []tf.Input{
-			table_handle,
-		},
+		Type: "Iterator",
+
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Component-wise divides a SparseTensor by a dense Tensor.
-//
-// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not
-// the other direction.
-//
-// Arguments:
-//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
-//	sp_shape: 1-D.  Shape of the input SparseTensor.
-//	dense: `R`-D.  The dense Tensor operand.
+// ShuffleDatasetAttr is an optional argument to ShuffleDataset.
+type ShuffleDatasetAttr func(optionalAttr)
+
+// ShuffleDatasetReshuffleEachIteration sets the optional reshuffle_each_iteration attribute to value.
 //
-// Returns 1-D.  The `N` values that are operated on.
-func SparseDenseCwiseDiv(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseDenseCwiseDiv",
-		Input: []tf.Input{
-			sp_indices, sp_values, sp_shape, dense,
-		},
+// value: If true, each iterator over this dataset will be given
+// a different pseudorandomly generated seed, based on a sequence seeded by the
+// `seed` and `seed2` inputs. If false, each iterator will be given the same
+// seed, and repeated iteration over this dataset will yield the exact same
+// sequence of results.
+// If not specified, defaults to true
+func ShuffleDatasetReshuffleEachIteration(value bool) ShuffleDatasetAttr {
+	return func(m optionalAttr) {
+		m["reshuffle_each_iteration"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Reads the value of a variable.
+// Creates a dataset that shuffles elements from `input_dataset` pseudorandomly.
+//
+// Arguments:
 //
-// The tensor returned by this operation is immutable.
+//	buffer_size: The number of output elements to buffer in an iterator over
+// this dataset. Compare with the `min_after_dequeue` attr when creating a
+// `RandomShuffleQueue`.
+//	seed: A scalar seed for the random number generator. If either `seed` or
+// `seed2` is set to be non-zero, the random number generator is seeded
+// by the given seed.  Otherwise, a random seed is used.
+//	seed2: A second scalar seed to avoid seed collision.
 //
-// The value returned by this operation is guaranteed to be influenced by all the
-// writes on which this operation depends directly or indirectly, and to not be
-// influenced by any of the writes which depend directly or indirectly on this
-// operation.
 //
-// Arguments:
-//	resource: handle to the resource in which to store the variable.
-//	dtype: the dtype of the value.
-func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value tf.Output) {
+func ShuffleDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ShuffleDatasetAttr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "ReadVariableOp",
+		Type: "ShuffleDataset",
 		Input: []tf.Input{
-			resource,
+			input_dataset, buffer_size, seed, seed2,
 		},
 		Attrs: attrs,
 	}
@@ -19478,62 +19600,69 @@ func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value
 	return op.Output(0)
 }
 
-// Associates the given iterator with the given statistics aggregator.
+// 3D fast Fourier transform.
 //
-// Returns the created operation.
-func IteratorSetStatsAggregator(scope *Scope, iterator_handle tf.Output, stats_aggregator_handle tf.Output) (o *tf.Operation) {
+// Computes the 3-dimensional discrete Fourier transform over the inner-most 3
+// dimensions of `input`.
+//
+// Arguments:
+//	input: A complex64 tensor.
+//
+// Returns A complex64 tensor of the same shape as `input`. The inner-most 3
+//   dimensions of `input` are replaced with their 3D Fourier transform.
+//
+// @compatibility(numpy)
+// Equivalent to np.fft.fftn with 3 dimensions.
+// @end_compatibility
+func FFT3D(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "IteratorSetStatsAggregator",
+		Type: "FFT3D",
 		Input: []tf.Input{
-			iterator_handle, stats_aggregator_handle,
+			input,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2.
-type ResourceSparseApplyFtrlV2Attr func(optionalAttr)
+// CropAndResizeGradBoxesAttr is an optional argument to CropAndResizeGradBoxes.
+type CropAndResizeGradBoxesAttr func(optionalAttr)
 
-// ResourceSparseApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
+// CropAndResizeGradBoxesMethod sets the optional method attribute to value.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2Attr {
+// value: A string specifying the interpolation method. Only 'bilinear' is
+// supported for now.
+// If not specified, defaults to "bilinear"
+func CropAndResizeGradBoxesMethod(value string) CropAndResizeGradBoxesAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["method"] = value
 	}
 }
 
-// Update relevant entries in '*var' according to the Ftrl-proximal scheme.
-//
-// That is for rows we have grad for, we update var, accum and linear as follows:
-// grad_with_shrinkage = grad + 2 * l2_shrinkage * var
-// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
-// linear += grad_with_shrinkage +
-//     (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-// accum = accum_new
+// Computes the gradient of the crop_and_resize op wrt the input boxes tensor.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	linear: Should be from a Variable().
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 shrinkage regulariation. Must be a scalar.
-//
-//	lr_power: Scaling factor. Must be a scalar.
+//	grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
+//	image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
+// Both `image_height` and `image_width` need to be positive.
+//	boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
+// specifies the coordinates of a box in the `box_ind[i]` image and is specified
+// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
+// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
+// `[0, 1]` interval of normalized image height is mapped to
+// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in
+// which case the sampled crop is an up-down flipped version of the original
+// image. The width dimension is treated similarly. Normalized coordinates
+// outside the `[0, 1]` range are allowed, in which case we use
+// `extrapolation_value` to extrapolate the input image values.
+//	box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
+// The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
 //
-// Returns the created operation.
-func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlV2Attr) (o *tf.Operation) {
+// Returns A 2-D tensor of shape `[num_boxes, 4]`.
+func CropAndResizeGradBoxes(scope *Scope, grads tf.Output, image tf.Output, boxes tf.Output, box_ind tf.Output, optional ...CropAndResizeGradBoxesAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -19542,81 +19671,65 @@ func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, li
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyFtrlV2",
+		Type: "CropAndResizeGradBoxes",
 		Input: []tf.Input{
-			var_, accum, linear, grad, indices, lr, l1, l2, l2_shrinkage, lr_power,
+			grads, image, boxes, box_ind,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Restore a reader to a previously saved state.
+// Saves tensors in V2 checkpoint format.
 //
-// Not all Readers support being restored, so this can produce an
-// Unimplemented error.
+// By default, saves the named tensors in full.  If the caller wishes to save
+// specific slices of full tensors, "shape_and_slices" should be non-empty strings
+// and correspondingly well-formed.
 //
 // Arguments:
-//	reader_handle: Handle to a Reader.
-//	state: Result of a ReaderSerializeState of a Reader with type
-// matching reader_handle.
+//	prefix: Must have a single element. The prefix of the V2 checkpoint to which we
+// write the tensors.
+//	tensor_names: shape {N}. The names of the tensors to be saved.
+//	shape_and_slices: shape {N}.  The slice specs of the tensors to be saved.
+// Empty strings indicate that they are non-partitioned tensors.
+//	tensors: `N` tensors to save.
 //
 // Returns the created operation.
-func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output) (o *tf.Operation) {
+func SaveV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, tensors []tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ReaderRestoreStateV2",
+		Type: "SaveV2",
 		Input: []tf.Input{
-			reader_handle, state,
+			prefix, tensor_names, shape_and_slices, tf.OutputList(tensors),
 		},
 	}
 	return scope.AddOperation(opspec)
 }
 
-// Computes the absolute value of a tensor.
-//
-// Given a tensor `x`, this operation returns a tensor containing the absolute
-// value of each element in `x`. For example, if x is an input element and y is
-// an output element, this operation computes \\(y = |x|\\).
-func Abs(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Abs",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// RandomPoissonAttr is an optional argument to RandomPoisson.
-type RandomPoissonAttr func(optionalAttr)
+// StatsAggregatorHandleAttr is an optional argument to StatsAggregatorHandle.
+type StatsAggregatorHandleAttr func(optionalAttr)
 
-// RandomPoissonSeed sets the optional seed attribute to value.
-// If not specified, defaults to 0
-func RandomPoissonSeed(value int64) RandomPoissonAttr {
+// StatsAggregatorHandleContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func StatsAggregatorHandleContainer(value string) StatsAggregatorHandleAttr {
 	return func(m optionalAttr) {
-		m["seed"] = value
+		m["container"] = value
 	}
 }
 
-// RandomPoissonSeed2 sets the optional seed2 attribute to value.
-// If not specified, defaults to 0
-func RandomPoissonSeed2(value int64) RandomPoissonAttr {
+// StatsAggregatorHandleSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func StatsAggregatorHandleSharedName(value string) StatsAggregatorHandleAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["shared_name"] = value
 	}
 }
 
-// Use RandomPoissonV2 instead.
-//
-// DEPRECATED at GraphDef version 25: Replaced by RandomPoissonV2
-func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonAttr) (output tf.Output) {
+// Creates a statistics manager resource.
+func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -19625,373 +19738,233 @@ func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...Ra
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "RandomPoisson",
-		Input: []tf.Input{
-			shape, rate,
-		},
+		Type: "StatsAggregatorHandle",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Applies softmax to a batched N-D `SparseTensor`.
-//
-// The inputs represent an N-D SparseTensor  with logical shape `[..., B, C]`
-// (where `N >= 2`), and with indices sorted in the canonical lexicographic order.
+// Greedily selects a subset of bounding boxes in descending order of score,
 //
-// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost
-// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly
-// zero elements do not participate*.  Specifically, the algorithm is equivalent
-// to the following:
+// pruning away boxes that have high intersection-over-union (IOU) overlap
+// with previously selected boxes.  Bounding boxes are supplied as
+// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+// diagonal pair of box corners and the coordinates can be provided as normalized
+// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
+// is agnostic to where the origin is in the coordinate system.  Note that this
+// algorithm is invariant to orthogonal transformations and translations
+// of the coordinate system; thus translating or reflections of the coordinate
+// system result in the same boxes being selected by the algorithm.
 //
-//   (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix
-//       with shape `[B, C]`, along the size-C dimension;
-//   (2) Masks out the original implicitly-zero locations;
-//   (3) Renormalizes the remaining elements.
+// The output of this operation is a set of integers indexing into the input
+// collection of bounding boxes representing the selected boxes.  The bounding
+// box coordinates corresponding to the selected indices can then be obtained
+// using the `tf.gather operation`.  For example:
 //
-// Hence, the `SparseTensor` result has exactly the same non-zero indices and
-// shape.
+//   selected_indices = tf.image.non_max_suppression_v2(
+//       boxes, scores, max_output_size, iou_threshold)
+//   selected_boxes = tf.gather(boxes, selected_indices)
 //
 // Arguments:
-//	sp_indices: 2-D.  `NNZ x R` matrix with the indices of non-empty values in a
-// SparseTensor, in canonical ordering.
-//	sp_values: 1-D.  `NNZ` non-empty values corresponding to `sp_indices`.
-//	sp_shape: 1-D.  Shape of the input SparseTensor.
-//
-// Returns 1-D.  The `NNZ` values for the result `SparseTensor`.
-func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSoftmax",
-		Input: []tf.Input{
-			sp_indices, sp_values, sp_shape,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes gradients for SparseSegmentMean.
-//
-// Returns tensor "output" with same shape as grad, except for dimension 0 whose
-// value is output_dim0.
+//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
+//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
+// score corresponding to each box (each row of boxes).
+//	max_output_size: A scalar integer tensor representing the maximum number of
+// boxes to be selected by non max suppression.
+//	iou_threshold: A 0-D float tensor representing the threshold for deciding whether
+// boxes overlap too much with respect to IOU.
 //
-// Arguments:
-//	grad: gradient propagated to the SparseSegmentMean op.
-//	indices: indices passed to the corresponding SparseSegmentMean op.
-//	segment_ids: segment_ids passed to the corresponding SparseSegmentMean op.
-//	output_dim0: dimension 0 of "data" passed to SparseSegmentMean op.
-func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) {
+// Returns A 1-D integer tensor of shape `[M]` representing the selected
+// indices from the boxes tensor, where `M <= max_output_size`.
+func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseSegmentMeanGrad",
+		Type: "NonMaxSuppressionV2",
 		Input: []tf.Input{
-			grad, indices, segment_ids, output_dim0,
+			boxes, scores, max_output_size, iou_threshold,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Applies sparse addition to `input` using individual values or slices
-//
-// from `updates` according to indices `indices`.  The updates are non-aliasing:
-// `input` is only modified in-place if no other operations will use it.
-// Otherwise, a copy of `input` is made.  This operation has a gradient with
-// respect to both `input` and `updates`.
+// Reshapes a tensor.
 //
-// `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+// Given `tensor`, this operation returns a tensor that has the same values
+// as `tensor` with shape `shape`.
 //
-// `indices` must be integer tensor, containing indices into `input`.
-// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+// If one component of `shape` is the special value -1, the size of that dimension
+// is computed so that the total size remains constant.  In particular, a `shape`
+// of `[-1]` flattens into 1-D.  At most one component of `shape` can be -1.
 //
-// The innermost dimension of `indices` (with length `K`) corresponds to
-// indices into elements (if `K = P`) or `(P-K)`-dimensional slices
-// (if `K < P`) along the `K`th dimension of `input`.
+// If `shape` is 1-D or higher, then the operation returns a tensor with shape
+// `shape` filled with the values of `tensor`. In this case, the number of elements
+// implied by `shape` must be the same as the number of elements in `tensor`.
 //
-// `updates` is `Tensor` of rank `Q-1+P-K` with shape:
+// For example:
 //
 // ```
-// [d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].
-// ```
+// # tensor 't' is [1, 2, 3, 4, 5, 6, 7, 8, 9]
+// # tensor 't' has shape [9]
+// reshape(t, [3, 3]) ==> [[1, 2, 3],
+//                         [4, 5, 6],
+//                         [7, 8, 9]]
 //
-// For example, say we want to add 4 scattered elements to a rank-1 tensor to 8
-// elements. In Python, that addition would look like this:
+// # tensor 't' is [[[1, 1], [2, 2]],
+// #                [[3, 3], [4, 4]]]
+// # tensor 't' has shape [2, 2, 2]
+// reshape(t, [2, 4]) ==> [[1, 1, 2, 2],
+//                         [3, 3, 4, 4]]
 //
-//     input = tf.constant([1, 2, 3, 4, 5, 6, 7, 8])
-//     indices = tf.constant([[4], [3], [1], [7]])
-//     updates = tf.constant([9, 10, 11, 12])
-//     output = tf.scatter_nd_non_aliasing_add(input, indices, updates)
-//     with tf.Session() as sess:
-//       print(sess.run(output))
+// # tensor 't' is [[[1, 1, 1],
+// #                 [2, 2, 2]],
+// #                [[3, 3, 3],
+// #                 [4, 4, 4]],
+// #                [[5, 5, 5],
+// #                 [6, 6, 6]]]
+// # tensor 't' has shape [3, 2, 3]
+// # pass '[-1]' to flatten 't'
+// reshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6]
 //
-// The resulting value `output` would look like this:
+// # -1 can also be used to infer the shape
 //
-//     [1, 13, 3, 14, 14, 6, 7, 20]
+// # -1 is inferred to be 9:
+// reshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],
+//                          [4, 4, 4, 5, 5, 5, 6, 6, 6]]
+// # -1 is inferred to be 2:
+// reshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],
+//                          [4, 4, 4, 5, 5, 5, 6, 6, 6]]
+// # -1 is inferred to be 3:
+// reshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1],
+//                               [2, 2, 2],
+//                               [3, 3, 3]],
+//                              [[4, 4, 4],
+//                               [5, 5, 5],
+//                               [6, 6, 6]]]
 //
-// See @{tf.scatter_nd} for more details about how to make updates to slices.
+// # tensor 't' is [7]
+// # shape `[]` reshapes to a scalar
+// reshape(t, []) ==> 7
+// ```
 //
 // Arguments:
-//	input: A Tensor.
-//	indices: A Tensor. Must be one of the following types: `int32`, `int64`.
-// A tensor of indices into `input`.
-//	updates: A Tensor. Must have the same type as ref. A tensor of updated values
-// to add to `input`.
 //
-// Returns A `Tensor` with the same shape as `input`, containing values of `input`
-// updated with `updates`.
-func ScatterNdNonAliasingAdd(scope *Scope, input tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) {
+//	shape: Defines the shape of the output tensor.
+func Reshape(scope *Scope, tensor tf.Output, shape tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ScatterNdNonAliasingAdd",
+		Type: "Reshape",
 		Input: []tf.Input{
-			input, indices, updates,
+			tensor, shape,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// QuantizedReluXAttr is an optional argument to QuantizedReluX.
-type QuantizedReluXAttr func(optionalAttr)
-
-// QuantizedReluXOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_QUINT8
-func QuantizedReluXOutType(value tf.DataType) QuantizedReluXAttr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
-	}
-}
-
-// Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`
-//
-// Arguments:
-//
-//
-//	min_features: The float value that the lowest quantized value represents.
-//	max_features: The float value that the highest quantized value represents.
-//
-// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents.
-func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluXAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) {
+// Creates a dataset that splits a SparseTensor into elements row-wise.
+func SparseTensorSliceDataset(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "QuantizedReluX",
+		Type: "SparseTensorSliceDataset",
 		Input: []tf.Input{
-			features, max_value, min_features, max_features,
+			indices, values, dense_shape,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// UnpackAttr is an optional argument to Unpack.
-type UnpackAttr func(optionalAttr)
-
-// UnpackAxis sets the optional axis attribute to value.
-//
-// value: Dimension along which to unpack.  Negative values wrap around, so the
-// valid range is `[-R, R)`.
-// If not specified, defaults to 0
-func UnpackAxis(value int64) UnpackAttr {
-	return func(m optionalAttr) {
-		m["axis"] = value
-	}
+	return op.Output(0)
 }
 
-// Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors.
-//
-// Unpacks `num` tensors from `value` by chipping it along the `axis` dimension.
-// For example, given a tensor of shape `(A, B, C, D)`;
-//
-// If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]`
-//   and each tensor in `output` will have shape `(B, C, D)`. (Note that the
-//   dimension unpacked along is gone, unlike `split`).
-//
-// If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]`
-//   and each tensor in `output` will have shape `(A, C, D)`.
-// Etc.
-//
-// This is the opposite of `pack`.
-//
-// Arguments:
-//	value: 1-D or higher, with `axis` dimension size equal to `num`.
-//
-//
-// Returns The list of tensors unpacked from `value`.
-func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) {
+// Creates a dataset that concatenates `input_dataset` with `another_dataset`.
+func ConcatenateDataset(scope *Scope, input_dataset tf.Output, another_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num": num}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "Unpack",
+		Type: "ConcatenateDataset",
 		Input: []tf.Input{
-			value,
+			input_dataset, another_dataset,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
-		scope.UpdateErr("Unpack", err)
-		return
-	}
-	return output
+	return op.Output(0)
 }
 
-// Split a `SparseTensor` into `num_split` tensors along one dimension.
-//
-// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices
-// `[0 : shape[split_dim] % num_split]` gets one extra dimension.
-// For example, if `split_dim = 1` and `num_split = 2` and the input is
-//
-//     input_tensor = shape = [2, 7]
-//     [    a   d e  ]
-//     [b c          ]
-//
-// Graphically the output tensors are:
-//
-//     output_tensor[0] = shape = [2, 4]
-//     [    a  ]
-//     [b c    ]
-//
-//     output_tensor[1] = shape = [2, 3]
-//     [ d e  ]
-//     [      ]
-//
-// Arguments:
-//	split_dim: 0-D.  The dimension along which to split.  Must be in the range
-// `[0, rank(shape))`.
-//	indices: 2-D tensor represents the indices of the sparse tensor.
-//	values: 1-D tensor represents the values of the sparse tensor.
-//	shape: 1-D. tensor represents the shape of the sparse tensor.
-// output indices: A list of 1-D tensors represents the indices of the output
-// sparse tensors.
-//	num_split: The number of ways to split.
-//
-// Returns A list of 1-D tensors represents the values of the output sparse
-// tensors.A list of 1-D tensors represents the shape of the output sparse
-// tensors.
-func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) {
+// Creates a dataset that contains the elements of `input_dataset` ignoring errors.
+func IgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_split": num_split}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "SparseSplit",
+		Type: "IgnoreErrorsDataset",
 		Input: []tf.Input{
-			split_dim, indices, values, shape,
+			input_dataset,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
-	}
-	if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
-	}
-	if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
-	}
-	return output_indices, output_values, output_shape
+	return op.Output(0)
 }
 
-// ReduceJoinAttr is an optional argument to ReduceJoin.
-type ReduceJoinAttr func(optionalAttr)
-
-// ReduceJoinKeepDims sets the optional keep_dims attribute to value.
-//
-// value: If `True`, retain reduced dimensions with length `1`.
-// If not specified, defaults to false
-func ReduceJoinKeepDims(value bool) ReduceJoinAttr {
-	return func(m optionalAttr) {
-		m["keep_dims"] = value
-	}
-}
+// CropAndResizeGradImageAttr is an optional argument to CropAndResizeGradImage.
+type CropAndResizeGradImageAttr func(optionalAttr)
 
-// ReduceJoinSeparator sets the optional separator attribute to value.
+// CropAndResizeGradImageMethod sets the optional method attribute to value.
 //
-// value: The separator to use when joining.
-// If not specified, defaults to ""
-func ReduceJoinSeparator(value string) ReduceJoinAttr {
+// value: A string specifying the interpolation method. Only 'bilinear' is
+// supported for now.
+// If not specified, defaults to "bilinear"
+func CropAndResizeGradImageMethod(value string) CropAndResizeGradImageAttr {
 	return func(m optionalAttr) {
-		m["separator"] = value
+		m["method"] = value
 	}
 }
 
-// Joins a string Tensor across the given dimensions.
-//
-// Computes the string join across dimensions in the given string Tensor of shape
-// `[d_0, d_1, ..., d_n-1]`.  Returns a new Tensor created by joining the input
-// strings with the given separator (default: empty string).  Negative indices are
-// counted backwards from the end, with `-1` being equivalent to `n - 1`.
-//
-// For example:
-//
-// ```python
-// # tensor `a` is [["a", "b"], ["c", "d"]]
-// tf.reduce_join(a, 0) ==> ["ac", "bd"]
-// tf.reduce_join(a, 1) ==> ["ab", "cd"]
-// tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"]
-// tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"]
-// tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]]
-// tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]]
-// tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"]
-// tf.reduce_join(a, [0, 1]) ==> ["acbd"]
-// tf.reduce_join(a, [1, 0]) ==> ["abcd"]
-// tf.reduce_join(a, []) ==> ["abcd"]
-// ```
+// Computes the gradient of the crop_and_resize op wrt the input image tensor.
 //
 // Arguments:
-//	inputs: The input to be joined.  All reduced indices must have non-zero size.
-//	reduction_indices: The dimensions to reduce over.  Dimensions are reduced in the
-// order specified.  Omitting `reduction_indices` is equivalent to passing
-// `[n-1, n-2, ..., 0]`.  Negative indices from `-n` to `-1` are supported.
+//	grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
+//	boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
+// specifies the coordinates of a box in the `box_ind[i]` image and is specified
+// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of
+// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the
+// `[0, 1]` interval of normalized image height is mapped to
+// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in
+// which case the sampled crop is an up-down flipped version of the original
+// image. The width dimension is treated similarly. Normalized coordinates
+// outside the `[0, 1]` range are allowed, in which case we use
+// `extrapolation_value` to extrapolate the input image values.
+//	box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
+// The value of `box_ind[i]` specifies the image that the `i`-th box refers to.
+//	image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]`
+// containing the original image size. Both `image_height` and `image_width` need
+// to be positive.
 //
-// Returns Has shape equal to that of the input with reduced dimensions removed or
-// set to `1` depending on `keep_dims`.
-func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, optional ...ReduceJoinAttr) (output tf.Output) {
+//
+// Returns A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
+func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_ind tf.Output, image_size tf.Output, T tf.DataType, optional ...CropAndResizeGradImageAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"T": T}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ReduceJoin",
+		Type: "CropAndResizeGradImage",
 		Input: []tf.Input{
-			inputs, reduction_indices,
+			grads, boxes, box_ind, image_size,
 		},
 		Attrs: attrs,
 	}
@@ -19999,257 +19972,271 @@ func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, opt
 	return op.Output(0)
 }
 
-// Computes element-wise population count (a.k.a. popcount, bitsum, bitcount).
-//
-// For each entry in `x`, calculates the number of `1` (on) bits in the binary
-// representation of that entry.
-//
-// **NOTE**: It is more efficient to first `tf.bitcast` your tensors into
-// `int32` or `int64` and perform the bitcount on the result, than to feed in
-// 8- or 16-bit inputs and then aggregate the resulting counts.
-func PopulationCount(scope *Scope, x tf.Output) (y tf.Output) {
+// Reads and outputs the entire contents of the input filename.
+func ReadFile(scope *Scope, filename tf.Output) (contents tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "PopulationCount",
+		Type: "ReadFile",
 		Input: []tf.Input{
-			x,
+			filename,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// AssertAttr is an optional argument to Assert.
-type AssertAttr func(optionalAttr)
-
-// AssertSummarize sets the optional summarize attribute to value.
+// Concatenates tensors along one dimension.
 //
-// value: Print this many entries of each tensor.
-// If not specified, defaults to 3
-func AssertSummarize(value int64) AssertAttr {
-	return func(m optionalAttr) {
-		m["summarize"] = value
+// Arguments:
+//	values: List of `N` Tensors to concatenate. Their ranks and types must match,
+// and their sizes must match in all dimensions except `concat_dim`.
+//	axis: 0-D.  The dimension along which to concatenate.  Must be in the
+// range [-rank(values), rank(values)).
+//
+// Returns A `Tensor` with the concatenation of values stacked along the
+// `concat_dim` dimension.  This tensor's shape matches that of `values` except
+// in `concat_dim` where it has the sum of the sizes.
+func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ConcatV2",
+		Input: []tf.Input{
+			tf.OutputList(values), axis,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Asserts that the given condition is true.
+// Adds a value to the current value of a variable.
 //
-// If `condition` evaluates to false, print the list of tensors in `data`.
-// `summarize` determines how many entries of the tensors to print.
+// Any ReadVariableOp which depends directly or indirectly on this assign is
+// guaranteed to see the incremented value or a subsequent newer one.
+//
+// Outputs the incremented value, which can be used to totally order the
+// increments to this variable.
 //
 // Arguments:
-//	condition: The condition to evaluate.
-//	data: The tensors to print out when condition is false.
+//	resource: handle to the resource in which to store the variable.
+//	value: the value by which the variable will be incremented.
 //
 // Returns the created operation.
-func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) {
+func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "Assert",
+		Type: "AssignAddVariableOp",
 		Input: []tf.Input{
-			condition, tf.OutputList(data),
+			resource, value,
 		},
-		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// RandomUniformAttr is an optional argument to RandomUniform.
-type RandomUniformAttr func(optionalAttr)
-
-// RandomUniformSeed sets the optional seed attribute to value.
-//
-// value: If either `seed` or `seed2` are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomUniformSeed(value int64) RandomUniformAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
+// Records the latency of producing `input_dataset` elements in a StatsAggregator.
+func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// RandomUniformSeed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomUniformSeed2(value int64) RandomUniformAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "LatencyStatsDataset",
+		Input: []tf.Input{
+			input_dataset, tag,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Outputs random values from a uniform distribution.
+// Convert JSON-encoded Example records to binary protocol buffer strings.
 //
-// The generated values follow a uniform distribution in the range `[0, 1)`. The
-// lower bound 0 is included in the range, while the upper bound 1 is excluded.
+// This op translates a tensor containing Example records, encoded using
+// the [standard JSON
+// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json),
+// into a tensor containing the same records encoded as binary protocol
+// buffers. The resulting tensor can then be fed to any of the other
+// Example-parsing ops.
 //
 // Arguments:
-//	shape: The shape of the output tensor.
-//	dtype: The type of the output.
+//	json_examples: Each string is a JSON object serialized according to the JSON
+// mapping of the Example proto.
 //
-// Returns A tensor of the specified shape filled with uniform random values.
-func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) {
+// Returns Each string is a binary Example protocol buffer corresponding
+// to the respective element of `json_examples`.
+func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "RandomUniform",
+		Type: "DecodeJSONExample",
 		Input: []tf.Input{
-			shape,
+			json_examples,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl.
-type ResourceApplyFtrlAttr func(optionalAttr)
-
-// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value.
+// Computes the grayscale dilation of 4-D `input` and 3-D `filter` tensors.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
+// The `input` tensor has shape `[batch, in_height, in_width, depth]` and the
+// `filter` tensor has shape `[filter_height, filter_width, depth]`, i.e., each
+// input channel is processed independently of the others with its own structuring
+// function. The `output` tensor has shape
+// `[batch, out_height, out_width, depth]`. The spatial dimensions of the output
+// tensor depend on the `padding` algorithm. We currently only support the default
+// "NHWC" `data_format`.
+//
+// In detail, the grayscale morphological 2-D dilation is the max-sum correlation
+// (for consistency with `conv2d`, we use unmirrored filters):
+//
+//     output[b, y, x, c] =
+//        max_{dy, dx} input[b,
+//                           strides[1] * y + rates[1] * dy,
+//                           strides[2] * x + rates[2] * dx,
+//                           c] +
+//                     filter[dy, dx, c]
+//
+// Max-pooling is a special case when the filter has size equal to the pooling
+// kernel size and contains all zeros.
+//
+// Note on duality: The dilation of `input` by the `filter` is equal to the
+// negation of the erosion of `-input` by the reflected `filter`.
+//
+// Arguments:
+//	input: 4-D with shape `[batch, in_height, in_width, depth]`.
+//	filter: 3-D with shape `[filter_height, filter_width, depth]`.
+//	strides: The stride of the sliding window for each dimension of the input
+// tensor. Must be: `[1, stride_height, stride_width, 1]`.
+//	rates: The input stride for atrous morphological dilation. Must be:
+// `[1, rate_height, rate_width, 1]`.
+//	padding: The type of padding algorithm to use.
+//
+// Returns 4-D with shape `[batch, out_height, out_width, depth]`.
+func Dilation2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, rates []int64, padding string) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding}
+	opspec := tf.OpSpec{
+		Type: "Dilation2D",
+		Input: []tf.Input{
+			input, filter,
+		},
+		Attrs: attrs,
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Update '*var' according to the Ftrl-proximal scheme.
-//
-// accum_new = accum + grad * grad
-// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-// accum = accum_new
+// Converts the given variant tensor to an iterator and stores it in the given resource.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	linear: Should be from a Variable().
-//	grad: The gradient.
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regulariation. Must be a scalar.
-//	l2: L2 regulariation. Must be a scalar.
-//	lr_power: Scaling factor. Must be a scalar.
+//	resource_handle: A handle to an iterator resource.
+//	serialized: A variant tensor storing the state of the iterator contained in the
+// resource.
 //
 // Returns the created operation.
-func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) {
+func DeserializeIterator(scope *Scope, resource_handle tf.Output, serialized tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyFtrl",
+		Type: "DeserializeIterator",
 		Input: []tf.Input{
-			var_, accum, linear, grad, lr, l1, l2, lr_power,
+			resource_handle, serialized,
 		},
-		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// AnyAttr is an optional argument to Any.
-type AnyAttr func(optionalAttr)
+// TensorArrayConcatV2Attr is an optional argument to TensorArrayConcatV2.
+type TensorArrayConcatV2Attr func(optionalAttr)
 
-// AnyKeepDims sets the optional keep_dims attribute to value.
-//
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func AnyKeepDims(value bool) AnyAttr {
+// TensorArrayConcatV2ElementShapeExcept0 sets the optional element_shape_except0 attribute to value.
+// If not specified, defaults to <unknown_rank:true >
+func TensorArrayConcatV2ElementShapeExcept0(value tf.Shape) TensorArrayConcatV2Attr {
 	return func(m optionalAttr) {
-		m["keep_dims"] = value
+		m["element_shape_except0"] = value
 	}
 }
 
-// Computes the "logical or" of elements across dimensions of a tensor.
-//
-// Reduces `input` along the dimensions given in `reduction_indices`. Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-// retained with length 1.
-//
-// Arguments:
-//	input: The tensor to reduce.
-//	reduction_indices: The dimensions to reduce. Must be in the range
-// `[-rank(input), rank(input))`.
-//
-// Returns The reduced tensor.
-func Any(scope *Scope, input tf.Output, reduction_indices tf.Output, optional ...AnyAttr) (output tf.Output) {
+// Deprecated. Use TensorArrayConcatV3
+func TensorArrayConcatV2(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV2Attr) (value tf.Output, lengths tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"dtype": dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Any",
+		Type: "TensorArrayConcatV2",
 		Input: []tf.Input{
-			input, reduction_indices,
+			handle, flow_in,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Compute the Hurwitz zeta function \\(\zeta(x, q)\\).
+// Creates a dataset that batches and pads `batch_size` elements from the input.
 //
-// The Hurwitz zeta function is defined as:
+// Arguments:
 //
+//	batch_size: A scalar representing the number of elements to accumulate in a
+// batch.
+//	padded_shapes: A list of int64 tensors representing the desired padded shapes
+// of the corresponding output components. These shapes may be partially
+// specified, using `-1` to indicate that a particular dimension should be
+// padded to the maximum size of all batch elements.
+//	padding_values: A list of scalars containing the padding value to use for
+// each of the outputs.
 //
-// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\)
-func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) {
+func PaddedBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, padded_shapes []tf.Output, padding_values []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "Zeta",
+		Type: "PaddedBatchDataset",
 		Input: []tf.Input{
-			x, q,
+			input_dataset, batch_size, tf.OutputList(padded_shapes), tf.OutputList(padding_values),
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Creates a dataset that skips `count` elements from the `input_dataset`.
+// Creates a dataset that batches input elements into a SparseTensor.
 //
 // Arguments:
-//
-//	count: A scalar representing the number of elements from the `input_dataset`
-// that should be skipped.  If count is -1, skips everything.
+//	input_dataset: A handle to an input dataset. Must have a single component.
+//	batch_size: A scalar representing the number of elements to accumulate in a
+// batch.
+//	row_shape: A vector representing the dense shape of each row in the produced
+// SparseTensor. The shape may be partially specified, using `-1` to indicate
+// that a particular dimension should use the maximum size of all batch elements.
 //
 //
-func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+func DenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "SkipDataset",
+		Type: "DenseToSparseBatchDataset",
 		Input: []tf.Input{
-			input_dataset, count,
+			input_dataset, batch_size, row_shape,
 		},
 		Attrs: attrs,
 	}
@@ -20257,31 +20244,51 @@ func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_
 	return op.Output(0)
 }
 
-// ImagAttr is an optional argument to Imag.
-type ImagAttr func(optionalAttr)
+// Deprecated. Use TensorArrayGradV3
+func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"source": source}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayGradV2",
+		Input: []tf.Input{
+			handle, flow_in,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// ImagTout sets the optional Tout attribute to value.
-// If not specified, defaults to DT_FLOAT
-func ImagTout(value tf.DataType) ImagAttr {
+// ResourceSparseApplyAdadeltaAttr is an optional argument to ResourceSparseApplyAdadelta.
+type ResourceSparseApplyAdadeltaAttr func(optionalAttr)
+
+// ResourceSparseApplyAdadeltaUseLocking sets the optional use_locking attribute to value.
+//
+// value: If True, updating of the var and accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceSparseApplyAdadeltaUseLocking(value bool) ResourceSparseApplyAdadeltaAttr {
 	return func(m optionalAttr) {
-		m["Tout"] = value
+		m["use_locking"] = value
 	}
 }
 
-// Returns the imaginary part of a complex number.
+// var: Should be from a Variable().
 //
-// Given a tensor `input` of complex numbers, this operation returns a tensor of
-// type `float` that is the imaginary part of each element in `input`. All
-// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a*
-// is the real part and *b* is the imaginary part returned by this operation.
+// Arguments:
 //
-// For example:
+//	accum: Should be from a Variable().
+//	accum_update: : Should be from a Variable().
+//	lr: Learning rate. Must be a scalar.
+//	rho: Decay factor. Must be a scalar.
+//	epsilon: Constant factor. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
 //
-// ```
-// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-// tf.imag(input) ==> [4.75, 5.75]
-// ```
-func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) {
+// Returns the created operation.
+func ResourceSparseApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdadeltaAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -20290,564 +20297,495 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Imag",
+		Type: "ResourceSparseApplyAdadelta",
 		Input: []tf.Input{
-			input,
+			var_, accum, accum_update, lr, rho, epsilon, grad, indices,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// ComplexAttr is an optional argument to Complex.
-type ComplexAttr func(optionalAttr)
-
-// ComplexTout sets the optional Tout attribute to value.
-// If not specified, defaults to DT_COMPLEX64
-func ComplexTout(value tf.DataType) ComplexAttr {
-	return func(m optionalAttr) {
-		m["Tout"] = value
+// Identity op for gradient debugging.
+//
+// This op is hidden from public in Python. It is used by TensorFlow Debugger to
+// register gradient tensors for gradient debugging.
+// This op operates on non-reference-type tensors.
+func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "DebugGradientIdentity",
+		Input: []tf.Input{
+			input,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Converts two real numbers to a complex number.
+// Return substrings from `Tensor` of strings.
 //
-// Given a tensor `real` representing the real part of a complex number, and a
-// tensor `imag` representing the imaginary part of a complex number, this
-// operation returns complex numbers elementwise of the form \\(a + bj\\), where
-// *a* represents the `real` part and *b* represents the `imag` part.
+// For each string in the input `Tensor`, creates a substring starting at index
+// `pos` with a total length of `len`.
 //
-// The input tensors `real` and `imag` must have the same shape.
+// If `len` defines a substring that would extend beyond the length of the input
+// string, then as many characters as possible are used.
 //
-// For example:
+// If `pos` is negative or specifies a character index larger than any of the input
+// strings, then an `InvalidArgumentError` is thrown.
+//
+// `pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on
+// Op creation.
+//
+// *NOTE*: `Substr` supports broadcasting up to two dimensions. More about
+// broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+//
+// ---
+//
+// Examples
+//
+// Using scalar `pos` and `len`:
+//
+// ```python
+// input = [b'Hello', b'World']
+// position = 1
+// length = 3
+//
+// output = [b'ell', b'orl']
+// ```
+//
+// Using `pos` and `len` with same shape as `input`:
+//
+// ```python
+// input = [[b'ten', b'eleven', b'twelve'],
+//          [b'thirteen', b'fourteen', b'fifteen'],
+//          [b'sixteen', b'seventeen', b'eighteen']]
+// position = [[1, 2, 3],
+//             [1, 2, 3],
+//             [1, 2, 3]]
+// length =   [[2, 3, 4],
+//             [4, 3, 2],
+//             [5, 5, 5]]
+//
+// output = [[b'en', b'eve', b'lve'],
+//           [b'hirt', b'urt', b'te'],
+//           [b'ixtee', b'vente', b'hteen']]
+// ```
+//
+// Broadcasting `pos` and `len` onto `input`:
+//
+// ```
+// input = [[b'ten', b'eleven', b'twelve'],
+//          [b'thirteen', b'fourteen', b'fifteen'],
+//          [b'sixteen', b'seventeen', b'eighteen'],
+//          [b'nineteen', b'twenty', b'twentyone']]
+// position = [1, 2, 3]
+// length =   [1, 2, 3]
+//
+// output = [[b'e', b'ev', b'lve'],
+//           [b'h', b'ur', b'tee'],
+//           [b'i', b've', b'hte'],
+//           [b'i', b'en', b'nty']]
+// ```
+//
+// Broadcasting `input` onto `pos` and `len`:
 //
 // ```
-// # tensor 'real' is [2.25, 3.25]
-// # tensor `imag` is [4.75, 5.75]
-// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]]
+// input = b'thirteen'
+// position = [1, 5, 7]
+// length =   [3, 2, 1]
+//
+// output = [b'hir', b'ee', b'n']
 // ```
-func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) {
+//
+// Arguments:
+//	input: Tensor of strings
+//	pos: Scalar defining the position of first character in each substring
+//	len: Scalar defining the number of characters to include in each substring
+//
+// Returns Tensor of substrings
+func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "Complex",
+		Type: "Substr",
 		Input: []tf.Input{
-			real, imag,
+			input, pos, len,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Inverse real-valued fast Fourier transform.
-//
-// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued
-// signal over the inner-most dimension of `input`.
-//
-// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the
-// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If
-// `fft_length` is not provided, it is computed from the size of the inner-most
-// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to
-// compute `input` is odd, it should be provided since it cannot be inferred
-// properly.
-//
-// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller
-// than the corresponding dimension of `input`, the dimension is cropped. If it is
-// larger, the dimension is padded with zeros.
+// Creates a Dataset that returns pseudorandom numbers.
 //
 // Arguments:
-//	input: A complex64 tensor.
-//	fft_length: An int32 tensor of shape [1]. The FFT length.
+//	seed: A scalar seed for the random number generator. If either seed or
+// seed2 is set to be non-zero, the random number generator is seeded
+// by the given seed.  Otherwise, a random seed is used.
+//	seed2: A second scalar seed to avoid seed collision.
 //
-// Returns A float32 tensor of the same rank as `input`. The inner-most
-//   dimension of `input` is replaced with the `fft_length` samples of its inverse
-//   1D Fourier transform.
 //
-// @compatibility(numpy)
-// Equivalent to np.fft.irfft
-// @end_compatibility
-func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+func RandomDataset(scope *Scope, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "IRFFT",
+		Type: "RandomDataset",
 		Input: []tf.Input{
-			input, fft_length,
+			seed, seed2,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Adds a value to the current value of a variable.
+// Creates a dataset that shuffles and repeats elements from `input_dataset`
 //
-// Any ReadVariableOp which depends directly or indirectly on this assign is
-// guaranteed to see the incremented value or a subsequent newer one.
-//
-// Outputs the incremented value, which can be used to totally order the
-// increments to this variable.
+// pseudorandomly.
 //
 // Arguments:
-//	resource: handle to the resource in which to store the variable.
-//	value: the value by which the variable will be incremented.
 //
-// Returns the created operation.
-func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "AssignAddVariableOp",
-		Input: []tf.Input{
-			resource, value,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Computes inverse hyperbolic sine of x element-wise.
-func Asinh(scope *Scope, x tf.Output) (y tf.Output) {
+//	buffer_size: The number of output elements to buffer in an iterator over
+// this dataset. Compare with the `min_after_dequeue` attr when creating a
+// `RandomShuffleQueue`.
+//	seed: A scalar seed for the random number generator. If either `seed` or
+// `seed2` is set to be non-zero, the random number generator is seeded
+// by the given seed.  Otherwise, a random seed is used.
+//	seed2: A second scalar seed to avoid seed collision.
+//	count: A scalar representing the number of times the underlying dataset
+// should be repeated. The default is `-1`, which results in infinite repetition.
+//
+//
+func ShuffleAndRepeatDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "Asinh",
+		Type: "ShuffleAndRepeatDataset",
 		Input: []tf.Input{
-			x,
+			input_dataset, buffer_size, seed, seed2, count,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Real-valued fast Fourier transform.
-//
-// Computes the 1-dimensional discrete Fourier transform of a real-valued signal
-// over the inner-most dimension of `input`.
-//
-// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the
-// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term,
-// followed by the `fft_length / 2` positive-frequency terms.
+// Creates a dataset that caches elements from `input_dataset`.
 //
-// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the
-// corresponding dimension of `input`, the dimension is cropped. If it is larger,
-// the dimension is padded with zeros.
+// A CacheDataset will iterate over the input_dataset, and store tensors. If the
+// cache already exists, the cache will be used. If the cache is inappropriate
+// (e.g. cannot be opened, contains tensors of the wrong shape / size), an error
+// will the returned when used.
 //
 // Arguments:
-//	input: A float32 tensor.
-//	fft_length: An int32 tensor of shape [1]. The FFT length.
 //
-// Returns A complex64 tensor of the same rank as `input`. The inner-most
-//   dimension of `input` is replaced with the `fft_length / 2 + 1` unique
-//   frequency components of its 1D Fourier transform.
+//	filename: A path on the filesystem where we should cache the dataset. Note: this
+// will be a directory.
 //
-// @compatibility(numpy)
-// Equivalent to np.fft.rfft
-// @end_compatibility
-func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+//
+func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "RFFT",
+		Type: "CacheDataset",
 		Input: []tf.Input{
-			input, fft_length,
+			input_dataset, filename,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// OrderedMapStageAttr is an optional argument to OrderedMapStage.
-type OrderedMapStageAttr func(optionalAttr)
-
-// OrderedMapStageCapacity sets the optional capacity attribute to value.
-//
-// value: Maximum number of elements in the Staging Area. If > 0, inserts
-// on the container will block when the capacity is reached.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func OrderedMapStageCapacity(value int64) OrderedMapStageAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// OrderedMapStageContainer sets the optional container attribute to value.
-//
-// value: If non-empty, this queue is placed in the given container. Otherwise,
-// a default container is used.
-// If not specified, defaults to ""
-func OrderedMapStageContainer(value string) OrderedMapStageAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
+// PlaceholderAttr is an optional argument to Placeholder.
+type PlaceholderAttr func(optionalAttr)
 
-// OrderedMapStageSharedName sets the optional shared_name attribute to value.
+// PlaceholderShape sets the optional shape attribute to value.
 //
-// value: It is necessary to match this name to the matching Unstage Op.
-// If not specified, defaults to ""
-func OrderedMapStageSharedName(value string) OrderedMapStageAttr {
+// value: (Optional) The shape of the tensor. If the shape has 0 dimensions, the
+// shape is unconstrained.
+// If not specified, defaults to <unknown_rank:true >
+func PlaceholderShape(value tf.Shape) PlaceholderAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["shape"] = value
 	}
 }
 
-// Stage (key, values) in the underlying container which behaves like a ordered
+// A placeholder op for a value that will be fed into the computation.
 //
-// associative container.   Elements are ordered by key.
+// N.B. This operation will fail with an error if it is executed. It is
+// intended as a way to represent a value that will always be fed, and to
+// provide attrs that enable the fed value to be checked at runtime.
 //
 // Arguments:
-//	key: int64
-//
-//	values: a list of tensors
-// dtypes A list of data types that inserted values should adhere to.
-//
+//	dtype: The type of elements in the tensor.
 //
-// Returns the created operation.
-func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) {
+// Returns A placeholder tensor that must be replaced using the feed mechanism.
+func Placeholder(scope *Scope, dtype tf.DataType, optional ...PlaceholderAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
+	attrs := map[string]interface{}{"dtype": dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "OrderedMapStage",
-		Input: []tf.Input{
-			key, indices, tf.OutputList(values),
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
+		Type: "Placeholder",
 
-// Computes the gradient for the tanh of `x` wrt its input.
-//
-// Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy`
-// is the corresponding input gradient.
-func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TanhGrad",
-		Input: []tf.Input{
-			y, dy,
-		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Outputs all keys and values in the table.
+// Creates a dataset that executes a SQL query and emits rows of the result set.
 //
 // Arguments:
-//	table_handle: Handle to the table.
-//
+//	driver_name: The database type. Currently, the only supported type is 'sqlite'.
+//	data_source_name: A connection string to connect to the database.
+//	query: A SQL query to execute.
 //
 //
-// Returns Vector of all keys present in the table.Tensor of all values in the table. Indexed in parallel with `keys`.
-func LookupTableExportV2(scope *Scope, table_handle tf.Output, Tkeys tf.DataType, Tvalues tf.DataType) (keys tf.Output, values tf.Output) {
+func SqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"Tkeys": Tkeys, "Tvalues": Tvalues}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "LookupTableExportV2",
+		Type: "SqlDataset",
 		Input: []tf.Input{
-			table_handle,
+			driver_name, data_source_name, query,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// Converts each string in the input Tensor to its hash mod by a number of buckets.
-//
-// The hash function is deterministic on the content of the string within the
-// process and will never change. However, it is not suitable for cryptography.
-// This function may be used when CPU time is scarce and inputs are trusted or
-// unimportant. There is a risk of adversaries constructing inputs that all hash
-// to the same bucket. To prevent this problem, use a strong hash function with
-// `tf.string_to_hash_bucket_strong`.
+// Creates a dataset that emits the records from one or more binary files.
 //
 // Arguments:
-//	input: The strings to assign a hash bucket.
-//	num_buckets: The number of buckets.
-//
-// Returns A Tensor of the same shape as the input `string_tensor`.
-func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) {
+//	filenames: A scalar or a vector containing the name(s) of the file(s) to be
+// read.
+//	header_bytes: A scalar representing the number of bytes to skip at the
+// beginning of a file.
+//	record_bytes: A scalar representing the number of bytes in each record.
+//	footer_bytes: A scalar representing the number of bytes to skip at the end
+// of a file.
+//	buffer_size: A scalar representing the number of bytes to buffer. Must be > 0.
+func FixedLengthRecordDataset(scope *Scope, filenames tf.Output, header_bytes tf.Output, record_bytes tf.Output, footer_bytes tf.Output, buffer_size tf.Output) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_buckets": num_buckets}
 	opspec := tf.OpSpec{
-		Type: "StringToHashBucketFast",
+		Type: "FixedLengthRecordDataset",
 		Input: []tf.Input{
-			input,
+			filenames, header_bytes, record_bytes, footer_bytes, buffer_size,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3.
-type TensorArrayGatherV3Attr func(optionalAttr)
-
-// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value.
+// Slice a `SparseTensor` based on the `start` and `size`.
 //
-// value: The expected shape of an element, if known. Used to
-// validate the shapes of TensorArray elements. If this shape is not
-// fully specified, gathering zero-size TensorArrays is an error.
-// If not specified, defaults to <unknown_rank:true >
-func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr {
-	return func(m optionalAttr) {
-		m["element_shape"] = value
-	}
-}
-
-// Gather specific elements from the TensorArray into output `value`.
+// For example, if the input is
 //
-// All elements selected by `indices` must have the same shape.
+//     input_tensor = shape = [2, 7]
+//     [    a   d e  ]
+//     [b c          ]
+//
+// Graphically the output tensors are:
+//
+//     sparse_slice([0, 0], [2, 4]) = shape = [2, 4]
+//     [    a  ]
+//     [b c    ]
+//
+//     sparse_slice([0, 4], [2, 3]) = shape = [2, 3]
+//     [ d e  ]
+//     [      ]
 //
 // Arguments:
-//	handle: The handle to a TensorArray.
-//	indices: The locations in the TensorArray from which to read tensor elements.
-//	flow_in: A float scalar that enforces proper chaining of operations.
-//	dtype: The type of the elem that is returned.
+//	indices: 2-D tensor represents the indices of the sparse tensor.
+//	values: 1-D tensor represents the values of the sparse tensor.
+//	shape: 1-D. tensor represents the shape of the sparse tensor.
+//	start: 1-D. tensor represents the start of the slice.
+//	size: 1-D. tensor represents the size of the slice.
+// output indices: A list of 1-D tensors represents the indices of the output
+// sparse tensors.
 //
-// Returns All of the elements in the TensorArray, concatenated along a new
-// axis (the new dimension 0).
-func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) {
+// Returns A list of 1-D tensors represents the values of the output sparse
+// tensors.A list of 1-D tensors represents the shape of the output sparse
+// tensors.
+func SparseSlice(scope *Scope, indices tf.Output, values tf.Output, shape tf.Output, start tf.Output, size tf.Output) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayGatherV3",
+		Type: "SparseSlice",
 		Input: []tf.Input{
-			handle, indices, flow_in,
+			indices, values, shape, start, size,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Deprecated. Disallowed in GraphDef version >= 2.
+// Concatenates quantized tensors along one dimension.
 //
-// DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead
-func AdjustContrast(scope *Scope, images tf.Output, contrast_factor tf.Output, min_value tf.Output, max_value tf.Output) (output tf.Output) {
+// Arguments:
+//	concat_dim: 0-D.  The dimension along which to concatenate.  Must be in the
+// range [0, rank(values)).
+//	values: The `N` Tensors to concatenate. Their ranks and types must match,
+// and their sizes must match in all dimensions except `concat_dim`.
+//	input_mins: The minimum scalar values for each of the input tensors.
+//	input_maxes: The maximum scalar values for each of the input tensors.
+//
+// Returns A `Tensor` with the concatenation of values stacked along the
+// `concat_dim` dimension.  This tensor's shape matches that of `values` except
+// in `concat_dim` where it has the sum of the sizes.The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents.
+func QuantizedConcat(scope *Scope, concat_dim tf.Output, values []tf.Output, input_mins []tf.Output, input_maxes []tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "AdjustContrast",
+		Type: "QuantizedConcat",
 		Input: []tf.Input{
-			images, contrast_factor, min_value, max_value,
+			concat_dim, tf.OutputList(values), tf.OutputList(input_mins), tf.OutputList(input_maxes),
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad.
-type MaxPoolGradGradAttr func(optionalAttr)
-
-// MaxPoolGradGradDataFormat sets the optional data_format attribute to value.
+// Gradients for batch normalization.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Computes second-order gradients of the maxpooling function.
+// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization()
+//
+// This op is deprecated. See `tf.nn.batch_normalization`.
 //
 // Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: 4-D.  Gradients of gradients w.r.t. the input of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
+//	t: A 4D input Tensor.
+//	m: A 1D mean Tensor with size matching the last dimension of t.
+// This is the first output from tf.nn.moments,
+// or a saved moving average thereof.
+//	v: A 1D variance Tensor with size matching the last dimension of t.
+// This is the second output from tf.nn.moments,
+// or a saved moving average thereof.
+//	gamma: A 1D gamma Tensor with size matching the last dimension of t.
+// If "scale_after_normalization" is true, this Tensor will be multiplied
+// with the normalized Tensor.
+//	backprop: 4D backprop Tensor.
+//	variance_epsilon: A small float number to avoid dividing by 0.
+//	scale_after_normalization: A bool indicating whether the resulted tensor
+// needs to be multiplied with gamma.
 //
-// Returns Gradients of gradients w.r.t. the input to `max_pool`.
-func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) {
+// Returns 4D backprop tensor for input.1D backprop tensor for mean.1D backprop tensor for variance.1D backprop tensor for beta.1D backprop tensor for gamma.
+func BatchNormWithGlobalNormalizationGrad(scope *Scope, t tf.Output, m tf.Output, v tf.Output, gamma tf.Output, backprop tf.Output, variance_epsilon float32, scale_after_normalization bool) (dx tf.Output, dm tf.Output, dv tf.Output, db tf.Output, dg tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization}
 	opspec := tf.OpSpec{
-		Type: "MaxPoolGradGrad",
+		Type: "BatchNormWithGlobalNormalizationGrad",
 		Input: []tf.Input{
-			orig_input, orig_output, grad,
+			t, m, v, gamma, backprop,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
 }
 
-// 3D real-valued fast Fourier transform.
-//
-// Computes the 3-dimensional discrete Fourier transform of a real-valued signal
-// over the inner-most 3 dimensions of `input`.
-//
-// Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the
-// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension
-// of `output`: the zero-frequency term, followed by the `fft_length / 2`
-// positive-frequency terms.
-//
-// Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the
-// corresponding dimension of `input`, the dimension is cropped. If it is larger,
-// the dimension is padded with zeros.
+// Creates a dataset that emits the records from one or more TFRecord files.
 //
 // Arguments:
-//	input: A float32 tensor.
-//	fft_length: An int32 tensor of shape [3]. The FFT length for each dimension.
-//
-// Returns A complex64 tensor of the same rank as `input`. The inner-most 3
-//   dimensions of `input` are replaced with the their 3D Fourier transform. The
-//   inner-most dimension contains `fft_length / 2 + 1` unique frequency
-//   components.
-//
-// @compatibility(numpy)
-// Equivalent to np.fft.rfftn with 3 dimensions.
-// @end_compatibility
-func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) {
+//	filenames: A scalar or vector containing the name(s) of the file(s) to be
+// read.
+//	compression_type: A scalar containing either (i) the empty string (no
+// compression), (ii) "ZLIB", or (iii) "GZIP".
+//	buffer_size: A scalar representing the number of bytes to buffer. A value of
+// 0 means no buffering will be performed.
+func TFRecordDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "RFFT3D",
+		Type: "TFRecordDataset",
 		Input: []tf.Input{
-			input, fft_length,
+			filenames, compression_type, buffer_size,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3.
-type QuantizeAndDequantizeV3Attr func(optionalAttr)
+// FakeQuantWithMinMaxArgsGradientAttr is an optional argument to FakeQuantWithMinMaxArgsGradient.
+type FakeQuantWithMinMaxArgsGradientAttr func(optionalAttr)
 
-// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value.
-// If not specified, defaults to true
-func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr {
+// FakeQuantWithMinMaxArgsGradientMin sets the optional min attribute to value.
+// If not specified, defaults to -6
+func FakeQuantWithMinMaxArgsGradientMin(value float32) FakeQuantWithMinMaxArgsGradientAttr {
 	return func(m optionalAttr) {
-		m["signed_input"] = value
+		m["min"] = value
 	}
 }
 
-// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value.
-// If not specified, defaults to true
-func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr {
+// FakeQuantWithMinMaxArgsGradientMax sets the optional max attribute to value.
+// If not specified, defaults to 6
+func FakeQuantWithMinMaxArgsGradientMax(value float32) FakeQuantWithMinMaxArgsGradientAttr {
 	return func(m optionalAttr) {
-		m["range_given"] = value
+		m["max"] = value
 	}
 }
 
-// Quantizes then dequantizes a tensor.
-//
-// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a
-// tensor, so its value can change during training.
-func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QuantizeAndDequantizeV3",
-		Input: []tf.Input{
-			input, input_min, input_max, num_bits,
-		},
-		Attrs: attrs,
+// FakeQuantWithMinMaxArgsGradientNumBits sets the optional num_bits attribute to value.
+// If not specified, defaults to 8
+func FakeQuantWithMinMaxArgsGradientNumBits(value int64) FakeQuantWithMinMaxArgsGradientAttr {
+	return func(m optionalAttr) {
+		m["num_bits"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// AvgPool3DAttr is an optional argument to AvgPool3D.
-type AvgPool3DAttr func(optionalAttr)
-
-// AvgPool3DDataFormat sets the optional data_format attribute to value.
-//
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func AvgPool3DDataFormat(value string) AvgPool3DAttr {
+// FakeQuantWithMinMaxArgsGradientNarrowRange sets the optional narrow_range attribute to value.
+// If not specified, defaults to false
+func FakeQuantWithMinMaxArgsGradientNarrowRange(value bool) FakeQuantWithMinMaxArgsGradientAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["narrow_range"] = value
 	}
 }
 
-// Performs 3D average pooling on the input.
+// Compute gradients for a FakeQuantWithMinMaxArgs operation.
 //
 // Arguments:
-//	input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over.
-//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
-// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
+//	gradients: Backpropagated gradients above the FakeQuantWithMinMaxArgs operation.
+//	inputs: Values passed as inputs to the FakeQuantWithMinMaxArgs operation.
 //
-// Returns The average pooled output tensor.
-func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) {
+// Returns Backpropagated gradients below the FakeQuantWithMinMaxArgs operation:
+// `gradients * (inputs >= min && inputs <= max)`.
+func FakeQuantWithMinMaxArgsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, optional ...FakeQuantWithMinMaxArgsGradientAttr) (backprops tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AvgPool3D",
+		Type: "FakeQuantWithMinMaxArgsGradient",
 		Input: []tf.Input{
-			input,
+			gradients, inputs,
 		},
 		Attrs: attrs,
 	}
@@ -20855,76 +20793,104 @@ func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, pa
 	return op.Output(0)
 }
 
-// Produces the max pool of the input tensor for quantized types.
+// BatchToSpace for 4-D tensors of type T.
+//
+// This is a legacy version of the more general BatchToSpaceND.
+//
+// Rearranges (permutes) data from batch into blocks of spatial data, followed by
+// cropping. This is the reverse transformation of SpaceToBatch. More specifically,
+// this op outputs a copy of the input tensor where values from the `batch`
+// dimension are moved in spatial blocks to the `height` and `width` dimensions,
+// followed by cropping along the `height` and `width` dimensions.
 //
 // Arguments:
-//	input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
-//	min_input: The float value that the lowest quantized input value represents.
-//	max_input: The float value that the highest quantized input value represents.
-//	ksize: The size of the window for each dimension of the input tensor.
-// The length must be 4 to match the number of dimensions of the input.
-//	strides: The stride of the sliding window for each dimension of the input
-// tensor. The length must be 4 to match the number of dimensions of the input.
-//	padding: The type of padding algorithm to use.
+//	input: 4-D tensor with shape
+// `[batch*block_size*block_size, height_pad/block_size, width_pad/block_size,
+//   depth]`. Note that the batch size of the input tensor must be divisible by
+// `block_size * block_size`.
+//	crops: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies
+// how many elements to crop from the intermediate result across the spatial
+// dimensions as follows:
 //
-// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
-func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	opspec := tf.OpSpec{
-		Type: "QuantizedMaxPool",
-		Input: []tf.Input{
-			input, min_input, max_input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// AvgPool3DGradAttr is an optional argument to AvgPool3DGrad.
-type AvgPool3DGradAttr func(optionalAttr)
-
-// AvgPool3DGradDataFormat sets the optional data_format attribute to value.
+//     crops = [[crop_top, crop_bottom], [crop_left, crop_right]]
 //
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func AvgPool3DGradDataFormat(value string) AvgPool3DGradAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Computes gradients of average pooling function.
 //
-// Arguments:
-//	orig_input_shape: The original input dimensions.
-//	grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
-//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
-// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
+// Returns 4-D with shape `[batch, height, width, depth]`, where:
 //
-// Returns The backprop for input.
-func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DGradAttr) (output tf.Output) {
+//       height = height_pad - crop_top - crop_bottom
+//       width = width_pad - crop_left - crop_right
+//
+// The attr `block_size` must be greater than one. It indicates the block size.
+//
+// Some examples:
+//
+// (1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2:
+//
+// ```
+// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
+// ```
+//
+// The output tensor has shape `[1, 2, 2, 1]` and value:
+//
+// ```
+// x = [[[[1], [2]], [[3], [4]]]]
+// ```
+//
+// (2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2:
+//
+// ```
+// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
+// ```
+//
+// The output tensor has shape `[1, 2, 2, 3]` and value:
+//
+// ```
+// x = [[[[1, 2, 3], [4, 5, 6]],
+//       [[7, 8, 9], [10, 11, 12]]]]
+// ```
+//
+// (3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2:
+//
+// ```
+// x = [[[[1], [3]], [[9], [11]]],
+//      [[[2], [4]], [[10], [12]]],
+//      [[[5], [7]], [[13], [15]]],
+//      [[[6], [8]], [[14], [16]]]]
+// ```
+//
+// The output tensor has shape `[1, 4, 4, 1]` and value:
+//
+// ```
+// x = [[[1],   [2],  [3],  [4]],
+//      [[5],   [6],  [7],  [8]],
+//      [[9],  [10], [11],  [12]],
+//      [[13], [14], [15],  [16]]]
+// ```
+//
+// (4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2:
+//
+// ```
+// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],
+//      [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]
+// ```
+//
+// The output tensor has shape `[2, 2, 4, 1]` and value:
+//
+// ```
+// x = [[[[1], [3]], [[5], [7]]],
+//      [[[2], [4]], [[10], [12]]],
+//      [[[5], [7]], [[13], [15]]],
+//      [[[6], [8]], [[14], [16]]]]
+// ```
+func BatchToSpace(scope *Scope, input tf.Output, crops tf.Output, block_size int64) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"block_size": block_size}
 	opspec := tf.OpSpec{
-		Type: "AvgPool3DGrad",
+		Type: "BatchToSpace",
 		Input: []tf.Input{
-			orig_input_shape, grad,
+			input, crops,
 		},
 		Attrs: attrs,
 	}
@@ -20932,187 +20898,150 @@ func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksi
 	return op.Output(0)
 }
 
-// Writes a `GraphDef` protocol buffer to a `SummaryWriter`.
+// Makes a new iterator from the given `dataset` and stores it in `iterator`.
 //
-// Arguments:
-//	writer: Handle of `SummaryWriter`.
-//	step: The step to write the summary for.
-//	tensor: A scalar string of the serialized tf.GraphDef proto.
+// This operation may be executed multiple times. Each execution will reset the
+// iterator in `iterator` to the first element of `dataset`.
 //
 // Returns the created operation.
-func WriteGraphSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output) (o *tf.Operation) {
+func MakeIterator(scope *Scope, dataset tf.Output, iterator tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "WriteGraphSummary",
+		Type: "MakeIterator",
 		Input: []tf.Input{
-			writer, step, tensor,
+			dataset, iterator,
 		},
 	}
 	return scope.AddOperation(opspec)
 }
 
-// MaxPool3DGradGradAttr is an optional argument to MaxPool3DGradGrad.
-type MaxPool3DGradGradAttr func(optionalAttr)
-
-// MaxPool3DGradGradDataFormat sets the optional data_format attribute to value.
+// Adjust the contrast of one or more images.
 //
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func MaxPool3DGradGradDataFormat(value string) MaxPool3DGradGradAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Computes second-order gradients of the maxpooling function.
+// `images` is a tensor of at least 3 dimensions.  The last 3 dimensions are
+// interpreted as `[height, width, channels]`.  The other dimensions only
+// represent a collection of images, such as `[batch, height, width, channels].`
+//
+// Contrast is adjusted independently for each channel of each image.
+//
+// For each channel, the Op first computes the mean of the image pixels in the
+// channel and then adjusts each component of each pixel to
+// `(x - mean) * contrast_factor + mean`.
 //
 // Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: Output backprop of shape `[batch, depth, rows, cols, channels]`.
-//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
-// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
+//	images: Images to adjust.  At least 3-D.
+//	contrast_factor: A float multiplier for adjusting contrast.
 //
-// Returns Gradients of gradients w.r.t. the input to `max_pool`.
-func MaxPool3DGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradGradAttr) (output tf.Output) {
+// Returns The contrast-adjusted image or images.
+func AdjustContrastv2(scope *Scope, images tf.Output, contrast_factor tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "MaxPool3DGradGrad",
+		Type: "AdjustContrastv2",
 		Input: []tf.Input{
-			orig_input, orig_output, grad,
+			images, contrast_factor,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// FakeQuantWithMinMaxArgsGradientAttr is an optional argument to FakeQuantWithMinMaxArgsGradient.
-type FakeQuantWithMinMaxArgsGradientAttr func(optionalAttr)
-
-// FakeQuantWithMinMaxArgsGradientMin sets the optional min attribute to value.
-// If not specified, defaults to -6
-func FakeQuantWithMinMaxArgsGradientMin(value float32) FakeQuantWithMinMaxArgsGradientAttr {
-	return func(m optionalAttr) {
-		m["min"] = value
-	}
-}
-
-// FakeQuantWithMinMaxArgsGradientMax sets the optional max attribute to value.
-// If not specified, defaults to 6
-func FakeQuantWithMinMaxArgsGradientMax(value float32) FakeQuantWithMinMaxArgsGradientAttr {
-	return func(m optionalAttr) {
-		m["max"] = value
+// Gets the next output from the given iterator.
+func IteratorGetNext(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// FakeQuantWithMinMaxArgsGradientNumBits sets the optional num_bits attribute to value.
-// If not specified, defaults to 8
-func FakeQuantWithMinMaxArgsGradientNumBits(value int64) FakeQuantWithMinMaxArgsGradientAttr {
-	return func(m optionalAttr) {
-		m["num_bits"] = value
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "IteratorGetNext",
+		Input: []tf.Input{
+			iterator,
+		},
+		Attrs: attrs,
 	}
-}
-
-// FakeQuantWithMinMaxArgsGradientNarrowRange sets the optional narrow_range attribute to value.
-// If not specified, defaults to false
-func FakeQuantWithMinMaxArgsGradientNarrowRange(value bool) FakeQuantWithMinMaxArgsGradientAttr {
-	return func(m optionalAttr) {
-		m["narrow_range"] = value
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("IteratorGetNext", err)
+		return
 	}
+	return components
 }
 
-// Compute gradients for a FakeQuantWithMinMaxArgs operation.
+// Outputs the single element from the given dataset.
 //
 // Arguments:
-//	gradients: Backpropagated gradients above the FakeQuantWithMinMaxArgs operation.
-//	inputs: Values passed as inputs to the FakeQuantWithMinMaxArgs operation.
+//	dataset: A handle to a dataset that contains a single element.
 //
-// Returns Backpropagated gradients below the FakeQuantWithMinMaxArgs operation:
-// `gradients * (inputs >= min && inputs <= max)`.
-func FakeQuantWithMinMaxArgsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, optional ...FakeQuantWithMinMaxArgsGradientAttr) (backprops tf.Output) {
+//
+//
+// Returns The components of the single element of `input`.
+func DatasetToSingleElement(scope *Scope, dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "FakeQuantWithMinMaxArgsGradient",
+		Type: "DatasetToSingleElement",
 		Input: []tf.Input{
-			gradients, inputs,
+			dataset,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("DatasetToSingleElement", err)
+		return
+	}
+	return components
 }
 
-// Computes gradients of the maxpooling function.
+// Converts the given `resource_handle` representing an iterator to a string.
 //
 // Arguments:
-//	input: The original input.
-//	grad: 4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t. the
-// output of `max_pool`.
-//	argmax: The indices of the maximum values chosen for each output of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
+//	resource_handle: A handle to an iterator resource.
 //
-// Returns Gradients w.r.t. the input of `max_pool`.
-func MaxPoolGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) {
+// Returns A string representation of the given handle.
+func IteratorToStringHandle(scope *Scope, resource_handle tf.Output) (string_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "MaxPoolGradWithArgmax",
+		Type: "IteratorToStringHandle",
 		Input: []tf.Input{
-			input, grad, argmax,
+			resource_handle,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// StringToNumberAttr is an optional argument to StringToNumber.
-type StringToNumberAttr func(optionalAttr)
+// ShapeNAttr is an optional argument to ShapeN.
+type ShapeNAttr func(optionalAttr)
 
-// StringToNumberOutType sets the optional out_type attribute to value.
-//
-// value: The numeric type to interpret each string in `string_tensor` as.
-// If not specified, defaults to DT_FLOAT
-func StringToNumberOutType(value tf.DataType) StringToNumberAttr {
+// ShapeNOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_INT32
+func ShapeNOutType(value tf.DataType) ShapeNAttr {
 	return func(m optionalAttr) {
 		m["out_type"] = value
 	}
 }
 
-// Converts each string in the input Tensor to the specified numeric type.
-//
-// (Note that int32 overflow results in an error while float overflow
-// results in a rounded value.)
+// Returns shape of tensors.
 //
-// Returns A Tensor of the same shape as the input `string_tensor`.
-func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) {
+// This operation returns N 1-D integer tensors representing shape of `input[i]s`.
+func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -21121,83 +21050,61 @@ func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToN
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StringToNumber",
+		Type: "ShapeN",
 		Input: []tf.Input{
-			string_tensor,
+			tf.OutputList(input),
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the truth value of NOT x element-wise.
-func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "LogicalNot",
-		Input: []tf.Input{
-			x,
-		},
+	var idx int
+	var err error
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("ShapeN", err)
+		return
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return output
 }
 
-// LRNGradAttr is an optional argument to LRNGrad.
-type LRNGradAttr func(optionalAttr)
+// IteratorFromStringHandleAttr is an optional argument to IteratorFromStringHandle.
+type IteratorFromStringHandleAttr func(optionalAttr)
 
-// LRNGradDepthRadius sets the optional depth_radius attribute to value.
+// IteratorFromStringHandleOutputTypes sets the optional output_types attribute to value.
 //
-// value: A depth radius.
-// If not specified, defaults to 5
-func LRNGradDepthRadius(value int64) LRNGradAttr {
-	return func(m optionalAttr) {
-		m["depth_radius"] = value
-	}
-}
-
-// LRNGradBias sets the optional bias attribute to value.
+// value: If specified, defines the type of each tuple component in an
+// element produced by the resulting iterator.
+// If not specified, defaults to <>
 //
-// value: An offset (usually > 0 to avoid dividing by 0).
-// If not specified, defaults to 1
-func LRNGradBias(value float32) LRNGradAttr {
+// REQUIRES: len(value) >= 0
+func IteratorFromStringHandleOutputTypes(value []tf.DataType) IteratorFromStringHandleAttr {
 	return func(m optionalAttr) {
-		m["bias"] = value
+		m["output_types"] = value
 	}
 }
 
-// LRNGradAlpha sets the optional alpha attribute to value.
+// IteratorFromStringHandleOutputShapes sets the optional output_shapes attribute to value.
 //
-// value: A scale factor, usually positive.
-// If not specified, defaults to 1
-func LRNGradAlpha(value float32) LRNGradAttr {
-	return func(m optionalAttr) {
-		m["alpha"] = value
-	}
-}
-
-// LRNGradBeta sets the optional beta attribute to value.
+// value: If specified, defines the shape of each tuple component in an
+// element produced by the resulting iterator.
+// If not specified, defaults to <>
 //
-// value: An exponent.
-// If not specified, defaults to 0.5
-func LRNGradBeta(value float32) LRNGradAttr {
+// REQUIRES: len(value) >= 0
+func IteratorFromStringHandleOutputShapes(value []tf.Shape) IteratorFromStringHandleAttr {
 	return func(m optionalAttr) {
-		m["beta"] = value
+		m["output_shapes"] = value
 	}
 }
 
-// Gradients for Local Response Normalization.
+// Converts the given string representing a handle to an iterator to a resource.
 //
 // Arguments:
-//	input_grads: 4-D with shape `[batch, height, width, channels]`.
-//	input_image: 4-D with shape `[batch, height, width, channels]`.
-//	output_image: 4-D with shape `[batch, height, width, channels]`.
+//	string_handle: A string representation of the given handle.
 //
-// Returns The gradients for LRN.
-func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_image tf.Output, optional ...LRNGradAttr) (output tf.Output) {
+// Returns A handle to an iterator resource.
+func IteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional ...IteratorFromStringHandleAttr) (resource_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -21206,9 +21113,9 @@ func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "LRNGrad",
+		Type: "IteratorFromStringHandle",
 		Input: []tf.Input{
-			input_grads, input_image, output_image,
+			string_handle,
 		},
 		Attrs: attrs,
 	}
@@ -21216,183 +21123,251 @@ func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_
 	return op.Output(0)
 }
 
-// EncodePngAttr is an optional argument to EncodePng.
-type EncodePngAttr func(optionalAttr)
+// Computes arctangent of `y/x` element-wise, respecting signs of the arguments.
+//
+// This is the angle \( \theta \in [-\pi, \pi] \) such that
+// \[ x = r \cos(\theta) \]
+// and
+// \[ y = r \sin(\theta) \]
+// where \(r = \sqrt(x^2 + y^2) \).
+func Atan2(scope *Scope, y tf.Output, x tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Atan2",
+		Input: []tf.Input{
+			y, x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
 
-// EncodePngCompression sets the optional compression attribute to value.
+// Return a tensor with the same shape and contents as the input tensor or value.
+func Identity(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Identity",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Gather slices from `params` axis `axis` according to `indices`.
 //
-// value: Compression level.
-// If not specified, defaults to -1
-func EncodePngCompression(value int64) EncodePngAttr {
-	return func(m optionalAttr) {
-		m["compression"] = value
+// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
+// Produces an output tensor with shape `params.shape[:axis] + indices.shape +
+// params.shape[axis + 1:]` where:
+//
+// ```python
+//     # Scalar indices (output is rank(params) - 1).
+//     output[a_0, ..., a_n, b_0, ..., b_n] =
+//       params[a_0, ..., a_n, indices, b_0, ..., b_n]
+//
+//     # Vector indices (output is rank(params)).
+//     output[a_0, ..., a_n, i, b_0, ..., b_n] =
+//       params[a_0, ..., a_n, indices[i], b_0, ..., b_n]
+//
+//     # Higher rank indices (output is rank(params) + rank(indices) - 1).
+//     output[a_0, ..., a_n, i, ..., j, b_0, ... b_n] =
+//       params[a_0, ..., a_n, indices[i, ..., j], b_0, ..., b_n]
+// ```
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt>
+// </div>
+//
+// Arguments:
+//	params: The tensor from which to gather values. Must be at least rank
+// `axis + 1`.
+//	indices: Index tensor. Must be in range `[0, params.shape[axis])`.
+//	axis: The axis in `params` to gather `indices` from. Defaults to the first
+// dimension. Supports negative indexes.
+//
+// Returns Values from `params` gathered from indices given by `indices`, with
+// shape `params.shape[:axis] + indices.shape + params.shape[axis + 1:]`.
+func GatherV2(scope *Scope, params tf.Output, indices tf.Output, axis tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "GatherV2",
+		Input: []tf.Input{
+			params, indices, axis,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// PNG-encode an image.
-//
-// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
-// where `channels` is:
-//
-// *   1: for grayscale.
-// *   2: for grayscale + alpha.
-// *   3: for RGB.
-// *   4: for RGBA.
-//
-// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
-// default or a value from 0 to 9.  9 is the highest compression level, generating
-// the smallest output, but is slower.
+// Converts the given `resource_handle` representing an iterator to a variant tensor.
 //
 // Arguments:
-//	image: 3-D with shape `[height, width, channels]`.
+//	resource_handle: A handle to an iterator resource.
 //
-// Returns 0-D. PNG-encoded image.
-func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) {
+// Returns A variant tensor storing the state of the iterator contained in the
+// resource.
+func SerializeIterator(scope *Scope, resource_handle tf.Output) (serialized tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "EncodePng",
+		Type: "SerializeIterator",
 		Input: []tf.Input{
-			image,
+			resource_handle,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MaxPoolAttr is an optional argument to MaxPool.
-type MaxPoolAttr func(optionalAttr)
+// FIFOQueueV2Attr is an optional argument to FIFOQueueV2.
+type FIFOQueueV2Attr func(optionalAttr)
 
-// MaxPoolDataFormat sets the optional data_format attribute to value.
+// FIFOQueueV2Shapes sets the optional shapes attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func MaxPoolDataFormat(value string) MaxPoolAttr {
+// value: The shape of each component in a value. The length of this attr must
+// be either 0 or the same as the length of component_types. If the length of
+// this attr is 0, the shapes of queue elements are not constrained, and
+// only one element may be dequeued at a time.
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["shapes"] = value
 	}
 }
 
-// Performs max pooling on the input.
+// FIFOQueueV2Capacity sets the optional capacity attribute to value.
+//
+// value: The upper bound on the number of elements in this queue.
+// Negative numbers mean no limit.
+// If not specified, defaults to -1
+func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// FIFOQueueV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this queue is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func FIFOQueueV2Container(value string) FIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// FIFOQueueV2SharedName sets the optional shared_name attribute to value.
+//
+// value: If non-empty, this queue will be shared under the given name
+// across multiple sessions.
+// If not specified, defaults to ""
+func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// A queue that produces elements in first-in first-out order.
 //
 // Arguments:
-//	input: 4-D input to pool over.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
+//	component_types: The type of each component in a value.
 //
-// Returns The max pooled output tensor.
-func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolAttr) (output tf.Output) {
+// Returns The handle to the queue.
+func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	attrs := map[string]interface{}{"component_types": component_types}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MaxPool",
-		Input: []tf.Input{
-			input,
-		},
+		Type: "FIFOQueueV2",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Fast Fourier transform.
-//
-// Computes the 1-dimensional discrete Fourier transform over the inner-most
-// dimension of `input`.
-//
-// Arguments:
-//	input: A complex64 tensor.
-//
-// Returns A complex64 tensor of the same shape as `input`. The inner-most
-//   dimension of `input` is replaced with its 1D Fourier transform.
-//
-// @compatibility(numpy)
-// Equivalent to np.fft.fft
-// @end_compatibility
-func FFT(scope *Scope, input tf.Output) (output tf.Output) {
+// Produces a summary of any statistics recorded by the given statistics manager.
+func StatsAggregatorSummary(scope *Scope, iterator tf.Output) (summary tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "FFT",
+		Type: "StatsAggregatorSummary",
 		Input: []tf.Input{
-			input,
+			iterator,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax.
-type MaxPoolWithArgmaxAttr func(optionalAttr)
-
-// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value.
-// If not specified, defaults to DT_INT64
-func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr {
-	return func(m optionalAttr) {
-		m["Targmax"] = value
-	}
-}
-
-// Performs max pooling on the input and outputs both max values and indices.
-//
-// The indices in `argmax` are flattened, so that a maximum value at position
-// `[b, y, x, c]` becomes flattened index
-// `((b * height + y) * width + x) * channels + c`.
+// Performs a padding as a preprocess during a convolution.
 //
-// The indices returned are always in `[0, height) x [0, width)` before flattening,
-// even if padding is involved and the mathematically correct answer is outside
-// (either negative or too large).  This is a bug, but fixing it is difficult to do
-// in a safe backwards compatible way, especially due to flattening.
+// Similar to FusedResizeAndPadConv2d, this op allows for an optimized
+// implementation where the spatial padding transformation stage is fused with the
+// im2col lookup, but in this case without the bilinear filtering required for
+// resizing. Fusing the padding prevents the need to write out the intermediate
+// results as whole tensors, reducing memory pressure, and we can get some latency
+// gains by merging the transformation calculations.
+// The data_format attribute for Conv2D isn't supported by this op, and 'NHWC'
+// order is used instead.
+// Internally this op uses a single per-graph scratch buffer, which means that it
+// will block if multiple versions are being run in parallel. This is because this
+// operator is primarily an optimization to minimize memory usage.
 //
 // Arguments:
-//	input: 4-D with shape `[batch, height, width, channels]`.  Input to pool over.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
+//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
+//	paddings: A two-column matrix specifying the padding sizes. The number of
+// rows must be the same as the rank of `input`.
+//	filter: 4-D with shape
+// `[filter_height, filter_width, in_channels, out_channels]`.
 //
-// Returns The max pooled output tensor.4-D.  The flattened indices of the max values chosen for each output.
-func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) {
+//	strides: 1-D of length 4.  The stride of the sliding window for each dimension
+// of `input`. Must be in the same order as the dimension specified with format.
+//	padding: The type of padding algorithm to use.
+func FusedPadConv2D(scope *Scope, input tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "MaxPoolWithArgmax",
+		Type: "FusedPadConv2D",
 		Input: []tf.Input{
-			input,
+			input, paddings, filter,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// MaxPoolGradGradV2Attr is an optional argument to MaxPoolGradGradV2.
-type MaxPoolGradGradV2Attr func(optionalAttr)
+// Conv2DBackpropInputAttr is an optional argument to Conv2DBackpropInput.
+type Conv2DBackpropInputAttr func(optionalAttr)
 
-// MaxPoolGradGradV2DataFormat sets the optional data_format attribute to value.
+// Conv2DBackpropInputUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value.
+// If not specified, defaults to true
+func Conv2DBackpropInputUseCudnnOnGpu(value bool) Conv2DBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["use_cudnn_on_gpu"] = value
+	}
+}
+
+// Conv2DBackpropInputDataFormat sets the optional data_format attribute to value.
 //
 // value: Specify the data format of the input and output data. With the
 // default format "NHWC", the data is stored in the order of:
@@ -21400,65 +21375,54 @@ type MaxPoolGradGradV2Attr func(optionalAttr)
 // Alternatively, the format could be "NCHW", the data storage order of:
 //     [batch, in_channels, in_height, in_width].
 // If not specified, defaults to "NHWC"
-func MaxPoolGradGradV2DataFormat(value string) MaxPoolGradGradV2Attr {
+func Conv2DBackpropInputDataFormat(value string) Conv2DBackpropInputAttr {
 	return func(m optionalAttr) {
 		m["data_format"] = value
 	}
 }
 
-// Computes second-order gradients of the maxpooling function.
-//
-// Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: 4-D.  Gradients of gradients w.r.t. the input of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
+// Conv2DBackpropInputDilations sets the optional dilations attribute to value.
 //
-// Returns Gradients of gradients w.r.t. the input to `max_pool`.
-func MaxPoolGradGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradGradV2Attr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MaxPoolGradGradV2",
-		Input: []tf.Input{
-			orig_input, orig_output, grad, ksize, strides,
-		},
-		Attrs: attrs,
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each filter
+// element on that dimension. The dimension order is determined by the value of
+// `data_format`, see above for details. Dilations in the batch and depth
+// dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func Conv2DBackpropInputDilations(value []int64) Conv2DBackpropInputAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes second-order gradients of the maxpooling function.
+// Computes the gradients of convolution with respect to the input.
 //
 // Arguments:
-//	input: The original input.
-//	grad: 4-D with shape `[batch, height, width, channels]`.  Gradients w.r.t. the
-// input of `max_pool`.
-//	argmax: The indices of the maximum values chosen for each output of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
+//	input_sizes: An integer vector representing the shape of `input`,
+// where `input` is a 4-D `[batch, height, width, channels]` tensor.
+//	filter: 4-D with shape
+// `[filter_height, filter_width, in_channels, out_channels]`.
+//	out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
+// Gradients w.r.t. the output of the convolution.
+//	strides: The stride of the sliding window for each dimension of the input
+// of the convolution. Must be in the same order as the dimension specified with
+// format.
 //	padding: The type of padding algorithm to use.
 //
-// Returns Gradients of gradients w.r.t. the input of `max_pool`.
-func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) {
+// Returns 4-D with shape `[batch, in_height, in_width, in_channels]`.  Gradient
+// w.r.t. the input of the convolution.
+func Conv2DBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropInputAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "MaxPoolGradGradWithArgmax",
+		Type: "Conv2DBackpropInput",
 		Input: []tf.Input{
-			input, grad, argmax,
+			input_sizes, filter, out_backprop,
 		},
 		Attrs: attrs,
 	}
@@ -21466,141 +21430,126 @@ func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, ar
 	return op.Output(0)
 }
 
-// Compute the polygamma function \\(\psi^{(n)}(x)\\).
+// Interleave the values from the `data` tensors into a single tensor.
 //
-// The polygamma function is defined as:
+// Builds a merged tensor such that
+//
+// ```python
+//     merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
+// ```
 //
+// For example, if each `indices[m]` is scalar or vector, we have
 //
-// \\(\psi^{(n)}(x) = \frac{d^n}{dx^n} \psi(x)\\)
+// ```python
+//     # Scalar indices:
+//     merged[indices[m], ...] = data[m][...]
 //
-// where \\(\psi(x)\\) is the digamma function.
-func Polygamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) {
+//     # Vector indices:
+//     merged[indices[m][i], ...] = data[m][i, ...]
+// ```
+//
+// Each `data[i].shape` must start with the corresponding `indices[i].shape`,
+// and the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we
+// must have `data[i].shape = indices[i].shape + constant`.  In terms of this
+// `constant`, the output shape is
+//
+//     merged.shape = [max(indices)] + constant
+//
+// Values are merged in order, so if an index appears in both `indices[m][i]` and
+// `indices[n][j]` for `(m,i) < (n,j)` the slice `data[n][j]` will appear in the
+// merged result. If you do not need this guarantee, ParallelDynamicStitch might
+// perform better on some devices.
+//
+// For example:
+//
+// ```python
+//     indices[0] = 6
+//     indices[1] = [4, 1]
+//     indices[2] = [[5, 2], [0, 3]]
+//     data[0] = [61, 62]
+//     data[1] = [[41, 42], [11, 12]]
+//     data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
+//     merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
+//               [51, 52], [61, 62]]
+// ```
+//
+// This method can be used to merge partitions created by `dynamic_partition`
+// as illustrated on the following example:
+//
+// ```python
+//     # Apply function (increments x_i) on elements for which a certain condition
+//     # apply (x_i != -1 in this example).
+//     x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
+//     condition_mask=tf.not_equal(x,tf.constant(-1.))
+//     partitioned_data = tf.dynamic_partition(
+//         x, tf.cast(condition_mask, tf.int32) , 2)
+//     partitioned_data[1] = partitioned_data[1] + 1.0
+//     condition_indices = tf.dynamic_partition(
+//         tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)
+//     x = tf.dynamic_stitch(condition_indices, partitioned_data)
+//     # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain
+//     # unchanged.
+// ```
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/DynamicStitch.png" alt>
+// </div>
+func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Polygamma",
+		Type: "DynamicStitch",
 		Input: []tf.Input{
-			a, x,
+			tf.OutputList(indices), tf.OutputList(data),
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the grayscale dilation of 4-D `input` and 3-D `filter` tensors.
-//
-// The `input` tensor has shape `[batch, in_height, in_width, depth]` and the
-// `filter` tensor has shape `[filter_height, filter_width, depth]`, i.e., each
-// input channel is processed independently of the others with its own structuring
-// function. The `output` tensor has shape
-// `[batch, out_height, out_width, depth]`. The spatial dimensions of the output
-// tensor depend on the `padding` algorithm. We currently only support the default
-// "NHWC" `data_format`.
-//
-// In detail, the grayscale morphological 2-D dilation is the max-sum correlation
-// (for consistency with `conv2d`, we use unmirrored filters):
-//
-//     output[b, y, x, c] =
-//        max_{dy, dx} input[b,
-//                           strides[1] * y + rates[1] * dy,
-//                           strides[2] * x + rates[2] * dx,
-//                           c] +
-//                     filter[dy, dx, c]
-//
-// Max-pooling is a special case when the filter has size equal to the pooling
-// kernel size and contains all zeros.
-//
-// Note on duality: The dilation of `input` by the `filter` is equal to the
-// negation of the erosion of `-input` by the reflected `filter`.
-//
-// Arguments:
-//	input: 4-D with shape `[batch, in_height, in_width, depth]`.
-//	filter: 3-D with shape `[filter_height, filter_width, depth]`.
-//	strides: The stride of the sliding window for each dimension of the input
-// tensor. Must be: `[1, stride_height, stride_width, 1]`.
-//	rates: The input stride for atrous morphological dilation. Must be:
-// `[1, rate_height, rate_width, 1]`.
-//	padding: The type of padding algorithm to use.
+// Returns the truth value of (x == y) element-wise.
 //
-// Returns 4-D with shape `[batch, out_height, out_width, depth]`.
-func Dilation2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, rates []int64, padding string) (output tf.Output) {
+// *NOTE*: `Equal` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Equal(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "Dilation2D",
+		Type: "Equal",
 		Input: []tf.Input{
-			input, filter,
+			x, y,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// AudioSpectrogramAttr is an optional argument to AudioSpectrogram.
-type AudioSpectrogramAttr func(optionalAttr)
+// TensorArrayGatherV2Attr is an optional argument to TensorArrayGatherV2.
+type TensorArrayGatherV2Attr func(optionalAttr)
 
-// AudioSpectrogramMagnitudeSquared sets the optional magnitude_squared attribute to value.
-//
-// value: Whether to return the squared magnitude or just the
-// magnitude. Using squared magnitude can avoid extra calculations.
-// If not specified, defaults to false
-func AudioSpectrogramMagnitudeSquared(value bool) AudioSpectrogramAttr {
+// TensorArrayGatherV2ElementShape sets the optional element_shape attribute to value.
+// If not specified, defaults to <unknown_rank:true >
+func TensorArrayGatherV2ElementShape(value tf.Shape) TensorArrayGatherV2Attr {
 	return func(m optionalAttr) {
-		m["magnitude_squared"] = value
+		m["element_shape"] = value
 	}
 }
 
-// Produces a visualization of audio data over time.
-//
-// Spectrograms are a standard way of representing audio information as a series of
-// slices of frequency information, one slice for each window of time. By joining
-// these together into a sequence, they form a distinctive fingerprint of the sound
-// over time.
-//
-// This op expects to receive audio data as an input, stored as floats in the range
-// -1 to 1, together with a window width in samples, and a stride specifying how
-// far to move the window between slices. From this it generates a three
-// dimensional output. The lowest dimension has an amplitude value for each
-// frequency during that time slice. The next dimension is time, with successive
-// frequency slices. The final dimension is for the channels in the input, so a
-// stereo audio input would have two here for example.
-//
-// This means the layout when converted and saved as an image is rotated 90 degrees
-// clockwise from a typical spectrogram. Time is descending down the Y axis, and
-// the frequency decreases from left to right.
-//
-// Each value in the result represents the square root of the sum of the real and
-// imaginary parts of an FFT on the current window of samples. In this way, the
-// lowest dimension represents the power of each frequency in the current window,
-// and adjacent windows are concatenated in the next dimension.
-//
-// To get a more intuitive and visual look at what this operation does, you can run
-// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
-// resulting spectrogram as a PNG image.
-//
-// Arguments:
-//	input: Float representation of audio data.
-//	window_size: How wide the input window is in samples. For the highest efficiency
-// this should be a power of two, but other values are accepted.
-//	stride: How widely apart the center of adjacent sample windows should be.
-//
-// Returns 3D representation of the audio frequencies as an image.
-func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride int64, optional ...AudioSpectrogramAttr) (spectrogram tf.Output) {
+// Deprecated. Use TensorArrayGatherV3
+func TensorArrayGatherV2(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV2Attr) (value tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"window_size": window_size, "stride": stride}
+	attrs := map[string]interface{}{"dtype": dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "AudioSpectrogram",
+		Type: "TensorArrayGatherV2",
 		Input: []tf.Input{
-			input,
+			handle, indices, flow_in,
 		},
 		Attrs: attrs,
 	}
@@ -21608,236 +21557,276 @@ func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride i
 	return op.Output(0)
 }
 
-// Computes the gradient of morphological 2-D dilation with respect to the input.
+// Interleave the values from the `data` tensors into a single tensor.
 //
-// Arguments:
-//	input: 4-D with shape `[batch, in_height, in_width, depth]`.
-//	filter: 3-D with shape `[filter_height, filter_width, depth]`.
-//	out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
-//	strides: 1-D of length 4. The stride of the sliding window for each dimension of
-// the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
-//	rates: 1-D of length 4. The input stride for atrous morphological dilation.
-// Must be: `[1, rate_height, rate_width, 1]`.
-//	padding: The type of padding algorithm to use.
+// Builds a merged tensor such that
 //
-// Returns 4-D with shape `[batch, in_height, in_width, depth]`.
-func Dilation2DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (in_backprop tf.Output) {
+// ```python
+//     merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...]
+// ```
+//
+// For example, if each `indices[m]` is scalar or vector, we have
+//
+// ```python
+//     # Scalar indices:
+//     merged[indices[m], ...] = data[m][...]
+//
+//     # Vector indices:
+//     merged[indices[m][i], ...] = data[m][i, ...]
+// ```
+//
+// Each `data[i].shape` must start with the corresponding `indices[i].shape`,
+// and the rest of `data[i].shape` must be constant w.r.t. `i`.  That is, we
+// must have `data[i].shape = indices[i].shape + constant`.  In terms of this
+// `constant`, the output shape is
+//
+//     merged.shape = [max(indices)] + constant
+//
+// Values may be merged in parallel, so if an index appears in both `indices[m][i]`
+// and `indices[n][j]`, the result may be invalid. This differs from the normal
+// DynamicStitch operator that defines the behavior in that case.
+//
+// For example:
+//
+// ```python
+//     indices[0] = 6
+//     indices[1] = [4, 1]
+//     indices[2] = [[5, 2], [0, 3]]
+//     data[0] = [61, 62]
+//     data[1] = [[41, 42], [11, 12]]
+//     data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]]
+//     merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42],
+//               [51, 52], [61, 62]]
+// ```
+//
+// This method can be used to merge partitions created by `dynamic_partition`
+// as illustrated on the following example:
+//
+// ```python
+//     # Apply function (increments x_i) on elements for which a certain condition
+//     # apply (x_i != -1 in this example).
+//     x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4])
+//     condition_mask=tf.not_equal(x,tf.constant(-1.))
+//     partitioned_data = tf.dynamic_partition(
+//         x, tf.cast(condition_mask, tf.int32) , 2)
+//     partitioned_data[1] = partitioned_data[1] + 1.0
+//     condition_indices = tf.dynamic_partition(
+//         tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2)
+//     x = tf.dynamic_stitch(condition_indices, partitioned_data)
+//     # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain
+//     # unchanged.
+// ```
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/DynamicStitch.png" alt>
+// </div>
+func ParallelDynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "Dilation2DBackpropInput",
+		Type: "ParallelDynamicStitch",
 		Input: []tf.Input{
-			input, filter, out_backprop,
+			tf.OutputList(indices), tf.OutputList(data),
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns the truth value of (x == y) element-wise.
+// Computes the gradient for the inverse of `x` wrt its input.
 //
-// *NOTE*: `Equal` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Equal(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`
+// is the corresponding input gradient.
+func InvGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Equal",
+		Type: "InvGrad",
 		Input: []tf.Input{
-			x, y,
+			y, dy,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the gradient of morphological 2-D dilation with respect to the filter.
-//
-// Arguments:
-//	input: 4-D with shape `[batch, in_height, in_width, depth]`.
-//	filter: 3-D with shape `[filter_height, filter_width, depth]`.
-//	out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`.
-//	strides: 1-D of length 4. The stride of the sliding window for each dimension of
-// the input tensor. Must be: `[1, stride_height, stride_width, 1]`.
-//	rates: 1-D of length 4. The input stride for atrous morphological dilation.
-// Must be: `[1, rate_height, rate_width, 1]`.
-//	padding: The type of padding algorithm to use.
+// StridedSliceAttr is an optional argument to StridedSlice.
+type StridedSliceAttr func(optionalAttr)
+
+// StridedSliceBeginMask sets the optional begin_mask attribute to value.
 //
-// Returns 3-D with shape `[filter_height, filter_width, depth]`.
-func Dilation2DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (filter_backprop tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding}
-	opspec := tf.OpSpec{
-		Type: "Dilation2DBackpropFilter",
-		Input: []tf.Input{
-			input, filter, out_backprop,
-		},
-		Attrs: attrs,
+// value: a bitmask where a bit i being 1 means to ignore the begin
+// value and instead use the largest interval possible. At runtime
+// begin[i] will be replaced with `[0, n-1) if `stride[i] > 0` or
+// `[-1, n-1]` if `stride[i] < 0`
+// If not specified, defaults to 0
+func StridedSliceBeginMask(value int64) StridedSliceAttr {
+	return func(m optionalAttr) {
+		m["begin_mask"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes rectified linear gradients for a Relu operation.
-//
-// Arguments:
-//	gradients: The backpropagated gradients to the corresponding Relu operation.
-//	features: The features passed as input to the corresponding Relu operation, OR
-// the outputs of that operation (both work equivalently).
+// StridedSliceEndMask sets the optional end_mask attribute to value.
 //
-// Returns `gradients * (features > 0)`.
-func ReluGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
-	if scope.Err() != nil {
-		return
+// value: analogous to `begin_mask`
+// If not specified, defaults to 0
+func StridedSliceEndMask(value int64) StridedSliceAttr {
+	return func(m optionalAttr) {
+		m["end_mask"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "ReluGrad",
-		Input: []tf.Input{
-			gradients, features,
-		},
+}
+
+// StridedSliceEllipsisMask sets the optional ellipsis_mask attribute to value.
+//
+// value: a bitmask where bit `i` being 1 means the `i`th
+// position is actually an ellipsis. One bit at most can be 1.
+// If `ellipsis_mask == 0`, then an implicit ellipsis mask of `1 << (m+1)`
+// is provided. This means that `foo[3:5] == foo[3:5, ...]`. An ellipsis
+// implicitly creates as many range specifications as necessary to fully
+// specify the sliced range for every dimension. For example for a 4-dimensional
+// tensor `foo` the slice `foo[2, ..., 5:8]` implies `foo[2, :, :, 5:8]`.
+// If not specified, defaults to 0
+func StridedSliceEllipsisMask(value int64) StridedSliceAttr {
+	return func(m optionalAttr) {
+		m["ellipsis_mask"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes rectified linear 6: `min(max(features, 0), 6)`.
-func Relu6(scope *Scope, features tf.Output) (activations tf.Output) {
-	if scope.Err() != nil {
-		return
+// StridedSliceNewAxisMask sets the optional new_axis_mask attribute to value.
+//
+// value: a bitmask where bit `i` being 1 means the `i`th
+// specification creates a new shape 1 dimension. For example
+// `foo[:4, tf.newaxis, :2]` would produce a shape `(4, 1, 2)` tensor.
+// If not specified, defaults to 0
+func StridedSliceNewAxisMask(value int64) StridedSliceAttr {
+	return func(m optionalAttr) {
+		m["new_axis_mask"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "Relu6",
-		Input: []tf.Input{
-			features,
-		},
+}
+
+// StridedSliceShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
+//
+// value: a bitmask where bit `i` implies that the `i`th
+// specification should shrink the dimensionality. begin and end
+// must imply a slice of size 1 in the dimension. For example in
+// python one might do `foo[:, 3, :]` which would result in
+// `shrink_axis_mask` being 2.
+// If not specified, defaults to 0
+func StridedSliceShrinkAxisMask(value int64) StridedSliceAttr {
+	return func(m optionalAttr) {
+		m["shrink_axis_mask"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Creates a dataset that contains `count` elements from the `input_dataset`.
+// Return a strided slice from `input`.
 //
-// Arguments:
+// Note, most python users will want to use the Python `Tensor.__getitem__`
+// or `Variable.__getitem__` rather than this op directly.
 //
-//	count: A scalar representing the number of elements from the `input_dataset`
-// that should be taken. A value of `-1` indicates that all of `input_dataset`
-// is taken.
+// The goal of this op is to produce a new tensor with a subset of
+// the elements from the `n` dimensional `input` tensor. The subset is chosen using
+// a sequence of `m` sparse range specifications encoded into the arguments
+// of this function. Note, in some cases
+// `m` could be equal to `n`, but this need not be the case. Each
+// range specification entry can be one of the following:
 //
+// - An ellipsis (...). Ellipses are used to imply zero or more
+//   dimensions of full-dimension selection and are produced using
+//   `ellipsis_mask`. For example, `foo[...]` is the identity slice.
 //
-func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "TakeDataset",
-		Input: []tf.Input{
-			input_dataset, count,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Converts each string in the input Tensor to its hash mod by a number of buckets.
+// - A new axis. This is used to insert a new shape=1 dimension and is
+//   produced using `new_axis_mask`. For example, `foo[:, ...]` where
+//   `foo` is shape `(3, 4)` produces a `(1, 3, 4)` tensor.
 //
-// The hash function is deterministic on the content of the string within the
-// process.
 //
-// Note that the hash function may change from time to time.
-// This functionality will be deprecated and it's recommended to use
-// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`.
+// - A range `begin:end:stride`. This is used to specify how much to choose from
+//   a given dimension. `stride` can be any integer but 0.  `begin` is an integer
+//   which represents the index of the first value to select while `end` represents
+//   the index of the last value to select. The number of values selected in each
+//   dimension is `end - begin` if `stride > 0` and `begin - end` if `stride < 0`.
+//   `begin` and `end` can be negative where `-1` is the last element, `-2` is
+//   the second to last. `begin_mask` controls whether to replace the explicitly
+//   given `begin` with an implicit effective value of `0` if `stride > 0` and
+//   `-1` if `stride < 0`. `end_mask` is analogous but produces the number
+//   required to create the largest open interval. For example, given a shape
+//   `(3,)` tensor `foo[:]`, the effective `begin` and `end` are `0` and `3`. Do
+//   not assume this is equivalent to `foo[0:-1]` which has an effective `begin`
+//   and `end` of `0` and `2`. Another example is `foo[-2::-1]` which reverses the
+//   first dimension of a tensor while dropping the last two (in the original
+//   order elements). For example `foo = [1,2,3,4]; foo[-2::-1]` is `[4,3]`.
 //
-// Arguments:
+// - A single index. This is used to keep only elements that have a given
+//   index. For example (`foo[2, :]` on a shape `(5,6)` tensor produces a
+//   shape `(6,)` tensor. This is encoded in `begin` and `end` and
+//   `shrink_axis_mask`.
 //
-//	num_buckets: The number of buckets.
+// Each conceptual range specification is encoded in the op's argument. This
+// encoding is best understand by considering a non-trivial example. In
+// particular,
+// `foo[1, 2:4, None, ..., :-3:-1, :]` will be encoded as
 //
-// Returns A Tensor of the same shape as the input `string_tensor`.
-func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_buckets": num_buckets}
-	opspec := tf.OpSpec{
-		Type: "StringToHashBucket",
-		Input: []tf.Input{
-			string_tensor,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes gradients for the exponential linear (Elu) operation.
+// ```
+// begin = [1, 2, x, x, 0, x] # x denotes don't care (usually 0)
+// end = [2, 4, x, x, -3, x]
+// strides = [1, 1, x, x, -1, 1]
+// begin_mask = 1<<4 | 1 << 5 = 48
+// end_mask = 1<<5 = 32
+// ellipsis_mask = 1<<3 = 8
+// new_axis_mask = 1<<2 4
+// shrink_axis_mask = 1<<0
+// ```
 //
-// Arguments:
-//	gradients: The backpropagated gradients to the corresponding Elu operation.
-//	outputs: The outputs of the corresponding Elu operation.
+// In this case if `foo.shape` is (5, 5, 5, 5, 5, 5) the final shape of
+// the slice becomes (2, 1, 5, 5, 2, 5).
+// Let us walk step by step through each argument specification.
 //
-// Returns The gradients: `gradients * (outputs + 1)` if outputs < 0,
-// `gradients` otherwise.
-func EluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "EluGrad",
-		Input: []tf.Input{
-			gradients, outputs,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// FractionalAvgPoolGradAttr is an optional argument to FractionalAvgPoolGrad.
-type FractionalAvgPoolGradAttr func(optionalAttr)
-
-// FractionalAvgPoolGradOverlapping sets the optional overlapping attribute to value.
+// 1.  The first argument in the example slice is turned into `begin = 1` and
+// `end = begin + 1 = 2`. To disambiguate from the original spec `2:4` we
+// also set the appropriate bit in `shrink_axis_mask`.
+//
+// 2. `2:4` is contributes 2, 4, 1 to begin, end, and stride. All masks have
+// zero bits contributed.
 //
-// value: When set to True, it means when pooling, the values at the boundary
-// of adjacent pooling cells are used by both cells. For example:
+// 3. None is a synonym for `tf.newaxis`. This means insert a dimension of size 1
+// dimension in the final shape. Dummy values are contributed to begin,
+// end and stride, while the new_axis_mask bit is set.
 //
-// `index  0  1  2  3  4`
+// 4. `...` grab the full ranges from as many dimensions as needed to
+// fully specify a slice for every dimension of the input shape.
 //
-// `value  20 5  16 3  7`
+// 5. `:-3:-1` shows the use of negative indices. A negative index `i` associated
+// with a dimension that has shape `s` is converted to a positive index
+// `s + i`. So `-1` becomes `s-1` (i.e. the last element). This conversion
+// is done internally so begin, end and strides receive x, -3, and -1.
+// The appropriate begin_mask bit is set to indicate the start range is the
+// full range (ignoring the x).
 //
-// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
-// The result would be [41/3, 26/3] for fractional avg pooling.
-// If not specified, defaults to false
-func FractionalAvgPoolGradOverlapping(value bool) FractionalAvgPoolGradAttr {
-	return func(m optionalAttr) {
-		m["overlapping"] = value
-	}
-}
-
-// Computes gradient of the FractionalAvgPool function.
+// 6. `:` indicates that the entire contents of the corresponding dimension
+// is selected. This is equivalent to `::` or `0::1`. begin, end, and strides
+// receive 0, 0, and 1, respectively. The appropriate bits in `begin_mask` and
+// `end_mask` are also set.
 //
-// Unlike FractionalMaxPoolGrad, we don't need to find arg_max for
-// FractionalAvgPoolGrad, we just need to evenly back-propagate each element of
-// out_backprop to those indices that form the same pooling cell. Therefore, we
-// just need to know the shape of original input tensor, instead of the whole
-// tensor.
+// *Requirements*:
+//   `0 != strides[i] for i in [0, m)`
+//   `ellipsis_mask must be a power of two (only one ellipsis)`
 //
 // Arguments:
-//	orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool`
-//	out_backprop: 4-D with shape `[batch, height, width, channels]`.  Gradients
-// w.r.t. the output of `fractional_avg_pool`.
-//	row_pooling_sequence: row pooling sequence, form pooling region with
-// col_pooling_sequence.
-//	col_pooling_sequence: column pooling sequence, form pooling region with
-// row_pooling sequence.
 //
-// Returns 4-D.  Gradients w.r.t. the input of `fractional_avg_pool`.
-func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalAvgPoolGradAttr) (output tf.Output) {
+//	begin: `begin[k]` specifies the offset into the `k`th range specification.
+// The exact dimension this corresponds to will be determined by context.
+// Out-of-bounds values will be silently clamped. If the `k`th bit of
+// `begin_mask` then `begin[k]` is ignored and the full range of the
+// appropriate dimension is used instead. Negative values causes indexing
+// to start from the highest element e.g. If `foo==[1,2,3]` then `foo[-1]==3`.
+//	end: `end[i]` is like `begin` with the exception that `end_mask` is
+// used to determine full ranges.
+//	strides: `strides[i]` specifies the increment in the `i`th specification
+// after extracting a given element. Negative indices will reverse
+// the original order. Out or range values are
+// clamped to `[0,dim[i]) if slice[i]>0` or `[-1,dim[i]-1] if slice[i] < 0`
+func StridedSlice(scope *Scope, input tf.Output, begin tf.Output, end tf.Output, strides tf.Output, optional ...StridedSliceAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -21846,9 +21835,9 @@ func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FractionalAvgPoolGrad",
+		Type: "StridedSlice",
 		Input: []tf.Input{
-			orig_input_tensor_shape, out_backprop, row_pooling_sequence, col_pooling_sequence,
+			input, begin, end, strides,
 		},
 		Attrs: attrs,
 	}
@@ -21856,216 +21845,176 @@ func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_
 	return op.Output(0)
 }
 
-// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)`
+// PriorityQueueV2Attr is an optional argument to PriorityQueueV2.
+type PriorityQueueV2Attr func(optionalAttr)
+
+// PriorityQueueV2ComponentTypes sets the optional component_types attribute to value.
 //
-// if < 0, `scale * features` otherwise.
+// value: The type of each component in a value.
+// If not specified, defaults to <>
 //
-// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
-func Selu(scope *Scope, features tf.Output) (activations tf.Output) {
-	if scope.Err() != nil {
-		return
+// REQUIRES: len(value) >= 0
+func PriorityQueueV2ComponentTypes(value []tf.DataType) PriorityQueueV2Attr {
+	return func(m optionalAttr) {
+		m["component_types"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "Selu",
-		Input: []tf.Input{
-			features,
-		},
+}
+
+// PriorityQueueV2Capacity sets the optional capacity attribute to value.
+//
+// value: The upper bound on the number of elements in this queue.
+// Negative numbers mean no limit.
+// If not specified, defaults to -1
+func PriorityQueueV2Capacity(value int64) PriorityQueueV2Attr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// ResourceSparseApplyAdadeltaAttr is an optional argument to ResourceSparseApplyAdadelta.
-type ResourceSparseApplyAdadeltaAttr func(optionalAttr)
+// PriorityQueueV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this queue is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func PriorityQueueV2Container(value string) PriorityQueueV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
 
-// ResourceSparseApplyAdadeltaUseLocking sets the optional use_locking attribute to value.
+// PriorityQueueV2SharedName sets the optional shared_name attribute to value.
 //
-// value: If True, updating of the var and accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
-// If not specified, defaults to false
-func ResourceSparseApplyAdadeltaUseLocking(value bool) ResourceSparseApplyAdadeltaAttr {
+// value: If non-empty, this queue will be shared under the given name
+// across multiple sessions.
+// If not specified, defaults to ""
+func PriorityQueueV2SharedName(value string) PriorityQueueV2Attr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["shared_name"] = value
 	}
 }
 
-// var: Should be from a Variable().
+// A queue that produces elements sorted by the first component value.
 //
-// Arguments:
+// Note that the PriorityQueue requires the first component of any element
+// to be a scalar int64, in addition to the other elements declared by
+// component_types.  Therefore calls to Enqueue and EnqueueMany (resp. Dequeue
+// and DequeueMany) on a PriorityQueue will all require (resp. output) one extra
+// entry in their input (resp. output) lists.
 //
-//	accum: Should be from a Variable().
-//	accum_update: : Should be from a Variable().
-//	lr: Learning rate. Must be a scalar.
-//	rho: Decay factor. Must be a scalar.
-//	epsilon: Constant factor. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
+// Arguments:
+//	shapes: The shape of each component in a value. The length of this attr must
+// be either 0 or the same as the length of component_types. If the length of
+// this attr is 0, the shapes of queue elements are not constrained, and
+// only one element may be dequeued at a time.
 //
-// Returns the created operation.
-func ResourceSparseApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdadeltaAttr) (o *tf.Operation) {
+// Returns The handle to the queue.
+func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV2Attr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"shapes": shapes}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyAdadelta",
-		Input: []tf.Input{
-			var_, accum, accum_update, lr, rho, epsilon, grad, indices,
-		},
+		Type: "PriorityQueueV2",
+
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Returns which elements of x are NaN.
+// UnstageAttr is an optional argument to Unstage.
+type UnstageAttr func(optionalAttr)
+
+// UnstageCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
 //
-// @compatibility(numpy)
-// Equivalent to np.isnan
-// @end_compatibility
-func IsNan(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "IsNan",
-		Input: []tf.Input{
-			x,
-		},
+// REQUIRES: value >= 0
+func UnstageCapacity(value int64) UnstageAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Pads a tensor.
-//
-// This operation pads `input` according to the `paddings` and `constant_values`
-// you specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is
-// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
-// how many padding values to add before the contents of `input` in that dimension,
-// and `paddings[D, 1]` indicates how many padding values to add after the contents
-// of `input` in that dimension. `constant_values` is a scalar tensor of the same
-// type as `input` that indicates the value to use for padding `input`.
-//
-// The padded size of each dimension D of the output is:
-//
-// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
-//
-// For example:
+// UnstageMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
 //
-// ```
-// # 't' is [[1, 1], [2, 2]]
-// # 'paddings' is [[1, 1], [2, 2]]
-// # 'constant_values' is 0
-// # rank of 't' is 2
-// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
-//                       [0, 0, 1, 1, 0, 0]
-//                       [0, 0, 2, 2, 0, 0]
-//                       [0, 0, 0, 0, 0, 0]]
-// ```
-func PadV2(scope *Scope, input tf.Output, paddings tf.Output, constant_values tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
+// REQUIRES: value >= 0
+func UnstageMemoryLimit(value int64) UnstageAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "PadV2",
-		Input: []tf.Input{
-			input, paddings, constant_values,
-		},
+}
+
+// UnstageContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func UnstageContainer(value string) UnstageAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes gradients for the scaled exponential linear (Selu) operation.
-//
-// Arguments:
-//	gradients: The backpropagated gradients to the corresponding Selu operation.
-//	outputs: The outputs of the corresponding Selu operation.
+// UnstageSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func UnstageSharedName(value string) UnstageAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op is similar to a lightweight Dequeue.
 //
-// Returns The gradients: `gradients * (outputs + scale * alpha)`
-// if outputs < 0, `scale * gradients` otherwise.
-func SeluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) {
+// The basic functionality is similar to dequeue with many fewer
+// capabilities and options.  This Op is optimized for performance.
+func Unstage(scope *Scope, dtypes []tf.DataType, optional ...UnstageAttr) (values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SeluGrad",
-		Input: []tf.Input{
-			gradients, outputs,
-		},
+		Type: "Unstage",
+
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes softplus: `log(exp(features) + 1)`.
-func Softplus(scope *Scope, features tf.Output) (activations tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "Softplus",
-		Input: []tf.Input{
-			features,
-		},
+	var idx int
+	var err error
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("Unstage", err)
+		return
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return values
 }
 
-// BatchMatMulAttr is an optional argument to BatchMatMul.
-type BatchMatMulAttr func(optionalAttr)
-
-// BatchMatMulAdjX sets the optional adj_x attribute to value.
-//
-// value: If `True`, adjoint the slices of `x`. Defaults to `False`.
-// If not specified, defaults to false
-func BatchMatMulAdjX(value bool) BatchMatMulAttr {
-	return func(m optionalAttr) {
-		m["adj_x"] = value
-	}
-}
+// ArgMaxAttr is an optional argument to ArgMax.
+type ArgMaxAttr func(optionalAttr)
 
-// BatchMatMulAdjY sets the optional adj_y attribute to value.
-//
-// value: If `True`, adjoint the slices of `y`. Defaults to `False`.
-// If not specified, defaults to false
-func BatchMatMulAdjY(value bool) BatchMatMulAttr {
+// ArgMaxOutputType sets the optional output_type attribute to value.
+// If not specified, defaults to DT_INT64
+func ArgMaxOutputType(value tf.DataType) ArgMaxAttr {
 	return func(m optionalAttr) {
-		m["adj_y"] = value
+		m["output_type"] = value
 	}
 }
 
-// Multiplies slices of two tensors in batches.
-//
-// Multiplies all slices of `Tensor` `x` and `y` (each slice can be
-// viewed as an element of a batch), and arranges the individual results
-// in a single output tensor of the same batch size. Each of the
-// individual slices can optionally be adjointed (to adjoint a matrix
-// means to transpose and conjugate it) before multiplication by setting
-// the `adj_x` or `adj_y` flag to `True`, which are by default `False`.
-//
-// The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]`
-// and `[..., r_y, c_y]`.
-//
-// The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where:
-//
-//     r_o = c_x if adj_x else r_x
-//     c_o = r_y if adj_y else c_y
-//
-// It is computed as:
+// Returns the index with the largest value across dimensions of a tensor.
 //
-//     output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :])
+// Note that in case of ties the identity of the return value is not guaranteed.
 //
 // Arguments:
-//	x: 2-D or higher with shape `[..., r_x, c_x]`.
-//	y: 2-D or higher with shape `[..., r_y, c_y]`.
 //
-// Returns 3-D or higher with shape `[..., r_o, c_o]`
-func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMulAttr) (output tf.Output) {
+//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
+// Describes which dimension of the input Tensor to reduce across. For vectors,
+// use dimension = 0.
+func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMaxAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -22074,9 +22023,9 @@ func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMul
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "BatchMatMul",
+		Type: "ArgMax",
 		Input: []tf.Input{
-			x, y,
+			input, dimension,
 		},
 		Attrs: attrs,
 	}
@@ -22084,75 +22033,60 @@ func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMul
 	return op.Output(0)
 }
 
-// Computes softplus gradients for a softplus operation.
-//
-// Arguments:
-//	gradients: The backpropagated gradients to the corresponding softplus operation.
-//	features: The features passed as input to the corresponding softplus operation.
-//
-// Returns The gradients: `gradients / (1 + exp(-features))`.
-func SoftplusGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SoftplusGrad",
-		Input: []tf.Input{
-			gradients, features,
-		},
+// ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
+type ResourceStridedSliceAssignAttr func(optionalAttr)
+
+// ResourceStridedSliceAssignBeginMask sets the optional begin_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignBeginMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["begin_mask"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes softsign gradients for a softsign operation.
-//
-// Arguments:
-//	gradients: The backpropagated gradients to the corresponding softsign operation.
-//	features: The features passed as input to the corresponding softsign operation.
-//
-// Returns The gradients: `gradients / (1 + abs(features)) ** 2`.
-func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
-	if scope.Err() != nil {
-		return
+// ResourceStridedSliceAssignEndMask sets the optional end_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignEndMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["end_mask"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "SoftsignGrad",
-		Input: []tf.Input{
-			gradients, features,
-		},
+}
+
+// ResourceStridedSliceAssignEllipsisMask sets the optional ellipsis_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignEllipsisMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["ellipsis_mask"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// DecodeBmpAttr is an optional argument to DecodeBmp.
-type DecodeBmpAttr func(optionalAttr)
+// ResourceStridedSliceAssignNewAxisMask sets the optional new_axis_mask attribute to value.
+// If not specified, defaults to 0
+func ResourceStridedSliceAssignNewAxisMask(value int64) ResourceStridedSliceAssignAttr {
+	return func(m optionalAttr) {
+		m["new_axis_mask"] = value
+	}
+}
 
-// DecodeBmpChannels sets the optional channels attribute to value.
+// ResourceStridedSliceAssignShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
 // If not specified, defaults to 0
-func DecodeBmpChannels(value int64) DecodeBmpAttr {
+func ResourceStridedSliceAssignShrinkAxisMask(value int64) ResourceStridedSliceAssignAttr {
 	return func(m optionalAttr) {
-		m["channels"] = value
+		m["shrink_axis_mask"] = value
 	}
 }
 
-// Decode the first frame of a BMP-encoded image to a uint8 tensor.
-//
-// The attr `channels` indicates the desired number of color channels for the
-// decoded image.
-//
-// Accepted values are:
+// Assign `value` to the sliced l-value reference of `ref`.
 //
-// *   0: Use the number of channels in the BMP-encoded image.
-// *   3: output an RGB image.
-// *   4: output an RGBA image.
+// The values of `value` are assigned to the positions in the variable
+// `ref` that are selected by the slice parameters. The slice parameters
+// `begin, `end`, `strides`, etc. work exactly as in `StridedSlice`.
 //
-// Arguments:
-//	contents: 0-D.  The BMP-encoded image.
+// NOTE this op currently does not support broadcasting and so `value`'s
+// shape must be exactly the shape produced by the slice of `ref`.
 //
-// Returns 3-D with shape `[height, width, channels]`. RGB order
-func DecodeBmp(scope *Scope, contents tf.Output, optional ...DecodeBmpAttr) (image tf.Output) {
+// Returns the created operation.
+func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...ResourceStridedSliceAssignAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -22161,1672 +22095,1985 @@ func DecodeBmp(scope *Scope, contents tf.Output, optional ...DecodeBmpAttr) (ima
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DecodeBmp",
+		Type: "ResourceStridedSliceAssign",
 		Input: []tf.Input{
-			contents,
+			ref, begin, end, strides, value,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Computes softmax activations.
+// QueueEnqueueV2Attr is an optional argument to QueueEnqueueV2.
+type QueueEnqueueV2Attr func(optionalAttr)
+
+// QueueEnqueueV2TimeoutMs sets the optional timeout_ms attribute to value.
 //
-// For each batch `i` and class `j` we have
+// value: If the queue is full, this operation will block for up to
+// timeout_ms milliseconds.
+// Note: This option is not supported yet.
+// If not specified, defaults to -1
+func QueueEnqueueV2TimeoutMs(value int64) QueueEnqueueV2Attr {
+	return func(m optionalAttr) {
+		m["timeout_ms"] = value
+	}
+}
+
+// Enqueues a tuple of one or more tensors in the given queue.
 //
-//     softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j]))
+// The components input has k elements, which correspond to the components of
+// tuples stored in the given queue.
+//
+// N.B. If the queue is full, this operation will block until the given
+// element has been enqueued (or 'timeout_ms' elapses, if specified).
 //
 // Arguments:
-//	logits: 2-D with shape `[batch_size, num_classes]`.
+//	handle: The handle to a queue.
+//	components: One or more tensors from which the enqueued tensors should be taken.
 //
-// Returns Same shape as `logits`.
-func Softmax(scope *Scope, logits tf.Output) (softmax tf.Output) {
+// Returns the created operation.
+func QueueEnqueueV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueV2Attr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Softmax",
+		Type: "QueueEnqueueV2",
 		Input: []tf.Input{
-			logits,
+			handle, tf.OutputList(components),
 		},
+		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2.
-type RandomShuffleQueueV2Attr func(optionalAttr)
-
-// RandomShuffleQueueV2Shapes sets the optional shapes attribute to value.
-//
-// value: The shape of each component in a value. The length of this attr must
-// be either 0 or the same as the length of component_types. If the length of
-// this attr is 0, the shapes of queue elements are not constrained, and
-// only one element may be dequeued at a time.
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func RandomShuffleQueueV2Shapes(value []tf.Shape) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shapes"] = value
-	}
-}
-
-// RandomShuffleQueueV2Capacity sets the optional capacity attribute to value.
-//
-// value: The upper bound on the number of elements in this queue.
-// Negative numbers mean no limit.
-// If not specified, defaults to -1
-func RandomShuffleQueueV2Capacity(value int64) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// RandomShuffleQueueV2MinAfterDequeue sets the optional min_after_dequeue attribute to value.
-//
-// value: Dequeue will block unless there would be this
-// many elements after the dequeue or the queue is closed. This
-// ensures a minimum level of mixing of elements.
-// If not specified, defaults to 0
-func RandomShuffleQueueV2MinAfterDequeue(value int64) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["min_after_dequeue"] = value
-	}
+	return scope.AddOperation(opspec)
 }
 
-// RandomShuffleQueueV2Seed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 is set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, a random seed is used.
-// If not specified, defaults to 0
-func RandomShuffleQueueV2Seed(value int64) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
+// QueueDequeueManyV2Attr is an optional argument to QueueDequeueManyV2.
+type QueueDequeueManyV2Attr func(optionalAttr)
 
-// RandomShuffleQueueV2Seed2 sets the optional seed2 attribute to value.
+// QueueDequeueManyV2TimeoutMs sets the optional timeout_ms attribute to value.
 //
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomShuffleQueueV2Seed2(value int64) RandomShuffleQueueV2Attr {
+// value: If the queue has fewer than n elements, this operation
+// will block for up to timeout_ms milliseconds.
+// Note: This option is not supported yet.
+// If not specified, defaults to -1
+func QueueDequeueManyV2TimeoutMs(value int64) QueueDequeueManyV2Attr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["timeout_ms"] = value
 	}
 }
 
-// RandomShuffleQueueV2Container sets the optional container attribute to value.
+// Dequeues `n` tuples of one or more tensors from the given queue.
 //
-// value: If non-empty, this queue is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func RandomShuffleQueueV2Container(value string) RandomShuffleQueueV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
+// If the queue is closed and there are fewer than `n` elements, then an
+// OutOfRange error is returned.
+//
+// This operation concatenates queue-element component tensors along the
+// 0th dimension to make a single component tensor.  All of the components
+// in the dequeued tuple will have size `n` in the 0th dimension.
+//
+// This operation has `k` outputs, where `k` is the number of components in
+// the tuples stored in the given queue, and output `i` is the ith
+// component of the dequeued tuple.
+//
+// N.B. If the queue is empty, this operation will block until `n` elements
+// have been dequeued (or 'timeout_ms' elapses, if specified).
+//
+// Arguments:
+//	handle: The handle to a queue.
+//	n: The number of tuples to dequeue.
+//	component_types: The type of each component in a tuple.
+//
+// Returns One or more tensors that were dequeued as a tuple.
+func QueueDequeueManyV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueManyV2Attr) (components []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"component_types": component_types}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "QueueDequeueManyV2",
+		Input: []tf.Input{
+			handle, n,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
 	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("QueueDequeueManyV2", err)
+		return
+	}
+	return components
 }
 
-// RandomShuffleQueueV2SharedName sets the optional shared_name attribute to value.
+// EncodeBase64Attr is an optional argument to EncodeBase64.
+type EncodeBase64Attr func(optionalAttr)
+
+// EncodeBase64Pad sets the optional pad attribute to value.
 //
-// value: If non-empty, this queue will be shared under the given name
-// across multiple sessions.
-// If not specified, defaults to ""
-func RandomShuffleQueueV2SharedName(value string) RandomShuffleQueueV2Attr {
+// value: Bool whether padding is applied at the ends.
+// If not specified, defaults to false
+func EncodeBase64Pad(value bool) EncodeBase64Attr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["pad"] = value
 	}
 }
 
-// A queue that randomizes the order of elements.
+// Encode strings into web-safe base64 format.
+//
+// Refer to the following article for more information on base64 format:
+// en.wikipedia.org/wiki/Base64. Base64 strings may have padding with '=' at the
+// end so that the encoded has length multiple of 4. See Padding section of the
+// link above.
+//
+// Web-safe means that the encoder uses - and _ instead of + and /.
 //
 // Arguments:
-//	component_types: The type of each component in a value.
+//	input: Strings to be encoded.
 //
-// Returns The handle to the queue.
-func RandomShuffleQueueV2(scope *Scope, component_types []tf.DataType, optional ...RandomShuffleQueueV2Attr) (handle tf.Output) {
+// Returns Input strings encoded in base64.
+func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"component_types": component_types}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "RandomShuffleQueueV2",
-
+		Type: "EncodeBase64",
+		Input: []tf.Input{
+			input,
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Outputs a `Summary` protocol buffer with scalar values.
-//
-// The input `tags` and `values` must have the same shape.  The generated summary
-// has a summary value for each tag-value pair in `tags` and `values`.
-//
-// Arguments:
-//	tags: Tags for the summary.
-//	values: Same shape as `tags.  Values for the summary.
+// Deprecated. Use TensorArrayCloseV3
 //
-// Returns Scalar.  Serialized `Summary` protocol buffer.
-func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) {
+// Returns the created operation.
+func TensorArrayCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "ScalarSummary",
+		Type: "TensorArrayCloseV2",
 		Input: []tf.Input{
-			tags, values,
+			handle,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Computes inverse hyperbolic tangent of x element-wise.
+func Atanh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Atanh",
+		Input: []tf.Input{
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns the truth value of (x <= y) element-wise.
+// Returns true if queue is closed.
 //
-// *NOTE*: `LessEqual` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func LessEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// This operation returns true if the queue is closed and false if the queue
+// is open.
+//
+// Arguments:
+//	handle: The handle to a queue.
+func QueueIsClosedV2(scope *Scope, handle tf.Output) (is_closed tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "LessEqual",
+		Type: "QueueIsClosedV2",
 		Input: []tf.Input{
-			x, y,
+			handle,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes log softmax activations.
+// Returns the batched diagonal part of a batched tensor.
 //
-// For each batch `i` and class `j` we have
+// This operation returns a tensor with the `diagonal` part
+// of the batched `input`. The `diagonal` part is computed as follows:
 //
-//     logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i])))
+// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a
+// tensor of rank `k - 1` with dimensions `[I, J, K, ..., min(M, N)]` where:
+//
+// `diagonal[i, j, k, ..., n] = input[i, j, k, ..., n, n]`.
+//
+// The input must be at least a matrix.
+//
+// For example:
+//
+// ```
+// # 'input' is [[[1, 0, 0, 0]
+//                [0, 2, 0, 0]
+//                [0, 0, 3, 0]
+//                [0, 0, 0, 4]],
+//               [[5, 0, 0, 0]
+//                [0, 6, 0, 0]
+//                [0, 0, 7, 0]
+//                [0, 0, 0, 8]]]
+//
+// and input.shape = (2, 4, 4)
+//
+// tf.matrix_diag_part(input) ==> [[1, 2, 3, 4], [5, 6, 7, 8]]
+//
+// which has shape (2, 4)
+// ```
 //
 // Arguments:
-//	logits: 2-D with shape `[batch_size, num_classes]`.
+//	input: Rank `k` tensor where `k >= 2`.
 //
-// Returns Same shape as `logits`.
-func LogSoftmax(scope *Scope, logits tf.Output) (logsoftmax tf.Output) {
+// Returns The extracted diagonal(s) having shape
+// `diagonal.shape = input.shape[:-2] + [min(input.shape[-2:])]`.
+func MatrixDiagPart(scope *Scope, input tf.Output) (diagonal tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "LogSoftmax",
+		Type: "MatrixDiagPart",
 		Input: []tf.Input{
-			logits,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Given a quantized tensor described by (input, input_min, input_max), outputs a
-//
-// range that covers the actual values present in that tensor.  This op is
-// typically used to produce the requested_output_min and requested_output_max for
-// Requantize.
-//
-// Arguments:
-//
-//	input_min: The float value that the minimum quantized input value represents.
-//	input_max: The float value that the maximum quantized input value represents.
+// Computes the absolute value of a tensor.
 //
-// Returns The computed min output.the computed max output.
-func RequantizationRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output) (output_min tf.Output, output_max tf.Output) {
+// Given a tensor `x`, this operation returns a tensor containing the absolute
+// value of each element in `x`. For example, if x is an input element and y is
+// an output element, this operation computes \\(y = |x|\\).
+func Abs(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "RequantizationRange",
+		Type: "Abs",
 		Input: []tf.Input{
-			input, input_min, input_max,
+			x,
 		},
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// OrderedMapStageAttr is an optional argument to OrderedMapStage.
+type OrderedMapStageAttr func(optionalAttr)
+
+// OrderedMapStageCapacity sets the optional capacity attribute to value.
+//
+// value: Maximum number of elements in the Staging Area. If > 0, inserts
+// on the container will block when the capacity is reached.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapStageCapacity(value int64) OrderedMapStageAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// OrderedMapStageContainer sets the optional container attribute to value.
+//
+// value: If non-empty, this queue is placed in the given container. Otherwise,
+// a default container is used.
+// If not specified, defaults to ""
+func OrderedMapStageContainer(value string) OrderedMapStageAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
 }
 
-// Says whether the targets are in the top `K` predictions.
+// OrderedMapStageSharedName sets the optional shared_name attribute to value.
 //
-// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the
-// prediction for the target class is among the top `k` predictions among
-// all predictions for example `i`. Note that the behavior of `InTopK` differs
-// from the `TopK` op in its handling of ties; if multiple classes have the
-// same prediction value and straddle the top-`k` boundary, all of those
-// classes are considered to be in the top `k`.
+// value: It is necessary to match this name to the matching Unstage Op.
+// If not specified, defaults to ""
+func OrderedMapStageSharedName(value string) OrderedMapStageAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Stage (key, values) in the underlying container which behaves like a ordered
 //
-// More formally, let
+// associative container.   Elements are ordered by key.
 //
-//   \\(predictions_i\\) be the predictions for all classes for example `i`,
-//   \\(targets_i\\) be the target class for example `i`,
-//   \\(out_i\\) be the output for example `i`,
+// Arguments:
+//	key: int64
 //
-// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$
+//	values: a list of tensors
+// dtypes A list of data types that inserted values should adhere to.
 //
-// Arguments:
-//	predictions: A `batch_size` x `classes` tensor.
-//	targets: A `batch_size` vector of class ids.
-//	k: Number of top elements to look at for computing precision.
 //
-// Returns Computed Precision at `k` as a `bool Tensor`.
-func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) {
+// Returns the created operation.
+func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"k": k}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "InTopK",
+		Type: "OrderedMapStage",
 		Input: []tf.Input{
-			predictions, targets,
+			key, indices, tf.OutputList(values),
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Returns a batched diagonal tensor with a given batched diagonal values.
-//
-// Given a `diagonal`, this operation returns a tensor with the `diagonal` and
-// everything else padded with zeros. The diagonal is computed as follows:
-//
-// Assume `diagonal` has `k` dimensions `[I, J, K, ..., N]`, then the output is a
-// tensor of rank `k+1` with dimensions [I, J, K, ..., N, N]` where:
-//
-// `output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n]`.
-//
-// For example:
-//
-// ```
-// # 'diagonal' is [[1, 2, 3, 4], [5, 6, 7, 8]]
-//
-// and diagonal.shape = (2, 4)
-//
-// tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0]
-//                                      [0, 2, 0, 0]
-//                                      [0, 0, 3, 0]
-//                                      [0, 0, 0, 4]],
-//                                     [[5, 0, 0, 0]
-//                                      [0, 6, 0, 0]
-//                                      [0, 0, 7, 0]
-//                                      [0, 0, 0, 8]]]
+// StackPushV2Attr is an optional argument to StackPushV2.
+type StackPushV2Attr func(optionalAttr)
+
+// StackPushV2SwapMemory sets the optional swap_memory attribute to value.
 //
-// which has shape (2, 4, 4)
-// ```
+// value: Swap `elem` to CPU. Default to false.
+// If not specified, defaults to false
+func StackPushV2SwapMemory(value bool) StackPushV2Attr {
+	return func(m optionalAttr) {
+		m["swap_memory"] = value
+	}
+}
+
+// Push an element onto the stack.
 //
 // Arguments:
-//	diagonal: Rank `k`, where `k >= 1`.
+//	handle: The handle to a stack.
+//	elem: The tensor to be pushed onto the stack.
 //
-// Returns Rank `k+1`, with `output.shape = diagonal.shape + [diagonal.shape[-1]]`.
-func MatrixDiag(scope *Scope, diagonal tf.Output) (output tf.Output) {
+// Returns The same tensor as the input 'elem'.
+func StackPushV2(scope *Scope, handle tf.Output, elem tf.Output, optional ...StackPushV2Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "MatrixDiag",
+		Type: "StackPushV2",
 		Input: []tf.Input{
-			diagonal,
+			handle, elem,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MaxPool3DAttr is an optional argument to MaxPool3D.
-type MaxPool3DAttr func(optionalAttr)
+// FusedBatchNormGradV2Attr is an optional argument to FusedBatchNormGradV2.
+type FusedBatchNormGradV2Attr func(optionalAttr)
 
-// MaxPool3DDataFormat sets the optional data_format attribute to value.
+// FusedBatchNormGradV2Epsilon sets the optional epsilon attribute to value.
 //
-// value: The data format of the input and output data. With the
-// default format "NDHWC", the data is stored in the order of:
-//     [batch, in_depth, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCDHW", the data storage order is:
-//     [batch, in_channels, in_depth, in_height, in_width].
-// If not specified, defaults to "NDHWC"
-func MaxPool3DDataFormat(value string) MaxPool3DAttr {
+// value: A small float number added to the variance of x.
+// If not specified, defaults to 0.0001
+func FusedBatchNormGradV2Epsilon(value float32) FusedBatchNormGradV2Attr {
+	return func(m optionalAttr) {
+		m["epsilon"] = value
+	}
+}
+
+// FusedBatchNormGradV2DataFormat sets the optional data_format attribute to value.
+//
+// value: The data format for y_backprop, x, x_backprop.
+// Either "NHWC" (default) or "NCHW".
+// If not specified, defaults to "NHWC"
+func FusedBatchNormGradV2DataFormat(value string) FusedBatchNormGradV2Attr {
 	return func(m optionalAttr) {
 		m["data_format"] = value
 	}
 }
 
-// Performs 3D max pooling on the input.
+// FusedBatchNormGradV2IsTraining sets the optional is_training attribute to value.
+//
+// value: A bool value to indicate the operation is for training (default)
+// or inference.
+// If not specified, defaults to true
+func FusedBatchNormGradV2IsTraining(value bool) FusedBatchNormGradV2Attr {
+	return func(m optionalAttr) {
+		m["is_training"] = value
+	}
+}
+
+// Gradient for batch normalization.
+//
+// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
+// The size of 1D Tensors matches the dimension C of the 4D Tensors.
 //
 // Arguments:
-//	input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over.
-//	ksize: 1-D tensor of length 5. The size of the window for each dimension of
-// the input tensor. Must have `ksize[0] = ksize[4] = 1`.
-//	strides: 1-D tensor of length 5. The stride of the sliding window for each
-// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
-//	padding: The type of padding algorithm to use.
+//	y_backprop: A 4D Tensor for the gradient with respect to y.
+//	x: A 4D Tensor for input data.
+//	scale: A 1D Tensor for scaling factor, to scale the normalized x.
+//	reserve_space_1: When is_training is True, a 1D Tensor for the computed batch
+// mean to be reused in gradient computation. When is_training is
+// False, a 1D Tensor for the population mean to be reused in both
+// 1st and 2nd order gradient computation.
+//	reserve_space_2: When is_training is True, a 1D Tensor for the computed batch
+// variance (inverted variance in the cuDNN case) to be reused in
+// gradient computation. When is_training is False, a 1D Tensor
+// for the population variance to be reused in both 1st and 2nd
+// order gradient computation.
 //
-// Returns The max pooled output tensor.
-func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DAttr) (output tf.Output) {
+// Returns A 4D Tensor for the gradient with respect to x.A 1D Tensor for the gradient with respect to scale.A 1D Tensor for the gradient with respect to offset.Unused placeholder to match the mean input in FusedBatchNorm.Unused placeholder to match the variance input
+// in FusedBatchNorm.
+func FusedBatchNormGradV2(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradV2Attr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MaxPool3D",
+		Type: "FusedBatchNormGradV2",
 		Input: []tf.Input{
-			input,
+			y_backprop, x, scale, reserve_space_1, reserve_space_2,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
 }
 
-// Returns x // y element-wise.
+// Creates a TensorArray for storing the gradients of values in the given handle.
 //
-// *NOTE*: `FloorDiv` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func FloorDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// If the given TensorArray gradient already exists, returns a reference to it.
+//
+// Locks the size of the original TensorArray by disabling its dynamic size flag.
+//
+// **A note about the input flow_in:**
+//
+// The handle flow_in forces the execution of the gradient lookup to occur
+// only after certain other operations have occurred.  For example, when
+// the forward TensorArray is dynamically sized, writes to this TensorArray
+// may resize the object.  The gradient TensorArray is statically sized based
+// on the size of the forward TensorArray when this operation executes.
+// Furthermore, the size of the forward TensorArray is frozen by this call.
+// As a result, the flow is used to ensure that the call to generate the gradient
+// TensorArray only happens after all writes are executed.
+//
+// In the case of dynamically sized TensorArrays, gradient computation should
+// only be performed on read operations that have themselves been chained via
+// flow to occur only after all writes have executed. That way the final size
+// of the forward TensorArray is known when this operation is called.
+//
+// **A note about the source attribute:**
+//
+// TensorArray gradient calls use an accumulator TensorArray object.  If
+// multiple gradients are calculated and run in the same session, the multiple
+// gradient nodes may accidentally flow through the same accumulator TensorArray.
+// This double counts and generally breaks the TensorArray gradient flow.
+//
+// The solution is to identify which gradient call this particular
+// TensorArray gradient is being called in.  This is performed by identifying
+// a unique string (e.g. "gradients", "gradients_1", ...) from the input
+// gradient Tensor's name.  This string is used as a suffix when creating
+// the TensorArray gradient object here (the attribute `source`).
+//
+// The attribute `source` is added as a suffix to the forward TensorArray's
+// name when performing the creation / lookup, so that each separate gradient
+// calculation gets its own TensorArray accumulator.
+//
+// Arguments:
+//	handle: The handle to the forward TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//	source: The gradient source string, used to decide which gradient TensorArray
+// to return.
+func TensorArrayGradV3(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"source": source}
 	opspec := tf.OpSpec{
-		Type: "FloorDiv",
+		Type: "TensorArrayGradV3",
 		Input: []tf.Input{
-			x, y,
+			handle, flow_in,
 		},
+		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// TopKAttr is an optional argument to TopK.
-type TopKAttr func(optionalAttr)
-
-// TopKSorted sets the optional sorted attribute to value.
-//
-// value: If true the resulting `k` elements will be sorted by the values in
-// descending order.
-// If not specified, defaults to true
-func TopKSorted(value bool) TopKAttr {
-	return func(m optionalAttr) {
-		m["sorted"] = value
-	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
 }
 
-// Finds values and indices of the `k` largest elements for the last dimension.
-//
-// DEPRECATED at GraphDef version 7: Use TopKV2 instead
+// Compare values of `input` to `threshold` and pack resulting bits into a `uint8`.
 //
-// If the input is a vector (rank-1), finds the `k` largest entries in the vector
-// and outputs their values and indices as vectors.  Thus `values[j]` is the
-// `j`-th largest entry in `input`, and its index is `indices[j]`.
+// Each comparison returns a boolean `true` (if `input_value > threshold`)
+// or and `false` otherwise.
 //
-// For matrices (resp. higher rank input), computes the top `k` entries in each
-// row (resp. vector along the last dimension).  Thus,
+// This operation is useful for Locality-Sensitive-Hashing (LSH) and other
+// algorithms that use hashing approximations of cosine and `L2` distances;
+// codes can be generated from an input via:
 //
-//     values.shape = indices.shape = input.shape[:-1] + [k]
+// ```python
+// codebook_size = 50
+// codebook_bits = codebook_size * 32
+// codebook = tf.get_variable('codebook', [x.shape[-1].value, codebook_bits],
+//                            dtype=x.dtype,
+//                            initializer=tf.orthogonal_initializer())
+// codes = compare_and_threshold(tf.matmul(x, codebook), threshold=0.)
+// codes = tf.bitcast(codes, tf.int32)  # go from uint8 to int32
+// # now codes has shape x.shape[:-1] + [codebook_size]
+// ```
 //
-// If two elements are equal, the lower-index element appears first.
+// **NOTE**: Currently, the innermost dimension of the tensor must be divisible
+// by 8.
 //
-// If `k` varies dynamically, use `TopKV2` below.
+// Given an `input` shaped `[s0, s1, ..., s_n]`, the output is
+// a `uint8` tensor shaped `[s0, s1, ..., s_n / 8]`.
 //
 // Arguments:
-//	input: 1-D or higher with last dimension at least `k`.
-//	k: Number of top elements to look for along the last dimension (along each
-// row for matrices).
+//	input: Values to compare against `threshold` and bitpack.
+//	threshold: Threshold to compare against.
 //
-// Returns The `k` largest elements along each last dimensional slice.The indices of `values` within the last dimension of `input`.
-func TopK(scope *Scope, input tf.Output, k int64, optional ...TopKAttr) (values tf.Output, indices tf.Output) {
+// Returns The bitpacked comparisons.
+func CompareAndBitpack(scope *Scope, input tf.Output, threshold tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"k": k}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "TopK",
+		Type: "CompareAndBitpack",
 		Input: []tf.Input{
-			input,
+			input, threshold,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// TopKV2Attr is an optional argument to TopKV2.
-type TopKV2Attr func(optionalAttr)
-
-// TopKV2Sorted sets the optional sorted attribute to value.
+// Push an element onto the tensor_array.
 //
-// value: If true the resulting `k` elements will be sorted by the values in
-// descending order.
-// If not specified, defaults to true
-func TopKV2Sorted(value bool) TopKV2Attr {
-	return func(m optionalAttr) {
-		m["sorted"] = value
+// Arguments:
+//	handle: The handle to a TensorArray.
+//	index: The position to write to inside the TensorArray.
+//	value: The tensor to write to the TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//
+// Returns A float scalar that enforces proper chaining of operations.
+func TensorArrayWriteV3(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayWriteV3",
+		Input: []tf.Input{
+			handle, index, value, flow_in,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Finds values and indices of the `k` largest elements for the last dimension.
-//
-// If the input is a vector (rank-1), finds the `k` largest entries in the vector
-// and outputs their values and indices as vectors.  Thus `values[j]` is the
-// `j`-th largest entry in `input`, and its index is `indices[j]`.
-//
-// For matrices (resp. higher rank input), computes the top `k` entries in each
-// row (resp. vector along the last dimension).  Thus,
-//
-//     values.shape = indices.shape = input.shape[:-1] + [k]
+// Scatter the data from the input value into specific TensorArray elements.
 //
-// If two elements are equal, the lower-index element appears first.
+// `indices` must be a vector, its length must match the first dim of `value`.
 //
 // Arguments:
-//	input: 1-D or higher with last dimension at least `k`.
-//	k: 0-D.  Number of top elements to look for along the last dimension (along each
-// row for matrices).
+//	handle: The handle to a TensorArray.
+//	indices: The locations at which to write the tensor elements.
+//	value: The concatenated tensor to write to the TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
 //
-// Returns The `k` largest elements along each last dimensional slice.The indices of `values` within the last dimension of `input`.
-func TopKV2(scope *Scope, input tf.Output, k tf.Output, optional ...TopKV2Attr) (values tf.Output, indices tf.Output) {
+// Returns A float scalar that enforces proper chaining of operations.
+func TensorArrayScatterV3(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "TopKV2",
+		Type: "TensorArrayScatterV3",
 		Input: []tf.Input{
-			input, k,
+			handle, indices, value, flow_in,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return op.Output(0)
 }
 
-// RandomCropAttr is an optional argument to RandomCrop.
-type RandomCropAttr func(optionalAttr)
+// TensorArrayConcatV3Attr is an optional argument to TensorArrayConcatV3.
+type TensorArrayConcatV3Attr func(optionalAttr)
 
-// RandomCropSeed sets the optional seed attribute to value.
+// TensorArrayConcatV3ElementShapeExcept0 sets the optional element_shape_except0 attribute to value.
 //
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func RandomCropSeed(value int64) RandomCropAttr {
+// value: The expected shape of an element, if known,
+// excluding the first dimension. Used to validate the shapes of
+// TensorArray elements. If this shape is not fully specified, concatenating
+// zero-size TensorArrays is an error.
+// If not specified, defaults to <unknown_rank:true >
+func TensorArrayConcatV3ElementShapeExcept0(value tf.Shape) TensorArrayConcatV3Attr {
 	return func(m optionalAttr) {
-		m["seed"] = value
+		m["element_shape_except0"] = value
 	}
 }
 
-// RandomCropSeed2 sets the optional seed2 attribute to value.
+// Concat the elements from the TensorArray into value `value`.
 //
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func RandomCropSeed2(value int64) RandomCropAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Randomly crop `image`.
+// Takes `T` elements of shapes
 //
-// DEPRECATED at GraphDef version 8: Random crop is now pure Python
+//   ```
+//   (n0 x d0 x d1 x ...), (n1 x d0 x d1 x ...), ..., (n(T-1) x d0 x d1 x ...)
+//   ```
 //
-// `size` is a 1-D int64 tensor with 2 elements representing the crop height and
-// width.  The values must be non negative.
+// and concatenates them into a Tensor of shape:
 //
-// This Op picks a random location in `image` and crops a `height` by `width`
-// rectangle from that location.  The random location is picked so the cropped
-// area will fit inside the original image.
+//   ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```
+//
+// All elements must have the same shape (excepting the first dimension).
 //
 // Arguments:
-//	image: 3-D of shape `[height, width, channels]`.
-//	size: 1-D of length 2 containing: `crop_height`, `crop_width`..
+//	handle: The handle to a TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//	dtype: The type of the elem that is returned.
 //
-// Returns 3-D of shape `[crop_height, crop_width, channels].`
-func RandomCrop(scope *Scope, image tf.Output, size tf.Output, optional ...RandomCropAttr) (output tf.Output) {
+// Returns All of the elements in the TensorArray, concatenated along the first
+// axis.A vector of the row sizes of the original T elements in the
+// value output.  In the example above, this would be the values:
+// `(n1, n2, ..., n(T-1))`.
+func TensorArrayConcatV3(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV3Attr) (value tf.Output, lengths tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"dtype": dtype}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "RandomCrop",
+		Type: "TensorArrayConcatV3",
 		Input: []tf.Input{
-			image, size,
+			handle, flow_in,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// FractionalAvgPoolAttr is an optional argument to FractionalAvgPool.
-type FractionalAvgPoolAttr func(optionalAttr)
-
-// FractionalAvgPoolPseudoRandom sets the optional pseudo_random attribute to value.
-//
-// value: When set to True, generates the pooling sequence in a
-// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
-// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for
-// difference between pseudorandom and random.
-// If not specified, defaults to false
-func FractionalAvgPoolPseudoRandom(value bool) FractionalAvgPoolAttr {
-	return func(m optionalAttr) {
-		m["pseudo_random"] = value
-	}
-}
-
-// FractionalAvgPoolOverlapping sets the optional overlapping attribute to value.
-//
-// value: When set to True, it means when pooling, the values at the boundary
-// of adjacent pooling cells are used by both cells. For example:
-//
-// `index  0  1  2  3  4`
-//
-// `value  20 5  16 3  7`
-//
-// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
-// The result would be [41/3, 26/3] for fractional avg pooling.
-// If not specified, defaults to false
-func FractionalAvgPoolOverlapping(value bool) FractionalAvgPoolAttr {
-	return func(m optionalAttr) {
-		m["overlapping"] = value
-	}
+	return op.Output(0), op.Output(1)
 }
 
-// FractionalAvgPoolDeterministic sets the optional deterministic attribute to value.
-//
-// value: When set to True, a fixed pooling region will be used when
-// iterating over a FractionalAvgPool node in the computation graph. Mainly used
-// in unit test to make FractionalAvgPool deterministic.
-// If not specified, defaults to false
-func FractionalAvgPoolDeterministic(value bool) FractionalAvgPoolAttr {
-	return func(m optionalAttr) {
-		m["deterministic"] = value
-	}
-}
+// ParameterizedTruncatedNormalAttr is an optional argument to ParameterizedTruncatedNormal.
+type ParameterizedTruncatedNormalAttr func(optionalAttr)
 
-// FractionalAvgPoolSeed sets the optional seed attribute to value.
+// ParameterizedTruncatedNormalSeed sets the optional seed attribute to value.
 //
-// value: If either seed or seed2 are set to be non-zero, the random number
+// value: If either `seed` or `seed2` are set to be non-zero, the random number
 // generator is seeded by the given seed.  Otherwise, it is seeded by a
 // random seed.
 // If not specified, defaults to 0
-func FractionalAvgPoolSeed(value int64) FractionalAvgPoolAttr {
+func ParameterizedTruncatedNormalSeed(value int64) ParameterizedTruncatedNormalAttr {
 	return func(m optionalAttr) {
 		m["seed"] = value
 	}
 }
 
-// FractionalAvgPoolSeed2 sets the optional seed2 attribute to value.
+// ParameterizedTruncatedNormalSeed2 sets the optional seed2 attribute to value.
 //
-// value: An second seed to avoid seed collision.
+// value: A second seed to avoid seed collision.
 // If not specified, defaults to 0
-func FractionalAvgPoolSeed2(value int64) FractionalAvgPoolAttr {
+func ParameterizedTruncatedNormalSeed2(value int64) ParameterizedTruncatedNormalAttr {
 	return func(m optionalAttr) {
 		m["seed2"] = value
 	}
 }
 
-// Performs fractional average pooling on the input.
-//
-// Fractional average pooling is similar to Fractional max pooling in the pooling
-// region generation step. The only difference is that after pooling regions are
-// generated, a mean operation is performed instead of a max operation in each
-// pooling region.
-//
-// Arguments:
-//	value: 4-D with shape `[batch, height, width, channels]`.
-//	pooling_ratio: Pooling ratio for each dimension of `value`, currently only
-// supports row and col dimension and should be >= 1.0. For example, a valid
-// pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
-// must be 1.0 because we don't allow pooling on batch and channels
-// dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
-// respectively.
+// Outputs random values from a normal distribution. The parameters may each be a
 //
-// Returns output tensor after fractional avg pooling.row pooling sequence, needed to calculate gradient.column pooling sequence, needed to calculate gradient.
-func FractionalAvgPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalAvgPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) {
+// scalar which applies to the entire output, or a vector of length shape[0] which
+// stores the parameters for each batch.
+//
+// Arguments:
+//	shape: The shape of the output tensor. Batches are indexed by the 0th dimension.
+//	means: The mean parameter of each batch.
+//	stdevs: The standard deviation parameter of each batch. Must be greater than 0.
+//	minvals: The minimum cutoff. May be -infinity.
+//	maxvals: The maximum cutoff. May be +infinity, and must be more than the minval
+// for each batch.
+//
+// Returns A matrix of shape num_batches x samples_per_batch, filled with random
+// truncated normal values using the parameters for each row.
+func ParameterizedTruncatedNormal(scope *Scope, shape tf.Output, means tf.Output, stdevs tf.Output, minvals tf.Output, maxvals tf.Output, optional ...ParameterizedTruncatedNormalAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"pooling_ratio": pooling_ratio}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "FractionalAvgPool",
+		Type: "ParameterizedTruncatedNormal",
 		Input: []tf.Input{
-			value,
+			shape, means, stdevs, minvals, maxvals,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Updates the table to associates keys with values.
+// Returns a diagonal tensor with a given diagonal values.
 //
-// The tensor `keys` must be of the same type as the keys of the table.
-// The tensor `values` must be of the type of the table values.
+// Given a `diagonal`, this operation returns a tensor with the `diagonal` and
+// everything else padded with zeros. The diagonal is computed as follows:
 //
-// Arguments:
-//	table_handle: Handle to the table.
-//	keys: Any shape.  Keys to look up.
-//	values: Values to associate with keys.
+// Assume `diagonal` has dimensions [D1,..., Dk], then the output is a tensor of
+// rank 2k with dimensions [D1,..., Dk, D1,..., Dk] where:
 //
-// Returns the created operation.
-func LookupTableInsertV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) {
+// `output[i1,..., ik, i1,..., ik] = diagonal[i1, ..., ik]` and 0 everywhere else.
+//
+// For example:
+//
+// ```
+// # 'diagonal' is [1, 2, 3, 4]
+// tf.diag(diagonal) ==> [[1, 0, 0, 0]
+//                        [0, 2, 0, 0]
+//                        [0, 0, 3, 0]
+//                        [0, 0, 0, 4]]
+// ```
+//
+// Arguments:
+//	diagonal: Rank k tensor where k is at most 1.
+func Diag(scope *Scope, diagonal tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "LookupTableInsertV2",
+		Type: "Diag",
 		Input: []tf.Input{
-			table_handle, keys, values,
+			diagonal,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Produces the average pool of the input tensor for quantized types.
+// Split the data from the input value into TensorArray elements.
+//
+// Assuming that `lengths` takes on values
+//
+//   ```(n0, n1, ..., n(T-1))```
+//
+// and that `value` has shape
+//
+//   ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```,
+//
+// this splits values into a TensorArray with T tensors.
+//
+// TensorArray index t will be the subtensor of values with starting position
+//
+//   ```(n0 + n1 + ... + n(t-1), 0, 0, ...)```
+//
+// and having size
+//
+//   ```nt x d0 x d1 x ...```
 //
 // Arguments:
-//	input: 4-D with shape `[batch, height, width, channels]`.
-//	min_input: The float value that the lowest quantized input value represents.
-//	max_input: The float value that the highest quantized input value represents.
-//	ksize: The size of the window for each dimension of the input tensor.
-// The length must be 4 to match the number of dimensions of the input.
-//	strides: The stride of the sliding window for each dimension of the input
-// tensor.  The length must be 4 to match the number of dimensions of the input.
-//	padding: The type of padding algorithm to use.
+//	handle: The handle to a TensorArray.
+//	value: The concatenated tensor to write to the TensorArray.
+//	lengths: The vector of lengths, how to split the rows of value into the
+// TensorArray.
+//	flow_in: A float scalar that enforces proper chaining of operations.
 //
-// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
-func QuantizedAvgPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) {
+// Returns A float scalar that enforces proper chaining of operations.
+func TensorArraySplitV3(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "QuantizedAvgPool",
+		Type: "TensorArraySplitV3",
 		Input: []tf.Input{
-			input, min_input, max_input,
+			handle, value, lengths, flow_in,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Adds Tensor 'bias' to Tensor 'input' for Quantized types.
+// SerializeSparseAttr is an optional argument to SerializeSparse.
+type SerializeSparseAttr func(optionalAttr)
+
+// SerializeSparseOutType sets the optional out_type attribute to value.
 //
-// Broadcasts the values of bias on dimensions 0..N-2 of 'input'.
+// value: The `dtype` to use for serialization; the supported types are `string`
+// (default) and `variant`.
+// If not specified, defaults to DT_STRING
+func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Serialize a `SparseTensor` into a `[3]` `Tensor` object.
 //
 // Arguments:
-//
-//	bias: A 1D bias Tensor with size matching the last dimension of 'input'.
-//	min_input: The float value that the lowest quantized input value represents.
-//	max_input: The float value that the highest quantized input value represents.
-//	min_bias: The float value that the lowest quantized bias value represents.
-//	max_bias: The float value that the highest quantized bias value represents.
-//
-//
-// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
-func QuantizedBiasAdd(scope *Scope, input tf.Output, bias tf.Output, min_input tf.Output, max_input tf.Output, min_bias tf.Output, max_bias tf.Output, out_type tf.DataType) (output tf.Output, min_out tf.Output, max_out tf.Output) {
+//	sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
+//	sparse_values: 1-D.  The `values` of the `SparseTensor`.
+//	sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
+func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"out_type": out_type}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "QuantizedBiasAdd",
+		Type: "SerializeSparse",
 		Input: []tf.Input{
-			input, bias, min_input, max_input, min_bias, max_bias,
+			sparse_indices, sparse_values, sparse_shape,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Creates summary database writer accessible by given resource handle.
-//
-// This can be used to write tensors from the execution graph directly
-// to a database. Only SQLite is supported right now. This function
-// will create the schema if it doesn't exist. Entries in the Users,
-// Experiments, and Runs tables will be created automatically if they
-// don't already exist.
+// RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2.
+type RandomShuffleQueueV2Attr func(optionalAttr)
+
+// RandomShuffleQueueV2Shapes sets the optional shapes attribute to value.
 //
-// Arguments:
-//	writer: Handle to SummaryWriter resource to overwrite.
-//	db_uri: For example "file:/tmp/foo.sqlite".
-//	experiment_name: Can't contain ASCII control characters or <>. Case
-// sensitive. If empty, then the Run will not be associated with any
-// Experiment.
-//	run_name: Can't contain ASCII control characters or <>. Case sensitive.
-// If empty, then each Tag will not be associated with any Run.
-//	user_name: Must be valid as both a DNS label and Linux username. If
-// empty, then the Experiment will not be associated with any User.
+// value: The shape of each component in a value. The length of this attr must
+// be either 0 or the same as the length of component_types. If the length of
+// this attr is 0, the shapes of queue elements are not constrained, and
+// only one element may be dequeued at a time.
+// If not specified, defaults to <>
 //
-// Returns the created operation.
-func CreateSummaryDbWriter(scope *Scope, writer tf.Output, db_uri tf.Output, experiment_name tf.Output, run_name tf.Output, user_name tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
+// REQUIRES: len(value) >= 0
+func RandomShuffleQueueV2Shapes(value []tf.Shape) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shapes"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "CreateSummaryDbWriter",
-		Input: []tf.Input{
-			writer, db_uri, experiment_name, run_name, user_name,
-		},
+}
+
+// RandomShuffleQueueV2Capacity sets the optional capacity attribute to value.
+//
+// value: The upper bound on the number of elements in this queue.
+// Negative numbers mean no limit.
+// If not specified, defaults to -1
+func RandomShuffleQueueV2Capacity(value int64) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
 	}
-	return scope.AddOperation(opspec)
 }
 
-// HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth.
-type HistogramFixedWidthAttr func(optionalAttr)
+// RandomShuffleQueueV2MinAfterDequeue sets the optional min_after_dequeue attribute to value.
+//
+// value: Dequeue will block unless there would be this
+// many elements after the dequeue or the queue is closed. This
+// ensures a minimum level of mixing of elements.
+// If not specified, defaults to 0
+func RandomShuffleQueueV2MinAfterDequeue(value int64) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["min_after_dequeue"] = value
+	}
+}
 
-// HistogramFixedWidthDtype sets the optional dtype attribute to value.
-// If not specified, defaults to DT_INT32
-func HistogramFixedWidthDtype(value tf.DataType) HistogramFixedWidthAttr {
+// RandomShuffleQueueV2Seed sets the optional seed attribute to value.
+//
+// value: If either seed or seed2 is set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, a random seed is used.
+// If not specified, defaults to 0
+func RandomShuffleQueueV2Seed(value int64) RandomShuffleQueueV2Attr {
 	return func(m optionalAttr) {
-		m["dtype"] = value
+		m["seed"] = value
 	}
 }
 
-// Return histogram of values.
+// RandomShuffleQueueV2Seed2 sets the optional seed2 attribute to value.
 //
-// Given the tensor `values`, this operation returns a rank 1 histogram counting
-// the number of entries in `values` that fall into every bin.  The bins are
-// equal width and determined by the arguments `value_range` and `nbins`.
+// value: A second seed to avoid seed collision.
+// If not specified, defaults to 0
+func RandomShuffleQueueV2Seed2(value int64) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// RandomShuffleQueueV2Container sets the optional container attribute to value.
 //
-// ```python
-// # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
-// nbins = 5
-// value_range = [0.0, 5.0]
-// new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
+// value: If non-empty, this queue is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func RandomShuffleQueueV2Container(value string) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// RandomShuffleQueueV2SharedName sets the optional shared_name attribute to value.
 //
-// with tf.get_default_session() as sess:
-//   hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
-//   variables.global_variables_initializer().run()
-//   sess.run(hist) => [2, 1, 1, 0, 2]
-// ```
+// value: If non-empty, this queue will be shared under the given name
+// across multiple sessions.
+// If not specified, defaults to ""
+func RandomShuffleQueueV2SharedName(value string) RandomShuffleQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// A queue that randomizes the order of elements.
 //
 // Arguments:
-//	values: Numeric `Tensor`.
-//	value_range: Shape [2] `Tensor` of same `dtype` as `values`.
-// values <= value_range[0] will be mapped to hist[0],
-// values >= value_range[1] will be mapped to hist[-1].
-//	nbins: Scalar `int32 Tensor`.  Number of histogram bins.
+//	component_types: The type of each component in a value.
 //
-// Returns A 1-D `Tensor` holding histogram of values.
-func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, nbins tf.Output, optional ...HistogramFixedWidthAttr) (out tf.Output) {
+// Returns The handle to the queue.
+func RandomShuffleQueueV2(scope *Scope, component_types []tf.DataType, optional ...RandomShuffleQueueV2Attr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"component_types": component_types}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "HistogramFixedWidth",
-		Input: []tf.Input{
-			values, value_range, nbins,
-		},
+		Type: "RandomShuffleQueueV2",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Quantized Batch normalization.
+// Draw bounding boxes on a batch of images.
 //
-// This op is deprecated and will be removed in the future. Prefer
-// `tf.nn.batch_normalization`.
+// Outputs a copy of `images` but draws on top of the pixels zero or more bounding
+// boxes specified by the locations in `boxes`. The coordinates of the each
+// bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The
+// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
+// height of the underlying image.
+//
+// For example, if an image is 100 x 200 pixels (height x width) and the bounding
+// box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
+// the bounding box will be `(40, 10)` to `(100, 50)` (in (x,y) coordinates).
+//
+// Parts of the bounding box may fall outside the image.
 //
 // Arguments:
-//	t: A 4D input Tensor.
-//	t_min: The value represented by the lowest quantized input.
-//	t_max: The value represented by the highest quantized input.
-//	m: A 1D mean Tensor with size matching the last dimension of t.
-// This is the first output from tf.nn.moments,
-// or a saved moving average thereof.
-//	m_min: The value represented by the lowest quantized mean.
-//	m_max: The value represented by the highest quantized mean.
-//	v: A 1D variance Tensor with size matching the last dimension of t.
-// This is the second output from tf.nn.moments,
-// or a saved moving average thereof.
-//	v_min: The value represented by the lowest quantized variance.
-//	v_max: The value represented by the highest quantized variance.
-//	beta: A 1D beta Tensor with size matching the last dimension of t.
-// An offset to be added to the normalized tensor.
-//	beta_min: The value represented by the lowest quantized offset.
-//	beta_max: The value represented by the highest quantized offset.
-//	gamma: A 1D gamma Tensor with size matching the last dimension of t.
-// If "scale_after_normalization" is true, this tensor will be multiplied
-// with the normalized tensor.
-//	gamma_min: The value represented by the lowest quantized gamma.
-//	gamma_max: The value represented by the highest quantized gamma.
+//	images: 4-D with shape `[batch, height, width, depth]`. A batch of images.
+//	boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding
+// boxes.
 //
-//	variance_epsilon: A small float number to avoid dividing by 0.
-//	scale_after_normalization: A bool indicating whether the resulted tensor
-// needs to be multiplied with gamma.
-func QuantizedBatchNormWithGlobalNormalization(scope *Scope, t tf.Output, t_min tf.Output, t_max tf.Output, m tf.Output, m_min tf.Output, m_max tf.Output, v tf.Output, v_min tf.Output, v_max tf.Output, beta tf.Output, beta_min tf.Output, beta_max tf.Output, gamma tf.Output, gamma_min tf.Output, gamma_max tf.Output, out_type tf.DataType, variance_epsilon float32, scale_after_normalization bool) (result tf.Output, result_min tf.Output, result_max tf.Output) {
+// Returns 4-D with the same shape as `images`. The batch of input images with
+// bounding boxes drawn on the images.
+func DrawBoundingBoxes(scope *Scope, images tf.Output, boxes tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"out_type": out_type, "variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization}
 	opspec := tf.OpSpec{
-		Type: "QuantizedBatchNormWithGlobalNormalization",
+		Type: "DrawBoundingBoxes",
 		Input: []tf.Input{
-			t, t_min, t_max, m, m_min, m_max, v, v_min, v_max, beta, beta_min, beta_max, gamma, gamma_min, gamma_max,
+			images, boxes,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Add all input tensors element wise.
+// LearnedUnigramCandidateSamplerAttr is an optional argument to LearnedUnigramCandidateSampler.
+type LearnedUnigramCandidateSamplerAttr func(optionalAttr)
+
+// LearnedUnigramCandidateSamplerSeed sets the optional seed attribute to value.
 //
-// Arguments:
-//	inputs: Must all be the same size and shape.
-func AddN(scope *Scope, inputs []tf.Output) (sum tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "AddN",
-		Input: []tf.Input{
-			tf.OutputList(inputs),
-		},
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func LearnedUnigramCandidateSamplerSeed(value int64) LearnedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// MaxAttr is an optional argument to Max.
-type MaxAttr func(optionalAttr)
-
-// MaxKeepDims sets the optional keep_dims attribute to value.
+// LearnedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value.
 //
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func MaxKeepDims(value bool) MaxAttr {
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func LearnedUnigramCandidateSamplerSeed2(value int64) LearnedUnigramCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["keep_dims"] = value
+		m["seed2"] = value
 	}
 }
 
-// Computes the maximum of elements across dimensions of a tensor.
+// Generates labels for candidate sampling with a learned unigram distribution.
 //
-// Reduces `input` along the dimensions given in `reduction_indices`. Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-// retained with length 1.
+// See explanations of candidate sampling and the data formats at
+// go/candidate-sampling.
+//
+// For each batch, this op picks a single set of sampled candidate labels.
+//
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
 //
 // Arguments:
-//	input: The tensor to reduce.
-//	reduction_indices: The dimensions to reduce. Must be in the range
-// `[-rank(input), rank(input))`.
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to randomly sample.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
+//	range_max: The sampler will sample integers from the interval [0, range_max).
 //
-// Returns The reduced tensor.
-func Max(scope *Scope, input tf.Output, reduction_indices tf.Output, optional ...MaxAttr) (output tf.Output) {
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func LearnedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LearnedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Max",
+		Type: "LearnedUnigramCandidateSampler",
 		Input: []tf.Input{
-			input, reduction_indices,
+			true_classes,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Cast x of type SrcT to y of DstT.
-func Cast(scope *Scope, x tf.Output, DstT tf.DataType) (y tf.Output) {
+// Computes gradients for the scaled exponential linear (Selu) operation.
+//
+// Arguments:
+//	gradients: The backpropagated gradients to the corresponding Selu operation.
+//	outputs: The outputs of the corresponding Selu operation.
+//
+// Returns The gradients: `gradients * (outputs + scale * alpha)`
+// if outputs < 0, `scale * gradients` otherwise.
+func SeluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"DstT": DstT}
 	opspec := tf.OpSpec{
-		Type: "Cast",
+		Type: "SeluGrad",
 		Input: []tf.Input{
-			x,
+			gradients, outputs,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns the truth value of x AND y element-wise.
+// Get the current size of the TensorArray.
 //
-// *NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func LogicalAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Arguments:
+//	handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad).
+//	flow_in: A float scalar that enforces proper chaining of operations.
+//
+// Returns The current size of the TensorArray.
+func TensorArraySizeV3(scope *Scope, handle tf.Output, flow_in tf.Output) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "LogicalAnd",
+		Type: "TensorArraySizeV3",
 		Input: []tf.Input{
-			x, y,
+			handle, flow_in,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ComplexAbsAttr is an optional argument to ComplexAbs.
-type ComplexAbsAttr func(optionalAttr)
-
-// ComplexAbsTout sets the optional Tout attribute to value.
-// If not specified, defaults to DT_FLOAT
-func ComplexAbsTout(value tf.DataType) ComplexAbsAttr {
-	return func(m optionalAttr) {
-		m["Tout"] = value
-	}
-}
-
-// Computes the complex absolute value of a tensor.
-//
-// Given a tensor `x` of complex numbers, this operation returns a tensor of type
-// `float` or `double` that is the absolute value of each element in `x`. All
-// elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute
-// value is computed as \\( \sqrt{a^2 + b^2}\\).
-func ComplexAbs(scope *Scope, x tf.Output, optional ...ComplexAbsAttr) (y tf.Output) {
+// Deprecated. Use TensorArrayGradV3
+func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "ComplexAbs",
+		Type: "TensorArrayWriteV2",
 		Input: []tf.Input{
-			x,
+			handle, index, value, flow_in,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the reciprocal of x element-wise.
+// SparseReduceMaxAttr is an optional argument to SparseReduceMax.
+type SparseReduceMaxAttr func(optionalAttr)
+
+// SparseReduceMaxKeepDims sets the optional keep_dims attribute to value.
+//
+// value: If true, retain reduced dimensions with length 1.
+// If not specified, defaults to false
+func SparseReduceMaxKeepDims(value bool) SparseReduceMaxAttr {
+	return func(m optionalAttr) {
+		m["keep_dims"] = value
+	}
+}
+
+// Computes the max of elements across dimensions of a SparseTensor.
 //
-// DEPRECATED at GraphDef version 17: Use Reciprocal
+// This Op takes a SparseTensor and is the sparse counterpart to
+// `tf.reduce_max()`.  In particular, this Op also returns a dense `Tensor`
+// instead of a sparse one.
 //
-// I.e., \\(y = 1 / x\\).
-func Inv(scope *Scope, x tf.Output) (y tf.Output) {
+// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
+// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
+// with length 1.
+//
+// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
+// with a single element is returned.  Additionally, the axes can be negative,
+// which are interpreted according to the indexing rules in Python.
+//
+// Arguments:
+//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
+//	input_shape: 1-D.  Shape of the input SparseTensor.
+//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
+//
+// Returns `R-K`-D.  The reduced Tensor.
+func SparseReduceMax(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Inv",
+		Type: "SparseReduceMax",
 		Input: []tf.Input{
-			x,
+			input_indices, input_values, input_shape, reduction_axes,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// OrderedMapClearAttr is an optional argument to OrderedMapClear.
-type OrderedMapClearAttr func(optionalAttr)
+// AsStringAttr is an optional argument to AsString.
+type AsStringAttr func(optionalAttr)
 
-// OrderedMapClearCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// AsStringPrecision sets the optional precision attribute to value.
 //
-// REQUIRES: value >= 0
-func OrderedMapClearCapacity(value int64) OrderedMapClearAttr {
+// value: The post-decimal precision to use for floating point numbers.
+// Only used if precision > -1.
+// If not specified, defaults to -1
+func AsStringPrecision(value int64) AsStringAttr {
 	return func(m optionalAttr) {
-		m["capacity"] = value
+		m["precision"] = value
 	}
 }
 
-// OrderedMapClearMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
+// AsStringScientific sets the optional scientific attribute to value.
 //
-// REQUIRES: value >= 0
-func OrderedMapClearMemoryLimit(value int64) OrderedMapClearAttr {
+// value: Use scientific notation for floating point numbers.
+// If not specified, defaults to false
+func AsStringScientific(value bool) AsStringAttr {
 	return func(m optionalAttr) {
-		m["memory_limit"] = value
+		m["scientific"] = value
 	}
 }
 
-// OrderedMapClearContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func OrderedMapClearContainer(value string) OrderedMapClearAttr {
+// AsStringShortest sets the optional shortest attribute to value.
+//
+// value: Use shortest representation (either scientific or standard) for
+// floating point numbers.
+// If not specified, defaults to false
+func AsStringShortest(value bool) AsStringAttr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["shortest"] = value
 	}
 }
 
-// OrderedMapClearSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func OrderedMapClearSharedName(value string) OrderedMapClearAttr {
+// AsStringWidth sets the optional width attribute to value.
+//
+// value: Pad pre-decimal numbers to this width.
+// Applies to both floating point and integer numbers.
+// Only used if width > -1.
+// If not specified, defaults to -1
+func AsStringWidth(value int64) AsStringAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["width"] = value
 	}
 }
 
-// Op removes all elements in the underlying container.
+// AsStringFill sets the optional fill attribute to value.
 //
-// Returns the created operation.
-func OrderedMapClear(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapClearAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "OrderedMapClear",
-
-		Attrs: attrs,
+// value: The value to pad if width > -1.  If empty, pads with spaces.
+// Another typical value is '0'.  String cannot be longer than 1 character.
+// If not specified, defaults to ""
+func AsStringFill(value string) AsStringAttr {
+	return func(m optionalAttr) {
+		m["fill"] = value
 	}
-	return scope.AddOperation(opspec)
 }
 
-// Returns the element-wise max of two SparseTensors.
-//
-// Assumes the two SparseTensors have the same shape, i.e., no broadcasting.
-//
-// Arguments:
-//	a_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, in the canonical lexicographic ordering.
-//	a_values: 1-D.  `N` non-empty values corresponding to `a_indices`.
-//	a_shape: 1-D.  Shape of the input SparseTensor.
-//	b_indices: counterpart to `a_indices` for the other operand.
-//	b_values: counterpart to `a_values` for the other operand; must be of the same dtype.
-//	b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal.
+// Converts each entry in the given tensor to strings.  Supports many numeric
 //
-// Returns 2-D.  The indices of the output SparseTensor.1-D.  The values of the output SparseTensor.
-func SparseSparseMaximum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
+// types and boolean.
+func AsString(scope *Scope, input tf.Output, optional ...AsStringAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "SparseSparseMaximum",
-		Input: []tf.Input{
-			a_indices, a_values, a_shape, b_indices, b_values, b_shape,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// Computes the gradient for the inverse of `x` wrt its input.
-//
-// DEPRECATED at GraphDef version 17: Use ReciprocalGrad
-//
-// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`
-// is the corresponding input gradient.
-func InvGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "InvGrad",
+		Type: "AsString",
 		Input: []tf.Input{
-			y, dy,
+			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the reciprocal of x element-wise.
-//
-// I.e., \\(y = 1 / x\\).
-func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) {
+// Deprecated. Use TensorArrayScatterV3
+func TensorArrayScatterV2(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Reciprocal",
+		Type: "TensorArrayScatterV2",
 		Input: []tf.Input{
-			x,
+			handle, indices, value, flow_in,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise.
+// Applies sparse addition to `input` using individual values or slices
 //
-// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)
-// ](http://arxiv.org/abs/1511.07289)
-func Elu(scope *Scope, features tf.Output) (activations tf.Output) {
+// from `updates` according to indices `indices`.  The updates are non-aliasing:
+// `input` is only modified in-place if no other operations will use it.
+// Otherwise, a copy of `input` is made.  This operation has a gradient with
+// respect to both `input` and `updates`.
+//
+// `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+//
+// `indices` must be integer tensor, containing indices into `input`.
+// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+//
+// The innermost dimension of `indices` (with length `K`) corresponds to
+// indices into elements (if `K = P`) or `(P-K)`-dimensional slices
+// (if `K < P`) along the `K`th dimension of `input`.
+//
+// `updates` is `Tensor` of rank `Q-1+P-K` with shape:
+//
+// ```
+// [d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].
+// ```
+//
+// For example, say we want to add 4 scattered elements to a rank-1 tensor to 8
+// elements. In Python, that addition would look like this:
+//
+//     input = tf.constant([1, 2, 3, 4, 5, 6, 7, 8])
+//     indices = tf.constant([[4], [3], [1], [7]])
+//     updates = tf.constant([9, 10, 11, 12])
+//     output = tf.scatter_nd_non_aliasing_add(input, indices, updates)
+//     with tf.Session() as sess:
+//       print(sess.run(output))
+//
+// The resulting value `output` would look like this:
+//
+//     [1, 13, 3, 14, 14, 6, 7, 20]
+//
+// See @{tf.scatter_nd} for more details about how to make updates to slices.
+//
+// Arguments:
+//	input: A Tensor.
+//	indices: A Tensor. Must be one of the following types: `int32`, `int64`.
+// A tensor of indices into `input`.
+//	updates: A Tensor. Must have the same type as ref. A tensor of updated values
+// to add to `input`.
+//
+// Returns A `Tensor` with the same shape as `input`, containing values of `input`
+// updated with `updates`.
+func ScatterNdNonAliasingAdd(scope *Scope, input tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Elu",
+		Type: "ScatterNdNonAliasingAdd",
 		Input: []tf.Input{
-			features,
+			input, indices, updates,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes square of x element-wise.
+// FractionalMaxPoolAttr is an optional argument to FractionalMaxPool.
+type FractionalMaxPoolAttr func(optionalAttr)
+
+// FractionalMaxPoolPseudoRandom sets the optional pseudo_random attribute to value.
 //
-// I.e., \\(y = x * x = x^2\\).
-func Square(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Square",
-		Input: []tf.Input{
-			x,
-		},
+// value: When set to True, generates the pooling sequence in a
+// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
+// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for
+// difference between pseudorandom and random.
+// If not specified, defaults to false
+func FractionalMaxPoolPseudoRandom(value bool) FractionalMaxPoolAttr {
+	return func(m optionalAttr) {
+		m["pseudo_random"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Returns element-wise remainder of division. When `x < 0` xor `y < 0` is
+// FractionalMaxPoolOverlapping sets the optional overlapping attribute to value.
 //
-// true, this follows Python semantics in that the result here is consistent
-// with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`.
+// value: When set to True, it means when pooling, the values at the boundary
+// of adjacent pooling cells are used by both cells. For example:
 //
-// *NOTE*: `FloorMod` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func FloorMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "FloorMod",
-		Input: []tf.Input{
-			x, y,
-		},
+// `index  0  1  2  3  4`
+//
+// `value  20 5  16 3  7`
+//
+// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
+// The result would be [20, 16] for fractional max pooling.
+// If not specified, defaults to false
+func FractionalMaxPoolOverlapping(value bool) FractionalMaxPoolAttr {
+	return func(m optionalAttr) {
+		m["overlapping"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes square root of x element-wise.
+// FractionalMaxPoolDeterministic sets the optional deterministic attribute to value.
 //
-// I.e., \\(y = \sqrt{x} = x^{1/2}\\).
-func Sqrt(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Sqrt",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+// value: When set to True, a fixed pooling region will be used when
+// iterating over a FractionalMaxPool node in the computation graph. Mainly used
+// in unit test to make FractionalMaxPool deterministic.
+// If not specified, defaults to false
+func FractionalMaxPoolDeterministic(value bool) FractionalMaxPoolAttr {
+	return func(m optionalAttr) {
+		m["deterministic"] = value
+	}
 }
 
-// MatrixInverseAttr is an optional argument to MatrixInverse.
-type MatrixInverseAttr func(optionalAttr)
+// FractionalMaxPoolSeed sets the optional seed attribute to value.
+//
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func FractionalMaxPoolSeed(value int64) FractionalMaxPoolAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
 
-// MatrixInverseAdjoint sets the optional adjoint attribute to value.
-// If not specified, defaults to false
-func MatrixInverseAdjoint(value bool) MatrixInverseAttr {
+// FractionalMaxPoolSeed2 sets the optional seed2 attribute to value.
+//
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func FractionalMaxPoolSeed2(value int64) FractionalMaxPoolAttr {
 	return func(m optionalAttr) {
-		m["adjoint"] = value
+		m["seed2"] = value
 	}
 }
 
-// Computes the inverse of one or more square invertible matrices or their
+// Performs fractional max pooling on the input.
 //
-// adjoints (conjugate transposes).
+// Fractional max pooling is slightly different than regular max pooling.  In
+// regular max pooling, you downsize an input set by taking the maximum value of
+// smaller N x N subsections of the set (often 2x2), and try to reduce the set by
+// a factor of N, where N is an integer.  Fractional max pooling, as you might
+// expect from the word "fractional", means that the overall reduction ratio N
+// does not have to be an integer.
 //
-// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-// form square matrices. The output is a tensor of the same shape as the input
-// containing the inverse for all input submatrices `[..., :, :]`.
+// The sizes of the pooling regions are generated randomly but are fairly uniform.
+// For example, let's look at the height dimension, and the constraints on the
+// list of rows that will be pool boundaries.
 //
-// The op uses LU decomposition with partial pivoting to compute the inverses.
+// First we define the following:
 //
-// If a matrix is not invertible there is no guarantee what the op does. It
-// may detect the condition and raise an exception or it may simply return a
-// garbage result.
+// 1.  input_row_length : the number of rows from the input set
+// 2.  output_row_length : which will be smaller than the input
+// 3.  alpha = input_row_length / output_row_length : our reduction ratio
+// 4.  K = floor(alpha)
+// 5.  row_pooling_sequence : this is the result list of pool boundary rows
 //
-// Arguments:
-//	input: Shape is `[..., M, M]`.
+// Then, row_pooling_sequence should satisfy:
 //
-// Returns Shape is `[..., M, M]`.
+// 1.  a[0] = 0 : the first value of the sequence is 0
+// 2.  a[end] = input_row_length : the last value of the sequence is the size
+// 3.  K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size
+// 4.  length(row_pooling_sequence) = output_row_length+1
 //
-// @compatibility(numpy)
-// Equivalent to np.linalg.inv
-// @end_compatibility
-func MatrixInverse(scope *Scope, input tf.Output, optional ...MatrixInverseAttr) (output tf.Output) {
+// For more details on fractional max pooling, see this paper:
+// [Benjamin Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071)
+//
+// Arguments:
+//	value: 4-D with shape `[batch, height, width, channels]`.
+//	pooling_ratio: Pooling ratio for each dimension of `value`, currently only
+// supports row and col dimension and should be >= 1.0. For example, a valid
+// pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
+// must be 1.0 because we don't allow pooling on batch and channels
+// dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
+// respectively.
+//
+// Returns output tensor after fractional max pooling.row pooling sequence, needed to calculate gradient.column pooling sequence, needed to calculate gradient.
+func FractionalMaxPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalMaxPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"pooling_ratio": pooling_ratio}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MatrixInverse",
+		Type: "FractionalMaxPool",
 		Input: []tf.Input{
-			input,
+			value,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Computes the gradient for the sqrt of `x` wrt its input.
-//
-// Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy`
-// is the corresponding input gradient.
-func SqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) {
+// Deprecated. Use TensorArraySizeV3
+func TensorArraySizeV2(scope *Scope, handle tf.Output, flow_in tf.Output) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SqrtGrad",
+		Type: "TensorArraySizeV2",
 		Input: []tf.Input{
-			y, dy,
+			handle, flow_in,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Inserts a dimension of 1 into a tensor's shape.
-//
-// Given a tensor `input`, this operation inserts a dimension of 1 at the
-// dimension index `dim` of `input`'s shape. The dimension index `dim` starts at
-// zero; if you specify a negative number for `dim` it is counted backward from
-// the end.
+// Conv2DAttr is an optional argument to Conv2D.
+type Conv2DAttr func(optionalAttr)
+
+// Conv2DUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value.
+// If not specified, defaults to true
+func Conv2DUseCudnnOnGpu(value bool) Conv2DAttr {
+	return func(m optionalAttr) {
+		m["use_cudnn_on_gpu"] = value
+	}
+}
+
+// Conv2DDataFormat sets the optional data_format attribute to value.
 //
-// This operation is useful if you want to add a batch dimension to a single
-// element. For example, if you have a single image of shape `[height, width,
-// channels]`, you can make it a batch of 1 image with `expand_dims(image, 0)`,
-// which will make the shape `[1, height, width, channels]`.
+// value: Specify the data format of the input and output data. With the
+// default format "NHWC", the data is stored in the order of:
+//     [batch, height, width, channels].
+// Alternatively, the format could be "NCHW", the data storage order of:
+//     [batch, channels, height, width].
+// If not specified, defaults to "NHWC"
+func Conv2DDataFormat(value string) Conv2DAttr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
+	}
+}
+
+// Conv2DDilations sets the optional dilations attribute to value.
 //
-// Other examples:
+// value: 1-D tensor of length 4.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each
+// filter element on that dimension. The dimension order is determined by the
+// value of `data_format`, see above for details. Dilations in the batch and
+// depth dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 >
+func Conv2DDilations(value []int64) Conv2DAttr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
+	}
+}
+
+// Computes a 2-D convolution given 4-D `input` and `filter` tensors.
 //
-// ```
-// # 't' is a tensor of shape [2]
-// shape(expand_dims(t, 0)) ==> [1, 2]
-// shape(expand_dims(t, 1)) ==> [2, 1]
-// shape(expand_dims(t, -1)) ==> [2, 1]
+// Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
+// and a filter / kernel tensor of shape
+// `[filter_height, filter_width, in_channels, out_channels]`, this op
+// performs the following:
 //
-// # 't2' is a tensor of shape [2, 3, 5]
-// shape(expand_dims(t2, 0)) ==> [1, 2, 3, 5]
-// shape(expand_dims(t2, 2)) ==> [2, 3, 1, 5]
-// shape(expand_dims(t2, 3)) ==> [2, 3, 5, 1]
-// ```
+// 1. Flattens the filter to a 2-D matrix with shape
+//    `[filter_height * filter_width * in_channels, output_channels]`.
+// 2. Extracts image patches from the input tensor to form a *virtual*
+//    tensor of shape `[batch, out_height, out_width,
+//    filter_height * filter_width * in_channels]`.
+// 3. For each patch, right-multiplies the filter matrix and the image patch
+//    vector.
 //
-// This operation requires that:
+// In detail, with the default NHWC format,
 //
-// `-1-input.dims() <= dim <= input.dims()`
+//     output[b, i, j, k] =
+//         sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] *
+//                         filter[di, dj, q, k]
 //
-// This operation is related to `squeeze()`, which removes dimensions of
-// size 1.
+// Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
+// horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
 //
 // Arguments:
+//	input: A 4-D tensor. The dimension order is interpreted according to the value
+// of `data_format`, see below for details.
+//	filter: A 4-D tensor of shape
+// `[filter_height, filter_width, in_channels, out_channels]`
+//	strides: 1-D tensor of length 4.  The stride of the sliding window for each
+// dimension of `input`. The dimension order is determined by the value of
+// `data_format`, see below for details.
+//	padding: The type of padding algorithm to use.
 //
-//	dim: 0-D (scalar). Specifies the dimension index at which to
-// expand the shape of `input`. Must be in the range
-// `[-rank(input) - 1, rank(input)]`.
-//
-// Returns Contains the same data as `input`, but its shape has an additional
-// dimension of size 1 added.
-func ExpandDims(scope *Scope, input tf.Output, dim tf.Output) (output tf.Output) {
+// Returns A 4-D tensor. The dimension order is determined by the value of
+// `data_format`, see below for details.
+func Conv2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv2DAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "ExpandDims",
+		Type: "Conv2D",
 		Input: []tf.Input{
-			input, dim,
+			input, filter,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// AllAttr is an optional argument to All.
-type AllAttr func(optionalAttr)
+// FakeQuantWithMinMaxArgsAttr is an optional argument to FakeQuantWithMinMaxArgs.
+type FakeQuantWithMinMaxArgsAttr func(optionalAttr)
 
-// AllKeepDims sets the optional keep_dims attribute to value.
-//
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func AllKeepDims(value bool) AllAttr {
+// FakeQuantWithMinMaxArgsMin sets the optional min attribute to value.
+// If not specified, defaults to -6
+func FakeQuantWithMinMaxArgsMin(value float32) FakeQuantWithMinMaxArgsAttr {
 	return func(m optionalAttr) {
-		m["keep_dims"] = value
+		m["min"] = value
 	}
 }
 
-// Computes the "logical and" of elements across dimensions of a tensor.
-//
-// Reduces `input` along the dimensions given in `reduction_indices`. Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-// retained with length 1.
-//
-// Arguments:
-//	input: The tensor to reduce.
-//	reduction_indices: The dimensions to reduce. Must be in the range
-// `[-rank(input), rank(input))`.
-//
-// Returns The reduced tensor.
-func All(scope *Scope, input tf.Output, reduction_indices tf.Output, optional ...AllAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "All",
-		Input: []tf.Input{
-			input, reduction_indices,
-		},
-		Attrs: attrs,
+// FakeQuantWithMinMaxArgsMax sets the optional max attribute to value.
+// If not specified, defaults to 6
+func FakeQuantWithMinMaxArgsMax(value float32) FakeQuantWithMinMaxArgsAttr {
+	return func(m optionalAttr) {
+		m["max"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// CTCBeamSearchDecoderAttr is an optional argument to CTCBeamSearchDecoder.
-type CTCBeamSearchDecoderAttr func(optionalAttr)
+// FakeQuantWithMinMaxArgsNumBits sets the optional num_bits attribute to value.
+// If not specified, defaults to 8
+func FakeQuantWithMinMaxArgsNumBits(value int64) FakeQuantWithMinMaxArgsAttr {
+	return func(m optionalAttr) {
+		m["num_bits"] = value
+	}
+}
 
-// CTCBeamSearchDecoderMergeRepeated sets the optional merge_repeated attribute to value.
-//
-// value: If true, merge repeated classes in output.
-// If not specified, defaults to true
-func CTCBeamSearchDecoderMergeRepeated(value bool) CTCBeamSearchDecoderAttr {
+// FakeQuantWithMinMaxArgsNarrowRange sets the optional narrow_range attribute to value.
+// If not specified, defaults to false
+func FakeQuantWithMinMaxArgsNarrowRange(value bool) FakeQuantWithMinMaxArgsAttr {
 	return func(m optionalAttr) {
-		m["merge_repeated"] = value
+		m["narrow_range"] = value
 	}
 }
 
-// Performs beam search decoding on the logits given in input.
-//
-// A note about the attribute merge_repeated: For the beam search decoder,
-// this means that if consecutive entries in a beam are the same, only
-// the first of these is emitted.  That is, when the top path is "A B B B B",
-// "A B" is returned if merge_repeated = True but "A B B B B" is
-// returned if merge_repeated = False.
+// Fake-quantize the 'inputs' tensor, type float to 'outputs' tensor of same type.
 //
-// Arguments:
-//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-//	sequence_length: A vector containing sequence lengths, size `(batch)`.
-//	beam_width: A scalar >= 0 (beam search beam width).
-//	top_paths: A scalar >= 0, <= beam_width (controls output size).
+// Attributes `[min; max]` define the clamping range for the `inputs` data.
+// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
+// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
+// then de-quantized and output as floats in `[min; max]` interval.
+// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.
 //
-// Returns A list (length: top_paths) of indices matrices.  Matrix j,
-// size `(total_decoded_outputs[j] x 2)`, has indices of a
-// `SparseTensor<int64, 2>`.  The rows store: [batch, time].A list (length: top_paths) of values vectors.  Vector j,
-// size `(length total_decoded_outputs[j])`, has the values of a
-// `SparseTensor<int64, 2>`.  The vector stores the decoded classes for beam j.A list (length: top_paths) of shape vector.  Vector j,
-// size `(2)`, stores the shape of the decoded `SparseTensor[j]`.
-// Its values are: `[batch_size, max_decoded_length[j]]`.A matrix, shaped: `(batch_size x top_paths)`.  The
-// sequence log-probabilities.
-func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, beam_width int64, top_paths int64, optional ...CTCBeamSearchDecoderAttr) (decoded_indices []tf.Output, decoded_values []tf.Output, decoded_shape []tf.Output, log_probability tf.Output) {
+// Quantization is called fake since the output is still in floating point.
+func FakeQuantWithMinMaxArgs(scope *Scope, inputs tf.Output, optional ...FakeQuantWithMinMaxArgsAttr) (outputs tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"beam_width": beam_width, "top_paths": top_paths}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "CTCBeamSearchDecoder",
+		Type: "FakeQuantWithMinMaxArgs",
 		Input: []tf.Input{
-			inputs, sequence_length,
+			inputs,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
+	return op.Output(0)
+}
+
+// StageAttr is an optional argument to Stage.
+type StageAttr func(optionalAttr)
+
+// StageCapacity sets the optional capacity attribute to value.
+//
+// value: Maximum number of elements in the Staging Area. If > 0, inserts
+// on the container will block when the capacity is reached.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func StageCapacity(value int64) StageAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
 	}
-	var idx int
-	var err error
-	if decoded_indices, idx, err = makeOutputList(op, idx, "decoded_indices"); err != nil {
-		scope.UpdateErr("CTCBeamSearchDecoder", err)
-		return
+}
+
+// StageMemoryLimit sets the optional memory_limit attribute to value.
+//
+// value: The maximum number of bytes allowed for Tensors in the Staging Area.
+// If > 0, inserts will block until sufficient space is available.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func StageMemoryLimit(value int64) StageAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
 	}
-	if decoded_values, idx, err = makeOutputList(op, idx, "decoded_values"); err != nil {
-		scope.UpdateErr("CTCBeamSearchDecoder", err)
-		return
+}
+
+// StageContainer sets the optional container attribute to value.
+//
+// value: If non-empty, this queue is placed in the given container. Otherwise,
+// a default container is used.
+// If not specified, defaults to ""
+func StageContainer(value string) StageAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
 	}
-	if decoded_shape, idx, err = makeOutputList(op, idx, "decoded_shape"); err != nil {
-		scope.UpdateErr("CTCBeamSearchDecoder", err)
-		return
+}
+
+// StageSharedName sets the optional shared_name attribute to value.
+//
+// value: It is necessary to match this name to the matching Unstage Op.
+// If not specified, defaults to ""
+func StageSharedName(value string) StageAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
 	}
-	log_probability = op.Output(idx)
-	return decoded_indices, decoded_values, decoded_shape, log_probability
 }
 
-// Computes reciprocal of square root of x element-wise.
+// Stage values similar to a lightweight Enqueue.
 //
-// I.e., \\(y = 1 / \sqrt{x}\\).
-func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) {
+// The basic functionality of this Op is similar to a queue with many
+// fewer capabilities and options.  This Op is optimized for performance.
+//
+// Arguments:
+//	values: a list of tensors
+// dtypes A list of data types that inserted values should adhere to.
+//
+// Returns the created operation.
+func Stage(scope *Scope, values []tf.Output, optional ...StageAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Rsqrt",
+		Type: "Stage",
 		Input: []tf.Input{
-			x,
+			tf.OutputList(values),
 		},
+		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// RecordInputAttr is an optional argument to RecordInput.
-type RecordInputAttr func(optionalAttr)
+// StagePeekAttr is an optional argument to StagePeek.
+type StagePeekAttr func(optionalAttr)
 
-// RecordInputFileRandomSeed sets the optional file_random_seed attribute to value.
+// StagePeekCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
 //
-// value: Random seeds used to produce randomized records.
-// If not specified, defaults to 301
-func RecordInputFileRandomSeed(value int64) RecordInputAttr {
+// REQUIRES: value >= 0
+func StagePeekCapacity(value int64) StagePeekAttr {
 	return func(m optionalAttr) {
-		m["file_random_seed"] = value
+		m["capacity"] = value
 	}
 }
 
-// RecordInputFileShuffleShiftRatio sets the optional file_shuffle_shift_ratio attribute to value.
-//
-// value: Shifts the list of files after the list is randomly
-// shuffled.
+// StagePeekMemoryLimit sets the optional memory_limit attribute to value.
 // If not specified, defaults to 0
-func RecordInputFileShuffleShiftRatio(value float32) RecordInputAttr {
-	return func(m optionalAttr) {
-		m["file_shuffle_shift_ratio"] = value
-	}
-}
-
-// RecordInputFileBufferSize sets the optional file_buffer_size attribute to value.
 //
-// value: The randomization shuffling buffer.
-// If not specified, defaults to 10000
-func RecordInputFileBufferSize(value int64) RecordInputAttr {
+// REQUIRES: value >= 0
+func StagePeekMemoryLimit(value int64) StagePeekAttr {
 	return func(m optionalAttr) {
-		m["file_buffer_size"] = value
+		m["memory_limit"] = value
 	}
 }
 
-// RecordInputFileParallelism sets the optional file_parallelism attribute to value.
-//
-// value: How many sstables are opened and concurrently iterated over.
-// If not specified, defaults to 16
-func RecordInputFileParallelism(value int64) RecordInputAttr {
+// StagePeekContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func StagePeekContainer(value string) StagePeekAttr {
 	return func(m optionalAttr) {
-		m["file_parallelism"] = value
+		m["container"] = value
 	}
 }
 
-// RecordInputBatchSize sets the optional batch_size attribute to value.
-//
-// value: The batch size.
-// If not specified, defaults to 32
-func RecordInputBatchSize(value int64) RecordInputAttr {
+// StagePeekSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func StagePeekSharedName(value string) StagePeekAttr {
 	return func(m optionalAttr) {
-		m["batch_size"] = value
+		m["shared_name"] = value
 	}
 }
 
-// Emits randomized records.
-//
-// Arguments:
-//	file_pattern: Glob pattern for the data files.
+// Op peeks at the values at the specified index.  If the
 //
-// Returns A tensor of shape [batch_size].
-func RecordInput(scope *Scope, file_pattern string, optional ...RecordInputAttr) (records tf.Output) {
+// underlying container does not contain sufficient elements
+// this op will block until it does.   This Op is optimized for
+// performance.
+func StagePeek(scope *Scope, index tf.Output, dtypes []tf.DataType, optional ...StagePeekAttr) (values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"file_pattern": file_pattern}
+	attrs := map[string]interface{}{"dtypes": dtypes}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "RecordInput",
-
+		Type: "StagePeek",
+		Input: []tf.Input{
+			index,
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Rounds the values of a tensor to the nearest integer, element-wise.
-//
-// Rounds half to even.  Also known as bankers rounding. If you want to round
-// according to the current system rounding mode use std::cint.
-func Round(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "Round",
-		Input: []tf.Input{
-			x,
-		},
+	var idx int
+	var err error
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("StagePeek", err)
+		return
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return values
 }
 
-// Generates values in an interval.
-//
-// A sequence of `num` evenly-spaced values are generated beginning at `start`.
-// If `num > 1`, the values in the sequence increase by `stop - start / num - 1`,
-// so that the last one is exactly `stop`.
-//
-// For example:
-//
-// ```
-// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0  11.0  12.0]
-// ```
-//
-// Arguments:
-//	start: First entry in the range.
-//	stop: Last entry in the range.
-//	num: Number of values to generate.
+// Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2.
+type Conv3DBackpropInputV2Attr func(optionalAttr)
+
+// Conv3DBackpropInputV2DataFormat sets the optional data_format attribute to value.
 //
-// Returns 1-D. The generated values.
-func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
+// value: The data format of the input and output data. With the
+// default format "NDHWC", the data is stored in the order of:
+//     [batch, in_depth, in_height, in_width, in_channels].
+// Alternatively, the format could be "NCDHW", the data storage order is:
+//     [batch, in_channels, in_depth, in_height, in_width].
+// If not specified, defaults to "NDHWC"
+func Conv3DBackpropInputV2DataFormat(value string) Conv3DBackpropInputV2Attr {
+	return func(m optionalAttr) {
+		m["data_format"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "LinSpace",
-		Input: []tf.Input{
-			start, stop, num,
-		},
+}
+
+// Conv3DBackpropInputV2Dilations sets the optional dilations attribute to value.
+//
+// value: 1-D tensor of length 5.  The dilation factor for each dimension of
+// `input`. If set to k > 1, there will be k-1 skipped cells between each
+// filter element on that dimension. The dimension order is determined by the
+// value of `data_format`, see above for details. Dilations in the batch and
+// depth dimensions must be 1.
+// If not specified, defaults to <i:1 i:1 i:1 i:1 i:1 >
+func Conv3DBackpropInputV2Dilations(value []int64) Conv3DBackpropInputV2Attr {
+	return func(m optionalAttr) {
+		m["dilations"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes natural logarithm of x element-wise.
+// Computes the gradients of 3-D convolution with respect to the input.
 //
-// I.e., \\(y = \log_e x\\).
-func Log(scope *Scope, x tf.Output) (y tf.Output) {
+// Arguments:
+//	input_sizes: An integer vector representing the tensor shape of `input`,
+// where `input` is a 5-D
+// `[batch, depth, rows, cols, in_channels]` tensor.
+//	filter: Shape `[depth, rows, cols, in_channels, out_channels]`.
+// `in_channels` must match between `input` and `filter`.
+//	out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols,
+// out_channels]`.
+//	strides: 1-D tensor of length 5. The stride of the sliding window for each
+// dimension of `input`. Must have `strides[0] = strides[4] = 1`.
+//	padding: The type of padding algorithm to use.
+func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputV2Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Log",
+		Type: "Conv3DBackpropInputV2",
 		Input: []tf.Input{
-			x,
+			input_sizes, filter, out_backprop,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ResizeBicubicAttr is an optional argument to ResizeBicubic.
-type ResizeBicubicAttr func(optionalAttr)
+// DepthToSpaceAttr is an optional argument to DepthToSpace.
+type DepthToSpaceAttr func(optionalAttr)
 
-// ResizeBicubicAlignCorners sets the optional align_corners attribute to value.
-//
-// value: If true, rescale input by (new_height - 1) / (height - 1), which
-// exactly aligns the 4 corners of images and resized images. If false, rescale
-// by new_height / height. Treat similarly the width dimension.
-// If not specified, defaults to false
-func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr {
+// DepthToSpaceDataFormat sets the optional data_format attribute to value.
+// If not specified, defaults to "NHWC"
+func DepthToSpaceDataFormat(value string) DepthToSpaceAttr {
 	return func(m optionalAttr) {
-		m["align_corners"] = value
+		m["data_format"] = value
 	}
 }
 
-// Resize `images` to `size` using bicubic interpolation.
+// DepthToSpace for tensors of type T.
 //
-// Input images can be of different types but output images are always float.
+// Rearranges data from depth into blocks of spatial data.
+// This is the reverse transformation of SpaceToDepth. More specifically,
+// this op outputs a copy of the input tensor where values from the `depth`
+// dimension are moved in spatial blocks to the `height` and `width` dimensions.
+// The attr `block_size` indicates the input block size and how the data is moved.
+//
+//   * Chunks of data of size `block_size * block_size` from depth are rearranged
+//     into non-overlapping blocks of size `block_size x block_size`
+//   * The width the output tensor is `input_depth * block_size`, whereas the
+//     height is `input_height * block_size`.
+//   * The Y, X coordinates within each block of the output image are determined
+//     by the high order component of the input channel index.
+//   * The depth of the input tensor must be divisible by
+//     `block_size * block_size`.
+//
+// The `data_format` attr specifies the layout of the input and output tensors
+// with the following options:
+//   "NHWC": `[ batch, height, width, channels ]`
+//   "NCHW": `[ batch, channels, height, width ]`
+//   "NCHW_VECT_C":
+//       `qint8 [ batch, channels / 4, height, width, 4 ]`
+//
+// It is useful to consider the operation as transforming a 6-D Tensor.
+// e.g. for data_format = NHWC,
+//      Each element in the input tensor can be specified via 6 coordinates,
+//      ordered by decreasing memory layout significance as:
+//      n,iY,iX,bY,bX,oC  (where n=batch index, iX, iY means X or Y coordinates
+//                         within the input image, bX, bY means coordinates
+//                         within the output block, oC means output channels).
+//      The output would be the input transposed to the following layout:
+//      n,iY,bY,iX,bX,oC
+//
+// This operation is useful for resizing the activations between convolutions
+// (but keeping all data), e.g. instead of pooling. It is also useful for training
+// purely convolutional models.
+//
+// For example, given an input of shape `[1, 1, 1, 4]`, data_format = "NHWC" and
+// block_size = 2:
+//
+// ```
+// x = [[[[1, 2, 3, 4]]]]
+//
+// ```
+//
+// This operation will output a tensor of shape `[1, 2, 2, 1]`:
+//
+// ```
+//    [[[[1], [2]],
+//      [[3], [4]]]]
+// ```
+//
+// Here, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`,
+// the corresponding output will have 2x2 elements and will have a depth of
+// 1 channel (1 = `4 / (block_size * block_size)`).
+// The output element shape is `[2, 2, 1]`.
+//
+// For an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g.
+//
+// ```
+// x = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]
+// ```
+//
+// This operation, for block size of 2, will return the following tensor of shape
+// `[1, 2, 2, 3]`
+//
+// ```
+//    [[[[1, 2, 3], [4, 5, 6]],
+//      [[7, 8, 9], [10, 11, 12]]]]
+//
+// ```
+//
+// Similarly, for the following input of shape `[1 2 2 4]`, and a block size of 2:
+//
+// ```
+// x =  [[[[1, 2, 3, 4],
+//        [5, 6, 7, 8]],
+//       [[9, 10, 11, 12],
+//        [13, 14, 15, 16]]]]
+// ```
+//
+// the operator will return the following tensor of shape `[1 4 4 1]`:
+//
+// ```
+// x = [[[ [1],   [2],  [5],  [6]],
+//       [ [3],   [4],  [7],  [8]],
+//       [ [9],  [10], [13],  [14]],
+//       [ [11], [12], [15],  [16]]]]
+//
+// ```
 //
 // Arguments:
-//	images: 4-D with shape `[batch, height, width, channels]`.
-//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-// new size for the images.
 //
-// Returns 4-D with shape
-// `[batch, new_height, new_width, channels]`.
-func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) {
+//	block_size: The size of the spatial block, same as in Space2Depth.
+func DepthToSpace(scope *Scope, input tf.Output, block_size int64, optional ...DepthToSpaceAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"block_size": block_size}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResizeBicubic",
+		Type: "DepthToSpace",
 		Input: []tf.Input{
-			images, size,
+			input,
 		},
 		Attrs: attrs,
 	}
@@ -23834,136 +24081,125 @@ func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...R
 	return op.Output(0)
 }
 
-// Computes rectified linear 6 gradients for a Relu6 operation.
+// MapStageAttr is an optional argument to MapStage.
+type MapStageAttr func(optionalAttr)
+
+// MapStageCapacity sets the optional capacity attribute to value.
 //
-// Arguments:
-//	gradients: The backpropagated gradients to the corresponding Relu6 operation.
-//	features: The features passed as input to the corresponding Relu6 operation, or
-// its output; using either one produces the same result.
+// value: Maximum number of elements in the Staging Area. If > 0, inserts
+// on the container will block when the capacity is reached.
+// If not specified, defaults to 0
 //
-// Returns The gradients:
-// `gradients * (features > 0) * (features < 6)`.
-func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Relu6Grad",
-		Input: []tf.Input{
-			gradients, features,
-		},
+// REQUIRES: value >= 0
+func MapStageCapacity(value int64) MapStageAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes natural logarithm of (1 + x) element-wise.
+// MapStageMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
 //
-// I.e., \\(y = \log_e (1 + x)\\).
-func Log1p(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Log1p",
-		Input: []tf.Input{
-			x,
-		},
+// REQUIRES: value >= 0
+func MapStageMemoryLimit(value int64) MapStageAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Creates a dataset that emits each dim-0 slice of `components` once.
-func TensorSliceDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "TensorSliceDataset",
-		Input: []tf.Input{
-			tf.OutputList(components),
-		},
-		Attrs: attrs,
+// MapStageContainer sets the optional container attribute to value.
+//
+// value: If non-empty, this queue is placed in the given container. Otherwise,
+// a default container is used.
+// If not specified, defaults to ""
+func MapStageContainer(value string) MapStageAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes tan of x element-wise.
-func Tan(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Tan",
-		Input: []tf.Input{
-			x,
-		},
+// MapStageSharedName sets the optional shared_name attribute to value.
+//
+// value: It is necessary to match this name to the matching Unstage Op.
+// If not specified, defaults to ""
+func MapStageSharedName(value string) MapStageAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes hyperbolic cosine of x element-wise.
-func Cosh(scope *Scope, x tf.Output) (y tf.Output) {
+// Stage (key, values) in the underlying container which behaves like a hashtable.
+//
+// Arguments:
+//	key: int64
+//
+//	values: a list of tensors
+// dtypes A list of data types that inserted values should adhere to.
+//
+//
+// Returns the created operation.
+func MapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...MapStageAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Cosh",
+		Type: "MapStage",
 		Input: []tf.Input{
-			x,
+			key, indices, tf.OutputList(values),
 		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
 }
 
-// MapClearAttr is an optional argument to MapClear.
-type MapClearAttr func(optionalAttr)
+// MapPeekAttr is an optional argument to MapPeek.
+type MapPeekAttr func(optionalAttr)
 
-// MapClearCapacity sets the optional capacity attribute to value.
+// MapPeekCapacity sets the optional capacity attribute to value.
 // If not specified, defaults to 0
 //
 // REQUIRES: value >= 0
-func MapClearCapacity(value int64) MapClearAttr {
+func MapPeekCapacity(value int64) MapPeekAttr {
 	return func(m optionalAttr) {
 		m["capacity"] = value
 	}
 }
 
-// MapClearMemoryLimit sets the optional memory_limit attribute to value.
+// MapPeekMemoryLimit sets the optional memory_limit attribute to value.
 // If not specified, defaults to 0
 //
 // REQUIRES: value >= 0
-func MapClearMemoryLimit(value int64) MapClearAttr {
+func MapPeekMemoryLimit(value int64) MapPeekAttr {
 	return func(m optionalAttr) {
 		m["memory_limit"] = value
 	}
 }
 
-// MapClearContainer sets the optional container attribute to value.
+// MapPeekContainer sets the optional container attribute to value.
 // If not specified, defaults to ""
-func MapClearContainer(value string) MapClearAttr {
+func MapPeekContainer(value string) MapPeekAttr {
 	return func(m optionalAttr) {
 		m["container"] = value
 	}
 }
 
-// MapClearSharedName sets the optional shared_name attribute to value.
+// MapPeekSharedName sets the optional shared_name attribute to value.
 // If not specified, defaults to ""
-func MapClearSharedName(value string) MapClearAttr {
+func MapPeekSharedName(value string) MapPeekAttr {
 	return func(m optionalAttr) {
 		m["shared_name"] = value
 	}
 }
 
-// Op removes all elements in the underlying container.
+// Op peeks at the values at the specified key.  If the
 //
-// Returns the created operation.
-func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) {
+// underlying container does not contain this key
+// this op will block until it does.
+func MapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapPeekAttr) (values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -23972,639 +24208,625 @@ func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MapClear",
-
+		Type: "MapPeek",
+		Input: []tf.Input{
+			key, indices,
+		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
-}
-
-// TensorArrayV2Attr is an optional argument to TensorArrayV2.
-type TensorArrayV2Attr func(optionalAttr)
-
-// TensorArrayV2ElementShape sets the optional element_shape attribute to value.
-// If not specified, defaults to <unknown_rank:true >
-func TensorArrayV2ElementShape(value tf.Shape) TensorArrayV2Attr {
-	return func(m optionalAttr) {
-		m["element_shape"] = value
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// TensorArrayV2DynamicSize sets the optional dynamic_size attribute to value.
-// If not specified, defaults to false
-func TensorArrayV2DynamicSize(value bool) TensorArrayV2Attr {
-	return func(m optionalAttr) {
-		m["dynamic_size"] = value
+	var idx int
+	var err error
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("MapPeek", err)
+		return
 	}
+	return values
 }
 
-// TensorArrayV2ClearAfterRead sets the optional clear_after_read attribute to value.
-// If not specified, defaults to true
-func TensorArrayV2ClearAfterRead(value bool) TensorArrayV2Attr {
-	return func(m optionalAttr) {
-		m["clear_after_read"] = value
-	}
-}
+// QueueCloseV2Attr is an optional argument to QueueCloseV2.
+type QueueCloseV2Attr func(optionalAttr)
 
-// TensorArrayV2TensorArrayName sets the optional tensor_array_name attribute to value.
-// If not specified, defaults to ""
-func TensorArrayV2TensorArrayName(value string) TensorArrayV2Attr {
+// QueueCloseV2CancelPendingEnqueues sets the optional cancel_pending_enqueues attribute to value.
+//
+// value: If true, all pending enqueue requests that are
+// blocked on the given queue will be canceled.
+// If not specified, defaults to false
+func QueueCloseV2CancelPendingEnqueues(value bool) QueueCloseV2Attr {
 	return func(m optionalAttr) {
-		m["tensor_array_name"] = value
+		m["cancel_pending_enqueues"] = value
 	}
 }
 
-// Deprecated. Use TensorArrayV3
-func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV2Attr) (handle tf.Output) {
+// Closes the given queue.
+//
+// This operation signals that no more elements will be enqueued in the
+// given queue. Subsequent Enqueue(Many) operations will fail.
+// Subsequent Dequeue(Many) operations will continue to succeed if
+// sufficient elements remain in the queue. Subsequent Dequeue(Many)
+// operations that would block will fail immediately.
+//
+// Arguments:
+//	handle: The handle to a queue.
+//
+// Returns the created operation.
+func QueueCloseV2(scope *Scope, handle tf.Output, optional ...QueueCloseV2Attr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayV2",
+		Type: "QueueCloseV2",
 		Input: []tf.Input{
-			size,
+			handle,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` string `Tensor`.
+// Forwards the value of an available tensor from `inputs` to `output`.
 //
-// The `SparseTensor` must have rank `R` greater than 1, and the first dimension
-// is treated as the minibatch dimension.  Elements of the `SparseTensor`
-// must be sorted in increasing order of this first dimension.  The serialized
-// `SparseTensor` objects going into each row of `serialized_sparse` will have
-// rank `R-1`.
+// `Merge` waits for at least one of the tensors in `inputs` to become available.
+// It is usually combined with `Switch` to implement branching.
 //
-// The minibatch size `N` is extracted from `sparse_shape[0]`.
+// `Merge` forwards the first tensor to become available to `output`, and sets
+// `value_index` to its index in `inputs`.
 //
 // Arguments:
-//	sparse_indices: 2-D.  The `indices` of the minibatch `SparseTensor`.
-//	sparse_values: 1-D.  The `values` of the minibatch `SparseTensor`.
-//	sparse_shape: 1-D.  The `shape` of the minibatch `SparseTensor`.
-func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) (serialized_sparse tf.Output) {
+//	inputs: The input tensors, exactly one of which will become available.
+//
+// Returns Will be set to the available input tensor.The index of the chosen input tensor in `inputs`.
+func Merge(scope *Scope, inputs []tf.Output) (output tf.Output, value_index tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SerializeManySparse",
+		Type: "Merge",
 		Input: []tf.Input{
-			sparse_indices, sparse_values, sparse_shape,
+			tf.OutputList(inputs),
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Computes inverse hyperbolic cosine of x element-wise.
-func Acosh(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Acosh",
-		Input: []tf.Input{
-			x,
-		},
+// MapUnstageAttr is an optional argument to MapUnstage.
+type MapUnstageAttr func(optionalAttr)
+
+// MapUnstageCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapUnstageCapacity(value int64) MapUnstageAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes the reverse mode backpropagated gradient of the Cholesky algorithm.
-//
-// For an explanation see "Differentiation of the Cholesky algorithm" by
-// Iain Murray http://arxiv.org/abs/1602.07527.
+// MapUnstageMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
 //
-// Arguments:
-//	l: Output of batch Cholesky algorithm l = cholesky(A). Shape is `[..., M, M]`.
-// Algorithm depends only on lower triangular part of the innermost matrices of
-// this tensor.
-//	grad: df/dl where f is some scalar function. Shape is `[..., M, M]`.
-// Algorithm depends only on lower triangular part of the innermost matrices of
-// this tensor.
+// REQUIRES: value >= 0
+func MapUnstageMemoryLimit(value int64) MapUnstageAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// MapUnstageContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func MapUnstageContainer(value string) MapUnstageAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// MapUnstageSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func MapUnstageSharedName(value string) MapUnstageAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op removes and returns the values associated with the key
 //
-// Returns Symmetrized version of df/dA . Shape is `[..., M, M]`
-func CholeskyGrad(scope *Scope, l tf.Output, grad tf.Output) (output tf.Output) {
+// from the underlying container.   If the underlying container
+// does not contain this key, the op will block until it does.
+func MapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageAttr) (values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "CholeskyGrad",
+		Type: "MapUnstage",
 		Input: []tf.Input{
-			l, grad,
+			key, indices,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes inverse hyperbolic tangent of x element-wise.
-func Atanh(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "Atanh",
-		Input: []tf.Input{
-			x,
-		},
+	var idx int
+	var err error
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("MapUnstage", err)
+		return
+	}
+	return values
+}
+
+// MapSizeAttr is an optional argument to MapSize.
+type MapSizeAttr func(optionalAttr)
+
+// MapSizeCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapSizeCapacity(value int64) MapSizeAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// MapSizeMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func MapSizeMemoryLimit(value int64) MapSizeAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// MapSizeContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func MapSizeContainer(value string) MapSizeAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// MapSizeSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func MapSizeSharedName(value string) MapSizeAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes the log of the absolute value of `Gamma(x)` element-wise.
-func Lgamma(scope *Scope, x tf.Output) (y tf.Output) {
+// Op returns the number of elements in the underlying container.
+func MapSize(scope *Scope, dtypes []tf.DataType, optional ...MapSizeAttr) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Lgamma",
-		Input: []tf.Input{
-			x,
-		},
+		Type: "MapSize",
+
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns x / y element-wise for real types.
-//
-// If `x` and `y` are reals, this will return the floating-point division.
+// MapIncompleteSizeAttr is an optional argument to MapIncompleteSize.
+type MapIncompleteSizeAttr func(optionalAttr)
+
+// MapIncompleteSizeCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
 //
-// *NOTE*: `Div` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func RealDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "RealDiv",
-		Input: []tf.Input{
-			x, y,
-		},
+// REQUIRES: value >= 0
+func MapIncompleteSizeCapacity(value int64) MapIncompleteSizeAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Returns the number of work units this Reader has finished processing.
+// MapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
 //
-// Arguments:
-//	reader_handle: Handle to a Reader.
-func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ReaderNumWorkUnitsCompletedV2",
-		Input: []tf.Input{
-			reader_handle,
-		},
+// REQUIRES: value >= 0
+func MapIncompleteSizeMemoryLimit(value int64) MapIncompleteSizeAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Conv2DBackpropFilterAttr is an optional argument to Conv2DBackpropFilter.
-type Conv2DBackpropFilterAttr func(optionalAttr)
-
-// Conv2DBackpropFilterUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value.
-// If not specified, defaults to true
-func Conv2DBackpropFilterUseCudnnOnGpu(value bool) Conv2DBackpropFilterAttr {
+// MapIncompleteSizeContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func MapIncompleteSizeContainer(value string) MapIncompleteSizeAttr {
 	return func(m optionalAttr) {
-		m["use_cudnn_on_gpu"] = value
+		m["container"] = value
 	}
 }
 
-// Conv2DBackpropFilterDataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func Conv2DBackpropFilterDataFormat(value string) Conv2DBackpropFilterAttr {
+// MapIncompleteSizeSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func MapIncompleteSizeSharedName(value string) MapIncompleteSizeAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["shared_name"] = value
 	}
 }
 
-// Computes the gradients of convolution with respect to the filter.
-//
-// Arguments:
-//	input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
-//	filter_sizes: An integer vector representing the tensor shape of `filter`,
-// where `filter` is a 4-D
-// `[filter_height, filter_width, in_channels, out_channels]` tensor.
-//	out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`.
-// Gradients w.r.t. the output of the convolution.
-//	strides: The stride of the sliding window for each dimension of the input
-// of the convolution. Must be in the same order as the dimension specified with
-// format.
-//	padding: The type of padding algorithm to use.
-//
-// Returns 4-D with shape
-// `[filter_height, filter_width, in_channels, out_channels]`.  Gradient w.r.t.
-// the `filter` input of the convolution.
-func Conv2DBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropFilterAttr) (output tf.Output) {
+// Op returns the number of incomplete elements in the underlying container.
+func MapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...MapIncompleteSizeAttr) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
+	attrs := map[string]interface{}{"dtypes": dtypes}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Conv2DBackpropFilter",
-		Input: []tf.Input{
-			input, filter_sizes, out_backprop,
-		},
+		Type: "MapIncompleteSize",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MinAttr is an optional argument to Min.
-type MinAttr func(optionalAttr)
+// OrderedMapUnstageAttr is an optional argument to OrderedMapUnstage.
+type OrderedMapUnstageAttr func(optionalAttr)
 
-// MinKeepDims sets the optional keep_dims attribute to value.
+// OrderedMapUnstageCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
 //
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func MinKeepDims(value bool) MinAttr {
+// REQUIRES: value >= 0
+func OrderedMapUnstageCapacity(value int64) OrderedMapUnstageAttr {
 	return func(m optionalAttr) {
-		m["keep_dims"] = value
+		m["capacity"] = value
 	}
 }
 
-// Computes the minimum of elements across dimensions of a tensor.
-//
-// Reduces `input` along the dimensions given in `reduction_indices`. Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-// retained with length 1.
-//
-// Arguments:
-//	input: The tensor to reduce.
-//	reduction_indices: The dimensions to reduce. Must be in the range
-// `[-rank(input), rank(input))`.
+// OrderedMapUnstageMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
 //
-// Returns The reduced tensor.
-func Min(scope *Scope, input tf.Output, reduction_indices tf.Output, optional ...MinAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Min",
-		Input: []tf.Input{
-			input, reduction_indices,
-		},
-		Attrs: attrs,
+// REQUIRES: value >= 0
+func OrderedMapUnstageMemoryLimit(value int64) OrderedMapUnstageAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes Psi, the derivative of Lgamma (the log of the absolute value of
-//
-// `Gamma(x)`), element-wise.
-func Digamma(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
+// OrderedMapUnstageContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func OrderedMapUnstageContainer(value string) OrderedMapUnstageAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "Digamma",
-		Input: []tf.Input{
-			x,
-		},
+}
+
+// OrderedMapUnstageSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func OrderedMapUnstageSharedName(value string) OrderedMapUnstageAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Returns element-wise largest integer not greater than x.
-func Floor(scope *Scope, x tf.Output) (y tf.Output) {
+// Op removes and returns the values associated with the key
+//
+// from the underlying container.   If the underlying container
+// does not contain this key, the op will block until it does.
+func OrderedMapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageAttr) (values []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Floor",
+		Type: "OrderedMapUnstage",
 		Input: []tf.Input{
-			x,
+			key, indices,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the Gauss error function of `x` element-wise.
-func Erf(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "Erf",
-		Input: []tf.Input{
-			x,
-		},
+	var idx int
+	var err error
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("OrderedMapUnstage", err)
+		return
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return values
 }
 
-// Gather slices from `params` axis `axis` according to `indices`.
-//
-// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
-// Produces an output tensor with shape `params.shape[:axis] + indices.shape +
-// params.shape[axis + 1:]` where:
-//
-// ```python
-//     # Scalar indices (output is rank(params) - 1).
-//     output[a_0, ..., a_n, b_0, ..., b_n] =
-//       params[a_0, ..., a_n, indices, b_0, ..., b_n]
-//
-//     # Vector indices (output is rank(params)).
-//     output[a_0, ..., a_n, i, b_0, ..., b_n] =
-//       params[a_0, ..., a_n, indices[i], b_0, ..., b_n]
-//
-//     # Higher rank indices (output is rank(params) + rank(indices) - 1).
-//     output[a_0, ..., a_n, i, ..., j, b_0, ... b_n] =
-//       params[a_0, ..., a_n, indices[i, ..., j], b_0, ..., b_n]
-// ```
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt>
-// </div>
+// OrderedMapSizeAttr is an optional argument to OrderedMapSize.
+type OrderedMapSizeAttr func(optionalAttr)
+
+// OrderedMapSizeCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
 //
-// Arguments:
-//	params: The tensor from which to gather values. Must be at least rank
-// `axis + 1`.
-//	indices: Index tensor. Must be in range `[0, params.shape[axis])`.
-//	axis: The axis in `params` to gather `indices` from. Defaults to the first
-// dimension. Supports negative indexes.
+// REQUIRES: value >= 0
+func OrderedMapSizeCapacity(value int64) OrderedMapSizeAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// OrderedMapSizeMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
 //
-// Returns Values from `params` gathered from indices given by `indices`, with
-// shape `params.shape[:axis] + indices.shape + params.shape[axis + 1:]`.
-func GatherV2(scope *Scope, params tf.Output, indices tf.Output, axis tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
+// REQUIRES: value >= 0
+func OrderedMapSizeMemoryLimit(value int64) OrderedMapSizeAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "GatherV2",
-		Input: []tf.Input{
-			params, indices, axis,
-		},
+}
+
+// OrderedMapSizeContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func OrderedMapSizeContainer(value string) OrderedMapSizeAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes the complementary error function of `x` element-wise.
-func Erfc(scope *Scope, x tf.Output) (y tf.Output) {
+// OrderedMapSizeSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func OrderedMapSizeSharedName(value string) OrderedMapSizeAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op returns the number of elements in the underlying container.
+func OrderedMapSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapSizeAttr) (size tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Erfc",
-		Input: []tf.Input{
-			x,
-		},
+		Type: "OrderedMapSize",
+
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes sin of x element-wise.
-func Sin(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
+// CTCLossAttr is an optional argument to CTCLoss.
+type CTCLossAttr func(optionalAttr)
+
+// CTCLossPreprocessCollapseRepeated sets the optional preprocess_collapse_repeated attribute to value.
+//
+// value: Scalar, if true then repeated labels are
+// collapsed prior to the CTC calculation.
+// If not specified, defaults to false
+func CTCLossPreprocessCollapseRepeated(value bool) CTCLossAttr {
+	return func(m optionalAttr) {
+		m["preprocess_collapse_repeated"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "Sin",
-		Input: []tf.Input{
-			x,
-		},
+}
+
+// CTCLossCtcMergeRepeated sets the optional ctc_merge_repeated attribute to value.
+//
+// value: Scalar.  If set to false, *during* CTC calculation
+// repeated non-blank labels will not be merged and are interpreted as
+// individual labels.  This is a simplified version of CTC.
+// If not specified, defaults to true
+func CTCLossCtcMergeRepeated(value bool) CTCLossAttr {
+	return func(m optionalAttr) {
+		m["ctc_merge_repeated"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes the determinant of one or more square matrices.
+// CTCLossIgnoreLongerOutputsThanInputs sets the optional ignore_longer_outputs_than_inputs attribute to value.
 //
-// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-// form square matrices. The output is a tensor containing the determinants
-// for all input submatrices `[..., :, :]`.
+// value: Scalar. If set to true, during CTC
+// calculation, items that have longer output sequences than input sequences
+// are skipped: they don't contribute to the loss term and have zero-gradient.
+// If not specified, defaults to false
+func CTCLossIgnoreLongerOutputsThanInputs(value bool) CTCLossAttr {
+	return func(m optionalAttr) {
+		m["ignore_longer_outputs_than_inputs"] = value
+	}
+}
+
+// Calculates the CTC Loss (log probability) for each batch entry.  Also calculates
+//
+// the gradient.  This class performs the softmax operation for you, so inputs
+// should be e.g. linear projections of outputs by an LSTM.
 //
 // Arguments:
-//	input: Shape is `[..., M, M]`.
+//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
+//	labels_indices: The indices of a `SparseTensor<int32, 2>`.
+// `labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for
+// `(batch b, time t)`.
+//	labels_values: The values (labels) associated with the given batch and time.
+//	sequence_length: A vector containing sequence lengths (batch).
 //
-// Returns Shape is `[...]`.
-func MatrixDeterminant(scope *Scope, input tf.Output) (output tf.Output) {
+// Returns A vector (batch) containing log-probabilities.The gradient of `loss`.  3-D, shape:
+// `(max_time x batch_size x num_classes)`.
+func CTCLoss(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_values tf.Output, sequence_length tf.Output, optional ...CTCLossAttr) (loss tf.Output, gradient tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "MatrixDeterminant",
+		Type: "CTCLoss",
 		Input: []tf.Input{
-			input,
+			inputs, labels_indices, labels_values, sequence_length,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Computes cos of x element-wise.
-func Cos(scope *Scope, x tf.Output) (y tf.Output) {
+// CTCGreedyDecoderAttr is an optional argument to CTCGreedyDecoder.
+type CTCGreedyDecoderAttr func(optionalAttr)
+
+// CTCGreedyDecoderMergeRepeated sets the optional merge_repeated attribute to value.
+//
+// value: If True, merge repeated classes in output.
+// If not specified, defaults to false
+func CTCGreedyDecoderMergeRepeated(value bool) CTCGreedyDecoderAttr {
+	return func(m optionalAttr) {
+		m["merge_repeated"] = value
+	}
+}
+
+// Performs greedy decoding on the logits given in inputs.
+//
+// A note about the attribute merge_repeated: if enabled, when
+// consecutive logits' maximum indices are the same, only the first of
+// these is emitted.  Labeling the blank '*', the sequence "A B B * B B"
+// becomes "A B B" if merge_repeated = True and "A B B B B" if
+// merge_repeated = False.
+//
+// Regardless of the value of merge_repeated, if the maximum index of a given
+// time and batch corresponds to the blank, index `(num_classes - 1)`, no new
+// element is emitted.
+//
+// Arguments:
+//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
+//	sequence_length: A vector containing sequence lengths, size `(batch_size)`.
+//
+// Returns Indices matrix, size `(total_decoded_outputs x 2)`,
+// of a `SparseTensor<int64, 2>`.  The rows store: [batch, time].Values vector, size: `(total_decoded_outputs)`,
+// of a `SparseTensor<int64, 2>`.  The vector stores the decoded classes.Shape vector, size `(2)`, of the decoded SparseTensor.
+// Values are: `[batch_size, max_decoded_length]`.Matrix, size `(batch_size x 1)`, containing sequence
+// log-probabilities.
+func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, optional ...CTCGreedyDecoderAttr) (decoded_indices tf.Output, decoded_values tf.Output, decoded_shape tf.Output, log_probability tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Cos",
+		Type: "CTCGreedyDecoder",
 		Input: []tf.Input{
-			x,
+			inputs, sequence_length,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
 }
 
-// BatchToSpace for 4-D tensors of type T.
-//
-// This is a legacy version of the more general BatchToSpaceND.
-//
-// Rearranges (permutes) data from batch into blocks of spatial data, followed by
-// cropping. This is the reverse transformation of SpaceToBatch. More specifically,
-// this op outputs a copy of the input tensor where values from the `batch`
-// dimension are moved in spatial blocks to the `height` and `width` dimensions,
-// followed by cropping along the `height` and `width` dimensions.
-//
-// Arguments:
-//	input: 4-D tensor with shape
-// `[batch*block_size*block_size, height_pad/block_size, width_pad/block_size,
-//   depth]`. Note that the batch size of the input tensor must be divisible by
-// `block_size * block_size`.
-//	crops: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies
-// how many elements to crop from the intermediate result across the spatial
-// dimensions as follows:
-//
-//     crops = [[crop_top, crop_bottom], [crop_left, crop_right]]
-//
-//
-// Returns 4-D with shape `[batch, height, width, depth]`, where:
-//
-//       height = height_pad - crop_top - crop_bottom
-//       width = width_pad - crop_left - crop_right
-//
-// The attr `block_size` must be greater than one. It indicates the block size.
-//
-// Some examples:
-//
-// (1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2:
-//
-// ```
-// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-// ```
-//
-// The output tensor has shape `[1, 2, 2, 1]` and value:
-//
-// ```
-// x = [[[[1], [2]], [[3], [4]]]]
-// ```
-//
-// (2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2:
-//
-// ```
-// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-// ```
-//
-// The output tensor has shape `[1, 2, 2, 3]` and value:
-//
-// ```
-// x = [[[[1, 2, 3], [4, 5, 6]],
-//       [[7, 8, 9], [10, 11, 12]]]]
-// ```
-//
-// (3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2:
-//
-// ```
-// x = [[[[1], [3]], [[9], [11]]],
-//      [[[2], [4]], [[10], [12]]],
-//      [[[5], [7]], [[13], [15]]],
-//      [[[6], [8]], [[14], [16]]]]
-// ```
-//
-// The output tensor has shape `[1, 4, 4, 1]` and value:
-//
-// ```
-// x = [[[1],   [2],  [3],  [4]],
-//      [[5],   [6],  [7],  [8]],
-//      [[9],  [10], [11],  [12]],
-//      [[13], [14], [15],  [16]]]
-// ```
+// Forwards `data` to the output port determined by `pred`.
 //
-// (4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2:
+// If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,
+// the data goes to `output_false`.
 //
-// ```
-// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],
-//      [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]
-// ```
+// See also `RefSwitch` and `Merge`.
 //
-// The output tensor has shape `[2, 2, 4, 1]` and value:
+// Arguments:
+//	data: The tensor to be forwarded to the appropriate output.
+//	pred: A scalar that specifies which output port will receive data.
 //
-// ```
-// x = [[[[1], [3]], [[5], [7]]],
-//      [[[2], [4]], [[10], [12]]],
-//      [[[5], [7]], [[13], [15]]],
-//      [[[6], [8]], [[14], [16]]]]
-// ```
-func BatchToSpace(scope *Scope, input tf.Output, crops tf.Output, block_size int64) (output tf.Output) {
+// Returns If `pred` is false, data will be forwarded to this output.If `pred` is true, data will be forwarded to this output.
+func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Output, output_true tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"block_size": block_size}
 	opspec := tf.OpSpec{
-		Type: "BatchToSpace",
+		Type: "Switch",
 		Input: []tf.Input{
-			input, crops,
+			data, pred,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// Add all input tensors element wise.
+//
+// Arguments:
+//	inputs: Must all be the same size and shape.
+func AddN(scope *Scope, inputs []tf.Output) (sum tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "AddN",
+		Input: []tf.Input{
+			tf.OutputList(inputs),
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// SparseToDenseAttr is an optional argument to SparseToDense.
-type SparseToDenseAttr func(optionalAttr)
+// EnterAttr is an optional argument to Enter.
+type EnterAttr func(optionalAttr)
 
-// SparseToDenseValidateIndices sets the optional validate_indices attribute to value.
+// EnterIsConstant sets the optional is_constant attribute to value.
 //
-// value: If true, indices are checked to make sure they are sorted in
-// lexicographic order and that there are no repeats.
-// If not specified, defaults to true
-func SparseToDenseValidateIndices(value bool) SparseToDenseAttr {
+// value: If true, the output is constant within the child frame.
+// If not specified, defaults to false
+func EnterIsConstant(value bool) EnterAttr {
 	return func(m optionalAttr) {
-		m["validate_indices"] = value
+		m["is_constant"] = value
 	}
 }
 
-// Converts a sparse representation into a dense tensor.
-//
-// Builds an array `dense` with shape `output_shape` such that
-//
-// ```
-// # If sparse_indices is scalar
-// dense[i] = (i == sparse_indices ? sparse_values : default_value)
-//
-// # If sparse_indices is a vector, then for each i
-// dense[sparse_indices[i]] = sparse_values[i]
-//
-// # If sparse_indices is an n by d matrix, then for each i in [0, n)
-// dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i]
-// ```
+// EnterParallelIterations sets the optional parallel_iterations attribute to value.
 //
-// All other values in `dense` are set to `default_value`.  If `sparse_values` is a
-// scalar, all sparse indices are set to this single value.
+// value: The number of iterations allowed to run in parallel.
+// If not specified, defaults to 10
+func EnterParallelIterations(value int64) EnterAttr {
+	return func(m optionalAttr) {
+		m["parallel_iterations"] = value
+	}
+}
+
+// Creates or finds a child frame, and makes `data` available to the child frame.
 //
-// Indices should be sorted in lexicographic order, and indices must not
-// contain any repeats. If `validate_indices` is true, these properties
-// are checked during execution.
+// This op is used together with `Exit` to create loops in the graph.
+// The unique `frame_name` is used by the `Executor` to identify frames. If
+// `is_constant` is true, `output` is a constant in the child frame; otherwise
+// it may be changed in the child frame. At most `parallel_iterations` iterations
+// are run in parallel in the child frame.
 //
 // Arguments:
-//	sparse_indices: 0-D, 1-D, or 2-D.  `sparse_indices[i]` contains the complete
-// index where `sparse_values[i]` will be placed.
-//	output_shape: 1-D.  Shape of the dense output tensor.
-//	sparse_values: 1-D.  Values corresponding to each row of `sparse_indices`,
-// or a scalar value to be used for all sparse indices.
-//	default_value: Scalar value to set for indices not specified in
-// `sparse_indices`.
+//	data: The tensor to be made available to the child frame.
+//	frame_name: The name of the child frame.
 //
-// Returns Dense output tensor of shape `output_shape`.
-func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Output, sparse_values tf.Output, default_value tf.Output, optional ...SparseToDenseAttr) (dense tf.Output) {
+// Returns The same tensor as `data`.
+func Enter(scope *Scope, data tf.Output, frame_name string, optional ...EnterAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"frame_name": frame_name}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseToDense",
+		Type: "Enter",
 		Input: []tf.Input{
-			sparse_indices, output_shape, sparse_values, default_value,
+			data,
 		},
 		Attrs: attrs,
 	}
@@ -24612,136 +24834,271 @@ func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Outpu
 	return op.Output(0)
 }
 
-// NthElementAttr is an optional argument to NthElement.
-type NthElementAttr func(optionalAttr)
-
-// NthElementReverse sets the optional reverse attribute to value.
+// Produce a string tensor that encodes the state of a Reader.
 //
-// value: When set to True, find the nth-largest value in the vector and vice
-// versa.
-// If not specified, defaults to false
-func NthElementReverse(value bool) NthElementAttr {
-	return func(m optionalAttr) {
-		m["reverse"] = value
+// Not all Readers support being serialized, so this can produce an
+// Unimplemented error.
+//
+// Arguments:
+//	reader_handle: Handle to a Reader.
+func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ReaderSerializeStateV2",
+		Input: []tf.Input{
+			reader_handle,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Finds values of the `n`-th order statistic for the last dimension.
-//
-// If the input is a vector (rank-1), finds the entries which is the nth-smallest
-// value in the vector and outputs their values as scalar tensor.
-//
-// For matrices (resp. higher rank input), computes the entries which is the
-// nth-smallest value in each row (resp. vector along the last dimension). Thus,
+// Exits the current frame to its parent frame.
 //
-//     values.shape = input.shape[:-1]
+// Exit makes its input `data` available to the parent frame.
 //
 // Arguments:
-//	input: 1-D or higher with last dimension at least `n+1`.
-//	n: 0-D. Position of sorted vector to select along the last dimension (along
-// each row for matrices). Valid range of n is `[0, input.shape[:-1])`
+//	data: The tensor to be made available to the parent frame.
 //
-// Returns The `n`-th order statistic along each last dimensional slice.
-func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) {
+// Returns The same tensor as `data`.
+func Exit(scope *Scope, data tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "NthElement",
+		Type: "Exit",
 		Input: []tf.Input{
-			input, n,
+			data,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes asin of x element-wise.
-func Asin(scope *Scope, x tf.Output) (y tf.Output) {
+// Returns a copy of the input tensor.
+func Snapshot(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Asin",
+		Type: "Snapshot",
 		Input: []tf.Input{
-			x,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Convert the quantized 'input' tensor into a lower-precision 'output', using the
+// Scatter `updates` into a new (initially zero) tensor according to `indices`.
 //
-// output range specified with 'requested_output_min' and 'requested_output_max'.
+// Creates a new tensor by applying sparse `updates` to individual
+// values or slices within a zero tensor of the given `shape` according to
+// indices.  This operator is the inverse of the @{tf.gather_nd} operator which
+// extracts values or slices from a given tensor.
 //
-// [input_min, input_max] are scalar floats that specify the range for the float
-// interpretation of the 'input' data. For example, if input_min is -1.0f and
-// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
-// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+// **WARNING**: The order in which updates are applied is nondeterministic, so the
+// output will be nondeterministic if `indices` contains duplicates.
 //
-// Arguments:
+// `indices` is an integer tensor containing indices into a new tensor of shape
+// `shape`.  The last dimension of `indices` can be at most the rank of `shape`:
 //
-//	input_min: The float value that the minimum quantized input value represents.
-//	input_max: The float value that the maximum quantized input value represents.
-//	requested_output_min: The float value that the minimum quantized output value represents.
-//	requested_output_max: The float value that the maximum quantized output value represents.
-//	out_type: The type of the output. Should be a lower bit depth than Tinput.
+//     indices.shape[-1] <= shape.rank
 //
-// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
-func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+// The last dimension of `indices` corresponds to indices into elements
+// (if `indices.shape[-1] = shape.rank`) or slices
+// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of
+// `shape`.  `updates` is a tensor with shape
+//
+//     indices.shape[:-1] + shape[indices.shape[-1]:]
+//
+// The simplest form of scatter is to insert individual elements in a tensor by
+// index. For example, say we want to insert 4 scattered elements in a rank-1
+// tensor with 8 elements.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/ScatterNd1.png" alt>
+// </div>
+//
+// In Python, this scatter operation would look like this:
+//
+// ```python
+//     indices = tf.constant([[4], [3], [1], [7]])
+//     updates = tf.constant([9, 10, 11, 12])
+//     shape = tf.constant([8])
+//     scatter = tf.scatter_nd(indices, updates, shape)
+//     with tf.Session() as sess:
+//       print(sess.run(scatter))
+// ```
+//
+// The resulting tensor would look like this:
+//
+//     [0, 11, 0, 10, 9, 0, 0, 12]
+//
+// We can also, insert entire slices of a higher rank tensor all at once. For
+// example, if we wanted to insert two slices in the first dimension of a
+// rank-3 tensor with two matrices of new values.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/ScatterNd2.png" alt>
+// </div>
+//
+// In Python, this scatter operation would look like this:
+//
+// ```python
+//     indices = tf.constant([[0], [2]])
+//     updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6],
+//                             [7, 7, 7, 7], [8, 8, 8, 8]],
+//                            [[5, 5, 5, 5], [6, 6, 6, 6],
+//                             [7, 7, 7, 7], [8, 8, 8, 8]]])
+//     shape = tf.constant([4, 4, 4])
+//     scatter = tf.scatter_nd(indices, updates, shape)
+//     with tf.Session() as sess:
+//       print(sess.run(scatter))
+// ```
+//
+// The resulting tensor would look like this:
+//
+//     [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
+//      [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+//      [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
+//      [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]
+//
+// Arguments:
+//	indices: Index tensor.
+//	updates: Updates to scatter into output.
+//	shape: 1-D. The shape of the resulting tensor.
+//
+// Returns A new tensor with the given shape and updates applied according
+// to the indices.
+func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"out_type": out_type}
 	opspec := tf.OpSpec{
-		Type: "Requantize",
+		Type: "ScatterNd",
 		Input: []tf.Input{
-			input, input_min, input_max, requested_output_min, requested_output_max,
+			indices, updates, shape,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// ArgMinAttr is an optional argument to ArgMin.
-type ArgMinAttr func(optionalAttr)
+// SpaceToDepthAttr is an optional argument to SpaceToDepth.
+type SpaceToDepthAttr func(optionalAttr)
 
-// ArgMinOutputType sets the optional output_type attribute to value.
-// If not specified, defaults to DT_INT64
-func ArgMinOutputType(value tf.DataType) ArgMinAttr {
+// SpaceToDepthDataFormat sets the optional data_format attribute to value.
+// If not specified, defaults to "NHWC"
+func SpaceToDepthDataFormat(value string) SpaceToDepthAttr {
 	return func(m optionalAttr) {
-		m["output_type"] = value
+		m["data_format"] = value
 	}
 }
 
-// Returns the index with the smallest value across dimensions of a tensor.
+// SpaceToDepth for tensors of type T.
 //
-// Note that in case of ties the identity of the return value is not guaranteed.
+// Rearranges blocks of spatial data, into depth. More specifically,
+// this op outputs a copy of the input tensor where values from the `height`
+// and `width` dimensions are moved to the `depth` dimension.
+// The attr `block_size` indicates the input block size.
+//
+//   * Non-overlapping blocks of size `block_size x block size` are rearranged
+//     into depth at each location.
+//   * The depth of the output tensor is `block_size * block_size * input_depth`.
+//   * The Y, X coordinates within each block of the input become the high order
+//     component of the output channel index.
+//   * The input tensor's height and width must be divisible by block_size.
+//
+// The `data_format` attr specifies the layout of the input and output tensors
+// with the following options:
+//   "NHWC": `[ batch, height, width, channels ]`
+//   "NCHW": `[ batch, channels, height, width ]`
+//   "NCHW_VECT_C":
+//       `qint8 [ batch, channels / 4, height, width, 4 ]`
+//
+// It is useful to consider the operation as transforming a 6-D Tensor.
+// e.g. for data_format = NHWC,
+//      Each element in the input tensor can be specified via 6 coordinates,
+//      ordered by decreasing memory layout significance as:
+//      n,oY,bY,oX,bX,iC  (where n=batch index, oX, oY means X or Y coordinates
+//                         within the output image, bX, bY means coordinates
+//                         within the input block, iC means input channels).
+//      The output would be a transpose to the following layout:
+//      n,oY,oX,bY,bX,iC
+//
+// This operation is useful for resizing the activations between convolutions
+// (but keeping all data), e.g. instead of pooling. It is also useful for training
+// purely convolutional models.
+//
+// For example, given an input of shape `[1, 2, 2, 1]`, data_format = "NHWC" and
+// block_size = 2:
+//
+// ```
+// x = [[[[1], [2]],
+//       [[3], [4]]]]
+// ```
+//
+// This operation will output a tensor of shape `[1, 1, 1, 4]`:
+//
+// ```
+// [[[[1, 2, 3, 4]]]]
+// ```
+//
+// Here, the input has a batch of 1 and each batch element has shape `[2, 2, 1]`,
+// the corresponding output will have a single element (i.e. width and height are
+// both 1) and will have a depth of 4 channels (1 * block_size * block_size).
+// The output element shape is `[1, 1, 4]`.
+//
+// For an input tensor with larger depth, here of shape `[1, 2, 2, 3]`, e.g.
+//
+// ```
+// x = [[[[1, 2, 3], [4, 5, 6]],
+//       [[7, 8, 9], [10, 11, 12]]]]
+// ```
+//
+// This operation, for block_size of 2, will return the following tensor of shape
+// `[1, 1, 1, 12]`
+//
+// ```
+// [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]
+// ```
+//
+// Similarly, for the following input of shape `[1 4 4 1]`, and a block size of 2:
+//
+// ```
+// x = [[[[1],   [2],  [5],  [6]],
+//       [[3],   [4],  [7],  [8]],
+//       [[9],  [10], [13],  [14]],
+//       [[11], [12], [15],  [16]]]]
+// ```
+//
+// the operator will return the following tensor of shape `[1 2 2 4]`:
+//
+// ```
+// x = [[[[1, 2, 3, 4],
+//        [5, 6, 7, 8]],
+//       [[9, 10, 11, 12],
+//        [13, 14, 15, 16]]]]
+// ```
 //
 // Arguments:
 //
-//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
-// Describes which dimension of the input Tensor to reduce across. For vectors,
-// use dimension = 0.
-func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) {
+//	block_size: The size of the spatial block.
+func SpaceToDepth(scope *Scope, input tf.Output, block_size int64, optional ...SpaceToDepthAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"block_size": block_size}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ArgMin",
+		Type: "SpaceToDepth",
 		Input: []tf.Input{
-			input, dimension,
+			input,
 		},
 		Attrs: attrs,
 	}
@@ -24749,36 +25106,36 @@ func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgM
 	return op.Output(0)
 }
 
-// ResourceSparseApplyProximalGradientDescentAttr is an optional argument to ResourceSparseApplyProximalGradientDescent.
-type ResourceSparseApplyProximalGradientDescentAttr func(optionalAttr)
+// AbortAttr is an optional argument to Abort.
+type AbortAttr func(optionalAttr)
 
-// ResourceSparseApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value.
+// AbortErrorMsg sets the optional error_msg attribute to value.
 //
-// value: If True, the subtraction will be protected by a lock;
-// otherwise the behavior is undefined, but may exhibit less contention.
+// value: A string which is the message associated with the exception.
+// If not specified, defaults to ""
+func AbortErrorMsg(value string) AbortAttr {
+	return func(m optionalAttr) {
+		m["error_msg"] = value
+	}
+}
+
+// AbortExitWithoutError sets the optional exit_without_error attribute to value.
 // If not specified, defaults to false
-func ResourceSparseApplyProximalGradientDescentUseLocking(value bool) ResourceSparseApplyProximalGradientDescentAttr {
+func AbortExitWithoutError(value bool) AbortAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["exit_without_error"] = value
 	}
 }
 
-// Sparse update '*var' as FOBOS algorithm with fixed learning rate.
+// Raise a exception to abort the process when called.
 //
-// That is for rows we have grad for, we update var as follows:
-// prox_v = var - alpha * grad
-// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
+// If exit_without_error is true, the process will exit normally,
+// otherwise it will exit with a SIGABORT signal.
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	alpha: Scaling factor. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
+// Returns nothing but an exception.
 //
 // Returns the created operation.
-func ResourceSparseApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalGradientDescentAttr) (o *tf.Operation) {
+func Abort(scope *Scope, optional ...AbortAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -24787,741 +25144,942 @@ func ResourceSparseApplyProximalGradientDescent(scope *Scope, var_ tf.Output, al
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyProximalGradientDescent",
-		Input: []tf.Input{
-			var_, alpha, l1, l2, grad, indices,
-		},
+		Type: "Abort",
+
 		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// InitializeTableFromTextFileV2Attr is an optional argument to InitializeTableFromTextFileV2.
-type InitializeTableFromTextFileV2Attr func(optionalAttr)
+// UniformCandidateSamplerAttr is an optional argument to UniformCandidateSampler.
+type UniformCandidateSamplerAttr func(optionalAttr)
 
-// InitializeTableFromTextFileV2VocabSize sets the optional vocab_size attribute to value.
-//
-// value: Number of elements of the file, use -1 if unknown.
-// If not specified, defaults to -1
+// UniformCandidateSamplerSeed sets the optional seed attribute to value.
 //
-// REQUIRES: value >= -1
-func InitializeTableFromTextFileV2VocabSize(value int64) InitializeTableFromTextFileV2Attr {
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func UniformCandidateSamplerSeed(value int64) UniformCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["vocab_size"] = value
+		m["seed"] = value
 	}
 }
 
-// InitializeTableFromTextFileV2Delimiter sets the optional delimiter attribute to value.
+// UniformCandidateSamplerSeed2 sets the optional seed2 attribute to value.
 //
-// value: Delimiter to separate fields in a line.
-// If not specified, defaults to "\t"
-func InitializeTableFromTextFileV2Delimiter(value string) InitializeTableFromTextFileV2Attr {
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func UniformCandidateSamplerSeed2(value int64) UniformCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["delimiter"] = value
+		m["seed2"] = value
 	}
 }
 
-// Initializes a table from a text file.
+// Generates labels for candidate sampling with a uniform distribution.
 //
-// It inserts one key-value pair into the table for each line of the file.
-// The key and value is extracted from the whole line content, elements from the
-// split line based on `delimiter` or the line number (starting from zero).
-// Where to extract the key and value from a line is specified by `key_index` and
-// `value_index`.
+// See explanations of candidate sampling and the data formats at
+// go/candidate-sampling.
 //
-// - A value of -1 means use the line number(starting from zero), expects `int64`.
-// - A value of -2 means use the whole line content, expects `string`.
-// - A value >= 0 means use the index (starting at zero) of the split line based
-//   on `delimiter`.
+// For each batch, this op picks a single set of sampled candidate labels.
+//
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
 //
 // Arguments:
-//	table_handle: Handle to a table which will be initialized.
-//	filename: Filename of a vocabulary text file.
-//	key_index: Column index in a line to get the table `key` values from.
-//	value_index: Column index that represents information of a line to get the table
-// `value` values from.
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to randomly sample.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
+//	range_max: The sampler will sample integers from the interval [0, range_max).
 //
-// Returns the created operation.
-func InitializeTableFromTextFileV2(scope *Scope, table_handle tf.Output, filename tf.Output, key_index int64, value_index int64, optional ...InitializeTableFromTextFileV2Attr) (o *tf.Operation) {
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func UniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...UniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"key_index": key_index, "value_index": value_index}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "InitializeTableFromTextFileV2",
+		Type: "UniformCandidateSampler",
 		Input: []tf.Input{
-			table_handle, filename,
+			true_classes,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// FixedUnigramCandidateSamplerAttr is an optional argument to FixedUnigramCandidateSampler.
+type FixedUnigramCandidateSamplerAttr func(optionalAttr)
+
+// FixedUnigramCandidateSamplerVocabFile sets the optional vocab_file attribute to value.
+//
+// value: Each valid line in this file (which should have a CSV-like format)
+// corresponds to a valid word ID. IDs are in sequential order, starting from
+// num_reserved_ids. The last entry in each line is expected to be a value
+// corresponding to the count or relative probability. Exactly one of vocab_file
+// and unigrams needs to be passed to this op.
+// If not specified, defaults to ""
+func FixedUnigramCandidateSamplerVocabFile(value string) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["vocab_file"] = value
+	}
 }
 
-// Computes atan of x element-wise.
-func Atan(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
+// FixedUnigramCandidateSamplerDistortion sets the optional distortion attribute to value.
+//
+// value: The distortion is used to skew the unigram probability distribution.
+// Each weight is first raised to the distortion's power before adding to the
+// internal unigram distribution. As a result, distortion = 1.0 gives regular
+// unigram sampling (as defined by the vocab file), and distortion = 0.0 gives
+// a uniform distribution.
+// If not specified, defaults to 1
+func FixedUnigramCandidateSamplerDistortion(value float32) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["distortion"] = value
 	}
-	opspec := tf.OpSpec{
-		Type: "Atan",
-		Input: []tf.Input{
-			x,
-		},
+}
+
+// FixedUnigramCandidateSamplerNumReservedIds sets the optional num_reserved_ids attribute to value.
+//
+// value: Optionally some reserved IDs can be added in the range [0,
+// ..., num_reserved_ids) by the users. One use case is that a special unknown
+// word token is used as ID 0. These IDs will have a sampling probability of 0.
+// If not specified, defaults to 0
+func FixedUnigramCandidateSamplerNumReservedIds(value int64) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["num_reserved_ids"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// MfccAttr is an optional argument to Mfcc.
-type MfccAttr func(optionalAttr)
+// FixedUnigramCandidateSamplerNumShards sets the optional num_shards attribute to value.
+//
+// value: A sampler can be used to sample from a subset of the original range
+// in order to speed up the whole computation through parallelism. This parameter
+// (together with 'shard') indicates the number of partitions that are being
+// used in the overall computation.
+// If not specified, defaults to 1
+//
+// REQUIRES: value >= 1
+func FixedUnigramCandidateSamplerNumShards(value int64) FixedUnigramCandidateSamplerAttr {
+	return func(m optionalAttr) {
+		m["num_shards"] = value
+	}
+}
 
-// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value.
+// FixedUnigramCandidateSamplerShard sets the optional shard attribute to value.
 //
-// value: The highest frequency to use when calculating the
-// ceptstrum.
-// If not specified, defaults to 4000
-func MfccUpperFrequencyLimit(value float32) MfccAttr {
+// value: A sampler can be used to sample from a subset of the original range
+// in order to speed up the whole computation through parallelism. This parameter
+// (together with 'num_shards') indicates the particular partition number of a
+// sampler op, when partitioning is being used.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func FixedUnigramCandidateSamplerShard(value int64) FixedUnigramCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["upper_frequency_limit"] = value
+		m["shard"] = value
 	}
 }
 
-// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value.
+// FixedUnigramCandidateSamplerUnigrams sets the optional unigrams attribute to value.
 //
-// value: The lowest frequency to use when calculating the
-// ceptstrum.
-// If not specified, defaults to 20
-func MfccLowerFrequencyLimit(value float32) MfccAttr {
+// value: A list of unigram counts or probabilities, one per ID in sequential
+// order. Exactly one of vocab_file and unigrams should be passed to this op.
+// If not specified, defaults to <>
+func FixedUnigramCandidateSamplerUnigrams(value []float32) FixedUnigramCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["lower_frequency_limit"] = value
+		m["unigrams"] = value
 	}
 }
 
-// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value.
+// FixedUnigramCandidateSamplerSeed sets the optional seed attribute to value.
 //
-// value: Resolution of the Mel bank used internally.
-// If not specified, defaults to 40
-func MfccFilterbankChannelCount(value int64) MfccAttr {
+// value: If either seed or seed2 are set to be non-zero, the random number
+// generator is seeded by the given seed.  Otherwise, it is seeded by a
+// random seed.
+// If not specified, defaults to 0
+func FixedUnigramCandidateSamplerSeed(value int64) FixedUnigramCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["filterbank_channel_count"] = value
+		m["seed"] = value
 	}
 }
 
-// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value.
+// FixedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value.
 //
-// value: How many output channels to produce per time slice.
-// If not specified, defaults to 13
-func MfccDctCoefficientCount(value int64) MfccAttr {
+// value: An second seed to avoid seed collision.
+// If not specified, defaults to 0
+func FixedUnigramCandidateSamplerSeed2(value int64) FixedUnigramCandidateSamplerAttr {
 	return func(m optionalAttr) {
-		m["dct_coefficient_count"] = value
+		m["seed2"] = value
 	}
 }
 
-// Transforms a spectrogram into a form that's useful for speech recognition.
+// Generates labels for candidate sampling with a learned unigram distribution.
 //
-// Mel Frequency Cepstral Coefficients are a way of representing audio data that's
-// been effective as an input feature for machine learning. They are created by
-// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the
-// higher frequencies that are less significant to the human ear. They have a long
-// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
-// is a good resource to learn more.
+// A unigram sampler could use a fixed unigram distribution read from a
+// file or passed in as an in-memory array instead of building up the distribution
+// from data on the fly. There is also an option to skew the distribution by
+// applying a distortion power to the weights.
+//
+// The vocabulary file should be in CSV-like format, with the last field
+// being the weight associated with the word.
+//
+// For each batch, this op picks a single set of sampled candidate labels.
+//
+// The advantages of sampling candidates per-batch are simplicity and the
+// possibility of efficient dense matrix multiplication. The disadvantage is that
+// the sampled candidates must be chosen independently of the context and of the
+// true labels.
 //
 // Arguments:
-//	spectrogram: Typically produced by the Spectrogram op, with magnitude_squared
-// set to true.
-//	sample_rate: How many samples per second the source audio used.
-func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) {
+//	true_classes: A batch_size * num_true matrix, in which each row contains the
+// IDs of the num_true target_classes in the corresponding original label.
+//	num_true: Number of true labels per context.
+//	num_sampled: Number of candidates to randomly sample.
+//	unique: If unique is true, we sample with rejection, so that all sampled
+// candidates in a batch are unique. This requires some approximation to
+// estimate the post-rejection sampling probabilities.
+//	range_max: The sampler will sample integers from the interval [0, range_max).
+//
+// Returns A vector of length num_sampled, in which each element is
+// the ID of a sampled candidate.A batch_size * num_true matrix, representing
+// the number of times each candidate is expected to occur in a batch
+// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
+// candidate representing the number of times the candidate is expected
+// to occur in a batch of sampled candidates.  If unique=true, then this is a
+// probability.
+func FixedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...FixedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Mfcc",
+		Type: "FixedUnigramCandidateSampler",
 		Input: []tf.Input{
-			spectrogram, sample_rate,
+			true_classes,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// QuantizedAddAttr is an optional argument to QuantizedAdd.
-type QuantizedAddAttr func(optionalAttr)
-
-// QuantizedAddToutput sets the optional Toutput attribute to value.
-// If not specified, defaults to DT_QINT32
-func QuantizedAddToutput(value tf.DataType) QuantizedAddAttr {
-	return func(m optionalAttr) {
-		m["Toutput"] = value
+// Elementwise computes the bitwise AND of `x` and `y`.
+//
+// The result will have those bits set, that are set in both `x` and `y`. The
+// computation is performed on the underlying representations of `x` and `y`.
+func BitwiseAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "BitwiseAnd",
+		Input: []tf.Input{
+			x, y,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Returns x + y element-wise, working on quantized buffers.
-//
-// Arguments:
-//
-//
-//	min_x: The float value that the lowest quantized `x` value represents.
-//	max_x: The float value that the highest quantized `x` value represents.
-//	min_y: The float value that the lowest quantized `y` value represents.
-//	max_y: The float value that the highest quantized `y` value represents.
-//
-// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
+// Elementwise computes the bitwise left-shift of `x` and `y`.
 //
-// *NOTE*: `QuantizedAdd` supports limited forms of broadcasting. More about
-// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func QuantizedAdd(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedAddAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) {
+// If `y` is negative, or greater than or equal to the width of `x` in bits the
+// result is implementation defined.
+func LeftShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "QuantizedAdd",
+		Type: "LeftShift",
 		Input: []tf.Input{
-			x, y, min_x, max_x, min_y, max_y,
+			x, y,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Returns an element-wise indication of the sign of a number.
+// Elementwise computes the bitwise right-shift of `x` and `y`.
 //
-// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`.
+// Performs a logical shift for unsigned integer types, and an arithmetic shift
+// for signed integer types.
 //
-// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`.
-func Sign(scope *Scope, x tf.Output) (y tf.Output) {
+// If `y` is negative, or greater than or equal to than the width of `x` in bits
+// the result is implementation defined.
+func RightShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Sign",
+		Type: "RightShift",
 		Input: []tf.Input{
-			x,
+			x, y,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns element-wise smallest integer in not less than x.
-func Ceil(scope *Scope, x tf.Output) (y tf.Output) {
+// DecodeWavAttr is an optional argument to DecodeWav.
+type DecodeWavAttr func(optionalAttr)
+
+// DecodeWavDesiredChannels sets the optional desired_channels attribute to value.
+//
+// value: Number of sample channels wanted.
+// If not specified, defaults to -1
+func DecodeWavDesiredChannels(value int64) DecodeWavAttr {
+	return func(m optionalAttr) {
+		m["desired_channels"] = value
+	}
+}
+
+// DecodeWavDesiredSamples sets the optional desired_samples attribute to value.
+//
+// value: Length of audio requested.
+// If not specified, defaults to -1
+func DecodeWavDesiredSamples(value int64) DecodeWavAttr {
+	return func(m optionalAttr) {
+		m["desired_samples"] = value
+	}
+}
+
+// Decode a 16-bit PCM WAV file to a float tensor.
+//
+// The -32768 to 32767 signed 16-bit values will be scaled to -1.0 to 1.0 in float.
+//
+// When desired_channels is set, if the input contains fewer channels than this
+// then the last channel will be duplicated to give the requested number, else if
+// the input has more channels than requested then the additional channels will be
+// ignored.
+//
+// If desired_samples is set, then the audio will be cropped or padded with zeroes
+// to the requested length.
+//
+// The first output contains a Tensor with the content of the audio samples. The
+// lowest dimension will be the number of channels, and the second will be the
+// number of samples. For example, a ten-sample-long stereo WAV file should give an
+// output shape of [10, 2].
+//
+// Arguments:
+//	contents: The WAV-encoded audio, usually from a file.
+//
+// Returns 2-D with shape `[length, channels]`.Scalar holding the sample rate found in the WAV header.
+func DecodeWav(scope *Scope, contents tf.Output, optional ...DecodeWavAttr) (audio tf.Output, sample_rate tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Ceil",
+		Type: "DecodeWav",
 		Input: []tf.Input{
-			x,
+			contents,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Computes exponential of x element-wise.  \\(y = e^x\\).
-func Exp(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Exp",
-		Input: []tf.Input{
-			x,
-		},
+// UniqueAttr is an optional argument to Unique.
+type UniqueAttr func(optionalAttr)
+
+// UniqueOutIdx sets the optional out_idx attribute to value.
+// If not specified, defaults to DT_INT32
+func UniqueOutIdx(value tf.DataType) UniqueAttr {
+	return func(m optionalAttr) {
+		m["out_idx"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Computes the Max along segments of a tensor.
-//
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
+// Finds unique elements in a 1-D tensor.
 //
-// This operator is similar to the [unsorted segment sum operator](../../../api_docs/python/math_ops.md#UnsortedSegmentSum).
-// Instead of computing the sum over segments, it computes the maximum
-// such that:
+// This operation returns a tensor `y` containing all of the unique elements of `x`
+// sorted in the same order that they occur in `x`. This operation also returns a
+// tensor `idx` the same size as `x` that contains the index of each value of `x`
+// in the unique output `y`. In other words:
 //
-// \\(output_i = \max_j data_j\\) where max is over `j` such
-// that `segment_ids[j] == i`.
+// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
 //
-// If the maximum is empty for a given segment ID `i`, it outputs the smallest possible value for specific numeric type,
-//  `output[i] = numeric_limits<T>::min()`.
+// For example:
 //
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentMax.png" alt>
-// </div>
+// ```
+// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
+// y, idx = unique(x)
+// y ==> [1, 2, 4, 7, 8]
+// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
+// ```
 //
 // Arguments:
+//	x: 1-D.
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-// first dimension.
-//
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `num_segments`.
-func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
+// Returns 1-D.1-D.
+func Unique(scope *Scope, x tf.Output, optional ...UniqueAttr) (y tf.Output, idx tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "UnsortedSegmentMax",
+		Type: "Unique",
 		Input: []tf.Input{
-			data, segment_ids, num_segments,
+			x,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// Returns x + y element-wise.
+// Concatenates a list of `N` tensors along the first dimension.
 //
-// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// The input tensors are all required to have size 1 in the first dimension.
+//
+// For example:
+//
+// ```
+// # 'x' is [[1, 4]]
+// # 'y' is [[2, 5]]
+// # 'z' is [[3, 6]]
+// parallel_concat([x, y, z]) => [[1, 4], [2, 5], [3, 6]]  # Pack along first dim.
+// ```
+//
+// The difference between concat and parallel_concat is that concat requires all
+// of the inputs be computed before the operation will begin but doesn't require
+// that the input shapes be known during graph construction.  Parallel concat
+// will copy pieces of the input into the output as they become available, in
+// some situations this can provide a performance benefit.
+//
+// Arguments:
+//	values: Tensors to be concatenated. All must have size 1 in the first dimension
+// and same shape.
+//	shape: the final shape of the result; should be equal to the shapes of any input
+// but with the number of input values in the first dimension.
+//
+// Returns The concatenated tensor.
+func ParallelConcat(scope *Scope, values []tf.Output, shape tf.Shape) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"shape": shape}
 	opspec := tf.OpSpec{
-		Type: "Add",
+		Type: "ParallelConcat",
 		Input: []tf.Input{
-			x, y,
+			tf.OutputList(values),
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns x + y element-wise.
+// Concatenates tensors along one dimension.
 //
-// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Arguments:
+//	concat_dim: 0-D.  The dimension along which to concatenate.  Must be in the
+// range [0, rank(values)).
+//	values: The `N` Tensors to concatenate. Their ranks and types must match,
+// and their sizes must match in all dimensions except `concat_dim`.
+//
+// Returns A `Tensor` with the concatenation of values stacked along the
+// `concat_dim` dimension.  This tensor's shape matches that of `values` except
+// in `concat_dim` where it has the sum of the sizes.
+func Concat(scope *Scope, concat_dim tf.Output, values []tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "AddV2",
+		Type: "Concat",
 		Input: []tf.Input{
-			x, y,
+			concat_dim, tf.OutputList(values),
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Saves the input tensors to disk.
+// Compute the lower regularized incomplete Gamma function `Q(a, x)`.
 //
-// The size of `tensor_names` must match the number of tensors in `data`. `data[i]`
-// is written to `filename` with name `tensor_names[i]`.
+// The lower regularized incomplete Gamma function is defined as:
 //
-// See also `SaveSlices`.
 //
-// Arguments:
-//	filename: Must have a single element. The name of the file to which we write
-// the tensor.
-//	tensor_names: Shape `[N]`. The names of the tensors to be saved.
-//	data: `N` tensors to save.
+// \\(P(a, x) = gamma(a, x) / Gamma(a) = 1 - Q(a, x)\\)
 //
-// Returns the created operation.
-func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Output) (o *tf.Operation) {
+// where
+//
+// \\(gamma(a, x) = int_{0}^{x} t^{a-1} exp(-t) dt\\)
+//
+// is the lower incomplete Gamma function.
+//
+// Note, above `Q(a, x)` (`Igammac`) is the upper regularized complete
+// Gamma function.
+func Igamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Save",
+		Type: "Igamma",
 		Input: []tf.Input{
-			filename, tensor_names, tf.OutputList(data),
+			a, x,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// BiasAddAttr is an optional argument to BiasAdd.
-type BiasAddAttr func(optionalAttr)
-
-// BiasAddDataFormat sets the optional data_format attribute to value.
+// Computes offsets of concat inputs within its output.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the bias tensor will be added to the last dimension
-// of the value tensor.
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// The tensor will be added to "in_channels", the third-to-the-last
-//     dimension.
-// If not specified, defaults to "NHWC"
-func BiasAddDataFormat(value string) BiasAddAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// Adds `bias` to `value`.
+// For example:
 //
-// This is a special case of `tf.add` where `bias` is restricted to be 1-D.
-// Broadcasting is supported, so `value` may have any number of dimensions.
+// ```
+// # 'x' is [2, 2, 7]
+// # 'y' is [2, 3, 7]
+// # 'z' is [2, 5, 7]
+// concat_offset(2, [x, y, z]) => [0, 0, 0], [0, 2, 0], [0, 5, 0]
+// ```
+//
+// This is typically used by gradient computations for a concat operation.
 //
 // Arguments:
-//	value: Any number of dimensions.
-//	bias: 1-D with size the last dimension of `value`.
+//	concat_dim: The dimension along which to concatenate.
+//	shape: The `N` int32 vectors representing shape of tensors being concatenated.
 //
-// Returns Broadcasted sum of `value` and `bias`.
-func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) {
+// Returns The `N` int32 vectors representing the starting offset
+// of input tensors within the concatenated output.
+func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "BiasAdd",
+		Type: "ConcatOffset",
 		Input: []tf.Input{
-			value, bias,
+			concat_dim, tf.OutputList(shape),
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse.
-type SparseReduceSumSparseAttr func(optionalAttr)
-
-// SparseReduceSumSparseKeepDims sets the optional keep_dims attribute to value.
-//
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func SparseReduceSumSparseKeepDims(value bool) SparseReduceSumSparseAttr {
-	return func(m optionalAttr) {
-		m["keep_dims"] = value
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// Computes the sum of elements across dimensions of a SparseTensor.
-//
-// This Op takes a SparseTensor and is the sparse counterpart to
-// `tf.reduce_sum()`.  In contrast to SparseReduceSum, this Op returns a
-// SparseTensor.
-//
-// Reduces `sp_input` along the dimensions given in `reduction_axes`.  Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained
-// with length 1.
-//
-// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-// with a single element is returned.  Additionally, the axes can be negative,
-// which are interpreted according to the indexing rules in Python.
+	var idx int
+	var err error
+	if offset, idx, err = makeOutputList(op, idx, "offset"); err != nil {
+		scope.UpdateErr("ConcatOffset", err)
+		return
+	}
+	return offset
+}
+
+// Splits a tensor into `num_split` tensors along one dimension.
 //
 // Arguments:
-//	input_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	input_values: 1-D.  `N` non-empty values corresponding to `input_indices`.
-//	input_shape: 1-D.  Shape of the input SparseTensor.
-//	reduction_axes: 1-D.  Length-`K` vector containing the reduction axes.
-func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) {
+//	axis: 0-D.  The dimension along which to split.  Must be in the range
+// `[-rank(value), rank(value))`.
+//	value: The tensor to split.
+//	num_split: The number of ways to split.  Must evenly divide
+// `value.shape[split_dim]`.
+//
+// Returns They are identically shaped tensors, whose shape matches that of `value`
+// except along `axis`, where their sizes are
+// `values.shape[split_dim] / num_split`.
+func Split(scope *Scope, axis tf.Output, value tf.Output, num_split int64) (output []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"num_split": num_split}
 	opspec := tf.OpSpec{
-		Type: "SparseReduceSumSparse",
+		Type: "Split",
 		Input: []tf.Input{
-			input_indices, input_values, input_shape, reduction_axes,
+			axis, value,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("Split", err)
+		return
+	}
+	return output
 }
 
-// Returns x * y element-wise.
+// Splits a tensor into `num_split` tensors along one dimension.
 //
-// *NOTE*: `Mul` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Arguments:
+//	value: The tensor to split.
+//	size_splits: list containing the sizes of each output tensor along the split
+// dimension. Must sum to the dimension of value along split_dim.
+// Can contain one -1 indicating that dimension is to be inferred.
+//	axis: 0-D.  The dimension along which to split.  Must be in the range
+// `[-rank(value), rank(value))`.
+//
+//
+// Returns Tensors whose shape matches that of `value`
+// except along `axis`, where their sizes are
+// `size_splits[i]`.
+func SplitV(scope *Scope, value tf.Output, size_splits tf.Output, axis tf.Output, num_split int64) (output []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"num_split": num_split}
 	opspec := tf.OpSpec{
-		Type: "Mul",
+		Type: "SplitV",
 		Input: []tf.Input{
-			x, y,
+			value, size_splits, axis,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("SplitV", err)
+		return
+	}
+	return output
 }
 
-// Returns x / y element-wise.
+// Gives a guarantee to the TF runtime that the input tensor is a constant.
 //
-// *NOTE*: `Div` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// The runtime is then free to make optimizations based on this.
+//
+// Only accepts value typed tensors as inputs and rejects resource variable handles
+// as input.
+//
+// Returns the input tensor without modification.
+func GuaranteeConst(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Div",
+		Type: "GuaranteeConst",
 		Input: []tf.Input{
-			x, y,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ApproximateEqualAttr is an optional argument to ApproximateEqual.
-type ApproximateEqualAttr func(optionalAttr)
-
-// ApproximateEqualTolerance sets the optional tolerance attribute to value.
-// If not specified, defaults to 1e-05
-func ApproximateEqualTolerance(value float32) ApproximateEqualAttr {
-	return func(m optionalAttr) {
-		m["tolerance"] = value
-	}
-}
-
-// Returns the truth value of abs(x-y) < tolerance element-wise.
-func ApproximateEqual(scope *Scope, x tf.Output, y tf.Output, optional ...ApproximateEqualAttr) (z tf.Output) {
+// Returns a tensor of zeros with the same shape and type as x.
+//
+// Arguments:
+//	x: a tensor of type T.
+//
+// Returns a tensor of the same shape and type as x but filled with zeros.
+func ZerosLike(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "ApproximateEqual",
+		Type: "ZerosLike",
 		Input: []tf.Input{
-			x, y,
+			x,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns the max of x and y (i.e. x > y ? x : y) element-wise.
+// Flips all bits elementwise.
 //
-// *NOTE*: `Maximum` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// The result will have exactly those bits set, that are not set in `x`. The
+// computation is performed on the underlying representation of x.
+func Invert(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Maximum",
+		Type: "Invert",
 		Input: []tf.Input{
-			x, y,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// LogUniformCandidateSamplerAttr is an optional argument to LogUniformCandidateSampler.
-type LogUniformCandidateSamplerAttr func(optionalAttr)
+// DequantizeAttr is an optional argument to Dequantize.
+type DequantizeAttr func(optionalAttr)
 
-// LogUniformCandidateSamplerSeed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 are set to be non-zero, the random number
-// generator is seeded by the given seed.  Otherwise, it is seeded by a
-// random seed.
-// If not specified, defaults to 0
-func LogUniformCandidateSamplerSeed(value int64) LogUniformCandidateSamplerAttr {
+// DequantizeMode sets the optional mode attribute to value.
+// If not specified, defaults to "MIN_COMBINED"
+func DequantizeMode(value string) DequantizeAttr {
 	return func(m optionalAttr) {
-		m["seed"] = value
+		m["mode"] = value
 	}
 }
 
-// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value.
+// Dequantize the 'input' tensor into a float Tensor.
 //
-// value: An second seed to avoid seed collision.
-// If not specified, defaults to 0
-func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Generates labels for candidate sampling with a log-uniform distribution.
+// [min_range, max_range] are scalar floats that specify the range for
+// the 'input' data. The 'mode' attribute controls exactly which calculations are
+// used to convert the float values to their quantized equivalents.
 //
-// See explanations of candidate sampling and the data formats at
-// go/candidate-sampling.
+// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
 //
-// For each batch, this op picks a single set of sampled candidate labels.
+// ```
+// if T == qint8, in[i] += (range(T) + 1)/ 2.0
+// out[i] = min_range + (in[i]* (max_range - min_range) / range(T))
+// ```
+// here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
 //
-// The advantages of sampling candidates per-batch are simplicity and the
-// possibility of efficient dense matrix multiplication. The disadvantage is that
-// the sampled candidates must be chosen independently of the context and of the
-// true labels.
+// *MIN_COMBINED Mode Example*
+//
+// If the input comes from a QuantizedRelu6, the output type is
+// quint8 (range of 0-255) but the possible range of QuantizedRelu6 is
+// 0-6.  The min_range and max_range values are therefore 0.0 and 6.0.
+// Dequantize on quint8 will take each value, cast to float, and multiply
+// by 6 / 255.
+// Note that if quantizedtype is qint8, the operation will additionally add
+// each value by 128 prior to casting.
+//
+// If the mode is 'MIN_FIRST', then this approach is used:
+//
+// ```c++
+// num_discrete_values = 1 << (# of bits in T)
+// range_adjust = num_discrete_values / (num_discrete_values - 1)
+// range = (range_max - range_min) * range_adjust
+// range_scale = range / num_discrete_values
+// const double offset_input = static_cast<double>(input) - lowest_quantized;
+// result = range_min + ((input - numeric_limits<T>::min()) * range_scale)
+// ```
+//
+// *SCALED mode Example*
+//
+// `SCALED` mode matches the quantization approach used in
+// `QuantizeAndDequantize{V2|V3}`.
+//
+// If the mode is `SCALED`, we do not use the full range of the output type,
+// choosing to elide the lowest possible value for symmetry (e.g., output range is
+// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to
+// 0.
+//
+// We first find the range of values in our tensor. The
+// range we use is always centered on 0, so we find m such that
+// ```c++
+//   m = max(abs(input_min), abs(input_max))
+// ```
+//
+// Our input tensor range is then `[-m, m]`.
+//
+// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.
+// If T is signed, this is
+// ```
+//   num_bits = sizeof(T) * 8
+//   [min_fixed, max_fixed] =
+//       [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]
+// ```
+//
+// Otherwise, if T is unsigned, the fixed-point range is
+// ```
+//   [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]
+// ```
+//
+// From this we compute our scaling factor, s:
+// ```c++
+//   s = (2 * m) / (max_fixed - min_fixed)
+// ```
+//
+// Now we can dequantize the elements of our tensor:
+// ```c++
+// result = input * s
+// ```
 //
 // Arguments:
-//	true_classes: A batch_size * num_true matrix, in which each row contains the
-// IDs of the num_true target_classes in the corresponding original label.
-//	num_true: Number of true labels per context.
-//	num_sampled: Number of candidates to randomly sample.
-//	unique: If unique is true, we sample with rejection, so that all sampled
-// candidates in a batch are unique. This requires some approximation to
-// estimate the post-rejection sampling probabilities.
-//	range_max: The sampler will sample integers from the interval [0, range_max).
 //
-// Returns A vector of length num_sampled, in which each element is
-// the ID of a sampled candidate.A batch_size * num_true matrix, representing
-// the number of times each candidate is expected to occur in a batch
-// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled
-// candidate representing the number of times the candidate is expected
-// to occur in a batch of sampled candidates.  If unique=true, then this is a
-// probability.
-func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) {
+//	min_range: The minimum scalar value possibly produced for the input.
+//	max_range: The maximum scalar value possibly produced for the input.
+func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, optional ...DequantizeAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "LogUniformCandidateSampler",
+		Type: "Dequantize",
 		Input: []tf.Input{
-			true_classes,
+			input, min_range, max_range,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Returns the truth value of (x < y) element-wise.
+// Returns the element-wise max of two SparseTensors.
+//
+// Assumes the two SparseTensors have the same shape, i.e., no broadcasting.
+//
+// Arguments:
+//	a_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, in the canonical lexicographic ordering.
+//	a_values: 1-D.  `N` non-empty values corresponding to `a_indices`.
+//	a_shape: 1-D.  Shape of the input SparseTensor.
+//	b_indices: counterpart to `a_indices` for the other operand.
+//	b_values: counterpart to `a_values` for the other operand; must be of the same dtype.
+//	b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal.
 //
-// *NOTE*: `Less` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Returns 2-D.  The indices of the output SparseTensor.1-D.  The values of the output SparseTensor.
+func SparseSparseMaximum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Less",
+		Type: "SparseSparseMaximum",
 		Input: []tf.Input{
-			x, y,
+			a_indices, a_values, a_shape, b_indices, b_values, b_shape,
 		},
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// FakeQuantWithMinMaxVarsGradientAttr is an optional argument to FakeQuantWithMinMaxVarsGradient.
-type FakeQuantWithMinMaxVarsGradientAttr func(optionalAttr)
-
-// FakeQuantWithMinMaxVarsGradientNumBits sets the optional num_bits attribute to value.
+// Returns a batched matrix tensor with new batched diagonal values.
 //
-// value: The bitwidth of the quantization; between 2 and 8, inclusive.
-// If not specified, defaults to 8
-func FakeQuantWithMinMaxVarsGradientNumBits(value int64) FakeQuantWithMinMaxVarsGradientAttr {
-	return func(m optionalAttr) {
-		m["num_bits"] = value
-	}
-}
-
-// FakeQuantWithMinMaxVarsGradientNarrowRange sets the optional narrow_range attribute to value.
+// Given `input` and `diagonal`, this operation returns a tensor with the
+// same shape and values as `input`, except for the main diagonal of the
+// innermost matrices.  These will be overwritten by the values in `diagonal`.
 //
-// value: Whether to quantize into 2^num_bits - 1 distinct values.
-// If not specified, defaults to false
-func FakeQuantWithMinMaxVarsGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsGradientAttr {
-	return func(m optionalAttr) {
-		m["narrow_range"] = value
-	}
-}
-
-// Compute gradients for a FakeQuantWithMinMaxVars operation.
+// The output is computed as follows:
 //
-// Arguments:
-//	gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation.
-//	inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation.
-// min, max: Quantization interval, scalar floats.
+// Assume `input` has `k+1` dimensions `[I, J, K, ..., M, N]` and `diagonal` has
+// `k` dimensions `[I, J, K, ..., min(M, N)]`.  Then the output is a
+// tensor of rank `k+1` with dimensions `[I, J, K, ..., M, N]` where:
 //
+//   * `output[i, j, k, ..., m, n] = diagonal[i, j, k, ..., n]` for `m == n`.
+//   * `output[i, j, k, ..., m, n] = input[i, j, k, ..., m, n]` for `m != n`.
 //
+// Arguments:
+//	input: Rank `k+1`, where `k >= 1`.
+//	diagonal: Rank `k`, where `k >= 1`.
 //
-// Returns Backpropagated gradients w.r.t. inputs:
-// `gradients * (inputs >= min && inputs <= max)`.Backpropagated gradients w.r.t. min parameter:
-// `sum(gradients * (inputs < min))`.Backpropagated gradients w.r.t. max parameter:
-// `sum(gradients * (inputs > max))`.
-func FakeQuantWithMinMaxVarsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) {
+// Returns Rank `k+1`, with `output.shape = input.shape`.
+func MatrixSetDiag(scope *Scope, input tf.Output, diagonal tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "FakeQuantWithMinMaxVarsGradient",
+		Type: "MatrixSetDiag",
 		Input: []tf.Input{
-			gradients, inputs, min, max,
+			input, diagonal,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2.
-type MaxPoolGradV2Attr func(optionalAttr)
+// EditDistanceAttr is an optional argument to EditDistance.
+type EditDistanceAttr func(optionalAttr)
 
-// MaxPoolGradV2DataFormat sets the optional data_format attribute to value.
+// EditDistanceNormalize sets the optional normalize attribute to value.
 //
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the data is stored in the order of:
-//     [batch, in_height, in_width, in_channels].
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// If not specified, defaults to "NHWC"
-func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr {
+// value: boolean (if true, edit distances are normalized by length of truth).
+//
+// The output is:
+// If not specified, defaults to true
+func EditDistanceNormalize(value bool) EditDistanceAttr {
 	return func(m optionalAttr) {
-		m["data_format"] = value
+		m["normalize"] = value
 	}
 }
 
-// Computes gradients of the maxpooling function.
+// Computes the (possibly normalized) Levenshtein Edit Distance.
+//
+// The inputs are variable-length sequences provided by SparseTensors
+//   (hypothesis_indices, hypothesis_values, hypothesis_shape)
+// and
+//   (truth_indices, truth_values, truth_shape).
+//
+// The inputs are:
 //
 // Arguments:
-//	orig_input: The original input tensor.
-//	orig_output: The original output tensor.
-//	grad: 4-D.  Gradients w.r.t. the output of `max_pool`.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
+//	hypothesis_indices: The indices of the hypothesis list SparseTensor.
+// This is an N x R int64 matrix.
+//	hypothesis_values: The values of the hypothesis list SparseTensor.
+// This is an N-length vector.
+//	hypothesis_shape: The shape of the hypothesis list SparseTensor.
+// This is an R-length vector.
+//	truth_indices: The indices of the truth list SparseTensor.
+// This is an M x R int64 matrix.
+//	truth_values: The values of the truth list SparseTensor.
+// This is an M-length vector.
+//	truth_shape: truth indices, vector.
 //
-// Returns Gradients w.r.t. the input to `max_pool`.
-func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) {
+// Returns A dense float tensor with rank R - 1.
+//
+// For the example input:
+//
+//     // hypothesis represents a 2x1 matrix with variable-length values:
+//     //   (0,0) = ["a"]
+//     //   (1,0) = ["b"]
+//     hypothesis_indices = [[0, 0, 0],
+//                           [1, 0, 0]]
+//     hypothesis_values = ["a", "b"]
+//     hypothesis_shape = [2, 1, 1]
+//
+//     // truth represents a 2x2 matrix with variable-length values:
+//     //   (0,0) = []
+//     //   (0,1) = ["a"]
+//     //   (1,0) = ["b", "c"]
+//     //   (1,1) = ["a"]
+//     truth_indices = [[0, 1, 0],
+//                      [1, 0, 0],
+//                      [1, 0, 1],
+//                      [1, 1, 0]]
+//     truth_values = ["a", "b", "c", "a"]
+//     truth_shape = [2, 2, 2]
+//     normalize = true
+//
+// The output will be:
+//
+//     // output is a 2x2 matrix with edit distances normalized by truth lengths.
+//     output = [[inf, 1.0],  // (0,0): no truth, (0,1): no hypothesis
+//               [0.5, 1.0]]  // (1,0): addition, (1,1): no hypothesis
+func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values tf.Output, hypothesis_shape tf.Output, truth_indices tf.Output, truth_values tf.Output, truth_shape tf.Output, optional ...EditDistanceAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"padding": padding}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MaxPoolGradV2",
+		Type: "EditDistance",
 		Input: []tf.Input{
-			orig_input, orig_output, grad, ksize, strides,
+			hypothesis_indices, hypothesis_values, hypothesis_shape, truth_indices, truth_values, truth_shape,
 		},
 		Attrs: attrs,
 	}
@@ -25529,362 +26087,436 @@ func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, gr
 	return op.Output(0)
 }
 
-// Returns the min of x and y (i.e. x < y ? x : y) element-wise.
+// Gather slices from `params` into a Tensor with shape specified by `indices`.
+//
+// `indices` is an K-dimensional integer tensor, best thought of as a
+// (K-1)-dimensional tensor of indices into `params`, where each element defines a
+// slice of `params`:
+//
+//     output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]]
+//
+// Whereas in @{tf.gather} `indices` defines slices into the first
+// dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the
+// first `N` dimensions of `params`, where `N = indices.shape[-1]`.
+//
+// The last dimension of `indices` can be at most the rank of
+// `params`:
+//
+//     indices.shape[-1] <= params.rank
+//
+// The last dimension of `indices` corresponds to elements
+// (if `indices.shape[-1] == params.rank`) or slices
+// (if `indices.shape[-1] < params.rank`) along dimension `indices.shape[-1]`
+// of `params`.  The output tensor has shape
+//
+//     indices.shape[:-1] + params.shape[indices.shape[-1]:]
+//
+// Some examples below.
+//
+// Simple indexing into a matrix:
+//
+// ```python
+//     indices = [[0, 0], [1, 1]]
+//     params = [['a', 'b'], ['c', 'd']]
+//     output = ['a', 'd']
+// ```
+//
+// Slice indexing into a matrix:
+//
+// ```python
+//     indices = [[1], [0]]
+//     params = [['a', 'b'], ['c', 'd']]
+//     output = [['c', 'd'], ['a', 'b']]
+// ```
+//
+// Indexing into a 3-tensor:
+//
+// ```python
+//     indices = [[1]]
+//     params = [[['a0', 'b0'], ['c0', 'd0']],
+//               [['a1', 'b1'], ['c1', 'd1']]]
+//     output = [[['a1', 'b1'], ['c1', 'd1']]]
+//
+//
+//     indices = [[0, 1], [1, 0]]
+//     params = [[['a0', 'b0'], ['c0', 'd0']],
+//               [['a1', 'b1'], ['c1', 'd1']]]
+//     output = [['c0', 'd0'], ['a1', 'b1']]
+//
+//
+//     indices = [[0, 0, 1], [1, 0, 1]]
+//     params = [[['a0', 'b0'], ['c0', 'd0']],
+//               [['a1', 'b1'], ['c1', 'd1']]]
+//     output = ['b0', 'b1']
+// ```
+//
+// Batched indexing into a matrix:
+//
+// ```python
+//     indices = [[[0, 0]], [[0, 1]]]
+//     params = [['a', 'b'], ['c', 'd']]
+//     output = [['a'], ['b']]
+// ```
+//
+// Batched slice indexing into a matrix:
+//
+// ```python
+//     indices = [[[1]], [[0]]]
+//     params = [['a', 'b'], ['c', 'd']]
+//     output = [[['c', 'd']], [['a', 'b']]]
+// ```
+//
+// Batched indexing into a 3-tensor:
+//
+// ```python
+//     indices = [[[1]], [[0]]]
+//     params = [[['a0', 'b0'], ['c0', 'd0']],
+//               [['a1', 'b1'], ['c1', 'd1']]]
+//     output = [[[['a1', 'b1'], ['c1', 'd1']]],
+//               [[['a0', 'b0'], ['c0', 'd0']]]]
+//
+//     indices = [[[0, 1], [1, 0]], [[0, 0], [1, 1]]]
+//     params = [[['a0', 'b0'], ['c0', 'd0']],
+//               [['a1', 'b1'], ['c1', 'd1']]]
+//     output = [[['c0', 'd0'], ['a1', 'b1']],
+//               [['a0', 'b0'], ['c1', 'd1']]]
+//
+//
+//     indices = [[[0, 0, 1], [1, 0, 1]], [[0, 1, 1], [1, 1, 0]]]
+//     params = [[['a0', 'b0'], ['c0', 'd0']],
+//               [['a1', 'b1'], ['c1', 'd1']]]
+//     output = [['b0', 'b1'], ['d0', 'c1']]
+// ```
 //
-// *NOTE*: `Minimum` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// Arguments:
+//	params: The tensor from which to gather values.
+//	indices: Index tensor.
+//
+// Returns Values from `params` gathered from indices given by `indices`, with
+// shape `indices.shape[:-1] + params.shape[indices.shape[-1]:]`.
+func GatherNd(scope *Scope, params tf.Output, indices tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Minimum",
+		Type: "GatherNd",
 		Input: []tf.Input{
-			x, y,
+			params, indices,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// BiasAddGradAttr is an optional argument to BiasAddGrad.
-type BiasAddGradAttr func(optionalAttr)
-
-// BiasAddGradDataFormat sets the optional data_format attribute to value.
-//
-// value: Specify the data format of the input and output data. With the
-// default format "NHWC", the bias tensor will be added to the last dimension
-// of the value tensor.
-// Alternatively, the format could be "NCHW", the data storage order of:
-//     [batch, in_channels, in_height, in_width].
-// The tensor will be added to "in_channels", the third-to-the-last
-//     dimension.
-// If not specified, defaults to "NHWC"
-func BiasAddGradDataFormat(value string) BiasAddGradAttr {
-	return func(m optionalAttr) {
-		m["data_format"] = value
-	}
-}
-
-// The backward operation for "BiasAdd" on the "bias" tensor.
-//
-// It accumulates all the values from out_backprop into the feature dimension.
-// For NHWC data format, the feature dimension is the last. For NCHW data format,
-// the feature dimension is the third-to-last.
-//
-// Arguments:
-//	out_backprop: Any number of dimensions.
+// Eagerly executes a python function to compute func(input)->output. The
 //
-// Returns 1-D with size the feature dimension of `out_backprop`.
-func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAttr) (output tf.Output) {
+// semantics of the input, output, and attributes are the same as those for
+// PyFunc.
+func EagerPyFunc(scope *Scope, input []tf.Output, token string, Tout []tf.DataType) (output []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"token": token, "Tout": Tout}
 	opspec := tf.OpSpec{
-		Type: "BiasAddGrad",
+		Type: "EagerPyFunc",
 		Input: []tf.Input{
-			out_backprop,
+			tf.OutputList(input),
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the power of one value to another.
-//
-// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for
-// corresponding elements in `x` and `y`. For example:
-//
-// ```
-// # tensor 'x' is [[2, 2]], [3, 3]]
-// # tensor 'y' is [[8, 16], [2, 3]]
-// tf.pow(x, y) ==> [[256, 65536], [9, 27]]
-// ```
-func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "Pow",
-		Input: []tf.Input{
-			x, y,
-		},
+	var idx int
+	var err error
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("EagerPyFunc", err)
+		return
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return output
 }
 
-// Compute the upper regularized incomplete Gamma function `Q(a, x)`.
-//
-// The upper regularized incomplete Gamma function is defined as:
-//
-// \\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\)
+// Stops gradient computation.
 //
-// where
+// When executed in a graph, this op outputs its input tensor as-is.
 //
-// \\(Gamma(a, x) = int_{x}^{\infty} t^{a-1} exp(-t) dt\\)
+// When building ops to compute gradients, this op prevents the contribution of
+// its inputs to be taken into account.  Normally, the gradient generator adds ops
+// to a graph to compute the derivatives of a specified 'loss' by recursively
+// finding out inputs that contributed to its computation.  If you insert this op
+// in the graph it inputs are masked from the gradient generator.  They are not
+// taken into account for computing gradients.
 //
-// is the upper incomplete Gama function.
+// This is useful any time you want to compute a value with TensorFlow but need
+// to pretend that the value was a constant. Some examples include:
 //
-// Note, above `P(a, x)` (`Igamma`) is the lower regularized complete
-// Gamma function.
-func Igammac(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) {
+// *  The *EM* algorithm where the *M-step* should not involve backpropagation
+//    through the output of the *E-step*.
+// *  Contrastive divergence training of Boltzmann machines where, when
+//    differentiating the energy function, the training must not backpropagate
+//    through the graph that generated the samples from the model.
+// *  Adversarial training, where no backprop should happen through the adversarial
+//    example generation process.
+func StopGradient(scope *Scope, input tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Igammac",
+		Type: "StopGradient",
 		Input: []tf.Input{
-			a, x,
+			input,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Compute the lower regularized incomplete Gamma function `Q(a, x)`.
-//
-// The lower regularized incomplete Gamma function is defined as:
-//
-//
-// \\(P(a, x) = gamma(a, x) / Gamma(a) = 1 - Q(a, x)\\)
-//
-// where
-//
-// \\(gamma(a, x) = int_{0}^{x} t^{a-1} exp(-t) dt\\)
-//
-// is the lower incomplete Gamma function.
-//
-// Note, above `Q(a, x)` (`Igammac`) is the upper regularized complete
-// Gamma function.
-func Igamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) {
+// Computes asin of x element-wise.
+func Asin(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Igamma",
+		Type: "Asin",
 		Input: []tf.Input{
-			a, x,
+			x,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes arctangent of `y/x` element-wise, respecting signs of the arguments.
+// PreventGradientAttr is an optional argument to PreventGradient.
+type PreventGradientAttr func(optionalAttr)
+
+// PreventGradientMessage sets the optional message attribute to value.
 //
-// This is the angle \( \theta \in [-\pi, \pi] \) such that
-// \[ x = r \cos(\theta) \]
-// and
-// \[ y = r \sin(\theta) \]
-// where \(r = \sqrt(x^2 + y^2) \).
-func Atan2(scope *Scope, y tf.Output, x tf.Output) (z tf.Output) {
+// value: Will be printed in the error when anyone tries to differentiate
+// this operation.
+// If not specified, defaults to ""
+func PreventGradientMessage(value string) PreventGradientAttr {
+	return func(m optionalAttr) {
+		m["message"] = value
+	}
+}
+
+// An identity op that triggers an error if a gradient is requested.
+//
+// When executed in a graph, this op outputs its input tensor as-is.
+//
+// When building ops to compute gradients, the TensorFlow gradient system
+// will return an error when trying to lookup the gradient of this op,
+// because no gradient must ever be registered for this function.  This
+// op exists to prevent subtle bugs from silently returning unimplemented
+// gradients in some corner cases.
+//
+// Arguments:
+//	input: any tensor.
+//
+// Returns the same input tensor.
+func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Atan2",
+		Type: "PreventGradient",
 		Input: []tf.Input{
-			y, x,
+			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Compute the regularized incomplete beta integral \\(I_x(a, b)\\).
-//
-// The regularized incomplete beta integral is defined as:
-//
-//
-// \\(I_x(a, b) = \frac{B(x; a, b)}{B(a, b)}\\)
-//
-// where
-//
+// Checks a tensor for NaN and Inf values.
 //
-// \\(B(x; a, b) = \int_0^x t^{a-1} (1 - t)^{b-1} dt\\)
+// When run, reports an `InvalidArgument` error if `tensor` has any values
+// that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is.
 //
+// Arguments:
 //
-// is the incomplete beta function and \\(B(a, b)\\) is the *complete*
-// beta function.
-func Betainc(scope *Scope, a tf.Output, b tf.Output, x tf.Output) (z tf.Output) {
+//	message: Prefix of the error message.
+func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"message": message}
 	opspec := tf.OpSpec{
-		Type: "Betainc",
+		Type: "CheckNumerics",
 		Input: []tf.Input{
-			a, b, x,
+			tensor,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns the truth value of x OR y element-wise.
+// Shuffle dimensions of x according to a permutation and conjugate the result.
 //
-// *NOTE*: `LogicalOr` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func LogicalOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+// The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:
+//   `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]`
+//   `y[i,j,k,...,s,t,u] == conj(x[perm[i], perm[j], perm[k],...,perm[s], perm[t], perm[u]])`
+func ConjugateTranspose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "LogicalOr",
+		Type: "ConjugateTranspose",
 		Input: []tf.Input{
-			x, y,
+			x, perm,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Selects elements from `t` or `e`, depending on `condition`.
-//
-// The `t`, and `e` tensors must all have the same shape, and the
-// output will also have that shape.
-//
-// The `condition` tensor must be a scalar if `t` and `e` are scalars.
-// If `t` and `e` are vectors or higher rank, then `condition` must be either a
-// scalar, a vector with size matching the first dimension of `t`, or must have
-// the same shape as `t`.
+// UniqueV2Attr is an optional argument to UniqueV2.
+type UniqueV2Attr func(optionalAttr)
+
+// UniqueV2OutIdx sets the optional out_idx attribute to value.
+// If not specified, defaults to DT_INT32
+func UniqueV2OutIdx(value tf.DataType) UniqueV2Attr {
+	return func(m optionalAttr) {
+		m["out_idx"] = value
+	}
+}
+
+// Finds unique elements in a 1-D tensor.
 //
-// The `condition` tensor acts as a mask that chooses, based on the value at each
-// element, whether the corresponding element / row in the output should be
-// taken from `t` (if true) or `e` (if false).
+// This operation returns a tensor `y` containing all of the unique elements of `x`
+// sorted in the same order that they occur in `x`. This operation also returns a
+// tensor `idx` the same size as `x` that contains the index of each value of `x`
+// in the unique output `y`. In other words:
 //
-// If `condition` is a vector and `t` and `e` are higher rank matrices, then
-// it chooses which row (outer dimension) to copy from `t` and `e`.
-// If `condition` has the same shape as `t` and `e`, then it chooses which
-// element to copy from `t` and `e`.
+// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
 //
 // For example:
 //
-// ```python
-// # 'condition' tensor is [[True,  False]
-// #                        [False, True]]
-// # 't' is [[1, 2],
-// #         [3, 4]]
-// # 'e' is [[5, 6],
-// #         [7, 8]]
-// select(condition, t, e)  # => [[1, 6], [7, 4]]
-//
-//
-// # 'condition' tensor is [True, False]
-// # 't' is [[1, 2],
-// #         [3, 4]]
-// # 'e' is [[5, 6],
-// #         [7, 8]]
-// select(condition, t, e) ==> [[1, 2],
-//                              [7, 8]]
-//
+// ```
+// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
+// y, idx = unique(x)
+// y ==> [1, 2, 4, 7, 8]
+// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
 // ```
 //
 // Arguments:
+//	x: A `Tensor`.
+//	axis: A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
+// find the unique elements.
 //
-//	t: = A `Tensor` which may have the same shape as `condition`.
-// If `condition` is rank 1, `t` may have higher rank,
-// but its first dimension must match the size of `condition`.
-//	e: = A `Tensor` with the same type and shape as `t`.
-//
-// Returns = A `Tensor` with the same type and shape as `t` and `e`.
-func Select(scope *Scope, condition tf.Output, t tf.Output, e tf.Output) (output tf.Output) {
+// Returns A `Tensor`. Unique elements along the `axis` of `Tensor` x.A 1-D Tensor. Has the same type as x that contains the index of each
+// value of x in the output y.
+func UniqueV2(scope *Scope, x tf.Output, axis tf.Output, optional ...UniqueV2Attr) (y tf.Output, idx tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "Select",
+		Type: "UniqueV2",
 		Input: []tf.Input{
-			condition, t, e,
+			x, axis,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return op.Output(0), op.Output(1)
 }
 
-// MatMulAttr is an optional argument to MatMul.
-type MatMulAttr func(optionalAttr)
-
-// MatMulTransposeA sets the optional transpose_a attribute to value.
+// Return a slice from 'input'.
 //
-// value: If true, "a" is transposed before multiplication.
-// If not specified, defaults to false
-func MatMulTransposeA(value bool) MatMulAttr {
-	return func(m optionalAttr) {
-		m["transpose_a"] = value
-	}
-}
-
-// MatMulTransposeB sets the optional transpose_b attribute to value.
+// The output tensor is a tensor with dimensions described by 'size'
+// whose values are extracted from 'input' starting at the offsets in
+// 'begin'.
 //
-// value: If true, "b" is transposed before multiplication.
-// If not specified, defaults to false
-func MatMulTransposeB(value bool) MatMulAttr {
-	return func(m optionalAttr) {
-		m["transpose_b"] = value
-	}
-}
-
-// Multiply the matrix "a" by the matrix "b".
+// *Requirements*:
+//   0 <= begin[i] <= begin[i] + size[i] <= Di  for i in [0, n)
 //
-// The inputs must be two-dimensional matrices and the inner dimension of
-// "a" (after being transposed if transpose_a is true) must match the
-// outer dimension of "b" (after being transposed if transposed_b is
-// true).
+// Arguments:
 //
-// *Note*: The default kernel implementation for MatMul on GPUs uses
-// cublas.
-func MatMul(scope *Scope, a tf.Output, b tf.Output, optional ...MatMulAttr) (product tf.Output) {
+//	begin: begin[i] specifies the offset into the 'i'th dimension of
+// 'input' to slice from.
+//	size: size[i] specifies the number of elements of the 'i'th dimension
+// of 'input' to slice. If size[i] is -1, all remaining elements in dimension
+// i are included in the slice (i.e. this is equivalent to setting
+// size[i] = input.dim_size(i) - begin[i]).
+func Slice(scope *Scope, input tf.Output, begin tf.Output, size tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "MatMul",
+		Type: "Slice",
 		Input: []tf.Input{
-			a, b,
+			input, begin, size,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MeanAttr is an optional argument to Mean.
-type MeanAttr func(optionalAttr)
+// StridedSliceGradAttr is an optional argument to StridedSliceGrad.
+type StridedSliceGradAttr func(optionalAttr)
 
-// MeanKeepDims sets the optional keep_dims attribute to value.
-//
-// value: If true, retain reduced dimensions with length 1.
-// If not specified, defaults to false
-func MeanKeepDims(value bool) MeanAttr {
+// StridedSliceGradBeginMask sets the optional begin_mask attribute to value.
+// If not specified, defaults to 0
+func StridedSliceGradBeginMask(value int64) StridedSliceGradAttr {
 	return func(m optionalAttr) {
-		m["keep_dims"] = value
+		m["begin_mask"] = value
 	}
 }
 
-// Computes the mean of elements across dimensions of a tensor.
-//
-// Reduces `input` along the dimensions given in `reduction_indices`. Unless
-// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
-// `reduction_indices`. If `keep_dims` is true, the reduced dimensions are
-// retained with length 1.
+// StridedSliceGradEndMask sets the optional end_mask attribute to value.
+// If not specified, defaults to 0
+func StridedSliceGradEndMask(value int64) StridedSliceGradAttr {
+	return func(m optionalAttr) {
+		m["end_mask"] = value
+	}
+}
+
+// StridedSliceGradEllipsisMask sets the optional ellipsis_mask attribute to value.
+// If not specified, defaults to 0
+func StridedSliceGradEllipsisMask(value int64) StridedSliceGradAttr {
+	return func(m optionalAttr) {
+		m["ellipsis_mask"] = value
+	}
+}
+
+// StridedSliceGradNewAxisMask sets the optional new_axis_mask attribute to value.
+// If not specified, defaults to 0
+func StridedSliceGradNewAxisMask(value int64) StridedSliceGradAttr {
+	return func(m optionalAttr) {
+		m["new_axis_mask"] = value
+	}
+}
+
+// StridedSliceGradShrinkAxisMask sets the optional shrink_axis_mask attribute to value.
+// If not specified, defaults to 0
+func StridedSliceGradShrinkAxisMask(value int64) StridedSliceGradAttr {
+	return func(m optionalAttr) {
+		m["shrink_axis_mask"] = value
+	}
+}
+
+// Returns the gradient of `StridedSlice`.
 //
-// Arguments:
-//	input: The tensor to reduce.
-//	reduction_indices: The dimensions to reduce. Must be in the range
-// `[-rank(input), rank(input))`.
+// Since `StridedSlice` cuts out pieces of its `input` which is size
+// `shape`, its gradient will have the same shape (which is passed here
+// as `shape`). The gradient will be zero in any element that the slice
+// does not select.
 //
-// Returns The reduced tensor.
-func Mean(scope *Scope, input tf.Output, reduction_indices tf.Output, optional ...MeanAttr) (output tf.Output) {
+// Arguments are the same as StridedSliceGrad with the exception that
+// `dy` is the input gradient to be propagated and `shape` is the
+// shape of `StridedSlice`'s `input`.
+func StridedSliceGrad(scope *Scope, shape tf.Output, begin tf.Output, end tf.Output, strides tf.Output, dy tf.Output, optional ...StridedSliceGradAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -25893,9 +26525,9 @@ func Mean(scope *Scope, input tf.Output, reduction_indices tf.Output, optional .
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Mean",
+		Type: "StridedSliceGrad",
 		Input: []tf.Input{
-			input, reduction_indices,
+			shape, begin, end, strides, dy,
 		},
 		Attrs: attrs,
 	}
@@ -25903,46 +26535,60 @@ func Mean(scope *Scope, input tf.Output, reduction_indices tf.Output, optional .
 	return op.Output(0)
 }
 
-// Returns which elements of x are finite.
+// Returns the gradient of `Tile`.
 //
-// @compatibility(numpy)
-// Equivalent to np.isfinite
-// @end_compatibility
-func IsFinite(scope *Scope, x tf.Output) (y tf.Output) {
+// DEPRECATED at GraphDef version 3: TileGrad has been replaced with reduce_sum
+//
+// Since `Tile` takes an input and repeats the input `multiples` times
+// along each dimension, `TileGrad` takes in `multiples` and aggregates
+// each repeated tile of `input` into `output`.
+func TileGrad(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "IsFinite",
+		Type: "TileGrad",
 		Input: []tf.Input{
-			x,
+			input, multiples,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ArgMaxAttr is an optional argument to ArgMax.
-type ArgMaxAttr func(optionalAttr)
+// DataFormatDimMapAttr is an optional argument to DataFormatDimMap.
+type DataFormatDimMapAttr func(optionalAttr)
 
-// ArgMaxOutputType sets the optional output_type attribute to value.
-// If not specified, defaults to DT_INT64
-func ArgMaxOutputType(value tf.DataType) ArgMaxAttr {
+// DataFormatDimMapSrcFormat sets the optional src_format attribute to value.
+//
+// value: source data format.
+// If not specified, defaults to "NHWC"
+func DataFormatDimMapSrcFormat(value string) DataFormatDimMapAttr {
 	return func(m optionalAttr) {
-		m["output_type"] = value
+		m["src_format"] = value
 	}
 }
 
-// Returns the index with the largest value across dimensions of a tensor.
+// DataFormatDimMapDstFormat sets the optional dst_format attribute to value.
 //
-// Note that in case of ties the identity of the return value is not guaranteed.
+// value: destination data format.
+// If not specified, defaults to "NCHW"
+func DataFormatDimMapDstFormat(value string) DataFormatDimMapAttr {
+	return func(m optionalAttr) {
+		m["dst_format"] = value
+	}
+}
+
+// Returns the dimension index in the destination data format given the one in
+//
+// the source data format.
 //
 // Arguments:
+//	x: A Tensor with each element as a dimension index in source data format.
+// Must be in the range [-4, 4).
 //
-//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
-// Describes which dimension of the input Tensor to reduce across. For vectors,
-// use dimension = 0.
-func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMaxAttr) (output tf.Output) {
+// Returns A Tensor with each element as a dimension index in destination data format.
+func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAttr) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -25951,9 +26597,9 @@ func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgM
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ArgMax",
+		Type: "DataFormatDimMap",
 		Input: []tf.Input{
-			input, dimension,
+			x,
 		},
 		Attrs: attrs,
 	}
@@ -25961,121 +26607,89 @@ func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgM
 	return op.Output(0)
 }
 
-// Computes the sum along segments of a tensor.
-//
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
-//
-// Computes a tensor such that
-// \\(output_i = \sum_j data_j\\) where sum is over `j` such
-// that `segment_ids[j] == i`.
-//
-// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentSum.png" alt>
-// </div>
-//
-// Arguments:
-//
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
+// Return the shape of s0 op s1 with broadcast.
 //
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+// Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the
+// broadcasted shape. `s0`, `s1` and `r0` are all integer vectors.
+func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SegmentSum",
+		Type: "BroadcastArgs",
 		Input: []tf.Input{
-			data, segment_ids,
+			s0, s1,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ImageSummaryAttr is an optional argument to ImageSummary.
-type ImageSummaryAttr func(optionalAttr)
-
-// ImageSummaryMaxImages sets the optional max_images attribute to value.
-//
-// value: Max number of batch elements to generate images for.
-// If not specified, defaults to 3
+// Return the reduction indices for computing gradients of s0 op s1 with broadcast.
 //
-// REQUIRES: value >= 1
-func ImageSummaryMaxImages(value int64) ImageSummaryAttr {
-	return func(m optionalAttr) {
-		m["max_images"] = value
+// This is typically used by gradient computations for a broadcasting operation.
+func BroadcastGradientArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output, r1 tf.Output) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// ImageSummaryBadColor sets the optional bad_color attribute to value.
-//
-// value: Color to use for pixels with non-finite values.
-// If not specified, defaults to <dtype:DT_UINT8 tensor_shape:<dim:<size:4 > > int_val:255 int_val:0 int_val:0 int_val:255 >
-func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr {
-	return func(m optionalAttr) {
-		m["bad_color"] = value
+	opspec := tf.OpSpec{
+		Type: "BroadcastGradientArgs",
+		Input: []tf.Input{
+			s0, s1,
+		},
 	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
 }
 
-// Outputs a `Summary` protocol buffer with images.
-//
-// The summary has up to `max_images` summary values containing images. The
-// images are built from `tensor` which must be 4-D with shape `[batch_size,
-// height, width, channels]` and where `channels` can be:
-//
-// *  1: `tensor` is interpreted as Grayscale.
-// *  3: `tensor` is interpreted as RGB.
-// *  4: `tensor` is interpreted as RGBA.
-//
-// The images have the same number of channels as the input tensor. For float
-// input, the values are normalized one image at a time to fit in the range
-// `[0, 255]`.  `uint8` values are unchanged.  The op uses two different
-// normalization algorithms:
+// Pads a tensor with mirrored values.
 //
-// *  If the input values are all positive, they are rescaled so the largest one
-//    is 255.
+// This operation pads a `input` with mirrored values according to the `paddings`
+// you specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is
+// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
+// how many values to add before the contents of `input` in that dimension, and
+// `paddings[D, 1]` indicates how many values to add after the contents of `input`
+// in that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater
+// than `input.dim_size(D)` (or `input.dim_size(D) - 1`) if `copy_border` is true
+// (if false, respectively).
 //
-// *  If any input value is negative, the values are shifted so input value 0.0
-//    is at 127.  They are then rescaled so that either the smallest value is 0,
-//    or the largest one is 255.
+// The padded size of each dimension D of the output is:
 //
-// The `tag` argument is a scalar `Tensor` of type `string`.  It is used to
-// build the `tag` of the summary values:
+// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
 //
-// *  If `max_images` is 1, the summary value tag is '*tag*/image'.
-// *  If `max_images` is greater than 1, the summary value tags are
-//    generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
+// For example:
 //
-// The `bad_color` argument is the color to use in the generated images for
-// non-finite input values.  It is a `unit8` 1-D tensor of length `channels`.
-// Each element must be in the range `[0, 255]` (It represents the value of a
-// pixel in the output image).  Non-finite values in the input tensor are
-// replaced by this tensor in the output image.  The default value is the color
-// red.
+// ```
+// # 't' is [[1, 2, 3], [4, 5, 6]].
+// # 'paddings' is [[1, 1]], [2, 2]].
+// # 'mode' is SYMMETRIC.
+// # rank of 't' is 2.
+// pad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2]
+//                       [2, 1, 1, 2, 3, 3, 2]
+//                       [5, 4, 4, 5, 6, 6, 5]
+//                       [5, 4, 4, 5, 6, 6, 5]]
+// ```
 //
 // Arguments:
-//	tag: Scalar. Used to build the `tag` attribute of the summary values.
-//	tensor: 4-D of shape `[batch_size, height, width, channels]` where
-// `channels` is 1, 3, or 4.
+//	input: The input tensor to be padded.
+//	paddings: A two-column matrix specifying the padding sizes. The number of
+// rows must be the same as the rank of `input`.
+//	mode: Either `REFLECT` or `SYMMETRIC`. In reflect mode the padded regions
+// do not include the borders, while in symmetric mode the padded regions
+// do include the borders. For example, if `input` is `[1, 2, 3]` and `paddings`
+// is `[0, 2]`, then the output is `[1, 2, 3, 2, 1]` in reflect mode, and
+// it is `[1, 2, 3, 3, 2]` in symmetric mode.
 //
-// Returns Scalar. Serialized `Summary` protocol buffer.
-func ImageSummary(scope *Scope, tag tf.Output, tensor tf.Output, optional ...ImageSummaryAttr) (summary tf.Output) {
+// Returns The padded tensor.
+func MirrorPad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"mode": mode}
 	opspec := tf.OpSpec{
-		Type: "ImageSummary",
+		Type: "MirrorPad",
 		Input: []tf.Input{
-			tag, tensor,
+			input, paddings,
 		},
 		Attrs: attrs,
 	}
@@ -26083,318 +26697,374 @@ func ImageSummary(scope *Scope, tag tf.Output, tensor tf.Output, optional ...Ima
 	return op.Output(0)
 }
 
-// Bucketizes 'input' based on 'boundaries'.
+// A placeholder op for a value that will be fed into the computation.
 //
-// For example, if the inputs are
-//     boundaries = [0, 10, 100]
-//     input = [[-5, 10000]
-//              [150,   10]
-//              [5,    100]]
+// DEPRECATED at GraphDef version 23: Placeholder now behaves the same as PlaceholderV2.
 //
-// then the output will be
-//     output = [[0, 3]
-//               [3, 2]
-//               [1, 3]]
+// N.B. This operation will fail with an error if it is executed. It is
+// intended as a way to represent a value that will always be fed, and to
+// provide attrs that enable the fed value to be checked at runtime.
 //
 // Arguments:
-//	input: Any shape of Tensor contains with int or float type.
-//	boundaries: A sorted list of floats gives the boundary of the buckets.
-//
-// Returns Same shape with 'input', each value of input replaced with bucket index.
+//	dtype: The type of elements in the tensor.
+//	shape: The shape of the tensor. The shape can be any partially-specified
+// shape.  To be unconstrained, pass in a shape with unknown rank.
 //
-// @compatibility(numpy)
-// Equivalent to np.digitize.
-// @end_compatibility
-func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
+// Returns A placeholder tensor that must be replaced using the feed mechanism.
+func PlaceholderV2(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"boundaries": boundaries}
+	attrs := map[string]interface{}{"dtype": dtype, "shape": shape}
 	opspec := tf.OpSpec{
-		Type: "Bucketize",
-		Input: []tf.Input{
-			input,
-		},
+		Type: "PlaceholderV2",
+
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Reshapes a SparseTensor to represent values in a new dense shape.
-//
-// This operation has the same semantics as reshape on the represented dense
-// tensor.  The `input_indices` are recomputed based on the requested `new_shape`.
-//
-// If one component of `new_shape` is the special value -1, the size of that
-// dimension is computed so that the total dense size remains constant.  At
-// most one component of `new_shape` can be -1.  The number of dense elements
-// implied by `new_shape` must be the same as the number of dense elements
-// originally implied by `input_shape`.
+// ResourceApplyAdadeltaAttr is an optional argument to ResourceApplyAdadelta.
+type ResourceApplyAdadeltaAttr func(optionalAttr)
+
+// ResourceApplyAdadeltaUseLocking sets the optional use_locking attribute to value.
 //
-// Reshaping does not affect the order of values in the SparseTensor.
+// value: If True, updating of the var, accum and update_accum tensors will be protected by
+// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// If not specified, defaults to false
+func ResourceApplyAdadeltaUseLocking(value bool) ResourceApplyAdadeltaAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the adadelta scheme.
 //
-// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape`
-// has length `R_out`, then `input_indices` has shape `[N, R_in]`,
-// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and
-// `output_shape` has length `R_out`.
+// accum = rho() * accum + (1 - rho()) * grad.square();
+// update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
+// update_accum = rho() * update_accum + (1 - rho()) * update.square();
+// var -= update;
 //
 // Arguments:
-//	input_indices: 2-D.  `N x R_in` matrix with the indices of non-empty values in a
-// SparseTensor.
-//	input_shape: 1-D.  `R_in` vector with the input SparseTensor's dense shape.
-//	new_shape: 1-D.  `R_out` vector with the requested new dense shape.
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	accum_update: Should be from a Variable().
+//	lr: Scaling factor. Must be a scalar.
+//	rho: Decay factor. Must be a scalar.
+//	epsilon: Constant factor. Must be a scalar.
+//	grad: The gradient.
 //
-// Returns 2-D.  `N x R_out` matrix with the updated indices of non-empty
-// values in the output SparseTensor.1-D.  `R_out` vector with the full dense shape of the output
-// SparseTensor.  This is the same as `new_shape` but with any -1 dimensions
-// filled in.
-func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) {
+// Returns the created operation.
+func ResourceApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdadeltaAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SparseReshape",
+		Type: "ResourceApplyAdadelta",
 		Input: []tf.Input{
-			input_indices, input_shape, new_shape,
+			var_, accum, accum_update, lr, rho, epsilon, grad,
 		},
+		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
+	return scope.AddOperation(opspec)
 }
 
-// Computes the product along segments of a tensor.
+// SqueezeAttr is an optional argument to Squeeze.
+type SqueezeAttr func(optionalAttr)
+
+// SqueezeAxis sets the optional axis attribute to value.
 //
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
+// value: If specified, only squeezes the dimensions listed. The dimension
+// index starts at 0. It is an error to squeeze a dimension that is not 1. Must
+// be in the range `[-rank(input), rank(input))`.
+// If not specified, defaults to <>
 //
-// Computes a tensor such that
-// \\(output_i = \prod_j data_j\\) where the product is over `j` such
-// that `segment_ids[j] == i`.
+// REQUIRES: len(value) >= 0
+func SqueezeAxis(value []int64) SqueezeAttr {
+	return func(m optionalAttr) {
+		m["squeeze_dims"] = value
+	}
+}
+
+// Removes dimensions of size 1 from the shape of a tensor.
 //
-// If the product is empty for a given segment ID `i`, `output[i] = 1`.
+// Given a tensor `input`, this operation returns a tensor of the same type with
+// all dimensions of size 1 removed. If you don't want to remove all size 1
+// dimensions, you can remove specific size 1 dimensions by specifying
+// `axis`.
 //
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentProd.png" alt>
-// </div>
+// For example:
 //
-// Arguments:
+// ```
+// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
+// shape(squeeze(t)) ==> [2, 3]
+// ```
 //
-//	segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
+// Or, to remove specific size 1 dimensions:
 //
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+// ```
+// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
+// shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1]
+// ```
+//
+// Arguments:
+//	input: The `input` to squeeze.
+//
+// Returns Contains the same data as `input`, but has one or more dimensions of
+// size 1 removed.
+func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SegmentProd",
+		Type: "Squeeze",
 		Input: []tf.Input{
-			data, segment_ids,
+			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the sum along segments of a tensor.
+// SpaceToBatch for N-D tensors of type T.
 //
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
+// This operation divides "spatial" dimensions `[1, ..., M]` of the input into a
+// grid of blocks of shape `block_shape`, and interleaves these blocks with the
+// "batch" dimension (0) such that in the output, the spatial dimensions
+// `[1, ..., M]` correspond to the position within the grid, and the batch
+// dimension combines both the position within a spatial block and the original
+// batch position.  Prior to division into blocks, the spatial dimensions of the
+// input are optionally zero padded according to `paddings`.  See below for a
+// precise description.
+//
+// Arguments:
+//	input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`,
+// where spatial_shape has `M` dimensions.
+//	block_shape: 1-D with shape `[M]`, all values must be >= 1.
+//	paddings: 2-D with shape `[M, 2]`, all values must be >= 0.
+//   `paddings[i] = [pad_start, pad_end]` specifies the padding for input dimension
+//   `i + 1`, which corresponds to spatial dimension `i`.  It is required that
+//   `block_shape[i]` divides `input_shape[i + 1] + pad_start + pad_end`.
+//
+// This operation is equivalent to the following steps:
+//
+// 1. Zero-pad the start and end of dimensions `[1, ..., M]` of the
+//    input according to `paddings` to produce `padded` of shape `padded_shape`.
+//
+// 2. Reshape `padded` to `reshaped_padded` of shape:
+//
+//      [batch] +
+//      [padded_shape[1] / block_shape[0],
+//        block_shape[0],
+//       ...,
+//       padded_shape[M] / block_shape[M-1],
+//       block_shape[M-1]] +
+//      remaining_shape
+//
+// 3. Permute dimensions of `reshaped_padded` to produce
+//    `permuted_reshaped_padded` of shape:
+//
+//      block_shape +
+//      [batch] +
+//      [padded_shape[1] / block_shape[0],
+//       ...,
+//       padded_shape[M] / block_shape[M-1]] +
+//      remaining_shape
 //
-// Computes a tensor such that
-// `(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such
-// that `segment_ids[j...] == i`.  Unlike `SegmentSum`, `segment_ids`
-// need not be sorted and need not cover all values in the full
-// range of valid values.
+// 4. Reshape `permuted_reshaped_padded` to flatten `block_shape` into the batch
+//    dimension, producing an output tensor of shape:
 //
-// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+//      [batch * prod(block_shape)] +
+//      [padded_shape[1] / block_shape[0],
+//       ...,
+//       padded_shape[M] / block_shape[M-1]] +
+//      remaining_shape
 //
-// `num_segments` should equal the number of distinct segment IDs.
+// Some examples:
 //
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/UnsortedSegmentSum.png" alt>
-// </div>
+// (1) For the following input of shape `[1, 2, 2, 1]`, `block_shape = [2, 2]`, and
+//     `paddings = [[0, 0], [0, 0]]`:
 //
-// Arguments:
+// ```
+// x = [[[[1], [2]], [[3], [4]]]]
+// ```
 //
-//	segment_ids: A tensor whose shape is a prefix of `data.shape`.
+// The output tensor has shape `[4, 1, 1, 1]` and value:
 //
+// ```
+// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
+// ```
 //
-// Returns Has same shape as data, except for the first `segment_ids.rank`
-// dimensions, which are replaced with a single dimension which has size
-// `num_segments`.
-func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "UnsortedSegmentSum",
-		Input: []tf.Input{
-			data, segment_ids, num_segments,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes hyperbolic sine of x element-wise.
-func Sinh(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Sinh",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the sum along sparse segments of a tensor.
+// (2) For the following input of shape `[1, 2, 2, 3]`, `block_shape = [2, 2]`, and
+//     `paddings = [[0, 0], [0, 0]]`:
 //
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
+// ```
+// x = [[[[1, 2, 3], [4, 5, 6]],
+//       [[7, 8, 9], [10, 11, 12]]]]
+// ```
 //
-// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
-// dimension, selecting a subset of dimension 0, specified by `indices`.
+// The output tensor has shape `[4, 1, 1, 3]` and value:
 //
-// For example:
+// ```
+// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
+// ```
 //
-// ```python
-// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+// (3) For the following input of shape `[1, 4, 4, 1]`, `block_shape = [2, 2]`, and
+//     `paddings = [[0, 0], [0, 0]]`:
 //
-// # Select two rows, one segment.
-// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
-// # => [[0 0 0 0]]
+// ```
+// x = [[[[1],   [2],  [3],  [4]],
+//       [[5],   [6],  [7],  [8]],
+//       [[9],  [10], [11],  [12]],
+//       [[13], [14], [15],  [16]]]]
+// ```
 //
-// # Select two rows, two segment.
-// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
-// # => [[ 1  2  3  4]
-// #     [-1 -2 -3 -4]]
+// The output tensor has shape `[4, 2, 2, 1]` and value:
 //
-// # Select all rows, two segments.
-// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
-// # => [[0 0 0 0]
-// #     [5 6 7 8]]
+// ```
+// x = [[[[1], [3]], [[9], [11]]],
+//      [[[2], [4]], [[10], [12]]],
+//      [[[5], [7]], [[13], [15]]],
+//      [[[6], [8]], [[14], [16]]]]
+// ```
 //
-// # Which is equivalent to:
-// tf.segment_sum(c, tf.constant([0, 0, 1]))
+// (4) For the following input of shape `[2, 2, 4, 1]`, block_shape = `[2, 2]`, and
+//     paddings = `[[0, 0], [2, 0]]`:
+//
+// ```
+// x = [[[[1],   [2],  [3],  [4]],
+//       [[5],   [6],  [7],  [8]]],
+//      [[[9],  [10], [11],  [12]],
+//       [[13], [14], [15],  [16]]]]
 // ```
 //
-// Arguments:
+// The output tensor has shape `[8, 1, 3, 1]` and value:
 //
-//	indices: A 1-D tensor. Has same rank as `segment_ids`.
-//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+// ```
+// x = [[[[0], [1], [3]]], [[[0], [9], [11]]],
+//      [[[0], [2], [4]]], [[[0], [10], [12]]],
+//      [[[0], [5], [7]]], [[[0], [13], [15]]],
+//      [[[0], [6], [8]]], [[[0], [14], [16]]]]
+// ```
 //
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
+// Among others, this operation is useful for reducing atrous convolution into
+// regular convolution.
+func SpaceToBatchND(scope *Scope, input tf.Output, block_shape tf.Output, paddings tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "SparseSegmentSum",
+		Type: "SpaceToBatchND",
 		Input: []tf.Input{
-			data, indices, segment_ids,
+			input, block_shape, paddings,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Counts the number of occurrences of each value in an integer array.
-//
-// Outputs a vector with length `size` and the same dtype as `weights`. If
-// `weights` are empty, then index `i` stores the number of times the value `i` is
-// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of
-// the value in `weights` at each index where the corresponding value in `arr` is
-// `i`.
-//
-// Values in `arr` outside of the range [0, size) are ignored.
-//
-// Arguments:
-//	arr: int32 `Tensor`.
-//	size: non-negative int32 scalar `Tensor`.
-//	weights: is an int32, int64, float32, or float64 `Tensor` with the same
-// shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights
-// equal to 1.
+// QuantizeAndDequantizeV2Attr is an optional argument to QuantizeAndDequantizeV2.
+type QuantizeAndDequantizeV2Attr func(optionalAttr)
+
+// QuantizeAndDequantizeV2SignedInput sets the optional signed_input attribute to value.
 //
-// Returns 1D `Tensor` with length equal to `size`. The counts or summed weights for
-// each value in the range [0, size).
-func Bincount(scope *Scope, arr tf.Output, size tf.Output, weights tf.Output) (bins tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Bincount",
-		Input: []tf.Input{
-			arr, size, weights,
-		},
+// value: If the quantization is signed or unsigned.
+// If not specified, defaults to true
+func QuantizeAndDequantizeV2SignedInput(value bool) QuantizeAndDequantizeV2Attr {
+	return func(m optionalAttr) {
+		m["signed_input"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Pop the element at the top of the stack.
-//
-// Arguments:
-//	handle: The handle to a stack.
-//	elem_type: The type of the elem that is popped.
+// QuantizeAndDequantizeV2NumBits sets the optional num_bits attribute to value.
 //
-// Returns The tensor that is popped from the top of the stack.
-func StackPopV2(scope *Scope, handle tf.Output, elem_type tf.DataType) (elem tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"elem_type": elem_type}
-	opspec := tf.OpSpec{
-		Type: "StackPopV2",
-		Input: []tf.Input{
-			handle,
-		},
-		Attrs: attrs,
+// value: The bitwidth of the quantization.
+// If not specified, defaults to 8
+func QuantizeAndDequantizeV2NumBits(value int64) QuantizeAndDequantizeV2Attr {
+	return func(m optionalAttr) {
+		m["num_bits"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// WholeFileReaderV2Attr is an optional argument to WholeFileReaderV2.
-type WholeFileReaderV2Attr func(optionalAttr)
-
-// WholeFileReaderV2Container sets the optional container attribute to value.
+// QuantizeAndDequantizeV2RangeGiven sets the optional range_given attribute to value.
 //
-// value: If non-empty, this reader is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func WholeFileReaderV2Container(value string) WholeFileReaderV2Attr {
+// value: If the range is given or should be computed from the tensor.
+// If not specified, defaults to false
+func QuantizeAndDequantizeV2RangeGiven(value bool) QuantizeAndDequantizeV2Attr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["range_given"] = value
 	}
 }
 
-// WholeFileReaderV2SharedName sets the optional shared_name attribute to value.
+// Quantizes then dequantizes a tensor.
+//
+// This op simulates the precision loss from the quantized forward pass by:
+// 1. Quantizing the tensor to fixed point numbers, which should match the target
+//    quantization method when it is used in inference.
+// 2. Dequantizing it back to floating point numbers for the following ops, most
+//    likely matmul.
+//
+// There are different ways to quantize. This version does not use the full range
+// of the output type, choosing to elide the lowest possible value for symmetry
+// (e.g., output range is -127 to 127, not -128 to 127 for signed 8 bit
+// quantization), so that 0.0 maps to 0.
+//
+// To perform this op, we first find the range of values in our tensor. The range
+// we use is always centered on 0, so we find m such that
+//
+// 1. m = max(abs(input_min), abs(input_max)) if range_given is true,
+// 2. m = max(abs(min_elem(input)), abs(max_elem(input))) otherwise.
+//
+// Our input tensor range is then [-m, m].
+//
+// Next, we choose our fixed-point quantization buckets, [min_fixed, max_fixed].
+// If signed_input is true, this is
+//
+//   [min_fixed, max_fixed ] =
+//       [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1].
+//
+// Otherwise, if signed_input is false, the fixed-point range is
+//
+//   [min_fixed, max_fixed] = [0, (1 << num_bits) - 1].
+//
+// From this we compute our scaling factor, s:
+//
+//   s = (max_fixed - min_fixed) / (2 * m).
+//
+// Now we can quantize and dequantize the elements of our tensor.  An element e
+// is transformed into e':
+//
+//   e' = (e * s).round_to_nearest() / s.
+//
+// Note that we have a different number of buckets in the signed vs. unsigned
+// cases.  For example, if num_bits == 8, we get 254 buckets in the signed case
+// vs. 255 in the unsigned case.
 //
-// value: If non-empty, this reader is named in the given bucket
-// with this shared_name. Otherwise, the node name is used instead.
-// If not specified, defaults to ""
-func WholeFileReaderV2SharedName(value string) WholeFileReaderV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// A Reader that outputs the entire contents of a file as a value.
+// For example, suppose num_bits = 8 and m = 1.  Then
 //
-// To use, enqueue filenames in a Queue.  The output of ReaderRead will
-// be a filename (key) and the contents of that file (value).
+//   [min_fixed, max_fixed] = [-127, 127], and
+//   s = (127 + 127) / 2 = 127.
 //
-// Returns The handle to reference the Reader.
-func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_handle tf.Output) {
+// Given the vector {-1, -0.5, 0, 0.3}, this is quantized to
+// {-127, -63, 0, 38}, and dequantized to {-1, -63.0/127, 0, 38.0/127}.
+//
+// Arguments:
+//	input: Tensor to quantize and then dequantize.
+//	input_min: If range_given, this is the min of the range, otherwise this input
+// will be ignored.
+//	input_max: If range_given, this is the max of the range, otherwise this input
+// will be ignored.
+func QuantizeAndDequantizeV2(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, optional ...QuantizeAndDequantizeV2Attr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -26403,168 +27073,123 @@ func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "WholeFileReaderV2",
-
+		Type: "QuantizeAndDequantizeV2",
+		Input: []tf.Input{
+			input, input_min, input_max,
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes the mean along sparse segments of a tensor.
+// SpaceToBatch for 4-D tensors of type T.
 //
-// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of
-// segments.
+// This is a legacy version of the more general SpaceToBatchND.
 //
-// Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first
-// dimension, selecting a subset of dimension 0, specified by `indices`.
+// Zero-pads and then rearranges (permutes) blocks of spatial data into batch.
+// More specifically, this op outputs a copy of the input tensor where values from
+// the `height` and `width` dimensions are moved to the `batch` dimension. After
+// the zero-padding, both `height` and `width` of the input must be divisible by the
+// block size.
 //
 // Arguments:
+//	input: 4-D with shape `[batch, height, width, depth]`.
+//	paddings: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies
+//   the padding of the input with zeros across the spatial dimensions as follows:
 //
-//	indices: A 1-D tensor. Has same rank as `segment_ids`.
-//	segment_ids: A 1-D tensor. Values should be sorted and can be repeated.
+//       paddings = [[pad_top, pad_bottom], [pad_left, pad_right]]
 //
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SparseSegmentMean(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSegmentMean",
-		Input: []tf.Input{
-			data, indices, segment_ids,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Reshapes a quantized tensor as per the Reshape op.
+//   The effective spatial dimensions of the zero-padded input tensor will be:
+//
+//       height_pad = pad_top + height + pad_bottom
+//       width_pad = pad_left + width + pad_right
+//
+// The attr `block_size` must be greater than one. It indicates the block size.
+//
+//   * Non-overlapping blocks of size `block_size x block size` in the height and
+//     width dimensions are rearranged into the batch dimension at each location.
+//   * The batch of the output tensor is `batch * block_size * block_size`.
+//   * Both height_pad and width_pad must be divisible by block_size.
+//
+// The shape of the output will be:
+//
+//     [batch*block_size*block_size, height_pad/block_size, width_pad/block_size,
+//      depth]
+//
+// Some examples:
+//
+// (1) For the following input of shape `[1, 2, 2, 1]` and block_size of 2:
 //
 // ```
+// x = [[[[1], [2]], [[3], [4]]]]
+// ```
 //
-// Arguments:
+// The output tensor has shape `[4, 1, 1, 1]` and value:
 //
-//	shape: Defines the shape of the output tensor.
-//	input_min: The minimum value of the input.
-//	input_max: The maximum value of the input.
+// ```
+// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
+// ```
 //
-// Returns This value is copied from input_min.This value is copied from input_max.
-func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min tf.Output, input_max tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "QuantizedReshape",
-		Input: []tf.Input{
-			tensor, shape, input_min, input_max,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// Computes gradients for SparseSegmentSqrtN.
+// (2) For the following input of shape `[1, 2, 2, 3]` and block_size of 2:
 //
-// Returns tensor "output" with same shape as grad, except for dimension 0 whose
-// value is output_dim0.
+// ```
+// x = [[[[1, 2, 3], [4, 5, 6]],
+//       [[7, 8, 9], [10, 11, 12]]]]
+// ```
 //
-// Arguments:
-//	grad: gradient propagated to the SparseSegmentSqrtN op.
-//	indices: indices passed to the corresponding SparseSegmentSqrtN op.
-//	segment_ids: segment_ids passed to the corresponding SparseSegmentSqrtN op.
-//	output_dim0: dimension 0 of "data" passed to SparseSegmentSqrtN op.
-func SparseSegmentSqrtNGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseSegmentSqrtNGrad",
-		Input: []tf.Input{
-			grad, indices, segment_ids, output_dim0,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a sequence of numbers.
+// The output tensor has shape `[4, 1, 1, 3]` and value:
 //
-// This operation creates a sequence of numbers that begins at `start` and
-// extends by increments of `delta` up to but not including `limit`.
+// ```
+// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
+// ```
 //
-// For example:
+// (3) For the following input of shape `[1, 4, 4, 1]` and block_size of 2:
 //
 // ```
-// # 'start' is 3
-// # 'limit' is 18
-// # 'delta' is 3
-// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15]
+// x = [[[[1],   [2],  [3],  [4]],
+//       [[5],   [6],  [7],  [8]],
+//       [[9],  [10], [11],  [12]],
+//       [[13], [14], [15],  [16]]]]
 // ```
 //
-// Arguments:
-//	start: 0-D (scalar). First entry in the sequence.
-//	limit: 0-D (scalar). Upper limit of sequence, exclusive.
-//	delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`.
+// The output tensor has shape `[4, 2, 2, 1]` and value:
 //
-// Returns 1-D.
-func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Range",
-		Input: []tf.Input{
-			start, limit, delta,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// AngleAttr is an optional argument to Angle.
-type AngleAttr func(optionalAttr)
-
-// AngleTout sets the optional Tout attribute to value.
-// If not specified, defaults to DT_FLOAT
-func AngleTout(value tf.DataType) AngleAttr {
-	return func(m optionalAttr) {
-		m["Tout"] = value
-	}
-}
-
-// Returns the argument of a complex number.
+// ```
+// x = [[[[1], [3]], [[9], [11]]],
+//      [[[2], [4]], [[10], [12]]],
+//      [[[5], [7]], [[13], [15]]],
+//      [[[6], [8]], [[14], [16]]]]
+// ```
 //
-// Given a tensor `input` of complex numbers, this operation returns a tensor of
-// type `float` that is the argument of each element in `input`. All elements in
-// `input` must be complex numbers of the form \\(a + bj\\), where *a*
-// is the real part and *b* is the imaginary part.
+// (4) For the following input of shape `[2, 2, 4, 1]` and block_size of 2:
 //
-// The argument returned by this operation is of the form \\(atan2(b, a)\\).
+// ```
+// x = [[[[1],   [2],  [3],  [4]],
+//       [[5],   [6],  [7],  [8]]],
+//      [[[9],  [10], [11],  [12]],
+//       [[13], [14], [15],  [16]]]]
+// ```
 //
-// For example:
+// The output tensor has shape `[8, 1, 2, 1]` and value:
 //
 // ```
-// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-// tf.angle(input) ==> [2.0132, 1.056]
+// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]],
+//      [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]]
 // ```
 //
-// @compatibility(numpy)
-// Equivalent to np.angle.
-// @end_compatibility
-func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Output) {
+// Among others, this operation is useful for reducing atrous convolution into
+// regular convolution.
+//
+func SpaceToBatch(scope *Scope, input tf.Output, paddings tf.Output, block_size int64) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"block_size": block_size}
 	opspec := tf.OpSpec{
-		Type: "Angle",
+		Type: "SpaceToBatch",
 		Input: []tf.Input{
-			input,
+			input, paddings,
 		},
 		Attrs: attrs,
 	}
@@ -26572,114 +27197,87 @@ func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Outp
 	return op.Output(0)
 }
 
-// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum.
-type ResourceSparseApplyMomentumAttr func(optionalAttr)
-
-// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
+// UnpackAttr is an optional argument to Unpack.
+type UnpackAttr func(optionalAttr)
 
-// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
+// UnpackAxis sets the optional axis attribute to value.
 //
-// value: If `True`, the tensor passed to compute grad will be
-// var - lr * momentum * accum, so in the end, the var you get is actually
-// var - lr * momentum * accum.
-// If not specified, defaults to false
-func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr {
+// value: Dimension along which to unpack.  Negative values wrap around, so the
+// valid range is `[-R, R)`.
+// If not specified, defaults to 0
+func UnpackAxis(value int64) UnpackAttr {
 	return func(m optionalAttr) {
-		m["use_nesterov"] = value
+		m["axis"] = value
 	}
 }
 
-// Update relevant entries in '*var' and '*accum' according to the momentum scheme.
+// Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors.
 //
-// Set use_nesterov = True if you want to use Nesterov momentum.
+// Unpacks `num` tensors from `value` by chipping it along the `axis` dimension.
+// For example, given a tensor of shape `(A, B, C, D)`;
 //
-// That is for rows we have grad for, we update var and accum as follows:
+// If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]`
+//   and each tensor in `output` will have shape `(B, C, D)`. (Note that the
+//   dimension unpacked along is gone, unlike `split`).
 //
-// accum = accum * momentum + grad
-// var -= lr * accum
+// If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]`
+//   and each tensor in `output` will have shape `(A, C, D)`.
+// Etc.
+//
+// This is the opposite of `pack`.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Learning rate. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//	momentum: Momentum. Must be a scalar.
+//	value: 1-D or higher, with `axis` dimension size equal to `num`.
 //
-// Returns the created operation.
-func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) {
+//
+// Returns The list of tensors unpacked from `value`.
+func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"num": num}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyMomentum",
+		Type: "Unpack",
 		Input: []tf.Input{
-			var_, accum, lr, grad, indices, momentum,
+			value,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
-}
-
-// Returns the complex conjugate of a complex number.
-//
-// Given a tensor `input` of complex numbers, this operation returns a tensor of
-// complex numbers that are the complex conjugate of each element in `input`. The
-// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the
-// real part and *b* is the imaginary part.
-//
-// The complex conjugate returned by this operation is of the form \\(a - bj\\).
-//
-// For example:
-//
-// ```
-// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
-// ```
-func Conj(scope *Scope, input tf.Output) (output tf.Output) {
+	op := scope.AddOperation(opspec)
 	if scope.Err() != nil {
 		return
 	}
-	opspec := tf.OpSpec{
-		Type: "Conj",
-		Input: []tf.Input{
-			input,
-		},
+	var idx int
+	var err error
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("Unpack", err)
+		return
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return output
 }
 
-// A placeholder op that passes through `input` when its output is not fed.
+// Increments variable pointed to by 'resource' until it reaches 'limit'.
 //
 // Arguments:
-//	input: The default value to produce when `output` is not fed.
-//	shape: The (possibly partial) shape of the tensor.
+//	resource: Should be from a scalar `Variable` node.
+//	limit: If incrementing ref would bring it above limit, instead generates an
+// 'OutOfRange' error.
 //
-// Returns A placeholder tensor that defaults to `input` if it is not fed.
-func PlaceholderWithDefault(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) {
+//
+// Returns A copy of the input before increment. If nothing else modifies the
+// input, the values produced will all be distinct.
+func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"shape": shape}
+	attrs := map[string]interface{}{"limit": limit, "T": T}
 	opspec := tf.OpSpec{
-		Type: "PlaceholderWithDefault",
+		Type: "ResourceCountUpTo",
 		Input: []tf.Input{
-			input,
+			resource,
 		},
 		Attrs: attrs,
 	}
@@ -26687,532 +27285,714 @@ func PlaceholderWithDefault(scope *Scope, input tf.Output, shape tf.Shape) (outp
 	return op.Output(0)
 }
 
-// Deprecated. Use TensorArrayReadV3
-func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) {
+// Delete the stack from its resource container.
+//
+// Arguments:
+//	handle: The handle to a stack.
+//
+// Returns the created operation.
+func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtype": dtype}
 	opspec := tf.OpSpec{
-		Type: "TensorArrayReadV2",
+		Type: "StackCloseV2",
 		Input: []tf.Input{
-			handle, index, flow_in,
+			handle,
 		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// QuantizedMatMulAttr is an optional argument to QuantizedMatMul.
-type QuantizedMatMulAttr func(optionalAttr)
-
-// QuantizedMatMulToutput sets the optional Toutput attribute to value.
-// If not specified, defaults to DT_QINT32
-func QuantizedMatMulToutput(value tf.DataType) QuantizedMatMulAttr {
-	return func(m optionalAttr) {
-		m["Toutput"] = value
 	}
+	return scope.AddOperation(opspec)
 }
 
-// QuantizedMatMulTransposeA sets the optional transpose_a attribute to value.
+// BatchToSpace for N-D tensors of type T.
 //
-// value: If true, `a` is transposed before multiplication.
-// If not specified, defaults to false
-func QuantizedMatMulTransposeA(value bool) QuantizedMatMulAttr {
-	return func(m optionalAttr) {
-		m["transpose_a"] = value
-	}
-}
-
-// QuantizedMatMulTransposeB sets the optional transpose_b attribute to value.
+// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape
+// `block_shape + [batch]`, interleaves these blocks back into the grid defined by
+// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as
+// the input.  The spatial dimensions of this intermediate result are then
+// optionally cropped according to `crops` to produce the output.  This is the
+// reverse of SpaceToBatch.  See below for a precise description.
 //
-// value: If true, `b` is transposed before multiplication.
-// If not specified, defaults to false
-func QuantizedMatMulTransposeB(value bool) QuantizedMatMulAttr {
-	return func(m optionalAttr) {
-		m["transpose_b"] = value
-	}
-}
-
-// QuantizedMatMulTactivation sets the optional Tactivation attribute to value.
+// Arguments:
+//	input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`,
+// where spatial_shape has M dimensions.
+//	block_shape: 1-D with shape `[M]`, all values must be >= 1.
+//	crops: 2-D with shape `[M, 2]`, all values must be >= 0.
+//   `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input
+//   dimension `i + 1`, which corresponds to spatial dimension `i`.  It is
+//   required that
+//   `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`.
 //
-// value: The type of output produced by activation function
-// following this operation.
-// If not specified, defaults to DT_QUINT8
-func QuantizedMatMulTactivation(value tf.DataType) QuantizedMatMulAttr {
-	return func(m optionalAttr) {
-		m["Tactivation"] = value
-	}
-}
-
-// Perform a quantized matrix multiplication of  `a` by the matrix `b`.
+// This operation is equivalent to the following steps:
 //
-// The inputs must be two-dimensional matrices and the inner dimension of
-// `a` (after being transposed if `transpose_a` is non-zero) must match the
-// outer dimension of `b` (after being transposed if `transposed_b` is
-// non-zero).
+// 1. Reshape `input` to `reshaped` of shape:
+//      [block_shape[0], ..., block_shape[M-1],
+//       batch / prod(block_shape),
+//       input_shape[1], ..., input_shape[N-1]]
+//
+// 2. Permute dimensions of `reshaped` to produce `permuted` of shape
+//      [batch / prod(block_shape),
+//
+//       input_shape[1], block_shape[0],
+//       ...,
+//       input_shape[M], block_shape[M-1],
+//
+//       input_shape[M+1], ..., input_shape[N-1]]
+//
+// 3. Reshape `permuted` to produce `reshaped_permuted` of shape
+//      [batch / prod(block_shape),
+//
+//       input_shape[1] * block_shape[0],
+//       ...,
+//       input_shape[M] * block_shape[M-1],
+//
+//       input_shape[M+1],
+//       ...,
+//       input_shape[N-1]]
+//
+// 4. Crop the start and end of dimensions `[1, ..., M]` of
+//    `reshaped_permuted` according to `crops` to produce the output of shape:
+//      [batch / prod(block_shape),
+//
+//       input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1],
+//       ...,
+//       input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1],
+//
+//       input_shape[M+1], ..., input_shape[N-1]]
+//
+// Some examples:
+//
+// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and
+//     `crops = [[0, 0], [0, 0]]`:
+//
+// ```
+// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
+// ```
+//
+// The output tensor has shape `[1, 2, 2, 1]` and value:
+//
+// ```
+// x = [[[[1], [2]], [[3], [4]]]]
+// ```
+//
+// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and
+//     `crops = [[0, 0], [0, 0]]`:
+//
+// ```
+// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
+// ```
+//
+// The output tensor has shape `[1, 2, 2, 3]` and value:
+//
+// ```
+// x = [[[[1, 2, 3], [4, 5, 6]],
+//       [[7, 8, 9], [10, 11, 12]]]]
+// ```
+//
+// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and
+//     `crops = [[0, 0], [0, 0]]`:
+//
+// ```
+// x = [[[[1], [3]], [[9], [11]]],
+//      [[[2], [4]], [[10], [12]]],
+//      [[[5], [7]], [[13], [15]]],
+//      [[[6], [8]], [[14], [16]]]]
+// ```
+//
+// The output tensor has shape `[1, 4, 4, 1]` and value:
+//
+// ```
+// x = [[[1],   [2],  [3],  [4]],
+//      [[5],   [6],  [7],  [8]],
+//      [[9],  [10], [11],  [12]],
+//      [[13], [14], [15],  [16]]]
+// ```
+//
+// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and
+//     `crops = [[0, 0], [2, 0]]`:
+//
+// ```
+// x = [[[[0], [1], [3]]], [[[0], [9], [11]]],
+//      [[[0], [2], [4]]], [[[0], [10], [12]]],
+//      [[[0], [5], [7]]], [[[0], [13], [15]]],
+//      [[[0], [6], [8]]], [[[0], [14], [16]]]]
+// ```
 //
-// Arguments:
-//	a: Must be a two-dimensional tensor.
-//	b: Must be a two-dimensional tensor.
-//	min_a: The float value that the lowest quantized `a` value represents.
-//	max_a: The float value that the highest quantized `a` value represents.
-//	min_b: The float value that the lowest quantized `b` value represents.
-//	max_b: The float value that the highest quantized `b` value represents.
+// The output tensor has shape `[2, 2, 4, 1]` and value:
 //
-// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
-func QuantizedMatMul(scope *Scope, a tf.Output, b tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) {
+// ```
+// x = [[[[1],   [2],  [3],  [4]],
+//       [[5],   [6],  [7],  [8]]],
+//      [[[9],  [10], [11],  [12]],
+//       [[13], [14], [15],  [16]]]]
+// ```
+func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "QuantizedMatMul",
+		Type: "BatchToSpaceND",
 		Input: []tf.Input{
-			a, b, min_a, max_a, min_b, max_b,
+			input, block_shape, crops,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// QuantizedMulAttr is an optional argument to QuantizedMul.
-type QuantizedMulAttr func(optionalAttr)
-
-// QuantizedMulToutput sets the optional Toutput attribute to value.
-// If not specified, defaults to DT_QINT32
-func QuantizedMulToutput(value tf.DataType) QuantizedMulAttr {
-	return func(m optionalAttr) {
-		m["Toutput"] = value
-	}
+	return op.Output(0)
 }
 
-// Returns x * y element-wise, working on quantized buffers.
+// Extract `patches` from `images` and put them in the "depth" output dimension.
 //
 // Arguments:
+//	images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`.
+//	ksizes: The size of the sliding window for each dimension of `images`.
+//	strides: 1-D of length 4. How far the centers of two consecutive patches are in
+// the images. Must be: `[1, stride_rows, stride_cols, 1]`.
+//	rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
+// input stride, specifying how far two consecutive patch samples are in the
+// input. Equivalent to extracting patches with
+// `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
+// subsampling them spatially by a factor of `rates`. This is equivalent to
+// `rate` in dilated (a.k.a. Atrous) convolutions.
+//	padding: The type of padding algorithm to use.
 //
+// We specify the size-related attributes as:
 //
-//	min_x: The float value that the lowest quantized `x` value represents.
-//	max_x: The float value that the highest quantized `x` value represents.
-//	min_y: The float value that the lowest quantized `y` value represents.
-//	max_y: The float value that the highest quantized `y` value represents.
-//
-// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents.
+// ```python
+//       ksizes = [1, ksize_rows, ksize_cols, 1]
+//       strides = [1, strides_rows, strides_cols, 1]
+//       rates = [1, rates_rows, rates_cols, 1]
+// ```
 //
-// *NOTE*: `QuantizedMul` supports limited forms of broadcasting. More about
-// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func QuantizedMul(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedMulAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) {
+// Returns 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows *
+// ksize_cols * depth]` containing image patches with size
+// `ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension. Note
+// `out_rows` and `out_cols` are the dimensions of the output patches.
+func ExtractImagePatches(scope *Scope, images tf.Output, ksizes []int64, strides []int64, rates []int64, padding string) (patches tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"ksizes": ksizes, "strides": strides, "rates": rates, "padding": padding}
 	opspec := tf.OpSpec{
-		Type: "QuantizedMul",
+		Type: "ExtractImagePatches",
 		Input: []tf.Input{
-			x, y, min_x, max_x, min_y, max_y,
+			images,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Forwards the input to the output.
+// Bitcasts a tensor from one type to another without copying data.
 //
-// This operator represents the loop termination condition used by the
-// "pivot" switches of a loop.
+// Given a tensor `input`, this operation returns a tensor that has the same buffer
+// data as `input` with datatype `type`.
 //
-// Arguments:
-//	input: A boolean scalar, representing the branch predicate of the Switch op.
+// If the input datatype `T` is larger than the output datatype `type` then the
+// shape changes from [...] to [..., sizeof(`T`)/sizeof(`type`)].
 //
-// Returns The same tensor as `input`.
-func LoopCond(scope *Scope, input tf.Output) (output tf.Output) {
+// If `T` is smaller than `type`, the operator requires that the rightmost
+// dimension be equal to sizeof(`type`)/sizeof(`T`). The shape then goes from
+// [..., sizeof(`type`)/sizeof(`T`)] to [...].
+//
+// *NOTE*: Bitcast is implemented as a low-level cast, so machines with different
+// endian orderings will give different results.
+func Bitcast(scope *Scope, input tf.Output, type_ tf.DataType) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"type": type_}
 	opspec := tf.OpSpec{
-		Type: "LoopCond",
+		Type: "Bitcast",
 		Input: []tf.Input{
 			input,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Returns (x - y)(x - y) element-wise.
+// OneHotAttr is an optional argument to OneHot.
+type OneHotAttr func(optionalAttr)
+
+// OneHotAxis sets the optional axis attribute to value.
 //
-// *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func SquaredDifference(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SquaredDifference",
-		Input: []tf.Input{
-			x, y,
-		},
+// value: The axis to fill (default: -1, a new inner-most axis).
+// If not specified, defaults to -1
+func OneHotAxis(value int64) OneHotAttr {
+	return func(m optionalAttr) {
+		m["axis"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Convert the quantized 'input' tensor into a lower-precision 'output', using the
+// Returns a one-hot tensor.
 //
-// actual distribution of the values to maximize the usage of the lower bit depth
-// and adjusting the output min and max ranges accordingly.
+// The locations represented by indices in `indices` take value `on_value`,
+// while all other locations take value `off_value`.
 //
-// [input_min, input_max] are scalar floats that specify the range for the float
-// interpretation of the 'input' data. For example, if input_min is -1.0f and
-// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
-// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+// If the input `indices` is rank `N`, the output will have rank `N+1`,
+// The new axis is created at dimension `axis` (default: the new axis is
+// appended at the end).
 //
-// This operator tries to squeeze as much precision as possible into an output with
-// a lower bit depth by calculating the actual min and max values found in the
-// data. For example, maybe that quint16 input has no values lower than 16,384 and
-// none higher than 49,152. That means only half the range is actually needed, all
-// the float interpretations are between -0.5f and 0.5f, so if we want to compress
-// the data into a quint8 output, we can use that range rather than the theoretical
-// -1.0f to 1.0f that is suggested by the input min and max.
+// If `indices` is a scalar the output shape will be a vector of length `depth`.
 //
-// In practice, this is most useful for taking output from operations like
-// QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
-// may have large potential output ranges, but in practice have a distribution of
-// input values that only uses a small fraction of the possible range. By feeding
-// that output into this operator, we can reduce it from 32 bits down to 8 with
-// minimal loss of accuracy.
+// If `indices` is a vector of length `features`, the output shape will be:
+// ```
+//   features x depth if axis == -1
+//   depth x features if axis == 0
+// ```
 //
-// Arguments:
+// If `indices` is a matrix (batch) with shape `[batch, features]`,
+// the output shape will be:
+// ```
+//   batch x features x depth if axis == -1
+//   batch x depth x features if axis == 1
+//   depth x batch x features if axis == 0
+// ```
 //
-//	input_min: The float value that the minimum quantized input value represents.
-//	input_max: The float value that the maximum quantized input value represents.
-//	out_type: The type of the output. Should be a lower bit depth than Tinput.
 //
-// Returns The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents.
-func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+// Examples
+// =========
+//
+// Suppose that
+//
+// ```
+//   indices = [0, 2, -1, 1]
+//   depth = 3
+//   on_value = 5.0
+//   off_value = 0.0
+//   axis = -1
+// ```
+//
+// Then output is `[4 x 3]`:
+//
+//     ```output =
+//       [5.0 0.0 0.0]  // one_hot(0)
+//       [0.0 0.0 5.0]  // one_hot(2)
+//       [0.0 0.0 0.0]  // one_hot(-1)
+//       [0.0 5.0 0.0]  // one_hot(1)
+//     ```
+//
+// Suppose that
+//
+// ```
+//   indices = [0, 2, -1, 1]
+//   depth = 3
+//   on_value = 0.0
+//   off_value = 3.0
+//   axis = 0
+// ```
+//
+// Then output is `[3 x 4]`:
+//
+//     ```output =
+//       [0.0 3.0 3.0 3.0]
+//       [3.0 3.0 3.0 0.0]
+//       [3.0 3.0 3.0 3.0]
+//       [3.0 0.0 3.0 3.0]
+//     //  ^                one_hot(0)
+//     //      ^            one_hot(2)
+//     //          ^        one_hot(-1)
+//     //              ^    one_hot(1)
+//     ```
+// Suppose that
+//
+// ```
+//   indices = [[0, 2], [1, -1]]
+//   depth = 3
+//   on_value = 1.0
+//   off_value = 0.0
+//   axis = -1
+// ```
+//
+// Then output is `[2 x 2 x 3]`:
+//
+//     ```output =
+//       [
+//         [1.0, 0.0, 0.0]  // one_hot(0)
+//         [0.0, 0.0, 1.0]  // one_hot(2)
+//       ][
+//         [0.0, 1.0, 0.0]  // one_hot(1)
+//         [0.0, 0.0, 0.0]  // one_hot(-1)
+//       ]```
+//
+// Arguments:
+//	indices: A tensor of indices.
+//	depth: A scalar defining the depth of the one hot dimension.
+//	on_value: A scalar defining the value to fill in output when `indices[j] = i`.
+//	off_value: A scalar defining the value to fill in output when `indices[j] != i`.
+//
+// Returns The one-hot tensor.
+func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"out_type": out_type}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "QuantizeDownAndShrinkRange",
+		Type: "OneHot",
 		Input: []tf.Input{
-			input, input_min, input_max,
+			indices, depth, on_value, off_value,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return op.Output(0)
 }
 
-// Compare values of `input` to `threshold` and pack resulting bits into a `uint8`.
-//
-// Each comparison returns a boolean `true` (if `input_value > threshold`)
-// or and `false` otherwise.
-//
-// This operation is useful for Locality-Sensitive-Hashing (LSH) and other
-// algorithms that use hashing approximations of cosine and `L2` distances;
-// codes can be generated from an input via:
+// QueueDequeueV2Attr is an optional argument to QueueDequeueV2.
+type QueueDequeueV2Attr func(optionalAttr)
+
+// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value.
 //
-// ```python
-// codebook_size = 50
-// codebook_bits = codebook_size * 32
-// codebook = tf.get_variable('codebook', [x.shape[-1].value, codebook_bits],
-//                            dtype=x.dtype,
-//                            initializer=tf.orthogonal_initializer())
-// codes = compare_and_threshold(tf.matmul(x, codebook), threshold=0.)
-// codes = tf.bitcast(codes, tf.int32)  # go from uint8 to int32
-// # now codes has shape x.shape[:-1] + [codebook_size]
-// ```
+// value: If the queue is empty, this operation will block for up to
+// timeout_ms milliseconds.
+// Note: This option is not supported yet.
+// If not specified, defaults to -1
+func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr {
+	return func(m optionalAttr) {
+		m["timeout_ms"] = value
+	}
+}
+
+// Dequeues a tuple of one or more tensors from the given queue.
 //
-// **NOTE**: Currently, the innermost dimension of the tensor must be divisible
-// by 8.
+// This operation has k outputs, where k is the number of components
+// in the tuples stored in the given queue, and output i is the ith
+// component of the dequeued tuple.
 //
-// Given an `input` shaped `[s0, s1, ..., s_n]`, the output is
-// a `uint8` tensor shaped `[s0, s1, ..., s_n / 8]`.
+// N.B. If the queue is empty, this operation will block until an element
+// has been dequeued (or 'timeout_ms' elapses, if specified).
 //
 // Arguments:
-//	input: Values to compare against `threshold` and bitpack.
-//	threshold: Threshold to compare against.
+//	handle: The handle to a queue.
+//	component_types: The type of each component in a tuple.
 //
-// Returns The bitpacked comparisons.
-func CompareAndBitpack(scope *Scope, input tf.Output, threshold tf.Output) (output tf.Output) {
+// Returns One or more tensors that were dequeued as a tuple.
+func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"component_types": component_types}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "CompareAndBitpack",
+		Type: "QueueDequeueV2",
 		Input: []tf.Input{
-			input, threshold,
+			handle,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if components, idx, err = makeOutputList(op, idx, "components"); err != nil {
+		scope.UpdateErr("QueueDequeueV2", err)
+		return
+	}
+	return components
 }
 
-// Replaces the contents of the table with the specified keys and values.
+// Returns locations of nonzero / true values in a tensor.
 //
-// The tensor `keys` must be of the same type as the keys of the table.
-// The tensor `values` must be of the type of the table values.
+// This operation returns the coordinates of true elements in `condition`. The
+// coordinates are returned in a 2-D tensor where the first dimension (rows)
+// represents the number of true elements, and the second dimension (columns)
+// represents the coordinates of the true elements. Keep in mind, the shape of
+// the output tensor can vary depending on how many true values there are in
+// `condition`. Indices are output in row-major order.
 //
-// Arguments:
-//	table_handle: Handle to the table.
-//	keys: Any shape.  Keys to look up.
-//	values: Values to associate with keys.
+// For example:
 //
-// Returns the created operation.
-func LookupTableImportV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) {
+// ```
+// # 'input' tensor is [[True, False]
+// #                    [True, False]]
+// # 'input' has two true values, so output has two coordinates.
+// # 'input' has rank of 2, so coordinates have two indices.
+// where(input) ==> [[0, 0],
+//                   [1, 0]]
+//
+// # `condition` tensor is [[[True, False]
+// #                     [True, False]]
+// #                    [[False, True]
+// #                     [False, True]]
+// #                    [[False, False]
+// #                     [False, True]]]
+// # 'input' has 5 true values, so output has 5 coordinates.
+// # 'input' has rank of 3, so coordinates have three indices.
+// where(input) ==> [[0, 0, 0],
+//                   [0, 1, 0],
+//                   [1, 0, 1],
+//                   [1, 1, 1],
+//                   [2, 1, 1]]
+//
+// # `condition` tensor is [[[1.5,  0.0]
+// #                     [-0.5, 0.0]]
+// #                    [[0.0,  0.25]
+// #                     [0.0,  0.75]]
+// #                    [[0.0,  0.0]
+// #                     [0.0,  0.01]]]
+// # 'input' has 5 nonzero values, so output has 5 coordinates.
+// # 'input' has rank of 3, so coordinates have three indices.
+// where(input) ==> [[0, 0, 0],
+//                   [0, 1, 0],
+//                   [1, 0, 1],
+//                   [1, 1, 1],
+//                   [2, 1, 1]]
+//
+// # `condition` tensor is [[[1.5 + 0.0j, 0.0  + 0.0j]
+// #                     [0.0 + 0.5j, 0.0  + 0.0j]]
+// #                    [[0.0 + 0.0j, 0.25 + 1.5j]
+// #                     [0.0 + 0.0j, 0.75 + 0.0j]]
+// #                    [[0.0 + 0.0j, 0.0  + 0.0j]
+// #                     [0.0 + 0.0j, 0.01 + 0.0j]]]
+// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates.
+// # 'input' has rank of 3, so coordinates have three indices.
+// where(input) ==> [[0, 0, 0],
+//                   [0, 1, 0],
+//                   [1, 0, 1],
+//                   [1, 1, 1],
+//                   [2, 1, 1]]
+// ```
+func Where(scope *Scope, condition tf.Output) (index tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "LookupTableImportV2",
+		Type: "Where",
 		Input: []tf.Input{
-			table_handle, keys, values,
+			condition,
 		},
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// HashTableV2Attr is an optional argument to HashTableV2.
-type HashTableV2Attr func(optionalAttr)
+// QuantizeAndDequantizeAttr is an optional argument to QuantizeAndDequantize.
+type QuantizeAndDequantizeAttr func(optionalAttr)
 
-// HashTableV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this table is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func HashTableV2Container(value string) HashTableV2Attr {
+// QuantizeAndDequantizeSignedInput sets the optional signed_input attribute to value.
+// If not specified, defaults to true
+func QuantizeAndDequantizeSignedInput(value bool) QuantizeAndDequantizeAttr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["signed_input"] = value
 	}
 }
 
-// HashTableV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this table is shared under the given name across
-// multiple sessions.
-// If not specified, defaults to ""
-func HashTableV2SharedName(value string) HashTableV2Attr {
+// QuantizeAndDequantizeNumBits sets the optional num_bits attribute to value.
+// If not specified, defaults to 8
+func QuantizeAndDequantizeNumBits(value int64) QuantizeAndDequantizeAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["num_bits"] = value
 	}
 }
 
-// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
-//
-// value: If true and shared_name is empty, the table is shared
-// using the node name.
+// QuantizeAndDequantizeRangeGiven sets the optional range_given attribute to value.
 // If not specified, defaults to false
-func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr {
+func QuantizeAndDequantizeRangeGiven(value bool) QuantizeAndDequantizeAttr {
 	return func(m optionalAttr) {
-		m["use_node_name_sharing"] = value
+		m["range_given"] = value
 	}
 }
 
-// Creates a non-initialized hash table.
-//
-// This op creates a hash table, specifying the type of its keys and values.
-// Before using the table you will have to initialize it.  After initialization the
-// table will be immutable.
-//
-// Arguments:
-//	key_dtype: Type of the table keys.
-//	value_dtype: Type of the table values.
+// QuantizeAndDequantizeInputMin sets the optional input_min attribute to value.
+// If not specified, defaults to 0
+func QuantizeAndDequantizeInputMin(value float32) QuantizeAndDequantizeAttr {
+	return func(m optionalAttr) {
+		m["input_min"] = value
+	}
+}
+
+// QuantizeAndDequantizeInputMax sets the optional input_max attribute to value.
+// If not specified, defaults to 0
+func QuantizeAndDequantizeInputMax(value float32) QuantizeAndDequantizeAttr {
+	return func(m optionalAttr) {
+		m["input_max"] = value
+	}
+}
+
+// Use QuantizeAndDequantizeV2 instead.
 //
-// Returns Handle to a table.
-func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) {
+// DEPRECATED at GraphDef version 22: Replaced by QuantizeAndDequantizeV2
+func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAndDequantizeAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "HashTableV2",
-
+		Type: "QuantizeAndDequantize",
+		Input: []tf.Input{
+			input,
+		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MutableHashTableV2Attr is an optional argument to MutableHashTableV2.
-type MutableHashTableV2Attr func(optionalAttr)
-
-// MutableHashTableV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this table is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func MutableHashTableV2Container(value string) MutableHashTableV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// MutableHashTableV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this table is shared under the given name across
-// multiple sessions.
-// If not specified, defaults to ""
-func MutableHashTableV2SharedName(value string) MutableHashTableV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// MutableHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
+// Returns the diagonal part of the tensor.
 //
-// value: If true and shared_name is empty, the table is shared
-// using the node name.
-// If not specified, defaults to false
-func MutableHashTableV2UseNodeNameSharing(value bool) MutableHashTableV2Attr {
-	return func(m optionalAttr) {
-		m["use_node_name_sharing"] = value
-	}
-}
-
-// Creates an empty hash table.
+// This operation returns a tensor with the `diagonal` part
+// of the `input`. The `diagonal` part is computed as follows:
 //
-// This op creates a mutable hash table, specifying the type of its keys and
-// values. Each value must be a scalar. Data can be inserted into the table using
-// the insert operations. It does not support the initialization operation.
+// Assume `input` has dimensions `[D1,..., Dk, D1,..., Dk]`, then the output is a
+// tensor of rank `k` with dimensions `[D1,..., Dk]` where:
+//
+// `diagonal[i1,..., ik] = input[i1, ..., ik, i1,..., ik]`.
+//
+// For example:
+//
+// ```
+// # 'input' is [[1, 0, 0, 0]
+//               [0, 2, 0, 0]
+//               [0, 0, 3, 0]
+//               [0, 0, 0, 4]]
+//
+// tf.diag_part(input) ==> [1, 2, 3, 4]
+// ```
 //
 // Arguments:
-//	key_dtype: Type of the table keys.
-//	value_dtype: Type of the table values.
+//	input: Rank k tensor where k is even and not zero.
 //
-// Returns Handle to a table.
-func MutableHashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableV2Attr) (table_handle tf.Output) {
+// Returns The extracted diagonal.
+func DiagPart(scope *Scope, input tf.Output) (diagonal tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype}
-	for _, a := range optional {
-		a(attrs)
-	}
 	opspec := tf.OpSpec{
-		Type: "MutableHashTableV2",
-
-		Attrs: attrs,
+		Type: "DiagPart",
+		Input: []tf.Input{
+			input,
+		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey.
-type MapUnstageNoKeyAttr func(optionalAttr)
+// QuantizedInstanceNormAttr is an optional argument to QuantizedInstanceNorm.
+type QuantizedInstanceNormAttr func(optionalAttr)
 
-// MapUnstageNoKeyCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
+// QuantizedInstanceNormOutputRangeGiven sets the optional output_range_given attribute to value.
 //
-// REQUIRES: value >= 0
-func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr {
+// value: If True, `given_y_min` and `given_y_min`
+// and `given_y_max` are used as the output range. Otherwise,
+// the implementation computes the output range.
+// If not specified, defaults to false
+func QuantizedInstanceNormOutputRangeGiven(value bool) QuantizedInstanceNormAttr {
 	return func(m optionalAttr) {
-		m["capacity"] = value
+		m["output_range_given"] = value
 	}
 }
 
-// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value.
+// QuantizedInstanceNormGivenYMin sets the optional given_y_min attribute to value.
+//
+// value: Output in `y_min` if `output_range_given` is True.
 // If not specified, defaults to 0
+func QuantizedInstanceNormGivenYMin(value float32) QuantizedInstanceNormAttr {
+	return func(m optionalAttr) {
+		m["given_y_min"] = value
+	}
+}
+
+// QuantizedInstanceNormGivenYMax sets the optional given_y_max attribute to value.
 //
-// REQUIRES: value >= 0
-func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr {
+// value: Output in `y_max` if `output_range_given` is True.
+// If not specified, defaults to 0
+func QuantizedInstanceNormGivenYMax(value float32) QuantizedInstanceNormAttr {
 	return func(m optionalAttr) {
-		m["memory_limit"] = value
+		m["given_y_max"] = value
 	}
 }
 
-// MapUnstageNoKeyContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr {
+// QuantizedInstanceNormVarianceEpsilon sets the optional variance_epsilon attribute to value.
+//
+// value: A small float number to avoid dividing by 0.
+// If not specified, defaults to 1e-05
+func QuantizedInstanceNormVarianceEpsilon(value float32) QuantizedInstanceNormAttr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["variance_epsilon"] = value
 	}
 }
 
-// MapUnstageNoKeySharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr {
+// QuantizedInstanceNormMinSeparation sets the optional min_separation attribute to value.
+//
+// value: Minimum value of `y_max - y_min`
+// If not specified, defaults to 0.001
+func QuantizedInstanceNormMinSeparation(value float32) QuantizedInstanceNormAttr {
 	return func(m optionalAttr) {
-		m["shared_name"] = value
+		m["min_separation"] = value
 	}
 }
 
-// Op removes and returns a random (key, value)
+// Quantized Instance normalization.
 //
-// from the underlying container.   If the underlying container
-// does not contain elements, the op will block until it does.
-func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) {
+// Arguments:
+//	x: A 4D input Tensor.
+//	x_min: The value represented by the lowest quantized input.
+//	x_max: The value represented by the highest quantized input.
+//
+// Returns A 4D Tensor.The value represented by the lowest quantized output.The value represented by the highest quantized output.
+func QuantizedInstanceNorm(scope *Scope, x tf.Output, x_min tf.Output, x_max tf.Output, optional ...QuantizedInstanceNormAttr) (y tf.Output, y_min tf.Output, y_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "MapUnstageNoKey",
+		Type: "QuantizedInstanceNorm",
 		Input: []tf.Input{
-			indices,
+			x, x_min, x_max,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	key = op.Output(idx)
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("MapUnstageNoKey", err)
-		return
-	}
-	return key, values
+	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad.
-type ResourceApplyProximalAdagradAttr func(optionalAttr)
+// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars.
+type FakeQuantWithMinMaxVarsAttr func(optionalAttr)
 
-// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value.
-//
-// value: If True, updating of the var and accum tensors will be protected by
-// a lock; otherwise the behavior is undefined, but may exhibit less contention.
+// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value.
+// If not specified, defaults to 8
+func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr {
+	return func(m optionalAttr) {
+		m["num_bits"] = value
+	}
+}
+
+// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value.
 // If not specified, defaults to false
-func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr {
+func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["narrow_range"] = value
 	}
 }
 
-// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
+// Fake-quantize the 'inputs' tensor of type float via global float scalars `min`
 //
-// accum += grad * grad
-// prox_v = var - lr * grad * (1 / sqrt(accum))
-// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
+// and `max` to 'outputs' tensor of same shape as `inputs`.
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regularization. Must be a scalar.
-//	l2: L2 regularization. Must be a scalar.
-//	grad: The gradient.
+// `[min; max]` define the clamping range for the `inputs` data.
+// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
+// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
+// then de-quantized and output as floats in `[min; max]` interval.
+// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive.
 //
-// Returns the created operation.
-func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) {
+// This operation has a gradient and thus allows for training `min` and `max`
+// values.
+func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -27221,235 +28001,339 @@ func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output,
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyProximalAdagrad",
+		Type: "FakeQuantWithMinMaxVars",
 		Input: []tf.Input{
-			var_, accum, lr, l1, l2, grad,
+			inputs, min, max,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2.
-type MutableHashTableOfTensorsV2Attr func(optionalAttr)
+// FakeQuantWithMinMaxVarsPerChannelGradientAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannelGradient.
+type FakeQuantWithMinMaxVarsPerChannelGradientAttr func(optionalAttr)
 
-// MutableHashTableOfTensorsV2Container sets the optional container attribute to value.
+// FakeQuantWithMinMaxVarsPerChannelGradientNumBits sets the optional num_bits attribute to value.
 //
-// value: If non-empty, this table is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr {
+// value: The bitwidth of the quantization; between 2 and 8, inclusive.
+// If not specified, defaults to 8
+func FakeQuantWithMinMaxVarsPerChannelGradientNumBits(value int64) FakeQuantWithMinMaxVarsPerChannelGradientAttr {
 	return func(m optionalAttr) {
-		m["container"] = value
+		m["num_bits"] = value
 	}
 }
 
-// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value.
+// FakeQuantWithMinMaxVarsPerChannelGradientNarrowRange sets the optional narrow_range attribute to value.
 //
-// value: If non-empty, this table is shared under the given name across
-// multiple sessions.
-// If not specified, defaults to ""
-func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value.
+// value: Whether to quantize into 2^num_bits - 1 distinct values.
 // If not specified, defaults to false
-func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr {
-	return func(m optionalAttr) {
-		m["use_node_name_sharing"] = value
-	}
-}
-
-// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value.
-// If not specified, defaults to <>
-func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr {
+func FakeQuantWithMinMaxVarsPerChannelGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsPerChannelGradientAttr {
 	return func(m optionalAttr) {
-		m["value_shape"] = value
+		m["narrow_range"] = value
 	}
 }
 
-// Creates an empty hash table.
-//
-// This op creates a mutable hash table, specifying the type of its keys and
-// values. Each value must be a vector. Data can be inserted into the table using
-// the insert operations. It does not support the initialization operation.
+// Compute gradients for a FakeQuantWithMinMaxVarsPerChannel operation.
 //
 // Arguments:
-//	key_dtype: Type of the table keys.
-//	value_dtype: Type of the table values.
+//	gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation,
+// shape one of: `[d]`, `[b, d]`,  `[b, h, w, d]`.
+//	inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation, shape
+//   same as `gradients`.
+// min, max: Quantization interval, floats of shape `[d]`.
 //
-// Returns Handle to a table.
-func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) {
+//
+//
+// Returns Backpropagated gradients w.r.t. inputs, shape same as
+// `inputs`:
+//   `gradients * (inputs >= min && inputs <= max)`.Backpropagated gradients w.r.t. min parameter, shape `[d]`:
+// `sum_per_d(gradients * (inputs < min))`.Backpropagated gradients w.r.t. max parameter, shape `[d]`:
+// `sum_per_d(gradients * (inputs > max))`.
+func FakeQuantWithMinMaxVarsPerChannelGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsPerChannelGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
-	opspec := tf.OpSpec{
-		Type: "MutableHashTableOfTensorsV2",
+	opspec := tf.OpSpec{
+		Type: "FakeQuantWithMinMaxVarsPerChannelGradient",
+		Input: []tf.Input{
+			gradients, inputs, min, max,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// QuantizeV2Attr is an optional argument to QuantizeV2.
+type QuantizeV2Attr func(optionalAttr)
+
+// QuantizeV2Mode sets the optional mode attribute to value.
+// If not specified, defaults to "MIN_COMBINED"
+func QuantizeV2Mode(value string) QuantizeV2Attr {
+	return func(m optionalAttr) {
+		m["mode"] = value
+	}
+}
 
-		Attrs: attrs,
+// QuantizeV2RoundMode sets the optional round_mode attribute to value.
+// If not specified, defaults to "HALF_AWAY_FROM_ZERO"
+func QuantizeV2RoundMode(value string) QuantizeV2Attr {
+	return func(m optionalAttr) {
+		m["round_mode"] = value
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
 }
 
-// Partitions `data` into `num_partitions` tensors using indices from `partitions`.
+// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'.
 //
-// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]`
-// becomes part of `outputs[partitions[js]]`.  The slices with `partitions[js] = i`
-// are placed in `outputs[i]` in lexicographic order of `js`, and the first
-// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`.
-// In detail,
+// [min_range, max_range] are scalar floats that specify the range for
+// the 'input' data. The 'mode' attribute controls exactly which calculations are
+// used to convert the float values to their quantized equivalents.  The
+// 'round_mode' attribute controls which rounding tie-breaking algorithm is used
+// when rounding float values to their quantized equivalents.
 //
-// ```python
-//     outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:]
+// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
 //
-//     outputs[i] = pack([data[js, ...] for js if partitions[js] == i])
 // ```
+// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
+// if T == qint8, out[i] -= (range(T) + 1) / 2.0
+// ```
+// here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
 //
-// `data.shape` must start with `partitions.shape`.
+// *MIN_COMBINED Mode Example*
 //
-// For example:
+// Assume the input is type float and has a possible range of [0.0, 6.0] and the
+// output type is quint8 ([0, 255]). The min_range and max_range values should be
+// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each
+// value of the input by 255/6 and cast to quint8.
 //
-// ```python
-//     # Scalar partitions.
-//     partitions = 1
-//     num_partitions = 2
-//     data = [10, 20]
-//     outputs[0] = []  # Empty with shape [0, 2]
-//     outputs[1] = [[10, 20]]
+// If the output type was qint8 ([-128, 127]), the operation will additionally
+// subtract each value by 128 prior to casting, so that the range of values aligns
+// with the range of qint8.
 //
-//     # Vector partitions.
-//     partitions = [0, 0, 1, 1, 0]
-//     num_partitions = 2
-//     data = [10, 20, 30, 40, 50]
-//     outputs[0] = [10, 20, 50]
-//     outputs[1] = [30, 40]
+// If the mode is 'MIN_FIRST', then this approach is used:
+//
+// ```
+// num_discrete_values = 1 << (# of bits in T)
+// range_adjust = num_discrete_values / (num_discrete_values - 1)
+// range = (range_max - range_min) * range_adjust
+// range_scale = num_discrete_values / range
+// quantized = round(input * range_scale) - round(range_min * range_scale) +
+//   numeric_limits<T>::min()
+// quantized = max(quantized, numeric_limits<T>::min())
+// quantized = min(quantized, numeric_limits<T>::max())
 // ```
 //
-// See `dynamic_stitch` for an example on how to merge partitions back.
+// The biggest difference between this and MIN_COMBINED is that the minimum range
+// is rounded first, before it's subtracted from the rounded value. With
+// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing
+// and dequantizing will introduce a larger and larger error.
 //
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/DynamicPartition.png" alt>
-// </div>
+// *SCALED mode Example*
+//
+// `SCALED` mode matches the quantization approach used in
+// `QuantizeAndDequantize{V2|V3}`.
+//
+// If the mode is `SCALED`, we do not use the full range of the output type,
+// choosing to elide the lowest possible value for symmetry (e.g., output range is
+// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to
+// 0.
+//
+// We first find the range of values in our tensor. The
+// range we use is always centered on 0, so we find m such that
+// ```c++
+//   m = max(abs(input_min), abs(input_max))
+// ```
+//
+// Our input tensor range is then `[-m, m]`.
+//
+// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.
+// If T is signed, this is
+// ```
+//   num_bits = sizeof(T) * 8
+//   [min_fixed, max_fixed] =
+//       [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]
+// ```
+//
+// Otherwise, if T is unsigned, the fixed-point range is
+// ```
+//   [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]
+// ```
+//
+// From this we compute our scaling factor, s:
+// ```c++
+//   s = (max_fixed - min_fixed) / (2 * m)
+// ```
+//
+// Now we can quantize the elements of our tensor:
+// ```c++
+// result = round(input * s)
+// ```
+//
+// One thing to watch out for is that the operator may choose to adjust the
+// requested minimum and maximum values slightly during the quantization process,
+// so you should always use the output ports as the range for further calculations.
+// For example, if the requested minimum and maximum values are close to equal,
+// they will be separated by a small epsilon value to prevent ill-formed quantized
+// buffers from being created. Otherwise, you can end up with buffers where all the
+// quantized values map to the same float value, which causes problems for
+// operations that have to perform further calculations on them.
 //
 // Arguments:
 //
-//	partitions: Any shape.  Indices in the range `[0, num_partitions)`.
-//	num_partitions: The number of partitions to output.
-func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) {
+//	min_range: The minimum scalar value possibly produced for the input.
+//	max_range: The maximum scalar value possibly produced for the input.
+//
+//
+// Returns The quantized data produced from the float input.The actual minimum scalar value used for the output.The actual maximum scalar value used for the output.
+func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_partitions": num_partitions}
+	attrs := map[string]interface{}{"T": T}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "DynamicPartition",
+		Type: "QuantizeV2",
 		Input: []tf.Input{
-			data, partitions,
+			input, min_range, max_range,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// Flushes the writer's unwritten events.
+//
+// Arguments:
+//	writer: A handle to the summary writer resource.
+//
+// Returns the created operation.
+func FlushSummaryWriter(scope *Scope, writer tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	var idx int
-	var err error
-	if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil {
-		scope.UpdateErr("DynamicPartition", err)
-		return
+	opspec := tf.OpSpec{
+		Type: "FlushSummaryWriter",
+		Input: []tf.Input{
+			writer,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// StackV2Attr is an optional argument to StackV2.
+type StackV2Attr func(optionalAttr)
+
+// StackV2StackName sets the optional stack_name attribute to value.
+//
+// value: Overrides the name used for the temporary stack resource. Default
+// value is the name of the 'Stack' op (which is guaranteed unique).
+// If not specified, defaults to ""
+func StackV2StackName(value string) StackV2Attr {
+	return func(m optionalAttr) {
+		m["stack_name"] = value
 	}
-	return outputs
 }
 
-// Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object.
+// A stack that produces elements in first-in last-out order.
 //
 // Arguments:
-//	sparse_indices: 2-D.  The `indices` of the `SparseTensor`.
-//	sparse_values: 1-D.  The `values` of the `SparseTensor`.
-//	sparse_shape: 1-D.  The `shape` of the `SparseTensor`.
-func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) (serialized_sparse tf.Output) {
+//	max_size: The maximum size of the stack if non-negative. If negative, the stack
+// size is unlimited.
+//	elem_type: The type of the elements on the stack.
+//
+// Returns The handle to the stack.
+func StackV2(scope *Scope, max_size tf.Output, elem_type tf.DataType, optional ...StackV2Attr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"elem_type": elem_type}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
-		Type: "SerializeSparse",
+		Type: "StackV2",
 		Input: []tf.Input{
-			sparse_indices, sparse_values, sparse_shape,
+			max_size,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Table initializer that takes two tensors for keys and values respectively.
+// Flushes and closes the summary writer.
+//
+// Also removes it from the resource manager. To reopen, use another
+// CreateSummaryFileWriter op.
 //
 // Arguments:
-//	table_handle: Handle to a table which will be initialized.
-//	keys: Keys of type Tkey.
-//	values: Values of type Tval.
+//	writer: A handle to the summary writer resource.
 //
 // Returns the created operation.
-func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) {
+func CloseSummaryWriter(scope *Scope, writer tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "InitializeTableV2",
+		Type: "CloseSummaryWriter",
 		Input: []tf.Input{
-			table_handle, keys, values,
+			writer,
 		},
 	}
 	return scope.AddOperation(opspec)
 }
 
-// Creates a dataset that asynchronously prefetches elements from `input_dataset`.
+// Outputs a `Summary` protocol buffer with a tensor.
 //
 // Arguments:
+//	writer: A handle to a summary writer.
+//	step: The step to write the summary for.
+//	tensor: A tensor to serialize.
+//	tag: The summary's tag.
+//	summary_metadata: Serialized SummaryMetadata protocol buffer containing
+// plugin-related metadata for this summary.
 //
-//	buffer_size: The maximum number of elements to buffer in an iterator over
-// this dataset.
-//
-//
-func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+// Returns the created operation.
+func WriteSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output, tag tf.Output, summary_metadata tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
 	opspec := tf.OpSpec{
-		Type: "PrefetchDataset",
+		Type: "WriteSummary",
 		Input: []tf.Input{
-			input_dataset, buffer_size,
+			writer, step, tensor, tag, summary_metadata,
 		},
-		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// Outputs a `Summary` protocol buffer with a tensor and per-plugin data.
+// Outputs a `tf.Event` protocol buffer.
+//
+// When CreateSummaryDbWriter is being used, this op can be useful for
+// importing data from event logs.
 //
 // Arguments:
-//	tag: A string attached to this summary. Used for organization in TensorBoard.
-//	tensor: A tensor to serialize.
-//	serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin
-// data.
-func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) {
+//	writer: A handle to a summary writer.
+//	event: A string containing a binary-encoded tf.Event proto.
+//
+// Returns the created operation.
+func ImportEvent(scope *Scope, writer tf.Output, event tf.Output) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "TensorSummaryV2",
+		Type: "ImportEvent",
 		Input: []tf.Input{
-			tag, tensor, serialized_summary_metadata,
+			writer, event,
 		},
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
diff --git a/tensorflow/go/session.go b/tensorflow/go/session.go
index fc914f86df365e7d328fe2fc95f032885c717b31..db6ae4f26cd92dcf5e542052e4bae561bbefe999 100644
--- a/tensorflow/go/session.go
+++ b/tensorflow/go/session.go
@@ -65,6 +65,51 @@ func NewSession(graph *Graph, options *SessionOptions) (*Session, error) {
 	return s, nil
 }
 
+// Device structure contains information about a device associated with a session, as returned by ListDevices()
+type Device struct {
+	Name, Type       string
+	MemoryLimitBytes int64
+}
+
+// Return list of devices associated with a Session
+func (s *Session) ListDevices() ([]Device, error) {
+	var devices []Device
+
+	status := newStatus()
+	devices_list := C.TF_SessionListDevices(s.c, status.c)
+	if err := status.Err(); err != nil {
+		return nil, fmt.Errorf("SessionListDevices() failed: %v", err)
+	}
+	defer C.TF_DeleteDeviceList(devices_list)
+
+	for i := 0; i < int(C.TF_DeviceListCount(devices_list)); i++ {
+		device_name := C.TF_DeviceListName(devices_list, C.int(i), status.c)
+		if err := status.Err(); err != nil {
+			return nil, fmt.Errorf("DeviceListName(index=%d) failed: %v", i, err)
+		}
+
+		device_type := C.TF_DeviceListType(devices_list, C.int(i), status.c)
+		if err := status.Err(); err != nil {
+			return nil, fmt.Errorf("DeviceListType(index=%d) failed: %v", i, err)
+		}
+
+		memory_limit_bytes := C.TF_DeviceListMemoryBytes(devices_list, C.int(i), status.c)
+		if err := status.Err(); err != nil {
+			return nil, fmt.Errorf("DeviceListMemoryBytes(index=%d) failed: %v", i, err)
+		}
+
+		device := Device{
+			Name:             C.GoString(device_name),
+			Type:             C.GoString(device_type),
+			MemoryLimitBytes: int64(memory_limit_bytes),
+		}
+
+		devices = append(devices, device)
+	}
+
+	return devices, nil
+}
+
 // Run the graph with the associated session starting with the supplied feeds
 // to compute the value of the requested fetches. Runs, but does not return
 // Tensors for operations specified in targets.
diff --git a/tensorflow/go/session_test.go b/tensorflow/go/session_test.go
index 73d78a8e5773d8bc25f349c9736bda4595bea64e..05ace99a2387c6884832427187525f2fb7d5aba2 100644
--- a/tensorflow/go/session_test.go
+++ b/tensorflow/go/session_test.go
@@ -283,3 +283,19 @@ func TestSessionConfig(t *testing.T) {
 		t.Fatalf("Got %v, want -1", output[0].Value())
 	}
 }
+
+func TestListDevices(t *testing.T) {
+	s, err := NewSession(NewGraph(), nil)
+	if err != nil {
+		t.Fatalf("NewSession(): %v", err)
+	}
+
+	devices, err := s.ListDevices()
+	if err != nil {
+		t.Fatalf("ListDevices(): %v", err)
+	}
+
+	if len(devices) == 0 {
+		t.Fatalf("no devices detected")
+	}
+}
diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go
index cd6f4bc1f02326728320c1f307d3ce0fbd744b44..2d25c04dc9b1d0bc2ae831f98c0879e73a6bfafa 100644
--- a/tensorflow/go/tensor.go
+++ b/tensorflow/go/tensor.go
@@ -270,7 +270,7 @@ func typeOf(dt DataType, shape []int64) reflect.Type {
 		}
 	}
 	if ret == nil {
-		panic(bug("DataType %v is not supported", dt))
+		panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt))
 	}
 	for range shape {
 		ret = reflect.SliceOf(ret)
diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD
index c0563da06d99bcf06477c094b560ceff6a01eff0..9dee1aa72bf0d76ee35931f1e852bfd22556a540 100644
--- a/tensorflow/java/BUILD
+++ b/tensorflow/java/BUILD
@@ -14,6 +14,7 @@ load(
     "tf_copts",
     "tf_custom_op_library",
     "tf_java_test",
+    "tf_cc_test",
 )
 
 java_library(
@@ -97,10 +98,26 @@ tf_java_op_gen_srcjar(
 # file before making it an executable. See tf_java_op_gen_srcjar().
 cc_library(
     name = "java_op_gen_tool",
-    srcs = glob([
-        "src/gen/cc/*.h",
-        "src/gen/cc/*.cc",
-    ]),
+    srcs = [
+        "src/gen/cc/op_gen_main.cc",
+    ],
+    copts = tf_copts(),
+    deps = [
+        ":java_op_gen_lib",
+    ],
+)
+
+cc_library(
+    name = "java_op_gen_lib",
+    srcs = [
+        "src/gen/cc/op_generator.cc",
+        "src/gen/cc/source_writer.cc",
+    ],
+    hdrs = [
+        "src/gen/cc/java_defs.h",
+        "src/gen/cc/op_generator.h",
+        "src/gen/cc/source_writer.h",
+    ],
     copts = tf_copts(),
     deps = [
         "//tensorflow/core:framework",
@@ -280,21 +297,6 @@ tf_java_test(
     ],
 )
 
-#java_test(
-#    name = "OperatorProcessorTest",
-#    size = "small",
-#    srcs = ["src/test/java/org/tensorflow/processor/OperatorProcessorTest.java"],
-#    javacopts = JAVACOPTS,
-#    resources = [":processor_test_resources"],
-#    test_class = "org.tensorflow.processor.OperatorProcessorTest",
-#    deps = [
-#        ":processor_library",
-#        "//third_party/java/junit",
-#        "@com_google_testing_compile",
-#        "@com_google_truth",
-#    ],
-#)
-
 filegroup(
     name = "processor_test_resources",
     srcs = glob([
@@ -303,6 +305,20 @@ filegroup(
     ]),
 )
 
+tf_cc_test(
+    name = "source_writer_test",
+    size = "small",
+    srcs = [
+        "src/gen/cc/source_writer_test.cc",
+    ],
+    deps = [
+        ":java_op_gen_lib",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 filegroup(
     name = "libtensorflow_jni",
     srcs = select({
diff --git a/tensorflow/java/maven/.gitignore b/tensorflow/java/maven/.gitignore
index 0e11e83a0cb649425b2072f24b0d7106c08cff81..ff080515d5e730b308bf78f7e28244c6c799cdc3 100644
--- a/tensorflow/java/maven/.gitignore
+++ b/tensorflow/java/maven/.gitignore
@@ -5,7 +5,10 @@ libtensorflow/src
 libtensorflow/target
 libtensorflow_jni/src
 libtensorflow_jni/target
+libtensorflow_jni_gpu/src
+libtensorflow_jni_gpu/target
 tensorflow/src
 tensorflow/target
 proto/src
 proto/target
+pom.xml.versionsBackup
diff --git a/tensorflow/java/maven/README.md b/tensorflow/java/maven/README.md
index 622777536188df4462550b8dc471b64328ad204f..c7e8f0380629f492ade9ba47cdcb4bc286ac82bc 100644
--- a/tensorflow/java/maven/README.md
+++ b/tensorflow/java/maven/README.md
@@ -22,11 +22,12 @@ Hence, the process for building and uploading release artifacts is not a single
 
 ## Artifact Structure
 
-There are six artifacts and thus `pom.xml`s involved in this release:
+There are seven artifacts and thus `pom.xml`s involved in this release:
 
 1.  `tensorflow`: The single dependency for projects requiring TensorFlow for
-    Java. This convenience package depends on the two below, and is the one that
-    should typically be used in other programs.
+    Java. This convenience package depends on `libtensorflow` and
+    `libtensorflow_jni`. Typically, this is the single dependency that should
+    be used by client programs (unless GPU support is required).
 
 2.  `libtensorflow`: Java-only code for the [TensorFlow Java API](https://www.tensorflow.org/api_docs/java/reference/org/tensorflow/package-summary).
     The `.jar` itself has no native code, but requires the native code be either
@@ -36,15 +37,20 @@ There are six artifacts and thus `pom.xml`s involved in this release:
 3.  `libtensorflow_jni`: The native libraries required by `libtensorflow`.
     Native code for all supported platforms is packaged into a single `.jar`.
 
-4.  `proto`: Generated Java code for TensorFlow protocol buffers
+4.  `libtensorflow_jni_gpu`: The native libraries required by `libtensorflow`
+    with GPU (CUDA) support enabled. Programs requiring GPU-enabled TensorFlow
+    should add a dependency on `libtensorflow` and `libtensorflow_jni_gpu`.
+    As of January 2018, this artifact is *Linux only*.
+
+5.  `proto`: Generated Java code for TensorFlow protocol buffers
     (e.g., `MetaGraphDef`, `ConfigProto` etc.)
 
-5. `tensorflow-android`: A package geared towards
+6. `tensorflow-android`: A package geared towards
     supporting [TensorFlow on Android](../../contrib/android/README.md), and is
     a self-contained Android AAR library containing all necessary native and
     Java code.
 
-6.  [`parentpom`](https://maven.apache.org/pom/index.html): Common settings
+7.  [`parentpom`](https://maven.apache.org/pom/index.html): Common settings
     shared by all of the above.
 
 
diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml
index d365c39ef4a5b10f45f6045567082724510fab54..6285ee0483d9171d6cdb9b4dbf2675bafb953038 100644
--- a/tensorflow/java/maven/libtensorflow/pom.xml
+++ b/tensorflow/java/maven/libtensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.4.0</version>
+    <version>1.5.0-rc1</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml
index 0111fc62a4d6bfb27e51fd40778edf37f8c2e501..b0e5c44fecc9bf3a95ac3d4e36d9f98d74d3b2bb 100644
--- a/tensorflow/java/maven/libtensorflow_jni/pom.xml
+++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.4.0</version>
+    <version>1.5.0-rc1</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow_jni</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
new file mode 100644
index 0000000000000000000000000000000000000000..02c5dca13f4d292718afca7e99bac82710e1949f
--- /dev/null
+++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
@@ -0,0 +1,15 @@
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+  <modelVersion>4.0.0</modelVersion>
+  <description>Platform-dependent native code with GPU (CUDA) support for the TensorFlow Java library.</description>
+  <parent>
+    <groupId>org.tensorflow</groupId>
+    <artifactId>parentpom</artifactId>
+    <version>1.5.0-rc1</version>
+    <relativePath>../</relativePath>
+  </parent>
+  <artifactId>libtensorflow_jni_gpu</artifactId>
+  <packaging>jar</packaging>
+</project>
+
diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml
index 06042216b4612e4a55f712b8f941b53c2bdf1daf..949597ca7f1e7a05cf6c0e5a15cb5307b00859a1 100644
--- a/tensorflow/java/maven/pom.xml
+++ b/tensorflow/java/maven/pom.xml
@@ -6,7 +6,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.tensorflow</groupId>
   <artifactId>parentpom</artifactId>
-  <version>1.4.0</version>
+  <version>1.5.0-rc1</version>
   <packaging>pom</packaging>
 
   <url>https://www.tensorflow.org</url>
@@ -29,6 +29,7 @@
   <modules>
     <module>libtensorflow</module>
     <module>libtensorflow_jni</module>
+    <module>libtensorflow_jni_gpu</module>
     <module>tensorflow</module>
     <module>proto</module>
   </modules>
diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml
index 2c9d76b563377c3fc4ecede0460ef4e53e27b417..9f0ebcf84c9c8e01662a93034a4407c6b58a6d7e 100644
--- a/tensorflow/java/maven/proto/pom.xml
+++ b/tensorflow/java/maven/proto/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.4.0</version>
+    <version>1.5.0-rc1</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>proto</artifactId>
diff --git a/tensorflow/java/maven/run_inside_container.sh b/tensorflow/java/maven/run_inside_container.sh
index a2ce097195450eff566f0be48ca4f1a6b99401cc..6136ccfdfb92d6a71c440b23dc0a13ebe86c52e6 100644
--- a/tensorflow/java/maven/run_inside_container.sh
+++ b/tensorflow/java/maven/run_inside_container.sh
@@ -44,7 +44,7 @@ clean() {
   # (though if run inside a clean docker container, there won't be any dirty
   # artifacts lying around)
   mvn -q clean
-  rm -rf libtensorflow_jni/src libtensorflow_jni/target libtensorflow/src libtensorflow/target tensorflow-android/target
+  rm -rf libtensorflow_jni/src libtensorflow_jni/target libtensorflow_jni_gpu/src libtensorflow_jni_gpu/target libtensorflow/src libtensorflow/target tensorflow-android/target
 }
 
 update_version_in_pom() {
@@ -119,6 +119,26 @@ download_libtensorflow_jni() {
   cd "${DIR}"
 }
 
+download_libtensorflow_jni_gpu() {
+  NATIVE_DIR="${DIR}/libtensorflow_jni_gpu/src/main/resources/org/tensorflow/native"
+  mkdir -p "${NATIVE_DIR}"
+  cd "${NATIVE_DIR}"
+
+  mkdir linux-x86_64
+
+  if [[ "${IS_SNAPSHOT}" == "true" ]]; then
+    # Nightly builds from http://ci.tensorflow.org/view/Nightly/job/nightly-libtensorflow/
+    # and http://ci.tensorflow.org/view/Nightly/job/nightly-libtensorflow-windows/
+    curl -L "http://ci.tensorflow.org/view/Nightly/job/nightly-libtensorflow/TYPE=gpu-linux/lastSuccessfulBuild/artifact/lib_package/libtensorflow_jni-gpu-linux-x86_64.tar.gz" | tar -xvz -C linux-x86_64
+  else
+    curl -L "${RELEASE_URL_PREFIX}/libtensorflow_jni-gpu-linux-x86_64-${TF_VERSION}.tar.gz" | tar -xvz -C linux-x86_64
+  fi
+
+  # Updated timestamps seem to be required to get Maven to pick up the file.
+  touch linux-x86_64/*
+  cd "${DIR}"
+}
+
 # Ideally, the .jar for generated Java code for TensorFlow protocol buffer files
 # would have been produced by bazel rules. However, protocol buffer library
 # support in bazel is in flux. Once
@@ -225,6 +245,7 @@ clean
 update_version_in_pom
 download_libtensorflow
 download_libtensorflow_jni
+download_libtensorflow_jni_gpu
 update_tensorflow_android
 generate_java_protos
 # Build the release artifacts
diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml
index 474a9adb9ae6cbedcc8f67abb0431710f2ecbef9..88d897362ad6c8f84d93cbc9bcf3c30905b345be 100644
--- a/tensorflow/java/maven/tensorflow/pom.xml
+++ b/tensorflow/java/maven/tensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.4.0</version>
+    <version>1.5.0-rc1</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>tensorflow</artifactId>
diff --git a/tensorflow/java/src/gen/cc/java_defs.h b/tensorflow/java/src/gen/cc/java_defs.h
new file mode 100644
index 0000000000000000000000000000000000000000..615cdc165b36abdc3cf5e717ddb8b385367c067f
--- /dev/null
+++ b/tensorflow/java/src/gen/cc/java_defs.h
@@ -0,0 +1,273 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_JAVA_SRC_GEN_CC_JAVA_DEFS_H_
+#define TENSORFLOW_JAVA_SRC_GEN_CC_JAVA_DEFS_H_
+
+#include <string>
+#include <vector>
+#include <deque>
+
+#include "tensorflow/core/platform/env.h"
+
+namespace tensorflow {
+namespace java {
+
+// An enumeration of different modifiers commonly used in Java
+enum Modifier {
+  PUBLIC    = (1 << 0),
+  PROTECTED = (1 << 1),
+  PRIVATE   = (1 << 2),
+  STATIC    = (1 << 3),
+  FINAL     = (1 << 4),
+};
+
+class Annotation;
+
+// A definition of any kind of Java type (classes, interfaces...)
+//
+// Note that most of the data fields of this class are only useful in specific
+// contexts and are not required in many cases. For example, annotations and
+// supertypes are only useful when declaring a type.
+class Type {
+ public:
+  enum Kind {
+    PRIMITIVE, CLASS, INTERFACE, ENUM, GENERIC, ANNOTATION
+  };
+  static const Type Byte() {
+    return Type(Type::PRIMITIVE, "byte");
+  }
+  static const Type Char() {
+    return Type(Type::PRIMITIVE, "char");
+  }
+  static const Type Short() {
+    return Type(Type::PRIMITIVE, "short");
+  }
+  static const Type Int() {
+    return Type(Type::PRIMITIVE, "int");
+  }
+  static const Type Long() {
+    return Type(Type::PRIMITIVE, "long");
+  }
+  static const Type Float() {
+    return Type(Type::PRIMITIVE, "float");
+  }
+  static const Type Double() {
+    return Type(Type::PRIMITIVE, "double");
+  }
+  static const Type Boolean() {
+    return Type(Type::PRIMITIVE, "boolean");
+  }
+  static const Type Void() {
+    // For simplicity, we consider 'void' as a primitive type, like the Java
+    // Reflection API does
+    return Type(Type::PRIMITIVE, "void");
+  }
+  static Type Class(const string& name, const string& package = "") {
+    return Type(Type::CLASS, name, package);
+  }
+  static Type Interface(const string& name, const string& package = "") {
+    return Type(Type::INTERFACE, name, package);
+  }
+  static Type Enum(const string& name, const string& package = "") {
+    return Type(Type::ENUM, name, package);
+  }
+  static Type Generic(const string& name = "") {
+    return Type(Type::GENERIC, name);
+  }
+  static Type ClassOf(const Type& type) {
+    return Class("Class").add_parameter(type);
+  }
+  static Type ListOf(const Type& type) {
+    return Interface("List", "java.util").add_parameter(type);
+  }
+  static Type IterableOf(const Type& type) {
+    return Interface("Iterable").add_parameter(type);
+  }
+  const Kind& kind() const { return kind_; }
+  const string& name() const { return name_; }
+  const string& package() const { return package_; }
+  const string& description() const { return description_; }
+  Type& description(const string& description) {
+    description_ = description;
+    return *this;
+  }
+  const std::vector<Type>& parameters() const { return parameters_; }
+  Type& add_parameter(const Type& parameter) {
+    parameters_.push_back(parameter);
+    return *this;
+  }
+  const std::vector<Annotation>& annotations() const { return annotations_; }
+  Type& add_annotation(const Annotation& annotation) {
+    annotations_.push_back(annotation);
+    return *this;
+  }
+  const std::deque<Type>& supertypes() const { return supertypes_; }
+  Type& add_supertype(const Type& type) {
+    if (type.kind_ == CLASS) {
+      supertypes_.push_front(type);  // keep superclass at the front of the list
+    } else if (type.kind_ == INTERFACE) {
+      supertypes_.push_back(type);
+    }
+    return *this;
+  }
+  // Returns true if "type" is of a known collection type (only a few for now)
+  bool IsCollection() const {
+    return name_ == "List" || name_ == "Iterable";
+  }
+  // Returns true if this instance is a wildcard (<?>)
+  bool IsWildcard() const {
+    return kind_ == GENERIC && name_.empty();
+  }
+
+ protected:
+  Type(Kind kind, const string& name, const string& package = "")
+    : kind_(kind), name_(name), package_(package) {}
+
+ private:
+  Kind kind_;
+  string name_;
+  string package_;
+  string description_;
+  std::vector<Type> parameters_;
+  std::vector<Annotation> annotations_;
+  std::deque<Type> supertypes_;
+};
+
+// Definition of a Java annotation
+//
+// This class only defines the usage of an annotation in a specific context,
+// giving optionally a set of attributes to initialize.
+class Annotation : public Type {
+ public:
+  static Annotation Create(const string& type_name, const string& pkg = "") {
+    return Annotation(type_name, pkg);
+  }
+  const string& attributes() const { return attributes_; }
+  Annotation& attributes(const string& attributes) {
+    attributes_ = attributes;
+    return *this;
+  }
+
+ private:
+  string attributes_;
+
+  Annotation(const string& name, const string& package)
+    : Type(Kind::ANNOTATION, name, package) {}
+};
+
+// A definition of a Java variable
+//
+// This class declares an instance of a type, such as a class field or a
+// method argument, which can be documented.
+class Variable {
+ public:
+  static Variable Create(const string& name, const Type& type) {
+    return Variable(name, type, false);
+  }
+  static Variable Varargs(const string& name, const Type& type) {
+    return Variable(name, type, true);
+  }
+  const string& name() const { return name_; }
+  const Type& type() const { return type_; }
+  bool variadic() const { return variadic_; }
+  const string& description() const { return description_; }
+  Variable& description(const string& description) {
+    description_ = description;
+    return *this;
+  }
+ private:
+  string name_;
+  Type type_;
+  bool variadic_;
+  string description_;
+
+  Variable(const string& name, const Type& type, bool variadic)
+    : name_(name), type_(type), variadic_(variadic) {}
+};
+
+// A definition of a Java class method
+//
+// This class defines the signature of a method, including its name, return
+// type and arguments.
+class Method {
+ public:
+  static Method Create(const string& name, const Type& return_type) {
+    return Method(name, return_type, false);
+  }
+  static Method ConstructorFor(const Type& clazz) {
+    return Method(clazz.name(), clazz, true);
+  }
+  bool constructor() const { return constructor_; }
+  const string& name() const { return name_; }
+  const Type& return_type() const { return return_type_; }
+  const string& description() const { return description_; }
+  Method& description(const string& description) {
+    description_ = description;
+    return *this;
+  }
+  const string& return_description() const { return return_description_; }
+  Method& return_description(const string& description) {
+    return_description_ = description;
+    return *this;
+  }
+  const std::vector<Variable>& arguments() const { return arguments_; }
+  Method& add_arguments(const std::vector<Variable>& args) {
+    arguments_.insert(arguments_.cend(), args.cbegin(), args.cend());
+    return *this;
+  }
+  Method& add_argument(const Variable& var) {
+    arguments_.push_back(var);
+    return *this;
+  }
+  const std::vector<Annotation>& annotations() const { return annotations_; }
+  Method& add_annotation(const Annotation& annotation) {
+    annotations_.push_back(annotation);
+    return *this;
+  }
+
+ private:
+  string name_;
+  Type return_type_;
+  bool constructor_;
+  string description_;
+  string return_description_;
+  std::vector<Variable> arguments_;
+  std::vector<Annotation> annotations_;
+
+  Method(const string& name, const Type& return_type, bool constructor)
+    : name_(name), return_type_(return_type), constructor_(constructor) {}
+};
+
+// A piece of code to read from a file.
+class Snippet {
+ public:
+  static Snippet Create(const string& fname, Env* env = Env::Default()) {
+    return Snippet(fname, env);
+  }
+  const string& data() const { return data_; }
+
+ private:
+  string data_;
+
+  Snippet(const string& fname, Env* env) {
+    TF_CHECK_OK(ReadFileToString(env, fname, &data_));
+  }
+};
+
+}  // namespace java
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_JAVA_SRC_GEN_CC_JAVA_DEFS_H_
diff --git a/tensorflow/java/src/gen/cc/op_gen_main.cc b/tensorflow/java/src/gen/cc/op_gen_main.cc
index a7c66dda893a3109e0e0bfe76f5becef766afb0e..bea99f3d7f6bea1ebc7097cbd7aae1fa7e5a87fa 100644
--- a/tensorflow/java/src/gen/cc/op_gen_main.cc
+++ b/tensorflow/java/src/gen/cc/op_gen_main.cc
@@ -25,7 +25,7 @@
 #include "tensorflow/java/src/gen/cc/op_generator.h"
 
 namespace tensorflow {
-namespace op_gen {
+namespace java {
 
 const char kUsageHeader[] =
     "\n\nGenerator of operation wrappers in Java.\n\n"
@@ -51,7 +51,7 @@ const char kUsageHeader[] =
     "Finally, the '--base_package' overrides the default parent package "
     "under which the generated subpackage and classes are to be located.\n\n";
 
-}  // namespace op_gen
+}  // namespace java
 }  // namespace tensorflow
 
 int main(int argc, char* argv[]) {
@@ -67,13 +67,13 @@ int main(int argc, char* argv[]) {
       tensorflow::Flag(
           "base_package", &base_package,
           "Package parent to the generated subpackage and classes")};
-  tensorflow::string usage = tensorflow::op_gen::kUsageHeader;
+  tensorflow::string usage = tensorflow::java::kUsageHeader;
   usage += tensorflow::Flags::Usage(argv[0], flag_list);
   bool parsed_flags_ok = tensorflow::Flags::Parse(&argc, argv, flag_list);
   tensorflow::port::InitMain(usage.c_str(), &argc, &argv);
   QCHECK(parsed_flags_ok && !lib_name.empty() && !output_dir.empty()) << usage;
 
-  tensorflow::OpGenerator generator;
+  tensorflow::java::OpGenerator generator;
   tensorflow::OpList ops;
   tensorflow::OpRegistry::Global()->Export(true, &ops);
   tensorflow::Status status =
diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc
index df130c32e6afcba157da282026280756b778f3ad..def06baf2db43e1fa42f03cf9619abd34785cea7 100644
--- a/tensorflow/java/src/gen/cc/op_generator.cc
+++ b/tensorflow/java/src/gen/cc/op_generator.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/java/src/gen/cc/op_generator.h"
 
 namespace tensorflow {
+namespace java {
 namespace {
 
 string CamelCase(const string& str, char delimiter, bool upper) {
@@ -63,4 +64,5 @@ Status OpGenerator::Run(const OpList& ops, const string& lib_name,
   return Status::OK();
 }
 
+}  // namespace java
 }  // namespace tensorflow
diff --git a/tensorflow/java/src/gen/cc/op_generator.h b/tensorflow/java/src/gen/cc/op_generator.h
index eec1082b5162298e68fbd05d82d5563777e865db..4b55ed3ed94f11c1f810c0a56989853ee1154587 100644
--- a/tensorflow/java/src/gen/cc/op_generator.h
+++ b/tensorflow/java/src/gen/cc/op_generator.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/platform/env.h"
 
 namespace tensorflow {
+namespace java {
 
 /// \brief A generator of Java operation wrappers.
 ///
@@ -46,6 +47,7 @@ class OpGenerator {
   Env* env;
 };
 
+}  // namespace java
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_JAVA_SRC_GEN_CC_OP_GENERATOR_H_
diff --git a/tensorflow/java/src/gen/cc/source_writer.cc b/tensorflow/java/src/gen/cc/source_writer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2da81f2911e60be6a47ac13fe8be6142fa283780
--- /dev/null
+++ b/tensorflow/java/src/gen/cc/source_writer.cc
@@ -0,0 +1,62 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <string>
+
+#include "tensorflow/java/src/gen/cc/source_writer.h"
+
+namespace tensorflow {
+
+SourceWriter& SourceWriter::Append(const StringPiece& str) {
+  if (!str.empty()) {
+    if (newline_) {
+      DoAppend(left_margin_ + line_prefix_);
+      newline_ = false;
+    }
+    DoAppend(str);
+  }
+  return *this;
+}
+
+SourceWriter& SourceWriter::Write(const string& str) {
+  size_t line_pos = 0;
+  do {
+    size_t start_pos = line_pos;
+    line_pos = str.find('\n', start_pos);
+    if (line_pos != string::npos) {
+      ++line_pos;
+      Append(StringPiece(str.data() + start_pos, line_pos - start_pos));
+      newline_ = true;
+    } else {
+      Append(StringPiece(str.data() + start_pos, str.size() - start_pos));
+    }
+  } while (line_pos != string::npos && line_pos < str.size());
+
+  return *this;
+}
+
+SourceWriter& SourceWriter::EndLine() {
+  Append("\n");
+  newline_ = true;
+  return *this;
+}
+
+SourceWriter& SourceWriter::Indent(int tab) {
+  left_margin_.resize(std::max(static_cast<int>(left_margin_.size() + tab), 0),
+                      ' ');
+  return *this;
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/java/src/gen/cc/source_writer.h b/tensorflow/java/src/gen/cc/source_writer.h
new file mode 100644
index 0000000000000000000000000000000000000000..bff26eb185db0cf933632f33f916b87d8a757edd
--- /dev/null
+++ b/tensorflow/java/src/gen/cc/source_writer.h
@@ -0,0 +1,133 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_JAVA_SRC_GEN_CC_SOURCE_WRITER_H_
+#define TENSORFLOW_JAVA_SRC_GEN_CC_SOURCE_WRITER_H_
+
+#include <string>
+
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/env.h"
+
+namespace tensorflow {
+
+// A utility class for writing source code, normally generated at
+// compile-time.
+//
+// Source writers are language-agnostic and therefore only expose generic
+// methods common to most languages. Extend or wrap this class to implement
+// language-specific features.
+//
+// Note: if you are looking to reuse this class for generating code in another
+// language than Java, please do by moving it at the '//tensorflow/core/lib/io'
+// level.
+class SourceWriter {
+ public:
+  virtual ~SourceWriter() = default;
+
+  // Returns true if the writer is at the beginnig of a new line
+  bool newline() const { return newline_; }
+
+  // Appends a piece of code or text.
+  //
+  // It is expected that no newline character is present in the data provided,
+  // otherwise Write() must be used.
+  SourceWriter& Append(const StringPiece& str);
+
+  // Writes a block of code or text.
+  //
+  // The data might potentially contain newline characters, therefore it will
+  // be scanned to ensure that each line is indented and prefixed properly,
+  // making it a bit slower than Append().
+  SourceWriter& Write(const string& text);
+
+  // Appends a newline character and start writing on a new line.
+  SourceWriter& EndLine();
+
+  // Indents following lines with white spaces.
+  //
+  // Indentation is cumulative, i.e. the provided tabulation is added to the
+  // current indentation value. If the tabulation is negative, the operation
+  // will outdent the source code, until the indentation reaches 0 again.
+  //
+  // For example, calling Indent(2) twice will indent code with 4 white
+  // spaces. Then calling Indent(-2) will outdent the code back to 2 white
+  // spaces.
+  SourceWriter& Indent(int tab);
+
+  // Prefixes following lines with provided character(s).
+  //
+  // A common use case of a prefix is for commenting or documenting the code.
+  //
+  // The prefix is written after the indentation, For example, invoking
+  // Indent(2)->Prefix("//") will result in prefixing lines with "  //".
+  //
+  // An empty value ("") will remove any line prefix that was previously set.
+  SourceWriter& Prefix(const char* line_prefix) {
+    line_prefix_ = line_prefix;
+    return *this;
+  }
+
+ protected:
+  virtual void DoAppend(const StringPiece& str) = 0;
+
+ private:
+  string left_margin_;
+  string line_prefix_;
+  bool newline_ = true;
+};
+
+// A writer that outputs source code into a file.
+//
+// Note: the writer does not acquire the ownership of the file being passed in
+// parameter.
+class SourceFileWriter : public SourceWriter {
+ public:
+  explicit SourceFileWriter(WritableFile* file) : file_(file) {}
+  virtual ~SourceFileWriter() = default;
+
+ protected:
+  void DoAppend(const StringPiece& str) override {
+    TF_CHECK_OK(file_->Append(str));
+  }
+
+ private:
+  WritableFile* file_;
+};
+
+// A writer that outputs source code into a string buffer.
+class SourceBufferWriter : public SourceWriter {
+ public:
+  SourceBufferWriter() : owns_buffer_(true), buffer_(new string()) {}
+  explicit SourceBufferWriter(string* buffer)
+      : owns_buffer_(false), buffer_(buffer) {}
+  virtual ~SourceBufferWriter() {
+    if (owns_buffer_) delete buffer_;
+  }
+  const string& str() { return *buffer_; }
+
+ protected:
+  void DoAppend(const StringPiece& str) override {
+    buffer_->append(str.begin(), str.end());
+  }
+
+ private:
+  bool owns_buffer_;
+  string* buffer_;
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_JAVA_SRC_GEN_CC_SOURCE_WRITER_H_
diff --git a/tensorflow/java/src/gen/cc/source_writer_test.cc b/tensorflow/java/src/gen/cc/source_writer_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e9738957548184726395c4e6634ba12a5a9a0109
--- /dev/null
+++ b/tensorflow/java/src/gen/cc/source_writer_test.cc
@@ -0,0 +1,215 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/java/src/gen/cc/source_writer.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+TEST(AppendTest, SingleLineText) {
+  SourceBufferWriter writer;
+  writer.Append("You say goodbye and I say hello!");
+
+  const char* expected = "You say goodbye and I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(AppendTest, MultiLineText) {
+  SourceBufferWriter writer;
+  writer.Append("You say goodbye\nand I say hello!");
+
+  const char* expected = "You say goodbye\nand I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(AppendTest, MultiLineTextWithIndent) {
+  SourceBufferWriter writer;
+  writer.Indent(2).Append("You say goodbye\nand I say hello!");
+
+  const char* expected = "  You say goodbye\nand I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(AppendTest, MultiLineTextWithPrefix) {
+  SourceBufferWriter writer;
+  writer.Prefix("--").Append("You say goodbye\nand I say hello!");
+
+  const char* expected = "--You say goodbye\nand I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(AppendTest, MultiLineTextWithIndentAndPrefix) {
+  SourceBufferWriter writer;
+  writer.Indent(2).Prefix("--").Append("You say goodbye\nand I say hello!");
+
+  const char* expected = "  --You say goodbye\nand I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(WriteTest, SingleLineText) {
+  SourceBufferWriter writer;
+  writer.Write("You say goodbye and I say hello!");
+
+  const char* expected = "You say goodbye and I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(WriteTest, MultiLineText) {
+  SourceBufferWriter writer;
+  writer.Write("You say goodbye\nand I say hello!");
+
+  const char* expected = "You say goodbye\nand I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(WriteTest, MultiLineTextWithIndent) {
+  SourceBufferWriter writer;
+  writer.Indent(2).Write("You say goodbye\nand I say hello!");
+
+  const char* expected = "  You say goodbye\n  and I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(WriteTest, MultiLineTextWithPrefix) {
+  SourceBufferWriter writer;
+  writer.Prefix("--").Write("You say goodbye\nand I say hello!");
+
+  const char* expected = "--You say goodbye\n--and I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(WriteTest, MultiLineTextWithIndentAndPrefix) {
+  SourceBufferWriter writer;
+  writer.Indent(2).Prefix("--").Write("You say goodbye\nand I say hello!");
+
+  const char* expected = "  --You say goodbye\n  --and I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(MarginTest, Basic) {
+  SourceBufferWriter writer;
+  writer.Append("You say goodbye").EndLine().Append("and I say hello!");
+
+  const char* expected = "You say goodbye\nand I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(MarginTest, Indent) {
+  SourceBufferWriter writer;
+  writer.Append("You say goodbye")
+      .EndLine()
+      .Indent(2)
+      .Append("and I say hello!");
+
+  const char* expected = "You say goodbye\n  and I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(MarginTest, IndentAndOutdent) {
+  SourceBufferWriter writer;
+  writer.Append("You say goodbye")
+      .EndLine()
+      .Indent(2)
+      .Append("and I say hello!")
+      .EndLine()
+      .Indent(-2)
+      .Append("Hello, hello!");
+
+  const char* expected = "You say goodbye\n  and I say hello!\nHello, hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(MarginTest, Prefix) {
+  SourceBufferWriter writer;
+  writer.Append("You say goodbye")
+      .EndLine()
+      .Prefix("--")
+      .Append("and I say hello!");
+
+  const char* expected = "You say goodbye\n--and I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(MarginTest, PrefixAndRemovePrefix) {
+  SourceBufferWriter writer;
+  writer.Append("You say goodbye")
+      .EndLine()
+      .Prefix("--")
+      .Append("and I say hello!")
+      .EndLine()
+      .Prefix("")
+      .Append("Hello, hello!");
+
+  const char* expected = "You say goodbye\n--and I say hello!\nHello, hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(MarginTest, IndentAndPrefixAndOutdentAndRemovePrefix) {
+  SourceBufferWriter writer;
+  writer.Append("You say goodbye")
+      .EndLine()
+      .Indent(2)
+      .Prefix("--")
+      .Append("and I say hello!")
+      .EndLine()
+      .Indent(-2)
+      .Prefix("")
+      .Append("Hello, hello!");
+
+  const char* expected = "You say goodbye\n  --and I say hello!\nHello, hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(MarginTest, NegativeIndent) {
+  SourceBufferWriter writer;
+  writer.Append("You say goodbye")
+      .EndLine()
+      .Indent(-10)
+      .Append("and I say hello!");
+
+  const char* expected = "You say goodbye\nand I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(MarginTest, CumulativeIndent) {
+  SourceBufferWriter writer;
+  writer.Append("You say goodbye")
+      .EndLine()
+      .Indent(2)
+      .Append("and I say hello!")
+      .EndLine()
+      .Indent(2)
+      .Append("Hello, hello!");
+
+  const char* expected =
+      "You say goodbye\n  and I say hello!\n    Hello, hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+TEST(MarginTest, EmptyPrefix) {
+  SourceBufferWriter writer;
+  writer.Append("You say goodbye")
+      .EndLine()
+      .Prefix("")
+      .Append("and I say hello!");
+
+  const char* expected = "You say goodbye\nand I say hello!";
+  ASSERT_STREQ(expected, writer.str().data());
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/java/src/gen/gen_ops.bzl b/tensorflow/java/src/gen/gen_ops.bzl
index 28f0908ec4a7a02a2a66ab44577b36e7bb3b4a53..a6650fc4ea0b67bcea46e8d5e3ec84aaafef0f7a 100644
--- a/tensorflow/java/src/gen/gen_ops.bzl
+++ b/tensorflow/java/src/gen/gen_ops.bzl
@@ -52,7 +52,7 @@ def tf_java_op_gen_srcjar(name,
 
   # Generate a source archive containing generated code for these ops.
   gen_srcjar = out_dir + name + ".srcjar"
-  gen_cmds += ["$(location @local_jdk//:jar) cMf $(location :" + gen_srcjar + ") -C $(@D) ."]
+  gen_cmds += ["$(location @local_jdk//:jar) cMf $(location :" + gen_srcjar + ") -C $(@D) src"]
   gen_tools += ["@local_jdk//:jar"] + ["@local_jdk//:jdk"]
   gen_tools += tf_binary_additional_srcs()
   native.genrule(
diff --git a/tensorflow/java/src/gen/java/org/tensorflow/processor/OperatorProcessor.java b/tensorflow/java/src/gen/java/org/tensorflow/processor/OperatorProcessor.java
index 45e42878c770b3c19d96790e5b4bf2ed41a0de29..11fda4fc22aeec9c2d94b5e884c11ceb2a66d29e 100644
--- a/tensorflow/java/src/gen/java/org/tensorflow/processor/OperatorProcessor.java
+++ b/tensorflow/java/src/gen/java/org/tensorflow/processor/OperatorProcessor.java
@@ -77,7 +77,7 @@ public final class OperatorProcessor extends AbstractProcessor {
     TypeElement annotation = annotations.iterator().next();
     Set<? extends Element> annotated = roundEnv.getElementsAnnotatedWith(annotation);
 
-    // If there are no annotated elements, claim the annotion but do nothing.
+    // If there are no annotated elements, claim the annotation but do nothing.
     if (annotated.size() == 0) {
       return true;
     }
diff --git a/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java b/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java
index beb3635585c33f5a3942e4f7d44ac597daf8ff72..a24150484e83dcccf3e1869155569431969b74cf 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java
@@ -352,7 +352,8 @@ public final class OperationBuilder {
 
   private static native void setAttrShape(long handle, String name, long[] shape, int numDims);
 
-  private static native void setAttrShapeList(long handle, String name, long[] shapes, int[] numDims);
+  private static native void setAttrShapeList(
+      long handle, String name, long[] shapes, int[] numDims);
 
   private static native void setAttrStringList(long handle, String name, Object[] value);
 }
diff --git a/tensorflow/java/src/main/native/BUILD b/tensorflow/java/src/main/native/BUILD
index 8e95ea4f7936672020f4f196f286ef73661cdcb1..49348daa94ed04990a657922a0fbb515b7721d82 100644
--- a/tensorflow/java/src/main/native/BUILD
+++ b/tensorflow/java/src/main/native/BUILD
@@ -67,6 +67,7 @@ genrule(
 genrule(
     name = "copy_jni_md_h",
     srcs = select({
+        "//tensorflow:windows": ["@bazel_tools//tools/jdk:jni_md_header-windows"],
         "//tensorflow:darwin": ["@bazel_tools//tools/jdk:jni_md_header-darwin"],
         "//conditions:default": ["@bazel_tools//tools/jdk:jni_md_header-linux"],
     }),
diff --git a/tensorflow/java/src/main/native/operation_builder_jni.cc b/tensorflow/java/src/main/native/operation_builder_jni.cc
index 71a451ad1309659a9f96d9b9eedf60a8b3fd9683..55d214a7c4b81a01e48121214e91397626652f11 100644
--- a/tensorflow/java/src/main/native/operation_builder_jni.cc
+++ b/tensorflow/java/src/main/native/operation_builder_jni.cc
@@ -275,15 +275,15 @@ JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShapeList(
   if (num_dims_length > 0) {
     const int shapes_length = env->GetArrayLength(shapes);
     cshapes.reset(new int64_t[shapes_length]);
-    cdims.reset(new int64_t* [num_dims_length]);
+    cdims.reset(new int64_t*[num_dims_length]);
     cnum_dims.reset(new int[num_dims_length]);
     jlong* shapes_elems =
-        (jlong*) env->GetPrimitiveArrayCritical(shapes, nullptr);
+        static_cast<jlong*>(env->GetPrimitiveArrayCritical(shapes, nullptr));
     std::memcpy(cshapes.get(), shapes_elems, shapes_length << 3);
     env->ReleasePrimitiveArrayCritical(shapes, shapes_elems, JNI_ABORT);
     int64_t* cshapes_ptr = cshapes.get();
     jint* num_dims_elems =
-        (jint*) env->GetPrimitiveArrayCritical(num_dims, nullptr);
+        static_cast<jint*>(env->GetPrimitiveArrayCritical(num_dims, nullptr));
     for (int i = 0; i < num_dims_length; ++i) {
       cnum_dims[i] = static_cast<int>(num_dims_elems[i]);
       cdims[i] = cshapes_ptr;
diff --git a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java
index 2430816725abdd664cd016cdfefa6c94b3d0b9b1..0a4a8cf4e3f65311ba887b4d47bc79080bfd5382 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java
@@ -151,10 +151,10 @@ public class OperationBuilderTest {
   @Test
   public void setAttrShapeList() {
     // Those shapes match tensors ones, so no exception is thrown
-    testSetAttrShapeList(new Shape[] { Shape.make(2, 2), Shape.make(2, 2, 2) });
+    testSetAttrShapeList(new Shape[] {Shape.make(2, 2), Shape.make(2, 2, 2)});
     try {
       // Those shapes do not match tensors ones, exception is thrown
-      testSetAttrShapeList(new Shape[] { Shape.make(2, 2), Shape.make(2, 2, 2, 2) });
+      testSetAttrShapeList(new Shape[] {Shape.make(2, 2), Shape.make(2, 2, 2, 2)});
       fail("Shapes are incompatible and an exception was expected");
     } catch (IllegalArgumentException e) {
       // expected
@@ -189,20 +189,23 @@ public class OperationBuilderTest {
   }
 
   private static void testSetAttrShapeList(Shape[] shapes) {
-    try (Graph g = new Graph(); Session s = new Session(g)) {
-      int[][] matrix = new int[][] { { 0, 0 }, { 0, 0 } };
-      Output<?> queue = g.opBuilder("FIFOQueue", "queue")
-          .setAttr("component_types", new DataType[] { DataType.INT32, DataType.INT32 }) 
-          .setAttr("shapes", shapes)
-          .build()
-          .output(0);
+    try (Graph g = new Graph();
+        Session s = new Session(g)) {
+      int[][] matrix = new int[][] {{0, 0}, {0, 0}};
+      Output<?> queue =
+          g.opBuilder("FIFOQueue", "queue")
+              .setAttr("component_types", new DataType[] {DataType.INT32, DataType.INT32})
+              .setAttr("shapes", shapes)
+              .build()
+              .output(0);
       assertTrue(hasNode(g, "queue"));
       Output<Integer> c1 = TestUtil.constant(g, "const1", matrix);
-      Output<Integer> c2 = TestUtil.constant(g, "const2", new int[][][] { matrix, matrix });
-      Operation enqueue = g.opBuilder("QueueEnqueue", "enqueue")
-          .addInput(queue)
-          .addInputList(new Output<?>[] { c1, c2 })
-          .build();
+      Output<Integer> c2 = TestUtil.constant(g, "const2", new int[][][] {matrix, matrix});
+      Operation enqueue =
+          g.opBuilder("QueueEnqueue", "enqueue")
+              .addInput(queue)
+              .addInputList(new Output<?>[] {c1, c2})
+              .build();
       assertTrue(hasNode(g, "enqueue"));
 
       s.runner().addTarget(enqueue).run();
diff --git a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
index a86b4dd117ede64d2b105ceb189220a5dd5d9740..e8cc76c2a6458193161a98e17483fe73de107b77 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java
@@ -151,7 +151,7 @@ public class SessionTest {
       s.close();
       try {
         s.runner().run();
-        fail("methods on a close()d session should fail");
+        fail("methods on a session should fail after close() is called");
       } catch (IllegalStateException e) {
         // expected exception
       }
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 23ad9bfa56fd974fb553e7581a86712b00d1b465..3493ed76f3d00d5af2f065d30de279ac2109aab1 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -74,6 +74,7 @@ py_library(
         ":io_ops",
         ":layers",
         ":lib",
+        ":list_ops",
         ":math_ops",
         ":metrics",
         ":nn",
@@ -171,7 +172,21 @@ tf_py_test(
     name = "flags_test",
     size = "small",
     srcs = ["platform/flags_test.py"],
-    additional_deps = [":platform"],
+    additional_deps = [
+        ":client_testlib",
+        ":platform",
+    ],
+)
+
+tf_py_test(
+    name = "stacktrace_handler_test",
+    size = "small",
+    srcs = ["platform/stacktrace_handler_test.py"],
+    additional_deps = [
+        ":client_testlib",
+        ":platform",
+    ],
+    tags = ["no_windows"],
 )
 
 tf_py_test(
@@ -179,10 +194,7 @@ tf_py_test(
     size = "small",
     srcs = ["platform/app_test.py"],
     additional_deps = [":platform"],
-    tags = [
-        "manual",
-        "notap",
-    ],
+    tags = ["notap"],
 )
 
 cc_library(
@@ -207,11 +219,11 @@ cc_library(
     srcs = ["grappler/model_analyzer.cc"],
     hdrs = ["grappler/model_analyzer.h"],
     deps = [
+        "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler/costs:graph_properties",
-        "//tensorflow/core/grappler/costs:utils",
     ],
 )
 
@@ -227,11 +239,25 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "bfloat16_lib",
+    srcs = ["lib/core/bfloat16.cc"],
+    hdrs = ["lib/core/bfloat16.h"],
+    deps = [
+        ":numpy_lib",
+        ":safe_ptr",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//util/python:python_headers",
+    ],
+)
+
 cc_library(
     name = "ndarray_tensor_bridge",
     srcs = ["lib/core/ndarray_tensor_bridge.cc"],
     hdrs = ["lib/core/ndarray_tensor_bridge.h"],
     deps = [
+        ":bfloat16_lib",
         ":numpy_lib",
         "//tensorflow/c:c_api",
         "//tensorflow/core:lib",
@@ -268,10 +294,15 @@ cc_library(
     deps = [
         ":ndarray_tensor_bridge",
         ":numpy_lib",
+        ":py_util",
+        ":safe_ptr",
+        "//tensorflow/c:tf_status_helper",
+        "//tensorflow/c/eager:c_api",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:script_ops_op_lib",
+        "//tensorflow/python/eager:pywrap_tfe_lib",
         "//third_party/py/numpy:headers",
         "//util/python:python_headers",
     ],
@@ -293,6 +324,7 @@ cc_library(
     srcs = ["lib/core/ndarray_tensor.cc"],
     hdrs = ["lib/core/ndarray_tensor.h"],
     deps = [
+        ":bfloat16_lib",
         ":ndarray_tensor_bridge",
         ":numpy_lib",
         ":safe_ptr",
@@ -309,6 +341,7 @@ cc_library(
     hdrs = ["lib/core/py_seq_tensor.h"],
     deps = [
         ":numpy_lib",
+        ":py_util",
         ":safe_ptr",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -316,6 +349,17 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "py_util",
+    srcs = ["lib/core/py_util.cc"],
+    hdrs = ["lib/core/py_util.h"],
+    deps = [
+        "//tensorflow/core:lib",
+        "//tensorflow/core:script_ops_op_lib",
+        "//util/python:python_headers",
+    ],
+)
+
 cc_library(
     name = "py_record_reader_lib",
     srcs = ["lib/io/py_record_reader.cc"],
@@ -352,6 +396,7 @@ tf_cc_shared_object(
     }),
     deps = [
         "//tensorflow/core:framework_headers_lib",
+        "//third_party/eigen3",
         "@protobuf_archive//:protobuf_headers",
     ],
 )
@@ -586,6 +631,7 @@ py_library(
     srcs = ["framework/dtypes.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":pywrap_tensorflow",
         "//tensorflow/core:protos_all_py",
     ],
 )
@@ -676,6 +722,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":c_api_util",
+        ":control_flow_util",
         ":device",
         ":dtypes",
         ":op_def_registry",
@@ -766,15 +813,23 @@ py_library(
     srcs = ["framework/test_util.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":array_ops",
         ":client",
         ":errors",
-        ":framework",
         ":framework_for_generated_wrappers",
         ":platform",
         ":platform_test",
         ":pywrap_tensorflow",
+        ":random_seed",
+        ":resource_variable_ops",
+        ":session",
         ":training",
         ":util",
+        ":variables",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/eager:backprop",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:tape",
         "//third_party/py/numpy",
         "@six_archive//:six",
     ],
@@ -1074,6 +1129,7 @@ py_test(
         ":variables",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:function",
     ],
 )
 
@@ -1180,6 +1236,12 @@ py_test(
         ":framework_test_lib",
         ":platform_test",
         ":random_ops",
+        ":resource_variable_ops",
+        ":session",
+        ":test_ops",
+        ":variables",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
     ],
 )
@@ -1190,6 +1252,7 @@ py_test(
     srcs = ["framework/dtypes_test.py"],
     main = "framework/dtypes_test.py",
     srcs_version = "PY2AND3",
+    tags = ["no_windows"],
     deps = [
         ":framework_for_generated_wrappers",
         ":framework_test_lib",
@@ -1204,12 +1267,12 @@ py_test(
     name = "op_def_library_test",
     size = "small",
     srcs = ["framework/op_def_library_test.py"],
-    main = "framework/op_def_library_test.py",
     srcs_version = "PY2AND3",
     deps = [
         ":framework_for_generated_wrappers",
         ":framework_test_lib",
         ":platform_test",
+        ":test_ops",
     ],
 )
 
@@ -1258,7 +1321,10 @@ tf_gen_op_wrapper_private_py(
 
 tf_gen_op_wrapper_private_py(
     name = "control_flow_ops_gen",
-    visibility = ["//learning/brain/python/ops:__pkg__"],
+    visibility = [
+        "//learning/brain/python/ops:__pkg__",
+        "//tensorflow/python/kernel_tests:__pkg__",
+    ],
     deps = [
         "//tensorflow/core:control_flow_ops_op_lib",
         "//tensorflow/core:no_op_op_lib",
@@ -1359,6 +1425,10 @@ tf_gen_op_wrapper_private_py(
     name = "resource_variable_ops_gen",
 )
 
+tf_gen_op_wrapper_private_py(
+    name = "list_ops_gen",
+)
+
 tf_gen_op_wrapper_private_py(
     name = "script_ops_gen",
 )
@@ -1526,6 +1596,7 @@ py_library(
     deps = [
         ":control_flow_ops",
         ":control_flow_ops_gen",
+        ":control_flow_util",
         ":framework",
         ":framework_for_generated_wrappers",
         ":math_ops",
@@ -1542,6 +1613,7 @@ py_library(
         ":array_ops_gen",
         ":constant_op",
         ":control_flow_ops_gen",
+        ":control_flow_util",
         ":data_flow_ops_gen",
         ":dtypes",
         ":framework_ops",
@@ -1557,6 +1629,15 @@ py_library(
     ],
 )
 
+py_library(
+    name = "control_flow_util",
+    srcs = ["ops/control_flow_util.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":platform",
+    ],
+)
+
 py_library(
     name = "ctc_ops",
     srcs = ["ops/ctc_ops.py"],
@@ -1630,6 +1711,7 @@ py_library(
         ":bitwise_ops",
         ":control_flow_grad",
         ":control_flow_ops",
+        ":control_flow_util",
         ":framework",
         ":framework_for_generated_wrappers",
         ":functional_ops",
@@ -1706,6 +1788,7 @@ py_library(
         ":math_ops",
         ":nn_ops",
         ":random_ops",
+        "//third_party/py/numpy",
     ],
 )
 
@@ -1863,6 +1946,15 @@ py_library(
     ],
 )
 
+py_library(
+    name = "list_ops",
+    srcs = ["ops/list_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":list_ops_gen",
+    ],
+)
+
 py_library(
     name = "nn",
     srcs = [
@@ -1982,6 +2074,7 @@ py_library(
     deps = [
         ":array_ops",
         ":control_flow_ops",
+        ":control_flow_util",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":rnn_cell",
@@ -2288,6 +2381,8 @@ py_library(
         ":platform",
         ":util",
         ":variable_scope",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:function",
     ],
 )
 
@@ -2581,7 +2676,7 @@ cuda_py_test(
         ":nn_grad",
         "//third_party/py/numpy",
     ],
-    shard_count = 4,
+    shard_count = 16,
 )
 
 cuda_py_test(
@@ -2966,12 +3061,14 @@ tf_cuda_library(
         ":safe_ptr",
         ":test_ops_kernels",
         "//tensorflow/c:c_api",
+        "//tensorflow/c:c_api_internal",
         "//tensorflow/c:tf_status_helper",
         "//tensorflow/core",
         "//tensorflow/core:all_kernels",
         "//tensorflow/core:direct_session",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//third_party/py/numpy:headers",
@@ -3007,12 +3104,14 @@ tf_py_wrap_cc(
         "grappler/item.i",
         "grappler/model_analyzer.i",
         "grappler/tf_optimizer.i",
+        "lib/core/bfloat16.i",
         "lib/core/py_func.i",
         "lib/core/strings.i",
         "lib/io/file_io.i",
         "lib/io/py_record_reader.i",
         "lib/io/py_record_writer.i",
         "platform/base.i",
+        "platform/stacktrace_handler.i",
         "pywrap_tfe.i",
         "training/quantize_training.i",
         "training/server_lib.i",
@@ -3025,6 +3124,7 @@ tf_py_wrap_cc(
         "util/util.i",
     ],
     deps = [
+        ":bfloat16_lib",
         ":cost_analyzer_lib",
         ":model_analyzer_lib",
         ":cpp_python_util",
@@ -3099,130 +3199,124 @@ py_library(
     ],
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_test",
     size = "small",
     srcs = ["training/server_lib_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_multiple_containers_test",
     size = "small",
     srcs = ["training/server_lib_multiple_containers_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_same_variables_clear_container_test",
     size = "small",
     srcs = ["training/server_lib_same_variables_clear_container_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_same_variables_clear_test",
     size = "small",
     srcs = ["training/server_lib_same_variables_clear_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_same_variables_no_clear_test",
     size = "small",
     srcs = ["training/server_lib_same_variables_no_clear_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
-py_test(
+tf_py_test(
     name = "server_lib_sparse_job_test",
     size = "small",
     srcs = ["training/server_lib_sparse_job_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
         ":data_flow_ops",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":training",
         ":variables",
-        "//tensorflow/core:protos_all_py",
         "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
 )
 
 cuda_py_test(
@@ -3242,6 +3336,7 @@ cuda_py_test(
         ":variables",
         "//third_party/py/numpy",
     ],
+    grpc_enabled = True,
     tags = [
         "no_oss",  # Test flaky due to port collisions.
         "oss_serial",
@@ -3260,6 +3355,7 @@ tf_py_test(
         ":training",
         ":variables",
     ],
+    grpc_enabled = True,
     tags = [
         "no_oss",  # Test flaky due to port collisions.
         "notsan",  # data race due to b/62910646
@@ -3290,17 +3386,11 @@ tf_cuda_library(
     alwayslink = 1,
 )
 
-py_test(
+tf_py_test(
     name = "session_test",
     size = "small",
     srcs = ["client/session_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_gpu",
-        "no_pip_gpu",  # testInteractivePlacePrunedGraph fails on invalid assumption about GPU ops.
-        "no_windows",
-    ],
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":control_flow_ops",
@@ -3318,21 +3408,19 @@ py_test(
         "//third_party/py/numpy",
         "@six_archive//:six",
     ],
+    grpc_enabled = True,
+    tags = [
+        "no_gpu",
+        "no_pip_gpu",  # testInteractivePlacePrunedGraph fails on invalid assumption about GPU ops.
+        "no_windows",
+    ],
 )
 
-py_test(
+tf_py_test(
     name = "session_clusterspec_prop_test",
     size = "small",
     srcs = ["client/session_clusterspec_prop_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_gpu",
-        "no_oss",
-        "no_pip",
-        "no_pip_gpu",
-        "notap",
-    ],
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":client_testlib",
@@ -3347,37 +3435,40 @@ py_test(
         ":variables",
         "//third_party/py/numpy",
     ],
+    grpc_enabled = True,
+    tags = [
+        "no_gpu",
+        "no_oss",
+        "no_pip",
+        "no_pip_gpu",
+        "notap",
+    ],
 )
 
-py_test(
+tf_py_test(
     name = "session_list_devices_test",
     size = "small",
     srcs = ["client/session_list_devices_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_gpu",
-        "no_pip_gpu",
-        "notsan",  # data race due to b/62910646
-    ],
-    deps = [
+    additional_deps = [
         ":client",
         ":framework",
         ":framework_test_lib",
         ":platform_test",
         ":training",
     ],
+    grpc_enabled = True,
+    tags = [
+        "no_gpu",
+        "no_pip_gpu",
+        "notsan",  # data race due to b/62910646
+    ],
 )
 
-py_test(
+tf_py_test(
     name = "session_partial_run_test",
     size = "small",
     srcs = ["client/session_partial_run_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_gpu",
-        "no_windows",
-    ],
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client",
         ":errors",
@@ -3390,6 +3481,11 @@ py_test(
         ":util",
         "@six_archive//:six",
     ],
+    grpc_enabled = True,
+    tags = [
+        "no_gpu",
+        "no_windows",
+    ],
 )
 
 cuda_py_test(
@@ -3405,6 +3501,20 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "virtual_gpu_test",
+    size = "small",
+    srcs = ["client/virtual_gpu_test.py"],
+    additional_deps = [
+        ":client",
+        ":client_testlib",
+        ":framework_for_generated_wrappers",
+        ":math_ops",
+        "//tensorflow/core:protos_all_py",
+    ],
+    tags = ["noguitar"],
+)
+
 py_test(
     name = "graph_util_test",
     size = "small",
@@ -3422,6 +3532,19 @@ py_test(
     ],
 )
 
+py_test(
+    name = "bfloat16_test",
+    size = "small",
+    srcs = ["lib/core/bfloat16_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_windows"],
+    deps = [
+        ":client_testlib",
+        ":lib",
+        ":pywrap_tensorflow",
+    ],
+)
+
 py_test(
     name = "file_io_test",
     size = "small",
@@ -3575,7 +3698,9 @@ cuda_py_test(
         "//third_party/py/numpy",
         "@six_archive//:six",
         "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/data/ops:dataset_ops",
     ],
+    tags = ["multi_gpu"],
 )
 
 py_test(
@@ -3624,6 +3749,7 @@ cuda_py_test(
     srcs = ["training/session_manager_test.py"],
     additional_deps = [
         ":array_ops",
+        ":control_flow_ops",
         ":client",
         ":client_testlib",
         ":errors",
@@ -3632,20 +3758,18 @@ cuda_py_test(
         ":training",
         ":variables",
     ],
+    grpc_enabled = True,
     main = "training/session_manager_test.py",
 )
 
-py_test(
+tf_py_test(
     name = "supervisor_test",
     size = "small",
     srcs = ["training/supervisor_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_windows"],
-    deps = [
+    additional_deps = [
         ":array_ops",
         ":client_testlib",
         ":errors",
-        ":extra_py_tests_deps",
         ":framework",
         ":framework_for_generated_wrappers",
         ":io_ops",
@@ -3656,6 +3780,8 @@ py_test(
         ":variables",
         "//tensorflow/core:protos_all_py",
     ],
+    grpc_enabled = True,
+    tags = ["no_windows"],
 )
 
 py_test(
@@ -4269,6 +4395,7 @@ cuda_py_test(
         ":variables",
         "//third_party/py/numpy",
     ],
+    grpc_enabled = True,
     main = "client/session_benchmark.py",
 )
 
@@ -4305,7 +4432,10 @@ py_test(
         "grappler/item_test.py",
     ],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],  # tf_optimizer is not available in pip.
+    tags = [
+        "grappler",
+        "no_pip",  # tf_optimizer is not available in pip.
+    ],
     deps = [
         ":client_testlib",
         ":framework_for_generated_wrappers",
@@ -4315,6 +4445,27 @@ py_test(
     ],
 )
 
+py_test(
+    name = "datasets_test",
+    size = "small",
+    srcs = [
+        "grappler/datasets_test.py",
+    ],
+    srcs_version = "PY2AND3",
+    tags = [
+        "grappler",
+        "no_pip",  # tf_optimizer is not available in pip.
+    ],
+    deps = [
+        ":array_ops",
+        ":client_testlib",
+        ":framework_for_generated_wrappers",
+        ":tf_item",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/data",
+    ],
+)
+
 py_library(
     name = "tf_cluster",
     srcs = [
@@ -4328,21 +4479,24 @@ py_library(
     ],
 )
 
-py_test(
+cuda_py_test(
     name = "cluster_test",
     size = "small",
     srcs = [
         "grappler/cluster_test.py",
     ],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],  # tf_optimizer is not available in pip.
-    deps = [
+    additional_deps = [
         ":client_testlib",
         ":framework_for_generated_wrappers",
         ":tf_cluster",
         ":tf_item",
         "//tensorflow/core:protos_all_py",
     ],
+    shard_count = 10,
+    tags = [
+        "grappler",
+        "no_pip",  # tf_optimizer is not available in pip.
+    ],
 )
 
 py_library(
@@ -4362,12 +4516,13 @@ py_test(
     name = "tf_optimizer_test",
     size = "small",
     srcs = [
-        "grappler/cluster_test.py",
-        "grappler/item_test.py",
         "grappler/tf_optimizer_test.py",
     ],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],  # tf_optimizer is not available in pip.
+    tags = [
+        "grappler",
+        "no_pip",  # tf_optimizer is not available in pip.
+    ],
     deps = [
         ":client_testlib",
         ":framework_for_generated_wrappers",
@@ -4385,6 +4540,9 @@ py_test(
         "grappler/memory_optimizer_test.py",
     ],
     srcs_version = "PY2AND3",
+    tags = [
+        "grappler",
+    ],
     deps = [
         ":client_testlib",
         ":framework_for_generated_wrappers",
@@ -4419,13 +4577,17 @@ cuda_py_test(
         ":nn",
         ":ops",
         ":random_ops",
+        ":state_ops",
         ":tf_cluster",
         ":tf_optimizer",
         ":training",
         "//third_party/py/numpy",
         "//tensorflow/core:protos_all_py",
     ],
-    tags = ["manual"],
+    shard_count = 10,
+    tags = [
+        "grappler",
+    ],
 )
 
 py_library(
@@ -4460,7 +4622,10 @@ py_test(
     size = "small",
     srcs = ["grappler/cost_analyzer_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "grappler",
+        "no_pip",
+    ],
     deps = [
         ":array_ops",
         ":client_testlib",
@@ -4492,7 +4657,10 @@ py_test(
     size = "small",
     srcs = ["grappler/model_analyzer_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "grappler",
+        "no_pip",
+    ],
     deps = [
         ":array_ops",
         ":client_testlib",
diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py
index af34aca3e345ff6d12f471f289b77001b40c00bf..bc9ddec2a54a784027120828e9b15a2bf500414e 100644
--- a/tensorflow/python/__init__.py
+++ b/tensorflow/python/__init__.py
@@ -263,6 +263,7 @@ _allowed_symbols.extend([
     'GIT_VERSION',
     'COMPILER_VERSION',
     'CXX11_ABI_FLAG',
+    'MONOLITHIC_BUILD',
 ])
 
 # Remove all extra symbols that don't have a docstring or are not explicitly
@@ -282,6 +283,7 @@ _exported_dunders = set([
     '__git_version__',
     '__compiler_version__',
     '__cxx11_abi_flag__',
+    '__monolithic_build__',
 ])
 
 # Expose symbols minus dunders, unless they are whitelisted above.
diff --git a/tensorflow/python/build_defs.bzl b/tensorflow/python/build_defs.bzl
index 2d8625933f9ea4ab3bedf8d3157430d821f3e584..7f29adc06fcc5922114b7cd2bde8a8df5b1e0665 100644
--- a/tensorflow/python/build_defs.bzl
+++ b/tensorflow/python/build_defs.bzl
@@ -27,4 +27,8 @@ def tf_gen_op_wrapper_private_py(name, out=None, deps=[],
     deps=deps,
     require_shape_functions=require_shape_functions,
     generated_target_name=name,
+    api_def_srcs = [
+        "//tensorflow/core/api_def:base_api_def",
+        "//tensorflow/core/api_def:python_api_def",
+    ],
   )
diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py
index 759c36ad72e922671288b0d57fe9e442b915c144..1481a4d035cbc63aa655be6c4d441e6f6741e118 100644
--- a/tensorflow/python/client/session.py
+++ b/tensorflow/python/client/session.py
@@ -126,6 +126,12 @@ _REGISTERED_EXPANSIONS = [
      lambda feed: [feed])]
 # pylint: enable=g-long-lambda
 
+
+def _convert_to_numpy_obj(numpy_dtype, obj):
+  """Explicitly convert obj based on numpy type except for string type."""
+  return numpy_dtype(obj) if numpy_dtype is not object else str(obj)
+
+
 def register_session_run_conversion_functions(tensor_type, fetch_function,
     feed_function=None, feed_function_for_partial_run=None):
   """Register fetch and feed conversion functions for `tf.Session.run()`.
@@ -1072,12 +1078,14 @@ class BaseSession(SessionInterface):
                             'strings, lists, numpy ndarrays, or TensorHandles.')
 
           subfeed_dtype = subfeed_t.dtype.as_numpy_dtype
-          if isinstance(subfeed_val,
-                        int) and subfeed_dtype(subfeed_val) != subfeed_val:
+          if isinstance(subfeed_val, int) and _convert_to_numpy_obj(
+              subfeed_dtype, subfeed_val) != subfeed_val:
             raise TypeError(
-                'Type of feed value ' + str(subfeed_val) + ' is not'
-                ' compatible with Tensor type ' + str(subfeed_dtype) + '.'
-                ' Try explicitly setting the type of the feed tensor'
+                'Type of feed value ' + str(subfeed_val) + ' with type ' +
+                str(type(subfeed_val)) +
+                ' is not compatible with Tensor type ' +
+                str(subfeed_dtype) +
+                '. Try explicitly setting the type of the feed tensor'
                 ' to a larger type (e.g. int64).')
 
           is_tensor_handle_feed = isinstance(subfeed_val,
@@ -1160,9 +1168,6 @@ class BaseSession(SessionInterface):
       TypeError: If `fetches` or `feed_list` cannot be interpreted
         as arguments to @{tf.Session.run}.
     """
-    assert not self._created_with_new_api, ('session.make_callable() doesn\'t '
-                                            'work with C API')
-
     if feed_list is not None:
       if not isinstance(feed_list, (list, tuple)):
         raise TypeError('`feed_list` must be a list or tuple.')
@@ -1184,12 +1189,18 @@ class BaseSession(SessionInterface):
 
     # Create a fetch handler to take care of the structure of fetches.
     fetch_handler = _FetchHandler(self._graph, fetches, {})
-    fetch_list_as_strings = _name_list(fetch_handler.fetches())
-    target_list_as_strings = _name_list(fetch_handler.targets())
+    if self._created_with_new_api:
+      # pylint: disable=protected-access
+      fetch_list = [t._as_tf_output() for t in fetch_handler.fetches()]
+      target_list = [op._c_op for op in fetch_handler.targets()]
+      # pylint: enable=protected-access
+    else:
+      fetch_list = _name_list(fetch_handler.fetches())
+      target_list = _name_list(fetch_handler.targets())
 
     def _callable_template_with_options_and_metadata(
-        fetch_list_as_strings,
-        target_list_as_strings,
+        fetch_list,
+        target_list,
         fetch_handler,
         options=None,
         run_metadata=None):
@@ -1199,9 +1210,14 @@ class BaseSession(SessionInterface):
       run_metadata_ptr = tf_session.TF_NewBuffer() if run_metadata else None
       try:
         with errors.raise_exception_on_not_ok_status() as status:
-          results = tf_session.TF_Run(
-              self._session, options_ptr, {}, fetch_list_as_strings,
-              target_list_as_strings, status, run_metadata_ptr)
+          if self._created_with_new_api:
+            results = tf_session.TF_SessionRun_wrapper(
+                self._session, options_ptr, {}, fetch_list, target_list,
+                run_metadata_ptr, status)
+          else:
+            results = tf_session.TF_Run(
+                self._session, options_ptr, {}, fetch_list, target_list, status,
+                run_metadata_ptr)
           if fetch_handler:
             results = fetch_handler.build_results(self, results)
           else:
@@ -1218,27 +1234,35 @@ class BaseSession(SessionInterface):
 
     if accept_options:
       return functools.partial(
-          _callable_template_with_options_and_metadata, fetch_list_as_strings,
-          target_list_as_strings, fetch_handler)
+          _callable_template_with_options_and_metadata, fetch_list,
+          target_list, fetch_handler)
     elif isinstance(fetches, ops.Operation):
       # Special case for fetching a single operation, because the
       # function will have no return value.
-      assert not fetch_list_as_strings
-      assert len(target_list_as_strings) == 1
+      assert not fetch_list
+      assert len(target_list) == 1
       def _single_operation_run():
         with errors.raise_exception_on_not_ok_status() as status:
-          tf_session.TF_Run(self._session, None, {}, [],
-                            target_list_as_strings, status, None)
+          if self._created_with_new_api:
+            tf_session.TF_SessionRun_wrapper(
+                self._session, None, {}, [], target_list, None, status)
+          else:
+            tf_session.TF_Run(
+                self._session, None, {}, [], target_list, status, None)
       return _single_operation_run
     elif isinstance(fetches, ops.Tensor):
       # Special case for fetching a single tensor, because the
       # function can return the result of `TF_Run()` directly.
-      assert len(fetch_list_as_strings) == 1
-      assert not target_list_as_strings
+      assert len(fetch_list) == 1
+      assert not target_list
       def _single_tensor_run():
         with errors.raise_exception_on_not_ok_status() as status:
-          results = tf_session.TF_Run(self._session, None, {},
-                                      fetch_list_as_strings, [], status, None)
+          if self._created_with_new_api:
+            results = tf_session.TF_SessionRun_wrapper(
+                self._session, None, {}, fetch_list, [], None, status)
+          else:
+            results = tf_session.TF_Run(
+                self._session, None, {}, fetch_list, [], status, None)
         return results[0]
       return _single_tensor_run
     else:
@@ -1246,9 +1270,12 @@ class BaseSession(SessionInterface):
       # results for us.
       def _fetch_handler_run():
         with errors.raise_exception_on_not_ok_status() as status:
-          results = tf_session.TF_Run(self._session, None, {},
-                                      fetch_list_as_strings,
-                                      target_list_as_strings, status, None)
+          if self._created_with_new_api:
+            results = tf_session.TF_SessionRun_wrapper(
+                self._session, None, {}, fetch_list, target_list, None, status)
+          else:
+            results = tf_session.TF_Run(
+                self._session, None, {}, fetch_list, target_list, status, None)
         return fetch_handler.build_results(self, results)
       return _fetch_handler_run
 
diff --git a/tensorflow/python/client/session_clusterspec_prop_test.py b/tensorflow/python/client/session_clusterspec_prop_test.py
index c85b22eb156407fcb78302c43b9cb17b8f6b5e06..f1934241334e049c1d02e095d371927bec71be14 100644
--- a/tensorflow/python/client/session_clusterspec_prop_test.py
+++ b/tensorflow/python/client/session_clusterspec_prop_test.py
@@ -77,7 +77,8 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
     config = config_pb2.ConfigProto(cluster_def=cluster_def)
 
     with ops.Graph().as_default() as g, ops.device('/job:worker/task:1'):
-      const = constant_op.constant(17)
+      with ops.device('/cpu:0'):	 
+        const = constant_op.constant(17)
     sess = session.Session(server1.target, config=config, graph=g)
     run_options = config_pb2.RunOptions(
         trace_level=config_pb2.RunOptions.FULL_TRACE)
diff --git a/tensorflow/python/client/session_list_devices_test.py b/tensorflow/python/client/session_list_devices_test.py
index 584b1abe55c0df09afad0c432837646e75beb653..5a7413c12e9db92cb85d54a69602753ff6476425 100644
--- a/tensorflow/python/client/session_list_devices_test.py
+++ b/tensorflow/python/client/session_list_devices_test.py
@@ -39,7 +39,6 @@ class SessionListDevicesTestMethods(object):
       devices = sess.list_devices()
       self.assertTrue('/job:localhost/replica:0/task:0/device:CPU:0' in set(
           [d.name for d in devices]), devices)
-      self.assertGreaterEqual(1, len(devices), devices)
 
   def testInvalidDeviceNumber(self):
     opts = tf_session.TF_NewSessionOptions()
@@ -65,7 +64,6 @@ class SessionListDevicesTestMethods(object):
       devices = sess.list_devices()
       self.assertTrue('/job:local/replica:0/task:0/device:CPU:0' in set(
           [d.name for d in devices]), devices)
-      self.assertGreaterEqual(1, len(devices), devices)
 
   def testListDevicesClusterSpecPropagation(self):
     server1 = server_lib.Server.create_local_server()
@@ -84,7 +82,6 @@ class SessionListDevicesTestMethods(object):
           '/job:worker/replica:0/task:0/device:CPU:0' in device_names)
       self.assertTrue(
           '/job:worker/replica:0/task:1/device:CPU:0' in device_names)
-      self.assertGreaterEqual(2, len(devices), devices)
 
 
 class SessionListDevicesTest(SessionListDevicesTestMethods,
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index f4b02711955242085e222d341e04fb9fc409dd63..c579fba33951c4624e02de1e20a9aa5bad11cd73 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -28,6 +28,8 @@ import numpy as np
 import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.core.framework import attr_value_pb2
+from tensorflow.core.framework import types_pb2
 from tensorflow.core.lib.core import error_codes_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
@@ -55,13 +57,13 @@ from tensorflow.python.platform import googletest
 from tensorflow.python.training import server_lib
 from tensorflow.python.util import compat
 
-ops._USE_C_API = True
 
 # NOTE(mrry): Dummy shape registration for ops used in the tests, since they
 # don't have C++ op registrations on which to attach C++ shape fns.
 ops.RegisterShape('ConstructionFails')(common_shapes.unknown_shape)
 
 
+@test_util.with_c_api
 class SessionTest(test_util.TensorFlowTestCase):
 
   def testUseExistingGraph(self):
@@ -163,8 +165,9 @@ class SessionTest(test_util.TensorFlowTestCase):
         # Run with a bogus handle.
         s.partial_run('foo', r1, feed_dict={a: 1, b: 2})
 
-  @test_util.disable_c_api  # No shape registration for 'ConstructionFails'
   def testOpConstructionErrorPayload(self):
+    if ops._USE_C_API: return  # No shape registration for 'ConstructionFails'
+
     with session.Session():
       failing_op = ops.get_default_graph().create_op(
           'ConstructionFails', [], [], name='f')
@@ -206,7 +209,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       with self.assertRaises(TypeError):
         s.run({'a': a, 'b': None})
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testFetchSingleton(self):
     with session.Session() as sess:
       a = constant_op.constant(42.0)
@@ -229,7 +231,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       res = sess.run(a.op)  # An op, not a tensor.
       self.assertEqual(None, res)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testFetchList(self):
     with session.Session() as sess:
       a = constant_op.constant(42.0)
@@ -245,7 +246,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertTrue(isinstance(res, list))
       self.assertEqual([42.0, None, 44.0, 42.0, None], res)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testFetchTuple(self):
     with session.Session() as sess:
       a = constant_op.constant(42.0)
@@ -259,7 +259,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertTrue(isinstance(res, tuple))
       self.assertEqual((42.0, None, 44.0, 42.0), res)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testFetchNamedTuple(self):
     # pylint: disable=invalid-name
     ABC = collections.namedtuple('ABC', ['a', 'b', 'c'])
@@ -1176,7 +1175,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(b_val, [[2.0, 2.0, 2.0]])
       self.assertAllEqual(a2_val, [[1.0, 1.0]])
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testFeedAndFetch(self):
     with session.Session() as sess:
       for dtype in [dtypes.float16,
@@ -1223,7 +1221,6 @@ class SessionTest(test_util.TensorFlowTestCase):
           self.assertAllEqual(np_array, out_v)
           self.assertAllEqual(np_array, feed_v)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testMakeCallableOnTensorWithRunOptions(self):
     with session.Session() as sess:
       a = constant_op.constant(42.0)
@@ -1236,7 +1233,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertEqual(42.0, res)
       self.assertGreater(len(run_metadata.step_stats.dev_stats), 0)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testMakeCallableOnOperationWithRunOptions(self):
     with session.Session() as sess:
       a = variables.Variable(42.0)
@@ -1251,7 +1247,6 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertEqual(43.0, sess.run(a))
       self.assertGreater(len(run_metadata.step_stats.dev_stats), 0)
 
-  @test_util.disable_c_api  # session.make_callable() doesn't work with C API
   def testMakeCallableWithFeedListAndRunOptions(self):
     with session.Session() as sess:
       ph = array_ops.placeholder(dtypes.float32)
@@ -1458,9 +1453,10 @@ class SessionTest(test_util.TensorFlowTestCase):
         self.assertTrue(run_metadata.HasField('step_stats'))
         self.assertEquals(len(run_metadata.step_stats.dev_stats), 1)
 
-  # TODO(nolivia): C API doesn't yet handle marking nodes as not feedable.
-  @test_util.disable_c_api
   def testFeedShapeCompatibility(self):
+    # TODO(nolivia): C API doesn't yet handle marking nodes as not feedable.
+    if ops._USE_C_API: return
+
     with session.Session() as sess:
       some_tensor = constant_op.constant([2.0, 2.0, 2.0, 2.0])
       new_shape = constant_op.constant([2, 2])
@@ -1741,6 +1737,161 @@ class SessionTest(test_util.TensorFlowTestCase):
     server = server_lib.Server.create_local_server()
     self.runTestAddFunctionToSession(server.target)
 
+  def testOpenAndCloseGrpcSession(self):
+    server = server_lib.Server.create_local_server()
+    with session.Session(server.target):
+      pass
+
+  def testOpenAndCloseSession(self):
+    with session.Session():
+      pass
+
+  def testAutoConvertAndCheckData(self):
+    with self.test_session() as sess:
+      a = array_ops.placeholder(dtype=dtypes.string)
+      with self.assertRaisesRegexp(
+          TypeError, 'Type of feed value 1 with type <(\w+) \'int\'> is not'):
+        sess.run(a, feed_dict={a: 1})
+
+class GraphMutationTest(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    self._original_use_c_api_value = ops._USE_C_API
+    ops._USE_C_API = True
+    super(GraphMutationTest, self).setUp()
+
+  def tearDown(self):
+    ops._USE_C_API = self._original_use_c_api_value
+    super(GraphMutationTest, self).tearDown()
+
+  def testUpdateInputAfterRunning(self):
+    with ops.Graph().as_default() as g:
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+    with session.Session(graph=g) as sess:
+      self.assertAllEqual(3.0, sess.run(c))
+      c.op._update_input(1, a)  # pylint: disable=protected-access
+      with self.assertRaisesRegexp(
+          errors.FailedPreconditionError,
+          'add.*was changed by updating input tensor after it was run'):
+        sess.run(c)
+
+      # Check that running the graph with a new session is fine
+      with session.Session(graph=g) as sess2:
+        self.assertAllEqual(2.0, sess2.run(c))
+
+  def testSetDeviceAfterRunning(self):
+    with ops.Graph().as_default() as g:
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+    with session.Session(graph=g) as sess:
+      self.assertAllEqual(3.0, sess.run(c))
+      c.op._set_device('/cpu:0')  # pylint: disable=protected-access
+      with self.assertRaisesRegexp(
+          errors.FailedPreconditionError,
+          'add.*was changed by setting device after it was run'):
+        sess.run(c)
+
+  def testSetAttrAfterRunning(self):
+    with ops.Graph().as_default() as g:
+      a = constant_op.constant(1.0, dtype=dtypes.float32)
+      b = math_ops.cast(a, dtypes.float64)
+
+    with session.Session(graph=g) as sess:
+      self.assertAllEqual(1.0, sess.run(b))
+      b.op._set_attr('DstT',
+                     attr_value_pb2.AttrValue(type=types_pb2.DT_FLOAT))
+      with self.assertRaisesRegexp(
+          errors.FailedPreconditionError,
+          'Cast.*was changed by setting attribute after it was run'):
+        sess.run(b)
+
+  def testRunModifyRun(self):
+    with ops.Graph().as_default() as g:
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+      with session.Session(graph=g) as sess:
+        self.assertAllEqual(3.0, sess.run(c))
+
+        d = b + c
+        d.op._update_input(0, a)  # pylint: disable=protected-access
+        self.assertAllEqual(3.0, sess.run(c))
+        self.assertAllEqual(4.0, sess.run(d))
+
+  def testRunModifyRunTwoSessions(self):
+    with ops.Graph().as_default() as g:
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+      with session.Session(graph=g) as sess1:
+        with session.Session(graph=g) as sess2:
+          self.assertAllEqual(3.0, sess1.run(c))
+          self.assertAllEqual(3.0, sess2.run(c))
+
+          d = b + c
+          d.op._update_input(0, a)  # pylint: disable=protected-access
+          self.assertAllEqual(3.0, sess2.run(c))
+          self.assertAllEqual(4.0, sess2.run(d))
+
+          d.op._update_input(0, b)  # pylint: disable=protected-access
+          self.assertAllEqual(3.0, sess1.run(c))
+          self.assertAllEqual(5.0, sess1.run(d))
+
+          with self.assertRaisesRegexp(
+              errors.FailedPreconditionError,
+              'add.*was changed by updating input tensor after it was run'):
+            sess2.run(c)
+
+  def testTwoSessionsOneRunBeforeModification(self):
+    with ops.Graph().as_default() as g, ops.device('/cpu:0'):
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+    with session.Session(graph=g) as sess1:
+      with session.Session(graph=g) as sess2:
+        sess1.run(c)
+
+        c.op._set_device('/cpu:0')  # pylint: disable=protected-access
+
+        with self.assertRaisesRegexp(
+            errors.FailedPreconditionError,
+            'add.*was changed by setting device after it was run'):
+          sess1.run(c)
+
+        # sess2 was not run before modification
+        self.assertAllEqual(3.0, sess2.run(c))
+
+  def testTwoSessionsBothRunBeforeModification(self):
+    with ops.Graph().as_default() as g, ops.device('/cpu:0'):
+      a = constant_op.constant(1.0)
+      b = constant_op.constant(2.0)
+      c = a + b
+
+    with session.Session(graph=g) as sess1:
+      with session.Session(graph=g) as sess2:
+        sess1.run(c)
+        sess2.run(c)
+
+        c.op._set_device('/cpu:0')  # pylint: disable=protected-access
+
+        with self.assertRaisesRegexp(
+            errors.FailedPreconditionError,
+            'add.*was changed by setting device after it was run'):
+          sess1.run(c)
+
+        with self.assertRaisesRegexp(
+            errors.FailedPreconditionError,
+            'add.*was changed by setting device after it was run'):
+          sess2.run(c)
+
 
 if __name__ == '__main__':
   googletest.main()
diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index 5fa1a7e8fc2388bf64670624de7c653318dcb981..1fd488e7b6388f7953a279dca8f93ab57a85f63d 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -67,6 +67,15 @@ PyObject* CreateWrappedTFOperation(TF_Operation* tf_operation) {
   return SWIG_NewPointerObj(tf_operation, SWIGTYPE_p_TF_Operation, 0);
 }
 
+// Helper function to convert a Python list of ints to a C++ vector of int64s
+void PyInt64ListToVector(PyObject* py_int_seq, std::vector<int64_t>* vec) {
+  int size = PySequence_Fast_GET_SIZE(py_int_seq);
+  for (int i = 0; i < size; ++i) {
+    PyObject* item = PySequence_Fast_GET_ITEM(py_int_seq, i);
+    vec->push_back(PyInt_AsLong(item));
+  }
+}
+
 %}
 
 %include "tensorflow/python/client/tf_sessionrun_wrapper.i"
@@ -91,6 +100,9 @@ tensorflow::ImportNumpy();
 // _GLIBCXX_USE_CXX11_ABI flag value
 %constant const int __cxx11_abi_flag__ = tf_cxx11_abi_flag();
 
+// Flag indicating whether the build is monolithic
+%constant const int __monolithic_build__ = tf_monolithic_build();
+
 // Release the Python GIL for the duration of most methods.
 %exception {
   Py_BEGIN_ALLOW_THREADS;
@@ -145,6 +157,25 @@ tensorflow::ImportNumpy();
   }
 }
 
+%ignore TF_OperationOutputConsumers;
+%unignore TF_OperationOutputConsumers_wrapper;
+// See comment for "%noexception TF_SessionRun_wrapper;"
+%noexception TF_OperationGetOutputConsumers_wrapper;
+
+// Build a Python list of unicode strings and return it. (Operation names are
+// always represented as unicode.)
+%typemap(out) std::vector<const char*>
+tensorflow::TF_OperationOutputConsumers_wrapper {
+  $result = PyList_New($1.size());
+  if (!$result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
+  }
+
+  for (size_t i = 0; i < $1.size(); ++i) {
+    PyList_SET_ITEM($result, i, PyUnicode_FromString($1[i]));
+  }
+}
+
 %unignore GetOperationInputs;
 // See comment for "%noexception TF_SessionRun_wrapper;"
 %noexception GetOperationInputs;
@@ -157,13 +188,30 @@ tensorflow::ImportNumpy();
     SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
   }
 
-  // Unwrap the generated SwigValueWrapper<std::vector<TF_Output>> via &
-  std::vector<TF_Output>* tf_outputs = &$1;
-  for (size_t i = 0; i < $1.size(); ++i) {
-    PyList_SET_ITEM($result, i, CreateWrappedTFOutput((*tf_outputs)[i]));
+  // Unwrap the generated SwigValueWrapper<std::vector<TF_Output>>
+  const std::vector<TF_Output>& tf_outputs = $1;
+  for (size_t i = 0; i < tf_outputs.size(); ++i) {
+    PyList_SET_ITEM($result, i, CreateWrappedTFOutput(tf_outputs[i]));
   }
 }
 
+%ignore TF_ImportGraphDefResultsMissingUnusedInputMappings;
+%unignore TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper;
+// See comment for "%noexception TF_SessionRun_wrapper;"
+%noexception TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper;
+
+%typemap(out) std::vector<string>
+TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper{
+  $result = PyList_New($1.size());
+  if (!$result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
+  }
+  for (size_t i = 0; i < $1.size(); ++i) {
+    const string& input_str = $1[i];
+    PyList_SET_ITEM($result, i, PyBytes_FromStringAndSize(input_str.data(),
+                                                          input_str.size()));
+  }
+}
 
 ////////////////////////////////////////////////////////////////////////////////
 // BEGIN TYPEMAPS FOR tensorflow::TF_Run_wrapper()
@@ -437,6 +485,7 @@ tensorflow::ImportNumpy();
 %unignore tensorflow;
 %unignore TF_Run;
 %unignore EqualGraphDefWrapper;
+%unignore EqualAttrValueWrapper;
 
 // Include the wrapper for TF_PRunSetup from tf_session_helper.h.
 
@@ -532,6 +581,144 @@ def TF_Reset(target, containers=None, config=None):
 %unignore TF_GraphGetTensorShapeHelper;
 %ignore TF_GraphGetTensorShape;
 
+// We use TF_GraphSetTensorShape_wrapper instead of
+// TF_GraphSetTensorShape
+%ignore TF_GraphSetTensorShape;
+%unignore tensorflow;
+%unignore TF_GraphSetTensorShape_wrapper;
+
+// $input is a Python list of ints to a vector<int> for TF_GraphSetTensorShape_wrapper
+%typemap(in) (const std::vector<int64_t>& dims)
+    (std::vector<int64_t> dims_local){
+  if ($input != Py_None) {
+    PyObject* py_int_seq = PySequence_Fast($input, tensorflow::strings::Printf(
+          "$symname: expected list but got %s ",
+          Py_TYPE($input)->tp_name).c_str());
+    if (py_int_seq == nullptr) {
+      SWIG_exception_fail(SWIG_RuntimeError, tensorflow::strings::Printf(
+          "$symname: PySequence_Fast returned NULL.").c_str());
+    }
+    PyInt64ListToVector(py_int_seq, &dims_local);
+    Py_DECREF(py_int_seq);
+    $1 = &dims_local;
+  } else {
+    $1 = nullptr;
+  }
+}
+
+// We use TF_GraphGetTensorShape_wrapper instead of
+// TF_GraphGetTensorShape
+%ignore TF_GraphGetTensorShape;
+%unignore tensorflow;
+%unignore TF_GraphGetTensorShape_wrapper;
+
+// Build a Python list of ints and return it.
+%typemap(out) std::vector<int64_t> tensorflow::TF_GraphGetTensorShape_wrapper {
+  $result = PyList_New($1.size());
+  if (!$result) {
+    SWIG_exception_fail(SWIG_MemoryError, "$symname: couldn't create list");
+  }
+
+  for (size_t i = 0; i < $1.size(); ++i) {
+    PyList_SET_ITEM($result, i, PyInt_FromLong($1[i]));
+  }
+}
+
+// We use TF_GraphSetOutputHandleShapesAndTypes_wrapper instead of
+// TF_GraphSetOutputHandleShapesAndTypes
+%ignore TF_GraphSetOutputHandleShapesAndTypes;
+%unignore tensorflow;
+%unignore TF_GraphSetOutputHandleShapesAndTypes_wrapper;
+
+// The space between the double angle brackets below looks extraneous, but
+// our version of SWIG cannot parse ">>".
+%typemap(in) (const std::vector<std::vector<int64_t> >& shapes)
+    (std::vector<std::vector<int64_t> > shapes_local){
+  PyObject* seq = PySequence_Fast($input, tensorflow::strings::Printf(
+        "$symname: expected list but got %s ",
+        Py_TYPE($input)->tp_name).c_str());
+  if (seq == nullptr) {
+    SWIG_exception_fail(SWIG_RuntimeError, tensorflow::strings::Printf(
+        "$symname: PySequence_Fast returned NULL.").c_str());
+  }
+
+  int size = PySequence_Fast_GET_SIZE(seq);
+  if (size == 0) {
+    SWIG_exception_fail(SWIG_ValueError, tensorflow::strings::Printf(
+        "$symname: shapes list must be non-empty").c_str());
+  }
+
+  for (int i = 0; i < size; ++i) {
+    PyObject* item = PySequence_Fast_GET_ITEM(seq, i);
+    std::vector<int64_t> dims;
+    if (item != Py_None) {
+      PyObject* py_int_seq = PySequence_Fast(item, tensorflow::strings::Printf(
+            "$symname: expected list but got %s ",
+            Py_TYPE($input)->tp_name).c_str());
+      if (py_int_seq == nullptr) {
+        SWIG_exception_fail(SWIG_RuntimeError, tensorflow::strings::Printf(
+            "$symname: PySequence_Fast returned NULL.").c_str());
+      }
+      PyInt64ListToVector(py_int_seq, &dims);
+      Py_DECREF(py_int_seq);
+    }
+    shapes_local.push_back(dims);
+  }
+
+  Py_DECREF(seq);
+  $1 = &shapes_local;
+}
+
+%typemap(in) (const std::vector<int>& ranks)
+    (std::vector<int> ranks_local){
+  PyObject* seq = PySequence_Fast($input, tensorflow::strings::Printf(
+        "$symname: expected list but got %s ",
+        Py_TYPE($input)->tp_name).c_str());
+  if (seq == nullptr) {
+    SWIG_exception_fail(SWIG_RuntimeError, tensorflow::strings::Printf(
+        "$symname: PySequence_Fast returned NULL.").c_str());
+  }
+
+  int size = PySequence_Fast_GET_SIZE(seq);
+  if (size == 0) {
+    SWIG_exception_fail(SWIG_ValueError, tensorflow::strings::Printf(
+        "$symname: shapes list must be non-empty").c_str());
+  }
+
+  for (int i = 0; i < size; ++i) {
+    PyObject* item = PySequence_Fast_GET_ITEM(seq, i);
+    ranks_local.push_back((int) PyInt_AsLong(item));
+  }
+
+  Py_DECREF(seq);
+  $1 = &ranks_local;
+}
+
+%typemap(in) (const std::vector<TF_DataType>& types)
+    (std::vector<TF_DataType> types_local){
+  PyObject* seq = PySequence_Fast($input, tensorflow::strings::Printf(
+        "$symname: expected list but got %s ",
+        Py_TYPE($input)->tp_name).c_str());
+  if (seq == nullptr) {
+    SWIG_exception_fail(SWIG_RuntimeError, tensorflow::strings::Printf(
+        "$symname: PySequence_Fast returned NULL.").c_str());
+  }
+
+  int size = PySequence_Fast_GET_SIZE(seq);
+  if (size == 0) {
+    SWIG_exception_fail(SWIG_ValueError, tensorflow::strings::Printf(
+        "$symname: shapes list must be non-empty").c_str());
+  }
+
+  for (int i = 0; i < size; ++i) {
+    PyObject* item = PySequence_Fast_GET_ITEM(seq, i);
+    types_local.push_back((TF_DataType) PyInt_AsLong(item));
+  }
+
+  Py_DECREF(seq);
+  $1 = &types_local;
+}
+
 %include "tensorflow/python/client/tf_session_helper.h"
 
 %unignoreall
diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc
index ad982e5dd8d4fc2b151ab5e246e8cff3b88304b6..361dbc22b097a9bc82f656d7416b88c4a3a1ec2d 100644
--- a/tensorflow/python/client/tf_session_helper.cc
+++ b/tensorflow/python/client/tf_session_helper.cc
@@ -18,11 +18,16 @@ limitations under the License.
 #include <cstring>
 
 #include "tensorflow/c/c_api.h"
+#include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/c/tf_status_helper.h"
 #include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/log_memory.h"
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/graph/tensor_id.h"
 #include "tensorflow/core/lib/core/coding.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/equal_graph_def.h"
 #include "tensorflow/python/lib/core/ndarray_tensor.h"
@@ -299,6 +304,27 @@ string EqualGraphDefWrapper(const string& actual, const string& expected) {
   return EqualGraphDef(actual_def, expected_def, &diff) ? "" : diff;
 }
 
+string EqualAttrValueWrapper(const string& actual, const string& expected) {
+  AttrValue actual_attr_value;
+  if (!actual_attr_value.ParseFromString(actual)) {
+    return "actual is not a valid serialized AttrValue";
+  }
+
+  AttrValue expected_attr_value;
+  if (!expected_attr_value.ParseFromString(expected)) {
+    return "expected is not a valid serialized AttrValue";
+  }
+
+  string diff;
+  if (!AreAttrValuesEqual(actual_attr_value, expected_attr_value)) {
+    diff = strings::Printf(
+        "Actual AttrValue %s does not match Expected AttrValue %s.",
+        SummarizeAttrValue(actual_attr_value).c_str(),
+        SummarizeAttrValue(expected_attr_value).c_str());
+  }
+  return diff;
+}
+
 // Return value set to 6 inlined elements so it fits in a 64-byte cache line.
 tensorflow::gtl::InlinedVector<int64_t, 6> TF_GraphGetTensorShapeHelper(
     TF_Graph* graph, TF_Output output, TF_Status* out_status,
@@ -374,6 +400,19 @@ std::vector<TF_Operation*> TF_OperationGetControlInputs_wrapper(
   return control_inputs;
 }
 
+std::vector<const char*> TF_OperationOutputConsumers_wrapper(
+    TF_Output oper_out) {
+  int num_consumers = TF_OperationOutputNumConsumers(oper_out);
+  std::vector<TF_Input> consumers(num_consumers);
+  TF_OperationOutputConsumers(oper_out, consumers.data(), num_consumers);
+
+  std::vector<const char*> consumer_names(num_consumers);
+  for (int i = 0; i < num_consumers; ++i) {
+    consumer_names[i] = TF_OperationName(consumers[i].oper);
+  }
+  return consumer_names;
+}
+
 TF_Function* TF_GraphToFunction_wrapper(
     const TF_Graph* fn_body, const char* fn_name, bool append_hash_to_fn_name,
     const std::vector<TF_Operation*>* opers,
@@ -407,4 +446,51 @@ TF_Function* TF_GraphToFunction_wrapper(
                             opts, description, out_status);
 }
 
+void TF_GraphSetOutputHandleShapesAndTypes_wrapper(
+    TF_Graph* graph, TF_Output output,
+    const std::vector<std::vector<int64_t>>& shapes,
+    const std::vector<int>& ranks, const std::vector<TF_DataType>& types,
+    TF_Status* status) {
+  std::vector<const int64_t*> shapes_pointers(shapes.size());
+  for (int i = 0; i < shapes.size(); ++i) {
+    shapes_pointers[i] = ranks[i] <= 0 ? nullptr : &shapes[i][0];
+  }
+  TF_GraphSetOutputHandleShapesAndTypes(graph, output, shapes.size(),
+                                        shapes_pointers.data(), ranks.data(),
+                                        types.data(), status);
+}
+
+void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output,
+                                    const std::vector<int64_t>& dims,
+                                    bool unknown_shape, TF_Status* status) {
+  if (unknown_shape) {
+    TF_GraphSetTensorShape(graph, output, nullptr, -1, status);
+    return;
+  }
+  TF_GraphSetTensorShape(graph, output, dims.data(), dims.size(), status);
+}
+
+std::vector<int64_t> TF_GraphGetTensorShape_wrapper(TF_Graph* graph,
+                                                    TF_Output output,
+                                                    int num_dims,
+                                                    TF_Status* status) {
+  std::vector<int64_t> dims(num_dims);
+  TF_GraphGetTensorShape(graph, output, dims.data(), num_dims, status);
+  return dims;
+}
+
+std::vector<string> TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper(
+    TF_ImportGraphDefResults* results) {
+  int num_missing_unused_input_mappings;
+  const char** src_names;
+  int* src_indexes;
+  TF_ImportGraphDefResultsMissingUnusedInputMappings(
+      results, &num_missing_unused_input_mappings, &src_names, &src_indexes);
+  std::vector<string> input_strs(num_missing_unused_input_mappings);
+  for (int i = 0; i < num_missing_unused_input_mappings; ++i) {
+    input_strs[i] = TensorId(src_names[i], src_indexes[i]).ToString();
+  }
+  return input_strs;
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h
index 6ed08d3a5818163c03f2bb52157b8371312aba18..29d5b28f40a7c07c199eec8c8cd85de626f6b068 100644
--- a/tensorflow/python/client/tf_session_helper.h
+++ b/tensorflow/python/client/tf_session_helper.h
@@ -97,6 +97,13 @@ void TF_Reset_wrapper(const TF_SessionOptions* opt,
 // for no difference.
 string EqualGraphDefWrapper(const string& actual, const string& expected);
 
+// Convenience wrapper around AreAttrValuesEqual to make it easier to wrap.
+// The actual and expected strings must correspond to a serialized binary
+// representation of two AttrValue proto instances.
+// Returns an explanation if a difference is found, or the empty string
+// for no difference.
+string EqualAttrValueWrapper(const string& actual, const string& expected);
+
 // Gets shape from C API Graph object.
 //
 // If shape is known, returns shape vector where -1 means "unknown
@@ -160,6 +167,11 @@ std::vector<TF_Output> GetOperationInputs(TF_Operation* oper);
 std::vector<TF_Operation*> TF_OperationGetControlInputs_wrapper(
     TF_Operation* oper);
 
+// Retrieves the op names of the consumers of `oper_out`. The returned strings
+// have the lifetime of the underlying TF_Graph.
+std::vector<const char*> TF_OperationOutputConsumers_wrapper(
+    TF_Output oper_out);
+
 // `opers` equaling NULL are converted to `nopers = -1`.
 // `output_names` must be empty or have the same length as `outputs`.
 TF_Function* TF_GraphToFunction_wrapper(
@@ -168,6 +180,39 @@ TF_Function* TF_GraphToFunction_wrapper(
     const std::vector<TF_Output>& inputs, const std::vector<TF_Output>& outputs,
     const NameVector& output_names, const TF_FunctionOptions* opts,
     const char* description, TF_Status* out_status);
+
+// Set the shapes and types for the output's handle.
+//
+// The sizes of 'shapes', 'ranks', and 'types' must be equal; `shapes[i]`
+// contains the shape of the handle's i-th value, `ranks[i]` contains the i-th
+// shape's rank, and `types[i]` contains the i-th value's dtype. If the i-th
+// shape is unknown, then `ranks[i]` must be equal to -1.
+//
+// The space between the double angle brackets below looks extraneous, but
+// our version of SWIG cannot parse ">>".
+void TF_GraphSetOutputHandleShapesAndTypes_wrapper(
+    TF_Graph* graph, TF_Output output,
+    const std::vector<std::vector<int64_t> >& shapes,
+    const std::vector<int>& ranks, const std::vector<TF_DataType>& types,
+    TF_Status* status);
+
+// Set the shape of output. If unknown is true, `num_dims` must be set to
+// -1 and `dims` is set to nullptr.
+void TF_GraphSetTensorShape_wrapper(TF_Graph* graph, TF_Output output,
+                                    const std::vector<int64_t>& dims,
+                                    bool unknown_shape, TF_Status* status);
+
+// Return the shape of output. `num_dims` should be the output of
+// TF_GraphGetTensorNumDims. If `num_dims = -1`, this should not be called.
+std::vector<int64_t> TF_GraphGetTensorShape_wrapper(TF_Graph* graph,
+                                                    TF_Output output,
+                                                    int num_dims,
+                                                    TF_Status* status);
+
+// Returns the string representations of the missing unused input mappings.
+std::vector<string> TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper(
+    TF_ImportGraphDefResults* results);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_PYTHON_CLIENT_TF_SESSION_HELPER_H_
diff --git a/tensorflow/python/client/virtual_gpu_test.py b/tensorflow/python/client/virtual_gpu_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..addf63474c9ba213cf0c1eeffa9d31e94f15eac1
--- /dev/null
+++ b/tensorflow/python/client/virtual_gpu_test.py
@@ -0,0 +1,245 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for multiple virtual GPU support."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import random
+
+import numpy as np
+
+from google.protobuf import text_format
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging as logging
+
+
+class VirtualGpuTestUtil(object):
+
+  def __init__(self,
+               dim=1000,
+               num_ops=100,
+               virtual_devices_per_gpu=None,
+               device_probabilities=None):
+    self._dim = dim
+    self._num_ops = num_ops
+    if virtual_devices_per_gpu is None:
+      self._virtual_devices_per_gpu = [3]
+    else:
+      self._virtual_devices_per_gpu = virtual_devices_per_gpu
+    self._visible_device_list = [
+        i for i in range(len(self._virtual_devices_per_gpu))
+    ]
+    gpu_devices = [
+        ('/gpu:' + str(i)) for i in range(sum(self._virtual_devices_per_gpu))
+    ]
+    self.devices = ['/cpu:0'] + gpu_devices
+    self._num_devices = len(self.devices)
+    # Each virtual device gets 2GB memory.
+    self._mem_limits_mb = [
+        ([1 << 11] * i) for i in self._virtual_devices_per_gpu
+    ]
+    self.config = self._GetSessionConfig()
+
+    if device_probabilities is not None:
+      self._device_probabilities = list(device_probabilities)  # Deep copy
+      for i in range(1, self._num_devices):
+        self._device_probabilities[i] += self._device_probabilities[i - 1]
+    else:
+      # Each device gets same probability to be assigned an operation.
+      step = 1.0 / self._num_devices
+      self._device_probabilities = [
+          (x + 1) * step for x in range(self._num_devices)
+      ]
+    # To prevent rounding error causing problems.
+    self._device_probabilities[self._num_devices - 1] = 1.1
+
+    logging.info('dim: %d', self._dim)
+    logging.info('num_ops: %d', self._num_ops)
+    logging.info('visible_device_list: %s', str(self._visible_device_list))
+    logging.info('virtual_devices_per_gpu: %s',
+                 str(self._virtual_devices_per_gpu))
+    logging.info('mem_limits: %s', str(self._mem_limits_mb))
+    logging.info('devices: %s', str(self.devices))
+    logging.info('config: %s', text_format.MessageToString(self.config))
+    logging.info('device_probabilities: %s', str(self._device_probabilities))
+
+  # Creates virtual GPU devices
+  def _GetSessionConfig(self):
+    virtual_device_gpu_options = config_pb2.GPUOptions(
+        visible_device_list=','.join(str(d) for d in self._visible_device_list),
+        experimental=config_pb2.GPUOptions.Experimental(virtual_devices=[
+            config_pb2.GPUOptions.Experimental.VirtualDevices(
+                memory_limit_mb=i) for i in self._mem_limits_mb
+        ]))
+    return config_pb2.ConfigProto(gpu_options=virtual_device_gpu_options)
+
+  # Generates a list of 3-tuples, each tuple contains the source and destination
+  # device index for a binary operation like 'add', like:
+  # (src_devcie_1, src_device_2, dst_device)
+  def _GenerateOperationPlacement(self):
+    result = []
+    for unused_i in range(self._num_ops):
+      op_device = ()
+      for unused_j in range(3):
+        random_num = random.random()
+        for device_index in range(self._num_devices):
+          if self._device_probabilities[device_index] > random_num:
+            op_device += (device_index,)
+            break
+      result.append(op_device)
+    return result
+
+  # Logs part of the matrix for debugging purposes.
+  def _LogMatrix(self, mat, dim):
+    logging.info('---- printing the first 10*10 submatrix ----')
+    for i in range(min(10, dim)):
+      row = ''
+      for j in range(min(10, dim)):
+        row += ' ' + str(mat[i][j])
+      logging.info(row)
+
+  # Runs a list of 'add' operations where each operation satisfies the device
+  # placement constraints in `op_placement`, and returns the result.
+  def _TestRandomGraphWithDevices(self,
+                                  sess,
+                                  seed,
+                                  op_placement,
+                                  devices,
+                                  debug_mode=False):
+    data = []
+    shape = (self._dim, self._dim)
+    feed_dict = {}
+    # Initialize the matrices
+    for i in range(len(devices)):
+      with ops.device(devices[i]):
+        var = array_ops.placeholder(dtypes.float32, shape=shape)
+        np.random.seed(seed + i)
+        feed_dict[var] = np.random.uniform(
+            low=0, high=0.1, size=shape).astype(np.float32)
+        data.append(var)
+    # Run the 'add' operations on those matrices
+    for op in op_placement:
+      with ops.device(devices[op[2]]):
+        data[op[2]] = math_ops.add(data[op[0]], data[op[1]])
+    with ops.device('/cpu:0'):
+      s = data[0]
+      for i in range(1, len(data)):
+        s = math_ops.add(s, data[i])
+    if debug_mode:
+      logging.info(ops.get_default_graph().as_graph_def())
+    result = sess.run(s, feed_dict=feed_dict)
+    self._LogMatrix(result, self._dim)
+    return result
+
+  # Generates a random graph with `self._num_ops` 'add' operations with each
+  # operation placed on different virtual device, test that the result is
+  # identical to the result obtained by running the same graph on cpu only.
+  def TestRandomGraph(self, sess, op_placement=None, random_seed=None):
+    debug_mode = False
+    if op_placement is None:
+      op_placement = self._GenerateOperationPlacement()
+    else:
+      debug_mode = True
+    if random_seed is None:
+      random_seed = random.randint(0, 1 << 31)
+    else:
+      debug_mode = True
+    logging.info('Virtual gpu functional test for random graph...')
+    logging.info('operation placement: %s', str(op_placement))
+    logging.info('random seed: %d', random_seed)
+
+    # Run with multiple virtual gpus.
+    result_vgd = self._TestRandomGraphWithDevices(
+        sess, random_seed, op_placement, self.devices, debug_mode=debug_mode)
+    # Run with single cpu.
+    result_cpu = self._TestRandomGraphWithDevices(
+        sess,
+        random_seed,
+        op_placement, ['/cpu:0'] * self._num_devices,
+        debug_mode=debug_mode)
+    # Test the result
+    for i in range(self._dim):
+      for j in range(self._dim):
+        if result_vgd[i][j] != result_cpu[i][j]:
+          logging.error(
+              'Result mismatch at row %d column %d: expected %f, actual %f', i,
+              j, result_cpu[i][j], result_vgd[i][j])
+          logging.error('Devices: %s', self.devices)
+          logging.error('Memory limits (in MB): %s', self._mem_limits_mb)
+          return False
+    return True
+
+
+@test_util.with_c_api
+class VirtualGpuTest(test_util.TensorFlowTestCase):
+
+  def __init__(self, method_name):
+    super(VirtualGpuTest, self).__init__(method_name)
+    self._util = VirtualGpuTestUtil()
+
+  def testStatsContainAllDeviceNames(self):
+    with self.test_session(config=self._util.config) as sess:
+      # TODO(laigd): b/70811538. The is_gpu_available() call will invoke
+      # DeviceFactory::AddDevices() with a default SessionOption, which prevents
+      # adding virtual devices in the future, thus must be called within a
+      # context of a session within which virtual devices are created. Same in
+      # the following test case.
+      if not test.is_gpu_available(cuda_only=True):
+        self.skipTest('No GPU available')
+      run_options = config_pb2.RunOptions(
+          trace_level=config_pb2.RunOptions.FULL_TRACE)
+      run_metadata = config_pb2.RunMetadata()
+
+      mat_shape = [10, 10]
+      data = []
+      for d in self._util.devices:
+        with ops.device(d):
+          var = variables.Variable(random_ops.random_uniform(mat_shape))
+          sess.run(var.initializer)
+          data.append(var)
+      s = data[0]
+      for i in range(1, len(data)):
+        s = math_ops.add(s, data[i])
+      sess.run(s, options=run_options, run_metadata=run_metadata)
+
+    self.assertTrue(run_metadata.HasField('step_stats'))
+    step_stats = run_metadata.step_stats
+    devices = [d.device for d in step_stats.dev_stats]
+    self.assertTrue('/job:localhost/replica:0/task:0/device:CPU:0' in devices)
+    self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:0' in devices)
+    self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:1' in devices)
+    self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:2' in devices)
+
+  def testLargeRandomGraph(self):
+    with self.test_session(config=self._util.config) as sess:
+      if not test.is_gpu_available(cuda_only=True):
+        self.skipTest('No GPU available')
+      for _ in range(10):
+        if not self._util.TestRandomGraph(sess):
+          return
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..5fb389cf92818c7a464cf4a4479d86377185d5cf
--- /dev/null
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -0,0 +1,378 @@
+# Tests of TensorFlow kernels written using the Python API.
+
+package(
+    default_visibility = ["//tensorflow:internal"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "tf_py_test")
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "sycl_py_test")
+
+tf_py_test(
+    name = "batch_dataset_op_test",
+    size = "small",
+    srcs = ["batch_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "dataset_constructor_op_test",
+    size = "small",
+    srcs = ["dataset_constructor_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+    tags = [
+        "manual",
+        "nomac",  # b/62040583
+    ],
+)
+
+tf_py_test(
+    name = "dataset_from_generator_op_test",
+    size = "small",
+    srcs = ["dataset_from_generator_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+tf_py_test(
+    name = "filter_dataset_op_test",
+    size = "small",
+    srcs = ["filter_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "flat_map_dataset_op_test",
+    size = "small",
+    srcs = ["flat_map_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:training",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+    grpc_enabled = True,
+)
+
+tf_py_test(
+    name = "list_files_dataset_op_test",
+    size = "small",
+    srcs = ["list_files_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "interleave_dataset_op_test",
+    size = "small",
+    srcs = ["interleave_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:training",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "map_dataset_op_test",
+    size = "small",
+    srcs = ["map_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:data_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:lookup_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "prefetch_dataset_op_test",
+    size = "small",
+    srcs = ["prefetch_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "range_dataset_op_test",
+    size = "small",
+    srcs = ["range_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+)
+
+tf_py_test(
+    name = "reader_dataset_ops_test",
+    size = "small",
+    srcs = ["reader_dataset_ops_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/ops:readers",
+    ],
+)
+
+tf_py_test(
+    name = "sequence_dataset_op_test",
+    size = "small",
+    srcs = ["sequence_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "shuffle_dataset_op_test",
+    size = "small",
+    srcs = ["shuffle_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+)
+
+tf_py_test(
+    name = "shard_dataset_op_test",
+    size = "small",
+    srcs = ["shard_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "cache_dataset_op_test",
+    size = "small",
+    srcs = ["cache_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+)
+
+tf_py_test(
+    name = "zip_dataset_op_test",
+    size = "small",
+    srcs = ["zip_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+tf_py_test(
+    name = "concatenate_dataset_op_test",
+    size = "small",
+    srcs = ["concatenate_dataset_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+    ],
+)
+
+tf_py_test(
+    name = "iterator_ops_test",
+    size = "small",
+    srcs = ["iterator_ops_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:function",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:training",
+    ],
+    grpc_enabled = True,
+)
+
+tf_py_test(
+    name = "iterator_ops_cluster_test",
+    size = "small",
+    srcs = ["iterator_ops_cluster_test.py"],
+    additional_deps = [
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:function",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+    grpc_enabled = True,
+    tags = [
+        "no_oss",  # Test flaky due to port collisions.
+        "no_windows",
+    ],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/data/kernel_tests/batch_dataset_op_test.py
similarity index 94%
rename from tensorflow/python/kernel_tests/batch_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/batch_dataset_op_test.py
index 0546218601d6d1eac3658f86b58af43c2d5a5f04..53c8be1d1dc8b2f23b4faef7d64350edffede34a 100644
--- a/tensorflow/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/batch_dataset_op_test.py
@@ -187,6 +187,26 @@ class BatchDatasetTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  def testBatchShapeError(self):
+    def generator():
+      yield [1.0, 2.0, 3.0]
+      yield [4.0, 5.0, 6.0]
+      yield [7.0, 8.0, 9.0, 10.0]
+
+    iterator = (dataset_ops.Dataset.from_generator(generator, dtypes.float32,
+                                                   output_shapes=[None])
+                .batch(3)
+                .make_initializable_iterator())
+    next_element = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(iterator.initializer)
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r"Cannot batch tensors with different shapes in component 0. "
+          r"First element had shape \[3\] and element 2 had shape \[4\]."):
+        sess.run(next_element)
+
   def testPaddedBatchDataset(self):
     seq_lens = array_ops.placeholder(dtypes.int32, shape=[None])
     padded_shape = array_ops.placeholder(dtypes.int64, shape=[1])
diff --git a/tensorflow/python/kernel_tests/cache_dataset_op_test.py b/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/cache_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/cache_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/concatenate_dataset_op_test.py b/tensorflow/python/data/kernel_tests/concatenate_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/concatenate_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/concatenate_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py
similarity index 77%
rename from tensorflow/python/kernel_tests/dataset_constructor_op_test.py
rename to tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py
index 9e2a62055051d7cea269fcbcdb697a895b33d821..14627810b57f68fd96e3e3cc7b51b4fbf7365299 100644
--- a/tensorflow/python/kernel_tests/dataset_constructor_op_test.py
+++ b/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py
@@ -17,6 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import time
+
 import numpy as np
 
 from tensorflow.core.protobuf import config_pb2
@@ -37,7 +39,7 @@ from tensorflow.python.platform import test
 class DatasetConstructorTest(test.TestCase):
 
   def testFromTensors(self):
-    """Test an dataset that represents a single tuple of tensors."""
+    """Test a dataset that represents a single tuple of tensors."""
     components = (np.array(1), np.array([1, 2, 3]), np.array(37.0))
 
     iterator = (dataset_ops.Dataset.from_tensors(components)
@@ -62,7 +64,7 @@ class DatasetConstructorTest(test.TestCase):
     self.assertAllEqual(a.dense_shape, b.dense_shape)
 
   def testFromTensorsSparse(self):
-    """Test an dataset that represents a single tuple of tensors."""
+    """Test a dataset that represents a single tuple of tensors."""
     components = (sparse_tensor.SparseTensorValue(
         indices=np.array([[0]]),
         values=np.array([0]),
@@ -125,7 +127,7 @@ class DatasetConstructorTest(test.TestCase):
         sess.run(get_next)
 
   def testFromTensorSlices(self):
-    """Test an dataset that represents the slices from a tuple of tensors."""
+    """Test a dataset that represents the slices from a tuple of tensors."""
     components = (
         np.tile(np.array([[1], [2], [3], [4]]), 20), np.tile(
             np.array([[12], [13], [14], [15]]), 22),
@@ -150,7 +152,7 @@ class DatasetConstructorTest(test.TestCase):
         sess.run(get_next)
 
   def testFromTensorSlicesSparse(self):
-    """Test an dataset that represents the slices from a tuple of tensors."""
+    """Test a dataset that represents the slices from a tuple of tensors."""
     components = (sparse_tensor.SparseTensorValue(
         indices=np.array([[0, 0], [1, 0], [2, 0]]),
         values=np.array([0, 0, 0]),
@@ -206,7 +208,7 @@ class DatasetConstructorTest(test.TestCase):
         sess.run(get_next)
 
   def testFromTensorSlicesMixed(self):
-    """Test an dataset that represents the slices from a tuple of tensors."""
+    """Test a dataset that represents the slices from a tuple of tensors."""
     components = (np.tile(np.array([[1], [2], [3]]), 20),
                   np.tile(np.array([[12], [13], [14]]), 22),
                   np.array([37.0, 38.0, 39.0]),
@@ -484,11 +486,168 @@ class DatasetConstructorTest(test.TestCase):
       sess.run(var_1.initializer)
 
       iterator = dataset.make_initializable_iterator()
+      sess.run(iterator.initializer)
 
       with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          "Trying to access resource located in device"):
-        sess.run(iterator.initializer)
+          errors.FailedPreconditionError,
+          "Error while reading resource variable Variable"):
+        sess.run(iterator.get_next())
+
+
+class DatasetConstructorBenchmark(test.Benchmark):
+
+  def benchmarkSliceRepeatBatch(self):
+    input_size = 10000
+    batch_size = 100
+    num_epochs = 100
+
+    input_data = np.random.randn(input_size)
+
+    dataset = (
+        dataset_ops.Dataset.from_tensor_slices(input_data)
+        .repeat(num_epochs + 1).batch(batch_size))
+    iterator = dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with session.Session() as sess:
+      sess.run(iterator.initializer)
+      # Run one whole epoch to burn in the computation.
+      for _ in range(input_size // batch_size):
+        sess.run(next_element)
+      deltas = []
+      try:
+        while True:
+          start = time.time()
+          sess.run(next_element)
+          deltas.append(time.time() - start)
+      except errors.OutOfRangeError:
+        pass
+
+    median_wall_time = np.median(deltas)
+    print("Slice/repeat/batch with sess.run() input size: %d batch size: %d "
+          "Median wall time per element: %f" % (input_size, batch_size,
+                                                median_wall_time))
+    self.report_benchmark(
+        iters=len(deltas),
+        wall_time=median_wall_time,
+        name="benchmark_slice_repeat_batch_input_%d_batch_%d" % (input_size,
+                                                                 batch_size))
+
+  def benchmarkSliceRepeatBatchCallable(self):
+    input_size = 10000
+    batch_size = 100
+    num_epochs = 100
+
+    input_data = np.random.randn(input_size)
+
+    dataset = (
+        dataset_ops.Dataset.from_tensor_slices(input_data)
+        .repeat(num_epochs + 1).batch(batch_size))
+    iterator = dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with session.Session() as sess:
+      sess.run(iterator.initializer)
+      get_next_element = sess.make_callable(next_element)
+      # Run one whole epoch to burn in the computation.
+      for _ in range(input_size // batch_size):
+        get_next_element()
+      deltas = []
+      try:
+        while True:
+          start = time.time()
+          get_next_element()
+          deltas.append(time.time() - start)
+      except errors.OutOfRangeError:
+        pass
+
+    median_wall_time = np.median(deltas)
+    print(
+        "Slice/repeat/batch with callable input size: %d batch size: %d Median"
+        " wall time per element: %f" % (input_size, batch_size,
+                                        median_wall_time))
+    self.report_benchmark(
+        iters=len(deltas),
+        wall_time=median_wall_time,
+        name="benchmark_slice_repeat_batch_callable_input_%d_batch_%d" %
+        (input_size, batch_size))
+
+  def benchmarkReshapeSliceRepeatCallable(self):
+    input_size = 10000
+    batch_size = 100
+    num_epochs = 100
+
+    input_data = np.random.randn(input_size)
+
+    dataset = (
+        dataset_ops.Dataset.from_tensor_slices(input_data.reshape(100, 100))
+        .repeat(num_epochs + 1))
+    iterator = dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with session.Session() as sess:
+      sess.run(iterator.initializer)
+      get_next_element = sess.make_callable(next_element)
+      # Run one whole epoch to burn in the computation.
+      for _ in range(input_size // batch_size):
+        get_next_element()
+      deltas = []
+      try:
+        while True:
+          start = time.time()
+          get_next_element()
+          deltas.append(time.time() - start)
+      except errors.OutOfRangeError:
+        pass
+
+    median_wall_time = np.median(deltas)
+    print("Reshape/slice/repeat with callable input size: %d batch size: %d "
+          "Median wall time per element: %f" % (input_size, batch_size,
+                                                median_wall_time))
+    self.report_benchmark(
+        iters=len(deltas),
+        wall_time=median_wall_time,
+        name="benchmark_reshape_slice_repeat_callable_input_%d_batch_%d" %
+        (input_size, batch_size))
+
+  def benchmarkSliceBatchCacheRepeatCallable(self):
+    input_size = 10000
+    batch_size = 100
+    num_epochs = 100
+
+    input_data = np.random.randn(input_size)
+
+    dataset = (
+        dataset_ops.Dataset.from_tensor_slices(input_data).batch(batch_size)
+        .cache().repeat(num_epochs + 1))
+    iterator = dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with session.Session() as sess:
+      sess.run(iterator.initializer)
+      get_next_element = sess.make_callable(next_element)
+      # Run one whole epoch to burn in the computation.
+      for _ in range(input_size // batch_size):
+        get_next_element()
+      deltas = []
+      try:
+        while True:
+          start = time.time()
+          get_next_element()
+          deltas.append(time.time() - start)
+      except errors.OutOfRangeError:
+        pass
+
+    median_wall_time = np.median(deltas)
+    print(
+        "Slice/batch/cache/repeat with callable input size: %d batch size: %d "
+        "Median wall time per element: %f"
+        % (input_size, batch_size, median_wall_time))
+    self.report_benchmark(
+        iters=len(deltas),
+        wall_time=median_wall_time,
+        name="benchmark_slice_batch_cache_repeat_callable_input_%d_batch_%d" %
+        (input_size, batch_size))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/kernel_tests/dataset_from_generator_op_test.py b/tensorflow/python/data/kernel_tests/dataset_from_generator_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/dataset_from_generator_op_test.py
rename to tensorflow/python/data/kernel_tests/dataset_from_generator_op_test.py
diff --git a/tensorflow/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/filter_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/flat_map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/flat_map_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/flat_map_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/flat_map_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/interleave_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/iterator_ops_cluster_test.py b/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/iterator_ops_cluster_test.py
rename to tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py
diff --git a/tensorflow/python/kernel_tests/iterator_ops_test.py b/tensorflow/python/data/kernel_tests/iterator_ops_test.py
similarity index 98%
rename from tensorflow/python/kernel_tests/iterator_ops_test.py
rename to tensorflow/python/data/kernel_tests/iterator_ops_test.py
index 513c36d64fa3e8aa00410b7fd06fa2e061aec4c5..23c6d7385f8d4a12019fa514f349f2598d9629de 100644
--- a/tensorflow/python/kernel_tests/iterator_ops_test.py
+++ b/tensorflow/python/data/kernel_tests/iterator_ops_test.py
@@ -18,6 +18,8 @@ from __future__ import division
 from __future__ import print_function
 
 import os
+import warnings
+
 import numpy as np
 
 from tensorflow.core.protobuf import config_pb2
@@ -633,6 +635,18 @@ class IteratorTest(test.TestCase):
         with self.assertRaises(errors.InvalidArgumentError):
           sess.run(restore_op)
 
+  def testRepeatedGetNextWarning(self):
+    iterator = dataset_ops.Dataset.range(10).make_one_shot_iterator()
+    warnings.simplefilter("always")
+    with warnings.catch_warnings(record=True) as w:
+      for _ in range(100):
+        iterator.get_next()
+    self.assertEqual(100 - iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD,
+                     len(w))
+    for warning in w:
+      self.assertTrue(
+          iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE in str(warning.message))
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/list_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/list_files_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
similarity index 99%
rename from tensorflow/python/kernel_tests/map_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index ad6bbc043db9e44ec7893cd9ae29898a8c7fedaa..04d1abdb254feea1df6f1b8cfc5a512802107224 100644
--- a/tensorflow/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -361,11 +361,12 @@ class MapDatasetTest(test.TestCase):
                 .map(lambda _: counter_var.assign_add(1))
                 .make_initializable_iterator())
     init_op = iterator.initializer
+    get_next = iterator.get_next()
 
     with self.test_session() as sess:
-      with self.assertRaisesRegexp(errors.FailedPreconditionError,
-                                   "Failed to capture resource"):
-        sess.run(init_op)
+      sess.run(init_op)
+      with self.assertRaises(errors.NotFoundError):
+        sess.run(get_next)
 
   def testSeededStatefulOperatorIsProperlyStateful(self):
     iterator = (dataset_ops.Dataset.from_tensors(0).repeat(10)
diff --git a/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py b/tensorflow/python/data/kernel_tests/prefetch_dataset_op_test.py
similarity index 94%
rename from tensorflow/python/kernel_tests/prefetch_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/prefetch_dataset_op_test.py
index edea9c9027e72db33074adc31af71dc74e578f3b..646324cb95df6fc1fa0a901ebdccc8d4ef74a66c 100644
--- a/tensorflow/python/kernel_tests/prefetch_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/prefetch_dataset_op_test.py
@@ -25,10 +25,11 @@ from tensorflow.python.platform import test
 
 
 class PrefetchDatasetTest(test.TestCase):
+
   def testBufferSize(self):
     buffer_size = array_ops.placeholder(dtypes.int64, shape=[])
     iterator = dataset_ops.Dataset.range(10).prefetch(
-      buffer_size=buffer_size).make_initializable_iterator()
+        buffer_size=buffer_size).make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -42,7 +43,7 @@ class PrefetchDatasetTest(test.TestCase):
   def testInvalidBufferSize(self):
     buffer_size = array_ops.placeholder(dtypes.int64, shape=[])
     iterator = dataset_ops.Dataset.range(10).prefetch(
-      buffer_size=buffer_size).make_initializable_iterator()
+        buffer_size=buffer_size).make_initializable_iterator()
     init_op = iterator.initializer
 
     with self.assertRaisesRegexp(errors.InvalidArgumentError, "buffer_size"):
diff --git a/tensorflow/python/kernel_tests/range_dataset_op_test.py b/tensorflow/python/data/kernel_tests/range_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/range_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/range_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py
similarity index 97%
rename from tensorflow/python/kernel_tests/reader_dataset_ops_test.py
rename to tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py
index c8e7333b4b9949b6b6ef5f7f6d63e5ff8c354c37..d7140088c310767d40bd2cf3413c899375acab15 100644
--- a/tensorflow/python/kernel_tests/reader_dataset_ops_test.py
+++ b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py
@@ -272,6 +272,24 @@ class FixedLengthRecordReaderTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(iterator.get_next())
 
+  def testFixedLengthRecordDatasetWrongSize(self):
+    test_filenames = self._createFiles()
+    dataset = readers.FixedLengthRecordDataset(
+        test_filenames,
+        self._record_bytes + 1,  # Incorrect record length.
+        self._header_bytes,
+        self._footer_bytes,
+        buffer_size=10)
+    iterator = dataset.make_one_shot_iterator()
+
+    with self.test_session() as sess:
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r"Excluding the header \(5 bytes\) and footer \(2 bytes\), input "
+          r"file \".*fixed_length_record.0.txt\" has body length 21 bytes, "
+          r"which is not an exact multiple of the record length \(4 bytes\)."):
+        sess.run(iterator.get_next())
+
   def _iterator_checkpoint_path(self):
     return os.path.join(self.get_temp_dir(), "iterator")
 
diff --git a/tensorflow/python/kernel_tests/sequence_dataset_op_test.py b/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/sequence_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/shard_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shard_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/shard_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/shard_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/shuffle_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
diff --git a/tensorflow/python/kernel_tests/zip_dataset_op_test.py b/tensorflow/python/data/kernel_tests/zip_dataset_op_test.py
similarity index 100%
rename from tensorflow/python/kernel_tests/zip_dataset_op_test.py
rename to tensorflow/python/data/kernel_tests/zip_dataset_op_test.py
diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD
index 05acfe4de7855f398d4e14f7478f5909f3e20431..f12b358a7dc35c18338171e489fa88ba1a82d11b 100644
--- a/tensorflow/python/data/ops/BUILD
+++ b/tensorflow/python/data/ops/BUILD
@@ -21,6 +21,7 @@ py_library(
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:tensor_util",
+        "//tensorflow/python:util",
         "//tensorflow/python/data/util:nest",
         "//tensorflow/python/data/util:sparse",
         "//third_party/py/numpy",
@@ -33,11 +34,11 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":dataset_ops",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python:dataset_ops_gen",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/util:convert",
     ],
 )
 
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index dbe29c087a40b8797013067e63df3e6ad0a08889..0594c6d6a7325ae0952f012e0d543e5c80edb529 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -40,7 +40,7 @@ from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.ops import gen_io_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
-from tensorflow.python.ops import sparse_ops
+from tensorflow.python.util import deprecation
 
 
 class Dataset(object):
@@ -219,6 +219,7 @@ class Dataset(object):
     return TensorSliceDataset(tensors)
 
   @staticmethod
+  @deprecation.deprecated(None, "Use `tf.data.Dataset.from_tensor_slices()`.")
   def from_sparse_tensor_slices(sparse_tensor):
     """Splits each rank-N `tf.SparseTensor` in this dataset row-wise.
 
@@ -285,6 +286,23 @@ class Dataset(object):
     sess.run(value)  # (2, array([1, 1]))
     ```
 
+    NOTE: The current implementation of `Dataset.from_generator()` uses
+    @{tf.py_func} and inherits the same constraints. In particular, it
+    requires the `Dataset`- and `Iterator`-related operations to be placed
+    on a device in the same process as the Python program that called
+    `Dataset.from_generator()`. The body of `generator` will not be
+    serialized in a `GraphDef`, and you should not use this method if you
+    need to serialize your model and restore it in a different environment.
+
+    NOTE: If `generator` depends on mutable global variables or other external
+    state, be aware that the runtime may invoke `generator` multiple times
+    (in order to support repeating the `Dataset`) and at any time
+    between the call to `Dataset.from_generator()` and the production of the
+    first element from the generator. Mutating global variables or external
+    state can cause undefined behavior, and we recommend that you explicitly
+    cache any external state in `generator` before calling
+    `Dataset.from_generator()`.
+
     Args:
       generator: A callable object that takes no arguments and returns an
         object that supports the `iter()` protocol.
@@ -706,6 +724,12 @@ class Dataset(object):
   def batch(self, batch_size):
     """Combines consecutive elements of this dataset into batches.
 
+    NOTE: If the number of elements (`N`) in this dataset is not an exact
+    multiple of `batch_size`, the final batch contain smaller tensors with
+    shape `N % batch_size` in the batch dimension. If your program depends on
+    the batches having the same shape, consider using the
+    @{tf.contrib.data.batch_and_drop_remainder} transformation instead.
+
     Args:
       batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
         consecutive elements of this dataset to combine in a single batch.
@@ -785,7 +809,7 @@ class Dataset(object):
     ```python
     # Preprocess 4 files concurrently, and interleave blocks of 16 records from
     # each file.
-    filenames = ["/var/data/file1.txt", "/var/data/file2.txt", ..."]
+    filenames = ["/var/data/file1.txt", "/var/data/file2.txt", ...]
     dataset = (Dataset.from_tensor_slices(filenames)
                .interleave(lambda x:
                    TextLineDataset(x).map(parse_fn, num_parallel_calls=1),
@@ -944,11 +968,7 @@ class TensorSliceDataset(Dataset):
     batch_dim = flat_tensors[0].get_shape()[0]
     for t in flat_tensors[1:]:
       batch_dim.assert_is_compatible_with(t.get_shape()[0])
-    self._tensors = nest.pack_sequence_as(tensors, [
-        sparse_ops.serialize_many_sparse(tensor)
-        if sparse_tensor_lib.is_sparse(tensor) else tensor
-        for tensor in nest.flatten(tensors)
-    ])
+    self._tensors = sparse.serialize_many_sparse_tensors(tensors)
     self._output_classes = sparse.get_classes(tensors)
     self._output_shapes = nest.pack_sequence_as(
         tensors, [t.get_shape()[1:] for t in nest.flatten(tensors)])
@@ -1233,7 +1253,26 @@ class ShuffleDataset(Dataset):
                buffer_size,
                seed=None,
                reshuffle_each_iteration=None):
-    """See `Dataset.shuffle()` for details."""
+    """Randomly shuffles the elements of this dataset.
+
+    Args:
+      input_dataset: The input dataset.
+      buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the
+        number of elements from this dataset from which the new
+        dataset will sample.
+      seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+        random seed that will be used to create the distribution. See
+        @{tf.set_random_seed} for behavior.
+      reshuffle_each_iteration: (Optional.) A boolean, which if true indicates
+        that the dataset should be pseudorandomly reshuffled each time it is
+        iterated over. (Defaults to `True`.)
+
+    Returns:
+      A `Dataset`.
+
+    Raises:
+      ValueError: if invalid arguments are provided.
+    """
     super(ShuffleDataset, self).__init__()
     self._input_dataset = input_dataset
     self._buffer_size = ops.convert_to_tensor(
diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py
index 663bed07b257b4ccdd657370e370c0f4e2fdf77b..0cbdb3ab19d8f1b966a867dfcf709c1a4a49b871 100644
--- a/tensorflow/python/data/ops/iterator_ops.py
+++ b/tensorflow/python/data/ops/iterator_ops.py
@@ -17,6 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import warnings
+
 from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import sparse
 from tensorflow.python.framework import dtypes
@@ -25,6 +27,26 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import gen_dataset_ops
 
 
+# NOTE(mrry): It is legitimate to call `Iterator.get_next()` multiple
+# times, e.g. when you are distributing different elements to multiple
+# devices in a single step. However, a common pitfall arises when
+# users call `Iterator.get_next()` in each iteration of their training
+# loop. `Iterator.get_next()` adds ops to the graph, and executing
+# each op allocates resources (including threads); as a consequence,
+# invoking it in every iteration of a training loop causes slowdown
+# and eventual resource exhaustion. To guard against this outcome, we
+# log a warning when the number of uses crosses a threshold of suspicion.
+GET_NEXT_CALL_WARNING_THRESHOLD = 32
+
+GET_NEXT_CALL_WARNING_MESSAGE = (
+    "An unusually high number of `Iterator.get_next()` calls was detected. "
+    "This often indicates that `Iterator.get_next()` is being called inside "
+    "a training loop, which will cause gradual slowdown and eventual resource "
+    "exhaustion. If this is the case, restructure your code to call "
+    "`next_element = iterator.get_next() once outside the loop, and use "
+    "`next_element` inside the loop.")
+
+
 class Iterator(object):
   """Represents the state of iterating through a `Dataset`."""
 
@@ -56,6 +78,7 @@ class Iterator(object):
     self._output_shapes = output_shapes
     self._string_handle = gen_dataset_ops.iterator_to_string_handle(
         self._iterator_resource)
+    self._get_next_call_count = 0
 
   @staticmethod
   def from_structure(output_types,
@@ -282,6 +305,10 @@ class Iterator(object):
     Returns:
       A nested structure of `tf.Tensor` objects.
     """
+    self._get_next_call_count += 1
+    if self._get_next_call_count > GET_NEXT_CALL_WARNING_THRESHOLD:
+      warnings.warn(GET_NEXT_CALL_WARNING_MESSAGE)
+
     return sparse.deserialize_sparse_tensors(
         nest.pack_sequence_as(self._output_types,
                               gen_dataset_ops.iterator_get_next(
diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py
index c6fb8531aea13850524e6b9a83911d7afe950395..830dc5cec4a54469d001f0ba57d1adc7bc5efd11 100644
--- a/tensorflow/python/data/ops/readers.py
+++ b/tensorflow/python/data/ops/readers.py
@@ -18,7 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.data.ops.dataset_ops import Dataset
-from tensorflow.python.framework import constant_op
+from tensorflow.python.data.util import convert
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
@@ -29,18 +29,6 @@ from tensorflow.python.ops import gen_dataset_ops
 _DEFAULT_READER_BUFFER_SIZE_BYTES = 256 * 1024  # 256 KB
 
 
-def _convert_optional_param_to_tensor(argument_name,
-                                      argument_value,
-                                      argument_default=0,
-                                      argument_dtype=dtypes.int64):
-  if argument_value is not None:
-    return ops.convert_to_tensor(
-        argument_value, dtype=argument_dtype, name=argument_name)
-  else:
-    return constant_op.constant(
-        argument_default, dtype=argument_dtype, name=argument_name)
-
-
 class TextLineDataset(Dataset):
   """A `Dataset` comprising lines from one or more text files."""
 
@@ -58,12 +46,12 @@ class TextLineDataset(Dataset):
     super(TextLineDataset, self).__init__()
     self._filenames = ops.convert_to_tensor(
         filenames, dtype=dtypes.string, name="filenames")
-    self._compression_type = _convert_optional_param_to_tensor(
+    self._compression_type = convert.optional_param_to_tensor(
         "compression_type",
         compression_type,
         argument_default="",
         argument_dtype=dtypes.string)
-    self._buffer_size = _convert_optional_param_to_tensor(
+    self._buffer_size = convert.optional_param_to_tensor(
         "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
 
   def _as_variant_tensor(self):
@@ -100,12 +88,12 @@ class TFRecordDataset(Dataset):
     # Force the type to string even if filenames is an empty list.
     self._filenames = ops.convert_to_tensor(
         filenames, dtypes.string, name="filenames")
-    self._compression_type = _convert_optional_param_to_tensor(
+    self._compression_type = convert.optional_param_to_tensor(
         "compression_type",
         compression_type,
         argument_default="",
         argument_dtype=dtypes.string)
-    self._buffer_size = _convert_optional_param_to_tensor(
+    self._buffer_size = convert.optional_param_to_tensor(
         "buffer_size",
         buffer_size,
         argument_default=_DEFAULT_READER_BUFFER_SIZE_BYTES)
@@ -155,11 +143,11 @@ class FixedLengthRecordDataset(Dataset):
     self._record_bytes = ops.convert_to_tensor(
         record_bytes, dtype=dtypes.int64, name="record_bytes")
 
-    self._header_bytes = _convert_optional_param_to_tensor(
+    self._header_bytes = convert.optional_param_to_tensor(
         "header_bytes", header_bytes)
-    self._footer_bytes = _convert_optional_param_to_tensor(
+    self._footer_bytes = convert.optional_param_to_tensor(
         "footer_bytes", footer_bytes)
-    self._buffer_size = _convert_optional_param_to_tensor(
+    self._buffer_size = convert.optional_param_to_tensor(
         "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
 
   def _as_variant_tensor(self):
diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD
index f7d7fe98d3eca10b6481e3c0f7d08b42e95ef81a..e32c7b54a48dd887c2748897c3ce3661aab9f497 100644
--- a/tensorflow/python/data/util/BUILD
+++ b/tensorflow/python/data/util/BUILD
@@ -62,6 +62,30 @@ py_test(
     ],
 )
 
+py_library(
+    name = "convert",
+    srcs = ["convert.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+    ],
+)
+
+py_test(
+    name = "convert_test",
+    size = "small",
+    srcs = ["convert_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":convert",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:util",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/python/data/util/convert.py b/tensorflow/python/data/util/convert.py
new file mode 100644
index 0000000000000000000000000000000000000000..eeb1d700f3c67a1a2ab627aa8a291755bc2127e4
--- /dev/null
+++ b/tensorflow/python/data/util/convert.py
@@ -0,0 +1,34 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Helpers constructing Datasets."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+
+
+def optional_param_to_tensor(argument_name,
+                             argument_value,
+                             argument_default=0,
+                             argument_dtype=dtypes.int64):
+  if argument_value is not None:
+    return ops.convert_to_tensor(
+        argument_value, dtype=argument_dtype, name=argument_name)
+  else:
+    return constant_op.constant(
+        argument_default, dtype=argument_dtype, name=argument_name)
diff --git a/tensorflow/python/data/util/convert_test.py b/tensorflow/python/data/util/convert_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..2cb6488070eb422f6c8d56ca5d712cbdf09fa883
--- /dev/null
+++ b/tensorflow/python/data/util/convert_test.py
@@ -0,0 +1,53 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for utilities working with user input."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.util import convert
+from tensorflow.python.framework import dtypes
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+
+
+class ConvertTest(test.TestCase):
+
+  def testInteger(self):
+    resp = convert.optional_param_to_tensor("foo", 3)
+    with self.test_session() as sess:
+      self.assertEqual(3, sess.run(resp))
+
+  def testIntegerDefault(self):
+    resp = convert.optional_param_to_tensor("foo", None)
+    with self.test_session() as sess:
+      self.assertEqual(0, sess.run(resp))
+
+  def testStringDefault(self):
+    resp = convert.optional_param_to_tensor("bar", None, "default",
+                                            dtypes.string)
+    with self.test_session() as sess:
+      self.assertEqual(compat.as_bytes("default"), sess.run(resp))
+
+  def testString(self):
+    resp = convert.optional_param_to_tensor("bar", "value", "default",
+                                            dtypes.string)
+    with self.test_session() as sess:
+      self.assertEqual(compat.as_bytes("value"), sess.run(resp))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/util/nest.py b/tensorflow/python/data/util/nest.py
index bd7ab3d34f826450a1de8821286c1237a2b5dedd..2455395635c4c8fa5d157a38d4e7a118f554fd9f 100644
--- a/tensorflow/python/data/util/nest.py
+++ b/tensorflow/python/data/util/nest.py
@@ -379,9 +379,9 @@ def assert_shallow_structure(shallow_tree, input_tree, check_types=True):
     if check_types and isinstance(shallow_tree, dict):
       if set(input_tree) != set(shallow_tree):
         raise ValueError(
-          "The two structures don't have the same keys. Input "
-          "structure has keys %s, while shallow structure has keys %s."
-          % (list(_six.iterkeys(input_tree)),
+            "The two structures don't have the same keys. Input "
+            "structure has keys %s, while shallow structure has keys %s." %
+            (list(_six.iterkeys(input_tree)),
              list(_six.iterkeys(shallow_tree))))
       input_tree = list(_six.iteritems(input_tree))
       shallow_tree = list(_six.iteritems(shallow_tree))
diff --git a/tensorflow/python/data/util/nest_test.py b/tensorflow/python/data/util/nest_test.py
index 8c84d9d1dfc1d398e1068a6ca2c13a99e9fadcb9..90dd7dfe7775b2f10611e5579784fbda63fc9669 100644
--- a/tensorflow/python/data/util/nest_test.py
+++ b/tensorflow/python/data/util/nest_test.py
@@ -271,8 +271,9 @@ class NestTest(test.TestCase):
     inp_ab1 = {"a": (1, 1), "b": {"c": (2, 2)}}
     inp_ab2 = {"a": (1, 1), "b": {"d": (2, 2)}}
     expected_message = (
-        "The two structures don't have the same keys. Input "
-        "structure has keys \['c'\], while shallow structure has keys \['d'\].")
+        r"The two structures don't have the same keys. Input "
+        r"structure has keys \['c'\], while shallow structure has "
+        r"keys \['d'\].")
     with self.assertRaisesRegexp(ValueError, expected_message):
       nest.assert_shallow_structure(inp_ab2, inp_ab1)
 
diff --git a/tensorflow/python/data/util/sparse.py b/tensorflow/python/data/util/sparse.py
index b4219198d3bce612c7fde926e780ad779f1076d9..5ebcb4ea81b23b60dc46bae78bfa792f4a8ab6d8 100644
--- a/tensorflow/python/data/util/sparse.py
+++ b/tensorflow/python/data/util/sparse.py
@@ -57,7 +57,7 @@ def as_dense_shapes(shapes, classes):
 
 
 def as_dense_types(types, classes):
-  """Converts sparse tensor types to `dtypes.string`.
+  """Converts sparse tensor types to `dtypes.variant`.
 
   Args:
     types: a structure of types to convert.
@@ -65,11 +65,11 @@ def as_dense_types(types, classes):
 
   Returns:
     a structure matching the nested structure of `types`, containing
-    `dtypes.string` at positions where `classes` contains `tf.SparseTensor` and
+    `dtypes.variant` at positions where `classes` contains `tf.SparseTensor` and
     matching contents of `types` otherwise
   """
   ret = nest.pack_sequence_as(types, [
-      dtypes.string if c is sparse_tensor.SparseTensor else ty
+      dtypes.variant if c is sparse_tensor.SparseTensor else ty
       for ty, c in zip(nest.flatten(types), nest.flatten(classes))
   ])
   return ret
@@ -116,6 +116,24 @@ def get_classes(tensors):
   ])
 
 
+def serialize_many_sparse_tensors(tensors):
+  """Serializes many sparse tensors into a batch.
+
+  Args:
+    tensors: a tensor structure to serialize.
+
+  Returns:
+    `tensors` with any sparse tensors replaced by the serialized batch.
+  """
+
+  ret = nest.pack_sequence_as(tensors, [
+      sparse_ops.serialize_many_sparse(tensor, out_type=dtypes.variant)
+      if sparse_tensor.is_sparse(tensor) else tensor
+      for tensor in nest.flatten(tensors)
+  ])
+  return ret
+
+
 def serialize_sparse_tensors(tensors):
   """Serializes sparse tensors.
 
@@ -127,7 +145,7 @@ def serialize_sparse_tensors(tensors):
   """
 
   ret = nest.pack_sequence_as(tensors, [
-      sparse_ops.serialize_sparse(tensor)
+      sparse_ops.serialize_sparse(tensor, out_type=dtypes.variant)
       if isinstance(tensor, sparse_tensor.SparseTensor) else tensor
       for tensor in nest.flatten(tensors)
   ])
diff --git a/tensorflow/python/data/util/sparse_test.py b/tensorflow/python/data/util/sparse_test.py
index a707570bab71357aa982aaed5c3d175e763f6b5f..d49b3ff34bd0ebd6beef1bea168dad22059317be 100644
--- a/tensorflow/python/data/util/sparse_test.py
+++ b/tensorflow/python/data/util/sparse_test.py
@@ -168,7 +168,7 @@ class SparseTest(test.TestCase):
         {
             "types": dtypes.int32,
             "classes": sparse_tensor.SparseTensor,
-            "expected": dtypes.string
+            "expected": dtypes.variant
         },
         {
             "types": (dtypes.int32),
@@ -178,7 +178,7 @@ class SparseTest(test.TestCase):
         {
             "types": (dtypes.int32),
             "classes": (sparse_tensor.SparseTensor),
-            "expected": (dtypes.string)
+            "expected": (dtypes.variant)
         },
         {
             "types": (dtypes.int32, ()),
@@ -193,12 +193,12 @@ class SparseTest(test.TestCase):
         {
             "types": (dtypes.int32, ()),
             "classes": (sparse_tensor.SparseTensor, ()),
-            "expected": (dtypes.string, ())
+            "expected": (dtypes.variant, ())
         },
         {
             "types": ((), dtypes.int32),
             "classes": ((), sparse_tensor.SparseTensor),
-            "expected": ((), dtypes.string)
+            "expected": ((), dtypes.variant)
         },
         {
             "types": (dtypes.int32, (), dtypes.int32),
@@ -209,7 +209,7 @@ class SparseTest(test.TestCase):
             "types": (dtypes.int32, (), dtypes.int32),
             "classes": (sparse_tensor.SparseTensor, (),
                         sparse_tensor.SparseTensor),
-            "expected": (dtypes.string, (), dtypes.string)
+            "expected": (dtypes.variant, (), dtypes.variant)
         },
         {
             "types": ((), dtypes.int32, ()),
@@ -219,7 +219,7 @@ class SparseTest(test.TestCase):
         {
             "types": ((), dtypes.int32, ()),
             "classes": ((), sparse_tensor.SparseTensor, ()),
-            "expected": ((), dtypes.string, ())
+            "expected": ((), dtypes.variant, ())
         },
     )
     for test_case in test_cases:
@@ -227,45 +227,6 @@ class SparseTest(test.TestCase):
           sparse.as_dense_types(test_case["types"], test_case["classes"]),
           test_case["expected"])
 
-  def assertSparseValuesEqual(self, a, b):
-    if not isinstance(a, sparse_tensor.SparseTensor):
-      self.assertFalse(isinstance(b, sparse_tensor.SparseTensor))
-      self.assertEqual(a, b)
-      return
-    self.assertTrue(isinstance(b, sparse_tensor.SparseTensor))
-    with self.test_session():
-      self.assertAllEqual(a.eval().indices, b.eval().indices)
-      self.assertAllEqual(a.eval().values, b.eval().values)
-      self.assertAllEqual(a.eval().dense_shape, b.eval().dense_shape)
-
-  def testSerializeDeserialize(self):
-    test_cases = (
-        (),
-        sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
-        sparse_tensor.SparseTensor(
-            indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
-        sparse_tensor.SparseTensor(
-            indices=[[0, 0], [3, 4]], values=[1, -1], dense_shape=[4, 5]),
-        (sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
-        (sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1]), ()),
-        ((), sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
-    )
-    for expected in test_cases:
-      classes = sparse.get_classes(expected)
-      shapes = nest.map_structure(lambda _: tensor_shape.TensorShape(None),
-                                  classes)
-      types = nest.map_structure(lambda _: dtypes.int32, classes)
-      actual = sparse.deserialize_sparse_tensors(
-          sparse.serialize_sparse_tensors(expected), types, shapes,
-          sparse.get_classes(expected))
-      nest.assert_same_structure(expected, actual)
-      for a, e in zip(nest.flatten(actual), nest.flatten(expected)):
-        self.assertSparseValuesEqual(a, e)
-
   def testGetClasses(self):
     s = sparse_tensor.SparseTensor(indices=[[0]], values=[1], dense_shape=[1])
     d = ops.Tensor
@@ -324,6 +285,75 @@ class SparseTest(test.TestCase):
       self.assertEqual(
           sparse.get_classes(test_case["classes"]), test_case["expected"])
 
+  def assertSparseValuesEqual(self, a, b):
+    if not isinstance(a, sparse_tensor.SparseTensor):
+      self.assertFalse(isinstance(b, sparse_tensor.SparseTensor))
+      self.assertEqual(a, b)
+      return
+    self.assertTrue(isinstance(b, sparse_tensor.SparseTensor))
+    with self.test_session():
+      self.assertAllEqual(a.eval().indices, b.eval().indices)
+      self.assertAllEqual(a.eval().values, b.eval().values)
+      self.assertAllEqual(a.eval().dense_shape, b.eval().dense_shape)
+
+  def testSerializeDeserialize(self):
+    test_cases = (
+        (),
+        sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+        sparse_tensor.SparseTensor(
+            indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
+        sparse_tensor.SparseTensor(
+            indices=[[0, 0], [3, 4]], values=[1, -1], dense_shape=[4, 5]),
+        (sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
+        (sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]), ()),
+        ((),
+         sparse_tensor.SparseTensor(
+             indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
+    )
+    for expected in test_cases:
+      classes = sparse.get_classes(expected)
+      shapes = nest.map_structure(lambda _: tensor_shape.TensorShape(None),
+                                  classes)
+      types = nest.map_structure(lambda _: dtypes.int32, classes)
+      actual = sparse.deserialize_sparse_tensors(
+          sparse.serialize_sparse_tensors(expected), types, shapes,
+          sparse.get_classes(expected))
+      nest.assert_same_structure(expected, actual)
+      for a, e in zip(nest.flatten(actual), nest.flatten(expected)):
+        self.assertSparseValuesEqual(a, e)
+
+  def testSerializeManyDeserialize(self):
+    test_cases = (
+        (),
+        sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+        sparse_tensor.SparseTensor(
+            indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
+        sparse_tensor.SparseTensor(
+            indices=[[0, 0], [3, 4]], values=[1, -1], dense_shape=[4, 5]),
+        (sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
+        (sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]), ()),
+        ((),
+         sparse_tensor.SparseTensor(
+             indices=[[0, 0]], values=[1], dense_shape=[1, 1])),
+    )
+    for expected in test_cases:
+      classes = sparse.get_classes(expected)
+      shapes = nest.map_structure(lambda _: tensor_shape.TensorShape(None),
+                                  classes)
+      types = nest.map_structure(lambda _: dtypes.int32, classes)
+      actual = sparse.deserialize_sparse_tensors(
+          sparse.serialize_many_sparse_tensors(expected), types, shapes,
+          sparse.get_classes(expected))
+      nest.assert_same_structure(expected, actual)
+      for a, e in zip(nest.flatten(actual), nest.flatten(expected)):
+        self.assertSparseValuesEqual(a, e)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 68b97ddbe3048b7aef18fcf8cc2b41ee545ee55f..f0e90f67772d114142ccc218ed9f42b723a1b556 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -31,6 +31,7 @@ py_library(
         ":debug_graphs",
         ":debug_utils",
         ":grpc_debug_server",
+        ":grpc_debug_test_server",
         ":hooks",
         ":local_cli_wrapper",
         "//tensorflow/python:util",
@@ -41,15 +42,23 @@ py_library(
 py_library(
     name = "debug_pip",
     deps = [
+        ":cli_test_utils",
         ":debug_py",
         ":grpc_debug_test_server",
         ":offline_analyzer",
         ":session_debug_testlib",
+        ":source_remote",
     ] + if_not_windows([
         ":debug_examples",
     ]),
 )
 
+py_library(
+    name = "common",
+    srcs = ["lib/common.py"],
+    srcs_version = "PY2AND3",
+)
+
 py_library(
     name = "debug_graphs",
     srcs = ["lib/debug_graphs.py"],
@@ -110,6 +119,18 @@ py_library(
     ],
 )
 
+py_library(
+    name = "source_remote",
+    srcs = ["lib/source_remote.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":common",
+        ":debug_service_pb2_grpc",
+        "//tensorflow/core/debug:debug_service_proto_py",
+        "//tensorflow/python/profiler:tfprof_logger",
+    ],
+)
+
 py_library(
     name = "stepper",
     srcs = ["lib/stepper.py"],
@@ -180,9 +201,11 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":command_parser",
+        ":common",
         ":debugger_cli_common",
         ":tensor_format",
         "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:platform",
         "//tensorflow/python:variables",
         "//third_party/py/numpy",
         "@six_archive//:six",
@@ -321,7 +344,11 @@ py_library(
     name = "grpc_wrapper",
     srcs = ["wrappers/grpc_wrapper.py"],
     srcs_version = "PY2AND3",
-    deps = [":framework"],
+    deps = [
+        ":common",
+        ":framework",
+        ":source_remote",
+    ],
 )
 
 py_library(
@@ -332,6 +359,7 @@ py_library(
         ":analyzer_cli",
         ":cli_shared",
         ":command_parser",
+        ":common",
         ":debug_data",
         ":debugger_cli_common",
         ":framework",
@@ -426,6 +454,20 @@ py_binary(
     ],
 )
 
+py_test(
+    name = "common_test",
+    size = "small",
+    srcs = ["lib/common_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":common",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 py_test(
     name = "debug_graphs_test",
     size = "small",
@@ -515,6 +557,32 @@ py_test(
     ],
 )
 
+py_test(
+    name = "source_remote_test",
+    size = "small",
+    srcs = ["lib/source_remote_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_windows",
+        "nomac",
+        "oss_serial",
+    ],
+    deps = [
+        ":grpc_debug_test_server",
+        ":source_remote",
+        ":source_utils",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python:variables",
+    ],
+)
+
 cuda_py_test(
     name = "stepper_test",
     size = "small",
@@ -574,8 +642,11 @@ py_test(
     size = "small",
     srcs = ["cli/curses_ui_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_windows"],
+    tags = [
+        "no_windows",
+    ],
     deps = [
+        ":cli_test_utils",
         ":curses_ui",
         ":debugger_cli_common",
         ":tensor_format",
@@ -765,6 +836,7 @@ py_test(
     srcs = ["cli/tensor_format_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":cli_test_utils",
         ":debug_data",
         ":tensor_format",
         "//tensorflow/core:protos_all_py",
@@ -808,6 +880,12 @@ py_test(
     ],
 )
 
+py_library(
+    name = "cli_test_utils",
+    srcs = ["cli/cli_test_utils.py"],
+    srcs_version = "PY2AND3",
+)
+
 cuda_py_test(
     name = "analyzer_cli_test",
     size = "small",
@@ -815,6 +893,7 @@ cuda_py_test(
     additional_deps = [
         ":analyzer_cli",
         ":cli_config",
+        ":cli_test_utils",
         ":command_parser",
         ":debug_data",
         ":debug_utils",
@@ -924,6 +1003,7 @@ cuda_py_test(
         "//tensorflow/python:variables",
     ],
     data = ["//tensorflow/tools/dist_test/server:grpc_tensorflow_server"],
+    grpc_enabled = True,
     tags = [
         "no_oss",  # Incompatible with bazel_pip.
         "no_windows",
diff --git a/tensorflow/python/debug/README.md b/tensorflow/python/debug/README.md
index b26411cd1538250b61364b6c7257fd03d5b6278b..a2273b050bb1ecd5a35938c3de57fb8562f1d26d 100644
--- a/tensorflow/python/debug/README.md
+++ b/tensorflow/python/debug/README.md
@@ -28,7 +28,7 @@ models:
 
 * Easy access through session wrappers
 * Easy integration with common high-level APIs, such as
-  [tf-learn](https://www.tensorflow.org/get_started/tflearn) and
+  [TensorFlow Estimators](https://www.tensorflow.org/programmers_guide/estimators) and
   [Keras](https://keras.io/)
 * Inspection of runtime tensor values and node connections
 * Conditional breaking after runs that generate tensors satisfying given
diff --git a/tensorflow/python/debug/__init__.py b/tensorflow/python/debug/__init__.py
index 821350ee907c46aaa52b5f47ca763f34458eeb3e..34da44b60df9dbda836d6c91089c5ee90f11c584 100644
--- a/tensorflow/python/debug/__init__.py
+++ b/tensorflow/python/debug/__init__.py
@@ -30,6 +30,8 @@ See the @{$python/tfdbg} guide.
 @@GrpcDebugWrapperSession
 @@LocalCLIDebugHook
 @@LocalCLIDebugWrapperSession
+@@TensorBoardDebugHook
+@@TensorBoardDebugWrapperSession
 @@WatchOptions
 
 @@reconstruct_non_debug_graph_def
@@ -60,9 +62,11 @@ from tensorflow.python.debug.lib.debug_utils import watch_graph_with_blacklists
 from tensorflow.python.debug.wrappers.dumping_wrapper import DumpingDebugWrapperSession
 from tensorflow.python.debug.wrappers.framework import WatchOptions
 from tensorflow.python.debug.wrappers.grpc_wrapper import GrpcDebugWrapperSession
+from tensorflow.python.debug.wrappers.grpc_wrapper import TensorBoardDebugWrapperSession
 from tensorflow.python.debug.wrappers.hooks import DumpingDebugHook
 from tensorflow.python.debug.wrappers.hooks import GrpcDebugHook
 from tensorflow.python.debug.wrappers.hooks import LocalCLIDebugHook
+from tensorflow.python.debug.wrappers.hooks import TensorBoardDebugHook
 from tensorflow.python.debug.wrappers.local_cli_wrapper import LocalCLIDebugWrapperSession
 
 from tensorflow.python.util import all_util as _all_util
diff --git a/tensorflow/python/debug/cli/analyzer_cli.py b/tensorflow/python/debug/cli/analyzer_cli.py
index afa3363d99c1e7341f3901dfc8e79d07fb675cfd..156afdfd4c44f2f1a07ffdd1e68ad48bbbe31cba 100644
--- a/tensorflow/python/debug/cli/analyzer_cli.py
+++ b/tensorflow/python/debug/cli/analyzer_cli.py
@@ -402,6 +402,12 @@ class DebugAnalyzer(object):
         action="store_true",
         help="Print the tensor in its entirety, i.e., do not use ellipses "
         "(may be slow for large results).")
+    ap.add_argument(
+        "-w",
+        "--write_path",
+        default="",
+        help="Path of the numpy file to write the evaluation result to, "
+        "using numpy.save()")
     self._arg_parsers["eval"] = ap
 
   def add_tensor_filter(self, filter_name, filter_callable):
@@ -972,7 +978,8 @@ class DebugAnalyzer(object):
             print_all=parsed.print_all,
             tensor_slicing=tensor_slicing,
             highlight_options=highlight_options,
-            include_numeric_summary=parsed.numeric_summary)
+            include_numeric_summary=parsed.numeric_summary,
+            write_path=parsed.write_path)
       else:
         output = cli_shared.error(
             "Invalid number (%d) for tensor %s, which generated one dump." %
@@ -1018,7 +1025,8 @@ class DebugAnalyzer(object):
             np_printoptions,
             print_all=parsed.print_all,
             tensor_slicing=tensor_slicing,
-            highlight_options=highlight_options)
+            highlight_options=highlight_options,
+            write_path=parsed.write_path)
       _add_main_menu(output, node_name=node_name, enable_print_tensor=False)
 
     return output
@@ -1071,7 +1079,8 @@ class DebugAnalyzer(object):
         "from eval of expression '%s'" % parsed.expression,
         np_printoptions,
         print_all=parsed.print_all,
-        include_numeric_summary=True)
+        include_numeric_summary=True,
+        write_path=parsed.write_path)
 
   def _reconstruct_print_source_command(self,
                                         parsed,
diff --git a/tensorflow/python/debug/cli/analyzer_cli_test.py b/tensorflow/python/debug/cli/analyzer_cli_test.py
index 847f9ec401499abb8ec4f310fa4d5118b2afca7b..6b110fda9eba301f298e84b63d091bb300549bee 100644
--- a/tensorflow/python/debug/cli/analyzer_cli_test.py
+++ b/tensorflow/python/debug/cli/analyzer_cli_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.client import session
 from tensorflow.python.debug.cli import analyzer_cli
 from tensorflow.python.debug.cli import cli_config
 from tensorflow.python.debug.cli import cli_shared
+from tensorflow.python.debug.cli import cli_test_utils
 from tensorflow.python.debug.cli import command_parser
 from tensorflow.python.debug.cli import debugger_cli_common
 from tensorflow.python.debug.lib import debug_data
@@ -55,7 +56,8 @@ def no_rewrite_session_config():
   rewriter_config = rewriter_config_pb2.RewriterConfig(
       disable_model_pruning=True,
       constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
-      arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF)
+      arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
+      dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF)
 
   graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
   return config_pb2.ConfigProto(graph_options=graph_options)
@@ -1017,6 +1019,24 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         list_inputs_node_name=node_name,
         list_outputs_node_name=node_name)
 
+  def testPrintTensorAndWriteToNpyFile(self):
+    node_name = "simple_mul_add/matmul"
+    tensor_name = node_name + ":0"
+    npy_path = os.path.join(self._dump_root, "matmul.npy")
+    out = self._registry.dispatch_command(
+        "print_tensor", [tensor_name, "-w", npy_path],
+        screen_info={"cols": 80})
+
+    self.assertEqual([
+        "Tensor \"%s:DebugIdentity\":" % tensor_name,
+        "  dtype: float64",
+        "  shape: (2, 1)",
+        "",
+    ], out.lines[:4])
+    self.assertTrue(out.lines[4].startswith("Saved value to: %s (" % npy_path))
+    # Load the numpy file and verify its contents.
+    self.assertAllClose([[7.0], [-2.0]], np.load(npy_path))
+
   def testPrintTensorHighlightingRanges(self):
     node_name = "simple_mul_add/matmul"
     tensor_name = node_name + ":0"
@@ -1207,21 +1227,44 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         "eval", ["np.matmul(`%s`, `%s`.T)" % (tensor_name, tensor_name)],
         screen_info={"cols": 80})
 
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self,
+        ["Tensor \"from eval of expression "
+         "'np.matmul(`simple_mul_add/matmul:0`, "
+         "`simple_mul_add/matmul:0`.T)'\":",
+         "  dtype: float64",
+         "  shape: (2, 2)",
+         "",
+         "Numeric summary:",
+         "| - + | total |",
+         "| 2 2 |     4 |",
+         "|           min           max          mean           std |"],
+        out.lines[:8])
+    cli_test_utils.assert_array_lines_close(
+        self, [-14.0, 49.0, 6.25, 25.7524270701], out.lines[8:9])
+    cli_test_utils.assert_array_lines_close(
+        self, [[49.0, -14.0], [-14.0, 4.0]], out.lines[10:])
+
+  def testEvalExpressionAndWriteToNpyFile(self):
+    node_name = "simple_mul_add/matmul"
+    tensor_name = node_name + ":0"
+    npy_path = os.path.join(self._dump_root, "matmul_eval.npy")
+    out = self._registry.dispatch_command(
+        "eval",
+        ["np.matmul(`%s`, `%s`.T)" % (tensor_name, tensor_name), "-w",
+         npy_path], screen_info={"cols": 80})
+
     self.assertEqual([
         "Tensor \"from eval of expression "
         "'np.matmul(`simple_mul_add/matmul:0`, "
         "`simple_mul_add/matmul:0`.T)'\":",
         "  dtype: float64",
         "  shape: (2, 2)",
-        "",
-        "Numeric summary:",
-        "| - + | total |",
-        "| 2 2 |     4 |",
-        "|           min           max          mean           std |",
-        "|         -14.0          49.0          6.25 25.7524270701 |",
-        "",
-        "array([[ 49., -14.],",
-        "       [-14.,   4.]])"], out.lines)
+        ""], out.lines[:4])
+
+    self.assertTrue(out.lines[4].startswith("Saved value to: %s (" % npy_path))
+    # Load the numpy file and verify its contents.
+    self.assertAllClose([[49.0, -14.0], [-14.0, 4.0]], np.load(npy_path))
 
   def testAddGetTensorFilterLambda(self):
     analyzer = analyzer_cli.DebugAnalyzer(self._debug_dump,
diff --git a/tensorflow/python/debug/cli/cli_shared.py b/tensorflow/python/debug/cli/cli_shared.py
index df972eacf7332ed4b9a7ccf513da1db91e71fb4c..a0fe6066acd1462a94e93d6091db237d01cfede3 100644
--- a/tensorflow/python/debug/cli/cli_shared.py
+++ b/tensorflow/python/debug/cli/cli_shared.py
@@ -25,8 +25,10 @@ import six
 from tensorflow.python.debug.cli import command_parser
 from tensorflow.python.debug.cli import debugger_cli_common
 from tensorflow.python.debug.cli import tensor_format
+from tensorflow.python.debug.lib import common
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import gfile
 
 RL = debugger_cli_common.RichLine
 
@@ -151,7 +153,8 @@ def format_tensor(tensor,
                   print_all=False,
                   tensor_slicing=None,
                   highlight_options=None,
-                  include_numeric_summary=False):
+                  include_numeric_summary=False,
+                  write_path=None):
   """Generate formatted str to represent a tensor or its slices.
 
   Args:
@@ -171,6 +174,8 @@ def format_tensor(tensor,
       for more details.
     include_numeric_summary: Whether a text summary of the numeric values (if
       applicable) will be included.
+    write_path: A path to save the tensor value (after any slicing) to
+      (optinal). `numpy.save()` is used to save the value.
 
   Returns:
     An instance of `debugger_cli_common.RichTextLines` representing the
@@ -185,6 +190,16 @@ def format_tensor(tensor,
     value = tensor
     sliced_name = tensor_name
 
+  auxiliary_message = None
+  if write_path:
+    with gfile.Open(write_path, "wb") as output_file:
+      np.save(output_file, value)
+    line = debugger_cli_common.RichLine("Saved value to: ")
+    line += debugger_cli_common.RichLine(write_path, font_attr="bold")
+    line += " (%sB)" % bytes_to_readable_str(gfile.Stat(write_path).length)
+    auxiliary_message = debugger_cli_common.rich_text_lines_from_rich_line_list(
+        [line, debugger_cli_common.RichLine("")])
+
   if print_all:
     np_printoptions["threshold"] = value.size
   else:
@@ -195,6 +210,7 @@ def format_tensor(tensor,
       sliced_name,
       include_metadata=True,
       include_numeric_summary=include_numeric_summary,
+      auxiliary_message=auxiliary_message,
       np_printoptions=np_printoptions,
       highlight_options=highlight_options)
 
@@ -214,51 +230,6 @@ def error(msg):
       RL("ERROR: " + msg, COLOR_RED)])
 
 
-def get_graph_element_name(elem):
-  """Obtain the name or string representation of a graph element.
-
-  If the graph element has the attribute "name", return name. Otherwise, return
-  a __str__ representation of the graph element. Certain graph elements, such as
-  `SparseTensor`s, do not have the attribute "name".
-
-  Args:
-    elem: The graph element in question.
-
-  Returns:
-    If the attribute 'name' is available, return the name. Otherwise, return
-    str(fetch).
-  """
-
-  return elem.name if hasattr(elem, "name") else str(elem)
-
-
-def _get_fetch_names(fetches):
-  """Get a flattened list of the names in run() call fetches.
-
-  Args:
-    fetches: Fetches of the `Session.run()` call. It maybe a Tensor, an
-      Operation or a Variable. It may also be nested lists, tuples or
-      dicts. See doc of `Session.run()` for more details.
-
-  Returns:
-    (list of str) A flattened list of fetch names from `fetches`.
-  """
-
-  lines = []
-  if isinstance(fetches, (list, tuple)):
-    for fetch in fetches:
-      lines.extend(_get_fetch_names(fetch))
-  elif isinstance(fetches, dict):
-    for key in fetches:
-      lines.extend(_get_fetch_names(fetches[key]))
-  else:
-    # This ought to be a Tensor, an Operation or a Variable, for which the name
-    # attribute should be available. (Bottom-out condition of the recursion.)
-    lines.append(get_graph_element_name(fetches))
-
-  return lines
-
-
 def _recommend_command(command, description, indent=2, create_link=False):
   """Generate a RichTextLines object that describes a recommended command.
 
@@ -327,14 +298,14 @@ def get_run_start_intro(run_call_count,
     (RichTextLines) Formatted intro message about the `Session.run()` call.
   """
 
-  fetch_lines = _get_fetch_names(fetches)
+  fetch_lines = common.get_flattened_names(fetches)
 
   if not feed_dict:
     feed_dict_lines = [debugger_cli_common.RichLine("  (Empty)")]
   else:
     feed_dict_lines = []
     for feed_key in feed_dict:
-      feed_key_name = get_graph_element_name(feed_key)
+      feed_key_name = common.get_graph_element_name(feed_key)
       feed_dict_line = debugger_cli_common.RichLine("  ")
       feed_dict_line += debugger_cli_common.RichLine(
           feed_key_name,
@@ -446,10 +417,10 @@ def get_run_short_description(run_call_count,
   description = "run #%d: " % run_call_count
 
   if isinstance(fetches, (ops.Tensor, ops.Operation, variables.Variable)):
-    description += "1 fetch (%s); " % get_graph_element_name(fetches)
+    description += "1 fetch (%s); " % common.get_graph_element_name(fetches)
   else:
     # Could be (nested) list, tuple, dict or namedtuple.
-    num_fetches = len(_get_fetch_names(fetches))
+    num_fetches = len(common.get_flattened_names(fetches))
     if num_fetches > 1:
       description += "%d fetches; " % num_fetches
     else:
diff --git a/tensorflow/python/debug/cli/cli_test_utils.py b/tensorflow/python/debug/cli/cli_test_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a963d8da53ff0cf543ac33b389633cb2b1916b2
--- /dev/null
+++ b/tensorflow/python/debug/cli/cli_test_utils.py
@@ -0,0 +1,65 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Testing utilities for tfdbg command-line interface."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import re
+
+import numpy as np
+
+
+def assert_lines_equal_ignoring_whitespace(test, expected_lines, actual_lines):
+  """Assert equality in lines, ignoring all whitespace.
+
+  Args:
+    test: An instance of unittest.TestCase or its subtypes (e.g.,
+      TensorFlowTestCase).
+    expected_lines: Expected lines as an iterable of strings.
+    actual_lines: Actual lines as an iterable of strings.
+  """
+  test.assertEqual(
+      len(expected_lines), len(actual_lines),
+      "Mismatch in the number of lines: %d vs %d" % (
+          len(expected_lines), len(actual_lines)))
+  for expected_line, actual_line in zip(expected_lines, actual_lines):
+    test.assertEqual("".join(expected_line.split()),
+                     "".join(actual_line.split()))
+
+
+# Regular expression for separators between values in a string representation
+# of an ndarray, exclusing whitespace.
+_ARRAY_VALUE_SEPARATOR_REGEX = re.compile(r"(array|\(|\[|\]|\)|\||,)")
+
+
+def assert_array_lines_close(test, expected_array, array_lines):
+  """Assert that the array value represented by lines is close to expected.
+
+  Note that the shape of the array represented by the `array_lines` is ignored.
+
+  Args:
+    test: An instance of TensorFlowTestCase.
+    expected_array: Expected value of the array.
+    array_lines: A list of strings representing the array.
+      E.g., "array([[ 1.0, 2.0 ], [ 3.0, 4.0 ]])"
+      Assumes that values are separated by commas, parentheses, brackets, "|"
+      characters and whitespace.
+  """
+  elements = []
+  for line in array_lines:
+    line = re.sub(_ARRAY_VALUE_SEPARATOR_REGEX, " ", line)
+    elements.extend(float(s) for s in line.split())
+  test.assertAllClose(np.array(expected_array).flatten(), elements)
diff --git a/tensorflow/python/debug/cli/command_parser.py b/tensorflow/python/debug/cli/command_parser.py
index aabb3ed7b6008cf2a785d91b937620680d2942cc..e6e44b796dfff997b739d98ab4239cbf0c790404 100644
--- a/tensorflow/python/debug/cli/command_parser.py
+++ b/tensorflow/python/debug/cli/command_parser.py
@@ -540,4 +540,11 @@ def get_print_tensor_argparser(description):
       action="store_true",
       help="Include summary for non-empty tensors of numeric (int*, float*, "
       "complex*) and Boolean types.")
+  ap.add_argument(
+      "-w",
+      "--write_path",
+      type=str,
+      default="",
+      help="Path of the numpy file to write the tensor data to, using "
+      "numpy.save().")
   return ap
diff --git a/tensorflow/python/debug/cli/curses_ui_test.py b/tensorflow/python/debug/cli/curses_ui_test.py
index 4ca11e7e4104009e01d7db7f6b25912ad94c2118..02511cbe6a586879e259f59236f0d75c8d55bfba 100644
--- a/tensorflow/python/debug/cli/curses_ui_test.py
+++ b/tensorflow/python/debug/cli/curses_ui_test.py
@@ -25,6 +25,7 @@ import threading
 import numpy as np
 from six.moves import queue
 
+from tensorflow.python.debug.cli import cli_test_utils
 from tensorflow.python.debug.cli import curses_ui
 from tensorflow.python.debug.cli import debugger_cli_common
 from tensorflow.python.debug.cli import tensor_format
@@ -1056,13 +1057,10 @@ class CursesTest(test_util.TensorFlowTestCase):
     self.assertEqual(11, len(ui.scroll_messages))
 
     for i in range(11):
-      self.assertEqual([
-          "Tensor \"m\":", "", "array([[ 1.,  1.,  1.,  1.,  1.],",
-          "       [ 1.,  1.,  1.,  1.,  1.],",
-          "       [ 1.,  1.,  1.,  1.,  1.],",
-          "       [ 1.,  1.,  1.,  1.,  1.],",
-          "       [ 1.,  1.,  1.,  1.,  1.]])"
-      ], ui.unwrapped_outputs[i].lines)
+      cli_test_utils.assert_lines_equal_ignoring_whitespace(
+          self, ["Tensor \"m\":", ""], ui.unwrapped_outputs[i].lines[:2])
+      self.assertEqual(
+          repr(np.ones([5, 5])).split("\n"), ui.unwrapped_outputs[i].lines[2:])
 
     self.assertEqual({
         0: None,
@@ -1165,13 +1163,10 @@ class CursesTest(test_util.TensorFlowTestCase):
     self.assertEqual(4, len(ui.output_array_pointer_indices))
 
     for i in range(4):
-      self.assertEqual([
-          "Tensor \"m\":", "", "array([[ 1.,  1.,  1.,  1.,  1.],",
-          "       [ 1.,  1.,  1.,  1.,  1.],",
-          "       [ 1.,  1.,  1.,  1.,  1.],",
-          "       [ 1.,  1.,  1.,  1.,  1.],",
-          "       [ 1.,  1.,  1.,  1.,  1.]])"
-      ], ui.unwrapped_outputs[i].lines)
+      cli_test_utils.assert_lines_equal_ignoring_whitespace(
+          self, ["Tensor \"m\":", ""], ui.unwrapped_outputs[i].lines[:2])
+      self.assertEqual(
+          repr(np.ones([5, 5])).split("\n"), ui.unwrapped_outputs[i].lines[2:])
 
     self.assertEqual({
         0: None,
diff --git a/tensorflow/python/debug/cli/tensor_format.py b/tensorflow/python/debug/cli/tensor_format.py
index 05ccf93f15385566719abf8064296c2e1cfec027..d4aea76d652e7606939f3d8a89ff0378da0774d2 100644
--- a/tensorflow/python/debug/cli/tensor_format.py
+++ b/tensorflow/python/debug/cli/tensor_format.py
@@ -72,6 +72,7 @@ class HighlightOptions(object):
 def format_tensor(tensor,
                   tensor_label,
                   include_metadata=False,
+                  auxiliary_message=None,
                   include_numeric_summary=False,
                   np_printoptions=None,
                   highlight_options=None):
@@ -84,6 +85,8 @@ def format_tensor(tensor,
       suppress the tensor name line in the return value.
     include_metadata: Whether metadata such as dtype and shape are to be
       included in the formatted text.
+    auxiliary_message: An auxiliary message to display under the tensor label,
+      dtype and shape information lines.
     include_numeric_summary: Whether a text summary of the numeric values (if
       applicable) will be included.
     np_printoptions: A dictionary of keyword arguments that are passed to a
@@ -138,6 +141,9 @@ def format_tensor(tensor,
   formatted = debugger_cli_common.RichTextLines(
       lines, font_attr_segs=font_attr_segs)
 
+  if auxiliary_message:
+    formatted.extend(auxiliary_message)
+
   if include_numeric_summary:
     formatted.append("Numeric summary:")
     formatted.extend(numeric_summary(tensor))
diff --git a/tensorflow/python/debug/cli/tensor_format_test.py b/tensorflow/python/debug/cli/tensor_format_test.py
index d3beb5f7bc8538a36437d1a322904cd141210985..18ddbb6437cf463afa2cc4e8f02cb592e016049a 100644
--- a/tensorflow/python/debug/cli/tensor_format_test.py
+++ b/tensorflow/python/debug/cli/tensor_format_test.py
@@ -17,12 +17,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import re
+
 import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.core.framework import tensor_pb2
 from tensorflow.core.framework import tensor_shape_pb2
 from tensorflow.core.framework import types_pb2
+from tensorflow.python.debug.cli import cli_test_utils
 from tensorflow.python.debug.cli import tensor_format
 from tensorflow.python.debug.lib import debug_data
 from tensorflow.python.framework import test_util
@@ -40,21 +42,109 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
         {"dtype": tensor.dtype, "shape": tensor.shape},
         annotations["tensor_metadata"])
 
-  def _checkBeginIndices(self, expected_indices, annot):
-    self.assertEqual({tensor_format.BEGIN_INDICES_KEY: expected_indices},
-                     annot)
-
-  def _checkOmittedIndices(self, expected_indices, annot):
-    self.assertEqual({tensor_format.OMITTED_INDICES_KEY: expected_indices},
-                     annot)
+  # Regular expression for text representation of float numbers, possibly in
+  # engineering notation.
+  _ELEMENT_REGEX = re.compile(
+      r"([+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?|nan|inf|-inf)")
+
+  def _checkBeginIndicesAnnotations(self, out, a):
+    """Check the beginning-index annotations of an ndarray representation.
+
+    Args:
+      out: An instance of RichTextLines representing a numpy.ndarray.
+      a: The numpy.ndarray being represented.
+
+    Raises:
+      ValueError: if any ellipses ("...") are found in the lines representing
+        the array.
+    """
+    begin_line_num = 0
+    while not out.lines[begin_line_num].startswith("array"):
+      begin_line_num += 1
+    element_index = 0
+    for line_num in range(begin_line_num, len(out.lines)):
+      line = out.lines[line_num]
+      if "..." in line:
+        raise ValueError("Unexpected found ellipses in line representing array")
+      matches = re.finditer(self._ELEMENT_REGEX, line)
+      for line_item_index, _ in enumerate(matches):
+        subscripts = list(np.unravel_index(element_index, a.shape))
+        if line_item_index == 0:
+          self.assertEqual({tensor_format.BEGIN_INDICES_KEY: subscripts},
+                           out.annotations[line_num])
+        element_index += 1
+    self.assertEqual(element_index, np.size(a))
+
+  def _checkTensorElementLocations(self, out, a):
+    """Check the results of locate_tensor_element on an ndarray representation.
+
+    that represents a numpy.ndaray.
+
+    Args:
+      out: An instance of RichTextLines representing a numpy.ndarray.
+      a: The numpy.ndarray being represented.
+
+    Raises:
+      ValueError: if any ellipses ("...") are found in the lines representing
+        the array.
+    """
+    # First, locate the beginning of the tensor value section.
+    begin_line_num = 0
+    while not out.lines[begin_line_num].startswith("array"):
+      begin_line_num += 1
+    # Second, find all matches to tensor-value regex.
+    element_index = 0
+    for line_num in range(begin_line_num, len(out.lines)):
+      line = out.lines[line_num]
+      if "..." in line:
+        raise ValueError("Unexpected found ellipses in line representing array")
+      matches = re.finditer(self._ELEMENT_REGEX, line)
+      for match in matches:
+        subscripts = list(np.unravel_index(element_index, a.shape))
+        is_omitted, row, start_col, end_col = (
+            tensor_format.locate_tensor_element(out, subscripts))
+        self.assertFalse(is_omitted)
+        self.assertEqual(line_num, row)
+        self.assertEqual(match.start(), start_col)
+        self.assertEqual(match.end(), end_col)
+        element_index += 1
+    self.assertEqual(element_index, np.size(a))
+
+  def _findFirst(self, lines, string):
+    """Find first occurrence of a string in a list of strings."""
+    for i, line in enumerate(lines):
+      find_index = line.find(string)
+      if find_index >= 0:
+        return i, find_index
+
+  def _extractBoldNumbers(self, out, start_line):
+    """Extract all numbers that have the bold font attribute.
+
+    Args:
+      out: An instance of RichTextLines.
+      start_line: 0-based index to start from.
+
+    Returns:
+      A list of floats.
+    """
+    floats = []
+    for i in range(start_line, len(out.lines)):
+      if i not in out.font_attr_segs:
+        continue
+      line_attrs = out.font_attr_segs[i]
+      for begin, end, attr_value in line_attrs:
+        if attr_value == "bold":
+          floats.append(float(out.lines[i][begin:end]))
+    return floats
 
   def testFormatZeroDimensionTensor(self):
-    a = np.array(42.0, dtype=np.float32)
+    a = np.array(42, dtype=np.int32)
 
     out = tensor_format.format_tensor(a, "a")
 
-    self.assertEqual(["Tensor \"a\":", "", "array(42.0, dtype=float32)"],
-                     out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertTrue(out.lines[2].startswith("array(42"))
     self._checkTensorMetadata(a, out.annotations)
 
   def testFormatTensorHighlightsTensorNameWithoutDebugOp(self):
@@ -81,82 +171,51 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
     out = tensor_format.format_tensor(
         a, "a", np_printoptions={"linewidth": 40})
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([ 0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
     self._checkTensorMetadata(a, out.annotations)
 
     # Check annotations for beginning indices of the lines.
-    self._checkBeginIndices([0], out.annotations[2])
-    self._checkBeginIndices([6], out.annotations[3])
-    self._checkBeginIndices([12], out.annotations[4])
-    self._checkBeginIndices([18], out.annotations[5])
+    self._checkBeginIndicesAnnotations(out, a)
 
   def testFormatTensor2DNoEllipsisNoRowBreak(self):
     a = np.linspace(0.0, 1.0 - 1.0 / 16.0, 16).reshape([4, 4])
 
     out = tensor_format.format_tensor(a, "a")
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([[ 0.    ,  0.0625,  0.125 ,  0.1875],",
-        "       [ 0.25  ,  0.3125,  0.375 ,  0.4375],",
-        "       [ 0.5   ,  0.5625,  0.625 ,  0.6875],",
-        "       [ 0.75  ,  0.8125,  0.875 ,  0.9375]])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
     self._checkTensorMetadata(a, out.annotations)
-
-    # Check annotations for the beginning indices of the lines.
-    for i in xrange(2, 6):
-      self._checkBeginIndices([i  - 2, 0], out.annotations[i])
+    self._checkBeginIndicesAnnotations(out, a)
 
   def testFormatTensorSuppressingTensorName(self):
     a = np.linspace(0.0, 1.0 - 1.0 / 16.0, 16).reshape([4, 4])
 
     out = tensor_format.format_tensor(a, None)
-
-    self.assertEqual([
-        "array([[ 0.    ,  0.0625,  0.125 ,  0.1875],",
-        "       [ 0.25  ,  0.3125,  0.375 ,  0.4375],",
-        "       [ 0.5   ,  0.5625,  0.625 ,  0.6875],",
-        "       [ 0.75  ,  0.8125,  0.875 ,  0.9375]])",
-    ], out.lines)
+    self.assertEqual(repr(a).split("\n"), out.lines)
 
     self._checkTensorMetadata(a, out.annotations)
-
-    # Check annotations for the beginning indices of the lines.
-    for i in xrange(4):
-      self._checkBeginIndices([i, 0], out.annotations[i])
+    self._checkBeginIndicesAnnotations(out, a)
 
   def testFormatTensorWithMetadata(self):
     a = np.linspace(0.0, 1.0 - 1.0 / 16.0, 16).reshape([4, 4])
 
     out = tensor_format.format_tensor(a, "a", include_metadata=True)
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "  dtype: float64",
-        "  shape: (4, 4)",
-        "",
-        "array([[ 0.    ,  0.0625,  0.125 ,  0.1875],",
-        "       [ 0.25  ,  0.3125,  0.375 ,  0.4375],",
-        "       [ 0.5   ,  0.5625,  0.625 ,  0.6875],",
-        "       [ 0.75  ,  0.8125,  0.875 ,  0.9375]])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self,
+        ["Tensor \"a\":",
+         "  dtype: float64",
+         "  shape: (4, 4)",
+         ""], out.lines[:4])
+    self.assertEqual(repr(a).split("\n"), out.lines[4:])
 
     self._checkTensorMetadata(a, out.annotations)
-
-    # Check annotations for the beginning indices of the lines.
-    for i in xrange(4, 7):
-      self._checkBeginIndices([i  - 4, 0], out.annotations[i])
+    self._checkBeginIndicesAnnotations(out, a)
 
   def testFormatTensor2DNoEllipsisWithRowBreak(self):
     a = np.linspace(0.0, 1.0 - 1.0 / 40.0, 40).reshape([2, 20])
@@ -168,58 +227,26 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
         {"dtype": a.dtype, "shape": a.shape},
         out.annotations["tensor_metadata"])
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([[ 0.   ,  0.025,  0.05 ,  0.075,  0.1  ,",
-        "         0.125,  0.15 ,  0.175,  0.2  ,  0.225,",
-        "         0.25 ,  0.275,  0.3  ,  0.325,  0.35 ,",
-        "         0.375,  0.4  ,  0.425,  0.45 ,  0.475],",
-        "       [ 0.5  ,  0.525,  0.55 ,  0.575,  0.6  ,",
-        "         0.625,  0.65 ,  0.675,  0.7  ,  0.725,",
-        "         0.75 ,  0.775,  0.8  ,  0.825,  0.85 ,",
-        "         0.875,  0.9  ,  0.925,  0.95 ,  0.975]])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
     self._checkTensorMetadata(a, out.annotations)
 
     # Check annotations for the beginning indices of the lines.
-    self._checkBeginIndices([0, 0], out.annotations[2])
-    self._checkBeginIndices([0, 5], out.annotations[3])
-    self._checkBeginIndices([0, 10], out.annotations[4])
-    self._checkBeginIndices([0, 15], out.annotations[5])
-    self._checkBeginIndices([1, 0], out.annotations[6])
-    self._checkBeginIndices([1, 5], out.annotations[7])
-    self._checkBeginIndices([1, 10], out.annotations[8])
-    self._checkBeginIndices([1, 15], out.annotations[9])
-
-  def testFormatTensor3DNoEllipsis(self):  # TODO(cais): Test name.
+    self._checkBeginIndicesAnnotations(out, a)
+
+  def testFormatTensor3DNoEllipsis(self):
     a = np.linspace(0.0, 1.0 - 1.0 / 24.0, 24).reshape([2, 3, 4])
 
     out = tensor_format.format_tensor(a, "a")
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([[[ 0.        ,  0.04166667,  0.08333333,  0.125     ],",
-        "        [ 0.16666667,  0.20833333,  0.25      ,  0.29166667],",
-        "        [ 0.33333333,  0.375     ,  0.41666667,  0.45833333]],",
-        "",
-        "       [[ 0.5       ,  0.54166667,  0.58333333,  0.625     ],",
-        "        [ 0.66666667,  0.70833333,  0.75      ,  0.79166667],",
-        "        [ 0.83333333,  0.875     ,  0.91666667,  0.95833333]]])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
     self._checkTensorMetadata(a, out.annotations)
-
-    # Check annotations for beginning indices of the lines.
-    self._checkBeginIndices([0, 0, 0], out.annotations[2])
-    self._checkBeginIndices([0, 1, 0], out.annotations[3])
-    self._checkBeginIndices([0, 2, 0], out.annotations[4])
-    self.assertNotIn(5, out.annotations)
-    self._checkBeginIndices([1, 0, 0], out.annotations[6])
-    self._checkBeginIndices([1, 1, 0], out.annotations[7])
-    self._checkBeginIndices([1, 2, 0], out.annotations[8])
+    self._checkBeginIndicesAnnotations(out, a)
 
   def testFormatTensor3DNoEllipsisWithArgwhereHighlightWithMatches(self):
     a = np.linspace(0.0, 1.0 - 1.0 / 24.0, 24).reshape([2, 3, 4])
@@ -235,39 +262,22 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
     out = tensor_format.format_tensor(
         a, "a", highlight_options=highlight_options)
 
-    self.assertEqual([
-        "Tensor \"a\": "
-        "Highlighted(between 0.26 and 0.5): 5 of 24 element(s) (20.83%)",
-        "",
-        "array([[[ 0.        ,  0.04166667,  0.08333333,  0.125     ],",
-        "        [ 0.16666667,  0.20833333,  0.25      ,  0.29166667],",
-        "        [ 0.33333333,  0.375     ,  0.41666667,  0.45833333]],",
-        "",
-        "       [[ 0.5       ,  0.54166667,  0.58333333,  0.625     ],",
-        "        [ 0.66666667,  0.70833333,  0.75      ,  0.79166667],",
-        "        [ 0.83333333,  0.875     ,  0.91666667,  0.95833333]]])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self,
+        ["Tensor \"a\": "
+         "Highlighted(between 0.26 and 0.5): 5 of 24 element(s) (20.83%)",
+         ""],
+        out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
     self._checkTensorMetadata(a, out.annotations)
 
     # Check annotations for beginning indices of the lines.
-    self._checkBeginIndices([0, 0, 0], out.annotations[2])
-    self._checkBeginIndices([0, 1, 0], out.annotations[3])
-    self._checkBeginIndices([0, 2, 0], out.annotations[4])
-    self.assertNotIn(5, out.annotations)
-    self._checkBeginIndices([1, 0, 0], out.annotations[6])
-    self._checkBeginIndices([1, 1, 0], out.annotations[7])
-    self._checkBeginIndices([1, 2, 0], out.annotations[8])
+    self._checkBeginIndicesAnnotations(out, a)
 
-    # Check font attribute segments for highlighted elements.
-    self.assertNotIn(2, out.font_attr_segs)
-    self.assertEqual([(49, 59, "bold")], out.font_attr_segs[3])
-    self.assertEqual([(10, 20, "bold"), (23, 28, "bold"), (36, 46, "bold"),
-                      (49, 59, "bold")], out.font_attr_segs[4])
-    self.assertNotIn(5, out.font_attr_segs)
-    self.assertNotIn(6, out.font_attr_segs)
-    self.assertNotIn(7, out.font_attr_segs)
-    self.assertNotIn(8, out.font_attr_segs)
+    self.assertAllClose(
+        [0.29166667, 0.33333333, 0.375, 0.41666667, 0.45833333],
+        self._extractBoldNumbers(out, 2))
 
   def testFormatTensor3DNoEllipsisWithArgwhereHighlightWithNoMatches(self):
     a = np.linspace(0.0, 1.0 - 1.0 / 24.0, 24).reshape([2, 3, 4])
@@ -279,93 +289,54 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
     out = tensor_format.format_tensor(
         a, "a", highlight_options=highlight_options)
 
-    self.assertEqual([
-        "Tensor \"a\": Highlighted: 0 of 24 element(s) (0.00%)", "",
-        "array([[[ 0.        ,  0.04166667,  0.08333333,  0.125     ],",
-        "        [ 0.16666667,  0.20833333,  0.25      ,  0.29166667],",
-        "        [ 0.33333333,  0.375     ,  0.41666667,  0.45833333]],", "",
-        "       [[ 0.5       ,  0.54166667,  0.58333333,  0.625     ],",
-        "        [ 0.66666667,  0.70833333,  0.75      ,  0.79166667],",
-        "        [ 0.83333333,  0.875     ,  0.91666667,  0.95833333]]])"
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self,
+        ["Tensor \"a\": Highlighted: 0 of 24 element(s) (0.00%)", ""],
+        out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
     self._checkTensorMetadata(a, out.annotations)
-
-    # Check annotations for beginning indices of the lines.
-    self._checkBeginIndices([0, 0, 0], out.annotations[2])
-    self._checkBeginIndices([0, 1, 0], out.annotations[3])
-    self._checkBeginIndices([0, 2, 0], out.annotations[4])
-    self.assertNotIn(5, out.annotations)
-    self._checkBeginIndices([1, 0, 0], out.annotations[6])
-    self._checkBeginIndices([1, 1, 0], out.annotations[7])
-    self._checkBeginIndices([1, 2, 0], out.annotations[8])
+    self._checkBeginIndicesAnnotations(out, a)
 
     # Check font attribute segments for highlighted elements.
-    self.assertNotIn(2, out.font_attr_segs)
-    self.assertNotIn(3, out.font_attr_segs)
-    self.assertNotIn(4, out.font_attr_segs)
-    self.assertNotIn(5, out.font_attr_segs)
-    self.assertNotIn(6, out.font_attr_segs)
-    self.assertNotIn(7, out.font_attr_segs)
-    self.assertNotIn(8, out.font_attr_segs)
+    for i in range(2, len(out.lines)):
+      self.assertNotIn(i, out.font_attr_segs)
 
   def testFormatTensorWithEllipses(self):
-    a = np.zeros([11, 11, 11])
+    a = (np.arange(11 * 11 * 11) + 1000).reshape([11, 11, 11]).astype(np.int32)
 
     out = tensor_format.format_tensor(
         a, "a", False, np_printoptions={"threshold": 100, "edgeitems": 2})
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([[[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]],",
-        "",
-        "       [[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]],",
-        "",
-        "       ..., ",
-        "       [[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]],",
-        "",
-        "       [[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]]])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
     self._checkTensorMetadata(a, out.annotations)
 
     # Check annotations for beginning indices of the lines.
-    for i in xrange(2):
-      self._checkBeginIndices([i, 0, 0], out.annotations[i * 6 + 2])
-      self._checkBeginIndices([i, 1, 0], out.annotations[i * 6 + 3])
-      self._checkOmittedIndices([i, 2, 0], out.annotations[i * 6 + 4])
-      self._checkBeginIndices([i, 9, 0], out.annotations[i * 6 + 5])
-      self._checkBeginIndices([i, 10, 0], out.annotations[i * 6 + 6])
-      self.assertNotIn(i * 6 + 7, out.annotations)
-
-    p = 15
-    for i in xrange(2):
-      self._checkBeginIndices([9 + i, 0, 0], out.annotations[p + i * 6])
-      self._checkBeginIndices([9 + i, 1, 0], out.annotations[p + i * 6 + 1])
-      self._checkOmittedIndices(
-          [9 + i, 2, 0], out.annotations[p + i * 6 + 2])
-      self._checkBeginIndices([9 + i, 9, 0], out.annotations[p + i * 6 + 3])
-      self._checkBeginIndices([9 + i, 10, 0], out.annotations[p + i * 6 + 4])
-
-      if i < 1:
-        self.assertNotIn(p + i * 6 + 5, out.annotations)
+    actual_row_0_0_0, _ = self._findFirst(out.lines, "1000")
+    self.assertEqual({tensor_format.BEGIN_INDICES_KEY: [0, 0, 0]},
+                     out.annotations[actual_row_0_0_0])
+    actual_row_0_1_0, _ = self._findFirst(out.lines, "1011")
+    self.assertEqual({tensor_format.BEGIN_INDICES_KEY: [0, 1, 0]},
+                     out.annotations[actual_row_0_1_0])
+    # Find the first line that is completely omitted.
+    omitted_line = 2
+    while not out.lines[omitted_line].strip().startswith("..."):
+      omitted_line += 1
+    self.assertEqual({tensor_format.OMITTED_INDICES_KEY: [0, 2, 0]},
+                     out.annotations[omitted_line])
+
+    actual_row_10_10_0, _ = self._findFirst(out.lines, "2320")
+    self.assertEqual({tensor_format.BEGIN_INDICES_KEY: [10, 10, 0]},
+                     out.annotations[actual_row_10_10_0])
+    # Find the last line that is completely omitted.
+    omitted_line = len(out.lines) - 1
+    while not out.lines[omitted_line].strip().startswith("..."):
+      omitted_line -= 1
+    self.assertEqual({tensor_format.OMITTED_INDICES_KEY: [10, 2, 0]},
+                     out.annotations[omitted_line])
 
   def testFormatUninitializedTensor(self):
     tensor_proto = tensor_pb2.TensorProto(
@@ -396,63 +367,11 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
     out = tensor_format.format_tensor(
         a, "a", np_printoptions={"linewidth": 40})
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([ 0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [0])
-    self.assertFalse(is_omitted)
-    self.assertEqual(2, row)
-    self.assertEqual(8, start_col)
-    self.assertEqual(10, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [5])
-    self.assertFalse(is_omitted)
-    self.assertEqual(2, row)
-    self.assertEqual(33, start_col)
-    self.assertEqual(35, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [6])
-    self.assertFalse(is_omitted)
-    self.assertEqual(3, row)
-    self.assertEqual(8, start_col)
-    self.assertEqual(10, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [11])
-    self.assertFalse(is_omitted)
-    self.assertEqual(3, row)
-    self.assertEqual(33, start_col)
-    self.assertEqual(35, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [12])
-    self.assertFalse(is_omitted)
-    self.assertEqual(4, row)
-    self.assertEqual(8, start_col)
-    self.assertEqual(10, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [18])
-    self.assertFalse(is_omitted)
-    self.assertEqual(5, row)
-    self.assertEqual(8, start_col)
-    self.assertEqual(10, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [19])
-    self.assertFalse(is_omitted)
-    self.assertEqual(5, row)
-    self.assertEqual(13, start_col)
-    self.assertEqual(15, end_col)
+    self._checkTensorElementLocations(out, a)
 
     with self.assertRaisesRegexp(
         ValueError, "Indices exceed tensor dimensions"):
@@ -472,49 +391,11 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
     out = tensor_format.format_tensor(
         a, "a", np_printoptions={"linewidth": 40})
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([ 0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.])",
-    ], out.lines)
-
-    (are_omitted, rows, start_cols,
-     end_cols) = tensor_format.locate_tensor_element(out, [[0]])
-    self.assertEqual([False], are_omitted)
-    self.assertEqual([2], rows)
-    self.assertEqual([8], start_cols)
-    self.assertEqual([10], end_cols)
-
-    (are_omitted, rows, start_cols,
-     end_cols) = tensor_format.locate_tensor_element(out, [[0], [5]])
-    self.assertEqual([False, False], are_omitted)
-    self.assertEqual([2, 2], rows)
-    self.assertEqual([8, 33], start_cols)
-    self.assertEqual([10, 35], end_cols)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
-    (are_omitted, rows, start_cols,
-     end_cols) = tensor_format.locate_tensor_element(out, [[0], [6]])
-    self.assertEqual([False, False], are_omitted)
-    self.assertEqual([2, 3], rows)
-    self.assertEqual([8, 8], start_cols)
-    self.assertEqual([10, 10], end_cols)
-
-    (are_omitted, rows, start_cols,
-     end_cols) = tensor_format.locate_tensor_element(out, [[0], [5], [6]])
-    self.assertEqual([False, False, False], are_omitted)
-    self.assertEqual([2, 2, 3], rows)
-    self.assertEqual([8, 33, 8], start_cols)
-    self.assertEqual([10, 35, 10], end_cols)
-
-    (are_omitted, rows, start_cols,
-     end_cols) = tensor_format.locate_tensor_element(out, [[0], [5], [6], [19]])
-    self.assertEqual([False, False, False, False], are_omitted)
-    self.assertEqual([2, 2, 3, 5], rows)
-    self.assertEqual([8, 33, 8, 13], start_cols)
-    self.assertEqual([10, 35, 10, 15], end_cols)
+    self._checkTensorElementLocations(out, a)
 
   def testBatchModeWithErrors(self):
     a = np.zeros(20)
@@ -522,14 +403,9 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
     out = tensor_format.format_tensor(
         a, "a", np_printoptions={"linewidth": 40})
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([ 0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.,  0.,  0.,  0.,  0.,",
-        "        0.,  0.])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
     with self.assertRaisesRegexp(ValueError, "Dimensions mismatch"):
       tensor_format.locate_tensor_element(out, [[0, 0], [0]])
@@ -554,104 +430,22 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
     out = tensor_format.format_tensor(
         a, "a", np_printoptions={"linewidth": 100})
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([[  1.00000000e-08,   1.00000000e-08,   1.00000000e-08],",
-        "       [             nan,   1.00000000e-08,              inf],",
-        "       [  1.00000000e-08,   1.00000000e-08,   1.00000000e-08]])",
-    ], out.lines)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [0, 0])
-    self.assertFalse(is_omitted)
-    self.assertEqual(2, row)
-    self.assertEqual(10, start_col)
-    self.assertEqual(24, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [0, 2])
-    self.assertFalse(is_omitted)
-    self.assertEqual(2, row)
-    self.assertEqual(46, start_col)
-    self.assertEqual(60, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [1, 0])
-    self.assertFalse(is_omitted)
-    self.assertEqual(3, row)
-    self.assertEqual(21, start_col)
-    self.assertEqual(24, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [1, 1])
-    self.assertFalse(is_omitted)
-    self.assertEqual(3, row)
-    self.assertEqual(28, start_col)
-    self.assertEqual(42, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [1, 2])
-    self.assertFalse(is_omitted)
-    self.assertEqual(3, row)
-    self.assertEqual(57, start_col)
-    self.assertEqual(60, end_col)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [2, 2])
-    self.assertFalse(is_omitted)
-    self.assertEqual(4, row)
-    self.assertEqual(46, start_col)
-    self.assertEqual(60, end_col)
+    self._checkTensorElementLocations(out, a)
 
   def testLocateTensorElement2DNoEllipsis(self):
     a = np.linspace(0.0, 1.0 - 1.0 / 16.0, 16).reshape([4, 4])
 
     out = tensor_format.format_tensor(a, "a")
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([[ 0.    ,  0.0625,  0.125 ,  0.1875],",
-        "       [ 0.25  ,  0.3125,  0.375 ,  0.4375],",
-        "       [ 0.5   ,  0.5625,  0.625 ,  0.6875],",
-        "       [ 0.75  ,  0.8125,  0.875 ,  0.9375]])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
 
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [0, 0])
-    self.assertFalse(is_omitted)
-    self.assertEqual(2, row)
-    self.assertEqual(9, start_col)
-    self.assertEqual(11, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [0, 3])
-    self.assertFalse(is_omitted)
-    self.assertEqual(2, row)
-    self.assertEqual(36, start_col)
-    self.assertEqual(42, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [1, 0])
-    self.assertFalse(is_omitted)
-    self.assertEqual(3, row)
-    self.assertEqual(9, start_col)
-    self.assertEqual(13, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [1, 3])
-    self.assertFalse(is_omitted)
-    self.assertEqual(3, row)
-    self.assertEqual(36, start_col)
-    self.assertEqual(42, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [3, 3])
-    self.assertFalse(is_omitted)
-    self.assertEqual(5, row)
-    self.assertEqual(36, start_col)
-    self.assertEqual(42, end_col)
+    self._checkTensorElementLocations(out, a)
 
     with self.assertRaisesRegexp(
         ValueError, "Indices exceed tensor dimensions"):
@@ -670,55 +464,20 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
 
     out = tensor_format.format_tensor(a, "a", include_numeric_summary=True)
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "Numeric summary:",
-        "|  0  + | total |",
-        "|  1 15 |    16 |",
-        "|           min           max          mean           std |",
-        "|           0.0        0.9375       0.46875 0.28811076429 |",
-        "",
-        "array([[ 0.    ,  0.0625,  0.125 ,  0.1875],",
-        "       [ 0.25  ,  0.3125,  0.375 ,  0.4375],",
-        "       [ 0.5   ,  0.5625,  0.625 ,  0.6875],",
-        "       [ 0.75  ,  0.8125,  0.875 ,  0.9375]])",
-    ], out.lines)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [0, 0])
-    self.assertFalse(is_omitted)
-    self.assertEqual(8, row)
-    self.assertEqual(9, start_col)
-    self.assertEqual(11, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [0, 3])
-    self.assertFalse(is_omitted)
-    self.assertEqual(8, row)
-    self.assertEqual(36, start_col)
-    self.assertEqual(42, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [1, 0])
-    self.assertFalse(is_omitted)
-    self.assertEqual(9, row)
-    self.assertEqual(9, start_col)
-    self.assertEqual(13, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [1, 3])
-    self.assertFalse(is_omitted)
-    self.assertEqual(9, row)
-    self.assertEqual(36, start_col)
-    self.assertEqual(42, end_col)
-
-    is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
-        out, [3, 3])
-    self.assertFalse(is_omitted)
-    self.assertEqual(11, row)
-    self.assertEqual(36, start_col)
-    self.assertEqual(42, end_col)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self,
+        ["Tensor \"a\":",
+         "",
+         "Numeric summary:",
+         "|  0  + | total |",
+         "|  1 15 |    16 |",
+         "|           min           max          mean           std |"],
+        out.lines[:6])
+    cli_test_utils.assert_array_lines_close(
+        self, [0.0, 0.9375, 0.46875, 0.28811076429], out.lines[6:7])
+    cli_test_utils.assert_array_lines_close(self, a, out.lines[8:])
+
+    self._checkTensorElementLocations(out, a)
 
     with self.assertRaisesRegexp(
         ValueError, "Indices exceed tensor dimensions"):
@@ -733,100 +492,75 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
       tensor_format.locate_tensor_element(out, [0])
 
   def testLocateTensorElement3DWithEllipses(self):
-    a = np.zeros([11, 11, 11])
+    a = (np.arange(11 * 11 * 11) + 1000).reshape([11, 11, 11]).astype(np.int32)
 
     out = tensor_format.format_tensor(
         a, "a", False, np_printoptions={"threshold": 100, "edgeitems": 2})
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([[[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]],",
-        "",
-        "       [[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]],",
-        "",
-        "       ..., ",
-        "       [[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]],",
-        "",
-        "       [[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]]])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
 
+    actual_row_0_0_0, actual_col_0_0_0 = self._findFirst(out.lines, "1000")
     is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
         out, [0, 0, 0])
     self.assertFalse(is_omitted)
-    self.assertEqual(2, row)
-    self.assertEqual(10, start_col)
-    self.assertEqual(12, end_col)
+    self.assertEqual(actual_row_0_0_0, row)
+    self.assertEqual(actual_col_0_0_0, start_col)
+    self.assertEqual(actual_col_0_0_0 + 4, end_col)
 
+    actual_row_0_0_10, _ = self._findFirst(out.lines, "1010")
     is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
         out, [0, 0, 10])
     self.assertFalse(is_omitted)
-    self.assertEqual(2, row)
+    self.assertEqual(actual_row_0_0_10, row)
     self.assertIsNone(start_col)  # Passes ellipsis.
     self.assertIsNone(end_col)
 
+    actual_row_0_1_0, actual_col_0_1_0 = self._findFirst(out.lines, "1011")
     is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
         out, [0, 1, 0])
     self.assertFalse(is_omitted)
-    self.assertEqual(3, row)
-    self.assertEqual(10, start_col)
-    self.assertEqual(12, end_col)
+    self.assertEqual(actual_row_0_1_0, row)
+    self.assertEqual(actual_col_0_1_0, start_col)
+    self.assertEqual(actual_col_0_1_0 + 4, end_col)
 
     is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
         out, [0, 2, 0])
     self.assertTrue(is_omitted)  # In omitted line.
-    self.assertEqual(4, row)
     self.assertIsNone(start_col)
     self.assertIsNone(end_col)
 
     is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
         out, [0, 2, 10])
     self.assertTrue(is_omitted)  # In omitted line.
-    self.assertEqual(4, row)
     self.assertIsNone(start_col)
     self.assertIsNone(end_col)
 
     is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
         out, [0, 8, 10])
     self.assertTrue(is_omitted)  # In omitted line.
-    self.assertEqual(4, row)
     self.assertIsNone(start_col)
     self.assertIsNone(end_col)
 
+    actual_row_0_10_1, actual_col_0_10_1 = self._findFirst(out.lines, "1111")
     is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
         out, [0, 10, 1])
     self.assertFalse(is_omitted)
-    self.assertEqual(6, row)
-    self.assertEqual(15, start_col)
-    self.assertEqual(17, end_col)
+    self.assertEqual(actual_row_0_10_1, row)
+    self.assertEqual(actual_col_0_10_1, start_col)
+    self.assertEqual(actual_col_0_10_1 + 4, end_col)
 
     is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
         out, [5, 1, 1])
     self.assertTrue(is_omitted)  # In omitted line.
-    self.assertEqual(14, row)
     self.assertIsNone(start_col)
     self.assertIsNone(end_col)
 
+    actual_row_10_10_10, _ = self._findFirst(out.lines, "2330")
     is_omitted, row, start_col, end_col = tensor_format.locate_tensor_element(
         out, [10, 10, 10])
     self.assertFalse(is_omitted)
-    self.assertEqual(25, row)
+    self.assertEqual(actual_row_10_10_10, row)
     self.assertIsNone(start_col)  # Past ellipsis.
     self.assertIsNone(end_col)
 
@@ -843,71 +577,50 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
       tensor_format.locate_tensor_element(out, [5, 5])
 
   def testLocateTensorElement3DWithEllipsesBatchMode(self):
-    a = np.zeros([11, 11, 11])
+    a = (np.arange(11 * 11 * 11) + 1000).reshape([11, 11, 11]).astype(np.int32)
 
     out = tensor_format.format_tensor(
         a, "a", False, np_printoptions={"threshold": 100,
                                         "edgeitems": 2})
 
-    self.assertEqual([
-        "Tensor \"a\":",
-        "",
-        "array([[[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]],",
-        "",
-        "       [[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]],",
-        "",
-        "       ..., ",
-        "       [[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]],",
-        "",
-        "       [[ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        ..., ",
-        "        [ 0.,  0., ...,  0.,  0.],",
-        "        [ 0.,  0., ...,  0.,  0.]]])",
-    ], out.lines)
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["Tensor \"a\":", ""], out.lines[:2])
+    self.assertEqual(repr(a).split("\n"), out.lines[2:])
+
+    actual_row_0_0_0, actual_col_0_0_0 = self._findFirst(out.lines, "1000")
+    actual_row_0_0_10, _ = self._findFirst(out.lines, "1010")
+    actual_row_10_10_10, _ = self._findFirst(out.lines, "2330")
 
     (are_omitted, rows, start_cols,
      end_cols) = tensor_format.locate_tensor_element(out, [[0, 0, 0]])
     self.assertEqual([False], are_omitted)
-    self.assertEqual([2], rows)
-    self.assertEqual([10], start_cols)
-    self.assertEqual([12], end_cols)
+    self.assertEqual([actual_row_0_0_0], rows)
+    self.assertEqual([actual_col_0_0_0], start_cols)
+    self.assertEqual([actual_col_0_0_0 + 4], end_cols)
 
     (are_omitted, rows, start_cols,
      end_cols) = tensor_format.locate_tensor_element(out,
                                                      [[0, 0, 0], [0, 0, 10]])
     self.assertEqual([False, False], are_omitted)
-    self.assertEqual([2, 2], rows)
-    self.assertEqual([10, None], start_cols)
-    self.assertEqual([12, None], end_cols)
+    self.assertEqual([actual_row_0_0_0, actual_row_0_0_10], rows)
+    self.assertEqual([actual_col_0_0_0, None], start_cols)
+    self.assertEqual([actual_col_0_0_0 + 4, None], end_cols)
 
     (are_omitted, rows, start_cols,
      end_cols) = tensor_format.locate_tensor_element(out,
                                                      [[0, 0, 0], [0, 2, 0]])
     self.assertEqual([False, True], are_omitted)
     self.assertEqual([2, 4], rows)
-    self.assertEqual([10, None], start_cols)
-    self.assertEqual([12, None], end_cols)
+    self.assertEqual(2, len(start_cols))
+    self.assertEqual(2, len(end_cols))
 
     (are_omitted, rows, start_cols,
      end_cols) = tensor_format.locate_tensor_element(out,
                                                      [[0, 0, 0], [10, 10, 10]])
     self.assertEqual([False, False], are_omitted)
-    self.assertEqual([2, 25], rows)
-    self.assertEqual([10, None], start_cols)
-    self.assertEqual([12, None], end_cols)
+    self.assertEqual([actual_row_0_0_0, actual_row_10_10_10], rows)
+    self.assertEqual([actual_col_0_0_0, None], start_cols)
+    self.assertEqual([actual_col_0_0_0 + 4, None], end_cols)
 
   def testLocateTensorElementAnnotationsUnavailable(self):
     tensor_proto = tensor_pb2.TensorProto(
@@ -931,41 +644,41 @@ class NumericSummaryTest(test_util.TensorFlowTestCase):
     x = np.array([np.nan, np.nan, -np.inf, np.inf, np.inf, np.inf, -2, -3, -4,
                   0, 1, 2, 2, 2, 2, 0, 0, 0, np.inf, np.inf, np.inf])
     out = tensor_format.numeric_summary(x)
-    self.assertEqual(
-        "|  nan -inf    -    0    + +inf | total |", out.lines[0])
-    self.assertEqual(
-        "|    2    1    3    4    5    6 |    21 |", out.lines[1])
-    self.assertEqual(
-        "|           min           max          mean           std |",
-        out.lines[2])
-    self.assertEqual(
-        "|          -4.0           2.0           0.0 1.95789002075 |",
-        out.lines[3])
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self,
+        ["|  nan -inf    -    0    + +inf | total |",
+         "|    2    1    3    4    5    6 |    21 |",
+         "|     min     max    mean    std |"], out.lines[:3])
+    cli_test_utils.assert_array_lines_close(
+        self, [-4.0, 2.0, 0.0, 1.95789002075], out.lines[3:4])
 
   def testNumericSummaryOnFloatMissingCategories(self):
     x = np.array([np.nan, np.nan])
     out = tensor_format.numeric_summary(x)
     self.assertEqual(2, len(out.lines))
-    self.assertEqual("| nan | total |", out.lines[0])
-    self.assertEqual("|   2 |     2 |", out.lines[1])
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["| nan | total |", "|   2 |     2 |"], out.lines[:2])
 
     x = np.array([-np.inf, np.inf, 0, 0, np.inf, np.inf])
     out = tensor_format.numeric_summary(x)
-    self.assertEqual("| -inf    0 +inf | total |", out.lines[0])
-    self.assertEqual("|    1    2    3 |     6 |", out.lines[1])
-    self.assertEqual("|  min  max mean  std |", out.lines[2])
-    self.assertEqual("|  0.0  0.0  0.0  0.0 |", out.lines[3])
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self,
+        ["| -inf    0 +inf | total |",
+         "|    1    2    3 |     6 |",
+         "|  min  max mean  std |"], out.lines[:3])
+    cli_test_utils.assert_array_lines_close(
+        self, [0.0, 0.0, 0.0, 0.0], out.lines[3:4])
 
     x = np.array([-120, 120, 130])
     out = tensor_format.numeric_summary(x)
-    self.assertEqual("| - + | total |", out.lines[0])
-    self.assertEqual("| 1 2 |     3 |", out.lines[1])
-    self.assertEqual(
-        "|           min           max          mean           std |",
-        out.lines[2])
-    self.assertEqual(
-        "|          -120           130 43.3333333333 115.566238822 |",
-        out.lines[3])
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self,
+        ["| - + | total |",
+         "| 1 2 |     3 |",
+         "|       min       max     mean      std |"],
+        out.lines[:3])
+    cli_test_utils.assert_array_lines_close(
+        self, [-120, 130, 43.3333333333, 115.566238822], out.lines[3:4])
 
   def testNumericSummaryOnEmptyFloat(self):
     x = np.array([], dtype=np.float32)
@@ -976,33 +689,31 @@ class NumericSummaryTest(test_util.TensorFlowTestCase):
   def testNumericSummaryOnInt(self):
     x = np.array([-3] * 50 + [3] * 200 + [0], dtype=np.int32)
     out = tensor_format.numeric_summary(x)
-    self.assertEqual("|   -   0   + | total |", out.lines[0])
-    self.assertEqual("|  50   1 200 |   251 |", out.lines[1])
-    self.assertEqual(
-        "|           min           max          mean           std |",
-        out.lines[2])
-    self.assertEqual(
-        "|            -3             3 1.79282868526 2.39789673081 |",
-        out.lines[3])
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self,
+        ["|   -   0   + | total |",
+         "|  50   1 200 |   251 |",
+         "|      min     max    mean     std |"],
+        out.lines[:3])
+    cli_test_utils.assert_array_lines_close(
+        self, [-3, 3, 1.79282868526, 2.39789673081], out.lines[3:4])
 
   def testNumericSummaryOnBool(self):
     x = np.array([False, True, True, False], dtype=np.bool)
     out = tensor_format.numeric_summary(x)
-    self.assertEqual(2, len(out.lines))
-    self.assertEqual("| False  True | total |", out.lines[0])
-    self.assertEqual("|     2     2 |     4 |", out.lines[1])
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self,
+        ["| False  True | total |", "|     2     2 |     4 |"], out.lines)
 
     x = np.array([True] * 10, dtype=np.bool)
     out = tensor_format.numeric_summary(x)
-    self.assertEqual(2, len(out.lines))
-    self.assertEqual("| True | total |", out.lines[0])
-    self.assertEqual("|   10 |    10 |", out.lines[1])
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["| True | total |", "|   10 |    10 |"], out.lines)
 
     x = np.array([False] * 10, dtype=np.bool)
     out = tensor_format.numeric_summary(x)
-    self.assertEqual(2, len(out.lines))
-    self.assertEqual("| False | total |", out.lines[0])
-    self.assertEqual("|    10 |    10 |", out.lines[1])
+    cli_test_utils.assert_lines_equal_ignoring_whitespace(
+        self, ["| False | total |", "|    10 |    10 |"], out.lines)
 
     x = np.array([], dtype=np.bool)
     out = tensor_format.numeric_summary(x)
diff --git a/tensorflow/python/debug/examples/examples_test.sh b/tensorflow/python/debug/examples/examples_test.sh
index 25916f1903cd41c7f714fd0eb7bad0329dde8ceb..2df6c0b6a2701022e3fed6648208b9708197bebc 100755
--- a/tensorflow/python/debug/examples/examples_test.sh
+++ b/tensorflow/python/debug/examples/examples_test.sh
@@ -23,6 +23,9 @@
 
 set -e
 
+# Filter out LOG(INFO)
+export TF_CPP_MIN_LOG_LEVEL=1
+
 IS_VIRTUALENV=0
 PYTHON_BIN_PATH=""
 while true; do
diff --git a/tensorflow/python/debug/lib/common.py b/tensorflow/python/debug/lib/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..19a0d8c5010021c02de26f5b401fea10c7563a58
--- /dev/null
+++ b/tensorflow/python/debug/lib/common.py
@@ -0,0 +1,87 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Common values and methods for TensorFlow Debugger."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import json
+
+GRPC_URL_PREFIX = "grpc://"
+
+# A key for a Session.run() call.
+RunKey = collections.namedtuple("RunKey", ["feed_names", "fetch_names"])
+
+
+def get_graph_element_name(elem):
+  """Obtain the name or string representation of a graph element.
+
+  If the graph element has the attribute "name", return name. Otherwise, return
+  a __str__ representation of the graph element. Certain graph elements, such as
+  `SparseTensor`s, do not have the attribute "name".
+
+  Args:
+    elem: The graph element in question.
+
+  Returns:
+    If the attribute 'name' is available, return the name. Otherwise, return
+    str(fetch).
+  """
+
+  return elem.name if hasattr(elem, "name") else str(elem)
+
+
+def get_flattened_names(feeds_or_fetches):
+  """Get a flattened list of the names in run() call feeds or fetches.
+
+  Args:
+    feeds_or_fetches: Feeds or fetches of the `Session.run()` call. It maybe
+      a Tensor, an Operation or a Variable. It may also be nested lists, tuples
+      or dicts. See doc of `Session.run()` for more details.
+
+  Returns:
+    (list of str) A flattened list of fetch names from `feeds_or_fetches`.
+  """
+
+  lines = []
+  if isinstance(feeds_or_fetches, (list, tuple)):
+    for item in feeds_or_fetches:
+      lines.extend(get_flattened_names(item))
+  elif isinstance(feeds_or_fetches, dict):
+    for key in feeds_or_fetches:
+      lines.extend(get_flattened_names(feeds_or_fetches[key]))
+  else:
+    # This ought to be a Tensor, an Operation or a Variable, for which the name
+    # attribute should be available. (Bottom-out condition of the recursion.)
+    lines.append(get_graph_element_name(feeds_or_fetches))
+
+  return lines
+
+
+def get_run_key(feed_dict, fetches):
+  """Summarize the names of feeds and fetches as a RunKey JSON string.
+
+  Args:
+    feed_dict: The feed_dict given to the `Session.run()` call.
+    fetches: The fetches from the `Session.run()` call.
+
+  Returns:
+    A JSON Array consisting of two items. They first items is a flattened
+    Array of the names of the feeds. The second item is a flattened Array of
+    the names of the fetches.
+  """
+  return json.dumps(RunKey(get_flattened_names(feed_dict),
+                           get_flattened_names(fetches)))
diff --git a/tensorflow/python/debug/lib/common_test.py b/tensorflow/python/debug/lib/common_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..5af0dafcf9fd81763b30eb159a3e21ef8b7f9ac9
--- /dev/null
+++ b/tensorflow/python/debug/lib/common_test.py
@@ -0,0 +1,59 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Unit tests for common values and methods of TensorFlow Debugger."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+
+from tensorflow.python.debug.lib import common
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import googletest
+
+
+class CommonTest(test_util.TensorFlowTestCase):
+
+  def testOnFeedOneFetch(self):
+    a = constant_op.constant(10.0, name="a")
+    b = constant_op.constant(20.0, name="b")
+    run_key = common.get_run_key({"a": a}, [b])
+    loaded = json.loads(run_key)
+    self.assertItemsEqual(["a:0"], loaded[0])
+    self.assertItemsEqual(["b:0"], loaded[1])
+
+  def testGetRunKeyFlat(self):
+    a = constant_op.constant(10.0, name="a")
+    b = constant_op.constant(20.0, name="b")
+    run_key = common.get_run_key({"a": a}, [a, b])
+    loaded = json.loads(run_key)
+    self.assertItemsEqual(["a:0"], loaded[0])
+    self.assertItemsEqual(["a:0", "b:0"], loaded[1])
+
+  def testGetRunKeyNestedFetches(self):
+    a = constant_op.constant(10.0, name="a")
+    b = constant_op.constant(20.0, name="b")
+    c = constant_op.constant(30.0, name="c")
+    d = constant_op.constant(30.0, name="d")
+    run_key = common.get_run_key(
+        {}, {"set1": [a, b], "set2": {"c": c, "d": d}})
+    loaded = json.loads(run_key)
+    self.assertItemsEqual([], loaded[0])
+    self.assertItemsEqual(["a:0", "b:0", "c:0", "d:0"], loaded[1])
+
+
+if __name__ == "__main__":
+  googletest.main()
diff --git a/tensorflow/python/debug/lib/debug_gradients.py b/tensorflow/python/debug/lib/debug_gradients.py
index b01a58719cb45b3a42052e0f3522f39a7c5c63c5..16f51a4b32f711b97077643cec669bb8970e0b21 100644
--- a/tensorflow/python/debug/lib/debug_gradients.py
+++ b/tensorflow/python/debug/lib/debug_gradients.py
@@ -156,9 +156,12 @@ class GradientsDebugger(object):
     # TODO(cais): Implement value_stack.
     grad_debug_op_name = _tensor_to_grad_debug_op_name(input_tensor, self._uuid)
     # pylint: disable=protected-access
-    debug_grad_identity = gen_array_ops._debug_gradient_identity(
-        input_tensor, name=grad_debug_op_name)
+    identity_op = (gen_array_ops._debug_gradient_ref_identity
+                   if input_tensor.dtype._is_ref_dtype
+                   else gen_array_ops._debug_gradient_identity)
+    debug_grad_identity = identity_op(input_tensor, name=grad_debug_op_name)
     # pylint: enable=protected-access
+    assert debug_grad_identity.dtype == input_tensor.dtype
     if debug_grad_identity.op.name != grad_debug_op_name:
       raise ValueError(
           "The graph already contains an op named %s" % grad_debug_op_name)
@@ -261,32 +264,22 @@ class GradientsDebugger(object):
       The GradientsDebugger instance itself.
     """
     tensor_name_pattern = re.compile(tensor_name_regex)
-
-    # pylint: disable=protected-access
     with graph.as_default():
       for op in graph.get_operations():
         for output in op.outputs:
           if tensor_name_pattern.match(output.name):
             debug_op = self.identify_gradient(output)
 
-            for consumer in output.consumers():
+            # Make a copy of output.consumers() since we'll modify the consumers
+            # TODO(skyewm): this is unnecessary once the C API is enabled
+            for consumer in list(output.consumers()):
               if consumer == debug_op.op:
                 continue
 
               # Locate the slot index of the original input.
-              input_slots = []
-              for i, consumer_input in enumerate(consumer._inputs):
+              for i, consumer_input in enumerate(consumer.inputs):
                 if consumer_input == output:
-                  input_slots.append(i)
-
-              for slot in input_slots:
-                consumer._inputs[slot] = debug_op
-                debug_op._consumers.append(consumer)
-
-            del output._consumers[:]
-            output._consumers.append(debug_op.op)
-    # pylint: enable=protected-access
-
+                  consumer._update_input(i, debug_op)  # pylint: disable=protected-access
     return self
 
   def _check_same_graph(self, tensor):
@@ -369,6 +362,12 @@ def _identify_gradient_grad(op, dy):
   return dy
 
 
+@ops.RegisterGradient("DebugGradientRefIdentity")
+def _identify_gradient_grad_ref(op, dy):
+  """Gradient function for the DebugIdentity op."""
+  return _identify_gradient_grad(op, dy)
+
+
 def gradient_values_from_dump(grad_debugger, x_tensor, dump):
   """Find gradient values from a `DebugDumpDir` object.
 
diff --git a/tensorflow/python/debug/lib/debug_gradients_test.py b/tensorflow/python/debug/lib/debug_gradients_test.py
index 966578320e22caba28344248cbc0562fdc3dfee2..6fd89e018aa3b2a21dad4b56a4aa1a5b01a1d69d 100644
--- a/tensorflow/python/debug/lib/debug_gradients_test.py
+++ b/tensorflow/python/debug/lib/debug_gradients_test.py
@@ -343,7 +343,9 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
         self.sess.graph,
         debug_urls=debug_url)
     run_metadata = config_pb2.RunMetadata()
+    self.assertAllClose(2.0, self.sess.run(self.u))
     self.sess.run(train_op, options=run_options, run_metadata=run_metadata)
+    self.assertAllClose(-1.0, self.sess.run(self.u))
 
     dump = debug_data.DebugDumpDir(
         dump_dir, partition_graphs=run_metadata.partition_graphs)
diff --git a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
index cc1a3805385eb5097dc65738440bdefa28d5d3e3..bd00f738610627a4b3bc7c61476164188a7b460c 100644
--- a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
+++ b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
@@ -164,7 +164,7 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase):
       self._compareOriginalAndReconstructedGraphDefs(sess, loop)
 
   def testReconstructGraphWithGradients(self):
-    with session.Session() as sess:
+    with session.Session(config=self._no_rewrite_session_config()) as sess:
       u = variables.Variable(12.0, name="u")
       v = variables.Variable(30.0, name="v")
       x = constant_op.constant(1.1, name="x")
diff --git a/tensorflow/python/debug/lib/debug_service_pb2_grpc.py b/tensorflow/python/debug/lib/debug_service_pb2_grpc.py
index 98adc3284b94afc8190f7ee4240d7c5fbf37b4b5..16573eab6f0e61c12020c4becb72369c38f05b42 100755
--- a/tensorflow/python/debug/lib/debug_service_pb2_grpc.py
+++ b/tensorflow/python/debug/lib/debug_service_pb2_grpc.py
@@ -23,6 +23,7 @@ from __future__ import print_function
 import grpc
 
 from tensorflow.core.debug import debug_service_pb2 as tensorflow_dot_core_dot_debug_dot_debug__service__pb2
+from tensorflow.core.protobuf import debug_pb2 as tensorflow_dot_core_dot_protobuf_dot_debug__pb2
 from tensorflow.core.util import event_pb2 as tensorflow_dot_core_dot_util_dot_event__pb2
 
 
@@ -42,6 +43,16 @@ class EventListenerStub(object):
         request_serializer=tensorflow_dot_core_dot_util_dot_event__pb2.Event.SerializeToString,
         response_deserializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.FromString,
         )
+    self.SendTracebacks = channel.unary_unary(
+        '/tensorflow.EventListener/SendTracebacks',
+        request_serializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.CallTraceback.SerializeToString,
+        response_deserializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.FromString,
+        )
+    self.SendSourceFiles = channel.unary_unary(
+        '/tensorflow.EventListener/SendSourceFiles',
+        request_serializer=tensorflow_dot_core_dot_protobuf_dot_debug__pb2.DebuggedSourceFiles.SerializeToString,
+        response_deserializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.FromString,
+        )
 
 
 class EventListenerServicer(object):
@@ -62,6 +73,20 @@ class EventListenerServicer(object):
     context.set_details('Method not implemented!')
     raise NotImplementedError('Method not implemented!')
 
+  def SendTracebacks(self, request, context):
+    """Send the tracebacks of ops in a Python graph definition.
+    """
+    context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+    context.set_details('Method not implemented!')
+    raise NotImplementedError('Method not implemented!')
+
+  def SendSourceFiles(self, request, context):
+    """Send a collection of source code files being debugged.
+    """
+    context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+    context.set_details('Method not implemented!')
+    raise NotImplementedError('Method not implemented!')
+
 
 def add_EventListenerServicer_to_server(servicer, server):
   rpc_method_handlers = {
@@ -70,6 +95,16 @@ def add_EventListenerServicer_to_server(servicer, server):
           request_deserializer=tensorflow_dot_core_dot_util_dot_event__pb2.Event.FromString,
           response_serializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.SerializeToString,
       ),
+      'SendTracebacks': grpc.unary_unary_rpc_method_handler(
+          servicer.SendTracebacks,
+          request_deserializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.CallTraceback.FromString,
+          response_serializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.SerializeToString,
+      ),
+      'SendSourceFiles': grpc.unary_unary_rpc_method_handler(
+          servicer.SendSourceFiles,
+          request_deserializer=tensorflow_dot_core_dot_protobuf_dot_debug__pb2.DebuggedSourceFiles.FromString,
+          response_serializer=tensorflow_dot_core_dot_debug_dot_debug__service__pb2.EventReply.SerializeToString,
+      ),
   }
   generic_handler = grpc.method_handlers_generic_handler(
       'tensorflow.EventListener', rpc_method_handlers)
diff --git a/tensorflow/python/debug/lib/grpc_debug_server.py b/tensorflow/python/debug/lib/grpc_debug_server.py
index 5ab910fb0c9d89bc31a15ecbec48516f07a02979..1b559f1f27538364d8e12339d321e41d33c52590 100644
--- a/tensorflow/python/debug/lib/grpc_debug_server.py
+++ b/tensorflow/python/debug/lib/grpc_debug_server.py
@@ -458,3 +458,36 @@ class EventListenerBaseServicer(debug_service_pb2_grpc.EventListenerServicer):
         `debug_op` as a `str`.
     """
     return list(self._gated_grpc_debug_watches)
+
+  def SendTracebacks(self, request, context):
+    """Base implementation of the handling of SendTracebacks calls.
+
+    The base implementation does nothing with the incoming request.
+    Override in an implementation of the server if necessary.
+
+    Args:
+      request: A `CallTraceback` proto, containing information about the
+        type (e.g., graph vs. eager execution) and source-code traceback of the
+        call and (any) associated `tf.Graph`s.
+      context: Server context.
+
+    Returns:
+      A `EventReply` proto.
+    """
+    return debug_service_pb2.EventReply()
+
+  def SendSourceFiles(self, request, context):
+    """Base implementation of the handling of SendSourceFiles calls.
+
+    The base implementation does nothing with the incoming request.
+    Override in an implementation of the server if necessary.
+
+    Args:
+      request: A `DebuggedSourceFiles` proto, containing the path, content, size
+        and last-modified timestamp of source files.
+      context: Server context.
+
+    Returns:
+      A `EventReply` proto.
+    """
+    return debug_service_pb2.EventReply()
diff --git a/tensorflow/python/debug/lib/grpc_debug_test_server.py b/tensorflow/python/debug/lib/grpc_debug_test_server.py
index 76e45c0bedbb463c872bfca466c6991c9d459e49..917004694845c752d1f6bf88cc2a203eb8f9ba73 100644
--- a/tensorflow/python/debug/lib/grpc_debug_test_server.py
+++ b/tensorflow/python/debug/lib/grpc_debug_test_server.py
@@ -238,6 +238,15 @@ class EventListenerTestServicer(grpc_debug_server.EventListenerBaseServicer):
         self, server_port,
         functools.partial(EventListenerTestStreamHandler, dump_dir, self))
 
+    # Members for storing the graph ops traceback and source files.
+    self._call_types = []
+    self._call_keys = []
+    self._origin_stacks = []
+    self._origin_id_to_strings = []
+    self._graph_tracebacks = []
+    self._graph_versions = []
+    self._source_files = None
+
   def _initialize_toggle_watch_state(self, toggle_watches):
     self._toggle_watches = toggle_watches
     self._toggle_watch_state = dict()
@@ -259,6 +268,100 @@ class EventListenerTestServicer(grpc_debug_server.EventListenerBaseServicer):
     self.core_metadata_json_strings = []
     self.partition_graph_defs = []
     self.debug_tensor_values = collections.defaultdict(list)
+    self._call_types = []
+    self._call_keys = []
+    self._origin_stacks = []
+    self._origin_id_to_strings = []
+    self._graph_tracebacks = []
+    self._graph_versions = []
+    self._source_files = None
+
+  def SendTracebacks(self, request, context):
+    self._call_types.append(request.call_type)
+    self._call_keys.append(request.call_key)
+    self._origin_stacks.append(request.origin_stack)
+    self._origin_id_to_strings.append(request.origin_id_to_string)
+    self._graph_tracebacks.append(request.graph_traceback)
+    self._graph_versions.append(request.graph_version)
+    return debug_service_pb2.EventReply()
+
+  def SendSourceFiles(self, request, context):
+    self._source_files = request
+    return debug_service_pb2.EventReply()
+
+  def query_op_traceback(self, op_name):
+    """Query the traceback of an op.
+
+    Args:
+      op_name: Name of the op to query.
+
+    Returns:
+      The traceback of the op, as a list of 3-tuples:
+        (filename, lineno, function_name)
+
+    Raises:
+      ValueError: If the op cannot be found in the tracebacks received by the
+        server so far.
+    """
+    for op_log_proto in self._graph_tracebacks:
+      for log_entry in op_log_proto.log_entries:
+        if log_entry.name == op_name:
+          return self._code_def_to_traceback(log_entry.code_def,
+                                             op_log_proto.id_to_string)
+    raise ValueError(
+        "Op '%s' does not exist in the tracebacks received by the debug "
+        "server." % op_name)
+
+  def query_origin_stack(self):
+    """Query the stack of the origin of the execution call.
+
+    Returns:
+      A `list` of all tracebacks. Each item corresponds to an execution call,
+        i.e., a `SendTracebacks` request. Each item is a `list` of 3-tuples:
+        (filename, lineno, function_name).
+    """
+    ret = []
+    for stack, id_to_string in zip(
+        self._origin_stacks, self._origin_id_to_strings):
+      ret.append(self._code_def_to_traceback(stack, id_to_string))
+    return ret
+
+  def query_call_types(self):
+    return self._call_types
+
+  def query_call_keys(self):
+    return self._call_keys
+
+  def query_graph_versions(self):
+    return self._graph_versions
+
+  def query_source_file_line(self, file_path, lineno):
+    """Query the content of a given line in a source file.
+
+    Args:
+      file_path: Path to the source file.
+      lineno: Line number as an `int`.
+
+    Returns:
+      Content of the line as a string.
+
+    Raises:
+      ValueError: If no source file is found at the given file_path.
+    """
+    if not self._source_files:
+      raise ValueError(
+          "This debug server has not received any source file contents yet.")
+    for source_file_proto in self._source_files.source_files:
+      if source_file_proto.file_path == file_path:
+        return source_file_proto.lines[lineno - 1]
+    raise ValueError(
+        "Source file at path %s has not been received by the debug server",
+        file_path)
+
+  def _code_def_to_traceback(self, code_def, id_to_string):
+    return [(id_to_string[trace.file_id],
+             trace.lineno,
+             id_to_string[trace.function_id]) for trace in code_def.traces]
 
 
 def start_server_on_separate_thread(dump_to_filesystem=True,
diff --git a/tensorflow/python/debug/lib/session_debug_grpc_test.py b/tensorflow/python/debug/lib/session_debug_grpc_test.py
index e1ddd4ee642f2a11cf4bb65b1d60b8f731b9c8f6..367b3535450ac4bd17d4c5dba0eaf149aa4b68b3 100644
--- a/tensorflow/python/debug/lib/session_debug_grpc_test.py
+++ b/tensorflow/python/debug/lib/session_debug_grpc_test.py
@@ -248,10 +248,79 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase):
     self.assertEqual(
         14, len(dump.get_tensors("v/read", 0, "DebugNumericSummary")[0]))
 
-  def testConstructGrpcDebugHookWithGrpcInUrlRaisesValueError(self):
-    """Tests that the hook raises an error if the URL starts with grpc://."""
+  def testTensorBoardDebugHookWorks(self):
+    u = variables.Variable(2.1, name="u")
+    v = variables.Variable(20.0, name="v")
+    w = math_ops.multiply(u, v, name="w")
+
+    sess = session.Session(config=no_rewrite_session_config())
+    sess.run(u.initializer)
+    sess.run(v.initializer)
+
+    grpc_debug_hook = hooks.TensorBoardDebugHook(
+        ["localhost:%d" % self._server_port])
+    sess = monitored_session._HookedSession(sess, [grpc_debug_hook])
+
+    # Activate watch point on a tensor before calling sess.run().
+    self._server.request_watch("u/read", 0, "DebugIdentity")
+    self.assertAllClose(42.0, sess.run(w))
+
+    # self.assertAllClose(42.0, sess.run(w))
+    dump = debug_data.DebugDumpDir(self._dump_root)
+    self.assertAllClose([2.1], dump.get_tensors("u/read", 0, "DebugIdentity"))
+
+    # Check that the server has received the stack trace.
+    self.assertTrue(self._server.query_op_traceback("u"))
+    self.assertTrue(self._server.query_op_traceback("u/read"))
+    self.assertTrue(self._server.query_op_traceback("v"))
+    self.assertTrue(self._server.query_op_traceback("v/read"))
+    self.assertTrue(self._server.query_op_traceback("w"))
+
+    # Check that the server has received the python file content.
+    # Query an arbitrary line to make sure that is the case.
+    with open(__file__, "rt") as this_source_file:
+      first_line = this_source_file.readline().strip()
+      self.assertEqual(
+          first_line, self._server.query_source_file_line(__file__, 1))
+
+    self._server.clear_data()
+    # Call sess.run() again, and verify that this time the traceback and source
+    # code is not sent, because the graph version is not newer.
+    self.assertAllClose(42.0, sess.run(w))
+    with self.assertRaises(ValueError):
+      self._server.query_op_traceback("delta_1")
     with self.assertRaises(ValueError):
-      hooks.GrpcDebugHook(["grpc://foo:42"])
+      self._server.query_source_file_line(__file__, 1)
+
+  def testTensorBoardDebugHookDisablingTracebackSourceCodeSendingWorks(self):
+    u = variables.Variable(2.1, name="u")
+    v = variables.Variable(20.0, name="v")
+    w = math_ops.multiply(u, v, name="w")
+
+    sess = session.Session(config=no_rewrite_session_config())
+    sess.run(variables.global_variables_initializer())
+
+    grpc_debug_hook = hooks.TensorBoardDebugHook(
+        ["localhost:%d" % self._server_port],
+        send_traceback_and_source_code=False)
+    sess = monitored_session._HookedSession(sess, [grpc_debug_hook])
+
+    # Activate watch point on a tensor before calling sess.run().
+    self._server.request_watch("u/read", 0, "DebugIdentity")
+    self.assertAllClose(42.0, sess.run(w))
+
+    # Check that the server has _not_ received any tracebacks, as a result of
+    # the disabling above.
+    with self.assertRaisesRegexp(
+        ValueError, r"Op .*u/read.* does not exist"):
+      self.assertTrue(self._server.query_op_traceback("u/read"))
+    with self.assertRaisesRegexp(
+        ValueError, r".* has not received any source file"):
+      self._server.query_source_file_line(__file__, 1)
+
+  def testConstructGrpcDebugHookWithOrWithouGrpcInUrlWorks(self):
+    hooks.GrpcDebugHook(["grpc://foo:42424"])
+    hooks.GrpcDebugHook(["foo:42424"])
 
 
 class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
@@ -684,6 +753,112 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase):
           # to disable the breakpoint at delta:0:DebugIdentity.
           self.assertSetEqual(set(), self._server_1.breakpoints)
 
+  def testTensorBoardDebuggerWrapperToggleBreakpointsWorks(self):
+    with session.Session(config=no_rewrite_session_config()) as sess:
+      v_1 = variables.Variable(50.0, name="v_1")
+      v_2 = variables.Variable(-50.0, name="v_2")
+      delta_1 = constant_op.constant(5.0, name="delta_1")
+      delta_2 = constant_op.constant(-5.0, name="delta_2")
+      inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
+      inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2")
+
+      sess.run([v_1.initializer, v_2.initializer])
+
+      # The TensorBoardDebugWrapperSession should add a DebugIdentity debug op
+      # with attribute gated_grpc=True for every tensor in the graph.
+      sess = grpc_wrapper.TensorBoardDebugWrapperSession(
+          sess, self._debug_server_url_1)
+
+      for i in xrange(4):
+        self._server_1.clear_data()
+
+        if i in (0, 2):
+          # Enable breakpoint at delta_[1,2]:0:DebugIdentity in runs 0 and 2.
+          self._server_1.request_watch(
+              "delta_1", 0, "DebugIdentity", breakpoint=True)
+          self._server_1.request_watch(
+              "delta_2", 0, "DebugIdentity", breakpoint=True)
+        else:
+          # Disable the breakpoint in runs 1 and 3.
+          self._server_1.request_unwatch("delta_1", 0, "DebugIdentity")
+          self._server_1.request_unwatch("delta_2", 0, "DebugIdentity")
+
+        output = sess.run([inc_v_1, inc_v_2])
+        self.assertAllClose([50.0 + 5.0 * (i + 1), -50 - 5.0 * (i + 1)], output)
+
+        if i in (0, 2):
+          # During runs 0 and 2, the server should have received the published
+          # debug tensor delta:0:DebugIdentity. The breakpoint should have been
+          # unblocked by EventReply reponses from the server.
+          self.assertAllClose(
+              [5.0],
+              self._server_1.debug_tensor_values["delta_1:0:DebugIdentity"])
+          self.assertAllClose(
+              [-5.0],
+              self._server_1.debug_tensor_values["delta_2:0:DebugIdentity"])
+          # After the runs, the server should have properly registered the
+          # breakpoints.
+        else:
+          # After the end of runs 1 and 3, the server has received the requests
+          # to disable the breakpoint at delta:0:DebugIdentity.
+          self.assertSetEqual(set(), self._server_1.breakpoints)
+
+        if i == 0:
+          # Check that the server has received the stack trace.
+          self.assertTrue(self._server_1.query_op_traceback("delta_1"))
+          self.assertTrue(self._server_1.query_op_traceback("delta_2"))
+          self.assertTrue(self._server_1.query_op_traceback("inc_v_1"))
+          self.assertTrue(self._server_1.query_op_traceback("inc_v_2"))
+          # Check that the server has received the python file content.
+          # Query an arbitrary line to make sure that is the case.
+          with open(__file__, "rt") as this_source_file:
+            first_line = this_source_file.readline().strip()
+          self.assertEqual(
+              first_line, self._server_1.query_source_file_line(__file__, 1))
+        else:
+          # In later Session.run() calls, the traceback shouldn't have been sent
+          # because it is already sent in the 1st call. So calling
+          # query_op_traceback() should lead to an exception, because the test
+          # debug server clears the data at the beginning of every iteration.
+          with self.assertRaises(ValueError):
+            self._server_1.query_op_traceback("delta_1")
+          with self.assertRaises(ValueError):
+            self._server_1.query_source_file_line(__file__, 1)
+
+  def testTensorBoardDebuggerWrapperDisablingTracebackSourceSendingWorks(self):
+    with session.Session(config=no_rewrite_session_config()) as sess:
+      v_1 = variables.Variable(50.0, name="v_1")
+      v_2 = variables.Variable(-50.0, name="v_2")
+      delta_1 = constant_op.constant(5.0, name="delta_1")
+      delta_2 = constant_op.constant(-5.0, name="delta_2")
+      inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
+      inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2")
+
+      sess.run(variables.global_variables_initializer())
+
+      # Disable the sending of traceback and source code.
+      sess = grpc_wrapper.TensorBoardDebugWrapperSession(
+          sess, self._debug_server_url_1, send_traceback_and_source_code=False)
+
+      for i in xrange(4):
+        self._server_1.clear_data()
+
+        if i == 0:
+          self._server_1.request_watch(
+              "delta_1", 0, "DebugIdentity", breakpoint=True)
+
+        output = sess.run([inc_v_1, inc_v_2])
+        self.assertAllClose([50.0 + 5.0 * (i + 1), -50 - 5.0 * (i + 1)], output)
+
+        # No op traceback or source code should have been received by the debug
+        # server due to the disabling above.
+        with self.assertRaisesRegexp(
+            ValueError, r"Op .*delta_1.* does not exist"):
+          self.assertTrue(self._server_1.query_op_traceback("delta_1"))
+        with self.assertRaisesRegexp(
+            ValueError, r".* has not received any source file"):
+          self._server_1.query_source_file_line(__file__, 1)
+
   def testGetGrpcDebugWatchesReturnsCorrectAnswer(self):
     with session.Session() as sess:
       v = variables.Variable(50.0, name="v")
diff --git a/tensorflow/python/debug/lib/source_remote.py b/tensorflow/python/debug/lib/source_remote.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b6b2b995ecd13cffddaa38bd2ec673e6b824574
--- /dev/null
+++ b/tensorflow/python/debug/lib/source_remote.py
@@ -0,0 +1,209 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Communicating tracebacks and source code with debug server."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import socket
+
+import grpc
+
+from tensorflow.core.debug import debug_service_pb2
+from tensorflow.core.protobuf import debug_pb2
+from tensorflow.python.debug.lib import common
+from tensorflow.python.debug.lib import debug_service_pb2_grpc
+from tensorflow.python.debug.lib import source_utils
+from tensorflow.python.platform import gfile
+from tensorflow.python.profiler import tfprof_logger
+
+
+def _load_debugged_source_file(file_path, source_file_proto):
+  file_stat = gfile.Stat(file_path)
+  source_file_proto.host = socket.gethostname()
+  source_file_proto.file_path = file_path
+  source_file_proto.last_modified = file_stat.mtime_nsec
+  source_file_proto.bytes = file_stat.length
+  try:
+    with gfile.Open(file_path, "r") as f:
+      source_file_proto.lines.extend(f.read().splitlines())
+  except IOError:
+    pass
+
+
+def _string_to_id(string, string_to_id):
+  if string not in string_to_id:
+    string_to_id[string] = len(string_to_id)
+  return string_to_id[string]
+
+
+def _format_origin_stack(origin_stack, call_traceback_proto):
+  """Format a traceback stack for a `CallTraceback` proto.
+
+  Args:
+    origin_stack: The stack list as returned by `traceback.extract_stack()`.
+    call_traceback_proto: A `CallTraceback` proto whose fields are to be
+      populated.
+  """
+  string_to_id = dict()
+  string_to_id[None] = 0
+  for frame in origin_stack:
+    file_path, lineno, func_name, line_text = frame
+    call_traceback_proto.origin_stack.traces.add(
+        file_id=_string_to_id(file_path, string_to_id),
+        lineno=lineno,
+        function_id=_string_to_id(func_name, string_to_id),
+        line_id=_string_to_id(line_text, string_to_id))
+
+  id_to_string = call_traceback_proto.origin_id_to_string
+  for key, value in string_to_id.items():
+    id_to_string[value] = key if key is not None else ""
+
+
+def _source_file_paths_outside_tensorflow_py_library(code_defs, id_to_string):
+  """Extract source file paths outside TensorFlow Python library.
+
+  Args:
+    code_defs: An iterable of `CodeDef` protos, i.e., an iterable of stack
+      traces.
+    id_to_string: A proto map from integer ids to strings.
+
+  Returns:
+    An iterable of source file paths outside the TensorFlow Python library.
+  """
+  file_ids = set()
+  for code_def in code_defs:
+    for trace in code_def.traces:
+      file_ids.add(trace.file_id)
+  non_tf_files = (id_to_string[file_id] for file_id in file_ids)
+  non_tf_files = (
+      f for f in non_tf_files
+      if not source_utils.guess_is_tensorflow_py_library(f) and gfile.Exists(f))
+  return non_tf_files
+
+
+def _send_call_tracebacks(destinations,
+                          origin_stack,
+                          is_eager_execution=False,
+                          call_key=None,
+                          graph=None,
+                          send_source=True):
+  """Send the tracebacks of a TensorFlow execution call.
+
+  To gRPC debug server(s). This applies to graph execution (`tf.Session.run()`)
+  calls and eager execution calls.
+
+  If `send_source`, also sends the underlying source files outside the
+  TensorFlow library.
+
+  Args:
+    destinations: gRPC destination addresses, a `str` or a `list` of `str`s,
+      e.g., "localhost:4242". If a `list`, gRPC requests containing the same
+      `CallTraceback` proto payload will be sent to all the destinations.
+    origin_stack: The traceback stack for the origin of the execution call. For
+      graph execution, this is the traceback of the `tf.Session.run()`
+      invocation. For eager execution, this is the traceback of the Python
+      line that executes the eager opertion.
+    is_eager_execution: (`bool`) whether an eager execution call (i.e., not a
+      `tf.Session.run` or derived methods) is being sent.
+    call_key: The key of the execution call, as a string. For graph execution,
+      this is a string describing the feeds, fetches (and targets) names of the
+      `tf.Session.run` call. For eager execution, this is ignored.
+    graph: A Python `tf.Graph` object (i.e., *not* a `tf.GraphDef`), which
+      contains op tracebacks, if applicable.
+    send_source: Whether the source files involved in the op tracebacks but
+      outside the TensorFlow library are to be sent.
+  """
+  if not isinstance(destinations, list):
+    destinations = [destinations]
+  # Strip grpc:// prefix, if any is present.
+  destinations = [
+      dest[len(common.GRPC_URL_PREFIX):]
+      if dest.startswith(common.GRPC_URL_PREFIX) else dest
+      for dest in destinations]
+
+  call_type = (debug_service_pb2.CallTraceback.EAGER_EXECUTION
+               if is_eager_execution
+               else debug_service_pb2.CallTraceback.GRAPH_EXECUTION)
+  graph_traceback = tfprof_logger.merge_default_with_oplog(
+      graph, add_trainable_var=False) if graph else None
+  call_traceback = debug_service_pb2.CallTraceback(
+      call_type=call_type, call_key=call_key, graph_traceback=graph_traceback,
+      graph_version=graph.version if graph else None)
+
+  _format_origin_stack(origin_stack, call_traceback)
+
+  if send_source:
+    source_file_paths = set()
+    source_file_paths.update(_source_file_paths_outside_tensorflow_py_library(
+        (log_entry.code_def for log_entry
+         in call_traceback.graph_traceback.log_entries),
+        call_traceback.graph_traceback.id_to_string))
+    source_file_paths.update(_source_file_paths_outside_tensorflow_py_library(
+        [call_traceback.origin_stack], call_traceback.origin_id_to_string))
+
+    debugged_source_files = debug_pb2.DebuggedSourceFiles()
+    for file_path in source_file_paths:
+      _load_debugged_source_file(
+          file_path, debugged_source_files.source_files.add())
+
+  for destination in destinations:
+    channel = grpc.insecure_channel(destination)
+    stub = debug_service_pb2_grpc.EventListenerStub(channel)
+    stub.SendTracebacks(call_traceback)
+    if send_source:
+      stub.SendSourceFiles(debugged_source_files)
+
+
+def send_graph_tracebacks(destinations,
+                          run_key,
+                          origin_stack,
+                          graph,
+                          send_source=True):
+  """Send the tracebacks of a graph execution call to debug server(s).
+
+  Args:
+    destinations: gRPC destination addresses, a `str` or a `list` of `str`s,
+      e.g., "localhost:4242". If a `list`, gRPC requests containing the same
+      `CallTraceback` proto payload will be sent to all the destinations.
+    run_key: A string describing the feeds, fetches (and targets) names of the
+      `tf.Session.run` call.
+    origin_stack: The traceback of the `tf.Session.run()` invocation.
+    graph: A Python `tf.Graph` object (i.e., *not* a `tf.GraphDef`), which
+      contains op tracebacks.
+    send_source: Whether the source files involved in the op tracebacks but
+      outside the TensorFlow library are to be sent.
+  """
+  _send_call_tracebacks(
+      destinations, origin_stack, is_eager_execution=False, call_key=run_key,
+      graph=graph, send_source=send_source)
+
+
+def send_eager_tracebacks(destinations,
+                          origin_stack,
+                          send_source=True):
+  """Send the tracebacks of an eager execution call to debug server(s).
+
+  Args:
+    destinations: gRPC destination addresses, a `str` or a `list` of `str`s,
+      e.g., "localhost:4242". If a `list`, gRPC requests containing the same
+    origin_stack: The traceback of the eager operation invocation.
+    send_source: Whether the source files involved in the op tracebacks but
+      outside the TensorFlow library are to be sent.
+  """
+  _send_call_tracebacks(
+      destinations, origin_stack, is_eager_execution=True,
+      send_source=send_source)
diff --git a/tensorflow/python/debug/lib/source_remote_test.py b/tensorflow/python/debug/lib/source_remote_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..27bafa45e1207513e46fd2ae0f92d5bfa686ffd5
--- /dev/null
+++ b/tensorflow/python/debug/lib/source_remote_test.py
@@ -0,0 +1,171 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Unit tests for source_remote."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import traceback
+
+from tensorflow.core.debug import debug_service_pb2
+from tensorflow.python.client import session
+from tensorflow.python.debug.lib import grpc_debug_test_server
+from tensorflow.python.debug.lib import source_remote
+from tensorflow.python.debug.lib import source_utils
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import math_ops
+# Import resource_variable_ops for the variables-to-tensor implicit conversion.
+from tensorflow.python.ops import resource_variable_ops  # pylint: disable=unused-import
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import googletest
+from tensorflow.python.util import tf_inspect
+
+
+def line_number_above():
+  return tf_inspect.stack()[1][2] - 1
+
+
+class SendTracebacksTest(test_util.TensorFlowTestCase):
+
+  @classmethod
+  def setUpClass(cls):
+    test_util.TensorFlowTestCase.setUpClass()
+    (cls._server_port, cls._debug_server_url, cls._server_dump_dir,
+     cls._server_thread,
+     cls._server) = grpc_debug_test_server.start_server_on_separate_thread()
+    cls._server_address = "localhost:%d" % cls._server_port
+    (cls._server_port_2, cls._debug_server_url_2, cls._server_dump_dir_2,
+     cls._server_thread_2,
+     cls._server_2) = grpc_debug_test_server.start_server_on_separate_thread()
+    cls._server_address_2 = "localhost:%d" % cls._server_port_2
+    cls._curr_file_path = os.path.normpath(os.path.abspath(__file__))
+
+  @classmethod
+  def tearDownClass(cls):
+    # Stop the test server and join the thread.
+    cls._server.stop_server().wait()
+    cls._server_thread.join()
+    cls._server_2.stop_server().wait()
+    cls._server_thread_2.join()
+    test_util.TensorFlowTestCase.tearDownClass()
+
+  def tearDown(self):
+    ops.reset_default_graph()
+    self._server.clear_data()
+    self._server_2.clear_data()
+    super(SendTracebacksTest, self).tearDown()
+
+  def _findFirstTraceInsideTensorFlowPyLibrary(self, op):
+    """Find the first trace of an op that belongs to the TF Python library."""
+    for trace in op.traceback:
+      if source_utils.guess_is_tensorflow_py_library(trace[0]):
+        return trace
+
+  def testSendGraphTracebacksToSingleDebugServer(self):
+    this_func_name = "testSendGraphTracebacksToSingleDebugServer"
+    with session.Session() as sess:
+      a = variables.Variable(21.0, name="a")
+      a_lineno = line_number_above()
+      b = variables.Variable(2.0, name="b")
+      b_lineno = line_number_above()
+      math_ops.add(a, b, name="x")
+      x_lineno = line_number_above()
+
+      send_stack = traceback.extract_stack()
+      send_lineno = line_number_above()
+      source_remote.send_graph_tracebacks(
+          self._server_address, "dummy_run_key", send_stack, sess.graph)
+
+      tb = self._server.query_op_traceback("a")
+      self.assertIn((self._curr_file_path, a_lineno, this_func_name), tb)
+      tb = self._server.query_op_traceback("b")
+      self.assertIn((self._curr_file_path, b_lineno, this_func_name), tb)
+      tb = self._server.query_op_traceback("x")
+      self.assertIn((self._curr_file_path, x_lineno, this_func_name), tb)
+
+      self.assertIn(
+          (self._curr_file_path, send_lineno, this_func_name),
+          self._server.query_origin_stack()[-1])
+
+      self.assertEqual(
+          "      a = variables.Variable(21.0, name=\"a\")",
+          self._server.query_source_file_line(__file__, a_lineno))
+      # Files in the TensorFlow code base shouldn not have been sent.
+      tf_trace_file_path = self._findFirstTraceInsideTensorFlowPyLibrary(a.op)
+      with self.assertRaises(ValueError):
+        self._server.query_source_file_line(tf_trace_file_path, 0)
+      self.assertEqual([debug_service_pb2.CallTraceback.GRAPH_EXECUTION],
+                       self._server.query_call_types())
+      self.assertEqual(["dummy_run_key"], self._server.query_call_keys())
+      self.assertEqual(
+          [sess.graph.version], self._server.query_graph_versions())
+
+  def testSendGraphTracebacksToTwoDebugServers(self):
+    this_func_name = "testSendGraphTracebacksToTwoDebugServers"
+    with session.Session() as sess:
+      a = variables.Variable(21.0, name="two/a")
+      a_lineno = line_number_above()
+      b = variables.Variable(2.0, name="two/b")
+      b_lineno = line_number_above()
+      x = math_ops.add(a, b, name="two/x")
+      x_lineno = line_number_above()
+
+      send_traceback = traceback.extract_stack()
+      send_lineno = line_number_above()
+      source_remote.send_graph_tracebacks(
+          [self._server_address, self._server_address_2],
+          "dummy_run_key", send_traceback, sess.graph)
+
+      servers = [self._server, self._server_2]
+      for server in servers:
+        tb = server.query_op_traceback("two/a")
+        self.assertIn((self._curr_file_path, a_lineno, this_func_name), tb)
+        tb = server.query_op_traceback("two/b")
+        self.assertIn((self._curr_file_path, b_lineno, this_func_name), tb)
+        tb = server.query_op_traceback("two/x")
+        self.assertIn((self._curr_file_path, x_lineno, this_func_name), tb)
+
+        self.assertIn(
+            (self._curr_file_path, send_lineno, this_func_name),
+            server.query_origin_stack()[-1])
+
+        self.assertEqual(
+            "      x = math_ops.add(a, b, name=\"two/x\")",
+            server.query_source_file_line(__file__, x_lineno))
+        tf_trace_file_path = self._findFirstTraceInsideTensorFlowPyLibrary(x.op)
+        with self.assertRaises(ValueError):
+          server.query_source_file_line(tf_trace_file_path, 0)
+        self.assertEqual([debug_service_pb2.CallTraceback.GRAPH_EXECUTION],
+                         server.query_call_types())
+        self.assertEqual(["dummy_run_key"], server.query_call_keys())
+        self.assertEqual([sess.graph.version], server.query_graph_versions())
+
+  def testSendEagerTracebacksToSingleDebugServer(self):
+    this_func_name = "testSendEagerTracebacksToSingleDebugServer"
+    send_traceback = traceback.extract_stack()
+    send_lineno = line_number_above()
+    source_remote.send_eager_tracebacks(self._server_address, send_traceback)
+
+    self.assertEqual([debug_service_pb2.CallTraceback.EAGER_EXECUTION],
+                     self._server.query_call_types())
+    self.assertIn((self._curr_file_path, send_lineno, this_func_name),
+                  self._server.query_origin_stack()[-1])
+
+
+if __name__ == "__main__":
+  googletest.main()
diff --git a/tensorflow/python/debug/lib/stepper.py b/tensorflow/python/debug/lib/stepper.py
index 1fa0b3dba2b547bf1d311e42e1005a8e501f9829..c27b3f51cddb51654b1ff5a35fd7d689fc4109c4 100644
--- a/tensorflow/python/debug/lib/stepper.py
+++ b/tensorflow/python/debug/lib/stepper.py
@@ -80,7 +80,7 @@ class NodeStepper(object):
   when they are required as data dependencies.
 
   The temporary directories are automatically clean when the NodeStepper
-  instance exits as a context mananger.
+  instance exits as a context manager.
 
   Once the tracing is complete, it will issue a run() call on the
   underlying session, using the aforementioned feed_dict prepared by the input
diff --git a/tensorflow/python/debug/wrappers/dumping_wrapper.py b/tensorflow/python/debug/wrappers/dumping_wrapper.py
index 962318e54a479069d58e06c09c141b097fd15782..3fac2e59717a828424a808b770812afc7772bfe2 100644
--- a/tensorflow/python/debug/wrappers/dumping_wrapper.py
+++ b/tensorflow/python/debug/wrappers/dumping_wrapper.py
@@ -73,6 +73,7 @@ class DumpingDebugWrapperSession(framework.NonInteractiveDebugWrapperSession):
         self, sess, watch_fn=watch_fn, thread_name_filter=thread_name_filter,
         pass_through_operrors=pass_through_operrors)
 
+    session_root = os.path.expanduser(session_root)
     if gfile.Exists(session_root):
       if not gfile.IsDirectory(session_root):
         raise ValueError(
diff --git a/tensorflow/python/debug/wrappers/framework.py b/tensorflow/python/debug/wrappers/framework.py
index 4e243cb6c9649a24009a0c9ac501c59eaac3bd79..909150eb6aa21b45af39f7cbfd6248c701ae1fb5 100644
--- a/tensorflow/python/debug/wrappers/framework.py
+++ b/tensorflow/python/debug/wrappers/framework.py
@@ -706,7 +706,8 @@ class BaseDebugWrapperSession(session.SessionInterface):
         exec_type, exec_value, exec_tb)
 
   def __del__(self):
-    self._sess.__del__()
+    if hasattr(self._sess, "__del__"):
+      self._sess.__del__()
 
   def close(self):
     self._sess.close()
diff --git a/tensorflow/python/debug/wrappers/grpc_wrapper.py b/tensorflow/python/debug/wrappers/grpc_wrapper.py
index 4062016607c8a56eb275fe4712a47c84bc7ed01c..74d7c2b9e242f947a33c0bdb6508847808d69c0b 100644
--- a/tensorflow/python/debug/wrappers/grpc_wrapper.py
+++ b/tensorflow/python/debug/wrappers/grpc_wrapper.py
@@ -17,15 +17,54 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import traceback
+
 # Google-internal import(s).
+from tensorflow.python.debug.lib import common
 from tensorflow.python.debug.wrappers import framework
 
 
+def publish_traceback(debug_server_urls,
+                      graph,
+                      feed_dict,
+                      fetches,
+                      old_graph_version):
+  """Publish traceback and source code if graph version is new.
+
+  `graph.version` is compared with `old_graph_version`. If the former is higher
+  (i.e., newer), the graph traceback and the associated source code is sent to
+  the debug server at the specified gRPC URLs.
+
+  Args:
+    debug_server_urls: A single gRPC debug server URL as a `str` or a `list` of
+      debug server URLs.
+    graph: A Python `tf.Graph` object.
+    feed_dict: Feed dictionary given to the `Session.run()` call.
+    fetches: Fetches from the `Session.run()` call.
+    old_graph_version: Old graph version to compare to.
+
+  Returns:
+    If `graph.version > old_graph_version`, the new graph version as an `int`.
+    Else, the `old_graph_version` is returned.
+  """
+  # TODO(cais): Consider moving this back to the top, after grpc becomes a
+  # pip dependency of tensorflow or tf_debug.
+  # pylint:disable=g-import-not-at-top
+  from tensorflow.python.debug.lib import source_remote
+  # pylint:enable=g-import-not-at-top
+  if graph.version > old_graph_version:
+    run_key = common.get_run_key(feed_dict, fetches)
+    source_remote.send_graph_tracebacks(
+        debug_server_urls, run_key, traceback.extract_stack(), graph,
+        send_source=True)
+    return graph.version
+  else:
+    return old_graph_version
+
+
 class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession):
   """Debug Session wrapper that send debug data to gRPC stream(s)."""
 
-  _GRPC_URL_PREFIX = "grpc://"
-
   def __init__(self,
                sess,
                grpc_debug_server_addresses,
@@ -38,7 +77,7 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession):
       sess: The TensorFlow `Session` object being wrapped.
       grpc_debug_server_addresses: (`str` or `list` of `str`) Single or a list
         of the gRPC debug server addresses, in the format of
-        <host:port>, without the "grpc://" prefix. For example:
+        <host:port>, with or without the "grpc://" prefix. For example:
           "localhost:7000",
           ["localhost:7000", "192.168.0.2:8000"]
       watch_fn: (`Callable`) A Callable that can be used to define per-run
@@ -62,8 +101,7 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession):
 
     if isinstance(grpc_debug_server_addresses, str):
       self._grpc_debug_server_urls = [
-          self._GRPC_URL_PREFIX + grpc_debug_server_addresses
-      ]
+          self._normalize_grpc_url(grpc_debug_server_addresses)]
     elif isinstance(grpc_debug_server_addresses, list):
       self._grpc_debug_server_urls = []
       for address in grpc_debug_server_addresses:
@@ -71,7 +109,7 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession):
           raise TypeError(
               "Expected type str in list grpc_debug_server_addresses, "
               "received type %s" % type(address))
-        self._grpc_debug_server_urls.append(self._GRPC_URL_PREFIX + address)
+        self._grpc_debug_server_urls.append(self._normalize_grpc_url(address))
     else:
       raise TypeError(
           "Expected type str or list in grpc_debug_server_addresses, "
@@ -93,3 +131,75 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession):
     """
 
     return self._grpc_debug_server_urls
+
+  def _normalize_grpc_url(self, address):
+    return (common.GRPC_URL_PREFIX + address
+            if not address.startswith(common.GRPC_URL_PREFIX) else address)
+
+
+class TensorBoardDebugWrapperSession(GrpcDebugWrapperSession):
+  """A tfdbg Session wrapper that can be used with TensorBoard Debugger Plugin.
+
+  This wrapper is the same as `GrpcDebugWrapperSession`, except that it uses a
+    predefined `watch_fn` that
+    1) uses `DebugIdentity` debug ops with the `gated_grpc` attribute set to
+        `True` to allow the interactive enabling and disabling of tensor
+       breakpoints.
+    2) watches all tensors in the graph.
+  This saves the need for the user to define a `watch_fn`.
+  """
+
+  def __init__(self,
+               sess,
+               grpc_debug_server_addresses,
+               thread_name_filter=None,
+               send_traceback_and_source_code=True,
+               log_usage=True):
+    """Constructor of TensorBoardDebugWrapperSession.
+
+    Args:
+      sess: The `tf.Session` instance to be wrapped.
+      grpc_debug_server_addresses: gRPC address(es) of debug server(s), as a
+        `str` or a `list` of `str`s. E.g., "localhost:2333",
+        "grpc://localhost:2333", ["192.168.0.7:2333", "192.168.0.8:2333"].
+      thread_name_filter: Optional filter for thread names.
+      send_traceback_and_source_code: Whether traceback of graph elements and
+        the source code are to be sent to the debug server(s).
+      log_usage: Whether the usage of this class is to be logged (if
+        applicable).
+    """
+    def _gated_grpc_watch_fn(fetches, feeds):
+      del fetches, feeds  # Unused.
+      return framework.WatchOptions(
+          debug_ops=["DebugIdentity(gated_grpc=true)"])
+
+    super(TensorBoardDebugWrapperSession, self).__init__(
+        sess,
+        grpc_debug_server_addresses,
+        watch_fn=_gated_grpc_watch_fn,
+        thread_name_filter=thread_name_filter,
+        log_usage=log_usage)
+
+    self._send_traceback_and_source_code = send_traceback_and_source_code
+    # Keeps track of the latest version of Python graph object that has been
+    # sent to the debug servers.
+    self._sent_graph_version = -1
+
+  def run(self,
+          fetches,
+          feed_dict=None,
+          options=None,
+          run_metadata=None,
+          callable_runner=None,
+          callable_runner_args=None):
+    if self._send_traceback_and_source_code:
+      self._sent_graph_version = publish_traceback(
+          self._grpc_debug_server_urls, self.graph, feed_dict, fetches,
+          self._sent_graph_version)
+    return super(TensorBoardDebugWrapperSession, self).run(
+        fetches,
+        feed_dict=feed_dict,
+        options=options,
+        run_metadata=run_metadata,
+        callable_runner=callable_runner,
+        callable_runner_args=callable_runner_args)
diff --git a/tensorflow/python/debug/wrappers/hooks.py b/tensorflow/python/debug/wrappers/hooks.py
index 4efa97973eb893a0105ca6abce6d306c1f6867d8..989ad801e53615f7bd26b8b4fb850b8a56cd193c 100644
--- a/tensorflow/python/debug/wrappers/hooks.py
+++ b/tensorflow/python/debug/wrappers/hooks.py
@@ -27,9 +27,6 @@ from tensorflow.python.debug.wrappers import grpc_wrapper
 from tensorflow.python.debug.wrappers import local_cli_wrapper
 from tensorflow.python.training import session_run_hook
 
-# The prefix for GRPC endpoint URLs.
-_GRPC_ENDPOINT_PREFIX = "grpc://"
-
 
 class LocalCLIDebugHook(session_run_hook.SessionRunHook):
   """Command-line-interface debugger hook.
@@ -249,8 +246,8 @@ class GrpcDebugHook(session_run_hook.SessionRunHook):
 
     Args:
       grpc_debug_server_addresses: (`list` of `str`) A list of the gRPC debug
-        server addresses, in the format of <host:port>, without the "grpc://"
-        prefix. For example: ["localhost:7000", "192.168.0.2:8000"]
+        server addresses, in the format of <host:port>, with or without the
+        "grpc://" prefix. For example: ["localhost:7000", "192.168.0.2:8000"]
       watch_fn: A function that allows for customizing which ops to watch at
         which specific steps. See doc of
         `dumping_wrapper.DumpingDebugWrapperSession.__init__` for details.
@@ -258,23 +255,14 @@ class GrpcDebugHook(session_run_hook.SessionRunHook):
         wrapper session will be active. See doc of `BaseDebugWrapperSession` for
         more details.
       log_usage: (bool) Whether usage is to be logged.
-
-    Raises:
-      ValueError: if any debugger server addresses start with grpc://.
     """
-
-    for address in grpc_debug_server_addresses:
-      if address.startswith(_GRPC_ENDPOINT_PREFIX):
-        raise ValueError(
-            ("Debug server address %r starts with %r. It should not because "
-             "the hook already automatically adds the prefix.") % (
-                 address, _GRPC_ENDPOINT_PREFIX))
-
-    # A wrapper session responsible for GRPC communication.
     self._grpc_debug_wrapper_session = None
     self._thread_name_filter = thread_name_filter
+    self._grpc_debug_server_addresses = (
+        grpc_debug_server_addresses
+        if isinstance(grpc_debug_server_addresses, list)
+        else [grpc_debug_server_addresses])
 
-    self._grpc_debug_server_addresses = grpc_debug_server_addresses
     self._watch_fn = watch_fn
     self._log_usage = log_usage
 
@@ -315,3 +303,56 @@ class GrpcDebugHook(session_run_hook.SessionRunHook):
 
     return session_run_hook.SessionRunArgs(
         None, feed_dict=None, options=run_options)
+
+
+class TensorBoardDebugHook(GrpcDebugHook):
+  """A tfdbg hook that can be used with TensorBoard Debugger Plugin.
+
+  This hook is the same as `GrpcDebugHook`, except that it uses a predefined
+    `watch_fn` that
+    1) uses `DebugIdentity` debug ops with the `gated_grpc` attribute set to
+        `True`, to allow the interactive enabling and disabling of tensor
+       breakpoints.
+    2) watches all tensors in the graph.
+  This saves the need for the user to define a `watch_fn`.
+  """
+
+  def __init__(self,
+               grpc_debug_server_addresses,
+               thread_name_filter=None,
+               send_traceback_and_source_code=True,
+               log_usage=True):
+    """Constructor of TensorBoardDebugHook.
+
+    Args:
+      grpc_debug_server_addresses: gRPC address(es) of debug server(s), as a
+        `str` or a `list` of `str`s. E.g., "localhost:2333",
+        "grpc://localhost:2333", ["192.168.0.7:2333", "192.168.0.8:2333"].
+      thread_name_filter: Optional filter for thread names.
+      send_traceback_and_source_code: Whether traceback of graph elements and
+        the source code are to be sent to the debug server(s).
+      log_usage: Whether the usage of this class is to be logged (if
+        applicable).
+    """
+    def _gated_grpc_watch_fn(fetches, feeds):
+      del fetches, feeds  # Unused.
+      return framework.WatchOptions(
+          debug_ops=["DebugIdentity(gated_grpc=true)"])
+
+    super(TensorBoardDebugHook, self).__init__(
+        grpc_debug_server_addresses,
+        watch_fn=_gated_grpc_watch_fn,
+        thread_name_filter=thread_name_filter,
+        log_usage=log_usage)
+
+    self._grpc_debug_server_addresses = grpc_debug_server_addresses
+    self._send_traceback_and_source_code = send_traceback_and_source_code
+    self._sent_graph_version = -1
+
+  def before_run(self, run_context):
+    if self._send_traceback_and_source_code:
+      self._sent_graph_version = grpc_wrapper.publish_traceback(
+          self._grpc_debug_server_addresses, run_context.session.graph,
+          run_context.original_args.feed_dict,
+          run_context.original_args.fetches, self._sent_graph_version)
+    return super(TensorBoardDebugHook, self).before_run(run_context)
diff --git a/tensorflow/python/debug/wrappers/local_cli_wrapper.py b/tensorflow/python/debug/wrappers/local_cli_wrapper.py
index 5bf6d9d1f4a4533a04495be9a1bf8364c3bb3db1..1465cb72950c8fa6a453ebd4290bbf6382173ff8 100644
--- a/tensorflow/python/debug/wrappers/local_cli_wrapper.py
+++ b/tensorflow/python/debug/wrappers/local_cli_wrapper.py
@@ -31,6 +31,7 @@ from tensorflow.python.debug.cli import debugger_cli_common
 from tensorflow.python.debug.cli import profile_analyzer_cli
 from tensorflow.python.debug.cli import stepper_cli
 from tensorflow.python.debug.cli import ui_factory
+from tensorflow.python.debug.lib import common
 from tensorflow.python.debug.lib import debug_data
 from tensorflow.python.debug.wrappers import framework
 
@@ -81,6 +82,7 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession):
     if not dump_root:
       self._dump_root = tempfile.mktemp(prefix=_DUMP_ROOT_PREFIX)
     else:
+      dump_root = os.path.expanduser(dump_root)
       if os.path.isfile(dump_root):
         raise ValueError("dump_root path points to a file: %s" % dump_root)
       elif os.path.isdir(dump_root) and os.listdir(dump_root):
@@ -464,7 +466,7 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession):
     feed_key = None
     feed_value = None
     for key in self._feed_dict:
-      key_name = cli_shared.get_graph_element_name(key)
+      key_name = common.get_graph_element_name(key)
       if key_name == tensor_name:
         feed_key = key_name
         feed_value = self._feed_dict[key]
@@ -561,7 +563,7 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession):
                                            list(self._tensor_filters.keys()))
     if self._feed_dict:
       # Register tab completion for feed_dict keys.
-      feed_keys = [cli_shared.get_graph_element_name(key)
+      feed_keys = [common.get_graph_element_name(key)
                    for key in self._feed_dict.keys()]
       curses_cli.register_tab_comp_context(["print_feed", "pf"], feed_keys)
 
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index b491a637bacccd181cab0960f08a5306b719bdd0..f470e181200f19d672cced3ea21d05aa2eee0bea 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -110,6 +110,7 @@ cuda_py_test(
         "//tensorflow/python:array_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:nn_ops",
+        "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:random_ops",
         "//tensorflow/python:nn_grad",
         "//tensorflow/python:training",
@@ -144,6 +145,7 @@ cuda_py_test(
         ":test",
         "//tensorflow/python:clip_ops",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
     ],
 )
 
@@ -415,6 +417,7 @@ cuda_py_test(
         "//tensorflow/python:layers",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:random_ops",
+        "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:sparse_ops",
         "//tensorflow/python:tensor_shape",
     ],
diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 0144f3b1e59d733e951bc1f7408c803facab4eac..a2a3e230bbb4232fe916c658a6b0ac8d6d33658d 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -344,7 +344,7 @@ def implicit_val_and_grad(f):
 
   def grad_fn(*args):
     """Computes the gradient of the wrapped function."""
-    tape.push_new_tape()
+    this_tape = tape.push_new_tape()
     try:
       end_node = f(*args)
       if end_node is None:
@@ -352,15 +352,18 @@ def implicit_val_and_grad(f):
                          "did you forget to return a value from {}?".format(
                              f.__name__))
     finally:
-      popped_tape = tape.pop_tape()
-      variables = popped_tape.watched_variables()
+      tape.pop_tape(this_tape)
+    # Sorting variables by id, which is monotonically increasing in construction
+    # order. This ensures unique order across executions.
+    variables = list(sorted(this_tape.watched_variables(),
+                            key=lambda v: v.handle._id))  # pylint: disable=protected-access
     sources = [x.handle for x in variables]
 
     if not sources:
       raise ValueError("No trainable variables were accessed while the "
                        "function was being computed.")
     grad = imperative_grad.imperative_grad(_default_vspace,
-                                           popped_tape,
+                                           this_tape,
                                            nest.flatten(end_node),
                                            sources)
     return end_node, list(zip(grad, variables))
@@ -540,14 +543,14 @@ def _ensure_unique_tensor_objects(parameter_positions, args):
     if i in parameter_positions:
       tid = ops.tensor_id(t)
       if tid in s:
-        args[i] = args[i]._dup()  # pylint: disable=protected-access
+        args[i] = gen_array_ops.identity(args[i])
       else:
         s.add(tid)
   return args
 
 
 def val_and_grad_function(f, params=None):
-  """Returns a function that computes f and is derivative w.r.t. params.
+  """Returns a function that computes f and its derivative w.r.t. params.
 
   Example:
   ```python
@@ -649,7 +652,7 @@ def make_vjp(f, params=None):
     """Computes the value and gradient of the decorated function."""
     parameter_positions = _get_arg_spec(f, params, args)
     assert not kwds, "The gradient function can't take keyword arguments."
-    tape.push_new_tape()
+    this_tape = tape.push_new_tape()
     try:
       sources = []
       args = [
@@ -670,12 +673,12 @@ def make_vjp(f, params=None):
       flat_result = [gen_array_ops.identity(x) for x in flat_result]
       result = nest.pack_sequence_as(result, flat_result)
     finally:
-      t = tape.pop_tape()
+      tape.pop_tape(this_tape)
     def vjp(dy=None):
       if dy is not None:
         dy = [ops.convert_to_tensor(x) for x in nest.flatten(dy)]
       return imperative_grad.imperative_grad(
-          _default_vspace, t, nest.flatten(result), sources,
+          _default_vspace, this_tape, nest.flatten(result), sources,
           output_gradients=dy)
     return result, vjp
 
@@ -707,7 +710,7 @@ def _aggregate_grads(gradients):
       if isinstance(grad, ops.Tensor):
         indexed_slices = ops.IndexedSlices(
             grad,
-            constant_op.constant(range(grad.shape[0])),
+            math_ops.range(grad.shape[0]),
             constant_op.constant(grad.shape.as_list()))
         indexed_slices_list.append(indexed_slices)
       else:
@@ -741,6 +744,10 @@ def _fast_fill(value, shape, dtype):
 
 def _zeros(shape, dtype):
   """Wraps array_ops.zeros to cache last zero for a given shape and dtype."""
+  if dtype == dtypes.variant:
+    # TODO(apassos): need to save enough information about variant tensors to do
+    # a zeros
+    return None
   if [shape, dtype] != _last_shape_dtype:
     _last_shape_dtype[:] = [shape, dtype]
     _last_zero[0] = _fast_fill(0, shape, dtype)
@@ -832,11 +839,11 @@ class GradientTape(object):
     self._persistent = persistent
 
   def __enter__(self):
-    tape.push_new_tape(persistent=self._persistent)
+    self._tape = tape.push_new_tape(persistent=self._persistent)
     return self
 
   def __exit__(self, typ, value, traceback):
-    self._tape = tape.pop_tape()
+    tape.pop_tape(self._tape)
 
   def watch(self, tensor):
     """Ensures that `tensor` is being traced by this tape.
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 9816dd022eb5b524888a8058ef550a107ef3a00d..a12113893ab3eac671e8138472bc95e9d8b89499 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import gradients
@@ -44,6 +45,7 @@ from tensorflow.python.training import training
 
 class BackpropTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def testAggregateGradients(self):
 
     def fn(x):
@@ -60,7 +62,7 @@ class BackpropTest(test.TestCase):
     var_np = np.random.rand(4, 2).astype(np.float32)
     var = constant_op.constant(var_np)
     grad = backprop.gradients_function(fn, [0])(var)[0]
-    grad = ops.convert_to_tensor(grad).numpy()
+    grad = self.evaluate(ops.convert_to_tensor(grad))
 
     with context.graph_mode(), self.test_session():
       tf_var = array_ops.constant(var_np, dtypes.float32)
@@ -151,6 +153,21 @@ class BackpropTest(test.TestCase):
     opt.apply_gradients([(grad, embedding)])
     self.assertAllClose(expected, embedding.read_value())
 
+  def testImplicitGradOrdering(self):
+    v0 = resource_variable_ops.ResourceVariable(1.0)
+    v1 = resource_variable_ops.ResourceVariable(2.0)
+
+    def f():
+      x = v1 * v1
+      y = v0 * v0
+      return x + y
+
+    grads = backprop.implicit_grad(f)()
+    ordered_variables = [x[1] for x in grads]
+    self.assertTrue(ordered_variables[0] is v0)
+    self.assertTrue(ordered_variables[1] is v1)
+
+  @test_util.assert_no_new_tensors
   def testGradientNone(self):
 
     def loss(x, l):
@@ -165,6 +182,7 @@ class BackpropTest(test.TestCase):
     g, = backprop.gradients_function(loss, [0])(logits, labels)
     self.assertAllEqual(g.numpy(), [[-0.5, 0.5]])
 
+  @test_util.assert_no_new_tensors
   def testSecondGrad(self):
 
     def first(x):
@@ -181,6 +199,7 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(second, [0])(f)[0]
     self.assertAllEqual([[0.0]], grad)
 
+  @test_util.assert_no_new_tensors
   def testMakeVJP(self):
 
     def f(x):
@@ -191,6 +210,7 @@ class BackpropTest(test.TestCase):
     self.assertAllEqual(result, 9.0)
     self.assertAllEqual(vjp(2.0)[0], 12.0)
 
+  @test_util.assert_no_new_tensors
   def testGradGrad(self):
 
     def sq(x):
@@ -204,6 +224,7 @@ class BackpropTest(test.TestCase):
 
     self.assertAllEqual(gradgrad(constant_op.constant(3.0))[0], 2.0)
 
+  @test_util.assert_no_new_tensors
   def testGradGradExp(self):
 
     def grad(x):
@@ -214,6 +235,22 @@ class BackpropTest(test.TestCase):
 
     self.assertAllEqual(gradgrad(constant_op.constant(0.0))[0], 1.0)
 
+  @test_util.assert_no_new_tensors
+  def testStopGradient(self):
+    grad = backprop.gradients_function(
+        lambda x: array_ops.stop_gradient(math_ops.argmax(x)))
+    self.assertAllEqual(grad([0.0])[0], None)
+
+  @test_util.assert_no_new_tensors
+  def testArgmax(self):
+    def argmax(x):
+      i = math_ops.argmax(x)
+      return array_ops.stop_gradient(i)
+
+    grad = backprop.gradients_function(argmax)
+    self.assertAllEqual(grad([0.0])[0], None)
+
+  @test_util.assert_no_new_tensors
   def testGPU(self):
     if not context.context().num_gpus():
       self.skipTest('No GPUs found')
@@ -229,6 +266,7 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
     self.assertAllEqual(grad, 1.0)
 
+  @test_util.assert_no_new_tensors
   def testGPUImplicitGrad(self):
     if not context.context().num_gpus():
       self.skipTest('No GPU found')
@@ -244,6 +282,7 @@ class BackpropTest(test.TestCase):
     self.assertEqual(
         backprop.implicit_grad(f)()[0][0].cpu().numpy(), 1.0)
 
+  @test_util.assert_no_new_tensors
   def testCPU(self):
 
     def fn(x):
@@ -254,6 +293,7 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
     self.assertAllEqual(grad, 1.0)
 
+  @test_util.assert_no_new_tensors
   def testTensorCopyGPU2CPU2GPU(self):
     if not context.context().num_gpus():
       self.skipTest('No GPUs found')
@@ -268,6 +308,7 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(f, [0])(a, b)[0]
     self.assertAllEqual(grad, 1.0)
 
+  @test_util.assert_no_new_tensors
   def testEmptyParams(self):
 
     def fn(a, b):
@@ -279,6 +320,7 @@ class BackpropTest(test.TestCase):
     self.assertAllEqual(dx, y.numpy())
     self.assertAllEqual(dy, x.numpy())
 
+  @test_util.assert_no_new_tensors
   def testUnconnectedNone(self):
     v = resource_variable_ops.ResourceVariable(
         1.0, name='testUnconnectedNone')
@@ -289,6 +331,7 @@ class BackpropTest(test.TestCase):
 
     self.assertEqual(backprop.implicit_grad(f)()[0][0], None)
 
+  @test_util.assert_no_new_tensors
   def testGradientTape(self):
     with backprop.GradientTape() as g:
       x = constant_op.constant(3.0)
@@ -303,6 +346,7 @@ class BackpropTest(test.TestCase):
     grad = g.gradient(y, [x])[0]
     self.assertEqual(grad.numpy(), 6.0)
 
+  @test_util.assert_no_new_tensors
   def testGradientTapeGradientCalledMultipleTimes(self):
     with backprop.GradientTape() as g:
       x = constant_op.constant(3.0)
@@ -314,6 +358,7 @@ class BackpropTest(test.TestCase):
         RuntimeError, 'GradientTape.gradient can only be called once'):
       g.gradient(y, [x])
 
+  @test_util.assert_no_new_tensors
   def testPersistentTape(self):
     with backprop.GradientTape(persistent=True) as g:
       x = constant_op.constant(3.0)
@@ -326,6 +371,7 @@ class BackpropTest(test.TestCase):
     self.assertEqual(dy_dx.numpy(), 2*3)
     del g
 
+  @test_util.assert_no_new_tensors
   def testPersistentNestedTape(self):
     with backprop.GradientTape(persistent=True) as g:
       x = constant_op.constant(3.0)
@@ -345,6 +391,7 @@ class BackpropTest(test.TestCase):
     self.assertEqual(grad.numpy(), 12.0)
     del g
 
+  @test_util.assert_no_new_tensors
   def testGradientTapeVariable(self):
     v = resource_variable_ops.ResourceVariable(1.0, name='v')
     with backprop.GradientTape() as g:
@@ -352,6 +399,7 @@ class BackpropTest(test.TestCase):
     grad = g.gradient(y, [v])[0]
     self.assertAllEqual(grad, 2.0)
 
+  @test_util.assert_no_new_tensors
   def testEmptyParamsForValueAndGradFunction(self):
     def fn(a, b):
       return a * b
@@ -364,6 +412,7 @@ class BackpropTest(test.TestCase):
     self.assertAllEqual(dx, y)
     self.assertAllEqual(dy, x)
 
+  @test_util.assert_no_new_tensors
   def testNonEmptyParamsForValueAndGradFunction(self):
     def fn(a, b):
       return a * b
@@ -376,6 +425,7 @@ class BackpropTest(test.TestCase):
     self.assertEqual(1, len(grads))
     self.assertAllEqual(grads[0], x)
 
+  @test_util.assert_no_new_tensors
   def testTensorCopyCPU2GPU2CPU(self):
     if not context.context().num_gpus():
       self.skipTest('No GPUs found')
@@ -460,6 +510,7 @@ class BackpropTest(test.TestCase):
 
     self.assertAllEqual(backprop.gradients_function(f)(1.0)[0], 3.0)
 
+  @test_util.assert_no_new_tensors
   def testExceptionSafety(self):
 
     def f(unused_x):
@@ -475,6 +526,7 @@ class BackpropTest(test.TestCase):
 
     self.assertAllEqual(backprop.gradients_function(real_f)(1.0)[0], 2.0)
 
+  @test_util.assert_no_new_tensors
   def testMultiValueConvertToTensor(self):
     x = resource_variable_ops.ResourceVariable(
         initial_value=array_ops.constant([1.0]), name='x')
@@ -535,6 +587,7 @@ class BackpropTest(test.TestCase):
         initial_value=1., name='testSameObjectForMultipleArguments.Variable')
     self.assertAllEqual([1., 1.], np_g(v, v))
 
+  @test_util.assert_no_new_tensors
   def testImplicitGradientsCustomGradientAndCachedVariableValue(self):
 
     @custom_gradient.custom_gradient
@@ -560,6 +613,7 @@ class BackpropTest(test.TestCase):
     self.assertAllEqual(7, grad)
     self.assertAllEqual(x, var)
 
+  @test_util.assert_no_new_tensors
   def testCustomGradient(self):
 
     @custom_gradient.custom_gradient
@@ -586,6 +640,7 @@ class BackpropTest(test.TestCase):
         var.assign_sub(lr*grad)
     self.assertAllEqual(losses, [4.0, 3., 2., 1., 0.])
 
+  @test_util.assert_no_new_tensors
   def testCustomGradientIdentity(self):
 
     @custom_gradient.custom_gradient
diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py
index 92f4e15c054bd8cf3886b8c22e414abdfccbdae5..cbf588336d75dbc16e73ea227d8ebba639a84f1c 100644
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@@ -18,14 +18,18 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
 import contextlib
 import copy
 import random
 import threading
 
+from tensorflow.core.protobuf import config_pb2
 from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.framework import c_api_util
 from tensorflow.python.framework import device as pydev
 from tensorflow.python.framework import errors
+from tensorflow.python.util import compat
 from tensorflow.python.util import tf_contextlib
 
 GRAPH_MODE = 0
@@ -62,6 +66,41 @@ class _EagerContext(threading.local):
     self.scalar_cache = {}
 
 
+ContextStackEntry = collections.namedtuple(
+    "ContextStackEntry", ["is_building_function", "enter_context_fn"])
+
+
+class ContextStack(threading.local):
+  """A thread-local stack of context switches."""
+
+  def __init__(self):
+    super(ContextStack, self).__init__()
+    self.stack = []
+
+  def push(self, is_building_function, enter_context_fn):
+    """Push metadata about a context switch onto the stack.
+
+    A context switch can take one of two forms: installing a graph as the
+    default graph, or entering the eager context.
+
+    Args:
+      is_building_function: (bool.) Whether the context is building a function.
+      enter_context_fn: (function.) A callable that executes the context switch.
+        For example, `graph.as_default` or `eager_mode`.
+    """
+
+    self.stack.append(
+        ContextStackEntry(is_building_function, enter_context_fn))
+
+  def pop(self):
+    """Pop the stack."""
+
+    self.stack.pop()
+
+
+context_stack = ContextStack()
+
+
 # TODO(agarwal): rename to EagerContext / EagerRuntime ?
 # TODO(agarwal): consider keeping the corresponding Graph here.
 class Context(object):
@@ -97,6 +136,9 @@ class Context(object):
     """Set a global eager mode seed for random ops."""
     self._seed = seed
     self._rng = random.Random(self._seed)
+    # Also clear the kernel cache, to reset any existing seeds
+    if self._context_handle is not None:
+      pywrap_tensorflow.TFE_ContextClearCaches(self._context_handle)
 
   def _internal_operation_seed(self):
     """Returns a fake operation seed.
@@ -183,10 +225,14 @@ class Context(object):
     ctx = self._eager_context
     old_mode = ctx.mode
     ctx.mode = mode
+    if mode == EAGER_MODE:
+      context_stack.push(False, eager_mode)
     try:
       yield
     finally:
       ctx.mode = old_mode
+      if mode == EAGER_MODE:
+        context_stack.pop()
 
   def in_graph_mode(self):
     """Returns True if current thread is in GRAPH mode."""
@@ -288,6 +334,21 @@ class Context(object):
     self._initialize_handle_and_devices()
     return self._num_gpus
 
+  def add_function(self, fn):
+    """Add a function definition to the context.
+
+    Once added, the function (identified by its name) can be executed like any
+    other operation.
+
+    Args:
+      fn: A wrapped TF_Function (returned from TF_GraphToFunction_wrapper).
+    """
+    with errors.raise_exception_on_not_ok_status() as status:
+      pywrap_tensorflow.TFE_ContextAddFunction(
+          self._handle,  # pylint: disable=protected-access
+          fn,
+          status)
+
   def add_function_def(self, fdef):
     """Add a function definition to the context.
 
@@ -340,6 +401,42 @@ class Context(object):
     """Get the list of post-execution callbacks added to the context."""
     return self._post_execution_callbacks
 
+  def enable_run_metadata(self):
+    """Enables tracing of op execution via RunMetadata.
+
+    To retrieve the accumulated metadata call context.export_run_metadata()
+    and to stop tracing call context.disable_run_metadata().
+    """
+    if not self._context_handle:
+      self._initialize_handle_and_devices()
+    pywrap_tensorflow.TFE_ContextEnableRunMetadata(self._context_handle)
+
+  def disable_run_metadata(self):
+    """Disables tracing of op execution via RunMetadata."""
+    if not self._context_handle:
+      return
+    pywrap_tensorflow.TFE_ContextDisableRunMetadata(self._context_handle)
+
+  def export_run_metadata(self):
+    """Returns a RunMetadata proto with accumulated information.
+
+    The returned protocol buffer contains information since the most recent call
+    to either enable_run_metadata or export_run_metadata.
+
+    Returns:
+      A RunMetadata protocol buffer. Or None if not enabled.
+    """
+    if not self._context_handle:
+      return None
+    with c_api_util.tf_buffer() as buffer_:
+      with errors.raise_exception_on_not_ok_status() as status:
+        pywrap_tensorflow.TFE_ContextExportRunMetadata(
+            self._context_handle, buffer_, status)
+      proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_)
+    run_metadata = config_pb2.RunMetadata()
+    run_metadata.ParseFromString(compat.as_bytes(proto_data))
+    return run_metadata
+
 _context = None
 _context_lock = threading.Lock()
 
@@ -458,3 +555,29 @@ def num_gpus():
     The number of available GPU devices.
   """
   return context().num_gpus()
+
+
+def enable_run_metadata():
+  """Enables tracing of op execution via RunMetadata.
+
+  To retrieve the accumulated metadata call context.export_run_metadata()
+  and to stop tracing call context.disable_run_metadata().
+  """
+  context().enable_run_metadata()
+
+
+def disable_run_metadata():
+  """Disables tracing of op execution via RunMetadata."""
+  context().disable_run_metadata()
+
+
+def export_run_metadata():
+  """Returns a RunMetadata proto with accumulated information.
+
+  The returned protocol buffer contains information since the most recent call
+  to either enable_run_metadata or export_run_metadata.
+
+  Returns:
+    A RunMetadata protocol buffer.
+  """
+  return context().export_run_metadata()
diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py
index 2449162dcaa47cb71dde3be70675654709fec794..a70fa7280485497c4795bd890c1a19d2aa52d895 100644
--- a/tensorflow/python/eager/core_test.py
+++ b/tensorflow/python/eager/core_test.py
@@ -84,6 +84,34 @@ class TFETest(test_util.TensorFlowTestCase):
     self.assertTrue(has_cpu_device)
     del ctx
 
+  def testRunMetadata(self):
+    context.enable_run_metadata()
+    t = constant_op.constant(1.0)
+    _ = t + t  # Runs an operation which will be in the RunMetadata
+    run_metadata = context.export_run_metadata()
+    context.disable_run_metadata()
+    step_stats = run_metadata.step_stats
+    self.assertGreater(len(step_stats.dev_stats), 0)
+    cpu_stats = step_stats.dev_stats[0]
+    self.assertEqual('/job:localhost/replica:0/task:0/device:CPU:0',
+                     cpu_stats.device)
+    self.assertEqual(len(cpu_stats.node_stats), 1)
+    self.assertEqual(cpu_stats.node_stats[0].node_name, 'Add')
+
+  def testContextStackContainsEagerMode(self):
+    # Eager execution has been enabled, and no other context
+    # switch has occurred, so `context_stack` should contain
+    # exactly one entry.
+    self.assertEqual(len(context.context_stack.stack), 1)
+    stack_entry = context.context_stack.stack[0]
+
+    # The entry should log that eager mode was entered.
+    self.assertIs(stack_entry.enter_context_fn, context.eager_mode)
+
+    # It is not possible to build a graph function when eager execution
+    # is enabled; the stack entry should reflect this fact.
+    self.assertFalse(stack_entry.is_building_function)
+
   def _runInThread(self, target, args):
     t = threading.Thread(target=target, args=args)
     try:
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 9bcd9c23c7bad4d4e3b93fa4bb5fc2c316d5c828..81b1f6f12a1899ddccb711a81122905bfd363748 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -25,15 +25,19 @@ import threading
 
 import numpy as np
 
+from tensorflow.core.framework import function_pb2
+from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
 from tensorflow.python.eager import execute
 from tensorflow.python.eager import tape
 from tensorflow.python.eager.graph_only_ops import graph_placeholder
+from tensorflow.python.framework import c_api_util
 from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import graph_to_function_def
+from tensorflow.python.framework import dtypes as dtypes_module
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gradients_impl
+from tensorflow.python.util import compat
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 
@@ -47,28 +51,6 @@ _scoped_captures = threading.local()
 _scoped_captures.tensors = None
 
 
-def make_function_def(graph, operations, inputs, outputs):
-  """Makes function def where accesses to resources are serialized."""
-  last_op_using_resource_tensor = {}
-
-  # TODO(apassos) probably control flow has to be handled delicately here as in
-  # if a resource is accessed inside a control flow context we need the control
-  # dependency to point to something outside the context which is guaranteed to
-  # happen after the access.
-  #
-  # TODO(apassos) this should do some form of alias analysis as ops which
-  # forward the resources such as Identity and Switch can cause serialization to
-  # fail.
-  for op in operations:
-    for t in op.inputs:
-      if t.dtype == dtypes.resource:
-        if t.name in last_op_using_resource_tensor:
-          op._add_control_input(last_op_using_resource_tensor[t.name])  # pylint: disable=protected-access
-        last_op_using_resource_tensor[t.name] = op
-  return graph_to_function_def.graph_to_function_def(
-      graph, operations, inputs, outputs)
-
-
 @contextlib.contextmanager
 def capture_tensors(captures):
   old = _scoped_captures.__dict__.get("tensors", None)
@@ -85,8 +67,25 @@ def capture_value(tensor_map, value, dtype, name):
   if captured_value is None:
     captured_value = graph_placeholder(
         dtype=dtype or value.dtype, shape=value.shape, name=name)
-    if captured_value.dtype == dtypes.resource:
-      captured_value._handle_data = value._handle_data  # pylint: disable=protected-access
+    if captured_value.dtype == dtypes_module.resource:
+      handle_data = value._handle_data  # pylint: disable=protected-access
+      captured_value._handle_data = handle_data  # pylint: disable=protected-access
+      if handle_data is not None and handle_data.is_set:
+        # Ensure that shapes and dtypes are propagated.
+        shapes, types = zip(*[(pair.shape, pair.dtype)
+                              for pair in handle_data.shape_and_type])
+        ranks = [len(s.dim) if not s.unknown_rank else -1 for s in shapes]
+        shapes = [[d.size for d in s.dim]
+                  if not s.unknown_rank else None for s in shapes]
+        with errors.raise_exception_on_not_ok_status() as status:
+          pywrap_tensorflow.TF_GraphSetOutputHandleShapesAndTypes_wrapper(
+              captured_value._op._graph._c_graph,  # pylint: disable=protected-access
+              captured_value._as_tf_output(),  # pylint: disable=protected-access
+              shapes,
+              ranks,
+              types,
+              status)
+
     tensor_map[ops.tensor_id(value)] = (value, captured_value)
   else:
     captured_value = captured_value[1]
@@ -101,7 +100,7 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False):
   Arguments:
     value: A Tensor object.
     dtype: The datatype of the value produced by the node in the graph.
-    name:  Name of the node in the graph.
+    name:  str, Name of the node in the graph.
     as_ref: Ignored (required by register_tensor_conversion_function).
 
   Returns:
@@ -109,22 +108,47 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False):
     is not enabled. A placeholder which will have the value of the
     tensor at runtime otherwise.
   """
+  del as_ref  # Unused.
+
   if context.in_eager_mode():
     return value
-  _ = as_ref
+
+  default_graph = ops.get_default_graph()
+  if not default_graph.building_function:
+    return value
+
   tensor_map = _scoped_captures.tensors
   if tensor_map is None:
     # Capturing is not enabled.
+    if value.dtype == dtypes_module.resource:
+      return value
     return constant_op.constant(value.numpy())
+  if type(value) == ops.Tensor and value.graph is default_graph:
+    # The tensor has already been converted and captured. The type check
+    # is intentional: we are checking that value is a Tensor and not an
+    # EagerTensor.
+    return value
   return capture_value(tensor_map, value, dtype, name)
 
 
 class CapturingGraph(ops.Graph):
+  """Graph used when constructing eager functions."""
 
   def __init__(self, captures):
     super(CapturingGraph, self).__init__()
     self._building_function = True
     self.captures = captures
+    # Map from resource tensor name to last op (in program order) which uses
+    # this tensor. Used to enforce that execution order matches program order
+    # for resource tensors.
+    self._last_op_using_resource_tensor = {}
+
+  # TODO(apassos) remove once the C API is used by default.
+  def _use_c_api_hack(self):
+    return True
+
+  def clear_resource_control_flow_state(self):
+    self._last_op_using_resource_tensor = {}
 
   def create_op(
       self,
@@ -137,12 +161,31 @@ class CapturingGraph(ops.Graph):
       op_def=None,
       compute_shapes=True,
       compute_device=True):
+    # TODO(apassos) probably control flow has to be handled delicately here as
+    # in if a resource is accessed inside a control flow context we need the
+    # control dependency to point to something outside the context which is
+    # guaranteed to happen after the access.
+    #
+    # TODO(apassos) this should do some form of alias analysis as ops which
+    # forward the resources such as Identity and Switch can cause serialization
+    # to fail.
+    resource_inputs = set()
+    control_inputs = set()
     for i, inp in enumerate(inputs):
       if inp.graph is not self:
         inputs[i] = capture_value(self.captures, inp, inp.dtype, inp.op.name)
-    return super(CapturingGraph, self).create_op(
-        op_type, inputs, dtypes, input_types, name, attrs, op_def,
-        compute_shapes, compute_device)
+      inp = inputs[i]
+      if inp.dtype == dtypes_module.resource:
+        if inp.name in self._last_op_using_resource_tensor:
+          control_inputs.add(self._last_op_using_resource_tensor[inp.name])
+        resource_inputs.add(inp.name)
+    with self.control_dependencies(list(control_inputs)):
+      op = super(CapturingGraph, self).create_op(
+          op_type, inputs, dtypes, input_types, name, attrs, op_def,
+          compute_shapes, compute_device)
+    for name in resource_inputs:
+      self._last_op_using_resource_tensor[name] = op
+    return op
 
 
 # TODO(apassos): it'd be really nice if we could scope this registration.
@@ -196,14 +239,52 @@ def _inference_name(n):
   return "__inference_%s_%s" % (n, ops.uid())
 
 
-class _DefinedFunction(object):
-  """Mocks the interface of tf _DefinedFunction."""
+# TODO(apassos) get rid of this by splitting framework.function._DefinedFunction
+# so it doesn't have the definition-generating logic and is just a container for
+# an already-defined function.
+class _EagerDefinedFunction(object):
+  """Function object with the interface of tf _DefinedFunction."""
 
-  def __init__(self, fdef):
-    self.definition = fdef
-    self.name = fdef.signature.name
+  def __init__(self, name, graph, operations, inputs, outputs):
+    """Initializes an eager defined function.
+
+    Args:
+      name: str, the name for the created function.
+      graph: Graph, the graph containing the operations in the function
+      operations: list of Operation; the subset of operations in the graph
+        which will be in the function
+      inputs: the tensors in the graph to be used as inputs to the function
+      outputs: the tensors in the graph which will be outputs to the function
+    """
+    with errors.raise_exception_on_not_ok_status() as status:
+      fn = pywrap_tensorflow.TF_GraphToFunction_wrapper(
+          graph._c_graph,  # pylint: disable=protected-access
+          compat.as_str(name),
+          False,
+          [o._c_op for o in operations],  # pylint: disable=protected-access
+          [t._as_tf_output() for t in inputs],  # pylint: disable=protected-access
+          [t._as_tf_output() for t in outputs],  # pylint: disable=protected-access
+          [],
+          None,
+          compat.as_str(""),
+          status)
+    # TODO(apassos) avoid creating a FunctionDef (specially to grab the
+    # signature, but also in general it's nice not to depend on it.
+    with c_api_util.tf_buffer() as buffer_:
+      with errors.raise_exception_on_not_ok_status() as status:
+        pywrap_tensorflow.TF_FunctionToFunctionDef(fn, buffer_, status)
+      proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_)
+    function_def = function_pb2.FunctionDef()
+    function_def.ParseFromString(compat.as_bytes(proto_data))
+    if context.in_eager_mode():
+      _register(fn)
+    self.definition = function_def
+    self.name = function_def.signature.name
+    self.signature = function_def.signature
     self.grad_func_name = None
     self.python_grad_func = None
+    self._c_func = fn
+    self._grad_func = None
 
 
 def _map_sequence_obj_to_idx(sequence):
@@ -215,50 +296,51 @@ class GraphModeFunction(object):
   """Callable object representing a graph-mode function.
 
   Args:
-    input_placeholders: list of placeholder values to feed when calling
-      the wrapped function.
+    name: str the name of the created function
+    input_placeholders: list of placeholder values (tensors) to feed when
+      calling the wrapped function.
     extra_inputs: Tensor inputs this function definition closed over which
       are passed as arguments. Need to track so gradients are supported
       correctly.
-    fdef: the function definition we want to call.
-    graph: the graph from which the fdef operations were pulled. Used as
+    graph: the Graph from which the operations will be pulled. Used as
       a context when computing gradients.
-    operations: the subset of operations in the graph used in the function
+    operations: the subset of Operations in the graph used in the function
       definition.
-    func_outputs: the python outputs of the graph-mode function, with
-      tensorflow.Tensor objects to be replaced by tfe values when called.
-    func_outputs_to_fdef_outputs: Maps id(obj) in func_outputs to index of
-      fdef's outputs. It allows mapping fdef output tensors to nested
-      func_outputs structure.
-    output_shapes: List of shapes of all tensors which are output by the
-      internal function.
+    outputs: a flat list of the Tensors in the graph used as outputs to the
+      function
+    func_outputs: a possibly nested python object which will be returned by
+      this function. The Tensors in this structure will be replaced by their
+      corresponding values in outputs.
+    output_shapes: List of shapes of all tensors in outputs
     variables: (optional) List of variables to watch during function execution.
   """
 
   def __init__(self,
+               name,
                input_placeholders,
                extra_inputs,
-               fdef,
                graph,
                operations,
+               outputs,
                func_outputs,
-               func_outputs_to_fdef_outputs,
                output_shapes,
                variables=None):
-    assert len(input_placeholders) == len(fdef.signature.input_arg), "%s %s" % (
-        len(input_placeholders), len(fdef.signature.input_arg))
+    defined_function = _EagerDefinedFunction(
+        name, graph, operations, input_placeholders, outputs)
+    if len(input_placeholders) != len(defined_function.signature.input_arg):
+      raise ValueError("Internal error: invalid lengths. %s %s" % (
+          len(input_placeholders), len(defined_function.signature.input_arg)))
     self._input_placeholders = input_placeholders
     self._extra_inputs = list(extra_inputs)
     self._graph = graph
     self._has_backprop = False
-    self._func_name = fdef.signature.name
-    self._fdef = _DefinedFunction(fdef)
-    self._num_outputs = len(fdef.signature.output_arg)
+    self._func_name = name
+    self._function_def = defined_function
+    self._num_outputs = len(defined_function.signature.output_arg)
     self._ops = operations
     self._func_outputs = func_outputs
     self._returns = [func_outputs] if isinstance(
         func_outputs, (ops.Tensor, type(None))) else list(func_outputs)
-    self._returns_to_fedf_outputs = func_outputs_to_fdef_outputs
     self._output_shapes = output_shapes
     self._variables = variables if variables is not None else []
 
@@ -272,49 +354,47 @@ class GraphModeFunction(object):
     with self._graph.as_default(), context.graph_mode():
       c = _CapturingContext()
       with c:
-        filtered_outputs = [
-            x for x in self._returns if x is not None
-        ]
+        filtered_outputs = [x for x in self._returns if x is not None]
         self._out_grad_placeholders = [
-            graph_placeholder(x.dtype, x.shape) for x in filtered_outputs
-        ]
+            graph_placeholder(x.dtype, x.shape) for x in filtered_outputs]
         in_gradients = gradients_impl.gradients(
             filtered_outputs,
             self._input_placeholders,
             grad_ys=self._out_grad_placeholders)
-        shapes = [x.shape for x in in_gradients if x is not None]
+        shapes = tuple(x.shape for x in in_gradients if x is not None)
     captures = list(sorted(c.captured_tensors, key=lambda x: x.name))
-    forward_function_def = make_function_def(
-        self._graph, self._ops, self._input_placeholders,
+    forward_name = _forward_name(self._func_name)
+    self._forward_fdef = _EagerDefinedFunction(
+        forward_name, self._graph, self._ops, self._input_placeholders,
         filtered_outputs + captures)
-    self._forward_fdef = _DefinedFunction(forward_function_def)
-    _register_with_name(_forward_name(self._func_name), forward_function_def)
-    backward_outputs = [x for x in in_gradients if x is not None]
+    backward_outputs = tuple(x for x in in_gradients if x is not None)
     all_inputs = self._out_grad_placeholders + captures
-    backward_function_def = make_function_def(
-        self._graph, [x.op for x in self._out_grad_placeholders
-                     ] + list(sorted(c.known_ops, key=lambda x: x.name)),
-        all_inputs, backward_outputs)
-    _register_with_name(_backward_name(self._func_name), backward_function_def)
+    # Excluding input ops from the body as we do not intend to execute these
+    # operations when the function is executed.
+    all_ignored_ops = frozenset(x.op for x in all_inputs)
+    # Enforce a deterministic order of operations in the generated graph. This
+    # means rerunning the function-defining code will always define the same
+    # function, which is useful if we serialize this etc.
+    function_def_ops = tuple(x
+                             for x in sorted(c.known_ops, key=lambda x: x.name)
+                             if x not in all_ignored_ops)
+    bname = _backward_name(self._func_name)
     self._backward_function = GraphModeFunction(
-        all_inputs, [], backward_function_def, self._graph, c.known_ops,
-        in_gradients, _map_sequence_obj_to_idx(backward_outputs), shapes)
+        bname, all_inputs, [], self._graph, function_def_ops,
+        backward_outputs, in_gradients, shapes)
 
   def _backprop_call(self, args):
     """Calls the wrapped function and records the result on a tape."""
     all_args = args + self._extra_inputs
-    signature = self._forward_fdef.definition.signature
+    signature = self._forward_fdef.signature
     ctx = context.context()
     if ctx.in_graph_mode():
       g = ops.get_default_graph()
       g._add_function(self._forward_fdef)  # pylint: disable=protected-access
-      def make_tensor(x):
-        if isinstance(x, ops.Tensor):
-          return x
-        return ops.internal_convert_to_tensor(x, ctx=ctx)
       op = g.create_op(
-          signature.name, [make_tensor(x) for x in all_args],
-          [dtypes.DType(x.type) for x in signature.output_arg],
+          signature.name,
+          [ops.internal_convert_to_tensor(x, ctx=ctx) for x in all_args],
+          tuple(dtypes_module.DType(x.type) for x in signature.output_arg),
           op_def=signature,
           name="FunctionCall",
           compute_shapes=False)
@@ -334,7 +414,7 @@ class GraphModeFunction(object):
     side_outputs = outputs[len(self._returns):]
 
     def backward_function(*args):
-      return self._backward_function(*(list(args) + side_outputs))
+      return self._backward_function(*(list(args) + side_outputs))  # pylint: disable=not-callable
 
     tape.record_operation(
         signature.name,
@@ -344,17 +424,41 @@ class GraphModeFunction(object):
 
     return self._build_call_outputs(real_outputs)
 
+  @property
+  def output_shapes(self):
+    # TODO(ebrevdo): Should we only keep the output shapes associated
+    # with len(self._returns) outputs?
+    return nest.pack_sequence_as(self._func_outputs, self._output_shapes)
+
+  @property
+  def output_dtypes(self):
+    return nest.map_structure(
+        lambda x: x.dtype if x is not None else None, self._func_outputs)
+
+  @property
+  def captured_inputs(self):
+    return self._extra_inputs
+
+  @property
+  def name(self):
+    """Returns the name of the function in Eager-compatible format."""
+    return self._function_def.name.encode("utf-8")
+
+  def add_to_graph(self, g):
+    if self._function_def.name not in g._functions:  # pylint: disable=protected-access
+      g._add_function(self._function_def)  # pylint: disable=protected-access
+    for f in self._graph._functions.values():  # pylint: disable=protected-access
+      if f.name not in g._functions:  # pylint: disable=protected-access
+        g._add_function(f)  # pylint: disable=protected-access
+
   def __call__(self, *args):
     """Executes the passed function in eager mode."""
     for v in self._variables:
       if v._trainable:  # pylint: disable=protected-access
         tape.watch_variable(v)
 
-    tensor_inputs = [
-        x for x in nest.flatten(args)
-        if isinstance(x, ops.Tensor)
-    ]
-
+    tensor_inputs = [x for x in nest.flatten(args)
+                     if isinstance(x, ops.Tensor)]
     if tape.should_record(tensor_inputs) or tape.should_record(
         self._extra_inputs):
       if not self._has_backprop:
@@ -364,16 +468,13 @@ class GraphModeFunction(object):
     ctx = context.context()
     if ctx.in_graph_mode():
       g = ops.get_default_graph()
-      if self._fdef.name not in g._functions:  # pylint: disable=protected-access
-        g._add_function(self._fdef)  # pylint: disable=protected-access
-      for f in self._graph._functions.values():  # pylint: disable=protected-access
-        if f.name not in g._functions:  # pylint: disable=protected-access
-          g._add_function(f)  # pylint: disable=protected-access
-      signature = self._fdef.definition.signature
+      self.add_to_graph(g)
+      signature = self._function_def.definition.signature
       args = list(tensor_inputs) + self._extra_inputs
       op = g.create_op(
-          signature.name, [ops.convert_to_tensor(x) for x in args],
-          [dtypes.DType(x.type) for x in signature.output_arg],
+          signature.name,
+          [ops.internal_convert_to_tensor(x, ctx=ctx) for x in args],
+          tuple(dtypes_module.DType(x.type) for x in signature.output_arg),
           op_def=signature,
           name="FunctionCall",
           compute_shapes=False)
@@ -414,14 +515,13 @@ class GraphModeFunction(object):
 def _get_defun_inputs(args):
   """Maps the inputs args to graph inputs."""
   ret = []
-  for a in args:
+  flat_args = nest.flatten(args)
+  for a in flat_args:
     if isinstance(a, ops.Tensor):
       ret.append(graph_placeholder(a.dtype, a.shape))
-    elif type(a) in (tuple, list):
-      ret.append(_get_defun_inputs(a))
     else:
       ret.append(a)
-  return tuple(ret) if type(args) is tuple else ret
+  return nest.pack_sequence_as(args, ret)
 
 
 def _defun_internal(name, func, args, kwds):
@@ -446,45 +546,46 @@ def _defun_internal(name, func, args, kwds):
       func_inputs = _get_defun_inputs(args)
 
       with capture_tensors(captures):
-        tape.push_new_tape()
+        this_tape = tape.push_new_tape()
         try:
           func_outputs = func(*func_inputs, **kwds)
         finally:
-          variables = tape.pop_tape().watched_variables()
+          tape.pop_tape(this_tape)
+        variables = this_tape.watched_variables()
+
+        # Returning a closed-over tensor as an output does not trigger a
+        # call to convert_to_tensor, so we manually capture all such tensors.
+        outputs_list = nest.flatten(func_outputs)
+        func_def_outputs = [
+            _convert_to_graph_tensor(x) for x in outputs_list if x is not None
+        ]
+
       ids = list(sorted(captures.keys()))
       if ids:
         extra_inputs, extra_placeholders = zip(* [captures[x] for x in ids])
       else:
         extra_inputs = []
         extra_placeholders = []
-      outputs_list = nest.flatten(func_outputs)
-      output_shapes = [x.shape for x in outputs_list if x is not None]
+      output_shapes = tuple(
+          x.shape if isinstance(x, ops.Tensor) else None
+          for x in outputs_list)
 
-  flat_inputs = [
-      x for x in nest.flatten(func_inputs) if isinstance(x, ops.Tensor)
-  ]
+  flat_inputs = [x for x in nest.flatten(func_inputs)
+                 if isinstance(x, ops.Tensor)]
   all_inputs = flat_inputs + list(extra_placeholders)
-
-  func_def_outputs = [x for x in outputs_list if x is not None]
-  inference_function_def = make_function_def(
-      tmp_graph, tmp_graph.get_operations(), all_inputs, func_def_outputs)
+  all_ignored_ops = frozenset(x.op for x in all_inputs)
+  fname = _inference_name(name)
+  operations = tuple(x for x in tmp_graph.get_operations()
+                     if x not in all_ignored_ops)
   # Register any other functions defined in the graph
   # TODO(ashankar): Oh lord, forgive me for this lint travesty.
-  for f in tmp_graph._functions.values():  # pylint: disable=protected-access
-    # TODO(ashankar): What about the gradient registry?
-    _register_with_name(f.name, f.definition)
-  _register_with_name(_inference_name(name), inference_function_def)
-
+  if context.in_eager_mode():
+    for f in tmp_graph._functions.values():  # pylint: disable=protected-access
+      # TODO(ashankar): What about the gradient registry?
+      _register(f._c_func)  # pylint: disable=protected-access
   return GraphModeFunction(
-      all_inputs,
-      extra_inputs,
-      inference_function_def,
-      tmp_graph,
-      tmp_graph.get_operations(),
-      func_outputs,
-      _map_sequence_obj_to_idx(func_def_outputs),
-      output_shapes,
-      variables=variables)
+      fname, all_inputs, extra_inputs, tmp_graph, operations, func_def_outputs,
+      func_outputs, output_shapes, variables)
 
 
 # Defun uses this instead of Tensor as a cache key. Using dtype because
@@ -501,15 +602,16 @@ def _cache_key(x):
     return _TensorDtype(x.dtype, x._shape_tuple())  # pylint: disable=protected-access
   if isinstance(x, np.ndarray):
     return ("array", x.shape, tuple(x.reshape(-1)))
-  if type(x) in (list, tuple):
+  if isinstance(x, (list, tuple)):
     return tuple([_cache_key(a) for a in x])
+  if isinstance(x, dict):
+    return tuple(tuple([_cache_key(k), _cache_key(v)]) for k, v in x.items())
   return x
 
 
-def _register_with_name(name, fdef):
-  """Registers the function `fdef` with the name `name`."""
-  fdef.signature.name = name
-  context.context().add_function_def(fdef)
+def _register(fn):
+  """Registers the function `fn`."""
+  context.context().add_function(fn)
 
 
 # TODO(apassos): better error messages for non-hashable arguments.
@@ -532,7 +634,8 @@ def named_defun(func, name):
     """Decorated version of func."""
     # Macroexpand on non-Tensor arguments
     cache_key = tuple(_cache_key(x) for x in args)
-    assert all(not isinstance(x, ops.EagerTensor) for x in kwds.values())
+    if any(isinstance(x, ops.EagerTensor) for x in kwds.values()):
+      raise ValueError("Tensor keyword arguments are not supported.")
     cache_key = (cache_key, tuple(kwds.items()))
 
     if cache_key not in arguments_to_functions:
@@ -595,3 +698,55 @@ def defun(func):
   """
   # TODO(apassos): deal with captured global state. Deal with control flow.
   return tf_decorator.make_decorator(func, named_defun(func, func.__name__))
+
+
+def make_defun_op(func, *args, **kwds):
+  """Compile func into graph_mode, assuming func arguments are *args, **kwargs.
+
+  `make_defun_op` converts a function that constructs a TensorFlow graph into
+  a function object and attaches it to the graph.  The resulting function
+  object can be queried for its properties, and called directly with different
+  inputs to execute.
+
+  More details on use cases and limitations are available in the
+  documentation for `defun`.
+
+  Example:
+  ```python
+  def f(x, y):
+    return tf.reduce_mean(tf.multiply(x ** 2, 3) + y)
+
+  def g(x, y):
+    return tf.reduce_mean(tf.multiply(x ** 2, 3) + y)
+
+  z = tf.constant([[0.0, 0.0]])
+  g_op = make_defun_op(g, z, z)
+
+  assert g_op.output_shapes == tf.TensorShape([])
+  assert g_op.output_types == tf.float32
+
+  x = tf.constant([[2.0, 3.0]])
+  y = tf.constant([[3.0, -2.0]])
+
+  # The plain function and defun-compiled function should return the same value.
+  assert f(x, y).numpy() == g_op(x, y).numpy()
+  ```
+
+  Args:
+    func: function to be compiled.
+    *args: List arguments to pass to `func` when attaching to the graph.
+    **kwds: Keyword arguments to pass to `func` when attaching to the graph.
+
+  Returns:
+     A wrapper object which can be queried for its output properties,
+     and which can be called directly the way a `@defun` wrapped function
+     can.
+
+  Raises:
+    ValueError: if any of the keyword arguments to `func` are `EagerTensor`
+      objects (not yet supported).
+  """
+  name = func.__name__
+  if any(isinstance(x, ops.EagerTensor) for x in kwds.values()):
+    raise ValueError("Tensor keyword arguments are not supported.")
+  return _defun_internal(name, func, args, kwds)
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index c55f2f1d5957cabfaf3bae617d88dca55f7b8e4b..9b08a35ff1aaaab2559b89d4c8106685783503d5 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -17,6 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
+
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import function
@@ -27,6 +29,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import function as tf_function
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import math_ops
@@ -56,6 +59,20 @@ class FunctionTest(test.TestCase):
     out = sq(t)
     self.assertAllEqual(out, math_ops.matmul(t, t).numpy())
 
+  def testNestedInputsGraphMode(self):
+    matmul = function.defun(math_ops.matmul)
+
+    pair = collections.namedtuple('pair', ['a', 'b'])
+
+    @function.defun
+    def a_times_b(inputs):
+      return matmul(inputs.a['a'], inputs.b['b'])
+
+    t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+
+    out = a_times_b(pair({'a': t}, {'b': t}))
+    self.assertAllEqual(out, math_ops.matmul(t, t).numpy())
+
   def testGraphModeWithGradients(self):
     v = resource_variable_ops.ResourceVariable(1.0, name='v')
 
@@ -68,6 +85,81 @@ class FunctionTest(test.TestCase):
 
     self.assertAllEqual(step(), 2.0)
 
+  def testBasicDefunOpGraphMode(self):
+    matmul = function.defun(math_ops.matmul)
+
+    def sq(a):
+      return matmul(a, a)
+
+    t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+
+    sq_op = function.make_defun_op(sq, t)
+
+    self.assertEqual(sq_op.output_shapes, tensor_shape.TensorShape([2, 2]))
+    out = sq_op(t)
+    self.assertAllEqual(out, math_ops.matmul(t, t).numpy())
+
+  def testNestedInputsDefunOpGraphMode(self):
+    matmul = function.defun(math_ops.matmul)
+
+    pair = collections.namedtuple('pair', ['a', 'b'])
+    def a_times_b(inputs):
+      return matmul(inputs.a['a'], inputs.b['b'])
+
+    t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+
+    inputs = pair({'a': t}, {'b': t})
+    sq_op = function.make_defun_op(a_times_b, inputs)
+
+    self.assertEqual(sq_op.output_shapes, tensor_shape.TensorShape([2, 2]))
+    out = sq_op(inputs)
+    self.assertAllEqual(out, math_ops.matmul(t, t).numpy())
+
+  def testNestedOutputDefunOpGraphMode(self):
+    matmul = function.defun(math_ops.matmul)
+
+    def sq(a):
+      return (matmul(a, a), {'b': constant_op.constant(1.0)})
+
+    t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+
+    sq_op = function.make_defun_op(sq, t)
+
+    self.assertEqual(sq_op.output_shapes,
+                     (tensor_shape.TensorShape([2, 2]),
+                      {'b': tensor_shape.TensorShape([])}))
+    self.assertEqual(sq_op.output_dtypes,
+                     (dtypes.float32, {'b': dtypes.float32}))
+    (a, b) = sq_op(t)
+    self.assertAllEqual(a, math_ops.matmul(t, t).numpy())
+    self.assertAllEqual(b['b'].numpy(), 1.0)
+
+  def testDefunOpGraphModeWithGradients(self):
+    v = resource_variable_ops.ResourceVariable(1.0, name='v')
+
+    def step():
+      def inner():
+        return v * v
+
+      return backprop.implicit_grad(inner)()[0][0]
+
+    step_op = function.make_defun_op(step)
+
+    self.assertEqual(step_op.output_dtypes, dtypes.float32)
+    self.assertEqual(step_op.output_shapes, tensor_shape.TensorShape([]))
+    self.assertAllEqual(step_op(), 2.0)
+
+  def testDefunOpGraphModeNoneOutput(self):
+    def fn(unused_a, unused_b):
+      return None
+
+    x = constant_op.constant(1)
+    fn_op = function.make_defun_op(fn, x, x)
+
+    self.assertEqual(fn_op.output_dtypes, None)
+    self.assertEqual(fn_op.output_shapes, None)
+    self.assertAllEqual(fn_op(x, x), None)
+
   def testDefunReadVariable(self):
     v = resource_variable_ops.ResourceVariable(1.0)
 
@@ -87,6 +179,42 @@ class FunctionTest(test.TestCase):
 
     self.assertEqual(3.0, float(f()))
 
+  def testDefunShapeInferenceWithCapturedResourceVariable(self):
+    v = resource_variable_ops.ResourceVariable([[1, 2], [3, 4]])
+
+    def f():
+      x = constant_op.constant([[1, 2], [3, 4]])
+      out = math_ops.matmul(v, x)
+      self.assertEqual(out.get_shape(), tensor_shape.TensorShape([2, 2]))
+
+    compiled = function.defun(f)
+    compiled()
+
+  def testDefunShapeInferenceWithCapturedResourceVariableInGraphMode(self):
+    with context.graph_mode():
+      v = resource_variable_ops.ResourceVariable([[1, 2], [3, 4]])
+
+      def f():
+        x = constant_op.constant([[1, 2], [3, 4]])
+        out = math_ops.matmul(v, x)
+        self.assertEqual(out.get_shape(), tensor_shape.TensorShape([2, 2]))
+
+      compiled = function.defun(f)
+      compiled()
+
+  def testDefunShapeInferenceWithCapturedVariableInGraphMode(self):
+    with context.graph_mode():
+      v = variables.Variable([[1, 2], [3, 4]])
+
+      def f():
+        x = constant_op.constant([[1, 2], [3, 4]])
+        out = math_ops.matmul(v, x)
+        self.assertEqual(out.get_shape(), tensor_shape.TensorShape([2, 2]))
+
+      # Check that shape inference works while creating the defun
+      compiled = function.defun(f)
+      compiled()
+
   def testDefunDifferentiable(self):
     v = resource_variable_ops.ResourceVariable(1.0)
 
@@ -310,6 +438,38 @@ class FunctionTest(test.TestCase):
 
     self.assertAllEqual(3, add_one(constant_op.constant(2)))
 
+  def testVariableCaptureInNestedFunctions(self):
+    v = resource_variable_ops.ResourceVariable(1)
+
+    @function.defun
+    def read():
+      return v.read_value()
+
+    @function.defun
+    def outer():
+      return read()
+
+    self.assertEqual(1, int(outer()))
+
+  def testReturnCapturedEagerTensor(self):
+    t = constant_op.constant(1)
+
+    @function.defun
+    def read():
+      return t
+
+    self.assertEqual(1, int(read()))
+
+  def testReturnCapturedGraphTensor(self):
+    with context.graph_mode(), self.test_session():
+      t = constant_op.constant(1)
+
+      @function.defun
+      def read():
+        return t
+
+      self.assertEqual(1, int(self.evaluate(read())))
+
   def testSequenceInputs(self):
     clip_by_global_norm = function.defun(clip_ops.clip_by_global_norm)
     t_list = [constant_op.constant(1.0), constant_op.constant(2.0)]
diff --git a/tensorflow/python/eager/gen_op.bzl b/tensorflow/python/eager/gen_op.bzl
index 1c99d342befaf04112ac83aeecce2b122eb361c5..8bc1d6c10a60b89a026cb34dbf6fd98d29e909c2 100644
--- a/tensorflow/python/eager/gen_op.bzl
+++ b/tensorflow/python/eager/gen_op.bzl
@@ -10,7 +10,9 @@ def tfe_gen_op_wrapper_py(name,
                           out=None,
                           visibility=None,
                           deps=[],
-                          generated_target_name=None):
+                          generated_target_name=None,
+                          # ApiDefs will be loaded in the order specified in this list.
+                          api_def_srcs=[]):
   """Generate an eager-mode Python op wrapper for an op library."""
   # Construct a cc_binary containing the specified ops.
   tool_name = "gen_" + name + "_py_wrappers_cc"
@@ -30,11 +32,25 @@ def tfe_gen_op_wrapper_py(name,
   if not out:
     out = "gen_" + name + ".py"
 
+  if not api_def_srcs:
+    api_def_args_str = ","
+  else:
+    api_def_args = []
+    for api_def_src in api_def_srcs:
+      # Add directory of the first ApiDef source to args.
+      # We are assuming all ApiDefs in a single api_def_src are in the
+      # same directory.
+      api_def_args.append(
+          "$$(dirname $$(echo $(locations " + api_def_src +
+          ") | cut -d\" \" -f1))")
+    api_def_args_str = ",".join(api_def_args)
+
   native.genrule(
       name=name + "_pygenrule",
       outs=[out],
+      srcs=api_def_srcs,
       tools=[tool_name] + tf_binary_additional_srcs(),
-      cmd=("$(location " + tool_name + ")  > $@"))
+      cmd=("$(location " + tool_name + ") " + api_def_args_str + " > $@"))
 
   # Make a py_library out of the generated python file.
   if not generated_target_name:
diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py
index 837a75c808f94d4561a0eb68c8e77700d0e413da..5c13ea89081a7d060c0ed1201f0169b739a204c2 100644
--- a/tensorflow/python/eager/graph_callable.py
+++ b/tensorflow/python/eager/graph_callable.py
@@ -296,6 +296,7 @@ def _graph_callable_internal(func, shape_and_dtypes):
       # Call the function again, now replacing usages of variables with
       # placeholders. This assumes the variable capturing scope created above
       # knows about all variables.
+      tmp_graph.clear_resource_control_flow_state()
       with variable_captures.capturing_scope(), function.capture_tensors(
           captures):
         captured_outputs = func(*func_inputs)
@@ -317,46 +318,33 @@ def _graph_callable_internal(func, shape_and_dtypes):
   placeholder_inputs = flat_inputs+ list(extra_placeholders)
 
   func_def_outputs = [x for x in outputs_list if isinstance(x, tf_ops.Tensor)]
-  initializer_function_def = function.make_function_def(
-      tmp_graph,
-      initializing_operations,
-      placeholder_inputs,
-      func_def_outputs)
+  initialization_name = function._inference_name(func.__name__)  # pylint: disable=protected-access
   # TODO(ashankar): Oh lord, forgive me for this lint travesty.
   # Also, what about the gradient registry of these functions? Those need to be
   # addressed as well.
   for f in tmp_graph._functions.values():  # pylint: disable=protected-access
-    function._register_with_name(f.name, f.definition)  # pylint: disable=protected-access
-  function._register_with_name(function._inference_name(func.__name__),  # pylint: disable=protected-access
-                               initializer_function_def)
+    function._register(f._c_func)  # pylint: disable=protected-access
   initializer_function = function.GraphModeFunction(
+      initialization_name,
       placeholder_inputs,
       extra_inputs,
-      initializer_function_def,
       tmp_graph,
       initializing_operations,
+      func_def_outputs,
       func_outputs,
-      function._map_sequence_obj_to_idx(func_def_outputs),  # pylint: disable=protected-access
       output_shapes)
 
   capture_func_def_outputs = [
       x for x in captured_outlist if isinstance(x, tf_ops.Tensor)]
-  captured_function_def = function.make_function_def(
-      tmp_graph,
-      capturing_operations,
-      placeholder_inputs,
-      capture_func_def_outputs)
-  function._register_with_name(function._inference_name(func.__name__),  # pylint: disable=protected-access
-                               captured_function_def)
-
+  captured_function_name = function._inference_name(func.__name__)  # pylint: disable=protected-access
   captured_function = function.GraphModeFunction(
+      captured_function_name,
       placeholder_inputs,
       extra_inputs,
-      captured_function_def,
       tmp_graph,
       capturing_operations,
+      capture_func_def_outputs,
       captured_outputs,
-      function._map_sequence_obj_to_idx(capture_func_def_outputs),  # pylint: disable=protected-access
       output_shapes,
       variables=[x.variable for x in sorted_variables])
 
diff --git a/tensorflow/python/eager/graph_callable_test.py b/tensorflow/python/eager/graph_callable_test.py
index 548e16a909f8fe846ea6d5a7a33c4247c5d90054..b9e6ca2a93ac6ff02b741051234dbdd8a55bf12b 100644
--- a/tensorflow/python/eager/graph_callable_test.py
+++ b/tensorflow/python/eager/graph_callable_test.py
@@ -152,7 +152,6 @@ class GraphCallableTest(test.TestCase):
     self.assertAllEqual(5, f(constant_op.constant(2)))
 
   def testNestedFunction(self):
-
     # TensorFlow function (which is what would be used in TensorFlow graph
     # construction).
     @function.Defun(dtypes.int32, dtypes.int32)
diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py
index 70e23b9311792fd7e5243bbc9fd6e4009f1493a9..f8c5037dcf8d4c9c2ca90c641981c9280b946c4f 100644
--- a/tensorflow/python/eager/ops_test.py
+++ b/tensorflow/python/eager/ops_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import sparse_ops
 
 
@@ -254,11 +255,12 @@ class OpsTest(test_util.TensorFlowTestCase):
         'using.*DEVICE_PLACEMENT_SILENT'):
       reshaped = array_ops.reshape(value, shape.gpu())
 
-  def testInvalidInputDataType(self):
+  def testInt64(self):
     # Fill requires the first input to be an int32 tensor.
-    with self.assertRaisesRegexp(errors.InvalidArgumentError, 'int64'):
-      array_ops.fill(constant_op.constant([2], dtype=dtypes.int64),
-                     constant_op.constant(1))
+    self.assertAllEqual(
+        [1.0, 1.0],
+        array_ops.fill(constant_op.constant([2], dtype=dtypes.int64),
+                       constant_op.constant(1)))
 
   def testOutputOnHostMemory(self):
     if not context.context().num_gpus():
@@ -322,6 +324,13 @@ class OpsTest(test_util.TensorFlowTestCase):
   def testIdentity(self):
     self.assertAllEqual(2, array_ops.identity(2))
 
+  def testIdentityOnVariable(self):
+    if not context.context().num_gpus():
+      self.skipTest('No GPUs found')
+    with context.device('/gpu:0'):
+      v = resource_variable_ops.ResourceVariable(True)
+    self.assertAllEqual(True, array_ops.identity(v))
+
   def testIncompatibleSetShape(self):
     x = constant_op.constant(1)
     with self.assertRaises(ValueError):
diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc
index 956fbdac50d05fbd23ab93ec97145645805ac5e7..90a8779ff845b2fd63d1ba1019e8601fef257e42 100644
--- a/tensorflow/python/eager/python_eager_op_gen.cc
+++ b/tensorflow/python/eager/python_eager_op_gen.cc
@@ -99,6 +99,15 @@ string TensorPBString(const TensorProto& pb) {
   return strings::StrCat("\"\"\"", ProtoShortDebugString(pb), "\"\"\"");
 }
 
+const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def) {
+  for (int i = 0; i < api_def.in_arg_size(); ++i) {
+    if (api_def.in_arg(i).name() == name) {
+      return &api_def.in_arg(i);
+    }
+  }
+  return nullptr;
+}
+
 class GenEagerPythonOp : public python_op_gen_internal::GenPythonOp {
  public:
   GenEagerPythonOp(const OpDef& op_def, const ApiDef& api_def,
@@ -164,14 +173,14 @@ string GenEagerPythonOp::FlattenInputs(
       } else if (inputs_state == WAS_LIST_INPUT) {
         strings::StrAppend(&inputs, " + ");
       }
-      strings::StrAppend(&inputs, "list(", param_names_[i], ")");
+      strings::StrAppend(&inputs, "list(", param_names_[i].GetRenameTo(), ")");
       inputs_state = WAS_LIST_INPUT;
       if (output_sizes != nullptr) {
         if (!arg.number_attr().empty()) {
           output_sizes->emplace_back(AttrVarName(arg.number_attr(), nullptr));
         } else {
           output_sizes->emplace_back(
-              strings::StrCat("len(", param_names_[i], ")"));
+              strings::StrCat("len(", param_names_[i].GetRenameTo(), ")"));
         }
       }
     } else {
@@ -182,7 +191,7 @@ string GenEagerPythonOp::FlattenInputs(
       } else {
         strings::StrAppend(&inputs, "[");
       }
-      strings::StrAppend(&inputs, param_names_[i]);
+      strings::StrAppend(&inputs, param_names_[i].GetRenameTo());
       inputs_state = WAS_SOLO_INPUT;
       if (output_sizes != nullptr) output_sizes->emplace_back();
     }
@@ -195,15 +204,21 @@ string GenEagerPythonOp::FlattenInputs(
 }
 
 string GenEagerPythonOp::Code() {
+  if (api_def_.visibility() == ApiDef::SKIP) {
+    return "";
+  }
   // This has all the input args followed by those attrs that don't have
   // defaults.
-  std::vector<string> args_no_default;
+  std::vector<python_op_gen_internal::ParamNames> params_no_default;
   // The parameters with defaults (these have to be listed after those without).
   // No input args are included, just attrs.
-  std::vector<std::pair<string, string>> args_with_defaults;
-  for (int i = 0; i < op_def_.input_arg_size(); ++i) {
-    const auto& arg(op_def_.input_arg(i));
-    args_no_default.push_back(arg.name());
+  std::vector<std::pair<python_op_gen_internal::ParamNames, string>>
+      params_with_default;
+
+  for (int i = 0; i < api_def_.arg_order_size(); ++i) {
+    const auto& arg = *FindInputArg(api_def_.arg_order(i), op_def_);
+    const auto& api_def_arg = *FindInputArg(api_def_.arg_order(i), api_def_);
+    params_no_default.emplace_back(api_def_arg.name(), api_def_arg.rename_to());
     if (!arg.type_attr().empty()) {
       AddAttrForArg(arg.type_attr(), i);
     } else if (!arg.type_list_attr().empty()) {
@@ -215,31 +230,39 @@ string GenEagerPythonOp::Code() {
   }
   for (int i = 0; i < op_def_.attr_size(); ++i) {
     const auto& attr(op_def_.attr(i));
+    const auto& api_def_attr(api_def_.attr(i));
     // Do not add inferred attrs to the Python function signature.
     if (inferred_attrs_.find(attr.name()) == inferred_attrs_.end()) {
-      if (attr.has_default_value()) {
+      if (api_def_attr.has_default_value()) {
         if (attr.type() == "tensor") {
-          args_with_defaults.emplace_back(
-              attr.name(),
-              strings::StrCat("_execute.make_tensor(",
-                              TensorPBString(attr.default_value().tensor()),
-                              ", \"", attr.name(), "\")"));
+          params_with_default.emplace_back(
+              python_op_gen_internal::ParamNames(api_def_attr.name(),
+                                                 api_def_attr.rename_to()),
+              strings::StrCat(
+                  "_execute.make_tensor(",
+                  TensorPBString(api_def_attr.default_value().tensor()), ", \"",
+                  api_def_attr.rename_to(), "\")"));
         } else if (attr.type() == "list(tensor)") {
           std::vector<string> pbtxt;
-          for (const auto& pb : attr.default_value().list().tensor()) {
+          for (const auto& pb : api_def_attr.default_value().list().tensor()) {
             pbtxt.emplace_back(TensorPBString(pb));
           }
-          args_with_defaults.emplace_back(
-              attr.name(),
-              strings::StrCat("[_execute.make_tensor(_pb, \"", attr.name(),
-                              "\") for _pb in ", VectorToTuple(pbtxt), "]"));
+          params_with_default.emplace_back(
+              python_op_gen_internal::ParamNames(api_def_attr.name(),
+                                                 api_def_attr.rename_to()),
+              strings::StrCat("[_execute.make_tensor(_pb, \"",
+                              api_def_attr.rename_to(), "\") for _pb in ",
+                              VectorToTuple(pbtxt), "]"));
         } else {
-          args_with_defaults.emplace_back(
-              attr.name(), python_op_gen_internal::AttrValueToPython(
-                               attr.type(), attr.default_value(), "_dtypes."));
+          params_with_default.emplace_back(
+              python_op_gen_internal::ParamNames(api_def_attr.name(),
+                                                 api_def_attr.rename_to()),
+              python_op_gen_internal::AttrValueToPython(
+                  attr.type(), api_def_attr.default_value(), "_dtypes."));
         }
       } else {
-        args_no_default.push_back(attr.name());
+        params_no_default.emplace_back(api_def_attr.name(),
+                                       api_def_attr.rename_to());
       }
     }
   }
@@ -247,34 +270,37 @@ string GenEagerPythonOp::Code() {
   // Save the list of attr parameters (attrs that won't be inferred),
   // those with defaults go at the end.
   // Get the attrs in the order we want by taking the attrs without defaults
-  // from the end of args_no_default, and adding args_no_default.
-  attrs_.reserve(args_no_default.size() - op_def_.input_arg_size() +
-                 args_with_defaults.size());
-  attrs_.insert(attrs_.end(),
-                args_no_default.begin() + op_def_.input_arg_size(),
-                args_no_default.end());
-  for (const auto& a : args_with_defaults) {
-    attrs_.push_back(a.first);
+  // from the end of params_no_default, and adding params_no_default.
+  attrs_.reserve(params_no_default.size() - op_def_.input_arg_size() +
+                 params_with_default.size());
+  for (int i = op_def_.input_arg_size(); i < params_no_default.size(); ++i) {
+    attrs_.push_back(params_no_default[i].GetName());
+  }
+  for (const auto& p : params_with_default) {
+    attrs_.push_back(p.first.GetName());
+  }
+
+  param_names_.reserve(params_no_default.size() + params_with_default.size());
+  param_names_.insert(param_names_.begin(), params_no_default.begin(),
+                      params_no_default.end());
+  for (const auto& param_and_default : params_with_default) {
+    param_names_.push_back(param_and_default.first);
   }
 
-  param_names_.reserve(args_no_default.size() + args_with_defaults.size());
   string parameters;
-  for (const string& name : args_no_default) {
+  for (const auto& param : params_no_default) {
     if (!parameters.empty()) strings::StrAppend(&parameters, ", ");
-    const string param = python_op_gen_internal::AvoidPythonReserved(name);
-    strings::StrAppend(&parameters, param);
-    param_names_.push_back(param);
+    strings::StrAppend(&parameters, param.GetRenameTo());
   }
-  for (const auto& name_default : args_with_defaults) {
+  for (const auto& param_and_default : params_with_default) {
     if (!parameters.empty()) strings::StrAppend(&parameters, ", ");
-    const string param =
-        python_op_gen_internal::AvoidPythonReserved(name_default.first);
-    strings::StrAppend(&parameters, param, "=", name_default.second);
-    param_names_.push_back(param);
+    strings::StrAppend(&parameters, param_and_default.first.GetRenameTo(), "=",
+                       param_and_default.second);
   }
   if (!parameters.empty()) strings::StrAppend(&parameters, ", ");
   strings::StrAppend(&parameters, "name=None");
 
+  AddExport();
   AddDefLine(parameters);
   AddDocStringDescription();
   AddDocStringArgs();
@@ -297,25 +323,26 @@ string GenEagerPythonOp::Code() {
         // inputs are lists and have the same length.
         for (auto iter = arg_list->second.begin();
              iter != arg_list->second.end(); ++iter) {
-          const string& arg_name = param_names_[*iter];
-          ExpectListArg(arg_name);
+          const string& arg_api_name = param_names_[*iter].GetRenameTo();
+          ExpectListArg(arg_api_name);
           if (iter == arg_list->second.begin()) {
-            AddInferredAttr(attr.name(), strings::StrCat("len(", arg_name, ")"),
+            AddInferredAttr(attr.name(),
+                            strings::StrCat("len(", arg_api_name, ")"),
                             &result_, &attr_expressions_);
           } else {
             const auto& attr_var = attr_expressions_[attr.name()];
-            strings::StrAppend(&result_, "  if len(", arg_name,
+            strings::StrAppend(&result_, "  if len(", arg_api_name,
                                ") != ", attr_var,
                                ":\n"
                                "    raise ValueError(\n"
                                "        \"List argument '",
-                               arg_name, "' to '", op_name_,
+                               arg_api_name, "' to '", op_name_,
                                "' Op with length %d \"\n"
                                "        \"must match length %d of argument '",
                                inferred_attrs_[attr.name()],
                                "'.\" %\n"
                                "        (len(",
-                               arg_name, "), ", attr_var, "))\n");
+                               arg_api_name, "), ", attr_var, "))\n");
           }
         }
       }
@@ -325,65 +352,76 @@ string GenEagerPythonOp::Code() {
   // Values for non-inferred attrs.
   for (int i = 0; i < attrs_.size(); ++i) {
     const string& attr_name = attrs_[i];
-    const string& param = param_names_[i + op_def_.input_arg_size()];
+    const auto& param = param_names_[i + op_def_.input_arg_size()];
     const auto& attr = *FindAttr(attr_name, op_def_);
+    const string& attr_api_name = param.GetRenameTo();
     StringPiece attr_type = attr.type();
-    attr_expressions_[attr_name] = param;
-    const int default_index = i - (attrs_.size() - args_with_defaults.size());
+    attr_expressions_[attr_name] = attr_api_name;
+    const int default_index = i - (attrs_.size() - params_with_default.size());
     if (default_index >= 0) {
-      const string& default_value = args_with_defaults[default_index].second;
-      strings::StrAppend(&result_, "  if ", param, " is None:\n");
-      strings::StrAppend(&result_, "    ", param, " = ", default_value, "\n");
+      const string& default_value = params_with_default[default_index].second;
+      strings::StrAppend(&result_, "  if ", attr_api_name, " is None:\n");
+      strings::StrAppend(&result_, "    ", attr_api_name, " = ", default_value,
+                         "\n");
     }
     if (attr_type.starts_with("list(")) {
-      ExpectListArg(param);
+      ExpectListArg(attr_api_name);
     }
 
     if (attr_type == "string") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_str(", param,
-                         ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name, " = _execute.make_str(",
+                         attr_api_name, ", \"", attr_api_name, "\")\n");
     } else if (attr_type == "list(string)") {
-      strings::StrAppend(&result_, "  ", param, " = [_execute.make_str(_s, \"",
-                         param, "\") for _s in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_str(_s, \"", attr_api_name,
+                         "\") for _s in ", attr_api_name, "]\n");
     } else if (attr_type == "int") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_int(", param,
-                         ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name, " = _execute.make_int(",
+                         attr_api_name, ", \"", attr_api_name, "\")\n");
     } else if (attr_type == "list(int)") {
-      strings::StrAppend(&result_, "  ", param, " = [_execute.make_int(_i, \"",
-                         param, "\") for _i in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_int(_i, \"", attr_api_name,
+                         "\") for _i in ", attr_api_name, "]\n");
     } else if (attr_type == "float") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_float(",
-                         param, ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = _execute.make_float(", attr_api_name, ", \"",
+                         attr_api_name, "\")\n");
     } else if (attr_type == "list(float)") {
-      strings::StrAppend(&result_, "  ", param,
-                         " = [_execute.make_float(_f, \"", param,
-                         "\") for _f in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_float(_f, \"", attr_api_name,
+                         "\") for _f in ", attr_api_name, "]\n");
     } else if (attr_type == "bool") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_bool(", param,
-                         ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = _execute.make_bool(", attr_api_name, ", \"",
+                         attr_api_name, "\")\n");
     } else if (attr_type == "list(bool)") {
-      strings::StrAppend(&result_, "  ", param, " = [_execute.make_bool(_b, \"",
-                         param, "\") for _b in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_bool(_b, \"", attr_api_name,
+                         "\") for _b in ", attr_api_name, "]\n");
     } else if (attr_type == "type") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_type(", param,
-                         ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = _execute.make_type(", attr_api_name, ", \"",
+                         attr_api_name, "\")\n");
     } else if (attr_type == "list(type)") {
-      strings::StrAppend(&result_, "  ", param, " = [_execute.make_type(_t, \"",
-                         param, "\") for _t in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_type(_t, \"", attr_api_name,
+                         "\") for _t in ", attr_api_name, "]\n");
     } else if (attr_type == "shape") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_shape(",
-                         param, ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = _execute.make_shape(", attr_api_name, ", \"",
+                         attr_api_name, "\")\n");
     } else if (attr_type == "list(shape)") {
-      strings::StrAppend(&result_, "  ", param,
-                         " = [_execute.make_shape(_s, \"", param,
-                         "\") for _s in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_shape(_s, \"", attr_api_name,
+                         "\") for _s in ", attr_api_name, "]\n");
     } else if (attr_type == "tensor") {
-      strings::StrAppend(&result_, "  ", param, " = _execute.make_tensor(",
-                         param, ", \"", param, "\")\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = _execute.make_tensor(", attr_api_name, ", \"",
+                         attr_api_name, "\")\n");
     } else if (attr_type == "list(tensor)") {
-      strings::StrAppend(&result_, "  ", param,
-                         " = [_execute.make_tensor(_t, \"", param,
-                         "\") for _t in ", param, "]\n");
+      strings::StrAppend(&result_, "  ", attr_api_name,
+                         " = [_execute.make_tensor(_t, \"", attr_api_name,
+                         "\") for _t in ", attr_api_name, "]\n");
     } else if (attr_type != "func") {
       return strings::StrCat("# No definition for ", function_name_,
                              " since we don't support attrs with type\n"
@@ -484,16 +522,20 @@ string GenEagerPythonOp::Code() {
 
   bool eager_allowed = true;
   string ref_arg;
-  for (const auto& arg : op_def_.input_arg()) {
+  for (int i = 0; i < op_def_.input_arg_size(); ++i) {
+    const auto& arg = op_def_.input_arg(i);
     if (arg.is_ref()) {
       eager_allowed = false;
-      ref_arg = arg.name();
+      DCHECK_EQ(op_def_.input_arg(i).name(), api_def_.in_arg(i).name());
+      ref_arg = api_def_.in_arg(i).rename_to();
     }
   }
-  for (const auto& arg : op_def_.output_arg()) {
+  for (int i = 0; i < op_def_.output_arg_size(); ++i) {
+    const auto& arg = op_def_.output_arg(i);
     if (arg.is_ref()) {
       eager_allowed = false;
-      ref_arg = arg.name();
+      DCHECK_EQ(op_def_.output_arg(i).name(), api_def_.out_arg(i).name());
+      ref_arg = api_def_.out_arg(i).rename_to();
     }
   }
 
@@ -553,6 +595,7 @@ void GenEagerPythonOp::AddEagerInferredAttrs() {
   // Figure out values for inferred attrs, and cast to eager tensors.
   for (int i = 0; i < op_def_.attr_size(); ++i) {
     const auto& attr(op_def_.attr(i));
+    const auto& api_def_attr(api_def_.attr(i));
     auto arg_list = attr_to_args_.find(attr.name());
     if (arg_list != attr_to_args_.end()) {
       if (attr.type() == "type") {
@@ -565,14 +608,15 @@ void GenEagerPythonOp::AddEagerInferredAttrs() {
           strings::StrAppend(
               &conversion, ", ",
               python_op_gen_internal::AttrValueToPython(
-                  attr.type(), attr.default_value(), "_dtypes."));
+                  attr.type(), api_def_attr.default_value(), "_dtypes."));
         }
         strings::StrAppend(&conversion, ")");
         const string var_name = AttrVarName(attr.name(), &attr_expressions_);
         if (output_sizes.size() == 1) {
           // Avoid creating a temporary variable in the case where
           // we can easily assign to the right value directly.
-          const string inputs_var = param_names_[arg_list->second.front()];
+          const string inputs_var =
+              param_names_[arg_list->second.front()].GetRenameTo();
           if (output_sizes.front().empty()) {
             strings::StrAppend(&result_, "    ", var_name, ", (", inputs_var,
                                ",) = ", conversion, "\n");
@@ -589,7 +633,7 @@ void GenEagerPythonOp::AddEagerInferredAttrs() {
           Unflatten("    ", output_sizes, inputs_var, &result_);
           std::vector<string> p;
           for (int j : arg_list->second) {
-            p.emplace_back(param_names_[j]);
+            p.emplace_back(param_names_[j].GetRenameTo());
           }
           strings::StrAppend(&result_, "    ", VectorToTuple(p), " = ",
                              inputs_var, "\n");
@@ -608,14 +652,14 @@ void GenEagerPythonOp::AddEagerInferredAttrs() {
           std::vector<string> lists;
           for (auto iter = arg_list->second.begin();
                iter != arg_list->second.end(); ++iter) {
-            lists.push_back(param_names_[*iter]);
+            lists.push_back(param_names_[*iter].GetRenameTo());
           }
           inputs_var = VectorToTuple(lists);
           conversion = "_execute.args_to_mixed_eager_tensors";
         } else {
           // For one list(tensor) argument, we just convert every
           // element of the list to an eager tensor.
-          inputs_var = param_names_[arg_list->second.front()];
+          inputs_var = param_names_[arg_list->second.front()].GetRenameTo();
           conversion = "_execute.convert_to_mixed_eager_tensors";
         }
         strings::StrAppend(&result_, "    ", var_name, ", ", inputs_var, " = ",
@@ -630,7 +674,7 @@ void GenEagerPythonOp::AddEagerInputCasts() {
   for (int i = 0; i < op_def_.input_arg_size(); ++i) {
     const auto& arg(op_def_.input_arg(i));
     if (!arg.type_attr().empty() || !arg.type_list_attr().empty()) continue;
-    const string& param = param_names_[i];
+    const string& param = param_names_[i].GetRenameTo();
     const string fn = arg.number_attr().empty() ? "" : "n_";
     const string dtype =
         python_op_gen_internal::DataTypeToPython(arg.type(), "_dtypes.");
diff --git a/tensorflow/python/eager/python_eager_op_gen_main.cc b/tensorflow/python/eager/python_eager_op_gen_main.cc
index cd74c438ec6f5cd7f807a7205f76eff7421aeb74..05351bd8b115ae07482b82166974e86758bc7712 100644
--- a/tensorflow/python/eager/python_eager_op_gen_main.cc
+++ b/tensorflow/python/eager/python_eager_op_gen_main.cc
@@ -21,34 +21,32 @@ limitations under the License.
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/init_main.h"
 
 namespace tensorflow {
 namespace {
 
-constexpr char kBaseApiDef[] =
-    "tensorflow/core/api_def/base_api/*.pbtxt";
-constexpr char kPythonApiDef[] =
-    "tensorflow/core/api_def/python_api/*.pbtxt";
-constexpr bool kUseApiDef = false;
-
-void PrintAllPythonOps(const std::vector<string>& hidden_ops) {
+void PrintAllPythonOps(const std::vector<string>& hidden_ops,
+                       const std::vector<string>& api_def_dirs) {
   OpList ops;
   OpRegistry::Global()->Export(false, &ops);
 
   ApiDefMap api_def_map(ops);
-  if (kUseApiDef) {
+  if (!api_def_dirs.empty()) {
     Env* env = Env::Default();
 
-    std::vector<string> base_api_files;
-    std::vector<string> python_api_files;
-    TF_CHECK_OK(env->GetMatchingPaths(kBaseApiDef, &base_api_files));
-    TF_CHECK_OK(env->GetMatchingPaths(kPythonApiDef, &python_api_files));
-
-    TF_CHECK_OK(api_def_map.LoadFileList(env, base_api_files));
-    TF_CHECK_OK(api_def_map.LoadFileList(env, python_api_files));
+    for (const auto& api_def_dir : api_def_dirs) {
+      std::vector<string> api_files;
+      TF_CHECK_OK(env->GetMatchingPaths(io::JoinPath(api_def_dir, "*.pbtxt"),
+                                        &api_files));
+      TF_CHECK_OK(api_def_map.LoadFileList(env, api_files));
+    }
+    api_def_map.UpdateDocs();
   }
+
   PrintEagerPythonOps(ops, api_def_map, hidden_ops, true /* require_shapes */);
 }
 
@@ -58,8 +56,15 @@ void PrintAllPythonOps(const std::vector<string>& hidden_ops) {
 int main(int argc, char* argv[]) {
   tensorflow::port::InitMain(argv[0], &argc, &argv);
 
+  // Usage:
+  //   python_eager_op_gen_main api_def_dir1,api_def_dir2,...
   if (argc == 1) {
-    tensorflow::PrintAllPythonOps({});
+    tensorflow::PrintAllPythonOps({}, {});
+  } else if (argc == 2) {
+    const std::vector<tensorflow::string> api_def_dirs =
+        tensorflow::str_util::Split(argv[1], ",",
+                                    tensorflow::str_util::SkipEmpty());
+    tensorflow::PrintAllPythonOps({}, api_def_dirs);
   } else {
     return -1;
   }
diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc
index 91192fea62dd3b0f94350a9b25ce8568e248e7e3..6fa076507d11ab9c88891cbeb0a4fb3959e4e99d 100644
--- a/tensorflow/python/eager/pywrap_tensor.cc
+++ b/tensorflow/python/eager/pywrap_tensor.cc
@@ -332,7 +332,7 @@ void EagerTensor_dealloc(EagerTensor* self) {
   tensorflow::ClearDecrefCache();
   auto id = self->id;
   Py_TYPE(self)->tp_free(self);
-  TFE_Py_TapeStackDeleteTrace(id);
+  TFE_Py_TapeSetDeleteTrace(id);
 }
 
 // Getter for `_id`.
diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h
index a33b17ada6f94e43ac16696c502be4b885e9d33a..cecef426032f967afd122b1cfeec6f29d2d7e7a5 100644
--- a/tensorflow/python/eager/pywrap_tfe.h
+++ b/tensorflow/python/eager/pywrap_tfe.h
@@ -87,22 +87,25 @@ TFE_TensorHandle* EagerTensor_Handle(const PyObject* o);
 // newly created type, or nullptr on error.
 PyObject* TFE_Py_InitEagerTensor(PyObject* base_class);
 
-// Pushes a new tape into the thread-local stack.
-// `persistent` must be a PyBool_Type, i.e either Py_True or Py_False
-void TFE_Py_TapeStackPushNew(PyObject* persistent);
+// Creates a new tape and adds it to the active set. `persistent` must be a
+// PyBool_Type, i.e either Py_True or Py_False
+PyObject* TFE_Py_TapeSetNew(PyObject* persistent);
 
-// Pops the tape from the top of the stack and returns it.
-PyObject* TFE_Py_TapeStackPop();
-
-// Pushes an existing tape onto the stack.
-void TFE_Py_TapeStackPush(PyObject* tape);
+// Removes the passed tape from the set of active tapes.
+void TFE_Py_TapeSetRemove(PyObject* tape);
 
 // Returns true if the tape stack is empty.
-PyObject* TFE_Py_TapeStackIsEmpty();
+PyObject* TFE_Py_TapeSetIsEmpty();
+
+PyObject* TFE_Py_TapeSetShouldRecord(PyObject* tensors);
+void TFE_Py_TapeSetWatch(PyObject* tensor);
+void TFE_Py_TapeSetDeleteTrace(tensorflow::int64 tensor_id);
+
+// Stops any gradient recording on the current thread.
+void TFE_Py_TapeSetStopOnThread();
 
-PyObject* TFE_Py_TapeStackShouldRecord(PyObject* tensors);
-void TFE_Py_TapeStackWatch(PyObject* tensor);
-void TFE_Py_TapeStackDeleteTrace(tensorflow::int64 tensor_id);
+// Restarts gradient recording on the current thread.
+void TFE_Py_TapeSetRestartOnThread();
 
 // Records an operation in the gradient tape stack.type is a string for the
 // operation type, used in the backprop code. output_tensors should be a list of
@@ -111,13 +114,12 @@ void TFE_Py_TapeStackDeleteTrace(tensorflow::int64 tensor_id);
 // operation. backward_function should be the function to be called during
 // backprop to, given the gradients of the output tensors, produce the gradients
 // of the input tensors.
-void TFE_Py_TapeStackRecordOperation(PyObject* op_type,
-                                     PyObject* output_tensors,
-                                     PyObject* input_tensor_ids,
-                                     PyObject* backward_function);
+void TFE_Py_TapeSetRecordOperation(PyObject* op_type, PyObject* output_tensors,
+                                   PyObject* input_tensor_ids,
+                                   PyObject* backward_function);
 
 // Watches the given variable object on the given tape.
-void TFE_Py_TapeStackWatchVariable(PyObject* variable);
+void TFE_Py_TapeSetWatchVariable(PyObject* variable);
 
 // Computes a gradient based on information recorded on the tape.`tape` must
 // have been produced by TFE_Py_NewTape. `vspace` must be a
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index ce823cb5679462e28efa925ab98100bbe864ad9b..38c3cb21743eb3657274e8e6ce5ebc3fc85e26b9 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -472,9 +472,19 @@ class GradientTape
   explicit GradientTape(bool persistent)
       : tensorflow::eager::GradientTape<PyObject, PyObject>(persistent) {}
 
+  virtual ~GradientTape() {
+    for (PyObject* v : watched_variables_) {
+      Py_DECREF(v);
+    }
+  }
+
   void WatchVariable(PyObject* v) {
-    watched_variables_.insert(v);
-    Py_INCREF(v);
+    auto insert_result = watched_variables_.insert(v);
+    if (insert_result.second) {
+      // Only increment the reference count if we aren't already watching this
+      // variable.
+      Py_INCREF(v);
+    }
     PyObject* handle = PyObject_GetAttrString(v, "handle");
     if (handle == nullptr) {
       return;
@@ -528,65 +538,67 @@ static PyTypeObject TFE_Py_Tape_Type = {
     "TFE_Py_Tape objects",                        /* tp_doc */
 };
 
+// Note: in the current design no mutex is needed here because of the python
+// GIL, which is always held when any TFE_Py_* methods are called. We should
+// revisit this if/when decide to not hold the GIL while manipulating the tape
+// stack.
+static std::unordered_set<TFE_Py_Tape*>* tape_set = nullptr;
+std::unordered_set<TFE_Py_Tape*>* GetTapeSet() {
+  if (tape_set == nullptr) {
+    tape_set = new std::unordered_set<TFE_Py_Tape*>;
+  }
+  return tape_set;
+}
+
 // xcode 7 doesn't define thread_local, so for compatibility we implement our
 // own. TODO(apassos) remove once we can deprecate xcode 7.
 #ifndef __APPLE__
-thread_local std::vector<TFE_Py_Tape*>* tape_stack = nullptr;
-std::vector<TFE_Py_Tape*>* GetTapeStack() {
-  if (tape_stack == nullptr) {
-    tape_stack = new std::vector<TFE_Py_Tape*>;
-  }
-  return tape_stack;
+bool* ThreadTapeIsStopped() {
+  thread_local bool thread_tape_is_stopped{false};
+  return &thread_tape_is_stopped;
 }
 #else
-static tensorflow::mutex stack_mu(tensorflow::LINKER_INITIALIZED);
-static std::unordered_map<std::thread::id, std::vector<TFE_Py_Tape*>*>*
-    tape_stack GUARDED_BY(stack_mu) = nullptr;
-std::vector<TFE_Py_Tape*>* GetTapeStack() {
-  tensorflow::mutex_lock ml(stack_mu);
-  if (tape_stack == nullptr) {
-    tape_stack =
-        new std::unordered_map<std::thread::id, std::vector<TFE_Py_Tape*>*>;
-  }
-  auto it = tape_stack->find(std::this_thread::get_id());
-  if (it != tape_stack->end()) {
-    return it->second;
-  }
-  return tape_stack
-      ->emplace(std::this_thread::get_id(), new std::vector<TFE_Py_Tape*>)
-      .first->second;
+static std::unordered_map<std::thread::id, bool>* tape_is_stopped = nullptr;
+bool* ThreadTapeIsStopped() {
+  if (tape_is_stopped == nullptr) {
+    tape_is_stopped = new std::unordered_map<std::thread::id, bool>;
+  }
+  auto it = tape_is_stopped->find(std::this_thread::get_id());
+  if (it != tape_is_stopped->end()) {
+    return &(it->second);
+  }
+  return &(tape_is_stopped->emplace(std::this_thread::get_id(), false)
+               .first->second);
 }
 #endif
 
-void TFE_Py_TapeStackPushNew(PyObject* persistent) {
+void TFE_Py_TapeSetStopOnThread() { *ThreadTapeIsStopped() = true; }
+
+void TFE_Py_TapeSetRestartOnThread() { *ThreadTapeIsStopped() = false; }
+
+PyObject* TFE_Py_TapeSetNew(PyObject* persistent) {
   TFE_Py_Tape_Type.tp_new = PyType_GenericNew;
-  if (PyType_Ready(&TFE_Py_Tape_Type) < 0) return;
+  if (PyType_Ready(&TFE_Py_Tape_Type) < 0) return nullptr;
   TFE_Py_Tape* tape = PyObject_NEW(TFE_Py_Tape, &TFE_Py_Tape_Type);
   tape->tape = new GradientTape(persistent == Py_True);
-  GetTapeStack()->push_back(tape);
-}
-
-void TFE_Py_TapeStackPush(PyObject* tape) {
   Py_INCREF(tape);
-  GetTapeStack()->push_back(reinterpret_cast<TFE_Py_Tape*>(tape));
+  GetTapeSet()->insert(reinterpret_cast<TFE_Py_Tape*>(tape));
+  return reinterpret_cast<PyObject*>(tape);
 }
 
-PyObject* TFE_Py_TapeStackIsEmpty() {
-  if (GetTapeStack()->empty()) {
+PyObject* TFE_Py_TapeSetIsEmpty() {
+  if (*ThreadTapeIsStopped() || GetTapeSet()->empty()) {
     Py_RETURN_TRUE;
   }
   Py_RETURN_FALSE;
 }
 
-PyObject* TFE_Py_TapeStackPop() {
-  auto* stack = GetTapeStack();
-  if (stack->empty()) {
-    PyErr_SetString(PyExc_RuntimeError, "tape stack is empty.");
-    return nullptr;
-  }
-  TFE_Py_Tape* top = stack->back();
-  stack->pop_back();
-  return reinterpret_cast<PyObject*>(top);
+void TFE_Py_TapeSetRemove(PyObject* tape) {
+  auto* stack = GetTapeSet();
+  stack->erase(reinterpret_cast<TFE_Py_Tape*>(tape));
+  // We kept a reference to the tape in the set to ensure it wouldn't get
+  // deleted under us; cleaning it up here.
+  Py_DECREF(tape);
 }
 
 static std::vector<tensorflow::int64> MakeIntList(PyObject* list) {
@@ -602,7 +614,11 @@ static std::vector<tensorflow::int64> MakeIntList(PyObject* list) {
   tensor_ids.reserve(len);
   for (int i = 0; i < len; ++i) {
     PyObject* item = PySequence_Fast_GET_ITEM(seq, i);
+#if PY_MAJOR_VERSION >= 3
     if (PyLong_Check(item)) {
+#else
+    if (PyLong_Check(item) || PyInt_Check(item)) {
+#endif
       tensorflow::int64 id = MakeInt(item);
       tensor_ids.push_back(id);
     } else {
@@ -613,12 +629,15 @@ static std::vector<tensorflow::int64> MakeIntList(PyObject* list) {
   return tensor_ids;
 }
 
-PyObject* TFE_Py_TapeStackShouldRecord(PyObject* tensors) {
+PyObject* TFE_Py_TapeSetShouldRecord(PyObject* tensors) {
   if (tensors == Py_None) {
     Py_RETURN_FALSE;
   }
-  auto* stack = GetTapeStack();
-  if (stack->empty()) {
+  if (*ThreadTapeIsStopped()) {
+    Py_RETURN_FALSE;
+  }
+  auto* tape_set = GetTapeSet();
+  if (tape_set->empty()) {
     Py_RETURN_FALSE;
   }
   PyObject* seq = PySequence_Fast(tensors, "expected a sequence");
@@ -635,7 +654,7 @@ PyObject* TFE_Py_TapeStackShouldRecord(PyObject* tensors) {
     tensor_ids.push_back(FastTensorId(item));
   }
   Py_DECREF(seq);
-  for (TFE_Py_Tape* tape : *stack) {
+  for (TFE_Py_Tape* tape : *tape_set) {
     if (tape->tape->ShouldRecord(tensor_ids)) {
       Py_RETURN_TRUE;
     }
@@ -643,12 +662,15 @@ PyObject* TFE_Py_TapeStackShouldRecord(PyObject* tensors) {
   Py_RETURN_FALSE;
 }
 
-void TFE_Py_TapeStackWatch(PyObject* tensor) {
+void TFE_Py_TapeSetWatch(PyObject* tensor) {
+  if (*ThreadTapeIsStopped()) {
+    return;
+  }
   tensorflow::int64 tensor_id = FastTensorId(tensor);
   if (PyErr_Occurred()) {
     return;
   }
-  for (TFE_Py_Tape* tape : *GetTapeStack()) {
+  for (TFE_Py_Tape* tape : *GetTapeSet()) {
     tape->tape->Watch(tensor_id);
   }
 }
@@ -713,8 +735,14 @@ std::vector<tensorflow::int64> MakeTensorIDList(PyObject* tensors) {
   return list;
 }
 
-void TFE_Py_TapeStackWatchVariable(PyObject* variable) {
-  for (TFE_Py_Tape* tape : *GetTapeStack()) {
+void TFE_Py_TapeSetWatchVariable(PyObject* variable) {
+  if (*ThreadTapeIsStopped()) {
+    return;
+  }
+  // Note: making a copy because watching a variable can trigger a change to the
+  // set of tapes by allowing python's garbage collector to run.
+  auto tape_set = *GetTapeSet();
+  for (TFE_Py_Tape* tape : tape_set) {
     tape->tape->WatchVariable(variable);
   }
 }
@@ -725,17 +753,15 @@ PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape) {
   PyObject* result = PySet_New(nullptr);
   for (PyObject* variable : watched_variables) {
     PySet_Add(result, variable);
-    Py_DECREF(variable);
   }
   return result;
 }
 
-void TFE_Py_TapeStackRecordOperation(PyObject* op_type,
-                                     PyObject* output_tensors,
-                                     PyObject* input_tensors,
-                                     PyObject* backward_function) {
-  auto* stack = GetTapeStack();
-  if (stack->empty()) {
+void TFE_Py_TapeSetRecordOperation(PyObject* op_type, PyObject* output_tensors,
+                                   PyObject* input_tensors,
+                                   PyObject* backward_function) {
+  auto* set = GetTapeSet();
+  if (set->empty() || *ThreadTapeIsStopped()) {
     return;
   }
   std::vector<tensorflow::int64> input_ids = MakeTensorIDList(input_tensors);
@@ -770,7 +796,7 @@ void TFE_Py_TapeStackRecordOperation(PyObject* op_type,
     return;
   }
 
-  for (TFE_Py_Tape* tape : *stack) {
+  for (TFE_Py_Tape* tape : *set) {
     Py_INCREF(backward_function);
     tape->tape->RecordOperation(
         op_type_str, output_info, input_ids, backward_function,
@@ -778,8 +804,8 @@ void TFE_Py_TapeStackRecordOperation(PyObject* op_type,
   }
 }
 
-void TFE_Py_TapeStackDeleteTrace(tensorflow::int64 tensor_id) {
-  for (TFE_Py_Tape* tape : *GetTapeStack()) {
+void TFE_Py_TapeSetDeleteTrace(tensorflow::int64 tensor_id) {
+  for (TFE_Py_Tape* tape : *GetTapeSet()) {
     tape->tape->DeleteTrace(tensor_id);
   }
 }
diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py
index 14b5238f74039ec23bd197699de68c4c0254e8d3..ad82266beca05d9f508a702124390fd934161ffd 100644
--- a/tensorflow/python/eager/tape.py
+++ b/tensorflow/python/eager/tape.py
@@ -35,7 +35,8 @@ class Tape(object):
 
 def push_new_tape(persistent=False):
   """Pushes a new tape onto the tape stack."""
-  pywrap_tensorflow.TFE_Py_TapeStackPushNew(persistent)
+  tape = pywrap_tensorflow.TFE_Py_TapeSetNew(persistent)
+  return Tape(tape)
 
 
 def watch(tensor):
@@ -44,7 +45,7 @@ def watch(tensor):
   Args:
     tensor: tensor to be watched.
   """
-  pywrap_tensorflow.TFE_Py_TapeStackWatch(tensor)
+  pywrap_tensorflow.TFE_Py_TapeSetWatch(tensor)
 
 
 def watch_variable(variable):
@@ -53,42 +54,39 @@ def watch_variable(variable):
   Args:
     variable: variable to be watched.
   """
-  pywrap_tensorflow.TFE_Py_TapeStackWatchVariable(variable)
+  pywrap_tensorflow.TFE_Py_TapeSetWatchVariable(variable)
 
 
-def pop_tape():
+def pop_tape(tape):
   """Pops the top tape in the stack, if any."""
-  return Tape(pywrap_tensorflow.TFE_Py_TapeStackPop())
+  pywrap_tensorflow.TFE_Py_TapeSetRemove(tape._tape)  # pylint: disable=protected-access
 
 
 @contextlib.contextmanager
 def stop_recording():
-  stack = []
-  while not pywrap_tensorflow.TFE_Py_TapeStackIsEmpty():
-    stack.append(pop_tape()._tape)  # pylint: disable=protected-access
   try:
+    pywrap_tensorflow.TFE_Py_TapeSetStopOnThread()
     yield
   finally:
-    for tape in reversed(stack):
-      pywrap_tensorflow.TFE_Py_TapeStackPush(tape)
+    pywrap_tensorflow.TFE_Py_TapeSetRestartOnThread()
 
 
 def should_record(tensors):
   """Returns true if any tape in the stack watches any of these tensors."""
-  return pywrap_tensorflow.TFE_Py_TapeStackShouldRecord(tensors)
+  return pywrap_tensorflow.TFE_Py_TapeSetShouldRecord(tensors)
 
 
 def record_operation(op_type, output_tensors, input_tensors, backward_function):
   """Records the operation on all tapes in the stack."""
-  pywrap_tensorflow.TFE_Py_TapeStackRecordOperation(
+  pywrap_tensorflow.TFE_Py_TapeSetRecordOperation(
       op_type, output_tensors, input_tensors, backward_function)
 
 
 def delete_trace(tensor_id):
   """Deletes traces for this Tensor from all tapes in the stack."""
-  pywrap_tensorflow.TFE_Py_TapeStackDeleteTrace(tensor_id)
+  pywrap_tensorflow.TFE_Py_TapeSetDeleteTrace(tensor_id)
 
 
 def could_possibly_record():
   """Returns True if any tape is active."""
-  return not pywrap_tensorflow.TFE_Py_TapeStackIsEmpty()
+  return not pywrap_tensorflow.TFE_Py_TapeSetIsEmpty()
diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py
index 7a4593ec464ab1834a555a131b8b717f5010de62..2568d3dc0543f925a90f53d77cff724e7effa535 100644
--- a/tensorflow/python/eager/tensor_test.py
+++ b/tensorflow/python/eager/tensor_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import copy
+import re
 
 import numpy as np
 
@@ -106,6 +107,11 @@ class TFETensorTest(test_util.TensorFlowTestCase):
     t = _create_tensor(n)
     self.assertAllEqual([[1, 2], [3, 4]], t)
 
+  def testNumpyArrayDtype(self):
+    tensor = constant_op.constant([1.0, 2.0, 3.0])
+    numpy_tensor = np.asarray(tensor, dtype=np.int32)
+    self.assertAllEqual(numpy_tensor, [1, 2, 3])
+
   def testCopy(self):
     t = constant_op.constant(1.0)
     tt = copy.copy(t)
@@ -174,8 +180,8 @@ class TFETensorTest(test_util.TensorFlowTestCase):
     np.set_printoptions(threshold=2, edgeitems=1)
 
     t = _create_tensor(np.arange(10, dtype=np.int32))
-    self.assertIn("[0 ..., 9]", str(t))
-    self.assertIn("[0, ..., 9]", repr(t))
+    self.assertTrue(re.match(r".*\[.*0.*\.\.\..*9.*\]", str(t)))
+    self.assertTrue(re.match(r".*\[.*0.*\.\.\..*9.*\]", repr(t)))
 
     # Clean up: reset to previous printoptions.
     np.set_printoptions(threshold=orig_threshold, edgeitems=orig_edgeitems)
diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index 03f386e9cf885fb88cbb557a99b9d0abe78b3062..63436157371148bc858344a57bf4e180d8a34526 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -215,6 +215,7 @@ py_test(
     srcs_version = "PY2AND3",
     tags = [
         "no_pip",
+        "noasan",  # test flakily times out in asan mode.
         "notsan",  # b/67510291
     ],
     deps = [
@@ -281,6 +282,7 @@ py_library(
         ":model_fn",
         ":numpy_io",
         ":prediction_keys",
+        ":warm_starting_util",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:check_ops",
@@ -423,6 +425,7 @@ py_library(
         ":model_fn",
         ":run_config",
         ":util",
+        ":warm_starting_util",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:client",
         "//tensorflow/python:control_flow_ops",
@@ -433,6 +436,7 @@ py_library(
         "//tensorflow/python:summary",
         "//tensorflow/python:training",
         "//tensorflow/python:util",
+        "//tensorflow/python/data",
         "//tensorflow/python/saved_model:builder",
         "//tensorflow/python/saved_model:tag_constants",
         "//third_party/py/numpy",
@@ -645,6 +649,7 @@ py_test(
         "//tensorflow/python:string_ops",
         "//tensorflow/python:training",
         "//tensorflow/python/feature_column",
+        "//tensorflow/python/ops/losses",
         "//tensorflow/python/saved_model:signature_constants",
         "//third_party/py/numpy",
         "@six_archive//:six",
diff --git a/tensorflow/python/estimator/canned/dnn.py b/tensorflow/python/estimator/canned/dnn.py
index 6f94b2288b999b8d4d3d9f6cb2b3cb4945c39e0d..0392ff9a71920cb966a3731e03b7fc74030292c6 100644
--- a/tensorflow/python/estimator/canned/dnn.py
+++ b/tensorflow/python/estimator/canned/dnn.py
@@ -22,6 +22,7 @@ import six
 
 from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator import warm_starting_util
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.estimator.canned import optimizers
 from tensorflow.python.feature_column import feature_column as feature_column_lib
@@ -88,7 +89,6 @@ def _dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
         partitioner=input_layer_partitioner):
       net = feature_column_lib.input_layer(
           features=features, feature_columns=feature_columns)
-
     for layer_id, num_hidden_units in enumerate(hidden_units):
       with variable_scope.variable_scope(
           'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
@@ -110,15 +110,23 @@ def _dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
           kernel_initializer=init_ops.glorot_uniform_initializer(),
           name=logits_scope)
     _add_hidden_layer_summary(logits, logits_scope.name)
+
     return logits
 
   return dnn_logit_fn
 
 
-def _dnn_model_fn(
-    features, labels, mode, head, hidden_units, feature_columns,
-    optimizer='Adagrad', activation_fn=nn.relu, dropout=None,
-    input_layer_partitioner=None, config=None):
+def _dnn_model_fn(features,
+                  labels,
+                  mode,
+                  head,
+                  hidden_units,
+                  feature_columns,
+                  optimizer='Adagrad',
+                  activation_fn=nn.relu,
+                  dropout=None,
+                  input_layer_partitioner=None,
+                  config=None):
   """Deep Neural Net model_fn.
 
   Args:
@@ -151,6 +159,7 @@ def _dnn_model_fn(
   if not isinstance(features, dict):
     raise ValueError('features should be a dictionary of `Tensor`s. '
                      'Given type: {}'.format(type(features)))
+
   optimizer = optimizers.get_optimizer_instance(
       optimizer, learning_rate=_LEARNING_RATE)
   num_ps_replicas = config.num_ps_replicas if config else 0
@@ -217,6 +226,12 @@ class DNNClassifier(estimator.Estimator):
         l1_regularization_strength=0.001
       ))
 
+  # Or estimator with warm-starting from a previous checkpoint.
+  estimator = DNNClassifier(
+      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+      hidden_units=[1024, 512, 256],
+      warm_start_from="/path/to/checkpoint/dir")
+
   # Input builders
   def input_fn_train: # returns x, y
     pass
@@ -251,18 +266,21 @@ class DNNClassifier(estimator.Estimator):
   @end_compatibility
   """
 
-  def __init__(self,
-               hidden_units,
-               feature_columns,
-               model_dir=None,
-               n_classes=2,
-               weight_column=None,
-               label_vocabulary=None,
-               optimizer='Adagrad',
-               activation_fn=nn.relu,
-               dropout=None,
-               input_layer_partitioner=None,
-               config=None):
+  def __init__(
+      self,
+      hidden_units,
+      feature_columns,
+      model_dir=None,
+      n_classes=2,
+      weight_column=None,
+      label_vocabulary=None,
+      optimizer='Adagrad',
+      activation_fn=nn.relu,
+      dropout=None,
+      input_layer_partitioner=None,
+      config=None,
+      warm_start_from=None,
+  ):
     """Initializes a `DNNClassifier` instance.
 
     Args:
@@ -300,6 +318,11 @@ class DNNClassifier(estimator.Estimator):
       input_layer_partitioner: Optional. Partitioner for input layer. Defaults
         to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
       config: `RunConfig` object to configure the runtime settings.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights are warm-started, and it is assumed that vocabularies and Tensor
+        names are unchanged.
     """
     if n_classes == 2:
       head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
@@ -309,8 +332,10 @@ class DNNClassifier(estimator.Estimator):
       head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
           n_classes, weight_column=weight_column,
           label_vocabulary=label_vocabulary)
+
     def _model_fn(features, labels, mode, config):
-      return _dnn_model_fn(
+      """Call the defined shared _dnn_model_fn and possibly warm-start."""
+      estimator_spec = _dnn_model_fn(
           features=features,
           labels=labels,
           mode=mode,
@@ -322,6 +347,15 @@ class DNNClassifier(estimator.Estimator):
           dropout=dropout,
           input_layer_partitioner=input_layer_partitioner,
           config=config)
+      # pylint: disable=protected-access
+      warm_start_settings = warm_starting_util._get_default_warm_start_settings(
+          warm_start_from)
+      if warm_start_settings:
+        warm_starting_util._warm_start(warm_start_settings)
+      # pylint: enable=protected-access
+
+      return estimator_spec
+
     super(DNNClassifier, self).__init__(
         model_fn=_model_fn, model_dir=model_dir, config=config)
 
@@ -354,6 +388,12 @@ class DNNRegressor(estimator.Estimator):
         l1_regularization_strength=0.001
       ))
 
+  # Or estimator with warm-starting from a previous checkpoint.
+  estimator = DNNRegressor(
+      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+      hidden_units=[1024, 512, 256],
+      warm_start_from="/path/to/checkpoint/dir")
+
   # Input builders
   def input_fn_train: # returns x, y
     pass
@@ -388,17 +428,20 @@ class DNNRegressor(estimator.Estimator):
   @end_compatibility
   """
 
-  def __init__(self,
-               hidden_units,
-               feature_columns,
-               model_dir=None,
-               label_dimension=1,
-               weight_column=None,
-               optimizer='Adagrad',
-               activation_fn=nn.relu,
-               dropout=None,
-               input_layer_partitioner=None,
-               config=None):
+  def __init__(
+      self,
+      hidden_units,
+      feature_columns,
+      model_dir=None,
+      label_dimension=1,
+      weight_column=None,
+      optimizer='Adagrad',
+      activation_fn=nn.relu,
+      dropout=None,
+      input_layer_partitioner=None,
+      config=None,
+      warm_start_from=None,
+  ):
     """Initializes a `DNNRegressor` instance.
 
     Args:
@@ -430,9 +473,16 @@ class DNNRegressor(estimator.Estimator):
       input_layer_partitioner: Optional. Partitioner for input layer. Defaults
         to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
       config: `RunConfig` object to configure the runtime settings.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights are warm-started, and it is assumed that vocabularies and Tensor
+        names are unchanged.
     """
+
     def _model_fn(features, labels, mode, config):
-      return _dnn_model_fn(
+      """Call the defined shared _dnn_model_fn and possibly warm-start."""
+      estimator_spec = _dnn_model_fn(
           features=features,
           labels=labels,
           mode=mode,
@@ -446,5 +496,14 @@ class DNNRegressor(estimator.Estimator):
           dropout=dropout,
           input_layer_partitioner=input_layer_partitioner,
           config=config)
+      # pylint: disable=protected-access
+      warm_start_settings = warm_starting_util._get_default_warm_start_settings(
+          warm_start_from)
+      if warm_start_settings:
+        warm_starting_util._warm_start(warm_start_settings)
+      # pylint: enable=protected-access
+
+      return estimator_spec
+
     super(DNNRegressor, self).__init__(
         model_fn=_model_fn, model_dir=model_dir, config=config)
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined.py b/tensorflow/python/estimator/canned/dnn_linear_combined.py
index 3c61bd5b07ba04193f0ed9de3567264b898114cf..1d06a54a321233722ba0736f7d658cd6029991f8 100644
--- a/tensorflow/python/estimator/canned/dnn_linear_combined.py
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined.py
@@ -23,6 +23,7 @@ import math
 import six
 
 from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator import warm_starting_util
 from tensorflow.python.estimator.canned import dnn
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.estimator.canned import linear
@@ -74,12 +75,19 @@ def _add_layer_summary(value, tag):
   summary.histogram('%s/activation' % tag, value)
 
 
-def _dnn_linear_combined_model_fn(
-    features, labels, mode, head,
-    linear_feature_columns=None, linear_optimizer='Ftrl',
-    dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None,
-    dnn_activation_fn=nn.relu, dnn_dropout=None,
-    input_layer_partitioner=None, config=None):
+def _dnn_linear_combined_model_fn(features,
+                                  labels,
+                                  mode,
+                                  head,
+                                  linear_feature_columns=None,
+                                  linear_optimizer='Ftrl',
+                                  dnn_feature_columns=None,
+                                  dnn_optimizer='Adagrad',
+                                  dnn_hidden_units=None,
+                                  dnn_activation_fn=nn.relu,
+                                  dnn_dropout=None,
+                                  input_layer_partitioner=None,
+                                  config=None):
   """Deep Neural Net and Linear combined model_fn.
 
   Args:
@@ -121,6 +129,7 @@ def _dnn_linear_combined_model_fn(
   if not linear_feature_columns and not dnn_feature_columns:
     raise ValueError(
         'Either linear_feature_columns or dnn_feature_columns must be defined.')
+
   num_ps_replicas = config.num_ps_replicas if config else 0
   input_layer_partitioner = input_layer_partitioner or (
       partitioned_variables.min_max_variable_partitioner(
@@ -243,7 +252,9 @@ class DNNLinearCombinedClassifier(estimator.Estimator):
           categorical_feature_a_emb, categorical_feature_b_emb,
           numeric_feature],
       dnn_hidden_units=[1000, 500, 100],
-      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...))
+      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...),
+      # warm-start settings
+      warm_start_from="/path/to/checkpoint/dir")
 
   # To apply L1 and L2 regularization, you can set optimizers as follows:
   tf.train.ProximalAdagradOptimizer(
@@ -297,7 +308,8 @@ class DNNLinearCombinedClassifier(estimator.Estimator):
                weight_column=None,
                label_vocabulary=None,
                input_layer_partitioner=None,
-               config=None):
+               config=None,
+               warm_start_from=None):
     """Initializes a DNNLinearCombinedClassifier instance.
 
     Args:
@@ -339,6 +351,11 @@ class DNNLinearCombinedClassifier(estimator.Estimator):
       input_layer_partitioner: Partitioner for input layer. Defaults to
         `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
       config: RunConfig object to configure the runtime settings.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights are warm-started, and it is assumed that vocabularies and Tensor
+        names are unchanged.
 
     Raises:
       ValueError: If both linear_feature_columns and dnn_features_columns are
@@ -360,8 +377,10 @@ class DNNLinearCombinedClassifier(estimator.Estimator):
           n_classes,
           weight_column=weight_column,
           label_vocabulary=label_vocabulary)
+
     def _model_fn(features, labels, mode, config):
-      return _dnn_linear_combined_model_fn(
+      """Call the _dnn_linear_combined_model_fn and possibly warm-start."""
+      estimator_spec = _dnn_linear_combined_model_fn(
           features=features,
           labels=labels,
           mode=mode,
@@ -375,6 +394,14 @@ class DNNLinearCombinedClassifier(estimator.Estimator):
           dnn_dropout=dnn_dropout,
           input_layer_partitioner=input_layer_partitioner,
           config=config)
+      # pylint: disable=protected-access
+      warm_start_settings = warm_starting_util._get_default_warm_start_settings(
+          warm_start_from)
+      if warm_start_settings:
+        warm_starting_util._warm_start(warm_start_settings)
+      # pylint: enable=protected-access
+
+      return estimator_spec
 
     super(DNNLinearCombinedClassifier, self).__init__(
         model_fn=_model_fn, model_dir=model_dir, config=config)
@@ -407,7 +434,9 @@ class DNNLinearCombinedRegressor(estimator.Estimator):
           categorical_feature_a_emb, categorical_feature_b_emb,
           numeric_feature],
       dnn_hidden_units=[1000, 500, 100],
-      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...))
+      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...),
+      # warm-start settings
+      warm_start_from="/path/to/checkpoint/dir")
 
   # To apply L1 and L2 regularization, you can set optimizers as follows:
   tf.train.ProximalAdagradOptimizer(
@@ -460,7 +489,8 @@ class DNNLinearCombinedRegressor(estimator.Estimator):
                label_dimension=1,
                weight_column=None,
                input_layer_partitioner=None,
-               config=None):
+               config=None,
+               warm_start_from=None):
     """Initializes a DNNLinearCombinedRegressor instance.
 
     Args:
@@ -496,6 +526,11 @@ class DNNLinearCombinedRegressor(estimator.Estimator):
       input_layer_partitioner: Partitioner for input layer. Defaults to
         `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
       config: RunConfig object to configure the runtime settings.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights are warm-started, and it is assumed that vocabularies and Tensor
+        names are unchanged.
 
     Raises:
       ValueError: If both linear_feature_columns and dnn_features_columns are
@@ -510,7 +545,8 @@ class DNNLinearCombinedRegressor(estimator.Estimator):
                        'must be defined.')
 
     def _model_fn(features, labels, mode, config):
-      return _dnn_linear_combined_model_fn(
+      """Call the _dnn_linear_combined_model_fn and possibly warm-start."""
+      estimator_spec = _dnn_linear_combined_model_fn(
           features=features,
           labels=labels,
           mode=mode,
@@ -526,6 +562,14 @@ class DNNLinearCombinedRegressor(estimator.Estimator):
           dnn_dropout=dnn_dropout,
           input_layer_partitioner=input_layer_partitioner,
           config=config)
+      # pylint: disable=protected-access
+      warm_start_settings = warm_starting_util._get_default_warm_start_settings(
+          warm_start_from)
+      if warm_start_settings:
+        warm_starting_util._warm_start(warm_start_settings)
+      # pylint: enable=protected-access
+
+      return estimator_spec
 
     super(DNNLinearCombinedRegressor, self).__init__(
         model_fn=_model_fn, model_dir=model_dir, config=config)
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
index 2151df8423774f0e6f9e51a114efe66472204962..84675bf2a4a1655026bbba37c5d7a63d2f788c46 100644
--- a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
@@ -26,6 +26,7 @@ import six
 
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
+from tensorflow.python.estimator import warm_starting_util
 from tensorflow.python.estimator.canned import dnn_linear_combined
 from tensorflow.python.estimator.canned import dnn_testing_utils
 from tensorflow.python.estimator.canned import linear_testing_utils
@@ -47,6 +48,7 @@ from tensorflow.python.training import gradient_descent
 from tensorflow.python.training import input as input_lib
 from tensorflow.python.training import optimizer as optimizer_lib
 
+
 try:
   # pylint: disable=g-import-not-at-top
   import pandas as pd
@@ -731,5 +733,156 @@ class DNNLinearCombinedTests(test.TestCase):
         next(est.predict(input_fn=input_fn)))
 
 
+class DNNLinearCombinedWarmStartingTest(test.TestCase):
+
+  def setUp(self):
+    # Create a directory to save our old checkpoint and vocabularies to.
+    self._ckpt_and_vocab_dir = tempfile.mkdtemp()
+
+    # Make a dummy input_fn.
+    def _input_fn():
+      features = {
+          'age': [[23.], [31.]],
+          'city': [['Palo Alto'], ['Mountain View']],
+      }
+      return features, [0, 1]
+
+    self._input_fn = _input_fn
+
+  def tearDown(self):
+    # Clean up checkpoint / vocab dir.
+    writer_cache.FileWriterCache.clear()
+    shutil.rmtree(self._ckpt_and_vocab_dir)
+
+  def test_classifier_basic_warm_starting(self):
+    """Tests correctness of DNNLinearCombinedClassifier default warm-start."""
+    age = feature_column.numeric_column('age')
+    city = feature_column.embedding_column(
+        feature_column.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNLinearCombinedClassifier and train to save a checkpoint.
+    dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
+        linear_feature_columns=[age],
+        dnn_feature_columns=[city],
+        dnn_hidden_units=[256, 128],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        linear_optimizer='SGD',
+        dnn_optimizer='SGD')
+    dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNLinearCombinedClassifier, warm-started from the first.
+    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
+    # have accumulator values that change).
+    warm_started_dnn_lc_classifier = (
+        dnn_linear_combined.DNNLinearCombinedClassifier(
+            linear_feature_columns=[age],
+            dnn_feature_columns=[city],
+            dnn_hidden_units=[256, 128],
+            n_classes=4,
+            linear_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            dnn_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            warm_start_from=dnn_lc_classifier.model_dir))
+
+    warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_lc_classifier.get_variable_names():
+      self.assertAllClose(
+          dnn_lc_classifier.get_variable_value(variable_name),
+          warm_started_dnn_lc_classifier.get_variable_value(variable_name))
+
+  def test_regressor_basic_warm_starting(self):
+    """Tests correctness of DNNLinearCombinedRegressor default warm-start."""
+    age = feature_column.numeric_column('age')
+    city = feature_column.embedding_column(
+        feature_column.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNLinearCombinedRegressor and train to save a checkpoint.
+    dnn_lc_regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
+        linear_feature_columns=[age],
+        dnn_feature_columns=[city],
+        dnn_hidden_units=[256, 128],
+        model_dir=self._ckpt_and_vocab_dir,
+        linear_optimizer='SGD',
+        dnn_optimizer='SGD')
+    dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNLinearCombinedRegressor, warm-started from the first.
+    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
+    # have accumulator values that change).
+    warm_started_dnn_lc_regressor = (
+        dnn_linear_combined.DNNLinearCombinedRegressor(
+            linear_feature_columns=[age],
+            dnn_feature_columns=[city],
+            dnn_hidden_units=[256, 128],
+            linear_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            dnn_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            warm_start_from=dnn_lc_regressor.model_dir))
+
+    warm_started_dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_lc_regressor.get_variable_names():
+      self.assertAllClose(
+          dnn_lc_regressor.get_variable_value(variable_name),
+          warm_started_dnn_lc_regressor.get_variable_value(variable_name))
+
+  def test_warm_starting_selective_variables(self):
+    """Tests selecting variables to warm-start."""
+    age = feature_column.numeric_column('age')
+    city = feature_column.embedding_column(
+        feature_column.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNLinearCombinedClassifier and train to save a checkpoint.
+    dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
+        linear_feature_columns=[age],
+        dnn_feature_columns=[city],
+        dnn_hidden_units=[256, 128],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        linear_optimizer='SGD',
+        dnn_optimizer='SGD')
+    dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNLinearCombinedClassifier, warm-started from the first.
+    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
+    # have accumulator values that change).
+    warm_started_dnn_lc_classifier = (
+        dnn_linear_combined.DNNLinearCombinedClassifier(
+            linear_feature_columns=[age],
+            dnn_feature_columns=[city],
+            dnn_hidden_units=[256, 128],
+            n_classes=4,
+            linear_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            dnn_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            # The provided regular expression will only warm-start the deep
+            # portion of the model.
+            warm_start_from=warm_starting_util.WarmStartSettings(
+                ckpt_to_initialize_from=dnn_lc_classifier.model_dir,
+                vars_to_warm_start='.*(dnn).*')))
+
+    warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_lc_classifier.get_variable_names():
+      if 'dnn' in variable_name:
+        self.assertAllClose(
+            dnn_lc_classifier.get_variable_value(variable_name),
+            warm_started_dnn_lc_classifier.get_variable_value(variable_name))
+      elif 'linear' in variable_name:
+        linear_values = warm_started_dnn_lc_classifier.get_variable_value(
+            variable_name)
+        # Since they're not warm-started, the linear weights will be
+        # zero-initialized.
+        self.assertAllClose(np.zeros_like(linear_values), linear_values)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/estimator/canned/dnn_test.py b/tensorflow/python/estimator/canned/dnn_test.py
index e005cec263b93b90cb710e020aaf33d54cb69e2f..fc90b7c35e5e3f63d4592989e5ebb393fb245360 100644
--- a/tensorflow/python/estimator/canned/dnn_test.py
+++ b/tensorflow/python/estimator/canned/dnn_test.py
@@ -73,6 +73,15 @@ class DNNLogitFnTest(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
                                                   dnn._dnn_logit_fn_builder)
 
 
+class DNNWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
+                          test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(self, _dnn_classifier_fn,
+                                                       _dnn_regressor_fn)
+
+
 class DNNClassifierEvaluateTest(
     dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
 
diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py
index 3ffca14261386b156771906fda80914971ea1c68..2bdec693033858fd3bbbb137259b2d129fc72797 100644
--- a/tensorflow/python/estimator/canned/dnn_testing_utils.py
+++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py
@@ -28,6 +28,7 @@ import six
 from tensorflow.core.framework import summary_pb2
 from tensorflow.python.client import session as tf_session
 from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator import warm_starting_util
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.estimator.canned import metric_keys
 from tensorflow.python.estimator.canned import prediction_keys
@@ -39,6 +40,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import partitioned_variables
@@ -49,6 +51,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.summary import summary as summary_lib
 from tensorflow.python.summary.writer import writer_cache
 from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import gradient_descent
 from tensorflow.python.training import monitored_session
 from tensorflow.python.training import optimizer
 from tensorflow.python.training import saver
@@ -64,6 +67,10 @@ HIDDEN_WEIGHTS_NAME_PATTERN = 'dnn/hiddenlayer_%d/kernel'
 HIDDEN_BIASES_NAME_PATTERN = 'dnn/hiddenlayer_%d/bias'
 LOGITS_WEIGHTS_NAME = 'dnn/logits/kernel'
 LOGITS_BIASES_NAME = 'dnn/logits/bias'
+OCCUPATION_EMBEDDING_NAME = ('dnn/input_from_feature_columns/input_layer/'
+                             'occupation_embedding/embedding_weights')
+CITY_EMBEDDING_NAME = ('dnn/input_from_feature_columns/input_layer/'
+                       'city_embedding/embedding_weights')
 
 
 def assert_close(expected, actual, rtol=1e-04, message='', name='assert_close'):
@@ -696,6 +703,301 @@ class BaseDNNLogitFnTest(object):
             self.assertAllClose(expected_logits, sess.run(logits))
 
 
+class BaseDNNWarmStartingTest(object):
+
+  def __init__(self, _dnn_classifier_fn, _dnn_regressor_fn):
+    self._dnn_classifier_fn = _dnn_classifier_fn
+    self._dnn_regressor_fn = _dnn_regressor_fn
+
+  def setUp(self):
+    # Create a directory to save our old checkpoint and vocabularies to.
+    self._ckpt_and_vocab_dir = tempfile.mkdtemp()
+
+    # Make a dummy input_fn.
+    def _input_fn():
+      features = {
+          'city': [['Palo Alto'], ['Mountain View']],
+          'locality': [['Palo Alto'], ['Mountain View']],
+          'occupation': [['doctor'], ['consultant']]
+      }
+      return features, [0, 1]
+
+    self._input_fn = _input_fn
+
+  def tearDown(self):
+    # Clean up checkpoint / vocab dir.
+    writer_cache.FileWriterCache.clear()
+    shutil.rmtree(self._ckpt_and_vocab_dir)
+
+  def assertAllNotClose(self, t1, t2):
+    """Helper assert for arrays."""
+    sum_of_abs_diff = 0.0
+    for x, y in zip(t1, t2):
+      try:
+        for a, b in zip(x, y):
+          sum_of_abs_diff += abs(b - a)
+      except TypeError:
+        sum_of_abs_diff += abs(y - x)
+    self.assertGreater(sum_of_abs_diff, 0)
+
+  def test_classifier_basic_warm_starting(self):
+    """Tests correctness of DNNClassifier default warm-start."""
+    city = feature_column.embedding_column(
+        feature_column.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNClassifier and train to save a checkpoint.
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=dnn_classifier.model_dir)
+
+    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_classifier.get_variable_names():
+      self.assertAllClose(
+          dnn_classifier.get_variable_value(variable_name),
+          warm_started_dnn_classifier.get_variable_value(variable_name))
+
+  def test_regressor_basic_warm_starting(self):
+    """Tests correctness of DNNRegressor default warm-start."""
+    city = feature_column.embedding_column(
+        feature_column.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNRegressor and train to save a checkpoint.
+    dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        model_dir=self._ckpt_and_vocab_dir,
+        optimizer='SGD')
+    dnn_regressor.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNRegressor, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=dnn_regressor.model_dir)
+
+    warm_started_dnn_regressor.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_regressor.get_variable_names():
+      self.assertAllClose(
+          dnn_regressor.get_variable_value(variable_name),
+          warm_started_dnn_regressor.get_variable_value(variable_name))
+
+  def test_warm_starting_selective_variables(self):
+    """Tests selecting variables to warm-start."""
+    city = feature_column.embedding_column(
+        feature_column.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNClassifier and train to save a checkpoint.
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        # The provided regular expression will only warm-start the city
+        # embedding, not the kernels and biases of the hidden weights.
+        warm_start_from=warm_starting_util.WarmStartSettings(
+            ckpt_to_initialize_from=dnn_classifier.model_dir,
+            vars_to_warm_start='.*(city).*'))
+
+    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_classifier.get_variable_names():
+      if 'city' in variable_name:
+        self.assertAllClose(
+            dnn_classifier.get_variable_value(variable_name),
+            warm_started_dnn_classifier.get_variable_value(variable_name))
+      elif 'bias' in variable_name:
+        # Hidden layer biases are zero-initialized.
+        bias_values = warm_started_dnn_classifier.get_variable_value(
+            variable_name)
+        self.assertAllClose(np.zeros_like(bias_values), bias_values)
+      elif 'kernel' in variable_name:
+        # We can't override the glorot uniform initializer used for the kernels
+        # in the dense layers, so just make sure we're not getting the same
+        # values from the old checkpoint.
+        self.assertAllNotClose(
+            dnn_classifier.get_variable_value(variable_name),
+            warm_started_dnn_classifier.get_variable_value(variable_name))
+
+  def test_warm_starting_with_vocab_remapping_and_partitioning(self):
+    """Tests warm-starting with vocab remapping and partitioning."""
+    vocab_list = ['doctor', 'lawyer', 'consultant']
+    vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
+    with open(vocab_file, 'w') as f:
+      f.write('\n'.join(vocab_list))
+    occupation = feature_column.embedding_column(
+        feature_column.categorical_column_with_vocabulary_file(
+            'occupation',
+            vocabulary_file=vocab_file,
+            vocabulary_size=len(vocab_list)),
+        dimension=2)
+
+    # Create a DNNClassifier and train to save a checkpoint.
+    partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2)
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[occupation],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD',
+        input_layer_partitioner=partitioner)
+    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).  Use a a new FeatureColumn with a
+    # different vocabulary for occupation.
+    new_vocab_list = ['doctor', 'consultant', 'engineer']
+    new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
+                                  'new_occupation_vocab')
+    with open(new_vocab_file, 'w') as f:
+      f.write('\n'.join(new_vocab_list))
+    new_occupation = feature_column.embedding_column(
+        feature_column.categorical_column_with_vocabulary_file(
+            'occupation',
+            vocabulary_file=new_vocab_file,
+            vocabulary_size=len(new_vocab_list)),
+        dimension=2)
+    # We can create our VocabInfo object from the new and old occupation
+    # FeatureColumn's.
+    occupation_vocab_info = warm_starting_util.VocabInfo(
+        new_vocab=new_occupation.categorical_column.vocabulary_file,
+        new_vocab_size=new_occupation.categorical_column.vocabulary_size,
+        num_oov_buckets=new_occupation.categorical_column.num_oov_buckets,
+        old_vocab=occupation.categorical_column.vocabulary_file,
+        old_vocab_size=occupation.categorical_column.vocabulary_size,
+        # Can't use constant_initializer with load_and_remap.  In practice,
+        # use a truncated normal initializer.
+        backup_initializer=init_ops.random_uniform_initializer(
+            minval=0.39, maxval=0.39))
+    warm_started_dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[occupation],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=warm_starting_util.WarmStartSettings(
+            ckpt_to_initialize_from=dnn_classifier.model_dir,
+            var_name_to_vocab_info={
+                OCCUPATION_EMBEDDING_NAME: occupation_vocab_info
+            },
+            # Explicitly providing None here will only warm-start variables
+            # referenced in var_name_to_vocab_info (no hidden weights will be
+            # warmstarted).
+            vars_to_warm_start=None),
+        input_layer_partitioner=partitioner)
+
+    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+    # 'doctor' was ID-0 and still ID-0.
+    self.assertAllClose(
+        dnn_classifier.get_variable_value(OCCUPATION_EMBEDDING_NAME)[0, :],
+        warm_started_dnn_classifier.get_variable_value(
+            OCCUPATION_EMBEDDING_NAME)[0, :])
+    # 'consultant' was ID-2 and now ID-1.
+    self.assertAllClose(
+        dnn_classifier.get_variable_value(OCCUPATION_EMBEDDING_NAME)[2, :],
+        warm_started_dnn_classifier.get_variable_value(
+            OCCUPATION_EMBEDDING_NAME)[1, :])
+    # 'engineer' is a new entry and should be initialized with the
+    # backup_initializer in VocabInfo.
+    self.assertAllClose([0.39] * 2,
+                        warm_started_dnn_classifier.get_variable_value(
+                            OCCUPATION_EMBEDDING_NAME)[2, :])
+    for variable_name in warm_started_dnn_classifier.get_variable_names():
+      if 'bias' in variable_name:
+        # Hidden layer biases are zero-initialized.
+        bias_values = warm_started_dnn_classifier.get_variable_value(
+            variable_name)
+        self.assertAllClose(np.zeros_like(bias_values), bias_values)
+      elif 'kernel' in variable_name:
+        # We can't override the glorot uniform initializer used for the kernels
+        # in the dense layers, so just make sure we're not getting the same
+        # values from the old checkpoint.
+        self.assertAllNotClose(
+            dnn_classifier.get_variable_value(variable_name),
+            warm_started_dnn_classifier.get_variable_value(variable_name))
+
+  def test_warm_starting_with_naming_change(self):
+    """Tests warm-starting with a Tensor name remapping."""
+    locality = feature_column.embedding_column(
+        feature_column.categorical_column_with_vocabulary_list(
+            'locality', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNClassifier and train to save a checkpoint.
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[locality],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    city = feature_column.embedding_column(
+        feature_column.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+    warm_started_dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        # The 'city' variable correspond to the 'locality' variable in the
+        # previous model.
+        warm_start_from=warm_starting_util.WarmStartSettings(
+            ckpt_to_initialize_from=dnn_classifier.model_dir,
+            var_name_to_prev_var_name={
+                CITY_EMBEDDING_NAME:
+                    CITY_EMBEDDING_NAME.replace('city', 'locality')
+            }))
+
+    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_classifier.get_variable_names():
+      if 'city' in variable_name:
+        self.assertAllClose(
+            dnn_classifier.get_variable_value(
+                CITY_EMBEDDING_NAME.replace('city', 'locality')),
+            warm_started_dnn_classifier.get_variable_value(CITY_EMBEDDING_NAME))
+      else:
+        self.assertAllClose(
+            dnn_classifier.get_variable_value(variable_name),
+            warm_started_dnn_classifier.get_variable_value(variable_name))
+
+
 class BaseDNNClassifierEvaluateTest(object):
 
   def __init__(self, dnn_classifier_fn):
diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py
index fa5d02c4767f9c21e7d0a3a2dad917f3cbf22c02..204e1119f2191457359ecaf9fd012fcb5a2b0463 100644
--- a/tensorflow/python/estimator/canned/head.py
+++ b/tensorflow/python/estimator/canned/head.py
@@ -54,11 +54,13 @@ _PREDICT_SERVING_KEY = 'predict'
 
 
 # A LossSpec contains
-# * a scalar `Tensor` representing weighted, sum-reduced loss
-# * a scalar `Tensor` representing the sum of example weights
+# * a scalar `Tensor` representing reduced weighted training loss
+# * a scalar `Tensor` representing the unreduced unweighted loss
+# * a scalar `Tensor` representing the example weights
 # * possibly processed labels (e.g. vocabulary lookup, shape manipulation, etc)
 LossSpec = collections.namedtuple(
-    'LossSpec', ['weighted_sum_loss', 'example_weight_sum', 'processed_labels'])
+    'LossSpec', ['training_loss', 'unreduced_loss', 'weights',
+                 'processed_labels'])
 
 
 def _summary_key(head_name, val):
@@ -159,8 +161,9 @@ class _Head(object):
 
     Returns:
       A LossSpec that contains
-      * the scalar `Tensor` representing weighted, sum-reduced loss
-      * the scalar `Tensor` representing the sum of example weights
+      * the scalar `Tensor` representing reduced weighted training loss
+      * the scalar `Tensor` representing the unreduced unweighted loss
+      * the scalar `Tensor` representing the example weights
       * possibly processed labels (e.g. vocabulary lookup, shape manipulation,
         etc.)
 
@@ -170,7 +173,8 @@ class _Head(object):
 
   @abc.abstractmethod
   def create_estimator_spec(
-      self, features, mode, logits, labels=None, train_op_fn=None):
+      self, features, mode, logits, labels=None, train_op_fn=None,
+      regularization_losses=None):
     """Returns `EstimatorSpec` that a model_fn can return.
 
     Please note that,
@@ -182,10 +186,12 @@ class _Head(object):
       logits: logits `Tensor` to be used by the head.
       labels: Labels `Tensor`, or `dict` of same.
       train_op_fn: Function that takes a scalar loss `Tensor` and returns an op
-          to optimize the model with the loss. This is used in TRAIN mode and
-          must not be None. None is allowed in other modes. If you want to
-          optimize loss yourself you can pass `no_op_train_fn` and then use
-          EstimatorSpec.loss to compute and apply gradients.
+        to optimize the model with the loss. This is used in TRAIN mode and
+        must not be None. None is allowed in other modes. If you want to
+        optimize loss yourself you can pass `no_op_train_fn` and then use
+        EstimatorSpec.loss to compute and apply gradients.
+      regularization_losses: A list of additional scalar losses to be added to
+        the training loss, such as regularization losses.
 
     Returns:
       `EstimatorSpec`.
@@ -456,10 +462,12 @@ def _recall_at_threshold(labels, predictions, weights, threshold, name=None):
     return array_ops.squeeze(precision_tensor), array_ops.squeeze(update_op)
 
 
-def _multi_class_head_with_softmax_cross_entropy_loss(n_classes,
-                                                      weight_column=None,
-                                                      label_vocabulary=None,
-                                                      name=None):
+def _multi_class_head_with_softmax_cross_entropy_loss(
+    n_classes,
+    weight_column=None,
+    label_vocabulary=None,
+    loss_reduction=losses.Reduction.SUM,
+    name=None):
   """Creates a '_Head' for multi class classification.
 
   The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`.
@@ -489,6 +497,8 @@ def _multi_class_head_with_softmax_cross_entropy_loss(n_classes,
       integer within [0, n_classes). If given, labels must be of string type and
       have any value in `label_vocabulary`. Note that errors will be raised if
       `label_vocabulary` is not provided but labels are strings.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
     name: name of the head. If provided, summary and metrics keys will be
       suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
 
@@ -496,16 +506,23 @@ def _multi_class_head_with_softmax_cross_entropy_loss(n_classes,
     An instance of `_Head` for multi class classification.
 
   Raises:
-    ValueError: if `n_classes`, `metric_class_ids` or `label_keys` is invalid.
+    ValueError: If `n_classes`, `label_vocabulary` or `loss_reduction` is
+      invalid.
   """
   if label_vocabulary is not None and not isinstance(label_vocabulary,
                                                      (list, tuple)):
     raise ValueError(
         'label_vocabulary should be a list or a tuple. Given type: {}'.format(
             type(label_vocabulary)))
-
-  return _MultiClassHeadWithSoftmaxCrossEntropyLoss(n_classes, weight_column,
-                                                    label_vocabulary, name)
+  if (loss_reduction not in losses.Reduction.all() or
+      loss_reduction == losses.Reduction.NONE):
+    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
+  return _MultiClassHeadWithSoftmaxCrossEntropyLoss(
+      n_classes=n_classes,
+      weight_column=weight_column,
+      label_vocabulary=label_vocabulary,
+      loss_reduction=loss_reduction,
+      name=name)
 
 
 class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head):
@@ -515,12 +532,14 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head):
                n_classes,
                weight_column=None,
                label_vocabulary=None,
+               loss_reduction=losses.Reduction.SUM,
                name=None):
     if (n_classes is None) or (n_classes <= 2):
       raise ValueError('n_classes must be > 2: %s.' % n_classes)
     self._n_classes = n_classes
     self._weight_column = weight_column
     self._label_vocabulary = label_vocabulary
+    self._loss_reduction = loss_reduction
     self._name = name
 
   @property
@@ -531,24 +550,20 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head):
   def logits_dimension(self):
     return self._n_classes
 
-  def _eval_metric_ops(self, labels, class_ids, weights, weighted_sum_loss,
-                       example_weight_sum):
+  def _eval_metric_ops(
+      self, labels, class_ids, weights, unreduced_loss, regularization_loss):
     """Returns the Eval metric ops."""
     with ops.name_scope(
         None, 'metrics',
-        (labels, class_ids, weights, weighted_sum_loss, example_weight_sum)):
+        (labels, class_ids, weights, unreduced_loss, regularization_loss)):
       keys = metric_keys.MetricKeys
       metric_ops = {
           # Estimator already adds a metric for loss.
           # TODO(xiejw): Any other metrics?
           _summary_key(self._name, keys.LOSS_MEAN):
               metrics_lib.mean(
-                  # Both values and weights here are reduced, scalar Tensors.
-                  # values is the actual mean we want -- weights represents the
-                  # total weight of the batch and is needed to calculate
-                  # update_op over many batches.
-                  values=(weighted_sum_loss / example_weight_sum),
-                  weights=example_weight_sum,
+                  values=unreduced_loss,
+                  weights=weights,
                   name=keys.LOSS_MEAN),
           _summary_key(self._name, keys.ACCURACY):
               metrics_lib.accuracy(
@@ -557,6 +572,11 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head):
                   weights=weights,
                   name=keys.ACCURACY),
       }
+      if regularization_loss is not None:
+        metric_ops[_summary_key(self._name, keys.LOSS_REGULARIZATION)] = (
+            metrics_lib.mean(
+                values=regularization_loss,
+                name=keys.LOSS_REGULARIZATION))
     return metric_ops
 
   def _label_ids(self, labels):
@@ -588,18 +608,17 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head):
     unweighted_loss = array_ops.expand_dims(unweighted_loss, axis=-1)
     weights = _get_weights_and_check_match_logits(
         features=features, weight_column=self._weight_column, logits=logits)
-    weighted_sum_loss = losses.compute_weighted_loss(
-        unweighted_loss, weights=weights, reduction=losses.Reduction.SUM)
-    # _weights() can return 1.
-    example_weight_sum = math_ops.reduce_sum(
-        weights * array_ops.ones_like(unweighted_loss))
+    training_loss = losses.compute_weighted_loss(
+        unweighted_loss, weights=weights, reduction=self._loss_reduction)
     return LossSpec(
-        weighted_sum_loss=weighted_sum_loss,
-        example_weight_sum=example_weight_sum,
+        training_loss=training_loss,
+        unreduced_loss=unweighted_loss,
+        weights=weights,
         processed_labels=label_ids)
 
   def create_estimator_spec(
-      self, features, mode, logits, labels=None, train_op_fn=None):
+      self, features, mode, logits, labels=None, train_op_fn=None,
+      regularization_losses=None):
     """Returns an `EstimatorSpec`.
 
     Args:
@@ -612,6 +631,12 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head):
         equals `TRAIN` or `EVAL`.
       train_op_fn: Function that takes a scalar loss `Tensor` and returns
         `train_op`. Required in TRAIN mode.
+      regularization_losses: A list of additional scalar losses to be added to
+        the training loss, such as regularization losses. These losses are
+        usually expressed as a batch average, so for best results users need to
+        set `loss_reduction=MEAN_PER_ELEMENT` or
+        `loss_reduction=SUM_BY_NONZERO_WEIGHTS` when creating the head to
+        avoid scaling errors.
     Returns:
       `EstimatorSpec`.
     Raises:
@@ -655,40 +680,62 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head):
                 _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
             })
 
-      weighted_sum_loss, example_weight_sum, label_ids = self.create_loss(
+      training_loss, unreduced_loss, weights, label_ids = self.create_loss(
           features=features, mode=mode, logits=logits, labels=labels)
+      if regularization_losses:
+        regularization_loss = math_ops.add_n(regularization_losses)
+        regularized_training_loss = math_ops.add_n(
+            [training_loss, regularization_loss])
+      else:
+        regularization_loss = None
+        regularized_training_loss = training_loss
       # Eval.
       if mode == model_fn.ModeKeys.EVAL:
         return model_fn.EstimatorSpec(
             mode=model_fn.ModeKeys.EVAL,
             predictions=predictions,
-            loss=weighted_sum_loss,
+            loss=regularized_training_loss,
             eval_metric_ops=self._eval_metric_ops(
                 labels=label_ids,
                 class_ids=class_ids,
-                weights=_weights(features, self._weight_column),
-                weighted_sum_loss=weighted_sum_loss,
-                example_weight_sum=example_weight_sum))
+                weights=weights,
+                unreduced_loss=unreduced_loss,
+                regularization_loss=regularization_loss))
 
       # Train.
       if train_op_fn is None:
         raise ValueError('train_op_fn cannot be None.')
+      # Only summarize mean_loss for SUM reduction to preserve backwards
+      # compatibility. Otherwise skip it to avoid unnecessary computation.
+      if self._loss_reduction == losses.Reduction.SUM:
+        example_weight_sum = math_ops.reduce_sum(
+            weights * array_ops.ones_like(unreduced_loss))
+        mean_loss = training_loss / example_weight_sum
+      else:
+        mean_loss = None
     with ops.name_scope(''):
+      keys = metric_keys.MetricKeys
       summary.scalar(
-          _summary_key(self._name, metric_keys.MetricKeys.LOSS),
-          weighted_sum_loss)
-      summary.scalar(
-          _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN),
-          weighted_sum_loss / example_weight_sum)
+          _summary_key(self._name, keys.LOSS),
+          regularized_training_loss)
+      if mean_loss is not None:
+        summary.scalar(
+            _summary_key(self._name, keys.LOSS_MEAN),
+            mean_loss)
+      if regularization_loss is not None:
+        summary.scalar(
+            _summary_key(self._name, keys.LOSS_REGULARIZATION),
+            regularization_loss)
     return model_fn.EstimatorSpec(
         mode=model_fn.ModeKeys.TRAIN,
         predictions=predictions,
-        loss=weighted_sum_loss,
-        train_op=train_op_fn(weighted_sum_loss))
+        loss=regularized_training_loss,
+        train_op=train_op_fn(regularized_training_loss))
 
 
 def _binary_logistic_head_with_sigmoid_cross_entropy_loss(
-    weight_column=None, thresholds=None, label_vocabulary=None, name=None):
+    weight_column=None, thresholds=None, label_vocabulary=None,
+    loss_reduction=losses.Reduction.SUM, name=None):
   """Creates a `_Head` for single label binary classification.
 
   This head uses `sigmoid_cross_entropy_with_logits` loss.
@@ -723,6 +770,8 @@ def _binary_logistic_head_with_sigmoid_cross_entropy_loss(
       [0, 1]. If given, labels must be string type and have any value in
       `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
       is not provided but labels are strings.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
     name: name of the head. If provided, summary and metrics keys will be
       suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
 
@@ -730,7 +779,8 @@ def _binary_logistic_head_with_sigmoid_cross_entropy_loss(
     An instance of `_Head` for binary classification.
 
   Raises:
-    ValueError: if `thresholds` contains a value outside of `(0, 1)`.
+    ValueError: If `thresholds` contains a value outside of `(0, 1)`.
+    ValueError: If `loss_reduction` is invalid.
   """
   thresholds = tuple(thresholds) if thresholds else tuple()
   if label_vocabulary is not None and not isinstance(label_vocabulary,
@@ -742,10 +792,14 @@ def _binary_logistic_head_with_sigmoid_cross_entropy_loss(
   for threshold in thresholds:
     if (threshold <= 0.0) or (threshold >= 1.0):
       raise ValueError('thresholds not in (0, 1): {}.'.format((thresholds,)))
+  if (loss_reduction not in losses.Reduction.all() or
+      loss_reduction == losses.Reduction.NONE):
+    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
   return _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(
       weight_column=weight_column,
       thresholds=thresholds,
       label_vocabulary=label_vocabulary,
+      loss_reduction=loss_reduction,
       name=name)
 
 
@@ -756,10 +810,12 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head):
                weight_column=None,
                thresholds=None,
                label_vocabulary=None,
+               loss_reduction=losses.Reduction.SUM,
                name=None):
     self._weight_column = weight_column
     self._thresholds = thresholds
     self._label_vocabulary = label_vocabulary
+    self._loss_reduction = loss_reduction
     self._name = name
 
   @property
@@ -771,10 +827,10 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head):
     return 1
 
   def _eval_metric_ops(self, labels, logits, logistic, class_ids, weights,
-                       weighted_sum_loss, example_weight_sum):
+                       unreduced_loss):
     with ops.name_scope(None, 'metrics',
                         (labels, logits, logistic, class_ids, weights,
-                         weighted_sum_loss, example_weight_sum)):
+                         unreduced_loss)):
       keys = metric_keys.MetricKeys
       labels_mean = _indicator_labels_mean(
           labels=labels, weights=weights, name=keys.LABEL_MEAN)
@@ -782,12 +838,8 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head):
           # Estimator already adds a metric for loss.
           _summary_key(self._name, keys.LOSS_MEAN):
               metrics_lib.mean(
-                  # Both values and weights here are reduced, scalar Tensors.
-                  # values is the actual mean we want -- weights represents the
-                  # total weight of the batch and is needed to calculate
-                  # update_op over many batches.
-                  values=(weighted_sum_loss / example_weight_sum),
-                  weights=example_weight_sum,
+                  values=unreduced_loss,
+                  weights=weights,
                   name=keys.LOSS_MEAN),
           _summary_key(self._name, keys.ACCURACY):
               metrics_lib.accuracy(
@@ -863,14 +915,12 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head):
         labels=labels, logits=logits)
     weights = _get_weights_and_check_match_logits(
         features=features, weight_column=self._weight_column, logits=logits)
-    weighted_sum_loss = losses.compute_weighted_loss(
-        unweighted_loss, weights=weights, reduction=losses.Reduction.SUM)
-    # _weights() can return 1.
-    example_weight_sum = math_ops.reduce_sum(
-        weights * array_ops.ones_like(unweighted_loss))
+    training_loss = losses.compute_weighted_loss(
+        unweighted_loss, weights=weights, reduction=self._loss_reduction)
     return LossSpec(
-        weighted_sum_loss=weighted_sum_loss,
-        example_weight_sum=example_weight_sum,
+        training_loss=training_loss,
+        unreduced_loss=unweighted_loss,
+        weights=weights,
         processed_labels=labels)
 
   def create_estimator_spec(
@@ -919,47 +969,55 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head):
                 _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
             })
 
-      (weighted_sum_loss, example_weight_sum,
-       processed_labels) = self.create_loss(
-           features=features, mode=mode, logits=logits, labels=labels)
+      (training_loss, unreduced_loss, weights, processed_labels) = (
+          self.create_loss(
+              features=features, mode=mode, logits=logits, labels=labels))
 
       # Eval.
       if mode == model_fn.ModeKeys.EVAL:
-        weights = _get_weights_and_check_match_logits(
-            features=features, weight_column=self._weight_column, logits=logits)
         return model_fn.EstimatorSpec(
             mode=model_fn.ModeKeys.EVAL,
             predictions=predictions,
-            loss=weighted_sum_loss,
+            loss=training_loss,
             eval_metric_ops=self._eval_metric_ops(
                 labels=processed_labels,
                 logits=logits,
                 logistic=logistic,
                 class_ids=class_ids,
                 weights=weights,
-                weighted_sum_loss=weighted_sum_loss,
-                example_weight_sum=example_weight_sum))
+                unreduced_loss=unreduced_loss))
 
       # Train.
       if train_op_fn is None:
         raise ValueError('train_op_fn can not be None.')
+      # Only summarize mean_loss for SUM reduction to preserve backwards
+      # compatibility. Otherwise skip it to avoid unnecessary computation.
+      if self._loss_reduction == losses.Reduction.SUM:
+        example_weight_sum = math_ops.reduce_sum(
+            weights * array_ops.ones_like(unreduced_loss))
+        mean_loss = training_loss / example_weight_sum
+      else:
+        mean_loss = None
     with ops.name_scope(''):
       summary.scalar(
           _summary_key(self._name, metric_keys.MetricKeys.LOSS),
-          weighted_sum_loss)
-      summary.scalar(
-          _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN),
-          weighted_sum_loss / example_weight_sum)
+          training_loss)
+      if mean_loss is not None:
+        summary.scalar(
+            _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN),
+            mean_loss)
     return model_fn.EstimatorSpec(
         mode=model_fn.ModeKeys.TRAIN,
         predictions=predictions,
-        loss=weighted_sum_loss,
-        train_op=train_op_fn(weighted_sum_loss))
+        loss=training_loss,
+        train_op=train_op_fn(training_loss))
 
 
-def _regression_head_with_mean_squared_error_loss(weight_column=None,
-                                                  label_dimension=1,
-                                                  name=None):
+def _regression_head_with_mean_squared_error_loss(
+    weight_column=None,
+    label_dimension=1,
+    loss_reduction=losses.Reduction.SUM,
+    name=None):
   """Creates a `_Head` for regression using the `mean_squared_error` loss.
 
   The loss is the weighted sum over all input dimensions. Namely, if the input
@@ -985,27 +1043,42 @@ def _regression_head_with_mean_squared_error_loss(weight_column=None,
     label_dimension: Number of regression labels per example. This is the size
       of the last dimension of the labels `Tensor` (typically, this has shape
       `[batch_size, label_dimension]`).
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
     name: name of the head. If provided, summary and metrics keys will be
       suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
 
   Returns:
     An instance of `_Head` for linear regression.
+
+  Raises:
+    ValueError: If `label_dimension` or `loss_reduction` is invalid.
   """
+  if (loss_reduction not in losses.Reduction.all() or
+      loss_reduction == losses.Reduction.NONE):
+    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
   return _RegressionHeadWithMeanSquaredErrorLoss(
       weight_column=weight_column,
       label_dimension=label_dimension,
+      loss_reduction=loss_reduction,
       name=name)
 
 
 class _RegressionHeadWithMeanSquaredErrorLoss(_Head):
   """`Head` for regression using the mean squared loss."""
 
-  def __init__(self, label_dimension, weight_column=None, name=None):
+  def __init__(
+      self,
+      label_dimension,
+      weight_column=None,
+      loss_reduction=losses.Reduction.SUM,
+      name=None):
     """`Head` for regression."""
     if label_dimension < 1:
       raise ValueError('Invalid label_dimension %s.' % label_dimension)
     self._logits_dimension = label_dimension
     self._weight_column = weight_column
+    self._loss_reduction = loss_reduction
     self._name = name
 
   @property
@@ -1029,14 +1102,12 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head):
     weights = _get_weights_and_check_match_logits(
         features=features, weight_column=self._weight_column, logits=logits,
         allow_per_logit_weights=True)
-    weighted_sum_loss = losses.compute_weighted_loss(
-        unweighted_loss, weights=weights, reduction=losses.Reduction.SUM)
-    # _weights() can return 1.
-    example_weight_sum = math_ops.reduce_sum(
-        weights * array_ops.ones_like(unweighted_loss))
+    training_loss = losses.compute_weighted_loss(
+        unweighted_loss, weights=weights, reduction=self._loss_reduction)
     return LossSpec(
-        weighted_sum_loss=weighted_sum_loss,
-        example_weight_sum=example_weight_sum,
+        training_loss=training_loss,
+        unreduced_loss=unweighted_loss,
+        weights=weights,
         processed_labels=labels)
 
   def create_estimator_spec(
@@ -1074,7 +1145,7 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head):
                 _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
             })
 
-      weighted_sum_loss, example_weight_sum, _ = self.create_loss(
+      training_loss, unreduced_loss, weights, _ = self.create_loss(
           features=features, mode=mode, logits=logits, labels=labels)
 
       # Eval.
@@ -1083,34 +1154,39 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head):
         eval_metric_ops = {
             _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN):
                 metrics_lib.mean(
-                    # Both values and weights here are reduced, scalar Tensors.
-                    # values is the actual mean we want -- weights represents
-                    # the total weight of the batch and is needed to calculate
-                    # update_op over many batches.
-                    values=(weighted_sum_loss / example_weight_sum),
-                    weights=example_weight_sum)
+                    values=unreduced_loss,
+                    weights=weights)
         }
         return model_fn.EstimatorSpec(
             mode=model_fn.ModeKeys.EVAL,
             predictions=predictions,
-            loss=weighted_sum_loss,
+            loss=training_loss,
             eval_metric_ops=eval_metric_ops)
 
       # Train.
       if train_op_fn is None:
         raise ValueError('train_op_fn can not be None.')
+      # Only summarize mean_loss for SUM reduction to preserve backwards
+      # compatibility. Otherwise skip it to avoid unnecessary computation.
+      if self._loss_reduction == losses.Reduction.SUM:
+        example_weight_sum = math_ops.reduce_sum(
+            weights * array_ops.ones_like(unreduced_loss))
+        mean_loss = training_loss / example_weight_sum
+      else:
+        mean_loss = None
     with ops.name_scope(''):
       summary.scalar(
           _summary_key(self._name, metric_keys.MetricKeys.LOSS),
-          weighted_sum_loss)
-      summary.scalar(
-          _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN),
-          weighted_sum_loss / example_weight_sum)
+          training_loss)
+      if mean_loss is not None:
+        summary.scalar(
+            _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN),
+            mean_loss)
     return model_fn.EstimatorSpec(
         mode=model_fn.ModeKeys.TRAIN,
         predictions=predictions,
-        loss=weighted_sum_loss,
-        train_op=train_op_fn(weighted_sum_loss))
+        loss=training_loss,
+        train_op=train_op_fn(training_loss))
 
 
 def _assert_range(labels, n_classes, message=None):
diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py
index f3afd84125d8758fec61d9afc08a64a0210c1f6d..28b8e635fb483252edf68a4140bb57c3b99fb96a 100644
--- a/tensorflow/python/estimator/canned/head_test.py
+++ b/tensorflow/python/estimator/canned/head_test.py
@@ -39,6 +39,7 @@ from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import string_ops
+from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import test
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.training import monitored_session
@@ -100,6 +101,16 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
       head_lib._multi_class_head_with_softmax_cross_entropy_loss(
           n_classes=2)
 
+  def test_invalid_loss_reduction(self):
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
+      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+          n_classes=3, loss_reduction='invalid_loss_reduction')
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: none'):
+      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+          n_classes=3, loss_reduction=losses.Reduction.NONE)
+
   def test_invalid_logits_shape(self):
     n_classes = 3
     head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
@@ -149,7 +160,7 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
     # Dynamic shape.
     labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
     logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.EVAL,
         logits=logits_placeholder,
@@ -158,7 +169,7 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[2 2\]'):
-        weighted_sum_loss.eval({
+        training_loss.eval({
             logits_placeholder: logits_2x3,
             labels_placeholder: labels_2x2
         })
@@ -203,21 +214,21 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
 
     labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
     logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features={'x': np.array(((42.,),))},
         mode=model_fn.ModeKeys.EVAL,
         logits=logits_placeholder,
         labels=labels_placeholder)[0]
     with self.test_session():
       with self.assertRaisesOpError('Label IDs must < n_classes'):
-        weighted_sum_loss.eval({
+        training_loss.eval({
             labels_placeholder: labels_2x1_with_large_id,
             logits_placeholder: logits_2x3
         })
 
     with self.test_session():
       with self.assertRaisesOpError('Label IDs must >= 0'):
-        weighted_sum_loss.eval({
+        training_loss.eval({
             labels_placeholder: labels_2x1_with_negative_id,
             logits_placeholder: logits_2x3
         })
@@ -264,7 +275,7 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
     # Dynamic shape.
     labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
     logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.EVAL,
         logits=logits_placeholder,
@@ -273,7 +284,7 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[3 1\]'):
-        weighted_sum_loss.eval({
+        training_loss.eval({
             labels_placeholder: values_3x1,
             logits_placeholder: values_2x3
         })
@@ -383,9 +394,9 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
     labels = np.array(((1,), (1,)), dtype=np.int64)
     features = {'x': np.array(((42,),), dtype=np.int32)}
     # loss = cross_entropy(labels, logits) = [10, 0].
-    expected_weighted_sum_loss = 10.
+    expected_training_loss = 10.
     # Create loss.
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
@@ -393,10 +404,7 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          weighted_sum_loss.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
 
   def test_eval_labels_none(self):
     """Tests that error is raised when labels is None."""
@@ -476,6 +484,52 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
     ]
     self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys())
 
+  def test_eval_with_regularization_losses(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, loss_reduction=losses.Reduction.MEAN)
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    regularization_losses = [1.5, 0.5]
+    expected_regularization_loss = 2.
+    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
+    #                    = sum(10, 0) / 2 = 5.
+    expected_unregularized_loss = 5.
+    expected_regularized_loss = (
+        expected_unregularized_loss + expected_regularization_loss)
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels,
+        regularization_losses=regularization_losses)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: expected_unregularized_loss,
+        keys.LOSS_REGULARIZATION: expected_regularization_loss,
+        keys.ACCURACY: 0.5,  # 1 of 2 labels is correct.
+    }
+
+    # Assert predictions, loss, and metrics.
+    tol = 1e-2
+    with self.test_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(expected_regularized_loss, loss, rtol=tol, atol=tol)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval()
+                             for k in value_ops},
+          rtol=tol,
+          atol=tol)
+
   def test_eval_with_label_vocabulary_create_loss(self):
     n_classes = 3
     head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
@@ -484,8 +538,8 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
     labels = [[b'iroh'], [b'iroh']]
     features = {'x': np.array(((42,),), dtype=np.int32)}
     # loss = cross_entropy(labels, logits) = [10, 0].
-    expected_weighted_sum_loss = 10.
-    weighted_sum_loss = head.create_loss(
+    expected_training_loss = 10.
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
@@ -493,10 +547,7 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          weighted_sum_loss.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
 
   def test_eval_with_label_vocabulary(self):
     n_classes = 3
@@ -584,27 +635,61 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
           rtol=tol, atol=tol)
 
   def test_train_create_loss(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3)
 
     logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
     labels = np.array(((1,), (1,)), dtype=np.int64)
     features = {'x': np.array(((42,),), dtype=np.int32)}
 
-    # loss = cross_entropy(labels, logits) = [10, 0].
-    expected_weighted_sum_loss = 10.
-    weighted_sum_loss = head.create_loss(
+    # unreduced_loss = cross_entropy(labels, logits) = [10, 0].
+    expected_unreduced_loss = [[10.], [0.]]
+    # Weights default to 1.
+    expected_weights = 1.
+    # training_loss = 1 * 10 + 1 * 0
+    expected_training_loss = 10.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
-        labels=labels)[0]
+        labels=labels)
+    tol = 1e-2
+    with self.test_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(expected_weights, actual_weights)
+
+  def test_train_create_loss_loss_reduction(self):
+    """Tests create_loss with loss_reduction."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
+
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+
+    # unreduced_loss = cross_entropy(labels, logits) = [10, 0].
+    expected_unreduced_loss = [[10.], [0.]]
+    # Weights default to 1.
+    expected_weights = 1.
+    # training_loss = 1 * 10 + 1 * 0 / num_nonzero_weights
+    expected_training_loss = 10. / 2.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    tol = 1e-2
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          weighted_sum_loss.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(expected_weights, actual_weights)
 
   def test_train_labels_none(self):
     """Tests that error is raised when labels is None."""
@@ -702,10 +787,55 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
               expected_loss / 2,
       }, summary_str, tol)
 
-  def test_train_with_one_dim_label_and_weights_create_loss(self):
+  def test_train_with_regularization_losses(self):
     n_classes = 3
     head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, weight_column='label_weights')
+        n_classes, loss_reduction=losses.Reduction.MEAN)
+
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=2)])
+
+    regularization_losses = [1.5, 0.5]
+    expected_regularization_loss = 2.
+    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
+    #                    = sum(10, 0) / 2 = 5.
+    # loss = unregularized_loss + regularization_loss = 7.
+    expected_loss = 7.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn,
+        regularization_losses=regularization_losses)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-2
+    with self.test_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
+          train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          metric_keys.MetricKeys.LOSS_REGULARIZATION: (
+              expected_regularization_loss),
+      }, summary_str, tol)
+
+  def test_train_one_dim_create_loss(self):
+    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, weight_column='label_weights')
 
     logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32)
     labels_rank_1 = np.array((1, 2, 2,), dtype=np.int64)
@@ -715,33 +845,30 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
         'label_weights': weights_rank_1
     }
 
-    # loss = cross_entropy(labels, logits) = [10, 10, 0].
-    # weighted sum loss = 1 * 10 + 2 * 10 + 3 * 0 = 30.
-    expected_weighted_sum_loss = 30.
-    # example weight sum = 1 + 2 + 3
-    expected_example_weight_sum = 6.
-    weighted_sum_loss, example_weight_sum, _ = head.create_loss(
+    # unreduced_loss = cross_entropy(labels, logits) = [10, 10, 0].
+    expected_unreduced_loss = [[10.], [10.], [0.]]
+    # weights are reshaped to [3, 1] to match logits.
+    expected_weights = [[1.], [2.], [3.]]
+    # training_loss = 1 * 10 + 2 * 10 + 3 * 0 = 30.
+    expected_training_loss = 30.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
         labels=labels_rank_1)
+    tol = 1e-2
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          weighted_sum_loss.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
       self.assertAllClose(
-          expected_example_weight_sum,
-          example_weight_sum.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(expected_weights, actual_weights.eval())
 
-  def test_train_with_one_dim_label_and_weights(self):
-    n_classes = 3
+  def test_train_one_dim(self):
+    """Tests train with 1D labels and weights (shape [batch_size])."""
     head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, weight_column='label_weights')
+        n_classes=3, weight_column='label_weights')
 
     logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32)
     labels_rank_1 = np.array((1, 2, 2,), dtype=np.int64)
@@ -803,8 +930,8 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
     labels = [[b'iroh'], [b'iroh']]
     features = {'x': np.array(((42,),), dtype=np.int32)}
     # loss = cross_entropy(labels, logits) = [10, 0].
-    expected_weighted_sum_loss = 10.
-    weighted_sum_loss = head.create_loss(
+    expected_training_loss = 10.
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
@@ -812,10 +939,7 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          weighted_sum_loss.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
 
   def test_train_with_vocabulary(self):
     n_classes = 3
@@ -909,22 +1033,25 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
     labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
     weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
 
-    # loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]].
-    # weighted_sum_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5
-    expected_weighted_sum_loss = 55.5
-    expected_example_weight_sum = np.sum(weights)
-    weighted_sum_loss, example_weight_sum, _ = head.create_loss(
+    # unreduced_loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]].
+    expected_unreduced_loss = [[[0.], [12.]], [[0.], [15.]]]
+    # weights are reshaped to [2, 2, 1] to match logits.
+    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
+    # training_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5
+    expected_training_loss = 55.5
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
         features={'weights': weights},
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
         labels=labels)
+    tol = 1e-2
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss, weighted_sum_loss.eval(),
-          rtol=1e-2, atol=1e-2)
+          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
       self.assertAllClose(
-          expected_example_weight_sum, example_weight_sum.eval())
+          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(expected_weights, actual_weights.eval())
 
   def test_multi_dim_weighted_train(self):
     """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2]."""
@@ -1067,6 +1194,16 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
       head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
           thresholds=(0.5, 1.))
 
+  def test_invalid_loss_reduction(self):
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
+      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+          loss_reduction='invalid_loss_reduction')
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: none'):
+      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+          loss_reduction=losses.Reduction.NONE)
+
   def test_invalid_logits_shape(self):
     head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
     self.assertEqual(1, head.logits_dimension)
@@ -1112,7 +1249,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     # Dynamic shape.
     labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
     logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features={'x': np.array(((42.,),))},
         mode=model_fn.ModeKeys.EVAL,
         logits=logits_placeholder,
@@ -1121,7 +1258,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[2 2\]'):
-        weighted_sum_loss.eval({
+        training_loss.eval({
             logits_placeholder: logits_2x1,
             labels_placeholder: labels_2x2
         })
@@ -1153,7 +1290,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     # Dynamic shape.
     labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
     logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features={'x': values_2x1},
         mode=model_fn.ModeKeys.EVAL,
         logits=logits_placeholder,
@@ -1162,7 +1299,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           r'\[expected_labels_shape: \] \[3 1\] \[labels_shape: \] \[2 1\]'):
-        weighted_sum_loss.eval({
+        training_loss.eval({
             labels_placeholder: values_2x1,
             logits_placeholder: values_3x1
         })
@@ -1170,7 +1307,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[3 1\]'):
-        weighted_sum_loss.eval({
+        training_loss.eval({
             labels_placeholder: values_3x1,
             logits_placeholder: values_2x1
         })
@@ -1254,9 +1391,9 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     features = {'x': np.array(((42,),), dtype=np.int32)}
 
     # loss = cross_entropy(labels, logits) = [0, 41].
-    expected_weighted_sum_loss = 41.
+    expected_training_loss = 41.
     # Create loss.
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
@@ -1264,10 +1401,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          weighted_sum_loss.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
 
   def test_eval_labels_none(self):
     """Tests that error is raised when labels is None."""
@@ -1358,14 +1492,14 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     labels = [[b'iroh'], [b'iroh']]
     features = {'x': np.array(((42,),), dtype=np.int32)}
     # Create loss.
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
         labels=labels)[0]
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(41., weighted_sum_loss.eval())
+      self.assertAllClose(41., training_loss.eval())
 
   def test_eval_with_vocabulary_list(self):
     head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
@@ -1401,9 +1535,9 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     # loss = -ln(probabilities[label[i]])) = [-ln(0.269), -ln(0.731)]
     #      = [1.31304389, 0.31334182]
     # weighted sum loss = 1.62638571
-    expected_weighted_sum_loss = 1.62638571
+    expected_training_loss = 1.62638571
     # Create loss.
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
@@ -1411,10 +1545,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          weighted_sum_loss.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
 
   def test_eval_with_thresholds(self):
     thresholds = [0.25, 0.5, 0.75]
@@ -1477,17 +1608,49 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     logits = np.array(((45,), (-41,),), dtype=np.float32)
     labels = np.array(((1,), (1,),), dtype=np.float64)
     features = {'x': np.array(((42,),), dtype=np.float32)}
-    # loss = cross_entropy(labels, logits) = [0, 41].
-    expected_weighted_sum_loss = 41.
+    # unreduced_loss = cross_entropy(labels, logits) = [0, 41]
+    expected_unreduced_loss = [[0.], [41.]]
+    # weights default to 1.
+    expected_weights = 1.
+    # training loss = 1 * 0 + 1 * 41
+    expected_training_loss = 41.
     # Create loss.
-    weighted_sum_loss = head.create_loss(
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
-        labels=labels)[0]
+        labels=labels)
+    with self.test_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights)
+
+  def test_train_create_loss_loss_reduction(self):
+    """Tests create_loss with loss_reduction."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
+
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.float64)
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    # unreduced_loss = cross_entropy(labels, logits) = [0, 41]
+    expected_unreduced_loss = [[0.], [41.]]
+    # weights default to 1.
+    expected_weights = 1.
+    # training loss = (1 * 0 + 1 * 41) / num_nonzero_weights
+    expected_training_loss = 41. / 2.
+    # Create loss.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_weighted_sum_loss, weighted_sum_loss.eval())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights)
 
   def test_train_labels_none(self):
     """Tests that error is raised when labels is None."""
@@ -1598,9 +1761,9 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     #         -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))]
     #      = [0.57407698418, 0.67435524446]
     # weighted sum loss = 0.57407698418 + 0.67435524446
-    expected_weighted_sum_loss = 1.24843222864
+    expected_training_loss = 1.24843222864
     # Create loss.
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
@@ -1608,10 +1771,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          weighted_sum_loss.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
 
   def test_float_labels_train(self):
     head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
@@ -1658,9 +1818,9 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     #         -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))]
     #      = [0.57407698418, 0.67435524446]
     # weighted sum loss = 0.57407698418 + 0.67435524446
-    expected_weighted_sum_loss = 1.24843222864
+    expected_training_loss = 1.24843222864
     # Create loss.
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
@@ -1668,10 +1828,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          weighted_sum_loss.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
 
   def test_float_labels_eval(self):
     head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
@@ -1790,8 +1947,8 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
       self.assertAllClose(
           expected_metrics, {k: value_ops[k].eval() for k in value_ops})
 
-  def test_train_with_one_dim_labels_and_weights_create_loss(self):
-    """3 examples, 1 batch."""
+  def test_train_one_dim_create_loss(self):
+    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
     head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
         weight_column='label_weights')
 
@@ -1803,13 +1960,14 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
         'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
         'label_weights': weights_rank_1,
     }
-    # losses = cross_entropy(labels, logits) = [0, 41, 44]
-    # weighted sum loss = 1 * 0 + .1 * 41 + 1.5 * 44
-    expected_weighted_sum_loss = 70.1
-    # example weight sum = 1 + 0.1 + 1.5
-    expected_example_weight_sum = 2.6
+    # unreduced_loss = cross_entropy(labels, logits) = [0, 41, 44]
+    expected_unreduced_loss = [[0.], [41.], [44.]]
+    # weights are reshaped to [3, 1] to match logits.
+    expected_weights = [[1.], [.1], [1.5]]
+    # training loss = 1 * 0 + .1 * 41 + 1.5 * 44
+    expected_training_loss = 70.1
     # Create loss.
-    weighted_sum_loss, example_weight_sum, _ = head.create_loss(
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
@@ -1817,18 +1975,15 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss,
-          weighted_sum_loss.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_training_loss, training_loss.eval(),
+          rtol=1e-2, atol=1e-2)
       self.assertAllClose(
-          expected_example_weight_sum,
-          example_weight_sum.eval(),
-          rtol=1e-2,
-          atol=1e-2)
+          expected_unreduced_loss, unreduced_loss.eval(),
+          rtol=1e-2, atol=1e-2)
+      self.assertAllClose(expected_weights, actual_weights.eval())
 
-  def test_train_with_one_dim_labels_and_weights(self):
-    """3 examples, 1 batch."""
+  def test_train_one_dim(self):
+    """Tests train with 1D labels and weights (shape [batch_size])."""
     head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
         weight_column='label_weights')
 
@@ -1933,12 +2088,14 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
     labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
     weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
-    # loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
-    # weighted_sum_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
-    expected_weighted_sum_loss = 40.
-    expected_example_weight_sum = np.sum(weights)
+    # unreduced_loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
+    expected_unreduced_loss = [[[10.], [0.]], [[0.], [12.]]]
+    # Weights are reshaped to [2, 2, 1] to match logits.
+    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
+    # training_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
+    expected_training_loss = 40.
     # Create loss.
-    weighted_sum_loss, example_weight_sum, _ = head.create_loss(
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
         features={'weights': weights},
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
@@ -1947,10 +2104,12 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       self.assertAllClose(
-          expected_weighted_sum_loss, weighted_sum_loss.eval(),
+          expected_training_loss, training_loss.eval(),
           rtol=tol, atol=tol)
       self.assertAllClose(
-          expected_example_weight_sum, example_weight_sum.eval())
+          expected_unreduced_loss, unreduced_loss.eval(),
+          rtol=tol, atol=tol)
+      self.assertAllClose(expected_weights, actual_weights.eval())
 
   def test_multi_dim_weighted_train(self):
     """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
@@ -2096,6 +2255,16 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, r'Invalid label_dimension'):
       head_lib._regression_head_with_mean_squared_error_loss(label_dimension=0)
 
+  def test_invalid_loss_reduction(self):
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
+      head_lib._regression_head_with_mean_squared_error_loss(
+          loss_reduction='invalid_loss_reduction')
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: none'):
+      head_lib._regression_head_with_mean_squared_error_loss(
+          loss_reduction=losses.Reduction.NONE)
+
   def test_invalid_logits(self):
     head = head_lib._regression_head_with_mean_squared_error_loss(
         label_dimension=3)
@@ -2154,7 +2323,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
             labels_placeholder: values_3d,
             logits_placeholder: values_1d
         })
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features={'x': values_1d},
         mode=model_fn.ModeKeys.EVAL,
         logits=logits_placeholder,
@@ -2163,7 +2332,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           r'\[expected_labels_shape: \] \[2 3\] \[labels_shape: \] \[2 1\]'):
-        weighted_sum_loss.eval({
+        training_loss.eval({
             labels_placeholder: values_1d,
             logits_placeholder: values_3d
         })
@@ -2206,7 +2375,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
             labels_placeholder: values_3d,
             logits_placeholder: values_1d
         })
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features={'x': values_1d},
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits_placeholder,
@@ -2215,7 +2384,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           r'\[expected_labels_shape: \] \[2 3\] \[labels_shape: \] \[2 1\]'):
-        weighted_sum_loss.eval({
+        training_loss.eval({
             labels_placeholder: values_1d,
             logits_placeholder: values_3d
         })
@@ -2261,7 +2430,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
     labels = np.array(((43,), (44,),), dtype=np.int32)
     features = {'x': np.array(((42,),), dtype=np.float32)}
     # Create loss.
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
@@ -2269,7 +2438,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
       # loss = [(43-45)^2, (44-41)] = [4, 9]
-      self.assertAllClose(13., weighted_sum_loss.eval())
+      self.assertAllClose(13., training_loss.eval())
 
   def test_eval_labels_none(self):
     """Tests that error is raised when labels is None."""
@@ -2348,16 +2517,48 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
     logits = np.array(((45,), (41,),), dtype=np.float32)
     labels = np.array(((43,), (44,),), dtype=np.int32)
     features = {'x': np.array(((42,),), dtype=np.float32)}
+    # unreduced_loss = [(43-45)^2, (44-41)] = [4, 9]
+    expected_unreduced_loss = [[4.], [9.]]
+    # weights default to 1.
+    expected_weights = 1
+    # training_loss = 1 * 4 + 1 * 9 = 13
+    expected_training_loss = 13.
     # Create loss.
-    weighted_sum_loss = head.create_loss(
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
-        labels=labels)[0]
+        labels=labels)
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
-      # loss = [(43-45)^2, (44-41)] = [4, 9]
-      self.assertAllClose(13., weighted_sum_loss.eval())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights)
+
+  def test_train_create_loss_loss_reduction(self):
+    """Tests create_loss with loss_reduction."""
+    head = head_lib._regression_head_with_mean_squared_error_loss(
+        loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
+    logits = np.array(((45,), (41,),), dtype=np.float32)
+    labels = np.array(((43,), (44,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    # unreduced_loss = [(43-45)^2, (44-41)] = [4, 9]
+    expected_unreduced_loss = [[4.], [9.]]
+    # weights default to 1.
+    expected_weights = 1
+    # training_loss = (1 * 4 + 1 * 9) / num_nonzero_weights
+    expected_training_loss = 13. / 2.
+    # Create loss.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    with self.test_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights)
 
   def test_train_labels_none(self):
     """Tests that error is raised when labels is None."""
@@ -2588,34 +2789,35 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
           metric_keys.MetricKeys.LOSS_MEAN: 39.0769231,
       }, summary_str)
 
-  def test_test_with_one_dim_label_and_weight_create_loss(self):
-    """1d label, 3 examples, 1 batch."""
+  def test_train_one_dim_create_loss(self):
+    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
     head = head_lib._regression_head_with_mean_squared_error_loss(
         weight_column='label_weights')
     logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
     x_feature_rank_1 = np.array((42., 43., 44.,), dtype=np.float32)
     weight_rank_1 = np.array((1., .1, 1.5,), dtype=np.float64)
     labels_rank_1 = np.array((35., 42., 45.,))
-    # loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
-    # weighted sum loss = 100 * 1 + 1 * .1 + 1.5 * 1 = 101.6
-    expected_unreduced_loss = 101.6
-    # example weight sum = 1 + 0.1 + 1.5
-    expected_example_weight_sum = 2.6
+    # unreduced_loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
+    expected_unreduced_loss = [[100.], [1.], [1.]]
+    # weights are reshaped to [3, 1] to match logits.
+    expected_weights = [[1.], [.1], [1.5]]
+    # training_loss = 100 * 1 + 1 * .1 + 1.5 * 1 = 101.6
+    expected_training_loss = 101.6
     features = {'x': x_feature_rank_1, 'label_weights': weight_rank_1}
     # Create loss.
-    weighted_sum_loss, example_weight_sum, _ = head.create_loss(
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
         labels=labels_rank_1)
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_unreduced_loss, weighted_sum_loss.eval())
-      self.assertAllClose(expected_example_weight_sum,
-                          example_weight_sum.eval())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights.eval())
 
-  def test_with_one_dim_label_and_weight(self):
-    """1d label, 3 examples, 1 batch."""
+  def test_train_one_dim(self):
+    """Tests train with 1D labels and weights (shape [batch_size])."""
     head = head_lib._regression_head_with_mean_squared_error_loss(
         weight_column='label_weights')
     self.assertEqual(1, head.logits_dimension)
@@ -2683,7 +2885,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
         'label_weights': np.array(((1., .1, 1.5),))
     }
     # Create loss.
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
@@ -2692,7 +2894,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
       _initialize_variables(self, monitored_session.Scaffold())
       # loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
       # weighted sum loss = 1 * 100 + .1 * 1 + 1.5 * 1 = 101.6
-      self.assertAllClose(101.6, weighted_sum_loss.eval())
+      self.assertAllClose(101.6, training_loss.eval())
 
   def test_weighted_multi_value_eval(self):
     """3d label, 1 example, 1 batch."""
@@ -2752,7 +2954,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
         'label_weights': np.array(((1., .1, 1.5),))
     }
     # Create loss.
-    weighted_sum_loss = head.create_loss(
+    training_loss = head.create_loss(
         features=features,
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
@@ -2761,7 +2963,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
       _initialize_variables(self, monitored_session.Scaffold())
       # loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
       # weighted sum loss = 1 * 100 + .1 * 1 + 1.5 * 1 = 101.6
-      self.assertAllClose(101.6, weighted_sum_loss.eval())
+      self.assertAllClose(101.6, training_loss.eval())
 
   def test_weighted_multi_value_train(self):
     """3d label, 1 example, 1 batch."""
@@ -2943,24 +3145,26 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase):
     labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
                        [[23., 24., 25.], [34., 35., 36.]]])
     weights = np.array([[1., 1.5], [2., 2.5]])
-    expected_weighted_sum_loss = np.sum(
+    expected_unreduced_loss = [[[1., 1., 1.], [4., 4., 4.]],
+                               [[9., 9., 9.], [16., 16., 16.]]]
+    expected_training_loss = np.sum(
         np.array([[[1. * x for x in [1., 1., 1.]],
                    [1.5 * x for x in [4., 4., 4.]]],
                   [[2. * x for x in [9., 9., 9.]],
                    [2.5 * x for x in [16., 16., 16.]]]]))
-    # Weights are expanded to [2, 2, label_dimension].
-    expected_example_weight_sum = np.sum(weights) * label_dimension
+    # Weights are expanded to [2, 2, 1] to match logits.
+    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
     # Create loss.
-    weighted_sum_loss, example_weight_sum, _ = head.create_loss(
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
         features={'label_weights': weights},
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
         labels=labels)
     with self.test_session():
       _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_weighted_sum_loss, weighted_sum_loss.eval())
-      self.assertAllClose(
-          expected_example_weight_sum, example_weight_sum.eval())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights.eval())
 
   def test_multi_dim_weighted_train(self):
     """Logits, labels of shape [2, 2, 3], weight shape [2, 2]."""
diff --git a/tensorflow/python/estimator/canned/linear.py b/tensorflow/python/estimator/canned/linear.py
index 8658ee38e99a5a6ba16560774302a1d6de8bc49e..97cfd24a101edbb88bca54fe3e213d126002779b 100644
--- a/tensorflow/python/estimator/canned/linear.py
+++ b/tensorflow/python/estimator/canned/linear.py
@@ -23,11 +23,15 @@ import math
 import six
 
 from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator import warm_starting_util
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.estimator.canned import optimizers
 from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import nn
 from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.summary import summary
 from tensorflow.python.training import ftrl
 from tensorflow.python.training import training_util
 
@@ -42,6 +46,26 @@ def _get_default_optimizer(feature_columns):
   return ftrl.FtrlOptimizer(learning_rate=learning_rate)
 
 
+def _compute_fraction_of_zero(cols_to_vars):
+  """Given a linear cols_to_vars dict, compute the fraction of zero weights.
+
+  Args:
+    cols_to_vars: A dictionary mapping FeatureColumns to lists of tf.Variables
+      like one returned from feature_column_lib.linear_model.
+
+  Returns:
+    The fraction of zeros (sparsity) in the linear model.
+  """
+  all_weight_vars = []
+  for var_or_var_list in cols_to_vars.values():
+    # Skip empty-lists associated with columns that created no Variables.
+    if var_or_var_list:
+      all_weight_vars += [
+          array_ops.reshape(var, [-1]) for var in var_or_var_list
+      ]
+  return nn.zero_fraction(array_ops.concat(all_weight_vars, axis=0))
+
+
 def _linear_logit_fn_builder(units, feature_columns):
   """Function builder for a linear logit_fn.
 
@@ -66,8 +90,22 @@ def _linear_logit_fn_builder(units, feature_columns):
     Returns:
       A `Tensor` representing the logits.
     """
-    return feature_column_lib.linear_model(
-        features=features, feature_columns=feature_columns, units=units)
+    cols_to_vars = {}
+    logits = feature_column_lib.linear_model(
+        features=features,
+        feature_columns=feature_columns,
+        units=units,
+        cols_to_vars=cols_to_vars)
+    bias = cols_to_vars.pop('bias')
+    if units > 1:
+      summary.histogram('bias', bias)
+    else:
+      # If units == 1, the bias value is a length-1 list of a scalar Tensor,
+      # so we should provide a scalar summary.
+      summary.scalar('bias', bias[0][0])
+    summary.scalar('fraction_of_zero_weights',
+                   _compute_fraction_of_zero(cols_to_vars))
+    return logits
 
   return linear_logit_fn
 
@@ -98,6 +136,7 @@ def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer,
   if not isinstance(features, dict):
     raise ValueError('features should be a dictionary of `Tensor`s. '
                      'Given type: {}'.format(type(features)))
+
   optimizer = optimizers.get_optimizer_instance(
       optimizer or _get_default_optimizer(feature_columns),
       learning_rate=_LEARNING_RATE)
@@ -159,6 +198,13 @@ class LinearClassifier(estimator.Estimator):
         l1_regularization_strength=0.001
       ))
 
+  # Or estimator with warm-starting from a previous checkpoint.
+  estimator = LinearClassifier(
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b],
+      warm_start_from="/path/to/checkpoint/dir")
+
+
   # Input builders
   def input_fn_train: # returns x, y (where y represents label's class index).
     ...
@@ -198,7 +244,8 @@ class LinearClassifier(estimator.Estimator):
                label_vocabulary=None,
                optimizer='Ftrl',
                config=None,
-               partitioner=None):
+               partitioner=None,
+               warm_start_from=None):
     """Construct a `LinearClassifier` estimator object.
 
     Args:
@@ -230,6 +277,11 @@ class LinearClassifier(estimator.Estimator):
         to FTRL optimizer.
       config: `RunConfig` object to configure the runtime settings.
       partitioner: Optional. Partitioner for input layer.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights and biases are warm-started, and it is assumed that vocabularies
+        and Tensor names are unchanged.
 
     Returns:
       A `LinearClassifier` estimator.
@@ -245,8 +297,10 @@ class LinearClassifier(estimator.Estimator):
       head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
           n_classes, weight_column=weight_column,
           label_vocabulary=label_vocabulary)
+
     def _model_fn(features, labels, mode, config):
-      return _linear_model_fn(
+      """Call the defined shared _linear_model_fn and possibly warm-start."""
+      estimator_spec = _linear_model_fn(
           features=features,
           labels=labels,
           mode=mode,
@@ -255,6 +309,15 @@ class LinearClassifier(estimator.Estimator):
           optimizer=optimizer,
           partitioner=partitioner,
           config=config)
+      # pylint: disable=protected-access
+      warm_start_settings = warm_starting_util._get_default_warm_start_settings(
+          warm_start_from)
+      if warm_start_settings:
+        warm_starting_util._warm_start(warm_start_settings)
+      # pylint: enable=protected-access
+
+      return estimator_spec
+
     super(LinearClassifier, self).__init__(
         model_fn=_model_fn,
         model_dir=model_dir,
@@ -279,6 +342,13 @@ class LinearRegressor(estimator.Estimator):
       feature_columns=[categorical_column_a,
                        categorical_feature_a_x_categorical_feature_b])
 
+  # Or estimator with warm-starting from a previous checkpoint.
+  estimator = LinearRegressor(
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b],
+      warm_start_from="/path/to/checkpoint/dir")
+
+
   # Input builders
   def input_fn_train: # returns x, y
     ...
@@ -317,7 +387,8 @@ class LinearRegressor(estimator.Estimator):
                weight_column=None,
                optimizer='Ftrl',
                config=None,
-               partitioner=None):
+               partitioner=None,
+               warm_start_from=None):
     """Initializes a `LinearRegressor` instance.
 
     Args:
@@ -341,11 +412,18 @@ class LinearRegressor(estimator.Estimator):
         to FTRL optimizer.
       config: `RunConfig` object to configure the runtime settings.
       partitioner: Optional. Partitioner for input layer.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights and biases are warm-started, and it is assumed that vocabularies
+        and Tensor names are unchanged.
     """
     head = head_lib._regression_head_with_mean_squared_error_loss(  # pylint: disable=protected-access
         label_dimension=label_dimension, weight_column=weight_column)
+
     def _model_fn(features, labels, mode, config):
-      return _linear_model_fn(
+      """Call the defined shared _linear_model_fn and possibly warm-start."""
+      estimator_spec = _linear_model_fn(
           features=features,
           labels=labels,
           mode=mode,
@@ -354,6 +432,15 @@ class LinearRegressor(estimator.Estimator):
           optimizer=optimizer,
           partitioner=partitioner,
           config=config)
+      # pylint: disable=protected-access
+      warm_start_settings = warm_starting_util._get_default_warm_start_settings(
+          warm_start_from)
+      if warm_start_settings:
+        warm_starting_util._warm_start(warm_start_settings)
+      # pylint: enable=protected-access
+
+      return estimator_spec
+
     super(LinearRegressor, self).__init__(
         model_fn=_model_fn,
         model_dir=model_dir,
diff --git a/tensorflow/python/estimator/canned/linear_test.py b/tensorflow/python/estimator/canned/linear_test.py
index 907ab4801f451985c67da7ddd9d945775976bd07..59a230417d1692664ac3555cbf40cfa039c95be9 100644
--- a/tensorflow/python/estimator/canned/linear_test.py
+++ b/tensorflow/python/estimator/canned/linear_test.py
@@ -119,8 +119,6 @@ class LinearClassifierIntegrationTest(
 
 
 # Tests for Linear logit_fn.
-
-
 class LinearLogitFnTest(linear_testing_utils.BaseLinearLogitFnTest,
                         test.TestCase):
 
@@ -129,5 +127,15 @@ class LinearLogitFnTest(linear_testing_utils.BaseLinearLogitFnTest,
     linear_testing_utils.BaseLinearLogitFnTest.__init__(self)
 
 
+# Tests for warm-starting with Linear logit_fn.
+class LinearWarmStartingTest(linear_testing_utils.BaseLinearWarmStartingTest,
+                             test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearWarmStartingTest.__init__(
+        self, _linear_classifier_fn, _linear_regressor_fn)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py
index 138b75a9d6b03bf29b94866a1024b3fb7ae7f075..cccb9af4b21daca45b9db5b921cd6a0a726edb7e 100644
--- a/tensorflow/python/estimator/canned/linear_testing_utils.py
+++ b/tensorflow/python/estimator/canned/linear_testing_utils.py
@@ -31,6 +31,7 @@ from tensorflow.core.example import feature_pb2
 from tensorflow.python.client import session as tf_session
 from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator import run_config
+from tensorflow.python.estimator import warm_starting_util
 from tensorflow.python.estimator.canned import linear
 from tensorflow.python.estimator.canned import metric_keys
 from tensorflow.python.estimator.export import export
@@ -43,17 +44,20 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
+from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 from tensorflow.python.summary.writer import writer_cache
 from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import gradient_descent
 from tensorflow.python.training import input as input_lib
-from tensorflow.python.training import optimizer
+from tensorflow.python.training import optimizer as optimizer_lib
 from tensorflow.python.training import queue_runner
 from tensorflow.python.training import saver
 from tensorflow.python.training import session_run_hook
@@ -74,6 +78,7 @@ except ImportError:
 # Names of variables created by model.
 AGE_WEIGHT_NAME = 'linear/linear_model/age/weights'
 HEIGHT_WEIGHT_NAME = 'linear/linear_model/height/weights'
+OCCUPATION_WEIGHT_NAME = 'linear/linear_model/occupation/weights'
 BIAS_NAME = 'linear/linear_model/bias_weights'
 LANGUAGE_WEIGHT_NAME = 'linear/linear_model/language/weights'
 
@@ -94,7 +99,7 @@ def assert_close(expected, actual, rtol=1e-04, name='assert_close'):
 
 
 def save_variables_to_ckpt(model_dir):
-  init_all_op = [variables.global_variables_initializer()]
+  init_all_op = [variables_lib.global_variables_initializer()]
   with tf_session.Session() as sess:
     sess.run(init_all_op)
     saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
@@ -139,7 +144,7 @@ class CheckPartitionerVarHook(session_run_hook.SessionRunHook):
       partitioned_weight = variable_scope.get_variable(
           self._var_name, shape=(self._var_dim, 1))
       self._test_case.assertTrue(
-          isinstance(partitioned_weight, variables.PartitionedVariable))
+          isinstance(partitioned_weight, variables_lib.PartitionedVariable))
       for part in partitioned_weight:
         self._test_case.assertEqual(self._var_dim // self._partitions,
                                     part.get_shape()[0])
@@ -240,9 +245,9 @@ class BaseLinearRegressorEvaluationTest(object):
 
   def test_evaluation_for_simple_data(self):
     with ops.Graph().as_default():
-      variables.Variable([[11.0]], name=AGE_WEIGHT_NAME)
-      variables.Variable([2.0], name=BIAS_NAME)
-      variables.Variable(
+      variables_lib.Variable([[11.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([2.0], name=BIAS_NAME)
+      variables_lib.Variable(
           100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
@@ -262,9 +267,9 @@ class BaseLinearRegressorEvaluationTest(object):
   def test_evaluation_batch(self):
     """Tests evaluation for batch_size==2."""
     with ops.Graph().as_default():
-      variables.Variable([[11.0]], name=AGE_WEIGHT_NAME)
-      variables.Variable([2.0], name=BIAS_NAME)
-      variables.Variable(
+      variables_lib.Variable([[11.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([2.0], name=BIAS_NAME)
+      variables_lib.Variable(
           100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
@@ -287,9 +292,9 @@ class BaseLinearRegressorEvaluationTest(object):
   def test_evaluation_weights(self):
     """Tests evaluation with weights."""
     with ops.Graph().as_default():
-      variables.Variable([[11.0]], name=AGE_WEIGHT_NAME)
-      variables.Variable([2.0], name=BIAS_NAME)
-      variables.Variable(
+      variables_lib.Variable([[11.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([2.0], name=BIAS_NAME)
+      variables_lib.Variable(
           100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
@@ -318,10 +323,10 @@ class BaseLinearRegressorEvaluationTest(object):
     x_dim = 3
     label_dim = 2
     with ops.Graph().as_default():
-      variables.Variable(
+      variables_lib.Variable(
           [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name=AGE_WEIGHT_NAME)
-      variables.Variable([7.0, 8.0], name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      variables_lib.Variable([7.0, 8.0], name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
     linear_regressor = self._linear_regressor_fn(
@@ -352,10 +357,10 @@ class BaseLinearRegressorEvaluationTest(object):
 
   def test_evaluation_for_multiple_feature_columns(self):
     with ops.Graph().as_default():
-      variables.Variable([[10.0]], name=AGE_WEIGHT_NAME)
-      variables.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
-      variables.Variable([5.0], name=BIAS_NAME)
-      variables.Variable(
+      variables_lib.Variable([[10.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
+      variables_lib.Variable([5.0], name=BIAS_NAME)
+      variables_lib.Variable(
           100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
@@ -401,9 +406,9 @@ class BaseLinearRegressorPredictTest(object):
   def test_1d(self):
     """Tests predict when all variables are one-dimensional."""
     with ops.Graph().as_default():
-      variables.Variable([[10.]], name='linear/linear_model/x/weights')
-      variables.Variable([.2], name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      variables_lib.Variable([[10.]], name='linear/linear_model/x/weights')
+      variables_lib.Variable([.2], name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
     linear_regressor = self._linear_regressor_fn(
@@ -428,12 +433,12 @@ class BaseLinearRegressorPredictTest(object):
     x_dim = 4
     feature_columns = (feature_column_lib.numeric_column('x', shape=(x_dim,)),)
     with ops.Graph().as_default():
-      variables.Variable(  # shape=[x_dim, label_dimension]
+      variables_lib.Variable(  # shape=[x_dim, label_dimension]
           [[1., 2., 3.], [2., 3., 4.], [3., 4., 5.], [4., 5., 6.]],
           name='linear/linear_model/x/weights')
-      variables.Variable(  # shape=[label_dimension]
+      variables_lib.Variable(  # shape=[label_dimension]
           [.2, .4, .6], name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
     linear_regressor = self._linear_regressor_fn(
@@ -457,10 +462,10 @@ class BaseLinearRegressorPredictTest(object):
   def testTwoFeatureColumns(self):
     """Tests predict with two feature columns."""
     with ops.Graph().as_default():
-      variables.Variable([[10.]], name='linear/linear_model/x0/weights')
-      variables.Variable([[20.]], name='linear/linear_model/x1/weights')
-      variables.Variable([.2], name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      variables_lib.Variable([[10.]], name='linear/linear_model/x0/weights')
+      variables_lib.Variable([[20.]], name='linear/linear_model/x1/weights')
+      variables_lib.Variable([.2], name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
     linear_regressor = self._linear_regressor_fn(
@@ -690,8 +695,8 @@ class BaseLinearRegressorTrainingTest(object):
         return control_flow_ops.no_op()
 
     mock_optimizer = test.mock.NonCallableMock(
-        spec=optimizer.Optimizer,
-        wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
+        spec=optimizer_lib.Optimizer,
+        wraps=optimizer_lib.Optimizer(use_locking=False, name='my_optimizer'))
     mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
 
     # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
@@ -810,9 +815,9 @@ class BaseLinearRegressorTrainingTest(object):
     bias = 5.0
     initial_global_step = 100
     with ops.Graph().as_default():
-      variables.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
-      variables.Variable([bias], name=BIAS_NAME)
-      variables.Variable(
+      variables_lib.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([bias], name=BIAS_NAME)
+      variables_lib.Variable(
           initial_global_step,
           name=ops.GraphKeys.GLOBAL_STEP,
           dtype=dtypes.int64)
@@ -843,9 +848,9 @@ class BaseLinearRegressorTrainingTest(object):
     bias = 5.0
     initial_global_step = 100
     with ops.Graph().as_default():
-      variables.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
-      variables.Variable([bias], name=BIAS_NAME)
-      variables.Variable(
+      variables_lib.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([bias], name=BIAS_NAME)
+      variables_lib.Variable(
           initial_global_step,
           name=ops.GraphKeys.GLOBAL_STEP,
           dtype=dtypes.int64)
@@ -910,8 +915,8 @@ class BaseLinearClassifierTrainingTest(object):
         return state_ops.assign_add(global_step, 1).op
 
     mock_optimizer = test.mock.NonCallableMock(
-        spec=optimizer.Optimizer,
-        wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
+        spec=optimizer_lib.Optimizer,
+        wraps=optimizer_lib.Optimizer(use_locking=False, name='my_optimizer'))
     mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
 
     # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
@@ -1124,10 +1129,11 @@ class BaseLinearClassifierTrainingTest(object):
     bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
     initial_global_step = 100
     with ops.Graph().as_default():
-      variables.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
           dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
@@ -1184,10 +1190,11 @@ class BaseLinearClassifierTrainingTest(object):
     bias = [-35.0]
     initial_global_step = 100
     with ops.Graph().as_default():
-      variables.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
           dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
@@ -1228,10 +1235,11 @@ class BaseLinearClassifierTrainingTest(object):
     bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
     initial_global_step = 100
     with ops.Graph().as_default():
-      variables.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
           dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
@@ -1310,9 +1318,9 @@ class BaseLinearClassifierEvaluationTest(object):
     bias = [-30.0] if n_classes == 2 else [-30.0] * n_classes
 
     with ops.Graph().as_default():
-      variables.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
           100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
@@ -1372,10 +1380,11 @@ class BaseLinearClassifierEvaluationTest(object):
     bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
     initial_global_step = 100
     with ops.Graph().as_default():
-      variables.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
           dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
@@ -1445,10 +1454,11 @@ class BaseLinearClassifierEvaluationTest(object):
     bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
     initial_global_step = 100
     with ops.Graph().as_default():
-      variables.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
           dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
@@ -1539,9 +1549,9 @@ class BaseLinearClassifierPredictTest(object):
     bias = [10.0] if n_classes == 2 else [10.0] * n_classes
 
     with ops.Graph().as_default():
-      variables.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
       save_variables_to_ckpt(self._model_dir)
 
     est = self._linear_classifier_fn(
@@ -1815,12 +1825,12 @@ class BaseLinearLogitFnTest(object):
     with ops.Graph().as_default():
       logit_fn = linear._linear_logit_fn_builder(units=2, feature_columns=[age])
       logits = logit_fn(features={'age': [[23.], [31.]]})
-      with variable_scope.variable_scope('linear_model', reuse=True):
-        bias_var = variable_scope.get_variable('bias_weights')
+      bias_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
+                                    'linear_model/bias_weights')[0]
       age_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
                                    'linear_model/age')[0]
       with tf_session.Session() as sess:
-        sess.run([variables.global_variables_initializer()])
+        sess.run([variables_lib.global_variables_initializer()])
         self.assertAllClose([[0., 0.], [0., 0.]], logits.eval())
         sess.run(bias_var.assign([10., 5.]))
         self.assertAllClose([[10., 5.], [10., 5.]], logits.eval())
@@ -1828,3 +1838,262 @@ class BaseLinearLogitFnTest(object):
         # [2 * 23 + 10, 3 * 23 + 5] = [56, 74].
         # [2 * 31 + 10, 3 * 31 + 5] = [72, 98]
         self.assertAllClose([[56., 74.], [72., 98.]], logits.eval())
+
+  def test_compute_fraction_of_zero(self):
+    """Tests the calculation of sparsity."""
+    age = feature_column_lib.numeric_column('age')
+    occupation = feature_column_lib.categorical_column_with_hash_bucket(
+        'occupation', hash_bucket_size=5)
+    with ops.Graph().as_default():
+      cols_to_vars = {}
+      feature_column_lib.linear_model(
+          features={
+              'age': [[23.], [31.]],
+              'occupation': [['doctor'], ['engineer']]
+          },
+          feature_columns=[age, occupation],
+          units=3,
+          cols_to_vars=cols_to_vars)
+      cols_to_vars.pop('bias')
+      fraction_zero = linear._compute_fraction_of_zero(cols_to_vars)
+      age_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
+                                   'linear_model/age')[0]
+      with tf_session.Session() as sess:
+        sess.run([variables_lib.global_variables_initializer()])
+        # Upon initialization, all variables will be zero.
+        self.assertAllClose(1, fraction_zero.eval())
+
+        sess.run(age_var.assign([[2.0, 0.0, -1.0]]))
+        # 1 of the 3 age weights are zero, and all of the 15 (5 hash buckets
+        # x 3-dim output) are zero.
+        self.assertAllClose(16. / 18., fraction_zero.eval())
+
+
+class BaseLinearWarmStartingTest(object):
+
+  def __init__(self, _linear_classifier_fn, _linear_regressor_fn):
+    self._linear_classifier_fn = _linear_classifier_fn
+    self._linear_regressor_fn = _linear_regressor_fn
+
+  def setUp(self):
+    # Create a directory to save our old checkpoint and vocabularies to.
+    self._ckpt_and_vocab_dir = tempfile.mkdtemp()
+
+    # Make a dummy input_fn.
+    def _input_fn():
+      features = {
+          'age': [[23.], [31.]],
+          'age_in_years': [[23.], [31.]],
+          'occupation': [['doctor'], ['consultant']]
+      }
+      return features, [0, 1]
+
+    self._input_fn = _input_fn
+
+  def tearDown(self):
+    # Clean up checkpoint / vocab dir.
+    writer_cache.FileWriterCache.clear()
+    shutil.rmtree(self._ckpt_and_vocab_dir)
+
+  def test_classifier_basic_warm_starting(self):
+    """Tests correctness of LinearClassifier default warm-start."""
+    age = feature_column_lib.numeric_column('age')
+
+    # Create a LinearClassifier and train to save a checkpoint.
+    linear_classifier = self._linear_classifier_fn(
+        feature_columns=[age],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second LinearClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_linear_classifier = self._linear_classifier_fn(
+        feature_columns=[age],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=linear_classifier.model_dir)
+
+    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_linear_classifier.get_variable_names():
+      self.assertAllClose(
+          linear_classifier.get_variable_value(variable_name),
+          warm_started_linear_classifier.get_variable_value(variable_name))
+
+  def test_regressor_basic_warm_starting(self):
+    """Tests correctness of LinearRegressor default warm-start."""
+    age = feature_column_lib.numeric_column('age')
+
+    # Create a LinearRegressor and train to save a checkpoint.
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=[age],
+        model_dir=self._ckpt_and_vocab_dir,
+        optimizer='SGD')
+    linear_regressor.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second LinearRegressor, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_linear_regressor = self._linear_regressor_fn(
+        feature_columns=[age],
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=linear_regressor.model_dir)
+
+    warm_started_linear_regressor.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_linear_regressor.get_variable_names():
+      self.assertAllClose(
+          linear_regressor.get_variable_value(variable_name),
+          warm_started_linear_regressor.get_variable_value(variable_name))
+
+  def test_warm_starting_selective_variables(self):
+    """Tests selecting variables to warm-start."""
+    age = feature_column_lib.numeric_column('age')
+
+    # Create a LinearClassifier and train to save a checkpoint.
+    linear_classifier = self._linear_classifier_fn(
+        feature_columns=[age],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second LinearClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_linear_classifier = self._linear_classifier_fn(
+        feature_columns=[age],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        # The provided regular expression will only warm-start the age variable
+        # and not the bias.
+        warm_start_from=warm_starting_util.WarmStartSettings(
+            ckpt_to_initialize_from=linear_classifier.model_dir,
+            vars_to_warm_start='.*(age).*'))
+
+    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+    self.assertAllClose(
+        linear_classifier.get_variable_value(AGE_WEIGHT_NAME),
+        warm_started_linear_classifier.get_variable_value(AGE_WEIGHT_NAME))
+    # Bias should still be zero from initialization.
+    self.assertAllClose(
+        [0.0] * 4, warm_started_linear_classifier.get_variable_value(BIAS_NAME))
+
+  def test_warm_starting_with_vocab_remapping_and_partitioning(self):
+    """Tests warm-starting with vocab remapping and partitioning."""
+    vocab_list = ['doctor', 'lawyer', 'consultant']
+    vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
+    with open(vocab_file, 'w') as f:
+      f.write('\n'.join(vocab_list))
+    occupation = feature_column_lib.categorical_column_with_vocabulary_file(
+        'occupation',
+        vocabulary_file=vocab_file,
+        vocabulary_size=len(vocab_list))
+
+    # Create a LinearClassifier and train to save a checkpoint.
+    partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2)
+    linear_classifier = self._linear_classifier_fn(
+        feature_columns=[occupation],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD',
+        partitioner=partitioner)
+    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second LinearClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).  Use a a new FeatureColumn with a
+    # different vocabulary for occupation.
+    new_vocab_list = ['doctor', 'consultant', 'engineer']
+    new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
+                                  'new_occupation_vocab')
+    with open(new_vocab_file, 'w') as f:
+      f.write('\n'.join(new_vocab_list))
+    new_occupation = feature_column_lib.categorical_column_with_vocabulary_file(
+        'occupation',
+        vocabulary_file=new_vocab_file,
+        vocabulary_size=len(new_vocab_list))
+    # We can create our VocabInfo object from the new and old occupation
+    # FeatureColumn's.
+    occupation_vocab_info = warm_starting_util.VocabInfo(
+        new_vocab=new_occupation.vocabulary_file,
+        new_vocab_size=new_occupation.vocabulary_size,
+        num_oov_buckets=new_occupation.num_oov_buckets,
+        old_vocab=occupation.vocabulary_file,
+        old_vocab_size=occupation.vocabulary_size,
+        # Can't use constant_initializer with load_and_remap.  In practice,
+        # use a truncated normal initializer.
+        backup_initializer=init_ops.random_uniform_initializer(
+            minval=0.39, maxval=0.39))
+    warm_started_linear_classifier = self._linear_classifier_fn(
+        feature_columns=[occupation],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=warm_starting_util.WarmStartSettings(
+            ckpt_to_initialize_from=linear_classifier.model_dir,
+            var_name_to_vocab_info={
+                OCCUPATION_WEIGHT_NAME: occupation_vocab_info
+            },
+            # Explicitly providing None here will only warm-start variables
+            # referenced in var_name_to_vocab_info (the bias will not be
+            # warm-started).
+            vars_to_warm_start=None),
+        partitioner=partitioner)
+
+    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+    # 'doctor' was ID-0 and still ID-0.
+    self.assertAllClose(
+        linear_classifier.get_variable_value(OCCUPATION_WEIGHT_NAME)[0, :],
+        warm_started_linear_classifier.get_variable_value(
+            OCCUPATION_WEIGHT_NAME)[0, :])
+    # 'consultant' was ID-2 and now ID-1.
+    self.assertAllClose(
+        linear_classifier.get_variable_value(OCCUPATION_WEIGHT_NAME)[2, :],
+        warm_started_linear_classifier.get_variable_value(
+            OCCUPATION_WEIGHT_NAME)[1, :])
+    # 'engineer' is a new entry and should be initialized with the
+    # backup_initializer in VocabInfo.
+    self.assertAllClose([0.39] * 4,
+                        warm_started_linear_classifier.get_variable_value(
+                            OCCUPATION_WEIGHT_NAME)[2, :])
+    # Bias should still be zero (from initialization logic).
+    self.assertAllClose(
+        [0.0] * 4, warm_started_linear_classifier.get_variable_value(BIAS_NAME))
+
+  def test_warm_starting_with_naming_change(self):
+    """Tests warm-starting with a Tensor name remapping."""
+    age_in_years = feature_column_lib.numeric_column('age_in_years')
+
+    # Create a LinearClassifier and train to save a checkpoint.
+    linear_classifier = self._linear_classifier_fn(
+        feature_columns=[age_in_years],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second LinearClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_linear_classifier = self._linear_classifier_fn(
+        feature_columns=[feature_column_lib.numeric_column('age')],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        # The 'age' variable correspond to the 'age_in_years' variable in the
+        # previous model.
+        warm_start_from=warm_starting_util.WarmStartSettings(
+            ckpt_to_initialize_from=linear_classifier.model_dir,
+            var_name_to_prev_var_name={
+                AGE_WEIGHT_NAME: AGE_WEIGHT_NAME.replace('age', 'age_in_years')
+            }))
+
+    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+    self.assertAllClose(
+        linear_classifier.get_variable_value(
+            AGE_WEIGHT_NAME.replace('age', 'age_in_years')),
+        warm_started_linear_classifier.get_variable_value(AGE_WEIGHT_NAME))
+    # The bias is also warm-started (with no name remapping).
+    self.assertAllClose(
+        linear_classifier.get_variable_value(BIAS_NAME),
+        warm_started_linear_classifier.get_variable_value(BIAS_NAME))
diff --git a/tensorflow/python/estimator/canned/metric_keys.py b/tensorflow/python/estimator/canned/metric_keys.py
index 7dc4bfe5ffb5f762b56f4fc91b8a75ee4ba1796e..44eb680939203fea67e3391326a6f1013f022ad5 100644
--- a/tensorflow/python/estimator/canned/metric_keys.py
+++ b/tensorflow/python/estimator/canned/metric_keys.py
@@ -25,6 +25,7 @@ class MetricKeys(object):
   """Metric key strings."""
   LOSS = model_fn.LOSS_METRIC_KEY
   LOSS_MEAN = model_fn.AVERAGE_LOSS_METRIC_KEY
+  LOSS_REGULARIZATION = 'regularization_loss'
 
   ACCURACY = 'accuracy'
   # This is the best the model could do by always predicting one class.
diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index f267f4a54e541c8942fd6430a802798e430a5a47..90eecc1fda5c432a348bbaa4d35c4dc92f2d7489 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -30,6 +30,7 @@ from google.protobuf import message
 from tensorflow.core.framework import summary_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session as tf_session
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
 from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.estimator import run_config
@@ -261,9 +262,19 @@ class Estimator(object):
     """Trains a model given training data input_fn.
 
     Args:
-      input_fn: Input function returning a tuple of:
-          features - `Tensor` or dictionary of string feature name to `Tensor`.
-          labels - `Tensor` or dictionary of `Tensor` with labels.
+      input_fn: A function that provides input data for training as minibatches.
+        See @{$get_started/premade_estimators#create_input_functions} for more
+        information. The function should construct and return one of
+        the following:
+
+          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
+            tuple (features, labels) with same constraints as below.
+          * A tuple (features, labels): Where features is a `Tensor` or a
+            dictionary of string feature name to `Tensor` and labels is a
+            `Tensor` or a dictionary of string label name to `Tensor`. Both
+            features and labels are consumed by `model_fn`. They should satisfy
+            the expectation of `model_fn` from inputs.
+
       hooks: List of `SessionRunHook` subclass instances. Used for callbacks
         inside the training loop.
       steps: Number of steps for which to train model. If `None`, train forever
@@ -331,10 +342,19 @@ class Estimator(object):
     `StopIteration`).
 
     Args:
-      input_fn: Input function returning a tuple of:
-          features - Dictionary of string feature name to `Tensor` or
-            `SparseTensor`.
-          labels - `Tensor` or dictionary of `Tensor` with labels.
+      input_fn: A function that constructs the input data for evaluation.
+        See @{$get_started/premade_estimators#create_input_functions} for more
+        information. The function should construct and return one of
+        the following:
+
+          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
+            tuple (features, labels) with same constraints as below.
+          * A tuple (features, labels): Where features is a `Tensor` or a
+            dictionary of string feature name to `Tensor` and labels is a
+            `Tensor` or a dictionary of string label name to `Tensor`. Both
+            features and labels are consumed by `model_fn`. They should satisfy
+            the expectation of `model_fn` from inputs.
+
       steps: Number of steps for which to evaluate model. If `None`, evaluates
         until `input_fn` raises an end-of-input exception.
       hooks: List of `SessionRunHook` subclass instances. Used for callbacks
@@ -381,11 +401,20 @@ class Estimator(object):
     """Yields predictions for given features.
 
     Args:
-      input_fn: Input function returning features which is a dictionary of
-        string feature name to `Tensor` or `SparseTensor`. If it returns a
-        tuple, first item is extracted as features. Prediction continues until
-        `input_fn` raises an end-of-input exception (`OutOfRangeError` or
+      input_fn: A function that constructs the features. Prediction continues
+        until `input_fn` raises an end-of-input exception (`OutOfRangeError` or
         `StopIteration`).
+        See @{$get_started/premade_estimators#create_input_functions} for more
+        information. The function should construct and return one of
+        the following:
+
+          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must have
+            same constraints as below.
+          * features: A `Tensor` or a dictionary of string feature name to
+            `Tensor`. features are consumed by `model_fn`. They should satisfy
+            the expectation of `model_fn` from inputs.
+          * A tuple, in which case the first item is extracted as features.
+
       predict_keys: list of `str`, name of the keys to predict. It is used if
         the `EstimatorSpec.predictions` is a `dict`. If `predict_keys` is used
         then rest of the predictions will be filtered from the dictionary. If
@@ -416,7 +445,7 @@ class Estimator(object):
     with ops.Graph().as_default() as g:
       random_seed.set_random_seed(self._config.tf_random_seed)
       self._create_and_assert_global_step(g)
-      features = self._get_features_from_input_fn(
+      features, input_hooks = self._get_features_from_input_fn(
           input_fn, model_fn_lib.ModeKeys.PREDICT)
       estimator_spec = self._call_model_fn(
           features, None, model_fn_lib.ModeKeys.PREDICT, self.config)
@@ -426,7 +455,7 @@ class Estimator(object):
               checkpoint_filename_with_path=checkpoint_path,
               scaffold=estimator_spec.scaffold,
               config=self._session_config),
-          hooks=hooks) as mon_sess:
+          hooks=input_hooks + hooks) as mon_sess:
         while not mon_sess.should_stop():
           preds_evaluated = mon_sess.run(predictions)
           if not isinstance(predictions, dict):
@@ -460,7 +489,8 @@ class Estimator(object):
       self, export_dir_base, serving_input_receiver_fn,
       assets_extra=None,
       as_text=False,
-      checkpoint_path=None):
+      checkpoint_path=None,
+      strip_default_attrs=False):
     # pylint: disable=line-too-long
     """Exports inference graph as a SavedModel into given dir.
 
@@ -485,7 +515,7 @@ class Estimator(object):
     `ExportOutput`s, and the inputs are always the input receivers provided by
     the serving_input_receiver_fn.
 
-    Extra assets may be written into the SavedModel via the extra_assets
+    Extra assets may be written into the SavedModel via the assets_extra
     argument.  This should be a dict, where each key gives a destination path
     (including the filename) relative to the assets.extra directory.  The
     corresponding value gives the full path of the source file to be copied.
@@ -502,6 +532,9 @@ class Estimator(object):
       as_text: whether to write the SavedModel proto in text format.
       checkpoint_path: The checkpoint path to export.  If `None` (the default),
         the most recent checkpoint found within the model directory is chosen.
+      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+        removed from the NodeDefs. For a detailed guide, see
+        [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
 
     Returns:
       The string path to the exported directory.
@@ -562,7 +595,8 @@ class Estimator(object):
             signature_def_map=signature_def_map,
             assets_collection=ops.get_collection(
                 ops.GraphKeys.ASSET_FILEPATHS),
-            legacy_init_op=local_init_op)
+            legacy_init_op=local_init_op,
+            strip_default_attrs=strip_default_attrs)
         builder.save(as_text)
 
       # Add the extra assets
@@ -582,6 +616,11 @@ class Estimator(object):
   def _get_features_from_input_fn(self, input_fn, mode):
     """Extracts the `features` from return values of `input_fn`."""
     result = self._call_input_fn(input_fn, mode)
+    input_hooks = []
+    if isinstance(result, dataset_ops.Dataset):
+      iterator = result.make_initializable_iterator()
+      input_hooks.append(_DatasetInitializerHook(iterator))
+      result = iterator.get_next()
     if isinstance(result, (list, tuple)):
       # Unconditionally drop the label (the second element of result).
       result = result[0]
@@ -590,16 +629,22 @@ class Estimator(object):
       logging.warning('Input graph does not use tf.data.Dataset or contain a '
                       'QueueRunner. That means predict yields forever. '
                       'This is probably a mistake.')
-    return result
+    return result, input_hooks
 
   def _get_features_and_labels_from_input_fn(self, input_fn, mode):
+    """Extracts the `features` and labels from return values of `input_fn`."""
     result = self._call_input_fn(input_fn, mode)
+    input_hooks = []
+    if isinstance(result, dataset_ops.Dataset):
+      iterator = result.make_initializable_iterator()
+      input_hooks.append(_DatasetInitializerHook(iterator))
+      result = iterator.get_next()
     if isinstance(result, (list, tuple)):
       if len(result) != 2:
         raise ValueError(
-            'input_fn should return (feautures, labels) as a len 2 tuple.')
-      return result
-    return result, None
+            'input_fn should return (features, labels) as a len 2 tuple.')
+      return result[0], result[1], input_hooks
+    return result, None, input_hooks
 
   def _extract_batch_length(self, preds_evaluated):
     """Extracts batch length of predictions."""
@@ -671,9 +716,10 @@ class Estimator(object):
     Raises:
       ValueError: if input_fn takes invalid arguments.
     """
-    del mode  # unused
     input_fn_args = util.fn_args(input_fn)
     kwargs = {}
+    if 'mode' in input_fn_args:
+      kwargs['mode'] = mode
     if 'params' in input_fn_args:
       kwargs['params'] = self.params
     if 'config' in input_fn_args:
@@ -710,7 +756,10 @@ class Estimator(object):
       kwargs['params'] = self.params
     if 'config' in model_fn_args:
       kwargs['config'] = config
+
+    logging.info('Calling model_fn.')
     model_fn_results = self._model_fn(features=features, **kwargs)
+    logging.info('Done calling model_fn.')
 
     if not isinstance(model_fn_results, model_fn_lib.EstimatorSpec):
       raise ValueError('model_fn should return an EstimatorSpec.')
@@ -723,8 +772,10 @@ class Estimator(object):
       random_seed.set_random_seed(self._config.tf_random_seed)
       global_step_tensor = self._create_and_assert_global_step(g)
       training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
-      features, labels = self._get_features_and_labels_from_input_fn(
-          input_fn, model_fn_lib.ModeKeys.TRAIN)
+      features, labels, input_hooks = (
+          self._get_features_and_labels_from_input_fn(
+              input_fn, model_fn_lib.ModeKeys.TRAIN))
+      worker_hooks.extend(input_hooks)
       estimator_spec = self._call_model_fn(
           features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
       # Check if the user created a loss summary, and add one if they didn't.
@@ -822,8 +873,9 @@ class Estimator(object):
     with ops.Graph().as_default() as g:
       random_seed.set_random_seed(self._config.tf_random_seed)
       global_step_tensor = self._create_and_assert_global_step(g)
-      features, labels = self._get_features_and_labels_from_input_fn(
-          input_fn, model_fn_lib.ModeKeys.EVAL)
+      features, labels, input_hooks = (
+          self._get_features_and_labels_from_input_fn(
+              input_fn, model_fn_lib.ModeKeys.EVAL))
       estimator_spec = self._call_model_fn(
           features, labels, model_fn_lib.ModeKeys.EVAL, self.config)
 
@@ -844,7 +896,8 @@ class Estimator(object):
             'already defines a default metric with the same name.')
       eval_dict[ops.GraphKeys.GLOBAL_STEP] = global_step_tensor
 
-      all_hooks = list(hooks or [])
+      all_hooks = list(input_hooks)
+      all_hooks.extend(hooks)
       all_hooks.extend(list(estimator_spec.evaluation_hooks or []))
 
       eval_results = evaluation._evaluate_once(  # pylint: disable=protected-access
@@ -1039,3 +1092,16 @@ def _has_dataset_or_queue_runner(maybe_tensor):
 
   # Now, check queue.
   return ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS)
+
+
+class _DatasetInitializerHook(training.SessionRunHook):
+
+  def __init__(self, iterator):
+    self._iterator = iterator
+
+  def begin(self):
+    self._initializer = self._iterator.initializer
+
+  def after_create_session(self, session, coord):
+    del coord
+    session.run(self._initializer)
diff --git a/tensorflow/python/estimator/estimator_lib.py b/tensorflow/python/estimator/estimator_lib.py
index bed2b674192bd4054baa2ee5d30fc72c0e8d54ed..01699e7399c4089281e9ece76e534e1f82692257 100644
--- a/tensorflow/python/estimator/estimator_lib.py
+++ b/tensorflow/python/estimator/estimator_lib.py
@@ -41,6 +41,8 @@ from tensorflow.python.estimator.run_config import RunConfig
 from tensorflow.python.estimator.training import EvalSpec
 from tensorflow.python.estimator.training import train_and_evaluate
 from tensorflow.python.estimator.training import TrainSpec
+from tensorflow.python.estimator.warm_starting_util import VocabInfo
+from tensorflow.python.estimator.warm_starting_util import WarmStartSettings
 
 
 from tensorflow.python.util.all_util import remove_undocumented
@@ -76,6 +78,10 @@ _allowed_symbols = [
     'Exporter',
     'LatestExporter',
     'FinalExporter',
+
+    # Warm-starting
+    'WarmStartSettings',
+    'VocabInfo',
 ]
 
 remove_undocumented(__name__, allowed_exception_list=_allowed_symbols)
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py
index c1b773b8c408dbfe7df685d5dcf2748ae5428adf..ed1676a92de19203be8bc61fc6efeb559a2fb8aa 100644
--- a/tensorflow/python/estimator/estimator_test.py
+++ b/tensorflow/python/estimator/estimator_test.py
@@ -40,6 +40,7 @@ from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.layers import layers
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.ops import array_ops
@@ -57,6 +58,7 @@ from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.saved_model import loader
+from tensorflow.python.saved_model import loader_impl
 from tensorflow.python.saved_model import tag_constants
 from tensorflow.python.summary import summary
 from tensorflow.python.summary import summary_iterator
@@ -418,6 +420,7 @@ class EstimatorTrainTest(test.TestCase):
     self.assertEqual(1, model_fn_call_count[0])
 
   def test_callable_input_fn(self):
+    expected_mode = model_fn_lib.ModeKeys.TRAIN
     expected_params = {'batch_size': 10}
     expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
     input_fn_call_count = [0]
@@ -430,8 +433,9 @@ class EstimatorTrainTest(test.TestCase):
 
     class InputFn(object):
 
-      def __call__(self, params, config):
+      def __call__(self, mode, params, config):
         input_fn_call_count[0] += 1
+        test_self.assertEqual(expected_mode, mode)
         test_self.assertEqual(expected_params, params)
         test_self.assertEqual(4321, config.tf_random_seed)
         return dummy_input_fn()
@@ -444,6 +448,7 @@ class EstimatorTrainTest(test.TestCase):
     self.assertEqual(1, input_fn_call_count[0])
 
   def test_input_fn_args(self):
+    expected_mode = model_fn_lib.ModeKeys.TRAIN
     expected_params = {'batch_size': 10}
     expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
     input_fn_call_count = [0]
@@ -452,8 +457,9 @@ class EstimatorTrainTest(test.TestCase):
       del params, config
       return model_fn_global_step_incrementer(features, labels, mode)
 
-    def _input_fn(params, config):
+    def _input_fn(mode, params, config):
       input_fn_call_count[0] += 1
+      self.assertEqual(expected_mode, mode)
       self.assertEqual(expected_params, params)
       self.assertEqual(4321, config.tf_random_seed)
       return dummy_input_fn()
@@ -913,9 +919,84 @@ class EstimatorGetVariablesTest(test.TestCase):
     self.assertEqual(3., est.get_variable_value('three'))
 
 
+class EstimatorDatasetIntegrationTest(test.TestCase):
+  """Tests dataset integration."""
+
+  def test_returned_by_input_fn(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensors(([1.], [2.]))
+
+    def _model_fn(features, labels, mode):
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=features + labels,  # 1 + 2
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    scores = est.evaluate(_input_fn, steps=1)
+    self.assertEqual(3., scores[model_fn_lib.LOSS_METRIC_KEY])
+
+  def test_with_none_labels(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensors([7.])
+
+    def _model_fn(features, labels, mode):
+      self.assertIsNone(labels)
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=features,  # 7
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    scores = est.evaluate(_input_fn, steps=1)
+    self.assertEqual(7., scores[model_fn_lib.LOSS_METRIC_KEY])
+
+  def test_with_predict(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensors([10.])
+
+    def _model_fn(features, labels, mode):
+      _ = labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=features,  # 10
+          loss=features,  # 10
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    self.assertEqual([10.], next(est.predict(input_fn=_input_fn)))
+
+  def test_batching(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensor_slices(([[1.], [2.]],
+                                                     [[10.], [20.]])).batch(1)
+
+    def _model_fn(features, labels, mode):
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=features,
+          loss=features + (0 if labels is None else labels),  # 11, 22
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn)
+    scores = est.evaluate(_input_fn)
+    # (11 + 22)/2 = 16.5
+    self.assertEqual(16.5, scores[model_fn_lib.LOSS_METRIC_KEY])
+    self.assertEqual([1., 2.], list(est.predict(_input_fn)))
+
+
 class EstimatorEvaluateTest(test.TestCase):
 
   def test_input_fn_args(self):
+    expected_mode = model_fn_lib.ModeKeys.EVAL
     expected_params = {'batch_size': 10}
     expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
     input_fn_call_count = [0]
@@ -924,8 +1005,9 @@ class EstimatorEvaluateTest(test.TestCase):
       del params, config
       return model_fn_global_step_incrementer(features, labels, mode)
 
-    def _input_fn(params, config):
+    def _input_fn(mode, params, config):
       input_fn_call_count[0] += 1
+      self.assertEqual(expected_mode, mode)
       self.assertEqual(expected_params, params)
       self.assertEqual(4321, config.tf_random_seed)
       return dummy_input_fn()
@@ -1189,6 +1271,7 @@ class EstimatorEvaluateTest(test.TestCase):
 class EstimatorPredictTest(test.TestCase):
 
   def test_input_fn_args(self):
+    expected_mode = model_fn_lib.ModeKeys.PREDICT
     expected_params = {'batch_size': 10}
     expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
     input_fn_call_count = [0]
@@ -1201,8 +1284,9 @@ class EstimatorPredictTest(test.TestCase):
           train_op=state_ops.assign_add(training.get_global_step(), 1),
           predictions=constant_op.constant([[10.]]))
 
-    def _input_fn(params, config):
+    def _input_fn(mode, params, config):
       input_fn_call_count[0] += 1
+      self.assertEqual(expected_mode, mode)
       self.assertEqual(expected_params, params)
       self.assertEqual(4321, config.tf_random_seed)
       return dummy_input_fn()
@@ -1976,6 +2060,65 @@ class EstimatorExportTest(test.TestCase):
 
     gfile.DeleteRecursively(tmpdir)
 
+  def test_export_savedmodel_proto_strip_default_attrs(self):
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
+    est.train(input_fn=dummy_input_fn, steps=1)
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir_stripped = est.export_savedmodel(
+        export_dir_base, serving_input_receiver_fn, strip_default_attrs=True)
+    export_dir_not_stripped = est.export_savedmodel(
+        export_dir_base, serving_input_receiver_fn, strip_default_attrs=False)
+
+    # Load the SavedModel from disk as-is to verify default attrs
+    # are stripped. Reimporting the SavedModel via the loader causes the
+    # default attrs to be populated in the NodeDefs.
+
+    # pylint: disable=protected-access
+    saved_model_stripped_pb = loader_impl._parse_saved_model(
+        export_dir_stripped)
+    saved_model_not_stripped_pb = loader_impl._parse_saved_model(
+        export_dir_not_stripped)
+    self.assertIsNotNone(saved_model_stripped_pb)
+    self.assertIsNotNone(saved_model_not_stripped_pb)
+    # pylint: enable=protected-access
+
+    meta_graph_def_stripped = [
+        x for x in saved_model_stripped_pb.meta_graphs
+        if x.meta_info_def.tags == [tag_constants.SERVING]][0]
+    meta_graph_def_not_stripped = [
+        x for x in saved_model_not_stripped_pb.meta_graphs
+        if x.meta_info_def.tags == [tag_constants.SERVING]][0]
+
+    # "weight" node in graph is a "Variable" Op with 2 default valued attrs.
+    #   o "container"    : "".
+    #   o "shared_name"  : "".
+
+    # saved_model_stripped_pb was exported with strip_default_attrs set to True.
+    # "weight" node shouldn't have attributes "container" and "shared_name".
+    node_def = test_util.get_node_def_from_graph(
+        'weight', meta_graph_def_stripped.graph_def)
+    self.assertNotIn('container', node_def.attr)
+    self.assertNotIn('shared_name', node_def.attr)
+
+    # saved_model_not_stripped_pb was exported with strip_default_attrs
+    # disabled. "weight" node should have attributes "container" and
+    # "shared_name".
+    node_def = test_util.get_node_def_from_graph(
+        'weight', meta_graph_def_not_stripped.graph_def)
+    self.assertIn('container', node_def.attr)
+    self.assertIn('shared_name', node_def.attr)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
 
 class EstimatorHookOrderingTest(test.TestCase):
 
diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py
index c6f20d4a9e2a6b3384ba59ae2df67ff7a3464aa9..ba522f396d0eda1bb3d13b21acfddcc3d593e21b 100644
--- a/tensorflow/python/estimator/exporter.py
+++ b/tensorflow/python/estimator/exporter.py
@@ -73,7 +73,8 @@ class _SavedModelExporter(Exporter):
                name,
                serving_input_receiver_fn,
                assets_extra=None,
-               as_text=False):
+               as_text=False,
+               strip_default_attrs=True):
     """Create an `Exporter` to use with `tf.estimator.EvalSpec`.
 
     Args:
@@ -90,6 +91,9 @@ class _SavedModelExporter(Exporter):
         `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
       as_text: whether to write the SavedModel proto in text format. Defaults to
         `False`.
+      strip_default_attrs: Boolean. If set, default attrs in the `GraphDef` will
+        be stripped on write. This is the default behavior and recommended for
+        better forward compatibility of the resulting `SavedModel`.
 
     Raises:
       ValueError: if any arguments is invalid.
@@ -98,6 +102,7 @@ class _SavedModelExporter(Exporter):
     self._serving_input_receiver_fn = serving_input_receiver_fn
     self._assets_extra = assets_extra
     self._as_text = as_text
+    self._strip_default_attrs = strip_default_attrs
 
   @property
   def name(self):
@@ -112,7 +117,8 @@ class _SavedModelExporter(Exporter):
         self._serving_input_receiver_fn,
         assets_extra=self._assets_extra,
         as_text=self._as_text,
-        checkpoint_path=checkpoint_path)
+        checkpoint_path=checkpoint_path,
+        strip_default_attrs=self._strip_default_attrs)
 
     return export_result
 
@@ -197,8 +203,8 @@ class LatestExporter(Exporter):
       as_text: whether to write the SavedModel proto in text format. Defaults to
         `False`.
       exports_to_keep: Number of exports to keep.  Older exports will be
-       garbage-collected.  Defaults to 5.  Set to `None` to disable garbage
-       collection.
+        garbage-collected.  Defaults to 5.  Set to `None` to disable garbage
+        collection.
 
     Raises:
       ValueError: if any arguments is invalid.
diff --git a/tensorflow/python/estimator/exporter_test.py b/tensorflow/python/estimator/exporter_test.py
index 8e0f66cece754dea95987d136d90855e6818236b..70b5612804b2d91d66482d98ae080f42dfa17455 100644
--- a/tensorflow/python/estimator/exporter_test.py
+++ b/tensorflow/python/estimator/exporter_test.py
@@ -69,7 +69,8 @@ class LatestExporterTest(test.TestCase):
         _serving_input_receiver_fn,
         assets_extra={"from/path": "to/path"},
         as_text=False,
-        checkpoint_path="checkpoint_path")
+        checkpoint_path="checkpoint_path",
+        strip_default_attrs=True)
 
   def test_only_the_last_export_is_saved(self):
 
@@ -102,7 +103,8 @@ class LatestExporterTest(test.TestCase):
         _serving_input_receiver_fn,
         assets_extra={"from/path": "to/path"},
         as_text=False,
-        checkpoint_path="checkpoint_path")
+        checkpoint_path="checkpoint_path",
+        strip_default_attrs=True)
 
   def test_garbage_collect_exports(self):
     export_dir_base = tempfile.mkdtemp() + "export/"
diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py
index 750af20e8a1e27c0f9c4fcf3ebf586c41bc9c66c..c4c2e30e8771c5cb1e492fed751c71583dcf477b 100644
--- a/tensorflow/python/estimator/inputs/numpy_io.py
+++ b/tensorflow/python/estimator/inputs/numpy_io.py
@@ -19,7 +19,10 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+
+import numpy as np
 from six import string_types
+
 from tensorflow.python.estimator.inputs.queues import feeding_functions
 
 # Key name to pack the target into dict of `features`. See
@@ -36,6 +39,13 @@ def _get_unique_target_key(features):
   temporarily and unpacked after calling the feeding function. Toward this goal,
   this function returns a key not existed in the `features` to pack the
   `target`.
+
+  Args:
+    features: OrderedDict of numpy arrays
+
+  Returns:
+    A unique key that can be used to insert the subsequent target into
+      features dict.
   """
   target_key = _TARGET_KEY
   while target_key in features:
@@ -43,6 +53,39 @@ def _get_unique_target_key(features):
   return target_key
 
 
+def _validate_and_convert_features(x):
+  """Type check input data and make a shadow copy as an ordered dict.
+
+  Args:
+    x: numpy array object or dict of numpy array objects. If an array,
+      the array will be treated as a single feature.
+
+  Returns:
+    OrderedDict copy of x.
+
+  Raises:
+    ValueError: if x is empty
+    TypeError: if x is an unknown type.
+  """
+  if isinstance(x, dict):
+    if not x:
+      raise ValueError('x cannot be an empty dict')
+    # Make a shadow copy and also ensure the order of iteration is consistent.
+    ordered_dict_data = collections.OrderedDict(
+        sorted(x.items(), key=lambda t: t[0]))
+  elif isinstance(x, np.ndarray):
+    if x.size == 0:
+      raise ValueError('x cannot be an empty array')
+
+    # Make a shadow copy and convert to dict to align with dict processing.
+    ordered_dict_data = collections.OrderedDict({'__direct_np_input__': x})
+  else:
+    x_type = type(x).__name__
+    raise TypeError('x must be a dict or array; got {}'.format(x_type))
+
+  return ordered_dict_data
+
+
 def numpy_input_fn(x,
                    y=None,
                    batch_size=128,
@@ -70,7 +113,8 @@ def numpy_input_fn(x,
   ```
 
   Args:
-    x: dict of numpy array object.
+    x: numpy array object or dict of numpy array objects. If an array,
+      the array will be treated as a single feature.
     y: numpy array object or dict of numpy array object. `None` if absent.
     batch_size: Integer, size of batches to return.
     num_epochs: Integer, number of epochs to iterate over data. If `None` will
@@ -90,23 +134,19 @@ def numpy_input_fn(x,
       values in `x` have same shape).
     ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict.
     ValueError: if x or y is an empty dict.
-    TypeError: `x` is not a dict or `shuffle` is not bool.
+    TypeError: `x` is not a dict or array, or if `shuffle` is not bool.
   """
-
   if not isinstance(shuffle, bool):
     raise TypeError('shuffle must be explicitly set as boolean; '
                     'got {}'.format(shuffle))
 
   def input_fn():
     """Numpy input function."""
-    if not isinstance(x, dict):
-      raise TypeError('x must be dict; got {}'.format(type(x).__name__))
-    if not x:
-      raise ValueError('x cannot be empty')
 
-    # Make a shadow copy and also ensure the order of iteration is consistent.
-    ordered_dict_data = collections.OrderedDict(
-        sorted(x.items(), key=lambda t: t[0]))
+    # Note that `x` should not be used after conversion to ordered_dict_data,
+    # as type could be either dict or array.
+    ordered_dict_data = _validate_and_convert_features(x)
+
     # Deep copy keys which is a view in python 3
     feature_keys = list(ordered_dict_data.keys())
 
@@ -161,7 +201,13 @@ def numpy_input_fn(x,
     if batch:
       batch.pop(0)
 
-    features = dict(zip(feature_keys, batch[:len(feature_keys)]))
+    if isinstance(x, np.ndarray):
+      # Return as the same type as original array.
+      features = batch[0]
+    else:
+      # Return as the original dict type
+      features = dict(zip(feature_keys, batch[:len(feature_keys)]))
+
     if target_keys is None:
       # TODO(martinwicke), return consistent result
       return features
diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py
index 1374e3f7e12e76683f14737747b490c9a5e319eb..92d057e25da785cf5ee310ca1c80f67a5fbdb43a 100644
--- a/tensorflow/python/estimator/inputs/numpy_io_test.py
+++ b/tensorflow/python/estimator/inputs/numpy_io_test.py
@@ -24,6 +24,7 @@ from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
 from tensorflow.python.training import coordinator
+from tensorflow.python.training import monitored_session
 from tensorflow.python.training import queue_runner_impl
 
 
@@ -231,10 +232,10 @@ class NumpyIoTest(test.TestCase):
       coord.join(threads)
 
   def testNumpyInputFnWithXAsNonDict(self):
-    x = np.arange(32, 36)
+    x = list(range(32, 36))
     y = np.arange(4)
     with self.test_session():
-      with self.assertRaisesRegexp(TypeError, 'x must be dict'):
+      with self.assertRaisesRegexp(TypeError, 'x must be a dict or array'):
         failing_input_fn = numpy_io.numpy_input_fn(
             x, y, batch_size=2, shuffle=False, num_epochs=1)
         failing_input_fn()
@@ -243,7 +244,15 @@ class NumpyIoTest(test.TestCase):
     x = {}
     y = np.arange(4)
     with self.test_session():
-      with self.assertRaisesRegexp(ValueError, 'x cannot be empty'):
+      with self.assertRaisesRegexp(ValueError, 'x cannot be an empty'):
+        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
+        failing_input_fn()
+
+  def testNumpyInputFnWithXIsEmptyArray(self):
+    x = np.array([[], []])
+    y = np.arange(4)
+    with self.test_session():
+      with self.assertRaisesRegexp(ValueError, 'x cannot be an empty'):
         failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
         failing_input_fn()
 
@@ -369,6 +378,82 @@ class NumpyIoTest(test.TestCase):
         failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
         failing_input_fn()
 
+  def testNumpyInputFnWithXIsArray(self):
+    x = np.arange(4) * 1.0
+    y = np.arange(-32, -28)
+
+    input_fn = numpy_io.numpy_input_fn(
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
+    features, target = input_fn()
+
+    with monitored_session.MonitoredSession() as session:
+      res = session.run([features, target])
+      self.assertAllEqual(res[0], [0, 1])
+      self.assertAllEqual(res[1], [-32, -31])
+
+      session.run([features, target])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features, target])
+
+  def testNumpyInputFnWithXIsNDArray(self):
+    x = np.arange(16).reshape(4, 2, 2) * 1.0
+    y = np.arange(-48, -32).reshape(4, 2, 2)
+
+    input_fn = numpy_io.numpy_input_fn(
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
+    features, target = input_fn()
+
+    with monitored_session.MonitoredSession() as session:
+      res = session.run([features, target])
+      self.assertAllEqual(res[0], [[[0, 1], [2, 3]], [[4, 5], [6, 7]]])
+      self.assertAllEqual(
+          res[1], [[[-48, -47], [-46, -45]], [[-44, -43], [-42, -41]]])
+
+      session.run([features, target])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features, target])
+
+  def testNumpyInputFnWithXIsArrayYIsDict(self):
+    x = np.arange(4) * 1.0
+    y = {'y1': np.arange(-32, -28)}
+
+    input_fn = numpy_io.numpy_input_fn(
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
+    features_tensor, targets_tensor = input_fn()
+
+    with monitored_session.MonitoredSession() as session:
+      features, targets = session.run([features_tensor, targets_tensor])
+      self.assertEqual(len(features), 2)
+      self.assertAllEqual(features, [0, 1])
+      self.assertEqual(len(targets), 1)
+      self.assertAllEqual(targets['y1'], [-32, -31])
+
+      session.run([features_tensor, targets_tensor])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features_tensor, targets_tensor])
+
+  def testArrayAndDictGiveSameOutput(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x_arr = np.vstack((a, b))
+    x_dict = {'feature1': x_arr}
+    y = np.arange(-48, -40).reshape(2, 4)
+
+    input_fn_arr = numpy_io.numpy_input_fn(
+        x_arr, y, batch_size=2, shuffle=False, num_epochs=1)
+    features_arr, targets_arr = input_fn_arr()
+
+    input_fn_dict = numpy_io.numpy_input_fn(
+        x_dict, y, batch_size=2, shuffle=False, num_epochs=1)
+    features_dict, targets_dict = input_fn_dict()
+
+    with monitored_session.MonitoredSession() as session:
+      res_arr, res_dict = session.run([
+          (features_arr, targets_arr), (features_dict, targets_dict)])
+
+      self.assertAllEqual(res_arr[0], res_dict[0]['feature1'])
+      self.assertAllEqual(res_arr[1], res_dict[1])
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py
index d71964d2ec8e8ce21934428c3fff88f65b2751da..dc714d4d22ccf6c14c544f84cba99b2bac55da88 100644
--- a/tensorflow/python/estimator/run_config.py
+++ b/tensorflow/python/estimator/run_config.py
@@ -54,35 +54,68 @@ _TASK_TYPE_KEY = 'type'
 _TASK_ID_KEY = 'index'
 _CLUSTER_KEY = 'cluster'
 _SERVICE_KEY = 'service'
+_SESSION_MASTER_KEY = 'session_master'
+_EVAL_SESSION_MASTER_KEY = 'eval_session_master'
+_MODEL_DIR_KEY = 'model_dir'
 _LOCAL_MASTER = ''
 _GRPC_SCHEME = 'grpc://'
 
 
-def _get_master(cluster_spec, task_type, task_id):
-  """Returns the appropriate string for the TensorFlow master."""
+def _get_session_master(cluster_spec, task_type, task_id, tf_config):
+  """Returns the appropriate address for TensorFlow master.
+
+  The order of precedence to deteremine the TF session master is as follows:
+  1. If `tf_session_master` is set in TF_CONFIG environment variable, takes it.
+  2. If the cluster has only one node, returns empty string ''.
+  3. Returns the grpc address according to the task type and id in the cluster.
+     This is between-graph replication.
+
+  Note: task_type and task_id must be validated. Typically, validated using
+  `_validate_task_type_and_task_id`.
+
+  Args:
+    cluster_spec: A `ClusterSpec` instance.
+    task_type: String. Task type for current node.
+    task_id: Int. Task id for current node.
+    tf_config: Dict. Python dict for the TF_CONFIG environment variable.
+
+  Raises:
+    RuntimeError: If `cluster_spec` is not set.
+
+  """
+  if _SESSION_MASTER_KEY in tf_config:
+    return tf_config[_SESSION_MASTER_KEY]
+
   if not cluster_spec:
-    raise RuntimeError(
-        'Internal error: `_get_master` does not expect empty cluster_spec.')
+    raise RuntimeError('Internal error: `_get_session_master` '
+                       'does not expect empty cluster_spec.')
 
   jobs = cluster_spec.jobs
+
+  # If there is only one node in the cluster, do things locally by setting
+  # master to ''.  If a service or user sets TF_CONFIG with a single node, it's
+  # more performant to use a direct master rather than an RPC service.
+  if len(jobs) == 1 and len(cluster_spec.job_tasks(jobs[0])) == 1:
+    return _LOCAL_MASTER
+
   # Lookup the master in cluster_spec using task_type and task_id,
   # if possible.
-  if task_type not in jobs:
-    raise ValueError(
-        '%s is not a valid task_type in the cluster_spec:\n'
-        '%s\n\n'
-        'Note that these values may be coming from the TF_CONFIG environment '
-        'variable.' % (task_type, cluster_spec))
   addresses = cluster_spec.job_tasks(task_type)
-  if not 0 <= task_id < len(addresses):
-    raise ValueError(
-        '%d is not a valid task_id for task_type %s in the cluster_spec:\n'
-        '%s\n\n'
-        'Note that these values may be coming from the TF_CONFIG environment '
-        'variable.' % (task_id, task_type, cluster_spec))
   return _GRPC_SCHEME + addresses[task_id]
 
 
+def _get_eval_session_master(task_type, tf_config):
+  """Returns the appropriate address for TensorFlow evaluation master."""
+  if task_type == TaskType.EVALUATOR:
+    return tf_config.get(_EVAL_SESSION_MASTER_KEY, _LOCAL_MASTER)
+
+  if _EVAL_SESSION_MASTER_KEY in tf_config:
+    raise ValueError('Key ({}) should not be set for task type other than {}. '
+                     'Task type: {}'.format(_EVAL_SESSION_MASTER_KEY,
+                                            TaskType.EVALUATOR, task_type))
+  return _LOCAL_MASTER
+
+
 def _count_ps(cluster_spec):
   """Counts the number of parameter servers in cluster_spec."""
   if not cluster_spec:
@@ -140,9 +173,56 @@ def _validate_task_type_and_task_id(cluster_spec, task_env, chief_task_type):
   # cluster spec, which will be checked later (when retrieving the `master`)
   if task_id < 0:
     raise ValueError('Task index must be non-negative number.')
+
+  # Evaluator is not part of the training cluster.
+  if task_type == TaskType.EVALUATOR:
+    return task_type, task_id
+
+  if task_type not in cluster_spec.jobs:
+    raise ValueError(
+        '%s is not a valid task_type in the cluster_spec:\n'
+        '%s\n\n'
+        'Note that these values may be coming from the TF_CONFIG environment '
+        'variable.' % (task_type, cluster_spec))
+  addresses = cluster_spec.job_tasks(task_type)
+  if not 0 <= task_id < len(addresses):
+    raise ValueError(
+        '%d is not a valid task_id for task_type %s in the cluster_spec:\n'
+        '%s\n\n'
+        'Note that these values may be coming from the TF_CONFIG environment '
+        'variable.' % (task_id, task_type, cluster_spec))
+
   return task_type, task_id
 
 
+def _get_global_id_in_cluster(
+    cluster_spec, task_type, task_id, chief_task_type):
+  """Returns the global id in cluster."""
+  # Note: This is implementation details, which user should not rely on.
+  # The first id is 0, which is always for the `chief` node. All other nodes,
+  # except `ps`, are ordered alphabetical based on task type (alphabetically)
+  # and task id (ascendingly). `ps` are ordered last.
+
+  # Sort task names in cluster
+  task_type_ordered_list = [chief_task_type]
+  task_type_ordered_list.extend([
+      t for t in sorted(cluster_spec.jobs)
+      if t != chief_task_type and t != TaskType.PS
+  ])
+  if TaskType.PS in cluster_spec.jobs:
+    task_type_ordered_list.append(TaskType.PS)
+
+  next_global_id = 0
+  for t in task_type_ordered_list:
+    if t == task_type:
+      return next_global_id + task_id
+    next_global_id += len(cluster_spec.job_tasks(t))
+
+  # This should never happen.
+  raise RuntimeError('Internal Error: `task_type` ({}) is not in '
+                     'cluster_spec ({}).'.format(task_type, cluster_spec))
+
+
 def _validate_save_ckpt_with_replaced_keys(new_copy, replaced_keys):
   """Validates the save ckpt properties."""
   # Ensure one (and only one) of save_steps and save_secs is not None.
@@ -358,6 +438,12 @@ class RunConfig(object):
           save_checkpoints_secs is not None):
       raise ValueError(_SAVE_CKPT_ERR)
 
+    tf_config = json.loads(os.environ.get(_TF_CONFIG_ENV, '{}'))
+    if tf_config:
+      logging.info('TF_CONFIG environment variable: %s', tf_config)
+
+    model_dir = _get_model_dir(tf_config, model_dir)
+
     RunConfig._replace(
         self,
         allowed_properties_list=_DEFAULT_REPLACEABLE_LIST,
@@ -371,14 +457,10 @@ class RunConfig(object):
         keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
         log_step_count_steps=log_step_count_steps)
 
-    self._init_distributed_setting_from_environment_var()
+    self._init_distributed_setting_from_environment_var(tf_config)
 
-  def _init_distributed_setting_from_environment_var(self):
-    """Initialize distributed properties based on environment variable."""
-
-    tf_config = json.loads(os.environ.get(_TF_CONFIG_ENV) or '{}')
-    if tf_config:
-      logging.info('TF_CONFIG environment variable: %s', tf_config)
+  def _init_distributed_setting_from_environment_var(self, tf_config):
+    """Initialize distributed properties based on `tf_config`."""
 
     self._service = _validate_service(tf_config.get(_SERVICE_KEY))
     self._cluster_spec = server_lib.ClusterSpec(tf_config.get(_CLUSTER_KEY, {}))
@@ -393,24 +475,34 @@ class RunConfig(object):
       self._task_type, self._task_id = _validate_task_type_and_task_id(
           self._cluster_spec, task_env, TaskType.CHIEF)
 
+      self._evaluation_master = _get_eval_session_master(
+          self._task_type, tf_config)
+
       if self._task_type != TaskType.EVALUATOR:
-        self._master = _get_master(
-            self._cluster_spec, self._task_type, self._task_id)
+        self._master = _get_session_master(self._cluster_spec, self._task_type,
+                                           self._task_id, tf_config)
         self._num_ps_replicas = _count_ps(self._cluster_spec)
         self._num_worker_replicas = _count_worker(
             self._cluster_spec, chief_task_type=TaskType.CHIEF)
+        self._global_id = _get_global_id_in_cluster(
+            self._cluster_spec,
+            self._task_type,
+            self._task_id,
+            chief_task_type=TaskType.CHIEF)
       else:
         # Evaluator is not part of the training cluster.
         self._cluster_spec = server_lib.ClusterSpec({})
         self._master = _LOCAL_MASTER
         self._num_ps_replicas = 0
         self._num_worker_replicas = 0
+        self._global_id = None  # undefined
 
       self._is_chief = self._task_type == TaskType.CHIEF
     else:
       # Local mode.
       self._task_type = task_env.get(_TASK_TYPE_KEY, TaskType.WORKER)
       self._task_id = int(task_env.get(_TASK_ID_KEY, 0))
+      self._global_id = 0
 
       if self._task_type != TaskType.WORKER:
         raise ValueError(
@@ -419,7 +511,9 @@ class RunConfig(object):
         raise ValueError(
             'If "cluster" is not set in TF_CONFIG, task index must be 0.')
 
-      self._master = ''
+      self._master = tf_config.get(_SESSION_MASTER_KEY, _LOCAL_MASTER)
+      self._evaluation_master = tf_config.get(_EVAL_SESSION_MASTER_KEY,
+                                              _LOCAL_MASTER)
       self._is_chief = True
       self._num_ps_replicas = 0
       self._num_worker_replicas = 1
@@ -443,8 +537,16 @@ class RunConfig(object):
       raise ValueError('If `master` node exists in `cluster`, task_type '
                        '`evaluator` is not supported.')
 
-    self._master = _get_master(
-        self._cluster_spec, self._task_type, self._task_id)
+    self._global_id = _get_global_id_in_cluster(
+        self._cluster_spec,
+        self._task_type,
+        self._task_id,
+        chief_task_type=TaskType.MASTER)
+
+    self._master = _get_session_master(self._cluster_spec, self._task_type,
+                                       self._task_id, tf_config)
+    self._evaluation_master = _get_eval_session_master(self._task_type,
+                                                       tf_config)
     self._num_ps_replicas = _count_ps(self._cluster_spec)
     self._num_worker_replicas = _count_worker(
         self._cluster_spec, chief_task_type=TaskType.MASTER)
@@ -457,7 +559,7 @@ class RunConfig(object):
 
   @property
   def evaluation_master(self):
-    return ''
+    return self._evaluation_master
 
   @property
   def is_chief(self):
@@ -479,6 +581,46 @@ class RunConfig(object):
   def task_id(self):
     return self._task_id
 
+  @property
+  def global_id_in_cluster(self):
+    """The global id in the training cluster.
+
+    All global ids in the training cluster are assigned from an increasing
+    sequence of consecutive integers. The first id is 0.
+
+    Note: Task id (the property field `task_id`) is tracking the index of the
+    node among all nodes with the SAME task type. For example, given the cluster
+    definition as follows:
+
+    ```
+      cluster = {'chief': ['host0:2222'],
+                 'ps': ['host1:2222', 'host2:2222'],
+                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
+    ```
+
+    Nodes with task type `worker` can have id 0, 1, 2.  Nodes with task type
+    `ps` can have id, 0, 1. So, `task_id` is not unique, but the pair
+    (`task_type`, `task_id`) can uniquely determine a node in the cluster.
+
+    Global id, i.e., this field, is tracking the index of the node among ALL
+    nodes in the cluster. It is uniquely assigned.  For example, for the cluster
+    spec given above, the global ids are assigned as:
+    ```
+      task_type  | task_id  |  global_id
+      --------------------------------
+      chief      | 0        |  0
+      worker     | 0        |  1
+      worker     | 1        |  2
+      worker     | 2        |  3
+      ps         | 0        |  4
+      ps         | 1        |  5
+    ```
+
+    Returns:
+      An integer id.
+    """
+    return self._global_id
+
   @property
   def task_type(self):
     return self._task_type
@@ -593,3 +735,31 @@ class RunConfig(object):
     _validate_save_ckpt_with_replaced_keys(config, kwargs.keys())
     _validate_properties(config)
     return config
+
+
+def _get_model_dir(tf_config, model_dir):
+  """Returns `model_dir` based user provided `tf_config` or `model_dir`."""
+  # pylint: disable=g-explicit-bool-comparison
+
+  # Empty string is treated as False in Python condition check, which triggers
+  # some confusing error messages. For example, 'a or b' returns None if a is ''
+  # and b is None. `None` is allowed for model_dir but '' is not allowed. Here,
+  # explicitly check empty string to provide clear error message.
+  if model_dir == '':
+    raise ValueError('model_dir should be non-empty.')
+
+  model_dir_in_tf_config = tf_config.get('model_dir')
+  if model_dir_in_tf_config == '':
+    raise ValueError('model_dir in TF_CONFIG should be non-empty.')
+
+  if model_dir_in_tf_config:
+    if model_dir and model_dir_in_tf_config != model_dir:
+      raise ValueError(
+          '`model_dir` provided in RunConfig construct, if set, '
+          'must have the same value as the model_dir in TF_CONFIG. '
+          'model_dir: {}\nTF_CONFIG["model_dir"]: {}.\n'.format(
+              model_dir, model_dir_in_tf_config))
+
+    logging.info('Using model_dir in TF_CONFIG: %s', model_dir_in_tf_config)
+
+  return model_dir or model_dir_in_tf_config
diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py
index ecc850d5405837e8bf803b9a7c8c156ff19b7a90..a3eef4c53fd90a1ce69f3067d0b5c15909f43cec 100644
--- a/tensorflow/python/estimator/run_config_test.py
+++ b/tensorflow/python/estimator/run_config_test.py
@@ -31,6 +31,10 @@ _SAVE_CKPT_ERR = (
     '`save_checkpoints_steps` and `save_checkpoints_secs` cannot be both set.'
 )
 _MODEL_DIR_ERR = 'model_dir should be non-empty'
+_MODEL_DIR_TF_CONFIG_ERR = 'model_dir in TF_CONFIG should be non-empty'
+_MODEL_DIR_MISMATCH_ERR = (
+    '`model_dir` provided in RunConfig construct, if set, '
+    'must have the same value as the model_dir in TF_CONFIG. ')
 _SAVE_SUMMARY_STEPS_ERR = 'save_summary_steps should be >= 0'
 _SAVE_CKPT_STEPS_ERR = 'save_checkpoints_steps should be >= 0'
 _SAVE_CKPT_SECS_ERR = 'save_checkpoints_secs should be >= 0'
@@ -40,6 +44,8 @@ _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0'
 _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer'
 _ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.'
 _ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.'
+_INVALID_TASK_TYPE_FOR_EVAL_MASTER = (
+    'Key.*eval.*master.*should not be set for task type other than')
 _MISSING_CHIEF_ERR = 'If "cluster" is set .* it must have one "chief" node'
 _MISSING_TASK_TYPE_ERR = 'If "cluster" is set .* task type must be set'
 _MISSING_TASK_ID_ERR = 'If "cluster" is set .* task index must be set'
@@ -256,8 +262,9 @@ class RunConfigDistributedSettingTest(test.TestCase):
             'index': 0
         }
     }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
     self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
+        run_config=run_config,
         expected_cluster_spec={},
         expected_task_type=run_config_lib.TaskType.WORKER,
         expected_task_id=0,
@@ -266,6 +273,33 @@ class RunConfigDistributedSettingTest(test.TestCase):
         expected_is_chief=True,
         expected_num_worker_replicas=1,
         expected_num_ps_replicas=0)
+    self.assertEqual(0, run_config.global_id_in_cluster)
+
+  def test_session_master_for_local(self):
+    tf_config = {'session_master': '_my_master'}
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec={},
+        expected_task_type=run_config_lib.TaskType.WORKER,
+        expected_task_id=0,
+        expected_master='_my_master',
+        expected_evaluation_master='',
+        expected_is_chief=True,
+        expected_num_worker_replicas=1,
+        expected_num_ps_replicas=0)
+
+  def test_eval_session_master_for_local(self):
+    tf_config = {'eval_session_master': '_my_eval_master'}
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec={},
+        expected_task_type=run_config_lib.TaskType.WORKER,
+        expected_task_id=0,
+        expected_master='',
+        expected_evaluation_master='_my_eval_master',
+        expected_is_chief=True,
+        expected_num_worker_replicas=1,
+        expected_num_ps_replicas=0)
 
   def test_invalid_task_type_for_local(self):
     tf_config = {
@@ -310,6 +344,50 @@ class RunConfigDistributedSettingTest(test.TestCase):
         expected_num_worker_replicas=4,
         expected_num_ps_replicas=2)
 
+  def test_session_master_from_single_node_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        },
+        'session_master': '_my_master'
+    }
+    self.assertEqual('_my_master',
+                     _create_run_config_with_cluster_spec(tf_config).master)
+
+  def test_session_master_from_multiple_nodes_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        },
+        'session_master': '_my_master'
+    }
+    self.assertEqual('_my_master',
+                     _create_run_config_with_cluster_spec(tf_config).master)
+
+  def test_fail_with_eval_session_master_for_non_evaluator(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        },
+        'eval_session_master': 'grpc://123',
+    }
+    with self.assertRaisesRegexp(
+        ValueError, _INVALID_TASK_TYPE_FOR_EVAL_MASTER):
+      _create_run_config_with_cluster_spec(tf_config)
+
   def test_fail_with_multiple_chief_nodes(self):
     tf_config = {
         'cluster': {
@@ -344,7 +422,7 @@ class RunConfigDistributedSettingTest(test.TestCase):
         expected_cluster_spec=tf_config['cluster'],
         expected_task_type=run_config_lib.TaskType.CHIEF,
         expected_task_id=0,
-        expected_master='grpc://host0:0',
+        expected_master='',
         expected_evaluation_master='',
         expected_is_chief=True,
         expected_num_worker_replicas=1,
@@ -468,8 +546,9 @@ class RunConfigDistributedSettingTest(test.TestCase):
             'index': 12
         }
     }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
     self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
+        run_config=run_config,
         expected_cluster_spec={},
         expected_task_type=run_config_lib.TaskType.EVALUATOR,
         expected_task_id=12,
@@ -478,6 +557,23 @@ class RunConfigDistributedSettingTest(test.TestCase):
         expected_is_chief=False,  # evaluator is never chief.
         expected_num_worker_replicas=0,  # evaluator is not in training cluster.
         expected_num_ps_replicas=0)
+    self.assertIsNone(run_config.global_id_in_cluster)
+
+  def test_eval_master_for_evaluator(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.EVALUATOR,
+            'index': 12
+        },
+        'eval_session_master': 'grpc://123',
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual('grpc://123', run_config.evaluation_master)
 
   def test_fail_with_invalid_task_index_for_evaluator(self):
     tf_config = {
@@ -492,6 +588,71 @@ class RunConfigDistributedSettingTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, _NEGATIVE_TASK_INDEX_ERR):
       _create_run_config_with_cluster_spec(tf_config)
 
+  def test_global_id_in_cluster_for_chief(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(0, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_worker(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.WORKER,
+            'index': 2,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(3, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_ps(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.PS,
+            'index': 1,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(5, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_multipe_worker_types(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            'worker': ['host3:3', 'host4:4', 'host5:5'],
+            'other_type': ['host3:1', 'host4:2'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': 'other_type',
+            'index': 1,
+        },
+    }
+    # Though 'other_type' is defined after 'worker', based on alphabetical
+    # order, the task type order should be 'chief', 'other_type', 'worker',
+    # 'ps', where 'chief' and 'ps' are predefined to be the top and last in the
+    # order list.
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(2, run_config.global_id_in_cluster)
+
 
 class RunConfigDistributedSettingWithMasterTest(test.TestCase):
 
@@ -524,7 +685,7 @@ class RunConfigDistributedSettingWithMasterTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_FOR_LOCAL_ERR):
       _create_run_config_with_cluster_spec(tf_config)
 
-  def test_master_tf_config(self):
+  def test_master_node(self):
     tf_config = {
         'cluster': {
             run_config_lib.TaskType.MASTER: ['host0:0'],
@@ -547,6 +708,50 @@ class RunConfigDistributedSettingWithMasterTest(test.TestCase):
         expected_num_worker_replicas=4,
         expected_num_ps_replicas=2)
 
+  def test_session_master_in_single_node_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0
+        },
+        'session_master': '_my_master'
+    }
+    self.assertEqual('_my_master',
+                     _create_run_config_with_cluster_spec(tf_config).master)
+
+  def test_session_master_in_multiple_nodes_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0
+        },
+        'session_master': '_my_master'
+    }
+    self.assertEqual('_my_master',
+                     _create_run_config_with_cluster_spec(tf_config).master)
+
+  def test_fail_with_eval_session_master(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0
+        },
+        'eval_session_master': 'grpc://123',
+    }
+    with self.assertRaisesRegexp(
+        ValueError, _INVALID_TASK_TYPE_FOR_EVAL_MASTER):
+      _create_run_config_with_cluster_spec(tf_config)
+
   def test_fail_with_multiple_master_nodes(self):
     tf_config = {
         'cluster': {
@@ -572,7 +777,7 @@ class RunConfigDistributedSettingWithMasterTest(test.TestCase):
         expected_cluster_spec=tf_config['cluster'],
         expected_task_type=run_config_lib.TaskType.MASTER,
         expected_task_id=0,
-        expected_master='grpc://host0:0',
+        expected_master='',
         expected_evaluation_master='',
         expected_is_chief=True,
         expected_num_worker_replicas=1,
@@ -716,6 +921,71 @@ class RunConfigDistributedSettingWithMasterTest(test.TestCase):
                                  _INVALID_CHIEF_IN_CLUSTER_WITH_MASTER_ERR):
       _create_run_config_with_cluster_spec(tf_config)
 
+  def test_global_id_in_cluster_for_master(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(0, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_worker(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.WORKER,
+            'index': 2,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(3, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_ps(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.PS,
+            'index': 1,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(5, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_multipe_worker_types(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            'worker': ['host3:3', 'host4:4', 'host5:5'],
+            'other_type': ['host3:1', 'host4:2'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': 'other_type',
+            'index': 1,
+        },
+    }
+    # Though 'other_type' is defined after 'worker', based on alphabetical
+    # order, the task type order should be 'chief', 'other_type', 'worker',
+    # 'ps', where 'chief' and 'ps' are predefined to be the top and last in the
+    # order list.
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(2, run_config.global_id_in_cluster)
+
 
 class RunConfigSaveCheckpointsTest(test.TestCase):
 
@@ -793,5 +1063,45 @@ class RunConfigServiceKeyTest(test.TestCase):
       _create_run_config_with_cluster_spec(tf_config)
 
 
+class RunConfigModelDirTest(test.TestCase):
+
+  def test_default(self):
+    run_config = run_config_lib.RunConfig()
+    self.assertIsNone(run_config.model_dir)
+
+  def test_model_dir_in_constructor(self):
+    run_config = run_config_lib.RunConfig(model_dir='/tmp/123')
+    self.assertEqual('/tmp/123', run_config.model_dir)
+
+  def test_model_dir_in_tf_config(self):
+    tf_config = {
+        'model_dir': '/tmp/123',
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual('/tmp/123', run_config.model_dir)
+
+  def test_model_dir_both_set_in_both_constructor_and_tf_config(self):
+    model_dir = '/tmp/123'
+    tf_config = {'model_dir': model_dir}
+    kwargs = {'model_dir': model_dir}
+    run_config = _create_run_config_with_cluster_spec(tf_config, **kwargs)
+    self.assertEqual('/tmp/123', run_config.model_dir)
+
+  def test_model_dir_different_in_both_constructor_and_tf_config(self):
+    tf_config = {'model_dir': '/tmp/123'}
+    kwargs = {'model_dir': '/tmp/456'}
+    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_MISMATCH_ERR):
+      _create_run_config_with_cluster_spec(tf_config, **kwargs)
+
+  def test_fail_with_empty_string_in_constructor(self):
+    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
+      run_config_lib.RunConfig(model_dir='')
+
+  def test_fail_with_empty_string_in_tf_config(self):
+    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_TF_CONFIG_ERR):
+      tf_config = {'model_dir': ''}
+      _create_run_config_with_cluster_spec(tf_config)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py
index 1131995b3ef1a832c3312d27a46d8395d62cecc7..52fb1d39ae2e9c84e4269785a72be4f9a495b73c 100644
--- a/tensorflow/python/estimator/training.py
+++ b/tensorflow/python/estimator/training.py
@@ -43,6 +43,8 @@ _DELAY_SECS_PER_WORKER = 5
 _TF_CONFIG_ENV = 'TF_CONFIG'
 _ENVIRONMENT_KEY = 'environment'
 _ENVIRONMENT_GOOGLE_VALUE = 'google'
+_TRAINER_JOBS = (run_config_lib.TaskType.CHIEF, run_config_lib.TaskType.MASTER,
+                 run_config_lib.TaskType.WORKER)
 
 
 def _validate_input_fn(input_fn):
@@ -415,52 +417,17 @@ def train_and_evaluate(estimator, train_spec, eval_spec):
   Raises:
     ValueError: if environment variable `TF_CONFIG` is incorrectly set.
   """
-
-  if not isinstance(estimator, estimator_lib.Estimator):
-    raise TypeError('`estimator` must have type `tf.estimator.Estimator`, '
-                    'given {}'.format(type(estimator)))
-  config = estimator.config
-
   executor = _TrainingExecutor(estimator=estimator, train_spec=train_spec,
                                eval_spec=eval_spec)
 
-  if (not config.cluster_spec and
-      config.task_type != run_config_lib.TaskType.EVALUATOR):
-    logging.info('Running training and evaluation locally (non-distributed).')
-    executor.run_local()
-    return
-
-  # Distributed case.
-  if not config.task_type:
-    # TODO(xiejw): Improve the error message about how to set the TF_CONFIG
-    # correctly.
-    raise ValueError(
-        '`estimator.config` must have task_type set. This usually means '
-        'TF_CONFIG environment is not set correctly.')
-
-  if config.task_type == 'local':
-    raise ValueError(
-        '`task.type` in TF_CONFIG cannot be `local`. Leaving `cluster` and '
-        '`task` properties in TF_CONFIG absent triggers train and evaluate '
-        '`Estimator` locally (non-distributed).')
-
+  config = estimator.config
   if (config.task_type == run_config_lib.TaskType.EVALUATOR and
       config.task_id > 0):
     raise ValueError(
         'For distributed training, there can only be one `evaluator` task '
         '(with task id 0).  Given task id {}'.format(config.task_id))
 
-  # For task type foo, call executor.run_foo.
-  available_tasks = [x for x in dir(executor) if x.startswith('run_')
-                     and x != 'run_local'
-                     and callable(getattr(executor, x))]
-  task_to_run = 'run_' + config.task_type
-  if task_to_run not in available_tasks:
-    raise ValueError(
-        'Task type {} is not supported. Supported task types are {}'.format(
-            config.task_type, [x[len('run_'):] for x in available_tasks]))
-  getattr(executor, task_to_run)()
-  return
+  executor.run()
 
 
 class _StopAtSecsHook(session_run_hook.SessionRunHook):
@@ -486,7 +453,12 @@ class _TrainingExecutor(object):
   training and evaluation based on the setting in `tf.estimator.RunConfig`.
   """
 
-  def __init__(self, estimator, train_spec, eval_spec):
+  def __init__(self,
+               estimator,
+               train_spec,
+               eval_spec,
+               train_hooks=None,
+               continuous_eval_listener=None):
     if not isinstance(estimator, estimator_lib.Estimator):
       raise TypeError('`estimator` must have type `tf.estimator.Estimator`.')
     self._estimator = estimator
@@ -499,10 +471,65 @@ class _TrainingExecutor(object):
       raise TypeError('`eval_spec` must have type `tf.estimator.EvalSpec`.')
     self._eval_spec = eval_spec
 
+    self._train_hooks = _validate_hooks(train_hooks)
+
+    if (continuous_eval_listener and
+        not isinstance(continuous_eval_listener, _ContinuousEvalListener)):
+      raise TypeError('`continuous_eval_listener` must have type '
+                      '`_ContinuousEvalListener`.')
+    self._continuous_eval_listener = (
+        continuous_eval_listener or _ContinuousEvalListener())
+
   @property
   def estimator(self):
     return self._estimator
 
+  def run(self):
+    """Executes the run_foo for task type `foo`.
+
+    `_TrainingExecutor` predefines the procedure for task type 'chief',
+    'worker', 'ps', and 'evaluator'. For task type `foo`, the corresponding
+    procedure is `run_foo'. This `run` method invoke the procedure base on the
+    `RunConfig.task_type`.
+
+    Raises:
+      ValueError: if the estimator.config is mis-configured.
+    """
+    config = self._estimator.config
+
+    if (not config.cluster_spec and
+        config.task_type != run_config_lib.TaskType.EVALUATOR):
+      logging.info('Running training and evaluation locally (non-distributed).')
+      self.run_local()
+      return
+
+    # Distributed case.
+    if not config.task_type:
+      # TODO(xiejw): Improve the error message about how to set the TF_CONFIG
+      # correctly.
+      raise ValueError(
+          '`estimator.config` must have task_type set. This usually means '
+          'TF_CONFIG environment is not set correctly.')
+
+    if config.task_type == 'local':
+      raise ValueError(
+          '`task.type` in TF_CONFIG cannot be `local`. Leaving `cluster` and '
+          '`task` properties in TF_CONFIG absent triggers train and evaluate '
+          '`Estimator` locally (non-distributed).')
+
+    # For task type foo, call executor.run_foo.
+    available_tasks = [
+        x for x in dir(self)
+        if x.startswith('run_') and x != 'run_local' and
+        callable(getattr(self, x))
+    ]
+    task_to_run = 'run_' + config.task_type
+    if task_to_run not in available_tasks:
+      raise ValueError(
+          'Task type {} is not supported. Supported task types are {}'.format(
+              config.task_type, [x[len('run_'):] for x in available_tasks]))
+    getattr(self, task_to_run)()
+
   def run_chief(self):
     """Runs task chief."""
     # TODO(xiejw): To allow execution framework to add train hooks.
@@ -594,7 +621,8 @@ class _TrainingExecutor(object):
                            self._eval_spec.throttle_secs))
 
     stop_hook = _StopAtSecsHook(self._eval_spec.throttle_secs)
-    train_hooks = list(self._train_spec.hooks) + [stop_hook]
+    train_hooks = (
+        list(self._train_spec.hooks) + [stop_hook] + list(self._train_hooks))
     logging.info('Start train and evaluate loop. The evaluate will happen '
                  'after {} secs (eval_spec.throttle_secs) or training is '
                  'finished.'.format(self._eval_spec.throttle_secs))
@@ -613,27 +641,55 @@ class _TrainingExecutor(object):
       # _should_stop_local_train will then end the while True as the stopping
       # condition is satisfied (both checks use the same global_step value,
       # i.e., no race condition)
-      metrics = evaluator.evaluate_and_export()
+      eval_result = evaluator.evaluate_and_export()
 
-      if not metrics:
-        #  This is unexpected. Training should always end with a new checkpoint.
-        raise RuntimeError('There was no new checkpoint after the training.')
+      if eval_result.status != _EvalStatus.EVALUATED:
+        #  This is unexpected; should never happen.
+        #  Training should always end with a new checkpoint.
+        raise RuntimeError('There was no new checkpoint after the training. '
+                           'Eval status: {}'.format(eval_result.status))
 
-      if _should_stop_local_train(metrics[ops.GraphKeys.GLOBAL_STEP]):
+      if _should_stop_local_train(
+          eval_result.metrics[ops.GraphKeys.GLOBAL_STEP]):
         break
 
   def _start_std_server(self, config):
     """Creates, starts, and returns a server_lib.Server."""
-    if (not config.cluster_spec or not config.task_type or not config.master or
+    if (not config.cluster_spec or not config.task_type or
         config.task_id is None):
       raise RuntimeError('Could not start server; be sure to specify '
-                         'cluster_spec, task_type, master, and task in '
+                         'cluster_spec, task_type, and task in '
                          'RunConfig or set the TF_CONFIG environment variable.')
+
+    if not config.master:
+      jobs = config.cluster_spec.jobs
+      if (len(jobs) == 1 and len(config.cluster_spec.job_tasks(jobs[0])) == 1
+          and config.task_type in _TRAINER_JOBS):
+        # For distributed training, config.master is empty if and only if it has
+        # a single node in the cluster spec. In this case, we should not start
+        # the server.
+        logging.info('Skip starting Tensorflow server as there is only one '
+                     'node in the cluster.')
+        return
+      else:
+        raise RuntimeError(
+            'Could not start server; be sure to specify master in '
+            'RunConfig or set the TF_CONFIG environment variable.')
+
+    logging.info('Start Tensorflow server.')
+
+    if config.session_config is None:
+      session_config=config_pb2.ConfigProto(log_device_placement=False)
+    else:
+      session_config=config_pb2.ConfigProto(
+          log_device_placement=False,
+          gpu_options=config.session_config.gpu_options)
+
     server = server_lib.Server(
         config.cluster_spec,
         job_name=config.task_type,
         task_index=config.task_id,
-        config=config_pb2.ConfigProto(log_device_placement=False),
+        config=session_config,
         start=False)
     server.start()
     return server
@@ -662,10 +718,11 @@ class _TrainingExecutor(object):
                    start_delay_secs)
       time.sleep(start_delay_secs)
 
-    self._estimator.train(input_fn=self._train_spec.input_fn,
-                          max_steps=self._train_spec.max_steps,
-                          hooks=self._train_spec.hooks,
-                          saving_listeners=saving_listeners)
+    self._estimator.train(
+        input_fn=self._train_spec.input_fn,
+        max_steps=self._train_spec.max_steps,
+        hooks=list(self._train_spec.hooks) + list(self._train_hooks),
+        saving_listeners=saving_listeners)
 
   def _start_continuous_evaluation(self):
     """Repeatedly calls `Estimator` evaluate and export until training ends."""
@@ -678,9 +735,11 @@ class _TrainingExecutor(object):
     evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec,
                                              self._train_spec.max_steps)
 
-    while True:
-      if latest_eval_result:
-        global_step = latest_eval_result.get(ops.GraphKeys.GLOBAL_STEP)
+    should_early_stop = False
+    while not should_early_stop:
+      if (latest_eval_result and
+          latest_eval_result.status == _EvalStatus.EVALUATED):
+        global_step = latest_eval_result.metrics.get(ops.GraphKeys.GLOBAL_STEP)
         if (global_step and self._train_spec.max_steps and
             global_step >= self._train_spec.max_steps):
           logging.info(
@@ -689,21 +748,46 @@ class _TrainingExecutor(object):
               self._train_spec.max_steps)
           return
 
-      # Final export signal: For any eval result with global_step >= train
-      # max_steps, the evaluator will send the final export signal. The next
-      # iteration of while loop will end the continuous eval as the stopping
-      # condition is satisfied (both checks use the same global_step value,
-      # i.e., no race condition)
-      start = time.time()
-      latest_eval_result = evaluator.evaluate_and_export()
+      latest_eval_result, should_early_stop = self._execute_evaluator_once(
+          evaluator, self._continuous_eval_listener,
+          self._eval_spec.throttle_secs)
+
+  def _execute_evaluator_once(self, evaluator, continuous_eval_listener,
+                              throttle_secs):
+    """Executes the `evaluator`."""
+    start = time.time()
 
-      # Throttle if necessary.
-      elapsed_time = time.time() - start
-      difference = self._eval_spec.throttle_secs  - elapsed_time
-      if difference > 0:
-        logging.info('Waiting %f secs before starting next eval run.',
-                     difference)
-        time.sleep(difference)
+    eval_result = None
+    should_early_stop = False
+
+    if not continuous_eval_listener.before_eval():
+      logging.info('Exiting evaluation, as requested by '
+                   '_ContinuousEvalListener.before_eval.')
+      should_early_stop = True
+      return (eval_result, should_early_stop)
+
+    # Final export signal: For any eval result with global_step >= train
+    # max_steps, the evaluator will send the final export signal. The next
+    # iteration of while loop will end the continuous eval as the stopping
+    # condition is satisfied (both checks use the same global_step value,
+    # i.e., no race condition)
+    eval_result = evaluator.evaluate_and_export()
+
+    if not self._continuous_eval_listener.after_eval(eval_result):
+      logging.info('Exiting evaluation, as requested by '
+                   '_ContinuousEvalListener.after_eval.')
+      should_early_stop = True
+      return (eval_result, should_early_stop)
+
+    # Throttle if necessary.
+    elapsed_time = time.time() - start
+    difference = throttle_secs  - elapsed_time
+    if difference > 0:
+      logging.info('Waiting %f secs before starting next eval run.',
+                   difference)
+      time.sleep(difference)
+
+    return (eval_result, should_early_stop)
 
   class _Evaluator(object):
     """A helper class to call `Estimator.evaluate` and export model."""
@@ -724,8 +808,7 @@ class _TrainingExecutor(object):
       """Evaluate and (maybe) export the current model.
 
       Returns:
-        Evaluation results. Returns `None` if current round of evaluation is
-        skipped.
+        An `EvalResult` instance.
 
       Raises:
         RuntimeError: for any unexpected internal error.
@@ -735,39 +818,32 @@ class _TrainingExecutor(object):
       if not latest_ckpt_path:
         self._log_err_msg('Estimator is not trained yet. Will start an '
                           'evaluation when a checkpoint is ready.')
-        return None
+        return _EvalResult(status=_EvalStatus.MISSING_CHECKPOINT)
 
       if latest_ckpt_path == self._previous_ckpt_path:
         self._log_err_msg(
             'No new checkpoint ready for evaluation. Skip the current '
             'evaluation pass as evaluation results are expected to be same '
             'for the same checkpoint.')
-        return None
-      eval_result = self._estimator.evaluate(
+        return _EvalResult(status=_EvalStatus.NO_NEW_CHECKPOINT)
+
+      metrics = self._estimator.evaluate(
           input_fn=self._eval_spec.input_fn,
           steps=self._eval_spec.steps,
           name=self._eval_spec.name,
           checkpoint_path=latest_ckpt_path,
           hooks=self._eval_spec.hooks)
 
-      if not eval_result:
-        raise RuntimeError(
-            'Internal error: `Estimator.evaluate` should never return empty '
-            'result.')
-      if not isinstance(eval_result, dict):
-        raise TypeError(
-            '`Estimator.evaluate` should return dict. Given {}.'.format(
-                type(eval_result)))
-      if ops.GraphKeys.GLOBAL_STEP not in eval_result:
-        raise RuntimeError(
-            'Internal error: `Estimator.evaluate` result should have '
-            '`global_step` in result. Given {}'.format(eval_result))
+      # _EvalResult validates the metrics.
+      eval_result = _EvalResult(
+          status=_EvalStatus.EVALUATED,
+          metrics=metrics,
+          checkpoint_path=latest_ckpt_path)
 
-      is_the_final_export = (eval_result[ops.GraphKeys.GLOBAL_STEP] >=
-                             self._max_training_steps
-                             if self._max_training_steps else False)
-      self._export_eval_result(eval_result, latest_ckpt_path,
-                               is_the_final_export)
+      is_the_final_export = (
+          eval_result.metrics[ops.GraphKeys.GLOBAL_STEP] >=
+          self._max_training_steps if self._max_training_steps else False)
+      self._export_eval_result(eval_result, is_the_final_export)
 
       if is_the_final_export:
         logging.debug('Calling exporter with the `is_the_final_export=True`.')
@@ -784,8 +860,7 @@ class _TrainingExecutor(object):
         logging.warning(message)
         self._last_warning_time = current_time
 
-    def _export_eval_result(self, eval_result, checkpoint_path,
-                            is_the_final_export):
+    def _export_eval_result(self, eval_result, is_the_final_export):
       """Export `eval_result` according to exporters in `EvalSpec`."""
       export_dir_base = os.path.join(
           compat.as_str_any(self._estimator.model_dir),
@@ -797,6 +872,114 @@ class _TrainingExecutor(object):
             export_path=os.path.join(
                 compat.as_str_any(export_dir_base),
                 compat.as_str_any(exporter.name)),
-            checkpoint_path=checkpoint_path,
-            eval_result=eval_result,
+            checkpoint_path=eval_result.checkpoint_path,
+            eval_result=eval_result.metrics,
             is_the_final_export=is_the_final_export)
+
+
+class _EvalStatus(object):
+  """The status of an evaluation event.
+
+  For local training and evaluation, the status can only be `EVALUATED` as
+  `Estimator.train` always generates a new checkpoint.
+
+  For distributed training and evaluation, a separated evaluator keeps looking
+  for new checkpoint. So, multiple situations might occur:
+
+  - EVALUATED: A new checkpoint is found since last evaluation.
+      `Estimator.evaluate` will be invoked.
+  - MISSING_CHECKPOINT: No checkpoint can be found. Typically, this means
+      the trainer has not yet produced any checkpoint.
+  - NO_NEW_CHECKPOINT: No new checkpoint can be found since last evaluation.
+      Typically, this means the trainer has not yet produced any new checkpoint.
+  """
+
+  EVALUATED = 'evaluated'
+  MISSING_CHECKPOINT = 'missing checkpoint'
+  NO_NEW_CHECKPOINT = 'no new checkpoint'
+
+
+class _EvalResult(
+    collections.namedtuple('EvalResult',
+                           ['status', 'metrics', 'checkpoint_path'])):
+  """_EvalResult holds the result of an evaluation event."""
+
+  def __new__(cls, status, metrics=None, checkpoint_path=None):
+    """Creates a validated `_EvalResult`.
+
+    Args:
+      status: See `_EvalStatus`.
+      metrics: The evaluation results returned by `Estimator.evaluate`. Only set
+          if status is `EVALUATED`.
+      checkpoint_path: The corresponding checkpoint path for the `metrics`. Only
+          set if status is `EVALUATED`.
+    Returns:
+      A validated `_EvalResult` object.
+
+    Raises:
+      ValueError: If validation fails.
+      TypeError: If any of the arguments is not the expected type.
+    """
+
+    if status != _EvalStatus.EVALUATED:
+      if metrics:
+        raise ValueError(
+            'metrics must be `None` if status is not {}; got status {},'
+            ' metrics {}'.format(_EvalStatus.EVALUATED, status, metrics))
+      if checkpoint_path:
+        raise ValueError(
+            'checkpoint must be `None` if status is not {}; got status {}, '
+            'checkpoint_path {}'.format(
+                _EvalStatus.EVALUATED, status, checkpoint_path))
+      return super(_EvalResult, cls).__new__(cls, status, metrics,
+                                             checkpoint_path)
+
+    # Now, evaluated case.
+    assert status == _EvalStatus.EVALUATED
+
+    # Validates metrics.
+    if not metrics:
+      raise ValueError(
+          'Internal error: `Estimator.evaluate` should never return empty '
+          'metrics.')
+    if not isinstance(metrics, dict):
+      raise TypeError(
+          '`Estimator.evaluate` should return dict. Given {}.'.format(
+              type(metrics)))
+    if ops.GraphKeys.GLOBAL_STEP not in metrics:
+      raise ValueError(
+          'Internal error: `Estimator.evaluate` result should have '
+          '`global_step` in result. Given {}'.format(metrics))
+
+    # Validates checkpoint_path.
+    if not checkpoint_path:
+      raise ValueError(
+          'Internal error: `checkpoint_path` should never be empty.')
+
+    return super(_EvalResult, cls).__new__(cls, status, metrics,
+                                           checkpoint_path)
+
+
+class _ContinuousEvalListener(object):
+  """Interface for listeners that take action before or after evaluation."""
+
+  def before_eval(self):
+    """Called before evaluation.
+
+    Returns:
+      `False` if you want to skip the current evaluation and early stop the
+      continuous evaluation; `True` otherwise.
+    """
+    return True
+
+  def after_eval(self, eval_result):
+    """Called after the evaluation is executed.
+
+    Args:
+      eval_result: An `_EvalResult` instance.
+
+    Returns:
+      False if you want to early stop continuous evaluation; `True` otherwise.
+    """
+    del eval_result
+    return True
diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
index 17d018aa8808266c273a282e4042817d1368bdfe..4f7da848086514b6241799645997c8c6a246631f 100644
--- a/tensorflow/python/estimator/training_test.py
+++ b/tensorflow/python/estimator/training_test.py
@@ -72,6 +72,7 @@ _NONE_EXPORTER_NAME_MSG = (
     'An Exporter cannot have a name that is `None` or empty.')
 _INVALID_TRAIN_SPEC_MSG = '`train_spec` must have type `tf.estimator.TrainSpec`'
 _INVALID_EVAL_SPEC_MSG = '`eval_spec` must have type `tf.estimator.EvalSpec`'
+_INVALID_EVAL_LISTENER_MSG = 'must have type `_ContinuousEvalListener`'
 _INVALID_CONFIG_FOR_STD_SERVER_MSG = 'Could not start server; .*TF_CONFIG'
 _INVALID_LOCAL_TASK_WITH_CLUSTER = '`task.type` in TF_CONFIG cannot be `local`'
 _INVALID_TASK_TYPE = '`estimator.config` must have task_type set.'
@@ -81,7 +82,7 @@ _INVALID_TASK_TYPE = '`estimator.config` must have task_type set.'
 _INVALID_TASK_TO_RUN = (
     'Task type .* is not supported. Supported task types are ((?!local).)*$')
 _INVALID_EMPTY_EVAL_RESULT_ERR = (
-    'Internal error: `Estimator.evaluate` should never return empty result')
+    'Internal error: `Estimator.evaluate` should never return empty metrics')
 _INVALID_EVAL_RESULT_TYPE_ERR = '`Estimator.evaluate` should return dict.'
 _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR = (
     'Internal error: `Estimator.evaluate` result should have `global_step`')
@@ -311,61 +312,21 @@ class EvalSpecTest(test.TestCase):
       training.EvalSpec(input_fn=lambda: 1, exporters=_create_exporter(None))
 
 
-class TrainAndEvaluteTest(test.TestCase):
+class TrainAndEvaluateTest(test.TestCase):
 
-  def _mock_executor_instance(self):
-    mock_instance = test.mock.Mock()
-    mock_instance.call_task = {}
-
-    def task_fn(name):
-      def _fn():
-        mock_instance.call_task[name] = 1
-      return _fn
-
-    mock_instance.run_chief = task_fn('chief')
-    mock_instance.run_master = task_fn('master')
-    mock_instance.run_ps = task_fn('ps')
-    mock_instance.run_evaluator = task_fn('evaluator')
-    mock_instance.run_worker = task_fn('worker')
-    mock_instance.run_local = task_fn('local')
-
-    return mock_instance
-
-  def _test_run_task_in_distributed_training(self, run_config):
+  def test_run_task(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = run_config
     mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     with test.mock.patch.object(training, '_TrainingExecutor') as mock_executor:
-      mock_executor_instance = self._mock_executor_instance()
+      mock_executor_instance = test.mock.Mock()
       mock_executor.return_value = mock_executor_instance
       training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec)
       mock_executor.assert_called_with(estimator=mock_est,
                                        train_spec=mock_train_spec,
                                        eval_spec=mock_eval_spec)
-      return mock_executor_instance
-
-  def test_run_chief(self):
-    mock_executor = self._test_run_task_in_distributed_training(
-        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_CHIEF))
-    self.assertEqual(1, mock_executor.call_task['chief'])
-
-  def test_run_worker(self):
-    mock_executor = self._test_run_task_in_distributed_training(
-        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_WORKER))
-    self.assertEqual(1, mock_executor.call_task['worker'])
-
-  def test_run_ps(self):
-    mock_executor = self._test_run_task_in_distributed_training(
-        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_PS))
-    self.assertEqual(1, mock_executor.call_task['ps'])
-
-  def test_run_evaluator(self):
-    mock_executor = self._test_run_task_in_distributed_training(
-        run_config=_create_run_config_with_cluster_spec(
-            _TF_CONFIG_FOR_EVALUATOR))
-    self.assertEqual(1, mock_executor.call_task['evaluator'])
+      self.assertTrue(mock_executor_instance.run.called)
 
   def test_error_out_if_evaluator_task_id_is_non_zero(self):
     tf_config = {
@@ -377,93 +338,15 @@ class TrainAndEvaluteTest(test.TestCase):
             'index': 1
         }
     }
-    with self.assertRaisesRegexp(ValueError, _INVALID_EVAL_TASK_ID_ERR):
-      self._test_run_task_in_distributed_training(
-          run_config=_create_run_config_with_cluster_spec(tf_config))
-
-  def test_run_local(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = run_config_lib.RunConfig()
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    with test.mock.patch.object(training, '_TrainingExecutor') as mock_executor:
-      mock_executor_instance = self._mock_executor_instance()
-      mock_executor.return_value = mock_executor_instance
-      training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec)
-      self.assertEqual(1, mock_executor_instance.call_task['local'])
-
-      mock_executor.assert_called_with(estimator=mock_est,
-                                       train_spec=mock_train_spec,
-                                       eval_spec=mock_eval_spec)
 
-  def test_invalid_local_task(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            'local': ['hos1:1'],
-        },
-        'task': {
-            'type': 'local',
-            'index': 0
-        }
-    }
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     mock_est.config = _create_run_config_with_cluster_spec(tf_config)
     mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
-    with self.assertRaisesRegexp(ValueError, _INVALID_LOCAL_TASK_WITH_CLUSTER):
+    with self.assertRaisesRegexp(ValueError, _INVALID_EVAL_TASK_ID_ERR):
       training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec)
 
-  def test_unsupported_task_due_to_missing_run_task(self):
-    unsupported_task = 'alloc'
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            unsupported_task: ['hos1:1'],
-        },
-        'task': {
-            'type': unsupported_task,
-            'index': 0
-        }
-    }
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    with test.mock.patch.object(training, '_TrainingExecutor') as mock_executor:
-      # mock_instance has no run_alloc method.
-      mock_instance = self._mock_executor_instance()
-      mock_executor.return_value = mock_instance
-      with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TO_RUN):
-        training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec)
-
-  def test_unsupported_task_due_to_not_callable(self):
-    unsupported_task = 'alloc'
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            unsupported_task: ['hos1:1'],
-        },
-        'task': {
-            'type': unsupported_task,
-            'index': 0
-        }
-    }
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    with test.mock.patch.object(training, '_TrainingExecutor') as mock_executor:
-      mock_instance = self._mock_executor_instance()
-      mock_instance.run_alloc = 123  # not callable
-      mock_executor.return_value = mock_instance
-      with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TO_RUN):
-        training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec)
-
   def test_invalid_estimator(self):
     invalid_estimator = object()
     mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
@@ -473,19 +356,6 @@ class TrainAndEvaluteTest(test.TestCase):
       training.train_and_evaluate(invalid_estimator, mock_train_spec,
                                   mock_eval_spec)
 
-  def test_invalid_task_type(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = test.mock.Mock()
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.Mock()
-    mock_est.config.cluster_spec = {'1': 'dummy'}
-    mock_est.config.task_type = ''
-
-    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE):
-      training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec)
-
 
 class TrainingExecutorConstructorTest(test.TestCase):
   """Tests constructor of _TrainingExecutor."""
@@ -522,6 +392,29 @@ class TrainingExecutorConstructorTest(test.TestCase):
     with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
       training._TrainingExecutor(estimator, train_spec, invalid_eval_spec)
 
+  def test_invalid_train_hooks(self):
+    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
+    train_spec = training.TrainSpec(input_fn=lambda: 1)
+    eval_spec = training.EvalSpec(input_fn=lambda: 1)
+    invalid_train_hooks = [object()]
+
+    with self.assertRaisesRegexp(TypeError, _INVALID_HOOK_MSG):
+      training._TrainingExecutor(
+          estimator, train_spec, eval_spec, train_hooks=invalid_train_hooks)
+
+  def test_invalid_continuous_eval_listener(self):
+    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
+    train_spec = training.TrainSpec(input_fn=lambda: 1)
+    eval_spec = training.EvalSpec(input_fn=lambda: 1)
+    invalid_continuous_eval_listener = object()
+
+    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_LISTENER_MSG):
+      training._TrainingExecutor(
+          estimator,
+          train_spec,
+          eval_spec,
+          continuous_eval_listener=invalid_continuous_eval_listener)
+
 
 class _TrainingExecutorTrainingTest(object):
   """Tests training of _TrainingExecutor."""
@@ -530,6 +423,8 @@ class _TrainingExecutorTrainingTest(object):
     self._run_config = run_config
 
   def _run_task(self, executor):
+    # We should not call executor.run as the test here is intended to test
+    # run_foo explicitly (foo is the task type).
     return getattr(executor, 'run_' + self._run_config.task_type)()
 
   @test.mock.patch.object(time, 'sleep')
@@ -554,19 +449,40 @@ class _TrainingExecutorTrainingTest(object):
 
     self.assertTrue(mock_server_instance.start.called)
 
-    mock_est.train.assert_called_with(input_fn=train_spec.input_fn,
-                                      max_steps=train_spec.max_steps,
-                                      hooks=train_spec.hooks,
-                                      saving_listeners=test.mock.ANY)
+    mock_est.train.assert_called_with(
+        input_fn=train_spec.input_fn,
+        max_steps=train_spec.max_steps,
+        hooks=list(train_spec.hooks),
+        saving_listeners=test.mock.ANY)
     mock_est.evaluate.assert_not_called()
     mock_est.export_savedmodel.assert_not_called()
 
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_train_with_train_hooks(self, unused_mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = self._run_config
+    train_spec = training.TrainSpec(
+        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+    extra_hooks = [_FakeHook()]
+
+    executor = training._TrainingExecutor(
+        mock_est, train_spec, mock_eval_spec, train_hooks=extra_hooks)
+    self._run_task(executor)
+
+    mock_est.train.assert_called_with(
+        input_fn=train_spec.input_fn,
+        max_steps=train_spec.max_steps,
+        hooks=list(train_spec.hooks) + extra_hooks,
+        saving_listeners=test.mock.ANY)
+
   @test.mock.patch.object(time, 'sleep')
   @test.mock.patch.object(server_lib, 'Server')
   def test_no_server_startup_in_google(self, mock_server, unused_mock_sleep):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     mock_est.config = self._run_config
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     executor = training._TrainingExecutor(mock_est, mock_train_spec,
@@ -598,7 +514,8 @@ class _TrainingExecutorTrainingTest(object):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec(
+        {'worker': ['dummy', 'dummy1']})
     mock_est.config.master = ''
     mock_est.config.task_type = 'worker'
     mock_est.config.task_id = 2
@@ -608,13 +525,33 @@ class _TrainingExecutorTrainingTest(object):
       self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
                                                 mock_eval_spec))
 
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_single_worker_node_with_empty_tf_master(
+      self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    # Single node cluster.
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
+    mock_est.config.master = ''
+    mock_est.config.task_type = 'worker'
+    mock_est.config.task_id = 2
+
+    self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
+                                              mock_eval_spec))
+    self.assertTrue(mock_est.train.called)
+    mock_server.assert_not_called()
+
   def test_fail_with_empty_task_type(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
     mock_est.config.master = 'grpc://...'
     mock_est.config.task_type = ''
     mock_est.config.task_id = 2
@@ -630,7 +567,7 @@ class _TrainingExecutorTrainingTest(object):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
     mock_est.config.master = 'grpc://...'
     mock_est.config.task_type = 'worker'
     mock_est.config.task_id = None
@@ -655,7 +592,7 @@ class TrainingExecutorRunWorkerTest(_TrainingExecutorTrainingTest,
   def test_delay_for_worker(self, _):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     mock_est.config = self._run_config
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     executor = training._TrainingExecutor(mock_est, mock_train_spec,
@@ -682,7 +619,7 @@ class TrainingExecutorRunChiefTest(_TrainingExecutorTrainingTest,
   def test_no_delay_for_chief(self, _):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     mock_est.config = self._run_config
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     executor = training._TrainingExecutor(mock_est, mock_train_spec,
@@ -705,7 +642,8 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
     mock_est.config = self._run_config
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, max_steps=123)
+    mock_train_spec = test.mock.Mock(
+        spec=training.TrainSpec, max_steps=123, hooks=[])
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
 
     executor = training._TrainingExecutor(mock_est, mock_train_spec,
@@ -738,19 +676,42 @@ class TrainingExecutorRunMasterTest(test.TestCase):
 
     self.assertTrue(mock_server_instance.start.called)
 
-    mock_est.train.assert_called_with(input_fn=train_spec.input_fn,
-                                      max_steps=train_spec.max_steps,
-                                      hooks=train_spec.hooks,
-                                      saving_listeners=test.mock.ANY)
+    mock_est.train.assert_called_with(
+        input_fn=train_spec.input_fn,
+        max_steps=train_spec.max_steps,
+        hooks=list(train_spec.hooks),
+        saving_listeners=test.mock.ANY)
     mock_est.export_savedmodel.assert_not_called()
 
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_train_with_train_hooks(self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
+    mock_est.config = self._run_config
+    train_spec = training.TrainSpec(
+        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
+    extra_hooks = [_FakeHook()]
+
+    executor = training._TrainingExecutor(
+        mock_est, train_spec, mock_eval_spec, train_hooks=extra_hooks)
+    executor.run_master()
+
+    mock_est.train.assert_called_with(
+        input_fn=train_spec.input_fn,
+        max_steps=train_spec.max_steps,
+        hooks=list(train_spec.hooks) + extra_hooks,
+        saving_listeners=test.mock.ANY)
+
   @test.mock.patch.object(time, 'sleep')
   @test.mock.patch.object(server_lib, 'Server')
   def test_no_server_startup_in_google(self, mock_server, unused_mock_sleep):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
     mock_est.config = self._run_config
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, max_steps=123)
+    mock_train_spec = test.mock.Mock(
+        spec=training.TrainSpec, max_steps=123, hooks=[])
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
 
     executor = training._TrainingExecutor(mock_est, mock_train_spec,
@@ -768,7 +729,7 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
     mock_est.config.cluster_spec = None
     mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'worker'
+    mock_est.config.task_type = 'master'
     mock_est.config.task_id = 2
 
     with self.assertRaisesRegexp(RuntimeError,
@@ -782,23 +743,49 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec(
+        {'master': ['dummy'], 'worker': ['dummy1']})
     mock_est.config.master = ''
-    mock_est.config.task_type = 'worker'
-    mock_est.config.task_id = 2
+    mock_est.config.task_type = 'master'
+    mock_est.config.task_id = 0
 
     with self.assertRaisesRegexp(RuntimeError,
                                  _INVALID_CONFIG_FOR_STD_SERVER_MSG):
       training._TrainingExecutor(
           mock_est, mock_train_spec, mock_eval_spec).run_master()
 
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_single_master_node_with_empty_tf_master(
+      self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
+
+    mock_train_spec = test.mock.Mock(
+        spec=training.TrainSpec, max_steps=123, hooks=[])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec(
+        {'master': ['dummy']})
+    mock_est.config.master = ''
+    mock_est.config.task_type = 'master'
+    mock_est.config.task_id = 0
+
+    executor = training._TrainingExecutor(
+        mock_est, mock_train_spec, mock_eval_spec)
+    executor.run_master()
+
+    mock_server.assert_not_called()
+    self.assertTrue(mock_est.train.called)
+
   def test_fail_with_empty_task_type(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'master': ['dummy']})
     mock_est.config.master = 'grpc://...'
     mock_est.config.task_type = ''
     mock_est.config.task_id = 2
@@ -814,9 +801,9 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'worker': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'master': ['dummy']})
     mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'worker'
+    mock_est.config.task_type = 'master'
     mock_est.config.task_id = None
 
     with self.assertRaisesRegexp(RuntimeError,
@@ -993,6 +980,28 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
         hooks=eval_spec.hooks)
     self.assertFalse(mock_est.train.called)
 
+  def test_evaluate_with_train_hooks(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.latest_checkpoint.return_value = 'latest_it_is'
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1,
+        steps=2,
+        hooks=[_FakeHook()],
+        name='cont_eval',
+        start_delay_secs=0,
+        throttle_secs=0)
+
+    # The train_hooks will not be called during eval.
+    mock_hook = test.mock.Mock(spec=session_run_hook.SessionRunHook)
+    executor = training._TrainingExecutor(
+        mock_est, mock_train_spec, eval_spec, train_hooks=[mock_hook])
+    executor.run_evaluator()
+
+    mock_hook.begin.assert_not_called()
+
   def test_evaluate_multiple_times(self):
     training_max_step = 200
 
@@ -1036,6 +1045,88 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
     self.assertEqual(2, mock_est.times_export_was_called)
     self.assertEqual(1, mock_est.times_final_export_was_called)
 
+  def test_evaluate_listener_before_eval(self):
+    training_max_step = 200
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
+    # Without early stopping, this eval will be run twice.
+    mock_est.evaluate.side_effect = [{
+        _GLOBAL_STEP_KEY: training_max_step // 2
+    }, {
+        _GLOBAL_STEP_KEY: training_max_step
+    }]
+    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
+
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
+    mock_train_spec.max_steps = training_max_step
+
+    class _Listener(training._ContinuousEvalListener):
+
+      def __init__(self):
+        self.call_count = 0
+
+      def before_eval(self):
+        self.call_count += 1
+        return  self.call_count == 1
+
+    listener = _Listener()
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
+
+    training._TrainingExecutor(
+        mock_est, mock_train_spec, eval_spec,
+        continuous_eval_listener=listener).run_evaluator()
+
+    # Before_eval returns False during the second time, so, evaluate will be
+    # called once.
+    self.assertEqual(1, mock_est.evaluate.call_count)
+    self.assertEqual(2, listener.call_count)
+
+  def test_evaluate_listener_after_eval(self):
+    training_max_step = 200
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
+    # Without early stopping, this eval will be run twice.
+    expected_eval_metrics = [{
+        _GLOBAL_STEP_KEY: training_max_step // 2
+    }, {
+        _GLOBAL_STEP_KEY: training_max_step
+    }]
+    mock_est.evaluate.side_effect = expected_eval_metrics
+    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
+
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    class _Listener(training._ContinuousEvalListener):
+
+      def __init__(self):
+        self.call_count = 0
+
+      def after_eval(self, eval_result):
+        self.call_count += 1
+        self.eval_result = eval_result
+        return False
+
+    listener = _Listener()
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
+
+    training._TrainingExecutor(
+        mock_est, mock_train_spec, eval_spec,
+        continuous_eval_listener=listener).run_evaluator()
+
+    # after_eval returns False during the first time, so, evaluate will be
+    # called once.
+    self.assertEqual(1, mock_est.evaluate.call_count)
+    self.assertEqual(1, listener.call_count)
+    self.assertAllEqual(expected_eval_metrics[0], listener.eval_result.metrics)
+    self.assertEqual('path_1', listener.eval_result.checkpoint_path)
+
   def test_final_export_is_true_in_the_end(self):
     training_max_step = 200
 
@@ -1108,6 +1199,70 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
     # successuful evaluation)
     self.assertEqual(2, mock_log.call_count)
 
+  def test_continuous_eval_listener_eval_result(self):
+    training_max_step = 200
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    expected_eval_metrics = [{
+        _GLOBAL_STEP_KEY: training_max_step // 2
+    }, {
+        _GLOBAL_STEP_KEY: training_max_step
+    }]
+    mock_est.evaluate.side_effect = expected_eval_metrics
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    class _Listener(training._ContinuousEvalListener):
+
+      def __init__(self):
+        self.eval_results = []
+
+      def after_eval(self, eval_result):
+        self.eval_results.append(eval_result)
+        return True
+
+    continuous_eval_listener = _Listener()
+
+    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
+
+    # First two items are invalid, next two items are same.
+    mock_est.latest_checkpoint.side_effect = [
+        None, '', 'same', 'same', 'path_2'
+    ]
+    expected_eval_results = [
+        training._EvalResult(training._EvalStatus.MISSING_CHECKPOINT),
+        training._EvalResult(training._EvalStatus.MISSING_CHECKPOINT),
+        training._EvalResult(
+            training._EvalStatus.EVALUATED,
+            metrics=expected_eval_metrics[0],
+            checkpoint_path='same'),
+        training._EvalResult(training._EvalStatus.NO_NEW_CHECKPOINT),
+        training._EvalResult(
+            training._EvalStatus.EVALUATED,
+            metrics=expected_eval_metrics[1],
+            checkpoint_path='path_2'),
+    ]
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
+
+    executor = training._TrainingExecutor(
+        mock_est,
+        mock_train_spec,
+        eval_spec,
+        continuous_eval_listener=continuous_eval_listener)
+    executor.run_evaluator()
+
+    # Three checkpoint paths are invalid.
+    self.assertEqual(5, mock_est.latest_checkpoint.call_count)
+    self.assertEqual(2, mock_est.evaluate.call_count)
+
+    self.assertEqual(5, len(continuous_eval_listener.eval_results))
+    for i, result in enumerate(continuous_eval_listener.eval_results):
+      self.assertEqual(expected_eval_results[i].status, result.status)
+      self.assertAllEqual(expected_eval_results[i].metrics, result.metrics)
+      self.assertEqual(expected_eval_results[i].checkpoint_path,
+                       result.checkpoint_path)
+
   def test_sleep_start_delay_secs(self):
     training_max_step = 200
     start_delay_secs = 123
@@ -1184,7 +1339,7 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
     mock_est.evaluate.return_value = {}
 
     executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(RuntimeError, _INVALID_EMPTY_EVAL_RESULT_ERR):
+    with self.assertRaisesRegexp(ValueError, _INVALID_EMPTY_EVAL_RESULT_ERR):
       executor.run_evaluator()
 
   def test_errors_out_if_evaluate_returns_non_dict(self):
@@ -1206,7 +1361,7 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
     mock_est.evaluate.return_value = {'loss': 123}
 
     executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(RuntimeError,
+    with self.assertRaisesRegexp(ValueError,
                                  _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR):
       executor.run_evaluator()
 
@@ -1246,7 +1401,7 @@ class TrainingExecutorRunPsTest(test.TestCase):
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
     mock_est.config.cluster_spec = None
     mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'gs'
+    mock_est.config.task_type = 'ps'
     mock_est.config.task_id = 2
 
     with self.assertRaisesRegexp(RuntimeError,
@@ -1260,9 +1415,9 @@ class TrainingExecutorRunPsTest(test.TestCase):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'gs': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
     mock_est.config.master = ''
-    mock_est.config.task_type = 'gs'
+    mock_est.config.task_type = 'ps'
     mock_est.config.task_id = 2
 
     with self.assertRaisesRegexp(RuntimeError,
@@ -1276,7 +1431,7 @@ class TrainingExecutorRunPsTest(test.TestCase):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'gs': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
     mock_est.config.master = 'grpc://...'
     mock_est.config.task_type = ''
     mock_est.config.task_id = 2
@@ -1292,9 +1447,9 @@ class TrainingExecutorRunPsTest(test.TestCase):
     mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
 
     mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = {'gs': 'dummy'}
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
     mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'gs'
+    mock_est.config.task_type = 'ps'
     mock_est.config.task_id = None
 
     with self.assertRaisesRegexp(RuntimeError,
@@ -1480,6 +1635,26 @@ class TrainingExecutorRunLocalTest(test.TestCase):
     self.assertEqual(train_spec.input_fn, train_args['input_fn'])
     self.assertEqual(train_spec.max_steps, train_args['max_steps'])
 
+  def test_train_hooks(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/')
+    mock_est.latest_checkpoint.return_value = 'checkpoint_path/'
+    train_spec = training.TrainSpec(
+        input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()])
+    eval_spec = training.EvalSpec(input_fn=lambda: 1, steps=2)
+    mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: train_spec.max_steps}
+    extra_hooks = [_FakeHook()]
+
+    executor = training._TrainingExecutor(
+        mock_est, train_spec, eval_spec, train_hooks=extra_hooks)
+    executor.run_local()
+
+    train_args = mock_est.train.call_args[1]
+    self.assertEqual(
+        list(train_spec.hooks) + extra_hooks, [
+            h for h in train_args['hooks']
+            if not isinstance(h, training._StopAtSecsHook)
+        ])
+
   def test_errors_out_if_throttle_secs_is_zero(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     train_spec = training.TrainSpec(input_fn=lambda: 1)
@@ -1527,7 +1702,7 @@ class TrainingExecutorRunLocalTest(test.TestCase):
     mock_est.evaluate.return_value = {}
 
     executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(RuntimeError, _INVALID_EMPTY_EVAL_RESULT_ERR):
+    with self.assertRaisesRegexp(ValueError, _INVALID_EMPTY_EVAL_RESULT_ERR):
       executor.run_local()
 
   def test_errors_out_if_evaluate_returns_non_dict(self):
@@ -1547,11 +1722,152 @@ class TrainingExecutorRunLocalTest(test.TestCase):
     mock_est.evaluate.return_value = {'loss': 123}
 
     executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(RuntimeError,
+    with self.assertRaisesRegexp(ValueError,
                                  _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR):
       executor.run_local()
 
 
+class TrainAndEvaluateRunTest(test.TestCase):
+
+  def _test_run_task_and_executor(self, run_config):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = run_config
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+
+    executor.call_task = {}
+
+    def task_fn(name):
+
+      def _fn():
+        executor.call_task[name] = 1
+
+      return _fn
+
+    executor.run_chief = task_fn('chief')
+    executor.run_master = task_fn('master')
+    executor.run_ps = task_fn('ps')
+    executor.run_evaluator = task_fn('evaluator')
+    executor.run_worker = task_fn('worker')
+    executor.run_local = task_fn('local')
+    return executor
+
+  def test_run_chief(self):
+    executor = self._test_run_task_and_executor(
+        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_CHIEF))
+    executor.run()
+    self.assertEqual(1, executor.call_task['chief'])
+
+  def test_run_worker(self):
+    executor = self._test_run_task_and_executor(
+        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_WORKER))
+    executor.run()
+    self.assertEqual(1, executor.call_task['worker'])
+
+  def test_run_ps(self):
+    executor = self._test_run_task_and_executor(
+        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_PS))
+    executor.run()
+    self.assertEqual(1, executor.call_task['ps'])
+
+  def test_run_evaluator(self):
+    executor = self._test_run_task_and_executor(
+        run_config=_create_run_config_with_cluster_spec(
+            _TF_CONFIG_FOR_EVALUATOR))
+    executor.run()
+    self.assertEqual(1, executor.call_task['evaluator'])
+
+  def test_run_local(self):
+    executor = self._test_run_task_and_executor(
+        run_config=run_config_lib.RunConfig())
+    executor.run()
+    self.assertEqual(1, executor.call_task['local'])
+
+  def test_invalid_local_task(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            'local': ['hos1:1'],
+        },
+        'task': {
+            'type': 'local',  # invalid task type.
+            'index': 0
+        }
+    }
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+    with self.assertRaisesRegexp(ValueError, _INVALID_LOCAL_TASK_WITH_CLUSTER):
+      executor.run()
+
+  def test_unsupported_task_due_to_missing_run_task(self):
+    unsupported_task = 'alloc'
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            unsupported_task: ['hos1:1'],
+        },
+        'task': {
+            'type': unsupported_task,
+            'index': 0
+        }
+    }
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TO_RUN):
+      executor.run()
+
+  def test_unsupported_task_due_to_not_callable(self):
+    unsupported_task = 'alloc'
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            unsupported_task: ['hos1:1'],
+        },
+        'task': {
+            'type': unsupported_task,
+            'index': 0
+        }
+    }
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+    executor.run_alloc = 123  # not callable
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TO_RUN):
+      executor.run()
+
+  def test_invalid_task_type(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = test.mock.Mock()
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.Mock()
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'1': ['dummy']})
+    mock_est.config.task_type = ''
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE):
+      executor.run()
+
+
 class TrainAndEvaluateIntegrationTest(test.TestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py
index 12f2592d848c3ce55777ffdae5cee7ac602ee87f..b7ba76d8714e6b13551bb3e18083f45e53d2afc3 100644
--- a/tensorflow/python/estimator/util.py
+++ b/tensorflow/python/estimator/util.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
-"""Utility to retrieve function args.."""
+"""Utility to retrieve function args."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -21,7 +21,6 @@ from __future__ import print_function
 
 import functools
 
-from tensorflow.python.util import tf_decorator
 from tensorflow.python.util import tf_inspect
 
 
@@ -45,14 +44,13 @@ def fn_args(fn):
   Raises:
     ValueError: if partial function has positionally bound arguments
   """
-  _, fn = tf_decorator.unwrap(fn)
   if isinstance(fn, functools.partial):
     args = fn_args(fn.func)
     args = [a for a in args[len(fn.args):] if a not in (fn.keywords or [])]
   else:
     if _is_callable_object(fn):
       fn = fn.__call__
-    args = tf_inspect.getargspec(fn).args
+    args = tf_inspect.getfullargspec(fn).args
     if _is_bounded_method(fn):
       args.remove('self')
   return tuple(args)
diff --git a/tensorflow/python/estimator/warm_starting_util.py b/tensorflow/python/estimator/warm_starting_util.py
index e5655db08201601030c4473e3194e89ef89f5a68..c748b318b730f4a4ff855c5e4558da88ada9581b 100644
--- a/tensorflow/python/estimator/warm_starting_util.py
+++ b/tensorflow/python/estimator/warm_starting_util.py
@@ -21,101 +21,233 @@ from __future__ import print_function
 import collections
 import six
 
-from tensorflow.python.feature_column import feature_column
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
+from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_ops
 from tensorflow.python.training import checkpoint_utils
 from tensorflow.python.training import saver
 
 
-class _WarmStartSettings(
-    collections.namedtuple("_WarmStartSettings", [
+class VocabInfo(
+    collections.namedtuple("VocabInfo", [
+        "new_vocab",
+        "new_vocab_size",
+        "num_oov_buckets",
+        "old_vocab",
+        "old_vocab_size",
+        "backup_initializer",
+    ])):
+  """Vocabulary information for WarmStartSettings.
+
+  See @{tf.estimator.WarmStartSettings$WarmStartSettings} for examples of using
+  VocabInfo to warm-start.
+
+  Attributes:
+    new_vocab: [Required] A path to the new vocabulary file (used with the
+      model to be trained).
+    new_vocab_size: [Required] An integer indicating how many entries of the new
+      vocabulary will used in training.
+    num_oov_buckets: [Required] An integer indicating how many OOV buckets are
+      associated with the vocabulary.
+    old_vocab: [Required] A path to the old vocabulary file (used with the
+      checkpoint to be warm-started from).
+    old_vocab_size: [Optional] An integer indicating how many entries of the old
+      vocabulary were used in the creation of the checkpoint. If not provided,
+      the entire old vocabulary will be used.
+    backup_initializer: [Optional] A variable initializer used for variables
+      corresponding to new vocabulary entries and OOV. If not provided, these
+      entries will be zero-initialized.
+  """
+
+  def __new__(cls,
+              new_vocab,
+              new_vocab_size,
+              num_oov_buckets,
+              old_vocab,
+              old_vocab_size=-1,
+              backup_initializer=None):
+    return super(VocabInfo, cls).__new__(
+        cls,
+        new_vocab,
+        new_vocab_size,
+        num_oov_buckets,
+        old_vocab,
+        old_vocab_size,
+        backup_initializer,
+    )
+
+
+class WarmStartSettings(
+    collections.namedtuple("WarmStartSettings", [
         "ckpt_to_initialize_from",
-        "col_to_prev_vocab",
-        "col_to_prev_tensor",
-        "exclude_columns",
+        "vars_to_warm_start",
+        "var_name_to_vocab_info",
+        "var_name_to_prev_var_name",
     ])):
-  """Settings for warm-starting input layer in models.
+  """Settings for warm-starting in Estimators.
+
+  Example Use with canned `DNNEstimator`:
+
+  ```
+  emb_vocab_file = tf.feature_column.embedding_column(
+      tf.feature_column.categorical_column_with_vocabulary_file(
+          "sc_vocab_file", "new_vocab.txt", vocab_size=100),
+      dimension=8)
+  emb_vocab_list = tf.feature_column.embedding_column(
+      tf.feature_column.categorical_column_with_vocabulary_list(
+          "sc_vocab_list", vocabulary_list=["a", "b"]),
+      dimension=8)
+  estimator = tf.estimator.DNNClassifier(
+    hidden_units=[128, 64], feature_columns=[emb_vocab_file, emb_vocab_list],
+    warm_start_from=ws)
+  ```
+
+  where `ws` could be defined as:
+
+  Warm-start all weights in the model (input layer and hidden weights).
+  Either the directory or a specific checkpoint can be provided (in the case
+  of the former, the latest checkpoint will be used):
+
+  ```
+  ws = WarmStartSettings(ckpt_to_initialize_from="/tmp")
+  ws = WarmStartSettings(ckpt_to_initialize_from="/tmp/model-1000")
+  ```
+
+  Warm-start only the embeddings (input layer) and their accumulator variables:
+
+  ```
+  ws = WarmStartSettings(ckpt_to_initialize_from="/tmp",
+                         vars_to_warm_start=".*input_layer.*")
+  ```
+
+  Warm-start everything except the optimizer accumulator variables
+  (DNN defaults to Adagrad):
+
+  ```
+  ws = WarmStartSettings(ckpt_to_initialize_from="/tmp",
+                         vars_to_warm_start="^(?!.*(Adagrad))")
+  ```
+
+  Warm-start all weights but the embedding parameters corresponding to
+  `sc_vocab_file` have a different vocab from the one used in the current
+  model:
+
+  ```
+  vocab_info = ws_util.VocabInfo(
+      new_vocab=sc_vocab_file.vocabulary_file,
+      new_vocab_size=sc_vocab_file.vocabulary_size,
+      num_oov_buckets=sc_vocab_file.num_oov_buckets,
+      old_vocab="old_vocab.txt"
+  )
+  ws = WarmStartSettings(
+      ckpt_to_initialize_from="/tmp",
+      var_name_to_vocab_info={
+          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
+      })
+  ```
+
+  Warm-start only `sc_vocab_file` embeddings (and no other variables), which
+  have a different vocab from the one used in the current model:
+
+  ```
+  vocab_info = ws_util.VocabInfo(
+      new_vocab=sc_vocab_file.vocabulary_file,
+      new_vocab_size=sc_vocab_file.vocabulary_size,
+      num_oov_buckets=sc_vocab_file.num_oov_buckets,
+      old_vocab="old_vocab.txt"
+  )
+  ws = WarmStartSettings(
+      ckpt_to_initialize_from="/tmp",
+      vars_to_warm_start=None,
+      var_name_to_vocab_info={
+          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
+      })
+  ```
+
+  Warm-start all weights but the parameters corresponding to `sc_vocab_file`
+  have a different vocab from the one used in current checkpoint, and only
+  100 of those entries were used:
+
+  ```
+  vocab_info = ws_util.VocabInfo(
+      new_vocab=sc_vocab_file.vocabulary_file,
+      new_vocab_size=sc_vocab_file.vocabulary_size,
+      num_oov_buckets=sc_vocab_file.num_oov_buckets,
+      old_vocab="old_vocab.txt",
+      old_vocab_size=100
+  )
+  ws = WarmStartSettings(
+      ckpt_to_initialize_from="/tmp",
+      var_name_to_vocab_info={
+          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
+      })
+  ```
+
+  Warm-start all weights but the parameters corresponding to `sc_vocab_file`
+  have a different vocab from the one used in current checkpoint and the
+  parameters corresponding to `sc_vocab_list` have a different name from the
+  current checkpoint:
+
+  ```
+  vocab_info = ws_util.VocabInfo(
+      new_vocab=sc_vocab_file.vocabulary_file,
+      new_vocab_size=sc_vocab_file.vocabulary_size,
+      num_oov_buckets=sc_vocab_file.num_oov_buckets,
+      old_vocab="old_vocab.txt",
+      old_vocab_size=100
+  )
+  ws = WarmStartSettings(
+      ckpt_to_initialize_from="/tmp",
+      var_name_to_vocab_info={
+          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
+      },
+      var_name_to_prev_var_name={
+          "input_layer/sc_vocab_list_embedding/embedding_weights":
+              "old_tensor_name"
+      })
+  ```
 
   Attributes:
     ckpt_to_initialize_from: [Required] A string specifying the directory with
       checkpoint file(s) or path to checkpoint from which to warm-start the
       model parameters.
-    col_to_prev_vocab: [Optional] Dict of `FeatureColumn` to vocabularies used
-      for the `FeatureColumn` in `ckpt_to_initialize_from`.  Vocabularies can
-      be represented either by a string (path to vocabulary), or tuple of
-      (string, int), representing (path of the vocabulary, vocab_size) if only
-      `vocab_size` entries of the old vocabulary were used in the checkpoint. If
-      the dict is not explicitly provided, the vocabularies are assumed to be
-      same between previous and present checkpoints.
-    col_to_prev_tensor: [Optional] Dict of `FeatureColumn` to name of the
-      variable (corresponding to the `FeatureColumn`) in
-      `ckpt_to_initialize_from`. If not explicitly provided, the name of the
-      variable is assumed to be same between previous and present checkpoints.
-    exclude_columns: [Optional] List of `FeatureColumn`s that should not be
-      warm-started from provided checkpoint.
-
-  Example Uses:
-
-  # Feature columns defining transformations on inputs.
-  sc_vocab_file = tf.feature_column.categorical_column_with_vocabulary_file(
-      "sc_vocab_file", "new_vocab.txt", vocab_size=100)
-  sc_vocab_list = tf.feature_column.cateogorical_column_with_vocabulary_list(
-      "sc_vocab_list", vocabulary_list=["a", "b"])
-
-  # Warm-start all weights. The parameters corresponding to "sc_vocab_file" have
-  # the same name and same vocab as current checkpoint. The parameters
-  # corresponding to "sc_vocab_list" have the same name.
-  ws = _WarmStartSettings(ckpt_to_initialize_from="/tmp")
-
-  # Warm-start all weights but the parameters corresponding to "sc_vocab_file"
-  # have a different vocab from the one used in current checkpoint.
-  ws = _WarmStartSettings(ckpt_to_initialize_from="/tmp",
-                          col_to_prev_vocab={sc_vocab_file: "old_vocab.txt"})
-
-  # Warm-start all weights but the parameters corresponding to "sc_vocab_file"
-  # have a different vocab from the one used in current checkpoint, and only
-  # 100 of those entries were used.
-  ws = _WarmStartSettings(ckpt_to_initialize_from="/tmp",
-                          col_to_prev_vocab={sc_vocab_file:
-                                             ("old_vocab.txt", 100)})
-
-  # Warm-start all weights but the parameters corresponding to "sc_vocab_file"
-  # have a different vocab from the one used in current checkpoint and the
-  # parameters corresponding to "sc_vocab_list" have a different name from the
-  # current checkpoint.
-  ws = _WarmStartSettings(ckpt_to_initialize_from="/tmp",
-                          col_to_prev_vocab={sc_vocab_file: "old_vocab.txt"},
-                          col_to_prev_tensor={sc_vocab_list: "old_tensor_name"})
-
-  # Warm-start all weights except those corrresponding to "sc_vocab_file".
-  ws = _WarmStartSettings(ckpt_to_initialize_from="/tmp",
-                          exclude_columns=[sc_vocab_file])
+    vars_to_warm_start: [Optional] A regular expression that captures which
+      variables to warm-start (see tf.get_collection).  Defaults to `'.*'`,
+      which warm-starts all variables.  If `None` is explicitly given, only
+      variables specified in `var_name_to_vocab_info` will be warm-started.
+    var_name_to_vocab_info: [Optional] Dict of variable names (strings) to
+      VocabInfo. The variable names should be "full" variables, not the names
+      of the partitions.  If not explicitly provided, the variable is assumed to
+      have no vocabulary.
+    var_name_to_prev_var_name: [Optional] Dict of variable names (strings) to
+      name of the previously-trained variable in `ckpt_to_initialize_from`. If
+      not explicitly provided, the name of the variable is assumed to be same
+      between previous checkpoint and current model.
   """
 
   def __new__(cls,
               ckpt_to_initialize_from,
-              col_to_prev_vocab=None,
-              col_to_prev_tensor=None,
-              exclude_columns=None):
+              vars_to_warm_start=".*",
+              var_name_to_vocab_info=None,
+              var_name_to_prev_var_name=None):
     if not ckpt_to_initialize_from:
       raise ValueError(
-          "`ckpt_to_initialize_from` MUST be set in _WarmStartSettings")
-    return super(_WarmStartSettings, cls).__new__(
+          "`ckpt_to_initialize_from` MUST be set in WarmStartSettings")
+    return super(WarmStartSettings, cls).__new__(
         cls,
         ckpt_to_initialize_from,
-        col_to_prev_vocab or {},
-        col_to_prev_tensor or {},
-        exclude_columns or [],)
+        vars_to_warm_start,
+        var_name_to_vocab_info or {},
+        var_name_to_prev_var_name or {},
+    )
 
 
 def _is_variable(x):
-  return (isinstance(x, variables.Variable) or
+  return (isinstance(x, variables_lib.Variable) or
           isinstance(x, resource_variable_ops.ResourceVariable))
 
 
@@ -135,11 +267,12 @@ def _infer_var_name(var):
   """
   name_to_var_dict = saver.BaseSaverBuilder.OpListToDict(var)
   if len(name_to_var_dict) > 1:
-    raise TypeError("`var` passed as arg violates the constraints.")
+    raise TypeError("`var` = %s passed as arg violates the constraints.  "
+                    "name_to_var_dict = %s" % (var, name_to_var_dict))
   return list(name_to_var_dict.keys())[0]
 
 
-def _warmstart_var(var, prev_ckpt, prev_tensor_name=None):
+def _warm_start_var(var, prev_ckpt, prev_tensor_name=None):
   """Warm-starts given variable from `prev_tensor_name` tensor in `prev_ckpt`.
 
   Args:
@@ -147,69 +280,26 @@ def _warmstart_var(var, prev_ckpt, prev_tensor_name=None):
       Can be either of the following:
       (i) `Variable`
       (ii) `ResourceVariable`
-      (iii) `PartitionedVariable`
-      (iv) list of `Variable` and/or `PartitionedVariable`: The list may
-        contain one or more variables that has been sharded.  For example:
-        [Variable('a/part_0'), Variable('b/part_0'), Variable('a/part_1'),
-         PartitionedVariable([Variable('c/part_0'), Variable('c/part_1')])]
-        where we have three whole Variables represented ('a', 'b', and 'c').
+      (iii) list of `Variable`: The list must contain slices of the same larger
+        variable.
+      (iv) `PartitionedVariable`
     prev_ckpt: A string specifying the directory with checkpoint file(s) or path
       to checkpoint. The given checkpoint must have tensor with name
       `prev_tensor_name` (if not None) or tensor with name same as given `var`.
     prev_tensor_name: Name of the tensor to lookup in provided `prev_ckpt`. If
       None, we lookup tensor with same name as given `var`.
-
-  Raises:
-    ValueError: If prev_tensor_name is not None, but the given var represents
-      more than one Variable.
-    TypeError: If var is not one of the allowed types.
   """
   if _is_variable(var):
     current_var_name = _infer_var_name([var])
-  elif isinstance(var, variables.PartitionedVariable):
+  elif isinstance(var, list) and all(_is_variable(v) for v in var):
+    current_var_name = _infer_var_name(var)
+  elif isinstance(var, variables_lib.PartitionedVariable):
     current_var_name = _infer_var_name([var])
     var = var._get_variable_list()  # pylint: disable=protected-access
-  elif (isinstance(var, list) and all(
-      _is_variable(v) or isinstance(v, variables.PartitionedVariable)
-      for v in var)):
-    # Convert length-1 lists of vars to single tf.Variables.  This ensures that
-    # checkpoint_utils.init_from_checkpoint() doesn't incorrectly assume
-    # slice info is present.
-    if len(var) == 1:
-      current_var_name = _infer_var_name(var)
-      var = var[0]
-    else:
-      # If we have multiple elements in var, we cannot assume they all
-      # represent the same Variable.
-      name_to_var_dict = saver.BaseSaverBuilder.OpListToDict(
-          var, convert_variable_to_tensor=False)
-      if prev_tensor_name:
-        # Providing a prev_tensor_name is only viable if var representes a
-        # single Variable.
-        if len(name_to_var_dict) > 1:
-          raise ValueError("var represented more than one Variable, but "
-                           "prev_tensor_name was provided.")
-        checkpoint_utils.init_from_checkpoint(prev_ckpt, {
-            prev_tensor_name: var
-        })
-      else:
-        # OpListToDict gives us roughly what we need, but
-        # the values in the dict may be PartitionedVariables (which
-        # init_from_checkpoint does not expect) that we need to convert to
-        # lists.
-        name_to_var_dict_fixed = {}
-        for name, var in six.iteritems(name_to_var_dict):
-          if isinstance(var, variables.PartitionedVariable):
-            name_to_var_dict_fixed[name] = var._get_variable_list()  # pylint: disable=protected-access
-          else:
-            name_to_var_dict_fixed[name] = var
-        checkpoint_utils.init_from_checkpoint(prev_ckpt, name_to_var_dict_fixed)
-      return
   else:
     raise TypeError(
-        "var MUST be one of the following: a Variable, PartitionedVariable, or "
-        "list of Variable's and/or PartitionedVariable's, but is {}".format(
-            type(var)))
+        "var MUST be one of the following: a Variable, list of Variable or "
+        "PartitionedVariable, but is {}".format(type(var)))
   if not prev_tensor_name:
     # Assume tensor name remains the same.
     prev_tensor_name = current_var_name
@@ -219,15 +309,15 @@ def _warmstart_var(var, prev_ckpt, prev_tensor_name=None):
 # pylint: disable=protected-access
 # Accesses protected members of tf.Variable to reset the variable's internal
 # state.
-def _warmstart_var_with_vocab(var,
-                              current_vocab_path,
-                              current_vocab_size,
-                              prev_ckpt,
-                              prev_vocab_path,
-                              previous_vocab_size=-1,
-                              current_oov_buckets=0,
-                              prev_tensor_name=None,
-                              initializer=None):
+def _warm_start_var_with_vocab(var,
+                               current_vocab_path,
+                               current_vocab_size,
+                               prev_ckpt,
+                               prev_vocab_path,
+                               previous_vocab_size=-1,
+                               current_oov_buckets=0,
+                               prev_tensor_name=None,
+                               initializer=None):
   """Warm-starts given variable from `prev_tensor_name` tensor in `prev_ckpt`.
 
   Use this method when the `var` is backed by vocabulary. This method stitches
@@ -270,7 +360,7 @@ def _warmstart_var_with_vocab(var,
     var = [var]
   elif isinstance(var, list) and all(_is_variable(v) for v in var):
     var = var
-  elif isinstance(var, variables.PartitionedVariable):
+  elif isinstance(var, variables_lib.PartitionedVariable):
     var = var._get_variable_list()
   else:
     raise TypeError(
@@ -290,10 +380,10 @@ def _warmstart_var_with_vocab(var,
           full_shape=slice_info.full_shape,
           var_offset=slice_info.var_offset)
 
-    # TODO(vihanjain): Support _WarmstartSettings where class vocabularies need
+    # TODO(eddz): Support WarmStartSettings where class vocabularies need
     # remapping too.
     init = checkpoint_ops._load_and_remap_matrix_initializer(
-        ckpt_path=saver.latest_checkpoint(prev_ckpt),
+        ckpt_path=checkpoint_utils._get_checkpoint_filename(prev_ckpt),
         old_tensor_name=prev_tensor_name,
         new_row_vocab_size=current_vocab_size,
         new_col_vocab_size=v_shape[1],
@@ -311,114 +401,127 @@ def _warmstart_var_with_vocab(var,
 # pylint: enable=protected-access
 
 
-def _warmstart_input_layer(cols_to_vars, warmstart_settings):
-  """Warm-starts input layer of a model using given settings.
+def _warm_start(warm_start_settings):
+  """Warmstarts a model using the given settings.
 
-  Args:
-    cols_to_vars: Dict of feature columns to corresponding graph variables.
-    warmstart_settings: An object of `_WarmStartSettings`.
-
-    Typical usage example:
-
-    ```python
-    tfcl = tf.contrib.layers
-    # Define features and transformations.
-    sc_vocab_list = tf.feature_column.categorical_column_with_vocabulary_list(
-        "sc_vocab_list", vocabulary_list=["a", "b"])
-    sc_vocab_file = tf.feature_column.categorical_column_with_vocabulary_file(
-        "sc_vocab_file", "new_vocab.txt", vocab_size=100)
-    cross = tf.feature_column.crossed_column(
-      [sc_vocab_list, sc_vocab_file], hash_bucket_size=5000)
-
-    all_cols = set(sc_vocab_list, sc_vocab_file, cross)
-    batch_features = tf.parse_example(
-        serialized=serialized_examples,
-        features=tf.contrib.layers.create_feature_spec_for_parsing(all_cols))
-
-    cols_to_vars = {}
-    tf.feature_column.linear_model(
-        features=batch_features,
-        feature_columns=all_cols,
-        units=1,
-        cols_to_vars=cols_to_vars)
-
-    # Warm-start entire input layer.
-    ws_settings = _WarmStartSettings(
-        "/tmp/prev_model_dir",
-        col_to_prev_vocab={sc_vocab_file: "old_vocab.txt"})
-    _warmstart_input_layer(cols_to_vars, ws_settings)
-    # Warm-start bias too.
-    _warmstart_var(cols_to_vars['bias'], ws_settings.ckpt_to_initialize_from)
-    ```
-
-    The above example effectively warm-starts full linear model.
+  Currently, this is intended for use only in canned Estimators.  Once made
+  public, it can be used in any model_fn.
 
+  Args:
+    warm_start_settings: An object of `WarmStartSettings`.
   Raises:
-    ValueError: If a column in cols_to_vars has an entry in
-      warmstart_settings.cols_to_prev_vocab, but is not an instance of
-      _VocabularyFileCategoricalColumn or _EmbeddingColumn.
+    ValueError: If the WarmStartSettings contains prev_var_name or VocabInfo
+      configuration for variable names that are not used.  This is to ensure
+      a stronger check for variable configuration than relying on users to
+      examine the logs.
   """
-  for col, var in six.iteritems(cols_to_vars):
-    if not isinstance(col, feature_column._FeatureColumn):  # pylint: disable=protected-access
-      raise TypeError(
-          "Keys in dict `cols_to_vars` must be of type FeatureColumn. Found "
-          "key of type: {}".format(type(col)))
-    if col in warmstart_settings.exclude_columns:
-      logging.info("Skipping warm-starting column: {}".format(col.name))
-      continue
-
-    prev_tensor_name = warmstart_settings.col_to_prev_tensor.get(col)
-    # pylint: disable=protected-access
-    is_sparse_vocab_column = isinstance(
-        col, feature_column._VocabularyFileCategoricalColumn)
-    is_embedding_vocab_column = (
-        isinstance(col, feature_column._EmbeddingColumn) and
-        isinstance(col.categorical_column,
-                   feature_column._VocabularyFileCategoricalColumn))
-    if is_sparse_vocab_column or is_embedding_vocab_column:
-      # pylint: enable=protected-access
-      initializer = None
-      if is_embedding_vocab_column:
-        initializer = col.initializer
-        vocabulary_file = col.categorical_column.vocabulary_file
-        vocabulary_size = col.categorical_column.vocabulary_size
-        num_oov_buckets = col.categorical_column.num_oov_buckets
-      else:
-        vocabulary_file = col.vocabulary_file
-        vocabulary_size = col.vocabulary_size
-        num_oov_buckets = col.num_oov_buckets
-      prev_vocab = warmstart_settings.col_to_prev_vocab.get(
-          col, vocabulary_file)
-      if isinstance(prev_vocab, str):
-        prev_vocab_path = prev_vocab
-        previous_vocab_size = -1
-        logging.info(
-            "Warm-starting column: {}; prev_vocab: {}; "
-            "prev_tensor: {}".format(col.name, prev_vocab_path,
-                                     (prev_tensor_name or "Unchanged")))
-      elif isinstance(prev_vocab, tuple):
-        prev_vocab_path = prev_vocab[0]
-        previous_vocab_size = prev_vocab[1]
-        logging.info("Warm-starting column: {}; prev_vocab: {} (first {} "
-                     "entries); prev_tensor: {}".format(
-                         col.name, prev_vocab_path, previous_vocab_size,
-                         (prev_tensor_name or "Unchanged")))
-
-      _warmstart_var_with_vocab(
-          var,
-          current_vocab_path=vocabulary_file,
-          current_vocab_size=vocabulary_size,
-          prev_ckpt=warmstart_settings.ckpt_to_initialize_from,
-          prev_vocab_path=prev_vocab_path,
-          previous_vocab_size=previous_vocab_size,
-          current_oov_buckets=num_oov_buckets,
-          prev_tensor_name=prev_tensor_name,
-          initializer=initializer)
+  logging.info("Warm-starting from: ",
+               warm_start_settings.ckpt_to_initialize_from)
+  # We have to deal with partitioned variables, since get_collection flattens
+  # out the list.
+  grouped_variables = {}
+  # Both warm_start_settings.vars_to_warm_start = '.*' and
+  # warm_start_settings.vars_to_warm_start = None will match everything here.
+  for v in ops.get_collection(
+      ops.GraphKeys.TRAINABLE_VARIABLES,
+      scope=warm_start_settings.vars_to_warm_start):
+    if not isinstance(v, list):
+      var_name = _infer_var_name([v])
     else:
-      if col in warmstart_settings.col_to_prev_vocab:
-        raise ValueError("Vocabulary provided for column %s which is not a "
-                         "_VocabularyFileCategoricalColumn or _EmbeddingColumn")
-      logging.info("Warm-starting column: {}; prev_tensor: {}".format(
-          col.name, prev_tensor_name or "Unchanged"))
-      _warmstart_var(var, warmstart_settings.ckpt_to_initialize_from,
-                     prev_tensor_name)
+      var_name = _infer_var_name(v)
+    grouped_variables.setdefault(var_name, []).append(v)
+
+  # Keep track of which var_names in var_name_to_prev_var_name and
+  # var_name_to_vocab_info have been used.  Err on the safer side by throwing an
+  # exception if any are unused by the end of the loop.  It is easy to misname
+  # a variable during this configuration, in which case without this check, we
+  # would fail to warm-start silently.
+  prev_var_name_used = set()
+  vocab_info_used = set()
+
+  for var_name, variable in six.iteritems(grouped_variables):
+    prev_var_name = warm_start_settings.var_name_to_prev_var_name.get(var_name)
+    if prev_var_name:
+      prev_var_name_used.add(var_name)
+    vocab_info = warm_start_settings.var_name_to_vocab_info.get(var_name)
+    if vocab_info:
+      vocab_info_used.add(var_name)
+      logging.info(
+          "Warm-starting variable: {}; current_vocab: {} current_vocab_size: {}"
+          " prev_vocab: {} prev_vocab_size: {} current_oov: {} prev_tensor: {}"
+          " initializer: {}".format(
+              var_name,
+              vocab_info.new_vocab,
+              vocab_info.new_vocab_size,
+              vocab_info.old_vocab,
+              (vocab_info.old_vocab_size if vocab_info.old_vocab_size > 0
+               else "All"),
+              vocab_info.num_oov_buckets,
+              prev_var_name or "Unchanged",
+              vocab_info.backup_initializer or "zero-initialized"))
+      _warm_start_var_with_vocab(
+          variable,
+          current_vocab_path=vocab_info.new_vocab,
+          current_vocab_size=vocab_info.new_vocab_size,
+          prev_ckpt=warm_start_settings.ckpt_to_initialize_from,
+          prev_vocab_path=vocab_info.old_vocab,
+          previous_vocab_size=vocab_info.old_vocab_size,
+          current_oov_buckets=vocab_info.num_oov_buckets,
+          prev_tensor_name=prev_var_name,
+          initializer=vocab_info.backup_initializer)
+    else:
+      # For the special value of warm_start_settings.vars_to_warm_start = None,
+      # we only warm-start variables with explicitly specified vocabularies.
+      if warm_start_settings.vars_to_warm_start:
+        logging.info("Warm-starting variable: {}; prev_var_name: {}".format(
+            var_name, prev_var_name or "Unchanged"))
+        # Because we use a default empty list in grouped_variables, single
+        # unpartitioned variables will be lists here, which we rectify in order
+        # for init_from_checkpoint logic to work correctly.
+        if len(variable) == 1:
+          variable = variable[0]
+        _warm_start_var(variable, warm_start_settings.ckpt_to_initialize_from,
+                        prev_var_name)
+
+  prev_var_name_not_used = set(
+      warm_start_settings.var_name_to_prev_var_name.keys()) - prev_var_name_used
+  vocab_info_not_used = set(
+      warm_start_settings.var_name_to_vocab_info.keys()) - vocab_info_used
+
+  if prev_var_name_not_used:
+    raise ValueError(
+        "You provided the following variables in "
+        "warm_start_settings.var_name_to_prev_var_name that were not used: "
+        "{0}.  Perhaps you misspelled them?  Here is the list of viable "
+        "variable names: {1}".format(prev_var_name_not_used,
+                                     grouped_variables.keys()))
+  if vocab_info_not_used:
+    raise ValueError(
+        "You provided the following variables in "
+        "warm_start_settings.var_name_to_vocab_info that were not used: {0}. "
+        " Perhaps you misspelled them?  Here is the list of viable variable "
+        "names: {1}".format(vocab_info_not_used, grouped_variables.keys()))
+
+
+def _get_default_warm_start_settings(warm_start_from):
+  """Returns default WarmStartSettings.
+
+  Args:
+    warm_start_from: Either a string representing the filepath of a checkpoint
+      to initialize from, or an instance of WarmStartSettings.
+
+  Returns:
+    Either None or an instance of WarmStartSettings.
+
+  Raises:
+    ValueError: If warm_start_from is not None but is neither a string nor an
+      instance of WarmStartSettings.
+  """
+  if warm_start_from is None:
+    return None
+  if isinstance(warm_start_from, six.string_types):
+    return WarmStartSettings(ckpt_to_initialize_from=warm_start_from)
+  elif isinstance(warm_start_from, WarmStartSettings):
+    return warm_start_from
+  else:
+    raise ValueError("warm_start_from must be a string or a WarmStartSettings")
diff --git a/tensorflow/python/estimator/warm_starting_util_test.py b/tensorflow/python/estimator/warm_starting_util_test.py
index a05dbfd7449c9e108649da9ec5a40fe220233953..3985d9ebd04e6963339fcf9999f6367fe4dadc1a 100644
--- a/tensorflow/python/estimator/warm_starting_util_test.py
+++ b/tensorflow/python/estimator/warm_starting_util_test.py
@@ -50,9 +50,7 @@ class WarmStartingUtilTest(test.TestCase):
     sess.run(variables.global_variables_initializer())
     saver = saver_lib.Saver()
     ckpt_prefix = os.path.join(self.get_temp_dir(), "model")
-    ckpt_state_name = "checkpoint"
-    saver.save(
-        sess, ckpt_prefix, global_step=0, latest_filename=ckpt_state_name)
+    saver.save(sess, ckpt_prefix, global_step=0)
 
   def _create_prev_run_var(self,
                            var_name,
@@ -72,36 +70,6 @@ class WarmStartingUtilTest(test.TestCase):
           var = var._get_variable_list()
         return var, sess.run(var)
 
-  def _create_prev_run_multiple_vars(self,
-                                     var_names,
-                                     initializers,
-                                     shapes=None,
-                                     partitioners=None):
-    if not shapes:
-      shapes = [None] * len(var_names)
-    if not partitioners:
-      partitioners = [None] * len(var_names)
-    with ops.Graph().as_default() as g:
-      with self.test_session(graph=g) as sess:
-        var_list = []
-        for var_name, shape, initializer, partitioner in zip(
-            var_names, shapes, initializers, partitioners):
-          var_list.append(
-              variable_scope.get_variable(
-                  var_name,
-                  shape=shape,
-                  initializer=initializer,
-                  partitioner=partitioner))
-        self._write_checkpoint(sess)
-        run_vars = []
-        for var, partitioner in zip(var_list, partitioners):
-          if partitioner:
-            self.assertTrue(isinstance(var, variables.PartitionedVariable))
-            run_vars.append(sess.run(var._get_variable_list()))
-          else:
-            run_vars.append(sess.run(var))
-        return var_list, run_vars
-
   def _create_dummy_inputs(self):
     return {
         "sc_int": array_ops.sparse_placeholder(dtypes.int32),
@@ -120,9 +88,7 @@ class WarmStartingUtilTest(test.TestCase):
           feature_columns=feature_cols,
           units=1,
           cols_to_vars=cols_to_vars)
-    # Return a dictionary mapping each column to its variable, dropping the
-    # 'bias' key that's also filled.
-    cols_to_vars.pop("bias")
+    # Return a dictionary mapping each column to its variable.
     return cols_to_vars
 
   def _assert_cols_to_vars(self, cols_to_vars, cols_to_expected_values, sess):
@@ -138,7 +104,7 @@ class WarmStartingUtilTest(test.TestCase):
       with self.test_session(graph=g) as sess:
         fruit_weights = variable_scope.get_variable(
             "fruit_weights", initializer=[[0.], [0.], [0.], [0.]])
-        ws_util._warmstart_var(fruit_weights, self.get_temp_dir())
+        ws_util._warm_start_var(fruit_weights, self.get_temp_dir())
         sess.run(variables.global_variables_initializer())
         self.assertAllEqual(prev_val, fruit_weights.eval(sess))
 
@@ -154,7 +120,7 @@ class WarmStartingUtilTest(test.TestCase):
       with self.test_session(graph=g) as sess:
         fruit_weights = variable_scope.get_variable(
             "fruit_weights", initializer=[[0.], [0.], [0.], [0.]])
-        ws_util._warmstart_var(fruit_weights, self.get_temp_dir())
+        ws_util._warm_start_var(fruit_weights, self.get_temp_dir())
         sess.run(variables.global_variables_initializer())
         self.assertAllEqual(prev_val, fruit_weights.eval(sess))
 
@@ -171,7 +137,7 @@ class WarmStartingUtilTest(test.TestCase):
             partitioner=lambda shape, dtype: [2, 1])
         self.assertTrue(
             isinstance(fruit_weights, variables.PartitionedVariable))
-        ws_util._warmstart_var(fruit_weights, self.get_temp_dir())
+        ws_util._warm_start_var(fruit_weights, self.get_temp_dir())
         sess.run(variables.global_variables_initializer())
         fruit_weights = fruit_weights._get_variable_list()
         new_val = np.concatenate(
@@ -195,7 +161,7 @@ class WarmStartingUtilTest(test.TestCase):
             partitioner=lambda shape, dtype: [2, 1])
         self.assertTrue(
             isinstance(fruit_weights, variables.PartitionedVariable))
-        ws_util._warmstart_var(
+        ws_util._warm_start_var(
             fruit_weights,
             self.get_temp_dir(),
             prev_tensor_name="old_scope/fruit_weights")
@@ -205,103 +171,10 @@ class WarmStartingUtilTest(test.TestCase):
             [fruit_weights[0].eval(sess), fruit_weights[1].eval(sess)], axis=0)
         self.assertAllEqual(prev_val, new_val)
 
-  def testWarmStartVarMultipleVars(self):
-    _, prev_vals = self._create_prev_run_multiple_vars(
-        var_names=["fruit_weights", "other_weights"],
-        initializers=[[[0.5], [1.], [1.5], [2.]], [[.05], [.1], [.15], [.2]]])
-
-    with ops.Graph().as_default() as g:
-      with self.test_session(graph=g) as sess:
-        fruit_weights = variable_scope.get_variable(
-            "fruit_weights", initializer=[[0.], [0.], [0.], [0.]])
-        other_weights = variable_scope.get_variable(
-            "other_weights", initializer=[[0.], [0.], [0.], [0.]])
-        ws_util._warmstart_var([fruit_weights, other_weights],
-                               self.get_temp_dir())
-        sess.run(variables.global_variables_initializer())
-        self.assertAllEqual(prev_vals[0], fruit_weights.eval(sess))
-        self.assertAllEqual(prev_vals[1], other_weights.eval(sess))
-
-  def testWarmStartVarMultipleVarsBothPartitioned(self):
-    _, prev_vals = self._create_prev_run_multiple_vars(
-        var_names=["fruit_weights", "other_weights"],
-        shapes=[[4, 1], [4, 1]],
-        initializers=[[[0.5], [1.], [1.5], [2.]], [[.05], [.1], [.15], [.2]]],
-        partitioners=[lambda shape, dtype: [2, 1], lambda shape, dtype: [2, 1]])
-
-    with ops.Graph().as_default() as g:
-      with self.test_session(graph=g) as sess:
-        fruit_weights = variable_scope.get_variable(
-            "fruit_weights",
-            shape=[4, 1],
-            initializer=[[0.], [0.], [0.], [0.]],
-            partitioner=lambda shape, dtype: [2, 1])
-        other_weights = variable_scope.get_variable(
-            "other_weights",
-            shape=[4, 1],
-            initializer=[[0.], [0.], [0.], [0.]],
-            partitioner=lambda shape, dtype: [2, 1])
-        ws_util._warmstart_var([fruit_weights, other_weights],
-                               self.get_temp_dir())
-        sess.run(variables.global_variables_initializer())
-        fruit_weights = fruit_weights._get_variable_list()
-        new_fruit_weights_val = np.concatenate(
-            [fruit_weights[0].eval(sess), fruit_weights[1].eval(sess)], axis=0)
-        other_weights = other_weights._get_variable_list()
-        new_other_weights_val = np.concatenate(
-            [other_weights[0].eval(sess), other_weights[1].eval(sess)], axis=0)
-        self.assertAllEqual(
-            np.concatenate(prev_vals[0], axis=0), new_fruit_weights_val)
-        self.assertAllEqual(
-            np.concatenate(prev_vals[1], axis=0), new_other_weights_val)
-
-  def testWarmStartVarMultipleVarsMixOfPartitions(self):
-    # First is not partitioned, but the second two are.
-    _, prev_vals = self._create_prev_run_multiple_vars(
-        var_names=["fruit_weights", "other_weights", "veggie_weights"],
-        shapes=[None, [4, 1], [4, 1]],
-        initializers=[[[0.5], [1.], [1.5], [2.]], [[.05], [.1], [.15], [.2]],
-                      [[5.], [10.], [15.], [20.]]],
-        partitioners=[
-            None, lambda shape, dtype: [2, 1], lambda shape, dtype: [2, 1]
-        ])
-
-    with ops.Graph().as_default() as g:
-      with self.test_session(graph=g) as sess:
-        fruit_weights = variable_scope.get_variable(
-            "fruit_weights", initializer=[[0.], [0.], [0.], [0.]])
-        other_weights = variable_scope.get_variable(
-            "other_weights",
-            shape=[4, 1],
-            initializer=[[0.], [0.], [0.], [0.]],
-            partitioner=lambda shape, dtype: [2, 1])
-        veggie_weights = variable_scope.get_variable(
-            "veggie_weights",
-            shape=[4, 1],
-            initializer=[[0.], [0.], [0.], [0.]],
-            partitioner=lambda shape, dtype: [2, 1])
-        # Flatten one of the partitioned variables.
-        ws_util._warmstart_var([fruit_weights, other_weights] +
-                               veggie_weights._get_variable_list(),
-                               self.get_temp_dir())
-        sess.run(variables.global_variables_initializer())
-        veggie_weights = veggie_weights._get_variable_list()
-        new_veggie_weights_val = np.concatenate(
-            [veggie_weights[0].eval(sess), veggie_weights[1].eval(sess)],
-            axis=0)
-        other_weights = other_weights._get_variable_list()
-        new_other_weights_val = np.concatenate(
-            [other_weights[0].eval(sess), other_weights[1].eval(sess)], axis=0)
-        self.assertAllEqual(prev_vals[0], fruit_weights.eval(sess))
-        self.assertAllEqual(
-            np.concatenate(prev_vals[1], axis=0), new_other_weights_val)
-        self.assertAllEqual(
-            np.concatenate(prev_vals[2], axis=0), new_veggie_weights_val)
-
   def testWarmStartVarWithVocab(self):
     prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                         "old_vocab")
-    _, _ = self._create_prev_run_var(
+    self._create_prev_run_var(
         "fruit_weights", initializer=[[0.5], [1.], [1.5], [2.]])
 
     # New vocab with elements in reverse order and one new element.
@@ -312,8 +185,8 @@ class WarmStartingUtilTest(test.TestCase):
       with self.test_session(graph=g) as sess:
         fruit_weights = variable_scope.get_variable(
             "fruit_weights", initializer=[[0.], [0.], [0.], [0.], [0.]])
-        ws_util._warmstart_var_with_vocab(fruit_weights, new_vocab_path, 5,
-                                          self.get_temp_dir(), prev_vocab_path)
+        ws_util._warm_start_var_with_vocab(fruit_weights, new_vocab_path, 5,
+                                           self.get_temp_dir(), prev_vocab_path)
         sess.run(variables.global_variables_initializer())
         self.assertAllEqual([[2.], [1.5], [1.], [0.5], [0.]],
                             fruit_weights.eval(sess))
@@ -321,7 +194,7 @@ class WarmStartingUtilTest(test.TestCase):
   def testWarmStartVarWithVocabConstrainedOldVocabSize(self):
     prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                         "old_vocab")
-    _, _ = self._create_prev_run_var(
+    self._create_prev_run_var(
         "fruit_weights", initializer=[[0.5], [1.], [1.5], [2.]])
 
     # New vocab with elements in reverse order and one new element.
@@ -332,7 +205,7 @@ class WarmStartingUtilTest(test.TestCase):
       with self.test_session(graph=g) as sess:
         fruit_weights = variable_scope.get_variable(
             "fruit_weights", initializer=[[0.], [0.], [0.], [0.], [0.]])
-        ws_util._warmstart_var_with_vocab(
+        ws_util._warm_start_var_with_vocab(
             fruit_weights,
             new_vocab_path,
             5,
@@ -347,7 +220,7 @@ class WarmStartingUtilTest(test.TestCase):
   def testWarmStartVarWithVocabPrevVarPartitioned(self):
     prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                         "old_vocab")
-    _, _ = self._create_prev_run_var(
+    self._create_prev_run_var(
         "fruit_weights",
         shape=[4, 1],
         initializer=[[0.5], [1.], [1.5], [2.]],
@@ -361,8 +234,8 @@ class WarmStartingUtilTest(test.TestCase):
       with self.test_session(graph=g) as sess:
         fruit_weights = variable_scope.get_variable(
             "fruit_weights", initializer=[[0.], [0.], [0.], [0.], [0.]])
-        ws_util._warmstart_var_with_vocab(fruit_weights, new_vocab_path, 5,
-                                          self.get_temp_dir(), prev_vocab_path)
+        ws_util._warm_start_var_with_vocab(fruit_weights, new_vocab_path, 5,
+                                           self.get_temp_dir(), prev_vocab_path)
         sess.run(variables.global_variables_initializer())
         self.assertAllEqual([[2.], [1.5], [1.], [0.5], [0.]],
                             fruit_weights.eval(sess))
@@ -370,7 +243,7 @@ class WarmStartingUtilTest(test.TestCase):
   def testWarmStartVarWithVocabCurrentVarPartitioned(self):
     prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                         "old_vocab")
-    _, _ = self._create_prev_run_var(
+    self._create_prev_run_var(
         "fruit_weights", initializer=[[0.5], [1.], [1.5], [2.]])
 
     # New vocab with elements in reverse order and one new element.
@@ -384,7 +257,7 @@ class WarmStartingUtilTest(test.TestCase):
             shape=[6, 1],
             initializer=[[0.], [0.], [0.], [0.], [0.], [0.]],
             partitioner=lambda shape, dtype: [2, 1])
-        ws_util._warmstart_var_with_vocab(
+        ws_util._warm_start_var_with_vocab(
             fruit_weights,
             new_vocab_path,
             5,
@@ -403,7 +276,7 @@ class WarmStartingUtilTest(test.TestCase):
   def testWarmStartVarWithVocabBothVarsPartitioned(self):
     prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                         "old_vocab")
-    _, _ = self._create_prev_run_var(
+    self._create_prev_run_var(
         "fruit_weights",
         shape=[4, 1],
         initializer=[[0.5], [1.], [1.5], [2.]],
@@ -421,8 +294,8 @@ class WarmStartingUtilTest(test.TestCase):
             shape=[6, 1],
             initializer=[[0.], [0.], [0.], [0.], [0.], [0.]],
             partitioner=lambda shape, dtype: [2, 1])
-        ws_util._warmstart_var_with_vocab(fruit_weights, new_vocab_path, 6,
-                                          self.get_temp_dir(), prev_vocab_path)
+        ws_util._warm_start_var_with_vocab(fruit_weights, new_vocab_path, 6,
+                                           self.get_temp_dir(), prev_vocab_path)
         sess.run(variables.global_variables_initializer())
         self.assertTrue(
             isinstance(fruit_weights, variables.PartitionedVariable))
@@ -432,7 +305,7 @@ class WarmStartingUtilTest(test.TestCase):
         self.assertAllEqual([[0.5], [0.], [0.]],
                             fruit_weights_vars[1].eval(sess))
 
-  def testWarmStartInputLayer_SparseColumnIntegerized(self):
+  def testWarmStart_SparseColumnIntegerized(self):
     # Create feature column.
     sc_int = fc.categorical_column_with_identity("sc_int", num_buckets=10)
 
@@ -443,28 +316,28 @@ class WarmStartingUtilTest(test.TestCase):
     self.assertAllEqual(np.ones([10, 1]), prev_int_val)
 
     partitioner = lambda shape, dtype: [1] * len(shape)
-    # New graph, new session WITHOUT warmstarting.
+    # New graph, new session WITHOUT warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_int], partitioner)
         sess.run(variables.global_variables_initializer())
-        # Without warmstarting, the weights should be initialized using default
+        # Without warm-starting, the weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars, {sc_int: [np.zeros([10, 1])]},
                                   sess)
 
-    # New graph, new session with warmstarting.
+    # New graph, new session with warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_int], partitioner)
-        ws_util._warmstart_input_layer(cols_to_vars,
-                                       ws_util._WarmStartSettings(
-                                           self.get_temp_dir()))
+        ws_util._warm_start(
+            ws_util.WarmStartSettings(
+                self.get_temp_dir(), vars_to_warm_start=".*sc_int.*"))
         sess.run(variables.global_variables_initializer())
-        # Verify weights were correctly warmstarted.
+        # Verify weights were correctly warm-started.
         self._assert_cols_to_vars(cols_to_vars, {sc_int: [prev_int_val]}, sess)
 
-  def testWarmStartInputLayer_SparseColumnHashed(self):
+  def testWarmStart_SparseColumnHashed(self):
     # Create feature column.
     sc_hash = fc.categorical_column_with_hash_bucket(
         "sc_hash", hash_bucket_size=15)
@@ -474,29 +347,66 @@ class WarmStartingUtilTest(test.TestCase):
         "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms())
 
     partitioner = lambda shape, dtype: [1] * len(shape)
-    # New graph, new session WITHOUT warmstarting.
+    # New graph, new session WITHOUT warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_hash], partitioner)
         sess.run(variables.global_variables_initializer())
-        # Without warmstarting, the weights should be initialized using default
+        # Without warm-starting, the weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars, {sc_hash: [np.zeros([15, 1])]},
                                   sess)
 
-    # New graph, new session with warmstarting.
+    # New graph, new session with warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_hash], partitioner)
-        ws_util._warmstart_input_layer(cols_to_vars,
-                                       ws_util._WarmStartSettings(
-                                           self.get_temp_dir()))
+        ws_util._warm_start(
+            ws_util.WarmStartSettings(
+                self.get_temp_dir(), vars_to_warm_start=".*sc_hash.*"))
         sess.run(variables.global_variables_initializer())
-        # Verify weights were correctly warmstarted.
+        # Verify weights were correctly warm-started.
         self._assert_cols_to_vars(cols_to_vars, {sc_hash: [prev_hash_val]},
                                   sess)
 
-  def testWarmStartInputLayer_SparseColumnVocabulary(self):
+  def testWarmStart_SparseColumnVocabulary(self):
+    # Create vocab for sparse column "sc_vocab".
+    vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
+                                   "vocab")
+    # Create feature column.
+    sc_vocab = fc.categorical_column_with_vocabulary_file(
+        "sc_vocab", vocabulary_file=vocab_path, vocabulary_size=4)
+
+    # Save checkpoint from which to warm-start.
+    _, prev_vocab_val = self._create_prev_run_var(
+        "linear_model/sc_vocab/weights", shape=[4, 1], initializer=ones())
+
+    partitioner = lambda shape, dtype: [1] * len(shape)
+    # New graph, new session WITHOUT warm-starting.
+    with ops.Graph().as_default() as g:
+      with self.test_session(graph=g) as sess:
+        cols_to_vars = self._create_linear_model([sc_vocab], partitioner)
+        sess.run(variables.global_variables_initializer())
+        # Without warm-starting, the weights should be initialized using default
+        # initializer (which is init_ops.zeros_initializer).
+        self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [np.zeros([4, 1])]},
+                                  sess)
+
+    # New graph, new session with warm-starting.
+    with ops.Graph().as_default() as g:
+      with self.test_session(graph=g) as sess:
+        cols_to_vars = self._create_linear_model([sc_vocab], partitioner)
+        # Since old vocab is not explicitly set in WarmStartSettings, the old
+        # vocab is assumed to be same as new vocab.
+        ws_util._warm_start(
+            ws_util.WarmStartSettings(
+                self.get_temp_dir(), vars_to_warm_start=".*sc_vocab.*"))
+        sess.run(variables.global_variables_initializer())
+        # Verify weights were correctly warm-started.
+        self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [prev_vocab_val]},
+                                  sess)
+
+  def testWarmStart_ExplicitCheckpointFile(self):
     # Create vocab for sparse column "sc_vocab".
     vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                    "vocab")
@@ -509,31 +419,33 @@ class WarmStartingUtilTest(test.TestCase):
         "linear_model/sc_vocab/weights", shape=[4, 1], initializer=ones())
 
     partitioner = lambda shape, dtype: [1] * len(shape)
-    # New graph, new session WITHOUT warmstarting.
+    # New graph, new session WITHOUT warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_vocab], partitioner)
         sess.run(variables.global_variables_initializer())
-        # Without warmstarting, the weights should be initialized using default
+        # Without warm-starting, the weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [np.zeros([4, 1])]},
                                   sess)
 
-    # New graph, new session with warmstarting.
+    # New graph, new session with warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_vocab], partitioner)
         # Since old vocab is not explicitly set in WarmStartSettings, the old
         # vocab is assumed to be same as new vocab.
-        ws_util._warmstart_input_layer(cols_to_vars,
-                                       ws_util._WarmStartSettings(
-                                           self.get_temp_dir()))
+        ws_util._warm_start(
+            ws_util.WarmStartSettings(
+                # Explicitly provide the file prefix instead of just the dir.
+                os.path.join(self.get_temp_dir(), "model-0"),
+                vars_to_warm_start=".*sc_vocab.*"))
         sess.run(variables.global_variables_initializer())
-        # Verify weights were correctly warmstarted.
+        # Verify weights were correctly warm-started.
         self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [prev_vocab_val]},
                                   sess)
 
-  def testWarmStartInputLayer_SparseColumnVocabularyConstrainedVocabSizes(self):
+  def testWarmStart_SparseColumnVocabularyConstrainedVocabSizes(self):
     # Create old vocabulary, and use a size smaller than the total number of
     # entries.
     old_vocab_path = self._write_vocab(["apple", "guava", "banana"],
@@ -553,32 +465,39 @@ class WarmStartingUtilTest(test.TestCase):
         "linear_model/sc_vocab/weights", shape=[2, 1], initializer=ones())
 
     partitioner = lambda shape, dtype: [1] * len(shape)
-    # New graph, new session WITHOUT warmstarting.
+    # New graph, new session WITHOUT warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_vocab], partitioner)
         sess.run(variables.global_variables_initializer())
-        # Without warmstarting, the weights should be initialized using default
+        # Without warm-starting, the weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [np.zeros([2, 1])]},
                                   sess)
 
-    # New graph, new session with warmstarting.
+    # New graph, new session with warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_vocab], partitioner)
-        warmstart_settings = ws_util._WarmStartSettings(
+        vocab_info = ws_util.VocabInfo(
+            new_vocab=sc_vocab.vocabulary_file,
+            new_vocab_size=sc_vocab.vocabulary_size,
+            num_oov_buckets=sc_vocab.num_oov_buckets,
+            old_vocab=old_vocab_path,
+            old_vocab_size=old_vocab_size)
+        warm_start_settings = ws_util.WarmStartSettings(
             ckpt_to_initialize_from=self.get_temp_dir(),
-            col_to_prev_vocab={
-                sc_vocab: (old_vocab_path, old_vocab_size)
+            vars_to_warm_start=".*sc_vocab.*",
+            var_name_to_vocab_info={
+                "linear_model/sc_vocab/weights": vocab_info
             })
-        ws_util._warmstart_input_layer(cols_to_vars, warmstart_settings)
+        ws_util._warm_start(warm_start_settings)
         sess.run(variables.global_variables_initializer())
-        # Verify weights were correctly warmstarted.  'banana' isn't in the
+        # Verify weights were correctly warm-started.  'banana' isn't in the
         # first two entries of the old vocabulary, so it's newly initialized.
         self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [[[1], [0]]]}, sess)
 
-  def testWarmStartInputLayer_BucketizedColumn(self):
+  def testWarmStart_BucketizedColumn(self):
     # Create feature column.
     real = fc.numeric_column("real")
     real_bucket = fc.bucketized_column(real, boundaries=[0., 1., 2., 3.])
@@ -590,29 +509,29 @@ class WarmStartingUtilTest(test.TestCase):
         initializer=norms())
 
     partitioner = lambda shape, dtype: [1] * len(shape)
-    # New graph, new session WITHOUT warmstarting.
+    # New graph, new session WITHOUT warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([real_bucket], partitioner)
         sess.run(variables.global_variables_initializer())
-        # Without warmstarting, the weights should be initialized using default
+        # Without warm-starting, the weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars,
                                   {real_bucket: [np.zeros([5, 1])]}, sess)
 
-    # New graph, new session with warmstarting.
+    # New graph, new session with warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([real_bucket], partitioner)
-        ws_util._warmstart_input_layer(cols_to_vars,
-                                       ws_util._WarmStartSettings(
-                                           self.get_temp_dir()))
+        ws_util._warm_start(
+            ws_util.WarmStartSettings(
+                self.get_temp_dir(), vars_to_warm_start=".*real_bucketized.*"))
         sess.run(variables.global_variables_initializer())
-        # Verify weights were correctly warmstarted.
+        # Verify weights were correctly warm-started.
         self._assert_cols_to_vars(cols_to_vars,
                                   {real_bucket: [prev_bucket_val]}, sess)
 
-  def testWarmStartInputLayer_MultipleCols(self):
+  def testWarmStart_MultipleCols(self):
     # Create vocab for sparse column "sc_vocab".
     vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                    "vocab")
@@ -630,7 +549,8 @@ class WarmStartingUtilTest(test.TestCase):
     cross = fc.crossed_column([sc_keys, sc_vocab], hash_bucket_size=20)
     all_linear_cols = [sc_int, sc_hash, sc_keys, sc_vocab, real_bucket, cross]
 
-    # Save checkpoint from which to warm-start.
+    # Save checkpoint from which to warm-start.  Also create a bias variable,
+    # so we can check that it's also warm-started.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         sc_int_weights = variable_scope.get_variable(
@@ -649,22 +569,24 @@ class WarmStartingUtilTest(test.TestCase):
             "linear_model/sc_keys_X_sc_vocab/weights",
             shape=[20, 1],
             initializer=rand())
+        bias = variable_scope.get_variable(
+            "linear_model/bias_weights",
+            shape=[1],
+            initializer=rand())
         self._write_checkpoint(sess)
         (prev_int_val, prev_hash_val, prev_keys_val, prev_vocab_val,
-         prev_bucket_val, prev_cross_val) = sess.run([
+         prev_bucket_val, prev_cross_val, prev_bias_val) = sess.run([
              sc_int_weights, sc_hash_weights, sc_keys_weights, sc_vocab_weights,
-             real_bucket_weights, cross_weights
+             real_bucket_weights, cross_weights, bias
          ])
-        # Verify we initialized the values correctly.
-        self.assertAllEqual(np.ones([10, 1]), prev_int_val)
 
     partitioner = lambda shape, dtype: [1] * len(shape)
-    # New graph, new session WITHOUT warmstarting.
+    # New graph, new session WITHOUT warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model(all_linear_cols, partitioner)
         sess.run(variables.global_variables_initializer())
-        # Without warmstarting, all weights should be initialized using default
+        # Without warm-starting, all weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars, {
             sc_int: [np.zeros([10, 1])],
@@ -675,15 +597,23 @@ class WarmStartingUtilTest(test.TestCase):
             cross: [np.zeros([20, 1])],
         }, sess)
 
-    # New graph, new session with warmstarting.
+    # New graph, new session with warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model(all_linear_cols, partitioner)
-        ws_util._warmstart_input_layer(cols_to_vars,
-                                       ws_util._WarmStartSettings(
-                                           self.get_temp_dir()))
+        vocab_info = ws_util.VocabInfo(
+            new_vocab=sc_vocab.vocabulary_file,
+            new_vocab_size=sc_vocab.vocabulary_size,
+            num_oov_buckets=sc_vocab.num_oov_buckets,
+            old_vocab=vocab_path)
+        ws_util._warm_start(
+            ws_util.WarmStartSettings(
+                self.get_temp_dir(),
+                var_name_to_vocab_info={
+                    "linear_model/sc_vocab/weights": vocab_info
+                }))
         sess.run(variables.global_variables_initializer())
-        # Verify weights were correctly warmstarted.
+        # Verify weights were correctly warm-started.
         self._assert_cols_to_vars(cols_to_vars, {
             sc_int: [prev_int_val],
             sc_hash: [prev_hash_val],
@@ -691,9 +621,10 @@ class WarmStartingUtilTest(test.TestCase):
             sc_vocab: [prev_vocab_val],
             real_bucket: [prev_bucket_val],
             cross: [prev_cross_val],
+            "bias": [prev_bias_val],
         }, sess)
 
-  def testWarmStartInputLayerMoreSettings(self):
+  def testWarmStartMoreSettings(self):
     # Create old and new vocabs for sparse column "sc_vocab".
     prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                         "old_vocab")
@@ -712,11 +643,11 @@ class WarmStartingUtilTest(test.TestCase):
     # Save checkpoint from which to warm-start.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
-        _ = variable_scope.get_variable(
+        variable_scope.get_variable(
             "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms())
         sc_keys_weights = variable_scope.get_variable(
             "some_other_name", shape=[4, 1], initializer=rand())
-        _ = variable_scope.get_variable(
+        variable_scope.get_variable(
             "linear_model/sc_vocab/weights",
             initializer=[[0.5], [1.], [2.], [3.]])
         self._write_checkpoint(sess)
@@ -728,20 +659,30 @@ class WarmStartingUtilTest(test.TestCase):
       partitions[0] = min(2, shape[0].value)
       return partitions
 
-    # New graph, new session with warmstarting.
+    # New graph, new session with warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = self._create_linear_model(all_linear_cols, _partitioner)
-        ws_settings = ws_util._WarmStartSettings(
+        vocab_info = ws_util.VocabInfo(
+            new_vocab=sc_vocab.vocabulary_file,
+            new_vocab_size=sc_vocab.vocabulary_size,
+            num_oov_buckets=sc_vocab.num_oov_buckets,
+            old_vocab=prev_vocab_path)
+        ws_settings = ws_util.WarmStartSettings(
             self.get_temp_dir(),
-            col_to_prev_vocab={sc_vocab: prev_vocab_path},
-            col_to_prev_tensor={sc_keys: "some_other_name"},
-            exclude_columns=[sc_hash])
-        ws_util._warmstart_input_layer(cols_to_vars, ws_settings)
+            vars_to_warm_start=".*(sc_keys|sc_vocab).*",
+            var_name_to_vocab_info={
+                ws_util._infer_var_name(cols_to_vars[sc_vocab]): vocab_info
+            },
+            var_name_to_prev_var_name={
+                ws_util._infer_var_name(cols_to_vars[sc_keys]):
+                    "some_other_name"
+            })
+        ws_util._warm_start(ws_settings)
         sess.run(variables.global_variables_initializer())
-        # Verify weights were correctly warmstarted.  Var corresponding to
+        # Verify weights were correctly warm-started.  Var corresponding to
         # sc_hash should not be warm-started.  Var corresponding to sc_vocab
-        # should be correctly warmstarted after vocab remapping.
+        # should be correctly warm-started after vocab remapping.
         self._assert_cols_to_vars(cols_to_vars, {
             sc_keys:
                 np.split(prev_keys_val, 2),
@@ -752,7 +693,140 @@ class WarmStartingUtilTest(test.TestCase):
             ]
         }, sess)
 
-  def testWarmStartInputLayerEmbeddingColumn(self):
+  def testWarmStartMoreSettingsNoPartitioning(self):
+    # Create old and new vocabs for sparse column "sc_vocab".
+    prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
+                                        "old_vocab")
+    new_vocab_path = self._write_vocab(
+        ["orange", "guava", "banana", "apple", "raspberry",
+         "blueberry"], "new_vocab")
+    # Create feature columns.
+    sc_hash = fc.categorical_column_with_hash_bucket(
+        "sc_hash", hash_bucket_size=15)
+    sc_keys = fc.categorical_column_with_vocabulary_list(
+        "sc_keys", vocabulary_list=["a", "b", "c", "e"])
+    sc_vocab = fc.categorical_column_with_vocabulary_file(
+        "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
+    all_linear_cols = [sc_hash, sc_keys, sc_vocab]
+
+    # Save checkpoint from which to warm-start.
+    with ops.Graph().as_default() as g:
+      with self.test_session(graph=g) as sess:
+        variable_scope.get_variable(
+            "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms())
+        sc_keys_weights = variable_scope.get_variable(
+            "some_other_name", shape=[4, 1], initializer=rand())
+        variable_scope.get_variable(
+            "linear_model/sc_vocab/weights",
+            initializer=[[0.5], [1.], [2.], [3.]])
+        self._write_checkpoint(sess)
+        prev_keys_val = sess.run(sc_keys_weights)
+
+    # New graph, new session with warm-starting.
+    with ops.Graph().as_default() as g:
+      with self.test_session(graph=g) as sess:
+        cols_to_vars = self._create_linear_model(all_linear_cols,
+                                                 partitioner=None)
+        vocab_info = ws_util.VocabInfo(
+            new_vocab=sc_vocab.vocabulary_file,
+            new_vocab_size=sc_vocab.vocabulary_size,
+            num_oov_buckets=sc_vocab.num_oov_buckets,
+            old_vocab=prev_vocab_path)
+        ws_settings = ws_util.WarmStartSettings(
+            self.get_temp_dir(),
+            vars_to_warm_start=".*(sc_keys|sc_vocab).*",
+            var_name_to_vocab_info={
+                ws_util._infer_var_name(cols_to_vars[sc_vocab]): vocab_info
+            },
+            var_name_to_prev_var_name={
+                ws_util._infer_var_name(cols_to_vars[sc_keys]):
+                    "some_other_name"
+            })
+        ws_util._warm_start(ws_settings)
+        sess.run(variables.global_variables_initializer())
+        # Verify weights were correctly warm-started.  Var corresponding to
+        # sc_hash should not be warm-started.  Var corresponding to sc_vocab
+        # should be correctly warm-started after vocab remapping.
+        self._assert_cols_to_vars(cols_to_vars, {
+            sc_keys: [prev_keys_val],
+            sc_hash: [np.zeros([15, 1])],
+            sc_vocab: [np.array([[3.], [2.], [1.], [0.5], [0.], [0.]])]
+        }, sess)
+
+  def testWarmStartVarsToWarmstartIsNone(self):
+    # Create old and new vocabs for sparse column "sc_vocab".
+    prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
+                                        "old_vocab")
+    new_vocab_path = self._write_vocab(
+        ["orange", "guava", "banana", "apple", "raspberry",
+         "blueberry"], "new_vocab")
+    # Create feature columns.
+    sc_hash = fc.categorical_column_with_hash_bucket(
+        "sc_hash", hash_bucket_size=15)
+    sc_keys = fc.categorical_column_with_vocabulary_list(
+        "sc_keys", vocabulary_list=["a", "b", "c", "e"])
+    sc_vocab = fc.categorical_column_with_vocabulary_file(
+        "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
+    all_linear_cols = [sc_hash, sc_keys, sc_vocab]
+
+    # Save checkpoint from which to warm-start.
+    with ops.Graph().as_default() as g:
+      with self.test_session(graph=g) as sess:
+        variable_scope.get_variable(
+            "linear_model/sc_hash/weights", shape=[15, 1], initializer=norms())
+        variable_scope.get_variable(
+            "some_other_name", shape=[4, 1], initializer=rand())
+        variable_scope.get_variable(
+            "linear_model/sc_vocab/weights",
+            initializer=[[0.5], [1.], [2.], [3.]])
+        self._write_checkpoint(sess)
+
+    def _partitioner(shape, dtype):  # pylint:disable=unused-argument
+      # Partition each var into 2 equal slices.
+      partitions = [1] * len(shape)
+      partitions[0] = min(2, shape[0].value)
+      return partitions
+
+    # New graph, new session with warm-starting.
+    with ops.Graph().as_default() as g:
+      with self.test_session(graph=g) as sess:
+        cols_to_vars = self._create_linear_model(all_linear_cols, _partitioner)
+        vocab_info = ws_util.VocabInfo(
+            new_vocab=sc_vocab.vocabulary_file,
+            new_vocab_size=sc_vocab.vocabulary_size,
+            num_oov_buckets=sc_vocab.num_oov_buckets,
+            old_vocab=prev_vocab_path)
+        ws_settings = ws_util.WarmStartSettings(
+            self.get_temp_dir(),
+            # The special value of None here will ensure that only the variable
+            # specified in var_name_to_vocab_info (sc_vocab embedding) is
+            # warm-started.
+            vars_to_warm_start=None,
+            var_name_to_vocab_info={
+                ws_util._infer_var_name(cols_to_vars[sc_vocab]): vocab_info
+            },
+            # Even though this is provided, the None value for
+            # vars_to_warm_start overrides the logic, and this will not be
+            # warm-started.
+            var_name_to_prev_var_name={
+                ws_util._infer_var_name(cols_to_vars[sc_keys]):
+                    "some_other_name"
+            })
+        ws_util._warm_start(ws_settings)
+        sess.run(variables.global_variables_initializer())
+        # Verify weights were correctly warm-started.  Var corresponding to
+        # sc_vocab should be correctly warm-started after vocab remapping,
+        # and neither of the other two should be warm-started..
+        self._assert_cols_to_vars(cols_to_vars, {
+            sc_keys: [np.zeros([2, 1]), np.zeros([2, 1])],
+            sc_hash: [np.zeros([8, 1]), np.zeros([7, 1])],
+            sc_vocab: [
+                np.array([[3.], [2.], [1.]]),
+                np.array([[0.5], [0.], [0.]])
+            ]
+        }, sess)
+
+  def testWarmStartEmbeddingColumn(self):
     # Create old and new vocabs for embedding column "sc_vocab".
     prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
                                         "old_vocab")
@@ -763,7 +837,7 @@ class WarmStartingUtilTest(test.TestCase):
     # Save checkpoint from which to warm-start.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
-        _ = variable_scope.get_variable(
+        variable_scope.get_variable(
             "input_layer/sc_vocab_embedding/embedding_weights",
             initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]])
         self._write_checkpoint(sess)
@@ -774,58 +848,167 @@ class WarmStartingUtilTest(test.TestCase):
       partitions[0] = min(2, shape[0].value)
       return partitions
 
+    # Create feature columns.
+    sc_vocab = fc.categorical_column_with_vocabulary_file(
+        "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
+    emb_vocab_column = fc.embedding_column(
+        categorical_column=sc_vocab,
+        dimension=2)
+    all_deep_cols = [emb_vocab_column]
+    # New graph, new session with warm-starting.
+    with ops.Graph().as_default() as g:
+      with self.test_session(graph=g) as sess:
+        cols_to_vars = {}
+        with variable_scope.variable_scope("", partitioner=_partitioner):
+          # Create the variables.
+          fc.input_layer(
+              features=self._create_dummy_inputs(),
+              feature_columns=all_deep_cols,
+              cols_to_vars=cols_to_vars)
+        vocab_info = ws_util.VocabInfo(
+            new_vocab=sc_vocab.vocabulary_file,
+            new_vocab_size=sc_vocab.vocabulary_size,
+            num_oov_buckets=sc_vocab.num_oov_buckets,
+            old_vocab=prev_vocab_path,
+            # Can't use constant_initializer with load_and_remap.  In practice,
+            # use a truncated normal initializer.
+            backup_initializer=init_ops.random_uniform_initializer(
+                minval=0.42, maxval=0.42))
+        ws_settings = ws_util.WarmStartSettings(
+            self.get_temp_dir(),
+            var_name_to_vocab_info={
+                ws_util._infer_var_name(cols_to_vars[emb_vocab_column]):
+                    vocab_info
+            })
+        ws_util._warm_start(ws_settings)
+        sess.run(variables.global_variables_initializer())
+        # Verify weights were correctly warm-started. Var corresponding to
+        # emb_vocab_column should be correctly warm-started after vocab
+        # remapping. Missing values are filled in with the EmbeddingColumn's
+        # initializer.
+        self._assert_cols_to_vars(
+            cols_to_vars, {
+                emb_vocab_column: [
+                    np.array([[3., 3.3], [2., 2.2], [1., 1.1]]),
+                    np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]])
+                ]
+            }, sess)
+
+  def testWarmStartEmbeddingColumnLinearModel(self):
+    # Create old and new vocabs for embedding column "sc_vocab".
+    prev_vocab_path = self._write_vocab(["apple", "banana", "guava", "orange"],
+                                        "old_vocab")
+    new_vocab_path = self._write_vocab(
+        ["orange", "guava", "banana", "apple", "raspberry", "blueberry"],
+        "new_vocab")
+
+    # Save checkpoint from which to warm-start.
+    with ops.Graph().as_default() as g:
+      with self.test_session(graph=g) as sess:
+        variable_scope.get_variable(
+            "linear_model/sc_vocab_embedding/embedding_weights",
+            initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]])
+        variable_scope.get_variable(
+            "linear_model/sc_vocab_embedding/weights",
+            initializer=[[0.69], [0.71]])
+        self._write_checkpoint(sess)
+
+    def _partitioner(shape, dtype):  # pylint:disable=unused-argument
+      # Partition each var into 2 equal slices.
+      partitions = [1] * len(shape)
+      partitions[0] = min(2, shape[0].value)
+      return partitions
+
     # Create feature columns.
     sc_vocab = fc.categorical_column_with_vocabulary_file(
         "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
     emb_vocab = fc.embedding_column(
         categorical_column=sc_vocab,
-        dimension=2,
-        # Can't use constant_initializer with load_and_remap.  In practice,
-        # use a truncated normal initializer.
-        initializer=init_ops.random_uniform_initializer(
-            minval=0.42, maxval=0.42))
+        dimension=2)
     all_deep_cols = [emb_vocab]
-    # New graph, new session with warmstarting.
+    # New graph, new session with warm-starting.
     with ops.Graph().as_default() as g:
       with self.test_session(graph=g) as sess:
         cols_to_vars = {}
         with variable_scope.variable_scope("", partitioner=_partitioner):
           # Create the variables.
-          fc.input_layer(
+          fc.linear_model(
               features=self._create_dummy_inputs(),
               feature_columns=all_deep_cols,
               cols_to_vars=cols_to_vars)
-        ws_settings = ws_util._WarmStartSettings(
-            self.get_temp_dir(), col_to_prev_vocab={
-                emb_vocab: prev_vocab_path
+
+        # Construct the vocab_info for the embedding weight.
+        vocab_info = ws_util.VocabInfo(
+            new_vocab=sc_vocab.vocabulary_file,
+            new_vocab_size=sc_vocab.vocabulary_size,
+            num_oov_buckets=sc_vocab.num_oov_buckets,
+            old_vocab=prev_vocab_path,
+            # Can't use constant_initializer with load_and_remap.  In practice,
+            # use a truncated normal initializer.
+            backup_initializer=init_ops.random_uniform_initializer(
+                minval=0.42, maxval=0.42))
+        ws_settings = ws_util.WarmStartSettings(
+            self.get_temp_dir(),
+            vars_to_warm_start=".*sc_vocab.*",
+            var_name_to_vocab_info={
+                "linear_model/sc_vocab_embedding/embedding_weights": vocab_info
             })
-        ws_util._warmstart_input_layer(cols_to_vars, ws_settings)
+        ws_util._warm_start(ws_settings)
         sess.run(variables.global_variables_initializer())
-        # Verify weights were correctly warmstarted. Var corresponding to
-        # emb_vocab should be correctly warmstarted after vocab remapping.
+        # Verify weights were correctly warm-started. Var corresponding to
+        # emb_vocab should be correctly warm-started after vocab remapping.
         # Missing values are filled in with the EmbeddingColumn's initializer.
         self._assert_cols_to_vars(
             cols_to_vars, {
                 emb_vocab: [
+                    # embedding_weights part 0.
                     np.array([[3., 3.3], [2., 2.2], [1., 1.1]]),
-                    np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]])
+                    # embedding_weights part 1.
+                    np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]]),
+                    # linear weights part 0.
+                    np.array([[0.69]]),
+                    # linear weights part 1.
+                    np.array([[0.71]])
                 ]
             }, sess)
 
   def testErrorConditions(self):
-    self.assertRaises(ValueError, ws_util._WarmStartSettings, None)
+    self.assertRaises(ValueError, ws_util.WarmStartSettings, None)
     x = variable_scope.get_variable(
         "x",
         shape=[4, 1],
         initializer=ones(),
         partitioner=lambda shape, dtype: [2, 1])
 
-    # List of PartitionedVariable is invalid type when warmstarting with vocab.
-    self.assertRaises(TypeError, ws_util._warmstart_var_with_vocab, [x], "/tmp",
-                      5, "/tmp", "/tmp")
+    # List of PartitionedVariable is invalid type when warm-starting with vocab.
+    self.assertRaises(TypeError, ws_util._warm_start_var_with_vocab, [x],
+                      "/tmp", 5, "/tmp", "/tmp")
     # Keys of type other than FeatureColumn.
-    self.assertRaises(TypeError, ws_util._warmstart_input_layer,
-                      {"StringType": x}, ws_util._WarmStartSettings("/tmp"))
+    self.assertRaises(TypeError, ws_util._warm_start, {"StringType": x},
+                      ws_util.WarmStartSettings("/tmp"))
+
+    # Unused variable names raises ValueError.
+    with ops.Graph().as_default():
+      with self.test_session() as sess:
+        x = variable_scope.get_variable(
+            "x",
+            shape=[4, 1],
+            initializer=ones(),
+            partitioner=lambda shape, dtype: [2, 1])
+        self._write_checkpoint(sess)
+
+    self.assertRaises(ValueError, ws_util._warm_start,
+                      ws_util.WarmStartSettings(
+                          self.get_temp_dir(),
+                          var_name_to_vocab_info={
+                              "y": ws_util.VocabInfo("", 1, 0, "")
+                          }))
+    self.assertRaises(ValueError, ws_util._warm_start,
+                      ws_util.WarmStartSettings(
+                          self.get_temp_dir(),
+                          var_name_to_prev_var_name={
+                              "y": "y2"
+                          }))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD
index b1c81dd58c7d2d9cf95821ea78eda2e7ee675d25..76d44fc474f936733f4eeeefd5d9510964ebb430 100644
--- a/tensorflow/python/feature_column/BUILD
+++ b/tensorflow/python/feature_column/BUILD
@@ -48,6 +48,7 @@ py_library(
         "//tensorflow/python:sparse_ops",
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:string_ops",
+        "//tensorflow/python:template",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:training",
         "//tensorflow/python:util",
diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 0686480ca48adab5766f25a83cb0de15678cf617..a7fe528ee1d85c3c06d4e9376ca4937aaf168b8a 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -134,7 +134,7 @@ import math
 import numpy as np
 import six
 
-from tensorflow.python.eager import context
+
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
@@ -150,6 +150,7 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import string_ops
+from tensorflow.python.ops import template
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
@@ -158,6 +159,56 @@ from tensorflow.python.training import checkpoint_utils
 from tensorflow.python.util import nest
 
 
+def _internal_input_layer(features,
+                          feature_columns,
+                          weight_collections=None,
+                          trainable=True,
+                          cols_to_vars=None,
+                          scope=None):
+  """See input_layer. `scope` is a name or variable scope to use."""
+
+  feature_columns = _clean_feature_columns(feature_columns)
+  for column in feature_columns:
+    if not isinstance(column, _DenseColumn):
+      raise ValueError(
+          'Items of feature_columns must be a _DenseColumn. '
+          'You can wrap a categorical column with an '
+          'embedding_column or indicator_column. Given: {}'.format(column))
+  weight_collections = list(weight_collections or [])
+  if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections:
+    weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
+  if ops.GraphKeys.MODEL_VARIABLES not in weight_collections:
+    weight_collections.append(ops.GraphKeys.MODEL_VARIABLES)
+
+  # a non-None `scope` can allow for variable reuse, when, e.g., this function
+  # is wrapped by a `make_template`.
+  with variable_scope.variable_scope(
+      scope, default_name='input_layer', values=features.values()):
+    builder = _LazyBuilder(features)
+    output_tensors = []
+    ordered_columns = []
+    for column in sorted(feature_columns, key=lambda x: x.name):
+      ordered_columns.append(column)
+      with variable_scope.variable_scope(
+          None, default_name=column._var_scope_name):  # pylint: disable=protected-access
+        tensor = column._get_dense_tensor(  # pylint: disable=protected-access
+            builder,
+            weight_collections=weight_collections,
+            trainable=trainable)
+        num_elements = column._variable_shape.num_elements()  # pylint: disable=protected-access
+        batch_size = array_ops.shape(tensor)[0]
+        output_tensors.append(
+            array_ops.reshape(tensor, shape=(batch_size, num_elements)))
+        if cols_to_vars is not None:
+          # Retrieve any variables created (some _DenseColumn's don't create
+          # variables, in which case an empty list is returned).
+          cols_to_vars[column] = ops.get_collection(
+              ops.GraphKeys.GLOBAL_VARIABLES,
+              scope=variable_scope.get_variable_scope().name)
+    _verify_static_batch_size_equality(output_tensors, ordered_columns)
+    return array_ops.concat(output_tensors, 1)
+
+
 def input_layer(features,
                 feature_columns,
                 weight_collections=None,
@@ -194,7 +245,7 @@ def input_layer(features,
       `bucketized_column`, `indicator_column`. If you have categorical features,
       you can wrap them with an `embedding_column` or `indicator_column`.
     weight_collections: A list of collection names to which the Variable will be
-      added. Note that, variables will also be added to collections
+      added. Note that variables will also be added to collections
       `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
     trainable: If `True` also add the variable to the graph collection
       `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
@@ -216,52 +267,66 @@ def input_layer(features,
   Raises:
     ValueError: if an item in `feature_columns` is not a `_DenseColumn`.
   """
-  feature_columns = _clean_feature_columns(feature_columns)
-  for column in feature_columns:
-    if not isinstance(column, _DenseColumn):
-      raise ValueError(
-          'Items of feature_columns must be a _DenseColumn. '
-          'You can wrap a categorical column with an '
-          'embedding_column or indicator_column. Given: {}'.format(column))
-  weight_collections = list(weight_collections or [])
-  if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections:
-    weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
-  if ops.GraphKeys.MODEL_VARIABLES not in weight_collections:
-    weight_collections.append(ops.GraphKeys.MODEL_VARIABLES)
-  with variable_scope.variable_scope(
-      None, default_name='input_layer', values=features.values()):
-    builder = _LazyBuilder(features)
-    output_tensors = []
-    ordered_columns = []
-    for column in sorted(feature_columns, key=lambda x: x.name):
-      ordered_columns.append(column)
-      with variable_scope.variable_scope(
-          None, default_name=column._var_scope_name):  # pylint: disable=protected-access
-        if column._var_scope_name == column.name:  # pylint: disable=protected-access
-          tensor = _get_dense_tensor(
-              column=column,
-              builder=builder,
-              weight_collections=weight_collections,
-              trainable=trainable)
-        else:
-          # This is typically the case for shared_embedding_columns. The
-          # embedding weights variable will be under the common variable_scope,
-          # but the ops for each column will be under a separate name_scope.
-          with ops.name_scope(column.name):
-            tensor = _get_dense_tensor(
-                column=column,
-                builder=builder,
-                weight_collections=weight_collections,
-                trainable=trainable)
-        output_tensors.append(tensor)
-        if cols_to_vars is not None:
-          # Retrieve any variables created (some _DenseColumn's don't create
-          # variables, in which case an empty list is returned).
-          cols_to_vars[column] = ops.get_collection(
-              ops.GraphKeys.GLOBAL_VARIABLES,
-              scope=variable_scope.get_variable_scope().name)
-    _verify_static_batch_size_equality(output_tensors, ordered_columns)
-    return array_ops.concat(output_tensors, 1)
+  return _internal_input_layer(features, feature_columns, weight_collections,
+                               trainable, cols_to_vars)
+
+
+# TODO(akshayka): InputLayer should be a subclass of Layer, and it
+# should implement the logic in input_layer using Layer's build-and-call
+# paradigm; input_layer should create an instance of InputLayer and
+# return the result of inovking its apply method, just as functional layers do.
+class InputLayer(object):
+  """An object-oriented version of `input_layer` that reuses variables."""
+
+  def __init__(self,
+               feature_columns,
+               weight_collections=None,
+               trainable=True,
+               cols_to_vars=None):
+    """See `input_layer`."""
+
+    self._feature_columns = feature_columns
+    self._weight_collections = weight_collections
+    self._trainable = trainable
+    self._cols_to_vars = cols_to_vars
+    self._input_layer_template = template.make_template(
+        'feature_column_input_layer',
+        _internal_input_layer,
+        create_scope_now_=True)
+    self._scope = self._input_layer_template.variable_scope
+
+  def __call__(self, features):
+    return self._input_layer_template(
+        features=features,
+        feature_columns=self._feature_columns,
+        weight_collections=self._weight_collections,
+        trainable=self._trainable,
+        cols_to_vars=None,
+        scope=self._scope)
+
+  @property
+  def non_trainable_variables(self):
+    return self._input_layer_template.non_trainable_variables
+
+  @property
+  def non_trainable_weights(self):
+    return self._input_layer_template.non_trainable_weights
+
+  @property
+  def trainable_variables(self):
+    return self._input_layer_template.trainable_variables
+
+  @property
+  def trainable_weights(self):
+    return self._input_layer_template.trainable_weights
+
+  @property
+  def variables(self):
+    return self._input_layer_template.variables
+
+  @property
+  def weights(self):
+    return self._input_layer_template.weights
 
 
 def linear_model(features,
@@ -355,26 +420,13 @@ def linear_model(features,
       with variable_scope.variable_scope(
           None, default_name=column._var_scope_name):  # pylint: disable=protected-access
         ordered_columns.append(column)
-        if column._var_scope_name == column.name:  # pylint: disable=protected-access
-          weighted_sum = _create_weighted_sum(
-              column=column,
-              builder=builder,
-              units=units,
-              sparse_combiner=sparse_combiner,
-              weight_collections=weight_collections,
-              trainable=trainable)
-        else:
-          # This is typically the case for shared_embedding_columns. The
-          # embedding weights variable will be under the common variable_scope,
-          # but the ops for each column will be under a separate name_scope.
-          with ops.name_scope(column.name):
-            weighted_sum = _create_weighted_sum(
-                column=column,
-                builder=builder,
-                units=units,
-                sparse_combiner=sparse_combiner,
-                weight_collections=weight_collections,
-                trainable=trainable)
+        weighted_sum = _create_weighted_sum(
+            column=column,
+            builder=builder,
+            units=units,
+            sparse_combiner=sparse_combiner,
+            weight_collections=weight_collections,
+            trainable=trainable)
         weighted_sums.append(weighted_sum)
         if cols_to_vars is not None:
           # Retrieve the variables created.
@@ -579,10 +631,6 @@ def embedding_column(
       is specified.
     ValueError: if `initializer` is specified and is not callable.
     RuntimeError: If eager execution is enabled.
-
-  @compatibility(eager)
-  Not compatible with eager execution.
-  @end_compatibility
   """
   if (dimension is None) or (dimension < 1):
     raise ValueError('Invalid dimension {}.'.format(dimension))
@@ -594,8 +642,6 @@ def embedding_column(
     raise ValueError('initializer must be callable if specified. '
                      'Embedding of column_name: {}'.format(
                          categorical_column.name))
-  if not context.in_graph_mode():
-    raise RuntimeError('Embedding_column not supported in eager mode.')
   if initializer is None:
     initializer = init_ops.truncated_normal_initializer(
         mean=0.0, stddev=1 / math.sqrt(dimension))
@@ -605,7 +651,6 @@ def embedding_column(
       dimension=dimension,
       combiner=combiner,
       initializer=initializer,
-      shared_embedding_collection_name=None,
       ckpt_to_load_from=ckpt_to_load_from,
       tensor_name_in_ckpt=tensor_name_in_ckpt,
       max_norm=max_norm,
@@ -749,7 +794,7 @@ def _shared_embedding_columns(
 
   result = []
   for column in categorical_columns:
-    result.append(_EmbeddingColumn(
+    result.append(_SharedEmbeddingColumn(
         categorical_column=column,
         dimension=dimension,
         combiner=combiner,
@@ -1623,21 +1668,6 @@ class _DenseColumn(_FeatureColumn):
     pass
 
 
-def _get_dense_tensor(
-    column,
-    builder,
-    weight_collections,
-    trainable):
-  """Creates a dense Tensor for a _DenseColumn for input_layer."""
-  tensor = column._get_dense_tensor(  # pylint: disable=protected-access
-      builder,
-      weight_collections=weight_collections,
-      trainable=trainable)
-  num_elements = column._variable_shape.num_elements()  # pylint: disable=protected-access
-  batch_size = array_ops.shape(tensor)[0]
-  return array_ops.reshape(tensor, shape=(batch_size, num_elements))
-
-
 def _create_weighted_sum(
     column,
     builder,
@@ -1648,11 +1678,19 @@ def _create_weighted_sum(
   """Creates a weighted sum for a dense or sparse column for linear_model."""
   if isinstance(column, _CategoricalColumn):
     return _create_categorical_column_weighted_sum(
-        column, builder, units, sparse_combiner, weight_collections,
-        trainable)
+        column=column,
+        builder=builder,
+        units=units,
+        sparse_combiner=sparse_combiner,
+        weight_collections=weight_collections,
+        trainable=trainable)
   else:
     return _create_dense_column_weighted_sum(
-        column, builder, units, weight_collections, trainable)
+        column=column,
+        builder=builder,
+        units=units,
+        weight_collections=weight_collections,
+        trainable=trainable)
 
 
 def _create_dense_column_weighted_sum(
@@ -1920,29 +1958,26 @@ def _to_sparse_input(input_tensor, ignore_value=None):
   if isinstance(input_tensor, sparse_tensor_lib.SparseTensor):
     return input_tensor
   with ops.name_scope(None, 'to_sparse_input', (input_tensor, ignore_value,)):
-    input_rank = input_tensor.get_shape().ndims
-    if input_rank is None:
-      # TODO(b/32318825): Implement dense_to_sparse_tensor for undefined rank.
-      raise ValueError('Undefined input_tensor shape.')
     if ignore_value is None:
-      ignore_value = '' if input_tensor.dtype == dtypes.string else -1
-    dense_shape = math_ops.cast(array_ops.shape(input_tensor), dtypes.int64)
-    indices = array_ops.where(math_ops.not_equal(
-        input_tensor, math_ops.cast(ignore_value, input_tensor.dtype)))
-    # Flattens the tensor and indices for use with gather.
-    flat_tensor = array_ops.reshape(input_tensor, [-1])
-    flat_indices = indices[:, input_rank - 1]
-    # Computes the correct flattened indices for 2d (or higher) tensors.
-    if input_rank > 1:
-      higher_dims = indices[:, :input_rank - 1]
-      shape_offsets = array_ops.stack(
-          _shape_offsets(array_ops.unstack(dense_shape)[1:]))
-      offsets = math_ops.reduce_sum(
-          math_ops.multiply(higher_dims, shape_offsets),
-          reduction_indices=[1])
-      flat_indices = math_ops.add(flat_indices, offsets)
-    values = array_ops.gather(flat_tensor, flat_indices)
-    return sparse_tensor_lib.SparseTensor(indices, values, dense_shape)
+      if input_tensor.dtype == dtypes.string:
+        # Exception due to TF strings are converted to numpy objects by default.
+        ignore_value = ''
+      elif input_tensor.dtype.is_integer:
+        ignore_value = -1  # -1 has a special meaning of missing feature
+      else:
+        # NOTE: `as_numpy_dtype` is a property, so with the parentheses this is
+        # constructing a new numpy object of the given type, which yields the
+        # default value for that type.
+        ignore_value = input_tensor.dtype.as_numpy_dtype()
+    ignore_value = math_ops.cast(
+        ignore_value, input_tensor.dtype, name='ignore_value')
+    indices = array_ops.where(
+        math_ops.not_equal(input_tensor, ignore_value), name='indices')
+    return sparse_tensor_lib.SparseTensor(
+        indices=indices,
+        values=array_ops.gather_nd(input_tensor, indices, name='values'),
+        dense_shape=array_ops.shape(
+            input_tensor, out_type=dtypes.int64, name='dense_shape'))
 
 
 def _clean_feature_columns(feature_columns):
@@ -2103,24 +2138,16 @@ class _EmbeddingColumn(
     _DenseColumn,
     collections.namedtuple('_EmbeddingColumn', (
         'categorical_column', 'dimension', 'combiner', 'initializer',
-        'shared_embedding_collection_name', 'ckpt_to_load_from',
-        'tensor_name_in_ckpt', 'max_norm', 'trainable'
+        'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'
     ))):
   """See `embedding_column`."""
 
   @property
   def name(self):
     if not hasattr(self, '_name'):
-      if self.shared_embedding_collection_name:
-        self._name = '{}_shared_embedding'.format(self.categorical_column.name)
-      else:
-        self._name = '{}_embedding'.format(self.categorical_column.name)
+      self._name = '{}_embedding'.format(self.categorical_column.name)
     return self._name
 
-  @property
-  def _var_scope_name(self):
-    return self.shared_embedding_collection_name or self.name
-
   @property
   def _parse_example_spec(self):
     return self.categorical_column._parse_example_spec  # pylint: disable=protected-access
@@ -2142,7 +2169,75 @@ class _EmbeddingColumn(
     sparse_weights = sparse_tensors.weight_tensor
 
     embedding_shape = (self.categorical_column._num_buckets, self.dimension)  # pylint: disable=protected-access
-    if self.shared_embedding_collection_name:
+    embedding_weights = variable_scope.get_variable(
+        name='embedding_weights',
+        shape=embedding_shape,
+        dtype=dtypes.float32,
+        initializer=self.initializer,
+        trainable=self.trainable and trainable,
+        collections=weight_collections)
+    if self.ckpt_to_load_from is not None:
+      to_restore = embedding_weights
+      if isinstance(to_restore, variables.PartitionedVariable):
+        to_restore = to_restore._get_variable_list()  # pylint: disable=protected-access
+      checkpoint_utils.init_from_checkpoint(self.ckpt_to_load_from, {
+          self.tensor_name_in_ckpt: to_restore
+      })
+
+    # Return embedding lookup result.
+    return _safe_embedding_lookup_sparse(
+        embedding_weights=embedding_weights,
+        sparse_ids=sparse_ids,
+        sparse_weights=sparse_weights,
+        combiner=self.combiner,
+        name='%s_weights' % self.name,
+        max_norm=self.max_norm)
+
+
+class _SharedEmbeddingColumn(
+    _DenseColumn,
+    collections.namedtuple('_SharedEmbeddingColumn', (
+        'categorical_column', 'dimension', 'combiner', 'initializer',
+        'shared_embedding_collection_name', 'ckpt_to_load_from',
+        'tensor_name_in_ckpt', 'max_norm', 'trainable'
+    ))):
+  """See `embedding_column`."""
+
+  @property
+  def name(self):
+    if not hasattr(self, '_name'):
+      self._name = '{}_shared_embedding'.format(self.categorical_column.name)
+    return self._name
+
+  @property
+  def _var_scope_name(self):
+    return self.shared_embedding_collection_name
+
+  @property
+  def _parse_example_spec(self):
+    return self.categorical_column._parse_example_spec  # pylint: disable=protected-access
+
+  def _transform_feature(self, inputs):
+    return inputs.get(self.categorical_column)
+
+  @property
+  def _variable_shape(self):
+    if not hasattr(self, '_shape'):
+      self._shape = tensor_shape.vector(self.dimension)
+    return self._shape
+
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    # This method is called from a variable_scope with name _var_scope_name,
+    # which is shared among all shared embeddings. Open a name_scope here, so
+    # that the ops for different columns have distinct names.
+    with ops.name_scope(None, default_name=self.name):
+      # Get sparse IDs and weights.
+      sparse_tensors = self.categorical_column._get_sparse_tensors(  # pylint: disable=protected-access
+          inputs, weight_collections=weight_collections, trainable=trainable)
+      sparse_ids = sparse_tensors.id_tensor
+      sparse_weights = sparse_tensors.weight_tensor
+
+      embedding_shape = (self.categorical_column._num_buckets, self.dimension)  # pylint: disable=protected-access
       shared_embedding_collection = ops.get_collection(
           self.shared_embedding_collection_name)
       if shared_embedding_collection:
@@ -2154,7 +2249,7 @@ class _EmbeddingColumn(
               'The feature_column library already adds a variable under the '
               'hood.'.format(shared_embedding_collection))
         embedding_weights = shared_embedding_collection[0]
-        if embedding_weights.shape != embedding_shape:
+        if embedding_weights.get_shape() != embedding_shape:
           raise ValueError(
               'Shared embedding collection {} contains variable {} of '
               'unexpected shape {}. Expected shape is {}. '
@@ -2163,7 +2258,7 @@ class _EmbeddingColumn(
               'The feature_column library already adds a variable under the '
               'hood.'.format(
                   self.shared_embedding_collection_name, embedding_weights.name,
-                  embedding_weights.shape, embedding_shape))
+                  embedding_weights.get_shape(), embedding_shape))
       else:
         embedding_weights = variable_scope.get_variable(
             name='embedding_weights',
@@ -2174,30 +2269,22 @@ class _EmbeddingColumn(
             collections=weight_collections)
         ops.add_to_collection(
             self.shared_embedding_collection_name, embedding_weights)
-    else:
-      embedding_weights = variable_scope.get_variable(
-          name='embedding_weights',
-          shape=embedding_shape,
-          dtype=dtypes.float32,
-          initializer=self.initializer,
-          trainable=self.trainable and trainable,
-          collections=weight_collections)
-    if self.ckpt_to_load_from is not None:
-      to_restore = embedding_weights
-      if isinstance(to_restore, variables.PartitionedVariable):
-        to_restore = to_restore._get_variable_list()  # pylint: disable=protected-access
-      checkpoint_utils.init_from_checkpoint(self.ckpt_to_load_from, {
-          self.tensor_name_in_ckpt: to_restore
-      })
-
-    # Return embedding lookup result.
-    return _safe_embedding_lookup_sparse(
-        embedding_weights=embedding_weights,
-        sparse_ids=sparse_ids,
-        sparse_weights=sparse_weights,
-        combiner=self.combiner,
-        name='%s_weights' % self.name,
-        max_norm=self.max_norm)
+      if self.ckpt_to_load_from is not None:
+        to_restore = embedding_weights
+        if isinstance(to_restore, variables.PartitionedVariable):
+          to_restore = to_restore._get_variable_list()  # pylint: disable=protected-access
+        checkpoint_utils.init_from_checkpoint(self.ckpt_to_load_from, {
+            self.tensor_name_in_ckpt: to_restore
+        })
+
+      # Return embedding lookup result.
+      return _safe_embedding_lookup_sparse(
+          embedding_weights=embedding_weights,
+          sparse_ids=sparse_ids,
+          sparse_weights=sparse_weights,
+          combiner=self.combiner,
+          name='%s_weights' % self.name,
+          max_norm=self.max_norm)
 
 
 def _create_tuple(shape, value):
diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py
index d974f14b8a35df7c86c0ab548c74772142fbbca4..2374680b968813b76d0ec115aa46c547eb9ab036 100644
--- a/tensorflow/python/feature_column/feature_column_test.py
+++ b/tensorflow/python/feature_column/feature_column_test.py
@@ -26,6 +26,8 @@ import numpy as np
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
 from tensorflow.python.client import session
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.feature_column import feature_column as fc_lib
 from tensorflow.python.feature_column import feature_column_lib as fc
@@ -34,11 +36,13 @@ from tensorflow.python.feature_column.feature_column import _DenseColumn
 from tensorflow.python.feature_column.feature_column import _FeatureColumn
 from tensorflow.python.feature_column.feature_column import _LazyBuilder
 from tensorflow.python.feature_column.feature_column import _transform_features
+from tensorflow.python.feature_column.feature_column import InputLayer
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import parsing_ops
@@ -1646,8 +1650,9 @@ class LinearModelTest(test.TestCase):
         indices=((0,), (1,)),
         values=('sedan', 'hardtop'),
         dense_shape=(2,))
+    country_data = np.array(['US', 'CA'])
 
-    net = fc.linear_model(features, [price_buckets, body_style])
+    net = fc.linear_model(features, [price_buckets, body_style, country])
     bias = get_linear_model_bias()
     price_buckets_var = get_linear_model_column_var(price_buckets)
     body_style_var = get_linear_model_column_var(body_style)
@@ -1656,15 +1661,14 @@ class LinearModelTest(test.TestCase):
       sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
       sess.run(bias.assign([5.]))
 
-      self.assertAllClose(
-          [[10 - 1000 + 5.], [1000 - 10 + 5.]],
-          sess.run(net, feed_dict={
-              features['price']: price_data,
-              features['body-style']: body_style_data}))
-
-    # Dense categorical_column with unknown shape is not allowed.
-    with self.assertRaisesRegexp(ValueError, 'Undefined input_tensor shape.'):
-      fc.linear_model(features, [price_buckets, body_style, country])
+      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]],
+                          sess.run(
+                              net,
+                              feed_dict={
+                                  features['price']: price_data,
+                                  features['body-style']: body_style_data,
+                                  features['country']: country_data
+                              }))
 
   def test_with_rank_0_feature(self):
     price = fc.numeric_column('price')
@@ -1690,6 +1694,105 @@ class LinearModelTest(test.TestCase):
 
 class InputLayerTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
+  def test_retrieving_input(self):
+    features = {'a': [0.]}
+    input_layer = InputLayer(fc.numeric_column('a'))
+    inputs = self.evaluate(input_layer(features))
+    self.assertAllClose([[0.]], inputs)
+
+  def test_reuses_variables(self):
+    with context.eager_mode():
+      sparse_input = sparse_tensor.SparseTensor(
+          indices=((0, 0), (1, 0), (2, 0)),
+          values=(0, 1, 2),
+          dense_shape=(3, 3))
+
+      # Create feature columns (categorical and embedding).
+      categorical_column = fc.categorical_column_with_identity(key='a',
+                                                               num_buckets=3)
+      embedding_dimension = 2
+      def _embedding_column_initializer(shape, dtype, partition_info):
+        del shape  # unused
+        del dtype  # unused
+        del partition_info  # unused
+        embedding_values = (
+            (1, 0),  # id 0
+            (0, 1),  # id 1
+            (1, 1))  # id 2
+        return embedding_values
+      embedding_column = fc.embedding_column(
+          categorical_column,
+          dimension=embedding_dimension,
+          initializer=_embedding_column_initializer)
+
+      input_layer = InputLayer([embedding_column])
+      features = {'a': sparse_input}
+
+      inputs = input_layer(features)
+      variables = input_layer.variables
+
+      # Sanity check: test that the inputs are correct.
+      self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)
+
+      # Check that only one variable was created.
+      self.assertEqual(1, len(variables))
+
+      # Check that invoking input_layer on the same features does not create
+      # additional variables
+      _ = input_layer(features)
+      self.assertEqual(1, len(variables))
+      self.assertEqual(variables[0], input_layer.variables[0])
+
+  def test_feature_column_input_layer_gradient(self):
+    with context.eager_mode():
+      sparse_input = sparse_tensor.SparseTensor(
+          indices=((0, 0), (1, 0), (2, 0)),
+          values=(0, 1, 2),
+          dense_shape=(3, 3))
+
+      # Create feature columns (categorical and embedding).
+      categorical_column = fc.categorical_column_with_identity(key='a',
+                                                               num_buckets=3)
+      embedding_dimension = 2
+
+      def _embedding_column_initializer(shape, dtype, partition_info):
+        del shape  # unused
+        del dtype  # unused
+        del partition_info  # unused
+        embedding_values = (
+            (1, 0),  # id 0
+            (0, 1),  # id 1
+            (1, 1))  # id 2
+        return embedding_values
+
+      embedding_column = fc.embedding_column(
+          categorical_column,
+          dimension=embedding_dimension,
+          initializer=_embedding_column_initializer)
+
+      input_layer = InputLayer([embedding_column])
+      features = {'a': sparse_input}
+
+      def scale_matrix():
+        matrix = input_layer(features)
+        return 2 * matrix
+
+      # Sanity check: Verify that scale_matrix returns the correct output.
+      self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix())
+
+      # Check that the returned gradient is correct.
+      grad_function = backprop.implicit_grad(scale_matrix)
+      grads_and_vars = grad_function()
+      indexed_slice = grads_and_vars[0][0]
+      gradient = grads_and_vars[0][0].values
+
+      self.assertAllEqual([0, 1, 2], indexed_slice.indices)
+      self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient)
+
+
+class FunctionalInputLayerTest(test.TestCase):
+
   def test_raises_if_empty_feature_columns(self):
     with self.assertRaisesRegexp(ValueError,
                                  'feature_columns must not be empty'):
@@ -2016,9 +2119,9 @@ class InputLayerTest(test.TestCase):
 
   def test_with_1d_unknown_shape_sparse_tensor(self):
     embedding_values = (
-        (1., 2., 3., 4., 5.),  # id 0
-        (6., 7., 8., 9., 10.),  # id 1
-        (11., 12., 13., 14., 15.)  # id 2
+        (1., 2.),  # id 0
+        (6., 7.),  # id 1
+        (11., 12.)  # id 2
     )
     def _initializer(shape, dtype, partition_info):
       del shape, dtype, partition_info
@@ -2035,8 +2138,8 @@ class InputLayerTest(test.TestCase):
     # embedded_body_style has 5 dims in input_layer.
     country = fc.categorical_column_with_vocabulary_list(
         'country', vocabulary_list=['US', 'JP', 'CA'])
-    embedded_country = fc.embedding_column(country, dimension=5,
-                                           initializer=_initializer)
+    embedded_country = fc.embedding_column(
+        country, dimension=2, initializer=_initializer)
 
     # Provides 1-dim tensor and dense tensor.
     features = {
@@ -2054,22 +2157,24 @@ class InputLayerTest(test.TestCase):
         indices=((0,), (1,)),
         values=('sedan', 'hardtop'),
         dense_shape=(2,))
+    country_data = np.array([['US'], ['CA']])
 
-    # Dense categorical_column with unknown shape is not allowed.
-    with self.assertRaisesRegexp(ValueError, 'Undefined input_tensor shape.'):
-      fc.input_layer(features, [price, one_hot_body_style, embedded_country])
-
-    net = fc.input_layer(features, [price, one_hot_body_style])
-    self.assertEqual(1 + 3, net.shape[1])
+    net = fc.input_layer(features,
+                         [price, one_hot_body_style, embedded_country])
+    self.assertEqual(1 + 3 + 2, net.shape[1])
     with _initialized_session() as sess:
 
       # Each row is formed by concatenating `embedded_body_style`,
       # `one_hot_body_style`, and `price` in order.
       self.assertAllEqual(
-          [[0., 0., 1., 11.], [1., 0., 0., 12.]],
-          sess.run(net, feed_dict={
-              features['price']: price_data,
-              features['body-style']: body_style_data}))
+          [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]],
+          sess.run(
+              net,
+              feed_dict={
+                  features['price']: price_data,
+                  features['body-style']: body_style_data,
+                  features['country']: country_data
+              }))
 
   def test_with_rank_0_feature(self):
     # price has 1 dimension in input_layer
@@ -3446,7 +3551,6 @@ class EmbeddingColumnTest(test.TestCase):
     self.assertEqual('mean', embedding_column.combiner)
     self.assertIsNotNone(embedding_column.initializer)
     self.assertIsNone(embedding_column.ckpt_to_load_from)
-    self.assertIsNone(embedding_column.shared_embedding_collection_name)
     self.assertIsNone(embedding_column.tensor_name_in_ckpt)
     self.assertIsNone(embedding_column.max_norm)
     self.assertTrue(embedding_column.trainable)
@@ -3471,7 +3575,6 @@ class EmbeddingColumnTest(test.TestCase):
     self.assertEqual(embedding_dimension, embedding_column.dimension)
     self.assertEqual('my_combiner', embedding_column.combiner)
     self.assertEqual('my_initializer', embedding_column.initializer())
-    self.assertIsNone(embedding_column.shared_embedding_collection_name)
     self.assertEqual('my_ckpt', embedding_column.ckpt_to_load_from)
     self.assertEqual('my_ckpt_tensor', embedding_column.tensor_name_in_ckpt)
     self.assertEqual(42., embedding_column.max_norm)
@@ -3503,7 +3606,6 @@ class EmbeddingColumnTest(test.TestCase):
       self.assertEqual(embedding_dimension, embedding_column.dimension)
       self.assertEqual('my_combiner', embedding_column.combiner)
       self.assertEqual('my_initializer', embedding_column.initializer())
-      self.assertIsNone(embedding_column.shared_embedding_collection_name)
       self.assertEqual('my_ckpt', embedding_column.ckpt_to_load_from)
       self.assertEqual('my_ckpt_tensor', embedding_column.tensor_name_in_ckpt)
       self.assertEqual(42., embedding_column.max_norm)
@@ -4245,25 +4347,256 @@ class SharedEmbeddingColumnTest(test.TestCase):
               dense_shape=[1, 2]),
           features['bbb'].eval())
 
-  def test_input_layer(self):
+  def test_transform_feature(self):
+    a = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
+    b = fc.categorical_column_with_identity(key='bbb', num_buckets=3)
+    a_embedded, b_embedded = fc_lib._shared_embedding_columns(
+        [a, b], dimension=2)
+    features = {
+        'aaa': sparse_tensor.SparseTensor(
+            indices=((0, 0), (1, 0), (1, 1)),
+            values=(0, 1, 0),
+            dense_shape=(2, 2)),
+        'bbb': sparse_tensor.SparseTensor(
+            indices=((0, 0), (1, 0), (1, 1)),
+            values=(1, 2, 1),
+            dense_shape=(2, 2)),
+    }
+    outputs = _transform_features(features, [a, a_embedded, b, b_embedded])
+    output_a = outputs[a]
+    output_a_embedded = outputs[a_embedded]
+    output_b = outputs[b]
+    output_b_embedded = outputs[b_embedded]
+    with _initialized_session():
+      _assert_sparse_tensor_value(
+          self, output_a.eval(), output_a_embedded.eval())
+      _assert_sparse_tensor_value(
+          self, output_b.eval(), output_b_embedded.eval())
+
+  def test_get_dense_tensor(self):
+    # Inputs.
+    vocabulary_size = 3
+    # -1 values are ignored.
+    input_a = np.array(
+        [[2, -1, -1],  # example 0, ids [2]
+         [0, 1, -1]])  # example 1, ids [0, 1]
+    input_b = np.array(
+        [[0, -1, -1],  # example 0, ids [0]
+         [-1, -1, -1]])  # example 1, ids []
+    input_features = {
+        'aaa': input_a,
+        'bbb': input_b
+    }
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups_a = (
+        # example 0:
+        (7., 11.),  # ids [2], embedding = [7, 11]
+        # example 1:
+        (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+    )
+    expected_lookups_b = (
+        # example 0:
+        (1., 2.),  # ids [0], embedding = [1, 2]
+        # example 1:
+        (0., 0.),  # ids [], embedding = [0, 0]
+    )
+
+    # Build columns.
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    embedding_column_a, embedding_column_b = fc_lib._shared_embedding_columns(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension, initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    embedding_lookup_a = embedding_column_a._get_dense_tensor(
+        _LazyBuilder(input_features))
+    embedding_lookup_b = embedding_column_b._get_dense_tensor(
+        _LazyBuilder(input_features))
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(
+        ('embedding_weights:0',), tuple([v.name for v in global_vars]))
+    embedding_var = global_vars[0]
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, embedding_var.eval())
+      self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
+      self.assertAllEqual(expected_lookups_b, embedding_lookup_b.eval())
+
+  def test_get_dense_tensor_placeholder_inputs(self):
+    # Inputs.
+    vocabulary_size = 3
+    # -1 values are ignored.
+    input_a = np.array(
+        [[2, -1, -1],  # example 0, ids [2]
+         [0, 1, -1]])  # example 1, ids [0, 1]
+    input_b = np.array(
+        [[0, -1, -1],  # example 0, ids [0]
+         [-1, -1, -1]])  # example 1, ids []
+    # Specify shape, because dense input must have rank specified.
+    input_a_placeholder = array_ops.placeholder(
+        dtype=dtypes.int64, shape=[None, 3])
+    input_b_placeholder = array_ops.placeholder(
+        dtype=dtypes.int64, shape=[None, 3])
+    input_features = {
+        'aaa': input_a_placeholder,
+        'bbb': input_b_placeholder,
+    }
+    feed_dict = {
+        input_a_placeholder: input_a,
+        input_b_placeholder: input_b,
+    }
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Build columns.
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    embedding_column_a, embedding_column_b = fc_lib._shared_embedding_columns(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension, initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    embedding_lookup_a = embedding_column_a._get_dense_tensor(
+        _LazyBuilder(input_features))
+    embedding_lookup_b = embedding_column_b._get_dense_tensor(
+        _LazyBuilder(input_features))
+
+    with _initialized_session() as sess:
+      sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict)
+
+  def test_linear_model(self):
+    # Inputs.
+    batch_size = 2
+    vocabulary_size = 3
+    # -1 values are ignored.
+    input_a = np.array(
+        [[2, -1, -1],  # example 0, ids [2]
+         [0, 1, -1]])  # example 1, ids [0, 1]
+    input_b = np.array(
+        [[0, -1, -1],  # example 0, ids [0]
+         [-1, -1, -1]])  # example 1, ids []
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_shape = (vocabulary_size, embedding_dimension)
+    zeros_embedding_values = np.zeros(embedding_shape)
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual(embedding_shape, shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return zeros_embedding_values
+
+    # Build columns.
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    embedding_column_a, embedding_column_b = fc_lib._shared_embedding_columns(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension, initializer=_initializer)
+
+    with ops.Graph().as_default():
+      predictions = fc.linear_model({
+          categorical_column_a.name: input_a,
+          categorical_column_b.name: input_b,
+      }, (embedding_column_a, embedding_column_b))
+      # Linear weights do not follow the column name. But this is a rare use
+      # case, and fixing it would add too much complexity to the code.
+      expected_var_names = (
+          'linear_model/bias_weights:0',
+          'linear_model/aaa_bbb_shared_embedding/weights:0',
+          'linear_model/aaa_bbb_shared_embedding/embedding_weights:0',
+          'linear_model/aaa_bbb_shared_embedding_1/weights:0',
+      )
+      self.assertItemsEqual(
+          expected_var_names,
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+      trainable_vars = {
+          v.name: v for v in ops.get_collection(
+              ops.GraphKeys.TRAINABLE_VARIABLES)
+      }
+      self.assertItemsEqual(expected_var_names, trainable_vars.keys())
+      bias = trainable_vars['linear_model/bias_weights:0']
+      embedding_weights = trainable_vars[
+          'linear_model/aaa_bbb_shared_embedding/embedding_weights:0']
+      linear_weights_a = trainable_vars[
+          'linear_model/aaa_bbb_shared_embedding/weights:0']
+      linear_weights_b = trainable_vars[
+          'linear_model/aaa_bbb_shared_embedding_1/weights:0']
+      with _initialized_session():
+        # Predictions with all zero weights.
+        self.assertAllClose(np.zeros((1,)), bias.eval())
+        self.assertAllClose(zeros_embedding_values, embedding_weights.eval())
+        self.assertAllClose(
+            np.zeros((embedding_dimension, 1)), linear_weights_a.eval())
+        self.assertAllClose(
+            np.zeros((embedding_dimension, 1)), linear_weights_b.eval())
+        self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval())
+
+        # Predictions with all non-zero weights.
+        embedding_weights.assign((
+            (1., 2.),  # id 0
+            (3., 5.),  # id 1
+            (7., 11.)  # id 2
+        )).eval()
+        linear_weights_a.assign(((4.,), (6.,))).eval()
+        # example 0, ids [2], embedding[0] = [7, 11]
+        # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5]
+        # sum(embeddings * linear_weights)
+        # = [4*7 + 6*11, 4*2 + 6*3.5] = [94, 29]
+        linear_weights_b.assign(((3.,), (5.,))).eval()
+        # example 0, ids [0], embedding[0] = [1, 2]
+        # example 1, ids [], embedding[1] = 0, 0]
+        # sum(embeddings * linear_weights)
+        # = [3*1 + 5*2, 3*0 +5*0] = [13, 0]
+        self.assertAllClose([[94. + 13.], [29.]], predictions.eval())
+
+  def _test_input_layer(self, trainable=True):
     # Inputs.
     vocabulary_size = 3
     sparse_input_a = sparse_tensor.SparseTensorValue(
         # example 0, ids [2]
         # example 1, ids [0, 1]
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(4, 5))
+        indices=((0, 0), (1, 0), (1, 4)),
+        values=(2, 0, 1),
+        dense_shape=(2, 5))
     sparse_input_b = sparse_tensor.SparseTensorValue(
         # example 0, ids [0]
         # example 1, ids []
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (3, 0)),
-        values=(0, 1),
-        dense_shape=(4, 5))
+        indices=((0, 0),),
+        values=(0,),
+        dense_shape=(2, 5))
 
     # Embedding variable.
     embedding_dimension = 2
@@ -4288,14 +4621,6 @@ class SharedEmbeddingColumnTest(test.TestCase):
         # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
         # B ids [], embedding = [0, 0]
         (2., 3.5, 0., 0.),
-        # example 2:
-        # A ids [], embedding = [0, 0]
-        # B ids [], embedding = [0, 0]
-        (0., 0., 0., 0.),
-        # example 3:
-        # A ids [1], embedding = [3, 5]
-        # B ids [1], embedding = [3, 5]
-        (3., 5., 3., 5.),
     )
 
     # Build columns.
@@ -4305,7 +4630,8 @@ class SharedEmbeddingColumnTest(test.TestCase):
         key='bbb', num_buckets=vocabulary_size)
     embedding_column_a, embedding_column_b = fc_lib._shared_embedding_columns(
         [categorical_column_a, categorical_column_b],
-        dimension=embedding_dimension, initializer=_initializer)
+        dimension=embedding_dimension, initializer=_initializer,
+        trainable=trainable)
 
     # Provide sparse input and get dense result.
     input_layer = fc.input_layer(
@@ -4318,17 +4644,26 @@ class SharedEmbeddingColumnTest(test.TestCase):
         ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
         tuple([v.name for v in global_vars]))
     trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-    self.assertItemsEqual(
-        ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
-        tuple([v.name for v in trainable_vars]))
+    if trainable:
+      self.assertItemsEqual(
+          ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
+          tuple([v.name for v in trainable_vars]))
+    else:
+      self.assertItemsEqual([], tuple([v.name for v in trainable_vars]))
     shared_embedding_vars = ops.get_collection('aaa_bbb_shared_embedding')
     self.assertItemsEqual(
         ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
         tuple([v.name for v in shared_embedding_vars]))
     with _initialized_session():
-      self.assertAllEqual(embedding_values, trainable_vars[0].eval())
+      self.assertAllEqual(embedding_values, shared_embedding_vars[0].eval())
       self.assertAllEqual(expected_lookups, input_layer.eval())
 
+  def test_input_layer(self):
+    self._test_input_layer()
+
+  def test_input_layer_no_trainable(self):
+    self._test_input_layer(trainable=False)
+
 
 class WeightedCategoricalColumnTest(test.TestCase):
 
diff --git a/tensorflow/python/framework/constant_op.py b/tensorflow/python/framework/constant_op.py
index bf3be34d85120f3d873367aa55948d27d34977cf..ac915157f528e78a960f0a5bf85539955c192eba 100644
--- a/tensorflow/python/framework/constant_op.py
+++ b/tensorflow/python/framework/constant_op.py
@@ -45,6 +45,7 @@ import numpy as np
 import six
 
 from tensorflow.core.framework import attr_value_pb2
+from tensorflow.core.framework import types_pb2
 from tensorflow.python.eager import context
 from tensorflow.python.eager import execute
 from tensorflow.python.framework import dtypes
@@ -71,7 +72,7 @@ def _eager_fill(dims, value, ctx):
   attr_t = value.dtype.as_datatype_enum
   dims = convert_to_eager_tensor(dims, ctx, dtypes.int32)
   inputs_flat = [dims, value]
-  attrs = ("T", attr_t)
+  attrs = ("T", attr_t, "index_type", types_pb2.DT_INT32)
   result, = execute.execute(
       b"Fill", 1, inputs=inputs_flat, attrs=attrs, ctx=ctx)
   return result
diff --git a/tensorflow/python/framework/dtypes.py b/tensorflow/python/framework/dtypes.py
index db124ab12acdfb9724f9800f5be36b9f1d45f323..b0422eb6be091a3fcf4b213f04a2e13a3ae8a963 100644
--- a/tensorflow/python/framework/dtypes.py
+++ b/tensorflow/python/framework/dtypes.py
@@ -18,9 +18,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+
 import numpy as np
 
 from tensorflow.core.framework import types_pb2
+from tensorflow.python import pywrap_tensorflow
+
+
+_np_bfloat16 = pywrap_tensorflow.TF_bfloat16_type()
 
 
 class DType(object):
@@ -146,8 +151,9 @@ class DType(object):
   @property
   def is_floating(self):
     """Returns whether this is a (non-quantized, real) floating point type."""
-    return self.is_numpy_compatible and np.issubdtype(self.as_numpy_dtype,
-                                                      np.floating)
+    return ((self.is_numpy_compatible and np.issubdtype(self.as_numpy_dtype,
+                                                        np.floating))
+            or self.base_dtype == bfloat16)
 
   @property
   def is_complex(self):
@@ -157,7 +163,7 @@ class DType(object):
   @property
   def is_quantized(self):
     """Returns whether this is a quantized data type."""
-    return self.base_dtype in [qint8, quint8, qint16, quint16, qint32, bfloat16]
+    return self.base_dtype in [qint8, quint8, qint16, quint16, qint32]
 
   @property
   def is_unsigned(self):
@@ -194,6 +200,8 @@ class DType(object):
       try:
         return np.iinfo(self.as_numpy_dtype()).min
       except:
+        if self.base_dtype == bfloat16:
+          return _np_bfloat16(float.fromhex("-0x1.FEp127"))
         raise TypeError("Cannot find minimum value of %s." % self)
 
   @property
@@ -216,6 +224,8 @@ class DType(object):
       try:
         return np.iinfo(self.as_numpy_dtype()).max
       except:
+        if self.base_dtype == bfloat16:
+          return _np_bfloat16(float.fromhex("0x1.FEp127"))
         raise TypeError("Cannot find maximum value of %s." % self)
 
   @property
@@ -486,6 +496,8 @@ _np_qint16 = np.dtype([("qint16", np.int16, 1)])
 _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
 _np_qint32 = np.dtype([("qint32", np.int32, 1)])
 
+# _np_bfloat16 is defined by a module import.
+
 # Custom struct dtype for directly-fed ResourceHandles of supported type(s).
 np_resource = np.dtype([("resource", np.ubyte, 1)])
 
@@ -511,7 +523,7 @@ _NP_TO_TF = frozenset([
     (_np_qint16, qint16),
     (_np_quint16, quint16),
     (_np_qint32, qint32),
-    # NOTE(touts): Intentionally no way to feed a DT_BFLOAT16.
+    (_np_bfloat16, bfloat16),
 ])
 _TF_TO_NP = {
     types_pb2.DT_HALF: np.float16,
@@ -536,7 +548,7 @@ _TF_TO_NP = {
     types_pb2.DT_QINT16: _np_qint16,
     types_pb2.DT_QUINT16: _np_quint16,
     types_pb2.DT_QINT32: _np_qint32,
-    types_pb2.DT_BFLOAT16: np.uint16,
+    types_pb2.DT_BFLOAT16: _np_bfloat16,
 
     # Ref types
     types_pb2.DT_HALF_REF: np.float16,
@@ -559,7 +571,7 @@ _TF_TO_NP = {
     types_pb2.DT_QINT16_REF: _np_qint16,
     types_pb2.DT_QUINT16_REF: _np_quint16,
     types_pb2.DT_QINT32_REF: _np_qint32,
-    types_pb2.DT_BFLOAT16_REF: np.uint16,
+    types_pb2.DT_BFLOAT16_REF: _np_bfloat16,
 }
 
 
diff --git a/tensorflow/python/framework/dtypes_test.py b/tensorflow/python/framework/dtypes_test.py
index 67842e14b1077fdf69aa3405f4f43fc92e499b4d..e49e2fda5d84da4f8f87fae73874351afe0a20f2 100644
--- a/tensorflow/python/framework/dtypes_test.py
+++ b/tensorflow/python/framework/dtypes_test.py
@@ -176,7 +176,7 @@ class TypesTest(test_util.TensorFlowTestCase):
     self.assertEqual(dtypes.as_dtype("float64").is_floating, True)
     self.assertEqual(dtypes.as_dtype("string").is_floating, False)
     self.assertEqual(dtypes.as_dtype("bool").is_floating, False)
-    self.assertEqual(dtypes.as_dtype("bfloat16").is_integer, False)
+    self.assertEqual(dtypes.as_dtype("bfloat16").is_floating, True)
     self.assertEqual(dtypes.as_dtype("qint8").is_floating, False)
     self.assertEqual(dtypes.as_dtype("qint16").is_floating, False)
     self.assertEqual(dtypes.as_dtype("qint32").is_floating, False)
@@ -276,6 +276,9 @@ class TypesTest(test_util.TensorFlowTestCase):
       if numpy_dtype in (np.float16, np.float32, np.float64):
         self.assertEquals(dtype.min, np.finfo(numpy_dtype).min)
         self.assertEquals(dtype.max, np.finfo(numpy_dtype).max)
+      if numpy_dtype == dtypes.bfloat16.as_numpy_dtype:
+        self.assertEquals(dtype.min, float.fromhex("-0x1.FEp127"))
+        self.assertEquals(dtype.max, float.fromhex("0x1.FEp127"))
 
   def testRepr(self):
     for enum, name in dtypes._TYPE_TO_STRING.items():
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index 29cf2237244810a888d53927f44889b4a4e9704e..416bbf4f48a6439eded68589ee5687789f42c02b 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -82,8 +82,8 @@ class Defun(object):
     return x + y, x - y
 
   # Building the graph.
-  a = tf.Constant([1.0])
-  b = tf.Constant([2.0])
+  a = tf.constant([1.0])
+  b = tf.constant([2.0])
   c, d = MyFunc(a, b, name='mycall')
   ```
   """
@@ -682,7 +682,7 @@ class _FuncGraph(ops.Graph):
 
   def create_op(self, op_type, inputs, data_types, **kwargs):
     for i, x in enumerate(inputs):
-      if x.graph is not self:
+      if isinstance(x, ops.EagerTensor) or x.graph is not self:
         # Referring to a tensor from other graph.
         if x in self._captured:
           # Captured already.
@@ -692,7 +692,10 @@ class _FuncGraph(ops.Graph):
         else:
           # Substitute with a placeholder.
           self.extra_inputs.append(x)
-          ph = array_ops.placeholder(x.dtype, shape=x.get_shape())
+          # Hoist the new input placeholder out of any control flow context
+          # we're currently in.
+          with ops.control_dependencies(None):
+            ph = array_ops.placeholder(x.dtype, shape=x.get_shape())
           # pylint: disable=protected-access
           ph._handle_data = x._handle_data
           # pylint: enable=protected-access
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index ba43e9199b4764fef4b86056a1ae57bd9070003e..57e5a724c99bd77df8cd11eff99288fa6647f4ac 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import re
 import time
+import sys
 
 import numpy as np
 
@@ -724,6 +725,38 @@ class FunctionTest(test.TestCase):
         # NOTE: We still do not support capturing control deps.
         _ = Foo(x)
 
+  def testCaptureInWhileLoop(self):
+    g = ops.Graph()
+    with g.as_default():
+      x = constant_op.constant(1)
+
+      @function.Defun()
+      def Foo():
+        return control_flow_ops.while_loop(lambda i: i < 10,
+                                           lambda i: i + x,
+                                           [0])
+      y = Foo()
+
+    with self.test_session(graph=g) as sess:
+      self.assertEqual(sess.run(y), 10)
+
+  def testCaptureInCond(self):
+    g = ops.Graph()
+    with g.as_default():
+      x = constant_op.constant(1)
+
+      @function.Defun(dtypes.bool)
+      def Foo(pred):
+        return control_flow_ops.cond(pred,
+                                     lambda: x,
+                                     lambda: x + 1)
+      y = Foo(True)
+      z = Foo(False)
+
+    with self.test_session(graph=g) as sess:
+      self.assertEqual(sess.run(y), 1)
+      self.assertEqual(sess.run(z), 2)
+
   def testStableName(self):
 
     @function.Defun()
@@ -733,8 +766,12 @@ class FunctionTest(test.TestCase):
     # We added more randomness to function names in C API.
     # TODO(iga): Remove this if statement when we switch to C API.
     if ops._USE_C_API:  # pylint: disable=protected-access
-      self.assertEqual("Foo_aCYSbwBkR5A",
-                       Foo.instantiate([dtypes.float32] * 3).name)
+      if sys.byteorder == 'big':
+        self.assertEqual("Foo_kEdkAG8SJvg",
+                         Foo.instantiate([dtypes.float32] * 3).name)
+      else:
+        self.assertEqual("Foo_aCYSbwBkR5A",
+                         Foo.instantiate([dtypes.float32] * 3).name)
     else:
       self.assertEqual("Foo_d643acf7",
                        Foo.instantiate([dtypes.float32] * 3).name)
@@ -882,6 +919,94 @@ class FunctionTest(test.TestCase):
           np.array([1.0, 0.0]).astype(np.float32),
           sess.run(dinp, {inp: x}))
 
+  def testFunctionMarkedStateful(self):
+
+    @function.Defun(dtypes.int32, dtypes.float32)
+    def Foo(t, x):
+      return x[t]
+
+    @function.Defun(dtypes.int64)
+    def Bar(x):
+      return x
+
+    # NOTE(mrry): All functions are currently considered stateless by the
+    # runtime, so we simulate a "stateful" function.
+    # TODO(b/70565970): Remove this hack when we are able to build stateful
+    # functions using the API.
+    # pylint: disable=protected-access
+    Foo._signature.is_stateful = True
+    Bar._signature.is_stateful = True
+    # pylint: enable=protected-access
+
+    result_1 = Foo(3, [1.0, 2.0, 3.0, 4.0])
+    result_2 = Bar(constant_op.constant(100, dtype=dtypes.int64))
+
+    with session.Session() as sess:
+      self.assertEqual(4.0, sess.run(result_1))
+      self.assertEqual(100, sess.run(result_2))
+      self.assertEqual((4.0, 100), sess.run((result_1, result_2)))
+
+  def testStatefulFunction(self):
+
+    @function.Defun()
+    def FunctionWithStatelessOp():
+      return constant_op.constant(42.0)
+
+    @function.Defun()
+    def FunctionWithStatefulOp():
+      return random_ops.random_uniform([100], maxval=10, dtype=dtypes.int32)
+
+    @function.Defun()
+    def FunctionWithStatelessFunctionCall():
+      return FunctionWithStatelessOp()
+
+    @function.Defun()
+    def FunctionWithStatefulFunctionCall():
+      return FunctionWithStatefulOp()
+
+    # Test that the `is_stateful` bit is propagated.
+    self.assertFalse(FunctionWithStatelessOp.definition.signature.is_stateful)
+    self.assertTrue(FunctionWithStatefulOp.definition.signature.is_stateful)
+    self.assertFalse(
+        FunctionWithStatelessFunctionCall.definition.signature.is_stateful)
+    self.assertTrue(
+        FunctionWithStatefulFunctionCall.definition.signature.is_stateful)
+
+    # Ensure that two invocations of the same random-number-generating
+    # function produce different results.
+    result1 = FunctionWithStatefulFunctionCall()
+    result2 = FunctionWithStatefulFunctionCall()
+
+    # Statefulness affects how the function is treated by the various
+    # optimization passes, so run the test in each optimizer
+    # configuration.
+    for config in _OptimizerOptions():
+      with session.Session(config=config) as sess:
+        val1, val2 = sess.run((result1, result2))
+        self.assertFalse(all(val1 == val2))
+        val3, val4 = sess.run((result1, result2))
+        self.assertFalse(all(val3 == val1))
+        self.assertFalse(all(val4 == val2))
+
+  def testSameFunctionOnTwoDevices(self):
+
+    @function.Defun(dtypes.float32)
+    def AddOne(x):
+      return x + 1.0
+
+    with ops.device("/cpu:0"):
+      f_0 = AddOne(41.0)
+
+    with ops.device("/cpu:1"):
+      f_1 = AddOne(43.0)
+
+    for config in _OptimizerOptions():
+      config.device_count["CPU"] = 2
+      with session.Session(config=config) as sess:
+        self.assertEqual(42.0, sess.run(f_0))
+        self.assertEqual(44.0, sess.run(f_1))
+        self.assertEqual((42.0, 44.0), sess.run((f_0, f_1)))
+
 
 @test_util.with_c_api
 class FunctionsFromProtos(test.TestCase):
diff --git a/tensorflow/python/framework/graph_to_function_def.py b/tensorflow/python/framework/graph_to_function_def.py
index 448f87aa6ee31127113ed10aee8e4e0fa06482f1..5bf30ee68491c5c0686cc9572f024299dbfe587a 100644
--- a/tensorflow/python/framework/graph_to_function_def.py
+++ b/tensorflow/python/framework/graph_to_function_def.py
@@ -58,7 +58,7 @@ def _is_in_placeholders(op, func_arg_placeholders):
 
 
 def _get_node_def(op):
-  return op._node_def  # pylint: disable=protected-access
+  return op.node_def  # pylint: disable=protected-access
 
 
 def _get_op_def(op):
@@ -110,6 +110,13 @@ def _add_op_node(op, func, input_dict):
                                                (node_def.input[i],
                                                 input_dict.items()))
       node_def.input[i] = input_dict[node_def.input[i]]
+  # The function is stateful if any of its operations are stateful.
+  # NOTE(mrry): The "Const" node typically does not have an `OpDef` associated
+  # with it, so we assume any nodes without an `OpDef` are stateless.
+  # TODO(skyewm): Remove the `is not None` test after we transition to the C
+  # API.
+  if op.op_def is not None and op.op_def.is_stateful:
+    func.signature.is_stateful = True
 
 
 def graph_to_function_def(graph, operations, inputs, outputs, out_names=None):
diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index 434cbda7ad6ede8f6d07dc2ecfb74ea42cad2d46..a3dbe43f06eadb311338165bb07c3dccdf0299c3 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -179,12 +179,11 @@ def _ProcessInputMapParam(input_map):
 
 def _ProcessReturnElementsParam(return_elements):
   """Type-checks and possibly canonicalizes `return_elements`."""
-  if return_elements is not None:
-    return_elements = tuple(return_elements)
-    if not all(isinstance(x, compat.bytes_or_text_types)
-               for x in return_elements):
-      raise TypeError('return_elements must be a list of strings.')
-  return return_elements
+  if return_elements is None: return None
+  if not all(isinstance(x, compat.bytes_or_text_types)
+             for x in return_elements):
+    raise TypeError('return_elements must be a list of strings.')
+  return tuple(compat.as_str(x) for x in return_elements)
 
 
 def _FindAttrInOpDef(attr_name, op_def):
@@ -194,24 +193,151 @@ def _FindAttrInOpDef(attr_name, op_def):
   return None
 
 
-def _PopulateTFImportGraphDefOptions(options, prefix, return_elements):
+def _RemoveDefaultAttrs(op_dict, producer_op_list, graph_def):
+  """Removes unknown default attrs according to `producer_op_list`.
+
+  Removes any unknown attrs in `graph_def` (i.e. attrs that do not appear in
+  the OpDefs in `op_dict`) that have a default value in `producer_op_list`.
+
+  Args:
+    op_dict: dict mapping operation name to OpDef.
+    producer_op_list: OpList proto.
+    graph_def: GraphDef proto
+  """
+  producer_op_dict = {op.name: op for op in producer_op_list.op}
+  for node in graph_def.node:
+    # Remove any default attr values that aren't in op_def.
+    if node.op in producer_op_dict:
+      op_def = op_dict[node.op]
+      producer_op_def = producer_op_dict[node.op]
+      # We make a copy of node.attr to iterate through since we may modify
+      # node.attr inside the loop.
+      for key in list(node.attr):
+        if _FindAttrInOpDef(key, op_def) is None:
+          # No attr_def in consumer, look in producer.
+          attr_def = _FindAttrInOpDef(key, producer_op_def)
+          if (attr_def and attr_def.HasField('default_value') and
+              node.attr[key] == attr_def.default_value):
+            # Unknown attr had default value in producer, delete it so it can be
+            # understood by consumer.
+            del node.attr[key]
+
+
+def _ConvertInputMapValues(name, input_map):
+  """Ensures all input map values are tensors.
+
+  This should be called from inside the import name scope.
+
+  Args:
+    name: the `name` argument passed to import_graph_def
+    input_map: the `input_map` argument passed to import_graph_def.
+
+  Returns:
+    An possibly-updated version of `input_map`.
+
+  Raises:
+    ValueError: if input map values cannot be converted due to empty name scope.
+  """
+  if not all(isinstance(v, ops.Tensor) for v in input_map.values()):
+    if name == '':  # pylint: disable=g-explicit-bool-comparison
+      raise ValueError(
+          'tf.import_graph_def() requires a non-empty `name` if `input_map` '
+          'contains non-Tensor values. Try calling tf.convert_to_tensor() on '
+          '`input_map` values before calling tf.import_graph_def().')
+    with ops.name_scope('_inputs'):
+      input_map = {k: ops.convert_to_tensor(v) for k, v in input_map.items()}
+  return input_map
+
+
+def _PopulateTFImportGraphDefOptions(options, prefix, input_map,
+                                     return_elements):
   """Populates the TF_ImportGraphDefOptions `options`."""
   c_api.TF_ImportGraphDefOptionsSetPrefix(options, prefix)
-
+  c_api.TF_ImportGraphDefOptionsSetUniquifyNames(options, True)
+  c_api.TF_ImportGraphDefOptionsSetUniquifyPrefix(options, True)
+
+  for input_src, input_dst in input_map.items():
+    input_src = compat.as_str(input_src)
+    if input_src.startswith('^'):
+      src_name = compat.as_bytes(input_src[1:])
+      dst_op = input_dst._as_tf_output().oper  # pylint: disable=protected-access
+      c_api.TF_ImportGraphDefOptionsRemapControlDependency(options, src_name,
+                                                           dst_op)
+    else:
+      src_name, src_idx = _ParseTensorName(input_src)
+      src_name = compat.as_str(src_name)
+      dst_output = input_dst._as_tf_output()  # pylint: disable=protected-access
+      c_api.TF_ImportGraphDefOptionsAddInputMapping(options, src_name,
+                                                    src_idx, dst_output)
   for name in return_elements or []:
     if ':' in name:
       op_name, index = _ParseTensorName(name)
+      op_name = compat.as_str(op_name)
       c_api.TF_ImportGraphDefOptionsAddReturnOutput(options, op_name, index)
     else:
-      c_api.TF_ImportGraphDefOptionsAddReturnOperation(options, name)
+      c_api.TF_ImportGraphDefOptionsAddReturnOperation(options,
+                                                       compat.as_str(name))
 
 
 def _ProcessNewOps(graph):
   """Processes the newly-added TF_Operations in `graph`."""
-  for c_op in c_api_util.new_tf_operations(graph):
-    graph._create_op_from_tf_operation(c_op)  # pylint: disable=protected-access
-
-  # TODO(skyewm): colocation logic
+  # Maps from a node to the names of the ops it's colocated with, if colocation
+  # is specified in the attributes.
+  colocation_pairs = {}
+
+  for new_op in graph._add_new_tf_operations(compute_devices=False):  # pylint: disable=protected-access
+    colocation_names = _GetColocationNames(new_op)
+    if colocation_names:
+      colocation_pairs[new_op] = colocation_names
+      # Don't apply this op's device function, since colocation constraints
+      # override device functions. Note that this op's device may still be set
+      # by the loop below.
+    else:
+      with _MaybeDevice(new_op.device):
+        graph._apply_device_functions(new_op)  # pylint: disable=protected-access
+
+  # The following loop populates the device field of ops that are colocated
+  # with another op.  This is implied by the colocation attribute, but we
+  # propagate the device field for completeness.
+  for op, coloc_op_list in colocation_pairs.items():
+    coloc_device = None
+    # Find any device in the list of colocated ops that have a device, if it
+    # exists.  We assume that if multiple ops have devices, they refer to the
+    # same device.  Otherwise, a runtime error will occur since the colocation
+    # property cannot be guaranteed.
+    #
+    # One possible improvement is to try to check for compatibility of all
+    # devices in this list at import time here, which would require
+    # implementing a compatibility function for device specs in python.
+    for coloc_op_name in coloc_op_list:
+      try:
+        coloc_op = graph._get_operation_by_name_unsafe(coloc_op_name)  # pylint: disable=protected-access
+      except KeyError:
+        raise ValueError('Specified colocation to an op that '
+                         'does not exist during import: %s in %s' % (
+                             coloc_op_name, op.name))
+      if coloc_op.device:
+        coloc_device = pydev.DeviceSpec.from_string(coloc_op.device)
+        break
+    if coloc_device:
+      op._set_device(coloc_device)  # pylint: disable=protected-access
+
+
+def _GetColocationNames(op):
+  """Returns names of the ops that `op` should be colocated with."""
+  colocation_names = []
+  try:
+    class_values = op.get_attr('_class')
+  except ValueError:
+    # No _class attr
+    return
+  for val in class_values:
+    val = compat.as_str(val)
+    if val.startswith('loc:@'):
+      colocation_node_name = val[len('loc:@'):]
+      if colocation_node_name != op.name:
+        colocation_names.append(colocation_node_name)
+  return colocation_names
 
 
 def _GatherReturnElements(requested_return_elements, graph, results):
@@ -296,10 +422,9 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
 
   op_dict = op_def_registry.get_registered_ops()
 
-  if producer_op_list is None:
-    producer_op_dict = None
-  else:
-    producer_op_dict = {op.name: op for op in producer_op_list.op}
+  if producer_op_list is not None:
+    # TODO(skyewm): make a copy of graph_def so we're not mutating the argument?
+    _RemoveDefaultAttrs(op_dict, producer_op_list, graph_def)
 
   graph = ops.get_default_graph()
 
@@ -312,17 +437,53 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
       else:
         prefix = ''
 
+      # Generate any input map tensors inside name scope
+      input_map = _ConvertInputMapValues(name, input_map)
+
     scoped_options = c_api_util.ScopedTFImportGraphDefOptions()
     options = scoped_options.options
-    _PopulateTFImportGraphDefOptions(options, prefix, return_elements)
+    _PopulateTFImportGraphDefOptions(options, prefix, input_map,
+                                     return_elements)
 
     with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized:
-      with errors.raise_exception_on_not_ok_status() as status:
-        results = c_api.TF_GraphImportGraphDefWithResults(
-            graph._c_graph, serialized, options, status)  # pylint: disable=protected-access
+      try:
+        with errors.raise_exception_on_not_ok_status() as status:
+          results = c_api.TF_GraphImportGraphDefWithResults(
+              graph._c_graph, serialized, options, status)  # pylint: disable=protected-access
+      except errors.InvalidArgumentError as e:
+        # Convert to ValueError for backwards compatibility.
+        raise ValueError(str(e))
 
     _ProcessNewOps(graph)
 
+    # Create _DefinedFunctions for any imported functions.
+    #
+    # We do this by creating _DefinedFunctions directly from `graph_def`, and
+    # adding them to `graph`. Adding an existing function to a TF_Graph is a
+    # no-op, so this only has the effect of updating the Python state (usually
+    # _DefinedFunction.add_to_graph also adds the function to the TF_Graph).
+    #
+    # TODO(skyewm): fetch the TF_Functions directly from the TF_Graph
+    # TODO(skyewm): avoid sending serialized FunctionDefs back to the TF_Graph
+    if graph_def.library and graph_def.library.function:
+      # pylint: disable=protected-access
+      functions = function._from_library(graph_def.library)
+      for f in functions:
+        f.add_to_graph(graph)
+      # pylint: enable=protected-access
+
+    # Treat input mappings that don't appear in the graph as an error, because
+    # they are likely to be due to a typo.
+    missing_unused_input_keys = (
+        c_api.TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper(
+            results))
+    if missing_unused_input_keys:
+      missing_unused_input_keys = [compat.as_str(s)
+                                   for s in missing_unused_input_keys]
+      raise ValueError(
+          'Attempted to map inputs that were not found in graph_def: [%s]'
+          % ', '.join(missing_unused_input_keys))
+
     if return_elements is None:
       return None
     else:
@@ -359,16 +520,7 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
       # more nuanced.
       g.graph_def_versions.CopyFrom(graph_def.versions)
 
-      if not all(isinstance(v, ops.Tensor) for v in input_map.values()):
-        if not scope:
-          # The caller must have passed `name=''`.
-          raise ValueError(
-              'tf.import_graph_def() requires a non-empty `name` if `input_map`'
-              ' contains non-Tensor values. Try calling tf.convert_to_tensor() '
-              'on `input_map` values before calling tf.import_graph_def().')
-        with ops.name_scope('_inputs'):
-          input_map = {k: ops.convert_to_tensor(v)
-                       for k, v in input_map.items()}
+      input_map = _ConvertInputMapValues(name, input_map)
 
       # NOTE(mrry): We do this in two passes, because there may be a cycle in
       # `graph_def`.
@@ -388,21 +540,6 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
             value = node.attr[key]
             if value is None or value.WhichOneof('value') is None:
               node.attr[key].CopyFrom(attr_def.default_value)
-        if producer_op_dict:
-          # Remove any default attr values that aren't in op_def.
-          if node.op in producer_op_dict:
-            producer_op_def = producer_op_dict[node.op]
-            # We make a copy of node.attr to iterate through since we
-            # may modify node.attr inside the loop.
-            for key in list(node.attr):
-              if _FindAttrInOpDef(key, op_def) is None:
-                # No attr_def in consumer, look in producer.
-                attr_def = _FindAttrInOpDef(key, producer_op_def)
-                if (attr_def and attr_def.HasField('default_value') and
-                    node.attr[key] == attr_def.default_value):
-                  # Unknown attr had default value in producer, delete it
-                  # so it can be understood by consumer.
-                  del node.attr[key]
 
         output_types = _OutputTypes(node, op_dict)
         name_to_op[node.name] = g.create_op(
@@ -505,13 +642,13 @@ def import_graph_def(graph_def, input_map=None, return_elements=None,
                   node, 'Input tensor %r %s' % (input_name, te)))
 
         # pylint: disable=protected-access
-        if op._input_dtypes != input_types:
+        if op._input_types != input_types:
           raise ValueError(
               _InvalidNodeMessage(
                   node,
                   'Input types mismatch (expected %r but got %r)'
                   % (', '.join(dtypes.as_dtype(x).name for x in input_types),
-                     ', '.join(x.name for x in op._input_dtypes))))
+                     ', '.join(x.name for x in op._input_types))))
         # pylint: enable=protected-access
 
         if not g._is_function(op.type):  # pylint: disable=protected-access
diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index 5a6187c8a6df1d4c076722c7655d4bd1b276c211..acaec37f810cb00daa9bae17ffbcb675648b9fe1 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -34,6 +34,7 @@ from tensorflow.python.framework import test_ops  # pylint: disable=unused-impor
 from tensorflow.python.framework import test_util
 from tensorflow.python.framework import versions
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
@@ -111,8 +112,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertNotEqual(None, a.op_def)
 
   def testMultipleImport(self):
-    if ops._USE_C_API: return  # TODO(skyewm): set uniquify_names
-
     graph_def = self._MakeGraphDef("""
     node { name: 'A' op: 'IntOutput' }
     node { name: 'B' op: 'IntInput' input: 'A:0' }
@@ -156,16 +155,16 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(list(b3.inputs), [a3.outputs[0]])
 
       # Import with existing de-duped node names
-      a4, b4 = importer.import_graph_def(
+      a1_1, b1_1 = importer.import_graph_def(
           self._MakeGraphDef("""
           node { name: 'A_1' op: 'IntOutput' }
           node { name: 'B_1' op: 'IntInput' input: 'A_1:0' }
           """),
           return_elements=["A_1", "B_1"],
           name="")
-      self.assertEqual(a4.name, "A_1_1")
-      self.assertEqual(b4.name, "B_1_1")
-      self.assertEqual(list(b4.inputs), [a4.outputs[0]])
+      self.assertEqual(a1_1.name, "A_1_1")
+      self.assertEqual(b1_1.name, "B_1_1")
+      self.assertEqual(list(b1_1.inputs), [a1_1.outputs[0]])
 
       # Create a name scope and then import node with same name
       with ops.name_scope("foo"):
@@ -201,8 +200,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(outer_inner_c.name, "outer/inner/c_1")
 
   def testInputMap(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       feed_b_1 = constant_op.constant(1, dtype=dtypes.int32)
@@ -230,8 +227,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(d.inputs[1], feed_b_1)
 
   def testInputMapBytes(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       feed_b_1 = constant_op.constant(1, dtype=dtypes.int32)
@@ -259,8 +254,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(d.inputs[1], feed_b_1)
 
   def testInputMapUnicode(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       feed_b_1 = constant_op.constant(1, dtype=dtypes.int32)
@@ -299,8 +292,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(b.inputs[0], a.outputs[0])
 
   def testInputMapImplicitZerothOutput(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(0, dtype=dtypes.int32)
       b, = importer.import_graph_def(
@@ -341,47 +332,46 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(d.inputs[1], b.outputs[0])
 
       self.assertEqual(a.outputs[0].dtype, dtypes.int32_ref)
-      self.assertEqual(c._input_dtypes, [dtypes.int32, dtypes.int32])
+      self.assertEqual(c._input_types, [dtypes.int32, dtypes.int32])
       self.assertEqual(c.outputs, [])
-      self.assertEqual(d._input_dtypes, [dtypes.int32_ref, dtypes.int32])
+      self.assertEqual(d._input_types, [dtypes.int32_ref, dtypes.int32])
       self.assertEqual(d.outputs, [])
 
-  def testCyclic(self):
-    # Importing cycles not supported with C API enabled (this test will
-    # eventually be deleted).
-    # TODO(skyewm): write while loop test
-    if ops._USE_C_API: return
+  def testWhileLoop(self):
+    # Produce GraphDef containing while loop.
+    graph = ops.Graph()
+    with graph.as_default():
+      r = control_flow_ops.while_loop(lambda i: i < 10, lambda i: i + 1, [0])
+      # Add an op that consumes the while loop output.
+      math_ops.add(r, 1)
+    graph_def = graph.as_graph_def()
 
+    # Import the GraphDef and make sure it runs.
     with ops.Graph().as_default():
-      a, b = importer.import_graph_def(
-          self._MakeGraphDef("""
-          node { name: 'A' op: 'Unary'
-                 attr { key: 'T' value { type: DT_INT32 } } input: 'B:0' }
-          node { name: 'B' op: 'Unary'
-                 attr { key: 'T' value { type: DT_INT32 } } input: 'A:0' }
-          """),
-          return_elements=["A", "B"])
-
-      self.assertEqual(a.inputs[0], b.outputs[0])
-      self.assertEqual(b.inputs[0], a.outputs[0])
+      imported_r, = importer.import_graph_def(graph_def,
+                                              return_elements=[r.name])
+      self.assertEqual(imported_r.name, "import/" + r.name)
+      with self.test_session() as sess:
+        self.assertEqual(sess.run(imported_r), 10)
 
   def testTypeMismatchInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      # TODO(skyewm): improve error message
+      error_msg = ("Input 0 of node import/B was passed int32 from import/A:0 "
+                   "incompatible with expected float.")
+    else:
+      error_msg = ("Cannot convert a tensor of type int32 to an input of type "
+                   "float")
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             node { name: 'B' op: 'FloatInput' input: 'A:0' }
             """))
-      self.assertTrue(
-          "Cannot convert a tensor of type int32 to an input of type float" in
-          str(e.exception))
 
   def testShapeWhitelist(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     # Barrier's shape is an output vector of 2, but the
     # graph says it's a scalar.  This is currently whitelisted.
     with ops.Graph().as_default():
@@ -389,14 +379,14 @@ class ImportGraphDefTest(test.TestCase):
           self._MakeGraphDef("""
           node { name: 'A' op: 'Barrier'
                  attr { key: '_output_shapes'
-                        value { list { shape { } } } } }
+                        value { list { shape { } } } }
+                 attr { key: 'component_types'
+                        value { list { type: DT_FLOAT } } } }
           """),
           return_elements=["A"],
           name="import")
 
   def testShapeWhitelistViolation(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     # L2 loss produces a scalar shape, but the graph
     # has the wrong shape, so raise an error.
     with ops.Graph().as_default():
@@ -416,45 +406,51 @@ class ImportGraphDefTest(test.TestCase):
             "Shapes () and (43,) are not compatible" in str(e.exception))
 
   def testInvalidSignatureTooManyInputsInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      # TODO(skyewm): improve error message
+      error_msg = "NodeDef expected inputs '' do not match 1 inputs specified"
+    else:
+      error_msg = r"More inputs specified \('A:0'\) than the op expects"
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             node { name: 'B' op: 'None' input: 'A:0' }
             """))
-      self.assertTrue("More inputs specified ('A:0') than the op expects" in
-                      str(e.exception))
 
   def testInvalidSignatureNotEnoughInputsInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      # TODO(skyewm): improve error message
+      error_msg = ("NodeDef expected inputs 'int32, float' do not match 1 "
+                   "inputs specified")
+    else:
+      error_msg = (r"Input types mismatch \(expected 'int32, float32' but "
+                   r"got 'int32'\)")
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             node { name: 'B' op: 'IntInputFloatInput' input: 'A:0' }
             """))
-      self.assertTrue("Input types mismatch (expected 'int32, float32' but "
-                      "got 'int32')" in str(e.exception))
 
   def testMissingInputOpInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = "Node 'B': Unknown input node 'A:0'"
+    else:
+      error_msg = "Input tensor 'A:0' not found"
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'B' op: 'FloatInput' input: 'A:0' }
             """))
-      self.assertTrue("Input tensor 'A:0' not found" in str(e.exception))
 
   def testMissingInputOpInGraphDefButAppearsInInputMap(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       feed_a_0 = constant_op.constant(5.0)
       b, = importer.import_graph_def(
@@ -466,111 +462,124 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(b.inputs[0], feed_a_0)
 
   def testMissingInputTensorInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = ("Node 'B': Connecting to invalid output 1 of source node A "
+                   "which has 1 outputs")
+    else:
+      error_msg = "Input tensor 'A:1' not found"
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'FloatOutput' }
             node { name: 'B' op: 'FloatInput' input: 'A:1' }
             """))
-      self.assertTrue("Input tensor 'A:1' not found" in str(e.exception))
 
   def testMissingControlInputInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = r"Node 'B': Unknown input node '\^A'"
+    else:
+      error_msg = r"Control input '\^A' not found"
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'B' op: 'None' input: '^A' }
             """))
-      self.assertTrue("Control input '^A' not found" in str(e.exception))
 
   def testInvalidTensorNameOutputIndexInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = "Node 'B': Unknown input node 'A:B'"
+    else:
+      error_msg = "Cannot convert 'A:B' to a tensor name."
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'B' op: 'None' input: 'A:B' }
             """))
-      self.assertEqual("Cannot convert 'A:B' to a tensor name.",
-                       str(e.exception))
 
   def testInvalidTensorNameInGraphDef(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = "Node 'B': Unknown input node 'A:B:0'"
+    else:
+      error_msg = "Cannot convert 'A:B:0' to a tensor name."
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'B' op: 'None' input: 'A:B:0' }
             """))
-      self.assertEqual("Cannot convert 'A:B:0' to a tensor name.",
-                       str(e.exception))
 
   def testMissingReturnOperation(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = "Requested return node 'B' not found in graph def"
+    else:
+      error_msg = "return_element 'B' not found in graph_def."
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'None' }
             """),
             return_elements=["B"])
-      self.assertTrue(
-          "return_element 'B' not found in graph_def." in str(e.exception))
 
   def testMissingReturnTensor(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = (r"Invalid return output 1 of node 'A', which has 1 "
+                   r"output\(s\)")
+    else:
+      error_msg = "return_element 'A:1' not found in graph_def."
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             """),
             return_elements=["A:1"])
-      self.assertTrue(
-          "return_element 'A:1' not found in graph_def." in str(e.exception))
 
-      with self.assertRaises(ValueError) as e:
+      if ops._USE_C_API:
+        error_msg = "Requested return tensor 'B:0' not found in graph def"
+      else:
+        error_msg = "return_element 'B:0' not found in graph_def."
+
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             """),
             return_elements=["B:0"])
-      self.assertTrue(
-          "return_element 'B:0' not found in graph_def." in str(e.exception))
 
-      with self.assertRaises(ValueError) as e:
+      if ops._USE_C_API:
+        error_msg = "Cannot convert 'A:B:0' to a tensor name."
+      else:
+        error_msg = "return_element 'A:B:0' not found in graph_def."
+
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             """),
             return_elements=["A:B:0"])
-      self.assertTrue(
-          "return_element 'A:B:0' not found in graph_def." in str(e.exception))
 
   def testMissingInputMap(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(
+          ValueError,
+          r"Attempted to map inputs that were not found in graph_def: \[B:0\]"):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'None' }
             """),
             input_map={"B:0": constant_op.constant(5.0)})
-      self.assertTrue("not found in graph_def: [B:0]" in str(e.exception))
 
   def testInputMapUnusedAsInput(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       # Mapping an unused node output should succeed.
       importer.import_graph_def(
@@ -580,28 +589,30 @@ class ImportGraphDefTest(test.TestCase):
           input_map={"A:0": constant_op.constant(5.0)})
 
       # Mapping a non-existent output of an existing node should fail.
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(
+          ValueError,
+          r"Attempted to map inputs that were not found in graph_def: \[A:2\]"):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             """),
             input_map={"A:2": constant_op.constant(5.0)})
-      self.assertTrue("not found in graph_def: [A:2]" in str(e.exception))
 
   def testInputMapTypeMismatch(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
+    if ops._USE_C_API:
+      error_msg = ("Input 0 of node import/B was passed float from Const:0 "
+                   "incompatible with expected int32.")
+    else:
+      error_msg = ("Cannot convert a tensor of type float32 to an input of "
+                   "type int32.")
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             node { name: 'B' op: 'IntInput' input: 'A:0' }
             """),
             input_map={"A:0": constant_op.constant(5.0)})
-      self.assertTrue(
-          "Cannot convert a tensor of type float32 to an input of type int32."
-          in str(e.exception))
 
   def testNoReturns(self):
     with ops.Graph().as_default() as g:
@@ -651,8 +662,6 @@ class ImportGraphDefTest(test.TestCase):
           b.node_def.attr["_class"])
 
   def testColocationWithDeviceFn(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     original_graph_def = self._MakeGraphDef("""
           node { name: 'A' op: 'None' attr {
             key: '_class'
@@ -674,23 +683,17 @@ class ImportGraphDefTest(test.TestCase):
 
     with ops.Graph().as_default():
       with ops.device(CustomDeviceFn):
-        b, = importer.import_graph_def(
-            original_graph_def, return_elements=["B"], name="imported_graph")
-
-      self.assertProtoEqualsVersion("""
-          node { name: 'imported_graph/A' op: 'None' device: "/device:A:0"
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-                }
-          }
-          node { name: 'imported_graph/B' op: 'None' device: "/device:A:0"
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-          } }""", b.graph.as_graph_def())
-
-    # Test a scenario where 'A' doesn't get a device; 'A' should
-    # not have a device, but during runtime will get colocated with
-    # 'B' because of the colocation attribute.
+        a, b = importer.import_graph_def(original_graph_def,
+                                         return_elements=["A", "B"],
+                                         name="imported_graph")
+      self.assertEqual(a.device, "/device:A:0")
+      self.assertEqual(b.device, "/device:A:0")
+      self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"])
+      self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/A"])
+
+    # Test a scenario where 'A' doesn't get a device; 'A' should not have a
+    # device, but during runtime will get colocated with 'B' because of the
+    # colocation attribute. B's device function is still overridden by A.
     def BDeviceFn(op):
       if "B" in op.name:
         return "/device:B:0"
@@ -698,19 +701,13 @@ class ImportGraphDefTest(test.TestCase):
 
     with ops.Graph().as_default():
       with ops.device(BDeviceFn):
-        b, = importer.import_graph_def(
-            original_graph_def, return_elements=["B"], name="imported_graph")
-
-      self.assertProtoEqualsVersion("""
-          node { name: 'imported_graph/A' op: 'None'
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-                }
-          }
-          node { name: 'imported_graph/B' op: 'None'
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-          } }""", b.graph.as_graph_def())
+        a, b = importer.import_graph_def(original_graph_def,
+                                         return_elements=["A", "B"],
+                                         name="imported_graph")
+      self.assertEqual(a.device, "")
+      self.assertEqual(b.device, "")
+      self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"])
+      self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/A"])
 
     # Only A gets a device, so B inherits it implicitly.
     def ADeviceFn(op):
@@ -720,23 +717,15 @@ class ImportGraphDefTest(test.TestCase):
 
     with ops.Graph().as_default():
       with ops.device(ADeviceFn):
-        b, = importer.import_graph_def(
-            original_graph_def, return_elements=["B"], name="imported_graph")
-
-      self.assertProtoEqualsVersion("""
-          node { name: 'imported_graph/A' op: 'None' device: "/device:A:0"
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-                }
-          }
-          node { name: 'imported_graph/B' op: 'None' device: "/device:A:0"
-                attr {
-                  key: '_class' value { list { s: 'loc:@imported_graph/A' } }
-          } }""", b.graph.as_graph_def())
+        a, b = importer.import_graph_def(original_graph_def,
+                                         return_elements=["A", "B"],
+                                         name="imported_graph")
+      self.assertEqual(a.device, "/device:A:0")
+      self.assertEqual(b.device, "/device:A:0")
+      self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"])
+      self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/A"])
 
   def testMultipleColocationWithDeviceFn(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     original_graph_def = self._MakeGraphDef("""
           node { name: 'A' op: 'None'}
           node { name: 'B' op: 'None'}
@@ -757,24 +746,18 @@ class ImportGraphDefTest(test.TestCase):
 
     with ops.Graph().as_default():
       with ops.device(CustomDeviceFn):
-        c, = importer.import_graph_def(
-            original_graph_def, return_elements=["C"], name="imported_graph")
-
-      self.assertProtoEqualsVersion("""
-          node { name: 'imported_graph/A' op: 'None' }
-          node { name: 'imported_graph/B' op: 'None' device: "/device:B:0" }
-          node { name: 'imported_graph/C' op: 'None' device: "/device:B:0"
-                 attr {
-                   key: '_class' value {
-                     list { s: 'loc:@imported_graph/A'
-                            s: 'loc:@imported_graph/B' }
-                   }
-                 }
-               }""", c.graph.as_graph_def())
+        a, b, c = importer.import_graph_def(original_graph_def,
+                                            return_elements=["A", "B", "C"],
+                                            name="imported_graph")
+      self.assertEqual(a.device, "")
+      self.assertEqual(b.device, "/device:B:0")
+      self.assertEqual(c.device, "/device:B:0")
+      self.assertEqual(a.colocation_groups(), [b"loc:@imported_graph/A"])
+      self.assertEqual(b.colocation_groups(), [b"loc:@imported_graph/B"])
+      self.assertEqual(c.colocation_groups(),
+                       [b"loc:@imported_graph/A", b"loc:@imported_graph/B"])
 
   def testNamePrefixColocationAttrsMultipleImport(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     original_graph_def = self._MakeGraphDef("""
           node { name: 'A' op: 'None' }
           node { name: 'B' op: 'None'  attr {
@@ -783,32 +766,33 @@ class ImportGraphDefTest(test.TestCase):
           } }""")
 
     with ops.Graph().as_default():
-      b, = importer.import_graph_def(
-          original_graph_def, return_elements=["B"], name="")
-      _, = importer.import_graph_def(
-          original_graph_def, return_elements=["B"], name="")
-      self.assertProtoEqualsVersion("""
-          node { name: 'A' op: 'None' }
-          node { name: 'B' op: 'None'  attr {
-            key: '_class'
-            value { list { s: 'loc:@A' } }
-          } }
-          node { name: 'A_1' op: 'None' }
-          node { name: 'B_1' op: 'None'  attr {
-            key: '_class'
-            value { list { s: 'loc:@A_1' } }
-          } }""", b.graph.as_graph_def())
+      a, b = importer.import_graph_def(
+          original_graph_def, return_elements=["A", "B"], name="")
+      a_1, b_1 = importer.import_graph_def(
+          original_graph_def, return_elements=["A", "B"], name="")
 
-  def testNamePrefixColocationAttrsNotFound(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+      self.assertEqual(a.name, "A")
+      self.assertEqual(b.name, "B")
+      self.assertEqual(b.colocation_groups(), [b"loc:@A"])
 
+      self.assertEqual(a_1.name, "A_1")
+      self.assertEqual(b_1.name, "B_1")
+      self.assertEqual(b_1.colocation_groups(), [b"loc:@A_1"])
+
+  def testNamePrefixColocationAttrsNotFound(self):
     original_graph_def = self._MakeGraphDef("""
           node { name: 'B' op: 'None'  attr {
             key: '_class'
             value { list { s: 'loc:@A' } }
           } }""")
+
+    if ops._USE_C_API:
+      error_msg = "Node 'B' expects to be colocated with unknown node 'A'"
+    else:
+      error_msg = "does not exist during import"
+
     with ops.Graph().as_default():
-      with self.assertRaisesRegexp(ValueError, "does not exist during import"):
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             original_graph_def, return_elements=["B"], name="imported_graph")
 
@@ -825,8 +809,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual("graph_def must be a GraphDef proto.", str(e.exception))
 
   def testInvalidInputForInputMap(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       with self.assertRaises(TypeError) as e:
         importer.import_graph_def(
@@ -858,10 +840,9 @@ class ImportGraphDefTest(test.TestCase):
 
   def testInvalidInputForReturnOperations(self):
     with ops.Graph().as_default():
-      with self.assertRaises(TypeError) as e:
+      with self.assertRaisesRegexp(
+          TypeError, "return_elements must be a list of strings."):
         importer.import_graph_def(self._MakeGraphDef(""), return_elements=[7])
-      self.assertEqual("return_elements must be a list of strings.",
-                       str(e.exception))
 
       if ops._USE_C_API:
         error_msg = "Cannot convert 'a:b:c' to a tensor name."
@@ -872,17 +853,19 @@ class ImportGraphDefTest(test.TestCase):
                                   return_elements=["a:b:c"])
 
   def testDuplicateOperationNames(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
+    if ops._USE_C_API:
+      error_msg = "Node 'A' is not unique"
+    else:
+      error_msg = "Duplicate name 'A' in GraphDef."
 
     with ops.Graph().as_default():
-      with self.assertRaises(ValueError) as e:
+      with self.assertRaisesRegexp(ValueError, error_msg):
         importer.import_graph_def(
             self._MakeGraphDef("""
             node { name: 'A' op: 'IntOutput' }
             node { name: 'B' op: 'IntOutput' }
             node { name: 'A' op: 'IntOutput' }
             """))
-      self.assertEqual("Duplicate name 'A' in GraphDef.", str(e.exception))
 
   def testWithExtensionAndAttr(self):
     with ops.Graph().as_default() as g:
@@ -895,8 +878,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertAllEqual(pack.outputs[0].eval(), [5.0, 5.0])
 
   def testWithDevice(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default() as g:
       # No device.
       a = constant_op.constant(3.0, name="a")
@@ -940,8 +921,6 @@ class ImportGraphDefTest(test.TestCase):
         self.assertEqual(c.device + "/device:GPU:0", c5.device)
 
   def testWithDeviceFunctionDependingOnInputs(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default() as g:
       with ops.device("/job:ps"):
         v1 = constant_op.constant(1.0)
@@ -967,8 +946,6 @@ class ImportGraphDefTest(test.TestCase):
     self.assertEqual(2, len(ops_with_two_inputs))
 
   def testGradient(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default() as g:
       inputs = array_ops.placeholder(
           dtypes.float32, shape=[None, 100], name="input")
@@ -1046,23 +1023,26 @@ class ImportGraphDefTest(test.TestCase):
             sess.run(x)
 
   def testVersionHigh(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default() as g:
       pat = (r"GraphDef min consumer version %d above current version %d "
              r"for TensorFlow \S+\.  Please upgrade TensorFlow\.$" %
              (1 << 30, versions.GRAPH_DEF_VERSION))
-      importer.import_graph_def(self._MakeGraphDef("", min_consumer=1 << 30))
-      x = constant_op.constant(
-          7)  # Need at least one op to get a C++ graph generated
-      with self.test_session(graph=g) as sess:
-        with self.assertRaisesRegexp(Exception, pat):
-          sess.run(x)
+
+      if ops._USE_C_API:
+        with self.assertRaisesRegexp(ValueError, pat):
+          importer.import_graph_def(self._MakeGraphDef("",
+                                                       min_consumer=1 << 30))
+      else:
+        # Python API only throws when graph is run
+        importer.import_graph_def(self._MakeGraphDef("", min_consumer=1 << 30))
+        x = constant_op.constant(
+            7)  # Need at least one op to get a C++ graph generated
+        with self.test_session(graph=g) as sess:
+          with self.assertRaisesRegexp(Exception, pat):
+            sess.run(x)
 
   def testVersionAppliesToOpConstruction(self):
     """These tests rely on shape fns in test_ops.cc."""
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     with ops.Graph().as_default():
       importer.import_graph_def(
           self._MakeGraphDef(
@@ -1089,8 +1069,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(123.0, a[0].get_attr("default_float"))
 
   def testDefaultAttrsRemoved(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     producer_op_list = op_def_pb2.OpList()
     text_format.Merge("""
       op {
@@ -1107,23 +1085,28 @@ class ImportGraphDefTest(test.TestCase):
           """),
           return_elements=["A"],
           producer_op_list=producer_op_list)
-      with self.assertRaisesRegexp(ValueError, "No attr named 'default_int'"):
+      if ops._USE_C_API:
+        error_msg = "Operation 'import/A' has no attr named 'default_int'."
+      else:
+        error_msg = "No attr named 'default_int'"
+      with self.assertRaisesRegexp(ValueError, error_msg):
         a[0].get_attr("default_int")
 
-    # Attr only in producer_op_list with non-default value is preserved.
-    with ops.Graph().as_default():
-      a = importer.import_graph_def(
-          self._MakeGraphDef("""
-          node { name: 'A' op: 'OpWithFutureDefaultAttr'
-                 attr { key: 'default_int' value { i: 987 } } }
-          """),
-          return_elements=["A"],
-          producer_op_list=producer_op_list)
-      self.assertEqual(987, a[0].get_attr("default_int"))
+    # Unknown attrs cannot be imported using C API. This test will eventually be
+    # deleted.
+    if not ops._USE_C_API:
+      # Attr only in producer_op_list with non-default value is preserved.
+      with ops.Graph().as_default():
+        a = importer.import_graph_def(
+            self._MakeGraphDef("""
+            node { name: 'A' op: 'OpWithFutureDefaultAttr'
+                   attr { key: 'default_int' value { i: 987 } } }
+            """),
+            return_elements=["A"],
+            producer_op_list=producer_op_list)
+        self.assertEqual(987, a[0].get_attr("default_int"))
 
   def testFunctions(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     dtype = dtypes.float32
     @function.Defun(dtype, dtype, dtype, dtype)
     def Grad(x, y, dout1, dout2):  # pylint: disable=unused-argument
@@ -1201,8 +1184,6 @@ class ImportGraphDefTest(test.TestCase):
         self.assertEqual(sess.run("outer:0"), 21)
 
   def testImportInsideDefun(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     g = ops.Graph()
     with g.as_default():
       @function.Defun()
@@ -1226,8 +1207,6 @@ class ImportGraphDefTest(test.TestCase):
       self.assertEqual(z_val, -2.0)
 
   def testImportGraphWithFunctionTwice(self):
-    if ops._USE_C_API: return  # TODO(skyewm): make this work with C API
-
     g = ops.Graph()
     with g.as_default():
       @function.Defun()
diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py
index 44ddc013b2817956b95bf4da068b2fb77f87a07c..fc1a82361ba59cddc02a65a96da98283d871fd2c 100644
--- a/tensorflow/python/framework/meta_graph.py
+++ b/tensorflow/python/framework/meta_graph.py
@@ -31,6 +31,7 @@ from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import op_def_pb2
 from tensorflow.core.protobuf import meta_graph_pb2
 from tensorflow.core.protobuf import saver_pb2
+from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
 from tensorflow.python.framework import graph_io
 from tensorflow.python.framework import importer
@@ -442,6 +443,67 @@ def add_collection_def(meta_graph_def, key, graph=None,
     return
 
 
+def _is_default_attr_value(op_def, attr_name, attr_value):
+  """Checks if given attribute matches the default value in the op def."""
+  for attr_def in op_def.attr:
+    if attr_def.name == attr_name:
+      if not attr_def.HasField("default_value"):
+        return False
+      # pywrap_tensorflow.EqualAttrValueWrapper returns an empty string
+      # if both arguments represent an equivalent AttrValue instance.
+      return not pywrap_tensorflow.EqualAttrValueWrapper(
+          attr_value.SerializeToString(),
+          attr_def.default_value.SerializeToString())
+  return False
+
+
+def _strip_graph_default_valued_attrs(meta_graph_def):
+  """Strips default valued attributes for node defs in given MetaGraphDef.
+
+  This method also sets `meta_info_def.stripped_default_attrs` in the given
+  `MetaGraphDef` proto to True.
+
+  Args:
+    meta_graph_def: `MetaGraphDef` protocol buffer
+
+  Returns:
+    None.
+  """
+  # Map function op names to their function definitions.
+  op_name_to_function = {}
+  for function_def in meta_graph_def.graph_def.library.function:
+    op_name_to_function[function_def.signature.name] = function_def
+
+  # Get all registered ops.
+  registered_ops = op_def_registry.get_registered_ops()
+
+  def _strip_node_default_valued_attrs(node_def):
+    """Removes default valued attributes from a single node def."""
+    if node_def.op in op_name_to_function or node_def.op not in registered_ops:
+      return
+    op_def = registered_ops[node_def.op]
+
+    attrs_to_strip = set()
+    for attr_name, attr_value in node_def.attr.items():
+      if _is_default_attr_value(op_def, attr_name, attr_value):
+        attrs_to_strip.add(attr_name)
+
+    for attr in attrs_to_strip:
+      del node_def.attr[attr]
+
+  # Process all NodeDef instances in graph_def.
+  for node_def in meta_graph_def.graph_def.node:
+    _strip_node_default_valued_attrs(node_def)
+
+  # Process all NodeDef instances in graph_def.library.function.
+  for function_def in meta_graph_def.graph_def.library.function:
+    for function_node_def in function_def.node_def:
+      _strip_node_default_valued_attrs(function_node_def)
+
+  # Tell consumers of this graph that default valued attrs have been stripped.
+  meta_graph_def.meta_info_def.stripped_default_attrs = True
+
+
 def create_meta_graph_def(meta_info_def=None,
                           graph_def=None,
                           saver_def=None,
@@ -449,7 +511,9 @@ def create_meta_graph_def(meta_info_def=None,
                           graph=None,
                           export_scope=None,
                           exclude_nodes=None,
-                          clear_extraneous_savers=False):
+                          clear_extraneous_savers=False,
+                          strip_default_attrs=False):
+  # pylint: disable=line-too-long
   """Construct and returns a `MetaGraphDef` protocol buffer.
 
   Args:
@@ -464,12 +528,17 @@ def create_meta_graph_def(meta_info_def=None,
     clear_extraneous_savers: Remove any preexisting SaverDefs from the SAVERS
         collection.  Note this method does not alter the graph, so any
         extraneous Save/Restore ops should have been removed already, as needed.
+    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+        removed from the NodeDefs. For a detailed guide, see
+        [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
+
   Returns:
     MetaGraphDef protocol buffer.
 
   Raises:
     TypeError: If the arguments are not of the correct proto buffer type.
   """
+  # pylint: enable=line-too-long
   # Type check.
   if graph and not isinstance(graph, ops.Graph):
     raise TypeError("graph must be of type Graph, not %s", type(graph))
@@ -511,6 +580,10 @@ def create_meta_graph_def(meta_info_def=None,
         stripped_op_list_for_graph(meta_graph_def.graph_def))
   # pylint: enable=g-explicit-length-test
 
+  # Strip default valued attributes in graph_def.
+  if strip_default_attrs:
+    _strip_graph_default_valued_attrs(meta_graph_def)
+
   # Adds saver_def.
   if saver_def:
     meta_graph_def.saver_def.MergeFrom(saver_def)
@@ -655,13 +728,14 @@ def import_scoped_meta_graph(meta_graph_or_file,
     if clear_devices:
       for node in input_graph_def.node:
         node.device = ""
+
+    scope_to_prepend_to_names = graph.unique_name(
+        import_scope or "", mark_as_used=False)
+
     importer.import_graph_def(
         input_graph_def, name=(import_scope or ""), input_map=input_map,
         producer_op_list=producer_op_list)
 
-    scope_to_prepend_to_names = "/".join(
-        [part for part in [graph.get_name_scope(), import_scope] if part])
-
     # Restores all the other collections.
     for key, col_def in sorted(meta_graph_def.collection_def.items()):
       # Don't add unbound_inputs to the new graph.
@@ -724,6 +798,7 @@ def export_scoped_meta_graph(filename=None,
                              clear_devices=False,
                              saver_def=None,
                              clear_extraneous_savers=False,
+                             strip_default_attrs=False,
                              **kwargs):
   """Returns `MetaGraphDef` proto. Optionally writes it to filename.
 
@@ -752,6 +827,8 @@ def export_scoped_meta_graph(filename=None,
     clear_extraneous_savers: Remove any Saver-related information from the
         graph (both Save/Restore ops and SaverDefs) that are not associated
         with the provided SaverDef.
+    strip_default_attrs: Set to true if default valued attributes must be
+        removed while exporting the GraphDef.
     **kwargs: Optional keyed arguments, including meta_info_def and
         collection_list.
 
@@ -773,6 +850,7 @@ def export_scoped_meta_graph(filename=None,
     if graph_def:
       new_graph_def = graph_pb2.GraphDef()
       new_graph_def.versions.CopyFrom(graph_def.versions)
+      new_graph_def.library.CopyFrom(graph_def.library)
 
       if clear_extraneous_savers:
         exclude_nodes = _find_extraneous_saver_nodes(graph_def, saver_def)
@@ -799,7 +877,7 @@ def export_scoped_meta_graph(filename=None,
                                 export_scope,
                                 exclude_nodes):
           value = graph._nodes_by_id[key]
-      # pylint: enable=protected-access
+          # pylint: enable=protected-access
           node_def = _node_def(value.node_def, export_scope, unbound_inputs,
                                clear_devices=clear_devices)
           graph_def.node.extend([node_def])
@@ -810,6 +888,9 @@ def export_scoped_meta_graph(filename=None,
           bytesize += value.node_def.ByteSize()
           if bytesize >= (1 << 31) or bytesize < 0:
             raise ValueError("GraphDef cannot be larger than 2GB.")
+
+      graph._copy_functions_to_graph_def(graph_def, bytesize)  # pylint: disable=protected-access
+
     # It's possible that not all the inputs are in the export_scope.
     # If we would like such information included in the exported meta_graph,
     # add them to a special unbound_inputs collection.
@@ -833,6 +914,7 @@ def export_scoped_meta_graph(filename=None,
       exclude_nodes=exclude_nodes,
       clear_extraneous_savers=clear_extraneous_savers,
       saver_def=saver_def,
+      strip_default_attrs=strip_default_attrs,
       **kwargs)
 
   if filename:
@@ -877,3 +959,5 @@ def copy_scoped_meta_graph(from_scope, to_scope,
                                       graph=to_graph,
                                       import_scope=to_scope)
   return var_list
+
+
diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py
index 4c22c913b850685bd6e50b03b5fbb09a01441b68..b5ed1352843eac31b3e34eb96385acd13a5bc7a9 100644
--- a/tensorflow/python/framework/meta_graph_test.py
+++ b/tensorflow/python/framework/meta_graph_test.py
@@ -24,6 +24,7 @@ import random
 import shutil
 
 from tensorflow.core.framework import graph_pb2
+from tensorflow.core.protobuf import meta_graph_pb2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
@@ -58,6 +59,7 @@ def _TestDir(test_name):
 # pylint: enable=invalid-name
 
 
+@test_util.with_c_api
 class SimpleMetaGraphTest(test.TestCase):
 
   def testNoVariables(self):
@@ -102,7 +104,8 @@ class SimpleMetaGraphTest(test.TestCase):
       # Re-exports the current graph state for comparison to the original.
       new_meta_graph_def, _ = meta_graph.export_scoped_meta_graph(filename +
                                                                   "_new")
-      self.assertProtoEquals(meta_graph_def, new_meta_graph_def)
+      test_util.assert_meta_graph_protos_equal(self, meta_graph_def,
+                                               new_meta_graph_def)
 
       # Ensures that we can still get a reference to our graph collections.
       new_input_tensor = ops.get_collection("input_tensor")[0]
@@ -154,7 +157,110 @@ class SimpleMetaGraphTest(test.TestCase):
     op_list = meta_graph.stripped_op_list_for_graph(graph)
     self.assertEqual(["Const"], [op.name for op in op_list.op])
 
+  def testDefaultAttrStripping(self):
+    """Verifies that default attributes are stripped from a graph def."""
 
+    # Complex Op has 2 attributes with defaults:
+    #   o "T"    : float32.
+    #   o "Tout" : complex64.
+
+    # When inputs to the Complex Op are float32 instances, "T" maps to float32
+    # and "Tout" maps to complex64. Since these attr values map to their
+    # defaults, they must be stripped unless stripping of default attrs is
+    # disabled.
+    with self.test_session():
+      real_num = constant_op.constant(1.0, dtype=dtypes.float32, name="real")
+      imag_num = constant_op.constant(2.0, dtype=dtypes.float32, name="imag")
+      math_ops.complex(real_num, imag_num, name="complex")
+
+      # strip_default_attrs is enabled.
+      meta_graph_def, _ = meta_graph.export_scoped_meta_graph(
+          graph_def=ops.get_default_graph().as_graph_def(),
+          strip_default_attrs=True)
+      node_def = test_util.get_node_def_from_graph("complex",
+                                                   meta_graph_def.graph_def)
+      self.assertNotIn("T", node_def.attr)
+      self.assertNotIn("Tout", node_def.attr)
+      self.assertTrue(meta_graph_def.meta_info_def.stripped_default_attrs)
+
+      # strip_default_attrs is disabled.
+      meta_graph_def, _ = meta_graph.export_scoped_meta_graph(
+          graph_def=ops.get_default_graph().as_graph_def(),
+          strip_default_attrs=False)
+      node_def = test_util.get_node_def_from_graph("complex",
+                                                   meta_graph_def.graph_def)
+      self.assertIn("T", node_def.attr)
+      self.assertIn("Tout", node_def.attr)
+      self.assertFalse(meta_graph_def.meta_info_def.stripped_default_attrs)
+
+    # When inputs to the Complex Op are float64 instances, "T" maps to float64
+    # and "Tout" maps to complex128. Since these attr values don't map to their
+    # defaults, they must not be stripped.
+    with self.test_session(graph=ops.Graph()):
+      real_num = constant_op.constant(1.0, dtype=dtypes.float64, name="real")
+      imag_num = constant_op.constant(2.0, dtype=dtypes.float64, name="imag")
+      math_ops.complex(real_num, imag_num, name="complex")
+      meta_graph_def, _ = meta_graph.export_scoped_meta_graph(
+          graph_def=ops.get_default_graph().as_graph_def(),
+          strip_default_attrs=True)
+      node_def = test_util.get_node_def_from_graph("complex",
+                                                   meta_graph_def.graph_def)
+      self.assertEqual(node_def.attr["T"].type, dtypes.float64)
+      self.assertEqual(node_def.attr["Tout"].type, dtypes.complex128)
+      self.assertTrue(meta_graph_def.meta_info_def.stripped_default_attrs)
+
+  def testDefaultAttrStrippingNestedFunctions(self):
+    """Verifies that default attributes are stripped from function node defs."""
+    with self.test_session():
+      @function.Defun(dtypes.float32, dtypes.float32)
+      def f0(i, j):
+        return math_ops.complex(i, j, name="double_nested_complex")
+
+      @function.Defun(dtypes.float32, dtypes.float32)
+      def f1(i, j):
+        return f0(i, j)
+
+      _ = f1(constant_op.constant(1.0), constant_op.constant(2.0))
+      meta_graph_def, _ = meta_graph.export_scoped_meta_graph(
+          graph_def=ops.get_default_graph().as_graph_def(),
+          strip_default_attrs=True)
+
+      double_nested_complex_node_def = None
+      for function_def in meta_graph_def.graph_def.library.function:
+        for node_def in function_def.node_def:
+          if node_def.name.startswith("double_nested_complex"):
+            double_nested_complex_node_def = node_def
+            break
+        if double_nested_complex_node_def:
+          break
+
+      self.assertIsNotNone(double_nested_complex_node_def)
+      self.assertNotIn("T", double_nested_complex_node_def.attr)
+      self.assertNotIn("Tout", double_nested_complex_node_def.attr)
+      self.assertTrue(meta_graph_def.meta_info_def.stripped_default_attrs)
+
+  def testDefaultAttrStrippingUnregisteredOps(self):
+    """Verifies that nodes with un-registered ops are not stripped."""
+    graph_def = graph_pb2.GraphDef()
+    node = graph_def.node.add()
+    node.name = "node_with_unreg_op"
+    node.op = "unreg_op"
+    node.attr["attr_1"].i = 1
+
+    meta_info_def = meta_graph_pb2.MetaGraphDef.MetaInfoDef()
+    meta_info_def.stripped_op_list.op.add()
+
+    with self.test_session():
+      meta_graph_def = meta_graph.create_meta_graph_def(
+          meta_info_def=meta_info_def, graph_def=graph_def,
+          strip_default_attrs=True)
+      node_def = test_util.get_node_def_from_graph("node_with_unreg_op",
+                                                   meta_graph_def.graph_def)
+      self.assertEqual(node_def.attr["attr_1"].i, 1)
+      self.assertTrue(meta_graph_def.meta_info_def.stripped_default_attrs)
+
+
+@test_util.with_c_api
 class ScopedMetaGraphTest(test.TestCase):
 
   def _testScopedExport(self, test_dir, exported_filenames):
@@ -332,10 +438,13 @@ class ScopedMetaGraphTest(test.TestCase):
     ]
     orig_meta_graphs = self._testScopedExport(test_dir, filenames)
     new_meta_graphs = self._testScopedImport(test_dir, filenames)
-    # Delete the unbound_inputs to allow directly calling ProtoEqual.
-    del orig_meta_graphs[0].collection_def["unbound_inputs"]
-    del new_meta_graphs[0].collection_def["unbound_inputs"]
     for a, b in zip(orig_meta_graphs, new_meta_graphs):
+      # The unbound input strings are slightly different with the C API enabled
+      # ("images" vs "images:0") due to the original import_graph_def code
+      # vs. ImportGraphDef in C++.
+      # TODO(skyewm): update the pbtxts once _USE_C_API is removed.
+      del a.collection_def["unbound_inputs"]
+      del b.collection_def["unbound_inputs"]
       test_util.assert_meta_graph_protos_equal(self, a, b)
 
   def testScopedImportUnderNameScope(self):
@@ -353,6 +462,19 @@ class ScopedMetaGraphTest(test.TestCase):
         self.assertEqual(list(imported_variables.values())[0].name,
                          "foo/bar/myvar:0")
 
+  def testImportsUsingSameScopeName(self):
+    with ops.Graph().as_default():
+      variables.Variable(0, name="v")
+      meta_graph_def, _ = meta_graph.export_scoped_meta_graph()
+    with ops.Graph().as_default():
+      for suffix in ["", "_1"]:
+        imported_variables = meta_graph.import_scoped_meta_graph(
+            meta_graph_def, import_scope="s")
+        self.assertEqual(len(imported_variables), 1)
+        self.assertEqual(list(imported_variables.keys())[0], "v:0")
+        self.assertEqual(list(imported_variables.values())[0].name,
+                         "s" + suffix + "/v:0")
+
   def testScopedImportWithSelectedCollections(self):
     meta_graph_filename = os.path.join(
         _TestDir("selected_collections_import"), "meta_graph.pb")
@@ -456,7 +578,8 @@ class ScopedMetaGraphTest(test.TestCase):
                                                       "exported_queue1.pbtxt")
     new_meta_graph = self._testScopedImportWithQueue(
         test_dir, "exported_queue1.pbtxt", "exported_new_queue1.pbtxt")
-    self.assertProtoEquals(orig_meta_graph, new_meta_graph)
+    test_util.assert_meta_graph_protos_equal(self, orig_meta_graph,
+                                             new_meta_graph)
 
   # Verifies that we can export a subgraph in a nested name scope containing a
   # "hidden1/hidden2" and import it into "new_hidden1/new_hidden2" in a new
@@ -602,6 +725,7 @@ class ScopedMetaGraphTest(test.TestCase):
     self.assertEqual("", str(graph2.as_graph_element("matmul").device))
 
 
+@test_util.with_c_api
 class MetaGraphWithVariableScopeTest(test.TestCase):
 
   def testMetricsCollection(self):
@@ -659,6 +783,7 @@ class MetaGraphWithVariableScopeTest(test.TestCase):
         initializer = variables.local_variables_initializer()
 
 
+@test_util.with_c_api
 class ExportImportAcrossScopesTest(test.TestCase):
 
   def testPartionedVariables(self):
@@ -729,7 +854,7 @@ class ExportImportAcrossScopesTest(test.TestCase):
             if shared_name_value.s:
               node.attr[shared_name_attr].s = b""
 
-    self.assertProtoEquals(expected, result)
+    test_util.assert_meta_graph_protos_equal(self, expected, result)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/framework/op_def_library_test.py b/tensorflow/python/framework/op_def_library_test.py
index 715e863b787b41f81a0f3a8ac9e4f6b48f349e2a..817007ce6c18e11d19038e09d77a8f27bd7eca91 100644
--- a/tensorflow/python/framework/op_def_library_test.py
+++ b/tensorflow/python/framework/op_def_library_test.py
@@ -26,8 +26,8 @@ from tensorflow.core.framework import tensor_shape_pb2
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_ops
 from tensorflow.python.framework import test_util
-from tensorflow.python.framework.op_def_library import OpDefLibrary
 from tensorflow.python.platform import googletest
 
 
@@ -36,73 +36,11 @@ def _unknown_shape(op):
   return [tensor_shape.unknown_shape() for _ in op.outputs]
 
 
-# NOTE(mrry): Dummy shape registrations for ops used in the tests, since they
-# don't have C++ op registrations on which to attach C++ shape fns.
-ops.RegisterShape("Attr")(_unknown_shape)
-ops.RegisterShape("AttrBool")(_unknown_shape)
-ops.RegisterShape("AttrBoolList")(_unknown_shape)
-ops.RegisterShape("AttrDefault")(_unknown_shape)
-ops.RegisterShape("AttrEmptyListDefault")(_unknown_shape)
-ops.RegisterShape("AttrEnum")(_unknown_shape)
-ops.RegisterShape("AttrEnumList")(_unknown_shape)
-ops.RegisterShape("AttrFloat")(_unknown_shape)
-ops.RegisterShape("AttrListDefault")(_unknown_shape)
-ops.RegisterShape("AttrListMin")(_unknown_shape)
-ops.RegisterShape("AttrMin")(_unknown_shape)
-ops.RegisterShape("AttrShape")(_unknown_shape)
-ops.RegisterShape("AttrShapeList")(_unknown_shape)
-ops.RegisterShape("AttrPartialShape")(_unknown_shape)
-ops.RegisterShape("AttrPartialShapeList")(_unknown_shape)
-ops.RegisterShape("AttrTypeDefault")(_unknown_shape)
-ops.RegisterShape("AttrListTypeDefault")(_unknown_shape)
-ops.RegisterShape("Binary")(_unknown_shape)
-ops.RegisterShape("ComplexStruct")(_unknown_shape)
-ops.RegisterShape("InPolymorphicTwice")(_unknown_shape)
-ops.RegisterShape("MixedStruct")(_unknown_shape)
-ops.RegisterShape("NInPolymorphicTwice")(_unknown_shape)
-ops.RegisterShape("NInTwice")(_unknown_shape)
-ops.RegisterShape("NInTwoTypeVariables")(_unknown_shape)
-ops.RegisterShape("NIntsIn")(_unknown_shape)
-ops.RegisterShape("NIntsOut")(_unknown_shape)
-ops.RegisterShape("NIntsOutDefault")(_unknown_shape)
-ops.RegisterShape("NPolymorphicIn")(_unknown_shape)
-ops.RegisterShape("NPolymorphicOut")(_unknown_shape)
-ops.RegisterShape("NPolymorphicOutDefault")(_unknown_shape)
-ops.RegisterShape("NPolymorphicRestrictIn")(_unknown_shape)
-ops.RegisterShape("NPolymorphicRestrictOut")(_unknown_shape)
-ops.RegisterShape("OutT")(_unknown_shape)
-ops.RegisterShape("OutTypeList")(_unknown_shape)
-ops.RegisterShape("OutTypeListRestrict")(_unknown_shape)
-ops.RegisterShape("Polymorphic")(_unknown_shape)
-ops.RegisterShape("PolymorphicDefaultOut")(_unknown_shape)
-ops.RegisterShape("PolymorphicOut")(_unknown_shape)
-ops.RegisterShape("RefIn")(_unknown_shape)
-ops.RegisterShape("RefOut")(_unknown_shape)
-ops.RegisterShape("ReservedAttr")(_unknown_shape)
-ops.RegisterShape("ReservedInput")(_unknown_shape)
-ops.RegisterShape("Restrict")(_unknown_shape)
-ops.RegisterShape("Simple")(_unknown_shape)
-ops.RegisterShape("SimpleStruct")(_unknown_shape)
-ops.RegisterShape("TwoRefsIn")(_unknown_shape)
-ops.RegisterShape("TypeList")(_unknown_shape)
-ops.RegisterShape("TypeListRestrict")(_unknown_shape)
-ops.RegisterShape("TypeListTwice")(_unknown_shape)
-
-
+@test_util.with_c_api
 class OpDefLibraryTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
-    self._lib = OpDefLibrary()
-    self._g = ops.Graph()
-    self._default_graph_controller = self._g.as_default()
-    self._default_graph_controller.__enter__()
-    self._add_op("name: 'Simple' input_arg { name: 'a' type: DT_INT32 } "
-                 "output_arg { name: 'out' type: DT_FLOAT }")
-    self._add_op("name: 'OutT' output_arg { name: 'a' type_attr: 'T' } "
-                 "attr { name: 'T' type: 'type' }")
-
-  def tearDown(self):
-    self._default_graph_controller.__exit__(None, None, None)
+    self._lib = test_ops._op_def_lib
 
   def _add_op(self, ascii):
     op_def = op_def_pb2.OpDef()
@@ -177,1374 +115,1226 @@ class OpDefLibraryTest(test_util.TensorFlowTestCase):
                      "Arg 'a' of 'NoTypes' must have one type field not 0")
 
   def testSimple(self):
-    out = self._lib.apply_op("Simple", a=3)
-    self.assertEqual(dtypes.float32, out.dtype)
-    self.assertProtoEquals("""
-      name: 'Simple' op: 'Simple' input: 'Simple/a'
-      """, out.op.node_def)
-
-    out = self._lib.apply_op("Simple", a=4)
-    self.assertProtoEquals("""
-      name: 'Simple_1' op: 'Simple' input: 'Simple_1/a'
-      """, out.op.node_def)
-
-    out = self._lib.apply_op("Simple", a=5, name="named")
-    self.assertProtoEquals("""
-      name: 'named' op: 'Simple' input: 'named/a'
-      """, out.op.node_def)
-
-    out = self._lib.apply_op("Simple", a=[[1, 2, 3], [4, 5, 6]], name="two_d")
-    self.assertProtoEquals("""
-      name: 'two_d' op: 'Simple' input: 'two_d/a'
-      """, out.op.node_def)
+    with ops.Graph().as_default():
+      out = self._lib.apply_op("Simple", a=3)
+      self.assertEqual(dtypes.float32, out.dtype)
+      self.assertProtoEquals("""
+        name: 'Simple' op: 'Simple' input: 'Simple/a'
+        """, out.op.node_def)
+
+      out = self._lib.apply_op("Simple", a=4)
+      self.assertProtoEquals("""
+        name: 'Simple_1' op: 'Simple' input: 'Simple_1/a'
+        """, out.op.node_def)
+
+      out = self._lib.apply_op("Simple", a=5, name="named")
+      self.assertProtoEquals("""
+        name: 'named' op: 'Simple' input: 'named/a'
+        """, out.op.node_def)
+
+      out = self._lib.apply_op("Simple", a=[[1, 2, 3], [4, 5, 6]], name="two_d")
+      self.assertProtoEquals("""
+        name: 'two_d' op: 'Simple' input: 'two_d/a'
+        """, out.op.node_def)
 
   def testSimpleFailures(self):
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Simple", a="Bad string")
-    self.assertEqual(str(cm.exception),
-                     "Expected int32 passed to parameter 'a' of op 'Simple', "
-                     "got 'Bad string' of type 'str' instead.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Simple", a=self.Tensor(dtypes.string))
-    self.assertEqual(str(cm.exception),
-                     "Input 'a' of 'Simple' Op has type string "
-                     "that does not match expected type of int32.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Simple", a=6, extra="bogus")
-    self.assertEqual(str(cm.exception),
-                     "apply_op() got unexpected keyword arguments: extra")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Simple", a=6, extra1="bogus", extra2="also_bogus")
-    self.assertEqual(str(cm.exception),
-                     "apply_op() got unexpected keyword arguments: extra1, "
-                     "extra2")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Simple")
-    self.assertEqual(str(cm.exception), "No argument for input a")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Simple", wrong=7)
-    self.assertEqual(str(cm.exception), "No argument for input a")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Simple", a={"label": 1})
-    self.assertEqual(str(cm.exception),
-                     "Expected int32 passed to parameter 'a' of op 'Simple', "
-                     "got {'label': 1} of type 'dict' instead.")
+    with ops.Graph().as_default():
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Simple", a="Bad string")
+      self.assertEqual(str(cm.exception),
+                       "Expected int32 passed to parameter 'a' of op 'Simple', "
+                       "got 'Bad string' of type 'str' instead.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Simple", a=self.Tensor(dtypes.string))
+      self.assertEqual(str(cm.exception),
+                       "Input 'a' of 'Simple' Op has type string "
+                       "that does not match expected type of int32.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Simple", a=6, extra="bogus")
+      self.assertEqual(str(cm.exception),
+                       "apply_op() got unexpected keyword arguments: extra")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Simple", a=6, extra1="bogus", extra2="also_bogus")
+      self.assertEqual(str(cm.exception),
+                       "apply_op() got unexpected keyword arguments: extra1, "
+                       "extra2")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Simple")
+      self.assertEqual(str(cm.exception), "No argument for input a")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Simple", wrong=7)
+      self.assertEqual(str(cm.exception), "No argument for input a")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Simple", a={"label": 1})
+      self.assertEqual(str(cm.exception),
+                       "Expected int32 passed to parameter 'a' of op 'Simple', "
+                       "got {'label': 1} of type 'dict' instead.")
 
   def testReservedInput(self):
-    self._add_op("name: 'ReservedInput' "
-                 "input_arg { name: 'input' type: DT_INT32 } ")
-    op = self._lib.apply_op("ReservedInput", input_=7, name="x")
-    self.assertProtoEquals("""
-      name: 'x' op: 'ReservedInput' input: 'x/input'
-      """, op.node_def)
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("ReservedInput", input_=7, name="x")
+      self.assertProtoEquals("""
+        name: 'x' op: 'ReservedInput' input: 'x/input'
+        """, op.node_def)
 
   def testPolymorphic(self):
-    self._add_op("name: 'Polymorphic' "
-                 "input_arg { name: 'a' type_attr: 'T' } "
-                 "output_arg { name: 'out' type_attr: 'T' } "
-                 "attr { name: 'T' type: 'type' }")
-
-    out = self._lib.apply_op("Polymorphic", a=7, name="p")
-    self.assertEqual(dtypes.int32, out.dtype)
-    self.assertProtoEquals("""
-      name: 'p' op: 'Polymorphic' input: 'p/a'
-      attr { key: 'T' value { type: DT_INT32 } }
-      """, out.op.node_def)
-
-    out = self._lib.apply_op("Polymorphic", a="s", name="q")
-    self.assertEqual(dtypes.string, out.dtype)
-    self.assertProtoEquals("""
-      name: 'q' op: 'Polymorphic' input: 'q/a'
-      attr { key: 'T' value { type: DT_STRING } }
-      """, out.op.node_def)
-
-    out = self._lib.apply_op("Polymorphic", a=["s", "t", "u"], name="r")
-    self.assertEqual(dtypes.string, out.dtype)
-    self.assertProtoEquals("""
-      name: 'r' op: 'Polymorphic' input: 'r/a'
-      attr { key: 'T' value { type: DT_STRING } }
-      """, out.op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Polymorphic", a="s", T=dtypes.string)
-    self.assertEqual(str(cm.exception),
-                     "Should not specify value for inferred attr 'T'.")
+    with ops.Graph().as_default():
+      out = self._lib.apply_op("Polymorphic", a=7, name="p")
+      self.assertEqual(dtypes.int32, out.dtype)
+      self.assertProtoEquals("""
+        name: 'p' op: 'Polymorphic' input: 'p/a'
+        attr { key: 'T' value { type: DT_INT32 } }
+        """, out.op.node_def)
+
+      out = self._lib.apply_op("Polymorphic", a="s", name="q")
+      self.assertEqual(dtypes.string, out.dtype)
+      self.assertProtoEquals("""
+        name: 'q' op: 'Polymorphic' input: 'q/a'
+        attr { key: 'T' value { type: DT_STRING } }
+        """, out.op.node_def)
+
+      out = self._lib.apply_op("Polymorphic", a=["s", "t", "u"], name="r")
+      self.assertEqual(dtypes.string, out.dtype)
+      self.assertProtoEquals("""
+        name: 'r' op: 'Polymorphic' input: 'r/a'
+        attr { key: 'T' value { type: DT_STRING } }
+        """, out.op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Polymorphic", a="s", T=dtypes.string)
+      self.assertEqual(str(cm.exception),
+                       "Should not specify value for inferred attr 'T'.")
 
   def testPolymorphicOut(self):
-    self._add_op("name: 'PolymorphicOut' "
-                 "output_arg { name: 'out' type_attr: 'T' } "
-                 "attr { name: 'T' type: 'type' }")
-
-    out = self._lib.apply_op("PolymorphicOut", T=dtypes.int32, name="p")
-    self.assertEqual(dtypes.int32, out.dtype)
-    self.assertProtoEquals("""
-      name: 'p' op: 'PolymorphicOut'
-      attr { key: 'T' value { type: DT_INT32 } }
-      """, out.op.node_def)
-
-    out = self._lib.apply_op("PolymorphicOut", T=dtypes.bool, name="q")
-    self.assertEqual(dtypes.bool, out.dtype)
-    self.assertProtoEquals("""
-      name: 'q' op: 'PolymorphicOut'
-      attr { key: 'T' value { type: DT_BOOL } }
-      """, out.op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("PolymorphicOut")
-    self.assertEqual(str(cm.exception),
-                     "No argument for attr T")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("PolymorphicOut", T=None)
-    self.assertEqual(str(cm.exception),
-                     "Expected DataType for argument 'T' not None.")
+    with ops.Graph().as_default():
+      out = self._lib.apply_op("PolymorphicOut", T=dtypes.int32, name="p")
+      self.assertEqual(dtypes.int32, out.dtype)
+      self.assertProtoEquals("""
+        name: 'p' op: 'PolymorphicOut'
+        attr { key: 'T' value { type: DT_INT32 } }
+        """, out.op.node_def)
+
+      out = self._lib.apply_op("PolymorphicOut", T=dtypes.bool, name="q")
+      self.assertEqual(dtypes.bool, out.dtype)
+      self.assertProtoEquals("""
+        name: 'q' op: 'PolymorphicOut'
+        attr { key: 'T' value { type: DT_BOOL } }
+        """, out.op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("PolymorphicOut")
+      self.assertEqual(str(cm.exception),
+                       "No argument for attr T")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("PolymorphicOut", T=None)
+      self.assertEqual(str(cm.exception),
+                       "Expected DataType for argument 'T' not None.")
 
   def testPolymorphicDefaultOut(self):
-    self._add_op("name: 'PolymorphicDefaultOut' "
-                 "output_arg { name: 'out' type_attr: 'T' } "
-                 "attr { name: 'T' type: 'type' "
-                 "  default_value { type: DT_STRING } }")
-
-    out = self._lib.apply_op("PolymorphicDefaultOut", T=None, name="p")
-    self.assertEqual(dtypes.string, out.dtype)
-    self.assertProtoEquals("""
-      name: 'p' op: 'PolymorphicDefaultOut'
-      attr { key: 'T' value { type: DT_STRING } }
-      """, out.op.node_def)
-
-    out = self._lib.apply_op("PolymorphicDefaultOut", T=dtypes.bool, name="q")
-    self.assertEqual(dtypes.bool, out.dtype)
-    self.assertProtoEquals("""
-      name: 'q' op: 'PolymorphicDefaultOut'
-      attr { key: 'T' value { type: DT_BOOL } }
-      """, out.op.node_def)
+    with ops.Graph().as_default():
+      out = self._lib.apply_op("PolymorphicDefaultOut", T=None, name="p")
+      self.assertEqual(dtypes.string, out.dtype)
+      self.assertProtoEquals("""
+        name: 'p' op: 'PolymorphicDefaultOut'
+        attr { key: 'T' value { type: DT_STRING } }
+        """, out.op.node_def)
+
+      out = self._lib.apply_op("PolymorphicDefaultOut", T=dtypes.bool, name="q")
+      self.assertEqual(dtypes.bool, out.dtype)
+      self.assertProtoEquals("""
+        name: 'q' op: 'PolymorphicDefaultOut'
+        attr { key: 'T' value { type: DT_BOOL } }
+        """, out.op.node_def)
 
   def testBinary(self):
-    self._add_op("name: 'Binary' "
-                 "input_arg { name: 'a' type_attr: 'T' } "
-                 "input_arg { name: 'b' type_attr: 'T' } "
-                 "output_arg { name: 'out' type_attr: 'T' } "
-                 "attr { name: 'T' type: 'type' }")
-
-    out = self._lib.apply_op("Binary", a=8, b=9, name="b")
-    self.assertEqual(dtypes.int32, out.dtype)
-    self.assertProtoEquals("""
-      name: 'b' op: 'Binary' input: 'b/a' input: 'b/b'
-      attr { key: 'T' value { type: DT_INT32 } }
-      """, out.op.node_def)
-
-    out = self._lib.apply_op("Binary", a="left", b="right", name="c")
-    self.assertEqual(dtypes.string, out.dtype)
-    self.assertProtoEquals("""
-      name: 'c' op: 'Binary' input: 'c/a' input: 'c/b'
-      attr { key: 'T' value { type: DT_STRING } }
-      """, out.op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Binary", a="left", b=12)
-    self.assertEqual(str(cm.exception),
-                     "Expected string passed to parameter 'b' of op 'Binary', "
-                     "got 12 of type 'int' instead.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Binary",
-                         a=self.Tensor(dtypes.string),
-                         b=self.Tensor(dtypes.int32))
-    self.assertEqual(str(cm.exception),
-                     "Input 'b' of 'Binary' Op has type int32 "
-                     "that does not match type string of argument 'a'.")
+    with ops.Graph().as_default():
+      out = self._lib.apply_op("Binary", a=8, b=9, name="b")
+      self.assertEqual(dtypes.int32, out.dtype)
+      self.assertProtoEquals("""
+        name: 'b' op: 'Binary' input: 'b/a' input: 'b/b'
+        attr { key: 'T' value { type: DT_INT32 } }
+        """, out.op.node_def)
+
+      out = self._lib.apply_op("Binary", a="left", b="right", name="c")
+      self.assertEqual(dtypes.string, out.dtype)
+      self.assertProtoEquals("""
+        name: 'c' op: 'Binary' input: 'c/a' input: 'c/b'
+        attr { key: 'T' value { type: DT_STRING } }
+        """, out.op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Binary", a="left", b=12)
+      self.assertEqual(str(cm.exception),
+                       "Expected string passed to parameter 'b' of op 'Binary',"
+                       " got 12 of type 'int' instead.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Binary",
+                           a=self.Tensor(dtypes.string),
+                           b=self.Tensor(dtypes.int32))
+      self.assertEqual(str(cm.exception),
+                       "Input 'b' of 'Binary' Op has type int32 "
+                       "that does not match type string of argument 'a'.")
 
   def testRestrict(self):
-    self._add_op("name: 'Restrict' "
-                 "input_arg { name: 'a' type_attr: 'T' } "
-                 "output_arg { name: 'out' type_attr: 'T' } "
-                 "attr { name: 'T' type: 'type' allowed_values { list { "
-                 "  type: DT_STRING type: DT_BOOL } } }")
-
-    out = self._lib.apply_op("Restrict", a="foo", name="g")
-    self.assertEqual(dtypes.string, out.dtype)
-    self.assertProtoEquals("""
-      name: 'g' op: 'Restrict' input: 'g/a'
-      attr { key: 'T' value { type: DT_STRING } }
-      """, out.op.node_def)
-
-    out = self._lib.apply_op("Restrict", a=True, name="h")
-    self.assertEqual(dtypes.bool, out.dtype)
-    self.assertProtoEquals("""
-      name: 'h' op: 'Restrict' input: 'h/a'
-      attr { key: 'T' value { type: DT_BOOL } }
-      """, out.op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Restrict", a=17)
-    self.assertEqual(str(cm.exception),
-                     "Value passed to parameter 'a' has DataType int32 "
-                     "not in list of allowed values: string, bool")
+    with ops.Graph().as_default():
+      out = self._lib.apply_op("Restrict", a="foo", name="g")
+      self.assertEqual(dtypes.string, out.dtype)
+      self.assertProtoEquals("""
+        name: 'g' op: 'Restrict' input: 'g/a'
+        attr { key: 'T' value { type: DT_STRING } }
+        """, out.op.node_def)
+
+      out = self._lib.apply_op("Restrict", a=True, name="h")
+      self.assertEqual(dtypes.bool, out.dtype)
+      self.assertProtoEquals("""
+        name: 'h' op: 'Restrict' input: 'h/a'
+        attr { key: 'T' value { type: DT_BOOL } }
+        """, out.op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Restrict", a=17)
+      self.assertEqual(str(cm.exception),
+                       "Value passed to parameter 'a' has DataType int32 "
+                       "not in list of allowed values: string, bool")
 
   def testTypeList(self):
-    self._add_op("name: 'TypeList' "
-                 "input_arg { name: 'a' type_list_attr: 'T' } "
-                 "attr { name: 'T' type: 'list(type)' }")
-
-    op = self._lib.apply_op("TypeList", a=["foo"], name="z")
-    self.assertProtoEquals("""
-      name: 'z' op: 'TypeList' input: 'z/a_0'
-      attr { key: 'T' value { list { type: DT_STRING } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("TypeList", a=[True, 12], name="y")
-    self.assertProtoEquals("""
-      name: 'y' op: 'TypeList' input: 'y/a_0' input: 'y/a_1'
-      attr { key: 'T' value { list { type: DT_BOOL type: DT_INT32 } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("TypeList", a=[], name="empty")
-    self.assertProtoEquals("""
-      name: 'empty' op: 'TypeList' attr { key: 'T' value { list { } } }
-      """, op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("TypeList", a=17)
-    self.assertStartsWith(str(cm.exception),
-                          "Expected list for 'a' "
-                          "argument to 'TypeList' Op, not ")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("TypeList", a=[self.Tensor(dtypes.int32), None])
-    self.assertStartsWith(str(cm.exception),
-                          "Tensors in list passed to 'a' of 'TypeList' Op "
-                          "have types [int32, <NOT CONVERTIBLE TO TENSOR>]")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("TypeList", a=["foo"], name="z")
+      self.assertProtoEquals("""
+        name: 'z' op: 'TypeList' input: 'z/a_0'
+        attr { key: 'T' value { list { type: DT_STRING } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("TypeList", a=[True, 12], name="y")
+      self.assertProtoEquals("""
+        name: 'y' op: 'TypeList' input: 'y/a_0' input: 'y/a_1'
+        attr { key: 'T' value { list { type: DT_BOOL type: DT_INT32 } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("TypeList", a=[], name="empty")
+      self.assertProtoEquals("""
+        name: 'empty' op: 'TypeList' attr { key: 'T' value { list { } } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("TypeList", a=17)
+      self.assertStartsWith(str(cm.exception),
+                            "Expected list for 'a' "
+                            "argument to 'TypeList' Op, not ")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("TypeList", a=[self.Tensor(dtypes.int32), None])
+      self.assertStartsWith(str(cm.exception),
+                            "Tensors in list passed to 'a' of 'TypeList' Op "
+                            "have types [int32, <NOT CONVERTIBLE TO TENSOR>]")
 
   def testTypeListTwice(self):
-    self._add_op("name: 'TypeListTwice' "
-                 "input_arg { name: 'a' type_list_attr: 'T' } "
-                 "input_arg { name: 'b' type_list_attr: 'T' } "
-                 "attr { name: 'T' type: 'list(type)' }")
-
-    op = self._lib.apply_op("TypeListTwice",
-                            a=["foo", True],
-                            b=["bar", False],
-                            name="z")
-    self.assertProtoEquals("""
-      name: 'z' op: 'TypeListTwice'
-      input: 'z/a_0' input: 'z/a_1' input: 'z/b_0' input: 'z/b_1'
-      attr { key: 'T' value { list { type: DT_STRING type: DT_BOOL } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("TypeListTwice", a=[], b=[], name="empty")
-    self.assertProtoEquals("""
-      name: 'empty' op: 'TypeListTwice' attr { key: 'T' value { list { } } }
-      """, op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("TypeListTwice", a=["foo", True], b=["bar", 6])
-    self.assertEqual(str(cm.exception),
-                     "Input 'b' of 'TypeListTwice' Op has type list of "
-                     "string, int32 that does not match type list "
-                     "string, bool of argument 'a'.")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("TypeListTwice",
+                              a=["foo", True],
+                              b=["bar", False],
+                              name="z")
+      self.assertProtoEquals("""
+        name: 'z' op: 'TypeListTwice'
+        input: 'z/a_0' input: 'z/a_1' input: 'z/b_0' input: 'z/b_1'
+        attr { key: 'T' value { list { type: DT_STRING type: DT_BOOL } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("TypeListTwice", a=[], b=[], name="empty")
+      self.assertProtoEquals("""
+        name: 'empty' op: 'TypeListTwice' attr { key: 'T' value { list { } } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("TypeListTwice", a=["foo", True], b=["bar", 6])
+      self.assertEqual(str(cm.exception),
+                       "Input 'b' of 'TypeListTwice' Op has type list of "
+                       "string, int32 that does not match type list "
+                       "string, bool of argument 'a'.")
 
   def testOutTypeList(self):
-    self._add_op("name: 'OutTypeList' "
-                 "output_arg { name: 'out' type_list_attr: 'T' } "
-                 "attr { name: 'T' type: 'list(type)' }")
-
-    out, = self._lib.apply_op("OutTypeList", T=[dtypes.float32], name="x")
-    self.assertEqual(dtypes.float32, out.dtype)
-    self.assertProtoEquals("""
-      name: 'x' op: 'OutTypeList'
-      attr { key: 'T' value { list { type: DT_FLOAT } } }
-      """, out.op.node_def)
-
-    out1, out2 = self._lib.apply_op("OutTypeList",
-                                    T=[dtypes.int32, dtypes.bool],
-                                    name="w")
-    self.assertEqual(dtypes.int32, out1.dtype)
-    self.assertEqual(dtypes.bool, out2.dtype)
-    self.assertProtoEquals("""
-      name: 'w' op: 'OutTypeList'
-      attr { key: 'T' value { list { type: DT_INT32 type: DT_BOOL } } }
-      """, out1.op.node_def)
-
-    out = self._lib.apply_op("OutTypeList", T=[], name="empty")
-    self.assertEqual([], out)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("OutTypeList", T=dtypes.int32)
-    self.assertEqual(str(cm.exception), "Expected list for attr T")
+    with ops.Graph().as_default():
+      out, = self._lib.apply_op("OutTypeList", T=[dtypes.float32], name="x")
+      self.assertEqual(dtypes.float32, out.dtype)
+      self.assertProtoEquals("""
+        name: 'x' op: 'OutTypeList'
+        attr { key: 'T' value { list { type: DT_FLOAT } } }
+        """, out.op.node_def)
+
+      out1, out2 = self._lib.apply_op("OutTypeList",
+                                      T=[dtypes.int32, dtypes.bool],
+                                      name="w")
+      self.assertEqual(dtypes.int32, out1.dtype)
+      self.assertEqual(dtypes.bool, out2.dtype)
+      self.assertProtoEquals("""
+        name: 'w' op: 'OutTypeList'
+        attr { key: 'T' value { list { type: DT_INT32 type: DT_BOOL } } }
+        """, out1.op.node_def)
+
+      out = self._lib.apply_op("OutTypeList", T=[], name="empty")
+      self.assertEqual([], out)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("OutTypeList", T=dtypes.int32)
+      self.assertEqual(str(cm.exception), "Expected list for attr T")
 
   def testTypeListRestrict(self):
-    self._add_op("name: 'TypeListRestrict' "
-                 "input_arg { name: 'a' type_list_attr: 'T' } "
-                 "attr { name: 'T' type: 'list(type)' allowed_values { list { "
-                 "  type: DT_STRING type: DT_BOOL } } }")
-
-    op = self._lib.apply_op("TypeListRestrict", a=["foo", False], name="v")
-    self.assertProtoEquals("""
-      name: 'v' op: 'TypeListRestrict' input: 'v/a_0' input: 'v/a_1'
-      attr { key: 'T' value { list { type: DT_STRING type: DT_BOOL } } }
-      """, op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("TypeListRestrict", a=[True, 12])
-    self.assertEqual(str(cm.exception),
-                     "Value passed to parameter 'a' has DataType int32 "
-                     "not in list of allowed values: string, bool")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("TypeListRestrict", a=["foo", False], name="v")
+      self.assertProtoEquals("""
+        name: 'v' op: 'TypeListRestrict' input: 'v/a_0' input: 'v/a_1'
+        attr { key: 'T' value { list { type: DT_STRING type: DT_BOOL } } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("TypeListRestrict", a=[True, 12])
+      self.assertEqual(str(cm.exception),
+                       "Value passed to parameter 'a' has DataType int32 "
+                       "not in list of allowed values: string, bool")
 
   def testOutTypeListRestrict(self):
-    self._add_op("name: 'OutTypeListRestrict' "
-                 "output_arg { name: 'out' type_list_attr: 't' } "
-                 "attr { name: 't' type: 'list(type)' allowed_values { list { "
-                 "  type: DT_STRING type: DT_BOOL } } }")
-
-    out1, out2 = self._lib.apply_op("OutTypeListRestrict",
-                                    t=[dtypes.bool, dtypes.string],
-                                    name="u")
-    self.assertEqual(dtypes.bool, out1.dtype)
-    self.assertEqual(dtypes.string, out2.dtype)
-    self.assertProtoEquals("""
-      name: 'u' op: 'OutTypeListRestrict'
-      attr { key: 't' value { list { type: DT_BOOL type: DT_STRING } } }
-      """, out1.op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("OutTypeListRestrict", t=[dtypes.string, dtypes.int32])
-    self.assertEqual(str(cm.exception),
-                     "Value passed to parameter 't' has DataType int32 "
-                     "not in list of allowed values: string, bool")
+    with ops.Graph().as_default():
+      out1, out2 = self._lib.apply_op("OutTypeListRestrict",
+                                      t=[dtypes.bool, dtypes.string],
+                                      name="u")
+      self.assertEqual(dtypes.bool, out1.dtype)
+      self.assertEqual(dtypes.string, out2.dtype)
+      self.assertProtoEquals("""
+        name: 'u' op: 'OutTypeListRestrict'
+        attr { key: 't' value { list { type: DT_BOOL type: DT_STRING } } }
+        """, out1.op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("OutTypeListRestrict",
+                           t=[dtypes.string, dtypes.int32])
+      self.assertEqual(str(cm.exception),
+                       "Value passed to parameter 't' has DataType int32 "
+                       "not in list of allowed values: string, bool")
 
   def testAttr(self):
-    self._add_op("name: 'Attr' attr { name: 'a' type: 'int' }")
-    op = self._lib.apply_op("Attr", a=12, name="t")
-    self.assertProtoEquals("""
-      name: 't' op: 'Attr' attr { key: 'a' value { i: 12 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("Attr", a=tensor_shape.Dimension(13), name="u")
-    self.assertProtoEquals("""
-      name: 'u' op: 'Attr' attr { key: 'a' value { i: 13 } }
-      """, op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Attr", a="bad")
-    self.assertEqual(str(cm.exception),
-                     "Expected int for argument 'a' not 'bad'.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Attr", a=[12])
-    self.assertEqual(str(cm.exception),
-                     "Expected int for argument 'a' not [12].")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Attr", a=None)
-    self.assertEqual(str(cm.exception),
-                     "Expected int for argument 'a' not None.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("Attr")
-    self.assertEqual(str(cm.exception), "No argument for attr a")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("Attr", a=12, name="t")
+      self.assertProtoEquals("""
+        name: 't' op: 'Attr' attr { key: 'a' value { i: 12 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("Attr", a=tensor_shape.Dimension(13), name="u")
+      self.assertProtoEquals("""
+        name: 'u' op: 'Attr' attr { key: 'a' value { i: 13 } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Attr", a="bad")
+      self.assertEqual(str(cm.exception),
+                       "Expected int for argument 'a' not 'bad'.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Attr", a=[12])
+      self.assertEqual(str(cm.exception),
+                       "Expected int for argument 'a' not [12].")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Attr", a=None)
+      self.assertEqual(str(cm.exception),
+                       "Expected int for argument 'a' not None.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("Attr")
+      self.assertEqual(str(cm.exception), "No argument for attr a")
 
   def testAttrFloat(self):
-    self._add_op("name: 'AttrFloat' attr { name: 'a' type: 'float' }")
-
-    op = self._lib.apply_op("AttrFloat", a=1.2, name="t")
-    self.assertProtoEquals("""
-      name: 't' op: 'AttrFloat' attr { key: 'a' value { f: 1.2 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrFloat", a=12, name="u")
-    self.assertProtoEquals("""
-      name: 'u' op: 'AttrFloat' attr { key: 'a' value { f: 12 } }
-      """, op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("AttrFloat", a="bad")
-    self.assertEqual(str(cm.exception),
-                     "Expected float for argument 'a' not 'bad'.")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrFloat", a=1.2, name="t")
+      self.assertProtoEquals("""
+        name: 't' op: 'AttrFloat' attr { key: 'a' value { f: 1.2 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrFloat", a=12, name="u")
+      self.assertProtoEquals("""
+        name: 'u' op: 'AttrFloat' attr { key: 'a' value { f: 12 } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("AttrFloat", a="bad")
+      self.assertEqual(str(cm.exception),
+                       "Expected float for argument 'a' not 'bad'.")
 
   def testAttrBool(self):
-    self._add_op("name: 'AttrBool' attr { name: 'a' type: 'bool' }")
-
-    op = self._lib.apply_op("AttrBool", a=True, name="t")
-    self.assertProtoEquals("""
-      name: 't' op: 'AttrBool' attr { key: 'a' value { b: true } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrBool", a=False, name="u")
-    self.assertProtoEquals("""
-      name: 'u' op: 'AttrBool' attr { key: 'a' value { b: false } }
-      """, op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("AttrBool", a=0)
-    self.assertEqual(str(cm.exception),
-                     "Expected bool for argument 'a' not 0.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("AttrBool", a=1)
-    self.assertEqual(str(cm.exception),
-                     "Expected bool for argument 'a' not 1.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("AttrBool", a=[])
-    self.assertEqual(str(cm.exception),
-                     "Expected bool for argument 'a' not [].")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrBool", a=True, name="t")
+      self.assertProtoEquals("""
+        name: 't' op: 'AttrBool' attr { key: 'a' value { b: true } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrBool", a=False, name="u")
+      self.assertProtoEquals("""
+        name: 'u' op: 'AttrBool' attr { key: 'a' value { b: false } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("AttrBool", a=0)
+      self.assertEqual(str(cm.exception),
+                       "Expected bool for argument 'a' not 0.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("AttrBool", a=1)
+      self.assertEqual(str(cm.exception),
+                       "Expected bool for argument 'a' not 1.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("AttrBool", a=[])
+      self.assertEqual(str(cm.exception),
+                       "Expected bool for argument 'a' not [].")
 
   def testAttrBoolList(self):
-    self._add_op("name: 'AttrBoolList' attr { name: 'a' type: 'list(bool)' }")
-
-    op = self._lib.apply_op("AttrBoolList", a=[True, False, True], name="t")
-    self.assertProtoEquals("""
-      name: 't' op: 'AttrBoolList'
-      attr { key: 'a' value { list { b: true b: false b:true } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrBoolList", a=[], name="u")
-    self.assertProtoEquals("""
-      name: 'u' op: 'AttrBoolList' attr { key: 'a' value { list { } } }
-      """, op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("AttrBoolList", a=[0])
-    self.assertEqual(str(cm.exception),
-                     "Expected bool for argument 'a' not 0.")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrBoolList", a=[True, False, True], name="t")
+      self.assertProtoEquals("""
+        name: 't' op: 'AttrBoolList'
+        attr { key: 'a' value { list { b: true b: false b:true } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrBoolList", a=[], name="u")
+      self.assertProtoEquals("""
+        name: 'u' op: 'AttrBoolList' attr { key: 'a' value { list { } } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("AttrBoolList", a=[0])
+      self.assertEqual(str(cm.exception),
+                       "Expected bool for argument 'a' not 0.")
 
   def testAttrMin(self):
-    self._add_op("name: 'AttrMin' attr { name: 'a' type: 'int' "
-                 "has_minimum: true minimum: 5 }")
-    op = self._lib.apply_op("AttrMin", a=12, name="s")
-    self.assertProtoEquals("""
-      name: 's' op: 'AttrMin' attr { key: 'a' value { i: 12 } }
-      """, op.node_def)
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrMin", a=12, name="s")
+      self.assertProtoEquals("""
+        name: 's' op: 'AttrMin' attr { key: 'a' value { i: 12 } }
+        """, op.node_def)
 
-    with self.assertRaises(ValueError) as cm:
-      self._lib.apply_op("AttrMin", a=2)
-    self.assertEqual(str(cm.exception),
-                     "Attr 'a' of 'AttrMin' Op passed 2 less than minimum 5.")
+      with self.assertRaises(ValueError) as cm:
+        self._lib.apply_op("AttrMin", a=2)
+      self.assertEqual(str(cm.exception),
+                       "Attr 'a' of 'AttrMin' Op passed 2 less than minimum 5.")
 
   def testAttrListMin(self):
-    self._add_op("name: 'AttrListMin' attr { name: 'a' type: 'list(int)' "
-                 "has_minimum: true minimum: 2 }")
-
-    op = self._lib.apply_op("AttrListMin", a=[1, 2], name="r")
-    self.assertProtoEquals("""
-      name: 'r' op: 'AttrListMin'
-      attr { key: 'a' value { list { i: 1 i: 2 } } }
-      """, op.node_def)
-
-    with self.assertRaises(ValueError) as cm:
-      self._lib.apply_op("AttrListMin", a=[17])
-    self.assertEqual(str(cm.exception),
-                     "Attr 'a' of 'AttrListMin' Op "
-                     "passed list of length 1 less than minimum 2.")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrListMin", a=[1, 2], name="r")
+      self.assertProtoEquals("""
+        name: 'r' op: 'AttrListMin'
+        attr { key: 'a' value { list { i: 1 i: 2 } } }
+        """, op.node_def)
+
+      with self.assertRaises(ValueError) as cm:
+        self._lib.apply_op("AttrListMin", a=[17])
+      self.assertEqual(str(cm.exception),
+                       "Attr 'a' of 'AttrListMin' Op "
+                       "passed list of length 1 less than minimum 2.")
 
   def testAttrEnum(self):
-    self._add_op("name: 'AttrEnum' "
-                 "attr { name: 'a' type: 'string' "
-                 "  allowed_values { list { s: 'apples' s: 'oranges' } } }")
-
-    op = self._lib.apply_op("AttrEnum", a="oranges", name="e")
-    self.assertProtoEquals("""
-      name: 'e' op: 'AttrEnum' attr { key: 'a' value { s: 'oranges' } }
-      """, op.node_def)
-
-    with self.assertRaises(ValueError) as cm:
-      self._lib.apply_op("AttrEnum", a="invalid")
-    self.assertEqual(str(cm.exception),
-                     'Attr \'a\' of \'AttrEnum\' Op '
-                     'passed string \'invalid\' not in: '
-                     '"apples", "oranges".')
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrEnum", a="oranges", name="e")
+      self.assertProtoEquals("""
+        name: 'e' op: 'AttrEnum' attr { key: 'a' value { s: 'oranges' } }
+        """, op.node_def)
+
+      with self.assertRaises(ValueError) as cm:
+        self._lib.apply_op("AttrEnum", a="invalid")
+      self.assertEqual(str(cm.exception),
+                       'Attr \'a\' of \'AttrEnum\' Op '
+                       'passed string \'invalid\' not in: '
+                       '"apples", "oranges".')
 
   def testAttrEnumList(self):
-    self._add_op("name: 'AttrEnumList' "
-                 "attr { name: 'a' type: 'list(string)' "
-                 "  allowed_values { list { s: 'apples' s: 'oranges' } } }")
-
-    op = self._lib.apply_op("AttrEnumList", a=["oranges", "apples"], name="f")
-    self.assertProtoEquals("""
-      name: 'f' op: 'AttrEnumList'
-      attr { key: 'a' value { list { s: 'oranges' s: 'apples' } } }
-      """, op.node_def)
-
-    with self.assertRaises(ValueError) as cm:
-      self._lib.apply_op("AttrEnumList", a=["apples", "invalid", "oranges"])
-    self.assertEqual(str(cm.exception),
-                     'Attr \'a\' of \'AttrEnumList\' Op '
-                     'passed string \'invalid\' not '
-                     'in: "apples", "oranges".')
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrEnumList", a=["oranges", "apples"], name="f")
+      self.assertProtoEquals("""
+        name: 'f' op: 'AttrEnumList'
+        attr { key: 'a' value { list { s: 'oranges' s: 'apples' } } }
+        """, op.node_def)
+
+      with self.assertRaises(ValueError) as cm:
+        self._lib.apply_op("AttrEnumList", a=["apples", "invalid", "oranges"])
+      self.assertEqual(str(cm.exception),
+                       'Attr \'a\' of \'AttrEnumList\' Op '
+                       'passed string \'invalid\' not '
+                       'in: "apples", "oranges".')
 
   def testAttrShape(self):
-    self._add_op("name: 'AttrShape' attr { name: 'a' type: 'shape' }")
-
-    op = self._lib.apply_op("AttrShape", a=[5], name="s1")
-    self.assertProtoEquals("""
-      name: 's1' op: 'AttrShape'
-      attr { key: 'a' value { shape { dim { size: 5 } } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrShape", a=(4, 3, 2), name="s2")
-    self.assertProtoEquals("""
-      name: 's2' op: 'AttrShape'
-      attr { key: 'a' value {
-        shape { dim { size: 4 } dim { size: 3 } dim { size: 2 } } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op(
-        "AttrShape", a=tensor_shape.TensorShape([3, 2]), name="s3")
-    self.assertProtoEquals("""
-      name: 's3' op: 'AttrShape'
-      attr { key: 'a' value {
-        shape { dim { size: 3 } dim { size: 2 } } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrShape", a=[], name="s4")
-    self.assertProtoEquals("""
-      name: 's4' op: 'AttrShape' attr { key: 'a' value { shape { } } }
-      """, op.node_def)
-
-    shape = tensor_shape_pb2.TensorShapeProto()
-    shape.dim.add().size = 6
-    shape.dim.add().size = 3
-    op = self._lib.apply_op("AttrShape", a=shape, name="s5")
-    self.assertProtoEquals("""
-      name: 's5' op: 'AttrShape'
-      attr { key: 'a' value { shape { dim { size: 6 } dim { size: 3 } } } }
-      """, op.node_def)
-
-    # TODO(josh11b): Re-enable this test once we stop promoting scalars to shapes.
-    # with self.assertRaises(TypeError) as cm:
-    #   self._lib.apply_op("AttrShape", a=5)
-    # self.assertEqual(str(cm.exception),
-    #                  "Don't know how to convert 5 to a TensorShapeProto for "
-    #                  "argument 'a'")
-
-    with self.assertRaises(TypeError):
-      self._lib.apply_op("AttrShape", a="ABC")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrShape", a=[5], name="s1")
+      self.assertProtoEquals("""
+        name: 's1' op: 'AttrShape'
+        attr { key: 'a' value { shape { dim { size: 5 } } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrShape", a=(4, 3, 2), name="s2")
+      self.assertProtoEquals("""
+        name: 's2' op: 'AttrShape'
+        attr { key: 'a' value {
+          shape { dim { size: 4 } dim { size: 3 } dim { size: 2 } } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op(
+          "AttrShape", a=tensor_shape.TensorShape([3, 2]), name="s3")
+      self.assertProtoEquals("""
+        name: 's3' op: 'AttrShape'
+        attr { key: 'a' value {
+          shape { dim { size: 3 } dim { size: 2 } } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrShape", a=[], name="s4")
+      self.assertProtoEquals("""
+        name: 's4' op: 'AttrShape' attr { key: 'a' value { shape { } } }
+        """, op.node_def)
+
+      shape = tensor_shape_pb2.TensorShapeProto()
+      shape.dim.add().size = 6
+      shape.dim.add().size = 3
+      op = self._lib.apply_op("AttrShape", a=shape, name="s5")
+      self.assertProtoEquals("""
+        name: 's5' op: 'AttrShape'
+        attr { key: 'a' value { shape { dim { size: 6 } dim { size: 3 } } } }
+        """, op.node_def)
+
+      # TODO(josh11b): Re-enable this test once we stop promoting scalars to
+      # shapes.
+      # with self.assertRaises(TypeError) as cm:
+      #   self._lib.apply_op("AttrShape", a=5)
+      # self.assertEqual(str(cm.exception),
+      #                  "Don't know how to convert 5 to a TensorShapeProto for"
+      #                  " argument 'a'")
+
+      with self.assertRaises(TypeError):
+        self._lib.apply_op("AttrShape", a="ABC")
 
   def testAttrShapeList(self):
-    self._add_op("name: 'AttrShapeList' attr { name: 'a' type: 'list(shape)' }")
-
-    op = self._lib.apply_op("AttrShapeList", a=[[3, 2], [6, 5, 4]], name="sl")
-    self.assertProtoEquals("""
-      name: 'sl' op: 'AttrShapeList'
-      attr { key: 'a' value { list {
-        shape { dim { size: 3 } dim { size: 2 } }
-        shape { dim { size: 6 } dim { size: 5 } dim { size: 4 } } } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrShapeList", a=[], name="esl")
-    self.assertProtoEquals("""
-      name: 'esl' op: 'AttrShapeList' attr { key: 'a' value { list { } } }
-      """, op.node_def)
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrShapeList", a=[[3, 2], [6, 5, 4]], name="sl")
+      self.assertProtoEquals("""
+        name: 'sl' op: 'AttrShapeList'
+        attr { key: 'a' value { list {
+          shape { dim { size: 3 } dim { size: 2 } }
+          shape { dim { size: 6 } dim { size: 5 } dim { size: 4 } } } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrShapeList", a=[], name="esl")
+      self.assertProtoEquals("""
+        name: 'esl' op: 'AttrShapeList' attr { key: 'a' value { list { } } }
+        """, op.node_def)
 
   def testAttrPartialShape(self):
-    self._add_op(
-        "name: 'AttrPartialShape' attr { name: 'a' type: 'shape' }")
-
-    op = self._lib.apply_op("AttrPartialShape", a=[5], name="s1")
-    self.assertProtoEquals("""
-      name: 's1' op: 'AttrPartialShape'
-      attr { key: 'a' value { shape { dim { size: 5 } } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrPartialShape", a=(4, None, 2), name="s2")
-    self.assertProtoEquals("""
-      name: 's2' op: 'AttrPartialShape'
-      attr { key: 'a' value {
-        shape { dim { size: 4 } dim { size: -1 } dim { size: 2 } } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op(
-        "AttrPartialShape", a=tensor_shape.TensorShape([3, None]), name="s3")
-    self.assertProtoEquals("""
-      name: 's3' op: 'AttrPartialShape'
-      attr { key: 'a' value {
-        shape { dim { size: 3 } dim { size: -1 } } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrPartialShape", a=[], name="s4")
-    self.assertProtoEquals("""
-      name: 's4' op: 'AttrPartialShape'
-      attr { key: 'a' value { shape { } } }
-      """, op.node_def)
-
-    shape = tensor_shape_pb2.TensorShapeProto()
-    shape.dim.add().size = -1
-    shape.dim.add().size = 3
-    op = self._lib.apply_op("AttrPartialShape", a=shape, name="s5")
-    self.assertProtoEquals("""
-      name: 's5' op: 'AttrPartialShape'
-      attr { key: 'a' value {
-        shape { dim { size: -1 } dim { size: 3 } } } }
-      """, op.node_def)
-
-    # TODO(ebrevdo): Re-enable once we stop promoting scalars to shapes.
-    # with self.assertRaises(TypeError) as cm:
-    #   self._lib.apply_op("AttrPartialShape", a=5)
-    # self.assertEqual(str(cm.exception),
-    #                  "Don't know how to convert 5 to a TensorShapeProto for "
-    #                  "argument 'a'")
-
-    with self.assertRaises(TypeError):
-      self._lib.apply_op("AttrPartialShape", a="ABC")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrPartialShape", a=[5], name="s1")
+      self.assertProtoEquals("""
+        name: 's1' op: 'AttrPartialShape'
+        attr { key: 'a' value { shape { dim { size: 5 } } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrPartialShape", a=(4, None, 2), name="s2")
+      self.assertProtoEquals("""
+        name: 's2' op: 'AttrPartialShape'
+        attr { key: 'a' value {
+          shape { dim { size: 4 } dim { size: -1 } dim { size: 2 } } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op(
+          "AttrPartialShape", a=tensor_shape.TensorShape([3, None]), name="s3")
+      self.assertProtoEquals("""
+        name: 's3' op: 'AttrPartialShape'
+        attr { key: 'a' value {
+          shape { dim { size: 3 } dim { size: -1 } } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrPartialShape", a=[], name="s4")
+      self.assertProtoEquals("""
+        name: 's4' op: 'AttrPartialShape'
+        attr { key: 'a' value { shape { } } }
+        """, op.node_def)
+
+      shape = tensor_shape_pb2.TensorShapeProto()
+      shape.dim.add().size = -1
+      shape.dim.add().size = 3
+      op = self._lib.apply_op("AttrPartialShape", a=shape, name="s5")
+      self.assertProtoEquals("""
+        name: 's5' op: 'AttrPartialShape'
+        attr { key: 'a' value {
+          shape { dim { size: -1 } dim { size: 3 } } } }
+        """, op.node_def)
+
+      # TODO(ebrevdo): Re-enable once we stop promoting scalars to shapes.
+      # with self.assertRaises(TypeError) as cm:
+      #   self._lib.apply_op("AttrPartialShape", a=5)
+      # self.assertEqual(str(cm.exception),
+      #                  "Don't know how to convert 5 to a TensorShapeProto for"
+      #                  " argument 'a'")
+
+      with self.assertRaises(TypeError):
+        self._lib.apply_op("AttrPartialShape", a="ABC")
 
   def testAttrPartialShapeList(self):
-    self._add_op("""
-      name: 'AttrPartialShapeList'
-      attr { name: 'a' type: 'list(shape)' }
-    """)
-
-    op = self._lib.apply_op(
-        "AttrPartialShapeList", a=[[3, 2], [6, None, 4]], name="sl")
-    self.assertProtoEquals("""
-      name: 'sl' op: 'AttrPartialShapeList'
-      attr { key: 'a' value { list {
-        shape { dim { size: 3 } dim { size: 2 } }
-        shape { dim { size: 6 } dim { size: -1 } dim { size: 4 } } } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrPartialShapeList", a=[], name="esl")
-    self.assertProtoEquals("""
-      name: 'esl' op: 'AttrPartialShapeList' attr {
-        key: 'a' value { list { } } }
-      """, op.node_def)
+    with ops.Graph().as_default():
+      op = self._lib.apply_op(
+          "AttrPartialShapeList", a=[[3, 2], [6, None, 4]], name="sl")
+      self.assertProtoEquals("""
+        name: 'sl' op: 'AttrPartialShapeList'
+        attr { key: 'a' value { list {
+          shape { dim { size: 3 } dim { size: 2 } }
+          shape { dim { size: 6 } dim { size: -1 } dim { size: 4 } } } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrPartialShapeList", a=[], name="esl")
+      self.assertProtoEquals("""
+        name: 'esl' op: 'AttrPartialShapeList' attr {
+          key: 'a' value { list { } } }
+        """, op.node_def)
 
   def testAttrDefault(self):
-    self._add_op("name: 'AttrDefault' "
-                 "attr { name: 'a' type: 'string' "
-                 "  default_value { s: 'banana' } }")
-
-    op = self._lib.apply_op("AttrDefault", a=None, name="d")
-    self.assertProtoEquals("""
-      name: 'd' op: 'AttrDefault' attr { key: 'a' value { s: 'banana' } }
-      """, op.node_def)
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrDefault", a=None, name="d")
+      self.assertProtoEquals("""
+        name: 'd' op: 'AttrDefault' attr { key: 'a' value { s: 'banana' } }
+        """, op.node_def)
 
-    op = self._lib.apply_op("AttrDefault", a="kiwi", name="c")
-    self.assertProtoEquals("""
-      name: 'c' op: 'AttrDefault' attr { key: 'a' value { s: 'kiwi' } }
-      """, op.node_def)
+      op = self._lib.apply_op("AttrDefault", a="kiwi", name="c")
+      self.assertProtoEquals("""
+        name: 'c' op: 'AttrDefault' attr { key: 'a' value { s: 'kiwi' } }
+        """, op.node_def)
 
   def testAttrListDefault(self):
-    self._add_op("name: 'AttrListDefault' "
-                 "attr { name: 'a' type: 'list(int)' "
-                 "  default_value { list { i: 5 i: 15 } } }")
-
-    op = self._lib.apply_op("AttrListDefault", a=None, name="b")
-    self.assertProtoEquals("""
-      name: 'b' op: 'AttrListDefault'
-      attr { key: 'a' value { list { i: 5 i: 15 } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrListDefault", a=[3], name="a")
-    self.assertProtoEquals("""
-      name: 'a' op: 'AttrListDefault'
-      attr { key: 'a' value { list { i: 3 } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrListDefault", a=[], name="empty")
-    self.assertProtoEquals("""
-      name: 'empty' op: 'AttrListDefault'
-      attr { key: 'a' value { list { } } }
-      """, op.node_def)
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrListDefault", a=None, name="b")
+      self.assertProtoEquals("""
+        name: 'b' op: 'AttrListDefault'
+        attr { key: 'a' value { list { i: 5 i: 15 } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrListDefault", a=[3], name="a")
+      self.assertProtoEquals("""
+        name: 'a' op: 'AttrListDefault'
+        attr { key: 'a' value { list { i: 3 } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrListDefault", a=[], name="empty")
+      self.assertProtoEquals("""
+        name: 'empty' op: 'AttrListDefault'
+        attr { key: 'a' value { list { } } }
+        """, op.node_def)
 
   def testAttrEmptyListDefault(self):
-    self._add_op("name: 'AttrEmptyListDefault' "
-                 "attr { name: 'a' type: 'list(float)' "
-                 "       default_value { list { } } }")
-
-    op = self._lib.apply_op("AttrEmptyListDefault", a=None, name="b")
-    self.assertProtoEquals("""
-      name: 'b' op: 'AttrEmptyListDefault'
-      attr { key: 'a' value { list { } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrEmptyListDefault", a=[3], name="a")
-    self.assertProtoEquals("""
-      name: 'a' op: 'AttrEmptyListDefault'
-      attr { key: 'a' value { list { f: 3 } } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("AttrEmptyListDefault", a=[], name="empty")
-    self.assertProtoEquals("""
-      name: 'empty' op: 'AttrEmptyListDefault'
-      attr { key: 'a' value { list { } } }
-      """, op.node_def)
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("AttrEmptyListDefault", a=None, name="b")
+      self.assertProtoEquals("""
+        name: 'b' op: 'AttrEmptyListDefault'
+        attr { key: 'a' value { list { } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrEmptyListDefault", a=[3], name="a")
+      self.assertProtoEquals("""
+        name: 'a' op: 'AttrEmptyListDefault'
+        attr { key: 'a' value { list { f: 3 } } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("AttrEmptyListDefault", a=[], name="empty")
+      self.assertProtoEquals("""
+        name: 'empty' op: 'AttrEmptyListDefault'
+        attr { key: 'a' value { list { } } }
+        """, op.node_def)
 
   def testReservedAttr(self):
-    self._add_op("name: 'ReservedAttr' "
-                 "attr { name: 'range' type: 'int' } ")
-    op = self._lib.apply_op("ReservedAttr", range_=7, name="x")
-    self.assertProtoEquals("""
-      name: 'x' op: 'ReservedAttr' attr { key: 'range' value { i: 7 } }
-      """, op.node_def)
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("ReservedAttr", range_=7, name="x")
+      self.assertProtoEquals("""
+        name: 'x' op: 'ReservedAttr' attr { key: 'range' value { i: 7 } }
+        """, op.node_def)
 
   def testDefaultAttrType(self):
-    self._add_op("name: 'AttrTypeDefault' "
-                 "input_arg { name: 'a' type_attr: 'T' } "
-                 "attr { name: 'T' type: 'type' "
-                 "       default_value { type: DT_INT32 } }")
-
-    # Give an input whose type has no obvious output type.
-    op = self._lib.apply_op("AttrTypeDefault", a=[], name="n")
-    self.assertProtoEquals("""
-      name: 'n' op: 'AttrTypeDefault' input: 'n/a'
-      attr { key: 'T' value { type: DT_INT32 } }
-      """, op.node_def)
-
-    # Give an input whose type can be inferred as different
-    # than the default.
-    op = self._lib.apply_op("AttrTypeDefault", a=[1.0], name="f")
-    self.assertProtoEquals("""
-      name: 'f' op: 'AttrTypeDefault' input: 'f/a'
-      attr { key: 'T' value { type: DT_FLOAT } }
-      """, op.node_def)
+    with ops.Graph().as_default():
+      # Give an input whose type has no obvious output type.
+      op = self._lib.apply_op("AttrTypeDefault", a=[], name="n")
+      self.assertProtoEquals("""
+        name: 'n' op: 'AttrTypeDefault' input: 'n/a'
+        attr { key: 'T' value { type: DT_INT32 } }
+        """, op.node_def)
+
+      # Give an input whose type can be inferred as different
+      # than the default.
+      op = self._lib.apply_op("AttrTypeDefault", a=[1.0], name="f")
+      self.assertProtoEquals("""
+        name: 'f' op: 'AttrTypeDefault' input: 'f/a'
+        attr { key: 'T' value { type: DT_FLOAT } }
+        """, op.node_def)
 
   def testDefaultListAttrType(self):
-    self._add_op("name: 'AttrListTypeDefault' "
-                 "input_arg { name: 'a' type_attr: 'T' number_attr: 'N' } "
-                 "input_arg { name: 'b' type_attr: 'T' number_attr: 'N' } "
-                 "attr { name: 'T' type: 'type' "
-                 "       default_value { type: DT_INT32 } }"
-                 "attr { name: 'N' type: 'int' }")
-
-    # Give an input whose type can be inferred as different
-    # than the default.
-    op = self._lib.apply_op("AttrListTypeDefault", a=[1.0], b=[2.0], name="n")
-    self.assertProtoEquals("""
-      name: 'n' op: 'AttrListTypeDefault' input: 'n/a_0' input: 'n/b_0'
-      attr { key: 'T' value { type: DT_FLOAT } }
-      attr { key: 'N' value { i: 1 } }
-      """, op.node_def)
+    with ops.Graph().as_default():
+      # Give an input whose type can be inferred as different
+      # than the default.
+      op = self._lib.apply_op("AttrListTypeDefault", a=[1.0], b=[2.0], name="n")
+      self.assertProtoEquals("""
+        name: 'n' op: 'AttrListTypeDefault' input: 'n/a_0' input: 'n/b_0'
+        attr { key: 'T' value { type: DT_FLOAT } }
+        attr { key: 'N' value { i: 1 } }
+        """, op.node_def)
 
   def testNIntsIn(self):
-    self._add_op("name: 'NIntsIn' "
-                 "input_arg { name: 'a' type: DT_INT32 number_attr: 'N' } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }")
-
-    op = self._lib.apply_op("NIntsIn", a=[1, 2], name="n")
-    self.assertProtoEquals("""
-      name: 'n' op: 'NIntsIn' input: 'n/a_0' input: 'n/a_1'
-      attr { key: 'N' value { i: 2 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("NIntsIn", a=[5, 4, 3, 2, 1], name="o")
-    self.assertProtoEquals("""
-      name: 'o' op: 'NIntsIn'
-      input: 'o/a_0' input: 'o/a_1' input: 'o/a_2' input: 'o/a_3' input: 'o/a_4'
-      attr { key: 'N' value { i: 5 } }
-      """, op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NIntsIn", a=["foo", "bar"])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'a' of 'NIntsIn' Op have types "
-                     "[string, string] that do not match expected type int32.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NIntsIn",
-                         a=[self.Tensor(dtypes.string),
-                            self.Tensor(dtypes.string)])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'a' of 'NIntsIn' Op have "
-                     "types [string, string] that do not match expected type "
-                     "int32.")
-
-    with self.assertRaises(ValueError) as cm:
-      self._lib.apply_op("NIntsIn", a=[99])
-    self.assertEqual(str(cm.exception),
-                     "List argument 'a' to 'NIntsIn' Op "
-                     "with length 1 shorter than "
-                     "minimum length 2.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NIntsIn", a=[38, "bar"])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'a' of 'NIntsIn' Op have types "
-                     "[int32, string] that do not match expected type int32.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NIntsIn",
-                         a=[self.Tensor(dtypes.int32),
-                            self.Tensor(dtypes.string)])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'a' of 'NIntsIn' Op "
-                     "have types [int32, string] that do not match expected "
-                     "type int32.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NIntsIn", a=17)
-    self.assertStartsWith(str(cm.exception),
-                          "Expected list for 'a' argument "
-                          "to 'NIntsIn' Op, not ")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("NIntsIn", a=[1, 2], name="n")
+      self.assertProtoEquals("""
+        name: 'n' op: 'NIntsIn' input: 'n/a_0' input: 'n/a_1'
+        attr { key: 'N' value { i: 2 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("NIntsIn", a=[5, 4, 3, 2, 1], name="o")
+      self.assertProtoEquals("""
+        name: 'o' op: 'NIntsIn'
+        input: 'o/a_0' input: 'o/a_1' input: 'o/a_2' input: 'o/a_3' input: 'o/a_4'
+        attr { key: 'N' value { i: 5 } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NIntsIn", a=["foo", "bar"])
+      self.assertEqual(
+          str(cm.exception),
+          "Tensors in list passed to 'a' of 'NIntsIn' Op have types "
+          "[string, string] that do not match expected type int32.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NIntsIn",
+                           a=[self.Tensor(dtypes.string),
+                              self.Tensor(dtypes.string)])
+      self.assertEqual(str(cm.exception),
+                       "Tensors in list passed to 'a' of 'NIntsIn' Op have "
+                       "types [string, string] that do not match expected type "
+                       "int32.")
+
+      with self.assertRaises(ValueError) as cm:
+        self._lib.apply_op("NIntsIn", a=[99])
+      self.assertEqual(str(cm.exception),
+                       "List argument 'a' to 'NIntsIn' Op "
+                       "with length 1 shorter than "
+                       "minimum length 2.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NIntsIn", a=[38, "bar"])
+      self.assertEqual(
+          str(cm.exception),
+          "Tensors in list passed to 'a' of 'NIntsIn' Op have types "
+          "[int32, string] that do not match expected type int32.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NIntsIn",
+                           a=[self.Tensor(dtypes.int32),
+                              self.Tensor(dtypes.string)])
+      self.assertEqual(str(cm.exception),
+                       "Tensors in list passed to 'a' of 'NIntsIn' Op "
+                       "have types [int32, string] that do not match expected "
+                       "type int32.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NIntsIn", a=17)
+      self.assertStartsWith(str(cm.exception),
+                            "Expected list for 'a' argument "
+                            "to 'NIntsIn' Op, not ")
 
   def testNPolymorphicIn(self):
-    self._add_op("name: 'NPolymorphicIn' "
-                 "input_arg { name: 'a' type_attr: 'T' number_attr: 'N' } "
-                 "attr { name: 'T' type: 'type' } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }")
-
-    op = self._lib.apply_op("NPolymorphicIn", a=[1, 2], name="n")
-    self.assertProtoEquals("""
-      name: 'n' op: 'NPolymorphicIn' input: 'n/a_0' input: 'n/a_1'
-      attr { key: 'T' value { type: DT_INT32 } }
-      attr { key: 'N' value { i: 2 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("NPolymorphicIn", a=[5, 4, 3, 2, 1], name="o")
-    self.assertProtoEquals("""
-      name: 'o' op: 'NPolymorphicIn'
-      input: 'o/a_0' input: 'o/a_1' input: 'o/a_2' input: 'o/a_3' input: 'o/a_4'
-      attr { key: 'T' value { type: DT_INT32 } }
-      attr { key: 'N' value { i: 5 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("NPolymorphicIn", a=["foo", "bar"], name="p")
-    self.assertProtoEquals("""
-      name: 'p' op: 'NPolymorphicIn' input: 'p/a_0' input: 'p/a_1'
-      attr { key: 'T' value { type: DT_STRING } }
-      attr { key: 'N' value { i: 2 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("NPolymorphicIn",
-                            a=[1, self.Tensor(dtypes.float32, name="x")],
-                            name="q")
-    self.assertProtoEquals("""
-      name: 'q' op: 'NPolymorphicIn' input: 'q/a_0' input: 'x'
-      attr { key: 'T' value { type: DT_FLOAT } }
-      attr { key: 'N' value { i: 2 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("NPolymorphicIn",
-                            a=[self.Tensor(dtypes.float32, name="y"),
-                               self.Tensor(dtypes.float32_ref, name="z")],
-                            name="r")
-    self.assertProtoEquals("""
-      name: 'r' op: 'NPolymorphicIn' input: 'y' input: 'z'
-      attr { key: 'T' value { type: DT_FLOAT } }
-      attr { key: 'N' value { i: 2 } }
-      """, op.node_def)
-
-    with self.assertRaises(ValueError) as cm:
-      self._lib.apply_op("NPolymorphicIn", a=[99])
-    self.assertEqual(str(cm.exception),
-                     "List argument 'a' to 'NPolymorphicIn' Op with length 1 "
-                     "shorter than minimum length 2.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NPolymorphicIn", a=[38, "bar"])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'a' of 'NPolymorphicIn' Op "
-                     "have types [int32, string] that don't all match.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NPolymorphicIn", a=[38, self.Tensor(dtypes.string)])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'a' of 'NPolymorphicIn' Op "
-                     "have types [int32, string] that don't all match.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NPolymorphicIn", a=[38, None])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'a' of 'NPolymorphicIn' Op "
-                     "have types [int32, <NOT CONVERTIBLE TO TENSOR>] that "
-                     "don't all match.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NPolymorphicIn",
-                         a=["abcd", self.Tensor(dtypes.int32)])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'a' of 'NPolymorphicIn' Op "
-                     "have types [string, int32] that don't all match.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NPolymorphicIn", a=17)
-    self.assertStartsWith(str(cm.exception),
-                          "Expected list for 'a' argument "
-                          "to 'NPolymorphicIn' Op, not ")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("NPolymorphicIn", a=[1, 2], name="n")
+      self.assertProtoEquals("""
+        name: 'n' op: 'NPolymorphicIn' input: 'n/a_0' input: 'n/a_1'
+        attr { key: 'T' value { type: DT_INT32 } }
+        attr { key: 'N' value { i: 2 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("NPolymorphicIn", a=[5, 4, 3, 2, 1], name="o")
+      self.assertProtoEquals("""
+        name: 'o' op: 'NPolymorphicIn'
+        input: 'o/a_0' input: 'o/a_1' input: 'o/a_2' input: 'o/a_3' input: 'o/a_4'
+        attr { key: 'T' value { type: DT_INT32 } }
+        attr { key: 'N' value { i: 5 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("NPolymorphicIn", a=["foo", "bar"], name="p")
+      self.assertProtoEquals("""
+        name: 'p' op: 'NPolymorphicIn' input: 'p/a_0' input: 'p/a_1'
+        attr { key: 'T' value { type: DT_STRING } }
+        attr { key: 'N' value { i: 2 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("NPolymorphicIn",
+                              a=[1, self.Tensor(dtypes.float32, name="x")],
+                              name="q")
+      self.assertProtoEquals("""
+        name: 'q' op: 'NPolymorphicIn' input: 'q/a_0' input: 'x'
+        attr { key: 'T' value { type: DT_FLOAT } }
+        attr { key: 'N' value { i: 2 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("NPolymorphicIn",
+                              a=[self.Tensor(dtypes.float32, name="y"),
+                                 self.Tensor(dtypes.float32_ref, name="z")],
+                              name="r")
+      self.assertProtoEquals("""
+        name: 'r' op: 'NPolymorphicIn' input: 'y' input: 'z'
+        attr { key: 'T' value { type: DT_FLOAT } }
+        attr { key: 'N' value { i: 2 } }
+        """, op.node_def)
+
+      with self.assertRaises(ValueError) as cm:
+        self._lib.apply_op("NPolymorphicIn", a=[99])
+      self.assertEqual(str(cm.exception),
+                       "List argument 'a' to 'NPolymorphicIn' Op with length 1 "
+                       "shorter than minimum length 2.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NPolymorphicIn", a=[38, "bar"])
+      self.assertEqual(str(cm.exception),
+                       "Tensors in list passed to 'a' of 'NPolymorphicIn' Op "
+                       "have types [int32, string] that don't all match.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NPolymorphicIn", a=[38, self.Tensor(dtypes.string)])
+      self.assertEqual(str(cm.exception),
+                       "Tensors in list passed to 'a' of 'NPolymorphicIn' Op "
+                       "have types [int32, string] that don't all match.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NPolymorphicIn", a=[38, None])
+      self.assertEqual(str(cm.exception),
+                       "Tensors in list passed to 'a' of 'NPolymorphicIn' Op "
+                       "have types [int32, <NOT CONVERTIBLE TO TENSOR>] that "
+                       "don't all match.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NPolymorphicIn",
+                           a=["abcd", self.Tensor(dtypes.int32)])
+      self.assertEqual(str(cm.exception),
+                       "Tensors in list passed to 'a' of 'NPolymorphicIn' Op "
+                       "have types [string, int32] that don't all match.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NPolymorphicIn", a=17)
+      self.assertStartsWith(str(cm.exception),
+                            "Expected list for 'a' argument "
+                            "to 'NPolymorphicIn' Op, not ")
 
   def testNPolymorphicRestrictIn(self):
-    self._add_op("name: 'NPolymorphicRestrictIn' "
-                 "input_arg { name: 'a' type_attr: 'T' number_attr: 'N' } "
-                 "attr { name: 'T' type: 'type' allowed_values { "
-                 "  list { type: DT_STRING type: DT_BOOL } } } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }")
-
-    op = self._lib.apply_op("NPolymorphicRestrictIn", a=["foo", "bar"],
-                            name="p")
-    self.assertProtoEquals("""
-      name: 'p' op: 'NPolymorphicRestrictIn' input: 'p/a_0' input: 'p/a_1'
-      attr { key: 'T' value { type: DT_STRING } }
-      attr { key: 'N' value { i: 2 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("NPolymorphicRestrictIn",
-                            a=[False, True, False],
-                            name="b")
-    self.assertProtoEquals("""
-      name: 'b' op: 'NPolymorphicRestrictIn'
-      input: 'b/a_0' input: 'b/a_1' input: 'b/a_2'
-      attr { key: 'T' value { type: DT_BOOL } }
-      attr { key: 'N' value { i: 3 } }
-      """, op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NPolymorphicRestrictIn", a=[1, 2])
-    self.assertEqual(str(cm.exception),
-                     "Value passed to parameter 'a' has DataType int32 not in "
-                     "list of allowed values: string, bool")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("NPolymorphicRestrictIn", a=["foo", "bar"],
+                              name="p")
+      self.assertProtoEquals("""
+        name: 'p' op: 'NPolymorphicRestrictIn' input: 'p/a_0' input: 'p/a_1'
+        attr { key: 'T' value { type: DT_STRING } }
+        attr { key: 'N' value { i: 2 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("NPolymorphicRestrictIn",
+                              a=[False, True, False],
+                              name="b")
+      self.assertProtoEquals("""
+        name: 'b' op: 'NPolymorphicRestrictIn'
+        input: 'b/a_0' input: 'b/a_1' input: 'b/a_2'
+        attr { key: 'T' value { type: DT_BOOL } }
+        attr { key: 'N' value { i: 3 } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NPolymorphicRestrictIn", a=[1, 2])
+      self.assertEqual(
+          str(cm.exception),
+          "Value passed to parameter 'a' has DataType int32 not in "
+          "list of allowed values: string, bool")
 
   def testNInTwice(self):
-    self._add_op("name: 'NInTwice' "
-                 "input_arg { name: 'a' type: DT_INT32 number_attr: 'N' } "
-                 "input_arg { name: 'b' type: DT_STRING number_attr: 'N' } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 0 }")
-
-    op = self._lib.apply_op("NInTwice", a=[1, 2], b=["one", "two"], name="n")
-    self.assertProtoEquals("""
-      name: 'n' op: 'NInTwice'
-      input: 'n/a_0' input: 'n/a_1' input: 'n/b_0' input: 'n/b_1'
-      attr { key: 'N' value { i: 2 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("NInTwice", a=[], b=[], name="o")
-    self.assertProtoEquals("""
-      name: 'o' op: 'NInTwice' attr { key: 'N' value { i: 0 } }
-      """, op.node_def)
-
-    with self.assertRaises(ValueError) as cm:
-      self._lib.apply_op("NInTwice", a=[1, 2, 3], b=["too short"])
-    self.assertEqual(str(cm.exception),
-                     "List argument 'b' to 'NInTwice' Op "
-                     "with length 1 must match "
-                     "length 3 of argument 'a'.")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("NInTwice", a=[1, 2], b=["one", "two"], name="n")
+      self.assertProtoEquals("""
+        name: 'n' op: 'NInTwice'
+        input: 'n/a_0' input: 'n/a_1' input: 'n/b_0' input: 'n/b_1'
+        attr { key: 'N' value { i: 2 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("NInTwice", a=[], b=[], name="o")
+      self.assertProtoEquals("""
+        name: 'o' op: 'NInTwice' attr { key: 'N' value { i: 0 } }
+        """, op.node_def)
+
+      with self.assertRaises(ValueError) as cm:
+        self._lib.apply_op("NInTwice", a=[1, 2, 3], b=["too short"])
+      self.assertEqual(str(cm.exception),
+                       "List argument 'b' to 'NInTwice' Op "
+                       "with length 1 must match "
+                       "length 3 of argument 'a'.")
 
   def testNInPolymorphicTwice(self):
-    self._add_op("name: 'NInPolymorphicTwice' "
-                 "input_arg { name: 'a' type_attr: 'T' number_attr: 'N' } "
-                 "input_arg { name: 'b' type_attr: 'T' number_attr: 'N' } "
-                 "attr { name: 'T' type: 'type' } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 0 }")
-
-    op = self._lib.apply_op("NInPolymorphicTwice", a=[1, 2], b=[3, 4], name="n")
-    self.assertProtoEquals("""
-      name: 'n' op: 'NInPolymorphicTwice'
-      input: 'n/a_0' input: 'n/a_1' input: 'n/b_0' input: 'n/b_1'
-      attr { key: 'T' value { type: DT_INT32 } }
-      attr { key: 'N' value { i: 2 } }
-      """, op.node_def)
-
-    with self.assertRaises(ValueError) as cm:
-      self._lib.apply_op("NInPolymorphicTwice", a=[1, 2, 3], b=[5])
-    self.assertEqual(str(cm.exception),
-                     "List argument 'b' to 'NInPolymorphicTwice' Op "
-                     "with length 1 "
-                     "must match length 3 of argument 'a'.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NInPolymorphicTwice", a=[1, 2], b=["one", "two"])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'b' of 'NInPolymorphicTwice' "
-                     "Op have types [string, string] that do not match type "
-                     "int32 inferred from earlier arguments.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NInPolymorphicTwice",
-                         a=[self.Tensor(dtypes.int32)],
-                         b=[self.Tensor(dtypes.string)])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'b' of "
-                     "'NInPolymorphicTwice' Op have types [string] that do not "
-                     "match type int32 inferred from earlier arguments.")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("NInPolymorphicTwice", a=[1, 2], b=[3, 4],
+                              name="n")
+      self.assertProtoEquals("""
+        name: 'n' op: 'NInPolymorphicTwice'
+        input: 'n/a_0' input: 'n/a_1' input: 'n/b_0' input: 'n/b_1'
+        attr { key: 'T' value { type: DT_INT32 } }
+        attr { key: 'N' value { i: 2 } }
+        """, op.node_def)
+
+      with self.assertRaises(ValueError) as cm:
+        self._lib.apply_op("NInPolymorphicTwice", a=[1, 2, 3], b=[5])
+      self.assertEqual(str(cm.exception),
+                       "List argument 'b' to 'NInPolymorphicTwice' Op "
+                       "with length 1 "
+                       "must match length 3 of argument 'a'.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NInPolymorphicTwice", a=[1, 2], b=["one", "two"])
+      self.assertEqual(str(cm.exception),
+                       "Tensors in list passed to 'b' of 'NInPolymorphicTwice' "
+                       "Op have types [string, string] that do not match type "
+                       "int32 inferred from earlier arguments.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NInPolymorphicTwice",
+                           a=[self.Tensor(dtypes.int32)],
+                           b=[self.Tensor(dtypes.string)])
+      self.assertEqual(str(cm.exception),
+                       "Tensors in list passed to 'b' of "
+                       "'NInPolymorphicTwice' Op have types [string] that do "
+                       "not match type int32 inferred from earlier arguments.")
 
   def testNInTwoTypeVariables(self):
-    self._add_op("name: 'NInTwoTypeVariables' "
-                 "input_arg { name: 'a' type_attr: 'S' number_attr: 'N' } "
-                 "input_arg { name: 'b' type_attr: 'T' number_attr: 'N' } "
-                 "attr { name: 'S' type: 'type' } "
-                 "attr { name: 'T' type: 'type' } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 0 }")
-
-    op = self._lib.apply_op("NInTwoTypeVariables",
-                            a=[1, 2],
-                            b=[True, False],
-                            name="n")
-    self.assertProtoEquals("""
-      name: 'n' op: 'NInTwoTypeVariables'
-      input: 'n/a_0' input: 'n/a_1' input: 'n/b_0' input: 'n/b_1'
-      attr { key: 'S' value { type: DT_INT32 } }
-      attr { key: 'T' value { type: DT_BOOL } }
-      attr { key: 'N' value { i: 2 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("NInTwoTypeVariables", a=[1, 2], b=[3, 4], name="o")
-    self.assertProtoEquals("""
-      name: 'o' op: 'NInTwoTypeVariables'
-      input: 'o/a_0' input: 'o/a_1' input: 'o/b_0' input: 'o/b_1'
-      attr { key: 'S' value { type: DT_INT32 } }
-      attr { key: 'T' value { type: DT_INT32 } }
-      attr { key: 'N' value { i: 2 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("NInTwoTypeVariables",
-                            a=[self.Tensor(dtypes.int32, name="q")],
-                            b=[self.Tensor(dtypes.string, name="r")],
-                            name="p")
-    self.assertProtoEquals("""
-      name: 'p' op: 'NInTwoTypeVariables' input: 'q' input: 'r'
-      attr { key: 'S' value { type: DT_INT32 } }
-      attr { key: 'T' value { type: DT_STRING } }
-      attr { key: 'N' value { i: 1 } }
-      """, op.node_def)
-
-    with self.assertRaises(ValueError) as cm:
-      self._lib.apply_op("NInTwoTypeVariables", a=[1, 2, 3], b=["5"])
-    self.assertEqual(str(cm.exception),
-                     "List argument 'b' to 'NInTwoTypeVariables' Op "
-                     "with length 1 "
-                     "must match length 3 of argument 'a'.")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("NInTwoTypeVariables",
+                              a=[1, 2],
+                              b=[True, False],
+                              name="n")
+      self.assertProtoEquals("""
+        name: 'n' op: 'NInTwoTypeVariables'
+        input: 'n/a_0' input: 'n/a_1' input: 'n/b_0' input: 'n/b_1'
+        attr { key: 'S' value { type: DT_INT32 } }
+        attr { key: 'T' value { type: DT_BOOL } }
+        attr { key: 'N' value { i: 2 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("NInTwoTypeVariables", a=[1, 2], b=[3, 4],
+                              name="o")
+      self.assertProtoEquals("""
+        name: 'o' op: 'NInTwoTypeVariables'
+        input: 'o/a_0' input: 'o/a_1' input: 'o/b_0' input: 'o/b_1'
+        attr { key: 'S' value { type: DT_INT32 } }
+        attr { key: 'T' value { type: DT_INT32 } }
+        attr { key: 'N' value { i: 2 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("NInTwoTypeVariables",
+                              a=[self.Tensor(dtypes.int32, name="q")],
+                              b=[self.Tensor(dtypes.string, name="r")],
+                              name="p")
+      self.assertProtoEquals("""
+        name: 'p' op: 'NInTwoTypeVariables' input: 'q' input: 'r'
+        attr { key: 'S' value { type: DT_INT32 } }
+        attr { key: 'T' value { type: DT_STRING } }
+        attr { key: 'N' value { i: 1 } }
+        """, op.node_def)
+
+      with self.assertRaises(ValueError) as cm:
+        self._lib.apply_op("NInTwoTypeVariables", a=[1, 2, 3], b=["5"])
+      self.assertEqual(str(cm.exception),
+                       "List argument 'b' to 'NInTwoTypeVariables' Op "
+                       "with length 1 "
+                       "must match length 3 of argument 'a'.")
 
   def testInPolymorphicTwice(self):
-    self._add_op("name: 'InPolymorphicTwice' "
-                 "input_arg { name: 'a' type_attr: 'T' number_attr: 'N' } "
-                 "input_arg { name: 'b' type_attr: 'T' number_attr: 'M' } "
-                 "attr { name: 'T' type: 'type' } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 0 } "
-                 "attr { name: 'M' type: 'int' has_minimum: true minimum: 0 } ")
-
-    op = self._lib.apply_op("InPolymorphicTwice", a=[8], b=[3, 4, 5], name="n")
-    self.assertProtoEquals("""
-      name: 'n' op: 'InPolymorphicTwice'
-      input: 'n/a_0' input: 'n/b_0' input: 'n/b_1' input: 'n/b_2'
-      attr { key: 'T' value { type: DT_INT32 } }
-      attr { key: 'N' value { i: 1 } }
-      attr { key: 'M' value { i: 3 } }
-      """, op.node_def)
-
-    op = self._lib.apply_op("InPolymorphicTwice", a=[8], b=[], name="o")
-    self.assertProtoEquals("""
-      name: 'o' op: 'InPolymorphicTwice' input: 'o/a_0'
-      attr { key: 'T' value { type: DT_INT32 } }
-      attr { key: 'N' value { i: 1 } }
-      attr { key: 'M' value { i: 0 } }
-      """, op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("InPolymorphicTwice", a=[], b=[3, 4, 5])
-    self.assertEqual(str(cm.exception),
-                     "Don't know how to infer type variable from empty input "
-                     "list passed to input 'a' of 'InPolymorphicTwice' Op.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("InPolymorphicTwice", a=[1, 2], b=["one", "two"])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'b' of 'InPolymorphicTwice' Op "
-                     "have types [string, string] that do not match type int32 "
-                     "inferred from earlier arguments.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("InPolymorphicTwice",
-                         a=[self.Tensor(dtypes.int32)],
-                         b=[self.Tensor(dtypes.string)])
-    self.assertEqual(str(cm.exception),
-                     "Tensors in list passed to 'b' of 'InPolymorphicTwice' "
-                     "Op have types [string] that do not match type int32 "
-                     "inferred from earlier arguments.")
+    with ops.Graph().as_default():
+      op = self._lib.apply_op("InPolymorphicTwice", a=[8], b=[3, 4, 5],
+                              name="n")
+      self.assertProtoEquals("""
+        name: 'n' op: 'InPolymorphicTwice'
+        input: 'n/a_0' input: 'n/b_0' input: 'n/b_1' input: 'n/b_2'
+        attr { key: 'T' value { type: DT_INT32 } }
+        attr { key: 'N' value { i: 1 } }
+        attr { key: 'M' value { i: 3 } }
+        """, op.node_def)
+
+      op = self._lib.apply_op("InPolymorphicTwice", a=[8], b=[], name="o")
+      self.assertProtoEquals("""
+        name: 'o' op: 'InPolymorphicTwice' input: 'o/a_0'
+        attr { key: 'T' value { type: DT_INT32 } }
+        attr { key: 'N' value { i: 1 } }
+        attr { key: 'M' value { i: 0 } }
+        """, op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("InPolymorphicTwice", a=[], b=[3, 4, 5])
+      self.assertEqual(str(cm.exception),
+                       "Don't know how to infer type variable from empty input "
+                       "list passed to input 'a' of 'InPolymorphicTwice' Op.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("InPolymorphicTwice", a=[1, 2], b=["one", "two"])
+      self.assertEqual(
+          str(cm.exception),
+          "Tensors in list passed to 'b' of 'InPolymorphicTwice' Op "
+          "have types [string, string] that do not match type int32 "
+          "inferred from earlier arguments.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("InPolymorphicTwice",
+                           a=[self.Tensor(dtypes.int32)],
+                           b=[self.Tensor(dtypes.string)])
+      self.assertEqual(str(cm.exception),
+                       "Tensors in list passed to 'b' of 'InPolymorphicTwice' "
+                       "Op have types [string] that do not match type int32 "
+                       "inferred from earlier arguments.")
 
   def testNIntsOut(self):
-    self._add_op("name: 'NIntsOut' "
-                 "output_arg { name: 'a' type: DT_INT32 number_attr: 'N' } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }")
-
-    out1, out2 = self._lib.apply_op("NIntsOut", N=2, name="n")
-    self.assertEqual(dtypes.int32, out1.dtype)
-    self.assertEqual(dtypes.int32, out2.dtype)
-    self.assertProtoEquals("""
-      name: 'n' op: 'NIntsOut' attr { key: 'N' value { i: 2 } }
-      """, out1.op.node_def)
-
-    out1, out2, out3, out4, out5 = self._lib.apply_op(
-        "NIntsOut", N=5, name="o")
-    self.assertEqual(dtypes.int32, out1.dtype)
-    self.assertEqual(dtypes.int32, out2.dtype)
-    self.assertEqual(dtypes.int32, out3.dtype)
-    self.assertEqual(dtypes.int32, out4.dtype)
-    self.assertEqual(dtypes.int32, out5.dtype)
-    self.assertProtoEquals("""
-      name: 'o' op: 'NIntsOut' attr { key: 'N' value { i: 5 } }
-      """, out5.op.node_def)
-
-    with self.assertRaises(ValueError) as cm:
-      self._lib.apply_op("NIntsOut", N=1)
-    self.assertEqual(str(cm.exception),
-                     "Attr 'N' of 'NIntsOut' Op passed 1 less than minimum 2.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NIntsOut", N=[3])
-    self.assertEqual(str(cm.exception),
-                     "Expected int for argument 'N' not [3].")
+    with ops.Graph().as_default():
+      out1, out2 = self._lib.apply_op("NIntsOut", N=2, name="n")
+      self.assertEqual(dtypes.int32, out1.dtype)
+      self.assertEqual(dtypes.int32, out2.dtype)
+      self.assertProtoEquals("""
+        name: 'n' op: 'NIntsOut' attr { key: 'N' value { i: 2 } }
+        """, out1.op.node_def)
+
+      out1, out2, out3, out4, out5 = self._lib.apply_op(
+          "NIntsOut", N=5, name="o")
+      self.assertEqual(dtypes.int32, out1.dtype)
+      self.assertEqual(dtypes.int32, out2.dtype)
+      self.assertEqual(dtypes.int32, out3.dtype)
+      self.assertEqual(dtypes.int32, out4.dtype)
+      self.assertEqual(dtypes.int32, out5.dtype)
+      self.assertProtoEquals("""
+        name: 'o' op: 'NIntsOut' attr { key: 'N' value { i: 5 } }
+        """, out5.op.node_def)
+
+      with self.assertRaises(ValueError) as cm:
+        self._lib.apply_op("NIntsOut", N=1)
+      self.assertEqual(
+          str(cm.exception),
+          "Attr 'N' of 'NIntsOut' Op passed 1 less than minimum 2.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NIntsOut", N=[3])
+      self.assertEqual(str(cm.exception),
+                       "Expected int for argument 'N' not [3].")
 
   def testNIntsOutDefault(self):
-    self._add_op("name: 'NIntsOutDefault' "
-                 "output_arg { name: 'a' type: DT_INT32 number_attr: 'N' } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 2"
-                 "  default_value { i:3 } }")
-
-    out1, out2, out3 = self._lib.apply_op(
-        "NIntsOutDefault", N=None, name="z")
-    self.assertEqual(dtypes.int32, out1.dtype)
-    self.assertEqual(dtypes.int32, out2.dtype)
-    self.assertEqual(dtypes.int32, out3.dtype)
-    self.assertProtoEquals("""
-      name: 'z' op: 'NIntsOutDefault' attr { key: 'N' value { i: 3 } }
-      """, out1.op.node_def)
-
-    out1, out2 = self._lib.apply_op("NIntsOutDefault", N=2, name="y")
-    self.assertEqual(dtypes.int32, out1.dtype)
-    self.assertEqual(dtypes.int32, out2.dtype)
-    self.assertProtoEquals("""
-      name: 'y' op: 'NIntsOutDefault' attr { key: 'N' value { i: 2 } }
-      """, out2.op.node_def)
+    with ops.Graph().as_default():
+      out1, out2, out3 = self._lib.apply_op(
+          "NIntsOutDefault", N=None, name="z")
+      self.assertEqual(dtypes.int32, out1.dtype)
+      self.assertEqual(dtypes.int32, out2.dtype)
+      self.assertEqual(dtypes.int32, out3.dtype)
+      self.assertProtoEquals("""
+        name: 'z' op: 'NIntsOutDefault' attr { key: 'N' value { i: 3 } }
+        """, out1.op.node_def)
+
+      out1, out2 = self._lib.apply_op("NIntsOutDefault", N=2, name="y")
+      self.assertEqual(dtypes.int32, out1.dtype)
+      self.assertEqual(dtypes.int32, out2.dtype)
+      self.assertProtoEquals("""
+        name: 'y' op: 'NIntsOutDefault' attr { key: 'N' value { i: 2 } }
+        """, out2.op.node_def)
 
   def testNPolymorphicOut(self):
-    self._add_op("name: 'NPolymorphicOut' "
-                 "output_arg { name: 'a' type_attr: 'T' number_attr: 'N' } "
-                 "attr { name: 'T' type: 'type' } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }")
-
-    out1, out2 = self._lib.apply_op("NPolymorphicOut",
-                                    N=2,
-                                    T=dtypes.int32,
-                                    name="n")
-    self.assertEqual(dtypes.int32, out1.dtype)
-    self.assertEqual(dtypes.int32, out2.dtype)
-    self.assertProtoEquals("""
-      name: 'n' op: 'NPolymorphicOut'
-      attr { key: 'T' value { type: DT_INT32 } }
-      attr { key: 'N' value { i: 2 } }
-      """, out1.op.node_def)
-
-    out1, out2, out3 = self._lib.apply_op(
-        "NPolymorphicOut", T=dtypes.string, N=3, name="o")
-    self.assertEqual(dtypes.string, out1.dtype)
-    self.assertEqual(dtypes.string, out2.dtype)
-    self.assertEqual(dtypes.string, out3.dtype)
-    self.assertProtoEquals("""
-      name: 'o' op: 'NPolymorphicOut'
-      attr { key: 'T' value { type: DT_STRING } }
-      attr { key: 'N' value { i: 3 } }
-      """, out3.op.node_def)
-
-    with self.assertRaises(ValueError) as cm:
-      self._lib.apply_op("NPolymorphicOut", N=1, T=dtypes.string)
-    self.assertEqual(str(cm.exception),
-                     "Attr 'N' of 'NPolymorphicOut' Op "
-                     "passed 1 less than minimum 2.")
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NPolymorphicOut", N=3, T=[dtypes.string])
-    self.assertEqual(
-        str(cm.exception),
-        "Expected DataType for argument 'T' not [tf.string].")
+    with ops.Graph().as_default():
+      out1, out2 = self._lib.apply_op("NPolymorphicOut",
+                                      N=2,
+                                      T=dtypes.int32,
+                                      name="n")
+      self.assertEqual(dtypes.int32, out1.dtype)
+      self.assertEqual(dtypes.int32, out2.dtype)
+      self.assertProtoEquals("""
+        name: 'n' op: 'NPolymorphicOut'
+        attr { key: 'T' value { type: DT_INT32 } }
+        attr { key: 'N' value { i: 2 } }
+        """, out1.op.node_def)
+
+      out1, out2, out3 = self._lib.apply_op(
+          "NPolymorphicOut", T=dtypes.string, N=3, name="o")
+      self.assertEqual(dtypes.string, out1.dtype)
+      self.assertEqual(dtypes.string, out2.dtype)
+      self.assertEqual(dtypes.string, out3.dtype)
+      self.assertProtoEquals("""
+        name: 'o' op: 'NPolymorphicOut'
+        attr { key: 'T' value { type: DT_STRING } }
+        attr { key: 'N' value { i: 3 } }
+        """, out3.op.node_def)
+
+      with self.assertRaises(ValueError) as cm:
+        self._lib.apply_op("NPolymorphicOut", N=1, T=dtypes.string)
+      self.assertEqual(str(cm.exception),
+                       "Attr 'N' of 'NPolymorphicOut' Op "
+                       "passed 1 less than minimum 2.")
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NPolymorphicOut", N=3, T=[dtypes.string])
+      self.assertEqual(
+          str(cm.exception),
+          "Expected DataType for argument 'T' not [tf.string].")
 
   def testNPolymorphicOutDefault(self):
-    self._add_op("name: 'NPolymorphicOutDefault' "
-                 "output_arg { name: 'a' type_attr: 'T' number_attr: 'N' } "
-                 "attr { name: 'T' type: 'type'"
-                 "  default_value { type: DT_BOOL } } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 "
-                 "  default_value { i: 2 } }")
-
-    out1, out2 = self._lib.apply_op(
-        "NPolymorphicOutDefault", N=None, T=None, name="r")
-    self.assertEqual(dtypes.bool, out1.dtype)
-    self.assertEqual(dtypes.bool, out2.dtype)
-    self.assertProtoEquals("""
-      name: 'r' op: 'NPolymorphicOutDefault'
-      attr { key: 'T' value { type: DT_BOOL } }
-      attr { key: 'N' value { i: 2 } }
-      """, out1.op.node_def)
-
-    out1, out2, out3 = self._lib.apply_op(
-        "NPolymorphicOutDefault", N=3, T=None, name="s")
-    self.assertEqual(dtypes.bool, out1.dtype)
-    self.assertEqual(dtypes.bool, out2.dtype)
-    self.assertEqual(dtypes.bool, out3.dtype)
-    self.assertProtoEquals("""
-      name: 's' op: 'NPolymorphicOutDefault'
-      attr { key: 'T' value { type: DT_BOOL } }
-      attr { key: 'N' value { i: 3 } }
-      """, out1.op.node_def)
-
-    out1, out2 = self._lib.apply_op(
-        "NPolymorphicOutDefault", N=None, T=dtypes.int32, name="t")
-    self.assertEqual(dtypes.int32, out1.dtype)
-    self.assertEqual(dtypes.int32, out2.dtype)
-    self.assertProtoEquals("""
-      name: 't' op: 'NPolymorphicOutDefault'
-      attr { key: 'T' value { type: DT_INT32 } }
-      attr { key: 'N' value { i: 2 } }
-      """, out1.op.node_def)
-
-    out1, out2, out3 = self._lib.apply_op(
-        "NPolymorphicOutDefault", N=3, T=dtypes.int32, name="u")
-    self.assertEqual(dtypes.int32, out1.dtype)
-    self.assertEqual(dtypes.int32, out2.dtype)
-    self.assertEqual(dtypes.int32, out3.dtype)
-    self.assertProtoEquals("""
-      name: 'u' op: 'NPolymorphicOutDefault'
-      attr { key: 'T' value { type: DT_INT32 } }
-      attr { key: 'N' value { i: 3 } }
-      """, out1.op.node_def)
+    with ops.Graph().as_default():
+      out1, out2 = self._lib.apply_op(
+          "NPolymorphicOutDefault", N=None, T=None, name="r")
+      self.assertEqual(dtypes.bool, out1.dtype)
+      self.assertEqual(dtypes.bool, out2.dtype)
+      self.assertProtoEquals("""
+        name: 'r' op: 'NPolymorphicOutDefault'
+        attr { key: 'T' value { type: DT_BOOL } }
+        attr { key: 'N' value { i: 2 } }
+        """, out1.op.node_def)
+
+      out1, out2, out3 = self._lib.apply_op(
+          "NPolymorphicOutDefault", N=3, T=None, name="s")
+      self.assertEqual(dtypes.bool, out1.dtype)
+      self.assertEqual(dtypes.bool, out2.dtype)
+      self.assertEqual(dtypes.bool, out3.dtype)
+      self.assertProtoEquals("""
+        name: 's' op: 'NPolymorphicOutDefault'
+        attr { key: 'T' value { type: DT_BOOL } }
+        attr { key: 'N' value { i: 3 } }
+        """, out1.op.node_def)
+
+      out1, out2 = self._lib.apply_op(
+          "NPolymorphicOutDefault", N=None, T=dtypes.int32, name="t")
+      self.assertEqual(dtypes.int32, out1.dtype)
+      self.assertEqual(dtypes.int32, out2.dtype)
+      self.assertProtoEquals("""
+        name: 't' op: 'NPolymorphicOutDefault'
+        attr { key: 'T' value { type: DT_INT32 } }
+        attr { key: 'N' value { i: 2 } }
+        """, out1.op.node_def)
+
+      out1, out2, out3 = self._lib.apply_op(
+          "NPolymorphicOutDefault", N=3, T=dtypes.int32, name="u")
+      self.assertEqual(dtypes.int32, out1.dtype)
+      self.assertEqual(dtypes.int32, out2.dtype)
+      self.assertEqual(dtypes.int32, out3.dtype)
+      self.assertProtoEquals("""
+        name: 'u' op: 'NPolymorphicOutDefault'
+        attr { key: 'T' value { type: DT_INT32 } }
+        attr { key: 'N' value { i: 3 } }
+        """, out1.op.node_def)
 
   def testNPolymorphicRestrictOut(self):
-    self._add_op("name: 'NPolymorphicRestrictOut' "
-                 "output_arg { name: 'a' type_attr: 'T' number_attr: 'N' } "
-                 "attr { name: 'T' type: 'type' allowed_values { "
-                 "  list { type: DT_STRING type: DT_BOOL } } } "
-                 "attr { name: 'N' type: 'int' has_minimum: true minimum: 2 }")
-
-    out1, out2, out3 = self._lib.apply_op(
-        "NPolymorphicRestrictOut", N=3, T=dtypes.bool, name="u")
-    self.assertEqual(dtypes.bool, out1.dtype)
-    self.assertEqual(dtypes.bool, out2.dtype)
-    self.assertEqual(dtypes.bool, out3.dtype)
-    self.assertProtoEquals("""
-      name: 'u' op: 'NPolymorphicRestrictOut'
-      attr { key: 'T' value { type: DT_BOOL } }
-      attr { key: 'N' value { i: 3 } }
-      """, out1.op.node_def)
-
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("NPolymorphicRestrictOut", N=2, T=dtypes.int32)
-    self.assertEqual(str(cm.exception),
-                     "Value passed to parameter 'T' has DataType int32 "
-                     "not in list of allowed values: string, bool")
+    with ops.Graph().as_default():
+      out1, out2, out3 = self._lib.apply_op(
+          "NPolymorphicRestrictOut", N=3, T=dtypes.bool, name="u")
+      self.assertEqual(dtypes.bool, out1.dtype)
+      self.assertEqual(dtypes.bool, out2.dtype)
+      self.assertEqual(dtypes.bool, out3.dtype)
+      self.assertProtoEquals("""
+        name: 'u' op: 'NPolymorphicRestrictOut'
+        attr { key: 'T' value { type: DT_BOOL } }
+        attr { key: 'N' value { i: 3 } }
+        """, out1.op.node_def)
+
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("NPolymorphicRestrictOut", N=2, T=dtypes.int32)
+      self.assertEqual(str(cm.exception),
+                       "Value passed to parameter 'T' has DataType int32 "
+                       "not in list of allowed values: string, bool")
 
   def testRef(self):
-    self._add_op("name: 'RefIn' "
-                 "input_arg { name: 'a' type_attr: 'T' is_ref: true } "
-                 "attr { name: 'T' type: 'type' } ")
-    self._add_op("name: 'TwoRefsIn' "
-                 "input_arg { name: 'a' type_attr: 'T' is_ref: true } "
-                 "input_arg { name: 'b' type_attr: 'T' is_ref: true } "
-                 "attr { name: 'T' type: 'type' } ")
-    self._add_op("name: 'RefOut' "
-                 "output_arg { name: 'a' type_attr: 'T' is_ref: true } "
-                 "attr { name: 'T' type: 'type' } ")
-
-    out = self._lib.apply_op("RefOut", T=dtypes.bool, name="o")
-    self.assertEqual(dtypes.bool_ref, out.dtype)
-    self.assertProtoEquals("""
-      name: 'o' op: 'RefOut'
-      attr { key: 'T' value { type: DT_BOOL } }
-      """, out.op.node_def)
-
-    op = self._lib.apply_op("RefIn", a=out, name="i")
-    self.assertProtoEquals("""
-      name: 'i' op: 'RefIn' input: 'o'
-      attr { key: 'T' value { type: DT_BOOL } }
-      attr { key: "_class" value { list { s: "loc:@o" } } }
-      """, op.node_def)
-
-    # Can pass ref to non-ref input.
-    out = self._lib.apply_op("RefOut", T=dtypes.int32, name="r")
-    out = self._lib.apply_op("Simple", a=out, name="s")
-    self.assertProtoEquals("""
-      name: 's' op: 'Simple' input: 'r'
-      """, out.op.node_def)
-
-    # Can't pass non-ref to ref input.
-    with self.assertRaises(TypeError) as cm:
-      self._lib.apply_op("RefIn", a=2)
-    self.assertEqual(str(cm.exception),
-                     "'RefIn' Op requires that input 'a' be a mutable tensor " +
-                     "(e.g.: a tf.Variable)")
-
-    input_a = self._lib.apply_op("RefOut", T=dtypes.int32, name="t")
-    input_b = self._lib.apply_op("RefOut", T=dtypes.int32, name="u")
-    op = self._lib.apply_op("TwoRefsIn", a=input_a, b=input_b, name="v")
-    # NOTE(mrry): The order of colocation constraints is an implementation
-    # detail.
-    self.assertProtoEquals("""
-      name: 'v' op: 'TwoRefsIn' input: 't' input: 'u'
-      attr { key: 'T' value { type: DT_INT32 } }
-      attr { key: "_class" value { list { s: "loc:@t" s: "loc:@u" } } }
-      """, op.node_def)
+    with ops.Graph().as_default():
+      out = self._lib.apply_op("RefOut", T=dtypes.bool, name="o")
+      self.assertEqual(dtypes.bool_ref, out.dtype)
+      self.assertProtoEquals("""
+        name: 'o' op: 'RefOut'
+        attr { key: 'T' value { type: DT_BOOL } }
+        """, out.op.node_def)
+
+      op = self._lib.apply_op("RefIn", a=out, name="i")
+      self.assertProtoEquals("""
+        name: 'i' op: 'RefIn' input: 'o'
+        attr { key: 'T' value { type: DT_BOOL } }
+        attr { key: "_class" value { list { s: "loc:@o" } } }
+        """, op.node_def)
+
+      # Can pass ref to non-ref input.
+      out = self._lib.apply_op("RefOut", T=dtypes.int32, name="r")
+      out = self._lib.apply_op("Simple", a=out, name="s")
+      self.assertProtoEquals("""
+        name: 's' op: 'Simple' input: 'r'
+        """, out.op.node_def)
+
+      # Can't pass non-ref to ref input.
+      with self.assertRaises(TypeError) as cm:
+        self._lib.apply_op("RefIn", a=2)
+      self.assertEqual(
+          str(cm.exception),
+          "'RefIn' Op requires that input 'a' be a mutable tensor " +
+          "(e.g.: a tf.Variable)")
+
+      input_a = self._lib.apply_op("RefOut", T=dtypes.int32, name="t")
+      input_b = self._lib.apply_op("RefOut", T=dtypes.int32, name="u")
+      op = self._lib.apply_op("TwoRefsIn", a=input_a, b=input_b, name="v")
+      # NOTE(mrry): The order of colocation constraints is an implementation
+      # detail.
+      self.assertProtoEquals("""
+        name: 'v' op: 'TwoRefsIn' input: 't' input: 'u'
+        attr { key: 'T' value { type: DT_INT32 } }
+        attr { key: "_class" value { list { s: "loc:@t" s: "loc:@u" } } }
+        """, op.node_def)
 
   def testSpecifyDevice(self):
-    with self._g.device("/job:ADevice"):
-      self._lib.apply_op("Simple", a=3)
-    # We look at the whole graph here to make sure the Const op is also given
-    # the specified device.
-    graph_def = self._g.as_graph_def()
-    self.assertEqual(len(graph_def.node), 2)
-    for node in graph_def.node:
-      self.assertDeviceEqual(node.device, "/job:ADevice")
+    graph = ops.Graph()
+    with graph.as_default():
+      with graph.device("/job:ADevice"):
+        self._lib.apply_op("Simple", a=3)
+      # We look at the whole graph here to make sure the Const op is also given
+      # the specified device.
+      graph_def = graph.as_graph_def()
+      self.assertEqual(len(graph_def.node), 2)
+      for node in graph_def.node:
+        self.assertDeviceEqual(node.device, "/job:ADevice")
 
   def testStructuredOutputSingleList(self):
-    self._add_op("name: 'SimpleStruct' "
-                 "output_arg { name: 'a' type: DT_INT32 number_attr: 'n_a' } "
-                 "attr { name: 'n_a' type: 'int' }")
-    for n_a in [0, 1, 3]:
-      a = self._lib.apply_op("SimpleStruct", n_a=n_a)
-      self.assertTrue(isinstance(a, list))
-      self.assertEqual(n_a, len(a))
+    with ops.Graph().as_default():
+      for n_a in [0, 1, 3]:
+        a = self._lib.apply_op("SimpleStruct", n_a=n_a)
+        self.assertTrue(isinstance(a, list))
+        self.assertEqual(n_a, len(a))
 
   def testStructuredOutputListAndSingle(self):
-    self._add_op("name: 'MixedStruct' "
-                 "output_arg { name: 'a' type: DT_INT32 number_attr: 'n_a' } "
-                 "output_arg { name: 'b' type: DT_FLOAT } "
-                 "attr { name: 'n_a' type: 'int' }")
-    for n_a in [0, 1, 3]:
-      a, b = self._lib.apply_op("MixedStruct", n_a=n_a)
-      self.assertTrue(isinstance(a, list))
-      self.assertEqual(n_a, len(a))
-      self.assertTrue(all(x.dtype == dtypes.int32 for x in a))
-      self.assertTrue(isinstance(b, ops.Tensor))
-      self.assertEqual(dtypes.float32, b.dtype)
+    with ops.Graph().as_default():
+      for n_a in [0, 1, 3]:
+        a, b = self._lib.apply_op("MixedStruct", n_a=n_a)
+        self.assertTrue(isinstance(a, list))
+        self.assertEqual(n_a, len(a))
+        self.assertTrue(all(x.dtype == dtypes.int32 for x in a))
+        self.assertTrue(isinstance(b, ops.Tensor))
+        self.assertEqual(dtypes.float32, b.dtype)
 
   def testStructuredOutputMultipleLists(self):
-    self._add_op("name: 'ComplexStruct' "
-                 "output_arg { name: 'a' type: DT_INT32 number_attr: 'n_a' } "
-                 "output_arg { name: 'b' type: DT_INT64 number_attr: 'n_b' } "
-                 "output_arg { name: 'c' type_list_attr: 't_c' } "
-                 "attr { name: 'n_a' type: 'int' } "
-                 "attr { name: 'n_b' type: 'int' } "
-                 "attr { name: 't_c' type: 'list(type)' }")
-    for n_a in [0, 1, 3]:
-      for n_b in [0, 1, 3]:
-        for t_c in [[],
-                    [dtypes.int32],
-                    [dtypes.int32, dtypes.float32]]:
-          a, b, c = self._lib.apply_op("ComplexStruct",
-                                       n_a=n_a,
-                                       n_b=n_b,
-                                       t_c=t_c)
-
-          self.assertEqual(n_a, len(a))
-          self.assertTrue(all(x.dtype == dtypes.int32 for x in a))
-          self.assertEqual(n_b, len(b))
-          self.assertTrue(all(x.dtype == dtypes.int64 for x in b))
-          self.assertEqual(t_c, [x.dtype for x in c])
-
-
+    with ops.Graph().as_default():
+      for n_a in [0, 1, 3]:
+        for n_b in [0, 1, 3]:
+          for t_c in [[],
+                      [dtypes.int32],
+                      [dtypes.int32, dtypes.float32]]:
+            a, b, c = self._lib.apply_op("ComplexStruct",
+                                         n_a=n_a,
+                                         n_b=n_b,
+                                         t_c=t_c)
+
+            self.assertEqual(n_a, len(a))
+            self.assertTrue(all(x.dtype == dtypes.int32 for x in a))
+            self.assertEqual(n_b, len(b))
+            self.assertTrue(all(x.dtype == dtypes.int64 for x in b))
+            self.assertEqual(t_c, [x.dtype for x in c])
+
+
+@test_util.with_c_api
 class OpDefLibraryGraphTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
-    self._lib = OpDefLibrary()
-    self._g = ops.Graph()
-    self._add_op("name: 'Simple' input_arg { name: 'a' type: DT_INT32 } "
-                 "output_arg { name: 'out' type: DT_FLOAT }")
-    self._add_op("name: 'Binary' "
-                 "input_arg { name: 'a' type_attr: 'T' } "
-                 "input_arg { name: 'b' type_attr: 'T' } "
-                 "output_arg { name: 'out' type_attr: 'T' } "
-                 "attr { name: 'T' type: 'type' }")
+    self._lib = test_ops._op_def_lib
 
   def _add_op(self, ascii):
     op_def = op_def_pb2.OpDef()
@@ -1556,15 +1346,15 @@ class OpDefLibraryGraphTest(test_util.TensorFlowTestCase):
     self.assertEqual(out.graph, ops.get_default_graph())
 
   def testDefaultGraph(self):
-    with self._g.as_default():
+    graph = ops.Graph()
+    with graph.as_default():
       out = self._lib.apply_op("Simple", a=3)
-      self.assertEqual(out.graph, self._g)
+      self.assertEqual(out.graph, graph)
 
   def testDifferentGraphFails(self):
-    with self._g.as_default():
+    with ops.Graph().as_default():
       a = self._lib.apply_op("Simple", a=3)
-    other_g = ops.Graph()
-    with other_g.as_default():
+    with ops.Graph().as_default():
       b = self._lib.apply_op("Simple", a=4)
     with self.assertRaises(ValueError) as cm:
       self._lib.apply_op("Binary", a=a, b=b)
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 60df8f82f0dcfb011a98802d358b2644727d7a00..e7f08a64a622c7a8332aa095ad6de86015d18a2e 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import collections
 import copy
 import linecache
+import os
 import re
 import sys
 import threading
@@ -35,6 +36,7 @@ from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
 from tensorflow.core.framework import op_def_pb2
 from tensorflow.core.framework import versions_pb2
+from tensorflow.core.protobuf import config_pb2
 from tensorflow.python import pywrap_tensorflow as c_api
 from tensorflow.python.eager import context
 from tensorflow.python.eager import core
@@ -47,28 +49,19 @@ from tensorflow.python.framework import op_def_registry
 from tensorflow.python.framework import registry
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import versions
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.platform import app
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import compat
 from tensorflow.python.util import decorator_utils
 from tensorflow.python.util import tf_contextlib
 
+
 # Temporary global switch determining if we should enable the work-in-progress
 # calls to the C API. Currently disabled by default but can be manually enabled
-# e.g. in tests. This will be removed once all functionality is supported and
-# there's no performance penalty with it enabled.
-#
-# TODO(skyewm) before we can remove this:
-# - functions
-# - import_graph_def() incrementally adds inputs to ops (i.e. creates an
-#   Operation and then calls _add_input()). The current code requires that all
-#   inputs be specified when creating the Operation (since we call
-#   TF_FinishOperation()).
-# - ops_test.py (and others?) create unregistered op types
-# - while loop
-# - performance (e.g. delete/refactor redundant Python functionality, switch to
-#   new session API)
-_USE_C_API = False
+# in code or via the environment variable. This will be removed once all
+# functionality is supported and there's no performance penalty with it enabled.
+_USE_C_API = os.getenv("TF_C_API_GRAPH_CONSTRUCTION", "0") is not "0"
 
 
 def tensor_id(tensor):
@@ -373,6 +366,19 @@ class Tensor(_TensorLike):
       A `TensorShape` representing the shape of this tensor.
 
     """
+    if _USE_C_API:
+      graph = self._op._graph._c_graph  # pylint: disable=protected-access
+      with errors.raise_exception_on_not_ok_status() as status:
+        num_dims = c_api.TF_GraphGetTensorNumDims(graph, self._as_tf_output(),
+                                                  status)
+      if num_dims == -1:
+        dim_list = None
+      else:
+        with errors.raise_exception_on_not_ok_status() as status:
+          dim_list = c_api.TF_GraphGetTensorShape_wrapper(
+              graph, self._as_tf_output(), num_dims, status)
+        dim_list = [None if i == -1 else i for i in dim_list]
+      return tensor_shape.TensorShape(dim_list)
     return self._shape
 
   def __iter__(self):
@@ -392,8 +398,8 @@ class Tensor(_TensorLike):
       yield self[i]
 
   def _shape_as_list(self):
-    if self._shape.ndims is not None:
-      return [dim.value for dim in self._shape.dims]
+    if self.shape.ndims is not None:
+      return [dim.value for dim in self.shape.dims]
     else:
       return None
 
@@ -409,7 +415,7 @@ class Tensor(_TensorLike):
     Returns:
       Integer rank or None
     """
-    return self._shape.ndims
+    return self.shape.ndims
 
   def get_shape(self):
     """Alias of Tensor.shape."""
@@ -440,14 +446,35 @@ class Tensor(_TensorLike):
     ```
 
     Args:
-      shape: A `TensorShape` representing the shape of this tensor.
+      shape: A `TensorShape` representing the shape of this tensor, a
+      `TensorShapeProto`, a list, a tuple, or None.
 
     Raises:
       ValueError: If `shape` is not compatible with the current shape of
         this tensor.
     """
-    # TODO(skyewm): call C API
-    self._shape = self._shape.merge_with(shape)
+    if not _USE_C_API:
+      self._shape = self._shape.merge_with(shape)  # pylint: disable=protected-access
+      return
+    if not isinstance(shape, tensor_shape.TensorShape):
+      shape = tensor_shape.TensorShape(shape)
+    dim_list = []
+    if shape.dims is None:
+      unknown_shape = True
+    else:
+      unknown_shape = False
+      for dim in shape.dims:
+        if dim.value is None:
+          dim_list.append(-1)
+        else:
+          dim_list.append(dim.value)
+    with errors.raise_exception_on_not_ok_status() as status:
+      c_api.TF_GraphSetTensorShape_wrapper(
+          self._op._graph._c_graph,  # pylint: disable=protected-access
+          self._as_tf_output(),
+          dim_list,
+          unknown_shape,
+          status)
 
   @property
   def value_index(self):
@@ -460,7 +487,17 @@ class Tensor(_TensorLike):
     Returns:
       A list of `Operation`s.
     """
-    return self._consumers
+    if self._op._c_op:  # pylint: disable=protected-access
+      consumer_names = c_api.TF_OperationOutputConsumers_wrapper(
+          self._as_tf_output())
+      # pylint: disable=protected-access
+      return [
+          self.graph._get_operation_by_name_unsafe(name)
+          for name in consumer_names
+      ]
+      # pylint: enable=protected-access
+    else:
+      return self._consumers
 
   def _add_consumer(self, consumer):
     """Add a consumer to this tensor.
@@ -471,6 +508,9 @@ class Tensor(_TensorLike):
     Raises:
       TypeError: if the consumer is not an Operation.
     """
+    # pylint: disable=protected-access
+    assert not self._op._c_op, "Tensor._add_consumer doesn't work with C API"
+    # pylint: enable=protected-access
     if not isinstance(consumer, Operation):
       raise TypeError("Consumer must be an Operation: %s" % consumer)
     self._consumers.append(consumer)
@@ -598,11 +638,6 @@ class Tensor(_TensorLike):
     """
     return _eval_using_default_session(self, feed_dict, self.graph, session)
 
-  def _dup(self):
-    ret = copy.copy(self)
-    ret._id = uid()  # pylint: disable=protected-access
-    return ret
-
 
 # TODO(agarwal): consider getting rid of this.
 class _EagerTensorBase(Tensor):
@@ -641,8 +676,8 @@ class _EagerTensorBase(Tensor):
   def __float__(self):
     return float(self.numpy())
 
-  def __array__(self):
-    return np.array(self.numpy())
+  def __array__(self, dtype=None):
+    return np.array(self.numpy(), dtype=dtype)
 
   def __format__(self, format_spec):
     return self.numpy().__format__(format_spec)
@@ -728,9 +763,6 @@ class _EagerTensorBase(Tensor):
     return new_tensor
     # pylint: enable=protected-access
 
-  def _dup(self):
-    return self._copy(device_name=self.device)
-
   @property
   def shape(self):
     return tensor_shape.TensorShape(self._shape_tuple())
@@ -938,7 +970,7 @@ def internal_convert_to_tensor(value,
     # Fast path for EagerTensors that don't need any conversion.
     if isinstance(value, EagerTensor):
       # Note that we don't check that value's dtype matches the dtype
-      # argument.  We exepct that the C runtime will do that checking
+      # argument.  We expect that the C runtime will do that checking
       # when we execute the kernel.
       return value
 
@@ -1520,7 +1552,7 @@ class Operation(object):
     # an Operation for that op. This is useful for creating Operations for ops
     # indirectly created by C API methods, e.g. the ops created by
     # TF_ImportGraphDef. When `node_def` is a TF_Operation, all optional fields
-    # except `control_inputs` should be None.
+    # should be None.
 
     if isinstance(node_def, node_def_pb2.NodeDef):
       if node_def.ByteSize() >= (1 << 31) or node_def.ByteSize() < 0:
@@ -1533,6 +1565,7 @@ class Operation(object):
     elif type(node_def).__name__ == "SwigPyObject":
       assert inputs is None
       assert output_types is None
+      assert control_inputs is None
       assert input_types is None
       assert original_op is None
       assert op_def is None
@@ -1578,34 +1611,34 @@ class Operation(object):
                           "a Tensor, or IndexedSlices: %s" % c)
         self._control_inputs.append(control_op)
 
+    self._id_value = self._graph._next_id()  # pylint: disable=protected-access
     self._original_op = original_op
     self._op_def = op_def
     self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access
+    self._control_flow_context = self.graph._get_control_flow_context()  # pylint: disable=protected-access
 
     # Initialize self._c_op.
     if c_op:
       # TODO(skyewm): remove this assert when we remove USE_C_API
       assert self._graph._c_graph  # pylint: disable=protected-access
       self._c_op = c_op
-      self._add_control_inputs(self._control_inputs)
     elif self._graph._c_graph:  # pylint: disable=protected-access
-      if self._op_def:
-        # TODO(skyewm): op_def_library.apply_op() flattens the incoming
-        # inputs. Refactor so we don't have to do this here.
-        grouped_inputs = self._reconstruct_sequence_inputs(
-            self._op_def, self._inputs, self._node_def.attr)
-      else:
-        # If no OpDef is specified, assume all inputs are scalar.
-        grouped_inputs = self._inputs
-
+      if op_def is None:
+        op_def = self._graph._registered_ops[node_def.op]
+      # TODO(skyewm): op_def_library.apply_op() flattens the incoming inputs.
+      # Refactor so we don't have to do this here.
+      grouped_inputs = self._reconstruct_sequence_inputs(
+          op_def, inputs, node_def.attr)
       self._c_op = _create_c_op(self._graph, self._node_def, grouped_inputs,
                                 self._control_inputs)
     else:
       self._c_op = None
 
-    # Mark that we consume the inputs.
-    for input_tensor in self.inputs:
-      input_tensor._add_consumer(self)  # pylint: disable=protected-access
+    # Mark that we consume the inputs. This is unnecessary and unsupported with
+    # the C API enabled, since the C API tracks the tensor consumers instead.
+    if not self._c_op:
+      for input_tensor in self._inputs:
+        input_tensor._add_consumer(self)  # pylint: disable=protected-access
 
     # Initialize self._outputs.
     if self._c_op:
@@ -1622,20 +1655,21 @@ class Operation(object):
         for i, output_type in enumerate(output_types)
     ]
 
-    # Add this op to the current control flow context.
-    self._control_flow_context = g._get_control_flow_context()  # pylint: disable=protected-access
+    if not c_op:
+      self._control_flow_post_processing()
+
+  def _control_flow_post_processing(self):
+    """Add this op to its control flow context.
+
+    This may add new ops and change this op's inputs. self.inputs must be
+    available before calling this method.
+    """
+    for input_tensor in self.inputs:
+      control_flow_util.CheckInputFromValidContext(self, input_tensor.op)
     if self._control_flow_context is not None:
       self._control_flow_context.AddOp(self)
-    # NOTE(keveman): Control flow context's AddOp could be creating new ops and
-    # setting op.inputs[index] = new_op. Thus the new ops' id could be larger
-    # than this op's id even though this op depend on them. Therefore, delaying
-    # assigning id to this op until all ops this could be dependent on are
-    # created.
-    self._id_value = self._graph._next_id()  # pylint: disable=protected-access
     self._recompute_node_def()
 
-    self._graph._add_op(self)  # pylint: disable=protected-access
-
   def _reconstruct_sequence_inputs(self, op_def, inputs, attrs):
     """Regroups a flat list of input tensors into scalar and sequence inputs.
 
@@ -1795,7 +1829,7 @@ class Operation(object):
       c_api.SetRequestedDevice(
           self._graph._c_graph,  # pylint: disable=protected-access
           self._c_op,  # pylint: disable=protected-access
-          _device_string(device))
+          compat.as_str(_device_string(device)))
     else:
       self._node_def.device = _device_string(device)
 
@@ -1902,6 +1936,13 @@ class Operation(object):
     else:
       self._add_control_inputs([op])
 
+  def _remove_all_control_inputs(self):
+    """Removes any control inputs to this operation."""
+    if self._c_op:
+      c_api.RemoveAllControlInputs(self._graph._c_graph, self._c_op)  # pylint: disable=protected-access
+    else:
+      del self.control_inputs[:]
+
   # Methods below are used when building the NodeDef and Graph proto.
   def _recompute_node_def(self):
     # TODO(skyewm): remove this function when we switch to C API
@@ -1931,23 +1972,23 @@ class Operation(object):
   class _InputList(object):
     """Immutable input list wrapper."""
 
-    def __init__(self, op):
-      self._op = op
+    def __init__(self, inputs):
+      self._inputs = inputs
 
     def __iter__(self):
-      return iter(self._op._inputs)
+      return iter(self._inputs)
 
     def __len__(self):
-      return len(self._op._inputs)
+      return len(self._inputs)
 
     def __bool__(self):
-      return bool(self._op._inputs)
+      return bool(self._inputs)
 
     # Python 3 wants __bool__, Python 2.7 wants __nonzero__
     __nonzero__ = __bool__
 
     def __getitem__(self, i):
-      return self._op._inputs[i]
+      return self._inputs[i]
 
 # pylint: enable=protected-access
 
@@ -1956,17 +1997,14 @@ class Operation(object):
     """The list of `Tensor` objects representing the data inputs of this op."""
     if self._c_op:
       tf_outputs = c_api.GetOperationInputs(self._c_op)
-      # TODO(skyewm): return Operation._InputList
       # pylint: disable=protected-access
-      return [self.graph._get_tensor_by_tf_output(tf_output)
-              for tf_output in tf_outputs]
+      retval = [
+          self.graph._get_tensor_by_tf_output(tf_output)
+          for tf_output in tf_outputs
+      ]
       # pylint: enable=protected-access
-    else:
-      return Operation._InputList(self)
-
-  @property
-  def _input_dtypes(self):
-    return self._input_types
+      return Operation._InputList(retval)
+    return Operation._InputList(self._inputs)
 
   @property
   def _input_types(self):
@@ -2084,7 +2122,7 @@ class Operation(object):
 
   def _set_attr(self, attr_name, attr_value):
     """Private method used to set an attribute in the node_def."""
-    if _USE_C_API:
+    if self._c_op:
       buf = c_api.TF_NewBufferFromString(
           compat.as_bytes(attr_value.SerializeToString()))
       try:
@@ -2653,11 +2691,16 @@ class Graph(object):
 
     # TODO(skyewm): fold as much of the above as possible into the C
     # implementation
-    if _USE_C_API:
+    if _USE_C_API or self._use_c_api_hack():
       self._scoped_c_graph = c_api_util.ScopedTFGraph()
     else:
       self._scoped_c_graph = None
 
+  # TODO(apassos) remove once the C API is used by default.
+  def _use_c_api_hack(self):
+    """Temporary hack; can be overridden to force C API usage."""
+    return False
+
   def _convert_stack(self, stack, include_func_start_lineno=False):
     """Converts a stack extracted using _extract_stack() to a traceback stack.
 
@@ -2858,6 +2901,20 @@ class Graph(object):
     """
     self._control_flow_context = ctx
 
+  def _copy_functions_to_graph_def(self, graph_def, starting_bytesize):
+    """If this graph contains functions, copy them to `graph_def`."""
+    bytesize = starting_bytesize
+    for f in self._functions.values():
+      bytesize += f.definition.ByteSize()
+      if bytesize >= (1 << 31) or bytesize < 0:
+        raise ValueError("GraphDef cannot be larger than 2GB.")
+      graph_def.library.function.extend([f.definition])
+      if f.grad_func_name:
+        grad_def = function_pb2.GradientDef()
+        grad_def.function_name = f.name
+        grad_def.gradient_func = f.grad_func_name
+        graph_def.library.gradient.extend([grad_def])
+
   def _as_graph_def(self, from_version=None, add_shapes=False):
     # pylint: disable=line-too-long
     """Returns a serialized `GraphDef` representation of this graph.
@@ -2886,33 +2943,42 @@ class Graph(object):
 
     """
     # pylint: enable=line-too-long
-    with self._lock:
-      graph = graph_pb2.GraphDef()
-      graph.versions.CopyFrom(self._graph_def_versions)
-      bytesize = 0
-      for op_id in sorted(self._nodes_by_id):
-        op = self._nodes_by_id[op_id]
-        if from_version is None or op_id > from_version:
-          graph.node.extend([op.node_def])
-          if op.outputs and add_shapes:
-            assert "_output_shapes" not in graph.node[-1].attr
-            graph.node[-1].attr["_output_shapes"].list.shape.extend(
-                [output.get_shape().as_proto() for output in op.outputs])
-          bytesize += op.node_def.ByteSize()
-          if bytesize >= (1 << 31) or bytesize < 0:
-            raise ValueError("GraphDef cannot be larger than 2GB.")
-      if self._functions:
-        for f in self._functions.values():
-          bytesize += f.definition.ByteSize()
-          if bytesize >= (1 << 31) or bytesize < 0:
-            raise ValueError("GraphDef cannot be larger than 2GB.")
-          graph.library.function.extend([f.definition])
-          if f.grad_func_name:
-            grad_def = function_pb2.GradientDef()
-            grad_def.function_name = f.name
-            grad_def.gradient_func = f.grad_func_name
-            graph.library.gradient.extend([grad_def])
-      return graph, self._version
+    if _USE_C_API:
+      with self._lock:
+        with c_api_util.tf_buffer() as buf:
+          with errors.raise_exception_on_not_ok_status() as status:
+            c_api.TF_GraphToGraphDef(self._c_graph, buf, status)
+          data = c_api.TF_GetBuffer(buf)
+        graph = graph_pb2.GraphDef()
+        graph.ParseFromString(compat.as_bytes(data))
+        # Strip the experimental library field iff it's empty.
+        if not graph.library.function:
+          graph.ClearField("library")
+
+        if add_shapes:
+          for node in graph.node:
+            op = self._nodes_by_name[node.name]
+            if op.outputs:
+              node.attr["_output_shapes"].list.shape.extend(
+                  [output.get_shape().as_proto() for output in op.outputs])
+    else:
+      with self._lock:
+        graph = graph_pb2.GraphDef()
+        graph.versions.CopyFrom(self._graph_def_versions)
+        bytesize = 0
+        for op_id in sorted(self._nodes_by_id):
+          op = self._nodes_by_id[op_id]
+          if from_version is None or op_id > from_version:
+            graph.node.extend([op.node_def])
+            if op.outputs and add_shapes:
+              assert "_output_shapes" not in graph.node[-1].attr
+              graph.node[-1].attr["_output_shapes"].list.shape.extend(
+                  [output.get_shape().as_proto() for output in op.outputs])
+            bytesize += op.node_def.ByteSize()
+            if bytesize >= (1 << 31) or bytesize < 0:
+              raise ValueError("GraphDef cannot be larger than 2GB.")
+        self._copy_functions_to_graph_def(graph, bytesize)
+    return graph, self._version
 
   def as_graph_def(self, from_version=None, add_shapes=False):
     # pylint: disable=line-too-long
@@ -2986,9 +3052,14 @@ class Graph(object):
     # Add function to graph
     # pylint: disable=protected-access
     if self._c_graph:
-      assert function._c_func, (
-          "Cannot add function created without C API support to graph "
-          "created with C API support")
+      # Handle functions created without using the C API. TODO(apassos,skyewm)
+      # remove this when all functions are generated using the C API by default
+      # as this will be unnecessary.
+      if not function._c_func:
+        with errors.raise_exception_on_not_ok_status() as status:
+          serialized = function.definition.SerializeToString()
+          function._c_func = c_api.TF_FunctionImportFunctionDef(
+              serialized, status)
       with errors.raise_exception_on_not_ok_status() as status:
         gradient = function._grad_func._c_func if function._grad_func else None
         c_api.TF_GraphCopyFunction(self._c_graph, function._c_func, gradient,
@@ -3099,12 +3170,11 @@ class Graph(object):
         input_types=input_types,
         original_op=self._default_original_op,
         op_def=op_def)
-
     self._create_op_helper(ret, compute_shapes=compute_shapes,
                            compute_device=compute_device)
     return ret
 
-  def _create_op_from_tf_operation(self, c_op):
+  def _create_op_from_tf_operation(self, c_op, compute_device=True):
     """Creates an `Operation` in this graph from the supplied TF_Operation.
 
     This method is like create_op() except the new Operation is constructed
@@ -3112,19 +3182,23 @@ class Graph(object):
     field. This is used to create Operation objects around TF_Operations created
     indirectly by the C API (e.g. by TF_ImportGraphDef, TF_FinishWhile).
 
+    This function does not call Operation._control_flow_post_processing or
+    Graph._control_dependencies_for_inputs (since the inputs may not be
+    available yet). The caller is responsible for calling these methods.
+
     Args:
       c_op: a wrapped TF_Operation
+      compute_device: (Optional.) If True, device functions will be executed
+        to compute the device property of the Operation.
 
     Returns:
       An `Operation` object.
     """
     self._check_not_finalized()
-    tf_outputs = c_api.GetOperationInputs(c_op)
-    input_ops = set(self._get_operation_by_tf_operation(output.oper)
-                    for output in tf_outputs)
-    control_inputs = self._control_dependencies_for_inputs(input_ops)
-    ret = Operation(c_op, self, control_inputs=control_inputs)
-    self._create_op_helper(ret)
+    ret = Operation(c_op, self)
+    assert ret.name not in self._names_in_use
+    self._names_in_use[ret.name] = 1
+    self._create_op_helper(ret, compute_device=compute_device)
     return ret
 
   def _create_op_helper(self, op, compute_shapes=True, compute_device=True):
@@ -3138,6 +3212,8 @@ class Graph(object):
     # compute_shapes argument.
     if op._c_op or compute_shapes:  # pylint: disable=protected-access
       set_shapes_for_outputs(op)
+    # TODO(b/XXXX): move to Operation.__init__ once _USE_C_API flag is removed.
+    self._add_op(op)
 
     # Apply any additional attributes requested. Do not overwrite any existing
     # attributes.
@@ -3217,6 +3293,37 @@ class Graph(object):
           op._set_attr("container", attr_value_pb2.AttrValue(  # pylint: disable=protected-access
               s=compat.as_bytes(self._container)))
 
+  def _add_new_tf_operations(self, compute_devices=True):
+    """Creates `Operations` in this graph for any new TF_Operations.
+
+    This is useful for when TF_Operations are indirectly created by the C API
+    outside of the Operation constructor (e.g. by TF_ImportGraphDef,
+    TF_FinishWhile). This ensures there are corresponding Operations for all
+    TF_Operations in the underlying TF_Graph.
+
+    Args:
+      compute_devices: (Optional.) If True, device functions will be executed
+        to compute the device properties of each new Operation.
+
+    Returns:
+      A list of the new `Operation` objects.
+    """
+    # Create all Operation objects before accessing their inputs since an op may
+    # be created before its inputs.
+    new_ops = [
+        self._create_op_from_tf_operation(c_op, compute_device=compute_devices)
+        for c_op in c_api_util.new_tf_operations(self)
+    ]
+
+    for op in new_ops:
+      new_control_inputs = self._control_dependencies_for_inputs(op.inputs)
+      # pylint: disable=protected-access
+      op._add_control_inputs(new_control_inputs)
+      op._control_flow_post_processing()
+      # pylint: enable=protected-access
+
+    return new_ops
+
   def as_graph_element(self, obj, allow_tensor=True, allow_operation=True):
     """Returns the object referred to by `obj`, as an `Operation` or `Tensor`.
 
@@ -3727,6 +3834,9 @@ class Graph(object):
         above.
     """
     if name:
+      if isinstance(name, compat.bytes_or_text_types):
+        name = compat.as_str(name)
+
       if self._name_stack:
         # Scopes created in a nested scope may have initial characters
         # that are illegal as the initial character of an op name
@@ -4517,15 +4627,11 @@ def control_dependencies(control_inputs):
   See @{tf.Graph.control_dependencies}
   for more details.
 
-  When eager execution is enabled, any callable object in the `control_inputs`
-  list will be called.
-
   Args:
     control_inputs: A list of `Operation` or `Tensor` objects which
       must be executed or computed before running the operations
       defined in the context.  Can also be `None` to clear the control
-      dependencies. If eager execution is enabled, any callable object in the
-      `control_inputs` list will be called.
+      dependencies.
 
   Returns:
    A context manager that specifies control dependencies for all
@@ -4534,11 +4640,6 @@ def control_dependencies(control_inputs):
   if context.in_graph_mode():
     return get_default_graph().control_dependencies(control_inputs)
   else:
-    if control_inputs:
-      # Excute any pending callables.
-      for control in control_inputs:
-        if callable(control):
-          control()
     return _NullContextmanager()
 
 
@@ -4756,10 +4857,71 @@ class _DefaultGraphStack(_DefaultStack):  # pylint: disable=protected-access
     super(_DefaultGraphStack, self).reset()
     self._global_default_graph = None
 
+  @tf_contextlib.contextmanager
+  def get_controller(self, default):
+    try:
+      context.context_stack.push(default.building_function, default.as_default)
+      with super(_DefaultGraphStack, self).get_controller(default) as g:
+        yield g
+    finally:
+      context.context_stack.pop()
+
 
 _default_graph_stack = _DefaultGraphStack()
 
 
+# pylint: disable=g-doc-return-or-yield,line-too-long
+@tf_contextlib.contextmanager
+def init_scope():
+  """A context manager that lifts ops out of control-flow scopes and function-building graphs.
+
+  There is often a need to lift variable initialization ops out of control-flow
+  scopes, function-building graphs, and gradient tapes. Entering an
+  `init_scope` is a mechanism for satisfying these desiderata. In particular,
+  entering an `init_scope` has three effects:
+
+    (1) All control dependencies are cleared the moment the scope is entered;
+        this is equivalent to entering the context manager returned from
+        `control_dependencies(None)`, which has the side-effect of exiting
+        control-flow scopes like `tf.cond` and `tf.while_loop`.
+
+    (2) All operations that are created while the scope is active are lifted
+        into the lowest context on the `context_stack` that is not building a
+        graph function. Here, a context is defined as either a graph or an eager
+        context. Every context switch, i.e., every installation of a graph as
+        the default graph and every switch into eager mode, is logged in a
+        thread-local stack called the `context_stack`; the log entry for a
+        context switch is popped from the stack when the context is exited.
+        Entering an `init_scope` is equivalent to crawling up the
+        `context_stack`, finding the first context that is not building a graph
+        function, and entering it. A caveat is that if graph mode is enabled
+        but the default graph stack is empty, then entering an `init_scope`
+        will simply install a fresh graph as the default one.
+
+    (3) The gradient tape is paused while the scope is active.
+  """
+  # pylint: enable=g-doc-return-or-yield,line-too-long
+
+  outer_context = None
+  if context.in_graph_mode() and not _default_graph_stack.stack:
+    outer_context = get_default_graph().as_default
+  else:
+    for stack_entry in reversed(context.context_stack.stack):
+      if not stack_entry.is_building_function:
+        outer_context = stack_entry.enter_context_fn
+        break
+
+  if outer_context is None:
+    raise AssertionError("All graphs are building functions, and no "
+                         "eager context was previously active.")
+
+  try:
+    with outer_context(), control_dependencies(None), tape.stop_recording():
+      yield
+  finally:
+    pass
+
+
 def enable_eager_execution(config=None, device_policy=None):
   """Enables, for the rest of the lifetime of this program, eager execution.
 
@@ -4794,6 +4956,16 @@ def enable_eager_execution(config=None, device_policy=None):
      or if trying to create a context with nontrivial options which differ
      from those of the existing context.
   """
+  if config is not None and not isinstance(config, config_pb2.ConfigProto):
+    raise TypeError(
+        "config must be a tf.ConfigProto, but got %s" % type(config))
+  if device_policy not in (None, context.DEVICE_PLACEMENT_EXPLICIT,
+                           context.DEVICE_PLACEMENT_WARN,
+                           context.DEVICE_PLACEMENT_SILENT):
+    raise ValueError(
+        "device_policy must be one of None, tfe.DEVICE_PLACEMENT_EXPLICIT, "
+        "tfe.DEVICE_PLACEMENT_WARN, tfe.DEVICE_PLACEMENT_SILENT"
+    )
   # pylint: disable=protected-access
   if context._default_mode == context.GRAPH_MODE:
     graph_mode_has_been_used = (
@@ -4806,6 +4978,13 @@ def enable_eager_execution(config=None, device_policy=None):
   if context._context is None:
     context._context = context.Context(config=config,
                                        device_policy=device_policy)
+    if context.context_stack.stack:
+      raise AssertionError("Invariant violated: The context stack must "
+                           "be empty when eager execution is enabled.")
+    # Log that eager execution has been enabled by pushing an entry onto the
+    # context stack; this entry won't ever be popped, as it's impossible to
+    # disable eager execution
+    context.context_stack.push(False, context.eager_mode)
   elif ((config is not None and config is not context._context._config)
         or (device_policy is not None
             and device_policy is not context._context._device_policy)):
@@ -5281,11 +5460,18 @@ class name_scope(object):  # pylint: disable=invalid-name
     """
     if self._in_eager_mode:
       self._old_name = self._ctx.scope_name
-      if self._name:
-        scope_name = (self._old_name + self._name + "/"
-                      if self._old_name else self._name + "/")
-      else:
+      if not self._name:
         scope_name = ""
+      else:
+        if self._name[-1] == "/":
+          # A trailing slash breaks out of nested name scopes, indicating a
+          # fully specified scope name, for compatibility with Graph.name_scope.
+          scope_name = self._name
+        else:
+          name_with_trailing_slash = self._name + "/"
+          scope_name = (
+              self._old_name + name_with_trailing_slash
+              if self._old_name else name_with_trailing_slash)
       self._ctx.scope_name = scope_name
       return scope_name
     else:
@@ -5301,8 +5487,12 @@ class name_scope(object):  # pylint: disable=invalid-name
       g = _get_graph_from_inputs(self._values)
       self._g_manager = g.as_default()
       self._g_manager.__enter__()
-      self._name_scope = g.name_scope(self._name)
-      return self._name_scope.__enter__()
+      try:
+        self._name_scope = g.name_scope(self._name)
+        return self._name_scope.__enter__()
+      except:
+        self._g_manager.__exit__(*sys.exc_info())
+        raise
 
   def __exit__(self, type_arg, value_arg, traceback_arg):
     if self._in_eager_mode:
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index cd296ccdc5ef372038fb62f0311a056cfc5ceaae..78519f108ba69a8f3f296debf2e199d6613bf86a 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -26,6 +26,7 @@ from tensorflow.core.framework import types_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.eager import context
+from tensorflow.python.eager import function as eager_function
 from tensorflow.python.framework import common_shapes
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import device as pydev
@@ -43,6 +44,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import resources
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
@@ -203,13 +205,13 @@ class OperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(dtypes.float32, float_t.dtype)
     self.assertEqual(op, float_t.op)
     self.assertEqual(0, float_t._value_index)
-    self.assertEqual(0, len(float_t._consumers))
+    self.assertEqual(0, len(float_t.consumers()))
     self.assertEqual("myop", float_t._as_node_def_input())
 
     self.assertEqual(dtypes.string, label_str_t.dtype)
     self.assertEqual(op, label_str_t.op)
     self.assertEqual(1, label_str_t._value_index)
-    self.assertEqual(0, len(label_str_t._consumers))
+    self.assertEqual(0, len(label_str_t.consumers()))
     self.assertEqual("myop:1", label_str_t._as_node_def_input())
 
     self.assertProtoEquals("op:'FloatOutputStringOutput' name:'myop'",
@@ -223,8 +225,8 @@ class OperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(1, len(op2.inputs))
     self.assertIs(float_t, op2.inputs[0])
 
-    self.assertEqual(1, len(float_t._consumers))
-    self.assertEqual(op2, float_t._consumers[0])
+    self.assertEqual(1, len(float_t.consumers()))
+    self.assertEqual(op2, float_t.consumers()[0])
 
     self.assertProtoEquals("op:'FloatOutput' name:'myop1'", op1.node_def)
     self.assertProtoEquals("op:'FloatInput' name:'myop2' input:'myop1'",
@@ -243,14 +245,14 @@ class OperationTest(test_util.TensorFlowTestCase):
     op3 = test_ops.foo2(float1_t, label2_str_t, label2_str_t, name="myop3").d.op
     self.assertEqual(2, len(op3.values()))
 
-    self.assertEqual(1, len(float1_t._consumers))
-    self.assertEqual(op3, float1_t._consumers[0])
+    self.assertEqual(1, len(float1_t.consumers()))
+    self.assertEqual(op3, float1_t.consumers()[0])
 
-    self.assertEqual(0, len(float2_t._consumers))
+    self.assertEqual(0, len(float2_t.consumers()))
 
-    self.assertEqual(2, len(label2_str_t._consumers))
-    self.assertEqual(op3, label2_str_t._consumers[0])
-    self.assertEqual(op3, label2_str_t._consumers[1])
+    self.assertEqual(2, len(label2_str_t.consumers()))
+    self.assertEqual(op3, label2_str_t.consumers()[0])
+    self.assertEqual(op3, label2_str_t.consumers()[1])
 
     self.assertProtoEquals("""
     op:'Foo2' name:'myop3'
@@ -274,6 +276,7 @@ class OperationTest(test_util.TensorFlowTestCase):
     op1 = ops.Operation(
         ops._NodeDef("RefOutputFloatOutput", "op1"), g, [],
         [dtypes.float32_ref, dtypes.float32])
+    g._add_op(op1)
     self.assertProtoEquals("op:'RefOutputFloatOutput' name:'op1'", op1.node_def)
     self.assertEquals([], list(op1.inputs))
     ref_t, nonref_t = op1.values()
@@ -282,12 +285,14 @@ class OperationTest(test_util.TensorFlowTestCase):
         ops._NodeDef("RefInputFloatInput", "op2"),
         g, [ref_t, nonref_t], [],
         input_types=[dtypes.float32_ref, dtypes.float32])
+    g._add_op(op2)
     self.assertProtoEquals(
         "op:'RefInputFloatInput' name:'op2' input:'op1' input:'op1:1'",
         op2.node_def)
     self.assertEquals([ref_t, nonref_t], list(op2.inputs))
     op3 = ops.Operation(
         ops._NodeDef("TwoFloatInputs", "op3"), g, [ref_t, nonref_t], [])
+    g._add_op(op3)
     self.assertProtoEquals(
         "op:'TwoFloatInputs' name:'op3' input:'op1' input:'op1:1'",
         op3.node_def)
@@ -482,6 +487,30 @@ class OperationTest(test_util.TensorFlowTestCase):
     z._add_control_inputs([x, y, y])  # pylint: disable=protected-access
     self.assertEqual(z.control_inputs, [x, y])
 
+  def testRemoveAllControlInputs(self):
+    a = constant_op.constant(1)
+    with ops.control_dependencies([a]):
+      b = constant_op.constant(2)
+    c = constant_op.constant(3)
+    d = constant_op.constant(4)
+    e = constant_op.constant(5)
+    with ops.control_dependencies([a, c]):
+      f = d + e
+
+    self.assertEqual(a.op.control_inputs, [])
+    self.assertEqual(b.op.control_inputs, [a.op])
+    self.assertEqual(f.op.control_inputs, [a.op, c.op])
+
+    a.op._remove_all_control_inputs()  # pylint: disable=protected-access
+    self.assertEqual(a.op.control_inputs, [])
+
+    b.op._remove_all_control_inputs()  # pylint: disable=protected-access
+    self.assertEqual(b.op.control_inputs, [])
+
+    f.op._remove_all_control_inputs()  # pylint: disable=protected-access
+    self.assertEqual(f.op.control_inputs, [])
+    self.assertEqual(list(f.op.inputs), [d, e])
+
   def testControlInputCycle(self):
     # Non-C API path has a different error message
     if not ops._USE_C_API: return
@@ -508,16 +537,22 @@ class OperationTest(test_util.TensorFlowTestCase):
 
     z.op._update_input(0, y)  # pylint: disable=protected-access
     self.assertEquals(list(z.op.inputs), [y, y])
+    self.assertEquals(x.consumers(), [])
+    self.assertEquals(y.consumers(), [z.op, z.op])
     with session.Session(graph=g) as sess:
       self.assertEquals(sess.run(z), 4)
 
     z.op._update_input(0, x)  # pylint: disable=protected-access
     self.assertEquals(list(z.op.inputs), [x, y])
+    self.assertEquals(x.consumers(), [z.op])
+    self.assertEquals(y.consumers(), [z.op])
     with session.Session(graph=g) as sess:
       self.assertEquals(sess.run(z), 3)
 
     z.op._update_input(1, y)  # pylint: disable=protected-access
     self.assertEquals(list(z.op.inputs), [x, y])
+    self.assertEquals(x.consumers(), [z.op])
+    self.assertEquals(y.consumers(), [z.op])
     with session.Session(graph=g) as sess:
       self.assertEquals(sess.run(z), 3)
 
@@ -625,6 +660,15 @@ class OperationTest(test_util.TensorFlowTestCase):
       with self.assertRaisesRegexp(ValueError, "must be from the same graph"):
         y * x  # pylint: disable=pointless-statement
 
+  def testInputsAreImmutable(self):
+    g = ops.Graph()
+    with g.as_default():
+      x = test_ops.int_output()
+      op = test_ops.int_input_int_output(x, name="myop").op
+    with self.assertRaisesRegexp(
+        AttributeError, "'_InputList' object has no attribute 'append'"):
+      op.inputs.append(None)
+
 
 @test_util.with_c_api
 class CreateOpTest(test_util.TensorFlowTestCase):
@@ -734,6 +778,29 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(len(op.outputs), 1)
     self.assertEqual(op.outputs[0].shape, tensor_shape.matrix(2, 3))
 
+  def testUniqueName(self):
+    g = ops.Graph()
+    with g.as_default():
+      if ops._USE_C_API:
+        c_op = ops._create_c_op(g, ops._NodeDef("IntOutput", "myop"), [], [])
+        c_op2 = ops._create_c_op(g, ops._NodeDef("IntOutput", "myop_1"), [], [])
+        op = g._create_op_from_tf_operation(c_op)
+        op2 = g._create_op_from_tf_operation(c_op2)
+      else:
+        # Test pure-Python version to make sure C API has same behavior.
+        op = test_ops.int_output(name="myop").op
+        op2 = test_ops.int_output(name="myop_1").op
+
+      # Create ops with same names as op1 and op2. We expect the new names to be
+      # uniquified.
+      op3 = test_ops.int_output(name="myop").op
+      op4 = test_ops.int_output(name="myop_1").op
+
+    self.assertEqual(op.name, "myop")
+    self.assertEqual(op2.name, "myop_1")
+    self.assertEqual(op3.name, "myop_2")
+    self.assertEqual(op4.name, "myop_1_1")
+
   def testCond(self):
     g = ops.Graph()
     with g.as_default():
@@ -741,10 +808,10 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
 
       def true_fn():
         if ops._USE_C_API:
-          c_op = ops._create_c_op(ops.get_default_graph(),
-                                  ops._NodeDef("IntInput", "cond/myop"), [x],
-                                  [])
-          ops.get_default_graph()._create_op_from_tf_operation(c_op)
+          ops._create_c_op(ops.get_default_graph(),
+                           ops._NodeDef("IntInput", "cond/myop"), [x], [])
+          new_ops = g._add_new_tf_operations()
+          self.assertEqual(len(new_ops), 1)
         else:
           # Test pure-Python version to make sure C API has same behavior.
           test_ops.int_input(x, name="myop")
@@ -774,10 +841,10 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
 
       def body(i):
         if ops._USE_C_API:
-          c_op = ops._create_c_op(ops.get_default_graph(),
-                                  ops._NodeDef("IntInput", "myloop/myop"), [x],
-                                  [])
-          ops.get_default_graph()._create_op_from_tf_operation(c_op)
+          ops._create_c_op(ops.get_default_graph(),
+                           ops._NodeDef("IntInput", "myloop/myop"), [x], [])
+          new_ops = g._add_new_tf_operations()
+          self.assertEqual(len(new_ops), 1)
         else:
           # Test pure-Python version to make sure C API has same behavior.
           test_ops.int_input(x, name="myop")
@@ -808,11 +875,11 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
       def body(i):
         c = constant_op.constant(1.0, name="c")
         if ops._USE_C_API:
-          c_op = ops._create_c_op(ops.get_default_graph(),
-                                  ops._NodeDef("IntInput", "myloop/myop"), [x],
-                                  [])
+          ops._create_c_op(ops.get_default_graph(),
+                           ops._NodeDef("IntInput", "myloop/myop"), [x], [])
           with ops.control_dependencies([c]):
-            ops.get_default_graph()._create_op_from_tf_operation(c_op)
+            new_ops = g._add_new_tf_operations()
+            self.assertEqual(len(new_ops), 1)
         else:
           with ops.control_dependencies([c]):
             test_ops.int_input(x, name="myop")
@@ -828,10 +895,6 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(op.control_inputs, [c])
 
   def testWhileLoopWithExternalControlDep(self):
-    # TODO(skyewm): enable once ControlFlowContext._RemoveExternalControlEdges
-    # works with C API enabled
-    if ops._USE_C_API: self.skipTest("Not yet implemented with C API enabled")
-
     g = ops.Graph()
     with g.as_default():
       x = test_ops.int_output()
@@ -839,11 +902,11 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
 
       def body(i):
         if ops._USE_C_API:
-          c_op = ops._create_c_op(ops.get_default_graph(),
-                                  ops._NodeDef("IntInput", "myloop/myop"), [x],
-                                  [])
+          ops._create_c_op(ops.get_default_graph(),
+                           ops._NodeDef("IntInput", "myloop/myop"), [x], [])
           with ops.control_dependencies([c]):
-            ops.get_default_graph()._create_op_from_tf_operation(c_op)
+            new_ops = g._add_new_tf_operations()
+            self.assertEqual(len(new_ops), 1)
         else:
           with ops.control_dependencies([c]):
             test_ops.int_input(x, name="myop")
@@ -1537,7 +1600,7 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase):
       self.assertEqual(future.calls, 1)
     else:
       a = constant_op.constant(1.0)
-      b = future
+      b = future()
       with ops.control_dependencies([a, b]):
         c = constant_op.constant(3.0)
       self.assertEqual(future.calls, 1)
@@ -1705,6 +1768,37 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase):
 @test_util.with_c_api
 class OpScopeTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testNames(self):
+    with ops.name_scope("foo") as foo:
+      self.assertEqual("foo/", foo)
+      with ops.name_scope("foo2") as foo2:
+        self.assertEqual("foo/foo2/", foo2)
+      with ops.name_scope(None) as empty1:
+        self.assertEqual("", empty1)
+        with ops.name_scope("foo3") as foo3:
+          self.assertEqual("foo3/", foo3)
+      with ops.name_scope("") as empty2:
+        self.assertEqual("", empty2)
+    with ops.name_scope("foo/") as outer_foo:
+      self.assertEqual("foo/", outer_foo)
+      with ops.name_scope("") as empty3:
+        self.assertEqual("", empty3)
+      with ops.name_scope("foo4") as foo4:
+        self.assertEqual("foo/foo4/", foo4)
+      with ops.name_scope("foo5//") as foo5:
+        self.assertEqual("foo5//", foo5)
+        with ops.name_scope("foo6") as foo6:
+          self.assertEqual("foo5//foo6/", foo6)
+      with ops.name_scope("/") as foo7:
+        self.assertEqual("/", foo7)
+      with ops.name_scope("//") as foo8:
+        self.assertEqual("//", foo8)
+      with ops.name_scope("a//b/c") as foo9:
+        self.assertEqual("foo/a//b/c/", foo9)
+    with ops.name_scope("a//b/c") as foo10:
+      self.assertEqual("a//b/c/", foo10)
+
   @test_util.run_in_graph_and_eager_modes()
   def testEagerDefaultScopeName(self):
     with ops.name_scope(None, "default") as scope:
@@ -1785,6 +1879,204 @@ class OpScopeTest(test_util.TensorFlowTestCase):
     self._testGraphElements([a, variable, b])
 
 
+class InitScopeTest(test_util.TensorFlowTestCase):
+
+  def testClearsControlDependencies(self):
+    g = ops.Graph()
+    a_1 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+    a_2 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+    a_3 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+    a_4 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+
+    with g.as_default():
+      with g.control_dependencies([a_1]):
+        with g.control_dependencies([a_2]):
+          with ops.init_scope():
+            with g.control_dependencies([a_3]):
+              with g.control_dependencies([a_4]):
+                # deps [a_3, a_4]
+                b_3_4 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+              # deps = [a_3]
+              b_3 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+            # deps back to None
+            b_none = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+          # deps back to [a_1, a_2]
+          b_1_2 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+        # deps back to [a_1]
+        b_1 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+        with ops.init_scope():
+          # deps are None again
+          b_none2 = _apply_op(g, "FloatOutput", [], [dtypes.float32])
+
+    self.assertItemsEqual([a_3.op, a_4.op], b_3_4.op.control_inputs)
+    self.assertItemsEqual([a_3.op], b_3.op.control_inputs)
+    self.assertItemsEqual([], b_none.op.control_inputs)
+    self.assertItemsEqual([a_1.op, a_2.op], b_1_2.op.control_inputs)
+    self.assertItemsEqual([a_1.op], b_1.op.control_inputs)
+    self.assertItemsEqual([], b_none2.op.control_inputs)
+
+  def testLiftsOpsFromFunctions(self):
+    g0 = ops.Graph()
+    g1 = ops.Graph()
+    g1._building_function = True  # pylint: disable=protected-access
+    g2 = ops.Graph()
+    g2._building_function = True  # pylint: disable=protected-access
+
+    with g0.as_default():
+      with g1.as_default():
+        with g2.as_default():
+          with ops.init_scope():
+            _ = constant_op.constant(1.0)
+
+    self.assertEqual(len(g2.get_operations()), 0)
+    self.assertEqual(len(g1.get_operations()), 0)
+    self.assertEqual(len(g0.get_operations()), 1)
+
+  def testComposes(self):
+    g0 = ops.Graph()
+    g1 = ops.Graph()
+    g1._building_function = True  # pylint: disable=protected-access
+    g2 = ops.Graph()
+    g2._building_function = True  # pylint: disable=protected-access
+    g3 = ops.Graph()
+    g3._building_function = False  # pylint: disable=protected-access
+
+    with g0.as_default():
+      with g1.as_default():
+        with ops.init_scope():
+          # This op should be lifted into g0.
+          _ = constant_op.constant(1.0)
+          self.assertIs(g0, ops.get_default_graph())
+          self.assertEqual(len(g2.get_operations()), 0)
+          self.assertEqual(len(g1.get_operations()), 0)
+          self.assertEqual(len(g0.get_operations()), 1)
+        with g2.as_default():
+          with ops.init_scope():
+            # This op should be lifted into g0.
+            _ = constant_op.constant(1.0)
+            self.assertIs(g0, ops.get_default_graph())
+            with g3.as_default():
+              with ops.init_scope():
+                # This op should be lifted into g3, because g3 is not building a
+                # function.
+                _ = constant_op.constant(1.0)
+                self.assertIs(g3, ops.get_default_graph())
+
+    self.assertEqual(len(g3.get_operations()), 1)
+    self.assertEqual(len(g2.get_operations()), 0)
+    self.assertEqual(len(g1.get_operations()), 0)
+    self.assertEqual(len(g0.get_operations()), 2)
+
+  def testEscapesToEagerContext(self):
+    g = ops.Graph()
+    g._building_function = True  # pylint: disable=protected-access
+    with context.eager_mode():
+      with context.graph_mode():
+        with g.as_default():
+          with ops.init_scope():
+            # Because g is building a function, init_scope should
+            # escape out to the eager context.
+            self.assertTrue(context.in_eager_mode())
+          # g should be reinstated as the default graph, and the
+          # graph context should be re-entered.
+          self.assertIs(g, ops.get_default_graph())
+          self.assertTrue(context.in_graph_mode())
+
+  def testAllGraphsBuildingFunctionsRaisesError(self):
+    g = ops.Graph()
+    g._building_function = True  # pylint: disable=protected-access
+    with g.as_default():
+      with self.assertRaises(AssertionError):
+        with ops.init_scope():
+          pass
+
+  def testStaysInEagerWhenOnlyEagerContextActive(self):
+    with context.eager_mode():
+      with ops.init_scope():
+        self.assertTrue(context.eager_mode())
+      self.assertTrue(context.eager_mode())
+
+  def testEscapesDefunWhenInEagerMode(self):
+
+    def function_with_variables():
+      with ops.init_scope():
+        v = resource_variable_ops.ResourceVariable(3)
+      return v.assign_add(1)
+
+    with context.eager_mode():
+      # Each invocation of function_with_variables recreates a variable.
+      self.assertEqual(4, int(function_with_variables()))
+      self.assertEqual(4, int(function_with_variables()))
+
+      compiled = eager_function.defun(function_with_variables)
+      # The init_scope in function_with_variables lifts the variable out
+      # of the graph function constructed by defun; hence,
+      # compiled now appears to be stateful.
+      self.assertEqual(4, int(compiled()))
+      self.assertEqual(5, int(compiled()))
+
+  def testEscapesDefunWhenInGraphMode(self):
+    def function_with_variables(name):
+      with ops.init_scope():
+        _ = variable_scope.get_variable(name, shape=(1,))
+
+    g = ops.Graph()
+    with g.as_default():
+      with self.test_session():
+        # First ensure that graphs that are not building functions are
+        # not escaped.
+        function_with_variables("foo")
+        with self.assertRaisesRegexp(ValueError,
+                                     r"Variable foo already exists.*"):
+          # This will fail because reuse is not set to True.
+          function_with_variables("foo")
+
+        compiled = eager_function.defun(function_with_variables)
+        compiled("bar")
+        self.assertEqual(
+            len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)), 2)
+
+        # The second call to `compiled` should not create variables: the
+        # init_scope has lifted the variable creation code out of the defun.
+        compiled("bar")
+        self.assertEqual(
+            len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)), 2)
+
+  def testEscapesNestedDefun(self):
+
+    def inner_function():
+      with ops.init_scope():
+        v = resource_variable_ops.ResourceVariable(1)
+      return v.assign_add(2)
+
+    def outer_function(inner=None):
+      with ops.init_scope():
+        v0 = resource_variable_ops.ResourceVariable(0)
+      return v0.assign_add(1) + inner()
+
+    with context.eager_mode():
+      # Each invocation of outer_function recreates variables.
+      self.assertEqual(4, int(outer_function(inner=inner_function)))
+      self.assertEqual(4, int(outer_function(inner=inner_function)))
+
+      compiled_inner = eager_function.defun(inner_function)
+      compiled_outer = eager_function.defun(outer_function)
+      # The init_scope lifts variables out of the graph functions
+      # constructed by defun; hence, compiled_outer should now appear to be
+      # stateful.
+      self.assertEqual(4, int(compiled_outer(inner=compiled_inner)))
+      self.assertEqual(7, int(compiled_outer(inner=compiled_inner)))
+
+  def testInstallsDefaultGraphWhenGraphStackIsEmptyInGraphMode(self):
+    with context.graph_mode():
+      # pylint: disable=protected-access
+      self.assertEqual(len(ops._default_graph_stack.stack), 0)
+      with ops.init_scope():
+        self.assertEqual(len(ops._default_graph_stack.stack), 1)
+      self.assertEqual(len(ops._default_graph_stack.stack), 0)
+      # pylint: enable=protected-access
+
+
 @test_util.with_c_api
 class GraphTest(test_util.TensorFlowTestCase):
 
@@ -1876,6 +2168,24 @@ class GraphTest(test_util.TensorFlowTestCase):
     gc.collect()
     self.assertIsNone(g_ref())
 
+  def testRunnableAfterInvalidShape(self):
+    with ops.Graph().as_default():
+      with self.assertRaises(ValueError):
+        math_ops.add([1, 2], [1, 2, 3])
+      a = constant_op.constant(1)
+      with session.Session() as sess:
+        sess.run(a)
+
+  def testRunnableAfterInvalidShapeWithKernelLabelMap(self):
+    g = ops.Graph()
+    with g.as_default():
+      with g._kernel_label_map({"KernelLabelRequired": "overload_1"}):
+        with self.assertRaises(ValueError):
+          test_ops.kernel_label_required(1)
+      a = constant_op.constant(1)
+      with session.Session() as sess:
+        sess.run(a)
+
 
 @test_util.with_c_api
 class AttrScopeTest(test_util.TensorFlowTestCase):
@@ -1890,7 +2200,6 @@ class AttrScopeTest(test_util.TensorFlowTestCase):
       b = compat.as_text(x.get_attr("_B"))
     except ValueError:
       b = None
-    print(a, b)
     return (a, b)
 
   def testNoLabel(self):
@@ -1981,6 +2290,8 @@ class AsGraphDefTest(test_util.TensorFlowTestCase):
       t4.set_shape([43, 37])
       t5.set_shape([43, None])
 
+      b = constant_op.constant(1.0)  # pylint: disable=unused-variable
+
       gd = g.as_graph_def(add_shapes=True)
       self.assertProtoEqualsVersion("""
       node { name: "FiveFloatOutputs" op: "FiveFloatOutputs"
@@ -1997,6 +2308,26 @@ class AsGraphDefTest(test_util.TensorFlowTestCase):
           }
         }
       }
+    node { name: "Const" op: "Const"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape { }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value { type: DT_FLOAT }
+      }
+      attr {
+        key: "value"
+        value {
+          tensor {
+            dtype: DT_FLOAT
+            tensor_shape { }
+         float_val: 1.0  } } } }
       """, gd)
 
 
@@ -2279,6 +2610,18 @@ class NameScopeTest(test_util.TensorFlowTestCase):
         self.assertEqual("scope1", g.get_name_scope())
       self.assertEqual("", g.get_name_scope())
 
+  def testTwoGraphs(self):
+
+    def f():
+      g1 = ops.Graph()
+      g2 = ops.Graph()
+      with g1.as_default():
+        with g2.as_default():
+          with ops.name_scope("_"):
+            pass
+
+    self.assertRaisesRegexp(ValueError, "'_' is not a valid scope name", f)
+
 
 @test_util.with_c_api
 class TracebackTest(test_util.TensorFlowTestCase):
@@ -2336,7 +2679,7 @@ class OutputTypesTest(test_util.TensorFlowTestCase):
     with g.as_default():
       x = constant_op.constant([1, 1, 2, 4, 4, 4, 7, 8, 8],
                                dtype=dtypes.double)
-      y, _ = gen_array_ops.unique(x)
+      y, _ = gen_array_ops._unique(x)
       self.assertEqual([types_pb2.DT_DOUBLE, types_pb2.DT_INT32],
                        y.op._output_types)  # pylint: disable=protected-access
 
@@ -2353,47 +2696,14 @@ class OutputTypesTest(test_util.TensorFlowTestCase):
 
 
 @test_util.with_c_api
-class InputTypesTest(test_util.TensorFlowTestCase):
-  """Tests Operation._input_dtypes and Operation._input_types properties.
-
-  This test should not exist as _input_types is a private property.
-  This property is used by many tests that would normally cover its
-  behavior. However, we can't yet run these tests in C
-  API mode because they use _set_device method. This test will be deleted
-  once we port _set_device.
-  """
-  # TODO(iga): Remove this test
-
-  def setUp(self):
-    self.prev_use_c_api = ops._USE_C_API  # pylint: disable=protected-access
-    ops._USE_C_API = True  # pylint: disable=protected-access
-
-  def tearDown(self):
-    ops._USE_C_API = self.prev_use_c_api  # pylint: disable=protected-access
-
-  def testZeroInputs(self):
-    g = ops.Graph()
-    with g.as_default():
-      # Using a constant because creating unregistered ops
-      # doesn't work with the C API.
-      op = constant_op.constant(12, dtype=dtypes.uint16).op
-      # pylint: disable=protected-access
-      self.assertEqual([], op._input_types)
-      self.assertEqual([], op._input_dtypes)
-      # pylint: enable=protected-access
-
-  def testTwoInputs(self):
-    g = ops.Graph()
-    with g.as_default():
-      x = constant_op.constant(1.0, dtype=dtypes.double)
-      y = constant_op.constant(2.0, dtype=dtypes.double)
-      z = math_ops.multiply(x, y)
-      # pylint: disable=protected-access
-      self.assertTrue(isinstance(z.op._input_types[0], dtypes.DType))
-      self.assertTrue(isinstance(z.op._input_types[1], dtypes.DType))
-      self.assertEqual([dtypes.double, dtypes.double], z.op._input_types)
-      self.assertEqual([dtypes.double, dtypes.double], z.op._input_dtypes)
-      # pylint: enable=protected-access
+class EnableEagerExecutionTest(test_util.TensorFlowTestCase):
+
+  def testBadArgumentsToEnableEagerExecution(self):
+    with self.assertRaisesRegexp(TypeError, "config must be a tf.ConfigProto"):
+      ops.enable_eager_execution(context.DEVICE_PLACEMENT_SILENT)
+    with self.assertRaisesRegexp(ValueError, "device_policy must be one of"):
+      c = config_pb2.ConfigProto()
+      ops.enable_eager_execution(c, c)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc
index c57f0a98421fa88e5faa870157116c1617c19620..65810fa7094409c7429dbaaa6c1e62efb263eafc 100644
--- a/tensorflow/python/framework/python_op_gen.cc
+++ b/tensorflow/python/framework/python_op_gen.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <stdio.h>
 #include <sstream>
 #include <unordered_map>
+#include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb_text.h"
@@ -480,15 +481,15 @@ string GenPythonOp::Code() {
   }
   // This has all the input args followed by those attrs that don't have
   // defaults.
-  std::vector<string> args_no_default;
+  std::vector<ParamNames> params_no_default;
   // The parameters with defaults (these have to be listed after those without).
   // No input args are included, just attrs.
-  std::vector<string> args_with_defaults;
+  std::vector<ParamNames> params_with_default;
 
   for (int i = 0; i < api_def_.arg_order_size(); ++i) {
     const auto& arg = *FindInputArg(api_def_.arg_order(i), op_def_);
     const auto& api_def_arg = *FindInputArg(api_def_.arg_order(i), api_def_);
-    args_no_default.push_back(api_def_arg.rename_to());
+    params_no_default.emplace_back(api_def_arg.name(), api_def_arg.rename_to());
     if (!arg.type_attr().empty()) {
       gtl::InsertIfNotPresent(&inferred_attrs_, arg.type_attr(), arg.name());
     } else if (!arg.type_list_attr().empty()) {
@@ -504,9 +505,9 @@ string GenPythonOp::Code() {
     // Do not add inferred attrs to the Python function signature.
     if (inferred_attrs_.find(attr.name()) == inferred_attrs_.end()) {
       if (attr.has_default_value()) {
-        args_with_defaults.push_back(attr.rename_to());
+        params_with_default.emplace_back(attr.name(), attr.rename_to());
       } else {
-        args_no_default.push_back(attr.rename_to());
+        params_no_default.emplace_back(attr.name(), attr.rename_to());
       }
     }
   }
@@ -515,27 +516,30 @@ string GenPythonOp::Code() {
   // those with defaults go at the end.
   // Get the attrs in the order we want by taking the attrs without defaults
   // from the end of args_no_default, and adding args_no_default.
-  attrs_.reserve(args_no_default.size() - op_def_.input_arg_size() +
-                 args_with_defaults.size());
-  attrs_.insert(attrs_.end(),
-                args_no_default.begin() + op_def_.input_arg_size(),
-                args_no_default.end());
-  attrs_.insert(attrs_.end(), args_with_defaults.begin(),
-                args_with_defaults.end());
-
-  param_names_.reserve(args_no_default.size() + args_with_defaults.size());
+  attrs_.reserve(params_no_default.size() - op_def_.input_arg_size() +
+                 params_with_default.size());
+  for (int i = op_def_.input_arg_size(); i < params_no_default.size(); ++i) {
+    attrs_.push_back(params_no_default[i].GetName());
+  }
+  for (int i = 0; i < params_with_default.size(); ++i) {
+    attrs_.push_back(params_with_default[i].GetName());
+  }
+
+  param_names_.reserve(params_no_default.size() + params_with_default.size());
+  param_names_.insert(param_names_.begin(), params_no_default.begin(),
+                      params_no_default.end());
+  for (const auto& param : params_with_default) {
+    param_names_.push_back(param);
+  }
+
   string parameters;
-  for (const string& name : args_no_default) {
+  for (const auto& param : params_no_default) {
     AddDelimiter(&parameters, ", ");
-    const string param = AvoidPythonReserved(name);
-    strings::StrAppend(&parameters, param);
-    param_names_.push_back(param);
+    strings::StrAppend(&parameters, param.GetRenameTo());
   }
-  for (const string& name : args_with_defaults) {
+  for (const auto& param_and_default : params_with_default) {
     AddDelimiter(&parameters, ", ");
-    const string param = AvoidPythonReserved(name);
-    strings::StrAppend(&parameters, param, "=None");
-    param_names_.push_back(param);
+    strings::StrAppend(&parameters, param_and_default.GetRenameTo(), "=None");
   }
   AddDelimiter(&parameters, ", ");
   strings::StrAppend(&parameters, "name=None");
@@ -557,10 +561,11 @@ string GenPythonOp::Code() {
 }
 
 void GenPythonOp::AddExport() {
-  if (api_def_.visibility() != api_def_.VISIBLE) {
+  if (api_def_.visibility() != ApiDef::VISIBLE) {
     return;
   }
-  strings::StrAppend(&result_, "tf_export(");
+
+  strings::StrAppend(&result_, "@tf_export(");
 
   // Add all endpoint names to tf_export.
   bool first_endpoint = true;
@@ -570,7 +575,10 @@ void GenPythonOp::AddExport() {
     } else {
       first_endpoint = false;
     }
-    strings::StrAppend(&result_, "'", endpoint.name(), "'");
+    string endpoint_name;
+    python_op_gen_internal::GenerateLowerCaseOpName(endpoint.name(),
+                                                    &endpoint_name);
+    strings::StrAppend(&result_, "'", endpoint_name, "'");
   }
   strings::StrAppend(&result_, ")\n");
 }
@@ -603,9 +611,9 @@ void GenPythonOp::AddDocStringInputs() {
     StringPiece description = api_def_arg.description();
     string desc;
     if (ConsumeEquals(&description)) {  // Skip the generated type info.
-      desc = strings::StrCat(param_names_[i], ": ");
+      desc = strings::StrCat(param_names_[i].GetRenameTo(), ": ");
     } else {
-      desc = strings::StrCat(param_names_[i], ": ",
+      desc = strings::StrCat(param_names_[i].GetRenameTo(), ": ",
                              ArgTypeName(op_def_, arg, inferred_attrs_, false));
     }
     if (!description.empty()) {
@@ -750,7 +758,8 @@ void GenPythonOp::AddBody(const string& prefix) {
 void GenPythonOp::AddBodyNoReturn(const string& apply_prefix) {
   string args = strings::StrCat("\"", op_def_.name(), "\", ");
   for (size_t i = 0; i < param_names_.size(); ++i) {
-    strings::StrAppend(&args, param_names_[i], "=", param_names_[i], ", ");
+    strings::StrAppend(&args, AvoidPythonReserved(param_names_[i].GetName()),
+                       "=", param_names_[i].GetRenameTo(), ", ");
   }
   strings::StrAppend(&args, "name=name)");
 
diff --git a/tensorflow/python/framework/python_op_gen_internal.h b/tensorflow/python/framework/python_op_gen_internal.h
index c1efbf9be2277dbc047868dde5110b5505fc9e23..6b53825a6d325c00eaf9f60fbcd9d4e0f9c9183c 100644
--- a/tensorflow/python/framework/python_op_gen_internal.h
+++ b/tensorflow/python/framework/python_op_gen_internal.h
@@ -41,6 +41,28 @@ void GenerateLowerCaseOpName(const string& str, string* result);
 
 string DataTypeToPython(DataType dtype, const string& dtype_module);
 
+// Names that corresponds to a single input parameter.
+class ParamNames {
+ public:
+  // Create param based on Arg.
+  ParamNames(const string& name, const string& rename_to) : name_(name) {
+    rename_to_ = AvoidPythonReserved(rename_to);
+  }
+
+  // Get original parameter name.
+  string GetName() const { return name_; }
+
+  // Get the name to rename the parameter to. Note that AvoidPythonReserved
+  // has already been applied.
+  string GetRenameTo() const { return rename_to_; }
+
+ private:
+  // Original parameter name.
+  string name_;
+  // API name for this parameter.
+  string rename_to_;
+};
+
 class GenPythonOp {
  public:
   GenPythonOp(const OpDef& op_def, const ApiDef& api_def,
@@ -84,7 +106,7 @@ class GenPythonOp {
 
   // All parameters, including inputs & non-inferred attrs, required and those
   // with defaults, except "name"
-  std::vector<string> param_names_;
+  std::vector<ParamNames> param_names_;
 };
 
 }  // namespace python_op_gen_internal
diff --git a/tensorflow/python/framework/python_op_gen_main.cc b/tensorflow/python/framework/python_op_gen_main.cc
index 61b1d02a5e85f40c884ffe77104b425b3554b796..bc5ca195da50499c6fbab822a9a093be3f0277e0 100644
--- a/tensorflow/python/framework/python_op_gen_main.cc
+++ b/tensorflow/python/framework/python_op_gen_main.cc
@@ -34,12 +34,6 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-constexpr char kBaseApiDef[] =
-    "tensorflow/core/api_def/base_api/*.pbtxt";
-constexpr char kPythonApiDef[] =
-    "tensorflow/core/api_def/python_api/*.pbtxt";
-constexpr bool kUseApiDef = false;
-
 Status ReadOpListFromFile(const string& filename,
                           std::vector<string>* op_list) {
   std::unique_ptr<RandomAccessFile> file;
@@ -110,22 +104,23 @@ string InferSourceFileName(const char* argv_zero) {
 }
 
 void PrintAllPythonOps(const std::vector<string>& op_list,
+                       const std::vector<string>& api_def_dirs,
                        const string& source_file_name, bool require_shapes,
                        bool op_list_is_whitelist) {
   OpList ops;
   OpRegistry::Global()->Export(false, &ops);
 
   ApiDefMap api_def_map(ops);
-  if (kUseApiDef) {
+  if (!api_def_dirs.empty()) {
     Env* env = Env::Default();
 
-    std::vector<string> base_api_files;
-    std::vector<string> python_api_files;
-    TF_CHECK_OK(env->GetMatchingPaths(kBaseApiDef, &base_api_files));
-    TF_CHECK_OK(env->GetMatchingPaths(kPythonApiDef, &python_api_files));
-
-    TF_CHECK_OK(api_def_map.LoadFileList(env, base_api_files));
-    TF_CHECK_OK(api_def_map.LoadFileList(env, python_api_files));
+    for (const auto& api_def_dir : api_def_dirs) {
+      std::vector<string> api_files;
+      TF_CHECK_OK(env->GetMatchingPaths(io::JoinPath(api_def_dir, "*.pbtxt"),
+                                        &api_files));
+      TF_CHECK_OK(api_def_map.LoadFileList(env, api_files));
+    }
+    api_def_map.UpdateDocs();
   }
 
   if (op_list_is_whitelist) {
@@ -154,23 +149,30 @@ int main(int argc, char* argv[]) {
       tensorflow::InferSourceFileName(argv[0]);
 
   // Usage:
-  //   gen_main [ @FILENAME | OpName[,OpName]* ] (0 | 1) [0 | 1]
-  if (argc == 2) {
-    tensorflow::PrintAllPythonOps({}, source_file_name,
-                                  tensorflow::string(argv[1]) == "1",
-                                  false /* op_list_is_whitelist */);
-  } else if (argc == 3) {
-    std::vector<tensorflow::string> hidden_ops;
-    TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[1], &hidden_ops));
-    tensorflow::PrintAllPythonOps(hidden_ops, source_file_name,
+  //   gen_main api_def_dir1,api_def_dir2,...
+  //       [ @FILENAME | OpName[,OpName]* ] (0 | 1) [0 | 1]
+  if (argc < 3) {
+    return -1;
+  }
+  std::vector<tensorflow::string> api_def_dirs = tensorflow::str_util::Split(
+      argv[1], ",", tensorflow::str_util::SkipEmpty());
+
+  if (argc == 3) {
+    tensorflow::PrintAllPythonOps({}, api_def_dirs, source_file_name,
                                   tensorflow::string(argv[2]) == "1",
                                   false /* op_list_is_whitelist */);
   } else if (argc == 4) {
+    std::vector<tensorflow::string> hidden_ops;
+    TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[2], &hidden_ops));
+    tensorflow::PrintAllPythonOps(hidden_ops, api_def_dirs, source_file_name,
+                                  tensorflow::string(argv[3]) == "1",
+                                  false /* op_list_is_whitelist */);
+  } else if (argc == 5) {
     std::vector<tensorflow::string> op_list;
-    TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[1], &op_list));
-    tensorflow::PrintAllPythonOps(op_list, source_file_name,
-                                  tensorflow::string(argv[2]) == "1",
-                                  tensorflow::string(argv[3]) == "1");
+    TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[2], &op_list));
+    tensorflow::PrintAllPythonOps(op_list, api_def_dirs, source_file_name,
+                                  tensorflow::string(argv[3]) == "1",
+                                  tensorflow::string(argv[4]) == "1");
   } else {
     return -1;
   }
diff --git a/tensorflow/python/framework/subscribe.py b/tensorflow/python/framework/subscribe.py
index cdcb74e88fd30ee4ff6abbc4b0fbb3c37d5f785e..7797d991da7c1c3a429bbf9e60772f0a1952c723 100644
--- a/tensorflow/python/framework/subscribe.py
+++ b/tensorflow/python/framework/subscribe.py
@@ -137,11 +137,18 @@ def _subscribe_new(tensor, side_effects, control_cache):
     # are subscribed at the same time, we remove the control dependency from
     # the original op only once and we add the dependencies to all the
     # new identities.
+    if ops._USE_C_API:  # pylint: disable=protected-access
+      new_control_inputs = consumer_op.control_inputs
+    else:
+      # Make a copy so we don't modify the actual control inputs (this is fixed
+      # in the C API).
+      new_control_inputs = list(consumer_op.control_inputs)
+    if tensor.op in new_control_inputs:
+      new_control_inputs.remove(tensor.op)
+    new_control_inputs.append(out.op)
     # pylint: disable=protected-access
-    if tensor.op in consumer_op._control_inputs:
-      consumer_op._control_inputs.remove(tensor.op)
-    consumer_op._control_inputs.append(out.op)
-    consumer_op._recompute_node_def()
+    consumer_op._remove_all_control_inputs()
+    consumer_op._add_control_inputs(new_control_inputs)
     # pylint: enable=protected-access
   return out
 
@@ -167,12 +174,8 @@ def _subscribe_extend(tensor, side_effects):
     for s in side_effects:
       outs += s(source_tensor)
 
-  for out in outs:
-    out_type = type(out)
-    if out_type is ops.Tensor:
-      out = out.op
-    tensor.op._control_inputs.append(out)  # pylint: disable=protected-access
-  tensor.op._recompute_node_def()  # pylint: disable=protected-access
+  out_ops = [out.op if isinstance(out, ops.Tensor) else out for out in outs]
+  tensor.op._add_control_inputs(out_ops)  # pylint: disable=protected-access
 
   return tensor
 
diff --git a/tensorflow/python/framework/subscribe_test.py b/tensorflow/python/framework/subscribe_test.py
index 01df20241dd8687ea41217ceddd1be8b9f975978..8b95b25e82a1886c43e08f47a612300750643fb1 100644
--- a/tensorflow/python/framework/subscribe_test.py
+++ b/tensorflow/python/framework/subscribe_test.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 
 
+@test_util.with_c_api
 class SubscribeTest(test_util.TensorFlowTestCase):
 
   def _ExpectSubscribedIdentities(self, container):
@@ -58,12 +59,12 @@ class SubscribeTest(test_util.TensorFlowTestCase):
       return t
 
     c0 = c
-    self.assertTrue(c0.op in d.op._control_inputs)
+    self.assertTrue(c0.op in d.op.control_inputs)
     c = subscribe.subscribe(c,
                             lambda t: script_ops.py_func(sub, [t], [t.dtype]))
     # Verify that control dependencies are correctly moved to the subscription.
-    self.assertFalse(c0.op in d.op._control_inputs)
-    self.assertTrue(c.op in d.op._control_inputs)
+    self.assertFalse(c0.op in d.op.control_inputs)
+    self.assertTrue(c.op in d.op.control_inputs)
 
     with self.test_session() as sess:
       c_out = sess.run([c])
diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index 9fc0e494633d239c1343a88eb0bbbb5a8ee75cb8..1b90c7ad4d68287bfa5c1c74c82d2936a20e4a80 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -49,8 +49,20 @@ def SlowAppendFloat16ArrayToTensorProto(tensor_proto, proto_values):
   tensor_proto.half_val.extend([
       ExtractBitsFromFloat16(x) for x in proto_values])
 
+
+def ExtractBitsFromBFloat16(x):
+  return np.asscalar(
+      np.asarray(x, dtype=dtypes.bfloat16.as_numpy_dtype).view(np.uint16))
+
+
+def SlowAppendBFloat16ArrayToTensorProto(tensor_proto, proto_values):
+  tensor_proto.half_val.extend([
+      ExtractBitsFromBFloat16(x) for x in proto_values])
+
+
 if _FAST_TENSOR_UTIL_AVAILABLE:
   _NP_TO_APPEND_FN = {
+      dtypes.bfloat16.as_numpy_dtype: SlowAppendBFloat16ArrayToTensorProto,
       # TODO(sesse): We should have a
       # fast_tensor_util.AppendFloat16ArrayToTensorProto,
       # but it seems np.float16_t doesn't exist?
@@ -121,6 +133,7 @@ else:
     tensor_proto.bool_val.extend([np.asscalar(x) for x in proto_values])
 
   _NP_TO_APPEND_FN = {
+      dtypes.bfloat16.as_numpy_dtype: SlowAppendBFloat16ArrayToTensorProto,
       np.float16: SlowAppendFloat16ArrayToTensorProto,
       np.float32: SlowAppendFloat32ArrayToTensorProto,
       np.float64: SlowAppendFloat64ArrayToTensorProto,
diff --git a/tensorflow/python/framework/test_ops.cc b/tensorflow/python/framework/test_ops.cc
index 25bb7af20cfce6d96e8a877f370142dc00ecb9ca..c6c6c2233c9a81467f57abe2d42f0df9b7ce7106 100644
--- a/tensorflow/python/framework/test_ops.cc
+++ b/tensorflow/python/framework/test_ops.cc
@@ -26,6 +26,16 @@ REGISTER_OP("KernelLabel")
     .Output("result: string")
     .SetShapeFn(shape_inference::ScalarShape);
 
+REGISTER_OP("KernelLabelRequired")
+    .Input("input: int32")
+    .Output("result: string")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      shape_inference::ShapeHandle out;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &out));
+      c->set_output(0, c->Scalar());
+      return Status::OK();
+    });
+
 REGISTER_OP("GraphDefVersion")
     .Output("version: int32")
     .SetIsStateful()
@@ -104,6 +114,14 @@ REGISTER_KERNEL_BUILDER(Name("KernelLabel")
                             .Label("overload_2"),
                         KernelLabelOp<OVERLOAD_2_LABEL>);
 
+// All "KernelLabelRequired" kernels have labels
+REGISTER_KERNEL_BUILDER(
+    Name("KernelLabelRequired").Device(DEVICE_CPU).Label("overload_1"),
+    KernelLabelOp<OVERLOAD_1_LABEL>);
+REGISTER_KERNEL_BUILDER(
+    Name("KernelLabelRequired").Device(DEVICE_CPU).Label("overload_2"),
+    KernelLabelOp<OVERLOAD_2_LABEL>);
+
 class GraphDefVersionOp : public OpKernel {
  public:
   explicit GraphDefVersionOp(OpKernelConstruction* ctx)
@@ -369,4 +387,255 @@ REGISTER_OP("FuncAttr")
     .Attr("f: func")
     .SetShapeFn(shape_inference::UnknownShape);
 
+REGISTER_OP("Simple")
+    .Input("a: int32")
+    .Output("out: float")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("OutT").Output("a: T").Attr("T: type").SetShapeFn(
+    shape_inference::UnknownShape);
+
+REGISTER_OP("ReservedInput")
+    .Input("input: int32")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("Polymorphic")
+    .Input("a: T")
+    .Output("out: T")
+    .Attr("T: type")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("PolymorphicOut")
+    .Output("out: T")
+    .Attr("T: type")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("PolymorphicDefaultOut")
+    .Output("out: T")
+    .Attr("T: type = DT_STRING")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("Binary")
+    .Input("a: T")
+    .Input("b: T")
+    .Output("out: T")
+    .Attr("T: type")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("Restrict")
+    .Input("a: T")
+    .Output("out: T")
+    .Attr("T: {string, bool}")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("TypeList")
+    .Input("a: T")
+    .Attr("T: list(type) >= 0")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("TypeListTwice")
+    .Input("a: T")
+    .Input("b: T")
+    .Attr("T: list(type) >= 0")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("OutTypeList")
+    .Output("out: T")
+    .Attr("T: list(type) >= 0")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("TypeListRestrict")
+    .Input("a: T")
+    .Attr("T: list({string, bool})")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("OutTypeListRestrict")
+    .Output("out: t")
+    .Attr("t: list({string, bool})")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("Attr").Attr("a: int").SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrFloat")
+    .Attr("a: float")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrBool")
+    .Attr("a: bool")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrBoolList")
+    .Attr("a: list(bool)")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrMin")
+    .Attr("a: int >= 5")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrListMin")
+    .Attr("a: list(int) >= 2")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrEnum")
+    .Attr("a: {'apples', 'oranges'}")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrEnumList")
+    .Attr("a: list({'apples', 'oranges'})")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrShape")
+    .Attr("a: shape")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrShapeList")
+    .Attr("a: list(shape)")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrPartialShape")
+    .Attr("a: shape")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrPartialShapeList")
+    .Attr("a: list(shape)")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrDefault")
+    .Attr("a: string = 'banana'")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrListDefault")
+    .Attr("a: list(int) = [5, 15]")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrEmptyListDefault")
+    .Attr("a: list(float) = []")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("ReservedAttr")
+    .Attr("range: int")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrTypeDefault")
+    .Input("a: T")
+    .Attr("T: type = DT_INT32")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("AttrListTypeDefault")
+    .Input("a: N * T")
+    .Input("b: N * T")
+    .Attr("T: type = DT_INT32")
+    .Attr("N: int")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("NIntsIn")
+    .Input("a: N * int32")
+    .Attr("N: int >= 2")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("NPolymorphicIn")
+    .Input("a: N * T")
+    .Attr("T: type")
+    .Attr("N: int >= 2")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("NPolymorphicRestrictIn")
+    .Input("a: N * T")
+    .Attr("T: {string, bool}")
+    .Attr("N: int >= 2")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("NInTwice")
+    .Input("a: N * int32")
+    .Input("b: N * string")
+    .Attr("N: int >= 0")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("NInPolymorphicTwice")
+    .Input("a: N * T")
+    .Input("b: N * T")
+    .Attr("T: type")
+    .Attr("N: int >= 0")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("NInTwoTypeVariables")
+    .Input("a: N * S")
+    .Input("b: N * T")
+    .Attr("S: type")
+    .Attr("T: type")
+    .Attr("N: int >= 0")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("InPolymorphicTwice")
+    .Input("a: N * T")
+    .Input("b: M * T")
+    .Attr("T: type")
+    .Attr("N: int >= 0")
+    .Attr("M: int >= 0")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("NIntsOut")
+    .Output("a: N * int32")
+    .Attr("N: int >= 2")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("NIntsOutDefault")
+    .Output("a: N * int32")
+    .Attr("N: int >= 2 = 3")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("NPolymorphicOut")
+    .Output("a: N * T")
+    .Attr("T: type")
+    .Attr("N: int >= 2")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("NPolymorphicOutDefault")
+    .Output("a: N * T")
+    .Attr("T: type = DT_BOOL")
+    .Attr("N: int >= 2 = 2")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("NPolymorphicRestrictOut")
+    .Output("a: N * T")
+    .Attr("T: {string, bool}")
+    .Attr("N: int >= 2")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("RefIn")
+    .Input("a: Ref(T)")
+    .Attr("T: type")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("TwoRefsIn")
+    .Input("a: Ref(T)")
+    .Input("b: Ref(T)")
+    .Attr("T: type")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("RefOut")
+    .Output("a: Ref(T)")
+    .Attr("T: type")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("SimpleStruct")
+    .Output("a: n_a * int32")
+    .Attr("n_a: int >= 0")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("MixedStruct")
+    .Output("a: n_a * int32")
+    .Output("b: float")
+    .Attr("n_a: int >= 0")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("ComplexStruct")
+    .Output("a: n_a * int32")
+    .Output("b: n_b * int64")
+    .Output("c: t_c")
+    .Attr("n_a: int >= 0")
+    .Attr("n_b: int >= 0")
+    .Attr("t_c: list(type) >= 0")
+    .SetShapeFn(shape_inference::UnknownShape);
+
 }  // end namespace tensorflow
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 4c026590c2da8d5db9fda92ed7586729868ff895..729c93987017a166cb346004c9880e764eeec4ef 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -47,19 +47,23 @@ from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.client import device_lib
 from tensorflow.python.client import session
+from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import tape
 from tensorflow.python.framework import device as pydev
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import versions
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
 from tensorflow.python.util import compat
+from tensorflow.python.util import nest
 from tensorflow.python.util.protobuf import compare
 
 
@@ -158,6 +162,16 @@ def assert_meta_graph_protos_equal(tester, a, b):
   # proto comparison below.
   a.ClearField("collection_def")
   b.ClearField("collection_def")
+
+  # Check the graph_defs.
+  assert_equal_graph_def(a.graph_def, b.graph_def, checkpoint_v2=True)
+  # Check graph_def versions (ignored by assert_equal_graph_def).
+  tester.assertProtoEquals(a.graph_def.versions, b.graph_def.versions)
+  # Compared the fields directly, remove their raw values from the
+  # proto comparison below.
+  a.ClearField("graph_def")
+  b.ClearField("graph_def")
+
   tester.assertProtoEquals(a, b)
 
 
@@ -174,7 +188,7 @@ def _strip_checkpoint_v2_randomized(graph_def):
       if attr_tensor_value and len(attr_tensor_value.string_val) == 1:
         attr_tensor_string_value = attr_tensor_value.string_val[0]
         if (attr_tensor_string_value and
-            re.match(_SHARDED_SAVE_OP_PATTERN, attr_tensor_string_value)):
+            re.match(_SHARDED_SAVE_OP_PATTERN, str(attr_tensor_string_value))):
           delete_keys.append(attr_key)
     for attr_key in delete_keys:
       del node.attr[attr_key]
@@ -188,6 +202,10 @@ def CudaSupportsHalfMatMulAndConv():
   return pywrap_tensorflow.CudaSupportsHalfMatMulAndConv()
 
 
+def InstallStackTraceHandler():
+  pywrap_tensorflow.InstallStacktraceHandler()
+
+
 def NHWCToNCHW(input_tensor):
   """Converts the input from the NHWC format to NCHW.
 
@@ -453,6 +471,62 @@ class IsolateTest(object):
         type_arg, value_arg, traceback_arg)
 
 
+def assert_no_new_tensors(f):
+  """Decorator for asserting that no new Tensors persist after a test.
+
+  Mainly useful for checking that code using the Python C API has correctly
+  manipulated reference counts.
+
+  Clears the caches that it knows about, runs the garbage collector, then checks
+  that there are no Tensor or Tensor-like objects still around. This includes
+  Tensors to which something still has a reference (e.g. from missing
+  Py_DECREFs) and uncollectable cycles (i.e. Python reference cycles where one
+  of the objects has __del__ defined).
+
+  Args:
+    f: The test case to run.
+  Returns:
+    The decorated test case.
+  """
+
+  def decorator(self, **kwargs):
+    """Finds existing Tensors, runs the test, checks for new Tensors."""
+
+    def _is_tensor(obj):
+      try:
+        return (isinstance(obj, ops.Tensor) or
+                isinstance(obj, variables.Variable))
+      except ReferenceError:
+        # If the object no longer exists, we don't care about it.
+        return False
+
+    tensors_before = set(id(obj) for obj in gc.get_objects() if _is_tensor(obj))
+    outside_container_prefix = ops.get_default_graph()._container_prefix
+    with IsolateTest():
+      # Run the test in a new graph so that collections get cleared when it's
+      # done, but inherit the container prefix so that we can print the values
+      # of variables which get leaked when executing eagerly.
+      ops.get_default_graph()._container_prefix = outside_container_prefix
+      f(self, **kwargs)
+    # Make an effort to clear caches, which would otherwise look like leaked
+    # Tensors.
+    backprop._last_zero = [None]
+    backprop._shape_dtype = [None, None]
+    context.get_default_context().scalar_cache().clear()
+    gc.collect()
+    tensors_after = [
+        obj for obj in gc.get_objects()
+        if _is_tensor(obj) and id(obj) not in tensors_before
+    ]
+    if tensors_after:
+      raise AssertionError(("%d Tensors not deallocated after test: %s" % (
+          len(tensors_after),
+          str(tensors_after),
+      )))
+
+  return decorator
+
+
 def assert_no_garbage_created(f):
   """Test method decorator to assert that no garbage has been created.
 
@@ -507,7 +581,8 @@ def run_in_graph_and_eager_modes(
       garbage for legitimate reasons (e.g. they define a class which inherits
       from `object`), and because DEBUG_SAVEALL is sticky in some Python
       interpreters (meaning that tests which rely on objects being collected
-      elsewhere in the unit test file will not work).
+      elsewhere in the unit test file will not work). Additionally, checks that
+      nothing still has a reference to Tensors that the test allocated.
   Returns:
     Returns a decorator that will run the decorated test function
         using both a graph and using eager execution.
@@ -544,7 +619,8 @@ def run_in_graph_and_eager_modes(
             f(self, **kwargs)
 
       if assert_no_eager_garbage:
-        run_eager_mode = assert_no_garbage_created(run_eager_mode)
+        run_eager_mode = assert_no_new_tensors(
+            assert_no_garbage_created(run_eager_mode))
 
       with context.eager_mode():
         with IsolateTest():
@@ -715,25 +791,22 @@ class TensorFlowTestCase(googletest.TestCase):
       fail_msg += " : %r" % (msg) if msg else ""
       self.fail(fail_msg)
 
-  def _eval_helper(self, tensors):
-    if isinstance(tensors, ops.EagerTensor):
-      return tensors.numpy()
-    if isinstance(tensors, resource_variable_ops.ResourceVariable):
-      return tensors.read_value().numpy()
-
-    if isinstance(tensors, tuple):
-      return tuple([self._eval_helper(t) for t in tensors])
-    elif isinstance(tensors, list):
-      return [self._eval_helper(t) for t in tensors]
-    elif isinstance(tensors, dict):
-      assert not tensors, "Only support empty dict now."
-      return dict()
-    elif tensors is None:
+  def _eval_tensor(self, tensor):
+    if tensor is None:
       return None
-    elif callable(tensors):
-      return self._eval_helper(tensors())
+    elif isinstance(tensor, ops.EagerTensor):
+      return tensor.numpy()
+    elif isinstance(tensor, resource_variable_ops.ResourceVariable):
+      return tensor.read_value().numpy()
+    elif callable(tensor):
+      return self._eval_helper(tensor())
     else:
-      raise ValueError("Unsupported type %s." % type(tensors))
+      raise ValueError("Unsupported type %s." % type(tensor))
+
+  def _eval_helper(self, tensors):
+    if tensors is None:
+      return None
+    return nest.map_structure(self._eval_tensor, tensors)
 
   def evaluate(self, tensors):
     """Evaluates tensors and returns numpy values.
@@ -1057,7 +1130,8 @@ class TensorFlowTestCase(googletest.TestCase):
   def assertAllClose(self, a, b, rtol=1e-6, atol=1e-6):
     """Asserts that two numpy arrays, or dicts of same, have near values.
 
-    This does not support nested dicts.
+    This does not support nested dicts. `a` and `b` can be namedtuples too,
+    which are converted to dicts.
 
     Args:
       a: The expected numpy ndarray (or anything can be converted to one), or
@@ -1070,6 +1144,11 @@ class TensorFlowTestCase(googletest.TestCase):
     Raises:
       ValueError: if only one of `a` and `b` is a dict.
     """
+    # Check if a and/or b are namedtuples.
+    if hasattr(a, "_asdict"):
+      a = a._asdict()
+    if hasattr(b, "_asdict"):
+      b = b._asdict()
     is_a_dict = isinstance(a, dict)
     if is_a_dict != isinstance(b, dict):
       raise ValueError("Can't compare dict to non-dict, %s vs %s." % (a, b))
@@ -1092,7 +1171,9 @@ class TensorFlowTestCase(googletest.TestCase):
                                     float_rtol=1e-6,
                                     float_atol=1e-6,
                                     half_rtol=1e-3,
-                                    half_atol=1e-3):
+                                    half_atol=1e-3,
+                                    bfloat16_rtol=1e-2,
+                                    bfloat16_atol=1e-2):
     """Like assertAllClose, but also suitable for comparing fp16 arrays.
 
     In particular, the tolerance is reduced to 1e-3 if at least
@@ -1107,9 +1188,12 @@ class TensorFlowTestCase(googletest.TestCase):
       float_atol: absolute tolerance for float32.
       half_rtol: relative tolerance for float16.
       half_atol: absolute tolerance for float16.
+      bfloat16_rtol: relative tolerance for bfloat16.
+      bfloat16_atol: absolute tolerance for bfloat16.
     """
     a = self._GetNdArray(a)
     b = self._GetNdArray(b)
+    # types with lower tol are put later to overwrite previous ones.
     if (a.dtype == np.float32 or b.dtype == np.float32 or
         a.dtype == np.complex64 or b.dtype == np.complex64):
       rtol = max(rtol, float_rtol)
@@ -1117,6 +1201,10 @@ class TensorFlowTestCase(googletest.TestCase):
     if a.dtype == np.float16 or b.dtype == np.float16:
       rtol = max(rtol, half_rtol)
       atol = max(atol, half_atol)
+    if (a.dtype == dtypes.bfloat16.as_numpy_dtype or
+        b.dtype == dtypes.bfloat16.as_numpy_dtype):
+      rtol = max(rtol, bfloat16_rtol)
+      atol = max(atol, bfloat16_atol)
 
     self.assertAllClose(a, b, rtol=rtol, atol=atol)
 
@@ -1301,3 +1389,21 @@ def create_local_cluster(num_workers, num_ps, protocol="grpc",
   ]
 
   return workers, ps_servers
+
+
+def get_node_def_from_graph(node_name, graph_def):
+  """Returns the `NodeDef` instance for given node name in the graph def.
+
+  This method explores only the NodeDefs in `graph_def.node`.
+
+  Args:
+    node_name: Name of the NodeDef to search for.
+    graph_def: An instance of `GraphDef` proto.
+
+  Returns:
+    the `NodeDef` instance whose name field matches the given node_name or None.
+  """
+  for node_def in graph_def.node:
+    if node_def.name == node_name:
+      return node_def
+  return None
diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py
index 9aed3457a60c97867a03bd75aa9357fe93c26cc5..6ddb3533e5bfcaca1dfea95ee35c078427e7529b 100644
--- a/tensorflow/python/framework/test_util_test.py
+++ b/tensorflow/python/framework/test_util_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
 import random
 import threading
 
@@ -32,6 +33,7 @@ from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_ops  # pylint: disable=unused-import
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import random_ops
@@ -40,6 +42,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 
 
+@test_util.with_c_api
 class TestUtilTest(test_util.TensorFlowTestCase):
 
   def test_assert_ops_in_graph(self):
@@ -184,8 +187,8 @@ class TestUtilTest(test_util.TensorFlowTestCase):
   def _WeMustGoDeeper(self, msg):
     with self.assertRaisesOpError(msg):
       with ops.Graph().as_default():
-        node_def = ops._NodeDef("op_type", "name")
-        node_def_orig = ops._NodeDef("op_type_orig", "orig")
+        node_def = ops._NodeDef("IntOutput", "name")
+        node_def_orig = ops._NodeDef("IntOutput", "orig")
         op_orig = ops.Operation(node_def_orig, ops.get_default_graph())
         op = ops.Operation(node_def, ops.get_default_graph(),
                            original_op=op_orig)
@@ -210,6 +213,18 @@ class TestUtilTest(test_util.TensorFlowTestCase):
     with self.assertRaisesRegexp(ValueError, r"Can't compare dict to non-dict"):
       self.assertAllClose({"a": 1}, 1)
 
+  def testAllCloseNamedtuples(self):
+    a = 7
+    b = (2., 3.)
+    c = np.ones((3, 2, 4)) * 7.
+    expected = {"a": a, "b": b, "c": c}
+    my_named_tuple = collections.namedtuple("MyNamedTuple", ["a", "b", "c"])
+
+    # Identity.
+    self.assertAllClose(expected, my_named_tuple(a=a, b=b, c=c))
+    self.assertAllClose(
+        my_named_tuple(a=a, b=b, c=c), my_named_tuple(a=a, b=b, c=c))
+
   def testAllCloseDicts(self):
     a = 7
     b = (2., 3.)
@@ -316,6 +331,10 @@ class TestUtilTest(test_util.TensorFlowTestCase):
       )
 
   def testRandomSeed(self):
+    # Call setUp again for WithCApi case (since it makes a new defeault graph
+    # after setup).
+    # TODO(skyewm): remove this when C API is permanently enabled.
+    self.setUp()
     a = random.randint(1, 1000)
     a_np_rand = np.random.rand(1)
     with self.test_session():
@@ -339,7 +358,25 @@ class TestUtilTest(test_util.TensorFlowTestCase):
     with context.eager_mode():
       self.assertEqual(2, self.evaluate(model))
 
+  @test_util.run_in_graph_and_eager_modes()
+  def test_nested_tensors_evaluate(self):
+    expected = {"a": 1, "b": 2, "nested": {"d": 3, "e": 4}}
+    nested = {"a": constant_op.constant(1),
+              "b": constant_op.constant(2),
+              "nested": {"d": constant_op.constant(3),
+                         "e": constant_op.constant(4)}}
+
+    self.assertEqual(expected, self.evaluate(nested))
+
+  def test_get_node_def_from_graph(self):
+    graph_def = graph_pb2.GraphDef()
+    node_foo = graph_def.node.add()
+    node_foo.name = "foo"
+    self.assertIs(test_util.get_node_def_from_graph("foo", graph_def), node_foo)
+    self.assertIsNone(test_util.get_node_def_from_graph("bar", graph_def))
+
 
+@test_util.with_c_api
 class GarbageCollectionTest(test_util.TensorFlowTestCase):
 
   def test_no_reference_cycle_decorator(self):
@@ -363,6 +400,26 @@ class GarbageCollectionTest(test_util.TensorFlowTestCase):
 
     ReferenceCycleTest().test_has_no_cycle()
 
+  def test_no_leaked_tensor_decorator(self):
+
+    class LeakedTensorTest(object):
+
+      def __init__(inner_self):  # pylint: disable=no-self-argument
+        inner_self.assertEqual = self.assertEqual  # pylint: disable=invalid-name
+
+      @test_util.assert_no_new_tensors
+      def test_has_leak(self):
+        self.a = constant_op.constant([3.])
+
+      @test_util.assert_no_new_tensors
+      def test_has_no_leak(self):
+        constant_op.constant([3.])
+
+    with self.assertRaisesRegexp(AssertionError, "Tensors not deallocated"):
+      LeakedTensorTest().test_has_leak()
+
+    LeakedTensorTest().test_has_no_leak()
+
 
 @test_util.with_c_api
 class IsolationTest(test_util.TensorFlowTestCase):
diff --git a/tensorflow/python/framework/versions.py b/tensorflow/python/framework/versions.py
index 81529e2b1e06e70fb2839c037c555ef41bcdd291..f03b81eb28a7073873579390eae133d3c930c5a0 100644
--- a/tensorflow/python/framework/versions.py
+++ b/tensorflow/python/framework/versions.py
@@ -25,11 +25,13 @@ __version__ = pywrap_tensorflow.__version__
 __git_version__ = pywrap_tensorflow.__git_version__
 __compiler_version__ = pywrap_tensorflow.__compiler_version__
 __cxx11_abi_flag__ = pywrap_tensorflow.__cxx11_abi_flag__
+__monolithic_build__ = pywrap_tensorflow.__monolithic_build__
 
 VERSION = __version__
 GIT_VERSION = __git_version__
 COMPILER_VERSION = __compiler_version__
 CXX11_ABI_FLAG = __cxx11_abi_flag__
+MONOLITHIC_BUILD = __monolithic_build__
 
 GRAPH_DEF_VERSION = pywrap_tensorflow.GRAPH_DEF_VERSION
 GRAPH_DEF_VERSION_MIN_CONSUMER = (
@@ -42,6 +44,7 @@ __all__ = [
     "__git_version__",
     "__compiler_version__",
     "__cxx11_abi_flag__",
+    "__monolithic_build__",
     "COMPILER_VERSION",
     "CXX11_ABI_FLAG",
     "GIT_VERSION",
@@ -49,4 +52,5 @@ __all__ = [
     "GRAPH_DEF_VERSION_MIN_CONSUMER",
     "GRAPH_DEF_VERSION_MIN_PRODUCER",
     "VERSION",
+    "MONOLITHIC_BUILD",
 ]
diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i
index 18fda345e6dc55dd89ea9071f30ab998fda5ee76..0c8d04ff29518d587079a76e3fee3b2e327c6c5c 100644
--- a/tensorflow/python/grappler/cluster.i
+++ b/tensorflow/python/grappler/cluster.i
@@ -54,6 +54,23 @@ bool _PyObjAs(PyObject *input, tensorflow::NamedDevice *out) {
   $1 = &temp;
 }
 
+%typemap(in) const tensorflow::NamedDevice& (tensorflow::NamedDevice temp) {
+  char* c_string;
+  Py_ssize_t py_size;
+  if (PyBytes_AsStringAndSize($input, &c_string, &py_size) == -1) {
+    // Python has raised an error (likely TypeError or UnicodeEncodeError).
+    SWIG_fail;
+  }
+
+  if (!temp.ParseFromString(string(c_string, py_size))) {
+    PyErr_SetString(
+        PyExc_TypeError,
+        "The NamedDevice could not be parsed as a valid protocol buffer");
+    SWIG_fail;
+  }
+  $1 = &temp;
+}
+
 %typemap(in) const tensorflow::RunMetadata& (tensorflow::RunMetadata temp) {
   char* c_string;
   Py_ssize_t py_size;
@@ -83,6 +100,7 @@ bool _PyObjAs(PyObject *input, tensorflow::NamedDevice *out) {
 #include <memory>
 #include <vector>
 #include "tensorflow/core/grappler/devices.h"
+#include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/clusters/single_machine.h"
 #include "tensorflow/core/grappler/clusters/virtual_cluster.h"
 #include "tensorflow/core/grappler/costs/graph_memory.h"
@@ -90,6 +108,8 @@ bool _PyObjAs(PyObject *input, tensorflow::NamedDevice *out) {
 #include "tensorflow/core/grappler/costs/measuring_cost_estimator.h"
 #include "tensorflow/core/grappler/costs/utils.h"
 #include "tensorflow/core/protobuf/device_properties.pb.h"
+#include "tensorflow/core/framework/kernel_def.pb.h"
+#include "tensorflow/core/framework/memory_types.h"
 
 // Provide the implementation of the GCluster struct here.
 struct GCluster {
@@ -134,13 +154,17 @@ static GCluster TF_NewVirtualCluster(
   }
   tensorflow::grappler::Cluster*cluster_ =
       new tensorflow::grappler::VirtualCluster(devices);
+  PyGILState_STATE gstate = PyGILState_Ensure();
   tensorflow::Status status = cluster_->Provision();
+  PyGILState_Release(gstate);
   tensorflow::Set_TF_Status_from_Status(out_status, status);
   return GCluster(cluster_);
 }
 
 static void TF_ShutdownCluster(GCluster cluster) {
+  PyGILState_STATE gstate = PyGILState_Ensure();
   cluster->Shutdown();
+  PyGILState_Release(gstate);
 }
 
 tensorflow::Status _GetOpPerformanceDataAndRunTime(
@@ -181,6 +205,106 @@ static PyObject* TF_ListDevices(GCluster cluster) {
   return result;
 }
 
+static std::vector<string> TF_ListAvailableOps() {
+  tensorflow::OpRegistry* registry = tensorflow::OpRegistry::Global();
+  std::vector<tensorflow::OpDef> ops;
+  registry->GetRegisteredOps(&ops);
+  std::vector<string> op_names;
+  for (const tensorflow::OpDef& op : ops) {
+    op_names.push_back(op.name());
+  }
+  std::sort(op_names.begin(), op_names.end());
+  return op_names;
+}
+
+static PyObject* TF_GetSupportedDevices(GCluster cluster, GItem item) {
+  if (cluster.is_none() || item.is_none()) {
+    Py_RETURN_NONE;
+  }
+  const std::unordered_map<string, tensorflow::DeviceProperties>& devices = cluster->GetDevices();
+  std::unordered_map<string, std::vector<string>> device_types;
+  for (const auto& dev : devices) {
+    device_types[dev.second.type()].push_back(dev.first);
+  }
+
+  std::unordered_map<string, std::set<string>> supported_device_types;
+  std::unordered_map<string, std::set<string>> device_restrictions;
+
+  for (const auto& node : item->graph.node()) {
+    for (const auto& dev : device_types) {
+      const string& type = dev.first;
+      if (cluster->type() != "single_machine") {
+        // The actual kernel may not be linked in this binary.
+        supported_device_types[node.name()].insert(type);
+      } else {
+        // Check the kernel capabilities
+        const tensorflow::DeviceType dev_type(type);
+        tensorflow::Status s = tensorflow::FindKernelDef(dev_type, node, nullptr, nullptr);
+        if (s.ok()) {
+          supported_device_types[node.name()].insert(type);
+
+          // Check which inputs are restricted to reside on the host.
+          // TODO: extends this to support outputs as well
+          tensorflow::MemoryTypeVector inp_mtypes;
+          tensorflow::MemoryTypeVector out_mtypes;
+          s = tensorflow::MemoryTypesForNode(tensorflow::OpRegistry::Global(), dev_type, node,
+                                             &inp_mtypes, &out_mtypes);
+          if (s.ok()) {
+            for (int i = 0; i < inp_mtypes.size(); ++i) {
+              if (inp_mtypes[i] == tensorflow::HOST_MEMORY) {
+                device_restrictions[tensorflow::grappler::NodeName(node.input(i))].insert("CPU");
+                break;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  PyGILState_STATE gstate = PyGILState_Ensure();
+  PyObject* result = PyDict_New();
+
+  for (const auto& supported_dev : supported_device_types) {
+    const string& node = supported_dev.first;
+    std::set<string> feasible;
+    const auto it = device_restrictions.find(node);
+    if (it != device_restrictions.end()) {
+      const std::set<string>& candidates = supported_dev.second;
+      const std::set<string>& valid = it->second;
+      std::set_intersection(candidates.begin(), candidates.end(), valid.begin(), valid.end(),
+                            std::inserter(feasible, feasible.begin()));
+    } else {
+      feasible = supported_dev.second;
+    }
+
+    std::vector<string> device_names;
+    for (const string& type : feasible) {
+      auto it = device_types.find(type);
+      CHECK(it != device_types.end());
+      for (const string& name : it->second) {
+        device_names.push_back(name);
+      }
+    }
+
+    PyObject* dev = PyList_New(device_names.size());
+    for (int i = 0; i < device_names.size(); ++i) {
+      PyList_SetItem(dev, i, PyString_FromString(device_names[i].c_str()));
+    }
+    CHECK_EQ(0, PyDict_SetItem(result, PyString_FromString(node.c_str()), dev));
+  }
+  PyGILState_Release(gstate);
+  return result;
+}
+
+
+static double TF_EstimatePerformance(const tensorflow::NamedDevice& device) {
+  tensorflow::grappler::OpLevelCostEstimator estimator;
+  tensorflow::grappler::OpLevelCostEstimator::DeviceInfo info =
+      estimator.GetDeviceInfo(device.properties());
+  return info.gigaops;
+}
+
 static PyObject* TF_MeasureCosts(
     GItem item,
     GCluster cluster,
@@ -307,10 +431,12 @@ static GCluster TF_NewVirtualCluster(
     TF_Status* out_status);
 static void TF_ShutdownCluster(GCluster cluster);
 static PyObject* TF_ListDevices(GCluster cluster);
+static std::vector<string> TF_ListAvailableOps();
+static PyObject* TF_GetSupportedDevices(GCluster cluster, GItem item);
+static float TF_EstimatePerformance(const tensorflow::NamedDevice& device);
 static PyObject* TF_MeasureCosts(
     GItem item, GCluster cluster,
     bool generate_timeline, TF_Status* out_status);
 static PyObject* TF_DeterminePeakMemoryUsage(
     GItem item, GCluster cluster,
     TF_Status* out_status);
-
diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py
index cf795fddb71cd9f6119f8b27e17464cb44e77d1e..079d07115b31da86600821a098aec08ec60bf436 100644
--- a/tensorflow/python/grappler/cluster.py
+++ b/tensorflow/python/grappler/cluster.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import contextlib
+
 from tensorflow.core.framework import step_stats_pb2
 from tensorflow.core.grappler.costs import op_performance_data_pb2
 from tensorflow.core.protobuf import device_properties_pb2
@@ -56,9 +58,13 @@ class Cluster(object):
         self._tf_cluster = tf_cluster.TF_NewVirtualCluster(
             devices_serialized, status)
 
-  def __del__(self):
+  def Shutdown(self):
     if self._tf_cluster is not None:
       tf_cluster.TF_ShutdownCluster(self._tf_cluster)
+      self._tf_cluster = None
+
+  def __del__(self):
+    self.Shutdown()
 
   @property
   def tf_cluster(self):
@@ -74,6 +80,18 @@ class Cluster(object):
         devices.append(device_properties_pb2.NamedDevice.FromString(raw_dev))
     return devices
 
+  def ListAvailableOps(self):
+    """Returns a list of all the available operations (sorted alphatically)."""
+    return tf_cluster.TF_ListAvailableOps()
+
+  def GetSupportedDevices(self, item):
+    return tf_cluster.TF_GetSupportedDevices(self._tf_cluster, item.tf_item)
+
+  def EstimatePerformance(self, device):
+    """Estimate the performance of the specified device."""
+    serialized = device.SerializeToString()
+    return tf_cluster.TF_EstimatePerformance(serialized)
+
   def MeasureCosts(self, item):
     """Returns the cost of running the specified item.
 
@@ -108,3 +126,14 @@ class Cluster(object):
           item.tf_item, self._tf_cluster, status)
 
     return ret_from_swig
+
+
+@contextlib.contextmanager
+def Provision(allow_soft_placement=True,
+              disable_detailed_stats=True,
+              disable_timeline=True,
+              devices=None):
+  cluster = Cluster(allow_soft_placement, disable_detailed_stats,
+                    disable_timeline, devices)
+  yield cluster
+  cluster.Shutdown()
diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py
index f1f02963de0fac8f2a04075e9efd97aec429da9d..2292b2c732b2d5d0d40b44d8ca831f4e72b057c6 100644
--- a/tensorflow/python/grappler/cluster_test.py
+++ b/tensorflow/python/grappler/cluster_test.py
@@ -23,6 +23,8 @@ from tensorflow.python.framework import meta_graph
 from tensorflow.python.framework import ops
 from tensorflow.python.grappler import cluster
 from tensorflow.python.grappler import item
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import test
 
@@ -43,7 +45,7 @@ class ClusterTest(test.TestCase):
       op_perfs, run_time, step_stats = grappler_cluster.MeasureCosts(
           grappler_item)
       self.assertTrue(run_time > 0)
-      self.assertEqual(len(op_perfs), 10)
+      self.assertEqual(len(op_perfs), 9)
       self.assertTrue(step_stats.dev_stats)
 
   def testNoDetailedStats(self):
@@ -65,7 +67,7 @@ class ClusterTest(test.TestCase):
 
   def testMemoryEstimates(self):
     with ops.Graph().as_default() as g:
-      with ops.device('/job:localhost/replica:0/task:0/cpu:0'):
+      with ops.device('/job:localhost/replica:0/task:0/device:CPU:0'):
         a = random_ops.random_uniform(shape=())
         b = random_ops.random_uniform(shape=())
         c = a + b
@@ -77,7 +79,7 @@ class ClusterTest(test.TestCase):
             disable_detailed_stats=True, disable_timeline=True)
         peak_mem = grappler_cluster.DeterminePeakMemoryUsage(grappler_item)
         self.assertLessEqual(1, len(peak_mem))
-        snapshot = peak_mem['/job:localhost/replica:0/task:0/cpu:0']
+        snapshot = peak_mem['/job:localhost/replica:0/task:0/device:CPU:0']
         peak_usage = snapshot[0]
         self.assertEqual(52, peak_usage)
         live_tensors = snapshot[1]
@@ -106,6 +108,77 @@ class ClusterTest(test.TestCase):
       self.assertGreater(run_time, 0)
       self.assertEqual(len(op_perfs), 15)
 
+      estimated_perf = grappler_cluster.EstimatePerformance(named_device)
+      self.assertEqual(7680.0, estimated_perf)
+
+  def testContext(self):
+    with ops.Graph().as_default() as g:
+      a = random_ops.random_uniform(shape=())
+      b = random_ops.random_uniform(shape=())
+      c = a + b
+      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+      train_op.append(c)
+      mg = meta_graph.create_meta_graph_def(graph=g)
+      grappler_item = item.Item(mg)
+
+    with cluster.Provision(
+        disable_detailed_stats=False, disable_timeline=False) as gcluster:
+      op_perfs, run_time, step_stats = gcluster.MeasureCosts(grappler_item)
+      self.assertTrue(run_time > 0)
+      self.assertEqual(len(op_perfs), 9)
+      self.assertTrue(step_stats.dev_stats)
+
+  def testAvailableOps(self):
+    with cluster.Provision() as gcluster:
+      op_names = gcluster.ListAvailableOps()
+      self.assertTrue(b'Add' in op_names)
+      self.assertTrue(b'MatMul' in op_names)
+      self.assertEqual(op_names, sorted(op_names))
+
+  def testSupportDevices(self):
+    with ops.Graph().as_default() as g:
+      a = random_ops.random_uniform(shape=(2, 3))
+      b = random_ops.random_uniform(shape=(2, 3))
+      c = a + b
+      dims = math_ops.range(0, array_ops.rank(c), 1)
+      d = math_ops.reduce_sum(a, axis=dims)
+      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+      train_op.append(d)
+      mg = meta_graph.create_meta_graph_def(graph=g)
+      grappler_item = item.Item(mg)
+
+      device_properties = device_properties_pb2.DeviceProperties(
+          type='GPU', frequency=1000, num_cores=60)
+      named_gpu = device_properties_pb2.NamedDevice(
+          properties=device_properties, name='/GPU:0')
+      device_properties = device_properties_pb2.DeviceProperties(
+          type='CPU', frequency=3000, num_cores=6)
+      named_cpu = device_properties_pb2.NamedDevice(
+          properties=device_properties, name='/CPU:0')
+      virtual_cluster = cluster.Cluster(devices=[named_cpu, named_gpu])
+      supported_dev = virtual_cluster.GetSupportedDevices(grappler_item)
+      self.assertEqual(supported_dev['add'], ['/CPU:0', '/GPU:0'])
+      self.assertEqual(supported_dev['Sum'], ['/CPU:0', '/GPU:0'])
+      self.assertEqual(supported_dev['range'], ['/CPU:0', '/GPU:0'])
+
+      real_cluster = cluster.Cluster()
+      supported_dev = real_cluster.GetSupportedDevices(grappler_item)
+      if test.is_gpu_available():
+        self.assertEqual(supported_dev['add'], [
+            '/job:localhost/replica:0/task:0/device:CPU:0',
+            '/job:localhost/replica:0/task:0/device:GPU:0'
+        ])
+        self.assertEqual(supported_dev['Sum'], [
+            '/job:localhost/replica:0/task:0/device:CPU:0',
+            '/job:localhost/replica:0/task:0/device:GPU:0'
+        ])
+        # The axis tensor must reside on the host
+        self.assertEqual(supported_dev['range'],
+                         ['/job:localhost/replica:0/task:0/device:CPU:0'])
+      else:
+        self.assertEqual(supported_dev['add'],
+                         ['/job:localhost/replica:0/task:0/device:CPU:0'])
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/grappler/cost_analyzer_test.py b/tensorflow/python/grappler/cost_analyzer_test.py
index f4933a45149ac1d003e24e1ebc6b20d0ad708b6c..511908c79ce47d6849bf97d11bc42f2f1bb13f18 100644
--- a/tensorflow/python/grappler/cost_analyzer_test.py
+++ b/tensorflow/python/grappler/cost_analyzer_test.py
@@ -89,13 +89,10 @@ class CostAnalysisTest(test.TestCase):
     self.assertTrue(b"MatMul" in report)
     self.assertTrue(b"ApplyAdam" in report)
     self.assertTrue(b"Conv2D" in report)
-    self.assertTrue(b"Conv2DBackpropInput" in report)
     self.assertTrue(b"Conv2DBackpropFilter" in report)
     self.assertTrue(b"Softmax" in report)
 
-    for op_type in [
-        b"MatMul", b"Conv2D", b"Conv2DBackpropInput", b"Conv2DBackpropFilter"
-    ]:
+    for op_type in [b"MatMul", b"Conv2D", b"Conv2DBackpropFilter"]:
       matcher = re.compile(
           br"\s+" + op_type + br",\s*(\d+),\s*(\d+),\s*([\d\.eE+-]+)%,\s*" +
           br"([\d\.eE+-]+)%,\s*(-?\d+),\s*(\d+),", re.MULTILINE)
@@ -130,7 +127,8 @@ class CostAnalysisTest(test.TestCase):
 
     # Check the report
     self.assertTrue(
-        "Peak usage for device /job:localhost/replica:0/task:0/cpu:0: 16 bytes"
+        "Peak usage for device /job:localhost/replica:0/task:0/device:CPU:0: "
+        "16 bytes"
         in report)
     self.assertTrue("  a:0 uses 4 bytes" in report)
     self.assertTrue("  b:0 uses 4 bytes" in report)
diff --git a/tensorflow/python/grappler/datasets_test.py b/tensorflow/python/grappler/datasets_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d942af597c180576ebe65e26ad39923754092f3
--- /dev/null
+++ b/tensorflow/python/grappler/datasets_test.py
@@ -0,0 +1,447 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the datasets shape inference."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import meta_graph
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.grappler import item
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class GrapplerTest(test.TestCase):
+
+  def testFromTensors(self):
+    test_cases = [{
+        'tensor': 0,
+        'shape': tensor_shape.TensorShape([])
+    }, {
+        'tensor': np.array([1, 2, 3]),
+        'shape': tensor_shape.TensorShape([3])
+    }, {
+        'tensor': np.array([[1, 2, 3]]),
+        'shape': tensor_shape.TensorShape([1, 3])
+    }]
+
+    for test_case in test_cases:
+      with ops.Graph().as_default() as g:
+        dataset = dataset_ops.Dataset.from_tensors(test_case['tensor'])
+        iterator = dataset.make_one_shot_iterator()
+        get_next = iterator.get_next()
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        train_op.append(get_next)
+        mg = meta_graph.create_meta_graph_def(graph=g)
+        grappler_item = item.Item(mg)
+        op_properties = grappler_item.GetOpProperties()
+        self.assertEqual(test_case['shape'],
+                         op_properties['IteratorGetNext'][0].shape)
+
+  def testFromTensorSlices(self):
+    test_cases = [{
+        'tensor': np.array([1, 2, 3]),
+        'shape': tensor_shape.TensorShape([])
+    }, {
+        'tensor': np.array([[1, 2, 3]]),
+        'shape': tensor_shape.TensorShape([3])
+    }, {
+        'tensor': np.array([[[1, 2, 3]]]),
+        'shape': tensor_shape.TensorShape([1, 3])
+    }]
+
+    for test_case in test_cases:
+      with ops.Graph().as_default() as g:
+        dataset = dataset_ops.Dataset.from_tensor_slices(test_case['tensor'])
+        iterator = dataset.make_one_shot_iterator()
+        get_next = iterator.get_next()
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        train_op.append(get_next)
+        mg = meta_graph.create_meta_graph_def(graph=g)
+        grappler_item = item.Item(mg)
+        op_properties = grappler_item.GetOpProperties()
+        self.assertEqual(test_case['shape'],
+                         op_properties['IteratorGetNext'][0].shape)
+
+  def testFromGenerator(self):
+    test_cases = [{
+        'tensor': 0,
+        'shape': tensor_shape.TensorShape([])
+    }, {
+        'tensor': np.array([1, 2, 3]),
+        'shape': tensor_shape.TensorShape([3])
+    }, {
+        'tensor': np.array([[1, 2, 3]]),
+        'shape': tensor_shape.TensorShape([1, 3])
+    }]
+
+    for test_case in test_cases:
+
+      def make_generator(tensor):
+
+        def generator():
+          yield tensor
+
+        return generator
+
+      with ops.Graph().as_default() as g:
+        dataset = dataset_ops.Dataset.from_generator(
+            make_generator(test_case['tensor']),
+            dtypes.int64,
+            output_shapes=test_case['shape'])
+        iterator = dataset.make_one_shot_iterator()
+        get_next = iterator.get_next()
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        train_op.append(get_next)
+        mg = meta_graph.create_meta_graph_def(graph=g)
+        grappler_item = item.Item(mg)
+        op_properties = grappler_item.GetOpProperties()
+        self.assertEqual(test_case['shape'],
+                         op_properties['IteratorGetNext'][0].shape)
+
+  def testRange(self):
+    with ops.Graph().as_default() as g:
+      dataset = dataset_ops.Dataset.range(42)
+      iterator = dataset.make_one_shot_iterator()
+      get_next = iterator.get_next()
+      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+      train_op.append(get_next)
+      mg = meta_graph.create_meta_graph_def(graph=g)
+      grappler_item = item.Item(mg)
+      op_properties = grappler_item.GetOpProperties()
+      self.assertEqual(tensor_shape.scalar(),
+                       op_properties['IteratorGetNext'][0].shape)
+
+  def _testTransformation(self, fn):
+    test_cases = [{
+        'tensor': 0,
+        'shape': tensor_shape.TensorShape({})
+    }, {
+        'tensor': np.array([1, 2, 3]),
+        'shape': tensor_shape.TensorShape([3])
+    }, {
+        'tensor': np.array([[1, 2, 3]]),
+        'shape': tensor_shape.TensorShape([1, 3])
+    }]
+
+    for test_case in test_cases:
+      with ops.Graph().as_default() as g:
+        dataset = dataset_ops.Dataset.from_tensors(test_case['tensor'])
+        dataset = fn(dataset, test_case['tensor'], test_case['shape'])
+        iterator = dataset.make_one_shot_iterator()
+        get_next = iterator.get_next()
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        train_op.append(get_next)
+        mg = meta_graph.create_meta_graph_def(graph=g)
+        grappler_item = item.Item(mg)
+        op_properties = grappler_item.GetOpProperties()
+        self.assertEqual(test_case['shape'],
+                         op_properties['IteratorGetNext'][0].shape)
+
+  def testConcatenate(self):
+
+    def fn(dataset, tensor, shape):
+      del shape
+      return dataset.concatenate(dataset_ops.Dataset.from_tensors(tensor))
+
+    self._testTransformation(fn)
+
+  def testPrefetch(self):
+
+    def fn(dataset, tensor, shape):
+      del tensor, shape
+      return dataset.prefetch(42)
+
+    self._testTransformation(fn)
+
+  def testRepeat(self):
+
+    def fn(dataset, tensor, shape):
+      del tensor, shape
+      return dataset.repeat(42)
+
+    self._testTransformation(fn)
+
+  def testShuffle(self):
+
+    def fn(dataset, tensor, shape):
+      del tensor, shape
+      return dataset.shuffle(42)
+
+    self._testTransformation(fn)
+
+  def testCache(self):
+
+    def fn(dataset, tensor, shape):
+      del tensor, shape
+      return dataset.cache()
+
+    self._testTransformation(fn)
+
+  def testTake(self):
+
+    def fn(dataset, tensor, shape):
+      del tensor, shape
+      return dataset.take(42)
+
+    self._testTransformation(fn)
+
+  def testSkip(self):
+
+    def fn(dataset, tensor, shape):
+      del tensor, shape
+      return dataset.skip(42)
+
+    self._testTransformation(fn)
+
+  def testShard(self):
+
+    def fn(dataset, tensor, shape):
+      del tensor, shape
+      return dataset.shard(42, 0)
+
+    self._testTransformation(fn)
+
+  def testFilter(self):
+
+    def fn(dataset, tensor, shape):
+      del tensor, shape
+      return dataset.filter(lambda x: True)
+
+    self._testTransformation(fn)
+
+  def as_tensor_shape(self, proto_with_symbolic_values):
+    for i in range(len(proto_with_symbolic_values.dim)):
+      if proto_with_symbolic_values.dim[i].size < -1:
+        proto_with_symbolic_values.dim[i].size = -1
+    return tensor_shape.TensorShape(proto_with_symbolic_values)
+
+  def testBatch(self):
+    test_cases = [{
+        'tensor': 0,
+        'shape': tensor_shape.TensorShape([None])
+    }, {
+        'tensor': np.array([1, 2, 3]),
+        'shape': tensor_shape.TensorShape([None, 3])
+    }, {
+        'tensor': np.array([[1, 2, 3]]),
+        'shape': tensor_shape.TensorShape([None, 1, 3])
+    }]
+
+    for test_case in test_cases:
+      with ops.Graph().as_default() as g:
+        dataset = dataset_ops.Dataset.from_tensors(test_case['tensor'])
+        dataset = dataset.batch(42)
+        iterator = dataset.make_one_shot_iterator()
+        get_next = iterator.get_next()
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        train_op.append(get_next)
+        mg = meta_graph.create_meta_graph_def(graph=g)
+        grappler_item = item.Item(mg)
+        op_properties = grappler_item.GetOpProperties()
+        inferred_shape = self.as_tensor_shape(
+            op_properties['IteratorGetNext'][0].shape)
+        self.assertTrue(test_case['shape'][0].is_compatible_with(
+            inferred_shape[0]))
+        self.assertEqual(test_case['shape'][1:], inferred_shape[1:])
+
+  def testPaddedBatch(self):
+    test_cases = [{
+        'tensor': 0,
+        'shape': tensor_shape.TensorShape([None])
+    }, {
+        'tensor': np.array([1, 2, 3]),
+        'shape': tensor_shape.TensorShape([None, 4])
+    }, {
+        'tensor': np.array([[1, 2, 3]]),
+        'shape': tensor_shape.TensorShape([None, 2, 4])
+    }]
+
+    for test_case in test_cases:
+      with ops.Graph().as_default() as g:
+        dataset = dataset_ops.Dataset.from_tensors(test_case['tensor'])
+        dataset = dataset.padded_batch(42, padded_shapes=test_case['shape'][1:])
+        iterator = dataset.make_one_shot_iterator()
+        get_next = iterator.get_next()
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        train_op.append(get_next)
+        mg = meta_graph.create_meta_graph_def(graph=g)
+        grappler_item = item.Item(mg)
+        op_properties = grappler_item.GetOpProperties()
+        inferred_shape = self.as_tensor_shape(
+            op_properties['IteratorGetNext'][0].shape)
+        self.assertTrue(test_case['shape'][0].is_compatible_with(
+            inferred_shape[0]))
+        self.assertEqual(test_case['shape'][1:], inferred_shape[1:])
+
+  def testFlatMap(self):
+    test_cases = [{
+        'tensor': 0,
+        'shape': tensor_shape.TensorShape([])
+    }, {
+        'tensor': np.array([1, 2, 3]),
+        'shape': tensor_shape.TensorShape([3])
+    }, {
+        'tensor': np.array([[1, 2, 3]]),
+        'shape': tensor_shape.TensorShape([1, 3])
+    }]
+
+    for test_case in test_cases:
+      with ops.Graph().as_default() as g:
+        dataset = dataset_ops.Dataset.range(42)
+
+        def make_dataset(tensor):
+
+          def dataset_fn(n):
+            return dataset_ops.Dataset.from_tensors(tensor).repeat(n)
+
+          return dataset_fn
+
+        dataset = dataset.flat_map(make_dataset(test_case['tensor']))
+        iterator = dataset.make_one_shot_iterator()
+        get_next = iterator.get_next()
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        train_op.append(get_next)
+        mg = meta_graph.create_meta_graph_def(graph=g)
+        grappler_item = item.Item(mg)
+        op_properties = grappler_item.GetOpProperties()
+        self.assertEqual(test_case['shape'],
+                         op_properties['IteratorGetNext'][0].shape)
+
+  def testInterleave(self):
+    test_cases = [{
+        'tensor': 0,
+        'shape': tensor_shape.TensorShape([])
+    }, {
+        'tensor': np.array([1, 2, 3]),
+        'shape': tensor_shape.TensorShape([3])
+    }, {
+        'tensor': np.array([[1, 2, 3]]),
+        'shape': tensor_shape.TensorShape([1, 3])
+    }]
+
+    for test_case in test_cases:
+      with ops.Graph().as_default() as g:
+        dataset = dataset_ops.Dataset.range(42)
+
+        def make_dataset(tensor):
+
+          def dataset_fn(n):
+            return dataset_ops.Dataset.from_tensors(tensor).repeat(n)
+
+          return dataset_fn
+
+        dataset = dataset.interleave(
+            make_dataset(test_case['tensor']), cycle_length=42)
+        iterator = dataset.make_one_shot_iterator()
+        get_next = iterator.get_next()
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        train_op.append(get_next)
+        mg = meta_graph.create_meta_graph_def(graph=g)
+        grappler_item = item.Item(mg)
+        op_properties = grappler_item.GetOpProperties()
+        self.assertEqual(test_case['shape'],
+                         op_properties['IteratorGetNext'][0].shape)
+
+  def testMap(self):
+    test_cases = [{
+        'tensor': 0,
+        'shape': tensor_shape.TensorShape([])
+    }, {
+        'tensor': np.array([1, 2, 3]),
+        'shape': tensor_shape.TensorShape([3])
+    }, {
+        'tensor': np.array([[1, 2, 3]]),
+        'shape': tensor_shape.TensorShape([3, 1])
+    }, {
+        'tensor': np.array([[[1, 2, 3], [4, 5, 6]]]),
+        'shape': tensor_shape.TensorShape([3, 2, 1])
+    }]
+
+    for test_case in test_cases:
+      with ops.Graph().as_default() as g:
+        dataset = dataset_ops.Dataset.from_tensors(test_case['tensor'])
+        dataset = dataset.map(array_ops.transpose)
+        iterator = dataset.make_one_shot_iterator()
+        get_next = iterator.get_next()
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        train_op.append(get_next)
+        mg = meta_graph.create_meta_graph_def(graph=g)
+        grappler_item = item.Item(mg)
+        op_properties = grappler_item.GetOpProperties()
+        self.assertEqual(test_case['shape'],
+                         op_properties['IteratorGetNext'][0].shape)
+
+  def testFromStructure(self):
+    test_cases = [{
+        'shape': tensor_shape.TensorShape([])
+    }, {
+        'shape': tensor_shape.TensorShape([3])
+    }, {
+        'shape': tensor_shape.TensorShape([1, 2])
+    }, {
+        'shape': tensor_shape.TensorShape([1, 2, 3])
+    }]
+
+    for test_case in test_cases:
+      with ops.Graph().as_default() as g:
+        iterator = iterator_ops.Iterator.from_structure(
+            dtypes.int64, output_shapes=test_case['shape'])
+        get_next = iterator.get_next()
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        train_op.append(get_next)
+        mg = meta_graph.create_meta_graph_def(graph=g)
+        grappler_item = item.Item(mg)
+        op_properties = grappler_item.GetOpProperties()
+        self.assertEqual(test_case['shape'],
+                         op_properties['IteratorGetNext'][0].shape)
+
+  def testFromStringHandle(self):
+    test_cases = [{
+        'shape': tensor_shape.TensorShape([])
+    }, {
+        'shape': tensor_shape.TensorShape([3])
+    }, {
+        'shape': tensor_shape.TensorShape([1, 2])
+    }, {
+        'shape': tensor_shape.TensorShape([1, 2, 3])
+    }]
+
+    for test_case in test_cases:
+      with ops.Graph().as_default() as g:
+        iterator = iterator_ops.Iterator.from_structure(dtypes.int64)
+        handle = iterator.string_handle()
+        iterator = iterator_ops.Iterator.from_string_handle(
+            handle, dtypes.int64, output_shapes=test_case['shape'])
+        get_next = iterator.get_next()
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        train_op.append(get_next)
+        mg = meta_graph.create_meta_graph_def(graph=g)
+        grappler_item = item.Item(mg)
+        op_properties = grappler_item.GetOpProperties()
+        self.assertEqual(test_case['shape'],
+                         op_properties['IteratorGetNext'][0].shape)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i
index 7dd79f7c82c35a1bb84881a350d31eb00e9c40f7..d0fc1a04f220e0a053257e0206bb07b25f3767c6 100644
--- a/tensorflow/python/grappler/item.i
+++ b/tensorflow/python/grappler/item.i
@@ -42,9 +42,12 @@ struct GItem {
 #include <unordered_set>
 #include <map>
 #include "tensorflow/c/tf_status_helper.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/costs/op_performance_data.pb.h"
 #include "tensorflow/core/grappler/grappler_item_builder.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
+#include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
@@ -80,6 +83,7 @@ static GItem TF_NewItem(
   tensorflow::grappler::ItemConfig cfg;
   cfg.ignore_user_placement = ignore_user_placement;
   cfg.ignore_colocation = ignore_colocation;
+  cfg.inline_functions = true;
   std::unique_ptr<tensorflow::grappler::GrapplerItem> item =
       tensorflow::grappler::GrapplerItemFromMetaGraphDef("item", meta_graph, cfg);
   if (!item) {
@@ -92,7 +96,8 @@ static GItem TF_NewItem(
   return GItem(item.release());
 }
 
-static std::vector<string> TF_IdentifyImportantOps(GItem item) {
+static std::vector<string> TF_IdentifyImportantOps(GItem item, bool sort_topologically,
+                                                   TF_Status* status) {
   if (item.is_none()) {
     return {};
   }
@@ -108,8 +113,23 @@ static std::vector<string> TF_IdentifyImportantOps(GItem item) {
   }
 
   std::vector<string> ops;
-  for (const auto& op_name : op_names) {
-    ops.push_back(op_name);
+  if (sort_topologically) {
+    tensorflow::GraphDef subgraph;
+    for (const tensorflow::NodeDef& node : item->graph.node()) {
+      if (op_names.find(node.name()) != op_names.end()) {
+        *subgraph.add_node() = node;
+      }
+    }
+    tensorflow::Status s = tensorflow::grappler::TopologicalSort(&subgraph);
+    tensorflow::Set_TF_Status_from_Status(status, s);
+    for (const tensorflow::NodeDef& node : subgraph.node()) {
+      ops.push_back(node.name());
+    }
+  }
+  else {
+    for (const auto& op_name : op_names) {
+      ops.push_back(op_name);
+    }
   }
 
   return ops;
@@ -120,7 +140,7 @@ static PyObject* TF_GetOpProperties(GItem item) {
     Py_RETURN_NONE;
   }
   tensorflow::grappler::GraphProperties properties(*item);
-  tensorflow::Status status = properties.InferStatically();
+  tensorflow::Status status = properties.InferStatically(false);
   if (!status.ok()) {
     Py_RETURN_NONE;
   }
@@ -145,6 +165,139 @@ static PyObject* TF_GetOpProperties(GItem item) {
   return props;
 }
 
+class ColocationGroups {
+public:
+  void Group(const string& x, const string& y) {
+    Rep* x_root = Find(x);
+    Rep* y_root = Find(y);
+
+    // x and y are already in the same set
+    if (x_root == y_root) {
+      return;
+    }
+    // x and y are not in same set, so we merge them
+    // Use the occasion to strengthen what we know about the handle by merging the
+    // information about the 2 subsets.
+    if (x_root->rank < y_root->rank) {
+      x_root->parent = y_root;
+    } else if (x_root->rank > y_root->rank) {
+      y_root->parent = x_root;
+    } else {
+      // Arbitrarily make one root the new parent
+      y_root->parent = x_root;
+      x_root->rank = x_root->rank + 1;
+    }
+  }
+
+  void ExtractGroups(std::vector<std::vector<string>>* groups) {
+    groups->reserve(nodes_.size());
+    std::unordered_map<const Rep*, int> group_ids;
+    for (const auto& rep : nodes_) {
+      Rep* r = Find(rep.first);
+      auto it = group_ids.find(r);
+      std::vector<string>* g;
+      if (it == group_ids.end()) {
+        int id = group_ids.size();
+        group_ids[r] = id;
+        groups->resize(id+1);
+        g = &groups->back();
+      } else {
+        int id = it->second;
+        g = &((*groups)[id]);
+      }
+      g->push_back(rep.first);
+    }
+  }
+
+private:
+  struct Rep {
+    // Parent in the tree used to encode the set.
+    Rep* parent;
+    // Rank in the tree, used to figure out how to compress the path to the root
+    // of the tree.
+    int rank;
+    // The node.
+    string value;
+  };
+
+  Rep* Find(const string& n) {
+    auto it = nodes_.find(n);
+    if (it == nodes_.end()) {
+      // This is the first time we process this handle, create an entry for it.
+      Rep* node = new Rep;
+      node->parent = node;
+      node->rank = 0;
+      node->value = n;
+      nodes_[n] = node;
+      return node;
+    }
+    // Return the representative for the set, which is the root of the tree. Apply
+    // path compression to speedup future queries.
+    Rep* node = it->second;
+    Rep* root = node->parent;
+    while (root != root->parent) {
+      root = root->parent;
+    }
+    while (node->parent != root) {
+      Rep* next = node->parent;
+      node->parent = root;
+      node = next;
+    }
+    return root;
+  }
+
+  std::unordered_map<string, Rep*> nodes_;
+};
+
+static PyObject* TF_GetColocationGroups(GItem item) {
+  if (item.is_none()) {
+    Py_RETURN_NONE;
+  }
+  ColocationGroups groupings;
+  tensorflow::OpRegistry* registry = tensorflow::OpRegistry::Global();
+  for (const auto& node : item->graph.node()) {
+    const tensorflow::OpDef* op_def;
+    tensorflow::Status s = registry->LookUpOpDef(node.op(), &op_def);
+    if (!s.ok()) {
+      continue;
+    }
+    tensorflow::NameRangeMap inputs;
+    tensorflow::NameRangeMap outputs;
+    s = tensorflow::NameRangesForNode(node, *op_def, &inputs, &outputs);
+    if (!s.ok()) {
+      continue;
+    }
+    int i = 0;
+    for (const auto& arg : op_def->input_arg()) {
+      if (!arg.is_ref()) {
+        continue;
+      }
+      const auto& range = inputs[arg.name()];
+      for (int i = range.first; i < range.second; ++i) {
+        string input = tensorflow::grappler::NodeName(node.input(i));
+        groupings.Group(node.name(), input);
+      }
+    }
+  }
+
+  std::vector<std::vector<string>> groups;
+  groupings.ExtractGroups(&groups);
+
+  PyGILState_STATE gstate = PyGILState_Ensure();
+  PyObject* result = PyList_New(groups.size());
+  for (int i = 0; i < groups.size(); ++i) {
+    const std::vector<string>& group = groups[i];
+    PyObject* g = PyTuple_New(group.size());
+    for (int j = 0; j < group.size(); ++j) {
+      const string& node_name = group[j];
+      PyTuple_SetItem(g, j, PyString_FromString(node_name.c_str()));
+    }
+    PyList_SetItem(result, i, g);
+  }
+  PyGILState_Release(gstate);
+  return result;
+}
+
 %}
 
 
@@ -152,5 +305,7 @@ static PyObject* TF_GetOpProperties(GItem item) {
 static GItem TF_NewItem(
     const tensorflow::MetaGraphDef& meta_graph, bool ignore_colocation,
     bool ignore_user_placement, TF_Status* out_status);
-static std::vector<string> TF_IdentifyImportantOps(GItem item);
+static std::vector<string> TF_IdentifyImportantOps(GItem item, bool sort_topologically,
+                                                   TF_Status* status);
 static PyObject* TF_GetOpProperties(GItem item);
+static PyObject* TF_GetColocationGroups(GItem item);
diff --git a/tensorflow/python/grappler/item.py b/tensorflow/python/grappler/item.py
index 4fc94ec9680464aea17875189ac4a749f3fa11dc..4a083849bd39f606877069419396d8c42ef077eb 100644
--- a/tensorflow/python/grappler/item.py
+++ b/tensorflow/python/grappler/item.py
@@ -50,8 +50,10 @@ class Item(object):
     self._tf_item = None
     self._BuildTFItem()
 
-  def IdentifyImportantOps(self):
-    return tf_item.TF_IdentifyImportantOps(self.tf_item)
+  def IdentifyImportantOps(self, sort_topologically=False):
+    with errors.raise_exception_on_not_ok_status() as status:
+      return tf_item.TF_IdentifyImportantOps(self.tf_item, sort_topologically,
+                                             status)
 
   def GetOpProperties(self):
     ret_from_swig = tf_item.TF_GetOpProperties(self.tf_item)
@@ -64,6 +66,17 @@ class Item(object):
       properties[key] = prop
     return properties
 
+  def GetColocationGroups(self):
+    """Return a list of hard colocation constraints.
+
+    All the nodes in a colocation tuple must be placed on the same device for
+    the model to work.
+
+    Returns:
+      A list of colocation tuples.
+    """
+    return tf_item.TF_GetColocationGroups(self.tf_item)
+
   @property
   def metagraph(self):
     return self._metagraph
diff --git a/tensorflow/python/grappler/item_test.py b/tensorflow/python/grappler/item_test.py
index 71c68d25cd928d5cb2dc5028ed331d468c5b9826..cd70e2fdecc74f9d99240ac566f3c28e900a06c2 100644
--- a/tensorflow/python/grappler/item_test.py
+++ b/tensorflow/python/grappler/item_test.py
@@ -26,6 +26,9 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.grappler import item
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_array_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
@@ -53,7 +56,7 @@ class ItemTest(test.TestCase):
       mg = meta_graph.create_meta_graph_def(graph=g)
       grappler_item = item.Item(mg)
       op_list = grappler_item.IdentifyImportantOps()
-      self.assertEqual([b'Const', b'Const_1', b'add'], op_list)
+      self.assertItemsEqual([b'Const', b'Const_1', b'add'], op_list)
 
   def testOpProperties(self):
     with ops.Graph().as_default() as g:
@@ -104,6 +107,21 @@ class ItemTest(test.TestCase):
     newest_tf_item = grappler_item.tf_item
     self.assertEqual(new_tf_item, newest_tf_item)
 
+  def testColocationContraints(self):
+    with ops.Graph().as_default() as g:
+      c = constant_op.constant([10])
+      v = variables.Variable([3], dtype=dtypes.int32)
+      i = gen_array_ops._ref_identity(v)
+      a = state_ops.assign(i, c)
+      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+      train_op.append(a)
+      mg = meta_graph.create_meta_graph_def(graph=g)
+      grappler_item = item.Item(mg)
+      groups = grappler_item.GetColocationGroups()
+      self.assertEqual(len(groups), 1)
+      self.assertItemsEqual(
+          groups[0], ['Assign', 'RefIdentity', 'Variable', 'Variable/Assign'])
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 626e0502cbb87bb5cb16c4be3098102b5c29af19..25c5ef6b68452c0b8f8dc67a15187db1df5e3934 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -34,9 +34,13 @@ from tensorflow.python.grappler import tf_optimizer
 from tensorflow.python.layers import convolutional as conv_layers
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import functional_ops
+from tensorflow.python.ops import gen_array_ops
+from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.training import gradient_descent
@@ -78,6 +82,36 @@ def _two_layer_model(x):
   return h_pool2
 
 
+def _model_with_second_port():
+  random_seed.set_random_seed(0)
+  x = random_ops.truncated_normal([2, 5, 5, 4], seed=0)
+  scale = constant_op.constant(0.1, shape=[4])
+  offset = constant_op.constant(0.3, shape=[4])
+  y, mean, _ = nn.fused_batch_norm(x, scale, offset)
+  mul = math_ops.add(y, mean)
+  output = array_ops.identity(mul)
+  return output
+
+
+def _model_with_branch(x):
+  x_image = array_ops.reshape(x, [-1, 28, 28, 1])
+  w_conv1 = _weight([5, 5, 1, 32])
+  w_conv2 = _weight([5, 5, 1, 32])
+  c_conv1 = _conv2d(x_image, w_conv1)
+  c_conv2 = _conv2d(x_image, w_conv2)
+  add = math_ops.add(c_conv1, c_conv2)
+  return add
+
+
+def _model_with_vec_and_4d(x):
+  x_image = array_ops.reshape(x, [-1, 28, 28, 1])
+  w_conv1 = _weight([5, 5, 1, 32])
+  c_conv1 = _conv2d(x_image, w_conv1)
+  vector = constant_op.constant(6.4, shape=[32])
+  add = math_ops.add(c_conv1, vector)
+  return add
+
+
 def _loop():
   random_seed.set_random_seed(0)
   x1 = random_ops.truncated_normal([1, 784], seed=0)
@@ -89,6 +123,30 @@ def _loop():
   return outputs
 
 
+def _loop_with_branch():
+  random_seed.set_random_seed(0)
+  x1 = random_ops.truncated_normal([1, 784], seed=0)
+  x2 = random_ops.truncated_normal([1, 784], seed=0)
+  x3 = random_ops.truncated_normal([1, 784], seed=0)
+  x4 = random_ops.truncated_normal([1, 784], seed=0)
+  elems = (x1, x2, x3, x4)
+  outputs = functional_ops.map_fn(
+      _model_with_branch, elems, dtype=dtypes.float32)
+  return outputs
+
+
+def _loop_with_vec_and_4d():
+  random_seed.set_random_seed(0)
+  x1 = random_ops.truncated_normal([1, 784], seed=0)
+  x2 = random_ops.truncated_normal([1, 784], seed=0)
+  x3 = random_ops.truncated_normal([1, 784], seed=0)
+  x4 = random_ops.truncated_normal([1, 784], seed=0)
+  elems = (x1, x2, x3, x4)
+  outputs = functional_ops.map_fn(
+      _model_with_vec_and_4d, elems, dtype=dtypes.float32)
+  return outputs
+
+
 def _get_config(layout_optimizer=True):
   if layout_optimizer:
     rewrite_options = rewriter_config_pb2.RewriterConfig(
@@ -126,9 +184,34 @@ def _get_cluster():
   return cluster
 
 
+def _is_transpose(node):
+  return node.endswith('TransposeNHWCToNCHW-LayoutOptimizer') or node.endswith(
+      'TransposeNCHWToNHWC-LayoutOptimizer')
+
+
+def _is_permute(node):
+  return node.endswith('VecPermuteNHWCToNCHW-LayoutOptimizer') or node.endswith(
+      'VecPermuteNCHWToNHWC-LayoutOptimizer')
+
+
 class LayoutOptimizerTest(test.TestCase):
   """Tests the Grappler layout optimizer."""
 
+  def _assert_trans_nchw_to_nhwc(self, name, nodes):
+    self.assertIn(name + '-TransposeNCHWToNHWC-LayoutOptimizer', nodes)
+
+  def _assert_trans_nhwc_to_nchw(self, name, nodes):
+    self.assertIn(name + '-TransposeNHWCToNCHW-LayoutOptimizer', nodes)
+
+  def _assert_map_nhwc_to_nchw(self, name, nodes):
+    self.assertIn(name + '-DimMapNHWCToNCHW-LayoutOptimizer', nodes)
+
+  def _assert_vec_nchw_to_nhwc(self, name, nodes):
+    self.assertIn(name + '-VecPermuteNCHWToNHWC-LayoutOptimizer', nodes)
+
+  def _assert_vec_nhwc_to_nchw(self, name, nodes):
+    self.assertIn(name + '-VecPermuteNHWCToNCHW-LayoutOptimizer', nodes)
+
   def _train(self, checkpoint_path, layout_optimizer=False, restore=False):
     ops.reset_default_graph()
     graph = ops.get_default_graph()
@@ -180,7 +263,7 @@ class LayoutOptimizerTest(test.TestCase):
       nodes = []
       num_transposes = 0
       for node in metadata.cost_graph.node:
-        if node.name.startswith('LayoutOptimizerTranspose'):
+        if _is_transpose(node.name):
           num_transposes += 1
         nodes.append(node.name)
 
@@ -188,16 +271,150 @@ class LayoutOptimizerTest(test.TestCase):
       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
       expected_num_transposes = 2
       self.assertEqual(expected_num_transposes, num_transposes)
-      self.assertIn('LayoutOptimizerTransposeNHWCToNCHW-Conv2D-Reshape-0',
-                    nodes)
-      self.assertIn('LayoutOptimizerTransposeNCHWToNHWC-Relu_1-MaxPool_1',
-                    nodes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('Relu_1-0-0', nodes)
 
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
-  def testLoop(self):
+  def testSplitWithNonConstAxis(self):
     if test.is_gpu_available(cuda_only=True):
-      output = _loop()
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      dim = array_ops.placeholder(dtype='int32')
+      split = array_ops.split(conv, 2, axis=dim)
+      scale = constant_op.constant(0.1, shape=[32])
+      offset = constant_op.constant(0.3, shape=[32])
+      bn0 = nn.fused_batch_norm(split[0], scale, offset)
+      bn1 = nn.fused_batch_norm(split[1], scale, offset)
+      add = bn0[0] + bn1[0]
+      output = array_ops.identity(add)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={dim: 3})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('add_2-0-0', nodes)
+      self._assert_map_nhwc_to_nchw('split-0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testSplitVWithNonConstAxis(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      dim = array_ops.placeholder(dtype='int32')
+      sizes = constant_op.constant([50, 10, 4], shape=[3])
+      split = gen_array_ops._split_v(
+          value=conv, size_splits=sizes, axis=dim, num_split=3)
+      output = math_ops.reduce_sum(split[0])
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={dim: 3})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('SplitV-0-0', nodes)
+      self._assert_map_nhwc_to_nchw('SplitV-2', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testPadWithConstPaddings(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
+      paddings = constant_op.constant(
+          paddings_val, dtype='int32', name='PaddingsConst')
+      pad = array_ops.pad(conv, paddings)
+      output = array_ops.identity(pad)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
+      self.assertIn('Pad-PaddingsConst-LayoutOptimizer', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testReduceSum(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      reduce_sum = math_ops.reduce_sum(conv)
+      output = array_ops.identity(reduce_sum)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Three transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 1
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testCast(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      cast = math_ops.cast(conv, dtype='bool')
+      output = array_ops.identity(cast)
 
       with session.Session() as sess:
         output_val_ref = sess.run(output)
@@ -206,6 +423,772 @@ class LayoutOptimizerTest(test.TestCase):
         metadata = config_pb2.RunMetadata()
         output_val = sess.run(output, run_metadata=metadata)
 
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('Cast-0-0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testSqueeze(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2])
+      squeeze = array_ops.squeeze(reduce_sum)
+      output = array_ops.identity(squeeze)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Three transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 1
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testReduceSumAlongHWC(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2, 3])
+      output = array_ops.identity(reduce_sum)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Three transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 1
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testReduceSumAlongNHW(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2])
+      output = array_ops.identity(reduce_sum)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Three transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 1
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testReduceSumAlongC(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      reduce_sum = math_ops.reduce_sum(conv, axis=[3])
+      output = array_ops.identity(reduce_sum)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Three transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 1
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testConcatWithControlDependency(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      axis = constant_op.constant(3)
+      var = variables.Variable(3)
+      assign = state_ops.assign(var, 6)
+      with ops.control_dependencies([assign]):
+        concat = array_ops.concat([conv, conv], axis)
+      output = array_ops.identity(concat)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('concat-0-0', nodes)
+      self.assertIn('concat-Const_2-LayoutOptimizer', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testFill(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = array_ops.placeholder(dtype='float32')
+      conv = _two_layer_model(x)
+      shape = array_ops.shape(conv)
+      scalar = array_ops.constant(5.7)
+      fill = array_ops.fill(shape, scalar)
+      output = array_ops.identity(fill)
+
+      x_val = [3.4] * 784
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={x: x_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                x: x_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      num_vec_permute = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        if _is_permute(node.name):
+          num_vec_permute += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      # Two vector permute nodes were initially added in the Expand phase of
+      # LayoutOptimizer; they cancelled out each other in the Collapse phase.
+      expected_vec_permute = 0
+      self.assertEqual(expected_vec_permute, num_vec_permute)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('Fill-0-0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testTile(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      multiple = array_ops.placeholder(dtype='int32')
+      tile = array_ops.tile(conv, multiple)
+      output = array_ops.identity(tile)
+
+      multiple_val = [2, 3, 4, 1]
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={multiple: multiple_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                multiple: multiple_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('Tile-0-0', nodes)
+      self._assert_vec_nhwc_to_nchw('Tile-1', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testReverseWithConstDims(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      dims = constant_op.constant([3, 1], name='DimsConst')
+      reverse = array_ops.reverse(conv, dims)
+      output = array_ops.identity(reverse)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
+      self.assertIn('ReverseV2-DimsConst-LayoutOptimizer', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testReverseWithNonConstDims(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      dims = array_ops.placeholder(dtype='int32')
+      reverse = array_ops.reverse(conv, dims)
+      output = array_ops.identity(reverse)
+
+      dims_val = [2, 3]
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={dims: dims_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                dims: dims_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
+      self._assert_map_nhwc_to_nchw('ReverseV2-1', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testSelectOp(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      add = math_ops.add(conv, conv)
+      mean = math_ops.reduce_mean(conv)
+      condition = math_ops.less(conv, mean)
+      select = gen_math_ops._select(condition, conv, add)
+      output = array_ops.identity(select)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testSelectOpScalarCondition(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      add = math_ops.add(conv, conv)
+      condition = constant_op.constant(True)
+      select = gen_math_ops._select(condition, conv, add)
+      output = array_ops.identity(select)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testPadWithNonConstPaddings(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      paddings = array_ops.placeholder(dtype='int32')
+      pad = array_ops.pad(conv, paddings)
+      output = array_ops.identity(pad)
+
+      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={paddings: paddings_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                paddings: paddings_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
+      self._assert_vec_nhwc_to_nchw('Pad-1', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testMaxPoolV2(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
+      strides = array_ops.placeholder(dtype='int32', shape=[4])
+      max_pool = gen_nn_ops._max_pool_v2(conv, ksize, strides, 'VALID')
+      output = array_ops.identity(max_pool)
+
+      strides_val = [1, 3, 2, 1]
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={strides: strides_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                strides: strides_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('MaxPoolV2-0-0', nodes)
+      self._assert_vec_nhwc_to_nchw('MaxPoolV2-2', nodes)
+      self.assertIn('MaxPoolV2-Const_2-LayoutOptimizer', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testMaxPoolGradV2(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
+      strides = array_ops.placeholder(dtype='int32', shape=[4])
+      max_pool_grad = gen_nn_ops.max_pool_grad_v2(conv, conv, conv, ksize,
+                                                  strides, 'VALID')
+      output = array_ops.identity(max_pool_grad)
+
+      strides_val = [1, 3, 2, 1]
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={strides: strides_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                strides: strides_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('MaxPoolGradV2-0-0', nodes)
+      self._assert_vec_nhwc_to_nchw('MaxPoolGradV2-4', nodes)
+      self.assertIn('MaxPoolGradV2-Const_2-LayoutOptimizer', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testSliceWithNonConstAxis(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      size = array_ops.placeholder(dtype='int32')
+      s = array_ops.slice(conv, [0, 0, 0, 0], size)
+      output = array_ops.identity(s)
+
+      size_val = [1, 2, 3, 4]
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={size: size_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                size: size_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('Slice-0-0', nodes)
+      self._assert_vec_nhwc_to_nchw('Slice-2', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testStridedSliceWithNonConstAxis(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      end = array_ops.placeholder(dtype='int32')
+      s = array_ops.strided_slice(conv, [0, 0, 0, 0], end, strides=[1, 2, 3, 1])
+      output = array_ops.identity(s)
+
+      end_val = [1, 2, 3, 4]
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={end: end_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                end: end_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('StridedSlice-0-0', nodes)
+      self._assert_vec_nhwc_to_nchw('StridedSlice-2', nodes)
+      self.assertIn('StridedSlice-StridedSlice/begin-LayoutOptimizer', nodes)
+      self.assertIn('StridedSlice-StridedSlice/strides-LayoutOptimizer', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testStridedSliceWithMask(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      # This will generate a StridedSlice op with begin mask and end mask.
+      s = conv[:, :, 1:-1, :]
+      output = array_ops.identity(s)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes)
+      self.assertIn('strided_slice-strided_slice/stack-LayoutOptimizer', nodes)
+      self.assertIn('strided_slice-strided_slice/stack_1-LayoutOptimizer',
+                    nodes)
+      self.assertIn('strided_slice-strided_slice/stack_2-LayoutOptimizer',
+                    nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testStridedSliceGradWithNonConstAxis(self):
+    if test.is_gpu_available(cuda_only=True):
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      conv = _two_layer_model(x)
+      end = array_ops.placeholder(dtype='int32')
+      shape = array_ops.shape(conv)
+      end_val = [1, 2, 3, 4]
+      s = array_ops.strided_slice(
+          conv, [0, 0, 0, 0], end_val, strides=[1, 2, 3, 1])
+      s_grad = array_ops.strided_slice_grad(shape, [0, 0, 0, 0], end,
+                                            [1, 2, 3, 1], s)
+      output = array_ops.identity(s_grad)
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={end: end_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                end: end_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('StridedSliceGrad-0-0', nodes)
+      self._assert_vec_nhwc_to_nchw('StridedSliceGrad-2', nodes)
+      self.assertIn('StridedSlice-StridedSliceGrad/begin-LayoutOptimizer',
+                    nodes)
+      self.assertIn('StridedSlice-StridedSliceGrad/strides-LayoutOptimizer',
+                    nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testShapeN(self):
+    if test.is_gpu_available(cuda_only=True):
+      x = array_ops.placeholder(dtype='float32')
+      conv = _two_layer_model(x)
+      shapen = array_ops.shape_n([conv, conv])
+      output = math_ops.add(shapen[0], shapen[1])
+
+      x_val = [1.7] * 784
+      with session.Session() as sess:
+        output_val_ref = sess.run(output, feed_dict={x: x_val})
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(
+            output, run_metadata=metadata, feed_dict={
+                x: x_val
+            })
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 1
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
+      self._assert_vec_nchw_to_nhwc('ShapeN-0-0', nodes)
+      self.assertAllEqual(output_val_ref, output_val)
+
+  def testLoop(self):
+    if test.is_gpu_available(cuda_only=True):
+      output = _loop()
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      # Four transposes were initially added in the Expand phase of
+      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('map/while/MaxPool_1-0-2', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testLoopWithBranch(self):
+    if test.is_gpu_available(cuda_only=True):
+      output = _loop_with_branch()
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testLoopWithVecAnd4D(self):
+    if test.is_gpu_available(cuda_only=True):
+      output = _loop_with_vec_and_4d()
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
+      self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes)
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
+  def testBinaryOpSecondPort(self):
+    if test.is_gpu_available(cuda_only=True):
+      output = _model_with_second_port()
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=_get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      nodes = []
+      num_transposes = 0
+      for node in metadata.cost_graph.node:
+        if _is_transpose(node.name):
+          num_transposes += 1
+        nodes.append(node.name)
+
+      expected_num_transposes = 2
+      self.assertEqual(expected_num_transposes, num_transposes)
+      self._assert_trans_nhwc_to_nchw('FusedBatchNorm-0', nodes)
+      self._assert_trans_nchw_to_nhwc('Add-0-0', nodes)
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
   def testGradient(self):
@@ -219,7 +1202,7 @@ class LayoutOptimizerTest(test.TestCase):
     for node in optimized_graph.node:
       if node.op in ['Conv2D', 'Conv2DBackpropFilter', 'Conv2DBackpropInput']:
         found += 1
-        self.assertEqual(node.attr['data_format'].s, 'NCHW')
+        self.assertEqual(node.attr['data_format'].s, b'NCHW')
     self.assertEqual(found, 5)
 
   def testDepthwise(self):
@@ -236,7 +1219,7 @@ class LayoutOptimizerTest(test.TestCase):
           'DepthwiseConv2dNativeBackpropInput'
       ]:
         found += 1
-        self.assertEqual(node.attr['data_format'].s, 'NCHW')
+        self.assertEqual(node.attr['data_format'].s, b'NCHW')
     self.assertEqual(found, 6)
 
   def testCheckpointCompatibility(self):
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py
index 9fbadeceb3b1a8c9f949bc59a5ec75c5b7420cac..948911f099674af4c6dd19bfdac75e5fc1f75c78 100644
--- a/tensorflow/python/grappler/memory_optimizer_test.py
+++ b/tensorflow/python/grappler/memory_optimizer_test.py
@@ -18,12 +18,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.core.framework import attr_value_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.framework import meta_graph
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
+from tensorflow.python.framework import test_util
 from tensorflow.python.grappler import tf_optimizer
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
@@ -33,6 +35,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.training import training as train
 
 
+@test_util.with_c_api
 class MemoryOptimizerSwapTest(test.TestCase):
   """Tests the Grappler memory optimizer."""
 
@@ -66,7 +69,7 @@ class MemoryOptimizerSwapTest(test.TestCase):
     train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
     train_op.append(d)
 
-    d.op.node_def.attr['_swap_to_host'].i = 0
+    d.op._set_attr('_swap_to_host', attr_value_pb2.AttrValue(i=0))
 
     mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
     graph_size = len(mg.graph_def.node)
@@ -92,6 +95,7 @@ class MemoryOptimizerSwapTest(test.TestCase):
         self.assertEqual('c', node.input[1])
 
 
+@test_util.with_c_api
 class MemoryOptimizerRecomputeTest(test.TestCase):
   """Tests the Python interface to recomputation rewrites.
 
@@ -128,6 +132,7 @@ class MemoryOptimizerRecomputeTest(test.TestCase):
         rewriter_config_pb2.RewriterConfig(
             disable_model_pruning=True,
             constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
+            dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
             layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF,
             arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
             memory_optimization=rewriter_config_pb2.RewriterConfig.
@@ -152,6 +157,7 @@ class MemoryOptimizerRecomputeTest(test.TestCase):
         rewriter_config_pb2.RewriterConfig(
             disable_model_pruning=True,
             constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
+            dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
             layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF,
             arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
             memory_optimization=rewriter_config_pb2.RewriterConfig.
@@ -209,6 +215,66 @@ class MemoryOptimizerRecomputeTest(test.TestCase):
         loss_op_name=loss_op_name)
     self.assertAllClose(original_loss, memory_optimized_loss, rtol=1e-4)
 
+  def _annotated_graph(self):
+    graph = ops.Graph()
+    with graph.as_default():
+      random_seed.set_random_seed(2)
+      current_activation = variable_scope.get_variable(
+          name='start', shape=[1, 2, 2, 5])
+      conv_filter = variable_scope.get_variable(
+          name='filter', shape=[5, 5, 5, 5])
+      for layer_number in range(3):
+        with variable_scope.variable_scope('layer_{}'.format(layer_number)):
+          after_conv = nn.conv2d(current_activation, conv_filter, [1, 1, 1, 1],
+                                 'SAME')
+          current_activation = 2. * after_conv
+          current_activation.op._set_attr(
+              '_recompute_hint',
+              # The value of the attribute does not matter; just that the key
+              # exists in the op's attributes.
+              attr_value_pb2.AttrValue(i=1))
+          current_activation += 5.
+          current_activation.op._set_attr(
+              '_recompute_hint', attr_value_pb2.AttrValue(i=0))
+          current_activation = nn.relu(current_activation)
+          current_activation.op._set_attr(
+              '_recompute_hint', attr_value_pb2.AttrValue(i=1))
+      loss = math_ops.reduce_mean(current_activation)
+      optimizer = train.AdamOptimizer(0.001)
+      train_op = optimizer.minimize(loss)
+      init_op = variables.global_variables_initializer()
+    return graph, init_op, train_op
+
+  def testHintNoMetaGraph(self):
+    # Closer to expected usage, but does not check that a re-write actually
+    # happens; see testHintDoesRewrite.
+    graph, init_op, train_op = self._annotated_graph()
+    with graph.as_default():
+      manual_memory_config = rewriter_config_pb2.RewriterConfig(
+          memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL)
+      graph_options = config_pb2.GraphOptions(
+          rewrite_options=manual_memory_config)
+      session_config = config_pb2.ConfigProto(graph_options=graph_options)
+      with session.Session(config=session_config) as sess:
+        sess.run(init_op)
+        sess.run(train_op)
+
+  def testHintDoesRewrite(self):
+    graph = self._annotated_graph()[0]
+    with graph.as_default():
+      metagraph = train.export_meta_graph()
+    self.assertEqual(
+        0,
+        len([node for node in metagraph.graph_def.node
+             if 'Recomputed/' in node.name]))
+    rewritten_graph_def = tf_optimizer.OptimizeGraph(
+        rewriter_config_pb2.RewriterConfig(
+            memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL),
+        metagraph)
+    self.assertEqual(
+        9,
+        len([node for node in rewritten_graph_def.node
+             if 'Recomputed/' in node.name]))
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/grappler/model_analyzer.cc b/tensorflow/python/grappler/model_analyzer.cc
index 7d365c3be923e216b44149921b76d734c2b9a82f..d23eb811ac2b0a6a8802979b4d966b5617c8a8d9 100644
--- a/tensorflow/python/grappler/model_analyzer.cc
+++ b/tensorflow/python/grappler/model_analyzer.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/python/grappler/model_analyzer.h"
 
 #include <iomanip>
+#include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/grappler/grappler_item.h"
@@ -25,26 +26,26 @@ namespace grappler {
 
 ModelAnalyzer::ModelAnalyzer(const GrapplerItem& item) : item_(item) {}
 
-Status ModelAnalyzer::GenerateReport(std::ostream& os) {
+Status ModelAnalyzer::GenerateReport(bool debug, std::ostream& os) {
   GraphProperties properties(item_);
-  TF_RETURN_IF_ERROR(properties.InferStatically());
+  TF_RETURN_IF_ERROR(properties.InferStatically(false));
 
   for (const auto& node : item_.MainOpsFanin()) {
-    PrintNodeInfo(node, properties, os);
+    PrintNodeInfo(node, properties, debug, os);
   }
   for (const auto& node : item_.EnqueueOpsFanin()) {
-    PrintNodeInfo(node, properties, os);
+    PrintNodeInfo(node, properties, debug, os);
   }
 
   return Status::OK();
 }
 
 void ModelAnalyzer::PrintNodeInfo(const NodeDef* node,
-                                  const GraphProperties& properties,
+                                  const GraphProperties& properties, bool debug,
                                   std::ostream& os) const {
   os << node->name() << " [" << node->op() << "]" << std::endl;
   if (properties.HasOutputProperties(node->name())) {
-    std::vector<OpInfo::TensorProperties> props =
+    const std::vector<OpInfo::TensorProperties>& props =
         properties.GetOutputProperties(node->name());
     for (int i = 0; i < props.size(); ++i) {
       const OpInfo::TensorProperties& prop = props[i];
@@ -75,6 +76,27 @@ void ModelAnalyzer::PrintNodeInfo(const NodeDef* node,
       os << std::endl;
     }
   }
+
+  if (debug) {
+    const OpRegistrationData* op_reg_data;
+    Status status = OpRegistry::Global()->LookUp(node->op(), &op_reg_data);
+    if (!status.ok()) {
+      os << "\tCouldn't find op registration for " << node->op() << std::endl;
+    } else if (!op_reg_data->shape_inference_fn) {
+      os << "\tCouldn't find shape function for op " << node->op() << std::endl;
+    } else if (properties.HasInputProperties(node->name())) {
+      const std::vector<OpInfo::TensorProperties>& props =
+          properties.GetInputProperties(node->name());
+      for (int i = 0; i < props.size(); ++i) {
+        const OpInfo::TensorProperties& prop = props[i];
+        if (prop.has_value()) {
+          os << "\t"
+             << "input " << i << " (" << DataTypeString(prop.dtype())
+             << ") has known value" << std::endl;
+        }
+      }
+    }
+  }
 }
 
 }  // end namespace grappler
diff --git a/tensorflow/python/grappler/model_analyzer.h b/tensorflow/python/grappler/model_analyzer.h
index a14034103ca70e59ac24d88318edc198e7d1c5f4..5bc551927d88db723e21b29903d6f5b941048139 100644
--- a/tensorflow/python/grappler/model_analyzer.h
+++ b/tensorflow/python/grappler/model_analyzer.h
@@ -31,11 +31,11 @@ class GraphProperties;
 class ModelAnalyzer {
  public:
   explicit ModelAnalyzer(const GrapplerItem& item);
-  Status GenerateReport(std::ostream& os);
+  Status GenerateReport(bool debug, std::ostream& os);
 
  private:
   void PrintNodeInfo(const NodeDef* node, const GraphProperties& properties,
-                     std::ostream& os) const;
+                     bool debug, std::ostream& os) const;
 
   const GrapplerItem& item_;
 };
diff --git a/tensorflow/python/grappler/model_analyzer.i b/tensorflow/python/grappler/model_analyzer.i
index 726143a0bb4db28538f4338eb3773d85332dc122..7c3a692d0efc501341ff1dff3cf24b8a4830ec84 100644
--- a/tensorflow/python/grappler/model_analyzer.i
+++ b/tensorflow/python/grappler/model_analyzer.i
@@ -40,7 +40,7 @@ limitations under the License.
 %}
 
 %{
-string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph) {
+string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph, bool debug) {
   tensorflow::grappler::ItemConfig cfg;
   cfg.apply_optimizations = false;
   std::unique_ptr<tensorflow::grappler::GrapplerItem> item =
@@ -53,10 +53,10 @@ string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph) {
   tensorflow::grappler::ModelAnalyzer analyzer(*item);
 
   std::stringstream os;
-  analyzer.GenerateReport(os);
+  analyzer.GenerateReport(debug, os);
   return os.str();
 }
 
 %}
 
-string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph);
+string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph, bool debug);
diff --git a/tensorflow/python/grappler/model_analyzer.py b/tensorflow/python/grappler/model_analyzer.py
index c852d71ad8b047f5437ca62c49a5500bc29cec60..535889e1c4034952562a05e4d044fcafeddbc0ca 100644
--- a/tensorflow/python/grappler/model_analyzer.py
+++ b/tensorflow/python/grappler/model_analyzer.py
@@ -22,16 +22,18 @@ from tensorflow.python import pywrap_tensorflow as tf_wrap
 from tensorflow.python.framework import errors
 
 
-def GenerateModelReport(metagraph):
+def GenerateModelReport(metagraph, debug=False):
   """Report what's known statically about each node in the provided metagraph.
 
   Args:
     metagraph: A TensorFlow MetaGraphDef.
+    debug: Add some information useful for debugging.
 
   Returns:
     A string containing the report.
   """
   with errors.raise_exception_on_not_ok_status():
-    ret_from_swig = tf_wrap.GenerateModelReport(metagraph.SerializeToString())
+    ret_from_swig = tf_wrap.GenerateModelReport(metagraph.SerializeToString(),
+                                                debug)
 
   return ret_from_swig
diff --git a/tensorflow/python/grappler/model_analyzer_test.py b/tensorflow/python/grappler/model_analyzer_test.py
index b59d1650f4b5e4c7239c2275213e9a26c3aafafe..ec172755f1ae43fc7581e97c6a18471da45f9100 100644
--- a/tensorflow/python/grappler/model_analyzer_test.py
+++ b/tensorflow/python/grappler/model_analyzer_test.py
@@ -49,6 +49,24 @@ class PyWrapOptimizeGraphTest(test.TestCase):
     # Also print the report to make it easier to debug
     print("{}".format(report))
 
+  def testDebugMode(self):
+    """Make sure arguments can be passed correctly."""
+    a = constant_op.constant([10, 11], name="a")
+    b = constant_op.constant([10], name="b")
+    c = math_ops.add(a, b, name="c")
+    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+    train_op.append(c)
+    mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
+
+    report = model_analyzer.GenerateModelReport(mg, debug=True)
+
+    # Check the report headers
+    self.assertTrue(b"input 0 (int32) has known value" in report)
+    self.assertTrue(b"input 1 (int32) has known value" in report)
+
+    # Also print the report to make it easier to debug
+    print("{}".format(report))
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 5959659a40ccdfbc6448f425ce776fae4d0bcd79..1f20b3ae0eb1ddf981f12f9a12c4e8153711c7f9 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -392,7 +392,7 @@ py_test(
 
 py_test(
     name = "core_test",
-    size = "small",
+    size = "medium",
     srcs = ["_impl/keras/layers/core_test.py"],
     srcs_version = "PY2AND3",
     deps = [
@@ -553,7 +553,7 @@ py_test(
 
 py_test(
     name = "data_utils_test",
-    size = "small",
+    size = "medium",
     srcs = ["_impl/keras/utils/data_utils_test.py"],
     srcs_version = "PY2AND3",
     tags = [
diff --git a/tensorflow/python/keras/_impl/keras/__init__.py b/tensorflow/python/keras/_impl/keras/__init__.py
index 74cc9d0488c88de04bf29aafcd0e23895c59826a..a70250d796b4dd8d08ac65ebdac84b307b917b13 100644
--- a/tensorflow/python/keras/_impl/keras/__init__.py
+++ b/tensorflow/python/keras/_impl/keras/__init__.py
@@ -40,4 +40,4 @@ from tensorflow.python.keras._impl.keras.layers import Input
 from tensorflow.python.keras._impl.keras.models import Model
 from tensorflow.python.keras._impl.keras.models import Sequential
 
-__version__ = '2.1.1-tf'
+__version__ = '2.1.2-tf'
diff --git a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py
index 58841e5db06229727ea088388a901633216aa6fe..63ee83cb51e8366f391f192a9408566076cad468 100644
--- a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py
+++ b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py
@@ -20,6 +20,8 @@ from __future__ import print_function
 
 import json
 
+import numpy as np
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras.utils.data_utils import get_file
 from tensorflow.python.platform import tf_logging as logging
@@ -28,12 +30,15 @@ from tensorflow.python.platform import tf_logging as logging
 CLASS_INDEX = None
 CLASS_INDEX_PATH = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json'
 
+# Global tensor of imagenet mean for preprocessing symbolic inputs
+_IMAGENET_MEAN = None
 
-def preprocess_input(x, data_format=None, mode='caffe'):
-  """Preprocesses a tensor encoding a batch of images.
+
+def _preprocess_numpy_input(x, data_format, mode):
+  """Preprocesses a image tensor as a Numpy array.
 
   Arguments:
-      x: input Numpy tensor, 4D.
+      x: input Numpy, 3D or 4D.
       data_format: data format of the image tensor.
       mode: One of "caffe", "tf".
           - caffe: will convert the images from RGB to BGR,
@@ -44,16 +49,11 @@ def preprocess_input(x, data_format=None, mode='caffe'):
               sample-wise.
 
   Returns:
-      Preprocessed tensor.
+      Preprocessed array.
   """
-  if data_format is None:
-    data_format = K.image_data_format()
-  assert data_format in {'channels_last', 'channels_first'}
-
   if mode == 'tf':
-    x /= 255.
-    x -= 0.5
-    x *= 2.
+    x /= 127.5
+    x -= 1.
     return x
 
   if data_format == 'channels_first':
@@ -79,6 +79,81 @@ def preprocess_input(x, data_format=None, mode='caffe'):
   return x
 
 
+def _preprocess_symbolic_input(x, data_format, mode):
+  """Preprocesses a symbolic image tensor.
+
+  Arguments:
+      x: symoblic tensor, 3D or 4D.
+      data_format: data format of the image tensor.
+      mode: One of "caffe", "tf".
+          - caffe: will convert the images from RGB to BGR,
+              then will zero-center each color channel with
+              respect to the ImageNet dataset,
+              without scaling.
+          - tf: will scale pixels between -1 and 1,
+              sample-wise.
+
+  Returns:
+      Preprocessed tensor.
+  """
+  global _IMAGENET_MEAN
+
+  if mode == 'tf':
+    x /= 127.5
+    x -= 1.
+    return x
+
+  if data_format == 'channels_first':
+    # 'RGB'->'BGR'
+    if K.ndim(x) == 3:
+      x = x[::-1, ...]
+    else:
+      x = x[:, ::-1, ...]
+  else:
+    # 'RGB'->'BGR'
+    x = x[..., ::-1]
+
+  if _IMAGENET_MEAN is None:
+    _IMAGENET_MEAN = K.constant(-np.array([103.939, 116.779, 123.68]))
+  # Zero-center by mean pixel
+  if K.dtype(x) != K.dtype(_IMAGENET_MEAN):
+    x = K.bias_add(x, K.cast(_IMAGENET_MEAN, K.dtype(x)), data_format)
+  else:
+    x = K.bias_add(x, _IMAGENET_MEAN, data_format)
+  return x
+
+
+def preprocess_input(x, data_format=None, mode='caffe'):
+  """Preprocesses a tensor encoding a batch of images.
+
+  Arguments:
+      x: input Numpy or symoblic tensor, 3D or 4D.
+      data_format: data format of the image tensor.
+      mode: One of "caffe", "tf".
+          - caffe: will convert the images from RGB to BGR,
+              then will zero-center each color channel with
+              respect to the ImageNet dataset,
+              without scaling.
+          - tf: will scale pixels between -1 and 1,
+              sample-wise.
+
+  Returns:
+      Preprocessed tensor.
+
+  Raises:
+      ValueError: in case of incorrect data_format.
+  """
+  if data_format is None:
+    data_format = K.image_data_format()
+  if data_format not in {'channels_first', 'channels_last'}:
+    raise ValueError('Unknown data_format ' + str(data_format))
+
+  if isinstance(x, np.ndarray):
+    return _preprocess_numpy_input(x, data_format=data_format, mode=mode)
+  else:
+    return _preprocess_symbolic_input(x, data_format=data_format, mode=mode)
+
+
 def decode_predictions(preds, top=5):
   """Decodes the prediction of an ImageNet model.
 
diff --git a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils_test.py b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils_test.py
index 517ba91219fc0ec0b61ccd673b420021a0db483d..d843dace59f1c88744217fbaee605d2ac859ec55 100644
--- a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils_test.py
+++ b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.keras._impl import keras
+from tensorflow.python.keras._impl.keras.applications.imagenet_utils import preprocess_input
 from tensorflow.python.platform import test
 
 
@@ -29,22 +30,62 @@ class ImageNetUtilsTest(test.TestCase):
   def test_preprocess_input(self):
     # Test batch of images
     x = np.random.uniform(0, 255, (2, 10, 10, 3))
-    self.assertEqual(
-        keras.applications.imagenet_utils.preprocess_input(x).shape, x.shape)
-    out1 = keras.applications.imagenet_utils.preprocess_input(
-        x, 'channels_last')
-    out2 = keras.applications.imagenet_utils.preprocess_input(
-        np.transpose(x, (0, 3, 1, 2)), 'channels_first')
+    self.assertEqual(preprocess_input(x).shape, x.shape)
+    out1 = preprocess_input(x, 'channels_last')
+    out2 = preprocess_input(np.transpose(x, (0, 3, 1, 2)), 'channels_first')
     self.assertAllClose(out1, out2.transpose(0, 2, 3, 1))
 
     # Test single image
     x = np.random.uniform(0, 255, (10, 10, 3))
-    self.assertEqual(
-        keras.applications.imagenet_utils.preprocess_input(x).shape, x.shape)
-    out1 = keras.applications.imagenet_utils.preprocess_input(
-        x, 'channels_last')
-    out2 = keras.applications.imagenet_utils.preprocess_input(
-        np.transpose(x, (2, 0, 1)), 'channels_first')
+    self.assertEqual(preprocess_input(x).shape, x.shape)
+    out1 = preprocess_input(x, 'channels_last')
+    out2 = preprocess_input(np.transpose(x, (2, 0, 1)), 'channels_first')
+    self.assertAllClose(out1, out2.transpose(1, 2, 0))
+
+  def test_preprocess_input_symbolic(self):
+    # Test image batch
+    x = np.random.uniform(0, 255, (2, 10, 10, 3))
+    inputs = keras.layers.Input(shape=x.shape[1:])
+    outputs = keras.layers.Lambda(
+        preprocess_input, output_shape=x.shape[1:])(inputs)
+    model = keras.models.Model(inputs, outputs)
+    assert model.predict(x).shape == x.shape
+    # pylint: disable=g-long-lambda
+    outputs1 = keras.layers.Lambda(lambda x:
+                                   preprocess_input(x, 'channels_last'),
+                                   output_shape=x.shape[1:])(inputs)
+    model1 = keras.models.Model(inputs, outputs1)
+    out1 = model1.predict(x)
+    x2 = np.transpose(x, (0, 3, 1, 2))
+    inputs2 = keras.layers.Input(shape=x2.shape[1:])
+    # pylint: disable=g-long-lambda
+    outputs2 = keras.layers.Lambda(lambda x:
+                                   preprocess_input(x, 'channels_first'),
+                                   output_shape=x2.shape[1:])(inputs2)
+    model2 = keras.models.Model(inputs2, outputs2)
+    out2 = model2.predict(x2)
+    self.assertAllClose(out1, out2.transpose(0, 2, 3, 1))
+
+    # Test single image
+    x = np.random.uniform(0, 255, (10, 10, 3))
+    inputs = keras.layers.Input(shape=x.shape)
+    outputs = keras.layers.Lambda(preprocess_input,
+                                  output_shape=x.shape)(inputs)
+    model = keras.models.Model(inputs, outputs)
+    assert model.predict(x[np.newaxis])[0].shape == x.shape
+    # pylint: disable=g-long-lambda
+    outputs1 = keras.layers.Lambda(lambda x:
+                                   preprocess_input(x, 'channels_last'),
+                                   output_shape=x.shape)(inputs)
+    model1 = keras.models.Model(inputs, outputs1)
+    out1 = model1.predict(x[np.newaxis])[0]
+    x2 = np.transpose(x, (2, 0, 1))
+    inputs2 = keras.layers.Input(shape=x2.shape)
+    outputs2 = keras.layers.Lambda(lambda x:
+                                   preprocess_input(x, 'channels_first'),
+                                   output_shape=x2.shape)(inputs2)  # pylint: disable=g-long-lambda
+    model2 = keras.models.Model(inputs2, outputs2)
+    out2 = model2.predict(x2[np.newaxis])[0]
     self.assertAllClose(out1, out2.transpose(1, 2, 0))
 
   def test_obtain_input_shape(self):
diff --git a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
index de29b92575e48410614d3b32520d99436891344a..2e73cefb6ce32c2a770eb9bde5ffb220be2da92c 100644
--- a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
+++ b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py
@@ -23,6 +23,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras.applications import imagenet_utils
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
@@ -208,8 +210,9 @@ def InceptionResNetV2(include_top=True,  # pylint: disable=invalid-name
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or `'imagenet'` (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -239,10 +242,11 @@ def InceptionResNetV2(include_top=True,  # pylint: disable=invalid-name
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -365,5 +369,7 @@ def InceptionResNetV2(include_top=True,  # pylint: disable=invalid-name
           cache_subdir='models',
           file_hash='d19885ff4a710c122648d3b5c3b684e4')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
 
   return model
diff --git a/tensorflow/python/keras/_impl/keras/applications/inception_v3.py b/tensorflow/python/keras/_impl/keras/applications/inception_v3.py
index d4fea4fbb0223d079149224e2d3d89487834ca40..4424b9280413bb8e556ab376b0c0acccf4030c73 100644
--- a/tensorflow/python/keras/_impl/keras/applications/inception_v3.py
+++ b/tensorflow/python/keras/_impl/keras/applications/inception_v3.py
@@ -29,6 +29,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import layers
 from tensorflow.python.keras._impl.keras.applications import imagenet_utils
@@ -118,8 +120,9 @@ def InceptionV3(include_top=True,
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          "imagenet" (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -151,10 +154,11 @@ def InceptionV3(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -383,6 +387,8 @@ def InceptionV3(include_top=True,
           cache_subdir='models',
           file_hash='bcbd6486424b2319ff4ef7d526e38f63')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
index 653bd8c09f2d7a4ac2f6cb5e6c792b2285b378cc..5f97c138fc038688a009dfa83b48c8f367ee8df2 100644
--- a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
+++ b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py
@@ -67,7 +67,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import warnings
+import os
 
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import constraints
@@ -89,6 +89,7 @@ from tensorflow.python.keras._impl.keras.layers import Reshape
 from tensorflow.python.keras._impl.keras.models import Model
 from tensorflow.python.keras._impl.keras.utils import conv_utils
 from tensorflow.python.keras._impl.keras.utils.data_utils import get_file
+from tensorflow.python.platform import tf_logging as logging
 
 BASE_WEIGHT_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.6/'
 
@@ -348,8 +349,9 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
       dropout: dropout rate
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: `None` (random initialization) or
-          `imagenet` (ImageNet weights)
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of
           `layers.Input()`)
           to use as image input for the model.
@@ -378,16 +380,11 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
       RuntimeError: If attempting to run this model with a
           backend that does not support separable convolutions.
   """
-
-  if K.backend() != 'tensorflow':
-    raise RuntimeError('Only TensorFlow backend is currently supported, '
-                       'as other backends do not support '
-                       'depthwise convolution.')
-
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as ImageNet with `include_top` '
@@ -438,15 +435,15 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
                        ' Input shape provided = %s' % (input_shape,))
 
   if K.image_data_format() != 'channels_last':
-    warnings.warn('The MobileNet family of models is only available '
-                  'for the input data format "channels_last" '
-                  '(width, height, channels). '
-                  'However your settings specify the default '
-                  'data format "channels_first" (channels, width, height).'
-                  ' You should set `image_data_format="channels_last"` '
-                  'in your Keras config located at ~/.keras/keras.json. '
-                  'The model being returned right now will expect inputs '
-                  'to follow the "channels_last" data format.')
+    logging.warning('The MobileNet family of models is only available '
+                    'for the input data format "channels_last" '
+                    '(width, height, channels). '
+                    'However your settings specify the default '
+                    'data format "channels_first" (channels, width, height).'
+                    ' You should set `image_data_format="channels_last"` '
+                    'in your Keras config located at ~/.keras/keras.json. '
+                    'The model being returned right now will expect inputs '
+                    'to follow the "channels_last" data format.')
     K.set_image_data_format('channels_last')
     old_data_format = 'channels_first'
   else:
@@ -534,9 +531,13 @@ def MobileNet(input_shape=None,  # pylint: disable=invalid-name
       weigh_path = BASE_WEIGHT_PATH + model_name
       weights_path = get_file(model_name, weigh_path, cache_subdir='models')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/applications/resnet50.py b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
index 717b626fdc3c65d510cf190e53b4b1c04a89ebfa..8ab46693aa6e46de6c6df320c745ca9ed01fbe0b 100644
--- a/tensorflow/python/keras/_impl/keras/applications/resnet50.py
+++ b/tensorflow/python/keras/_impl/keras/applications/resnet50.py
@@ -26,6 +26,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import layers
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
@@ -161,8 +163,9 @@ def ResNet50(include_top=True,
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -194,10 +197,11 @@ def ResNet50(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -283,4 +287,6 @@ def ResNet50(include_top=True,
           cache_subdir='models',
           md5_hash='a268eb855778b3df3c7506639542a6af')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg16.py b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
index a0862e6407747cd0ad3d698c63da77b17c272e1b..38dbbdc809e708cc19d5529665352fe4807fad90 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg16.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg16.py
@@ -25,6 +25,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions  # pylint: disable=unused-import
@@ -68,8 +70,9 @@ def VGG16(include_top=True,
   Arguments:
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -101,10 +104,11 @@ def VGG16(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -211,4 +215,6 @@ def VGG16(include_top=True,
         dense = model.get_layer(name='fc1')
         layer_utils.convert_dense_weights_data_format(dense, shape,
                                                       'channels_first')
+  elif weights is not None:
+    model.load_weights(weights)
   return model
diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg19.py b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
index cfa1c95336e8ab798e4d5bd67f9c7f89e4705ca7..126c64260b51a7d4e6ca653e850e22c03799dcb0 100644
--- a/tensorflow/python/keras/_impl/keras/applications/vgg19.py
+++ b/tensorflow/python/keras/_impl/keras/applications/vgg19.py
@@ -25,6 +25,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape
 from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions  # pylint: disable=unused-import
@@ -68,8 +70,9 @@ def VGG19(include_top=True,
   Arguments:
       include_top: whether to include the 3 fully-connected
           layers at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+         'imagenet' (pre-training on ImageNet),
+         or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -101,10 +104,11 @@ def VGG19(include_top=True,
       ValueError: in case of invalid argument for `weights`,
           or invalid input shape.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
@@ -207,8 +211,6 @@ def VGG19(include_top=True,
           cache_subdir='models',
           file_hash='253f8cb515780f3b799900260a226db6')
     model.load_weights(weights_path)
-    if K.backend() == 'theano':
-      layer_utils.convert_all_kernels_in_model(model)
 
     if K.image_data_format() == 'channels_first':
       if include_top:
@@ -217,4 +219,6 @@ def VGG19(include_top=True,
         dense = model.get_layer(name='fc1')
         layer_utils.convert_dense_weights_data_format(dense, shape,
                                                       'channels_first')
+  elif weights is not None:
+    model.load_weights(weights)
   return model
diff --git a/tensorflow/python/keras/_impl/keras/applications/xception.py b/tensorflow/python/keras/_impl/keras/applications/xception.py
index 14f6ad809015aae451f8ddc13fa64166b06995a6..821983140852b9f1ab505376d824db2392f54391 100644
--- a/tensorflow/python/keras/_impl/keras/applications/xception.py
+++ b/tensorflow/python/keras/_impl/keras/applications/xception.py
@@ -36,6 +36,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import layers
 from tensorflow.python.keras._impl.keras.applications import imagenet_utils
@@ -80,8 +82,9 @@ def Xception(include_top=True,
   Arguments:
       include_top: whether to include the fully-connected
           layer at the top of the network.
-      weights: one of `None` (random initialization)
-          or "imagenet" (pre-training on ImageNet).
+      weights: one of `None` (random initialization),
+          'imagenet' (pre-training on ImageNet),
+          or the path to the weights file to be loaded.
       input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
           to use as image input for the model.
       input_shape: optional shape tuple, only to be specified
@@ -114,18 +117,16 @@ def Xception(include_top=True,
       RuntimeError: If attempting to run this model with a
           backend that does not support separable convolutions.
   """
-  if weights not in {'imagenet', None}:
+  if not (weights in {'imagenet', None} or os.path.exists(weights)):
     raise ValueError('The `weights` argument should be either '
-                     '`None` (random initialization) or `imagenet` '
-                     '(pre-training on ImageNet).')
+                     '`None` (random initialization), `imagenet` '
+                     '(pre-training on ImageNet), '
+                     'or the path to the weights file to be loaded.')
 
   if weights == 'imagenet' and include_top and classes != 1000:
     raise ValueError('If using `weights` as imagenet with `include_top`'
                      ' as true, `classes` should be 1000')
 
-  if K.backend() != 'tensorflow':
-    raise RuntimeError('The Xception model is only available with '
-                       'the TensorFlow backend.')
   if K.image_data_format() != 'channels_last':
     logging.warning(
         'The Xception model is only available for the '
@@ -297,9 +298,13 @@ def Xception(include_top=True,
           cache_subdir='models',
           file_hash='b0042744bf5b25fce3cb969f33bebb97')
     model.load_weights(weights_path)
+  elif weights is not None:
+    model.load_weights(weights)
 
   if old_data_format:
     K.set_image_data_format(old_data_format)
+  elif weights is not None:
+    model.load_weights(weights)
   return model
 
 
diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py
index ec7a5dcffd0c0f0dda90bbc92de54af82680b607..9476085bd8cbc36f63d3c6c8ecad732b557a4f8a 100644
--- a/tensorflow/python/keras/_impl/keras/backend.py
+++ b/tensorflow/python/keras/_impl/keras/backend.py
@@ -3120,8 +3120,8 @@ def sparse_categorical_crossentropy(target, output, from_logits=False):
   logits = array_ops.reshape(output, [-1, int(output_shape[-1])])
   res = nn.sparse_softmax_cross_entropy_with_logits(
       labels=targets, logits=logits)
-  if len(output_shape) == 3:
-    # if our output includes timesteps we need to reshape
+  if len(output_shape) >= 3:
+    # If our output includes timesteps or spatial dimensions we need to reshape
     return array_ops.reshape(res, array_ops.shape(output)[:-1])
   else:
     return res
diff --git a/tensorflow/python/keras/_impl/keras/backend_test.py b/tensorflow/python/keras/_impl/keras/backend_test.py
index e45e566dcac62a2d91c8e6d68caa5c15d8d80244..e34f1b6926a8fd2c472664d330fe3cd9d714f021 100644
--- a/tensorflow/python/keras/_impl/keras/backend_test.py
+++ b/tensorflow/python/keras/_impl/keras/backend_test.py
@@ -22,6 +22,7 @@ import scipy.sparse
 
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.keras._impl import keras
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.util import tf_inspect
 
@@ -114,12 +115,19 @@ class BackendUtilsTest(test.TestCase):
     self.assertEqual(keras.backend.get_uid('foo'), 1)
 
   def test_learning_phase(self):
-    with self.test_session():
+    with self.test_session() as sess:
       keras.backend.set_learning_phase(1)
       self.assertEqual(keras.backend.learning_phase(), 1)
       with self.assertRaises(ValueError):
         keras.backend.set_learning_phase(2)
 
+      # Test running with a learning-phase-consuming layer
+      keras.backend.set_learning_phase(0)
+      x = keras.Input((3,))
+      y = keras.layers.BatchNormalization()(x)
+      sess.run(variables.global_variables_initializer())
+      sess.run(y, feed_dict={x: np.random.random((2, 3))})
+
   def test_int_shape(self):
     x = keras.backend.placeholder(shape=(3, 4))
     self.assertEqual(keras.backend.int_shape(x), (3, 4))
diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py b/tensorflow/python/keras/_impl/keras/callbacks.py
index 16109b52b3ad05c1f5dd46f05bef493ce15f4295..8da3b857182237a47daa0f00a2340959a448160e 100644
--- a/tensorflow/python/keras/_impl/keras/callbacks.py
+++ b/tensorflow/python/keras/_impl/keras/callbacks.py
@@ -189,6 +189,7 @@ class Callback(object):
 
   def __init__(self):
     self.validation_data = None
+    self.model = None
 
   def set_params(self, params):
     self.params = params
diff --git a/tensorflow/python/keras/_impl/keras/callbacks_test.py b/tensorflow/python/keras/_impl/keras/callbacks_test.py
index 9c17fbb4a7eb318a91f04a6de8e956c8b2c17545..79dfcd1bb669db09de0cbaa103914efaaf19c6fb 100644
--- a/tensorflow/python/keras/_impl/keras/callbacks_test.py
+++ b/tensorflow/python/keras/_impl/keras/callbacks_test.py
@@ -685,8 +685,8 @@ class KerasCallbacksTest(test.TestCase):
       # fit w/o validation data should raise ValueError if histogram_freq > 0
       cbs = callbacks_factory(histogram_freq=1)
       with self.assertRaises(ValueError):
-        model.fit(x_train, y_train, batch_size=BATCH_SIZE,
-                  callbacks=cbs, epochs=3)
+        model.fit(
+            x_train, y_train, batch_size=BATCH_SIZE, callbacks=cbs, epochs=3)
 
       for cb in cbs:
         cb.on_train_end()
@@ -695,8 +695,8 @@ class KerasCallbacksTest(test.TestCase):
       # histogram_freq > 0
       cbs = callbacks_factory(histogram_freq=1)
       with self.assertRaises(ValueError):
-        model.fit_generator(data_generator(True), len(x_train), epochs=2,
-                            callbacks=cbs)
+        model.fit_generator(
+            data_generator(True), len(x_train), epochs=2, callbacks=cbs)
 
       for cb in cbs:
         cb.on_train_end()
@@ -705,10 +705,13 @@ class KerasCallbacksTest(test.TestCase):
       # histogram_freq > 0
       cbs = callbacks_factory(histogram_freq=1)
       with self.assertRaises(ValueError):
-        model.fit_generator(data_generator(True), len(x_train), epochs=2,
-                            validation_data=data_generator(False),
-                            validation_steps=1,
-                            callbacks=cbs)
+        model.fit_generator(
+            data_generator(True),
+            len(x_train),
+            epochs=2,
+            validation_data=data_generator(False),
+            validation_steps=1,
+            callbacks=cbs)
 
       for cb in cbs:
         cb.on_train_end()
diff --git a/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py b/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py
index 4359be89280f7ffa3479af38cd66ebd3aaf6c30e..5d5d2c4f75003847306aad88a7a1f4804ee48707 100644
--- a/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py
+++ b/tensorflow/python/keras/_impl/keras/datasets/boston_housing.py
@@ -48,7 +48,7 @@ def load_data(path='boston_housing.npz', seed=113, test_split=0.2):
   f.close()
 
   np.random.seed(seed)
-  indices = np.arrange(len(x))
+  indices = np.arange(len(x))
   np.random.shuffle(indices)
   x = x[indices]
   y = y[indices]
diff --git a/tensorflow/python/keras/_impl/keras/datasets/imdb.py b/tensorflow/python/keras/_impl/keras/datasets/imdb.py
index 0e83473899c303e3ad96d253cf31a1def476fa52..7d55ebc8e47c86d2b0e24ea3802012b6e9d1d3a9 100644
--- a/tensorflow/python/keras/_impl/keras/datasets/imdb.py
+++ b/tensorflow/python/keras/_impl/keras/datasets/imdb.py
@@ -43,7 +43,7 @@ def load_data(path='imdb.npz',
           the most frequent words are kept
       skip_top: skip the top N most frequently occurring words
           (which may not be informative).
-      maxlen: truncate sequences after this length.
+      maxlen: sequences longer than this will be filtered out.
       seed: random seed for sample shuffling.
       start_char: The start of a sequence will be marked with this character.
           Set to 1 because 0 is usually the padding character.
@@ -74,12 +74,12 @@ def load_data(path='imdb.npz',
   f.close()
 
   np.random.seed(seed)
-  indices = np.arrange(len(x_train))
+  indices = np.arange(len(x_train))
   np.random.shuffle(indices)
   x_train = x_train[indices]
   labels_train = labels_train[indices]
 
-  indices = np.arrange(len(x_test))
+  indices = np.arange(len(x_test))
   np.random.shuffle(indices)
   x_test = x_test[indices]
   labels_test = labels_test[indices]
diff --git a/tensorflow/python/keras/_impl/keras/datasets/reuters.py b/tensorflow/python/keras/_impl/keras/datasets/reuters.py
index d05eb0ef8caed93963b0059a023a06172d4e9ddb..3fed12b59fc2102fb5d3d30837772f594189082f 100644
--- a/tensorflow/python/keras/_impl/keras/datasets/reuters.py
+++ b/tensorflow/python/keras/_impl/keras/datasets/reuters.py
@@ -73,7 +73,7 @@ def load_data(path='reuters.npz',
   npzfile.close()
 
   np.random.seed(seed)
-  indices = np.arrange(len(xs))
+  indices = np.arange(len(xs))
   np.random.shuffle(indices)
   xs = xs[indices]
   labels = labels[indices]
diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/topology.py
index 4a7bb2e83894f06c433964409ccb2bd3ebfed128..d6e0be8e432eb535a053a4c09fda35a32f6c70f3 100644
--- a/tensorflow/python/keras/_impl/keras/engine/topology.py
+++ b/tensorflow/python/keras/_impl/keras/engine/topology.py
@@ -27,7 +27,6 @@ import numpy as np
 from six.moves import zip  # pylint: disable=redefined-builtin
 
 from tensorflow.python.eager import context
-from tensorflow.python.framework import tensor_shape
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import constraints
 from tensorflow.python.keras._impl.keras import initializers
@@ -109,7 +108,7 @@ class Layer(tf_base_layers.Layer):
       set_weights(weights)
       get_config()
       count_params()
-      _compute_output_shape(input_shape)
+      compute_output_shape(input_shape)
       compute_mask(x, mask)
       get_input_at(node_index)
       get_output_at(node_index)
@@ -274,7 +273,7 @@ class Layer(tf_base_layers.Layer):
       del self._initial_weights
     return output
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     """Computes the output shape of the layer.
 
     Assumes that the layer will be built
@@ -289,10 +288,13 @@ class Layer(tf_base_layers.Layer):
     Returns:
         An input shape tuple.
     """
-    if isinstance(input_shape, list):
-      return [tensor_shape.TensorShape(shape) for shape in input_shape]
-    else:
-      return tensor_shape.TensorShape(input_shape)
+    logging.warning(
+        'All custom layers should implement the '
+        '`compute_output_shape` method. This layer (' + self.name + ') '
+        'is relying on the base `Layer.compute_output_shape` implementation, '
+        'which will start raising a `NotImplementedError` '
+        'as of July 1st, 2018.')
+    return input_shape
 
   def compute_mask(self, inputs, mask=None):  # pylint: disable=unused-argument
     """Computes an output mask tensor.
@@ -1426,10 +1428,11 @@ def preprocess_weights_for_loading(layer,
         weights[1] = np.transpose(weights[1], (3, 2, 0, 1))
 
   # convert the weights of CuDNNLSTM so that they could be loaded into LSTM
-  if layer.__class__.__name__ == 'LSTM':
+  if layer.__class__.__name__ == 'LSTM' and len(weights) == 3:
     # determine if we're loading a CuDNNLSTM layer from the number of bias
     # weights:
     # CuDNNLSTM has (units * 8) weights; while LSTM has (units * 4)
+    # if there's no bias weight in the file, skip this conversion
     units = weights[1].shape[0]
     bias = weights[2]
     if len(bias) == units * 8:
diff --git a/tensorflow/python/keras/_impl/keras/engine/topology_test.py b/tensorflow/python/keras/_impl/keras/engine/topology_test.py
index 32e692ba7c22007fc717792f05d5511dc6317cca..479ee877fd2471a67b5b5b81e8fbf338ce755a7b 100644
--- a/tensorflow/python/keras/_impl/keras/engine/topology_test.py
+++ b/tensorflow/python/keras/_impl/keras/engine/topology_test.py
@@ -279,7 +279,7 @@ class TopologyConstructionTest(test.TestCase):
 
       model = keras.models.Model(inputs=[a, b], outputs=[c, d], name='model')
       self.assertEqual(len(model.layers), 6)
-      output_shapes = model._compute_output_shape([(None, 32), (None, 32)])
+      output_shapes = model.compute_output_shape([(None, 32), (None, 32)])
       self.assertListEqual(output_shapes[0].as_list(), [None, 64])
       self.assertListEqual(output_shapes[1].as_list(), [None, 5])
       self.assertListEqual(
@@ -360,8 +360,8 @@ class TopologyConstructionTest(test.TestCase):
       self.assertListEqual(
           model.compute_mask([e, f], [None, None]), [None, None])
       self.assertListEqual(
-          final_model._compute_output_shape([(10, 32), (10, 32)]), [(10, 7),
-                                                                    (10, 64)])
+          final_model.compute_output_shape([(10, 32), (10, 32)]), [(10, 7),
+                                                                   (10, 64)])
 
       # run recursive model
       fn = keras.backend.function(final_model.inputs, final_model.outputs)
diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py
index b4205bf4a397690ce6dd3424e0dd4076d9860e9d..debea2503ee2e440000847c0ce92185e3d230138 100644
--- a/tensorflow/python/keras/_impl/keras/engine/training.py
+++ b/tensorflow/python/keras/_impl/keras/engine/training.py
@@ -194,7 +194,7 @@ def _standardize_sample_or_class_weights(x_weight, output_names, weight_type):
     return x_weights
   else:
     raise TypeError('The model has multiple outputs, so `' + weight_type + '` '
-                    'should be either a list of a dict. '
+                    'should be either a list or a dict. '
                     'Provided `' + weight_type + '` type not understood: ' +
                     str(x_weight))
 
@@ -375,7 +375,7 @@ def _make_batches(size, batch_size):
   Returns:
       A list of tuples of array indices.
   """
-  num_batches = int(np.ceil(size / float(batch_size)))
+  num_batches = (size + batch_size - 1) // batch_size  # round up
   return [(i * batch_size, min(size, (i + 1) * batch_size))
           for i in range(num_batches)]
 
@@ -657,7 +657,7 @@ class Model(Network):
     elif isinstance(loss, list):
       if len(loss) != len(self.outputs):
         raise ValueError('When passing a list as loss, '
-                         'it should have one entry per model outputs. '
+                         'it should have one entry per model output. '
                          'The model has ' + str(len(self.outputs)) +
                          ' outputs, but you passed loss=' + str(loss))
       loss_functions = [losses.get(l) for l in loss]
@@ -700,7 +700,7 @@ class Model(Network):
     elif isinstance(loss_weights, list):
       if len(loss_weights) != len(self.outputs):
         raise ValueError('When passing a list as loss_weights, '
-                         'it should have one entry per model outputs. '
+                         'it should have one entry per model output. '
                          'The model has ' + str(len(self.outputs)) +
                          ' outputs, but you passed loss_weights=' +
                          str(loss_weights))
@@ -716,7 +716,7 @@ class Model(Network):
       if isinstance(target_tensors, list):
         if len(target_tensors) != len(self.outputs):
           raise ValueError('When passing a list as `target_tensors`, '
-                           'it should have one entry per model outputs. '
+                           'it should have one entry per model output. '
                            'The model has ' + str(len(self.outputs)) +
                            ' outputs, but you passed target_tensors=' +
                            str(target_tensors))
@@ -789,7 +789,7 @@ class Model(Network):
     elif isinstance(sample_weight_mode, list):
       if len(sample_weight_mode) != len(self.outputs):
         raise ValueError('When passing a list as sample_weight_mode, '
-                         'it should have one entry per model outputs. '
+                         'it should have one entry per model output. '
                          'The model has ' + str(len(self.outputs)) +
                          ' outputs, but you passed '
                          'sample_weight_mode=' + str(sample_weight_mode))
@@ -1414,6 +1414,13 @@ class Model(Network):
                                      self._feed_loss_fns):
       if loss_fn is losses.sparse_categorical_crossentropy:
         output_shapes.append(output_shape[:-1] + (1,))
+      elif (not hasattr(loss_fn, '__name__') or
+            getattr(losses, loss_fn.__name__, None) is None):
+        # If `loss_fn` is not a function (e.g. callable class)
+        # or if it not in the `losses` module, then
+        # it is a user-defined loss and we make no assumptions
+        # about it.
+        output_shapes.append(None)
       else:
         output_shapes.append(output_shape)
     x = _standardize_input_data(
@@ -1919,7 +1926,7 @@ class Model(Network):
 
   def fit_generator(self,
                     generator,
-                    steps_per_epoch,
+                    steps_per_epoch=None,
                     epochs=1,
                     verbose=1,
                     callbacks=None,
@@ -1956,7 +1963,9 @@ class Model(Network):
             to yield from `generator` before declaring one epoch
             finished and starting the next epoch. It should typically
             be equal to the number of unique samples of your dataset
-            divided by the batch size. Not used if using `Sequence`.
+            divided by the batch size.
+            Optional for `Sequence`: if unspecified, will use
+            `len(generator)` as a number of steps.
         epochs: Integer, total number of iterations on the data.
         verbose: Verbosity mode, 0, 1, or 2.
         callbacks: List of callbacks to be called during training.
@@ -1967,11 +1976,15 @@ class Model(Network):
         validation_steps: Only relevant if `validation_data`
             is a generator. Total number of steps (batches of samples)
             to yield from `generator` before stopping.
+            Optional for `Sequence`: if unspecified, will use
+            `len(generator)` as a number of steps.
         class_weight: Dictionary mapping class indices to a weight
             for the class.
-        max_queue_size: Maximum size for the generator queue
-        workers: Maximum number of processes to spin up
-            when using process-based threading.
+        max_queue_size: Maximum size for the generator queue.
+        workers: Integer. Maximum number of processes to spin up
+            when using process based threading.
+            If unspecified, `workers` will default to 1. If 0, will
+            execute the generator on the main thread.
         use_multiprocessing: If True, use process based threading.
             Note that because
             this implementation relies on multiprocessing,
@@ -2031,15 +2044,33 @@ class Model(Network):
     if do_validation:
       self._make_test_function()
 
+    is_sequence = isinstance(generator, Sequence)
+    if not is_sequence and use_multiprocessing and workers > 1:
+      logging.warning('Using a generator with `use_multiprocessing=True`'
+                      ' and multiple workers may duplicate your data.'
+                      ' Please consider using the`keras.utils.Sequence'
+                      ' class.')
+    if steps_per_epoch is None:
+      if is_sequence:
+        steps_per_epoch = len(generator)
+      else:
+        raise ValueError('`steps_per_epoch=None` is only valid for a'
+                         ' generator based on the `keras.utils.Sequence`'
+                         ' class. Please specify `steps_per_epoch` or use'
+                         ' the `keras.utils.Sequence` class.')
+
     # python 2 has 'next', 3 has '__next__'
     # avoid any explicit version checks
-    val_gen = (hasattr(validation_data, 'next') or
-               hasattr(validation_data, '__next__') or
-               isinstance(validation_data, Sequence))
-    if val_gen and not validation_steps:
-      raise ValueError('When using a generator for validation data, '
-                       'you must specify a value for '
-                       '`validation_steps`.')
+    val_gen = (
+        hasattr(validation_data, 'next') or
+        hasattr(validation_data, '__next__') or
+        isinstance(validation_data, Sequence))
+    if (val_gen and not isinstance(validation_data, Sequence) and
+        not validation_steps):
+      raise ValueError('`validation_steps=None` is only valid for a'
+                       ' generator based on the `keras.utils.Sequence`'
+                       ' class. Please specify `validation_steps` or use'
+                       ' the `keras.utils.Sequence` class.')
 
     # Prepare display labels.
     out_labels = self._get_deduped_metrics_names()
@@ -2084,28 +2115,24 @@ class Model(Network):
         val_data += [0.]
       for cbk in callbacks:
         cbk.validation_data = val_data
-    is_sequence = isinstance(generator, Sequence)
-    if not is_sequence and use_multiprocessing and workers > 1:
-      logging.warning(
-          logging.warning('Using a generator with `use_multiprocessing=True`'
-                          ' and multiple workers may duplicate your data.'
-                          ' Please consider using the`keras.utils.Sequence'
-                          ' class.'))
-    if is_sequence:
-      steps_per_epoch = len(generator)
     enqueuer = None
 
     try:
-      if is_sequence:
-        enqueuer = OrderedEnqueuer(
-            generator, use_multiprocessing=use_multiprocessing, shuffle=shuffle)
+      if workers > 0:
+        if is_sequence:
+          enqueuer = OrderedEnqueuer(
+              generator,
+              use_multiprocessing=use_multiprocessing,
+              shuffle=shuffle)
+        else:
+          enqueuer = GeneratorEnqueuer(
+              generator,
+              use_multiprocessing=use_multiprocessing,
+              wait_time=wait_time)
+        enqueuer.start(workers=workers, max_queue_size=max_queue_size)
+        output_generator = enqueuer.get()
       else:
-        enqueuer = GeneratorEnqueuer(
-            generator,
-            use_multiprocessing=use_multiprocessing,
-            wait_time=wait_time)
-      enqueuer.start(workers=workers, max_queue_size=max_queue_size)
-      output_generator = enqueuer.get()
+        output_generator = generator
 
       callback_model.stop_training = False
       while epoch < epochs:
@@ -2119,6 +2146,7 @@ class Model(Network):
             raise ValueError('Output of generator should be '
                              'a tuple `(x, y, sample_weight)` '
                              'or `(x, y)`. Found: ' + str(generator_output))
+
           if len(generator_output) == 2:
             x, y = generator_output
             sample_weight = None
@@ -2196,7 +2224,7 @@ class Model(Network):
 
   def evaluate_generator(self,
                          generator,
-                         steps,
+                         steps=None,
                          max_queue_size=10,
                          workers=1,
                          use_multiprocessing=False,
@@ -2214,10 +2242,13 @@ class Model(Network):
             when using multiprocessing.
         steps: Total number of steps (batches of samples)
             to yield from `generator` before stopping.
-            Not used if using `Sequence`.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
         max_queue_size: maximum size for the generator queue
-        workers: maximum number of processes to spin up
-            when using process-based threading.
+        workers: Integer. Maximum number of processes to spin up
+            when using process based threading.
+            If unspecified, `workers` will default to 1. If 0, will
+            execute the generator on the main thread.
         use_multiprocessing: if True, use process based threading.
             Note that because
             this implementation relies on multiprocessing,
@@ -2258,26 +2289,34 @@ class Model(Network):
     batch_sizes = []
     is_sequence = isinstance(generator, Sequence)
     if not is_sequence and use_multiprocessing and workers > 1:
-      logging.warning(
-          logging.warning('Using a generator with `use_multiprocessing=True`'
-                          ' and multiple workers may duplicate your data.'
-                          ' Please consider using the`keras.utils.Sequence'
-                          ' class.'))
-    if is_sequence:
-      steps = len(generator)
+      logging.warning('Using a generator with `use_multiprocessing=True`'
+                      ' and multiple workers may duplicate your data.'
+                      ' Please consider using the`keras.utils.Sequence'
+                      ' class.')
+    if steps is None:
+      if is_sequence:
+        steps = len(generator)
+      else:
+        raise ValueError('`steps=None` is only valid for a generator'
+                         ' based on the `keras.utils.Sequence` class.'
+                         ' Please specify `steps` or use the'
+                         ' `keras.utils.Sequence` class.')
     enqueuer = None
 
     try:
-      if is_sequence:
-        enqueuer = OrderedEnqueuer(
-            generator, use_multiprocessing=use_multiprocessing)
+      if workers > 0:
+        if is_sequence:
+          enqueuer = OrderedEnqueuer(
+              generator, use_multiprocessing=use_multiprocessing)
+        else:
+          enqueuer = GeneratorEnqueuer(
+              generator,
+              use_multiprocessing=use_multiprocessing,
+              wait_time=wait_time)
+        enqueuer.start(workers=workers, max_queue_size=max_queue_size)
+        output_generator = enqueuer.get()
       else:
-        enqueuer = GeneratorEnqueuer(
-            generator,
-            use_multiprocessing=use_multiprocessing,
-            wait_time=wait_time)
-      enqueuer.start(workers=workers, max_queue_size=max_queue_size)
-      output_generator = enqueuer.get()
+        output_generator = generator
 
       while steps_done < steps:
         generator_output = next(output_generator)
@@ -2297,11 +2336,11 @@ class Model(Network):
         outs = self.test_on_batch(x, y, sample_weight=sample_weight)
 
         if isinstance(x, list):
-          batch_size = len(x[0])
+          batch_size = x[0].shape[0]
         elif isinstance(x, dict):
-          batch_size = len(list(x.values())[0])
+          batch_size = list(x.values())[0].shape[0]
         else:
-          batch_size = len(x)
+          batch_size = x.shape[0]
         if batch_size == 0:
           raise ValueError('Received an empty batch. '
                            'Batches should at least contain one item.')
@@ -2325,7 +2364,7 @@ class Model(Network):
 
   def predict_generator(self,
                         generator,
-                        steps,
+                        steps=None,
                         max_queue_size=10,
                         workers=1,
                         use_multiprocessing=False,
@@ -2343,10 +2382,13 @@ class Model(Network):
                 when using multiprocessing.
         steps: Total number of steps (batches of samples)
             to yield from `generator` before stopping.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
         max_queue_size: Maximum size for the generator queue.
-          Not used if using `Sequence`.
-        workers: Maximum number of processes to spin up
-            when using process-based threading.
+        workers: Integer. Maximum number of processes to spin up
+            when using process based threading.
+            If unspecified, `workers` will default to 1. If 0, will
+            execute the generator on the main thread.
         use_multiprocessing: If `True`, use process based threading.
             Note that because
             this implementation relies on multiprocessing,
@@ -2382,26 +2424,34 @@ class Model(Network):
     all_outs = []
     is_sequence = isinstance(generator, Sequence)
     if not is_sequence and use_multiprocessing and workers > 1:
-      logging.warning(
-          logging.warning('Using a generator with `use_multiprocessing=True`'
-                          ' and multiple workers may duplicate your data.'
-                          ' Please consider using the`keras.utils.Sequence'
-                          ' class.'))
-    if is_sequence:
-      steps = len(generator)
+      logging.warn('Using a generator with `use_multiprocessing=True`'
+                   ' and multiple workers may duplicate your data.'
+                   ' Please consider using the`keras.utils.Sequence'
+                   ' class.')
+    if steps is None:
+      if is_sequence:
+        steps = len(generator)
+      else:
+        raise ValueError('`steps=None` is only valid for a generator'
+                         ' based on the `keras.utils.Sequence` class.'
+                         ' Please specify `steps` or use the'
+                         ' `keras.utils.Sequence` class.')
     enqueuer = None
 
     try:
-      if is_sequence:
-        enqueuer = OrderedEnqueuer(
-            generator, use_multiprocessing=use_multiprocessing)
+      if workers > 0:
+        if is_sequence:
+          enqueuer = OrderedEnqueuer(
+              generator, use_multiprocessing=use_multiprocessing)
+        else:
+          enqueuer = GeneratorEnqueuer(
+              generator,
+              use_multiprocessing=use_multiprocessing,
+              wait_time=wait_time)
+        enqueuer.start(workers=workers, max_queue_size=max_queue_size)
+        output_generator = enqueuer.get()
       else:
-        enqueuer = GeneratorEnqueuer(
-            generator,
-            use_multiprocessing=use_multiprocessing,
-            wait_time=wait_time)
-      enqueuer.start(workers=workers, max_queue_size=max_queue_size)
-      output_generator = enqueuer.get()
+        output_generator = generator
 
       if verbose == 1:
         progbar = Progbar(target=steps)
diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py
index 17a26f978e24776baee77182e1f901e3ee1091c8..7650bfb6e80aa581f7c14f3c693106bcd6e73740 100644
--- a/tensorflow/python/keras/_impl/keras/engine/training_test.py
+++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py
@@ -399,7 +399,7 @@ class LossWeightingTest(test.TestCase):
       model.add(keras.layers.Activation('softmax'))
       model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
 
-      np.random.seed(1337)
+      np.random.seed(43)
       (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
           train_samples=train_samples,
           test_samples=test_samples,
@@ -836,6 +836,11 @@ class TestGeneratorMethods(test.TestCase):
                             use_multiprocessing=False,
                             validation_data=custom_generator(),
                             validation_steps=10)
+        model.fit_generator(custom_generator(),
+                            steps_per_epoch=5,
+                            validation_data=custom_generator(),
+                            validation_steps=1,
+                            workers=0)
         model.predict_generator(custom_generator(),
                                 steps=5,
                                 max_queue_size=10,
@@ -845,6 +850,10 @@ class TestGeneratorMethods(test.TestCase):
                                 steps=5,
                                 max_queue_size=10,
                                 use_multiprocessing=False)
+        model.predict_generator(custom_generator(),
+                                steps=5,
+                                max_queue_size=10,
+                                workers=0)
         model.evaluate_generator(custom_generator(),
                                  steps=5,
                                  max_queue_size=10,
@@ -854,6 +863,11 @@ class TestGeneratorMethods(test.TestCase):
                                  steps=5,
                                  max_queue_size=10,
                                  use_multiprocessing=False)
+        model.evaluate_generator(custom_generator(),
+                                 steps=5,
+                                 max_queue_size=10,
+                                 use_multiprocessing=False,
+                                 workers=0)
 
         # Test legacy API
         model.fit_generator(custom_generator(),
@@ -1439,4 +1453,13 @@ class TestTrainingWithDataTensors(test.TestCase):
 
 
 if __name__ == '__main__':
+  # Bazel sets these environment variables to very long paths.
+  # Tempfile uses them to create long paths, and in turn multiprocessing
+  # library tries to create sockets named after paths. Delete whatever bazel
+  # writes to these to avoid tests failing due to socket addresses being too
+  # long.
+  for var in ('TMPDIR', 'TMP', 'TEMP'):
+    if var in os.environ:
+      del os.environ[var]
+
   test.main()
diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py
index a1dfa81a79a445d900347aa55e2c848c70d83085..624e92a04b8860d9a3974f2edb4a443482958259 100644
--- a/tensorflow/python/keras/_impl/keras/estimator.py
+++ b/tensorflow/python/keras/_impl/keras/estimator.py
@@ -23,23 +23,32 @@ import os
 
 from tensorflow.python.client import session
 from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.estimator import export as export_lib
+from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.keras._impl.keras import backend as K
 from tensorflow.python.keras._impl.keras import models
 from tensorflow.python.keras._impl.keras.utils.generic_utils import CustomObjectScope
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics as metrics_module
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.training import saver as saver_lib
 from tensorflow.python.training import training_util
-from tensorflow.python.saved_model import signature_constants
 
 _DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
 
 
+def _cast_tensor_to_floatx(x):
+  """Cast tensor to keras's floatx dtype if it is not already the same dtype."""
+  if x.dtype == K.floatx():
+    return x
+  else:
+    return math_ops.cast(x, K.floatx())
+
+
 def _create_ordered_io(keras_model, estimator_io_dict, is_input=True):
   """Create a list of tensors from IO dictionary based on Keras IO order.
 
@@ -68,7 +77,7 @@ def _create_ordered_io(keras_model, estimator_io_dict, is_input=True):
                                         ', '.join(keras_io_names)))
   tensors = []
   for io_name in keras_io_names:
-    tensors.append(estimator_io_dict[io_name])
+    tensors.append(_cast_tensor_to_floatx(estimator_io_dict[io_name]))
   return tensors
 
 
@@ -116,7 +125,8 @@ def _clone_and_build_model(mode,
       target_tensors = _create_ordered_io(keras_model, labels, is_input=False)
     else:
       target_tensors = [
-          sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(labels)
+          _cast_tensor_to_floatx(
+              sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(labels))
       ]
 
     model.compile(
@@ -191,7 +201,8 @@ def _create_keras_model_fn(keras_model, custom_objects=None):
         train_op=train_op,
         eval_metric_ops=eval_metric_ops,
         export_outputs={
-            _DEFAULT_SERVING_KEY: export_lib.export_output.PredictOutput(predictions)
+            _DEFAULT_SERVING_KEY:
+            export_lib.export_output.PredictOutput(predictions)
         })
 
   return model_fn
diff --git a/tensorflow/python/keras/_impl/keras/estimator_test.py b/tensorflow/python/keras/_impl/keras/estimator_test.py
index a7ea3b48a33d4e2d485dd5ca40e39a6f3387facb..9fc48b4117e7ee2c717d5418754254aa02b82869 100644
--- a/tensorflow/python/keras/_impl/keras/estimator_test.py
+++ b/tensorflow/python/keras/_impl/keras/estimator_test.py
@@ -25,8 +25,6 @@ import numpy as np
 
 from tensorflow.python.estimator import run_config as run_config_lib
 from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras._impl import keras
 from tensorflow.python.keras._impl.keras import testing_utils
@@ -80,22 +78,17 @@ def get_resource_for_simple_model(is_sequential, is_evaluate):
   y_test = keras.utils.to_categorical(y_test)
 
   train_input_fn = numpy_io.numpy_input_fn(
-      x={input_name: np.array(x_train, dtype=np.float32)},
-      y=np.array(y_train, dtype=np.float32),
+      x={input_name: x_train},
+      y=y_train,
       shuffle=False,
       num_epochs=None,
       batch_size=16)
 
   evaluate_input_fn = numpy_io.numpy_input_fn(
-      x={input_name: np.array(x_test, dtype=np.float32)},
-      y=np.array(y_test, dtype=np.float32),
-      num_epochs=1,
-      shuffle=False)
+      x={input_name: x_test}, y=y_test, num_epochs=1, shuffle=False)
 
   predict_input_fn = numpy_io.numpy_input_fn(
-      x={input_name: np.array(x_test, dtype=np.float32)},
-      num_epochs=1,
-      shuffle=False)
+      x={input_name: x_test}, num_epochs=1, shuffle=False)
 
   inference_input_fn = evaluate_input_fn if is_evaluate else predict_input_fn
 
@@ -243,41 +236,13 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     d_test = keras.utils.to_categorical(d_test)
 
     def train_input_fn():
-      input_dict = {
-          'input_a':
-              ops.convert_to_tensor(
-                  np.array(a_train, dtype=np.float32), dtype=dtypes.float32),
-          'input_b':
-              ops.convert_to_tensor(
-                  np.array(b_train, dtype=np.float32), dtype=dtypes.float32)
-      }
-      output_dict = {
-          'dense_2':
-              ops.convert_to_tensor(
-                  np.array(c_train, dtype=np.float32), dtype=dtypes.float32),
-          'dense_3':
-              ops.convert_to_tensor(
-                  np.array(d_train, dtype=np.float32), dtype=dtypes.float32)
-      }
+      input_dict = {'input_a': a_train, 'input_b': b_train}
+      output_dict = {'dense_2': c_train, 'dense_3': d_train}
       return input_dict, output_dict
 
     def eval_input_fn():
-      input_dict = {
-          'input_a':
-              ops.convert_to_tensor(
-                  np.array(a_test, dtype=np.float32), dtype=dtypes.float32),
-          'input_b':
-              ops.convert_to_tensor(
-                  np.array(b_test, dtype=np.float32), dtype=dtypes.float32)
-      }
-      output_dict = {
-          'dense_2':
-              ops.convert_to_tensor(
-                  np.array(c_test, dtype=np.float32), dtype=dtypes.float32),
-          'dense_3':
-              ops.convert_to_tensor(
-                  np.array(d_test, dtype=np.float32), dtype=dtypes.float32)
-      }
+      input_dict = {'input_a': a_test, 'input_b': b_test}
+      output_dict = {'dense_2': c_test, 'dense_3': d_test}
       return input_dict, output_dict
 
     with self.test_session():
@@ -347,26 +312,12 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
     y_train = keras.utils.to_categorical(y_train)
 
     def invald_input_name_input_fn():
-      input_dict = {
-          'invalid_input_name':
-              ops.convert_to_tensor(
-                  np.array(x_train, dtype=np.float32), dtype=dtypes.float32),
-      }
-      output = ops.convert_to_tensor(
-          np.array(y_train, dtype=np.float32), dtype=dtypes.float32)
-      return input_dict, output
+      input_dict = {'invalid_input_name': x_train}
+      return input_dict, y_train
 
     def invald_output_name_input_fn():
-      input_dict = {
-          'input_1':
-              ops.convert_to_tensor(
-                  np.array(x_train, dtype=np.float32), dtype=dtypes.float32),
-      }
-      output_dict = {
-          'invalid_output_name':
-              ops.convert_to_tensor(
-                  np.array(y_train, dtype=np.float32), dtype=dtypes.float32),
-      }
+      input_dict = {'input_1': x_train}
+      output_dict = {'invalid_output_name': y_train}
       return input_dict, output_dict
 
     model = simple_functional_model()
diff --git a/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py b/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py
index 1cb881a13f348fedc55ee48518a54b852d680876..e4b9afd38aa21924693f32b5d0fdf64a97019bce 100644
--- a/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py
+++ b/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py
@@ -61,6 +61,9 @@ class LeakyReLU(Layer):
     base_config = super(LeakyReLU, self).get_config()
     return dict(list(base_config.items()) + list(config.items()))
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
 
 class PReLU(Layer):
   """Parametric Rectified Linear Unit.
@@ -143,6 +146,9 @@ class PReLU(Layer):
       neg = -self.alpha * K.relu(-inputs)
     return pos + neg
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
   def get_config(self):
     config = {
         'alpha_initializer': initializers.serialize(self.alpha_initializer),
@@ -182,6 +188,9 @@ class ELU(Layer):
   def call(self, inputs):
     return K.elu(inputs, self.alpha)
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
   def get_config(self):
     config = {'alpha': float(self.alpha)}
     base_config = super(ELU, self).get_config()
@@ -216,6 +225,9 @@ class ThresholdedReLU(Layer):
   def call(self, inputs, mask=None):
     return inputs * K.cast(inputs > self.theta, K.floatx())
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
   def get_config(self):
     config = {'theta': float(self.theta)}
     base_config = super(ThresholdedReLU, self).get_config()
diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional.py b/tensorflow/python/keras/_impl/keras/layers/convolutional.py
index 1cbae9126317479c808730ad89e86d42ae201bc6..22496e8a765d4e86e7ef7ac5a25e6f4af94a28ce 100644
--- a/tensorflow/python/keras/_impl/keras/layers/convolutional.py
+++ b/tensorflow/python/keras/_impl/keras/layers/convolutional.py
@@ -894,7 +894,7 @@ class UpSampling1D(Layer):
     self.size = int(size)
     self.input_spec = InputSpec(ndim=3)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     size = self.size * input_shape[1] if input_shape[1] is not None else None
     return tensor_shape.TensorShape([input_shape[0], size, input_shape[2]])
@@ -950,7 +950,7 @@ class UpSampling2D(Layer):
     self.size = conv_utils.normalize_tuple(size, 2, 'size')
     self.input_spec = InputSpec(ndim=4)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_first':
       height = self.size[0] * input_shape[
@@ -1017,7 +1017,7 @@ class UpSampling3D(Layer):
     self.input_spec = InputSpec(ndim=5)
     super(UpSampling3D, self).__init__(**kwargs)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_first':
       dim1 = self.size[0] * input_shape[
@@ -1072,7 +1072,7 @@ class ZeroPadding1D(Layer):
     self.padding = conv_utils.normalize_tuple(padding, 2, 'padding')
     self.input_spec = InputSpec(ndim=3)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     if input_shape[1] is not None:
       length = input_shape[1] + self.padding[0] + self.padding[1]
     else:
@@ -1154,7 +1154,7 @@ class ZeroPadding2D(Layer):
                        'Found: ' + str(padding))
     self.input_spec = InputSpec(ndim=4)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_first':
       if input_shape[2] is not None:
@@ -1263,7 +1263,7 @@ class ZeroPadding3D(Layer):
           'Found: ' + str(padding))
     self.input_spec = InputSpec(ndim=5)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_first':
       if input_shape[2] is not None:
@@ -1330,7 +1330,7 @@ class Cropping1D(Layer):
     self.cropping = conv_utils.normalize_tuple(cropping, 2, 'cropping')
     self.input_spec = InputSpec(ndim=3)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if input_shape[1] is not None:
       length = input_shape[1] - self.cropping[0] - self.cropping[1]
@@ -1428,7 +1428,7 @@ class Cropping2D(Layer):
                        'Found: ' + str(cropping))
     self.input_spec = InputSpec(ndim=4)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     # pylint: disable=invalid-unary-operand-type
     if self.data_format == 'channels_first':
@@ -1560,7 +1560,7 @@ class Cropping3D(Layer):
           'Found: ' + str(cropping))
     self.input_spec = InputSpec(ndim=5)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     # pylint: disable=invalid-unary-operand-type
     if self.data_format == 'channels_first':
diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py
index c88122ce1887c4cb93efadc82f504792c862941d..4f0e9fc691133ae7f9a7834e17379cb8e25a8a2c 100644
--- a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py
+++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py
@@ -127,7 +127,7 @@ class ConvRecurrent2D(Recurrent):
     self.input_spec = [InputSpec(ndim=5)]
     self.state_spec = None
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     if isinstance(input_shape, list):
       input_shape = input_shape[0]
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
@@ -467,9 +467,9 @@ class ConvLSTM2D(ConvRecurrent2D):
                        'Got input shape: ' + str(input_shape))
 
     if self.return_state:
-      output_shape = tuple(self._compute_output_shape(input_shape)[0].as_list())
+      output_shape = tuple(self.compute_output_shape(input_shape)[0].as_list())
     else:
-      output_shape = tuple(self._compute_output_shape(input_shape).as_list())
+      output_shape = tuple(self.compute_output_shape(input_shape).as_list())
     if self.return_sequences:
       output_shape = (input_shape[0],) + output_shape[2:]
     else:
diff --git a/tensorflow/python/keras/_impl/keras/layers/core.py b/tensorflow/python/keras/_impl/keras/layers/core.py
index 517129fab05a504245725032e715b624a3b975a7..6ee3fb48b2f1426b87c5c1947e90d0797e9b9ff7 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core.py
@@ -79,6 +79,9 @@ class Masking(Layer):
         K.not_equal(inputs, self.mask_value), axis=-1, keepdims=True)
     return inputs * K.cast(boolean_mask, inputs.dtype)
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
   def get_config(self):
     config = {'mask_value': self.mask_value}
     base_config = super(Masking, self).get_config()
@@ -104,13 +107,13 @@ class Dropout(tf_core_layers.Dropout, Layer):
   """
 
   def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
-    self.supports_masking = True
     # Inheritance call order:
     # 1) tf.layers.Dropout, 2) keras.layers.Layer, 3) tf.layers.Layer
     super(Dropout, self).__init__(rate=rate,
                                   noise_shape=noise_shape,
                                   seed=seed,
                                   **kwargs)
+    self.supports_masking = True
 
   def call(self, inputs, training=None):
     if training is None:
@@ -295,6 +298,9 @@ class Activation(Layer):
   def call(self, inputs):
     return self.activation(inputs)
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
   def get_config(self):
     config = {'activation': activations.serialize(self.activation)}
     base_config = super(Activation, self).get_config()
@@ -385,7 +391,7 @@ class Reshape(Layer):
       raise ValueError(msg)
     return output_shape
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if None in input_shape[1:]:
       output_shape = [input_shape[0]]
@@ -441,7 +447,7 @@ class Permute(Layer):
     self.dims = tuple(dims)
     self.input_spec = InputSpec(ndim=len(self.dims) + 1)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     output_shape = copy.copy(input_shape)
     for i, dim in enumerate(self.dims):
@@ -507,7 +513,7 @@ class RepeatVector(Layer):
     self.n = n
     self.input_spec = InputSpec(ndim=2)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     return tensor_shape.TensorShape([input_shape[0], self.n, input_shape[1]])
 
@@ -547,8 +553,19 @@ class Lambda(Layer):
   Arguments:
       function: The function to be evaluated.
           Takes input tensor as first argument.
+      output_shape: Expected output shape from function.
+            This argument can be inferred if not explicitly provided.
+            Can be a tuple or function.
+            If a tuple, it only specifies the first dimension onward;
+                 sample dimension is assumed either the same as the input:
+                 `output_shape = (input_shape[0], ) + output_shape`
+                 or, the input is `None` and
+                 the sample dimension is also `None`:
+                 `output_shape = (None, ) + output_shape`
+            If a function, it specifies the entire shape as a function of the
+            input shape: `output_shape = f(input_shape)`
       arguments: optional dictionary of keyword arguments to be passed
-          to the function.
+            to the function.
 
   Input shape:
       Arbitrary. Use the keyword argument input_shape
@@ -557,16 +574,52 @@ class Lambda(Layer):
 
   Output shape:
       Specified by `output_shape` argument
-      (or auto-inferred when using TensorFlow).
   """
 
-  def __init__(self, function, mask=None, arguments=None, **kwargs):
+  def __init__(self, function, output_shape=None, mask=None, arguments=None,
+               **kwargs):
     super(Lambda, self).__init__(**kwargs)
     self.function = function
     self.arguments = arguments if arguments else {}
     if mask is not None:
       self.supports_masking = True
     self.mask = mask
+    if output_shape is None:
+      self._output_shape = None
+    elif isinstance(output_shape, (tuple, list)):
+      self._output_shape = tuple(output_shape)
+    else:
+      if not callable(output_shape):
+        raise TypeError('In Lambda, `output_shape` '
+                        'must be a list, a tuple, or a function.')
+      self._output_shape = output_shape
+
+  def _compute_output_shape(self, input_shape):
+    input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list())
+
+    if self._output_shape is None:
+      x = K.placeholder(shape=input_shape)
+      x = self.call(x)
+      if isinstance(x, list):
+        return [tensor_shape.TensorShape(K.int_shape(x_elem)) for x_elem in x]
+      else:
+        return tensor_shape.TensorShape(K.int_shape(x))
+    elif isinstance(self._output_shape, (tuple, list)):
+      if isinstance(input_shape, list):
+        num_samples = input_shape[0][0]
+      else:
+        num_samples = input_shape[0] if input_shape else None
+      return tensor_shape.TensorShape((num_samples,) +
+                                      tuple(self._output_shape))
+    else:
+      shape = self._output_shape(input_shape)
+      if not isinstance(shape, (list, tuple)):
+        raise ValueError(
+            '`output_shape` function must return a tuple or a list of tuples.')
+      if isinstance(shape, list):
+        if isinstance(shape[0], int) or shape[0] is None:
+          shape = tuple(shape)
+      return tensor_shape.TensorShape(shape)
 
   def call(self, inputs, mask=None):
     arguments = self.arguments
@@ -587,9 +640,21 @@ class Lambda(Layer):
       function = self.function.__name__
       function_type = 'function'
 
+    if isinstance(self._output_shape, python_types.LambdaType):
+      output_shape = func_dump(self._output_shape)
+      output_shape_type = 'lambda'
+    elif callable(self._output_shape):
+      output_shape = self._output_shape.__name__
+      output_shape_type = 'function'
+    else:
+      output_shape = self._output_shape
+      output_shape_type = 'raw'
+
     config = {
         'function': function,
         'function_type': function_type,
+        'output_shape': output_shape,
+        'output_shape_type': output_shape_type,
         'arguments': self.arguments
     }
     base_config = super(Lambda, self).get_config()
@@ -614,6 +679,19 @@ class Lambda(Layer):
     else:
       raise TypeError('Unknown function type:', function_type)
 
+    output_shape_type = config.pop('output_shape_type')
+    if output_shape_type == 'function':
+      # Simple lookup in custom objects
+      output_shape = deserialize_keras_object(
+          config['output_shape'],
+          custom_objects=custom_objects,
+          printable_module_name='output_shape function in Lambda layer')
+    elif output_shape_type == 'lambda':
+      # Unsafe deserialization from bytecode
+      output_shape = func_load(config['output_shape'], globs=globs)
+    else:
+      output_shape = config['output_shape']
+
     # If arguments were numpy array, they have been saved as
     # list. We need to recover the ndarray
     if 'arguments' in config:
@@ -625,6 +703,7 @@ class Lambda(Layer):
             config['arguments'][key] = np.array(arg_dict['value'])
 
     config['function'] = function
+    config['output_shape'] = output_shape
     return cls(**config)
 
 
@@ -755,6 +834,9 @@ class ActivityRegularization(Layer):
     self.l1 = l1
     self.l2 = l2
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
   def get_config(self):
     config = {'l1': self.l1, 'l2': self.l2}
     base_config = super(ActivityRegularization, self).get_config()
diff --git a/tensorflow/python/keras/_impl/keras/layers/core_test.py b/tensorflow/python/keras/_impl/keras/layers/core_test.py
index dd768dc268ef6b39f64b522fd88393610c832287..bdb99c91c289cf808fec7b891376dbfcf5504aca 100644
--- a/tensorflow/python/keras/_impl/keras/layers/core_test.py
+++ b/tensorflow/python/keras/_impl/keras/layers/core_test.py
@@ -47,6 +47,11 @@ class CoreLayersTest(test.TestCase):
                   'noise_shape': [3, 1]},
           input_shape=(3, 2))
 
+    # https://github.com/tensorflow/tensorflow/issues/14819
+    with self.test_session():
+      dropout = keras.layers.Dropout(0.5)
+      self.assertEqual(True, dropout.supports_masking)
+
     with self.test_session():
       testing_utils.layer_test(
           keras.layers.SpatialDropout1D,
@@ -220,6 +225,34 @@ class CoreLayersTest(test.TestCase):
       self.assertEqual(1, len(layer.losses))
       _ = layer.get_config()
 
+  def test_lambda_output_shape(self):
+    with self.test_session():
+      l = keras.layers.Lambda(lambda x: x + 1, output_shape=(1, 1))
+      l(keras.backend.variable(np.ones((1, 1))))
+      self.assertEqual((1, 1), l.get_config()['output_shape'])
+
+  def test_lambda_output_shape_function(self):
+    def get_output_shape(input_shape):
+      return 1 * input_shape
+
+    with self.test_session():
+      l = keras.layers.Lambda(lambda x: x + 1, output_shape=get_output_shape)
+      l(keras.backend.variable(np.ones((1, 1))))
+      self.assertEqual('lambda', l.get_config()['output_shape_type'])
+
+  def test_lambda_config_serialization(self):
+    with self.test_session():
+      # test serialization with output_shape and output_shape_type
+      layer = keras.layers.Lambda(lambda x: x + 1, output_shape=(1, 1))
+      layer(keras.backend.variable(np.ones((1, 1))))
+      config = layer.get_config()
+      layer = keras.layers.deserialize({
+          'class_name': 'Lambda',
+          'config': config
+      })
+
+      layer = keras.layers.Lambda.from_config(config)
 
 if __name__ == '__main__':
   test.main()
+
diff --git a/tensorflow/python/keras/_impl/keras/layers/embeddings.py b/tensorflow/python/keras/_impl/keras/layers/embeddings.py
index 3ac5e5661e192f9583b228df05a5e37545d388fe..51c520be38f5fac32fec9e4a13c380a2e477c709 100644
--- a/tensorflow/python/keras/_impl/keras/layers/embeddings.py
+++ b/tensorflow/python/keras/_impl/keras/layers/embeddings.py
@@ -129,7 +129,7 @@ class Embedding(Layer):
     else:
       return K.not_equal(inputs, 0)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.input_length is None:
       return tensor_shape.TensorShape(input_shape + [self.output_dim])
diff --git a/tensorflow/python/keras/_impl/keras/layers/local.py b/tensorflow/python/keras/_impl/keras/layers/local.py
index bf1d495b9dda6302f95094fbda40fc5a6b5f79ed..0a31b87fb564b2833c0dea1ebb3a977b07f13a24 100644
--- a/tensorflow/python/keras/_impl/keras/layers/local.py
+++ b/tensorflow/python/keras/_impl/keras/layers/local.py
@@ -146,7 +146,7 @@ class LocallyConnected1D(Layer):
     self.input_spec = InputSpec(ndim=3, axes={2: input_dim})
     self.built = True
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     length = conv_utils.conv_output_length(input_shape[1], self.kernel_size[0],
                                            self.padding, self.strides[0])
@@ -337,7 +337,7 @@ class LocallyConnected2D(Layer):
       self.input_spec = InputSpec(ndim=4, axes={-1: input_filter})
     self.built = True
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_first':
       rows = input_shape[2]
diff --git a/tensorflow/python/keras/_impl/keras/layers/merge.py b/tensorflow/python/keras/_impl/keras/layers/merge.py
index 888be2736934c314474bdc9259498fa2b415a4db..76eb03cf274a648da127b9d3e0c911096d361812 100644
--- a/tensorflow/python/keras/_impl/keras/layers/merge.py
+++ b/tensorflow/python/keras/_impl/keras/layers/merge.py
@@ -172,7 +172,7 @@ class _Merge(Layer):
     else:
       return self._merge_function(inputs)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     if input_shape[0] is None:
       output_shape = None
     else:
@@ -358,7 +358,7 @@ class Concatenate(_Merge):
                        'on a list of inputs.')
     return K.concatenate(inputs, axis=self.axis)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     if not isinstance(input_shape, list):
       raise ValueError('A `Concatenate` layer should be called '
                        'on a list of inputs.')
@@ -485,7 +485,7 @@ class Dot(_Merge):
     output = K.batch_dot(x1, x2, axes)
     return output
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     if not isinstance(input_shape, list) or len(input_shape) != 2:
       raise ValueError('A `Dot` layer should be called '
                        'on a list of 2 inputs.')
diff --git a/tensorflow/python/keras/_impl/keras/layers/merge_test.py b/tensorflow/python/keras/_impl/keras/layers/merge_test.py
index 1f34c367e4b7593a9a7c7d320cdc1d8d75c4959e..bb03dda1fc645222c1ced97cfce8d459586dd89d 100644
--- a/tensorflow/python/keras/_impl/keras/layers/merge_test.py
+++ b/tensorflow/python/keras/_impl/keras/layers/merge_test.py
@@ -188,9 +188,9 @@ class MergeLayersTest(test.TestCase):
       self.assertEqual(out.shape, (2, 1))
       self.assertAllClose(out, expected, atol=1e-4)
 
-      # test _compute_output_shape
+      # test compute_output_shape
       layer = keras.layers.Dot(axes=-1)
-      self.assertEqual(layer._compute_output_shape([(4, 5), (4, 5)]), (4, 1))
+      self.assertEqual(layer.compute_output_shape([(4, 5), (4, 5)]), (4, 1))
 
   def test_dot_errors(self):
     i1 = keras.layers.Input(shape=(4, 5))
@@ -206,7 +206,7 @@ class MergeLayersTest(test.TestCase):
       keras.layers.dot([i1, i2, i3], axes=-1)
     with self.assertRaises(ValueError):
       dot = keras.layers.Dot(1)
-      dot._compute_output_shape(1)
+      dot.compute_output_shape(1)
 
   def test_merge_subtract(self):
     i1 = keras.layers.Input(shape=(4, 5))
diff --git a/tensorflow/python/keras/_impl/keras/layers/noise.py b/tensorflow/python/keras/_impl/keras/layers/noise.py
index 9caa8b7024aa31118802a5bac5edac756dccc0f9..459f13145f090f1942543ec2f5da4e9b8cd71509 100644
--- a/tensorflow/python/keras/_impl/keras/layers/noise.py
+++ b/tensorflow/python/keras/_impl/keras/layers/noise.py
@@ -59,6 +59,9 @@ class GaussianNoise(Layer):
 
     return K.in_train_phase(noised, inputs, training=training)
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
   def get_config(self):
     config = {'stddev': self.stddev}
     base_config = super(GaussianNoise, self).get_config()
@@ -105,6 +108,9 @@ class GaussianDropout(Layer):
       return K.in_train_phase(noised, inputs, training=training)
     return inputs
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
   def get_config(self):
     config = {'rate': self.rate}
     base_config = super(GaussianDropout, self).get_config()
@@ -167,6 +173,9 @@ class AlphaDropout(Layer):
       return K.in_train_phase(dropped_inputs, inputs, training=training)
     return inputs
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
   def get_config(self):
     config = {'rate': self.rate}
     base_config = super(AlphaDropout, self).get_config()
diff --git a/tensorflow/python/keras/_impl/keras/layers/pooling.py b/tensorflow/python/keras/_impl/keras/layers/pooling.py
index afe4ebfdc5305a91dc287203d56a9b389b468663..b133e2dfaf1bcacd055f6a597bd557f696469ffc 100644
--- a/tensorflow/python/keras/_impl/keras/layers/pooling.py
+++ b/tensorflow/python/keras/_impl/keras/layers/pooling.py
@@ -351,7 +351,7 @@ class _GlobalPooling1D(Layer):
     super(_GlobalPooling1D, self).__init__(**kwargs)
     self.input_spec = InputSpec(ndim=3)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     return tensor_shape.TensorShape([input_shape[0], input_shape[2]])
 
@@ -398,7 +398,7 @@ class _GlobalPooling2D(Layer):
     self.data_format = conv_utils.normalize_data_format(data_format)
     self.input_spec = InputSpec(ndim=4)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_last':
       return tensor_shape.TensorShape([input_shape[0], input_shape[3]])
@@ -493,7 +493,7 @@ class _GlobalPooling3D(Layer):
     self.data_format = conv_utils.normalize_data_format(data_format)
     self.input_spec = InputSpec(ndim=5)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_last':
       return tensor_shape.TensorShape([input_shape[0], input_shape[4]])
diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent.py b/tensorflow/python/keras/_impl/keras/layers/recurrent.py
index 8df1840b4cbfddd3d31708da5eb3a57333d621ef..6e38cf2f4181f36fdd0dfeadf699f445774459e3 100644
--- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py
@@ -412,7 +412,7 @@ class RNN(Layer):
   def states(self, states):
     self._states = states
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     if isinstance(input_shape, list):
       input_shape = input_shape[0]
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
@@ -2266,7 +2266,7 @@ class Recurrent(Layer):
     self.dropout = 0
     self.recurrent_dropout = 0
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     if isinstance(input_shape, list):
       input_shape = input_shape[0]
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers.py b/tensorflow/python/keras/_impl/keras/layers/wrappers.py
index aefa5a1c020b490991708056d609ae1efa8d4a9a..452801b65639be217ac26d3caa69f070c776634e 100644
--- a/tensorflow/python/keras/_impl/keras/layers/wrappers.py
+++ b/tensorflow/python/keras/_impl/keras/layers/wrappers.py
@@ -181,11 +181,11 @@ class TimeDistributed(Wrapper):
     super(TimeDistributed, self).build()
     self.built = True
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     child_input_shape = tensor_shape.TensorShape([input_shape[0]] +
                                                  input_shape[2:])
-    child_output_shape = self.layer._compute_output_shape(  # pylint: disable=protected-access
+    child_output_shape = self.layer.compute_output_shape(
         child_input_shape).as_list()
     timesteps = input_shape[1]
     return tensor_shape.TensorShape([child_output_shape[0], timesteps] +
@@ -231,7 +231,7 @@ class TimeDistributed(Wrapper):
       if hasattr(y, '_uses_learning_phase'):
         uses_learning_phase = y._uses_learning_phase
       # Shape: (num_samples, timesteps, ...)
-      output_shape = self._compute_output_shape(input_shape).as_list()
+      output_shape = self.compute_output_shape(input_shape).as_list()
       y = K.reshape(y, (-1, input_length) + tuple(output_shape[2:]))
 
     # Apply activity regularizer if any:
@@ -301,16 +301,16 @@ class Bidirectional(Wrapper):
     self.forward_layer.set_weights(weights[:nw // 2])
     self.backward_layer.set_weights(weights[nw // 2:])
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list())
     if self.merge_mode in ['sum', 'ave', 'mul']:
-      return self.forward_layer._compute_output_shape(input_shape)  # pylint: disable=protected-access
+      return self.forward_layer.compute_output_shape(input_shape)
     elif self.merge_mode == 'concat':
-      shape = self.forward_layer._compute_output_shape(input_shape).as_list()  # pylint: disable=protected-access
+      shape = self.forward_layer.compute_output_shape(input_shape).as_list()
       shape[-1] *= 2
       return tensor_shape.TensorShape(shape)
     elif self.merge_mode is None:
-      shape = self.forward_layer._compute_output_shape(input_shape)  # pylint: disable=protected-access
+      shape = self.forward_layer.compute_output_shape(input_shape)
       return [shape, copy.copy(shape)]
 
   def call(self, inputs, training=None, mask=None):
diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py b/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py
index a0951b8240dac5162161962456c34df4c2a16595..0866c4b0aeddc91ba6eeca6395875b4f2574dbc0 100644
--- a/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py
+++ b/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py
@@ -158,7 +158,7 @@ class BidirectionalTest(test.TestCase):
 
         # test compute output shape
         ref_shape = model.layers[-1].output.get_shape()
-        shape = model.layers[-1]._compute_output_shape(
+        shape = model.layers[-1].compute_output_shape(
             (None, timesteps, dim))
         self.assertListEqual(shape.as_list(), ref_shape.as_list())
 
diff --git a/tensorflow/python/keras/_impl/keras/losses.py b/tensorflow/python/keras/_impl/keras/losses.py
index 19212aeee8cd4fbc723ba3e47c9d3e226ec339a9..1d6319abb13619932fe76966a69004dcfcd0e022 100644
--- a/tensorflow/python/keras/_impl/keras/losses.py
+++ b/tensorflow/python/keras/_impl/keras/losses.py
@@ -61,10 +61,10 @@ def categorical_hinge(y_true, y_pred):
 
 def logcosh(y_true, y_pred):
 
-  def cosh(x):
-    return (K.exp(x) + K.exp(-x)) / 2
+  def _logcosh(x):
+    return x + K.softplus(-2. * x) - K.log(2.)
 
-  return K.mean(K.log(cosh(y_pred - y_true)), axis=-1)
+  return K.mean(_logcosh(y_pred - y_true), axis=-1)
 
 
 def categorical_crossentropy(y_true, y_pred):
diff --git a/tensorflow/python/keras/_impl/keras/models.py b/tensorflow/python/keras/_impl/keras/models.py
index ba202827ce3fca397ab487f58c01667b9b0c4444..e262cc8c8e9d728c1e7f504ffaf543faa1f3db50 100644
--- a/tensorflow/python/keras/_impl/keras/models.py
+++ b/tensorflow/python/keras/_impl/keras/models.py
@@ -1070,7 +1070,7 @@ class Sequential(Model):
 
   def fit_generator(self,
                     generator,
-                    steps_per_epoch,
+                    steps_per_epoch=None,
                     epochs=1,
                     verbose=1,
                     callbacks=None,
@@ -1101,8 +1101,10 @@ class Sequential(Model):
         steps_per_epoch: Total number of steps (batches of samples)
             to yield from `generator` before declaring one epoch
             finished and starting the next epoch. It should typically
-            be equal to the number of unique samples of your dataset
+            be equal to the number of samples of your dataset
             divided by the batch size.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
         epochs: Integer, total number of iterations on the data.
             Note that in conjunction with initial_epoch, the parameter
             epochs is to be understood as "final epoch". The model is
@@ -1118,8 +1120,10 @@ class Sequential(Model):
             is a generator.
             Number of steps to yield from validation generator
             at the end of every epoch. It should typically
-            be equal to the number of unique samples of your
+            be equal to the number of samples of your
             validation dataset divided by the batch size.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(validation_data)` as a number of steps.
         class_weight: Dictionary mapping class indices to a weight
             for the class.
         max_queue_size: Maximum size for the generator queue
@@ -1195,7 +1199,7 @@ class Sequential(Model):
 
   def evaluate_generator(self,
                          generator,
-                         steps,
+                         steps=None,
                          max_queue_size=10,
                          workers=1,
                          use_multiprocessing=False,
@@ -1210,6 +1214,8 @@ class Sequential(Model):
             or (inputs, targets, sample_weights)
         steps: Total number of steps (batches of samples)
             to yield from `generator` before stopping.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
         max_queue_size: maximum size for the generator queue
         workers: maximum number of processes to spin up
         use_multiprocessing: if True, use process based threading.
@@ -1254,7 +1260,7 @@ class Sequential(Model):
 
   def predict_generator(self,
                         generator,
-                        steps,
+                        steps=None,
                         max_queue_size=10,
                         workers=1,
                         use_multiprocessing=False,
@@ -1269,6 +1275,8 @@ class Sequential(Model):
         generator: generator yielding batches of input samples.
         steps: Total number of steps (batches of samples)
             to yield from `generator` before stopping.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
         max_queue_size: maximum size for the generator queue
         workers: maximum number of processes to spin up
         use_multiprocessing: if True, use process based threading.
diff --git a/tensorflow/python/keras/_impl/keras/models_test.py b/tensorflow/python/keras/_impl/keras/models_test.py
index 61938066b98b9f6bb48e7e68870d15ed60ad3dd9..edfc0ce0ebc0321589a452e7357c517feeb626cf 100644
--- a/tensorflow/python/keras/_impl/keras/models_test.py
+++ b/tensorflow/python/keras/_impl/keras/models_test.py
@@ -306,7 +306,7 @@ class TestSequential(test.TestCase):
         def call(self, inputs):
           return [3 * inputs, 2 * inputs]
 
-        def _compute_output_shape(self, input_shape):
+        def compute_output_shape(self, input_shape):
           return [input_shape, input_shape]
 
       with self.assertRaises(ValueError):
diff --git a/tensorflow/python/keras/_impl/keras/preprocessing/image.py b/tensorflow/python/keras/_impl/keras/preprocessing/image.py
index 12dc718cd791d0a5829c4809474a83783ed561f9..82441de5925cac0d66af95202c613b3e5e9aeb79 100644
--- a/tensorflow/python/keras/_impl/keras/preprocessing/image.py
+++ b/tensorflow/python/keras/_impl/keras/preprocessing/image.py
@@ -556,7 +556,8 @@ class ImageDataGenerator(object):
                           save_to_dir=None,
                           save_prefix='',
                           save_format='png',
-                          follow_links=False):
+                          follow_links=False,
+                          interpolation='nearest'):
     return DirectoryIterator(
         directory,
         self,
@@ -571,7 +572,8 @@ class ImageDataGenerator(object):
         save_to_dir=save_to_dir,
         save_prefix=save_prefix,
         save_format=save_format,
-        follow_links=follow_links)
+        follow_links=follow_links,
+        interpolation=interpolation)
 
   def standardize(self, x):
     """Apply the normalization configuration to a batch of inputs.
@@ -596,7 +598,7 @@ class ImageDataGenerator(object):
         x -= self.mean
       else:
         logging.warning('This ImageDataGenerator specifies '
-                        '`featurewise_center`, but it hasn\'t'
+                        '`featurewise_center`, but it hasn\'t '
                         'been fit on any training data. Fit it '
                         'first by calling `.fit(numpy_data)`.')
     if self.featurewise_std_normalization:
@@ -604,7 +606,7 @@ class ImageDataGenerator(object):
         x /= (self.std + 1e-7)
       else:
         logging.warning('This ImageDataGenerator specifies '
-                        '`featurewise_std_normalization`, but it hasn\'t'
+                        '`featurewise_std_normalization`, but it hasn\'t '
                         'been fit on any training data. Fit it '
                         'first by calling `.fit(numpy_data)`.')
     if self.zca_whitening:
@@ -614,7 +616,7 @@ class ImageDataGenerator(object):
         x = np.reshape(whitex, x.shape)
       else:
         logging.warning('This ImageDataGenerator specifies '
-                        '`zca_whitening`, but it hasn\'t'
+                        '`zca_whitening`, but it hasn\'t '
                         'been fit on any training data. Fit it '
                         'first by calling `.fit(numpy_data)`.')
     return x
@@ -833,8 +835,7 @@ class Iterator(Sequence):
     return self._get_batches_of_transformed_samples(index_array)
 
   def __len__(self):
-    length = int(np.ceil(self.n / float(self.batch_size)))
-    return np.maximum(length, 0)
+    return (self.n + self.batch_size - 1) // self.batch_size  # round up
 
   def on_epoch_end(self):
     self._set_index_array()
@@ -1091,6 +1092,12 @@ class DirectoryIterator(Iterator):
           images (if `save_to_dir` is set).
       save_format: Format to use for saving sample images
           (if `save_to_dir` is set).
+      interpolation: Interpolation method used to resample the image if the
+          target size is different from that of the loaded image.
+          Supported methods are "nearest", "bilinear", and "bicubic".
+          If PIL version 1.1.3 or newer is installed, "lanczos" is also
+          supported. If PIL version 3.4.0 or newer is installed, "box" and
+          "hamming" are also supported. By default, "nearest" is used.
   """
 
   def __init__(self,
@@ -1107,7 +1114,8 @@ class DirectoryIterator(Iterator):
                save_to_dir=None,
                save_prefix='',
                save_format='png',
-               follow_links=False):
+               follow_links=False,
+               interpolation='nearest'):
     if data_format is None:
       data_format = K.image_data_format()
     self.directory = directory
@@ -1138,6 +1146,7 @@ class DirectoryIterator(Iterator):
     self.save_to_dir = save_to_dir
     self.save_prefix = save_prefix
     self.save_format = save_format
+    self.interpolation = interpolation
 
     white_list_formats = {'png', 'jpg', 'jpeg', 'bmp', 'ppm'}
 
@@ -1192,7 +1201,8 @@ class DirectoryIterator(Iterator):
       fname = self.filenames[j]
       img = load_img(os.path.join(self.directory, fname),
                      grayscale=grayscale,
-                     target_size=self.target_size)
+                     target_size=self.target_size,
+                     interpolation=self.interpolation)
       x = img_to_array(img, data_format=self.data_format)
       x = self.image_data_generator.random_transform(x)
       x = self.image_data_generator.standardize(x)
diff --git a/tensorflow/python/keras/_impl/keras/testing_utils.py b/tensorflow/python/keras/_impl/keras/testing_utils.py
index f204a5df3e654eebd5c0165f383f2c418961f5ba..b889e311b37d48732641205a90ca83af34ea4489 100644
--- a/tensorflow/python/keras/_impl/keras/testing_utils.py
+++ b/tensorflow/python/keras/_impl/keras/testing_utils.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.keras._impl import keras
 from tensorflow.python.util import tf_inspect
 
@@ -109,7 +110,8 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
   # check shape inference
   model = keras.models.Model(x, y)
   expected_output_shape = tuple(
-      layer._compute_output_shape(input_shape).as_list())  # pylint: disable=protected-access
+      layer.compute_output_shape(
+          tensor_shape.TensorShape(input_shape)).as_list())
   actual_output = model.predict(input_data)
   actual_output_shape = actual_output.shape
   for expected_dim, actual_dim in zip(expected_output_shape,
diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils.py b/tensorflow/python/keras/_impl/keras/utils/data_utils.py
index 1f2e9ac44076582c7aea083203b13fddaa597474..d0be29f8298fbc83ac518bb7ddf5eda312119e96 100644
--- a/tensorflow/python/keras/_impl/keras/utils/data_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/data_utils.py
@@ -28,6 +28,7 @@ import sys
 import tarfile
 import threading
 import time
+import traceback
 import zipfile
 
 import numpy as np
@@ -475,16 +476,26 @@ class OrderedEnqueuer(SequenceEnqueuer):
 
   def __init__(self, sequence, use_multiprocessing=False, shuffle=False):
     self.sequence = sequence
+    self.use_multiprocessing = use_multiprocessing
 
     # Doing Multiprocessing.Value += x is not process-safe.
     global _SEQUENCE_COUNTER
     if _SEQUENCE_COUNTER is None:
-      _SEQUENCE_COUNTER = multiprocessing.Value('i', 0)
+      if self.use_multiprocessing:
+        _SEQUENCE_COUNTER = multiprocessing.Value('i', 0)
+      else:
+        _SEQUENCE_COUNTER = 0
 
-    with _SEQUENCE_COUNTER.get_lock():
-      self.uid = _SEQUENCE_COUNTER.value
-      _SEQUENCE_COUNTER.value += 1
-    self.use_multiprocessing = use_multiprocessing
+    if self.use_multiprocessing:
+      with _SEQUENCE_COUNTER.get_lock():
+        self.uid = _SEQUENCE_COUNTER.value
+        _SEQUENCE_COUNTER.value += 1
+    else:
+      self.uid = _SEQUENCE_COUNTER
+      if isinstance(_SEQUENCE_COUNTER, int):
+        _SEQUENCE_COUNTER += 1
+      else:
+        _SEQUENCE_COUNTER.value += 1
     self.shuffle = shuffle
     self.workers = 0
     self.executor = None
@@ -560,9 +571,9 @@ class OrderedEnqueuer(SequenceEnqueuer):
         self.queue.task_done()
         if inputs is not None:
           yield inputs
-    except Exception as e:
+    except Exception as e:  # pylint: disable=broad-except
       self.stop()
-      raise StopIteration(e)
+      six.raise_from(StopIteration(e), e)
 
   def _send_sequence(self):
     """Send current Sequence to all workers."""
@@ -623,6 +634,7 @@ class GeneratorEnqueuer(SequenceEnqueuer):
     self._use_multiprocessing = use_multiprocessing
     self._threads = []
     self._stop_event = None
+    self._manager = None
     self.queue = None
     self.seed = seed
 
@@ -640,18 +652,27 @@ class GeneratorEnqueuer(SequenceEnqueuer):
         try:
           if self._use_multiprocessing or self.queue.qsize() < max_queue_size:
             generator_output = next(self._generator)
-            self.queue.put(generator_output)
+            self.queue.put((True, generator_output))
           else:
             time.sleep(self.wait_time)
         except StopIteration:
           break
-        except Exception:
+        except Exception as e:  # pylint: disable=broad-except
+          # Can't pick tracebacks.
+          # As a compromise, print the traceback and pickle None instead.
+          if self._use_multiprocessing:
+            traceback.print_exc()
+            setattr(e, '__traceback__', None)
+          elif not hasattr(e, '__traceback__'):
+            setattr(e, '__traceback__', sys.exc_info()[2])
+          self.queue.put((False, e))
           self._stop_event.set()
-          raise
+          break
 
     try:
       if self._use_multiprocessing:
-        self.queue = multiprocessing.Queue(maxsize=max_queue_size)
+        self._manager = multiprocessing.Manager()
+        self.queue = self._manager.Queue(maxsize=max_queue_size)
         self._stop_event = multiprocessing.Event()
       else:
         self.queue = queue.Queue()
@@ -695,9 +716,8 @@ class GeneratorEnqueuer(SequenceEnqueuer):
         else:
           thread.join(timeout)
 
-    if self._use_multiprocessing:
-      if self.queue is not None:
-        self.queue.close()
+    if self._manager:
+      self._manager.shutdown()
 
     self._threads = []
     self._stop_event = None
@@ -713,12 +733,23 @@ class GeneratorEnqueuer(SequenceEnqueuer):
     """
     while self.is_running():
       if not self.queue.empty():
-        inputs = self.queue.get()
-        if inputs is not None:
-          yield inputs
+        success, value = self.queue.get()
+        # Rethrow any exceptions found in the queue
+        if not success:
+          six.reraise(value.__class__, value, value.__traceback__)
+        # Yield regular values
+        if value is not None:
+          yield value
       else:
         all_finished = all([not thread.is_alive() for thread in self._threads])
         if all_finished and self.queue.empty():
           raise StopIteration()
         else:
           time.sleep(self.wait_time)
+
+      # Make sure to rethrow the first exception in the queue, if any
+    while not self.queue.empty():
+      success, value = self.queue.get()
+      if not success:
+        six.reraise(value.__class__, value, value.__traceback__)
+
diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
index 47c5b4cff06c083f8ebd699b5cb9da85b74116e0..677e98e871d4a148b13c1aa22696917ed8dc90f9 100644
--- a/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
+++ b/tensorflow/python/keras/_impl/keras/utils/data_utils_test.py
@@ -186,7 +186,7 @@ class TestEnqueuers(test.TestCase):
         use_multiprocessing=False)
     enqueuer.start(3, 10)
     gen_output = enqueuer.get()
-    with self.assertRaises(StopIteration):
+    with self.assertRaises(IndexError):
       next(gen_output)
 
   @unittest.skipIf(
@@ -198,7 +198,7 @@ class TestEnqueuers(test.TestCase):
         use_multiprocessing=True)
     enqueuer.start(3, 10)
     gen_output = enqueuer.get()
-    with self.assertRaises(StopIteration):
+    with self.assertRaises(IndexError):
       next(gen_output)
 
   def test_ordered_enqueuer_threads(self):
@@ -299,4 +299,13 @@ class TestEnqueuers(test.TestCase):
 
 
 if __name__ == '__main__':
+  # Bazel sets these environment variables to very long paths.
+  # Tempfile uses them to create long paths, and in turn multiprocessing
+  # library tries to create sockets named after paths. Delete whatever bazel
+  # writes to these to avoid tests failing due to socket addresses being too
+  # long.
+  for var in ('TMPDIR', 'TMP', 'TEMP'):
+    if var in os.environ:
+      del os.environ[var]
+
   test.main()
diff --git a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py
index 025e5d30a597c560804293b12b0bd063764c87fe..e9e54c2a2a713423b77e8279740f0338263206eb 100644
--- a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py
@@ -17,6 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import codecs
 import marshal
 import os
 import sys
@@ -197,10 +198,11 @@ def func_dump(func):
       A tuple `(code, defaults, closure)`.
   """
   if os.name == 'nt':
-    code = marshal.dumps(
-        func.__code__).replace(b'\\', b'/').decode('raw_unicode_escape')
+    raw_code = marshal.dumps(func.__code__).replace(b'\\', b'/')
+    code = codecs.encode(raw_code, 'base64').decode('ascii')
   else:
-    code = marshal.dumps(func.__code__).decode('raw_unicode_escape')
+    raw_code = marshal.dumps(func.__code__)
+    code = codecs.encode(raw_code, 'base64').decode('ascii')
   defaults = func.__defaults__
   if func.__closure__:
     closure = tuple(c.cell_contents for c in func.__closure__)
@@ -225,7 +227,30 @@ def func_load(code, defaults=None, closure=None, globs=None):
     code, defaults, closure = code
     if isinstance(defaults, list):
       defaults = tuple(defaults)
-  code = marshal.loads(code.encode('raw_unicode_escape'))
+
+  def ensure_value_to_cell(value):
+    """Ensures that a value is converted to a python cell object.
+
+    Arguments:
+        value: Any value that needs to be casted to the cell type
+
+    Returns:
+        A value wrapped as a cell object (see function "func_load")
+    """
+    def dummy_fn():
+      # pylint: disable=pointless-statement
+      value  # just access it so it gets captured in .__closure__
+
+    cell_value = dummy_fn.__closure__[0]
+    if not isinstance(value, type(cell_value)):
+      return cell_value
+    else:
+      return value
+
+  if closure is not None:
+    closure = tuple(ensure_value_to_cell(_) for _ in closure)
+  raw_code = codecs.decode(code.encode('ascii'), 'base64')
+  code = marshal.loads(raw_code)
   if globs is None:
     globs = globals()
   return python_types.FunctionType(
diff --git a/tensorflow/python/keras/_impl/keras/utils/io_utils.py b/tensorflow/python/keras/_impl/keras/utils/io_utils.py
index 2003e19a0a759d84ec1785d9bab6bde560ba030a..a8fc18c17aee58fa406c3057cc98844d9687a9ba 100644
--- a/tensorflow/python/keras/_impl/keras/utils/io_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/io_utils.py
@@ -78,7 +78,7 @@ class HDF5Matrix(object):
   def __len__(self):
     return self.end - self.start
 
-  def  __del__(self):
+  def __del__(self):
     self._f.close()
 
   def __getitem__(self, key):
diff --git a/tensorflow/python/keras/_impl/keras/utils/np_utils.py b/tensorflow/python/keras/_impl/keras/utils/np_utils.py
index 896016d4d8bb48192e32ab094f7b7a0e6799921c..67d83bf42c4387be6e5ba578663ecf02ade054c8 100644
--- a/tensorflow/python/keras/_impl/keras/utils/np_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/np_utils.py
@@ -35,7 +35,7 @@ def to_categorical(y, num_classes=None):
   """
   y = np.array(y, dtype='int')
   input_shape = y.shape
-  if input_shape and input_shape[-1] == 1:
+  if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
     input_shape = tuple(input_shape[:-1])
   y = y.ravel()
   if not num_classes:
diff --git a/tensorflow/python/keras/_impl/keras/utils/np_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/np_utils_test.py
index 9680c295cd31c40114726a919d4e327c07ddd240..1e974c2ef2aee3b6a83ad777673505f8c75b2b58 100644
--- a/tensorflow/python/keras/_impl/keras/utils/np_utils_test.py
+++ b/tensorflow/python/keras/_impl/keras/utils/np_utils_test.py
@@ -28,8 +28,9 @@ class TestNPUtils(test.TestCase):
 
   def test_to_categorical(self):
     num_classes = 5
-    shapes = [(3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
-    expected_shapes = [(3, num_classes),
+    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
+    expected_shapes = [(1, num_classes),
+                       (3, num_classes),
                        (4, 3, num_classes),
                        (5, 4, 3, num_classes),
                        (3, num_classes)]
diff --git a/tensorflow/python/keras/_impl/keras/utils/training_utils.py b/tensorflow/python/keras/_impl/keras/utils/training_utils.py
index 8939c814cf3f9c6fa2f2af79e71919c6666e5561..0bf4ac8a24d3011e05f2db101cd02931e0b65849 100644
--- a/tensorflow/python/keras/_impl/keras/utils/training_utils.py
+++ b/tensorflow/python/keras/_impl/keras/utils/training_utils.py
@@ -112,12 +112,22 @@ def multi_gpu_model(model, gpus):
   from tensorflow.python.keras._impl.keras.layers.core import Lambda
   from tensorflow.python.keras._impl.keras.layers.merge import concatenate
 
-  if gpus <= 1:
-    raise ValueError('For multi-gpu usage to be effective, '
-                     'call `multi_gpu_model` with `gpus >= 2`. '
-                     'Received: `gpus=%d`' % gpus)
-
-  target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in range(gpus)]
+  if isinstance(gpus, (list, tuple)):
+    if len(gpus) <= 1:
+      raise ValueError('For multi-gpu usage to be effective, '
+                       'call `multi_gpu_model` with `len(gpus) >= 2`. '
+                       'Received: `gpus=%s`' % gpus)
+    num_gpus = len(gpus)
+    target_gpu_ids = gpus
+  else:
+    if gpus <= 1:
+      raise ValueError('For multi-gpu usage to be effective, '
+                       'call `multi_gpu_model` with `gpus >= 2`. '
+                       'Received: `gpus=%d`' % gpus)
+    num_gpus = gpus
+    target_gpu_ids = range(num_gpus)
+
+  target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
   available_devices = _get_available_devices()
   available_devices = [
       _normalize_device_name(name) for name in available_devices
@@ -145,7 +155,7 @@ def multi_gpu_model(model, gpus):
     batch_size = shape[:1]
     input_shape = shape[1:]
     step = batch_size // parts
-    if i == gpus - 1:
+    if i == num_gpus - 1:
       size = batch_size - step * i
     else:
       size = step
@@ -160,9 +170,9 @@ def multi_gpu_model(model, gpus):
 
   # Place a copy of the model on each GPU,
   # each getting a slice of the inputs.
-  for i in range(gpus):
-    with ops.device('/gpu:%d' % i):
-      with ops.name_scope('replica_%d' % i):
+  for i, gpu_id in enumerate(target_gpu_ids):
+    with ops.device('/gpu:%d' % gpu_id):
+      with ops.name_scope('replica_%d' % gpu_id):
         inputs = []
         # Retrieve a slice of the input.
         for x in model.inputs:
@@ -172,8 +182,9 @@ def multi_gpu_model(model, gpus):
               output_shape=input_shape,
               arguments={
                   'i': i,
-                  'parts': gpus
-              })(x)
+                  'parts': num_gpus
+              })(
+                  x)
           inputs.append(slice_i)
 
         # Apply model on slice
@@ -189,6 +200,7 @@ def multi_gpu_model(model, gpus):
   # Merge outputs on CPU.
   with ops.device('/cpu:0'):
     merged = []
-    for outputs in all_outputs:
-      merged.append(concatenate(outputs, axis=0))
+    for name, outputs in zip(model.output_names, all_outputs):
+      merged.append(concatenate(outputs, axis=0, name=name))
     return Model(model.inputs, merged)
+
diff --git a/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py
index 51fbd041a4943b1837c5f725a06c0c08fb9cb216..12354c49ca72cddc0f395bcfcfabab18c1189227 100644
--- a/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py
+++ b/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py
@@ -33,6 +33,7 @@ class TestMultiGPUModel(test.TestCase):
     output_dim = 1
     hidden_dim = 10
     epochs = 2
+    target_gpu_id = [0, 2, 4]
 
     with self.test_session():
       model = keras.models.Sequential()
@@ -42,8 +43,12 @@ class TestMultiGPUModel(test.TestCase):
 
       x = np.random.random((num_samples, input_dim))
       y = np.random.random((num_samples, output_dim))
+
       parallel_model = keras.utils.multi_gpu_model(model, gpus=gpus)
+      parallel_model.compile(loss='mse', optimizer='rmsprop')
+      parallel_model.fit(x, y, epochs=epochs)
 
+      parallel_model = keras.utils.multi_gpu_model(model, gpus=target_gpu_id)
       parallel_model.compile(loss='mse', optimizer='rmsprop')
       parallel_model.fit(x, y, epochs=epochs)
 
@@ -56,6 +61,7 @@ class TestMultiGPUModel(test.TestCase):
     output_dim_b = 2
     hidden_dim = 10
     epochs = 2
+    target_gpu_id = [0, 2, 4]
 
     with self.test_session():
       input_a = keras.Input((input_dim_a,))
@@ -76,6 +82,10 @@ class TestMultiGPUModel(test.TestCase):
       parallel_model.compile(loss='mse', optimizer='rmsprop')
       parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs)
 
+      parallel_model = keras.utils.multi_gpu_model(model, gpus=target_gpu_id)
+      parallel_model.compile(loss='mse', optimizer='rmsprop')
+      parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs)
+
   def multi_gpu_test_invalid_devices(self):
     with self.test_session():
       input_shape = (1000, 10)
@@ -92,3 +102,16 @@ class TestMultiGPUModel(test.TestCase):
         parallel_model = keras.utils.multi_gpu_model(
             model, gpus=len(keras.backend._get_available_gpus()) + 1)
         parallel_model.fit(x, y, epochs=2)
+
+      with self.assertRaises(ValueError):
+        parallel_model = keras.utils.multi_gpu_model(
+            model, gpus=[0, 2, 4, 6, 8])
+        parallel_model.fit(x, y, epochs=2)
+
+      with self.assertRaises(ValueError):
+        parallel_model = keras.utils.multi_gpu_model(model, gpus=1)
+        parallel_model.fit(x, y, epochs=2)
+
+      with self.assertRaises(ValueError):
+        parallel_model = keras.utils.multi_gpu_model(model, gpus=[0])
+        parallel_model.fit(x, y, epochs=2)
diff --git a/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py b/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py
index 31ef4773ad6481264aea09c72f955a5a6ef8a11d..bc788d874f663caefd46d56fbf715a802fe08ec1 100644
--- a/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py
+++ b/tensorflow/python/keras/_impl/keras/wrappers/scikit_learn.py
@@ -38,18 +38,18 @@ class BaseWrapper(object):
       build_fn: callable function or class instance
       **sk_params: model parameters & fitting parameters
 
-  The build_fn should construct, compile and return a Keras model, which
+  The `build_fn` should construct, compile and return a Keras model, which
   will then be used to fit/predict. One of the following
-  three values could be passed to build_fn:
+  three values could be passed to `build_fn`:
   1. A function
-  2. An instance of a class that implements the __call__ method
+  2. An instance of a class that implements the `__call__` method
   3. None. This means you implement a class that inherits from either
-  `KerasClassifier` or `KerasRegressor`. The __call__ method of the
-  present class will then be treated as the default build_fn.
+  `KerasClassifier` or `KerasRegressor`. The `__call__` method of the
+  present class will then be treated as the default `build_fn`.
 
   `sk_params` takes both model parameters and fitting parameters. Legal model
   parameters are the arguments of `build_fn`. Note that like all other
-  estimators in scikit-learn, 'build_fn' should provide default values for
+  estimators in scikit-learn, `build_fn` should provide default values for
   its arguments, so that you could create the estimator without passing any
   values to `sk_params`.
 
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 2ec162578cebf4b36dc0ebc68f8ac26b6df8f422..de6aba4477fee84c01da6d684418f1101733ce39 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -81,6 +81,21 @@ tf_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "list_ops_test",
+    size = "small",
+    srcs = ["list_ops_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:list_ops",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:client_testlib",
+    ],
+    grpc_enabled = True,
+)
+
 cuda_py_test(
     name = "benchmark_test",
     size = "small",
@@ -294,6 +309,19 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "decode_compressed_op_test",
+    size = "small",
+    srcs = ["decode_compressed_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:parsing_ops",
+    ],
+)
+
 cuda_py_test(
     name = "determinant_op_test",
     size = "small",
@@ -356,6 +384,7 @@ tf_py_test(
     srcs = ["fractional_avg_pool_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:nn_grad",
@@ -370,6 +399,7 @@ tf_py_test(
     srcs = ["fractional_max_pool_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:nn_grad",
@@ -483,6 +513,7 @@ tf_py_test(
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:training",
     ],
+    grpc_enabled = True,
 )
 
 tf_py_test(
@@ -517,6 +548,18 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "matrix_logarithm_op_test",
+    size = "small",
+    srcs = ["matrix_logarithm_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:linalg_ops",
+    ],
+)
+
 cuda_py_test(
     name = "matrix_inverse_op_test",
     size = "small",
@@ -602,6 +645,23 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "parse_single_example_op_test",
+    size = "small",
+    srcs = ["parse_single_example_op_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:platform",
+    ],
+)
+
 tf_py_test(
     name = "partitioned_variables_test",
     size = "small",
@@ -676,6 +736,7 @@ cuda_py_test(
         "//tensorflow/python:gradients",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:variables",
+        "//tensorflow/python:resource_variable_ops",
     ],
     tags = ["noasan"],  # http://b/32635055
 )
@@ -1230,7 +1291,9 @@ cuda_py_test(
 
 cuda_py_test(
     name = "control_flow_ops_py_test",
-    size = "small",
+    # TOOD(b/70473603): change this back to "small" once the C API is
+    # permanently enabled
+    size = "medium",
     srcs = ["control_flow_ops_py_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
@@ -1264,6 +1327,19 @@ cuda_py_test(
     ],
 )
 
+tf_py_test(
+    name = "control_flow_util_test",
+    size = "small",
+    srcs = ["control_flow_util_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:control_flow_ops_gen",
+        "//tensorflow/python:control_flow_util",
+        "//tensorflow/python:test_ops",
+    ],
+)
+
 cuda_py_test(
     name = "conv1d_test",
     size = "small",
@@ -1370,7 +1446,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "dynamic_partition_op_test",
-    size = "small",
+    size = "medium",
     srcs = ["dynamic_partition_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
@@ -1427,6 +1503,7 @@ cuda_py_test(
         "//tensorflow/python:variable_scope",
         "//tensorflow/python:variables",
     ],
+    grpc_enabled = True,
     tags = ["no_windows"],
 )
 
@@ -1631,6 +1708,8 @@ cuda_py_test(
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:script_ops",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:function",
     ],
     tags = ["no_windows"],
 )
@@ -2055,7 +2134,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "transpose_op_test",
-    size = "medium",
+    size = "large",
     srcs = ["transpose_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
@@ -2063,6 +2142,11 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
     ],
+    shard_count = 2,
+    tags = [
+        "no_gpu",
+        "no_oss",
+    ],
 )
 
 cuda_py_test(
@@ -2143,6 +2227,7 @@ cuda_py_test(
     srcs = ["atrous_convolution_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:nn_grad",
@@ -2776,101 +2861,6 @@ tf_py_test(
     ],
 )
 
-tf_py_test(
-    name = "batch_dataset_op_test",
-    size = "small",
-    srcs = ["batch_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "dataset_constructor_op_test",
-    size = "small",
-    srcs = ["dataset_constructor_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-    ],
-    tags = [
-        "manual",
-        "nomac",  # b/62040583
-    ],
-)
-
-tf_py_test(
-    name = "dataset_from_generator_op_test",
-    size = "small",
-    srcs = ["dataset_from_generator_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:sparse",
-    ],
-)
-
-tf_py_test(
-    name = "filter_dataset_op_test",
-    size = "small",
-    srcs = ["filter_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "flat_map_dataset_op_test",
-    size = "small",
-    srcs = ["flat_map_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:training",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
 tf_py_test(
     name = "garbage_collection_test",
     size = "small",
@@ -2885,263 +2875,6 @@ tf_py_test(
     ],
 )
 
-tf_py_test(
-    name = "list_files_dataset_op_test",
-    size = "small",
-    srcs = ["list_files_dataset_op_test.py"],
-    additional_deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "interleave_dataset_op_test",
-    size = "small",
-    srcs = ["interleave_dataset_op_test.py"],
-    additional_deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:training",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "map_dataset_op_test",
-    size = "small",
-    srcs = ["map_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:data_flow_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:lookup_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "prefetch_dataset_op_test",
-    size = "small",
-    srcs = ["prefetch_dataset_op_test.py"],
-    additional_deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "range_dataset_op_test",
-    size = "small",
-    srcs = ["range_dataset_op_test.py"],
-    additional_deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-)
-
-tf_py_test(
-    name = "reader_dataset_ops_test",
-    size = "small",
-    srcs = ["reader_dataset_ops_test.py"],
-    additional_deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:lib",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/ops:readers",
-    ],
-)
-
-tf_py_test(
-    name = "sequence_dataset_op_test",
-    size = "small",
-    srcs = ["sequence_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "shuffle_dataset_op_test",
-    size = "small",
-    srcs = ["shuffle_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-)
-
-tf_py_test(
-    name = "shard_dataset_op_test",
-    size = "small",
-    srcs = ["shard_dataset_op_test.py"],
-    additional_deps = [
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "cache_dataset_op_test",
-    size = "small",
-    srcs = ["cache_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-)
-
-tf_py_test(
-    name = "zip_dataset_op_test",
-    size = "small",
-    srcs = ["zip_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-tf_py_test(
-    name = "concatenate_dataset_op_test",
-    size = "small",
-    srcs = ["concatenate_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-    ],
-)
-
-tf_py_test(
-    name = "iterator_ops_test",
-    size = "small",
-    srcs = ["iterator_ops_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python/data/ops:readers",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/util:sparse",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:function",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:gradients",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:training",
-    ],
-)
-
-tf_py_test(
-    name = "iterator_ops_cluster_test",
-    size = "small",
-    srcs = ["iterator_ops_cluster_test.py"],
-    additional_deps = [
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:function",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-    tags = [
-        "no_oss",  # Test flaky due to port collisions.
-        "no_windows",
-    ],
-)
-
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 1bf2b70c1beb25739fddebb3a552c368dc7a48be..1dbe7deb97c6e4305dbf998813249bd80ace3363 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -33,10 +33,13 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test as test_lib
 
@@ -274,26 +277,34 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
     x_np = np.array([1, 200, 3, 40, 5], dtype=np_dtype)
 
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu):
-        x_tf = array_ops.reverse_v2(x_np, [0]).eval()
-        self.assertAllEqual(x_tf, np.asarray(x_np)[::-1])
+      for axis_dtype in [dtypes.int32, dtypes.int64]:
+        with self.test_session(use_gpu=use_gpu):
+          x_tf = array_ops.reverse_v2(x_np,
+              constant_op.constant([0], dtype=axis_dtype)).eval()
+          self.assertAllEqual(x_tf, np.asarray(x_np)[::-1])
 
   def _reverse2DimAuto(self, np_dtype):
     x_np = np.array([[1, 200, 3], [4, 5, 60]], dtype=np_dtype)
 
     for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
       for use_gpu in [False, True]:
-        with self.test_session(use_gpu=use_gpu):
-          x_tf_1 = reverse_f(x_np, [0]).eval()
-          x_tf_2 = reverse_f(x_np, [-2]).eval()
-          x_tf_3 = reverse_f(x_np, [1]).eval()
-          x_tf_4 = reverse_f(x_np, [-1]).eval()
-          x_tf_5 = reverse_f(x_np, [1, 0]).eval()
-          self.assertAllEqual(x_tf_1, np.asarray(x_np)[::-1, :])
-          self.assertAllEqual(x_tf_2, np.asarray(x_np)[::-1, :])
-          self.assertAllEqual(x_tf_3, np.asarray(x_np)[:, ::-1])
-          self.assertAllEqual(x_tf_4, np.asarray(x_np)[:, ::-1])
-          self.assertAllEqual(x_tf_5, np.asarray(x_np)[::-1, ::-1])
+        for axis_dtype in [dtypes.int32, dtypes.int64]:
+          with self.test_session(use_gpu=use_gpu):
+            x_tf_1 = reverse_f(x_np,
+                constant_op.constant([0], dtype=axis_dtype)).eval()
+            x_tf_2 = reverse_f(x_np,
+                constant_op.constant([-2], dtype=axis_dtype)).eval()
+            x_tf_3 = reverse_f(x_np,
+                constant_op.constant([1], dtype=axis_dtype)).eval()
+            x_tf_4 = reverse_f(x_np,
+                constant_op.constant([-1], dtype=axis_dtype)).eval()
+            x_tf_5 = reverse_f(x_np,
+                constant_op.constant([1, 0], dtype=axis_dtype)).eval()
+            self.assertAllEqual(x_tf_1, np.asarray(x_np)[::-1, :])
+            self.assertAllEqual(x_tf_2, np.asarray(x_np)[::-1, :])
+            self.assertAllEqual(x_tf_3, np.asarray(x_np)[:, ::-1])
+            self.assertAllEqual(x_tf_4, np.asarray(x_np)[:, ::-1])
+            self.assertAllEqual(x_tf_5, np.asarray(x_np)[::-1, ::-1])
 
   # This is the version of reverse that uses axis indices rather than
   # bool tensors
@@ -1090,5 +1101,47 @@ class InvertPermutationTest(test_util.TensorFlowTestCase):
         self.assertAllEqual(y.eval(), [2, 4, 3, 0, 1])
 
 
+class GuaranteeConstOpTest(test_util.TensorFlowTestCase):
+
+  def testSimple(self):
+    with self.test_session():
+      a = array_ops.constant(10)
+      guarantee_a = array_ops.guarantee_const(a)
+      self.assertEqual(10, guarantee_a.eval())
+
+  def testVariables(self):
+    with self.test_session() as sess:
+      for use_resource in [False, True]:
+        a = variable_scope.get_variable(
+            "var_{}".format(use_resource), [],
+            initializer=init_ops.constant_initializer(10.0),
+            use_resource=use_resource)
+        guarantee_a = array_ops.guarantee_const(a)
+        sess.run(variables.global_variables_initializer())
+        self.assertEqual(10.0, guarantee_a.eval())
+
+  def testResourceRejection(self):
+    with self.test_session() as sess:
+      a = variable_scope.get_variable(
+          "resource_var", [],
+          initializer=init_ops.constant_initializer(10.0),
+          use_resource=True)
+      guarantee_a = array_ops.guarantee_const(a.handle)
+      sess.run(variables.global_variables_initializer())
+      with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
+                                               "cannot be a resource variable"):
+        guarantee_a.eval()
+
+
+class SnapshotOpTest(test_util.TensorFlowTestCase):
+
+  def testInvertPermutation(self):
+    for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]:
+      with self.test_session(use_gpu=True):
+        x = constant_op.constant([0, 1, 2, 3], dtype=dtype)
+        y = gen_array_ops._snapshot(x)
+        self.assertAllEqual(y.eval(), [0, 1, 2, 3])
+
+
 if __name__ == "__main__":
   test_lib.main()
diff --git a/tensorflow/python/kernel_tests/atrous_convolution_test.py b/tensorflow/python/kernel_tests/atrous_convolution_test.py
index 3ac27d11c57062b8d7c4c1d9b89bc576c6959dec..04248fb2bab4333ed164f7871d2e9d5002dc52ad 100644
--- a/tensorflow/python/kernel_tests/atrous_convolution_test.py
+++ b/tensorflow/python/kernel_tests/atrous_convolution_test.py
@@ -26,6 +26,7 @@ from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import nn_ops
 import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
@@ -108,6 +109,18 @@ class AtrousConvolutionTest(test.TestCase):
 
     add_check(check, y1, y2)
 
+  def test_unknown_spatial_dims_for_channel_last_format(self):
+    x = array_ops.placeholder(dtypes.float32, [1, None, None, 10])
+    w = array_ops.zeros([3, 3, 10, 20])
+    y = nn_ops.convolution(x, w, "VALID", dilation_rate=[2, 2], data_format="NHWC")
+    self.assertEqual(y.shape.as_list(), [1, None, None, 20])
+
+  def test_unknown_spatial_dims_for_channel_first_format(self):
+    x = array_ops.placeholder(dtypes.float32, [1, 10, None, None])
+    w = array_ops.zeros([3, 3, 10, 20])
+    y = nn_ops.convolution(x, w, "VALID", dilation_rate=[2, 2], data_format="NCHW")
+    self.assertEqual(y.shape.as_list(), [1, 20, None, None])
+
   @test_util.run_in_graph_and_eager_modes()
   def testAtrousConvolution2D(self):
     with self._delay_checks() as add_check:
diff --git a/tensorflow/python/kernel_tests/bcast_ops_test.py b/tensorflow/python/kernel_tests/bcast_ops_test.py
index 7c18044c5c5072ef03681165f6194b73a29392ca..9e512346053a4c3af089170f47313606c4a307c2 100644
--- a/tensorflow/python/kernel_tests/bcast_ops_test.py
+++ b/tensorflow/python/kernel_tests/bcast_ops_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.ops.gen_array_ops import _broadcast_args
 from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args
 from tensorflow.python.platform import test
@@ -135,6 +137,19 @@ class BcastOpsTest(test.TestCase):
     self.assertAllEqual(r0, [0, 1, 3])
     self.assertAllEqual(r1, [])
 
+  def testDataTypes(self):
+    for dtype in [dtypes.int32, dtypes.int64]:
+      r = self._GetBroadcastShape(
+          constant_op.constant([2, 3, 5], dtype=dtype),
+          constant_op.constant([1], dtype=dtype))
+      self.assertAllEqual(r, [2, 3, 5])
+
+      r0, r1 = self._GetGradientArgs(
+          constant_op.constant([2, 3, 5], dtype=dtype),
+          constant_op.constant([1], dtype=dtype))
+      self.assertAllEqual(r0, [])
+      self.assertAllEqual(r1, [0, 1, 2])
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index 7ce0f1e7b8a4df7c8c3acb36c0d46f60cbf0f703..2e94603a3f3d4ca9074320cfb4e9bf06b6640e82 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -117,7 +117,7 @@ class AssertEqualTest(test.TestCase):
   def test_error_message_eager(self):
     expected_error_msg_full = r"""big does not equal small
 Condition x == y did not hold.
-Indices of first 6 different values:
+Indices of first 3 different values:
 \[\[0 0\]
  \[1 1\]
  \[2 0\]\]
@@ -129,6 +129,21 @@ First 6 elements of x:
 \[2 2 3 3 6 6\]
 First 6 elements of y:
 \[20  2  3 30 60  6\]
+"""
+    expected_error_msg_default = r"""big does not equal small
+Condition x == y did not hold.
+Indices of first 3 different values:
+\[\[0 0\]
+ \[1 1\]
+ \[2 0\]\]
+Corresponding x values:
+\[2 3 6\]
+Corresponding y values:
+\[20 30 60\]
+First 3 elements of x:
+\[2 2 3\]
+First 3 elements of y:
+\[20  2  3\]
 """
     expected_error_msg_short = r"""big does not equal small
 Condition x == y did not hold.
@@ -151,6 +166,9 @@ First 2 elements of y:
                                    expected_error_msg_full):
         check_ops.assert_equal(big, small, message="big does not equal small",
                                summarize=10)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_default):
+        check_ops.assert_equal(big, small, message="big does not equal small")
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    expected_error_msg_short):
         check_ops.assert_equal(big, small, message="big does not equal small",
@@ -270,6 +288,118 @@ class AssertNoneEqualTest(test.TestCase):
       assert x is None
 
 
+class AssertAllCloseTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_doesnt_raise_when_equal(self):
+    x = constant_op.constant(1., name="x")
+    y = constant_op.constant(1., name="y")
+    with ops.control_dependencies(
+        [check_ops.assert_near(x, y, message="failure message")]):
+      out = array_ops.identity(x)
+      self.evaluate(out)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_doesnt_raise_when_close_enough_32_bit_due_to_default_rtol(self):
+    eps = np.finfo(np.float32).eps
+    # Default rtol/atol is 10*eps
+    x = constant_op.constant(1., name="x")
+    y = constant_op.constant(1. + 2 * eps, name="y", dtype=np.float32)
+    with ops.control_dependencies(
+        [check_ops.assert_near(x, y, atol=0., message="failure message")]):
+      out = array_ops.identity(x)
+      self.evaluate(out)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_doesnt_raise_when_close_enough_32_bit_due_to_default_atol(self):
+    eps = np.finfo(np.float32).eps
+    # Default rtol/atol is 10*eps
+    x = constant_op.constant(0., name="x")
+    y = constant_op.constant(0. + 2 * eps, name="y", dtype=np.float32)
+    with ops.control_dependencies(
+        [check_ops.assert_near(x, y, rtol=0., message="failure message")]):
+      out = array_ops.identity(x)
+      self.evaluate(out)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_doesnt_raise_when_close_enough_64_bit_due_to_default_rtol(self):
+    eps = np.finfo(np.float64).eps
+    # Default rtol/atol is 10*eps
+    x = constant_op.constant(1., name="x", dtype=np.float64)
+    y = constant_op.constant(1. + 2 * eps, name="y", dtype=np.float64)
+    with ops.control_dependencies(
+        [check_ops.assert_near(x, y, atol=0., message="failure message")]):
+      out = array_ops.identity(x)
+      self.evaluate(out)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_doesnt_raise_when_close_enough_64_bit_due_to_default_atol(self):
+    eps = np.finfo(np.float64).eps
+    # Default rtol/atol is 10*eps
+    x = constant_op.constant(0., name="x", dtype=np.float64)
+    y = constant_op.constant(0. + 2 * eps, name="y", dtype=np.float64)
+    with ops.control_dependencies(
+        [check_ops.assert_near(x, y, rtol=0., message="failure message")]):
+      out = array_ops.identity(x)
+      self.evaluate(out)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_doesnt_raise_when_close_enough_due_to_custom_rtol(self):
+    x = constant_op.constant(1., name="x")
+    y = constant_op.constant(1.1, name="y")
+    with ops.control_dependencies(
+        [check_ops.assert_near(x, y, atol=0., rtol=0.5,
+                               message="failure message")]):
+      out = array_ops.identity(x)
+      self.evaluate(out)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_doesnt_raise_when_close_enough_due_to_custom_atol(self):
+    x = constant_op.constant(0., name="x")
+    y = constant_op.constant(0.1, name="y", dtype=np.float32)
+    with ops.control_dependencies(
+        [check_ops.assert_near(x, y, atol=0.5, rtol=0.,
+                               message="failure message")]):
+      out = array_ops.identity(x)
+      self.evaluate(out)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_doesnt_raise_when_both_empty(self):
+    larry = constant_op.constant([])
+    curly = constant_op.constant([])
+    with ops.control_dependencies([check_ops.assert_near(larry, curly)]):
+      out = array_ops.identity(larry)
+    self.evaluate(out)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_raises_when_atol_violated(self):
+    x = constant_op.constant(10., name="x")
+    y = constant_op.constant(10.2, name="y")
+    with self.assertRaisesOpError("x and y not equal to tolerance"):
+      with ops.control_dependencies(
+          [check_ops.assert_near(x, y, atol=0.1,
+                                 message="failure message")]):
+        out = array_ops.identity(x)
+        self.evaluate(out)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_raises_when_default_rtol_violated(self):
+    x = constant_op.constant(0.1, name="x")
+    y = constant_op.constant(0.0, name="y")
+    with self.assertRaisesOpError("x and y not equal to tolerance"):
+      with ops.control_dependencies(
+          [check_ops.assert_near(x, y, message="failure message")]):
+        out = array_ops.identity(x)
+        self.evaluate(out)
+
+  def test_returns_none_with_eager(self):
+    with context.eager_mode():
+      t1 = constant_op.constant([1., 2.])
+      t2 = constant_op.constant([1., 2.])
+      x = check_ops.assert_near(t1, t2)
+      assert x is None
+
+
 class AssertLessTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
diff --git a/tensorflow/python/kernel_tests/constant_op_eager_test.py b/tensorflow/python/kernel_tests/constant_op_eager_test.py
index 3b71586b55451df86bf214437be3ceec8a4265eb..8e9d75667d49bf9e377ccb9290a3a91786b5a1cb 100644
--- a/tensorflow/python/kernel_tests/constant_op_eager_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_eager_test.py
@@ -237,6 +237,39 @@ class ConstantTest(test.TestCase):
     self._testAll((1, x))
     self._testAll((x, 1))
 
+  def testInvalidLength(self):
+
+    class BadList(list):
+
+      def __init__(self):
+        super(BadList, self).__init__([1, 2, 3])  # pylint: disable=invalid-length-returned
+
+      def __len__(self):
+        return -1
+
+    with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+      constant_op.constant([BadList()])
+    with self.assertRaisesRegexp(ValueError, "mixed types"):
+      constant_op.constant([1, 2, BadList()])
+    with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+      constant_op.constant(BadList())
+    with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+      constant_op.constant([[BadList(), 2], 3])
+    with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+      constant_op.constant([BadList(), [1, 2, 3]])
+    with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+      constant_op.constant([BadList(), []])
+
+    # TODO(allenl, josh11b): These cases should return exceptions rather than
+    # working (currently shape checking only checks the first element of each
+    # sequence recursively). Maybe the first one is fine, but the second one
+    # silently truncating is rather bad.
+
+    # with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+    #   constant_op.constant([[3, 2, 1], BadList()])
+    # with self.assertRaisesRegexp(ValueError, "should return >= 0"):
+    #   constant_op.constant([[], BadList()])
+
   def testSparseValuesRaiseErrors(self):
     with self.assertRaisesRegexp(ValueError, "non-rectangular Python sequence"):
       constant_op.constant([[1, 2], [3]], dtype=dtypes_lib.int32)
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index 68817cc2566847255d289f822aa69308e9c2e329..030c690167fd7edef9ad929eb5cee5f03d9d5883 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -44,7 +44,8 @@ class ConstantTest(test.TestCase):
     np_ans = np.array(x)
     with self.test_session(use_gpu=False):
       tf_ans = ops.convert_to_tensor(x).eval()
-    if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]:
+    dtype = dtypes_lib.as_dtype(np_ans.dtype)
+    if dtype.is_floating or dtype.is_complex:
       self.assertAllClose(np_ans, tf_ans)
     else:
       self.assertAllEqual(np_ans, tf_ans)
@@ -53,7 +54,8 @@ class ConstantTest(test.TestCase):
     np_ans = np.array(x)
     with self.test_session(use_gpu=True):
       tf_ans = ops.convert_to_tensor(x).eval()
-    if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]:
+    dtype = dtypes_lib.as_dtype(np_ans.dtype)
+    if dtype.is_floating or dtype.is_complex:
       self.assertAllClose(np_ans, tf_ans)
     else:
       self.assertAllEqual(np_ans, tf_ans)
@@ -62,6 +64,19 @@ class ConstantTest(test.TestCase):
     self._testCpu(x)
     self._testGpu(x)
 
+  def testBFloat16(self):
+    bfloat16 = dtypes_lib.bfloat16.as_numpy_dtype
+    self._testAll(np.arange(-15, 15).reshape([2, 3, 5]).astype(bfloat16))
+    self._testAll(
+        np.random.normal(size=30).reshape([2, 3, 5]).astype(bfloat16))
+    self._testAll(np.empty((2, 0, 5)).astype(bfloat16))
+
+  def testHalf(self):
+    self._testAll(np.arange(-15, 15).reshape([2, 3, 5]).astype(np.float16))
+    self._testAll(
+        np.random.normal(size=30).reshape([2, 3, 5]).astype(np.float16))
+    self._testAll(np.empty((2, 0, 5)).astype(np.float16))
+
   def testFloat(self):
     self._testAll(np.arange(-15, 15).reshape([2, 3, 5]).astype(np.float32))
     self._testAll(
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 1b7f9b110c2c6f86a1c22ed6eeae2d966cabdad3..6e18ed132cd6337378fdb8ec774f7946da8d61ed 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -38,6 +38,7 @@ from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import data_flow_ops
@@ -68,16 +69,6 @@ from tensorflow.python.training import gradient_descent
 from tensorflow.python.util import nest
 
 
-def check_op_order(graph):
-  """Sanity check on the ordering of op id."""
-
-  for op in graph.get_operations():
-    for v in op.inputs:
-      assert v.op._id < op._id or op.type == "Merge", (
-          "The id of %s must be less than the id of %s" % (v.op.name, op.name))
-  return True
-
-
 def check_consumers(graph):
   """Sanity check on the consumer list of the tensors."""
 
@@ -122,14 +113,16 @@ def opt_cfg():
               do_constant_folding=True)))
 
 
-def isum(s):
+def isum(s, maximum_iterations=None):
   i = constant_op.constant(0, name="i")
   c = lambda i, s: math_ops.less(i, 10)
   b = lambda i, s: [math_ops.add(i, 1), math_ops.add(i, s)]
-  _, r_s = control_flow_ops.while_loop(c, b, [i, s])
+  _, r_s = control_flow_ops.while_loop(
+      c, b, [i, s], maximum_iterations=maximum_iterations)
   return r_s
 
 
+@test_util.with_c_api
 class ControlFlowTest(test.TestCase):
 
   def testRefIdentity(self):
@@ -140,7 +133,6 @@ class ControlFlowTest(test.TestCase):
       op = state_ops.assign(v, 9)
       v2 = control_flow_ops.with_dependencies([op], v)
 
-      self.assertTrue(check_op_order(v.graph))
       self.assertTrue(isinstance(v2, ops.Tensor))
       variables.global_variables_initializer().run()
       self.assertEqual(9, v2.eval())
@@ -396,7 +388,6 @@ class ControlFlowTest(test.TestCase):
 
       val = r.values.eval()
       ind = r.indices.eval()
-    self.assertTrue(check_op_order(x.values.graph))
     self.assertAllEqual(11, val)
     self.assertAllEqual(0, ind)
 
@@ -443,7 +434,6 @@ class ControlFlowTest(test.TestCase):
 
       val = r.values.eval()
       ind = r.indices.eval()
-    self.assertTrue(check_op_order(x.values.graph))
     self.assertAllEqual(11, val)
     self.assertAllEqual(0, ind)
     self.assertTrue(ind.dtype == np.int64)
@@ -472,7 +462,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn1, fn2)
 
       result = r.eval()
-    self.assertTrue(check_op_order(x.graph))
     self.assertAllEqual(11, result)
 
   def testCond_1(self):
@@ -486,7 +475,6 @@ class ControlFlowTest(test.TestCase):
           math_ops.less(1, 0), lambda: math_ops.add(x, 1),
           lambda: math_ops.subtract(x, 1))
       result = r.eval()
-    self.assertTrue(check_op_order(x.graph))
     self.assertAllEqual(9, result)
 
   def testCond_3(self):
@@ -499,7 +487,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn3, fn2)
 
       result = r.eval()
-    self.assertTrue(check_op_order(x.graph))
     self.assertAllEqual(12, result)
 
   def testCond_4(self):
@@ -518,7 +505,6 @@ class ControlFlowTest(test.TestCase):
       variables.global_variables_initializer().run()
       self.assertEqual(len(r), 2)
       result = r[1].eval()
-      self.assertTrue(check_op_order(age.graph))
       self.assertAllEqual(True, result)
       self.assertAllEqual(7, v1.eval())
       self.assertAllEqual(2, v2.eval())
@@ -746,6 +732,178 @@ class ControlFlowTest(test.TestCase):
       r = isum(s)
       self.assertAllEqual(45, r.eval())
 
+  def testWhileWithMaximumIterations(self):
+    with self.test_session():
+      s = constant_op.constant([1, 2, 3, 4, 5])
+      r = isum(s, maximum_iterations=3)
+      self.assertAllEqual([1+3, 2+3, 3+3, 4+3, 5+3], r.eval())
+
+  def testWhileWithMaximumIterationsAndSingleArgument(self):
+    with self.test_session():
+      r = control_flow_ops.while_loop(
+          lambda i: i < 3,
+          lambda i: i + 1,
+          [0],
+          maximum_iterations=1)
+      self.assertEqual(1, r.eval())
+
+  def testSingleNestedMaximumIterationsWhileLoopGradientInXLAContext(self):
+    v = constant_op.constant(1.0)
+    def training_loop_with_gradient(i):
+      out = control_flow_ops.while_loop(
+          lambda i_, _: i_ < 3,
+          lambda i_, j: [i_ + 1, j * v],
+          [0, 1.0],
+          maximum_iterations=i)
+      g = gradients_impl.gradients(out, v)
+      with ops.control_dependencies(g):
+        return i + 1
+
+    xla_context = control_flow_ops.XLAControlFlowContext()
+    xla_context.Enter()
+    # Create training loop, ensure we can call gradient() of
+    # while_loop inside the training loop.
+    loop = control_flow_ops.while_loop(
+        lambda i: i < 3, training_loop_with_gradient, [0])
+    xla_context.Exit()
+
+    loop_execute = array_ops.identity(loop)  # Because loop is not fetchable.
+
+    # Should execute without issue.
+    self.assertEqual(3, self.evaluate(loop_execute))
+
+  def testInvalidMaximumIterationsWhileLoopGradientInXLAContext(self):
+    v = constant_op.constant(1.0)
+    def inner_body(i, x):
+      out = control_flow_ops.while_loop(
+          lambda i, _: i < 3,
+          lambda i, j: [i + 1, j * v],
+          [0, x],
+          maximum_iterations=i)
+      return out
+
+    def create_while_loop(maximum_iterations=None):
+      return control_flow_ops.while_loop(
+          lambda i, _: i < 3, inner_body, [0, 1.0],
+          maximum_iterations=maximum_iterations)
+
+    loop_no_xla = create_while_loop(maximum_iterations=5)
+    # maximum_iterations is fine outside of an XLA scope
+    gs = gradients_impl.gradients(loop_no_xla, v)
+    self.evaluate(gs)  # This should execute without error.
+
+    xla_context = control_flow_ops.XLAControlFlowContext()
+    xla_context.Enter()
+    loop_no_maxiter = create_while_loop()
+    loop_with_maxiter = create_while_loop(maximum_iterations=2)
+    xla_context.Exit()
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        r"Cannot create a gradient accumulator for tensor '.+' inside "
+        r"XLA while_loop because maximum_iterations was not passed to "
+        r"the tf.while_loop call \('.+'\)."):
+      _ = gradients_impl.gradients(loop_no_maxiter, v)
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        r"Cannot create a gradient accumulator for tensor '.+' inside XLA "
+        r"while_loop. maximum_iterations tensor '.+' for while_loop context "
+        r"'.+' must be statically known \(e.g. a constant value or known "
+        r"shape dimension\), or be defined at or outside the while loop "
+        r"context '.*' \(currently defined in '.*'\)"):
+      _ = gradients_impl.gradients(loop_with_maxiter, v)
+
+  def testInvalidMaximumIterationsFromSiblingContextWhileLoopInXLAContext(self):
+    v = constant_op.constant(1.0)
+
+    def create_while_loop():
+      max_iter_holder = []
+      def create_mi():
+        max_iter_holder.append(array_ops.placeholder(dtypes.int32, shape=()))
+        return 1.0
+      _ = control_flow_ops.cond(constant_op.constant(True),
+                                create_mi, create_mi)
+
+      return control_flow_ops.while_loop(
+          lambda i, _: i < 3, lambda i, x: (i + 1, v * x), (0, 1.0),
+          maximum_iterations=max_iter_holder[0])
+
+    xla_context = control_flow_ops.XLAControlFlowContext()
+    xla_context.Enter()
+    loop = create_while_loop()
+    xla_context.Exit()
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        r"Cannot create a gradient accumulator for tensor '.+' inside XLA "
+        r"while_loop. maximum_iterations tensor '.*Placeholder:0' for "
+        r"while_loop context '.+' must be statically known \(e.g. a constant "
+        r"value or known shape dimension\), or be defined at or outside the "
+        r"while loop context '' \(currently defined in 'cond/.+'\)"):
+      _ = gradients_impl.gradients(loop, v)
+
+  def testNestedWhileLoopWithMaxItersFromOuterContextInXLAContext(self):
+    v = constant_op.constant(1.0)
+
+    p = array_ops.placeholder(dtype=dtypes.int32)
+
+    def mid_body_builder(iterations):
+      def mid_body(i, x):
+        r = control_flow_ops.while_loop(
+            lambda *_: True,
+            lambda i, x: (i + 1, v * x),
+            (0, x),
+            maximum_iterations=iterations, name="inner")
+        return (i + 1, gradients_impl.gradients(x + r[1], v)[0])
+      return mid_body
+
+    def outer_body(i, x):
+      iterations = array_ops.size(p, name="iterations")
+      return (
+          i + 1,
+          x + control_flow_ops.while_loop(
+              lambda *_: True, mid_body_builder(iterations), (0, x),
+              maximum_iterations=iterations, name="mid")[1])
+
+    def create_while_loop():
+      with ops.device("/cpu:0"):
+        r = control_flow_ops.while_loop(
+            lambda *_: True, outer_body, (0, 1.0),
+            maximum_iterations=5, name="outer")
+        return array_ops.identity(r[1])
+
+    xla_context = control_flow_ops.XLAControlFlowContext()
+    xla_context.Enter()
+    final_with_xla_context = create_while_loop()
+    xla_context.Exit()
+
+    final_without_xla_context = create_while_loop()
+
+    with self.test_session(use_gpu=False) as sess:
+      opts = config_pb2.RunOptions(
+          trace_level=config_pb2.RunOptions.FULL_TRACE)
+      run_metadata = config_pb2.RunMetadata()
+
+      final_value_without_xla_context = sess.run(
+          final_without_xla_context,
+          feed_dict={p: [0, 0, 0]})
+
+      final_value_with_xla_context = sess.run(
+          final_with_xla_context,
+          feed_dict={p: [0, 0, 0]},
+          options=opts, run_metadata=run_metadata)
+
+      node_stats = run_metadata.step_stats.dev_stats[0].node_stats
+      stack_push_count = len(
+          [x for x in node_stats if x.node_name.endswith("StackPushV2")])
+      # Pushes to the stack = product of maximum_iterations values;
+      # the last two "3"s comes from size(p), when p == [0, 0, 0].
+      self.assertEqual(stack_push_count, 5 * 3 * 3)
+
+      self.assertAllClose(
+          final_value_with_xla_context, final_value_without_xla_context)
+
   # Have more than 10 parallel iterations and hence exercise k-bound
   # most of the time.
   def testWhile_3(self):
@@ -766,7 +924,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(lambda i, m, c, o: math_ops.less(i, d),
                                       compute, [i, m, c, o])
       result = r[3].eval()
-    self.assertTrue(check_op_order(i.graph))
     self.assertAllEqual(10100, result)
 
   def testWhile_4(self):
@@ -788,7 +945,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(lambda i, m, c, o: math_ops.less(i, s),
                                       compute, [i, m, c, o])
       result = r[3].eval()
-    self.assertTrue(check_op_order(i.graph))
     self.assertAllEqual(42, result)
 
   def testWhile_5(self):
@@ -813,7 +969,6 @@ class ControlFlowTest(test.TestCase):
               tensor_shape.unknown_shape()
           ])
       result = r[2].eval()
-    self.assertTrue(check_op_order(i.graph))
     self.assertAllEqual(np.array([0, 1, 2, 3, 4, 5, 6]), result)
 
   def testBufferForwarding(self):
@@ -914,7 +1069,13 @@ class ControlFlowTest(test.TestCase):
       self.assertTrue(r[1].get_shape()[0].value is None)
       self.assertEqual(r[1].get_shape()[1], tensor_shape.Dimension(2))
 
-      with self.assertRaisesRegexp(ValueError, "not an invariant for"):
+      with self.assertRaisesRegexp(
+          ValueError,
+          r"The shape for while_1/Merge_1:0 is not an invariant for the loop. "
+          r"It enters the loop with shape \(2, 2\), but has shape \(4, 2\) "
+          r"after one iteration. Provide shape invariants using either the "
+          r"`shape_invariants` argument of tf.while_loop or set_shape\(\) on "
+          r"the loop variables."):
         r = control_flow_ops.while_loop(c, b, [i, m])
 
   def testWhileShapeInferenceSparseTensor(self):
@@ -1253,7 +1414,6 @@ class ControlFlowTest(test.TestCase):
 
       r = control_flow_ops.while_loop(
           loop_iterator, loop_body, [n], parallel_iterations=1)
-      self.assertTrue(check_op_order(n.graph))
       variables.global_variables_initializer().run()
       self.assertEqual(3, r.eval())
       result = select.eval()
@@ -1278,7 +1438,6 @@ class ControlFlowTest(test.TestCase):
 
       r = control_flow_ops.while_loop(
           loop_iterator, loop_body, [n], parallel_iterations=1)
-      self.assertTrue(check_op_order(n.graph))
       variables.global_variables_initializer().run()
       self.assertEqual(3, r.eval())
       result1 = select1.eval()
@@ -1305,7 +1464,6 @@ class ControlFlowTest(test.TestCase):
           parallel_iterations=1)
       variables.global_variables_initializer().run()
       result = r[1].eval()
-    self.assertTrue(check_op_order(n.graph))
     self.assertAllClose(np.array([10.0, 10.0, 10.0]), result)
 
   # b/24814703
@@ -1450,7 +1608,8 @@ class ControlFlowTest(test.TestCase):
     gpu_dev_name = test.gpu_device_name() if test.is_gpu_available(
     ) else "/device:GPU:0"
 
-    with self.test_session(graph=ops.Graph()) as sess:
+    graph = ops.Graph()
+    with graph.as_default():
       v = constant_op.constant(2.0, name="v")
       c = lambda v: math_ops.less(v, 100.0)
 
@@ -1461,7 +1620,8 @@ class ControlFlowTest(test.TestCase):
       loop = control_flow_ops.while_loop(c, b, [v], parallel_iterations=1)
       r = gradients_impl.gradients(
           loop, v, colocate_gradients_with_ops=colocate)[0]
-    r_ops = r.graph.get_operations()
+
+    r_ops = graph.get_operations()
     r_devices = [(op.name, op.device) for op in r_ops]
 
     self.assertTrue(any("Square" in op.name for op in r_ops))
@@ -1475,7 +1635,9 @@ class ControlFlowTest(test.TestCase):
         self.assertTrue(gpu_dev_name in dev)
       else:
         self.assertFalse(gpu_dev_name in dev)
-    self.assertAllClose(1024.0, sess.run(r))
+
+    with self.test_session(graph=graph) as sess:
+      self.assertAllClose(1024.0, sess.run(r))
 
   def testWhileGrad_ColocateGradients(self):
     self._testWhileGrad_ColocateGradients(colocate=False)
@@ -2274,8 +2436,7 @@ class ControlFlowTest(test.TestCase):
       # Duplicate events cause an error if exclusive = True
       r4 = control_flow_ops.case(
           [(x < y, f1), (x < y, f2)], default=f3, exclusive=True)
-      with self.assertRaisesOpError(
-          "More than one condition evaluated as True but exclusive=True."):
+      with self.assertRaisesOpError("Input error:"):
         r4.eval()
 
       # Check that the default is called if none of the others are
@@ -2622,6 +2783,124 @@ class ControlFlowTest(test.TestCase):
           1)
 
 
+@test_util.with_c_api
+class ControlFlowContextCheckTest(test.TestCase):
+
+  def _getWhileTensor(self):
+    """Creates and returns a tensor from a while context."""
+    tensor = []
+
+    def body(i):
+      if not tensor:
+        tensor.append(constant_op.constant(1))
+      return i + tensor[0]
+
+    control_flow_ops.while_loop(lambda i: i < 10, body, [0])
+    return tensor[0]
+
+  def _getCondTensor(self):
+    cond_tensor = []
+    def true_fn():
+      if not cond_tensor:
+        cond_tensor.append(constant_op.constant(1))
+      return cond_tensor[0]
+    control_flow_ops.cond(math_ops.less(1, 2), true_fn,
+                          lambda: constant_op.constant(0))
+    return cond_tensor[0]
+
+  def testInvalidContext(self):
+    # Accessing a while loop tensor outside of control flow is illegal.
+    while_tensor = self._getWhileTensor()
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Cannot use 'while/Const_1' as input to 'Add' because 'while/Const_1' "
+        "is in a while loop. See info log for more details."):
+      math_ops.add(1, while_tensor)
+
+  def testInvalidContextInCond(self):
+    # Accessing a while loop tensor in cond is illegal.
+    while_tensor = self._getWhileTensor()
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Cannot use 'while/Const_1' as input to 'cond/Add' because "
+        "'while/Const_1' is in a while loop. See info log for more details."):
+      # TODO(skyewm): this passes if we return while_tensor directly instead
+      # of using it as input to another op.
+      control_flow_ops.cond(math_ops.less(1, 2),
+                            lambda: math_ops.add(1, while_tensor),
+                            lambda: constant_op.constant(0))
+
+  def testInvalidContextInWhile(self):
+    # Accessing a while loop tensor in a different while loop is illegal.
+    while_tensor = self._getWhileTensor()
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Cannot use 'while_1/Add' as input to 'while/Const_1' because they are "
+        "in different while loops. See info log for more details."):
+      control_flow_ops.while_loop(lambda i: i < 10,
+                                  lambda x: math_ops.add(1, while_tensor), [0])
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Cannot use 'while_2/NextIteration' as input to 'while/Const_1' "
+        "because they are in different while loops. See info log for more "
+        "details."):
+      control_flow_ops.while_loop(lambda i: i < 10, lambda i: while_tensor, [0])
+
+  def testValidCondContext(self):
+    # Accessing a tensor from a cond context is OK (although dangerous).
+    cond_tensor = self._getCondTensor()
+    math_ops.add(1, cond_tensor)
+
+  def testValidCondContextBranches(self):
+    # Accessing a tensor from a cond context from the other branch's cond
+    # context is OK (although dangerous).
+    cond_tensor = []
+    def branch_fn():
+      if not cond_tensor:
+        cond_tensor.append(constant_op.constant(1))
+      return cond_tensor[0]
+
+    control_flow_ops.cond(math_ops.less(1, 2), branch_fn, branch_fn)
+
+  def testValidWhileContext(self):
+    # Accessing a tensor in a nested while is OK.
+    def body(_):
+      c = constant_op.constant(1)
+      return control_flow_ops.while_loop(lambda i: i < 3, lambda i: i + c, [0])
+
+    control_flow_ops.while_loop(lambda i: i < 5, body, [0])
+
+  def testValidNestedContexts(self):
+    # Accessing a tensor from a cond context in a while context, all inside an
+    # outer while context, is OK.
+    def body(_):
+      cond_tensor = self._getCondTensor()
+      # Create another cond containing the while loop for good measure
+      return control_flow_ops.cond(
+          math_ops.less(1, 2),
+          lambda: control_flow_ops.while_loop(lambda i: i < 3,
+                                              lambda i: i + cond_tensor, [0]),
+          lambda: constant_op.constant(0))
+
+    control_flow_ops.while_loop(lambda i: i < 5, body, [0])
+
+  def testInvalidNestedContexts(self):
+    # Accessing a tensor from a while context in a different while context, all
+    # inside a cond context, is illegal.
+    def true_fn():
+      while_tensor = self._getWhileTensor()
+      return control_flow_ops.while_loop(lambda i: i < 3,
+                                         lambda i: i + while_tensor, [0])
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Cannot use 'cond/while_1/add' as input to 'cond/while/Const_1' because"
+        " they are in different while loops. See info log for more details."):
+      control_flow_ops.cond(math_ops.less(1, 2), true_fn,
+                            lambda: constant_op.constant(0))
+
+
+@test_util.with_c_api
 class TupleTest(test.TestCase):
 
   def testTensors(self):
@@ -2707,6 +2986,7 @@ class TupleTest(test.TestCase):
       self.assertEquals(1, var.eval())
 
 
+@test_util.with_c_api
 class AssertTest(test.TestCase):
 
   def testGuardedAssertDoesNotCopyWhenTrue(self):
@@ -2744,6 +3024,7 @@ class AssertTest(test.TestCase):
       self.assertEqual([], guarded_memcpy_nodestat_names)
 
 
+@test_util.with_c_api
 class WhileOpBenchmark(test.Benchmark):
   """Evaluate the performance of while_loop op."""
 
@@ -2857,6 +3138,7 @@ class WhileOpBenchmark(test.Benchmark):
         name="unroll_same_device", iters=iters, wall_time=duration)
 
 
+@test_util.with_c_api
 class EagerTest(test.TestCase):
 
   def testCond(self):
@@ -2875,6 +3157,22 @@ class EagerTest(test.TestCase):
       self.assertAllEqual(isum(tensor).numpy(),
                           [46, 47, 48, 49, 50])
 
+  def testWhileLoopWithMaxIterations(self):
+    with context.eager_mode():
+      tensor = constant_op.constant([1, 2, 3, 4, 5])
+      self.assertAllEqual(isum(tensor, maximum_iterations=3).numpy(),
+                          [1+3, 2+3, 3+3, 4+3, 5+3])
+
+  def testWhileWithMaximumIterationsAndSingleArgument(self):
+    with context.eager_mode():
+      tensor = constant_op.constant(0)
+      r = control_flow_ops.while_loop(
+          lambda i: i < 3,
+          lambda i: i + 1,
+          [tensor],
+          maximum_iterations=1)
+      self.assertEqual(1, r.numpy())
+
   def testWithDependencies(self):
     with context.eager_mode():
       t1 = constant_op.constant(1)
@@ -2903,5 +3201,6 @@ class EagerTest(test.TestCase):
                                  default=f3, exclusive=True)
       self.assertAllEqual(r1.numpy(), 17)
 
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/control_flow_util_test.py b/tensorflow/python/kernel_tests/control_flow_util_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..39e96f74b0461da0cf499e303b30a4a41aae4899
--- /dev/null
+++ b/tensorflow/python/kernel_tests/control_flow_util_test.py
@@ -0,0 +1,71 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for tensorflow.python.ops.control_flow_util."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import test_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import gen_control_flow_ops
+from tensorflow.python.platform import test
+
+
+class ControlFlowUtilTest(test.TestCase):
+
+  def testIsSwitch(self):
+    switch_false, _ = control_flow_ops.switch(1, True)
+    switch = switch_false.op
+    self.assertTrue(control_flow_util.IsSwitch(switch))
+
+    ref_switch_false, _ = control_flow_ops.ref_switch(test_ops.ref_output(),
+                                                      True)
+    ref_switch = ref_switch_false.op
+    self.assertTrue(control_flow_util.IsSwitch(ref_switch))
+
+    self.assertFalse(control_flow_util.IsSwitch(test_ops.int_output().op))
+
+  def testIsLoopEnter(self):
+    enter = gen_control_flow_ops.enter(1, frame_name="name").op
+    self.assertTrue(control_flow_util.IsLoopEnter(enter))
+    self.assertFalse(control_flow_util.IsLoopConstantEnter(enter))
+
+    ref_enter = gen_control_flow_ops.ref_enter(test_ops.ref_output(),
+                                               frame_name="name").op
+    self.assertTrue(control_flow_util.IsLoopEnter(ref_enter))
+    self.assertFalse(control_flow_util.IsLoopConstantEnter(ref_enter))
+
+    const_enter = gen_control_flow_ops.enter(1, frame_name="name",
+                                             is_constant=True).op
+    self.assertTrue(control_flow_util.IsLoopEnter(const_enter))
+    self.assertTrue(control_flow_util.IsLoopConstantEnter(const_enter))
+
+    self.assertFalse(control_flow_util.IsLoopEnter(test_ops.int_output().op))
+
+  def testIsLoopExit(self):
+    exit_op = control_flow_ops.exit(1).op
+    self.assertTrue(control_flow_util.IsLoopExit(exit_op))
+
+    ref_exit = control_flow_ops.exit(test_ops.ref_output()).op
+    self.assertTrue(control_flow_util.IsLoopExit(ref_exit))
+
+    self.assertFalse(control_flow_util.IsLoopExit(test_ops.int_output().op))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
index 1679857bd5b9c5a9a1fbf89f207befc4382223b1..be299beee48cd8fb058393840eddfe08da1d6d99 100644
--- a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
+++ b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
@@ -42,17 +42,21 @@ class Conv2DBackpropFilterGradTest(test.TestCase):
           filter_shape = [3, 3, 4, 6]
           # Make a convolution op with the current settings, just to easily get
           # the shape of the output.
-          conv_out = nn_ops.conv2d(in_val,
-                                   array_ops.zeros(filter_shape),
-                                   [1, stride, stride, 1], padding)
+          conv_out = nn_ops.conv2d(
+              in_val,
+              array_ops.zeros(filter_shape),
+              strides=[1, stride, stride, 1],
+              padding=padding)
           out_backprop_shape = conv_out.get_shape().as_list()
           out_backprop_val = constant_op.constant(
               2 * np.random.random_sample(out_backprop_shape) - 1,
               dtype=dtypes.float32)
-          output = nn_ops.conv2d_backprop_filter(in_val, filter_shape,
-                                                 out_backprop_val,
-                                                 [1, stride, stride, 1],
-                                                 padding)
+          output = nn_ops.conv2d_backprop_filter(
+              in_val,
+              filter_shape,
+              out_backprop_val,
+              strides=[1, stride, stride, 1],
+              padding=padding)
           err = gradient_checker.compute_gradient_error(
               [in_val, out_backprop_val], [in_shape, out_backprop_shape],
               output, filter_shape)
@@ -60,6 +64,42 @@ class Conv2DBackpropFilterGradTest(test.TestCase):
           err_tolerance = 2e-3
           self.assertLess(err, err_tolerance)
 
+  def testGradientDilatedConv(self):
+    if test.is_gpu_available(cuda_only=True):
+      with self.test_session(use_gpu=True):
+        for padding in ["SAME", "VALID"]:
+          for stride in [1, 2]:
+            np.random.seed(1)
+            in_shape = [5, 8, 6, 4]
+            in_val = constant_op.constant(
+                2 * np.random.random_sample(in_shape) - 1, dtype=dtypes.float32)
+            filter_shape = [3, 3, 4, 6]
+            # Make a convolution op with the current settings,
+            # just to easily get the shape of the output.
+            conv_out = nn_ops.conv2d(
+                in_val,
+                array_ops.zeros(filter_shape),
+                dilations=[1, 2, 2, 1],
+                strides=[1, stride, stride, 1],
+                padding=padding)
+            out_backprop_shape = conv_out.get_shape().as_list()
+            out_backprop_val = constant_op.constant(
+                2 * np.random.random_sample(out_backprop_shape) - 1,
+                dtype=dtypes.float32)
+            output = nn_ops.conv2d_backprop_filter(
+                in_val,
+                filter_shape,
+                out_backprop_val,
+                dilations=[1, 2, 2, 1],
+                strides=[1, stride, stride, 1],
+                padding=padding)
+            err = gradient_checker.compute_gradient_error(
+                [in_val, out_backprop_val], [in_shape, out_backprop_shape],
+                output, filter_shape)
+            print("conv2d_backprop_filter gradient err = %g " % err)
+            err_tolerance = 2e-3
+            self.assertLess(err, err_tolerance)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
index 22e5400c3745a735d783fef761276694dc830c32..3e9bd3dade6d08835780362cd73f5f01368e83ac 100644
--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
 import os
 import time
 
@@ -32,6 +33,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
+from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import nn_impl
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
@@ -162,8 +164,8 @@ class Conv2DTest(test.TestCase):
       # as we will be using its gradients as reference for fp16 gradients.
       return [dtypes.float32, dtypes.float16]
 
-  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, strides,
-                            padding, data_format, dtype, use_gpu):
+  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, dilations,
+                            strides, padding, data_format, dtype, use_gpu):
     """Verifies the output values of the convolution function.
 
     Args:
@@ -171,6 +173,7 @@ class Conv2DTest(test.TestCase):
         [batch, input_rows, input_cols, input_depth].
       filter_in_sizes: Filter tensor dimensions in
         [kernel_rows, kernel_cols, input_depth, output_depth].
+      dilations: Dilated rate: [col_dilation, row_dilation]
       strides: Stride: [col_stride, row_stride]
       padding: Padding type.
       data_format: Format of the data tensors.
@@ -194,11 +197,18 @@ class Conv2DTest(test.TestCase):
       t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
       t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
       strides = [1] + strides + [1]
+      dilations = [1] + dilations + [1]
       if data_format == "NCHW":
         t1 = test_util.NHWCToNCHW(t1)
         strides = test_util.NHWCToNCHW(strides)
+        dilations = test_util.NHWCToNCHW(dilations)
       conv = nn_ops.conv2d(
-          t1, t2, strides=strides, padding=padding, data_format=data_format)
+          t1,
+          t2,
+          dilations=dilations,
+          strides=strides,
+          padding=padding,
+          data_format=data_format)
       if data_format == "NCHW":
         conv = test_util.NCHWToNHWC(conv)
 
@@ -240,14 +250,87 @@ class Conv2DTest(test.TestCase):
     for i in range(1, len(values)):
       self.assertAllClose(values[0], values[i], rtol=1e-5, atol=1e-5)
 
+  def _ComputeReferenceDilatedConv(self, tensor_in_sizes, filter_in_sizes,
+                                   stride, dilation, padding, data_format,
+                                   use_gpu):
+    total_size_1 = 1
+    total_size_2 = 1
+    for s in tensor_in_sizes:
+      total_size_1 *= s
+    for s in filter_in_sizes:
+      total_size_2 *= s
+
+    # Initializes the input tensor with array containing incrementing
+    # numbers from 1.
+    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
+    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
+    with test_util.device(use_gpu):
+      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
+      t2 = constant_op.constant(x2, shape=filter_in_sizes)
+      if isinstance(stride, collections.Iterable):
+        strides = list(stride)
+      else:
+        strides = [stride, stride]
+      if data_format == "NCHW":
+        t1 = test_util.NHWCToNCHW(t1)
+        full_strides = [1, 1] + strides
+        full_dilation = [1, 1] + dilation
+      else:
+        full_strides = [1] + strides + [1]
+        full_dilation = [1] + dilation + [1]
+      expected = nn_ops.convolution(
+          t1,
+          t2,
+          padding=padding,
+          strides=strides,
+          dilation_rate=dilation,
+          data_format=data_format)
+      computed = nn_ops.conv2d(
+          t1,
+          t2,
+          strides=full_strides,
+          dilations=full_dilation,
+          padding=padding,
+          data_format=data_format)
+      if data_format == "NCHW":
+        expected = test_util.NCHWToNHWC(expected)
+        computed = test_util.NCHWToNHWC(computed)
+    return expected, computed
+
+  def _VerifyDilatedConvValues(self, tensor_in_sizes, filter_in_sizes, strides,
+                               padding, dilations):
+    expected_results = []
+    computed_results = []
+    default_dilations = (dilations[0] == 1 and dilations[1] == 1)
+    for data_format, use_gpu in GetTestConfigs():
+      # If any dilation rate is larger than 1, only do test on the GPU
+      # because we currently do not have a CPU implementation for arbitrary
+      # dilation rates.
+      if default_dilations or use_gpu:
+        expected, computed = self._ComputeReferenceDilatedConv(
+            tensor_in_sizes, filter_in_sizes, strides, dilations, padding,
+            data_format, use_gpu)
+        expected_results.append(expected)
+        computed_results.append(computed)
+        tolerance = 1e-2 if use_gpu else 1e-5
+        expected_values = self.evaluate(expected_results)
+        computed_values = self.evaluate(computed_results)
+        for e_value, c_value in zip(expected_values, computed_values):
+          print("expected = ", e_value)
+          print("actual = ", c_value)
+          self.assertAllClose(
+              e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-4)
+
   def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, strides, padding,
                     expected):
     tensors = []
+    dilations = [1, 1]
     for (data_format, use_gpu) in GetTestConfigs():
       for dtype in self._DtypesToTest(use_gpu):
         result = self._SetupValuesForDevice(
             tensor_in_sizes,
             filter_in_sizes,
+            dilations,
             strides,
             padding,
             data_format,
@@ -279,6 +362,16 @@ class Conv2DTest(test.TestCase):
         padding="VALID",
         expected=expected_output)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testConv2D2x2Filter2x1Dilation(self):
+    if test.is_gpu_available(cuda_only=True):
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=[1, 4, 4, 1],
+          filter_in_sizes=[2, 2, 1, 1],
+          strides=[1, 1],
+          dilations=[2, 1],
+          padding="VALID")
+
   @test_util.run_in_graph_and_eager_modes()
   def testConv2DEmpty(self):
     expected_output = []
@@ -289,6 +382,16 @@ class Conv2DTest(test.TestCase):
         padding="VALID",
         expected=expected_output)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testConv2DEmptyDilation(self):
+    if test.is_gpu_available(cuda_only=True):
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=[0, 2, 3, 3],
+          filter_in_sizes=[1, 1, 3, 3],
+          strides=[1, 1],
+          dilations=[2, 1],
+          padding="VALID")
+
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D2x2Filter(self):
     # The outputs are computed using third_party/py/IPython/notebook.
@@ -300,6 +403,16 @@ class Conv2DTest(test.TestCase):
         padding="VALID",
         expected=expected_output)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testConv2D2x2FilterDilation(self):
+    if test.is_gpu_available(cuda_only=True):
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=[1, 2, 3, 3],
+          filter_in_sizes=[2, 2, 3, 3],
+          strides=[1, 1],
+          dilations=[1, 2],
+          padding="VALID")
+
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D1x2Filter(self):
     # The outputs are computed using third_party/py/IPython/notebook.
@@ -314,6 +427,16 @@ class Conv2DTest(test.TestCase):
         padding="VALID",
         expected=expected_output)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testConv2D1x2FilterDilation(self):
+    if test.is_gpu_available(cuda_only=True):
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=[1, 2, 3, 3],
+          filter_in_sizes=[1, 2, 3, 3],
+          strides=[1, 1],
+          dilations=[2, 1],
+          padding="VALID")
+
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D2x2FilterStride2(self):
     expected_output = [2271.0, 2367.0, 2463.0]
@@ -386,13 +509,23 @@ class Conv2DTest(test.TestCase):
         padding="VALID",
         expected=[50, 60])
 
-    # TODO this currently fails.
-    # self._VerifyValues(tensor_in_sizes=[1, 8, 8, 1],
-    #                   filter_in_sizes=[2, 2, 1, 1],
-    #                   strides=[4, 4], padding="SAME",
-    #                   expected=[72, 112, 392, 432])
+  @test_util.run_in_graph_and_eager_modes()
+  def testConv2DKernelSizeMatchesInputSizeDilation(self):
+    if test.is_gpu_available(cuda_only=True):
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=[1, 3, 3, 1],
+          filter_in_sizes=[2, 2, 1, 2],
+          strides=[1, 1],
+          dilations=[2, 2],
+          padding="VALID")
 
-    # Testing for backprops
+  # TODO this currently fails.
+  # self._VerifyValues(tensor_in_sizes=[1, 8, 8, 1],
+  #                   filter_in_sizes=[2, 2, 1, 1],
+  #                   strides=[4, 4], padding="SAME",
+  #                   expected=[72, 112, 392, 432])
+
+  # Testing for backprops
   def _RunAndVerifyBackpropInput(self, input_sizes, filter_sizes, output_sizes,
                                  strides, padding, expected, data_format,
                                  use_gpu, err):
@@ -663,6 +796,20 @@ class Conv2DTest(test.TestCase):
           data_format=data_format,
           use_gpu=use_gpu)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testConv2DBackpropFilterWithEmptyInput(self):
+    expected = [0, 0, 0, 0]
+    for (data_format, use_gpu) in GetTestConfigs():
+      self._RunAndVerifyBackpropFilter(
+          input_sizes=[0, 2, 3, 1],
+          filter_sizes=[2, 2, 1, 1],
+          output_sizes=[0, 1, 2, 1],
+          strides=[1, 1],
+          padding="VALID",
+          expected=expected,
+          data_format=data_format,
+          use_gpu=use_gpu)
+
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D2x2Depth3ValidBackpropFilter(self):
     expected = [
@@ -724,6 +871,255 @@ class Conv2DTest(test.TestCase):
           data_format=data_format,
           use_gpu=use_gpu)
 
+  # Testing for backprops
+  def _RunAndVerifyBackpropInputDilation(self, input_sizes, filter_sizes,
+                                         output_sizes, strides, dilations,
+                                         padding, data_format, use_gpu, err):
+    total_input_size = 1
+    total_filter_size = 1
+    for s in input_sizes:
+      total_input_size *= s
+    for s in filter_sizes:
+      total_filter_size *= s
+    # Initializes the input tensor with array containing incrementing
+    # numbers from 1.
+    x1 = [f * 1.0 for f in range(1, total_input_size + 1)]
+    x2 = [f * 1.0 for f in range(1, total_filter_size + 1)]
+    default_dilations = (dilations[0] == 1 and dilations[1] == 1)
+    if default_dilations or use_gpu:
+      with self.test_session(use_gpu=use_gpu) as sess:
+        if data_format == "NCHW":
+          input_sizes = test_util.NHWCToNCHW(input_sizes)
+        t1 = constant_op.constant(x1, shape=input_sizes)
+        t2 = constant_op.constant(x2, shape=filter_sizes)
+        full_strides = [1] + strides + [1]
+        full_dilations = [1] + dilations + [1]
+        if data_format == "NCHW":
+          full_strides = test_util.NHWCToNCHW(full_strides)
+          full_dilations = test_util.NHWCToNCHW(full_dilations)
+        conv_forward = nn_ops.conv2d(
+            t1,
+            t2,
+            strides=full_strides,
+            dilations=full_dilations,
+            padding=padding,
+            data_format=data_format)
+        conv_forward_2 = nn_ops.convolution(
+            t1,
+            t2,
+            padding=padding,
+            strides=strides,
+            dilation_rate=dilations,
+            data_format=data_format)
+        if data_format == "NCHW":
+          conv_forward = test_util.NCHWToNHWC(conv_forward)
+          conv_forward_2 = test_util.NCHWToNHWC(conv_forward_2)
+        conv = gradients_impl.gradients(conv_forward, t1)[0]
+        conv_2 = gradients_impl.gradients(conv_forward_2, t1)[0]
+        # "values" consists of two tensors for two backprops
+        value = sess.run(conv)
+        value_2 = sess.run(conv_2)
+        self.assertShapeEqual(value, conv)
+        self.assertShapeEqual(value_2, conv_2)
+      print("expected = ", value_2)
+      print("actual = ", value)
+      self.assertArrayNear(value_2.flatten(), value.flatten(), err)
+
+  # Testing for backprops
+  def _RunAndVerifyBackpropFilterDilation(self, input_sizes, filter_sizes,
+                                          output_sizes, strides, dilations,
+                                          padding, data_format, use_gpu, err):
+    total_input_size = 1
+    total_filter_size = 1
+    for s in input_sizes:
+      total_input_size *= s
+    for s in filter_sizes:
+      total_filter_size *= s
+    # Initializes the input tensor with array containing incrementing
+    # numbers from 1.
+    x1 = [f * 1.0 for f in range(1, total_input_size + 1)]
+    x2 = [f * 1.0 for f in range(1, total_filter_size + 1)]
+    default_dilations = (dilations[0] == 1 and dilations[1] == 1)
+    if default_dilations or use_gpu:
+      with self.test_session(use_gpu=use_gpu) as sess:
+        if data_format == "NCHW":
+          input_sizes = test_util.NHWCToNCHW(input_sizes)
+        t1 = constant_op.constant(x1, shape=input_sizes)
+        t2 = constant_op.constant(x2, shape=filter_sizes)
+        full_strides = [1] + strides + [1]
+        full_dilations = [1] + dilations + [1]
+        if data_format == "NCHW":
+          full_strides = test_util.NHWCToNCHW(full_strides)
+          full_dilations = test_util.NHWCToNCHW(full_dilations)
+        conv_forward = nn_ops.conv2d(
+            t1,
+            t2,
+            strides=full_strides,
+            dilations=full_dilations,
+            padding=padding,
+            data_format=data_format)
+        conv_forward_2 = nn_ops.convolution(
+            t1,
+            t2,
+            padding=padding,
+            strides=strides,
+            dilation_rate=dilations,
+            data_format=data_format)
+        if data_format == "NCHW":
+          conv_forward = test_util.NCHWToNHWC(conv_forward)
+          conv_forward_2 = test_util.NCHWToNHWC(conv_forward_2)
+        conv = gradients_impl.gradients(conv_forward, t2)[0]
+        conv_2 = gradients_impl.gradients(conv_forward, t2)[0]
+        value = sess.run(conv)
+        value_2 = sess.run(conv_2)
+        self.assertShapeEqual(value, conv)
+        self.assertShapeEqual(value_2, conv_2)
+      print("expected = ", value_2)
+      print("actual = ", value)
+      self.assertArrayNear(value_2.flatten(), value.flatten(), err)
+
+  def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropFilterDilation(
+            input_sizes=[1, 3, 6, 1],
+            filter_sizes=[2, 2, 1, 1],
+            output_sizes=[1, 1, 5, 1],
+            strides=[1, 1],
+            dilations=[2, 1],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropFilterDilation(
+            input_sizes=[1, 2, 3, 1],
+            filter_sizes=[2, 2, 1, 1],
+            output_sizes=[1, 1, 2, 1],
+            strides=[1, 1],
+            dilations=[1, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2DEmptyBackpropFilterDilation1x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropFilterDilation(
+            input_sizes=[1, 2, 3, 1],
+            filter_sizes=[2, 2, 1, 0],
+            output_sizes=[1, 1, 2, 0],
+            strides=[1, 1],
+            dilations=[1, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropFilterDilation(
+            input_sizes=[1, 3, 4, 3],
+            filter_sizes=[2, 2, 3, 3],
+            output_sizes=[1, 1, 2, 3],
+            strides=[1, 1],
+            dilations=[2, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropFilterDilation(
+            input_sizes=[1, 3, 3, 1],
+            filter_sizes=[2, 2, 1, 2],
+            output_sizes=[1, 1, 1, 2],
+            strides=[1, 1],
+            dilations=[2, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropInputDilation(
+            input_sizes=[1, 3, 6, 1],
+            filter_sizes=[2, 2, 1, 1],
+            output_sizes=[1, 1, 5, 1],
+            strides=[1, 1],
+            dilations=[2, 1],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropInputDilation(
+            input_sizes=[1, 2, 3, 1],
+            filter_sizes=[2, 2, 1, 1],
+            output_sizes=[1, 1, 2, 1],
+            strides=[1, 1],
+            dilations=[1, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2DEmptyBackpropInputDilation1x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropInputDilation(
+            input_sizes=[0, 2, 3, 1],
+            filter_sizes=[2, 2, 1, 1],
+            output_sizes=[0, 1, 2, 1],
+            strides=[1, 1],
+            dilations=[1, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
+  def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        # The GPU version of this test is not very stable. So adjusting the
+        # error threshold to 1e-4.
+        self._RunAndVerifyBackpropInputDilation(
+            input_sizes=[1, 3, 2, 3],
+            filter_sizes=[2, 2, 3, 3],
+            output_sizes=[1, 1, 2, 3],
+            strides=[1, 1],
+            dilations=[2, 1],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-4)
+
+  def testConv2DKernelSizeMatchesInputSizeBackpropInputDilation2x2(self):
+    if test.is_gpu_available(cuda_only=True):
+      for (data_format, use_gpu) in GetTestConfigs():
+        self._RunAndVerifyBackpropInputDilation(
+            input_sizes=[1, 3, 3, 1],
+            filter_sizes=[2, 2, 1, 2],
+            output_sizes=[1, 1, 1, 2],
+            strides=[1, 1],
+            dilations=[2, 2],
+            padding="VALID",
+            data_format=data_format,
+            use_gpu=use_gpu,
+            err=1e-5)
+
   # Gradient checkers
   def ConstructAndTestGradient(self, batch, input_rows, input_cols, filter_rows,
                                filter_cols, in_depth, out_depth, stride_rows,
@@ -1126,6 +1522,36 @@ class Conv2DTest(test.TestCase):
                 strides=[1, 1, 1, 1],
                 padding="VALID"))
 
+  def testCPUConv2DNCHWUnimplemented(self):
+    with self.test_session(use_gpu=False):
+      with self.assertRaisesRegexp(errors_impl.UnimplementedError,
+                                   "NHWC tensor format for now"):
+        conv = self._SetupValuesForDevice(
+            tensor_in_sizes=[1, 4, 4, 1],
+            filter_in_sizes=[2, 2, 1, 1],
+            dilations=[1, 1],
+            strides=[1, 1],
+            padding="VALID",
+            data_format="NCHW",
+            dtype=dtypes.float32,
+            use_gpu=False)
+        self.evaluate(conv)
+
+  def testCPUConv2DDilatedUnimplemented(self):
+    with self.test_session(use_gpu=False):
+      with self.assertRaisesRegexp(errors_impl.UnimplementedError,
+                                   "dilated rate of 1 for now"):
+        conv = self._SetupValuesForDevice(
+            tensor_in_sizes=[1, 4, 4, 1],
+            filter_in_sizes=[2, 2, 1, 1],
+            dilations=[2, 1],
+            strides=[1, 1],
+            padding="VALID",
+            data_format="NHWC",
+            dtype=dtypes.float32,
+            use_gpu=False)
+        self.evaluate(conv)
+
 
 class DepthwiseConv2DTest(test.TestCase):
 
@@ -1457,6 +1883,22 @@ def GetInceptionFwdTest(input_size, filter_size, stride, padding,
   return Test
 
 
+def GetInceptionFwdDilatedConvTest(input_size, filter_size, stride, padding):
+
+  def Test(self):
+    if test.is_gpu_available(cuda_only=True) and stride == 1:
+      tf_logging.info("Testing InceptionFwd with dilations %s",
+                      (input_size, filter_size, stride, padding))
+      self._VerifyDilatedConvValues(
+          tensor_in_sizes=input_size,
+          filter_in_sizes=filter_size,
+          strides=[stride, stride],
+          dilations=[2, 2],
+          padding=padding)
+
+  return Test
+
+
 def GetInceptionBackInputTest(input_size, filter_size, output_size, stride,
                               padding,
                               gpu_only=False):
@@ -1497,6 +1939,10 @@ if __name__ == "__main__":
             test_util.run_in_graph_and_eager_modes()(
                 GetInceptionFwdTest(input_size_, filter_size_, stride_,
                                     padding_)))
+    setattr(
+        Conv2DTest, "testInceptionFwdDilatedConv_" + str(index),
+        test_util.run_in_graph_and_eager_modes()(GetInceptionFwdDilatedConvTest(
+            input_size_, filter_size_, stride_, padding_)))
     setattr(Conv2DTest, "testInceptionBackInput_" + str(index),
             test_util.run_in_graph_and_eager_modes()(
                 GetInceptionBackInputTest(input_size_, filter_size_,
@@ -1519,6 +1965,9 @@ if __name__ == "__main__":
   setattr(Conv2DTest, "testInceptionFwd_No_Winograd_Nonfused",
           test_util.run_in_graph_and_eager_modes()(
               GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True)))
+  setattr(Conv2DTest, "testInceptionFwdDilatedConv_No_Winograd_Nonfused",
+          test_util.run_in_graph_and_eager_modes()(
+              GetInceptionFwdDilatedConvTest(ishape, fshape, 1, "SAME")))
   setattr(Conv2DTest, "testInceptionBackInput_No_Winograd_Nonfused",
           test_util.run_in_graph_and_eager_modes()(
               GetInceptionBackInputTest(ishape, fshape, oshape, 1, "SAME",
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index e0c53950e6ccb22f47a1c5a19a62b8373fbe4445..cea12ea8ecfa7a4f592454a96f7f3dc9dd3663ed 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -416,7 +416,7 @@ class UnaryOpTest(test.TestCase):
     self._compareCpu(x, np.square, math_ops.square)
     self._compareCpu(y, np.sqrt, math_ops.sqrt)
     self._compareCpu(y, self._rsqrt, math_ops.rsqrt)
-    self._compareCpu(x, np.exp, math_ops.exp)
+    self._compareBoth(x, np.exp, math_ops.exp)
     self._compareCpu(x, np.expm1, math_ops.expm1)
     self._compareCpu(y, np.log, math_ops.log)
     self._compareCpu(y, np.log1p, math_ops.log1p)
@@ -460,7 +460,7 @@ class UnaryOpTest(test.TestCase):
     self._compareCpu(x, np.square, math_ops.square)
     self._compareCpu(y, np.sqrt, math_ops.sqrt)
     self._compareCpu(y, self._rsqrt, math_ops.rsqrt)
-    self._compareCpu(x, np.exp, math_ops.exp)
+    self._compareBoth(x, np.exp, math_ops.exp)
     self._compareCpu(x, np.expm1, math_ops.expm1)
     self._compareCpu(y, np.log, math_ops.log)
     self._compareCpu(y, np.log1p, math_ops.log1p)
diff --git a/tensorflow/python/kernel_tests/decode_bmp_op_test.py b/tensorflow/python/kernel_tests/decode_bmp_op_test.py
index c086f4617064241da98138888e2ce1659d1b3821..c67c26b7be0777587eb6d7c49119ad6cd2e22953 100644
--- a/tensorflow/python/kernel_tests/decode_bmp_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_bmp_op_test.py
@@ -68,28 +68,68 @@ class DecodeBmpOpTest(test.TestCase):
   def testGrayscale(self):
     img_bytes = [[[255], [0]], [[255], [0]]]
     encoded_bytes = [
-        0x42, 0x40,
-        0x3d, 0, 0, 0,
-        0, 0,
-        0, 0,
-        0x36, 0, 0, 0,
-        0x28, 0, 0, 0,
-        0x2, 0, 0, 0,
-        0x2, 0, 0, 0,
-        0x1, 0,
-        0x8, 0,
-        0, 0, 0, 0,
-        0x10, 0, 0, 0,
-        0x13, 0xb, 0, 0,
-        0x13, 0xb, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
+        0x42,
+        0x40,
+        0x3d,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0x36,
+        0,
+        0,
+        0,
+        0x28,
+        0,
+        0,
+        0,
+        0x2,
+        0,
+        0,
+        0,
+        0x2,
+        0,
+        0,
+        0,
+        0x1,
+        0,
+        0x8,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0x10,
+        0,
+        0,
+        0,
+        0x13,
+        0xb,
+        0,
+        0,
+        0x13,
+        0xb,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
         0xff,
         0,
-        0, 0,
+        0,
+        0,
         0xff,
         0,
-        0, 0,
+        0,
+        0,
     ]
 
     byte_string = bytes(bytearray(encoded_bytes))
@@ -100,54 +140,6 @@ class DecodeBmpOpTest(test.TestCase):
       decoded = decode.eval()
       self.assertAllEqual(decoded, img_bytes)
 
-  def testIncompleteHeader(self):
-    # Encoded BMP bytes from Wikipedia
-    encoded_bytes = [
-        0x42, 0x40,
-        0x46, 0, 0, 0,
-    ]
-
-    byte_string = bytes(bytearray(encoded_bytes))
-    img_in = constant_op.constant(byte_string, dtype=dtypes.string)
-    decode = array_ops.squeeze(image_ops.decode_bmp(img_in))
-
-    with self.test_session():
-      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
-          "requires at least 32 bytes to find the header"):
-        decoded = decode.eval()
-
-  def testIncompleteBody(self):
-    # Encoded BMP bytes from Wikipedia
-    encoded_bytes = [
-        0x42, 0x40,
-        0x46, 0, 0, 0,
-        0, 0,
-        0, 0,
-        0x36, 0, 0, 0,
-        0x28, 0, 0, 0,
-        0x2, 0, 0, 0,
-        0x2, 0, 0, 0,
-        0x1, 0,
-        0x18, 0,
-        0, 0, 0, 0,
-        0x10, 0, 0, 0,
-        0x13, 0xb, 0, 0,
-        0x13, 0xb, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0, 0,
-        0, 0, 0xff,
-        0xff, 0xff, 0xff,
-        0, 0,
-    ]
-
-    byte_string = bytes(bytearray(encoded_bytes))
-    img_in = constant_op.constant(byte_string, dtype=dtypes.string)
-    decode = array_ops.squeeze(image_ops.decode_bmp(img_in))
-
-    with self.test_session():
-      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
-          "requires at least 68 bytes, got 62 bytes"):
-        decoded = decode.eval()
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/decode_compressed_op_test.py b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9bda58ca747670861b89917a4ef1cc14eac4132
--- /dev/null
+++ b/tensorflow/python/kernel_tests/decode_compressed_op_test.py
@@ -0,0 +1,73 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for DecodeRaw op from parsing_ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import gzip
+import zlib
+
+from six import BytesIO
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.platform import test
+
+
+class DecodeCompressedOpTest(test.TestCase):
+
+  def _compress(self, bytes_in, compression_type):
+    if not compression_type:
+      return bytes_in
+    elif compression_type == "ZLIB":
+      return zlib.compress(bytes_in)
+    else:
+      out = BytesIO()
+      with gzip.GzipFile(fileobj=out, mode="wb") as f:
+        f.write(bytes_in)
+      return out.getvalue()
+
+  def testDecompress(self):
+    for compression_type in ["ZLIB", "GZIP", ""]:
+      with self.test_session():
+        in_bytes = array_ops.placeholder(dtypes.string, shape=[2])
+        decompressed = parsing_ops.decode_compressed(
+            in_bytes, compression_type=compression_type)
+        self.assertEqual([2], decompressed.get_shape().as_list())
+
+        result = decompressed.eval(
+            feed_dict={in_bytes: [self._compress(b"AaAA", compression_type),
+                                  self._compress(b"bBbb", compression_type)]})
+        self.assertAllEqual([b"AaAA", b"bBbb"], result)
+
+  def testDecompressWithRaw(self):
+    for compression_type in ["ZLIB", "GZIP", ""]:
+      with self.test_session():
+        in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
+        decompressed = parsing_ops.decode_compressed(
+            in_bytes, compression_type=compression_type)
+        decode = parsing_ops.decode_raw(decompressed, out_type=dtypes.int16)
+
+        result = decode.eval(
+            feed_dict={in_bytes: [self._compress(b"AaBC", compression_type)]})
+        self.assertAllEqual(
+            [[ord("A") + ord("a") * 256, ord("B") + ord("C") * 256]], result)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/kernel_tests/decode_raw_op_test.py b/tensorflow/python/kernel_tests/decode_raw_op_test.py
index 009f3ea4b311c6c30e917362b9561b170e3e2068..0c7025f54e672bb09e601715a58864673a670d12 100644
--- a/tensorflow/python/kernel_tests/decode_raw_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_raw_op_test.py
@@ -90,8 +90,9 @@ class DecodeRawOpTest(test.TestCase):
       in_bytes = array_ops.placeholder(dtypes.string, shape=[None])
       decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.float16)
 
-      result = decode.eval(feed_dict={in_bytes: [""]})
-      self.assertEqual(len(result), 1)
+      for num_inputs in range(3):
+        result = decode.eval(feed_dict={in_bytes: [""] * num_inputs})
+        self.assertEqual((num_inputs, 0), result.shape)
 
   def testToUInt16(self):
     with self.test_session():
diff --git a/tensorflow/python/kernel_tests/distributions/kullback_leibler_test.py b/tensorflow/python/kernel_tests/distributions/kullback_leibler_test.py
index b1d8da771612fe42a153a1a11b6cb26bdcb983a0..d0fa1fe98996fd234f457bd0199fad5efc2547dc 100644
--- a/tensorflow/python/kernel_tests/distributions/kullback_leibler_test.py
+++ b/tensorflow/python/kernel_tests/distributions/kullback_leibler_test.py
@@ -59,13 +59,21 @@ class KLTest(test.TestCase):
     # pylint: disable=unused-argument,unused-variable
 
     with self.test_session():
-      a = MyDistException(loc=0.0, scale=1.0)
+      a = MyDistException(loc=0.0, scale=1.0, allow_nan_stats=False)
       kl = kullback_leibler.kl_divergence(a, a, allow_nan_stats=False)
       with self.assertRaisesOpError(
           "KL calculation between .* and .* returned NaN values"):
         kl.eval()
+      with self.assertRaisesOpError(
+          "KL calculation between .* and .* returned NaN values"):
+        a.kl_divergence(a).eval()
+      a = MyDistException(loc=0.0, scale=1.0, allow_nan_stats=True)
       kl_ok = kullback_leibler.kl_divergence(a, a)
       self.assertAllEqual([float("nan")], kl_ok.eval())
+      self_kl_ok = a.kl_divergence(a)
+      self.assertAllEqual([float("nan")], self_kl_ok.eval())
+      cross_ok = a.cross_entropy(a)
+      self.assertAllEqual([float("nan")], cross_ok.eval())
 
   def testRegistrationFailures(self):
 
@@ -86,16 +94,22 @@ class KLTest(test.TestCase):
     for (k, v) in _DIVERGENCES.items():
       self.assertEqual(v, _registered_kl(*k))
 
-  def testIndirectRegistration(self):
+  def _testIndirectRegistration(self, fn):
 
     class Sub1(normal.Normal):
-      pass
+
+      def entropy(self):
+        return ""
 
     class Sub2(normal.Normal):
-      pass
+
+      def entropy(self):
+        return ""
 
     class Sub11(Sub1):
-      pass
+
+      def entropy(self):
+        return ""
 
     # pylint: disable=unused-argument,unused-variable
     @kullback_leibler.RegisterKL(Sub1, Sub1)
@@ -116,16 +130,30 @@ class KLTest(test.TestCase):
     sub2 = Sub2(loc=0.0, scale=1.0)
     sub11 = Sub11(loc=0.0, scale=1.0)
 
-    self.assertEqual("sub1-1", kullback_leibler.kl_divergence(sub1, sub1))
-    self.assertEqual("sub1-2", kullback_leibler.kl_divergence(sub1, sub2))
-    self.assertEqual("sub2-1", kullback_leibler.kl_divergence(sub2, sub1))
-    self.assertEqual("sub1-1", kullback_leibler.kl_divergence(sub11, sub11))
-    self.assertEqual("sub1-1", kullback_leibler.kl_divergence(sub11, sub1))
-    self.assertEqual("sub1-2", kullback_leibler.kl_divergence(sub11, sub2))
-    self.assertEqual("sub1-1", kullback_leibler.kl_divergence(sub11, sub1))
-    self.assertEqual("sub1-2", kullback_leibler.kl_divergence(sub11, sub2))
-    self.assertEqual("sub2-1", kullback_leibler.kl_divergence(sub2, sub11))
-    self.assertEqual("sub1-1", kullback_leibler.kl_divergence(sub1, sub11))
+    self.assertEqual("sub1-1", fn(sub1, sub1))
+    self.assertEqual("sub1-2", fn(sub1, sub2))
+    self.assertEqual("sub2-1", fn(sub2, sub1))
+    self.assertEqual("sub1-1", fn(sub11, sub11))
+    self.assertEqual("sub1-1", fn(sub11, sub1))
+    self.assertEqual("sub1-2", fn(sub11, sub2))
+    self.assertEqual("sub1-1", fn(sub11, sub1))
+    self.assertEqual("sub1-2", fn(sub11, sub2))
+    self.assertEqual("sub2-1", fn(sub2, sub11))
+    self.assertEqual("sub1-1", fn(sub1, sub11))
+
+  def testIndirectRegistrationKLFun(self):
+    self._testIndirectRegistration(kullback_leibler.kl_divergence)
+
+  def testIndirectRegistrationKLSelf(self):
+    self._testIndirectRegistration(
+        lambda p, q: p.kl_divergence(q))
+
+  def testIndirectRegistrationCrossEntropy(self):
+    self._testIndirectRegistration(
+        lambda p, q: p.cross_entropy(q))
+
+  def testFunctionCrossEntropy(self):
+    self._testIndirectRegistration(kullback_leibler.cross_entropy)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/kernel_tests/distributions/util_test.py b/tensorflow/python/kernel_tests/distributions/util_test.py
index 8fd26a1c9afe0ab701db199147e2de7c3ded3211..f54f146e0ac102cf25d8a66f021e8c7af9901c93 100644
--- a/tensorflow/python/kernel_tests/distributions/util_test.py
+++ b/tensorflow/python/kernel_tests/distributions/util_test.py
@@ -25,6 +25,7 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import gradients_impl
@@ -55,6 +56,7 @@ def _logit(x):
   return np.log(x) - np.log1p(-x)
 
 
+@test_util.with_c_api
 class AssertCloseTest(test.TestCase):
 
   def testAssertCloseIntegerDtype(self):
@@ -145,6 +147,7 @@ class AssertCloseTest(test.TestCase):
           array_ops.identity(w).eval(feed_dict=feed_dict)
 
 
+@test_util.with_c_api
 class GetLogitsAndProbsTest(test.TestCase):
 
   def testImproperArguments(self):
@@ -298,6 +301,7 @@ class GetLogitsAndProbsTest(test.TestCase):
         logit.eval(feed_dict={l: np.ones([int(2**11+1)])})
 
 
+@test_util.with_c_api
 class EmbedCheckCategoricalEventShapeTest(test.TestCase):
 
   def testTooSmall(self):
@@ -335,6 +339,7 @@ class EmbedCheckCategoricalEventShapeTest(test.TestCase):
         du.embed_check_categorical_event_shape(param)
 
 
+@test_util.with_c_api
 class EmbedCheckIntegerCastingClosedTest(test.TestCase):
 
   def testCorrectlyAssertsNonnegative(self):
@@ -370,6 +375,7 @@ class EmbedCheckIntegerCastingClosedTest(test.TestCase):
         x_checked.eval(feed_dict={x: np.array([1, -1], dtype=np.int32)})
 
 
+@test_util.with_c_api
 class LogCombinationsTest(test.TestCase):
 
   def testLogCombinationsBinomial(self):
@@ -400,6 +406,7 @@ class LogCombinationsTest(test.TestCase):
       self.assertEqual([2, 2], log_binom.get_shape())
 
 
+@test_util.with_c_api
 class DynamicShapeTest(test.TestCase):
 
   def testSameDynamicShape(self):
@@ -504,6 +511,7 @@ class DynamicShapeTest(test.TestCase):
               }))
 
 
+@test_util.with_c_api
 class RotateTransposeTest(test.TestCase):
 
   def _np_rotate_transpose(self, x, shift):
@@ -537,6 +545,7 @@ class RotateTransposeTest(test.TestCase):
                                   shift: shift_value}))
 
 
+@test_util.with_c_api
 class PickVectorTest(test.TestCase):
 
   def testCorrectlyPicksVector(self):
@@ -557,6 +566,128 @@ class PickVectorTest(test.TestCase):
                               constant_op.constant(False), x, y))  # No eval.
 
 
+@test_util.with_c_api
+class PreferStaticRankTest(test.TestCase):
+
+  def testNonEmptyConstantTensor(self):
+    x = array_ops.zeros((2, 3, 4))
+    rank = du.prefer_static_rank(x)
+    self.assertIsInstance(rank, np.ndarray)
+    self.assertEqual(3, rank)
+
+  def testEmptyConstantTensor(self):
+    x = constant_op.constant([])
+    rank = du.prefer_static_rank(x)
+    self.assertIsInstance(rank, np.ndarray)
+    self.assertEqual(1, rank)
+
+  def testScalarTensor(self):
+    x = constant_op.constant(1.)
+    rank = du.prefer_static_rank(x)
+    self.assertIsInstance(rank, np.ndarray)
+    self.assertEqual(0, rank)
+
+  def testDynamicRankEndsUpBeingNonEmpty(self):
+    x = array_ops.placeholder(np.float64, shape=None)
+    rank = du.prefer_static_rank(x)
+    with self.test_session():
+      self.assertAllEqual(2, rank.eval(feed_dict={x: np.zeros((2, 3))}))
+
+  def testDynamicRankEndsUpBeingEmpty(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    rank = du.prefer_static_rank(x)
+    with self.test_session():
+      self.assertAllEqual(1, rank.eval(feed_dict={x: []}))
+
+  def testDynamicRankEndsUpBeingScalar(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    rank = du.prefer_static_rank(x)
+    with self.test_session():
+      self.assertAllEqual(0, rank.eval(feed_dict={x: 1}))
+
+
+@test_util.with_c_api
+class PreferStaticShapeTest(test.TestCase):
+
+  def testNonEmptyConstantTensor(self):
+    x = array_ops.zeros((2, 3, 4))
+    shape = du.prefer_static_shape(x)
+    self.assertIsInstance(shape, np.ndarray)
+    self.assertAllEqual(np.array([2, 3, 4]), shape)
+
+  def testEmptyConstantTensor(self):
+    x = constant_op.constant([])
+    shape = du.prefer_static_shape(x)
+    self.assertIsInstance(shape, np.ndarray)
+    self.assertAllEqual(np.array([0]), shape)
+
+  def testScalarTensor(self):
+    x = constant_op.constant(1.)
+    shape = du.prefer_static_shape(x)
+    self.assertIsInstance(shape, np.ndarray)
+    self.assertAllEqual(np.array([]), shape)
+
+  def testDynamicShapeEndsUpBeingNonEmpty(self):
+    x = array_ops.placeholder(np.float64, shape=None)
+    shape = du.prefer_static_shape(x)
+    with self.test_session():
+      self.assertAllEqual((2, 3), shape.eval(feed_dict={x: np.zeros((2, 3))}))
+
+  def testDynamicShapeEndsUpBeingEmpty(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    shape = du.prefer_static_shape(x)
+    with self.test_session():
+      self.assertAllEqual(np.array([0]), shape.eval(feed_dict={x: []}))
+
+  def testDynamicShapeEndsUpBeingScalar(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    shape = du.prefer_static_shape(x)
+    with self.test_session():
+      self.assertAllEqual(np.array([]), shape.eval(feed_dict={x: 1}))
+
+
+@test_util.with_c_api
+class PreferStaticValueTest(test.TestCase):
+
+  def testNonEmptyConstantTensor(self):
+    x = array_ops.zeros((2, 3, 4))
+    value = du.prefer_static_value(x)
+    self.assertIsInstance(value, np.ndarray)
+    self.assertAllEqual(np.zeros((2, 3, 4)), value)
+
+  def testEmptyConstantTensor(self):
+    x = constant_op.constant([])
+    value = du.prefer_static_value(x)
+    self.assertIsInstance(value, np.ndarray)
+    self.assertAllEqual(np.array([]), value)
+
+  def testScalarTensor(self):
+    x = constant_op.constant(1.)
+    value = du.prefer_static_value(x)
+    self.assertIsInstance(value, np.ndarray)
+    self.assertAllEqual(np.array(1.), value)
+
+  def testDynamicValueEndsUpBeingNonEmpty(self):
+    x = array_ops.placeholder(np.float64, shape=None)
+    value = du.prefer_static_value(x)
+    with self.test_session():
+      self.assertAllEqual(np.zeros((2, 3)),
+                          value.eval(feed_dict={x: np.zeros((2, 3))}))
+
+  def testDynamicValueEndsUpBeingEmpty(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    value = du.prefer_static_value(x)
+    with self.test_session():
+      self.assertAllEqual(np.array([]), value.eval(feed_dict={x: []}))
+
+  def testDynamicValueEndsUpBeingScalar(self):
+    x = array_ops.placeholder(np.int32, shape=None)
+    value = du.prefer_static_value(x)
+    with self.test_session():
+      self.assertAllEqual(np.array(1), value.eval(feed_dict={x: 1}))
+
+
+@test_util.with_c_api
 class FillTriangularTest(test.TestCase):
 
   def setUp(self):
@@ -587,7 +718,7 @@ class FillTriangularTest(test.TestCase):
     x_ = np.asarray(x_)
     with self.test_session() as sess:
       static_shape = None if use_deferred_shape else x_.shape
-      x_pl = array_ops.placeholder(dtype=x_.dtype, shape=static_shape)
+      x_pl = array_ops.placeholder_with_default(x_, shape=static_shape)
       # Add `zeros_like(x)` such that x's value and gradient are identical. We
       # do this so we can ensure each gradient value is mapped to the right
       # gradient location.  (Not doing this means the gradient wrt `x` is simple
@@ -651,6 +782,7 @@ class FillTriangularTest(test.TestCase):
     self._run_test(self._rng.randn(2, 3, int(7*8/2)), upper=True)
 
 
+@test_util.with_c_api
 class ReduceWeightedLogSumExp(test.TestCase):
 
   def _reduce_weighted_logsumexp(self, logx, w, axis, keep_dims=False):
@@ -747,6 +879,7 @@ class ReduceWeightedLogSumExp(test.TestCase):
           du.reduce_weighted_logsumexp(x, w, axis=[0, 1]).eval())
 
 
+@test_util.with_c_api
 class GenNewSeedTest(test.TestCase):
 
   def testOnlyNoneReturnsNone(self):
@@ -757,6 +890,7 @@ class GenNewSeedTest(test.TestCase):
 # TODO(jvdillon): Merge this test back into:
 # tensorflow/python/kernel_tests/softplus_op_test.py
 # once TF core is accepting new ops.
+@test_util.with_c_api
 class SoftplusTest(test.TestCase):
 
   def _npSoftplus(self, np_features):
diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
index b4fb5aa41175ba61ace0bff9a15d91ec4ee3ac55..fedbf9e696923a34968e7a907e4099c520d1447b 100644
--- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import unittest
+
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
@@ -219,6 +221,7 @@ class DynamicPartitionTest(test.TestCase):
     self.assertAllEqual([], partition_vals[0])
     self.assertAllEqual([], partition_vals[1])
 
+  @unittest.skip("Fails on windows.")
   def testGPUTooManyParts(self):
     # This test only makes sense on the GPU. There we do not check
     # for errors. In this case, we should discard all but the first
@@ -239,6 +242,7 @@ class DynamicPartitionTest(test.TestCase):
     self.assertAllEqual([6], partition_vals[0])
     self.assertAllEqual([5], partition_vals[1])
 
+  @unittest.skip("Fails on windows.")
   def testGPUPartsTooLarge(self):
     # This test only makes sense on the GPU. There we do not check
     # for errors. In this case, we should discard all the values
@@ -262,6 +266,7 @@ class DynamicPartitionTest(test.TestCase):
     self.assertAllEqual([], partition_vals[3])
     self.assertAllEqual([], partition_vals[4])
 
+  @unittest.skip("Fails on windows.")
   def testGPUAllIndicesBig(self):
     # This test only makes sense on the GPU. There we do not check
     # for errors. In this case, we should discard all the values
diff --git a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py
index 48a51c8072416f3d494129f18912d67491fa5281..feec9934e459590bb1dd0bc5c7cf40013d3d8b88 100644
--- a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py
+++ b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py
@@ -23,6 +23,8 @@ import math
 import numpy as np
 
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import nn_ops
@@ -310,6 +312,35 @@ class FractionalAvgTest(test.TestCase):
     self._ValidateFractionalAvgPoolResult(rand_mat, [1, 2, 2, 1], pseudo_random,
                                           overlapping)
 
+  def testDifferentInputTensorShape(self):
+    """Runs the operation in one session with different input tensor shapes."""
+    with self.test_session() as sess:
+      input_holder = array_ops.placeholder(dtypes.float32,
+                                           [None, None, None, 3])
+      pooling_ratio = [1, 1.5, 1.5, 1]
+      pseudo_random = False
+      overlapping = False
+      p, r, c = nn_ops.fractional_avg_pool(
+          input_holder,
+          pooling_ratio,
+          pseudo_random,
+          overlapping,
+          deterministic=True,
+          seed=self._SEED,
+          seed2=self._SEED2)
+      # First run.
+      input_a = np.zeros([3, 32, 32, 3])
+      actual, row_seq, col_seq = sess.run([p, r, c], {input_holder: input_a})
+      expected = self._GetExpectedFractionalAvgPoolResult(
+          input_a, row_seq, col_seq, overlapping)
+      self.assertSequenceEqual(expected.shape, actual.shape)
+      # Second run.
+      input_b = np.zeros([4, 60, 60, 3])
+      actual, row_seq, col_seq = sess.run([p, r, c], {input_holder: input_b})
+      expected = self._GetExpectedFractionalAvgPoolResult(
+          input_b, row_seq, col_seq, overlapping)
+      self.assertSequenceEqual(expected.shape, actual.shape)
+
 
 class FractionalAvgPoolGradTest(test.TestCase):
   """Tests for FractionalAvgPoolGrad.
diff --git a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py
index d380c31de35510c415420b3302fe1d4ff07877d2..5983ae7759dbf3eb2db9867def829ce8dbeb4b73 100644
--- a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py
+++ b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py
@@ -23,6 +23,8 @@ import math
 import numpy as np
 
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import nn_ops
@@ -281,6 +283,35 @@ class FractionalMaxPoolTest(test.TestCase):
     self._ValidateFractionalMaxPoolResult(rand_mat, [1, 2, 2, 1], pseudo_random,
                                           overlapping)
 
+  def testDifferentInputTensorShape(self):
+    """Runs the operation in one session with different input tensor shapes."""
+    with self.test_session() as sess:
+      input_holder = array_ops.placeholder(dtypes.float32,
+                                           [None, None, None, 3])
+      pooling_ratio = [1, 1.5, 1.5, 1]
+      pseudo_random = False
+      overlapping = False
+      p, r, c = nn_ops.fractional_max_pool(
+          input_holder,
+          pooling_ratio,
+          pseudo_random,
+          overlapping,
+          deterministic=True,
+          seed=self._SEED,
+          seed2=self._SEED2)
+      # First run.
+      input_a = np.zeros([3, 32, 32, 3])
+      actual, row_seq, col_seq = sess.run([p, r, c], {input_holder: input_a})
+      expected = self._GetExpectedFractionalMaxPoolResult(
+          input_a, row_seq, col_seq, overlapping)
+      self.assertSequenceEqual(expected.shape, actual.shape)
+      # Second run.
+      input_b = np.zeros([4, 45, 45, 3])
+      actual, row_seq, col_seq = sess.run([p, r, c], {input_holder: input_b})
+      expected = self._GetExpectedFractionalMaxPoolResult(
+          input_b, row_seq, col_seq, overlapping)
+      self.assertSequenceEqual(expected.shape, actual.shape)
+
 
 class FractionalMaxPoolGradTest(test.TestCase):
   """Tests for FractionalMaxPoolGrad.
diff --git a/tensorflow/python/kernel_tests/gather_nd_op_test.py b/tensorflow/python/kernel_tests/gather_nd_op_test.py
index 5109ed98c92002917a5dfa3b4cd79953fd950af8..91ebe8de9921268b2a3c5ad645585e1fe83c7419 100644
--- a/tensorflow/python/kernel_tests/gather_nd_op_test.py
+++ b/tensorflow/python/kernel_tests/gather_nd_op_test.py
@@ -255,6 +255,35 @@ class GatherNdTest(test.TestCase):
     with self.test_session(use_gpu=True):
       self.assertAllEqual(expected_grads, grads.eval())
 
+  def testGradientsRank7Elements(self):
+    # Shape [1,1,2,1,1,2,2]
+    indices = constant_op.constant(
+        [[[
+            [[[[0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0]]]],
+            [[[[0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 1]]]]
+        ]]],
+        dtype=dtypes.int32)
+    inputs = constant_op.constant(
+        [[[
+            [[[[1, 3], [5, 7]]]],
+            [[[[2, 4], [6, 8]]]]
+        ]]], dtype=dtypes.float64)
+    outputs = array_ops.gather_nd(inputs, indices)
+
+    grad_vals = constant_op.constant(
+        [[[
+            [[[[1, 2], [3, 4]]]],
+            [[[[5, 6], [7, 8]]]]
+        ]]], dtype=dtypes.float64)
+    grads = gradients_impl.gradients([outputs], [inputs], [grad_vals])[0]
+    expected_grads = np.array(
+        [[[
+            [[[[5, 6], [1, 2]]]],
+            [[[[3, 4], [7, 8]]]]
+        ]]], dtype=np.float64)
+    with self.test_session(use_gpu=True):
+      self.assertAllEqual(expected_grads, grads.eval())
+
   def testGradientsInt64Indices(self):
     indices = constant_op.constant(
         [[[0, 1], [1, 0]], [[0, 0], [1, 1]]], dtype=dtypes.int64)
diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py
index 157c0935403955d629b680eb6fc7627603ecbbf3..19a7d2f9d51fff46ee817ad03ef62383f6727791 100644
--- a/tensorflow/python/kernel_tests/init_ops_test.py
+++ b/tensorflow/python/kernel_tests/init_ops_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
@@ -146,6 +147,18 @@ class ConstantInitializersTest(test.TestCase):
       self.assertEqual(x.dtype.base_dtype, dtypes.int32)
       self.assertAllEqual(x.eval(), 7 * np.ones(shape, dtype=np.int32))
 
+  def testConstantTupleInitializer(self):
+    with self.test_session(use_gpu=True):
+      shape = [3]
+      x = variable_scope.get_variable(
+          "x",
+          shape=shape,
+          dtype=dtypes.int32,
+          initializer=init_ops.constant_initializer((10, 20, 30)))
+      x.initializer.run()
+      self.assertEqual(x.dtype.base_dtype, dtypes.int32)
+      self.assertAllEqual(x.eval(), [10, 20, 30])
+
   def _testNDimConstantInitializer(self, name, value, shape, expected):
     with self.test_session(use_gpu=True):
       init = init_ops.constant_initializer(value, dtype=dtypes.int32)
@@ -214,6 +227,16 @@ class ConstantInitializersTest(test.TestCase):
     self._testNDimConstantInitializerMoreValues(
         np.asarray(value).reshape(tuple([2, 4])), shape)
 
+  def testInvalidValueTypeForConstantInitializerCausesTypeError(self):
+    c = constant_op.constant([1.0, 2.0, 3.0])
+    with self.assertRaisesRegexp(
+        TypeError, r"Invalid type for initial value: .*Tensor.*"):
+      init_ops.constant_initializer(c, dtype=dtypes.float32)
+    v = variables.Variable([3.0, 2.0, 1.0])
+    with self.assertRaisesRegexp(
+        TypeError, r"Invalid type for initial value: .*Variable.*"):
+      init_ops.constant_initializer(v, dtype=dtypes.float32)
+
 
 class RandomNormalInitializationTest(test.TestCase):
 
diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..8fae044e2e1e8a92db898c97b4e7824564747f69
--- /dev/null
+++ b/tensorflow/python/kernel_tests/list_ops_test.py
@@ -0,0 +1,165 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ops which manipulate lists of tensors."""
+
+# pylint: disable=g-bad-name
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np  # pylint: disable=unused-import
+
+from tensorflow.python.client import session
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import list_ops
+from tensorflow.python.platform import test
+from tensorflow.python.training import server_lib
+
+
+def scalar_shape():
+  return ops.convert_to_tensor([], dtype=dtypes.int32)
+
+
+class ListOpsTest(test_util.TensorFlowTestCase):
+
+  def testPushPop(self):
+    l = list_ops.empty_tensor_list(element_dtype=dtypes.float32,
+                                   element_shape=scalar_shape())
+    l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0))
+    l, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+    self.assertAllEqual(e, 1.0)
+
+  def testPushPopGPU(self):
+    if not context.num_gpus():
+      return
+    with context.device("gpu:0"):
+      self.testPushPop()
+
+  def testStack(self):
+    l = list_ops.empty_tensor_list(element_dtype=dtypes.float32,
+                                   element_shape=scalar_shape())
+    l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0))
+    l = list_ops.tensor_list_push_back(l, constant_op.constant(2.0))
+    t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
+    self.assertAllEqual(t, [1.0, 2.0])
+
+  def testStackGPU(self):
+    if not context.num_gpus():
+      return
+    with context.device("gpu:0"):
+      self.testStack()
+
+  def testTensorListFromTensor(self):
+    t = constant_op.constant([1.0, 2.0])
+    l = list_ops.tensor_list_from_tensor(t, element_shape=scalar_shape())
+    l, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+    self.assertAllEqual(e, 2.0)
+    l, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+    self.assertAllEqual(e, 1.0)
+    self.assertAllEqual(list_ops.tensor_list_length(l), 0)
+
+  def testFromTensorGPU(self):
+    if not context.num_gpus():
+      return
+    with context.device("gpu:0"):
+      self.testTensorListFromTensor()
+
+  def testUnknownShape(self):
+    l = list_ops.empty_tensor_list(element_dtype=dtypes.float32,
+                                   element_shape=-1)
+    l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0))
+    l = list_ops.tensor_list_push_back(l, constant_op.constant([1.0, 2.0]))
+    _, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+    self.assertAllEqual(e, [1.0, 2.0])
+
+  def testCPUGPUCopy(self):
+    if not context.num_gpus():
+      return
+    t = constant_op.constant([1.0, 2.0])
+    l = list_ops.tensor_list_from_tensor(t, element_shape=scalar_shape())
+    with context.device("gpu:0"):
+      l_gpu = array_ops.identity(l)
+      self.assertAllEqual(
+          list_ops.tensor_list_pop_back(
+              l_gpu, element_dtype=dtypes.float32)[1],
+          2.0)
+    l_cpu = array_ops.identity(l_gpu)
+    self.assertAllEqual(
+        list_ops.tensor_list_pop_back(
+            l_cpu, element_dtype=dtypes.float32)[1],
+        2.0)
+
+  def testSerialize(self):
+    # pylint: disable=g-import-not-at-top
+    try:
+      import portpicker
+    except ImportError:
+      return
+    with context.graph_mode():
+      worker_port = portpicker.pick_unused_port()
+      ps_port = portpicker.pick_unused_port()
+      cluster_dict = {
+          "worker": ["localhost:%s" % worker_port],
+          "ps": ["localhost:%s" % ps_port]
+      }
+      cs = server_lib.ClusterSpec(cluster_dict)
+
+      worker = server_lib.Server(
+          cs, job_name="worker", protocol="grpc", task_index=0, start=True)
+      unused_ps = server_lib.Server(
+          cs, job_name="ps", protocol="grpc", task_index=0, start=True)
+      with ops.Graph().as_default(), session.Session(target=worker.target):
+        with ops.device("/job:worker"):
+          t = constant_op.constant([[1.0], [2.0]])
+          l = list_ops.tensor_list_from_tensor(t, element_shape=[1])
+        with ops.device("/job:ps"):
+          l_ps = array_ops.identity(l)
+          l_ps, e = list_ops.tensor_list_pop_back(
+              l_ps, element_dtype=dtypes.float32)
+        with ops.device("/job:worker"):
+          worker_e = array_ops.identity(e)
+        self.assertAllEqual(worker_e.eval(), [2.0])
+
+  def testPushPopGradients(self):
+    with backprop.GradientTape() as tape:
+      l = list_ops.empty_tensor_list(element_dtype=dtypes.float32,
+                                     element_shape=scalar_shape())
+      c = constant_op.constant(1.0)
+      tape.watch(c)
+      l = list_ops.tensor_list_push_back(l, c)
+      l, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+      e = 2 * e
+    self.assertAllEqual(tape.gradient(e, [c])[0], 2.0)
+
+  def testStackFromTensorGradients(self):
+    with backprop.GradientTape() as tape:
+      c = constant_op.constant([1.0, 2.0])
+      tape.watch(c)
+      l = list_ops.tensor_list_from_tensor(c, element_shape=scalar_shape())
+      c2 = list_ops.tensor_list_stack(
+          l, element_dtype=dtypes.float32)
+      result = c2 * 2.0
+    self.assertAllEqual(tape.gradient(result, [c])[0], [2.0, 2.0])
+
+
+if __name__ == "__main__":
+  ops.enable_eager_execution()
+  test.main()
diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py
index e4c799cb1cfce35143b887feb9ae1af6455d7b25..5f08339fe5f15d12f08b0e18df03d612402d86f8 100644
--- a/tensorflow/python/kernel_tests/lookup_ops_test.py
+++ b/tensorflow/python/kernel_tests/lookup_ops_test.py
@@ -618,15 +618,20 @@ class IndexToStringTableFromFileTest(test.TestCase):
     return vocabulary_file
 
   def test_index_to_string_table(self):
-    vocabulary_file = self._createVocabFile("i2f_vocab1.txt")
-    with self.test_session():
-      table = lookup_ops.index_to_string_table_from_file(
-          vocabulary_file=vocabulary_file)
-      features = table.lookup(constant_op.constant([0, 1, 2, 3], dtypes.int64))
-      self.assertRaises(errors_impl.OpError, features.eval)
-      lookup_ops.tables_initializer().run()
-      self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"),
-                          features.eval())
+    vocabulary_path = self._createVocabFile("i2f_vocab1.txt")
+    # vocabulary_file supports string and tensor
+    type_funcs = [str, constant_op.constant]
+    for type_func in type_funcs:
+      vocabulary_file = type_func(vocabulary_path)
+      with self.test_session():
+        table = lookup_ops.index_to_string_table_from_file(
+            vocabulary_file=vocabulary_file)
+        features = table.lookup(
+            constant_op.constant([0, 1, 2, 3], dtypes.int64))
+        self.assertRaises(errors_impl.OpError, features.eval)
+        lookup_ops.tables_initializer().run()
+        self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"),
+                            features.eval())
 
   def test_index_to_string_table_from_multicolumn_file(self):
     vocabulary_file = self._createVocabFile(
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index da57f918ac286bb59e0525a02482b672dc40dc89..81af3a0887d09a7736a145a5b3c99c9391691724 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -1340,8 +1340,8 @@ class ComputeWeightedLossTest(test.TestCase):
               self.assertAllClose(
                   np.sum(self._raw_losses), unweighted_loss.eval())
             else:
-              # reduction one of losses.Reduction.MEAN and
-              # losses.Reduction.SUM_BY_NONZERO_WEIGHTS.
+              # reduction one of MEAN, SUM_OVER_NONZERO_WEIGHTS,
+              # SUM_BY_NONZERO_WEIGHTS or SUM_OVER_BATCH_SIZE.
               self.assertAllClose(
                   np.mean(self._raw_losses), unweighted_loss.eval())
 
@@ -1435,10 +1435,15 @@ class ComputeWeightedLossTest(test.TestCase):
               self.assertAllClose(
                   weighted_sum / np.sum(broadcast_weights),
                   weighted_loss.eval())
-            elif reduction == losses.Reduction.SUM_BY_NONZERO_WEIGHTS:
+            elif (reduction == losses.Reduction.SUM_OVER_NONZERO_WEIGHTS or
+                  reduction == losses.Reduction.SUM_BY_NONZERO_WEIGHTS):
               self.assertAllClose(
                   weighted_sum / np.count_nonzero(broadcast_weights),
                   weighted_loss.eval())
+            elif reduction == losses.Reduction.SUM_OVER_BATCH_SIZE:
+              self.assertAllClose(
+                  weighted_sum / self._raw_losses.size,
+                  weighted_loss.eval())
 
   def test1x1x1Weight(self):
     self._test_valid_weights((((17.0,),),))
diff --git a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
index c5a7a3ba99ba161c197643a3e3c5aed5d37e9d2b..6203a412d7faec4fe9f6179141301579b5900291 100644
--- a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
@@ -46,10 +46,8 @@ def np_expm(x):
 class ExponentialOpTest(test.TestCase):
 
   def _verifyExponential(self, x, np_type):
-    # TODO(pfau): add matrix logarithm and test that it is inverse of expm.
     inp = x.astype(np_type)
     with self.test_session(use_gpu=True):
-      # Verify that x^{-1} * x == Identity matrix.
       tf_ans = gen_linalg_ops._matrix_exponential(inp)
       if x.size == 0:
         np_ans = np.empty(x.shape, dtype=np_type)
@@ -121,7 +119,7 @@ class ExponentialOpTest(test.TestCase):
       gen_linalg_ops._matrix_exponential(np.array([[1., 2., 3.], [3., 4., 5.]]))
 
   def testWrongDimensions(self):
-    # The input to the inverse should be at least a 2-dimensional tensor.
+    # The input to the exponential should be at least a 2-dimensional tensor.
     tensor3 = constant_op.constant([1., 2.])
     with self.assertRaises(ValueError):
       gen_linalg_ops._matrix_exponential(tensor3)
diff --git a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..18ed59828c15f5ad21fe054cd6e40991c02bb356
--- /dev/null
+++ b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
@@ -0,0 +1,166 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tensorflow.ops.gen_linalg_ops.matrix_logarithm."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import numpy as np
+
+from tensorflow.python.client import session
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_linalg_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class LogarithmOpTest(test.TestCase):
+
+  def _verifyLogarithm(self, x, np_type):
+    inp = x.astype(np_type)
+    with self.test_session(use_gpu=True):
+      # Verify that expm(logm(A)) == A.
+      tf_ans = gen_linalg_ops._matrix_exponential(
+          gen_linalg_ops._matrix_logarithm(inp))
+      out = tf_ans.eval()
+      self.assertAllClose(inp, out, rtol=1e-4, atol=1e-3)
+
+  def _verifyLogarithmComplex(self, x):
+    for np_type in [np.complex64, np.complex128]:
+      self._verifyLogarithm(x, np_type)
+
+  def _makeBatch(self, matrix1, matrix2):
+    matrix_batch = np.concatenate(
+        [np.expand_dims(matrix1, 0),
+         np.expand_dims(matrix2, 0)])
+    matrix_batch = np.tile(matrix_batch, [2, 3, 1, 1])
+    return matrix_batch
+
+  def testNonsymmetric(self):
+    # 2x2 matrices
+    matrix1 = np.array([[1., 2.], [3., 4.]])
+    matrix2 = np.array([[1., 3.], [3., 5.]])
+    matrix1 = matrix1.astype(np.complex64)
+    matrix1 += 1j * matrix1
+    matrix2 = matrix2.astype(np.complex64)
+    matrix2 += 1j * matrix2
+    self._verifyLogarithmComplex(matrix1)
+    self._verifyLogarithmComplex(matrix2)
+    # Complex batch
+    self._verifyLogarithmComplex(self._makeBatch(matrix1, matrix2))
+
+  def testSymmetricPositiveDefinite(self):
+    # 2x2 matrices
+    matrix1 = np.array([[2., 1.], [1., 2.]])
+    matrix2 = np.array([[3., -1.], [-1., 3.]])
+    matrix1 = matrix1.astype(np.complex64)
+    matrix1 += 1j * matrix1
+    matrix2 = matrix2.astype(np.complex64)
+    matrix2 += 1j * matrix2
+    self._verifyLogarithmComplex(matrix1)
+    self._verifyLogarithmComplex(matrix2)
+    # Complex batch
+    self._verifyLogarithmComplex(self._makeBatch(matrix1, matrix2))
+
+  def testNonSquareMatrix(self):
+    # When the logarithm of a non-square matrix is attempted we should return
+    # an error
+    with self.assertRaises(ValueError):
+      gen_linalg_ops._matrix_logarithm(
+          np.array([[1., 2., 3.], [3., 4., 5.]], dtype=np.complex64))
+
+  def testWrongDimensions(self):
+    # The input to the logarithm should be at least a 2-dimensional tensor.
+    tensor3 = constant_op.constant([1., 2.], dtype=dtypes.complex64)
+    with self.assertRaises(ValueError):
+      gen_linalg_ops._matrix_logarithm(tensor3)
+
+  def testEmpty(self):
+    self._verifyLogarithmComplex(np.empty([0, 2, 2], dtype=np.complex64))
+    self._verifyLogarithmComplex(np.empty([2, 0, 0], dtype=np.complex64))
+
+  def testRandomSmallAndLarge(self):
+    np.random.seed(42)
+    for dtype in np.complex64, np.complex128:
+      for batch_dims in [(), (1,), (3,), (2, 2)]:
+        for size in 8, 31, 32:
+          shape = batch_dims + (size, size)
+          matrix = np.random.uniform(
+              low=-1.0, high=1.0,
+              size=np.prod(shape)).reshape(shape).astype(dtype)
+          self._verifyLogarithmComplex(matrix)
+
+  def testConcurrentExecutesWithoutError(self):
+    with self.test_session(use_gpu=True) as sess:
+      matrix1 = math_ops.cast(
+          random_ops.random_normal([5, 5], seed=42), dtypes.complex64)
+      matrix2 = math_ops.cast(
+          random_ops.random_normal([5, 5], seed=42), dtypes.complex64)
+      logm1 = gen_linalg_ops._matrix_logarithm(matrix1)
+      logm2 = gen_linalg_ops._matrix_logarithm(matrix2)
+      logm = sess.run([logm1, logm2])
+      self.assertAllEqual(logm[0], logm[1])
+
+
+class MatrixLogarithmBenchmark(test.Benchmark):
+
+  shapes = [
+      (4, 4),
+      (10, 10),
+      (16, 16),
+      (101, 101),
+      (256, 256),
+      (1000, 1000),
+      (1024, 1024),
+      (2048, 2048),
+      (513, 4, 4),
+      (513, 16, 16),
+      (513, 256, 256),
+  ]
+
+  def _GenerateMatrix(self, shape):
+    batch_shape = shape[:-2]
+    shape = shape[-2:]
+    assert shape[0] == shape[1]
+    n = shape[0]
+    matrix = np.ones(shape).astype(np.complex64) / (
+        2.0 * n) + np.diag(np.ones(n).astype(np.complex64))
+    return variables.Variable(np.tile(matrix, batch_shape + (1, 1)))
+
+  def benchmarkMatrixLogarithmOp(self):
+    for shape in self.shapes:
+      with ops.Graph().as_default(), \
+          session.Session() as sess, \
+          ops.device("/cpu:0"):
+        matrix = self._GenerateMatrix(shape)
+        logm = gen_linalg_ops._matrix_logarithm(matrix)
+        variables.global_variables_initializer().run()
+        self.run_op_benchmark(
+            sess,
+            control_flow_ops.group(logm),
+            min_iters=25,
+            name="matrix_logarithm_cpu_{shape}".format(
+                shape=shape))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/kernel_tests/parse_single_example_op_test.py b/tensorflow/python/kernel_tests/parse_single_example_op_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf4c89b368c98c54001ad533dec847c41f5b3e43
--- /dev/null
+++ b/tensorflow/python/kernel_tests/parse_single_example_op_test.py
@@ -0,0 +1,937 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tensorflow.ops.parsing_ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import itertools
+
+import numpy as np
+
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging
+
+# Helpers for creating Example objects
+example = example_pb2.Example
+feature = feature_pb2.Feature
+features = lambda d: feature_pb2.Features(feature=d)
+bytes_feature = lambda v: feature(bytes_list=feature_pb2.BytesList(value=v))
+int64_feature = lambda v: feature(int64_list=feature_pb2.Int64List(value=v))
+float_feature = lambda v: feature(float_list=feature_pb2.FloatList(value=v))
+# Helpers for creating SequenceExample objects
+feature_list = lambda l: feature_pb2.FeatureList(feature=l)
+feature_lists = lambda d: feature_pb2.FeatureLists(feature_list=d)
+sequence_example = example_pb2.SequenceExample
+
+
+def empty_sparse(dtype, shape=None):
+  if shape is None:
+    shape = [0]
+  return (np.empty(shape=(0, len(shape)), dtype=np.int64),
+          np.array([], dtype=dtype), np.array(shape, dtype=np.int64))
+
+
+def flatten(list_of_lists):
+  """Flatten one level of nesting."""
+  return itertools.chain.from_iterable(list_of_lists)
+
+
+def flatten_values_tensors_or_sparse(tensors_list):
+  """Flatten each SparseTensor object into 3 Tensors for session.run()."""
+  return list(
+      flatten([[v.indices, v.values, v.dense_shape] if isinstance(
+          v, sparse_tensor.SparseTensor) else [v] for v in tensors_list]))
+
+
+def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
+                                flat_output):
+  tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))
+
+  i = 0  # Index into the flattened output of session.run()
+  for k, v in dict_tensors.items():
+    expected_v = expected_tensors[k]
+    tf_logging.info("Comparing key: %s", k)
+    if isinstance(v, sparse_tensor.SparseTensor):
+      # Three outputs for SparseTensor : indices, values, shape.
+      tester.assertEqual([k, len(expected_v)], [k, 3])
+      tester.assertAllEqual(expected_v[0], flat_output[i])
+      tester.assertAllEqual(expected_v[1], flat_output[i + 1])
+      tester.assertAllEqual(expected_v[2], flat_output[i + 2])
+      i += 3
+    else:
+      # One output for standard Tensor.
+      tester.assertAllEqual(expected_v, flat_output[i])
+      i += 1
+
+
+class ParseExampleTest(test.TestCase):
+
+  def _test(self, kwargs, expected_values=None, expected_err=None):
+    with self.test_session() as sess:
+      if expected_err:
+        with self.assertRaisesWithPredicateMatch(expected_err[0],
+                                                 expected_err[1]):
+          out = parsing_ops.parse_single_example(**kwargs)
+          sess.run(flatten_values_tensors_or_sparse(out.values()))
+        return
+      else:
+        # Returns dict w/ Tensors and SparseTensors.
+        out = parsing_ops.parse_single_example(**kwargs)
+        # Also include a test with the example names specified to retain
+        # code coverage of the unfused version, and ensure that the two
+        # versions produce the same results.
+        out_with_example_name = parsing_ops.parse_single_example(
+            example_names="name", **kwargs)
+        for result_dict in [out, out_with_example_name]:
+          result = flatten_values_tensors_or_sparse(result_dict.values())
+          # Check values.
+          tf_result = sess.run(result)
+          _compare_output_to_expected(self, result_dict, expected_values,
+                                      tf_result)
+
+      for k, f in kwargs["features"].items():
+        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
+          self.assertEqual(tuple(out[k].get_shape().as_list()), f.shape)
+        elif isinstance(f, parsing_ops.VarLenFeature):
+          self.assertEqual(
+              tuple(out[k].indices.get_shape().as_list()), (None, 1))
+          self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
+          self.assertEqual(
+              tuple(out[k].dense_shape.get_shape().as_list()), (1,))
+
+  def testEmptySerializedWithAllDefaults(self):
+    sparse_name = "st_a"
+    a_name = "a"
+    b_name = "b"
+    c_name = "c:has_a_tricky_name"
+    a_default = [0, 42, 0]
+    b_default = np.random.rand(3, 3).astype(bytes)
+    c_default = np.random.rand(2).astype(np.float32)
+
+    expected_st_a = (  # indices, values, shape
+        np.empty((0, 1), dtype=np.int64),  # indices
+        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
+        np.array([0], dtype=np.int64))  # max_elems = 0
+
+    expected_output = {
+        sparse_name: expected_st_a,
+        a_name: np.array([a_default]),
+        b_name: np.array(b_default),
+        c_name: np.array(c_default),
+    }
+
+    self._test({
+        "serialized": ops.convert_to_tensor(""),
+        "features": {
+            sparse_name:
+                parsing_ops.VarLenFeature(dtypes.int64),
+            a_name:
+                parsing_ops.FixedLenFeature(
+                    (1, 3), dtypes.int64, default_value=a_default),
+            b_name:
+                parsing_ops.FixedLenFeature(
+                    (3, 3), dtypes.string, default_value=b_default),
+            c_name:
+                parsing_ops.FixedLenFeature(
+                    (2,), dtypes.float32, default_value=c_default),
+        }
+    }, expected_output)
+
+  def testEmptySerializedWithoutDefaultsShouldFail(self):
+    input_features = {
+        "st_a":
+            parsing_ops.VarLenFeature(dtypes.int64),
+        "a":
+            parsing_ops.FixedLenFeature(
+                (1, 3), dtypes.int64, default_value=[0, 42, 0]),
+        "b":
+            parsing_ops.FixedLenFeature(
+                (3, 3),
+                dtypes.string,
+                default_value=np.random.rand(3, 3).astype(bytes)),
+        # Feature "c" is missing a default, this gap will cause failure.
+        "c":
+            parsing_ops.FixedLenFeature(
+                (2,), dtype=dtypes.float32),
+    }
+
+    # Edge case where the key is there but the feature value is empty
+    original = example(features=features({"c": feature()}))
+    self._test(
+        {
+            "serialized": original.SerializeToString(),
+            "features": input_features,
+        },
+        expected_err=(errors_impl.OpError,
+                      "Feature: c \\(data type: float\\) is required"))
+
+    # Standard case of missing key and value.
+    self._test(
+        {
+            "serialized": "",
+            "features": input_features,
+        },
+        expected_err=(errors_impl.OpError,
+                      "Feature: c \\(data type: float\\) is required"))
+
+  def testDenseNotMatchingShapeShouldFail(self):
+    original = example(features=features({
+        "a": float_feature([-1, -1]),
+    }))
+
+    serialized = original.SerializeToString()
+
+    self._test(
+        {
+            "serialized": ops.convert_to_tensor(serialized),
+            "features": {
+                "a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)
+            }
+        },
+        # TODO(mrry): Consider matching the `tf.parse_example()` error message.
+        expected_err=(errors_impl.OpError, "Key: a."))
+
+  def testDenseDefaultNoShapeShouldFail(self):
+    original = example(features=features({
+        "a": float_feature([1, 1, 3]),
+    }))
+
+    serialized = original.SerializeToString()
+
+    self._test(
+        {
+            "serialized": ops.convert_to_tensor(serialized),
+            "features": {
+                "a": parsing_ops.FixedLenFeature(None, dtypes.float32)
+            }
+        },
+        expected_err=(ValueError, "Missing shape for feature a"))
+
+  def testSerializedContainingSparse(self):
+    original = [
+        example(features=features({
+            "st_c": float_feature([3, 4])
+        })),
+        example(features=features({
+            "st_c": float_feature([]),  # empty float list
+        })),
+        example(features=features({
+            "st_d": feature(),  # feature with nothing in it
+        })),
+        example(features=features({
+            "st_c": float_feature([1, 2, -1]),
+            "st_d": bytes_feature([b"hi"])
+        }))
+    ]
+
+    expected_outputs = [{
+        "st_c": (np.array([[0], [1]], dtype=np.int64),
+                 np.array([3.0, 4.0], dtype=np.float32),
+                 np.array([2], dtype=np.int64)),
+        "st_d":
+            empty_sparse(bytes)
+    }, {
+        "st_c": empty_sparse(np.float32),
+        "st_d": empty_sparse(bytes)
+    }, {
+        "st_c": empty_sparse(np.float32),
+        "st_d": empty_sparse(bytes)
+    }, {
+        "st_c": (np.array([[0], [1], [2]], dtype=np.int64),
+                 np.array([1.0, 2.0, -1.0], dtype=np.float32),
+                 np.array([3], dtype=np.int64)),
+        "st_d": (np.array([[0]], dtype=np.int64), np.array(["hi"], dtype=bytes),
+                 np.array([1], dtype=np.int64))
+    }]
+
+    for proto, expected_output in zip(original, expected_outputs):
+      self._test({
+          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
+          "features": {
+              "st_c": parsing_ops.VarLenFeature(dtypes.float32),
+              "st_d": parsing_ops.VarLenFeature(dtypes.string)
+          },
+      }, expected_output)
+
+  def testSerializedContainingSparseFeature(self):
+    original = [
+        example(features=features({
+            "val": float_feature([3, 4]),
+            "idx": int64_feature([5, 10])
+        })),
+        example(features=features({
+            "val": float_feature([]),  # empty float list
+            "idx": int64_feature([])
+        })),
+        example(features=features({
+            "val": feature(),  # feature with nothing in it
+            # missing idx feature
+        })),
+        example(features=features({
+            "val": float_feature([1, 2, -1]),
+            "idx":
+                int64_feature([0, 9, 3])  # unsorted
+        }))
+    ]
+
+    expected_outputs = [{
+        "sp": (np.array([[5], [10]], dtype=np.int64),
+               np.array([3.0, 4.0], dtype=np.float32),
+               np.array([13], dtype=np.int64))
+    }, {
+        "sp": empty_sparse(np.float32, shape=[13])
+    }, {
+        "sp": empty_sparse(np.float32, shape=[13])
+    }, {
+        "sp": (np.array([[0], [3], [9]], dtype=np.int64),
+               np.array([1.0, -1.0, 2.0], dtype=np.float32),
+               np.array([13], dtype=np.int64))
+    }]
+
+    for proto, expected_output in zip(original, expected_outputs):
+      self._test({
+          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
+          "features": {
+              "sp":
+                  parsing_ops.SparseFeature(["idx"], "val", dtypes.float32,
+                                            [13])
+          }
+      }, expected_output)
+
+  def testSerializedContainingSparseFeatureReuse(self):
+    original = [
+        example(features=features({
+            "val1": float_feature([3, 4]),
+            "val2": float_feature([5, 6]),
+            "idx": int64_feature([5, 10])
+        })),
+        example(features=features({
+            "val1": float_feature([]),  # empty float list
+            "idx": int64_feature([])
+        })),
+    ]
+
+    expected_outputs = [{
+        "sp1": (np.array([[5], [10]], dtype=np.int64),
+                np.array([3.0, 4.0], dtype=np.float32),
+                np.array([13], dtype=np.int64)),
+        "sp2": (np.array([[5], [10]], dtype=np.int64),
+                np.array([5.0, 6.0], dtype=np.float32),
+                np.array([7], dtype=np.int64))
+    }, {
+        "sp1": empty_sparse(np.float32, shape=[13]),
+        "sp2": empty_sparse(np.float32, shape=[7])
+    }]
+
+    for proto, expected_output in zip(original, expected_outputs):
+      self._test({
+          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
+          "features": {
+              "sp1":
+                  parsing_ops.SparseFeature("idx", "val1", dtypes.float32, 13),
+              "sp2":
+                  parsing_ops.SparseFeature(
+                      "idx",
+                      "val2",
+                      dtypes.float32,
+                      size=7,
+                      already_sorted=True)
+          }
+      }, expected_output)
+
+  def testSerializedContaining3DSparseFeature(self):
+    original = [
+        example(features=features({
+            "val": float_feature([3, 4]),
+            "idx0": int64_feature([5, 10]),
+            "idx1": int64_feature([0, 2]),
+        })),
+        example(features=features({
+            "val": float_feature([]),  # empty float list
+            "idx0": int64_feature([]),
+            "idx1": int64_feature([]),
+        })),
+        example(features=features({
+            "val": feature(),  # feature with nothing in it
+            # missing idx feature
+        })),
+        example(features=features({
+            "val": float_feature([1, 2, -1]),
+            "idx0": int64_feature([0, 9, 3]),  # unsorted
+            "idx1": int64_feature([1, 0, 2]),
+        }))
+    ]
+
+    expected_outputs = [{
+        "sp": (np.array([[5, 0], [10, 2]], dtype=np.int64),
+               np.array([3.0, 4.0], dtype=np.float32),
+               np.array([13, 3], dtype=np.int64))
+    }, {
+        "sp": empty_sparse(np.float32, shape=[13, 3])
+    }, {
+        "sp": empty_sparse(np.float32, shape=[13, 3])
+    }, {
+        "sp": (np.array([[0, 1], [3, 2], [9, 0]], dtype=np.int64),
+               np.array([1.0, -1.0, 2.0], dtype=np.float32),
+               np.array([13, 3], dtype=np.int64))
+    }]
+
+    for proto, expected_output in zip(original, expected_outputs):
+      self._test({
+          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
+          "features": {
+              "sp":
+                  parsing_ops.SparseFeature(["idx0", "idx1"], "val",
+                                            dtypes.float32, [13, 3])
+          }
+      }, expected_output)
+
+  def testSerializedContainingDense(self):
+    aname = "a"
+    bname = "b*has+a:tricky_name"
+    original = [
+        example(features=features({
+            aname: float_feature([1, 1]),
+            bname: bytes_feature([b"b0_str"]),
+        })), example(features=features({
+            aname: float_feature([-1, -1]),
+            bname: bytes_feature([b""]),
+        }))
+    ]
+
+    expected_outputs = [{
+        aname: np.array([1, 1], dtype=np.float32).reshape(1, 2, 1),
+        bname: np.array(["b0_str"], dtype=bytes).reshape(1, 1, 1, 1)
+    }, {
+        aname: np.array([-1, -1], dtype=np.float32).reshape(1, 2, 1),
+        bname: np.array([""], dtype=bytes).reshape(1, 1, 1, 1)
+    }]
+
+    for proto, expected_output in zip(original, expected_outputs):
+      # No defaults, values required
+      self._test({
+          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
+          "features": {
+              aname:
+                  parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32),
+              bname:
+                  parsing_ops.FixedLenFeature(
+                      (1, 1, 1, 1), dtype=dtypes.string),
+          }
+      }, expected_output)
+
+  # This test is identical as the previous one except
+  # for the creation of 'serialized'.
+  def testSerializedContainingDenseWithConcat(self):
+    aname = "a"
+    bname = "b*has+a:tricky_name"
+    # TODO(lew): Feature appearing twice should be an error in future.
+    original = [
+        (example(features=features({
+            aname: float_feature([10, 10]),
+        })), example(features=features({
+            aname: float_feature([1, 1]),
+            bname: bytes_feature([b"b0_str"]),
+        }))),
+        (
+            example(features=features({
+                bname: bytes_feature([b"b100"]),
+            })),
+            example(features=features({
+                aname: float_feature([-1, -1]),
+                bname: bytes_feature([b"b1"]),
+            })),),
+    ]
+
+    expected_outputs = [{
+        aname: np.array([1, 1], dtype=np.float32).reshape(1, 2, 1),
+        bname: np.array(["b0_str"], dtype=bytes).reshape(1, 1, 1, 1)
+    }, {
+        aname: np.array([-1, -1], dtype=np.float32).reshape(1, 2, 1),
+        bname: np.array(["b1"], dtype=bytes).reshape(1, 1, 1, 1)
+    }]
+
+    for (m, n), expected_output in zip(original, expected_outputs):
+      # No defaults, values required
+      self._test({
+          "serialized":
+              ops.convert_to_tensor(
+                  m.SerializeToString() + n.SerializeToString()),
+          "features": {
+              aname:
+                  parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32),
+              bname:
+                  parsing_ops.FixedLenFeature(
+                      (1, 1, 1, 1), dtype=dtypes.string),
+          }
+      }, expected_output)
+
+  def testSerializedContainingDenseScalar(self):
+    original = [
+        example(features=features({
+            "a": float_feature([1]),
+        })), example(features=features({}))
+    ]
+
+    expected_outputs = [{
+        "a": np.array([1], dtype=np.float32)
+    }, {
+        "a": np.array([-1], dtype=np.float32)
+    }]
+
+    for proto, expected_output in zip(original, expected_outputs):
+      self._test({
+          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
+          "features": {
+              "a":
+                  parsing_ops.FixedLenFeature(
+                      (1,), dtype=dtypes.float32, default_value=-1),
+          }
+      }, expected_output)
+
+  def testSerializedContainingDenseWithDefaults(self):
+    original = [
+        example(features=features({
+            "a": float_feature([1, 1]),
+        })),
+        example(features=features({
+            "b": bytes_feature([b"b1"]),
+        })),
+        example(features=features({
+            "b": feature()
+        })),
+    ]
+
+    expected_outputs = [{
+        "a": np.array([1, 1], dtype=np.float32).reshape(1, 2, 1),
+        "b": np.array("tmp_str", dtype=bytes).reshape(1, 1, 1, 1)
+    }, {
+        "a": np.array([3, -3], dtype=np.float32).reshape(1, 2, 1),
+        "b": np.array("b1", dtype=bytes).reshape(1, 1, 1, 1)
+    }, {
+        "a": np.array([3, -3], dtype=np.float32).reshape(1, 2, 1),
+        "b": np.array("tmp_str", dtype=bytes).reshape(1, 1, 1, 1)
+    }]
+
+    for proto, expected_output in zip(original, expected_outputs):
+      self._test({
+          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
+          "features": {
+              "a":
+                  parsing_ops.FixedLenFeature(
+                      (1, 2, 1),
+                      dtype=dtypes.float32,
+                      default_value=[3.0, -3.0]),
+              "b":
+                  parsing_ops.FixedLenFeature(
+                      (1, 1, 1, 1),
+                      dtype=dtypes.string,
+                      default_value="tmp_str"),
+          }
+      }, expected_output)
+
+  def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
+    original = [
+        example(features=features({
+            "c": float_feature([3, 4]),
+            "val": bytes_feature([b"a", b"b"]),
+            "idx": int64_feature([0, 3])
+        })), example(features=features({
+            "c": float_feature([1, 2]),
+            "val": bytes_feature([b"c"]),
+            "idx": int64_feature([7])
+        }))
+    ]
+
+    a_default = np.array([[1, 2, 3]], dtype=np.int64)
+    b_default = np.random.rand(3, 3).astype(bytes)
+
+    expected_st_a = empty_sparse(np.int64)
+
+    expected_outputs = [{
+        "st_a":
+            expected_st_a,
+        "sp": (np.array([[0], [3]], dtype=np.int64),
+               np.array(["a", "b"], dtype=bytes), np.array(
+                   [13], dtype=np.int64)),
+        "a":
+            a_default,
+        "b":
+            b_default,
+        "c":
+            np.array([3, 4], dtype=np.float32)
+    }, {
+        "st_a":
+            expected_st_a,
+        "sp": (np.array([[7]], dtype=np.int64), np.array(["c"], dtype=bytes),
+               np.array([13], dtype=np.int64)),
+        "a":
+            a_default,
+        "b":
+            b_default,
+        "c":
+            np.array([1, 2], dtype=np.float32)
+    }]
+
+    for proto, expected_output in zip(original, expected_outputs):
+      self._test(
+          {
+              "serialized": ops.convert_to_tensor(proto.SerializeToString()),
+              "features": {
+                  "st_a":
+                      parsing_ops.VarLenFeature(dtypes.int64),
+                  "sp":
+                      parsing_ops.SparseFeature("idx", "val", dtypes.string, 13
+                                               ),
+                  "a":
+                      parsing_ops.FixedLenFeature(
+                          (1, 3), dtypes.int64, default_value=a_default),
+                  "b":
+                      parsing_ops.FixedLenFeature(
+                          (3, 3), dtypes.string, default_value=b_default),
+                  # Feature "c" must be provided, since it has no default_value.
+                  "c":
+                      parsing_ops.FixedLenFeature((2,), dtypes.float32),
+              }
+          },
+          expected_output)
+
+  def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
+    original = [
+        example(features=features({
+            "val": bytes_feature([b"a", b"b"]),
+            "idx": int64_feature([0, 3])
+        })), example(features=features({
+            "val": bytes_feature([b"c", b"d"]),
+            "idx": int64_feature([7, 1])
+        }))
+    ]
+
+    expected_outputs = [{
+        "idx": (np.array([[0], [1]], dtype=np.int64),
+                np.array([0, 3], dtype=np.int64), np.array([2],
+                                                           dtype=np.int64)),
+        "sp": (np.array([[0], [3]], dtype=np.int64),
+               np.array(["a", "b"], dtype=bytes), np.array(
+                   [13], dtype=np.int64))
+    },
+                        {
+                            "idx": (np.array([[0], [1]], dtype=np.int64),
+                                    np.array([7, 1], dtype=np.int64),
+                                    np.array([2], dtype=np.int64)),
+                            "sp": (np.array([[1], [7]], dtype=np.int64),
+                                   np.array(["d", "c"], dtype=bytes),
+                                   np.array([13], dtype=np.int64))
+                        }]
+
+    for proto, expected_output in zip(original, expected_outputs):
+      self._test({
+          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
+          "features": {
+              "idx":
+                  parsing_ops.VarLenFeature(dtypes.int64),
+              "sp":
+                  parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]
+                                           ),
+          }
+      }, expected_output)
+
+  def testSerializedContainingVarLenDense(self):
+    aname = "a"
+    bname = "b"
+    cname = "c"
+    dname = "d"
+    original = [
+        example(features=features({
+            cname: int64_feature([2]),
+        })),
+        example(features=features({
+            aname: float_feature([1, 1]),
+            bname: bytes_feature([b"b0_str", b"b1_str"]),
+        })),
+        example(features=features({
+            aname: float_feature([-1, -1, 2, 2]),
+            bname: bytes_feature([b"b1"]),
+        })),
+        example(features=features({
+            aname: float_feature([]),
+            cname: int64_feature([3]),
+        })),
+    ]
+
+    expected_outputs = [
+        {
+            aname: np.empty(shape=(0, 2, 1), dtype=np.int64),
+            bname: np.empty(shape=(0, 1, 1, 1), dtype=bytes),
+            cname: np.array([2], dtype=np.int64),
+            dname: np.empty(shape=(0,), dtype=bytes)
+        },
+        {
+            aname:
+                np.array([[[1], [1]]], dtype=np.float32),
+            bname:
+                np.array(["b0_str", "b1_str"], dtype=bytes).reshape(2, 1, 1, 1),
+            cname:
+                np.empty(shape=(0,), dtype=np.int64),
+            dname:
+                np.empty(shape=(0,), dtype=bytes)
+        },
+        {
+            aname: np.array([[[-1], [-1]], [[2], [2]]], dtype=np.float32),
+            bname: np.array(["b1"], dtype=bytes).reshape(1, 1, 1, 1),
+            cname: np.empty(shape=(0,), dtype=np.int64),
+            dname: np.empty(shape=(0,), dtype=bytes)
+        },
+        {
+            aname: np.empty(shape=(0, 2, 1), dtype=np.int64),
+            bname: np.empty(shape=(0, 1, 1, 1), dtype=bytes),
+            cname: np.array([3], dtype=np.int64),
+            dname: np.empty(shape=(0,), dtype=bytes)
+        },
+    ]
+
+    for proto, expected_output in zip(original, expected_outputs):
+      self._test({
+          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
+          "features": {
+              aname:
+                  parsing_ops.FixedLenSequenceFeature(
+                      (2, 1), dtype=dtypes.float32, allow_missing=True),
+              bname:
+                  parsing_ops.FixedLenSequenceFeature(
+                      (1, 1, 1), dtype=dtypes.string, allow_missing=True),
+              cname:
+                  parsing_ops.FixedLenSequenceFeature(
+                      shape=[], dtype=dtypes.int64, allow_missing=True),
+              dname:
+                  parsing_ops.FixedLenSequenceFeature(
+                      shape=[], dtype=dtypes.string, allow_missing=True),
+          }
+      }, expected_output)
+
+    # Test with padding values.
+    # NOTE(mrry): Since we parse a single example at a time, the fixed-length
+    # sequences will not be padded, and the padding value will be ignored.
+    for proto, expected_output in zip(original, expected_outputs):
+      self._test({
+          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
+          "features": {
+              aname:
+                  parsing_ops.FixedLenSequenceFeature(
+                      (2, 1), dtype=dtypes.float32, allow_missing=True),
+              bname:
+                  parsing_ops.FixedLenSequenceFeature(
+                      (1, 1, 1), dtype=dtypes.string, allow_missing=True),
+              cname:
+                  parsing_ops.FixedLenSequenceFeature(
+                      shape=[], dtype=dtypes.int64, allow_missing=True),
+              dname:
+                  parsing_ops.FixedLenSequenceFeature(
+                      shape=[], dtype=dtypes.string, allow_missing=True),
+          }
+      }, expected_output)
+
+    # Change number of required values so the inputs are not a
+    # multiple of this size.
+    self._test(
+        {
+            "serialized":
+                ops.convert_to_tensor(original[2].SerializeToString()),
+            "features": {
+                aname:
+                    parsing_ops.FixedLenSequenceFeature(
+                        (2, 1), dtype=dtypes.float32, allow_missing=True),
+                bname:
+                    parsing_ops.FixedLenSequenceFeature(
+                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
+            }
+        },
+        # TODO(mrry): Consider matching the `tf.parse_example()` error message.
+        expected_err=(errors_impl.OpError, "Key: b."))
+
+    self._test(
+        {
+            "serialized": ops.convert_to_tensor(""),
+            "features": {
+                aname:
+                    parsing_ops.FixedLenSequenceFeature(
+                        (2, 1),
+                        dtype=dtypes.float32,
+                        allow_missing=True,
+                        default_value=[]),
+                bname:
+                    parsing_ops.FixedLenSequenceFeature(
+                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
+            }
+        },
+        expected_err=(ValueError,
+                      "Cannot reshape a tensor with 0 elements to shape"))
+
+    self._test(
+        {
+            "serialized": ops.convert_to_tensor(""),
+            "features": {
+                aname:
+                    parsing_ops.FixedLenFeature(
+                        (None, 2, 1), dtype=dtypes.float32),
+                bname:
+                    parsing_ops.FixedLenSequenceFeature(
+                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
+            }
+        },
+        expected_err=(ValueError,
+                      "First dimension of shape for feature a unknown. "
+                      "Consider using FixedLenSequenceFeature."))
+
+    self._test(
+        {
+            "serialized": ops.convert_to_tensor(""),
+            "features": {
+                cname:
+                    parsing_ops.FixedLenFeature(
+                        (1, None), dtype=dtypes.int64, default_value=[[1]]),
+            }
+        },
+        expected_err=(ValueError,
+                      "All dimensions of shape for feature c need to be known "
+                      r"but received \(1, None\)."))
+
+    self._test(
+        {
+            "serialized": ops.convert_to_tensor(""),
+            "features": {
+                aname:
+                    parsing_ops.FixedLenSequenceFeature(
+                        (2, 1), dtype=dtypes.float32, allow_missing=True),
+                bname:
+                    parsing_ops.FixedLenSequenceFeature(
+                        (1, 1, 1), dtype=dtypes.string, allow_missing=True),
+                cname:
+                    parsing_ops.FixedLenSequenceFeature(
+                        shape=[], dtype=dtypes.int64, allow_missing=False),
+                dname:
+                    parsing_ops.FixedLenSequenceFeature(
+                        shape=[], dtype=dtypes.string, allow_missing=True),
+            }
+        },
+        expected_err=(ValueError,
+                      "Unsupported: FixedLenSequenceFeature requires "
+                      "allow_missing to be True."))
+
+
+class ParseSingleExampleTest(test.TestCase):
+
+  def _test(self, kwargs, expected_values=None, expected_err=None):
+    with self.test_session() as sess:
+      if expected_err:
+        with self.assertRaisesWithPredicateMatch(expected_err[0],
+                                                 expected_err[1]):
+          out = parsing_ops.parse_single_example(**kwargs)
+          sess.run(flatten_values_tensors_or_sparse(out.values()))
+      else:
+        # Returns dict w/ Tensors and SparseTensors.
+        out = parsing_ops.parse_single_example(**kwargs)
+        # Check values.
+        tf_result = sess.run(flatten_values_tensors_or_sparse(out.values()))
+        _compare_output_to_expected(self, out, expected_values, tf_result)
+
+      # Check shapes.
+      for k, f in kwargs["features"].items():
+        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
+          self.assertEqual(tuple(out[k].get_shape()),
+                           tensor_shape.as_shape(f.shape))
+        elif isinstance(f, parsing_ops.VarLenFeature):
+          self.assertEqual(
+              tuple(out[k].indices.get_shape().as_list()), (None, 1))
+          self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
+          self.assertEqual(
+              tuple(out[k].dense_shape.get_shape().as_list()), (1,))
+
+  def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
+    original = example(features=features({
+        "c": float_feature([3, 4]),
+        "d": float_feature([0.0, 1.0]),
+        "val": bytes_feature([b"a", b"b"]),
+        "idx": int64_feature([0, 3]),
+        "st_a": float_feature([3.0, 4.0])
+    }))
+
+    serialized = original.SerializeToString()
+
+    expected_st_a = (
+        np.array(
+            [[0], [1]], dtype=np.int64),  # indices
+        np.array(
+            [3.0, 4.0], dtype=np.float32),  # values
+        np.array(
+            [2], dtype=np.int64))  # shape: max_values = 2
+
+    expected_sp = (  # indices, values, shape
+        np.array(
+            [[0], [3]], dtype=np.int64), np.array(
+                ["a", "b"], dtype="|S"), np.array(
+                    [13], dtype=np.int64))  # max_values = 13
+
+    a_default = [1, 2, 3]
+    b_default = np.random.rand(3, 3).astype(bytes)
+    expected_output = {
+        "st_a": expected_st_a,
+        "sp": expected_sp,
+        "a": [a_default],
+        "b": b_default,
+        "c": np.array([3, 4], dtype=np.float32),
+        "d": np.array([0.0, 1.0], dtype=np.float32),
+    }
+
+    self._test(
+        {
+            "serialized":
+                ops.convert_to_tensor(serialized),
+            "features": {
+                "st_a":
+                    parsing_ops.VarLenFeature(dtypes.float32),
+                "sp":
+                    parsing_ops.SparseFeature(
+                        ["idx"], "val", dtypes.string, [13]),
+                "a":
+                    parsing_ops.FixedLenFeature(
+                        (1, 3), dtypes.int64, default_value=a_default),
+                "b":
+                    parsing_ops.FixedLenFeature(
+                        (3, 3), dtypes.string, default_value=b_default),
+                # Feature "c" must be provided, since it has no default_value.
+                "c":
+                    parsing_ops.FixedLenFeature(2, dtypes.float32),
+                "d":
+                    parsing_ops.FixedLenSequenceFeature([],
+                                                        dtypes.float32,
+                                                        allow_missing=True)
+            }
+        },
+        expected_output)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/kernel_tests/parsing_ops_test.py b/tensorflow/python/kernel_tests/parsing_ops_test.py
index ff75b94322c799bcecbc4fb424403325fe493911..59b3ee2013a9922c420726d9accb2cf9355b2d42 100644
--- a/tensorflow/python/kernel_tests/parsing_ops_test.py
+++ b/tensorflow/python/kernel_tests/parsing_ops_test.py
@@ -426,7 +426,7 @@ class ParseExampleTest(test.TestCase):
             bname: bytes_feature([b"b0_str"]),
         })), example(features=features({
             aname: float_feature([-1, -1]),
-            bname: bytes_feature([b"b1"]),
+            bname: bytes_feature([b""]),
         }))
     ]
 
@@ -438,7 +438,7 @@ class ParseExampleTest(test.TestCase):
                 [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
         bname:
             np.array(
-                ["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
+                ["b0_str", ""], dtype=bytes).reshape(2, 1, 1, 1, 1),
     }
 
     # No defaults, values required
diff --git a/tensorflow/python/kernel_tests/partitioned_variables_test.py b/tensorflow/python/kernel_tests/partitioned_variables_test.py
index d40517510046959e353cad4df0c6ddbed0db90aa..56a07cb012f08dec750c5ee18cc73b3b127ef5dd 100644
--- a/tensorflow/python/kernel_tests/partitioned_variables_test.py
+++ b/tensorflow/python/kernel_tests/partitioned_variables_test.py
@@ -46,6 +46,15 @@ class PartitionerCreatorsTest(test.TestCase):
         self.assertEqual(len(v0_list), 5)
         self.assertAllEqual(v0_part, (5, 1))
 
+  def testFixedSizePartitionerInt64(self):
+    with self.test_session():
+      partitioner = partitioned_variables.fixed_size_partitioner(4, axis=0)
+      with variable_scope.variable_scope("root", partitioner=partitioner):
+        v0 = variable_scope.get_variable(
+            "v0", dtype=dtypes.int64, shape=[20])
+        v0_list = v0._get_variable_list()
+        self.assertEqual(len(v0_list), 4)
+
   def testResourceFixedSizePartitioner(self):
     with self.test_session():
       partitioner = partitioned_variables.fixed_size_partitioner(5, axis=0)
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index 6be8997cabdb4cba87f90378c405a63aa6f78ea3..5c0ea8ec8edbd1a1f523630f61afbe28adf77a19 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -361,6 +361,16 @@ class PoolingTest(test.TestCase):
         expected=expected_output,
         use_gpu=use_gpu)
 
+  def _testAvgPoolEmptyInput(self, use_gpu):
+    self._VerifyValues(
+        nn_ops.avg_pool,
+        input_sizes=[0, 8, 8, 8],
+        ksize=[1, 3, 3, 1],
+        strides=[1, 2, 2, 1],
+        padding="SAME",
+        expected=[],
+        use_gpu=use_gpu)
+
   def testAvgPooling(self):
     for use_gpu in True, False:
       self._testAvgPoolValidPadding(use_gpu)
@@ -371,6 +381,7 @@ class PoolingTest(test.TestCase):
       self._testAvgPoolSamePadding4(use_gpu)
       self._testAvgPoolSamePaddingPacket4(use_gpu)
       self._testAvgPoolSamePaddingPacket8(use_gpu)
+      self._testAvgPoolEmptyInput(use_gpu)
 
   def _testMaxPoolValidPadding(self, use_gpu):
     expected_output = [13.0, 14.0, 15.0]
@@ -543,6 +554,16 @@ class PoolingTest(test.TestCase):
           use_gpu=use_gpu,
           v2=v2)
 
+  def _testMaxPoolEmptyInput(self, use_gpu):
+    self._VerifyValues(
+        gen_nn_ops._max_pool_v2,
+        input_sizes=[0, 8, 8, 8],
+        ksize=[1, 3, 3, 1],
+        strides=[1, 2, 2, 1],
+        padding="SAME",
+        expected=[],
+        use_gpu=use_gpu)
+
   def testMaxPooling(self):
     for use_gpu in True, False:
       self._testMaxPoolValidPadding(use_gpu)
@@ -551,6 +572,7 @@ class PoolingTest(test.TestCase):
       self._testMaxPoolValidPaddingUnevenStride(use_gpu)
       self._testMaxPoolSamePaddingPacket4(use_gpu)
       self._testMaxPoolSamePaddingPacket8(use_gpu)
+      self._testMaxPoolEmptyInput(use_gpu)
 
   # Tests for DepthwiseMaxPooling on CPU only.
   def testDepthwiseMaxPool1x1DepthWindow1(self):
diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py
index 7ed99c1be9b62a145b9584fd6412f1074f501ae8..92fb68820e04c3db1385296d91d956134b8ff2d4 100644
--- a/tensorflow/python/kernel_tests/py_func_test.py
+++ b/tensorflow/python/kernel_tests/py_func_test.py
@@ -23,82 +23,93 @@ from six.moves import queue
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.client import session as session_lib
+from tensorflow.python.eager import context
+from tensorflow.python.eager import function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
 
 
-class PyOpTest(test.TestCase):
+def np_func(x, y):
+  return np.sinh(x) + np.cosh(y)
 
-  def testBasic(self):
 
-    def my_func(x, y):
-      return np.sinh(x) + np.cosh(y)
+def matmul(x, y):
+  return math_ops.matmul(x, y)
 
-    # single type
+
+class PyFuncTest(test.TestCase):
+  """Encapsulates tests for py_func and eager_py_func."""
+
+  # ----- Tests for py_func -----
+  def testSingleType(self):
     with self.test_session():
       x = constant_op.constant(1.0, dtypes.float32)
       y = constant_op.constant(2.0, dtypes.float32)
-      z = script_ops.py_func(my_func, [x, y], dtypes.float32)
-      self.assertEqual(z.eval(), my_func(1.0, 2.0).astype(np.float32))
+      z = self.evaluate(script_ops.py_func(np_func, [x, y], dtypes.float32))
+      self.assertEqual(z, np_func(1.0, 2.0).astype(np.float32))
 
-    # scalar
+  def testScalar(self):
     with self.test_session():
       x = constant_op.constant(1.0, dtypes.float32)
       y = constant_op.constant(2.0, dtypes.float32)
-      z = script_ops.py_func(my_func, [x, y], [dtypes.float32])
-      self.assertEqual(z[0].eval(), my_func(1.0, 2.0).astype(np.float32))
+      z = self.evaluate(
+          script_ops.eager_py_func(np_func, [x, y], [dtypes.float32]))
+      self.assertEqual(z[0], np_func(1.0, 2.0).astype(np.float32))
 
-    # array
+  def testArray(self):
     with self.test_session():
       x = constant_op.constant([1.0, 2.0], dtypes.float64)
       y = constant_op.constant([2.0, 3.0], dtypes.float64)
-      z = script_ops.py_func(my_func, [x, y], [dtypes.float64])
-      self.assertAllEqual(z[0].eval(),
-                          my_func([1.0, 2.0], [2.0, 3.0]).astype(np.float64))
+      z = self.evaluate(script_ops.py_func(np_func, [x, y], [dtypes.float64]))
+      self.assertAllEqual(z[0],
+                          np_func([1.0, 2.0], [2.0, 3.0]).astype(np.float64))
 
-    # a bit exotic type (complex64)
+  def testComplexType(self):
     with self.test_session():
       x = constant_op.constant(1 + 2j, dtypes.complex64)
       y = constant_op.constant(3 + 4j, dtypes.complex64)
-      z, = script_ops.py_func(my_func, [x, y], [dtypes.complex64])
-      self.assertAllClose(z.eval(), my_func(1 + 2j, 3 + 4j))
+      z = self.evaluate(script_ops.py_func(np_func, [x, y], dtypes.complex64))
+      self.assertAllClose(z, np_func(1 + 2j, 3 + 4j))
 
-    # a bit excotic function (rfft)
+  def testRFFT(self):
     with self.test_session():
       x = constant_op.constant([1., 2., 3., 4.], dtypes.float32)
 
       def rfft(x):
         return np.fft.rfft(x).astype(np.complex64)
 
-      y, = script_ops.py_func(rfft, [x], [dtypes.complex64])
-      self.assertAllClose(y.eval(), np.fft.rfft([1., 2., 3., 4.]))
+      y = self.evaluate(script_ops.py_func(rfft, [x], dtypes.complex64))
+      self.assertAllClose(y, np.fft.rfft([1., 2., 3., 4.]))
 
-    # returns a python literal.
+  def testPythonLiteral(self):
     with self.test_session():
 
       def literal(x):
-        return 1.0 if x == 0.0 else 0.0
+        return 1.0 if float(x) == 0.0 else 0.0
 
       x = constant_op.constant(0.0, dtypes.float64)
-      y, = script_ops.py_func(literal, [x], [dtypes.float64])
-      self.assertAllClose(y.eval(), 1.0)
+      y = self.evaluate(script_ops.py_func(literal, [x], dtypes.float64))
+      self.assertAllClose(y, 1.0)
 
-    # returns a list
+  def testList(self):
     with self.test_session():
 
       def list_func(x):
         return [x, x + 1]
 
       x = constant_op.constant(0.0, dtypes.float64)
-      y, z = script_ops.py_func(list_func, [x], [dtypes.float64] * 2)
-      self.assertAllClose(y.eval(), 0.0)
-      self.assertAllClose(z.eval(), 1.0)
+      y = self.evaluate(
+          script_ops.py_func(list_func, [x], [dtypes.float64] * 2))
+      self.assertAllClose(y, [0.0, 1.0])
 
+  def testTuple(self):
     # returns a tuple
     with self.test_session():
 
@@ -106,17 +117,17 @@ class PyOpTest(test.TestCase):
         return x, x + 1
 
       x = constant_op.constant(0.0, dtypes.float64)
-      y, z = script_ops.py_func(tuple_func, [x], [dtypes.float64] * 2)
-      self.assertAllClose(y.eval(), 0.0)
-      self.assertAllClose(z.eval(), 1.0)
+      y = self.evaluate(
+          script_ops.py_func(tuple_func, [x], [dtypes.float64] * 2))
+      self.assertAllClose(y, [0.0, 1.0])
 
     # returns a tuple, Tout and inp a tuple
     with self.test_session():
       x = constant_op.constant(0.0, dtypes.float64)
-      y, z = script_ops.py_func(tuple_func, (x,), (dtypes.float64,
-                                                   dtypes.float64))
-      self.assertAllClose(y.eval(), 0.0)
-      self.assertAllClose(z.eval(), 1.0)
+      y = self.evaluate(
+          script_ops.py_func(tuple_func, (x,),
+                             (dtypes.float64, dtypes.float64)))
+      self.assertAllClose(y, [0.0, 1.0])
 
   def testStrings(self):
 
@@ -128,10 +139,12 @@ class PyOpTest(test.TestCase):
 
     with self.test_session():
       x = constant_op.constant([b"hello", b"hi"], dtypes.string)
-      y, = script_ops.py_func(read_fixed_length_numpy_strings, [],
-                              [dtypes.string])
-      z, = script_ops.py_func(read_and_return_strings, [x, y], [dtypes.string])
-      self.assertListEqual(list(z.eval()), [b"hello there", b"hi there"])
+      y = self.evaluate(
+          script_ops.py_func(read_fixed_length_numpy_strings, [],
+                             dtypes.string))
+      z = self.evaluate(
+          script_ops.py_func(read_and_return_strings, [x, y], dtypes.string))
+      self.assertAllEqual(z, [b"hello there", b"hi there"])
 
   def testStringsAreConvertedToBytes(self):
 
@@ -143,10 +156,12 @@ class PyOpTest(test.TestCase):
 
     with self.test_session():
       x = constant_op.constant(["hello", "hi"], dtypes.string)
-      y, = script_ops.py_func(read_fixed_length_numpy_strings, [],
-                              [dtypes.string])
-      z, = script_ops.py_func(read_and_return_strings, [x, y], [dtypes.string])
-      self.assertListEqual(list(z.eval()), [b"hello there", b"hi there"])
+      y = self.evaluate(
+          script_ops.py_func(read_fixed_length_numpy_strings, [],
+                             dtypes.string))
+      z = self.evaluate(
+          script_ops.py_func(read_and_return_strings, [x, y], dtypes.string))
+      self.assertAllEqual(z, [b"hello there", b"hi there"])
 
   def testObjectArraysAreConvertedToBytes(self):
 
@@ -186,16 +201,8 @@ class PyOpTest(test.TestCase):
 
   def testNoInput(self):
     with self.test_session():
-      x, = script_ops.py_func(lambda: 42.0, [], [dtypes.float64])
-      self.assertAllClose(x.eval(), 42.0)
-
-  def testCleanup(self):
-    for _ in xrange(1000):
-      g = ops.Graph()
-      with g.as_default():
-        c = constant_op.constant([1.], dtypes.float32)
-        _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32])
-    self.assertTrue(script_ops._py_funcs.size() < 100)
+      x = self.evaluate(script_ops.py_func(lambda: 42.0, [], dtypes.float64))
+      self.assertAllClose(x, 42.0)
 
   def testAlias(self):
     with self.test_session():
@@ -242,8 +249,8 @@ class PyOpTest(test.TestCase):
       # Create a numpy array aliasing a tensor and a tensor aliasing this array
       z, = script_ops.py_func(ident, [p], [dtypes.float32])
       z += 0.0  # Makes sure we release the tensor aliasing the numpy array x[0]
-                # above instead of using its memory as the return value of
-                # session.run
+      # above instead of using its memory as the return value of
+      # session.run
       self.assertEqual(0.0, z.eval(feed_dict={p: [0.0]}))
 
   def testStateful(self):
@@ -319,10 +326,10 @@ class PyOpTest(test.TestCase):
       def value(self):
         return self._value
 
-    with self.test_session() as sess:
+    with self.test_session():
       s = State()
       op = s.increment(constant_op.constant(2, dtypes.int64))
-      ret = sess.run(op)
+      ret = self.evaluate(op)
       self.assertIsNone(ret)
       self.assertAllEqual([3], s.value)
 
@@ -336,15 +343,24 @@ class PyOpTest(test.TestCase):
     with self.test_session() as sess:
       self.assertEqual(sess.run(f), [])
 
-  def _testExceptionHandling(self, py_exp, tf_exp):
+  def _testExceptionHandling(self, py_exp, tf_exp, eager=False):
 
     def raise_exception():
       raise py_exp("blah")  # pylint: disable=not-callable
 
-    f = script_ops.py_func(raise_exception, [], [])
-    with self.test_session() as sess:
+    if eager:
+      if context.in_eager_mode():
+        with self.assertRaisesRegexp(tf_exp, "blah"):
+          f = script_ops.eager_py_func(raise_exception, [], [])
+        return
+      else:
+        f = script_ops.eager_py_func(raise_exception, [], [])
+    else:
+      f = script_ops.py_func(raise_exception, [], [])
+
+    with self.test_session():
       with self.assertRaisesRegexp(tf_exp, "blah"):
-        sess.run(f)
+        self.evaluate(f)
 
   def testExceptionHandling(self):
     self._testExceptionHandling(ValueError, errors.InvalidArgumentError)
@@ -358,6 +374,89 @@ class PyOpTest(test.TestCase):
 
     self._testExceptionHandling(WeirdError, errors.UnknownError)
 
+  # ----- Tests shared by py_func and eager_py_func -----
+  def testCleanup(self):
+    for _ in xrange(1000):
+      g = ops.Graph()
+      with g.as_default():
+        c = constant_op.constant([1.], dtypes.float32)
+        _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32])
+        _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32])
+    self.assertTrue(script_ops._py_funcs.size() < 100)
+
+  # ----- Tests for eager_py_func -----
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerSingleOutputInt32(self):
+    a = array_ops.ones((3, 3), dtype=dtypes.int32)
+    x = array_ops.ones((3, 1), dtype=dtypes.int32)
+    output = script_ops.eager_py_func(matmul, inp=[a, x], Tout=dtypes.int32)
+    with self.test_session():
+      ret = self.evaluate(output)
+      self.assertAllEqual(ret, [[3], [3], [3]])
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerSingleOutputFloat32(self):
+    a = array_ops.ones((3, 3), dtype=dtypes.float32)
+    x = array_ops.ones((3, 1), dtype=dtypes.float32)
+    output = script_ops.eager_py_func(matmul, inp=[a, x], Tout=dtypes.float32)
+    with self.test_session():
+      ret = self.evaluate(output)
+      self.assertAllClose(ret, [[3.0], [3.0], [3.0]])
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerArrayOutput(self):
+    a = array_ops.ones((3, 3), dtype=dtypes.int32)
+    x = array_ops.ones((3, 1), dtype=dtypes.int32)
+    output = script_ops.eager_py_func(
+        lambda a, x: [matmul(a, x)], inp=[a, x], Tout=[dtypes.int32])
+
+    with self.test_session():
+      ret = self.evaluate(output)
+      self.assertAllEqual(ret, [[[3], [3], [3]]])
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerReturnNone(self):
+
+    def no_return_value():
+      return
+
+    output = script_ops.eager_py_func(no_return_value, inp=[], Tout=[])
+    ret = self.evaluate(output)
+    if context.in_eager_mode():
+      self.assertEquals(len(ret), 0)
+    else:
+      self.assertIsNone(ret)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerPyFuncInDefun(self):
+
+    def wrapper():
+      a = array_ops.ones((3, 3), dtype=dtypes.int32)
+      x = array_ops.ones((3, 1), dtype=dtypes.int32)
+      return script_ops.eager_py_func(matmul, inp=[a, x], Tout=dtypes.int32)
+
+    wrapped = function.defun(wrapper)
+    ret = self.evaluate(wrapped())
+    self.assertAllEqual(ret, [[3], [3], [3]])
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testEagerExceptionHandling(self):
+    self._testExceptionHandling(
+        ValueError, errors.InvalidArgumentError, eager=True)
+    self._testExceptionHandling(
+        TypeError, errors.InvalidArgumentError, eager=True)
+    self._testExceptionHandling(
+        StopIteration, errors.OutOfRangeError, eager=True)
+    self._testExceptionHandling(
+        MemoryError, errors.ResourceExhaustedError, eager=True)
+    self._testExceptionHandling(
+        NotImplementedError, errors.UnimplementedError, eager=True)
+
+    class WeirdError(Exception):
+      pass
+
+    self._testExceptionHandling(WeirdError, errors.UnknownError, eager=True)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/random/multinomial_op_test.py b/tensorflow/python/kernel_tests/random/multinomial_op_test.py
index ca48ba6cadee431c3af41b72646d4f1b3e60ec66..a9dc7b7de000024f23b88406bf0c1c2f32ac4fac 100644
--- a/tensorflow/python/kernel_tests/random/multinomial_op_test.py
+++ b/tensorflow/python/kernel_tests/random/multinomial_op_test.py
@@ -57,12 +57,14 @@ class MultinomialTest(test.TestCase):
   @test_util.run_in_graph_and_eager_modes()
   def testSmallEntropy(self):
     random_seed.set_random_seed(1618)
-    with test_util.device(use_gpu=True):
-      # A logit value of -10 corresponds to a probability of ~5e-5.
-      logits = constant_op.constant([[-10., 10., -10.], [-10., -10., 10.]])
-      num_samples = 1000
-      samples = self.evaluate(random_ops.multinomial(logits, num_samples))
-      self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples)
+    for output_dtype in [np.int32, np.int64]:
+      with test_util.device(use_gpu=True):
+        # A logit value of -10 corresponds to a probability of ~5e-5.
+        logits = constant_op.constant([[-10., 10., -10.], [-10., -10., 10.]])
+        num_samples = 1000
+        samples = self.evaluate(random_ops.multinomial(
+            logits, num_samples, output_dtype=output_dtype))
+        self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples)
 
   def testOneOpMultipleStepsIndependent(self):
     with self.test_session(use_gpu=True) as sess:
diff --git a/tensorflow/python/kernel_tests/random/random_ops_test.py b/tensorflow/python/kernel_tests/random/random_ops_test.py
index 56aaa53b981497d91ca01f390df26691f142556f..5a2903a4234202c828168b6538baf320b961c776 100644
--- a/tensorflow/python/kernel_tests/random/random_ops_test.py
+++ b/tensorflow/python/kernel_tests/random/random_ops_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
@@ -174,6 +175,17 @@ class TruncatedNormalTest(test.TestCase):
       diff = rnd2 - rnd1
       self.assertTrue(np.linalg.norm(diff.eval()) > 0.1)
 
+  def testEagerSeed(self):
+    with context.eager_mode():
+      # Ensure a context has been created
+      random_ops.random_normal([])
+      # Set the same seed twice and check that the values match
+      context.set_global_seed(42)
+      rnd1 = random_ops.random_normal([])
+      context.set_global_seed(42)
+      rnd2 = random_ops.random_normal([])
+      self.assertAllEqual(rnd1, rnd2)
+
 
 class RandomUniformTest(test.TestCase):
 
diff --git a/tensorflow/python/kernel_tests/record_input_test.py b/tensorflow/python/kernel_tests/record_input_test.py
index 1ec48ac361b81e66fd77e8a4506bebf910ea0e8a..068860d5d46d1e3dfac87aa6b1f986d78d9c8316 100644
--- a/tensorflow/python/kernel_tests/record_input_test.py
+++ b/tensorflow/python/kernel_tests/record_input_test.py
@@ -26,13 +26,17 @@ from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
-
 class RecordInputOpTest(test.TestCase):
 
-  def generateTestData(self, prefix, n, m):
+  def generateTestData(self,
+                       prefix,
+                       n,
+                       m,
+                       compression_type=tf_record.TFRecordCompressionType.NONE):
+    options = tf_record.TFRecordOptions(compression_type)
     for i in range(n):
       f = os.path.join(self.get_temp_dir(), prefix + "." + str(i))
-      w = tf_record.TFRecordWriter(f)
+      w = tf_record.TFRecordWriter(f, options=options)
 
       for j in range(m):
         w.write("{0:0{width}}".format(i * m + j, width=10).encode("utf-8"))
@@ -52,6 +56,44 @@ class RecordInputOpTest(test.TestCase):
 
       self.assertEqual(sess.run(yield_op), b"0000000000")
 
+  def testRecordInputSimpleGzip(self):
+    with self.test_session() as sess:
+      self.generateTestData(
+          "basic",
+          1,
+          1,
+          compression_type=tf_record.TFRecordCompressionType.GZIP)
+
+      yield_op = data_flow_ops.RecordInput(
+          file_pattern=os.path.join(self.get_temp_dir(), "basic.*"),
+          parallelism=1,
+          buffer_size=1,
+          batch_size=1,
+          name="record_input",
+          compression_type=tf_record.TFRecordCompressionType.GZIP).get_yield_op(
+          )
+
+      self.assertEqual(sess.run(yield_op), b"0000000000")
+
+  def testRecordInputSimpleZlib(self):
+    with self.test_session() as sess:
+      self.generateTestData(
+          "basic",
+          1,
+          1,
+          compression_type=tf_record.TFRecordCompressionType.ZLIB)
+
+      yield_op = data_flow_ops.RecordInput(
+          file_pattern=os.path.join(self.get_temp_dir(), "basic.*"),
+          parallelism=1,
+          buffer_size=1,
+          batch_size=1,
+          name="record_input",
+          compression_type=tf_record.TFRecordCompressionType.ZLIB).get_yield_op(
+          )
+
+      self.assertEqual(sess.run(yield_op), b"0000000000")
+
   def testRecordInputEpochs(self):
     files = 100
     records_per_file = 100
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index 8cd1f52d80039deccfe4623b8bae9bb1482b8392..dd11ba700d518ab230c1160d17f4cc0833a79198 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -441,6 +441,24 @@ class CreluTest(test.TestCase):
             np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
             use_gpu=True)
 
+  def testNumbersWithAxis0(self):
+    with self.test_session():
+      crelu = nn_ops.crelu(
+          np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]), axis=0)
+      tf_relu = crelu.eval()
+      np_crelu = np.array([[0, 7, 0, 3, 0], [1, 0, 5, 0, 9], [9, 0, 5, 0, 1],
+                           [0, 3, 0, 7, 0]])
+      self.assertAllEqual(np_crelu, tf_relu)
+
+  def testNumbersWithAxis1(self):
+    with self.test_session():
+      crelu = nn_ops.crelu(
+          np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]), axis=1)
+      tf_relu = crelu.eval()
+      np_crelu = np.array([[0, 7, 0, 3, 0, 9, 0, 5, 0, 1],
+                           [1, 0, 5, 0, 9, 0, 3, 0, 7, 0]])
+      self.assertAllEqual(np_crelu, tf_relu)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index 4c7a9cb0f9542afe8fc1608a05864b739d741c97..7b131a5b8ca46cc205ec29d5a48cd704b1c67b04 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -302,7 +302,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
     self.evaluate(variables.global_variables_initializer())
     self.assertEqual(3.0, self.evaluate(v.value()))
     self.evaluate(resource_variable_ops.destroy_resource_op(v.handle))
-    with self.assertRaises(errors.NotFoundError):
+    with self.assertRaises(errors.FailedPreconditionError):
       self.evaluate(v.value())
     # Handle to a resource not actually created.
     handle = resource_variable_ops.var_handle_op(dtype=dtypes.int32, shape=[])
diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
index a79d66e9889b4dc55a66c505bac9b29a453356be..9f5794951524b2689daa5fc4eefb19703262b8f0 100644
--- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -157,6 +158,20 @@ class StatefulScatterNdTest(test.TestCase):
       result = sess.run(scatter)
       self.assertAllClose(result, expected)
 
+  def testSimpleResource(self):
+    indices = constant_op.constant([[4], [3], [1], [7]], dtype=dtypes.int32)
+    updates = constant_op.constant([9, 10, 11, 12], dtype=dtypes.float32)
+    ref = resource_variable_ops.ResourceVariable(
+        [0, 0, 0, 0, 0, 0, 0, 0], dtype=dtypes.float32)
+    expected = np.array([0, 11, 0, 10, 9, 0, 0, 12])
+    scatter = state_ops.scatter_nd_update(ref, indices, updates)
+    init = variables.global_variables_initializer()
+
+    with self.test_session(use_gpu=True) as sess:
+      sess.run(init)
+      sess.run(scatter)
+      self.assertAllClose(ref.eval(), expected)
+
   def testSimple2(self):
     indices = constant_op.constant([[1, 0], [1, 1]], dtype=dtypes.int32)
     updates = constant_op.constant([11., 12.], dtype=dtypes.float32)
@@ -335,7 +350,7 @@ class StatefulScatterNdTest(test.TestCase):
         indices = np.array([2, 0, 5])
         op(ref, indices, updates).eval()
 
-        # Indicies out of range should not fail.
+        # Indices out of range should not fail.
         indices = np.array([-1, 0, 5])
         op(ref, indices, updates).eval()
         indices = np.array([2, 0, 6])
@@ -487,6 +502,43 @@ class ScatterNdTest(test.TestCase):
       if self.non_aliasing_add_test:
         self.assertAllEqual(expected_input_grad, input_grad.eval())
 
+  def testGradientsRank7SliceUpdate(self):
+    indices = constant_op.constant(
+        [[[
+            [[[[0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0]]]],
+            [[[[0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 1]]]]
+        ]]], dtype=dtypes.int32)
+    updates = constant_op.constant(
+        [[[
+            [[[[5, 6], [2, 4]]]],
+            [[[[1, 3], [6, 8]]]]
+        ]]], dtype=dtypes.float64)
+    shape = constant_op.constant([1, 1, 2, 1, 1, 2, 2], dtype=dtypes.int32)
+    input_ = array_ops.zeros(shape, dtype=dtypes.float64)
+    outputs = self.scatter_nd(indices, updates, shape, input_)
+
+    grad_vals = constant_op.constant(
+        [[[
+            [[[[1, 2], [3, 4]]]],
+            [[[[5, 6], [7, 8]]]]
+        ]]], dtype=dtypes.float64)
+    updates_grad, input_grad = gradients_impl.gradients(
+        [outputs], [updates, input_], [grad_vals])
+    expected_updates_grad = np.array(
+        [[[
+            [[[[3, 4], [5, 6]]]],
+            [[[[1, 2], [7, 8]]]]
+        ]]], dtype=np.float64)
+    expected_input_grad = np.array(
+        [[[
+            [[[[1, 2], [3, 4]]]],
+            [[[[5, 6], [7, 8]]]]
+        ]]], dtype=np.float64)
+    with self.test_session():
+      self.assertAllEqual(expected_updates_grad, updates_grad.eval())
+      if self.non_aliasing_add_test:
+        self.assertAllEqual(expected_input_grad, input_grad.eval())
+
   def testScatterNdRepatedIndicesAdd(self):
     indices = array_ops.zeros([100000, 1], dtypes.int32)
     values = np.random.randn(100000)
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index 99f9f09690f4a38f68a230efcd0dd2bf223376be..5a54f448d092093db668570d055801f9f9cd0f9f 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -46,13 +46,13 @@ class SegmentReductionHelper(test.TestCase):
     return constant_op.constant(
         np_values, shape=input_shape, dtype=dtype), np_values
 
-  def _segmentReduce(self, indices, x, op1, op2=None, num_out_rows=None):
+  def _segmentReduce(self, indices, x, op1, op2=None, num_segments=None):
     if not x.size:
       return np.array([])
     indices = np.asarray(indices)
-    if num_out_rows is None:
-      num_out_rows = indices[-1] + 1
-    output = [None] * num_out_rows
+    if num_segments is None:
+      num_segments = indices[-1] + 1
+    output = [None] * num_segments
     slice_shape = x.shape[indices.ndim:]
     x_flat = x.reshape((indices.size,) + slice_shape)
     for i, index in enumerate(indices.ravel()):
@@ -259,13 +259,34 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
         with self.test_session(use_gpu=True):
           tf_x, np_x = self._input(shape, dtype=dtype)
           np_ans = self._segmentReduce(
-              indices, np_x, np.add, op2=None, num_out_rows=num_segments)
+              indices, np_x, np.add, op2=None, num_segments=num_segments)
           s = math_ops.unsorted_segment_sum(
               data=tf_x, segment_ids=indices, num_segments=num_segments)
           tf_ans = s.eval()
         self.assertAllClose(np_ans, tf_ans)
         self.assertShapeEqual(np_ans, s)
 
+  def testNumSegmentsTypes(self):
+    dtypes = [dtypes_lib.int32, dtypes_lib.int64]
+    indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3])
+    num_segments = 12
+    for indices in indices_flat, indices_flat.reshape(5, 2):
+      shape = indices.shape + (2,)
+      for dtype in dtypes:
+        with self.test_session(use_gpu=True):
+          tf_x, np_x = self._input(shape)
+          num_segments_constant = constant_op.constant(
+              num_segments, dtype=dtype)
+          np_ans = self._segmentReduce(
+              indices, np_x, np.add, op2=None, num_segments=num_segments)
+          s = math_ops.unsorted_segment_sum(
+              data=tf_x,
+              segment_ids=indices,
+              num_segments=num_segments_constant)
+          tf_ans = s.eval()
+        self.assertAllClose(np_ans, tf_ans)
+        self.assertShapeEqual(np_ans, s)
+
   def testGradientSegmentSum(self):
     num_cols = 2
     indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3])
@@ -376,7 +397,7 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
         with self.test_session(use_gpu=True):
           tf_x, np_x = self._input(shape, dtype=dtype)
           np_ans = self._segmentReduce(
-              indices, np_x, np.add, op2=None, num_out_rows=num_segments)
+              indices, np_x, np.add, op2=None, num_segments=num_segments)
           # Replace np_ans[8] with 0 for the value
           np_ans[8:] = 0
           # Replace 8 with -1 in indices
@@ -396,8 +417,15 @@ class SparseSegmentReductionHelper(SegmentReductionHelper):
     return (constant_op.constant(
         indices, dtype=dtypes_lib.int32), indices, a, b)
 
-  def _sparseSegmentReduce(self, x, indices, segment_indices, op1, op2=None):
-    return self._segmentReduce(segment_indices, x[indices], op1, op2)
+  def _sparseSegmentReduce(self,
+                           x,
+                           indices,
+                           segment_indices,
+                           op1,
+                           op2=None,
+                           num_segments=None):
+    return self._segmentReduce(
+        segment_indices, x[indices], op1, op2, num_segments=num_segments)
 
 
 class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
@@ -454,6 +482,31 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
         tf_ans = s.eval()
         self.assertAllClose(np_ans, tf_ans)
 
+  def testWithNumSegments(self):
+    tf_x, np_x = self._input([10, 4], dtype=dtypes_lib.float32)
+    ops_list = [(np.add, None, math_ops.sparse_segment_sum_with_num_segments),
+                (self._mean_cum_op, self._mean_reduce_op,
+                 math_ops.sparse_segment_mean_with_num_segments)]
+    segment_indices = [0, 2, 2, 2]
+    tf_indices = [8, 3, 0, 9]
+    num_segments = 5
+    with self.test_session(use_gpu=False):
+      for np_op1, np_op2, tf_op in ops_list:
+        np_ans = self._sparseSegmentReduce(
+            np_x,
+            tf_indices,
+            segment_indices,
+            np_op1,
+            np_op2,
+            num_segments=num_segments)
+        s = tf_op(
+            data=tf_x,
+            indices=tf_indices,
+            segment_ids=segment_indices,
+            num_segments=num_segments)
+        tf_ans = s.eval()
+        self.assertAllClose(np_ans, tf_ans)
+
   def testSegmentIdsGreaterThanZero(self):
     tf_x, np_x = self._input([10, 4], dtype=dtypes_lib.float32)
     ops_list = [(np.add, None, math_ops.sparse_segment_sum), (
@@ -562,6 +615,63 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
         with self.assertRaisesOpError("segment ids must be >= 0"):
           s.eval()
 
+  def testSegmentWithNumSegmentsValid(self):
+    # Baseline for the test*WithNumSegmentsInvalid* methods below.
+    tf_x, _ = self._input([10, 4], dtype=dtypes_lib.float32)
+    ops_list = [
+        math_ops.sparse_segment_sum_with_num_segments,
+        math_ops.sparse_segment_mean_with_num_segments,
+    ]
+    num_segments = 5
+    segment_indices = [0, 1, 3, 3]
+    tf_indices = [8, 3, 0, 9]
+    with self.test_session(use_gpu=False):
+      for tf_op in ops_list:
+        s = tf_op(
+            data=tf_x,
+            indices=tf_indices,
+            segment_ids=segment_indices,
+            num_segments=num_segments)
+        s.eval()
+
+  def testSegmentWithNumSegmentsInvalid1(self):
+    tf_x, _ = self._input([10, 4], dtype=dtypes_lib.float32)
+    ops_list = [
+        math_ops.sparse_segment_sum_with_num_segments,
+        math_ops.sparse_segment_mean_with_num_segments,
+    ]
+    num_segments = 5
+    segment_indices = [0, 1, 3, 5]
+    tf_indices = [8, 3, 0, 9]
+    with self.test_session(use_gpu=False):
+      for tf_op in ops_list:
+        s = tf_op(
+            data=tf_x,
+            indices=tf_indices,
+            segment_ids=segment_indices,
+            num_segments=num_segments)
+        with self.assertRaisesOpError("segment ids must be < num_segments"):
+          s.eval()
+
+  def testSegmentWithNumSegmentsInvalid2(self):
+    tf_x, _ = self._input([10, 4], dtype=dtypes_lib.float32)
+    ops_list = [
+        math_ops.sparse_segment_sum_with_num_segments,
+        math_ops.sparse_segment_mean_with_num_segments,
+    ]
+    num_segments = -2
+    segment_indices = [0, 1, 3, 3]
+    tf_indices = [8, 3, 0, 9]
+    with self.test_session(use_gpu=False):
+      for tf_op in ops_list:
+        with self.assertRaisesRegexp(
+            ValueError, "Cannot specify a negative value for num_segments"):
+          tf_op(
+              data=tf_x,
+              indices=tf_indices,
+              segment_ids=segment_indices,
+              num_segments=num_segments)
+
   def testGradient(self):
     shape = [10, 4]
 
@@ -580,6 +690,32 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
             delta=1)
       self.assertAllClose(jacob_t, jacob_n)
 
+  def testGradientWithEmptySegmentsAtEnd(self):
+    shape = [10, 4]
+
+    num_segments = 5
+    segment_indices = [0, 1, 2, 2]
+    num_indices = len(segment_indices)
+    for tf_op in [
+        math_ops.sparse_segment_sum_with_num_segments,
+        math_ops.sparse_segment_mean_with_num_segments,
+    ]:
+      with self.test_session():
+        tf_indices, _, tf_x, np_x = self._sparse_input(
+            shape, num_indices, dtype=dtypes_lib.float64)
+        s = tf_op(
+            data=tf_x,
+            indices=tf_indices,
+            segment_ids=segment_indices,
+            num_segments=num_segments)
+        jacob_t, jacob_n = gradient_checker.compute_gradient(
+            tf_x,
+            shape,
+            s, [5, 4],
+            x_init_value=np_x.astype(np.double),
+            delta=1)
+      self.assertAllClose(jacob_t, jacob_n)
+
   def testGradientValid(self):
     # Baseline for the testGradient*Invalid* methods below.
     tf_x, _ = self._input([3, 4], dtype=dtypes_lib.float32)
@@ -625,7 +761,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ops_list = [
         math_ops.sparse_segment_mean_grad, math_ops.sparse_segment_sqrt_n_grad
     ]
-    segment_indices = [0, 1, 1, 1]  # 2 segments
+    segment_indices = [0, 1, 1, 4]  # 5 segments
     tf_indices = [8, 3, 0, 9]
     with self.test_session(use_gpu=False):
       for tf_op in ops_list:
diff --git a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
index 6ca447967196e5cdd59df74ae637f374826a8c30..4935ed6ca557f723b14713fdcde4e11c411bea1a 100644
--- a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
@@ -69,7 +69,7 @@ class SparseMatMulTest(test.TestCase):
 
     np_ans = np.matrix(np_x) * np.matrix(np_y)
     self.assertShapeEqual(np_ans, tf_ans)
-    self.assertAllClose(np_ans, out, rtol=1e-4, atol=1e-4)
+    self.assertAllCloseAccordingToType(np_ans, out, rtol=1e-4, atol=1e-4)
 
   def testBasic(self):
     x = np.arange(0., 4.).reshape([4, 1]).astype(np.float32)
@@ -128,7 +128,8 @@ class SparseMatMulTest(test.TestCase):
 
 class MatMulGradientTest(test.TestCase):
 
-  def _testGradients(self, tr_a, tr_b, sp_a, sp_b, a_dtype, b_dtype, name):
+  def _testGradients(self, tr_a, tr_b, sp_a, sp_b, a_dtype, b_dtype, delta,
+                     name):
     with self.test_session():
       a = constant_op.constant(
           RandMatrix(
@@ -151,12 +152,12 @@ class MatMulGradientTest(test.TestCase):
           a, [2, 3] if tr_a else [3, 2],
           m, [3, 4],
           x_init_value=a.eval(),
-          delta=1 / 64.) + gradient_checker.compute_gradient_error(
+          delta=delta) + gradient_checker.compute_gradient_error(
               b, [4, 2] if tr_b else [2, 4],
               m, [3, 4],
               x_init_value=b.eval(),
-              delta=1 / 64.))
-    self.assertLess(err, 1 / 128.)
+              delta=delta))
+    self.assertLess(err, delta / 2.)
 
   def testGradientInput(self):
     for tr_a in [True, False]:
@@ -165,9 +166,15 @@ class MatMulGradientTest(test.TestCase):
           for sp_b in [True, False]:
             for a_dtype in (dtypes.float32, dtypes.bfloat16):
               for b_dtype in (dtypes.float32, dtypes.bfloat16):
+                # Note: bfloat16 only has 7 mantissa bits, versus float32 with
+                # 10. Hence, we shift by 2 bits to pass the test.
+                if a_dtype == dtypes.bfloat16 and b_dtype == dtypes.bfloat16:
+                  delta = 1 / 16.
+                else:
+                  delta = 1 / 64.
                 name = "sparse_matmul_%s_%s_%s_%s" % (tr_a, tr_b, sp_a, sp_b)
                 self._testGradients(tr_a, tr_b, sp_a, sp_b, a_dtype, b_dtype,
-                                    name)
+                                    delta, name)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py
index 1ab78a07784b7d0a6b4852c5336d1d0519f3b00f..cb5a66312fdfbc930483d59248848cf39cb6f9ba 100644
--- a/tensorflow/python/kernel_tests/sparse_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_ops_test.py
@@ -938,6 +938,7 @@ class SparseTransposeTest(test.TestCase):
           sp_trans = sparse_ops.sparse_transpose(sp_input, perm=perm)
           dn_trans = sparse_ops.sparse_tensor_to_dense(sp_trans).eval()
           expected_trans = array_ops.transpose(dn_input, perm=perm).eval()
+          self.assertAllEqual(expected_trans.shape, sp_trans.get_shape())
           self.assertAllEqual(dn_trans, expected_trans)
 
 
diff --git a/tensorflow/python/kernel_tests/sparse_reshape_op_test.py b/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
index 0d2887f3cef88605e87bddb7830845f12e37220b..89a54c8ab6fb19c79404222365124b72dd3b6f3f 100644
--- a/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
@@ -57,6 +57,25 @@ class SparseReshapeTest(test.TestCase):
     sp_output = sparse_ops.sparse_reshape(sp_input, shape=(1, 5, 2, 3))
     self.assertAllEqual((1, 5, 2, 3), sp_output.get_shape())
 
+  def testStaticShapeInfoPreservedWithInferredDims(self):
+    sp_input = sparse_tensor.SparseTensor.from_value(
+        self._SparseTensorValue_2x3x4())
+    self.assertAllEqual((2, 3, 4), sp_input.get_shape())
+    sp_output = sparse_ops.sparse_reshape(sp_input, shape=(2, -1))
+    self.assertAllEqual((2, 3 * 4), sp_output.get_shape())
+
+  def testRaisesIfMoreThanOneInferredDim(self):
+    sp_input = sparse_tensor.SparseTensor.from_value(
+        self._SparseTensorValue_2x3x4())
+    with self.assertRaisesRegexp(ValueError, "At most one dimension can"):
+      sparse_ops.sparse_reshape(sp_input, shape=(-1, 2, -1))
+
+  def testRaisesIfInferredShapeNotPossible(self):
+    sp_input = sparse_tensor.SparseTensor.from_value(
+        self._SparseTensorValue_2x3x4())
+    with self.assertRaisesRegexp(ValueError, "Cannot reshape"):
+      sparse_ops.sparse_reshape(sp_input, shape=(-1, 7))
+
   def testSameShape(self):
     with self.test_session(use_gpu=False) as sess:
       input_val = self._SparseTensorValue_5x6()
diff --git a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
index 78c113f51442c00984c1a5ab32a4dcc1a555ca9a..27b39a626fcc6b2705bf9e797b5293ed3f1c7820 100644
--- a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
@@ -64,12 +64,14 @@ class SerializeSparseTest(test.TestCase):
     shape = np.array([3, 4, 5]).astype(np.int64)
     return sparse_tensor_lib.SparseTensorValue(ind, val, shape)
 
-  def testSerializeDeserialize(self):
+  def _testSerializeDeserializeHelper(self,
+                                      serialize_fn,
+                                      deserialize_fn,
+                                      out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input = self._SparseTensorValue_5x6(np.arange(6))
-      serialized = sparse_ops.serialize_sparse(sp_input)
-      sp_deserialized = sparse_ops.deserialize_sparse(
-          serialized, dtype=dtypes.int32)
+      serialized = serialize_fn(sp_input, out_type=out_type)
+      sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)
 
       indices, values, shape = sess.run(sp_deserialized)
 
@@ -77,14 +79,25 @@ class SerializeSparseTest(test.TestCase):
       self.assertAllEqual(values, sp_input[1])
       self.assertAllEqual(shape, sp_input[2])
 
-  def testSerializeDeserializeBatch(self):
+  def testSerializeDeserialize(self):
+    self._testSerializeDeserializeHelper(sparse_ops.serialize_sparse,
+                                         sparse_ops.deserialize_sparse)
+
+  def testVariantSerializeDeserialize(self):
+    self._testSerializeDeserializeHelper(sparse_ops.serialize_sparse,
+                                         sparse_ops.deserialize_sparse,
+                                         dtypes.variant)
+
+  def _testSerializeDeserializeBatchHelper(self,
+                                           serialize_fn,
+                                           deserialize_fn,
+                                           out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input = self._SparseTensorValue_5x6(np.arange(6))
-      serialized = sparse_ops.serialize_sparse(sp_input)
+      serialized = serialize_fn(sp_input, out_type=out_type)
       serialized = array_ops.stack([serialized, serialized])
 
-      sp_deserialized = sparse_ops.deserialize_sparse(
-          serialized, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)
 
       combined_indices, combined_values, combined_shape = sess.run(
           sp_deserialized)
@@ -97,16 +110,29 @@ class SerializeSparseTest(test.TestCase):
       self.assertAllEqual(combined_values[6:], sp_input[1])
       self.assertAllEqual(combined_shape, [2, 5, 6])
 
-  def testSerializeDeserializeBatchInconsistentShape(self):
+  def testSerializeDeserializeBatch(self):
+    self._testSerializeDeserializeBatchHelper(sparse_ops.serialize_sparse,
+                                              sparse_ops.deserialize_sparse)
+
+  def testSerializeDeserializeManyBatch(self):
+    self._testSerializeDeserializeBatchHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse)
+
+  def testVariantSerializeDeserializeBatch(self):
+    self._testSerializeDeserializeBatchHelper(sparse_ops.serialize_sparse,
+                                              sparse_ops.deserialize_sparse,
+                                              dtypes.variant)
+
+  def _testSerializeDeserializeBatchInconsistentShapeHelper(
+      self, serialize_fn, deserialize_fn, out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorValue_5x6(np.arange(6))
       sp_input1 = self._SparseTensorValue_3x4(np.arange(6))
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
-      serialized1 = sparse_ops.serialize_sparse(sp_input1)
+      serialized0 = serialize_fn(sp_input0, out_type=out_type)
+      serialized1 = serialize_fn(sp_input1, out_type=out_type)
       serialized = array_ops.stack([serialized0, serialized1])
 
-      sp_deserialized = sparse_ops.deserialize_sparse(
-          serialized, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)
 
       combined_indices, combined_values, combined_shape = sess.run(
           sp_deserialized)
@@ -119,15 +145,26 @@ class SerializeSparseTest(test.TestCase):
       self.assertAllEqual(combined_values[6:], sp_input1[1])
       self.assertAllEqual(combined_shape, [2, 5, 6])
 
-  def testSerializeDeserializeNestedBatch(self):
+  def testSerializeDeserializeBatchInconsistentShape(self):
+    self._testSerializeDeserializeBatchInconsistentShapeHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse)
+
+  def testVariantSerializeDeserializeBatchInconsistentShape(self):
+    self._testSerializeDeserializeBatchInconsistentShapeHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse,
+        dtypes.variant)
+
+  def _testSerializeDeserializeNestedBatchHelper(self,
+                                                 serialize_fn,
+                                                 deserialize_fn,
+                                                 out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input = self._SparseTensorValue_5x6(np.arange(6))
-      serialized = sparse_ops.serialize_sparse(sp_input)
+      serialized = serialize_fn(sp_input, out_type=out_type)
       serialized = array_ops.stack([serialized, serialized])
       serialized = array_ops.stack([serialized, serialized])
 
-      sp_deserialized = sparse_ops.deserialize_sparse(
-          serialized, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)
 
       combined_indices, combined_values, combined_shape = sess.run(
           sp_deserialized)
@@ -151,40 +188,29 @@ class SerializeSparseTest(test.TestCase):
 
       self.assertAllEqual(combined_shape, [2, 2, 5, 6])
 
-  def testSerializeDeserializeMany(self):
-    with self.test_session(use_gpu=False) as sess:
-      sp_input0 = self._SparseTensorValue_5x6(np.arange(6))
-      sp_input1 = self._SparseTensorValue_3x4(np.arange(6))
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
-      serialized1 = sparse_ops.serialize_sparse(sp_input1)
-      serialized_concat = array_ops.stack([serialized0, serialized1])
-
-      sp_deserialized = sparse_ops.deserialize_many_sparse(
-          serialized_concat, dtype=dtypes.int32)
-
-      combined_indices, combined_values, combined_shape = sess.run(
-          sp_deserialized)
-
-      self.assertAllEqual(combined_indices[:6, 0], [0] * 6)  # minibatch 0
-      self.assertAllEqual(combined_indices[:6, 1:], sp_input0[0])
-      self.assertAllEqual(combined_indices[6:, 0], [1] * 6)  # minibatch 1
-      self.assertAllEqual(combined_indices[6:, 1:], sp_input1[0])
-      self.assertAllEqual(combined_values[:6], sp_input0[1])
-      self.assertAllEqual(combined_values[6:], sp_input1[1])
-      self.assertAllEqual(combined_shape, [2, 5, 6])
-
-  def testFeedSerializeDeserializeMany(self):
+  def testSerializeDeserializeNestedBatch(self):
+    self._testSerializeDeserializeNestedBatchHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse)
+
+  def testVariantSerializeDeserializeNestedBatch(self):
+    self._testSerializeDeserializeNestedBatchHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse,
+        dtypes.variant)
+
+  def _testFeedSerializeDeserializeBatchHelper(self,
+                                               serialize_fn,
+                                               deserialize_fn,
+                                               out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       sp_input1 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
       input1_val = self._SparseTensorValue_3x4(np.arange(6))
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
-      serialized1 = sparse_ops.serialize_sparse(sp_input1)
+      serialized0 = serialize_fn(sp_input0, out_type=out_type)
+      serialized1 = serialize_fn(sp_input1, out_type=out_type)
       serialized_concat = array_ops.stack([serialized0, serialized1])
 
-      sp_deserialized = sparse_ops.deserialize_many_sparse(
-          serialized_concat, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized_concat, dtype=dtypes.int32)
 
       combined_indices, combined_values, combined_shape = sess.run(
           sp_deserialized, {sp_input0: input0_val,
@@ -198,40 +224,96 @@ class SerializeSparseTest(test.TestCase):
       self.assertAllEqual(combined_values[6:], input1_val[1])
       self.assertAllEqual(combined_shape, [2, 5, 6])
 
-  def testSerializeManyDeserializeManyRoundTrip(self):
+  def testFeedSerializeDeserializeBatch(self):
+    self._testFeedSerializeDeserializeBatchHelper(sparse_ops.serialize_sparse,
+                                                  sparse_ops.deserialize_sparse)
+
+  def testFeedSerializeDeserializeManyBatch(self):
+    self._testFeedSerializeDeserializeBatchHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse)
+
+  def testFeedVariantSerializeDeserializeBatch(self):
+    self._testFeedSerializeDeserializeBatchHelper(sparse_ops.serialize_sparse,
+                                                  sparse_ops.deserialize_sparse,
+                                                  dtypes.variant)
+
+  def _testSerializeManyShapeHelper(self,
+                                    serialize_many_fn,
+                                    out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       # N == 4 because shape_value == [4, 5]
       indices_value = np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64)
       values_value = np.array([b"a", b"b", b"c"])
       shape_value = np.array([4, 5], dtype=np.int64)
       sparse_tensor = self._SparseTensorPlaceholder(dtype=dtypes.string)
-      serialized = sparse_ops.serialize_many_sparse(sparse_tensor)
-      deserialized = sparse_ops.deserialize_many_sparse(
-          serialized, dtype=dtypes.string)
-      serialized_value, deserialized_value = sess.run(
-          [serialized, deserialized],
+      serialized = serialize_many_fn(sparse_tensor, out_type=out_type)
+      serialized_value = sess.run(
+          serialized,
           feed_dict={
               sparse_tensor.indices: indices_value,
               sparse_tensor.values: values_value,
               sparse_tensor.dense_shape: shape_value
           })
       self.assertEqual(serialized_value.shape, (4, 3))
+
+  def testSerializeManyShape(self):
+    self._testSerializeManyShapeHelper(sparse_ops.serialize_many_sparse)
+
+  def testVariantSerializeManyShape(self):
+    # NOTE: The following test is a no-op as it is currently not possible to
+    # convert the serialized variant value to a numpy value.
+    pass
+
+  def _testSerializeManyDeserializeBatchHelper(self,
+                                               serialize_many_fn,
+                                               deserialize_fn,
+                                               out_type=dtypes.string):
+    with self.test_session(use_gpu=False) as sess:
+      # N == 4 because shape_value == [4, 5]
+      indices_value = np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64)
+      values_value = np.array([b"a", b"b", b"c"])
+      shape_value = np.array([4, 5], dtype=np.int64)
+      sparse_tensor = self._SparseTensorPlaceholder(dtype=dtypes.string)
+      serialized = serialize_many_fn(sparse_tensor, out_type=out_type)
+      deserialized = deserialize_fn(serialized, dtype=dtypes.string)
+      deserialized_value = sess.run(
+          deserialized,
+          feed_dict={
+              sparse_tensor.indices: indices_value,
+              sparse_tensor.values: values_value,
+              sparse_tensor.dense_shape: shape_value
+          })
       self.assertAllEqual(deserialized_value.indices, indices_value)
       self.assertAllEqual(deserialized_value.values, values_value)
       self.assertAllEqual(deserialized_value.dense_shape, shape_value)
 
-  def testDeserializeFailsWrongType(self):
+  def testSerializeManyDeserializeBatch(self):
+    self._testSerializeManyDeserializeBatchHelper(
+        sparse_ops.serialize_many_sparse, sparse_ops.deserialize_sparse)
+
+  def testSerializeManyDeserializeManyBatch(self):
+    self._testSerializeManyDeserializeBatchHelper(
+        sparse_ops.serialize_many_sparse, sparse_ops.deserialize_many_sparse)
+
+  def testVariantSerializeManyDeserializeBatch(self):
+    self._testSerializeManyDeserializeBatchHelper(
+        sparse_ops.serialize_many_sparse, sparse_ops.deserialize_sparse,
+        dtypes.variant)
+
+  def _testDeserializeFailsWrongTypeHelper(self,
+                                           serialize_fn,
+                                           deserialize_fn,
+                                           out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       sp_input1 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
       input1_val = self._SparseTensorValue_3x4(np.arange(6))
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
-      serialized1 = sparse_ops.serialize_sparse(sp_input1)
+      serialized0 = serialize_fn(sp_input0, out_type=out_type)
+      serialized1 = serialize_fn(sp_input1, out_type=out_type)
       serialized_concat = array_ops.stack([serialized0, serialized1])
 
-      sp_deserialized = sparse_ops.deserialize_many_sparse(
-          serialized_concat, dtype=dtypes.int64)
+      sp_deserialized = deserialize_fn(serialized_concat, dtype=dtypes.int64)
 
       with self.assertRaisesOpError(
           r"Requested SparseTensor of type int64 but "
@@ -240,41 +322,78 @@ class SerializeSparseTest(test.TestCase):
                  {sp_input0: input0_val,
                   sp_input1: input1_val})
 
-  def testDeserializeFailsInconsistentRank(self):
+  def testDeserializeFailsWrongType(self):
+    self._testDeserializeFailsWrongTypeHelper(sparse_ops.serialize_sparse,
+                                              sparse_ops.deserialize_sparse)
+
+  def testDeserializeManyFailsWrongType(self):
+    self._testDeserializeFailsWrongTypeHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse)
+
+  def testVariantDeserializeFailsWrongType(self):
+    self._testDeserializeFailsWrongTypeHelper(sparse_ops.serialize_sparse,
+                                              sparse_ops.deserialize_sparse,
+                                              dtypes.variant)
+
+  def _testDeserializeFailsInconsistentRankHelper(self,
+                                                  serialize_fn,
+                                                  deserialize_fn,
+                                                  out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       sp_input1 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
       input1_val = self._SparseTensorValue_1x1x1()
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
-      serialized1 = sparse_ops.serialize_sparse(sp_input1)
+      serialized0 = serialize_fn(sp_input0, out_type=out_type)
+      serialized1 = serialize_fn(sp_input1, out_type=out_type)
       serialized_concat = array_ops.stack([serialized0, serialized1])
 
-      sp_deserialized = sparse_ops.deserialize_many_sparse(
-          serialized_concat, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized_concat, dtype=dtypes.int32)
 
       with self.assertRaisesOpError(
-          r"Inconsistent rank across SparseTensors: rank prior to "
-          r"SparseTensor\[1\] was: 3 but rank of SparseTensor\[1\] is: 4"):
+          r"Inconsistent shape across SparseTensors: rank prior to "
+          r"SparseTensor\[1\] was: 2 but rank of SparseTensor\[1\] is: 3"):
         sess.run(sp_deserialized,
                  {sp_input0: input0_val,
                   sp_input1: input1_val})
 
-  def testDeserializeFailsInvalidProto(self):
+  def testDeserializeFailsInconsistentRank(self):
+    self._testDeserializeFailsInconsistentRankHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse)
+
+  def testDeserializeManyFailsInconsistentRank(self):
+    self._testDeserializeFailsInconsistentRankHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse)
+
+  def testVariantDeserializeFailsInconsistentRank(self):
+    self._testDeserializeFailsInconsistentRankHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_sparse,
+        dtypes.variant)
+
+  def _testDeserializeFailsInvalidProtoHelper(self,
+                                              serialize_fn,
+                                              deserialize_fn,
+                                              out_type=dtypes.string):
     with self.test_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
-      serialized0 = sparse_ops.serialize_sparse(sp_input0)
+      serialized0 = serialize_fn(sp_input0, out_type=out_type)
       serialized1 = ["a", "b", "c"]
       serialized_concat = array_ops.stack([serialized0, serialized1])
 
-      sp_deserialized = sparse_ops.deserialize_many_sparse(
-          serialized_concat, dtype=dtypes.int32)
+      sp_deserialized = deserialize_fn(serialized_concat, dtype=dtypes.int32)
 
-      with self.assertRaisesOpError(
-          r"Could not parse serialized_sparse\[1, 0\]"):
+      with self.assertRaisesOpError(r"Could not parse serialized proto"):
         sess.run(sp_deserialized, {sp_input0: input0_val})
 
+  def testDeserializeFailsInvalidProto(self):
+    self._testDeserializeFailsInvalidProtoHelper(sparse_ops.serialize_sparse,
+                                                 sparse_ops.deserialize_sparse)
+
+  def testDeserializeManyFailsInvalidProto(self):
+    self._testDeserializeFailsInvalidProtoHelper(
+        sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/summary_image_op_test.py b/tensorflow/python/kernel_tests/summary_image_op_test.py
index d2152ab560ad27b8a761ff8029fa425fdc9ff20d..4718827e8885c328cb2e84c2f1e8880bdbdb6cae 100644
--- a/tensorflow/python/kernel_tests/summary_image_op_test.py
+++ b/tensorflow/python/kernel_tests/summary_image_op_test.py
@@ -50,7 +50,6 @@ class SummaryImageOpTest(test.TestCase):
     self.assertProtoEquals(expected, image_summ)
 
   def testImageSummary(self):
-    np.random.seed(7)
     for depth in (1, 3, 4):
       for positive in False, True:
         with self.test_session(graph=ops.Graph()) as sess:
diff --git a/tensorflow/python/kernel_tests/svd_op_test.py b/tensorflow/python/kernel_tests/svd_op_test.py
index 9871eacb0308ff72800b6a2d037eff974de020ed..d20567bf0ecf587e6bb12bfd4a2a57658d2f5914 100644
--- a/tensorflow/python/kernel_tests/svd_op_test.py
+++ b/tensorflow/python/kernel_tests/svd_op_test.py
@@ -190,10 +190,11 @@ class SvdGradOpTest(test.TestCase):
   pass  # Filled in below
 
 
-def _GetSvdGradOpTest(dtype_, shape_, compute_uv_):
+def _GetSvdGradOpTest(dtype_, shape_, compute_uv_, full_matrices_):
 
   def _NormalizingSvd(tf_a):
-    tf_s, tf_u, tf_v = linalg_ops.svd(tf_a, compute_uv=True, full_matrices=True)
+    tf_s, tf_u, tf_v = linalg_ops.svd(
+        tf_a, compute_uv=True, full_matrices=full_matrices_)
     # Singular vectors are only unique up to an arbitrary phase. We normalize
     # the vectors such that the first component of u (if m >=n) or v (if n > m)
     # have phase 0.
@@ -270,17 +271,20 @@ if __name__ == "__main__":
                          _GetSvdOpTest(dtype, shape, use_static_shape,
                                        compute_uv, full_matrices))
   for compute_uv in False, True:
-    dtypes = ([np.float32, np.float64] + [np.complex64, np.complex128] *
-              (not compute_uv))
-    for dtype in dtypes:
-      mat_shapes = ([(10, 11), (11, 10),
-                     (11, 11)] + [(5, 11), (11, 5)] * (not compute_uv))
-      for mat_shape in mat_shapes:
-        for batch_dims in [(), (3,)]:
-          shape = batch_dims + mat_shape
-          name = "%s_%s_compute_uv_%s" % (dtype.__name__,
-                                          "_".join(map(str, shape)), compute_uv)
-          _AddTest(SvdGradOpTest, "SvdGrad", name,
-                   _GetSvdGradOpTest(dtype, shape, compute_uv))
+    for full_matrices in False, True:
+      dtypes = ([np.float32, np.float64]
+                + [np.complex64, np.complex128] * (not compute_uv))
+      for dtype in dtypes:
+        mat_shapes = [(10, 11), (11, 10), (11, 11)]
+        if not full_matrices or not compute_uv:
+          mat_shapes += [(5, 11), (11, 5)]
+        for mat_shape in mat_shapes:
+          for batch_dims in [(), (3,)]:
+            shape = batch_dims + mat_shape
+            name = "%s_%s_compute_uv_%s_full_%s" % (
+                dtype.__name__, "_".join(map(str, shape)), compute_uv,
+                full_matrices)
+            _AddTest(SvdGradOpTest, "SvdGrad", name,
+                     _GetSvdGradOpTest(dtype, shape, compute_uv, full_matrices))
 
   test.main()
diff --git a/tensorflow/python/kernel_tests/template_test.py b/tensorflow/python/kernel_tests/template_test.py
index 40c0ade62a8df5a73b61c5679685ad9368c9dbbf..8792ab41a07aac2dc8c9fcb956c378054a309a41 100644
--- a/tensorflow/python/kernel_tests/template_test.py
+++ b/tensorflow/python/kernel_tests/template_test.py
@@ -34,9 +34,10 @@ from tensorflow.python.platform import test
 from tensorflow.python.training import gradient_descent
 
 
-def variable_scoped_function():
+def variable_scoped_function(trainable=True):
   return variable_scope.get_variable(
-      "dummy", shape=[1], initializer=init_ops.zeros_initializer())
+      "dummy", shape=[1], trainable=trainable,
+      initializer=init_ops.zeros_initializer())
 
 
 def internally_variable_scoped_function(scope_name):
@@ -181,7 +182,8 @@ class TemplateTest(test.TestCase):
   def test_unique_name_raise_error_in_eager(self):
     with context.eager_mode():
       with self.assertRaisesRegexp(
-          ValueError, "unique_name cannot be used in eager mode."):
+          ValueError,
+          "unique_name_ cannot be used when eager exeuction is enabled."):
         template.make_template(
             "_", variable_scoped_function, unique_name_="s1")
 
@@ -306,6 +308,7 @@ class TemplateTest(test.TestCase):
     self.assertEqual("s1/nested/x:0", v1.name)
     self.assertEqual("s1_1/nested/x:0", v3.name)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_nested_templates(self):
 
     def nested_template():
@@ -313,35 +316,101 @@ class TemplateTest(test.TestCase):
       nested2 = template.make_template("nested", variable_scoped_function)
       v1 = nested1()
       v2 = nested2()
+
+      # nested1 and nested2 should not share variables
       self.assertNotEqual(v1, v2)
-      return v2
+
+      # Variables created by nested1 should be isolated from variables
+      # created by nested2.
+      self.assertEqual(nested1.variables, [v1])
+      self.assertEqual(nested2.variables, [v2])
+      self.assertEqual(nested1.trainable_variables, [v1])
+      self.assertEqual(nested2.trainable_variables, [v2])
+      self.assertEqual(len(nested1.non_trainable_variables), 0)
+      self.assertEqual(len(nested2.non_trainable_variables), 0)
+      return v1, v2
 
     tmpl1 = template.make_template("s1", nested_template)
     tmpl2 = template.make_template("s1", nested_template)
 
-    v1 = tmpl1()
-    v2 = tmpl1()
-    v3 = tmpl2()
-    self.assertTrue(v1, v2)
-    self.assertNotEqual(v1, v3)
-    self.assertEqual("s1/nested_1/dummy:0", v1.name)
-    self.assertEqual("s1_1/nested_1/dummy:0", v3.name)
+    v1, v2 = tmpl1()
+    v3, v4 = tmpl1()
+    v5, v6 = tmpl2()
+
+    # The second invocation of tmpl1 should reuse the variables
+    # created in the first invocation.
+    self.assertEqual([v1, v2], [v3, v4])
+    self.assertEqual(tmpl1.variables, [v1, v2])
+    self.assertEqual(tmpl1.trainable_variables, [v1, v2])
+    self.assertEqual(len(tmpl1.non_trainable_variables), 0)
+
+    # tmpl1 and tmpl2 should not share variables.
+    self.assertNotEqual([v1, v2], [v5, v6])
+    self.assertSequenceEqual(tmpl2.variables, [v5, v6])
+    self.assertSequenceEqual(tmpl2.trainable_variables, [v5, v6])
+    self.assertEqual(len(tmpl2.non_trainable_variables), 0)
+    self.assertEqual("s1/nested/dummy:0", v1.name)
+    self.assertEqual("s1/nested_1/dummy:0", v2.name)
+    self.assertEqual("s1_1/nested/dummy:0", v5.name)
+    self.assertEqual("s1_1/nested_1/dummy:0", v6.name)
 
-  def test_nested_eager_templates_raises_error(self):
+  @test_util.run_in_graph_and_eager_modes()
+  def test_nested_templates_with_defun(self):
+
+    def variable_scoped_function_no_return_value(trainable=True):
+      # defun cannot compile functions that return non-Tensor objects
+      _ = variable_scope.get_variable(
+          "dummy",
+          shape=[1],
+          trainable=trainable,
+          initializer=init_ops.zeros_initializer())
 
     def nested_template():
-      nested1 = template.make_template("nested", variable_scoped_function)
-      nested2 = template.make_template("nested", variable_scoped_function)
-      v1 = nested1()
-      v2 = nested2()
+      nested1 = template.make_template_internal(
+          "nested",
+          variable_scoped_function_no_return_value,
+          create_graph_function_=True)
+      nested2 = template.make_template_internal(
+          "nested",
+          variable_scoped_function_no_return_value,
+          create_graph_function_=True)
+      nested1()
+      nested2()
+      v1 = nested1.variables
+      v2 = nested2.variables
+
+      # nested1 and nested2 should not share variables
       self.assertNotEqual(v1, v2)
-      return v2
 
-    with context.eager_mode():
-      tmpl1 = template.make_template("s1", nested_template)
-      with self.assertRaisesRegexp(
-          ValueError, "Nested EagerTemaplates are not currently supported."):
-        tmpl1()
+      # Variables created by nested1 should be isolated from variables
+      # created by nested2.
+      self.assertEqual(nested1.variables, v1)
+      self.assertEqual(nested2.variables, v2)
+      self.assertEqual(nested1.trainable_variables, v1)
+      self.assertEqual(nested2.trainable_variables, v2)
+      self.assertEqual(len(nested1.non_trainable_variables), 0)
+      self.assertEqual(len(nested2.non_trainable_variables), 0)
+
+    tmpl1 = template.make_template("s1", nested_template)
+    tmpl2 = template.make_template("s1", nested_template)
+
+    tmpl1()
+    v1 = tmpl1.variables
+    tmpl1()
+    v2 = tmpl1.variables
+    tmpl2()
+    v3 = tmpl2.variables
+
+    # The second invocation of tmpl1 should reuse the variables
+    # created in the first invocation.
+    self.assertSequenceEqual(v1, v2)
+
+    # tmpl1 and tmpl2 should not share variables.
+    self.assertNotEqual(v1, v3)
+    self.assertEqual("s1/nested/dummy:0", v1[0].name)
+    self.assertEqual("s1/nested_1/dummy:0", v1[1].name)
+    self.assertEqual("s1_1/nested/dummy:0", v3[0].name)
+    self.assertEqual("s1_1/nested_1/dummy:0", v3[1].name)
 
   @test_util.run_in_graph_and_eager_modes()
   def test_immediate_scope_creation(self):
@@ -413,7 +482,7 @@ class TemplateTest(test.TestCase):
     self.assertEqual(custom_getter_count[0], 2)
 
     # Test that custom getter is called when the variable scope is created
-  # during construction
+    # during construction
     custom_getter_count[0] = 0
     tmpl2 = template.make_template(
         "s2",
@@ -539,6 +608,36 @@ class TemplateTest(test.TestCase):
     # Ensure we can get the scopes before either template is actually called.
     self.assertEqual(1, len(ta.trainable_variables))
     self.assertEqual(1, len(tb.trainable_variables))
+    # None non-trainable variable was created.
+    self.assertEqual([], list(ta.non_trainable_variables))
+    self.assertEqual([], list(tb.non_trainable_variables))
+    # Ensure variables returns all the variables.
+    self.assertEqual(1, len(ta.variables))
+    self.assertEqual(1, len(tb.variables))
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_non_trainable_variables(self):
+    # Make sure non_trainable_variables are created.
+    with variable_scope.variable_scope("foo2"):
+      ta = template.make_template("a", variable_scoped_function,
+                                  trainable=True)
+      tb = template.make_template("b", variable_scoped_function,
+                                  trainable=False)
+    # Initially there are not variables created.
+    self.assertEqual([], list(ta.variables))
+    self.assertEqual([], list(tb.variables))
+    # After calling there are variables created.
+    ta()
+    tb()
+    # Check the trainable and non_trainable variables.
+    self.assertEqual(1, len(ta.trainable_variables))
+    self.assertEqual([], list(ta.non_trainable_variables))
+
+    self.assertEqual([], list(tb.trainable_variables))
+    self.assertEqual(1, len(tb.non_trainable_variables))
+    # Ensure variables returns all the variables.
+    self.assertEqual(1, len(ta.variables))
+    self.assertEqual(1, len(tb.variables))
 
   # TODO(apassos) handle local variables in Eager
   def test_local_variables(self):
@@ -559,6 +658,31 @@ class TemplateTest(test.TestCase):
     self.assertEqual(0, len(ta.local_variables))
     self.assertEqual(1, len(tb.local_variables))
 
+  @test_util.run_in_graph_and_eager_modes()
+  def test_make_template_with_defun(self):
+
+    def variable_scoped_function_no_return_value(scope_name):
+      # defun cannot compile functions that return non-Tensor objects
+      with variable_scope.variable_scope(scope_name):
+        _ = variable_scope.get_variable(
+            "dummy", shape=[1], initializer=init_ops.zeros_initializer())
+
+    tmpl = template.make_template_internal(
+        "s1",
+        variable_scoped_function_no_return_value,
+        create_graph_function_=True,
+        scope_name="test")
+
+    # The first invocation of tmpl1 creates variables, the second should
+    # be executed as a graph function.
+    tmpl()
+    v1 = tmpl.variables
+    tmpl()
+    v2 = tmpl.variables
+
+    self.assertSequenceEqual(v1, v2)
+    self.assertEqual("s1/test/dummy:0", v1[0].name)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
index 835fdbe2aa531ed28f59279e4e83d9f8297a3b98..aad2443eea7ad87faf481973e91ca3df32ccfb44 100644
--- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py
+++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
@@ -43,10 +43,6 @@ import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
 from tensorflow.python.platform import test
 
 
-# TODO(ebrevdo): Delete this line after Dec. 4, 2017.
-tensor_array_ops._ENABLE_IDENTICAL_ELEMENT_SHAPES = True
-
-
 def _make_converter(tf_dtype):
   def _converter(x):
     if tf_dtype == dtypes.string:
diff --git a/tensorflow/python/kernel_tests/transpose_op_test.py b/tensorflow/python/kernel_tests/transpose_op_test.py
index c551d9c3d056b50600d1331749ba865439748f7e..290200ce45488a9796f437d9f748e06483e83d96 100644
--- a/tensorflow/python/kernel_tests/transpose_op_test.py
+++ b/tensorflow/python/kernel_tests/transpose_op_test.py
@@ -53,11 +53,11 @@ class TransposeTest(test.TestCase):
       # Gradient check on CPU.
       xs = list(np.shape(x))
       ys = list(np.shape(tf_ans))
-      if x.dtype == np.float32:
+      if x.dtype in [np.float32, np.complex64]:
         jacob_t, jacob_n = gradient_checker.compute_gradient(inx, xs, y, ys, x,
                                                              1e-2)
         self.assertAllClose(jacob_t, jacob_n, 1e-3, 1e-3)
-      elif x.dtype == np.float64:
+      elif x.dtype in [np.float64, np.complex128]:
         jacob_t, jacob_n = gradient_checker.compute_gradient(inx, xs, y, ys, x,
                                                              1e-2)
         self.assertAllClose(jacob_t, jacob_n, 1e-6, 1e-6)
diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py
index 6390b7c51808cf338f0651bbbdb30c7b71af7d8e..6366d2e181c8cfabba8a78b664c25c85debc67ef 100644
--- a/tensorflow/python/kernel_tests/unique_op_test.py
+++ b/tensorflow/python/kernel_tests/unique_op_test.py
@@ -63,23 +63,24 @@ class UniqueTest(test.TestCase):
       self.assertEqual(x[i], tf_y[tf_idx[i]].decode('ascii'))
 
   def testInt32Axis(self):
-    x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]])
-    with self.test_session() as sess:
-      y0, idx0 = gen_array_ops.unique_v2(x, axis=[0])
-      tf_y0, tf_idx0 = sess.run([y0, idx0])
-      y1, idx1 = gen_array_ops.unique_v2(x, axis=[1])
-      tf_y1, tf_idx1 = sess.run([y1, idx1])
-    self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]]))
-    self.assertAllEqual(tf_idx0, np.array([0, 0, 1]))
-    self.assertAllEqual(tf_y1, np.array([[1, 0], [1, 0], [2, 0]]))
-    self.assertAllEqual(tf_idx1, np.array([0, 1, 1]))
+    for dtype in [np.int32, np.int64]:
+      x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]])
+      with self.test_session() as sess:
+        y0, idx0 = gen_array_ops._unique_v2(x, axis=np.array([0], dtype))
+        tf_y0, tf_idx0 = sess.run([y0, idx0])
+        y1, idx1 = gen_array_ops._unique_v2(x, axis=np.array([1], dtype))
+        tf_y1, tf_idx1 = sess.run([y1, idx1])
+      self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]]))
+      self.assertAllEqual(tf_idx0, np.array([0, 0, 1]))
+      self.assertAllEqual(tf_y1, np.array([[1, 0], [1, 0], [2, 0]]))
+      self.assertAllEqual(tf_idx1, np.array([0, 1, 1]))
 
   def testInt32V2(self):
     # This test is only temporary, once V2 is used
     # by default, the axis will be wrapped to allow `axis=None`.
     x = np.random.randint(2, high=10, size=7000)
     with self.test_session() as sess:
-      y, idx = gen_array_ops.unique_v2(x, axis=[])
+      y, idx = gen_array_ops._unique_v2(x, axis=np.array([], np.int32))
       tf_y, tf_idx = sess.run([y, idx])
 
     self.assertEqual(len(x), len(tf_idx))
diff --git a/tensorflow/python/kernel_tests/unstack_op_test.py b/tensorflow/python/kernel_tests/unstack_op_test.py
index c2dcff978a4ac07b290352c98f2fc062583a3df1..84818755766a435c873f30e96dc0080af4f78b84 100644
--- a/tensorflow/python/kernel_tests/unstack_op_test.py
+++ b/tensorflow/python/kernel_tests/unstack_op_test.py
@@ -22,6 +22,7 @@ import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.platform import test
@@ -42,15 +43,35 @@ class UnstackOpTest(test.TestCase):
     np.random.seed(7)
     with self.test_session(use_gpu=True):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
-        data = np.random.randn(*shape)
-        # Convert data to a single tensorflow tensor
-        x = constant_op.constant(data)
-        # Unpack into a list of tensors
-        cs = array_ops.unstack(x, num=shape[0])
-        self.assertEqual(type(cs), list)
-        self.assertEqual(len(cs), shape[0])
-        cs = [c.eval() for c in cs]
-        self.assertAllEqual(cs, data)
+        for dtype in [
+            np.bool, np.float16, np.float32, np.float64, np.int32, np.int64
+        ]:
+          data = np.random.randn(*shape).astype(dtype)
+          # Convert data to a single tensorflow tensor
+          x = constant_op.constant(data)
+          # Unpack into a list of tensors
+          cs = array_ops.unstack(x, num=shape[0])
+          self.assertEqual(type(cs), list)
+          self.assertEqual(len(cs), shape[0])
+          cs = [c.eval() for c in cs]
+          self.assertAllEqual(cs, data)
+
+  def testSimpleGpu(self):
+    if not test_util.is_gpu_available():
+      self.skipTest('No GPU available')
+    np.random.seed(7)
+    with self.test_session(use_gpu=True, force_gpu=True):
+      for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
+        for dtype in [np.float16, np.float32, np.float64, np.int32, np.int64]:
+          data = np.random.randn(*shape).astype(dtype)
+          # Convert data to a single tensorflow tensor
+          x = constant_op.constant(data)
+          # Unpack into a list of tensors
+          cs = array_ops.unstack(x, num=shape[0])
+          self.assertEqual(type(cs), list)
+          self.assertEqual(len(cs), shape[0])
+          cs = [c.eval() for c in cs]
+          self.assertAllEqual(cs, data)
 
   def testGradientsAxis0(self):
     for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 70fe0a47852dd3b6ce999d049fe817efe68bed05..f1a86625e080274e33abecc9db6e0c9957010d01 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -117,7 +117,7 @@ class VariableScopeTest(test.TestCase):
         w = variable_scope.get_variable("w", [])
         self.assertEqual(w.dtype.base_dtype, dtypes.float16)
 
-  def testEagerVaribleStore(self):
+  def testEagerVariableStore(self):
     with context.eager_mode():
       store = variable_scope.EagerVariableStore()
       with store.as_default():
@@ -746,6 +746,130 @@ class VariableScopeTest(test.TestCase):
           with ops.name_scope("scope2") as sc2:
             self.assertEqual(sc2, "outer_1/default/scope2/")
 
+  def testBasicWhenAuxiliaryNameScopeIsFalse(self):
+    with self.test_session():
+      with variable_scope.variable_scope(
+          "scope", auxiliary_name_scope=False) as scope:
+        self.assertEqual(scope.original_name_scope, "")
+        self.assertEqual(variable_scope.get_variable("w", []).name, "scope/w:0")
+        self.assertEqual(constant_op.constant([], name="c").name, "c:0")
+      with variable_scope.variable_scope(scope, auxiliary_name_scope=False):
+        self.assertEqual(scope.original_name_scope, "")
+        self.assertEqual(
+            variable_scope.get_variable("w1", []).name, "scope/w1:0")
+        self.assertEqual(constant_op.constant([], name="c1").name, "c1:0")
+      # Recheck: new name scope is NOT created before
+      with ops.name_scope("scope"):
+        self.assertEqual(constant_op.constant([], name="c").name, "scope/c:0")
+
+      with variable_scope.variable_scope("outer"):
+        with variable_scope.variable_scope(
+            "inner", auxiliary_name_scope=False) as inner:
+          self.assertEqual(inner.original_name_scope, "outer/")
+          self.assertEqual(
+              variable_scope.get_variable("w", []).name, "outer/inner/w:0")
+          self.assertEqual(constant_op.constant([], name="c").name, "outer/c:0")
+        with variable_scope.variable_scope(
+            inner, auxiliary_name_scope=False) as inner1:
+          self.assertEqual(inner1.original_name_scope, "outer/")
+          self.assertEqual(
+              variable_scope.get_variable("w1", []).name, "outer/inner/w1:0")
+          self.assertEqual(
+              constant_op.constant([], name="c1").name, "outer/c1:0")
+        # Recheck: new name scope is NOT created before
+        with ops.name_scope("inner"):
+          self.assertEqual(
+              constant_op.constant([], name="c").name, "outer/inner/c:0")
+
+  def testCreatedByDefaultNameWhenAuxiliaryNameScopeIsFalse(self):
+    with self.test_session():
+      with variable_scope.variable_scope(
+          None, default_name="default", auxiliary_name_scope=False) as scope:
+        self.assertEqual(scope.original_name_scope, "")
+        self.assertEqual(
+            variable_scope.get_variable("w", []).name, "default/w:0")
+        self.assertEqual(constant_op.constant([], name="c").name, "c:0")
+      # Recheck: new name scope is NOT created before
+      with ops.name_scope("default"):
+        self.assertEqual(constant_op.constant([], name="c").name, "default/c:0")
+
+      with variable_scope.variable_scope("outer"):
+        with variable_scope.variable_scope(
+            None, default_name="default", auxiliary_name_scope=False) as inner:
+          self.assertEqual(inner.original_name_scope, "outer/")
+          self.assertEqual(
+              variable_scope.get_variable("w", []).name, "outer/default/w:0")
+          self.assertEqual(constant_op.constant([], name="c").name, "outer/c:0")
+        # Recheck: new name scope is NOT created before
+        with ops.name_scope("default"):
+          self.assertEqual(
+              constant_op.constant([], name="c").name, "outer/default/c:0")
+
+  def testReenterRootScopeWhenAuxiliaryNameScopeIsFalse(self):
+    with self.test_session():
+      root_scope = variable_scope.get_variable_scope()
+      with variable_scope.variable_scope(
+          root_scope, auxiliary_name_scope=False) as scope:
+        self.assertEqual(scope.original_name_scope, "")
+        self.assertEqual(variable_scope.get_variable("w", []).name, "w:0")
+        self.assertEqual(constant_op.constant([], name="c").name, "c:0")
+
+      with variable_scope.variable_scope("outer"):
+        with variable_scope.variable_scope(
+            root_scope, auxiliary_name_scope=False) as inner:
+          self.assertEqual(inner.original_name_scope, "")
+          self.assertEqual(variable_scope.get_variable("w1", []).name, "w1:0")
+          self.assertEqual(
+              constant_op.constant([], name="c1").name, "outer/c1:0")
+
+  def testAuxiliaryNameScopeIsInvalid(self):
+    with self.test_session():
+      with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
+        with variable_scope.variable_scope(
+            None, default_name="scope", auxiliary_name_scope="invalid"):
+          pass
+
+      with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
+        with variable_scope.variable_scope(
+            "scope", auxiliary_name_scope="invalid"):
+          pass
+
+      with variable_scope.variable_scope("scope") as scope:
+        pass
+      with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
+        with variable_scope.variable_scope(
+            scope, auxiliary_name_scope="invalid"):
+          pass
+
+  def testReuseScopeWithoutNameScopeCollision(self):
+    # Github issue: #13429
+    with self.test_session():
+      with variable_scope.variable_scope("outer"):
+        with variable_scope.variable_scope("inner") as inner:
+          pass
+
+      with variable_scope.variable_scope(
+          inner, auxiliary_name_scope=False) as scope:
+        with ops.name_scope(scope.original_name_scope):
+          self.assertEqual(
+              variable_scope.get_variable("w", []).name, "outer/inner/w:0")
+          self.assertEqual(
+              constant_op.constant([], name="c").name, "outer/inner/c:0")
+        with ops.name_scope("inner"):
+          self.assertEqual(constant_op.constant([], name="c").name, "inner/c:0")
+
+      with variable_scope.variable_scope("another"):
+        with variable_scope.variable_scope(
+            inner, auxiliary_name_scope=False) as scope1:
+          with ops.name_scope(scope1.original_name_scope):
+            self.assertEqual(
+                variable_scope.get_variable("w1", []).name, "outer/inner/w1:0")
+            self.assertEqual(
+                constant_op.constant([], name="c1").name, "outer/inner/c1:0")
+          with ops.name_scope("inner"):
+            self.assertEqual(
+                constant_op.constant([], name="c").name, "another/inner/c:0")
+
   @test_util.run_in_graph_and_eager_modes()
   def testGetLocalVar(self):
     # Check that local variable respects naming.
@@ -881,6 +1005,18 @@ class VariableScopeTest(test.TestCase):
     # Ensure it is possible to do get_variable with a _ref dtype passed in.
     _ = variable_scope.get_variable("w", shape=[5, 6], dtype=v.dtype)
 
+  def testTwoGraphs(self):
+
+    def f():
+      g1 = ops.Graph()
+      g2 = ops.Graph()
+      with g1.as_default():
+        with g2.as_default():
+          with variable_scope.variable_scope("_"):
+            pass
+
+    self.assertRaisesRegexp(ValueError, "'_' is not a valid scope name", f)
+
 
 def axis0_into1_partitioner(shape=None, **unused_kwargs):
   part = [1] * len(shape)
@@ -901,35 +1037,6 @@ def axis0_into3_partitioner(shape=None, **unused_kwargs):
 
 class VariableScopeWithPartitioningTest(test.TestCase):
 
-  def testInitFromNonInitializer(self):
-    with self.test_session() as sess:
-      # Test various dtypes with zeros initializer as following:
-      types = [
-          dtypes.int8, dtypes.uint8, dtypes.int16, dtypes.uint16, dtypes.int32,
-          dtypes.int64, dtypes.bool
-      ]
-
-      # Use different variable_name to distinguish various dtypes
-      for (i, dtype) in enumerate(types):
-        x = variable_scope.get_variable(
-            name="x%d" % i,
-            shape=(3, 4),
-            dtype=dtype,
-            partitioner=axis0_into2_partitioner)
-        y = variable_scope.get_variable(
-            name="y%d" % i,
-            shape=(6, 4),
-            dtype=dtype,
-            partitioner=axis0_into2_partitioner,
-            initializer=init_ops.zeros_initializer(dtype=dtype))
-
-        variables_lib.global_variables_initializer().run()
-        # x and y would become var list after partition
-        val_x = sess.run(list(x))
-        val_y = sess.run(list(y))
-
-        self.assertAllEqual(val_x, val_y)
-
   def testResultNameMatchesRequested(self):
     with variable_scope.variable_scope(
         "scope0", partitioner=axis0_into2_partitioner):
diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index 6be2bc3e7692bdba569f011243f368f0ee7abc94..00faf3faa1004ddbb310137500dbec0db4a52196 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -103,10 +103,16 @@ class Layer(object):
     self.built = False
     self.input_spec = None
 
+    if activity_regularizer and context.in_eager_mode():
+      raise ValueError(
+          ('Activity regularization is not supported when executing eagerly. '
+           'Got activity_regularizer=%s') % (activity_regularizer,))
     self._activity_regularizer = activity_regularizer
     self._trainable_weights = []
     self._non_trainable_weights = []
     self._updates = []
+    # When executing eagerly, _losses is a list of zero-argument lambdas which
+    # return tensors. When using graph execution, _losses is a list of ops.
     self._losses = []
     self._reuse = kwargs.get('_reuse')
     self._graph = ops.get_default_graph()
@@ -125,6 +131,9 @@ class Layer(object):
 
     self._init_set_name(name)
 
+    # Holds functions for creating regularizer ops.
+    self._regularizer_factories = []
+
     # Determine variable scope.
     scope = kwargs.get('_scope')
     if scope:
@@ -285,11 +294,41 @@ class Layer(object):
       inputs_hash = None
     return self._per_input_updates.get(inputs_hash, [])
 
+  def _get_regularizer_factories(self):
+    try:
+      # Some subclasses of Layer do not use its constructor.
+      return self._regularizer_factories
+    except AttributeError:
+      self._regularizer_factories = []
+      return self._regularizer_factories
+
+  def _maybe_create_variable_regularizers(self):
+    """Creates added but uninstantiated regularizers."""
+    factories = self._get_regularizer_factories()
+    if factories:
+      for factory in factories:
+        factory()
+      factories[:] = []
+
   @property
   def losses(self):
+    """Losses which are associated with this `Layer`.
+
+    Note that when executing eagerly, getting this property evaluates
+    regularizers. When using graph execution, variable regularization ops have
+    already been created and are simply returned here.
+
+    Returns:
+      A list of tensors.
+    """
+    self._maybe_create_variable_regularizers()
     if context.in_eager_mode():
-      raise RuntimeError('Layer.losses not supported in Eager mode.')
-    return self._losses
+      # _losses may only contain variable regularization losses when executing
+      # eagerly, and they have been saved as lambdas to be executed when
+      # requested.
+      return [regularizer() for regularizer in self._losses]
+    else:
+      return self._losses
 
   def add_loss(self, losses, inputs=None):
     """Add loss tensor(s), potentially dependent on layer inputs.
@@ -303,6 +342,11 @@ class Layer(object):
     The `get_losses_for` method allows to retrieve the losses relevant to a
     specific set of inputs.
 
+    Note that `add_loss` is not supported when executing eagerly. Instead,
+    variable regularizers may be added through `add_variable`. Activity
+    regularization is not supported directly (but such losses may be returned
+    from `Layer.call()`).
+
     Arguments:
       losses: Loss tensor, or list/tuple of tensors.
       inputs: Optional input tensor(s) that the loss(es) depend on. Must
@@ -361,6 +405,7 @@ class Layer(object):
       inputs_hash = layers_util.object_list_uid(inputs)
     else:
       inputs_hash = None
+    self._maybe_create_variable_regularizers()
     return self._per_input_losses.get(inputs_hash, [])
 
   def build(self, _):
@@ -383,14 +428,9 @@ class Layer(object):
     """Determines op naming for the Layer."""
     return current_variable_scope.original_name_scope
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     """Computes the output shape of the layer given the input shape.
 
-    Assumes that the layer will be built to match that input shape.
-    If this method is not implemented by child classes, the default
-    assumption will be that the layer does not alter the shape of the tensors
-    passing through it.
-
     Args:
       input_shape: A (possibly nested tuple of) `TensorShape`.  It need not
         be fully defined (e.g. the batch size may be unknown).
@@ -404,7 +444,7 @@ class Layer(object):
       ValueError: if `input_shape` is incomplete or is incompatible with the
         the layer.
     """
-    return input_shape
+    raise NotImplementedError
 
   def _make_unique_name(self, name_uid_map=None, avoid_names=None,
                         namespace='', zero_based=False):
@@ -460,24 +500,20 @@ class Layer(object):
       instance is returned.
 
     Raises:
-      RuntimeError: If called in Eager mode with regularizers.
+      RuntimeError: If called in Eager mode with partioned variable
+        regularization.
     """
-    # Note that we currently don't support variable regularization in Eager
-    # mode. An alternative is for users to directly compute these losses before
-    # performing a backward pass.
-    if context.in_graph_mode():
+
+    in_graph_mode = context.in_graph_mode()
+    if in_graph_mode:
       existing_variables = set(tf_variables.global_variables())
-    else:
-      existing_variables = []
-      if regularizer is not None:
-        raise RuntimeError('Variable regularization not supported in Eager '
-                           'mode.')
     if dtype is None:
       dtype = self.dtype or dtypes.float32
 
     self._set_scope(None)
+    reuse = self.built or self._reuse
     with vs.variable_scope(
-        self._scope, reuse=(self.built or self._reuse)) as scope:
+        self._scope, reuse=reuse, auxiliary_name_scope=False) as scope:
       with ops.name_scope(self._name_scope_name(scope)):
         variable = vs.get_variable(name,
                                    shape=shape,
@@ -486,28 +522,56 @@ class Layer(object):
                                    constraint=constraint,
                                    trainable=trainable and self.trainable,
                                    partitioner=partitioner)
-        if (context.in_graph_mode() and trainable and self.trainable
-            and variable not in tf_variables.trainable_variables()):
-          # A custom getter / variable scope overrode the trainable flag.
-          trainable = False
-        if variable in existing_variables:
-          return variable
+
+        if in_graph_mode:
+          if (trainable and self.trainable
+              and variable not in tf_variables.trainable_variables()):
+            # A custom getter / variable scope overrode the trainable flag.
+            trainable = False
+          if variable in existing_variables:
+            # To match the behavior of tf.get_variable(), we only apply
+            # regularization if the variable is newly created.
+            return variable
+
         if regularizer:
-          # To match the behavior of tf.get_variable(), we only
-          # apply regularization if the variable is newly created.
-          if isinstance(variable, tf_variables.PartitionedVariable):
-            for v in variable:
-              with ops.colocate_with(v.op):
-                with ops.name_scope(name + '/Regularizer'):
-                  regularization = regularizer(v)
-              if regularization is not None:
-                self.add_loss(regularization)
+          def regularizer_factory():
+            if context.in_graph_mode():
+              with vs.variable_scope(scope, reuse=reuse,
+                                     auxiliary_name_scope=False):
+                with ops.name_scope(self._name_scope_name(scope)):
+                  if isinstance(variable, tf_variables.PartitionedVariable):
+                    for v in variable:
+                      with ops.colocate_with(v.op):
+                        with ops.name_scope(name + '/Regularizer'):
+                          regularization = regularizer(v)
+                      if regularization is not None:
+                        self.add_loss(regularization)
+                  else:
+                    with ops.colocate_with(variable.op):
+                      with ops.name_scope(name + '/Regularizer'):
+                        regularization = regularizer(variable)
+                    if regularization is not None:
+                      self.add_loss(regularization)
+            else:
+              if isinstance(variable, tf_variables.PartitionedVariable):
+                raise RuntimeError(
+                    'Partitioned variable regularization is not yet '
+                    'supported when executing eagerly. File a feature request'
+                    'if this is important to you.')
+              # Save a zero-argument lambda which runs the regularizer on the
+              # variable, to be executed when `Layer.losses` is requested.
+              # This makes losses responsive to variable updates when
+              # executing eagerly.
+              self._losses.append(lambda: regularizer(variable))
+
+          if hasattr(self, '_defer_regularizers') and self._defer_regularizers:
+            # _defer_regularizers exists and is set to True if `build` was
+            # invoked in `__call__`: deferring regularizer construction
+            # prevents the regularizer from being created in an `init_scope`.
+            self._get_regularizer_factories().append(regularizer_factory)
           else:
-            with ops.colocate_with(variable.op):
-              with ops.name_scope(name + '/Regularizer'):
-                regularization = regularizer(variable)
-            if regularization is not None:
-              self.add_loss(regularization)
+            regularizer_factory()
+
     if trainable:
       self._trainable_weights.append(variable)
     else:
@@ -575,11 +639,11 @@ class Layer(object):
         # variable scope with this setting. We avoid re-creating variable scopes
         # after this point as an optimization.
         self._always_reuse_variable_scope = vs.variable_scope(
-            self._scope, reuse=True)
+            self._scope, reuse=True, auxiliary_name_scope=False)
         scope_context_manager = self._always_reuse_variable_scope
     else:
       scope_context_manager = vs.variable_scope(
-          self._scope, reuse=self._reuse)
+          self._scope, reuse=self._reuse, auxiliary_name_scope=False)
     with scope_context_manager as scope:
       with ops.name_scope(self._name_scope_name(scope)):
         if not self.built:
@@ -602,11 +666,19 @@ class Layer(object):
           self._assert_input_compatibility(inputs)
           if input_list and self._dtype is None:
             try:
-              self._dtype = input_list[0].dtype.name
+              self._dtype = input_list[0].dtype.base_dtype.name
             except AttributeError:
               pass
           input_shapes = nest.map_structure(lambda x: x.get_shape(), inputs)
-          self.build(input_shapes)
+
+          # Signal to `add_variable` that regularizer construction should be
+          # deferred.
+          self._defer_regularizers = True
+          with ops.init_scope():
+            self.build(input_shapes)
+          # Create any regularizers added by `build`.
+          self._maybe_create_variable_regularizers()
+          self._defer_regularizers = False
         try:
           # Note: not all sub-classes of Layer call Layer.__init__ (especially
           # the ones under tensorflow/python/keras). Hence we recompute this
@@ -627,9 +699,9 @@ class Layer(object):
             raise ValueError('A layer\'s `call` method should return a Tensor '
                              'or a list of Tensors, not None.')
         else:
-          # Deferred mode behavior: use `_compute_output_shape` to
+          # Deferred mode behavior: use `compute_output_shape` to
           # infer the number of outputs of the layer and their shapes.
-          output_shapes = self._compute_output_shape(input_shapes)
+          output_shapes = self.compute_output_shape(input_shapes)
           output_shapes = nest.flatten(output_shapes)
           outputs = [
               # TODO(fchollet): name the deferred tensors?
diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py
index 1eea20deefe2f033ab9827f9d5b92f8661618d21..06ba214c0fc60202c773f8f231b17c3b728f5c52 100644
--- a/tensorflow/python/layers/base_test.py
+++ b/tensorflow/python/layers/base_test.py
@@ -88,6 +88,11 @@ class BaseLayerTest(test.TestCase):
           regularizer=regularizer)
       self.assertEqual(len(layer.losses), 1)
 
+  def testNoEagerActivityRegularizer(self):
+    with context.eager_mode():
+      with self.assertRaisesRegexp(ValueError, 'activity_regularizer'):
+        core_layers.Dense(1, activity_regularizer=lambda *args, **kwargs: 0.)
+
   def testGetVariable(self):
     with self.test_session():
 
@@ -469,6 +474,87 @@ class BaseLayerTest(test.TestCase):
     layer.apply(x)
     self.assertEqual(len(layer.get_losses_for(x)), 1)
 
+  def testNameScopeIsConsistentWithVariableScope(self):
+    # Github issue 13429.
+
+    class MyLayer(base_layers.Layer):
+
+      def build(self, input_shape):
+        self.my_var = self.add_variable('my_var', (), dtypes.float32)
+        self.built = True
+
+      def call(self, inputs):
+        return math_ops.multiply(inputs, self.my_var, name='my_op')
+
+    def _gen_layer(x, name=None):
+      layer = MyLayer(name=name)
+      out = layer.apply(x)
+      return layer, out
+
+    # unnamed layer
+    with ops.Graph().as_default():
+      x = array_ops.placeholder(dtypes.float32, (), 'x')
+      layer, op = _gen_layer(x)
+      layer1, op1 = _gen_layer(op)
+      layer2, op2 = _gen_layer(op1)
+
+      self.assertEqual(layer.my_var.name, 'my_layer/my_var:0')
+      self.assertEqual(op.name, 'my_layer/my_op:0')
+      self.assertEqual(layer1.my_var.name, 'my_layer_1/my_var:0')
+      self.assertEqual(op1.name, 'my_layer_1/my_op:0')
+      self.assertEqual(layer2.my_var.name, 'my_layer_2/my_var:0')
+      self.assertEqual(op2.name, 'my_layer_2/my_op:0')
+    # name starts from zero
+    with ops.Graph().as_default():
+      x = array_ops.placeholder(dtypes.float32, (), 'x')
+      layer, op = _gen_layer(x, name='name')
+      layer1, op1 = _gen_layer(op, name='name_1')
+      layer2, op2 = _gen_layer(op1, name='name_2')
+
+      self.assertEqual(layer.my_var.name, 'name/my_var:0')
+      self.assertEqual(op.name, 'name/my_op:0')
+      self.assertEqual(layer1.my_var.name, 'name_1/my_var:0')
+      self.assertEqual(op1.name, 'name_1/my_op:0')
+      self.assertEqual(layer2.my_var.name, 'name_2/my_var:0')
+      self.assertEqual(op2.name, 'name_2/my_op:0')
+    # name starts from one
+    with ops.Graph().as_default():
+      x = array_ops.placeholder(dtypes.float32, (), 'x')
+      layer, op = _gen_layer(x, name='name_1')
+      layer1, op1 = _gen_layer(op, name='name_2')
+      layer2, op2 = _gen_layer(op1, name='name_3')
+
+      self.assertEqual(layer.my_var.name, 'name_1/my_var:0')
+      self.assertEqual(op.name, 'name_1/my_op:0')
+      self.assertEqual(layer1.my_var.name, 'name_2/my_var:0')
+      self.assertEqual(op1.name, 'name_2/my_op:0')
+      self.assertEqual(layer2.my_var.name, 'name_3/my_var:0')
+      self.assertEqual(op2.name, 'name_3/my_op:0')
+
+  def testVariablesAreLiftedFromFunctionBuildingGraphs(self):
+    class MyLayer(base_layers.Layer):
+
+      def build(self, input_shape):
+        self.my_var = self.add_variable('my_var', (), dtypes.float32)
+        self.built = True
+
+      def call(self, inputs):
+        return inputs
+
+    outer_graph = ops.get_default_graph()
+    function_building_graph = ops.Graph()
+    function_building_graph._building_function = True
+    with outer_graph.as_default():
+      with function_building_graph.as_default():
+        layer = MyLayer()
+        # Create a variable by invoking build through __call__ and assert that
+        # it is both tracked and lifted into the outer graph.
+        inputs = array_ops.placeholder(dtypes.float32, (), 'inputs')
+        layer.apply(inputs)
+        self.assertEqual(len(layer.variables), 1)
+        self.assertEqual(len(layer.trainable_variables), 1)
+        self.assertEqual(layer.variables[0].graph, outer_graph)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py
index 7213fa1db8ee2eb4a36366464703b30d3f1a84c3..ab1fa551e1171db60cbb3b080f453036862c895c 100644
--- a/tensorflow/python/layers/convolutional.py
+++ b/tensorflow/python/layers/convolutional.py
@@ -64,8 +64,8 @@ class _Conv(base.Layer):
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Optional regularizer function for the output.
@@ -192,7 +192,7 @@ class _Conv(base.Layer):
       return self.activation(outputs)
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_last':
       space = input_shape[1:-1]
@@ -254,8 +254,8 @@ class Conv1D(_Conv):
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Optional regularizer function for the output.
@@ -362,8 +362,8 @@ def conv1d(inputs,
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Optional regularizer function for the output.
@@ -450,8 +450,8 @@ class Conv2D(_Conv):
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Optional regularizer function for the output.
@@ -565,8 +565,8 @@ def conv2d(inputs,
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Optional regularizer function for the output.
@@ -654,8 +654,8 @@ class Conv3D(_Conv):
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Optional regularizer function for the output.
@@ -770,8 +770,8 @@ def conv3d(inputs,
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Optional regularizer function for the output.
@@ -860,8 +860,8 @@ class SeparableConv2D(Conv2D):
     use_bias: Boolean, whether the layer uses a bias.
     depthwise_initializer: An initializer for the depthwise convolution kernel.
     pointwise_initializer: An initializer for the pointwise convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     depthwise_regularizer: Optional regularizer for the depthwise
       convolution kernel.
     pointwise_regularizer: Optional regularizer for the pointwise
@@ -1004,7 +1004,7 @@ class SeparableConv2D(Conv2D):
       return self.activation(outputs)
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_first':
       rows = input_shape[2]
@@ -1089,8 +1089,8 @@ def separable_conv2d(inputs,
     use_bias: Boolean, whether the layer uses a bias.
     depthwise_initializer: An initializer for the depthwise convolution kernel.
     pointwise_initializer: An initializer for the pointwise convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     depthwise_regularizer: Optional regularizer for the depthwise
       convolution kernel.
     pointwise_regularizer: Optional regularizer for the pointwise
@@ -1175,8 +1175,8 @@ class Conv2DTranspose(Conv2D):
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Optional regularizer function for the output.
@@ -1232,7 +1232,8 @@ class Conv2DTranspose(Conv2D):
 
   def build(self, input_shape):
     if len(input_shape) != 4:
-      raise ValueError('Inputs should have rank 4. Received input shape: ' + str(input_shape))
+      raise ValueError('Inputs should have rank 4. Received input shape: ' +
+                       str(input_shape))
     if self.data_format == 'channels_first':
       channel_axis = 1
     else:
@@ -1324,7 +1325,7 @@ class Conv2DTranspose(Conv2D):
       return self.activation(outputs)
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     output_shape = list(input_shape)
     if self.data_format == 'channels_first':
@@ -1390,8 +1391,8 @@ def conv2d_transpose(inputs,
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If `None`, then no
-      bias will be applied.
+    bias_initializer: An initializer for the bias vector. If `None`, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Optional regularizer function for the output.
@@ -1463,8 +1464,8 @@ class Conv3DTranspose(Conv3D):
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If `None`, then no
-      bias will be applied.
+    bias_initializer: An initializer for the bias vector. If `None`, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Optional regularizer function for the output.
@@ -1642,7 +1643,7 @@ class Conv3DTranspose(Conv3D):
       return self.activation(outputs)
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     output_shape = list(input_shape)
     if self.data_format == 'channels_first':
@@ -1704,8 +1705,8 @@ def conv3d_transpose(inputs,
       linear activation.
     use_bias: Boolean, whether the layer uses a bias.
     kernel_initializer: An initializer for the convolution kernel.
-    bias_initializer: An initializer for the bias vector. If None, no bias will
-      be applied.
+    bias_initializer: An initializer for the bias vector. If None, the default
+      initializer will be used.
     kernel_regularizer: Optional regularizer for the convolution kernel.
     bias_regularizer: Optional regularizer for the bias vector.
     activity_regularizer: Optional regularizer function for the output.
diff --git a/tensorflow/python/layers/convolutional_test.py b/tensorflow/python/layers/convolutional_test.py
index da10fe68a0c1366fccde67e01bee6155a26d481e..e41eb5c32ff8ee825c0bd900efd58166017004d5 100644
--- a/tensorflow/python/layers/convolutional_test.py
+++ b/tensorflow/python/layers/convolutional_test.py
@@ -20,9 +20,11 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.layers import convolutional as conv_layers
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
@@ -32,6 +34,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
+@test_util.with_c_api
 class ConvTest(test.TestCase):
 
   def testInvalidDataFormat(self):
@@ -97,16 +100,14 @@ class ConvTest(test.TestCase):
     self.assertListEqual(layer.bias.get_shape().as_list(), [32])
 
   def testUnknownInputChannels(self):
-    images = random_ops.random_uniform((5, 7, 9, 4))
-    images._shape = tensor_shape.as_shape((5, 7, 9, None))
+    images = array_ops.placeholder(dtypes.float32, (5, 7, 9, None))
     layer = conv_layers.Conv2D(32, [3, 3], activation=nn_ops.relu)
     with self.assertRaisesRegexp(ValueError,
                                  'The channel dimension of the inputs '
                                  'should be defined. Found `None`.'):
       _ = layer.apply(images)
 
-    images = random_ops.random_uniform((5, 4, 7, 9))
-    images._shape = tensor_shape.as_shape((5, None, 7, 9))
+    images = array_ops.placeholder(dtypes.float32, (5, None, 7, 9))
     layer = conv_layers.Conv2D(32, [3, 3], data_format='channels_first')
     with self.assertRaisesRegexp(ValueError,
                                  'The channel dimension of the inputs '
@@ -167,16 +168,14 @@ class ConvTest(test.TestCase):
     self.assertListEqual(layer.bias.get_shape().as_list(), [32])
 
   def testUnknownInputChannelsConv1D(self):
-    data = random_ops.random_uniform((5, 4, 7))
-    data._shape = tensor_shape.as_shape((5, 4, None))
+    data = array_ops.placeholder(dtypes.float32, (5, 4, None))
     layer = conv_layers.Conv1D(32, 3, activation=nn_ops.relu)
     with self.assertRaisesRegexp(ValueError,
                                  'The channel dimension of the inputs '
                                  'should be defined. Found `None`.'):
       _ = layer.apply(data)
 
-    data = random_ops.random_uniform((5, 7, 4))
-    data._shape = tensor_shape.as_shape((5, None, 4))
+    data = array_ops.placeholder(dtypes.float32, (5, None, 4))
     layer = conv_layers.Conv1D(32, 3, data_format='channels_first')
     with self.assertRaisesRegexp(ValueError,
                                  'The channel dimension of the inputs '
@@ -195,8 +194,7 @@ class ConvTest(test.TestCase):
     self.assertListEqual(layer.bias.get_shape().as_list(), [32])
 
   def testUnknownInputChannelsConv3D(self):
-    volumes = random_ops.random_uniform((5, 6, 7, 9, 9))
-    volumes._shape = tensor_shape.as_shape((5, 6, 7, 9, None))
+    volumes = array_ops.placeholder(dtypes.float32, (5, 6, 7, 9, None))
     layer = conv_layers.Conv3D(32, [3, 3, 3], activation=nn_ops.relu)
     with self.assertRaisesRegexp(ValueError,
                                  'The channel dimension of the inputs '
@@ -328,6 +326,7 @@ class ConvTest(test.TestCase):
     self.assertEqual(conv3d.bias_constraint, b_constraint)
 
 
+@test_util.with_c_api
 class SeparableConv2DTest(test.TestCase):
 
   def testInvalidDataFormat(self):
@@ -571,6 +570,7 @@ class SeparableConv2DTest(test.TestCase):
     self.assertEqual(layer.bias_constraint, b_constraint)
 
 
+@test_util.with_c_api
 class Conv2DTransposeTest(test.TestCase):
 
   def testInvalidDataFormat(self):
@@ -756,6 +756,7 @@ class Conv2DTransposeTest(test.TestCase):
     self.assertEqual(layer.bias_constraint, b_constraint)
 
 
+@test_util.with_c_api
 class Conv3DTransposeTest(test.TestCase):
 
   def testInvalidDataFormat(self):
diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py
index 7be1fa5cfe95f13f67ee94bb20304fba00b33d1b..e5b93a54f79bef68d96ab7efccc883033e7001c7 100644
--- a/tensorflow/python/layers/core.py
+++ b/tensorflow/python/layers/core.py
@@ -43,7 +43,7 @@ class Dense(base.Layer):
   """Densely-connected layer class.
 
   This layer implements the operation:
-  `outputs = activation(inputs.kernel + bias)`
+  `outputs = activation(inputs * kernel + bias)`
   Where `activation` is the activation function passed as the `activation`
   argument (if not `None`), `kernel` is a weights matrix created by the layer,
   and `bias` is a bias vector created by the layer
@@ -166,7 +166,7 @@ class Dense(base.Layer):
       return self.activation(outputs)  # pylint: disable=not-callable
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape)
     input_shape = input_shape.with_rank_at_least(2)
     if input_shape[-1].value is None:
@@ -310,6 +310,9 @@ class Dropout(base.Layer):
                             dropped_inputs,
                             lambda: array_ops.identity(inputs))
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
 
 def dropout(inputs,
             rate=0.5,
@@ -375,10 +378,10 @@ class Flatten(base.Layer):
   def call(self, inputs):
     outputs = array_ops.reshape(inputs, (array_ops.shape(inputs)[0], -1))
     if context.in_graph_mode():
-      outputs.set_shape(self._compute_output_shape(inputs.get_shape()))
+      outputs.set_shape(self.compute_output_shape(inputs.get_shape()))
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     output_shape = [input_shape[0]]
     if all(input_shape[1:]):
diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 2d47cc69798d8c3e34e14e24301e8be9a00f49bc..15ce6cba21fcc78126f7db58ab18934db69c15fd 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -59,6 +59,14 @@ class DenseTest(test.TestCase):
     dense.apply(random_ops.random_uniform((5, 2)))
     self.assertEqual(dense.name, 'dense_2')
 
+  def testVariableInput(self):
+    with self.test_session():
+      v = variable_scope.get_variable(
+          'X', initializer=init_ops.zeros_initializer(), shape=(1, 1))
+      x = core_layers.Dense(1)(v)
+      variables.global_variables_initializer().run()
+      self.assertAllEqual(x.eval(), [[0.0]])
+
   @test_util.run_in_graph_and_eager_modes()
   def testCall(self):
     dense = core_layers.Dense(2, activation=nn_ops.relu, name='my_dense')
@@ -315,20 +323,20 @@ class DenseTest(test.TestCase):
     ts = tensor_shape.TensorShape
     # pylint: disable=protected-access
     with self.assertRaises(ValueError):
-      dense._compute_output_shape(ts(None))
+      dense.compute_output_shape(ts(None))
     with self.assertRaises(ValueError):
-      dense._compute_output_shape(ts([]))
+      dense.compute_output_shape(ts([]))
     with self.assertRaises(ValueError):
-      dense._compute_output_shape(ts([1]))
+      dense.compute_output_shape(ts([1]))
     self.assertEqual(
         [None, 2],
-        dense._compute_output_shape((None, 3)).as_list())
+        dense.compute_output_shape((None, 3)).as_list())
     self.assertEqual(
         [None, 2],
-        dense._compute_output_shape(ts([None, 3])).as_list())
+        dense.compute_output_shape(ts([None, 3])).as_list())
     self.assertEqual(
         [None, 4, 2],
-        dense._compute_output_shape(ts([None, 4, 3])).as_list())
+        dense.compute_output_shape(ts([None, 4, 3])).as_list())
     # pylint: enable=protected-access
 
   @test_util.run_in_graph_and_eager_modes()
@@ -448,13 +456,13 @@ class FlattenTest(test.TestCase):
       self.assertEqual(y.get_shape().as_list(), [1, 12])
 
   def testComputeShape(self):
-    shape = core_layers.Flatten()._compute_output_shape((1, 2, 3, 2))
+    shape = core_layers.Flatten().compute_output_shape((1, 2, 3, 2))
     self.assertEqual(shape.as_list(), [1, 12])
 
-    shape = core_layers.Flatten()._compute_output_shape((None, 3, 2))
+    shape = core_layers.Flatten().compute_output_shape((None, 3, 2))
     self.assertEqual(shape.as_list(), [None, 6])
 
-    shape = core_layers.Flatten()._compute_output_shape((None, 3, None))
+    shape = core_layers.Flatten().compute_output_shape((None, 3, None))
     self.assertEqual(shape.as_list(), [None, None])
 
   def testFunctionalFlatten(self):
diff --git a/tensorflow/python/layers/network.py b/tensorflow/python/layers/network.py
index edc52545f92cb9b9c6f78f5c58fe44b3187d370b..ade57da411d67241e027e0bb559e49bc3c077e6d 100644
--- a/tensorflow/python/layers/network.py
+++ b/tensorflow/python/layers/network.py
@@ -709,7 +709,7 @@ class GraphNetwork(base.Layer):
     outputs, _ = self._run_internal_graph(inputs, masks)
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     if isinstance(input_shape, list):
       input_shapes = []
       for shape in input_shape:
@@ -731,12 +731,12 @@ class GraphNetwork(base.Layer):
     cache_key = layers_util.object_list_uid(input_shapes)
     if cache_key not in self._output_shape_cache:
       # Cache miss. We have to run the network graph manually (recursive calls
-      # to `_compute_output_shape`).
+      # to `compute_output_shape`).
       layers_to_output_shapes = {}
       for i in range(len(input_shapes)):
         layer = self._input_layers[i]
         input_shape = input_shapes[i]
-        # It's an input layer: then `_compute_output_shape` is identity,
+        # It's an input layer: then `compute_output_shape` is identity,
         # and there is only one node and one tensor output.
         shape_key = layer.name + '_0_0'
         layers_to_output_shapes[shape_key] = input_shape
@@ -767,9 +767,9 @@ class GraphNetwork(base.Layer):
               input_shapes.append(input_shape)
 
             if len(input_shapes) == 1:
-              output_shape = layer._compute_output_shape(input_shapes[0])  # pylint: disable=protected-access
+              output_shape = layer.compute_output_shape(input_shapes[0])
             else:
-              output_shape = layer._compute_output_shape(input_shapes)  # pylint: disable=protected-access
+              output_shape = layer.compute_output_shape(input_shapes)
             if isinstance(output_shape, list):
               output_shapes = [
                   tuple(tensor_shape.TensorShape(shape).as_list())
@@ -794,17 +794,17 @@ class GraphNetwork(base.Layer):
 
         # Store in cache.
         self._output_shape_cache[cache_key] = output_shapes
-      else:
-        # Cache hit.
-        output_shapes = self._output_shape_cache[cache_key]
+    else:
+      # Cache hit.
+      output_shapes = self._output_shape_cache[cache_key]
 
-      if isinstance(output_shapes, list):
-        if len(output_shapes) == 1:
-          return tensor_shape.TensorShape(output_shapes[0])
-        else:
-          return [tensor_shape.TensorShape(shape) for shape in output_shapes]
+    if isinstance(output_shapes, list):
+      if len(output_shapes) == 1:
+        return tensor_shape.TensorShape(output_shapes[0])
       else:
-        return tensor_shape.TensorShape(output_shapes)
+        return [tensor_shape.TensorShape(shape) for shape in output_shapes]
+    else:
+      return tensor_shape.TensorShape(output_shapes)
 
   def _run_internal_graph(self, inputs, masks=None):
     """Computes output tensors for new inputs.
diff --git a/tensorflow/python/layers/network_test.py b/tensorflow/python/layers/network_test.py
index af7813e26420eb6e85b204fd5b50e7ddafc2e5a1..7a2c7fb3fc782f6e59b8b483ec43c4abddf4d023 100644
--- a/tensorflow/python/layers/network_test.py
+++ b/tensorflow/python/layers/network_test.py
@@ -333,8 +333,8 @@ class NetworkTest(test.TestCase):
     self.assertEqual(net.get_input_at(0), x)
     self.assertEqual(net.get_output_at(0), y)
 
-    # _compute_output_shape
-    self.assertEqual(net._compute_output_shape((3, 32)).as_list(), [3, 2])
+    # compute_output_shape
+    self.assertEqual(net.compute_output_shape((3, 32)).as_list(), [3, 2])
 
   def testInvalidNetworks(self):
     # redundant inputs
@@ -504,7 +504,7 @@ class DeferredModeTest(test.TestCase):
       def call(self, inputs):
         return inputs[0] + inputs[1]
 
-      def _compute_output_shape(self, input_shape):
+      def compute_output_shape(self, input_shape):
         return input_shape[0]
 
     c = AddLayer()([a, input_b])  # pylint: disable=not-callable
diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index 83237b8733454255f0306b7ca267ab92ecfc66cc..890c12f6e00daabe7e64c00814fcb3ff8f04ae3a 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -241,7 +241,7 @@ class BatchNormalization(base.Layer):
                          'axis == [1] or axis == [3]')
 
     # Raise parameters of fp16 batch norm to fp32
-    if self.dtype == dtypes.float16:
+    if self.dtype == dtypes.float16 or self.dtype == dtypes.bfloat16:
       param_dtype = dtypes.float32
     else:
       param_dtype = self.dtype or dtypes.float32
@@ -625,6 +625,9 @@ class BatchNormalization(base.Layer):
 
     return outputs
 
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
 
 def batch_normalization(inputs,
                         axis=-1,
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index 7c91c3284e72247aab2e5fa3bad924ede891c996..e147f348b0a60dbefb38aa9f89318f261c03684e 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -105,9 +105,17 @@ class BNTest(test.TestCase):
                          infer_use_gpu):
     batch, height, width, input_channels = 2, 4, 5, 3
     shape = [batch, height, width, input_channels]
-    checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' %
-                              (dtype, train1_use_gpu, train2_use_gpu,
-                               infer_use_gpu))
+
+    # Not all characters in a dtype string representation are allowed in
+    # filenames in all operating systems. This map will sanitize these.
+    dtype_to_valid_fn = {
+        dtypes.float16: 'float16',
+        dtypes.float32: 'float32',
+    }
+    checkpoint = os.path.join(
+        self.get_temp_dir(), 'cp_%s_%s_%s_%s' % (
+            dtype_to_valid_fn[dtype], train1_use_gpu, train2_use_gpu,
+            infer_use_gpu))
 
     self._train(
         checkpoint,
diff --git a/tensorflow/python/layers/pooling.py b/tensorflow/python/layers/pooling.py
index 78dd617bec85cc29c93a86df3601f2accd5c240a..c6bd7aae07f55772d96cb60b39cf4ef40d9b3581 100644
--- a/tensorflow/python/layers/pooling.py
+++ b/tensorflow/python/layers/pooling.py
@@ -85,7 +85,7 @@ class _Pooling1D(base.Layer):
     else:
       return array_ops.squeeze(outputs, 1)
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     length = utils.conv_output_length(input_shape[1], self.pool_size[0],
                                       self.padding, self.strides[0])
@@ -273,7 +273,7 @@ class _Pooling2D(base.Layer):
         data_format=utils.convert_data_format(self.data_format, 4))
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_first':
       rows = input_shape[2]
@@ -487,7 +487,7 @@ class _Pooling3D(base.Layer):
       outputs = array_ops.transpose(outputs, (0, 4, 1, 2, 3))
     return outputs
 
-  def _compute_output_shape(self, input_shape):
+  def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
     if self.data_format == 'channels_first':
       len_dim1 = input_shape[2]
diff --git a/tensorflow/python/layers/utils.py b/tensorflow/python/layers/utils.py
index 766a6800d443a79d9bd130833c27f26c844cadaf..e8be347799acf2e92e79ca76f44f25d573489940 100644
--- a/tensorflow/python/layers/utils.py
+++ b/tensorflow/python/layers/utils.py
@@ -208,7 +208,7 @@ def smart_cond(pred, fn1, fn2, name=None):
     else:
       return fn2()
   else:
-    return control_flow_ops.cond(pred, fn1, fn2, name)
+    return control_flow_ops.cond(pred, true_fn=fn1, false_fn=fn2, name=name)
 
 
 def constant_value(pred):
@@ -216,7 +216,7 @@ def constant_value(pred):
 
   Arguments:
     pred: A scalar, either a Python bool or a TensorFlow boolean variable
-      or tensor.
+      or tensor, or the Python integer 1 or 0.
 
   Returns:
     True or False if `pred` has a constant boolean value, None otherwise.
@@ -224,6 +224,12 @@ def constant_value(pred):
   Raises:
     TypeError: If `pred` is not a Variable, Tensor or bool.
   """
+  # Allow integer booleans.
+  if pred == 0:
+    pred = False
+  elif pred == 1:
+    pred = True
+
   if isinstance(pred, bool):
     pred_value = pred
   elif isinstance(pred, variables.Variable):
diff --git a/tensorflow/python/lib/core/bfloat16.cc b/tensorflow/python/lib/core/bfloat16.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7f07deebef3d8e8f24f73a42f29f4ade4cae568d
--- /dev/null
+++ b/tensorflow/python/lib/core/bfloat16.cc
@@ -0,0 +1,674 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <array>
+
+#include "tensorflow/python/lib/core/bfloat16.h"
+
+#include "tensorflow/core/framework/numeric_types.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/python/lib/core/numpy.h"
+#include "tensorflow/python/lib/core/safe_ptr.h"
+
+namespace tensorflow {
+namespace {
+
+// Workarounds for Python 2 vs 3 API differences.
+#if PY_MAJOR_VERSION < 3
+
+PyObject* MakePyString(const string& s) {
+  return PyString_FromString(s.c_str());
+}
+
+typedef long HashType;  // NOLINT
+
+bool TfPyInt_Check(PyObject* object) { return PyInt_Check(object); }
+
+PyObject* TfPyInt_FromLong(long x) {  // NOLINT
+  return PyInt_FromLong(x);
+}
+
+long TfPyInt_AsLong(PyObject* x) {  // NOLINT
+  return PyInt_AsLong(x);
+}
+
+#else  // PY_MAJOR_VERSION < 3
+
+PyObject* MakePyString(const string& s) {
+  return PyUnicode_FromString(s.c_str());
+}
+
+bool TfPyInt_Check(PyObject* object) {
+  if (!PyLong_Check(object)) {
+    return 0;
+  }
+  int overflow = 0;
+  PyLong_AsLongAndOverflow(object, &overflow);
+  return (overflow == 0);
+}
+
+PyObject* TfPyInt_FromLong(long x) {  // NOLINT
+  return PyLong_FromLong(x);
+}
+
+long TfPyInt_AsLong(PyObject* x) {  // NOLINT
+  return PyLong_AsLong(x);
+}
+
+typedef Py_hash_t HashType;
+
+#endif  // PY_MAJOR_VERSION < 3
+
+// Forward declaration.
+extern PyTypeObject PyBfloat16_Type;
+
+// Representation of a Python bfloat16 object.
+struct PyBfloat16 {
+  PyObject_HEAD;  // Python object header
+  bfloat16 value;
+};
+
+// Returns true if 'object' is a PyBfloat16.
+bool PyBfloat16_Check(PyObject* object) {
+  return PyObject_IsInstance(object,
+                             reinterpret_cast<PyObject*>(&PyBfloat16_Type));
+}
+
+// Extracts the value of a PyBfloat16 object.
+bfloat16 PyBfloat16_Bfloat16(PyObject* object) {
+  return reinterpret_cast<PyBfloat16*>(object)->value;
+}
+
+// Constructs a PyBfloat16 object from a bfloat16.
+Safe_PyObjectPtr PyBfloat16_FromBfloat16(bfloat16 x) {
+  Safe_PyObjectPtr ref =
+      make_safe(PyBfloat16_Type.tp_alloc(&PyBfloat16_Type, 0));
+  PyBfloat16* p = reinterpret_cast<PyBfloat16*>(ref.get());
+  if (p) {
+    p->value = x;
+  }
+  return ref;
+}
+
+// Converts a Python object to a bfloat16 value. Returns true on success,
+// returns false and reports a Python error on failure.
+bool AsBfloat16(PyObject* arg, bfloat16* output) {
+  if (PyBfloat16_Check(arg)) {
+    *output = PyBfloat16_Bfloat16(arg);
+    return true;
+  }
+  if (PyFloat_Check(arg)) {
+    double d = PyFloat_AsDouble(arg);
+    if (PyErr_Occurred()) {
+      return false;
+    }
+    // TODO(phawkins): check for overflow
+    *output = bfloat16(d);
+    return true;
+  }
+  if (TfPyInt_Check(arg)) {
+    long l = TfPyInt_AsLong(arg);  // NOLINT
+    if (PyErr_Occurred()) {
+      return false;
+    }
+    // TODO(phawkins): check for overflow
+    *output = bfloat16(static_cast<float>(l));
+    return true;
+  }
+  if (PyArray_IsScalar(arg, Float)) {
+    float f;
+    PyArray_ScalarAsCtype(arg, &f);
+    *output = bfloat16(f);
+    return true;
+  }
+  PyErr_Format(PyExc_TypeError, "expected number, got %s",
+               arg->ob_type->tp_name);
+  return false;
+}
+
+// Converts a PyBfloat16 into a PyFloat.
+PyObject* PyBfloat16_Float(PyObject* self) {
+  bfloat16 x = PyBfloat16_Bfloat16(self);
+  return PyFloat_FromDouble(static_cast<double>(x));
+}
+
+// Converts a PyBfloat16 into a PyInt.
+PyObject* PyBfloat16_Int(PyObject* self) {
+  bfloat16 x = PyBfloat16_Bfloat16(self);
+  long y = static_cast<long>(x);  // NOLINT
+  return TfPyInt_FromLong(y);
+}
+
+// Negates a PyBfloat16.
+PyObject* PyBfloat16_Negative(PyObject* self) {
+  bfloat16 x = PyBfloat16_Bfloat16(self);
+  return PyBfloat16_FromBfloat16(-x).release();
+}
+
+// Binary arithmetic operators on PyBfloat16 values.
+#define BFLOAT16_BINOP(name, op)                                  \
+  PyObject* PyBfloat16_##name(PyObject* a, PyObject* b) {         \
+    bfloat16 x, y;                                                \
+    if (!AsBfloat16(a, &x) || !AsBfloat16(b, &y)) return nullptr; \
+    bfloat16 z = x op y;                                          \
+    return PyBfloat16_FromBfloat16(z).release();                  \
+  }
+BFLOAT16_BINOP(Add, +)
+BFLOAT16_BINOP(Subtract, -)
+BFLOAT16_BINOP(Multiply, *)
+BFLOAT16_BINOP(Divide, /)
+#undef BFLOAT16_BINOP
+
+// Python number methods for PyBfloat16 objects.
+PyNumberMethods PyBfloat16_AsNumber = {
+    PyBfloat16_Add,       // nb_add
+    PyBfloat16_Subtract,  // nb_subtract
+    PyBfloat16_Multiply,  // nb_multiply
+#if PY_MAJOR_VERSION < 3
+    PyBfloat16_Divide,  // nb_divide
+#endif
+    nullptr,              // nb_remainder
+    nullptr,              // nb_divmod
+    nullptr,              // nb_power
+    PyBfloat16_Negative,  // nb_negative
+    nullptr,              // nb_positive
+    nullptr,              // nb_absolute
+    nullptr,              // nb_nonzero
+    nullptr,              // nb_invert
+    nullptr,              // nb_lshift
+    nullptr,              // nb_rshift
+    nullptr,              // nb_and
+    nullptr,              // nb_xor
+    nullptr,              // nb_or
+#if PY_MAJOR_VERSION < 3
+    nullptr,  // nb_coerce
+#endif
+    PyBfloat16_Int,  // nb_int
+#if PY_MAJOR_VERSION < 3
+    PyBfloat16_Int,  // nb_long
+#else
+    nullptr,  // reserved
+#endif
+    PyBfloat16_Float,  // nb_float
+#if PY_MAJOR_VERSION < 3
+    nullptr,  // nb_oct
+    nullptr,  // nb_hex
+#endif
+
+    nullptr,  // nb_inplace_add
+    nullptr,  // nb_inplace_subtract
+    nullptr,  // nb_inplace_multiply
+#if PY_MAJOR_VERSION < 3
+    nullptr,  // nb_inplace_divide
+#endif
+    nullptr,  // nb_inplace_remainder
+    nullptr,  // nb_inplace_power
+    nullptr,  // nb_inplace_lshift
+    nullptr,  // nb_inplace_rshift
+    nullptr,  // nb_inplace_and
+    nullptr,  // nb_inplace_xor
+    nullptr,  // nb_inplace_or
+
+    nullptr,            // nb_floor_divide
+    PyBfloat16_Divide,  // nb_true_divide
+    nullptr,            // nb_inplace_floor_divide
+    nullptr,            // nb_inplace_true_divide
+    nullptr,            // nb_index
+};
+
+// Constructs a new PyBfloat16.
+PyObject* PyBfloat16_New(PyTypeObject* type, PyObject* args, PyObject* kwds) {
+  if (kwds && PyDict_Size(kwds)) {
+    PyErr_SetString(PyExc_TypeError, "constructor takes no keyword arguments");
+    return nullptr;
+  }
+  Py_ssize_t size = PyTuple_Size(args);
+  if (size != 1) {
+    PyErr_SetString(PyExc_TypeError,
+                    "expected number as argument to bfloat16 constructor");
+    return nullptr;
+  }
+  PyObject* arg = PyTuple_GetItem(args, 0);
+
+  if (PyBfloat16_Check(arg)) {
+    Py_INCREF(arg);
+    return arg;
+  } else {
+    bfloat16 value;
+    if (!AsBfloat16(arg, &value)) {
+      return nullptr;
+    }
+    return PyBfloat16_FromBfloat16(value).release();
+  }
+}
+
+// Comparisons on PyBfloat16s.
+PyObject* PyBfloat16_RichCompare(PyObject* a, PyObject* b, int op) {
+  bfloat16 x, y;
+  if (!AsBfloat16(a, &x) || !AsBfloat16(b, &y)) return nullptr;
+  bool result;
+  switch (op) {
+    case Py_LT:
+      result = x < y;
+      break;
+    case Py_LE:
+      result = x <= y;
+      break;
+    case Py_EQ:
+      result = x == y;
+      break;
+    case Py_NE:
+      result = x != y;
+      break;
+    case Py_GT:
+      result = x > y;
+      break;
+    case Py_GE:
+      result = x >= y;
+      break;
+    default:
+      LOG(FATAL) << "Invalid op type " << op;
+  }
+  return PyBool_FromLong(result);
+}
+
+// Implementation of repr() for PyBfloat16.
+PyObject* PyBfloat16_Repr(PyObject* self) {
+  bfloat16 x = reinterpret_cast<PyBfloat16*>(self)->value;
+  string v = strings::StrCat("bfloat16(", static_cast<float>(x), ")");
+  return MakePyString(v);
+}
+
+// Implementation of str() for PyBfloat16.
+PyObject* PyBfloat16_Str(PyObject* self) {
+  bfloat16 x = reinterpret_cast<PyBfloat16*>(self)->value;
+  string v = strings::StrCat(static_cast<float>(x));
+  return MakePyString(v);
+}
+
+// Hash function for PyBfloat16. We use the identity function, which is a weak
+// hash function.
+HashType PyBfloat16_Hash(PyObject* self) {
+  bfloat16 x = reinterpret_cast<PyBfloat16*>(self)->value;
+  return x.value;
+}
+
+// Python type for PyBfloat16 objects.
+PyTypeObject PyBfloat16_Type = {
+#if PY_MAJOR_VERSION < 3
+    PyObject_HEAD_INIT(nullptr) 0,  // ob_size
+#else
+    PyVarObject_HEAD_INIT(nullptr, 0)
+#endif
+    "bfloat16",                                // tp_name
+    sizeof(PyBfloat16),                        // tp_basicsize
+    0,                                         // tp_itemsize
+    nullptr,                                   // tp_dealloc
+    nullptr,                                   // tp_print
+    nullptr,                                   // tp_getattr
+    nullptr,                                   // tp_setattr
+    nullptr,                                   // tp_compare / tp_reserved
+    PyBfloat16_Repr,                           // tp_repr
+    &PyBfloat16_AsNumber,                      // tp_as_number
+    nullptr,                                   // tp_as_sequence
+    nullptr,                                   // tp_as_mapping
+    PyBfloat16_Hash,                           // tp_hash
+    nullptr,                                   // tp_call
+    PyBfloat16_Str,                            // tp_str
+    nullptr,                                   // tp_getattro
+    nullptr,                                   // tp_setattro
+    nullptr,                                   // tp_as_buffer
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  // tp_flags
+    "bfloat16 floating-point values",          // tp_doc
+    nullptr,                                   // tp_traverse
+    nullptr,                                   // tp_clear
+    PyBfloat16_RichCompare,                    // tp_richcompare
+    0,                                         // tp_weaklistoffset
+    nullptr,                                   // tp_iter
+    nullptr,                                   // tp_iternext
+    nullptr,                                   // tp_methods
+    nullptr,                                   // tp_members
+    nullptr,                                   // tp_getset
+    nullptr,                                   // tp_base
+    nullptr,                                   // tp_dict
+    nullptr,                                   // tp_descr_get
+    nullptr,                                   // tp_descr_set
+    0,                                         // tp_dictoffset
+    nullptr,                                   // tp_init
+    nullptr,                                   // tp_alloc
+    PyBfloat16_New,                            // tp_new
+    nullptr,                                   // tp_free
+    nullptr,                                   // tp_is_gc
+    nullptr,                                   // tp_bases
+    nullptr,                                   // tp_mro
+    nullptr,                                   // tp_cache
+    nullptr,                                   // tp_subclasses
+    nullptr,                                   // tp_weaklist
+    nullptr,                                   // tp_del
+    0,                                         // tp_version_tag
+};
+
+// Numpy support
+
+PyArray_ArrFuncs NPyBfloat16_ArrFuncs;
+
+PyArray_Descr NPyBfloat16_Descr = {
+    PyObject_HEAD_INIT(nullptr) & PyBfloat16_Type,  // typeobj
+    // We must register bfloat16 with a kind other than "f", because numpy
+    // considers two types with the same kind and size to be equal, but
+    // float16 != bfloat16.
+    'V',  // kind
+    // TODO(phawkins): there doesn't seem to be a way of guaranteeing a type
+    // character is unique.
+    'E',                                                  // type
+    '=',                                                  // byteorder
+    NPY_NEEDS_PYAPI | NPY_USE_GETITEM | NPY_USE_SETITEM,  // hasobject
+    0,                                                    // type_num
+    sizeof(bfloat16),                                     // elsize
+    alignof(bfloat16),                                    // alignment
+    nullptr,                                              // subarray
+    nullptr,                                              // fields
+    nullptr,                                              // names
+    &NPyBfloat16_ArrFuncs,                                // f
+};
+
+// Registered numpy type ID. Global variable populated by the registration code.
+int npy_bfloat16_ = -1;
+
+// Implementations of NumPy array methods.
+
+PyObject* NPyBfloat16_GetItem(void* data, void* arr) {
+  bfloat16 x;
+  memcpy(&x, data, sizeof(bfloat16));
+  return PyBfloat16_FromBfloat16(x).release();
+}
+
+int NPyBfloat16_SetItem(PyObject* item, void* data, void* arr) {
+  bfloat16 x;
+  if (!AsBfloat16(item, &x)) return -1;
+  memcpy(data, &x, sizeof(bfloat16));
+  return 0;
+}
+
+void ByteSwap16(void* value) {
+  char* p = reinterpret_cast<char*>(value);
+  std::swap(p[0], p[1]);
+}
+
+void NPyBfloat16_CopySwapN(void* dstv, npy_intp dstride, void* srcv,
+                           npy_intp sstride, npy_intp n, int swap, void* arr) {
+  char* dst = reinterpret_cast<char*>(dstv);
+  char* src = reinterpret_cast<char*>(srcv);
+  if (!src) {
+    return;
+  }
+  if (swap) {
+    for (npy_intp i = 0; i < n; i++) {
+      char* r = dst + dstride * i;
+      memcpy(r, src + sstride * i, sizeof(uint16_t));
+      ByteSwap16(r);
+    }
+  } else if (dstride == sizeof(uint16_t) && sstride == sizeof(uint16_t)) {
+    memcpy(dst, src, n * sizeof(uint16_t));
+  } else {
+    for (npy_intp i = 0; i < n; i++) {
+      memcpy(dst + dstride * i, src + sstride * i, sizeof(uint16_t));
+    }
+  }
+}
+
+void NPyBfloat16_CopySwap(void* dst, void* src, int swap, void* arr) {
+  if (!src) {
+    return;
+  }
+  memcpy(dst, src, sizeof(uint16_t));
+  if (swap) {
+    ByteSwap16(dst);
+  }
+}
+
+npy_bool NPyBfloat16_NonZero(void* data, void* arr) {
+  bfloat16 x;
+  memcpy(&x, data, sizeof(x));
+  return x != static_cast<bfloat16>(0);
+}
+
+// NumPy casts
+
+// Performs a NumPy array cast from type 'From' to 'To'.
+template <typename From, typename To>
+void NPyCast(void* from_void, void* to_void, npy_intp n, void* fromarr,
+             void* toarr) {
+  const From* from = reinterpret_cast<From*>(from_void);
+  To* to = reinterpret_cast<To*>(to_void);
+  for (npy_intp i = 0; i < n; ++i) {
+    to[i] = static_cast<To>(from[i]);
+  }
+}
+
+// Registers a cast between bfloat16 and type 'T'. 'numpy_type' is the NumPy
+// type corresponding to 'T'. If 'cast_is_safe', registers that bfloat16 can be
+// safely coerced to T.
+template <typename T>
+bool RegisterBfloat16Cast(int numpy_type, bool cast_is_safe) {
+  if (PyArray_RegisterCastFunc(PyArray_DescrFromType(numpy_type), npy_bfloat16_,
+                               NPyCast<T, bfloat16>) < 0) {
+    return false;
+  }
+  if (PyArray_RegisterCastFunc(&NPyBfloat16_Descr, numpy_type,
+                               NPyCast<bfloat16, T>) < 0) {
+    return false;
+  }
+  if (cast_is_safe && PyArray_RegisterCanCast(&NPyBfloat16_Descr, numpy_type,
+                                              NPY_NOSCALAR) < 0) {
+    return false;
+  }
+  return true;
+}
+
+template <typename InType, typename OutType, typename Functor>
+void BinaryUFunc(char** args, npy_intp* dimensions, npy_intp* steps,
+                 void* data) {
+  const char* i0 = args[0];
+  const char* i1 = args[1];
+  char* o = args[2];
+  for (npy_intp k = 0; k < *dimensions; k++) {
+    InType x = *reinterpret_cast<const InType*>(i0);
+    InType y = *reinterpret_cast<const InType*>(i1);
+    *reinterpret_cast<OutType*>(o) = Functor()(x, y);
+    i0 += steps[0];
+    i1 += steps[1];
+    o += steps[2];
+  }
+}
+
+template <typename Functor>
+void CompareUFunc(char** args, npy_intp* dimensions, npy_intp* steps,
+                  void* data) {
+  BinaryUFunc<bfloat16, npy_bool, Functor>(args, dimensions, steps, data);
+}
+
+struct Bfloat16EqFunctor {
+  npy_bool operator()(bfloat16 a, bfloat16 b) { return a == b; }
+};
+struct Bfloat16NeFunctor {
+  npy_bool operator()(bfloat16 a, bfloat16 b) { return a != b; }
+};
+struct Bfloat16LtFunctor {
+  npy_bool operator()(bfloat16 a, bfloat16 b) { return a < b; }
+};
+struct Bfloat16GtFunctor {
+  npy_bool operator()(bfloat16 a, bfloat16 b) { return a > b; }
+};
+struct Bfloat16LeFunctor {
+  npy_bool operator()(bfloat16 a, bfloat16 b) { return a <= b; }
+};
+struct Bfloat16GeFunctor {
+  npy_bool operator()(bfloat16 a, bfloat16 b) { return a >= b; }
+};
+
+// Initializes the module.
+bool Initialize() {
+  // It's critical to import umath to avoid crash in open source build.
+  import_umath1(false);
+
+  Safe_PyObjectPtr numpy_str = make_safe(MakePyString("numpy"));
+  if (!numpy_str) {
+    return false;
+  }
+  Safe_PyObjectPtr numpy = make_safe(PyImport_Import(numpy_str.get()));
+  if (!numpy) {
+    return false;
+  }
+
+  // We hit a mysterious crash if we haven't initialized numpy before this:
+  PyBfloat16_Type.tp_base = &PyGenericArrType_Type;
+
+  if (PyType_Ready(&PyBfloat16_Type) < 0) {
+    return false;
+  }
+
+  // Initializes the NumPy descriptor.
+  PyArray_InitArrFuncs(&NPyBfloat16_ArrFuncs);
+  NPyBfloat16_ArrFuncs.getitem = NPyBfloat16_GetItem;
+  NPyBfloat16_ArrFuncs.setitem = NPyBfloat16_SetItem;
+  NPyBfloat16_ArrFuncs.copyswapn = NPyBfloat16_CopySwapN;
+  NPyBfloat16_ArrFuncs.copyswap = NPyBfloat16_CopySwap;
+  NPyBfloat16_ArrFuncs.nonzero = NPyBfloat16_NonZero;
+
+  Py_TYPE(&NPyBfloat16_Descr) = &PyArrayDescr_Type;
+  npy_bfloat16_ = PyArray_RegisterDataType(&NPyBfloat16_Descr);
+  if (npy_bfloat16_ < 0) return false;
+
+  // Support dtype(bfloat16)
+  if (PyDict_SetItemString(PyBfloat16_Type.tp_dict, "dtype",
+                           reinterpret_cast<PyObject*>(&NPyBfloat16_Descr)) <
+      0) {
+    return false;
+  }
+
+  // Register casts
+
+  // We lie shamelessly and say that a cast from half to bfloat16 is safe.
+  // Numpy frequently uses the smallest legal representation type for small
+  // float constants (e.g., 1.0), which is often float16. Things break if these
+  // cannot be converted transparently to bfloat16.
+  if (!RegisterBfloat16Cast<Eigen::half>(NPY_HALF, /*cast_is_safe=*/true)) {
+    return false;
+  }
+
+  if (!RegisterBfloat16Cast<float>(NPY_FLOAT, /*cast_is_safe=*/true)) {
+    return false;
+  }
+  if (!RegisterBfloat16Cast<double>(NPY_DOUBLE, /*cast_is_safe=*/true)) {
+    return false;
+  }
+  if (!RegisterBfloat16Cast<int32>(NPY_INT32, /*cast_is_safe=*/false)) {
+    return false;
+  }
+  if (!RegisterBfloat16Cast<int64>(NPY_INT64, /*cast_is_safe=*/false)) {
+    return false;
+  }
+  // Following the numpy convention. imag part is dropped when converting to
+  // float.
+  if (!RegisterBfloat16Cast<complex64>(NPY_COMPLEX64, /*cast_is_safe=*/true)) {
+    return false;
+  }
+  if (!RegisterBfloat16Cast<complex128>(NPY_COMPLEX128,
+                                        /*cast_is_safe=*/true)) {
+    return false;
+  }
+
+  // Register ufuncs
+  auto register_ufunc = [&](const char* name, PyUFuncGenericFunction fn,
+                            const std::array<int, 3>& types) {
+    Safe_PyObjectPtr ufunc_obj =
+        make_safe(PyObject_GetAttrString(numpy.get(), name));
+    if (!ufunc_obj) {
+      return false;
+    }
+    PyUFuncObject* ufunc = reinterpret_cast<PyUFuncObject*>(ufunc_obj.get());
+    if (types.size() != ufunc->nargs) {
+      PyErr_Format(PyExc_AssertionError,
+                   "ufunc %s takes %d arguments, loop takes %lu", name,
+                   ufunc->nargs, types.size());
+      return false;
+    }
+    if (PyUFunc_RegisterLoopForType(ufunc, npy_bfloat16_, fn,
+                                    const_cast<int*>(types.data()),
+                                    nullptr) < 0) {
+      return false;
+    }
+    return true;
+  };
+
+  // Comparisons
+  const std::array<int, 3> compare_types = {npy_bfloat16_, npy_bfloat16_,
+                                            NPY_BOOL};
+
+  if (!register_ufunc("equal", CompareUFunc<Bfloat16EqFunctor>,
+                      compare_types)) {
+    return false;
+  }
+  if (!register_ufunc("not_equal", CompareUFunc<Bfloat16NeFunctor>,
+                      compare_types)) {
+    return false;
+  }
+  if (!register_ufunc("less", CompareUFunc<Bfloat16LtFunctor>, compare_types)) {
+    return false;
+  }
+  if (!register_ufunc("greater", CompareUFunc<Bfloat16GtFunctor>,
+                      compare_types)) {
+    return false;
+  }
+  if (!register_ufunc("less_equal", CompareUFunc<Bfloat16LeFunctor>,
+                      compare_types)) {
+    return false;
+  }
+  if (!register_ufunc("greater_equal", CompareUFunc<Bfloat16GeFunctor>,
+                      compare_types)) {
+    return false;
+  }
+  return true;
+}
+
+}  // namespace
+
+void RegisterNumpyBfloat16() {
+  if (npy_bfloat16_ >= 0) {
+    // Already initialized.
+    return;
+  }
+  if (!Initialize()) {
+    if (!PyErr_Occurred()) {
+      PyErr_SetString(PyExc_RuntimeError, "cannot load bfloat16 module.");
+    }
+    PyErr_Print();
+  }
+}
+
+PyObject* Bfloat16PyType() {
+  CHECK(PyBfloat16_Type.tp_base != nullptr);
+  Py_INCREF(&PyBfloat16_Type);
+  return reinterpret_cast<PyObject*>(&PyBfloat16_Type);
+}
+
+int Bfloat16NumpyType() {
+  CHECK_GE(npy_bfloat16_, 0);
+  return npy_bfloat16_;
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/python/lib/core/bfloat16.h b/tensorflow/python/lib/core/bfloat16.h
new file mode 100644
index 0000000000000000000000000000000000000000..a609928ba9029af00553a4664bef18d3749e64db
--- /dev/null
+++ b/tensorflow/python/lib/core/bfloat16.h
@@ -0,0 +1,34 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PYTHON_LIB_CORE_BFLOAT16_H_
+#define TENSORFLOW_PYTHON_LIB_CORE_BFLOAT16_H_
+
+#include <Python.h>
+
+namespace tensorflow {
+
+// Register the bfloat16 numpy type.
+void RegisterNumpyBfloat16();
+
+// Returns the PyObject for the bfloat16 type.
+PyObject* Bfloat16PyType();
+
+// Returns the id number of the bfloat16 numpy type.
+int Bfloat16NumpyType();
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_PYTHON_LIB_CORE_BFLOAT16_H_
diff --git a/tensorflow/python/lib/core/bfloat16.i b/tensorflow/python/lib/core/bfloat16.i
new file mode 100644
index 0000000000000000000000000000000000000000..10444b676b2549e0d9f96391f96e7a523f768d85
--- /dev/null
+++ b/tensorflow/python/lib/core/bfloat16.i
@@ -0,0 +1,30 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+%{
+#include "tensorflow/python/lib/core/bfloat16.h"
+%}
+
+%init %{
+tensorflow::RegisterNumpyBfloat16();
+%}
+
+%{
+PyObject* TF_bfloat16_type() {
+  return tensorflow::Bfloat16PyType();
+}
+%}
+
+PyObject* TF_bfloat16_type();
diff --git a/tensorflow/python/lib/core/bfloat16_test.py b/tensorflow/python/lib/core/bfloat16_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..985a11272c8a633d80b0372c0b6c669949e9cba8
--- /dev/null
+++ b/tensorflow/python/lib/core/bfloat16_test.py
@@ -0,0 +1,231 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Test cases for the bfloat16 Python type."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import numpy as np
+
+# pylint: disable=unused-import,g-bad-import-order
+from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.platform import test
+
+
+bfloat16 = pywrap_tensorflow.TF_bfloat16_type()
+
+
+class Bfloat16Test(test.TestCase):
+
+  def float_values(self):
+    """Returns values that should round trip exactly to float and back."""
+    epsilon = float.fromhex("1.0p-7")
+    return [
+        0.0, 1.0, -1, 0.5, -0.5, epsilon, 1.0 + epsilon, 1.0 - epsilon,
+        -1.0 - epsilon, -1.0 + epsilon, 3.5, 42.0, 255.0, 256.0,
+        float("inf"), float("-inf"), float("nan")]
+
+  def _assertFloatIdentical(self, v, w):
+    if math.isnan(v):
+      self.assertTrue(math.isnan(w))
+    else:
+      self.assertEqual(v, w)
+
+  def testRoundTripToFloat(self):
+    for v in self.float_values():
+      self._assertFloatIdentical(v, float(bfloat16(v)))
+
+  def testRoundTripToInt(self):
+    for v in [-256, -255, -34, -2, -1, 0, 1, 2, 10, 47, 128, 255, 256, 512]:
+      self.assertEqual(v, int(bfloat16(v)))
+
+  def testStr(self):
+    self.assertEqual("0", str(bfloat16(0.0)))
+    self.assertEqual("1", str(bfloat16(1.0)))
+    self.assertEqual("-3.5", str(bfloat16(-3.5)))
+    self.assertEqual("0.0078125", str(bfloat16(float.fromhex("1.0p-7"))))
+    self.assertEqual("inf", str(bfloat16(float("inf"))))
+    self.assertEqual("-inf", str(bfloat16(float("-inf"))))
+    self.assertEqual("nan", str(bfloat16(float("nan"))))
+
+  def testRepr(self):
+    self.assertEqual("bfloat16(0)", repr(bfloat16(0)))
+    self.assertEqual("bfloat16(1)", repr(bfloat16(1)))
+    self.assertEqual("bfloat16(-3.5)", repr(bfloat16(-3.5)))
+    self.assertEqual("bfloat16(0.0078125)",
+                     repr(bfloat16(float.fromhex("1.0p-7"))))
+    self.assertEqual("bfloat16(inf)", repr(bfloat16(float("inf"))))
+    self.assertEqual("bfloat16(-inf)", repr(bfloat16(float("-inf"))))
+    self.assertEqual("bfloat16(nan)", repr(bfloat16(float("nan"))))
+
+  def testHash(self):
+    self.assertEqual(0, hash(bfloat16(0.0)))
+    self.assertEqual(0x3f80, hash(bfloat16(1.0)))
+    self.assertEqual(0x7fc0, hash(bfloat16(float("nan"))))
+
+  # Tests for Python operations
+  def testNegate(self):
+    for v in self.float_values():
+      self._assertFloatIdentical(-v, float(-bfloat16(v)))
+
+  def testAdd(self):
+    self._assertFloatIdentical(0, float(bfloat16(0) + bfloat16(0)))
+    self._assertFloatIdentical(1, float(bfloat16(1) + bfloat16(0)))
+    self._assertFloatIdentical(0, float(bfloat16(1) + bfloat16(-1)))
+    self._assertFloatIdentical(5.5, float(bfloat16(2) + bfloat16(3.5)))
+    self._assertFloatIdentical(1.25, float(bfloat16(3.5) + bfloat16(-2.25)))
+    self._assertFloatIdentical(float("inf"),
+                               float(bfloat16(float("inf")) + bfloat16(-2.25)))
+    self._assertFloatIdentical(float("-inf"),
+                               float(bfloat16(float("-inf")) + bfloat16(-2.25)))
+    self.assertTrue(math.isnan(float(bfloat16(3.5) + bfloat16(float("nan")))))
+
+  def testSub(self):
+    self._assertFloatIdentical(0, float(bfloat16(0) - bfloat16(0)))
+    self._assertFloatIdentical(1, float(bfloat16(1) - bfloat16(0)))
+    self._assertFloatIdentical(2, float(bfloat16(1) - bfloat16(-1)))
+    self._assertFloatIdentical(-1.5, float(bfloat16(2) - bfloat16(3.5)))
+    self._assertFloatIdentical(5.75, float(bfloat16(3.5) - bfloat16(-2.25)))
+    self._assertFloatIdentical(float("-inf"),
+                               float(bfloat16(-2.25) - bfloat16(float("inf"))))
+    self._assertFloatIdentical(float("inf"),
+                               float(bfloat16(-2.25) - bfloat16(float("-inf"))))
+    self.assertTrue(math.isnan(float(bfloat16(3.5) - bfloat16(float("nan")))))
+
+  def testMul(self):
+    self._assertFloatIdentical(0, float(bfloat16(0) * bfloat16(0)))
+    self._assertFloatIdentical(0, float(bfloat16(1) * bfloat16(0)))
+    self._assertFloatIdentical(-1, float(bfloat16(1) * bfloat16(-1)))
+    self._assertFloatIdentical(-7.875, float(bfloat16(3.5) * bfloat16(-2.25)))
+    self._assertFloatIdentical(float("-inf"),
+                               float(bfloat16(float("inf")) * bfloat16(-2.25)))
+    self._assertFloatIdentical(float("inf"),
+                               float(bfloat16(float("-inf")) * bfloat16(-2.25)))
+    self.assertTrue(math.isnan(float(bfloat16(3.5) * bfloat16(float("nan")))))
+
+  def testDiv(self):
+    self.assertTrue(math.isnan(float(bfloat16(0) / bfloat16(0))))
+    self._assertFloatIdentical(float("inf"), float(bfloat16(1) / bfloat16(0)))
+    self._assertFloatIdentical(-1, float(bfloat16(1) / bfloat16(-1)))
+    self._assertFloatIdentical(-1.75, float(bfloat16(3.5) / bfloat16(-2)))
+    self._assertFloatIdentical(float("-inf"),
+                               float(bfloat16(float("inf")) / bfloat16(-2.25)))
+    self._assertFloatIdentical(float("inf"),
+                               float(bfloat16(float("-inf")) / bfloat16(-2.25)))
+    self.assertTrue(math.isnan(float(bfloat16(3.5) / bfloat16(float("nan")))))
+
+  def testLess(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v < w, bfloat16(v) < bfloat16(w))
+
+  def testLessEqual(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v <= w, bfloat16(v) <= bfloat16(w))
+
+  def testGreater(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v > w, bfloat16(v) > bfloat16(w))
+
+  def testGreaterEqual(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v >= w, bfloat16(v) >= bfloat16(w))
+
+  def testEqual(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v == w, bfloat16(v) == bfloat16(w))
+
+  def testNotEqual(self):
+    for v in self.float_values():
+      for w in self.float_values():
+        self.assertEqual(v != w, bfloat16(v) != bfloat16(w))
+
+
+class Bfloat16NumPyTest(test.TestCase):
+
+  def testDtype(self):
+    self.assertEqual(bfloat16, np.dtype(bfloat16))
+
+  def testArray(self):
+    x = np.array([[1, 2, 3]], dtype=bfloat16)
+    self.assertEqual(bfloat16, x.dtype)
+    self.assertEqual("[[bfloat16(1) bfloat16(2) bfloat16(3)]]", str(x))
+    self.assertAllEqual(x, x)
+    self.assertAllClose(x, x)
+    self.assertTrue((x == x).all())
+
+  def testComparisons(self):
+    x = np.array([401408, 7, -32], dtype=np.float32)
+    bx = x.astype(bfloat16)
+    y = np.array([82432, 7, 0], dtype=np.float32)
+    by = y.astype(bfloat16)
+    self.assertAllEqual(x == y, bx == by)
+    self.assertAllEqual(x != y, bx != by)
+    self.assertAllEqual(x < y, bx < by)
+    self.assertAllEqual(x > y, bx > by)
+    self.assertAllEqual(x <= y, bx <= by)
+    self.assertAllEqual(x >= y, bx >= by)
+
+  def testEqual2(self):
+    a = np.array([401408], bfloat16)
+    b = np.array([82432], bfloat16)
+    self.assertFalse(a.__eq__(b))
+
+  def testCasts(self):
+    for dtype in [
+        np.float16, np.float32, np.float64, np.int32, np.int64,
+        np.complex64, np.complex128]:
+      x = np.array([[1, 2, 3]], dtype=dtype)
+      y = x.astype(bfloat16)
+      z = y.astype(dtype)
+      self.assertTrue(np.all(x == y))
+      self.assertEqual(bfloat16, y.dtype)
+      self.assertTrue(np.all(x == z))
+      self.assertEqual(dtype, z.dtype)
+
+  def testConformNumpyComplex(self):
+    for dtype in [np.complex64, np.complex128]:
+      x = np.array([1.1, 2.2 + 2.2j, 3.3], dtype=dtype)
+      y_np = x.astype(np.float32)
+      y_tf = x.astype(bfloat16)
+      self.assertAllClose(y_np, y_tf, atol=2e-2)
+
+      z_np = y_np.astype(dtype)
+      z_tf = y_tf.astype(dtype)
+      self.assertAllClose(z_np, z_tf, atol=2e-2)
+
+  def testAdd(self):
+    x = np.array([[1, 2, 3]], dtype=bfloat16)
+    y = np.array([[4, 5, 6]], dtype=bfloat16)
+    self.assertAllClose(np.array([[5, 7, 9]]), x + y)
+
+  def testLogSumExp(self):
+    x = np.array([[1, 2, 3]], dtype=np.float32)
+    y = np.array([[4, 5, 6]], dtype=np.float32)
+    self.assertAllClose(np.logaddexp(x, y),
+                        np.logaddexp(x.astype(bfloat16), y.astype(bfloat16)),
+                        atol=2e-2)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc
index cf2c2e6eb00cccf82adf3c9eb65b685130a2f632..994af69386b278f6b88c051f898cd6a9dc607f3f 100644
--- a/tensorflow/python/lib/core/ndarray_tensor.cc
+++ b/tensorflow/python/lib/core/ndarray_tensor.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/python/lib/core/bfloat16.h"
 #include "tensorflow/python/lib/core/ndarray_tensor_bridge.h"
 
 namespace tensorflow {
@@ -125,6 +126,10 @@ Status PyArray_TYPE_to_TF_DataType(PyArrayObject* array,
       // custom struct type.
       return PyArrayDescr_to_TF_DataType(descr, out_tf_datatype);
     default:
+      if (pyarray_type == Bfloat16NumpyType()) {
+        *out_tf_datatype = TF_BFLOAT16;
+        break;
+      }
       // TODO(mrry): Support these.
       return errors::Internal("Unsupported feed type");
   }
diff --git a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc
index 82c45f5a315d485585b1514634201225f4123de1..65e2178cda498294ffc4a5066b5692132e86180f 100644
--- a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc
+++ b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/python/lib/core/bfloat16.h"
 #include "tensorflow/python/lib/core/ndarray_tensor_bridge.h"
 
 namespace tensorflow {
@@ -175,7 +176,7 @@ Status TF_DataType_to_PyArray_TYPE(TF_DataType tf_datatype,
       *out_pyarray_type = NPY_INT32;
       break;
     case TF_BFLOAT16:
-      *out_pyarray_type = NPY_UINT16;
+      *out_pyarray_type = Bfloat16NumpyType();
       break;
     default:
       return errors::Internal("Tensorflow type ", tf_datatype,
diff --git a/tensorflow/python/lib/core/numpy.h b/tensorflow/python/lib/core/numpy.h
index 0eafe890dbafd065ece72482f4b0c0080ce458ef..25322b458b8475882830599dd4ae02f10d97094b 100644
--- a/tensorflow/python/lib/core/numpy.h
+++ b/tensorflow/python/lib/core/numpy.h
@@ -32,6 +32,7 @@ limitations under the License.
 #include <Python.h>
 
 #include "numpy/arrayobject.h"
+#include "numpy/ufuncobject.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc
index 8bf831f8badf4f17dc74f0ec29f12e0acdac171e..dc56b3948626de7d76895378ade04b14e7d779b1 100644
--- a/tensorflow/python/lib/core/py_func.cc
+++ b/tensorflow/python/lib/core/py_func.cc
@@ -18,15 +18,19 @@ limitations under the License.
 #include <array>
 
 #include "numpy/arrayobject.h"
+#include "tensorflow/c/eager/c_api.h"
+#include "tensorflow/c/tf_status_helper.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/threadpool.h"
-#include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/python/eager/pywrap_tfe.h"
 #include "tensorflow/python/lib/core/ndarray_tensor_bridge.h"
+#include "tensorflow/python/lib/core/py_util.h"
+#include "tensorflow/python/lib/core/safe_ptr.h"
 #include <Python.h>
 
 namespace tensorflow {
@@ -48,6 +52,9 @@ struct PyCall {
   // with this "token".
   string token;
 
+  // True if the call is associated with an EagerPyFunc.
+  bool eager;
+
   // Inputs and outputs of this function invocation.
   std::vector<Tensor> ins;
   std::vector<Tensor> out;
@@ -55,19 +62,26 @@ struct PyCall {
 
 // Givens the 'call', prepares the token and inputs as a python tuple
 // that is appropriate for calling the trampoline.
-Status MakeArgTuple(PyCall* call, PyObject** tuple) {
+Status MakeArgTuple(const PyCall* call, PyObject** tuple) {
   int64 n = call->ins.size();
   PyObject* lst = PyList_New(n);
   CHECK(lst);
   for (int64 i = 0; i < n; ++i) {
+    PyObject* arg = nullptr;
     const Tensor& t = call->ins[i];
-    PyObject* a = nullptr;
-    Status s = ConvertTensorToNdarray(t, &a);
-    if (!s.ok()) {
-      Py_DECREF(lst);
-      return s;
+    if (call->eager) {
+      arg = EagerTensorFromHandle(TFE_NewTensorHandle(t));
+      if (arg == nullptr) {
+        return errors::Internal("Unable to procure EagerTensor from Tensor.");
+      }
+    } else {
+      Status s = ConvertTensorToNdarray(t, &arg);
+      if (!s.ok()) {
+        Py_DECREF(lst);
+        return s;
+      }
     }
-    PyList_SetItem(lst, i, a);
+    PyList_SetItem(lst, i, arg);
   }
   *tuple = Py_BuildValue("(sN)", call->token.c_str(), lst);
   CHECK(*tuple);
@@ -133,46 +147,16 @@ bool IsSingleNone(PyObject* obj) {
   return item == Py_None;
 }
 
-// py.__class__.__name__
-const char* ClassName(PyObject* py) {
-/* PyPy doesn't have a separate C API for old-style classes. */
-#if PY_MAJOR_VERSION < 3 && !defined(PYPY_VERSION)
-  if (PyClass_Check(py))
-    return PyString_AS_STRING(
-        CHECK_NOTNULL(reinterpret_cast<PyClassObject*>(py)->cl_name));
-  if (PyInstance_Check(py))
-    return PyString_AS_STRING(CHECK_NOTNULL(
-        reinterpret_cast<PyInstanceObject*>(py)->in_class->cl_name));
-#endif
-  if (Py_TYPE(py) == &PyType_Type) {
-    return reinterpret_cast<PyTypeObject*>(py)->tp_name;
-  }
-  return Py_TYPE(py)->tp_name;
-}
-
-string PyExcFetch() {
-  CHECK(PyErr_Occurred()) << "Must only call PyExcFetch after an exception.";
-  PyObject* ptype;
-  PyObject* pvalue;
-  PyObject* ptraceback;
-  PyErr_Fetch(&ptype, &pvalue, &ptraceback);
-  PyErr_NormalizeException(&ptype, &pvalue, &ptraceback);
-  string err = ClassName(ptype);
-  if (pvalue) {
-    PyObject* str = PyObject_Str(pvalue);
-    if (str) {
-#if PY_MAJOR_VERSION < 3
-      strings::StrAppend(&err, ": ", PyString_AS_STRING(str));
-#else
-      strings::StrAppend(&err, ": ", PyUnicode_AsUTF8(str));
-#endif
-      Py_DECREF(str);
-    }
-    Py_DECREF(pvalue);
-  }
-  Py_DECREF(ptype);
-  Py_XDECREF(ptraceback);
-  return err;
+// Retrieves a Tensor from `eager_tensor` and stores it in `output_tensor`.
+Status ExtractTensorFromEagerTensor(const PyObject* eager_tensor,
+                                    Tensor* output_tensor,
+                                    TF_Status* tf_status) {
+  // TODO(akshayka): Lift the restriction requiring output tensors to
+  // lie in host memory; EagerPyFunc should be able to dispatch ops on GPU
+  // tensors, so we should eventually implement a GPU kernel for EagerPyFunc.
+  *output_tensor = *TFE_TensorHandleUnderlyingTensorInHostMemory(
+      EagerTensor_Handle(eager_tensor), tf_status);
+  return StatusFromTF_Status(tf_status);
 }
 
 // Calls the registered py function through the trampoline.
@@ -195,18 +179,18 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) {
     if (PyErr_Occurred()) {
       if (PyErr_ExceptionMatches(PyExc_ValueError) ||
           PyErr_ExceptionMatches(PyExc_TypeError)) {
-        return errors::InvalidArgument(PyExcFetch());
+        return errors::InvalidArgument(PyExceptionFetch());
       } else if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
         *out_log_on_error = false;
-        return errors::OutOfRange(PyExcFetch());
+        return errors::OutOfRange(PyExceptionFetch());
       } else if (PyErr_ExceptionMatches(PyExc_MemoryError)) {
-        return errors::ResourceExhausted(PyExcFetch());
+        return errors::ResourceExhausted(PyExceptionFetch());
       } else if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
-        return errors::Unimplemented(PyExcFetch());
+        return errors::Unimplemented(PyExceptionFetch());
       } else {
         // TODO(ebrevdo): Check if exception is an OpError and use the
         // OpError.error_code property to map it back in the Status.
-        return errors::Unknown(PyExcFetch());
+        return errors::Unknown(PyExceptionFetch());
       }
     } else {
       return errors::Internal("Failed to run py callback ", call->token,
@@ -214,21 +198,37 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) {
     }
   }
 
-  // Process the return values and converts them to tf Tensors.
+  // Process the return values and convert them to TF Tensors.
   Status s;
   if (PyList_Check(result)) {
-    // 'result' is a list.
     call->out.clear();
     for (int i = 0; i < PyList_Size(result); ++i) {
       Tensor t;
-      s = ConvertNdarrayToTensor(PyList_GetItem(result, i), &t);
+      if (call->eager) {
+        auto tf_status = tensorflow::make_safe(TF_NewStatus());
+        s = ExtractTensorFromEagerTensor(PyList_GetItem(result, i), &t,
+                                         tf_status.get());
+      } else {
+        s = ConvertNdarrayToTensor(PyList_GetItem(result, i), &t);
+      }
+
       if (!s.ok()) {
         break;
       }
       call->out.push_back(t);
     }
+  } else if (EagerTensor_CheckExact(result) || result == Py_None) {
+    DCHECK(call->eager);
+    Tensor t;
+    if (result != Py_None) {
+      auto tf_status = tensorflow::make_safe(TF_NewStatus());
+      s = ExtractTensorFromEagerTensor(result, &t, tf_status.get());
+      if (s.ok()) {
+        call->out.push_back(t);
+      }
+    }
   } else if (PyArray_Check(result)) {
-    // 'result' is a single ndarray.
+    DCHECK(!call->eager);
     if (!IsSingleNone(result)) {
       Tensor t;
       s = ConvertNdarrayToTensor(result, &t);
@@ -417,11 +417,13 @@ class PyFuncOp : public OpKernel {
  public:
   explicit PyFuncOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("token", &token_));
+    eager_ = type_string() == "EagerPyFunc";
   }
 
   void Compute(OpKernelContext* ctx) override {
     PyCall call;
     call.token = token_;
+    call.eager = eager_;
     for (int i = 0; i < ctx->num_inputs(); ++i) {
       call.ins.push_back(ctx->input(i));
     }
@@ -430,6 +432,9 @@ class PyFuncOp : public OpKernel {
     py_threadstate = PyGILState_Ensure();
     bool log_on_error;
     Status s = DoCallPyFunc(&call, &log_on_error);
+    // Sometimes py_funcs can be called without a session and leak memory. This
+    // ensures we clear the decref cache so this doesn't happen.
+    ClearDecrefCache();
     PyGILState_Release(py_threadstate);
 
     // Ensures that GIL is released even when !s.ok().
@@ -460,9 +465,15 @@ class PyFuncOp : public OpKernel {
  private:
   string token_;
 
+  // True if and only if this op should execute the python function eagerly,
+  // i.e., if and only if the eager attribute is set.
+  bool eager_;
+
   TF_DISALLOW_COPY_AND_ASSIGN(PyFuncOp);
 };
+
 REGISTER_KERNEL_BUILDER(Name("PyFunc").Device(DEVICE_CPU), PyFuncOp);
 REGISTER_KERNEL_BUILDER(Name("PyFuncStateless").Device(DEVICE_CPU), PyFuncOp);
+REGISTER_KERNEL_BUILDER(Name("EagerPyFunc").Device(DEVICE_CPU), PyFuncOp);
 
 }  // end namespace tensorflow
diff --git a/tensorflow/python/lib/core/py_func.h b/tensorflow/python/lib/core/py_func.h
index 5a451d5f43285d19dff6c158ebc28045b3ff13d4..3197a7ddfa0ce3db9f8244215690e5ede5096ac2 100644
--- a/tensorflow/python/lib/core/py_func.h
+++ b/tensorflow/python/lib/core/py_func.h
@@ -24,21 +24,27 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Called by py code on initialization.
+// Called by python code on initialization.
 //
 // "trampoline" must represent a python function which has the
 // following signature:
-//   (string, list(ndarray)) -> ndarray | list(ndarray) | python scalar
+//   (string, list(ndarray)) | (string, list(EagerTensor)) ->
+//     ndarray | list(ndarray) | python scalar |
+//     EagerTensor | list(EagerTensor) | None
 //
 // The trampoline takes two arguments, the first is a string token
 // used by the python frontend's dispatching logic; the second is a
-// list of numpy ndarrays.
+// list of numpy ndarrays or EagerTensor objects. It can return a
+// single numpy ndarray, a list of numpy ndarrays, a python scalar, an
+// EagerTensor, a list of EagerTensors, or None.
 //
-// The trampoline can return a single numpy ndarray, a list of numpy
-// ndarrays, or a simply python scalar. The C++ runtime converts them,
-// if supported, back to Tensor objects.
+// PyFunc requires inputs and outputs to be ndarrays. EagerPyFunc requires
+// inputs to be a list of EagerTensors and outputs to be an EagerTensor, a list
+// of EagerTensors, or None.
 //
-// This is called by script_ops.py during its module initialization.
+// The C++ runtime converts outputs back to Tensor objects.
+//
+// This function is called by script_ops.py during its module initialization.
 //
 // TODO(zhifengc): Support distributed runtime.
 void InitializePyTrampoline(PyObject* trampoline);
diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc b/tensorflow/python/lib/core/py_seq_tensor.cc
index 71cb38f8fd24beeb9efe149a6bd39e0ef2031051..317bdc2e14747583f372808f48a5928273f5570a 100644
--- a/tensorflow/python/lib/core/py_seq_tensor.cc
+++ b/tensorflow/python/lib/core/py_seq_tensor.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/python/lib/core/numpy.h"
+#include "tensorflow/python/lib/core/py_util.h"
 #include "tensorflow/python/lib/core/safe_ptr.h"
 
 namespace tensorflow {
@@ -89,12 +90,25 @@ Status InferShapeAndType(PyObject* obj, TensorShape* shape, DataType* dtype) {
       *dtype = DT_STRING;
     } else if (PySequence_Check(obj)) {
       auto length = PySequence_Length(obj);
-      shape->AddDim(length);
       if (length > 0) {
+        shape->AddDim(length);
         obj = PySequence_GetItem(obj, 0);
         continue;
-      } else {
+      } else if (length == 0) {
+        shape->AddDim(length);
         *dtype = DT_INVALID;  // Invalid dtype for empty tensors.
+      } else {
+        // The sequence does not have a valid length (PySequence_Length < 0).
+        if (PyErr_Occurred()) {
+          // PySequence_Length failed and set an exception. Fetch the message
+          // and convert it to a failed status.
+          return errors::InvalidArgument(PyExceptionFetch());
+        } else {
+          // This is almost certainly dead code: PySequence_Length failed but
+          // did not set an exception.
+          return errors::InvalidArgument(
+              "Attempted to convert an invalid sequence to a Tensor.");
+        }
       }
     } else if (IsPyFloat(obj)) {
       *dtype = DT_DOUBLE;
diff --git a/tensorflow/python/lib/core/py_util.cc b/tensorflow/python/lib/core/py_util.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2635694e23c07dd8e75d4bb0cfb9e83a2042d921
--- /dev/null
+++ b/tensorflow/python/lib/core/py_util.cc
@@ -0,0 +1,70 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/python/lib/core/py_util.h"
+
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include <Python.h>
+
+namespace tensorflow {
+namespace {
+
+// py.__class__.__name__
+const char* ClassName(PyObject* py) {
+/* PyPy doesn't have a separate C API for old-style classes. */
+#if PY_MAJOR_VERSION < 3 && !defined(PYPY_VERSION)
+  if (PyClass_Check(py))
+    return PyString_AS_STRING(
+        CHECK_NOTNULL(reinterpret_cast<PyClassObject*>(py)->cl_name));
+  if (PyInstance_Check(py))
+    return PyString_AS_STRING(CHECK_NOTNULL(
+        reinterpret_cast<PyInstanceObject*>(py)->in_class->cl_name));
+#endif
+  if (Py_TYPE(py) == &PyType_Type) {
+    return reinterpret_cast<PyTypeObject*>(py)->tp_name;
+  }
+  return Py_TYPE(py)->tp_name;
+}
+
+}  // end namespace
+
+string PyExceptionFetch() {
+  CHECK(PyErr_Occurred())
+      << "Must only call PyExceptionFetch after an exception.";
+  PyObject* ptype;
+  PyObject* pvalue;
+  PyObject* ptraceback;
+  PyErr_Fetch(&ptype, &pvalue, &ptraceback);
+  PyErr_NormalizeException(&ptype, &pvalue, &ptraceback);
+  string err = ClassName(ptype);
+  if (pvalue) {
+    PyObject* str = PyObject_Str(pvalue);
+    if (str) {
+#if PY_MAJOR_VERSION < 3
+      strings::StrAppend(&err, ": ", PyString_AS_STRING(str));
+#else
+      strings::StrAppend(&err, ": ", PyUnicode_AsUTF8(str));
+#endif
+      Py_DECREF(str);
+    }
+    Py_DECREF(pvalue);
+  }
+  Py_DECREF(ptype);
+  Py_XDECREF(ptraceback);
+  return err;
+}
+
+}  // end namespace tensorflow
diff --git a/tensorflow/python/lib/core/py_util.h b/tensorflow/python/lib/core/py_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..44dfe7ba21285d06667a8d0f6ab8ac0ec8f2aa00
--- /dev/null
+++ b/tensorflow/python/lib/core/py_util.h
@@ -0,0 +1,27 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PYTHON_LIB_CORE_UTIL_H_
+#define TENSORFLOW_PYTHON_LIB_CORE_UTIL_H_
+
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+// Fetch the exception message as a string. An exception must be set
+// (PyErr_Occurred() must be true).
+string PyExceptionFetch();
+}  // end namespace tensorflow
+
+#endif  // TENSORFLOW_PYTHON_LIB_CORE_UTIL_H_
diff --git a/tensorflow/python/lib/core/safe_ptr.cc b/tensorflow/python/lib/core/safe_ptr.cc
index 456ea3348baa634075082fedde9dac175e237997..ce34b6d0041878c4122d36ab8bf9db6c17253680 100644
--- a/tensorflow/python/lib/core/safe_ptr.cc
+++ b/tensorflow/python/lib/core/safe_ptr.cc
@@ -16,25 +16,21 @@ limitations under the License.
 #include "tensorflow/python/lib/core/safe_ptr.h"
 
 namespace tensorflow {
-namespace {
 
-inline void Py_DECREF_wrapper(PyObject* o) { Py_DECREF(o); }
-
-}  // namespace
-
-Safe_PyObjectPtr make_safe(PyObject* o) {
-  return Safe_PyObjectPtr(o, Py_DECREF_wrapper);
+Safe_PyObjectPtr make_safe(PyObject* object) {
+  return Safe_PyObjectPtr(object);
 }
 
 Safe_TF_TensorPtr make_safe(TF_Tensor* tensor) {
-  return Safe_TF_TensorPtr(tensor, TF_DeleteTensor);
+  return Safe_TF_TensorPtr(tensor);
 }
 
 Safe_TFE_TensorHandlePtr make_safe(TFE_TensorHandle* handle) {
-  return Safe_TFE_TensorHandlePtr(handle, TFE_DeleteTensorHandle);
+  return Safe_TFE_TensorHandlePtr(handle);
 }
 
 Safe_TF_StatusPtr make_safe(TF_Status* status) {
-  return Safe_TF_StatusPtr(status, TF_DeleteStatus);
+  return Safe_TF_StatusPtr(status);
 }
+
 }  // namespace tensorflow
diff --git a/tensorflow/python/lib/core/safe_ptr.h b/tensorflow/python/lib/core/safe_ptr.h
index 70cd2fdf6ccf4d722892f06e1e3aa40919b63ac7..80db840aebcc7ca341b0f6c40fdaee2136d21aaa 100644
--- a/tensorflow/python/lib/core/safe_ptr.h
+++ b/tensorflow/python/lib/core/safe_ptr.h
@@ -17,39 +17,51 @@ limitations under the License.
 #define THIRD_PARTY_TENSORFLOW_PYTHON_LIB_CORE_SAFE_PTR_H_
 
 #include <memory>
-#include <Python.h>
 
+#include <Python.h>
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/eager/c_api.h"
 
 namespace tensorflow {
+namespace detail {
+
+struct PyDecrefDeleter {
+  void operator()(PyObject* p) const { Py_DECREF(p); }
+};
+
+struct TFTensorDeleter {
+  void operator()(TF_Tensor* p) const { TF_DeleteTensor(p); }
+};
+
+struct TFETensorHandleDeleter {
+  void operator()(TFE_TensorHandle* p) const { TFE_DeleteTensorHandle(p); }
+};
+
+struct TFStatusDeleter {
+  void operator()(TF_Status* p) const { TF_DeleteStatus(p); }
+};
+
+}  // namespace detail
 
 // Safe container for an owned PyObject. On destruction, the reference count of
 // the contained object will be decremented.
-typedef void (*Py_DECREF_wrapper_type)(PyObject*);
-typedef std::unique_ptr<PyObject, Py_DECREF_wrapper_type> Safe_PyObjectPtr;
+using Safe_PyObjectPtr = std::unique_ptr<PyObject, detail::PyDecrefDeleter>;
 Safe_PyObjectPtr make_safe(PyObject* o);
 
 // Safe containers for an owned TF_Tensor. On destruction, the tensor will be
 // deleted by TF_DeleteTensor.
-// Note: can't use decltype(&TF_DeleteTensor) due to SWIG
-typedef void (*TF_DeleteTensor_type)(TF_Tensor*);
-typedef std::unique_ptr<TF_Tensor, TF_DeleteTensor_type> Safe_TF_TensorPtr;
+using Safe_TF_TensorPtr = std::unique_ptr<TF_Tensor, detail::TFTensorDeleter>;
 Safe_TF_TensorPtr make_safe(TF_Tensor* tensor);
 
 // Safe containers for an owned TFE_TensorHandle. On destruction, the handle
-// will be deleted by TFE_DeleteTensorHandle. Note: can't use
-// decltype(&TFE_DeleteTensorHandle) due to SWIG
-typedef void (*TFE_DeleteTensorHandle_type)(TFE_TensorHandle*);
-typedef std::unique_ptr<TFE_TensorHandle, TFE_DeleteTensorHandle_type>
-    Safe_TFE_TensorHandlePtr;
+// will be deleted by TFE_DeleteTensorHandle.
+using Safe_TFE_TensorHandlePtr =
+    std::unique_ptr<TFE_TensorHandle, detail::TFETensorHandleDeleter>;
 Safe_TFE_TensorHandlePtr make_safe(TFE_TensorHandle* handle);
 
 // Safe containers for an owned TF_Status. On destruction, the handle
-// will be deleted by TF_DeleteStatus. Note: can't use
-// decltype(&TF_DeleteStatus) due to SWIG
-typedef void (*TF_DeleteStatus_type)(TF_Status*);
-typedef std::unique_ptr<TF_Status, TF_DeleteStatus_type> Safe_TF_StatusPtr;
+// will be deleted by TF_DeleteStatus.
+using Safe_TF_StatusPtr = std::unique_ptr<TF_Status, detail::TFStatusDeleter>;
 Safe_TF_StatusPtr make_safe(TF_Status* status);
 
 }  // namespace tensorflow
diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
index 87f8d1486011683c89095aeb04e2d01461f83749..55cae0bcbfca8a9cacfe525fe3b69c7fb232acd3 100644
--- a/tensorflow/python/ops/array_grad.py
+++ b/tensorflow/python/ops/array_grad.py
@@ -524,6 +524,16 @@ def _TransposeGrad(op, grad):
   return [array_ops.transpose(grad, array_ops.invert_permutation(p)), None]
 
 
+@ops.RegisterGradient("ConjugateTranspose")
+def _ConjugateTransposeGrad(op, grad):
+  """Returns conj(unshuffle(grad))."""
+  p = op.inputs[1]
+  return [
+      array_ops.transpose(
+          grad, array_ops.invert_permutation(p), conjugate=True), None
+  ]
+
+
 ops.NotDifferentiable("Shape")
 
 
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 38eff54c692acd4cb9e2a75caa2c9e9cc23045be..78b4a7101cd25844419d25f78ee97edddae03c3b 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -70,6 +70,7 @@ See the @{$python/array_ops} guide.
 @@quantize_v2
 @@quantized_concat
 @@setdiff1d
+@@guarantee_const
 @@fake_quant_with_min_max_args
 @@fake_quant_with_min_max_args_gradient
 @@fake_quant_with_min_max_vars
@@ -125,11 +126,8 @@ def identity(input, name=None):  # pylint: disable=redefined-builtin
   if context.in_graph_mode():
     return gen_array_ops.identity(input, name=name)
   else:
-    try:
-      in_device = input.device
-    except AttributeError:
-      input = ops.convert_to_tensor(input)
-      in_device = input.device
+    input = ops.convert_to_tensor(input)
+    in_device = input.device
     # TODO(ashankar): Does 'identity' need to invoke execution callbacks?
     if context.context().device_name != in_device:
       return input._copy()  # pylint: disable=protected-access
@@ -451,18 +449,21 @@ def _slice_helper(tensor, slice_spec, var=None):
   This operation extracts the specified region from the tensor.
   The notation is similar to NumPy with the restriction that
   currently only support basic indexing. That means that
-  using a tensor as input is not currently allowed
+  using a non-scalar tensor as input is not currently allowed.
 
   Some useful examples:
 
   ```python
   # strip leading and trailing 2 elements
   foo = tf.constant([1,2,3,4,5,6])
-  print(foo[2:-2].eval())  # [3,4]
+  print(foo[2:-2].eval())  # => [3,4]
 
   # skip every row and reverse every column
   foo = tf.constant([[1,2,3], [4,5,6], [7,8,9]])
-  print(foo[::2,::-1].eval())  # [[3,2,1], [9,8,7]]
+  print(foo[::2,::-1].eval())  # => [[3,2,1], [9,8,7]]
+
+  # Use scalar tensors as indices on both dimensions
+  print(foo[tf.constant(0), tf.constant(2)].eval())  # => 3
 
   # Insert another dimension
   foo = tf.constant([[1,2,3], [4,5,6], [7,8,9]])
@@ -473,9 +474,9 @@ def _slice_helper(tensor, slice_spec, var=None):
 
   # Ellipses (3 equivalent operations)
   foo = tf.constant([[1,2,3], [4,5,6], [7,8,9]])
-  print(foo[tf.newaxis, :, :].eval())  # [[[1,2,3], [4,5,6], [7,8,9]]]
-  print(foo[tf.newaxis, ...].eval())  # [[[1,2,3], [4,5,6], [7,8,9]]]
-  print(foo[tf.newaxis].eval())  # [[[1,2,3], [4,5,6], [7,8,9]]]
+  print(foo[tf.newaxis, :, :].eval())  # => [[[1,2,3], [4,5,6], [7,8,9]]]
+  print(foo[tf.newaxis, ...].eval())  # => [[[1,2,3], [4,5,6], [7,8,9]]]
+  print(foo[tf.newaxis].eval())  # => [[[1,2,3], [4,5,6], [7,8,9]]]
   ```
 
   Notes:
@@ -1092,6 +1093,27 @@ def concat(values, axis, name="concat"):
   tf.shape(tf.concat([t3, t4], 0))  # [4, 3]
   tf.shape(tf.concat([t3, t4], 1))  # [2, 6]
   ```
+  As in Python, the `axis` could also be negative numbers. Negative `axis`
+  are interpreted as counting from the end of the rank, i.e.,
+   `axis + rank(values)`-th dimension.
+
+  For example:
+
+  ```python
+  t1 = [[[1, 2], [2, 3]], [[4, 4], [5, 3]]]
+  t2 = [[[7, 4], [8, 4]], [[2, 10], [15, 11]]]
+  tf.concat([t1, t2], -1)
+  ```
+
+  would produce:
+
+  ```python
+  [[[ 1,  2,  7,  4],
+    [ 2,  3,  8,  4]],
+
+   [[ 4,  4,  2, 10],
+    [ 5,  3, 15, 11]]]
+  ```
 
   Note: If you are concatenating along a new axis consider using stack.
   E.g.
@@ -1109,7 +1131,10 @@ def concat(values, axis, name="concat"):
   Args:
     values: A list of `Tensor` objects or a single `Tensor`.
     axis: 0-D `int32` `Tensor`.  Dimension along which to concatenate. Must be
-      in the range `[-rank(values), rank(values))`.
+      in the range `[-rank(values), rank(values))`. As in Python, indexing
+      for axis is 0-based. Positive axis in the rage of
+      `[0, rank(values))` refers to `axis`-th dimension. And negative axis
+      refers to `axis + rank(values)`-th dimension.
     name: A name for the operation (optional).
 
   Returns:
@@ -1254,6 +1279,17 @@ def sparse_mask(a, mask_indices, name=None):
     return ops.IndexedSlices(out_values, out_indices, a.dense_shape)
 
 
+def unique(x, out_idx=dtypes.int32, name=None):
+  # TODO(yongtang): switch to v2 once API deprecation
+  # period (3 weeks) pass.
+  # TODO(yongtang): The documentation should also
+  # be updated when switch  to v2.
+  return gen_array_ops._unique(x, out_idx, name)
+
+
+unique.__doc__ = gen_array_ops._unique.__doc__
+
+
 def split(value, num_or_size_splits, axis=0, num=None, name="split"):
   """Splits a tensor into sub tensors.
 
@@ -1305,7 +1341,7 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"):
   size_splits = ops.convert_to_tensor(num_or_size_splits)
   if size_splits._rank() == 0 and size_splits.dtype.is_integer:
     return gen_array_ops._split(
-        split_dim=axis, num_split=num_or_size_splits, value=value, name=name)
+        axis=axis, num_split=num_or_size_splits, value=value, name=name)
 
   if num is None:
     num = size_splits._shape_tuple()[0]
@@ -1315,7 +1351,7 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"):
   return gen_array_ops._split_v(
       value=value,
       size_splits=size_splits,
-      split_dim=axis,
+      axis=axis,
       num_split=num,
       name=name)
 
@@ -1496,20 +1532,17 @@ def zeros(shape, dtype=dtypes.float32, name=None):
       zero = ""
     else:
       zero = 0
-    # Checking for boolean dtype to prevent attempting to run fill on the GPU
-    # which does not have a boolean kernel registered.
-    if context.in_eager_mode() and dtype != dtypes.bool:
-      return fill(shape, constant(zero, dtype=dtype), name=name)
-    try:
-      if isinstance(shape, ops.Tensor):
-        # TODO(apassos) this is required to reproduce the behavior from before
-        # Tensors were iterable. It's a crutch.
-        raise TypeError
-      shape = tensor_shape.as_shape(shape)
-      output = constant(zero, shape=shape, dtype=dtype, name=name)
-    except (TypeError, ValueError):
-      shape = ops.convert_to_tensor(shape, dtype=dtypes.int32, name="shape")
-      output = fill(shape, constant(zero, dtype=dtype), name=name)
+    if not isinstance(shape, ops.Tensor):
+      try:
+        # Go through tensor shapes to get int64-if-needed semantics
+        shape = constant_op._tensor_shape_tensor_conversion_function(
+            tensor_shape.TensorShape(shape))
+      except (TypeError, ValueError):
+        # Happens when shape is a list with tensor elements
+        shape = ops.convert_to_tensor(shape, dtype=dtypes.int32)
+    if not shape._shape_tuple():
+      shape = reshape(shape, [-1])  # Ensure it's a vector
+    output = fill(shape, constant(zero, dtype=dtype), name=name)
   assert output.dtype.base_dtype == dtype
   return output
 
@@ -1627,15 +1660,17 @@ def ones(shape, dtype=dtypes.float32, name=None):
   dtype = dtypes.as_dtype(dtype).base_dtype
   with ops.name_scope(name, "ones", [shape]) as name:
     one = True if dtype == dtypes.bool else 1
-    try:
-      if isinstance(shape, ops.Tensor):
-        raise TypeError(
-            "preserving semantics from before tensors were iterable")
-      shape = tensor_shape.as_shape(shape)
-      output = constant(one, shape=shape, dtype=dtype, name=name)
-    except (TypeError, ValueError):
-      shape = ops.convert_to_tensor(shape, dtype=dtypes.int32, name="shape")
-      output = fill(shape, constant(one, dtype=dtype), name=name)
+    if not isinstance(shape, ops.Tensor):
+      try:
+        # Go through tensor shapes to get int64-if-needed semantics
+        shape = constant_op._tensor_shape_tensor_conversion_function(
+            tensor_shape.TensorShape(shape))
+      except (TypeError, ValueError):
+        # Happens when shape is a list with tensor elements
+        shape = ops.convert_to_tensor(shape, dtype=dtypes.int32)
+    if not shape._shape_tuple():
+      shape = reshape(shape, [-1])  # Ensure it's a vector
+    output = fill(shape, constant(one, dtype=dtype), name=name)
   assert output.dtype.base_dtype == dtype
   return output
 
@@ -2008,7 +2043,7 @@ def edit_distance(hypothesis, truth, normalize=True, name="edit_distance"):
   hypothesis = tf.SparseTensor(
       [[0, 0, 0],
        [1, 0, 0]],
-      ["a", "b"]
+      ["a", "b"],
       (2, 1, 1))
 
   # 'truth' is a tensor of shape `[2, 2]` with variable-length values:
@@ -2020,7 +2055,7 @@ def edit_distance(hypothesis, truth, normalize=True, name="edit_distance"):
       [[0, 1, 0],
        [1, 0, 0],
        [1, 0, 1],
-       [1, 1, 0]]
+       [1, 1, 0]],
       ["a", "b", "c", "a"],
       (2, 2, 2))
 
@@ -2537,9 +2572,9 @@ def where(condition, x=None, y=None, name=None):
     with ops.name_scope(name, "Where", [condition]) as name:
       condition = ops.convert_to_tensor(
           condition, preferred_dtype=dtypes.bool, name="condition")
-      return gen_array_ops.where(input=condition, name=name)
+      return gen_array_ops.where(condition=condition, name=name)
   elif x is not None and y is not None:
-    return gen_math_ops._select(condition=condition, t=x, e=y, name=name)
+    return gen_math_ops._select(condition=condition, x=x, y=y, name=name)
   else:
     raise ValueError("x and y must both be non-None or both be None.")
 
diff --git a/tensorflow/python/ops/bitwise_ops_test.py b/tensorflow/python/ops/bitwise_ops_test.py
index fa1b219b1771dbd8f99939d8f6571d2a8791433e..f9b025b787e4f49e1dcde6c589f66c59d779fcef 100644
--- a/tensorflow/python/ops/bitwise_ops_test.py
+++ b/tensorflow/python/ops/bitwise_ops_test.py
@@ -36,7 +36,7 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
 
   def testBinaryOps(self):
     dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
-                  dtypes.uint8, dtypes.uint16]
+                  dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64]
 
     with self.test_session(use_gpu=True) as sess:
       for dtype in dtype_list:
@@ -135,5 +135,36 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
                   bitwise_ops.right_shift(lhs, rhs)])
 
 
+  def testShapeInference(self):
+    dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
+                  dtypes.uint8, dtypes.uint16]
+
+    with self.test_session(use_gpu=True) as sess:
+      for dtype in dtype_list:
+        lhs = constant_op.constant([[0], [3], [5]], dtype=dtype)
+        rhs = constant_op.constant([[1, 2, 4]], dtype=dtype)
+
+        and_tensor = bitwise_ops.bitwise_and(lhs, rhs)
+        or_tensor = bitwise_ops.bitwise_or(lhs, rhs)
+        xor_tensor = bitwise_ops.bitwise_xor(lhs, rhs)
+        ls_tensor = bitwise_ops.left_shift(lhs, rhs)
+        rs_tensor = bitwise_ops.right_shift(lhs, rhs)
+
+        and_result, or_result, xor_result, ls_result, rs_result = sess.run(
+            [and_tensor, or_tensor, xor_tensor, ls_tensor, rs_tensor])
+
+        # Compare shape inference with result
+        self.assertAllEqual(and_tensor.get_shape().as_list(), and_result.shape)
+        self.assertAllEqual(and_tensor.get_shape().as_list(), [3, 3])
+        self.assertAllEqual(or_tensor.get_shape().as_list(), or_result.shape)
+        self.assertAllEqual(or_tensor.get_shape().as_list(), [3, 3])
+        self.assertAllEqual(xor_tensor.get_shape().as_list(), xor_result.shape)
+        self.assertAllEqual(xor_tensor.get_shape().as_list(), [3, 3])
+        self.assertAllEqual(ls_tensor.get_shape().as_list(), ls_result.shape)
+        self.assertAllEqual(ls_tensor.get_shape().as_list(), [3, 3])
+        self.assertAllEqual(rs_tensor.get_shape().as_list(), rs_result.shape)
+        self.assertAllEqual(rs_tensor.get_shape().as_list(), [3, 3])
+
+
 if __name__ == "__main__":
   googletest.main()
diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py
index 1377af3eac43a5846353257304ef7e022d3506d4..eb7806ed0b4dc3022671d6b4248dc5924988534b 100644
--- a/tensorflow/python/ops/check_ops.py
+++ b/tensorflow/python/ops/check_ops.py
@@ -23,6 +23,7 @@ See the @{$python/check_ops} guide.
 @@assert_non_positive
 @@assert_equal
 @@assert_none_equal
+@@assert_near
 @@assert_less
 @@assert_less_equal
 @@assert_greater
@@ -70,6 +71,7 @@ __all__ = [
     'assert_non_positive',
     'assert_equal',
     'assert_none_equal',
+    'assert_near',
     'assert_integer',
     'assert_less',
     'assert_less_equal',
@@ -338,8 +340,11 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
       eq = math_ops.equal(x, y)
       condition = math_ops.reduce_all(eq)
       if not condition:
-        # Prepare a message with first elements of x and y
+        # Prepare a message with first elements of x and y.
         summary_msg = ''
+        # Default to printing 3 elements like control_flow_ops.Assert (used
+        # by graph mode) does.
+        summarize = 3 if summarize is None else summarize
         if summarize:
           # reshape((-1,)) is the fastest way to get a flat array view.
           x_np = x.numpy().reshape((-1,))
@@ -351,15 +356,13 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
                          (x_sum, x_np[:x_sum],
                           y_sum, y_np[:y_sum]))
 
-        # Get the values that actually differed and their indices
+        # Get the values that actually differed and their indices.
         mask = math_ops.logical_not(eq)
         indices = array_ops.where(mask)
         indices_np = indices.numpy()
         x_vals = array_ops.boolean_mask(x, mask)
         y_vals = array_ops.boolean_mask(y, mask)
-        diff_to_print = 0
-        if summarize:
-          diff_to_print = min(summarize, indices_np.size)
+        summarize = min(summarize, indices_np.shape[0])
 
         raise errors.InvalidArgumentError(
             node_def=None, op=None,
@@ -370,9 +373,9 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
                      '%s'
                      %
                      (message or '',
-                      diff_to_print, indices_np[:diff_to_print],
-                      x_vals.numpy().reshape((-1,))[:diff_to_print],
-                      y_vals.numpy().reshape((-1,))[:diff_to_print],
+                      summarize, indices_np[:summarize],
+                      x_vals.numpy().reshape((-1,))[:summarize],
+                      y_vals.numpy().reshape((-1,))[:summarize],
                       summary_msg)))
       return
 
@@ -442,6 +445,83 @@ def assert_none_equal(
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
+def assert_near(
+    x, y, rtol=None, atol=None, data=None, summarize=None, message=None,
+    name=None):
+  """Assert the condition `x` and `y` are close element-wise.
+
+  Example of adding a dependency to an operation:
+
+  ```python
+  with tf.control_dependencies([tf.assert_near(x, y)]):
+    output = tf.reduce_sum(x)
+  ```
+
+  This condition holds if for every pair of (possibly broadcast) elements
+  `x[i]`, `y[i]`, we have
+
+  ```tf.abs(x[i] - y[i]) <= atol + rtol * tf.abs(y[i])```.
+
+  If both `x` and `y` are empty, this is trivially satisfied.
+
+  The default `atol` and `rtol` is `10 * eps`, where `eps` is the smallest
+  representable positive number such that `1 + eps != eps`.  This is about
+  `1.2e-6` in `32bit`, `2.22e-15` in `64bit`, and `0.00977` in `16bit`.
+  See `numpy.finfo`.
+
+  Args:
+    x:  Float or complex `Tensor`.
+    y:  Float or complex `Tensor`, same `dtype` as, and broadcastable to, `x`.
+    rtol:  `Tensor`.  Same `dtype` as, and broadcastable to, `x`.
+      The relative tolerance.  Default is `10 * eps`.
+    atol:  `Tensor`.  Same `dtype` as, and broadcastable to, `x`.
+      The absolute tolerance.  Default is `10 * eps`.
+    data:  The tensors to print out if the condition is False.  Defaults to
+      error message and first few entries of `x`, `y`.
+    summarize: Print this many entries of each tensor.
+    message: A string to prefix to the default message.
+    name: A name for this operation (optional).  Defaults to "assert_near".
+
+  Returns:
+    Op that raises `InvalidArgumentError` if `x` and `y` are not close enough.
+
+  @compatibility(numpy)
+  Similar to `numpy.assert_allclose`, except tolerance depends on data type.
+  This is due to the fact that `TensorFlow` is often used with `32bit`, `64bit`,
+  and even `16bit` data.
+  @end_compatibility
+  """
+  message = message or ''
+  with ops.name_scope(name, 'assert_near', [x, y, rtol, atol, data]):
+    x = ops.convert_to_tensor(x, name='x')
+    y = ops.convert_to_tensor(y, name='y', dtype=x.dtype)
+
+    eps = np.finfo(x.dtype.as_numpy_dtype).eps
+    rtol = 10 * eps if rtol is None else rtol
+    atol = 10 * eps if atol is None else atol
+
+    rtol = ops.convert_to_tensor(rtol, name='rtol', dtype=x.dtype)
+    atol = ops.convert_to_tensor(atol, name='atol', dtype=x.dtype)
+
+    if context.in_eager_mode():
+      x_name = _shape_and_dtype_str(x)
+      y_name = _shape_and_dtype_str(y)
+    else:
+      x_name = x.name
+      y_name = y.name
+
+    if data is None:
+      data = [
+          message,
+          'x and y not equal to tolerance rtol = %s, atol = %s' % (rtol, atol),
+          'x (%s) = ' % x_name, x, 'y (%s) = ' % y_name, y
+      ]
+    tol = atol + rtol * math_ops.abs(y)
+    diff = math_ops.abs(x - y)
+    condition = math_ops.reduce_all(math_ops.less(diff, tol))
+    return control_flow_ops.Assert(condition, data, summarize=summarize)
+
+
 def assert_less(x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x < y` holds element-wise.
 
diff --git a/tensorflow/python/ops/control_flow_grad.py b/tensorflow/python/ops/control_flow_grad.py
index 22dc6771ec0690fe807b34b3dea6295edf7dbbf0..97b57177b29986a006df992f4c0c2b79e11467aa 100644
--- a/tensorflow/python/ops/control_flow_grad.py
+++ b/tensorflow/python/ops/control_flow_grad.py
@@ -23,6 +23,7 @@ from six.moves import xrange  # pylint: disable=redefined-builtin
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import math_ops
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import,undefined-variable
@@ -52,7 +53,8 @@ def _SwitchGrad(op, *grad):
       # TODO(yuanbyu): Perform shape inference with this new input.
       if grad[1] is not None:
         # pylint: disable=protected-access
-        control_flow_ops._AddNextAndBackEdge(merge_grad, grad[1])
+        control_flow_ops._AddNextAndBackEdge(merge_grad, grad[1],
+                                             enforce_shape_invariant=False)
         # pylint: enable=protected-access
       return None, None
     elif grad[0] is not None:
@@ -91,7 +93,7 @@ def _MergeGrad(op, grad, _):
   input_op = op.inputs[0].op
   graph = ops.get_default_graph()
   # pylint: disable=protected-access
-  op_ctxt = control_flow_ops._GetOutputContext(input_op)
+  op_ctxt = control_flow_util.GetOutputContext(input_op)
   grad_ctxt = graph._get_control_flow_context()
   # pylint: enable=protected-access
   if isinstance(op_ctxt, WhileContext):
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 38c959df8ded422a9313a8b44fc646e1e98b3108..86941a7f2ae7d6ba8622d5c4ceafdb9a689eaca0 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -52,6 +52,7 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import functools
 
 import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
@@ -66,6 +67,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_util as util
 from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gen_control_flow_ops
 from tensorflow.python.ops import gen_data_flow_ops
@@ -505,29 +507,6 @@ def _convert_flows_to_tensorarrays(tensors_or_tensorarrays, tensors_or_flows):
       for (ta, t_or_flow) in zip(tensors_or_tensorarrays, tensors_or_flows)]
 
 
-def _IsLoopConstantEnter(op):
-  """Return true iff op is a loop invariant."""
-  is_enter = (op.type == "Enter" or op.type == "RefEnter")
-  return is_enter and op.get_attr("is_constant")
-
-
-def _GetLoopConstantEnter(value):
-  """Return the enter op if we can infer `value` to be a loop invariant."""
-  id_ops = {"Switch", "RefSwitch", "Identity", "RefIdentity"}
-  op = value.op
-  while op.type in id_ops:
-    op = op.inputs[0].op
-  return op if _IsLoopConstantEnter(op) else None
-
-
-def _GetOutputContext(op):
-  """Return the control flow context for the output of an op."""
-  ctxt = op._get_control_flow_context()
-  if IsLoopExit(op):
-    ctxt = ctxt.outer_context
-  return ctxt
-
-
 def _ShapeLessThanOrEqual(shape1, shape2):
   if shape2.dims is None:
     return True
@@ -612,6 +591,8 @@ def _EnforceShapeInvariant(merge_var, next_var):
     m_shape = merge_var.get_shape()
     n_shape = next_var.get_shape()
     if not _ShapeLessThanOrEqual(n_shape, m_shape):
+      # TODO(skyewm): get original loop input that caused the shape error and
+      # report its name instead of the merge node's.
       raise ValueError(
           "The shape for %s is not an invariant for the loop. It enters "
           "the loop with shape %s, but has shape %s after one iteration. "
@@ -663,11 +644,17 @@ def _EnforceShapeInvariant(merge_var, next_var):
              n_values_shape, n_indices_shape, n_shape_shape))
 
 
-def _AddNextAndBackEdge(m, v):
+def _AddNextAndBackEdge(m, v, enforce_shape_invariant=True):
   """Add NextIteration and back edge from v to m."""
   if isinstance(m, ops.Tensor):
     v = ops.convert_to_tensor(v)
     v = _NextIteration(v)
+    if enforce_shape_invariant:
+      # Make sure the shapes of loop outputs are correct. We do this before
+      # calling _update_input, which will raise a less-helpful error message if
+      # the types don't match.
+      # TODO(skyewm): call this for other cases below (needs testing)
+      _EnforceShapeInvariant(m, v)
     m.op._update_input(1, v)   # pylint: disable=protected-access
   elif isinstance(m, ops.IndexedSlices):
     # pylint: disable=protected-access
@@ -694,6 +681,78 @@ def _AddNextAndBackEdge(m, v):
   return v
 
 
+def GetMaxSizeFromNestedMaximumIterations(value, while_ctxt):
+  """Calculate a max_size for use by stack ops inside an XLA while_loop.
+
+  Args:
+    value: The value inside the while_loop forward context.  Used for printing
+      error messages.
+    while_ctxt: The forward context inside which value resides.  This does
+      not always match the value's immediate context, as `value` may be
+      inside e.g. a cond context inside the while_loop.
+
+  Returns:
+    A tensor containing the `max_size` to feed to a Stack initializer.
+
+  Raises:
+    ValueError: If `value` is nested inside a `while_loop` that either
+      lacks a `maximum_iterations` parameter, or the `maximum_iterations`
+      parameter:
+
+        - is inside a `while_loop` that is a parent of the calling context, and
+        - cannot be evaluated at graph build time to a constant.
+  """
+  value_name = value.name
+  # curr_ctxt is the context that tf.gradients was called in.
+  curr_ctxt = ops.get_default_graph()._get_control_flow_context()  # pylint: disable=protected-access
+
+  curr_ctxt_name = curr_ctxt.name if curr_ctxt is not None else ""
+  max_size = constant_op.constant(1)
+
+  # Loop through all containing while contexts between value and the
+  # current context, multiplying together each context's
+  # max_iterations to get the maximum stack size.
+  while while_ctxt not in (None, curr_ctxt):
+    max_iter = while_ctxt.maximum_iterations
+    if max_iter is None:
+      raise ValueError(
+          "Cannot create a gradient accumulator for tensor '%s' inside "
+          "XLA while_loop because maximum_iterations was not passed to "
+          "the tf.while_loop call ('%s')."
+          % (value_name, while_ctxt.name))
+
+    # pylint: disable=protected-access
+    max_iter_ctxt = max_iter.op._get_control_flow_context()
+    # pylint: enable=protected-access
+
+    # If max_iter_ctxt (non-strictly) contains curr_ctxt, then it's OK to use.
+    if util.IsContainingContext(curr_ctxt, max_iter_ctxt):
+      max_size *= max_iter
+    else:
+      # We cannot use max_iter because it's defined in a nested while
+      # or cond context, so will fail if we try to use it as input to
+      # any ops in curr_ctxt (e.g. max_size or the final accumulator
+      # stack). Attempt to get a constant value out to use instead.
+      const_max_iter = tensor_util.constant_value(max_iter)
+      if const_max_iter is None:
+        raise ValueError(
+            "Cannot create a gradient accumulator for tensor '%s' inside XLA "
+            "while_loop. maximum_iterations tensor '%s' for while_loop context "
+            "'%s' must be statically known (e.g. a constant value or known "
+            "shape dimension), or be defined at or outside the while loop "
+            "context '%s' (currently defined in '%s')." % (
+                value_name, max_iter.name, while_ctxt.name,
+                curr_ctxt_name, max_iter_ctxt.name))
+      max_size *= const_max_iter
+
+    # Find the next outer WhileContext (or stop if we reach the
+    # tf.gradient's context).
+    while_ctxt = util.GetContainingWhileContext(
+        while_ctxt.outer_context, stop_ctxt=curr_ctxt)
+
+  return max_size
+
+
 class GradLoopState(object):
   """The state used for constructing the gradient graph for a while loop.
 
@@ -762,22 +821,26 @@ class GradLoopState(object):
 
       outer_grad_ctxt = outer_grad_state.grad_context
       outer_grad_ctxt.Enter()
-      self._grad_context = WhileContext(forward_ctxt.parallel_iterations,
-                                        forward_ctxt.back_prop,
-                                        forward_ctxt.swap_memory,
-                                        forward_ctxt.name,
-                                        self)
+      self._grad_context = WhileContext(
+          maximum_iterations=forward_ctxt.maximum_iterations,
+          parallel_iterations=forward_ctxt.parallel_iterations,
+          back_prop=forward_ctxt.back_prop,
+          swap_memory=forward_ctxt.swap_memory,
+          name=forward_ctxt.name,
+          grad_state=self)
       real_cnt = outer_grad_state.AddBackpropAccumulatedValue(history_cnt, cnt)
       self._grad_index = self._grad_context.AddBackpropLoopCounter(
           real_cnt, outer_grad_state)
       outer_grad_ctxt.Exit()
     else:
       if outer_forward_ctxt: outer_forward_ctxt.Enter()
-      self._grad_context = WhileContext(forward_ctxt.parallel_iterations,
-                                        forward_ctxt.back_prop,
-                                        forward_ctxt.swap_memory,
-                                        forward_ctxt.name,
-                                        self)
+      self._grad_context = WhileContext(
+          maximum_iterations=forward_ctxt.maximum_iterations,
+          parallel_iterations=forward_ctxt.parallel_iterations,
+          back_prop=forward_ctxt.back_prop,
+          swap_memory=forward_ctxt.swap_memory,
+          name=forward_ctxt.name,
+          grad_state=self)
       self._grad_index = self._grad_context.AddBackpropLoopCounter(
           cnt, outer_grad_state)
       if outer_forward_ctxt: outer_forward_ctxt.Exit()
@@ -902,14 +965,26 @@ class GradLoopState(object):
 
     Raises:
       TypeError: For internal errors involving the value condition context.
+      ValueError: If `value` is inside a XLA scope and a valid max size
+        for the stack can't be found.
     """
-    curr_ctxt = ops.get_default_graph()._get_control_flow_context()
+    # curr_ctxt is the context that tf.gradients was called in.
+    curr_ctxt = ops.get_default_graph()._get_control_flow_context()  # pylint: disable=protected-access
     with ops.control_dependencies(None):
       if curr_ctxt: curr_ctxt.Enter()
       with ops.colocate_with(value):
+        # We only need to pass maximum_iterations to the stack if
+        # we're inside an XLA context.
+        if not util.IsInXLAContext(value.op):
+          max_size = constant_op.constant(-1, dtypes.int32)
+        else:
+          max_size = GetMaxSizeFromNestedMaximumIterations(
+              value, self.forward_context)
         # pylint: disable=protected-access
-        acc = gen_data_flow_ops._stack_v2(-1, value.dtype.base_dtype,
-                                          name="f_acc")
+        acc = gen_data_flow_ops._stack_v2(
+            max_size=max_size,
+            elem_type=value.dtype.base_dtype,
+            name="f_acc")
         # pylint: enable=protected-access
       if curr_ctxt: curr_ctxt.Exit()
 
@@ -918,7 +993,7 @@ class GradLoopState(object):
 
       # Add the stack_push op in the context of value.op.
       swap_enabled = self.forward_context.swap_memory
-      value_ctxt = _GetOutputContext(value.op)
+      value_ctxt = util.GetOutputContext(value.op)
       if value_ctxt == self.forward_context:
         # value is not nested in the forward context.
         self.forward_context.Enter()
@@ -1028,7 +1103,7 @@ class GradLoopState(object):
       cur_value = value
       cur_grad_state = self
       while True:
-        enter_op = _GetLoopConstantEnter(cur_value)
+        enter_op = util.GetLoopConstantEnter(cur_value)
         if enter_op:
           # Special case: cur_value comes from a constant Enter node.
           cur_value = enter_op.inputs[0]
@@ -1081,7 +1156,7 @@ class ControlFlowState(object):
 
   def GetGradState(self, op, before):
     """Return the grad state for this op if it's in a forward loop context."""
-    if before and IsLoopExit(op):
+    if before and util.IsLoopExit(op):
       forward_ctxt = op._get_control_flow_context()
       forward_ctxt = forward_ctxt.outer_context
       if forward_ctxt:
@@ -1241,8 +1316,8 @@ class ControlFlowState(object):
     Returns:
       A zero tensor of the same shape of op.outputs[index].
     """
-    if IsLoopSwitch(op): return None
-    dead_branch = IsSwitch(op)
+    if util.IsLoopSwitch(op): return None
+    dead_branch = util.IsSwitch(op)
     forward_ctxt = _GetWhileContext(op)
     grad_state = self._map.get(forward_ctxt)
     if grad_state is None:
@@ -1342,7 +1417,7 @@ def MaybeCreateControlFlowState(between_op_list, between_ops,
   """
   loop_state = None
   for op in between_op_list:
-    if IsLoopExit(op):
+    if util.IsLoopExit(op):
       if loop_state is None:
         loop_state = ControlFlowState()
       if colocate_gradients_with_ops:
@@ -1353,28 +1428,10 @@ def MaybeCreateControlFlowState(between_op_list, between_ops,
   return loop_state
 
 
-def IsSwitch(op):
-  """Return true if `op` is a Switch."""
-  return op.type == "Switch" or op.type == "RefSwitch"
-
-
-def IsLoopExit(op):
-  """Return true if `op` is an Exit."""
-  return op.type == "Exit" or op.type == "RefExit"
-
-
-def IsLoopSwitch(op):
-  """Return true if `op` is the Switch for a while loop."""
-  if IsSwitch(op):
-    ctxt = op._get_control_flow_context()
-    return ctxt and isinstance(ctxt, WhileContext)
-  return False
-
-
 def ZerosLikeOutsideLoop(op, index):
   """Create zeros_like for the specified output of an op."""
   val = op.outputs[index]
-  if not IsSwitch(op):
+  if not util.IsSwitch(op):
     return array_ops.zeros_like(val, optimize=False)
   else:
     op_ctxt = op._get_control_flow_context()
@@ -1445,6 +1502,10 @@ class ControlFlowContext(object):
       g.as_graph_element(op)._set_control_flow_context(self)
       # pylint: enable=protected-access
 
+  @property
+  def name(self):
+    return self._name
+
   @property
   def outer_context(self):
     """Return the context containing this context."""
@@ -1511,7 +1572,7 @@ class ControlFlowContext(object):
     return None
 
   def _IsInOuterContext(self, op):
-    op_ctxt = _GetOutputContext(op)
+    op_ctxt = util.GetOutputContext(op)
     outer_ctxt = self.outer_context
     while outer_ctxt != op_ctxt:
       if outer_ctxt is None:
@@ -1529,11 +1590,11 @@ class ControlFlowContext(object):
     else:
       internal_control_inputs = []
       for x in op.control_inputs:
-        ctxt = _GetOutputContext(x)
+        ctxt = util.GetOutputContext(x)
         if ctxt is not None and ctxt.GetWhileContext() == while_ctxt:
           internal_control_inputs.append(x)
     if len(internal_control_inputs) != len(op.control_inputs):
-      del op.control_inputs[:]
+      op._remove_all_control_inputs()
       op._add_control_inputs(internal_control_inputs)
     return internal_control_inputs
   # pylint: enable=protected-access
@@ -1547,6 +1608,18 @@ class ControlFlowContext(object):
     """Returns the pivot node for this context, or None."""
     return None
 
+  def IsWhileContext(self):
+    return False
+
+  def IsCondContext(self):
+    return False
+
+  def IsXLAContext(self):
+    return False
+
+  def __str__(self):
+    return self.name
+
 
 class CondContext(ControlFlowContext):
   """The context for the conditional construct."""
@@ -1600,10 +1673,6 @@ class CondContext(ControlFlowContext):
     super(CondContext, self).__init__(values_def=context_def.values_def,
                                       import_scope=import_scope)
 
-  @property
-  def name(self):
-    return self._name
-
   @property
   def pred(self):
     return self._pred
@@ -1720,7 +1789,7 @@ class CondContext(ControlFlowContext):
         op._add_control_input(self._pivot.op)
       # pylint: enable=protected-access
 
-    if self._outer_context or not IsLoopExit(op):
+    if self._outer_context or not util.IsLoopExit(op):
       op.graph.prevent_fetching(op)
 
     if self._outer_context:
@@ -1785,6 +1854,9 @@ class CondContext(ControlFlowContext):
       result = [result]
     return original_result, result
 
+  def IsCondContext(self):
+    return True
+
 
 def _UnpackIfSingleton(res):
   if isinstance(res, (list, _basetuple)) and len(res) == 1:
@@ -1793,6 +1865,7 @@ def _UnpackIfSingleton(res):
     return res
 
 
+# pylint: disable=redefined-outer-name
 # pylint: disable=g-doc-args
 @deprecation.deprecated_args(
     None,
@@ -1969,6 +2042,7 @@ def cond(pred, true_fn=None, false_fn=None, strict=False, name=None,
       merges = _UnpackIfSingleton(merges)
     return merges
 # pylint: enable=g-doc-args
+# pylint: enable=redefined-outer-name
 
 
 def _resource_safe_shape(t):
@@ -1986,12 +2060,19 @@ def _resource_safe_shape(t):
 class WhileContext(ControlFlowContext):
   """The context for the loop construct."""
 
-  def __init__(self, parallel_iterations=10, back_prop=True, swap_memory=False,
-               name="while_context", grad_state=None, context_def=None,
+  def __init__(self,
+               maximum_iterations=None,
+               parallel_iterations=10,
+               back_prop=True,
+               swap_memory=False,
+               name="while_context",
+               grad_state=None,
+               context_def=None,
                import_scope=None):
     """"Creates a `WhileContext`.
 
     Args:
+      maximum_iterations: Optional upper bound on number of loop iterations.
       parallel_iterations: The number of iterations allowed to run in parallel.
       back_prop: Whether backprop is enabled for this while loop.
       swap_memory: Whether GPU-CPU memory swap is enabled for this loop.
@@ -2006,16 +2087,17 @@ class WhileContext(ControlFlowContext):
       self._init_from_proto(context_def, import_scope=import_scope)
     else:
       ControlFlowContext.__init__(self)
-      self._init_from_args(parallel_iterations, back_prop, swap_memory,
-                           name)
+      self._init_from_args(maximum_iterations, parallel_iterations, back_prop,
+                           swap_memory, name)
     # The gradient loop state.
     self._grad_state = grad_state
 
-  def _init_from_args(self, parallel_iterations, back_prop, swap_memory,
-                      name):
+  def _init_from_args(self, maximum_iterations, parallel_iterations, back_prop,
+                      swap_memory, name):
     """Creates a new `WhileContext` from arguments.
 
     Args:
+      maximum_iterations: Optional upper bound on number of loop iterations.
       parallel_iterations: The number of iterations allowed to run in parallel.
       back_prop: Whether backprop is enabled for this while loop.
       swap_memory: Whether GPU-CPU memory swap is enabled for this loop.
@@ -2028,6 +2110,7 @@ class WhileContext(ControlFlowContext):
       raise ValueError("`parallel_iterations` must be a positive integer: "
                        "%s" % parallel_iterations)
     self._name = ops.get_default_graph().unique_name(name)
+    self._maximum_iterations = maximum_iterations
     self._parallel_iterations = parallel_iterations
     self._back_prop = back_prop
     self._swap_memory = swap_memory
@@ -2055,6 +2138,12 @@ class WhileContext(ControlFlowContext):
     g = ops.get_default_graph()
     self._name = ops.prepend_name_scope(
         context_def.context_name, import_scope)
+    if context_def.maximum_iterations_name:
+      self._maximum_iterations = g.as_graph_element(
+          ops.prepend_name_scope(context_def.maximum_iterations_name,
+                                 import_scope))
+    else:
+      self._maximum_iterations = None
     self._parallel_iterations = context_def.parallel_iterations
     self._back_prop = context_def.back_prop
     self._swap_memory = context_def.swap_memory
@@ -2079,8 +2168,9 @@ class WhileContext(ControlFlowContext):
                                        import_scope=import_scope)
 
   @property
-  def name(self):
-    return self._name
+  def maximum_iterations(self):
+    """The maximum number of iterations that will be executed."""
+    return self._maximum_iterations
 
   @property
   def parallel_iterations(self):
@@ -2132,6 +2222,9 @@ class WhileContext(ControlFlowContext):
       context_def.context_name = ops.strip_name_scope(
           self.name, export_scope)
       context_def.parallel_iterations = self._parallel_iterations
+      if self._maximum_iterations is not None:
+        context_def.maximum_iterations_name = ops.strip_name_scope(
+            self._maximum_iterations.name, export_scope)
       context_def.back_prop = self._back_prop
       context_def.swap_memory = self._swap_memory
       context_def.pivot_for_pred_name = ops.strip_name_scope(
@@ -2190,7 +2283,7 @@ class WhileContext(ControlFlowContext):
         grad_ctxt = grad_ctxt.GetWhileContext()
         if grad_ctxt.grad_state:
           forward_ctxt = _GetWhileContext(val.op)
-          if IsLoopExit(val.op):
+          if util.IsLoopExit(val.op):
             forward_ctxt = forward_ctxt.outer_context
             if forward_ctxt:
               forward_ctxt = forward_ctxt.GetWhileContext()
@@ -2272,7 +2365,7 @@ class WhileContext(ControlFlowContext):
       self._MaybeAddControlDependency(op)
       for x in op.outputs:
         self._values.add(x.name)
-    if self._outer_context or not IsLoopExit(op):
+    if self._outer_context or not util.IsLoopExit(op):
       op.graph.prevent_fetching(op)
       for x in op.outputs:
         op.graph.prevent_feeding(x)
@@ -2291,7 +2384,7 @@ class WhileContext(ControlFlowContext):
         return True
       # pylint: enable=protected-access
       for x in op.inputs:
-        if not _IsLoopConstantEnter(x.op):
+        if not util.IsLoopConstantEnter(x.op):
           return False
       return True
     if _IsOpFree(op):
@@ -2454,7 +2547,6 @@ class WhileContext(ControlFlowContext):
         zeros_shape = array_ops.shape_internal(value, optimize=False)
         acc = array_ops.zeros(zeros_shape, grad.dtype)
         if self.outer_context: self.outer_context.Exit()
-      acc._shape = grad.get_shape()  # pylint: disable=protected-access
 
     self.Enter()
     self.AddName(acc.name)
@@ -2527,9 +2619,17 @@ class WhileContext(ControlFlowContext):
     if shape_acc is not None:
       self.AddName(shape_acc.name)
       init_acc.append(shape_acc)
+
+    # Set use_input_shape=False since the accumulator tensors will grow in
+    # size. If use_input_shape=True, the _update_input call below will result in
+    # incompatible shapes.
     enter_acc = [_Enter(x, self._name, is_constant=False,
                         parallel_iterations=self._parallel_iterations,
-                        name="b_acc") for x in init_acc]
+                        use_input_shape=False, name="b_acc") for x in init_acc]
+    # Manually set appropriate partial shapes.
+    enter_acc[0].set_shape([None])
+    if values_acc.shape.dims is not None:
+      enter_acc[1].set_shape([None] + values_acc.shape.as_list()[1:])
     self.loop_enters.extend(enter_acc)
 
     merge_acc = [merge([x, x], name="b_acc")[0] for x in enter_acc]
@@ -2607,7 +2707,7 @@ class WhileContext(ControlFlowContext):
 
     if control_pivot is not None:
       for var in enter_vars:
-        if _IsLoopConstantEnter(var.op.inputs[0].op):
+        if util.IsLoopConstantEnter(var.op.inputs[0].op):
           # pylint: disable=protected-access
           var.op._add_control_input(control_pivot.op)
           # pylint: enable=protected-access
@@ -2683,11 +2783,6 @@ class WhileContext(ControlFlowContext):
     exit_vars = [exit(x[0]) for x in switch_vars]
     self._loop_exits = exit_vars
 
-    # Make sure the shapes of loop outputs are correct.
-    for m_var, n_var in zip(merge_vars, next_vars):
-      if isinstance(m_var, ops.Tensor):
-        _EnforceShapeInvariant(m_var, n_var)
-
     # Exit the loop.
     self.ExitResult(exit_vars)
 
@@ -2743,10 +2838,14 @@ class WhileContext(ControlFlowContext):
         graph._record_op_seen_by_control_dependencies(x.op)
     # pylint: enable=protected-access
 
+  def IsWhileContext(self):
+    return True
 
+
+# pylint: disable=redefined-outer-name
 def while_loop(cond, body, loop_vars, shape_invariants=None,
                parallel_iterations=10, back_prop=True, swap_memory=False,
-               name=None):
+               name=None, maximum_iterations=None):
   """Repeat `body` while the condition `cond` is true.
 
   `cond` is a callable returning a boolean scalar tensor. `body` is a callable
@@ -2818,6 +2917,10 @@ def while_loop(cond, body, loop_vars, shape_invariants=None,
     back_prop: Whether backprop is enabled for this while loop.
     swap_memory: Whether GPU-CPU memory swap is enabled for this loop.
     name: Optional name prefix for the returned tensors.
+    maximum_iterations: Optional maximum number of iterations of the while loop
+      to run.  If provided, the `cond` output is AND-ed with an additional
+      condition ensuring the number of iterations executed is no greater than
+      `maximum_iterations`.
 
   Returns:
     The output tensors for the loop variables after the loop. When the length
@@ -2871,18 +2974,53 @@ def while_loop(cond, body, loop_vars, shape_invariants=None,
     if parallel_iterations < 1:
       raise TypeError("parallel_iterations must be a positive integer.")
 
+    if maximum_iterations is not None:
+      maximum_iterations = ops.convert_to_tensor(
+          maximum_iterations, name="maximum_iterations")
+      if maximum_iterations.shape.ndims != 0:
+        raise ValueError("maximum_iterations must be a scalar, saw shape: %s" %
+                         maximum_iterations.shape)
+
+      counter = constant_op.constant(
+          0, dtype=maximum_iterations.dtype, name="iteration_counter")
+      orig_cond = cond
+      orig_body = body
+      if len(loop_vars) == 1:
+        loop_vars = (counter, loop_vars[0])
+        cond = lambda i, lv: (  # pylint: disable=g-long-lambda
+            math_ops.logical_and(i < maximum_iterations, orig_cond(lv)))
+        body = lambda i, lv: (i + 1, orig_body(lv))
+      else:
+        loop_vars = (counter, loop_vars)
+        cond = lambda i, lv: (  # pylint: disable=g-long-lambda
+            math_ops.logical_and(i < maximum_iterations, orig_cond(*lv)))
+        body = lambda i, lv: (i + 1, orig_body(*lv))
+
     if context.in_eager_mode():
       while cond(*loop_vars):
         loop_vars = body(*loop_vars)
-      return loop_vars
+      if maximum_iterations is not None:
+        return loop_vars[1]
+      else:
+        return loop_vars
 
     if shape_invariants is not None:
+      if maximum_iterations is not None:
+        shape_invariants = (tensor_shape.TensorShape([]), shape_invariants)
       nest.assert_same_structure(loop_vars, shape_invariants)
 
-    loop_context = WhileContext(parallel_iterations, back_prop, swap_memory)  # pylint: disable=redefined-outer-name
+    loop_context = WhileContext(
+        maximum_iterations=maximum_iterations,
+        parallel_iterations=parallel_iterations,
+        back_prop=back_prop,
+        swap_memory=swap_memory)
     ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
     result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
-    return result
+    if maximum_iterations is not None:
+      return result[1]
+    else:
+      return result
+# pylint: enable=redefined-outer-name
 
 
 def _AsTensorList(x, p):
@@ -3095,23 +3233,105 @@ def tuple(tensors, name=None, control_inputs=None):
     return tpl
 
 
-def _assert_exclusive(preds):
-  """Returns an Assert op that checks that the predicates are exclusive."""
-  preds_c = array_ops.stack(preds, name="preds_c")
+def _assert_at_most_n_true(predicates, n, msg):
+  """Returns an Assert op that checks that at most n predicates are True.
+
+  Args:
+    predicates: list of bool scalar tensors.
+    n: maximum number of true predicates allowed.
+    msg: Error message.
+  """
+  preds_c = array_ops.stack(predicates, name="preds_c")
   num_true_conditions = math_ops.reduce_sum(
       math_ops.cast(preds_c, dtypes.int32), name="num_true_conds")
-  at_most_one_true_condition = math_ops.less(
-      num_true_conditions, constant_op.constant(2, name="two_true_conds"))
+  condition = math_ops.less_equal(num_true_conditions,
+                                  constant_op.constant(n, name="n_true_conds"))
+  preds_names = ", ".join(getattr(p, "name", "?") for p in predicates)
+  error_msg = [
+      "%s: more than %d conditions (%s) evaluated as True:" %
+      (msg, n, preds_names), preds_c
+  ]
+  return Assert(condition, data=error_msg, summarize=len(predicates))
+
+
+def _case_create_default_action(predicates, actions):
+  """Creates default action for a list of actions and their predicates.
+
+  It uses the input actions to select an arbitrary as default and makes sure
+  that corresponding predicates have valid values.
+
+  Args:
+    predicates: a list of bool scalar tensors
+    actions: a list of callable objects which return tensors.
+
+  Returns:
+    a callable
+  """
+  k = len(predicates) - 1  # could pick any
+  predicate, action = predicates[k], actions[k]
+  other_predicates, other_actions = predicates[:k], actions[:k]
+
+  def default_action():
+    others_msg = ("Implementation error: "
+                  "selected default action #%d was called, but some of other "
+                  "predicates are True: " % k)
+    default_msg = ("Input error: "
+                   "None of conditions evaluated as True:",
+                   array_ops.stack(predicates, name="preds_c"))
+    with ops.control_dependencies([
+        _assert_at_most_n_true(other_predicates, n=0, msg=others_msg),
+        Assert(predicate, data=default_msg)
+    ]):
+      return action()
+
+  return default_action, other_predicates, other_actions
+
+
+def _case_verify_and_canonicalize_args(pred_fn_pairs, exclusive, name):
+  """Verifies input arguments for the case function.
+
+  Args:
+    pred_fn_pairs: Dict or list of pairs of a boolean scalar tensor and a
+                   callable which returns a list of tensors.
+    exclusive: True iff at most one predicate is allowed to evaluate to `True`.
+    name: A name for the case operation.
+
+  Raises:
+    TypeError: If `pred_fn_pairs` is not a list/dictionary.
+    TypeError: If `pred_fn_pairs` is a list but does not contain 2-tuples.
+    TypeError: If `fns[i]` is not callable for any i, or `default` is not
+               callable.
+
+  Returns:
+    a tuple <list of scalar bool tensors, list of callables>.
+  """
+  if not isinstance(pred_fn_pairs, (list, _basetuple, dict)):
+    raise TypeError("fns must be a list, tuple, or dict")
 
-  error_msg = [("More than one condition evaluated as True but "
-                "exclusive=True.  Conditions: (%s), Values:"
-                % ", ".join([p.name for p in preds])),
-               preds_c]
-  return Assert(condition=at_most_one_true_condition, data=error_msg,
-                summarize=len(preds))
+  if isinstance(pred_fn_pairs, collections.OrderedDict):
+    pred_fn_pairs = pred_fn_pairs.items()
+  elif isinstance(pred_fn_pairs, dict):
+    pred_fn_pairs = sorted(pred_fn_pairs.items(), key=lambda item: item[0].name)
+    if not exclusive:
+      logging.warn("%s: An unordered dictionary of predicate/fn pairs was "
+                   "provided, but exclusive=False. The order of conditional "
+                   "tests is deterministic but not guaranteed.", name)
+  for pred_fn_pair in pred_fn_pairs:
+    if not isinstance(pred_fn_pair, _basetuple) or len(pred_fn_pair) != 2:
+      raise TypeError("Each entry in pred_fn_pairs must be a 2-tuple")
+    pred, fn = pred_fn_pair
+    if pred.dtype != dtypes.bool:
+      raise TypeError("pred must be of type bool: %s", pred.name)
+    if not callable(fn):
+      raise TypeError("fn for pred %s must be callable." % pred.name)
+  predicates, actions = zip(*pred_fn_pairs)
+  return predicates, actions
 
 
-def case(pred_fn_pairs, default=None, exclusive=False, strict=False,
+def case(pred_fn_pairs,
+         default=None,
+         exclusive=False,
+         strict=False,
          name="case"):
   """Create a case operation.
 
@@ -3196,152 +3416,44 @@ def case(pred_fn_pairs, default=None, exclusive=False, strict=False,
     TypeError: If `pred_fn_pairs` is a list but does not contain 2-tuples.
     TypeError: If `fns[i]` is not callable for any i, or `default` is not
                callable.
-    ValueError: If in eager mode and all predicates are false and no
-               default is provided.
-    ValueError: If in eager mode and is passed a dictionary.
   """
-  pfp = pred_fn_pairs  # For readability
-  if not (isinstance(pfp, list) or isinstance(pfp, _basetuple)
-          or isinstance(pfp, dict)):
-    raise TypeError("fns must be a list, tuple, or dict")
-  if isinstance(pfp, dict):
-    if context.in_eager_mode():
-      raise ValueError(
-          "In eager mode the predicates must be a list, not a dictionary.")
-    if isinstance(pfp, collections.OrderedDict):
-      pfp = pfp.items()
+  predicates, actions = _case_verify_and_canonicalize_args(
+      pred_fn_pairs, exclusive, name)
+  with ops.name_scope(name, "case", [predicates]):
+    if default is None:
+      default, predicates, actions = _case_create_default_action(
+          predicates, actions)
+    fn = default
+    # To eval conditions in direct order we create nested conditions in reverse:
+    #   cond(c[0], true_fn=.., false_fn=cond(c[1], ...))
+    for predicate, action in reversed(list(zip(predicates, actions))):
+      fn = functools.partial(
+          cond, predicate, true_fn=action, false_fn=fn, strict=strict)
+    if exclusive:
+      with ops.control_dependencies([
+          _assert_at_most_n_true(
+              predicates, n=1, msg="Input error: exclusive=True")
+      ]):
+        return fn()
     else:
-      pfp = sorted(pfp.items(), key=lambda item: item[0].name)
-      if not exclusive:
-        logging.warn("%s: An unordered dictionary of predicate/fn pairs was "
-                     "provided, but exclusive=False. The order of conditional "
-                     "tests is deterministic but not guaranteed.", name)
-  for tup in pfp:
-    if not isinstance(tup, _basetuple) or len(tup) != 2:
-      raise TypeError("Each entry in pred_fn_pairs must be a 2-tuple")
-    pred, fn = tup
-    if pred.dtype != dtypes.bool:
-      raise TypeError("pred must be of type bool: %s", pred.name)
-    if not callable(fn):
-      raise TypeError("fn for pred %s must be callable." % pred.name)
+      return fn()
 
-  if default is not None and not callable(default):
-    raise TypeError("default must be callable.")
 
-  if context.in_eager_mode():
-    for pred, fn in pfp:
-      if pred:
-        return fn()
-    if default is None:
-      raise ValueError("tf.case received all false predicates and no default.")
-    return default()
+class XLAControlFlowContext(ControlFlowContext):
+  """Base class for XLA and TPU control flow contexts."""
 
-  preds, fns = map(list, zip(*pfp))
-  del pfp  # From now on, preds and fns form the source of truth.
+  def __init__(self):
+    super(XLAControlFlowContext, self).__init__()
+    self._name = "XLAControlFlowContext"
 
-  with ops.name_scope(name, "case", [preds]):
-    exclusivity_assert = _assert_exclusive(preds) if exclusive else None
-    # If no default is provided, then we remove one of the (predicate, function)
-    # pairs and define the default to be the removed function with an additional
-    # control dependency that asserts that the removed predicate holds.
-    if default is None:
-      all_preds = _basetuple(preds)  # For the error message.
-      last_pred, last_fn = preds.pop(), fns.pop()
-      def new_default():
-        preds_c = array_ops.stack(all_preds, name="preds_c")
-        error_msg = [
-            ("None of the conditions evaluated as True. Conditions: (%s), "
-             "Values:" % ", ".join([p.name for p in all_preds])),
-            preds_c]
-        assertion = Assert(condition=last_pred,
-                           data=error_msg, summarize=len(all_preds))
-        with ops.control_dependencies([assertion]):
-          return last_fn()
-      default = new_default
-
-    if not preds:
-      return default()
-    not_preds = []
-    for i, p in enumerate(preds):
-      with ops.name_scope("not_%d" % i):
-        not_preds.append(math_ops.logical_not(p))
-    and_not_preds = [constant_op.constant(True, name="always_true")]
-    for i, notp in enumerate(not_preds):
-      with ops.name_scope("and_not_%d" % i):
-        and_not_preds.append(math_ops.logical_and(and_not_preds[-1], notp))
-
-    # preds = [p1, p2, p3]
-    # fns = [f1, f2, f3]
-    # not_preds = [~p1, ~p2, ~p3]
-    # and_not_preds = [True, ~p1, ~p1 & ~p2, ~p1 & ~p2 & ~p3]
-    # case_preds = [p1,
-    #               p2 & ~p1,
-    #               p3 & ~p2 & ~p1,
-    #              ~p3 & ~p2 & ~p1]
-
-    case_preds = []
-    for i, (p, and_not_p_prev) in enumerate(zip(preds, and_not_preds[:-1])):
-      with ops.name_scope("case_%d" % i):
-        case_preds.append(math_ops.logical_and(p, and_not_p_prev))
-    with ops.name_scope("case_none_are_true"):
-      case_preds.append(and_not_preds[-1])
-
-    # Create an empty tensor, or list, with the right type and shape
-    with ops.name_scope("case_create_empty"):
-      def _create_empty_constant(dtype, shape):
-        value = ("" if dtype == dtypes.string else dtype.as_numpy_dtype())
-        if shape.ndims is None:
-          return array_ops.constant(value, dtype=dtype)
-        else:
-          temp_shape = [1 if x.value is None else x.value for x in shape]
-          result = array_ops.constant(value, shape=temp_shape, dtype=dtype)
-          result._shape = shape  # pylint: disable=protected-access
-          return result
-
-      def _correct_empty(v):
-        if isinstance(v, ops.Operation):
-          return no_op()
-        elif isinstance(v, tensor_array_ops.TensorArray):
-          return v
-        elif not hasattr(v, "dtype"):
-          return ops.convert_to_tensor(v)
-        elif isinstance(v, sparse_tensor.SparseTensor):
-          return sparse_tensor.SparseTensor(indices=[[0] * len(v.get_shape())],
-                                            values=[v.dtype.as_numpy_dtype()],
-                                            dense_shape=v.get_shape())
-        else:
-          return _create_empty_constant(v.dtype, v.get_shape())
-
-      empty = lambda: nest.map_structure(_correct_empty, default())
-
-    # case_sequence = [
-    #   cond(~p3 & ~p2 & ~p1, default, empty),
-    #   cond(p3 & ~p2 & ~p1, f3, lambda: case_sequence[0]),
-    #   cond(p2 & ~p1, f2, lambda: case_sequence[1]),
-    #   cond(p1, f1, lambda: case_sequence[2])
-    # ]
-    #
-    # And the return value will be case_sequence[-1]
-    def _build_case():
-      all_fns = [fn for fn in fns]
-      all_fns.append(default)
-      prev_case = None
-      for i, (cp, fn) in enumerate(list(zip(case_preds, all_fns))[::-1]):
-        prev_case = cond(
-            cp, fn,
-            empty if i == 0 else lambda: prev_case,
-            strict=strict, name="If_%d" % i)
-      return prev_case
-
-    if exclusivity_assert is not None:
-      with ops.control_dependencies([exclusivity_assert]):
-        case_seq = _build_case()
-    else:
-      case_seq = _build_case()
+  def IsXLAContext(self):
+    return True
 
-    if not strict:
-      case_seq = _UnpackIfSingleton(case_seq)
-    return case_seq
+  def AddOp(self, _):
+    pass
+
+  def AddValue(self, x):
+    return x
 
 
 ops.register_proto_function(ops.GraphKeys.COND_CONTEXT,
diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py
index 3e8f39dd240af3a5030d259603ab648d50c27cd3..cc5a42bf3ddd4b37d037f8d28a2fe6af79f79ba1 100644
--- a/tensorflow/python/ops/control_flow_ops_test.py
+++ b/tensorflow/python/ops/control_flow_ops_test.py
@@ -51,6 +51,7 @@ TestTuple = collections.namedtuple("TestTuple", "a b")
 SingletonTestTuple = collections.namedtuple("SingletonTestTuple", "a")
 
 
+@test_util.with_c_api
 class GroupTestCase(test_util.TensorFlowTestCase):
 
   def _StripNode(self, nd):
@@ -132,6 +133,7 @@ class GroupTestCase(test_util.TensorFlowTestCase):
         control_flow_ops.group(1, 2)
 
 
+@test_util.with_c_api
 class ShapeTestCase(test_util.TensorFlowTestCase):
 
   def testShape(self):
@@ -143,6 +145,7 @@ class ShapeTestCase(test_util.TensorFlowTestCase):
                             [constant_op.constant(1.0)], tensor).get_shape())
 
 
+@test_util.with_c_api
 class WithDependenciesTestCase(test_util.TensorFlowTestCase):
 
   def testTupleDependencies(self):
@@ -174,6 +177,7 @@ class WithDependenciesTestCase(test_util.TensorFlowTestCase):
         self.assertEquals(1, counter.eval())
 
 
+@test_util.with_c_api
 class SwitchTestCase(test_util.TensorFlowTestCase):
 
   def testIndexedSlicesWithDenseShape(self):
@@ -431,6 +435,7 @@ class CondTest(test_util.TensorFlowTestCase):
           control_flow_ops.cond(True, lambda: x, lambda: x, fn2=lambda: x)
 
 
+@test_util.with_c_api
 class ContextTest(test_util.TensorFlowTestCase):
 
   def testCondContext(self):
@@ -447,18 +452,25 @@ class ContextTest(test_util.TensorFlowTestCase):
               c.to_proto(),
               control_flow_ops.CondContext.from_proto(c.to_proto()).to_proto())
 
-  def testWhileContext(self):
+  def _testWhileContextHelper(self, maximum_iterations=None):
     with self.test_session() as sess:
       i = constant_op.constant(0)
       c = lambda i: math_ops.less(i, 10)
       b = lambda i: math_ops.add(i, 1)
-      control_flow_ops.while_loop(c, b, [i])
+      control_flow_ops.while_loop(
+          c, b, [i], maximum_iterations=maximum_iterations)
       for op in sess.graph.get_operations():
-        c = op._get_control_flow_context()
-        if c:
-          self.assertProtoEquals(
-              c.to_proto(),
-              control_flow_ops.WhileContext.from_proto(c.to_proto()).to_proto())
+        context = op._get_control_flow_context()
+        if context:
+          self.assertProtoEquals(context.to_proto(),
+                                 control_flow_ops.WhileContext.from_proto(
+                                     context.to_proto()).to_proto())
+
+  def testWhileContext(self):
+    self._testWhileContextHelper()
+
+  def testWhileContextWithMaximumIterations(self):
+    self._testWhileContextHelper(maximum_iterations=10)
 
   def testControlContextImportScope(self):
     with self.test_session():
@@ -516,6 +528,7 @@ def _RawNestedShape(nested_shape):
 
 
 # TODO(yori): Add tests for indexed slices.
+@test_util.with_c_api
 class DataTypesTest(test_util.TensorFlowTestCase):
 
   def assertAllEqualNested(self, a, b):
@@ -540,7 +553,9 @@ class DataTypesTest(test_util.TensorFlowTestCase):
 
   def _testReturnValues(self, fn_true, fn_false, expected_value_true,
                         expected_value_false, strict=False,
-                        check_cond=True):
+                        check_cond=True, feed_dict=None):
+    if feed_dict is None: feed_dict = {}
+
     condition = array_ops.placeholder(dtypes.bool)
     output_cond = control_flow_ops.cond(condition, fn_true, fn_false,
                                         strict=strict)
@@ -549,13 +564,17 @@ class DataTypesTest(test_util.TensorFlowTestCase):
 
     with self.test_session() as sess:
       variables.global_variables_initializer().run()
+      true_feed_dict = {condition: True}
+      true_feed_dict.update(feed_dict)
       result_cond, result_case = sess.run([output_cond, output_case],
-                                          feed_dict={condition: True})
+                                          feed_dict=true_feed_dict)
       self.assertAllEqualNested(result_cond, expected_value_true)
       if check_cond:
         self.assertAllEqualNested(result_case, expected_value_true)
+      false_feed_dict = {condition: False}
+      false_feed_dict.update(feed_dict)
       result_cond, result_case = sess.run([output_cond, output_case],
-                                          feed_dict={condition: False})
+                                          feed_dict=false_feed_dict)
       self.assertAllEqualNested(result_cond, expected_value_false)
       if check_cond:
         self.assertAllEqualNested(result_case, expected_value_false)
@@ -631,26 +650,26 @@ class DataTypesTest(test_util.TensorFlowTestCase):
 
   def test_tensors_unknown_shape(self):
     def _BuildTrueBranch(dtype):
+      tensor = array_ops.placeholder(dtype=dtype, shape=None)
       def _Build():
-        tensor = array_ops.zeros([2, 2], dtype=dtype)
-        tensor._shape = tensor_shape.TensorShape(None)
         return tensor
-      return _Build
+      return _Build, tensor
 
     def _BuildFalseBranch(dtype):
+      tensor = array_ops.placeholder(dtype=dtype, shape=None)
       def _Build():
-        tensor = array_ops.ones([2, 2], dtype=dtype)
-        tensor._shape = tensor_shape.TensorShape(None)
         return tensor
-      return _Build
+      return _Build, tensor
 
     for dtype in (dtypes.float16, dtypes.int8, dtypes.int32, dtypes.uint8):
       shape = tensor_shape.TensorShape(None)
-      fn_true = _BuildTrueBranch(dtype)
-      fn_false = _BuildFalseBranch(dtype)
+      fn_true, true_tensor = _BuildTrueBranch(dtype)
+      fn_false, false_tensor = _BuildFalseBranch(dtype)
       self._testShape(fn_true, fn_false, shape)
       self._testReturnValues(fn_true, fn_false,
-                             np.zeros([2, 2]), np.ones([2, 2]))
+                             np.zeros([2, 2]), np.ones([2, 2]),
+                             feed_dict={true_tensor: np.zeros([2, 2]),
+                                        false_tensor: np.ones([2, 2])})
 
   def test_sparse_tensors(self):
     shape = tensor_shape.TensorShape([None, None])
@@ -674,26 +693,29 @@ class DataTypesTest(test_util.TensorFlowTestCase):
 
   def test_tensors_with_partially_specified_shapes(self):
     def _BuildBranch(dtype, shape):
+      a = array_ops.placeholder(dtype=dtype, shape=shape[0])
+      b = array_ops.placeholder(dtype=dtype, shape=shape[1])
+      c = array_ops.placeholder(dtype=dtype, shape=shape[2])
       def _Build():
-        a = array_ops.zeros([2, 2], dtype=dtype)
-        b = array_ops.zeros([5], dtype=dtype)
-        c = array_ops.ones([3, 3], dtype=dtype)
-        a._shape = tensor_shape.TensorShape(shape[0])
-        b._shape = tensor_shape.TensorShape(shape[1])
-        c._shape = tensor_shape.TensorShape(shape[2])
         return a, b, c
-      return _Build
+      return _Build, (a, b, c)
 
     for dtype in (dtypes.float16, dtypes.int8, dtypes.int32, dtypes.uint8):
       shape = (tensor_shape.TensorShape([None, 2]),
                tensor_shape.TensorShape([None]),
                tensor_shape.TensorShape([3, None]))
-      fn_true = _BuildBranch(dtype, shape)
-      fn_false = _BuildBranch(dtype, shape)
+      fn_true, true_tensors = _BuildBranch(dtype, shape)
+      fn_false, false_tensors = _BuildBranch(dtype, shape)
       self._testShape(fn_true, fn_false, shape)
       self._testReturnValues(fn_true, fn_false,
                              (np.zeros([2, 2]), np.zeros(5), np.ones([3, 3])),
-                             (np.zeros([2, 2]), np.zeros(5), np.ones([3, 3])))
+                             (np.zeros([2, 2]), np.zeros(5), np.ones([3, 3])),
+                             feed_dict={true_tensors[0]: np.zeros([2, 2]),
+                                        false_tensors[0]: np.zeros([2, 2]),
+                                        true_tensors[1]: np.zeros([5]),
+                                        false_tensors[1]: np.zeros([5]),
+                                        true_tensors[2]: np.ones([3, 3]),
+                                        false_tensors[2]: np.ones([3, 3])})
 
   def test_tensor_arrays(self):
     element_shape = tensor_shape.TensorShape([2])
@@ -837,6 +859,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     self.assertEqual(matrix.get_shape(), tensor_shape.TensorShape([2, 2]))
 
 
+@test_util.with_c_api
 class CaseTest(test_util.TensorFlowTestCase):
 
   def testCase_withDefault(self):
@@ -860,8 +883,7 @@ class CaseTest(test_util.TensorFlowTestCase):
     with self.test_session() as sess:
       self.assertEqual(sess.run(output, feed_dict={x: 1}), 2)
       self.assertEqual(sess.run(output, feed_dict={x: 3}), 8)
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   "More than one condition evaluated as True"):
+      with self.assertRaisesRegexp(errors.InvalidArgumentError, "Input error:"):
         sess.run(output, feed_dict={x: 2})
 
   def testCase_multiple_matches_non_exclusive(self):
@@ -886,11 +908,7 @@ class CaseTest(test_util.TensorFlowTestCase):
       self.assertEqual(sess.run(output, feed_dict={x: 1}), 2)
       self.assertEqual(sess.run(output, feed_dict={x: 2}), 4)
       self.assertEqual(sess.run(output, feed_dict={x: 3}), 6)
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r"\[None of the conditions evaluated as True. "
-          r"Conditions: \(Equal:0, Equal_1:0, Equal_2:0\), Values:\] "
-          r"\[0 0 0\]"):
+      with self.assertRaisesRegexp(errors.InvalidArgumentError, "Input error:"):
         sess.run(output, feed_dict={x: 4})
 
   def testCase_withoutDefault_oneCondition(self):
@@ -899,10 +917,7 @@ class CaseTest(test_util.TensorFlowTestCase):
     output = control_flow_ops.case(conditions, exclusive=True)
     with self.test_session() as sess:
       self.assertEqual(sess.run(output, feed_dict={x: 1}), 2)
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r"\[None of the conditions evaluated as True. "
-          r"Conditions: \(Equal:0\), Values:\] \[0\]"):
+      with self.assertRaisesRegexp(errors.InvalidArgumentError, "Input error:"):
         sess.run(output, feed_dict={x: 4})
 
 
diff --git a/tensorflow/python/ops/control_flow_util.py b/tensorflow/python/ops/control_flow_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..eee31102db57b44ee29cb04ea79aabf003603f2f
--- /dev/null
+++ b/tensorflow/python/ops/control_flow_util.py
@@ -0,0 +1,263 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utilty functions for control flow.
+
+This file is necessary to avoid cyclic dependencies between ops.py and
+control_flow_ops.py.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import traceback
+
+from tensorflow.python.platform import tf_logging as logging
+
+
+def IsInXLAContext(op):
+  try:
+    xla_compile = op.get_attr("_XlaCompile")
+    if xla_compile: return True
+  except ValueError:
+    pass
+  ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
+  return GetContainingXLAContext(ctxt) is not None
+
+
+def IsInWhileLoop(op):
+  ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
+  return GetContainingWhileContext(ctxt) is not None
+
+
+def IsInCond(op):
+  ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
+  return GetContainingCondContext(ctxt) is not None
+
+
+def IsSwitch(op):
+  """Return true if `op` is a Switch."""
+  return op.type == "Switch" or op.type == "RefSwitch"
+
+
+def IsLoopEnter(op):
+  """Returns true if `op` is an Enter."""
+  return op.type == "Enter" or op.type == "RefEnter"
+
+
+def IsLoopExit(op):
+  """Return true if `op` is an Exit."""
+  return op.type == "Exit" or op.type == "RefExit"
+
+
+def IsLoopSwitch(op):
+  """Return true if `op` is the Switch for a while loop."""
+  if IsSwitch(op):
+    ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
+    return ctxt and ctxt.IsWhileContext()
+  return False
+
+
+def IsLoopConstantEnter(op):
+  """Return true iff op is a loop invariant."""
+  return IsLoopEnter(op) and op.get_attr("is_constant")
+
+
+def GetLoopConstantEnter(value):
+  """Return the enter op if we can infer `value` to be a loop invariant."""
+  id_ops = {"Switch", "RefSwitch", "Identity", "RefIdentity"}
+  op = value.op
+  while op.type in id_ops:
+    op = op.inputs[0].op
+  return op if IsLoopConstantEnter(op) else None
+
+
+def GetOutputContext(op):
+  """Return the control flow context for the output of an op."""
+  ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
+  # Exit nodes usually have a control flow context, except in the case where the
+  # exit node was imported via import_graph_def (in which case no nodes have
+  # control flow contexts).
+  if ctxt is not None and IsLoopExit(op):
+    ctxt = ctxt.outer_context
+  return ctxt
+
+
+def GetContainingWhileContext(ctxt, stop_ctxt=None):
+  """Returns the first ancestor WhileContext of `ctxt`.
+
+  Returns `ctxt` if `ctxt` is a WhileContext, or None if `ctxt` is not in a
+  while loop.
+
+  Args:
+    ctxt: ControlFlowContext
+    stop_ctxt: ControlFlowContext, optional. If provided, the search will end
+      if it sees stop_ctxt.
+
+  Returns:
+    `ctxt` if `ctxt` is a WhileContext, the most nested WhileContext containing
+    `ctxt`, or None if `ctxt` is not in a while loop.  If `stop_ctxt` is not
+    `None`, this returns `ctxt` if it matches `stop_ctxt` in its traversal.
+  """
+  while ctxt:
+    if ctxt.IsWhileContext() or ctxt == stop_ctxt: return ctxt
+    ctxt = ctxt.outer_context
+  return None
+
+
+def GetContainingXLAContext(ctxt):
+  """Returns the first ancestor XLAContext of `ctxt`.
+
+  Returns `ctxt` if `ctxt` is a XLAContext, or None if `ctxt` is not in a
+  while loop.
+
+  Args:
+    ctxt: ControlFlowContext
+
+  Returns:
+    `ctxt` if `ctxt` is a XLAContext, the most nested XLAContext containing
+    `ctxt`, or None if `ctxt` is not in a while loop.
+  """
+  while ctxt:
+    if ctxt.IsXLAContext(): return ctxt
+    ctxt = ctxt.outer_context
+  return None
+
+
+def GetContainingCondContext(ctxt):
+  """Returns the first ancestor CondContext of `ctxt`.
+
+  Returns `ctxt` if `ctxt` is a CondContext, or None if `ctxt` is not in a cond.
+
+  Args:
+    ctxt: ControlFlowContext
+
+  Returns:
+    `ctxt` if `ctxt` is a CondContext, the most nested CondContext containing
+    `ctxt`, or None if `ctxt` is not in a cond.
+  """
+  while ctxt:
+    if ctxt.IsCondContext(): return ctxt
+    ctxt = ctxt.outer_context
+  return None
+
+
+def IsContainingContext(ctxt, maybe_containing_ctxt):
+  """Returns true if `maybe_containing_ctxt` is or contains `ctxt`."""
+  while ctxt is not maybe_containing_ctxt:
+    if ctxt is None: return False
+    ctxt = ctxt.outer_context
+  return True
+
+
+def CheckInputFromValidContext(op, input_op):
+  """Returns whether `input_op` can be used from `op`s context.
+
+  Conceptually, only inputs from op's while context or any ancestor while
+  context (including outside of any context) are valid. In practice, there are
+  many other edge cases as well.
+
+  Args:
+    op: Operation
+    input_op: Operation
+
+  Raises:
+    ValueError: if input_op is from an invalid context.
+  """
+  op_ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
+  input_ctxt = GetOutputContext(input_op)
+  valid = False
+
+  if not input_ctxt:
+    # input_op isn't in a control flow context.
+    valid = True
+  elif op_ctxt is input_ctxt:
+    # input_op is in the same context as op.
+    valid = True
+  else:
+    while_ctxt = GetContainingWhileContext(op_ctxt)
+    input_while_ctxt = GetContainingWhileContext(input_ctxt)
+
+    if while_ctxt is None:
+      if input_while_ctxt is None:
+        # Neither op nor input_op is in a while loop, but one or both are in
+        # conds. We allow this, although execution will fail if the branch
+        # corresponding to input_op's cond context isn't taken.
+        valid = True
+      # Invalid if op isn't in a while loop and input_op is. Unless...
+      if IsLoopEnter(op):
+        # WhileContext._BuildLoop clears context for Enter nodes.
+        valid = True
+      if IsSwitch(op):
+        # CondContext.AddValue clears context for Switch nodes.
+        valid = True
+    elif IsContainingContext(while_ctxt, input_while_ctxt):
+      # input_op is in a while loop which contains op's while loop (or not in a
+      # while loop at all).
+      valid = True
+    elif (while_ctxt.grad_state and
+          IsContainingContext(while_ctxt.grad_state.forward_context,
+                              input_while_ctxt)):
+      # op is in a gradient context and input_op is in the associated forward
+      # pass context or an ancestor thereof. This case is need to build while
+      # loop gradients.
+      # NOTE(skyewm): we theoretically also need this case for custom gradient
+      # functions that close over tensors from ancestor contexts, but I haven't
+      # verified this.
+      valid = True
+    elif (while_ctxt.grad_state and
+          while_ctxt.grad_state.forward_context is
+          input_while_ctxt._outer_context):  # pylint: disable=protected-access
+      # op is in a gradient context and input_op is in a child of the associated
+      # forward pass context. This case is needed for the gradients of while
+      # loops with conds.
+      valid = True
+    elif (input_while_ctxt.grad_state and
+          input_while_ctxt.grad_state.forward_context is while_ctxt):
+      # input_op is in the gradient context of op's context. This case is needed
+      # when the gradient of a while loop gradient is requested (this will
+      # eventually fail unless there is a stop_gradient() or similar).
+      valid = True
+    elif (input_while_ctxt.grad_state and
+          input_ctxt.grad_state.forward_context.grad_state and
+          input_ctxt.grad_state.forward_context.grad_state.forward_context is
+          while_ctxt):
+      # input_op is in the grad grad context of op's context. This case is
+      # needed when the gradient of a while loop gradient is requested (this
+      # will eventually fail unless there is a stop_gradient() or similar).
+      valid = True
+
+  if not valid:
+    if while_ctxt:
+      error_msg = (
+          "Cannot use '%s' as input to '%s' because they are in different while"
+          " loops." % (op.name, input_op.name))
+    else:
+      error_msg = (
+          "Cannot use '%s' as input to '%s' because '%s' is in a while loop."
+          % (input_op.name, op.name, input_op.name))
+
+    # Log the error message plus the relevant stack traces. The stacks may be
+    # useful for debugging this error, but we don't want to raise an
+    # unreadable exception.
+    log_msg = error_msg
+    log_msg += "\n\n%s while context: %s" % (op.name, while_ctxt)
+    log_msg += "\n%s while context: %s" % (input_op.name, input_while_ctxt)
+    log_msg += "\n\nTraceback for %s:\n%s\nTraceback for %s:\n%s\n" % (
+        op.name, "".join(traceback.format_list(op.traceback)),
+        input_op.name, "".join(traceback.format_list(input_op.traceback)))
+    logging.info(log_msg)
+    raise ValueError(error_msg + " See info log for more details.")
diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py
index c186eb5b7ecaa5c74841aca15f0f11e994eba2ea..f441f6d4bf7986bbfb15593edf2b2b1bfe6ec71f 100644
--- a/tensorflow/python/ops/data_flow_ops.py
+++ b/tensorflow/python/ops/data_flow_ops.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
+from tensorflow.python.lib.io import python_io
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_data_flow_ops
@@ -2225,7 +2226,8 @@ class RecordInput(object):
                shift_ratio=0,
                seed=0,
                name=None,
-               batches=None):
+               batches=None,
+               compression_type=None):
     """Constructs a RecordInput Op.
 
     Args:
@@ -2243,6 +2245,8 @@ class RecordInput(object):
         how many batches to create, which are returned as a list when
         `get_yield_op()` is called. An example use case is to split processing
         between devices on one computer.
+      compression_type: The type of compression for the file. Currently ZLIB and
+        GZIP are supported. Defaults to none.
 
     Raises:
       ValueError: If one of the arguments is invalid.
@@ -2257,12 +2261,17 @@ class RecordInput(object):
     self._shift_ratio = shift_ratio
     self._seed = seed
     self._name = name
+    self._compression_type = python_io.TFRecordCompressionType.NONE
+    if compression_type is not None:
+      self._compression_type = compression_type
 
   def get_yield_op(self):
     """Adds a node that yields a group of records every time it is executed.
     If RecordInput `batches` parameter is not None, it yields a list of
     record batches with the specified `batch_size`.
     """
+    compression_type = python_io.TFRecordOptions.get_compression_type_string(
+        python_io.TFRecordOptions(self._compression_type))
     records = gen_data_flow_ops.record_input(
         file_pattern=self._file_pattern,
         file_buffer_size=self._buffer_size,
@@ -2270,6 +2279,7 @@ class RecordInput(object):
         file_shuffle_shift_ratio=self._shift_ratio,
         batch_size=self._batch_size,
         file_random_seed=self._seed,
+        compression_type=compression_type,
         name=self._name)
     if self._batches is None:
       return records
diff --git a/tensorflow/python/ops/distributions/categorical.py b/tensorflow/python/ops/distributions/categorical.py
index 84ca6db4c4b4deea86fb0a0f626eda42f0283d1f..2046a08d618faf592fb3fc8230d8f3c4c5e8c7c7 100644
--- a/tensorflow/python/ops/distributions/categorical.py
+++ b/tensorflow/python/ops/distributions/categorical.py
@@ -263,11 +263,12 @@ class Categorical(distribution.Distribution):
       logits_2d = self.logits
     else:
       logits_2d = array_ops.reshape(self.logits, [-1, self.event_size])
-    draws = random_ops.multinomial(logits_2d, n, seed=seed)
+    draws = random_ops.multinomial(
+        logits_2d, n, seed=seed, output_dtype=self.dtype)
     draws = array_ops.reshape(
         array_ops.transpose(draws),
         array_ops.concat([[n], self.batch_shape_tensor()], 0))
-    return math_ops.cast(draws, self.dtype)
+    return draws
 
   def _cdf(self, k):
     k = ops.convert_to_tensor(k, name="k")
diff --git a/tensorflow/python/ops/distributions/dirichlet_multinomial.py b/tensorflow/python/ops/distributions/dirichlet_multinomial.py
index d792e9fe52dee4325d0956dbb74c8b408d5a1e8c..aa2b511c5413944df665198eacc26066b8457773 100644
--- a/tensorflow/python/ops/distributions/dirichlet_multinomial.py
+++ b/tensorflow/python/ops/distributions/dirichlet_multinomial.py
@@ -122,21 +122,22 @@ class DirichletMultinomial(distribution.Distribution):
   #### Examples
 
   ```python
-  alpha = [1, 2, 3]
-  n = 2
+  alpha = [1., 2., 3.]
+  n = 2.
   dist = DirichletMultinomial(n, alpha)
   ```
 
-  Creates a 3-class distribution, with the 3rd class is most likely to be drawn.
+  Creates a 3-class distribution, with the 3rd class is most likely to be
+  drawn.
   The distribution functions can be evaluated on counts.
 
   ```python
   # counts same shape as alpha.
-  counts = [0, 0, 2]
+  counts = [0., 0., 2.]
   dist.prob(counts)  # Shape []
 
-  # alpha will be broadcast to [[1, 2, 3], [1, 2, 3]] to match counts.
-  counts = [[1, 1, 0], [1, 0, 1]]
+  # alpha will be broadcast to [[1., 2., 3.], [1., 2., 3.]] to match counts.
+  counts = [[1., 1., 0.], [1., 0., 1.]]
   dist.prob(counts)  # Shape [2]
 
   # alpha will be broadcast to shape [5, 7, 3] to match counts.
@@ -147,12 +148,12 @@ class DirichletMultinomial(distribution.Distribution):
   Creates a 2-batch of 3-class distributions.
 
   ```python
-  alpha = [[1, 2, 3], [4, 5, 6]]  # Shape [2, 3]
-  n = [3, 3]
+  alpha = [[1., 2., 3.], [4., 5., 6.]]  # Shape [2, 3]
+  n = [3., 3.]
   dist = DirichletMultinomial(n, alpha)
 
-  # counts will be broadcast to [[2, 1, 0], [2, 1, 0]] to match alpha.
-  counts = [2, 1, 0]
+  # counts will be broadcast to [[2., 1., 0.], [2., 1., 0.]] to match alpha.
+  counts = [2., 1., 0.]
   dist.prob(counts)  # Shape [2]
   ```
 
diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py
index 22687a093ae72edff1d53131cab49fa004aa3be0..098622c52f431fed1f6e21ffaaed9ebc7142f227 100644
--- a/tensorflow/python/ops/distributions/distribution.py
+++ b/tensorflow/python/ops/distributions/distribution.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util
 from tensorflow.python.util import tf_inspect
 
@@ -43,10 +44,26 @@ __all__ = [
 ]
 
 _DISTRIBUTION_PUBLIC_METHOD_WRAPPERS = [
-    "batch_shape_tensor", "batch_shape", "event_shape_tensor", "event_shape",
-    "sample", "log_prob", "prob", "log_cdf", "cdf", "log_survival_function",
-    "survival_function", "entropy", "mean", "variance", "stddev", "mode",
-    "covariance"]
+    "batch_shape",
+    "batch_shape_tensor",
+    "cdf",
+    "covariance",
+    "cross_entropy",
+    "entropy",
+    "event_shape",
+    "event_shape_tensor",
+    "kl_divergence",
+    "log_cdf",
+    "log_prob",
+    "log_survival_function",
+    "mean",
+    "mode",
+    "prob",
+    "sample",
+    "stddev",
+    "survival_function",
+    "variance",
+]
 
 
 @six.add_metaclass(abc.ABCMeta)
@@ -608,7 +625,7 @@ class Distribution(_BaseDistribution):
     """Indicates that `event_shape == []`.
 
     Args:
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       is_scalar_event: `bool` scalar `Tensor`.
@@ -622,7 +639,7 @@ class Distribution(_BaseDistribution):
     """Indicates that `batch_shape == []`.
 
     Args:
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       is_scalar_batch: `bool` scalar `Tensor`.
@@ -683,7 +700,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       log_prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
@@ -710,7 +727,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
@@ -747,7 +764,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       logcdf: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
@@ -780,7 +797,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       cdf: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
@@ -818,7 +835,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
@@ -853,7 +870,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
@@ -899,7 +916,7 @@ class Distribution(_BaseDistribution):
 
     Args:
       value: `float` or `double` `Tensor`.
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       quantile: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
@@ -923,7 +940,7 @@ class Distribution(_BaseDistribution):
     denotes expectation, and `Var.shape = batch_shape + event_shape`.
 
     Args:
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       variance: Floating-point `Tensor` with shape identical to
@@ -954,7 +971,7 @@ class Distribution(_BaseDistribution):
     denotes expectation, and `stddev.shape = batch_shape + event_shape`.
 
     Args:
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       stddev: Floating-point `Tensor` with shape identical to
@@ -1002,7 +1019,7 @@ class Distribution(_BaseDistribution):
     length-`k'` vector.
 
     Args:
-      name: The name to give this op.
+      name: Python `str` prepended to names of ops created by this function.
 
     Returns:
       covariance: Floating-point `Tensor` with shape `[B1, ..., Bn, k', k']`
@@ -1020,6 +1037,67 @@ class Distribution(_BaseDistribution):
     with self._name_scope(name):
       return self._mode()
 
+  def _cross_entropy(self, other):
+    return kullback_leibler.cross_entropy(
+        self, other, allow_nan_stats=self.allow_nan_stats)
+
+  def cross_entropy(self, other, name="cross_entropy"):
+    """Computes the (Shannon) cross entropy.
+
+    Denote this distribution (`self`) by `P` and the `other` distribution by
+    `Q`. Assuming `P, Q` are absolutely continuous with respect to
+    one another and permit densities `p(x) dr(x)` and `q(x) dr(x)`, (Shanon)
+    cross entropy is defined as:
+
+    ```none
+    H[P, Q] = E_p[-log q(X)] = -int_F p(x) log q(x) dr(x)
+    ```
+
+    where `F` denotes the support of the random variable `X ~ P`.
+
+    Args:
+      other: `tf.distributions.Distribution` instance.
+      name: Python `str` prepended to names of ops created by this function.
+
+    Returns:
+      cross_entropy: `self.dtype` `Tensor` with shape `[B1, ..., Bn]`
+        representing `n` different calculations of (Shanon) cross entropy.
+    """
+    with self._name_scope(name):
+      return self._cross_entropy(other)
+
+  def _kl_divergence(self, other):
+    return kullback_leibler.kl_divergence(
+        self, other, allow_nan_stats=self.allow_nan_stats)
+
+  def kl_divergence(self, other, name="kl_divergence"):
+    """Computes the Kullback--Leibler divergence.
+
+    Denote this distribution (`self`) by `p` and the `other` distribution by
+    `q`. Assuming `p, q` are absolutely continuous with respect to reference
+    measure `r`, the KL divergence is defined as:
+
+    ```none
+    KL[p, q] = E_p[log(p(X)/q(X))]
+             = -int_F p(x) log q(x) dr(x) + int_F p(x) log p(x) dr(x)
+             = H[p, q] - H[p]
+    ```
+
+    where `F` denotes the support of the random variable `X ~ p`, `H[., .]`
+    denotes (Shanon) cross entropy, and `H[.]` denotes (Shanon) entropy.
+
+    Args:
+      other: `tf.distributions.Distribution` instance.
+      name: Python `str` prepended to names of ops created by this function.
+
+    Returns:
+      kl_divergence: `self.dtype` `Tensor` with shape `[B1, ..., Bn]`
+        representing `n` different calculations of the Kullback-Leibler
+        divergence.
+    """
+    with self._name_scope(name):
+      return self._kl_divergence(other)
+
   @contextlib.contextmanager
   def _name_scope(self, name=None, values=None):
     """Helper function to standardize op scope."""
diff --git a/tensorflow/python/ops/distributions/kullback_leibler.py b/tensorflow/python/ops/distributions/kullback_leibler.py
index a6ab581cc22ce8e9a278bb8e0c7e6afc2dcc30eb..829b9611cff02895b67ec39711b8c53e682eb3c5 100644
--- a/tensorflow/python/ops/distributions/kullback_leibler.py
+++ b/tensorflow/python/ops/distributions/kullback_leibler.py
@@ -110,6 +110,38 @@ def kl_divergence(distribution_a, distribution_b,
       return array_ops.identity(kl_t, name="checked_kl")
 
 
+def cross_entropy(ref, other,
+                  allow_nan_stats=True, name=None):
+  """Computes the (Shannon) cross entropy.
+
+  Denote two distributions by `P` (`ref`) and `Q` (`other`). Assuming `P, Q`
+  are absolutely continuous with respect to one another and permit densities
+  `p(x) dr(x)` and `q(x) dr(x)`, (Shanon) cross entropy is defined as:
+
+  ```none
+  H[P, Q] = E_p[-log q(X)] = -int_F p(x) log q(x) dr(x)
+  ```
+
+  where `F` denotes the support of the random variable `X ~ P`.
+
+  Args:
+    ref: `tf.distributions.Distribution` instance.
+    other: `tf.distributions.Distribution` instance.
+    allow_nan_stats: Python `bool`, default `True`. When `True`,
+      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
+      indicate the result is undefined. When `False`, an exception is raised
+      if one or more of the statistic's batch members are undefined.
+    name: Python `str` prepended to names of ops created by this function.
+
+  Returns:
+    cross_entropy: `ref.dtype` `Tensor` with shape `[B1, ..., Bn]`
+      representing `n` different calculations of (Shanon) cross entropy.
+  """
+  with ops.name_scope(name, "cross_entropy"):
+    return ref.entropy() + kl_divergence(
+        ref, other, allow_nan_stats=allow_nan_stats)
+
+
 class RegisterKL(object):
   """Decorator to register a KL divergence implementation function.
 
diff --git a/tensorflow/python/ops/distributions/transformed_distribution.py b/tensorflow/python/ops/distributions/transformed_distribution.py
index ba25b2c3485706cc769b8f37118a994e065c1f93..1efcf9d32e9ea9924bb080459efb7015e33ccd54 100644
--- a/tensorflow/python/ops/distributions/transformed_distribution.py
+++ b/tensorflow/python/ops/distributions/transformed_distribution.py
@@ -434,7 +434,7 @@ class TransformedDistribution(distribution_lib.Distribution):
     log_prob = self.distribution.log_prob(x)
     if self._is_maybe_event_override:
       log_prob = math_ops.reduce_sum(log_prob, self._reduce_event_indices)
-    log_prob = ildj + log_prob
+    log_prob += math_ops.cast(ildj, log_prob.dtype)
     if self._is_maybe_event_override:
       log_prob.set_shape(array_ops.broadcast_static_shape(
           y.get_shape().with_rank_at_least(1)[:-1], self.batch_shape))
@@ -457,7 +457,7 @@ class TransformedDistribution(distribution_lib.Distribution):
     prob = self.distribution.prob(x)
     if self._is_maybe_event_override:
       prob = math_ops.reduce_prod(prob, self._reduce_event_indices)
-    prob *= math_ops.exp(ildj)
+    prob *= math_ops.exp(math_ops.cast(ildj, prob.dtype))
     if self._is_maybe_event_override:
       prob.set_shape(array_ops.broadcast_static_shape(
           y.get_shape().with_rank_at_least(1)[:-1], self.batch_shape))
@@ -546,7 +546,9 @@ class TransformedDistribution(distribution_lib.Distribution):
       ], 0)
       entropy = array_ops.tile(entropy, multiples)
     dummy = array_ops.zeros([], self.dtype)
-    entropy -= self.bijector.inverse_log_det_jacobian(dummy)
+    entropy -= math_ops.cast(
+        self.bijector.inverse_log_det_jacobian(dummy),
+        entropy.dtype)
     entropy.set_shape(self.batch_shape)
     return entropy
 
diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py
index 41b86f79409aef76dbd710606d09b21f34cab7ba..5bc25128a8d6f77895fc4decc98a8978ae8400f3 100644
--- a/tensorflow/python/ops/distributions/util.py
+++ b/tensorflow/python/ops/distributions/util.py
@@ -25,6 +25,7 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
@@ -699,6 +700,88 @@ def pick_vector(cond,
         [array_ops.where(cond, 0, n)], [array_ops.where(cond, n, -1)])
 
 
+def prefer_static_broadcast_shape(
+    shape1, shape2, name="prefer_static_broadcast_shape"):
+  """Convenience function which statically broadcasts shape when possible.
+
+  Args:
+    shape1:  `1-D` integer `Tensor`.  Already converted to tensor!
+    shape2:  `1-D` integer `Tensor`.  Already converted to tensor!
+    name:  A string name to prepend to created ops.
+
+  Returns:
+    The broadcast shape, either as `TensorShape` (if broadcast can be done
+      statically), or as a `Tensor`.
+  """
+  with ops.name_scope(name, values=[shape1, shape2]):
+    def make_shape_tensor(x):
+      return ops.convert_to_tensor(x, name="shape", dtype=dtypes.int32)
+
+    def get_tensor_shape(s):
+      if isinstance(s, tensor_shape.TensorShape):
+        return s
+      s_ = tensor_util.constant_value(make_shape_tensor(s))
+      if s_ is not None:
+        return tensor_shape.TensorShape(s_)
+      return None
+
+    def get_shape_tensor(s):
+      if not isinstance(s, tensor_shape.TensorShape):
+        return make_shape_tensor(s)
+      if s.is_fully_defined():
+        return make_shape_tensor(s.as_list())
+      raise ValueError("Cannot broadcast from partially "
+                       "defined `TensorShape`.")
+
+    shape1_ = get_tensor_shape(shape1)
+    shape2_ = get_tensor_shape(shape2)
+    if shape1_ is not None and shape2_ is not None:
+      return array_ops.broadcast_static_shape(shape1_, shape2_)
+
+    shape1_ = get_shape_tensor(shape1)
+    shape2_ = get_shape_tensor(shape2)
+    return array_ops.broadcast_dynamic_shape(shape1_, shape2_)
+
+
+def prefer_static_rank(x):
+  """Return static rank of tensor `x` if available, else `tf.rank(x)`.
+
+  Args:
+    x: `Tensor` (already converted).
+
+  Returns:
+    Numpy array (if static rank is obtainable), else `Tensor`.
+  """
+  return prefer_static_value(array_ops.rank(x))
+
+
+def prefer_static_shape(x):
+  """Return static shape of tensor `x` if available, else `tf.shape(x)`.
+
+  Args:
+    x: `Tensor` (already converted).
+
+  Returns:
+    Numpy array (if static shape is obtainable), else `Tensor`.
+  """
+  return prefer_static_value(array_ops.shape(x))
+
+
+def prefer_static_value(x):
+  """Return static value of tensor `x` if available, else `x`.
+
+  Args:
+    x: `Tensor` (already converted).
+
+  Returns:
+    Numpy array (if static value is obtainable), else `Tensor`.
+  """
+  static_x = tensor_util.constant_value(x)
+  if static_x is not None:
+    return static_x
+  return x
+
+
 def gen_new_seed(seed, salt):
   """Generate a new seed, from the given seed and salt."""
   if seed is None:
@@ -751,6 +834,7 @@ def fill_triangular(x, upper=False, name=None):
   """
 
   with ops.name_scope(name, "fill_triangular", values=[x]):
+    x = ops.convert_to_tensor(x, name="x")
     if x.shape.with_rank_at_least(1)[-1].value is not None:
       # Formula derived by solving for n: m = n(n+1)/2.
       m = np.int32(x.shape[-1].value)
@@ -1050,8 +1134,8 @@ def dimension_size(x, axis):
   """Returns the size of a specific dimension."""
   # Since tf.gather isn't "constant-in, constant-out", we must first check the
   # static shape or fallback to dynamic shape.
-  s = x.shape.with_rank_at_least(axis + 1)[axis].value
-  if axis > -1 and s is not None:
+  s = x.shape.with_rank_at_least(np.abs(axis))[axis].value
+  if s is not None:
     return s
   return array_ops.shape(x)[axis]
 
@@ -1099,28 +1183,100 @@ def process_quadrature_grid_and_probs(
     probs /= linalg_ops.norm(probs, ord=1, axis=-1, keep_dims=True,
                              name="probs")
 
-    def _static_dim_size(x, axis):
+    def _static_event_size(x):
       """Returns the static size of a specific dimension or `None`."""
-      return x.shape.with_rank_at_least(axis + 1)[axis].value
+      return x.shape.with_rank_at_least(1)[-1].value
 
-    m, n = _static_dim_size(probs, axis=0), _static_dim_size(grid, axis=0)
+    m, n = _static_event_size(probs), _static_event_size(grid)
     if m is not None and n is not None:
       if m != n:
         raise ValueError("`quadrature_grid_and_probs` must be a `tuple` of "
                          "same-length zero-th-dimension `Tensor`s "
                          "(saw lengths {}, {})".format(m, n))
     elif validate_args:
-      grid = control_flow_ops.with_dependencies([
+      assertions = [
           check_ops.assert_equal(
-              dimension_size(probs, axis=0),
-              dimension_size(grid, axis=0),
+              dimension_size(probs, axis=-1),
+              dimension_size(grid, axis=-1),
               message=("`quadrature_grid_and_probs` must be a `tuple` of "
                        "same-length zero-th-dimension `Tensor`s")),
-      ], grid)
-
+      ]
+      with ops.control_dependencies(assertions):
+        grid = array_ops.identity(grid)
+        probs = array_ops.identity(probs)
     return grid, probs
 
 
+def pad(x, axis, front=False, back=False, value=0, count=1, name=None):
+  """Pads `value` to the front and/or back of a `Tensor` dim, `count` times.
+
+  Args:
+    x: `Tensor` input.
+    axis: Scalar `int`-like `Tensor` representing the single dimension to pad.
+      (Negative indexing is supported.)
+    front: Python `bool`; if `True` the beginning of the `axis` dimension is
+      padded with `value`, `count` times. If `False` no front padding is made.
+    back: Python `bool`; if `True` the end of the `axis` dimension is
+      padded with `value`, `count` times. If `False` no end padding is made.
+    value: Scalar `int`-like `Tensor` representing the actual value added to the
+      front and/or back of the `axis` dimension of `x`.
+    count: Scalar `int`-like `Tensor` representing number of elements added to
+      the front and/or back of the `axis` dimension of `x`. E.g., if
+      `front = back = True` then `2 * count` elements are added.
+    name: Python `str` name prefixed to Ops created by this function.
+
+  Returns:
+    pad: The padded version of input `x`.
+
+  Raises:
+    ValueError: if both `front` and `back` are `False`.
+    TypeError: if `count` is not `int`-like.
+  """
+  with ops.name_scope(name, "pad", [x, value, count]):
+    x = ops.convert_to_tensor(x, name="x")
+    value = ops.convert_to_tensor(value, dtype=x.dtype, name="value")
+    count = ops.convert_to_tensor(count, name="count")
+    if not count.dtype.is_integer:
+      raise TypeError("`count.dtype` (`{}`) must be `int`-like.".format(
+          count.dtype.name))
+    if not front and not back:
+      raise ValueError("At least one of `front`, `back` must be `True`.")
+    ndims = (x.shape.ndims if x.shape.ndims is not None
+             else array_ops.rank(x, name="ndims"))
+    axis = ops.convert_to_tensor(axis, name="axis")
+    axis_ = tensor_util.constant_value(axis)
+    if axis_ is not None:
+      axis = axis_
+      if axis < 0:
+        axis = ndims + axis
+      count_ = tensor_util.constant_value(count)
+      if axis_ >= 0 or x.shape.ndims is not None:
+        head = x.shape[:axis]
+        middle = tensor_shape.TensorShape(
+            None if count_ is None
+            else (x.shape[axis] + count_ * (front + back)))
+        tail = x.shape[axis+1:]
+        final_shape = head.concatenate(middle.concatenate(tail))
+      else:
+        final_shape = None
+    else:
+      axis = array_ops.where(axis < 0, ndims + axis, axis)
+      final_shape = None
+    x = array_ops.pad(
+        x,
+        paddings=array_ops.one_hot(
+            indices=array_ops.stack([axis if front else -1,
+                                     axis if back else -1]),
+            depth=ndims,
+            axis=0,
+            on_value=count,
+            dtype=dtypes.int32),
+        constant_values=value)
+    if final_shape is not None:
+      x.set_shape(final_shape)
+    return x
+
+
 class AppendDocstring(object):
   """Helper class to promote private subclass docstring to public counterpart.
 
diff --git a/tensorflow/python/ops/gradient_checker.py b/tensorflow/python/ops/gradient_checker.py
index 1ff196805507f0ca7a1123df0d2a37925fc3e503..193046ba70e3448db4e5baac54be3699983b34b8 100644
--- a/tensorflow/python/ops/gradient_checker.py
+++ b/tensorflow/python/ops/gradient_checker.py
@@ -29,6 +29,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradients
+from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import tf_logging as logging
 
 
@@ -151,6 +152,15 @@ def _compute_numeric_jacobian(x, x_shape, x_data, y, y_shape, delta,
     and "y_size" columns where "x_size" is the number of elements in x and
     "y_size" is the number of elements in y.
   """
+  # bfloat16 doesn't have enough bits to represent high precision numbers such
+  # as delta. Convert to float32 here. Since numeric_jacobian is expected to
+  # be the groundtruth to compare against, it shouldn't lose any information.
+  if x.dtype == dtypes.bfloat16:
+    x = math_ops.cast(x, dtypes.float32)
+  if y.dtype == dtypes.bfloat16:
+    y = math_ops.cast(y, dtypes.float32)
+  if x_data.dtype == dtypes.bfloat16.as_numpy_dtype:
+    x_data = x_data.astype(np.float32)
 
   # To compute the jacobian, we treat x and y as one-dimensional vectors
   x_size = _product(x_shape) * (2 if x.dtype.is_complex else 1)
@@ -181,7 +191,7 @@ def _compute_numeric_jacobian(x, x_shape, x_data, y, y_shape, delta,
 
 
 def _compute_dx_and_dy(x, y, y_shape):
-  """Returns a node to compute gradient of x wrt y."""
+  """Returns a node to compute gradient of y wrt x."""
   # We make up a dy so that we can compute the gradients. We don't really use
   # the value of dy -- we will always feed it. We need to add an identity node
   # so that we can always feed it properly. Otherwise, for the Add operation,
@@ -189,7 +199,7 @@ def _compute_dx_and_dy(x, y, y_shape):
   with x.graph.as_default():
     dy_orig = constant_op.constant(1.0, shape=y_shape, dtype=y.dtype)
     dy = array_ops.identity(dy_orig)
-  # We compute the gradients for x wrt. y
+  # We compute the gradients for y wrt. x
   grads = gradients.gradients(y, x, dy)
   assert len(grads) == 1
   return grads[0], dy_orig
@@ -206,8 +216,8 @@ def _compute_gradient(x,
                       extra_feed_dict=None):
   """Computes the theoretical and numerical jacobian."""
   t = dtypes.as_dtype(x.dtype)
-  allowed_types = [dtypes.float16, dtypes.float32, dtypes.float64,
-                   dtypes.complex64, dtypes.complex128]
+  allowed_types = [dtypes.float16, dtypes.bfloat16, dtypes.float32,
+                   dtypes.float64, dtypes.complex64, dtypes.complex128]
   assert t.base_dtype in allowed_types, "Don't support type %s for x" % t.name
   t2 = dtypes.as_dtype(y.dtype)
   assert t2.base_dtype in allowed_types, "Don't support type %s for y" % t2.name
diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index 8d00a3c6ab2fdfff53b7e9659710659265cedc65..20c7a9fd6629cfe4657d8c0a25e2c6c2aad8ed49 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -38,6 +38,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import image_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import linalg_grad  # pylint: disable=unused-import
@@ -668,10 +669,10 @@ def _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state):
     ready = (pending_count[x.op._id] == 0)
     if loop_state and not ready:
       ready = (pending_count[x.op._id] > 0 and
-               control_flow_ops.IsLoopSwitch(x.op))
+               control_flow_util.IsLoopSwitch(x.op))
     # pylint: enable=protected-access
     if ready:
-      if control_flow_ops.IsLoopExit(x.op):
+      if control_flow_util.IsLoopExit(x.op):
         # if x is an exit without real gradient, defer processing them.
         grad_state = loop_state.GetGradState(x.op, before=False)
         grad_state.deferred_exits.append(x)
@@ -711,7 +712,7 @@ def _SetGrad(grads, t, grad):
   if isinstance(t_grads, list):
     t_grads.append(grad)
   else:
-    assert control_flow_ops.IsLoopSwitch(op)
+    assert control_flow_util.IsLoopSwitch(op)
     op_grads[t.value_index] = grad
 
 
@@ -851,7 +852,7 @@ def _AggregatedGrads(grads, op, loop_state, aggregation_method=None):
   for i, out_grad in enumerate(out_grads):
     if loop_state:
       if isinstance(out_grad, (ops.Tensor, ops.IndexedSlices)):
-        assert control_flow_ops.IsLoopSwitch(op)
+        assert control_flow_util.IsLoopSwitch(op)
         continue
     # Grads have to be Tensors or IndexedSlices
     if (isinstance(out_grad, collections.Sequence) and not all([
@@ -976,9 +977,7 @@ def hessians(ys, xs, name="hessians", colocate_gradients_with_ops=False,
 
   `hessians()` adds ops to the graph to output the Hessian matrix of `ys`
   with respect to `xs`.  It returns a list of `Tensor` of length `len(xs)`
-  where each tensor is the Hessian of `sum(ys)`. This function currently
-  only supports evaluating the Hessian with respect to (a list of) one-
-  dimensional tensors.
+  where each tensor is the Hessian of `sum(ys)`.
 
   The Hessian is a matrix of second-order partial derivatives of a scalar
   tensor (see https://en.wikipedia.org/wiki/Hessian_matrix for more details).
@@ -1004,31 +1003,32 @@ def hessians(ys, xs, name="hessians", colocate_gradients_with_ops=False,
       'colocate_gradients_with_ops': colocate_gradients_with_ops,
       'gate_gradients': gate_gradients,
       'aggregation_method': aggregation_method
-    }
+  }
   # Compute first-order derivatives and iterate for each x in xs.
   hessians = []
   _gradients = gradients(ys, xs, **kwargs)
-  for i, _gradient, x in zip(range(len(xs)), _gradients, xs):
-    # Ensure that x is a vector.
-    check_rank = check_ops.assert_rank(
-      x, 1, message='Cannot compute Hessian because element %d of `xs` does '
-      'not have rank one.' % i
-    )
-    with ops.control_dependencies([check_rank]):
-      # Declare an iterator and tensor array loop variables for the gradients.
-      n = array_ops.size(x)
-      loop_vars = [
+  for gradient, x in zip(_gradients, xs):
+    # change shape to one-dimension without graph branching
+    gradient = array_ops.reshape(gradient, [-1])
+
+    # Declare an iterator and tensor array loop variables for the gradients.
+    n = array_ops.size(x)
+    loop_vars = [
         array_ops.constant(0, dtypes.int32),
         tensor_array_ops.TensorArray(x.dtype, n)
-      ]
-      # Iterate over all elements of the gradient and compute second order
-      # derivatives.
-      _, hessian = control_flow_ops.while_loop(
-          lambda j, _: j < n,
-          lambda j, result: (j + 1,
-                             result.write(j, gradients(_gradient[j], x)[0])),
-          loop_vars
-      )
-
-      hessians.append(hessian.stack())
+    ]
+    # Iterate over all elements of the gradient and compute second order
+    # derivatives.
+    _, hessian = control_flow_ops.while_loop(
+        lambda j, _: j < n,
+        lambda j, result: (j + 1,
+                           result.write(j, gradients(gradient[j], x)[0])),
+        loop_vars
+    )
+
+    _shape = array_ops.shape(x)
+    _reshaped_hessian = array_ops.reshape(
+        hessian.stack(), array_ops.concat((_shape, _shape), 0)
+    )
+    hessians.append(_reshaped_hessian)
   return hessians
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index dacc2947fe31b0cbe81f6acacd52fb4a74719090..d39b934819177e3c15af95a0777ba96869c5e9cf 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -573,9 +573,7 @@ class HessianVectorProductTest(test_util.TensorFlowTestCase):
       self.assertAllClose(hess_v_value, hess_v_actual)
 
 
-# TODO(skyewm): reenable C API once
-# ControlFlowContext._RemoveExternalControlEdges works with C API enabled
-# @test_util.with_c_api
+@test_util.with_c_api
 class HessianTest(test_util.TensorFlowTestCase):
 
   def testHessian1D(self):
@@ -623,6 +621,45 @@ class HessianTest(test_util.TensorFlowTestCase):
         with self.assertRaises(ValueError):
           gradients.hessians(x, x)
 
+  def testHessian2D_square_matrix(self):
+    # Manually compute the Hessian explicitly for a low-dimensional problem
+    # and check that `hessian` matches. Specifically, the Hessian of
+    # f(x) = 1/2 * x^T * x is H = constant (block identity matrix)
+    m = 3
+    rng = np.random.RandomState([1, 2, 3])
+    x_value = rng.randn(m, m).astype("float32")
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_value)
+      x_square = math_ops.reduce_sum(
+          math_ops.matmul(array_ops.transpose(x), x) * 0.5
+      )
+      hess = gradients.hessians(x_square, x)[0]
+      hess_actual = hess.eval()
+    hess_value = np.bmat([
+        [elem*np.ones((m, m)) for elem in vec]
+        for vec in np.eye(m)
+    ]).astype("float32")
+    self.assertAllEqual((m, m, m, m), hess_actual.shape)
+    self.assertAllClose(hess_value, hess_actual.reshape((m * m, m * m)))
+
+  def testHessian2D_non_square_matrix(self):
+    m = 3
+    n = 4
+    rng = np.random.RandomState([1, 2, 3])
+    x_value = rng.randn(m, n).astype("float32")
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_value)
+      x_square = math_ops.reduce_sum(
+          math_ops.matmul(array_ops.transpose(x), x) * 0.5
+      )
+      hess = gradients.hessians(x_square, x)[0]
+      hess_actual = hess.eval()
+    hess_value = np.bmat([
+        [elem*np.ones((n, n)) for elem in vec]
+        for vec in np.eye(m)
+    ]).astype("float32")
+    self.assertAllEqual((m, n, m, n), hess_actual.shape)
+    self.assertAllClose(hess_value, hess_actual.reshape((m * n, m * n)))
 
 @test_util.with_c_api
 class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase):
@@ -667,8 +704,8 @@ class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase):
   def testWarnings(self):
     # TODO(gunan) Reenable after this issue is fixed:
     # https://github.com/google/protobuf/issues/2812
-    if sys.version_info >= (3, 6):
-      self.skipTest("Skipped test for Python 3.6+")
+    if sys.version_info >= (3, 5):
+      self.skipTest("Skipped test for Python 3.5+")
 
     # Smaller than the threshold: no warning.
     c_sparse = ops.IndexedSlices(
diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt
index f834d9002c3e14451bdf2de31cf3c1505e39be4b..f6ef6f3f3da4389a16a84fa0b3570d3cd1262472 100644
--- a/tensorflow/python/ops/hidden_ops.txt
+++ b/tensorflow/python/ops/hidden_ops.txt
@@ -8,6 +8,7 @@ ConcatV2
 ConjugateTranspose
 Const
 DebugGradientIdentity
+DebugGradientRefIdentity
 EditDistance
 ExpandDims
 ListDiff
@@ -21,6 +22,7 @@ ParallelConcat
 Placeholder
 RefIdentity
 Reverse
+Snapshot
 SpaceToBatch
 Split
 SplitV
@@ -28,6 +30,8 @@ Squeeze
 Slice
 TileGrad  # Exported through array_grad instead of array_ops.
 ZerosLike  # TODO(josh11b): Use this instead of the Python version.
+Unique
+UniqueV2
 Unpack
 
 # candidate_sampling_ops
@@ -224,6 +228,7 @@ BatchSelfAdjointEigV2
 BatchSvd
 LogMatrixDeterminant
 MatrixExponential
+MatrixLogarithm
 MatrixSolveLs
 SelfAdjointEig
 SelfAdjointEigV2
@@ -341,6 +346,7 @@ TruncatedNormal
 # script_ops
 PyFunc
 PyFuncStateless
+EagerPyFunc
 
 # sdca_ops
 
@@ -354,8 +360,8 @@ DestroyTemporaryVariable
 AddSparseToTensorsMap
 AddManySparseToTensorsMap
 TakeManySparseFromTensorsMap
-DeserializeSparse
 DeserializeManySparse
+DeserializeSparse
 SerializeManySparse
 SerializeSparse
 SparseAdd
diff --git a/tensorflow/python/ops/histogram_ops.py b/tensorflow/python/ops/histogram_ops.py
index 51e4be9343abc6ad68786e05e9cdf87ea48e3d00..4313b79b5b3e6045a5102c6ac29a2c3291e1b0aa 100644
--- a/tensorflow/python/ops/histogram_ops.py
+++ b/tensorflow/python/ops/histogram_ops.py
@@ -17,6 +17,7 @@
 
 Please see @{$python/histogram_ops} guide.
 
+@@histogram_fixed_width_bins
 @@histogram_fixed_width
 """
 
@@ -32,6 +33,70 @@ from tensorflow.python.ops import gen_math_ops
 from tensorflow.python.ops import math_ops
 
 
+def histogram_fixed_width_bins(values,
+                               value_range,
+                               nbins=100,
+                               dtype=dtypes.int32,
+                               name=None):
+  """Bins the given values for use in a histogram.
+
+  Given the tensor `values`, this operation returns a rank 1 `Tensor`
+  representing the indices of a histogram into which each element
+  of `values` would be binned. The bins are equal width and
+  determined by the arguments `value_range` and `nbins`.
+
+  Args:
+    values:  Numeric `Tensor`.
+    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
+      values <= value_range[0] will be mapped to hist[0],
+      values >= value_range[1] will be mapped to hist[-1].
+    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
+    dtype:  dtype for returned histogram.
+    name:  A name for this operation (defaults to 'histogram_fixed_width').
+
+  Returns:
+    A `Tensor` holding the indices of the binned values whose shape matches
+    `values`. 
+
+  Examples:
+
+  ```python
+  # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
+  nbins = 5
+  value_range = [0.0, 5.0]
+  new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
+
+  with tf.get_default_session() as sess:
+    indices = tf.histogram_fixed_width_bins(new_values, value_range, nbins=5)
+    variables.global_variables_initializer().run()
+    sess.run(indices) => [0, 0, 1, 2, 4]
+  ```
+  """
+  with ops.name_scope(name, 'histogram_fixed_width_bins',
+                      [values, value_range, nbins]) as scope:
+    values = ops.convert_to_tensor(values, name='values')
+    shape = array_ops.shape(values)
+
+    values = array_ops.reshape(values, [-1])
+    value_range = ops.convert_to_tensor(value_range, name='value_range')
+    nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')
+    nbins_float = math_ops.cast(nbins, values.dtype)
+
+    # Map tensor values that fall within value_range to [0, 1].
+    scaled_values = math_ops.truediv(values - value_range[0],
+                                     value_range[1] - value_range[0],
+                                     name='scaled_values')
+
+    # map tensor values within the open interval value_range to {0,.., nbins-1},
+    # values outside the open interval will be zero or less, or nbins or more.
+    indices = math_ops.floor(nbins_float * scaled_values, name='indices')
+
+    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
+    indices = math_ops.cast(
+        clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)
+    return array_ops.reshape(indices, shape)
+
+
 def histogram_fixed_width(values,
                           value_range,
                           nbins=100,
diff --git a/tensorflow/python/ops/histogram_ops_test.py b/tensorflow/python/ops/histogram_ops_test.py
index 19ad6cd2ba2b8278656a33a331995336037db356..80ee09057581db7298562fc22b443f5ddee73ef8 100644
--- a/tensorflow/python/ops/histogram_ops_test.py
+++ b/tensorflow/python/ops/histogram_ops_test.py
@@ -21,11 +21,64 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import constant_op
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import histogram_ops
 from tensorflow.python.platform import test
 
 
+class BinValuesFixedWidth(test.TestCase):
+
+  def test_empty_input_gives_all_zero_counts(self):
+    # Bins will be:
+    #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
+    value_range = [0.0, 5.0]
+    values = []
+    expected_bins = []
+    with self.test_session():
+      bins = histogram_ops.histogram_fixed_width_bins(values, value_range, nbins=5)
+      self.assertEqual(dtypes.int32, bins.dtype)
+      self.assertAllClose(expected_bins, bins.eval())
+
+  def test_1d_values_int32_output(self):
+    # Bins will be:
+    #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
+    value_range = [0.0, 5.0]
+    values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
+    expected_bins = [0, 0, 1, 2, 4, 4]
+    with self.test_session():
+      bins = histogram_ops.histogram_fixed_width_bins(
+          values, value_range, nbins=5, dtype=dtypes.int64)
+      self.assertEqual(dtypes.int32, bins.dtype)
+      self.assertAllClose(expected_bins, bins.eval())
+
+  def test_1d_float64_values_int32_output(self):
+    # Bins will be:
+    #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
+    value_range = np.float64([0.0, 5.0])
+    values = np.float64([-1.0, 0.0, 1.5, 2.0, 5.0, 15])
+    expected_bins = [0, 0, 1, 2, 4, 4]
+    with self.test_session():
+      bins = histogram_ops.histogram_fixed_width_bins(
+          values, value_range, nbins=5)
+      self.assertEqual(dtypes.int32, bins.dtype)
+      self.assertAllClose(expected_bins, bins.eval())
+
+  def test_2d_values(self):
+    # Bins will be:
+    #   (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
+    value_range = [0.0, 5.0]
+    values = constant_op.constant(
+      [[-1.0, 0.0, 1.5], [2.0, 5.0, 15]],
+      shape=(2, 3))
+    expected_bins = [[0, 0, 1], [2, 4, 4]]
+    with self.test_session():
+      bins = histogram_ops.histogram_fixed_width_bins(
+          values, value_range, nbins=5)
+      self.assertEqual(dtypes.int32, bins.dtype)
+      self.assertAllClose(expected_bins, bins.eval())
+
+
 class HistogramFixedWidthTest(test.TestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index b9c89d62d556633100770d75cce89c63eeeb19ad..9f09d0a4d1ff4eed9647b6c74db0b1803df0ad70 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -219,15 +219,17 @@ def random_flip_up_down(image, seed=None):
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  image = ops.convert_to_tensor(image, name='image')
-  image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
-  uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
-  mirror_cond = math_ops.less(uniform_random, .5)
-  result = control_flow_ops.cond(mirror_cond,
-                                 lambda: array_ops.reverse(image, [0]),
-                                 lambda: image)
-  return fix_image_flip_shape(image, result)
+  with ops.name_scope(None, 'random_flip_up_down', [image]) as scope:
+    image = ops.convert_to_tensor(image, name='image')
+    image = control_flow_ops.with_dependencies(
+        _Check3DImage(image, require_static=False), image)
+    uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
+    mirror_cond = math_ops.less(uniform_random, .5)
+    result = control_flow_ops.cond(mirror_cond,
+                                   lambda: array_ops.reverse(image, [0]),
+                                   lambda: image,
+                                   name=scope)
+    return fix_image_flip_shape(image, result)
 
 
 def random_flip_left_right(image, seed=None):
@@ -248,15 +250,17 @@ def random_flip_left_right(image, seed=None):
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  image = ops.convert_to_tensor(image, name='image')
-  image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
-  uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
-  mirror_cond = math_ops.less(uniform_random, .5)
-  result = control_flow_ops.cond(mirror_cond,
-                                 lambda: array_ops.reverse(image, [1]),
-                                 lambda: image)
-  return fix_image_flip_shape(image, result)
+  with ops.name_scope(None, 'random_flip_left_right', [image]) as scope:
+    image = ops.convert_to_tensor(image, name='image')
+    image = control_flow_ops.with_dependencies(
+        _Check3DImage(image, require_static=False), image)
+    uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
+    mirror_cond = math_ops.less(uniform_random, .5)
+    result = control_flow_ops.cond(mirror_cond,
+                                   lambda: array_ops.reverse(image, [1]),
+                                   lambda: image,
+                                   name=scope)
+    return fix_image_flip_shape(image, result)
 
 
 def flip_left_right(image):
@@ -276,10 +280,12 @@ def flip_left_right(image):
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  image = ops.convert_to_tensor(image, name='image')
-  image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
-  return fix_image_flip_shape(image, array_ops.reverse(image, [1]))
+  with ops.name_scope(None, 'flip_left_right', [image]) as scope:
+    image = ops.convert_to_tensor(image, name='image')
+    image = control_flow_ops.with_dependencies(
+        _Check3DImage(image, require_static=False), image)
+    return fix_image_flip_shape(image,
+                                array_ops.reverse(image, [1], name=scope))
 
 
 def flip_up_down(image):
@@ -299,10 +305,12 @@ def flip_up_down(image):
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  image = ops.convert_to_tensor(image, name='image')
-  image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
-  return fix_image_flip_shape(image, array_ops.reverse(image, [0]))
+  with ops.name_scope(None, 'flip_up_down', [image]) as scope:
+    image = ops.convert_to_tensor(image, name='image')
+    image = control_flow_ops.with_dependencies(
+        _Check3DImage(image, require_static=False), image)
+    return fix_image_flip_shape(image,
+                                array_ops.reverse(image, [0], name=scope))
 
 
 def rot90(image, k=1, name=None):
@@ -356,10 +364,11 @@ def transpose_image(image):
   Raises:
     ValueError: if the shape of `image` not supported.
   """
-  image = ops.convert_to_tensor(image, name='image')
-  image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
-  return array_ops.transpose(image, [1, 0, 2], name='transpose_image')
+  with ops.name_scope(None, 'transpose_image', [image]) as scope:
+    image = ops.convert_to_tensor(image, name='image')
+    image = control_flow_ops.with_dependencies(
+        _Check3DImage(image, require_static=False), image)
+    return array_ops.transpose(image, [1, 0, 2], name=scope)
 
 
 def central_crop(image, central_fraction):
@@ -386,32 +395,33 @@ def central_crop(image, central_fraction):
   Returns:
     3-D float Tensor
   """
-  image = ops.convert_to_tensor(image, name='image')
-  if central_fraction <= 0.0 or central_fraction > 1.0:
-    raise ValueError('central_fraction must be within (0, 1]')
-  if central_fraction == 1.0:
-    return image
+  with ops.name_scope(None, 'central_crop', [image]):
+    image = ops.convert_to_tensor(image, name='image')
+    if central_fraction <= 0.0 or central_fraction > 1.0:
+      raise ValueError('central_fraction must be within (0, 1]')
+    if central_fraction == 1.0:
+      return image
 
-  image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
+    image = control_flow_ops.with_dependencies(
+        _Check3DImage(image, require_static=False), image)
 
-  img_shape = array_ops.shape(image)
-  depth = image.get_shape()[2]
-  img_h = math_ops.to_double(img_shape[0])
-  img_w = math_ops.to_double(img_shape[1])
-  bbox_h_start = math_ops.to_int32((img_h - img_h * central_fraction) / 2)
-  bbox_w_start = math_ops.to_int32((img_w - img_w * central_fraction) / 2)
+    img_shape = array_ops.shape(image)
+    depth = image.get_shape()[2]
+    img_h = math_ops.to_double(img_shape[0])
+    img_w = math_ops.to_double(img_shape[1])
+    bbox_h_start = math_ops.to_int32((img_h - img_h * central_fraction) / 2)
+    bbox_w_start = math_ops.to_int32((img_w - img_w * central_fraction) / 2)
 
-  bbox_h_size = img_shape[0] - bbox_h_start * 2
-  bbox_w_size = img_shape[1] - bbox_w_start * 2
+    bbox_h_size = img_shape[0] - bbox_h_start * 2
+    bbox_w_size = img_shape[1] - bbox_w_start * 2
 
-  bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0])
-  bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1])
-  image = array_ops.slice(image, bbox_begin, bbox_size)
+    bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0])
+    bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1])
+    image = array_ops.slice(image, bbox_begin, bbox_size)
 
-  # The first two dimensions are dynamic and unknown.
-  image.set_shape([None, None, depth])
-  return image
+    # The first two dimensions are dynamic and unknown.
+    image.set_shape([None, None, depth])
+    return image
 
 
 def pad_to_bounding_box(image, offset_height, offset_width, target_height,
@@ -444,53 +454,54 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height,
       `target_*` arguments, or either `offset_height` or `offset_width` is
       negative.
   """
-  image = ops.convert_to_tensor(image, name='image')
+  with ops.name_scope(None, 'pad_to_bounding_box', [image]):
+    image = ops.convert_to_tensor(image, name='image')
 
-  is_batch = True
-  image_shape = image.get_shape()
-  if image_shape.ndims == 3:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-  elif image_shape.ndims is None:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-    image.set_shape([None] * 4)
-  elif image_shape.ndims != 4:
-    raise ValueError('\'image\' must have either 3 or 4 dimensions.')
-
-  assert_ops = _CheckAtLeast3DImage(image, require_static=False)
-
-  batch, height, width, depth = _ImageDimensions(image, rank=4)
-
-  after_padding_width = target_width - offset_width - width
-  after_padding_height = target_height - offset_height - height
-
-  assert_ops += _assert(offset_height >= 0, ValueError,
-                        'offset_height must be >= 0')
-  assert_ops += _assert(offset_width >= 0, ValueError,
-                        'offset_width must be >= 0')
-  assert_ops += _assert(after_padding_width >= 0, ValueError,
-                        'width must be <= target - offset')
-  assert_ops += _assert(after_padding_height >= 0, ValueError,
-                        'height must be <= target - offset')
-  image = control_flow_ops.with_dependencies(assert_ops, image)
-
-  # Do not pad on the depth dimensions.
-  paddings = array_ops.reshape(
-      array_ops.stack([
-          0, 0, offset_height, after_padding_height, offset_width,
-          after_padding_width, 0, 0
-      ]), [4, 2])
-  padded = array_ops.pad(image, paddings)
-
-  padded_shape = [None if _is_tensor(i) else i
-                  for i in [batch, target_height, target_width, depth]]
-  padded.set_shape(padded_shape)
-
-  if not is_batch:
-    padded = array_ops.squeeze(padded, squeeze_dims=[0])
-
-  return padded
+    is_batch = True
+    image_shape = image.get_shape()
+    if image_shape.ndims == 3:
+      is_batch = False
+      image = array_ops.expand_dims(image, 0)
+    elif image_shape.ndims is None:
+      is_batch = False
+      image = array_ops.expand_dims(image, 0)
+      image.set_shape([None] * 4)
+    elif image_shape.ndims != 4:
+      raise ValueError('\'image\' must have either 3 or 4 dimensions.')
+
+    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
+    batch, height, width, depth = _ImageDimensions(image, rank=4)
+
+    after_padding_width = target_width - offset_width - width
+
+    after_padding_height = target_height - offset_height - height
+
+    assert_ops += _assert(offset_height >= 0, ValueError,
+                          'offset_height must be >= 0')
+    assert_ops += _assert(offset_width >= 0, ValueError,
+                          'offset_width must be >= 0')
+    assert_ops += _assert(after_padding_width >= 0, ValueError,
+                          'width must be <= target - offset')
+    assert_ops += _assert(after_padding_height >= 0, ValueError,
+                          'height must be <= target - offset')
+    image = control_flow_ops.with_dependencies(assert_ops, image)
+
+    # Do not pad on the depth dimensions.
+    paddings = array_ops.reshape(
+        array_ops.stack([
+            0, 0, offset_height, after_padding_height, offset_width,
+            after_padding_width, 0, 0
+        ]), [4, 2])
+    padded = array_ops.pad(image, paddings)
+
+    padded_shape = [None if _is_tensor(i) else i
+                    for i in [batch, target_height, target_width, depth]]
+    padded.set_shape(padded_shape)
+
+    if not is_batch:
+      padded = array_ops.squeeze(padded, squeeze_dims=[0])
+
+    return padded
 
 
 def crop_to_bounding_box(image, offset_height, offset_width, target_height,
@@ -523,51 +534,52 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
       `target_*` arguments, or either `offset_height` or `offset_width` is
       negative, or either `target_height` or `target_width` is not positive.
   """
-  image = ops.convert_to_tensor(image, name='image')
+  with ops.name_scope(None, 'crop_to_bounding_box', [image]):
+    image = ops.convert_to_tensor(image, name='image')
 
-  is_batch = True
-  image_shape = image.get_shape()
-  if image_shape.ndims == 3:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-  elif image_shape.ndims is None:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-    image.set_shape([None] * 4)
-  elif image_shape.ndims != 4:
-    raise ValueError('\'image\' must have either 3 or 4 dimensions.')
-
-  assert_ops = _CheckAtLeast3DImage(image, require_static=False)
-
-  batch, height, width, depth = _ImageDimensions(image, rank=4)
-
-  assert_ops += _assert(offset_width >= 0, ValueError,
-                        'offset_width must be >= 0.')
-  assert_ops += _assert(offset_height >= 0, ValueError,
-                        'offset_height must be >= 0.')
-  assert_ops += _assert(target_width > 0, ValueError,
-                        'target_width must be > 0.')
-  assert_ops += _assert(target_height > 0, ValueError,
-                        'target_height must be > 0.')
-  assert_ops += _assert(width >= (target_width + offset_width), ValueError,
-                        'width must be >= target + offset.')
-  assert_ops += _assert(height >= (target_height + offset_height), ValueError,
-                        'height must be >= target + offset.')
-  image = control_flow_ops.with_dependencies(assert_ops, image)
-
-  cropped = array_ops.slice(
-      image,
-      array_ops.stack([0, offset_height, offset_width, 0]),
-      array_ops.stack([-1, target_height, target_width, -1]))
-
-  cropped_shape = [None if _is_tensor(i) else i
-                   for i in [batch, target_height, target_width, depth]]
-  cropped.set_shape(cropped_shape)
-
-  if not is_batch:
-    cropped = array_ops.squeeze(cropped, squeeze_dims=[0])
-
-  return cropped
+    is_batch = True
+    image_shape = image.get_shape()
+    if image_shape.ndims == 3:
+      is_batch = False
+      image = array_ops.expand_dims(image, 0)
+    elif image_shape.ndims is None:
+      is_batch = False
+      image = array_ops.expand_dims(image, 0)
+      image.set_shape([None] * 4)
+    elif image_shape.ndims != 4:
+      raise ValueError('\'image\' must have either 3 or 4 dimensions.')
+
+    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
+
+    batch, height, width, depth = _ImageDimensions(image, rank=4)
+
+    assert_ops += _assert(offset_width >= 0, ValueError,
+                          'offset_width must be >= 0.')
+    assert_ops += _assert(offset_height >= 0, ValueError,
+                          'offset_height must be >= 0.')
+    assert_ops += _assert(target_width > 0, ValueError,
+                          'target_width must be > 0.')
+    assert_ops += _assert(target_height > 0, ValueError,
+                          'target_height must be > 0.')
+    assert_ops += _assert(width >= (target_width + offset_width), ValueError,
+                          'width must be >= target + offset.')
+    assert_ops += _assert(height >= (target_height + offset_height), ValueError,
+                          'height must be >= target + offset.')
+    image = control_flow_ops.with_dependencies(assert_ops, image)
+
+    cropped = array_ops.slice(
+        image,
+        array_ops.stack([0, offset_height, offset_width, 0]),
+        array_ops.stack([-1, target_height, target_width, -1]))
+
+    cropped_shape = [None if _is_tensor(i) else i
+                     for i in [batch, target_height, target_width, depth]]
+    cropped.set_shape(cropped_shape)
+
+    if not is_batch:
+      cropped = array_ops.squeeze(cropped, squeeze_dims=[0])
+
+    return cropped
 
 
 def resize_image_with_crop_or_pad(image, target_height, target_width):
@@ -598,88 +610,90 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
     If `images` was 3-D, a 3-D float Tensor of shape
     `[new_height, new_width, channels]`.
   """
-  image = ops.convert_to_tensor(image, name='image')
-  image_shape = image.get_shape()
-  is_batch = True
-  if image_shape.ndims == 3:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-  elif image_shape.ndims is None:
-    is_batch = False
-    image = array_ops.expand_dims(image, 0)
-    image.set_shape([None] * 4)
-  elif image_shape.ndims != 4:
-    raise ValueError('\'image\' must have either 3 or 4 dimensions.')
-
-  assert_ops = _CheckAtLeast3DImage(image, require_static=False)
-  assert_ops += _assert(target_width > 0, ValueError,
-                        'target_width must be > 0.')
-  assert_ops += _assert(target_height > 0, ValueError,
-                        'target_height must be > 0.')
-
-  image = control_flow_ops.with_dependencies(assert_ops, image)
-  # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
-  # Make sure our checks come first, so that error messages are clearer.
-  if _is_tensor(target_height):
-    target_height = control_flow_ops.with_dependencies(
-        assert_ops, target_height)
-  if _is_tensor(target_width):
-    target_width = control_flow_ops.with_dependencies(assert_ops, target_width)
-
-  def max_(x, y):
-    if _is_tensor(x) or _is_tensor(y):
-      return math_ops.maximum(x, y)
-    else:
-      return max(x, y)
+  with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]):
+    image = ops.convert_to_tensor(image, name='image')
+    image_shape = image.get_shape()
+    is_batch = True
+    if image_shape.ndims == 3:
+      is_batch = False
+      image = array_ops.expand_dims(image, 0)
+    elif image_shape.ndims is None:
+      is_batch = False
+      image = array_ops.expand_dims(image, 0)
+      image.set_shape([None] * 4)
+    elif image_shape.ndims != 4:
+      raise ValueError('\'image\' must have either 3 or 4 dimensions.')
+
+    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
+    assert_ops += _assert(target_width > 0, ValueError,
+                          'target_width must be > 0.')
+    assert_ops += _assert(target_height > 0, ValueError,
+                          'target_height must be > 0.')
+
+    image = control_flow_ops.with_dependencies(assert_ops, image)
+    # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
+    # Make sure our checks come first, so that error messages are clearer.
+    if _is_tensor(target_height):
+      target_height = control_flow_ops.with_dependencies(
+          assert_ops, target_height)
+    if _is_tensor(target_width):
+      target_width = control_flow_ops.with_dependencies(
+          assert_ops, target_width)
+
+    def max_(x, y):
+      if _is_tensor(x) or _is_tensor(y):
+        return math_ops.maximum(x, y)
+      else:
+        return max(x, y)
 
-  def min_(x, y):
-    if _is_tensor(x) or _is_tensor(y):
-      return math_ops.minimum(x, y)
-    else:
-      return min(x, y)
+    def min_(x, y):
+      if _is_tensor(x) or _is_tensor(y):
+        return math_ops.minimum(x, y)
+      else:
+        return min(x, y)
 
-  def equal_(x, y):
-    if _is_tensor(x) or _is_tensor(y):
-      return math_ops.equal(x, y)
-    else:
-      return x == y
+    def equal_(x, y):
+      if _is_tensor(x) or _is_tensor(y):
+        return math_ops.equal(x, y)
+      else:
+        return x == y
 
-  _, height, width, _ = _ImageDimensions(image, rank=4)
-  width_diff = target_width - width
-  offset_crop_width = max_(-width_diff // 2, 0)
-  offset_pad_width = max_(width_diff // 2, 0)
+    _, height, width, _ = _ImageDimensions(image, rank=4)
+    width_diff = target_width - width
+    offset_crop_width = max_(-width_diff // 2, 0)
+    offset_pad_width = max_(width_diff // 2, 0)
 
-  height_diff = target_height - height
-  offset_crop_height = max_(-height_diff // 2, 0)
-  offset_pad_height = max_(height_diff // 2, 0)
+    height_diff = target_height - height
+    offset_crop_height = max_(-height_diff // 2, 0)
+    offset_pad_height = max_(height_diff // 2, 0)
 
-  # Maybe crop if needed.
-  cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width,
-                                 min_(target_height, height),
-                                 min_(target_width, width))
+    # Maybe crop if needed.
+    cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width,
+                                   min_(target_height, height),
+                                   min_(target_width, width))
 
-  # Maybe pad if needed.
-  resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width,
-                                target_height, target_width)
+    # Maybe pad if needed.
+    resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width,
+                                  target_height, target_width)
 
-  # In theory all the checks below are redundant.
-  if resized.get_shape().ndims is None:
-    raise ValueError('resized contains no shape.')
+    # In theory all the checks below are redundant.
+    if resized.get_shape().ndims is None:
+      raise ValueError('resized contains no shape.')
 
-  _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4)
+    _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4)
 
-  assert_ops = []
-  assert_ops += _assert(equal_(resized_height, target_height), ValueError,
-                        'resized height is not correct.')
-  assert_ops += _assert(equal_(resized_width, target_width), ValueError,
-                        'resized width is not correct.')
+    assert_ops = []
+    assert_ops += _assert(equal_(resized_height, target_height), ValueError,
+                          'resized height is not correct.')
+    assert_ops += _assert(equal_(resized_width, target_width), ValueError,
+                          'resized width is not correct.')
 
-  resized = control_flow_ops.with_dependencies(assert_ops, resized)
+    resized = control_flow_ops.with_dependencies(assert_ops, resized)
 
-  if not is_batch:
-    resized = array_ops.squeeze(resized, squeeze_dims=[0])
+    if not is_batch:
+      resized = array_ops.squeeze(resized, squeeze_dims=[0])
 
-  return resized
+    return resized
 
 
 class ResizeMethod(object):
@@ -736,66 +750,68 @@ def resize_images(images,
     If `images` was 3-D, a 3-D float Tensor of shape
     `[new_height, new_width, channels]`.
   """
-  images = ops.convert_to_tensor(images, name='images')
-  if images.get_shape().ndims is None:
-    raise ValueError('\'images\' contains no shape.')
-  # TODO(shlens): Migrate this functionality to the underlying Op's.
-  is_batch = True
-  if images.get_shape().ndims == 3:
-    is_batch = False
-    images = array_ops.expand_dims(images, 0)
-  elif images.get_shape().ndims != 4:
-    raise ValueError('\'images\' must have either 3 or 4 dimensions.')
-
-  _, height, width, _ = images.get_shape().as_list()
+  with ops.name_scope(None, 'resize_images', [images, size]):
+    images = ops.convert_to_tensor(images, name='images')
+    if images.get_shape().ndims is None:
+      raise ValueError('\'images\' contains no shape.')
+    # TODO(shlens): Migrate this functionality to the underlying Op's.
+    is_batch = True
+    if images.get_shape().ndims == 3:
+      is_batch = False
+      images = array_ops.expand_dims(images, 0)
+    elif images.get_shape().ndims != 4:
+      raise ValueError('\'images\' must have either 3 or 4 dimensions.')
+
+    _, height, width, _ = images.get_shape().as_list()
+
+    try:
+      size = ops.convert_to_tensor(size, dtypes.int32, name='size')
+    except (TypeError, ValueError):
+      raise ValueError('\'size\' must be a 1-D int32 Tensor')
+    if not size.get_shape().is_compatible_with([2]):
+      raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: '
+                       'new_height, new_width')
+    size_const_as_shape = tensor_util.constant_value_as_shape(size)
+    new_height_const = size_const_as_shape[0].value
+    new_width_const = size_const_as_shape[1].value
+
+    # If we can determine that the height and width will be unmodified by this
+    # transformation, we avoid performing the resize.
+    if all(x is not None
+           for x in [new_width_const, width, new_height_const, height]) and (
+               width == new_width_const and height == new_height_const):
+      if not is_batch:
+        images = array_ops.squeeze(images, squeeze_dims=[0])
+      return images
+
+    if method == ResizeMethod.BILINEAR:
+      images = gen_image_ops.resize_bilinear(images,
+                                             size,
+                                             align_corners=align_corners)
+    elif method == ResizeMethod.NEAREST_NEIGHBOR:
+      images = gen_image_ops.resize_nearest_neighbor(images,
+                                                     size,
+                                                     align_corners=
+                                                     align_corners)
+    elif method == ResizeMethod.BICUBIC:
+      images = gen_image_ops.resize_bicubic(images,
+                                            size,
+                                            align_corners=align_corners)
+    elif method == ResizeMethod.AREA:
+      images = gen_image_ops.resize_area(images,
+                                         size,
+                                         align_corners=align_corners)
+    else:
+      raise ValueError('Resize method is not implemented.')
+
+    # NOTE(mrry): The shape functions for the resize ops cannot unpack
+    # the packed values in `new_size`, so set the shape here.
+    images.set_shape([None, new_height_const, new_width_const, None])
 
-  try:
-    size = ops.convert_to_tensor(size, dtypes.int32, name='size')
-  except (TypeError, ValueError):
-    raise ValueError('\'size\' must be a 1-D int32 Tensor')
-  if not size.get_shape().is_compatible_with([2]):
-    raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: '
-                     'new_height, new_width')
-  size_const_as_shape = tensor_util.constant_value_as_shape(size)
-  new_height_const = size_const_as_shape[0].value
-  new_width_const = size_const_as_shape[1].value
-
-  # If we can determine that the height and width will be unmodified by this
-  # transformation, we avoid performing the resize.
-  if all(x is not None
-         for x in [new_width_const, width, new_height_const, height]) and (
-             width == new_width_const and height == new_height_const):
     if not is_batch:
       images = array_ops.squeeze(images, squeeze_dims=[0])
     return images
 
-  if method == ResizeMethod.BILINEAR:
-    images = gen_image_ops.resize_bilinear(images,
-                                           size,
-                                           align_corners=align_corners)
-  elif method == ResizeMethod.NEAREST_NEIGHBOR:
-    images = gen_image_ops.resize_nearest_neighbor(images,
-                                                   size,
-                                                   align_corners=align_corners)
-  elif method == ResizeMethod.BICUBIC:
-    images = gen_image_ops.resize_bicubic(images,
-                                          size,
-                                          align_corners=align_corners)
-  elif method == ResizeMethod.AREA:
-    images = gen_image_ops.resize_area(images,
-                                       size,
-                                       align_corners=align_corners)
-  else:
-    raise ValueError('Resize method is not implemented.')
-
-  # NOTE(mrry): The shape functions for the resize ops cannot unpack
-  # the packed values in `new_size`, so set the shape here.
-  images.set_shape([None, new_height_const, new_width_const, None])
-
-  if not is_batch:
-    images = array_ops.squeeze(images, squeeze_dims=[0])
-  return images
-
 
 def per_image_standardization(image):
   """Linearly scales `image` to have zero mean and unit norm.
@@ -816,27 +832,28 @@ def per_image_standardization(image):
   Raises:
     ValueError: if the shape of 'image' is incompatible with this function.
   """
-  image = ops.convert_to_tensor(image, name='image')
-  image = control_flow_ops.with_dependencies(
-      _Check3DImage(image, require_static=False), image)
-  num_pixels = math_ops.reduce_prod(array_ops.shape(image))
+  with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
+    image = ops.convert_to_tensor(image, name='image')
+    image = control_flow_ops.with_dependencies(
+        _Check3DImage(image, require_static=False), image)
+    num_pixels = math_ops.reduce_prod(array_ops.shape(image))
 
-  image = math_ops.cast(image, dtype=dtypes.float32)
-  image_mean = math_ops.reduce_mean(image)
+    image = math_ops.cast(image, dtype=dtypes.float32)
+    image_mean = math_ops.reduce_mean(image)
 
-  variance = (math_ops.reduce_mean(math_ops.square(image)) -
-              math_ops.square(image_mean))
-  variance = gen_nn_ops.relu(variance)
-  stddev = math_ops.sqrt(variance)
+    variance = (math_ops.reduce_mean(math_ops.square(image)) -
+                math_ops.square(image_mean))
+    variance = gen_nn_ops.relu(variance)
+    stddev = math_ops.sqrt(variance)
 
-  # Apply a minimum normalization that protects us against uniform images.
-  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
-  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
-  pixel_value_offset = image_mean
+    # Apply a minimum normalization that protects us against uniform images.
+    min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
+    pixel_value_scale = math_ops.maximum(stddev, min_stddev)
+    pixel_value_offset = image_mean
 
-  image = math_ops.subtract(image, pixel_value_offset)
-  image = math_ops.div(image, pixel_value_scale)
-  return image
+    image = math_ops.subtract(image, pixel_value_offset)
+    image = math_ops.div(image, pixel_value_scale, name=scope)
+    return image
 
 
 def random_brightness(image, max_delta, seed=None):
@@ -980,8 +997,8 @@ def adjust_gamma(image, gamma=1, gain=1):
 
   Args:
     image : A Tensor.
-    gamma : A scalar. Non negative real number.
-    gain  : A scalar. The constant multiplier.
+    gamma : A scalar or tensor. Non negative real number.
+    gain  : A scalar or tensor. The constant multiplier.
 
   Returns:
     A Tensor. Gamma corrected output image.
@@ -1000,17 +1017,20 @@ def adjust_gamma(image, gamma=1, gain=1):
   """
 
   with ops.op_scope([image, gamma, gain], None, 'adjust_gamma'):
-    # Convert pixel value to DT_FLOAT for computing adjusted image
+    # Convert pixel value to DT_FLOAT for computing adjusted image.
     img = ops.convert_to_tensor(image, name='img', dtype=dtypes.float32)
-    # Keep image dtype for computing the scale of corresponding dtype
+    # Keep image dtype for computing the scale of corresponding dtype.
     image = ops.convert_to_tensor(image, name='image')
 
-    if gamma < 0:
-      raise ValueError('Gamma should be a non-negative real number')
-    # scale = max(dtype) - min(dtype)
+    assert_op = _assert(gamma >= 0, ValueError,
+                        'Gamma should be a non-negative real number.')
+    if assert_op:
+      gamma = control_flow_ops.with_dependencies(assert_op, gamma)
+   
+    # scale = max(dtype) - min(dtype).
     scale = constant_op.constant(image.dtype.limits[1] - image.dtype.limits[0],
                                  dtype=dtypes.float32)
-    # According to the definition of gamma correction
+    # According to the definition of gamma correction.
     adjusted_img = (img / scale) ** gamma * scale * gain
 
     return adjusted_img
@@ -1118,10 +1138,8 @@ def rgb_to_grayscale(images, name=None):
     # Reference for converting between RGB and grayscale.
     # https://en.wikipedia.org/wiki/Luma_%28video%29
     rgb_weights = [0.2989, 0.5870, 0.1140]
-    rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
-    gray_float = math_ops.reduce_sum(
-        flt_image * rgb_weights, rank_1, keepdims=True)
-    gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
+    gray_float = math_ops.tensordot(flt_image, rgb_weights, [-1, -1])
+    gray_float = array_ops.expand_dims(gray_float, -1)
     return convert_image_dtype(gray_float, orig_dtype, name=name)
 
 
@@ -1168,7 +1186,7 @@ def random_hue(image, max_delta, seed=None):
       set_random_seed for its interaction with the graph-level random seed.
 
   Returns:
-    3-D float tensor of shape `[height, width, channels]`.
+    Adjusted image(s), same shape and DType as `image`.
 
   Raises:
     ValueError: if `max_delta` is invalid.
@@ -1275,30 +1293,9 @@ def adjust_saturation(image, saturation_factor, name=None):
     orig_dtype = image.dtype
     flt_image = convert_image_dtype(image, dtypes.float32)
 
-    # TODO(zhengxq): we will switch to the fused version after we add a GPU
-    # kernel for that.
-    fused = os.environ.get('TF_ADJUST_SATURATION_FUSED', '')
-    fused = fused.lower() in ('true', 't', '1')
-
-    if fused:
-      return convert_image_dtype(
-          gen_image_ops.adjust_saturation(flt_image, saturation_factor),
-          orig_dtype)
-
-    hsv = gen_image_ops.rgb_to_hsv(flt_image)
-
-    hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
-    saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
-    value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])
-
-    saturation *= saturation_factor
-    saturation = clip_ops.clip_by_value(saturation, 0.0, 1.0)
-
-    hsv_altered = array_ops.concat([hue, saturation, value], 2)
-    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)
-
-    return convert_image_dtype(rgb_altered, orig_dtype)
-
+    return convert_image_dtype(
+        gen_image_ops.adjust_saturation(flt_image, saturation_factor),
+        orig_dtype)
 
 def decode_image(contents, channels=None, name=None):
   """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`,
@@ -1499,7 +1496,7 @@ def sample_distorted_bounding_box(image_size, bounding_boxes, seed=None,
       # Draw the bounding box in an image summary.
       image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                     bbox_for_draw)
-      tf.image_summary('images_with_box', image_with_box)
+      tf.summary.image('images_with_box', image_with_box)
 
       # Employ the bounding box to distort the image.
       distorted_image = tf.slice(image, begin, size)
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index d1554b399f3776933bf970f7b2ceb8db5865d844..3a49d41c9ea031126286a4b70861394d6907381f 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -189,6 +189,44 @@ class AdjustGamma(test_util.TensorFlowTestCase):
 
       self.assertAllClose(y_tf, y_np, 1e-6)
 
+  def test_adjust_gamma_less_zero(self):
+    """White image should be returned for gamma equal to zero"""
+    with self.test_session():
+      x_data = np.random.uniform(0, 255, (8, 8))
+      x_np = np.array(x_data, dtype=np.float32)
+    
+      x = constant_op.constant(x_np, shape=x_np.shape)
+
+      err_msg = 'Gamma should be a non-negative real number.'
+      
+      try:
+        image_ops.adjust_gamma(x, gamma=-1)
+      except Exception as e:
+        if err_msg not in str(e):
+          raise
+      else:
+        raise AssertionError("Exception not raised: %s" % err_msg)
+
+  def test_adjust_gamma_less_zero_tensor(self):
+    """White image should be returned for gamma equal to zero"""
+    with self.test_session():
+      x_data = np.random.uniform(0, 255, (8, 8))
+      x_np = np.array(x_data, dtype=np.float32)
+    
+      x = constant_op.constant(x_np, shape=x_np.shape)
+      y = constant_op.constant(-1.0, dtype=dtypes.float32)
+      
+      image = image_ops.adjust_gamma(x, gamma=y)
+      
+      err_msg = 'Gamma should be a non-negative real number.'
+      try:
+        image.eval()
+      except Exception as e:
+        if err_msg not in str(e):
+          raise
+      else:
+        raise AssertionError("Exception not raised: %s" % err_msg)
+      
   def test_adjust_gamma_zero(self):
     """White image should be returned for gamma equal to zero"""
     with self.test_session():
@@ -281,6 +319,21 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
+  def testBatchAdjustHue(self):
+    x_shape = [2, 1, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    delta = 0.25
+    y_data = [13, 0, 11, 226, 54, 221, 234, 8, 92, 1, 217, 255]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_np, shape=x_shape)
+      y = image_ops.adjust_hue(x, delta)
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, y_np)
+
   def _adjustHueNp(self, x_np, delta_h):
     self.assertEqual(x_np.shape[-1], 3)
     x_v = x_np.reshape([-1, 3])
@@ -359,6 +412,89 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
       self._adjustHueTf(x_np, delta_h)
 
 
+class FlipImageBenchmark(test.Benchmark):
+
+  def _benchmarkFlipLeftRight(self, device, cpu_count):
+    image_shape = [299, 299, 3]
+    warmup_rounds = 100
+    benchmark_rounds = 1000
+    config = config_pb2.ConfigProto()
+    if cpu_count is not None:
+      config.inter_op_parallelism_threads = 1
+      config.intra_op_parallelism_threads = cpu_count
+    with session.Session("", graph=ops.Graph(), config=config) as sess:
+      with ops.device(device):
+        inputs = variables.Variable(
+            random_ops.random_uniform(
+                image_shape, dtype=dtypes.float32) * 255,
+            trainable=False,
+            dtype=dtypes.float32)
+        run_op = image_ops.flip_left_right(inputs)
+        sess.run(variables.global_variables_initializer())
+        for i in xrange(warmup_rounds + benchmark_rounds):
+          if i == warmup_rounds:
+            start = time.time()
+          sess.run(run_op)
+    end = time.time()
+    step_time = (end - start) / benchmark_rounds
+    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
+    print("benchmarkFlipLeftRight_299_299_3_%s step_time: %.2f us" %
+          (tag, step_time * 1e6))
+    self.report_benchmark(
+        name="benchmarkFlipLeftRight_299_299_3_%s" % (tag),
+        iters=benchmark_rounds,
+        wall_time=step_time)
+
+  def _benchmarkRandomFlipLeftRight(self, device, cpu_count):
+    image_shape = [299, 299, 3]
+    warmup_rounds = 100
+    benchmark_rounds = 1000
+    config = config_pb2.ConfigProto()
+    if cpu_count is not None:
+      config.inter_op_parallelism_threads = 1
+      config.intra_op_parallelism_threads = cpu_count
+    with session.Session("", graph=ops.Graph(), config=config) as sess:
+      with ops.device(device):
+        inputs = variables.Variable(
+            random_ops.random_uniform(
+                image_shape, dtype=dtypes.float32) * 255,
+            trainable=False,
+            dtype=dtypes.float32)
+        run_op = image_ops.random_flip_left_right(inputs)
+        sess.run(variables.global_variables_initializer())
+        for i in xrange(warmup_rounds + benchmark_rounds):
+          if i == warmup_rounds:
+            start = time.time()
+          sess.run(run_op)
+    end = time.time()
+    step_time = (end - start) / benchmark_rounds
+    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
+    print("benchmarkRandomFlipLeftRight_299_299_3_%s step_time: %.2f us" %
+          (tag, step_time * 1e6))
+    self.report_benchmark(
+        name="benchmarkRandomFlipLeftRight_299_299_3_%s" % (tag),
+        iters=benchmark_rounds,
+        wall_time=step_time)
+
+  def benchmarkFlipLeftRightCpu1(self):
+    self._benchmarkFlipLeftRight("/cpu:0", 1)
+
+  def benchmarkFlipLeftRightCpuAll(self):
+    self._benchmarkFlipLeftRight("/cpu:0", None)
+
+  def benchmarkFlipLeftRightGpu(self):
+    self._benchmarkFlipLeftRight(test.gpu_device_name(), None)
+
+  def benchmarkRandomFlipLeftRightCpu1(self):
+    self._benchmarkRandomFlipLeftRight("/cpu:0", 1)
+
+  def benchmarkRandomFlipLeftRightCpuAll(self):
+    self._benchmarkRandomFlipLeftRight("/cpu:0", None)
+
+  def benchmarkRandomFlipLeftRightGpu(self):
+    self._benchmarkRandomFlipLeftRight(test.gpu_device_name(), None)
+
+
 class AdjustHueBenchmark(test.Benchmark):
 
   def _benchmarkAdjustHue(self, device, cpu_count):
@@ -632,6 +768,21 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase):
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
+  def testBatchSaturation(self):
+    x_shape = [2, 1, 2, 3]
+    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
+    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
+
+    saturation_factor = 0.5
+    y_data = [6, 9, 13, 140, 180, 226, 135, 121, 234, 172, 255, 128]
+    y_np = np.array(y_data, dtype=np.uint8).reshape(x_shape)
+
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_np, shape=x_shape)
+      y = image_ops.adjust_saturation(x, saturation_factor)
+      y_tf = y.eval()
+      self.assertAllEqual(y_tf, y_np)
+
   def _adjust_saturation(self, image, saturation_factor):
     image = ops.convert_to_tensor(image, name="image")
     orig_dtype = image.dtype
@@ -744,6 +895,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
     with self.test_session(use_gpu=True):
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
       y = image_ops.flip_left_right(x_tf)
+      self.assertTrue(y.op.name.startswith('flip_left_right'))
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
@@ -754,6 +906,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
     with self.test_session(use_gpu=True):
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
       y = image_ops.random_flip_left_right(x_tf)
+      self.assertTrue(y.op.name.startswith('random_flip_left_right'))
 
       count_flipped = 0
       count_unflipped = 0
@@ -784,6 +937,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
     with self.test_session(use_gpu=True):
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
       y = image_ops.flip_up_down(x_tf)
+      self.assertTrue(y.op.name.startswith('flip_up_down'))
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
@@ -794,6 +948,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
     with self.test_session(use_gpu=True):
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
       y = image_ops.random_flip_up_down(x_tf)
+      self.assertTrue(y.op.name.startswith('random_flip_up_down'))
       count_flipped = 0
       count_unflipped = 0
       for _ in range(50):
@@ -823,6 +978,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
     with self.test_session(use_gpu=True):
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
       y = image_ops.transpose_image(x_tf)
+      self.assertTrue(y.op.name.startswith('transpose_image'))
       y_tf = y.eval()
       self.assertAllEqual(y_tf, y_np)
 
@@ -1047,6 +1203,7 @@ class PerImageWhiteningTest(test_util.TensorFlowTestCase):
     with self.test_session(use_gpu=True):
       x = constant_op.constant(x_np, shape=x_shape)
       y = image_ops.per_image_standardization(x)
+      self.assertTrue(y.op.name.startswith('per_image_standardization'))
       y_tf = y.eval()
       self.assertAllClose(y_tf, y_np, atol=1e-4)
 
@@ -1228,6 +1385,11 @@ class CropToBoundingBoxTest(test_util.TensorFlowTestCase):
     for params, err_msg in test_config:
       self._assertRaises(x, x_shape, *params, err_msg=err_msg)
 
+  def testNameScope(self):
+    image = array_ops.placeholder(dtypes.float32, shape=[55, 66, 3])
+    y = image_ops.crop_to_bounding_box(image, 0, 0, 55, 66)
+    self.assertTrue(y.name.startswith('crop_to_bounding_box'))
+
 
 class CentralCropTest(test_util.TensorFlowTestCase):
 
@@ -1304,6 +1466,13 @@ class CentralCropTest(test_util.TensorFlowTestCase):
       with self.assertRaises(ValueError):
         _ = image_ops.central_crop(x, 1.01)
 
+  def testNameScope(self):
+    x_shape = [13, 9, 3]
+    x_np = np.ones(x_shape, dtype=np.float32)
+    with self.test_session(use_gpu=True):
+      y = image_ops.central_crop(x_np, 1.0)
+      self.assertTrue(y.op.name.startswith('central_crop'))
+
 
 class PadToBoundingBoxTest(test_util.TensorFlowTestCase):
 
@@ -1507,6 +1676,11 @@ class PadToBoundingBoxTest(test_util.TensorFlowTestCase):
     for config_item in test_config:
       self._assertRaises(x, x_shape, *config_item)
 
+  def testNameScope(self):
+    image = array_ops.placeholder(dtypes.float32, shape=[55, 66, 3])
+    y = image_ops.pad_to_bounding_box(image, 0, 0, 55, 66)
+    self.assertTrue(y.op.name.startswith('pad_to_bounding_box'))
+
 
 class SelectDistortedCropBoxTest(test_util.TensorFlowTestCase):
 
@@ -2111,6 +2285,13 @@ class ResizeImagesTest(test_util.TensorFlowTestCase):
     self._assertShapeInference([59, 60, None], [55, 66], [55, 66, None])
     self._assertShapeInference([None, None, None], [55, 66], [55, 66, None])
 
+  def testNameScope(self):
+    img_shape = [1, 3, 2, 1]
+    with self.test_session(use_gpu=True):
+      single_image = array_ops.placeholder(dtypes.float32, shape=[50, 60, 3])
+      y = image_ops.resize_images(single_image, [55, 66])
+      self.assertTrue(y.op.name.startswith('resize_images'))
+
 
 class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase):
 
@@ -2386,6 +2567,11 @@ class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase):
     self._assertRaises(x, x_shape, target_height, target_width,
                        "target_width must be > 0")
 
+  def testNameScope(self):
+    image = array_ops.placeholder(dtypes.float32, shape=[50, 60, 3])
+    y = image_ops.resize_image_with_crop_or_pad(image, 55, 66)
+    self.assertTrue(y.op.name.startswith('resize_image_with_crop_or_pad'))
+
 
 def _SimpleColorRamp():
   """Build a simple color ramp RGB image."""
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
index 9eea3c21f89791ab4629e30ca895ef75af32f920..5dc43d65b955613698efccd06f60f1c1b05842d6 100644
--- a/tensorflow/python/ops/init_ops.py
+++ b/tensorflow/python/ops/init_ops.py
@@ -34,6 +34,8 @@ from __future__ import print_function
 
 import math
 
+import numpy as np
+
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
@@ -128,14 +130,17 @@ class Constant(Initializer):
   tensor shape, the initializer will raise a `ValueError`.
 
   Args:
-    value: A Python scalar, list of values, or a N-dimensional numpy array. All
-      elements of the initialized variable will be set to the corresponding
-      value in the `value` argument.
+    value: A Python scalar, list or tuple of values, or a N-dimensional numpy
+      array. All elements of the initialized variable will be set to the
+      corresponding value in the `value` argument.
     dtype: The data type.
     verify_shape: Boolean that enables verification of the shape of `value`. If
       `True`, the initializer will throw an error if the shape of `value` is not
       compatible with the shape of the initialized tensor.
 
+  Raises:
+    TypeError: If the input `value` is not one of the expected types.
+
   Examples:
     The following example can be rewritten using a numpy.ndarray instead
     of the `value` list, even reshaped, as shown in the two commented lines
@@ -187,6 +192,11 @@ class Constant(Initializer):
   """
 
   def __init__(self, value=0, dtype=dtypes.float32, verify_shape=False):
+    if not (np.isscalar(value) or isinstance(value, (list, tuple, np.ndarray))):
+      raise TypeError(
+          "Invalid type for initial value: %s (expected Python scalar, list or "
+          "tuple of values, or numpy.ndarray)." % type(value))
+
     self.value = value
     self.dtype = dtypes.as_dtype(dtype)
     self._verify_shape = verify_shape
diff --git a/tensorflow/python/ops/linalg/linalg_impl.py b/tensorflow/python/ops/linalg/linalg_impl.py
index bf15f0e2e55385032b194c7718e175114e77dd7b..db33a08137e1d2508314c2d28bdbbb001198e6c1 100644
--- a/tensorflow/python/ops/linalg/linalg_impl.py
+++ b/tensorflow/python/ops/linalg/linalg_impl.py
@@ -41,6 +41,7 @@ einsum = special_math_ops.einsum
 expm = gen_linalg_ops._matrix_exponential
 eye = linalg_ops.eye
 inv = linalg_ops.matrix_inverse
+logm = gen_linalg_ops._matrix_logarithm
 lstsq = linalg_ops.matrix_solve_ls
 norm = linalg_ops.norm
 qr = linalg_ops.qr
diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py
index 8a76fe3ce55bbdea1677f83fe075ed3bdc8d875d..13a32c83d99363e687f7e2365a91c8e453c81c7e 100644
--- a/tensorflow/python/ops/linalg_grad.py
+++ b/tensorflow/python/ops/linalg_grad.py
@@ -268,13 +268,13 @@ def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
 
 @ops.RegisterGradient("Svd")
 def _SvdGrad(op, grad_s, grad_u, grad_v):
-  """Gradient for Svd based on Giles' algorithm. Reference at top of file."""
-
-  if op.get_attr("compute_uv") and not op.get_attr("full_matrices"):
-    raise NotImplementedError(
-        "SVD gradient is not implemented for compute_uv=True and "
-        "full_matrices=False.")
+  """Gradient for the singular value decomposition."""
 
+  # The derivation for the compute_uv=False case, and most of
+  # the derivation for the full_matrices=True case, are in
+  # Giles' paper (see reference at top of file).  A derivation for
+  # the full_matrices=False case is available at
+  # https://j-towns.github.io/papers/svd-derivative.pdf
   a = op.inputs[0]
   a_shape = a.get_shape().with_rank_at_least(2)
 
@@ -300,7 +300,7 @@ def _SvdGrad(op, grad_s, grad_u, grad_v):
         "SVD gradient has not been implemented for input with unknown "
         "inner matrix shape.")
 
-  if not op.get_attr("full_matrices") or not op.get_attr("compute_uv"):
+  if not op.get_attr("compute_uv"):
     s, u, v = linalg_ops.svd(a, compute_uv=True, full_matrices=True)
   else:
     s = op.outputs[0]
@@ -329,14 +329,10 @@ def _SvdGrad(op, grad_s, grad_u, grad_v):
       grad_a.set_shape(a_shape)
       return grad_a
 
-    # TODO(rmlarsen): Define a gradient that is numerically stable for
-    # abs(m-n) > 1. Currently this does not work because there are effectively
-    # multiple singular values with value zero. I am not sure if this is a true
-    # instability or if it simply throws off the finite difference gradient
-    # checker.
-    if abs(m - n) > 1:
+    if op.get_attr("full_matrices") and abs(m - n) > 1:
       raise NotImplementedError(
-          "svd gradient is not implemented for abs(m - n) > 1")
+          "svd gradient is not implemented for abs(m - n) > 1 "
+          "when full_matrices is True")
     s_mat = array_ops.matrix_diag(s)
     s2 = math_ops.square(s)
 
@@ -352,32 +348,45 @@ def _SvdGrad(op, grad_s, grad_u, grad_v):
             array_ops.expand_dims(s2, -2) - array_ops.expand_dims(s2, -1)),
         array_ops.zeros_like(s))
     s_inv_mat = array_ops.matrix_diag(math_ops.reciprocal(s))
+
+    v1 = v[..., :, :m]
+    grad_v1 = grad_v[..., :, :m]
+
     u_gu = math_ops.matmul(u, grad_u, adjoint_a=True)
-    v_gv = math_ops.matmul(v, grad_v, adjoint_a=True)
+    v_gv = math_ops.matmul(v1, grad_v1, adjoint_a=True)
 
-    if m == n:
-      f_u = f * u_gu
-      f_v = f * v_gv
-    else:
-      dv2 = array_ops.matrix_transpose(v_gv[..., m:n, :m]) - v_gv[..., :m, m:n]
-      f_u = f * u_gu
-      f_v = f * v_gv[..., :m, :m]
+    f_u = f * u_gu
+    f_v = f * v_gv
 
-    grad_a_nouv = (
+    term1_nouv = (
         grad_s_mat + math_ops.matmul(f_u + _linalg.adjoint(f_u), s_mat) +
         math_ops.matmul(s_mat, f_v + _linalg.adjoint(f_v)))
 
-    if m != n:
-      grad_a_nouv = array_ops.concat(
-          [grad_a_nouv, math_ops.matmul(s_inv_mat, dv2)], -1)
+    term1 = math_ops.matmul(u, math_ops.matmul(term1_nouv, v1, adjoint_b=True))
+
+    if m == n:
+      grad_a_before_transpose = term1
+    else:
+      gv1t = array_ops.matrix_transpose(grad_v1)
+      gv1t_v1 = math_ops.matmul(gv1t, v1)
+      term2_nous = gv1t - math_ops.matmul(gv1t_v1, v1, adjoint_b=True)
+
+      if op.get_attr("full_matrices"):
+        v2 = v[..., :, m:n]
+        grad_v2 = grad_v[..., :, m:n]
+
+        v1t_gv2 = math_ops.matmul(v1, grad_v2, adjoint_a=True)
+        term2_nous -= math_ops.matmul(v1t_gv2, v2, adjoint_b=True)
+
+      u_s_inv = math_ops.matmul(u, s_inv_mat)
+      term2 = math_ops.matmul(u_s_inv, term2_nous)
+
+      grad_a_before_transpose = term1 + term2
 
     if use_adjoint:
-      # Use (U X V^H)^H = V (U X)^H.
-      grad_a = math_ops.matmul(
-          v, math_ops.matmul(u, grad_a_nouv), adjoint_b=True)
+      grad_a = array_ops.matrix_transpose(grad_a_before_transpose)
     else:
-      grad_a = math_ops.matmul(u,
-                               math_ops.matmul(grad_a_nouv, v, adjoint_b=True))
+      grad_a = grad_a_before_transpose
 
     grad_a.set_shape(a_shape)
     return grad_a
diff --git a/tensorflow/python/ops/list_ops.py b/tensorflow/python/ops/list_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b31c0063983d19ce281183ec57a230c5909e5b1
--- /dev/null
+++ b/tensorflow/python/ops/list_ops.py
@@ -0,0 +1,63 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Ops to manipulate lists of tensors."""
+
+# pylint: disable=g-bad-name
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import gen_list_ops
+# go/tf-wildcard-import
+# pylint: disable=wildcard-import
+from tensorflow.python.ops.gen_list_ops import *
+# pylint: enable=wildcard-import
+
+
+@ops.RegisterGradient("TensorListPushBack")
+def _PushBackGradient(op, dresult):
+  return gen_list_ops.tensor_list_pop_back(
+      dresult, element_dtype=op.get_attr("element_dtype"))
+
+
+@ops.RegisterGradient("TensorListPopBack")
+def _PopBackGradient(unused_op, dlist, delement):
+  if dlist is None:
+    dlist = gen_list_ops.empty_tensor_list(
+        element_dtype=delement.dtype,
+        element_shape=-1)
+  return gen_list_ops.tensor_list_push_back(dlist, delement)
+
+
+@ops.RegisterGradient("TensorListStack")
+def _TensorListStack(unused_op, dtensor):
+  return gen_list_ops.tensor_list_from_tensor(dtensor,
+                                              element_shape=dtensor.shape[1:])
+
+
+@ops.RegisterGradient("TensorListFromTensor")
+def _TensorListFromTensor(op, dlist):
+  if op.inputs[0].shape[0] is not None:
+    num_elements = op.inputs[0].shape[0]
+  else:
+    num_elements = None
+  if dlist is None:
+    dlist = gen_list_ops.empty_tensor_list(
+        element_dtype=op.inputs[0].dtype,
+        element_shape=-1)
+  return gen_list_ops.tensor_list_stack(
+      dlist, element_dtype=op.inputs[0].dtype,
+      num_elements=num_elements)
diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py
index 08e3f83a0b21a8444ad3500c62fe624440edc255..51ab2aec2298a9072c90c226992f122a804ec02e 100644
--- a/tensorflow/python/ops/logging_ops.py
+++ b/tensorflow/python/ops/logging_ops.py
@@ -39,8 +39,8 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
           name=None):
   """Prints a list of tensors.
 
-  This is an identity op with the side effect of printing `data` when
-  evaluating.
+  This is an identity op (behaves like `tf.identity`) with the side effect
+  of printing `data` when evaluating.
 
   Note: This op prints to the standard error. It is not currently compatible
     with jupyter notebook (printing to the notebook *server's* output, not into
@@ -57,7 +57,7 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
     name: A name for the operation (optional).
 
   Returns:
-    Same tensor as `input_`.
+    A `Tensor`. Has the same type and contents as `input_`.
   """
   return gen_logging_ops._print(input_, data, message, first_n, summarize, name)
 
diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index 8bc0bc7d06bb79056a0e691c1a39c97fa51f750f..333e36873af31a7a89d59d02af87d86227446bd0 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import collections
 import functools
+import six
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
@@ -83,10 +84,10 @@ def _check_table_dtypes(table, key_dtype, value_dtype):
     TypeError: when 'key_dtype' or 'value_dtype' doesn't match the table data
       types.
   """
-  if key_dtype != table.key_dtype:
+  if key_dtype.base_dtype != table.key_dtype:
     raise TypeError("Invalid key dtype, expected %s but got %s." %
                     (table.key_dtype, key_dtype))
-  if value_dtype != table.value_dtype:
+  if value_dtype.base_dtype != table.value_dtype:
     raise TypeError("Invalid value dtype, expected %s but got %s." %
                     (table.value_dtype, value_dtype))
 
@@ -216,7 +217,7 @@ class InitializableLookupTableBase(LookupInterface):
     if isinstance(keys, sparse_tensor.SparseTensor):
       key_tensor = keys.values
 
-    if keys.dtype != self._key_dtype:
+    if keys.dtype.base_dtype != self._key_dtype:
       raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." %
                       (self._key_dtype, keys.dtype))
 
@@ -527,7 +528,7 @@ class TextFileInitializer(TableInitializerBase):
     ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, init_op)
     # If the filename tensor is anything other than a string constant (e.g., if
     # it is a placeholder) then it does not make sense to track it as an asset.
-    if constant_op.is_constant(filename):
+    if context.in_graph_mode() and constant_op.is_constant(filename):
       ops.add_to_collection(ops.GraphKeys.ASSET_FILEPATHS, filename)
     return init_op
 
@@ -688,19 +689,22 @@ class IdTableWithHashBuckets(LookupInterface):
 
   For example, if an instance of `IdTableWithHashBuckets` is initialized with a
   string-to-id table that maps:
-  - emerson -> 0
-  - lake -> 1
-  - palmer -> 2
+
+  * `emerson -> 0`
+  * `lake -> 1`
+  * `palmer -> 2`
 
   The `IdTableWithHashBuckets` object will performs the following mapping:
-  - emerson -> 0
-  - lake -> 1
-  - palmer -> 2
-  - <other term> -> bucket id between 3 and 3 + num_oov_buckets - 1, calculated
-    by: hash(<term>) % num_oov_buckets + vocab_size
 
-  If input_tensor is ["emerson", "lake", "palmer", "king", "crimson"],
-  the lookup result is [0, 1, 2, 4, 7]
+  * `emerson -> 0`
+  * `lake -> 1`
+  * `palmer -> 2`
+  * `<other term> -> bucket_id`, where bucket_id will be between `3` and
+  `3 + num_oov_buckets - 1`, calculated by:
+  `hash(<term>) % num_oov_buckets + vocab_size`
+
+  If input_tensor is `["emerson", "lake", "palmer", "king", "crimson"]`,
+  the lookup result is `[0, 1, 2, 4, 7]`.
 
   If `table` is None, only out-of-vocabulary buckets are used.
 
@@ -845,7 +849,7 @@ class IdTableWithHashBuckets(LookupInterface):
     Raises:
       TypeError: when `keys` doesn't match the table key data type.
     """
-    if keys.dtype != self._key_dtype:
+    if keys.dtype.base_dtype != self._key_dtype:
       raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." %
                       (self._key_dtype, keys.dtype))
     values = keys
@@ -959,7 +963,7 @@ def index_table_from_file(vocabulary_file=None,
       than zero.
   """
   if vocabulary_file is None or (
-      isinstance(vocabulary_file, str) and not vocabulary_file):
+      isinstance(vocabulary_file, six.string_types) and not vocabulary_file):
     raise ValueError("vocabulary_file must be specified and must not be empty.")
   if num_oov_buckets < 0:
     raise ValueError("num_oov_buckets must be greater or equal than 0, got %d."
@@ -1163,7 +1167,7 @@ def index_to_string_table_from_file(vocabulary_file,
   ```
 
   Args:
-    vocabulary_file: The vocabulary filename.
+    vocabulary_file: The vocabulary filename, may be a constant scalar `Tensor`.
     vocab_size: Number of the elements in the vocabulary, if known.
     default_value: The value to use for out-of-vocabulary indices.
     name: A name for this op (optional).
@@ -1181,8 +1185,10 @@ def index_to_string_table_from_file(vocabulary_file,
     ValueError: when `vocabulary_file` is empty.
     ValueError: when `vocab_size` is invalid.
   """
-  if not vocabulary_file:
-    raise ValueError("vocabulary_file must be specified.")
+  if vocabulary_file is None or (
+      isinstance(vocabulary_file, six.string_types) and not vocabulary_file):
+    raise ValueError("vocabulary_file must be specified and must not be empty.")
+
   if vocab_size is not None and vocab_size < 1:
     raise ValueError("vocab_size must be greater than 0, got %d." % vocab_size)
 
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index b74971f654294e25e131a6ba21d982da16cf4264..72508eb4350f57bb06b3829890f92554677c98d5 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -31,19 +31,28 @@ from tensorflow.python.util.deprecation import deprecated_args
 
 
 class Reduction(object):
-  """Types of loss reduction."""
+  """Types of loss reduction.
+
+  Contains the following values:
+  `NONE`: Un-reduced weighted losses with the same shape as input.
+  `SUM`: Scalar sum of weighted losses.
+  `MEAN`: Scalar `SUM` divided by sum of weights.
+  `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
+  `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero
+     weights.
+  `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`.
+  """
 
-  # Un-reduced weighted losses with the same shape as input.
   NONE = "none"
 
-  # Scalar sum of `NONE`.
   SUM = "weighted_sum"
 
-  # Scalar `SUM` divided by sum of weights.
   MEAN = "weighted_mean"
 
-  # Scalar `SUM` divided by number of non-zero weights.
+  SUM_OVER_BATCH_SIZE = "weighted_sum_over_batch_size"
+
   SUM_BY_NONZERO_WEIGHTS = "weighted_sum_by_nonzero_weights"
+  SUM_OVER_NONZERO_WEIGHTS = SUM_BY_NONZERO_WEIGHTS
 
   @classmethod
   def all(cls):
@@ -51,6 +60,8 @@ class Reduction(object):
         cls.NONE,
         cls.SUM,
         cls.MEAN,
+        cls.SUM_OVER_BATCH_SIZE,
+        cls.SUM_OVER_NONZERO_WEIGHTS,
         cls.SUM_BY_NONZERO_WEIGHTS)
 
   @classmethod
@@ -135,6 +146,12 @@ def _num_present(losses, weights, per_batch=False):
     return math_ops.reduce_sum(present, name=scope)
 
 
+def _num_elements(losses):
+  """Computes the number of elements in `losses` tensor."""
+  with ops.name_scope(None, "num_elements", values=[losses]) as scope:
+    return array_ops.size(losses, name=scope, out_type=losses.dtype)
+
+
 def compute_weighted_loss(
     losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
     reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
@@ -157,6 +174,13 @@ def compute_weighted_loss(
     ValueError: If `weights` is `None` or the shape is not compatible with
       `losses`, or if the number of dimensions (rank) of either `losses` or
       `weights` is missing.
+
+  Note:
+    When calculating the gradient of a weighted loss contributions from
+    both `losses` and `weights` are considered. If your `weights` depend
+    on some model parameters but you do not want this to affect the loss
+    gradient, you need to apply @{tf.stop_gradient} to `weights` before
+    passing them to `compute_weighted_loss`.
   """
   Reduction.validate(reduction)
   with ops.name_scope(scope, "weighted_loss", (losses, weights)):
@@ -175,8 +199,11 @@ def compute_weighted_loss(
           loss = _safe_mean(
               loss,
               math_ops.reduce_sum(array_ops.ones_like(losses) * weights))
-        elif reduction == Reduction.SUM_BY_NONZERO_WEIGHTS:
+        elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
+              reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
           loss = _safe_mean(loss, _num_present(losses, weights))
+        elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
+          loss = _safe_mean(loss, _num_elements(losses))
 
       # Convert the result back to the input type.
       loss = math_ops.cast(loss, input_dtype)
@@ -485,7 +512,7 @@ def mean_pairwise_squared_error(
 
   Raises:
     ValueError: If the shape of `predictions` doesn't match that of `labels` or
-      if the shape of `weights` is invalid.  Also if `labels` or `predictions
+      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
       is None.
   """
   if labels is None:
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index 38fe093ba7236ff7fe7b580a893501c84c71f6b1..bca4c665d27f2513ed0029ae0c674f46a060567f 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -184,6 +184,15 @@ def _SparseSegmentSumGrad(op, grad):
           None)
 
 
+@ops.RegisterGradient("SparseSegmentSumWithNumSegments")
+def _SparseSegmentSumWithNumSegmentsGrad(op, grad):
+  """Gradient for SparseSegmentSumWithNumSegments."""
+  input_rows = array_ops.shape(op.inputs[0])[0]
+  return (math_ops.unsorted_segment_sum(
+      array_ops.gather(grad, op.inputs[2]), op.inputs[1], input_rows), None,
+          None, None)
+
+
 @ops.RegisterGradient("SparseSegmentMean")
 def _SparseSegmentMeanGrad(op, grad):
   """Gradient for SparseSegmentMean."""
@@ -192,6 +201,14 @@ def _SparseSegmentMeanGrad(op, grad):
                                             dim0), None, None)
 
 
+@ops.RegisterGradient("SparseSegmentMeanWithNumSegments")
+def _SparseSegmentMeanWithNumSegmentsGrad(op, grad):
+  """Gradient for SparseSegmentMeanWithNumSegments."""
+  dim0 = array_ops.shape(op.inputs[0])[0]
+  return (math_ops.sparse_segment_mean_grad(grad, op.inputs[1], op.inputs[2],
+                                            dim0), None, None, None)
+
+
 @ops.RegisterGradient("SparseSegmentSqrtN")
 def _SparseSegmentSqrtNGrad(op, grad):
   """Gradient for SparseSegmentSqrtN."""
@@ -200,6 +217,14 @@ def _SparseSegmentSqrtNGrad(op, grad):
                                               dim0), None, None)
 
 
+@ops.RegisterGradient("SparseSegmentSqrtNWithNumSegments")
+def _SparseSegmentSqrtNWithNumSegmentsGrad(op, grad):
+  """Gradient for SparseSegmentSqrtNWithNumSegments."""
+  dim0 = array_ops.shape(op.inputs[0])[0]
+  return (math_ops.sparse_segment_sqrt_n_grad(grad, op.inputs[1], op.inputs[2],
+                                              dim0), None, None, None)
+
+
 def _SegmentMinOrMaxGrad(op, grad, is_sorted):
   """Gradient for SegmentMin and (unsorted) SegmentMax. They share similar code."""
   zeros = array_ops.zeros(array_ops.shape(op.inputs[0]),
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index e2e23dccefabd500e184d7af95222052b609fa96..cfdfa09757654aeb10426e1361176baca38d7b6a 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -90,6 +90,7 @@ See the @{$python/math_ops} guide.
 @@cholesky
 @@cholesky_solve
 @@matrix_exponential
+@@matrix_logarithm
 @@matrix_solve
 @@matrix_triangular_solve
 @@matrix_solve_ls
@@ -252,7 +253,7 @@ def abs(x, name=None):
   Returns:
     A `Tensor` or `SparseTensor` the same size and type as `x` with absolute
       values.
-    Note, for `complex64` or `complex128' input, the returned `Tensor` will be
+    Note, for `complex64` or `complex128` input, the returned `Tensor` will be
       of type `float32` or `float64`, respectively.
   """
   with ops.name_scope(name, "Abs", [x]) as name:
@@ -950,6 +951,7 @@ _TRUEDIV_TABLE = {
     dtypes.int16: dtypes.float32,
     dtypes.int32: dtypes.float64,
     dtypes.int64: dtypes.float64,
+    dtypes.bfloat16: None,
     dtypes.float16: None,
     dtypes.float32: None,
     dtypes.float64: None,
@@ -1436,7 +1438,7 @@ def reduce_mean(input_tensor,
     input_tensor: The tensor to reduce. Should have numeric type.
     axis: The dimensions to reduce. If `None` (the default),
       reduces all dimensions. Must be in the range
-      `[-rank(input_tensor), rank(input_tensor))`.
+      `[-rank(input_tensor), rank(input_tensor)]`.
     keepdims: If true, retains reduced dimensions with length 1.
     name: A name for the operation (optional).
     reduction_indices: The old (deprecated) name for axis.
@@ -2002,7 +2004,7 @@ def matmul(a,
       # matmul currently doesn't handle bfloat16 inputs.
       use_sparse_matmul = True
     if use_sparse_matmul:
-      return sparse_matmul(
+      ret = sparse_matmul(
           a,
           b,
           transpose_a=transpose_a,
@@ -2010,6 +2012,12 @@ def matmul(a,
           a_is_sparse=a_is_sparse,
           b_is_sparse=b_is_sparse,
           name=name)
+      # sparse_matmul always returns float32, even with
+      # bfloat16 inputs. This prevents us from configuring bfloat16 training.
+      # casting to bfloat16 also matches non-sparse matmul behavior better.
+      if a.dtype == dtypes.bfloat16 and b.dtype == dtypes.bfloat16:
+        ret = cast(ret, dtypes.bfloat16)
+      return ret
     else:
       return gen_math_ops._mat_mul(
           a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
@@ -2494,6 +2502,159 @@ def reduced_shape(input_shape, axes):
       ])  # [1, 1]
 
 
+def sparse_segment_sum(data, indices, segment_ids, name=None,
+                       num_segments=None):
+  r"""Computes the sum along sparse segments of a tensor.
+
+  Read @{$math_ops#segmentation$the section on segmentation} for an explanation
+  of segments.
+
+  Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first
+  dimension, selecting a subset of dimension 0, specified by `indices`.
+  `segment_ids` is allowed to have missing ids, in which case the output will
+  be zeros at those indices. In those cases `num_segments` is used to determine
+  the size of the output.
+
+  For example:
+
+  ```python
+  c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
+
+  # Select two rows, one segment.
+  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
+  # => [[0 0 0 0]]
+
+  # Select two rows, two segment.
+  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
+  # => [[ 1  2  3  4]
+  #     [-1 -2 -3 -4]]
+
+  # With missing segment ids.
+  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 2]),
+                        num_segments=4)
+  # => [[ 1  2  3  4]
+  #     [ 0  0  0  0]
+  #     [-1 -2 -3 -4]
+  #     [ 0  0  0  0]]
+
+  # Select all rows, two segments.
+  tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
+  # => [[0 0 0 0]
+  #     [5 6 7 8]]
+
+  # Which is equivalent to:
+  tf.segment_sum(c, tf.constant([0, 0, 1]))
+  ```
+
+  Args:
+    data: A `Tensor` with data that will be assembled in the output.
+    indices: A 1-D `Tensor` with indices into `data`. Has same rank as
+      `segment_ids`.
+    segment_ids: A 1-D `Tensor` with indices into the output `Tensor`.
+      Values should be sorted and can be repeated.
+    name: A name for the operation (optional).
+    num_segments: An optional int32 scalar. Indicates the size of the output
+      `Tensor`.
+
+  Returns:
+    A `tensor` of the shape as data, except for dimension 0 which
+    has size `k`, the number of segments specified via `num_segments` or
+    inferred for the last element in `segments_ids`.
+  """
+  if num_segments is not None:
+    return gen_math_ops.sparse_segment_sum_with_num_segments(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        num_segments=num_segments,
+        name=name)
+  else:
+    return gen_math_ops.sparse_segment_sum(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        name=name)
+
+
+def sparse_segment_mean(data, indices, segment_ids, name=None,
+                        num_segments=None):
+  r"""Computes the mean along sparse segments of a tensor.
+
+  Read @{$math_ops#segmentation$the section on segmentation} for an explanation
+  of segments.
+
+  Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first
+  dimension, selecting a subset of dimension 0, specified by `indices`.
+  `segment_ids` is allowed to have missing ids, in which case the output will
+  be zeros at those indices. In those cases `num_segments` is used to determine
+  the size of the output.
+
+  Args:
+    data: A `Tensor` with data that will be assembled in the output.
+    indices: A 1-D `Tensor` with indices into `data`. Has same rank as
+      `segment_ids`.
+    segment_ids: A 1-D `Tensor` with indices into the output `Tensor`.
+      Values should be sorted and can be repeated.
+    name: A name for the operation (optional).
+    num_segments: An optional int32 scalar. Indicates the size of the output
+      `Tensor`.
+
+  Returns:
+    A `tensor` of the shape as data, except for dimension 0 which
+    has size `k`, the number of segments specified via `num_segments` or
+    inferred for the last element in `segments_ids`.
+  """
+  if num_segments is not None:
+    return gen_math_ops.sparse_segment_mean_with_num_segments(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        num_segments=num_segments,
+        name=name)
+  else:
+    return gen_math_ops.sparse_segment_mean(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        name=name)
+
+
+def sparse_segment_sqrt_n(data, indices, segment_ids, name=None,
+                          num_segments=None):
+  r"""Computes the sum along sparse segments of a tensor divided by the sqrt(N).
+
+  `N` is the size of the segment being reduced.
+
+  Args:
+    data: A `Tensor` with data that will be assembled in the output.
+    indices: A 1-D `Tensor` with indices into `data`. Has same rank as
+      `segment_ids`.
+    segment_ids: A 1-D `Tensor` with indices into the output `Tensor`.
+      Values should be sorted and can be repeated.
+    name: A name for the operation (optional).
+    num_segments: An optional int32 scalar. Indicates the size of the output
+      `Tensor`.
+
+  Returns:
+    A `tensor` of the shape as data, except for dimension 0 which
+    has size `k`, the number of segments specified via `num_segments` or
+    inferred for the last element in `segments_ids`.
+  """
+  if num_segments is not None:
+    return gen_math_ops.sparse_segment_sqrt_n_with_num_segments(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        num_segments=num_segments,
+        name=name)
+  else:
+    return gen_math_ops.sparse_segment_sqrt_n(
+        data=data,
+        indices=indices,
+        segment_ids=segment_ids,
+        name=name)
+
+
 def tensordot(a, b, axes, name=None):
   r"""Tensor contraction of a and b along specified axes.
 
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index e04121ee31d1b6c82151bf7415b3e73614b24781..25e1613a651cf3bc144e121b61f1edd64a16596e 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -175,7 +175,7 @@ def _maybe_expand_labels(labels, predictions):
 
 
 def _safe_div(numerator, denominator, name):
-  """Divides two values, returning 0 if the denominator is <= 0.
+  """Divides two tensors element-wise, returning 0 if the denominator is <= 0.
 
   Args:
     numerator: A real `Tensor`.
@@ -185,11 +185,11 @@ def _safe_div(numerator, denominator, name):
   Returns:
     0 if `denominator` <= 0, else `numerator` / `denominator`
   """
-  return array_ops.where(
-      math_ops.greater(denominator, 0),
-      math_ops.truediv(numerator, denominator),
-      0,
-      name=name)
+  t = math_ops.truediv(numerator, denominator)
+  zero = array_ops.zeros_like(t, dtype=denominator.dtype)
+  condition = math_ops.greater(denominator, zero)
+  zero = math_ops.cast(zero, t.dtype)
+  return array_ops.where(condition, t, zero, name=name)
 
 
 def _safe_scalar_div(numerator, denominator, name):
diff --git a/tensorflow/python/ops/nn_batchnorm_test.py b/tensorflow/python/ops/nn_batchnorm_test.py
index 8aed2e293fa2dd6559d342f109a996d810db13bf..fc013b565b764f0d22df29f99e78cb97498c5ced 100644
--- a/tensorflow/python/ops/nn_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_batchnorm_test.py
@@ -21,9 +21,12 @@ from __future__ import print_function
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.core.framework import graph_pb2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import importer
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import gradient_checker
@@ -34,8 +37,18 @@ import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
 from tensorflow.python.platform import test
 
 
+@test_util.with_c_api
 class BatchNormalizationTest(test.TestCase):
 
+  def SetProducerVersion(self, graph, producer_version):
+    # The C API doesn't expose altering GraphDefVersions. We can indirectly set
+    # it via import_graph_def though.
+    graph_def = graph_pb2.GraphDef()
+    graph_def.versions.producer = producer_version
+    with graph.as_default():
+      importer.import_graph_def(graph_def)
+    assert graph.graph_def_versions.producer, producer_version
+
   def _npBatchNorm(self, x, m, v, beta, gamma, epsilon,
                    scale_after_normalization, shift_after_normalization):
     y = (x - m) / np.sqrt(v + epsilon)
@@ -52,9 +65,7 @@ class BatchNormalizationTest(test.TestCase):
   def _tfBatchNormV1(self, x, m, v, beta, gamma, epsilon,
                      scale_after_normalization):
     """Original implementation."""
-    # _batch_norm_with_global_normalization is deprecated in v9
-    ops.get_default_graph().graph_def_versions.producer = 8
-    # pylint: disable=protected-access
+    self.SetProducerVersion(ops.get_default_graph(), 8)
     return gen_nn_ops._batch_norm_with_global_normalization(
         x, m, v, beta, gamma, epsilon, scale_after_normalization)
     # pylint: enable=protected-access
@@ -222,7 +233,7 @@ class BatchNormalizationTest(test.TestCase):
         epsilon = 0.001
         for scale_after_normalization in [True, False]:
           # _batch_norm_with_global_normalization_grad is deprecated in v9
-          ops.get_default_graph().graph_def_versions.producer = 8
+          self.SetProducerVersion(ops.get_default_graph(), 8)
           grad = gen_nn_ops._batch_norm_with_global_normalization_grad(
               x, m, v, gamma, backprop, epsilon, scale_after_normalization)
           dx, dm, dv, db, dg = grad
@@ -334,6 +345,7 @@ class BatchNormalizationTest(test.TestCase):
         (2, 3, 2, 4, 5), (1, 1, 1, 4, 5), atol=0.005)
 
 
+@test_util.with_c_api
 class SufficientStatisticsTest(test.TestCase):
 
   def _npSuffStats(self, x, axes, shift, keep_dims):
@@ -393,6 +405,7 @@ class SufficientStatisticsTest(test.TestCase):
           self._testSuffStats([1, 2, 3], [0, 2], shift, keep_dims, has_shape)
 
 
+@test_util.with_c_api
 class NormalizeMomentsTest(test.TestCase):
 
   def _npNormalizeMoments(self, counts, mean_ss, variance_ss, shift):
@@ -436,6 +449,7 @@ class NormalizeMomentsTest(test.TestCase):
       self._testNormalizeMoments([2, 3], shift)
 
 
+@test_util.with_c_api
 class MomentsTest(test.TestCase):
 
   def _unweighted_moments(self, x, axes, keep_dims=False, extra_out_grads=None):
@@ -573,6 +587,7 @@ class MomentsTest(test.TestCase):
     self._testGlobalGradient(from_y="var")
 
 
+@test_util.with_c_api
 class WeightedMomentsTest(MomentsTest):
   """Tests for nn.weighted_moments.
 
diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py
index e72d34d1f728344709cd7429ab560379a2836cab..0593ed2cfa64eca59ca02904ca71b4fd4936af1b 100644
--- a/tensorflow/python/ops/nn_fused_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py
@@ -171,6 +171,10 @@ class BatchNormalizationTest(test.TestCase):
         x, x_shape, y, y_shape, delta=1e-3, x_init_value=x_init_val)
     _, numerical_grad = gradient_checker.compute_gradient(
         x32, x_shape, y32, y_shape, delta=1e-3, x_init_value=x32_init_val)
+
+    # If grad is empty, no error.
+    if theoretical_grad.size == 0 and numerical_grad.size == 0:
+      return 0
     return np.fabs(theoretical_grad - numerical_grad).max()
 
   def _test_gradient(self,
@@ -333,7 +337,7 @@ class BatchNormalizationTest(test.TestCase):
     self.assertLess(err_grad_x_2, err_tolerance)
     self.assertLess(err_grad_scale, err_tolerance)
 
-  def testInference(self):
+  def testInferenceShape1(self):
     x_shape = [1, 1, 6, 1]
     for dtype in [np.float16, np.float32]:
       if test.is_gpu_available(cuda_only=True):
@@ -344,6 +348,7 @@ class BatchNormalizationTest(test.TestCase):
       self._test_inference(
           x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')
 
+  def testInferenceShape2(self):
     x_shape = [1, 1, 6, 2]
     if test.is_gpu_available(cuda_only=True):
       for dtype in [np.float16, np.float32]:
@@ -352,12 +357,14 @@ class BatchNormalizationTest(test.TestCase):
         self._test_inference(
             x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')
 
+  def testInferenceShape3(self):
     x_shape = [1, 2, 1, 6]
     if test.is_gpu_available(cuda_only=True):
       for dtype in [np.float16, np.float32]:
         self._test_inference(
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
 
+  def testInferenceShape4(self):
     x_shape = [27, 131, 127, 6]
     for dtype in [np.float16, np.float32]:
       if test.is_gpu_available(cuda_only=True):
@@ -368,7 +375,18 @@ class BatchNormalizationTest(test.TestCase):
       self._test_inference(
           x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
 
-  def testTraining(self):
+  def testInferenceShape5(self):
+    x_shape = [0, 131, 127, 6]
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
+        self._test_inference(
+            x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
+        self._test_inference(
+            x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
+      self._test_inference(
+          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
+
+  def testTrainingShape1(self):
     x_shape = [1, 1, 6, 1]
     for dtype in [np.float16, np.float32]:
       if test.is_gpu_available(cuda_only=True):
@@ -379,6 +397,7 @@ class BatchNormalizationTest(test.TestCase):
       self._test_training(
           x_shape, dtype, [1], np.float32, use_gpu=False, data_format='NHWC')
 
+  def testTrainingShape2(self):
     x_shape = [1, 1, 6, 2]
     for dtype in [np.float16, np.float32]:
       if test.is_gpu_available(cuda_only=True):
@@ -387,12 +406,14 @@ class BatchNormalizationTest(test.TestCase):
       self._test_training(
           x_shape, dtype, [2], np.float32, use_gpu=False, data_format='NHWC')
 
+  def testTrainingShape3(self):
     x_shape = [1, 2, 1, 6]
     if test.is_gpu_available(cuda_only=True):
       for dtype in [np.float16, np.float32]:
         self._test_training(
             x_shape, dtype, [2], np.float32, use_gpu=True, data_format='NCHW')
 
+  def testTrainingShape4(self):
     x_shape = [27, 131, 127, 6]
     for dtype in [np.float16, np.float32]:
       if test.is_gpu_available(cuda_only=True):
@@ -403,7 +424,18 @@ class BatchNormalizationTest(test.TestCase):
       self._test_training(
           x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
 
-  def testBatchNormGrad(self):
+  def testTrainingShape5(self):
+    x_shape = [0, 131, 127, 6]
+    for dtype in [np.float16, np.float32]:
+      if test.is_gpu_available(cuda_only=True):
+        self._test_training(
+            x_shape, dtype, [131], np.float32, use_gpu=True, data_format='NCHW')
+        self._test_training(
+            x_shape, dtype, [6], np.float32, use_gpu=True, data_format='NHWC')
+      self._test_training(
+          x_shape, dtype, [6], np.float32, use_gpu=False, data_format='NHWC')
+
+  def testBatchNormGradShape1(self):
     for is_training in [True, False]:
       x_shape = [1, 1, 6, 1]
       for dtype in [np.float16, np.float32]:
@@ -430,6 +462,8 @@ class BatchNormalizationTest(test.TestCase):
             data_format='NHWC',
             is_training=is_training)
 
+  def testBatchNormGradShape2(self):
+    for is_training in [True, False]:
       x_shape = [1, 1, 6, 2]
       for dtype in [np.float16, np.float32]:
         if test.is_gpu_available(cuda_only=True):
@@ -448,6 +482,8 @@ class BatchNormalizationTest(test.TestCase):
             data_format='NHWC',
             is_training=is_training)
 
+  def testBatchNormGradShape3(self):
+    for is_training in [True, False]:
       x_shape = [1, 2, 1, 6]
       if test.is_gpu_available(cuda_only=True):
         for dtype in [np.float16, np.float32]:
@@ -459,6 +495,8 @@ class BatchNormalizationTest(test.TestCase):
               data_format='NCHW',
               is_training=is_training)
 
+  def testBatchNormGradShape4(self):
+    for is_training in [True, False]:
       x_shape = [5, 7, 11, 4]
       for dtype in [np.float16, np.float32]:
         if test.is_gpu_available(cuda_only=True):
@@ -484,6 +522,33 @@ class BatchNormalizationTest(test.TestCase):
             data_format='NHWC',
             is_training=is_training)
 
+  def testBatchNormGradShape5(self):
+    for is_training in [True, False]:
+      x_shape = [0, 7, 11, 4]
+      for dtype in [np.float16, np.float32]:
+        if test.is_gpu_available(cuda_only=True):
+          self._test_gradient(
+              x_shape,
+              dtype, [7],
+              np.float32,
+              use_gpu=True,
+              data_format='NCHW',
+              is_training=is_training)
+          self._test_gradient(
+              x_shape,
+              dtype, [4],
+              np.float32,
+              use_gpu=True,
+              data_format='NHWC',
+              is_training=is_training)
+        self._test_gradient(
+            x_shape,
+            dtype, [4],
+            np.float32,
+            use_gpu=False,
+            data_format='NHWC',
+            is_training=is_training)
+
   def _testBatchNormGradGrad(self, config):
     shape = config['shape']
     err_tolerance = config['err_tolerance']
@@ -515,26 +580,37 @@ class BatchNormalizationTest(test.TestCase):
           is_training=is_training,
           err_tolerance=err_tolerance)
 
-  def testBatchNormGradGrad(self):
-    configs = [{
+  def testBatchNormGradGradConfig1(self):
+    config = {
         'shape': [2, 3, 4, 5],
         'err_tolerance': 1e-2,
         'dtype': np.float32,
-    }, {
+    }
+    self._testBatchNormGradGrad(config)
+
+  def testBatchNormGradGradConfig2(self):
+    config = {
         'shape': [2, 3, 2, 2],
         'err_tolerance': 1e-3,
         'dtype': np.float32,
-    }, {
+    }
+    self._testBatchNormGradGrad(config)
+
+  def testBatchNormGradGradConfig3(self):
+    config = {
         'shape': [2, 3, 4, 5],
         'err_tolerance': 1e-2,
         'dtype': np.float16,
-    }, {
+    }
+    self._testBatchNormGradGrad(config)
+
+  def testBatchNormGradGradConfig4(self):
+    config = {
         'shape': [2, 3, 2, 2],
         'err_tolerance': 2e-3,
         'dtype': np.float16,
-    }]
-    for config in configs:
-      self._testBatchNormGradGrad(config)
+    }
+    self._testBatchNormGradGrad(config)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py
index 4b406ba8404d60fbed43afa30f44b1e1a9b26d84..cfff73774b5e585ed702369b9a74ff34e0a5febb 100644
--- a/tensorflow/python/ops/nn_grad.py
+++ b/tensorflow/python/ops/nn_grad.py
@@ -41,33 +41,48 @@ def _Conv2DBackpropInputGrad(op, grad):
   Returns:
     the gradients w.r.t. the input and the filter
   """
-  return [None,
-          nn_ops.conv2d_backprop_filter(grad, array_ops.shape(op.inputs[1]),
-                                        op.inputs[2], op.get_attr("strides"),
-                                        op.get_attr("padding"),
-                                        op.get_attr("use_cudnn_on_gpu"),
-                                        op.get_attr("data_format")),
-          nn_ops.conv2d(grad, op.inputs[1], op.get_attr("strides"),
-                        op.get_attr("padding"), op.get_attr("use_cudnn_on_gpu"),
-                        op.get_attr("data_format"))]
+  return [
+      None,
+      nn_ops.conv2d_backprop_filter(
+          grad,
+          array_ops.shape(op.inputs[1]),
+          op.inputs[2],
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
+          data_format=op.get_attr("data_format")),
+      nn_ops.conv2d(
+          grad,
+          op.inputs[1],
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
+          data_format=op.get_attr("data_format"))
+  ]
 
 
 @ops.RegisterGradient("Conv2DBackpropFilter")
 def _Conv2DBackpropFilterGrad(op, grad):
   return [
       nn_ops.conv2d_backprop_input(
-          array_ops.shape(op.inputs[0]), grad, op.inputs[2],
-          op.get_attr("strides"),
-          op.get_attr("padding"),
-          op.get_attr("use_cudnn_on_gpu"),
-          op.get_attr("data_format")),
-      None,
+          array_ops.shape(op.inputs[0]),
+          grad,
+          op.inputs[2],
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
+          data_format=op.get_attr("data_format")), None,
       nn_ops.conv2d(
-          op.inputs[0], grad,
-          op.get_attr("strides"),
-          op.get_attr("padding"),
-          op.get_attr("use_cudnn_on_gpu"),
-          op.get_attr("data_format"))
+          op.inputs[0],
+          grad,
+          dilations=op.get_attr("dilations"),
+          strides=op.get_attr("strides"),
+          padding=op.get_attr("padding"),
+          use_cudnn_on_gpu=op.get_attr("use_cudnn_on_gpu"),
+          data_format=op.get_attr("data_format"))
   ]
 
 
@@ -231,7 +246,7 @@ def _LogSoftmaxGrad(op, grad):
     The gradients w.r.t. the input.
   """
   softmax = math_ops.exp(op.outputs[0])
-  return grad - math_ops.reduce_sum(grad, 1, keep_dims=True) * softmax
+  return grad - math_ops.reduce_sum(grad, 1, keepdims=True) * softmax
 
 
 @ops.RegisterGradient("BiasAdd")
@@ -466,25 +481,32 @@ def _SparseSoftmaxCrossEntropyWithLogitsGrad(op, grad_0, _):
 
 @ops.RegisterGradient("Conv2D")
 def _Conv2DGrad(op, grad):
+  dilations = op.get_attr("dilations")
   strides = op.get_attr("strides")
   padding = op.get_attr("padding")
   use_cudnn_on_gpu = op.get_attr("use_cudnn_on_gpu")
   data_format = op.get_attr("data_format")
   shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])
-  return [nn_ops.conv2d_backprop_input(shape_0,
-                                       op.inputs[1],
-                                       grad,
-                                       strides,
-                                       padding,
-                                       use_cudnn_on_gpu,
-                                       data_format),
-          nn_ops.conv2d_backprop_filter(op.inputs[0],
-                                        shape_1,
-                                        grad,
-                                        strides,
-                                        padding,
-                                        use_cudnn_on_gpu,
-                                        data_format)]
+  return [
+      nn_ops.conv2d_backprop_input(
+          shape_0,
+          op.inputs[1],
+          grad,
+          dilations=dilations,
+          strides=strides,
+          padding=padding,
+          use_cudnn_on_gpu=use_cudnn_on_gpu,
+          data_format=data_format),
+      nn_ops.conv2d_backprop_filter(
+          op.inputs[0],
+          shape_1,
+          grad,
+          dilations=dilations,
+          strides=strides,
+          padding=padding,
+          use_cudnn_on_gpu=use_cudnn_on_gpu,
+          data_format=data_format)
+  ]
 
 
 @ops.RegisterGradient("DepthwiseConv2dNative")
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 00e3c7dc0f30a9b37f742917fde2f3a58b60ba64..fd96f7b8fcf423e2381f84b50b0532e46ce2fe6e 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import candidate_sampling_ops
 from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
@@ -341,7 +342,7 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
   with ops.name_scope(name, "l2_normalize", [x]) as name:
     axis = deprecated_argument_lookup("axis", axis, "dim", dim)
     x = ops.convert_to_tensor(x, name="x")
-    square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keep_dims=True)
+    square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keepdims=True)
     x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
     return math_ops.multiply(x, x_inv_norm, name=name)
 
@@ -593,8 +594,8 @@ def sufficient_statistics(x, axes, shift=None, keep_dims=False, name=None):
     else:  # no shift.
       m_ss = x
       v_ss = math_ops.square(x)
-    m_ss = math_ops.reduce_sum(m_ss, axes, keep_dims=keep_dims, name="mean_ss")
-    v_ss = math_ops.reduce_sum(v_ss, axes, keep_dims=keep_dims, name="var_ss")
+    m_ss = math_ops.reduce_sum(m_ss, axes, keepdims=keep_dims, name="mean_ss")
+    v_ss = math_ops.reduce_sum(v_ss, axes, keepdims=keep_dims, name="var_ss")
   return counts, m_ss, v_ss, shift
 
 
@@ -664,12 +665,12 @@ def moments(x, axes,
     # on 32-bit floats before converting the mean and variance back to fp16
     y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x
     # Compute true mean while keeping the dims for proper broadcasting.
-    mean = math_ops.reduce_mean(y, axes, keep_dims=True, name="mean")
+    mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean")
     # sample variance, not unbiased variance
     variance = math_ops.reduce_mean(
         math_ops.squared_difference(y, array_ops.stop_gradient(mean)),
         axes,
-        keep_dims=True,
+        keepdims=True,
         name="variance")
     if not keep_dims:
       mean = array_ops.squeeze(mean, axes)
@@ -714,7 +715,7 @@ def weighted_moments(x, axes, frequency_weights, name=None, keep_dims=False):
     # Note that we use keep_dims=True for our reductions regardless of the arg;
     # this is so that the results remain broadcast-compatible with the inputs.
     weighted_input_sum = math_ops.reduce_sum(
-        frequency_weights * x, axes, name="weighted_input_sum", keep_dims=True)
+        frequency_weights * x, axes, name="weighted_input_sum", keepdims=True)
 
     # The shape of the weights isn't necessarily the same as x's
     # shape, just broadcast-compatible with it -- so this expression
@@ -725,7 +726,7 @@ def weighted_moments(x, axes, frequency_weights, name=None, keep_dims=False):
     broadcasted_weights = frequency_weights + array_ops.zeros_like(x)
 
     sum_of_weights = math_ops.reduce_sum(
-        broadcasted_weights, axes, name="sum_of_weights", keep_dims=True)
+        broadcasted_weights, axes, name="sum_of_weights", keepdims=True)
 
     divisor = math_ops.reciprocal(sum_of_weights, name="inv_weight_sum")
 
@@ -736,7 +737,7 @@ def weighted_moments(x, axes, frequency_weights, name=None, keep_dims=False):
         frequency_weights * math_ops.squared_difference(x, weighted_mean),
         axes,
         name="weighted_distsq",
-        keep_dims=True)
+        keepdims=True)
 
     weighted_variance = math_ops.multiply(weighted_distsq, divisor)
 
@@ -863,7 +864,7 @@ def fused_batch_norm(
   # currently only use the V2 version for float16 inputs, which is not supported
   # by the V1 version.
   # pylint: disable=protected-access
-  if x.dtype == dtypes.float16:
+  if x.dtype == dtypes.float16 or x.dtype == dtypes.bfloat16:
     fused_batch_norm_func = gen_nn_ops._fused_batch_norm_v2
   else:
     fused_batch_norm_func = gen_nn_ops._fused_batch_norm
@@ -981,10 +982,11 @@ def _compute_sampled_logits(weights,
         Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
     name: A name for the operation (optional).
   Returns:
-    out_logits, out_labels: `Tensor` objects each with shape
+    out_logits: `Tensor` object with shape
         `[batch_size, num_true + num_sampled]`, for passing to either
         `nn.sigmoid_cross_entropy_with_logits` (NCE) or
         `nn.softmax_cross_entropy_with_logits` (sampled softmax).
+    out_labels: A Tensor object with the same shape as `out_logits`.
   """
 
   if isinstance(weights, variables.PartitionedVariable):
@@ -1095,15 +1097,16 @@ def _compute_sampled_logits(weights,
 
     # Construct output logits and labels. The true labels/logits start at col 0.
     out_logits = array_ops.concat([true_logits, sampled_logits], 1)
-    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
-    # of ones. We then divide by num_true to ensure the per-example labels sum
-    # to 1.0, i.e. form a proper probability distribution.
+
+    # true_logits is a float tensor, ones_like(true_logits) is a float
+    # tensor of ones. We then divide by num_true to ensure the per-example
+    # labels sum to 1.0, i.e. form a proper probability distribution.
     out_labels = array_ops.concat([
         array_ops.ones_like(true_logits) / num_true,
         array_ops.zeros_like(sampled_logits)
     ], 1)
 
-  return out_logits, out_labels
+    return out_logits, out_labels
 
 
 def nce_loss(weights,
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index ec7b9372cad9e51e83e93947ce46d66f15c339fd..865e459e900c0bbfd9b08fbc62725ac6f6a4bcf6 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -452,6 +452,7 @@ class _WithSpaceToBatch(object):
     self.input_shape = input_shape
     self.spatial_dims = spatial_dims
     self.dilation_rate = dilation_rate
+    self.data_format = data_format
     self.op = build_op(num_spatial_dims, "VALID")
     self.call = self._with_space_to_batch_call
 
@@ -496,6 +497,14 @@ class _WithSpaceToBatch(object):
 
     result_converted = array_ops.batch_to_space_nd(
         input=result, block_shape=dilation_rate, crops=crops)
+
+    # Recover channel information for output shape if channels are not last.
+    if self.data_format is not None and self.data_format.startswith("NC"):
+      if not result_converted.shape[1].value:
+        output_shape = result_converted.shape.as_list()
+        output_shape[1] = filter.shape[-1]
+        result_converted.set_shape(output_shape)
+
     return result_converted
 
   def __call__(self, inp, filter):  # pylint: disable=redefined-builtin
@@ -823,7 +832,8 @@ class Convolution(object):
         padding=padding,
         build_op=self._build_op,
         filter_shape=filter_shape,
-        spatial_dims=spatial_dims)
+        spatial_dims=spatial_dims,
+        data_format=data_format)
 
   def _build_op(self, _, padding):
     return _NonAtrousConvolution(
@@ -1205,13 +1215,14 @@ def conv2d_transpose(value,
       raise ValueError("padding must be either VALID or SAME:"
                        " {}".format(padding))
 
-    return gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_,
-                                            filter=filter,
-                                            out_backprop=value,
-                                            strides=strides,
-                                            padding=padding,
-                                            data_format=data_format,
-                                            name=name)
+    return gen_nn_ops.conv2d_backprop_input(
+        input_sizes=output_shape_,
+        filter=filter,
+        out_backprop=value,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        name=name)
 
 
 def atrous_conv2d_transpose(value,
@@ -1343,12 +1354,13 @@ def atrous_conv2d_transpose(value,
                    (in_width + pad_right_extra) // rate,
                    output_shape[3]]
 
-    value = gen_nn_ops.conv2d_backprop_input(input_sizes=input_sizes,
-                                             filter=filters,
-                                             out_backprop=value,
-                                             strides=[1, 1, 1, 1],
-                                             padding="VALID",
-                                             data_format="NHWC")
+    value = gen_nn_ops.conv2d_backprop_input(
+        input_sizes=input_sizes,
+        filter=filters,
+        out_backprop=value,
+        strides=[1, 1, 1, 1],
+        padding="VALID",
+        data_format="NHWC")
 
     # The crops argument to batch_to_space includes both padding components.
     batch_to_space_crop = [[pad_top, pad_bottom + pad_bottom_extra],
@@ -1486,7 +1498,7 @@ def bias_add_v1(value, bias, name=None):
     return gen_nn_ops._bias_add_v1(value, bias, name=name)
 
 
-def crelu(features, name=None):
+def crelu(features, name=None, axis=-1):
   """Computes Concatenated ReLU.
 
   Concatenates a ReLU which selects only the positive part of the activation
@@ -1498,13 +1510,14 @@ def crelu(features, name=None):
     features: A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`,
       `int16`, or `int8`.
     name: A name for the operation (optional).
+    axis: The axis that the output values are concatenated along. Default is -1.
 
   Returns:
     A `Tensor` with the same type as `features`.
   """
   with ops.name_scope(name, "CRelu", [features]) as name:
     features = ops.convert_to_tensor(features, name="features")
-    c = array_ops.concat([features, -features], -1, name=name)
+    c = array_ops.concat([features, -features], axis, name=name)
     return gen_nn_ops.relu(c)
 
 
@@ -1626,7 +1639,8 @@ def _softmax(logits, compute_op, dim=-1, name=None):
 
   # Swap logits' dimension of dim and its last dimension.
   input_rank = array_ops.rank(logits)
-  logits = _swap_axis(logits, dim, math_ops.subtract(input_rank, 1))
+  dim_axis = dim % shape.ndims
+  logits = _swap_axis(logits, dim_axis, math_ops.subtract(input_rank, 1))
   shape_after_swap = array_ops.shape(logits)
 
   # Reshape logits into a matrix.
@@ -1637,7 +1651,8 @@ def _softmax(logits, compute_op, dim=-1, name=None):
 
   # Transform back the output tensor.
   output = array_ops.reshape(output, shape_after_swap)
-  output = _swap_axis(output, dim, math_ops.subtract(input_rank, 1), name=name)
+  output = _swap_axis(
+      output, dim_axis, math_ops.subtract(input_rank, 1), name=name)
 
   # Make shape inference work since reshape and transpose may erase its static
   # shape.
@@ -2251,6 +2266,12 @@ def nth_element(input, n, reverse=False, name=None):
   return gen_nn_ops.nth_element(input, n, reverse=reverse, name=name)
 
 
+@deprecation.deprecated_arg_values(
+    None, "`NCHW` for data_format is deprecated, use `NCW` instead",
+    warn_once=True, data_format="NCHW")
+@deprecation.deprecated_arg_values(
+    None, "`NHWC` for data_format is deprecated, use `NWC` instead",
+    warn_once=True, data_format="NHWC")
 def conv1d(value, filters, stride, padding,
            use_cudnn_on_gpu=None, data_format=None,
            name=None):
@@ -2258,9 +2279,9 @@ def conv1d(value, filters, stride, padding,
 
   Given an input tensor of shape
     [batch, in_width, in_channels]
-  if data_format is "NHWC", or
+  if data_format is "NWC", or
     [batch, in_channels, in_width]
-  if data_format is "NCHW",
+  if data_format is "NCW",
   and a filter / kernel tensor of shape
   [filter_width, in_channels, out_channels], this op reshapes
   the arguments to pass them to conv2d to perform the equivalent
@@ -2285,9 +2306,9 @@ def conv1d(value, filters, stride, padding,
       the filter is moved right at each step.
     padding: 'SAME' or 'VALID'
     use_cudnn_on_gpu: An optional `bool`.  Defaults to `True`.
-    data_format: An optional `string` from `"NHWC", "NCHW"`.  Defaults
-      to `"NHWC"`, the data is stored in the order of
-      [batch, in_width, in_channels].  The `"NCHW"` format stores
+    data_format: An optional `string` from `"NWC", "NCW"`.  Defaults
+      to `"NWC"`, the data is stored in the order of
+      [batch, in_width, in_channels].  The `"NCW"` format stores
       data as [batch, in_channels, in_width].
     name: A name for the operation (optional).
 
@@ -2299,15 +2320,16 @@ def conv1d(value, filters, stride, padding,
   """
   with ops.name_scope(name, "conv1d", [value, filters]) as name:
     # Reshape the input tensor to [batch, 1, in_width, in_channels]
-    if data_format is None or data_format == "NHWC":
+    if data_format is None or data_format == "NHWC" or data_format == "NWC":
       data_format = "NHWC"
       spatial_start_dim = 1
       strides = [1, 1, stride, 1]
-    elif data_format == "NCHW":
+    elif data_format == "NCHW" or data_format == "NCW":
+      data_format = "NCHW"
       spatial_start_dim = 2
       strides = [1, 1, 1, stride]
     else:
-      raise ValueError("data_format must be \"NHWC\" or \"NCHW\".")
+      raise ValueError("data_format must be \"NWC\" or \"NCW\".")
     value = array_ops.expand_dims(value, spatial_start_dim)
     filters = array_ops.expand_dims(filters, 0)
     result = gen_nn_ops.conv2d(value, filters, strides, padding,
diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py
index 3b918e4f74c64868ef74f7e26295941c6f2801ff..66bc0803b736829ad8d8f3243bf23e146f2f89b9 100644
--- a/tensorflow/python/ops/nn_test.py
+++ b/tensorflow/python/ops/nn_test.py
@@ -90,6 +90,18 @@ class SoftmaxTest(test_lib.TestCase):
     self.assertAllClose(y_tf_np, y_np, eps)
     self.assertAllClose(y_tf_last_dim_np, y_np, eps)
 
+  def testSoftmaxAxes(self):
+    arr = np.linspace(0., 1, 12).reshape(3, 4)
+    x_neg_axis = nn_ops.softmax(arr, axis=-2)
+    y_pos_axis = nn_ops.softmax(arr, axis=0)
+    z_gt_axis = nn_ops.softmax(arr, axis=4)
+    x_neg_axis_tf = self.evaluate(x_neg_axis)
+    y_pos_axis_tf = self.evaluate(y_pos_axis)
+    z_gt_axis_tf = self.evaluate(z_gt_axis)
+    eps = 1e-3
+    self.assertAllClose(x_neg_axis_tf, y_pos_axis_tf, eps)
+    self.assertAllClose(y_pos_axis_tf, z_gt_axis_tf, eps)
+
   def testGradient(self):
     x_shape = [5, 10]
     x_np = np.random.randn(*x_shape).astype(np.float64)
@@ -164,6 +176,18 @@ class LogSoftmaxTest(test_lib.TestCase):
     eps = 1e-3
     self.assertAllClose(y_tf_np, y_np, eps)
 
+  def testLogSoftmaxAxes(self):
+    arr = np.linspace(0., 1, 12).reshape(3, 4)
+    x_neg_axis = nn_ops.log_softmax(arr, axis=-2)
+    y_pos_axis = nn_ops.log_softmax(arr, axis=0)
+    z_gt_axis = nn_ops.log_softmax(arr, axis=4)
+    x_neg_axis_tf = self.evaluate(x_neg_axis)
+    y_pos_axis_tf = self.evaluate(y_pos_axis)
+    z_gt_axis_tf = self.evaluate(z_gt_axis)
+    eps = 1e-3
+    self.assertAllClose(x_neg_axis_tf, y_pos_axis_tf, eps)
+    self.assertAllClose(y_pos_axis_tf, z_gt_axis_tf, eps)
+
   def testGradient(self):
     x_shape = [5, 10]
     x_np = np.random.randn(*x_shape).astype(np.float64)
@@ -953,5 +977,64 @@ class MomentsTest(test_lib.TestCase):
     self.doOutputTest((10, 10, 10, 30), (1, 2, 3))
 
 
+class DataFormatDimMapTest(test_lib.TestCase):
+
+  def _test(self, x_val, y_val_expected):
+    x = constant_op.constant(x_val)
+    y = nn_ops.data_format_dim_map(x)
+    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+      y_val = sess.run(y)
+      self.assertAllEqual(y_val, y_val_expected)
+
+  def test(self):
+    self._test(0, 0)
+    self._test(1, 2)
+    self._test(2, 3)
+    self._test(3, 1)
+    self._test(-1, 1)
+    self._test(-2, 3)
+    self._test(-3, 2)
+    self._test(-4, 0)
+    self._test([1, 3], [2, 1])
+    self._test([1, 3, -2], [2, 1, 3])
+    self._test([1, -3, -2], [2, 2, 3])
+    self._test([[1, -3], [1, -1]], [[2, 2], [2, 1]])
+
+
+class DataFormatVectorPermuteTest(test_lib.TestCase):
+
+  def testNHWCToNCHW(self):
+    x_val = [7, 4, 9, 3]
+    x = constant_op.constant(x_val)
+    y = nn_ops.data_format_vec_permute(x)
+    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+      y_val = sess.run(y)
+      self.assertAllEqual(y_val, [7, 3, 4, 9])
+
+  def testNCHWToNHWC(self):
+    x_val = [7, 4, 9, 3]
+    x = constant_op.constant(x_val)
+    y = nn_ops.data_format_vec_permute(x, src_format="NCHW", dst_format="NHWC")
+    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+      y_val = sess.run(y)
+      self.assertAllEqual(y_val, [7, 9, 3, 4])
+
+  def testNHWCToNCHW2D(self):
+    x_val = [[7, 4], [9, 3], [4, 5], [5, 1]]
+    x = constant_op.constant(x_val)
+    y = nn_ops.data_format_vec_permute(x)
+    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+      y_val = sess.run(y)
+      self.assertAllEqual(y_val, [[7, 4], [5, 1], [9, 3], [4, 5]])
+
+  def testNCHWToNHWC2D(self):
+    x_val = [[7, 4], [9, 3], [4, 5], [5, 1]]
+    x = constant_op.constant(x_val)
+    y = nn_ops.data_format_vec_permute(x, src_format="NCHW", dst_format="NHWC")
+    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+      y_val = sess.run(y)
+      self.assertAllEqual(y_val, [[7, 4], [4, 5], [5, 1], [9, 3]])
+
+
 if __name__ == "__main__":
   test_lib.main()
diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py
index 14aef01dec337d7f59c799695871c8a169c3d63a..7b6f08f68cec60a464a31671bab2cf88b3293bb9 100644
--- a/tensorflow/python/ops/parsing_ops.py
+++ b/tensorflow/python/ops/parsing_ops.py
@@ -385,7 +385,7 @@ def parse_example(serialized, features, name=None, example_names=None):
   A `values[i]` comes from a position `k` in the feature of an example at batch
   entry `batch`. This positional information is recorded in `indices[i]` as
   `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of
-  the feature in the example at with key `SparseFeature.index_key[j].
+  the feature in the example at with key `SparseFeature.index_key[j]`.
   In other words, we split the indices (except the first index indicating the
   batch entry) of a `SparseTensor` by dimension into different features of the
   `Example`. Due to its complexity a `VarLenFeature` should be preferred over a
@@ -749,6 +749,8 @@ def parse_single_example(serialized, features, name=None, example_names=None):
   """
   if not features:
     raise ValueError("Missing features.")
+  if example_names is None:
+    return parse_single_example_v2(serialized, features, name)
   features = _prepend_none_dimension(features)
   (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
    dense_shapes) = _features_to_raw_params(
@@ -1205,3 +1207,199 @@ def decode_csv(records, record_defaults, field_delim=",",
       field_delim=field_delim, use_quote_delim=use_quote_delim,
       na_value=na_value, name=name)
   # pylint: enable=protected-access
+
+
+# TODO(b/70890287): Combine the implementation of this op and
+# `parse_single_example()` after 1/10/2018.
+def parse_single_example_v2(serialized, features, name=None):
+  # pylint: disable=line-too-long
+  """Parses an `Example` proto into a `dict` of tensors.
+
+  Parses a serialized
+  [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
+  proto given in `serialized`.
+
+  This op parses serialized examples into a dictionary mapping keys to `Tensor`
+  and `SparseTensor` objects. `features` is a dict from keys to `VarLenFeature`,
+  `SparseFeature`, and `FixedLenFeature` objects. Each `VarLenFeature`
+  and `SparseFeature` is mapped to a `SparseTensor`, and each
+  `FixedLenFeature` is mapped to a `Tensor`.
+
+  Each `VarLenFeature` maps to a `SparseTensor` of the specified type
+  representing a ragged matrix. Its indices are `[index]` where
+  `index` is the value's index in the list of values associated with
+  that feature and example.
+
+  Each `SparseFeature` maps to a `SparseTensor` of the specified type
+  representing a Tensor of `dense_shape` `SparseFeature.size`.
+  Its `values` come from the feature in the examples with key `value_key`.
+  A `values[i]` comes from a position `k` in the feature of an example at batch
+  entry `batch`. This positional information is recorded in `indices[i]` as
+  `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of
+  the feature in the example at with key `SparseFeature.index_key[j]`.
+  In other words, we split the indices (except the first index indicating the
+  batch entry) of a `SparseTensor` by dimension into different features of the
+  `Example`. Due to its complexity a `VarLenFeature` should be preferred over a
+  `SparseFeature` whenever possible.
+
+  Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type (or
+  `tf.float32` if not specified) and shape `df.shape`.
+
+  `FixedLenFeature` entries with a `default_value` are optional. With no default
+  value, we will fail if that `Feature` is missing from any example in
+  `serialized`.
+
+  Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
+  (or `tf.float32` if not specified) and shape `(None,) + df.shape`.
+
+  Args:
+    serialized: A scalar (0-D Tensor) string, a serialized `Example` proto.
+    features: A `dict` mapping feature keys to `FixedLenFeature`,
+      `VarLenFeature`, and `SparseFeature` values.
+    name: A name for this operation (optional).
+
+  Returns:
+    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.
+
+  Raises:
+    ValueError: if any feature is invalid.
+  """
+  if not features:
+    raise ValueError("Missing: features was %s." % features)
+  features = _prepend_none_dimension(features)
+  (sparse_keys, sparse_types, dense_keys, dense_types,
+   dense_defaults, dense_shapes) = _features_to_raw_params(
+       features,
+       [VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature])
+  outputs = _parse_single_example_v2_raw(serialized, sparse_keys, sparse_types,
+                                         dense_keys, dense_types,
+                                         dense_defaults, dense_shapes, name)
+  return _construct_sparse_tensors_for_sparse_features(features, outputs)
+
+
+def _parse_single_example_v2_raw(serialized, sparse_keys, sparse_types,
+                                 dense_keys, dense_types, dense_defaults,
+                                 dense_shapes, name):
+  """Parses `Example` protos.
+
+  Args:
+    serialized: A scalar (0-D Tensor) string, containing a binary
+      serialized `Example` proto.
+    sparse_keys: A list of string keys in the examples' features.
+      The results for these keys will be returned as `SparseTensor` objects.
+    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
+      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
+      and `tf.string` (`BytesList`) are supported.
+    dense_keys: A list of string keys in the examples' features.
+      The results for these keys will be returned as `Tensor`s
+    dense_types: A list of DTypes of the same length as `dense_keys`.
+      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
+      and `tf.string` (`BytesList`) are supported.
+    dense_defaults: A dict mapping string keys to `Tensor`s.
+      The keys of the dict must match the dense_keys of the feature.
+    dense_shapes: A list of tuples with the same length as `dense_keys`.
+      The shape of the data for each dense feature referenced by `dense_keys`.
+      Required for any input tensors identified by `dense_keys`.  Must be
+      either fully defined, or may contain an unknown first dimension.
+      An unknown first dimension means the feature is treated as having
+      a variable number of blocks, and the output shape along this dimension
+      is considered unknown at graph build time.  Padding is applied for
+      minibatch elements smaller than the maximum number of blocks for the
+      given feature along this dimension.
+    name: A name for this operation (optional).
+
+  Returns:
+    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.
+
+  Raises:
+    ValueError: If sparse and dense key sets intersect, or input lengths do not
+      match up.
+  """
+  with ops.name_scope(name, "ParseSingleExample", [serialized]):
+    serialized = ops.convert_to_tensor(serialized, name="serialized")
+    dense_defaults = collections.OrderedDict(
+    ) if dense_defaults is None else dense_defaults
+    sparse_keys = [] if sparse_keys is None else sparse_keys
+    sparse_types = [] if sparse_types is None else sparse_types
+    dense_keys = [] if dense_keys is None else dense_keys
+    dense_types = [] if dense_types is None else dense_types
+    dense_shapes = ([[]] * len(dense_keys)
+                    if dense_shapes is None else dense_shapes)
+
+    num_dense = len(dense_keys)
+    num_sparse = len(sparse_keys)
+
+    if len(dense_shapes) != num_dense:
+      raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d" %
+                       (len(dense_shapes), num_dense))
+    if len(dense_types) != num_dense:
+      raise ValueError("len(dense_types) != len(num_dense): %d vs. %d" %
+                       (len(dense_types), num_dense))
+    if len(sparse_types) != num_sparse:
+      raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d" %
+                       (len(sparse_types), num_sparse))
+    if num_dense + num_sparse == 0:
+      raise ValueError("Must provide at least one sparse key or dense key")
+    if not set(dense_keys).isdisjoint(set(sparse_keys)):
+      raise ValueError(
+          "Dense and sparse keys must not intersect; intersection: %s" %
+          set(dense_keys).intersection(set(sparse_keys)))
+
+    # Convert dense_shapes to TensorShape object.
+    dense_shapes = [tensor_shape.as_shape(shape) for shape in dense_shapes]
+
+    dense_defaults_vec = []
+    for i, key in enumerate(dense_keys):
+      default_value = dense_defaults.get(key)
+      dense_shape = dense_shapes[i]
+      if (dense_shape.ndims is not None and dense_shape.ndims > 0 and
+          dense_shape[0].value is None):
+        # Variable stride dense shape, the default value should be a
+        # scalar padding value
+        if default_value is None:
+          default_value = ops.convert_to_tensor(
+              "" if dense_types[i] == dtypes.string else 0,
+              dtype=dense_types[i])
+        else:
+          # Reshape to a scalar to ensure user gets an error if they
+          # provide a tensor that's not intended to be a padding value
+          # (0 or 2+ elements).
+          key_name = "padding_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
+          default_value = ops.convert_to_tensor(
+              default_value, dtype=dense_types[i], name=key_name)
+          default_value = array_ops.reshape(default_value, [])
+      else:
+        if default_value is None:
+          default_value = constant_op.constant([], dtype=dense_types[i])
+        elif not isinstance(default_value, ops.Tensor):
+          key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
+          default_value = ops.convert_to_tensor(
+              default_value, dtype=dense_types[i], name=key_name)
+          default_value = array_ops.reshape(default_value, dense_shape)
+
+      dense_defaults_vec.append(default_value)
+
+    # Finally, convert dense_shapes to TensorShapeProto
+    dense_shapes = [shape.as_proto() for shape in dense_shapes]
+
+    # pylint: disable=protected-access
+    outputs = gen_parsing_ops.parse_single_example(
+        serialized=serialized,
+        dense_defaults=dense_defaults_vec,
+        num_sparse=len(sparse_keys),
+        sparse_keys=sparse_keys,
+        sparse_types=sparse_types,
+        dense_keys=dense_keys,
+        dense_shapes=dense_shapes,
+        name=name)
+    # pylint: enable=protected-access
+
+    (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs
+
+    sparse_tensors = [
+        sparse_tensor.SparseTensor(ix, val, shape)
+        for (ix, val,
+             shape) in zip(sparse_indices, sparse_values, sparse_shapes)
+    ]
+
+    return dict(zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
diff --git a/tensorflow/python/ops/quantized_conv_ops_test.py b/tensorflow/python/ops/quantized_conv_ops_test.py
index 5ea47ea40e5f283736523d5d09a63176b5e8fbbf..5e9e71002705293403de83276fb70099d8864907 100644
--- a/tensorflow/python/ops/quantized_conv_ops_test.py
+++ b/tensorflow/python/ops/quantized_conv_ops_test.py
@@ -93,7 +93,7 @@ class Conv2DTest(test.TestCase):
     quantized_range = ((quantized_max - quantized_min) * range_adjust)
     range_scale = (quantized_range / number_of_steps)
     lowest_quantized = -(1 << (number_of_bits - 1))
-    result = np.array([(quantized_min + ((x - lowest_quantized) * range_scale))
+    result = np.array([(quantized_min + ((float(x) - lowest_quantized) * range_scale))
                        for x in quantized.flatten()])
     return result
 
diff --git a/tensorflow/python/ops/quantized_ops_test.py b/tensorflow/python/ops/quantized_ops_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bf3b35e13879069e40162fc50180520a5f855f6
--- /dev/null
+++ b/tensorflow/python/ops/quantized_ops_test.py
@@ -0,0 +1,57 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for quantized operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class QuantizedOpsTest(test.TestCase):
+
+  def __init__(self, method_name="runTest"):
+    super(QuantizedOpsTest, self).__init__(method_name)
+
+  def testQuantizeOp(self):
+    expected_output = [1, 1, 2, 127, 255, 255]
+    with self.test_session(use_gpu=False) as sess:
+      x = constant_op.constant([1.0, 1.25, 1.75, 127.0, 255.0, 500.0], shape=[6], dtype=dtypes.float32)
+      x_min = 0.0
+      x_max = 255.0
+      op = array_ops.quantize(x, x_min, x_max, dtypes.quint8, mode="MIN_FIRST")
+      value = sess.run(op)
+      self.assertArrayNear(expected_output, value.output, 0.1)
+
+  def testDequantizeOp(self):
+    expected_output = [1.0, 2.0, 4.0, 8.0, 16.0, 255.0]
+    inp = np.array([1, 2, 4, 8, 16, 255]).astype(np.uint8)
+    with self.test_session(use_gpu=False) as sess:
+      x = constant_op.constant(inp, shape=[6], dtype=dtypes.quint8)
+      x_min = 0.0
+      x_max = 255.0
+      op = array_ops.dequantize(x, x_min, x_max, mode="MIN_FIRST")
+      value = sess.run(op)
+      self.assertArrayNear(expected_output, value, 0.1)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py
index 52fb5131cfa6d7152ef49d7c10d5f57292d81f24..a2264a7bdfff398e405ccd4a509d20c592ee886b 100644
--- a/tensorflow/python/ops/random_ops.py
+++ b/tensorflow/python/ops/random_ops.py
@@ -152,7 +152,7 @@ def truncated_normal(shape,
     mean: A 0-D Tensor or Python value of type `dtype`. The mean of the
       truncated normal distribution.
     stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation
-      of the truncated normal distribution.
+      of the normal distribution, before truncation.
     dtype: The type of the output.
     seed: A Python integer. Used to create a random seed for the distribution.
       See
@@ -220,8 +220,8 @@ def random_uniform(shape,
     ValueError: If `dtype` is integral and `maxval` is not specified.
   """
   dtype = dtypes.as_dtype(dtype)
-  if dtype not in (dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int32,
-                   dtypes.int64):
+  if dtype not in (dtypes.float16, dtypes.bfloat16, dtypes.float32,
+                   dtypes.float64, dtypes.int32, dtypes.int64):
     raise ValueError("Invalid dtype %r" % dtype)
   if maxval is None:
     if dtype.is_integer:
@@ -316,7 +316,7 @@ def random_crop(value, size, seed=None, name=None):
     return array_ops.slice(value, offset, size, name=name)
 
 
-def multinomial(logits, num_samples, seed=None, name=None):
+def multinomial(logits, num_samples, seed=None, name=None, output_dtype=None):
   """Draws samples from a multinomial distribution.
 
   Example:
@@ -336,6 +336,7 @@ def multinomial(logits, num_samples, seed=None, name=None):
       @{tf.set_random_seed}
       for behavior.
     name: Optional name for the operation.
+    output_dtype: integer type to use for the output. Defaults to int64.
 
   Returns:
     The drawn samples of shape `[batch_size, num_samples]`.
@@ -344,7 +345,7 @@ def multinomial(logits, num_samples, seed=None, name=None):
     logits = ops.convert_to_tensor(logits, name="logits")
     seed1, seed2 = random_seed.get_seed(seed)
     return gen_random_ops.multinomial(
-        logits, num_samples, seed=seed1, seed2=seed2)
+        logits, num_samples, seed=seed1, seed2=seed2, output_dtype=output_dtype)
 
 
 ops.NotDifferentiable("Multinomial")
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index 343e38f960e00933293dcb1d2df8371355b16d08..879c206313e476088b388f39a9a112f5cc449152 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -184,11 +184,12 @@ class ResourceVariable(variables.Variable):
     assign = a.assign(2.0)
     with tf.control_dependencies([assign]):
       b = a.read_value()
-
-    other_assign = a.assign(3.0)
+    with tf.control_dependencies([b]):
+      other_assign = a.assign(3.0)
     with tf.control_dependencies([other_assign]):
-      tf.Print(b, [b]).run()  # Will print 2.0 because the value was read before
-                              # other_assign ran.
+      # Will print 2.0 because the value was read before other_assign ran. If
+      # `a` was a tf.Variable instead, 2.0 or 3.0 could be printed.
+      tf.Print(b, [b]).eval()
   ```
 
   To enforce these consistency properties tf.ResourceVariable might make more
@@ -275,10 +276,6 @@ class ResourceVariable(variables.Variable):
           dtype=dtype,
           constraint=constraint)
 
-  # LINT.IfChange
-  # _VariableFromResource inherits from ResourceVariable but
-  # doesn't call the constructor, so changes here might need to be reflected
-  # there.
   # pylint: disable=unused-argument
   def _init_from_args(self,
                       initial_value=None,
@@ -437,7 +434,8 @@ class ResourceVariable(variables.Variable):
               self._initializer_op = (
                   gen_resource_variable_ops.assign_variable_op(
                       self._handle,
-                      self._build_initializer_expr(initial_value),
+                      self._try_guard_against_uninitialized_dependencies(
+                          initial_value),
                       name=n))
           with ops.name_scope("Read"), ops.colocate_with(self._handle):
             # Manually assign reads to the handle's device to avoid log
@@ -521,7 +519,6 @@ class ResourceVariable(variables.Variable):
     self._dtype = dtypes.as_dtype(self._handle.op.get_attr("dtype"))
     self._graph_element = self.value()
     self._constraint = None
-  # LINT.ThenChange(//tensorflow/python/eager/graph_callable.py)
 
   def __nonzero__(self):
     return self.__bool__()
@@ -887,26 +884,14 @@ def _ReadGrad(_, grad):
 def _GatherGrad(op, grad):
   """Gradient for gather op."""
   # Build appropriately shaped IndexedSlices
-  # Walk graph back until the original handle is found.
-  # TODO(apassos): more robust way of getting the shape.
-  # TODO(apassos): implement this for EAGER mode.
-  if context.in_eager_mode():
-    dense_shape = gen_resource_variable_ops.variable_shape(op.inputs[0])
-    return (ops.IndexedSlices(grad,
-                              op.inputs[1],
-                              dense_shape=dense_shape),
-            None)
   handle = op.inputs[0]
-  while handle.op.type != "VarHandleOp":
-    handle = handle.op.inputs[0]
-  params_shape = ops.convert_to_tensor(
-      tensor_shape.TensorShape(handle.op.get_attr("shape")))
   indices = op.inputs[1]
+  params_shape = gen_resource_variable_ops.variable_shape(handle)
   size = array_ops.expand_dims(array_ops.size(indices), 0)
   values_shape = array_ops.concat([size, params_shape[1:]], 0)
   values = array_ops.reshape(grad, values_shape)
   indices = array_ops.reshape(indices, size)
-  return [ops.IndexedSlices(values, indices, params_shape), None]
+  return (ops.IndexedSlices(values, indices, params_shape), None)
 
 
 def _to_proto_fn(v, export_scope=None):
diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py
index e30b19842f08d335ce7967b77dcb49578fb3fe85..fd14740a00a24b006cd1e47b20d46e86e261528a 100644
--- a/tensorflow/python/ops/rnn.py
+++ b/tensorflow/python/ops/rnn.py
@@ -35,6 +35,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import tensor_array_ops
@@ -665,7 +666,7 @@ def _dynamic_rnn_loop(cell,
     final_outputs:
       A `Tensor` of shape `[time, batch_size, cell.output_size]`.  If
       `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
-      objects, then this returns a (possibly nsted) tuple of Tensors matching
+      objects, then this returns a (possibly nested) tuple of Tensors matching
       the corresponding shapes.
     final_state:
       A `Tensor`, or possibly nested tuple of Tensors, matching in length
@@ -806,11 +807,28 @@ def _dynamic_rnn_loop(cell,
 
     return (time + 1, output_ta_t, new_state)
 
+  # TODO(pbar) `loop_bound` can be reduced to `max_sequence_length` once
+  # TensorArray shape inference is working.  When sequence lengths are highly
+  # variable, this will reduce the performance overheads of padding to a fixed
+  # maximum length.
+  loop_bound = time_steps
+
+  # This is a workaround since we cannot currently use maximum_iterations if
+  # time_steps is defined inside control flow, see the comment in
+  # control_flow_ops.py.
+  if (context.in_eager_mode() or
+      not (control_flow_util.IsInWhileLoop(time_steps.op) or
+           control_flow_util.IsInCond(time_steps.op))):
+    maximum_iterations = time_steps
+  else:
+    maximum_iterations = None
+
   _, output_final_ta, final_state = control_flow_ops.while_loop(
-      cond=lambda time, *_: time < time_steps,
+      cond=lambda time, *_: time < loop_bound,
       body=_time_step,
       loop_vars=(time, output_ta, state),
       parallel_iterations=parallel_iterations,
+      maximum_iterations=maximum_iterations,
       swap_memory=swap_memory)
 
   # Unpack final output if not using output tuples.
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index 8aaf77f1733fc0569ebcbc71373a204cfb3f2913..b41aff76d4961c8a563599ee01e5956ab05fc71d 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -238,7 +238,8 @@ class RNNCell(base_layer.Layer):
     # Try to use the last cached zero_state. This is done to avoid recreating
     # zeros, especially when eager execution is enabled.
     state_size = self.state_size
-    if hasattr(self, "_last_zero_state"):
+    is_eager = context.in_eager_mode()
+    if is_eager and hasattr(self, "_last_zero_state"):
       (last_state_size, last_batch_size, last_dtype,
        last_output) = getattr(self, "_last_zero_state")
       if (last_batch_size == batch_size and
@@ -247,7 +248,8 @@ class RNNCell(base_layer.Layer):
         return last_output
     with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
       output = _zero_state_tensors(state_size, batch_size, dtype)
-    self._last_zero_state = (state_size, batch_size, dtype, output)
+    if is_eager:
+      self._last_zero_state = (state_size, batch_size, dtype, output)
     return output
 
 
@@ -265,7 +267,7 @@ class _LayerRNNCell(RNNCell):
   `call` methods do not access Variables `tf.get_variable`.
   """
 
-  def __call__(self, inputs, state, scope=None):
+  def __call__(self, inputs, state, scope=None, *args, **kwargs):
     """Run this RNN cell on inputs, starting from the given state.
 
     Args:
@@ -274,8 +276,9 @@ class _LayerRNNCell(RNNCell):
         with shape `[batch_size, self.state_size]`.  Otherwise, if
         `self.state_size` is a tuple of integers, this should be a tuple
         with shapes `[batch_size, s] for s in self.state_size`.
-      scope: `VariableScope` for the created subgraph; if not provided,
-        defaults to standard `tf.layers.Layer` behavior.
+      scope: optional cell scope.
+      *args: Additional positional arguments.
+      **kwargs: Additional keyword arguments.
 
     Returns:
       A pair containing:
@@ -287,7 +290,8 @@ class _LayerRNNCell(RNNCell):
     # Bypass RNNCell's variable capturing semantics for LayerRNNCell.
     # Instead, it is up to subclasses to provide a proper build
     # method.  See the class docstring for more details.
-    return base_layer.Layer.__call__(self, inputs, state, scope=scope)
+    return base_layer.Layer.__call__(self, inputs, state, scope=scope,
+                                     *args, **kwargs)
 
 
 class BasicRNNCell(_LayerRNNCell):
@@ -1037,7 +1041,7 @@ class DropoutWrapper(RNNCell):
       inputs = self._dropout(inputs, "input",
                              self._recurrent_input_noise,
                              self._input_keep_prob)
-    output, new_state = self._cell(inputs, state, scope)
+    output, new_state = self._cell(inputs, state, scope=scope)
     if _should_dropout(self._state_keep_prob):
       # Identify which subsets of the state to perform dropout on and
       # which ones to keep.
diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py
index 2c3667dffedf111f37a9f6eadcc7f1de83c2347e..c0c1ade495455df6a4965eefba4b823ca84e7c31 100644
--- a/tensorflow/python/ops/script_ops.py
+++ b/tensorflow/python/ops/script_ops.py
@@ -29,11 +29,41 @@ import numpy as np
 import six
 
 from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.eager import context
 from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gen_script_ops
 
 
+class EagerFunc(object):
+  """A wrapper for a function owned by an EagerPyFunc."""
+
+  def __init__(self, func, Tout):
+    """Constructs an EagerFunc.
+
+    Args:
+      func: The function to wrap.
+      Tout: A list of datatypes for the output; an empty list if the output is
+            None.
+    """
+    self._func = func
+    self._out_dtypes = Tout
+
+  def __call__(self, *args, **kwargs):
+    """Passes args, kwargs to `self._func`, which is executed eagerly."""
+    with context.eager_mode():
+      ret = self._func(*args, **kwargs)
+      if isinstance(ret, (tuple, list)):
+        return [
+            ops.convert_to_tensor(x, dtype=dtype)
+            for (x, dtype) in zip(ret, self._out_dtypes)
+        ]
+      elif ret is None:
+        return ret
+      else:
+        return ops.convert_to_tensor(ret, dtype=self._out_dtypes[0])
+
+
 class FuncRegistry(object):
   """A helper class to keep track of registered py functions.
 
@@ -91,16 +121,20 @@ class FuncRegistry(object):
     if func is None:
       raise ValueError("callback %s is not found" % token)
     ret = func(*args)
-    # Strings seem to lead to a memory leak here if they're not wrapped in a
-    # list.
-    if isinstance(ret, six.binary_type):
-      ret = [ret]
-    # Ensures that we return either a single numpy array or a list of numpy
-    # arrays.
-    if isinstance(ret, (tuple, list)):
-      return [self._convert(x) for x in ret]
+
+    if isinstance(func, EagerFunc):
+      return ret
     else:
-      return self._convert(ret)
+      # Strings seem to lead to a memory leak here if they're not wrapped in a
+      # list.
+      if isinstance(ret, six.binary_type):
+        ret = [ret]
+      # Ensures that we return either a single numpy array or a list of numpy
+      # arrays.
+      if isinstance(ret, (tuple, list)):
+        return [self._convert(x) for x in ret]
+      else:
+        return self._convert(ret)
 
   def size(self):
     """Returns how many functions are currently registered."""
@@ -129,6 +163,86 @@ class CleanupFunc(object):
     _py_funcs.remove(self._token)
 
 
+def _internal_py_func(func, inp, Tout, stateful=None, eager=False, name=None):
+  """See documentation for py_func and eager_py_func."""
+
+  is_list_or_tuple = False
+  if isinstance(Tout, (list, tuple)):
+    is_list_or_tuple = True
+  else:
+    Tout = [Tout]
+
+  if eager:
+    func = EagerFunc(func, Tout)
+
+  token = _py_funcs.insert(func)
+  # We tie the registered function's lifetime with the current default graph,
+  # i.e., when the current graph is destroyed, we remove its py funcs.
+  graph = ops.get_default_graph()
+
+  # pylint: disable=protected-access
+  while isinstance(graph, function._FuncGraph):
+    # If the py_func was declared inside a _FuncGraph, its lifetime should be
+    # bound to that of the outer graph instead.
+    graph = graph._outer_graph
+
+  cleanup = CleanupFunc(token)
+
+  # TODO(zhifengc): Consider adding a Graph method to collect
+  # `cleanup` objects in one of its member.
+  if not hasattr(graph, "_cleanup_py_funcs_used_in_graph"):
+    graph._cleanup_py_funcs_used_in_graph = []
+
+  # When `graph` is destroyed, elements in _cleanup_py_funcs_used_in_graph
+  # will be destroyed and their __del__ will remove the 'token' from
+  # the funcs registry.
+  graph._cleanup_py_funcs_used_in_graph.append(cleanup)
+  # pylint: enable=protected-access
+
+  # pylint: disable=protected-access
+  if eager:
+    result = gen_script_ops._eager_py_func(
+        input=inp, token=token, Tout=Tout, name=name)
+  else:
+    if stateful:
+      result = gen_script_ops._py_func(
+          input=inp, token=token, Tout=Tout, name=name)
+    else:
+      result = gen_script_ops._py_func_stateless(
+          input=inp, token=token, Tout=Tout, name=name)
+  # pylint: enable=protected-access
+  return result if is_list_or_tuple else result[0]
+
+
+def eager_py_func(func, inp, Tout, name=None):
+  """Wraps a python function into a TensorFlow op.
+
+  When the returned op is executed, `func` is invoked with eager execution
+  enabled. Inputs are Tensor objects and func must return None or objects
+  that may be converted to Tensor objects.
+
+  This function has the same limitations as `py_func` with respect to
+  serialization and distribution.
+
+  Args:
+    func: A Python function which accepts a list of `Tensor` objects
+      having element types that match the corresponding `tf.Tensor` objects
+      in `inp` and returns a list of `Tensor` objects (or a single
+      `Tensor`, or `None`) having element types that match the
+      corresponding values in `Tout`.
+    inp: A list of `Tensor` objects.
+    Tout: A list or tuple of tensorflow data types or a single tensorflow data
+      type if there is only one, indicating what `func` returns; an empty list
+      if no value is returned (i.e., if the return value is `None`).
+    name: A name for the operation (optional).
+
+  Returns:
+    A list of `Tensor` or a single `Tensor` which `func` computes; an empty list
+    if `func` returns None.
+  """
+  return _internal_py_func(func=func, inp=inp, Tout=Tout, eager=True, name=name)
+
+
 def py_func(func, inp, Tout, stateful=True, name=None):
   """Wraps a python function and uses it as a TensorFlow op.
 
@@ -182,46 +296,12 @@ def py_func(func, inp, Tout, stateful=True, name=None):
   Returns:
     A list of `Tensor` or a single `Tensor` which `func` computes.
   """
-  token = _py_funcs.insert(func)
-  # We tie the registered function's life-time with the current
-  # default graph. I.e., when the current graph is destroyed, we
-  # should remove its py funcs.
-  g = ops.get_default_graph()
-
-  # pylint: disable=protected-access
-  while isinstance(g, function._FuncGraph):
-    # If the py_func was declared inside a _FuncGraph, its lifetime should be
-    # bound to that of the outer graph instead.
-    g = g._outer_graph
-
-  cleanup = CleanupFunc(token)
-
-  # TODO(zhifengc): Consider adding a Graph method to collect
-  # `cleanup` objects in one of its member.
-  if not hasattr(g, "_cleanup_py_funcs_used_in_graph"):
-    g._cleanup_py_funcs_used_in_graph = []
-
-  # When g is destroyed, elements in _cleanup_py_funcs_used_in_graph
-  # will be destroyed and their __del__ will remove the 'token' from
-  # the funcs registry.
-  g._cleanup_py_funcs_used_in_graph.append(cleanup)
-  # pylint: enable=protected-access
-
-  if isinstance(Tout, (list, tuple)):
-    is_list_or_tuple = True
-  else:
-    Tout = [Tout]
-    is_list_or_tuple = False
-  # pylint: disable=protected-access
-  if stateful:
-    result = gen_script_ops._py_func(
-        input=inp, token=token, Tout=Tout, name=name)
-  else:
-    result = gen_script_ops._py_func_stateless(
-        input=inp, token=token, Tout=Tout, name=name)
-  # pylint: enable=protected-access
-  return result if is_list_or_tuple else result[0]
+  return _internal_py_func(
+      func=func, inp=inp, Tout=Tout, stateful=stateful, eager=False, name=name)
 
 
+# TODO(akshayka): PyFuncs where the 'eager' attribute is set to True should be
+# differentiable, i.e., the gradient of PyFunc should propagate Nones if the
+# eager attribute is not set, and otherwise, it should return the gradient.
 ops.NotDifferentiable("PyFunc")
 ops.NotDifferentiable("PyFuncStateless")
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index cdfe9e1c1ec7f39912f5e767ffdd291f29f5be88..c368d166f5654a3fc5c3464e552e6497b6ee19a3 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -557,6 +557,7 @@ def sparse_reshape(sp_input, shape, name=None):
     TypeError: If `sp_input` is not a `SparseTensor`.
     ValueError:  If argument `shape` requests a `SparseTensor` with a different
       number of elements than `sp_input`.
+    ValueError:  If `shape` has more than one inferred (== -1) dimension.
   """
   sp_input = _convert_to_sparse_tensor(sp_input)
   shape = math_ops.cast(shape, dtype=dtypes.int64)
@@ -568,16 +569,26 @@ def sparse_reshape(sp_input, shape, name=None):
     reshaped_shape_const = tensor_util.constant_value(shape)
     if (reshaped_shape_const is not None
         and sp_input.get_shape().is_fully_defined()):
-      # Don't deal with inferred dimensions. That would add significant code.
-      if all(n >= 0 for n in reshaped_shape_const):
-        reshaped_size = np.prod(reshaped_shape_const)
-        in_shape_size = np.prod(sp_input.get_shape().as_list())
-        if reshaped_size != in_shape_size:
-          raise ValueError(
-              "Cannot reshape a tensor with %d elements to shape %s "
-              "(%d elements)."
-              % (in_shape_size, reshaped_shape_const, reshaped_size))
-        reshaped_shape = reshaped_shape_const
+      num_implied = sum((dim == -1) for dim in reshaped_shape_const)
+      if num_implied > 1:
+        raise ValueError("At most one dimension can be inferred (-1). Found: %s"
+                         % reshaped_shape_const)
+      original_reshaped_shape = list(reshaped_shape_const)  # Copy.
+      in_shape_size = np.prod(sp_input.get_shape().as_list())
+      if num_implied:
+        implied_idx = original_reshaped_shape.index(-1)
+        non_implied_idx = (
+            original_reshaped_shape[:implied_idx] +
+            original_reshaped_shape[implied_idx + 1:])
+        reshaped_shape_const[implied_idx] = (
+            in_shape_size // np.prod(non_implied_idx))
+      reshaped_size = np.prod(reshaped_shape_const)
+      if reshaped_size != in_shape_size:
+        raise ValueError(
+            "Cannot reshape a tensor with %d elements to shape %s "
+            "(%d elements)."
+            % (in_shape_size, original_reshaped_shape, reshaped_size))
+      reshaped_shape = reshaped_shape_const
 
     return sparse_tensor.SparseTensor(
         reshaped_ind, array_ops.identity(sp_input.values),
@@ -1385,16 +1396,17 @@ def sparse_fill_empty_rows(sp_input, default_value, name=None):
             empty_row_indicator)
 
 
-def serialize_sparse(sp_input, name=None):
-  """Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object.
+def serialize_sparse(sp_input, name=None, out_type=dtypes.string):
+  """Serialize a `SparseTensor` into a 3-vector (1-D `Tensor`) object.
 
   Args:
     sp_input: The input `SparseTensor`.
     name: A name prefix for the returned tensors (optional).
+    out_type: The `dtype` to use for serialization.
 
   Returns:
-    A string 3-vector (1D `Tensor`), with each column representing the
-    serialized `SparseTensor`'s indices, values, and shape (respectively).
+    A 3-vector (1-D `Tensor`), with each column representing the serialized
+    `SparseTensor`'s indices, values, and shape (respectively).
 
   Raises:
     TypeError: If `sp_input` is not a `SparseTensor`.
@@ -1402,11 +1414,15 @@ def serialize_sparse(sp_input, name=None):
   sp_input = _convert_to_sparse_tensor(sp_input)
 
   return gen_sparse_ops._serialize_sparse(
-      sp_input.indices, sp_input.values, sp_input.dense_shape, name=name)
+      sp_input.indices,
+      sp_input.values,
+      sp_input.dense_shape,
+      name=name,
+      out_type=out_type)
 
 
-def serialize_many_sparse(sp_input, name=None):
-  """Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` string `Tensor`.
+def serialize_many_sparse(sp_input, name=None, out_type=dtypes.string):
+  """Serialize `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor`.
 
   The `SparseTensor` must have rank `R` greater than 1, and the first dimension
   is treated as the minibatch dimension.  Elements of the `SparseTensor`
@@ -1419,11 +1435,12 @@ def serialize_many_sparse(sp_input, name=None):
   Args:
     sp_input: The input rank `R` `SparseTensor`.
     name: A name prefix for the returned tensors (optional).
+    out_type: The `dtype` to use for serialization.
 
   Returns:
-    A string matrix (2-D `Tensor`) with `N` rows and `3` columns.
-    Each column represents serialized `SparseTensor`'s indices, values, and
-    shape (respectively).
+    A matrix (2-D `Tensor`) with `N` rows and `3` columns. Each column
+    represents serialized `SparseTensor`'s indices, values, and shape
+    (respectively).
 
   Raises:
     TypeError: If `sp_input` is not a `SparseTensor`.
@@ -1431,16 +1448,57 @@ def serialize_many_sparse(sp_input, name=None):
   sp_input = _convert_to_sparse_tensor(sp_input)
 
   return gen_sparse_ops._serialize_many_sparse(
-      sp_input.indices, sp_input.values, sp_input.dense_shape, name=name)
+      sp_input.indices,
+      sp_input.values,
+      sp_input.dense_shape,
+      name=name,
+      out_type=out_type)
 
 
 def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None):
   """Deserialize `SparseTensor` objects.
 
-  The input is expected to have shape [d_1, ..., d_m, 3], where the last
-  dimension stores a serialized `SparseTensor`. The method deserializes
-  all input `SparseTensor`s, concatenates them into a single tensor, and
-  reshapes the sparse tensor to preserve the structure of the input.
+  The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where
+  the last dimension stores serialized `SparseTensor` objects and the other N
+  dimensions (N >= 0) correspond to a batch. The ranks of the original
+  `SparseTensor` objects must all match. When the final `SparseTensor` is
+  created, its rank is the rank of the incoming `SparseTensor` objects plus N;
+  the sparse tensors have been concatenated along new dimensions, one for each
+  batch.
+
+  The output `SparseTensor` object's shape values for the original dimensions
+  are the max across the input `SparseTensor` objects' shape values for the
+  corresponding dimensions. The new dimensions match the size of the batch.
+
+  The input `SparseTensor` objects' indices are assumed ordered in
+  standard lexicographic order.  If this is not the case, after this
+  step run `SparseReorder` to restore index ordering.
+
+  For example, if the serialized input is a `[2 x 3]` matrix representing two
+  original `SparseTensor` objects:
+
+      index = [ 0]
+              [10]
+              [20]
+      values = [1, 2, 3]
+      shape = [50]
+
+  and
+
+      index = [ 2]
+              [10]
+      values = [4, 5]
+      shape = [30]
+
+  then the final deserialized `SparseTensor` will be:
+
+      index = [0  0]
+              [0 10]
+              [0 20]
+              [1  2]
+              [1 10]
+      values = [1, 2, 3, 4, 5]
+      shape = [2 50]
 
   Args:
     serialized_sparse: The serialized `SparseTensor` objects.
@@ -1914,8 +1972,16 @@ def sparse_transpose(sp_input, perm=None, name=None):
     indices = sp_input.indices
     transposed_indices = array_ops.transpose(
         array_ops.gather(array_ops.transpose(indices), perm))
-    dense_shape = sp_input.dense_shape
-    transposed_dense_shape = array_ops.gather(dense_shape, perm)
+
+    perm_ = tensor_util.constant_value(ops.convert_to_tensor(perm))
+    if perm_ is not None and sp_input.get_shape().is_fully_defined():
+      old_shape_ = sp_input.get_shape().as_list()
+      transposed_dense_shape = list(old_shape_)  # Copy.
+      for i, p in enumerate(perm_):
+        transposed_dense_shape[i] = old_shape_[p]
+    else:
+      dense_shape = sp_input.dense_shape
+      transposed_dense_shape = array_ops.gather(dense_shape, perm)
     transposed_st = sparse_tensor.SparseTensor(
         transposed_indices, sp_input.values,
         transposed_dense_shape)
diff --git a/tensorflow/python/ops/special_math_ops_test.py b/tensorflow/python/ops/special_math_ops_test.py
index 6581e9f922518e5ebae0bc43aa8595d5b686e188..c1a66717d86dd8278dbe676f1714d226351c245f 100644
--- a/tensorflow/python/ops/special_math_ops_test.py
+++ b/tensorflow/python/ops/special_math_ops_test.py
@@ -223,7 +223,7 @@ class EinsumTest(test.TestCase):
 
   dim_mismatch_cases = [('ijk,jkl->il', [(2, 3, 4), (3, 5, 6)])]
 
-  def test_simple(self):
+  def disabled_test_simple(self):
     for case in self.simple_cases:
       self.run_test(case)
 
diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py
index dfc657893cd6e7622833e4234d2f490dc1f2b690..dee495f78fa5c2fa099772d0a84f5ff0981c8c59 100644
--- a/tensorflow/python/ops/state_ops.py
+++ b/tensorflow/python/ops/state_ops.py
@@ -347,5 +347,71 @@ def scatter_update(ref, indices, updates, use_locking=True, name=None):
   if ref.dtype._is_ref_dtype:
     return gen_state_ops.scatter_update(ref, indices, updates,
                                         use_locking=use_locking, name=name)
-  return gen_resource_variable_ops.resource_scatter_update(
-      ref.handle, indices, ops.convert_to_tensor(updates, ref.dtype), name=name)
+  with ops.control_dependencies(
+      [gen_resource_variable_ops.resource_scatter_update(
+          ref.handle, indices, ops.convert_to_tensor(updates, ref.dtype),
+          name=name)]):
+    return ref.read_value()
+
+
+def scatter_nd_update(ref, indices, updates, use_locking=True, name=None):
+  r"""Applies sparse `updates` to individual values or slices in a Variable.
+
+  `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`.
+
+  `indices` must be integer tensor, containing indices into `ref`.
+  It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`.
+
+  The innermost dimension of `indices` (with length `K`) corresponds to
+  indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th
+  dimension of `ref`.
+
+  `updates` is `Tensor` of rank `Q-1+P-K` with shape:
+
+  ```
+  [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].
+  ```
+
+  For example, say we want to update 4 scattered elements to a rank-1 tensor to
+  8 elements. In Python, that update would look like this:
+
+  ```python
+      ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
+      indices = tf.constant([[4], [3], [1] ,[7]])
+      updates = tf.constant([9, 10, 11, 12])
+      update = tf.scatter_nd_update(ref, indices, updates)
+      with tf.Session() as sess:
+        print sess.run(update)
+  ```
+
+  The resulting update to ref would look like this:
+
+      [1, 11, 3, 10, 9, 6, 7, 12]
+
+  See @{tf.scatter_nd} for more details about how to make updates to
+  slices.
+
+  Args:
+    ref: A Variable.
+    indices: A `Tensor`. Must be one of the following types: `int32`, `int64`.
+      A Tensor. Must be one of the following types: int32, int64.
+      A tensor of indices into ref.
+    updates: A `Tensor`. Must have the same type as `ref`.
+      A Tensor. Must have the same type as ref. A tensor of updated
+      values to add to ref.
+    use_locking: An optional `bool`. Defaults to `True`.
+      An optional bool. Defaults to True. If True, the assignment will
+      be protected by a lock; otherwise the behavior is undefined,
+      but may exhibit less contention.
+    name: A name for the operation (optional).
+
+  Returns:
+    The value of the variable after the update.
+  """
+  if ref.dtype._is_ref_dtype:
+    return gen_state_ops.scatter_nd_update(
+        ref, indices, updates, use_locking, name)
+  with ops.control_dependencies([gen_state_ops.resource_scatter_nd_update(
+      ref.handle, indices, ops.convert_to_tensor(updates, dtype=ref.dtype),
+      use_locking, name)]):
+    return ref.read_value()
diff --git a/tensorflow/python/ops/template.py b/tensorflow/python/ops/template.py
index 98578b799a814962b560e8ed40868b2e94010f4e..99a71cbe79cfb2772a279960d2aec1def52960c0 100644
--- a/tensorflow/python/ops/template.py
+++ b/tensorflow/python/ops/template.py
@@ -22,9 +22,12 @@ import functools
 import traceback
 
 from tensorflow.python.eager import context
+from tensorflow.python.eager import function
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.util import tf_contextlib
+from tensorflow.python.util import tf_decorator
 from tensorflow.python.util.deprecation import deprecated
 
 
@@ -127,8 +130,8 @@ def make_template(name_, func_, create_scope_now_=False, unique_name_=None,
 
   Returns:
     A function to encapsulate a set of variables which should be created once
-    and reused. An enclosing scope will created, either where `make_template`
-    is called, or wherever the result is called, depending on the value of
+    and reused. An enclosing scope will be created either when `make_template`
+    is called or when the result is called, depending on the value of
     `create_scope_now_`. Regardless of the value, the first time the template
     is called it will enter the scope with no reuse, and call `func_` to create
     variables, which are guaranteed to be unique. All subsequent calls will
@@ -137,15 +140,86 @@ def make_template(name_, func_, create_scope_now_=False, unique_name_=None,
   Raises:
     ValueError: if the name is None.
   """
+  return make_template_internal(
+      name_,
+      func_,
+      create_scope_now_,
+      unique_name_,
+      custom_getter_,
+      create_graph_function_=False,
+      **kwargs)
+
+
+def make_template_internal(name_,
+                           func_,
+                           create_scope_now_=False,
+                           unique_name_=None,
+                           custom_getter_=None,
+                           create_graph_function_=False,
+                           **kwargs):
+  """Make a template, optionally compiling func_ into a graph function.
+
+  See `make_template` for full documentation.
+
+  Args:
+    name_: A name for the scope created by this template. If necessary, the name
+      will be made unique by appending `_N` to the name.
+    func_: The function to wrap.
+    create_scope_now_: Boolean controlling whether the scope should be created
+      when the template is constructed or when the template is called. Default
+      is False, meaning the scope is created when the template is called.
+    unique_name_: When used, it overrides name_ and is not made unique. If a
+      template of the same scope/unique_name already exists and reuse is false,
+      an error is raised. Defaults to None. If executing eagerly, must be None.
+    custom_getter_: Optional custom getter for variables used in `func_`. See
+      the @{tf.get_variable} `custom_getter` documentation for
+      more information.
+    create_graph_function_: When True, the first invocation of the template will
+      execute `func_` as is, to allow for variable creation; however, the second
+      invocation and every invocation thereafter will execute func as a graph
+      function.  In particular, this implies that `func_` must satisfy the
+      properties that `function.defun` requires of functions: See the
+      documentation of `function.defun` for details. When executing eagerly,
+      setting this flag to True can improve performance. Regardless of whether
+      eager execution is enabled, enabling this flag gives the caller access to
+      graph-function semantics, i.e., accesses to variables are totally ordered
+      and side-effecting ops are not pruned.
+    **kwargs: Keyword arguments to apply to `func_`.
+
+  Returns:
+    A function to encapsulate a set of variables which should be created once
+    and reused. An enclosing scope will be created either when `make_template`
+    is called or when the result is called, depending on the value of
+    `create_scope_now_`. Regardless of the value, the first time the template
+    is called it will enter the scope with no reuse, and call `func_` to create
+    variables, which are guaranteed to be unique. All subsequent calls will
+    re-enter the scope and reuse those variables.
+
+  Raises:
+    ValueError: if the name is None.
+    ValueError: if unique_name_ is not None and eager execution is enabled.
+  """
+
   if kwargs:
-    func_ = functools.partial(func_, **kwargs)
+    func_ = tf_decorator.make_decorator(func_, functools.partial(
+        func_, **kwargs))
   if context.in_eager_mode():
+    if unique_name_ is not None:
+      raise ValueError(
+          "unique_name_ cannot be used when eager exeuction is enabled.")
     return EagerTemplate(
-        name_, func_, create_scope_now=create_scope_now_,
-        unique_name=unique_name_, custom_getter=custom_getter_)
+        name_,
+        func_,
+        create_scope_now=create_scope_now_,
+        custom_getter=custom_getter_,
+        create_graph_function=create_graph_function_)
   return Template(
-      name_, func_, create_scope_now=create_scope_now_,
-      unique_name=unique_name_, custom_getter=custom_getter_)
+      name_,
+      func_,
+      create_scope_now=create_scope_now_,
+      unique_name=unique_name_,
+      custom_getter=custom_getter_,
+      create_graph_function=create_graph_function_)
 
 
 def _skip_common_stack_elements(stacktrace, base_case):
@@ -169,7 +243,7 @@ class Template(object):
   """
 
   def __init__(self, name, func, create_scope_now=False, unique_name=None,
-               custom_getter=None):
+               custom_getter=None, create_graph_function=False):
     """Creates a template for the given function.
 
     Args:
@@ -183,13 +257,20 @@ class Template(object):
         through much lower level code, and you want to be sure of the scope
         name without knowing exactly where it will be first called. If set to
         True, the scope will be created in the constructor, and all subsequent
-        times in __call__, leading to a trailing numeral being added to the
+        times in `__call__`, leading to a trailing numeral being added to the
         names of all created Tensors. If set to False, the scope will be created
         at the first call location.
-      unique_name: When used, it overrides name_ and is not made unique. If a
+      unique_name: When used, it overrides `name` and is not made unique. If a
         template of the same scope/unique_name already exists and reuse is
         false, an error is raised. Defaults to None.
-      custom_getter: optional custom getter to pass to variable_scope()
+      custom_getter: optional custom getter to pass to `variable_scope()`
+      create_graph_function: When True, the first invocation of the template
+        will execute `func` as is, to allow for variable creation; however, the
+        second invocation and every invocation thereafter will execute `func` as
+        a graph function. Enabling this flag gives the caller access to
+        graph-function semantics, i.e., accesses to variables are totally
+        ordered and side-effecting ops are not pruned.
+
 
     Raises:
       ValueError: if the name is None.
@@ -212,15 +293,24 @@ class Template(object):
     # This variable keeps track of whether the template has been called yet,
     # which is not the same as whether the scope has been created.
     self._variables_created = False
+    self._create_graph_function = create_graph_function
 
-  def _call_func(self, args, kwargs, check_for_new_variables):
+  def _call_func(self, args, kwargs):
     try:
       vars_at_start = len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
       trainable_at_start = len(
           ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
 
       result = self._func(*args, **kwargs)
-      if check_for_new_variables:
+      if self._create_graph_function and not self._variables_created:
+        # Only execute self._func as a graph function once variables are
+        # created.
+        self._func = function.defun(self._func)
+
+      if self._variables_created:
+        # Variables were previously created, implying this is not the first
+        # time the template has been called. Check to make sure that no new
+        # trainable variables were created this time around.
         trainable_variables = ops.get_collection(
             ops.GraphKeys.TRAINABLE_VARIABLES)
         # If a variable that we intend to train is created as a side effect
@@ -240,6 +330,8 @@ class Template(object):
                        "the first time, perhaps you used tf.Variable when you "
                        "meant tf.get_variable: %s",
                        variables[vars_at_start:])
+      else:
+        self._variables_created = True
       return result
     except Exception as exc:
       # Reraise the exception, but append the original definition to the
@@ -259,20 +351,11 @@ class Template(object):
 
   def __call__(self, *args, **kwargs):
     if self._variable_scope:
-      if self._variables_created:
-        # This is not the first visit to __call__, so variables have already
-        # been created, and we want to reuse them.
-        with variable_scope.variable_scope(self._variable_scope, reuse=True):
-          return self._call_func(args, kwargs, check_for_new_variables=True)
-      else:
-        # This is the first visit to __call__, but the scope has already been
-        # created in the constructor. Set _variables_created after the inner
-        # function is successfully called so that subsequent calls take the if
-        # branch above.
-        with variable_scope.variable_scope(self._variable_scope):
-          result = self._call_func(args, kwargs, check_for_new_variables=False)
-          self._variables_created = True
-          return result
+      # Only reuse variables if they were already created.
+      with variable_scope.variable_scope(
+          self._variable_scope, reuse=self._variables_created):
+        result = self._call_func(args, kwargs)
+      return result
     else:
       # The scope was not created at construction time, so create it here.
       # Subsequent calls should reuse variables.
@@ -280,8 +363,7 @@ class Template(object):
           self._unique_name, self._name,
           custom_getter=self._custom_getter) as vs:
         self._variable_scope = vs
-        result = self._call_func(args, kwargs, check_for_new_variables=False)
-        self._variables_created = True
+        result = self._call_func(args, kwargs)
         return result
 
   @property
@@ -307,6 +389,12 @@ class Template(object):
       # To prevent partial matches on the scope_name, we add '/' at the end.
       return name if name[-1] == "/" else name + "/"
 
+  @property
+  def variables(self):
+    """Returns the list of global and local variables created by the Template.
+    """
+    return self.global_variables + self.local_variables
+
   @property
   def trainable_variables(self):
     """Returns the list of trainable variables created by the Template."""
@@ -316,6 +404,14 @@ class Template(object):
     else:
       return []
 
+  @property
+  def non_trainable_variables(self):
+    """Returns the list of non-trainable variables created by the Template."""
+    # TODO(apassos) Make sure it matches Eager when using local variables.
+    global_variables = self.global_variables
+    trainable_variables = set(self.trainable_variables)
+    return [x for x in global_variables if x not in trainable_variables]
+
   @property
   def global_variables(self):
     """Returns the list of global variables created by the Template."""
@@ -334,6 +430,21 @@ class Template(object):
     else:
       return []
 
+  @property
+  def weights(self):
+    """List of weights/variables created by the Template."""
+    return self.variables
+
+  @property
+  def trainable_weights(self):
+    """List of trainable weights/variables created by the Template."""
+    return self.trainable_variables
+
+  @property
+  def non_trainable_weights(self):
+    """List of non-trainable weights/variables created by the Template."""
+    return self.non_trainable_variables
+
   @property
   @deprecated(
       "2017-02-21", "The .var_scope property is deprecated. Please change your "
@@ -343,6 +454,61 @@ class Template(object):
     return self._variable_scope
 
 
+class _EagerTemplateVariableStore(object):
+  """Wrapper around EagerVariableStore to support nesting EagerTemplates.
+  """
+
+  def __init__(self, variable_scope_name):
+    self._variable_scope_name = variable_scope_name
+    default = variable_scope._get_default_variable_store()  # pylint: disable=protected-access
+    if default._store_eager_variables:  # pylint: disable=protected-access
+      self._eager_variable_store = variable_scope.EagerVariableStore(default)
+    else:
+      self._eager_variable_store = variable_scope.EagerVariableStore()
+
+  def set_variable_scope_name(self, variable_scope_name):
+    self._variable_scope_name = variable_scope_name
+
+  @tf_contextlib.contextmanager
+  def as_default(self):
+    try:
+      with self._eager_variable_store.as_default():
+        yield
+    finally:
+      # Each _EagerTemplateVariableStore object lives underneath a variable
+      # scope (see EagerTemplate.__call__). This variable scope's subscopes are
+      # closed when the EagerTemplate object returns from __call__. For
+      # top-level _EagerTemplateVariableStore objects, the variable store to
+      # which the variable scope is attached is different from the
+      # EagerVariableStore; as such it is necessary to close its subscopes
+      # here as well.
+      if self._variable_scope_name is None:
+        raise RuntimeError("A variable scope must be set before an "
+                           "_EagerTemplateVariableStore object exits.")
+      self._eager_variable_store._store.close_variable_subscopes(  # pylint: disable=protected-access
+          self._variable_scope_name)
+
+  def _variables_in_scope(self, variable_list):
+    if self._variable_scope_name is None:
+      raise RuntimeError(
+          "A variable scope must be set before variables can be accessed.")
+    return [
+        v for v in variable_list
+        if v.name.startswith(self._variable_scope_name + "/")
+    ]
+
+  def variables(self):
+    return self._variables_in_scope(self._eager_variable_store.variables())
+
+  def trainable_variables(self):
+    return self._variables_in_scope(
+        self._eager_variable_store.trainable_variables())
+
+  def non_trainable_variables(self):
+    return self._variables_in_scope(
+        self._eager_variable_store.non_trainable_variables())
+
+
 class EagerTemplate(Template):
   """Wrap a function to aid in variable sharing in Eager mode.
 
@@ -355,8 +521,8 @@ class EagerTemplate(Template):
   call.
   """
 
-  def __init__(self, name, func, create_scope_now=False, unique_name=None,
-               custom_getter=None):
+  def __init__(self, name, func, create_scope_now=False, custom_getter=None,
+               create_graph_function=False):
     """Creates a template for the given function.
 
     Args:
@@ -370,43 +536,53 @@ class EagerTemplate(Template):
         through much lower level code, and you want to be sure of the scope
         name without knowing exactly where it will be first called. If set to
         True, the scope will be created in the constructor, and all subsequent
-        times in __call__, leading to a trailing numeral being added to the
+        times in `__call__`, leading to a trailing numeral being added to the
         names of all created Tensors. If set to False, the scope will be created
         at the first call location.
-      unique_name: When used, it overrides name_ and is not made unique. If a
-        template of the same scope/unique_name already exists and reuse is
-        false, an error is raised. Defaults to None.
-      custom_getter: optional custom getter to pass to variable_scope()
+      custom_getter: optional custom getter to pass to `variable_scope()`
+      create_graph_function: When True, the first invocation of the template
+        will execute `func` as is, to allow for variable creation; however, the
+        second invocation and every invocation thereafter will execute `func` as
+        a graph function. Enabling this flag allows the caller to reap the
+        performance benefits associated with executing graphs, at the cost of
+        sacrificing debuggability; however, not all functions can be compiled
+        into graph functions. See the documentation for `function.defun` for
+        details.
 
     Raises:
-      RuntimeError: if eager mode is not enabled.
-      ValueError: if the name is None or unique_name is provided.
+      RuntimeError: if eager execution is not enabled.
     """
     if not context.in_eager_mode():
       raise RuntimeError(
           "{} objects can only be used when eager execution is enabled, use "
           "tf.Template for graph construction".
           format(type(self)))
-    if unique_name:
-      raise ValueError("unique_name cannot be used in eager mode.")
-    super(EagerTemplate, self).__init__(name, func, create_scope_now,
-                                        unique_name, custom_getter)
-    # Create an eager variable store only if the current variable store cannot
-    # store eager variables. This should allow for correct nesting.
-    default_vstore = variable_scope._get_default_variable_store()  # pylint: disable=protected-access
-    if default_vstore._store_eager_variables:  # pylint: disable=protected-access
-      raise ValueError("Nested EagerTemaplates are not currently supported.")
+    super(EagerTemplate, self).__init__(name, func, create_scope_now, None,
+                                        custom_getter, create_graph_function)
+    if self._variable_scope is not None:
+      variable_scope_name = self._variable_scope.name
     else:
-      self._eager_variable_store = variable_scope.EagerVariableStore()
+      # Defer setting the variable scope name until the variable scope
+      # is created in __call__.
+      variable_scope_name = None
+    self._template_store = _EagerTemplateVariableStore(variable_scope_name)
 
-  def _call_func(self, args, kwargs, check_for_new_variables):
+  def _call_func(self, args, kwargs):
     try:
-      vars_at_start = self._eager_variable_store.variables()
-      trainable_at_start = self._eager_variable_store.trainable_variables()
+      vars_at_start = self._template_store.variables()
+      trainable_at_start = self._template_store.trainable_variables()
 
       result = self._func(*args, **kwargs)
-      if check_for_new_variables:
-        trainable_variables = self._eager_variable_store.trainable_variables()
+      if self._create_graph_function and not self._variables_created:
+        # Only execute self._func as a graph function once variables are
+        # created.
+        self._func = function.defun(self._func)
+
+      if self._variables_created:
+        # Variables were previously created, implying this is not the first
+        # time the template has been called. Check to make sure that no new
+        # trainable variables were created this time around.
+        trainable_variables = self._template_store.trainable_variables()
         # If a variable that we intend to train is created as a side effect
         # of creating a template, then that is almost certainly an error.
         if len(trainable_at_start) != len(trainable_variables):
@@ -419,12 +595,14 @@ class EagerTemplate(Template):
         # Non-trainable tracking variables are a legitimate reason why a new
         # variable would be created, but it is a relatively advanced use-case,
         # so log it.
-        variables = self._eager_variable_store.variables()
+        variables = self._template_store.variables()
         if len(vars_at_start) != len(variables):
           logging.info("New variables created when calling a template after "
                        "the first time, perhaps you used tf.Variable when you "
                        "meant tf.get_variable: %s",
                        list(set(variables) - set(vars_at_start)))
+      else:
+        self._variables_created = True
       return result
     except Exception as exc:
       # Reraise the exception, but append the original definition to the
@@ -443,26 +621,15 @@ class EagerTemplate(Template):
       raise
 
   def __call__(self, *args, **kwargs):
+    # In both branches below, the template store is installed as default after
+    # the variable scope is opened in order to ensure that templates nested at
+    # the same level correctly uniquify lower variable scope names.
     if self._variable_scope:
-      if self._variables_created:
-        # This is not the first visit to __call__, so variables have already
-        # been created, and we want to reuse them.
-        with variable_scope.variable_scope(self._variable_scope,
-                                           reuse=variable_scope.AUTO_REUSE):
-          with self._eager_variable_store.as_default():
-            return self._call_func(args, kwargs, check_for_new_variables=True)
-      else:
-        # This is the first visit to __call__, but the scope has already been
-        # created in the constructor. Set _variables_created after the inner
-        # function is successfully called so that subsequent calls take the if
-        # branch above.
-        with variable_scope.variable_scope(self._variable_scope,
-                                           reuse=variable_scope.AUTO_REUSE):
-          with self._eager_variable_store.as_default():
-            result = self._call_func(args, kwargs,
-                                     check_for_new_variables=False)
-        self._variables_created = True
-        return result
+      with variable_scope.variable_scope(
+          self._variable_scope, reuse=variable_scope.AUTO_REUSE):
+        with self._template_store.as_default():
+          result = self._call_func(args, kwargs)
+      return result
     else:
       # The scope was not created at construction time, so create it here.
       # Subsequent calls should reuse variables.
@@ -470,10 +637,11 @@ class EagerTemplate(Template):
           self._unique_name, self._name,
           custom_getter=self._custom_getter) as vs:
         self._variable_scope = vs
-        with self._eager_variable_store.as_default():
-          result = self._call_func(args, kwargs,
-                                   check_for_new_variables=False)
-        self._variables_created = True
+        # Because the scope was not created at construction time, the template
+        # store's variable scope name is unset; set it here.
+        self._template_store.set_variable_scope_name(vs.name)
+        with self._template_store.as_default():
+          result = self._call_func(args, kwargs)
         return result
 
   @property
@@ -501,20 +669,34 @@ class EagerTemplate(Template):
 
   @property
   def variables(self):
-    """Returns the list of trainable variables created by the Template."""
+    """Returns the list of variables created by the Template."""
     # Currently there is no local variable in Eager mode.
-    return self._eager_variable_store.variables()
+    if not self._variables_created:
+      return []
+    return self._template_store.variables()
 
   @property
   def trainable_variables(self):
     """Returns the list of trainable variables created by the Template."""
     # Currently there is no local variable in Eager mode.
-    return self._eager_variable_store.trainable_variables()
+    if not self._variables_created:
+      return []
+    return self._template_store.trainable_variables()
+
+  @property
+  def non_trainable_variables(self):
+    """Returns the list of non-trainable variables created by the Template."""
+    # Currently there is no local variable in Eager mode.
+    if not self._variables_created:
+      return []
+    return self._template_store.non_trainable_variables()
 
   @property
   def global_variables(self):
     """Returns the list of global variables created by the Template."""
     # Currently there is no local variable in Eager mode.
+    if not self._variables_created:
+      return []
     return self.variables
 
   @property
diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py
index 605654d9be7985f4b0d2677cf688c31796db31b5..398521c9b5ae9240f03a2ba5c4b0681bd8b3bfd7 100644
--- a/tensorflow/python/ops/tensor_array_ops.py
+++ b/tensorflow/python/ops/tensor_array_ops.py
@@ -36,9 +36,6 @@ from tensorflow.python.ops import gen_data_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.util import tf_should_use
 
-# TODO(ebrevdo): Set to True in Dec. 4, 2017.
-_ENABLE_IDENTICAL_ELEMENT_SHAPES = False
-
 
 # _GraphTensorArray accesses many of the hidden generated ops, but is in
 # fact built to wrap these methods.
@@ -150,18 +147,15 @@ class _GraphTensorArray(object):
         # will retroactively set the device value of this op.
         def create():
           """Create the TensorArray op."""
-          ta_kwargs = {}
-          if _ENABLE_IDENTICAL_ELEMENT_SHAPES:
-            ta_kwargs["identical_element_shapes"] = infer_shape
           return gen_data_flow_ops._tensor_array_v3(
               dtype=dtype,
               size=size,
               element_shape=element_shape,
+              identical_element_shapes=infer_shape,
               dynamic_size=dynamic_size,
               clear_after_read=clear_after_read,
               tensor_array_name=tensor_array_name,
-              name=scope,
-              **ta_kwargs)
+              name=scope)
         if colocate_with_first_write_call:
           with ops.device(None), ops.colocate_with(None, ignore_existing=True):
             self._handle, self._flow = create()
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index 3643861a16d3c6f6467a0e4427e2a08cc3e167c6..3a39af8e207f154446204b452a00537f9c25fdb1 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -23,6 +23,7 @@ import collections as collections_lib
 import copy
 import enum  # pylint: disable=g-bad-import-order
 import functools
+import sys
 import traceback
 
 import six
@@ -843,6 +844,7 @@ class _VariableStore(object):
     Raises:
       ValueError: When giving unsupported dtype.
     """
+    del shape
     # If dtype is DT_FLOAT, provide a uniform unit scaling initializer
     if dtype.is_floating:
       initializer = init_ops.glorot_uniform_initializer()
@@ -850,9 +852,8 @@ class _VariableStore(object):
     # If dtype is DT_INT/DT_UINT, provide a default value `zero`
     # If dtype is DT_BOOL, provide a default value `FALSE`
     elif dtype.is_integer or dtype.is_unsigned or dtype.is_bool:
-      initializer = init_ops.zeros_initializer()(
-          shape=shape, dtype=dtype.base_dtype)
-      initializing_from_value = True
+      initializer = init_ops.zeros_initializer()
+      initializing_from_value = False
     # NOTES:Do we need to support for handling DT_STRING and DT_COMPLEX here?
     else:
       raise ValueError("An initializer for variable %s of %s is required"
@@ -1217,8 +1218,15 @@ class EagerVariableStore(object):
   ```
   """
 
-  def __init__(self):
-    self._store = _VariableStore()
+  def __init__(self, store=None):
+    if store is not None:
+      if not store._store_eager_variables:  # pylint: disable=protected-access
+        raise ValueError("Cannot construct EagerVariableStore from a "
+                         "VariableStore object that does not hold eager "
+                         "variables.")
+      self._store = store
+    else:
+      self._store = _VariableStore()
     self._store._store_eager_variables = True  # pylint: disable=protected-access
 
   def as_default(self):
@@ -1584,6 +1592,10 @@ class _pure_variable_scope(object):  # pylint: disable=invalid-name
           else self._name_or_scope)
       self._reuse = (self._reuse
                      or self._old.reuse)  # Re-using is inherited by sub-scopes.
+      if self._old_name_scope is None:
+        name_scope = self._name_or_scope
+      else:
+        name_scope = self._old_name_scope
       variable_scope_object = VariableScope(
           self._reuse,
           name=self._new_name,
@@ -1594,7 +1606,7 @@ class _pure_variable_scope(object):  # pylint: disable=invalid-name
           dtype=self._old.dtype,
           use_resource=self._old.use_resource,
           custom_getter=self._old.custom_getter,
-          name_scope=self._old_name_scope or self._name_or_scope,
+          name_scope=name_scope,
           constraint=self._constraint)
       if self._initializer is not None:
         variable_scope_object.set_initializer(self._initializer)
@@ -1763,7 +1775,8 @@ class variable_scope(object):  # pylint: disable=invalid-name
                reuse=None,
                dtype=None,
                use_resource=None,
-               constraint=None):
+               constraint=None,
+               auxiliary_name_scope=True):
     """Initialize the context manager.
 
     Args:
@@ -1795,6 +1808,8 @@ class variable_scope(object):  # pylint: disable=invalid-name
         variable and return the Tensor for the projected value
         (which must have the same shape). Constraints are not safe to
         use when doing asynchronous distributed training.
+      auxiliary_name_scope: If `True`, we create an auxiliary name scope with
+        the scope. If `False`, we don't touch name scope.
 
     Returns:
       A scope that can be captured and reused.
@@ -1832,6 +1847,10 @@ class variable_scope(object):  # pylint: disable=invalid-name
       self._graph = ops._get_graph_from_inputs(self._values)  # pylint: disable=protected-access
     self._cached_pure_variable_scope = None
     self._current_name_scope = None
+    if not isinstance(auxiliary_name_scope, bool):
+      raise TypeError("The auxiliary_name_scope must be `True` or `False`, "
+                      "while get {}".format(auxiliary_name_scope))
+    self._auxiliary_name_scope = auxiliary_name_scope
 
   def __enter__(self):
     # If the default graph is building a function, then we should not replace it
@@ -1845,11 +1864,45 @@ class variable_scope(object):  # pylint: disable=invalid-name
       self._graph_context_manager.__enter__()
     if self._cached_pure_variable_scope is not None:
       # Fast path for re-entering variable_scopes. We've held on to the pure
-      # variable scope from a previous __enter__, so we avoid some overhead by
-      # re-using that object.
+      # variable scope from a previous successful __enter__, so we avoid some
+      # overhead by re-using that object.
       if self._current_name_scope is not None:
         self._current_name_scope.__enter__()
       return self._cached_pure_variable_scope.__enter__()
+
+    try:
+      return self._enter_scope_uncached()
+    except:
+      if self._graph_context_manager is not None:
+        self._graph_context_manager.__exit__(*sys.exc_info())
+      raise
+
+  def _enter_scope_uncached(self):
+    """Enters the context manager when there is no cached scope yet.
+
+    Returns:
+      The entered variable scope.
+
+    Raises:
+      TypeError: A wrong type is passed as `scope` at __init__().
+      ValueError: `reuse` is incorrectly set at __init__().
+    """
+    if self._auxiliary_name_scope:
+      # Create a new name scope later
+      current_name_scope = None
+    else:
+      # Reenter the current name scope
+      name_scope = ops.get_name_scope()
+      if name_scope:
+        # Hack to reenter
+        name_scope += "/"
+        current_name_scope = ops.name_scope(name_scope)
+      else:
+        # Root scope
+        current_name_scope = ops.name_scope(name_scope)
+
+    # IMPORTANT: Only assign to self._cached_pure_variable_scope and
+    # self._current_name_scope after successful __enter__() calls.
     if self._name_or_scope is not None:
       if not isinstance(self._name_or_scope,
                         (VariableScope,) + six.string_types):
@@ -1859,14 +1912,19 @@ class variable_scope(object):  # pylint: disable=invalid-name
         name_scope = self._name_or_scope
       else:
         name_scope = self._name_or_scope.name.split("/")[-1]
-      if name_scope:
-        self._current_name_scope = ops.name_scope(name_scope)
-        current_name_scope_name = self._current_name_scope.__enter__()
+      if name_scope or current_name_scope:
+        current_name_scope = current_name_scope or ops.name_scope(name_scope)
+        try:
+          current_name_scope_name = current_name_scope.__enter__()
+        except:
+          current_name_scope.__exit__(*sys.exc_info())
+          raise
+        self._current_name_scope = current_name_scope
         if isinstance(self._name_or_scope, six.string_types):
           old_name_scope = current_name_scope_name
         else:
           old_name_scope = self._name_or_scope.original_name_scope
-        self._cached_pure_variable_scope = _pure_variable_scope(
+        pure_variable_scope = _pure_variable_scope(
             self._name_or_scope,
             reuse=self._reuse,
             initializer=self._initializer,
@@ -1878,11 +1936,17 @@ class variable_scope(object):  # pylint: disable=invalid-name
             dtype=self._dtype,
             use_resource=self._use_resource,
             constraint=self._constraint)
-        return self._cached_pure_variable_scope.__enter__()
+        try:
+          entered_pure_variable_scope = pure_variable_scope.__enter__()
+        except:
+          pure_variable_scope.__exit__(*sys.exc_info())
+          raise
+        self._cached_pure_variable_scope = pure_variable_scope
+        return entered_pure_variable_scope
       else:
         self._current_name_scope = None
         # This can only happen if someone is entering the root variable scope.
-        self._cached_pure_variable_scope = _pure_variable_scope(
+        pure_variable_scope = _pure_variable_scope(
             self._name_or_scope,
             reuse=self._reuse,
             initializer=self._initializer,
@@ -1893,15 +1957,27 @@ class variable_scope(object):  # pylint: disable=invalid-name
             dtype=self._dtype,
             use_resource=self._use_resource,
             constraint=self._constraint)
-        return self._cached_pure_variable_scope.__enter__()
+        try:
+          entered_pure_variable_scope = pure_variable_scope.__enter__()
+        except:
+          pure_variable_scope.__exit__(*sys.exc_info())
+          raise
+        self._cached_pure_variable_scope = pure_variable_scope
+        return entered_pure_variable_scope
 
     else:  # Here name_or_scope is None. Using default name, but made unique.
       if self._reuse:
         raise ValueError("reuse=True cannot be used without a name_or_scope")
-      self._current_name_scope = ops.name_scope(self._default_name)
-      current_name_scope_name = self._current_name_scope.__enter__()
+      current_name_scope = current_name_scope or ops.name_scope(
+          self._default_name)
+      try:
+        current_name_scope_name = current_name_scope.__enter__()
+      except:
+        current_name_scope.__exit__(*sys.exc_info())
+        raise
+      self._current_name_scope = current_name_scope
       unique_default_name = _get_unique_variable_scope(self._default_name)
-      self._cached_pure_variable_scope = _pure_variable_scope(
+      pure_variable_scope = _pure_variable_scope(
           unique_default_name,
           initializer=self._initializer,
           regularizer=self._regularizer,
@@ -1912,7 +1988,13 @@ class variable_scope(object):  # pylint: disable=invalid-name
           dtype=self._dtype,
           use_resource=self._use_resource,
           constraint=self._constraint)
-      return self._cached_pure_variable_scope.__enter__()
+      try:
+        entered_pure_variable_scope = pure_variable_scope.__enter__()
+      except:
+        pure_variable_scope.__exit__(*sys.exc_info())
+        raise
+      self._cached_pure_variable_scope = pure_variable_scope
+      return entered_pure_variable_scope
 
   def __exit__(self, type_arg, value_arg, traceback_arg):
     self._cached_pure_variable_scope.__exit__(
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index e0748d87e2d6ef2c2f8565669357f881334fa737..b25855633ed4ce485090fb47b09e1b5ce0ff2228 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -362,7 +362,8 @@ class Variable(object):
         # using their initialized_value() method.
         self._initializer_op = state_ops.assign(
             self._variable,
-            self._build_initializer_expr(self._initial_value),
+            self._try_guard_against_uninitialized_dependencies(
+                self._initial_value),
             validate_shape=validate_shape).op
 
         # TODO(vrv): Change this class to not take caching_device, but
@@ -781,88 +782,142 @@ class Variable(object):
 
     setattr(Variable, operator, _run_op)
 
-  def _build_initializer_expr(self, initial_value):
-    """Build an expression suitable to initialize a variable.
+  def _try_guard_against_uninitialized_dependencies(self, initial_value):
+    """Attempt to guard against dependencies on uninitialized variables.
 
-    Replace references to variables in initial_value with references to the
-    variable initial values instead.
+    Replace references to variables in `initial_value` with references to the
+    variable's initialized values. The initialized values are essentially
+    conditional TensorFlow graphs that return a variable's value if it is
+    initialized or its `initial_value` if it hasn't been initialized. This
+    replacement is done on a best effort basis:
+
+    - If the `initial_value` graph contains cycles, we don't do any
+      replacements for that graph.
+    - If the variables that `initial_value` depends on are not present in the
+      `GLOBAL_VARIABLES` or `LOCAL_VARIABLES` we don't replace them.
+
+    In these cases, it is up to the caller to ensure that the `initial_value`
+    graph uses initialized variables or that they guard access to variables
+    using their `initialized_value` method.
 
     Args:
-      initial_value: original expression
+      initial_value: `Tensor`. The initial value.
     Returns:
-      A tensorflow expression suitable to initialize a variable.
+      A `Tensor` suitable to initialize a variable.
+    Raises:
+      TypeError: If `initial_value` is not a `Tensor`.
     """
-    if isinstance(initial_value, Variable):
-      return initial_value.initialized_value()
-    elif isinstance(initial_value, ops.Tensor):
-      new_op = self._build_initializer_expr(initial_value.op)
-      if new_op != initial_value.op:
-        if isinstance(new_op, ops.Tensor):
-          return new_op
-        else:
-          return ops.Tensor(new_op, initial_value.value_index,
-                            initial_value.dtype)
-      else:
-        return initial_value
-    elif isinstance(initial_value, ops.Operation):
-      if initial_value.node_def.op in [
-          "IsVariableInitialized", "VarIsInitializedOp", "ReadVariableOp"
-      ]:
-        return initial_value
-      if initial_value.node_def.op in ["Variable", "VariableV2", "VarHandleOp"]:
-        return self._find_initialized_value_for_variable(initial_value)
-      modified = False
-      new_inputs = []
-      for tensor in initial_value.inputs:
-        new_tensor = self._build_initializer_expr(tensor)
-        new_inputs.append(new_tensor)
-        if new_tensor != tensor:
-          modified = True
-
-      if modified:
-        new_name = initial_value.node_def.name + "_" + self.name
-        new_name = new_name.replace(":", "_")
-        new_op = initial_value.node_def.op
-        new_op = new_op.replace("RefSwitch", "Switch")
-        new_value = self.graph.create_op(
-            new_op,
-            new_inputs,
-            # pylint: disable=protected-access
-            initial_value._output_types,
-            # pylint: enable=protected-access
-            name=new_name,
-            attrs=initial_value.node_def.attr)
-        return new_value
-      else:
-        return initial_value
-    else:
+    if not isinstance(initial_value, ops.Tensor):
+      raise TypeError("initial_value needs to be a Tensor: %s" % initial_value)
+
+    # Don't modify initial_value if it contains any cyclic dependencies.
+    def has_cycle(op, path):
+      """Detect cycles in the dependencies of `initial_value`."""
+      if op.name in path:
+        return True
+      path.add(op.name)
+      for op_input in op.inputs:
+        if has_cycle(op_input.op, path):
+          return True
+      for op_control_input in op.control_inputs:
+        if has_cycle(op_control_input, path):
+          return True
+      path.remove(op.name)
+      return False
+    if has_cycle(initial_value.op, path=set()):
       return initial_value
 
+    return self._safe_initial_value_from_tensor(initial_value, op_cache={})
+
+  def _safe_initial_value_from_tensor(self, tensor, op_cache):
+    """Replace dependencies on variables with their initialized values.
+
+    Args:
+      tensor: A `Tensor`. The tensor to replace.
+      op_cache: A dict mapping operation names to `Operation`s. Used to memoize
+        the results so as to avoid creating redundant operations.
+    Returns:
+      A `Tensor` compatible with `tensor`. Any inputs that lead to variable
+      values will be replaced with a corresponding graph that uses the
+      variable's initialized values. This is done on a best-effort basis. If no
+      modifications need to be made then `tensor` will be returned unchanged.
+    """
+    op = tensor.op
+    new_op = op_cache.get(op.name)
+    if new_op is None:
+      new_op = self._safe_initial_value_from_op(op, op_cache)
+      op_cache[op.name] = new_op
+    return new_op.outputs[tensor.value_index]
+
+  def _safe_initial_value_from_op(self, op, op_cache):
+    """Replace dependencies on variables with their initialized values.
+
+    Args:
+      op: An `Operation`. The operation to replace.
+      op_cache: A dict mapping operation names to `Operation`s. Used to memoize
+        the results so as to avoid creating redundant operations.
+    Returns:
+      An `Operation` compatible with `op`. Any inputs that lead to variable
+      values will be replaced with a corresponding graph that uses the
+      variable's initialized values. This is done on a best-effort basis. If no
+      modifications need to be made then `op` will be returned unchanged.
+    """
+    op_type = op.node_def.op
+    if op_type in ("IsVariableInitialized", "VarIsInitializedOp",
+                   "ReadVariableOp"):
+      return op
+
+    # Attempt to find the initialized_value of any variable reference / handles.
+    # TODO(b/70206927): Fix handling of ResourceVariables.
+    if op_type in ("Variable", "VariableV2", "VarHandleOp"):
+      initialized_value = self._find_initialized_value_for_variable(op)
+      return op if initialized_value is None else initialized_value.op
+
+    # Recursively build initializer expressions for inputs.
+    modified = False
+    new_op_inputs = []
+    for op_input in op.inputs:
+      new_op_input = self._safe_initial_value_from_tensor(op_input, op_cache)
+      new_op_inputs.append(new_op_input)
+      modified = modified or (new_op_input != op_input)
+
+    # If at least one input was modified, replace the op.
+    if modified:
+      new_op_type = op_type
+      if new_op_type == "RefSwitch":
+        new_op_type = "Switch"
+      new_op_name = op.node_def.name + "_" + self.name
+      new_op_name = new_op_name.replace(":", "_")
+      return self.graph.create_op(
+          new_op_type, new_op_inputs,
+          op._output_types,  # pylint: disable=protected-access
+          name=new_op_name, attrs=op.node_def.attr)
+
+    return op
+
   def _find_initialized_value_for_variable(self, variable_op):
-    """Find the initial value for a variable op.
+    """Find the initialized value for a variable op.
 
     To do so, lookup the variable op in the variables collection.
 
     Args:
-      variable_op: a TensorFlow variable Operation
+      variable_op: A variable `Operation`.
     Returns:
-      The initial value for the variable.
+      A `Tensor` representing the initialized value for the variable or `None`
+      if the initialized value could not be found.
     """
     try:
       var_names = [variable_op.node_def.name, variable_op.node_def.name + ":0"]
-      global_vars = self.graph.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-      for var in global_vars:
-        if var.name in var_names:
-          return var.initialized_value()
-      local_vars = self.graph.get_collection(ops.GraphKeys.LOCAL_VARIABLES)
-      for var in local_vars:
-        if var.name == var_names:
-          return var.initialized_value()
+      for collection_name in (ops.GraphKeys.GLOBAL_VARIABLES,
+                              ops.GraphKeys.LOCAL_VARIABLES):
+        for var in self.graph.get_collection(collection_name):
+          if var.name in var_names:
+            return var.initialized_value()
     except AttributeError:
-      # Return the variable itself when an incomplete user defined variable type
-      # was put in the collection.
-      return variable_op
-    return variable_op
+      # Return None when an incomplete user-defined variable type was put in
+      # the collection.
+      return None
+    return None
 
   # NOTE(mrry): This enables the Variable's overloaded "right" binary
   # operators to run when the left operand is an ndarray, because it
diff --git a/tensorflow/python/platform/app.py b/tensorflow/python/platform/app.py
index 1d8acf3f006bd26ece974ef3f3674e7f13d9f827..9b92d9a18005ca5e6be3820427e3a3ba60a8ec2d 100644
--- a/tensorflow/python/platform/app.py
+++ b/tensorflow/python/platform/app.py
@@ -114,13 +114,8 @@ def run(main=None, argv=None):
   # Define help flags.
   _define_help_flags()
 
-  # Parse flags.
-  try:
-    argv = flags.FLAGS(_sys.argv if argv is None else argv)
-  except flags.Error as error:
-    _sys.stderr.write('FATAL Flags parsing error: %s\n' % error)
-    _sys.stderr.write('Pass --helpshort or --helpfull to see help on flags.\n')
-    _sys.exit(1)
+  # Parse known flags.
+  argv = flags.FLAGS(_sys.argv if argv is None else argv, known_only=True)
 
   main = main or _sys.modules['__main__'].main
 
diff --git a/tensorflow/python/platform/flags.py b/tensorflow/python/platform/flags.py
index e9a36ae75d6ce4763ff83c97bec008a4da0897b0..6225db77440e9d63eade956c5c4749c9e2884f6c 100644
--- a/tensorflow/python/platform/flags.py
+++ b/tensorflow/python/platform/flags.py
@@ -18,5 +18,108 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import logging as _logging
+import sys as _sys
+
 # go/tf-wildcard-import
 from absl.flags import *  # pylint: disable=wildcard-import
+import six as _six
+
+from tensorflow.python.util import tf_decorator
+
+
+# Since we wrap absl.flags DEFINE functions, we need to declare this module
+# does not affect key flags.
+disclaim_key_flags()  # pylint: disable=undefined-variable
+
+
+_RENAMED_ARGUMENTS = {
+    'flag_name': 'name',
+    'default_value': 'default',
+    'docstring': 'help',
+}
+
+
+def _wrap_define_function(original_function):
+  """Wraps absl.flags's define functions so tf.flags accepts old names."""
+
+  def wrapper(*args, **kwargs):
+    """Wrapper function that turns old keyword names to new ones."""
+    has_old_names = False
+    for old_name, new_name in _six.iteritems(_RENAMED_ARGUMENTS):
+      if old_name in kwargs:
+        has_old_names = True
+        value = kwargs.pop(old_name)
+        kwargs[new_name] = value
+    if has_old_names:
+      _logging.warning(
+          'Use of the keyword argument names (flag_name, default_value, '
+          'docstring) is deprecated, please use (name, default, help) instead.')
+    return original_function(*args, **kwargs)
+
+  return tf_decorator.make_decorator(original_function, wrapper)
+
+
+class _FlagValuesWrapper(object):
+  """Wrapper class for absl.flags.FLAGS.
+
+  The difference is that tf.flags.FLAGS implicitly parses flags with sys.argv
+  when accessing the FLAGS values before it's explicitly parsed,
+  while absl.flags.FLAGS raises an exception.
+  """
+
+  def __init__(self, flags_object):
+    self.__dict__['__wrapped'] = flags_object
+
+  def __getattribute__(self, name):
+    if name == '__dict__':
+      return super(_FlagValuesWrapper, self).__getattribute__(name)
+    return self.__dict__['__wrapped'].__getattribute__(name)
+
+  def __getattr__(self, name):
+    wrapped = self.__dict__['__wrapped']
+    # To maintain backwards compatibility, implicitly parse flags when reading
+    # a flag.
+    if not wrapped.is_parsed():
+      wrapped(_sys.argv)
+    return wrapped.__getattr__(name)
+
+  def __setattr__(self, name, value):
+    return self.__dict__['__wrapped'].__setattr__(name, value)
+
+  def __delattr__(self, name):
+    return self.__dict__['__wrapped'].__delattr__(name)
+
+  def __dir__(self):
+    return self.__dict__['__wrapped'].__dir__()
+
+  def __getitem__(self, name):
+    return self.__dict__['__wrapped'].__getitem__(name)
+
+  def __setitem__(self, name, flag):
+    return self.__dict__['__wrapped'].__setitem__(name, flag)
+
+  def __len__(self):
+    return self.__dict__['__wrapped'].__len__()
+
+  def __iter__(self):
+    return self.__dict__['__wrapped'].__iter__()
+
+  def __str__(self):
+    return self.__dict__['__wrapped'].__str__()
+
+  def __call__(self, *args, **kwargs):
+    return self.__dict__['__wrapped'].__call__(*args, **kwargs)
+
+
+# pylint: disable=invalid-name,used-before-assignment
+# absl.flags APIs use `default` as the name of the default value argument.
+# Allow the following functions continue to accept `default_value`.
+DEFINE_string = _wrap_define_function(DEFINE_string)
+DEFINE_boolean = _wrap_define_function(DEFINE_boolean)
+DEFINE_bool = DEFINE_boolean
+DEFINE_float = _wrap_define_function(DEFINE_float)
+DEFINE_integer = _wrap_define_function(DEFINE_integer)
+# pylint: enable=invalid-name,used-before-assignment
+
+FLAGS = _FlagValuesWrapper(FLAGS)  # pylint: disable=used-before-assignment
diff --git a/tensorflow/python/platform/flags_test.py b/tensorflow/python/platform/flags_test.py
index 23060e17d279cfb282f20610e0a1639db3a43ecf..bd3c8e39959a41ada22f7ee4cef4d3d462e9e6cf 100644
--- a/tensorflow/python/platform/flags_test.py
+++ b/tensorflow/python/platform/flags_test.py
@@ -17,18 +17,110 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import sys
 import unittest
 
 from absl import flags as absl_flags
 
 from tensorflow.python.platform import flags
+from tensorflow.python.platform import test
+
+
+flags.DEFINE_string(
+    flag_name='old_string', default_value='default', docstring='docstring')
+flags.DEFINE_string(
+    name='new_string', default='default', help='docstring')
+flags.DEFINE_integer(
+    flag_name='old_integer', default_value=1, docstring='docstring')
+flags.DEFINE_integer(
+    name='new_integer', default=1, help='docstring')
+flags.DEFINE_float(
+    flag_name='old_float', default_value=1.5, docstring='docstring')
+flags.DEFINE_float(
+    name='new_float', default=1.5, help='docstring')
+flags.DEFINE_bool(
+    flag_name='old_bool', default_value=True, docstring='docstring')
+flags.DEFINE_bool(
+    name='new_bool', default=True, help='docstring')
+flags.DEFINE_boolean(
+    flag_name='old_boolean', default_value=False, docstring='docstring')
+flags.DEFINE_boolean(
+    name='new_boolean', default=False, help='docstring')
 
 
 class FlagsTest(unittest.TestCase):
 
-  def test_global_flags_object(self):
-    self.assertIs(flags.FLAGS, absl_flags.FLAGS)
+  def setUp(self):
+    self.original_flags = flags.FlagValues()
+    self.wrapped_flags = flags._FlagValuesWrapper(self.original_flags)
+    flags.DEFINE_string(
+        'test', 'default', 'test flag', flag_values=self.wrapped_flags)
+
+  def test_attribute_overrides(self):
+    # Test that methods defined in absl.flags.FlagValues are the same as the
+    # wrapped ones.
+    self.assertEqual(flags.FLAGS.is_parsed, absl_flags.FLAGS.is_parsed)
+
+  def test_getattr(self):
+    self.assertFalse(self.wrapped_flags.is_parsed())
+    with test.mock.patch.object(sys, 'argv', new=['program', '--test=new']):
+      self.assertEqual('new', self.wrapped_flags.test)
+    self.assertTrue(self.wrapped_flags.is_parsed())
+
+  def test_setattr(self):
+    self.assertEqual('default', self.wrapped_flags.test)
+    self.wrapped_flags.test = 'new'
+    self.assertEqual('new', self.wrapped_flags.test)
+
+  def test_delattr(self):
+    del self.wrapped_flags.test
+    self.assertNotIn('test', self.wrapped_flags)
+    with self.assertRaises(AttributeError):
+      _ = self.wrapped_flags.test
+
+  def test_dir(self):
+    self.assertEqual(['test'], dir(self.wrapped_flags))
+
+  def test_getitem(self):
+    self.assertIs(self.original_flags['test'], self.wrapped_flags['test'])
+
+  def test_setitem(self):
+    flag = flags.Flag(flags.ArgumentParser(), flags.ArgumentSerializer(),
+                      'fruit', 'apple', 'the fruit type')
+    self.wrapped_flags['fruit'] = flag
+    self.assertIs(self.original_flags['fruit'], self.wrapped_flags['fruit'])
+    self.assertEqual('apple', self.wrapped_flags.fruit)
+
+  def test_len(self):
+    self.assertEqual(1, len(self.wrapped_flags))
+
+  def test_iter(self):
+    self.assertEqual(['test'], list(self.wrapped_flags))
+
+  def test_str(self):
+    self.assertEqual(str(self.wrapped_flags), str(self.original_flags))
+
+  def test_call(self):
+    self.wrapped_flags(['program', '--test=new'])
+    self.assertEqual('new', self.wrapped_flags.test)
+
+  def test_keyword_arguments(self):
+    test_cases = (
+        ('old_string', 'default'),
+        ('new_string', 'default'),
+        ('old_integer', 1),
+        ('new_integer', 1),
+        ('old_float', 1.5),
+        ('new_float', 1.5),
+        ('old_bool', True),
+        ('new_bool', True),
+        ('old_boolean', False),
+        ('new_boolean', False),
+    )
+    for flag_name, default_value in test_cases:
+      self.assertEqual(default_value, absl_flags.FLAGS[flag_name].default)
+      self.assertEqual('docstring', absl_flags.FLAGS[flag_name].help)
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
   unittest.main()
diff --git a/tensorflow/python/platform/stacktrace_handler.i b/tensorflow/python/platform/stacktrace_handler.i
new file mode 100644
index 0000000000000000000000000000000000000000..be4eea4c2f8eadb54f2773b8d97058cb6f51d63a
--- /dev/null
+++ b/tensorflow/python/platform/stacktrace_handler.i
@@ -0,0 +1,27 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+%include "tensorflow/python/platform/base.i"
+
+%{
+#include "tensorflow/core/platform/stacktrace_handler.h"
+%}
+
+%ignoreall
+%unignore tensorflow;
+%unignore tensorflow::testing;
+%unignore tensorflow::testing::InstallStacktraceHandler;
+%include "tensorflow/core/platform/stacktrace_handler.h"
+%unignoreall
diff --git a/tensorflow/python/platform/stacktrace_handler_test.py b/tensorflow/python/platform/stacktrace_handler_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f0e534f4cbd97ecbd7db1fae3b48af72310c24f
--- /dev/null
+++ b/tensorflow/python/platform/stacktrace_handler_test.py
@@ -0,0 +1,80 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Test to make sure stack trace is generated in case of test failures."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import os
+import signal
+import subprocess
+import sys
+
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging as logging
+
+
+# FLAGS defined at the bottom:
+# child (bool) set to true if we are running in the child process.
+FLAGS = None
+
+_CHILD_FLAG_HELP = 'Boolean. Set to true if this is the child process.'
+
+
+class StacktraceHandlerTest(test.TestCase):
+
+  def testChildProcessKillsItself(self):
+    if FLAGS.child:
+      os.kill(os.getpid(), signal.SIGABRT)
+
+  def testGeneratesStacktrace(self):
+    if FLAGS.child:
+      return
+
+    # Subprocess sys.argv[0] with --child=True
+    if sys.executable:
+      child_process = subprocess.Popen(
+          [sys.executable, sys.argv[0], '--child=True'], cwd=os.getcwd(),
+          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    else:
+      child_process = subprocess.Popen(
+          [sys.argv[0], '--child=True'], cwd=os.getcwd(),
+          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    # Capture its output. capture both stdout and stderr and append them.
+    # We are not worried about timing or order of messages in this test.
+    child_output = child_process.stdout.read() + child_process.stderr.read()
+
+    # Make sure the child process is dead before we proceed.
+    child_process.wait()
+
+    logging.info('Output from the child process:')
+    logging.info(child_output)
+
+    # Verify a stack trace is printed.
+    self.assertIn(b'PyEval_EvalFrame', child_output)
+
+
+if __name__ == '__main__':
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--child', type=bool, default=False, help=_CHILD_FLAG_HELP)
+  FLAGS, unparsed = parser.parse_known_args()
+
+  # Now update argv, so that unittest library does not get confused.
+  sys.argv = [sys.argv[0]] + unparsed
+  test.main()
diff --git a/tensorflow/python/platform/sysconfig.py b/tensorflow/python/platform/sysconfig.py
index 57635fb4d9d6698f1a6f1a51918fe3f269d8909b..f6c4f2227fbba75e4fdb41ddeaa55ba3f9168677 100644
--- a/tensorflow/python/platform/sysconfig.py
+++ b/tensorflow/python/platform/sysconfig.py
@@ -27,6 +27,7 @@ from __future__ import print_function
 import os.path as _os_path
 
 from tensorflow.python.framework.versions import CXX11_ABI_FLAG as _CXX11_ABI_FLAG
+from tensorflow.python.framework.versions import MONOLITHIC_BUILD as _MONOLITHIC_BUILD
 from tensorflow.python.util.all_util import remove_undocumented
 
 
@@ -75,8 +76,9 @@ def get_link_flags():
     The link flags.
   """
   flags = []
-  flags.append('-L%s' % get_lib())
-  flags.append('-ltensorflow_framework')
+  if not _MONOLITHIC_BUILD:
+    flags.append('-L%s' % get_lib())
+    flags.append('-ltensorflow_framework')
   return flags
 
 _allowed_symbols = []
diff --git a/tensorflow/python/platform/test.py b/tensorflow/python/platform/test.py
index 72025f671721a49fe501f894355b92ced8b7a4de..ec280c6e1ee75f8192b318c6830c62cd9dec9c55 100644
--- a/tensorflow/python/platform/test.py
+++ b/tensorflow/python/platform/test.py
@@ -70,6 +70,7 @@ StubOutForTesting = _googletest.StubOutForTesting  # pylint: disable=invalid-nam
 
 def main(argv=None):
   """Runs all unit tests."""
+  _test_util.InstallStackTraceHandler()
   return _googletest.main(argv)
 
 
diff --git a/tensorflow/python/profiler/BUILD b/tensorflow/python/profiler/BUILD
index 519b05975f03c5f1899f527636a4c855feceaacc..c815aad0a065eaba4a0dc52487b5ee67e271a146 100644
--- a/tensorflow/python/profiler/BUILD
+++ b/tensorflow/python/profiler/BUILD
@@ -57,7 +57,10 @@ cuda_py_test(
         "//tensorflow/python:platform",
         "//tensorflow/python:variables",
     ],
-    tags = ["no_pip"],
+    tags = [
+        "no_pip",
+        "oss_serial",
+    ],
 )
 
 cuda_py_test(
diff --git a/tensorflow/python/profiler/internal/BUILD b/tensorflow/python/profiler/internal/BUILD
index dcac070a3f198d33e5a94233865b775a2e1254bb..362a1c49e64118134a4039ae3a5d939ed0b6d730 100644
--- a/tensorflow/python/profiler/internal/BUILD
+++ b/tensorflow/python/profiler/internal/BUILD
@@ -21,6 +21,7 @@ py_library(
     name = "model_analyzer_testlib",
     srcs = ["model_analyzer_testlib.py"],
     srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:framework_for_generated_wrappers",
diff --git a/tensorflow/python/profiler/internal/model_analyzer_testlib.py b/tensorflow/python/profiler/internal/model_analyzer_testlib.py
index 350a62c0eacafcab6e19c5110fe50c15cb5139ff..895646997b116dc5beb23264e97d1ec4c5aaa810 100644
--- a/tensorflow/python/profiler/internal/model_analyzer_testlib.py
+++ b/tensorflow/python/profiler/internal/model_analyzer_testlib.py
@@ -109,3 +109,9 @@ def ProfilerFromFile(profile_file):
   profiler = model_analyzer.Profiler.__new__(model_analyzer.Profiler)
   yield profiler
   print_mdl.DeleteProfiler()
+
+
+def CheckAndRemoveDoc(profile):
+  assert 'Doc:' in profile
+  start_pos = profile.find('Profile:')
+  return profile[start_pos + 9:]
diff --git a/tensorflow/python/profiler/internal/print_model_analysis_test.py b/tensorflow/python/profiler/internal/print_model_analysis_test.py
index 797c430e99b21a73a2260d45d6c9f25e26122806..186c028d7ccf63d8a4b6c1c97e793611671ad08f 100644
--- a/tensorflow/python/profiler/internal/print_model_analysis_test.py
+++ b/tensorflow/python/profiler/internal/print_model_analysis_test.py
@@ -18,22 +18,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from google.protobuf import text_format
-
-from tensorflow.core.profiler import tfprof_options_pb2
-from tensorflow.core.profiler import tfprof_output_pb2
-from tensorflow.python.client import session
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import test
 
-# pylint: disable=g-bad-import-order
-# XXX: this depends on pywrap_tensorflow and must come later
-from tensorflow.python import pywrap_tensorflow as print_mdl
 
 # pylint: disable=bad-whitespace
 # pylint: disable=bad-continuation
@@ -69,407 +60,6 @@ class PrintModelAnalysisTest(test.TestCase):
     x = nn_ops.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME')
     return x
 
-  def testPrintModelAnalysis(self):
-    opts = tfprof_options_pb2.OptionsProto()
-    opts.max_depth = TEST_OPTIONS['max_depth']
-    opts.min_bytes = TEST_OPTIONS['min_bytes']
-    opts.min_micros = TEST_OPTIONS['min_micros']
-    opts.min_params = TEST_OPTIONS['min_params']
-    opts.min_float_ops = TEST_OPTIONS['min_float_ops']
-    opts.order_by = TEST_OPTIONS['order_by']
-    opts.step = -1
-    for p in TEST_OPTIONS['account_type_regexes']:
-      opts.account_type_regexes.append(p)
-    for p in TEST_OPTIONS['start_name_regexes']:
-      opts.start_name_regexes.append(p)
-    for p in TEST_OPTIONS['trim_name_regexes']:
-      opts.trim_name_regexes.append(p)
-    for p in TEST_OPTIONS['show_name_regexes']:
-      opts.show_name_regexes.append(p)
-    for p in TEST_OPTIONS['hide_name_regexes']:
-      opts.hide_name_regexes.append(p)
-    opts.account_displayed_op_only = TEST_OPTIONS['account_displayed_op_only']
-    for p in TEST_OPTIONS['select']:
-      opts.select.append(p)
-    opts.output = TEST_OPTIONS['output']
-
-    with session.Session() as sess, ops.device('/cpu:0'):
-      _ = self._BuildSmallModel()
-      tfprof_pb = tfprof_output_pb2.GraphNodeProto()
-      tfprof_pb.ParseFromString(
-          print_mdl.PrintModelAnalysis(
-              sess.graph.as_graph_def(add_shapes=True).SerializeToString(),
-              b'',
-              b'',
-              b'scope',
-              opts.SerializeToString()))
-
-      expected_pb = tfprof_output_pb2.GraphNodeProto()
-      text_format.Merge(r"""name: "_TFProfRoot"
-          exec_micros: 0
-          requested_bytes: 0
-          total_exec_micros: 0
-          total_requested_bytes: 0
-          total_parameters: 648
-          children {
-            name: "Conv2D"
-            exec_micros: 0
-            requested_bytes: 0
-            total_exec_micros: 0
-            total_requested_bytes: 0
-            total_parameters: 0
-            float_ops: 0
-            total_float_ops: 0
-            input_shapes {
-              key: 0
-              value {
-                dim {
-                  size: 2
-                }
-                dim {
-                  size: 6
-                }
-                dim {
-                  size: 6
-                }
-                dim {
-                  size: 3
-                }
-              }
-            }
-            input_shapes {
-              key: 1
-              value {
-                dim {
-                  size: 6
-                }
-                dim {
-                  size: 6
-                }
-                dim {
-                  size: 3
-                }
-                dim {
-                  size: 6
-                }
-              }
-            }
-            accelerator_exec_micros: 0
-            cpu_exec_micros: 0
-            total_accelerator_exec_micros: 0
-            total_cpu_exec_micros: 0
-            run_count: 0
-            total_run_count: 0
-            total_definition_count: 1
-          }
-          children {
-            name: "DW"
-            exec_micros: 0
-            requested_bytes: 0
-            parameters: 648
-            total_exec_micros: 0
-            total_requested_bytes: 0
-            total_parameters: 648
-            children {
-              name: "DW/Assign"
-              exec_micros: 0
-              requested_bytes: 0
-              total_exec_micros: 0
-              total_requested_bytes: 0
-              total_parameters: 0
-              float_ops: 0
-              total_float_ops: 0
-              input_shapes {
-                key: 0
-                value {
-                  dim {
-                    size: 6
-                  }
-                  dim {
-                    size: 6
-                  }
-                  dim {
-                    size: 3
-                  }
-                  dim {
-                    size: 6
-                  }
-                }
-              }
-              input_shapes {
-                key: 1
-                value {
-                  dim {
-                    size: 6
-                  }
-                  dim {
-                    size: 6
-                  }
-                  dim {
-                    size: 3
-                  }
-                  dim {
-                    size: 6
-                  }
-                }
-              }
-              accelerator_exec_micros: 0
-              cpu_exec_micros: 0
-              total_accelerator_exec_micros: 0
-              total_cpu_exec_micros: 0
-              run_count: 0
-              total_run_count: 0
-              total_definition_count: 1
-            }
-            children {
-              name: "DW/Initializer"
-              exec_micros: 0
-              requested_bytes: 0
-              total_exec_micros: 0
-              total_requested_bytes: 0
-              total_parameters: 0
-              children {
-                name: "DW/Initializer/random_normal"
-                exec_micros: 0
-                requested_bytes: 0
-                total_exec_micros: 0
-                total_requested_bytes: 0
-                total_parameters: 0
-                children {
-                  name: "DW/Initializer/random_normal/RandomStandardNormal"
-                  exec_micros: 0
-                  requested_bytes: 0
-                  total_exec_micros: 0
-                  total_requested_bytes: 0
-                  total_parameters: 0
-                  float_ops: 0
-                  total_float_ops: 0
-                  input_shapes {
-                    key: 0
-                    value {
-                      dim {
-                        size: 4
-                      }
-                    }
-                  }
-                  accelerator_exec_micros: 0
-                  cpu_exec_micros: 0
-                  total_accelerator_exec_micros: 0
-                  total_cpu_exec_micros: 0
-                  run_count: 0
-                  total_run_count: 0
-                  total_definition_count: 1
-                }
-                children {
-                  name: "DW/Initializer/random_normal/mean"
-                  exec_micros: 0
-                  requested_bytes: 0
-                  total_exec_micros: 0
-                  total_requested_bytes: 0
-                  total_parameters: 0
-                  float_ops: 0
-                  total_float_ops: 0
-                  accelerator_exec_micros: 0
-                  cpu_exec_micros: 0
-                  total_accelerator_exec_micros: 0
-                  total_cpu_exec_micros: 0
-                  run_count: 0
-                  total_run_count: 0
-                  total_definition_count: 1
-                }
-                children {
-                  name: "DW/Initializer/random_normal/mul"
-                  exec_micros: 0
-                  requested_bytes: 0
-                  total_exec_micros: 0
-                  total_requested_bytes: 0
-                  total_parameters: 0
-                  float_ops: 0
-                  total_float_ops: 0
-                  input_shapes {
-                    key: 0
-                    value {
-                      dim {
-                        size: 6
-                      }
-                      dim {
-                        size: 6
-                      }
-                      dim {
-                        size: 3
-                      }
-                      dim {
-                        size: 6
-                      }
-                    }
-                  }
-                  input_shapes {
-                    key: 1
-                    value {
-                      dim {
-                        size: 1
-                      }
-                    }
-                  }
-                  accelerator_exec_micros: 0
-                  cpu_exec_micros: 0
-                  total_accelerator_exec_micros: 0
-                  total_cpu_exec_micros: 0
-                  run_count: 0
-                  total_run_count: 0
-                  total_definition_count: 1
-                }
-                children {
-                  name: "DW/Initializer/random_normal/shape"
-                  exec_micros: 0
-                  requested_bytes: 0
-                  total_exec_micros: 0
-                  total_requested_bytes: 0
-                  total_parameters: 0
-                  float_ops: 0
-                  total_float_ops: 0
-                  accelerator_exec_micros: 0
-                  cpu_exec_micros: 0
-                  total_accelerator_exec_micros: 0
-                  total_cpu_exec_micros: 0
-                  run_count: 0
-                  total_run_count: 0
-                  total_definition_count: 1
-                }
-                children {
-                  name: "DW/Initializer/random_normal/stddev"
-                  exec_micros: 0
-                  requested_bytes: 0
-                  total_exec_micros: 0
-                  total_requested_bytes: 0
-                  total_parameters: 0
-                  float_ops: 0
-                  total_float_ops: 0
-                  accelerator_exec_micros: 0
-                  cpu_exec_micros: 0
-                  total_accelerator_exec_micros: 0
-                  total_cpu_exec_micros: 0
-                  run_count: 0
-                  total_run_count: 0
-                  total_definition_count: 1
-                }
-                float_ops: 0
-                total_float_ops: 0
-                input_shapes {
-                  key: 0
-                  value {
-                    dim {
-                      size: 6
-                    }
-                    dim {
-                      size: 6
-                    }
-                    dim {
-                      size: 3
-                    }
-                    dim {
-                      size: 6
-                    }
-                  }
-                }
-                input_shapes {
-                  key: 1
-                  value {
-                    dim {
-                      size: 1
-                    }
-                  }
-                }
-                accelerator_exec_micros: 0
-                cpu_exec_micros: 0
-                total_accelerator_exec_micros: 0
-                total_cpu_exec_micros: 0
-                run_count: 0
-                total_run_count: 0
-                total_definition_count: 6
-              }
-              float_ops: 0
-              total_float_ops: 0
-              accelerator_exec_micros: 0
-              cpu_exec_micros: 0
-              total_accelerator_exec_micros: 0
-              total_cpu_exec_micros: 0
-              run_count: 0
-              total_run_count: 0
-              total_definition_count: 7
-            }
-            children {
-              name: "DW/read"
-              exec_micros: 0
-              requested_bytes: 0
-              total_exec_micros: 0
-              total_requested_bytes: 0
-              total_parameters: 0
-              float_ops: 0
-              total_float_ops: 0
-              input_shapes {
-                key: 0
-                value {
-                  dim {
-                    size: 6
-                  }
-                  dim {
-                    size: 6
-                  }
-                  dim {
-                    size: 3
-                  }
-                  dim {
-                    size: 6
-                  }
-                }
-              }
-              accelerator_exec_micros: 0
-              cpu_exec_micros: 0
-              total_accelerator_exec_micros: 0
-              total_cpu_exec_micros: 0
-              run_count: 0
-              total_run_count: 0
-              total_definition_count: 1
-            }
-            float_ops: 0
-            total_float_ops: 0
-            accelerator_exec_micros: 0
-            cpu_exec_micros: 0
-            total_accelerator_exec_micros: 0
-            total_cpu_exec_micros: 0
-            run_count: 0
-            total_run_count: 0
-            total_definition_count: 10
-          }
-          children {
-            name: "zeros"
-            exec_micros: 0
-            requested_bytes: 0
-            total_exec_micros: 0
-            total_requested_bytes: 0
-            total_parameters: 0
-            float_ops: 0
-            total_float_ops: 0
-            accelerator_exec_micros: 0
-            cpu_exec_micros: 0
-            total_accelerator_exec_micros: 0
-            total_cpu_exec_micros: 0
-            run_count: 0
-            total_run_count: 0
-            total_definition_count: 1
-          }
-          float_ops: 0
-          total_float_ops: 0
-          accelerator_exec_micros: 0
-          cpu_exec_micros: 0
-          total_accelerator_exec_micros: 0
-          total_cpu_exec_micros: 0
-          run_count: 0
-          total_run_count: 0
-          total_definition_count: 13""", expected_pb)
-      self.assertEqual(expected_pb, tfprof_pb)
-
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/profiler/internal/run_metadata_test.py b/tensorflow/python/profiler/internal/run_metadata_test.py
index 4c915ac79a4534231846295f51c56f088948b594..fd893d6cde66e576976352bd8e0da639d22ce067 100644
--- a/tensorflow/python/profiler/internal/run_metadata_test.py
+++ b/tensorflow/python/profiler/internal/run_metadata_test.py
@@ -205,17 +205,13 @@ class RunMetadataTest(test.TestCase):
     for _, f in six.iteritems(back_to_forward):
       self.assertTrue(f in forward_op)
 
-  # pylint: disable=pointless-string-statement
-  """
-  # TODO(xpan): This test is flaky because RunMetadata returned from TensorFlow
-  # is random. Still being investigated.
   def testLoopGPU(self):
     if not test.is_gpu_available():
       return
 
     ops.reset_default_graph()
     with ops.device('/device:GPU:0'):
-      tfprof_node, run_meta = _run_loop_model()
+      _, run_meta = _run_loop_model()
       # The while-loop caused a node to appear 4 times in scheduling.
       ret = _extract_node(run_meta,
                           'rnn/while/basic_rnn_cell/MatMul')
@@ -227,11 +223,6 @@ class RunMetadataTest(test.TestCase):
 
       self.assertGreaterEqual(len(ret['gpu:0/stream:all']), 4, '%s' % run_meta)
 
-      total_accelerator_execs = 0
-      for node in ret['gpu:0/stream:all']:
-        total_accelerator_execs += node.op_end_rel_micros
-  """
-
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py
index 46a921c0a13ecca0febf6aa4085539abbd1a6fbf..8f780545607f7ba2337c83ad2c3740f542b802f6 100644
--- a/tensorflow/python/profiler/model_analyzer.py
+++ b/tensorflow/python/profiler/model_analyzer.py
@@ -28,7 +28,9 @@ from google.protobuf import message
 from tensorflow.core.profiler import tfprof_options_pb2
 from tensorflow.core.profiler import tfprof_output_pb2
 from tensorflow.python import pywrap_tensorflow as print_mdl
+from tensorflow.python.eager import context
 from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
 from tensorflow.python.profiler import option_builder
 from tensorflow.python.profiler import tfprof_logger
 
@@ -45,6 +47,14 @@ ALL_ADVICE = {
 }
 
 
+def _graph_string(graph):
+  """Helper to serialize a graph to string."""
+  if graph:
+    return graph.as_graph_def(add_shapes=True).SerializeToString()
+  else:
+    return b''
+
+
 def _build_options(options):
   """Build tfprof.OptionsProto.
 
@@ -151,24 +161,25 @@ class Profiler(object):
   ```
   """
 
-  def __init__(self, graph, op_log=None):
+  def __init__(self, graph=None, op_log=None):
     """Constructor.
 
     Args:
-      graph: tf.Graph.
+      graph: tf.Graph. If None and eager execution is not enabled, use
+          default graph.
       op_log: optional. tensorflow::tfprof::OpLogProto proto. Used to define
           extra op types.
     """
+    if not graph and context.in_graph_mode():
+      graph = ops.get_default_graph()
     self._coverage = 0.0
     self._graph = graph
     # pylint: disable=protected-access
-    op_log = tfprof_logger._merge_default_with_oplog(
+    op_log = tfprof_logger.merge_default_with_oplog(
         self._graph, op_log=op_log)
     # pylint: enable=protected-access
-
     print_mdl.NewProfiler(
-        self._graph.as_graph_def(add_shapes=True).SerializeToString(),
-        op_log.SerializeToString())
+        _graph_string(self._graph), op_log.SerializeToString())
 
   def __del__(self):
     print_mdl.DeleteProfiler()
@@ -177,19 +188,19 @@ class Profiler(object):
     """Add statistics of a step.
 
     Args:
-      step: int, A step used to identify the RunMetadata. Must be different
-         across different AddStep() calls.
+      step: int, An id used to group one or more different `run_meta` together.
+          When profiling with the profile_xxx APIs, user can use the `step`
+          id in the `options` to profile these `run_meta` together.
       run_meta: RunMetadata proto that contains statistics of a session run.
     """
     # pylint: disable=protected-access
-    op_log = tfprof_logger._merge_default_with_oplog(
+    op_log = tfprof_logger.merge_default_with_oplog(
         self._graph, run_meta=run_meta)
     # pylint: enable=protected-access
     # TODO(xpan): P1: Better to find the current graph.
-    self._coverage = print_mdl.AddStep(
-        step,
-        self._graph.as_graph_def(add_shapes=True).SerializeToString(),
-        run_meta.SerializeToString(), op_log.SerializeToString())
+    self._coverage = print_mdl.AddStep(step, _graph_string(self._graph),
+                                       run_meta.SerializeToString(),
+                                       op_log.SerializeToString())
 
   def profile_python(self, options):
     """Profile the statistics of the Python codes.
@@ -277,12 +288,23 @@ class Profiler(object):
         print_mdl.Profile('advise'.encode('utf-8'), opts.SerializeToString()))
     return advise_pb
 
+  def serialize_to_string(self):
+    """Serialize the ProfileProto to a binary string.
+
+      Users can write it to file for offline analysis by tfprof commandline
+      or graphical interface.
+
+    Returns:
+      ProfileProto binary string.
+    """
+    return print_mdl.SerializeToString()
+
   def _write_profile(self, filename):
     """Writes the profile to a file."""
     print_mdl.WriteProfile(filename)
 
 
-def profile(graph,
+def profile(graph=None,
             run_meta=None,
             op_log=None,
             cmd='scope',
@@ -293,7 +315,8 @@ def profile(graph,
     https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/README.md
 
   Args:
-    graph: required tf.Graph.
+    graph: tf.Graph. If None and eager execution is not enabled, use
+        default graph.
     run_meta: optional tensorflow.RunMetadata proto. It is necessary to
         to support run time information profiling, such as time and memory.
     op_log: tensorflow.tfprof.OpLogProto proto. User can assign "types" to
@@ -310,12 +333,14 @@ def profile(graph,
     If cmd is 'op' or 'code', returns MultiGraphNodeProto proto.
     Side effect: stdout/file/timeline.json depending on options['output']
   """
+  if not graph and context.in_graph_mode():
+    graph = ops.get_default_graph()
+
   if options == _DEFAULT_PROFILE_OPTIONS:
     options = (option_builder.ProfileOptionBuilder
                .trainable_variables_parameter())
-
   # pylint: disable=protected-access
-  op_log = tfprof_logger._merge_default_with_oplog(
+  op_log = tfprof_logger.merge_default_with_oplog(
       graph, op_log, run_meta, add_trace=cmd == 'code')
   # pylint: enable=protected-access
 
@@ -323,14 +348,14 @@ def profile(graph,
 
   run_meta_str = run_meta.SerializeToString() if run_meta else b''
 
+  graph_str = _graph_string(graph)
+
   if cmd == 'code' or cmd == 'op':
     tfprof_node = tfprof_output_pb2.MultiGraphNodeProto()
-    ret = print_mdl.PrintModelAnalysis(
-        graph.as_graph_def(add_shapes=True).SerializeToString(),
-        run_meta_str,
-        op_log.SerializeToString(),
-        cmd.encode('utf-8'),
-        opts.SerializeToString())
+    ret = print_mdl.PrintModelAnalysis(graph_str, run_meta_str,
+                                       op_log.SerializeToString(),
+                                       cmd.encode('utf-8'),
+                                       opts.SerializeToString())
     try:
       tfprof_node.ParseFromString(ret)
     except message.DecodeError as e:
@@ -338,12 +363,10 @@ def profile(graph,
 
   elif cmd == 'graph' or cmd == 'scope':
     tfprof_node = tfprof_output_pb2.GraphNodeProto()
-    ret = print_mdl.PrintModelAnalysis(
-        graph.as_graph_def(add_shapes=True).SerializeToString(),
-        run_meta_str,
-        op_log.SerializeToString(),
-        cmd.encode('utf-8'),
-        opts.SerializeToString())
+    ret = print_mdl.PrintModelAnalysis(graph_str, run_meta_str,
+                                       op_log.SerializeToString(),
+                                       cmd.encode('utf-8'),
+                                       opts.SerializeToString())
     try:
       tfprof_node.ParseFromString(ret)
     except message.DecodeError as e:
@@ -355,7 +378,7 @@ def profile(graph,
   return tfprof_node
 
 
-def advise(graph, run_meta=None, options=_DEFAULT_ADVISE_OPTIONS):
+def advise(graph=None, run_meta=None, options=_DEFAULT_ADVISE_OPTIONS):
   """Auto profile and advise.
 
     Builds profiles and automatically check anomalies of various
@@ -363,18 +386,22 @@ def advise(graph, run_meta=None, options=_DEFAULT_ADVISE_OPTIONS):
     https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/README.md
 
   Args:
-    graph: required tf.Graph.
+    graph: tf.Graph. If None and eager execution is not enabled, use
+        default graph.
     run_meta: optional tensorflow.RunMetadata proto. It is necessary to
         to support run time information profiling, such as time and memory.
     options: see ALL_ADVICE example above. Default checks everything.
   Returns:
     Returns AdviceProto proto
   """
+  if not graph and context.in_eager_execution():
+    graph = ops.get_default_graph()
+
   if options == _DEFAULT_ADVISE_OPTIONS:
     options = ALL_ADVICE.copy()
 
   # pylint: disable=protected-access
-  op_log = tfprof_logger._merge_default_with_oplog(
+  op_log = tfprof_logger.merge_default_with_oplog(
       graph, None, run_meta, add_trace=True)
   # pylint: enable=protected-access
 
@@ -384,9 +411,6 @@ def advise(graph, run_meta=None, options=_DEFAULT_ADVISE_OPTIONS):
   ret = tfprof_output_pb2.AdviceProto()
   ret.ParseFromString(
       print_mdl.PrintModelAnalysis(
-          graph.as_graph_def(add_shapes=True).SerializeToString(),
-          run_meta_str,
-          op_log.SerializeToString(),
-          'advise'.encode('utf-8'),
-          opts.SerializeToString()))
+          _graph_string(graph), run_meta_str, op_log.SerializeToString(),
+          'advise'.encode('utf-8'), opts.SerializeToString()))
   return ret
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 26fb99efe6753f36fbe50a5a310927276ddcbf2e..915385558889e64277611bd71251f8f937a18159 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -23,12 +23,20 @@ import os
 import random
 import re
 
+import numpy as np
+
 from tensorflow.core.profiler import profile_pb2
+from tensorflow.core.profiler import tfprof_log_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.client import session
+from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gradients
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
@@ -60,7 +68,7 @@ class PrintModelAnalysisTest(test.TestCase):
                          '  DW (3x3x3x6, 162/162 params)\n'
                          '  DW2 (2x2x6x12, 288/288 params)\n'
                          '  ScalarW (1, 1/1 params)\n',
-                         f.read())
+                         lib.CheckAndRemoveDoc(f.read()))
 
   def testSelectEverythingDetail(self):
     ops.reset_default_graph()
@@ -87,7 +95,7 @@ class PrintModelAnalysisTest(test.TestCase):
 
         with gfile.Open(outfile, 'r') as f:
           # pylint: disable=line-too-long
-          dump_str = f.read()
+          dump_str = lib.CheckAndRemoveDoc(f.read())
           outputs = dump_str.split('\n')
 
           self.assertEqual(outputs[0],
@@ -130,7 +138,7 @@ class PrintModelAnalysisTest(test.TestCase):
     with lib.ProfilerFromFile(profile_file) as profiler:
       profiler.profile_name_scope(options=opts)
       with gfile.Open(outfile, 'r') as f:
-        self.assertEqual(dump_str, f.read())
+        self.assertEqual(dump_str, lib.CheckAndRemoveDoc(f.read()))
 
   def testSelectEverything(self):
     ops.reset_default_graph()
@@ -158,13 +166,6 @@ class PrintModelAnalysisTest(test.TestCase):
       model_analyzer.profile(
           sess.graph, run_meta, options=opts)
 
-      with gfile.Open(outfile, 'r') as f:
-        # pylint: disable=line-too-long
-        self.assertEqual(
-            'node name | # parameters | # float_ops | assigned devices | op types | op count (run|defined) | input shapes\n_TFProfRoot (--/451 params, --/11.34k flops, _kTFScopeParent, --/8|--/36, )\n  Conv2D (0/0 params, 5.83k/5.83k flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Conv2D, 1/1|1/1, 0:2x6x6x3|1:3x3x3x6)\n  Conv2D_1 (0/0 params, 4.61k/4.61k flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Conv2D, 1/1|1/1, 0:2x3x3x6|1:2x2x6x12)\n  DW (3x3x3x6, 162/162 params, 0/324 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n    DW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:3x3x3x6|1:3x3x3x6)\n    DW/Initializer (0/0 params, 0/324 flops, _kTFScopeParent, 0/0|1/7, )\n      DW/Initializer/random_normal (0/0 params, 162/324 flops, Add, 0/0|1/6, 0:3x3x3x6|1:1)\n        DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n        DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW/Initializer/random_normal/mul (0/0 params, 162/162 flops, Mul, 0/0|1/1, 0:3x3x3x6|1:1)\n        DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    DW/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Identity, 1/1|1/1, 0:3x3x3x6)\n  DW2 (2x2x6x12, 288/288 params, 0/576 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n    DW2/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:2x2x6x12|1:2x2x6x12)\n    DW2/Initializer (0/0 params, 0/576 flops, _kTFScopeParent, 0/0|1/7, )\n      DW2/Initializer/random_normal (0/0 params, 288/576 flops, Add, 0/0|1/6, 0:2x2x6x12|1:1)\n        DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n        DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW2/Initializer/random_normal/mul (0/0 params, 288/288 flops, Mul, 0/0|1/1, 0:2x2x6x12|1:1)\n        DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    DW2/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Identity, 1/1|1/1, 0:2x2x6x12)\n  ScalarW (1, 1/1 params, 0/2 flops, VariableV2|_trainable_variables, 0/0|1/10, )\n    ScalarW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:1|1:1)\n    ScalarW/Initializer (0/0 params, 0/2 flops, _kTFScopeParent, 0/0|1/7, )\n      ScalarW/Initializer/random_normal (0/0 params, 1/2 flops, Add, 0/0|1/6, 0:1|1:1)\n        ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:0)\n        ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        ScalarW/Initializer/random_normal/mul (0/0 params, 1/1 flops, Mul, 0/0|1/1, 0:1|1:1)\n        ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    ScalarW/read (0/0 params, 0/0 flops, Identity, 0/0|1/1, 0:1)\n  _retval_Conv2D_1_0_0 (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|_retval_Conv2D_1_0_0, 1/1|1/1, )\n  init (0/0 params, 0/0 flops, NoOp, 0/0|1/1, 0:1|1:3x3x3x6|2:2x2x6x12)\n  zeros (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/device:cpu:0, /job:localhost/replica:0/task:0/device:cpu:0|Const, 1/1|1/1, )\n',
-            f.read())
-        # pylint: enable=line-too-long
-
   def testSimpleCodeView(self):
     ops.reset_default_graph()
     outfile = os.path.join(test.get_temp_dir(), 'dump')
@@ -195,7 +196,7 @@ class PrintModelAnalysisTest(test.TestCase):
         # pylint: disable=line-too-long
         self.assertEqual(
             'node name | requested bytes | # parameters | # float_ops | assigned devices | in',
-            f.read()[0:80])
+            lib.CheckAndRemoveDoc(f.read())[0:80])
         # pylint: enable=line-too-long
 
   def testComplexCodeView(self):
@@ -224,12 +225,14 @@ class PrintModelAnalysisTest(test.TestCase):
         with gfile.Open(outfile, 'r') as f:
           lines = f.read().split('\n')
           result = '\n'.join([l[:min(len(l), 80)] for l in lines])
-          self.assertEqual(compat.as_bytes('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/168.85k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (0/1.80k params, 0/45.37k flops)\n    model_analyzer_testlib.py:40:BuildSmallModel (0/0 params, 0/0 flops)\n    model_analyzer_testlib.py:44:BuildSmallModel (0/4 params, 0/8 flops)\n    model_analyzer_testlib.py:48:BuildSmallModel (0/648 params, 0/1.30k flops)\n    model_analyzer_testlib.py:49:BuildSmallModel (0/0 params, 0/23.33k flops)\n    model_analyzer_testlib.py:53:BuildSmallModel (0/1.15k params, 0/2.30k flops)\n    model_analyzer_testlib.py:54:BuildSmallModel (0/0 params, 0/18.43k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (gradient) (0/0 params, 0/67.39k f\n    model_analyzer_testlib.py:49:BuildSmallModel (gradient) (0/0 params, 0/46.66\n    model_analyzer_testlib.py:54:BuildSmallModel (gradient) (0/0 params, 0/20.74\n  model_analyzer_testlib.py:67:BuildFullModel (0/1.04k params, 0/18.57k flops)\n  model_analyzer_testlib.py:67:BuildFullModel (gradient) (0/0 params, 0/37.00k f\n  model_analyzer_testlib.py:69:BuildFullModel (0/0 params, 0/0 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (0/0 params, 0/258 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (gradient) (0/0 params, 0/129 flop\n  model_analyzer_testlib.py:72:BuildFullModel (0/0 params, 0/141 flops)\n'),
-                           compat.as_bytes(result))
+          self.assertEqual(
+              compat.as_bytes(
+                  'node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/168.86k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (0/1.80k params, 0/45.37k flops)\n    model_analyzer_testlib.py:40:BuildSmallModel (0/0 params, 0/0 flops)\n    model_analyzer_testlib.py:44:BuildSmallModel (0/4 params, 0/8 flops)\n    model_analyzer_testlib.py:48:BuildSmallModel (0/648 params, 0/1.30k flops)\n    model_analyzer_testlib.py:49:BuildSmallModel (0/0 params, 0/23.33k flops)\n    model_analyzer_testlib.py:53:BuildSmallModel (0/1.15k params, 0/2.30k flops)\n    model_analyzer_testlib.py:54:BuildSmallModel (0/0 params, 0/18.43k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (gradient) (0/0 params, 0/67.39k f\n    model_analyzer_testlib.py:49:BuildSmallModel (gradient) (0/0 params, 0/46.66\n    model_analyzer_testlib.py:54:BuildSmallModel (gradient) (0/0 params, 0/20.74\n  model_analyzer_testlib.py:67:BuildFullModel (0/1.04k params, 0/18.58k flops)\n  model_analyzer_testlib.py:67:BuildFullModel (gradient) (0/0 params, 0/37.00k f\n  model_analyzer_testlib.py:69:BuildFullModel (0/0 params, 0/0 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (0/0 params, 0/258 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (gradient) (0/0 params, 0/129 flop\n  model_analyzer_testlib.py:72:BuildFullModel (0/0 params, 0/141 flops)\n'
+              ), compat.as_bytes(lib.CheckAndRemoveDoc(result)))
 
         self.assertLess(0, tfprof_node.total_exec_micros)
         self.assertEqual(2844, tfprof_node.total_parameters)
-        self.assertEqual(168854, tfprof_node.total_float_ops)
+        self.assertEqual(168863, tfprof_node.total_float_ops)
         self.assertEqual(8, len(tfprof_node.children))
         self.assertEqual('_TFProfRoot', tfprof_node.name)
         self.assertEqual(
@@ -346,8 +349,9 @@ class PrintModelAnalysisTest(test.TestCase):
       with gfile.Open(outfile, 'r') as f:
         # pylint: disable=line-too-long
         self.assertEqual(
-            'nodename|requestedbytes|peakbytes|residualbytes|outputbytes|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes\nConst0B(0',
-            f.read().replace('\t', '').replace(' ', '')[0:180])
+            'nodename|requestedbytes|peakbytes|residualbytes|outputbytes|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes',
+            lib.CheckAndRemoveDoc(f.read()).replace('\t',
+                                                    '').replace(' ', '')[0:170])
         # pylint: enable=line-too-long
 
       total_children = 0
@@ -370,7 +374,6 @@ class PrintModelAnalysisTest(test.TestCase):
         self.assertLessEqual(len(tfprof_node.graph_nodes), last_occurrence)
         last_occurrence = len(tfprof_node.graph_nodes)
 
-      self.assertEqual(total_children, 15)
       self.assertGreater(input_shapes, 0)
 
   def testAdvisor(self):
@@ -694,6 +697,101 @@ class PrintModelAnalysisTest(test.TestCase):
                       exception_str)
       self.assertTrue(mat is None)
 
+  def testTrackPersistentBytes(self):
+    ops.reset_default_graph()
+    a = array_ops.constant(np.ones((100, 100)))
+    b = array_ops.constant(np.ones((100, 100)))
+    c = a * b
+
+    with session.Session() as sess:
+      run_options = config_pb2.RunOptions(
+          trace_level=config_pb2.RunOptions.FULL_TRACE)
+      run_metadata = config_pb2.RunMetadata()
+      sess.run(c, options=run_options, run_metadata=run_metadata)
+
+      options = option_builder.ProfileOptionBuilder.time_and_memory()
+      options['min_bytes'] = 0
+      options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
+                           'residual_bytes')
+      ret = model_analyzer.profile(
+          sess.graph, run_meta=run_metadata, cmd='scope', options=options)
+
+      run_metadata = config_pb2.RunMetadata()
+      sess.run(c, options=run_options, run_metadata=run_metadata)
+      ret2 = model_analyzer.profile(
+          sess.graph, run_meta=run_metadata, cmd='scope', options=options)
+
+      n = lib.SearchTFProfNode(ret, 'mul')
+      n2 = lib.SearchTFProfNode(ret2, 'mul')
+      self.assertGreater(n.peak_bytes, 0)
+      self.assertGreater(n.output_bytes, 0)
+      self.assertGreater(n.residual_bytes, 0)
+      self.assertEqual(n.peak_bytes, n2.peak_bytes)
+      self.assertEqual(n.output_bytes, n2.output_bytes)
+      self.assertEqual(n.residual_bytes, n2.residual_bytes)
+
+  def testTraceLoopBytes(self):
+    if not test.is_gpu_available(): return
+    ops.reset_default_graph()
+    steps = 100
+
+    with ops.device('/gpu:0'):
+      x = array_ops.ones((100, 100), dtype=dtypes.float32)
+      n = array_ops.constant(steps, dtype=dtypes.int32)
+      x1 = array_ops.ones((100, 100))
+
+      x *= x1
+      def loop_body(i, x):
+        x *= x
+        return i + 1, x
+
+      _, y = control_flow_ops.while_loop(
+          lambda i, x: i < n, loop_body,
+          [array_ops.constant(0), x])
+
+    grad = gradients.gradients(y, [x1])
+
+    with session.Session() as sess:
+      run_options = config_pb2.RunOptions(
+          trace_level=config_pb2.RunOptions.FULL_TRACE)
+      run_metadata = config_pb2.RunMetadata()
+      sess.run(grad, options=run_options, run_metadata=run_metadata)
+
+      options = option_builder.ProfileOptionBuilder.time_and_memory()
+      options['min_bytes'] = 0
+      options['min_micros'] = 0
+      options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
+                           'residual_bytes')
+      options['output'] = 'none'
+      ret_pb = model_analyzer.profile(
+          sess.graph, run_meta=run_metadata, cmd='scope', options=options)
+      self.assertGreater(ret_pb.total_requested_bytes, 1000000)
+
+  def testEager(self):
+    ops.reset_default_graph()
+    with context.eager_mode():
+      outfile = os.path.join(test.get_temp_dir(), 'dump')
+      opts = builder(
+          builder.time_and_memory()).with_file_output(outfile).build()
+      context.enable_run_metadata()
+      lib.BuildSmallModel()
+
+      profiler = model_analyzer.Profiler()
+      profiler.add_step(0, context.export_run_metadata())
+      context.disable_run_metadata()
+      profiler.profile_operations(opts)
+      with gfile.Open(outfile, 'r') as f:
+        out_str = f.read()
+        self.assertTrue('Conv2D' in out_str)
+        self.assertTrue('VarHandleOp' in out_str)
+
+      with gfile.Open('/tmp/eager_profile', 'wb') as f:
+        profile_pb = tfprof_log_pb2.ProfileProto()
+        profile_pb.ParseFromString(profiler.serialize_to_string())
+        profile_pb_str = '%s' % profile_pb
+        self.assertTrue('Conv2D' in profile_pb_str)
+        self.assertTrue('VarHandleOp' in profile_pb_str)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/profiler/profile_context.py b/tensorflow/python/profiler/profile_context.py
index c7c7ad63012a153d41aa9d616dbd39acb46096f6..18eb66ef988c9f49eb04264545d417d8a986e16e 100644
--- a/tensorflow/python/profiler/profile_context.py
+++ b/tensorflow/python/profiler/profile_context.py
@@ -50,11 +50,12 @@ def _profiled_run(self,
   """Overwrites the session.run()."""
   # pylint: disable=protected-access
   # Count the session steps.
-  with self.profile_context._new_step() as step:
+  with self.profile_context._new_step() as state:
+    step, locked = state
     # Fast path if no need for profiling.
-    if not self.profile_context._is_fast_path():
+    if locked and not self.profile_context._is_fast_path(step):
       # Maybe trace this step.
-      if self.profile_context._should_trace(self.graph, fetches):
+      if self.profile_context._should_trace(step, self.graph, fetches):
         if self.profile_context._debug:
           sys.stderr.write('debug: tracing step: %d\n' % step)
         # Enable tracing, perform auto profiling or auto dump.
@@ -81,7 +82,7 @@ def _profiled_run(self,
         ret = self._profiler_run_internal(fetches, feed_dict, options)
 
       # Maybe dump profile.
-      self.profile_context._maybe_dump()
+      self.profile_context._maybe_dump(step)
 
       # Maybe profile:
       to_profiles = self.profile_context._profile_candidates()
@@ -225,26 +226,26 @@ class ProfileContext(object):
     self._dump_next_step = True
     self._slow_path_steps.add(self._step)
 
-  def _is_fast_path(self):
-    if self._step in self._slow_path_steps:
+  def _is_fast_path(self, step):
+    if step in self._slow_path_steps:
       return False
     # When user doesn't set the tracing steps explicitly, auto decide it.
-    if (self._auto_tracing and self._step > WARMUP_STEPS and
+    if (self._auto_tracing and step > WARMUP_STEPS and
         self._traced_steps <= MAX_TRACED_STEPS):
       return False
     return True
 
-  def _should_trace(self, graph, fetches):
+  def _should_trace(self, step, graph, fetches):
     """Whether should do tracing at current step."""
     if self._traced_steps > MAX_TRACED_STEPS:
       return False
     # Check user-set tracing steps.
-    if self._step in self._trace_steps or self._trace_next_step:
+    if step in self._trace_steps or self._trace_next_step:
       self._traced_steps += 1
       return True
 
     # If no user-set tracing steps set and passes warm up steps, auto trace.
-    if self._auto_tracing and self._step > WARMUP_STEPS:
+    if self._auto_tracing and step > WARMUP_STEPS:
       # If the fetches have not been seen before, trace it.
       with graph.as_default():
         fetch_names = [f.name for f in
@@ -257,23 +258,23 @@ class ProfileContext(object):
         self._traced_steps += 1
         return True
       # If the trace coverage is low, does some random tracing.
-      if (self.profiler._coverage < 0.5 and self._step < MAX_TRACED_STEPS and  # pylint: disable=protected-access
+      if (self.profiler._coverage < 0.5 and step < MAX_TRACED_STEPS and  # pylint: disable=protected-access
           self._rng.randint(0, 10) < 2):
         self._traced_steps += 1
         return True
     return False
 
-  def _maybe_dump(self):
+  def _maybe_dump(self, step):
     """Maybe dump the profile file."""
-    if not (self._step in self._dump_steps or self._dump_next_step):
+    if not (step in self._dump_steps or self._dump_next_step):
       return
     if self._debug:
-      sys.stderr.write('debug: dumping file at step: %d\n' % self._step)
+      sys.stderr.write('debug: dumping file at step: %d\n' % step)
     if not gfile.Exists(self._profiler_dir):
       gfile.MakeDirs(self._profiler_dir)
 
     filename = os.path.join(compat.as_bytes(self._profiler_dir),
-                            compat.as_bytes('profile_%d' % self._step))
+                            compat.as_bytes('profile_%d' % step))
     self.profiler._write_profile(filename)  # pylint: disable=protected-access
 
   def _dump_file(self, pb, basename):
@@ -284,11 +285,13 @@ class ProfileContext(object):
 
   @contextlib.contextmanager
   def _new_step(self):
-    with self._lock:
-      yield self._step
-      self._step += 1
-      self._trace_next_step = False
-      self._dump_next_step = False
+    acquired = self._lock.acquire(False)
+    yield (self._step, acquired)
+    self._step += 1
+    self._trace_next_step = False
+    self._dump_next_step = False
+    if acquired:
+      self._lock.release()
 
   def _profile_candidates(self):
     to_profile = []
diff --git a/tensorflow/python/profiler/tfprof_logger.py b/tensorflow/python/profiler/tfprof_logger.py
index 838064a1f0836a2041c2823f54fea4e6b5606d7f..ffda7ddad759ce68bf718bcfa6e568cfadd59b53 100644
--- a/tensorflow/python/profiler/tfprof_logger.py
+++ b/tensorflow/python/profiler/tfprof_logger.py
@@ -25,6 +25,7 @@ import sys
 
 import six
 from tensorflow.core.profiler import tfprof_log_pb2
+from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.platform import gfile
@@ -139,12 +140,13 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True,
   return logged_ops, string_to_id
 
 
-def _merge_default_with_oplog(graph, op_log=None, run_meta=None,
-                              add_trace=True, add_trainable_var=True):
+def merge_default_with_oplog(graph, op_log=None, run_meta=None,
+                             add_trace=True, add_trainable_var=True):
   """Merge the tfprof default extra info with caller's op_log.
 
   Args:
-    graph: tf.Graph.
+    graph: tf.Graph. If None and eager execution is not enabled, use
+        default graph.
     op_log: OpLogProto proto.
     run_meta: RunMetadata proto used to complete shape information.
     add_trace: Whether to add op trace information.
@@ -153,7 +155,13 @@ def _merge_default_with_oplog(graph, op_log=None, run_meta=None,
   Returns:
     tmp_op_log: Merged OpLogProto proto.
   """
+  if not graph and context.in_graph_mode():
+    graph = ops.get_default_graph()
+
   tmp_op_log = tfprof_log_pb2.OpLogProto()
+  if not graph:
+    return tmp_op_log
+
   logged_ops, string_to_id = _get_logged_ops(
       graph, run_meta, add_trace=add_trace, add_trainable_var=add_trainable_var)
 
@@ -190,7 +198,8 @@ def write_op_log(graph, log_dir, op_log=None, run_meta=None, add_trace=True):
     information with best effort.
 
   Args:
-    graph: tf.Graph.
+    graph: tf.Graph. If None and eager execution is not enabled, use
+        default graph.
     log_dir: directory to write the log file.
     op_log: (Optional) OpLogProto proto to be written. If not provided, an new
         one is created.
@@ -199,7 +208,9 @@ def write_op_log(graph, log_dir, op_log=None, run_meta=None, add_trace=True):
     add_trace: Whether to add python code trace information.
         Used to support "code" view.
   """
-  op_log = _merge_default_with_oplog(graph, op_log, run_meta, add_trace)
+  if not graph and context.in_graph_mode():
+    graph = ops.get_default_graph()
+  op_log = merge_default_with_oplog(graph, op_log, run_meta, add_trace)
 
   with gfile.Open(os.path.join(log_dir, 'tfprof_log'), 'w') as log:
     log.write(op_log.SerializeToString())
diff --git a/tensorflow/python/pywrap_tensorflow.py b/tensorflow/python/pywrap_tensorflow.py
index 91373fa544b62e1b4760a92bf6630edf0c7f1ee4..5c0c5783dce19ec8fa1b090827d06d203e83de68 100644
--- a/tensorflow/python/pywrap_tensorflow.py
+++ b/tensorflow/python/pywrap_tensorflow.py
@@ -60,6 +60,7 @@ try:
   from tensorflow.python.pywrap_tensorflow_internal import __git_version__
   from tensorflow.python.pywrap_tensorflow_internal import __compiler_version__
   from tensorflow.python.pywrap_tensorflow_internal import __cxx11_abi_flag__
+  from tensorflow.python.pywrap_tensorflow_internal import __monolithic_build__
 
   if _use_dlopen_global_flags:
     pywrap_dlopen_global_flags.reset_dlopen_flags()
diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i
index 82b154164e85a1044860ef501c3d32cd00eb6fde..083931aa8369f46b4e859b5ed4764c4bdfa9c3c3 100644
--- a/tensorflow/python/pywrap_tfe.i
+++ b/tensorflow/python/pywrap_tfe.i
@@ -18,21 +18,27 @@ limitations under the License.
 %rename("%s") TFE_NewContext;
 %rename("%s") TFE_DeleteContext;
 %rename("%s") TFE_ContextListDevices;
+%rename("%s") TFE_ContextAddFunction;
 %rename("%s") TFE_ContextAddFunctionDef;
+%rename("%s") TFE_ContextEnableRunMetadata;
+%rename("%s") TFE_ContextDisableRunMetadata;
+%rename("%s") TFE_ContextExportRunMetadata;
+%rename("%s") TFE_ContextClearCaches;
 %rename("%s") TFE_OpNameGetAttrType;
 %rename("%s") TFE_Py_InitEagerTensor;
 %rename("%s") TFE_Py_RegisterExceptionClass;
 %rename("%s") TFE_Py_Execute;
 %rename("%s") TFE_Py_UID;
-%rename("%s") TFE_Py_TapeStackPushNew;
-%rename("%s") TFE_Py_TapeStackPush;
-%rename("%s") TFE_Py_TapeStackPop;
-%rename("%s") TFE_Py_TapeStackIsEmpty;
-%rename("%s") TFE_Py_TapeStackShouldRecord;
-%rename("%s") TFE_Py_TapeStackWatch;
-%rename("%s") TFE_Py_TapeStackDeleteTrace;
-%rename("%s") TFE_Py_TapeStackRecordOperation;
-%rename("%s") TFE_Py_TapeStackWatchVariable;
+%rename("%s") TFE_Py_TapeSetNew;
+%rename("%s") TFE_Py_TapeSetRemove;
+%rename("%s") TFE_Py_TapeSetStopOnThread;
+%rename("%s") TFE_Py_TapeSetRestartOnThread;
+%rename("%s") TFE_Py_TapeSetIsEmpty;
+%rename("%s") TFE_Py_TapeSetShouldRecord;
+%rename("%s") TFE_Py_TapeSetWatch;
+%rename("%s") TFE_Py_TapeSetDeleteTrace;
+%rename("%s") TFE_Py_TapeSetRecordOperation;
+%rename("%s") TFE_Py_TapeSetWatchVariable;
 %rename("%s") TFE_Py_TapeGradient;
 %rename("%s") TFE_Py_TapeWatchedVariables;
 %rename("%s") TFE_NewContextOptions;
@@ -149,7 +155,7 @@ limitations under the License.
   }
   $1 = &temp;
   $1->resize(PyInt_AsLong($input), nullptr);
-}
+} 
 
 // Create new Status object.
 %typemap(in, numinputs=0) TF_Status *out_status {
diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD
index 39c6439811604c5b175c75c24e682b346fde09fc..e34aa7cc2ca41ecdd7c9ff52ab8f3d552f26fe69 100644
--- a/tensorflow/python/saved_model/BUILD
+++ b/tensorflow/python/saved_model/BUILD
@@ -25,6 +25,7 @@ py_library(
         ":main_op",
         ":signature_constants",
         ":signature_def_utils",
+        ":simple_save",
         ":tag_constants",
         ":utils",
         "//tensorflow/python:util",
@@ -89,6 +90,23 @@ py_library(
     ],
 )
 
+py_library(
+    name = "simple_save",
+    srcs = [
+        "simple_save.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":builder",
+        ":signature_constants",
+        ":signature_def_utils",
+        ":tag_constants",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:util",
+    ],
+)
+
 py_library(
     name = "main_op",
     srcs = [
@@ -198,6 +216,22 @@ py_test(
     ],
 )
 
+py_test(
+    name = "simple_save_test",
+    size = "small",
+    srcs = ["simple_save_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":loader",
+        ":signature_constants",
+        ":simple_save",
+        ":tag_constants",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:variables",
+    ],
+)
+
 # -----------------------------------------------------------------------------
 # Google-internal targets.  These must be at the end for syncrepo.
 
diff --git a/tensorflow/python/saved_model/README.md b/tensorflow/python/saved_model/README.md
index 8213e52ce9c004c9b9c53b76e08a028508703d06..5eeaf73a4370b0558a2c11d17a3546171b886a69 100644
--- a/tensorflow/python/saved_model/README.md
+++ b/tensorflow/python/saved_model/README.md
@@ -93,7 +93,7 @@ with an asset of the same name, only the first version is retained.
 Each meta graph added to the SavedModel must be annotated with user specified
 tags. The tags provide a means to identify the specific meta graph to load and
 restore, along with the shared set of variables and assets. These tags
-typically annotate a MetaGraph with it's functionality (e.g. serving or
+typically annotate a MetaGraph with its functionality (e.g. serving or
 training), and possibly hardware specific aspects such as GPU.
 
 #### Usage
@@ -117,6 +117,35 @@ with tf.Session(graph=tf.Graph()) as sess:
 builder.save()
 ~~~
 
+#### Stripping Default valued attributes
+The SavedModelBuilder class allows users to control whether default-valued
+attributes must be stripped from the NodeDefs while adding a meta graph to the
+SavedModel bundle. Both `SavedModelBuilder.add_meta_graph_and_variables` and
+`SavedModelBuilder.add_meta_graph` methods accept a Boolean flag
+`strip_default_attrs` that controls this behavior.
+
+If `strip_default_attrs` is `False`, the exported MetaGraphDef will have the
+default valued attributes in all it's NodeDef instances. This can break forward
+compatibility with a sequence of events such as the following:
+
+* An existing Op (`Foo`) is updated to include a new attribute (`T`) with a
+  default (`bool`) at version 101.
+* A model producer (such as a Trainer) binary picks up this change
+  (version 101) to the OpDef and re-exports an existing model that uses Op `Foo`.
+* A model consumer (such as Tensorflow Serving) running an older binary
+  (version 100) doesn't have attribute `T` for Op `Foo`, but tries to import
+  this model. The model consumer doesn't recognize attribute `T` in a NodeDef
+  that uses Op `Foo` and therefore fails to load the model.
+
+By setting `strip_default_attrs` to `True`, the model producers can strip away
+any default valued attributes in the NodeDefs. This helps ensure that newly
+added attributes with defaults don't cause older model consumers to fail loading
+models regenerated with newer training binaries.
+
+TIP: If you care about forward compatibility, then set `strip_default_attrs`
+to `True` while using `SavedModelBuilder.add_meta_graph_and_variables` and
+`SavedModelBuilder.add_meta_graph`.
+
 ### Loader
 The SavedModel loader is implemented in C++ and Python.
 
diff --git a/tensorflow/python/saved_model/builder_impl.py b/tensorflow/python/saved_model/builder_impl.py
index 16651ffebc5f5911d7c270425f599036a8e80e0c..62ee53b816c2a38327fa116d2924446e6bf24a1e 100644
--- a/tensorflow/python/saved_model/builder_impl.py
+++ b/tensorflow/python/saved_model/builder_impl.py
@@ -239,7 +239,9 @@ class SavedModelBuilder(object):
                      assets_collection=None,
                      legacy_init_op=None,
                      clear_devices=False,
-                     main_op=None):
+                     main_op=None,
+                     strip_default_attrs=False):
+    # pylint: disable=line-too-long
     """Adds the current meta graph to the SavedModel.
 
     Creates a Saver in the current scope and uses the Saver to export the meta
@@ -260,11 +262,15 @@ class SavedModelBuilder(object):
       main_op: Op or group of ops to execute when the graph is loaded. Note
           that when the main_op is specified it is run after the restore op at
           load-time.
+      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+        removed from the NodeDefs. For a detailed guide, see
+        [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
 
     Raises:
       AssertionError: If the variables for the SavedModel have not been saved
           yet, or if the graph already contains one or more legacy init ops.
     """
+    # pylint: enable=line-too-long
     if not self._has_saved_variables:
       raise AssertionError(
           "Graph state including variables and assets has not been saved yet. "
@@ -299,7 +305,8 @@ class SavedModelBuilder(object):
     # there are edge cases where that option breaks the graph.  Until that is
     # resolved, we just leave the option set to False for now.
     # TODO(soergel): Reinstate clear_extraneous_savers=True when possible.
-    meta_graph_def = saver.export_meta_graph(clear_devices=clear_devices)
+    meta_graph_def = saver.export_meta_graph(
+        clear_devices=clear_devices, strip_default_attrs=strip_default_attrs)
 
     # Tag the meta graph def and add it to the SavedModel.
     self._tag_and_add_meta_graph(meta_graph_def, tags, signature_def_map)
@@ -311,7 +318,9 @@ class SavedModelBuilder(object):
                                    assets_collection=None,
                                    legacy_init_op=None,
                                    clear_devices=False,
-                                   main_op=None):
+                                   main_op=None,
+                                   strip_default_attrs=False):
+    # pylint: disable=line-too-long
     """Adds the current meta graph to the SavedModel and saves variables.
 
     Creates a Saver to save the variables from the provided session. Exports the
@@ -334,7 +343,11 @@ class SavedModelBuilder(object):
       main_op: Op or group of ops to execute when the graph is loaded. Note
           that when the main_op is specified it is run after the restore op at
           load-time.
+      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+        removed from the NodeDefs. For a detailed guide, see
+        [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
     """
+    # pylint: enable=line-too-long
     if self._has_saved_variables:
       raise AssertionError("Graph state including variables and assets has "
                            "already been saved. Please invoke "
@@ -388,7 +401,8 @@ class SavedModelBuilder(object):
     # there are edge cases where that option breaks the graph.  Until that is
     # resolved, we just leave the option set to False for now.
     # TODO(soergel): Reinstate clear_extraneous_savers=True when possible.
-    meta_graph_def = saver.export_meta_graph(clear_devices=clear_devices)
+    meta_graph_def = saver.export_meta_graph(
+        clear_devices=clear_devices, strip_default_attrs=strip_default_attrs)
 
     # Tag the meta graph def and add it to the SavedModel.
     self._tag_and_add_meta_graph(meta_graph_def, tags, signature_def_map)
diff --git a/tensorflow/python/saved_model/saved_model.py b/tensorflow/python/saved_model/saved_model.py
index 8c59f7afe778006605da31dc82fb6bbfe883f087..caabd7bc30455b55e89711a1ccab6238971f595e 100644
--- a/tensorflow/python/saved_model/saved_model.py
+++ b/tensorflow/python/saved_model/saved_model.py
@@ -30,6 +30,9 @@ from tensorflow.python.saved_model import signature_def_utils
 from tensorflow.python.saved_model import tag_constants
 from tensorflow.python.saved_model import utils
 # pylint: enable=unused-import
+# pylint: disable=wildcard-import
+from tensorflow.python.saved_model.simple_save import *
+# pylint: enable=wildcard-import
 
 from tensorflow.python.util.all_util import remove_undocumented
 
@@ -41,6 +44,7 @@ _allowed_symbols = [
     "main_op",
     "signature_constants",
     "signature_def_utils",
+    "simple_save",
     "tag_constants",
     "utils",
 ]
diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py
index 92ca7dec6f63b50b33dde9909b4738676fb8c783..1ea619ff55dea00f8ee09024ab45dcd324a2ddce 100644
--- a/tensorflow/python/saved_model/saved_model_test.py
+++ b/tensorflow/python/saved_model/saved_model_test.py
@@ -20,13 +20,17 @@ from __future__ import print_function
 
 import os
 
+from tensorflow.core.framework import op_def_pb2
 from tensorflow.core.framework import types_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import meta_graph_pb2
 from tensorflow.python.client import session
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
+from tensorflow.python.framework import op_def_registry
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
@@ -36,6 +40,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.saved_model import builder as saved_model_builder
 from tensorflow.python.saved_model import constants
 from tensorflow.python.saved_model import loader
+from tensorflow.python.saved_model import loader_impl
 from tensorflow.python.saved_model import main_op
 from tensorflow.python.saved_model import signature_def_utils
 from tensorflow.python.saved_model import tag_constants
@@ -865,6 +870,132 @@ class SavedModelTest(test.TestCase):
       self.assertEqual(
           42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval())
 
+  def testStripDefaultAttrs(self):
+    export_dir = os.path.join(test.get_temp_dir(), "test_strip_default_attrs")
+    builder = saved_model_builder.SavedModelBuilder(export_dir)
+
+    # Add a graph with two float32 variables and a Complex Op composing them
+    # with strip_default_attrs enabled.
+    with session.Session(graph=ops.Graph()) as sess:
+      real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real")
+      imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag")
+      math_ops.complex(real_num, imag_num, name="complex")
+      sess.run(variables.global_variables_initializer())
+      builder.add_meta_graph_and_variables(
+          sess, ["foo"], strip_default_attrs=True)
+
+    # Add a graph with the same float32 variables and a Complex Op composing
+    # them with strip_default_attrs disabled.
+    with session.Session(graph=ops.Graph()) as sess:
+      real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real")
+      imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag")
+      math_ops.complex(real_num, imag_num, name="complex")
+      sess.run(variables.global_variables_initializer())
+      builder.add_meta_graph(["bar"], strip_default_attrs=False)
+
+    # Save the SavedModel to disk in text format.
+    builder.save(as_text=True)
+
+    # Loading graph "foo" via the loader must restore the defaults for the
+    # "Complex" node based on the "Complex" OpDef in the Op registry.
+    sess = session.Session(graph=ops.Graph())
+    meta_graph_def = loader.load(sess, ["foo"], export_dir)
+    complex_node = test_util.get_node_def_from_graph("complex",
+                                                     meta_graph_def.graph_def)
+    self.assertIn("T", complex_node.attr)
+    self.assertIn("Tout", complex_node.attr)
+
+    # Load graph "foo" from disk as-is to verify default attrs are stripped.
+    # pylint: disable=protected-access
+    saved_model_pb = loader_impl._parse_saved_model(export_dir)
+    self.assertIsNotNone(saved_model_pb)
+    # pylint: enable=protected-access
+
+    meta_graph_foo_def = None
+    meta_graph_bar_def = None
+    for meta_graph_def in saved_model_pb.meta_graphs:
+      if set(meta_graph_def.meta_info_def.tags) == set(["foo"]):
+        meta_graph_foo_def = meta_graph_def
+      elif set(meta_graph_def.meta_info_def.tags) == set(["bar"]):
+        meta_graph_bar_def = meta_graph_def
+
+    self.assertIsNotNone(meta_graph_foo_def)
+    self.assertIsNotNone(meta_graph_bar_def)
+
+    # "Complex" Op has 2 attributes with defaults:
+    #   o "T"    : float32.   (input type)
+    #   o "Tout" : complex64. (output type)
+
+    # "Complex" Op in graph "foo" shouldn't have attributes "T" and "Tout".
+    # Graph "foo" was saved with strip_default_attrs set to True.
+    node_def = test_util.get_node_def_from_graph("complex",
+                                                 meta_graph_foo_def.graph_def)
+    self.assertNotIn("T", node_def.attr)
+    self.assertNotIn("Tout", node_def.attr)
+
+    # "Complex" Op in graph "bar" must have attributes "T" and "Tout".
+    # Graph "bar" was saved with strip_default_attrs set to False.
+    node_def = test_util.get_node_def_from_graph("complex",
+                                                 meta_graph_bar_def.graph_def)
+    self.assertIn("T", node_def.attr)
+    self.assertIn("Tout", node_def.attr)
+
+  def testStripDefaultAttrsInconsistentConsumerDefaults(self):
+    export_dir = os.path.join(test.get_temp_dir(),
+                              "test_strip_default_attrs_no_consumer_defaults")
+    builder = saved_model_builder.SavedModelBuilder(export_dir)
+
+    # Add a graph with two float32 variables and a Complex Op composing them
+    # with strip_default_attrs enabled. This must remove the following
+    # defaults for the "Complex" Op:
+    #   o "T"    : float32.   (input type)
+    #   o "Tout" : complex64. (output type)
+    with session.Session(graph=ops.Graph()) as sess:
+      real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real")
+      imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag")
+      math_ops.complex(real_num, imag_num, name="complex")
+      sess.run(variables.global_variables_initializer())
+      builder.add_meta_graph_and_variables(
+          sess, ["foo"], strip_default_attrs=True)
+
+    # Save the SavedModel to disk in text format.
+    builder.save(as_text=True)
+
+    # Update the Op registry to remove defaults for all attrs("T", "Tout") from
+    # the "Complex" OpDef.
+    complex_op_def = op_def_registry.get_registered_ops()["Complex"]
+    original_complex_op_def = op_def_pb2.OpDef()
+    original_complex_op_def.CopyFrom(complex_op_def)
+    for attr_def in complex_op_def.attr:
+      attr_def.ClearField("default_value")
+
+    # Loading the SavedModel via the loader must fail because the SavedModel
+    # does not have any attr values for the "Complex" node and the current
+    # op registry does not have have any default values for the "Complex" op.
+    sess = session.Session(graph=ops.Graph())
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Expected one attr with name .*T(out)?.* in name: \"complex\".*"):
+      loader.load(sess, ["foo"], export_dir)
+
+    # Update the Op registry to change the defaults for attr "Tout"
+    # (complex64 -> complex128).
+    complex_op_def.CopyFrom(original_complex_op_def)
+    for attr_def in complex_op_def.attr:
+      if attr_def.name == "Tout":
+        attr_def.default_value.type = types_pb2.DT_COMPLEX128
+
+    # Loading the SavedModel via the loader must set "Tout" attr_value for the
+    # "Complex" node according to the latest defaults (complex128). This is
+    # expected to fail the model import as there is no OpKernel registered to
+    # handle attrs "T" (float32) and "Tout" (complex128).
+    sess = session.Session(graph=ops.Graph())
+    with self.assertRaisesRegexp(
+        errors.InvalidArgumentError,
+        ".*No OpKernel was registered to support Op \'Complex\' with these "
+        "attrs..*"):
+      loader.load(sess, ["foo"], export_dir)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/saved_model/python/saved_model/utils.py b/tensorflow/python/saved_model/simple_save.py
similarity index 97%
rename from tensorflow/contrib/saved_model/python/saved_model/utils.py
rename to tensorflow/python/saved_model/simple_save.py
index 9f34af64a6253eecf45351d4e844265b922d9313..9a81e5cd80705482865e05b040d712418a993da1 100644
--- a/tensorflow/contrib/saved_model/python/saved_model/utils.py
+++ b/tensorflow/python/saved_model/simple_save.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""SavedModel utility functions."""
+"""SavedModel simple save functionality."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -39,7 +39,7 @@ def simple_save(session, export_dir, inputs, outputs, legacy_init_op=None):
       to configure a SavedModel, this method has a few practical implications:
     - It will be treated as a graph for inference / serving (i.e. uses the tag
       `tag_constants.SERVING`)
-    - The saved model will load in TensorFlow Serving and supports the
+    - The SavedModel will load in TensorFlow Serving and supports the
       [Predict API](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/predict.proto).
       To use the Classify, Regress, or MultiInference APIs, please
       use either
diff --git a/tensorflow/contrib/saved_model/python/saved_model/utils_test.py b/tensorflow/python/saved_model/simple_save_test.py
similarity index 95%
rename from tensorflow/contrib/saved_model/python/saved_model/utils_test.py
rename to tensorflow/python/saved_model/simple_save_test.py
index 36dfb88871f39218ea19c2e6f40675914510e4c4..b2fa40d4f13ff99568cd5a5c8bf39db726e23132 100644
--- a/tensorflow/contrib/saved_model/python/saved_model/utils_test.py
+++ b/tensorflow/python/saved_model/simple_save_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for saved_model utils."""
+"""Tests for SavedModel simple save functionality."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -20,16 +20,16 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.contrib.saved_model.python.saved_model import utils
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.saved_model import loader
 from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.saved_model import simple_save
 from tensorflow.python.saved_model import tag_constants
 
 
-class UtilsTest(test.TestCase):
+class SimpleSaveTest(test.TestCase):
 
   def _init_and_validate_variable(self, sess, variable_name, variable_value):
     v = variables.Variable(variable_value, name=variable_name)
@@ -65,7 +65,7 @@ class UtilsTest(test.TestCase):
       var_y = self._init_and_validate_variable(sess, "var_y", 2)
       inputs = {"x": var_x}
       outputs = {"y": var_y}
-      utils.simple_save(sess, export_dir, inputs, outputs)
+      simple_save.simple_save(sess, export_dir, inputs, outputs)
 
     # Restore the graph with a valid tag and check the global variables and
     # signature def map.
diff --git a/tensorflow/python/summary/summary_iterator.py b/tensorflow/python/summary/summary_iterator.py
index 301f560d41378b0ec29537cd82e3e3b333f59674..6969c4cf1500bf4b1fda900336158e5af4395ea6 100644
--- a/tensorflow/python/summary/summary_iterator.py
+++ b/tensorflow/python/summary/summary_iterator.py
@@ -13,301 +13,14 @@
 # limitations under the License.
 # ==============================================================================
 
-"""Reads Summaries from and writes Summaries to event files."""
+"""Provides a method for reading events from an event file via an iterator."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os.path
-import threading
-import time
-
-import six
-
-from tensorflow.core.framework import graph_pb2
-from tensorflow.core.framework import summary_pb2
 from tensorflow.core.util import event_pb2
-from tensorflow.python import pywrap_tensorflow
-from tensorflow.python.framework import ops
 from tensorflow.python.lib.io import tf_record
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util import compat
-
-
-class SummaryWriter(object):
-  """Writes `Summary` protocol buffers to event files.
-
-  The `SummaryWriter` class provides a mechanism to create an event file in a
-  given directory and add summaries and events to it. The class updates the
-  file contents asynchronously. This allows a training program to call methods
-  to add data to the file directly from the training loop, without slowing down
-  training.
-  """
-
-  def __init__(self, logdir, graph=None, max_queue=10, flush_secs=120,
-               graph_def=None):
-    """Creates a `SummaryWriter` and an event file.
-
-    On construction the summary writer creates a new event file in `logdir`.
-    This event file will contain `Event` protocol buffers constructed when you
-    call one of the following functions: `add_summary()`, `add_session_log()`,
-    `add_event()`, or `add_graph()`.
-
-    If you pass a `Graph` to the constructor it is added to
-    the event file. (This is equivalent to calling `add_graph()` later).
-
-    TensorBoard will pick the graph from the file and display it graphically so
-    you can interactively explore the graph you built. You will usually pass
-    the graph from the session in which you launched it:
-
-    ```python
-    ...create a graph...
-    # Launch the graph in a session.
-    sess = tf.Session()
-    # Create a summary writer, add the 'graph' to the event file.
-    writer = tf.summary.FileWriter(<some-directory>, sess.graph)
-    ```
-
-    The other arguments to the constructor control the asynchronous writes to
-    the event file:
-
-    *  `flush_secs`: How often, in seconds, to flush the added summaries
-       and events to disk.
-    *  `max_queue`: Maximum number of summaries or events pending to be
-       written to disk before one of the 'add' calls block.
-
-    Args:
-      logdir: A string. Directory where event file will be written.
-      graph: A `Graph` object, such as `sess.graph`.
-      max_queue: Integer. Size of the queue for pending events and summaries.
-      flush_secs: Number. How often, in seconds, to flush the
-        pending events and summaries to disk.
-      graph_def: DEPRECATED: Use the `graph` argument instead.
-    """
-    self._logdir = logdir
-    if not gfile.IsDirectory(self._logdir):
-      gfile.MakeDirs(self._logdir)
-    self._event_queue = six.moves.queue.Queue(max_queue)
-    self._ev_writer = pywrap_tensorflow.EventsWriter(
-        compat.as_bytes(os.path.join(self._logdir, "events")))
-    self._closed = False
-    self._worker = _EventLoggerThread(self._event_queue, self._ev_writer,
-                                      flush_secs)
-    # For storing used tags for session.run() outputs.
-    self._session_run_tags = {}
-    self._worker.start()
-    if graph is not None or graph_def is not None:
-      # Calling it with both graph and graph_def for backward compatibility.
-      self.add_graph(graph=graph, graph_def=graph_def)
-
-  def get_logdir(self):
-    """Returns the directory where event file will be written."""
-    return self._logdir
-
-  def reopen(self):
-    """Reopens the summary writer.
-
-    Can be called after `close()` to add more events in the same directory.
-    The events will go into a new events file.
-
-    Does nothing if the summary writer was not closed.
-    """
-    if self._closed:
-      self._closed = False
-
-  def add_summary(self, summary, global_step=None):
-    """Adds a `Summary` protocol buffer to the event file.
-
-    This method wraps the provided summary in an `Event` protocol buffer
-    and adds it to the event file.
-
-    You can pass the result of evaluating any summary op, using
-    @{tf.Session.run} or
-    @{tf.Tensor.eval}, to this
-    function. Alternatively, you can pass a `tf.Summary` protocol
-    buffer that you populate with your own data. The latter is
-    commonly done to report evaluation results in event files.
-
-    Args:
-      summary: A `Summary` protocol buffer, optionally serialized as a string.
-      global_step: Number. Optional global step value to record with the
-        summary.
-    """
-    if isinstance(summary, bytes):
-      summ = summary_pb2.Summary()
-      summ.ParseFromString(summary)
-      summary = summ
-    event = event_pb2.Event(wall_time=time.time(), summary=summary)
-    if global_step is not None:
-      event.step = int(global_step)
-    self.add_event(event)
-
-  def add_session_log(self, session_log, global_step=None):
-    """Adds a `SessionLog` protocol buffer to the event file.
-
-    This method wraps the provided session in an `Event` protocol buffer
-    and adds it to the event file.
-
-    Args:
-      session_log: A `SessionLog` protocol buffer.
-      global_step: Number. Optional global step value to record with the
-        summary.
-    """
-    event = event_pb2.Event(wall_time=time.time(), session_log=session_log)
-    if global_step is not None:
-      event.step = int(global_step)
-    self.add_event(event)
-
-  def add_event(self, event):
-    """Adds an event to the event file.
-
-    Args:
-      event: An `Event` protocol buffer.
-    """
-    if not self._closed:
-      self._event_queue.put(event)
-
-  def _add_graph_def(self, graph_def, global_step=None):
-    graph_bytes = graph_def.SerializeToString()
-    event = event_pb2.Event(wall_time=time.time(), graph_def=graph_bytes)
-    if global_step is not None:
-      event.step = int(global_step)
-    self._event_queue.put(event)
-
-  def add_graph(self, graph, global_step=None, graph_def=None):
-    """Adds a `Graph` to the event file.
-
-    The graph described by the protocol buffer will be displayed by
-    TensorBoard. Most users pass a graph in the constructor instead.
-
-    Args:
-      graph: A `Graph` object, such as `sess.graph`.
-      global_step: Number. Optional global step counter to record with the
-        graph.
-      graph_def: DEPRECATED. Use the `graph` parameter instead.
-
-    Raises:
-      ValueError: If both graph and graph_def are passed to the method.
-    """
-
-    if graph is not None and graph_def is not None:
-      raise ValueError("Please pass only graph, or graph_def (deprecated), "
-                       "but not both.")
-
-    if isinstance(graph, ops.Graph) or isinstance(graph_def, ops.Graph):
-      # The user passed a `Graph`.
-
-      # Check if the user passed it via the graph or the graph_def argument and
-      # correct for that.
-      if not isinstance(graph, ops.Graph):
-        logging.warning("When passing a `Graph` object, please use the `graph`"
-                        " named argument instead of `graph_def`.")
-        graph = graph_def
-
-      # Serialize the graph with additional info.
-      true_graph_def = graph.as_graph_def(add_shapes=True)
-    elif (isinstance(graph, graph_pb2.GraphDef)
-          or isinstance(graph_def, graph_pb2.GraphDef)):
-      # The user passed a `GraphDef`.
-      logging.warning("Passing a `GraphDef` to the SummaryWriter is deprecated."
-                      " Pass a `Graph` object instead, such as `sess.graph`.")
-
-      # Check if the user passed it via the graph or the graph_def argument and
-      # correct for that.
-      if isinstance(graph, graph_pb2.GraphDef):
-        true_graph_def = graph
-      else:
-        true_graph_def = graph_def
-
-    else:
-      # The user passed neither `Graph`, nor `GraphDef`.
-      raise TypeError("The passed graph must be an instance of `Graph` "
-                      "or the deprecated `GraphDef`")
-    # Finally, add the graph_def to the summary writer.
-    self._add_graph_def(true_graph_def, global_step)
-
-  def add_run_metadata(self, run_metadata, tag, global_step=None):
-    """Adds a metadata information for a single session.run() call.
-
-    Args:
-      run_metadata: A `RunMetadata` protobuf object.
-      tag: The tag name for this metadata.
-      global_step: Number. Optional global step counter to record with the
-        StepStats.
-
-    Raises:
-      ValueError: If the provided tag was already used for this type of event.
-    """
-    if tag in self._session_run_tags:
-      raise ValueError("The provided tag was already used for this event type")
-    self._session_run_tags[tag] = True
-
-    tagged_metadata = event_pb2.TaggedRunMetadata()
-    tagged_metadata.tag = tag
-    # Store the `RunMetadata` object as bytes in order to have postponed
-    # (lazy) deserialization when used later.
-    tagged_metadata.run_metadata = run_metadata.SerializeToString()
-    event = event_pb2.Event(wall_time=time.time(),
-                            tagged_run_metadata=tagged_metadata)
-    if global_step is not None:
-      event.step = int(global_step)
-    self._event_queue.put(event)
-
-  def flush(self):
-    """Flushes the event file to disk.
-
-    Call this method to make sure that all pending events have been written to
-    disk.
-    """
-    self._event_queue.join()
-    self._ev_writer.Flush()
-
-  def close(self):
-    """Flushes the event file to disk and close the file.
-
-    Call this method when you do not need the summary writer anymore.
-    """
-    self.flush()
-    self._ev_writer.Close()
-    self._closed = True
-
-
-class _EventLoggerThread(threading.Thread):
-  """Thread that logs events."""
-
-  def __init__(self, queue, ev_writer, flush_secs):
-    """Creates an _EventLoggerThread.
-
-    Args:
-      queue: A Queue from which to dequeue events.
-      ev_writer: An event writer. Used to log brain events for
-       the visualizer.
-      flush_secs: How often, in seconds, to flush the
-        pending file to disk.
-    """
-    threading.Thread.__init__(self)
-    self.daemon = True
-    self._queue = queue
-    self._ev_writer = ev_writer
-    self._flush_secs = flush_secs
-    # The first event will be flushed immediately.
-    self._next_event_flush_time = 0
-
-  def run(self):
-    while True:
-      event = self._queue.get()
-      try:
-        self._ev_writer.WriteEvent(event)
-        # Flush the event writer every so often.
-        now = time.time()
-        if now > self._next_event_flush_time:
-          self._ev_writer.Flush()
-          # Do it again in two minutes.
-          self._next_event_flush_time = now + self._flush_secs
-      finally:
-        self._queue.task_done()
 
 
 def summary_iterator(path):
@@ -352,37 +65,3 @@ def summary_iterator(path):
   # pylint: enable=line-too-long
   for r in tf_record.tf_record_iterator(path):
     yield event_pb2.Event.FromString(r)
-
-
-class SummaryWriterCache(object):
-  """Cache for summary writers.
-
-  This class caches summary writers, one per directory.
-  """
-  # Cache, keyed by directory.
-  _cache = {}
-
-  # Lock protecting _SUMMARY_WRITERS.
-  _lock = threading.RLock()
-
-  @staticmethod
-  def clear():
-    """Clear cached summary writers. Currently only used for unit tests."""
-    with SummaryWriterCache._lock:
-      SummaryWriterCache._cache = {}
-
-  @staticmethod
-  def get(logdir):
-    """Returns the SummaryWriter for the specified directory.
-
-    Args:
-      logdir: str, name of the directory.
-
-    Returns:
-      A `SummaryWriter`.
-    """
-    with SummaryWriterCache._lock:
-      if logdir not in SummaryWriterCache._cache:
-        SummaryWriterCache._cache[logdir] = SummaryWriter(
-            logdir, graph=ops.get_default_graph())
-      return SummaryWriterCache._cache[logdir]
diff --git a/tensorflow/python/tensorflow.i b/tensorflow/python/tensorflow.i
index d221dd523b2835d51e61487c22caee961ec28e5f..82b908ac0e95643d1daf5ed062be44a58cfea97f 100644
--- a/tensorflow/python/tensorflow.i
+++ b/tensorflow/python/tensorflow.i
@@ -33,6 +33,8 @@ limitations under the License.
 %include "tensorflow/python/client/tf_session.i"
 %include "tensorflow/python/client/device_lib.i"
 
+%include "tensorflow/python/lib/core/bfloat16.i"
+
 %include "tensorflow/python/lib/io/file_io.i"
 %include "tensorflow/python/training/quantize_training.i"
 %include "tensorflow/python/training/server_lib.i"
@@ -40,6 +42,7 @@ limitations under the License.
 %include "tensorflow/python/framework/python_op_gen.i"
 
 %include "tensorflow/python/framework/cpp_shape_inference.i"
+%include "tensorflow/python/platform/stacktrace_handler.i"
 %include "tensorflow/python/util/kernel_registry.i"
 
 %include "tensorflow/python/util/transform_graph.i"
diff --git a/tensorflow/python/tools/BUILD b/tensorflow/python/tools/BUILD
index 69586c6a47762701344aafe449e96868875f8926..63f16c53a29fd65c32077dd29e3b1823c11d457b 100644
--- a/tensorflow/python/tools/BUILD
+++ b/tensorflow/python/tools/BUILD
@@ -251,6 +251,7 @@ py_test(
     tags = ["manual"],
     deps = [
         ":saved_model_cli",
+        "//tensorflow/core:protos_all_py",
     ],
 )
 
diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py
index cff2c186e382b5195b3ed35fa5966a44d42eb64f..ce64fdf70981cd78ac9dc7e5dbae15b90df654a2 100644
--- a/tensorflow/python/tools/saved_model_cli.py
+++ b/tensorflow/python/tools/saved_model_cli.py
@@ -152,7 +152,9 @@ def _print_tensor_info(tensor_info):
   Args:
     tensor_info: TensorInfo object to be printed.
   """
-  print('    dtype: ' + types_pb2.DataType.keys()[tensor_info.dtype])
+  print('    dtype: ' +
+        {value: key
+         for (key, value) in types_pb2.DataType.items()}[tensor_info.dtype])
   # Display shape as tuple.
   if tensor_info.tensor_shape.unknown_rank:
     shape = 'unknown_rank'
@@ -553,7 +555,7 @@ def create_parser():
       'To show all inputs and outputs TensorInfo for a specific'
       ' SignatureDef specified by the SignatureDef key in a'
       ' MetaGraph.\n'
-      '$saved_model_cli show --dir /tmp/saved_model --tag_set serve'
+      '$saved_model_cli show --dir /tmp/saved_model --tag_set serve '
       '--signature_def serving_default\n\n'
       'To show all available information in the SavedModel\n:'
       '$saved_model_cli show --dir /tmp/saved_model --all')
diff --git a/tensorflow/python/tools/saved_model_cli_test.py b/tensorflow/python/tools/saved_model_cli_test.py
index a55cf168b23e8fc4efeb5175e3c01cad1a68fa57..0789e1e107cf63b41e37dd7afea0e673d93b2f89 100644
--- a/tensorflow/python/tools/saved_model_cli_test.py
+++ b/tensorflow/python/tools/saved_model_cli_test.py
@@ -28,6 +28,8 @@ import sys
 import numpy as np
 from six import StringIO
 
+from tensorflow.core.framework import types_pb2
+from tensorflow.core.protobuf import meta_graph_pb2
 from tensorflow.python.debug.wrappers import local_cli_wrapper
 from tensorflow.python.platform import test
 from tensorflow.python.tools import saved_model_cli
@@ -200,6 +202,14 @@ Method name is: tensorflow/serving/predict"""
     self.assertEqual(output, expected_output)
     self.assertEqual(err.getvalue().strip(), '')
 
+  def testPrintREFTypeTensor(self):
+    ref_tensor_info = meta_graph_pb2.TensorInfo()
+    ref_tensor_info.dtype = types_pb2.DT_FLOAT_REF
+    with captured_output() as (out, err):
+      saved_model_cli._print_tensor_info(ref_tensor_info)
+    self.assertTrue('DT_FLOAT_REF' in out.getvalue().strip())
+    self.assertEqual(err.getvalue().strip(), '')
+
   def testInputPreProcessFormats(self):
     input_str = 'input1=/path/file.txt[ab3];input2=file2'
     input_expr_str = 'input3=np.zeros([2,2]);input4=[4,5]'
@@ -217,7 +227,6 @@ Method name is: tensorflow/serving/predict"""
     input_str = (r'inputx=C:\Program Files\data.npz[v:0];'
                  r'input:0=c:\PROGRA~1\data.npy')
     input_dict = saved_model_cli.preprocess_inputs_arg_string(input_str)
-    print(input_dict)
     self.assertTrue(input_dict['inputx'] == (r'C:\Program Files\data.npz',
                                              'v:0'))
     self.assertTrue(input_dict['input:0'] == (r'c:\PROGRA~1\data.npy', None))
diff --git a/tensorflow/python/tools/selective_registration_header_lib.py b/tensorflow/python/tools/selective_registration_header_lib.py
index 7f7470994dd75e22be6cbb55c5cfe17ece2e95ad..dc0612bb3f3eca29fd75ed568eded5f582572e19 100644
--- a/tensorflow/python/tools/selective_registration_header_lib.py
+++ b/tensorflow/python/tools/selective_registration_header_lib.py
@@ -54,7 +54,7 @@ def get_ops_and_kernels(proto_fileformat, proto_files, default_ops_str):
       kernel_class = pywrap_tensorflow.TryFindKernelClass(
           node_def.SerializeToString())
       if kernel_class:
-        op_and_kernel = (str(node_def.op), kernel_class.decode('utf-8'))
+        op_and_kernel = (str(node_def.op), str(kernel_class.decode('utf-8')))
         if op_and_kernel not in ops:
           ops.add(op_and_kernel)
       else:
diff --git a/tensorflow/python/training/adam_test.py b/tensorflow/python/training/adam_test.py
index 0d534db60dc92443d2795e751a574018bc03f612..ffb66abc4c1a38353d602a711cab86b0d63b9e96 100644
--- a/tensorflow/python/training/adam_test.py
+++ b/tensorflow/python/training/adam_test.py
@@ -207,6 +207,9 @@ class AdamOptimizerTest(test.TestCase):
           # Validate updated params
           self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
           self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+          if use_resource:
+            self.assertEqual("var0_%d/Adam:0" % (i,),
+                             opt.get_slot(var=var0, name="m").name)
 
   def testBasic(self):
     with self.test_session():
diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py
index 1fb00343ef23d6b6dc9ca41f4868f0a7d80feb7c..752d585cd17e1b1a89abbae7c9e61fa966ad7f93 100644
--- a/tensorflow/python/training/basic_session_run_hooks.py
+++ b/tensorflow/python/training/basic_session_run_hooks.py
@@ -514,6 +514,8 @@ class StepCounterHook(session_run_hook.SessionRunHook):
 
     self._summary_writer = summary_writer
     self._output_dir = output_dir
+    self._last_global_step = None
+    self._global_step_check_count = 0
 
   def begin(self):
     if self._summary_writer is None and self._output_dir:
@@ -527,6 +529,14 @@ class StepCounterHook(session_run_hook.SessionRunHook):
   def before_run(self, run_context):  # pylint: disable=unused-argument
     return SessionRunArgs(self._global_step_tensor)
 
+  def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
+    steps_per_sec = elapsed_steps / elapsed_time
+    if self._summary_writer is not None:
+      summary = Summary(value=[Summary.Value(
+          tag=self._summary_tag, simple_value=steps_per_sec)])
+      self._summary_writer.add_summary(summary, global_step)
+    logging.info("%s: %g", self._summary_tag, steps_per_sec)
+
   def after_run(self, run_context, run_values):
     _ = run_context
 
@@ -538,12 +548,31 @@ class StepCounterHook(session_run_hook.SessionRunHook):
         elapsed_time, elapsed_steps = self._timer.update_last_triggered_step(
             global_step)
         if elapsed_time is not None:
-          steps_per_sec = elapsed_steps / elapsed_time
-          if self._summary_writer is not None:
-            summary = Summary(value=[Summary.Value(
-                tag=self._summary_tag, simple_value=steps_per_sec)])
-            self._summary_writer.add_summary(summary, global_step)
-          logging.info("%s: %g", self._summary_tag, steps_per_sec)
+          self._log_and_record(elapsed_steps, elapsed_time, global_step)
+
+    # Check whether the global step has been increased. Here, we do not use the
+    # timer.last_triggered_step as the timer might record a different global
+    # step value such that the comparison could be unreliable. For simplicity,
+    # we just compare the stale_global_step with previously recorded version.
+    if stale_global_step == self._last_global_step:
+      # Here, we use a counter to count how many times we have observed that the
+      # global step has not been increased. For some Optimizers, the global step
+      # is not increased each time by design. For example, SyncReplicaOptimizer
+      # doesn't increase the global step in worker's main train step.
+      self._global_step_check_count += 1
+      if self._global_step_check_count % 20 == 0:
+        self._global_step_check_count = 0
+        logging.warning(
+            "It seems that global step (tf.train.get_global_step) has not "
+            "been increased. Current value (could be stable): %s vs previous "
+            "value: %s. You could increase the global step by passing "
+            "tf.train.get_global_step() to Optimizer.apply_gradients or "
+            "Optimizer.minimize.", stale_global_step, self._last_global_step)
+    else:
+      # Whenever we observe the increment, reset the counter.
+      self._global_step_check_count = 0
+
+    self._last_global_step = stale_global_step
 
 
 class NanLossDuringTrainingError(RuntimeError):
diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py
index e7ff7e12211ae57a8589c799efbf9eab3b3fe5da..2547661e5250e94136a100aa8c30c9dbb7455018 100644
--- a/tensorflow/python/training/basic_session_run_hooks_test.py
+++ b/tensorflow/python/training/basic_session_run_hooks_test.py
@@ -780,9 +780,12 @@ class StepCounterHookTest(test.TestCase):
       hook.begin()
       sess.run(variables_lib.global_variables_initializer())
       mon_sess = monitored_session._HookedSession(sess, [hook])
-      for _ in range(30):
-        time.sleep(0.01)
-        mon_sess.run(train_op)
+      with test.mock.patch.object(tf_logging, 'warning') as mock_log:
+        for _ in range(30):
+          time.sleep(0.01)
+          mon_sess.run(train_op)
+        # logging.warning should not be called.
+        self.assertIsNone(mock_log.call_args)
       hook.end(sess)
       summary_writer.assert_summaries(
           test_case=self,
@@ -857,6 +860,24 @@ class StepCounterHookTest(test.TestCase):
       summary_value = summary_writer.summaries[2][0].value[0]
       self.assertEqual('bar/foo/sec', summary_value.tag)
 
+  def test_log_warning_if_global_step_not_increased(self):
+    with ops.Graph().as_default(), session_lib.Session() as sess:
+      variables.get_or_create_global_step()
+      train_op = training_util._increment_global_step(0)  # keep same.
+      sess.run(variables_lib.global_variables_initializer())
+      hook = basic_session_run_hooks.StepCounterHook(
+          every_n_steps=1, every_n_secs=None)
+      hook.begin()
+      mon_sess = monitored_session._HookedSession(sess, [hook])
+      mon_sess.run(train_op)  # Run one step to record global step.
+      with test.mock.patch.object(tf_logging, 'warning') as mock_log:
+        for _ in range(30):
+          mon_sess.run(train_op)
+        self.assertRegexpMatches(
+            str(mock_log.call_args),
+            'global step.*has not been increased')
+      hook.end(sess)
+
 
 class SummarySaverHookTest(test.TestCase):
 
diff --git a/tensorflow/python/training/checkpoint_utils_test.py b/tensorflow/python/training/checkpoint_utils_test.py
index 8dbc980b6bf1d594a84613fca3368e00acb9e958..cd17faa040d5b85263b54bc53100b18f736a12e0 100644
--- a/tensorflow/python/training/checkpoint_utils_test.py
+++ b/tensorflow/python/training/checkpoint_utils_test.py
@@ -143,7 +143,7 @@ class CheckpointsTest(test.TestCase):
         self.assertAllEqual(my4.eval(session), v4)
 
         # Check that tensors are not explicitly in the graph.
-        self.assertLess(len(str(session.graph.as_graph_def())), 28000)
+        self.assertLess(len(str(session.graph.as_graph_def())), 29000)
 
   def testInitWithScopeDoesNotCaptureSuffixes(self):
     checkpoint_dir = self.get_temp_dir()
diff --git a/tensorflow/python/training/learning_rate_decay.py b/tensorflow/python/training/learning_rate_decay.py
index 802b930b0e391685b07802cbf6973b763e52d147..3ee49650e01bd31d7d34fe1e109599531626058c 100644
--- a/tensorflow/python/training/learning_rate_decay.py
+++ b/tensorflow/python/training/learning_rate_decay.py
@@ -103,8 +103,8 @@ def exponential_decay(learning_rate, global_step, decay_steps, decay_rate,
 def piecewise_constant(x, boundaries, values, name=None):
   """Piecewise constant from boundaries and interval values.
 
-  Example: use a learning rate that's 1.0 for the first 100000 steps, 0.5
-    for steps 100001 to 110000, and 0.1 for any additional steps.
+  Example: use a learning rate that's 1.0 for the first 100001 steps, 0.5
+    for the next 10000 steps, and 0.1 for any additional steps.
 
   ```python
   global_step = tf.Variable(0, trainable=False)
@@ -120,7 +120,7 @@ def piecewise_constant(x, boundaries, values, name=None):
       `float64`, `uint8`, `int8`, `int16`, `int32`, `int64`.
     boundaries: A list of `Tensor`s or `int`s or `float`s with strictly
       increasing entries, and with all elements having the same type as `x`.
-    values: A list of `Tensor`s or float`s or `int`s that specifies the values
+    values: A list of `Tensor`s or `float`s or `int`s that specifies the values
       for the intervals defined by `boundaries`. It should have one more element
       than `boundaries`, and all elements should have the same type.
     name: A string. Optional name of the operation. Defaults to
@@ -362,7 +362,13 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
   The function returns the decayed learning rate.  It is computed as:
 
   ```python
-  decayed_learning_rate = learning_rate / (1 + decay_rate * t)
+  decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
+  ```
+
+  or, if `staircase` is `True`, as:
+
+  ```python
+  decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
   ```
 
   Example: decay 1/t with a rate of 0.5:
@@ -371,8 +377,9 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
   ...
   global_step = tf.Variable(0, trainable=False)
   learning_rate = 0.1
-  k = 0.5
-  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, k)
+  decay_steps = 1.0
+  decay_rate = 0.5
+  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate)
 
   # Passing global_step to minimize() will increment it at each step.
   learning_step = (
@@ -417,11 +424,12 @@ def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
     return math_ops.div(learning_rate, denom, name=name)
 
 
-def cosine_decay(learning_rate, global_step, decay_steps, name=None):
+def cosine_decay(learning_rate, global_step, decay_steps, alpha=0.0,
+                 name=None):
   """Applies cosine decay to the learning rate.
 
   See [Loshchilov & Hutter, ICLR2016], SGDR: Stochastic Gradient Descent
-  with Warm Restarts.
+  with Warm Restarts. https://arxiv.org/abs/1608.03983
 
   When training a model, it is often recommended to lower the learning rate as
   the training progresses.  This function applies a cosine decay function
@@ -432,7 +440,8 @@ def cosine_decay(learning_rate, global_step, decay_steps, name=None):
   The function returns the decayed learning rate.  It is computed as:
   ```python
   global_step = min(global_step, decay_steps)
-  decayed = 0.5 * (1 + cos(pi * global_step / decay_steps))
+  cosine_decay = 0.5 * (1 + cos(pi * global_step / decay_steps))
+  decayed = (1 - alpha) * cosine_decay + alpha
   decayed_learning_rate = learning_rate * decayed
   ```
 
@@ -449,6 +458,8 @@ def cosine_decay(learning_rate, global_step, decay_steps, name=None):
       Global step to use for the decay computation.
     decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
       Number of steps to decay over.
+    alpha: A scalar `float32` or `float64` Tensor or a Python number.
+      Minimum learning rate value as a fraction of learning_rate.
     name: String. Optional name of the operation.  Defaults to 'CosineDecay'.
   Returns:
     A scalar `Tensor` of the same type as `learning_rate`.  The decayed
@@ -469,7 +480,96 @@ def cosine_decay(learning_rate, global_step, decay_steps, name=None):
     cosine_decayed = 0.5 * (
         1.0 + math_ops.cos(constant_op.constant(math.pi) * completed_fraction))
 
-    return math_ops.multiply(learning_rate, cosine_decayed)
+    decayed = (1 - alpha) * cosine_decayed + alpha
+    return math_ops.multiply(learning_rate, decayed)
+
+
+def cosine_decay_restarts(learning_rate, global_step, first_decay_steps,
+                          t_mul=2.0, m_mul=1.0, alpha=0.0, name=None):
+  """Applies cosine decay with restarts to the learning rate.
+
+  See [Loshchilov & Hutter, ICLR2016], SGDR: Stochastic Gradient Descent
+  with Warm Restarts. https://arxiv.org/abs/1608.03983
+
+  When training a model, it is often recommended to lower the learning rate as
+  the training progresses.  This function applies a cosine decay function with
+  restarts to a provided initial learning rate.  It requires a `global_step`
+  value to compute the decayed learning rate.  You can just pass a TensorFlow
+  variable that you increment at each training step.
+
+  The function returns the decayed learning rate while taking into account
+  possible warm restarts. The learning rate multiplier first decays
+  from 1 to `alpha` for `first_decay_steps` steps. Then, a warm
+  restart is performed. Each new warm restart runs for `t_mul` times more steps
+  and with `m_mul` times smaller initial learning rate.
+
+  Example usage:
+  ```python
+  first_decay_steps = 1000
+  lr_decayed = cosine_decay_restarts(learning_rate, global_step,
+                                     first_decay_steps)
+  ```
+
+  Args:
+    learning_rate: A scalar `float32` or `float64` Tensor or a Python number.
+      The initial learning rate.
+    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
+      Global step to use for the decay computation.
+    first_decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
+      Number of steps to decay over.
+    t_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
+      Used to derive the number of iterations in the i-th period
+    m_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
+      Used to derive the initial learning rate of the i-th period:
+    alpha: A scalar `float32` or `float64` Tensor or a Python number.
+      Minimum learning rate value as a fraction of the learning_rate.
+    name: String. Optional name of the operation.  Defaults to 'SGDRDecay'.
+  Returns:
+    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
+    learning rate.
+  Raises:
+    ValueError: if `global_step` is not supplied.
+  """
+  if global_step is None:
+    raise ValueError("cosine decay restarts requires global_step")
+  with ops.name_scope(name, "SGDRDecay",
+                      [learning_rate, global_step]) as name:
+    learning_rate = ops.convert_to_tensor(learning_rate,
+                                          name="initial_learning_rate")
+    dtype = learning_rate.dtype
+    global_step = math_ops.cast(global_step, dtype)
+    first_decay_steps = math_ops.cast(first_decay_steps, dtype)
+    alpha = math_ops.cast(alpha, dtype)
+    t_mul = math_ops.cast(t_mul, dtype)
+    m_mul = math_ops.cast(m_mul, dtype)
+
+    completed_fraction = global_step / first_decay_steps
+
+    def compute_step(completed_fraction, geometric=False):
+      if geometric:
+        i_restart = math_ops.floor(math_ops.log(1.0 - completed_fraction * (
+            1.0 - t_mul)) / math_ops.log(t_mul))
+
+        sum_r = (1.0 - t_mul ** i_restart) / (1.0 - t_mul)
+        completed_fraction = (completed_fraction - sum_r) / t_mul ** i_restart
+
+      else:
+        i_restart = math_ops.floor(completed_fraction)
+        completed_fraction = completed_fraction - i_restart
+
+      return i_restart, completed_fraction
+
+    i_restart, completed_fraction = control_flow_ops.cond(
+        math_ops.equal(t_mul, 1.0),
+        lambda: compute_step(completed_fraction, geometric=False),
+        lambda: compute_step(completed_fraction, geometric=True))
+
+    m_fac = m_mul ** i_restart
+    cosine_decayed = 0.5 * m_fac * (1.0 + math_ops.cos(
+        constant_op.constant(math.pi) * completed_fraction))
+    decayed = (1 - alpha) * cosine_decayed + alpha
+
+  return math_ops.multiply(learning_rate, decayed, name=name)
 
 
 def linear_cosine_decay(learning_rate, global_step, decay_steps,
@@ -480,6 +580,10 @@ def linear_cosine_decay(learning_rate, global_step, decay_steps,
   See [Bello et al., ICML2017] Neural Optimizer Search with RL.
   https://arxiv.org/abs/1709.07417
 
+  For the idea of warm starts here controlled by `num_periods`,
+  see [Loshchilov & Hutter, ICLR2016] SGDR: Stochastic Gradient Descent
+  with Warm Restarts. https://arxiv.org/abs/1608.03983
+
   Note that linear cosine decay is more aggressive than cosine decay and
   larger initial learning rates can typically be used.
 
@@ -556,6 +660,10 @@ def noisy_linear_cosine_decay(learning_rate, global_step, decay_steps,
   See [Bello et al., ICML2017] Neural Optimizer Search with RL.
   https://arxiv.org/abs/1709.07417
 
+  For the idea of warm starts here controlled by `num_periods`,
+  see [Loshchilov & Hutter, ICLR2016] SGDR: Stochastic Gradient Descent
+  with Warm Restarts. https://arxiv.org/abs/1608.03983
+
   Note that linear cosine decay is more aggressive than cosine decay and
   larger initial learning rates can typically be used.
 
diff --git a/tensorflow/python/training/learning_rate_decay_test.py b/tensorflow/python/training/learning_rate_decay_test.py
index ff41d80940a4b2f5d4c27f8691094422cd0cb18f..1ce8c156a0b126f680bad62267f90e31a23febed 100644
--- a/tensorflow/python/training/learning_rate_decay_test.py
+++ b/tensorflow/python/training/learning_rate_decay_test.py
@@ -342,10 +342,11 @@ class InverseDecayTest(test_util.TensorFlowTestCase):
 
 class CosineDecayTest(test_util.TensorFlowTestCase):
 
-  def np_cosine_decay(self, step, decay_steps):
+  def np_cosine_decay(self, step, decay_steps, alpha=0.0):
     step = min(step, decay_steps)
     completed_fraction = step / decay_steps
-    return 0.5 * (1.0 + math.cos(math.pi * completed_fraction))
+    decay = 0.5 * (1.0 + math.cos(math.pi * completed_fraction))
+    return (1.0 - alpha) * decay + alpha
 
   def testDecay(self):
     num_training_steps = 1000
@@ -357,6 +358,77 @@ class CosineDecayTest(test_util.TensorFlowTestCase):
         expected = self.np_cosine_decay(step, num_training_steps)
         self.assertAllClose(decayed_lr.eval(), expected, 1e-6)
 
+  def testAlpha(self):
+    num_training_steps = 1000
+    initial_lr = 1.0
+    alpha = 0.1
+    for step in range(0, 1500, 250):
+      with self.test_session():
+        decayed_lr = learning_rate_decay.cosine_decay(
+            initial_lr, step, num_training_steps, alpha)
+        expected = self.np_cosine_decay(step, num_training_steps, alpha)
+        self.assertAllClose(decayed_lr.eval(), expected, 1e-6)
+
+
+class CosineDecayRestartsTest(test_util.TensorFlowTestCase):
+  def np_cosine_decay_restarts(self, step, decay_steps, t_mul=2.0, m_mul=1.0,
+                               alpha=0.0):
+    fac = 1.0
+    while step >= decay_steps:
+      step = step - decay_steps
+      decay_steps *= t_mul
+      fac *= m_mul
+
+    completed_fraction = step / decay_steps
+    decay = fac * 0.5 * (1.0 + math.cos(math.pi * completed_fraction))
+    return (1.0 - alpha) * decay + alpha
+
+  def testDecay(self):
+    num_training_steps = 1000
+    initial_lr = 1.0
+    for step in range(0, 1500, 250):
+      with self.test_session():
+        decayed_lr = learning_rate_decay.cosine_decay_restarts(
+            initial_lr, step, num_training_steps)
+        expected = self.np_cosine_decay_restarts(step, num_training_steps)
+        self.assertAllClose(decayed_lr.eval(), expected, 1e-6)
+
+  def testAlpha(self):
+    num_training_steps = 1000
+    initial_lr = 1.0
+    alpha = 0.1
+    for step in range(0, 1500, 250):
+      with self.test_session():
+        decayed_lr = learning_rate_decay.cosine_decay_restarts(
+            initial_lr, step, num_training_steps, alpha=alpha)
+        expected = self.np_cosine_decay_restarts(step, num_training_steps,
+                                                 alpha=alpha)
+        self.assertAllClose(decayed_lr.eval(), expected, 1e-6)
+
+  def testMMul(self):
+    num_training_steps = 1000
+    initial_lr = 1.0
+    m_mul = 0.9
+    for step in range(0, 1500, 250):
+      with self.test_session():
+        decayed_lr = learning_rate_decay.cosine_decay_restarts(
+            initial_lr, step, num_training_steps, m_mul=m_mul)
+        expected = self.np_cosine_decay_restarts(step, num_training_steps,
+                                                 m_mul=m_mul)
+        self.assertAllClose(decayed_lr.eval(), expected, 1e-6)
+
+  def testTMul(self):
+    num_training_steps = 1000
+    initial_lr = 1.0
+    t_mul = 1.0
+    for step in range(0, 1500, 250):
+      with self.test_session():
+        decayed_lr = learning_rate_decay.cosine_decay_restarts(
+            initial_lr, step, num_training_steps, t_mul=t_mul)
+        expected = self.np_cosine_decay_restarts(step, num_training_steps,
+                                                 t_mul=t_mul)
+        self.assertAllClose(decayed_lr.eval(), expected, 1e-6)
+
 
 class LinearCosineDecayTest(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py
index 7268b3abc93f911a29b11cb95b1f005db6f49167..6865513b0e4aad18d77887770a11243642958e7a 100644
--- a/tensorflow/python/training/momentum_test.py
+++ b/tensorflow/python/training/momentum_test.py
@@ -234,23 +234,38 @@ class MomentumOptimizerTest(test.TestCase):
           self.assertAllClose(var0_np, var0.eval())
           self.assertAllClose(var1_np, var1.eval())
 
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
   def testMinimizeSparseResourceVariable(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.test_session():
-        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+
+      # pylint: disable=cell-var-from-loop
+      def loss():
         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
         pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
-        loss = pred * pred
-        sgd_op = momentum_lib.MomentumOptimizer(
-            learning_rate=1.0, momentum=0.0).minimize(loss)
-        variables.global_variables_initializer().run()
-        # Fetch params to validate initial values
-        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
-        # Run 1 step of sgd
-        sgd_op.run()
-        # Validate updated params
-        self.assertAllCloseAccordingToType(
-            [[-111, -138]], var0.eval())
+        return pred * pred
+      # pylint: enable=cell-var-from-loop
+
+      opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0)
+      sgd_op = opt.minimize(loss if context.in_eager_mode() else loss())
+      self.evaluate(variables.global_variables_initializer())
+      # Run 1 step of sgd
+      self.evaluate(sgd_op)
+      # Validate updated params
+      self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0))
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testMinimizeWith2DIndiciesForEmbeddingLookup(self):
+    var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2]))
+
+    def loss():
+      return math_ops.reduce_sum(embedding_ops.embedding_lookup(var0, [[1]]))
+
+    opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0)
+    sgd_op = opt.minimize(loss if context.in_eager_mode() else loss())
+    self.evaluate(variables.global_variables_initializer())
+    self.evaluate(sgd_op)
+    self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0))
 
   def testTensorLearningRateAndMomentum(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py
index e931555470354d1f5c76ad7d46cff1308b015116..fa3517db27be4581deb85f77f022406b8b30ec56 100644
--- a/tensorflow/python/training/monitored_session.py
+++ b/tensorflow/python/training/monitored_session.py
@@ -52,7 +52,6 @@ _PREEMPTION_ERRORS = (errors.AbortedError, errors.UnavailableError)
 USE_DEFAULT = object()
 
 
-# TODO(touts): Share that with the Supervisor.
 class Scaffold(object):
   """Structure to create or gather pieces commonly needed to train a model.
 
@@ -213,6 +212,7 @@ class Scaffold(object):
     self._saver.build()
 
     ops.get_default_graph().finalize()
+    logging.info('Graph was finalized.')
     return self
 
   @property
@@ -266,8 +266,10 @@ class Scaffold(object):
 
   @staticmethod
   def _default_local_init_op():
-    return control_flow_ops.group(variables.local_variables_initializer(),
-                                  lookup_ops.tables_initializer())
+    return control_flow_ops.group(
+        variables.local_variables_initializer(),
+        lookup_ops.tables_initializer(),
+        resources.initialize_resources(resources.local_resources()))
 
 
 def MonitoredTrainingSession(master='',  # pylint: disable=invalid-name
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index b31d02eb8d7afe2dd675192fc99fb7c24b515c00..56cf4d42ee194885057d8bf45d9b3c1c407c4a11 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -644,7 +644,8 @@ class Optimizer(object):
     Returns:
       Valid types for loss, variables and gradients.
     """
-    return set([dtypes.float16, dtypes.float32, dtypes.float64])
+    return set(
+        [dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64])
 
   def _create_slots(self, var_list):
     """Create all slots needed by the variables.
diff --git a/tensorflow/python/training/quantize_training.i b/tensorflow/python/training/quantize_training.i
index 40c60769731d3f7255647a07141d86b1c2594b01..17ffcd6e0758c9c1bc8bab864b6b7a2a18bc9cbf 100644
--- a/tensorflow/python/training/quantize_training.i
+++ b/tensorflow/python/training/quantize_training.i
@@ -65,6 +65,9 @@ def do_quantize_training_on_graphdef(input_graph, num_bits):
 
   graph.ParseFromString(result_graph_string)
   return graph
+
+do_quantize_training_on_graphdef._tf_api_names = [
+    'train.do_quantize_training_on_graphdef']
 %}
 
 %unignoreall
diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index bd47736d4bc1b9b05d26bc45512f693a1bc0f937..2c59b82ebe2264e56da1a3b977b27eba2ed6f494 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -241,6 +241,34 @@ class BaseSaverBuilder(object):
     else:
       raise RuntimeError("Unexpected write_version: " + self._write_version)
 
+  def bulk_restore(self, filename_tensor, saveables, preferred_shard,
+                   restore_sequentially):
+    """Restore all tensors contained in saveables.
+
+    By default, this issues separate calls to `restore_op` for each saveable.
+    Subclasses may override to load multiple saveables in a single call.
+
+    Args:
+      filename_tensor: String Tensor.
+      saveables: List of BaseSaverBuilder.SaveableObject objects.
+      preferred_shard: Int.  Shard to open first when loading a sharded file.
+      restore_sequentially: Bool.  If true, each restore is sequential.
+
+    Returns:
+      A list of Tensors resulting from reading 'saveable' from
+        'filename'.
+
+    """
+    all_tensors = []
+    assign_ops = []
+    for saveable in saveables:
+      restore_control_inputs = assign_ops[-1:] if restore_sequentially else []
+      with ops.device(_set_cpu0(saveable.device) if saveable.device else None):
+        with ops.control_dependencies(restore_control_inputs):
+          all_tensors.extend(
+              self.restore_op(filename_tensor, saveable, preferred_shard))
+    return all_tensors
+
   # pylint: disable=unused-argument
   def restore_op(self, filename_tensor, saveable, preferred_shard):
     """Create ops to restore 'saveable'.
@@ -349,7 +377,7 @@ class BaseSaverBuilder(object):
     last_device = None
     for shard, (device, saveables) in enumerate(per_device):
       last_device = device
-      with ops.device(device):
+      with ops.device(_set_cpu0(device)):
         sharded_filename = self.sharded_filename(tmp_checkpoint_prefix, shard,
                                                  num_shards_tensor)
         sharded_prefixes.append(sharded_filename)
@@ -357,7 +385,7 @@ class BaseSaverBuilder(object):
 
     with ops.control_dependencies([x.op for x in sharded_saves]):
       # Co-locates the merge step with the last device.
-      with ops.device(last_device):
+      with ops.device(_set_cpu0(last_device)):
         # V2 format write path consists of a metadata merge step.  Once merged,
         # attempts to delete the temporary directory, "<user-fed prefix>_temp".
         merge_step = gen_io_ops.merge_v2_checkpoints(
@@ -416,30 +444,32 @@ class BaseSaverBuilder(object):
     Returns:
       An Operation that restores the variables.
     """
+    all_tensors = self.bulk_restore(filename_tensor, saveables, preferred_shard,
+                                    restore_sequentially)
+
     assign_ops = []
+    idx = 0
+    # Load and optionally reshape on the CPU, as string tensors are not
+    # available on the GPU.
+    # TODO(touts): Re-enable restore on GPU when we can support annotating
+    # string tensors as "HostMemory" inputs.
     for saveable in saveables:
-      restore_control_inputs = assign_ops[-1:] if restore_sequentially else []
-      # Load and optionally reshape on the CPU, as string tensors are not
-      # available on the GPU.
-      # TODO(touts): Re-enable restore on GPU when we can support annotating
-      # string tensors as "HostMemory" inputs.
-      with ops.device(_set_cpu0(saveable.device) if saveable.device else None):
-        with ops.control_dependencies(restore_control_inputs):
-          tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
-          shapes = None
-          if reshape:
-            # Compute the shapes, let the restore op decide if and how to do
-            # the reshape.
-            shapes = []
-            for spec in saveable.specs:
-              v = spec.tensor
-              shape = v.get_shape()
-              if not shape.is_fully_defined():
-                shape = array_ops.shape(v)
-              shapes.append(shape)
-          assign_ops.append(saveable.restore(tensors, shapes))
-
-      # Create a Noop that has control dependencies from all the updates.
+      shapes = None
+      if reshape:
+        # Compute the shapes, let the restore op decide if and how to do
+        # the reshape.
+        shapes = []
+        for spec in saveable.specs:
+          v = spec.tensor
+          shape = v.get_shape()
+          if not shape.is_fully_defined():
+            shape = array_ops.shape(v)
+          shapes.append(shape)
+      saveable_tensors = all_tensors[idx:idx + len(saveable.specs)]
+      idx += len(saveable.specs)
+      assign_ops.append(saveable.restore(saveable_tensors, shapes))
+
+    # Create a Noop that has control dependencies from all the updates.
     return control_flow_ops.group(*assign_ops, name=name)
 
   def _AddShardedRestoreOps(self, filename_tensor, per_device,
@@ -797,6 +827,25 @@ class BaseSaverBuilder(object):
           version=self._write_version)
 
 
+class BulkSaverBuilder(BaseSaverBuilder):
+  """SaverBuilder with support for bulk restoring multiple saveables."""
+
+  def bulk_restore(self, filename_tensor, saveables, preferred_shard,
+                   restore_sequentially):
+
+    # Ignored: bulk restore is internally sequential.
+    del restore_sequentially
+    restore_specs = []
+    for saveable in saveables:
+      for spec in saveable.specs:
+        restore_specs.append((spec.name, spec.slice_spec, spec.tensor.dtype))
+
+    names, slices, dtypes = zip(*restore_specs)
+    # Load all tensors onto CPU 0 for compatibility with existing code.
+    with ops.device("cpu:0"):
+      return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
+
+
 def _get_saver_or_default():
   """Returns the saver from SAVERS collection, or creates a default one.
 
@@ -1261,6 +1310,7 @@ class Saver(object):
     if not self.saver_def or context.in_eager_mode():
       if self._builder is None:
         self._builder = BaseSaverBuilder(self._write_version)
+
       if self._var_list is None:
         # pylint: disable=protected-access
         self._var_list = variables._all_saveable_objects()
@@ -1509,7 +1559,9 @@ class Saver(object):
            latest_filename=None,
            meta_graph_suffix="meta",
            write_meta_graph=True,
-           write_state=True):
+           write_state=True,
+           strip_default_attrs=False):
+    # pylint: disable=line-too-long
     """Saves variables.
 
     This method runs the ops added by the constructor for saving variables.
@@ -1535,6 +1587,9 @@ class Saver(object):
         graph file.
       write_state: `Boolean` indicating whether or not to write the
         `CheckpointStateProto`.
+      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+        removed from the NodeDefs. For a detailed guide, see
+        [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
 
     Returns:
       A string: path prefix used for the checkpoint files.  If the saver is
@@ -1548,6 +1603,7 @@ class Saver(object):
         collides with `save_path`.
       RuntimeError: If save and restore ops weren't built.
     """
+    # pylint: enable=line-too-long
     if not self._is_built and context.in_graph_mode():
       raise RuntimeError(
           "`build()` should be called before save if defer_build==True")
@@ -1618,7 +1674,8 @@ class Saver(object):
           checkpoint_file, meta_graph_suffix=meta_graph_suffix)
       if context.in_graph_mode():
         with sess.graph.as_default():
-          self.export_meta_graph(meta_graph_filename)
+          self.export_meta_graph(
+              meta_graph_filename, strip_default_attrs=strip_default_attrs)
 
     if self._is_empty:
       return None
@@ -1631,7 +1688,9 @@ class Saver(object):
                         as_text=False,
                         export_scope=None,
                         clear_devices=False,
-                        clear_extraneous_savers=False):
+                        clear_extraneous_savers=False,
+                        strip_default_attrs=False):
+    # pylint: disable=line-too-long
     """Writes `MetaGraphDef` to save_path/filename.
 
     Args:
@@ -1644,10 +1703,14 @@ class Saver(object):
       clear_extraneous_savers: Remove any Saver-related information from the
         graph (both Save/Restore ops and SaverDefs) that are not associated
         with this Saver.
+      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+        removed from the NodeDefs. For a detailed guide, see
+        [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
 
     Returns:
       A `MetaGraphDef` proto.
     """
+    # pylint: enable=line-too-long
     return export_meta_graph(
         filename=filename,
         graph_def=ops.get_default_graph().as_graph_def(add_shapes=True),
@@ -1656,7 +1719,8 @@ class Saver(object):
         as_text=as_text,
         export_scope=export_scope,
         clear_devices=clear_devices,
-        clear_extraneous_savers=clear_extraneous_savers)
+        clear_extraneous_savers=clear_extraneous_savers,
+        strip_default_attrs=strip_default_attrs)
 
   def restore(self, sess, save_path):
     """Restores previously saved variables.
@@ -1859,7 +1923,9 @@ def export_meta_graph(filename=None,
                       export_scope=None,
                       clear_devices=False,
                       clear_extraneous_savers=False,
+                      strip_default_attrs=False,
                       **kwargs):
+  # pylint: disable=line-too-long
   """Returns `MetaGraphDef` proto. Optionally writes it to filename.
 
   This function exports the graph, saver, and collection objects into
@@ -1885,6 +1951,9 @@ def export_meta_graph(filename=None,
     clear_extraneous_savers: Remove any Saver-related information from the
         graph (both Save/Restore ops and SaverDefs) that are not associated
         with the provided SaverDef.
+    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+      removed from the NodeDefs. For a detailed guide, see
+      [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
     **kwargs: Optional keyed arguments.
 
   Returns:
@@ -1899,6 +1968,7 @@ def export_meta_graph(filename=None,
   execution is enabled.
   @end_compatibility
   """
+  # pylint: enable=line-too-long
   if context.in_eager_mode():
     raise RuntimeError("Exporting/importing meta graphs is not supported when "
                        "eager execution is enabled. No graph exists when eager "
@@ -1914,6 +1984,7 @@ def export_meta_graph(filename=None,
       export_scope=export_scope,
       clear_devices=clear_devices,
       clear_extraneous_savers=clear_extraneous_savers,
+      strip_default_attrs=strip_default_attrs,
       **kwargs)
   return meta_graph_def
 
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index b7f1297b8f9400ea128656a051189b0e82601be0..c5a6f49df599434ab3bc1a9fe3d85db6f824071e 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -38,6 +38,7 @@ from tensorflow.core.protobuf import queue_runner_pb2
 from tensorflow.core.protobuf import saver_pb2
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.client import session
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -73,6 +74,7 @@ from tensorflow.python.training.checkpoint_state_pb2 import CheckpointState
 from tensorflow.python.util import compat
 
 
+@test_util.with_c_api
 class SaverTest(test.TestCase):
 
   def basicSaveRestore(self, variable_op):
@@ -541,6 +543,23 @@ class SaverTest(test.TestCase):
       save = saver_module.Saver({"v0": v0_2})
       variables.global_variables_initializer().run()
 
+  def testSharedServerOnGPU(self):
+    if not test.is_gpu_available():
+      return
+    save_path = os.path.join(self.get_temp_dir(), "gpu")
+    with session.Session("", graph=ops_lib.Graph()) as sess:
+      with sess.graph.device(test.gpu_device_name()):
+        v0_1 = variables.Variable(123.45)
+      save = saver_module.Saver({"v0": v0_1}, sharded=True, allow_empty=True)
+      variables.global_variables_initializer().run()
+      save.save(sess, save_path)
+
+    with session.Session("", graph=ops_lib.Graph()) as sess:
+      with sess.graph.device(test.gpu_device_name()):
+        v0_2 = variables.Variable(543.21)
+      save = saver_module.Saver({"v0": v0_2}, sharded=True, allow_empty=True)
+      variables.global_variables_initializer().run()
+
   def testVariables(self):
     save_path = os.path.join(self.get_temp_dir(), "variables")
     with session.Session("", graph=ops_lib.Graph()) as sess:
@@ -724,6 +743,7 @@ class SaverTest(test.TestCase):
       save.save(sess, save_path)
 
 
+@test_util.with_c_api
 class SaveRestoreShardedTest(test.TestCase):
 
   _WRITE_VERSION = saver_pb2.SaverDef.V1
@@ -774,9 +794,13 @@ class SaveRestoreShardedTest(test.TestCase):
         with sess.graph.device("/cpu:0"):
           v0 = variables.Variable(111, name="v0")
           t0 = saver_test_utils.CheckpointedOp(name="t0")
-        save = saver_module.Saver({"v0": v0, "t0": t0.saveable},
-                                  write_version=self._WRITE_VERSION,
-                                  sharded=True)
+        save = saver_module.Saver(
+            {
+                "v0": v0,
+                "t0": t0.saveable
+            },
+            write_version=self._WRITE_VERSION,
+            sharded=True)
         variables.global_variables_initializer().run()
         t0.insert("k11", 33.0).run()
         self.assertEqual(111, v0.eval())
@@ -794,9 +818,13 @@ class SaveRestoreShardedTest(test.TestCase):
         with sess.graph.device("/cpu:0"):
           v1 = variables.Variable(222)
           t1 = saver_test_utils.CheckpointedOp(name="t1")
-        save = saver_module.Saver({"v1": v1, "t1": t1.saveable},
-                                  write_version=self._WRITE_VERSION,
-                                  sharded=True)
+        save = saver_module.Saver(
+            {
+                "v1": v1,
+                "t1": t1.saveable
+            },
+            write_version=self._WRITE_VERSION,
+            sharded=True)
         variables.global_variables_initializer().run()
         t1.insert("k22", 44.0).run()
         self.assertEqual(222, v1.eval())
@@ -990,10 +1018,12 @@ class SaveRestoreShardedTest(test.TestCase):
     self._testPartitionedVariables(use_resource=True)
 
 
+@test_util.with_c_api
 class SaveRestoreShardedTestV2(SaveRestoreShardedTest):
   _WRITE_VERSION = saver_pb2.SaverDef.V2
 
 
+@test_util.with_c_api
 class MaxToKeepTest(test.TestCase):
 
   def _get_test_dir(self, dirname):
@@ -1263,6 +1293,7 @@ class MaxToKeepTest(test.TestCase):
       self.assertFalse(gfile.Exists(save._MetaGraphFilename(s1)))
 
 
+@test_util.with_c_api
 class KeepCheckpointEveryNHoursTest(test.TestCase):
 
   def _get_test_dir(self, dirname):
@@ -1321,6 +1352,7 @@ class KeepCheckpointEveryNHoursTest(test.TestCase):
       self.assertTrue(saver_module.checkpoint_exists(s4))
 
 
+@test_util.with_c_api
 class SaveRestoreWithVariableNameMap(test.TestCase):
 
   def _testNonReshape(self, variable_op):
@@ -1397,6 +1429,7 @@ class SaveRestoreWithVariableNameMap(test.TestCase):
     self._testNonReshape(variables.Variable)
 
 
+@test_util.with_c_api
 class LatestCheckpointWithRelativePaths(test.TestCase):
 
   @staticmethod
@@ -1498,6 +1531,7 @@ class LatestCheckpointWithRelativePaths(test.TestCase):
           self.assertEqual(v0.eval(), 2.0)
 
 
+@test_util.with_c_api
 class CheckpointStateTest(test.TestCase):
 
   def _get_test_dir(self, dirname):
@@ -1612,6 +1646,7 @@ class CheckpointStateTest(test.TestCase):
                      os.path.join(save_dir, "./model.ckpt-687529"))
 
 
+@test_util.with_c_api
 class MetaGraphTest(test.TestCase):
 
   def _get_test_dir(self, dirname):
@@ -1881,7 +1916,8 @@ class MetaGraphTest(test.TestCase):
       # Generates a new MetaGraphDef.
       new_meta_graph_def = new_saver.export_meta_graph()
       # It should be the same as the original.
-      self.assertProtoEquals(meta_graph_def, new_meta_graph_def)
+      test_util.assert_meta_graph_protos_equal(self, meta_graph_def,
+                                               new_meta_graph_def)
 
   def _testGraphExtensionSave(self, test_dir):
     filename = os.path.join(test_dir, "metafile")
@@ -2039,6 +2075,42 @@ class MetaGraphTest(test.TestCase):
         self.assertEqual(o.summary, "")
         self.assertEqual(o.description, "")
 
+  def testStripDefaultValuedAttrs(self):
+    """Verifies that default valued attrs are stripped, unless disabled."""
+
+    # With strip_default_attrs enabled, attributes "T" (float32) and "Tout"
+    # (complex64) in the "Complex" op must be removed.
+    with self.test_session():
+      real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real")
+      imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag")
+      math_ops.complex(real_num, imag_num, name="complex")
+
+      save = saver_module.Saver({"real_num": real_num, "imag_num": imag_num})
+      variables.global_variables_initializer()
+
+      meta_graph_def = save.export_meta_graph(strip_default_attrs=True)
+      node_def = test_util.get_node_def_from_graph("complex",
+                                                   meta_graph_def.graph_def)
+      self.assertNotIn("T", node_def.attr)
+      self.assertNotIn("Tout", node_def.attr)
+
+    # With strip_default_attrs disabled, attributes "T" (float32) and "Tout"
+    # (complex64) in the "Complex" op must *not* be removed, even if they map
+    # to their defaults.
+    with self.test_session(graph=ops_lib.Graph()):
+      real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real")
+      imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag")
+      math_ops.complex(real_num, imag_num, name="complex")
+
+      save = saver_module.Saver({"real_num": real_num, "imag_num": imag_num})
+      variables.global_variables_initializer()
+
+      meta_graph_def = save.export_meta_graph(strip_default_attrs=False)
+      node_def = test_util.get_node_def_from_graph("complex",
+                                                   meta_graph_def.graph_def)
+      self.assertIn("T", node_def.attr)
+      self.assertIn("Tout", node_def.attr)
+
   def testImportIntoNamescope(self):
     # Test that we can import a meta graph into a namescope.
     test_dir = self._get_test_dir("import_into_namescope")
@@ -2129,7 +2201,33 @@ class MetaGraphTest(test.TestCase):
               10, size=[1, 10])
       })
 
+  def testPreserveDatasetAndFunctions(self):
+    with ops_lib.Graph().as_default() as g:
+      dataset = dataset_ops.Dataset.range(10).map(lambda x: x * x)
+      iterator = dataset.make_one_shot_iterator()
+      next_element = iterator.get_next()
+      _ = array_ops.identity(next_element, name="output")
+
+      # Generate three MetaGraphDef protos using different code paths.
+      meta_graph_def_simple = saver_module.export_meta_graph()
+      meta_graph_def_devices_cleared = saver_module.export_meta_graph(
+          clear_devices=True)
+      meta_graph_def_from_graph_def = saver_module.export_meta_graph(
+          clear_devices=True, graph_def=g.as_graph_def())
+
+    for meta_graph_def in [meta_graph_def_simple,
+                           meta_graph_def_devices_cleared,
+                           meta_graph_def_from_graph_def]:
+      with session.Session(graph=ops_lib.Graph()) as sess:
+        saver_module.import_meta_graph(meta_graph_def, import_scope="new_model")
+        sess.run(variables.global_variables_initializer())
+        for i in range(10):
+          self.assertEqual(i * i, sess.run("new_model/output:0"))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run("new_model/output:0")
+
 
+@test_util.with_c_api
 class CheckpointReaderTest(test.TestCase):
 
   _WRITE_VERSION = saver_pb2.SaverDef.V1
@@ -2182,10 +2280,12 @@ class CheckpointReaderTest(test.TestCase):
       pywrap_tensorflow.NewCheckpointReader("non-existent")
 
 
+@test_util.with_c_api
 class CheckpointReaderForV2Test(CheckpointReaderTest):
   _WRITE_VERSION = saver_pb2.SaverDef.V2
 
 
+@test_util.with_c_api
 class WriteGraphTest(test.TestCase):
 
   def _get_test_dir(self, dirname):
@@ -2213,6 +2313,7 @@ class WriteGraphTest(test.TestCase):
     self.assertTrue(os.path.exists(path))
 
 
+@test_util.with_c_api
 class SaverUtilsTest(test.TestCase):
 
   def setUp(self):
@@ -2255,6 +2356,7 @@ class SaverUtilsTest(test.TestCase):
     self.assertTrue(mtimes[1] >= mtimes[0])
 
 
+@test_util.with_c_api
 class ScopedGraphTest(test.TestCase):
 
   def _get_test_dir(self, dirname):
@@ -2558,51 +2660,5 @@ class ScopedGraphTest(test.TestCase):
       self.assertEqual(2.0, var_dict2["variable2:0"].eval())
 
 
-# TODO(b/64763924): Remove after Jan 1st 2018.
-class LenientNamesTest(test.TestCase):
-
-  def setUp(self):
-    super(LenientNamesTest, self).setUp()
-    os.putenv("TF_SAVER_LENIENT_NAMES", "True")
-
-  def tearDown(self):
-    os.putenv("TF_SAVER_LENIENT_NAMES", "")
-    super(LenientNamesTest, self).tearDown()
-
-  def testSaveRestore(self):
-    save_path = os.path.join(self.get_temp_dir(), "basic_save_restore")
-
-    # Build a graph with 2 parameter nodes, and Save and
-    # Restore nodes for them.
-    v0 = variables.Variable(10.0, name="v0")
-    v1 = variables.Variable(20.0, name="v1")
-    v2 = saver_test_utils.CheckpointedOp(name="v2")
-    v2_init = v2.insert("k1", 30.0)
-    save = saver_module.Saver(
-        {
-            "v0:0": v0,
-            "v1": v1,
-            "v2": v2.saveable
-        }, restore_sequentially=True)
-    init_all_op = [variables.global_variables_initializer(), v2_init]
-
-    with self.test_session() as sess:
-      sess.run(init_all_op)
-      save.save(sess, save_path)
-
-    with self.test_session() as sess:
-      v0 = variables.Variable(-1.0, name="v0")
-      v1 = variables.Variable(-1.0, name="v1")
-      v2 = saver_test_utils.CheckpointedOp(name="v2")
-      save = saver_module.Saver({"v0": v0, "v1": v1, "v2": v2.saveable})
-
-      save.restore(sess, save_path)
-      # Check that the parameter nodes have been restored.
-      self.assertEqual(10.0, v0.eval())
-      self.assertEqual(20.0, v1.eval())
-      self.assertEqual(b"k1", v2.keys().eval())
-      self.assertEqual(30.0, v2.values().eval())
-
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/training/server_lib.py b/tensorflow/python/training/server_lib.py
index 2091eca0b9c6f0af4a043a4639b6fb72b90cef56..29da67a30a58c1b8b8e172b2ccede340880fef58 100644
--- a/tensorflow/python/training/server_lib.py
+++ b/tensorflow/python/training/server_lib.py
@@ -307,6 +307,12 @@ class ClusterSpec(object):
   def __ne__(self, other):
     return self._cluster_spec != other
 
+  def __str__(self):
+    key_values = self.as_dict()
+    string_items = [
+        repr(k) + ": " + repr(key_values[k]) for k in sorted(key_values)]
+    return "ClusterSpec({" + ", ".join(string_items) + "})"
+
   def as_dict(self):
     """Returns a dictionary from job names to their tasks.
 
diff --git a/tensorflow/python/training/server_lib_test.py b/tensorflow/python/training/server_lib_test.py
index 26aac787ed40b644af1f611270b7aaee77623877..063044f0d05d4237830e415ac2ad800c98ae8beb 100644
--- a/tensorflow/python/training/server_lib_test.py
+++ b/tensorflow/python/training/server_lib_test.py
@@ -421,6 +421,17 @@ class ServerDefTest(test.TestCase):
 
 class ClusterSpecTest(test.TestCase):
 
+  def testStringConversion(self):
+    cluster_spec = server_lib.ClusterSpec({
+        "ps": ["ps0:1111"],
+        "worker": ["worker0:3333", "worker1:4444"]
+    })
+
+    expected_str = (
+        "ClusterSpec({'ps': ['ps0:1111'], 'worker': ['worker0:3333', "
+        "'worker1:4444']})")
+    self.assertEqual(expected_str, str(cluster_spec))
+
   def testProtoDictDefEquivalences(self):
     cluster_spec = server_lib.ClusterSpec({
         "ps": ["ps0:2222", "ps1:2222"],
diff --git a/tensorflow/python/training/session_manager.py b/tensorflow/python/training/session_manager.py
index a13b6dd976a835d14c03ed90f40b172e0bcbfd07..b396a1e7d0a06ec7b952ba2980e081e01e681d4d 100644
--- a/tensorflow/python/training/session_manager.py
+++ b/tensorflow/python/training/session_manager.py
@@ -480,7 +480,9 @@ class SessionManager(object):
     if self._local_init_op is not None:
       is_ready_for_local_init, msg = self._model_ready_for_local_init(sess)
       if is_ready_for_local_init:
+        logging.info("Running local_init_op.")
         sess.run(self._local_init_op)
+        logging.info("Done running local_init_op.")
         return True, None
       else:
         return False, msg
diff --git a/tensorflow/python/training/session_manager_test.py b/tensorflow/python/training/session_manager_test.py
index 5879fd330adec58dde45f3da8ae16c9a297f3b24..6670d9365f2994a70b7228170179f97d314041c9 100644
--- a/tensorflow/python/training/session_manager_test.py
+++ b/tensorflow/python/training/session_manager_test.py
@@ -26,6 +26,7 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
@@ -504,6 +505,7 @@ class SessionManagerTest(test.TestCase):
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES],
           name="x")
+      # TODO(b/70206927): Use ResourceVariables once they are handled properly.
       v_res = variables.Variable(1, name="v_res")
       w_res = variables.Variable(
           v_res,
@@ -556,6 +558,24 @@ class SessionManagerTest(test.TestCase):
       self.assertEquals(1, sess.run(w_res))
       self.assertEquals(3, sess.run(x_res))
 
+  def testPrepareSessionWithCyclicInitializer(self):
+    # Regression test. Previously Variable._build_initializer_expr would enter
+    # into an infinite recursion when the variable's initial_value involved
+    # cyclic dependencies.
+    with ops.Graph().as_default():
+      i = control_flow_ops.while_loop(lambda i: i < 1, lambda i: i + 1, [0])
+      v = variables.Variable(array_ops.identity(i), name="v")
+      with self.test_session():
+        self.assertEqual(False, variables.is_variable_initialized(v).eval())
+      sm = session_manager.SessionManager(
+          ready_op=variables.report_uninitialized_variables())
+      sess = sm.prepare_session("", init_op=v.initializer)
+      self.assertEqual(1, sess.run(v))
+      self.assertEqual(
+          True,
+          variables.is_variable_initialized(
+              sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
+
   def testPrepareSessionDidNotInitLocalVariable(self):
     with ops.Graph().as_default():
       v = variables.Variable(1, name="v")
diff --git a/tensorflow/python/training/slot_creator_test.py b/tensorflow/python/training/slot_creator_test.py
index 08a3c8dc53a5e88559ddeaf1f95d441fa5adfd29..b0f48e4ecd4d41946a8a5ed5a0c507a2344a943a 100644
--- a/tensorflow/python/training/slot_creator_test.py
+++ b/tensorflow/python/training/slot_creator_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variable_scope
@@ -29,6 +30,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.training import slot_creator
 
 
+@test_util.with_c_api
 class SlotCreatorTest(test.TestCase):
 
   def testCreateSlotFromVariable(self):
diff --git a/tensorflow/python/training/supervisor.py b/tensorflow/python/training/supervisor.py
index a634a842b67033d5fde6bf8cf819f681e892a247..e4514aaea223b6b254a7a72e11e6b70b576fd54b 100644
--- a/tensorflow/python/training/supervisor.py
+++ b/tensorflow/python/training/supervisor.py
@@ -36,11 +36,15 @@ from tensorflow.python.training import coordinator
 from tensorflow.python.training import saver as saver_mod
 from tensorflow.python.training import session_manager as session_manager_mod
 from tensorflow.python.training import training_util
+from tensorflow.python.util import deprecation
 
 
 class Supervisor(object):
   """A training helper that checkpoints models and computes summaries.
 
+  This class is deprecated. Please use
+  ${tf.train.MonitoredTrainingSession} instead.
+
   The Supervisor is a small wrapper around a `Coordinator`, a `Saver`,
   and a `SessionManager` that takes care of common needs of TensorFlow
   training programs.
@@ -198,6 +202,8 @@ class Supervisor(object):
   # the default behavior should be used.
   USE_DEFAULT = 0
 
+  @deprecation.deprecated(None,
+                          "Please switch to tf.train.MonitoredTrainingSession")
   def __init__(self,
                graph=None,
                ready_op=USE_DEFAULT,
diff --git a/tensorflow/python/training/sync_replicas_optimizer.py b/tensorflow/python/training/sync_replicas_optimizer.py
index b52d101a2154f02273a7a24f4ed3ef55209da20c..47702fdad05d13015e0cbf7768129b0c53b6c14c 100644
--- a/tensorflow/python/training/sync_replicas_optimizer.py
+++ b/tensorflow/python/training/sync_replicas_optimizer.py
@@ -449,7 +449,7 @@ class _SyncReplicasOptimizerHook(session_run_hook.SessionRunHook):
   """A SessionRunHook handles ops related to SyncReplicasOptimizer."""
 
   def __init__(self, sync_optimizer, is_chief, num_tokens):
-    """Creates hook to handle SyncReplicaOptimizer initialization ops.
+    """Creates hook to handle SyncReplicasOptimizer initialization ops.
 
     Args:
       sync_optimizer: `SyncReplicasOptimizer` which this hook will initialize.
diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py
index fa02ad84cce3ccaa391571df3a2de4b65b255c84..03811fa38dd021fd5ff222bfbe32234606d6c681 100644
--- a/tensorflow/python/training/training.py
+++ b/tensorflow/python/training/training.py
@@ -38,6 +38,7 @@ See the @{$python/train} guide.
 @@clip_by_global_norm
 @@global_norm
 @@cosine_decay
+@@cosine_decay_restarts
 @@linear_cosine_decay
 @@noisy_linear_cosine_decay
 @@exponential_decay
diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py
index f5802d9359128b5ecc9b9506ee9a9a21cfc19ef7..4ce871de72fb43420e25bfa7cd13002b09f83f18 100644
--- a/tensorflow/python/util/nest.py
+++ b/tensorflow/python/util/nest.py
@@ -47,6 +47,15 @@ def _sorted(dict_):
     raise TypeError("nest only supports dicts with sortable keys.")
 
 
+def _is_namedtuple(instance):
+  """Returns True iff `instance` is a `namedtuple`."""
+  return (
+      isinstance(instance, tuple) and
+      hasattr(instance, "_fields") and
+      isinstance(instance._fields, _collections.Sequence) and
+      all(isinstance(f, _six.string_types) for f in instance._fields))
+
+
 def _sequence_like(instance, args):
   """Converts the sequence `args` to the same type as `instance`.
 
@@ -66,11 +75,7 @@ def _sequence_like(instance, args):
     # corresponding `OrderedDict` to pack it back).
     result = dict(zip(_sorted(instance), args))
     return type(instance)((key, result[key]) for key in _six.iterkeys(instance))
-  elif (isinstance(instance, tuple) and
-        hasattr(instance, "_fields") and
-        isinstance(instance._fields, _collections.Sequence) and
-        all(isinstance(f, _six.string_types) for f in instance._fields)):
-    # This is a namedtuple
+  elif _is_namedtuple(instance):
     return type(instance)(*args)
   else:
     # Not a namedtuple
@@ -456,9 +461,9 @@ def assert_shallow_structure(shallow_tree, input_tree, check_types=True):
       if set(input_tree) != set(shallow_tree):
         raise ValueError(
             "The two structures don't have the same keys. Input "
-            "structure has keys %s, while shallow structure has keys %s."
-            % (list(_six.iterkeys(input_tree)),
-               list(_six.iterkeys(shallow_tree))))
+            "structure has keys %s, while shallow structure has keys %s." %
+            (list(_six.iterkeys(input_tree)),
+             list(_six.iterkeys(shallow_tree))))
 
       input_tree = list(_six.iteritems(input_tree))
       shallow_tree = list(_six.iteritems(shallow_tree))
@@ -677,6 +682,85 @@ def get_traverse_shallow_structure(traverse_fn, structure):
   return _sequence_like(structure, level_traverse)
 
 
+def yield_flat_paths(nest):
+  """Yields paths for some nested structure.
+
+  Paths are lists of objects which can be str-converted, which may include
+  integers or other types which are used as indices in a dict.
+
+  The flat list will be in the corresponding order as if you called
+  `snt.nest.flatten` on the structure. This is handy for naming Tensors such
+  the TF scope structure matches the tuple structure.
+
+  E.g. if we have a tuple `value = Foo(a=3, b=Bar(c=23, d=42))`
+
+  ```shell
+  >>> nest.flatten(value)
+  [3, 23, 42]
+  >>> list(nest.yield_flat_paths(value))
+  [('a',), ('b', 'c'), ('b', 'd')]
+  ```
+
+  ```shell
+  >>> list(nest.yield_flat_paths({'a': [3]}))
+  [('a', 0)]
+  >>> list(nest.yield_flat_paths({'a': 3}))
+  [('a',)]
+  ```
+
+  Args:
+    nest: the value to produce a flattened paths list for.
+
+  Yields:
+    Tuples containing index or key values which form the path to a specific
+      leaf value in the nested structure.
+  """
+
+  # The _maybe_add_final_path_element function is used below in order to avoid
+  # adding trailing slashes when the sub-element recursed into is a leaf.
+  if isinstance(nest, dict):
+    for key in _sorted(nest):
+      value = nest[key]
+      for sub_path in yield_flat_paths(value):
+        yield (key,) + sub_path
+  elif _is_namedtuple(nest):
+    for key in nest._fields:
+      value = getattr(nest, key)
+      for sub_path in yield_flat_paths(value):
+        yield (key,) + sub_path
+  elif isinstance(nest, _six.string_types):
+    yield ()
+  elif isinstance(nest, _collections.Sequence):
+    for idx, value in enumerate(nest):
+      for sub_path in yield_flat_paths(value):
+        yield (idx,) + sub_path
+  else:
+    yield ()
+
+
+def flatten_with_joined_string_paths(structure, separator="/"):
+  """Returns a list of (string path, data element) tuples.
+
+  The order of tuples produced matches that of `nest.flatten`. This allows you
+  to flatten a nested structure while keeping information about where in the
+  structure each data element was located. See `nest.yield_flat_paths`
+  for more information.
+
+  Args:
+    structure: the nested structure to flatten.
+    separator: string to separate levels of hierarchy in the results, defaults
+      to '/'.
+
+  Returns:
+    A list of (string, data element) tuples.
+  """
+  flat_paths = yield_flat_paths(structure)
+  def stringify_and_join(path_elements):
+    return separator.join(str(path_element) for path_element in path_elements)
+  flat_string_paths = [stringify_and_join(path) for path in flat_paths]
+  return list(zip(flat_string_paths, flatten(structure)))
+
+
 _pywrap_tensorflow.RegisterSequenceClass(_collections.Sequence)
 
 
@@ -691,6 +775,8 @@ _allowed_symbols = [
     "flatten_up_to",
     "map_structure_up_to",
     "get_traverse_shallow_structure",
+    "yield_flat_paths",
+    "flatten_with_joined_string_paths",
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py
index 26aeaeec19b334b466f185fe765974fca61ae3b8..4906649f013da38f6b18f1645958aa4b244a9d05 100644
--- a/tensorflow/python/util/nest_test.py
+++ b/tensorflow/python/util/nest_test.py
@@ -388,8 +388,9 @@ class NestTest(test.TestCase):
     inp_ab1 = {"a": (1, 1), "b": {"c": (2, 2)}}
     inp_ab2 = {"a": (1, 1), "b": {"d": (2, 2)}}
     expected_message = (
-        "The two structures don't have the same keys. Input "
-        "structure has keys \['c'\], while shallow structure has keys \['d'\].")
+        r"The two structures don't have the same keys. Input "
+        r"structure has keys \['c'\], while shallow structure has "
+        r"keys \['d'\].")
 
     with self.assertRaisesRegexp(ValueError, expected_message):
       nest.assert_shallow_structure(inp_ab2, inp_ab1)
@@ -438,8 +439,7 @@ class NestTest(test.TestCase):
     input_tree_flattened_as_shallow_tree = nest.flatten_up_to(shallow_tree,
                                                               input_tree)
     self.assertEqual(input_tree_flattened_as_shallow_tree, [0, 1, 2, 3, 4])
-    shallow_tree = collections.OrderedDict([("a", 0),
-                                            ("c", {"d": 3, "e": 1})])
+    shallow_tree = collections.OrderedDict([("a", 0), ("c", {"d": 3, "e": 1})])
     input_tree_flattened_as_shallow_tree = nest.flatten_up_to(shallow_tree,
                                                               input_tree)
     self.assertEqual(input_tree_flattened_as_shallow_tree,
@@ -584,6 +584,59 @@ class NestTest(test.TestCase):
         TypeError, "didn't return a depth=1 structure of bools"):
       nest.get_traverse_shallow_structure(lambda _: [1], [1])
 
+  def testYieldFlatStringPaths(self):
+    for inputs_expected in ({"inputs": [], "expected": []},
+                            {"inputs": 3, "expected": [()]},
+                            {"inputs": [3], "expected": [(0,)]},
+                            {"inputs": {"a": 3}, "expected": [("a",)]},
+                            {"inputs": {"a": {"b": 4}},
+                             "expected": [("a", "b")]},
+                            {"inputs": [{"a": 2}], "expected": [(0, "a")]},
+                            {"inputs": [{"a": [2]}], "expected": [(0, "a", 0)]},
+                            {"inputs": [{"a": [(23, 42)]}],
+                             "expected": [(0, "a", 0, 0), (0, "a", 0, 1)]},
+                            {"inputs": [{"a": ([23], 42)}],
+                             "expected": [(0, "a", 0, 0), (0, "a", 1)]},
+                            {"inputs": {"a": {"a": 2}, "c": [[[4]]]},
+                             "expected": [("a", "a"), ("c", 0, 0, 0)]},
+                            {"inputs": {"0": [{"1": 23}]},
+                             "expected": [("0", 0, "1")]}):
+      inputs = inputs_expected["inputs"]
+      expected = inputs_expected["expected"]
+      self.assertEqual(list(nest.yield_flat_paths(inputs)), expected)
+
+  def testFlattenWithStringPaths(self):
+    for inputs_expected in (
+        {"inputs": [], "expected": []},
+        {"inputs": [23, "42"], "expected": [("0", 23), ("1", "42")]},
+        {"inputs": [[[[108]]]], "expected": [("0/0/0/0", 108)]}):
+      inputs = inputs_expected["inputs"]
+      expected = inputs_expected["expected"]
+      self.assertEqual(
+          nest.flatten_with_joined_string_paths(inputs, separator="/"),
+          expected)
+
+  # Need a separate test for namedtuple as we can't declare tuple definitions
+  # in the @parameterized arguments.
+  def testFlattenNamedTuple(self):
+    # pylint: disable=invalid-name
+    Foo = collections.namedtuple("Foo", ["a", "b"])
+    Bar = collections.namedtuple("Bar", ["c", "d"])
+    # pylint: enable=invalid-name
+    test_cases = [
+        (Foo(a=3, b=Bar(c=23, d=42)),
+         [("a", 3), ("b/c", 23), ("b/d", 42)]),
+        (Foo(a=Bar(c=23, d=42), b=Bar(c=0, d="something")),
+         [("a/c", 23), ("a/d", 42), ("b/c", 0), ("b/d", "something")]),
+        (Bar(c=42, d=43),
+         [("c", 42), ("d", 43)]),
+        (Bar(c=[42], d=43),
+         [("c/0", 42), ("d", 43)]),
+    ]
+    for inputs, expected in test_cases:
+      self.assertEqual(
+          list(nest.flatten_with_joined_string_paths(inputs)), expected)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/util/py_checkpoint_reader.i b/tensorflow/python/util/py_checkpoint_reader.i
index 0cd095d9d947f5cf76adaf83dc16272c4374573e..8004898cbcbce7ce593ce35efdc6493e052468bd 100644
--- a/tensorflow/python/util/py_checkpoint_reader.i
+++ b/tensorflow/python/util/py_checkpoint_reader.i
@@ -164,6 +164,8 @@ def NewCheckpointReader(filepattern):
   with errors.raise_exception_on_not_ok_status() as status:
     from tensorflow.python.util import compat
     return CheckpointReader(compat.as_bytes(filepattern), status)
+
+NewCheckpointReader._tf_api_names = ['train.NewCheckpointReader']
 %}
 
 %include "tensorflow/c/checkpoint_reader.h"
diff --git a/tensorflow/python/util/stat_summarizer.i b/tensorflow/python/util/stat_summarizer.i
index 80739195872a056e7a5443dfb81ab1440300dbff..6aeaa0e31b9b48f7e6705ab7146828cc0e0e5e08 100644
--- a/tensorflow/python/util/stat_summarizer.i
+++ b/tensorflow/python/util/stat_summarizer.i
@@ -27,8 +27,8 @@ limitations under the License.
 
 %ignoreall
 
-%unignore NewStatSummarizer;
-%unignore DeleteStatSummarizer;
+%unignore _NewStatSummarizer;
+%unignore _DeleteStatSummarizer;
 %unignore tensorflow;
 %unignore tensorflow::StatSummarizer;
 %unignore tensorflow::StatSummarizer::StatSummarizer;
@@ -43,21 +43,20 @@ limitations under the License.
 
 // TODO(ashankar): Remove the unused argument from the API.
 %{
-tensorflow::StatSummarizer* NewStatSummarizer(
+tensorflow::StatSummarizer* _NewStatSummarizer(
       const string& unused) {
   return new tensorflow::StatSummarizer(tensorflow::StatSummarizerOptions());
 }
 %}
 
-
 %{
-void DeleteStatSummarizer(tensorflow::StatSummarizer* ss) {
+void _DeleteStatSummarizer(tensorflow::StatSummarizer* ss) {
   delete ss;
 }
 %}
 
-tensorflow::StatSummarizer* NewStatSummarizer(const string& unused);
-void DeleteStatSummarizer(tensorflow::StatSummarizer* ss);
+tensorflow::StatSummarizer* _NewStatSummarizer(const string& unused);
+void _DeleteStatSummarizer(tensorflow::StatSummarizer* ss);
 
 %extend tensorflow::StatSummarizer {
   void ProcessStepStatsStr(const string& step_stats_str) {
@@ -77,3 +76,21 @@ void DeleteStatSummarizer(tensorflow::StatSummarizer* ss);
 
 %include "tensorflow/core/util/stat_summarizer.h"
 %unignoreall
+
+%insert("python") %{
+
+# Wrapping NewStatSummarizer and DeletStatSummarizer because
+# SWIG-generated functions are built-in functions and do not support
+# setting _tf_api_names attribute.
+
+def NewStatSummarizer(unused):
+  return _NewStatSummarizer(unused)
+
+def DeleteStatSummarizer(stat_summarizer):
+  _DeleteStatSummarizer(stat_summarizer)
+
+NewStatSummarizer._tf_api_names = ["contrib.stat_summarizer.NewStatSummarizer"]
+DeleteStatSummarizer._tf_api_names = [
+    "contrib.stat_summarizer.DeleteStatSummarizer"]
+StatSummarizer._tf_api_names = ["contrib.stat_summarizer.StatSummarizer"]
+%}
diff --git a/tensorflow/python/util/tf_decorator.py b/tensorflow/python/util/tf_decorator.py
index 780fcba64f934e25ffc9cd24f57369de758d5e45..3d837a40449ece056c154e1b09636a8885047035 100644
--- a/tensorflow/python/util/tf_decorator.py
+++ b/tensorflow/python/util/tf_decorator.py
@@ -89,9 +89,14 @@ def make_decorator(target,
   decorator = TFDecorator(decorator_name, target, decorator_doc,
                           decorator_argspec)
   setattr(decorator_func, '_tf_decorator', decorator)
-  decorator_func.__name__ = target.__name__
-  decorator_func.__module__ = target.__module__
-  decorator_func.__doc__ = decorator.__doc__
+  # Objects that are callables (e.g., a functools.partial object) may not have
+  # the following attributes.
+  if hasattr(target, '__name__'):
+    decorator_func.__name__ = target.__name__
+  if hasattr(target, '__module__'):
+    decorator_func.__module__ = target.__module__
+  if hasattr(target, '__doc__'):
+    decorator_func.__doc__ = decorator.__doc__
   decorator_func.__wrapped__ = target
   return decorator_func
 
@@ -139,10 +144,11 @@ class TFDecorator(object):
     self._decorator_name = decorator_name
     self._decorator_doc = decorator_doc
     self._decorator_argspec = decorator_argspec
-    self.__name__ = target.__name__
+    if hasattr(target, '__name__'):
+      self.__name__ = target.__name__
     if self._decorator_doc:
       self.__doc__ = self._decorator_doc
-    elif target.__doc__:
+    elif hasattr(target, '__doc__') and target.__doc__:
       self.__doc__ = target.__doc__
     else:
       self.__doc__ = ''
diff --git a/tensorflow/python/util/tf_decorator_test.py b/tensorflow/python/util/tf_decorator_test.py
index 3f6a10b44081db2f5ce0d8ffb0333cd3c76fc269..0f9712c987d442358ecb4f81f46ef0898e380b01 100644
--- a/tensorflow/python/util/tf_decorator_test.py
+++ b/tensorflow/python/util/tf_decorator_test.py
@@ -19,6 +19,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
+
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import tf_decorator
@@ -195,6 +197,23 @@ class TfMakeDecoratorTest(test.TestCase):
     decorator = getattr(decorated, '_tf_decorator')
     self.assertEqual('test_decorator_name', decorator.decorator_name)
 
+  def testCompatibleWithNamelessCallables(self):
+
+    class Callable(object):
+
+      def __call__(self):
+        pass
+
+    callable_object = Callable()
+    # Smoke test: This should not raise an exception, even though
+    # `callable_object` does not have a `__name__` attribute.
+    _ = tf_decorator.make_decorator(callable_object, test_wrapper)
+
+    partial = functools.partial(test_function, x=1)
+    # Smoke test: This should not raise an exception, even though `partial` does
+    # not have `__name__`, `__module__`, and `__doc__` attributes.
+    _ = tf_decorator.make_decorator(partial, test_wrapper)
+
 
 class TfDecoratorUnwrapTest(test.TestCase):
 
diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py
index 9ed125704b1cf2ced585db0b169a184d27e1ad72..c4168f7b1ac80976a957e96c79c72fe3b288d622 100644
--- a/tensorflow/python/util/tf_inspect.py
+++ b/tensorflow/python/util/tf_inspect.py
@@ -45,6 +45,26 @@ def getargspec(object):  # pylint: disable=redefined-builtin
                if d.decorator_argspec is not None), _inspect.getargspec(target))
 
 
+def getfullargspec(obj):  # pylint: disable=redefined-builtin
+  """TFDecorator-aware replacement for inspect.getfullargspec and fallback to
+  inspect.getargspec in Python 2.
+
+  Args:
+    obj: A callable, possibly decorated.
+
+  Returns:
+    The `FullArgSpec` (`ArgSpec` in Python 2) that describes the signature of
+    the outermost decorator that changes the callable's signature. If the
+    callable is not decorated, `inspect.getfullargspec()`
+    (`inspect.getargspec()` in Python 2) will be called directly on the
+    callable.
+  """
+  spec_fn = getattr(_inspect, 'getfullargspec', getattr(_inspect, 'getargspec'))
+  decorators, target = tf_decorator.unwrap(obj)
+  return next((d.decorator_argspec for d in decorators
+               if d.decorator_argspec is not None), spec_fn(target))
+
+
 def getcallargs(func, *positional, **named):
   """TFDecorator-aware replacement for inspect.getcallargs.
 
@@ -97,7 +117,16 @@ def getdoc(object):  # pylint: disable=redefined-builtin
 
 def getfile(object):  # pylint: disable=redefined-builtin
   """TFDecorator-aware replacement for inspect.getfile."""
-  return _inspect.getfile(tf_decorator.unwrap(object)[1])
+  unwrapped_object = tf_decorator.unwrap(object)[1]
+
+  # Work around for the case when object is a stack frame
+  # and only .pyc files are used. In this case, getfile
+  # might return incorrect path. So, we get the path from f_globals
+  # instead.
+  if (hasattr(unwrapped_object, 'f_globals') and
+      '__file__' in unwrapped_object.f_globals):
+    return unwrapped_object.f_globals['__file__']
+  return _inspect.getfile(unwrapped_object)
 
 
 def getmembers(object, predicate=None):  # pylint: disable=redefined-builtin
diff --git a/tensorflow/python/util/tfprof.i b/tensorflow/python/util/tfprof.i
index 8d11cdfd58b47a5cdd6ec5c65b30df5621e59768..06f12631fa7ef04b24d469be00ba181ed9ac4e13 100644
--- a/tensorflow/python/util/tfprof.i
+++ b/tensorflow/python/util/tfprof.i
@@ -47,6 +47,7 @@ using tensorflow::int64;
 %unignore tensorflow::tfprof::ProfilerFromFile;
 %unignore tensorflow::tfprof::DeleteProfiler;
 %unignore tensorflow::tfprof::AddStep;
+%unignore tensorflow::tfprof::SerializeToString;
 %unignore tensorflow::tfprof::WriteProfile;
 %unignore tensorflow::tfprof::Profile;
 
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index c3d7611ad43b05f510481925fbfe1f930cf95ff8..a41fa7df253bcf4bce280574b89ed0dda8330521 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -29,7 +29,7 @@ bool WarnedThatSetIsNotSequence = false;
 
 // Returns 1 if `o` is considered a sequence for the purposes of Flatten().
 // Returns 0 otherwise.
-// Returns -1 if an error occured.
+// Returns -1 if an error occurred.
 int IsSequenceHelper(PyObject* o) {
   if (PyDict_Check(o)) return true;
   if (PySet_Check(o) && !WarnedThatSetIsNotSequence) {
diff --git a/tensorflow/stream_executor/blas.h b/tensorflow/stream_executor/blas.h
index eb1b19c5d963d56c6175251a54e2ab5072a01760..072f08554688276a05d9be85718de8750bd874c2 100644
--- a/tensorflow/stream_executor/blas.h
+++ b/tensorflow/stream_executor/blas.h
@@ -30,8 +30,8 @@ limitations under the License.
 //  Stream stream{stream_exec};
 //  stream
 //    .Init()
-//    .ThenBlasAxpy(1024, 5.5, x, 1, &y, 1)
-//    .BlockHostUntilDone();
+//    .ThenBlasAxpy(1024, 5.5, x, 1, &y, 1);
+//  SE_CHECK_OK(stream.BlockHostUntilDone());
 //
 // By using stream operations in this manner the user can easily intermix custom
 // kernel launches (via StreamExecutor::ThenLaunch()) with these pre-canned BLAS
diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc
index cb2b06d47cd8ccf82e9df81d63049915b9b47582..44a3a745ad86dc24f632e4a36691fba06171c9fb 100644
--- a/tensorflow/stream_executor/cuda/cuda_blas.cc
+++ b/tensorflow/stream_executor/cuda/cuda_blas.cc
@@ -36,6 +36,7 @@ limitations under the License.
 #include <assert.h>
 #include <complex>
 
+#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/stream_executor/cuda/cuda_activation.h"
 #include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
 #include "tensorflow/stream_executor/cuda/cuda_helpers.h"
@@ -268,6 +269,11 @@ PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasSgemmEx)
 PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasGemmEx)
 #endif
 
+#if CUDA_VERSION >= 9000
+PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasGetMathMode)
+PERFTOOLS_GPUTOOLS_CUBLAS_WRAP(cublasSetMathMode)
+#endif
+
 }  // namespace wrap
 
 static string ToString(cublasStatus_t status) {
@@ -299,6 +305,18 @@ static string ToString(cublasStatus_t status) {
   }
 }
 
+// Decide whether to enable TENSOR_OP_MATH
+static bool TensorOpMathEnabled() {
+  static bool is_enabled = [] {
+    bool is_disabled;
+    TF_CHECK_OK(
+        tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUBLAS_TENSOR_OP_MATH",
+                                       /*default_val=*/false, &is_disabled));
+    return !is_disabled;
+  }();
+  return is_enabled;
+}
+
 // cuBLAS has interfaces that permit pointers to be passed from either the host
 // memory space or the device memory space; however, you must instruct it as to
 // which address space those pointers are in with cublasSetPointerMode.
@@ -360,6 +378,65 @@ class ScopedCublasPointerMode {
   bool ok_;                       // Whether the change was successful.
 };
 
+#if CUDA_VERSION >= 9000
+// cuBLAS has interfaces that permit computations to use the Volta hardware.
+// This must be enabled via the cublasGet/SetMathMode APIs.
+//
+// This helper sets the cuBLAS math mode to a desired value for a cuBLAS call
+// you are about to perform in a given scope.
+//
+// The prior cuBLAS math mode is retained and restored when this object goes
+// out of scope.
+class ScopedCublasMathMode {
+ public:
+  // Note that, because the setting of the cublas math mode is fallible,
+  // construction of this scoped datatype must be paired with a call to
+  // Init().
+  //
+  // Parameters:
+  //  handle: The cublas library handle to act upon in setting the math mode.
+  explicit ScopedCublasMathMode(CUDAExecutor *parent, cublasHandle_t handle)
+      : parent_(parent), handle_(handle), ok_(false) {}
+
+  // Attempts the switch to the requested scoped math mode, new_mode.
+  //
+  // Note that when false is returned, an appropriate error has already been
+  // logged.
+  bool Init(cublasMath_t new_mode) {
+    cublasStatus_t ret = wrap::cublasGetMathMode(parent_, handle_, &old_mode_);
+    if (ret != CUBLAS_STATUS_SUCCESS) {
+      LOG(ERROR) << "failed to get old cublas math mode: " << ToString(ret);
+      return ok_ = false;
+    }
+
+    ret = wrap::cublasSetMathMode(parent_, handle_, new_mode);
+    if (ret != CUBLAS_STATUS_SUCCESS) {
+      LOG(ERROR) << "failed to set new cublas math mode: " << ToString(ret);
+      return ok_ = false;
+    }
+    return ok_ = true;
+  }
+
+  // Switches back to the prior math mode, if the switch operation was
+  // successful in the first place.
+  ~ScopedCublasMathMode() {
+    if (ok_) {
+      cublasStatus_t ret = wrap::cublasSetMathMode(parent_, handle_, old_mode_);
+      if (ret != CUBLAS_STATUS_SUCCESS) {
+        LOG(ERROR) << "failed to set former cublas math mode: "
+                   << ToString(ret);
+      }
+    }
+  }
+
+ private:
+  CUDAExecutor *parent_;   // Executor establishing this math mode for.
+  cublasHandle_t handle_;  // Handle to the cuBLAS instance of interest.
+  cublasMath_t old_mode_;  // Prior cuBLAS math mode, to be restored.
+  bool ok_;                // Whether the change was successful.
+};
+#endif  // CUDA_VERSION >= 9000
+
 bool CUDABlas::Init() {
   cublasStatus_t ret = wrap::cublasCreate(parent_, &blas_);
   if (ret != CUBLAS_STATUS_SUCCESS) {
@@ -532,7 +609,7 @@ cudaDataType_t CUDAComputationType(blas::ComputationType ty) {
 template <typename FuncT, typename... Args>
 bool CUDABlas::DoBlasInternalImpl(FuncT cublas_func, Stream *stream,
                                   bool pointer_mode_host, bool err_on_failure,
-                                  Args... args) {
+                                  bool use_tensor_op_math, Args... args) {
   mutex_lock lock{mu_};
 
   CHECK(blas_ != nullptr);
@@ -545,7 +622,14 @@ bool CUDABlas::DoBlasInternalImpl(FuncT cublas_func, Stream *stream,
                                            : CUBLAS_POINTER_MODE_DEVICE)) {
     return false;
   }
-
+#if CUDA_VERSION >= 9000
+  ScopedCublasMathMode math_mode{parent_, blas_};
+  if (use_tensor_op_math) {
+    if (!math_mode.Init(CUBLAS_TENSOR_OP_MATH)) {
+      return false;
+    }
+  }
+#endif
   cublasStatus_t ret = cublas_func(parent_, blas_, args...);
   if (err_on_failure && ret != CUBLAS_STATUS_SUCCESS) {
     LOG(ERROR) << "failed to run cuBLAS routine " << cublas_func.kName << ": "
@@ -1762,14 +1846,26 @@ bool CUDABlas::DoBlasGemm(
                       "precondition violation";
     }
   }
-  // TODO(sesse): Consider supporting the Hgemm interface, which uses half
-  // calculations internally (faster on newer devices, such as Pascal and TX1,
-  // but less precise).
-  return DoBlasInternal(
+
+  bool use_tensor_ops = false;
+#if CUDA_VERSION >= 9000
+  int cc_major, cc_minor;
+  stream->parent()->GetDeviceDescription().cuda_compute_capability(&cc_major,
+                                                                   &cc_minor);
+
+  // GPUs < sm_70 don't support Volta hardware.
+  if (cc_major >= 7 && TensorOpMathEnabled()) {
+    use_tensor_ops = true;
+  }
+#endif
+
+  return DoBlasInternalImpl(
       wrap::cublasSgemmEx, stream, true /* = pointer_mode_host */,
-      CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k, &alpha,
-      CUDAMemory(a), SE_CUDA_DATA_HALF, lda, CUDAMemory(b), SE_CUDA_DATA_HALF,
-      ldb, &beta, CUDAMemoryMutable(c), SE_CUDA_DATA_HALF, ldc);
+      true /* = err_on_failure= */, use_tensor_ops, CUDABlasTranspose(transa),
+      CUDABlasTranspose(transb), m, n, k, &alpha, CUDAMemory(a),
+      SE_CUDA_DATA_HALF, lda, CUDAMemory(b), SE_CUDA_DATA_HALF, ldb, &beta,
+      CUDAMemoryMutable(c), SE_CUDA_DATA_HALF, ldc);
+
 #else
   LOG(ERROR) << "fp16 sgemm is not implemented in this cuBLAS version "
              << "(need at least CUDA 7.5)";
@@ -2031,6 +2127,26 @@ bool CUDABlas::DoBlasGemmWithProfilingImpl(
   return result;
 }
 
+static bool UsesTensorOps(blas::AlgorithmType algo) {
+#if CUDA_VERSION >= 9000
+  cublasGemmAlgo_t cublas_algo = static_cast<cublasGemmAlgo_t>(algo);
+  return cublas_algo >= CUBLAS_GEMM_DEFAULT_TENSOR_OP;
+#else
+  return false;
+#endif
+}
+
+template <typename InType>
+static bool TensorOpsAvailable(int cc_major) {
+#if CUDA_VERSION >= 9000
+  if (cc_major >= 7 && TensorOpMathEnabled() &&
+      std::is_same<InType, Eigen::half>::value) {
+    return true;
+  }
+#endif
+  return false;
+}
+
 template <typename InT, typename OutT, typename CompT>
 bool CUDABlas::DoBlasGemmWithAlgorithmImpl(
     Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
@@ -2049,6 +2165,10 @@ bool CUDABlas::DoBlasGemmWithAlgorithmImpl(
     return false;
   }
 
+  if (UsesTensorOps(algorithm) && !TensorOpsAvailable<InT>(cc_major)) {
+    return false;
+  }
+
   struct TimerDeleter {
     void operator()(CUDATimer *t) {
       t->Destroy();
@@ -2098,10 +2218,19 @@ bool CUDABlas::GetBlasGemmAlgorithms(
 // still return the out_algorithms. Caller needs to make sure that in this case,
 // the returned vector is empty.
 #if CUDA_VERSION >= 8000
-  for (cublasGemmAlgo_t algo :
-       {CUBLAS_GEMM_DFALT, CUBLAS_GEMM_ALGO0, CUBLAS_GEMM_ALGO1,
-        CUBLAS_GEMM_ALGO2, CUBLAS_GEMM_ALGO3, CUBLAS_GEMM_ALGO4,
-        CUBLAS_GEMM_ALGO5, CUBLAS_GEMM_ALGO6, CUBLAS_GEMM_ALGO7}) {
+  for (cublasGemmAlgo_t algo : {
+         CUBLAS_GEMM_DFALT, CUBLAS_GEMM_ALGO0, CUBLAS_GEMM_ALGO1,
+             CUBLAS_GEMM_ALGO2, CUBLAS_GEMM_ALGO3, CUBLAS_GEMM_ALGO4,
+             CUBLAS_GEMM_ALGO5, CUBLAS_GEMM_ALGO6, CUBLAS_GEMM_ALGO7,
+#if CUDA_VERSION >= 9000
+             CUBLAS_GEMM_ALGO8, CUBLAS_GEMM_ALGO9, CUBLAS_GEMM_ALGO10,
+             CUBLAS_GEMM_ALGO11, CUBLAS_GEMM_ALGO12, CUBLAS_GEMM_ALGO13,
+             CUBLAS_GEMM_ALGO14, CUBLAS_GEMM_ALGO15, CUBLAS_GEMM_ALGO16,
+             CUBLAS_GEMM_ALGO17, CUBLAS_GEMM_DFALT_TENSOR_OP,
+             CUBLAS_GEMM_ALGO0_TENSOR_OP, CUBLAS_GEMM_ALGO1_TENSOR_OP,
+             CUBLAS_GEMM_ALGO2_TENSOR_OP
+#endif
+       }) {
     out_algorithms->push_back(algo);
   }
 #endif
diff --git a/tensorflow/stream_executor/cuda/cuda_blas.h b/tensorflow/stream_executor/cuda/cuda_blas.h
index 80cda971173fe34658f3403f1354babbd02e6ff9..deb211c04bcaa9e98ee04c5e9066a2a13092cb06 100644
--- a/tensorflow/stream_executor/cuda/cuda_blas.h
+++ b/tensorflow/stream_executor/cuda/cuda_blas.h
@@ -84,7 +84,7 @@ class CUDABlas : public blas::BlasSupport {
   template <typename FuncT, typename... Args>
   bool DoBlasInternalImpl(FuncT cublas_func, Stream *stream,
                           bool pointer_mode_host, bool err_on_failure,
-                          Args... args);
+                          bool use_tensor_op_math, Args... args);
 
   // Convenience functions that call DoBlasInternalImpl with different values
   // for err_on_failure.
@@ -92,13 +92,17 @@ class CUDABlas : public blas::BlasSupport {
   bool DoBlasInternal(FuncT cublas_func, Stream *stream, bool pointer_mode_host,
                       Args... args) {
     return DoBlasInternalImpl(cublas_func, stream, pointer_mode_host,
-                              /*err_on_failure=*/true, args...);
+                              /*err_on_failure=*/true, /*use_tensor_ops=*/false,
+                              args...);
   }
   template <typename FuncT, typename... Args>
   bool DoBlasInternalFailureOK(FuncT cublas_func, Stream *stream,
                                bool pointer_mode_host, Args... args) {
+    // Tensor ops are hard-coded off in this path, but can still be enabled with
+    // a specific algorithm choice as in DoBlasGemmWithAlgorithmImpl().
     return DoBlasInternalImpl(cublas_func, stream, pointer_mode_host,
-                              /*err_on_failure=*/false, args...);
+                              /*err_on_failure=*/false,
+                              /*use_tensor_ops=*/false, args...);
   }
 
   // A helper function to implement DoBlasGemmBatched interfaces for generic
diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
index 00506fa54be93b54966a5b374b02286b7e784776..f35542e18fdba2b92f12b950e432937d0a1ef577 100644
--- a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
+++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
@@ -232,7 +232,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
       result = StringToDriverVersion(version);
     }
 #else
-#if !defined(PLATFORM_WINDOWS)
+#if !defined(PLATFORM_WINDOWS) && !defined(NVIDIA_TEGRA)
   // Callback used when iterating through DSOs. Looks for the driver-interfacing
   // DSO and yields its version number into the callback data, when found.
   auto iterate_phdr =
@@ -366,8 +366,8 @@ port::StatusOr<DriverVersion> Diagnostician::FindKernelDriverVersion() {
   contents[kContentsSize - 1] = '\0';
 
   if (retcode != 0) {
-    LOG(INFO) << "driver version file contents: \"\"\"" << contents.begin()
-              << "\"\"\"";
+    VLOG(1) << "driver version file contents: \"\"\"" << contents.begin()
+            << "\"\"\"";
     fclose(driver_version_file);
     return FindKernelModuleVersion(contents.begin());
   }
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index d78362d4fbac3a6058743383d832bfc3df133a2f..384445e6c1629e5518459b5382aa9b92698fb6ff 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -559,10 +559,11 @@ class ScopedFilterDescriptor {
 // A helper function to decide whether to enable the TENSOR_OP_MATH math type
 static bool TensorOpMathEnabled() {
   static bool is_enabled = [] {
-    bool ret;
-    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DISABLE_TENSOR_OP_MATH",
-                                               /*default=*/false, &ret));
-    return !ret;
+    bool is_disabled;
+    TF_CHECK_OK(
+        tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUDNN_TENSOR_OP_MATH",
+                                       /*default_val=*/false, &is_disabled));
+    return !is_disabled;
   }();
   return is_enabled;
 }
@@ -2677,7 +2678,7 @@ bool CudnnSupport::GetConvolveBackwardFilterAlgorithms(
       // CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD,
       // clang-format on
   };
-#if CUDNN_VERSION >= 5110
+#if CUDNN_VERSION >= 5100
   if (CudnnEnvVar<WinogradNonfused>::IsEnabled() && with_winograd_nonfused) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED);
   }
@@ -2761,14 +2762,27 @@ bool CudnnSupport::DoBatchNormalizationForwardImpl(
   float zero = 0.0;
 
   if (is_training) {
-    stream->ThenMemZero(batch_mean, batch_mean->size());
-    stream->ThenMemZero(batch_var, batch_var->size());
+    CHECK_EQ(batch_mean->is_null(), batch_var->is_null())
+        << "batch_mean and batch_var must both be null or both be non-null";
+
+    void* batch_mean_opaque;
+    void* batch_var_opaque;
+    if (!batch_mean->is_null() && !batch_var->is_null()) {
+      stream->ThenMemZero(batch_mean, batch_mean->size());
+      stream->ThenMemZero(batch_var, batch_var->size());
+      batch_mean_opaque = batch_mean->opaque();
+      batch_var_opaque = batch_var->opaque();
+    } else {
+      batch_mean_opaque = nullptr;
+      batch_var_opaque = nullptr;
+    }
+
     status = wrap::cudnnBatchNormalizationForwardTraining(
         parent_, ToHandle(dnn_handle_), mode, &one, &zero,
         x_descriptor.handle(), x.opaque(), x_descriptor.handle(), y->opaque(),
         scale_offset_descriptor.handle(), scale.opaque(), offset.opaque(), 1.0,
-        batch_mean->opaque(), batch_var->opaque(), epsilon,
-        saved_mean->opaque(), saved_inv_var->opaque());
+        batch_mean_opaque, batch_var_opaque, epsilon, saved_mean->opaque(),
+        saved_inv_var->opaque());
 #if CUDNN_VERSION < 5000
     CHECK(inv_var_to_var);
     inv_var_to_var();
@@ -2797,28 +2811,28 @@ bool CudnnSupport::DoBatchNormalizationForwardImpl(
 bool CudnnSupport::DoBatchNormalizationBackward(
     Stream* stream, const DeviceMemory<float>& y_backprop,
     const DeviceMemory<float>& x, const DeviceMemory<float>& scale,
-    const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+    const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
     const dnn::BatchDescriptor& x_desc,
     const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
     DeviceMemory<float>* x_backprop, DeviceMemory<float>* scale_backprop,
     DeviceMemory<float>* offset_backprop) {
   return DoBatchNormalizationBackwardImpl(
       stream, CUDNN_DATA_FLOAT, CUDNN_DATA_FLOAT, y_backprop, x, scale, mean,
-      variance, x_desc, scale_offset_desc, epsilon, x_backprop, scale_backprop,
+      inv_var, x_desc, scale_offset_desc, epsilon, x_backprop, scale_backprop,
       offset_backprop);
 }
 
 bool CudnnSupport::DoBatchNormalizationBackward(
     Stream* stream, const DeviceMemory<Eigen::half>& y_backprop,
     const DeviceMemory<Eigen::half>& x, const DeviceMemory<float>& scale,
-    const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+    const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
     const dnn::BatchDescriptor& x_desc,
     const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
     DeviceMemory<Eigen::half>* x_backprop, DeviceMemory<float>* scale_backprop,
     DeviceMemory<float>* offset_backprop) {
   return DoBatchNormalizationBackwardImpl(
       stream, CUDNN_DATA_HALF, CUDNN_DATA_FLOAT, y_backprop, x, scale, mean,
-      variance, x_desc, scale_offset_desc, epsilon, x_backprop, scale_backprop,
+      inv_var, x_desc, scale_offset_desc, epsilon, x_backprop, scale_backprop,
       offset_backprop);
 }
 
@@ -2827,7 +2841,7 @@ bool CudnnSupport::DoBatchNormalizationBackwardImpl(
     Stream* stream, int cudnn_input_type, int cudnn_scale_type,
     const DeviceMemory<T>& y_backprop, const DeviceMemory<T>& x,
     const DeviceMemory<U>& scale, const DeviceMemory<U>& mean,
-    const DeviceMemory<U>& variance, const dnn::BatchDescriptor& x_desc,
+    const DeviceMemory<U>& inv_var, const dnn::BatchDescriptor& x_desc,
     const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
     DeviceMemory<T>* x_backprop, DeviceMemory<U>* scale_backprop,
     DeviceMemory<U>* offset_backprop) {
@@ -2854,7 +2868,7 @@ bool CudnnSupport::DoBatchNormalizationBackwardImpl(
       y_backprop.opaque(), x_descriptor.handle(), x_backprop->opaque(),
       scale_offset_descriptor.handle(), scale.opaque(),
       scale_backprop->opaque(), offset_backprop->opaque(), epsilon,
-      mean.opaque(), variance.opaque());
+      mean.opaque(), inv_var.opaque());
   if (status != CUDNN_STATUS_SUCCESS) {
     LOG(ERROR) << "failed to enqueue backward batch normalization on stream: "
                << ToString(status);
@@ -2927,7 +2941,6 @@ bool CudnnSupport::DoFusedConvolve(
       side_input_scale, bias_descriptor, biases, activation_mode,
       output_descriptor, output_data, scratch_allocator, algorithm_config,
       output_profile_result);
-  return true;
 }
 
 bool CudnnSupport::DoFusedConvolve(
@@ -2950,7 +2963,6 @@ bool CudnnSupport::DoFusedConvolve(
       side_input_scale, bias_descriptor, biases, activation_mode,
       output_descriptor, output_data, scratch_allocator, algorithm_config,
       output_profile_result);
-  return true;
 }
 
 bool CudnnSupport::DoFusedConvolve(
@@ -2974,7 +2986,6 @@ bool CudnnSupport::DoFusedConvolve(
       side_input_scale, bias_descriptor, biases, activation_mode,
       output_descriptor, output_data, scratch_allocator, algorithm_config,
       output_profile_result);
-  return true;
 }
 
 bool CudnnSupport::DoFusedConvolve(
@@ -4251,7 +4262,12 @@ bool CudnnSupport::DoDepthConcatenate(
   for (size_t i = 0; i < input_data.size(); ++i) {
     const auto& dimensions = input_dimensions[i];
     tmp.resize(dimensions.ElementCount());
-    stream->ThenMemcpyD2H<float>(*input_data[i], &tmp).BlockHostUntilDone();
+    stream->ThenMemcpyD2H<float>(*input_data[i], &tmp);
+    port::Status block_status = stream->BlockHostUntilDone();
+    if (!block_status.ok()) {
+      LOG(ERROR) << "BlockHostUntilDone failed: " << block_status;
+      return false;
+    }
 
     for (int64 batch = 0; batch < output_dimensions.count(); ++batch) {
       for (int64 yx = 0; yx < area; ++yx) {
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index 14986286f1dd4c4ced1ebaf6adbada8e52096b92..ee28c0bf57a51a63be7ebbce5c8f80e09737bb16 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -226,7 +226,7 @@ class CudnnSupport : public dnn::DnnSupport {
   bool DoBatchNormalizationBackward(
       Stream* stream, const DeviceMemory<float>& y_backprop,
       const DeviceMemory<float>& x, const DeviceMemory<float>& scale,
-      const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+      const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
       const dnn::BatchDescriptor& x_desc,
       const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
       DeviceMemory<float>* x_backprop, DeviceMemory<float>* scale_backprop,
@@ -235,7 +235,7 @@ class CudnnSupport : public dnn::DnnSupport {
   bool DoBatchNormalizationBackward(
       Stream* stream, const DeviceMemory<Eigen::half>& y_backprop,
       const DeviceMemory<Eigen::half>& x, const DeviceMemory<float>& scale,
-      const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+      const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
       const dnn::BatchDescriptor& x_desc,
       const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
       DeviceMemory<Eigen::half>* x_backprop,
@@ -637,7 +637,7 @@ class CudnnSupport : public dnn::DnnSupport {
       Stream* stream, int cudnn_input_type, int cudnn_scale_type,
       const DeviceMemory<T>& y_backprop, const DeviceMemory<T>& x,
       const DeviceMemory<U>& scale, const DeviceMemory<U>& mean,
-      const DeviceMemory<U>& variance, const dnn::BatchDescriptor& x_desc,
+      const DeviceMemory<U>& inv_var, const dnn::BatchDescriptor& x_desc,
       const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
       DeviceMemory<T>* x_backprop, DeviceMemory<U>* scale_backprop,
       DeviceMemory<U>* offset_backprop);
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc
index b6a96ed3e5cbda044c00bb9b940d68f80373587a..a017ff64d4c69b6952b442464877dc26a800ad37 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.cc
+++ b/tensorflow/stream_executor/cuda/cuda_driver.cc
@@ -1115,19 +1115,20 @@ CUDADriver::ContextGetSharedMemConfig(CudaContext* context) {
   return true;
 }
 
-/* static */ bool CUDADriver::SynchronizeStream(CudaContext* context,
-                                                CUstream stream) {
+/* static */ port::Status CUDADriver::SynchronizeStream(CudaContext *context,
+                                                        CUstream stream) {
   ScopedActivateContext activated{context};
   CHECK(stream != nullptr);
   CUresult res = cuStreamSynchronize(stream);
   if (res != CUDA_SUCCESS) {
-    LOG(ERROR) << "could not synchronize on CUDA stream: " << ToString(res)
-               << " :: " << port::CurrentStackTrace();
-    return false;
+    port::Status status = port::InternalError(
+        port::StrCat("could not synchronize on CUDA stream: ", ToString(res)));
+    LOG(ERROR) << status << " :: " << port::CurrentStackTrace();
+    return status;
   }
   VLOG(2) << "successfully synchronized stream " << stream << " on context "
           << context;
-  return true;
+  return port::Status::OK();
 }
 
 /* static */ bool CUDADriver::IsStreamIdle(CudaContext *context,
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.h b/tensorflow/stream_executor/cuda/cuda_driver.h
index 68494aba6597c2cd1ee52a7b4cb411cd50fad77b..4002ba2021d1a2e2c36bd1786a3084ee8c08bb78 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.h
+++ b/tensorflow/stream_executor/cuda/cuda_driver.h
@@ -304,7 +304,7 @@ class CUDADriver {
   // amount of time?
   //
   // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__STREAM.html#group__CUDA__STREAM_1g15e49dd91ec15991eb7c0a741beb7dad
-  static bool SynchronizeStream(CudaContext* context, CUstream stream);
+  static port::Status SynchronizeStream(CudaContext* context, CUstream stream);
 
   // Blocks the calling thread until the operations associated with the context
   // have been completed, via cuCtxSynchronize.
diff --git a/tensorflow/stream_executor/cuda/cuda_fft.cc b/tensorflow/stream_executor/cuda/cuda_fft.cc
index 7be2bccebc00d461d673b819fdc95841f452db08..a922f14fb4af695877b449d2f960fae1a356a82f 100644
--- a/tensorflow/stream_executor/cuda/cuda_fft.cc
+++ b/tensorflow/stream_executor/cuda/cuda_fft.cc
@@ -184,12 +184,11 @@ port::Status CUDAFftPlan::Initialize(
         return port::Status{port::error::INTERNAL,
                             "Failed to set auto allocation for cuFFT plan."};
       }
-      size_t size_in_bytes;
       switch (rank) {
         case 1:
           ret = wrap::cufftMakePlan1d(parent, plan_, elem_count_[0],
                                       CUDAFftType(type), /*batch=*/1,
-                                      &size_in_bytes);
+                                      &scratch_size_bytes_);
           if (ret != CUFFT_SUCCESS) {
             LOG(ERROR) << "failed to make cuFFT 1d plan:" << ret;
             return port::Status{port::error::INTERNAL,
@@ -199,7 +198,7 @@ port::Status CUDAFftPlan::Initialize(
         case 2:
           ret = wrap::cufftMakePlan2d(parent, plan_, elem_count_[0],
                                       elem_count_[1], CUDAFftType(type),
-                                      &size_in_bytes);
+                                      &scratch_size_bytes_);
           if (ret != CUFFT_SUCCESS) {
             LOG(ERROR) << "failed to make cuFFT 2d plan:" << ret;
             return port::Status{port::error::INTERNAL,
@@ -209,7 +208,7 @@ port::Status CUDAFftPlan::Initialize(
         case 3:
           ret = wrap::cufftMakePlan3d(parent, plan_, elem_count_[0],
                                       elem_count_[1], elem_count_[2],
-                                      CUDAFftType(type), &size_in_bytes);
+                                      CUDAFftType(type), &scratch_size_bytes_);
           if (ret != CUFFT_SUCCESS) {
             LOG(ERROR) << "failed to make cuFFT 3d plan:" << ret;
             return port::Status{port::error::INTERNAL,
@@ -223,24 +222,7 @@ port::Status CUDAFftPlan::Initialize(
           return port::Status{port::error::INVALID_ARGUMENT,
                               "cufftPlan only takes rank 1, 2, or 3."};
       }
-      // TODO(yangzihao): refactor this code and the one with the same function
-      // in the batch mode.
-      if (size_in_bytes != 0) {
-        auto allocated =
-            scratch_allocator->AllocateBytes(stream, size_in_bytes);
-        if (!allocated.ok() || (scratch_ = allocated.ValueOrDie()) == nullptr) {
-          LOG(ERROR) << "failed to allocate work area.";
-          return allocated.status();
-        }
-      }
-      // Connect work area with allocated space.
-      ret = wrap::cufftSetWorkArea(parent, plan_, scratch_.opaque());
-      if (ret != CUFFT_SUCCESS) {
-        LOG(ERROR) << "failed to set work area for cuFFT plan:" << ret;
-        return port::Status{port::error::INTERNAL,
-                            "Failed to set work area for cuFFT plan."};
-      }
-      return port::Status::OK();
+      return UpdateScratchAllocator(stream, scratch_allocator);
     }
   } else {
     // For either multiple batches or rank higher than 3, use cufftPlanMany().
@@ -270,32 +252,18 @@ port::Status CUDAFftPlan::Initialize(
             port::error::INTERNAL,
             "Failed to set auto allocation for cuFFT batched plan."};
       }
-      size_t size_in_bytes;
       ret = wrap::cufftMakePlanMany(
           parent, plan_, rank, elem_count_,
           input_embed ? input_embed_ : nullptr, input_stride, input_distance,
           output_embed ? output_embed_ : nullptr, output_stride,
-          output_distance, CUDAFftType(type), batch_count, &size_in_bytes);
+          output_distance, CUDAFftType(type), batch_count,
+          &scratch_size_bytes_);
       if (ret != CUFFT_SUCCESS) {
         LOG(ERROR) << "failed to make cuFFT batched plan:" << ret;
         return port::Status{port::error::INTERNAL,
                             "Failed to make cuFFT batched plan."};
       }
-      if (size_in_bytes != 0) {
-        auto allocated =
-            scratch_allocator->AllocateBytes(stream, size_in_bytes);
-        if (!allocated.ok() || (scratch_ = allocated.ValueOrDie()) == nullptr) {
-          LOG(ERROR) << "failed to allocate work area.";
-          return allocated.status();
-        }
-      }
-      // Connect work area with allocated space.
-      ret = wrap::cufftSetWorkArea(parent, plan_, scratch_.opaque());
-      if (ret != CUFFT_SUCCESS) {
-        LOG(ERROR) << "failed to set work area for cuFFT batched plan:" << ret;
-        return port::Status{port::error::INTERNAL,
-                            "Failed to set work area for cuFFT batched plan."};
-      }
+      return UpdateScratchAllocator(stream, scratch_allocator);
     }
   }
   return port::Status::OK();
@@ -312,6 +280,26 @@ port::Status CUDAFftPlan::Initialize(CUDAExecutor *parent, Stream *stream,
                     /*output_distance=*/0, type, 1, scratch_allocator);
 }
 
+port::Status CUDAFftPlan::UpdateScratchAllocator(
+    Stream *stream, ScratchAllocator *scratch_allocator) {
+  if (scratch_size_bytes_ != 0) {
+    auto allocated =
+        scratch_allocator->AllocateBytes(stream, scratch_size_bytes_);
+    if (!allocated.ok() || (scratch_ = allocated.ValueOrDie()) == nullptr) {
+      LOG(ERROR) << "failed to allocate work area.";
+      return allocated.status();
+    }
+  }
+  // Connect work area with allocated space.
+  cufftResult_t ret = wrap::cufftSetWorkArea(parent_, plan_, scratch_.opaque());
+  if (ret != CUFFT_SUCCESS) {
+    LOG(ERROR) << "failed to set work area for cuFFT plan:" << ret;
+    return port::Status{port::error::INTERNAL,
+                        "Failed to set work area for cuFFT plan."};
+  }
+  return port::Status::OK();
+}
+
 CUDAFftPlan::~CUDAFftPlan() { wrap::cufftDestroy(parent_, plan_); }
 
 int CUDAFftPlan::GetFftDirection() const {
@@ -461,6 +449,17 @@ std::unique_ptr<fft::Plan> CUDAFft::CreateBatchedPlanWithScratchAllocator(
   return std::move(fft_plan_ptr);
 }
 
+void CUDAFft::UpdatePlanWithScratchAllocator(
+    Stream *stream, fft::Plan *plan, ScratchAllocator *scratch_allocator) {
+  CUDAFftPlan *cuda_fft_plan = dynamic_cast<CUDAFftPlan *>(plan);
+  port::Status status =
+      cuda_fft_plan->UpdateScratchAllocator(stream, scratch_allocator);
+  if (!status.ok()) {
+    LOG(FATAL) << "failed to update custom allocator for cufft plan: "
+               << status.error_message();
+  }
+}
+
 template <typename FuncT, typename InputT, typename OutputT>
 bool CUDAFft::DoFftInternal(Stream *stream, fft::Plan *plan, FuncT cufftExec,
                             const DeviceMemory<InputT> &input,
diff --git a/tensorflow/stream_executor/cuda/cuda_fft.h b/tensorflow/stream_executor/cuda/cuda_fft.h
index 16102eb945a11d7083ebcfe29796b3fb5aa15a9c..04c7dfe501c451e4848bef68bed9685c079dd523 100644
--- a/tensorflow/stream_executor/cuda/cuda_fft.h
+++ b/tensorflow/stream_executor/cuda/cuda_fft.h
@@ -50,6 +50,7 @@ class CUDAFftPlan : public fft::Plan {
         plan_(-1),
         fft_type_(fft::Type::kInvalid),
         scratch_(nullptr),
+        scratch_size_bytes_(0),
         is_initialized_(false) {}
   ~CUDAFftPlan() override;
 
@@ -76,6 +77,9 @@ class CUDAFftPlan : public fft::Plan {
                           uint64 *elem_count, fft::Type type,
                           ScratchAllocator *scratch_allocator);
 
+  port::Status UpdateScratchAllocator(Stream *stream,
+                                      ScratchAllocator *scratch_allocator);
+
  protected:
   bool IsInitialized() const { return is_initialized_; }
 
@@ -84,6 +88,7 @@ class CUDAFftPlan : public fft::Plan {
   cufftHandle plan_;
   fft::Type fft_type_;
   DeviceMemory<uint8> scratch_;
+  size_t scratch_size_bytes_;
   bool is_initialized_;
 };
 
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 64d14f29dfee2a78a0fee1d8b336f4aa191ba086..4bbd531e14f18fc24d87b4fa655fe72e9f56b129 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -380,9 +380,9 @@ bool CUDAExecutor::Launch(Stream *stream, const ThreadDim &thread_dims,
                                 thread_dims.z, args.number_of_shared_bytes(),
                                 custream, kernel_params,
                                 nullptr /* = extra */)) {
-    LOG(ERROR) << "failed to launch CUDA kernel with args: "
+    LOG(ERROR) << "failed to launch CUDA kernel " << kernel.name() << " with "
                << args.number_of_arguments()
-               << "; thread dim: " << thread_dims.ToString()
+               << " args; thread dim: " << thread_dims.ToString()
                << "; block dim: " << block_dims.ToString();
     return false;
   }
@@ -664,7 +664,7 @@ bool CUDAExecutor::StopTimer(Stream *stream, Timer *timer) {
   return AsCUDATimer(timer)->Stop(AsCUDAStream(stream));
 }
 
-bool CUDAExecutor::BlockHostUntilDone(Stream *stream) {
+port::Status CUDAExecutor::BlockHostUntilDone(Stream *stream) {
   return CUDADriver::SynchronizeStream(context_, AsCUDAStreamValue(stream));
 }
 
@@ -861,6 +861,9 @@ static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
 #elif defined(PLATFORM_WINDOWS)
   // Windows support for NUMA is not currently implemented. Return node 0.
   return 0;
+#elif defined(__aarch64__)
+  LOG(INFO) << "ARM64 does not support NUMA - returning NUMA node zero";
+  return 0;
 #else
   VLOG(2) << "trying to read NUMA node for device ordinal: " << device_ordinal;
   static const int kUnknownNumaNode = -1;
@@ -925,16 +928,129 @@ struct UnqueryableDeviceParams {
   uint64 shared_memory_alloc_granularity;
 };
 
+// http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities
+// https://developer.download.nvidia.com/compute/cuda/CUDA_Occupancy_calculator.xls
 static const UnqueryableDeviceParams kAllUnqueryableDeviceParams[] = {
-  {
-    3, 5,       // compute capability (3.5)
-    16,         // blocks_per_core_limit
-    64 * 1024,  // registers_per_core_limit
-    255,        // registers_per_thread_limit
-    4,          // warp_alloc_granularity
-    256,        // register_alloc_granularity
-    256         // shared_memory_alloc_granularity
-  }
+    {
+        2, 0,       // compute capability (2.0)
+        8,          // blocks_per_core_limit
+        32 * 1024,  // registers_per_core_limit
+        63,         // registers_per_thread_limit
+        2,          // warp_alloc_granularity
+        64,         // register_alloc_granularity
+        128,        // shared_memory_alloc_granularity
+    },
+    {
+        2, 1,       // compute capability (2.1)
+        8,          // blocks_per_core_limit
+        32 * 1024,  // registers_per_core_limit
+        63,         // registers_per_thread_limit
+        2,          // warp_alloc_granularity
+        64,         // register_alloc_granularity
+        128,        // shared_memory_alloc_granularity
+    },
+    {
+        3, 0,       // compute capability (3.0)
+        16,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        63,         // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        3, 2,       // compute capability (3.2)
+        16,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        3, 5,       // compute capability (3.5)
+        16,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        3, 7,        // compute capability (3.7)
+        16,          // blocks_per_core_limit
+        128 * 1024,  // registers_per_core_limit
+        255,         // registers_per_thread_limit
+        4,           // warp_alloc_granularity
+        256,         // register_alloc_granularity
+        256,         // shared_memory_alloc_granularity
+    },
+    {
+        5, 0,       // compute capability (5.0)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        5, 2,       // compute capability (5.2)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        5, 3,       // compute capability (5.3)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        6, 0,       // compute capability (6.0)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        2,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        6, 1,       // compute capability (6.1)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    {
+        6, 2,       // compute capability (6.2)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        4,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
+    // TODO(jlebar): Confirm the alloc granularity values for sm_70.  These are
+    // not published in the spreadsheet linked above.  Currently we guess that
+    // they're the same as sm_60.
+    {
+        7, 0,       // compute capability (7.0)
+        32,         // blocks_per_core_limit
+        64 * 1024,  // registers_per_core_limit
+        255,        // registers_per_thread_limit
+        2,          // warp_alloc_granularity
+        256,        // register_alloc_granularity
+        256,        // shared_memory_alloc_granularity
+    },
 };
 
 DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const {
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 8ff4a30d6251dfe4cbbbf1a9c632b6383e964436..dbbbcd476f096ff912d391604ba349f6cb979478 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -152,7 +152,7 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
 
   Event::Status PollForEventStatus(Event *event) override;
 
-  bool BlockHostUntilDone(Stream *stream) override;
+  port::Status BlockHostUntilDone(Stream *stream) override;
 
   int PlatformDeviceCount() override { return CUDADriver::GetDeviceCount(); }
 
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 0d2cd4a9f2fb3068f9a803e616ff5fa1712f4945..f4162b096299ca9405e1f3045e370d0da1acf8da 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -908,8 +908,8 @@ class DnnSupport {
   //    the running variance.
   //  reserve_space_1: saved mean, to be reused in the backward gradient
   //    computation.
-  //  reserve_space_2: saved variance, to be reused in the backward gradient
-  //    computation.
+  //  reserve_space_2: saved inv_var (1/sqrt(epsilon + variance), to be reused
+  //    in the backward gradient computation.
   //  is_training: Set to true for training, false for inference.
   //  var_to_inv_var: a function to convert the variance to inverted variance
   //    for cuDNN v4 forward inference.
@@ -957,6 +957,7 @@ class DnnSupport {
   //  y_backprop: gradient with regard to output y.
   //  x: input data.
   //  scale: scaling parameters.
+  //  inv_var: 1/sqrt(epsilon + variance) of x.
   //  x_desc: dimensions of the input data, which is the same as the dimensions
   //    of the output.
   //  scale_offset_desc: dimensions of scale and offset.
@@ -967,7 +968,7 @@ class DnnSupport {
   virtual bool DoBatchNormalizationBackward(
       Stream* stream, const DeviceMemory<float>& y_backprop,
       const DeviceMemory<float>& x, const DeviceMemory<float>& scale,
-      const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+      const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
       const dnn::BatchDescriptor& x_desc,
       const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
       DeviceMemory<float>* x_backprop, DeviceMemory<float>* scale_backprop,
@@ -981,7 +982,7 @@ class DnnSupport {
   virtual bool DoBatchNormalizationBackward(
       Stream* stream, const DeviceMemory<Eigen::half>& y_backprop,
       const DeviceMemory<Eigen::half>& x, const DeviceMemory<float>& scale,
-      const DeviceMemory<float>& mean, const DeviceMemory<float>& variance,
+      const DeviceMemory<float>& mean, const DeviceMemory<float>& inv_var,
       const dnn::BatchDescriptor& x_desc,
       const dnn::BatchDescriptor& scale_offset_desc, const double epsilon,
       DeviceMemory<Eigen::half>* x_backprop,
@@ -1132,7 +1133,7 @@ class DnnSupport {
   //    space in order to speed up the convolution operation.
   //  algorithm: an integer to specify which algorithm should be used for the
   //    operation. kDefaultAlgorithm means the system will pick an algorithm
-  //    by default. The coding of the algorithm is be interpretted by the
+  //    by default. The coding of the algorithm is be interpreted by the
   //    underlying implementation.
   //  output_profile_result: the output profile result for this call. The
   //    profiling is only enabled when this is not nullptr.
@@ -2023,7 +2024,7 @@ class DnnSupport {
   //  output_h_desc: descriptor for the output "h" state.
   //  output_h_data: the memory region that stores the output "h" data.
   //  output_c_desc: descriptor for the output "c" state.
-  //  output_c_data: the memory region that stores the outptu "c" data. This
+  //  output_c_data: the memory region that stores the output "c" data. This
   //    must be specified for LSTM models.
   //  is_training: whether this is used in training or inference. That decides
   //    whether respace_space data need to be produced.
@@ -2032,7 +2033,7 @@ class DnnSupport {
   //  retains the data and feed it to the backward pass.
   //  workspace_allocator: an allocator to create temporary workspace used in
   //    this kernel. The caller is responsible for retaining the memory long
-  //    enough for the lifespan of this operation, and recycles aftewards.
+  //    enough for the lifespan of this operation, and recycles afterwards.
   virtual bool DoRnnForward(Stream* stream, const dnn::RnnDescriptor& rnn_desc,
                             const dnn::RnnSequenceTensorDescriptor& input_desc,
                             const DeviceMemory<Eigen::half>& input_data,
@@ -2111,7 +2112,7 @@ class DnnSupport {
   //  output_h_desc: descriptor for the output "h" state.
   //  output_h_data: the memory region that stores the output "h" data.
   //  output_c_desc: descriptor for the output "c" state.
-  //  output_c_data: the memory region that stores the outptu "c" data. This
+  //  output_c_data: the memory region that stores the output "c" data. This
   //    must be specified for LSTM models.
   //  output_backprop_data: the device memory region that contains the backprop
   //    to the output sequence.
diff --git a/tensorflow/stream_executor/fft.h b/tensorflow/stream_executor/fft.h
index 98cd77e2062bef45dd46e73ac29782eb12591e64..6b1728829abdeb5c4e20534675801a437341d732 100644
--- a/tensorflow/stream_executor/fft.h
+++ b/tensorflow/stream_executor/fft.h
@@ -34,8 +34,8 @@ limitations under the License.
 //     stream_exec.AsFft()->Create1dPlan(&stream, 1024, Type::kC2CForward);
 //  stream
 //    .Init()
-//    .ThenFft(plan.get(), x, &y)
-//    .BlockHostUntilDone();
+//    .ThenFft(plan.get(), x, &y);
+//  SE_CHECK_OK(stream.BlockHostUntilDone());
 //
 // By using stream operations in this manner the user can easily intermix custom
 // kernel launches (via StreamExecutor::ThenLaunch()) with these pre-canned FFT
@@ -167,6 +167,15 @@ class FftSupport {
       bool in_place_fft, int batch_count,
       ScratchAllocator *scratch_allocator) = 0;
 
+  // Updates the plan's work area with space allocated by a new scratch
+  // allocator. This facilitates plan reuse with scratch allocators.
+  //
+  // This requires that the plan was originally created using a scratch
+  // allocator, as otherwise scratch space will have been allocated internally
+  // by cuFFT.
+  virtual void UpdatePlanWithScratchAllocator(
+      Stream *stream, Plan *plan, ScratchAllocator *scratch_allocator) = 0;
+
   // Computes complex-to-complex FFT in the transform direction as specified
   // by direction parameter.
   virtual bool DoFft(Stream *stream, Plan *plan,
@@ -233,6 +242,9 @@ class FftSupport {
       uint64 output_stride, uint64 output_distance, fft::Type type,            \
       bool in_place_fft, int batch_count, ScratchAllocator *scratch_allocator) \
       override;                                                                \
+  void UpdatePlanWithScratchAllocator(Stream *stream, fft::Plan *plan,         \
+                                      ScratchAllocator *scratch_allocator)     \
+      override;                                                                \
   bool DoFft(Stream *stream, fft::Plan *plan,                                  \
              const DeviceMemory<std::complex<float>> &input,                   \
              DeviceMemory<std::complex<float>> *output) override;              \
diff --git a/tensorflow/stream_executor/host/host_gpu_executor.cc b/tensorflow/stream_executor/host/host_gpu_executor.cc
index 0af2c8cc3d751aa35958a21c81a71496f994e1fb..542f521ef778c3a69ec9adba74405131e07bcf1a 100644
--- a/tensorflow/stream_executor/host/host_gpu_executor.cc
+++ b/tensorflow/stream_executor/host/host_gpu_executor.cc
@@ -162,7 +162,7 @@ void HostExecutor::DeallocateStream(Stream *stream) {}
 
 bool HostExecutor::CreateStreamDependency(Stream *dependent, Stream *other) {
   AsHostStream(dependent)->EnqueueTask(
-      [other]() { other->BlockHostUntilDone(); });
+      [other]() { SE_CHECK_OK(other->BlockHostUntilDone()); });
   AsHostStream(dependent)->BlockUntilDone();
   return true;
 }
@@ -177,9 +177,9 @@ bool HostExecutor::StopTimer(Stream *stream, Timer *timer) {
   return true;
 }
 
-bool HostExecutor::BlockHostUntilDone(Stream *stream) {
+port::Status HostExecutor::BlockHostUntilDone(Stream *stream) {
   AsHostStream(stream)->BlockUntilDone();
-  return true;
+  return port::Status::OK();
 }
 
 DeviceDescription *HostExecutor::PopulateDeviceDescription() const {
diff --git a/tensorflow/stream_executor/host/host_gpu_executor.h b/tensorflow/stream_executor/host/host_gpu_executor.h
index 77b07e4a577fe321901a19369107701ec1904a80..e2c0e6d6b77130bd190b026f1eaff68d21dbf632 100644
--- a/tensorflow/stream_executor/host/host_gpu_executor.h
+++ b/tensorflow/stream_executor/host/host_gpu_executor.h
@@ -139,7 +139,7 @@ class HostExecutor : public internal::StreamExecutorInterface {
 
   bool StopTimer(Stream *stream, Timer *timer) override;
 
-  bool BlockHostUntilDone(Stream *stream) override;
+  port::Status BlockHostUntilDone(Stream *stream) override;
 
   int PlatformDeviceCount() override { return 1; }
 
diff --git a/tensorflow/stream_executor/kernel.cc b/tensorflow/stream_executor/kernel.cc
index e1b3635d52eac8c7181395fa76592ae3161a035a..81e531efb31ea7d8d6ac03b56aea6aa5f01d64d1 100644
--- a/tensorflow/stream_executor/kernel.cc
+++ b/tensorflow/stream_executor/kernel.cc
@@ -57,6 +57,15 @@ void KernelMetadata::set_shared_memory_bytes(int shared_memory_bytes) {
   has_shared_memory_bytes_ = true;
 }
 
+KernelBase::KernelBase(KernelBase &&from)
+    : parent_(from.parent_),
+      implementation_(std::move(from.implementation_)),
+      name_(std::move(from.name_)),
+      demangled_name_(std::move(from.demangled_name_)),
+      metadata_(from.metadata_) {
+  from.parent_ = nullptr;
+}
+
 KernelBase::KernelBase(StreamExecutor *parent)
     : parent_(parent),
       implementation_(parent->implementation()->CreateKernelImplementation()) {}
diff --git a/tensorflow/stream_executor/kernel.h b/tensorflow/stream_executor/kernel.h
index 8ef091f929c0ae5a068059732b57c0729fd5be07..5358eac1ae070efb2bead75c73208e9d283b498c 100644
--- a/tensorflow/stream_executor/kernel.h
+++ b/tensorflow/stream_executor/kernel.h
@@ -136,7 +136,7 @@ class KernelMetadata {
 // Thread-compatible.
 class KernelBase {
  public:
-  KernelBase(KernelBase &&) = default;
+  KernelBase(KernelBase &&from);
 
   // Constructs an "empty" (not-yet-loaded) kernel instance.
   //
@@ -340,8 +340,8 @@ class KernelArgIterator {
 //
 // This class exists as a way to pass kernel arguments to
 // StreamExecutorInterface::Launch. That Launch method is virtual, so it can't
-// be templated to accept any KernelArgsArray type, therfore a reference to this
-// base type is passed instead.
+// be templated to accept any KernelArgsArray type, therefore a reference to
+// this base type is passed instead.
 //
 // Performance is not a concern here because each of these methods will be
 // called at most once per kernel launch. Past performance concerns with
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 22fd6bce78ff0e907444be7f161b27c159a75214..ba5001e273632c893b05eea64542f1b156e28c47 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -342,7 +342,7 @@ Stream &Stream::ThenBatchNormalizationForward(
 Stream &Stream::ThenBatchNormalizationBackward(
     const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x,
     const DeviceMemory<float> &scale, const DeviceMemory<float> &mean,
-    const DeviceMemory<float> &variance, const dnn::BatchDescriptor &x_desc,
+    const DeviceMemory<float> &inv_var, const dnn::BatchDescriptor &x_desc,
     const dnn::BatchDescriptor &scale_offset_desc, const double epsilon,
     DeviceMemory<float> *x_backprop, DeviceMemory<float> *scale_backprop,
     DeviceMemory<float> *offset_backprop) {
@@ -352,7 +352,7 @@ Stream &Stream::ThenBatchNormalizationBackward(
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
       CheckError(dnn->DoBatchNormalizationBackward(
-          this, y_backprop, x, scale, mean, variance, x_desc, scale_offset_desc,
+          this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc,
           epsilon, x_backprop, scale_backprop, offset_backprop));
     } else {
       SetErrorAndLogNoDnnSupport();
@@ -392,7 +392,7 @@ Stream &Stream::ThenBatchNormalizationForward(
 Stream &Stream::ThenBatchNormalizationBackward(
     const DeviceMemory<Eigen::half> &y_backprop,
     const DeviceMemory<Eigen::half> &x, const DeviceMemory<float> &scale,
-    const DeviceMemory<float> &mean, const DeviceMemory<float> &variance,
+    const DeviceMemory<float> &mean, const DeviceMemory<float> &inv_var,
     const dnn::BatchDescriptor &x_desc,
     const dnn::BatchDescriptor &scale_offset_desc, const double epsilon,
     DeviceMemory<Eigen::half> *x_backprop, DeviceMemory<float> *scale_backprop,
@@ -403,7 +403,7 @@ Stream &Stream::ThenBatchNormalizationBackward(
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
       CheckError(dnn->DoBatchNormalizationBackward(
-          this, y_backprop, x, scale, mean, variance, x_desc, scale_offset_desc,
+          this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc,
           epsilon, x_backprop, scale_backprop, offset_backprop));
     } else {
       SetErrorAndLogNoDnnSupport();
@@ -5055,22 +5055,24 @@ Stream &Stream::ThenEnqueueOnBackgroundThread(
   });
 }
 
-bool Stream::BlockHostUntilDone() {
+port::Status Stream::BlockHostUntilDone() {
   VLOG_CALL();
 
   if (!ok()) {
-    LOG(INFO)
-        << "stream " << this
-        << " did not block host until done; was already in an error state";
-    return false;
+    port::Status status = port::Status(
+        port::error::INTERNAL,
+        "stream did not block host until done; was already in an error state");
+    LOG(INFO) << status << " " << this;
+    return status;
   }
 
+  port::Status first_error;
   {
     // Wait until all active sub-streams have done their tasks.
     mutex_lock lock{mu_};
     for (auto &stream : sub_streams_) {
       if (!stream.second) {
-        CheckError(stream.first->BlockHostUntilDone());
+        first_error.Update(stream.first->BlockHostUntilDone());
         // Set this sub-stream as available.
         stream.second = true;
       }
@@ -5079,8 +5081,9 @@ bool Stream::BlockHostUntilDone() {
 
   temporary_memory_manager_.DeallocateFinalizedTemporaries();
 
-  CheckError(parent_->BlockHostUntilDone(this));
-  return ok();
+  first_error.Update(parent_->BlockHostUntilDone(this));
+  CheckError(first_error.ok());
+  return first_error;
 }
 
 }  // namespace gputools
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index 023cffb96510fea0cf2fc54bd609fa38cf124b0a..a2fb2ea2375d0f245ae3bf3ccb04803d01663def 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -113,7 +113,7 @@ class Stream {
 
   // Initialize the stream. This must be performed before entraining any other
   // operations.
-  Stream &Init();
+  Stream &Init() LOCKS_EXCLUDED(mu_);
 
   // Initializes timer t via the StreamExecutor.
   Stream &InitTimer(Timer *t);
@@ -124,11 +124,11 @@ class Stream {
   // Get or create a sub-stream from this stream. If there is any sub-stream in
   // the pool that can be reused then just return this sub-stream.  Otherwise
   // create a new sub-stream.
-  Stream *GetOrCreateSubStream();
+  Stream *GetOrCreateSubStream() LOCKS_EXCLUDED(mu_);
 
   // Return the sub-stream back to the host stream so that it can be reused
   // later.
-  void ReturnSubStream(Stream *sub_stream);
+  void ReturnSubStream(Stream *sub_stream) LOCKS_EXCLUDED(mu_);
 
   // Allocate temporary memories. The stream will deallocate them when blocked
   // or destroyed.
@@ -234,7 +234,7 @@ class Stream {
   Stream &ThenBatchNormalizationBackward(
       const DeviceMemory<float> &y_backprop, const DeviceMemory<float> &x,
       const DeviceMemory<float> &scale, const DeviceMemory<float> &mean,
-      const DeviceMemory<float> &variance, const dnn::BatchDescriptor &x_desc,
+      const DeviceMemory<float> &inv_var, const dnn::BatchDescriptor &x_desc,
       const dnn::BatchDescriptor &scale_offset_desc, const double epsilon,
       DeviceMemory<float> *x_backprop, DeviceMemory<float> *scale_backprop,
       DeviceMemory<float> *offset_backprop);
@@ -255,7 +255,7 @@ class Stream {
   Stream &ThenBatchNormalizationBackward(
       const DeviceMemory<Eigen::half> &y_backprop,
       const DeviceMemory<Eigen::half> &x, const DeviceMemory<float> &scale,
-      const DeviceMemory<float> &mean, const DeviceMemory<float> &variance,
+      const DeviceMemory<float> &mean, const DeviceMemory<float> &inv_var,
       const dnn::BatchDescriptor &x_desc,
       const dnn::BatchDescriptor &scale_offset_desc, const double epsilon,
       DeviceMemory<Eigen::half> *x_backprop,
@@ -1903,8 +1903,9 @@ class Stream {
   // entrained on the stream (enqueued to this point in program
   // execution) to complete.
   //
-  // Returns true if the stream is ok().
-  bool BlockHostUntilDone();
+  // Returns an OK status if the blocking was successful and the stream is ok().
+  // Otherwise returns an error describing why the blocking failed.
+  port::Status BlockHostUntilDone() LOCKS_EXCLUDED(mu_);
 
   // Warning! This method interacts with internal threads in
   // sometimes-unpredictable ways and is intended for GPU-Executor-internal
@@ -1960,14 +1961,14 @@ class Stream {
   friend struct ThenBlasImpl;  // for implementing ThenBlasXXX.
   friend class ocl::CLBlas;    // for parent_.
 
-  bool InErrorState() const {
+  bool InErrorState() const LOCKS_EXCLUDED(mu_) {
     tf_shared_lock lock{mu_};
     return !ok_;
   }
 
   // Sets the error state if operation_retcode is false.
   // This is a useful shorthand for many stream routines.
-  void CheckError(bool operation_retcode) {
+  void CheckError(bool operation_retcode) LOCKS_EXCLUDED(mu_) {
     if (operation_retcode) {
       return;
     }
diff --git a/tensorflow/stream_executor/stream_executor_internal.cc b/tensorflow/stream_executor/stream_executor_internal.cc
index 95b285b992df91eb1adc01423bb07e2298dba9c4..273d970b6fa4a581381689191b183a30f4f2bcd3 100644
--- a/tensorflow/stream_executor/stream_executor_internal.cc
+++ b/tensorflow/stream_executor/stream_executor_internal.cc
@@ -15,9 +15,6 @@ limitations under the License.
 
 #include "tensorflow/stream_executor/stream_executor_internal.h"
 
-#include "tensorflow/stream_executor/lib/statusor.h"
-#include "tensorflow/stream_executor/lib/stringprintf.h"
-
 namespace perftools {
 namespace gputools {
 namespace internal {
@@ -40,7 +37,6 @@ StreamExecutorFactory* MakeOpenCLExecutorImplementation() {
 
 StreamExecutorFactory MakeHostExecutorImplementation;
 
-
 }  // namespace internal
 }  // namespace gputools
 }  // namespace perftools
diff --git a/tensorflow/stream_executor/stream_executor_internal.h b/tensorflow/stream_executor/stream_executor_internal.h
index 14445a7657be10a6d3d93ef0aabebcfa17d38b72..37ef182e1445a85dd0a97eac02ba064a26dc0f1d 100644
--- a/tensorflow/stream_executor/stream_executor_internal.h
+++ b/tensorflow/stream_executor/stream_executor_internal.h
@@ -219,7 +219,7 @@ class StreamExecutorInterface {
   virtual void DeallocateTimer(Timer *timer) = 0;
   virtual bool StartTimer(Stream *stream, Timer *timer) = 0;
   virtual bool StopTimer(Stream *stream, Timer *timer) = 0;
-  virtual bool BlockHostUntilDone(Stream *stream) = 0;
+  virtual port::Status BlockHostUntilDone(Stream *stream) = 0;
   virtual int PlatformDeviceCount() = 0;
   virtual port::Status EnablePeerAccessTo(StreamExecutorInterface *other) = 0;
   virtual bool CanEnablePeerAccessTo(StreamExecutorInterface *other) = 0;
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index 76afb85068bafb805678a9bc03b55b2efa1523c6..afca1c2e597b55b1b8d0b76d4e79995d6f6af822 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -432,8 +432,8 @@ bool StreamExecutor::Launch(Stream *stream, const ThreadDim &thread_dims,
   return implementation_->Launch(stream, thread_dims, block_dims, kernel, args);
 }
 
-bool StreamExecutor::BlockHostUntilDone(Stream *stream) {
-  bool result;
+port::Status StreamExecutor::BlockHostUntilDone(Stream *stream) {
+  port::Status result;
   SCOPED_TRACE(TraceListener::BlockHostUntilDone, &result, stream);
 
   result = implementation_->BlockHostUntilDone(stream);
@@ -566,19 +566,18 @@ port::Status StreamExecutor::SynchronousMemcpyD2H(
           << device_src.opaque() << ", size=" << size
           << ", host_dst=" << host_dst << ")" << StackTraceIfVLOG10();
 
-  port::Status result{port::Status::OK()};
+  port::Status result;
   SCOPED_TRACE(TraceListener::SynchronousMemcpyD2H, &result, device_src, size,
                host_dst);
 
-  port::Status status =
-      implementation_->SynchronousMemcpy(host_dst, device_src, size);
-  if (!status.ok()) {
-    return port::Status{port::error::INTERNAL,
-                        port::Printf("failed to synchronously memcpy "
-                                     "device-to-host: device %p to host %p "
-                                     "size %lld: %s",
-                                     device_src.opaque(), host_dst, size,
-                                     status.ToString().c_str())};
+  result = implementation_->SynchronousMemcpy(host_dst, device_src, size);
+  if (!result.ok()) {
+    result = port::Status{port::error::INTERNAL,
+                          port::Printf("failed to synchronously memcpy "
+                                       "device-to-host: device %p to host %p "
+                                       "size %lld: %s",
+                                       device_src.opaque(), host_dst, size,
+                                       result.ToString().c_str())};
   }
 
   return result;
@@ -590,19 +589,18 @@ port::Status StreamExecutor::SynchronousMemcpyH2D(
           << ", size=" << size << ", device_dst" << device_dst->opaque() << ")"
           << StackTraceIfVLOG10();
 
-  port::Status result{port::Status::OK()};
+  port::Status result;
   SCOPED_TRACE(TraceListener::SynchronousMemcpyH2D, &result, host_src, size,
                device_dst);
 
-  port::Status status =
-      implementation_->SynchronousMemcpy(device_dst, host_src, size);
-  if (!status.ok()) {
+  result = implementation_->SynchronousMemcpy(device_dst, host_src, size);
+  if (!result.ok()) {
     result = port::Status{
         port::error::INTERNAL,
         port::Printf("failed to synchronously memcpy host-to-device: host "
                      "%p to device %p size %lld: %s",
                      host_src, device_dst->opaque(), size,
-                     status.ToString().c_str())};
+                     result.ToString().c_str())};
   }
 
   return result;
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h
index 66c50d47e95fe4e9bf6df24cd61139630000cefb..a2a77218cbbafeeb9d4d8ca04b2e0a8a5024ebf9 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.h
+++ b/tensorflow/stream_executor/stream_executor_pimpl.h
@@ -481,7 +481,7 @@ class StreamExecutor {
   // Causes the host code to synchronously wait for operations entrained onto
   // stream to complete. Effectively a join on the asynchronous device
   // operations enqueued on the stream before this program point.
-  bool BlockHostUntilDone(Stream *stream);
+  port::Status BlockHostUntilDone(Stream *stream);
 
   // Synchronously allocates size bytes on the underlying platform and returns
   // an opaque void* representing that allocation. In the case of failure,
diff --git a/tensorflow/stream_executor/trace_listener.h b/tensorflow/stream_executor/trace_listener.h
index 88c54f982b3cfde925dbe0ca4f7bc3a738e5f3ac..d1e87c348b1f867009fdb6b741d984b2f58cef21 100644
--- a/tensorflow/stream_executor/trace_listener.h
+++ b/tensorflow/stream_executor/trace_listener.h
@@ -65,7 +65,8 @@ class TraceListener {
                                             const port::Status* result) {}
 
   virtual void BlockHostUntilDoneBegin(int64 correlation_id, Stream* stream) {}
-  virtual void BlockHostUntilDoneComplete(int64 correlation_id, bool result) {}
+  virtual void BlockHostUntilDoneComplete(int64 correlation_id,
+                                          const port::Status* result) {}
 };
 
 }  // namespace gputools
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 8d392fb36dccacd48f77615dbd827fbf8564c69c..383c97344a068e0174037f986baca21671f376e7 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1,45 +1,43 @@
 # -*- Python -*-
 
-
 # Return the options to use for a C++ library or binary build.
 # Uses the ":optmode" config_setting to pick the options.
 load(
     "//tensorflow/core:platform/default/build_config_root.bzl",
     "tf_cuda_tests_tags",
     "tf_sycl_tests_tags",
+    "tf_additional_grpc_deps_py",
     "tf_additional_xla_deps_py",
-    "if_static",)
+    "if_static",
+)
 load(
     "@local_config_cuda//cuda:build_defs.bzl",
     "if_cuda",
-    "cuda_default_copts",)
-
+    "cuda_default_copts",
+)
 load(
     "//third_party/mkl:build_defs.bzl",
-    "if_mkl",)
+    "if_mkl",
+)
 
 def register_extension_info(**kwargs):
     pass
 
-
 # Given a source file, generate a test name.
 # i.e. "common_runtime/direct_session_test.cc" becomes
 #      "common_runtime_direct_session_test"
 def src_to_test_name(src):
   return src.replace("/", "_").split(".")[0]
 
-
 def full_path(relative_paths):
   return [PACKAGE_NAME + "/" + relative for relative in relative_paths]
 
-
 # List of proto files for android builds
 def tf_android_core_proto_sources(core_proto_sources_relative):
   return [
       "//tensorflow/core:" + p for p in core_proto_sources_relative
   ]
 
-
 # Returns the list of pb.h and proto.h headers that are generated for
 # tf_android_core_proto_sources().
 def tf_android_core_proto_headers(core_proto_sources_relative):
@@ -51,13 +49,11 @@ def tf_android_core_proto_headers(core_proto_sources_relative):
       for p in core_proto_sources_relative
   ])
 
-
 # Sanitize a dependency so that it works correctly from code that includes
 # TensorFlow as a submodule.
 def clean_dep(dep):
   return str(Label(dep))
 
-
 def if_android_x86(a):
   return select({
       clean_dep("//tensorflow:android_x86"): a,
@@ -65,35 +61,30 @@ def if_android_x86(a):
       "//conditions:default": [],
   })
 
-
 def if_android_arm(a):
   return select({
       clean_dep("//tensorflow:android_arm"): a,
       "//conditions:default": [],
   })
 
-
 def if_android_arm64(a):
   return select({
       clean_dep("//tensorflow:android_arm64"): a,
       "//conditions:default": [],
   })
 
-
 def if_android_mips(a):
   return select({
       clean_dep("//tensorflow:android_mips"): a,
       "//conditions:default": [],
   })
 
-
 def if_not_android(a):
   return select({
       clean_dep("//tensorflow:android"): [],
       "//conditions:default": a,
   })
 
-
 def if_not_android_mips_and_mips64(a):
   return select({
       clean_dep("//tensorflow:android_mips"): [],
@@ -101,20 +92,23 @@ def if_not_android_mips_and_mips64(a):
       "//conditions:default": a,
   })
 
-
 def if_android(a):
   return select({
       clean_dep("//tensorflow:android"): a,
       "//conditions:default": [],
   })
 
-
 def if_ios(a):
   return select({
       clean_dep("//tensorflow:ios"): a,
       "//conditions:default": [],
   })
 
+def if_ios_x86_64(a):
+  return select({
+      clean_dep("//tensorflow:ios_x86_64"): a,
+      "//conditions:default": [],
+  })
 
 def if_mobile(a):
   return select({
@@ -123,7 +117,6 @@ def if_mobile(a):
       "//conditions:default": [],
   })
 
-
 def if_not_mobile(a):
   return select({
       clean_dep("//tensorflow:android"): [],
@@ -131,7 +124,6 @@ def if_not_mobile(a):
       "//conditions:default": a,
   })
 
-
 def if_not_windows(a):
   return select({
       clean_dep("//tensorflow:windows"): [],
@@ -139,6 +131,12 @@ def if_not_windows(a):
       "//conditions:default": a,
   })
 
+def if_windows(a):
+  return select({
+      clean_dep("//tensorflow:windows"): a,
+      clean_dep("//tensorflow:windows_msvc"): a,
+      "//conditions:default": [],
+  })
 
 def if_linux_x86_64(a):
   return select({
@@ -152,22 +150,45 @@ def if_darwin(a):
       "//conditions:default": [],
   })
 
-WIN_COPTS = [
-    "/DLANG_CXX11",
-    "/D__VERSION__=\\\"MSVC\\\"",
-    "/DPLATFORM_WINDOWS",
-    "/DTF_COMPILE_LIBRARY",
-    "/DEIGEN_HAS_C99_MATH",
-    "/DTENSORFLOW_USE_EIGEN_THREADPOOL",
-    "/DEIGEN_AVOID_STL_ARRAY",
-    "/Iexternal/gemmlowp",
-    "/wd4018", # -Wno-sign-compare
-    "/U_HAS_EXCEPTIONS", "/D_HAS_EXCEPTIONS=1", "/EHsc", # -fno-exceptions
-    "/DNOGDI",
-]
+def if_override_eigen_strong_inline(a):
+  return select({
+      clean_dep("//tensorflow:override_eigen_strong_inline"): a,
+      "//conditions:default": [],
+  })
+
+def get_win_copts(is_external=False):
+    WINDOWS_COPTS = [
+        "/D__VERSION__=\\\"MSVC\\\"",
+        "/DPLATFORM_WINDOWS",
+        "/DEIGEN_HAS_C99_MATH",
+        "/DTENSORFLOW_USE_EIGEN_THREADPOOL",
+        "/DEIGEN_AVOID_STL_ARRAY",
+        "/Iexternal/gemmlowp",
+        "/wd4018",  # -Wno-sign-compare
+        "/U_HAS_EXCEPTIONS",
+        "/D_HAS_EXCEPTIONS=1",
+        "/EHsc",  # -fno-exceptions
+        "/DNOGDI",
+    ]
+    if is_external:
+      return WINDOWS_COPTS + ["/UTF_COMPILE_LIBRARY"]
+    else:
+      return WINDOWS_COPTS + ["/DTF_COMPILE_LIBRARY"]
 
 # LINT.IfChange
-def tf_copts():
+def tf_copts(android_optimization_level_override="-O2", is_external=False):
+  # For compatibility reasons, android_optimization_level_override
+  # is currently only being set for Android.
+  # To clear this value, and allow the CROSSTOOL default
+  # to be used, pass android_optimization_level_override=None
+  android_copts = [
+      "-std=c++11",
+      "-DTF_LEAN_BINARY",
+      "-Wno-narrowing",
+      "-fomit-frame-pointer",
+  ]
+  if android_optimization_level_override:
+    android_copts.append(android_optimization_level_override)
   return (
       if_not_windows([
           "-DEIGEN_AVOID_STL_ARRAY",
@@ -179,22 +200,20 @@ def tf_copts():
       + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML", "-fopenmp",])
       + if_android_arm(["-mfpu=neon"])
       + if_linux_x86_64(["-msse3"])
+      + if_ios_x86_64(["-msse4.1"])
       + select({
-            clean_dep("//tensorflow:android"): [
-                "-std=c++11",
-                "-DTF_LEAN_BINARY",
-                "-O2",
-                "-Wno-narrowing",
-                "-fomit-frame-pointer",
-            ],
+            clean_dep("//tensorflow:framework_shared_object"): [],
+            "//conditions:default": ["-DTENSORFLOW_MONOLITHIC_BUILD"],
+      })
+      + select({
+            clean_dep("//tensorflow:android"): android_copts,
             clean_dep("//tensorflow:darwin"): [],
-            clean_dep("//tensorflow:windows"): WIN_COPTS,
-            clean_dep("//tensorflow:windows_msvc"): WIN_COPTS,
+            clean_dep("//tensorflow:windows"): get_win_copts(is_external),
+            clean_dep("//tensorflow:windows_msvc"): get_win_copts(is_external),
             clean_dep("//tensorflow:ios"): ["-std=c++11"],
             "//conditions:default": ["-pthread"]
       }))
 
-
 def tf_opts_nortti_if_android():
   return if_android([
       "-fno-rtti",
@@ -202,13 +221,11 @@ def tf_opts_nortti_if_android():
       "-DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER",
   ])
 
-
 # LINT.ThenChange(//tensorflow/contrib/android/cmake/CMakeLists.txt)
 
-
 # Given a list of "op_lib_names" (a list of files in the ops directory
 # without their .cc extensions), generate a library for that file.
-def tf_gen_op_libs(op_lib_names, deps=None):
+def tf_gen_op_libs(op_lib_names, deps=None, is_external=True):
   # Make library out of each op so it can also be used to generate wrappers
   # for various languages.
   if not deps:
@@ -216,20 +233,18 @@ def tf_gen_op_libs(op_lib_names, deps=None):
   for n in op_lib_names:
     native.cc_library(
         name=n + "_op_lib",
-        copts=tf_copts(),
+        copts=tf_copts(is_external=is_external),
         srcs=["ops/" + n + ".cc"],
         deps=deps + [clean_dep("//tensorflow/core:framework")],
         visibility=["//visibility:public"],
         alwayslink=1,
         linkstatic=1,)
 
-
 def _make_search_paths(prefix, levels_to_root):
   return ",".join(
       ["-rpath,%s/%s" % (prefix, "/".join([".."] * search_level))
        for search_level in range(levels_to_root + 1)])
 
-
 def _rpath_linkopts(name):
   # Search parent directories up to the TensorFlow root directory for shared
   # object dependencies, even if this op shared object is deeply nested
@@ -248,7 +263,6 @@ def _rpath_linkopts(name):
       ],
   })
 
-
 # Bazel-generated shared objects which must be linked into TensorFlow binaries
 # to define symbols from //tensorflow/core:framework and //tensorflow/core:lib.
 def tf_binary_additional_srcs():
@@ -258,7 +272,6 @@ def tf_binary_additional_srcs():
           clean_dep("//tensorflow:libtensorflow_framework.so"),
       ])
 
-
 def tf_cc_shared_object(
     name,
     srcs=[],
@@ -281,9 +294,9 @@ def tf_cc_shared_object(
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_cc_shared_object",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_cc_shared_object",
+    label_regex_for_dep = "{extension_name}",
+)
 
 # Links in the framework shared object
 # (//third_party/tensorflow:libtensorflow_framework.so) when not building
@@ -293,9 +306,11 @@ def tf_cc_binary(name,
                  srcs=[],
                  deps=[],
                  linkopts=[],
+                 copts=tf_copts(),
                  **kwargs):
   native.cc_binary(
       name=name,
+      copts=copts,
       srcs=srcs + tf_binary_additional_srcs(),
       deps=deps + if_mkl(
           [
@@ -306,16 +321,15 @@ def tf_cc_binary(name,
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_cc_binary",
-    label_regex_for_dep="{extension_name}.*")
-
+    extension_name = "tf_cc_binary",
+    label_regex_for_dep = "{extension_name}.*",
+)
 
 def tf_gen_op_wrapper_cc(name,
                          out_ops_file,
                          pkg="",
                          op_gen=clean_dep("//tensorflow/cc:cc_op_gen_main"),
                          deps=None,
-                         override_file=None,
                          include_internal_ops=0,
                          # ApiDefs will be loaded in the order specified in this list.
                          api_def_srcs=[]):
@@ -326,18 +340,12 @@ def tf_gen_op_wrapper_cc(name,
   tf_cc_binary(
       name=tool,
       copts=tf_copts(),
-      linkopts=["-lm"],
+      linkopts=if_not_windows(["-lm"]),
       linkstatic=1,  # Faster to link this one-time-use binary dynamically
       deps=[op_gen] + deps)
 
   srcs = api_def_srcs[:]
 
-  if override_file == None:
-    override_arg = ","
-  else:
-    srcs += [override_file]
-    override_arg = "$(location " + override_file + ")"
-
   if not api_def_srcs:
     api_def_args_str = ","
   else:
@@ -350,6 +358,7 @@ def tf_gen_op_wrapper_cc(name,
           " $$(dirname $$(echo $(locations " + api_def_src +
           ") | cut -d\" \" -f1))")
     api_def_args_str = ",".join(api_def_args)
+
   native.genrule(
       name=name + "_genrule",
       outs=[
@@ -359,10 +368,9 @@ def tf_gen_op_wrapper_cc(name,
       srcs=srcs,
       tools=[":" + tool] + tf_binary_additional_srcs(),
       cmd=("$(location :" + tool + ") $(location :" + out_ops_file + ".h) " +
-           "$(location :" + out_ops_file + ".cc) " + override_arg + " " +
+           "$(location :" + out_ops_file + ".cc) " +
            str(include_internal_ops) + " " + api_def_args_str))
 
-
 # Given a list of "op_lib_names" (a list of files in the ops directory
 # without their .cc extensions), generate individual C++ .cc and .h
 # files for each of the ops files mentioned, and then generate a
@@ -401,7 +409,6 @@ def tf_gen_op_wrappers_cc(name,
                               clean_dep("//tensorflow/cc:const_op"),
                           ],
                           op_gen=clean_dep("//tensorflow/cc:cc_op_gen_main"),
-                          override_file=None,
                           include_internal_ops=0,
                           visibility=None,
                           # ApiDefs will be loaded in the order apecified in this list.
@@ -416,7 +423,6 @@ def tf_gen_op_wrappers_cc(name,
         "ops/" + n,
         pkg=pkg,
         op_gen=op_gen,
-        override_file=override_file,
         include_internal_ops=include_internal_ops,
         api_def_srcs=api_def_srcs)
     subsrcs += ["ops/" + n + ".cc"]
@@ -455,7 +461,6 @@ def tf_gen_op_wrappers_cc(name,
       alwayslink=1,
       visibility=[clean_dep("//tensorflow:internal")])
 
-
 # Generates a Python library target wrapping the ops registered in "deps".
 #
 # Args:
@@ -476,6 +481,8 @@ def tf_gen_op_wrappers_cc(name,
 #     "name" arg)
 #   op_whitelist: if not empty, only op names in this list will be wrapped. It
 #     is invalid to specify both "hidden" and "op_whitelist".
+#   cc_linkopts: Optional linkopts to be added to tf_cc_binary that contains the
+#     specified ops.
 def tf_gen_op_wrapper_py(name,
                          out=None,
                          hidden=None,
@@ -484,7 +491,9 @@ def tf_gen_op_wrapper_py(name,
                          require_shape_functions=False,
                          hidden_file=None,
                          generated_target_name=None,
-                         op_whitelist=[]):
+                         op_whitelist=[],
+                         cc_linkopts=[],
+                         api_def_srcs=[]):
   if (hidden or hidden_file) and op_whitelist:
     fail('Cannot pass specify both hidden and op_whitelist.')
 
@@ -494,7 +503,7 @@ def tf_gen_op_wrapper_py(name,
     deps = [str(Label("//tensorflow/core:" + name + "_op_lib"))]
   tf_cc_binary(
       name=tool_name,
-      linkopts=["-lm"],
+      linkopts=if_not_windows(["-lm"]) + cc_linkopts,
       copts=tf_copts(),
       linkstatic=1,  # Faster to link this one-time-use binary dynamically
       deps=([
@@ -517,22 +526,39 @@ def tf_gen_op_wrapper_py(name,
     op_list_arg = "''"
     op_list_is_whitelist = False
 
+  # Prepare ApiDef directories to pass to the genrule.
+  if not api_def_srcs:
+    api_def_args_str = ","
+  else:
+    api_def_args = []
+    for api_def_src in api_def_srcs:
+      # Add directory of the first ApiDef source to args.
+      # We are assuming all ApiDefs in a single api_def_src are in the
+      # same directory.
+      api_def_args.append(
+          "$$(dirname $$(echo $(locations " + api_def_src +
+          ") | cut -d\" \" -f1))")
+    api_def_args_str = ",".join(api_def_args)
+
   if hidden_file:
     # `hidden_file` is file containing a list of op names to be hidden in the
     # generated module.
     native.genrule(
         name=name + "_pygenrule",
         outs=[out],
-        srcs=[hidden_file],
+        srcs=api_def_srcs + [hidden_file],
         tools=[tool_name] + tf_binary_additional_srcs(),
-        cmd=("$(location " + tool_name + ") @$(location " + hidden_file + ") " +
+        cmd=("$(location " + tool_name + ") " + api_def_args_str +
+             " @$(location " + hidden_file + ") " +
              ("1" if require_shape_functions else "0") + " > $@"))
   else:
     native.genrule(
         name=name + "_pygenrule",
         outs=[out],
+        srcs=api_def_srcs,
         tools=[tool_name] + tf_binary_additional_srcs(),
-        cmd=("$(location " + tool_name + ") " + op_list_arg + " " +
+        cmd=("$(location " + tool_name + ") " + api_def_args_str + " " +
+             op_list_arg + " " +
              ("1" if require_shape_functions else "0") + " " +
              ("1" if op_list_is_whitelist else "0") + " > $@"))
 
@@ -548,7 +574,6 @@ def tf_gen_op_wrapper_py(name,
           clean_dep("//tensorflow/python:framework_for_generated_wrappers_v2"),
       ],)
 
-
 # Define a bazel macro that creates cc_test for tensorflow.
 #
 # Links in the framework shared object
@@ -571,7 +596,15 @@ def tf_cc_test(name,
       name="%s%s" % (name, suffix),
       srcs=srcs + tf_binary_additional_srcs(),
       copts=tf_copts() + extra_copts,
-      linkopts=["-lpthread", "-lm"] + linkopts + _rpath_linkopts(name),
+      linkopts=select({
+        "//tensorflow:android": [
+            "-pie",
+          ],
+        "//conditions:default": [
+            "-lpthread",
+            "-lm"
+        ],
+      }) + linkopts + _rpath_linkopts(name),
       deps=deps + if_mkl(
           [
               "//third_party/mkl:intel_binary_blob",
@@ -591,9 +624,9 @@ def tf_cc_test(name,
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_cc_test",
-    label_regex_for_dep="{extension_name}.*")
-
+    extension_name = "tf_cc_test",
+    label_regex_for_dep = "{extension_name}.*",
+)
 
 # Part of the testing workflow requires a distinguishable name for the build
 # rules that involve a GPU, even if otherwise identical to the base rule.
@@ -618,9 +651,9 @@ def tf_cc_test_gpu(name,
       args=args)
 
 register_extension_info(
-    extension_name="tf_cc_test_gpu",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_cc_test_gpu",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_cuda_cc_test(name,
                     srcs=[],
@@ -662,9 +695,9 @@ def tf_cuda_cc_test(name,
       args=args)
 
 register_extension_info(
-    extension_name="tf_cuda_cc_test",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_cuda_cc_test",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_cuda_only_cc_test(name,
                     srcs=[],
@@ -685,7 +718,7 @@ def tf_cuda_only_cc_test(name,
       deps=deps + if_cuda([
           clean_dep("//tensorflow/core:cuda"),
           clean_dep("//tensorflow/core:gpu_lib")]),
-      linkopts=["-lpthread", "-lm"] + linkopts + _rpath_linkopts(name),
+      linkopts=if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name),
       linkstatic=linkstatic or select({
           # cc_tests with ".so"s in srcs incorrectly link on Darwin
           # unless linkstatic=1.
@@ -696,9 +729,9 @@ def tf_cuda_only_cc_test(name,
       tags=tags + tf_cuda_tests_tags())
 
 register_extension_info(
-    extension_name="tf_cuda_only_cc_test",
-    label_regex_for_dep="{extension_name}_gpu")
-
+    extension_name = "tf_cuda_only_cc_test",
+    label_regex_for_dep = "{extension_name}_gpu",
+)
 
 # Create a cc_test for each of the tensorflow tests listed in "tests"
 def tf_cc_tests(srcs,
@@ -722,7 +755,6 @@ def tf_cc_tests(srcs,
         linkopts=linkopts,
         nocopts=nocopts)
 
-
 def tf_cc_test_mkl(srcs,
                    deps,
                    name="",
@@ -732,7 +764,6 @@ def tf_cc_test_mkl(srcs,
                    args=None):
   if_mkl(tf_cc_tests(srcs, deps, name, linkstatic=linkstatic, tags=tags, size=size, args=args, nocopts="-fno-exceptions"))
 
-
 def tf_cc_tests_gpu(srcs,
                     deps,
                     name="",
@@ -742,7 +773,6 @@ def tf_cc_tests_gpu(srcs,
                     args=None):
   tf_cc_tests(srcs, deps, linkstatic, tags=tags, size=size, args=args)
 
-
 def tf_cuda_cc_tests(srcs,
                      deps,
                      name="",
@@ -775,9 +805,9 @@ def tf_java_test(name,
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_java_test",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_java_test",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def _cuda_copts():
   """Gets the appropriate set of copts for (maybe) CUDA compilation.
@@ -797,10 +827,8 @@ def _cuda_copts():
       ]),
   })
 
-
 # Build defs for TensorFlow kernels
 
-
 # When this target is built using --config=cuda, a cc_library is built
 # that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional
 # libraries needed by GPU kernels.
@@ -824,11 +852,11 @@ def tf_gpu_kernel_library(srcs,
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_gpu_kernel_library",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_gpu_kernel_library",
+    label_regex_for_dep = "{extension_name}",
+)
 
-def tf_cuda_library(deps=None, cuda_deps=None, copts=None, **kwargs):
+def tf_cuda_library(deps=None, cuda_deps=None, copts=tf_copts(), **kwargs):
   """Generate a cc_library with a conditional set of CUDA dependencies.
 
   When the library is built with --config=cuda:
@@ -848,8 +876,6 @@ def tf_cuda_library(deps=None, cuda_deps=None, copts=None, **kwargs):
     deps = []
   if not cuda_deps:
     cuda_deps = []
-  if not copts:
-    copts = []
 
   native.cc_library(
       deps=deps + if_cuda(cuda_deps + [
@@ -860,10 +886,9 @@ def tf_cuda_library(deps=None, cuda_deps=None, copts=None, **kwargs):
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_cuda_library",
-    label_regex_for_dep="{extension_name}")
-
-
+    extension_name = "tf_cuda_library",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_kernel_library(name,
                       prefix=None,
@@ -872,7 +897,8 @@ def tf_kernel_library(name,
                       hdrs=None,
                       deps=None,
                       alwayslink=1,
-                      copts=tf_copts(),
+                      copts=None,
+                      is_external=False,
                       **kwargs):
   """A rule to build a TensorFlow OpKernel.
 
@@ -901,7 +927,9 @@ def tf_kernel_library(name,
     hdrs = []
   if not deps:
     deps = []
-
+  if not copts:
+    copts = []
+  copts = copts + tf_copts(is_external=is_external)
   if prefix:
     if native.glob([prefix + "*.cu.cc"], exclude=["*test*"]):
       if not gpu_srcs:
@@ -934,9 +962,9 @@ def tf_kernel_library(name,
       **kwargs)
 
 register_extension_info(
-    extension_name="tf_kernel_library",
-    label_regex_for_dep="{extension_name}(_gpu)?")
-
+    extension_name = "tf_kernel_library",
+    label_regex_for_dep = "{extension_name}(_gpu)?",
+)
 
 def tf_mkl_kernel_library(name,
                           prefix=None,
@@ -975,9 +1003,9 @@ def tf_mkl_kernel_library(name,
       ))
 
 register_extension_info(
-    extension_name="tf_mkl_kernel_library",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_mkl_kernel_library",
+    label_regex_for_dep = "{extension_name}",
+)
 
 # Bazel rules for building swig files.
 def _py_wrap_cc_impl(ctx):
@@ -1011,44 +1039,41 @@ def _py_wrap_cc_impl(ctx):
       progress_message="SWIGing " + src.path)
   return struct(files=depset(outputs))
 
-
 _py_wrap_cc = rule(
-    attrs={
-        "srcs":
-            attr.label_list(
-                mandatory=True,
-                allow_files=True,),
-        "swig_includes":
-            attr.label_list(
-                cfg="data",
-                allow_files=True,),
-        "deps":
-            attr.label_list(
-                allow_files=True,
-                providers=["cc"],),
-        "toolchain_deps":
-            attr.label_list(
-                allow_files=True,),
-        "module_name":
-            attr.string(mandatory=True),
-        "py_module_name":
-            attr.string(mandatory=True),
-        "_swig":
-            attr.label(
-                default=Label("@swig//:swig"),
-                executable=True,
-                cfg="host",),
-        "_swiglib":
-            attr.label(
-                default=Label("@swig//:templates"),
-                allow_files=True,),
+    attrs = {
+        "srcs": attr.label_list(
+            mandatory = True,
+            allow_files = True,
+        ),
+        "swig_includes": attr.label_list(
+            cfg = "data",
+            allow_files = True,
+        ),
+        "deps": attr.label_list(
+            allow_files = True,
+            providers = ["cc"],
+        ),
+        "toolchain_deps": attr.label_list(
+            allow_files = True,
+        ),
+        "module_name": attr.string(mandatory = True),
+        "py_module_name": attr.string(mandatory = True),
+        "_swig": attr.label(
+            default = Label("@swig//:swig"),
+            executable = True,
+            cfg = "host",
+        ),
+        "_swiglib": attr.label(
+            default = Label("@swig//:templates"),
+            allow_files = True,
+        ),
     },
-    outputs={
+    outputs = {
         "cc_out": "%{module_name}.cc",
         "py_out": "%{py_module_name}.py",
     },
-    implementation=_py_wrap_cc_impl,)
-
+    implementation = _py_wrap_cc_impl,
+)
 
 def _get_repository_roots(ctx, files):
   """Returns abnormal root directories under which files reside.
@@ -1079,7 +1104,6 @@ def _get_repository_roots(ctx, files):
       result[root] -= 1
   return [k for v, k in sorted([(v, k) for k, v in result.items()])]
 
-
 # Bazel rule for collecting the header files that a target depends on.
 def _transitive_hdrs_impl(ctx):
   outputs = depset()
@@ -1087,21 +1111,20 @@ def _transitive_hdrs_impl(ctx):
     outputs += dep.cc.transitive_headers
   return struct(files=outputs)
 
-
 _transitive_hdrs = rule(
-    attrs={
+    attrs = {
         "deps": attr.label_list(
-            allow_files=True,
-            providers=["cc"],),
+            allow_files = True,
+            providers = ["cc"],
+        ),
     },
-    implementation=_transitive_hdrs_impl,)
-
+    implementation = _transitive_hdrs_impl,
+)
 
 def transitive_hdrs(name, deps=[], **kwargs):
   _transitive_hdrs(name=name + "_gather", deps=deps)
   native.filegroup(name=name, srcs=[":" + name + "_gather"])
 
-
 # Create a header only library that includes all the headers exported by
 # the libraries in deps.
 def cc_header_only_library(name, deps=[], includes=[], **kwargs):
@@ -1127,7 +1150,6 @@ def cc_header_only_library(name, deps=[], includes=[], **kwargs):
                     includes=includes,
                     **kwargs)
 
-
 def tf_custom_op_library_additional_deps():
   return [
       "@protobuf_archive//:protobuf_headers",
@@ -1136,7 +1158,6 @@ def tf_custom_op_library_additional_deps():
       clean_dep("//tensorflow/core:framework_headers_lib"),
   ]
 
-
 # Traverse the dependency graph along the "deps" attribute of the
 # target and return a struct with one field called 'tf_collected_deps'.
 # tf_collected_deps will be the union of the deps of the current target
@@ -1150,16 +1171,15 @@ def _collect_deps_aspect_impl(target, ctx):
         alldeps = alldeps | dep.tf_collected_deps
   return struct(tf_collected_deps=alldeps)
 
-
 collect_deps_aspect = aspect(
-    implementation=_collect_deps_aspect_impl, attr_aspects=["deps"])
-
+    attr_aspects = ["deps"],
+    implementation = _collect_deps_aspect_impl,
+)
 
 def _dep_label(dep):
   label = dep.label
   return label.package + ":" + label.name
 
-
 # This rule checks that the transitive dependencies of targets listed
 # in the 'deps' attribute don't depend on the targets listed in
 # the 'disallowed_deps' attribute.
@@ -1176,22 +1196,24 @@ def _check_deps_impl(ctx):
                   disallowed_dep))
   return struct()
 
-
 check_deps = rule(
     _check_deps_impl,
-    attrs={
-        "deps":
-            attr.label_list(
-                aspects=[collect_deps_aspect], mandatory=True,
-                allow_files=True),
-        "disallowed_deps":
-            attr.label_list(mandatory=True, allow_files=True)
-    },)
-
+    attrs = {
+        "deps": attr.label_list(
+            aspects = [collect_deps_aspect],
+            mandatory = True,
+            allow_files = True,
+        ),
+        "disallowed_deps": attr.label_list(
+            mandatory = True,
+            allow_files = True,
+        ),
+    },
+)
 
 # Helper to build a dynamic library (.so) from the sources containing
 # implementations of custom ops and kernels.
-def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]):
+def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]):
   cuda_deps = [
       clean_dep("//tensorflow/core:stream_executor_headers_lib"),
       "@local_config_cuda//cuda:cuda_headers",
@@ -1219,8 +1241,8 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]):
       srcs=srcs,
       deps=deps + if_cuda(cuda_deps),
       data=[name + "_check_deps"],
-      copts=tf_copts(),
-      linkopts=select({
+      copts=tf_copts(is_external=True),
+      linkopts=linkopts + select({
           "//conditions:default": [
               "-lm",
           ],
@@ -1228,9 +1250,9 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]):
       }),)
 
 register_extension_info(
-    extension_name="tf_custom_op_library",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_custom_op_library",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_custom_op_py_library(name,
                             srcs=[],
@@ -1249,18 +1271,16 @@ def tf_custom_op_py_library(name,
       deps=deps,)
 
 register_extension_info(
-    extension_name="tf_custom_op_py_library",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "tf_custom_op_py_library",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_extension_linkopts():
   return []  # No extension link opts
 
-
 def tf_extension_copts():
   return []  # No extension c opts
 
-
 def tf_py_wrap_cc(name,
                              srcs,
                              swig_includes=[],
@@ -1328,19 +1348,39 @@ def tf_py_wrap_cc(name,
           "//conditions:default": [":" + cc_library_name],
       }))
 
-
-def py_test(deps=[], **kwargs):
+# This macro is for running python tests against system installed pip package
+# on Windows.
+#
+# py_test is built as an exectuable python zip file on Windows, which contains all
+# dependencies of the target. Because of the C++ extensions, it would be very
+# inefficient if the py_test zips all runfiles, plus we don't need them when running
+# tests against system installed pip package. So we'd like to get rid of the deps
+# of py_test in this case.
+#
+# In order to trigger the tests without bazel clean after getting rid of deps,
+# we introduce the following :
+# 1. When --define=no_tensorflow_py_deps=true, the py_test depends on a marker
+#    file of the pip package, the test gets to rerun when the pip package change.
+#    Note that this only works on Windows. See the definition of
+#    //third_party/tensorflow/tools/pip_package:win_pip_package_marker for specific reasons.
+# 2. When --define=no_tensorflow_py_deps=false (by default), it's a normal py_test.
+def py_test(deps=[], data=[], **kwargs):
   native.py_test(
       deps=select({
           "//conditions:default": deps,
-          clean_dep("//tensorflow:no_tensorflow_py_deps"): []
+          clean_dep("//tensorflow:no_tensorflow_py_deps"): [],
+      }),
+      data = data + select({
+          "//conditions:default": [],
+          clean_dep("//tensorflow:no_tensorflow_py_deps"):
+          ["//tensorflow/tools/pip_package:win_pip_package_marker"],
       }),
       **kwargs)
 
 register_extension_info(
-    extension_name="py_test",
-    label_regex_for_dep="{extension_name}")
-
+    extension_name = "py_test",
+    label_regex_for_dep = "{extension_name}",
+)
 
 def tf_py_test(name,
                srcs,
@@ -1352,10 +1392,13 @@ def tf_py_test(name,
                shard_count=1,
                additional_deps=[],
                flaky=0,
-               xla_enabled=False):
+               xla_enabled=False,
+               grpc_enabled=False):
   if xla_enabled:
     additional_deps = additional_deps + tf_additional_xla_deps_py()
-  native.py_test(
+  if grpc_enabled:
+    additional_deps = additional_deps + tf_additional_grpc_deps_py()
+  py_test(
       name=name,
       size=size,
       srcs=srcs,
@@ -1365,20 +1408,17 @@ def tf_py_test(name,
       visibility=[clean_dep("//tensorflow:internal")],
       shard_count=shard_count,
       data=data,
-      deps=select({
-          "//conditions:default": [
-              clean_dep("//tensorflow/python:extra_py_tests_deps"),
-              clean_dep("//tensorflow/python:gradient_checker"),
+      deps=[
+            clean_dep("//tensorflow/python:extra_py_tests_deps"),
+            clean_dep("//tensorflow/python:gradient_checker"),
           ] + additional_deps,
-          clean_dep("//tensorflow:no_tensorflow_py_deps"): []
-      }),
       flaky=flaky,
       srcs_version="PY2AND3")
 
 register_extension_info(
-    extension_name="tf_py_test",
-    label_regex_map={"additional_deps": "deps:{extension_name}"})
-
+    extension_name = "tf_py_test",
+    label_regex_map = {"additional_deps": "deps:{extension_name}"},
+)
 
 def cuda_py_test(name,
                  srcs,
@@ -1390,7 +1430,8 @@ def cuda_py_test(name,
                  additional_deps=[],
                  tags=[],
                  flaky=0,
-                 xla_enabled=False):
+                 xla_enabled=False,
+                 grpc_enabled=False):
   test_tags = tags + tf_cuda_tests_tags()
   tf_py_test(
       name=name,
@@ -1403,12 +1444,13 @@ def cuda_py_test(name,
       shard_count=shard_count,
       additional_deps=additional_deps,
       flaky=flaky,
-      xla_enabled=xla_enabled)
+      xla_enabled=xla_enabled,
+      grpc_enabled=grpc_enabled)
 
 register_extension_info(
-    extension_name="cuda_py_test",
-    label_regex_map={"additional_deps": "additional_deps:{extension_name}"})
-
+    extension_name = "cuda_py_test",
+    label_regex_map = {"additional_deps": "additional_deps:{extension_name}"},
+)
 
 def sycl_py_test(name,
                  srcs,
@@ -1420,7 +1462,8 @@ def sycl_py_test(name,
                  additional_deps=[],
                  tags=[],
                  flaky=0,
-                 xla_enabled=False):
+                 xla_enabled=False,
+                 grpc_enabled=False):
   test_tags = tags + tf_sycl_tests_tags()
   tf_py_test(
       name=name,
@@ -1433,12 +1476,13 @@ def sycl_py_test(name,
       shard_count=shard_count,
       additional_deps=additional_deps,
       flaky=flaky,
-      xla_enabled=xla_enabled)
+      xla_enabled=xla_enabled,
+      grpc_enabled=grpc_enabled)
 
 register_extension_info(
-    extension_name="sycl_py_test",
-    label_regex_map={"additional_deps": "additional_deps:{extension_name}"})
-
+    extension_name = "sycl_py_test",
+    label_regex_map = {"additional_deps": "additional_deps:{extension_name}"},
+)
 
 def py_tests(name,
              srcs,
@@ -1448,7 +1492,8 @@ def py_tests(name,
              tags=[],
              shard_count=1,
              prefix="",
-             xla_enabled=False):
+             xla_enabled=False,
+             grpc_enabled=False):
   for src in srcs:
     test_name = src.split("/")[-1].split(".")[0]
     if prefix:
@@ -1462,8 +1507,8 @@ def py_tests(name,
         shard_count=shard_count,
         data=data,
         additional_deps=additional_deps,
-        xla_enabled=xla_enabled)
-
+        xla_enabled=xla_enabled,
+        grpc_enabled=grpc_enabled)
 
 def cuda_py_tests(name,
                   srcs,
@@ -1473,7 +1518,8 @@ def cuda_py_tests(name,
                   shard_count=1,
                   tags=[],
                   prefix="",
-                  xla_enabled=False):
+                  xla_enabled=False,
+                  grpc_enabled=False):
   test_tags = tags + tf_cuda_tests_tags()
   py_tests(
       name=name,
@@ -1484,8 +1530,8 @@ def cuda_py_tests(name,
       tags=test_tags,
       shard_count=shard_count,
       prefix=prefix,
-      xla_enabled=xla_enabled)
-
+      xla_enabled=xla_enabled,
+      grpc_enabled=grpc_enabled)
 
 # Creates a genrule named <name> for running tools/proto_text's generator to
 # make the proto_text functions, for the protos passed in <srcs>.
@@ -1509,19 +1555,17 @@ def tf_generate_proto_text_sources(name, srcs_relative_dir, srcs):
       ],)
   return struct(hdrs=out_hdrs, srcs=out_srcs)
 
-
 def tf_genrule_cmd_append_to_srcs(to_append):
   return ("cat $(SRCS) > $(@) && " + "echo >> $(@) && " + "echo " + to_append +
           " >> $(@)")
 
-
 def tf_version_info_genrule():
   native.genrule(
       name="version_info_gen",
       srcs=[
-          clean_dep("//tensorflow/tools/git:gen/spec.json"),
-          clean_dep("//tensorflow/tools/git:gen/head"),
-          clean_dep("//tensorflow/tools/git:gen/branch_ref"),
+          clean_dep("@local_config_git//:gen/spec.json"),
+          clean_dep("@local_config_git//:gen/head"),
+          clean_dep("@local_config_git//:gen/branch_ref"),
       ],
       outs=["util/version_info.cc"],
       cmd=
@@ -1529,7 +1573,6 @@ def tf_version_info_genrule():
       local=1,
       tools=[clean_dep("//tensorflow/tools/git:gen_git_source.py")],)
 
-
 def tf_py_build_info_genrule():
   native.genrule(
       name="py_build_info_gen",
@@ -1539,14 +1582,15 @@ def tf_py_build_info_genrule():
       local=1,
       tools=[clean_dep("//tensorflow/tools/build_info:gen_build_info.py")],)
 
-
 def cc_library_with_android_deps(deps,
                                  android_deps=[],
                                  common_deps=[],
+                                 copts=tf_copts(),
                                  **kwargs):
   deps = if_not_android(deps) + if_android(android_deps) + common_deps
-  native.cc_library(deps=deps, **kwargs)
+  native.cc_library(deps=deps, copts=copts, **kwargs)
 
 register_extension_info(
-    extension_name="cc_library_with_android_deps",
-    label_regex_for_dep="{extension_name}")
+    extension_name = "cc_library_with_android_deps",
+    label_regex_for_dep = "{extension_name}",
+)
diff --git a/tensorflow/tf_exported_symbols.lds b/tensorflow/tf_exported_symbols.lds
index bddb87f00cb5fd1ede2cb9d5cc4079d6e66f7896..3ff824e5e1707c65b5ad3cc22dd32267953964c6 100644
--- a/tensorflow/tf_exported_symbols.lds
+++ b/tensorflow/tf_exported_symbols.lds
@@ -4,3 +4,4 @@
 *TF_*
 *TFE_*
 *nsync_*
+*pywrap_xla*
diff --git a/tensorflow/tf_version_script.lds b/tensorflow/tf_version_script.lds
index 11f66c5c8b27f412b2023d6f3036c56d3d1e530c..6b28943f01cfdb174fd135c670a6bb409ee0e102 100644
--- a/tensorflow/tf_version_script.lds
+++ b/tensorflow/tf_version_script.lds
@@ -5,6 +5,7 @@ tensorflow {
     *TF_*;
     *TFE_*;
     *nsync_*;
+    *pywrap_xla*;
   local:
     *;
 };
diff --git a/tensorflow/tools/api/generator/BUILD b/tensorflow/tools/api/generator/BUILD
index 3896a21b99f4756239a7ae9f3db9593504845aea..fa0f9b59aa938168cb3d318797c797eeabc9c7d9 100644
--- a/tensorflow/tools/api/generator/BUILD
+++ b/tensorflow/tools/api/generator/BUILD
@@ -41,7 +41,17 @@ genrule(
     # every module exported using tf_export. For e.g. if an op is decorated with
     # @tf_export('module1.module2', 'module3'). Then, outs should include
     # api/module1/module2/__init__.py and api/module3/__init__.py.
-    outs = ["api/__init__.py"],
+    outs = [
+        "api/__init__.py",
+        "api/bitwise/__init__.py",
+        "api/contrib/__init__.py",
+        "api/contrib/stat_summarizer/__init__.py",
+        "api/image/__init__.py",
+        "api/linalg/__init__.py",
+        "api/nn/__init__.py",
+        "api/spectral/__init__.py",
+        "api/train/__init__.py",
+    ],
     cmd = "$(location create_python_api) $(OUTS)",
     tools = ["create_python_api"],
 )
diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py
index 5f1286aaf6c913cd299ebbfb65949ace0f593417..aab856b723cf2686e8fc9feb156b9be28470fc98 100644
--- a/tensorflow/tools/api/generator/create_python_api.py
+++ b/tensorflow/tools/api/generator/create_python_api.py
@@ -107,7 +107,8 @@ def get_api_imports():
   # Import all required modules in their parent modules.
   # For e.g. if we import 'tf.foo.bar.Value'. Then, we also
   # import 'bar' in 'tf.foo'.
-  for dest_module in module_imports.keys():
+  dest_modules = set(module_imports.keys())
+  for dest_module in dest_modules:
     dest_module_split = dest_module.split('.')
     for dest_submodule_index in range(1, len(dest_module_split)):
       dest_submodule = '.'.join(dest_module_split[:dest_submodule_index])
diff --git a/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt b/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt
index 30f7e4e11655797fbd8f0ea65c2eb84768ca486b..875d802a9c458e299f73c130bb2b37c5d8828aad 100644
--- a/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt
@@ -18,6 +18,14 @@ tf_class {
     name: "DESCRIPTOR"
     mtype: "<type \'google.protobuf.pyext._message.MessageDescriptor\'>"
   }
+  member {
+    name: "EXPERIMENTAL_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "Experimental"
+    mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
+  }
   member {
     name: "Extensions"
     mtype: "<type \'getset_descriptor\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-meta-info-def.pbtxt b/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-meta-info-def.pbtxt
index ebf49f434ae468311a07374cdca1140336983a81..b0e983115499c5b5b79459affc931600ad16256b 100644
--- a/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-meta-info-def.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-meta-info-def.pbtxt
@@ -18,6 +18,10 @@ tf_class {
     name: "META_GRAPH_VERSION_FIELD_NUMBER"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "STRIPPED_DEFAULT_ATTRS_FIELD_NUMBER"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "STRIPPED_OP_LIST_FIELD_NUMBER"
     mtype: "<type \'int\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-bernoulli.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-bernoulli.pbtxt
index cfe09345acccc410ad3041a965901134440e3c77..ca96f4eaece0020235d24901f51306a65676c1c9 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-bernoulli.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-bernoulli.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-beta.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-beta.pbtxt
index 2e6578bae1604f69e4697bb4668dd69d94bd68b5..d0508acd9f4f6c190b205301223599cf5b027955 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-beta.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-beta.pbtxt
@@ -68,6 +68,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -84,6 +88,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-categorical.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-categorical.pbtxt
index d42b0e82e4fab3e30d3ebf1b8bea8b44bb61ea0f..ff0fbb56cd4b9e4c288a168a7c3d9e83c552b0e2 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-categorical.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-categorical.pbtxt
@@ -68,6 +68,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -84,6 +88,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet-multinomial.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet-multinomial.pbtxt
index 710164743e851f0bb5c31ebe78b260b623e87378..d75e4a2f88b29ff7f638d72f98876a230b191dce 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet-multinomial.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet-multinomial.pbtxt
@@ -68,6 +68,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -84,6 +88,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet.pbtxt
index 6cc361672ed8da313e1bebc41fbf093e019d38ad..b838b9ae21decba0323211f08d09fe373ababf23 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-dirichlet.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-distribution.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-distribution.pbtxt
index 40ad07d1be4bdea9585eb276debb1fdf3dfff583..6f06b7d50dd9f5f405673d572503ff549f148f33 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-distribution.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-distribution.pbtxt
@@ -55,6 +55,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -71,6 +75,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-exponential.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-exponential.pbtxt
index 8f34d25fea873827997ecd9df10cf1b3bfd0e56b..d34f9cde5d4d4161883f6d1b4646f22f054d16ad 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-exponential.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-exponential.pbtxt
@@ -65,6 +65,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -81,6 +85,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-gamma.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-gamma.pbtxt
index 0ae88fba3b4fd176641cc17c916181cc9a6a12c6..df268b8d99eb6bf22264ddb63231074413686efa 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-gamma.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-gamma.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-laplace.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-laplace.pbtxt
index e7cd595e946cb91f162a2a1af8753e44cdfbc0e1..303dcb4ed3bf8416b822bb010c2e87e8ef03b7c9 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-laplace.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-laplace.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-multinomial.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-multinomial.pbtxt
index 7a4a16ff836a485e65cb6e061e27b92907cb4a63..ecda8acb15c49c390eaae203a0082e78e53499bd 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-multinomial.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-multinomial.pbtxt
@@ -68,6 +68,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -84,6 +88,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-normal.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-normal.pbtxt
index 14c8c34cc2d8efacec706bdb894d9f069d5e7033..92b9eeea223b488cda1ebcabd31ec808e78fcf70 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-normal.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-normal.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-student-t.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-student-t.pbtxt
index 30db6d3f35c1c8ea7bbc376a20093302dd373bd9..9aa7f9a63465c78f79ae4a8a11bc63d92d027dab 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-student-t.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-student-t.pbtxt
@@ -68,6 +68,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -84,6 +88,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.distributions.-uniform.pbtxt b/tensorflow/tools/api/golden/tensorflow.distributions.-uniform.pbtxt
index 46cbdf225f68e879fd18ef4a07048746a9a71b08..d1b9d3069629c552d6c6048642934f422a13dce7 100644
--- a/tensorflow/tools/api/golden/tensorflow.distributions.-uniform.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.distributions.-uniform.pbtxt
@@ -64,6 +64,10 @@ tf_class {
     name: "covariance"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'covariance\'], "
   }
+  member_method {
+    name: "cross_entropy"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'cross_entropy\'], "
+  }
   member_method {
     name: "entropy"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'entropy\'], "
@@ -80,6 +84,10 @@ tf_class {
     name: "is_scalar_event"
     argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'is_scalar_event\'], "
   }
+  member_method {
+    name: "kl_divergence"
+    argspec: "args=[\'self\', \'other\', \'name\'], varargs=None, keywords=None, defaults=[\'kl_divergence\'], "
+  }
   member_method {
     name: "log_cdf"
     argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'log_cdf\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt
index f5ed263f0e20d6fdf7f23a3a2ab06029084d20e4..ab697b1b95b15e3ac7974e7092f1d5934b088bb6 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt
@@ -29,7 +29,7 @@ tf_class {
   }
   member_method {
     name: "export_savedmodel"
-    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "get_variable_names"
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt
index 61a29942c577a056e94dfe661fa5fec952b4f634..b73f6433e226f6b570b68c6a419c53d5c808d9d6 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt
@@ -29,7 +29,7 @@ tf_class {
   }
   member_method {
     name: "export_savedmodel"
-    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "get_variable_names"
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt
index 16e3b246156792418109981cc85ce0b07854a62c..46d59570577d0e31f61687e445f24770c561764d 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt
@@ -21,7 +21,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'hidden_units\', \'feature_columns\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'optimizer\', \'activation_fn\', \'dropout\', \'input_layer_partitioner\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'2\', \'None\', \'None\', \'Adagrad\', \'<function relu instance>\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'hidden_units\', \'feature_columns\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'optimizer\', \'activation_fn\', \'dropout\', \'input_layer_partitioner\', \'config\', \'warm_start_from\'], varargs=None, keywords=None, defaults=[\'None\', \'2\', \'None\', \'None\', \'Adagrad\', \'<function relu instance>\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "evaluate"
@@ -29,7 +29,7 @@ tf_class {
   }
   member_method {
     name: "export_savedmodel"
-    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "get_variable_names"
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
index c6765ae277983eee54d0d998d6ad85c065460653..439e87375ba09de0b42fb483588bb51bf80b0476 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
@@ -21,7 +21,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'model_dir\', \'linear_feature_columns\', \'linear_optimizer\', \'dnn_feature_columns\', \'dnn_optimizer\', \'dnn_hidden_units\', \'dnn_activation_fn\', \'dnn_dropout\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'input_layer_partitioner\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'Ftrl\', \'None\', \'Adagrad\', \'None\', \'<function relu instance>\', \'None\', \'2\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'model_dir\', \'linear_feature_columns\', \'linear_optimizer\', \'dnn_feature_columns\', \'dnn_optimizer\', \'dnn_hidden_units\', \'dnn_activation_fn\', \'dnn_dropout\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'input_layer_partitioner\', \'config\', \'warm_start_from\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'Ftrl\', \'None\', \'Adagrad\', \'None\', \'<function relu instance>\', \'None\', \'2\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "evaluate"
@@ -29,7 +29,7 @@ tf_class {
   }
   member_method {
     name: "export_savedmodel"
-    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "get_variable_names"
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
index e3a820db46e085d0aa61f76e2ffd6e32abbfd855..f79a8be3f69be7b19b0708023d440922e4cafdeb 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
@@ -21,7 +21,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'model_dir\', \'linear_feature_columns\', \'linear_optimizer\', \'dnn_feature_columns\', \'dnn_optimizer\', \'dnn_hidden_units\', \'dnn_activation_fn\', \'dnn_dropout\', \'label_dimension\', \'weight_column\', \'input_layer_partitioner\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'Ftrl\', \'None\', \'Adagrad\', \'None\', \'<function relu instance>\', \'None\', \'1\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'model_dir\', \'linear_feature_columns\', \'linear_optimizer\', \'dnn_feature_columns\', \'dnn_optimizer\', \'dnn_hidden_units\', \'dnn_activation_fn\', \'dnn_dropout\', \'label_dimension\', \'weight_column\', \'input_layer_partitioner\', \'config\', \'warm_start_from\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'Ftrl\', \'None\', \'Adagrad\', \'None\', \'<function relu instance>\', \'None\', \'1\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "evaluate"
@@ -29,7 +29,7 @@ tf_class {
   }
   member_method {
     name: "export_savedmodel"
-    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "get_variable_names"
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt
index a4c8cf667179ba9863251469195cb75f1a60560e..c466dcb4c23eb36e8313df23c68da8ee39104c7b 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt
@@ -21,7 +21,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'hidden_units\', \'feature_columns\', \'model_dir\', \'label_dimension\', \'weight_column\', \'optimizer\', \'activation_fn\', \'dropout\', \'input_layer_partitioner\', \'config\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'Adagrad\', \'<function relu instance>\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'hidden_units\', \'feature_columns\', \'model_dir\', \'label_dimension\', \'weight_column\', \'optimizer\', \'activation_fn\', \'dropout\', \'input_layer_partitioner\', \'config\', \'warm_start_from\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'Adagrad\', \'<function relu instance>\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "evaluate"
@@ -29,7 +29,7 @@ tf_class {
   }
   member_method {
     name: "export_savedmodel"
-    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "get_variable_names"
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt
index 787952eced27532cbd8596e9aacb3ce5abd7fade..d0bf043754b60240c507fe34b21b0599b94b69e2 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt
@@ -28,7 +28,7 @@ tf_class {
   }
   member_method {
     name: "export_savedmodel"
-    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "get_variable_names"
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt
index 99c03aa6297f4726970b83ad1f88924d320c5e33..cb9e95588dbec1b3ee367be9b61f6f3bc1f77725 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt
@@ -21,7 +21,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'feature_columns\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'optimizer\', \'config\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'2\', \'None\', \'None\', \'Ftrl\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'feature_columns\', \'model_dir\', \'n_classes\', \'weight_column\', \'label_vocabulary\', \'optimizer\', \'config\', \'partitioner\', \'warm_start_from\'], varargs=None, keywords=None, defaults=[\'None\', \'2\', \'None\', \'None\', \'Ftrl\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "evaluate"
@@ -29,7 +29,7 @@ tf_class {
   }
   member_method {
     name: "export_savedmodel"
-    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "get_variable_names"
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt
index e2ab96d5b46d9cdebc558e756ca26158fddb3f26..637f19ba2614265e69147093f2f21f1f9393d244 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt
@@ -21,7 +21,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'feature_columns\', \'model_dir\', \'label_dimension\', \'weight_column\', \'optimizer\', \'config\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'Ftrl\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'feature_columns\', \'model_dir\', \'label_dimension\', \'weight_column\', \'optimizer\', \'config\', \'partitioner\', \'warm_start_from\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'Ftrl\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "evaluate"
@@ -29,7 +29,7 @@ tf_class {
   }
   member_method {
     name: "export_savedmodel"
-    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+    argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "get_variable_names"
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt
index d006ecb254724405bfec4000f063a93c41e77055..091b1be0c83480757445542acb97e139bd74ef03 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt
@@ -10,6 +10,10 @@ tf_class {
     name: "evaluation_master"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "global_id_in_cluster"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "is_chief"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-vocab-info.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-vocab-info.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..a16e3aedae96e7289e73c49ac7890550dd5ddb08
--- /dev/null
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-vocab-info.pbtxt
@@ -0,0 +1,39 @@
+path: "tensorflow.estimator.VocabInfo"
+tf_class {
+  is_instance: "<class \'tensorflow.python.estimator.warm_starting_util.VocabInfo\'>"
+  is_instance: "<class \'tensorflow.python.estimator.warm_starting_util.VocabInfo\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "backup_initializer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "new_vocab"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "new_vocab_size"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "num_oov_buckets"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "old_vocab"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "old_vocab_size"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-warm-start-settings.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-warm-start-settings.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..afdd6bb058353594415cd1abe726070f84ae46b6
--- /dev/null
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.-warm-start-settings.pbtxt
@@ -0,0 +1,31 @@
+path: "tensorflow.estimator.WarmStartSettings"
+tf_class {
+  is_instance: "<class \'tensorflow.python.estimator.warm_starting_util.WarmStartSettings\'>"
+  is_instance: "<class \'tensorflow.python.estimator.warm_starting_util.WarmStartSettings\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "ckpt_to_initialize_from"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "var_name_to_prev_var_name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "var_name_to_vocab_info"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "vars_to_warm_start"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt
index cdc367b99e80104da988172bc25e76c679976b2d..a7a6cc1e49ddfe07569dff035e38931a0510addd 100644
--- a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt
@@ -68,6 +68,14 @@ tf_module {
     name: "TrainSpec"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "VocabInfo"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "WarmStartSettings"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "export"
     mtype: "<type \'module\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt
index 07b8d900da5dbd9f2c9396ecaf06b9d22ef50a0b..7fe3e2db09c45f26283d0da01d313405a97d0e54 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt
@@ -146,6 +146,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -156,7 +160,7 @@ tf_class {
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "fit"
@@ -164,7 +168,7 @@ tf_class {
   }
   member_method {
     name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
   }
   member_method {
     name: "from_config"
@@ -224,7 +228,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt
index 546bac44e4c9905d13c4f3b0e3d9c1b5cc6c5e59..0a6096813155d59eb1c7920f2bcd250ed9730982 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt
@@ -159,6 +159,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -169,7 +173,7 @@ tf_class {
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "fit"
@@ -177,7 +181,7 @@ tf_class {
   }
   member_method {
     name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
   }
   member_method {
     name: "from_config"
@@ -245,7 +249,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt
index 38e6128644529f012bdf1c9a7aa6656c1cef1ecd..f4ab075959906cdf350ec5d49dc86f928b7eb7ae 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt
index 0fa60646612ab383a5022990c06b76571e269f05..eb558cddafc3972127786353072767f0d53bf174 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt
index 75d56bf445847abfdc2b3e78d0ce5543aef152d9..a32151e22fab59e999c1e916e5c628d2e1b3f5ee 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt
index 6e52b6238d5b255f75d1105f2e895267117a2029..46b1713196fdd2470aefa6227dd19cdbf93185b9 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt
index 0e16774e8614e9b7ec7d4e90e176ba25f1512257..d6c98fa225ce924bc8e20f8531516eaed4d32ffb 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt
index 98112762cf842519956af94ac8593c418e26c0d1..754fd310c6d8ddb994db0590342b29f8cb7abd71 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt
index 2e093c0359664e8553c1be2c3b2d930df2c3aebe..9b62880c7931d151fb98cc1dc3149dcbd4dd103d 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt
index bada65e2f93cfa223b51d9ed3d44ab88cbad5a77..9bfaf2756284c7d287895e8d0b22d96ff1fa1627 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt
index 120807c4b530c3fec508373bfc15131ffb532f72..3e2aba55fd63326bb0e232fdce06f32884db7a0a 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt
index 834365f0f70e8447a8b6ba62cffe95a3c2a17e51..fb37308cce0124538648c3837e1e802794d7f1ae 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt
index 462a52ec1ebd4ce7f4b5289b76242ae1f992c032..813470ffc7c87727eb0b958e54806f530399806a 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt
index b802b363d013f819824c849ded762ff08a32cede..e251ac18e511b58a49816126d9941b98e4f91088 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt
index 5279b2ab17d1fd3e8ca8cc75e9f7866ddaf25fb5..2b8ac4f1f4857eb437bc3d67cd68989d3c6842f7 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt
@@ -129,6 +129,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt
index b800eb9796b04f0ffdb24768130669cec8e5babe..c9a0b887258de2d6b5aa88280b1f7b0d3bf7f6e2 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
index 8c2b110c6d3d0a12bf8bfde9ac939f66d6f93419..b847e224d6baeb11135c51ee270f2daa2d52f8a4 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt
index 47c63c11573e9fe20106f0a6a84a8940ae5f01e5..577f206e3510a9995d5d383ac440b4f68ea39fe5 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
index e90b90e8016d4a955c57010cbf387d359963dafa..72924c32b43e5edb39938cc0cd909cffefa61be1 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
@@ -127,6 +127,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt
index aa571b722dea6511925c9bf7f10714f252b897e7..16be08d9b2bae8fe1faecf34c4d87ac9b9baf142 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
index 911c73f8462df78c1353c5803660bffca4e33694..d898c546278188ca84a94660d9dc0c7be03e0b24 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt
index bb111b327c22d6fdb502c8454fe114ee427d2a77..72b72d6b3b1e410dda0b0a529449f0135203fc1b 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt
index 5a5ec635cce36c4e4561f73700e73a3ae215c596..ee93247f63ed700dc6058041bd0ea4ff5c879078 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
index 190b670fa2a4b04e124c3d1f63e691dfbe8cdbbb..e5023287e5f38553f3553a37b5a908790072b5c7 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
@@ -127,6 +127,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt
index a26ec82f2b96e69c445f05aa852a7b37ab67dbd7..ba38cb7121c9d312e7ba9d7147bdc67673d1ad2e 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
index 19b5bdf36befbcf8877fc28b54d9c712d83a74b2..a7001bbe34f899bdba6c49f7d2d1c7d9becc1313 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt
index 773ef01feb8fe179ec34d3e392395afb79200b8f..98d52c430c659d0fc3e9299f7bede9190dad2fcf 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt
index 3a67ac00ab193b3e7e72a105b6df2757c0164b74..33b6ebe1af731f66f88a9493502f69049ab34b42 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt
index de5a695b69f5b6977546fbf6211b26973c47fda2..4b241ebb0f68c270a9448b02138d44f82211f418 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt
index bf251b4df5dae7b9541679062ef5fa163e22bda5..1856a9ee21347ed6ca3dd592517eb644e205a5b7 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt
index 92a74cec68090271a59dd44dd93c1fc4821afbdd..a8c37af31f649d28ca2ab7614178f2dee58c13fc 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt
index cdd62eee0d3a26303caaa0f643b9fedae81f91d7..86578d958e151b47b892b3ada0dbc745d32dbe59 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt
index 7935143b2cf4f84dfd7d81286dea96ca9f57ec6c..e2e21b5f123f63fa38cb0e344be9a12fc091f20b 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt
index 497eb004992e4256fec158e1eb50dbb0b915aeea..348012dcde3407dad74ea3f56842e3182098b632 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt
index 35616cbebb388f1198aebdf0eeb5eaed76ea52e1..0419251083f63cbd57244e76f35aee74db434eab 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt
index 427c6fde90334a39cd1e3bef96952c792a1d3955..73609752886c8c57a78f6bc02cc46d2c7ff6e996 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt
index 763184899ca05c39b56e002f1e50ce07210c7409..337e85e812d8ef19e873dd49d39108ff3d452bbb 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt
index 889f2cbc2345e605035b71d69261e92c56aa645f..1357dc0f0d6455b18bef0dabe08639e0dee1ab49 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt
@@ -193,6 +193,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt
index 1428691afe2a525cc46ddb4f1b73239cdb613b31..b71a08f6c3b5e62970ba90c1d27dde5a4067e3e6 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt
index 655734cc432f7f18dfc5dc1f5f255650cb574a1a..a01a6067efb3ce217b603da5ea9c2c17c51c8ef7 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
index d97f06ea137f1128ffb6e1ebfcc10e0160904387..e53e78a977b32eaf2e31867044aedd39ab2dd34f 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
index 52886b2106aa3f99ded8a66a20f5cf6bec48b233..48fcd1044e06b2fe61aadb6c3675ce82197ff003 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
index ccb6459357f8248cc760995de94f5ef305d8c64b..66c06ed47289eb2d83d97778a7b13dab821722d2 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
index 1f25eb1cc64fcfe3489fef1c32f1b806ca74b478..4f2420f74ab3069952e4a44bf61e5e12b3e80ea3 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
index a37d6dda28a653836bf4c495165f2aff05744298..7912a6d933b851521358e0246d04688da410b909 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
index 9f276fd54714756d6db17921c7f4f139a8b05a8b..d5b2d2c274ad97071497045271c0a595f8e0e062 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
index eaa9b477d853c12e7b7dd183e09073a8116b24e6..d88ff17eb6df7bbba7d3af4344fc8ddc367ae44c 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
index f4d37a5f63432dec1131bc7bce0cabd1af6e8db3..c8cc5a0ddfdd54cbb47de922591a9842abf63396 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
index afddd2d4cbf7d1ba089fbc35a684664aded4e2a6..7956c5a340d963cfd5976e8af56da222848a164a 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
index 12cd49c9551c4520fd05592d7a3f456b3d328859..0a7e16413dfbd80d448eb1bad5771915475d96b2 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
index 146241c172319211827d77482c634ae6218137b0..6c8a58a996f5313ea48e395e7e443a7c21f198ee 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
index 00475301aa009385e7f23241864475f38bf00da8..7678ce8aab63fcfa76c0ac61346a723c1dfe1ee7 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt
index 49841237cef52d3b16b498510f7c24744d57b4e9..d46fd41a3f33002a9bbe755851278c9729ccd1d1 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
index 4ce7c34f6c75c179442b6d7473281086115f4b64..0dbbdf283836e4121c925200749784abdeb0a5a8 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt
index e1a1d0d58ecbc9a5aa6e1bbde49d92aec9714f42..964ef89c2e2abdf8b6f7dc3893751f56dd380e90 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt
@@ -197,6 +197,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt
index 59508c2f11073caca1f30544efaea435730ce228..ca0144929942f7024a4e8bac5552bf0547ceb56d 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt
@@ -90,7 +90,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'function\', \'mask\', \'arguments\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], "
+    argspec: "args=[\'self\', \'function\', \'output_shape\', \'mask\', \'arguments\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt
index ca904a2b8c77e55430e4f76ff4fa2be641c199a6..c52ad727545c0bf4f199714d71180eac3f1bf62a 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt
@@ -123,6 +123,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
index f52fd02515f30f4011a154cda4274d7e7dd34a88..6a7b23c5409914396f2ce10fcb593a1ca8d65c9e 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt
index b5c32d1cdf3dc7e35d9c78dd81431bb67aab1b27..324745e5a33de47ed91f1b5c037445ee01780ba3 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt
index 0ac2b83a999b3c8245ce616ccf5d79833747aee3..e12ae0505440c31068f0ac132adfd675b93e0593 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt
index de2a28d985d3f05c639a103b316f66f15d326f95..244e79b4ffe60ddd6aa56d2780d80dfd66c494a9 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt
index 130d932fd6de0fde1052843cd9b10bf2a748441c..56cbf5df785ef0e2614ea7e9e6cfe1335e148eec 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt
index 82a6f6d539080436ef2e49b5a4b342a2dfff3ae2..33c2d30e86f9cdc3fb9f4f498bfc2c94497fe2dd 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt
index ca2fd4e502b6bb87c44363f91f9dcd26b386eb3a..94f91059b7a1e291c38fe0045accc6c03f226603 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt
index 885e30f8799fd7e156c9f048b59483ad00d41fba..247230a6d68b8ea93a30a2f5846d8baaa78cb13e 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt
index 102879d2f536cc6bfdb31558c36412b3d1e93885..8d61b67e7ce9564d31b0bd904a58540d19c89172 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt
index 424061614659249605569a571ff09adf52db3997..ad2e30802006e934730e5c75247e958329f7121c 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt
index 4b32c2e99f9a0e3abdb3f1bfe27137f6f5052491..9e889ca8637f759d495092c9bc6862005e5e8f23 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt
index 0c964235ae7d2c352c053b97f902bf2516263628..932680941d269660533e93077818c4884c6e28c4 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt
index 797a073b8a74bc482a555bb12806afe36d0df79e..db644f958f5d781c2dcc5bbbca52e3b656230510 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt
index 7dc1fa6964eb0fcba47ae2db270152364e244eaa..2043e1a1263f0f0745b7c6446cc670fd6b0f0000 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt
index c7c9b10f22dfc9799217727e5020d6f45bb488f3..74fa1db02076f5a5cdc1feb412ea2ce5095e326d 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt
@@ -128,6 +128,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt
index dedb48151a84f00f96db8942e08f5508cecfcbba..4b0e98520a0dd86c085fa7345af445e1ae253d3b 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt
index bb30c0a945da7b6f869fa385eebbb8301851e8ae..34bc71af8a26ff6e4d7c81a3877751df5209906f 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt
index f289664ba27063bcceb3b419e99e57066625cdbf..a6d9b57c8813acc85436cc08041159c17c252806 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt
@@ -127,6 +127,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
index d78872861253f2f782a79e50e0f0a174464f388a..551d6953796fcc63b6d9b58fec5a45ef03f6dc2a 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
@@ -127,6 +127,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
index 10c7f8867cbb979e4e7a724fae41babd81d0a1ea..3414810db44da6ff0e3f77b1a5db24329de7a88a 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt
index 588df21088fffb1ce207132a0cf043f103f71afc..cf34034ef0abf36c0e7ff18ee8adcc8aeaeae5eb 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt
@@ -185,6 +185,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
index 9773c4acc750c59a810cc467a9239a397c62ec25..e4727072e375b9fc4dc99a1536eaaf3df5415369 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
index d4de587a4801c56ca5903bdd1169b816d008765d..c5ff7043115ccdd3bc4a1147790b20feda410f65 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
index af210fab8dc444bfb3b3f8fda0edb5121f6ad0ba..476a7f362cf88e234e964f6f6645ee4ed0cbaff8 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
@@ -126,6 +126,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
index 5779e41342214cc5ec60589d6c3879a79c4a639d..b76499658da58c178728246b3199391ca064fa3e 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
@@ -128,6 +128,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
index 8cfb33a14896b767deae34d4b76485729ef0122f..2376d815a6400034a51e3d17f98a030209356cf3 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt
index dedef65ff931618082a4a4d1fdc01e38043ce837..2a7059d9aa7ac12d8130c30622bc5f190562695c 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt
@@ -125,6 +125,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt
index bb42cdcb65643190f1d634e2ad23447fb40c90ee..a81b83be49e0073f242efc6890e419b4fe172ab2 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt
index 6d3c2ebfef8af42c288d7de6124e1ae326994c1d..5403279d45ec7b93bae7907b891c659a043e96d0 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt
index d790cf2e08030d3b3f362a19474fd6d1d7833c65..96c337caf28d43fabd0b90df016f4e8ab0c408db 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt
index 313b3a9e155c11e46fd70f2fea0d8dec003d6667..58bffa087521517fe7f0b5dcd6cae0a8b39a4e25 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt
index ba6c23ae75afae9177fa4f1fda34dc3f6d12939e..b81a4b1c50b22f13eacb521cfc8bc288bd40c81f 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt
index cb587d67b0d99cf38823c2d74b833474ec4b5b10..1a26f2f3c9bbaa2aa567e76e1aafe14805ecff38 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt
index 415720cbe11134f6b2426a2eab395566e65cbf8f..310277fe67433fd870ae3d907984f402576925b2 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt
@@ -124,6 +124,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt
index 4e522813a5a3956b4888f95b2f14ecd52d897256..d239098b0b2bf37fea924ed52074385acf48de96 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt
@@ -146,6 +146,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -156,7 +160,7 @@ tf_class {
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "fit"
@@ -164,7 +168,7 @@ tf_class {
   }
   member_method {
     name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
   }
   member_method {
     name: "from_config"
@@ -224,7 +228,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt
index ddbb358c84ca50fceb4fb71eddf0083f034f65e1..7c1bfcb22558ec3a64c63ebbf0466f9114ef68ee 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt
@@ -159,6 +159,10 @@ tf_class {
     name: "compute_mask"
     argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
@@ -169,7 +173,7 @@ tf_class {
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "fit"
@@ -177,7 +181,7 @@ tf_class {
   }
   member_method {
     name: "fit_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps_per_epoch\', \'epochs\', \'verbose\', \'callbacks\', \'validation_data\', \'validation_steps\', \'class_weight\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'shuffle\', \'initial_epoch\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1\', \'1\', \'None\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\', \'True\', \'0\'], "
   }
   member_method {
     name: "from_config"
@@ -245,7 +249,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt
index 66cd37bb3a378ccd1bbdffd79f87338c9b4cf265..04174bff5f04fead68af68afeec80316867009a4 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt
@@ -6,7 +6,7 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'directory\', \'image_data_generator\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'data_format\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'None\', \'\', \'png\', \'False\'], "
+    argspec: "args=[\'self\', \'directory\', \'image_data_generator\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'data_format\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'None\', \'\', \'png\', \'False\', \'nearest\'], "
   }
   member_method {
     name: "next"
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt
index 7e33285e7abbc10df7f697e10071e429c5183d9e..41f27d1f740457f4b7c4f74cb089a448a0fed845 100644
--- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt
@@ -16,7 +16,7 @@ tf_class {
   }
   member_method {
     name: "flow_from_directory"
-    argspec: "args=[\'self\', \'directory\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'\', \'png\', \'False\'], "
+    argspec: "args=[\'self\', \'directory\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'\', \'png\', \'False\', \'nearest\'], "
   }
   member_method {
     name: "random_transform"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt
index 6e595ca34385d14f3ea7eb0da9a633f6f308f72f..de81206bc8b25046cd48c79ff8f154041c0e0cb0 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt
@@ -108,6 +108,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt
index 7b6c30773b95984ee8438820a45bf2c607a912ff..72d5496464210efd9e423996dfb274dd9564f761 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt
@@ -108,6 +108,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt
index 7a7664e80013557c922a1d399de16e32a78f60ff..595e77ff9f8b64b6606fb075f3edf2281b4c3c1f 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt
@@ -108,6 +108,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt
index c9f5c18f25628d8b1d575113232da8a75d0e428c..0c4aa2ff2612269727026141574726ad6df5cdbd 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt
@@ -107,6 +107,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'False\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt
index 1fa00d7b2f9d3d34861e2030a98487d660e81305..5f576d0189309442dc4cea3d3617ab3144420165 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt
@@ -108,6 +108,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt
index a92a1094ac0c042e6ab9a2d153e8a06ab183d0ec..675a7c76e569d3163ecd2c547841b4c36078b21d 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt
@@ -109,6 +109,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt
index 7fa78ab20b1260c1eb87293e200015c7b2895b19..eaabbf6aab172aea5c51f8071076890bb6b5bcf7 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt
@@ -108,6 +108,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt
index e92e4859ae5b179f8b2a2328219aa6f16d740903..838e070d79d2d7cfbd631f1a5e9960412cfdae5a 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt
@@ -109,6 +109,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt
index 87e5c2949e681e224efc94265559c31256082f1e..4bd8cfc1a48cd839e2ffa54d0d0ca863060406d8 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt
@@ -108,6 +108,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt
index cc4ee4c8a5ec6bc8f2395fedb8aed8d334342013..57eccb03ffeb90652b019b5ce8a519797e4a3a3d 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt
@@ -107,6 +107,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt
index 99ab2ef97c73bbd305a3755b78e8174b643fe0a0..a1ec00eeeaa98a6199e29b187b0760ddc92db09d 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt
@@ -107,6 +107,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'training\'], varargs=None, keywords=None, defaults=[\'False\'], "
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt
index f4074c5a4f6b45896f49e295830c91f58b46c84a..a06943d51a52f1951056136445b0d5786d801b5b 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt
@@ -107,6 +107,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt
index ec51609dee9bedb75566163e35225a1797d4cd5c..24fda0c87ed0aeabd0fd4a16bb2efab444f8cd8a 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt
@@ -106,6 +106,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt
index 745c532e94beda1280e366bf592d347a5275ad11..4c3d00e0e1ddfe95c56f9ebc7c5d609c79dd44d4 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt
@@ -108,6 +108,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt
index f8244c01b64105d0c4467c3f90ccec4e2d06adb4..f7e2017b0c9438130f1cfb2431eb73ca4d3103c5 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt
@@ -108,6 +108,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt
index df5378f279bbeb254f4a9fee2724b07baee87203..84780926a38ff811a5ab35fadfac690a6dbbbbe2 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt
@@ -108,6 +108,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt
index c55d2bccc9b10d142d82073013639121dc45ebf1..4d91ab1d8c9b5d2c8f7db5fd645b3c126eb609c2 100644
--- a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt
@@ -109,6 +109,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
index 62e634afb87b9dcc02ab0ceaaa7bdff62f9bfefa..1d9c0c0f6d28dfb1a218586075bcb6820b1c62b1 100644
--- a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
@@ -88,6 +88,10 @@ tf_module {
     name: "logdet"
     argspec: "args=[\'matrix\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "logm"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "lstsq"
     argspec: "args=[\'matrix\', \'rhs\', \'l2_regularizer\', \'fast\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'True\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/tensorflow.losses.-reduction.pbtxt b/tensorflow/tools/api/golden/tensorflow.losses.-reduction.pbtxt
index 4bdc73370bffb3c44945fc5c9e4fbafcdd72255e..258ad5047eb6e82eeb9c0941b0acf0573e5ca61d 100644
--- a/tensorflow/tools/api/golden/tensorflow.losses.-reduction.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.losses.-reduction.pbtxt
@@ -18,6 +18,14 @@ tf_class {
     name: "SUM_BY_NONZERO_WEIGHTS"
     mtype: "<type \'str\'>"
   }
+  member {
+    name: "SUM_OVER_BATCH_SIZE"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "SUM_OVER_NONZERO_WEIGHTS"
+    mtype: "<type \'str\'>"
+  }
   member_method {
     name: "__init__"
   }
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
index ebd9c079b543e79eb0d6cfa369394362e9a8825f..8ce022e4549712bb13dedcd66481a3ad2a2db0e5 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt
@@ -54,15 +54,15 @@ tf_module {
   }
   member_method {
     name: "conv2d"
-    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "conv2d_backprop_filter"
-    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "conv2d_backprop_input"
-    argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'None\'], "
+    argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'use_cudnn_on_gpu\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "conv2d_transpose"
@@ -70,11 +70,11 @@ tf_module {
   }
   member_method {
     name: "conv3d"
-    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'[1, 1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "conv3d_backprop_filter_v2"
-    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'[1, 1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "conv3d_transpose"
@@ -86,7 +86,7 @@ tf_module {
   }
   member_method {
     name: "crelu"
-    argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'features\', \'name\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\', \'-1\'], "
   }
   member_method {
     name: "ctc_beam_search_decoder"
@@ -106,15 +106,15 @@ tf_module {
   }
   member_method {
     name: "depthwise_conv2d_native"
-    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "depthwise_conv2d_native_backprop_filter"
-    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], "
+    argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "depthwise_conv2d_native_backprop_input"
-    argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], "
+    argspec: "args=[\'input_sizes\', \'filter\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "dilation2d"
@@ -234,7 +234,7 @@ tf_module {
   }
   member_method {
     name: "quantized_conv2d"
-    argspec: "args=[\'input\', \'filter\', \'min_input\', \'max_input\', \'min_filter\', \'max_filter\', \'strides\', \'padding\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'qint32\'>\", \'None\'], "
+    argspec: "args=[\'input\', \'filter\', \'min_input\', \'max_input\', \'min_filter\', \'max_filter\', \'strides\', \'padding\', \'out_type\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'qint32\'>\", \'[1, 1, 1, 1]\', \'None\'], "
   }
   member_method {
     name: "quantized_max_pool"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt
index 49066eecaa0fda4a7a60c62b7a087d054bd73079..a2e728f94b41341b1a7c2a06d2c92d490f6eeb87 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt
@@ -117,6 +117,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
index bf38f678b69269e0b0a99b7812a9a304d7aaec1d..4211faa1ec615da8938d9a858a19a9e9a76378cd 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
@@ -117,6 +117,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
index 81dcd90e81e9185f087892a5ebda0bb8460b0d8d..0d253e5dd233d6d2b6ad0070a463c283a8769dab 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
@@ -116,6 +116,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
index 8ff225897ae26adb3723aaf729030771e26833a4..f61a5a28e3cd249a2cc2c84fc401cecc49a7945c 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
@@ -116,6 +116,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt
index ba15ffb792d81177040b078865134b0de7ca7a99..06fdc638c82b0d19b03857e33f083a94b7fd133b 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt
@@ -117,6 +117,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt
index 8d17153972cfd99072eee1db56728e67b98db0da..ef48cff0c329a7af5009d31fda429cf649c24261 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt
@@ -117,6 +117,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt
index 68c3064dd4f2f1453102cffd078e6a2e5356e0d5..9a6c73a079884b8ab92be1c9e89b2a9f34aad851 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt
@@ -116,6 +116,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
index 86ff0fee2b369fb77bdcba6b19dc89f39a48642b..27488f8e73f20456fae911511ecd2e41a60da351 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
@@ -115,6 +115,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
index 1a6f8a3b7dc1990b83f518ee1970ab36b2594fda..3310836ed26387718115c2454300b9edfe930451 100644
--- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
@@ -116,6 +116,10 @@ tf_class {
     name: "call"
     argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
   }
+  member_method {
+    name: "compute_output_shape"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "count_params"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index 0edd4153d772459d941cb260c26fd9e09f017f12..db1ed4218514ad51f28703c27598eada9464511e 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -124,6 +124,10 @@ tf_module {
     name: "LogMessage"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
+  member {
+    name: "MONOLITHIC_BUILD"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "MetaGraphDef"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
@@ -652,6 +656,10 @@ tf_module {
     name: "assert_less_equal"
     argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "assert_near"
+    argspec: "args=[\'x\', \'y\', \'rtol\', \'atol\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "assert_negative"
     argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
@@ -1140,6 +1148,10 @@ tf_module {
     name: "group"
     argspec: "args=[], varargs=inputs, keywords=kwargs, defaults=None"
   }
+  member_method {
+    name: "guarantee_const"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "hessians"
     argspec: "args=[\'ys\', \'xs\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\'], varargs=None, keywords=None, defaults=[\'hessians\', \'False\', \'False\', \'None\'], "
@@ -1148,6 +1160,10 @@ tf_module {
     name: "histogram_fixed_width"
     argspec: "args=[\'values\', \'value_range\', \'nbins\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'100\', \"<dtype: \'int32\'>\", \'None\'], "
   }
+  member_method {
+    name: "histogram_fixed_width_bins"
+    argspec: "args=[\'values\', \'value_range\', \'nbins\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'100\', \"<dtype: \'int32\'>\", \'None\'], "
+  }
   member_method {
     name: "identity"
     argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1394,7 +1410,7 @@ tf_module {
   }
   member_method {
     name: "multinomial"
-    argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+    argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\', \'output_dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "multiply"
@@ -1706,11 +1722,11 @@ tf_module {
   }
   member_method {
     name: "serialize_many_sparse"
-    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
   }
   member_method {
     name: "serialize_sparse"
-    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
   }
   member_method {
     name: "serialize_tensor"
@@ -1838,15 +1854,15 @@ tf_module {
   }
   member_method {
     name: "sparse_segment_mean"
-    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
   member_method {
     name: "sparse_segment_sqrt_n"
-    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
   member_method {
     name: "sparse_segment_sum"
-    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
   member_method {
     name: "sparse_slice"
@@ -2062,7 +2078,7 @@ tf_module {
   }
   member_method {
     name: "while_loop"
-    argspec: "args=[\'cond\', \'body\', \'loop_vars\', \'shape_invariants\', \'parallel_iterations\', \'back_prop\', \'swap_memory\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'True\', \'False\', \'None\'], "
+    argspec: "args=[\'cond\', \'body\', \'loop_vars\', \'shape_invariants\', \'parallel_iterations\', \'back_prop\', \'swap_memory\', \'name\', \'maximum_iterations\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'True\', \'False\', \'None\', \'None\'], "
   }
   member_method {
     name: "write_file"
diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-profiler.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-profiler.pbtxt
index 0fb363aca48031e13487d716a0375973f93b3dc8..acb61dae9f0d184ba998aa820ec40de5bc38c3eb 100644
--- a/tensorflow/tools/api/golden/tensorflow.profiler.-profiler.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.profiler.-profiler.pbtxt
@@ -4,7 +4,7 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'graph\', \'op_log\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'graph\', \'op_log\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
   member_method {
     name: "add_step"
@@ -30,4 +30,8 @@ tf_class {
     name: "profile_python"
     argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "serialize_to_string"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.pbtxt
index 26b25ee3d47241dbf351018f2aacbda12ff33492..7b4d3ac522abc4229c5623da25c4ec818d86f829 100644
--- a/tensorflow/tools/api/golden/tensorflow.profiler.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.profiler.pbtxt
@@ -26,11 +26,11 @@ tf_module {
   }
   member_method {
     name: "advise"
-    argspec: "args=[\'graph\', \'run_meta\', \'options\'], varargs=None, keywords=None, defaults=[\'None\', \'0\'], "
+    argspec: "args=[\'graph\', \'run_meta\', \'options\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'0\'], "
   }
   member_method {
     name: "profile"
-    argspec: "args=[\'graph\', \'run_meta\', \'op_log\', \'cmd\', \'options\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'scope\', \'0\'], "
+    argspec: "args=[\'graph\', \'run_meta\', \'op_log\', \'cmd\', \'options\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'scope\', \'0\'], "
   }
   member_method {
     name: "write_op_log"
diff --git a/tensorflow/tools/api/golden/tensorflow.saved_model.builder.-saved-model-builder.pbtxt b/tensorflow/tools/api/golden/tensorflow.saved_model.builder.-saved-model-builder.pbtxt
index 56d76902fd0fe72ced6c0267295d9a9dc822a745..ca8e5884b18110d4293225e595c030e9629b5663 100644
--- a/tensorflow/tools/api/golden/tensorflow.saved_model.builder.-saved-model-builder.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.saved_model.builder.-saved-model-builder.pbtxt
@@ -8,11 +8,11 @@ tf_class {
   }
   member_method {
     name: "add_meta_graph"
-    argspec: "args=[\'self\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\'], "
+    argspec: "args=[\'self\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "add_meta_graph_and_variables"
-    argspec: "args=[\'self\', \'sess\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\'], "
+    argspec: "args=[\'self\', \'sess\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "save"
diff --git a/tensorflow/tools/api/golden/tensorflow.saved_model.pbtxt b/tensorflow/tools/api/golden/tensorflow.saved_model.pbtxt
index 5683766b28975a3a17da3cdbfbaa4e8baab5f3ba..e1a0385092c1384bcb5958fce2e24693ee731ae5 100644
--- a/tensorflow/tools/api/golden/tensorflow.saved_model.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.saved_model.pbtxt
@@ -32,4 +32,8 @@ tf_module {
     name: "utils"
     mtype: "<type \'module\'>"
   }
+  member_method {
+    name: "simple_save"
+    argspec: "args=[\'session\', \'export_dir\', \'inputs\', \'outputs\', \'legacy_init_op\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/tensorflow.train.-saver.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-saver.pbtxt
index 04c11712cd4c200bb2c04342e66924abf59c5f73..2cda458f468b2d748b43954b14b670df7145243f 100644
--- a/tensorflow/tools/api/golden/tensorflow.train.-saver.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.train.-saver.pbtxt
@@ -20,7 +20,7 @@ tf_class {
   }
   member_method {
     name: "export_meta_graph"
-    argspec: "args=[\'self\', \'filename\', \'collection_list\', \'as_text\', \'export_scope\', \'clear_devices\', \'clear_extraneous_savers\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'None\', \'False\', \'False\'], "
+    argspec: "args=[\'self\', \'filename\', \'collection_list\', \'as_text\', \'export_scope\', \'clear_devices\', \'clear_extraneous_savers\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'None\', \'False\', \'False\', \'False\'], "
   }
   member_method {
     name: "from_proto"
@@ -36,7 +36,7 @@ tf_class {
   }
   member_method {
     name: "save"
-    argspec: "args=[\'self\', \'sess\', \'save_path\', \'global_step\', \'latest_filename\', \'meta_graph_suffix\', \'write_meta_graph\', \'write_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'meta\', \'True\', \'True\'], "
+    argspec: "args=[\'self\', \'sess\', \'save_path\', \'global_step\', \'latest_filename\', \'meta_graph_suffix\', \'write_meta_graph\', \'write_state\', \'strip_default_attrs\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'meta\', \'True\', \'True\', \'False\'], "
   }
   member_method {
     name: "set_last_checkpoints"
diff --git a/tensorflow/tools/api/golden/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.pbtxt
index 3ffc6407306b4e44ec23052187b6f9376bba833c..e49c719a334455d1f8f39fa67332be8bb81f2bc2 100644
--- a/tensorflow/tools/api/golden/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.train.pbtxt
@@ -266,7 +266,11 @@ tf_module {
   }
   member_method {
     name: "cosine_decay"
-    argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
+  }
+  member_method {
+    name: "cosine_decay_restarts"
+    argspec: "args=[\'learning_rate\', \'global_step\', \'first_decay_steps\', \'t_mul\', \'m_mul\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'2.0\', \'1.0\', \'0.0\', \'None\'], "
   }
   member_method {
     name: "create_global_step"
@@ -282,7 +286,7 @@ tf_module {
   }
   member_method {
     name: "export_meta_graph"
-    argspec: "args=[\'filename\', \'meta_info_def\', \'graph_def\', \'saver_def\', \'collection_list\', \'as_text\', \'graph\', \'export_scope\', \'clear_devices\', \'clear_extraneous_savers\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\', \'None\', \'False\', \'False\'], "
+    argspec: "args=[\'filename\', \'meta_info_def\', \'graph_def\', \'saver_def\', \'collection_list\', \'as_text\', \'graph\', \'export_scope\', \'clear_devices\', \'clear_extraneous_savers\', \'strip_default_attrs\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\', \'None\', \'False\', \'False\', \'False\'], "
   }
   member_method {
     name: "generate_checkpoint_state_proto"
diff --git a/tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt b/tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt
index de1ad7e860a616f6737cd451b9c7d90d1ab079c9..e62dec93e6f06a10f48d72b0cda74426887806fb 100644
--- a/tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt
@@ -4,6 +4,6 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'name_or_scope\', \'default_name\', \'values\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'name_or_scope\', \'default_name\', \'values\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\', \'auxiliary_name_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
   }
 }
diff --git a/tensorflow/tools/api/tests/BUILD b/tensorflow/tools/api/tests/BUILD
index f80dd6fe5b6a70b4198fff8da7b457645452b3e1..8fb6b1cdfd8981e427062e186f6ac26b24231b8b 100644
--- a/tensorflow/tools/api/tests/BUILD
+++ b/tensorflow/tools/api/tests/BUILD
@@ -18,8 +18,8 @@ py_test(
     srcs = ["api_compatibility_test.py"],
     data = [
         ":convert_from_multiline",
-        "//tensorflow/core:base_api_def",
-        "//tensorflow/core:python_api_def",
+        "//tensorflow/core/api_def:base_api_def",
+        "//tensorflow/core/api_def:python_api_def",
         "//tensorflow/python:hidden_ops",
         "//tensorflow/tools/api/golden:api_golden",
         "//tensorflow/tools/api/tests:API_UPDATE_WARNING.txt",
diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py
index a8fdf4c9a07a21269920c61d7f560562dab7b5f4..afcbf50944cc47b3ae3086b17279f2ce2fdc6ee7 100644
--- a/tensorflow/tools/api/tests/api_compatibility_test.py
+++ b/tensorflow/tools/api/tests/api_compatibility_test.py
@@ -248,14 +248,15 @@ class ApiCompatibilityTest(test.TestCase):
       logging.info('No differences found between API and golden.')
 
   @unittest.skipUnless(
-      sys.version_info.major == 2 and os.uname()[0] == 'Linux',
-      'API compabitility test goldens are generated using python2 on Linux.')
+      sys.version_info.major == 2,
+      'API compabitility test goldens are generated using python2.')
   def testAPIBackwardsCompatibility(self):
     # Extract all API stuff.
     visitor = python_object_to_proto_visitor.PythonObjectToProtoVisitor()
 
     public_api_visitor = public_api.PublicAPIVisitor(visitor)
     public_api_visitor.do_not_descend_map['tf'].append('contrib')
+    public_api_visitor.do_not_descend_map['tf.GPUOptions'] = ['Experimental']
     traverse.traverse(tf, public_api_visitor)
 
     proto_dict = visitor.GetProtos()
diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc
index 9809ad52de1319951aa82007ef9b933c6e707bf7..ecab6f8769ae2d0126f63580030ed6ff756015d0 100644
--- a/tensorflow/tools/benchmark/benchmark_model.cc
+++ b/tensorflow/tools/benchmark/benchmark_model.cc
@@ -530,7 +530,7 @@ int Main(int argc, char** argv) {
   }
 
   // Capture overall inference time without stat logging overhead. This is the
-  // timing data that can be compared to other libaries.
+  // timing data that can be compared to other libraries.
   SleepSeconds(inter_benchmark_sleep_seconds);
   int64 no_stat_time_us = 0;
   int64 no_stat_num_runs = 0;
diff --git a/tensorflow/tools/ci_build/Dockerfile.android b/tensorflow/tools/ci_build/Dockerfile.android
index 99a69d7b43bbc19f0b1e9ee7c741426c6651dfd6..dcf077791a9752f2e22999b082a9805bb3775c8d 100644
--- a/tensorflow/tools/ci_build/Dockerfile.android
+++ b/tensorflow/tools/ci_build/Dockerfile.android
@@ -1,4 +1,4 @@
-FROM ubuntu:14.04
+FROM ubuntu:16.04
 
 LABEL maintainer="Jan Prach <jendap@google.com>"
 
diff --git a/tensorflow/tools/ci_build/Dockerfile.cmake b/tensorflow/tools/ci_build/Dockerfile.cmake
index 37ba24d65a2e95833511fa9b3e4044db634a08fd..ec90c83aacd068e8f9c16e5be8eb6e1cef098ea6 100644
--- a/tensorflow/tools/ci_build/Dockerfile.cmake
+++ b/tensorflow/tools/ci_build/Dockerfile.cmake
@@ -23,7 +23,10 @@ RUN /install/install_deb_packages.sh
 
 RUN apt-get update
 RUN apt-get install -y --no-install-recommends python-pip
+RUN pip install --upgrade astor
+RUN pip install --upgrade gast
 RUN pip install --upgrade numpy
+RUN pip install --upgrade termcolor
 
 # Install golang
 RUN add-apt-repository -y ppa:ubuntu-lxc/lxd-stable
diff --git a/tensorflow/tools/ci_build/Dockerfile.cpu b/tensorflow/tools/ci_build/Dockerfile.cpu
index 57a854a9df738dea5d8560b54765099f32d0ff86..c61fda09af040140c000d6aa4a58f525fb98d80d 100644
--- a/tensorflow/tools/ci_build/Dockerfile.cpu
+++ b/tensorflow/tools/ci_build/Dockerfile.cpu
@@ -1,4 +1,4 @@
-FROM ubuntu:14.04
+FROM ubuntu:16.04
 
 LABEL maintainer="Jan Prach <jendap@google.com>"
 
diff --git a/tensorflow/tools/ci_build/Dockerfile.cpu.mpi b/tensorflow/tools/ci_build/Dockerfile.cpu.mpi
index 2bf7fd1d23406da8381fc5071ddf4ae56d1cb0ee..d9f5b7c0364e6fbea18a9a32adca6613a5c37011 100644
--- a/tensorflow/tools/ci_build/Dockerfile.cpu.mpi
+++ b/tensorflow/tools/ci_build/Dockerfile.cpu.mpi
@@ -1,4 +1,4 @@
-FROM ubuntu:14.04
+FROM ubuntu:16.04
 
 LABEL authors="Andrew Gibiansky <andrew.gibiansky@gmail.com>, Joel Hestness <jthestness@gmail.com>"
 
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu b/tensorflow/tools/ci_build/Dockerfile.gpu
index 2d46ccb6b17ac3ab3af49c1649074eda8a840331..7591ecc04efa887ec1d35ba92881386f5a25241d 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu
@@ -1,8 +1,8 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04
+FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
 
 LABEL maintainer="Jan Prach <jendap@google.com>"
 
-# In the Ubuntu 14.04 images, cudnn is placed in system paths. Move them to
+# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to
 # /usr/local/cuda
 RUN cp -P /usr/include/cudnn.h /usr/local/cuda/include
 RUN cp -P /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu_clang b/tensorflow/tools/ci_build/Dockerfile.gpu_clang
deleted file mode 100644
index 0ecd8c75e036fc18d37882834ed467d0edb096b1..0000000000000000000000000000000000000000
--- a/tensorflow/tools/ci_build/Dockerfile.gpu_clang
+++ /dev/null
@@ -1,36 +0,0 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu14.04
-
-LABEL maintainer="Ilya Biryukov <ibiryukov@google.com>"
-
-# In the Ubuntu 14.04 images, cudnn is placed in system paths. Move them to
-# /usr/local/cuda
-RUN cp /usr/include/cudnn.h /usr/local/cuda/include
-RUN cp /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64
-
-# Copy and run the install scripts.
-COPY install/*.sh /install/
-RUN /install/install_bootstrap_deb_packages.sh
-RUN add-apt-repository -y ppa:openjdk-r/ppa
-
-# LLVM requires cmake version 3.4.3, but ppa:george-edison55/cmake-3.x only
-# provides version 3.2.2.
-# So we skip it in `install_deb_packages.sh`, and later install it from
-# https://cmake.org in `install_cmake_for_clang.sh`.
-RUN /install/install_deb_packages.sh --without_cmake
-RUN /install/install_pip_packages.sh
-RUN /install/install_bazel.sh
-RUN /install/install_golang.sh
-
-# Install cmake and build clang
-RUN /install/install_cmake_for_clang.sh
-RUN /install/build_and_install_clang.sh
-
-# Set up the master bazelrc configuration file.
-COPY install/.bazelrc /etc/bazel.bazelrc
-ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
-
-# Configure the build for our CUDA configuration.
-ENV TF_NEED_CUDA 1
-ENV TF_CUDA_CLANG 1
-ENV CLANG_CUDA_COMPILER_PATH /usr/local/bin/clang
-ENV TF_CUDA_COMPUTE_CAPABILITIES 3.0
diff --git a/tensorflow/tools/ci_build/Dockerfile.hadoop b/tensorflow/tools/ci_build/Dockerfile.hadoop
index 6010aedb339abadd8ee09d50d4eb279c5d3236f8..d05dedafbe28d93e768712a3bdbc1730ab8bb092 100644
--- a/tensorflow/tools/ci_build/Dockerfile.hadoop
+++ b/tensorflow/tools/ci_build/Dockerfile.hadoop
@@ -1,4 +1,4 @@
-FROM ubuntu:14.04
+FROM ubuntu:16.04
 
 LABEL maintainer="Jonathan Hseu <jhseu@google.com>"
 
diff --git a/tensorflow/tools/ci_build/builds/configured b/tensorflow/tools/ci_build/builds/configured
index de1e354170eec2477b6c895c421bc2a0fd9f7318..868a3beac5f1b2e993f16b3b1f1995ff58afde34 100755
--- a/tensorflow/tools/ci_build/builds/configured
+++ b/tensorflow/tools/ci_build/builds/configured
@@ -32,15 +32,6 @@ COMMAND=("$@")
 
 export CI_BUILD_PYTHON="${CI_BUILD_PYTHON:-python}"
 export PYTHON_BIN_PATH="${PYTHON_BIN_PATH:-$(which ${CI_BUILD_PYTHON})}"
-if [ "${CONTAINER_TYPE}" == "gpu" ]; then
-  export TF_NEED_CUDA=1
-elif [ "${CONTAINER_TYPE}" == "gpu_clang" ]; then
-  export TF_NEED_CUDA=1
-  export TF_CUDA_CLANG=1
-  export CLANG_CUDA_COMPILER_PATH="/usr/local/bin/clang"
-else
-  export TF_NEED_CUDA=0
-fi
 
 pushd "${CI_TENSORFLOW_SUBMODULE_PATH:-.}"
 yes "" | $PYTHON_BIN_PATH configure.py
diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index 552df1434eab8c4414b8b9a8f7be9c61998d8462..82042b93c02275b51530b306d8cf4519482e5410 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -296,13 +296,11 @@ create_activate_virtualenv_and_install_tensorflow() {
     die "FAILED to create virtualenv directory: ${VIRTUALENV_DIR}"
   fi
 
-  # Verify that virtualenv exists
-  if [[ -z $(which virtualenv) ]]; then
-    die "FAILED: virtualenv not available on path"
-  fi
-
-  virtualenv ${VIRTUALENV_FLAGS} \
-    -p "${PYTHON_BIN_PATH}" "${VIRTUALENV_DIR}" || \
+  # Use the virtualenv from the default python version (i.e., python-virtualenv)
+  # to create the virtualenv directory for testing. Use the -p flag to specify
+  # the python version inside the to-be-created virtualenv directory.
+  ${PYTHON_BIN_PATH} -m virtualenv -p "${PYTHON_BIN_PATH}" ${VIRTUALENV_FLAGS} \
+    "${VIRTUALENV_DIR}" || \
     die "FAILED: Unable to create virtualenv"
 
   source "${VIRTUALENV_DIR}/bin/activate" || \
@@ -345,7 +343,7 @@ do_clean_virtualenv_smoke_test() {
   then
     echo "Smoke test of tensorflow install in clean virtualenv PASSED."
   else
-    echo "Smoke test of tensroflow install in clean virtualenv FAILED."
+    echo "Smoke test of tensorflow install in clean virtualenv FAILED."
     return 1
   fi
 
diff --git a/tensorflow/tools/ci_build/builds/print_build_info.sh b/tensorflow/tools/ci_build/builds/print_build_info.sh
index 7c43419a76ff26be7370326a9113f4e3db2a2b1c..e366abf8bb831688d90a0e3eabed101e42bdaf96 100755
--- a/tensorflow/tools/ci_build/builds/print_build_info.sh
+++ b/tensorflow/tools/ci_build/builds/print_build_info.sh
@@ -88,7 +88,7 @@ fi
 # Print info
 echo "TF_BUILD_INFO = {"\
 "container_type: \"${CONTAINER_TYPE}\", "\
-"command: \"${COMMAND[@]}\", "\
+"command: \"${COMMAND[*]}\", "\
 "source_HEAD: \"${TF_HEAD}\", "\
 "source_remote_origin: \"${TF_FETCH_URL}\", "\
 "OS: \"${OS}\", "\
diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh
index 358f82ac5da0dde655b4f2d1f145f2070b64238b..caa3a40817c80b27271f76de0a95a743cb2916f6 100755
--- a/tensorflow/tools/ci_build/builds/test_user_ops.sh
+++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh
@@ -82,11 +82,11 @@ TF_CFLAGS=( $("${PYTHON_BIN_PATH}" \
 TF_LFLAGS=( $("${PYTHON_BIN_PATH}" \
 	      -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
 
-if [[ -z "${TF_CFLAGS}" || -z "${TF_LFLAGS}" ]]; then
+if [[ -z "${TF_CFLAGS[*]}" || -z "${TF_LFLAGS[*]}" ]]; then
   die "FAILED to determine TensorFlow compilation or linking flags"
 else
-  echo "TensorFlow compile flags: ${TF_CFLAGS[@]}"
-  echo "TensorFlow link flags: ${TF_LFLAGS[@]}"
+  echo "TensorFlow compile flags: ${TF_CFLAGS[*]}"
+  echo "TensorFlow link flags: ${TF_LFLAGS[*]}"
 fi
 
 # Check g++ availability
diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh
index 5164a2501269a1613a10c0a5a129221a7cd3e47b..072dd6ab995bb41c3197d6c898405be487534593 100755
--- a/tensorflow/tools/ci_build/ci_build.sh
+++ b/tensorflow/tools/ci_build/ci_build.sh
@@ -18,7 +18,7 @@
 #                    <COMMAND>
 #
 # CONTAINER_TYPE: Type of the docker container used the run the build:
-#                 e.g., (cpu | gpu | gpu_clang | android | tensorboard)
+#                 e.g., (cpu | gpu | android | tensorboard)
 #
 # DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build.
 #                  If this optional value is not supplied (via the
@@ -79,7 +79,7 @@ if [[ "${CONTAINER_TYPE}" == "cmake" ]]; then
 fi
 
 # Use nvidia-docker if the container is GPU.
-if [[ "${CONTAINER_TYPE}" == "gpu" ]] || [[ "${CONTAINER_TYPE}" == "gpu_clang" ]]; then
+if [[ "${CONTAINER_TYPE}" == "gpu" ]]; then
   DOCKER_BINARY="nvidia-docker"
 else
   DOCKER_BINARY="docker"
@@ -99,7 +99,7 @@ BUILD_TAG="${BUILD_TAG:-tf_ci}"
 
 # Add extra params for cuda devices and libraries for GPU container.
 # And clear them if we are not building for GPU.
-if [[ "${CONTAINER_TYPE}" != "gpu" ]] && [[ "${CONTAINER_TYPE}" != "gpu_clang" ]]; then
+if [[ "${CONTAINER_TYPE}" != "gpu" ]]; then
   GPU_EXTRA_PARAMS=""
 fi
 
diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 2217b110e3f4e5dd2a212fe0cb65ac9f46ce943a..9d23b508aa1c1d20d0f4b5979aa7be2c295fe325 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -18,7 +18,7 @@
 #   ci_parameterized_build.sh
 #
 # The script obeys the following required environment variables:
-#   TF_BUILD_CONTAINER_TYPE:   (CPU | GPU | GPU_CLANG | ANDROID | ANDROID_FULL)
+#   TF_BUILD_CONTAINER_TYPE:   (CPU | GPU | ANDROID | ANDROID_FULL)
 #   TF_BUILD_PYTHON_VERSION:   (PYTHON2 | PYTHON3 | PYTHON3.5)
 #   TF_BUILD_IS_PIP:           (NO_PIP | PIP | BOTH)
 #
@@ -88,6 +88,9 @@
 #   TF_NIGHTLY:
 #                     If this run is being used to build the tf_nightly pip
 #                     packages.
+#   TF_CUDA_CLANG:
+#                     If set to 1, builds and runs cuda_clang configuration.
+#                     Only available inside GPU containers.
 #
 # This script can be used by Jenkins parameterized / matrix builds.
 
@@ -246,16 +249,34 @@ if [[ "$(uname -s)" == "Darwin" ]]; then
   OPT_FLAG="${OPT_FLAG} ${NO_DOCKER_OPT_FLAG}"
 fi
 
+# In DO_DOCKER mode, appends environment variable to docker's run invocation.
+# Otherwise, exports the corresponding variable.
+function set_script_variable() {
+  local VAR="$1"
+  local VALUE="$2"
+  if [[ $DO_DOCKER == "1" ]]; then
+    TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS="${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS} -e $VAR=$VALUE"
+  else
+    export $VAR="$VALUE"
+  fi
+}
+
+
 # Process container type
 if [[ ${CTYPE} == "cpu" ]] || [[ ${CTYPE} == "debian.jessie.cpu" ]]; then
   :
-elif [[ ${CTYPE} == "gpu" ]] || [[ ${CTYPE} == "gpu_clang" ]]; then
-  if [[ ${CTYPE} == "gpu" ]]; then
-    OPT_FLAG="${OPT_FLAG} --config=cuda"
-  else # ${CTYPE} == "gpu_clang"
+elif [[ ${CTYPE} == "gpu" ]]; then
+  set_script_variable TF_NEED_CUDA 1
+
+  if [[ $TF_CUDA_CLANG == "1" ]]; then
     OPT_FLAG="${OPT_FLAG} --config=cuda_clang"
-  fi
 
+    set_script_variable TF_CUDA_CLANG 1
+    # For cuda_clang we download `clang` while building.
+    set_script_variable TF_DOWNLOAD_CLANG 1
+  else
+    OPT_FLAG="${OPT_FLAG} --config=cuda"
+  fi
 
   # Attempt to determine CUDA capability version automatically and use it if
   # CUDA capability version is not specified by the environment variables.
@@ -407,7 +428,7 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
     # CPU only command, fully parallel.
     NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} ${EXTRA_ARGS} -- "\
 "${BAZEL_TARGET}"
-  elif [[ ${CTYPE} == "gpu" ]] || [[ ${CTYPE} == "gpu_clang" ]]; then
+  elif [[ ${CTYPE} == "gpu" ]]; then
     # GPU only command, run as many jobs as the GPU count only.
     NO_PIP_MAIN_CMD="${BAZEL_CMD} ${OPT_FLAG} "\
 "--local_test_jobs=${TF_GPU_COUNT} "\
diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh
index 404a9a6b6296652c009d5725919a21c9cd6e8178..b728c878da0f729c74b20e66cfc97868c3e953f3 100755
--- a/tensorflow/tools/ci_build/ci_sanity.sh
+++ b/tensorflow/tools/ci_build/ci_sanity.sh
@@ -99,7 +99,8 @@ do_pylint() {
 "^tensorflow/contrib/eager/python/metrics_impl\.py.*\[E0202.*method-hidden "\
 "^tensorflow/python/platform/gfile\.py.*\[E0301.*non-iterator "\
 "^tensorflow/python/keras/_impl/keras/callbacks\.py.*\[E1133.*not-an-iterable "\
-"^tensorflow/python/keras/_impl/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition"
+"^tensorflow/python/keras/_impl/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition "\
+"^tensorflow/python/kernel_tests/constant_op_eager_test.py.*\[E0303.*invalid-length-returned"
 
   echo "ERROR_WHITELIST=\"${ERROR_WHITELIST}\""
 
@@ -110,9 +111,9 @@ do_pylint() {
   fi
 
   if [[ $1 == "PYTHON2" ]]; then
-    PYLINT_BIN="python /usr/local/lib/python2.7/dist-packages/pylint/lint.py"
+    PYLINT_BIN="python -m pylint"
   elif [[ $1 == "PYTHON3" ]]; then
-    PYLINT_BIN="python3 /usr/local/lib/python3.4/dist-packages/pylint/lint.py"
+    PYLINT_BIN="python3 -m pylint"
   else
     echo "Unrecognized python version (PYTHON2 | PYTHON3): $1"
     return 1
diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index 6e7b752c06f43fe7f8fa26bd52a28ed33f38edd8..cfeaebdbf57c01fef7cd81dae76217429336d0ff 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -45,7 +45,7 @@ for i in `seq 0 $((TF_GPU_COUNT-1))`; do
       # This export only works within the brackets, so it is isolated to one
       # single command.
       export CUDA_VISIBLE_DEVICES=$i
-      echo "Running test $@ on GPU $CUDA_VISIBLE_DEVICES"
+      echo "Running test $* on GPU $CUDA_VISIBLE_DEVICES"
       $@
     )
     return_code=$?
diff --git a/tensorflow/tools/ci_build/install/build_and_install_clang.sh b/tensorflow/tools/ci_build/install/build_and_install_clang.sh
deleted file mode 100755
index 99664344777256b9eb8c3764bb1900f26b43cc6e..0000000000000000000000000000000000000000
--- a/tensorflow/tools/ci_build/install/build_and_install_clang.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-set -ex
-
-LLVM_SVN_REVISION="314281"
-CLANG_TMP_DIR=/tmp/clang-build
-
-mkdir "$CLANG_TMP_DIR"
-
-pushd "$CLANG_TMP_DIR"
-
-# Checkout llvm+clang
-svn co -q -r$LLVM_SVN_REVISION http://llvm.org/svn/llvm-project/llvm/trunk "$CLANG_TMP_DIR/llvm"
-svn co -q -r$LLVM_SVN_REVISION http://llvm.org/svn/llvm-project/cfe/trunk "$CLANG_TMP_DIR/llvm/tools/clang"
-
-# Build 1st stage. Compile clang with system compiler
-mkdir "$CLANG_TMP_DIR/build-1"
-cd "$CLANG_TMP_DIR/build-1"
-cmake -G"Unix Makefiles" -DCMAKE_BUILD_TYPE=Release "$CLANG_TMP_DIR/llvm"
-make -j `nproc` clang clang-headers
-
-# Build 2nd stage. Compile clang with clang built in stage 1
-mkdir "$CLANG_TMP_DIR/build-2"
-cd "$CLANG_TMP_DIR/build-2"
-
-CC="$CLANG_TMP_DIR/build-1/bin/clang" \
-CXX="$CLANG_TMP_DIR/build-1/bin/clang++" \
-cmake -G"Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local "$CLANG_TMP_DIR/llvm"
-
-make -j `nproc` install-clang install-clang-headers
-
-popd
-
-# Cleanup
-rm -rf "$CLANG_TMP_DIR"
diff --git a/tensorflow/tools/ci_build/install/install_bazel.sh b/tensorflow/tools/ci_build/install/install_bazel.sh
index 1454264a8007104c6ad20d3e393076d1cc20513c..cf8737c2d8c746b6ad6c436745193290e31326ea 100755
--- a/tensorflow/tools/ci_build/install/install_bazel.sh
+++ b/tensorflow/tools/ci_build/install/install_bazel.sh
@@ -15,7 +15,7 @@
 # ==============================================================================
 
 # Select bazel version.
-BAZEL_VERSION="0.5.4"
+BAZEL_VERSION="0.8.0"
 
 set +e
 local_bazel_ver=$(bazel version 2>&1 | grep -i label | awk '{print $3}')
diff --git a/tensorflow/tools/ci_build/install/install_deb_packages.sh b/tensorflow/tools/ci_build/install/install_deb_packages.sh
index 4ab307c9253a8019f2c794b696db030722751770..96408105339d9a3e21aecb3bae9894551f8b6811 100755
--- a/tensorflow/tools/ci_build/install/install_deb_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_deb_packages.sh
@@ -48,6 +48,7 @@ apt-get install -y --no-install-recommends \
     git \
     libcurl4-openssl-dev \
     libtool \
+    libssl-dev \
     mlocate \
     openjdk-8-jdk \
     openjdk-8-jre-headless \
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index b8ed1ab7676ff4efaef01dd5009effbf5ab05a92..71744c04f2f432bc76eadfac406233ad8241a52a 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -27,6 +27,9 @@ easy_install3 -U pip
 pip2 install wheel
 pip3 install wheel
 
+pip2 install virtualenv
+pip3 install virtualenv
+
 # Install six.
 pip2 install --upgrade six==1.10.0
 pip3 install --upgrade six==1.10.0
@@ -94,3 +97,10 @@ pip3 install portpicker
 pip2 install grpcio
 pip3 install grpcio
 
+# Eager-to-graph execution needs astor, gast and termcolor:
+pip2 install --upgrade astor
+pip3 install --upgrade astor
+pip2 install --upgrade gast
+pip3 install --upgrade gast
+pip2 install --upgrade termcolor
+pip3 install --upgrade termcolor
diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
index 479242aa4376883f851486ca38a859a75d4f4f51..aefc49f60482148e565a5262eebd5b3ac85987cf 100755
--- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
@@ -39,6 +39,8 @@ if [[ -z $pip35_version ]]; then
 fi
 
 set -e
+pip3.5 install --upgrade virtualenv
+
 # Install six.
 pip3.5 install --upgrade absl-py
 pip3.5 install --upgrade six==1.10.0
@@ -58,7 +60,7 @@ pip3.5 install --no-binary=:all: --upgrade numpy==1.12.0
 
 pip3.5 install scipy==0.18.1
 
-pip3.5 install scikit-learn==0.18.1
+pip3.5 install scikit-learn==0.19.1
 
 # pandas required by `inflow`
 pip3 install pandas==0.19.2
@@ -72,4 +74,9 @@ pip3.5 install werkzeug
 
 pip3.5 install grpcio
 
+# Eager-to-graph execution needs astor, gast and termcolor:
+pip3.5 install --upgrade astor
+pip3.5 install --upgrade gast
+pip3.5 install --upgrade termcolor
+
 # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh)
diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
index c354aaa154e8d01ba69f157dd195ef439270c2ec..bfaa044c82887bd1dc99d13952e09c9cc49cf11b 100755
--- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
@@ -22,29 +22,42 @@
 
 # fkrull/deadsnakes is for Python3.6
 add-apt-repository -y ppa:fkrull/deadsnakes
+
 apt-get update
+apt-get upgrade
+
+# Install python dep
+apt-get install python-dev
+# Install bz2 dep
+apt-get install libbz2-dev
+# Install curses dep
+apt-get install libncurses5 libncurses5-dev
+apt-get install libncursesw5 libncursesw5-dev
+# Install readline dep
+apt-get install libreadline6 libreadline6-dev
+# Install sqlite3 dependencies
+apt-get install libsqlite3-dev
 
 set -e
+
 # Install Python 3.6 and dev library
-apt-get install -y --no-install-recommends python3.6 libpython3.6-dev
-
-# Install pip3.6
-set +e
-pip35_version=$(pip3.6 --version | grep "python 3.6")
-if [[ -z $pip35_version ]]; then
-  set -e
-  wget -q https://bootstrap.pypa.io/get-pip.py
-  python3.6 get-pip.py
-  rm -f get-pip.py
-fi
+wget https://www.python.org/ftp/python/3.6.1/Python-3.6.1.tar.xz
+tar xvf Python-3.6.1.tar.xz
+cd Python-3.6.1
+
+./configure
+make altinstall
+ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3
+
+pip3 install --upgrade virtualenv
 
 set -e
 # Install six.
-pip3.6 install --upgrade absl-py
-pip3.6 install --upgrade six==1.10.0
+pip3 install --upgrade absl-py
+pip3 install --upgrade six==1.10.0
 
 # Install protobuf.
-pip3.6 install --upgrade protobuf==3.3.0
+pip3 install --upgrade protobuf==3.3.0
 
 # Remove obsolete version of six, which can sometimes confuse virtualenv.
 rm -rf /usr/lib/python3/dist-packages/six*
@@ -54,22 +67,31 @@ rm -rf /usr/lib/python3/dist-packages/six*
 # numpy needs to be installed from source to fix segfaults. See:
 # https://github.com/tensorflow/tensorflow/issues/6968
 # This workaround isn't needed for Ubuntu 16.04 or later.
-pip3.6 install --no-binary=:all: --upgrade numpy==1.12.0
+pip3 install --no-binary=:all: --upgrade numpy==1.12.0
 
-pip3.6 install scipy==0.18.1
+pip3 install scipy==0.18.1
 
-pip3.6 install scikit-learn==0.18.1
+pip3 install scikit-learn==0.19.1
 
 # pandas required by `inflow`
 pip3 install pandas==0.19.2
 
+pip3 install gnureadline
+
+pip3 install bz2file
+
 # Install recent-enough version of wheel for Python 3.6 wheel builds
-pip3.6 install wheel==0.29.0
+pip3 install wheel==0.29.0
+
+pip3 install portpicker
 
-pip3.6 install portpicker
+pip3 install werkzeug
 
-pip3.6 install werkzeug
+pip3 install grpcio
 
-pip3.6 install grpcio
+# Eager-to-graph execution needs astor, gast and termcolor:
+pip3 install --upgrade astor
+pip3 install --upgrade gast
+pip3 install --upgrade termcolor
 
 # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh)
diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 88116d9f246cabdf19c8b24bf8c95fdf52076fe0..1bd1852ffc570166ecc6efca1420bc54d702ed89 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -82,6 +82,7 @@ if [[ $1 == "PI_ONE" ]]; then
 else
   PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4
   --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR
+  --copt=-O3
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
   --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8'
diff --git a/tensorflow/tools/ci_build/remote/Dockerfile.gpu b/tensorflow/tools/ci_build/remote/Dockerfile.gpu
index e13d2c1c20838a35ec90aa24f85c969cb1f4d52a..47ffd44163dd3e4b99f06689e1aa6f19f84cc2ca 100644
--- a/tensorflow/tools/ci_build/remote/Dockerfile.gpu
+++ b/tensorflow/tools/ci_build/remote/Dockerfile.gpu
@@ -18,7 +18,9 @@ RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
     rm get-pip.py
 
 # Set up grpc
-RUN pip install --upgrade enum34 futures mock numpy six backports.weakref && \
+RUN pip install --upgrade \
+        enum34 futures astor gast mock numpy six \
+        backports.weakref termcolor && \
     pip install --pre 'protobuf>=3.0.0a3' && \
     pip install 'grpcio>=1.1.3'
 
diff --git a/tensorflow/tools/ci_build/remote/remote_docker_build.sh b/tensorflow/tools/ci_build/remote/remote_docker_build.sh
index 3ac6840f4e7a881da4ab973a7fadd921ed288828..e00a66aabaf1068c772aabce2391616518be44d4 100755
--- a/tensorflow/tools/ci_build/remote/remote_docker_build.sh
+++ b/tensorflow/tools/ci_build/remote/remote_docker_build.sh
@@ -124,7 +124,7 @@ function build_tf_image {
 
 
 function publish_tf_image {
-  $gcr_tf_image="gcr.io/tensorflow/${tf_image}"
+  gcr_tf_image="gcr.io/tensorflow/${tf_image}"
   docker tag $tf_image $gcr_tf_image
   gcloud docker -- push $gcr_tf_image
 }
diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
index 44b6d52952838d013f09275a3387198249837df8..7b2d7e1a568b0235a5bdd55bb23e542772902576 100644
--- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh
@@ -21,7 +21,6 @@ failing_cpu_cc_tests="\
     //tensorflow/core:lib_core_status_test + \
     //tensorflow/core:lib_monitoring_collection_registry_test + \
     //tensorflow/core:lib_strings_numbers_test + \
-    //tensorflow/core:lib_strings_str_util_test + \
     //tensorflow/core/platform/hadoop:hadoop_file_system_test + \
     //tensorflow/core:platform_file_system_test + \
     //tensorflow/core:platform_logging_test + \
@@ -43,7 +42,6 @@ broken_cpu_cc_tests="\
     //tensorflow/core/platform/cloud:gcs_file_system_test + \
     //tensorflow/core/kernels/cloud:bigquery_table_accessor_test + \
     //tensorflow/core/kernels/hexagon:graph_transferer_test + \
-    //tensorflow/core/kernels/hexagon:quantized_matmul_op_for_hexagon_test + \
     //tensorflow/core/kernels:remote_fused_graph_execute_utils_test + \
     //tensorflow/core/kernels:requantize_op_test + \
     //tensorflow/core/kernels:requantization_range_op_test + \
@@ -96,10 +94,6 @@ exclude_cpu_cc_tests="${failing_cpu_cc_tests} + ${broken_cpu_cc_tests}"
 
 exclude_gpu_cc_tests="${extra_failing_gpu_cc_tests} + ${exclude_cpu_cc_tests}"
 
-function clean_output_base() {
-  bazel clean --expunge
-}
-
 function run_configure_for_cpu_build {
   # Due to a bug in Bazel: https://github.com/bazelbuild/bazel/issues/2182
   # yes "" | ./configure doesn't work on Windows, so we set all the
@@ -108,14 +102,11 @@ function run_configure_for_cpu_build {
   if [ -z "$TF_ENABLE_XLA" ]; then
     export TF_ENABLE_XLA=0
   fi
-  if [ -z "$CC_OPT_FLAGS" ]; then
-    export CC_OPT_FLAGS="-march=native"
-  fi
   if [ -z "$TF_NEED_MKL" ]; then
     export TF_NEED_MKL=0
   fi
   export TF_NEED_VERBS=0
-  export TF_NEED_GCP=0
+  export TF_NEED_GCP=1
   export TF_NEED_HDFS=0
   export TF_NEED_OPENCL_SYCL=0
   echo "" | ./configure
@@ -126,17 +117,14 @@ function run_configure_for_gpu_build {
   # yes "" | ./configure doesn't work on Windows, so we set all the
   # environment variables in advance to avoid interact with the script.
   export TF_NEED_CUDA=1
-  export TF_CUDA_VERSION=8.0
-  export CUDA_TOOLKIT_PATH="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0"
-  export TF_CUDNN_VERSION=6.0
+  export TF_CUDA_VERSION=9.0
+  export CUDA_TOOLKIT_PATH="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0"
+  export TF_CUDNN_VERSION=7.0
   export CUDNN_INSTALL_PATH="C:/tools/cuda"
   export TF_CUDA_COMPUTE_CAPABILITIES="3.7"
   if [ -z "$TF_ENABLE_XLA" ]; then
     export TF_ENABLE_XLA=0
   fi
-  if [ -z "$CC_OPT_FLAGS" ]; then
-    export CC_OPT_FLAGS="-march=native"
-  fi
   export TF_NEED_VERBS=0
   export TF_NEED_MKL=0
   export TF_NEED_GCP=0
diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
index 4a653698a2d7c12ce59a53bf96e1551a633f7cab..1c35d74af72ad0a72b0016356888c8cf77e20e56 100644
--- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
@@ -32,34 +32,20 @@ mkdir -p "$TMPDIR"
 # Set bash path
 export BAZEL_SH=${BAZEL_SH:-"C:/tools/msys64/usr/bin/bash"}
 
-# Set Python path for ./configure
-export PYTHON_BIN_PATH="C:/Program Files/Anaconda3/python.exe"
-export PYTHON_LIB_PATH="C:/Program Files/Anaconda3/lib/site-packages"
-
-# Set Python path for cc_configure.bzl
-export BAZEL_PYTHON="C:/Program Files/Anaconda3/python.exe"
+export PYTHON_BASE_PATH="${PYTHON_DIRECTORY:-Program Files/Anaconda3}"
 
-# Set Visual Studio path
-export BAZEL_VS="C:/Program Files (x86)/Microsoft Visual Studio 14.0"
+# Set Python path for ./configure
+export PYTHON_BIN_PATH="C:/${PYTHON_BASE_PATH}/python.exe"
+export PYTHON_LIB_PATH="C:/${PYTHON_BASE_PATH}/lib/site-packages"
 
 # Add python into PATH, it's needed because gen_git_source.py uses
 # '/usr/bin/env python' as a shebang
-export PATH="/c/Program Files/Anaconda3:$PATH"
+export PATH="/c/${PYTHON_BASE_PATH}:$PATH"
 
 # Make sure we have pip in PATH
-export PATH="/c/Program Files/Anaconda3/Scripts:$PATH"
+export PATH="/c/${PYTHON_BASE_PATH}/Scripts:$PATH"
 
 # Add Cuda and Cudnn dll directories into PATH
-export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin:$PATH"
-export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/extras/CUPTI/libx64:$PATH"
+export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0/bin:$PATH"
+export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0/extras/CUPTI/libx64:$PATH"
 export PATH="/c/tools/cuda/bin:$PATH"
-
-# Set the common build options on Windows
-export BUILD_OPTS='--config=monolithic --copt=-w --host_copt=-w --verbose_failures --experimental_ui'
-
-# Build TF with wrapper-less CROSSTOOL
-# TODO(pcloudy): Remove this after wrapper-less CROSSTOOL becomes default
-export NO_MSVC_WRAPPER=1
-
-export USE_DYNAMIC_CRT=1
-
diff --git a/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh b/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
index 8c419347d6f4b3af2e47bb96f246dc7281a92364..748a961e44c5429664e37a1456adcf02a56fa3d4 100644
--- a/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/bazel/run_cc_test_windows.sh
@@ -42,8 +42,6 @@ source "tensorflow/tools/ci_build/windows/bazel/common_env.sh" \
 source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
   || { echo "Failed to source bazel_test_lib.sh" >&2; exit 1; }
 
-clean_output_base
-
 run_configure_for_cpu_build
 
 # Compliling the following test is extremely slow with -c opt
@@ -54,5 +52,5 @@ passing_tests=$(bazel query "kind(cc_test, //tensorflow/cc/... + //tensorflow/co
   # We need to strip \r so that the result could be store into a variable under MSYS
   tr '\r' ' ')
 
-bazel test $BUILD_OPTS -k $slow_compiling_test --test_output=errors
-bazel test -c opt $BUILD_OPTS -k $passing_tests --test_output=errors
+bazel test -k $slow_compiling_test --test_output=errors
+bazel test -c opt -k $passing_tests --test_output=errors
diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
index 56bff077746b8195a93b6ab8d7ce707b06549daa..957729bb37db3ae49800c277f4090a52117c699d 100644
--- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
+++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat
@@ -30,11 +30,13 @@ IF DEFINED SWIG_EXE (ECHO SWIG_EXE is set to %SWIG_EXE%) ELSE (SET SWIG_EXE="C:\
 IF DEFINED PY_EXE (ECHO PY_EXE is set to %PY_EXE%) ELSE (SET PY_EXE="C:\Program Files\Anaconda3\python.exe")
 IF DEFINED PY_LIB (ECHO PY_LIB is set to %PY_LIB%) ELSE (SET PY_LIB="C:\Program Files\Anaconda3\libs\python35.lib")
 
+IF DEFINED DISABLE_FORCEINLINE (ECHO DISABLE_FORCEINLINE is set to %DISABLE_FORCEINLINE%) ELSE (SET DISABLE_FORCEINLINE="OFF")
+
 SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake
 SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe"
 
 :: Run cmake to create Visual Studio Project files.
-%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY%
+%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE%
 
 :: Run msbuild in the resulting VS project files to build a pip package.
 %MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj
diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index 8520ca898f84a4990aaf4348d1cfb09dce2ff7ab..8b8ba31a0dda88ad3c43330e0208a9fa6a7d0276 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -44,8 +44,9 @@ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
 
 run_configure_for_cpu_build
 
-clean_output_base
-
+# --define=override_eigen_strong_inline=true speeds up the compiling of conv_grad_ops_3d.cc and conv_ops_3d.cc
+# by 20 minutes. See https://github.com/tensorflow/tensorflow/issues/10521
+BUILD_OPTS="--define=override_eigen_strong_inline=true"
 bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $?
 
 # Create a python test directory to avoid package name conflict
@@ -60,11 +61,8 @@ reinstall_tensorflow_pip ${PIP_NAME}
 
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
-# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after
-# https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
 bazel test -c opt $BUILD_OPTS -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
   --test_tag_filters=-no_pip,-no_windows,-no_oss \
   --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \
-  --test_env=TF_SAVER_LENIENT_NAMES=True \
   //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh b/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
index 3fd960deabbb0ace8c9598589f9f9a72fd09b3a9..f26f8727e51bf0247578c1cdfaa67e1b0f7f299d 100644
--- a/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/bazel/run_cc_test_windows.sh
@@ -56,5 +56,5 @@ passing_tests=$(bazel query "kind(cc_test, //tensorflow/cc/... + //tensorflow/co
 
 # TODO(pcloudy): There is a bug in Bazel preventing build with GPU support without -c opt
 # Re-enable this test after it is fixed.
-# bazel test --config=win-cuda $BUILD_OPTS -k $slow_compiling_test --test_output=errors
-bazel test -c opt --config=win-cuda $BUILD_OPTS -k $passing_tests --test_output=errors
+# bazel test --config=win-cuda -k $slow_compiling_test --test_output=errors
+bazel test -c opt --config=win-cuda -k $passing_tests --test_output=errors
diff --git a/tensorflow/tools/ci_build/windows/gpu/bazel/run_libtensorflow.bat b/tensorflow/tools/ci_build/windows/gpu/bazel/run_libtensorflow.bat
new file mode 100644
index 0000000000000000000000000000000000000000..773d9c8865cddeea56e1489876a465c4cc5c018e
--- /dev/null
+++ b/tensorflow/tools/ci_build/windows/gpu/bazel/run_libtensorflow.bat
@@ -0,0 +1 @@
+c:\tools\msys64\usr\bin\bash -l %cd%/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh %*
diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
index 832943ad6c82855a76be0782c5332fb8e0f202b6..5a362de3992156fea8a5fc6ab4c70ba67ab47f89 100644
--- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
+++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat
@@ -31,11 +31,13 @@ IF DEFINED PY_EXE (ECHO PY_EXE is set to %PY_EXE%) ELSE (SET PY_EXE="C:\Program
 IF DEFINED PY_LIB (ECHO PY_LIB is set to %PY_LIB%) ELSE (SET PY_LIB="C:\Program Files\Anaconda3\libs\python35.lib")
 IF DEFINED CUDNN_HOME (ECHO CUDNN_HOME is set to %CUDNN_HOME%) ELSE (SET CUDNN_HOME="c:\tools\cuda")
 verbosity:quiet
+IF DEFINED DISABLE_FORCEINLINE (ECHO DISABLE_FORCEINLINE is set to %DISABLE_FORCEINLINE%) ELSE (SET DISABLE_FORCEINLINE="OFF")
+
 SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake
 SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe"
 
 :: Run cmake to create Visual Studio Project files.
-%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY%
+%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE%
 
 :: Run msbuild in the resulting VS project files to build a pip package.
 %MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj
diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 47ca42d6422f27fe1086fda75d33687cfe2db9b0..922bb67bbf6ce34f55acad6d3399bd810032abd0 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -44,9 +44,7 @@ source "tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh" \
 
 run_configure_for_gpu_build
 
-clean_output_base
-
-bazel build -c opt $BUILD_OPTS tensorflow/tools/pip_package:build_pip_package || exit $?
+bazel build -c opt tensorflow/tools/pip_package:build_pip_package || exit $?
 
 # Create a python test directory to avoid package name conflict
 PY_TEST_DIR="py_test_dir"
@@ -61,11 +59,8 @@ reinstall_tensorflow_pip ${PIP_NAME}
 # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore,
 # which will result testing system installed tensorflow
 # GPU tests are very flaky when running concurrently, so set local_test_jobs=1
-# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after
-# https://github.com/tensorflow/tensorflow/issues/12844 is fixed.
-bazel test -c opt $BUILD_OPTS -k --test_output=errors \
+bazel test -c opt -k --test_output=errors \
   --define=no_tensorflow_py_deps=true --test_lang_filters=py \
   --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
   --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu,no_oss \
-  --test_env=TF_SAVER_LENIENT_NAMES=True \
   --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
index 9ac3613f27e1bc96501490b7610f047785b9ada2..fa28e3d79ca4ee5f429a41dd3e871248d5c047ca 100755
--- a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh
@@ -31,36 +31,22 @@ if [ ! -e "WORKSPACE" ]; then
   exit 1
 fi
 
-# Enable JNI support for Windows in Bazel.
-# This can be removed once
-# https://github.com/bazelbuild/bazel/pull/2599
-# has been merged and we switch to a bazel release containing it.
-cp "${JAVA_HOME}/include/win32/jni_md.h" "./tensorflow/java/src/main/native/windows_jni_md.h"
-sed -i -e "s|@bazel_tools//tools/jdk:jni_md_header-linux|windows_jni_md.h|" ./tensorflow/java/src/main/native/BUILD
-#### END HACKS TO BE RESOLVED WITH NEW BAZEL VERSIONS ####
-
 export TF_BAZEL_TARGETS="//tensorflow:libtensorflow.so"
 export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:clicenses_generate"
 export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/java:libtensorflow_jni.so"
 export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:jnilicenses_generate"
 
-clean_output_base
 run_configure_for_cpu_build
 
 # build_libtensorflow_tarball in ../builds/libtensorflow.sh
 # cannot be used on Windows since it relies on pkg_tar rules.
 # So we do something special here
-bazel build -c opt ${BUILD_OPTS} \
+bazel build -c opt \
   tensorflow:libtensorflow.so \
   tensorflow/tools/lib_package:clicenses_generate \
   tensorflow/java:libtensorflow_jni.so \
   tensorflow/tools/lib_package:jnilicenses_generate
 
-# Revert the hacks above
-git checkout ./tensorflow/tools/pip_package/BUILD
-git checkout ./tensorflow/java/src/main/native/BUILD
-rm -f ./tensorflow/java/src/main/native/windows_jni_md.h
-
 DIR=lib_package
 rm -rf ${DIR}
 mkdir -p ${DIR}
@@ -74,13 +60,16 @@ rm -f ${DIR}/tensorflow_jni.dll
 
 # Zip up the .dll, LICENSE and include files for the C library.
 mkdir -p ${DIR}/include/tensorflow/c
+mkdir -p ${DIR}/include/tensorflow/c/eager
 mkdir -p ${DIR}/lib
 cp bazel-bin/tensorflow/libtensorflow.so ${DIR}/lib/tensorflow.dll
 cp tensorflow/c/c_api.h ${DIR}/include/tensorflow/c
+cp tensorflow/c/eager/c_api.h ${DIR}/include/tensorflow/c/eager
 cp bazel-genfiles/tensorflow/tools/lib_package/include/tensorflow/c/LICENSE ${DIR}/include/tensorflow/c
 cd ${DIR}
-zip -j libtensorflow-cpu-windows-$(uname -m).zip \
+zip libtensorflow-cpu-windows-$(uname -m).zip \
   lib/tensorflow.dll \
+  include/tensorflow/c/eager/c_api.h \
   include/tensorflow/c/c_api.h \
   include/tensorflow/c/LICENSE
 rm -rf lib include
diff --git a/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh
new file mode 100644
index 0000000000000000000000000000000000000000..573c926203fc76b787ba08b10bd71c8effda29b6
--- /dev/null
+++ b/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Script to produce binary release of libtensorflow (C API, Java jars etc.).
+
+set -ex
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Setup environment for bazel builds
+source "${SCRIPT_DIR}/bazel/common_env.sh"
+source "${SCRIPT_DIR}/bazel/bazel_test_lib.sh"
+
+# Sanity check that this is being run from the root of the git repository.
+cd ${SCRIPT_DIR}/../../../..
+if [ ! -e "WORKSPACE" ]; then
+  echo "Must run this from the root of the bazel workspace"
+  echo "Currently at ${PWD}, script is at ${SCRIPT_DIR}"
+  exit 1
+fi
+
+export TF_BAZEL_TARGETS="//tensorflow:libtensorflow.so"
+export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:clicenses_generate"
+export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/java:libtensorflow_jni.so"
+export TF_BAZEL_TARGETS="${TF_BAZEL_TARGETS} //tensorflow/tools/lib_package:jnilicenses_generate"
+
+run_configure_for_gpu_build
+
+# build_libtensorflow_tarball in ../builds/libtensorflow.sh
+# cannot be used on Windows since it relies on pkg_tar rules.
+# So we do something special here
+bazel build -c opt \
+  tensorflow:libtensorflow.so \
+  tensorflow/tools/lib_package:clicenses_generate \
+  tensorflow/java:libtensorflow_jni.so \
+  tensorflow/tools/lib_package:jnilicenses_generate
+
+DIR=lib_package
+rm -rf ${DIR}
+mkdir -p ${DIR}
+
+# Zip up the .dll and the LICENSE for the JNI library.
+cp bazel-bin/tensorflow/java/libtensorflow_jni.so ${DIR}/tensorflow_jni.dll
+zip -j ${DIR}/libtensorflow_jni-gpu-windows-$(uname -m).zip \
+  ${DIR}/tensorflow_jni.dll \
+  bazel-genfiles/tensorflow/tools/lib_package/include/tensorflow/jni/LICENSE
+rm -f ${DIR}/tensorflow_jni.dll
+
+# Zip up the .dll, LICENSE and include files for the C library.
+mkdir -p ${DIR}/include/tensorflow/c
+mkdir -p ${DIR}/lib
+cp bazel-bin/tensorflow/libtensorflow.so ${DIR}/lib/tensorflow.dll
+cp tensorflow/c/c_api.h ${DIR}/include/tensorflow/c
+cp bazel-genfiles/tensorflow/tools/lib_package/include/tensorflow/c/LICENSE ${DIR}/include/tensorflow/c
+cd ${DIR}
+zip -j libtensorflow-gpu-windows-$(uname -m).zip \
+  lib/tensorflow.dll \
+  include/tensorflow/c/c_api.h \
+  include/tensorflow/c/LICENSE
+rm -rf lib include
diff --git a/tensorflow/tools/compatibility/BUILD b/tensorflow/tools/compatibility/BUILD
index 51e4c6cef38a4c1606cd16d1c2ac75edc1f1249a..4f90c4d940670c43f65cc3f95971469627ab35c9 100644
--- a/tensorflow/tools/compatibility/BUILD
+++ b/tensorflow/tools/compatibility/BUILD
@@ -10,10 +10,7 @@ load(
 
 py_binary(
     name = "tf_upgrade",
-    srcs = [
-        "ast_edits.py",
-        "tf_upgrade.py",
-    ],
+    srcs = ["tf_upgrade.py"],
     srcs_version = "PY2AND3",
 )
 
@@ -22,7 +19,7 @@ py_test(
     srcs = ["tf_upgrade_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "tf_upgrade",
+        ":tf_upgrade",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_test_lib",
         "@six_archive//:six",
@@ -48,11 +45,11 @@ genrule(
         "test_file_v1_0.py",
         "report.txt",
     ],
-    cmd = ("$(location tf_upgrade)" +
+    cmd = ("$(location :tf_upgrade)" +
            " --infile $(location testdata/test_file_v0_11.py)" +
            " --outfile $(location test_file_v1_0.py)" +
            " --reportfile $(location report.txt)"),
-    tools = ["tf_upgrade"],
+    tools = [":tf_upgrade"],
 )
 
 py_test(
diff --git a/tensorflow/tools/compatibility/ast_edits.py b/tensorflow/tools/compatibility/ast_edits.py
deleted file mode 100644
index e7e4c91692132946f303f1b7ea48c5089a14de2e..0000000000000000000000000000000000000000
--- a/tensorflow/tools/compatibility/ast_edits.py
+++ /dev/null
@@ -1,497 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Upgrader for Python scripts according to an API change specification."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import ast
-import collections
-import os
-import shutil
-import sys
-import tempfile
-import traceback
-
-
-class APIChangeSpec(object):
-  """This class defines the transformations that need to happen.
-
-  This class must provide the following fields:
-
-  * `function_keyword_renames`: maps function names to a map of old -> new
-    argument names
-  * `function_renames`: maps function names to new function names
-  * `change_to_function`: a set of function names that have changed (for
-    notifications)
-  * `function_reorders`: maps functions whose argument order has changed to the
-    list of arguments in the new order
-  * `function_handle`: maps function names to custom handlers for the function
-
-  For an example, see `TFAPIChangeSpec`.
-  """
-
-
-class _FileEditTuple(collections.namedtuple(
-    "_FileEditTuple", ["comment", "line", "start", "old", "new"])):
-  """Each edit that is recorded by a _FileEditRecorder.
-
-  Fields:
-    comment: A description of the edit and why it was made.
-    line: The line number in the file where the edit occurs (1-indexed).
-    start: The line number in the file where the edit occurs (0-indexed).
-    old: text string to remove (this must match what was in file).
-    new: text string to add in place of `old`.
-  """
-
-  __slots__ = ()
-
-
-class _FileEditRecorder(object):
-  """Record changes that need to be done to the file."""
-
-  def __init__(self, filename):
-    # all edits are lists of chars
-    self._filename = filename
-
-    self._line_to_edit = collections.defaultdict(list)
-    self._errors = []
-
-  def process(self, text):
-    """Process a list of strings, each corresponding to the recorded changes.
-
-    Args:
-      text: A list of lines of text (assumed to contain newlines)
-    Returns:
-      A tuple of the modified text and a textual description of what is done.
-    Raises:
-      ValueError: if substitution source location does not have expected text.
-    """
-
-    change_report = ""
-
-    # Iterate of each line
-    for line, edits in self._line_to_edit.items():
-      offset = 0
-      # sort by column so that edits are processed in order in order to make
-      # indexing adjustments cumulative for changes that change the string
-      # length
-      edits.sort(key=lambda x: x.start)
-
-      # Extract each line to a list of characters, because mutable lists
-      # are editable, unlike immutable strings.
-      char_array = list(text[line - 1])
-
-      # Record a description of the change
-      change_report += "%r Line %d\n" % (self._filename, line)
-      change_report += "-" * 80 + "\n\n"
-      for e in edits:
-        change_report += "%s\n" % e.comment
-      change_report += "\n    Old: %s" % (text[line - 1])
-
-      # Make underscore buffers for underlining where in the line the edit was
-      change_list = [" "] * len(text[line - 1])
-      change_list_new = [" "] * len(text[line - 1])
-
-      # Iterate for each edit
-      for e in edits:
-        # Create effective start, end by accounting for change in length due
-        # to previous edits
-        start_eff = e.start + offset
-        end_eff = start_eff + len(e.old)
-
-        # Make sure the edit is changing what it should be changing
-        old_actual = "".join(char_array[start_eff:end_eff])
-        if old_actual != e.old:
-          raise ValueError("Expected text %r but got %r" %
-                           ("".join(e.old), "".join(old_actual)))
-        # Make the edit
-        char_array[start_eff:end_eff] = list(e.new)
-
-        # Create the underline highlighting of the before and after
-        change_list[e.start:e.start + len(e.old)] = "~" * len(e.old)
-        change_list_new[start_eff:end_eff] = "~" * len(e.new)
-
-        # Keep track of how to generate effective ranges
-        offset += len(e.new) - len(e.old)
-
-      # Finish the report comment
-      change_report += "         %s\n" % "".join(change_list)
-      text[line - 1] = "".join(char_array)
-      change_report += "    New: %s" % (text[line - 1])
-      change_report += "         %s\n\n" % "".join(change_list_new)
-    return "".join(text), change_report, self._errors
-
-  def add(self, comment, line, start, old, new, error=None):
-    """Add a new change that is needed.
-
-    Args:
-      comment: A description of what was changed
-      line: Line number (1 indexed)
-      start: Column offset (0 indexed)
-      old: old text
-      new: new text
-      error: this "edit" is something that cannot be fixed automatically
-    Returns:
-      None
-    """
-
-    self._line_to_edit[line].append(
-        _FileEditTuple(comment, line, start, old, new))
-    if error:
-      self._errors.append("%s:%d: %s" % (self._filename, line, error))
-
-
-class _ASTCallVisitor(ast.NodeVisitor):
-  """AST Visitor that processes function calls.
-
-  Updates function calls from old API version to new API version using a given
-  change spec.
-  """
-
-  def __init__(self, filename, lines, api_change_spec):
-    self._filename = filename
-    self._file_edit = _FileEditRecorder(filename)
-    self._lines = lines
-    self._api_change_spec = api_change_spec
-
-  def process(self, lines):
-    return self._file_edit.process(lines)
-
-  def generic_visit(self, node):
-    ast.NodeVisitor.generic_visit(self, node)
-
-  def _rename_functions(self, node, full_name):
-    function_renames = self._api_change_spec.function_renames
-    try:
-      new_name = function_renames[full_name]
-      self._file_edit.add("Renamed function %r to %r" % (full_name,
-                                                         new_name),
-                          node.lineno, node.col_offset, full_name, new_name)
-    except KeyError:
-      pass
-
-  def _get_attribute_full_path(self, node):
-    """Traverse an attribute to generate a full name e.g. tf.foo.bar.
-
-    Args:
-      node: A Node of type Attribute.
-
-    Returns:
-      a '.'-delimited full-name or None if the tree was not a simple form.
-      i.e. `foo()+b).bar` returns None, while `a.b.c` would return "a.b.c".
-    """
-    curr = node
-    items = []
-    while not isinstance(curr, ast.Name):
-      if not isinstance(curr, ast.Attribute):
-        return None
-      items.append(curr.attr)
-      curr = curr.value
-    items.append(curr.id)
-    return ".".join(reversed(items))
-
-  def _find_true_position(self, node):
-    """Return correct line number and column offset for a given node.
-
-    This is necessary mainly because ListComp's location reporting reports
-    the next token after the list comprehension list opening.
-
-    Args:
-      node: Node for which we wish to know the lineno and col_offset
-    """
-    import re
-    find_open = re.compile("^\s*(\\[).*$")
-    find_string_chars = re.compile("['\"]")
-
-    if isinstance(node, ast.ListComp):
-      # Strangely, ast.ListComp returns the col_offset of the first token
-      # after the '[' token which appears to be a bug. Workaround by
-      # explicitly finding the real start of the list comprehension.
-      line = node.lineno
-      col = node.col_offset
-      # loop over lines
-      while 1:
-        # Reverse the text to and regular expression search for whitespace
-        text = self._lines[line-1]
-        reversed_preceding_text = text[:col][::-1]
-        # First find if a [ can be found with only whitespace between it and
-        # col.
-        m = find_open.match(reversed_preceding_text)
-        if m:
-          new_col_offset = col - m.start(1) - 1
-          return line, new_col_offset
-        else:
-          if (reversed_preceding_text=="" or
-             reversed_preceding_text.isspace()):
-            line = line - 1
-            prev_line = self._lines[line - 1]
-            # TODO(aselle):
-            # this is poor comment detection, but it is good enough for
-            # cases where the comment does not contain string literal starting/
-            # ending characters. If ast gave us start and end locations of the
-            # ast nodes rather than just start, we could use string literal
-            # node ranges to filter out spurious #'s that appear in string
-            # literals.
-            comment_start = prev_line.find("#")
-            if comment_start ==  -1:
-              col = len(prev_line) -1
-            elif find_string_chars.search(prev_line[comment_start:]) is None:
-              col = comment_start
-            else:
-              return None, None
-          else:
-            return None, None
-    # Most other nodes return proper locations (with notably does not), but
-    # it is not possible to use that in an argument.
-    return node.lineno, node.col_offset
-
-
-  def visit_Call(self, node):  # pylint: disable=invalid-name
-    """Handle visiting a call node in the AST.
-
-    Args:
-      node: Current Node
-    """
-
-
-    # Find a simple attribute name path e.g. "tf.foo.bar"
-    full_name = self._get_attribute_full_path(node.func)
-
-    # Make sure the func is marked as being part of a call
-    node.func.is_function_for_call = True
-
-    if full_name:
-      # Call special handlers
-      function_handles = self._api_change_spec.function_handle
-      if full_name in function_handles:
-        function_handles[full_name](self._file_edit, node)
-
-      # Examine any non-keyword argument and make it into a keyword argument
-      # if reordering required.
-      function_reorders = self._api_change_spec.function_reorders
-      function_keyword_renames = (
-          self._api_change_spec.function_keyword_renames)
-
-      if full_name in function_reorders:
-        reordered = function_reorders[full_name]
-        for idx, arg in enumerate(node.args):
-          lineno, col_offset = self._find_true_position(arg)
-          if lineno is None or col_offset is None:
-            self._file_edit.add(
-                "Failed to add keyword %r to reordered function %r"
-                % (reordered[idx], full_name), arg.lineno, arg.col_offset,
-                "", "",
-                error="A necessary keyword argument failed to be inserted.")
-          else:
-            keyword_arg = reordered[idx]
-            if (full_name in function_keyword_renames and
-                keyword_arg in function_keyword_renames[full_name]):
-              keyword_arg = function_keyword_renames[full_name][keyword_arg]
-            self._file_edit.add("Added keyword %r to reordered function %r"
-                                % (reordered[idx], full_name), lineno,
-                                col_offset, "", keyword_arg + "=")
-
-      # Examine each keyword argument and convert it to the final renamed form
-      renamed_keywords = ({} if full_name not in function_keyword_renames else
-                          function_keyword_renames[full_name])
-      for keyword in node.keywords:
-        argkey = keyword.arg
-        argval = keyword.value
-
-        if argkey in renamed_keywords:
-          argval_lineno, argval_col_offset = self._find_true_position(argval)
-          if argval_lineno is not None and argval_col_offset is not None:
-            # TODO(aselle): We should scan backward to find the start of the
-            # keyword key. Unfortunately ast does not give you the location of
-            # keyword keys, so we are forced to infer it from the keyword arg
-            # value.
-            key_start = argval_col_offset - len(argkey) - 1
-            key_end = key_start + len(argkey) + 1
-            if (self._lines[argval_lineno - 1][key_start:key_end] ==
-                argkey + "="):
-              self._file_edit.add("Renamed keyword argument from %r to %r" %
-                                  (argkey, renamed_keywords[argkey]),
-                                  argval_lineno,
-                                  argval_col_offset - len(argkey) - 1,
-                                  argkey + "=", renamed_keywords[argkey] + "=")
-              continue
-          self._file_edit.add(
-              "Failed to rename keyword argument from %r to %r" %
-              (argkey, renamed_keywords[argkey]),
-              argval.lineno,
-              argval.col_offset - len(argkey) - 1,
-              "", "",
-              error="Failed to find keyword lexographically. Fix manually.")
-
-    ast.NodeVisitor.generic_visit(self, node)
-
-  def visit_Attribute(self, node):  # pylint: disable=invalid-name
-    """Handle bare Attributes i.e. [tf.foo, tf.bar].
-
-    Args:
-      node: Node that is of type ast.Attribute
-    """
-    full_name = self._get_attribute_full_path(node)
-    if full_name:
-      self._rename_functions(node, full_name)
-    if full_name in self._api_change_spec.change_to_function:
-      if not hasattr(node, "is_function_for_call"):
-        new_text = full_name + "()"
-        self._file_edit.add("Changed %r to %r"%(full_name, new_text),
-                            node.lineno, node.col_offset, full_name, new_text)
-
-    ast.NodeVisitor.generic_visit(self, node)
-
-
-class ASTCodeUpgrader(object):
-  """Handles upgrading a set of Python files using a given API change spec."""
-
-  def __init__(self, api_change_spec):
-    if not isinstance(api_change_spec, APIChangeSpec):
-      raise TypeError("Must pass APIChangeSpec to ASTCodeUpgrader, got %s" %
-                      type(api_change_spec))
-    self._api_change_spec = api_change_spec
-
-  def process_file(self, in_filename, out_filename):
-    """Process the given python file for incompatible changes.
-
-    Args:
-      in_filename: filename to parse
-      out_filename: output file to write to
-    Returns:
-      A tuple representing number of files processed, log of actions, errors
-    """
-
-    # Write to a temporary file, just in case we are doing an implace modify.
-    with open(in_filename, "r") as in_file, \
-        tempfile.NamedTemporaryFile("w", delete=False) as temp_file:
-      ret = self.process_opened_file(
-          in_filename, in_file, out_filename, temp_file)
-
-    shutil.move(temp_file.name, out_filename)
-    return ret
-
-  # Broad exceptions are required here because ast throws whatever it wants.
-  # pylint: disable=broad-except
-  def process_opened_file(self, in_filename, in_file, out_filename, out_file):
-    """Process the given python file for incompatible changes.
-
-    This function is split out to facilitate StringIO testing from
-    tf_upgrade_test.py.
-
-    Args:
-      in_filename: filename to parse
-      in_file: opened file (or StringIO)
-      out_filename: output file to write to
-      out_file: opened file (or StringIO)
-    Returns:
-      A tuple representing number of files processed, log of actions, errors
-    """
-    process_errors = []
-    text = "-" * 80 + "\n"
-    text += "Processing file %r\n outputting to %r\n" % (in_filename,
-                                                         out_filename)
-    text += "-" * 80 + "\n\n"
-
-    parsed_ast = None
-    lines = in_file.readlines()
-    try:
-      parsed_ast = ast.parse("".join(lines))
-    except Exception:
-      text += "Failed to parse %r\n\n" % in_filename
-      text += traceback.format_exc()
-    if parsed_ast:
-      visitor = _ASTCallVisitor(in_filename, lines, self._api_change_spec)
-      visitor.visit(parsed_ast)
-      out_text, new_text, process_errors = visitor.process(lines)
-      text += new_text
-      if out_file:
-        out_file.write(out_text)
-    text += "\n"
-    return 1, text, process_errors
-  # pylint: enable=broad-except
-
-  def process_tree(self, root_directory, output_root_directory,
-                   copy_other_files):
-    """Processes upgrades on an entire tree of python files in place.
-
-    Note that only Python files. If you have custom code in other languages,
-    you will need to manually upgrade those.
-
-    Args:
-      root_directory: Directory to walk and process.
-      output_root_directory: Directory to use as base.
-      copy_other_files: Copy files that are not touched by this converter.
-
-    Returns:
-      A tuple of files processed, the report string ofr all files, and errors
-    """
-
-    # make sure output directory doesn't exist
-    if output_root_directory and os.path.exists(output_root_directory):
-      print("Output directory %r must not already exist." % (
-          output_root_directory))
-      sys.exit(1)
-
-    # make sure output directory does not overlap with root_directory
-    norm_root = os.path.split(os.path.normpath(root_directory))
-    norm_output = os.path.split(os.path.normpath(output_root_directory))
-    if norm_root == norm_output:
-      print("Output directory %r same as input directory %r" % (
-          root_directory, output_root_directory))
-      sys.exit(1)
-
-    # Collect list of files to process (we do this to correctly handle if the
-    # user puts the output directory in some sub directory of the input dir)
-    files_to_process = []
-    files_to_copy = []
-    for dir_name, _, file_list in os.walk(root_directory):
-      py_files = [f for f in file_list if f.endswith(".py")]
-      copy_files = [f for f in file_list if not f.endswith(".py")]
-      for filename in py_files:
-        fullpath = os.path.join(dir_name, filename)
-        fullpath_output = os.path.join(
-            output_root_directory, os.path.relpath(fullpath, root_directory))
-        files_to_process.append((fullpath, fullpath_output))
-      if copy_other_files:
-        for filename in copy_files:
-          fullpath = os.path.join(dir_name, filename)
-          fullpath_output = os.path.join(
-              output_root_directory, os.path.relpath(fullpath, root_directory))
-          files_to_copy.append((fullpath, fullpath_output))
-
-    file_count = 0
-    tree_errors = []
-    report = ""
-    report += ("=" * 80) + "\n"
-    report += "Input tree: %r\n" % root_directory
-    report += ("=" * 80) + "\n"
-
-    for input_path, output_path in files_to_process:
-      output_directory = os.path.dirname(output_path)
-      if not os.path.isdir(output_directory):
-        os.makedirs(output_directory)
-      file_count += 1
-      _, l_report, l_errors = self.process_file(input_path, output_path)
-      tree_errors += l_errors
-      report += l_report
-    for input_path, output_path in files_to_copy:
-      output_directory = os.path.dirname(output_path)
-      if not os.path.isdir(output_directory):
-        os.makedirs(output_directory)
-      shutil.copy(input_path, output_path)
-    return file_count, report, tree_errors
diff --git a/tensorflow/tools/compatibility/tf_upgrade.py b/tensorflow/tools/compatibility/tf_upgrade.py
index 72fe4a48cdd1c374d7dc39b8bb820a365a730e13..fa1cc739056e7d50ace73e9ca6645b5dc04621e5 100644
--- a/tensorflow/tools/compatibility/tf_upgrade.py
+++ b/tensorflow/tools/compatibility/tf_upgrade.py
@@ -19,11 +19,486 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import ast
+import collections
+import os
+import shutil
+import sys
+import tempfile
+import traceback
 
-from tensorflow.tools.compatibility import ast_edits
 
+class APIChangeSpec(object):
+  """This class defines the transformations that need to happen.
 
-class TFAPIChangeSpec(ast_edits.APIChangeSpec):
+  This class must provide the following fields:
+
+  * `function_keyword_renames`: maps function names to a map of old -> new
+    argument names
+  * `function_renames`: maps function names to new function names
+  * `change_to_function`: a set of function names that have changed (for
+    notifications)
+  * `function_reorders`: maps functions whose argument order has changed to the
+    list of arguments in the new order
+  * `function_handle`: maps function names to custom handlers for the function
+
+  For an example, see `TFAPIChangeSpec`.
+  """
+
+
+class _FileEditTuple(collections.namedtuple(
+    "_FileEditTuple", ["comment", "line", "start", "old", "new"])):
+  """Each edit that is recorded by a _FileEditRecorder.
+
+  Fields:
+    comment: A description of the edit and why it was made.
+    line: The line number in the file where the edit occurs (1-indexed).
+    start: The line number in the file where the edit occurs (0-indexed).
+    old: text string to remove (this must match what was in file).
+    new: text string to add in place of `old`.
+  """
+
+  __slots__ = ()
+
+
+class _FileEditRecorder(object):
+  """Record changes that need to be done to the file."""
+
+  def __init__(self, filename):
+    # all edits are lists of chars
+    self._filename = filename
+
+    self._line_to_edit = collections.defaultdict(list)
+    self._errors = []
+
+  def process(self, text):
+    """Process a list of strings, each corresponding to the recorded changes.
+
+    Args:
+      text: A list of lines of text (assumed to contain newlines)
+    Returns:
+      A tuple of the modified text and a textual description of what is done.
+    Raises:
+      ValueError: if substitution source location does not have expected text.
+    """
+
+    change_report = ""
+
+    # Iterate of each line
+    for line, edits in self._line_to_edit.items():
+      offset = 0
+      # sort by column so that edits are processed in order in order to make
+      # indexing adjustments cumulative for changes that change the string
+      # length
+      edits.sort(key=lambda x: x.start)
+
+      # Extract each line to a list of characters, because mutable lists
+      # are editable, unlike immutable strings.
+      char_array = list(text[line - 1])
+
+      # Record a description of the change
+      change_report += "%r Line %d\n" % (self._filename, line)
+      change_report += "-" * 80 + "\n\n"
+      for e in edits:
+        change_report += "%s\n" % e.comment
+      change_report += "\n    Old: %s" % (text[line - 1])
+
+      # Make underscore buffers for underlining where in the line the edit was
+      change_list = [" "] * len(text[line - 1])
+      change_list_new = [" "] * len(text[line - 1])
+
+      # Iterate for each edit
+      for e in edits:
+        # Create effective start, end by accounting for change in length due
+        # to previous edits
+        start_eff = e.start + offset
+        end_eff = start_eff + len(e.old)
+
+        # Make sure the edit is changing what it should be changing
+        old_actual = "".join(char_array[start_eff:end_eff])
+        if old_actual != e.old:
+          raise ValueError("Expected text %r but got %r" %
+                           ("".join(e.old), "".join(old_actual)))
+        # Make the edit
+        char_array[start_eff:end_eff] = list(e.new)
+
+        # Create the underline highlighting of the before and after
+        change_list[e.start:e.start + len(e.old)] = "~" * len(e.old)
+        change_list_new[start_eff:end_eff] = "~" * len(e.new)
+
+        # Keep track of how to generate effective ranges
+        offset += len(e.new) - len(e.old)
+
+      # Finish the report comment
+      change_report += "         %s\n" % "".join(change_list)
+      text[line - 1] = "".join(char_array)
+      change_report += "    New: %s" % (text[line - 1])
+      change_report += "         %s\n\n" % "".join(change_list_new)
+    return "".join(text), change_report, self._errors
+
+  def add(self, comment, line, start, old, new, error=None):
+    """Add a new change that is needed.
+
+    Args:
+      comment: A description of what was changed
+      line: Line number (1 indexed)
+      start: Column offset (0 indexed)
+      old: old text
+      new: new text
+      error: this "edit" is something that cannot be fixed automatically
+    Returns:
+      None
+    """
+
+    self._line_to_edit[line].append(
+        _FileEditTuple(comment, line, start, old, new))
+    if error:
+      self._errors.append("%s:%d: %s" % (self._filename, line, error))
+
+
+class _ASTCallVisitor(ast.NodeVisitor):
+  """AST Visitor that processes function calls.
+
+  Updates function calls from old API version to new API version using a given
+  change spec.
+  """
+
+  def __init__(self, filename, lines, api_change_spec):
+    self._filename = filename
+    self._file_edit = _FileEditRecorder(filename)
+    self._lines = lines
+    self._api_change_spec = api_change_spec
+
+  def process(self, lines):
+    return self._file_edit.process(lines)
+
+  def generic_visit(self, node):
+    ast.NodeVisitor.generic_visit(self, node)
+
+  def _rename_functions(self, node, full_name):
+    function_renames = self._api_change_spec.function_renames
+    try:
+      new_name = function_renames[full_name]
+      self._file_edit.add("Renamed function %r to %r" % (full_name,
+                                                         new_name),
+                          node.lineno, node.col_offset, full_name, new_name)
+    except KeyError:
+      pass
+
+  def _get_attribute_full_path(self, node):
+    """Traverse an attribute to generate a full name e.g. tf.foo.bar.
+
+    Args:
+      node: A Node of type Attribute.
+
+    Returns:
+      a '.'-delimited full-name or None if the tree was not a simple form.
+      i.e. `foo()+b).bar` returns None, while `a.b.c` would return "a.b.c".
+    """
+    curr = node
+    items = []
+    while not isinstance(curr, ast.Name):
+      if not isinstance(curr, ast.Attribute):
+        return None
+      items.append(curr.attr)
+      curr = curr.value
+    items.append(curr.id)
+    return ".".join(reversed(items))
+
+  def _find_true_position(self, node):
+    """Return correct line number and column offset for a given node.
+
+    This is necessary mainly because ListComp's location reporting reports
+    the next token after the list comprehension list opening.
+
+    Args:
+      node: Node for which we wish to know the lineno and col_offset
+    """
+    import re
+    find_open = re.compile("^\s*(\\[).*$")
+    find_string_chars = re.compile("['\"]")
+
+    if isinstance(node, ast.ListComp):
+      # Strangely, ast.ListComp returns the col_offset of the first token
+      # after the '[' token which appears to be a bug. Workaround by
+      # explicitly finding the real start of the list comprehension.
+      line = node.lineno
+      col = node.col_offset
+      # loop over lines
+      while 1:
+        # Reverse the text to and regular expression search for whitespace
+        text = self._lines[line-1]
+        reversed_preceding_text = text[:col][::-1]
+        # First find if a [ can be found with only whitespace between it and
+        # col.
+        m = find_open.match(reversed_preceding_text)
+        if m:
+          new_col_offset = col - m.start(1) - 1
+          return line, new_col_offset
+        else:
+          if (reversed_preceding_text=="" or
+             reversed_preceding_text.isspace()):
+            line = line - 1
+            prev_line = self._lines[line - 1]
+            # TODO(aselle):
+            # this is poor comment detection, but it is good enough for
+            # cases where the comment does not contain string literal starting/
+            # ending characters. If ast gave us start and end locations of the
+            # ast nodes rather than just start, we could use string literal
+            # node ranges to filter out spurious #'s that appear in string
+            # literals.
+            comment_start = prev_line.find("#")
+            if comment_start ==  -1:
+              col = len(prev_line) -1
+            elif find_string_chars.search(prev_line[comment_start:]) is None:
+              col = comment_start
+            else:
+              return None, None
+          else:
+            return None, None
+    # Most other nodes return proper locations (with notably does not), but
+    # it is not possible to use that in an argument.
+    return node.lineno, node.col_offset
+
+
+  def visit_Call(self, node):  # pylint: disable=invalid-name
+    """Handle visiting a call node in the AST.
+
+    Args:
+      node: Current Node
+    """
+
+
+    # Find a simple attribute name path e.g. "tf.foo.bar"
+    full_name = self._get_attribute_full_path(node.func)
+
+    # Make sure the func is marked as being part of a call
+    node.func.is_function_for_call = True
+
+    if full_name:
+      # Call special handlers
+      function_handles = self._api_change_spec.function_handle
+      if full_name in function_handles:
+        function_handles[full_name](self._file_edit, node)
+
+      # Examine any non-keyword argument and make it into a keyword argument
+      # if reordering required.
+      function_reorders = self._api_change_spec.function_reorders
+      function_keyword_renames = (
+          self._api_change_spec.function_keyword_renames)
+
+      if full_name in function_reorders:
+        reordered = function_reorders[full_name]
+        for idx, arg in enumerate(node.args):
+          lineno, col_offset = self._find_true_position(arg)
+          if lineno is None or col_offset is None:
+            self._file_edit.add(
+                "Failed to add keyword %r to reordered function %r"
+                % (reordered[idx], full_name), arg.lineno, arg.col_offset,
+                "", "",
+                error="A necessary keyword argument failed to be inserted.")
+          else:
+            keyword_arg = reordered[idx]
+            if (full_name in function_keyword_renames and
+                keyword_arg in function_keyword_renames[full_name]):
+              keyword_arg = function_keyword_renames[full_name][keyword_arg]
+            self._file_edit.add("Added keyword %r to reordered function %r"
+                                % (reordered[idx], full_name), lineno,
+                                col_offset, "", keyword_arg + "=")
+
+      # Examine each keyword argument and convert it to the final renamed form
+      renamed_keywords = ({} if full_name not in function_keyword_renames else
+                          function_keyword_renames[full_name])
+      for keyword in node.keywords:
+        argkey = keyword.arg
+        argval = keyword.value
+
+        if argkey in renamed_keywords:
+          argval_lineno, argval_col_offset = self._find_true_position(argval)
+          if argval_lineno is not None and argval_col_offset is not None:
+            # TODO(aselle): We should scan backward to find the start of the
+            # keyword key. Unfortunately ast does not give you the location of
+            # keyword keys, so we are forced to infer it from the keyword arg
+            # value.
+            key_start = argval_col_offset - len(argkey) - 1
+            key_end = key_start + len(argkey) + 1
+            if (self._lines[argval_lineno - 1][key_start:key_end] ==
+                argkey + "="):
+              self._file_edit.add("Renamed keyword argument from %r to %r" %
+                                  (argkey, renamed_keywords[argkey]),
+                                  argval_lineno,
+                                  argval_col_offset - len(argkey) - 1,
+                                  argkey + "=", renamed_keywords[argkey] + "=")
+              continue
+          self._file_edit.add(
+              "Failed to rename keyword argument from %r to %r" %
+              (argkey, renamed_keywords[argkey]),
+              argval.lineno,
+              argval.col_offset - len(argkey) - 1,
+              "", "",
+              error="Failed to find keyword lexographically. Fix manually.")
+
+    ast.NodeVisitor.generic_visit(self, node)
+
+  def visit_Attribute(self, node):  # pylint: disable=invalid-name
+    """Handle bare Attributes i.e. [tf.foo, tf.bar].
+
+    Args:
+      node: Node that is of type ast.Attribute
+    """
+    full_name = self._get_attribute_full_path(node)
+    if full_name:
+      self._rename_functions(node, full_name)
+    if full_name in self._api_change_spec.change_to_function:
+      if not hasattr(node, "is_function_for_call"):
+        new_text = full_name + "()"
+        self._file_edit.add("Changed %r to %r"%(full_name, new_text),
+                            node.lineno, node.col_offset, full_name, new_text)
+
+    ast.NodeVisitor.generic_visit(self, node)
+
+
+class ASTCodeUpgrader(object):
+  """Handles upgrading a set of Python files using a given API change spec."""
+
+  def __init__(self, api_change_spec):
+    if not isinstance(api_change_spec, APIChangeSpec):
+      raise TypeError("Must pass APIChangeSpec to ASTCodeUpgrader, got %s" %
+                      type(api_change_spec))
+    self._api_change_spec = api_change_spec
+
+  def process_file(self, in_filename, out_filename):
+    """Process the given python file for incompatible changes.
+
+    Args:
+      in_filename: filename to parse
+      out_filename: output file to write to
+    Returns:
+      A tuple representing number of files processed, log of actions, errors
+    """
+
+    # Write to a temporary file, just in case we are doing an implace modify.
+    with open(in_filename, "r") as in_file, \
+        tempfile.NamedTemporaryFile("w", delete=False) as temp_file:
+      ret = self.process_opened_file(
+          in_filename, in_file, out_filename, temp_file)
+
+    shutil.move(temp_file.name, out_filename)
+    return ret
+
+  # Broad exceptions are required here because ast throws whatever it wants.
+  # pylint: disable=broad-except
+  def process_opened_file(self, in_filename, in_file, out_filename, out_file):
+    """Process the given python file for incompatible changes.
+
+    This function is split out to facilitate StringIO testing from
+    tf_upgrade_test.py.
+
+    Args:
+      in_filename: filename to parse
+      in_file: opened file (or StringIO)
+      out_filename: output file to write to
+      out_file: opened file (or StringIO)
+    Returns:
+      A tuple representing number of files processed, log of actions, errors
+    """
+    process_errors = []
+    text = "-" * 80 + "\n"
+    text += "Processing file %r\n outputting to %r\n" % (in_filename,
+                                                         out_filename)
+    text += "-" * 80 + "\n\n"
+
+    parsed_ast = None
+    lines = in_file.readlines()
+    try:
+      parsed_ast = ast.parse("".join(lines))
+    except Exception:
+      text += "Failed to parse %r\n\n" % in_filename
+      text += traceback.format_exc()
+    if parsed_ast:
+      visitor = _ASTCallVisitor(in_filename, lines, self._api_change_spec)
+      visitor.visit(parsed_ast)
+      out_text, new_text, process_errors = visitor.process(lines)
+      text += new_text
+      if out_file:
+        out_file.write(out_text)
+    text += "\n"
+    return 1, text, process_errors
+  # pylint: enable=broad-except
+
+  def process_tree(self, root_directory, output_root_directory,
+                   copy_other_files):
+    """Processes upgrades on an entire tree of python files in place.
+
+    Note that only Python files. If you have custom code in other languages,
+    you will need to manually upgrade those.
+
+    Args:
+      root_directory: Directory to walk and process.
+      output_root_directory: Directory to use as base.
+      copy_other_files: Copy files that are not touched by this converter.
+
+    Returns:
+      A tuple of files processed, the report string ofr all files, and errors
+    """
+
+    # make sure output directory doesn't exist
+    if output_root_directory and os.path.exists(output_root_directory):
+      print("Output directory %r must not already exist." % (
+          output_root_directory))
+      sys.exit(1)
+
+    # make sure output directory does not overlap with root_directory
+    norm_root = os.path.split(os.path.normpath(root_directory))
+    norm_output = os.path.split(os.path.normpath(output_root_directory))
+    if norm_root == norm_output:
+      print("Output directory %r same as input directory %r" % (
+          root_directory, output_root_directory))
+      sys.exit(1)
+
+    # Collect list of files to process (we do this to correctly handle if the
+    # user puts the output directory in some sub directory of the input dir)
+    files_to_process = []
+    files_to_copy = []
+    for dir_name, _, file_list in os.walk(root_directory):
+      py_files = [f for f in file_list if f.endswith(".py")]
+      copy_files = [f for f in file_list if not f.endswith(".py")]
+      for filename in py_files:
+        fullpath = os.path.join(dir_name, filename)
+        fullpath_output = os.path.join(
+            output_root_directory, os.path.relpath(fullpath, root_directory))
+        files_to_process.append((fullpath, fullpath_output))
+      if copy_other_files:
+        for filename in copy_files:
+          fullpath = os.path.join(dir_name, filename)
+          fullpath_output = os.path.join(
+              output_root_directory, os.path.relpath(fullpath, root_directory))
+          files_to_copy.append((fullpath, fullpath_output))
+
+    file_count = 0
+    tree_errors = []
+    report = ""
+    report += ("=" * 80) + "\n"
+    report += "Input tree: %r\n" % root_directory
+    report += ("=" * 80) + "\n"
+
+    for input_path, output_path in files_to_process:
+      output_directory = os.path.dirname(output_path)
+      if not os.path.isdir(output_directory):
+        os.makedirs(output_directory)
+      file_count += 1
+      _, l_report, l_errors = self.process_file(input_path, output_path)
+      tree_errors += l_errors
+      report += l_report
+    for input_path, output_path in files_to_copy:
+      output_directory = os.path.dirname(output_path)
+      if not os.path.isdir(output_directory):
+        os.makedirs(output_directory)
+      shutil.copy(input_path, output_path)
+    return file_count, report, tree_errors
+
+
+class TFAPIChangeSpec(APIChangeSpec):
   """List of maps that describe what changed in the API."""
 
   def __init__(self):
@@ -238,7 +713,7 @@ Simple usage:
       default="report.txt")
   args = parser.parse_args()
 
-  upgrade = ast_edits.ASTCodeUpgrader(TFAPIChangeSpec())
+  upgrade = ASTCodeUpgrader(TFAPIChangeSpec())
   report_text = None
   report_filename = args.report_filename
   files_processed = 0
diff --git a/tensorflow/tools/compatibility/tf_upgrade_test.py b/tensorflow/tools/compatibility/tf_upgrade_test.py
index ac838a2791fd9ce3244f344c495e9a97dcd513ca..a495f9883b284869d043441d1cfecca01296eda3 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_test.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_test.py
@@ -22,7 +22,6 @@ import tempfile
 import six
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test as test_lib
-from tensorflow.tools.compatibility import ast_edits
 from tensorflow.tools.compatibility import tf_upgrade
 
 
@@ -37,7 +36,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
   def _upgrade(self, old_file_text):
     in_file = six.StringIO(old_file_text)
     out_file = six.StringIO()
-    upgrader = ast_edits.ASTCodeUpgrader(tf_upgrade.TFAPIChangeSpec())
+    upgrader = tf_upgrade.ASTCodeUpgrader(tf_upgrade.TFAPIChangeSpec())
     count, report, errors = (
         upgrader.process_opened_file("test.py", in_file,
                                      "test_out.py", out_file))
@@ -140,7 +139,7 @@ class TestUpgradeFiles(test_util.TensorFlowTestCase):
     upgraded = "tf.multiply(a, b)\n"
     temp_file.write(original)
     temp_file.close()
-    upgrader = ast_edits.ASTCodeUpgrader(tf_upgrade.TFAPIChangeSpec())
+    upgrader = tf_upgrade.ASTCodeUpgrader(tf_upgrade.TFAPIChangeSpec())
     upgrader.process_file(temp_file.name, temp_file.name)
     self.assertAllEqual(open(temp_file.name).read(), upgraded)
     os.unlink(temp_file.name)
diff --git a/tensorflow/tools/dist_test/python/census_widendeep.py b/tensorflow/tools/dist_test/python/census_widendeep.py
index 6f578d6f673ccfe013a5f39472922e221d2bf2bb..8feb5386e9881596c20fba9e537a0439c8187ac4 100644
--- a/tensorflow/tools/dist_test/python/census_widendeep.py
+++ b/tensorflow/tools/dist_test/python/census_widendeep.py
@@ -263,8 +263,7 @@ if __name__ == "__main__":
       "--data_dir",
       type=str,
       default="/tmp/census-data",
-      help="Directory for storing the census data"
-  )
+      help="Directory for storing the census data")
   parser.add_argument(
       "--model_dir",
       type=str,
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 3525c7524f3bd844be5284d2a076eb78d1bb1a02..5dc4a053fd2cae7d83739507fea31e7afc92d77c 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -57,7 +57,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
     >>/etc/bazel.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.5.4
+ENV BAZEL_VERSION 0.8.0
 WORKDIR /
 RUN mkdir /bazel && \
     cd /bazel && \
@@ -69,11 +69,8 @@ RUN mkdir /bazel && \
     rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
 
 # Download and build TensorFlow.
-
-RUN git clone https://github.com/tensorflow/tensorflow.git && \
-    cd tensorflow && \
-    git checkout r1.4
 WORKDIR /tensorflow
+RUN git clone --branch=r1.5 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # TODO(craigcitro): Don't install the pip package, since it makes it
 # more difficult to experiment with local changes. Instead, just add
diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
index 8180e5e7fb65e1eff693265ed388496b356563dd..96b260ad3aeb78622dd1ad276f7d524dd598e3bf 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
@@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel
 LABEL maintainer="Clayne Robison<clayne.b.robison@intel.com>"
 
 # These arguments are parameterized. Use --build-args to override.
-ARG TF_BRANCH=r1.4
+ARG TF_BRANCH=r1.5
 ARG WHL_DIR=/whl
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -54,7 +54,7 @@ RUN ./configure
 RUN LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \
     bazel build --config=mkl \
                 --config="opt" \
-                --copt="-march=native" \
+                --copt="-march=broadwell" \
                 --copt="-O3" \
                 //tensorflow/tools/pip_package:build_pip_package && \
     mkdir ${WHL_DIR} && \
@@ -81,5 +81,3 @@ RUN echo '[ ! -z "$TERM" -a -r /etc/motd ] && cat /etc/issue && cat /etc/motd' \
 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n\
 \n "\
 	> /etc/motd
-
-CMD ["/bin/bash"]
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 041f45971bca256efc0668b03f5b5effde06e2c2..07ffd3839a32ef194100322e54b9133412e4b664 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -1,11 +1,20 @@
-FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
+FROM nvidia/cuda:9.0-base-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
+        cuda-command-line-tools-9-0 \
+        cuda-cublas-dev-9-0 \
+        cuda-cudart-dev-9-0 \
+        cuda-cufft-dev-9-0 \
+        cuda-curand-dev-9-0 \
+        cuda-cusolver-dev-9-0 \
+        cuda-cusparse-dev-9-0 \
         curl \
         git \
+        libcudnn7=7.0.5.15-1+cuda9.0 \
+        libcudnn7-dev=7.0.5.15-1+cuda9.0 \
         libcurl3-dev \
         libfreetype6-dev \
         libpng12-dev \
@@ -17,12 +26,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         unzip \
         zip \
         zlib1g-dev \
-        openjdk-8-jdk \
-        openjdk-8-jre-headless \
         wget \
         && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+    rm -rf /var/lib/apt/lists/* && \
+    find /usr/local/cuda-9.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
 
 RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
     python get-pip.py && \
@@ -58,7 +66,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
     >>/etc/bazel.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.5.4
+ENV BAZEL_VERSION 0.8.0
 WORKDIR /
 RUN mkdir /bazel && \
     cd /bazel && \
@@ -70,18 +78,16 @@ RUN mkdir /bazel && \
     rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
 
 # Download and build TensorFlow.
-
-RUN git clone https://github.com/tensorflow/tensorflow.git && \
-    cd tensorflow && \
-    git checkout r1.4
 WORKDIR /tensorflow
+RUN git clone --branch=r1.5 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0,6.1
-
+ENV TF_CUDA_VERSION=9.0
+ENV TF_CUDNN_VERSION=7
 
 RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
     LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
deleted file mode 100644
index 3bedc8cf3462aabf25f55706b3483907c5d5b467..0000000000000000000000000000000000000000
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
+++ /dev/null
@@ -1,115 +0,0 @@
-FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
-
-LABEL maintainer="Gunhan Gulsoy <gunan@google.com>"
-
-# It is possible to override these for releases.
-ARG TF_BRANCH=master
-ARG BAZEL_VERSION=0.5.4
-ARG TF_AVAILABLE_CPUS=32
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-        build-essential \
-        curl \
-        git \
-        golang \
-        libcurl3-dev \
-        libfreetype6-dev \
-        libpng12-dev \
-        libzmq3-dev \
-        pkg-config \
-        python-dev \
-        python-pip \
-        rsync \
-        software-properties-common \
-        unzip \
-        zip \
-        zlib1g-dev \
-        openjdk-8-jdk \
-        openjdk-8-jre-headless \
-        wget \
-        && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN pip --no-cache-dir install --upgrade \
-        pip setuptools
-
-RUN pip --no-cache-dir install \
-        ipykernel \
-        jupyter \
-        matplotlib \
-        numpy \
-        scipy \
-        sklearn \
-        pandas \
-        wheel \
-        && \
-    python -m ipykernel.kernelspec
-
-# Set up our notebook config.
-COPY jupyter_notebook_config.py /root/.jupyter/
-
-# Jupyter has issues with being run directly:
-#   https://github.com/ipython/ipython/issues/7062
-# We just add a little wrapper script.
-COPY run_jupyter.sh /
-
-# Set up Bazel.
-
-# Running bazel inside a `docker build` command causes trouble, cf:
-#   https://github.com/bazelbuild/bazel/issues/134
-# The easiest solution is to set up a bazelrc file forcing --batch.
-RUN echo "startup --batch" >>/etc/bazel.bazelrc
-# Similarly, we need to workaround sandboxing issues:
-#   https://github.com/bazelbuild/bazel/issues/418
-RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
-    >>/etc/bazel.bazelrc
-WORKDIR /
-RUN mkdir /bazel && \
-    cd /bazel && \
-    wget --quiet https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
-    wget --quiet https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE && \
-    chmod +x bazel-*.sh && \
-    ./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
-    rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
-
-# Download and build TensorFlow.
-WORKDIR /
-RUN git clone https://github.com/tensorflow/tensorflow.git && \
-    cd tensorflow && \
-    git checkout ${TF_BRANCH}
-WORKDIR /tensorflow
-
-# Configure the build for our CUDA configuration.
-ENV CI_BUILD_PYTHON=python \
-    LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:${LD_LIBRARY_PATH} \
-    CUDNN_INSTALL_PATH=/usr/lib/x86_64-linux-gnu \
-    PYTHON_BIN_PATH=/usr/bin/python \
-    PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \
-    TF_NEED_CUDA=1 \
-    TF_CUDA_VERSION=9.0 \
-    TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0,6.1,7.0 \
-    TF_CUDNN_VERSION=7
-RUN ./configure
-
-# Build and Install TensorFlow.
-RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
-    LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \
-    bazel build -c opt \
-                --config=cuda \
-                --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
-                --jobs=${TF_AVAILABLE_CPUS} \
-                tensorflow/tools/pip_package:build_pip_package && \
-    mkdir /pip_pkg && \
-    bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg && \
-    pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \
-    rm -rf /pip_pkg && \
-    rm -rf /root/.cache
-# Clean up pip wheel and Bazel cache when done.
-
-WORKDIR /root
-
-# TensorBoard
-EXPOSE 6006
-# IPython
-EXPOSE 8888
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index e212d10290a93261e88cf9464076e5714e16ac43..b6682cd68163ec870ed815b45ac4fdd9233f88c6 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:8.0-cudnn6-runtime-ubuntu16.04
+FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
 
 LABEL maintainer="Craig Citro <craigcitro@google.com>"
 
diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh
index 80a07b9b3ba7fb278b01862880893aa0a2693a28..fa867b65db50f4a197a35bc3aba98f9f6ecf4724 100755
--- a/tensorflow/tools/docker/parameterized_docker_build.sh
+++ b/tensorflow/tools/docker/parameterized_docker_build.sh
@@ -265,7 +265,7 @@ else
   DOCKERFILE="${TMP_DIR}/Dockerfile"
 
   # Modify the devel Dockerfile to specify the git branch
-  sed -r "s/([\s]*git checkout )(.*)/\1${TF_DOCKER_BUILD_DEVEL_BRANCH}/g" \
+  sed "s/^RUN git clone --branch=.* --depth=1/RUN git clone --branch=${TF_DOCKER_BUILD_DEVEL_BRANCH} --depth=1/" \
       "${ORIG_DOCKERFILE}" > "${DOCKERFILE}"
 
   # Modify python/pip version if necessary.
@@ -408,14 +408,13 @@ fi
 # Optional: set TF_DOCKER_BUILD_PUSH_WITH_CREDENTIALS to push image
 if [[ ! -z "${TF_DOCKER_BUILD_PUSH_WITH_CREDENTIALS}" ]]; then
 
-  docker login --username "${TF_DOCKER_USERNAME}" \
-  --email "${TF_DOCKER_EMAIL}" \
-  --password "${TF_DOCKER_PASSWORD}"
+  docker login -u "${TF_DOCKER_USERNAME}" \
+  -p "${TF_DOCKER_PASSWORD}"
 
   if [[ $? != "0" ]]; then
     die "FAIL: Unable to login. Invalid credentials."
   fi
-  docker push $1
+  docker push "${FINAL_IMG}"
   if [[ $? == "0" ]]; then
     docker logout
     echo "Successfully pushed Docker image ${FINAL_IMG}"
diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py
index c0cde1d3bdd9023479a19112df36d3d88411da67..003f972070cb05aa6f34a3748d47f019744de058 100644
--- a/tensorflow/tools/docs/generate_lib.py
+++ b/tensorflow/tools/docs/generate_lib.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
+import fnmatch
 import os
 import sys
 
@@ -198,12 +199,12 @@ def add_dict_to_dict(add_from, add_to):
       add_to[key] = add_from[key]
 
 
-# Exclude some libaries in contrib from the documentation altogether.
+# Exclude some libraries in contrib from the documentation altogether.
 def _get_default_private_map():
   return {'tf.test': ['mock']}
 
 
-# Exclude members of some libaries.
+# Exclude members of some libraries.
 def _get_default_do_not_descend_map():
   # TODO(wicke): Shrink this list once the modules get sealed.
   return {
@@ -384,10 +385,26 @@ class _UpdateTags(py_guide_parser.PyGuideParser):
 EXCLUDED = set(['__init__.py', 'OWNERS', 'README.txt'])
 
 
-def _other_docs(src_dir, output_dir, reference_resolver):
-  """Convert all the files in `src_dir` and write results to `output_dir`."""
-  header = '<!-- DO NOT EDIT! Automatically generated file. -->\n'
+def _other_docs(src_dir, output_dir, reference_resolver, file_pattern='*.md'):
+  """Fix @{} references in all files under `src_dir` matching `file_pattern`.
 
+  A matching directory structure, with the modified files is
+  written to `output_dir`.
+
+  `{"__init__.py","OWNERS","README.txt"}` are skipped.
+
+  Files not matching `file_pattern` (using `fnmatch`) are copied with no change.
+
+  Also, files in the `api_guides/python` directory get explicit ids set on all
+  heading-2s to ensure back-links work.
+
+  Args:
+    src_dir: The directory to convert files from.
+    output_dir: The root directory to write the resulting files to.
+    reference_resolver: A `parser.ReferenceResolver` to make the replacements.
+    file_pattern: Only replace references in files matching file_patters,
+      using fnmatch. Non-matching files are copied unchanged.
+  """
   # Iterate through all the source files and process them.
   tag_updater = _UpdateTags()
   for dirpath, _, filenames in os.walk(src_dir):
@@ -415,21 +432,21 @@ def _other_docs(src_dir, output_dir, reference_resolver):
 
       suffix = os.path.relpath(path=full_in_path, start=src_dir)
       full_out_path = os.path.join(output_dir, suffix)
-      if not base_name.endswith('.md'):
-        print('Copying non-md file %s...' % suffix)
+      if not fnmatch.fnmatch(base_name, file_pattern):
+        print('Copying un-matched file %s...' % suffix)
         open(full_out_path, 'w').write(open(full_in_path).read())
         continue
       if dirpath.endswith('/api_guides/python'):
         print('Processing Python guide %s...' % base_name)
-        md_string = tag_updater.process(full_in_path)
+        content = tag_updater.process(full_in_path)
       else:
         print('Processing doc %s...' % suffix)
-        md_string = open(full_in_path).read()
+        content = open(full_in_path).read()
 
-      output = reference_resolver.replace_references(md_string,
-                                                     relative_path_to_root)
+      content = reference_resolver.replace_references(content,
+                                                      relative_path_to_root)
       with open(full_out_path, 'w') as f:
-        f.write(header + output)
+        f.write(content)
 
   print('Done.')
 
diff --git a/tensorflow/tools/git/BUILD b/tensorflow/tools/git/BUILD
index f502c8dde07de6d9f480a1b9d8690fd8f03de264..942ceab85fc8d40d9d4b67537d95204503af8bbe 100644
--- a/tensorflow/tools/git/BUILD
+++ b/tensorflow/tools/git/BUILD
@@ -7,9 +7,7 @@ package(default_visibility = ["//tensorflow:internal"])
 licenses(["notice"])  # Apache 2.0
 
 exports_files(
-    glob(["gen/*"]) + [
-        "gen_git_source.py",
-    ],
+    ["gen_git_source.py"],
 )
 
 # -----------------------------------------------------------------------------
diff --git a/tensorflow/tools/git/gen/branch_ref b/tensorflow/tools/git/gen/branch_ref
deleted file mode 100644
index 8b137891791fe96927ad78e64b0aad7bded08bdc..0000000000000000000000000000000000000000
--- a/tensorflow/tools/git/gen/branch_ref
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tensorflow/tools/git/gen/head b/tensorflow/tools/git/gen/head
deleted file mode 100644
index 8b137891791fe96927ad78e64b0aad7bded08bdc..0000000000000000000000000000000000000000
--- a/tensorflow/tools/git/gen/head
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tensorflow/tools/git/gen/spec.json b/tensorflow/tools/git/gen/spec.json
deleted file mode 100644
index 176bbc21ccb9112d5c29f0351ec937c302a1383e..0000000000000000000000000000000000000000
--- a/tensorflow/tools/git/gen/spec.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "git": false
-}
diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py
index 0307d2a0ebee820fee0867c35c5761f2f8607aea..3630dbd740e981971bdc9ff45b756b45095d437d 100755
--- a/tensorflow/tools/git/gen_git_source.py
+++ b/tensorflow/tools/git/gen_git_source.py
@@ -62,7 +62,7 @@ def parse_branch_ref(filename):
     raise RuntimeError("Git directory has unparseable HEAD")
 
 
-def configure(src_base_path, debug=False):
+def configure(src_base_path, gen_path, debug=False):
   """Configure `src_base_path` to embed git hashes if available."""
 
   # TODO(aselle): No files generated or symlinked here are deleted by
@@ -71,7 +71,6 @@ def configure(src_base_path, debug=False):
   # without running ./configure again.
 
   git_path = os.path.join(src_base_path, ".git")
-  gen_path = os.path.join(src_base_path, "tensorflow", "tools", "git", "gen")
 
   # Remove and recreate the path
   if os.path.exists(gen_path):
@@ -180,6 +179,13 @@ const int tf_cxx11_abi_flag() {
   return 0;
 #endif
 }
+const int tf_monolithic_build() {
+#ifdef TENSORFLOW_MONOLITHIC_BUILD
+  return 1;
+#else
+  return 0;
+#endif
+}
 """ % git_version
   open(filename, "w").write(contents)
 
@@ -253,6 +259,10 @@ parser.add_argument(
     "--configure", type=str,
     help="Path to configure as a git repo dependency tracking sentinel")
 
+parser.add_argument(
+    "--gen_root_path", type=str,
+    help="Root path to place generated git files (created by --configure).")
+
 parser.add_argument(
     "--generate",
     type=str,
@@ -267,7 +277,9 @@ parser.add_argument(
 args = parser.parse_args()
 
 if args.configure is not None:
-  configure(args.configure, debug=args.debug)
+  if args.gen_root_path is None:
+    raise RuntimeError("Must pass --gen_root_path arg when running --configure")
+  configure(args.configure, args.gen_root_path, debug=args.debug)
 elif args.generate is not None:
   generate(args.generate)
 elif args.raw_generate is not None:
diff --git a/tensorflow/tools/git/gen_git_source.sh b/tensorflow/tools/git/gen_git_source.sh
index 788f9e6e5730f9e4699011298d689bc26226fb65..db20bb00e84b47bd15244e70b925f59e62731deb 100755
--- a/tensorflow/tools/git/gen_git_source.sh
+++ b/tensorflow/tools/git/gen_git_source.sh
@@ -36,5 +36,12 @@ const int tf_cxx11_abi_flag() {
   return 0;
 #endif
 }
+const int tf_monolithic_build() {
+#ifdef TENSORFLOW_MONOLITHIC_BUILD
+  return 1;
+#else
+  return 0;
+#endif
+}
 EOF
 
diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD
index 9216008600b0969ae95a985f54511a24f4fac3e7..b5465b7fb32856833fc2a12c8dfea58c2e8e79dd 100644
--- a/tensorflow/tools/graph_transforms/BUILD
+++ b/tensorflow/tools/graph_transforms/BUILD
@@ -128,6 +128,7 @@ cc_library(
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal",
+        "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
@@ -315,3 +316,14 @@ tf_py_test(
     ],
     main = "python/transform_graph_test.py",
 )
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+)
diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc
index 97e8f77616b85955229619107b443315bca17925..5ccd88cfa1acfd55e90504d66417349e42fe3b50 100644
--- a/tensorflow/tools/graph_transforms/quantize_nodes.cc
+++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc
@@ -759,7 +759,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def,
           NodeDef reshape_dims;
           reshape_dims.set_op("Const");
           reshape_dims.set_name(unique_input_name + "/reshape_dims");
-          AddNodeInput("^" + input_name, &reshape_dims);
+          AddNodeInput("^" + NodeNameFromInput(input_name), &reshape_dims);
           SetNodeAttr("dtype", DT_INT32, &reshape_dims);
           Tensor reshape_dims_tensor(DT_INT32, {1});
           reshape_dims_tensor.flat<int32>()(0) = -1;
@@ -769,7 +769,7 @@ Status QuantizeNodes(const GraphDef& input_graph_def,
           NodeDef reduction_dims;
           reduction_dims.set_op("Const");
           reduction_dims.set_name(unique_input_name + "/reduction_dims");
-          AddNodeInput("^" + input_name, &reduction_dims);
+          AddNodeInput("^" + NodeNameFromInput(input_name), &reduction_dims);
           SetNodeAttr("dtype", DT_INT32, &reduction_dims);
           Tensor reduction_dims_tensor(DT_INT32, {1});
           reduction_dims_tensor.flat<int32>()(0) = 0;
diff --git a/tensorflow/tools/graph_transforms/sparsify_gather.cc b/tensorflow/tools/graph_transforms/sparsify_gather.cc
index 20d443c7e9070d0c82191c70ec1a855deeeb8f0b..96324d0deab400078fdf388bff69001f8e2df9aa 100644
--- a/tensorflow/tools/graph_transforms/sparsify_gather.cc
+++ b/tensorflow/tools/graph_transforms/sparsify_gather.cc
@@ -89,7 +89,10 @@ Status ObtainTensorSlice(const GraphDef& input_graph_def,
                          string* shape_slice_string) {
   string restore_node_name;
   for (const auto& node : input_graph_def.node()) {
-    if (StringPiece(node.name()).starts_with("save/Assign") &&
+    std::vector<string> node_name_parts = str_util::Split(node.name(), "/");
+    if (node_name_parts.size() == 2 &&
+        StringPiece(node_name_parts[0]).starts_with("save") &&
+        StringPiece(node_name_parts[1]).starts_with("Assign") &&
         node.input(0) == tensor_name) {
       restore_node_name = node.input(1);
       break;
diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
index 845bad5e4990255cf47981935fc5479053334491..dbc81599de8539ce58933f9d40bf99fcae8f8e67 100644
--- a/tensorflow/tools/lib_package/BUILD
+++ b/tensorflow/tools/lib_package/BUILD
@@ -55,7 +55,10 @@ pkg_tar(
 
 pkg_tar(
     name = "cheaders",
-    files = ["//tensorflow/c:headers"],
+    files = [
+        "//tensorflow/c:headers",
+        "//tensorflow/c/eager:headers",
+    ],
     package_dir = "include/tensorflow/c",
     # Mark as "manual" till
     # https://github.com/bazelbuild/bazel/issues/2352
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index e3cbd67721aa04f170878f1d369ed65b7fde630e..ff5dd6a0b09bcc0296d7add42d51fdd83b821c64 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -6,9 +6,11 @@ package(default_visibility = ["//visibility:private"])
 load(
     "//tensorflow:tensorflow.bzl",
     "if_not_windows",
+    "if_windows",
     "transitive_hdrs",
 )
 load("//third_party/mkl:build_defs.bzl", "if_mkl")
+load("//tensorflow:tensorflow.bzl", "if_cuda")
 load("//tensorflow/core:platform/default/build_config_root.bzl", "tf_additional_license_deps")
 
 # This returns a list of headers of all public header libraries (e.g.,
@@ -33,7 +35,9 @@ transitive_hdrs(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:stream_executor",
         "//third_party/eigen3",
-    ],
+    ] + if_cuda([
+        "@local_config_cuda//cuda:cuda_headers",
+    ]),
 )
 
 py_binary(
@@ -153,10 +157,11 @@ sh_binary(
             "//tensorflow:tensorflow_py",
             "//tensorflow/contrib/boosted_trees:boosted_trees_pip",
             "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip",
+            "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test",
             "//tensorflow/contrib/data/python/ops:prefetching_py",
             "//tensorflow/contrib/eager/python/examples:examples_pip",
+            "//tensorflow/contrib/eager/python:checkpointable",
             "//tensorflow/contrib/eager/python:evaluator",
-            "//tensorflow/contrib/eager/python:summary_writer",
             "//tensorflow/contrib/gan:gan",
             "//tensorflow/contrib/graph_editor:graph_editor_pip",
             "//tensorflow/contrib/keras:keras",
@@ -167,6 +172,10 @@ sh_binary(
             "//tensorflow/contrib/ndlstm:ndlstm",
             "//tensorflow/contrib/nn:nn_py",
             "//tensorflow/contrib/predictor:predictor_pip",
+            "//tensorflow/contrib/py2tf:py2tf_internal",
+            "//tensorflow/contrib/py2tf/convert:convert",
+            "//tensorflow/contrib/py2tf/pyct:pyct",
+            "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis",
             "//tensorflow/contrib/receptive_field:receptive_field_pip",
             "//tensorflow/contrib/session_bundle:session_bundle_pip",
             "//tensorflow/contrib/signal:signal_py",
@@ -194,3 +203,23 @@ sh_binary(
         ],
     }) + if_mkl(["//third_party/mkl:intel_binary_blob"]),
 )
+
+# A genrule for generating a marker file for the pip package on Windows
+#
+# This only works on Windows, because :simple_console_for_windows is a
+# python zip file containing everything we need for building the pip package.
+# However, on other platforms, due to https://github.com/bazelbuild/bazel/issues/4223,
+# when C++ extensions change, this generule doesn't rebuild.
+genrule(
+    name = "win_pip_package_marker",
+    srcs = if_windows([
+        ":build_pip_package",
+        ":simple_console_for_windows",
+    ]),
+    outs = ["win_pip_package_marker_file"],
+    cmd = select({
+        "//conditions:default": "touch $@",
+        "//tensorflow:windows": "md5sum $(locations :build_pip_package) $(locations :simple_console_for_windows) > $@",
+    }),
+    visibility = ["//visibility:public"],
+)
diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh
index 8249703ba717f25dbfb324557727b636c6640cc5..ca8c272a0894d1c8ab665d58bcf02bba4c300708 100755
--- a/tensorflow/tools/pip_package/build_pip_package.sh
+++ b/tensorflow/tools/pip_package/build_pip_package.sh
@@ -24,9 +24,11 @@ function real_path() {
 function cp_external() {
   local src_dir=$1
   local dest_dir=$2
-  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*'`; do
+  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*' ! -name '*org_tensorflow*'`; do
     cp -R "$f" "$dest_dir"
   done
+  mkdir -p "${dest_dir}/local_config_cuda/cuda/cuda/"
+  cp "${src_dir}/local_config_cuda/cuda/cuda/cuda_config.h" "${dest_dir}/local_config_cuda/cuda/cuda/"
 }
 
 PLATFORM="$(uname -s | tr 'A-Z' 'a-z')"
@@ -92,7 +94,6 @@ function main() {
       bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles/org_tensorflow/tensorflow \
       "${TMPDIR}"
     mkdir "${TMPDIR}/external"
-    # Note: this makes an extra copy of org_tensorflow.
     cp_external \
       bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles \
       "${TMPDIR}/external"
@@ -123,7 +124,6 @@ function main() {
         bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/tensorflow \
         "${TMPDIR}"
       mkdir "${TMPDIR}/external"
-      # Note: this makes an extra copy of org_tensorflow.
       cp_external \
         bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles \
         "${TMPDIR}/external"
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index cc46dd5162b396e2dc9eac6dafbc2365cafe17d8..cddf9c8f44e3949d2e17dfd00b1a7a1dc4238d7e 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -42,6 +42,7 @@ BLACKLIST = [
     "//tensorflow/python:extra_py_tests_deps",
     "//tensorflow/cc/saved_model:saved_model_half_plus_two",
     "//tensorflow:no_tensorflow_py_deps",
+    "//tensorflow/tools/pip_package:win_pip_package_marker",
     "//tensorflow/python:test_ops_2",
     "//tensorflow/python:tf_optimizer",
     "//tensorflow/python:compare_test_proto_py",
@@ -60,6 +61,7 @@ BLACKLIST = [
     "//tensorflow/contrib/framework:checkpoint_ops_testdata",
     "//tensorflow/contrib/bayesflow:reinforce_simple_example",
     "//tensorflow/contrib/bayesflow:examples/reinforce_simple/reinforce_simple_example.py",  # pylint:disable=line-too-long
+    "//tensorflow/contrib/py2tf:py2tf_internal",
     "//tensorflow/contrib/timeseries/examples:predict",
     "//tensorflow/contrib/timeseries/examples:multivariate",
     "//tensorflow/contrib/timeseries/examples:known_anomaly",
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 0aa16acd7dc993dbea0c06f42e0a63758ca9bcf2..62df6453fb5d39728c2985a28a70a263d79804b1 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -29,14 +29,17 @@ from setuptools.dist import Distribution
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.4.0'
+_VERSION = '1.5.0-rc1'
 
 REQUIRED_PACKAGES = [
-    'absl-py',
+    'absl-py >= 0.1.6',
+    'astor >= 0.6.0',
+    'gast >= 0.2.0',
     'numpy >= 1.12.1',
     'six >= 1.10.0',
     'protobuf >= 3.4.0',
-    'tensorflow-tensorboard',
+    'tensorflow-tensorboard >= 0.4.0',
+    'termcolor >= 1.1.0',
 ]
 
 project_name = 'tensorflow'
@@ -76,13 +79,13 @@ CONSOLE_SCRIPTS = [
     # is now declared by the tensorboard pip package. If we remove the
     # TensorBoard command, pip will inappropriately remove it during install,
     # even though the command is not removed, just moved to a different wheel.
-    'tensorboard = tensorboard.main:run_main',
+    'tensorboard = tensorboard.main:main',
 ]
 # pylint: enable=line-too-long
 
 # remove the tensorboard console script if building tf_nightly
 if 'tf_nightly' in project_name:
-  CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:run_main')
+  CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:main')
 
 TEST_PACKAGES = [
     'scipy >= 0.15.1',
@@ -176,7 +179,15 @@ def find_files(pattern, root):
 
 
 matches = ['../' + x for x in find_files('*', 'external') if '.py' not in x]
-matches += ['../' + x for x in find_files('*', '_solib_k8') if '.py' not in x]
+
+so_lib_paths = [i for i in os.listdir('.')
+                if os.path.isdir(i) 
+                and fnmatch.fnmatch(i, '_solib_*')]
+
+for path in so_lib_paths:
+  matches.extend(
+      ['../' + x for x in find_files('*', path) if '.py' not in x]
+  )
 
 if os.name == 'nt':
   EXTENSION_NAME = 'python/_pywrap_tensorflow_internal.pyd'
diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions.cc b/tensorflow/tools/proto_text/gen_proto_text_functions.cc
index ecb29a65a08b098cd167e5cbb2bdb5821e01a543..f0bb59acf801ba586fa8258b5b1ad9f202f014bf 100644
--- a/tensorflow/tools/proto_text/gen_proto_text_functions.cc
+++ b/tensorflow/tools/proto_text/gen_proto_text_functions.cc
@@ -132,6 +132,7 @@ int MainImpl(int argc, char** argv) {
       FILE* f = fopen(path.c_str(), "w");
       if (f == nullptr) return -1;
       if (fwrite(data.c_str(), 1, data.size(), f) != data.size()) {
+        fclose(f);
         return -1;
       }
       if (fclose(f) != 0) {
diff --git a/tensorflow/tools/quantization/quantize_graph.py b/tensorflow/tools/quantization/quantize_graph.py
index a0cfc352d4f65a32dde13893dc937a72d7434e28..3acb532263d6896c3e64fe38da649bb23c0ad1e2 100644
--- a/tensorflow/tools/quantization/quantize_graph.py
+++ b/tensorflow/tools/quantization/quantize_graph.py
@@ -408,7 +408,8 @@ class GraphRewriter(object):
       for output_node in output_nodes:
         self.quantize_nodes_recursively(output_node)
     elif self.mode == "eightbit":
-      self.set_input_graph(graph_util.remove_training_nodes(self.input_graph))
+      self.set_input_graph(graph_util.remove_training_nodes(
+          self.input_graph, protected_nodes=output_node_names))
       output_nodes = [
           self.nodes_map[output_node_name]
           for output_node_name in output_node_names
diff --git a/tensorflow/tools/test/performance.bzl b/tensorflow/tools/test/performance.bzl
index b5c4bbf5a700aedfea7abf7f1c07a62df0155cfc..cee53dd5b61e50126948e3652865a32f45eab092 100644
--- a/tensorflow/tools/test/performance.bzl
+++ b/tensorflow/tools/test/performance.bzl
@@ -21,8 +21,9 @@ def tf_cc_logged_benchmark(
     fail(" ".join(("Target must be a single well-defined test, e.g.,",
                    "//path/to:test. Received: %s" % target)))
 
-  all_tags = list(depset(tags) + \
-                  depset(["benchmark-test", "local", "manual", "regression-test"]))
+  all_tags = (
+    depset(tags) + depset(
+      ["benchmark-test", "local", "manual", "regression-test"])).to_list()
 
   tf_py_test(
       name = name,
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 43c8de530360ab32ae1005a940819400fdac32da..b27b1f21fbe0607e6f97050c530f6e0b6e3580f9 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -2,6 +2,7 @@
 
 load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
 load("//third_party/mkl:build_defs.bzl", "mkl_repository")
+load("//third_party/git:git_configure.bzl", "git_configure")
 load("//third_party/py:python_configure.bzl", "python_configure")
 load("//third_party/sycl:sycl_configure.bzl", "sycl_configure")
 load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compiler_configure")
@@ -9,34 +10,52 @@ load("//third_party:repo.bzl", "tf_http_archive")
 load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external")
 load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external")
 
+def _extract_version_number(bazel_version):
+  """Extracts the semantic version number from a version string
+
+  Args:
+    bazel_version: the version string that begins with the semantic version
+      e.g. "1.2.3rc1 abc1234" where "abc1234" is a commit hash.
+
+  Returns:
+    The semantic version string, like "1.2.3".
+  """
+  for i in range(len(bazel_version)):
+    c = bazel_version[i]
+    if not (c.isdigit() or c == "."):
+      return bazel_version[:i]
+  return bazel_version
+
 # Parse the bazel version string from `native.bazel_version`.
+# e.g.
+# "0.10.0rc1 abc123d" => (0, 10, 0)
+# "0.3.0" => (0, 3, 0)
 def _parse_bazel_version(bazel_version):
-  # Remove commit from version.
-  version = bazel_version.split(" ", 1)[0]
-  # Split into (release, date) parts and only return the release
-  # as a tuple of integers.
-  parts = version.split("-", 1)
-  # Turn "release" into a tuple of strings
-  version_tuple = ()
-  for number in parts[0].split("."):
-    version_tuple += (str(number),)
-  return version_tuple
-
-# Check that a specific bazel version is being used.
-def check_version(bazel_version):
+  """Parses a version string into a 3-tuple of ints
+
+  int tuples can be compared directly using binary operators (<, >).
+
+  Args:
+    bazel_version: the Bazel version string
+
+  Returns:
+    An int 3-tuple of a (major, minor, patch) version.
+  """
+
+  version = _extract_version_number(bazel_version)
+  return tuple([int(n) for n in version.split(".")])
+
+def check_bazel_version_at_least(minimum_bazel_version):
   if "bazel_version" not in dir(native):
-    fail("\nCurrent Bazel version is lower than 0.2.1, expected at least %s\n" %
-         bazel_version)
+    fail("\nCurrent Bazel version is lower than 0.2.1, expected at least %s\n" % minimum_bazel_version)
   elif not native.bazel_version:
-    print("\nCurrent Bazel is not a release version, cannot check for " +
-          "compatibility.")
-    print("Make sure that you are running at least Bazel %s.\n" % bazel_version)
-  else:
-    current_bazel_version = _parse_bazel_version(native.bazel_version)
-    minimum_bazel_version = _parse_bazel_version(bazel_version)
-    if minimum_bazel_version > current_bazel_version:
-      fail("\nCurrent Bazel version is {}, expected at least {}\n".format(
-          native.bazel_version, bazel_version))
+    print("\nCurrent Bazel is not a release version, cannot check for compatibility.")
+    print("Make sure that you are running at least Bazel %s.\n" % minimum_bazel_version)
+    return
+
+  if _parse_bazel_version(native.bazel_version) < _parse_bazel_version(minimum_bazel_version):
+    fail("\nCurrent Bazel version is {}, expected at least {}\n".format(
+        native.bazel_version, minimum_bazel_version))
 
 # If TensorFlow is linked as a submodule.
 # path_prefix is no longer used.
@@ -45,8 +64,9 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   # We must check the bazel version before trying to parse any other BUILD
   # files, in case the parsing of those build files depends on the bazel
   # version we require here.
-  check_version("0.5.4")
+  check_bazel_version_at_least("0.5.4")
   cuda_configure(name="local_config_cuda")
+  git_configure(name="local_config_git")
   sycl_configure(name="local_config_sycl")
   python_configure(name="local_config_python")
 
@@ -59,11 +79,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   mkl_repository(
       name = "mkl",
       urls = [
-          "https://mirror.bazel.build/github.com/01org/mkl-dnn/releases/download/v0.9/mklml_lnx_2018.0.20170720.tgz",
-          "https://github.com/01org/mkl-dnn/releases/download/v0.9/mklml_lnx_2018.0.20170720.tgz",
+          "https://mirror.bazel.build/github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz",
+          "https://github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz",
       ],
-      sha256 = "57ba56c4c243f403ff78f417ff854ef50b9eddf4a610a917b7c95e7fa8553a4b",
-      strip_prefix = "mklml_lnx_2018.0.20170720",
+      sha256 = "6b07cb7e5451db67c2e31e785ae458b18f7f363c60a61685488f69e9ae7199d4",
+      strip_prefix = "mklml_lnx_2018.0.1.20171007",
       build_file = str(Label("//third_party/mkl:mkl.BUILD")),
   )
 
@@ -74,32 +94,32 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "mkl_dnn",
       urls = [
-          "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
-          "https://github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz",
+          "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/e0bfcaa7fcb2b1e1558f5f0676933c1db807a729.tar.gz",
+          "https://github.com/01org/mkl-dnn/archive/e0bfcaa7fcb2b1e1558f5f0676933c1db807a729.tar.gz",
       ],
-      sha256 = "0d529ad4c49dc799e6df07c2b88b115d0668735da15fb3b3862d28d33fa68165",
-      strip_prefix = "mkl-dnn-b01e3a55a07be62172e713bcd2644c5176360212",
+      sha256 = "02e244f63dd95402691a361392504c143eede9a89043426f174836638a9cbf09",
+      strip_prefix = "mkl-dnn-e0bfcaa7fcb2b1e1558f5f0676933c1db807a729",
       build_file = str(Label("//third_party/mkl_dnn:mkldnn.BUILD")),
   )
 
   tf_http_archive(
       name = "com_google_absl",
       urls = [
-          "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/cc4bed2d74f7c8717e31f9579214ab52a9c9c610.tar.gz",
-          "https://github.com/abseil/abseil-cpp/archive/cc4bed2d74f7c8717e31f9579214ab52a9c9c610.tar.gz",
+          "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/720c017e30339fd1786ce4aac68bc8559736e53f.tar.gz",
+          "https://github.com/abseil/abseil-cpp/archive/720c017e30339fd1786ce4aac68bc8559736e53f.tar.gz",
       ],
-     sha256 = "f1a7349f88d2846210c42e2f7271dabeee404c2a3b4198e34a797993e3569b03",
-     strip_prefix = "abseil-cpp-cc4bed2d74f7c8717e31f9579214ab52a9c9c610",
+     sha256 = "5996380e3e8b981f55d1c8d58e709c00dbb4806ba367be75d0925a68cc2f6478",
+     strip_prefix = "abseil-cpp-720c017e30339fd1786ce4aac68bc8559736e53f",
   )
 
   tf_http_archive(
       name = "eigen_archive",
       urls = [
-          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
-          "https://bitbucket.org/eigen/eigen/get/429aa5254200.tar.gz",
+          "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/14e1418fcf12.tar.gz",
+          "https://bitbucket.org/eigen/eigen/get/14e1418fcf12.tar.gz",
       ],
-      sha256 = "61d8b6fc4279dd1dda986fb1677d15e3d641c07a3ea5abe255790b1f0c0c14e9",
-      strip_prefix = "eigen-eigen-429aa5254200",
+      sha256 = "2b526c6888639025323fd4f2600533c0f982d304ea48e4f1663e8066bd9f6368",
+      strip_prefix = "eigen-eigen-14e1418fcf12",
       build_file = str(Label("//third_party:eigen.BUILD")),
   )
 
@@ -143,11 +163,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "com_googlesource_code_re2",
       urls = [
-          "https://mirror.bazel.build/github.com/google/re2/archive/b94b7cd42e9f02673cd748c1ac1d16db4052514c.tar.gz",
-          "https://github.com/google/re2/archive/b94b7cd42e9f02673cd748c1ac1d16db4052514c.tar.gz",
+          "https://mirror.bazel.build/github.com/google/re2/archive/26cd968b735e227361c9703683266f01e5df7857.tar.gz",
+          "https://github.com/google/re2/archive/26cd968b735e227361c9703683266f01e5df7857.tar.gz",
+
       ],
-      sha256 = "bd63550101e056427c9e7ff12a408c1c8b74e9803f393ca916b2926fc2c4906f",
-      strip_prefix = "re2-b94b7cd42e9f02673cd748c1ac1d16db4052514c",
+      sha256 = "e57eeb837ac40b5be37b2c6197438766e73343ffb32368efea793dfd8b28653b",
+      strip_prefix = "re2-26cd968b735e227361c9703683266f01e5df7857",
   )
 
   tf_http_archive(
@@ -216,7 +237,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   )
 
   tf_http_archive(
-      name = "sqlite_archive",
+      name = "org_sqlite",
       urls = [
           "https://mirror.bazel.build/www.sqlite.org/2017/sqlite-amalgamation-3200000.zip",
           "http://www.sqlite.org/2017/sqlite-amalgamation-3200000.zip",
@@ -248,14 +269,47 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:six.BUILD")),
   )
 
+  tf_http_archive(
+      name = "astor_archive",
+      urls = [
+          "https://mirror.bazel.build/pypi.python.org/packages/d8/be/c4276b3199ec3feee2a88bc64810fbea8f26d961e0a4cd9c68387a9f35de/astor-0.6.2.tar.gz",
+          "https://pypi.python.org/packages/d8/be/c4276b3199ec3feee2a88bc64810fbea8f26d961e0a4cd9c68387a9f35de/astor-0.6.2.tar.gz",
+      ],
+      sha256 = "ff6d2e2962d834acb125cc4dcc80c54a8c17c253f4cc9d9c43b5102a560bb75d",
+      strip_prefix = "astor-0.6.2",
+      build_file = str(Label("//third_party:astor.BUILD")),
+  )
+
+  tf_http_archive(
+      name = "gast_archive",
+      urls = [
+          "https://mirror.bazel.build/pypi.python.org/packages/5c/78/ff794fcae2ce8aa6323e789d1f8b3b7765f601e7702726f430e814822b96/gast-0.2.0.tar.gz",
+          "https://pypi.python.org/packages/5c/78/ff794fcae2ce8aa6323e789d1f8b3b7765f601e7702726f430e814822b96/gast-0.2.0.tar.gz",
+      ],
+      sha256 = "7068908321ecd2774f145193c4b34a11305bd104b4551b09273dfd1d6a374930",
+      strip_prefix = "gast-0.2.0",
+      build_file = str(Label("//third_party:gast.BUILD")),
+  )
+
+  tf_http_archive(
+      name = "termcolor_archive",
+      urls = [
+          "https://mirror.bazel.build/pypi.python.org/packages/8a/48/a76be51647d0eb9f10e2a4511bf3ffb8cc1e6b14e9e4fab46173aa79f981/termcolor-1.1.0.tar.gz",
+          "https://pypi.python.org/packages/8a/48/a76be51647d0eb9f10e2a4511bf3ffb8cc1e6b14e9e4fab46173aa79f981/termcolor-1.1.0.tar.gz",
+      ],
+      sha256 = "1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b",
+      strip_prefix = "termcolor-1.1.0",
+      build_file = str(Label("//third_party:termcolor.BUILD")),
+  )
+
   tf_http_archive(
       name = "absl_py",
       urls = [
-          "https://mirror.bazel.build/github.com/abseil/abseil-py/archive/231e3870b976c1dc61dce1749138661d21556028.tar.gz",
-          "https://github.com/abseil/abseil-py/archive/231e3870b976c1dc61dce1749138661d21556028.tar.gz",
+          "https://mirror.bazel.build/github.com/abseil/abseil-py/archive/acec853355ef987eae48a8d87a79351c15dff593.tar.gz",
+          "https://github.com/abseil/abseil-py/archive/acec853355ef987eae48a8d87a79351c15dff593.tar.gz",
       ],
-      sha256 = "8ea2b23bfdb9ae7622f3e5d95236bc600c8d8509a2f38c84732b3145585d4f73",
-      strip_prefix = "abseil-py-231e3870b976c1dc61dce1749138661d21556028",
+      sha256 = "29e4584e778bee13aa4093824133d131d927cc160561892880118d9ff7b95a6a",
+      strip_prefix = "abseil-py-acec853355ef987eae48a8d87a79351c15dff593",
   )
 
   tf_http_archive(
@@ -332,11 +386,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "nsync",
       urls = [
-          "https://mirror.bazel.build/github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz",
-          "https://github.com/google/nsync/archive/93815892dddafe9146a5f7e7042281d59d0f4323.tar.gz",
+          "https://mirror.bazel.build/github.com/google/nsync/archive/8502189abfa44c249c01c2cad64e6ed660a9a668.tar.gz",
+          "https://github.com/google/nsync/archive/8502189abfa44c249c01c2cad64e6ed660a9a668.tar.gz",
       ],
-      sha256 = "e3bd4555415ace511338fc27e595351738eea4e9006f1612b76c82914770716b",
-      strip_prefix = "nsync-93815892dddafe9146a5f7e7042281d59d0f4323",
+      sha256 = "51f81ff4202bbb820cdbedc061bd2eb6765f2b5c06489e7a8694bedac329e8f8",
+      strip_prefix = "nsync-8502189abfa44c249c01c2cad64e6ed660a9a668",
   )
 
   tf_http_archive(
@@ -396,11 +450,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "grpc",
       urls = [
-          "https://mirror.bazel.build/github.com/grpc/grpc/archive/54e8f37e537794c2d814c1604c1282125f64f093.tar.gz",
-          "https://github.com/grpc/grpc/archive/54e8f37e537794c2d814c1604c1282125f64f093.tar.gz",
+          "https://mirror.bazel.build/github.com/grpc/grpc/archive/730b778632e79cc3c96ad237f282d687ee325ce7.tar.gz",
+          "https://github.com/grpc/grpc/archive/730b778632e79cc3c96ad237f282d687ee325ce7.tar.gz",
       ],
-      sha256 = "c2166b6d96daddf72fe45b2c594210c65ca17ec3c1b2e12089159a9529edb5e4",
-      strip_prefix = "grpc-54e8f37e537794c2d814c1604c1282125f64f093",
+      sha256 = "8c91a8d12e1e868cf51f7340b75507a8aa017a7e1b56f46ed6816aeb803dc9bd",
+      strip_prefix = "grpc-730b778632e79cc3c96ad237f282d687ee325ce7",
   )
 
   tf_http_archive(
@@ -419,11 +473,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "llvm",
       urls = [
-          "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/8d26b8bee4d8e7230870a600bc968c7ee8cf6f67.tar.gz",
-          "https://github.com/llvm-mirror/llvm/archive/8d26b8bee4d8e7230870a600bc968c7ee8cf6f67.tar.gz",
+          "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/bfe367d1e2a3c75b8694967a83c7f05885e8f184.tar.gz",
+          "https://github.com/llvm-mirror/llvm/archive/bfe367d1e2a3c75b8694967a83c7f05885e8f184.tar.gz",
       ],
-      sha256 = "ff5ddbe5af5e264426c8d489e7fddfc5ad7e0975f19cefe9db8c0a5d0faeb23e",
-      strip_prefix = "llvm-8d26b8bee4d8e7230870a600bc968c7ee8cf6f67",
+      sha256 = "916c82948687f6be82dbb7764f707abc319e6e4ebaef868f745bd5f44b0f281c",
+      strip_prefix = "llvm-bfe367d1e2a3c75b8694967a83c7f05885e8f184",
       build_file = str(Label("//third_party/llvm:llvm.BUILD")),
   )
 
@@ -505,11 +559,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "aws",
       urls = [
-          "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz",
-          "https://github.com/aws/aws-sdk-cpp/archive/1.0.90.tar.gz",
+          "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
+          "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
       ],
-      sha256 = "f599b57aec4f03ad696044dd430b2d201864113937353adc346f53ad47991319",
-      strip_prefix = "aws-sdk-cpp-1.0.90",
+      sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c",
+      strip_prefix = "aws-sdk-cpp-1.3.15",
       build_file = str(Label("//third_party:aws.BUILD")),
   )
 
@@ -621,11 +675,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   tf_http_archive(
       name = "bazel_toolchains",
       urls = [
-          "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/af4681c3d19f063f090222ec3d04108c4e0ca255.tar.gz",
-          "https://github.com/bazelbuild/bazel-toolchains/archive/af4681c3d19f063f090222ec3d04108c4e0ca255.tar.gz",
+          "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/f3b09700fae5d7b6e659d7cefe0dcc6e8498504c.tar.gz",
+          "https://github.com/bazelbuild/bazel-toolchains/archive/f3b09700fae5d7b6e659d7cefe0dcc6e8498504c.tar.gz",
       ],
-      sha256 = "d58bb2d6c8603f600d522b6104d6192a65339aa26cbba9f11ff5c4b36dedb928",
-      strip_prefix = "bazel-toolchains-af4681c3d19f063f090222ec3d04108c4e0ca255",
+      sha256 = "ed829b5eea8af1f405f4cc3d6ecfc3b1365bb7843171036030a31b5127002311",
+      strip_prefix = "bazel-toolchains-f3b09700fae5d7b6e659d7cefe0dcc6e8498504c",
   )
 
   tf_http_archive(
@@ -650,16 +704,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")),
   )
 
-  tf_http_archive(
-      name = "double_conversion",
-      urls = [
-          "https://mirror.bazel.build/github.com/google/double-conversion/archive/5664746c5e64dc265e7fbc1a890a6698e6ad0ebb.tar.gz",
-          "https://github.com/google/double-conversion/archive/5664746c5e64dc265e7fbc1a890a6698e6ad0ebb.tar.gz",
-      ],
-      sha256 = "ce651ba63faa55f86333f50bdd58a574327ca1565a65b875b11f5132c7c72bb6",
-      strip_prefix = "double-conversion-5664746c5e64dc265e7fbc1a890a6698e6ad0ebb",
-  )
-
   tf_http_archive(
       name = "tflite_mobilenet",
       sha256 = "23f814d1c076bdf03715dfb6cab3713aa4fbdf040fd5448c43196bd2e97a4c1b",
@@ -670,6 +714,16 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
       build_file = str(Label("//third_party:tflite_mobilenet.BUILD")),
   )
 
+  tf_http_archive(
+      name = "tflite_smartreply",
+      sha256 = "8980151b85a87a9c1a3bb1ed4748119e4a85abd3cb5744d83da4d4bd0fbeef7c",
+      urls = [
+          "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip",
+          "https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip"
+      ],
+      build_file = str(Label("//third_party:tflite_smartreply.BUILD")),
+  )
+
   ##############################################################################
   # BIND DEFINITIONS
   #
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 04c24d7511469bdc8b7fa724ca1984daa8c7e84a..8b8c71756171387b7a4b834ea94015a00313492e 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -1,3 +1,32 @@
+# Android configs. Bazel needs to have --cpu and --fat_apk_cpu both set to the
+# target CPU to build transient dependencies correctly. See
+# https://docs.bazel.build/versions/master/user-manual.html#flag--fat_apk_cpu
+build:android --crosstool_top=//external:android/crosstool
+build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
+build:android_arm --config=android
+build:android_arm --cpu=armeabi-v7a
+build:android_arm --fat_apk_cpu=armeabi-v7a
+build:android_arm64 --config=android
+build:android_arm64 --cpu=arm64-v8a
+build:android_arm64 --fat_apk_cpu=arm64-v8a
+
+# Config to use a mostly-static build and disable modular op registration
+# support (this will revert to loading TensorFlow with RTLD_GLOBAL in Python).
+# By default, TensorFlow will build with a dependence on
+# //tensorflow:libtensorflow_framework.so.
+build:monolithic --define framework_shared_object=false
+
+# For projects which use TensorFlow as part of a Bazel build process, putting
+# nothing in a bazelrc will default to a monolithic build. The following line
+# opts in to modular op registration support by default.
+build --define framework_shared_object=true
+
+# Please note that MKL on MacOS or windows is still not supported.
+# If you would like to use a local MKL instead of downloading, please set the
+# environment variable "TF_MKL_ROOT" every time before build.
+build:mkl --define=using_mkl=true
+build:mkl -c opt
+
 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true